From 91447636331957f3d9b5ca5b508f07c526b0074d Mon Sep 17 00:00:00 2001
From: Apple
Date: Thu, 28 Apr 2005 00:52:30 +0000
Subject: [PATCH] xnu-792.tar.gz

---
 EXTERNAL_HEADERS/Info.plist | 22 +
 EXTERNAL_HEADERS/Makefile | 39 +
 EXTERNAL_HEADERS/ar.h | 2 +
 EXTERNAL_HEADERS/architecture/Makefile | 32 +
 EXTERNAL_HEADERS/architecture/byte_order.h | 15 +-
 EXTERNAL_HEADERS/architecture/i386/Makefile | 37 +
 EXTERNAL_HEADERS/architecture/ppc/Makefile | 34 +
 .../architecture/ppc/byte_order.h | 78 +-
 EXTERNAL_HEADERS/architecture/ppc/cframe.h | 18 +-
 EXTERNAL_HEADERS/bsd/i386/ansi.h | 104 -
 EXTERNAL_HEADERS/bsd/ppc/ansi.h | 104 -
 EXTERNAL_HEADERS/i386/Makefile | 27 +
 .../i386/_limits.h | 8 +-
 EXTERNAL_HEADERS/{bsd => }/i386/limits.h | 7 +-
 EXTERNAL_HEADERS/mach-o/Makefile | 30 +
 EXTERNAL_HEADERS/mach-o/fat.h | 15 +
 EXTERNAL_HEADERS/mach-o/loader.h | 116 +-
 EXTERNAL_HEADERS/mach-o/nlist.h | 14 +
 EXTERNAL_HEADERS/machine/Makefile | 30 +
 EXTERNAL_HEADERS/ppc/Makefile | 27 +
 .../disk.h => EXTERNAL_HEADERS/ppc/_limits.h | 8 +-
 EXTERNAL_HEADERS/{bsd => }/ppc/limits.h | 6 +-
 Makefile | 6 +-
 bsd/Makefile | 2 +
 bsd/bsm/Makefile | 5 +-
 bsd/bsm/audit.h | 5 +-
 bsd/bsm/audit_kernel.h | 47 +-
 bsd/bsm/audit_klib.h | 10 +-
 bsd/bsm/audit_record.h | 14 +-
 bsd/conf/MASTER | 18 +-
 bsd/conf/MASTER.i386 | 4 +-
 bsd/conf/MASTER.ppc | 6 +-
 bsd/conf/Makefile | 4 +
 bsd/conf/Makefile.i386 | 294 +
 bsd/conf/Makefile.template | 38 +-
 bsd/conf/files | 69 +-
 bsd/conf/files.i386 | 1 -
 bsd/conf/files.ppc | 2 +-
 bsd/conf/param.c | 6 +-
 bsd/conf/tools/Makefile | 8 +-
 bsd/conf/tools/newvers/Makefile | 49 -
 bsd/conf/tools/newvers/newvers.csh | 33 -
 bsd/conf/version.major | 1 -
 bsd/conf/version.minor | 1 -
 bsd/conf/version.variant | 1 -
 bsd/crypto/Makefile | 4 +-
 bsd/crypto/{rijndael => aes}/Makefile | 8 +-
 bsd/crypto/aes/aes.h | 175 +
 bsd/crypto/aes/aescrypt.c | 407 +
 bsd/crypto/aes/aeskey.c | 455 ++
 bsd/crypto/aes/aesopt.h | 753 ++
 bsd/crypto/aes/aestab.c | 384 +
 bsd/crypto/aes/aestab.h | 175 +
 bsd/crypto/blowfish/Makefile | 2 +-
 bsd/crypto/blowfish/blowfish.h | 6 +-
 bsd/crypto/cast128/Makefile | 2 +-
 bsd/crypto/cast128/cast128.h | 14 +-
 bsd/crypto/des/Makefile | 2 +-
 bsd/crypto/des/des.h | 41 +-
 bsd/crypto/md5.c | 2 +-
 bsd/crypto/md5.h | 8 +-
 bsd/crypto/rc4/Makefile | 2 +-
 bsd/crypto/rijndael/boxes-fst.dat | 958 ---
 bsd/crypto/rijndael/rijndael-alg-fst.c | 488 --
 bsd/crypto/rijndael/rijndael-alg-fst.h | 34 -
 bsd/crypto/rijndael/rijndael-api-fst.c | 484 --
 bsd/crypto/rijndael/rijndael-api-fst.h | 104 -
 bsd/crypto/rijndael/rijndael.h | 4 -
 bsd/crypto/rijndael/rijndael_local.h | 11 -
 bsd/crypto/sha1.c | 2 +-
 bsd/crypto/sha1.h | 8 +-
 bsd/crypto/sha2/Makefile | 2 +-
 bsd/crypto/sha2/sha2.h | 34 +-
 bsd/dev/Makefile | 7 +-
 bsd/dev/i386/conf.c | 113 +-
 bsd/dev/i386/cons.c | 137 +-
 bsd/dev/i386/cons.h | 16 +-
 bsd/dev/i386/kern_machdep.c | 98 +-
 bsd/dev/i386/km.c | 92 +-
 bsd/dev/i386/mem.c | 78 +-
 bsd/dev/i386/memmove.c | 6 +-
 bsd/dev/i386/stubs.c | 91 +-
 bsd/dev/i386/sysctl.c | 37 +-
 bsd/dev/i386/unix_signal.c | 254 +-
 bsd/dev/i386/unix_startup.c | 160 -
 bsd/dev/memdev.c | 172 +-
 bsd/dev/ppc/chud/chud_bsd_callback.c | 27 +-
 bsd/dev/ppc/chud/chud_process.c | 3 +-
 bsd/dev/ppc/conf.c | 7 +-
 bsd/dev/ppc/cons.c | 72 +-
 bsd/dev/ppc/cons.h | 8 +-
 bsd/dev/ppc/kern_machdep.c | 256 +-
 bsd/dev/ppc/km.c | 127 +-
 bsd/dev/ppc/mem.c | 76 +-
 bsd/dev/ppc/memmove.c | 4 +-
 bsd/dev/ppc/munge.s | 356 +
 bsd/dev/ppc/nvram.c | 7 +-
 bsd/dev/ppc/stubs.c | 51 +-
 bsd/dev/ppc/systemcalls.c | 383 +-
 bsd/dev/ppc/unix_signal.c | 612 +-
 bsd/dev/ppc/xsumas.s | 600 +-
 bsd/dev/random/YarrowCoreLib/port/smf.c | 7 +-
 bsd/dev/random/YarrowCoreLib/src/comp.c | 10 +-
 bsd/dev/random/YarrowCoreLib/src/prng.c | 13 +-
 bsd/dev/random/YarrowCoreLib/src/sha1mod.c | 11 +-
 bsd/dev/random/YarrowCoreLib/src/sha1mod.h | 4 +-
 bsd/dev/random/YarrowCoreLib/src/smf.h | 2 +-
 .../random/YarrowCoreLib/src/yarrowUtils.c | 1 +
 bsd/dev/random/randomdev.c | 44 +-
 bsd/dev/random/randomdev.h | 4 +-
 bsd/dev/{ppc => }/unix_startup.c | 129 +-
 bsd/dev/vn/shadow.c | 12 +-
 bsd/dev/vn/shadow.h | 5 +
 bsd/dev/vn/vn.c | 1060 ++-
 bsd/hfs/MacOSStubs.c | 16 +-
 bsd/hfs/Makefile | 2 +-
 bsd/hfs/hfs.h | 349 +-
 bsd/hfs/hfs_attrlist.c | 955 ++-
 bsd/hfs/hfs_btreeio.c | 305 +-
 bsd/hfs/hfs_catalog.c | 1055 ++-
 bsd/hfs/hfs_catalog.h | 103 +-
 bsd/hfs/hfs_chash.c | 392 +-
 bsd/hfs/hfs_cnode.c | 1257 +--
 bsd/hfs/hfs_cnode.h | 202 +-
 bsd/hfs/hfs_dbg.h | 2 +-
 bsd/hfs/hfs_encodinghint.c | 10 +-
 bsd/hfs/hfs_encodings.c | 62 +-
 bsd/hfs/hfs_endian.c | 49 +
 bsd/hfs/hfs_format.h | 104 +-
 bsd/hfs/hfs_fsctl.h | 65 +
 bsd/hfs/hfs_hotfiles.c | 812 +-
 bsd/hfs/hfs_hotfiles.h | 12 +-
 bsd/hfs/hfs_link.c | 272 +-
 bsd/hfs/hfs_lockf.c | 707 --
 bsd/hfs/hfs_lockf.h | 117 -
 bsd/hfs/hfs_lookup.c | 531 +-
 bsd/hfs/hfs_macos_defs.h | 3 +-
 bsd/hfs/hfs_mount.h | 11 +-
 bsd/hfs/hfs_notification.c | 11 +-
 bsd/hfs/hfs_quota.c | 543 +-
 bsd/hfs/hfs_quota.h | 26 +-
 bsd/hfs/hfs_readwrite.c | 3071 ++++----
 bsd/hfs/hfs_search.c | 460 +-
 bsd/hfs/hfs_vfsops.c | 2721 ++++---
 bsd/hfs/hfs_vfsutils.c | 969 ++-
 bsd/hfs/hfs_vnops.c | 4310 +++++------
 bsd/hfs/hfs_xattr.c | 1062 +++
 bsd/hfs/hfscommon/BTree/BTree.c | 12 +-
 bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c | 40 +-
 bsd/hfs/hfscommon/BTree/BTreeScanner.c | 32 +-
 bsd/hfs/hfscommon/BTree/BTreeTreeOps.c | 2 +-
 bsd/hfs/hfscommon/Catalog/Catalog.c | 245 -
 bsd/hfs/hfscommon/Catalog/CatalogIterators.c | 643 --
 bsd/hfs/hfscommon/Catalog/CatalogUtilities.c | 4 +-
 bsd/hfs/hfscommon/Catalog/FileIDsServices.c | 29 +-
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 336 +-
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 177 +-
 bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c | 32 +-
 bsd/hfs/hfscommon/headers/BTreeScanner.h | 3 +-
 bsd/hfs/hfscommon/headers/BTreesInternal.h | 6 +-
 bsd/hfs/hfscommon/headers/CatalogPrivate.h | 84 +-
 bsd/hfs/hfscommon/headers/FileMgrInternal.h | 57 +-
 bsd/i386/Makefile | 14 +-
 bsd/i386/_types.h | 118 +
 bsd/i386/cpu.h | 36 -
 bsd/i386/endian.h | 49 +-
 bsd/i386/exec.h | 11 +-
 bsd/i386/label_t.h | 40 -
 bsd/i386/param.h | 8 +-
 bsd/i386/reboot.h | 6 +-
 bsd/i386/setjmp.h | 33 +-
 bsd/i386/signal.h | 8 +-
 bsd/i386/spl.h | 55 -
 bsd/i386/table.h | 33 -
 bsd/i386/types.h | 35 +-
 bsd/i386/ucontext.h | 26 +-
 bsd/i386/vmparam.h | 2 +-
 bsd/isofs/cd9660/Makefile | 4 +-
 bsd/isofs/cd9660/cd9660_bmap.c | 96 +-
 bsd/isofs/cd9660/cd9660_lookup.c | 156 +-
 bsd/isofs/cd9660/cd9660_mount.h | 30 +-
 bsd/isofs/cd9660/cd9660_node.c | 192 +-
 bsd/isofs/cd9660/cd9660_node.h | 88 +-
 bsd/isofs/cd9660/cd9660_rrip.c | 162 +-
 bsd/isofs/cd9660/cd9660_util.c | 367 +-
 bsd/isofs/cd9660/cd9660_vfsops.c | 869 ++-
 bsd/isofs/cd9660/cd9660_vnops.c | 974 +--
 bsd/isofs/cd9660/iso.h | 83 +-
 bsd/isofs/cd9660/iso_rrip.h | 16 +-
 bsd/kern/ast.h | 14 +-
 bsd/kern/bsd_init.c | 286 +-
 bsd/kern/bsd_stubs.c | 14 +-
 bsd/kern/init_sysent.c | 1240 ++-
 bsd/kern/kdebug.c | 588 +-
 bsd/kern/kern_acct.c | 161 +-
 bsd/kern/kern_aio.c | 569 +-
 bsd/kern/kern_audit.c | 709 +-
 bsd/kern/kern_authorization.c | 1014 +++
 bsd/kern/kern_bsm_audit.c | 55 +-
 bsd/kern/kern_bsm_klib.c | 70 +-
 bsd/kern/kern_bsm_token.c | 112 +-
 bsd/kern/kern_clock.c | 96 +-
 bsd/kern/kern_control.c | 1099 ++-
 bsd/kern/kern_core.c | 238 +-
 bsd/kern/kern_credential.c | 2268 ++++
 bsd/kern/kern_descrip.c | 2437 ++++--
 bsd/kern/kern_event.c | 2098 +++--
 bsd/kern/kern_exec.c | 2138 +++---
 bsd/kern/kern_exit.c | 880 ++-
 bsd/kern/kern_fork.c | 276 +-
 bsd/kern/kern_ktrace.c | 322 +-
 bsd/kern/kern_lock.c | 23 +-
 .../ufs/ufs_lockf.c => kern/kern_lockf.c} | 469 +-
 bsd/kern/kern_malloc.c | 51 +-
 bsd/kern/kern_mib.c | 99 +-
 bsd/kern/kern_mman.c | 865 +--
 bsd/kern/kern_newsysctl.c | 64 +-
 bsd/kern/kern_panicinfo.c | 273 +-
 bsd/kern/kern_pcsamples.c | 215 +-
 bsd/kern/kern_physio.c | 145 +-
 bsd/kern/kern_proc.c | 221 +-
 bsd/kern/kern_prot.c | 931 ++-
 bsd/kern/kern_resource.c | 178 +-
 bsd/kern/kern_shutdown.c | 124 +-
 bsd/kern/kern_sig.c | 954 +--
 bsd/kern/kern_subr.c | 1046 ++-
 bsd/kern/kern_symfile.c | 115 +-
 bsd/kern/kern_synch.c | 267 +-
 bsd/kern/kern_sysctl.c | 1180 ++-
 bsd/kern/kern_time.c | 297 +-
 bsd/kern/kern_xxx.c | 134 +-
 bsd/kern/kpi_mbuf.c | 939 +++
 bsd/kern/kpi_socket.c | 772 ++
 bsd/kern/kpi_socketfilter.c | 595 ++
 bsd/kern/mach_fat.c | 61 +-
 bsd/kern/mach_header.c | 157 +-
 bsd/kern/mach_header.h | 21 +-
 bsd/kern/mach_loader.c | 745 +-
 bsd/kern/mach_loader.h | 23 +-
 bsd/kern/mach_process.c | 194 +-
 bsd/kern/makesyscalls.sh | 694 ++
 bsd/kern/netboot.c | 61 +-
 bsd/kern/posix_sem.c | 626 +-
 bsd/kern/posix_shm.c | 551 +-
 bsd/kern/qsort.c | 4 +-
 bsd/kern/spl.c | 33 +-
 bsd/kern/subr_log.c | 103 +-
 bsd/kern/subr_prf.c | 9 +-
 bsd/kern/subr_prof.c | 247 +-
 bsd/kern/subr_xxx.c | 7 +-
 bsd/kern/sys_domain.c | 5 +-
 bsd/kern/sys_generic.c | 2385 +++--
 bsd/kern/sys_pipe.c | 1646 ++++
 bsd/kern/sys_socket.c | 274 +-
 bsd/kern/syscalls.c | 825 +-
 bsd/kern/syscalls.master | 474 ++
 bsd/kern/sysctl_init.c | 120 +-
 bsd/kern/sysv_ipc.c | 81 +-
 bsd/kern/sysv_msg.c | 668 +-
 bsd/kern/sysv_sem.c | 911 ++-
 bsd/kern/sysv_shm.c | 853 ++-
 bsd/kern/tty.c | 325 +-
 bsd/kern/tty_compat.c | 358 +-
 bsd/kern/tty_conf.c | 50 +-
 bsd/kern/tty_pty.c | 350 +-
 bsd/kern/tty_subr.c | 17 +-
 bsd/kern/tty_tb.c | 2 +-
 bsd/kern/tty_tty.c | 117 +-
 bsd/kern/ubc_subr.c | 1049 +--
 bsd/kern/uipc_domain.c | 154 +-
 bsd/kern/uipc_mbuf.c | 1251 ++-
 bsd/kern/uipc_mbuf2.c | 189 +-
 bsd/kern/uipc_proto.c | 19 +-
 bsd/kern/uipc_socket.c | 1687 +++-
 bsd/kern/uipc_socket2.c | 598 +-
 bsd/kern/uipc_syscalls.c | 1673 ++--
 bsd/kern/uipc_usrreq.c | 597 +-
 bsd/libkern/Makefile | 3 -
 bsd/libkern/crc32.c | 104 +
 bsd/libkern/inet_ntoa.c | 70 -
 bsd/libkern/inet_ntop.c | 208 +
 bsd/libkern/libkern.h | 50 +-
 bsd/libkern/scanc.c | 5 +-
 bsd/machine/Makefile | 16 +-
 bsd/machine/{unix_traps.h => _limits.h} | 16 +-
 bsd/machine/{table.h => _types.h} | 16 +-
 bsd/machine/cons.h | 4 +-
 bsd/machine/disklabel.h | 4 +-
 bsd/machine/endian.h | 4 +-
 bsd/machine/exec.h | 4 +-
 bsd/machine/param.h | 4 +-
 bsd/machine/profile.h | 4 +-
 bsd/machine/psl.h | 4 +-
 bsd/machine/ptrace.h | 4 +-
 bsd/machine/reboot.h | 4 +-
 bsd/machine/reg.h | 4 +-
 bsd/machine/setjmp.h | 9 +-
 bsd/machine/signal.h | 4 +-
 bsd/machine/spl.h | 37 +-
 bsd/machine/types.h | 4 +-
 bsd/machine/ucontext.h | 4 +-
 bsd/machine/user.h | 35 -
 bsd/machine/vmparam.h | 4 +-
 bsd/man/man2/Makefile | 24 +-
 bsd/man/man2/accept.2 | 2 +-
 bsd/man/man2/aio_cancel.2 | 117 +
 bsd/man/man2/aio_error.2 | 100 +
 bsd/man/man2/aio_read.2 | 211 +
 bsd/man/man2/aio_return.2 | 103 +
 bsd/man/man2/aio_suspend.2 | 113 +
 bsd/man/man2/aio_write.2 | 204 +
 bsd/man/man2/bind.2 | 2 +-
 bsd/man/man2/brk.2 | 150 -
 bsd/man/man2/chflags.2 | 4 +-
 bsd/man/man2/chown.2 | 94 +-
 bsd/man/man2/connect.2 | 14 +-
 bsd/man/man2/exchangedata.2 | 190 +
 bsd/man/man2/fcntl.2 | 13 +
 bsd/man/man2/flock.2 | 2 +-
 bsd/man/man2/fsync.2 | 42 +-
 bsd/man/man2/getattrlist.2 | 1684 +++++
 bsd/man/man2/getdirentriesattr.2 | 427 ++
 bsd/man/man2/getfsstat.2 | 21 +-
 bsd/man/man2/getpeername.2 | 2 +-
 bsd/man/man2/getsockname.2 | 2 +-
 bsd/man/man2/getsockopt.2 | 7 +-
 bsd/man/man2/getxattr.2 | 165 +
 bsd/man/man2/intro.2 | 29 +-
 bsd/man/man2/listxattr.2 | 153 +
 bsd/man/man2/madvise.2 | 2 +-
 bsd/man/man2/mkfifo.2 | 2 +-
 bsd/man/man2/poll.2 | 198 +
 bsd/man/man2/posix_madvise.2 | 2 +-
 bsd/man/man2/ptrace.2 | 245 +-
 bsd/man/man2/quotactl.2 | 2 +-
 bsd/man/man2/recv.2 | 16 +-
 bsd/man/man2/removexattr.2 | 135 +
 bsd/man/man2/sbrk.2 | 1 -
 bsd/man/man2/searchfs.2 | 804 ++
 bsd/man/man2/select.2 | 10 +-
 bsd/man/man2/semctl.2 | 2 -
 bsd/man/man2/semget.2 | 2 -
 bsd/man/man2/semop.2 | 2 -
 bsd/man/man2/send.2 | 2 +-
 bsd/man/man2/setattrlist.2 | 363 +
 bsd/man/man2/setxattr.2 | 175 +
 bsd/man/man2/shmget.2 | 4 +-
 bsd/man/man2/shutdown.2 | 12 +-
 bsd/man/man2/vfork.2 | 2 +-
 bsd/man/man4/Makefile | 2 +
 bsd/man/man4/arp.4 | 3 +-
 bsd/man/man4/bpf.4 | 4 +
 bsd/man/man4/dummynet.4 | 64 +
 bsd/man/man4/icmp6.4 | 366 +-
 bsd/man/man4/ifmib.4 | 196 +
 bsd/man/man4/ip6.4 | 1155 ++-
 bsd/man/man4/termios.4 | 4 +-
 bsd/man/man4/unix.4 | 29 +
 bsd/man/man5/types.5 | 4 +-
 bsd/man/man9/fetch.9 | 49 +-
 bsd/man/man9/store.9 | 47 +-
 bsd/miscfs/deadfs/dead_vnops.c | 303 +-
 bsd/miscfs/devfs/devfs.h | 12 +-
 bsd/miscfs/devfs/devfs_proto.h | 9 +-
 bsd/miscfs/devfs/devfs_tree.c | 606 +-
 bsd/miscfs/devfs/devfs_vfsops.c | 227 +-
 bsd/miscfs/devfs/devfs_vnops.c | 1256 ++-
 bsd/miscfs/devfs/devfsdefs.h | 83 +-
 bsd/miscfs/fdesc/fdesc.h | 29 +-
 bsd/miscfs/fdesc/fdesc_vfsops.c | 153 +-
 bsd/miscfs/fdesc/fdesc_vnops.c | 515 +-
 bsd/miscfs/fifofs/fifo.h | 105 +-
 bsd/miscfs/fifofs/fifo_vnops.c | 486 +-
 bsd/miscfs/nullfs/null.h | 13 +-
 bsd/miscfs/nullfs/null_subr.c | 28 +-
 bsd/miscfs/nullfs/null_vfsops.c | 155 +-
 bsd/miscfs/nullfs/null_vnops.c | 233 +-
 bsd/miscfs/specfs/spec_lockf.c | 706 --
 bsd/miscfs/specfs/spec_vnops.c | 692 +-
 bsd/miscfs/specfs/specdev.h | 98 +-
 bsd/miscfs/synthfs/synthfs.h | 97 +-
 bsd/miscfs/synthfs/synthfs_util.c | 106 +-
 bsd/miscfs/synthfs/synthfs_vfsops.c | 377 +-
 bsd/miscfs/synthfs/synthfs_vnops.c | 1146 +--
 bsd/miscfs/union/union.h | 60 +-
 bsd/miscfs/union/union_subr.c | 323 +-
 bsd/miscfs/union/union_vfsops.c | 298 +-
 bsd/miscfs/union/union_vnops.c | 936 +--
 bsd/miscfs/volfs/volfs.h | 139 +-
 bsd/miscfs/volfs/volfs_vfsops.c | 326 +-
 bsd/miscfs/volfs/volfs_vnops.c | 1607 ++--
 bsd/net/Makefile | 28 +-
 bsd/net/bpf.c | 783 +-
 bsd/net/bpf.h | 63 +-
 bsd/net/bpf_filter.c | 28 +-
 bsd/net/bpfdesc.h | 4 +-
 bsd/net/bridge.c | 166 +-
 bsd/net/bridge.h | 1 -
 bsd/net/bsd_comp.c | 39 +-
 bsd/net/devtimer.c | 276 +
 bsd/net/devtimer.h | 89 +
 bsd/net/dlil.c | 3865 +++++-----
 bsd/net/dlil.h | 301 +-
 bsd/net/dlil_pvt.h | 16 +-
 bsd/net/ether_at_pr_module.c | 350 +-
 bsd/net/ether_if_module.c | 891 +--
 bsd/net/ether_inet6_pr_module.c | 378 +-
 bsd/net/ether_inet_pr_module.c | 609 +-
 bsd/net/ethernet.h | 23 +-
 bsd/{ppc/label_t.h => net/ieee8023ad.h} | 36 +-
 bsd/net/if.c | 1253 +--
 bsd/net/if.h | 206 +-
 bsd/net/if_arp.h | 54 -
 bsd/net/if_atm.h | 19 +-
 bsd/net/if_bond.c | 4485 +++++++++++
 bsd/net/if_bond_var.h | 92 +
 bsd/net/if_disc.c | 4 +-
 bsd/net/if_dl.h | 11 +-
 bsd/net/if_dummy.c | 70 +-
 bsd/net/if_ether.h | 52 +
 bsd/net/if_ethersubr.c | 219 +-
 bsd/net/if_faith.c | 56 +-
 bsd/net/if_faith.h | 8 +-
 bsd/net/if_gif.c | 267 +-
 bsd/net/if_gif.h | 21 +-
 bsd/net/if_loop.c | 468 +-
 bsd/net/if_media.c | 18 +-
 bsd/net/if_media.h | 38 +-
 bsd/net/if_mib.c | 116 +-
 bsd/net/if_mib.h | 27 +-
 bsd/net/if_pppvar.h | 28 +-
 bsd/net/if_sppp.h | 205 -
 bsd/net/if_stf.c | 217 +-
 bsd/net/if_stf.h | 41 -
 bsd/net/if_tun.c | 764 --
 bsd/net/if_tun.h | 66 -
 bsd/net/if_tunvar.h | 73 -
 bsd/net/if_types.h | 11 +-
 bsd/net/if_var.h | 599 +-
 bsd/net/if_vlan.c | 2108 ++--
 bsd/net/if_vlan_var.h | 4 +
 bsd/net/init.c | 107 +
 bsd/net/init.h | 59 +
 bsd/net/iso88025.h | 11 -
 bsd/net/kext_net.c | 5 +-
 bsd/net/kext_net.h | 209 +-
 bsd/net/kpi_interface.c | 1355 ++++
 bsd/net/kpi_interface.h | 1617 ++++
 bsd/net/kpi_interfacefilter.c | 47 +
 bsd/net/kpi_interfacefilter.h | 195 +
 bsd/net/kpi_protocol.c | 366 +
 bsd/net/kpi_protocol.h | 176 +
 bsd/net/lacp.h | 418 +
 bsd/net/multicast_list.c | 145 +
 bsd/{machine/ansi.h => net/multicast_list.h} | 49 +-
 bsd/net/ndrv.c | 571 +-
 bsd/net/ndrv.h | 15 +-
 bsd/net/ndrv_var.h | 15 +-
 bsd/net/net_osdep.c | 1 -
 bsd/net/net_osdep.h | 6 +-
 bsd/net/netisr.c | 133 -
 bsd/net/pfkeyv2.h | 8 +-
 bsd/net/ppp_comp.h | 38 +-
 bsd/net/ppp_deflate.c | 35 +-
 bsd/net/ppp_defs.h | 8 -
 bsd/net/radix.c | 74 +-
 bsd/net/radix.h | 65 +-
 bsd/net/raw_cb.c | 12 +
 bsd/net/raw_cb.h | 24 +-
 bsd/net/raw_usrreq.c | 70 +-
 bsd/net/route.c | 359 +-
 bsd/net/route.h | 116 +-
 bsd/net/rtsock.c | 793 +-
 bsd/net/slcompress.c | 635 --
 bsd/net/slcompress.h | 188 -
 bsd/net/zlib.c | 15 +-
 bsd/net/zlib.h | 8 +-
 bsd/netat/Makefile | 20 +-
 bsd/netat/adsp.c | 285 +-
 bsd/netat/adsp.h | 42 +-
 bsd/netat/adsp_CLDeny.c | 4 +-
 bsd/netat/adsp_Close.c | 2 +-
 bsd/netat/adsp_Control.c | 2 +-
 bsd/netat/adsp_Timer.c | 4 +-
 bsd/netat/adsp_attention.c | 4 +-
 bsd/netat/adsp_internal.h | 37 +-
 bsd/netat/appletalk.h | 9 +-
 bsd/netat/asp.h | 9 +-
 bsd/netat/asp_proto.c | 65 +-
 bsd/netat/at.c | 47 +-
 bsd/netat/at_aarp.h | 15 +-
 bsd/netat/at_config.h | 3 +
 bsd/netat/at_ddp_brt.h | 6 +-
 bsd/netat/at_pat.h | 3 +
 bsd/netat/at_pcb.c | 7 +-
 bsd/netat/at_pcb.h | 27 +-
 bsd/netat/at_proto.c | 49 +-
 bsd/netat/at_snmp.h | 3 +
 bsd/netat/at_var.h | 40 +-
 bsd/netat/atp.h | 9 +-
 bsd/netat/atp_misc.c | 4 +-
 bsd/netat/atp_open.c | 2 +-
 bsd/netat/atp_read.c | 19 +-
 bsd/netat/atp_write.c | 117 +-
 bsd/netat/aurp.h | 87 +-
 bsd/netat/aurp_aurpd.c | 46 +-
 bsd/netat/aurp_cfg.c | 2 +-
 bsd/netat/aurp_misc.c | 7 +-
 bsd/netat/aurp_open.c | 12 +-
 bsd/netat/aurp_ri.c | 36 +-
 bsd/netat/aurp_rx.c | 1 +
 bsd/netat/aurp_tickle.c | 9 +-
 bsd/netat/ddp.c | 11 +-
 bsd/netat/ddp.h | 12 +-
 bsd/netat/ddp_aarp.c | 19 +-
 bsd/netat/ddp_brt.c | 14 +-
 bsd/netat/ddp_lap.c | 66 +-
 bsd/netat/ddp_nbp.c | 4 +-
 bsd/netat/ddp_proto.c | 1 -
 bsd/netat/ddp_r_rtmp.c | 51 +-
 bsd/netat/ddp_r_zip.c | 56 +-
 bsd/netat/ddp_rtmp.c | 13 +-
 bsd/netat/ddp_usrreq.c | 2 +-
 bsd/netat/debug.h | 6 +-
 bsd/netat/drv_dep.c | 64 +-
 bsd/netat/ep.h | 3 +
 bsd/netat/lap.h | 3 +
 bsd/netat/nbp.h | 19 +-
 bsd/netat/pap.h | 3 +
 bsd/netat/routing_tables.h | 17 +-
 bsd/netat/rtmp.h | 3 +
 bsd/netat/sys_dep.c | 185 +-
 bsd/netat/sys_glue.c | 176 +-
 bsd/netat/sysglue.h | 42 +-
 bsd/netat/zip.h | 3 +
 bsd/netinet/Makefile | 29 +-
 bsd/netinet/bootp.h | 2 +
 bsd/netinet/dhcp_options.h | 4 +-
 bsd/netinet/icmp6.h | 30 +-
 bsd/netinet/icmp_var.h | 12 +-
 bsd/netinet/if_atm.h | 10 +-
 bsd/netinet/if_ether.c | 923 ---
 bsd/netinet/if_ether.h | 76 +-
 bsd/netinet/if_fddi.h | 16 +-
 bsd/netinet/if_tun.h | 9 +-
 bsd/netinet/igmp.c | 45 +-
 bsd/netinet/igmp_var.h | 23 +-
 bsd/netinet/in.c | 432 +-
 bsd/netinet/in.h | 99 +-
 bsd/netinet/in_arp.c | 876 +++
 bsd/netinet/in_arp.h | 131 +
 bsd/netinet/in_bootp.c | 63 +-
 bsd/netinet/in_gif.c | 16 +-
 bsd/netinet/in_gif.h | 14 +-
 bsd/netinet/in_pcb.c | 709 +-
 bsd/netinet/in_pcb.h | 293 +-
 bsd/netinet/in_proto.c | 116 +-
 bsd/netinet/in_rmx.c | 57 +-
 bsd/netinet/in_systm.h | 15 +-
 bsd/netinet/in_var.h | 49 +-
 bsd/netinet/ip.h | 4 +
 bsd/netinet/ip6.h | 18 +-
 bsd/netinet/ip_compat.h | 15 +-
 bsd/netinet/ip_divert.c | 300 +-
 bsd/netinet/ip_divert.h | 92 +
 bsd/netinet/ip_dummynet.c | 877 ++-
 bsd/netinet/ip_dummynet.h | 225 +-
 bsd/netinet/ip_ecn.h | 10 +-
 bsd/netinet/ip_encap.c | 10 +-
 bsd/netinet/ip_encap.h | 28 +-
 bsd/netinet/ip_flow.c | 4 +-
 bsd/netinet/ip_flow.h | 4 +-
 bsd/netinet/ip_fw.h | 21 +-
 bsd/netinet/ip_fw2.c | 3324 ++++++++
 bsd/netinet/ip_fw2.h | 443 ++
 bsd/netinet/ip_fw2_compat.c | 2253 ++++++
 bsd/netinet/ip_fw2_compat.h | 375 +
 bsd/netinet/ip_icmp.c | 86 +-
 bsd/netinet/ip_icmp.h | 10 +-
 bsd/netinet/ip_id.c | 10 +-
 bsd/netinet/ip_input.c | 712 +-
 bsd/netinet/ip_mroute.c | 66 +-
 bsd/netinet/ip_mroute.h | 29 +-
 bsd/netinet/ip_output.c | 675 +-
 bsd/netinet/ip_var.h | 98 +-
 bsd/netinet/kpi_ipfilter.c | 496 ++
 bsd/netinet/kpi_ipfilter.h | 193 +
 .../table.h => netinet/kpi_ipfilter_var.h} | 46 +-
 bsd/netinet/raw_ip.c | 247 +-
 bsd/netinet/tcp.h | 45 +-
 bsd/netinet/tcp_debug.h | 9 +-
 bsd/netinet/tcp_fsm.h | 10 +-
 bsd/netinet/tcp_input.c | 514 +-
 bsd/netinet/tcp_output.c | 146 +-
 bsd/netinet/tcp_seq.h | 10 +-
 bsd/netinet/tcp_subr.c | 217 +-
 bsd/netinet/tcp_timer.c | 236 +-
 bsd/netinet/tcp_timer.h | 20 +-
 bsd/netinet/tcp_usrreq.c | 105 +-
 bsd/netinet/tcp_var.h | 268 +-
 bsd/netinet/tcpip.h | 2 +-
 bsd/netinet/udp_usrreq.c | 494 +-
 bsd/netinet/udp_var.h | 28 +-
 bsd/netinet6/Makefile | 27 +-
 bsd/netinet6/ah.h | 32 +-
 bsd/netinet6/ah6.h | 18 +-
 bsd/netinet6/ah_core.c | 91 +-
 bsd/netinet6/ah_input.c | 53 +-
 bsd/netinet6/ah_output.c | 2 +-
 bsd/netinet6/dest6.c | 6 +-
 bsd/netinet6/esp.h | 48 +-
 bsd/netinet6/esp6.h | 16 +-
 bsd/netinet6/esp_core.c | 112 +-
 bsd/netinet6/esp_input.c | 49 +-
 bsd/netinet6/esp_output.c | 4 +-
 bsd/netinet6/esp_rijndael.c | 397 +-
 bsd/netinet6/esp_rijndael.h | 19 +-
 bsd/netinet6/frag6.c | 42 +-
 bsd/netinet6/icmp6.c | 168 +-
 bsd/netinet6/in6.c | 463 +-
 bsd/netinet6/in6.h | 305 +-
 bsd/netinet6/in6_gif.c | 14 +-
 bsd/netinet6/in6_gif.h | 12 +-
 bsd/netinet6/in6_ifattach.c | 220 +-
 bsd/netinet6/in6_ifattach.h | 22 +-
 bsd/netinet6/in6_pcb.c | 367 +-
 bsd/netinet6/in6_pcb.h | 66 +-
 bsd/netinet6/in6_prefix.c | 147 +-
 bsd/netinet6/in6_prefix.h | 9 +-
 bsd/netinet6/in6_proto.c | 89 +-
 bsd/netinet6/in6_rmx.c | 106 +-
 bsd/netinet6/in6_src.c | 112 +-
 bsd/netinet6/in6_var.h | 60 +-
 bsd/netinet6/ip6_ecn.h | 10 +-
 bsd/netinet6/ip6_forward.c | 77 +-
 bsd/netinet6/ip6_fw.c | 1369 ++++
 bsd/netinet6/ip6_fw.h | 12 +-
 bsd/netinet6/ip6_input.c | 284 +-
 bsd/netinet6/ip6_mroute.c | 181 +-
 bsd/netinet6/ip6_mroute.h | 44 +-
 bsd/netinet6/ip6_output.c | 237 +-
 bsd/netinet6/ip6_var.h | 120 +-
 bsd/netinet6/ip6protosw.h | 41 +-
 bsd/netinet6/ipcomp.h | 18 +-
 bsd/netinet6/ipcomp6.h | 12 +-
 bsd/netinet6/ipcomp_core.c | 11 +-
 bsd/netinet6/ipcomp_input.c | 20 +-
 bsd/netinet6/ipcomp_output.c | 11 +-
 bsd/netinet6/ipsec.c | 185 +-
 bsd/netinet6/ipsec.h | 95 +-
 bsd/netinet6/ipsec6.h | 46 +-
 bsd/netinet6/mld6.c | 38 +-
 bsd/netinet6/mld6_var.h | 18 +-
 bsd/netinet6/nd6.c | 520 +-
 bsd/netinet6/nd6.h | 150 +-
 bsd/netinet6/nd6_nbr.c | 237 +-
 bsd/netinet6/nd6_rtr.c | 345 +-
 bsd/netinet6/pim6_var.h | 14 +-
 bsd/netinet6/raw_ip6.c | 126 +-
 bsd/netinet6/raw_ip6.h | 4 -
 bsd/netinet6/route6.c | 14 +-
 bsd/netinet6/scope6.c | 39 +-
 bsd/netinet6/scope6_var.h | 22 +-
 bsd/netinet6/tcp6_var.h | 16 +-
 bsd/netinet6/udp6_output.c | 25 +-
 bsd/netinet6/udp6_usrreq.c | 192 +-
 bsd/netinet6/udp6_var.h | 16 +-
 bsd/netkey/Makefile | 11 +-
 bsd/netkey/key.c | 743 +-
 bsd/netkey/key.h | 57 +-
 bsd/netkey/key_debug.c | 18 +-
 bsd/netkey/key_debug.h | 32 +-
 bsd/netkey/key_var.h | 11 +-
 bsd/netkey/keydb.c | 2 +-
 bsd/netkey/keydb.h | 28 +-
 bsd/netkey/keysock.c | 225 +-
 bsd/netkey/keysock.h | 16 +-
 bsd/nfs/Makefile | 4 +-
 bsd/nfs/krpc.h | 18 +-
 bsd/nfs/krpc_subr.c | 257 +-
 bsd/nfs/nfs.h | 826 +-
 bsd/nfs/nfs_bio.c | 1821 +++--
 bsd/nfs/nfs_boot.c | 216 +-
 bsd/nfs/nfs_lock.c | 309 +-
 bsd/nfs/nfs_lock.h | 36 +-
 bsd/nfs/nfs_node.c | 323 +-
 bsd/nfs/nfs_nqlease.c | 1353 ----
 bsd/nfs/nfs_serv.c | 4186 +++++-----
 bsd/nfs/nfs_socket.c | 1728 ++---
 bsd/nfs/nfs_srvcache.c | 114 +-
 bsd/nfs/nfs_subs.c | 2294 ++++--
 bsd/nfs/nfs_syscalls.c | 1296 ++--
 bsd/nfs/nfs_vfsops.c | 1260 +--
 bsd/nfs/nfs_vnops.c | 4475 ++++++-----
 bsd/nfs/nfsdiskless.h | 26 +-
 bsd/nfs/nfsm_subs.h | 433 +-
 bsd/nfs/nfsmount.h | 28 +-
 bsd/nfs/nfsnode.h | 205 +-
 bsd/nfs/nfsproto.h | 40 +-
 bsd/nfs/nfsrtt.h | 3 +-
 bsd/nfs/nfsrvcache.h | 5 +-
 bsd/nfs/nlminfo.h | 52 -
 bsd/nfs/nqnfs.h | 244 -
 bsd/nfs/rpcv2.h | 3 +-
 bsd/nfs/xdr_subs.h | 14 +-
 bsd/ppc/Makefile | 14 +-
 bsd/ppc/_types.h | 118 +
 bsd/ppc/disklabel.h | 10 +-
 bsd/ppc/endian.h | 74 +-
 bsd/ppc/exec.h | 10 +-
 bsd/ppc/param.h | 13 +-
 bsd/ppc/reboot.h | 6 +-
 bsd/ppc/reg.h | 6 +-
 bsd/ppc/setjmp.h | 61 +-
 bsd/ppc/signal.h | 28 +-
 bsd/ppc/spl.h | 55 -
 bsd/ppc/types.h | 46 +-
 bsd/ppc/ucontext.h | 39 +-
 bsd/ppc/user.h | 30 -
 bsd/ppc/vmparam.h | 8 +-
 bsd/sys/Makefile | 86 +-
 bsd/{miscfs/specfs/lockf.h => sys/_endian.h} | 118 +-
 bsd/sys/_types.h | 198 +
 bsd/sys/acct.h | 10 +-
 bsd/sys/aio.h | 34 +-
 bsd/sys/aio_kern.h | 32 +-
 bsd/sys/attr.h | 188 +-
 bsd/sys/audit.h | 208 -
 bsd/sys/buf.h | 576 +-
 bsd/sys/buf_internal.h | 252 +
 bsd/sys/cdefs.h | 183 +-
 bsd/sys/clist.h | 7 +-
 bsd/sys/conf.h | 144 +-
 bsd/sys/dirent.h | 52 +-
 bsd/sys/disk.h | 15 +-
 bsd/sys/disklabel.h | 3 +-
 bsd/sys/dkstat.h | 8 +-
 bsd/sys/domain.h | 55 +-
 bsd/sys/errno.h | 75 +-
 bsd/sys/ev.h | 17 +-
 bsd/sys/event.h | 178 +-
 bsd/sys/eventvar.h | 29 +-
 bsd/sys/exec.h | 33 +-
 bsd/sys/fcntl.h | 205 +-
 bsd/sys/file.h | 137 +-
 bsd/sys/file_internal.h | 208 +
 bsd/sys/filedesc.h | 35 +-
 bsd/sys/fsctl.h | 5 +-
 bsd/sys/fsevents.h | 88 +
 bsd/{net/if_slvar.h => sys/imgact.h} | 102 +-
 bsd/sys/ioctl.h | 6 +-
 bsd/sys/ioctl_compat.h | 17 +-
 bsd/sys/ipc.h | 136 +-
 bsd/sys/ipcs.h | 94 +
 bsd/sys/kauth.h | 652 ++
 bsd/sys/kdebug.h | 31 +-
 bsd/sys/kern_audit.h | 285 -
 bsd/sys/kern_control.h | 515 +-
 bsd/sys/kern_event.h | 212 +-
 bsd/sys/kernel.h | 23 +-
 bsd/sys/kernel_types.h | 127 +
 bsd/sys/kpi_mbuf.h | 1127 +++
 bsd/sys/kpi_socket.h | 375 +
 bsd/sys/kpi_socketfilter.h | 604 ++
 bsd/sys/ktrace.h | 30 +-
 bsd/sys/loadable_fs.h | 3 -
 bsd/sys/lock.h | 91 +-
 bsd/sys/lockf.h | 73 +-
 bsd/sys/mach_swapon.h | 6 +-
 bsd/sys/malloc.h | 80 +-
 bsd/sys/mbuf.h | 264 +-
 bsd/sys/md5.h | 2 +-
 bsd/sys/mman.h | 184 +-
 bsd/sys/mount.h | 527 +-
 bsd/sys/mount_internal.h | 301 +
 bsd/sys/msg.h | 215 +-
 bsd/sys/mtio.h | 6 +-
 bsd/sys/namei.h | 180 +-
 bsd/sys/param.h | 19 +-
 bsd/sys/pipe.h | 157 +
 bsd/sys/poll.h | 38 +-
 bsd/sys/proc.h | 392 +-
 bsd/sys/proc_internal.h | 369 +
 bsd/sys/protosw.h | 152 +-
 bsd/sys/ptrace.h | 17 +-
 bsd/{net/netisr.h => sys/ptrace_internal.h} | 66 +-
 bsd/sys/queue.h | 4 +-
 bsd/sys/quota.h | 101 +-
 bsd/sys/random.h | 5 +-
 bsd/sys/reboot.h | 3 +
 bsd/sys/resource.h | 199 +-
 bsd/sys/resourcevar.h | 32 +-
 bsd/sys/select.h | 122 +-
 bsd/sys/sem.h | 297 +-
 bsd/sys/sem_internal.h | 208 +
 bsd/sys/semaphore.h | 4 +-
 bsd/sys/shm.h | 146 +-
 bsd/sys/shm_internal.h | 117 +
 bsd/sys/signal.h | 327 +-
 bsd/sys/signalvar.h | 80 +-
 bsd/sys/socket.h | 305 +-
 bsd/sys/socketvar.h | 226 +-
 bsd/sys/sockio.h | 48 +-
 bsd/sys/stat.h | 386 +-
 bsd/sys/sys_domain.h | 13 +-
 bsd/sys/syscall.h | 761 +-
 bsd/sys/sysctl.h | 308 +-
 bsd/sys/sysent.h | 75 +
 bsd/sys/syslimits.h | 8 +-
 bsd/sys/syslog.h | 151 +-
 bsd/sys/sysproto.h | 1610 ++++
 bsd/sys/systm.h | 184 +-
 bsd/sys/table.h | 121 -
 bsd/sys/termios.h | 164 +-
 bsd/sys/time.h | 204 +-
 bsd/sys/timeb.h | 27 +-
 bsd/sys/times.h | 26 +-
 bsd/sys/tprintf.h | 11 +-
 bsd/sys/trace.h | 2 +-
 bsd/sys/tty.h | 146 +-
 bsd/sys/ttycom.h | 6 +
 bsd/sys/ttydefaults.h | 2 +-
 bsd/sys/types.h | 318 +-
 bsd/sys/ubc.h | 159 +-
 bsd/sys/ubc_internal.h | 154 +
 bsd/sys/ucontext.h | 65 +-
 bsd/sys/ucred.h | 55 +-
 bsd/sys/uio.h | 215 +-
 bsd/sys/uio_internal.h | 445 ++
 bsd/sys/un.h | 43 +-
 bsd/sys/unistd.h | 72 +-
 bsd/sys/unpcb.h | 87 +-
 bsd/sys/user.h | 96 +-
 bsd/sys/utfconv.h | 13 +-
 bsd/sys/utsname.h | 16 +-
 bsd/sys/ux_exception.h | 4 +
 bsd/sys/version.h | 2 +-
 bsd/sys/vfs_context.h | 14 +
 bsd/sys/vm.h | 57 +-
 bsd/sys/vmmeter.h | 3 -
 bsd/sys/vnioctl.h | 60 +-
 bsd/sys/vnode.h | 788 +-
 bsd/sys/vnode_if.h | 1799 ++---
 bsd/sys/vnode_internal.h | 370 +
 bsd/sys/vstat.h | 6 +-
 bsd/sys/wait.h | 138 +-
 bsd/sys/xattr.h | 74 +
 bsd/ufs/ffs/ffs_alloc.c | 238 +-
 bsd/ufs/ffs/ffs_balloc.c | 211 +-
 bsd/ufs/ffs/ffs_extern.h | 86 +-
 bsd/ufs/ffs/ffs_inode.c | 202 +-
 bsd/ufs/ffs/ffs_subr.c | 88 +-
 bsd/ufs/ffs/ffs_vfsops.c | 1067 +--
 bsd/ufs/ffs/ffs_vnops.c | 393 +-
 bsd/ufs/ufs/Makefile | 2 +-
 bsd/ufs/ufs/inode.h | 12 +-
 bsd/ufs/ufs/lockf.h | 113 -
 bsd/ufs/ufs/quota.h | 26 +-
 bsd/ufs/ufs/ufs_attrlist.c | 666 +-
 bsd/ufs/ufs/ufs_bmap.c | 466 +-
 bsd/ufs/ufs/ufs_byte_order.c | 6 +-
 bsd/ufs/ufs/ufs_byte_order.h | 34 +-
 bsd/ufs/ufs/ufs_extern.h | 145 +-
 bsd/ufs/ufs/ufs_ihash.c | 45 +-
 bsd/ufs/ufs/ufs_inode.c | 32 +-
 bsd/ufs/ufs/ufs_lookup.c | 290 +-
 bsd/ufs/ufs/ufs_quota.c | 560 +-
 bsd/ufs/ufs/ufs_readwrite.c | 228 +-
 bsd/ufs/ufs/ufs_vfsops.c | 94 +-
 bsd/ufs/ufs/ufs_vnops.c | 1729 ++---
 bsd/ufs/ufs/ufsmount.h | 4 -
 bsd/uuid/Makefile | 60 +
 bsd/uuid/uuid.h | 74 +
 bsd/uxkern/ux_exception.c | 99 +-
 bsd/vfs/kpi_vfs.c | 4626 ++++++++++++
 bsd/vfs/vfs_attrlist.c | 1632 ++++
 bsd/vfs/vfs_bio.c | 3710 ++++++---
 bsd/vfs/vfs_cache.c | 1223 ++-
 bsd/vfs/vfs_cluster.c | 2661 ++++---
 bsd/vfs/vfs_conf.c | 38 +-
 bsd/vfs/vfs_fsevents.c | 1402 ++++
 bsd/vfs/vfs_init.c | 258 +-
 bsd/vfs/vfs_journal.c | 624 +-
 bsd/vfs/vfs_journal.h | 24 +-
 bsd/vfs/vfs_lookup.c | 734 +-
 bsd/vfs/vfs_quota.c | 849 ++-
 bsd/vfs/vfs_subr.c | 6727 +++++++++++------
 bsd/vfs/vfs_support.c | 861 +--
 bsd/vfs/vfs_support.h | 224 +-
 bsd/vfs/vfs_syscalls.c | 6379 +++++++++-------
 bsd/vfs/vfs_utfconv.c | 85 +
 bsd/vfs/vfs_vnops.c | 1042 ++-
 bsd/vfs/vfs_xattr.c | 2007 +++++
 bsd/vfs/vnode_if.c | 998 +--
 bsd/vfs/vnode_if.sh | 24 +-
 bsd/vfs/vnode_if.src | 698 +-
 bsd/vm/Makefile | 3 +-
 bsd/vm/dp_backing_file.c | 204 +-
 bsd/vm/vm_pager.h | 10 +-
 bsd/vm/vm_unix.c | 1263 +++-
 bsd/vm/vnode_pager.c | 268 +-
 bsd/vm/vnode_pager.h | 22 +-
 config/BSDKernel.exports | 4022 ++--
 config/BSDKernel.ppc.exports | 489 --
 config/IOKit.exports | 379 +-
 config/IOKit.ppc.exports | 184 -
 config/IPFirewall.kext/Info.plist | 26 +
 config/Libkern.exports | 97 +-
 config/Libkern.ppc.exports | 2 +
 config/Mach.exports | 2042 +---
 config/Mach.ppc.exports | 582 --
 config/Makefile | 36 +-
 config/MasterVersion | 19 +
 config/System.kext/Info.plist | 8 +-
 .../PlugIns/AppleNMI.kext/Info.plist | 6 +-
 .../ApplePlatformFamily.kext/Info.plist | 6 +-
 .../PlugIns/BSDKernel.kext/Info.plist | 8 +-
 .../PlugIns/BSDKernel6.0.kext/Info.plist | 6 +-
 .../System.kext/PlugIns/IOKit.kext/Info.plist | 6 +-
 .../PlugIns/IOKit6.0.kext/Info.plist | 6 +-
 .../PlugIns/IONVRAMFamily.kext/Info.plist | 6 +-
 .../IOSystemManagement.kext/Info.plist | 6 +-
 .../PlugIns/Libkern.kext/Info.plist | 8 +-
 .../PlugIns/Libkern6.0.kext/Info.plist | 6 +-
 .../System.kext/PlugIns/Mach.kext/Info.plist | 8 +-
 .../PlugIns/Mach6.0.kext/Info.plist | 6 +-
 .../PlugIns/System6.0.kext/Info.plist | 8 +-
 .../PlugIns/Unsupported.kext/Info.plist | 32 +
 config/System6.0.exports | 5902 +--
 config/System6.0.i386.exports | 108 +-
 config/System6.0.ppc.exports | 1047 ---
 config/Unsupported.exports | 268 +
 config/Unsupported.i386.exports | 8 +
 config/Unsupported.ppc.exports | 25 +
 config/newvers.pl | 110 +
 config/version.c | 42 +
 .../drvAppleIntelClock/AppleIntelClock.h | 2 -
 .../drvAppleIntelClock/IntelClock.cpp | 11 +-
 .../Drivers/platform/drvAppleNMI/AppleNMI.cpp | 22 +-
 .../drvApplePlatformExpert/AppleCPU.cpp | 22 +-
 iokit/IOKit/IOBufferMemoryDescriptor.h | 18 +-
 iokit/IOKit/IOCPU.h | 3 +
 iokit/IOKit/IOCatalogue.h | 2 +
 iokit/IOKit/IOCommand.h | 7 +-
 iokit/IOKit/IOCommandPool.h | 39 +-
 iokit/IOKit/IODeviceMemory.h | 19 +-
 iokit/IOKit/IODeviceTreeSupport.h | 5 +
 iokit/IOKit/IOEventSource.h | 2 +-
 iokit/IOKit/IOFilterInterruptEventSource.h | 6 +-
 iokit/IOKit/IOInterruptEventSource.h | 2 +-
 iokit/IOKit/IOKitDebug.h | 3 +-
 iokit/IOKit/IOKitKeys.h | 9 +
 iokit/IOKit/IOKitKeysPrivate.h | 1 +
 iokit/IOKit/IOLib.h | 8 +-
 iokit/IOKit/IOLocks.h | 234 +-
 iokit/IOKit/IOMemoryCursor.h | 187 +-
 iokit/IOKit/IOMemoryDescriptor.h | 130 +-
 iokit/IOKit/IOMessage.h | 11 +-
 iokit/IOKit/IONVRAM.h | 10 +-
 iokit/IOKit/IOPolledInterface.h | 93 +
 iokit/IOKit/IORangeAllocator.h | 40 +-
 iokit/IOKit/IORegistryEntry.h | 55 +-
 iokit/IOKit/IOReturn.h | 25 +-
 iokit/IOKit/IOService.h | 14 +-
 iokit/IOKit/IOServicePM.h | 240 +-
 iokit/IOKit/IOTimeStamp.h | 6 +-
 iokit/IOKit/IOTimerEventSource.h | 13 +-
 iokit/IOKit/IOTypes.h | 13 +-
 iokit/IOKit/IOUserClient.h | 17 +-
 iokit/IOKit/IOWorkLoop.h | 132 +-
 iokit/IOKit/Makefile | 6 +-
 iokit/IOKit/OSMessageNotification.h | 9 +
 iokit/IOKit/i386/IOSharedLockImp.h | 13 -
 iokit/IOKit/i386/Makefile | 2 +-
 iokit/IOKit/ppc/IOSharedLockImp.h | 37 -
 iokit/IOKit/ppc/Makefile | 2 +-
 iokit/IOKit/pwr_mgt/IOPM.h | 6 +-
 iokit/IOKit/pwr_mgt/IOPMlog.h | 149 +-
 iokit/IOKit/pwr_mgt/Makefile | 9 +-
 iokit/IOKit/pwr_mgt/RootDomain.h | 8 +-
 iokit/IOKit/system.h | 64 +-
 iokit/Kernel/IOBufferMemoryDescriptor.cpp | 191 +-
 iokit/Kernel/IOCPU.cpp | 83 +-
 iokit/Kernel/IOCatalogue.cpp | 77 +-
 iokit/Kernel/IOCommand.cpp | 2 +-
 iokit/Kernel/IOCommandQueue.cpp | 4 +-
 iokit/Kernel/IODeviceTreeSupport.cpp | 181 +-
 iokit/Kernel/IOInterruptController.cpp | 143 +-
 iokit/Kernel/IOInterruptEventSource.cpp | 8 +-
 iokit/Kernel/IOKitDebug.cpp | 19 +-
 iokit/Kernel/IOKitKernelInternal.h | 56 +
 iokit/Kernel/IOLib.c | 119 +-
 iokit/Kernel/IOLocks.cpp | 87 +-
 iokit/Kernel/IOMapper.cpp | 8 +-
 iokit/Kernel/IOMemoryCursor.cpp | 4 +-
 iokit/Kernel/IOMemoryDescriptor.cpp | 1410 ++--
 iokit/Kernel/IONVRAM.cpp | 364 +-
 iokit/Kernel/IOPMPagingPlexus.cpp | 235 -
 iokit/Kernel/IOPMchangeNoteList.cpp | 4 +-
 iokit/Kernel/IOPMrootDomain.cpp | 153 +-
 iokit/Kernel/IOPlatformExpert.cpp | 57 +-
 iokit/Kernel/IORegistryEntry.cpp | 182 +-
 iokit/Kernel/IOService.cpp | 300 +-
 iokit/Kernel/IOServicePM.cpp | 321 +-
 iokit/Kernel/IOServicePrivate.h | 9 +-
 iokit/Kernel/IOStartIOKit.cpp | 17 +-
 iokit/Kernel/IOTimerEventSource.cpp | 122 +-
 iokit/Kernel/IOUserClient.cpp | 159 +-
 iokit/Kernel/IOWorkLoop.cpp | 18 +-
 iokit/Kernel/RootDomainUserClient.cpp | 70 +-
 iokit/Kernel/RootDomainUserClient.h | 6 +
 iokit/KernelConfigTables.cpp | 30 +-
 iokit/bsddev/IOKitBSDInit.cpp | 137 +-
 iokit/conf/Makefile.i386 | 27 +
 iokit/conf/Makefile.template | 19 +-
 iokit/conf/files | 2 +-
 iokit/conf/tools/Makefile | 8 +-
 iokit/conf/tools/newvers/Makefile | 49 -
 iokit/conf/tools/newvers/newvers.csh | 34 -
 iokit/conf/version.major | 1 -
 iokit/conf/version.minor | 1 -
 iokit/conf/version.variant | 1 -
 iokit/include/mach/mach.h | 2 +-
 iokit/mach-o/mach_header.h | 21 +-
 kgmacros | 680 +-
 libkern/Makefile | 16 +-
 libkern/c++/OSArray.cpp | 98 +-
 libkern/c++/OSCollection.cpp | 49 +-
 libkern/c++/OSDictionary.cpp | 86 +-
 libkern/c++/OSMetaClass.cpp | 32 +-
 libkern/c++/OSNumber.cpp | 15 +-
 libkern/c++/OSObjectAsm.s | 2 +-
 libkern/c++/OSOrderedSet.cpp | 88 +-
 libkern/c++/OSRuntime.cpp | 16 +-
 libkern/c++/OSSet.cpp | 77 +-
 libkern/c++/OSUnserialize.cpp | 2 +-
 libkern/c++/OSUnserializeXML.cpp | 10 +-
 libkern/c++/OSUnserializeXML.y | 10 +-
 .../test2/test2.pbproj/project.pbxproj | 30 +-
 .../TestSerialization/test2/test2_main.cpp | 124 +-
 libkern/conf/Makefile.i386 | 28 +
 libkern/conf/Makefile.template | 14 +-
 libkern/conf/files | 4 +
 libkern/conf/tools/Makefile | 8 +-
 libkern/conf/tools/newvers/Makefile | 49 -
 libkern/conf/tools/newvers/newvers.csh | 34 -
 libkern/conf/version.major | 1 -
 libkern/conf/version.minor | 1 -
 libkern/conf/version.variant | 1 -
 libkern/gen/OSAtomicOperations.c | 67 +-
 libkern/gen/OSDebug.cpp | 170 +
 libkern/i386/OSAtomic.s | 23 +-
 libkern/libkern/Makefile | 17 +-
 libkern/libkern/OSAtomic.h | 36 +-
 libkern/libkern/OSBase.h | 28 +-
 libkern/libkern/OSByteOrder.h | 20 +-
 .../trap.h => libkern/libkern/OSDebug.h | 26 +-
 libkern/libkern/OSMalloc.h | 73 +
 libkern/libkern/c++/OSArray.h | 24 +-
 libkern/libkern/c++/OSBoolean.h | 11 +-
 libkern/libkern/c++/OSCollection.h | 103 +-
 libkern/libkern/c++/OSDictionary.h | 18 +
 libkern/libkern/c++/OSLib.h | 2 +
 libkern/libkern/c++/OSMetaClass.h | 65 +-
 libkern/libkern/c++/OSNumber.h | 1 +
 libkern/libkern/c++/OSOrderedSet.h | 18 +
 libkern/libkern/c++/OSSet.h | 17 +
 libkern/libkern/i386/OSByteOrder.h | 20 +-
 bsd/i386/user.h => libkern/libkern/locks.h | 27 +-
 libkern/libkern/ppc/OSByteOrder.h | 18 +-
 libkern/libkern/sysctl.h | 108 +
 libkern/libkern/version.h.template | 94 +
 libkern/mach-o/loader.h | 107 +-
 libkern/mach-o/mach_header.h | 25 +-
 libkern/ppc/OSAtomic.s | 10 +-
 libkern/stdio/scanf.c | 660 ++
 libkern/uuid/Makefile | 37 +
 libkern/uuid/uuid.c | 200 +
 libsa/catalogue.cpp | 50 +-
 libsa/conf/Makefile.i386 | 27 +
 libsa/conf/Makefile.template | 14 +-
 libsa/conf/tools/Makefile | 8 +-
 libsa/conf/tools/newvers/Makefile | 49 -
 libsa/conf/tools/newvers/newvers.csh | 34 -
 libsa/conf/version.major | 1 -
 libsa/conf/version.minor | 1 -
 libsa/conf/version.variant | 1 -
 libsa/dgraph.c | 32 +-
 libsa/dgraph.h | 2 +
 libsa/kext.cpp | 31 +-
 libsa/kld_patch.c | 62 +-
 libsa/kld_patch.h | 10 +-
 libsa/kmod.cpp | 2 +-
 libsa/libsa/Makefile | 7 +-
 libsa/libsa/i386/Makefile | 2 +
 libsa/libsa/mach/Makefile | 2 +
 libsa/libsa/ppc/Makefile | 2 +
 libsa/libsa/stdlib.h | 7 +-
 libsa/load.c | 128 +-
 libsa/mach_loader.h | 3 +
 libsa/malloc.c | 22 +-
 libsa/mkext.c | 52 +-
 libsa/ppc/setjmp.s | 8 +-
 makedefs/MakeInc.def | 84 +-
 makedefs/MakeInc.dir | 53 +-
 makedefs/MakeInc.rule | 514 +-
 osfmk/Makefile | 25 +-
 .../UserNotification/KUNCUserNotifications.c | 36 +-
 .../UserNotification/KUNCUserNotifications.h | 48 +-
 osfmk/UserNotification/UNDReply.defs | 2 +-
 osfmk/UserNotification/UNDRequest.defs | 6 +-
 osfmk/UserNotification/UNDTypes.h | 5 +-
 osfmk/conf/MASTER | 32 +-
 osfmk/conf/MASTER.i386 | 3 +-
 osfmk/conf/MASTER.ppc | 13 +-
 osfmk/conf/Makefile | 234 +
 osfmk/conf/Makefile.i386 | 36 +
 osfmk/conf/Makefile.ppc | 7 +-
 osfmk/conf/Makefile.template | 20 +-
 osfmk/conf/files | 26 +-
 osfmk/conf/files.i386 | 25 +-
 osfmk/conf/files.ppc | 9 +-
 osfmk/conf/kernelversion.major | 1 -
 osfmk/conf/kernelversion.minor | 1 -
 osfmk/conf/kernelversion.variant | 1 -
 osfmk/conf/tools/Makefile | 12 +-
 osfmk/conf/tools/kernel_newvers/Makefile | 49 -
 .../tools/kernel_newvers/kernel_newvers.csh | 39 -
 osfmk/conf/tools/newvers/Makefile | 49 -
 osfmk/conf/tools/newvers/newvers.csh | 33 -
 osfmk/conf/version.major | 1 -
 osfmk/conf/version.minor | 1 -
 osfmk/conf/version.variant | 1 -
 osfmk/console/i386/serial_console.c | 188 +-
 osfmk/console/i386/text_console.c | 60 +-
 osfmk/console/i386/video_scroll.c | 9 +-
 osfmk/console/panic_dialog.c | 1171 +--
 osfmk/console/panic_image.c | 3713 +++++----
 osfmk/console/panic_ui/README | 65 +
 osfmk/console/panic_ui/appleclut8.h | 51 +
 .../panic_ui/generated_files/panic_image.c | 1953 +++++
 .../generated_files/rendered_numbers.c | 376 +
 osfmk/console/panic_ui/genimage.c | 1621 ++++
 .../console/panic_ui/images/panic_dialog.tiff | Bin 0 -> 136036 bytes
 .../panic_ui/images/panic_dialogWHD.raw | Bin 0 -> 120366 bytes
 .../panic_ui/images/rendered_numbers.tiff | Bin 0 -> 3218 bytes
 .../panic_ui/images/rendered_numbersWHD.raw | Bin 0 -> 1425 bytes
 osfmk/console/panic_ui/qtif2kraw.c | 892 +++
 osfmk/console/panic_ui/setupdialog.c | 359 +
 osfmk/console/panic_ui/systemCLUT.act | Bin 0 -> 768 bytes
 osfmk/console/ppc/serial_console.c | 134 +-
 osfmk/console/video_console.c | 187 +-
 osfmk/ddb/Makefile | 2 -
 osfmk/ddb/db_aout.c | 6 +-
 osfmk/ddb/db_break.c | 50 +-
 osfmk/ddb/db_break.h | 2 +-
 osfmk/ddb/db_command.c | 25 +-
 osfmk/ddb/db_command.h | 85 +-
 osfmk/ddb/db_examine.c | 80 +-
 osfmk/ddb/db_expr.c | 6 +-
 osfmk/ddb/db_ext_symtab.c | 14 +-
 osfmk/ddb/db_macro.c | 6 +-
 osfmk/ddb/db_output.c | 8 +-
 osfmk/ddb/db_output.h | 73 +-
 osfmk/ddb/db_print.c | 184 +-
 osfmk/ddb/db_sym.c | 76 +-
 osfmk/ddb/db_sym.h | 144 +-
 osfmk/ddb/db_task_thread.c | 46 +-
 osfmk/ddb/db_task_thread.h | 16 +-
 osfmk/ddb/db_trap.c | 10 +-
 osfmk/ddb/db_variables.c | 20 +-
 osfmk/ddb/db_variables.h | 2 +-
 osfmk/ddb/db_watch.c | 90 +-
 osfmk/ddb/db_write_cmd.c | 88 +-
 osfmk/ddb/tr.c | 2 +-
 osfmk/default_pager/Makefile | 1 +
 osfmk/default_pager/default_pager.c | 58 +-
 osfmk/default_pager/default_pager_internal.h | 48 +-
 osfmk/default_pager/default_pager_object.defs | 4 +
 osfmk/default_pager/default_pager_types.defs | 87 +-
 osfmk/default_pager/default_pager_types.h | 26 +-
 osfmk/default_pager/diag.h | 4 +-
 osfmk/default_pager/dp_backing_store.c | 460 +-
 osfmk/default_pager/dp_memory_object.c | 354 +-
 osfmk/device/device.defs | 11 +
 osfmk/device/device_init.c | 13 +-
 osfmk/device/device_port.h | 2 +-
 osfmk/device/device_types.h | 6 +-
 osfmk/device/iokit_rpc.c | 17 +-
 osfmk/device/subrs.c | 43 +
 osfmk/i386/AT386/asm_startup.h | 270 -
 osfmk/i386/AT386/bbclock.c | 47 +-
 osfmk/i386/AT386/bbclock_entries.h | 1 +
 osfmk/i386/AT386/conf.c | 6 +-
 osfmk/i386/AT386/himem.c | 14 +-
 osfmk/i386/AT386/machdep.mk | 3 +-
 osfmk/i386/AT386/model_dep.c | 96 +-
 osfmk/i386/Makefile | 14 +-
 osfmk/i386/acpi.c | 109 +
 bsd/machine/proc.h => osfmk/i386/acpi.h | 28 +-
 osfmk/i386/acpi_wakeup.s | 381 +
 osfmk/i386/apic.h | 38 +-
 osfmk/i386/asm.h | 11 +-
 osfmk/i386/ast_check.c | 1 -
 osfmk/i386/bcopy.s | 51 +
 osfmk/i386/bsd_i386.c | 442 +-
 osfmk/i386/commpage/atomic.s | 149 +
 osfmk/i386/commpage/commpage.c | 226 +-
 osfmk/i386/commpage/commpage.h | 9 +
 .../commpage/commpage_mach_absolute_time.s | 87 +-
 osfmk/i386/commpage/commpage_sigs.h | 57 -
 osfmk/i386/commpage/commpage_sigs.s | 69 -
 osfmk/i386/commpage/spinlocks.s | 18 +-
 osfmk/i386/cpu.c | 194 +-
 osfmk/i386/cpu_capabilities.h | 130 +-
 osfmk/i386/cpu_data.h | 221 +-
 osfmk/i386/cpu_number.h | 11 +-
 osfmk/i386/cpu_threads.c | 110 +
 osfmk/i386/cpu_threads.h | 56 +
 osfmk/i386/cpuid.c | 504 +-
 osfmk/i386/cpuid.h | 150 +-
 osfmk/i386/cswitch.s | 59 +-
 osfmk/i386/db_interface.c | 125 +-
 osfmk/i386/db_machdep.h | 2 +-
 osfmk/i386/db_trace.c | 110 +-
 osfmk/i386/endian.h | 20 +-
 osfmk/i386/fpu.c | 249 +-
 osfmk/i386/fpu.h | 27 +-
 osfmk/i386/gdt.c | 24 +-
 osfmk/i386/genassym.c | 175 +-
 osfmk/i386/hardclock.c | 235 -
 osfmk/i386/hardclock_entries.h | 50 -
 osfmk/i386/hw_lock_types.h | 9 +-
 osfmk/i386/i386_init.c | 66 +-
 osfmk/i386/i386_lock.s | 902 +--
 osfmk/i386/i386_vm_init.c | 328 +-
 osfmk/i386/io_emulate.c | 20 +-
 osfmk/i386/io_map.c | 7 +-
 osfmk/i386/io_map_entries.h | 9 +-
 osfmk/i386/iopb.c | 14 +-
 osfmk/i386/iopb_entries.h | 2 +-
 osfmk/i386/ipl.h | 10 -
 osfmk/i386/ldt.c | 22 +-
 osfmk/i386/lock.h | 170 +-
 osfmk/i386/locks.h | 144 +
 osfmk/i386/locks_i386.c | 1870 +++++
 osfmk/i386/locore.s | 658 +-
 osfmk/i386/loose_ends.c | 719 +-
 osfmk/i386/mach_param.h | 58 -
 osfmk/i386/machdep_call.c | 63 +-
 osfmk/i386/machdep_call.h | 20 +-
 osfmk/i386/machine_cpu.h | 26 +-
 osfmk/i386/machine_routines.c | 207 +-
 osfmk/i386/machine_routines.h | 25 +-
 osfmk/i386/machine_routines_asm.s | 250 -
 osfmk/i386/machparam.h | 8 -
 osfmk/i386/mcount.s | 20 +-
 osfmk/i386/misc_protos.h | 38 +-
 osfmk/i386/mp.c | 684 +-
 osfmk/i386/mp.h | 182 +-
 osfmk/i386/mp_desc.c | 259 +-
 osfmk/i386/mp_desc.h | 104 +-
 osfmk/i386/mp_events.h | 15 +-
 osfmk/i386/mp_slave_boot.h | 2 +-
 osfmk/i386/mp_slave_boot.s | 35 +-
 osfmk/i386/mtrr.c | 643 ++
 bsd/ppc/cpu.h => osfmk/i386/mtrr.h | 48 +-
 osfmk/i386/pcb.c | 407 +-
 osfmk/i386/perfmon.c | 552 ++
 osfmk/i386/perfmon.h | 305 +
 osfmk/i386/phys.c | 124 +-
 osfmk/i386/pio.h | 1 -
 osfmk/i386/pit.h | 2 +-
 osfmk/i386/pmap.c | 1606 ++--
 osfmk/i386/pmap.h | 311 +-
 osfmk/i386/postcode.h | 160 +
 osfmk/i386/proc_reg.h | 103 +-
 osfmk/i386/read_fault.c | 45 +-
 osfmk/i386/rtclock.c | 1379 ++--
 osfmk/i386/rtclock_entries.h | 9 +-
 osfmk/i386/seg.h | 105 +-
 osfmk/{kern/time_out.h => i386/simple_lock.h} | 77 +-
 osfmk/i386/start.s | 495 +-
 osfmk/i386/thread.h | 173 +-
 osfmk/i386/thread_act.h | 185 -
 osfmk/i386/trap.c | 189 +-
 osfmk/i386/trap.h | 11 +-
 osfmk/i386/user_ldt.c | 48 +-
 osfmk/i386/user_ldt.h | 2 +-
 osfmk/i386/xpr.h | 9 -
 osfmk/ipc/ipc_entry.c | 10 +-
 osfmk/ipc/ipc_entry.h | 29 +-
 osfmk/ipc/ipc_hash.c | 41 +-
 osfmk/ipc/ipc_hash.h | 9 +-
 osfmk/ipc/ipc_init.c | 33 +-
 osfmk/ipc/ipc_kmsg.c | 1201 ++-
 osfmk/ipc/ipc_kmsg.h | 85 +-
 osfmk/ipc/ipc_mqueue.c | 152 +-
 osfmk/ipc/ipc_mqueue.h | 42 +-
 osfmk/ipc/ipc_object.c | 31 +-
 osfmk/ipc/ipc_object.h | 32 +-
 osfmk/ipc/ipc_port.c | 103 +-
 osfmk/ipc/ipc_port.h | 40 +-
 osfmk/ipc/ipc_print.h | 65 +-
 osfmk/ipc/ipc_pset.c | 9 +-
 osfmk/ipc/ipc_pset.h | 16 +-
 osfmk/ipc/ipc_right.c | 34 +-
 osfmk/ipc/ipc_space.c | 5 +-
 osfmk/ipc/ipc_space.h | 13 +-
 osfmk/ipc/ipc_splay.c | 58 +-
 osfmk/ipc/ipc_table.c | 93 +-
 osfmk/ipc/ipc_table.h | 103 +-
 osfmk/ipc/ipc_types.h | 50 +-
 osfmk/ipc/mach_debug.c | 137 +-
 osfmk/ipc/mach_msg.c | 349 +-
 osfmk/ipc/mach_port.c | 118 +-
 osfmk/ipc/port.h | 7 +-
 osfmk/kdp/kdp.c | 9 +-
 osfmk/kdp/kdp_core.h | 28 +-
 osfmk/kdp/kdp_internal.h | 26 +-
 osfmk/kdp/kdp_udp.c | 40 +-
 osfmk/kdp/ml/i386/kdp_machdep.c | 95 +-
 osfmk/kdp/ml/i386/kdp_vm.c | 4 +-
 osfmk/kdp/ml/ppc/kdp_machdep.c | 22 +-
 osfmk/kdp/ml/ppc/kdp_vm.c | 102 +-
 osfmk/kern/Makefile | 7 +-
 osfmk/kern/assert.h | 9 +-
 osfmk/kern/ast.c | 87 +-
 osfmk/kern/ast.h | 65 +-
 osfmk/kern/bsd_kern.c | 275 +-
 osfmk/kern/clock.c | 213 +-
 osfmk/kern/clock.h | 176 +-
 osfmk/kern/counters.c | 71 +-
 osfmk/kern/counters.h | 85 +-
 osfmk/kern/cpu_data.c | 33 -
 osfmk/kern/cpu_data.h | 35 +-
 osfmk/kern/cpu_number.h | 10 +-
 osfmk/kern/debug.c | 29 +-
 osfmk/kern/debug.h | 22 +-
 osfmk/kern/etap.c | 1866 -----
 osfmk/kern/etap_macros.h | 456 --
 osfmk/kern/etap_map.c | 174 -
 osfmk/kern/etap_map.h | 84 -
 osfmk/kern/etap_options.h | 102 -
 osfmk/kern/etap_pool.c | 224 -
 osfmk/kern/etap_pool.h | 107 -
 osfmk/kern/exception.c | 88 +-
 osfmk/kern/exception.h | 10 +-
 osfmk/kern/host.c | 425 +-
 osfmk/kern/host.h | 23 +-
 osfmk/kern/host_notify.c | 12 +-
 osfmk/kern/host_statistics.h | 15 +-
 osfmk/kern/ipc_host.c | 19 +-
 osfmk/kern/ipc_kobject.c | 196 +-
 osfmk/kern/ipc_kobject.h | 7 +-
 osfmk/kern/ipc_mig.c | 124 +-
 osfmk/kern/ipc_mig.h | 57 +-
 osfmk/kern/ipc_tt.c | 1042 ++-
 osfmk/kern/ipc_tt.h | 62 +-
 osfmk/kern/kalloc.c | 381 +-
 osfmk/kern/kalloc.h | 52 +-
 osfmk/kern/kern_types.h | 58 +-
 osfmk/kern/kmod.c | 208 +-
 osfmk/kern/ledger.c | 52 +-
 osfmk/kern/ledger.h | 26 +-
 osfmk/kern/lock.c | 2384 ------
 osfmk/kern/lock.h | 303 +-
 osfmk/kern/lock_mon.c | 415 -
 osfmk/kern/locks.c | 1055 +++
 osfmk/kern/locks.h | 409 +
 osfmk/kern/mach_clock.c | 91 +-
 osfmk/kern/mach_param.h | 12 +-
 osfmk/kern/machine.c | 217 +-
 osfmk/kern/machine.h | 85 +-
 osfmk/kern/misc_protos.h | 88 +-
 osfmk/kern/mk_sp.c | 687 +-
 osfmk/kern/mk_sp.h | 67 -
 osfmk/kern/mk_timer.c | 43 +-
 osfmk/kern/mk_timer.h | 1 -
 osfmk/kern/norma_protos.h | 74 +-
 osfmk/kern/printf.c | 28 +-
 osfmk/kern/priority.c | 230 +-
 osfmk/kern/processor.c | 478 +-
 osfmk/kern/processor.h | 97 +-
 .../kern/processor_data.c | 27 +-
 osfmk/kern/processor_data.h | 80 +
 osfmk/kern/profile.c | 76 +-
 osfmk/kern/profile.h | 25 +-
 osfmk/kern/queue.h | 105 +-
 osfmk/kern/sched.h | 101 +-
 osfmk/kern/{mach_factor.c => sched_average.c} | 102 +-
 osfmk/kern/sched_prim.c | 1387 ++--
 osfmk/kern/sched_prim.h | 283 +-
 osfmk/kern/simple_lock.h | 328 +-
 osfmk/kern/simple_lock_types.h | 285 -
 osfmk/kern/spl.h | 4 +-
 osfmk/kern/sscanf.c | 93 -
 osfmk/kern/stack.c | 470 ++
 osfmk/kern/startup.c | 233 +-
 osfmk/kern/startup.h | 53 +-
 osfmk/kern/sync_lock.c | 121 +-
 osfmk/kern/sync_lock.h | 62 +-
 osfmk/kern/sync_sema.c | 360 +-
 osfmk/kern/syscall_emulation.c | 112 +-
 osfmk/kern/syscall_emulation.h | 96 -
 osfmk/kern/syscall_subr.c | 284 +-
 osfmk/kern/syscall_subr.h | 24 +-
 osfmk/kern/syscall_sw.c | 282 +-
 osfmk/kern/syscall_sw.h | 30 +-
 osfmk/kern/task.c | 1009 ++-
 osfmk/kern/task.h | 175 +-
 osfmk/kern/task_policy.c | 26 +-
 osfmk/kern/task_swap.c | 5 +-
 osfmk/kern/thread.c | 1445 ++--
 osfmk/kern/thread.h | 454 +-
 osfmk/kern/thread_act.c | 1374 +---
 osfmk/kern/thread_act.h | 53 -
 osfmk/kern/thread_call.c | 70 +-
 osfmk/kern/thread_call.h | 105 +-
 osfmk/kern/thread_policy.c | 68 +-
 osfmk/kern/thread_swap.c | 195 -
 osfmk/kern/thread_swap.h | 53 -
 osfmk/kern/timer.c | 551 +-
 osfmk/kern/timer.h | 189 +-
 osfmk/kern/timer_call.c | 33 +-
 osfmk/kern/wait_queue.c | 129 +-
 osfmk/kern/wait_queue.h | 80 +-
 osfmk/kern/xpr.c | 192 +-
 osfmk/kern/xpr.h | 118 +-
 osfmk/kern/zalloc.c | 210 +-
 osfmk/kern/zalloc.h | 166 +-
 osfmk/libsa/string.h | 46 +-
 osfmk/libsa/types.h | 2 +
 osfmk/mach-o/loader.h | 4 +
 osfmk/mach-o/mach_header.c | 147 +-
 osfmk/mach-o/mach_header.h | 23 +-
 osfmk/mach/AT386/machdep.mk | 35 -
 osfmk/mach/Makefile | 26 +-
 osfmk/mach/boolean.h | 73 +-
 osfmk/mach/boot_info.h | 253 -
 osfmk/mach/clock_types.defs | 4 +-
 osfmk/mach/clock_types.h | 19 +-
 osfmk/mach/error.h | 92 +-
 osfmk/mach/etap.h | 276 -
 osfmk/mach/etap_events.h | 347 -
 osfmk/mach/events_info.h | 57 +-
 osfmk/mach/exception.h | 9 -
 osfmk/mach/exception_types.h | 3 +-
 osfmk/mach/host_info.h | 83 +-
 osfmk/mach/host_notify.h | 8 -
 osfmk/mach/host_notify_reply.defs | 8 -
 osfmk/mach/host_priv.defs | 22 +-
 osfmk/mach/host_reboot.h | 28 +-
 osfmk/mach/host_special_ports.h | 1 -
 osfmk/mach/i386/Makefile | 30 +-
 osfmk/mach/i386/boolean.h | 48 -
 osfmk/mach/i386/exception.h | 75 -
 osfmk/mach/i386/flipc_dep.h | 39 -
 osfmk/mach/i386/fp_reg.h | 51 +-
 osfmk/mach/i386/kern_return.h | 52 +-
 osfmk/mach/i386/mach_i386.defs | 155 -
 osfmk/mach/i386/mach_i386_types.h | 111 -
 osfmk/mach/i386/machine_types.defs | 21 +-
 osfmk/mach/i386/ndr_def.h | 30 -
 osfmk/mach/i386/processor_info.h | 1 -
 osfmk/mach/i386/rpc.h | 8 +-
 osfmk/mach/i386/syscall_sw.h | 108 +-
 osfmk/mach/i386/thread_state.h | 24 +-
 osfmk/mach/i386/thread_status.h | 50 +-
 osfmk/mach/i386/vm_param.h | 97 +-
 osfmk/mach/i386/vm_types.h | 91 +-
 osfmk/mach/kern_return.h | 5 -
 osfmk/mach/kmod.h | 70 +-
 osfmk/mach/mach_host.defs | 18 +-
 osfmk/mach/mach_interface.h | 31 +-
 osfmk/mach/mach_param.h | 76 -
 osfmk/mach/mach_port.defs | 8 +-
 osfmk/mach/mach_syscalls.h | 20 +-
 osfmk/mach/mach_time.h | 26 +-
 osfmk/mach/mach_traps.h | 525 +-
 osfmk/mach/mach_types.defs | 84 +-
 osfmk/mach/mach_types.h | 39 +-
 osfmk/mach/mach_vm.defs | 441 ++
 osfmk/mach/machine.h | 97 +-
 osfmk/mach/machine/asm.h | 7 +-
 osfmk/mach/machine/boolean.h | 7 +-
 osfmk/mach/machine/exception.h | 7 +-
 osfmk/mach/machine/kern_return.h | 7 +-
 osfmk/mach/machine/machine_types.defs | 7 +-
 osfmk/mach/machine/ndr_def.h | 7 +-
 osfmk/mach/machine/processor_info.h | 7 +-
 osfmk/mach/machine/rpc.h | 9 +-
 osfmk/mach/machine/syscall_sw.h | 11 +-
 osfmk/mach/machine/thread_state.h | 7 +-
 osfmk/mach/machine/thread_status.h | 7 +-
 osfmk/mach/machine/vm_param.h | 7 +-
 osfmk/mach/machine/vm_types.h | 7 +-
 osfmk/mach/memory_object.defs | 19 +-
 osfmk/mach/memory_object.h | 29 +-
 osfmk/mach/memory_object_control.defs | 2 +
 osfmk/mach/memory_object_types.h | 380 +-
 osfmk/mach/message.h | 111 +-
 osfmk/mach/mig.h | 90 +-
 osfmk/mach/mig_errors.h | 12 +-
 osfmk/mach/mk_timer.h | 9 +-
 osfmk/mach/ndr.h | 7 +-
 osfmk/mach/notify.h | 4 -
 osfmk/mach/policy.h | 53 +-
 osfmk/mach/port.h | 208 +-
 osfmk/mach/ppc/Makefile | 2 +-
 osfmk/mach/ppc/_types.h | 226 +
 osfmk/mach/ppc/boolean.h | 73 +-
 osfmk/mach/ppc/exception.h | 3 -
 osfmk/mach/ppc/kern_return.h | 71 +-
 osfmk/mach/ppc/machine_types.defs | 51 +-
 osfmk/mach/ppc/ndr_def.h | 27 -
 osfmk/mach/ppc/processor_info.h | 30 +-
 osfmk/mach/ppc/rpc.h | 17 +-
 osfmk/mach/ppc/syscall_sw.h | 22 +-
 osfmk/mach/ppc/thread_state.h | 5 +-
 osfmk/mach/ppc/thread_status.h | 202 +-
 osfmk/mach/ppc/vm_param.h | 66 +-
 osfmk/mach/ppc/vm_types.h | 101 +-
 osfmk/mach/processor_info.h | 29 +-
 osfmk/mach/rpc.h | 6 -
 osfmk/mach/semaphore.h | 25 +-
 osfmk/mach/shared_memory_server.h | 83 +-
 osfmk/mach/std_types.h | 6 +-
 osfmk/mach/sync_policy.h | 16 +-
 osfmk/mach/syscall_sw.h | 33 +-
 osfmk/mach/task.defs | 17 +-
 osfmk/mach/task_info.h | 108 +-
 osfmk/mach/task_ledger.h | 7 +-
 osfmk/mach/task_policy.h | 17 +-
 osfmk/mach/task_special_ports.h | 8 +-
 osfmk/mach/thread_act.defs | 10 +-
 osfmk/mach/thread_info.h | 15 +-
 osfmk/mach/thread_policy.h | 29 +-
 osfmk/mach/thread_status.h | 6 +-
 osfmk/mach/thread_switch.h | 8 +-
 osfmk/mach/time_value.h | 32 +-
 osfmk/mach/upl.defs | 14 +-
 osfmk/mach/vm_attributes.h | 6 +-
 osfmk/mach/vm_inherit.h | 6 +-
 osfmk/mach/vm_map.defs | 42 +-
 osfmk/mach/vm_param.h | 109 +-
 osfmk/mach/vm_prot.h | 9 +-
 osfmk/mach/vm_purgable.h | 55 +
 osfmk/mach/vm_region.h | 105 +-
 osfmk/mach/vm_statistics.h | 90 +-
 osfmk/mach/vm_sync.h | 7 +-
 osfmk/mach/vm_types.h | 56 +-
 osfmk/mach_debug/Makefile | 2 +-
 osfmk/mach_debug/hash_info.h | 48 +-
 osfmk/mach_debug/ipc_info.h | 62 +-
 osfmk/mach_debug/lockgroup_info.h | 69 +
 osfmk/mach_debug/mach_debug_types.defs | 110 +-
 osfmk/mach_debug/mach_debug_types.h | 1 +
 osfmk/mach_debug/page_info.h | 44 +-
 osfmk/mach_debug/vm_info.h | 55 +-
 osfmk/mach_debug/zone_info.h | 66 +-
 osfmk/machine/Makefile | 27 +-
 osfmk/machine/cpu_capabilities.h | 16 +-
 osfmk/machine/cpu_number.h | 4 +
 osfmk/machine/disk.h | 35 -
 osfmk/machine/gdb_defs.h | 35 -
 osfmk/machine/hw_lock_types.h | 35 -
 osfmk/machine/io_map_entries.h | 4 +
 osfmk/machine/iobus.h | 35 -
 osfmk/machine/kgdb_defs.h | 35 -
 osfmk/machine/kgdb_setjmp.h | 35 -
 osfmk/machine/lock.h | 4 +
 osfmk/machine/{spl.h => locks.h} | 12 +-
 osfmk/machine/mach_param.h | 35 -
 .../label_t.h => osfmk/machine/machine_cpu.h | 12 +-
 osfmk/machine/machine_rpc.h | 2 +-
 .../cpu.h => osfmk/machine/simple_lock.h | 13 +-
 osfmk/machine/thread_act.h | 35 -
 osfmk/man/host_basic_info.html | 2 +-
 osfmk/ppc/AltiAssist.s | 1 -
 osfmk/ppc/Diagnostics.c | 169 +-
 osfmk/ppc/Diagnostics.h | 19 +-
 osfmk/ppc/Emulate.s | 32 +-
 osfmk/ppc/Emulate64.s | 49 +-
 osfmk/ppc/Firmware.s | 7 +-
 osfmk/ppc/FirmwareC.c | 1 -
 osfmk/ppc/Makefile | 11 +-
 osfmk/ppc/PPCcalls.c | 1 -
 osfmk/ppc/PseudoKernel.c | 163 +-
 osfmk/ppc/_setjmp.s | 8 +-
 osfmk/ppc/aligned_data.s | 82 +-
 osfmk/ppc/asm.h | 14 +-
 osfmk/ppc/ast.h | 5 +-
 osfmk/ppc/atomic_switch.s | 6 +-
 osfmk/ppc/bcopy.s | 837 +-
 osfmk/ppc/bcopytest.c | 2 -
 osfmk/ppc/cache.s | 108 +
 osfmk/ppc/chud/chud_cpu.c | 982 ++-
 osfmk/ppc/chud/chud_cpu_asm.h | 80 +-
 osfmk/ppc/chud/chud_cpu_asm.s | 761 +-
 osfmk/ppc/chud/chud_memory.c | 5 +-
 osfmk/ppc/chud/chud_osfmk_callback.c | 307 +-
 osfmk/ppc/chud/chud_spr.h | 1 +
 osfmk/ppc/chud/chud_thread.c | 921 ++-
 osfmk/ppc/chud/chud_xnu.h | 101 +-
 osfmk/ppc/chud/chud_xnu_private.h | 44 +
 osfmk/ppc/clock.h | 52 -
 osfmk/ppc/commpage/atomic.s | 274 +
 osfmk/ppc/commpage/bcopy_64.s | 7 +-
 osfmk/ppc/commpage/bcopy_970.s | 42 +-
 osfmk/ppc/commpage/bcopy_g3.s | 3 +-
 osfmk/ppc/commpage/bcopy_g4.s | 3 +-
 osfmk/ppc/commpage/bigcopy_970.s | 605 +-
 osfmk/ppc/commpage/bzero_128.s | 29 +-
 osfmk/ppc/commpage/bzero_32.s | 3 +-
 osfmk/ppc/commpage/cacheflush.s | 20 +-
 osfmk/ppc/commpage/commpage.c | 437 +-
 osfmk/ppc/commpage/commpage.h | 16 +-
 osfmk/ppc/commpage/commpage_asm.s | 31 +-
 osfmk/ppc/commpage/gettimeofday.s | 83 +-
 osfmk/ppc/commpage/mach_absolute_time.s | 22 +-
 osfmk/ppc/commpage/memset_64.s | 91 +
 osfmk/ppc/commpage/memset_g3.s | 127 +
 osfmk/ppc/commpage/memset_g4.s | 126 +
 osfmk/ppc/commpage/memset_g5.s | 163 +
 osfmk/ppc/commpage/pthread.s | 43 +-
 osfmk/ppc/commpage/spinlocks.s | 112 +-
 osfmk/ppc/console_feed.c | 2 +-
 osfmk/ppc/cpu.c | 1368 ++--
 osfmk/ppc/cpu_capabilities.h | 70 +-
 osfmk/ppc/cpu_data.h | 55 +-
 osfmk/ppc/cpu_internal.h | 92 +
 osfmk/ppc/cpu_number.h | 8 +-
 osfmk/ppc/cswtch.s | 129 +-
 osfmk/ppc/db_interface.c | 37 +-
 osfmk/ppc/db_low_trace.c | 130 +-
 osfmk/ppc/db_machdep.h | 2 +-
 osfmk/ppc/db_trace.c | 110 +-
 osfmk/ppc/exception.h | 106 +-
 osfmk/ppc/fpu_protos.h | 6 +-
 osfmk/ppc/genassym.c | 342 +-
 osfmk/ppc/hw_exception.s | 267 +-
 osfmk/ppc/hw_lock.s | 1673 ++--
 osfmk/ppc/hw_lock_types.h | 46 +-
 osfmk/ppc/hw_perfmon.c | 159 +-
 osfmk/ppc/hw_perfmon.h | 2 +-
 osfmk/ppc/hw_vm.s | 3656 ++++++++-
 osfmk/ppc/interrupt.c | 51 +-
 osfmk/ppc/io_map.c | 8 +-
 osfmk/ppc/io_map_entries.h | 3 +
 osfmk/ppc/lock.h | 57 +-
 osfmk/ppc/locks.h | 206 +
 osfmk/ppc/locks_ppc.c | 2054 +++++
 osfmk/ppc/lowglobals.h | 18 +-
 osfmk/ppc/lowmem_vectors.s | 549 +-
 osfmk/ppc/mach_param.h | 57 -
 osfmk/ppc/machine_cpu.h | 20 +-
 osfmk/ppc/machine_routines.c | 626 +-
 osfmk/ppc/machine_routines.h | 311 +-
 osfmk/ppc/machine_routines_asm.s | 275 +-
 osfmk/ppc/mappings.c | 441 +-
 osfmk/ppc/mappings.h | 305 +-
 osfmk/ppc/mcount.s | 7 +-
 osfmk/ppc/mem.h | 7 +-
 osfmk/ppc/misc.c | 10 +-
 osfmk/ppc/misc_asm.s | 46 +-
 osfmk/ppc/misc_protos.h | 134 +-
 osfmk/ppc/model_dep.c | 95 +-
 osfmk/ppc/movc.s | 692 +-
 osfmk/ppc/new_screen.h | 5 +-
 osfmk/ppc/pcb.c | 353 +-
 osfmk/ppc/pmap.c | 764 +-
 osfmk/ppc/pmap.h | 133 +-
 osfmk/ppc/ppc_init.c | 223 +-
 osfmk/ppc/ppc_vm_init.c | 139 +-
 osfmk/ppc/proc_reg.h | 6 +-
 osfmk/ppc/rtclock.c | 382 +-
 osfmk/ppc/savearea.c | 7 +-
 osfmk/ppc/savearea.h | 35 +-
 osfmk/ppc/savearea_asm.s | 166 +-
 osfmk/ppc/serial_io.c | 33 +-
 osfmk/ppc/serial_io.h | 4 +-
 osfmk/ppc/simple_lock.h | 172 +
 osfmk/ppc/skiplists.s | 86 +-
 osfmk/ppc/start.s | 145 +-
 osfmk/ppc/status.c | 450 +-
 osfmk/ppc/thread.h | 154 +-
 osfmk/ppc/thread_act.h | 180 -
 osfmk/ppc/trap.c | 170 +-
 osfmk/ppc/trap.h | 13 +-
 osfmk/ppc/vmachmon.c | 796 +-
 osfmk/ppc/vmachmon.h | 90 +-
 osfmk/ppc/vmachmon_asm.s | 102 +-
 osfmk/profiling/i386/profile-asm.s | 6 +-
 osfmk/profiling/i386/profile-md.c | 2 +-
 osfmk/sys/types.h | 3 +-
 osfmk/sys/version.h | 135 -
 osfmk/vm/Makefile | 10 +-
 osfmk/vm/bsd_vm.c | 592 +-
 osfmk/vm/cpm.h | 2 +
 osfmk/vm/device_vm.c | 172 +-
 osfmk/vm/memory_object.c | 1070 +--
 osfmk/vm/memory_object.h | 26 +-
 osfmk/vm/pmap.h | 187 +-
 osfmk/vm/task_working_set.c | 399 +-
 osfmk/vm/task_working_set.h | 110 +-
 osfmk/vm/vm_debug.c | 113 +-
 osfmk/vm/vm_debug.h | 7 +-
 osfmk/vm/vm_external.c | 7 +-
 osfmk/vm/vm_fault.c | 498 +-
 osfmk/vm/vm_fault.h | 56 +-
 osfmk/vm/vm_init.c | 8 +-
 osfmk/vm/vm_kern.c | 505 +-
 osfmk/vm/vm_kern.h | 52 +-
 osfmk/vm/vm_map.c | 5889 +++++++------
 osfmk/vm/vm_map.h | 567 +-
 osfmk/vm/vm_object.c | 1199 ++-
 osfmk/vm/vm_object.h | 124 +-
 osfmk/vm/vm_page.h | 88 +-
 osfmk/vm/vm_pageout.c | 3973 +++++---
 osfmk/vm/vm_pageout.h | 142 +-
 osfmk/vm/vm_print.h | 8 +-
 osfmk/vm/vm_protos.h | 330 +
 osfmk/vm/vm_resident.c | 519 +-
 osfmk/vm/vm_shared_memory_server.c | 1874 ++++-
 osfmk/vm/vm_shared_memory_server.h | 83 +-
 osfmk/vm/vm_user.c | 3435 +++++----
 pexpert/conf/Makefile.i386 | 16 +-
 pexpert/conf/Makefile.ppc | 1 -
 pexpert/conf/Makefile.template | 14 +-
 pexpert/conf/tools/Makefile | 8 +-
 pexpert/conf/tools/newvers/Makefile | 49 -
 pexpert/conf/tools/newvers/newvers.csh | 33 -
 pexpert/conf/version.major | 1 -
 pexpert/conf/version.minor | 1 -
 pexpert/conf/version.variant | 1 -
 pexpert/gen/bootargs.c | 33 +-
 pexpert/gen/device_tree.c | 15 +-
 pexpert/gen/pe_gen.c | 2 +
 pexpert/i386/fakePPCDeviceTree.c | 45 +-
 pexpert/i386/fakePPCDeviceTree.h | 9 +-
 pexpert/i386/fakePPCStructs.h | 26 +-
 pexpert/i386/kd.c | 8 +-
 pexpert/i386/pe_identify_machine.c | 8 +-
 pexpert/i386/pe_init.c | 82 +-
 pexpert/i386/pe_interrupt.c | 22 +-
 pexpert/i386/pe_kprintf.c | 13 +-
 pexpert/i386/pe_serial.c | 6 +-
 pexpert/pexpert/i386/protos.h | 1 -
 pexpert/pexpert/pexpert.h | 19 +-
 pexpert/pexpert/ppc/protos.h | 7 +-
 pexpert/pexpert/protos.h | 16 +-
 pexpert/ppc/pe_init.c | 1 -
 pexpert/ppc/pe_kprintf.c | 39 +-
 1754 files changed, 236521 insertions(+), 155169 deletions(-)
 create mode 100644 EXTERNAL_HEADERS/Info.plist
 create mode 100644 EXTERNAL_HEADERS/Makefile
 create mode 100644 EXTERNAL_HEADERS/architecture/Makefile
 create mode 100644 EXTERNAL_HEADERS/architecture/i386/Makefile
 create mode 100644 EXTERNAL_HEADERS/architecture/ppc/Makefile
 delete mode 100644 EXTERNAL_HEADERS/bsd/i386/ansi.h
 delete mode 100644 EXTERNAL_HEADERS/bsd/ppc/ansi.h
 create mode 100644 EXTERNAL_HEADERS/i386/Makefile
 rename bsd/dev/disk_label.h => EXTERNAL_HEADERS/i386/_limits.h (81%)
 rename EXTERNAL_HEADERS/{bsd => }/i386/limits.h (97%)
 create mode 100644 EXTERNAL_HEADERS/mach-o/Makefile
 create mode 100644 EXTERNAL_HEADERS/machine/Makefile
 create mode 100644 EXTERNAL_HEADERS/ppc/Makefile
 rename bsd/dev/disk.h => EXTERNAL_HEADERS/ppc/_limits.h (81%)
 rename EXTERNAL_HEADERS/{bsd => }/ppc/limits.h (98%)
 delete mode 100644 bsd/conf/tools/newvers/Makefile
 delete mode 100644 bsd/conf/tools/newvers/newvers.csh
 delete mode 100644 bsd/conf/version.major
 delete mode 100644 bsd/conf/version.minor
 delete mode 100644 bsd/conf/version.variant
 rename bsd/crypto/{rijndael => aes}/Makefile (81%)
 create mode 100644 bsd/crypto/aes/aes.h
 create mode 100644 bsd/crypto/aes/aescrypt.c
 create mode 100644 bsd/crypto/aes/aeskey.c
 create mode 100644 bsd/crypto/aes/aesopt.h
 create mode 100644 bsd/crypto/aes/aestab.c
 create mode 100644 bsd/crypto/aes/aestab.h
 delete mode 100644 bsd/crypto/rijndael/boxes-fst.dat
 delete mode 100644 bsd/crypto/rijndael/rijndael-alg-fst.c
 delete mode 100644 bsd/crypto/rijndael/rijndael-alg-fst.h
 delete mode 100644 bsd/crypto/rijndael/rijndael-api-fst.c
 delete mode 100644 bsd/crypto/rijndael/rijndael-api-fst.h
 delete mode 100644 bsd/crypto/rijndael/rijndael.h
 delete mode 100644 bsd/crypto/rijndael/rijndael_local.h
 delete mode 100644 bsd/dev/i386/unix_startup.c
 create mode 100644 bsd/dev/ppc/munge.s
 rename bsd/dev/{ppc => }/unix_startup.c (55%)
 create mode 100644 bsd/hfs/hfs_fsctl.h
 delete mode 100644 bsd/hfs/hfs_lockf.c
 delete mode 100644 bsd/hfs/hfs_lockf.h
 create mode 100644 bsd/hfs/hfs_xattr.c
 delete mode 100644 bsd/hfs/hfscommon/Catalog/Catalog.c
 delete mode 100644 bsd/hfs/hfscommon/Catalog/CatalogIterators.c
 create mode 100644 bsd/i386/_types.h
 delete mode 100644 bsd/i386/cpu.h
 delete mode 100644 bsd/i386/label_t.h
 delete mode 100644 bsd/i386/spl.h
 delete mode 100644 bsd/i386/table.h
 create mode 100644 bsd/kern/kern_authorization.c
 create mode 100644 bsd/kern/kern_credential.c
 rename bsd/{ufs/ufs/ufs_lockf.c => kern/kern_lockf.c} (61%)
 create mode 100644 bsd/kern/kpi_mbuf.c
 create mode 100644 bsd/kern/kpi_socket.c
 create mode 100644 bsd/kern/kpi_socketfilter.c
 create mode 100755 bsd/kern/makesyscalls.sh
 create mode 100644 bsd/kern/sys_pipe.c
 create mode 100644 bsd/kern/syscalls.master
 create mode 100644 bsd/libkern/crc32.c
 delete mode 100644 bsd/libkern/inet_ntoa.c
 create mode 100644 bsd/libkern/inet_ntop.c
 rename bsd/machine/{unix_traps.h => _limits.h} (78%)
 rename bsd/machine/{table.h => _types.h} (78%)
 delete mode 100644 bsd/machine/user.h
 create mode 100644 bsd/man/man2/aio_cancel.2
 create mode 100644 bsd/man/man2/aio_error.2
 create mode 100644 bsd/man/man2/aio_read.2
 create mode 100644 bsd/man/man2/aio_return.2
 create mode 100644 bsd/man/man2/aio_suspend.2
 create mode 100644 bsd/man/man2/aio_write.2
 delete mode 100644 bsd/man/man2/brk.2
 create mode 100644 bsd/man/man2/exchangedata.2
 create mode 100644 bsd/man/man2/getattrlist.2
 create mode 100644 bsd/man/man2/getdirentriesattr.2
create mode 100644 bsd/man/man2/getxattr.2 create mode 100644 bsd/man/man2/listxattr.2 create mode 100644 bsd/man/man2/poll.2 create mode 100644 bsd/man/man2/removexattr.2 delete mode 100644 bsd/man/man2/sbrk.2 create mode 100644 bsd/man/man2/searchfs.2 create mode 100644 bsd/man/man2/setattrlist.2 create mode 100644 bsd/man/man2/setxattr.2 create mode 100644 bsd/man/man4/dummynet.4 create mode 100644 bsd/man/man4/ifmib.4 delete mode 100644 bsd/miscfs/specfs/spec_lockf.c create mode 100644 bsd/net/devtimer.c create mode 100644 bsd/net/devtimer.h rename bsd/{ppc/label_t.h => net/ieee8023ad.h} (58%) create mode 100644 bsd/net/if_bond.c create mode 100644 bsd/net/if_bond_var.h create mode 100644 bsd/net/if_ether.h delete mode 100644 bsd/net/if_sppp.h delete mode 100644 bsd/net/if_stf.h delete mode 100644 bsd/net/if_tun.c delete mode 100644 bsd/net/if_tun.h delete mode 100644 bsd/net/if_tunvar.h create mode 100644 bsd/net/init.c create mode 100644 bsd/net/init.h create mode 100644 bsd/net/kpi_interface.c create mode 100644 bsd/net/kpi_interface.h create mode 100644 bsd/net/kpi_interfacefilter.c create mode 100644 bsd/net/kpi_interfacefilter.h create mode 100644 bsd/net/kpi_protocol.c create mode 100644 bsd/net/kpi_protocol.h create mode 100644 bsd/net/lacp.h create mode 100644 bsd/net/multicast_list.c rename bsd/{machine/ansi.h => net/multicast_list.h} (52%) delete mode 100644 bsd/net/netisr.c delete mode 100644 bsd/net/slcompress.c delete mode 100644 bsd/net/slcompress.h delete mode 100644 bsd/netinet/if_ether.c create mode 100644 bsd/netinet/in_arp.c create mode 100644 bsd/netinet/in_arp.h create mode 100644 bsd/netinet/ip_divert.h create mode 100644 bsd/netinet/ip_fw2.c create mode 100644 bsd/netinet/ip_fw2.h create mode 100644 bsd/netinet/ip_fw2_compat.c create mode 100644 bsd/netinet/ip_fw2_compat.h create mode 100644 bsd/netinet/kpi_ipfilter.c create mode 100644 bsd/netinet/kpi_ipfilter.h rename bsd/{ppc/table.h => netinet/kpi_ipfilter_var.h} (51%) create mode 100644 bsd/netinet6/ip6_fw.c delete mode 100644 bsd/nfs/nfs_nqlease.c delete mode 100644 bsd/nfs/nlminfo.h delete mode 100644 bsd/nfs/nqnfs.h create mode 100644 bsd/ppc/_types.h delete mode 100644 bsd/ppc/spl.h delete mode 100644 bsd/ppc/user.h rename bsd/{miscfs/specfs/lockf.h => sys/_endian.h} (53%) create mode 100644 bsd/sys/_types.h delete mode 100644 bsd/sys/audit.h create mode 100644 bsd/sys/buf_internal.h create mode 100644 bsd/sys/file_internal.h create mode 100644 bsd/sys/fsevents.h rename bsd/{net/if_slvar.h => sys/imgact.h} (54%) create mode 100644 bsd/sys/ipcs.h create mode 100644 bsd/sys/kauth.h delete mode 100644 bsd/sys/kern_audit.h create mode 100644 bsd/sys/kernel_types.h create mode 100644 bsd/sys/kpi_mbuf.h create mode 100644 bsd/sys/kpi_socket.h create mode 100644 bsd/sys/kpi_socketfilter.h create mode 100644 bsd/sys/mount_internal.h create mode 100644 bsd/sys/pipe.h create mode 100644 bsd/sys/proc_internal.h rename bsd/{net/netisr.h => sys/ptrace_internal.h} (63%) create mode 100644 bsd/sys/sem_internal.h create mode 100644 bsd/sys/shm_internal.h create mode 100644 bsd/sys/sysent.h create mode 100644 bsd/sys/sysproto.h delete mode 100644 bsd/sys/table.h create mode 100644 bsd/sys/ubc_internal.h create mode 100644 bsd/sys/uio_internal.h create mode 100644 bsd/sys/vfs_context.h create mode 100644 bsd/sys/vnode_internal.h create mode 100644 bsd/sys/xattr.h delete mode 100644 bsd/ufs/ufs/lockf.h create mode 100644 bsd/uuid/Makefile create mode 100644 bsd/uuid/uuid.h create mode 100644 bsd/vfs/kpi_vfs.c create mode 
100644 bsd/vfs/vfs_attrlist.c create mode 100644 bsd/vfs/vfs_fsevents.c create mode 100644 bsd/vfs/vfs_xattr.c create mode 100644 config/IPFirewall.kext/Info.plist create mode 100644 config/MasterVersion create mode 100644 config/System.kext/PlugIns/Unsupported.kext/Info.plist create mode 100644 config/Unsupported.exports create mode 100644 config/Unsupported.i386.exports create mode 100644 config/Unsupported.ppc.exports create mode 100755 config/newvers.pl create mode 100644 config/version.c create mode 100644 iokit/IOKit/IOPolledInterface.h create mode 100644 iokit/Kernel/IOKitKernelInternal.h delete mode 100644 iokit/Kernel/IOPMPagingPlexus.cpp delete mode 100644 iokit/conf/tools/newvers/Makefile delete mode 100644 iokit/conf/tools/newvers/newvers.csh delete mode 100644 iokit/conf/version.major delete mode 100644 iokit/conf/version.minor delete mode 100644 iokit/conf/version.variant delete mode 100644 libkern/conf/tools/newvers/Makefile delete mode 100644 libkern/conf/tools/newvers/newvers.csh delete mode 100644 libkern/conf/version.major delete mode 100644 libkern/conf/version.minor delete mode 100644 libkern/conf/version.variant create mode 100644 libkern/gen/OSDebug.cpp rename bsd/machine/trap.h => libkern/libkern/OSDebug.h (71%) create mode 100644 libkern/libkern/OSMalloc.h rename bsd/i386/user.h => libkern/libkern/locks.h (79%) create mode 100644 libkern/libkern/sysctl.h create mode 100644 libkern/libkern/version.h.template create mode 100644 libkern/stdio/scanf.c create mode 100644 libkern/uuid/Makefile create mode 100644 libkern/uuid/uuid.c delete mode 100644 libsa/conf/tools/newvers/Makefile delete mode 100644 libsa/conf/tools/newvers/newvers.csh delete mode 100644 libsa/conf/version.major delete mode 100644 libsa/conf/version.minor delete mode 100644 libsa/conf/version.variant delete mode 100644 osfmk/conf/kernelversion.major delete mode 100644 osfmk/conf/kernelversion.minor delete mode 100644 osfmk/conf/kernelversion.variant delete mode 100644 osfmk/conf/tools/kernel_newvers/Makefile delete mode 100644 osfmk/conf/tools/kernel_newvers/kernel_newvers.csh delete mode 100644 osfmk/conf/tools/newvers/Makefile delete mode 100644 osfmk/conf/tools/newvers/newvers.csh delete mode 100644 osfmk/conf/version.major delete mode 100644 osfmk/conf/version.minor delete mode 100644 osfmk/conf/version.variant create mode 100644 osfmk/console/panic_ui/README create mode 100644 osfmk/console/panic_ui/appleclut8.h create mode 100644 osfmk/console/panic_ui/generated_files/panic_image.c create mode 100644 osfmk/console/panic_ui/generated_files/rendered_numbers.c create mode 100644 osfmk/console/panic_ui/genimage.c create mode 100644 osfmk/console/panic_ui/images/panic_dialog.tiff create mode 100644 osfmk/console/panic_ui/images/panic_dialogWHD.raw create mode 100644 osfmk/console/panic_ui/images/rendered_numbers.tiff create mode 100644 osfmk/console/panic_ui/images/rendered_numbersWHD.raw create mode 100644 osfmk/console/panic_ui/qtif2kraw.c create mode 100644 osfmk/console/panic_ui/setupdialog.c create mode 100644 osfmk/console/panic_ui/systemCLUT.act delete mode 100644 osfmk/i386/AT386/asm_startup.h create mode 100644 osfmk/i386/acpi.c rename bsd/machine/proc.h => osfmk/i386/acpi.h (66%) create mode 100644 osfmk/i386/acpi_wakeup.s create mode 100644 osfmk/i386/commpage/atomic.s create mode 100644 osfmk/i386/cpu_threads.c create mode 100644 osfmk/i386/cpu_threads.h delete mode 100644 osfmk/i386/hardclock.c delete mode 100644 osfmk/i386/hardclock_entries.h create mode 100644 osfmk/i386/locks.h create 
mode 100644 osfmk/i386/locks_i386.c delete mode 100644 osfmk/i386/mach_param.h create mode 100644 osfmk/i386/mtrr.c rename bsd/ppc/cpu.h => osfmk/i386/mtrr.h (51%) create mode 100644 osfmk/i386/perfmon.c create mode 100644 osfmk/i386/perfmon.h create mode 100644 osfmk/i386/postcode.h rename osfmk/{kern/time_out.h => i386/simple_lock.h} (55%) delete mode 100644 osfmk/i386/thread_act.h delete mode 100644 osfmk/kern/cpu_data.c delete mode 100644 osfmk/kern/etap.c delete mode 100644 osfmk/kern/etap_macros.h delete mode 100644 osfmk/kern/etap_map.c delete mode 100644 osfmk/kern/etap_map.h delete mode 100644 osfmk/kern/etap_options.h delete mode 100644 osfmk/kern/etap_pool.c delete mode 100644 osfmk/kern/etap_pool.h delete mode 100644 osfmk/kern/lock.c delete mode 100644 osfmk/kern/lock_mon.c create mode 100644 osfmk/kern/locks.c create mode 100644 osfmk/kern/locks.h delete mode 100644 osfmk/kern/mk_sp.h rename bsd/vm/vm_pageout.h => osfmk/kern/processor_data.c (65%) create mode 100644 osfmk/kern/processor_data.h rename osfmk/kern/{mach_factor.c => sched_average.c} (69%) delete mode 100644 osfmk/kern/simple_lock_types.h delete mode 100644 osfmk/kern/sscanf.c create mode 100644 osfmk/kern/stack.c delete mode 100644 osfmk/kern/syscall_emulation.h delete mode 100644 osfmk/kern/thread_act.h delete mode 100644 osfmk/kern/thread_swap.c delete mode 100644 osfmk/kern/thread_swap.h delete mode 100644 osfmk/mach/AT386/machdep.mk delete mode 100644 osfmk/mach/boot_info.h delete mode 100644 osfmk/mach/etap.h delete mode 100644 osfmk/mach/etap_events.h delete mode 100644 osfmk/mach/i386/mach_i386.defs delete mode 100644 osfmk/mach/i386/mach_i386_types.h create mode 100644 osfmk/mach/mach_vm.defs create mode 100644 osfmk/mach/ppc/_types.h create mode 100644 osfmk/mach/vm_purgable.h create mode 100644 osfmk/mach_debug/lockgroup_info.h delete mode 100644 osfmk/machine/disk.h delete mode 100644 osfmk/machine/gdb_defs.h delete mode 100644 osfmk/machine/hw_lock_types.h delete mode 100644 osfmk/machine/iobus.h delete mode 100644 osfmk/machine/kgdb_defs.h delete mode 100644 osfmk/machine/kgdb_setjmp.h rename osfmk/machine/{spl.h => locks.h} (83%) delete mode 100644 osfmk/machine/mach_param.h rename bsd/machine/label_t.h => osfmk/machine/machine_cpu.h (81%) rename bsd/machine/cpu.h => osfmk/machine/simple_lock.h (84%) delete mode 100644 osfmk/machine/thread_act.h create mode 100644 osfmk/ppc/chud/chud_xnu_private.h delete mode 100644 osfmk/ppc/clock.h create mode 100644 osfmk/ppc/commpage/atomic.s create mode 100644 osfmk/ppc/commpage/memset_64.s create mode 100644 osfmk/ppc/commpage/memset_g3.s create mode 100644 osfmk/ppc/commpage/memset_g4.s create mode 100644 osfmk/ppc/commpage/memset_g5.s create mode 100644 osfmk/ppc/cpu_internal.h create mode 100644 osfmk/ppc/locks.h create mode 100644 osfmk/ppc/locks_ppc.c delete mode 100644 osfmk/ppc/mach_param.h create mode 100644 osfmk/ppc/simple_lock.h delete mode 100644 osfmk/ppc/thread_act.h delete mode 100644 osfmk/sys/version.h create mode 100644 osfmk/vm/vm_protos.h delete mode 100644 pexpert/conf/tools/newvers/Makefile delete mode 100644 pexpert/conf/tools/newvers/newvers.csh delete mode 100644 pexpert/conf/version.major delete mode 100644 pexpert/conf/version.minor delete mode 100644 pexpert/conf/version.variant diff --git a/EXTERNAL_HEADERS/Info.plist b/EXTERNAL_HEADERS/Info.plist new file mode 100644 index 000000000..848b9fe75 --- /dev/null +++ b/EXTERNAL_HEADERS/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleIdentifier + 
com.apple.framework.kernel + CFBundleName + Kernel + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + FMWK + CFBundleShortVersionString + ###KERNEL_VERSION_SHORT### + CFBundleVersion + ###KERNEL_VERSION_LONG### + CFBundleSignature + ???? + + diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile new file mode 100644 index 000000000..0e05710a4 --- /dev/null +++ b/EXTERNAL_HEADERS/Makefile @@ -0,0 +1,39 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = \ + architecture \ + machine \ + mach-o + +INSTINC_SUBDIRS_PPC = \ + architecture \ + ppc + +INSTINC_SUBDIRS_I386 = \ + architecture \ + i386 + +EXPORT_FILES = \ + ar.h \ + stdarg.h \ + stdint.h + +INSTALL_MI_LIST = + +INSTALL_MI_DIR = . + +EXPORT_MI_LIST = ${EXPORT_FILES} + +EXPORT_MI_DIR = . + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/ar.h b/EXTERNAL_HEADERS/ar.h index def1c4320..79ab2ade0 100644 --- a/EXTERNAL_HEADERS/ar.h +++ b/EXTERNAL_HEADERS/ar.h @@ -61,6 +61,7 @@ * * @(#)ar.h 8.2 (Berkeley) 1/21/94 */ +#ifdef KERNEL_PRIVATE #ifndef _AR_H_ #define _AR_H_ @@ -86,3 +87,4 @@ struct ar_hdr { }; #endif /* !_AR_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile new file mode 100644 index 000000000..fd64ab197 --- /dev/null +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -0,0 +1,32 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = + +INSTINC_SUBDIRS_PPC = \ + ppc + +INSTINC_SUBDIRS_I386 = \ + i386 + +EXPORT_FILES = \ + byte_order.h + +INSTALL_MI_LIST = + +INSTALL_MI_DIR = architecture + +EXPORT_MI_LIST = ${EXPORT_FILES} + +EXPORT_MI_DIR = architecture + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/architecture/byte_order.h b/EXTERNAL_HEADERS/architecture/byte_order.h index b39c272cb..fe80ee110 100644 --- a/EXTERNAL_HEADERS/architecture/byte_order.h +++ b/EXTERNAL_HEADERS/architecture/byte_order.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,17 +23,6 @@ * Copyright (c) 1992 NeXT Computer, Inc. * * Byte ordering conversion. - * - * HISTORY - * - * 20 October 1992 ? at NeXT - * Added #ifdef wrapper to prevent multiple inclusions of this file. - * - * 8 October 1992 ? at NeXT - * Converted to NXxxx versions. Condensed history. - * - * 18 May 1992 ? at NeXT - * Created. 
*/ #ifndef _ARCHITECTURE_BYTE_ORDER_H_ @@ -42,7 +31,7 @@ typedef unsigned long NXSwappedFloat; typedef unsigned long long NXSwappedDouble; -#if defined (__ppc__) +#if defined (__ppc__) || defined(__ppc64__) #include "architecture/ppc/byte_order.h" #elif defined (__i386__) #include "architecture/i386/byte_order.h" diff --git a/EXTERNAL_HEADERS/architecture/i386/Makefile b/EXTERNAL_HEADERS/architecture/i386/Makefile new file mode 100644 index 000000000..5140ed922 --- /dev/null +++ b/EXTERNAL_HEADERS/architecture/i386/Makefile @@ -0,0 +1,37 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS_I386 = + +EXPORT_FILES = \ + asm_help.h \ + cpu.h \ + fpu.h \ + io.h \ + sel.h \ + tss.h \ + byte_order.h \ + desc.h \ + frame.h \ + reg_help.h \ + table.h + + +INSTALL_MD_LIST = + +INSTALL_MD_DIR = + +EXPORT_MD_LIST = ${EXPORT_FILES} + +EXPORT_MD_DIR = architecture/i386 + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/architecture/ppc/Makefile b/EXTERNAL_HEADERS/architecture/ppc/Makefile new file mode 100644 index 000000000..8cf3b09c2 --- /dev/null +++ b/EXTERNAL_HEADERS/architecture/ppc/Makefile @@ -0,0 +1,34 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS_PPC = + +EXPORT_FILES = \ + asm_help.h \ + basic_regs.h \ + byte_order.h \ + cframe.h \ + fp_regs.h \ + macro_help.h \ + pseudo_inst.h \ + reg_help.h + + +INSTALL_MD_LIST = + +INSTALL_MD_DIR = + +EXPORT_MD_LIST = ${EXPORT_FILES} + +EXPORT_MD_DIR = architecture/ppc + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/architecture/ppc/byte_order.h b/EXTERNAL_HEADERS/architecture/ppc/byte_order.h index 77927e97a..4669264e4 100644 --- a/EXTERNAL_HEADERS/architecture/ppc/byte_order.h +++ b/EXTERNAL_HEADERS/architecture/ppc/byte_order.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,17 +23,6 @@ * Copyright (c) 1996 NeXT Software, Inc. * * Byte ordering conversion (for ppc). - * - * HISTORY - * - * 29-Dec-96 Umesh Vaishampayan (umeshv@NeXT.com) - * Ported from m98k. - * - * 8 October 1992 ? at NeXT - * Converted to NXxxx versions. Condensed history. - * - * 28 August 1992 Bruce Martin @NeXT - * Created. 
*/ static __inline__ @@ -77,34 +66,40 @@ NXSwapInt( } static __inline__ -unsigned long -NXSwapLong( - unsigned long inv +unsigned long long +NXSwapLongLong( + unsigned long long inv ) { - union lconv { - unsigned long ul; - unsigned char uc[4]; + union llconv { + unsigned long long ull; + unsigned char uc[8]; } *inp, outv; - inp = (union lconv *)&inv; + inp = (union llconv *)&inv; - outv.uc[0] = inp->uc[3]; - outv.uc[1] = inp->uc[2]; - outv.uc[2] = inp->uc[1]; - outv.uc[3] = inp->uc[0]; + outv.uc[0] = inp->uc[7]; + outv.uc[1] = inp->uc[6]; + outv.uc[2] = inp->uc[5]; + outv.uc[3] = inp->uc[4]; + outv.uc[4] = inp->uc[3]; + outv.uc[5] = inp->uc[2]; + outv.uc[6] = inp->uc[1]; + outv.uc[7] = inp->uc[0]; - return (outv.ul); + return (outv.ull); } +#if defined(__LP64__) + static __inline__ -unsigned long long -NXSwapLongLong( - unsigned long long inv +unsigned long +NXSwapLong( + unsigned long inv ) { union llconv { - unsigned long long ull; + unsigned long ul; unsigned char uc[8]; } *inp, outv; @@ -119,9 +114,34 @@ NXSwapLongLong( outv.uc[6] = inp->uc[1]; outv.uc[7] = inp->uc[0]; - return (outv.ull); + return (outv.ul); } +#else + +static __inline__ +unsigned long +NXSwapLong( + unsigned long inv +) +{ + union lconv { + unsigned long ul; + unsigned char uc[4]; + } *inp, outv; + + inp = (union lconv *)&inv; + + outv.uc[0] = inp->uc[3]; + outv.uc[1] = inp->uc[2]; + outv.uc[2] = inp->uc[1]; + outv.uc[3] = inp->uc[0]; + + return (outv.ul); +} + +#endif /* __LP64__ */ + #ifndef KERNEL static __inline__ NXSwappedFloat diff --git a/EXTERNAL_HEADERS/architecture/ppc/cframe.h b/EXTERNAL_HEADERS/architecture/ppc/cframe.h index 80a08ab6c..3dc63034c 100644 --- a/EXTERNAL_HEADERS/architecture/ppc/cframe.h +++ b/EXTERNAL_HEADERS/architecture/ppc/cframe.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,21 +26,19 @@ * * This include file defines C calling sequence defines * for ppc port. - * - * HISTORY - * 20-May-97 Umesh Vaishampayan (umeshv@apple.com) - * Added C_RED_ZONE. - * 29-Dec-96 Umesh Vaishampayan (umeshv@NeXT.com) - * Ported from m98k. - * 11-June-91 Mike DeMoney (mike@next.com) - * Created. */ #ifndef _ARCH_PPC_CFRAME_H_ #define _ARCH_PPC_CFRAME_H_ +#if defined (__ppc64__) +#define C_ARGSAVE_LEN 64 /* at least 64 bytes of arg save */ +#define C_STACK_ALIGN 32 /* stack must be 32 byte aligned */ +#define C_RED_ZONE 320 /* 320 bytes to skip over saved registers */ +#else #define C_ARGSAVE_LEN 32 /* at least 32 bytes of arg save */ #define C_STACK_ALIGN 16 /* stack must be 16 byte aligned */ -#define C_RED_ZONE 244 /* 224 bytes to skip over saved registers */ +#define C_RED_ZONE 224 /* 224 bytes to skip over saved registers */ +#endif #endif /* _ARCH_PPC_CFRAME_H_ */ diff --git a/EXTERNAL_HEADERS/bsd/i386/ansi.h b/EXTERNAL_HEADERS/bsd/i386/ansi.h deleted file mode 100644 index e20404611..000000000 --- a/EXTERNAL_HEADERS/bsd/i386/ansi.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
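The reshuffle above also gives NXSwapLong an LP64-aware body: under __LP64__ an unsigned long is 8 bytes, so it gets the same 8-byte union treatment as NXSwapLongLong. A minimal stand-alone sketch of that union idiom, checked against an equivalent shift-based swap; the names swap64_union and swap64_shift are hypothetical, not part of the patch:

#include <assert.h>
#include <stdio.h>

typedef unsigned long long u64;

static u64
swap64_union(u64 inv)
{
    union { u64 ull; unsigned char uc[8]; } in, out;
    int i;

    in.ull = inv;
    for (i = 0; i < 8; i++)         /* reverse the eight bytes */
        out.uc[i] = in.uc[7 - i];
    return out.ull;
}

static u64
swap64_shift(u64 v)
{
    /* same result by shifting and masking */
    v = ((v & 0x00ff00ff00ff00ffULL) << 8)  | ((v >> 8)  & 0x00ff00ff00ff00ffULL);
    v = ((v & 0x0000ffff0000ffffULL) << 16) | ((v >> 16) & 0x0000ffff0000ffffULL);
    return (v << 32) | (v >> 32);
}

int
main(void)
{
    u64 x = 0x0102030405060708ULL;

    assert(swap64_union(x) == 0x0807060504030201ULL);
    assert(swap64_union(x) == swap64_shift(x));
    printf("%016llx -> %016llx\n", x, swap64_union(x));
    return 0;
}

Both forms are endian-independent; the shift form is the pattern compilers most readily recognize as a byte-swap instruction, while the union form mirrors the header's byte-at-a-time style.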
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ansi.h 8.2 (Berkeley) 1/4/94 - */ - -#ifndef _ANSI_H_ -#define _ANSI_H_ - -/* - * Types which are fundamental to the implementation and may appear in - * more than one standard header are defined here. Standard headers - * then use: - * #ifdef _BSD_SIZE_T_ - * typedef _BSD_SIZE_T_ size_t; - * #undef _BSD_SIZE_T_ - * #endif - */ -#define _BSD_CLOCK_T_ unsigned long /* clock() */ -#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__) && defined(__SIZE_TYPE__) -#define _BSD_PTRDIFF_T_ __PTRDIFF_TYPE__ /* ptr1 - ptr2 */ -#define _BSD_SIZE_T_ __SIZE_TYPE__ /* sizeof() */ -#else -#define _BSD_PTRDIFF_T_ int /* ptr1 - ptr2 */ -#define _BSD_SIZE_T_ unsigned long /* sizeof() */ -#endif /* __GNUC__ */ -#define _BSD_SSIZE_T_ int /* byte count or error */ -#define _BSD_TIME_T_ long /* time() */ -#define _BSD_VA_LIST_ void * /* va_list */ -#define _BSD_SOCKLEN_T_ int32_t /* socklen_t (duh) */ - -/* - * Runes (wchar_t) is declared to be an ``int'' instead of the more natural - * ``unsigned long'' or ``long''. Two things are happening here. It is not - * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, - * it looks like 10646 will be a 31 bit standard. 
This means that if your - * ints cannot hold 32 bits, you will be in trouble. The reason an int was - * chosen over a long is that the is*() and to*() routines take ints (says - * ANSI C), but they use _RUNE_T_ instead of int. By changing it here, you - * lose a bit of ANSI conformance, but your programs will still work. - * - * Note that _WCHAR_T_ and _RUNE_T_ must be of the same type. When wchar_t - * and rune_t are typedef'd, _WCHAR_T_ will be undef'd, but _RUNE_T remains - * defined for ctype.h. - */ -#if defined(__GNUC__) && defined(__WCHAR_TYPE__) -#define _BSD_WCHAR_T_ __WCHAR_TYPE__ /* wchar_t */ -#define _BSD_RUNE_T_ __WCHAR_TYPE__ /* rune_t */ -#else -#define _BSD_WCHAR_T_ int /* wchar_t */ -#define _BSD_RUNE_T_ int /* rune_t */ -#endif /* __GNUC__ */ - -#endif /* _ANSI_H_ */ diff --git a/EXTERNAL_HEADERS/bsd/ppc/ansi.h b/EXTERNAL_HEADERS/bsd/ppc/ansi.h deleted file mode 100644 index 7bd74d59e..000000000 --- a/EXTERNAL_HEADERS/bsd/ppc/ansi.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ansi.h 8.2 (Berkeley) 1/4/94 - */ - -#ifndef _ANSI_H_ -#define _ANSI_H_ - -/* - * Types which are fundamental to the implementation and may appear in - * more than one standard header are defined here. Standard headers - * then use: - * #ifdef _BSD_SIZE_T_ - * typedef _BSD_SIZE_T_ size_t; - * #undef _BSD_SIZE_T_ - * #endif - */ -#define _BSD_CLOCK_T_ unsigned long /* clock() */ -#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__) && defined(__SIZE_TYPE__) -#define _BSD_PTRDIFF_T_ __PTRDIFF_TYPE__ /* ptr1 - ptr2 */ -#define _BSD_SIZE_T_ __SIZE_TYPE__ /* sizeof() */ -#else -#define _BSD_PTRDIFF_T_ int /* ptr1 - ptr2 */ -#define _BSD_SIZE_T_ unsigned long /* sizeof() */ -#endif /* __GNUC__ */ -#define _BSD_SSIZE_T_ int /* byte count or error */ -#define _BSD_TIME_T_ long /* time() */ -#define _BSD_VA_LIST_ char * /* va_list */ -#define _BSD_SOCKLEN_T_ int32_t /* socklen_t (duh) */ - -/* - * Runes (wchar_t) is declared to be an ``int'' instead of the more natural - * ``unsigned long'' or ``long''. Two things are happening here. It is not - * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, - * it looks like 10646 will be a 31 bit standard. This means that if your - * ints cannot hold 32 bits, you will be in trouble. The reason an int was - * chosen over a long is that the is*() and to*() routines take ints (says - * ANSI C), but they use _RUNE_T_ instead of int. By changing it here, you - * lose a bit of ANSI conformance, but your programs will still work. - * - * Note that _WCHAR_T_ and _RUNE_T_ must be of the same type. When wchar_t - * and rune_t are typedef'd, _WCHAR_T_ will be undef'd, but _RUNE_T remains - * defined for ctype.h. - */ -#if defined(__GNUC__) && defined(__WCHAR_TYPE__) -#define _BSD_WCHAR_T_ __WCHAR_TYPE__ /* wchar_t */ -#define _BSD_RUNE_T_ __WCHAR_TYPE__ /* rune_t */ -#else -#define _BSD_WCHAR_T_ int /* wchar_t */ -#define _BSD_RUNE_T_ int /* rune_t */ -#endif /* __GNUC__ */ - -#endif /* _ANSI_H_ */ diff --git a/EXTERNAL_HEADERS/i386/Makefile b/EXTERNAL_HEADERS/i386/Makefile new file mode 100644 index 000000000..850fc3599 --- /dev/null +++ b/EXTERNAL_HEADERS/i386/Makefile @@ -0,0 +1,27 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS_I386 = + +EXPORT_FILES = \ + _limits.h \ + limits.h + +INSTALL_MD_LIST = + +INSTALL_MD_DIR = + +EXPORT_MD_LIST = ${EXPORT_FILES} + +EXPORT_MD_DIR = i386 + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/bsd/dev/disk_label.h b/EXTERNAL_HEADERS/i386/_limits.h similarity index 81% rename from bsd/dev/disk_label.h rename to EXTERNAL_HEADERS/i386/_limits.h index 3819dcbdc..3b9e7a6f7 100644 --- a/bsd/dev/disk_label.h +++ b/EXTERNAL_HEADERS/i386/_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,5 +19,9 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifndef _I386__LIMITS_H_ +#define _I386__LIMITS_H_ -#warning is obsolete +#define __DARWIN_CLK_TCK 100 /* ticks per second */ + +#endif /* _I386__LIMITS_H_ */ diff --git a/EXTERNAL_HEADERS/bsd/i386/limits.h b/EXTERNAL_HEADERS/i386/limits.h similarity index 97% rename from EXTERNAL_HEADERS/bsd/i386/limits.h rename to EXTERNAL_HEADERS/i386/limits.h index 64eecd5a2..5c6e31df1 100644 --- a/EXTERNAL_HEADERS/bsd/i386/limits.h +++ b/EXTERNAL_HEADERS/i386/limits.h @@ -63,11 +63,14 @@ #ifndef _I386_LIMITS_H_ #define _I386_LIMITS_H_ +#include + #define CHAR_BIT 8 /* number of bits in a char */ #define MB_LEN_MAX 6 /* Allow 31 bit UTF2 */ - -#define CLK_TCK 100 /* ticks per second */ +#ifndef CLK_TCK +#define CLK_TCK __DARWIN_CLK_TCK /* ticks per second */ +#endif /* * According to ANSI (section 2.2.4.2), the values below must be usable by diff --git a/EXTERNAL_HEADERS/mach-o/Makefile b/EXTERNAL_HEADERS/mach-o/Makefile new file mode 100644 index 000000000..efac1ba25 --- /dev/null +++ b/EXTERNAL_HEADERS/mach-o/Makefile @@ -0,0 +1,30 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = \ + +EXPORT_FILES = \ + fat.h \ + kld.h \ + loader.h \ + nlist.h \ + reloc.h + +INSTALL_MI_LIST = + +INSTALL_MI_DIR = + +EXPORT_MI_LIST = ${EXPORT_FILES} + +EXPORT_MI_DIR = mach-o + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/mach-o/fat.h b/EXTERNAL_HEADERS/mach-o/fat.h index 3558e4acf..e964a1965 100644 --- a/EXTERNAL_HEADERS/mach-o/fat.h +++ b/EXTERNAL_HEADERS/mach-o/fat.h @@ -57,3 +57,18 @@ struct fat_arch { unsigned long align; /* alignment as a power of 2 */ }; +#ifdef KERNEL + +#include + +struct vnode; + +/* XXX return type should be load_return_t, but mach_loader.h is not in scope */ +int fatfile_getarch_affinity(struct vnode *vp, vm_offset_t data_ptr, + struct fat_arch *archret, int affinity); +int fatfile_getarch(struct vnode *vp, vm_offset_t data_ptr, + struct fat_arch *archret); +int fatfile_getarch_with_bits(struct vnode *vp, integer_t archbits, + vm_offset_t data_ptr, struct fat_arch *archret); + +#endif /* KERNEL */ diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h index 707dd35d8..d0d148fa9 100644 --- a/EXTERNAL_HEADERS/mach-o/loader.h +++ b/EXTERNAL_HEADERS/mach-o/loader.h @@ -46,22 +46,47 @@ #include /* - * The mach header appears at the very beginning of the object file. + * The mach header appears at the very beginning of the object file; it + * is the same for both 32-bit and 64-bit architectures. 
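The kernel-only fatfile_getarch* prototypes added to fat.h above search the same on-disk fat_arch table that user space sees. A hedged user-space sketch of walking that table (an editor illustration, not the kernel helpers): all fat header fields are stored big-endian, hence the ntohl() calls.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <mach-o/fat.h>

static void
list_slices(FILE *f)
{
    struct fat_header fh;
    struct fat_arch fa;
    uint32_t i, n;

    if (fread(&fh, sizeof(fh), 1, f) != 1)
        return;
    if (ntohl(fh.magic) != FAT_MAGIC)
        return;                     /* not a fat (universal) file */
    n = ntohl(fh.nfat_arch);
    for (i = 0; i < n; i++) {
        if (fread(&fa, sizeof(fa), 1, f) != 1)
            return;
        printf("slice %u: cputype %d, offset %u, size %u, align 2^%u\n",
            i, (int)ntohl(fa.cputype), ntohl(fa.offset),
            ntohl(fa.size), ntohl(fa.align));
    }
}

int
main(int argc, char **argv)
{
    FILE *f = (argc > 1) ? fopen(argv[1], "rb") : NULL;

    if (f != NULL) {
        list_slices(f);
        fclose(f);
    }
    return 0;
}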
*/ struct mach_header { - unsigned long magic; /* mach magic number identifier */ + uint32_t magic; /* mach magic number identifier */ cpu_type_t cputype; /* cpu specifier */ cpu_subtype_t cpusubtype; /* machine specifier */ - unsigned long filetype; /* type of file */ - unsigned long ncmds; /* number of load commands */ - unsigned long sizeofcmds; /* the size of all the load commands */ - unsigned long flags; /* flags */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ +}; + +/* + * The 64-bit mach header appears at the very beginning of object files for + * 64-bit architectures. + */ +struct mach_header_64 { + uint32_t magic; /* mach magic number identifier */ + cpu_type_t cputype; /* cpu specifier */ + cpu_subtype_t cpusubtype; /* machine specifier */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ + uint32_t reserved; /* reserved */ }; -/* Constant for the magic field of the mach_header */ +/* Constant for the magic field of the mach_header (32-bit architectures) */ #define MH_MAGIC 0xfeedface /* the mach magic number */ #define MH_CIGAM NXSwapInt(MH_MAGIC) +/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ +#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ +#define MH_CIGAM_64 NXSwapInt(MH_MAGIC_64) + +/* Constants for the cmd field of new load commands, the type */ +#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */ +#define LC_ROUTINES_64 0x1a /* 64-bit image routines */ + + /* * The layout of the file depends on the filetype. For all but the MH_OBJECT * file type the segments are padded out and aligned on a segment alignment @@ -118,7 +143,9 @@ struct mach_header { * of the particular load command structure plus anything that follows it that * is a part of the load command (i.e. section structures, strings, etc.). To * advance to the next load command the cmdsize can be added to the offset or - * pointer of the current load command. The cmdsize MUST be a multiple of + * pointer of the current load command. The cmdsize for 32-bit architectures + * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple + * of 8 bytes (these are forever the maximum alignment of any load commands). * sizeof(long) (this is forever the maximum alignment of any load commands). * The padded bytes must be zero. All tables in the object file must also * follow these rules so the file can be memory mapped. Otherwise the pointers @@ -174,7 +201,7 @@ union lc_str { * section structures directly follow the segment command and their size is * reflected in cmdsize. */ -struct segment_command { +struct segment_command { /* for 32-bit architectures */ unsigned long cmd; /* LC_SEGMENT */ unsigned long cmdsize; /* includes sizeof section structs */ char segname[16]; /* segment name */ @@ -188,6 +215,27 @@ struct segment_command { unsigned long flags; /* flags */ }; +/* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. 
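Since mach_header_64 only appends a trailing reserved field, the first seven fields can be read through the 32-bit layout and a loader can then dispatch on the magic. A minimal sketch under that assumption; mach_header_size is a hypothetical helper, not part of the patch:

#include <stddef.h>
#include <mach-o/loader.h>

/* Return the size of the header at 'image', or 0 if it is not Mach-O. */
static size_t
mach_header_size(const void *image)
{
    const struct mach_header *mh = (const struct mach_header *)image;

    if (mh->magic == MH_MAGIC || mh->magic == MH_CIGAM)
        return sizeof(struct mach_header);      /* 28 bytes */
    if (mh->magic == MH_MAGIC_64 || mh->magic == MH_CIGAM_64)
        return sizeof(struct mach_header_64);   /* 32 bytes: adds 'reserved' */
    return 0;
}

The load commands begin immediately after the header, so the first one sits at image plus the size returned here.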
+ */ +struct segment_command_64 { /* for 64-bit architectures */ + uint32_t cmd; /* LC_SEGMENT_64 */ + uint32_t cmdsize; /* includes sizeof section_64 structs */ + char segname[16]; /* segment name */ + uint64_t vmaddr; /* memory address of this segment */ + uint64_t vmsize; /* memory size of this segment */ + uint64_t fileoff; /* file offset of this segment */ + uint64_t filesize; /* amount to map from the file */ + vm_prot_t maxprot; /* maximum VM protection */ + vm_prot_t initprot; /* initial VM protection */ + uint32_t nsects; /* number of sections in segment */ + uint32_t flags; /* flags */ +}; + + /* Constants for the flags field of the segment_command */ #define SG_HIGHVM 0x1 /* the file contents for this segment is for the high part of the VM space, the low part @@ -207,7 +255,9 @@ struct segment_command { * and load commands of the object file before it's first section. The zero * fill sections are always last in their segment (in all formats). This * allows the zeroed segment padding to be mapped into memory where zero fill - * sections might be. + * sections might be. The gigabyte zero fill sections, those with the section + * type S_GB_ZEROFILL, can only be in a segment with sections of this type. + * These segments are then placed after all other segments. * * The MH_OBJECT format has all of it's sections in one segment for * compactness. There is no padding to a specified segment boundary and the @@ -224,7 +274,7 @@ struct segment_command { * fields of the section structure for mach object files is described in the * header file . */ -struct section { +struct section { /* for 32-bit architectures */ char sectname[16]; /* name of this section */ char segname[16]; /* segment this section goes in */ unsigned long addr; /* memory address of this section */ @@ -238,6 +288,22 @@ struct section { unsigned long reserved2; /* reserved */ }; +struct section_64 { /* for 64-bit architectures */ + char sectname[16]; /* name of this section */ + char segname[16]; /* segment this section goes in */ + uint64_t addr; /* memory address of this section */ + uint64_t size; /* size in bytes of this section */ + uint32_t offset; /* file offset of this section */ + uint32_t align; /* section alignment (power of 2) */ + uint32_t reloff; /* file offset of relocation entries */ + uint32_t nreloc; /* number of relocation entries */ + uint32_t flags; /* flags (section type and attributes)*/ + uint32_t reserved1; /* reserved (for offset or index) */ + uint32_t reserved2; /* reserved (for count or sizeof) */ + uint32_t reserved3; /* reserved */ +}; + + /* * The flags field of a section structure is separated into two parts a section * type and section attributes. 
The section types are mutually exclusive (it @@ -667,6 +733,34 @@ struct dylib_module { objc_module_info_size; /* the (__OBJC,__module_info) section */ }; +/* a 64-bit module table entry */ +struct dylib_module_64 { + uint32_t module_name; /* the module name (index into string table) */ + + uint32_t iextdefsym; /* index into externally defined symbols */ + uint32_t nextdefsym; /* number of externally defined symbols */ + uint32_t irefsym; /* index into reference symbol table */ + uint32_t nrefsym; /* number of reference symbol table entries */ + uint32_t ilocalsym; /* index into symbols for local symbols */ + uint32_t nlocalsym; /* number of local symbols */ + + uint32_t iextrel; /* index into external relocation entries */ + uint32_t nextrel; /* number of external relocation entries */ + + uint32_t iinit_iterm; /* low 16 bits are the index into the init + section, high 16 bits are the index into + the term section */ + uint32_t ninit_nterm; /* low 16 bits are the number of init section + entries, high 16 bits are the number of + term section entries */ + + uint32_t /* for this module size of the */ + objc_module_info_size; /* (__OBJC,__module_info) section */ + uint64_t /* for this module address of the start of */ + objc_module_info_addr; /* the (__OBJC,__module_info) section */ +}; + + /* * The entries in the reference symbol table are used when loading the module * (both by the static and dynamic link editors) and if the module is unloaded diff --git a/EXTERNAL_HEADERS/mach-o/nlist.h b/EXTERNAL_HEADERS/mach-o/nlist.h index 91763d098..3fe7c367c 100644 --- a/EXTERNAL_HEADERS/mach-o/nlist.h +++ b/EXTERNAL_HEADERS/mach-o/nlist.h @@ -81,6 +81,20 @@ struct nlist { unsigned long n_value; /* value of this symbol (or stab offset) */ }; +/* + * This is the symbol table entry structure for 64-bit architectures. + */ +struct nlist_64 { + union { + uint32_t n_strx; /* index into the string table */ + } n_un; + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + uint16_t n_desc; /* see */ + uint64_t n_value; /* value of this symbol (or stab offset) */ +}; + + /* * Symbols with a index into the string table of zero (n_un.n_strx == 0) are * defined to have a null, "", name. Therefore all string indexes to non null diff --git a/EXTERNAL_HEADERS/machine/Makefile b/EXTERNAL_HEADERS/machine/Makefile new file mode 100644 index 000000000..f26245e7c --- /dev/null +++ b/EXTERNAL_HEADERS/machine/Makefile @@ -0,0 +1,30 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = + +INSTINC_SUBDIRS_PPC = + +INSTINC_SUBDIRS_I386 = + +EXPORT_FILES = \ + limits.h + +INSTALL_MI_LIST = + +INSTALL_MI_DIR = . 
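Stepping back to the nlist/nlist_64 pair above: the two entry sizes differ (12 bytes in the ILP32 layout shown earlier, 16 for nlist_64), so a symbol-table reader must pick its stride from the image's word size before iterating LC_SYMTAB entries. A small sketch under that assumption; symtab_bytes is a hypothetical helper:

#include <stddef.h>
#include <stdint.h>
#include <mach-o/nlist.h>

/* Bytes occupied by a symbol table of 'nsyms' entries. */
static size_t
symtab_bytes(int is64, uint32_t nsyms)
{
    size_t stride = is64 ? sizeof(struct nlist_64)  /* 16 bytes */
                         : sizeof(struct nlist);    /* 12 bytes */

    return (size_t)nsyms * stride;
}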
+ +EXPORT_MI_LIST = ${EXPORT_FILES} + +EXPORT_MI_DIR = machine + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/EXTERNAL_HEADERS/ppc/Makefile b/EXTERNAL_HEADERS/ppc/Makefile new file mode 100644 index 000000000..843510dfd --- /dev/null +++ b/EXTERNAL_HEADERS/ppc/Makefile @@ -0,0 +1,27 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS_PPC = + +EXPORT_FILES = \ + _limits.h \ + limits.h + +INSTALL_MD_LIST = + +INSTALL_MD_DIR = + +EXPORT_MD_LIST = ${EXPORT_FILES} + +EXPORT_MD_DIR = ppc + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/bsd/dev/disk.h b/EXTERNAL_HEADERS/ppc/_limits.h similarity index 81% rename from bsd/dev/disk.h rename to EXTERNAL_HEADERS/ppc/_limits.h index 001bfb171..d512ec411 100644 --- a/bsd/dev/disk.h +++ b/EXTERNAL_HEADERS/ppc/_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,5 +19,9 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifndef _PPC__LIMITS_H_ +#define _PPC__LIMITS_H_ -#warning is obsolete, please use instead +#define __DARWIN_CLK_TCK 100 /* ticks per second */ + +#endif /* _PPC__LIMITS_H_ */ diff --git a/EXTERNAL_HEADERS/bsd/ppc/limits.h b/EXTERNAL_HEADERS/ppc/limits.h similarity index 98% rename from EXTERNAL_HEADERS/bsd/ppc/limits.h rename to EXTERNAL_HEADERS/ppc/limits.h index a5bc0b5df..69d6991dc 100644 --- a/EXTERNAL_HEADERS/bsd/ppc/limits.h +++ b/EXTERNAL_HEADERS/ppc/limits.h @@ -63,10 +63,14 @@ #ifndef _PPC_LIMITS_H_ #define _PPC_LIMITS_H_ +#include + #define CHAR_BIT 8 /* number of bits in a char */ #define MB_LEN_MAX 6 /* Allow 31 bit UTF2 */ -#define CLK_TCK 100 /* ticks per second */ +#ifndef CLK_TCK +#define CLK_TCK __DARWIN_CLK_TCK /* ticks per second */ +#endif /* * According to ANSI (section 2.2.4.2), the values below must be usable by diff --git a/Makefile b/Makefile index d2938322b..2855bb34c 100644 --- a/Makefile +++ b/Makefile @@ -33,11 +33,11 @@ ALL_SUBDIRS = \ CONFIG_SUBDIRS = config -INSTINC_SUBDIRS = $(ALL_SUBDIRS) +INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS -INSTINC_SUBDIRS_PPC = $(INSTINC_SUBDIRS) +INSTINC_SUBDIRS_PPC = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS -INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS) +INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS EXPINC_SUBDIRS = $(ALL_SUBDIRS) diff --git a/bsd/Makefile b/bsd/Makefile index 5a07086bc..7d5fb5325 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -24,6 +24,7 @@ INSTINC_SUBDIRS = \ nfs \ sys \ ufs \ + uuid \ vfs INSTINC_SUBDIRS_PPC = \ @@ -49,6 +50,7 @@ EXPINC_SUBDIRS = \ nfs \ sys \ ufs \ + uuid \ vfs \ vm diff --git a/bsd/bsm/Makefile b/bsd/bsm/Makefile index b1e6c12ab..ec3b7ceb4 100644 --- a/bsd/bsm/Makefile +++ b/bsd/bsm/Makefile @@ -21,12 +21,15 @@ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ audit.h audit_record.h audit_kevents.h +KERNFILES = \ + audit.h + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = bsm -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNFILES} EXPORT_MI_DIR = bsm diff --git a/bsd/bsm/audit.h b/bsd/bsm/audit.h index 4c48cff9f..7ee808102 100644 --- a/bsd/bsm/audit.h +++ b/bsd/bsm/audit.h @@ -25,9 +25,8 @@ #define _BSM_AUDIT_H #include -#include +#include #include -#include #include #include @@ -45,7 +44,7 @@ /* * Pre-defined audit IDs */ 
-#define AU_DEFAUDITID -1 +#define AU_DEFAUDITID ((uid_t)-1) /* * Define the masks for the classes of audit events. diff --git a/bsd/bsm/audit_kernel.h b/bsd/bsm/audit_kernel.h index 9dc1de446..5719f3107 100644 --- a/bsd/bsm/audit_kernel.h +++ b/bsd/bsm/audit_kernel.h @@ -29,8 +29,8 @@ #include #include -#include #include +#include /* * Audit subsystem condition flags. The audit_enabled flag is set and @@ -211,8 +211,8 @@ struct audit_record { int ar_arg_svipc_id; void * ar_arg_svipc_addr; struct posix_ipc_perm ar_arg_pipc_perm; - mach_port_t ar_arg_mach_port1; - mach_port_t ar_arg_mach_port2; + mach_port_name_t ar_arg_mach_port1; + mach_port_name_t ar_arg_mach_port2; union auditon_udata ar_arg_auditon; }; @@ -265,12 +265,12 @@ int kau_close(struct au_record *rec, struct timespec *endtime, short event); void kau_free(struct au_record *rec); void kau_init(void); -token_t *kau_to_file(char *file, struct timeval *tv); -token_t *kau_to_header(struct timespec *ctime, int rec_size, +token_t *kau_to_file(const char *file, const struct timeval *tv); +token_t *kau_to_header(const struct timespec *ctime, int rec_size, au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header32(struct timespec *ctime, int rec_size, +token_t *kau_to_header32(const struct timespec *ctime, int rec_size, au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header64(struct timespec *ctime, int rec_size, +token_t *kau_to_header64(const struct timespec *ctime, int rec_size, au_event_t e_type, au_emod_t e_mod); /* * The remaining kernel functions are conditionally compiled in as they @@ -278,17 +278,17 @@ token_t *kau_to_header64(struct timespec *ctime, int rec_size, * the source tree where these functions are referenced. */ #ifdef AUDIT -void audit_arg_addr(void * addr); -void audit_arg_len(int len); +void audit_arg_addr(user_addr_t addr); +void audit_arg_len(user_size_t len); void audit_arg_fd(int fd); void audit_arg_fflags(int fflags); void audit_arg_gid(gid_t gid, gid_t egid, gid_t rgid, gid_t sgid); void audit_arg_uid(uid_t uid, uid_t euid, uid_t ruid, uid_t suid); -void audit_arg_groupset(gid_t *gidset, u_int gidset_size); -void audit_arg_login(char *login); -void audit_arg_ctlname(int *name, int namelen); +void audit_arg_groupset(const gid_t *gidset, u_int gidset_size); +void audit_arg_login(const char *login); +void audit_arg_ctlname(const int *name, int namelen); void audit_arg_mask(int mask); void audit_arg_mode(mode_t mode); void audit_arg_dev(int dev); @@ -302,22 +302,23 @@ void audit_arg_socket(int sodomain, int sotype, void audit_arg_sockaddr(struct proc *p, struct sockaddr *so); void audit_arg_auid(uid_t auid); -void audit_arg_auditinfo(struct auditinfo *au_info); +void audit_arg_auditinfo(const struct auditinfo *au_info); void audit_arg_upath(struct proc *p, char *upath, u_int64_t flags); void audit_arg_vnpath(struct vnode *vp, u_int64_t flags); -void audit_arg_text(char *text); +void audit_arg_vnpath_withref(struct vnode *vp, u_int64_t flags); +void audit_arg_text(const char *text); void audit_arg_cmd(int cmd); void audit_arg_svipc_cmd(int cmd); -void audit_arg_svipc_perm(struct ipc_perm *perm); +void audit_arg_svipc_perm(const struct ipc_perm *perm); void audit_arg_svipc_id(int id); void audit_arg_svipc_addr(void *addr); void audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode); -void audit_arg_auditon(union auditon_udata *udata); -void audit_arg_file(struct proc *p, struct file *fp); -void audit_arg_mach_port1(mach_port_t port); -void audit_arg_mach_port2(mach_port_t port); +void 
audit_arg_auditon(const union auditon_udata *udata); +void audit_arg_file(struct proc *p, const struct fileproc *fp); +void audit_arg_mach_port1(mach_port_name_t port); +void audit_arg_mach_port2(mach_port_name_t port); void audit_sysclose(struct proc *p, int fd); @@ -347,7 +348,7 @@ void audit_proc_free(struct proc *p); * possible that an audit record was begun before auditing was turned off. */ #define AUDIT_SYSCALL_EXIT(error, proc, uthread) do { \ - if (audit_enabled | (uthread->uu_ar != NULL)) { \ + if (audit_enabled || (uthread->uu_ar != NULL)) { \ audit_syscall_exit(error, proc, uthread); \ } \ } while (0) @@ -363,9 +364,9 @@ void audit_proc_free(struct proc *p); } while (0) #define AUDIT_MACH_SYSCALL_EXIT(retval) do { \ - struct uthread *uthread = get_bsdthread_info(current_act()); \ - if (audit_enabled | (uthread->uu_ar != NULL)) { \ - audit_mach_syscall_exit(retval, uthread); \ + struct uthread *__uthread = get_bsdthread_info(current_thread()); \ + if (audit_enabled || (__uthread->uu_ar != NULL)) { \ + audit_mach_syscall_exit(retval, __uthread); \ } \ } while (0) diff --git a/bsd/bsm/audit_klib.h b/bsd/bsm/audit_klib.h index 9725885a8..00730f6a7 100644 --- a/bsd/bsm/audit_klib.h +++ b/bsd/bsm/audit_klib.h @@ -38,13 +38,21 @@ token_t *kau_to_socket(struct socket_au_info *soi); token_t *kau_to_attr32(struct vnode_au_info *vni); token_t *kau_to_attr64(struct vnode_au_info *vni); +int auditon_command_event(int cmd); int au_preselect(au_event_t event, au_mask_t *mask_p, int sorf); au_event_t flags_and_error_to_openevent(int oflags, int error); -void au_evclassmap_init(); +au_event_t ctlname_to_sysctlevent(int name[], uint64_t valid_arg); +au_event_t msgctl_to_event(int cmd); +au_event_t semctl_to_event(int cmd); +void au_evclassmap_init(void); void au_evclassmap_insert(au_event_t event, au_class_t class); au_class_t au_event_class(au_event_t event); int canon_path(struct proc *p, char *path, char *cpath); + + + + /* * Define a system call to audit event mapping table. 
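Two of the audit macro changes above are easy to miss: the bitwise '|' became the short-circuiting '||' in AUDIT_SYSCALL_EXIT, and the local in AUDIT_MACH_SYSCALL_EXIT was renamed __uthread so it cannot shadow an identifier at the expansion site. Because both operands are 0/1 values here, the observable difference is evaluation, not truth value. A stand-alone illustration; expensive_check is a hypothetical stand-in for the uthread lookup:

#include <assert.h>

static int calls;

static int
expensive_check(void)       /* stand-in for get_bsdthread_info() etc. */
{
    calls++;
    return 1;
}

int
main(void)
{
    int audit_enabled = 1;

    calls = 0;
    (void)(audit_enabled || expensive_check());  /* '||' short-circuits */
    assert(calls == 0);

    calls = 0;
    (void)(audit_enabled | expensive_check());   /* '|' evaluates both sides */
    assert(calls == 1);

    return 0;
}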
*/ diff --git a/bsd/bsm/audit_record.h b/bsd/bsm/audit_record.h index 3a64eabd0..da2c96388 100644 --- a/bsd/bsm/audit_record.h +++ b/bsd/bsm/audit_record.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include @@ -52,19 +52,19 @@ #define ADD_U_INT16(loc, val) \ do { \ - memcpy(loc, (u_char *)&val, sizeof(u_int16_t));\ + memcpy(loc, (const u_char *)&val, sizeof(u_int16_t));\ loc += sizeof(u_int16_t); \ }while(0) #define ADD_U_INT32(loc, val) \ do { \ - memcpy(loc, (u_char *)&val, sizeof(u_int32_t));\ + memcpy(loc, (const u_char *)&val, sizeof(u_int32_t));\ loc += sizeof(u_int32_t); \ }while(0) #define ADD_U_INT64(loc, val)\ do {\ - memcpy(loc, (u_char *)&val, sizeof(u_int64_t));\ + memcpy(loc, (const u_char *)&val, sizeof(u_int64_t));\ loc += sizeof(u_int64_t); \ }while(0) @@ -243,9 +243,9 @@ token_t *au_to_me(void); token_t *au_to_arg(char n, char *text, u_int32_t v); token_t *au_to_arg32(char n, char *text, u_int32_t v); token_t *au_to_arg64(char n, char *text, u_int64_t v); -token_t *au_to_attr(struct vattr *attr); -token_t *au_to_attr32(struct vattr *attr); -token_t *au_to_attr64(struct vattr *attr); +token_t *au_to_attr(struct vnode_attr *attr); +token_t *au_to_attr32(struct vnode_attr *attr); +token_t *au_to_attr64(struct vnode_attr *attr); token_t *au_to_data(char unit_print, char unit_type, char unit_count, char *p); token_t *au_to_exit(int retval, int err); diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER index 10ac705f2..90cb6ba5c 100644 --- a/bsd/conf/MASTER +++ b/bsd/conf/MASTER @@ -115,7 +115,6 @@ options NORMA_VM # NORMA virtual memory support # options NORMA_TASK # NORMA task support # options NORMA_ETHER # NORMA across ethernet # options SIMPLE_CLOCK # don't assume fixed tick # -options STAT_TIME # Use statistical timing # options XPR_DEBUG # kernel tracing # options KDEBUG # kernel tracing # options DDM_DEBUG # driverkit-style tracing # @@ -133,12 +132,16 @@ options ROUTING # routing # options TPIP # # options TUN # # options VLAN # # +options BOND # # +options NETMIBS # # options IPDIVERT # Divert sockets (for NAT) # options IPFIREWALL # IP Firewalling (used by NAT) # #options IPFIREWALL_VERBOSE # # options IPFIREWALL_FORWARD #Transparent proxy # options IPFIREWALL_DEFAULT_TO_ACCEPT # allow everything by default # -options IPFIREWALL_KEXT # Kernel extension # +#options IPFIREWALL_KEXT # Kernel extension # +options DUMMYNET # dummynet support # +options IPFW2 # IP firewall (new version) # options MULTICAST # Internet Protocol Class-D $ options TCPDEBUG # TCP debug # options RANDOM_IP_ID # random (not sequential) ip ids # @@ -150,7 +153,8 @@ options AUDIT # Security event auditing # # # 4.4 general kernel # -options COMPAT_43 # 4.3 BSD compatibility # +options COMPAT_43_TTY # 4.3 BSD tty compat # +options COMPAT_43_SOCKET # 4.3 BSD socket compat # options DIAGNOSTIC # diagnostics # options KTRACE # ktrace support # options GPROF # build profiling # @@ -202,8 +206,8 @@ options "INET6" # kernel IPv6 Support # options IPSEC # IP security # options IPSEC_ESP # IP security # options "IPV6FIREWALL" # IPv6 Firewall Feature # -options "IPV6FIREWALL_DEFAULT_TO_ACCEPT" #IPv6 Firewall Feature # -options "IPV6FIREWALL_VERBOSE" #IPv6 Firewall Feature # +options "IPV6FIREWALL_DEFAULT_TO_ACCEPT" #IPv6 Firewall Feature # +#options "IPV6FIREWALL_VERBOSE" #IPv6 Firewall Feature # pseudo-device gif 1 # pseudo-device dummy 2 # @@ -258,6 +262,10 @@ pseudo-device mdevdevice 1 init mdevinit # pseudo-device bpfilter 4 init bpf_init +# +# fsevents device +pseudo-device 
fsevents 1 init fsevents_init + # # shim to "linux" mach disk drivers (mach drivers must also be turned on) # diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386 index d1fdbe7a7..6c9d460bc 100644 --- a/bsd/conf/MASTER.i386 +++ b/bsd/conf/MASTER.i386 @@ -45,9 +45,9 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # -# RELEASE = [intel pc mach medium event vol pst gdb kernobjc libdriver fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_43 volfs devfs synthfs revfs hfs mrouting ipdivert ipfirewall inet6 ipsec gif tcpdrop_synfin ktrace stf vlan] +# RELEASE = [intel pc mach medium event vol pst gdb kernobjc libdriver fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock volfs devfs revfs hfs mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs] # PROFILE = [RELEASE profile] -# DEBUG = [intel pc mach medium event vol pst gdb kernobjc libdriver_g fixpri debug simple_clock mdebug kernserv driverkit xpr_debug uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_43 revfs hfs volfs devfs synthfs mach_assert mrouting ipdivert ipfirewall inet6 ipsec gif tcpdrop_synfin ktrace stf vlan] +# DEBUG = [intel pc mach medium event vol pst gdb kernobjc libdriver_g fixpri debug simple_clock mdebug kernserv driverkit xpr_debug uxpr kernstack ipc_compat ipc_debug nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs hfs volfs devfs mach_assert mrouting ipdivert ipfirewall ipfw2 dummynet ipv6firewall inet6 ipsec gif tcpdrop_synfin ktrace stf compat_43_tty compat_43_socket vlan bond netmibs] # ###################################################################### # diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc index 8aea2f9d4..ee3e6839b 100644 --- a/bsd/conf/MASTER.ppc +++ b/bsd/conf/MASTER.ppc @@ -45,10 +45,10 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # -# RELEASE = [ppc mach medium vol pst gdb simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_43 revfs noprofiling hfs volfs devfs synthfs netat mrouting ipdivert ipfirewall ktrace inet6 ipsec tcpdrop_synfin gif stf vlan] +# RELEASE = [ppc mach medium vol pst gdb simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs noprofiling hfs volfs devfs netat mrouting ipdivert ipfirewall ipfw2 dummynet ktrace inet6 ipv6firewall ipsec tcpdrop_synfin gif stf compat_43_tty compat_43_socket vlan bond netmibs] # RELEASE_TRACE = [RELEASE kdebug] -# PROFILE = [ppc mach medium vol pst gdb simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_43 revfs profile hfs volfs devfs synthfs netat mrouting ipdivert ipfirewall ktrace inet6 ipsec tcpdrop_synfin gif stf vlan] -# DEBUG = [ppc mach medium vol pst gdb debug simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_43 revfs profiling hfs volfs devfs synthfs netat mrouting mach_assert ipdivert ipfirewall ktrace inet6 ipsec tcpdrop_synfin gif stf vlan] +# PROFILE = [ppc mach medium vol pst gdb simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs profile hfs volfs devfs netat mrouting ipdivert ipfirewall ipfw2 dummynet 
ktrace inet6 ipv6firewall ipsec tcpdrop_synfin gif stf compat_43_tty compat_43_socket vlan bond] +# DEBUG = [ppc mach medium vol pst gdb debug simple_clock kernstack nfsclient nfsserver quota fifo fdesc union ffs cd9660 compat_oldsock revfs profiling hfs volfs devfs netat mrouting mach_assert ipdivert ipfirewall ipfw2 dummynet ktrace inet6 ipv6firewall ipsec tcpdrop_synfin gif stf compat_43_tty compat_43_socket vlan bond netmibs] # DEBUG_TRACE = [DEBUG kdebug] # ###################################################################### diff --git a/bsd/conf/Makefile b/bsd/conf/Makefile index 3fbb79f00..b4b5a7d2f 100644 --- a/bsd/conf/Makefile +++ b/bsd/conf/Makefile @@ -3,6 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir +export vnode_pager.o_CFLAGS_ADD=-Werror +export vm_unix.o_CFLAGS_ADD=-Werror +export dp_backing_file.o_CFLAGS_ADD=-Werror +export if_mib.o_CFLAGS_ADD=-Wno-unused-parameter include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/bsd/conf/Makefile.i386 b/bsd/conf/Makefile.i386 index 2f6232c14..0b3a6a0d1 100644 --- a/bsd/conf/Makefile.i386 +++ b/bsd/conf/Makefile.i386 @@ -2,6 +2,300 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# Enable -Werror for i386 builds +CFLAGS+=$(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR = \ + ioconf.o \ + aescrypt.o \ + aeskey.o \ + des_setkey.o \ + sha2.o \ + shadow.o \ + vn.o \ + MacOSStubs.o \ + hfs_attrlist.o \ + hfs_btreeio.o \ + hfs_catalog.o \ + hfs_chash.o \ + hfs_cnode.o \ + hfs_encodinghint.o \ + hfs_encodings.o \ + hfs_endian.o \ + hfs_hotfiles.o \ + hfs_link.o \ + hfs_lookup.o \ + hfs_notification.o \ + hfs_quota.o \ + hfs_readwrite.o \ + hfs_search.o \ + hfs_vfsops.o \ + hfs_vfsutils.o \ + hfs_vnops.o \ + hfs_xattr.o \ + BTree.o \ + BTreeAllocate.o \ + BTreeMiscOps.o \ + BTreeNodeOps.o \ + BTreeNodeReserve.o \ + BTreeScanner.o \ + BTreeTreeOps.o \ + CatalogUtilities.o \ + FileIDsServices.o \ + BTreeWrapper.o \ + FileExtentMapping.o \ + VolumeAllocation.o \ + UnicodeWrappers.o \ + cd9660_bmap.o \ + cd9660_lookup.o \ + cd9660_node.o \ + cd9660_rrip.o \ + cd9660_util.o \ + cd9660_vfsops.o \ + cd9660_vnops.o \ + bsd_init.o \ + bsd_stubs.o \ + kdebug.o \ + kern_acct.o \ + kern_aio.o \ + kern_audit.o \ + kern_authorization.o \ + kern_bsm_audit.o \ + kern_bsm_klib.o \ + kern_bsm_token.o \ + kern_clock.o \ + kern_control.o \ + kern_core.o \ + kern_credential.o \ + kern_descrip.o \ + kern_event.o \ + kern_exec.o \ + kern_exit.o \ + kern_fork.o \ + kern_ktrace.o \ + kern_lock.o \ + kern_malloc.o \ + kern_mib.o \ + kern_mman.o \ + kern_newsysctl.o \ + kern_panicinfo.o \ + kern_pcsamples.o \ + kern_physio.o \ + kern_prot.o \ + kern_resource.o \ + kern_shutdown.o \ + kern_sig.o \ + kern_symfile.o \ + kern_synch.o \ + kern_sysctl.o \ + kern_time.o \ + kern_xxx.o \ + kpi_mbuf.o \ + kpi_socket.o \ + kpi_socketfilter.o \ + mach_fat.o \ + mach_header.o \ + mach_loader.o \ + mach_process.o \ + netboot.o \ + posix_sem.o \ + posix_shm.o \ + qsort.o \ + spl.o \ + subr_log.o \ + subr_prf.o \ + subr_prof.o \ + subr_xxx.o \ + sys_domain.o \ + sys_generic.o \ + sys_socket.o \ + sysctl_init.o \ + sysv_ipc.o \ + sys_pipe.o \ + sysv_sem.o \ + sysv_shm.o \ + tty.o \ + tty_compat.o \ + tty_conf.o \ + tty_pty.o \ + tty_subr.o \ + tty_tty.o \ + ubc_subr.o \ + uipc_domain.o \ + uipc_mbuf.o 
\ + uipc_mbuf2.o \ + uipc_proto.o \ + uipc_socket.o \ + uipc_socket2.o \ + uipc_syscalls.o \ + uipc_usrreq.o \ + random.o \ + dead_vnops.o \ + devfs_tree.o \ + devfs_vfsops.o \ + devfs_vnops.o \ + fdesc_vfsops.o \ + fdesc_vnops.o \ + fifo_vnops.o \ + spec_vnops.o \ + synthfs_util.o \ + synthfs_vfsops.o \ + synthfs_vnops.o \ + union_subr.o \ + union_vfsops.o \ + union_vnops.o \ + volfs_vfsops.o \ + volfs_vnops.o \ + bpf.o \ + dlil.o \ + ether_at_pr_module.o \ + ether_if_module.o \ + ether_inet6_pr_module.o \ + ether_inet_pr_module.o \ + if.o \ + if_bond.o \ + if_ethersubr.o \ + if_gif.o \ + if_loop.o \ + if_media.o \ + if_stf.o \ + if_vlan.o \ + kext_net.o \ + kpi_interface.o \ + kpi_protocol.o \ + ndrv.o \ + netisr.o \ + net_osdep.o \ + radix.o \ + raw_usrreq.o \ + route.o \ + rtsock.o \ + zlib.o \ + dhcp_options.o \ + if_ether.o \ + igmp.o \ + in.o \ + in_bootp.o \ + in_cksum.o \ + ip_fw2.o \ + ip_fw2_compat.o \ + kpi_ipfilter.o \ + in_gif.o \ + in_pcb.o \ + in_proto.o \ + in_rmx.o \ + ip_divert.o \ + ip_dummynet.o \ + ip_encap.o \ + ip_flow.o \ + ip_icmp.o \ + ip_input.o \ + ip_mroute.o \ + ip_output.o \ + raw_ip.o \ + tcp_input.o \ + tcp_output.o \ + tcp_subr.o \ + tcp_timer.o \ + tcp_usrreq.o \ + udp_usrreq.o \ + ah_core.o \ + ah_input.o \ + ah_output.o \ + dest6.o \ + esp_core.o \ + esp_input.o \ + esp_output.o \ + esp_rijndael.o \ + frag6.o \ + icmp6.o \ + in6.o \ + in6_cksum.o \ + in6_gif.o \ + in6_ifattach.o \ + in6_pcb.o \ + in6_prefix.o \ + in6_proto.o \ + in6_rmx.o \ + in6_src.o \ + ip6_forward.o \ + ip6_fw.o \ + ip6_input.o \ + ip6_mroute.o \ + ip6_output.o \ + ipcomp_core.o \ + ipcomp_input.o \ + ipcomp_output.o \ + ipsec.o \ + mld6.o \ + nd6.o \ + nd6_nbr.o \ + nd6_rtr.o \ + raw_ip6.o \ + route6.o \ + scope6.o \ + udp6_output.o \ + udp6_usrreq.o \ + key.o \ + keydb.o \ + keysock.o \ + krpc_subr.o \ + nfs_bio.o \ + nfs_boot.o \ + nfs_node.o \ + nfs_nqlease.o \ + nfs_socket.o \ + nfs_srvcache.o \ + nfs_subs.o \ + nfs_syscalls.o \ + nfs_vfsops.o \ + nfs_vnops.o \ + ffs_alloc.o \ + ffs_balloc.o \ + ffs_inode.o \ + ffs_subr.o \ + ffs_vfsops.o \ + ffs_vnops.o \ + ufs_attrlist.o \ + ufs_bmap.o \ + ufs_byte_order.o \ + ufs_ihash.o \ + ufs_inode.o \ + ufs_lockf.o \ + ufs_lookup.o \ + ufs_quota.o \ + ufs_readwrite.o \ + ufs_vfsops.o \ + ufs_vnops.o \ + ux_exception.o \ + vfs_bio.o \ + vfs_cache.o \ + vfs_cluster.o \ + vfs_conf.o \ + vfs_fsevents.o \ + vfs_init.o \ + vfs_journal.o \ + vfs_lookup.o \ + vfs_quota.o \ + vfs_subr.o \ + vfs_support.o \ + vfs_syscalls.o \ + vfs_utfconv.o \ + vfs_vnops.o \ + vfs_xattr.o \ + kpi_vfs.o \ + vnode_if.o \ + sysctl.o \ + unix_startup.o \ + memdev.o \ + init_sysent.o + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template index cd0355d62..7ba478229 100644 --- a/bsd/conf/Makefile.template +++ b/bsd/conf/Makefile.template @@ -1,3 +1,25 @@ +# +# Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. +# +# @APPLE_LICENSE_HEADER_START@ +# +# The contents of this file constitute Original Code as defined in and +# are subject to the Apple Public Source License Version 1.1 (the +# "License"). You may not use this file except in compliance with the +# License. 
Please obtain a copy of the License at +# http://www.apple.com/publicsource and read it before using this file. +# +# This Original Code and all software distributed under the License are +# distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +# License for the specific language governing rights and limitations +# under the License. +# +# @APPLE_LICENSE_HEADER_END@ +# + # # Mach Operating System # Copyright (c) 1986 Carnegie-Mellon University @@ -24,7 +46,7 @@ include $(MakeInc_def) CFLAGS+= -imacros meta_features.h -DARCH_PRIVATE -DKERNEL -DDRIVER_PRIVATE \ -D_KERNEL_BUILD -DKERNEL_BUILD -DMACH_KERNEL -DBSD_BUILD \ -DBSD_KERNEL_PRIVATE -DNCPUS=1 -Wno-four-char-constants -fpascal-strings \ - -D__APPLE__ -I. + -D__APPLE__ -DLP64KERN=1 -DLP64_DEBUG=0 -I. # XXX: ld flags for bsd.o export LDFLAGS_COMPONENT += -keep_private_externs @@ -40,14 +62,6 @@ COMP_SUBDIRS = # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright - # # Theses macros are filled in by the config program depending on the # current configuration. The MACHDEP macro is replaced by the @@ -90,12 +104,8 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) @echo "[ creating $(COMPONENT).o ]" - $(RM) $(RMFLAGS) vers.c - $(COMPOBJROOT)/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - ${KCC} $(CFLAGS) $(INCLUDES) -c vers.c @echo [ updating $(COMPONENT).o ${BSD_KERNEL_CONFIG} ] - $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} do_depend: do_all ${MD} -u Makedep -f -d `ls *.d`; diff --git a/bsd/conf/files b/bsd/conf/files index 1a4bd5ee8..2b6779be7 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -37,7 +37,6 @@ OPTIONS/mach_xp optional mach_xp OPTIONS/mach_xp_fpd optional mach_xp_fpd OPTIONS/quota optional quota OPTIONS/simple_clock optional simple_clock -OPTIONS/stat_time optional stat_time OPTIONS/xpr_debug optional xpr_debug OPTIONS/kdebug optional kdebug OPTIONS/nfsclient optional nfsclient @@ -56,11 +55,14 @@ OPTIONS/norma_ether optional norma_ether OPTIONS/new_vm_code optional new_vm_code OPTIONS/old_vm_code optional old_vm_code OPTIONS/compat_43 optional compat_43 +OPTIONS/compat_43_tty optional compat_43_tty +OPTIONS/compat_43_socket optional compat_43_socket OPTIONS/diagnostic optional diagnostic OPTIONS/ktrace optional ktrace OPTIONS/profiling optional profiling OPTIONS/vndevice optional vndevice OPTIONS/audit optional audit +OPTIONS/fsevents optional fsevents # # Network options @@ -88,7 +90,9 @@ OPTIONS/fddi optional fddi OPTIONS/ipdivert optional ipdivert OPTIONS/dummynet optional dummynet +OPTIONS/ipfw2 optional ipfw2 OPTIONS/ipfirewall optional ipfirewall +OPTIONS/ipv6firewall optional ipv6firewall OPTIONS/tcpdebug optional tcpdebug OPTIONS/bridge optional bridge OPTIONS/faith optional faith @@ -117,17 +121,21 @@ bsd/dev/random/YarrowCoreLib/src/prng.c standard bsd/dev/random/YarrowCoreLib/src/sha1mod.c standard bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c standard -bsd/dev/memdev.c standard +bsd/dev/memdev.c standard + +bsd/dev/unix_startup.c standard bsd/dev/vn/vn.c optional vndevice bsd/dev/vn/shadow.c optional 
vndevice +bsd/libkern/crc32.c standard bsd/libkern/random.c standard bsd/libkern/scanc.c standard bsd/libkern/skpc.c standard -bsd/libkern/inet_ntoa.c standard +bsd/libkern/inet_ntop.c standard bsd/libkern/bcd.c standard +bsd/vfs/vfs_attrlist.c standard bsd/vfs/vfs_bio.c standard bsd/vfs/vfs_cache.c standard bsd/vfs/vfs_cluster.c standard @@ -140,8 +148,12 @@ bsd/vfs/vfs_syscalls.c standard bsd/vfs/vfs_support.c standard bsd/vfs/vfs_utfconv.c standard bsd/vfs/vfs_vnops.c standard +bsd/vfs/vfs_xattr.c standard bsd/vfs/vnode_if.c standard +bsd/vfs/kpi_vfs.c standard bsd/vfs/vfs_journal.c standard +#bsd/vfs/vfs_fsevents.c optional fsevents +bsd/vfs/vfs_fsevents.c standard bsd/miscfs/deadfs/dead_vnops.c standard bsd/miscfs/fdesc/fdesc_vfsops.c optional fdesc @@ -151,7 +163,6 @@ bsd/miscfs/nullfs/null_subr.c optional nullfs bsd/miscfs/nullfs/null_vfsops.c optional nullfs bsd/miscfs/nullfs/null_vnops.c optional nullfs bsd/miscfs/specfs/spec_vnops.c standard -bsd/miscfs/specfs/spec_lockf.c standard bsd/miscfs/union/union_subr.c optional union bsd/miscfs/union/union_vfsops.c optional union bsd/miscfs/union/union_vnops.c optional union @@ -175,7 +186,7 @@ bsd/isofs/cd9660/cd9660_util.c optional cd9660 bsd/isofs/cd9660/cd9660_vfsops.c optional cd9660 bsd/isofs/cd9660/cd9660_vnops.c optional cd9660 -bsd/net/slcompress.c optional i4bipr +#bsd/net/slcompress.c optional i4bipr bsd/net/bpf.c optional bpfilter bsd/net/bpf_filter.c optional bpfilter bsd/net/bridge.c optional bridge @@ -183,19 +194,23 @@ bsd/net/bsd_comp.c optional ppp_bsdcomp bsd/net/if.c standard bsd/net/if_atmsubr.c optional atm bsd/net/if_disc.c optional disc +bsd/net/init.c standard bsd/net/dlil.c standard bsd/net/ether_if_module.c optional ether bsd/net/ether_at_pr_module.c optional ether bsd/net/ether_inet_pr_module.c optional ether bsd/net/ether_inet6_pr_module.c optional ether inet6 -bsd/net/if_ethersubr.c optional ether +#bsd/net/if_ethersubr.c optional ether bsd/net/if_loop.c optional loop -bsd/net/if_media.c standard -bsd/net/if_mib.c standard +#bsd/net/if_media.c standard +bsd/net/if_mib.c optional netmibs bsd/net/if_sl.c optional sl bsd/net/if_tun.c optional tun bsd/net/if_vlan.c optional vlan -bsd/net/kext_net.c standard +bsd/net/multicast_list.c standard +bsd/net/if_bond.c optional bond +bsd/net/devtimer.c optional bond +#bsd/net/kext_net.c standard bsd/net/ndrv.c standard bsd/net/ppp_deflate.c optional ppp_deflate bsd/net/radix.c standard @@ -206,7 +221,7 @@ bsd/net/rtsock.c standard bsd/net/slcompress.c optional ppp bsd/net/slcompress.c optional sl bsd/net/zlib.c optional ppp_deflate -bsd/net/netisr.c standard +#bsd/net/netisr.c standard bsd/net/zlib.c optional ipsec bsd/net/if_dummy.c optional dummy bsd/net/if_gif.c optional gif @@ -215,20 +230,25 @@ bsd/net/if_stf.c optional stf bsd/net/if_faith.c optional faith bsd/net/net_osdep.c optional ipsec bsd/net/net_osdep.c optional inet6 +bsd/net/kpi_interface.c standard +bsd/net/kpi_protocol.c standard +bsd/net/kpi_interfacefilter.c standard bsd/netinet/if_atm.c optional atm -bsd/netinet/if_ether.c optional ether bsd/netinet/igmp.c standard bsd/netinet/in.c standard bsd/netinet/in_bootp.c standard bsd/netinet/dhcp_options.c standard +bsd/netinet/in_arp.c standard bsd/netinet/in_pcb.c standard bsd/netinet/in_proto.c standard bsd/netinet/in_rmx.c standard bsd/netinet/ip_divert.c optional ipdivert bsd/netinet/ip_dummynet.c optional dummynet bsd/netinet/ip_flow.c standard +bsd/netinet/ip_fw2.c optional ipfw2 +bsd/netinet/ip_fw2_compat.c optional ipfw2 bsd/netinet/ip_icmp.c 
standard bsd/netinet/ip_id.c optional randomipid bsd/netinet/ip_input.c standard @@ -246,6 +266,7 @@ bsd/netinet/in_gif.c optional gif inet bsd/netinet/ip_ecn.c optional inet inet6 bsd/netinet/ip_ecn.c optional inet ipsec bsd/netinet/ip_encap.c optional inet +bsd/netinet/kpi_ipfilter.c standard bsd/netinet6/ah_core.c optional ipsec bsd/netinet6/ah_input.c optional ipsec bsd/netinet6/ah_output.c optional ipsec @@ -260,10 +281,11 @@ bsd/netinet6/icmp6.c optional inet6 bsd/netinet6/in6.c optional inet6 bsd/netinet6/in6_cksum.c optional inet6 bsd/netinet6/in6_gif.c optional gif inet6 +bsd/netinet6/ip6_fw.c optional inet6 bsd/netinet6/ip6_forward.c optional inet6 bsd/netinet6/in6_ifattach.c optional inet6 bsd/netinet6/ip6_input.c optional inet6 -bsd/netinet6/ip6_mroute.c optional inet6 +bsd/netinet6/ip6_mroute.c optional inet6 bsd/netinet6/ip6_output.c optional inet6 bsd/netinet6/in6_src.c optional inet6 bsd/netinet6/ipcomp_core.c optional ipsec @@ -299,8 +321,9 @@ bsd/crypto/des/des_setkey.c optional crypto bsd/crypto/blowfish/bf_enc.c optional crypto bsd/crypto/blowfish/bf_skey.c optional crypto bsd/crypto/cast128/cast128.c optional crypto -bsd/crypto/rijndael/rijndael-alg-fst.c optional crypto -bsd/crypto/rijndael/rijndael-api-fst.c optional crypto +bsd/crypto/aes/aescrypt.c optional crypto +bsd/crypto/aes/aeskey.c optional crypto +bsd/crypto/aes/aestab.c optional crypto bsd/crypto/rc4/rc4.c optional crypto #bsd/netpm/pm_aTT.c optional pm @@ -378,7 +401,6 @@ bsd/nfs/krpc_subr.c optional nfsclient bsd/nfs/nfs_bio.c optional nfsclient bsd/nfs/nfs_boot.c optional nfsclient bsd/nfs/nfs_node.c optional nfsclient -bsd/nfs/nfs_nqlease.c optional nfsclient nfsserver bsd/nfs/nfs_serv.c optional nfsserver bsd/nfs/nfs_socket.c optional nfsclient nfsserver bsd/nfs/nfs_srvcache.c optional nfsserver @@ -404,7 +426,6 @@ bsd/ufs/ufs/ufs_bmap.c standard bsd/ufs/ufs/ufs_byte_order.c optional rev_endian_fs bsd/ufs/ufs/ufs_ihash.c standard bsd/ufs/ufs/ufs_inode.c standard -bsd/ufs/ufs/ufs_lockf.c standard bsd/ufs/ufs/ufs_lookup.c standard bsd/ufs/ufs/ufs_quota.c optional quota bsd/ufs/ufs/ufs_vfsops.c standard @@ -420,7 +441,6 @@ bsd/hfs/hfs_encodings.c optional hfs bsd/hfs/hfs_endian.c optional hfs bsd/hfs/hfs_hotfiles.c optional hfs bsd/hfs/hfs_link.c optional hfs -bsd/hfs/hfs_lockf.c optional hfs bsd/hfs/hfs_lookup.c optional hfs bsd/hfs/hfs_notification.c optional hfs bsd/hfs/hfs_quota.c optional quota @@ -429,6 +449,7 @@ bsd/hfs/hfs_search.c optional hfs bsd/hfs/hfs_vfsops.c optional hfs bsd/hfs/hfs_vfsutils.c optional hfs bsd/hfs/hfs_vnops.c optional hfs +bsd/hfs/hfs_xattr.c optional hfs bsd/hfs/MacOSStubs.c optional hfs bsd/hfs/rangelist.c optional hfs bsd/hfs/hfscommon/BTree/BTree.c optional hfs @@ -438,8 +459,6 @@ bsd/hfs/hfscommon/BTree/BTreeNodeOps.c optional hfs bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c optional hfs bsd/hfs/hfscommon/BTree/BTreeScanner.c optional hfs bsd/hfs/hfscommon/BTree/BTreeTreeOps.c optional hfs -bsd/hfs/hfscommon/Catalog/Catalog.c optional hfs -bsd/hfs/hfscommon/Catalog/CatalogIterators.c optional hfs bsd/hfs/hfscommon/Catalog/CatalogUtilities.c optional hfs bsd/hfs/hfscommon/Catalog/FileIDsServices.c optional hfs bsd/hfs/hfscommon/Misc/BTreeWrapper.c optional hfs @@ -451,19 +470,22 @@ bsd/kern/bsd_init.c standard bsd/kern/init_sysent.c standard bsd/kern/kdebug.c standard bsd/kern/kern_acct.c standard -bsd/kern/kern_aio.c standard +bsd/kern/kern_aio.c standard bsd/kern/kern_audit.c standard +bsd/kern/kern_authorization.c standard bsd/kern/kern_bsm_token.c standard 
bsd/kern/kern_bsm_audit.c standard bsd/kern/kern_bsm_klib.c standard bsd/kern/kern_clock.c standard bsd/kern/kern_core.c standard +bsd/kern/kern_credential.c standard bsd/kern/kern_symfile.c standard bsd/kern/kern_descrip.c standard bsd/kern/kern_event.c standard bsd/kern/kern_control.c standard bsd/kern/kern_exec.c standard bsd/kern/kern_exit.c standard +bsd/kern/kern_lockf.c standard bsd/kern/kern_fork.c standard bsd/kern/kern_ktrace.c standard bsd/kern/kern_lock.c optional cpus @@ -481,6 +503,7 @@ bsd/kern/kern_synch.c standard bsd/kern/kern_sysctl.c standard bsd/kern/kern_newsysctl.c standard bsd/kern/kern_mib.c standard +bsd/kern/kpi_mbuf.c standard bsd/kern/sysctl_init.c standard bsd/kern/kern_time.c standard bsd/kern/kern_xxx.c standard @@ -492,11 +515,12 @@ bsd/kern/subr_prf.c standard bsd/kern/subr_prof.c standard bsd/kern/subr_xxx.c standard bsd/kern/sys_generic.c standard +bsd/kern/sys_pipe.c standard bsd/kern/sys_socket.c standard bsd/kern/sys_domain.c standard bsd/kern/syscalls.c standard bsd/kern/tty.c standard -bsd/kern/tty_compat.c optional compat_43 +bsd/kern/tty_compat.c optional compat_43_tty bsd/kern/tty_conf.c standard bsd/kern/tty_pty.c optional pty bsd/kern/tty_subr.c standard @@ -513,6 +537,7 @@ bsd/kern/uipc_usrreq.c standard bsd/kern/sysv_ipc.c standard bsd/kern/sysv_shm.c standard bsd/kern/sysv_sem.c standard +bsd/kern/sysv_msg.c standard bsd/kern/mach_fat.c standard bsd/kern/mach_header.c standard bsd/kern/mach_loader.c standard @@ -520,6 +545,8 @@ bsd/kern/posix_sem.c standard bsd/kern/posix_shm.c standard # XXXdbg - I need this in the journaling and block cache code bsd/kern/qsort.c standard +bsd/kern/kpi_socket.c standard +bsd/kern/kpi_socketfilter.c standard bsd/vm/vnode_pager.c standard bsd/vm/vm_unix.c standard diff --git a/bsd/conf/files.i386 b/bsd/conf/files.i386 index 73da06e97..cc998565b 100644 --- a/bsd/conf/files.i386 +++ b/bsd/conf/files.i386 @@ -13,7 +13,6 @@ bsd/dev/i386/stubs.c standard bsd/dev/i386/lock_stubs.c standard bsd/dev/i386/sysctl.c standard bsd/dev/i386/unix_signal.c standard -bsd/dev/i386/unix_startup.c standard bsd/kern/bsd_stubs.c standard diff --git a/bsd/conf/files.ppc b/bsd/conf/files.ppc index d1b636110..36adc9a65 100644 --- a/bsd/conf/files.ppc +++ b/bsd/conf/files.ppc @@ -7,7 +7,6 @@ bsd/netinet/in_cksum.c standard bsd/dev/ppc/conf.c standard bsd/dev/ppc/cons.c standard bsd/dev/ppc/mem.c standard -bsd/dev/ppc/unix_startup.c standard bsd/dev/ppc/unix_signal.c standard bsd/dev/ppc/ffs.s standard bsd/dev/ppc/memmove.c standard @@ -18,6 +17,7 @@ bsd/dev/ppc/systemcalls.c standard bsd/dev/ppc/km.c standard bsd/dev/ppc/xsumas.s standard bsd/dev/ppc/sysctl.c standard +bsd/dev/ppc/munge.s standard bsd/dev/ppc/chud/chud_bsd_callback.c standard bsd/dev/ppc/chud/chud_process.c standard diff --git a/bsd/conf/param.c b/bsd/conf/param.c index 5016d2029..0aede52e9 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -63,8 +63,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include struct timezone tz = { TIMEZONE, PST }; diff --git a/bsd/conf/tools/Makefile b/bsd/conf/tools/Makefile index 9df86ce8c..4f9ccd553 100644 --- a/bsd/conf/tools/Makefile +++ b/bsd/conf/tools/Makefile @@ -7,13 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = \ - doconf \ - newvers +SETUP_SUBDIRS = doconf -COMP_SUBDIRS = \ - doconf \ - newvers +COMP_SUBDIRS = doconf INST_SUBDIRS = 
\ diff --git a/bsd/conf/tools/newvers/Makefile b/bsd/conf/tools/newvers/Makefile deleted file mode 100644 index 73603c753..000000000 --- a/bsd/conf/tools/newvers/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/) -PROGRAM= $(DSTDIR)newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/bsd/conf/tools/newvers/newvers.csh b/bsd/conf/tools/newvers/newvers.csh deleted file mode 100644 index 75324d3bc..000000000 --- a/bsd/conf/tools/newvers/newvers.csh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. -# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"BSD Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"BSD\";" ; -) > vers.c -if [ -s vers.suffix -o ! 
-f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/bsd/conf/version.major b/bsd/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/bsd/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/bsd/conf/version.minor b/bsd/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/bsd/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/bsd/conf/version.variant b/bsd/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/bsd/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/bsd/crypto/Makefile b/bsd/crypto/Makefile index 4e0880559..e878376c2 100644 --- a/bsd/crypto/Makefile +++ b/bsd/crypto/Makefile @@ -12,7 +12,7 @@ INSTINC_SUBDIRS = \ cast128 \ des \ rc4 \ - rijndael \ + aes \ sha2 @@ -33,7 +33,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/rijndael/Makefile b/bsd/crypto/aes/Makefile similarity index 81% rename from bsd/crypto/rijndael/Makefile rename to bsd/crypto/aes/Makefile index 92d360eb6..9a6c0e847 100644 --- a/bsd/crypto/rijndael/Makefile +++ b/bsd/crypto/aes/Makefile @@ -20,13 +20,17 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ PRIVATE_DATAFILES = \ - rijndael-alg-fst.h rijndael-api-fst.h rijndael.h + aes.h aesopt.h aestab.h INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +EXPORT_MI_LIST = aes.h + +INSTALL_KF_MI_LIST = + +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/aes/aes.h b/bsd/crypto/aes/aes.h new file mode 100644 index 000000000..d2dd335c3 --- /dev/null +++ b/bsd/crypto/aes/aes.h @@ -0,0 +1,175 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 28/01/2004 + + This file contains the definitions required to use AES in C. See aesopt.h + for optimisation details. 
+*/ + +#if !defined( _AES_H ) +#define _AES_H + +/* This include is used to find 8 & 32 bit unsigned integer types */ +#include + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define AES_128 /* define if AES with 128 bit keys is needed */ +#define AES_192 /* define if AES with 192 bit keys is needed */ +#define AES_256 /* define if AES with 256 bit keys is needed */ +#define AES_VAR /* define if a variable key size is needed */ + +/* The following must also be set in assembler files if being used */ + +#define AES_ENCRYPT /* if support for encryption is needed */ +#define AES_DECRYPT /* if support for decryption is needed */ +//#define AES_ERR_CHK /* for parameter checks & error return codes */ + +#if UCHAR_MAX == 0xff /* an unsigned 8 bit type */ + typedef unsigned char aes_08t; +#else +# error Please define aes_08t as an 8-bit unsigned integer type in aes.h +#endif + +#if UINT_MAX == 4294967295 /* an unsigned 32 bit type */ + typedef unsigned int aes_32t; +#elif ULONG_MAX == 4294967295ul + typedef unsigned long aes_32t; +#else +# error Please define aes_32t as a 32-bit unsigned integer type in aes.h +#endif + +#define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ +#define N_COLS 4 /* the number of columns in the state */ + +/* The key schedule length is 11, 13 or 15 16-byte blocks for 128, */ +/* 192 or 256-bit keys respectively. That is 176, 208 or 240 bytes */ +/* or 44, 52 or 60 32-bit words. For simplicity this code allocates */ +/* the maximum 60 word array for the key schedule for all key sizes */ + +#if defined( AES_VAR ) || defined( AES_256 ) +#define KS_LENGTH 60 +#elif defined( AES_192 ) +#define KS_LENGTH 52 +#else +#define KS_LENGTH 44 +#endif + +#if defined( AES_ERR_CHK ) +#define aes_ret int +#define aes_good 0 +#define aes_error -1 +#else +#define aes_ret void +#endif + +#if !defined( AES_DLL ) /* implement normal/DLL functions */ +#define aes_rval aes_ret +#else +#define aes_rval aes_ret __declspec(dllexport) _stdcall +#endif + +typedef struct +{ aes_32t ks[KS_LENGTH]; + aes_32t rn; +} aes_encrypt_ctx; + +typedef struct +{ aes_32t ks[KS_LENGTH]; + aes_32t rn; +} aes_decrypt_ctx; + +typedef struct +{ + aes_decrypt_ctx decrypt; + aes_encrypt_ctx encrypt; +} aes_ctx; + + +/* This routine must be called before first use if non-static */ +/* tables are being used */ + +void gen_tabs(void); + +/* The key length (klen) is input in bytes when it is in the range */ +/* 16 <= klen <= 32 or in bits when in the range 128 <= klen <= 256 */ + +#if defined( AES_ENCRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +aes_rval aes_encrypt_key128(const unsigned char *in_key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +aes_rval aes_encrypt_key192(const unsigned char *in_key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || defined(AES_VAR) +aes_rval aes_encrypt_key256(const unsigned char *in_key, aes_encrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +aes_rval aes_encrypt_key(const unsigned char *in_key, int key_len, aes_encrypt_ctx cx[1]); +#endif + +aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk, + unsigned char *out_blk, const aes_encrypt_ctx cx[1]); +#endif + +#if defined( AES_DECRYPT ) + +#if defined(AES_128) || defined(AES_VAR) +aes_rval aes_decrypt_key128(const unsigned char *in_key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_192) || defined(AES_VAR) +aes_rval aes_decrypt_key192(const unsigned char *in_key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_256) || 
defined(AES_VAR) +aes_rval aes_decrypt_key256(const unsigned char *in_key, aes_decrypt_ctx cx[1]); +#endif + +#if defined(AES_VAR) +aes_rval aes_decrypt_key(const unsigned char *in_key, int key_len, aes_decrypt_ctx cx[1]); +#endif + +aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv, unsigned int num_blk, + unsigned char *out_blk, const aes_decrypt_ctx cx[1]); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/bsd/crypto/aes/aescrypt.c b/bsd/crypto/aes/aescrypt.c new file mode 100644 index 000000000..141cd3fbf --- /dev/null +++ b/bsd/crypto/aes/aescrypt.c @@ -0,0 +1,407 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 28/01/2004 + + This file contains the code for implementing encryption and decryption + for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It + can optionally be replaced by code written in assembler using NASM. 
For + further details see the file aesopt.h +*/ + +#include "aesopt.h" +#include "aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c]) +#define xo(y,x,c) (s(y,c) ^= s(x, c)) +#define si(y,x,c) (s(y,c) = word_in(x, c)) +#define so(y,x,c) word_out(y, c, s(x,c)) + +#if defined(ARRAYS) +#define locals(y,x) x[4],y[4] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 +#endif + +#define dtables(tab) const aes_32t *tab##0, *tab##1, *tab##2, *tab##3 +#define itables(tab) tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3] + +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); + +#define key_in(y,x,k) ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3) +#define cbc(y,x) xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3) +#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +#if defined(ENCRYPTION) && !defined(AES_ASM) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined(_MSC_VER) +#pragma optimize( "s", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? 
s(x,1) : s(x,2))) + +#if defined(FT4_SET) +#undef dec_fmvars +# if defined(ENC_ROUND_CACHE_TABLES) +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c)) +# else +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c)) +# endif +#elif defined(FT1_SET) +#undef dec_fmvars +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c)) +#else +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c))) +#endif + +#if defined(FL4_SET) +# if defined(LAST_ENC_ROUND_CACHE_TABLES) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c)) +# else +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c)) +# endif +#elif defined(FL1_SET) +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c)) +#else +#define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c)) +#endif + +aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, + unsigned char *out, const aes_encrypt_ctx cx[1]) +{ aes_32t locals(b0, b1); + const aes_32t *kp = cx->ks; +#if defined(ENC_ROUND_CACHE_TABLES) + dtables(t_fn); +#endif +#if defined(LAST_ENC_ROUND_CACHE_TABLES) + dtables(t_fl); +#endif + +#if defined( dec_fmvars ) + dec_fmvars; /* declare variables for fwd_mcol() if needed */ +#endif + +#if defined( AES_ERR_CHK ) + if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 ) + return aes_error; +#endif + + // Load IV into b0. + state_in(b0, in_iv); + + for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk) + { +#if 0 + // Read the plaintext into b1 + state_in(b1, in); + // Do the CBC with b0 which is either the iv or the ciphertext of the previous block. + cbc(b1, b0); + + // Xor b1 with the key schedule to get things started. 
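+ // (Illustrative note, assuming standard CBC chaining: each output
+ // block is c[i] = E_k(p[i] ^ c[i-1]) with c[-1] = IV, so b0 always
+ // carries the previous ciphertext block, or the IV on the first
+ // iteration.)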
+ key_in(b0, b1, kp); +#else + // Since xor is associative we mess with the ordering here to get the loads started early + key_in(b1, b0, kp); // Xor b0(IV) with the key schedule and assign to b1 + state_in(b0, in); // Load block into b0 + cbc(b0, b1); // Xor b0 with b1 and store in b0 +#endif + +#if defined(ENC_ROUND_CACHE_TABLES) + itables(t_fn); +#endif + +#if (ENC_UNROLL == FULL) + + switch(cx->rn) + { + case 14: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 12: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + kp += 2 * N_COLS; + case 10: + default: + round(fwd_rnd, b1, b0, kp + 1 * N_COLS); + round(fwd_rnd, b0, b1, kp + 2 * N_COLS); + round(fwd_rnd, b1, b0, kp + 3 * N_COLS); + round(fwd_rnd, b0, b1, kp + 4 * N_COLS); + round(fwd_rnd, b1, b0, kp + 5 * N_COLS); + round(fwd_rnd, b0, b1, kp + 6 * N_COLS); + round(fwd_rnd, b1, b0, kp + 7 * N_COLS); + round(fwd_rnd, b0, b1, kp + 8 * N_COLS); + round(fwd_rnd, b1, b0, kp + 9 * N_COLS); +#if defined(LAST_ENC_ROUND_CACHE_TABLES) + itables(t_fl); +#endif + round(fwd_lrnd, b0, b1, kp +10 * N_COLS); + } + +#else + + { aes_32t rnd; +#if (ENC_UNROLL == PARTIAL) + for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + kp += N_COLS; + round(fwd_rnd, b0, b1, kp); + } + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); +#else + for(rnd = 0; rnd < cx->rn - 1; ++rnd) + { + kp += N_COLS; + round(fwd_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif +#if defined(LAST_ENC_ROUND_CACHE_TABLES) + itables(t_fl); +#endif + kp += N_COLS; + round(fwd_lrnd, b0, b1, kp); + } +#endif + + state_out(out, b0); + } + +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(DECRYPTION) && !defined(AES_ASM) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimiation with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined(_MSC_VER) +#pragma optimize( "t", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define inv_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? 
s(x,3) : s(x,0))) + +#if defined(IT4_SET) +#undef dec_imvars +# if defined(DEC_ROUND_CACHE_TABLES) +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c)) +# else +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c)) +# endif +#elif defined(IT1_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c)) +#else +#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))) +#endif + +#if defined(IL4_SET) +# if defined(LAST_DEC_ROUND_CACHE_TABLES) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c)) +# else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c)) +# endif +#elif defined(IL1_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c)) +#else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)) +#endif + +aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, + unsigned char *out, const aes_decrypt_ctx cx[1]) +{ aes_32t locals(b0, b1); + const aes_32t *kp = cx->ks + cx->rn * N_COLS; +#if defined(DEC_ROUND_CACHE_TABLES) + dtables(t_in); +#endif +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + dtables(t_il); +#endif + +#if defined( dec_imvars ) + dec_imvars; /* declare variables for inv_mcol() if needed */ +#endif + +#if defined( AES_ERR_CHK ) + if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 ) + return aes_error; +#endif + +#if defined(DEC_ROUND_CACHE_TABLES) + itables(t_in); +#endif + + in += AES_BLOCK_SIZE * (num_blk - 1); + out += AES_BLOCK_SIZE * (num_blk - 1); + // Load the last block's ciphertext into b1 + state_in(b1, in); + + for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk) + { + // Do the xor part of state_in, where b1 is the previous block's ciphertext. + key_in(b0, b1, kp); + +#if (DEC_UNROLL == FULL) + + switch(cx->rn) + { + case 14: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + kp -= 2 * N_COLS; + case 12: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + kp -= 2 * N_COLS; + case 10: + default: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + round(inv_rnd, b1, b0, kp - 3 * N_COLS); + round(inv_rnd, b0, b1, kp - 4 * N_COLS); + round(inv_rnd, b1, b0, kp - 5 * N_COLS); + round(inv_rnd, b0, b1, kp - 6 * N_COLS); + round(inv_rnd, b1, b0, kp - 7 * N_COLS); + round(inv_rnd, b0, b1, kp - 8 * N_COLS); + round(inv_rnd, b1, b0, kp - 9 * N_COLS); +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + itables(t_il); +#endif + round(inv_lrnd, b0, b1, kp - 10 * N_COLS); + } + +#else + + { aes_32t rnd; +#if (DEC_UNROLL == PARTIAL) + for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd) + { + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); + kp -= N_COLS; + round(inv_rnd, b0, b1, kp); + } + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); +#else + for(rnd = 0; rnd < cx->rn - 1; ++rnd) + { + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + itables(t_il); +#endif + kp -= N_COLS; + round(inv_lrnd, b0, b1, kp); + } +#endif + + if (num_blk == 1) + { + // We are doing the first block so we need the IV rather than the previous + // block for CBC (there is no previous block) + state_in(b1, in_iv); + } + else + { + in -= AES_BLOCK_SIZE; + state_in(b1, in); + } + + // Do the CBC with b1 which is either the IV or the ciphertext of the previous block. 
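+ // (Illustrative note: this is standard CBC decryption,
+ // p[i] = D_k(c[i]) ^ c[i-1] with c[-1] = IV. Walking the buffer from
+ // the last block back to the first leaves c[i-1] intact until it is
+ // consumed, so 'in' and 'out' may safely alias for in-place use.)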
+ cbc(b0, b1); + + state_out(out, b0); + } +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/bsd/crypto/aes/aeskey.c b/bsd/crypto/aes/aeskey.c new file mode 100644 index 000000000..0120e0c7d --- /dev/null +++ b/bsd/crypto/aes/aeskey.c @@ -0,0 +1,455 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 26/08/2003 + + This file contains the code for implementing the key schedule for AES + (Rijndael) for block and key sizes of 16, 24, and 32 bytes. See aesopt.h + for further details including optimisation. +*/ + +#include "aesopt.h" +#include "aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Initialise the key schedule from the user supplied key. The key + length can be specified in bytes, with legal values of 16, 24 + and 32, or in bits, with legal values of 128, 192 and 256. These + values correspond with Nk values of 4, 6 and 8 respectively. + + The following macros implement a single cycle in the key + schedule generation process. 
The number of cycles needed + for each cx->n_col and nk value is: + + nk = 4 5 6 7 8 + ------------------------------ + cx->n_col = 4 10 9 8 7 7 + cx->n_col = 5 14 11 10 9 9 + cx->n_col = 6 19 15 12 11 11 + cx->n_col = 7 21 19 16 13 14 + cx->n_col = 8 29 23 19 17 14 +*/ + +#define ke4(k,i) \ +{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ +} +#define kel4(k,i) \ +{ k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \ + k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \ +} +#define kel6(k,i) \ +{ k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \ +} +#define kel8(k,i) \ +{ k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#if defined(ENCRYPTION_KEY_SCHEDULE) + +#if defined(AES_128) || defined(AES_VAR) + +aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ aes_32t ss[4]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if ENC_UNROLL == NONE + { aes_32t i; + + for(i = 0; i < ((11 * N_COLS - 5) / 4); ++i) + ke4(cx->ks, i); + } +#else + ke4(cx->ks, 0); ke4(cx->ks, 1); + ke4(cx->ks, 2); ke4(cx->ks, 3); + ke4(cx->ks, 4); ke4(cx->ks, 5); + ke4(cx->ks, 6); ke4(cx->ks, 7); + ke4(cx->ks, 8); +#endif + kel4(cx->ks, 9); + cx->rn = 10; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ aes_32t ss[6]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + +#if ENC_UNROLL == NONE + { aes_32t i; + + for(i = 0; i < (13 * N_COLS - 7) / 6; ++i) + ke6(cx->ks, i); + } +#else + ke6(cx->ks, 0); ke6(cx->ks, 1); + ke6(cx->ks, 2); ke6(cx->ks, 3); + ke6(cx->ks, 4); ke6(cx->ks, 5); + ke6(cx->ks, 6); +#endif + kel6(cx->ks, 7); + cx->rn = 12; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ aes_32t ss[8]; + + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + cx->ks[6] = ss[6] = word_in(key, 6); + cx->ks[7] = ss[7] = word_in(key, 7); + +#if ENC_UNROLL == NONE + { aes_32t i; + + for(i = 0; i < (15 * N_COLS - 9) / 8; ++i) + ke8(cx->ks, i); + 
} +#else + ke8(cx->ks, 0); ke8(cx->ks, 1); + ke8(cx->ks, 2); ke8(cx->ks, 3); + ke8(cx->ks, 4); ke8(cx->ks, 5); +#endif + kel8(cx->ks, 6); + cx->rn = 14; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_VAR) + +aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_encrypt_key128(key, cx); + case 24: case 192: return aes_encrypt_key192(key, cx); + case 32: case 256: return aes_encrypt_key256(key, cx); + default: return aes_error; +#else + case 16: case 128: aes_encrypt_key128(key, cx); return; + case 24: case 192: aes_encrypt_key192(key, cx); return; + case 32: case 256: aes_encrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if defined(DECRYPTION_KEY_SCHEDULE) + +#if DEC_ROUND == NO_TABLES +#define ff(x) (x) +#else +#define ff(x) inv_mcol(x) +#if defined( dec_imvars ) +#define d_vars dec_imvars +#endif +#endif + +#if 1 +#define kdf4(k,i) \ +{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[4*(i)]; k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \ +} +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \ + k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ +} +#define kdl4(k,i) \ +{ ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \ + k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \ + k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \ +} +#else +#define kdf4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \ +} +#define kd4(k,i) \ +{ ss[4] = ls_box(ss[3],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \ + ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \ + ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \ + ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \ +} +#define kdl4(k,i) \ +{ ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \ + ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \ +} +#endif + +#define kdf6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \ + ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \ +} +#define kd6(k,i) \ +{ ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ + ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ + ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ + ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ + ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ + ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ +} +#define kdl6(k,i) \ +{ ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \ + ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] 
= ss[3]; \ +} + +#define kdf8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \ + ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \ + ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \ +} +#define kd8(k,i) \ +{ aes_32t g = ls_box(ss[7],3) ^ t_use(r,c)[i]; \ + ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \ + ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \ + ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \ + ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \ + g = ls_box(ss[3],0); \ + ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \ + ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \ + ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \ + ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \ +} +#define kdl8(k,i) \ +{ ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \ + ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \ +} + +#if defined(AES_128) || defined(AES_VAR) + +aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ aes_32t ss[5]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + { aes_32t i; + + for(i = 0; i < (11 * N_COLS - 5) / 4; ++i) + ke4(cx->ks, i); + kel4(cx->ks, 9); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 10 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + kdf4(cx->ks, 0); kd4(cx->ks, 1); + kd4(cx->ks, 2); kd4(cx->ks, 3); + kd4(cx->ks, 4); kd4(cx->ks, 5); + kd4(cx->ks, 6); kd4(cx->ks, 7); + kd4(cx->ks, 8); kdl4(cx->ks, 9); +#endif + cx->rn = 10; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_192) || defined(AES_VAR) + +aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ aes_32t ss[7]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + { aes_32t i; + + for(i = 0; i < (13 * N_COLS - 7) / 6; ++i) + ke6(cx->ks, i); + kel6(cx->ks, 7); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 12 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); +#endif + } +#else + cx->ks[4] = ff(ss[4] = word_in(key, 4)); + cx->ks[5] = ff(ss[5] = word_in(key, 5)); + kdf6(cx->ks, 0); kd6(cx->ks, 1); + kd6(cx->ks, 2); kd6(cx->ks, 3); + kd6(cx->ks, 4); kd6(cx->ks, 5); + kd6(cx->ks, 6); kdl6(cx->ks, 7); +#endif + cx->rn = 12; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_256) || defined(AES_VAR) + +aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ aes_32t ss[8]; +#if defined( d_vars ) + d_vars; +#endif + cx->ks[0] = ss[0] = word_in(key, 0); + cx->ks[1] = ss[1] = word_in(key, 1); + cx->ks[2] = ss[2] = word_in(key, 2); + cx->ks[3] = ss[3] = word_in(key, 3); + +#if DEC_UNROLL == NONE + cx->ks[4] = ss[4] = word_in(key, 4); + cx->ks[5] = ss[5] = word_in(key, 5); + cx->ks[6] = ss[6] = word_in(key, 6); + cx->ks[7] = ss[7] = word_in(key, 7); + { aes_32t i; + + for(i = 0; i < (15 * N_COLS 
- 9) / 8; ++i) + ke8(cx->ks, i); + kel8(cx->ks, i); +#if !(DEC_ROUND == NO_TABLES) + for(i = N_COLS; i < 14 * N_COLS; ++i) + cx->ks[i] = inv_mcol(cx->ks[i]); + +#endif + } +#else + cx->ks[4] = ff(ss[4] = word_in(key, 4)); + cx->ks[5] = ff(ss[5] = word_in(key, 5)); + cx->ks[6] = ff(ss[6] = word_in(key, 6)); + cx->ks[7] = ff(ss[7] = word_in(key, 7)); + kdf8(cx->ks, 0); kd8(cx->ks, 1); + kd8(cx->ks, 2); kd8(cx->ks, 3); + kd8(cx->ks, 4); kd8(cx->ks, 5); + kdl8(cx->ks, 6); +#endif + cx->rn = 14; +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(AES_VAR) + +aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) +{ + switch(key_len) + { +#if defined( AES_ERR_CHK ) + case 16: case 128: return aes_decrypt_key128(key, cx); + case 24: case 192: return aes_decrypt_key192(key, cx); + case 32: case 256: return aes_decrypt_key256(key, cx); + default: return aes_error; +#else + case 16: case 128: aes_decrypt_key128(key, cx); return; + case 24: case 192: aes_decrypt_key192(key, cx); return; + case 32: case 256: aes_decrypt_key256(key, cx); return; +#endif + } +} + +#endif + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/bsd/crypto/aes/aesopt.h b/bsd/crypto/aes/aesopt.h new file mode 100644 index 000000000..7b2ea04f0 --- /dev/null +++ b/bsd/crypto/aes/aesopt.h @@ -0,0 +1,753 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 28/01/2004 + + My thanks go to Dag Arne Osvik for devising the schemes used here for key + length derivation from the form of the key schedule + + This file contains the compilation options for AES (Rijndael) and code + that is common across encryption, key scheduling and table generation. + + OPERATION + + These source code files implement the AES algorithm Rijndael designed by + Joan Daemen and Vincent Rijmen. This version is designed for the standard + block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 + and 32 bytes). + + This version is designed for flexibility and speed using operations on + 32-bit words rather than operations on bytes. It can be compiled with + either big or little endian internal byte order but is faster when the + native byte order for the processor is used. 
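+
+ (For example, the byte sequence 01 02 03 04 read as a single 32-bit
+ word gives 0x04030201 on a little-endian machine such as i386 and
+ 0x01020304 on a big-endian one such as ppc; the lookup tables and the
+ word load/store macros must therefore agree on one byte order.)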
+ + THE CIPHER INTERFACE + + The cipher interface is implemented as an array of bytes in which lower + AES bit sequence indexes map to higher numeric significance within bytes. + + aes_08t (an unsigned 8-bit type) + aes_32t (an unsigned 32-bit type) + struct aes_encrypt_ctx (structure for the cipher encryption context) + struct aes_decrypt_ctx (structure for the cipher decryption context) + aes_rval the function return type + + C subroutine calls: + + aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, + const aes_encrypt_ctx cx[1]); + + aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, + const aes_decrypt_ctx cx[1]); + + IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that + you call gen_tabs() before AES is used so that the tables are initialised (a short + usage sketch is appended at the end of these notes). + + C++ aes class subroutines: + + Class AESencrypt for encryption + + Constructors: + AESencrypt(void) + AESencrypt(const unsigned char *key) - 128 bit key + Members: + aes_rval key128(const unsigned char *key) + aes_rval key192(const unsigned char *key) + aes_rval key256(const unsigned char *key) + aes_rval encrypt(const unsigned char *in, unsigned char *out) const + + Class AESdecrypt for decryption + Constructors: + AESdecrypt(void) + AESdecrypt(const unsigned char *key) - 128 bit key + Members: + aes_rval key128(const unsigned char *key) + aes_rval key192(const unsigned char *key) + aes_rval key256(const unsigned char *key) + aes_rval decrypt(const unsigned char *in, unsigned char *out) const + + COMPILATION + + The files used to provide AES (Rijndael) are + + a. aes.h for the definitions needed for use in C. + b. aescpp.h for the definitions needed for use in C++. + c. aesopt.h for setting compilation options (also includes common code). + d. aescrypt.c for encryption and decryption, or + e. aeskey.c for key scheduling. + f. aestab.c for table loading or generation. + g. aescrypt.asm for encryption and decryption using assembler code. + h. aescrypt.mmx.asm for encryption and decryption using MMX assembler. + + To compile AES (Rijndael) for use in C code use aes.h and set the + defines here for the facilities you need (key lengths, encryption + and/or decryption). Do not define AES_DLL or AES_CPP. Set the options + for optimisations and table sizes here. + + To compile AES (Rijndael) for use in C++ code use aescpp.h but do + not define AES_DLL + + To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use + aes.h and include the AES_DLL define. + + CONFIGURATION OPTIONS (here and in aes.h) + + a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL + b. You may need to set PLATFORM_BYTE_ORDER to define the byte order. + c. If you want the code to run in a specific internal byte order, then + ALGORITHM_BYTE_ORDER must be set accordingly. + d. set other configuration options described below.
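+ + USAGE SKETCH (editor's addition, not part of the original header) + + As an illustration of the one-shot C interface above: a minimal sketch, + assuming this build's FIXED_TABLES configuration (so no gen_tabs() call is + needed); the key and data values are placeholders only. + + unsigned char key[16] = {0}; // 128-bit key, example value + unsigned char in[16] = {0}; // one 16-byte AES block + unsigned char out[16]; + aes_encrypt_ctx ecx; // encryption key schedule + aes_encrypt_key128(key, &ecx); // expand the key + aes_encrypt(in, out, &ecx); // out receives the encrypted block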
+*/ + +#if !defined( _AESOPT_H ) +#define _AESOPT_H + +#include "aes.h" + +/* CONFIGURATION - USE OF DEFINES + + Later in this section there are a number of defines that control the + operation of the code. In each section, the purpose of each define is + explained so that the relevant form can be included or excluded by + setting either 1's or 0's respectively on the branches of the related + #if clauses. + + PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS + + To obtain the highest speed on processors with 32-bit words, this code + needs to determine the byte order of the target machine. The following + block of code is an attempt to capture the most obvious ways in which + various environments define byte order. It may well fail, in which case + the definitions will need to be set by editing at the points marked + **** EDIT HERE IF NECESSARY **** below. My thanks go to Peter Gutmann + for his assistance with this endian detection nightmare. +*/ + +#define BRG_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ +#define BRG_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ + +#if defined(__GNUC__) || defined(__GNU_LIBRARY__) +# if defined(__FreeBSD__) || defined(__OpenBSD__) +# include <sys/endian.h> +# elif defined( BSD ) && BSD >= 199103 +# include <machine/endian.h> +# elif defined(__APPLE__) +# if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN ) +# define BIG_ENDIAN +# elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN ) +# define LITTLE_ENDIAN +# endif +# else +# include <endian.h> +# if defined(__BEOS__) +# include <byteswap.h> +# endif +# endif +#endif + +#if !defined(PLATFORM_BYTE_ORDER) +# if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN) +# if defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN) +# if defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__) +# if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# endif +#endif + +/* if the platform is still unknown, try to find its byte order */ +/* from commonly used machine defines */ + +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) +# define
PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ ) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN + +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +#else +# error Please edit aesopt.h (line 234 or 236) to set the platform byte order +#endif + +#endif + +/* SOME LOCAL DEFINITIONS */ + +#define NO_TABLES 0 +#define ONE_TABLE 1 +#define FOUR_TABLES 4 +#define NONE 0 +#define PARTIAL 1 +#define FULL 2 + +#if defined(bswap32) +#define aes_sw32 bswap32 +#elif defined(bswap_32) +#define aes_sw32 bswap_32 +#else +#define brot(x,n) (((aes_32t)(x) << n) | ((aes_32t)(x) >> (32 - n))) +#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) +#endif + +/* 1. FUNCTIONS REQUIRED + + This implementation provides subroutines for encryption, decryption + and for setting the three key lengths (separately) for encryption + and decryption. When the assembler code is not being used the following + definition blocks allow the selection of the routines that are to be + included in the compilation. +*/ +#if defined( AES_ENCRYPT ) +#define ENCRYPTION +#define ENCRYPTION_KEY_SCHEDULE +#endif + +#if defined( AES_DECRYPT ) +#define DECRYPTION +#define DECRYPTION_KEY_SCHEDULE +#endif + +/* 2. ASSEMBLER SUPPORT + + This define (which can be on the command line) enables the use of the + assembler code routines for encryption and decryption with the C code + only providing key scheduling +*/ +#if 0 && !defined(AES_ASM) +#define AES_ASM +#endif + +/* 3. BYTE ORDER WITHIN 32 BIT WORDS + + The fundamental data processing units in Rijndael are 8-bit bytes. The + input, output and key input are all enumerated arrays of bytes in which + bytes are numbered starting at zero and increasing to one less than the + number of bytes in the array in question. This enumeration is only used + for naming bytes and does not imply any adjacency or order relationship + from one byte to another. When these inputs and outputs are considered + as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to + byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. + In this implementation bits are numbered from 0 to 7 starting at the + numerically least significant end of each byte (bit n represents 2^n). + + However, Rijndael can be implemented more efficiently using 32-bit + words by packing bytes into words so that bytes 4*n to 4*n+3 are placed + into word[n]. While in principle these bytes can be assembled into words + in any positions, this implementation only supports the two formats in + which bytes in adjacent positions within words also have adjacent byte + numbers. This order is called big-endian if the lowest numbered bytes + in words have the highest numeric significance and little-endian if the + opposite applies. + + This code can work in either order irrespective of the order used by the + machine on which it runs. 
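+ + As a worked example (editor's addition): packing the bytes b0 = 0x01, + b1 = 0x02, b2 = 0x03 and b3 = 0x04 into word[0] gives the 32-bit value + 0x04030201 in the little-endian layout (byte 0 in the least significant + position) and 0x01020304 in the big-endian layout; the bytes2word() + macro defined later in this file produces whichever form matches + ALGORITHM_BYTE_ORDER.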
Normally the internal byte order will be set + to the order of the processor on which the code is to be run but this + define can be used to reverse this in special situations. + + NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set +*/ +#if 1 || defined(AES_ASM) +#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER +#elif 0 +#define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN +#elif 0 +#define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN +#else +#error The algorithm byte order is not defined +#endif + +/* 4. FAST INPUT/OUTPUT OPERATIONS. + + On some machines it is possible to improve speed by transferring the + bytes in the input and output arrays to and from the internal 32-bit + variables by addressing these arrays as if they are arrays of 32-bit + words. On some machines this will always be possible but there may + be a large performance penalty if the byte arrays are not aligned on + the normal word boundaries. On other machines this technique will + lead to memory access errors when such 32-bit word accesses are not + properly aligned. The option SAFE_IO avoids such problems but will + often be slower on those machines that support misaligned access + (especially so if care is taken to align the input and output byte + arrays on 32-bit word boundaries). If SAFE_IO is not defined it is + assumed that access to byte arrays as if they are arrays of 32-bit + words will not cause problems when such accesses are misaligned. +*/ +#if 0 && !defined(_MSC_VER) +#define SAFE_IO +#endif + +/* 5. LOOP UNROLLING + + The code for encryption and decryption cycles through a number of rounds + that can be implemented either in a loop or by expanding the code into a + long sequence of instructions, the latter producing a larger program but + one that will often be much faster. The latter is called loop unrolling. + There are also potential speed advantages in expanding two iterations in + a loop with half the number of iterations, which is called partial loop + unrolling. The following options allow partial or full loop unrolling + to be set independently for encryption and decryption +*/ +#if 1 +#define ENC_UNROLL FULL +#elif 0 +#define ENC_UNROLL PARTIAL +#else +#define ENC_UNROLL NONE +#endif + +#if 1 +#define DEC_UNROLL FULL +#elif 0 +#define DEC_UNROLL PARTIAL +#else +#define DEC_UNROLL NONE +#endif + +/* 6. FAST FINITE FIELD OPERATIONS + + If this section is included, tables are used to provide faster finite + field arithmetic (this has no effect if FIXED_TABLES is defined). +*/ +#if 1 +#define FF_TABLES +#endif + +/* 7. INTERNAL STATE VARIABLE FORMAT + + The internal state of Rijndael is stored in a number of local 32-bit + word variables which can be defined either as an array or as individual + named variables. Include this section if you want to store these local + variables in arrays. Otherwise individual local variables will be used. +*/ +#if 0 +#define ARRAYS +#endif + +/* In this implementation the columns of the state array are each held in + 32-bit words. The state array can be held in various ways: in an array + of words, in a number of individual word variables or in a number of + processor registers. The following define maps a variable name x and + a column number c to the way the state array variable is to be held. + The first define below maps the state into an array x[c] whereas the + second form maps the state into a number of individual variables x0, + x1, etc. Another form could map individual state columns to machine + register names.
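+ + For example (an editor's illustration of the define that follows): with + ARRAYS defined, s(y,3) expands to y[3], a reference into one array of + four column words; without it, s(y,3) expands to the separate variable + y3, so the round code can be written once in terms of s(...) and + compiled either way.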
+*/ + +#if defined(ARRAYS) +#define s(x,c) x[c] +#else +#define s(x,c) x##c +#endif + +/* 8. FIXED OR DYNAMIC TABLES + + When this section is included the tables used by the code are compiled + statically into the binary file. Otherwise the subroutine gen_tabs() + must be called to compute them before the code is first used. +*/ +#if 1 +#define FIXED_TABLES +#endif + +/* 9. TABLE ALIGNMENT + + On some systems speed will be improved by aligning the AES large lookup + tables on particular boundaries. This define should be set to a power of + two giving the desired alignment. It can be left undefined if alignment + is not needed. This option is specific to the Microsoft VC++ compiler - + it seems to sometimes cause trouble for the VC++ version 6 compiler. +*/ + +#if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300) +#define TABLE_ALIGN 64 +#endif + +/* 10. INTERNAL TABLE CONFIGURATION + + This cipher proceeds in a number of cycles known as 'rounds', each + implemented by a round function which can optionally be speeded up + using tables. The basic tables are each 256 32-bit words, with either + one or four tables being required for each round function depending on + how much speed is required. The encryption and decryption round functions + are different and the last encryption and decryption round functions are + different again making four different round functions in all. + + This means that: + 1. Normal encryption and decryption rounds can each use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + 2. The last encryption and decryption rounds can also use either 0, 1 + or 4 tables and table spaces of 0, 1024 or 4096 bytes each. + + Include or exclude the appropriate definitions below to set the number + of tables used by this implementation. +*/ + +#if 1 /* set tables for the normal encryption round */ +#define ENC_ROUND FOUR_TABLES +#elif 0 +#define ENC_ROUND ONE_TABLE +#else +#define ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last encryption round */ +#define LAST_ENC_ROUND FOUR_TABLES +#elif 0 +#define LAST_ENC_ROUND ONE_TABLE +#else +#define LAST_ENC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the normal decryption round */ +#define DEC_ROUND FOUR_TABLES +#elif 0 +#define DEC_ROUND ONE_TABLE +#else +#define DEC_ROUND NO_TABLES +#endif + +#if 1 /* set tables for the last decryption round */ +#define LAST_DEC_ROUND FOUR_TABLES +#elif 0 +#define LAST_DEC_ROUND ONE_TABLE +#else +#define LAST_DEC_ROUND NO_TABLES +#endif + +/* The decryption key schedule can be speeded up with tables in the same + way that the round functions can. Include or exclude the following + defines to set this requirement. +*/ +#if 1 +#define KEY_SCHED FOUR_TABLES +#elif 0 +#define KEY_SCHED ONE_TABLE +#else +#define KEY_SCHED NO_TABLES +#endif + +/* 11. TABLE POINTER CACHING + + Normally tables are referenced directly. Enable this option if you wish to + cache pointers to the tables in the encrypt/decrypt code. Note that this + only works if you are using FOUR_TABLES for the ROUND you enable this for.
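+ + For example (an editor's note anticipating the four_cached_tables macro + defined later in this file): with caching enabled the round code can + hoist the four table base addresses into locals once per call, along + the lines of + + const aes_32t *t_fn0 = t_use(f,n)[0], *t_fn1 = t_use(f,n)[1], + *t_fn2 = t_use(f,n)[2], *t_fn3 = t_use(f,n)[3]; + + and then combine t_fn0[...] ^ t_fn1[...] ^ t_fn2[...] ^ t_fn3[...] in + each round instead of reloading the table addresses every time.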
+*/ +#if 1 +#define ENC_ROUND_CACHE_TABLES +#endif +#if 1 +#define LAST_ENC_ROUND_CACHE_TABLES +#endif +#if 1 +#define DEC_ROUND_CACHE_TABLES +#endif +#if 1 +#define LAST_DEC_ROUND_CACHE_TABLES +#endif + + +/* END OF CONFIGURATION OPTIONS */ + +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +/* Disable or report errors on some combinations of options */ + +#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND NO_TABLES +#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND ONE_TABLE +#endif + +#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE +#undef ENC_UNROLL +#define ENC_UNROLL NONE +#endif + +#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND NO_TABLES +#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND ONE_TABLE +#endif + +#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE +#undef DEC_UNROLL +#define DEC_UNROLL NONE +#endif + +/* upr(x,n): rotates bytes within words by n positions, moving bytes to + higher index positions with wrap around into low positions + ups(x,n): moves bytes by n positions to higher index positions in + words but without wrap around + bval(x,n): extracts a byte from a word + + NOTE: The definitions given here are intended only for use with + unsigned variables and with shift counts that are compile + time constants +*/ + +#if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN) +#define upr(x,n) (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n)))) +#define ups(x,n) ((aes_32t) (x) << (8 * (n))) +#define bval(x,n) ((aes_08t)((x) >> (8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0)) +#endif + +#if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN) +#define upr(x,n) (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n)))) +#define ups(x,n) ((aes_32t) (x) >> (8 * (n))) +#define bval(x,n) ((aes_08t)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3)) +#endif + +#if defined(SAFE_IO) + +#define word_in(x,c) bytes2word(((aes_08t*)(x)+4*c)[0], ((aes_08t*)(x)+4*c)[1], \ + ((aes_08t*)(x)+4*c)[2], ((aes_08t*)(x)+4*c)[3]) +#define word_out(x,c,v) { ((aes_08t*)(x)+4*c)[0] = bval(v,0); ((aes_08t*)(x)+4*c)[1] = bval(v,1); \ + ((aes_08t*)(x)+4*c)[2] = bval(v,2); ((aes_08t*)(x)+4*c)[3] = bval(v,3); } + +#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER) + +#define word_in(x,c) (*((aes_32t*)(x)+(c))) +#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v)) + +#else + +#define word_in(x,c) aes_sw32(*((aes_32t*)(x)+(c))) +#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v)) + +#endif + +/* the finite field modular polynomial and elements */ + +#define WPOLY 0x011b +#define BPOLY 0x1b + +/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ + +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) + +/* The following defines provide alternative definitions of gf_mulx that might + give improved performance if a fast 32-bit multiply is not available. Note + that a temporary variable u needs to be defined where gf_mulx is used.
+ +#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) +#define m4 (0x01010101 * BPOLY) +#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) +*/ + +/* Work out which tables are needed for the different options */ + +#if defined( AES_ASM ) +#if defined( ENC_ROUND ) +#undef ENC_ROUND +#endif +#define ENC_ROUND FOUR_TABLES +#if defined( LAST_ENC_ROUND ) +#undef LAST_ENC_ROUND +#endif +#define LAST_ENC_ROUND FOUR_TABLES +#if defined( DEC_ROUND ) +#undef DEC_ROUND +#endif +#define DEC_ROUND FOUR_TABLES +#if defined( LAST_DEC_ROUND ) +#undef LAST_DEC_ROUND +#endif +#define LAST_DEC_ROUND FOUR_TABLES +#if defined( KEY_SCHED ) +#undef KEY_SCHED +#define KEY_SCHED FOUR_TABLES +#endif +#endif + +#if defined(ENCRYPTION) || defined(AES_ASM) +#if ENC_ROUND == ONE_TABLE +#define FT1_SET +#elif ENC_ROUND == FOUR_TABLES +#define FT4_SET +#else +#define SBX_SET +#endif +#if LAST_ENC_ROUND == ONE_TABLE +#define FL1_SET +#elif LAST_ENC_ROUND == FOUR_TABLES +#define FL4_SET +#elif !defined(SBX_SET) +#define SBX_SET +#endif +#endif + +#if defined(DECRYPTION) || defined(AES_ASM) +#if DEC_ROUND == ONE_TABLE +#define IT1_SET +#elif DEC_ROUND == FOUR_TABLES +#define IT4_SET +#else +#define ISB_SET +#endif +#if LAST_DEC_ROUND == ONE_TABLE +#define IL1_SET +#elif LAST_DEC_ROUND == FOUR_TABLES +#define IL4_SET +#elif !defined(ISB_SET) +#define ISB_SET +#endif +#endif + +#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE) +#if KEY_SCHED == ONE_TABLE +#define LS1_SET +#define IM1_SET +#elif KEY_SCHED == FOUR_TABLES +#define LS4_SET +#define IM4_SET +#elif !defined(SBX_SET) +#define SBX_SET +#endif +#endif + +/* generic definitions of Rijndael macros that use tables */ + +#define no_table(x,box,vf,rf,c) bytes2word( \ + box[bval(vf(x,0,c),rf(0,c))], \ + box[bval(vf(x,1,c),rf(1,c))], \ + box[bval(vf(x,2,c),rf(2,c))], \ + box[bval(vf(x,3,c),rf(3,c))]) + +#define one_table(x,op,tab,vf,rf,c) \ + ( tab[bval(vf(x,0,c),rf(0,c))] \ + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define four_cached_tables(x,tab,vf,rf,c) \ +( tab##0[bval(vf(x,0,c),rf(0,c))] \ + ^ tab##1[bval(vf(x,1,c),rf(1,c))] \ + ^ tab##2[bval(vf(x,2,c),rf(2,c))] \ + ^ tab##3[bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((8+r-c)&3) + +/* perform forward and inverse column mix operation on four bytes in long word x in */ +/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. 
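+ + As a worked example of the underlying byte arithmetic (editor's + addition): gf_mulx multiplies each byte of a word by {02} in GF(2^8). + For the byte 0x57 the high bit is clear, so the result is just the left + shift 0xae, i.e. {02}.{57} = {ae}; for 0x80 the shift discards the high + bit and the ((x & m1) >> 7) * BPOLY term folds the reduction polynomial + back in, giving {02}.{80} = {1b}. On a whole word, gf_mulx(0x80578057) + is therefore 0x1bae1bae.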
*/ + +#if defined(FM4_SET) /* not currently used */ +#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) +#elif defined(FM1_SET) /* not currently used */ +#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) +#else +#define dec_fmvars aes_32t g2 +#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) +#endif + +#if defined(IM4_SET) +#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) +#elif defined(IM1_SET) +#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) +#else +#define dec_imvars aes_32t g2, g4, g9 +#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ + (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) +#endif + +#if defined(FL4_SET) +#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) +#elif defined(LS4_SET) +#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) +#elif defined(FL1_SET) +#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) +#elif defined(LS1_SET) +#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) +#else +#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) +#endif + +#endif diff --git a/bsd/crypto/aes/aestab.c b/bsd/crypto/aes/aestab.c new file mode 100644 index 000000000..7997f2978 --- /dev/null +++ b/bsd/crypto/aes/aestab.c @@ -0,0 +1,384 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue 28/01/2004 + +*/ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define DO_TABLES + +#include "aesopt.h" + +#if defined(FIXED_TABLES) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define isb_data(w) {\ + w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ + w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ + w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ + w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ + w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ + w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ + w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ + w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ + w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ + w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ + w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ + w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), 
w(0x8d), w(0x9d), w(0x84),\ + w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ + w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ + w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ + w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ + w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ + w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\ + w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ + w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ + w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ + w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ + w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ + w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ + w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ + w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ + w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ + w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ + w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ + w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ + w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ + w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } + +#define mm_data(w) {\ + w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ + w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ + w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ + w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ + w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ + w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ + w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ + w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ + w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ + w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ + w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ + w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\ + w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ + w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ + w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ + w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ + w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ + w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ + w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ + w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ + w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ + w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ + w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ + w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ + w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ + w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ + w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), 
w(0xd5), w(0xd6), w(0xd7),\ + w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ + w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ + w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ + w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ + w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10), w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define h0(x) (x) + +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#endif + +#if defined(FIXED_TABLES) || !defined(FF_TABLES) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#else + +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[ 255 - log[x]] : 0) + +#endif + +#include "aestab.h" + +#if defined(FIXED_TABLES) + +/* implemented in case of a wrong call when fixed tables are in use */ + +void gen_tabs(void) +{ +} + +#else /* dynamic table generation */ + +#if !defined(FF_TABLES) + +/* Generate the tables for the dynamic table option + + It will generally be sensible to use tables to compute finite + field multiplies and inverses but where memory is scarce this + code might sometimes be better. But it only has an effect during + initialisation, so it's pretty unimportant in overall terms. +*/ + +/* return 2 ^ (n - 1) where n is the bit number of the highest bit + set in x with x in the range 1 < x < 0x00000200.
This form is + used so that locals within fi can be bytes rather than words +*/ + +static aes_08t hibit(const aes_32t x) +{ aes_08t r = (aes_08t)((x >> 1) | (x >> 2)); + + r |= (r >> 2); + r |= (r >> 4); + return (r + 1) >> 1; +} + +/* return the inverse of the finite field element x */ + +static aes_08t fi(const aes_08t x) +{ aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0; + + if(x < 2) return x; + + for(;;) + { + if(!n1) return v1; + + while(n2 >= n1) + { + n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2); + } + + if(!n2) return v2; + + while(n1 >= n2) + { + n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1); + } + } +} + +#endif + +/* The forward and inverse affine transformations used in the S-box */ + +#define fwd_affine(x) \ + (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8))) + +static int init = 0; + +void gen_tabs(void) +{ aes_32t i, w; + +#if defined(FF_TABLES) + + aes_08t pow[512], log[256]; + + if(init) return; + /* log and power tables for GF(2^8) finite field with + WPOLY as modular polynomial - the simplest primitive + root is 0x03, used here to generate the tables + */ + + i = 0; w = 1; + do + { + pow[i] = (aes_08t)w; + pow[i + 255] = (aes_08t)w; + log[w] = (aes_08t)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } + while (w != 1); + +#else + if(init) return; +#endif + + for(i = 0, w = 1; i < RC_LENGTH; ++i) + { + t_set(r,c)[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) + { aes_08t b; + + b = fwd_affine(fi((aes_08t)i)); + w = bytes2word(f2(b), b, b, f3(b)); + +#if defined( SBX_SET ) + t_set(s,box)[i] = b; +#endif + +#if defined( FT1_SET ) /* tables for a normal encryption round */ + t_set(f,n)[i] = w; +#endif +#if defined( FT4_SET ) + t_set(f,n)[0][i] = w; + t_set(f,n)[1][i] = upr(w,1); + t_set(f,n)[2][i] = upr(w,2); + t_set(f,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); + +#if defined( FL1_SET ) /* tables for last encryption round (may also */ + t_set(f,l)[i] = w; /* be used in the key schedule) */ +#endif +#if defined( FL4_SET ) + t_set(f,l)[0][i] = w; + t_set(f,l)[1][i] = upr(w,1); + t_set(f,l)[2][i] = upr(w,2); + t_set(f,l)[3][i] = upr(w,3); +#endif + +#if defined( LS1_SET ) /* table for key schedule if t_set(f,l) above is */ + t_set(l,s)[i] = w; /* not of the required form */ +#endif +#if defined( LS4_SET ) + t_set(l,s)[0][i] = w; + t_set(l,s)[1][i] = upr(w,1); + t_set(l,s)[2][i] = upr(w,2); + t_set(l,s)[3][i] = upr(w,3); +#endif + + b = fi(inv_affine((aes_08t)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + +#if defined( IM1_SET ) /* tables for the inverse mix column operation */ + t_set(i,m)[b] = w; +#endif +#if defined( IM4_SET ) + t_set(i,m)[0][b] = w; + t_set(i,m)[1][b] = upr(w,1); + t_set(i,m)[2][b] = upr(w,2); + t_set(i,m)[3][b] = upr(w,3); +#endif + +#if defined( ISB_SET ) + t_set(i,box)[i] = b; +#endif +#if defined( IT1_SET ) /* tables for a normal decryption round */ + t_set(i,n)[i] = w; +#endif +#if defined( IT4_SET ) + t_set(i,n)[0][i] = w; + t_set(i,n)[1][i] = upr(w,1); + t_set(i,n)[2][i] = upr(w,2); + t_set(i,n)[3][i] = upr(w,3); +#endif + w = bytes2word(b, 0, 0, 0); +#if defined( IL1_SET ) /* tables for last decryption round */ + t_set(i,l)[i] = w; +#endif +#if defined( IL4_SET ) + t_set(i,l)[0][i] = w; + t_set(i,l)[1][i] = upr(w,1); + t_set(i,l)[2][i] = upr(w,2); + t_set(i,l)[3][i] = upr(w,3); +#endif + } + init = 1; +} + +#endif + +#if 
defined(__cplusplus) +} +#endif + diff --git a/bsd/crypto/aes/aestab.h b/bsd/crypto/aes/aestab.h new file mode 100644 index 000000000..c610f9d43 --- /dev/null +++ b/bsd/crypto/aes/aestab.h @@ -0,0 +1,175 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 28/01/2004 + + This file contains the code for declaring the tables needed to implement + AES. The file aesopt.h is assumed to be included before this header file. + If there are no global variables, the definitions here can be used to put + the AES tables in a structure so that a pointer can then be added to the + AES context to pass them to the AES routines that need them. If this + facility is used, the calling program has to ensure that this pointer is + managed appropriately. In particular, the value of the t_dec(in,it) item + in the table structure must be set to zero in order to ensure that the + tables are initialised. In practice the three code sequences in aeskey.c + that control the calls to gen_tabs() and the gen_tabs() routine itself will + have to be changed for a specific implementation. If global variables are + available it will generally be preferable to use them with the precomputed + FIXED_TABLES option that uses static global tables. 
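+ + For illustration (an editor's sketch of the structure-based alternative + described above, not code from this patch; the type and field names are + hypothetical), the tables could be gathered behind a context pointer: + + typedef struct aes_table_set { + aes_32t t_fn[4][256]; // forward normal-round tables + aes_32t t_fl[4][256]; // forward last-round tables + aes_32t t_in[4][256]; // inverse normal-round tables + aes_32t t_il[4][256]; // inverse last-round tables + aes_32t t_im[4][256]; // inverse mix-column tables + aes_32t t_rc[RC_LENGTH]; // round-constant (rcon) table + } aes_table_set; + + with an aes_table_set pointer added to the AES context and passed to the + routines that need the tables, and one designated word zeroed to mark + the tables as not yet initialised.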
+ + The following defines can be used to control the way the tables + are defined, initialised and used in embedded environments that + require special features for these purposes: + + the 't_dec' construction is used to declare fixed table arrays + the 't_set' construction is used to set fixed table values + the 't_use' construction is used to access fixed table values + + 256 byte tables: + + t_xxx(s,box) => forward S box + t_xxx(i,box) => inverse S box + + 256 32-bit word OR 4 x 256 32-bit word tables: + + t_xxx(f,n) => forward normal round + t_xxx(f,l) => forward last round + t_xxx(i,n) => inverse normal round + t_xxx(i,l) => inverse last round + t_xxx(l,s) => key schedule table + t_xxx(i,m) => inverse mix column table (used by the decryption key schedule) + + Other variables and tables: + + t_xxx(r,c) => the rcon table +*/ + +#if !defined( _AESTAB_H ) +#define _AESTAB_H + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#if defined(FIXED_TABLES) +#define Const const +#else +#define Const +#endif + +#if defined(DO_TABLES) +#define Extern +#else +#define Extern extern +#endif + +#if defined(_MSC_VER) && defined(TABLE_ALIGN) +#define Align __declspec(align(TABLE_ALIGN)) +#else +#define Align +#endif + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(DO_TABLES) && defined(FIXED_TABLES) +#define d_1(t,n,b,e) Align Const t n[256] = b(e) +#define d_4(t,n,b,e,f,g,h) Align Const t n[4][256] = { b(e), b(f), b(g), b(h) } +Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); +#else +#define d_1(t,n,b,e) Extern Align Const t n[256] +#define d_4(t,n,b,e,f,g,h) Extern Align Const t n[4][256] +Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH]; +#endif + +#if defined( SBX_SET ) + d_1(aes_08t, t_dec(s,box), sb_data, h0); +#endif +#if defined( ISB_SET ) + d_1(aes_08t, t_dec(i,box), isb_data, h0); +#endif + +#if defined( FT1_SET ) + d_1(aes_32t, t_dec(f,n), sb_data, u0); +#endif +#if defined( FT4_SET ) + d_4(aes_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); +#endif + +#if defined( FL1_SET ) + d_1(aes_32t, t_dec(f,l), sb_data, w0); +#endif +#if defined( FL4_SET ) + d_4(aes_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); +#endif + +#if defined( IT1_SET ) + d_1(aes_32t, t_dec(i,n), isb_data, v0); +#endif +#if defined( IT4_SET ) + d_4(aes_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); +#endif + +#if defined( IL1_SET ) + d_1(aes_32t, t_dec(i,l), isb_data, w0); +#endif +#if defined( IL4_SET ) + d_4(aes_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); +#endif + +#if defined( LS1_SET ) +#if defined( FL1_SET ) +#undef LS1_SET +#else + d_1(aes_32t, t_dec(l,s), sb_data, w0); +#endif +#endif + +#if defined( LS4_SET ) +#if defined( FL4_SET ) +#undef LS4_SET +#else + d_4(aes_32t, t_dec(l,s), sb_data, w0, w1, w2, w3); +#endif +#endif + +#if defined( IM1_SET ) + d_1(aes_32t, t_dec(i,m), mm_data, v0); +#endif +#if defined( IM4_SET ) + d_4(aes_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/bsd/crypto/blowfish/Makefile b/bsd/crypto/blowfish/Makefile index e4885864b..0521cc6fd 100644 --- a/bsd/crypto/blowfish/Makefile +++ b/bsd/crypto/blowfish/Makefile @@ -26,7 +26,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/blowfish/blowfish.h b/bsd/crypto/blowfish/blowfish.h index 69b902426..121e9c394 100644 --- a/bsd/crypto/blowfish/blowfish.h +++ b/bsd/crypto/blowfish/blowfish.h @@
-80,9 +80,9 @@ typedef struct bf_key_st { BF_LONG S[4*256]; } BF_KEY; -void BF_set_key __P((BF_KEY *, int, unsigned char *)); -void BF_encrypt __P((BF_LONG *, BF_KEY *)); -void BF_decrypt __P((BF_LONG *, BF_KEY *)); +void BF_set_key(BF_KEY *, int, unsigned char *); +void BF_encrypt(BF_LONG *, BF_KEY *); +void BF_decrypt(BF_LONG *, BF_KEY *); void BF_cbc_encrypt(const unsigned char *, unsigned char *, long, const BF_KEY *, unsigned char *, int); diff --git a/bsd/crypto/cast128/Makefile b/bsd/crypto/cast128/Makefile index d67b89e11..d214498bb 100644 --- a/bsd/crypto/cast128/Makefile +++ b/bsd/crypto/cast128/Makefile @@ -26,7 +26,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/cast128/cast128.h b/bsd/crypto/cast128/cast128.h index 2dc90d318..d79eea55c 100644 --- a/bsd/crypto/cast128/cast128.h +++ b/bsd/crypto/cast128/cast128.h @@ -46,14 +46,10 @@ #define CAST128_DECRYPT 0 -extern void set_cast128_subkey __P((u_int32_t *, u_int8_t *, int)); -extern void cast128_encrypt_round16 __P((u_int8_t *, const u_int8_t *, - u_int32_t *)); -extern void cast128_decrypt_round16 __P((u_int8_t *, const u_int8_t *, - u_int32_t *)); -extern void cast128_encrypt_round12 __P((u_int8_t *, const u_int8_t *, - u_int32_t *)); -extern void cast128_decrypt_round12 __P((u_int8_t *, const u_int8_t *, - u_int32_t *)); +extern void set_cast128_subkey(u_int32_t *, u_int8_t *, int); +extern void cast128_encrypt_round16(u_int8_t *, const u_int8_t *, u_int32_t *); +extern void cast128_decrypt_round16(u_int8_t *, const u_int8_t *, u_int32_t *); +extern void cast128_encrypt_round12(u_int8_t *, const u_int8_t *, u_int32_t *); +extern void cast128_decrypt_round12(u_int8_t *, const u_int8_t *, u_int32_t *); #endif diff --git a/bsd/crypto/des/Makefile b/bsd/crypto/des/Makefile index 58c13ed5f..08483b21a 100644 --- a/bsd/crypto/des/Makefile +++ b/bsd/crypto/des/Makefile @@ -26,7 +26,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/des/des.h b/bsd/crypto/des/des.h index a21b6bfa2..9f232b185 100644 --- a/bsd/crypto/des/des.h +++ b/bsd/crypto/des/des.h @@ -81,35 +81,34 @@ typedef struct des_ks_struct extern int des_check_key; /* defaults to false */ -char *des_options __P((void)); -void des_ecb_encrypt __P((des_cblock *, des_cblock *, - des_key_schedule, int)); +char *des_options(void); +void des_ecb_encrypt(des_cblock *, des_cblock *, des_key_schedule, int); -void des_encrypt1 __P((DES_LONG *, des_key_schedule, int)); -void des_encrypt2 __P((DES_LONG *, des_key_schedule, int)); -void des_encrypt3 __P((DES_LONG *, des_key_schedule, des_key_schedule, - des_key_schedule)); -void des_decrypt3 __P((DES_LONG *, des_key_schedule, des_key_schedule, - des_key_schedule)); +void des_encrypt1(DES_LONG *, des_key_schedule, int); +void des_encrypt2(DES_LONG *, des_key_schedule, int); +void des_encrypt3(DES_LONG *, des_key_schedule, des_key_schedule, + des_key_schedule); +void des_decrypt3(DES_LONG *, des_key_schedule, des_key_schedule, + des_key_schedule); -void des_ecb3_encrypt __P((des_cblock *, des_cblock *, des_key_schedule, - des_key_schedule, des_key_schedule, int)); +void des_ecb3_encrypt(des_cblock *, des_cblock *, des_key_schedule, + des_key_schedule, des_key_schedule, int); -void 
des_ncbc_encrypt __P((const unsigned char *, unsigned char *, long, - des_key_schedule, des_cblock *, int)); +void des_ncbc_encrypt(const unsigned char *, unsigned char *, long, + des_key_schedule, des_cblock *, int); void des_ede3_cbc_encrypt(const unsigned char *, unsigned char *, long, des_key_schedule, des_key_schedule, des_key_schedule, des_cblock *, int); -void des_set_odd_parity __P((des_cblock *)); -void des_fixup_key_parity __P((des_cblock *)); -int des_is_weak_key __P((des_cblock *)); -int des_set_key __P((des_cblock *, des_key_schedule)); -int des_key_sched __P((des_cblock *, des_key_schedule)); -int des_set_key_checked __P((des_cblock *, des_key_schedule)); -void des_set_key_unchecked __P((des_cblock *, des_key_schedule)); -int des_check_key_parity __P((des_cblock *)); +void des_set_odd_parity(des_cblock *); +void des_fixup_key_parity(des_cblock *); +int des_is_weak_key(des_cblock *); +int des_set_key(des_cblock *, des_key_schedule); +int des_key_sched(des_cblock *, des_key_schedule); +int des_set_key_checked(des_cblock *, des_key_schedule); +void des_set_key_unchecked(des_cblock *, des_key_schedule); +int des_check_key_parity(des_cblock *); #ifdef __cplusplus } diff --git a/bsd/crypto/md5.c b/bsd/crypto/md5.c index 6fd600bd9..734232dac 100644 --- a/bsd/crypto/md5.c +++ b/bsd/crypto/md5.c @@ -127,7 +127,7 @@ static const u_int8_t md5_paddat[MD5_BUFLEN] = { 0, 0, 0, 0, 0, 0, 0, 0, }; -static void md5_calc __P((u_int8_t *, md5_ctxt *)); +static void md5_calc(u_int8_t *, md5_ctxt *); void md5_init(ctxt) md5_ctxt *ctxt; diff --git a/bsd/crypto/md5.h b/bsd/crypto/md5.h index 3d02afe6b..8a99300b8 100644 --- a/bsd/crypto/md5.h +++ b/bsd/crypto/md5.h @@ -58,10 +58,10 @@ typedef struct { u_int8_t md5_buf[MD5_BUFLEN]; } md5_ctxt; -extern void md5_init __P((md5_ctxt *)); -extern void md5_loop __P((md5_ctxt *, u_int8_t *, u_int)); -extern void md5_pad __P((md5_ctxt *)); -extern void md5_result __P((u_int8_t *, md5_ctxt *)); +extern void md5_init(md5_ctxt *); +extern void md5_loop(md5_ctxt *, u_int8_t *, u_int); +extern void md5_pad(md5_ctxt *); +extern void md5_result(u_int8_t *, md5_ctxt *); /* compatibility */ #define MD5_CTX md5_ctxt diff --git a/bsd/crypto/rc4/Makefile b/bsd/crypto/rc4/Makefile index 09d432842..23432a57e 100644 --- a/bsd/crypto/rc4/Makefile +++ b/bsd/crypto/rc4/Makefile @@ -26,7 +26,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/rijndael/boxes-fst.dat b/bsd/crypto/rijndael/boxes-fst.dat deleted file mode 100644 index 8b9e26c33..000000000 --- a/bsd/crypto/rijndael/boxes-fst.dat +++ /dev/null @@ -1,958 +0,0 @@ -/* $FreeBSD: src/sys/crypto/rijndael/boxes-fst.dat,v 1.2.2.1 2001/07/03 11:01:35 ume Exp $ */ -/* $KAME: boxes-fst.dat,v 1.6 2001/05/27 00:23:22 itojun Exp $ */ - -const word8 S[256] = { - 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, -202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, -183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, - 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, - 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, - 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, -208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, - 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, -205, 12, 
19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, - 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, -224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, -231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, -186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, -112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, -225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, -140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 -}; - -#ifdef INTERMEDIATE_VALUE_KAT -static const word8 Si[256] = { - 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, -124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, - 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, - 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, -114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, -108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, -144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, -208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, - 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, -150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, - 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, -252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, - 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, - 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, -160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, - 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125 -}; -#endif /* INTERMEDIATE_VALUE_KAT */ - -union xtab { - word32 xt32[256]; - word8 xt8[256][4]; -}; - -static const union xtab xT1 = { - .xt8 = { -{0xc6,0x63,0x63,0xa5}, {0xf8,0x7c,0x7c,0x84}, {0xee,0x77,0x77,0x99}, {0xf6,0x7b,0x7b,0x8d}, -{0xff,0xf2,0xf2,0x0d}, {0xd6,0x6b,0x6b,0xbd}, {0xde,0x6f,0x6f,0xb1}, {0x91,0xc5,0xc5,0x54}, -{0x60,0x30,0x30,0x50}, {0x02,0x01,0x01,0x03}, {0xce,0x67,0x67,0xa9}, {0x56,0x2b,0x2b,0x7d}, -{0xe7,0xfe,0xfe,0x19}, {0xb5,0xd7,0xd7,0x62}, {0x4d,0xab,0xab,0xe6}, {0xec,0x76,0x76,0x9a}, -{0x8f,0xca,0xca,0x45}, {0x1f,0x82,0x82,0x9d}, {0x89,0xc9,0xc9,0x40}, {0xfa,0x7d,0x7d,0x87}, -{0xef,0xfa,0xfa,0x15}, {0xb2,0x59,0x59,0xeb}, {0x8e,0x47,0x47,0xc9}, {0xfb,0xf0,0xf0,0x0b}, -{0x41,0xad,0xad,0xec}, {0xb3,0xd4,0xd4,0x67}, {0x5f,0xa2,0xa2,0xfd}, {0x45,0xaf,0xaf,0xea}, -{0x23,0x9c,0x9c,0xbf}, {0x53,0xa4,0xa4,0xf7}, {0xe4,0x72,0x72,0x96}, {0x9b,0xc0,0xc0,0x5b}, -{0x75,0xb7,0xb7,0xc2}, {0xe1,0xfd,0xfd,0x1c}, {0x3d,0x93,0x93,0xae}, {0x4c,0x26,0x26,0x6a}, -{0x6c,0x36,0x36,0x5a}, {0x7e,0x3f,0x3f,0x41}, {0xf5,0xf7,0xf7,0x02}, {0x83,0xcc,0xcc,0x4f}, -{0x68,0x34,0x34,0x5c}, {0x51,0xa5,0xa5,0xf4}, {0xd1,0xe5,0xe5,0x34}, {0xf9,0xf1,0xf1,0x08}, -{0xe2,0x71,0x71,0x93}, {0xab,0xd8,0xd8,0x73}, {0x62,0x31,0x31,0x53}, {0x2a,0x15,0x15,0x3f}, -{0x08,0x04,0x04,0x0c}, {0x95,0xc7,0xc7,0x52}, {0x46,0x23,0x23,0x65}, {0x9d,0xc3,0xc3,0x5e}, -{0x30,0x18,0x18,0x28}, {0x37,0x96,0x96,0xa1}, {0x0a,0x05,0x05,0x0f}, {0x2f,0x9a,0x9a,0xb5}, -{0x0e,0x07,0x07,0x09}, {0x24,0x12,0x12,0x36}, {0x1b,0x80,0x80,0x9b}, {0xdf,0xe2,0xe2,0x3d}, -{0xcd,0xeb,0xeb,0x26}, {0x4e,0x27,0x27,0x69}, {0x7f,0xb2,0xb2,0xcd}, {0xea,0x75,0x75,0x9f}, -{0x12,0x09,0x09,0x1b}, {0x1d,0x83,0x83,0x9e}, {0x58,0x2c,0x2c,0x74}, 
{0x34,0x1a,0x1a,0x2e}, -{0x36,0x1b,0x1b,0x2d}, {0xdc,0x6e,0x6e,0xb2}, {0xb4,0x5a,0x5a,0xee}, {0x5b,0xa0,0xa0,0xfb}, -{0xa4,0x52,0x52,0xf6}, {0x76,0x3b,0x3b,0x4d}, {0xb7,0xd6,0xd6,0x61}, {0x7d,0xb3,0xb3,0xce}, -{0x52,0x29,0x29,0x7b}, {0xdd,0xe3,0xe3,0x3e}, {0x5e,0x2f,0x2f,0x71}, {0x13,0x84,0x84,0x97}, -{0xa6,0x53,0x53,0xf5}, {0xb9,0xd1,0xd1,0x68}, {0x00,0x00,0x00,0x00}, {0xc1,0xed,0xed,0x2c}, -{0x40,0x20,0x20,0x60}, {0xe3,0xfc,0xfc,0x1f}, {0x79,0xb1,0xb1,0xc8}, {0xb6,0x5b,0x5b,0xed}, -{0xd4,0x6a,0x6a,0xbe}, {0x8d,0xcb,0xcb,0x46}, {0x67,0xbe,0xbe,0xd9}, {0x72,0x39,0x39,0x4b}, -{0x94,0x4a,0x4a,0xde}, {0x98,0x4c,0x4c,0xd4}, {0xb0,0x58,0x58,0xe8}, {0x85,0xcf,0xcf,0x4a}, -{0xbb,0xd0,0xd0,0x6b}, {0xc5,0xef,0xef,0x2a}, {0x4f,0xaa,0xaa,0xe5}, {0xed,0xfb,0xfb,0x16}, -{0x86,0x43,0x43,0xc5}, {0x9a,0x4d,0x4d,0xd7}, {0x66,0x33,0x33,0x55}, {0x11,0x85,0x85,0x94}, -{0x8a,0x45,0x45,0xcf}, {0xe9,0xf9,0xf9,0x10}, {0x04,0x02,0x02,0x06}, {0xfe,0x7f,0x7f,0x81}, -{0xa0,0x50,0x50,0xf0}, {0x78,0x3c,0x3c,0x44}, {0x25,0x9f,0x9f,0xba}, {0x4b,0xa8,0xa8,0xe3}, -{0xa2,0x51,0x51,0xf3}, {0x5d,0xa3,0xa3,0xfe}, {0x80,0x40,0x40,0xc0}, {0x05,0x8f,0x8f,0x8a}, -{0x3f,0x92,0x92,0xad}, {0x21,0x9d,0x9d,0xbc}, {0x70,0x38,0x38,0x48}, {0xf1,0xf5,0xf5,0x04}, -{0x63,0xbc,0xbc,0xdf}, {0x77,0xb6,0xb6,0xc1}, {0xaf,0xda,0xda,0x75}, {0x42,0x21,0x21,0x63}, -{0x20,0x10,0x10,0x30}, {0xe5,0xff,0xff,0x1a}, {0xfd,0xf3,0xf3,0x0e}, {0xbf,0xd2,0xd2,0x6d}, -{0x81,0xcd,0xcd,0x4c}, {0x18,0x0c,0x0c,0x14}, {0x26,0x13,0x13,0x35}, {0xc3,0xec,0xec,0x2f}, -{0xbe,0x5f,0x5f,0xe1}, {0x35,0x97,0x97,0xa2}, {0x88,0x44,0x44,0xcc}, {0x2e,0x17,0x17,0x39}, -{0x93,0xc4,0xc4,0x57}, {0x55,0xa7,0xa7,0xf2}, {0xfc,0x7e,0x7e,0x82}, {0x7a,0x3d,0x3d,0x47}, -{0xc8,0x64,0x64,0xac}, {0xba,0x5d,0x5d,0xe7}, {0x32,0x19,0x19,0x2b}, {0xe6,0x73,0x73,0x95}, -{0xc0,0x60,0x60,0xa0}, {0x19,0x81,0x81,0x98}, {0x9e,0x4f,0x4f,0xd1}, {0xa3,0xdc,0xdc,0x7f}, -{0x44,0x22,0x22,0x66}, {0x54,0x2a,0x2a,0x7e}, {0x3b,0x90,0x90,0xab}, {0x0b,0x88,0x88,0x83}, -{0x8c,0x46,0x46,0xca}, {0xc7,0xee,0xee,0x29}, {0x6b,0xb8,0xb8,0xd3}, {0x28,0x14,0x14,0x3c}, -{0xa7,0xde,0xde,0x79}, {0xbc,0x5e,0x5e,0xe2}, {0x16,0x0b,0x0b,0x1d}, {0xad,0xdb,0xdb,0x76}, -{0xdb,0xe0,0xe0,0x3b}, {0x64,0x32,0x32,0x56}, {0x74,0x3a,0x3a,0x4e}, {0x14,0x0a,0x0a,0x1e}, -{0x92,0x49,0x49,0xdb}, {0x0c,0x06,0x06,0x0a}, {0x48,0x24,0x24,0x6c}, {0xb8,0x5c,0x5c,0xe4}, -{0x9f,0xc2,0xc2,0x5d}, {0xbd,0xd3,0xd3,0x6e}, {0x43,0xac,0xac,0xef}, {0xc4,0x62,0x62,0xa6}, -{0x39,0x91,0x91,0xa8}, {0x31,0x95,0x95,0xa4}, {0xd3,0xe4,0xe4,0x37}, {0xf2,0x79,0x79,0x8b}, -{0xd5,0xe7,0xe7,0x32}, {0x8b,0xc8,0xc8,0x43}, {0x6e,0x37,0x37,0x59}, {0xda,0x6d,0x6d,0xb7}, -{0x01,0x8d,0x8d,0x8c}, {0xb1,0xd5,0xd5,0x64}, {0x9c,0x4e,0x4e,0xd2}, {0x49,0xa9,0xa9,0xe0}, -{0xd8,0x6c,0x6c,0xb4}, {0xac,0x56,0x56,0xfa}, {0xf3,0xf4,0xf4,0x07}, {0xcf,0xea,0xea,0x25}, -{0xca,0x65,0x65,0xaf}, {0xf4,0x7a,0x7a,0x8e}, {0x47,0xae,0xae,0xe9}, {0x10,0x08,0x08,0x18}, -{0x6f,0xba,0xba,0xd5}, {0xf0,0x78,0x78,0x88}, {0x4a,0x25,0x25,0x6f}, {0x5c,0x2e,0x2e,0x72}, -{0x38,0x1c,0x1c,0x24}, {0x57,0xa6,0xa6,0xf1}, {0x73,0xb4,0xb4,0xc7}, {0x97,0xc6,0xc6,0x51}, -{0xcb,0xe8,0xe8,0x23}, {0xa1,0xdd,0xdd,0x7c}, {0xe8,0x74,0x74,0x9c}, {0x3e,0x1f,0x1f,0x21}, -{0x96,0x4b,0x4b,0xdd}, {0x61,0xbd,0xbd,0xdc}, {0x0d,0x8b,0x8b,0x86}, {0x0f,0x8a,0x8a,0x85}, -{0xe0,0x70,0x70,0x90}, {0x7c,0x3e,0x3e,0x42}, {0x71,0xb5,0xb5,0xc4}, {0xcc,0x66,0x66,0xaa}, -{0x90,0x48,0x48,0xd8}, {0x06,0x03,0x03,0x05}, {0xf7,0xf6,0xf6,0x01}, {0x1c,0x0e,0x0e,0x12}, -{0xc2,0x61,0x61,0xa3}, {0x6a,0x35,0x35,0x5f}, {0xae,0x57,0x57,0xf9}, 
{0x69,0xb9,0xb9,0xd0}, -{0x17,0x86,0x86,0x91}, {0x99,0xc1,0xc1,0x58}, {0x3a,0x1d,0x1d,0x27}, {0x27,0x9e,0x9e,0xb9}, -{0xd9,0xe1,0xe1,0x38}, {0xeb,0xf8,0xf8,0x13}, {0x2b,0x98,0x98,0xb3}, {0x22,0x11,0x11,0x33}, -{0xd2,0x69,0x69,0xbb}, {0xa9,0xd9,0xd9,0x70}, {0x07,0x8e,0x8e,0x89}, {0x33,0x94,0x94,0xa7}, -{0x2d,0x9b,0x9b,0xb6}, {0x3c,0x1e,0x1e,0x22}, {0x15,0x87,0x87,0x92}, {0xc9,0xe9,0xe9,0x20}, -{0x87,0xce,0xce,0x49}, {0xaa,0x55,0x55,0xff}, {0x50,0x28,0x28,0x78}, {0xa5,0xdf,0xdf,0x7a}, -{0x03,0x8c,0x8c,0x8f}, {0x59,0xa1,0xa1,0xf8}, {0x09,0x89,0x89,0x80}, {0x1a,0x0d,0x0d,0x17}, -{0x65,0xbf,0xbf,0xda}, {0xd7,0xe6,0xe6,0x31}, {0x84,0x42,0x42,0xc6}, {0xd0,0x68,0x68,0xb8}, -{0x82,0x41,0x41,0xc3}, {0x29,0x99,0x99,0xb0}, {0x5a,0x2d,0x2d,0x77}, {0x1e,0x0f,0x0f,0x11}, -{0x7b,0xb0,0xb0,0xcb}, {0xa8,0x54,0x54,0xfc}, {0x6d,0xbb,0xbb,0xd6}, {0x2c,0x16,0x16,0x3a} - } -}; -#define T1 xT1.xt8 - -static const union xtab xT2 = { - .xt8 = { -{0xa5,0xc6,0x63,0x63}, {0x84,0xf8,0x7c,0x7c}, {0x99,0xee,0x77,0x77}, {0x8d,0xf6,0x7b,0x7b}, -{0x0d,0xff,0xf2,0xf2}, {0xbd,0xd6,0x6b,0x6b}, {0xb1,0xde,0x6f,0x6f}, {0x54,0x91,0xc5,0xc5}, -{0x50,0x60,0x30,0x30}, {0x03,0x02,0x01,0x01}, {0xa9,0xce,0x67,0x67}, {0x7d,0x56,0x2b,0x2b}, -{0x19,0xe7,0xfe,0xfe}, {0x62,0xb5,0xd7,0xd7}, {0xe6,0x4d,0xab,0xab}, {0x9a,0xec,0x76,0x76}, -{0x45,0x8f,0xca,0xca}, {0x9d,0x1f,0x82,0x82}, {0x40,0x89,0xc9,0xc9}, {0x87,0xfa,0x7d,0x7d}, -{0x15,0xef,0xfa,0xfa}, {0xeb,0xb2,0x59,0x59}, {0xc9,0x8e,0x47,0x47}, {0x0b,0xfb,0xf0,0xf0}, -{0xec,0x41,0xad,0xad}, {0x67,0xb3,0xd4,0xd4}, {0xfd,0x5f,0xa2,0xa2}, {0xea,0x45,0xaf,0xaf}, -{0xbf,0x23,0x9c,0x9c}, {0xf7,0x53,0xa4,0xa4}, {0x96,0xe4,0x72,0x72}, {0x5b,0x9b,0xc0,0xc0}, -{0xc2,0x75,0xb7,0xb7}, {0x1c,0xe1,0xfd,0xfd}, {0xae,0x3d,0x93,0x93}, {0x6a,0x4c,0x26,0x26}, -{0x5a,0x6c,0x36,0x36}, {0x41,0x7e,0x3f,0x3f}, {0x02,0xf5,0xf7,0xf7}, {0x4f,0x83,0xcc,0xcc}, -{0x5c,0x68,0x34,0x34}, {0xf4,0x51,0xa5,0xa5}, {0x34,0xd1,0xe5,0xe5}, {0x08,0xf9,0xf1,0xf1}, -{0x93,0xe2,0x71,0x71}, {0x73,0xab,0xd8,0xd8}, {0x53,0x62,0x31,0x31}, {0x3f,0x2a,0x15,0x15}, -{0x0c,0x08,0x04,0x04}, {0x52,0x95,0xc7,0xc7}, {0x65,0x46,0x23,0x23}, {0x5e,0x9d,0xc3,0xc3}, -{0x28,0x30,0x18,0x18}, {0xa1,0x37,0x96,0x96}, {0x0f,0x0a,0x05,0x05}, {0xb5,0x2f,0x9a,0x9a}, -{0x09,0x0e,0x07,0x07}, {0x36,0x24,0x12,0x12}, {0x9b,0x1b,0x80,0x80}, {0x3d,0xdf,0xe2,0xe2}, -{0x26,0xcd,0xeb,0xeb}, {0x69,0x4e,0x27,0x27}, {0xcd,0x7f,0xb2,0xb2}, {0x9f,0xea,0x75,0x75}, -{0x1b,0x12,0x09,0x09}, {0x9e,0x1d,0x83,0x83}, {0x74,0x58,0x2c,0x2c}, {0x2e,0x34,0x1a,0x1a}, -{0x2d,0x36,0x1b,0x1b}, {0xb2,0xdc,0x6e,0x6e}, {0xee,0xb4,0x5a,0x5a}, {0xfb,0x5b,0xa0,0xa0}, -{0xf6,0xa4,0x52,0x52}, {0x4d,0x76,0x3b,0x3b}, {0x61,0xb7,0xd6,0xd6}, {0xce,0x7d,0xb3,0xb3}, -{0x7b,0x52,0x29,0x29}, {0x3e,0xdd,0xe3,0xe3}, {0x71,0x5e,0x2f,0x2f}, {0x97,0x13,0x84,0x84}, -{0xf5,0xa6,0x53,0x53}, {0x68,0xb9,0xd1,0xd1}, {0x00,0x00,0x00,0x00}, {0x2c,0xc1,0xed,0xed}, -{0x60,0x40,0x20,0x20}, {0x1f,0xe3,0xfc,0xfc}, {0xc8,0x79,0xb1,0xb1}, {0xed,0xb6,0x5b,0x5b}, -{0xbe,0xd4,0x6a,0x6a}, {0x46,0x8d,0xcb,0xcb}, {0xd9,0x67,0xbe,0xbe}, {0x4b,0x72,0x39,0x39}, -{0xde,0x94,0x4a,0x4a}, {0xd4,0x98,0x4c,0x4c}, {0xe8,0xb0,0x58,0x58}, {0x4a,0x85,0xcf,0xcf}, -{0x6b,0xbb,0xd0,0xd0}, {0x2a,0xc5,0xef,0xef}, {0xe5,0x4f,0xaa,0xaa}, {0x16,0xed,0xfb,0xfb}, -{0xc5,0x86,0x43,0x43}, {0xd7,0x9a,0x4d,0x4d}, {0x55,0x66,0x33,0x33}, {0x94,0x11,0x85,0x85}, -{0xcf,0x8a,0x45,0x45}, {0x10,0xe9,0xf9,0xf9}, {0x06,0x04,0x02,0x02}, {0x81,0xfe,0x7f,0x7f}, -{0xf0,0xa0,0x50,0x50}, {0x44,0x78,0x3c,0x3c}, {0xba,0x25,0x9f,0x9f}, {0xe3,0x4b,0xa8,0xa8}, 
-{0xf3,0xa2,0x51,0x51}, {0xfe,0x5d,0xa3,0xa3}, {0xc0,0x80,0x40,0x40}, {0x8a,0x05,0x8f,0x8f}, -{0xad,0x3f,0x92,0x92}, {0xbc,0x21,0x9d,0x9d}, {0x48,0x70,0x38,0x38}, {0x04,0xf1,0xf5,0xf5}, -{0xdf,0x63,0xbc,0xbc}, {0xc1,0x77,0xb6,0xb6}, {0x75,0xaf,0xda,0xda}, {0x63,0x42,0x21,0x21}, -{0x30,0x20,0x10,0x10}, {0x1a,0xe5,0xff,0xff}, {0x0e,0xfd,0xf3,0xf3}, {0x6d,0xbf,0xd2,0xd2}, -{0x4c,0x81,0xcd,0xcd}, {0x14,0x18,0x0c,0x0c}, {0x35,0x26,0x13,0x13}, {0x2f,0xc3,0xec,0xec}, -{0xe1,0xbe,0x5f,0x5f}, {0xa2,0x35,0x97,0x97}, {0xcc,0x88,0x44,0x44}, {0x39,0x2e,0x17,0x17}, -{0x57,0x93,0xc4,0xc4}, {0xf2,0x55,0xa7,0xa7}, {0x82,0xfc,0x7e,0x7e}, {0x47,0x7a,0x3d,0x3d}, -{0xac,0xc8,0x64,0x64}, {0xe7,0xba,0x5d,0x5d}, {0x2b,0x32,0x19,0x19}, {0x95,0xe6,0x73,0x73}, -{0xa0,0xc0,0x60,0x60}, {0x98,0x19,0x81,0x81}, {0xd1,0x9e,0x4f,0x4f}, {0x7f,0xa3,0xdc,0xdc}, -{0x66,0x44,0x22,0x22}, {0x7e,0x54,0x2a,0x2a}, {0xab,0x3b,0x90,0x90}, {0x83,0x0b,0x88,0x88}, -{0xca,0x8c,0x46,0x46}, {0x29,0xc7,0xee,0xee}, {0xd3,0x6b,0xb8,0xb8}, {0x3c,0x28,0x14,0x14}, -{0x79,0xa7,0xde,0xde}, {0xe2,0xbc,0x5e,0x5e}, {0x1d,0x16,0x0b,0x0b}, {0x76,0xad,0xdb,0xdb}, -{0x3b,0xdb,0xe0,0xe0}, {0x56,0x64,0x32,0x32}, {0x4e,0x74,0x3a,0x3a}, {0x1e,0x14,0x0a,0x0a}, -{0xdb,0x92,0x49,0x49}, {0x0a,0x0c,0x06,0x06}, {0x6c,0x48,0x24,0x24}, {0xe4,0xb8,0x5c,0x5c}, -{0x5d,0x9f,0xc2,0xc2}, {0x6e,0xbd,0xd3,0xd3}, {0xef,0x43,0xac,0xac}, {0xa6,0xc4,0x62,0x62}, -{0xa8,0x39,0x91,0x91}, {0xa4,0x31,0x95,0x95}, {0x37,0xd3,0xe4,0xe4}, {0x8b,0xf2,0x79,0x79}, -{0x32,0xd5,0xe7,0xe7}, {0x43,0x8b,0xc8,0xc8}, {0x59,0x6e,0x37,0x37}, {0xb7,0xda,0x6d,0x6d}, -{0x8c,0x01,0x8d,0x8d}, {0x64,0xb1,0xd5,0xd5}, {0xd2,0x9c,0x4e,0x4e}, {0xe0,0x49,0xa9,0xa9}, -{0xb4,0xd8,0x6c,0x6c}, {0xfa,0xac,0x56,0x56}, {0x07,0xf3,0xf4,0xf4}, {0x25,0xcf,0xea,0xea}, -{0xaf,0xca,0x65,0x65}, {0x8e,0xf4,0x7a,0x7a}, {0xe9,0x47,0xae,0xae}, {0x18,0x10,0x08,0x08}, -{0xd5,0x6f,0xba,0xba}, {0x88,0xf0,0x78,0x78}, {0x6f,0x4a,0x25,0x25}, {0x72,0x5c,0x2e,0x2e}, -{0x24,0x38,0x1c,0x1c}, {0xf1,0x57,0xa6,0xa6}, {0xc7,0x73,0xb4,0xb4}, {0x51,0x97,0xc6,0xc6}, -{0x23,0xcb,0xe8,0xe8}, {0x7c,0xa1,0xdd,0xdd}, {0x9c,0xe8,0x74,0x74}, {0x21,0x3e,0x1f,0x1f}, -{0xdd,0x96,0x4b,0x4b}, {0xdc,0x61,0xbd,0xbd}, {0x86,0x0d,0x8b,0x8b}, {0x85,0x0f,0x8a,0x8a}, -{0x90,0xe0,0x70,0x70}, {0x42,0x7c,0x3e,0x3e}, {0xc4,0x71,0xb5,0xb5}, {0xaa,0xcc,0x66,0x66}, -{0xd8,0x90,0x48,0x48}, {0x05,0x06,0x03,0x03}, {0x01,0xf7,0xf6,0xf6}, {0x12,0x1c,0x0e,0x0e}, -{0xa3,0xc2,0x61,0x61}, {0x5f,0x6a,0x35,0x35}, {0xf9,0xae,0x57,0x57}, {0xd0,0x69,0xb9,0xb9}, -{0x91,0x17,0x86,0x86}, {0x58,0x99,0xc1,0xc1}, {0x27,0x3a,0x1d,0x1d}, {0xb9,0x27,0x9e,0x9e}, -{0x38,0xd9,0xe1,0xe1}, {0x13,0xeb,0xf8,0xf8}, {0xb3,0x2b,0x98,0x98}, {0x33,0x22,0x11,0x11}, -{0xbb,0xd2,0x69,0x69}, {0x70,0xa9,0xd9,0xd9}, {0x89,0x07,0x8e,0x8e}, {0xa7,0x33,0x94,0x94}, -{0xb6,0x2d,0x9b,0x9b}, {0x22,0x3c,0x1e,0x1e}, {0x92,0x15,0x87,0x87}, {0x20,0xc9,0xe9,0xe9}, -{0x49,0x87,0xce,0xce}, {0xff,0xaa,0x55,0x55}, {0x78,0x50,0x28,0x28}, {0x7a,0xa5,0xdf,0xdf}, -{0x8f,0x03,0x8c,0x8c}, {0xf8,0x59,0xa1,0xa1}, {0x80,0x09,0x89,0x89}, {0x17,0x1a,0x0d,0x0d}, -{0xda,0x65,0xbf,0xbf}, {0x31,0xd7,0xe6,0xe6}, {0xc6,0x84,0x42,0x42}, {0xb8,0xd0,0x68,0x68}, -{0xc3,0x82,0x41,0x41}, {0xb0,0x29,0x99,0x99}, {0x77,0x5a,0x2d,0x2d}, {0x11,0x1e,0x0f,0x0f}, -{0xcb,0x7b,0xb0,0xb0}, {0xfc,0xa8,0x54,0x54}, {0xd6,0x6d,0xbb,0xbb}, {0x3a,0x2c,0x16,0x16} - } -}; -#define T2 xT2.xt8 - -static const union xtab xT3 = { - .xt8 = { -{0x63,0xa5,0xc6,0x63}, {0x7c,0x84,0xf8,0x7c}, {0x77,0x99,0xee,0x77}, {0x7b,0x8d,0xf6,0x7b}, -{0xf2,0x0d,0xff,0xf2}, 
{0x6b,0xbd,0xd6,0x6b}, {0x6f,0xb1,0xde,0x6f}, {0xc5,0x54,0x91,0xc5}, -{0x30,0x50,0x60,0x30}, {0x01,0x03,0x02,0x01}, {0x67,0xa9,0xce,0x67}, {0x2b,0x7d,0x56,0x2b}, -{0xfe,0x19,0xe7,0xfe}, {0xd7,0x62,0xb5,0xd7}, {0xab,0xe6,0x4d,0xab}, {0x76,0x9a,0xec,0x76}, -{0xca,0x45,0x8f,0xca}, {0x82,0x9d,0x1f,0x82}, {0xc9,0x40,0x89,0xc9}, {0x7d,0x87,0xfa,0x7d}, -{0xfa,0x15,0xef,0xfa}, {0x59,0xeb,0xb2,0x59}, {0x47,0xc9,0x8e,0x47}, {0xf0,0x0b,0xfb,0xf0}, -{0xad,0xec,0x41,0xad}, {0xd4,0x67,0xb3,0xd4}, {0xa2,0xfd,0x5f,0xa2}, {0xaf,0xea,0x45,0xaf}, -{0x9c,0xbf,0x23,0x9c}, {0xa4,0xf7,0x53,0xa4}, {0x72,0x96,0xe4,0x72}, {0xc0,0x5b,0x9b,0xc0}, -{0xb7,0xc2,0x75,0xb7}, {0xfd,0x1c,0xe1,0xfd}, {0x93,0xae,0x3d,0x93}, {0x26,0x6a,0x4c,0x26}, -{0x36,0x5a,0x6c,0x36}, {0x3f,0x41,0x7e,0x3f}, {0xf7,0x02,0xf5,0xf7}, {0xcc,0x4f,0x83,0xcc}, -{0x34,0x5c,0x68,0x34}, {0xa5,0xf4,0x51,0xa5}, {0xe5,0x34,0xd1,0xe5}, {0xf1,0x08,0xf9,0xf1}, -{0x71,0x93,0xe2,0x71}, {0xd8,0x73,0xab,0xd8}, {0x31,0x53,0x62,0x31}, {0x15,0x3f,0x2a,0x15}, -{0x04,0x0c,0x08,0x04}, {0xc7,0x52,0x95,0xc7}, {0x23,0x65,0x46,0x23}, {0xc3,0x5e,0x9d,0xc3}, -{0x18,0x28,0x30,0x18}, {0x96,0xa1,0x37,0x96}, {0x05,0x0f,0x0a,0x05}, {0x9a,0xb5,0x2f,0x9a}, -{0x07,0x09,0x0e,0x07}, {0x12,0x36,0x24,0x12}, {0x80,0x9b,0x1b,0x80}, {0xe2,0x3d,0xdf,0xe2}, -{0xeb,0x26,0xcd,0xeb}, {0x27,0x69,0x4e,0x27}, {0xb2,0xcd,0x7f,0xb2}, {0x75,0x9f,0xea,0x75}, -{0x09,0x1b,0x12,0x09}, {0x83,0x9e,0x1d,0x83}, {0x2c,0x74,0x58,0x2c}, {0x1a,0x2e,0x34,0x1a}, -{0x1b,0x2d,0x36,0x1b}, {0x6e,0xb2,0xdc,0x6e}, {0x5a,0xee,0xb4,0x5a}, {0xa0,0xfb,0x5b,0xa0}, -{0x52,0xf6,0xa4,0x52}, {0x3b,0x4d,0x76,0x3b}, {0xd6,0x61,0xb7,0xd6}, {0xb3,0xce,0x7d,0xb3}, -{0x29,0x7b,0x52,0x29}, {0xe3,0x3e,0xdd,0xe3}, {0x2f,0x71,0x5e,0x2f}, {0x84,0x97,0x13,0x84}, -{0x53,0xf5,0xa6,0x53}, {0xd1,0x68,0xb9,0xd1}, {0x00,0x00,0x00,0x00}, {0xed,0x2c,0xc1,0xed}, -{0x20,0x60,0x40,0x20}, {0xfc,0x1f,0xe3,0xfc}, {0xb1,0xc8,0x79,0xb1}, {0x5b,0xed,0xb6,0x5b}, -{0x6a,0xbe,0xd4,0x6a}, {0xcb,0x46,0x8d,0xcb}, {0xbe,0xd9,0x67,0xbe}, {0x39,0x4b,0x72,0x39}, -{0x4a,0xde,0x94,0x4a}, {0x4c,0xd4,0x98,0x4c}, {0x58,0xe8,0xb0,0x58}, {0xcf,0x4a,0x85,0xcf}, -{0xd0,0x6b,0xbb,0xd0}, {0xef,0x2a,0xc5,0xef}, {0xaa,0xe5,0x4f,0xaa}, {0xfb,0x16,0xed,0xfb}, -{0x43,0xc5,0x86,0x43}, {0x4d,0xd7,0x9a,0x4d}, {0x33,0x55,0x66,0x33}, {0x85,0x94,0x11,0x85}, -{0x45,0xcf,0x8a,0x45}, {0xf9,0x10,0xe9,0xf9}, {0x02,0x06,0x04,0x02}, {0x7f,0x81,0xfe,0x7f}, -{0x50,0xf0,0xa0,0x50}, {0x3c,0x44,0x78,0x3c}, {0x9f,0xba,0x25,0x9f}, {0xa8,0xe3,0x4b,0xa8}, -{0x51,0xf3,0xa2,0x51}, {0xa3,0xfe,0x5d,0xa3}, {0x40,0xc0,0x80,0x40}, {0x8f,0x8a,0x05,0x8f}, -{0x92,0xad,0x3f,0x92}, {0x9d,0xbc,0x21,0x9d}, {0x38,0x48,0x70,0x38}, {0xf5,0x04,0xf1,0xf5}, -{0xbc,0xdf,0x63,0xbc}, {0xb6,0xc1,0x77,0xb6}, {0xda,0x75,0xaf,0xda}, {0x21,0x63,0x42,0x21}, -{0x10,0x30,0x20,0x10}, {0xff,0x1a,0xe5,0xff}, {0xf3,0x0e,0xfd,0xf3}, {0xd2,0x6d,0xbf,0xd2}, -{0xcd,0x4c,0x81,0xcd}, {0x0c,0x14,0x18,0x0c}, {0x13,0x35,0x26,0x13}, {0xec,0x2f,0xc3,0xec}, -{0x5f,0xe1,0xbe,0x5f}, {0x97,0xa2,0x35,0x97}, {0x44,0xcc,0x88,0x44}, {0x17,0x39,0x2e,0x17}, -{0xc4,0x57,0x93,0xc4}, {0xa7,0xf2,0x55,0xa7}, {0x7e,0x82,0xfc,0x7e}, {0x3d,0x47,0x7a,0x3d}, -{0x64,0xac,0xc8,0x64}, {0x5d,0xe7,0xba,0x5d}, {0x19,0x2b,0x32,0x19}, {0x73,0x95,0xe6,0x73}, -{0x60,0xa0,0xc0,0x60}, {0x81,0x98,0x19,0x81}, {0x4f,0xd1,0x9e,0x4f}, {0xdc,0x7f,0xa3,0xdc}, -{0x22,0x66,0x44,0x22}, {0x2a,0x7e,0x54,0x2a}, {0x90,0xab,0x3b,0x90}, {0x88,0x83,0x0b,0x88}, -{0x46,0xca,0x8c,0x46}, {0xee,0x29,0xc7,0xee}, {0xb8,0xd3,0x6b,0xb8}, {0x14,0x3c,0x28,0x14}, -{0xde,0x79,0xa7,0xde}, 
{0x5e,0xe2,0xbc,0x5e}, {0x0b,0x1d,0x16,0x0b}, {0xdb,0x76,0xad,0xdb}, -{0xe0,0x3b,0xdb,0xe0}, {0x32,0x56,0x64,0x32}, {0x3a,0x4e,0x74,0x3a}, {0x0a,0x1e,0x14,0x0a}, -{0x49,0xdb,0x92,0x49}, {0x06,0x0a,0x0c,0x06}, {0x24,0x6c,0x48,0x24}, {0x5c,0xe4,0xb8,0x5c}, -{0xc2,0x5d,0x9f,0xc2}, {0xd3,0x6e,0xbd,0xd3}, {0xac,0xef,0x43,0xac}, {0x62,0xa6,0xc4,0x62}, -{0x91,0xa8,0x39,0x91}, {0x95,0xa4,0x31,0x95}, {0xe4,0x37,0xd3,0xe4}, {0x79,0x8b,0xf2,0x79}, -{0xe7,0x32,0xd5,0xe7}, {0xc8,0x43,0x8b,0xc8}, {0x37,0x59,0x6e,0x37}, {0x6d,0xb7,0xda,0x6d}, -{0x8d,0x8c,0x01,0x8d}, {0xd5,0x64,0xb1,0xd5}, {0x4e,0xd2,0x9c,0x4e}, {0xa9,0xe0,0x49,0xa9}, -{0x6c,0xb4,0xd8,0x6c}, {0x56,0xfa,0xac,0x56}, {0xf4,0x07,0xf3,0xf4}, {0xea,0x25,0xcf,0xea}, -{0x65,0xaf,0xca,0x65}, {0x7a,0x8e,0xf4,0x7a}, {0xae,0xe9,0x47,0xae}, {0x08,0x18,0x10,0x08}, -{0xba,0xd5,0x6f,0xba}, {0x78,0x88,0xf0,0x78}, {0x25,0x6f,0x4a,0x25}, {0x2e,0x72,0x5c,0x2e}, -{0x1c,0x24,0x38,0x1c}, {0xa6,0xf1,0x57,0xa6}, {0xb4,0xc7,0x73,0xb4}, {0xc6,0x51,0x97,0xc6}, -{0xe8,0x23,0xcb,0xe8}, {0xdd,0x7c,0xa1,0xdd}, {0x74,0x9c,0xe8,0x74}, {0x1f,0x21,0x3e,0x1f}, -{0x4b,0xdd,0x96,0x4b}, {0xbd,0xdc,0x61,0xbd}, {0x8b,0x86,0x0d,0x8b}, {0x8a,0x85,0x0f,0x8a}, -{0x70,0x90,0xe0,0x70}, {0x3e,0x42,0x7c,0x3e}, {0xb5,0xc4,0x71,0xb5}, {0x66,0xaa,0xcc,0x66}, -{0x48,0xd8,0x90,0x48}, {0x03,0x05,0x06,0x03}, {0xf6,0x01,0xf7,0xf6}, {0x0e,0x12,0x1c,0x0e}, -{0x61,0xa3,0xc2,0x61}, {0x35,0x5f,0x6a,0x35}, {0x57,0xf9,0xae,0x57}, {0xb9,0xd0,0x69,0xb9}, -{0x86,0x91,0x17,0x86}, {0xc1,0x58,0x99,0xc1}, {0x1d,0x27,0x3a,0x1d}, {0x9e,0xb9,0x27,0x9e}, -{0xe1,0x38,0xd9,0xe1}, {0xf8,0x13,0xeb,0xf8}, {0x98,0xb3,0x2b,0x98}, {0x11,0x33,0x22,0x11}, -{0x69,0xbb,0xd2,0x69}, {0xd9,0x70,0xa9,0xd9}, {0x8e,0x89,0x07,0x8e}, {0x94,0xa7,0x33,0x94}, -{0x9b,0xb6,0x2d,0x9b}, {0x1e,0x22,0x3c,0x1e}, {0x87,0x92,0x15,0x87}, {0xe9,0x20,0xc9,0xe9}, -{0xce,0x49,0x87,0xce}, {0x55,0xff,0xaa,0x55}, {0x28,0x78,0x50,0x28}, {0xdf,0x7a,0xa5,0xdf}, -{0x8c,0x8f,0x03,0x8c}, {0xa1,0xf8,0x59,0xa1}, {0x89,0x80,0x09,0x89}, {0x0d,0x17,0x1a,0x0d}, -{0xbf,0xda,0x65,0xbf}, {0xe6,0x31,0xd7,0xe6}, {0x42,0xc6,0x84,0x42}, {0x68,0xb8,0xd0,0x68}, -{0x41,0xc3,0x82,0x41}, {0x99,0xb0,0x29,0x99}, {0x2d,0x77,0x5a,0x2d}, {0x0f,0x11,0x1e,0x0f}, -{0xb0,0xcb,0x7b,0xb0}, {0x54,0xfc,0xa8,0x54}, {0xbb,0xd6,0x6d,0xbb}, {0x16,0x3a,0x2c,0x16} - } -}; -#define T3 xT3.xt8 - -static const union xtab xT4 = { - .xt8 = { -{0x63,0x63,0xa5,0xc6}, {0x7c,0x7c,0x84,0xf8}, {0x77,0x77,0x99,0xee}, {0x7b,0x7b,0x8d,0xf6}, -{0xf2,0xf2,0x0d,0xff}, {0x6b,0x6b,0xbd,0xd6}, {0x6f,0x6f,0xb1,0xde}, {0xc5,0xc5,0x54,0x91}, -{0x30,0x30,0x50,0x60}, {0x01,0x01,0x03,0x02}, {0x67,0x67,0xa9,0xce}, {0x2b,0x2b,0x7d,0x56}, -{0xfe,0xfe,0x19,0xe7}, {0xd7,0xd7,0x62,0xb5}, {0xab,0xab,0xe6,0x4d}, {0x76,0x76,0x9a,0xec}, -{0xca,0xca,0x45,0x8f}, {0x82,0x82,0x9d,0x1f}, {0xc9,0xc9,0x40,0x89}, {0x7d,0x7d,0x87,0xfa}, -{0xfa,0xfa,0x15,0xef}, {0x59,0x59,0xeb,0xb2}, {0x47,0x47,0xc9,0x8e}, {0xf0,0xf0,0x0b,0xfb}, -{0xad,0xad,0xec,0x41}, {0xd4,0xd4,0x67,0xb3}, {0xa2,0xa2,0xfd,0x5f}, {0xaf,0xaf,0xea,0x45}, -{0x9c,0x9c,0xbf,0x23}, {0xa4,0xa4,0xf7,0x53}, {0x72,0x72,0x96,0xe4}, {0xc0,0xc0,0x5b,0x9b}, -{0xb7,0xb7,0xc2,0x75}, {0xfd,0xfd,0x1c,0xe1}, {0x93,0x93,0xae,0x3d}, {0x26,0x26,0x6a,0x4c}, -{0x36,0x36,0x5a,0x6c}, {0x3f,0x3f,0x41,0x7e}, {0xf7,0xf7,0x02,0xf5}, {0xcc,0xcc,0x4f,0x83}, -{0x34,0x34,0x5c,0x68}, {0xa5,0xa5,0xf4,0x51}, {0xe5,0xe5,0x34,0xd1}, {0xf1,0xf1,0x08,0xf9}, -{0x71,0x71,0x93,0xe2}, {0xd8,0xd8,0x73,0xab}, {0x31,0x31,0x53,0x62}, {0x15,0x15,0x3f,0x2a}, -{0x04,0x04,0x0c,0x08}, {0xc7,0xc7,0x52,0x95}, 
{0x23,0x23,0x65,0x46}, {0xc3,0xc3,0x5e,0x9d}, -{0x18,0x18,0x28,0x30}, {0x96,0x96,0xa1,0x37}, {0x05,0x05,0x0f,0x0a}, {0x9a,0x9a,0xb5,0x2f}, -{0x07,0x07,0x09,0x0e}, {0x12,0x12,0x36,0x24}, {0x80,0x80,0x9b,0x1b}, {0xe2,0xe2,0x3d,0xdf}, -{0xeb,0xeb,0x26,0xcd}, {0x27,0x27,0x69,0x4e}, {0xb2,0xb2,0xcd,0x7f}, {0x75,0x75,0x9f,0xea}, -{0x09,0x09,0x1b,0x12}, {0x83,0x83,0x9e,0x1d}, {0x2c,0x2c,0x74,0x58}, {0x1a,0x1a,0x2e,0x34}, -{0x1b,0x1b,0x2d,0x36}, {0x6e,0x6e,0xb2,0xdc}, {0x5a,0x5a,0xee,0xb4}, {0xa0,0xa0,0xfb,0x5b}, -{0x52,0x52,0xf6,0xa4}, {0x3b,0x3b,0x4d,0x76}, {0xd6,0xd6,0x61,0xb7}, {0xb3,0xb3,0xce,0x7d}, -{0x29,0x29,0x7b,0x52}, {0xe3,0xe3,0x3e,0xdd}, {0x2f,0x2f,0x71,0x5e}, {0x84,0x84,0x97,0x13}, -{0x53,0x53,0xf5,0xa6}, {0xd1,0xd1,0x68,0xb9}, {0x00,0x00,0x00,0x00}, {0xed,0xed,0x2c,0xc1}, -{0x20,0x20,0x60,0x40}, {0xfc,0xfc,0x1f,0xe3}, {0xb1,0xb1,0xc8,0x79}, {0x5b,0x5b,0xed,0xb6}, -{0x6a,0x6a,0xbe,0xd4}, {0xcb,0xcb,0x46,0x8d}, {0xbe,0xbe,0xd9,0x67}, {0x39,0x39,0x4b,0x72}, -{0x4a,0x4a,0xde,0x94}, {0x4c,0x4c,0xd4,0x98}, {0x58,0x58,0xe8,0xb0}, {0xcf,0xcf,0x4a,0x85}, -{0xd0,0xd0,0x6b,0xbb}, {0xef,0xef,0x2a,0xc5}, {0xaa,0xaa,0xe5,0x4f}, {0xfb,0xfb,0x16,0xed}, -{0x43,0x43,0xc5,0x86}, {0x4d,0x4d,0xd7,0x9a}, {0x33,0x33,0x55,0x66}, {0x85,0x85,0x94,0x11}, -{0x45,0x45,0xcf,0x8a}, {0xf9,0xf9,0x10,0xe9}, {0x02,0x02,0x06,0x04}, {0x7f,0x7f,0x81,0xfe}, -{0x50,0x50,0xf0,0xa0}, {0x3c,0x3c,0x44,0x78}, {0x9f,0x9f,0xba,0x25}, {0xa8,0xa8,0xe3,0x4b}, -{0x51,0x51,0xf3,0xa2}, {0xa3,0xa3,0xfe,0x5d}, {0x40,0x40,0xc0,0x80}, {0x8f,0x8f,0x8a,0x05}, -{0x92,0x92,0xad,0x3f}, {0x9d,0x9d,0xbc,0x21}, {0x38,0x38,0x48,0x70}, {0xf5,0xf5,0x04,0xf1}, -{0xbc,0xbc,0xdf,0x63}, {0xb6,0xb6,0xc1,0x77}, {0xda,0xda,0x75,0xaf}, {0x21,0x21,0x63,0x42}, -{0x10,0x10,0x30,0x20}, {0xff,0xff,0x1a,0xe5}, {0xf3,0xf3,0x0e,0xfd}, {0xd2,0xd2,0x6d,0xbf}, -{0xcd,0xcd,0x4c,0x81}, {0x0c,0x0c,0x14,0x18}, {0x13,0x13,0x35,0x26}, {0xec,0xec,0x2f,0xc3}, -{0x5f,0x5f,0xe1,0xbe}, {0x97,0x97,0xa2,0x35}, {0x44,0x44,0xcc,0x88}, {0x17,0x17,0x39,0x2e}, -{0xc4,0xc4,0x57,0x93}, {0xa7,0xa7,0xf2,0x55}, {0x7e,0x7e,0x82,0xfc}, {0x3d,0x3d,0x47,0x7a}, -{0x64,0x64,0xac,0xc8}, {0x5d,0x5d,0xe7,0xba}, {0x19,0x19,0x2b,0x32}, {0x73,0x73,0x95,0xe6}, -{0x60,0x60,0xa0,0xc0}, {0x81,0x81,0x98,0x19}, {0x4f,0x4f,0xd1,0x9e}, {0xdc,0xdc,0x7f,0xa3}, -{0x22,0x22,0x66,0x44}, {0x2a,0x2a,0x7e,0x54}, {0x90,0x90,0xab,0x3b}, {0x88,0x88,0x83,0x0b}, -{0x46,0x46,0xca,0x8c}, {0xee,0xee,0x29,0xc7}, {0xb8,0xb8,0xd3,0x6b}, {0x14,0x14,0x3c,0x28}, -{0xde,0xde,0x79,0xa7}, {0x5e,0x5e,0xe2,0xbc}, {0x0b,0x0b,0x1d,0x16}, {0xdb,0xdb,0x76,0xad}, -{0xe0,0xe0,0x3b,0xdb}, {0x32,0x32,0x56,0x64}, {0x3a,0x3a,0x4e,0x74}, {0x0a,0x0a,0x1e,0x14}, -{0x49,0x49,0xdb,0x92}, {0x06,0x06,0x0a,0x0c}, {0x24,0x24,0x6c,0x48}, {0x5c,0x5c,0xe4,0xb8}, -{0xc2,0xc2,0x5d,0x9f}, {0xd3,0xd3,0x6e,0xbd}, {0xac,0xac,0xef,0x43}, {0x62,0x62,0xa6,0xc4}, -{0x91,0x91,0xa8,0x39}, {0x95,0x95,0xa4,0x31}, {0xe4,0xe4,0x37,0xd3}, {0x79,0x79,0x8b,0xf2}, -{0xe7,0xe7,0x32,0xd5}, {0xc8,0xc8,0x43,0x8b}, {0x37,0x37,0x59,0x6e}, {0x6d,0x6d,0xb7,0xda}, -{0x8d,0x8d,0x8c,0x01}, {0xd5,0xd5,0x64,0xb1}, {0x4e,0x4e,0xd2,0x9c}, {0xa9,0xa9,0xe0,0x49}, -{0x6c,0x6c,0xb4,0xd8}, {0x56,0x56,0xfa,0xac}, {0xf4,0xf4,0x07,0xf3}, {0xea,0xea,0x25,0xcf}, -{0x65,0x65,0xaf,0xca}, {0x7a,0x7a,0x8e,0xf4}, {0xae,0xae,0xe9,0x47}, {0x08,0x08,0x18,0x10}, -{0xba,0xba,0xd5,0x6f}, {0x78,0x78,0x88,0xf0}, {0x25,0x25,0x6f,0x4a}, {0x2e,0x2e,0x72,0x5c}, -{0x1c,0x1c,0x24,0x38}, {0xa6,0xa6,0xf1,0x57}, {0xb4,0xb4,0xc7,0x73}, {0xc6,0xc6,0x51,0x97}, -{0xe8,0xe8,0x23,0xcb}, {0xdd,0xdd,0x7c,0xa1}, 
{0x74,0x74,0x9c,0xe8}, {0x1f,0x1f,0x21,0x3e}, -{0x4b,0x4b,0xdd,0x96}, {0xbd,0xbd,0xdc,0x61}, {0x8b,0x8b,0x86,0x0d}, {0x8a,0x8a,0x85,0x0f}, -{0x70,0x70,0x90,0xe0}, {0x3e,0x3e,0x42,0x7c}, {0xb5,0xb5,0xc4,0x71}, {0x66,0x66,0xaa,0xcc}, -{0x48,0x48,0xd8,0x90}, {0x03,0x03,0x05,0x06}, {0xf6,0xf6,0x01,0xf7}, {0x0e,0x0e,0x12,0x1c}, -{0x61,0x61,0xa3,0xc2}, {0x35,0x35,0x5f,0x6a}, {0x57,0x57,0xf9,0xae}, {0xb9,0xb9,0xd0,0x69}, -{0x86,0x86,0x91,0x17}, {0xc1,0xc1,0x58,0x99}, {0x1d,0x1d,0x27,0x3a}, {0x9e,0x9e,0xb9,0x27}, -{0xe1,0xe1,0x38,0xd9}, {0xf8,0xf8,0x13,0xeb}, {0x98,0x98,0xb3,0x2b}, {0x11,0x11,0x33,0x22}, -{0x69,0x69,0xbb,0xd2}, {0xd9,0xd9,0x70,0xa9}, {0x8e,0x8e,0x89,0x07}, {0x94,0x94,0xa7,0x33}, -{0x9b,0x9b,0xb6,0x2d}, {0x1e,0x1e,0x22,0x3c}, {0x87,0x87,0x92,0x15}, {0xe9,0xe9,0x20,0xc9}, -{0xce,0xce,0x49,0x87}, {0x55,0x55,0xff,0xaa}, {0x28,0x28,0x78,0x50}, {0xdf,0xdf,0x7a,0xa5}, -{0x8c,0x8c,0x8f,0x03}, {0xa1,0xa1,0xf8,0x59}, {0x89,0x89,0x80,0x09}, {0x0d,0x0d,0x17,0x1a}, -{0xbf,0xbf,0xda,0x65}, {0xe6,0xe6,0x31,0xd7}, {0x42,0x42,0xc6,0x84}, {0x68,0x68,0xb8,0xd0}, -{0x41,0x41,0xc3,0x82}, {0x99,0x99,0xb0,0x29}, {0x2d,0x2d,0x77,0x5a}, {0x0f,0x0f,0x11,0x1e}, -{0xb0,0xb0,0xcb,0x7b}, {0x54,0x54,0xfc,0xa8}, {0xbb,0xbb,0xd6,0x6d}, {0x16,0x16,0x3a,0x2c} - } -}; -#define T4 xT4.xt8 - -static const union xtab xT5 = { - .xt8 = { -{0x51,0xf4,0xa7,0x50}, {0x7e,0x41,0x65,0x53}, {0x1a,0x17,0xa4,0xc3}, {0x3a,0x27,0x5e,0x96}, -{0x3b,0xab,0x6b,0xcb}, {0x1f,0x9d,0x45,0xf1}, {0xac,0xfa,0x58,0xab}, {0x4b,0xe3,0x03,0x93}, -{0x20,0x30,0xfa,0x55}, {0xad,0x76,0x6d,0xf6}, {0x88,0xcc,0x76,0x91}, {0xf5,0x02,0x4c,0x25}, -{0x4f,0xe5,0xd7,0xfc}, {0xc5,0x2a,0xcb,0xd7}, {0x26,0x35,0x44,0x80}, {0xb5,0x62,0xa3,0x8f}, -{0xde,0xb1,0x5a,0x49}, {0x25,0xba,0x1b,0x67}, {0x45,0xea,0x0e,0x98}, {0x5d,0xfe,0xc0,0xe1}, -{0xc3,0x2f,0x75,0x02}, {0x81,0x4c,0xf0,0x12}, {0x8d,0x46,0x97,0xa3}, {0x6b,0xd3,0xf9,0xc6}, -{0x03,0x8f,0x5f,0xe7}, {0x15,0x92,0x9c,0x95}, {0xbf,0x6d,0x7a,0xeb}, {0x95,0x52,0x59,0xda}, -{0xd4,0xbe,0x83,0x2d}, {0x58,0x74,0x21,0xd3}, {0x49,0xe0,0x69,0x29}, {0x8e,0xc9,0xc8,0x44}, -{0x75,0xc2,0x89,0x6a}, {0xf4,0x8e,0x79,0x78}, {0x99,0x58,0x3e,0x6b}, {0x27,0xb9,0x71,0xdd}, -{0xbe,0xe1,0x4f,0xb6}, {0xf0,0x88,0xad,0x17}, {0xc9,0x20,0xac,0x66}, {0x7d,0xce,0x3a,0xb4}, -{0x63,0xdf,0x4a,0x18}, {0xe5,0x1a,0x31,0x82}, {0x97,0x51,0x33,0x60}, {0x62,0x53,0x7f,0x45}, -{0xb1,0x64,0x77,0xe0}, {0xbb,0x6b,0xae,0x84}, {0xfe,0x81,0xa0,0x1c}, {0xf9,0x08,0x2b,0x94}, -{0x70,0x48,0x68,0x58}, {0x8f,0x45,0xfd,0x19}, {0x94,0xde,0x6c,0x87}, {0x52,0x7b,0xf8,0xb7}, -{0xab,0x73,0xd3,0x23}, {0x72,0x4b,0x02,0xe2}, {0xe3,0x1f,0x8f,0x57}, {0x66,0x55,0xab,0x2a}, -{0xb2,0xeb,0x28,0x07}, {0x2f,0xb5,0xc2,0x03}, {0x86,0xc5,0x7b,0x9a}, {0xd3,0x37,0x08,0xa5}, -{0x30,0x28,0x87,0xf2}, {0x23,0xbf,0xa5,0xb2}, {0x02,0x03,0x6a,0xba}, {0xed,0x16,0x82,0x5c}, -{0x8a,0xcf,0x1c,0x2b}, {0xa7,0x79,0xb4,0x92}, {0xf3,0x07,0xf2,0xf0}, {0x4e,0x69,0xe2,0xa1}, -{0x65,0xda,0xf4,0xcd}, {0x06,0x05,0xbe,0xd5}, {0xd1,0x34,0x62,0x1f}, {0xc4,0xa6,0xfe,0x8a}, -{0x34,0x2e,0x53,0x9d}, {0xa2,0xf3,0x55,0xa0}, {0x05,0x8a,0xe1,0x32}, {0xa4,0xf6,0xeb,0x75}, -{0x0b,0x83,0xec,0x39}, {0x40,0x60,0xef,0xaa}, {0x5e,0x71,0x9f,0x06}, {0xbd,0x6e,0x10,0x51}, -{0x3e,0x21,0x8a,0xf9}, {0x96,0xdd,0x06,0x3d}, {0xdd,0x3e,0x05,0xae}, {0x4d,0xe6,0xbd,0x46}, -{0x91,0x54,0x8d,0xb5}, {0x71,0xc4,0x5d,0x05}, {0x04,0x06,0xd4,0x6f}, {0x60,0x50,0x15,0xff}, -{0x19,0x98,0xfb,0x24}, {0xd6,0xbd,0xe9,0x97}, {0x89,0x40,0x43,0xcc}, {0x67,0xd9,0x9e,0x77}, -{0xb0,0xe8,0x42,0xbd}, {0x07,0x89,0x8b,0x88}, {0xe7,0x19,0x5b,0x38}, 
{0x79,0xc8,0xee,0xdb}, -{0xa1,0x7c,0x0a,0x47}, {0x7c,0x42,0x0f,0xe9}, {0xf8,0x84,0x1e,0xc9}, {0x00,0x00,0x00,0x00}, -{0x09,0x80,0x86,0x83}, {0x32,0x2b,0xed,0x48}, {0x1e,0x11,0x70,0xac}, {0x6c,0x5a,0x72,0x4e}, -{0xfd,0x0e,0xff,0xfb}, {0x0f,0x85,0x38,0x56}, {0x3d,0xae,0xd5,0x1e}, {0x36,0x2d,0x39,0x27}, -{0x0a,0x0f,0xd9,0x64}, {0x68,0x5c,0xa6,0x21}, {0x9b,0x5b,0x54,0xd1}, {0x24,0x36,0x2e,0x3a}, -{0x0c,0x0a,0x67,0xb1}, {0x93,0x57,0xe7,0x0f}, {0xb4,0xee,0x96,0xd2}, {0x1b,0x9b,0x91,0x9e}, -{0x80,0xc0,0xc5,0x4f}, {0x61,0xdc,0x20,0xa2}, {0x5a,0x77,0x4b,0x69}, {0x1c,0x12,0x1a,0x16}, -{0xe2,0x93,0xba,0x0a}, {0xc0,0xa0,0x2a,0xe5}, {0x3c,0x22,0xe0,0x43}, {0x12,0x1b,0x17,0x1d}, -{0x0e,0x09,0x0d,0x0b}, {0xf2,0x8b,0xc7,0xad}, {0x2d,0xb6,0xa8,0xb9}, {0x14,0x1e,0xa9,0xc8}, -{0x57,0xf1,0x19,0x85}, {0xaf,0x75,0x07,0x4c}, {0xee,0x99,0xdd,0xbb}, {0xa3,0x7f,0x60,0xfd}, -{0xf7,0x01,0x26,0x9f}, {0x5c,0x72,0xf5,0xbc}, {0x44,0x66,0x3b,0xc5}, {0x5b,0xfb,0x7e,0x34}, -{0x8b,0x43,0x29,0x76}, {0xcb,0x23,0xc6,0xdc}, {0xb6,0xed,0xfc,0x68}, {0xb8,0xe4,0xf1,0x63}, -{0xd7,0x31,0xdc,0xca}, {0x42,0x63,0x85,0x10}, {0x13,0x97,0x22,0x40}, {0x84,0xc6,0x11,0x20}, -{0x85,0x4a,0x24,0x7d}, {0xd2,0xbb,0x3d,0xf8}, {0xae,0xf9,0x32,0x11}, {0xc7,0x29,0xa1,0x6d}, -{0x1d,0x9e,0x2f,0x4b}, {0xdc,0xb2,0x30,0xf3}, {0x0d,0x86,0x52,0xec}, {0x77,0xc1,0xe3,0xd0}, -{0x2b,0xb3,0x16,0x6c}, {0xa9,0x70,0xb9,0x99}, {0x11,0x94,0x48,0xfa}, {0x47,0xe9,0x64,0x22}, -{0xa8,0xfc,0x8c,0xc4}, {0xa0,0xf0,0x3f,0x1a}, {0x56,0x7d,0x2c,0xd8}, {0x22,0x33,0x90,0xef}, -{0x87,0x49,0x4e,0xc7}, {0xd9,0x38,0xd1,0xc1}, {0x8c,0xca,0xa2,0xfe}, {0x98,0xd4,0x0b,0x36}, -{0xa6,0xf5,0x81,0xcf}, {0xa5,0x7a,0xde,0x28}, {0xda,0xb7,0x8e,0x26}, {0x3f,0xad,0xbf,0xa4}, -{0x2c,0x3a,0x9d,0xe4}, {0x50,0x78,0x92,0x0d}, {0x6a,0x5f,0xcc,0x9b}, {0x54,0x7e,0x46,0x62}, -{0xf6,0x8d,0x13,0xc2}, {0x90,0xd8,0xb8,0xe8}, {0x2e,0x39,0xf7,0x5e}, {0x82,0xc3,0xaf,0xf5}, -{0x9f,0x5d,0x80,0xbe}, {0x69,0xd0,0x93,0x7c}, {0x6f,0xd5,0x2d,0xa9}, {0xcf,0x25,0x12,0xb3}, -{0xc8,0xac,0x99,0x3b}, {0x10,0x18,0x7d,0xa7}, {0xe8,0x9c,0x63,0x6e}, {0xdb,0x3b,0xbb,0x7b}, -{0xcd,0x26,0x78,0x09}, {0x6e,0x59,0x18,0xf4}, {0xec,0x9a,0xb7,0x01}, {0x83,0x4f,0x9a,0xa8}, -{0xe6,0x95,0x6e,0x65}, {0xaa,0xff,0xe6,0x7e}, {0x21,0xbc,0xcf,0x08}, {0xef,0x15,0xe8,0xe6}, -{0xba,0xe7,0x9b,0xd9}, {0x4a,0x6f,0x36,0xce}, {0xea,0x9f,0x09,0xd4}, {0x29,0xb0,0x7c,0xd6}, -{0x31,0xa4,0xb2,0xaf}, {0x2a,0x3f,0x23,0x31}, {0xc6,0xa5,0x94,0x30}, {0x35,0xa2,0x66,0xc0}, -{0x74,0x4e,0xbc,0x37}, {0xfc,0x82,0xca,0xa6}, {0xe0,0x90,0xd0,0xb0}, {0x33,0xa7,0xd8,0x15}, -{0xf1,0x04,0x98,0x4a}, {0x41,0xec,0xda,0xf7}, {0x7f,0xcd,0x50,0x0e}, {0x17,0x91,0xf6,0x2f}, -{0x76,0x4d,0xd6,0x8d}, {0x43,0xef,0xb0,0x4d}, {0xcc,0xaa,0x4d,0x54}, {0xe4,0x96,0x04,0xdf}, -{0x9e,0xd1,0xb5,0xe3}, {0x4c,0x6a,0x88,0x1b}, {0xc1,0x2c,0x1f,0xb8}, {0x46,0x65,0x51,0x7f}, -{0x9d,0x5e,0xea,0x04}, {0x01,0x8c,0x35,0x5d}, {0xfa,0x87,0x74,0x73}, {0xfb,0x0b,0x41,0x2e}, -{0xb3,0x67,0x1d,0x5a}, {0x92,0xdb,0xd2,0x52}, {0xe9,0x10,0x56,0x33}, {0x6d,0xd6,0x47,0x13}, -{0x9a,0xd7,0x61,0x8c}, {0x37,0xa1,0x0c,0x7a}, {0x59,0xf8,0x14,0x8e}, {0xeb,0x13,0x3c,0x89}, -{0xce,0xa9,0x27,0xee}, {0xb7,0x61,0xc9,0x35}, {0xe1,0x1c,0xe5,0xed}, {0x7a,0x47,0xb1,0x3c}, -{0x9c,0xd2,0xdf,0x59}, {0x55,0xf2,0x73,0x3f}, {0x18,0x14,0xce,0x79}, {0x73,0xc7,0x37,0xbf}, -{0x53,0xf7,0xcd,0xea}, {0x5f,0xfd,0xaa,0x5b}, {0xdf,0x3d,0x6f,0x14}, {0x78,0x44,0xdb,0x86}, -{0xca,0xaf,0xf3,0x81}, {0xb9,0x68,0xc4,0x3e}, {0x38,0x24,0x34,0x2c}, {0xc2,0xa3,0x40,0x5f}, -{0x16,0x1d,0xc3,0x72}, {0xbc,0xe2,0x25,0x0c}, {0x28,0x3c,0x49,0x8b}, 
{0xff,0x0d,0x95,0x41}, -{0x39,0xa8,0x01,0x71}, {0x08,0x0c,0xb3,0xde}, {0xd8,0xb4,0xe4,0x9c}, {0x64,0x56,0xc1,0x90}, -{0x7b,0xcb,0x84,0x61}, {0xd5,0x32,0xb6,0x70}, {0x48,0x6c,0x5c,0x74}, {0xd0,0xb8,0x57,0x42} - } -}; -#define T5 xT5.xt8 - -static const union xtab xT6 = { - .xt8 = { -{0x50,0x51,0xf4,0xa7}, {0x53,0x7e,0x41,0x65}, {0xc3,0x1a,0x17,0xa4}, {0x96,0x3a,0x27,0x5e}, -{0xcb,0x3b,0xab,0x6b}, {0xf1,0x1f,0x9d,0x45}, {0xab,0xac,0xfa,0x58}, {0x93,0x4b,0xe3,0x03}, -{0x55,0x20,0x30,0xfa}, {0xf6,0xad,0x76,0x6d}, {0x91,0x88,0xcc,0x76}, {0x25,0xf5,0x02,0x4c}, -{0xfc,0x4f,0xe5,0xd7}, {0xd7,0xc5,0x2a,0xcb}, {0x80,0x26,0x35,0x44}, {0x8f,0xb5,0x62,0xa3}, -{0x49,0xde,0xb1,0x5a}, {0x67,0x25,0xba,0x1b}, {0x98,0x45,0xea,0x0e}, {0xe1,0x5d,0xfe,0xc0}, -{0x02,0xc3,0x2f,0x75}, {0x12,0x81,0x4c,0xf0}, {0xa3,0x8d,0x46,0x97}, {0xc6,0x6b,0xd3,0xf9}, -{0xe7,0x03,0x8f,0x5f}, {0x95,0x15,0x92,0x9c}, {0xeb,0xbf,0x6d,0x7a}, {0xda,0x95,0x52,0x59}, -{0x2d,0xd4,0xbe,0x83}, {0xd3,0x58,0x74,0x21}, {0x29,0x49,0xe0,0x69}, {0x44,0x8e,0xc9,0xc8}, -{0x6a,0x75,0xc2,0x89}, {0x78,0xf4,0x8e,0x79}, {0x6b,0x99,0x58,0x3e}, {0xdd,0x27,0xb9,0x71}, -{0xb6,0xbe,0xe1,0x4f}, {0x17,0xf0,0x88,0xad}, {0x66,0xc9,0x20,0xac}, {0xb4,0x7d,0xce,0x3a}, -{0x18,0x63,0xdf,0x4a}, {0x82,0xe5,0x1a,0x31}, {0x60,0x97,0x51,0x33}, {0x45,0x62,0x53,0x7f}, -{0xe0,0xb1,0x64,0x77}, {0x84,0xbb,0x6b,0xae}, {0x1c,0xfe,0x81,0xa0}, {0x94,0xf9,0x08,0x2b}, -{0x58,0x70,0x48,0x68}, {0x19,0x8f,0x45,0xfd}, {0x87,0x94,0xde,0x6c}, {0xb7,0x52,0x7b,0xf8}, -{0x23,0xab,0x73,0xd3}, {0xe2,0x72,0x4b,0x02}, {0x57,0xe3,0x1f,0x8f}, {0x2a,0x66,0x55,0xab}, -{0x07,0xb2,0xeb,0x28}, {0x03,0x2f,0xb5,0xc2}, {0x9a,0x86,0xc5,0x7b}, {0xa5,0xd3,0x37,0x08}, -{0xf2,0x30,0x28,0x87}, {0xb2,0x23,0xbf,0xa5}, {0xba,0x02,0x03,0x6a}, {0x5c,0xed,0x16,0x82}, -{0x2b,0x8a,0xcf,0x1c}, {0x92,0xa7,0x79,0xb4}, {0xf0,0xf3,0x07,0xf2}, {0xa1,0x4e,0x69,0xe2}, -{0xcd,0x65,0xda,0xf4}, {0xd5,0x06,0x05,0xbe}, {0x1f,0xd1,0x34,0x62}, {0x8a,0xc4,0xa6,0xfe}, -{0x9d,0x34,0x2e,0x53}, {0xa0,0xa2,0xf3,0x55}, {0x32,0x05,0x8a,0xe1}, {0x75,0xa4,0xf6,0xeb}, -{0x39,0x0b,0x83,0xec}, {0xaa,0x40,0x60,0xef}, {0x06,0x5e,0x71,0x9f}, {0x51,0xbd,0x6e,0x10}, -{0xf9,0x3e,0x21,0x8a}, {0x3d,0x96,0xdd,0x06}, {0xae,0xdd,0x3e,0x05}, {0x46,0x4d,0xe6,0xbd}, -{0xb5,0x91,0x54,0x8d}, {0x05,0x71,0xc4,0x5d}, {0x6f,0x04,0x06,0xd4}, {0xff,0x60,0x50,0x15}, -{0x24,0x19,0x98,0xfb}, {0x97,0xd6,0xbd,0xe9}, {0xcc,0x89,0x40,0x43}, {0x77,0x67,0xd9,0x9e}, -{0xbd,0xb0,0xe8,0x42}, {0x88,0x07,0x89,0x8b}, {0x38,0xe7,0x19,0x5b}, {0xdb,0x79,0xc8,0xee}, -{0x47,0xa1,0x7c,0x0a}, {0xe9,0x7c,0x42,0x0f}, {0xc9,0xf8,0x84,0x1e}, {0x00,0x00,0x00,0x00}, -{0x83,0x09,0x80,0x86}, {0x48,0x32,0x2b,0xed}, {0xac,0x1e,0x11,0x70}, {0x4e,0x6c,0x5a,0x72}, -{0xfb,0xfd,0x0e,0xff}, {0x56,0x0f,0x85,0x38}, {0x1e,0x3d,0xae,0xd5}, {0x27,0x36,0x2d,0x39}, -{0x64,0x0a,0x0f,0xd9}, {0x21,0x68,0x5c,0xa6}, {0xd1,0x9b,0x5b,0x54}, {0x3a,0x24,0x36,0x2e}, -{0xb1,0x0c,0x0a,0x67}, {0x0f,0x93,0x57,0xe7}, {0xd2,0xb4,0xee,0x96}, {0x9e,0x1b,0x9b,0x91}, -{0x4f,0x80,0xc0,0xc5}, {0xa2,0x61,0xdc,0x20}, {0x69,0x5a,0x77,0x4b}, {0x16,0x1c,0x12,0x1a}, -{0x0a,0xe2,0x93,0xba}, {0xe5,0xc0,0xa0,0x2a}, {0x43,0x3c,0x22,0xe0}, {0x1d,0x12,0x1b,0x17}, -{0x0b,0x0e,0x09,0x0d}, {0xad,0xf2,0x8b,0xc7}, {0xb9,0x2d,0xb6,0xa8}, {0xc8,0x14,0x1e,0xa9}, -{0x85,0x57,0xf1,0x19}, {0x4c,0xaf,0x75,0x07}, {0xbb,0xee,0x99,0xdd}, {0xfd,0xa3,0x7f,0x60}, -{0x9f,0xf7,0x01,0x26}, {0xbc,0x5c,0x72,0xf5}, {0xc5,0x44,0x66,0x3b}, {0x34,0x5b,0xfb,0x7e}, -{0x76,0x8b,0x43,0x29}, {0xdc,0xcb,0x23,0xc6}, {0x68,0xb6,0xed,0xfc}, {0x63,0xb8,0xe4,0xf1}, 
-{0xca,0xd7,0x31,0xdc}, {0x10,0x42,0x63,0x85}, {0x40,0x13,0x97,0x22}, {0x20,0x84,0xc6,0x11}, -{0x7d,0x85,0x4a,0x24}, {0xf8,0xd2,0xbb,0x3d}, {0x11,0xae,0xf9,0x32}, {0x6d,0xc7,0x29,0xa1}, -{0x4b,0x1d,0x9e,0x2f}, {0xf3,0xdc,0xb2,0x30}, {0xec,0x0d,0x86,0x52}, {0xd0,0x77,0xc1,0xe3}, -{0x6c,0x2b,0xb3,0x16}, {0x99,0xa9,0x70,0xb9}, {0xfa,0x11,0x94,0x48}, {0x22,0x47,0xe9,0x64}, -{0xc4,0xa8,0xfc,0x8c}, {0x1a,0xa0,0xf0,0x3f}, {0xd8,0x56,0x7d,0x2c}, {0xef,0x22,0x33,0x90}, -{0xc7,0x87,0x49,0x4e}, {0xc1,0xd9,0x38,0xd1}, {0xfe,0x8c,0xca,0xa2}, {0x36,0x98,0xd4,0x0b}, -{0xcf,0xa6,0xf5,0x81}, {0x28,0xa5,0x7a,0xde}, {0x26,0xda,0xb7,0x8e}, {0xa4,0x3f,0xad,0xbf}, -{0xe4,0x2c,0x3a,0x9d}, {0x0d,0x50,0x78,0x92}, {0x9b,0x6a,0x5f,0xcc}, {0x62,0x54,0x7e,0x46}, -{0xc2,0xf6,0x8d,0x13}, {0xe8,0x90,0xd8,0xb8}, {0x5e,0x2e,0x39,0xf7}, {0xf5,0x82,0xc3,0xaf}, -{0xbe,0x9f,0x5d,0x80}, {0x7c,0x69,0xd0,0x93}, {0xa9,0x6f,0xd5,0x2d}, {0xb3,0xcf,0x25,0x12}, -{0x3b,0xc8,0xac,0x99}, {0xa7,0x10,0x18,0x7d}, {0x6e,0xe8,0x9c,0x63}, {0x7b,0xdb,0x3b,0xbb}, -{0x09,0xcd,0x26,0x78}, {0xf4,0x6e,0x59,0x18}, {0x01,0xec,0x9a,0xb7}, {0xa8,0x83,0x4f,0x9a}, -{0x65,0xe6,0x95,0x6e}, {0x7e,0xaa,0xff,0xe6}, {0x08,0x21,0xbc,0xcf}, {0xe6,0xef,0x15,0xe8}, -{0xd9,0xba,0xe7,0x9b}, {0xce,0x4a,0x6f,0x36}, {0xd4,0xea,0x9f,0x09}, {0xd6,0x29,0xb0,0x7c}, -{0xaf,0x31,0xa4,0xb2}, {0x31,0x2a,0x3f,0x23}, {0x30,0xc6,0xa5,0x94}, {0xc0,0x35,0xa2,0x66}, -{0x37,0x74,0x4e,0xbc}, {0xa6,0xfc,0x82,0xca}, {0xb0,0xe0,0x90,0xd0}, {0x15,0x33,0xa7,0xd8}, -{0x4a,0xf1,0x04,0x98}, {0xf7,0x41,0xec,0xda}, {0x0e,0x7f,0xcd,0x50}, {0x2f,0x17,0x91,0xf6}, -{0x8d,0x76,0x4d,0xd6}, {0x4d,0x43,0xef,0xb0}, {0x54,0xcc,0xaa,0x4d}, {0xdf,0xe4,0x96,0x04}, -{0xe3,0x9e,0xd1,0xb5}, {0x1b,0x4c,0x6a,0x88}, {0xb8,0xc1,0x2c,0x1f}, {0x7f,0x46,0x65,0x51}, -{0x04,0x9d,0x5e,0xea}, {0x5d,0x01,0x8c,0x35}, {0x73,0xfa,0x87,0x74}, {0x2e,0xfb,0x0b,0x41}, -{0x5a,0xb3,0x67,0x1d}, {0x52,0x92,0xdb,0xd2}, {0x33,0xe9,0x10,0x56}, {0x13,0x6d,0xd6,0x47}, -{0x8c,0x9a,0xd7,0x61}, {0x7a,0x37,0xa1,0x0c}, {0x8e,0x59,0xf8,0x14}, {0x89,0xeb,0x13,0x3c}, -{0xee,0xce,0xa9,0x27}, {0x35,0xb7,0x61,0xc9}, {0xed,0xe1,0x1c,0xe5}, {0x3c,0x7a,0x47,0xb1}, -{0x59,0x9c,0xd2,0xdf}, {0x3f,0x55,0xf2,0x73}, {0x79,0x18,0x14,0xce}, {0xbf,0x73,0xc7,0x37}, -{0xea,0x53,0xf7,0xcd}, {0x5b,0x5f,0xfd,0xaa}, {0x14,0xdf,0x3d,0x6f}, {0x86,0x78,0x44,0xdb}, -{0x81,0xca,0xaf,0xf3}, {0x3e,0xb9,0x68,0xc4}, {0x2c,0x38,0x24,0x34}, {0x5f,0xc2,0xa3,0x40}, -{0x72,0x16,0x1d,0xc3}, {0x0c,0xbc,0xe2,0x25}, {0x8b,0x28,0x3c,0x49}, {0x41,0xff,0x0d,0x95}, -{0x71,0x39,0xa8,0x01}, {0xde,0x08,0x0c,0xb3}, {0x9c,0xd8,0xb4,0xe4}, {0x90,0x64,0x56,0xc1}, -{0x61,0x7b,0xcb,0x84}, {0x70,0xd5,0x32,0xb6}, {0x74,0x48,0x6c,0x5c}, {0x42,0xd0,0xb8,0x57} - } -}; -#define T6 xT6.xt8 - -static const union xtab xT7 = { - .xt8 = { -{0xa7,0x50,0x51,0xf4}, {0x65,0x53,0x7e,0x41}, {0xa4,0xc3,0x1a,0x17}, {0x5e,0x96,0x3a,0x27}, -{0x6b,0xcb,0x3b,0xab}, {0x45,0xf1,0x1f,0x9d}, {0x58,0xab,0xac,0xfa}, {0x03,0x93,0x4b,0xe3}, -{0xfa,0x55,0x20,0x30}, {0x6d,0xf6,0xad,0x76}, {0x76,0x91,0x88,0xcc}, {0x4c,0x25,0xf5,0x02}, -{0xd7,0xfc,0x4f,0xe5}, {0xcb,0xd7,0xc5,0x2a}, {0x44,0x80,0x26,0x35}, {0xa3,0x8f,0xb5,0x62}, -{0x5a,0x49,0xde,0xb1}, {0x1b,0x67,0x25,0xba}, {0x0e,0x98,0x45,0xea}, {0xc0,0xe1,0x5d,0xfe}, -{0x75,0x02,0xc3,0x2f}, {0xf0,0x12,0x81,0x4c}, {0x97,0xa3,0x8d,0x46}, {0xf9,0xc6,0x6b,0xd3}, -{0x5f,0xe7,0x03,0x8f}, {0x9c,0x95,0x15,0x92}, {0x7a,0xeb,0xbf,0x6d}, {0x59,0xda,0x95,0x52}, -{0x83,0x2d,0xd4,0xbe}, {0x21,0xd3,0x58,0x74}, {0x69,0x29,0x49,0xe0}, {0xc8,0x44,0x8e,0xc9}, -{0x89,0x6a,0x75,0xc2}, 
{0x79,0x78,0xf4,0x8e}, {0x3e,0x6b,0x99,0x58}, {0x71,0xdd,0x27,0xb9}, -{0x4f,0xb6,0xbe,0xe1}, {0xad,0x17,0xf0,0x88}, {0xac,0x66,0xc9,0x20}, {0x3a,0xb4,0x7d,0xce}, -{0x4a,0x18,0x63,0xdf}, {0x31,0x82,0xe5,0x1a}, {0x33,0x60,0x97,0x51}, {0x7f,0x45,0x62,0x53}, -{0x77,0xe0,0xb1,0x64}, {0xae,0x84,0xbb,0x6b}, {0xa0,0x1c,0xfe,0x81}, {0x2b,0x94,0xf9,0x08}, -{0x68,0x58,0x70,0x48}, {0xfd,0x19,0x8f,0x45}, {0x6c,0x87,0x94,0xde}, {0xf8,0xb7,0x52,0x7b}, -{0xd3,0x23,0xab,0x73}, {0x02,0xe2,0x72,0x4b}, {0x8f,0x57,0xe3,0x1f}, {0xab,0x2a,0x66,0x55}, -{0x28,0x07,0xb2,0xeb}, {0xc2,0x03,0x2f,0xb5}, {0x7b,0x9a,0x86,0xc5}, {0x08,0xa5,0xd3,0x37}, -{0x87,0xf2,0x30,0x28}, {0xa5,0xb2,0x23,0xbf}, {0x6a,0xba,0x02,0x03}, {0x82,0x5c,0xed,0x16}, -{0x1c,0x2b,0x8a,0xcf}, {0xb4,0x92,0xa7,0x79}, {0xf2,0xf0,0xf3,0x07}, {0xe2,0xa1,0x4e,0x69}, -{0xf4,0xcd,0x65,0xda}, {0xbe,0xd5,0x06,0x05}, {0x62,0x1f,0xd1,0x34}, {0xfe,0x8a,0xc4,0xa6}, -{0x53,0x9d,0x34,0x2e}, {0x55,0xa0,0xa2,0xf3}, {0xe1,0x32,0x05,0x8a}, {0xeb,0x75,0xa4,0xf6}, -{0xec,0x39,0x0b,0x83}, {0xef,0xaa,0x40,0x60}, {0x9f,0x06,0x5e,0x71}, {0x10,0x51,0xbd,0x6e}, -{0x8a,0xf9,0x3e,0x21}, {0x06,0x3d,0x96,0xdd}, {0x05,0xae,0xdd,0x3e}, {0xbd,0x46,0x4d,0xe6}, -{0x8d,0xb5,0x91,0x54}, {0x5d,0x05,0x71,0xc4}, {0xd4,0x6f,0x04,0x06}, {0x15,0xff,0x60,0x50}, -{0xfb,0x24,0x19,0x98}, {0xe9,0x97,0xd6,0xbd}, {0x43,0xcc,0x89,0x40}, {0x9e,0x77,0x67,0xd9}, -{0x42,0xbd,0xb0,0xe8}, {0x8b,0x88,0x07,0x89}, {0x5b,0x38,0xe7,0x19}, {0xee,0xdb,0x79,0xc8}, -{0x0a,0x47,0xa1,0x7c}, {0x0f,0xe9,0x7c,0x42}, {0x1e,0xc9,0xf8,0x84}, {0x00,0x00,0x00,0x00}, -{0x86,0x83,0x09,0x80}, {0xed,0x48,0x32,0x2b}, {0x70,0xac,0x1e,0x11}, {0x72,0x4e,0x6c,0x5a}, -{0xff,0xfb,0xfd,0x0e}, {0x38,0x56,0x0f,0x85}, {0xd5,0x1e,0x3d,0xae}, {0x39,0x27,0x36,0x2d}, -{0xd9,0x64,0x0a,0x0f}, {0xa6,0x21,0x68,0x5c}, {0x54,0xd1,0x9b,0x5b}, {0x2e,0x3a,0x24,0x36}, -{0x67,0xb1,0x0c,0x0a}, {0xe7,0x0f,0x93,0x57}, {0x96,0xd2,0xb4,0xee}, {0x91,0x9e,0x1b,0x9b}, -{0xc5,0x4f,0x80,0xc0}, {0x20,0xa2,0x61,0xdc}, {0x4b,0x69,0x5a,0x77}, {0x1a,0x16,0x1c,0x12}, -{0xba,0x0a,0xe2,0x93}, {0x2a,0xe5,0xc0,0xa0}, {0xe0,0x43,0x3c,0x22}, {0x17,0x1d,0x12,0x1b}, -{0x0d,0x0b,0x0e,0x09}, {0xc7,0xad,0xf2,0x8b}, {0xa8,0xb9,0x2d,0xb6}, {0xa9,0xc8,0x14,0x1e}, -{0x19,0x85,0x57,0xf1}, {0x07,0x4c,0xaf,0x75}, {0xdd,0xbb,0xee,0x99}, {0x60,0xfd,0xa3,0x7f}, -{0x26,0x9f,0xf7,0x01}, {0xf5,0xbc,0x5c,0x72}, {0x3b,0xc5,0x44,0x66}, {0x7e,0x34,0x5b,0xfb}, -{0x29,0x76,0x8b,0x43}, {0xc6,0xdc,0xcb,0x23}, {0xfc,0x68,0xb6,0xed}, {0xf1,0x63,0xb8,0xe4}, -{0xdc,0xca,0xd7,0x31}, {0x85,0x10,0x42,0x63}, {0x22,0x40,0x13,0x97}, {0x11,0x20,0x84,0xc6}, -{0x24,0x7d,0x85,0x4a}, {0x3d,0xf8,0xd2,0xbb}, {0x32,0x11,0xae,0xf9}, {0xa1,0x6d,0xc7,0x29}, -{0x2f,0x4b,0x1d,0x9e}, {0x30,0xf3,0xdc,0xb2}, {0x52,0xec,0x0d,0x86}, {0xe3,0xd0,0x77,0xc1}, -{0x16,0x6c,0x2b,0xb3}, {0xb9,0x99,0xa9,0x70}, {0x48,0xfa,0x11,0x94}, {0x64,0x22,0x47,0xe9}, -{0x8c,0xc4,0xa8,0xfc}, {0x3f,0x1a,0xa0,0xf0}, {0x2c,0xd8,0x56,0x7d}, {0x90,0xef,0x22,0x33}, -{0x4e,0xc7,0x87,0x49}, {0xd1,0xc1,0xd9,0x38}, {0xa2,0xfe,0x8c,0xca}, {0x0b,0x36,0x98,0xd4}, -{0x81,0xcf,0xa6,0xf5}, {0xde,0x28,0xa5,0x7a}, {0x8e,0x26,0xda,0xb7}, {0xbf,0xa4,0x3f,0xad}, -{0x9d,0xe4,0x2c,0x3a}, {0x92,0x0d,0x50,0x78}, {0xcc,0x9b,0x6a,0x5f}, {0x46,0x62,0x54,0x7e}, -{0x13,0xc2,0xf6,0x8d}, {0xb8,0xe8,0x90,0xd8}, {0xf7,0x5e,0x2e,0x39}, {0xaf,0xf5,0x82,0xc3}, -{0x80,0xbe,0x9f,0x5d}, {0x93,0x7c,0x69,0xd0}, {0x2d,0xa9,0x6f,0xd5}, {0x12,0xb3,0xcf,0x25}, -{0x99,0x3b,0xc8,0xac}, {0x7d,0xa7,0x10,0x18}, {0x63,0x6e,0xe8,0x9c}, {0xbb,0x7b,0xdb,0x3b}, -{0x78,0x09,0xcd,0x26}, 
{0x18,0xf4,0x6e,0x59}, {0xb7,0x01,0xec,0x9a}, {0x9a,0xa8,0x83,0x4f}, -{0x6e,0x65,0xe6,0x95}, {0xe6,0x7e,0xaa,0xff}, {0xcf,0x08,0x21,0xbc}, {0xe8,0xe6,0xef,0x15}, -{0x9b,0xd9,0xba,0xe7}, {0x36,0xce,0x4a,0x6f}, {0x09,0xd4,0xea,0x9f}, {0x7c,0xd6,0x29,0xb0}, -{0xb2,0xaf,0x31,0xa4}, {0x23,0x31,0x2a,0x3f}, {0x94,0x30,0xc6,0xa5}, {0x66,0xc0,0x35,0xa2}, -{0xbc,0x37,0x74,0x4e}, {0xca,0xa6,0xfc,0x82}, {0xd0,0xb0,0xe0,0x90}, {0xd8,0x15,0x33,0xa7}, -{0x98,0x4a,0xf1,0x04}, {0xda,0xf7,0x41,0xec}, {0x50,0x0e,0x7f,0xcd}, {0xf6,0x2f,0x17,0x91}, -{0xd6,0x8d,0x76,0x4d}, {0xb0,0x4d,0x43,0xef}, {0x4d,0x54,0xcc,0xaa}, {0x04,0xdf,0xe4,0x96}, -{0xb5,0xe3,0x9e,0xd1}, {0x88,0x1b,0x4c,0x6a}, {0x1f,0xb8,0xc1,0x2c}, {0x51,0x7f,0x46,0x65}, -{0xea,0x04,0x9d,0x5e}, {0x35,0x5d,0x01,0x8c}, {0x74,0x73,0xfa,0x87}, {0x41,0x2e,0xfb,0x0b}, -{0x1d,0x5a,0xb3,0x67}, {0xd2,0x52,0x92,0xdb}, {0x56,0x33,0xe9,0x10}, {0x47,0x13,0x6d,0xd6}, -{0x61,0x8c,0x9a,0xd7}, {0x0c,0x7a,0x37,0xa1}, {0x14,0x8e,0x59,0xf8}, {0x3c,0x89,0xeb,0x13}, -{0x27,0xee,0xce,0xa9}, {0xc9,0x35,0xb7,0x61}, {0xe5,0xed,0xe1,0x1c}, {0xb1,0x3c,0x7a,0x47}, -{0xdf,0x59,0x9c,0xd2}, {0x73,0x3f,0x55,0xf2}, {0xce,0x79,0x18,0x14}, {0x37,0xbf,0x73,0xc7}, -{0xcd,0xea,0x53,0xf7}, {0xaa,0x5b,0x5f,0xfd}, {0x6f,0x14,0xdf,0x3d}, {0xdb,0x86,0x78,0x44}, -{0xf3,0x81,0xca,0xaf}, {0xc4,0x3e,0xb9,0x68}, {0x34,0x2c,0x38,0x24}, {0x40,0x5f,0xc2,0xa3}, -{0xc3,0x72,0x16,0x1d}, {0x25,0x0c,0xbc,0xe2}, {0x49,0x8b,0x28,0x3c}, {0x95,0x41,0xff,0x0d}, -{0x01,0x71,0x39,0xa8}, {0xb3,0xde,0x08,0x0c}, {0xe4,0x9c,0xd8,0xb4}, {0xc1,0x90,0x64,0x56}, -{0x84,0x61,0x7b,0xcb}, {0xb6,0x70,0xd5,0x32}, {0x5c,0x74,0x48,0x6c}, {0x57,0x42,0xd0,0xb8} - } -}; -#define T7 xT7.xt8 - -static const union xtab xT8 = { - .xt8 = { -{0xf4,0xa7,0x50,0x51}, {0x41,0x65,0x53,0x7e}, {0x17,0xa4,0xc3,0x1a}, {0x27,0x5e,0x96,0x3a}, -{0xab,0x6b,0xcb,0x3b}, {0x9d,0x45,0xf1,0x1f}, {0xfa,0x58,0xab,0xac}, {0xe3,0x03,0x93,0x4b}, -{0x30,0xfa,0x55,0x20}, {0x76,0x6d,0xf6,0xad}, {0xcc,0x76,0x91,0x88}, {0x02,0x4c,0x25,0xf5}, -{0xe5,0xd7,0xfc,0x4f}, {0x2a,0xcb,0xd7,0xc5}, {0x35,0x44,0x80,0x26}, {0x62,0xa3,0x8f,0xb5}, -{0xb1,0x5a,0x49,0xde}, {0xba,0x1b,0x67,0x25}, {0xea,0x0e,0x98,0x45}, {0xfe,0xc0,0xe1,0x5d}, -{0x2f,0x75,0x02,0xc3}, {0x4c,0xf0,0x12,0x81}, {0x46,0x97,0xa3,0x8d}, {0xd3,0xf9,0xc6,0x6b}, -{0x8f,0x5f,0xe7,0x03}, {0x92,0x9c,0x95,0x15}, {0x6d,0x7a,0xeb,0xbf}, {0x52,0x59,0xda,0x95}, -{0xbe,0x83,0x2d,0xd4}, {0x74,0x21,0xd3,0x58}, {0xe0,0x69,0x29,0x49}, {0xc9,0xc8,0x44,0x8e}, -{0xc2,0x89,0x6a,0x75}, {0x8e,0x79,0x78,0xf4}, {0x58,0x3e,0x6b,0x99}, {0xb9,0x71,0xdd,0x27}, -{0xe1,0x4f,0xb6,0xbe}, {0x88,0xad,0x17,0xf0}, {0x20,0xac,0x66,0xc9}, {0xce,0x3a,0xb4,0x7d}, -{0xdf,0x4a,0x18,0x63}, {0x1a,0x31,0x82,0xe5}, {0x51,0x33,0x60,0x97}, {0x53,0x7f,0x45,0x62}, -{0x64,0x77,0xe0,0xb1}, {0x6b,0xae,0x84,0xbb}, {0x81,0xa0,0x1c,0xfe}, {0x08,0x2b,0x94,0xf9}, -{0x48,0x68,0x58,0x70}, {0x45,0xfd,0x19,0x8f}, {0xde,0x6c,0x87,0x94}, {0x7b,0xf8,0xb7,0x52}, -{0x73,0xd3,0x23,0xab}, {0x4b,0x02,0xe2,0x72}, {0x1f,0x8f,0x57,0xe3}, {0x55,0xab,0x2a,0x66}, -{0xeb,0x28,0x07,0xb2}, {0xb5,0xc2,0x03,0x2f}, {0xc5,0x7b,0x9a,0x86}, {0x37,0x08,0xa5,0xd3}, -{0x28,0x87,0xf2,0x30}, {0xbf,0xa5,0xb2,0x23}, {0x03,0x6a,0xba,0x02}, {0x16,0x82,0x5c,0xed}, -{0xcf,0x1c,0x2b,0x8a}, {0x79,0xb4,0x92,0xa7}, {0x07,0xf2,0xf0,0xf3}, {0x69,0xe2,0xa1,0x4e}, -{0xda,0xf4,0xcd,0x65}, {0x05,0xbe,0xd5,0x06}, {0x34,0x62,0x1f,0xd1}, {0xa6,0xfe,0x8a,0xc4}, -{0x2e,0x53,0x9d,0x34}, {0xf3,0x55,0xa0,0xa2}, {0x8a,0xe1,0x32,0x05}, {0xf6,0xeb,0x75,0xa4}, -{0x83,0xec,0x39,0x0b}, {0x60,0xef,0xaa,0x40}, 
{0x71,0x9f,0x06,0x5e}, {0x6e,0x10,0x51,0xbd}, -{0x21,0x8a,0xf9,0x3e}, {0xdd,0x06,0x3d,0x96}, {0x3e,0x05,0xae,0xdd}, {0xe6,0xbd,0x46,0x4d}, -{0x54,0x8d,0xb5,0x91}, {0xc4,0x5d,0x05,0x71}, {0x06,0xd4,0x6f,0x04}, {0x50,0x15,0xff,0x60}, -{0x98,0xfb,0x24,0x19}, {0xbd,0xe9,0x97,0xd6}, {0x40,0x43,0xcc,0x89}, {0xd9,0x9e,0x77,0x67}, -{0xe8,0x42,0xbd,0xb0}, {0x89,0x8b,0x88,0x07}, {0x19,0x5b,0x38,0xe7}, {0xc8,0xee,0xdb,0x79}, -{0x7c,0x0a,0x47,0xa1}, {0x42,0x0f,0xe9,0x7c}, {0x84,0x1e,0xc9,0xf8}, {0x00,0x00,0x00,0x00}, -{0x80,0x86,0x83,0x09}, {0x2b,0xed,0x48,0x32}, {0x11,0x70,0xac,0x1e}, {0x5a,0x72,0x4e,0x6c}, -{0x0e,0xff,0xfb,0xfd}, {0x85,0x38,0x56,0x0f}, {0xae,0xd5,0x1e,0x3d}, {0x2d,0x39,0x27,0x36}, -{0x0f,0xd9,0x64,0x0a}, {0x5c,0xa6,0x21,0x68}, {0x5b,0x54,0xd1,0x9b}, {0x36,0x2e,0x3a,0x24}, -{0x0a,0x67,0xb1,0x0c}, {0x57,0xe7,0x0f,0x93}, {0xee,0x96,0xd2,0xb4}, {0x9b,0x91,0x9e,0x1b}, -{0xc0,0xc5,0x4f,0x80}, {0xdc,0x20,0xa2,0x61}, {0x77,0x4b,0x69,0x5a}, {0x12,0x1a,0x16,0x1c}, -{0x93,0xba,0x0a,0xe2}, {0xa0,0x2a,0xe5,0xc0}, {0x22,0xe0,0x43,0x3c}, {0x1b,0x17,0x1d,0x12}, -{0x09,0x0d,0x0b,0x0e}, {0x8b,0xc7,0xad,0xf2}, {0xb6,0xa8,0xb9,0x2d}, {0x1e,0xa9,0xc8,0x14}, -{0xf1,0x19,0x85,0x57}, {0x75,0x07,0x4c,0xaf}, {0x99,0xdd,0xbb,0xee}, {0x7f,0x60,0xfd,0xa3}, -{0x01,0x26,0x9f,0xf7}, {0x72,0xf5,0xbc,0x5c}, {0x66,0x3b,0xc5,0x44}, {0xfb,0x7e,0x34,0x5b}, -{0x43,0x29,0x76,0x8b}, {0x23,0xc6,0xdc,0xcb}, {0xed,0xfc,0x68,0xb6}, {0xe4,0xf1,0x63,0xb8}, -{0x31,0xdc,0xca,0xd7}, {0x63,0x85,0x10,0x42}, {0x97,0x22,0x40,0x13}, {0xc6,0x11,0x20,0x84}, -{0x4a,0x24,0x7d,0x85}, {0xbb,0x3d,0xf8,0xd2}, {0xf9,0x32,0x11,0xae}, {0x29,0xa1,0x6d,0xc7}, -{0x9e,0x2f,0x4b,0x1d}, {0xb2,0x30,0xf3,0xdc}, {0x86,0x52,0xec,0x0d}, {0xc1,0xe3,0xd0,0x77}, -{0xb3,0x16,0x6c,0x2b}, {0x70,0xb9,0x99,0xa9}, {0x94,0x48,0xfa,0x11}, {0xe9,0x64,0x22,0x47}, -{0xfc,0x8c,0xc4,0xa8}, {0xf0,0x3f,0x1a,0xa0}, {0x7d,0x2c,0xd8,0x56}, {0x33,0x90,0xef,0x22}, -{0x49,0x4e,0xc7,0x87}, {0x38,0xd1,0xc1,0xd9}, {0xca,0xa2,0xfe,0x8c}, {0xd4,0x0b,0x36,0x98}, -{0xf5,0x81,0xcf,0xa6}, {0x7a,0xde,0x28,0xa5}, {0xb7,0x8e,0x26,0xda}, {0xad,0xbf,0xa4,0x3f}, -{0x3a,0x9d,0xe4,0x2c}, {0x78,0x92,0x0d,0x50}, {0x5f,0xcc,0x9b,0x6a}, {0x7e,0x46,0x62,0x54}, -{0x8d,0x13,0xc2,0xf6}, {0xd8,0xb8,0xe8,0x90}, {0x39,0xf7,0x5e,0x2e}, {0xc3,0xaf,0xf5,0x82}, -{0x5d,0x80,0xbe,0x9f}, {0xd0,0x93,0x7c,0x69}, {0xd5,0x2d,0xa9,0x6f}, {0x25,0x12,0xb3,0xcf}, -{0xac,0x99,0x3b,0xc8}, {0x18,0x7d,0xa7,0x10}, {0x9c,0x63,0x6e,0xe8}, {0x3b,0xbb,0x7b,0xdb}, -{0x26,0x78,0x09,0xcd}, {0x59,0x18,0xf4,0x6e}, {0x9a,0xb7,0x01,0xec}, {0x4f,0x9a,0xa8,0x83}, -{0x95,0x6e,0x65,0xe6}, {0xff,0xe6,0x7e,0xaa}, {0xbc,0xcf,0x08,0x21}, {0x15,0xe8,0xe6,0xef}, -{0xe7,0x9b,0xd9,0xba}, {0x6f,0x36,0xce,0x4a}, {0x9f,0x09,0xd4,0xea}, {0xb0,0x7c,0xd6,0x29}, -{0xa4,0xb2,0xaf,0x31}, {0x3f,0x23,0x31,0x2a}, {0xa5,0x94,0x30,0xc6}, {0xa2,0x66,0xc0,0x35}, -{0x4e,0xbc,0x37,0x74}, {0x82,0xca,0xa6,0xfc}, {0x90,0xd0,0xb0,0xe0}, {0xa7,0xd8,0x15,0x33}, -{0x04,0x98,0x4a,0xf1}, {0xec,0xda,0xf7,0x41}, {0xcd,0x50,0x0e,0x7f}, {0x91,0xf6,0x2f,0x17}, -{0x4d,0xd6,0x8d,0x76}, {0xef,0xb0,0x4d,0x43}, {0xaa,0x4d,0x54,0xcc}, {0x96,0x04,0xdf,0xe4}, -{0xd1,0xb5,0xe3,0x9e}, {0x6a,0x88,0x1b,0x4c}, {0x2c,0x1f,0xb8,0xc1}, {0x65,0x51,0x7f,0x46}, -{0x5e,0xea,0x04,0x9d}, {0x8c,0x35,0x5d,0x01}, {0x87,0x74,0x73,0xfa}, {0x0b,0x41,0x2e,0xfb}, -{0x67,0x1d,0x5a,0xb3}, {0xdb,0xd2,0x52,0x92}, {0x10,0x56,0x33,0xe9}, {0xd6,0x47,0x13,0x6d}, -{0xd7,0x61,0x8c,0x9a}, {0xa1,0x0c,0x7a,0x37}, {0xf8,0x14,0x8e,0x59}, {0x13,0x3c,0x89,0xeb}, -{0xa9,0x27,0xee,0xce}, {0x61,0xc9,0x35,0xb7}, 
{0x1c,0xe5,0xed,0xe1}, {0x47,0xb1,0x3c,0x7a}, -{0xd2,0xdf,0x59,0x9c}, {0xf2,0x73,0x3f,0x55}, {0x14,0xce,0x79,0x18}, {0xc7,0x37,0xbf,0x73}, -{0xf7,0xcd,0xea,0x53}, {0xfd,0xaa,0x5b,0x5f}, {0x3d,0x6f,0x14,0xdf}, {0x44,0xdb,0x86,0x78}, -{0xaf,0xf3,0x81,0xca}, {0x68,0xc4,0x3e,0xb9}, {0x24,0x34,0x2c,0x38}, {0xa3,0x40,0x5f,0xc2}, -{0x1d,0xc3,0x72,0x16}, {0xe2,0x25,0x0c,0xbc}, {0x3c,0x49,0x8b,0x28}, {0x0d,0x95,0x41,0xff}, -{0xa8,0x01,0x71,0x39}, {0x0c,0xb3,0xde,0x08}, {0xb4,0xe4,0x9c,0xd8}, {0x56,0xc1,0x90,0x64}, -{0xcb,0x84,0x61,0x7b}, {0x32,0xb6,0x70,0xd5}, {0x6c,0x5c,0x74,0x48}, {0xb8,0x57,0x42,0xd0} - } -}; -#define T8 xT8.xt8 - -static const word8 S5[256] = { -0x52,0x09,0x6a,0xd5, -0x30,0x36,0xa5,0x38, -0xbf,0x40,0xa3,0x9e, -0x81,0xf3,0xd7,0xfb, -0x7c,0xe3,0x39,0x82, -0x9b,0x2f,0xff,0x87, -0x34,0x8e,0x43,0x44, -0xc4,0xde,0xe9,0xcb, -0x54,0x7b,0x94,0x32, -0xa6,0xc2,0x23,0x3d, -0xee,0x4c,0x95,0x0b, -0x42,0xfa,0xc3,0x4e, -0x08,0x2e,0xa1,0x66, -0x28,0xd9,0x24,0xb2, -0x76,0x5b,0xa2,0x49, -0x6d,0x8b,0xd1,0x25, -0x72,0xf8,0xf6,0x64, -0x86,0x68,0x98,0x16, -0xd4,0xa4,0x5c,0xcc, -0x5d,0x65,0xb6,0x92, -0x6c,0x70,0x48,0x50, -0xfd,0xed,0xb9,0xda, -0x5e,0x15,0x46,0x57, -0xa7,0x8d,0x9d,0x84, -0x90,0xd8,0xab,0x00, -0x8c,0xbc,0xd3,0x0a, -0xf7,0xe4,0x58,0x05, -0xb8,0xb3,0x45,0x06, -0xd0,0x2c,0x1e,0x8f, -0xca,0x3f,0x0f,0x02, -0xc1,0xaf,0xbd,0x03, -0x01,0x13,0x8a,0x6b, -0x3a,0x91,0x11,0x41, -0x4f,0x67,0xdc,0xea, -0x97,0xf2,0xcf,0xce, -0xf0,0xb4,0xe6,0x73, -0x96,0xac,0x74,0x22, -0xe7,0xad,0x35,0x85, -0xe2,0xf9,0x37,0xe8, -0x1c,0x75,0xdf,0x6e, -0x47,0xf1,0x1a,0x71, -0x1d,0x29,0xc5,0x89, -0x6f,0xb7,0x62,0x0e, -0xaa,0x18,0xbe,0x1b, -0xfc,0x56,0x3e,0x4b, -0xc6,0xd2,0x79,0x20, -0x9a,0xdb,0xc0,0xfe, -0x78,0xcd,0x5a,0xf4, -0x1f,0xdd,0xa8,0x33, -0x88,0x07,0xc7,0x31, -0xb1,0x12,0x10,0x59, -0x27,0x80,0xec,0x5f, -0x60,0x51,0x7f,0xa9, -0x19,0xb5,0x4a,0x0d, -0x2d,0xe5,0x7a,0x9f, -0x93,0xc9,0x9c,0xef, -0xa0,0xe0,0x3b,0x4d, -0xae,0x2a,0xf5,0xb0, -0xc8,0xeb,0xbb,0x3c, -0x83,0x53,0x99,0x61, -0x17,0x2b,0x04,0x7e, -0xba,0x77,0xd6,0x26, -0xe1,0x69,0x14,0x63, -0x55,0x21,0x0c,0x7d -}; - -static const union xtab xU1 = { - .xt8 = { -{0x00,0x00,0x00,0x00}, {0x0e,0x09,0x0d,0x0b}, {0x1c,0x12,0x1a,0x16}, {0x12,0x1b,0x17,0x1d}, -{0x38,0x24,0x34,0x2c}, {0x36,0x2d,0x39,0x27}, {0x24,0x36,0x2e,0x3a}, {0x2a,0x3f,0x23,0x31}, -{0x70,0x48,0x68,0x58}, {0x7e,0x41,0x65,0x53}, {0x6c,0x5a,0x72,0x4e}, {0x62,0x53,0x7f,0x45}, -{0x48,0x6c,0x5c,0x74}, {0x46,0x65,0x51,0x7f}, {0x54,0x7e,0x46,0x62}, {0x5a,0x77,0x4b,0x69}, -{0xe0,0x90,0xd0,0xb0}, {0xee,0x99,0xdd,0xbb}, {0xfc,0x82,0xca,0xa6}, {0xf2,0x8b,0xc7,0xad}, -{0xd8,0xb4,0xe4,0x9c}, {0xd6,0xbd,0xe9,0x97}, {0xc4,0xa6,0xfe,0x8a}, {0xca,0xaf,0xf3,0x81}, -{0x90,0xd8,0xb8,0xe8}, {0x9e,0xd1,0xb5,0xe3}, {0x8c,0xca,0xa2,0xfe}, {0x82,0xc3,0xaf,0xf5}, -{0xa8,0xfc,0x8c,0xc4}, {0xa6,0xf5,0x81,0xcf}, {0xb4,0xee,0x96,0xd2}, {0xba,0xe7,0x9b,0xd9}, -{0xdb,0x3b,0xbb,0x7b}, {0xd5,0x32,0xb6,0x70}, {0xc7,0x29,0xa1,0x6d}, {0xc9,0x20,0xac,0x66}, -{0xe3,0x1f,0x8f,0x57}, {0xed,0x16,0x82,0x5c}, {0xff,0x0d,0x95,0x41}, {0xf1,0x04,0x98,0x4a}, -{0xab,0x73,0xd3,0x23}, {0xa5,0x7a,0xde,0x28}, {0xb7,0x61,0xc9,0x35}, {0xb9,0x68,0xc4,0x3e}, -{0x93,0x57,0xe7,0x0f}, {0x9d,0x5e,0xea,0x04}, {0x8f,0x45,0xfd,0x19}, {0x81,0x4c,0xf0,0x12}, -{0x3b,0xab,0x6b,0xcb}, {0x35,0xa2,0x66,0xc0}, {0x27,0xb9,0x71,0xdd}, {0x29,0xb0,0x7c,0xd6}, -{0x03,0x8f,0x5f,0xe7}, {0x0d,0x86,0x52,0xec}, {0x1f,0x9d,0x45,0xf1}, {0x11,0x94,0x48,0xfa}, -{0x4b,0xe3,0x03,0x93}, {0x45,0xea,0x0e,0x98}, {0x57,0xf1,0x19,0x85}, {0x59,0xf8,0x14,0x8e}, -{0x73,0xc7,0x37,0xbf}, 
{0x7d,0xce,0x3a,0xb4}, {0x6f,0xd5,0x2d,0xa9}, {0x61,0xdc,0x20,0xa2}, -{0xad,0x76,0x6d,0xf6}, {0xa3,0x7f,0x60,0xfd}, {0xb1,0x64,0x77,0xe0}, {0xbf,0x6d,0x7a,0xeb}, -{0x95,0x52,0x59,0xda}, {0x9b,0x5b,0x54,0xd1}, {0x89,0x40,0x43,0xcc}, {0x87,0x49,0x4e,0xc7}, -{0xdd,0x3e,0x05,0xae}, {0xd3,0x37,0x08,0xa5}, {0xc1,0x2c,0x1f,0xb8}, {0xcf,0x25,0x12,0xb3}, -{0xe5,0x1a,0x31,0x82}, {0xeb,0x13,0x3c,0x89}, {0xf9,0x08,0x2b,0x94}, {0xf7,0x01,0x26,0x9f}, -{0x4d,0xe6,0xbd,0x46}, {0x43,0xef,0xb0,0x4d}, {0x51,0xf4,0xa7,0x50}, {0x5f,0xfd,0xaa,0x5b}, -{0x75,0xc2,0x89,0x6a}, {0x7b,0xcb,0x84,0x61}, {0x69,0xd0,0x93,0x7c}, {0x67,0xd9,0x9e,0x77}, -{0x3d,0xae,0xd5,0x1e}, {0x33,0xa7,0xd8,0x15}, {0x21,0xbc,0xcf,0x08}, {0x2f,0xb5,0xc2,0x03}, -{0x05,0x8a,0xe1,0x32}, {0x0b,0x83,0xec,0x39}, {0x19,0x98,0xfb,0x24}, {0x17,0x91,0xf6,0x2f}, -{0x76,0x4d,0xd6,0x8d}, {0x78,0x44,0xdb,0x86}, {0x6a,0x5f,0xcc,0x9b}, {0x64,0x56,0xc1,0x90}, -{0x4e,0x69,0xe2,0xa1}, {0x40,0x60,0xef,0xaa}, {0x52,0x7b,0xf8,0xb7}, {0x5c,0x72,0xf5,0xbc}, -{0x06,0x05,0xbe,0xd5}, {0x08,0x0c,0xb3,0xde}, {0x1a,0x17,0xa4,0xc3}, {0x14,0x1e,0xa9,0xc8}, -{0x3e,0x21,0x8a,0xf9}, {0x30,0x28,0x87,0xf2}, {0x22,0x33,0x90,0xef}, {0x2c,0x3a,0x9d,0xe4}, -{0x96,0xdd,0x06,0x3d}, {0x98,0xd4,0x0b,0x36}, {0x8a,0xcf,0x1c,0x2b}, {0x84,0xc6,0x11,0x20}, -{0xae,0xf9,0x32,0x11}, {0xa0,0xf0,0x3f,0x1a}, {0xb2,0xeb,0x28,0x07}, {0xbc,0xe2,0x25,0x0c}, -{0xe6,0x95,0x6e,0x65}, {0xe8,0x9c,0x63,0x6e}, {0xfa,0x87,0x74,0x73}, {0xf4,0x8e,0x79,0x78}, -{0xde,0xb1,0x5a,0x49}, {0xd0,0xb8,0x57,0x42}, {0xc2,0xa3,0x40,0x5f}, {0xcc,0xaa,0x4d,0x54}, -{0x41,0xec,0xda,0xf7}, {0x4f,0xe5,0xd7,0xfc}, {0x5d,0xfe,0xc0,0xe1}, {0x53,0xf7,0xcd,0xea}, -{0x79,0xc8,0xee,0xdb}, {0x77,0xc1,0xe3,0xd0}, {0x65,0xda,0xf4,0xcd}, {0x6b,0xd3,0xf9,0xc6}, -{0x31,0xa4,0xb2,0xaf}, {0x3f,0xad,0xbf,0xa4}, {0x2d,0xb6,0xa8,0xb9}, {0x23,0xbf,0xa5,0xb2}, -{0x09,0x80,0x86,0x83}, {0x07,0x89,0x8b,0x88}, {0x15,0x92,0x9c,0x95}, {0x1b,0x9b,0x91,0x9e}, -{0xa1,0x7c,0x0a,0x47}, {0xaf,0x75,0x07,0x4c}, {0xbd,0x6e,0x10,0x51}, {0xb3,0x67,0x1d,0x5a}, -{0x99,0x58,0x3e,0x6b}, {0x97,0x51,0x33,0x60}, {0x85,0x4a,0x24,0x7d}, {0x8b,0x43,0x29,0x76}, -{0xd1,0x34,0x62,0x1f}, {0xdf,0x3d,0x6f,0x14}, {0xcd,0x26,0x78,0x09}, {0xc3,0x2f,0x75,0x02}, -{0xe9,0x10,0x56,0x33}, {0xe7,0x19,0x5b,0x38}, {0xf5,0x02,0x4c,0x25}, {0xfb,0x0b,0x41,0x2e}, -{0x9a,0xd7,0x61,0x8c}, {0x94,0xde,0x6c,0x87}, {0x86,0xc5,0x7b,0x9a}, {0x88,0xcc,0x76,0x91}, -{0xa2,0xf3,0x55,0xa0}, {0xac,0xfa,0x58,0xab}, {0xbe,0xe1,0x4f,0xb6}, {0xb0,0xe8,0x42,0xbd}, -{0xea,0x9f,0x09,0xd4}, {0xe4,0x96,0x04,0xdf}, {0xf6,0x8d,0x13,0xc2}, {0xf8,0x84,0x1e,0xc9}, -{0xd2,0xbb,0x3d,0xf8}, {0xdc,0xb2,0x30,0xf3}, {0xce,0xa9,0x27,0xee}, {0xc0,0xa0,0x2a,0xe5}, -{0x7a,0x47,0xb1,0x3c}, {0x74,0x4e,0xbc,0x37}, {0x66,0x55,0xab,0x2a}, {0x68,0x5c,0xa6,0x21}, -{0x42,0x63,0x85,0x10}, {0x4c,0x6a,0x88,0x1b}, {0x5e,0x71,0x9f,0x06}, {0x50,0x78,0x92,0x0d}, -{0x0a,0x0f,0xd9,0x64}, {0x04,0x06,0xd4,0x6f}, {0x16,0x1d,0xc3,0x72}, {0x18,0x14,0xce,0x79}, -{0x32,0x2b,0xed,0x48}, {0x3c,0x22,0xe0,0x43}, {0x2e,0x39,0xf7,0x5e}, {0x20,0x30,0xfa,0x55}, -{0xec,0x9a,0xb7,0x01}, {0xe2,0x93,0xba,0x0a}, {0xf0,0x88,0xad,0x17}, {0xfe,0x81,0xa0,0x1c}, -{0xd4,0xbe,0x83,0x2d}, {0xda,0xb7,0x8e,0x26}, {0xc8,0xac,0x99,0x3b}, {0xc6,0xa5,0x94,0x30}, -{0x9c,0xd2,0xdf,0x59}, {0x92,0xdb,0xd2,0x52}, {0x80,0xc0,0xc5,0x4f}, {0x8e,0xc9,0xc8,0x44}, -{0xa4,0xf6,0xeb,0x75}, {0xaa,0xff,0xe6,0x7e}, {0xb8,0xe4,0xf1,0x63}, {0xb6,0xed,0xfc,0x68}, -{0x0c,0x0a,0x67,0xb1}, {0x02,0x03,0x6a,0xba}, {0x10,0x18,0x7d,0xa7}, {0x1e,0x11,0x70,0xac}, -{0x34,0x2e,0x53,0x9d}, 
{0x3a,0x27,0x5e,0x96}, {0x28,0x3c,0x49,0x8b}, {0x26,0x35,0x44,0x80}, -{0x7c,0x42,0x0f,0xe9}, {0x72,0x4b,0x02,0xe2}, {0x60,0x50,0x15,0xff}, {0x6e,0x59,0x18,0xf4}, -{0x44,0x66,0x3b,0xc5}, {0x4a,0x6f,0x36,0xce}, {0x58,0x74,0x21,0xd3}, {0x56,0x7d,0x2c,0xd8}, -{0x37,0xa1,0x0c,0x7a}, {0x39,0xa8,0x01,0x71}, {0x2b,0xb3,0x16,0x6c}, {0x25,0xba,0x1b,0x67}, -{0x0f,0x85,0x38,0x56}, {0x01,0x8c,0x35,0x5d}, {0x13,0x97,0x22,0x40}, {0x1d,0x9e,0x2f,0x4b}, -{0x47,0xe9,0x64,0x22}, {0x49,0xe0,0x69,0x29}, {0x5b,0xfb,0x7e,0x34}, {0x55,0xf2,0x73,0x3f}, -{0x7f,0xcd,0x50,0x0e}, {0x71,0xc4,0x5d,0x05}, {0x63,0xdf,0x4a,0x18}, {0x6d,0xd6,0x47,0x13}, -{0xd7,0x31,0xdc,0xca}, {0xd9,0x38,0xd1,0xc1}, {0xcb,0x23,0xc6,0xdc}, {0xc5,0x2a,0xcb,0xd7}, -{0xef,0x15,0xe8,0xe6}, {0xe1,0x1c,0xe5,0xed}, {0xf3,0x07,0xf2,0xf0}, {0xfd,0x0e,0xff,0xfb}, -{0xa7,0x79,0xb4,0x92}, {0xa9,0x70,0xb9,0x99}, {0xbb,0x6b,0xae,0x84}, {0xb5,0x62,0xa3,0x8f}, -{0x9f,0x5d,0x80,0xbe}, {0x91,0x54,0x8d,0xb5}, {0x83,0x4f,0x9a,0xa8}, {0x8d,0x46,0x97,0xa3} - } -}; -#define U1 xU1.xt8 - -static const union xtab xU2 = { - .xt8 = { -{0x00,0x00,0x00,0x00}, {0x0b,0x0e,0x09,0x0d}, {0x16,0x1c,0x12,0x1a}, {0x1d,0x12,0x1b,0x17}, -{0x2c,0x38,0x24,0x34}, {0x27,0x36,0x2d,0x39}, {0x3a,0x24,0x36,0x2e}, {0x31,0x2a,0x3f,0x23}, -{0x58,0x70,0x48,0x68}, {0x53,0x7e,0x41,0x65}, {0x4e,0x6c,0x5a,0x72}, {0x45,0x62,0x53,0x7f}, -{0x74,0x48,0x6c,0x5c}, {0x7f,0x46,0x65,0x51}, {0x62,0x54,0x7e,0x46}, {0x69,0x5a,0x77,0x4b}, -{0xb0,0xe0,0x90,0xd0}, {0xbb,0xee,0x99,0xdd}, {0xa6,0xfc,0x82,0xca}, {0xad,0xf2,0x8b,0xc7}, -{0x9c,0xd8,0xb4,0xe4}, {0x97,0xd6,0xbd,0xe9}, {0x8a,0xc4,0xa6,0xfe}, {0x81,0xca,0xaf,0xf3}, -{0xe8,0x90,0xd8,0xb8}, {0xe3,0x9e,0xd1,0xb5}, {0xfe,0x8c,0xca,0xa2}, {0xf5,0x82,0xc3,0xaf}, -{0xc4,0xa8,0xfc,0x8c}, {0xcf,0xa6,0xf5,0x81}, {0xd2,0xb4,0xee,0x96}, {0xd9,0xba,0xe7,0x9b}, -{0x7b,0xdb,0x3b,0xbb}, {0x70,0xd5,0x32,0xb6}, {0x6d,0xc7,0x29,0xa1}, {0x66,0xc9,0x20,0xac}, -{0x57,0xe3,0x1f,0x8f}, {0x5c,0xed,0x16,0x82}, {0x41,0xff,0x0d,0x95}, {0x4a,0xf1,0x04,0x98}, -{0x23,0xab,0x73,0xd3}, {0x28,0xa5,0x7a,0xde}, {0x35,0xb7,0x61,0xc9}, {0x3e,0xb9,0x68,0xc4}, -{0x0f,0x93,0x57,0xe7}, {0x04,0x9d,0x5e,0xea}, {0x19,0x8f,0x45,0xfd}, {0x12,0x81,0x4c,0xf0}, -{0xcb,0x3b,0xab,0x6b}, {0xc0,0x35,0xa2,0x66}, {0xdd,0x27,0xb9,0x71}, {0xd6,0x29,0xb0,0x7c}, -{0xe7,0x03,0x8f,0x5f}, {0xec,0x0d,0x86,0x52}, {0xf1,0x1f,0x9d,0x45}, {0xfa,0x11,0x94,0x48}, -{0x93,0x4b,0xe3,0x03}, {0x98,0x45,0xea,0x0e}, {0x85,0x57,0xf1,0x19}, {0x8e,0x59,0xf8,0x14}, -{0xbf,0x73,0xc7,0x37}, {0xb4,0x7d,0xce,0x3a}, {0xa9,0x6f,0xd5,0x2d}, {0xa2,0x61,0xdc,0x20}, -{0xf6,0xad,0x76,0x6d}, {0xfd,0xa3,0x7f,0x60}, {0xe0,0xb1,0x64,0x77}, {0xeb,0xbf,0x6d,0x7a}, -{0xda,0x95,0x52,0x59}, {0xd1,0x9b,0x5b,0x54}, {0xcc,0x89,0x40,0x43}, {0xc7,0x87,0x49,0x4e}, -{0xae,0xdd,0x3e,0x05}, {0xa5,0xd3,0x37,0x08}, {0xb8,0xc1,0x2c,0x1f}, {0xb3,0xcf,0x25,0x12}, -{0x82,0xe5,0x1a,0x31}, {0x89,0xeb,0x13,0x3c}, {0x94,0xf9,0x08,0x2b}, {0x9f,0xf7,0x01,0x26}, -{0x46,0x4d,0xe6,0xbd}, {0x4d,0x43,0xef,0xb0}, {0x50,0x51,0xf4,0xa7}, {0x5b,0x5f,0xfd,0xaa}, -{0x6a,0x75,0xc2,0x89}, {0x61,0x7b,0xcb,0x84}, {0x7c,0x69,0xd0,0x93}, {0x77,0x67,0xd9,0x9e}, -{0x1e,0x3d,0xae,0xd5}, {0x15,0x33,0xa7,0xd8}, {0x08,0x21,0xbc,0xcf}, {0x03,0x2f,0xb5,0xc2}, -{0x32,0x05,0x8a,0xe1}, {0x39,0x0b,0x83,0xec}, {0x24,0x19,0x98,0xfb}, {0x2f,0x17,0x91,0xf6}, -{0x8d,0x76,0x4d,0xd6}, {0x86,0x78,0x44,0xdb}, {0x9b,0x6a,0x5f,0xcc}, {0x90,0x64,0x56,0xc1}, -{0xa1,0x4e,0x69,0xe2}, {0xaa,0x40,0x60,0xef}, {0xb7,0x52,0x7b,0xf8}, {0xbc,0x5c,0x72,0xf5}, -{0xd5,0x06,0x05,0xbe}, {0xde,0x08,0x0c,0xb3}, 
{0xc3,0x1a,0x17,0xa4}, {0xc8,0x14,0x1e,0xa9}, -{0xf9,0x3e,0x21,0x8a}, {0xf2,0x30,0x28,0x87}, {0xef,0x22,0x33,0x90}, {0xe4,0x2c,0x3a,0x9d}, -{0x3d,0x96,0xdd,0x06}, {0x36,0x98,0xd4,0x0b}, {0x2b,0x8a,0xcf,0x1c}, {0x20,0x84,0xc6,0x11}, -{0x11,0xae,0xf9,0x32}, {0x1a,0xa0,0xf0,0x3f}, {0x07,0xb2,0xeb,0x28}, {0x0c,0xbc,0xe2,0x25}, -{0x65,0xe6,0x95,0x6e}, {0x6e,0xe8,0x9c,0x63}, {0x73,0xfa,0x87,0x74}, {0x78,0xf4,0x8e,0x79}, -{0x49,0xde,0xb1,0x5a}, {0x42,0xd0,0xb8,0x57}, {0x5f,0xc2,0xa3,0x40}, {0x54,0xcc,0xaa,0x4d}, -{0xf7,0x41,0xec,0xda}, {0xfc,0x4f,0xe5,0xd7}, {0xe1,0x5d,0xfe,0xc0}, {0xea,0x53,0xf7,0xcd}, -{0xdb,0x79,0xc8,0xee}, {0xd0,0x77,0xc1,0xe3}, {0xcd,0x65,0xda,0xf4}, {0xc6,0x6b,0xd3,0xf9}, -{0xaf,0x31,0xa4,0xb2}, {0xa4,0x3f,0xad,0xbf}, {0xb9,0x2d,0xb6,0xa8}, {0xb2,0x23,0xbf,0xa5}, -{0x83,0x09,0x80,0x86}, {0x88,0x07,0x89,0x8b}, {0x95,0x15,0x92,0x9c}, {0x9e,0x1b,0x9b,0x91}, -{0x47,0xa1,0x7c,0x0a}, {0x4c,0xaf,0x75,0x07}, {0x51,0xbd,0x6e,0x10}, {0x5a,0xb3,0x67,0x1d}, -{0x6b,0x99,0x58,0x3e}, {0x60,0x97,0x51,0x33}, {0x7d,0x85,0x4a,0x24}, {0x76,0x8b,0x43,0x29}, -{0x1f,0xd1,0x34,0x62}, {0x14,0xdf,0x3d,0x6f}, {0x09,0xcd,0x26,0x78}, {0x02,0xc3,0x2f,0x75}, -{0x33,0xe9,0x10,0x56}, {0x38,0xe7,0x19,0x5b}, {0x25,0xf5,0x02,0x4c}, {0x2e,0xfb,0x0b,0x41}, -{0x8c,0x9a,0xd7,0x61}, {0x87,0x94,0xde,0x6c}, {0x9a,0x86,0xc5,0x7b}, {0x91,0x88,0xcc,0x76}, -{0xa0,0xa2,0xf3,0x55}, {0xab,0xac,0xfa,0x58}, {0xb6,0xbe,0xe1,0x4f}, {0xbd,0xb0,0xe8,0x42}, -{0xd4,0xea,0x9f,0x09}, {0xdf,0xe4,0x96,0x04}, {0xc2,0xf6,0x8d,0x13}, {0xc9,0xf8,0x84,0x1e}, -{0xf8,0xd2,0xbb,0x3d}, {0xf3,0xdc,0xb2,0x30}, {0xee,0xce,0xa9,0x27}, {0xe5,0xc0,0xa0,0x2a}, -{0x3c,0x7a,0x47,0xb1}, {0x37,0x74,0x4e,0xbc}, {0x2a,0x66,0x55,0xab}, {0x21,0x68,0x5c,0xa6}, -{0x10,0x42,0x63,0x85}, {0x1b,0x4c,0x6a,0x88}, {0x06,0x5e,0x71,0x9f}, {0x0d,0x50,0x78,0x92}, -{0x64,0x0a,0x0f,0xd9}, {0x6f,0x04,0x06,0xd4}, {0x72,0x16,0x1d,0xc3}, {0x79,0x18,0x14,0xce}, -{0x48,0x32,0x2b,0xed}, {0x43,0x3c,0x22,0xe0}, {0x5e,0x2e,0x39,0xf7}, {0x55,0x20,0x30,0xfa}, -{0x01,0xec,0x9a,0xb7}, {0x0a,0xe2,0x93,0xba}, {0x17,0xf0,0x88,0xad}, {0x1c,0xfe,0x81,0xa0}, -{0x2d,0xd4,0xbe,0x83}, {0x26,0xda,0xb7,0x8e}, {0x3b,0xc8,0xac,0x99}, {0x30,0xc6,0xa5,0x94}, -{0x59,0x9c,0xd2,0xdf}, {0x52,0x92,0xdb,0xd2}, {0x4f,0x80,0xc0,0xc5}, {0x44,0x8e,0xc9,0xc8}, -{0x75,0xa4,0xf6,0xeb}, {0x7e,0xaa,0xff,0xe6}, {0x63,0xb8,0xe4,0xf1}, {0x68,0xb6,0xed,0xfc}, -{0xb1,0x0c,0x0a,0x67}, {0xba,0x02,0x03,0x6a}, {0xa7,0x10,0x18,0x7d}, {0xac,0x1e,0x11,0x70}, -{0x9d,0x34,0x2e,0x53}, {0x96,0x3a,0x27,0x5e}, {0x8b,0x28,0x3c,0x49}, {0x80,0x26,0x35,0x44}, -{0xe9,0x7c,0x42,0x0f}, {0xe2,0x72,0x4b,0x02}, {0xff,0x60,0x50,0x15}, {0xf4,0x6e,0x59,0x18}, -{0xc5,0x44,0x66,0x3b}, {0xce,0x4a,0x6f,0x36}, {0xd3,0x58,0x74,0x21}, {0xd8,0x56,0x7d,0x2c}, -{0x7a,0x37,0xa1,0x0c}, {0x71,0x39,0xa8,0x01}, {0x6c,0x2b,0xb3,0x16}, {0x67,0x25,0xba,0x1b}, -{0x56,0x0f,0x85,0x38}, {0x5d,0x01,0x8c,0x35}, {0x40,0x13,0x97,0x22}, {0x4b,0x1d,0x9e,0x2f}, -{0x22,0x47,0xe9,0x64}, {0x29,0x49,0xe0,0x69}, {0x34,0x5b,0xfb,0x7e}, {0x3f,0x55,0xf2,0x73}, -{0x0e,0x7f,0xcd,0x50}, {0x05,0x71,0xc4,0x5d}, {0x18,0x63,0xdf,0x4a}, {0x13,0x6d,0xd6,0x47}, -{0xca,0xd7,0x31,0xdc}, {0xc1,0xd9,0x38,0xd1}, {0xdc,0xcb,0x23,0xc6}, {0xd7,0xc5,0x2a,0xcb}, -{0xe6,0xef,0x15,0xe8}, {0xed,0xe1,0x1c,0xe5}, {0xf0,0xf3,0x07,0xf2}, {0xfb,0xfd,0x0e,0xff}, -{0x92,0xa7,0x79,0xb4}, {0x99,0xa9,0x70,0xb9}, {0x84,0xbb,0x6b,0xae}, {0x8f,0xb5,0x62,0xa3}, -{0xbe,0x9f,0x5d,0x80}, {0xb5,0x91,0x54,0x8d}, {0xa8,0x83,0x4f,0x9a}, {0xa3,0x8d,0x46,0x97} - } -}; -#define U2 xU2.xt8 - -static const union xtab xU3 = { - 
.xt8 = { -{0x00,0x00,0x00,0x00}, {0x0d,0x0b,0x0e,0x09}, {0x1a,0x16,0x1c,0x12}, {0x17,0x1d,0x12,0x1b}, -{0x34,0x2c,0x38,0x24}, {0x39,0x27,0x36,0x2d}, {0x2e,0x3a,0x24,0x36}, {0x23,0x31,0x2a,0x3f}, -{0x68,0x58,0x70,0x48}, {0x65,0x53,0x7e,0x41}, {0x72,0x4e,0x6c,0x5a}, {0x7f,0x45,0x62,0x53}, -{0x5c,0x74,0x48,0x6c}, {0x51,0x7f,0x46,0x65}, {0x46,0x62,0x54,0x7e}, {0x4b,0x69,0x5a,0x77}, -{0xd0,0xb0,0xe0,0x90}, {0xdd,0xbb,0xee,0x99}, {0xca,0xa6,0xfc,0x82}, {0xc7,0xad,0xf2,0x8b}, -{0xe4,0x9c,0xd8,0xb4}, {0xe9,0x97,0xd6,0xbd}, {0xfe,0x8a,0xc4,0xa6}, {0xf3,0x81,0xca,0xaf}, -{0xb8,0xe8,0x90,0xd8}, {0xb5,0xe3,0x9e,0xd1}, {0xa2,0xfe,0x8c,0xca}, {0xaf,0xf5,0x82,0xc3}, -{0x8c,0xc4,0xa8,0xfc}, {0x81,0xcf,0xa6,0xf5}, {0x96,0xd2,0xb4,0xee}, {0x9b,0xd9,0xba,0xe7}, -{0xbb,0x7b,0xdb,0x3b}, {0xb6,0x70,0xd5,0x32}, {0xa1,0x6d,0xc7,0x29}, {0xac,0x66,0xc9,0x20}, -{0x8f,0x57,0xe3,0x1f}, {0x82,0x5c,0xed,0x16}, {0x95,0x41,0xff,0x0d}, {0x98,0x4a,0xf1,0x04}, -{0xd3,0x23,0xab,0x73}, {0xde,0x28,0xa5,0x7a}, {0xc9,0x35,0xb7,0x61}, {0xc4,0x3e,0xb9,0x68}, -{0xe7,0x0f,0x93,0x57}, {0xea,0x04,0x9d,0x5e}, {0xfd,0x19,0x8f,0x45}, {0xf0,0x12,0x81,0x4c}, -{0x6b,0xcb,0x3b,0xab}, {0x66,0xc0,0x35,0xa2}, {0x71,0xdd,0x27,0xb9}, {0x7c,0xd6,0x29,0xb0}, -{0x5f,0xe7,0x03,0x8f}, {0x52,0xec,0x0d,0x86}, {0x45,0xf1,0x1f,0x9d}, {0x48,0xfa,0x11,0x94}, -{0x03,0x93,0x4b,0xe3}, {0x0e,0x98,0x45,0xea}, {0x19,0x85,0x57,0xf1}, {0x14,0x8e,0x59,0xf8}, -{0x37,0xbf,0x73,0xc7}, {0x3a,0xb4,0x7d,0xce}, {0x2d,0xa9,0x6f,0xd5}, {0x20,0xa2,0x61,0xdc}, -{0x6d,0xf6,0xad,0x76}, {0x60,0xfd,0xa3,0x7f}, {0x77,0xe0,0xb1,0x64}, {0x7a,0xeb,0xbf,0x6d}, -{0x59,0xda,0x95,0x52}, {0x54,0xd1,0x9b,0x5b}, {0x43,0xcc,0x89,0x40}, {0x4e,0xc7,0x87,0x49}, -{0x05,0xae,0xdd,0x3e}, {0x08,0xa5,0xd3,0x37}, {0x1f,0xb8,0xc1,0x2c}, {0x12,0xb3,0xcf,0x25}, -{0x31,0x82,0xe5,0x1a}, {0x3c,0x89,0xeb,0x13}, {0x2b,0x94,0xf9,0x08}, {0x26,0x9f,0xf7,0x01}, -{0xbd,0x46,0x4d,0xe6}, {0xb0,0x4d,0x43,0xef}, {0xa7,0x50,0x51,0xf4}, {0xaa,0x5b,0x5f,0xfd}, -{0x89,0x6a,0x75,0xc2}, {0x84,0x61,0x7b,0xcb}, {0x93,0x7c,0x69,0xd0}, {0x9e,0x77,0x67,0xd9}, -{0xd5,0x1e,0x3d,0xae}, {0xd8,0x15,0x33,0xa7}, {0xcf,0x08,0x21,0xbc}, {0xc2,0x03,0x2f,0xb5}, -{0xe1,0x32,0x05,0x8a}, {0xec,0x39,0x0b,0x83}, {0xfb,0x24,0x19,0x98}, {0xf6,0x2f,0x17,0x91}, -{0xd6,0x8d,0x76,0x4d}, {0xdb,0x86,0x78,0x44}, {0xcc,0x9b,0x6a,0x5f}, {0xc1,0x90,0x64,0x56}, -{0xe2,0xa1,0x4e,0x69}, {0xef,0xaa,0x40,0x60}, {0xf8,0xb7,0x52,0x7b}, {0xf5,0xbc,0x5c,0x72}, -{0xbe,0xd5,0x06,0x05}, {0xb3,0xde,0x08,0x0c}, {0xa4,0xc3,0x1a,0x17}, {0xa9,0xc8,0x14,0x1e}, -{0x8a,0xf9,0x3e,0x21}, {0x87,0xf2,0x30,0x28}, {0x90,0xef,0x22,0x33}, {0x9d,0xe4,0x2c,0x3a}, -{0x06,0x3d,0x96,0xdd}, {0x0b,0x36,0x98,0xd4}, {0x1c,0x2b,0x8a,0xcf}, {0x11,0x20,0x84,0xc6}, -{0x32,0x11,0xae,0xf9}, {0x3f,0x1a,0xa0,0xf0}, {0x28,0x07,0xb2,0xeb}, {0x25,0x0c,0xbc,0xe2}, -{0x6e,0x65,0xe6,0x95}, {0x63,0x6e,0xe8,0x9c}, {0x74,0x73,0xfa,0x87}, {0x79,0x78,0xf4,0x8e}, -{0x5a,0x49,0xde,0xb1}, {0x57,0x42,0xd0,0xb8}, {0x40,0x5f,0xc2,0xa3}, {0x4d,0x54,0xcc,0xaa}, -{0xda,0xf7,0x41,0xec}, {0xd7,0xfc,0x4f,0xe5}, {0xc0,0xe1,0x5d,0xfe}, {0xcd,0xea,0x53,0xf7}, -{0xee,0xdb,0x79,0xc8}, {0xe3,0xd0,0x77,0xc1}, {0xf4,0xcd,0x65,0xda}, {0xf9,0xc6,0x6b,0xd3}, -{0xb2,0xaf,0x31,0xa4}, {0xbf,0xa4,0x3f,0xad}, {0xa8,0xb9,0x2d,0xb6}, {0xa5,0xb2,0x23,0xbf}, -{0x86,0x83,0x09,0x80}, {0x8b,0x88,0x07,0x89}, {0x9c,0x95,0x15,0x92}, {0x91,0x9e,0x1b,0x9b}, -{0x0a,0x47,0xa1,0x7c}, {0x07,0x4c,0xaf,0x75}, {0x10,0x51,0xbd,0x6e}, {0x1d,0x5a,0xb3,0x67}, -{0x3e,0x6b,0x99,0x58}, {0x33,0x60,0x97,0x51}, {0x24,0x7d,0x85,0x4a}, {0x29,0x76,0x8b,0x43}, 
-{0x62,0x1f,0xd1,0x34}, {0x6f,0x14,0xdf,0x3d}, {0x78,0x09,0xcd,0x26}, {0x75,0x02,0xc3,0x2f}, -{0x56,0x33,0xe9,0x10}, {0x5b,0x38,0xe7,0x19}, {0x4c,0x25,0xf5,0x02}, {0x41,0x2e,0xfb,0x0b}, -{0x61,0x8c,0x9a,0xd7}, {0x6c,0x87,0x94,0xde}, {0x7b,0x9a,0x86,0xc5}, {0x76,0x91,0x88,0xcc}, -{0x55,0xa0,0xa2,0xf3}, {0x58,0xab,0xac,0xfa}, {0x4f,0xb6,0xbe,0xe1}, {0x42,0xbd,0xb0,0xe8}, -{0x09,0xd4,0xea,0x9f}, {0x04,0xdf,0xe4,0x96}, {0x13,0xc2,0xf6,0x8d}, {0x1e,0xc9,0xf8,0x84}, -{0x3d,0xf8,0xd2,0xbb}, {0x30,0xf3,0xdc,0xb2}, {0x27,0xee,0xce,0xa9}, {0x2a,0xe5,0xc0,0xa0}, -{0xb1,0x3c,0x7a,0x47}, {0xbc,0x37,0x74,0x4e}, {0xab,0x2a,0x66,0x55}, {0xa6,0x21,0x68,0x5c}, -{0x85,0x10,0x42,0x63}, {0x88,0x1b,0x4c,0x6a}, {0x9f,0x06,0x5e,0x71}, {0x92,0x0d,0x50,0x78}, -{0xd9,0x64,0x0a,0x0f}, {0xd4,0x6f,0x04,0x06}, {0xc3,0x72,0x16,0x1d}, {0xce,0x79,0x18,0x14}, -{0xed,0x48,0x32,0x2b}, {0xe0,0x43,0x3c,0x22}, {0xf7,0x5e,0x2e,0x39}, {0xfa,0x55,0x20,0x30}, -{0xb7,0x01,0xec,0x9a}, {0xba,0x0a,0xe2,0x93}, {0xad,0x17,0xf0,0x88}, {0xa0,0x1c,0xfe,0x81}, -{0x83,0x2d,0xd4,0xbe}, {0x8e,0x26,0xda,0xb7}, {0x99,0x3b,0xc8,0xac}, {0x94,0x30,0xc6,0xa5}, -{0xdf,0x59,0x9c,0xd2}, {0xd2,0x52,0x92,0xdb}, {0xc5,0x4f,0x80,0xc0}, {0xc8,0x44,0x8e,0xc9}, -{0xeb,0x75,0xa4,0xf6}, {0xe6,0x7e,0xaa,0xff}, {0xf1,0x63,0xb8,0xe4}, {0xfc,0x68,0xb6,0xed}, -{0x67,0xb1,0x0c,0x0a}, {0x6a,0xba,0x02,0x03}, {0x7d,0xa7,0x10,0x18}, {0x70,0xac,0x1e,0x11}, -{0x53,0x9d,0x34,0x2e}, {0x5e,0x96,0x3a,0x27}, {0x49,0x8b,0x28,0x3c}, {0x44,0x80,0x26,0x35}, -{0x0f,0xe9,0x7c,0x42}, {0x02,0xe2,0x72,0x4b}, {0x15,0xff,0x60,0x50}, {0x18,0xf4,0x6e,0x59}, -{0x3b,0xc5,0x44,0x66}, {0x36,0xce,0x4a,0x6f}, {0x21,0xd3,0x58,0x74}, {0x2c,0xd8,0x56,0x7d}, -{0x0c,0x7a,0x37,0xa1}, {0x01,0x71,0x39,0xa8}, {0x16,0x6c,0x2b,0xb3}, {0x1b,0x67,0x25,0xba}, -{0x38,0x56,0x0f,0x85}, {0x35,0x5d,0x01,0x8c}, {0x22,0x40,0x13,0x97}, {0x2f,0x4b,0x1d,0x9e}, -{0x64,0x22,0x47,0xe9}, {0x69,0x29,0x49,0xe0}, {0x7e,0x34,0x5b,0xfb}, {0x73,0x3f,0x55,0xf2}, -{0x50,0x0e,0x7f,0xcd}, {0x5d,0x05,0x71,0xc4}, {0x4a,0x18,0x63,0xdf}, {0x47,0x13,0x6d,0xd6}, -{0xdc,0xca,0xd7,0x31}, {0xd1,0xc1,0xd9,0x38}, {0xc6,0xdc,0xcb,0x23}, {0xcb,0xd7,0xc5,0x2a}, -{0xe8,0xe6,0xef,0x15}, {0xe5,0xed,0xe1,0x1c}, {0xf2,0xf0,0xf3,0x07}, {0xff,0xfb,0xfd,0x0e}, -{0xb4,0x92,0xa7,0x79}, {0xb9,0x99,0xa9,0x70}, {0xae,0x84,0xbb,0x6b}, {0xa3,0x8f,0xb5,0x62}, -{0x80,0xbe,0x9f,0x5d}, {0x8d,0xb5,0x91,0x54}, {0x9a,0xa8,0x83,0x4f}, {0x97,0xa3,0x8d,0x46} - } -}; -#define U3 xU3.xt8 - -static const union xtab xU4 = { - .xt8 = { -{0x00,0x00,0x00,0x00}, {0x09,0x0d,0x0b,0x0e}, {0x12,0x1a,0x16,0x1c}, {0x1b,0x17,0x1d,0x12}, -{0x24,0x34,0x2c,0x38}, {0x2d,0x39,0x27,0x36}, {0x36,0x2e,0x3a,0x24}, {0x3f,0x23,0x31,0x2a}, -{0x48,0x68,0x58,0x70}, {0x41,0x65,0x53,0x7e}, {0x5a,0x72,0x4e,0x6c}, {0x53,0x7f,0x45,0x62}, -{0x6c,0x5c,0x74,0x48}, {0x65,0x51,0x7f,0x46}, {0x7e,0x46,0x62,0x54}, {0x77,0x4b,0x69,0x5a}, -{0x90,0xd0,0xb0,0xe0}, {0x99,0xdd,0xbb,0xee}, {0x82,0xca,0xa6,0xfc}, {0x8b,0xc7,0xad,0xf2}, -{0xb4,0xe4,0x9c,0xd8}, {0xbd,0xe9,0x97,0xd6}, {0xa6,0xfe,0x8a,0xc4}, {0xaf,0xf3,0x81,0xca}, -{0xd8,0xb8,0xe8,0x90}, {0xd1,0xb5,0xe3,0x9e}, {0xca,0xa2,0xfe,0x8c}, {0xc3,0xaf,0xf5,0x82}, -{0xfc,0x8c,0xc4,0xa8}, {0xf5,0x81,0xcf,0xa6}, {0xee,0x96,0xd2,0xb4}, {0xe7,0x9b,0xd9,0xba}, -{0x3b,0xbb,0x7b,0xdb}, {0x32,0xb6,0x70,0xd5}, {0x29,0xa1,0x6d,0xc7}, {0x20,0xac,0x66,0xc9}, -{0x1f,0x8f,0x57,0xe3}, {0x16,0x82,0x5c,0xed}, {0x0d,0x95,0x41,0xff}, {0x04,0x98,0x4a,0xf1}, -{0x73,0xd3,0x23,0xab}, {0x7a,0xde,0x28,0xa5}, {0x61,0xc9,0x35,0xb7}, {0x68,0xc4,0x3e,0xb9}, -{0x57,0xe7,0x0f,0x93}, 
{0x5e,0xea,0x04,0x9d}, {0x45,0xfd,0x19,0x8f}, {0x4c,0xf0,0x12,0x81}, -{0xab,0x6b,0xcb,0x3b}, {0xa2,0x66,0xc0,0x35}, {0xb9,0x71,0xdd,0x27}, {0xb0,0x7c,0xd6,0x29}, -{0x8f,0x5f,0xe7,0x03}, {0x86,0x52,0xec,0x0d}, {0x9d,0x45,0xf1,0x1f}, {0x94,0x48,0xfa,0x11}, -{0xe3,0x03,0x93,0x4b}, {0xea,0x0e,0x98,0x45}, {0xf1,0x19,0x85,0x57}, {0xf8,0x14,0x8e,0x59}, -{0xc7,0x37,0xbf,0x73}, {0xce,0x3a,0xb4,0x7d}, {0xd5,0x2d,0xa9,0x6f}, {0xdc,0x20,0xa2,0x61}, -{0x76,0x6d,0xf6,0xad}, {0x7f,0x60,0xfd,0xa3}, {0x64,0x77,0xe0,0xb1}, {0x6d,0x7a,0xeb,0xbf}, -{0x52,0x59,0xda,0x95}, {0x5b,0x54,0xd1,0x9b}, {0x40,0x43,0xcc,0x89}, {0x49,0x4e,0xc7,0x87}, -{0x3e,0x05,0xae,0xdd}, {0x37,0x08,0xa5,0xd3}, {0x2c,0x1f,0xb8,0xc1}, {0x25,0x12,0xb3,0xcf}, -{0x1a,0x31,0x82,0xe5}, {0x13,0x3c,0x89,0xeb}, {0x08,0x2b,0x94,0xf9}, {0x01,0x26,0x9f,0xf7}, -{0xe6,0xbd,0x46,0x4d}, {0xef,0xb0,0x4d,0x43}, {0xf4,0xa7,0x50,0x51}, {0xfd,0xaa,0x5b,0x5f}, -{0xc2,0x89,0x6a,0x75}, {0xcb,0x84,0x61,0x7b}, {0xd0,0x93,0x7c,0x69}, {0xd9,0x9e,0x77,0x67}, -{0xae,0xd5,0x1e,0x3d}, {0xa7,0xd8,0x15,0x33}, {0xbc,0xcf,0x08,0x21}, {0xb5,0xc2,0x03,0x2f}, -{0x8a,0xe1,0x32,0x05}, {0x83,0xec,0x39,0x0b}, {0x98,0xfb,0x24,0x19}, {0x91,0xf6,0x2f,0x17}, -{0x4d,0xd6,0x8d,0x76}, {0x44,0xdb,0x86,0x78}, {0x5f,0xcc,0x9b,0x6a}, {0x56,0xc1,0x90,0x64}, -{0x69,0xe2,0xa1,0x4e}, {0x60,0xef,0xaa,0x40}, {0x7b,0xf8,0xb7,0x52}, {0x72,0xf5,0xbc,0x5c}, -{0x05,0xbe,0xd5,0x06}, {0x0c,0xb3,0xde,0x08}, {0x17,0xa4,0xc3,0x1a}, {0x1e,0xa9,0xc8,0x14}, -{0x21,0x8a,0xf9,0x3e}, {0x28,0x87,0xf2,0x30}, {0x33,0x90,0xef,0x22}, {0x3a,0x9d,0xe4,0x2c}, -{0xdd,0x06,0x3d,0x96}, {0xd4,0x0b,0x36,0x98}, {0xcf,0x1c,0x2b,0x8a}, {0xc6,0x11,0x20,0x84}, -{0xf9,0x32,0x11,0xae}, {0xf0,0x3f,0x1a,0xa0}, {0xeb,0x28,0x07,0xb2}, {0xe2,0x25,0x0c,0xbc}, -{0x95,0x6e,0x65,0xe6}, {0x9c,0x63,0x6e,0xe8}, {0x87,0x74,0x73,0xfa}, {0x8e,0x79,0x78,0xf4}, -{0xb1,0x5a,0x49,0xde}, {0xb8,0x57,0x42,0xd0}, {0xa3,0x40,0x5f,0xc2}, {0xaa,0x4d,0x54,0xcc}, -{0xec,0xda,0xf7,0x41}, {0xe5,0xd7,0xfc,0x4f}, {0xfe,0xc0,0xe1,0x5d}, {0xf7,0xcd,0xea,0x53}, -{0xc8,0xee,0xdb,0x79}, {0xc1,0xe3,0xd0,0x77}, {0xda,0xf4,0xcd,0x65}, {0xd3,0xf9,0xc6,0x6b}, -{0xa4,0xb2,0xaf,0x31}, {0xad,0xbf,0xa4,0x3f}, {0xb6,0xa8,0xb9,0x2d}, {0xbf,0xa5,0xb2,0x23}, -{0x80,0x86,0x83,0x09}, {0x89,0x8b,0x88,0x07}, {0x92,0x9c,0x95,0x15}, {0x9b,0x91,0x9e,0x1b}, -{0x7c,0x0a,0x47,0xa1}, {0x75,0x07,0x4c,0xaf}, {0x6e,0x10,0x51,0xbd}, {0x67,0x1d,0x5a,0xb3}, -{0x58,0x3e,0x6b,0x99}, {0x51,0x33,0x60,0x97}, {0x4a,0x24,0x7d,0x85}, {0x43,0x29,0x76,0x8b}, -{0x34,0x62,0x1f,0xd1}, {0x3d,0x6f,0x14,0xdf}, {0x26,0x78,0x09,0xcd}, {0x2f,0x75,0x02,0xc3}, -{0x10,0x56,0x33,0xe9}, {0x19,0x5b,0x38,0xe7}, {0x02,0x4c,0x25,0xf5}, {0x0b,0x41,0x2e,0xfb}, -{0xd7,0x61,0x8c,0x9a}, {0xde,0x6c,0x87,0x94}, {0xc5,0x7b,0x9a,0x86}, {0xcc,0x76,0x91,0x88}, -{0xf3,0x55,0xa0,0xa2}, {0xfa,0x58,0xab,0xac}, {0xe1,0x4f,0xb6,0xbe}, {0xe8,0x42,0xbd,0xb0}, -{0x9f,0x09,0xd4,0xea}, {0x96,0x04,0xdf,0xe4}, {0x8d,0x13,0xc2,0xf6}, {0x84,0x1e,0xc9,0xf8}, -{0xbb,0x3d,0xf8,0xd2}, {0xb2,0x30,0xf3,0xdc}, {0xa9,0x27,0xee,0xce}, {0xa0,0x2a,0xe5,0xc0}, -{0x47,0xb1,0x3c,0x7a}, {0x4e,0xbc,0x37,0x74}, {0x55,0xab,0x2a,0x66}, {0x5c,0xa6,0x21,0x68}, -{0x63,0x85,0x10,0x42}, {0x6a,0x88,0x1b,0x4c}, {0x71,0x9f,0x06,0x5e}, {0x78,0x92,0x0d,0x50}, -{0x0f,0xd9,0x64,0x0a}, {0x06,0xd4,0x6f,0x04}, {0x1d,0xc3,0x72,0x16}, {0x14,0xce,0x79,0x18}, -{0x2b,0xed,0x48,0x32}, {0x22,0xe0,0x43,0x3c}, {0x39,0xf7,0x5e,0x2e}, {0x30,0xfa,0x55,0x20}, -{0x9a,0xb7,0x01,0xec}, {0x93,0xba,0x0a,0xe2}, {0x88,0xad,0x17,0xf0}, {0x81,0xa0,0x1c,0xfe}, -{0xbe,0x83,0x2d,0xd4}, 
{0xb7,0x8e,0x26,0xda}, {0xac,0x99,0x3b,0xc8}, {0xa5,0x94,0x30,0xc6}, -{0xd2,0xdf,0x59,0x9c}, {0xdb,0xd2,0x52,0x92}, {0xc0,0xc5,0x4f,0x80}, {0xc9,0xc8,0x44,0x8e}, -{0xf6,0xeb,0x75,0xa4}, {0xff,0xe6,0x7e,0xaa}, {0xe4,0xf1,0x63,0xb8}, {0xed,0xfc,0x68,0xb6}, -{0x0a,0x67,0xb1,0x0c}, {0x03,0x6a,0xba,0x02}, {0x18,0x7d,0xa7,0x10}, {0x11,0x70,0xac,0x1e}, -{0x2e,0x53,0x9d,0x34}, {0x27,0x5e,0x96,0x3a}, {0x3c,0x49,0x8b,0x28}, {0x35,0x44,0x80,0x26}, -{0x42,0x0f,0xe9,0x7c}, {0x4b,0x02,0xe2,0x72}, {0x50,0x15,0xff,0x60}, {0x59,0x18,0xf4,0x6e}, -{0x66,0x3b,0xc5,0x44}, {0x6f,0x36,0xce,0x4a}, {0x74,0x21,0xd3,0x58}, {0x7d,0x2c,0xd8,0x56}, -{0xa1,0x0c,0x7a,0x37}, {0xa8,0x01,0x71,0x39}, {0xb3,0x16,0x6c,0x2b}, {0xba,0x1b,0x67,0x25}, -{0x85,0x38,0x56,0x0f}, {0x8c,0x35,0x5d,0x01}, {0x97,0x22,0x40,0x13}, {0x9e,0x2f,0x4b,0x1d}, -{0xe9,0x64,0x22,0x47}, {0xe0,0x69,0x29,0x49}, {0xfb,0x7e,0x34,0x5b}, {0xf2,0x73,0x3f,0x55}, -{0xcd,0x50,0x0e,0x7f}, {0xc4,0x5d,0x05,0x71}, {0xdf,0x4a,0x18,0x63}, {0xd6,0x47,0x13,0x6d}, -{0x31,0xdc,0xca,0xd7}, {0x38,0xd1,0xc1,0xd9}, {0x23,0xc6,0xdc,0xcb}, {0x2a,0xcb,0xd7,0xc5}, -{0x15,0xe8,0xe6,0xef}, {0x1c,0xe5,0xed,0xe1}, {0x07,0xf2,0xf0,0xf3}, {0x0e,0xff,0xfb,0xfd}, -{0x79,0xb4,0x92,0xa7}, {0x70,0xb9,0x99,0xa9}, {0x6b,0xae,0x84,0xbb}, {0x62,0xa3,0x8f,0xb5}, -{0x5d,0x80,0xbe,0x9f}, {0x54,0x8d,0xb5,0x91}, {0x4f,0x9a,0xa8,0x83}, {0x46,0x97,0xa3,0x8d} - } -}; -#define U4 xU4.xt8 - -static const word32 rcon[30] = { - 0x01,0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91 -}; diff --git a/bsd/crypto/rijndael/rijndael-alg-fst.c b/bsd/crypto/rijndael/rijndael-alg-fst.c deleted file mode 100644 index 5cd4857e4..000000000 --- a/bsd/crypto/rijndael/rijndael-alg-fst.c +++ /dev/null @@ -1,488 +0,0 @@ -/* $FreeBSD: src/sys/crypto/rijndael/rijndael-alg-fst.c,v 1.3.2.1 2001/07/03 11:01:35 ume Exp $ */ -/* $KAME: rijndael-alg-fst.c,v 1.7 2001/05/27 00:23:23 itojun Exp $ */ - -/* - * rijndael-alg-fst.c v2.3 April '2000 - * - * Optimised ANSI C code - * - * authors: v1.0: Antoon Bosselaers - * v2.0: Vincent Rijmen - * v2.3: Paulo Barreto - * - * This code is placed in the public domain. 
- */
-
-#include <sys/cdefs.h>
-#include <sys/types.h>
-#ifdef KERNEL
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-#include <crypto/rijndael/rijndael-alg-fst.h>
-#include <crypto/rijndael/rijndael_local.h>
-
-#include <crypto/rijndael/boxes-fst.dat>
-
-int rijndaelKeySched(word8 k[MAXKC][4], word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
-    /* Calculate the necessary round keys
-     * The number of calculations depends on keyBits and blockBits
-     */
-    int j, r, t, rconpointer = 0;
-    union {
-        word8 x8[MAXKC][4];
-        word32 x32[MAXKC];
-    } xtk;
-#define tk xtk.x8
-    int KC = ROUNDS - 6;
-
-    for (j = KC-1; j >= 0; j--) {
-        *((word32*)tk[j]) = *((word32*)k[j]);
-    }
-    r = 0;
-    t = 0;
-    /* copy values into round key array */
-    for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
-        for (; (j < KC) && (t < 4); j++, t++) {
-            *((word32*)W[r][t]) = *((word32*)tk[j]);
-        }
-        if (t == 4) {
-            r++;
-            t = 0;
-        }
-    }
-
-    while (r < ROUNDS + 1) { /* while not enough round key material calculated */
-        /* calculate new values */
-        tk[0][0] ^= S[tk[KC-1][1]];
-        tk[0][1] ^= S[tk[KC-1][2]];
-        tk[0][2] ^= S[tk[KC-1][3]];
-        tk[0][3] ^= S[tk[KC-1][0]];
-        tk[0][0] ^= rcon[rconpointer++];
-
-        if (KC != 8) {
-            for (j = 1; j < KC; j++) {
-                *((word32*)tk[j]) ^= *((word32*)tk[j-1]);
-            }
-        } else {
-            for (j = 1; j < KC/2; j++) {
-                *((word32*)tk[j]) ^= *((word32*)tk[j-1]);
-            }
-            tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
-            tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
-            tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
-            tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
-            for (j = KC/2 + 1; j < KC; j++) {
-                *((word32*)tk[j]) ^= *((word32*)tk[j-1]);
-            }
-        }
-        /* copy values into round key array */
-        for (j = 0; (j < KC) && (r < ROUNDS + 1); ) {
-            for (; (j < KC) && (t < 4); j++, t++) {
-                *((word32*)W[r][t]) = *((word32*)tk[j]);
-            }
-            if (t == 4) {
-                r++;
-                t = 0;
-            }
-        }
-    }
-    return 0;
-#undef tk
-}
-
-int rijndaelKeyEncToDec(word8 W[MAXROUNDS+1][4][4], int ROUNDS) {
-    int r;
-    word8 *w;
-
-    for (r = 1; r < ROUNDS; r++) {
-        w = W[r][0];
-        *((word32*)w) =
-              *((const word32*)U1[w[0]])
-            ^ *((const word32*)U2[w[1]])
-            ^ *((const word32*)U3[w[2]])
-            ^ *((const word32*)U4[w[3]]);
-
-        w = W[r][1];
-        *((word32*)w) =
-              *((const word32*)U1[w[0]])
-            ^ *((const word32*)U2[w[1]])
-            ^ *((const word32*)U3[w[2]])
-            ^ *((const word32*)U4[w[3]]);
-
-        w = W[r][2];
-        *((word32*)w) =
-              *((const word32*)U1[w[0]])
-            ^ *((const word32*)U2[w[1]])
-            ^ *((const word32*)U3[w[2]])
-            ^ *((const word32*)U4[w[3]]);
-
-        w = W[r][3];
-        *((word32*)w) =
-              *((const word32*)U1[w[0]])
-            ^ *((const word32*)U2[w[1]])
-            ^ *((const word32*)U3[w[2]])
-            ^ *((const word32*)U4[w[3]]);
-    }
-    return 0;
-}
-
-/**
- * Encrypt a single block.
- */ -int rijndaelEncrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) { - int r; - union { - word8 x8[16]; - word32 x32[4]; - } xa, xb; -#define a xa.x8 -#define b xb.x8 - union { - word8 x8[4][4]; - word32 x32[4]; - } xtemp; -#define temp xtemp.x8 - - memcpy(a, in, sizeof a); - - *((word32*)temp[0]) = *((word32*)(a )) ^ *((word32*)rk[0][0]); - *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[0][1]); - *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[0][2]); - *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[0][3]); - *((word32*)(b )) = *((const word32*)T1[temp[0][0]]) - ^ *((const word32*)T2[temp[1][1]]) - ^ *((const word32*)T3[temp[2][2]]) - ^ *((const word32*)T4[temp[3][3]]); - *((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]]) - ^ *((const word32*)T2[temp[2][1]]) - ^ *((const word32*)T3[temp[3][2]]) - ^ *((const word32*)T4[temp[0][3]]); - *((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]]) - ^ *((const word32*)T2[temp[3][1]]) - ^ *((const word32*)T3[temp[0][2]]) - ^ *((const word32*)T4[temp[1][3]]); - *((word32*)(b +12)) = *((const word32*)T1[temp[3][0]]) - ^ *((const word32*)T2[temp[0][1]]) - ^ *((const word32*)T3[temp[1][2]]) - ^ *((const word32*)T4[temp[2][3]]); - for (r = 1; r < ROUNDS-1; r++) { - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[r][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]); - - *((word32*)(b )) = *((const word32*)T1[temp[0][0]]) - ^ *((const word32*)T2[temp[1][1]]) - ^ *((const word32*)T3[temp[2][2]]) - ^ *((const word32*)T4[temp[3][3]]); - *((word32*)(b + 4)) = *((const word32*)T1[temp[1][0]]) - ^ *((const word32*)T2[temp[2][1]]) - ^ *((const word32*)T3[temp[3][2]]) - ^ *((const word32*)T4[temp[0][3]]); - *((word32*)(b + 8)) = *((const word32*)T1[temp[2][0]]) - ^ *((const word32*)T2[temp[3][1]]) - ^ *((const word32*)T3[temp[0][2]]) - ^ *((const word32*)T4[temp[1][3]]); - *((word32*)(b +12)) = *((const word32*)T1[temp[3][0]]) - ^ *((const word32*)T2[temp[0][1]]) - ^ *((const word32*)T3[temp[1][2]]) - ^ *((const word32*)T4[temp[2][3]]); - } - /* last round is special */ - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[ROUNDS-1][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[ROUNDS-1][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[ROUNDS-1][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[ROUNDS-1][3]); - b[ 0] = T1[temp[0][0]][1]; - b[ 1] = T1[temp[1][1]][1]; - b[ 2] = T1[temp[2][2]][1]; - b[ 3] = T1[temp[3][3]][1]; - b[ 4] = T1[temp[1][0]][1]; - b[ 5] = T1[temp[2][1]][1]; - b[ 6] = T1[temp[3][2]][1]; - b[ 7] = T1[temp[0][3]][1]; - b[ 8] = T1[temp[2][0]][1]; - b[ 9] = T1[temp[3][1]][1]; - b[10] = T1[temp[0][2]][1]; - b[11] = T1[temp[1][3]][1]; - b[12] = T1[temp[3][0]][1]; - b[13] = T1[temp[0][1]][1]; - b[14] = T1[temp[1][2]][1]; - b[15] = T1[temp[2][3]][1]; - *((word32*)(b )) ^= *((word32*)rk[ROUNDS][0]); - *((word32*)(b+ 4)) ^= *((word32*)rk[ROUNDS][1]); - *((word32*)(b+ 8)) ^= *((word32*)rk[ROUNDS][2]); - *((word32*)(b+12)) ^= *((word32*)rk[ROUNDS][3]); - - memcpy(out, b, sizeof b /* XXX out */); - - return 0; -#undef a -#undef b -#undef temp -} - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * Encrypt only a certain number of rounds. - * Only used in the Intermediate Value Known Answer Test. 
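A note on the "last round is special" code above (an observation, not part of the patch): the final AES round omits MixColumns, so only the raw S-box output is needed. Each T1 row packs S[x] multiplied by the MixColumns coefficients (2, 1, 1, 3, in some lane order), and the code relies on lane 1 holding S[x] itself, which is why the final-round assignments index T1[...][1] byte-wise instead of doing a full 32-bit table lookup.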
- */ -int rijndaelEncryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) { - int r; - word8 temp[4][4]; - - /* make number of rounds sane */ - if (rounds > ROUNDS) { - rounds = ROUNDS; - } - - *((word32*)a[0]) = *((word32*)a[0]) ^ *((word32*)rk[0][0]); - *((word32*)a[1]) = *((word32*)a[1]) ^ *((word32*)rk[0][1]); - *((word32*)a[2]) = *((word32*)a[2]) ^ *((word32*)rk[0][2]); - *((word32*)a[3]) = *((word32*)a[3]) ^ *((word32*)rk[0][3]); - - for (r = 1; (r <= rounds) && (r < ROUNDS); r++) { - *((word32*)temp[0]) = *((word32*)T1[a[0][0]]) - ^ *((word32*)T2[a[1][1]]) - ^ *((word32*)T3[a[2][2]]) - ^ *((word32*)T4[a[3][3]]); - *((word32*)temp[1]) = *((word32*)T1[a[1][0]]) - ^ *((word32*)T2[a[2][1]]) - ^ *((word32*)T3[a[3][2]]) - ^ *((word32*)T4[a[0][3]]); - *((word32*)temp[2]) = *((word32*)T1[a[2][0]]) - ^ *((word32*)T2[a[3][1]]) - ^ *((word32*)T3[a[0][2]]) - ^ *((word32*)T4[a[1][3]]); - *((word32*)temp[3]) = *((word32*)T1[a[3][0]]) - ^ *((word32*)T2[a[0][1]]) - ^ *((word32*)T3[a[1][2]]) - ^ *((word32*)T4[a[2][3]]); - *((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[r][0]); - *((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[r][1]); - *((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[r][2]); - *((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[r][3]); - } - if (rounds == ROUNDS) { - /* last round is special */ - temp[0][0] = T1[a[0][0]][1]; - temp[0][1] = T1[a[1][1]][1]; - temp[0][2] = T1[a[2][2]][1]; - temp[0][3] = T1[a[3][3]][1]; - temp[1][0] = T1[a[1][0]][1]; - temp[1][1] = T1[a[2][1]][1]; - temp[1][2] = T1[a[3][2]][1]; - temp[1][3] = T1[a[0][3]][1]; - temp[2][0] = T1[a[2][0]][1]; - temp[2][1] = T1[a[3][1]][1]; - temp[2][2] = T1[a[0][2]][1]; - temp[2][3] = T1[a[1][3]][1]; - temp[3][0] = T1[a[3][0]][1]; - temp[3][1] = T1[a[0][1]][1]; - temp[3][2] = T1[a[1][2]][1]; - temp[3][3] = T1[a[2][3]][1]; - *((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[ROUNDS][0]); - *((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[ROUNDS][1]); - *((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[ROUNDS][2]); - *((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[ROUNDS][3]); - } - - return 0; -} -#endif /* INTERMEDIATE_VALUE_KAT */ - -/** - * Decrypt a single block. 
- */ -int rijndaelDecrypt(word8 in[16], word8 out[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) { - int r; - union { - word8 x8[16]; - word32 x32[4]; - } xa, xb; -#define a xa.x8 -#define b xb.x8 - union { - word8 x8[4][4]; - word32 x32[4]; - } xtemp; -#define temp xtemp.x8 - - memcpy(a, in, sizeof a); - - *((word32*)temp[0]) = *((word32*)(a )) ^ *((word32*)rk[ROUNDS][0]); - *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[ROUNDS][1]); - *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[ROUNDS][2]); - *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[ROUNDS][3]); - - *((word32*)(b )) = *((const word32*)T5[temp[0][0]]) - ^ *((const word32*)T6[temp[3][1]]) - ^ *((const word32*)T7[temp[2][2]]) - ^ *((const word32*)T8[temp[1][3]]); - *((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]]) - ^ *((const word32*)T6[temp[0][1]]) - ^ *((const word32*)T7[temp[3][2]]) - ^ *((const word32*)T8[temp[2][3]]); - *((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]]) - ^ *((const word32*)T6[temp[1][1]]) - ^ *((const word32*)T7[temp[0][2]]) - ^ *((const word32*)T8[temp[3][3]]); - *((word32*)(b+12)) = *((const word32*)T5[temp[3][0]]) - ^ *((const word32*)T6[temp[2][1]]) - ^ *((const word32*)T7[temp[1][2]]) - ^ *((const word32*)T8[temp[0][3]]); - for (r = ROUNDS-1; r > 1; r--) { - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[r][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]); - *((word32*)(b )) = *((const word32*)T5[temp[0][0]]) - ^ *((const word32*)T6[temp[3][1]]) - ^ *((const word32*)T7[temp[2][2]]) - ^ *((const word32*)T8[temp[1][3]]); - *((word32*)(b+ 4)) = *((const word32*)T5[temp[1][0]]) - ^ *((const word32*)T6[temp[0][1]]) - ^ *((const word32*)T7[temp[3][2]]) - ^ *((const word32*)T8[temp[2][3]]); - *((word32*)(b+ 8)) = *((const word32*)T5[temp[2][0]]) - ^ *((const word32*)T6[temp[1][1]]) - ^ *((const word32*)T7[temp[0][2]]) - ^ *((const word32*)T8[temp[3][3]]); - *((word32*)(b+12)) = *((const word32*)T5[temp[3][0]]) - ^ *((const word32*)T6[temp[2][1]]) - ^ *((const word32*)T7[temp[1][2]]) - ^ *((const word32*)T8[temp[0][3]]); - } - /* last round is special */ - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[1][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[1][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[1][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[1][3]); - b[ 0] = S5[temp[0][0]]; - b[ 1] = S5[temp[3][1]]; - b[ 2] = S5[temp[2][2]]; - b[ 3] = S5[temp[1][3]]; - b[ 4] = S5[temp[1][0]]; - b[ 5] = S5[temp[0][1]]; - b[ 6] = S5[temp[3][2]]; - b[ 7] = S5[temp[2][3]]; - b[ 8] = S5[temp[2][0]]; - b[ 9] = S5[temp[1][1]]; - b[10] = S5[temp[0][2]]; - b[11] = S5[temp[3][3]]; - b[12] = S5[temp[3][0]]; - b[13] = S5[temp[2][1]]; - b[14] = S5[temp[1][2]]; - b[15] = S5[temp[0][3]]; - *((word32*)(b )) ^= *((word32*)rk[0][0]); - *((word32*)(b+ 4)) ^= *((word32*)rk[0][1]); - *((word32*)(b+ 8)) ^= *((word32*)rk[0][2]); - *((word32*)(b+12)) ^= *((word32*)rk[0][3]); - - memcpy(out, b, sizeof b /* XXX out */); - - return 0; -#undef a -#undef b -#undef temp -} - - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * Decrypt only a certain number of rounds. - * Only used in the Intermediate Value Known Answer Test. - * Operations rearranged such that the intermediate values - * of decryption correspond with the intermediate values - * of encryption. 
- */ -int rijndaelDecryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) { - int r, i; - word8 temp[4], shift; - - /* make number of rounds sane */ - if (rounds > ROUNDS) { - rounds = ROUNDS; - } - /* first round is special: */ - *(word32 *)a[0] ^= *(word32 *)rk[ROUNDS][0]; - *(word32 *)a[1] ^= *(word32 *)rk[ROUNDS][1]; - *(word32 *)a[2] ^= *(word32 *)rk[ROUNDS][2]; - *(word32 *)a[3] ^= *(word32 *)rk[ROUNDS][3]; - for (i = 0; i < 4; i++) { - a[i][0] = Si[a[i][0]]; - a[i][1] = Si[a[i][1]]; - a[i][2] = Si[a[i][2]]; - a[i][3] = Si[a[i][3]]; - } - for (i = 1; i < 4; i++) { - shift = (4 - i) & 3; - temp[0] = a[(0 + shift) & 3][i]; - temp[1] = a[(1 + shift) & 3][i]; - temp[2] = a[(2 + shift) & 3][i]; - temp[3] = a[(3 + shift) & 3][i]; - a[0][i] = temp[0]; - a[1][i] = temp[1]; - a[2][i] = temp[2]; - a[3][i] = temp[3]; - } - /* ROUNDS-1 ordinary rounds */ - for (r = ROUNDS-1; r > rounds; r--) { - *(word32 *)a[0] ^= *(word32 *)rk[r][0]; - *(word32 *)a[1] ^= *(word32 *)rk[r][1]; - *(word32 *)a[2] ^= *(word32 *)rk[r][2]; - *(word32 *)a[3] ^= *(word32 *)rk[r][3]; - - *((word32*)a[0]) = - *((word32*)U1[a[0][0]]) - ^ *((word32*)U2[a[0][1]]) - ^ *((word32*)U3[a[0][2]]) - ^ *((word32*)U4[a[0][3]]); - - *((word32*)a[1]) = - *((word32*)U1[a[1][0]]) - ^ *((word32*)U2[a[1][1]]) - ^ *((word32*)U3[a[1][2]]) - ^ *((word32*)U4[a[1][3]]); - - *((word32*)a[2]) = - *((word32*)U1[a[2][0]]) - ^ *((word32*)U2[a[2][1]]) - ^ *((word32*)U3[a[2][2]]) - ^ *((word32*)U4[a[2][3]]); - - *((word32*)a[3]) = - *((word32*)U1[a[3][0]]) - ^ *((word32*)U2[a[3][1]]) - ^ *((word32*)U3[a[3][2]]) - ^ *((word32*)U4[a[3][3]]); - for (i = 0; i < 4; i++) { - a[i][0] = Si[a[i][0]]; - a[i][1] = Si[a[i][1]]; - a[i][2] = Si[a[i][2]]; - a[i][3] = Si[a[i][3]]; - } - for (i = 1; i < 4; i++) { - shift = (4 - i) & 3; - temp[0] = a[(0 + shift) & 3][i]; - temp[1] = a[(1 + shift) & 3][i]; - temp[2] = a[(2 + shift) & 3][i]; - temp[3] = a[(3 + shift) & 3][i]; - a[0][i] = temp[0]; - a[1][i] = temp[1]; - a[2][i] = temp[2]; - a[3][i] = temp[3]; - } - } - if (rounds == 0) { - /* End with the extra key addition */ - *(word32 *)a[0] ^= *(word32 *)rk[0][0]; - *(word32 *)a[1] ^= *(word32 *)rk[0][1]; - *(word32 *)a[2] ^= *(word32 *)rk[0][2]; - *(word32 *)a[3] ^= *(word32 *)rk[0][3]; - } - return 0; -} -#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/bsd/crypto/rijndael/rijndael-alg-fst.h b/bsd/crypto/rijndael/rijndael-alg-fst.h deleted file mode 100644 index 811ce60d1..000000000 --- a/bsd/crypto/rijndael/rijndael-alg-fst.h +++ /dev/null @@ -1,34 +0,0 @@ -/* $FreeBSD: src/sys/crypto/rijndael/rijndael-alg-fst.h,v 1.2.2.1 2001/07/03 11:01:35 ume Exp $ */ -/* $KAME: rijndael-alg-fst.h,v 1.4 2000/10/02 17:14:26 itojun Exp $ */ - -/* - * rijndael-alg-fst.h v2.3 April '2000 - * - * Optimised ANSI C code - * - * #define INTERMEDIATE_VALUE_KAT to generate the Intermediate Value Known Answer Test. 
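Before the API layer below, a small illustrative helper (not from the patch; the function name is hypothetical) that captures the key-size/round-count relation the deleted code relies on: rijndaelKeySched() recovers KC = ROUNDS - 6 because its callers set ROUNDS = keyBits/32 + 6, exactly as rijndael_makeKey() does below.

#include <assert.h>

/* Hypothetical helper, for illustration only. */
int rijndael_rounds(int keyBits)
{
    assert(keyBits == 128 || keyBits == 192 || keyBits == 256);
    return keyBits / 32 + 6;    /* 10, 12 or 14 rounds */
}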
- */
-
-#ifndef __RIJNDAEL_ALG_FST_H
-#define __RIJNDAEL_ALG_FST_H
-
-#define RIJNDAEL_MAXKC (256/32)
-#define RIJNDAEL_MAXROUNDS 14
-
-int rijndaelKeySched(u_int8_t k[RIJNDAEL_MAXKC][4], u_int8_t rk[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS);
-
-int rijndaelKeyEncToDec(u_int8_t W[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS);
-
-int rijndaelEncrypt(u_int8_t a[16], u_int8_t b[16], u_int8_t rk[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS);
-
-#ifdef INTERMEDIATE_VALUE_KAT
-int rijndaelEncryptRound(u_int8_t a[4][4], u_int8_t rk[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS, int rounds);
-#endif /* INTERMEDIATE_VALUE_KAT */
-
-int rijndaelDecrypt(u_int8_t a[16], u_int8_t b[16], u_int8_t rk[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS);
-
-#ifdef INTERMEDIATE_VALUE_KAT
-int rijndaelDecryptRound(u_int8_t a[4][4], u_int8_t rk[RIJNDAEL_MAXROUNDS+1][4][4], int ROUNDS, int rounds);
-#endif /* INTERMEDIATE_VALUE_KAT */
-
-#endif /* __RIJNDAEL_ALG_FST_H */
diff --git a/bsd/crypto/rijndael/rijndael-api-fst.c b/bsd/crypto/rijndael/rijndael-api-fst.c
deleted file mode 100644
index 295bab387..000000000
--- a/bsd/crypto/rijndael/rijndael-api-fst.c
+++ /dev/null
@@ -1,484 +0,0 @@
-/* $FreeBSD: src/sys/crypto/rijndael/rijndael-api-fst.c,v 1.2.2.1 2001/07/03 11:01:35 ume Exp $ */
-/* $KAME: rijndael-api-fst.c,v 1.10 2001/05/27 09:34:18 itojun Exp $ */
-
-/*
- * rijndael-api-fst.c   v2.3   April '2000
- *
- * Optimised ANSI C code
- *
- * authors: v1.0: Antoon Bosselaers
- *          v2.0: Vincent Rijmen
- *          v2.1: Vincent Rijmen
- *          v2.2: Vincent Rijmen
- *          v2.3: Paulo Barreto
- *          v2.4: Vincent Rijmen
- *
- * This code is placed in the public domain.
- */
-
-#include <sys/cdefs.h>
-#include <sys/types.h>
-#ifdef KERNEL
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-#include <crypto/rijndael/rijndael-alg-fst.h>
-#include <crypto/rijndael/rijndael-api-fst.h>
-#include <crypto/rijndael/rijndael_local.h>
-
-int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) {
-    word8 k[MAXKC][4];
-    int i;
-    char *keyMat;
-
-    if (key == NULL) {
-        return BAD_KEY_INSTANCE;
-    }
-
-    if ((direction == DIR_ENCRYPT) || (direction == DIR_DECRYPT)) {
-        key->direction = direction;
-    } else {
-        return BAD_KEY_DIR;
-    }
-
-    if ((keyLen == 128) || (keyLen == 192) || (keyLen == 256)) {
-        key->keyLen = keyLen;
-    } else {
-        return BAD_KEY_MAT;
-    }
-
-    if (keyMaterial != NULL) {
-        bcopy(keyMaterial, key->keyMaterial, keyLen/8);
-    }
-
-    key->ROUNDS = keyLen/32 + 6;
-
-    /* initialize key schedule: */
-    keyMat = key->keyMaterial;
-    for (i = 0; i < key->keyLen/8; i++) {
-        k[i >> 2][i & 3] = (word8)keyMat[i];
-    }
-    rijndaelKeySched(k, key->keySched, key->ROUNDS);
-    if (direction == DIR_DECRYPT) {
-        rijndaelKeyEncToDec(key->keySched, key->ROUNDS);
-    }
-
-    return TRUE;
-}
-
-int rijndael_cipherInit(cipherInstance *cipher, BYTE mode, char *IV) {
-    if ((mode == MODE_ECB) || (mode == MODE_CBC) || (mode == MODE_CFB1)) {
-        cipher->mode = mode;
-    } else {
-        return BAD_CIPHER_MODE;
-    }
-    if (IV != NULL) {
-        bcopy(IV, cipher->IV, MAX_IV_SIZE);
-    } else {
-        bzero(cipher->IV, MAX_IV_SIZE);
-    }
-    return TRUE;
-}
-
-int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
-        BYTE *input, int inputLen, BYTE *outBuffer) {
-    int i, k, numBlocks;
-    word8 block[16], iv[4][4];
-
-    if (cipher == NULL ||
-        key == NULL ||
-        key->direction == DIR_DECRYPT) {
-        return BAD_CIPHER_STATE;
-    }
-    if (input == NULL || inputLen <= 0) {
-        return 0; /* nothing to do */
-    }
-
-    numBlocks = inputLen/128;
-
-    switch (cipher->mode) {
-    case MODE_ECB:
-        for (i = numBlocks; i > 0; i--) {
-            rijndaelEncrypt(input, outBuffer, key->keySched, key->ROUNDS);
-            input += 16;
-            outBuffer += 16;
-        }
-        break;
-
-    case MODE_CBC:
-#if 1
/*STRICT_ALIGN*/ - bcopy(cipher->IV, block, 16); - bcopy(input, iv, 16); - ((word32*)block)[0] ^= ((word32*)iv)[0]; - ((word32*)block)[1] ^= ((word32*)iv)[1]; - ((word32*)block)[2] ^= ((word32*)iv)[2]; - ((word32*)block)[3] ^= ((word32*)iv)[3]; -#else - ((word32*)block)[0] = ((word32*)cipher->IV)[0] ^ ((word32*)input)[0]; - ((word32*)block)[1] = ((word32*)cipher->IV)[1] ^ ((word32*)input)[1]; - ((word32*)block)[2] = ((word32*)cipher->IV)[2] ^ ((word32*)input)[2]; - ((word32*)block)[3] = ((word32*)cipher->IV)[3] ^ ((word32*)input)[3]; -#endif - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - input += 16; - for (i = numBlocks - 1; i > 0; i--) { -#if 1 /*STRICT_ALIGN*/ - bcopy(outBuffer, block, 16); - ((word32*)block)[0] ^= ((word32*)iv)[0]; - ((word32*)block)[1] ^= ((word32*)iv)[1]; - ((word32*)block)[2] ^= ((word32*)iv)[2]; - ((word32*)block)[3] ^= ((word32*)iv)[3]; -#else - ((word32*)block)[0] = ((word32*)outBuffer)[0] ^ ((word32*)input)[0]; - ((word32*)block)[1] = ((word32*)outBuffer)[1] ^ ((word32*)input)[1]; - ((word32*)block)[2] = ((word32*)outBuffer)[2] ^ ((word32*)input)[2]; - ((word32*)block)[3] = ((word32*)outBuffer)[3] ^ ((word32*)input)[3]; -#endif - outBuffer += 16; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - input += 16; - } - break; - - case MODE_CFB1: -#if 1 /*STRICT_ALIGN*/ - bcopy(cipher->IV, iv, 16); -#else /* !STRICT_ALIGN */ - *((word32*)iv[0]) = *((word32*)(cipher->IV )); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif /* ?STRICT_ALIGN */ - for (i = numBlocks; i > 0; i--) { - for (k = 0; k < 128; k++) { - *((word32*) block ) = *((word32*)iv[0]); - *((word32*)(block+ 4)) = *((word32*)iv[1]); - *((word32*)(block+ 8)) = *((word32*)iv[2]); - *((word32*)(block+12)) = *((word32*)iv[3]); - rijndaelEncrypt(block, block, key->keySched, key->ROUNDS); - outBuffer[k/8] ^= (block[0] & 0x80) >> (k & 7); - iv[0][0] = (iv[0][0] << 1) | (iv[0][1] >> 7); - iv[0][1] = (iv[0][1] << 1) | (iv[0][2] >> 7); - iv[0][2] = (iv[0][2] << 1) | (iv[0][3] >> 7); - iv[0][3] = (iv[0][3] << 1) | (iv[1][0] >> 7); - iv[1][0] = (iv[1][0] << 1) | (iv[1][1] >> 7); - iv[1][1] = (iv[1][1] << 1) | (iv[1][2] >> 7); - iv[1][2] = (iv[1][2] << 1) | (iv[1][3] >> 7); - iv[1][3] = (iv[1][3] << 1) | (iv[2][0] >> 7); - iv[2][0] = (iv[2][0] << 1) | (iv[2][1] >> 7); - iv[2][1] = (iv[2][1] << 1) | (iv[2][2] >> 7); - iv[2][2] = (iv[2][2] << 1) | (iv[2][3] >> 7); - iv[2][3] = (iv[2][3] << 1) | (iv[3][0] >> 7); - iv[3][0] = (iv[3][0] << 1) | (iv[3][1] >> 7); - iv[3][1] = (iv[3][1] << 1) | (iv[3][2] >> 7); - iv[3][2] = (iv[3][2] << 1) | (iv[3][3] >> 7); - iv[3][3] = (iv[3][3] << 1) | ((outBuffer[k/8] >> (7-(k&7))) & 1); - } - } - break; - - default: - return BAD_CIPHER_STATE; - } - - return 128*numBlocks; -} - -/** - * Encrypt data partitioned in octets, using RFC 2040-like padding. - * - * @param input data to be encrypted (octet sequence) - * @param inputOctets input length in octets (not bits) - * @param outBuffer encrypted output data - * - * @return length in octets (not bits) of the encrypted output buffer. 
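A sketch of the RFC 2040-style rule the doc comment above describes (illustrative only; pad_block is a hypothetical name): the final block is filled with padLen copies of the value padLen, where padLen = 16 - (inputOctets mod 16) is always between 1 and 16. Note, incidentally, that the deleted function below checks "if (padLen > 0 && padLen <= 16) panic(...)", which fires exactly when padLen is in its valid range; since padLen is always in that range, the sense appears inverted from an assertion of the valid condition.

#include <string.h>

/* Build the final padded block; illustrative, not part of the patch. */
static void
pad_block(unsigned char block[16], const unsigned char *tail, int tailLen)
{
    int padLen = 16 - tailLen;          /* tailLen in 0..15 => padLen in 1..16 */

    memcpy(block, tail, tailLen);       /* leftover plaintext bytes, if any */
    memset(block + tailLen, padLen, padLen);  /* padLen bytes of value padLen */
}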
- */ -int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { - int i, numBlocks, padLen; - word8 block[16], *iv, *cp; - - if (cipher == NULL || - key == NULL || - key->direction == DIR_DECRYPT) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputOctets <= 0) { - return 0; /* nothing to do */ - } - - numBlocks = inputOctets/16; - - switch (cipher->mode) { - case MODE_ECB: - for (i = numBlocks; i > 0; i--) { - rijndaelEncrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - padLen = 16 - (inputOctets - 16*numBlocks); - if (padLen > 0 && padLen <= 16) - panic("rijndael_padEncrypt(ECB)"); - bcopy(input, block, 16 - padLen); - for (cp = block + 16 - padLen; cp < block + 16; cp++) - *cp = padLen; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - break; - - case MODE_CBC: - iv = cipher->IV; - for (i = numBlocks; i > 0; i--) { - ((word32*)block)[0] = ((word32*)input)[0] ^ ((word32*)iv)[0]; - ((word32*)block)[1] = ((word32*)input)[1] ^ ((word32*)iv)[1]; - ((word32*)block)[2] = ((word32*)input)[2] ^ ((word32*)iv)[2]; - ((word32*)block)[3] = ((word32*)input)[3] ^ ((word32*)iv)[3]; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - iv = outBuffer; - input += 16; - outBuffer += 16; - } - padLen = 16 - (inputOctets - 16*numBlocks); - if (padLen > 0 && padLen <= 16) - panic("rijndael_padEncrypt(CBC)"); - for (i = 0; i < 16 - padLen; i++) { - block[i] = input[i] ^ iv[i]; - } - for (i = 16 - padLen; i < 16; i++) { - block[i] = (BYTE)padLen ^ iv[i]; - } - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - break; - - default: - return BAD_CIPHER_STATE; - } - - return 16*(numBlocks + 1); -} - -int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { - int i, k, numBlocks; - word8 block[16], iv[4][4]; - - if (cipher == NULL || - key == NULL || - (cipher->mode != MODE_CFB1 && key->direction == DIR_ENCRYPT)) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputLen <= 0) { - return 0; /* nothing to do */ - } - - numBlocks = inputLen/128; - - switch (cipher->mode) { - case MODE_ECB: - for (i = numBlocks; i > 0; i--) { - rijndaelDecrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - break; - - case MODE_CBC: -#if 1 /*STRICT_ALIGN */ - bcopy(cipher->IV, iv, 16); -#else - *((word32*)iv[0]) = *((word32*)(cipher->IV )); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif - for (i = numBlocks; i > 0; i--) { - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= *((word32*)iv[0]); - ((word32*)block)[1] ^= *((word32*)iv[1]); - ((word32*)block)[2] ^= *((word32*)iv[2]); - ((word32*)block)[3] ^= *((word32*)iv[3]); -#if 1 /*STRICT_ALIGN*/ - bcopy(input, iv, 16); - bcopy(block, outBuffer, 16); -#else - *((word32*)iv[0]) = ((word32*)input)[0]; ((word32*)outBuffer)[0] = ((word32*)block)[0]; - *((word32*)iv[1]) = ((word32*)input)[1]; ((word32*)outBuffer)[1] = ((word32*)block)[1]; - *((word32*)iv[2]) = ((word32*)input)[2]; ((word32*)outBuffer)[2] = ((word32*)block)[2]; - *((word32*)iv[3]) = ((word32*)input)[3]; ((word32*)outBuffer)[3] = ((word32*)block)[3]; -#endif - input += 16; - outBuffer += 16; - } - break; - - case MODE_CFB1: -#if 1 /*STRICT_ALIGN */ - bcopy(cipher->IV, iv, 16); -#else - *((word32*)iv[0]) = 
*((word32*)(cipher->IV)); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif - for (i = numBlocks; i > 0; i--) { - for (k = 0; k < 128; k++) { - *((word32*) block ) = *((word32*)iv[0]); - *((word32*)(block+ 4)) = *((word32*)iv[1]); - *((word32*)(block+ 8)) = *((word32*)iv[2]); - *((word32*)(block+12)) = *((word32*)iv[3]); - rijndaelEncrypt(block, block, key->keySched, key->ROUNDS); - iv[0][0] = (iv[0][0] << 1) | (iv[0][1] >> 7); - iv[0][1] = (iv[0][1] << 1) | (iv[0][2] >> 7); - iv[0][2] = (iv[0][2] << 1) | (iv[0][3] >> 7); - iv[0][3] = (iv[0][3] << 1) | (iv[1][0] >> 7); - iv[1][0] = (iv[1][0] << 1) | (iv[1][1] >> 7); - iv[1][1] = (iv[1][1] << 1) | (iv[1][2] >> 7); - iv[1][2] = (iv[1][2] << 1) | (iv[1][3] >> 7); - iv[1][3] = (iv[1][3] << 1) | (iv[2][0] >> 7); - iv[2][0] = (iv[2][0] << 1) | (iv[2][1] >> 7); - iv[2][1] = (iv[2][1] << 1) | (iv[2][2] >> 7); - iv[2][2] = (iv[2][2] << 1) | (iv[2][3] >> 7); - iv[2][3] = (iv[2][3] << 1) | (iv[3][0] >> 7); - iv[3][0] = (iv[3][0] << 1) | (iv[3][1] >> 7); - iv[3][1] = (iv[3][1] << 1) | (iv[3][2] >> 7); - iv[3][2] = (iv[3][2] << 1) | (iv[3][3] >> 7); - iv[3][3] = (iv[3][3] << 1) | ((input[k/8] >> (7-(k&7))) & 1); - outBuffer[k/8] ^= (block[0] & 0x80) >> (k & 7); - } - } - break; - - default: - return BAD_CIPHER_STATE; - } - - return 128*numBlocks; -} - -int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { - int i, numBlocks, padLen; - word8 block[16]; - word32 iv[4]; - - if (cipher == NULL || - key == NULL || - key->direction == DIR_ENCRYPT) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputOctets <= 0) { - return 0; /* nothing to do */ - } - if (inputOctets % 16 != 0) { - return BAD_DATA; - } - - numBlocks = inputOctets/16; - - switch (cipher->mode) { - case MODE_ECB: - /* all blocks but last */ - for (i = numBlocks - 1; i > 0; i--) { - rijndaelDecrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - /* last block */ - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - padLen = block[15]; - if (padLen >= 16) { - return BAD_DATA; - } - for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) { - return BAD_DATA; - } - } - bcopy(block, outBuffer, 16 - padLen); - break; - - case MODE_CBC: - bcopy(cipher->IV, iv, 16); - /* all blocks but last */ - for (i = numBlocks - 1; i > 0; i--) { - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= iv[0]; - ((word32*)block)[1] ^= iv[1]; - ((word32*)block)[2] ^= iv[2]; - ((word32*)block)[3] ^= iv[3]; - bcopy(input, iv, 16); - bcopy(block, outBuffer, 16); - input += 16; - outBuffer += 16; - } - /* last block */ - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= iv[0]; - ((word32*)block)[1] ^= iv[1]; - ((word32*)block)[2] ^= iv[2]; - ((word32*)block)[3] ^= iv[3]; - padLen = block[15]; - if (padLen <= 0 || padLen > 16) { - return BAD_DATA; - } - for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) { - return BAD_DATA; - } - } - bcopy(block, outBuffer, 16 - padLen); - break; - - default: - return BAD_CIPHER_STATE; - } - - return 16*numBlocks - padLen; -} - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * cipherUpdateRounds: - * - * Encrypts/Decrypts exactly one full block a specified number of rounds. - * Only used in the Intermediate Value Known Answer Test. 
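The 16 explicit byte assignments in the CFB1 loops above implement a 128-bit left shift of the IV feedback register with one new bit fed in at the bottom. An equivalent compact form (illustrative sketch, not part of the patch):

/* Shift the 128-bit feedback register left one bit. */
static void
cfb1_shift(unsigned char iv[16], unsigned int newbit)
{
    int i;

    for (i = 0; i < 15; i++)
        iv[i] = (unsigned char)((iv[i] << 1) | (iv[i + 1] >> 7));
    iv[15] = (unsigned char)((iv[15] << 1) | (newbit & 1));
}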
- *
- * Returns:
- *   TRUE - on success
- *   BAD_CIPHER_STATE - cipher in bad state (e.g., not initialized)
- */
-int rijndael_cipherUpdateRounds(cipherInstance *cipher, keyInstance *key,
-        BYTE *input, int inputLen, BYTE *outBuffer, int rounds) {
-    int j;
-    word8 block[4][4];
-
-    if (cipher == NULL || key == NULL) {
-        return BAD_CIPHER_STATE;
-    }
-
-    for (j = 3; j >= 0; j--) {
-        /* parse input stream into rectangular array */
-        *((word32*)block[j]) = *((word32*)(input+4*j));
-    }
-
-    switch (key->direction) {
-    case DIR_ENCRYPT:
-        rijndaelEncryptRound(block, key->keySched, key->ROUNDS, rounds);
-        break;
-
-    case DIR_DECRYPT:
-        rijndaelDecryptRound(block, key->keySched, key->ROUNDS, rounds);
-        break;
-
-    default:
-        return BAD_KEY_DIR;
-    }
-
-    for (j = 3; j >= 0; j--) {
-        /* parse rectangular array into output ciphertext bytes */
-        *((word32*)(outBuffer+4*j)) = *((word32*)block[j]);
-    }
-
-    return TRUE;
-}
-#endif /* INTERMEDIATE_VALUE_KAT */
diff --git a/bsd/crypto/rijndael/rijndael-api-fst.h b/bsd/crypto/rijndael/rijndael-api-fst.h
deleted file mode 100644
index 682f2da69..000000000
--- a/bsd/crypto/rijndael/rijndael-api-fst.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* $FreeBSD: src/sys/crypto/rijndael/rijndael-api-fst.h,v 1.2.2.1 2001/07/03 11:01:36 ume Exp $ */
-/* $KAME: rijndael-api-fst.h,v 1.6 2001/05/27 00:23:23 itojun Exp $ */
-
-/*
- * rijndael-api-fst.h   v2.3   April '2000
- *
- * Optimised ANSI C code
- *
- * #define INTERMEDIATE_VALUE_KAT to generate the Intermediate Value Known Answer Test.
- */
-
-#ifndef __RIJNDAEL_API_FST_H
-#define __RIJNDAEL_API_FST_H
-
-#include <crypto/rijndael/rijndael-alg-fst.h>
-
-/* Defines:
-   Add any additional defines you need
-*/
-
-#define DIR_ENCRYPT          0  /* Are we encrpyting? */
-#define DIR_DECRYPT          1  /* Are we decrpyting? */
-#define MODE_ECB             1  /* Are we ciphering in ECB mode? */
-#define MODE_CBC             2  /* Are we ciphering in CBC mode? */
-#define MODE_CFB1            3  /* Are we ciphering in 1-bit CFB mode? */
-#define TRUE                 1
-#define FALSE                0
-#define BITSPERBLOCK       128  /* Default number of bits in a cipher block */
-
-/* Error Codes - CHANGE POSSIBLE: inclusion of additional error codes */
-#define BAD_KEY_DIR         -1  /* Key direction is invalid, e.g., unknown value */
-#define BAD_KEY_MAT         -2  /* Key material not of correct length */
-#define BAD_KEY_INSTANCE    -3  /* Key passed is not valid */
-#define BAD_CIPHER_MODE     -4  /* Params struct passed to cipherInit invalid */
-#define BAD_CIPHER_STATE    -5  /* Cipher in wrong state (e.g., not initialized) */
-#define BAD_BLOCK_LENGTH    -6
-#define BAD_CIPHER_INSTANCE -7
-#define BAD_DATA            -8  /* Data contents are invalid, e.g., invalid padding */
-#define BAD_OTHER           -9  /* Unknown error */
-
-/* CHANGE POSSIBLE: inclusion of algorithm specific defines */
-#define MAX_KEY_SIZE        64  /* # of ASCII char's needed to represent a key */
-#define MAX_IV_SIZE         16  /* # bytes needed to represent an IV */
-
-/* Typedefs:
-
-   Typedef'ed data storage elements.  Add any algorithm specific
-parameters at the bottom of the structs as appropriate.
-*/
-
-/* The structure for key information */
-typedef struct {
-    u_int8_t direction;         /* Key used for encrypting or decrypting? */
-    int keyLen;                 /* Length of the key */
-    char keyMaterial[MAX_KEY_SIZE+1];  /* Raw key data in ASCII, e.g., user input or KAT values */
-    /* The following parameters are algorithm dependent, replace or add as necessary */
-    int ROUNDS;                 /* key-length-dependent number of rounds */
-    int blockLen;               /* block length */
-    union {
-        u_int8_t xkS8[RIJNDAEL_MAXROUNDS+1][4][4];  /* key schedule */
-        u_int32_t xkS32[RIJNDAEL_MAXROUNDS+1][4];   /* key schedule */
-    } xKeySched;
-#define keySched xKeySched.xkS8
-} keyInstance;
-
-/* The structure for cipher information */
-typedef struct {                /* changed order of the components */
-    u_int8_t mode;              /* MODE_ECB, MODE_CBC, or MODE_CFB1 */
-    u_int8_t IV[MAX_IV_SIZE];   /* A possible Initialization Vector for ciphering */
-    /* Add any algorithm specific parameters needed here */
-    int blockLen;               /* Sample: Handles non-128 bit block sizes (if available) */
-} cipherInstance;
-
-/* Function prototypes */
-/* CHANGED: nothing
-   TODO: implement the following extensions to setup 192-bit and 256-bit block lengths:
-   makeKeyEx():    parameter blockLen added
-                   -- this parameter is absolutely necessary if you want to
-                   setup the round keys in a variable block length setting
-   cipherInitEx(): parameter blockLen added (for obvious reasons)
- */
-
-int rijndael_makeKey(keyInstance *key, u_int8_t direction, int keyLen, char *keyMaterial);
-
-int rijndael_cipherInit(cipherInstance *cipher, u_int8_t mode, char *IV);
-
-int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key,
-    u_int8_t *input, int inputLen, u_int8_t *outBuffer);
-
-int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key,
-    u_int8_t *input, int inputOctets, u_int8_t *outBuffer);
-
-int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key,
-    u_int8_t *input, int inputLen, u_int8_t *outBuffer);
-
-int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key,
-    u_int8_t *input, int inputOctets, u_int8_t *outBuffer);
-
-#ifdef INTERMEDIATE_VALUE_KAT
-int rijndael_cipherUpdateRounds(cipherInstance *cipher, keyInstance *key,
-    u_int8_t *input, int inputLen, u_int8_t *outBuffer, int Rounds);
-#endif /* INTERMEDIATE_VALUE_KAT */
-
-#endif /* __RIJNDAEL_API_FST_H */
diff --git a/bsd/crypto/rijndael/rijndael.h b/bsd/crypto/rijndael/rijndael.h
deleted file mode 100644
index 8dafa3b71..000000000
--- a/bsd/crypto/rijndael/rijndael.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* $KAME: rijndael.h,v 1.2 2000/10/02 17:14:27 itojun Exp $ */
-/* $FreeBSD: src/sys/crypto/rijndael/rijndael.h,v 1.1.1.1.2.1 2001/07/03 11:01:36 ume Exp $ */
-
-#include <crypto/rijndael/rijndael-api-fst.h>
diff --git a/bsd/crypto/rijndael/rijndael_local.h b/bsd/crypto/rijndael/rijndael_local.h
deleted file mode 100644
index 81e79604a..000000000
--- a/bsd/crypto/rijndael/rijndael_local.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* $KAME: rijndael_local.h,v 1.3 2000/10/02 17:14:27 itojun Exp $ */
-/* $FreeBSD: src/sys/crypto/rijndael/rijndael_local.h,v 1.3.2.1 2001/07/03 11:01:36 ume Exp $ */
-
-/* the file should not be used from outside */
-typedef u_int8_t BYTE;
-typedef u_int8_t word8;
-typedef u_int16_t word16;
-typedef u_int32_t word32;
-
-#define MAXKC RIJNDAEL_MAXKC
-#define MAXROUNDS RIJNDAEL_MAXROUNDS
diff --git a/bsd/crypto/sha1.c b/bsd/crypto/sha1.c
index c5c7b27cd..cf6bbe72a 100644
--- a/bsd/crypto/sha1.c
+++ b/bsd/crypto/sha1.c
@@ -84,7 +84,7 @@ static u_int32_t _K[] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };
 		sha1_step(ctxt);	\
 }
 
-static void sha1_step __P((struct sha1_ctxt *));
+static void sha1_step(struct sha1_ctxt *);
 
 static void
 sha1_step(ctxt)
diff --git
a/bsd/crypto/sha1.h b/bsd/crypto/sha1.h index a3ee2d834..f5dbac6eb 100644 --- a/bsd/crypto/sha1.h +++ b/bsd/crypto/sha1.h @@ -55,10 +55,10 @@ struct sha1_ctxt { }; #ifdef KERNEL -extern void sha1_init __P((struct sha1_ctxt *)); -extern void sha1_pad __P((struct sha1_ctxt *)); -extern void sha1_loop __P((struct sha1_ctxt *, const u_int8_t *, size_t)); -extern void sha1_result __P((struct sha1_ctxt *, caddr_t)); +extern void sha1_init(struct sha1_ctxt *); +extern void sha1_pad(struct sha1_ctxt *); +extern void sha1_loop(struct sha1_ctxt *, const u_int8_t *, size_t); +extern void sha1_result(struct sha1_ctxt *, caddr_t); /* compatibilty with other SHA1 source codes */ typedef struct sha1_ctxt SHA1_CTX; diff --git a/bsd/crypto/sha2/Makefile b/bsd/crypto/sha2/Makefile index 95aff4dee..72820c951 100644 --- a/bsd/crypto/sha2/Makefile +++ b/bsd/crypto/sha2/Makefile @@ -26,7 +26,7 @@ INSTALL_MI_DIR = crypto EXPORT_MI_DIR = ${INSTALL_MI_DIR} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/crypto/sha2/sha2.h b/bsd/crypto/sha2/sha2.h index 1f063d26e..3997e63f1 100644 --- a/bsd/crypto/sha2/sha2.h +++ b/bsd/crypto/sha2/sha2.h @@ -115,23 +115,23 @@ typedef SHA512_CTX SHA384_CTX; /*** SHA-256/384/512 Function Prototypes ******************************/ -void SHA256_Init __P((SHA256_CTX *)); -void SHA256_Update __P((SHA256_CTX*, const u_int8_t*, size_t)); -void SHA256_Final __P((u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*)); -char* SHA256_End __P((SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH])); -char* SHA256_Data __P((const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH])); - -void SHA384_Init __P((SHA384_CTX*)); -void SHA384_Update __P((SHA384_CTX*, const u_int8_t*, size_t)); -void SHA384_Final __P((u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*)); -char* SHA384_End __P((SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH])); -char* SHA384_Data __P((const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH])); - -void SHA512_Init __P((SHA512_CTX*)); -void SHA512_Update __P((SHA512_CTX*, const u_int8_t*, size_t)); -void SHA512_Final __P((u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*)); -char* SHA512_End __P((SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH])); -char* SHA512_Data __P((const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH])); +void SHA256_Init(SHA256_CTX *); +void SHA256_Update(SHA256_CTX*, const u_int8_t*, size_t); +void SHA256_Final(u_int8_t[SHA256_DIGEST_LENGTH], SHA256_CTX*); +char* SHA256_End(SHA256_CTX*, char[SHA256_DIGEST_STRING_LENGTH]); +char* SHA256_Data(const u_int8_t*, size_t, char[SHA256_DIGEST_STRING_LENGTH]); + +void SHA384_Init(SHA384_CTX*); +void SHA384_Update(SHA384_CTX*, const u_int8_t*, size_t); +void SHA384_Final(u_int8_t[SHA384_DIGEST_LENGTH], SHA384_CTX*); +char* SHA384_End(SHA384_CTX*, char[SHA384_DIGEST_STRING_LENGTH]); +char* SHA384_Data(const u_int8_t*, size_t, char[SHA384_DIGEST_STRING_LENGTH]); + +void SHA512_Init(SHA512_CTX*); +void SHA512_Update(SHA512_CTX*, const u_int8_t*, size_t); +void SHA512_Final(u_int8_t[SHA512_DIGEST_LENGTH], SHA512_CTX*); +char* SHA512_End(SHA512_CTX*, char[SHA512_DIGEST_STRING_LENGTH]); +char* SHA512_Data(const u_int8_t*, size_t, char[SHA512_DIGEST_STRING_LENGTH]); #ifdef __cplusplus } diff --git a/bsd/dev/Makefile b/bsd/dev/Makefile index 8e8cf9321..66b0e557c 100644 --- a/bsd/dev/Makefile +++ b/bsd/dev/Makefile @@ -19,14 +19,11 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ -DATAFILES = \ - disk.h disk_label.h kmreg_com.h - 
-INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_LIST = INSTALL_MI_DIR = dev -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = EXPORT_MI_DIR = dev diff --git a/bsd/dev/i386/conf.c b/bsd/dev/i386/conf.c index 1d872abbd..0e5ea99b4 100644 --- a/bsd/dev/i386/conf.c +++ b/bsd/dev/i386/conf.c @@ -35,13 +35,15 @@ #include #include -#include #include #include #include - -extern int nulldev(); +/* Prototypes that should be elsewhere: */ +extern int isdisk(dev_t dev, int type); +extern dev_t chrtoblk(dev_t dev); +extern int chrtoblk_set(int cdev, int bdev); +extern int iskmemdev(dev_t dev); struct bdevsw bdevsw[] = { @@ -89,32 +91,62 @@ struct bdevsw bdevsw[] = int nblkdev = sizeof (bdevsw) / sizeof (bdevsw[0]); extern struct tty *km_tty[]; -extern int cnopen(),cnclose(),cnread(),cnwrite(),cnioctl(), - cnselect(),cngetc(), cnputc(dev_t dev, char c); -extern int kmopen(),kmclose(),kmread(),kmwrite(),kmioctl(), - kmgetc(), kmputc(dev_t dev, char c); -extern int sgopen(),sgclose(), sgioctl(); +extern d_open_t cnopen; +extern d_close_t cnclose; +extern d_read_t cnread; +extern d_write_t cnwrite; +extern d_ioctl_t cnioctl; +extern d_select_t cnselect; +extern d_getc_t cngetc; +extern d_putc_t cnputc; +extern d_open_t kmopen; +extern d_close_t kmclose; +extern d_read_t kmread; +extern d_write_t kmwrite; +extern d_ioctl_t kmioctl; +extern d_getc_t kmgetc; +extern d_putc_t kmputc; +extern d_open_t sgopen; +extern d_close_t sgclose; +extern d_ioctl_t sgioctl; #if NVOL > 0 -extern int volopen(),volclose(),volioctl(); +extern d_open_t volopen; +extern d_close_t volclose; +extern d_ioctl_t volioctl; #else #define volopen eno_opcl #define volclose eno_opcl #define volioctl eno_ioctl #endif -extern int cttyopen(), cttyread(), cttywrite(), cttyioctl(), cttyselect(); +extern d_open_t cttyopen; +extern d_read_t cttyread; +extern d_write_t cttywrite; +extern d_ioctl_t cttyioctl; +extern d_select_t cttyselect; -extern int mmread(),mmwrite(); +extern d_read_t mmread; +extern d_write_t mmwrite; +extern d_ioctl_t mmioctl; #define mmselect (select_fcn_t *)seltrue #define mmmmap eno_mmap #include #if NPTY > 0 extern struct tty *pt_tty[]; -extern int ptsopen(),ptsclose(),ptsread(),ptswrite(),ptsstop(),ptsputc(); -extern int ptcopen(),ptcclose(),ptcread(),ptcwrite(),ptcselect(), - ptyioctl(); +extern d_open_t ptsopen; +extern d_close_t ptsclose; +extern d_read_t ptsread; +extern d_write_t ptswrite; +extern d_stop_t ptsstop; +extern d_putc_t ptsputc; +extern d_open_t ptcopen; +extern d_close_t ptcclose; +extern d_read_t ptcread; +extern d_write_t ptcwrite; +extern d_select_t ptcselect; +extern d_ioctl_t ptyioctl; #else #define ptsopen eno_opcl #define ptsclose eno_opcl @@ -131,9 +163,25 @@ extern int ptcopen(),ptcclose(),ptcread(),ptcwrite(),ptcselect(), #define ptyioctl eno_ioctl #endif -extern int logopen(),logclose(),logread(),logioctl(),logselect(); -extern int fdesc_open(), fdesc_read(), fdesc_write(), - fdesc_ioctl(), fdesc_select(); +extern d_open_t logopen; +extern d_close_t logclose; +extern d_read_t logread; +extern d_ioctl_t logioctl; +extern d_select_t logselect; +extern d_open_t fdesc_open; +extern d_read_t fdesc_read; +extern d_write_t fdesc_write; +extern d_ioctl_t fdesc_ioctl; +extern d_select_t fdesc_select; + +#define nullopen (d_open_t *)&nulldev +#define nullclose (d_close_t *)&nulldev +#define nullread (d_read_t *)&nulldev +#define nullwrite (d_write_t *)&nulldev +#define nullioctl (d_ioctl_t *)&nulldev +#define nullselect (d_select_t *)&nulldev +#define nullstop (d_stop_t *)&nulldev +#define nullreset 
(d_reset_t *)&nulldev struct cdevsw cdevsw[] = { @@ -150,33 +198,33 @@ struct cdevsw cdevsw[] = { cnopen, cnclose, cnread, cnwrite, /* 0*/ - cnioctl, nulldev, nulldev, 0, cnselect, + cnioctl, nullstop, nullreset, 0, cnselect, eno_mmap, eno_strat, cngetc, cnputc, D_TTY }, NO_CDEVICE, /* 1*/ { - cttyopen, nulldev, cttyread, cttywrite, /* 2*/ - cttyioctl, nulldev, nulldev, 0, cttyselect, + cttyopen, nullclose, cttyread, cttywrite, /* 2*/ + cttyioctl, nullstop, nullreset, 0, cttyselect, eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, { - nulldev, nulldev, mmread, mmwrite, /* 3*/ - eno_ioctl, nulldev, nulldev, 0, mmselect, - mmmmap, eno_strat, eno_getc, eno_putc, 0 + nullopen, nullclose, mmread, mmwrite, /* 3*/ + mmioctl, nullstop, nullreset, 0, mmselect, + mmmmap, eno_strat, eno_getc, eno_putc, D_DISK }, { ptsopen, ptsclose, ptsread, ptswrite, /* 4*/ - ptyioctl, ptsstop, nulldev, pt_tty, ttselect, + ptyioctl, ptsstop, nullreset, pt_tty, ttselect, eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, { ptcopen, ptcclose, ptcread, ptcwrite, /* 5*/ - ptyioctl, nulldev, nulldev, 0, ptcselect, + ptyioctl, nullstop, nullreset, 0, ptcselect, eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, { logopen, logclose, logread, eno_rdwrt, /* 6*/ - logioctl, eno_stop, nulldev, 0, logselect, + logioctl, eno_stop, nullreset, 0, logselect, eno_mmap, eno_strat, eno_getc, eno_putc, 0 }, NO_CDEVICE, /* 7*/ @@ -186,7 +234,7 @@ struct cdevsw cdevsw[] = NO_CDEVICE, /*11*/ { kmopen, kmclose, kmread, kmwrite, /*12*/ - kmioctl, nulldev, nulldev, km_tty, ttselect, + kmioctl, nullstop, nullreset, km_tty, ttselect, eno_mmap, eno_strat, kmgetc, kmputc, 0 }, NO_CDEVICE, /*13*/ @@ -248,9 +296,7 @@ int nchrdev = sizeof (cdevsw) / sizeof (cdevsw[0]); * return true if a disk */ int -isdisk(dev, type) - dev_t dev; - int type; +isdisk(dev_t dev, int type) { dev_t maj = major(dev); @@ -301,8 +347,7 @@ static int chrtoblktab[] = { * convert chr dev to blk dev */ dev_t -chrtoblk(dev) - dev_t dev; +chrtoblk(dev_t dev) { int blkmaj; @@ -328,9 +373,7 @@ chrtoblk_set(int cdev, int bdev) /* * Returns true if dev is /dev/mem or /dev/kmem. 
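Looking ahead to the cons.c hunks below: each console entry point is rewritten to take the kernel funnel around the dispatch through cdevsw and to restore the previous funnel state on the way out. The recurring shape, sketched with a hypothetical operation (do_console_op and cn_dispatch_sketch are illustrative names; kernel_flock and thread_funnel_set are the real symbols the patch uses, assumed declared by the usual kernel headers):

extern int do_console_op(dev_t device);     /* hypothetical stand-in for a cdevsw call */

int
cn_dispatch_sketch(dev_t device)
{
    boolean_t funnel_state;
    int error;

    funnel_state = thread_funnel_set(kernel_flock, TRUE);  /* enter the funnel */
    error = do_console_op(device);                         /* forward to the real driver */
    thread_funnel_set(kernel_flock, funnel_state);         /* restore prior state */
    return (error);
}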
*/ -int iskmemdev(dev) - dev_t dev; +int iskmemdev(dev_t dev) { - return (major(dev) == 3 && minor(dev) < 2); } diff --git a/bsd/dev/i386/cons.c b/bsd/dev/i386/cons.c index 118878bd1..43f98d1ef 100644 --- a/bsd/dev/i386/cons.c +++ b/bsd/dev/i386/cons.c @@ -44,80 +44,111 @@ struct tty cons; struct tty *constty; /* current console device */ +int cnopen(__unused dev_t dev, int flag, int devtype, struct proc *pp); +int cnclose(__unused dev_t dev, int flag, int mode, struct proc *pp); +int cnread(__unused dev_t dev, struct uio *uio, int ioflag); +int cnwrite(__unused dev_t dev, struct uio *uio, int ioflag); +int cnioctl(__unused dev_t dev, int cmd, caddr_t addr, int flg, struct proc *p); +int cnselect(__unused dev_t dev, int flag, void * wql, struct proc *p); + +void slave_cnenable(void); + +int alert( + __unused int width, + __unused int height, + __unused const char *title, + const char *msg, + int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8); +int alert_done(void); + /*ARGSUSED*/ int -cnopen(dev, flag, devtype, pp) - dev_t dev; - int flag, devtype; - struct proc *pp; +cnopen(__unused dev_t dev, int flag, int devtype, struct proc *pp) { dev_t device; + boolean_t funnel_state; + int error; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_open)(device, flag, devtype, pp)); + error = (*cdevsw[major(device)].d_open)(device, flag, devtype, pp); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ int -cnclose(dev, flag, mode, pp) - dev_t dev; - int flag, mode; - struct proc *pp; +cnclose(__unused dev_t dev, int flag, int mode, struct proc *pp) { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_close)(device, flag, mode, pp)); + error = (*cdevsw[major(device)].d_close)(device, flag, mode, pp); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); + + } /*ARGSUSED*/ int -cnread(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; +cnread(__unused dev_t dev, struct uio *uio, int ioflag) { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_read)(device, uio, ioflag)); + error = (*cdevsw[major(device)].d_read)(device, uio, ioflag); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ int -cnwrite(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; +cnwrite(__unused dev_t dev, struct uio *uio, int ioflag) { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_write)(device, uio, ioflag)); + error = (*cdevsw[major(device)].d_write)(device, uio, ioflag); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ int -cnioctl(dev, cmd, addr, flag, p) - dev_t dev; - int cmd; - caddr_t addr; - int flag; - struct proc *p; +cnioctl(__unused dev_t dev, int cmd, caddr_t addr, int flag, struct proc *p) { dev_t device; + boolean_t funnel_state; + int error; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; @@ -127,23 +158,26 @@ cnioctl(dev, cmd, addr, flag, p) * 
Superuser can always use this to wrest control of console * output from the "virtual" console. */ - if (cmd == TIOCCONS && constty) { - int error = suser(p->p_ucred, (u_short *) NULL); - if (error) - return (error); + if ((unsigned) cmd == TIOCCONS && constty) { + error = proc_suser(p); + if (error) { + goto out; + } constty = NULL; - return (0); + error = 0; + goto out; } - return ((*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p)); + error = (*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p); +out: + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ +/* called with funnel held */ int -cnselect(dev, flag, wql, p) - dev_t dev; - int flag; - void * wql; - struct proc *p; +cnselect(__unused dev_t dev, int flag, void * wql, struct proc *p) { dev_t device; @@ -159,12 +193,18 @@ int cngetc() { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_getc)(device)); + error = (*cdevsw[major(device)].d_getc)(device); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ @@ -173,21 +213,26 @@ cnputc(c) char c; { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_putc)(device, c)); + error = (*cdevsw[major(device)].d_putc)(device, c); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } #endif -#if NCPUS > 1 -slave_cnenable() +void +slave_cnenable(void) { /* FIXME: what to do here? */ } -#endif NCPUS > 1 #if 0 void @@ -207,9 +252,9 @@ kprintf( const char *format, ...) */ int alert( - int width, - int height, - const char *title, + __unused int width, + __unused int height, + __unused const char *title, const char *msg, int p1, int p2, @@ -233,7 +278,7 @@ alert( } int -alert_done() +alert_done(void) { /* DoRestore(); */ return 0; diff --git a/bsd/dev/i386/cons.h b/bsd/dev/i386/cons.h index 00d91a155..e14004aa2 100644 --- a/bsd/dev/i386/cons.h +++ b/bsd/dev/i386/cons.h @@ -24,14 +24,14 @@ */ struct consdev { - char *cn_name; /* name of device in dev_name_list */ - int (*cn_probe)(); /* probe hardware and fill in consdev info */ - int (*cn_init)(); /* turn on as console */ - int (*cn_getc)(); /* kernel getchar interface */ - int (*cn_putc)(); /* kernel putchar interface */ - struct tty *cn_tp; /* tty structure for console device */ - dev_t cn_dev; /* major/minor of device */ - short cn_pri; /* pecking order; the higher the better */ + char *cn_name; /* name of device in dev_name_list */ + int (*cn_probe)(void); /* probe hardware, fill consdev info */ + int (*cn_init)(void); /* turn on as console */ + int (*cn_getc)(void); /* kernel getchar interface */ + int (*cn_putc)(void); /* kernel putchar interface */ + struct tty *cn_tp; /* tty structure for console device */ + dev_t cn_dev; /* major/minor of device */ + short cn_pri; /* pecking order; higher the better */ }; /* values for cn_pri - reflect our policy for console selection */ diff --git a/bsd/dev/i386/kern_machdep.c b/bsd/dev/i386/kern_machdep.c index 0c3684b58..a78df67ed 100644 --- a/bsd/dev/i386/kern_machdep.c +++ b/bsd/dev/i386/kern_machdep.c @@ -26,78 +26,30 @@ * Author: John Seamons * * Machine-specific kernel routines. - * - * 8-Dec-91 Peter King (king) at NeXT - * Added grade_cpu_subtype(). 
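/*
 * The TIOCCONS path above is the release half of console redirection:
 * a process that previously captured console output hands it back, and
 * proc_suser() gates the operation.  For orientation only, a userland
 * sketch of the capture half; the exact third-argument convention for
 * TIOCCONS is assumed here, not taken from this patch:
 */
#include <sys/ioctl.h>

static int
capture_console(int ttyfd)
{
	int on = 1;

	/* route subsequent kernel console output to this terminal */
	return ioctl(ttyfd, TIOCCONS, &on);
}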
- * FIXME: Do we want to merge this with check_cpu_subtype()? - * - * 5-Mar-90 John Seamons (jks) at NeXT - * Created. */ #include #include #include -check_cpu_subtype (cpu_subtype) - cpu_subtype_t cpu_subtype; -{ - struct machine_slot *ms = &machine_slot[cpu_number()]; - - switch (ms->cpu_subtype) { - case CPU_SUBTYPE_386: - if (cpu_subtype == CPU_SUBTYPE_386) - return (TRUE); - break; - - case CPU_SUBTYPE_486: - case CPU_SUBTYPE_486SX: - if ( cpu_subtype == CPU_SUBTYPE_486 || - cpu_subtype == CPU_SUBTYPE_486SX || - cpu_subtype == CPU_SUBTYPE_386 ) - return (TRUE); - break; - - case CPU_SUBTYPE_586: - if ( cpu_subtype == CPU_SUBTYPE_586 || - cpu_subtype == CPU_SUBTYPE_486 || - cpu_subtype == CPU_SUBTYPE_486SX || - cpu_subtype == CPU_SUBTYPE_386 ) - return (TRUE); - break; - - default: - if ( CPU_SUBTYPE_INTEL_MODEL(cpu_subtype) == - CPU_SUBTYPE_INTEL_MODEL_ALL) { - if ( CPU_SUBTYPE_INTEL_FAMILY(ms->cpu_subtype) >= - CPU_SUBTYPE_INTEL_FAMILY(cpu_subtype)) - return (TRUE); - } - else { - if ( ms->cpu_subtype == cpu_subtype) - return (TRUE); - } - break; - } - - return (FALSE); -} +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); /********************************************************************** - * Routine: grade_cpu_subtype() + * Routine: grade_binary() * - * Function: Return a relative preference for cpu_subtypes in fat - * executable files. The higher the grade, the higher the - * preference. A grade of 0 means not acceptable. + * Function: Return a relative preference for exectypes and + * execsubtypes in fat executable files. The higher the + * grade, the higher the preference. A grade of 0 means + * not acceptable. **********************************************************************/ -grade_cpu_subtype (cpu_subtype) - cpu_subtype_t cpu_subtype; +int +grade_binary(__unused cpu_type_t exectype, cpu_subtype_t execsubtype) { - struct machine_slot *ms = &machine_slot[cpu_number()]; + int cpusubtype = cpu_subtype(); - switch (ms->cpu_subtype) { + switch (cpusubtype) { case CPU_SUBTYPE_386: - switch (cpu_subtype) { + switch (execsubtype) { case CPU_SUBTYPE_386: return 1; default: @@ -105,7 +57,7 @@ grade_cpu_subtype (cpu_subtype) } case CPU_SUBTYPE_486: - switch (cpu_subtype) { + switch (execsubtype) { case CPU_SUBTYPE_386: return 1; @@ -120,7 +72,7 @@ grade_cpu_subtype (cpu_subtype) } case CPU_SUBTYPE_486SX: - switch (cpu_subtype) { + switch (execsubtype) { case CPU_SUBTYPE_386: return 1; @@ -135,7 +87,7 @@ grade_cpu_subtype (cpu_subtype) } case CPU_SUBTYPE_586: - switch (cpu_subtype) { + switch (execsubtype) { case CPU_SUBTYPE_386: return 1; @@ -153,18 +105,28 @@ grade_cpu_subtype (cpu_subtype) } default: - if ( CPU_SUBTYPE_INTEL_MODEL(cpu_subtype) == + if ( CPU_SUBTYPE_INTEL_MODEL(execsubtype) == CPU_SUBTYPE_INTEL_MODEL_ALL) { - if ( CPU_SUBTYPE_INTEL_FAMILY(ms->cpu_subtype) >= - CPU_SUBTYPE_INTEL_FAMILY(cpu_subtype)) + if ( CPU_SUBTYPE_INTEL_FAMILY(cpusubtype) >= + CPU_SUBTYPE_INTEL_FAMILY(execsubtype)) return CPU_SUBTYPE_INTEL_FAMILY_MAX - - CPU_SUBTYPE_INTEL_FAMILY(ms->cpu_subtype) - - CPU_SUBTYPE_INTEL_FAMILY(cpu_subtype); + CPU_SUBTYPE_INTEL_FAMILY(cpusubtype) - + CPU_SUBTYPE_INTEL_FAMILY(execsubtype); } else { - if ( ms->cpu_subtype == cpu_subtype) + if ( cpusubtype == execsubtype) return CPU_SUBTYPE_INTEL_FAMILY_MAX + 1; } return 0; } } + +extern void md_prepare_for_shutdown(int, int, char *); + +void +md_prepare_for_shutdown( + __unused int paniced, + __unused int howto, + __unused char * command) +{ +} diff --git a/bsd/dev/i386/km.c 
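/*
 * grade_binary() exists so the fat-file loader can rank every slice in
 * an executable and keep the best one.  A sketch of that consumer, with
 * the iteration simplified and the helper name invented for
 * illustration; fat_arch fields are as in <mach-o/fat.h>, and a real
 * consumer would also byte-swap the big-endian fat header fields:
 */
#include <mach-o/fat.h>

static int
best_fat_slice(const struct fat_arch *archs, int narchs)
{
	int i, grade, best = -1, best_grade = 0;

	for (i = 0; i < narchs; i++) {
		/* grade 0 means "cannot run on this CPU at all" */
		grade = grade_binary(archs[i].cputype, archs[i].cpusubtype);
		if (grade > best_grade) {
			best_grade = grade;
			best = i;
		}
	}
	return best;	/* -1 when no slice is acceptable */
}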
b/bsd/dev/i386/km.c index 048cb3951..6de6ff0bb 100644 --- a/bsd/dev/i386/km.c +++ b/bsd/dev/i386/km.c @@ -41,10 +41,25 @@ #include #include +extern int hz; + +extern void cnputcusr(char); +extern int cngetc(void); + +void kminit(void); +int kmopen(dev_t dev, int flag, int devtype, struct proc *pp); +int kmclose(dev_t dev, int flag, int mode, struct proc *p); +int kmread(dev_t dev, struct uio *uio, int ioflag); +int kmwrite(dev_t dev, struct uio *uio, int ioflag); +int kmioctl(dev_t dev, int cmd, caddr_t data, int flag, struct proc *p); +int kmputc(int c); +int kmgetc(dev_t dev); +int kmgetc_silent(dev_t dev); +void cons_cinput(char ch); + /* * 'Global' variables, shared only by this file and conf.c. */ -extern struct tty cons; struct tty *km_tty[1] = { &cons }; /* @@ -63,9 +78,10 @@ static void kmstart(struct tty *tp); extern void KeyboardOpen(void); -int kminit() +void +kminit(void) { - cons.t_dev = makedev(12, 0); + cons.t_dev = makedev(12, 0); initialized = 1; } /* @@ -75,10 +91,9 @@ int kmopen( dev_t dev, int flag, - int devtype, + __unused int devtype, struct proc *pp) { - int rtn; int unit; struct tty *tp; struct winsize *wp; @@ -101,7 +116,7 @@ kmopen( tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; termioschars(&tp->t_termios); ttsetwater(tp); - } else if ((tp->t_state & TS_XCLUDE) && pp->p_ucred->cr_uid != 0) + } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) return EBUSY; tp->t_state |= TS_CARR_ON; /* lie and say carrier exists and is on. */ @@ -133,10 +148,10 @@ kmopen( int kmclose( - dev_t dev, + __unused dev_t dev, int flag, - int mode, - struct proc *p) + __unused int mode, + __unused struct proc *p) { struct tty *tp; @@ -149,7 +164,7 @@ kmclose( int kmread( - dev_t dev, + __unused dev_t dev, struct uio *uio, int ioflag) { @@ -161,7 +176,7 @@ kmread( int kmwrite( - dev_t dev, + __unused dev_t dev, struct uio *uio, int ioflag) { @@ -173,7 +188,7 @@ kmwrite( int kmioctl( - dev_t dev, + __unused dev_t dev, int cmd, caddr_t data, int flag, @@ -209,16 +224,9 @@ kmioctl( } default: error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); - if (error >= 0) { + if (ENOTTY != error) return error; - } - error = ttioctl (tp, cmd, data, flag, p); - if (error >= 0) { - return error; - } - else { - return ENOTTY; - } + return ttioctl (tp, cmd, data, flag, p); } } @@ -234,16 +242,16 @@ kmputc( return( 0); if(c == '\n') - cnputc('\r'); + cnputcusr('\r'); - cnputc(c); + cnputcusr(c); return 0; } int kmgetc( - dev_t dev) + __unused dev_t dev) { int c; @@ -252,13 +260,13 @@ kmgetc( if (c == '\r') { c = '\n'; } - cnputc(c); + cnputcusr(c); return c; } int kmgetc_silent( - dev_t dev) + __unused dev_t dev) { int c; @@ -279,31 +287,17 @@ static void kmstart( struct tty *tp) { - extern int hz; if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) goto out; if (tp->t_outq.c_cc == 0) goto out; tp->t_state |= TS_BUSY; - if (tp->t_outq.c_cc > tp->t_lowat) { - /* - * Start immediately. - */ - kmoutput(tp); - } - else { - /* - * Wait a bit... 
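/*
 * The kmioctl() rework above replaces a sign test with an explicit
 * ENOTTY convention: the line discipline gets first refusal, and only
 * an ENOTTY return ("not my command") falls through to the generic tty
 * layer.  The chain, condensed:
 */
static int
km_ioctl_chain(struct tty *tp, int cmd, caddr_t data, int flag,
    struct proc *p)
{
	int error;

	error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p);
	if (ENOTTY != error)
		return error;		/* handled (or failed) by the discipline */
	return ttioctl(tp, cmd, data, flag, p);
}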
- */ -#if 0 - /* FIXME */ - timeout(kmtimeout, tp, hz); -#else - kmoutput(tp); -#endif - } + kmoutput(tp); + return; + out: - ttwwakeup(tp); + (*linesw[tp->t_line].l_start)(tp); + return; } static void @@ -328,7 +322,6 @@ kmoutput( char buf[80]; char *cp; int cc = -1; - extern int hz; while (tp->t_outq.c_cc > 0) { @@ -345,17 +338,16 @@ kmoutput( timeout(kmtimeout, tp, hz); } tp->t_state &= ~TS_BUSY; - ttwwakeup(tp); + (*linesw[tp->t_line].l_start)(tp); return 0; } + +void cons_cinput(char ch) { struct tty *tp = &cons; - boolean_t funnel_state; - (*linesw[tp->t_line].l_rint) (ch, tp); - } diff --git a/bsd/dev/i386/mem.c b/bsd/dev/i386/mem.c index 5c3422a64..892be2473 100644 --- a/bsd/dev/i386/mem.c +++ b/bsd/dev/i386/mem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -70,52 +70,67 @@ #include #include #include -#include #include #include -#include +#include #include #include #include #include /* for kernel_map */ +extern vm_offset_t kvtophys(vm_offset_t va); +extern boolean_t kernacc(off_t, size_t ); + static caddr_t devzerobuf; -mmread(dev, uio) - dev_t dev; - struct uio *uio; +int mmread(dev_t dev, struct uio *uio); +int mmwrite(dev_t dev, struct uio *uio); +int mmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); +int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw); + +int +mmread(dev_t dev, struct uio *uio) { return (mmrw(dev, uio, UIO_READ)); } -mmwrite(dev, uio) - dev_t dev; - struct uio *uio; +int +mmwrite(dev_t dev, struct uio *uio) { return (mmrw(dev, uio, UIO_WRITE)); } -mmrw(dev, uio, rw) - dev_t dev; - struct uio *uio; - enum uio_rw rw; +int +mmioctl(__unused dev_t dev, u_long cmd, __unused caddr_t data, + __unused int flag, __unused struct proc *p) +{ + switch (cmd) { + case FIONBIO: + case FIOASYNC: + /* OK to do nothing: we always return immediately */ + break; + default: + return ENODEV; + } + + return (0); +} + +int +mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) { register int o; register u_int c, v; - register struct iovec *iov; int error = 0; vm_offset_t where; - int spl; vm_size_t size; - extern boolean_t kernacc(off_t, size_t ); - while (uio->uio_resid > 0 && error == 0) { - iov = uio->uio_iov; - if (iov->iov_len == 0) { - uio->uio_iov++; + while (uio_resid(uio) > 0 && error == 0) { + if (uio_iov_len(uio) == 0) { + uio_next_iov(uio); uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) panic("mmrw"); @@ -135,7 +150,8 @@ mmrw(dev, uio, rw) goto fault; } o = uio->uio_offset - v; - c = min(PAGE_SIZE - o, (u_int)iov->iov_len); + // LP64todo - fix this! 
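/*
 * The mmrw() loop above is the template for this patch's uio migration:
 * direct iovec pokes (uio->uio_iov, iov->iov_len, uio->uio_resid) give
 * way to accessors so one code path can carry both 32- and 64-bit
 * iovecs.  The per-chunk bookkeeping, isolated into a sketch that uses
 * exactly the accessors appearing in this file:
 */
static void
uio_consume_sketch(struct uio *uio, u_int c)
{
	uio_iov_base_add(uio, c);		/* advance the current iovec */
	uio_iov_len_add(uio, -((int)c));	/* and shrink what's left of it */
	uio->uio_offset += c;
	uio_setresid(uio, (uio_resid(uio) - c));	/* global residual count */
}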
+ c = min(PAGE_SIZE - o, (u_int)uio_iov_len(uio)); error = uiomove((caddr_t) (where + o), c, uio); kmem_free(kernel_map, where, PAGE_SIZE); continue; @@ -146,17 +162,18 @@ mmrw(dev, uio, rw) if (((vm_address_t)uio->uio_offset >= VM_MAX_KERNEL_ADDRESS) || ((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_ADDRESS)) goto fault; - c = iov->iov_len; + c = uio_iov_len(uio); if (!kernacc(uio->uio_offset, c)) goto fault; - error = uiomove((caddr_t)uio->uio_offset, (int)c, uio); + error = uiomove((caddr_t)(uintptr_t)uio->uio_offset, + (int)c, uio); continue; /* minor device 2 is EOF/RATHOLE */ case 2: if (rw == UIO_READ) return (0); - c = iov->iov_len; + c = uio_iov_len(uio); break; case 3: if(devzerobuf == NULL) { @@ -164,10 +181,11 @@ mmrw(dev, uio, rw) bzero(devzerobuf, PAGE_SIZE); } if(uio->uio_rw == UIO_WRITE) { - c = iov->iov_len; + c = uio_iov_len(uio); break; } - c = min(iov->iov_len, PAGE_SIZE); + // LP64todo - fix this! + c = min(uio_iov_len(uio), PAGE_SIZE); error = uiomove(devzerobuf, (int)c, uio); continue; default: @@ -177,10 +195,10 @@ mmrw(dev, uio, rw) if (error) break; - iov->iov_base += c; - iov->iov_len -= c; + uio_iov_base_add(uio, c); + uio_iov_len_add(uio, -((int)c)); uio->uio_offset += c; - uio->uio_resid -= c; + uio_setresid(uio, (uio_resid(uio) - c)); } return (error); fault: @@ -201,7 +219,7 @@ kernacc( end = start + len; while (base < end) { - if(kvtophys((vm_offset_t)base) == NULL) + if(kvtophys((vm_offset_t)base) == 0ULL) return(FALSE); base += page_size; } diff --git a/bsd/dev/i386/memmove.c b/bsd/dev/i386/memmove.c index 12b0e2070..5ef7f1291 100644 --- a/bsd/dev/i386/memmove.c +++ b/bsd/dev/i386/memmove.c @@ -34,7 +34,7 @@ * */ - +#include #if 0 void *memcpy(void *dst, const void *src, unsigned int ulen) @@ -43,7 +43,9 @@ void *memcpy(void *dst, const void *src, unsigned int ulen) return dst; } #endif /* 0 */ -void *memmove(void *dst, const void *src, unsigned int ulen) + +void * +memmove(void *dst, const void *src, size_t ulen) { bcopy(src, dst, ulen); return dst; diff --git a/bsd/dev/i386/stubs.c b/bsd/dev/i386/stubs.c index 1ae89fe67..5a15de0ea 100644 --- a/bsd/dev/i386/stubs.c +++ b/bsd/dev/i386/stubs.c @@ -27,17 +27,22 @@ #include #include -#include #include #include #include -#include +#include +#include +#include #include #include #include -#include #include +/* XXX should be elsewhere (cpeak) */ +extern int set_bsduthreadargs(thread_t, void *, void *); +extern void *get_bsduthreadarg(thread_t); +extern int *get_bsduthreadrval(thread_t); +extern int *get_bsduthreadlowpridelay(thread_t); /* * copy a null terminated string from the kernel address space into @@ -50,11 +55,11 @@ * the number of bytes copied is always returned in lencopied. 
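/*
 * Minor-number semantics of the memory driver, gathered from the cases
 * above into one illustrative helper (the function itself is not part
 * of the patch):
 */
static const char *
mm_minor_name(dev_t dev)
{
	switch (minor(dev)) {
	case 0:  return "/dev/mem";	/* physical memory, one mapped page per pass */
	case 1:  return "/dev/kmem";	/* kernel virtual memory, kernacc()-checked */
	case 2:  return "/dev/null";	/* EOF on read, rathole on write */
	case 3:  return "/dev/zero";	/* reads drain a zeroed page (devzerobuf) */
	default: return NULL;		/* anything else fails with ENXIO */
	}
}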
*/ int -copyoutstr(from, to, maxlen, lencopied) - void * from, * to; - size_t maxlen, *lencopied; +copyoutstr(const void *from, user_addr_t to, size_t maxlen, size_t *lencopied) { - int slen,len,error=0; + size_t slen; + size_t len; + int error = 0; slen = strlen(from) + 1; if (slen > maxlen) @@ -81,42 +86,58 @@ copyoutstr(from, to, maxlen, lencopied) */ /* from ppc/fault_copy.c -Titan1T4 VERSION */ int -copystr(vfrom, vto, maxlen, lencopied) - register void * vfrom, *vto; - size_t maxlen, *lencopied; +copystr(const void *vfrom, void *vto, size_t maxlen, size_t *lencopied) { - register unsigned l; - int error; -caddr_t from, to; - - from = vfrom; - to = vto; - for (l = 0; l < maxlen; l++) - if ((*to++ = *from++) == '\0') { - if (lencopied) - *lencopied = l + 1; - return 0; - } - if (lencopied) - *lencopied = maxlen; - return ENAMETOOLONG; + size_t l; + char const *from = (char const *) vfrom; + char *to = (char *) vto; + + for (l = 0; l < maxlen; l++) { + if ((*to++ = *from++) == '\0') { + if (lencopied) + *lencopied = l + 1; + return 0; + } + } + if (lencopied) + *lencopied = maxlen; + return ENAMETOOLONG; } -int copywithin(src, dst, count) -void * src, *dst; -size_t count; +int +copywithin(void *src, void *dst, size_t count) { bcopy(src,dst,count); return 0; } -set_bsduthreadargs(thread_t th, void * pcb, void *ignored_arg) +int +set_bsduthreadargs(thread_t th, void * pcb, __unused void *ignored_arg) { -struct uthread * ut; + struct uthread * ut; + struct proc *p = current_proc(); ut = get_bsdthread_info(th); ut->uu_ar0 = (int *)pcb; + /* + * Delayed binding of thread credential to process credential. + * + * XXX This doesn't really belong here, but the i386 code has a + * XXX number of seemingly gratuitous structural differences that + * XXX make this the most appropriate place to do the work. + */ + if (ut->uu_ucred != p->p_ucred && + (ut->uu_flag & UT_SETUID) == 0) { + kauth_cred_t old = ut->uu_ucred; + proc_lock(p); + ut->uu_ucred = p->p_ucred; + kauth_cred_ref(ut->uu_ucred); + proc_unlock(p); + if (old != NOCRED) + kauth_cred_rele(old); + } + return(1); } @@ -129,9 +150,17 @@ struct uthread *ut; } int * -get_bsduthreadrval(thread_act_t th) +get_bsduthreadrval(thread_t th) { struct uthread *ut; ut = get_bsdthread_info(th); return(&ut->uu_rval[0]); } + +int * +get_bsduthreadlowpridelay(thread_t th) +{ +struct uthread *ut; + ut = get_bsdthread_info(th); + return(&ut->uu_lowpri_delay); +} diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index a0c07910e..e3fe2fac6 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -20,6 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include #include #include #include @@ -28,31 +29,38 @@ static int hw_cpu_sysctl SYSCTL_HANDLER_ARGS { - i386_cpu_info_t cpu_info; - void *ptr = (uint8_t *)&cpu_info + (uint32_t)arg1; + __unused struct sysctl_oid *unused_oidp = oidp; + i386_cpu_info_t *cpu_info = cpuid_info(); + void *ptr = (uint8_t *)cpu_info + (uint32_t)arg1; int value; - cpuid_get_info(&cpu_info); + if (arg2 == -1) { + ptr = *(char **)ptr; + arg2 = 0; + } + + if (arg2 == 0 && ((char *)ptr)[0] == '\0') { + return ENOENT; + } if (arg2 == sizeof(uint8_t)) { value = (uint32_t) *(uint8_t *)ptr; ptr = &value; arg2 = sizeof(uint32_t); } - return SYSCTL_OUT(req, ptr, arg2 ? arg2 : strlen((char *)ptr)+1); - return 0; + return SYSCTL_OUT(req, ptr, arg2 ? 
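/*
 * Usage sketch for the ANSI copystr() above: the terminating NUL is
 * copied and counted, so a successful copy of "console" reports 8
 * through lencopied; ENAMETOOLONG means maxlen bytes were written
 * without finding a terminator.  Buffer names are illustrative:
 */
static int
copystr_example(void)
{
	char namebuf[32];
	size_t copied = 0;
	int error;

	error = copystr("console", namebuf, sizeof(namebuf), &copied);
	if (error == 0) {
		/* copied == strlen("console") + 1 == 8 */
	}
	return error;
}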
(size_t) arg2 : strlen((char *)ptr)+1); } static int hw_cpu_features SYSCTL_HANDLER_ARGS { - i386_cpu_info_t cpu_info; + __unused struct sysctl_oid *unused_oidp = oidp; + __unused void *unused_arg1 = arg1; + __unused int unused_arg2 = arg2; char buf[256]; - vm_size_t size; - cpuid_get_info(&cpu_info); buf[0] = '\0'; - cpuid_get_feature_names(cpu_info.cpuid_features, buf, sizeof(buf)); + cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf)); return SYSCTL_OUT(req, buf, strlen(buf) + 1); } @@ -68,6 +76,10 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand_string, CTLTYPE_STRING | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_brand_string), 0, hw_cpu_sysctl, "A", "CPU brand string"); +SYSCTL_PROC(_machdep_cpu, OID_AUTO, model_string, CTLTYPE_STRING | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_model_string), -1, + hw_cpu_sysctl, "A", "CPU model string"); + SYSCTL_PROC(_machdep_cpu, OID_AUTO, value, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_value), sizeof(uint32_t), hw_cpu_sysctl, "I", "CPU value"); @@ -104,12 +116,6 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, brand, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_brand), sizeof(uint8_t), hw_cpu_sysctl, "I", "CPU brand"); -#if 0 -SYSCTL_PROC(_machdep_cpu, OID_AUTO, model_string, CTLTYPE_STRING | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, model_string), 0, - hw_cpu_sysctl, "A", "CPU model string"); -#endif - SYSCTL_PROC(_machdep_cpu, OID_AUTO, features, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, hw_cpu_features, "A", "CPU feature names"); @@ -120,6 +126,7 @@ struct sysctl_oid *machdep_sysctl_list[] = &sysctl__machdep_cpu, &sysctl__machdep_cpu_vendor, &sysctl__machdep_cpu_brand_string, + &sysctl__machdep_cpu_model_string, &sysctl__machdep_cpu_value, &sysctl__machdep_cpu_family, &sysctl__machdep_cpu_model, diff --git a/bsd/dev/i386/unix_signal.c b/bsd/dev/i386/unix_signal.c index efd73bfb3..ca3b9f2df 100644 --- a/bsd/dev/i386/unix_signal.c +++ b/bsd/dev/i386/unix_signal.c @@ -31,36 +31,31 @@ #include #include -#include +#include #include -#include +#include #include +#include +#include +#include /* for thread_abort_safely */ #include +#include #include -#include +extern struct i386_saved_state *get_user_regs(thread_t); -/* - * FIXME.. should be included from mach_kernel/i386/seg.h - */ - -#define USER_CS 0x17 -#define USER_DS 0x1f -#define USER_CTHREAD 0x27 - -#define UDATA_SEL USER_DS -#define UCODE_SEL USER_CS -#define UCTHREAD_SEL USER_CTHREAD - -#define valid_user_code_selector(x) (TRUE) -#define valid_user_data_selector(x) (TRUE) -#define valid_user_stack_selector(x) (TRUE) - +extern boolean_t valid_user_segment_selectors(uint16_t cs, + uint16_t ss, + uint16_t ds, + uint16_t es, + uint16_t fs, + uint16_t gs); -#define NULL_SEG 0 +/* Forward: */ +extern boolean_t machine_exception(int, int, int, int *, int *); /* Signal handler flavors supported */ /* These defns should match the Libc implmn */ @@ -76,11 +71,10 @@ * pointer, and the argument pointer, it returns * to the user specified pc, psl. 
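/*
 * The machdep.cpu sysctls above all share one handler: arg1 carries the
 * byte offset of a field inside i386_cpu_info_t and arg2 its width,
 * with two sentinels -- arg2 == 0 exports an in-struct NUL-terminated
 * string, and arg2 == -1 a string reached through a char * member (the
 * new model_string entry).  A hypothetical further entry would follow
 * the same shape; the cpuid_stepping field is assumed here purely for
 * illustration:
 */
SYSCTL_PROC(_machdep_cpu, OID_AUTO, stepping, CTLTYPE_INT | CTLFLAG_RD,
    (void *)offsetof(i386_cpu_info_t, cpuid_stepping), sizeof(uint8_t),
    hw_cpu_sysctl, "I", "CPU stepping");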
*/ - void sendsig(p, catcher, sig, mask, code) struct proc *p; - sig_t catcher; + user_addr_t catcher; /* sig_t */ int sig, mask; u_long code; { @@ -96,13 +90,11 @@ sendsig(p, catcher, sig, mask, code) struct sigacts *ps = p->p_sigacts; int oonstack; thread_t thread = current_thread(); - thread_act_t th_act = current_act(); struct uthread * ut; - struct i386_saved_state * saved_state = (struct i386_saved_state *) - get_user_regs(th_act); + struct i386_saved_state * saved_state = get_user_regs(thread); sig_t trampact; - ut = get_bsdthread_info(th_act); + ut = get_bsdthread_info(thread); oonstack = ps->ps_sigstk.ss_flags & SA_ONSTACK; if ((ps->ps_flags & SAS_ALTSTACK) && !oonstack && (ps->ps_sigonstack & sigmask(sig))) { @@ -116,10 +108,10 @@ sendsig(p, catcher, sig, mask, code) * Build the argument list for the signal handler. */ - trampact = ps->ps_trampact[sig]; + trampact = (sig_t)ps->ps_trampact[sig]; /* Handler should call sigreturn to get out of it */ frame.retaddr = 0xffffffff; - frame.catcher = catcher; + frame.catcher = CAST_DOWN(sig_t,catcher); /* XXX LP64 */ frame.sigstyle = UC_TRAD; frame.sig = sig; @@ -128,19 +120,9 @@ sendsig(p, catcher, sig, mask, code) } else frame.code = 0; frame.scp = scp; - if (copyout((caddr_t)&frame, (caddr_t)fp, sizeof (frame))) + if (copyout((caddr_t)&frame, (user_addr_t)fp, sizeof (frame))) goto bad; -#if PC_SUPPORT - { - PCcontext_t context = threadPCContext(thread); - - if (context && context->running) { - oonstack |= 02; - context->running = FALSE; - } - } -#endif /* * Build the signal context to be used by sigreturn. */ @@ -171,17 +153,17 @@ sendsig(p, catcher, sig, mask, code) context.sc_fs = saved_state->fs; context.sc_gs = saved_state->gs; } - if (copyout((caddr_t)&context, (caddr_t)scp, sizeof (context))) + if (copyout((caddr_t)&context, (user_addr_t)scp, sizeof (context))) goto bad; saved_state->eip = (unsigned int)trampact; - saved_state->cs = UCODE_SEL; + saved_state->cs = USER_CS; saved_state->uesp = (unsigned int)fp; - saved_state->ss = UDATA_SEL; + saved_state->ss = USER_DS; - saved_state->ds = UDATA_SEL; - saved_state->es = UDATA_SEL; + saved_state->ds = USER_DS; + saved_state->es = USER_DS; saved_state->fs = NULL_SEG; saved_state->gs = USER_CTHREAD; return; @@ -207,83 +189,88 @@ bad: * psl to gain improper priviledges or to cause * a machine fault. */ -struct sigreturn_args { - struct sigcontext *sigcntxp; -}; /* ARGSUSED */ int -sigreturn(p, uap, retval) - struct proc *p; - struct sigreturn_args *uap; - int *retval; +sigreturn( + struct proc *p, + struct sigreturn_args *uap, + __unused int *retval) { struct sigcontext context; thread_t thread = current_thread(); - thread_act_t th_act = current_act(); int error; struct i386_saved_state* saved_state = (struct i386_saved_state*) - get_user_regs(th_act); + get_user_regs(thread); struct uthread * ut; - if (saved_state == NULL) - return EINVAL; + if (saved_state == NULL) + return EINVAL; - if (error = copyin((caddr_t)uap->sigcntxp, (caddr_t)&context, - sizeof (context))) - return(error); - ut = (struct uthread *)get_bsdthread_info(th_act); + if ((error = copyin(CAST_USER_ADDR_T(uap->sigcntxp), (void *)&context, + sizeof (context)))) + return(error); - if (context.sc_onstack & 01) - p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; + /* + * Validate segment selectors. + * Bad values would result in kernel exception at context switch + * back to user mode. If other state is invalid an exception will + * occur in user context. 
+ */ + if (!valid_user_segment_selectors(context.sc_cs, + context.sc_ss, + context.sc_ds, + context.sc_es, + context.sc_fs, + context.sc_gs)) { + return EINVAL; + } + + ut = (struct uthread *)get_bsdthread_info(thread); + + if (context.sc_onstack & 01) + p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; + ut->uu_sigmask = context.sc_mask &~ sigcantmask; if(ut->uu_siglist & ~ut->uu_sigmask) - signal_setast(current_act()); - saved_state->eax = context.sc_eax; - saved_state->ebx = context.sc_ebx; - saved_state->ecx = context.sc_ecx; - saved_state->edx = context.sc_edx; - saved_state->edi = context.sc_edi; - saved_state->esi = context.sc_esi; - saved_state->ebp = context.sc_ebp; - saved_state->uesp = context.sc_esp; - saved_state->ss = context.sc_ss; - saved_state->efl = context.sc_eflags; - saved_state->efl &= ~EFL_USERCLR; - saved_state->efl |= EFL_USERSET; - saved_state->eip = context.sc_eip; - saved_state->cs = context.sc_cs; - - if (context.sc_eflags & EFL_VM) { - saved_state->ds = NULL_SEG; - saved_state->es = NULL_SEG; - saved_state->fs = NULL_SEG; - saved_state->gs = NULL_SEG; - saved_state->v86_segs.v86_ds = context.sc_ds; - saved_state->v86_segs.v86_es = context.sc_es; - saved_state->v86_segs.v86_fs = context.sc_fs; - saved_state->v86_segs.v86_gs = context.sc_gs; - - saved_state->efl |= EFL_VM; - } - else { - saved_state->ds = context.sc_ds; - saved_state->es = context.sc_es; - saved_state->fs = context.sc_fs; - saved_state->gs = context.sc_gs; - } - -#if PC_SUPPORT - if (context.sc_onstack & 02) { - PCcontext_t context = threadPCContext(thread); - - if (context) - context->running = TRUE; - } -#endif + signal_setast(thread); + + saved_state->eax = context.sc_eax; + saved_state->ebx = context.sc_ebx; + saved_state->ecx = context.sc_ecx; + saved_state->edx = context.sc_edx; + saved_state->edi = context.sc_edi; + saved_state->esi = context.sc_esi; + saved_state->ebp = context.sc_ebp; + saved_state->uesp = context.sc_esp; + saved_state->ss = context.sc_ss; + saved_state->efl = context.sc_eflags; + saved_state->efl &= ~EFL_USERCLR; + saved_state->efl |= EFL_USERSET; + saved_state->eip = context.sc_eip; + saved_state->cs = context.sc_cs; + + if (context.sc_eflags & EFL_VM) { + saved_state->ds = NULL_SEG; + saved_state->es = NULL_SEG; + saved_state->fs = NULL_SEG; + saved_state->gs = NULL_SEG; + saved_state->v86_segs.v86_ds = context.sc_ds; + saved_state->v86_segs.v86_es = context.sc_es; + saved_state->v86_segs.v86_fs = context.sc_fs; + saved_state->v86_segs.v86_gs = context.sc_gs; + + saved_state->efl |= EFL_VM; + } + else { + saved_state->ds = context.sc_ds; + saved_state->es = context.sc_es; + saved_state->fs = context.sc_fs; + saved_state->gs = context.sc_gs; + } return (EJUSTRETURN); } @@ -295,11 +282,11 @@ sigreturn(p, uap, retval) boolean_t machine_exception( - int exception, - int code, - int subcode, - int *unix_signal, - int *unix_code + int exception, + int code, + __unused int subcode, + int *unix_signal, + int *unix_code ) { @@ -321,3 +308,52 @@ machine_exception( return(TRUE); } + +#include +#include + +int __pthread_cset(struct sysent *); +void __pthread_creset(struct sysent *); + +int +__pthread_cset(struct sysent *callp) +{ + unsigned int cancel_enable; + thread_t thread; + struct uthread * uthread; + + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + cancel_enable = callp->sy_cancel; + if (cancel_enable == _SYSCALL_CANCEL_NONE) { + uthread->uu_flag |= UT_NOTCANCELPT; + } else { + 
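/*
 * Why sigreturn() must vet the segment registers above: they are loaded
 * back into hardware on the way out, and a bogus selector would fault
 * in kernel context at the next switch to user mode.  The real
 * predicate lives in osfmk and is not shown in this patch; what follows
 * is only a simplified sketch of the kind of test it performs (user
 * code/data selectors must carry RPL 3):
 */
static boolean_t
selector_plausible_sketch(uint16_t sel, boolean_t allow_null)
{
	if (sel == 0)
		return allow_null;	/* null selector: fine for fs/gs, fatal for cs/ss */
	return ((sel & 0x3) == 3);	/* low two bits are the requested privilege level */
}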
if((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + if (cancel_enable == _SYSCALL_CANCEL_PRE) + return(EINTR); + else + thread_abort_safely(thread); + } + } + return(0); +} + + +void +__pthread_creset(struct sysent *callp) +{ + + unsigned int cancel_enable; + thread_t thread; + struct uthread * uthread; + + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + cancel_enable = callp->sy_cancel; + if (!cancel_enable) + uthread->uu_flag &= ~UT_NOTCANCELPT; + +} + diff --git a/bsd/dev/i386/unix_startup.c b/bsd/dev/i386/unix_startup.c deleted file mode 100644 index f341af01a..000000000 --- a/bsd/dev/i386/unix_startup.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992,7 NeXT Computer, Inc. - * - * Unix data structure initialization. - * - */ - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -extern vm_map_t mb_map; - -/* - * Declare these as initialized data so we can patch them. 
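/*
 * How the two cancellation helpers above are meant to bracket a system
 * call: __pthread_cset() runs first and may bounce the call with EINTR
 * (or abort the thread) when a cancel is pending, and
 * __pthread_creset() clears the not-a-cancel-point mark afterwards.
 * A sketch of a dispatcher using them; the sy_call invocation is
 * simplified relative to the real syscall path:
 */
static int
cancellable_syscall_sketch(struct sysent *callp, struct proc *p,
    void *uap, int *retval)
{
	int error;

	error = __pthread_cset(callp);
	if (error)
		return error;	/* EINTR for a _SYSCALL_CANCEL_PRE point */
	error = (*callp->sy_call)(p, uap, retval);
	__pthread_creset(callp);
	return error;
}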
- */ - -#ifdef NBUF -int nbuf = NBUF; -int niobuf = NBUF/2; -#else -int nbuf = 0; -int niobuf = 0; -#endif - -int srv = 0; /* Flag indicates a server boot when set */ -int ncl = 0; - -vm_map_t buffer_map; -vm_map_t bufferhdr_map; - -void -bsd_startupearly() -{ - vm_offset_t firstaddr; - vm_size_t size; - kern_return_t ret; - - if (nbuf == 0) - nbuf = atop(mem_size / 100); /* 1% */ - if (nbuf > 8192) - nbuf = 8192; - if (nbuf < 256) - nbuf = 256; - - if (niobuf == 0) - niobuf = (nbuf / 2) + 64; /* 64 reserved buffers */ - if (niobuf > 4096) - niobuf = 4096; - if (niobuf < 128) - niobuf = 128; - - size = (nbuf + niobuf) * sizeof (struct buf); - size = round_page(size); - - ret = kmem_suballoc(kernel_map, - &firstaddr, - size, - FALSE, - TRUE, - &bufferhdr_map); - - if (ret != KERN_SUCCESS) - panic("Failed to create bufferhdr_map"); - - ret = kernel_memory_allocate(bufferhdr_map, - &firstaddr, - size, - 0, - KMA_HERE | KMA_KOBJECT); - - if (ret != KERN_SUCCESS) - panic("Failed to allocate bufferhdr_map"); - - buf = (struct buf * )firstaddr; - bzero(buf,size); - - if (mem_size > (64 * 1024 * 1024)) { - int scale; - extern u_long tcp_sendspace; - extern u_long tcp_recvspace; - - if ((nmbclusters = ncl) == 0) { - if ((nmbclusters = ((mem_size / 16) / MCLBYTES)) > 16384) - nmbclusters = 16384; - } - if ((scale = nmbclusters / NMBCLUSTERS) > 1) { - tcp_sendspace *= scale; - tcp_recvspace *= scale; - - if (tcp_sendspace > (32 * 1024)) - tcp_sendspace = 32 * 1024; - if (tcp_recvspace > (32 * 1024)) - tcp_recvspace = 32 * 1024; - } - } -} - -void -bsd_bufferinit() -{ - unsigned int i; - vm_size_t size; - kern_return_t ret; - vm_offset_t firstaddr; - - cons.t_dev = makedev(12, 0); - - bsd_startupearly(); - - ret = kmem_suballoc(kernel_map, - (vm_offset_t *)&mbutl, - (vm_size_t) (nmbclusters * MCLBYTES), - FALSE, - TRUE, - &mb_map); - - if (ret != KERN_SUCCESS) - panic("Failed to allocate mb_map\n"); - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); -} - -void -md_prepare_for_shutdown(int paniced, int howto, char * command) -{ -} diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c index 490d030cd..a01fcb258 100644 --- a/bsd/dev/memdev.c +++ b/bsd/dev/memdev.c @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /* * Copyright (c) 1988 University of Utah. 
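/*
 * The sizing policy in the deleted bsd_startupearly() above, restated
 * as a worked example assuming 4 KB pages: nbuf is 1% of physical
 * memory in pages clamped to [256, 8192], and niobuf is half that plus
 * 64 reserved buffers, clamped to [128, 4096].
 */
static void
size_buffers_sketch(uint64_t mem_size, int *nbufp, int *niobufp)
{
	int nbuf = (int)((mem_size / 100) / 4096);	/* atop(mem_size / 100) */
	int niobuf;

	if (nbuf > 8192) nbuf = 8192;
	if (nbuf < 256)  nbuf = 256;

	niobuf = (nbuf / 2) + 64;
	if (niobuf > 4096) niobuf = 4096;
	if (niobuf < 128)  niobuf = 128;

	/* e.g. 512 MB of RAM -> nbuf = 1310, niobuf = 719 */
	*nbufp = nbuf;
	*niobufp = niobuf;
}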
* Copyright (c) 1990, 1993 @@ -48,9 +69,7 @@ * */ - #include -#include #include #include #include @@ -58,30 +77,38 @@ #include #include #include -#include #include #include #include #include - #include +#include +#include +#include #include -#include #include #include -static open_close_fcn_t mdevopen; -static open_close_fcn_t mdevclose; + +void mdevinit(int the_cnt); + +static open_close_fcn_t mdevopen; +static open_close_fcn_t mdevclose; static psize_fcn_t mdevsize; -static strategy_fcn_t mdevstrategy; -static int mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); -static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); -static int mdevrw(dev_t dev, struct uio *uio, int ioflag); -static char *nonspace(char *pos, char *end); -static char *getspace(char *pos, char *end); -static char *cvtnum(char *pos, char *end, unsigned int *num); +static strategy_fcn_t mdevstrategy; +static int mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); +static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); +static int mdevrw(dev_t dev, struct uio *uio, int ioflag); +static char * nonspace(char *pos, char *end); +static char * getspace(char *pos, char *end); +static char * cvtnum(char *pos, char *end, unsigned int *num); + +extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t bytes); +extern void mapping_set_mod(ppnum_t pn); +extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); + /* * cdevsw @@ -139,11 +166,13 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys); dev_t mdevlookup(int devid); -static int mdevclose(dev_t dev, int flags, int devtype, struct proc *p) { +static int mdevclose(__unused dev_t dev, __unused int flags, + __unused int devtype, __unused struct proc *p) { + return (0); } -static int mdevopen(dev_t dev, int flags, int devtype, struct proc *p) { +static int mdevopen(dev_t dev, int flags, __unused int devtype, __unused struct proc *p) { int devid; @@ -156,12 +185,11 @@ static int mdevopen(dev_t dev, int flags, int devtype, struct proc *p) { return(0); } -static int mdevrw(dev_t dev, struct uio *uio, int ioflag) { +static int mdevrw(dev_t dev, struct uio *uio, __unused int ioflag) { int status; - int unit; addr64_t mdata; - int devid; - enum uio_seg saveflag; + int devid; + enum uio_seg saveflag; devid = minor(dev); /* Get minor device number */ @@ -171,82 +199,77 @@ static int mdevrw(dev_t dev, struct uio *uio, int ioflag) { mdata = ((addr64_t)mdev[devid].mdBase << 12) + uio->uio_offset; /* Point to the area in "file" */ saveflag = uio->uio_segflg; /* Remember what the request is */ - if (mdev[devid].mdFlags & mdPhys) uio->uio_segflg = UIO_PHYS_USERSPACE; /* Make sure we are moving from physical ram if physical device */ - status = uiomove64(mdata, uio->uio_resid, uio); /* Move the data */ +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("mdevrw - invalid uio_segflg\n"); + } +#endif /* LP64_DEBUG */ + /* Make sure we are moving from physical ram if physical device */ + if (mdev[devid].mdFlags & mdPhys) { + if (uio->uio_segflg == UIO_USERSPACE64) + uio->uio_segflg = UIO_PHYS_USERSPACE64; + else if (uio->uio_segflg == UIO_USERSPACE32) + uio->uio_segflg = UIO_PHYS_USERSPACE32; + else + uio->uio_segflg = UIO_PHYS_USERSPACE; + } + status = uiomove64(mdata, uio_resid(uio), uio); /* Move the data */ uio->uio_segflg = saveflag; /* Restore the flag */ return 
(status); } static void mdevstrategy(struct buf *bp) { - int unmap; - unsigned int sz, left, lop, csize; - kern_return_t ret; + unsigned int left, lop, csize; vm_offset_t vaddr, blkoff; - struct buf *tbuf; int devid; addr64_t paddr, fvaddr; ppnum_t pp; - devid = minor(bp->b_dev); /* Get minor device number */ + devid = minor(buf_device(bp)); /* Get minor device number */ if ((mdev[devid].mdFlags & mdInited) == 0) { /* Have we actually been defined yet? */ - bp->b_error = ENXIO; - bp->b_flags |= B_ERROR; - biodone(bp); + buf_seterror(bp, ENXIO); + buf_biodone(bp); return; } - bp->b_resid = bp->b_bcount; /* Set byte count */ + buf_setresid(bp, buf_count(bp)); /* Set byte count */ - blkoff = bp->b_blkno * mdev[devid].mdSecsize; /* Get offset into file */ + blkoff = buf_blkno(bp) * mdev[devid].mdSecsize; /* Get offset into file */ /* * Note that reading past end is an error, but reading at end is an EOF. For these - * we just return with b_resid == b_bcount. + * we just return with resid == count. */ if (blkoff >= (mdev[devid].mdSize << 12)) { /* Are they trying to read/write at/after end? */ if(blkoff != (mdev[devid].mdSize << 12)) { /* Are we trying to read after EOF? */ - bp->b_error = EINVAL; /* Yeah, this is an error */ - bp->b_flags |= B_ERROR | B_INVAL; + buf_seterror(bp, EINVAL); /* Yeah, this is an error */ } - biodone(bp); /* Return */ + buf_biodone(bp); /* Return */ return; } - if ((blkoff + bp->b_bcount) > (mdev[devid].mdSize << 12)) { /* Will this read go past end? */ - bp->b_bcount = ((mdev[devid].mdSize << 12) - blkoff); /* Yes, trim to max */ + if ((blkoff + buf_count(bp)) > (mdev[devid].mdSize << 12)) { /* Will this read go past end? */ + buf_setcount(bp, ((mdev[devid].mdSize << 12) - blkoff)); /* Yes, trim to max */ } + /* + * make sure the buffer's data area is + * accessible + */ + if (buf_map(bp, (caddr_t *)&vaddr)) + panic("ramstrategy: buf_map failed\n"); - vaddr = 0; /* Assume not mapped yet */ - unmap = 0; - - if (bp->b_flags & B_VECTORLIST) { /* Do we have a list of UPLs? */ - tbuf = (struct buf *)bp->b_real_bp; /* Get this for C's inadequacies */ - if((bp->b_flags & B_NEED_IODONE) && /* If we have a UPL, is it already mapped? */ - tbuf && - tbuf->b_data) { - vaddr = (vm_offset_t)tbuf->b_data; /* We already have this mapped in, get base address */ - } - else { /* Not mapped yet */ - ret = ubc_upl_map(bp->b_pagelist, &vaddr); /* Map it in */ - if(ret != KERN_SUCCESS) panic("ramstrategy: ubc_upl_map failed, rc = %08X\n", ret); - unmap = 1; /* Remember to unmap later */ - } - vaddr = vaddr += bp->b_uploffset; /* Calculate actual vaddr */ - } - else vaddr = (vm_offset_t)bp->b_data; /* No UPL, we already have address */ - fvaddr = (mdev[devid].mdBase << 12) + blkoff; /* Point to offset into ram disk */ - if(bp->b_flags & B_READ) { /* Is this a read? */ + if (buf_flags(bp) & B_READ) { /* Is this a read? */ if(!(mdev[devid].mdFlags & mdPhys)) { /* Physical mapped disk? 
*/ bcopy((void *)((uintptr_t)fvaddr), - (void *)vaddr, (size_t)bp->b_bcount); /* This is virtual, just get the data */ + (void *)vaddr, (size_t)buf_count(bp)); /* This is virtual, just get the data */ } else { - left = bp->b_bcount; /* Init the amount left to copy */ + left = buf_count(bp); /* Init the amount left to copy */ while(left) { /* Go until it is all copied */ lop = min((4096 - (vaddr & 4095)), (4096 - (fvaddr & 4095))); /* Get smallest amount left on sink and source */ @@ -269,10 +292,10 @@ static void mdevstrategy(struct buf *bp) { else { /* This is a write */ if(!(mdev[devid].mdFlags & mdPhys)) { /* Physical mapped disk? */ bcopy((void *)vaddr, (void *)((uintptr_t)fvaddr), - (size_t)bp->b_bcount); /* This is virtual, just put the data */ + (size_t)buf_count(bp)); /* This is virtual, just put the data */ } else { - left = bp->b_bcount; /* Init the amount left to copy */ + left = buf_count(bp); /* Init the amount left to copy */ while(left) { /* Go until it is all copied */ lop = min((4096 - (vaddr & 4095)), (4096 - (fvaddr & 4095))); /* Get smallest amount left on sink and source */ @@ -292,13 +315,16 @@ static void mdevstrategy(struct buf *bp) { } } } - - if (unmap) { /* Do we need to unmap this? */ - ubc_upl_unmap(bp->b_pagelist); /* Yes, unmap it */ - } - - bp->b_resid = 0; /* Nothing more to do */ - biodone(bp); /* Say we've finished */ + /* + * buf_unmap takes care of all the cases + * it will unmap the buffer from kernel + * virtual space if that was the state + * when we mapped it. + */ + buf_unmap(bp); + + buf_setresid(bp, 0); /* Nothing more to do */ + buf_biodone(bp); /* Say we've finished */ } static int mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { @@ -309,8 +335,8 @@ static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc return (mdevioctl(dev, cmd, data, flag, p, 1)); } -static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, int is_char) { - +static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, + struct proc *p, int is_char) { int error; u_long *f; u_int64_t *o; @@ -320,7 +346,7 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc if (devid > 16) return (ENXIO); /* Not valid */ - error = suser(p->p_ucred, &p->p_acflag); /* Are we superman? */ + error = proc_suser(p); /* Are we superman? */ if (error) return (error); /* Nope... 
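/*
 * Both copy directions above advance in chunks that never cross a 4 KB
 * page boundary on either the source or the sink, since each side may
 * be physically discontiguous.  The chunking arithmetic, isolated into
 * a sketch where copy_chunk stands in for bcopy_phys():
 */
static void
copy_page_bounded_sketch(addr64_t src, addr64_t dst, unsigned int left,
    void (*copy_chunk)(addr64_t, addr64_t, unsigned int))
{
	unsigned int soff, doff, lop, csize;

	while (left) {
		soff = (unsigned int)(src & 4095);	/* offset into source page */
		doff = (unsigned int)(dst & 4095);	/* offset into sink page */
		lop = min(4096 - doff, 4096 - soff);	/* room left on both sides */
		csize = min(lop, left);			/* never copy past either page */
		copy_chunk(src, dst, csize);
		src += csize;
		dst += csize;
		left -= csize;
	}
}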
*/ f = (u_long*)data; @@ -392,7 +418,7 @@ static int mdevsize(dev_t dev) { #include -void mdevinit(int cnt) { +void mdevinit(__unused int the_cnt) { int devid, phys; ppnum_t base; diff --git a/bsd/dev/ppc/chud/chud_bsd_callback.c b/bsd/dev/ppc/chud/chud_bsd_callback.c index e212ebe82..0302458f5 100644 --- a/bsd/dev/ppc/chud/chud_bsd_callback.c +++ b/bsd/dev/ppc/chud/chud_bsd_callback.c @@ -28,12 +28,7 @@ #include /* u_int */ #include /* struct proc */ #include /* struct sysent */ - -struct exit_args { - int rval; -}; -extern void exit(struct proc *p, struct exit_args *uap, int *retval); -extern struct sysent sysent[]; +#include #pragma mark **** kern debug **** typedef void (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); @@ -69,23 +64,3 @@ kern_return_t chudxnu_kdebug_callback_cancel(void) return KERN_SUCCESS; } - -#pragma mark **** task will exit **** - -typedef kern_return_t (*chudxnu_exit_callback_func_t)(int pid); - -__private_extern__ -kern_return_t chudxnu_exit_callback_enter(chudxnu_exit_callback_func_t func) -{ - - return KERN_FAILURE; - -} - -__private_extern__ -kern_return_t chudxnu_exit_callback_cancel(void) -{ - - return KERN_FAILURE; - -} diff --git a/bsd/dev/ppc/chud/chud_process.c b/bsd/dev/ppc/chud/chud_process.c index c0dcd504d..1fad77e4c 100644 --- a/bsd/dev/ppc/chud/chud_process.c +++ b/bsd/dev/ppc/chud/chud_process.c @@ -21,7 +21,8 @@ */ #include -#include +#include +#include // vn_getpath() __private_extern__ int chudxnu_pid_for_task(task_t task) diff --git a/bsd/dev/ppc/conf.c b/bsd/dev/ppc/conf.c index e4a751fca..463700245 100644 --- a/bsd/dev/ppc/conf.c +++ b/bsd/dev/ppc/conf.c @@ -35,7 +35,6 @@ #include #include -#include #include #include #include @@ -96,7 +95,7 @@ extern int kmopen(),kmclose(),kmread(),kmwrite(),kmioctl(), extern int cttyopen(), cttyread(), cttywrite(), cttyioctl(), cttyselect(); -extern int mmread(),mmwrite(); +extern int mmread(),mmwrite(),mmioctl(); #define mmselect seltrue #if 1 @@ -157,8 +156,8 @@ struct cdevsw cdevsw[] = }, { nulldev, nulldev, mmread, mmwrite, /* 3*/ - eno_ioctl, nulldev, nulldev, 0, (select_fcn_t *)mmselect, - eno_mmap, eno_strat, eno_getc, eno_putc, 0 + mmioctl, nulldev, nulldev, 0, (select_fcn_t *)mmselect, + eno_mmap, eno_strat, eno_getc, eno_putc, D_DISK }, { ptsopen, ptsclose, ptsread, ptswrite, /* 4*/ diff --git a/bsd/dev/ppc/cons.c b/bsd/dev/ppc/cons.c index 26290d9a3..b9d966909 100644 --- a/bsd/dev/ppc/cons.c +++ b/bsd/dev/ppc/cons.c @@ -53,12 +53,19 @@ consopen(dev, flag, devtype, pp) struct proc *pp; { dev_t device; + boolean_t funnel_state; + int error; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_open)(device, flag, devtype, pp)); + error = (*cdevsw[major(device)].d_open)(device, flag, devtype, pp); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ @@ -69,12 +76,20 @@ consclose(dev, flag, mode, pp) struct proc *pp; { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_close)(device, flag, mode, pp)); + error = (*cdevsw[major(device)].d_close)(device, flag, mode, pp); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); + + } /*ARGSUSED*/ @@ -85,12 +100,18 @@ consread(dev, uio, ioflag) int ioflag; { dev_t device; + 
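/*
 * The privilege check rewritten above recurs throughout this patch:
 * the old suser(p->p_ucred, &p->p_acflag) call becomes proc_suser(p),
 * which consults the process's kauth credential internally and returns
 * an errno.  The resulting prologue for any superuser-only operation:
 */
static int
suser_prologue_sketch(struct proc *p)
{
	int error;

	error = proc_suser(p);	/* 0 for the superuser, else an errno */
	if (error)
		return (error);	/* typically EPERM */
	return (0);
}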
boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_read)(device, uio, ioflag)); + error = (*cdevsw[major(device)].d_read)(device, uio, ioflag); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ @@ -101,12 +122,18 @@ conswrite(dev, uio, ioflag) int ioflag; { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_write)(device, uio, ioflag)); + error = (*cdevsw[major(device)].d_write)(device, uio, ioflag); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ @@ -119,6 +146,10 @@ consioctl(dev, cmd, addr, flag, p) struct proc *p; { dev_t device; + boolean_t funnel_state; + int error; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; @@ -129,16 +160,23 @@ consioctl(dev, cmd, addr, flag, p) * output from the "virtual" console. */ if (cmd == TIOCCONS && constty) { - int error = suser(p->p_ucred, (u_short *) NULL); - if (error) - return (error); + error = proc_suser(p); + if (error) { + goto out; + } constty = NULL; - return (0); + error = 0; + goto out; } - return ((*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p)); + error = (*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p); +out: + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ +/* called with funnel held */ int consselect(dev, flag, wql, p) dev_t dev; @@ -159,12 +197,18 @@ int cons_getc() { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_getc)(device)); + error = (*cdevsw[major(device)].d_getc)(device); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /*ARGSUSED*/ @@ -173,12 +217,18 @@ cons_putc(c) char c; { dev_t device; + boolean_t funnel_state; + int error; + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (constty) device = constty->t_dev; else device = cons.t_dev; - return ((*cdevsw[major(device)].d_putc)(device, c)); + error = (*cdevsw[major(device)].d_putc)(device, c); + thread_funnel_set(kernel_flock, funnel_state); + + return(error); } /* diff --git a/bsd/dev/ppc/cons.h b/bsd/dev/ppc/cons.h index 00d91a155..6da1b0ae7 100644 --- a/bsd/dev/ppc/cons.h +++ b/bsd/dev/ppc/cons.h @@ -25,10 +25,10 @@ struct consdev { char *cn_name; /* name of device in dev_name_list */ - int (*cn_probe)(); /* probe hardware and fill in consdev info */ - int (*cn_init)(); /* turn on as console */ - int (*cn_getc)(); /* kernel getchar interface */ - int (*cn_putc)(); /* kernel putchar interface */ + int (*cn_probe)(void); /* probe and fill in consdev info */ + int (*cn_init)(void); /* turn on as console */ + int (*cn_getc)(void); /* kernel getchar interface */ + int (*cn_putc)(void); /* kernel putchar interface */ struct tty *cn_tp; /* tty structure for console device */ dev_t cn_dev; /* major/minor of device */ short cn_pri; /* pecking order; the higher the better */ diff --git a/bsd/dev/ppc/kern_machdep.c b/bsd/dev/ppc/kern_machdep.c index ba6b61d50..9aefbe8ba 100644 --- a/bsd/dev/ppc/kern_machdep.c +++ b/bsd/dev/ppc/kern_machdep.c @@ -27,14 +27,6 @@ * Author: John Seamons * * Machine-specific kernel routines. 
- * - * HISTORY - * 8-Dec-91 Peter King (king) at NeXT - * Added grade_cpu_subtype(). - * FIXME: Do we want to merge this with check_cpu_subtype()? - * - * 5-Mar-90 John Seamons (jks) at NeXT - * Created. */ #include @@ -44,80 +36,189 @@ #include #include -int -check_cpu_subtype(cpu_subtype_t cpu_subtype) -{ - struct machine_slot *ms = &machine_slot[cpu_number()]; - - if (cpu_subtype == ms->cpu_subtype) - return (TRUE); - - switch (cpu_subtype) { - case CPU_SUBTYPE_POWERPC_970: - /* Do not allow a 970 binary to run on non-970 systems */ - if (ms->cpu_subtype != CPU_SUBTYPE_POWERPC_970) - break; - case CPU_SUBTYPE_POWERPC_7450: - case CPU_SUBTYPE_POWERPC_7400: - case CPU_SUBTYPE_POWERPC_750: - case CPU_SUBTYPE_POWERPC_ALL: - return (TRUE); - } - - return (FALSE); -} +int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); /* - * Routine: grade_cpu_subtype() + * Routine: grade_binary() * * Function: - * Return a relative preference for cpu_subtypes in fat executable files. - * The higher the grade, the higher the preference. + * Return a relative preference for exectypes and execsubtypes in fat + * executable files. The higher the grade, the higher the preference. * A grade of 0 means not acceptable. + * + * Note: We really don't care about the real cpu_type() here, + * because machines can only have one type. */ - int -grade_cpu_subtype(cpu_subtype_t cpu_subtype) +grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype) { - struct machine_slot *ms = &machine_slot[cpu_number()]; + int cpusubtype = cpu_subtype(); /* - * This code should match cpusubtype_findbestarch() in best_arch.c in the - * cctools project. As of 2/16/98 this is what has been agreed upon for - * the PowerPC subtypes. If an exact match is not found the subtype will - * be picked from the following order: + * This code should match cpusubtype_findbestarch() in best_arch.c + * in the cctools project. As of 2/16/98 this is what has been + * agreed upon for the PowerPC subtypes. If an exact match is not + * found the subtype will be picked from the following order: * 970(but only on 970), 7450, 7400, 750, ALL - * Note the 601 is NOT in the list above. It is only picked via an exact - * match. For details see Radar 2213821. - * - * To implement this function to follow what was agreed upon above, we use - * the fact there are currently 4 different subtypes. Exact matches return - * the value 6, and the values 5 thru 1 are returned for the - * subtypes listed in the order above. + * Note the 601 is NOT in the list above. It is only picked via + * an exact match. For details see Radar 2213821. */ - if (ms->cpu_subtype == cpu_subtype) - return 6; - switch (cpu_subtype) { - case CPU_SUBTYPE_POWERPC_970: - /* Do not allow a 970 binary to run on non-970 systems */ - if (ms->cpu_subtype != CPU_SUBTYPE_POWERPC_970) - break; - return 5; - case CPU_SUBTYPE_POWERPC_7450: - return 4; - case CPU_SUBTYPE_POWERPC_7400: - return 3; - case CPU_SUBTYPE_POWERPC_750: - return 2; - case CPU_SUBTYPE_POWERPC_ALL: - return 1; + + switch (cpusubtype) { + case CPU_SUBTYPE_POWERPC_970: + switch(exectype) { + case CPU_TYPE_POWERPC64: /* CPU_IS64BIT | CPU_POWERPC */ + switch(execsubtype) { + /* + * Prefer 64 bit architecture specific binaries; note + * that this value does not mean the same thing here + * as it does below. 
+ */ + case CPU_SUBTYPE_POWERPC_970: + return 8; + /* Prefer generic binaries */ + case CPU_SUBTYPE_POWERPC_ALL: + return 7; + default: + return 0; + } + /* NOTREACHED */ + + case CPU_TYPE_POWERPC: + switch(execsubtype) { + /* + * Prefer 32 bit binaries with 64 bit leaf functions; + * this is actually bogus use of the subtype to encode + * CPU feature bits. + */ + case CPU_SUBTYPE_POWERPC_970: + return 6; + case CPU_SUBTYPE_POWERPC_7450: + return 4; + case CPU_SUBTYPE_POWERPC_7400: + return 3; + case CPU_SUBTYPE_POWERPC_750: + return 2; + case CPU_SUBTYPE_POWERPC_ALL: + return 1; + default: + return 0; + } + /* NOTREACHED */ + + default: + return 0; + } + /* NOTREACHED */ + + case CPU_SUBTYPE_POWERPC_7450: + switch(exectype) { + case CPU_TYPE_POWERPC64: /* CPU_IS64BIT | CPU_POWERPC */ + return 0; + + case CPU_TYPE_POWERPC: + switch(execsubtype) { + case CPU_SUBTYPE_POWERPC_7450: + return 6; + case CPU_SUBTYPE_POWERPC_7400: + return 4; + case CPU_SUBTYPE_POWERPC_750: + return 3; + case CPU_SUBTYPE_POWERPC_ALL: + return 1; + default: + return 0; + } + /* NOTREACHED */ + + default: + return 0; + } + /* NOTREACHED */ + + case CPU_SUBTYPE_POWERPC_7400: + switch(exectype) { + case CPU_TYPE_POWERPC64: /* CPU_IS64BIT | CPU_POWERPC */ + return 0; + + case CPU_TYPE_POWERPC: + switch(execsubtype) { + case CPU_SUBTYPE_POWERPC_7400: + return 6; + case CPU_SUBTYPE_POWERPC_7450: + return 4; + case CPU_SUBTYPE_POWERPC_750: + return 3; + case CPU_SUBTYPE_POWERPC_ALL: + return 1; + default: + return 0; + } + /* NOTREACHED */ + + default: + return 0; + } + /* NOTREACHED */ + + case CPU_SUBTYPE_POWERPC_750: + switch(exectype) { + case CPU_TYPE_POWERPC64: /* CPU_IS64BIT | CPU_POWERPC */ + return 0; + + case CPU_TYPE_POWERPC: + switch(execsubtype) { + case CPU_SUBTYPE_POWERPC_750: + return 6; +#ifndef ADDRESS_RADAR_2678019 + /* + * Currently implemented because dropping this would + * turn the executable subtype into a "has Altivec" + * flag, which we do not want to permit. It could + * also break working third party applications + * already in use in the field. + */ + case CPU_SUBTYPE_POWERPC_7400: + return 4; + case CPU_SUBTYPE_POWERPC_7450: + return 3; +#endif /* ADDRESS_RADAR_2678019 */ + case CPU_SUBTYPE_POWERPC_ALL: + return 1; + default: + return 0; + } + /* NOTREACHED */ + + default: + return 0; + } + /* NOTREACHED */ + + default: + switch(exectype) { + case CPU_TYPE_POWERPC64: /* CPU_IS64BIT | CPU_POWERPC */ + return 0; + + case CPU_TYPE_POWERPC: + /* Special case for PPC601 */ + if (cpusubtype == execsubtype) + return 6; + /* + * If we get here it is because it is a cpusubtype we + * don't support or a new cpusubtype that was added + * since this code was written. Both will be + * considered unacceptable. + */ + return 0; + /* NOTREACHED */ + + default: + return 0; + } + /* NOTREACHED */ } - /* - * If we get here it is because it is a cpusubtype we don't support - * or a new cpusubtype that was added since this code was written. Both - * will be considered unacceptable. - */ - return 0; + /* NOTREACHED */ } boolean_t @@ -140,3 +241,20 @@ kernacc( return (TRUE); } + +void +md_prepare_for_shutdown(int paniced, int howto, char * command); + +extern void IOSystemShutdownNotification(void); + +void +md_prepare_for_shutdown(__unused int paniced, __unused int howto, + __unused char * command) +{ + + /* + * Temporary hack to notify the power management root domain + * that the system will shut down. 
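/*
 * The 970 column of the PPC grade_binary() above, restated as a
 * self-checking sketch.  These relations hold only when the kernel is
 * actually running on a 970 (cpu_subtype() == CPU_SUBTYPE_POWERPC_970),
 * and assert() is used here purely for illustration:
 */
#include <assert.h>

static void
check_970_grades(void)
{
	assert(grade_binary(CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_970) == 8);
	assert(grade_binary(CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_ALL) == 7);
	assert(grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_970) == 6);
	assert(grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_7450) == 4);
	assert(grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_7400) == 3);
	assert(grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_750) == 2);
	assert(grade_binary(CPU_TYPE_POWERPC,   CPU_SUBTYPE_POWERPC_ALL) == 1);
	/* a 64-bit slice with any other subtype grades 0 (unusable) */
	assert(grade_binary(CPU_TYPE_POWERPC64, CPU_SUBTYPE_POWERPC_7450) == 0);
}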
+ */ + IOSystemShutdownNotification(); +} diff --git a/bsd/dev/ppc/km.c b/bsd/dev/ppc/km.c index cc6a8f009..db5d95169 100644 --- a/bsd/dev/ppc/km.c +++ b/bsd/dev/ppc/km.c @@ -26,7 +26,7 @@ * HISTORY */ -#include +#include #include #include @@ -44,7 +44,6 @@ /* * 'Global' variables, shared only by this file and conf.c. */ -extern struct tty cons; struct tty *km_tty[1] = { &cons }; /* @@ -53,10 +52,23 @@ struct tty *km_tty[1] = { &cons }; */ int disableConsoleOutput; -/* - * 'Global' variables, shared only by this file and kmDevice.m. - */ -int initialized = 0; +static int initialized = 0; + +// Function prototypes +extern d_open_t kmopen; +extern d_close_t kmclose; +extern d_read_t kmread; +extern d_write_t kmwrite; +extern d_ioctl_t kmioctl; +extern d_getc_t kmgetc; +extern d_putc_t kmputc; + +extern void kminit(void); + +// used by or implemented in the osfmk project +extern void cnputcusr(char); // From osfmk +extern int cngetc(void); // From osfmk +extern void cons_cinput(char ch); // Used by osfmk static int kmoutput(struct tty *tp); static void kmtimeout(struct tty *tp); @@ -64,7 +76,8 @@ static void kmstart(struct tty *tp); extern void KeyboardOpen(void); -int kminit() +void +kminit(void) { cons.t_dev = makedev(12, 0); initialized = 1; @@ -73,13 +86,8 @@ int kminit() * cdevsw interface to km driver. */ int -kmopen( - dev_t dev, - int flag, - int devtype, - struct proc *pp) +kmopen(dev_t dev, int flag, __unused int devtype, struct proc *pp) { - int rtn; int unit; struct tty *tp; struct winsize *wp; @@ -102,7 +110,7 @@ kmopen( tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; termioschars(&tp->t_termios); ttsetwater(tp); - } else if ((tp->t_state & TS_XCLUDE) && pp->p_ucred->cr_uid != 0) + } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) return EBUSY; tp->t_state |= TS_CARR_ON; /* lie and say carrier exists and is on. */ @@ -133,11 +141,8 @@ kmopen( } int -kmclose( - dev_t dev, - int flag, - int mode, - struct proc *p) +kmclose(__unused dev_t dev, __unused int flag, __unused int mode, + __unused struct proc *p) { struct tty *tp; @@ -149,10 +154,7 @@ kmclose( } int -kmread( - dev_t dev, - struct uio *uio, - int ioflag) +kmread(__unused dev_t dev, struct uio *uio, int ioflag) { register struct tty *tp; @@ -161,10 +163,7 @@ kmread( } int -kmwrite( - dev_t dev, - struct uio *uio, - int ioflag) +kmwrite(__unused dev_t dev, struct uio *uio, int ioflag) { register struct tty *tp; @@ -173,11 +172,7 @@ kmwrite( } int -kmioctl( - dev_t dev, - int cmd, - caddr_t data, - int flag, +kmioctl( __unused dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { int error; @@ -210,22 +205,14 @@ kmioctl( } default: error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); - if (error >= 0) { + if (ENOTTY != error) return error; - } - error = ttioctl (tp, cmd, data, flag, p); - if (error >= 0) { - return error; - } - else { - return ENOTTY; - } + return ttioctl (tp, cmd, data, flag, p); } } int -kmputc( - int c) +kmputc(__unused dev_t dev, char c) { if( disableConsoleOutput) @@ -243,8 +230,7 @@ kmputc( } int -kmgetc( - dev_t dev) +kmgetc(__unused dev_t dev) { int c; @@ -257,9 +243,10 @@ kmgetc( return c; } +#if 0 int kmgetc_silent( - dev_t dev) + __unused dev_t dev) { int c; @@ -269,6 +256,7 @@ kmgetc_silent( } return c; } +#endif /* 0 */ /* * Callouts from linesw. 
@@ -277,38 +265,23 @@ kmgetc_silent( #define KM_LOWAT_DELAY ((ns_time_t)1000) static void -kmstart( - struct tty *tp) +kmstart(struct tty *tp) { - extern int hz; if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) goto out; if (tp->t_outq.c_cc == 0) goto out; tp->t_state |= TS_BUSY; - if (tp->t_outq.c_cc > tp->t_lowat) { - /* - * Start immediately. - */ - kmoutput(tp); - } - else { - /* - * Wait a bit... - */ -#if 0 - /* FIXME */ - timeout(kmtimeout, tp, hz); -#else - kmoutput(tp); -#endif - } + kmoutput(tp); + return; + out: - ttwwakeup(tp); + (*linesw[tp->t_line].l_start)(tp); + return; } static void -kmtimeout( struct tty *tp) +kmtimeout(struct tty *tp) { boolean_t funnel_state; @@ -319,8 +292,7 @@ kmtimeout( struct tty *tp) } static int -kmoutput( - struct tty *tp) +kmoutput(struct tty *tp) { /* * FIXME - to be grokked...copied from m68k km.c. @@ -328,8 +300,6 @@ kmoutput( char buf[80]; char *cp; int cc = -1; - extern int hz; - while (tp->t_outq.c_cc > 0) { cc = ndqb(&tp->t_outq, 0); @@ -337,25 +307,22 @@ kmoutput( break; cc = min(cc, sizeof buf); (void) q_to_b(&tp->t_outq, buf, cc); - for (cp = buf; cp < &buf[cc]; cp++) { - kmputc(*cp & 0x7f); - } + for (cp = buf; cp < &buf[cc]; cp++) + kmputc(tp->t_dev, *cp & 0x7f); } if (tp->t_outq.c_cc > 0) { timeout((timeout_fcn_t)kmtimeout, tp, hz); } tp->t_state &= ~TS_BUSY; - ttwwakeup(tp); + (*linesw[tp->t_line].l_start)(tp); return 0; } -cons_cinput(char ch) + +void cons_cinput(char ch) { struct tty *tp = &cons; - boolean_t funnel_state; - (*linesw[tp->t_line].l_rint) (ch, tp); - } diff --git a/bsd/dev/ppc/mem.c b/bsd/dev/ppc/mem.c index 9c44dc246..4e7c8f8c3 100644 --- a/bsd/dev/ppc/mem.c +++ b/bsd/dev/ppc/mem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -70,10 +70,9 @@ #include #include #include -#include #include #include -#include +#include #include #include @@ -87,7 +86,14 @@ static caddr_t devzerobuf; extern pmap_t kernel_pmap; +extern boolean_t kernacc(off_t, size_t ); +int mmread(dev_t dev, struct uio *uio); +int mmrw(dev_t dev, struct uio *uio, enum uio_rw rw); +int mmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); +int mmwrite(dev_t dev, struct uio *uio); + +int mmread(dev, uio) dev_t dev; struct uio *uio; @@ -96,6 +102,7 @@ mmread(dev, uio) return (mmrw(dev, uio, UIO_READ)); } +int mmwrite(dev, uio) dev_t dev; struct uio *uio; @@ -104,25 +111,41 @@ mmwrite(dev, uio) return (mmrw(dev, uio, UIO_WRITE)); } +int +mmioctl(__unused dev_t dev, u_long cmd, __unused caddr_t data, + __unused int flag, __unused struct proc *p) +{ + switch (cmd) { + case FIONBIO: + case FIOASYNC: + /* OK to do nothing: we always return immediately */ + break; + default: + return ENODEV; + } + + return (0); +} + +int mmrw(dev, uio, rw) dev_t dev; struct uio *uio; enum uio_rw rw; { register int o; - register u_int c, v; +#if LP64KERN + register uint64_t c; +#else + register uint c; +#endif addr64_t vll; - register struct iovec *iov; int error = 0; vm_offset_t where; - int spl; - vm_size_t size; - extern boolean_t kernacc(off_t, size_t ); - - while (uio->uio_resid > 0 && error == 0) { - iov = uio->uio_iov; - if (iov->iov_len == 0) { - uio->uio_iov++; + + while (uio_resid(uio) > 0 && error == 0) { + if (uio_iov_len(uio) == 0) { + uio_next_iov(uio); uio->uio_iovcnt--; if (uio->uio_iovcnt < 0) panic("mmrw"); @@ -157,7 +180,8 @@ mmrw(dev, uio, rw) } } o = uio->uio_offset - vll; - c = min(PAGE_SIZE - o, (u_int)iov->iov_len); + // LP64todo - fix this! + c = min(PAGE_SIZE - o, uio_iov_len(uio)); error = uiomove((caddr_t)(where + o), c, uio); if(dgWork.dgFlags & enaDiagDM) (void)mapping_remove(kernel_pmap, (addr64_t)where); /* Unmap it */ @@ -170,17 +194,17 @@ mmrw(dev, uio, rw) if (((addr64_t)uio->uio_offset > vm_last_addr) || ((addr64_t)uio->uio_offset < VM_MIN_KERNEL_ADDRESS)) goto fault; - c = iov->iov_len; + c = uio_iov_len(uio); if (!kernacc(uio->uio_offset, c)) goto fault; - error = uiomove64(uio->uio_offset, (int)c, uio); + error = uiomove64(uio->uio_offset, c, uio); continue; /* minor device 2 is EOF/RATHOLE */ case 2: if (rw == UIO_READ) return (0); - c = iov->iov_len; + c = uio_iov_len(uio); break; /* minor device 3 is ZERO/RATHOLE */ case 3: @@ -189,11 +213,12 @@ mmrw(dev, uio, rw) bzero(devzerobuf, PAGE_SIZE); } if(uio->uio_rw == UIO_WRITE) { - c = iov->iov_len; + c = uio_iov_len(uio); break; } - c = min(iov->iov_len, PAGE_SIZE); - error = uiomove(devzerobuf, (int)c, uio); + // LP64todo - fix this! 
+			c = min(uio_iov_len(uio), PAGE_SIZE);
+			error = uiomove(devzerobuf, c, uio);
 			continue;
 		default:
 			goto fault;
@@ -202,10 +227,15 @@
 		if (error)
 			break;
-		iov->iov_base += c;
-		iov->iov_len -= c;
+		uio_iov_base_add(uio, c);
 		uio->uio_offset += c;
-		uio->uio_resid -= c;
+#if LP64KERN
+		uio_setresid(uio, (uio_resid(uio) - c));
+		uio_iov_len_add(uio, -((int64_t)c));
+#else
+		uio_setresid(uio, (uio_resid(uio) - c));
+		uio_iov_len_add(uio, -((int)c));
+#endif
 	}
 	return (error);
 fault:
diff --git a/bsd/dev/ppc/memmove.c b/bsd/dev/ppc/memmove.c
index e36599aa8..c9a091bb1 100644
--- a/bsd/dev/ppc/memmove.c
+++ b/bsd/dev/ppc/memmove.c
@@ -36,11 +36,13 @@ void *memcpy(void *dst, const void *src, unsigned int ulen)
 	bcopy(src, dst, ulen);
 	return dst;
 }
-#endif /* 0 */
+
 void *memmove(void *dst, const void *src, unsigned int ulen)
 {
 	bcopy(src, dst, ulen);
 	return dst;
 }
+#endif /* 0 */
+
diff --git a/bsd/dev/ppc/munge.s b/bsd/dev/ppc/munge.s
new file mode 100644
index 000000000..6c835dddd
--- /dev/null
+++ b/bsd/dev/ppc/munge.s
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * Syscall argument mungers.
+ *
+ * Passed a pointer to the user's register array in the savearea, we copy args into
+ * the uu_arg[] array, padding etc as appropriate.  The issue is that parameters
+ * passed in registers from a 32-bit address space do not map directly into the uu_args.
+ * For example, a 32-bit long-long comes in two registers, but we need to combine
+ * them into one 64-bit long-long in the uu_args.
+ *
+ * There are several functions in this file.  Each takes two parameters:
+ *
+ *	void munge_XXXX( const void *regs, void *uu_args);
+ *
+ * The name of the function encodes the number and type of the parameters, as follows:
+ *
+ *	w = a 32-bit value such as an int or a 32-bit ptr, that does not require
+ *	    sign extension.  These are handled by skipping a word in the input,
+ *	    zeroing a word of output, and copying a word from input to output.
+ *
+ *	s = a 32-bit value such as a long, which must be sign-extended to a 64-bit
+ *	    long-long in the uu_args.  These are handled by skipping a word of
+ *	    input, loading a word of input and sign extending it to a double,
+ *	    and storing two words of output.
+ *
+ *	l = a 64-bit long-long, passed in two registers.  These are handled by skipping
+ *	    a word of input, copying a word, skipping another word of input, and
+ *	    copying another word.
+ *
+ *	d = a 32-bit int or a 64-bit ptr or long, passed in via a 64-bit GPR
+ *	    from a 64-bit process.  We copy two words from input to output.
+ *
+ * For example, "munge_wls" takes a word, a long-long, and a word.
This takes + * four registers: the first word is in one, the long-long takes two, and the + * final word is in the fourth. We store six words: a 0, the low words of the + * first three registers, and the two words resulting from sign-extending the + * low word of the fourth register. + * + * As you can see, we save a lot of code by collapsing mungers that are prefixes + * of each other, into the more general routine. This ends up copying a few extra + * bytes of parameters, but big deal. The old kernel copied all eight words for + * every system call. + * + * These routines assume explicit pad words in the uu_arg structures, that fill out + * int parameters to 64 bits. Having pad words makes munging args for 64-bit + * processes the equivalent of a simple bcopy(), though it does introduce an + * endian dependency. + */ + + .align 5 + .globl _munge_dddddddd // that is 8 'd's +_munge_dddddddd: + .globl _munge_ddddddd +_munge_ddddddd: + .globl _munge_dddddd +_munge_dddddd: + .globl _munge_ddddd +_munge_ddddd: + ld r5,0*8+0(r3) + ld r6,1*8+0(r3) + ld r7,2*8+0(r3) + ld r8,3*8+0(r3) + ld r9,4*8+0(r3) + ld r10,5*8+0(r3) + ld r11,6*8+0(r3) + ld r12,7*8+0(r3) + + std r5,0*8+0(r4) + std r6,1*8+0(r4) + std r7,2*8+0(r4) + std r8,3*8+0(r4) + std r9,4*8+0(r4) + std r10,5*8+0(r4) + std r11,6*8+0(r4) + std r12,7*8+0(r4) + + blr + + + .align 5 + .globl _munge_dddd +_munge_dddd: + .globl _munge_ddd +_munge_ddd: + .globl _munge_dd +_munge_dd: + .globl _munge_d +_munge_d: + ld r5,0*8+0(r3) + ld r6,1*8+0(r3) + ld r7,2*8+0(r3) + ld r8,3*8+0(r3) + + std r5,0*8+0(r4) + std r6,1*8+0(r4) + std r7,2*8+0(r4) + std r8,3*8+0(r4) + + blr + + + .align 5 + .globl _munge_wwwwwwww // that is 8 'w's +_munge_wwwwwwww: + .globl _munge_wwwwwww +_munge_wwwwwww: + .globl _munge_wwwwww +_munge_wwwwww: + .globl _munge_wwwww +_munge_wwwww: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + lwz r10,5*8+4(r3) + lwz r11,6*8+4(r3) + lwz r12,7*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r0,3*8+0(r4) + stw r8,3*8+4(r4) + stw r0,4*8+0(r4) + stw r9,4*8+4(r4) + stw r0,5*8+0(r4) + stw r10,5*8+4(r4) + stw r0,6*8+0(r4) + stw r11,6*8+4(r4) + stw r0,7*8+0(r4) + stw r12,7*8+4(r4) + + blr + + + .align 5 + .globl _munge_wwww +_munge_wwww: + .globl _munge_www +_munge_www: + .globl _munge_ww +_munge_ww: + .globl _munge_w +_munge_w: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r0,3*8+0(r4) + stw r8,3*8+4(r4) + + blr + + .align 5 + .globl _munge_l +_munge_l: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + + stw r5,0*8+0(r4) + stw r6,0*8+4(r4) + + blr + + .align 5 + .globl _munge_wlw +_munge_wlw: + .globl _munge_wl +_munge_wl: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r6,1*8+0(r4) + stw r7,1*8+4(r4) + stw r0,2*8+0(r4) + stw r8,2*8+4(r4) + + blr + + + .align 5 + .globl _munge_wwwl +_munge_wwwl: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r8,3*8+0(r4) + stw r9,3*8+4(r4) + + blr + + + .align 5 + .globl _munge_wwwwl // 4 'w's and an l +_munge_wwwwl: + li r0,0 + lwz r5,0*8+4(r3) + lwz 
r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + lwz r10,5*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r0,3*8+0(r4) + stw r8,3*8+4(r4) + stw r9,4*8+0(r4) + stw r10,4*8+4(r4) + + blr + + + .align 5 + .globl _munge_wwwwwl // 5 'w's and an l +_munge_wwwwwl: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + lwz r10,5*8+4(r3) + lwz r11,6*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r0,3*8+0(r4) + stw r8,3*8+4(r4) + stw r0,4*8+0(r4) + stw r9,4*8+4(r4) + stw r10,5*8+0(r4) + stw r11,5*8+4(r4) + + blr + + + .align 5 + .globl _munge_wsw +_munge_wsw: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + + stw r0,0*8+0(r4) + srawi r2,r6,31 + stw r5,0*8+4(r4) + stw r2,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + + blr + + + .align 5 + .globl _munge_wws +_munge_wws: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + srawi r2,r7,31 + stw r6,1*8+4(r4) + stw r2,2*8+0(r4) + stw r7,2*8+4(r4) + + blr + + + .align 5 + .globl _munge_wwwsw +_munge_wwwsw: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + srawi r2,r8,31 + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r2,3*8+0(r4) + stw r8,3*8+4(r4) + stw r0,4*8+0(r4) + stw r9,4*8+4(r4) + + blr diff --git a/bsd/dev/ppc/nvram.c b/bsd/dev/ppc/nvram.c index d4900147f..bf466872a 100644 --- a/bsd/dev/ppc/nvram.c +++ b/bsd/dev/ppc/nvram.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,7 +68,7 @@ nvread(dev, uio, ioflag) int error = 0; offset = uio->uio_offset; - size = uio->uio_resid; + size = uio_resid(uio); for (read = 0; read < size; read++, offset++) { error = PEnvread(offset, 1, &cc); @@ -88,7 +88,6 @@ nvread(dev, uio, ioflag) nvwrite(dev_t dev, struct uio *uio, int ioflag) { - register struct iovec *iov; long offset; long size; int c; @@ -97,7 +96,7 @@ nvwrite(dev_t dev, struct uio *uio, int ioflag) int error = 0; offset = uio->uio_offset; - size = uio->uio_resid; + size = uio_resid(uio); for (wrote = 0; wrote < size; wrote++, offset++) { c = uwritec(uio); diff --git a/bsd/dev/ppc/stubs.c b/bsd/dev/ppc/stubs.c index 27bf27f71..3f0df507d 100644 --- a/bsd/dev/ppc/stubs.c +++ b/bsd/dev/ppc/stubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -27,14 +27,12 @@ #include #include -#include #include #include #include #include #include #include -#include #include #include @@ -51,13 +49,10 @@ */ /* from ppc/fault_copy.c -Titan1T4 VERSION */ int -copystr(vfrom, vto, maxlen, lencopied) - register void * vfrom, *vto; - size_t maxlen, *lencopied; +copystr(const void *vfrom, void *vto, size_t maxlen, size_t *lencopied) { register unsigned l; - int error; -caddr_t from, to; + caddr_t from, to; from = vfrom; to = vto; @@ -80,44 +75,6 @@ size_t count; return 0; } -struct unix_syscallargs { - int flavor; - int r3; - int arg1, arg2,arg3,arg4,arg5,arg6,arg7; -}; - -set_bsduthreadargs(thread_t th, void * pcb, struct unix_syscallargs * sarg) -{ -struct uthread * ut; - - ut = get_bsdthread_info(th); - ut->uu_ar0 = (int *)pcb; - - if (sarg->flavor) - { - ut->uu_arg[0] = sarg->arg1; - ut->uu_arg[1] = sarg->arg2; - ut->uu_arg[2] = sarg->arg3; - ut->uu_arg[3] = sarg->arg4; - ut->uu_arg[4] = sarg->arg5; - ut->uu_arg[5] = sarg->arg6; - ut->uu_arg[7] = sarg->arg7; - } - else - { - ut->uu_arg[0] = sarg->r3; - ut->uu_arg[1] = sarg->arg1; - ut->uu_arg[2] = sarg->arg2; - ut->uu_arg[3] = sarg->arg3; - ut->uu_arg[4] = sarg->arg4; - ut->uu_arg[5] = sarg->arg5; - ut->uu_arg[6] = sarg->arg6; - ut->uu_arg[7] = sarg->arg7; - } - - return(1); -} - void * get_bsduthreadarg(thread_t th) { @@ -127,7 +84,7 @@ struct uthread *ut; } int * -get_bsduthreadrval(thread_act_t th) +get_bsduthreadrval(thread_t th) { struct uthread *ut; ut = get_bsdthread_info(th); diff --git a/bsd/dev/ppc/systemcalls.c b/bsd/dev/ppc/systemcalls.c index 79dcb99f5..a20314a54 100644 --- a/bsd/dev/ppc/systemcalls.c +++ b/bsd/dev/ppc/systemcalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,32 +22,36 @@ #include #include -#include #include #include +#include +#include #include #include #include #include -#include +#include #include #include #include #include #include #include +#include +#include +#include #include extern void -unix_syscall( - struct savearea *regs -); +unix_syscall(struct savearea *regs); +void +unix_syscall_return(int error); extern struct savearea * find_user_regs( - thread_act_t act); + thread_t act); extern void enter_funnel_section(funnel_t *funnel_lock); extern void exit_funnel_section(void); @@ -60,11 +64,9 @@ extern void exit_funnel_section(void); * Outputs: none */ void -unix_syscall( - struct savearea *regs -) +unix_syscall(struct savearea *regs) { - thread_act_t thread_act; + thread_t thread_act; struct uthread *uthread; struct proc *proc; struct sysent *callp; @@ -72,8 +74,9 @@ unix_syscall( unsigned short code; boolean_t flavor; int funnel_type; + unsigned int cancel_enable; - flavor = (((unsigned int)regs->save_r0) == NULL)? 1: 0; + flavor = (((unsigned int)regs->save_r0) == 0)? 
1: 0;
 	if (flavor)
 		code = regs->save_r3;
@@ -88,49 +91,83 @@ unix_syscall(
 		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
 			regs->save_r3, regs->save_r4, regs->save_r5, regs->save_r6, 0);
 	}
-	thread_act = current_act();
+	thread_act = current_thread();
 	uthread = get_bsdthread_info(thread_act);
 
-	if (!(uthread->uu_flag & P_VFORK))
+	if (!(uthread->uu_flag & UT_VFORK))
 		proc = (struct proc *)get_bsdtask_info(current_task());
 	else
 		proc = current_proc();
 
+	/*
+	 * Delayed binding of thread credential to process credential, if we
+	 * are not running with an explicitly set thread credential.
+	 */
+	if (uthread->uu_ucred != proc->p_ucred &&
+	    (uthread->uu_flag & UT_SETUID) == 0) {
+		kauth_cred_t old = uthread->uu_ucred;
+		proc_lock(proc);
+		uthread->uu_ucred = proc->p_ucred;
+		kauth_cred_ref(uthread->uu_ucred);
+		proc_unlock(proc);
+		if (old != NOCRED)
+			kauth_cred_rele(old);
+	}
+
 	uthread->uu_ar0 = (int *)regs;
 
 	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
 
-#ifdef DEBUG
-	if (callp->sy_narg > 8)
-		panic("unix_syscall: max arg count exceeded");
-#endif
-
 	if (callp->sy_narg != 0) {
+		void *regsp;
+		sy_munge_t *mungerp;
+
+		if (IS_64BIT_PROCESS(proc)) {
+			/* XXX Turn 64 bit unsafe calls into nosys() */
+			if (callp->sy_funnel & UNSAFE_64BIT) {
+				callp = &sysent[63];
+				goto unsafe;
+			}
+			mungerp = callp->sy_arg_munge64;
+		}
+		else {
+			mungerp = callp->sy_arg_munge32;
+		}
+
 		if ( !flavor) {
-			uthread->uu_arg[0] = regs->save_r3;
-			uthread->uu_arg[1] = regs->save_r4;
-			uthread->uu_arg[2] = regs->save_r5;
-			uthread->uu_arg[3] = regs->save_r6;
-			uthread->uu_arg[4] = regs->save_r7;
-			uthread->uu_arg[5] = regs->save_r8;
-			uthread->uu_arg[6] = regs->save_r9;
-			uthread->uu_arg[7] = regs->save_r10;
+			regsp = (void *) &regs->save_r3;
 		} else {
-			uthread->uu_arg[0] = regs->save_r4;
-			uthread->uu_arg[1] = regs->save_r5;
-			uthread->uu_arg[2] = regs->save_r6;
-			uthread->uu_arg[3] = regs->save_r7;
-			uthread->uu_arg[4] = regs->save_r8;
-			uthread->uu_arg[5] = regs->save_r9;
-			uthread->uu_arg[7] = regs->save_r10;
+			/* indirect system call consumes an argument so only 7 are supported */
+			if (callp->sy_narg > 7) {
+				callp = &sysent[63];
+				goto unsafe;
+			}
+			regsp = (void *) &regs->save_r4;
 		}
+		/* call syscall argument munger to copy in arguments (see xnu/bsd/dev/ppc/munge.s) */
+		(*mungerp)(regsp, (void *) &uthread->uu_arg[0]);
 	}
 
-	funnel_type = (int)callp->sy_funnel;
+unsafe:
+	cancel_enable = callp->sy_cancel;
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE) {
+		uthread->uu_flag |= UT_NOTCANCELPT;
+	} else {
+		if((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
+			if (cancel_enable == _SYSCALL_CANCEL_PRE) {
+				/* system call cancelled; return to handle cancellation */
+				regs->save_r3 = (long long)EINTR;
+				thread_exception_return();
+				/* NOTREACHED */
+			} else {
+				thread_abort_safely(thread_act);
+			}
+		}
+	}
+
+	funnel_type = (int)(callp->sy_funnel & FUNNEL_MASK);
 	if (funnel_type == KERNEL_FUNNEL)
 		enter_funnel_section(kernel_flock);
-	else if (funnel_type == NETWORK_FUNNEL)
-		enter_funnel_section(network_flock);
 
 	uthread->uu_rval[0] = 0;
@@ -150,12 +187,20 @@ unix_syscall(
 	regs->save_srr0 += 4;
 
 	if (KTRPOINT(proc, KTR_SYSCALL))
-		ktrsyscall(proc, code, callp->sy_narg, uthread->uu_arg, funnel_type);
+		ktrsyscall(proc, code, callp->sy_narg, uthread->uu_arg);
 
+#ifdef JOE_DEBUG
+	uthread->uu_iocount = 0;
+	uthread->uu_vpindex = 0;
+#endif
 	AUDIT_SYSCALL_ENTER(code, proc, uthread);
 	error = (*(callp->sy_call))(proc, (void *)uthread->uu_arg, &(uthread->uu_rval[0]));
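
The (*mungerp)() call above replaces the old register-by-register copies: each munger in bsd/dev/ppc/munge.s expands the 32-bit argument registers into 64-bit uu_arg[] slots according to the letters in its name. A rough standalone C rendering of the "w" and "l" rules for munge_wl (the types and array layout are illustrative assumptions; the kernel uses the hand-scheduled assembly):

    #include <stdint.h>

    /*
     * Each PPC savearea register is modeled as an 8-byte slot whose low
     * word (big-endian offset +4) holds the 32-bit user value; each
     * uu_arg[] slot is 64 bits wide.
     */
    void munge_wl_sketch(const uint32_t regs[][2], uint64_t uu_args[])
    {
        /* w: zero the high word, copy the low word */
        uu_args[0] = (uint64_t)regs[0][1];
        /* l: join two consecutive registers into one 64-bit long-long */
        uu_args[1] = ((uint64_t)regs[1][1] << 32) | (uint64_t)regs[2][1];
    }

As the munge.s header comment notes, mungers that are prefixes of one another share code, so the real munge_wl executes the munge_wlw body and copies one extra slot.
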
AUDIT_SYSCALL_EXIT(error, proc, uthread); +#ifdef JOE_DEBUG + if (uthread->uu_iocount) + joe_debug("system call returned with uu_iocount != 0"); +#endif regs = find_user_regs(thread_act); if (error == ERESTART) { @@ -166,42 +211,124 @@ unix_syscall( /* set the "pc" to execute cerror routine */ regs->save_srr0 -= 4; } else { /* (not error) */ - regs->save_r3 = uthread->uu_rval[0]; - regs->save_r4 = uthread->uu_rval[1]; + switch (callp->sy_return_type) { + case _SYSCALL_RET_INT_T: + regs->save_r3 = uthread->uu_rval[0]; + regs->save_r4 = uthread->uu_rval[1]; + break; + case _SYSCALL_RET_UINT_T: + regs->save_r3 = ((u_int)uthread->uu_rval[0]); + regs->save_r4 = ((u_int)uthread->uu_rval[1]); + break; + case _SYSCALL_RET_OFF_T: + /* off_t returns 64 bits split across two registers for 32 bit */ + /* process and in one register for 64 bit process */ + if (IS_64BIT_PROCESS(proc)) { + u_int64_t *retp = (u_int64_t *)&uthread->uu_rval[0]; + regs->save_r3 = *retp; + regs->save_r4 = 0; + } + else { + regs->save_r3 = uthread->uu_rval[0]; + regs->save_r4 = uthread->uu_rval[1]; + } + break; + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + /* the variable length return types (user_addr_t, user_ssize_t, + * and user_size_t) are always the largest possible size in the + * kernel (we use uu_rval[0] and [1] as one 64 bit value). + */ + { + user_addr_t *retp = (user_addr_t *)&uthread->uu_rval[0]; + regs->save_r3 = *retp; + regs->save_r4 = 0; + } + break; + case _SYSCALL_RET_NONE: + break; + default: + panic("unix_syscall: unknown return type"); + break; + } } } /* else (error == EJUSTRETURN) { nothing } */ - if (KTRPOINT(proc, KTR_SYSRET)) - ktrsysret(proc, code, error, uthread->uu_rval[0], funnel_type); - exit_funnel_section(); + if (KTRPOINT(proc, KTR_SYSRET)) { + switch(callp->sy_return_type) { + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + /* + * Trace the value of the least significant bits, + * until we can revise the ktrace API safely. + */ + ktrsysret(proc, code, error, uthread->uu_rval[1]); + break; + default: + ktrsysret(proc, code, error, uthread->uu_rval[0]); + break; + } + } + if (cancel_enable == _SYSCALL_CANCEL_NONE) + uthread->uu_flag &= ~UT_NOTCANCELPT; + + exit_funnel_section(); + + if (uthread->uu_lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... 
we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(uthread->uu_lowpri_delay); + uthread->uu_lowpri_delay = 0; + } if (kdebug_enable && (code != 180)) { - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + + if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T) + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[1], 0, 0, 0); + else + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); } thread_exception_return(); /* NOTREACHED */ } -unix_syscall_return(error) +void +unix_syscall_return(int error) { - thread_act_t thread_act; + thread_t thread_act; struct uthread *uthread; struct proc *proc; struct savearea *regs; unsigned short code; struct sysent *callp; int funnel_type; + unsigned int cancel_enable; - thread_act = current_act(); + thread_act = current_thread(); proc = current_proc(); uthread = get_bsdthread_info(thread_act); regs = find_user_regs(thread_act); + if (regs->save_r0 != 0) + code = regs->save_r0; + else + code = regs->save_r3; + + callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + /* * Get index into sysent table */ @@ -213,29 +340,91 @@ unix_syscall_return(error) /* set the "pc" to execute cerror routine */ regs->save_srr0 -= 4; } else { /* (not error) */ - regs->save_r3 = uthread->uu_rval[0]; - regs->save_r4 = uthread->uu_rval[1]; + switch (callp->sy_return_type) { + case _SYSCALL_RET_INT_T: + regs->save_r3 = uthread->uu_rval[0]; + regs->save_r4 = uthread->uu_rval[1]; + break; + case _SYSCALL_RET_UINT_T: + regs->save_r3 = ((u_int)uthread->uu_rval[0]); + regs->save_r4 = ((u_int)uthread->uu_rval[1]); + break; + case _SYSCALL_RET_OFF_T: + /* off_t returns 64 bits split across two registers for 32 bit */ + /* process and in one register for 64 bit process */ + if (IS_64BIT_PROCESS(proc)) { + u_int64_t *retp = (u_int64_t *)&uthread->uu_rval[0]; + regs->save_r3 = *retp; + } + else { + regs->save_r3 = uthread->uu_rval[0]; + regs->save_r4 = uthread->uu_rval[1]; + } + break; + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + /* the variable length return types (user_addr_t, user_ssize_t, + * and user_size_t) are always the largest possible size in the + * kernel (we use uu_rval[0] and [1] as one 64 bit value). + */ + { + u_int64_t *retp = (u_int64_t *)&uthread->uu_rval[0]; + regs->save_r3 = *retp; + } + break; + case _SYSCALL_RET_NONE: + break; + default: + panic("unix_syscall: unknown return type"); + break; + } } } /* else (error == EJUSTRETURN) { nothing } */ - if (regs->save_r0 != NULL) - code = regs->save_r0; - else - code = regs->save_r3; - - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + if (KTRPOINT(proc, KTR_SYSRET)) { + switch(callp->sy_return_type) { + case _SYSCALL_RET_ADDR_T: + case _SYSCALL_RET_SIZE_T: + case _SYSCALL_RET_SSIZE_T: + /* + * Trace the value of the least significant bits, + * until we can revise the ktrace API safely. 
+			 */
+			ktrsysret(proc, code, error, uthread->uu_rval[1]);
+			break;
+		default:
+			ktrsysret(proc, code, error, uthread->uu_rval[0]);
+			break;
+		}
+	}
-	funnel_type = (int)callp->sy_funnel;
+	cancel_enable = callp->sy_cancel;
-	if (KTRPOINT(proc, KTR_SYSRET))
-		ktrsysret(proc, code, error, uthread->uu_rval[0], funnel_type);
+	if (cancel_enable == _SYSCALL_CANCEL_NONE)
+		uthread->uu_flag &= ~UT_NOTCANCELPT;
-	exit_funnel_section();
+	exit_funnel_section();
+
+	if (uthread->uu_lowpri_delay) {
+		/*
+		 * task is marked as a low priority I/O type
+		 * and the I/O we issued while in this system call
+		 * collided with normal I/O operations... we'll
+		 * delay in order to mitigate the impact of this
+		 * task on the normal operation of the system
+		 */
+		IOSleep(uthread->uu_lowpri_delay);
+		uthread->uu_lowpri_delay = 0;
+	}
 	if (kdebug_enable && (code != 180)) {
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+		if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T)
+			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+				error, uthread->uu_rval[1], 0, 0, 0);
+		else
+			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+				error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
 	}
 
 	thread_exception_return();
@@ -251,35 +440,81 @@
  * and decrementing interval timers, optionally reloading the interval
  * timers when they expire.
  */
-struct gettimeofday_args{
-	struct timeval *tp;
-	struct timezone *tzp;
-};
 /* NOTE THIS implementation is for ppc architectures only.
  * It is infrequently called, since the commpage intercepts
  * most calls in user mode.
+ *
+ * XXX Y2038 bug because of assumed return of 32 bit seconds value, and
+ * XXX first parameter to clock_gettimeofday()
  */
 int
-ppc_gettimeofday(p, uap, retval)
-	struct proc *p;
-	register struct gettimeofday_args *uap;
-	register_t *retval;
+ppc_gettimeofday(__unused struct proc *p,
+		 register struct ppc_gettimeofday_args *uap,
+		 register_t *retval)
 {
 	int error = 0;
+	extern lck_spin_t * tz_slock;
 
 	if (uap->tp)
 		clock_gettimeofday(&retval[0], &retval[1]);
 
 	if (uap->tzp) {
 		struct timezone ltz;
-		extern simple_lock_data_t tz_slock;
 
-		usimple_lock(&tz_slock);
+		lck_spin_lock(tz_slock);
 		ltz = tz;
-		usimple_unlock(&tz_slock);
-		error = copyout((caddr_t)&ltz, (caddr_t)uap->tzp, sizeof (tz));
+		lck_spin_unlock(tz_slock);
+		error = copyout((caddr_t)&ltz, uap->tzp, sizeof (tz));
 	}
 
 	return (error);
 }
 
+#ifdef JOE_DEBUG
+joe_debug(char *p) {
+
+	printf("%s\n", p);
+}
+#endif
+
+
+/*
+ * WARNING - this is a temporary workaround for binary compatibility issues
+ * with anti-piracy software that relies on patching ptrace (3928003).
+ * This KPI will be removed in the system release after Tiger.
+ */ +uintptr_t temp_patch_ptrace(uintptr_t new_ptrace) +{ + struct sysent * callp; + sy_call_t * old_ptrace; + + if (new_ptrace == 0) + return(0); + + enter_funnel_section(kernel_flock); + callp = &sysent[26]; + old_ptrace = callp->sy_call; + + /* only allow one patcher of ptrace */ + if (old_ptrace == (sy_call_t *) ptrace) { + callp->sy_call = (sy_call_t *) new_ptrace; + } + else { + old_ptrace = NULL; + } + exit_funnel_section( ); + + return((uintptr_t)old_ptrace); +} + +void temp_unpatch_ptrace(void) +{ + struct sysent * callp; + + enter_funnel_section(kernel_flock); + callp = &sysent[26]; + callp->sy_call = (sy_call_t *) ptrace; + exit_funnel_section( ); + + return; +} diff --git a/bsd/dev/ppc/unix_signal.c b/bsd/dev/ppc/unix_signal.c index 501bc87e8..75a700d51 100644 --- a/bsd/dev/ppc/unix_signal.c +++ b/bsd/dev/ppc/unix_signal.c @@ -27,24 +27,48 @@ #include #include -#include +#include #include #include +#include +#include +#include #include #include #include #include #include -#include #include #include -#define C_REDZONE_LEN 224 -#define C_STK_ALIGN 16 -#define C_PARAMSAVE_LEN 64 -#define C_LINKAGE_LEN 48 -#define TRUNC_DOWN(a,b,c) (((((unsigned)a)-(b))/(c)) * (c)) +// #include XXX include path messed up for some reason... + +/* XXX functions not in a Mach headers */ +extern kern_return_t thread_getstatus(register thread_t act, int flavor, + thread_state_t tstate, mach_msg_type_number_t *count); +extern int is_64signalregset(void); +extern unsigned int get_msr_exportmask(void); +extern kern_return_t thread_setstatus(thread_t thread, int flavor, + thread_state_t tstate, mach_msg_type_number_t count); +extern void ppc_checkthreadstate(void *, int); +extern struct savearea_vec *find_user_vec_curr(void); +extern int thread_enable_fpe(thread_t act, int onoff); + + + +#define C_32_REDZONE_LEN 224 +#define C_32_STK_ALIGN 16 +#define C_32_PARAMSAVE_LEN 64 +#define C_32_LINKAGE_LEN 48 + +#define C_64_REDZONE_LEN 320 +#define C_64_STK_ALIGN 32 +#define C_64_PARAMSAVE_LEN 64 +#define C_64_LINKAGE_LEN 48 + +#define TRUNC_DOWN32(a,b,c) ((((uint32_t)a)-(b)) & ((uint32_t)(-(c)))) +#define TRUNC_DOWN64(a,b,c) ((((uint64_t)a)-(b)) & ((uint64_t)(-(c)))) /* * The stack layout possibilities (info style); This needs to mach with signal trampoline code @@ -82,42 +106,99 @@ #define UC_FLAVOR64_VEC_SIZE ((PPC_THREAD_STATE64_COUNT + PPC_EXCEPTION_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int)) +/* + * NOTE: Source and target may *NOT* overlap! + */ +static void +ucontext_32to64(struct ucontext64 *in, struct user_ucontext64 *out) +{ + out->uc_onstack = in->uc_onstack; + out->uc_sigmask = in->uc_sigmask; + + /* internal "structure assign" */ + out->uc_stack.ss_sp = CAST_USER_ADDR_T(in->uc_stack.ss_sp); + out->uc_stack.ss_size = in->uc_stack.ss_size; + out->uc_stack.ss_flags = in->uc_stack.ss_flags; + + out->uc_link = CAST_USER_ADDR_T(in->uc_link); + out->uc_mcsize = in->uc_mcsize; + out->uc_mcontext64 = CAST_USER_ADDR_T(in->uc_mcontext64); +} + +/* + * This conversion is safe, since if we are converting for a 32 bit process, + * then it's values of uc-stack.ss_size and uc_mcsize will never exceed 4G. + * + * NOTE: Source and target may *NOT* overlap! 
+ */ +static void +ucontext_64to32(struct user_ucontext64 *in, struct ucontext64 *out) +{ + out->uc_onstack = in->uc_onstack; + out->uc_sigmask = in->uc_sigmask; + + /* internal "structure assign" */ + out->uc_stack.ss_sp = CAST_DOWN(void *,in->uc_stack.ss_sp); + out->uc_stack.ss_size = in->uc_stack.ss_size; /* range reduction */ + out->uc_stack.ss_flags = in->uc_stack.ss_flags; + + out->uc_link = CAST_DOWN(void *,in->uc_link); + out->uc_mcsize = in->uc_mcsize; /* range reduction */ + out->uc_mcontext64 = CAST_DOWN(void *,in->uc_mcontext64); +} + +/* + * NOTE: Source and target may *NOT* overlap! + */ +static void +siginfo_64to32(user_siginfo_t *in, siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = CAST_DOWN(void *,in->si_addr); + /* following cast works for sival_int because of padding */ + out->si_value.sival_ptr = CAST_DOWN(void *,in->si_value.sival_ptr); + out->si_band = in->si_band; /* range reduction */ + out->pad[0] = in->pad[0]; /* mcontext.ss.r1 */ +} + + /* * Arrange for this process to run a signal handler */ void -sendsig(p, catcher, sig, mask, code) - struct proc *p; - sig_t catcher; - int sig, mask; - u_long code; +sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long code) { kern_return_t kretn; - struct mcontext mctx, *p_mctx; - struct mcontext64 mctx64, *p_mctx64; - struct ucontext uctx, *p_uctx; - siginfo_t sinfo, *p_sinfo; + struct mcontext mctx; + user_addr_t p_mctx = USER_ADDR_NULL; /* mcontext dest. */ + struct mcontext64 mctx64; + user_addr_t p_mctx64 = USER_ADDR_NULL; /* mcontext dest. */ + struct user_ucontext64 uctx; + user_addr_t p_uctx; /* user stack addr top copy ucontext */ + user_siginfo_t sinfo; + user_addr_t p_sinfo; /* user stack addr top copy siginfo */ struct sigacts *ps = p->p_sigacts; - int framesize; int oonstack; - unsigned long sp; - unsigned long state_count; - thread_act_t th_act; + user_addr_t sp; + mach_msg_type_number_t state_count; + thread_t th_act; struct uthread *ut; - unsigned long paramp,linkp; int infostyle = UC_TRAD; int dualcontext =0; - sig_t trampact; + user_addr_t trampact; int vec_used = 0; int stack_size = 0; - int stack_flags = 0; void * tstate; int flavor; int ctx32 = 1; - int is_64signalregset(void); - th_act = current_act(); + th_act = current_thread(); ut = get_bsdthread_info(th_act); @@ -128,15 +209,21 @@ sendsig(p, catcher, sig, mask, code) dualcontext = 1; infostyle = UC_DUAL; } - if (p->p_sigacts->ps_64regset & sigmask(sig)) { + if (p->p_sigacts->ps_64regset & sigmask(sig)) { dualcontext = 0; ctx32 = 0; infostyle = UC_FLAVOR64; } - if (is_64signalregset() && (infostyle == UC_TRAD)) { + /* treat 64 bit processes as having used 64 bit registers */ + if ((IS_64BIT_PROCESS(p) || is_64signalregset()) && + (infostyle == UC_TRAD)) { ctx32=0; infostyle = UC_TRAD64; - } + } + if (IS_64BIT_PROCESS(p)) { + ctx32=0; + dualcontext = 0; + } /* I need this for SIGINFO anyway */ flavor = PPC_THREAD_STATE; @@ -199,6 +286,7 @@ sendsig(p, catcher, sig, mask, code) state_count = PPC_VECTOR_STATE_COUNT; if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS) goto bad; + infostyle += 5; } if ((ctx32 == 0) || dualcontext) { @@ -207,8 +295,8 @@ sendsig(p, catcher, sig, mask, code) state_count = PPC_VECTOR_STATE_COUNT; if (thread_getstatus(th_act, flavor, (thread_state_t)tstate, &state_count) != KERN_SUCCESS) goto bad; 
+ infostyle += 5; } - infostyle += 5; } trampact = ps->ps_trampact[sig]; @@ -217,54 +305,69 @@ sendsig(p, catcher, sig, mask, code) /* figure out where our new stack lives */ if ((ps->ps_flags & SAS_ALTSTACK) && !oonstack && (ps->ps_sigonstack & sigmask(sig))) { - sp = (unsigned long)(ps->ps_sigstk.ss_sp); + sp = ps->ps_sigstk.ss_sp; sp += ps->ps_sigstk.ss_size; stack_size = ps->ps_sigstk.ss_size; ps->ps_sigstk.ss_flags |= SA_ONSTACK; } else { if (ctx32 == 0) - sp = (unsigned int)mctx64.ss.r1; + sp = mctx64.ss.r1; else - sp = mctx.ss.r1; + sp = CAST_USER_ADDR_T(mctx.ss.r1); } /* put siginfo on top */ /* preserve RED ZONE area */ - sp = TRUNC_DOWN(sp, C_REDZONE_LEN, C_STK_ALIGN); + if (IS_64BIT_PROCESS(p)) + sp = TRUNC_DOWN64(sp, C_64_REDZONE_LEN, C_64_STK_ALIGN); + else + sp = TRUNC_DOWN32(sp, C_32_REDZONE_LEN, C_32_STK_ALIGN); /* next are the saved registers */ if ((ctx32 == 0) || dualcontext) { - sp -= sizeof(*p_mctx64); - p_mctx64 = (struct mcontext64 *)sp; + sp -= sizeof(struct mcontext64); + p_mctx64 = sp; } if ((ctx32 == 1) || dualcontext) { - sp -= sizeof(*p_mctx); - p_mctx = (struct mcontext *)sp; + sp -= sizeof(struct mcontext); + p_mctx = sp; } - /* context goes first on stack */ - sp -= sizeof(*p_uctx); - p_uctx = (struct ucontext *) sp; - - /* this is where siginfo goes on stack */ - sp -= sizeof(*p_sinfo); - p_sinfo = (siginfo_t *) sp; - - /* C calling conventions, create param save and linkage - * areas - */ - - sp = TRUNC_DOWN(sp, C_PARAMSAVE_LEN, C_STK_ALIGN); - paramp = sp; - sp -= C_LINKAGE_LEN; - linkp = sp; + if (IS_64BIT_PROCESS(p)) { + /* context goes first on stack */ + sp -= sizeof(struct user_ucontext64); + p_uctx = sp; + + /* this is where siginfo goes on stack */ + sp -= sizeof(user_siginfo_t); + p_sinfo = sp; + + sp = TRUNC_DOWN64(sp, C_64_PARAMSAVE_LEN+C_64_LINKAGE_LEN, C_64_STK_ALIGN); + } else { + /* + * struct ucontext and struct ucontext64 are identical in + * size and content; the only difference is the internal + * pointer type for the last element, which makes no + * difference for the copyout(). 
+ */ + + /* context goes first on stack */ + sp -= sizeof(struct ucontext64); + p_uctx = sp; + + /* this is where siginfo goes on stack */ + sp -= sizeof(siginfo_t); + p_sinfo = sp; + + sp = TRUNC_DOWN32(sp, C_32_PARAMSAVE_LEN+C_32_LINKAGE_LEN, C_32_STK_ALIGN); + } uctx.uc_onstack = oonstack; uctx.uc_sigmask = mask; - uctx.uc_stack.ss_sp = (char *)sp; + uctx.uc_stack.ss_sp = sp; uctx.uc_stack.ss_size = stack_size; if (oonstack) uctx.uc_stack.ss_flags |= SS_ONSTACK; @@ -279,15 +382,20 @@ sendsig(p, catcher, sig, mask, code) uctx.uc_mcsize += (size_t)(PPC_VECTOR_STATE_COUNT * sizeof(int)); if (ctx32 == 0) - uctx.uc_mcontext = (void *)p_mctx64; + uctx.uc_mcontext64 = p_mctx64; else - uctx.uc_mcontext = (void *)p_mctx; + uctx.uc_mcontext64 = p_mctx; /* setup siginfo */ - bzero((caddr_t)&sinfo, sizeof(siginfo_t)); + bzero((caddr_t)&sinfo, sizeof(user_siginfo_t)); sinfo.si_signo = sig; - sinfo.si_addr = (void *)mctx.ss.srr0; - sinfo.pad[0] = (unsigned int)mctx.ss.r1; + if (ctx32 == 0) { + sinfo.si_addr = mctx64.ss.srr0; + sinfo.pad[0] = mctx64.ss.r1; + } else { + sinfo.si_addr = CAST_USER_ADDR_T(mctx.ss.srr0); + sinfo.pad[0] = CAST_USER_ADDR_T(mctx.ss.r1); + } switch (sig) { case SIGCHLD: @@ -311,15 +419,30 @@ sendsig(p, catcher, sig, mask, code) } break; case SIGILL: - sinfo.si_addr = (void *)mctx.ss.srr0; - if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT))) - sinfo.si_code = ILL_ILLOPC; - else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT))) - sinfo.si_code = ILL_PRVOPC; - else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT))) - sinfo.si_code = ILL_ILLTRP; - else - sinfo.si_code = ILL_NOOP; + /* + * If it's 64 bit and not a dual context, mctx will + * contain uninitialized data, so we have to use + * mctx64 here. + */ + if(ctx32 == 0) { + if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT))) + sinfo.si_code = ILL_ILLOPC; + else if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT))) + sinfo.si_code = ILL_PRVOPC; + else if (mctx64.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT))) + sinfo.si_code = ILL_ILLTRP; + else + sinfo.si_code = ILL_NOOP; + } else { + if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_ILL_INS_BIT))) + sinfo.si_code = ILL_ILLOPC; + else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_PRV_INS_BIT))) + sinfo.si_code = ILL_PRVOPC; + else if (mctx.ss.srr1 & (1 << (31 - SRR1_PRG_TRAP_BIT))) + sinfo.si_code = ILL_ILLTRP; + else + sinfo.si_code = ILL_NOOP; + } break; case SIGFPE: #define FPSCR_VX 2 @@ -327,36 +450,75 @@ sendsig(p, catcher, sig, mask, code) #define FPSCR_UX 4 #define FPSCR_ZX 5 #define FPSCR_XX 6 - sinfo.si_addr = (void *)mctx.ss.srr0; - if (mctx.fs.fpscr & (1 << (31 - FPSCR_VX))) - sinfo.si_code = FPE_FLTINV; - else if (mctx.fs.fpscr & (1 << (31 - FPSCR_OX))) - sinfo.si_code = FPE_FLTOVF; - else if (mctx.fs.fpscr & (1 << (31 - FPSCR_UX))) - sinfo.si_code = FPE_FLTUND; - else if (mctx.fs.fpscr & (1 << (31 - FPSCR_ZX))) - sinfo.si_code = FPE_FLTDIV; - else if (mctx.fs.fpscr & (1 << (31 - FPSCR_XX))) - sinfo.si_code = FPE_FLTRES; - else - sinfo.si_code = FPE_NOOP; + /* + * If it's 64 bit and not a dual context, mctx will + * contain uninitialized data, so we have to use + * mctx64 here. 
+ */ + if(ctx32 == 0) { + if (mctx64.fs.fpscr & (1 << (31 - FPSCR_VX))) + sinfo.si_code = FPE_FLTINV; + else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_OX))) + sinfo.si_code = FPE_FLTOVF; + else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_UX))) + sinfo.si_code = FPE_FLTUND; + else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_ZX))) + sinfo.si_code = FPE_FLTDIV; + else if (mctx64.fs.fpscr & (1 << (31 - FPSCR_XX))) + sinfo.si_code = FPE_FLTRES; + else + sinfo.si_code = FPE_NOOP; + } else { + if (mctx.fs.fpscr & (1 << (31 - FPSCR_VX))) + sinfo.si_code = FPE_FLTINV; + else if (mctx.fs.fpscr & (1 << (31 - FPSCR_OX))) + sinfo.si_code = FPE_FLTOVF; + else if (mctx.fs.fpscr & (1 << (31 - FPSCR_UX))) + sinfo.si_code = FPE_FLTUND; + else if (mctx.fs.fpscr & (1 << (31 - FPSCR_ZX))) + sinfo.si_code = FPE_FLTDIV; + else if (mctx.fs.fpscr & (1 << (31 - FPSCR_XX))) + sinfo.si_code = FPE_FLTRES; + else + sinfo.si_code = FPE_NOOP; + } break; case SIGBUS: - sinfo.si_addr = (void *)mctx.ss.srr0; + if (ctx32 == 0) { + sinfo.si_addr = mctx64.es.dar; + } else { + sinfo.si_addr = CAST_USER_ADDR_T(mctx.es.dar); + } /* on ppc we generate only if EXC_PPC_UNALIGNED */ sinfo.si_code = BUS_ADRALN; break; case SIGSEGV: - sinfo.si_addr = (void *)mctx.ss.srr0; - /* First check in srr1 and then in dsisr */ - if (mctx.ss.srr1 & (1 << (31 - DSISR_PROT_BIT))) - sinfo.si_code = SEGV_ACCERR; - else if (mctx.es.dsisr & (1 << (31 - DSISR_PROT_BIT))) - sinfo.si_code = SEGV_ACCERR; - else - sinfo.si_code = SEGV_MAPERR; + /* + * If it's 64 bit and not a dual context, mctx will + * contain uninitialized data, so we have to use + * mctx64 here. + */ + if (ctx32 == 0) { + sinfo.si_addr = mctx64.es.dar; + /* First check in srr1 and then in dsisr */ + if (mctx64.ss.srr1 & (1 << (31 - DSISR_PROT_BIT))) + sinfo.si_code = SEGV_ACCERR; + else if (mctx64.es.dsisr & (1 << (31 - DSISR_PROT_BIT))) + sinfo.si_code = SEGV_ACCERR; + else + sinfo.si_code = SEGV_MAPERR; + } else { + sinfo.si_addr = CAST_USER_ADDR_T(mctx.es.dar); + /* First check in srr1 and then in dsisr */ + if (mctx.ss.srr1 & (1 << (31 - DSISR_PROT_BIT))) + sinfo.si_code = SEGV_ACCERR; + else if (mctx.es.dsisr & (1 << (31 - DSISR_PROT_BIT))) + sinfo.si_code = SEGV_ACCERR; + else + sinfo.si_code = SEGV_MAPERR; + } break; default: break; @@ -364,37 +526,69 @@ sendsig(p, catcher, sig, mask, code) /* copy info out to user space */ - if (copyout((caddr_t)&uctx, (caddr_t)p_uctx, sizeof(struct ucontext))) - goto bad; - if (copyout((caddr_t)&sinfo, (caddr_t)p_sinfo, sizeof(siginfo_t))) - goto bad; - if ((ctx32 == 0) || dualcontext) { - tstate = &mctx64; - if (copyout((caddr_t)tstate, (caddr_t)p_mctx64, (vec_used? UC_FLAVOR64_VEC_SIZE: UC_FLAVOR64_SIZE))) + if (IS_64BIT_PROCESS(p)) { + if (copyout(&uctx, p_uctx, sizeof(struct user_ucontext64))) goto bad; - } - if ((ctx32 == 1) || dualcontext) { - tstate = &mctx; - if (copyout((caddr_t)tstate, (caddr_t)p_mctx, uctx.uc_mcsize)) + if (copyout(&sinfo, p_sinfo, sizeof(user_siginfo_t))) goto bad; - } + } else { + struct ucontext64 uctx32; + siginfo_t sinfo32; + ucontext_64to32(&uctx, &uctx32); + if (copyout(&uctx32, p_uctx, sizeof(struct ucontext64))) + goto bad; - /* Place our arguments in arg registers: rtm dependent */ + siginfo_64to32(&sinfo,&sinfo32); + if (copyout(&sinfo32, p_sinfo, sizeof(siginfo_t))) + goto bad; + } + if ((ctx32 == 0) || dualcontext) { + /* + * NOTE: Size of mcontext is not variant between 64bit and + * 32bit programs usng 64bit registers. + */ + if (copyout(&mctx64, p_mctx64, (vec_used? 
UC_FLAVOR64_VEC_SIZE: UC_FLAVOR64_SIZE))) + goto bad; + } + if ((ctx32 == 1) || dualcontext) { + if (copyout(&mctx, p_mctx, uctx.uc_mcsize)) + goto bad; + } - mctx.ss.r3 = (unsigned long)catcher; - mctx.ss.r4 = (unsigned long)infostyle; - mctx.ss.r5 = (unsigned long)sig; - mctx.ss.r6 = (unsigned long)p_sinfo; - mctx.ss.r7 = (unsigned long)p_uctx; - mctx.ss.srr0 = (unsigned long)trampact; - mctx.ss.srr1 = get_msr_exportmask(); /* MSR_EXPORT_MASK_SET */ - mctx.ss.r1 = sp; - state_count = PPC_THREAD_STATE_COUNT; - if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE, &mctx.ss, &state_count)) != KERN_SUCCESS) { - panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn); - } + /* Place our arguments in arg registers: rtm dependent */ + if(IS_64BIT_PROCESS(p)) { + mctx64.ss.r3 = catcher; + mctx64.ss.r4 = CAST_USER_ADDR_T(infostyle); + mctx64.ss.r5 = CAST_USER_ADDR_T(sig); + mctx64.ss.r6 = p_sinfo; + mctx64.ss.r7 = p_uctx; + + mctx64.ss.srr0 = trampact; + /* MSR_EXPORT_MASK_SET */ + mctx64.ss.srr1 = CAST_USER_ADDR_T(get_msr_exportmask()); + mctx64.ss.r1 = sp; + state_count = PPC_THREAD_STATE64_COUNT; + if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE64, (void *)&mctx64.ss, state_count)) != KERN_SUCCESS) { + panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn); + } + } else { + mctx.ss.r3 = CAST_DOWN(unsigned long,catcher); + mctx.ss.r4 = (unsigned long)infostyle; + mctx.ss.r5 = (unsigned long)sig; + mctx.ss.r6 = CAST_DOWN(unsigned long,p_sinfo); + mctx.ss.r7 = CAST_DOWN(unsigned long,p_uctx); + + mctx.ss.srr0 = CAST_DOWN(unsigned long,trampact); + /* MSR_EXPORT_MASK_SET */ + mctx.ss.srr1 = get_msr_exportmask(); + mctx.ss.r1 = CAST_DOWN(unsigned long,sp); + state_count = PPC_THREAD_STATE_COUNT; + if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE, (void *)&mctx.ss, state_count)) != KERN_SUCCESS) { + panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn); + } + } return; bad: @@ -419,167 +613,50 @@ bad: * a machine fault. 
*/ -#define FOR64_TRANSITION 1 - - -#ifdef FOR64_TRANSITION - -struct osigreturn_args { - struct ucontext *uctx; -}; - /* ARGSUSED */ int -osigreturn(p, uap, retval) - struct proc *p; - struct osigreturn_args *uap; - int *retval; +sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) { - struct ucontext uctx; - struct ucontext *p_uctx; - struct mcontext64 mctx64; - struct mcontext64 *p_64mctx; - struct mcontext *p_mctx; - int error; - thread_act_t th_act; - struct sigacts *ps = p->p_sigacts; - sigset_t mask; - register sig_t action; - unsigned long state_count; - unsigned int state_flavor; - struct uthread * ut; - int vec_used = 0; - void *tsptr, *fptr, *vptr, *mactx; - void ppc_checkthreadstate(void *, int); - - th_act = current_act(); - /* lets use the larger one */ - mactx = (void *)&mctx64; - - ut = (struct uthread *)get_bsdthread_info(th_act); - if (error = copyin(uap->uctx, &uctx, sizeof(struct ucontext))) { - return(error); - } - - /* validate the machine context size */ - switch (uctx.uc_mcsize) { - case UC_FLAVOR64_VEC_SIZE : - case UC_FLAVOR64_SIZE : - case UC_FLAVOR_VEC_SIZE : - case UC_FLAVOR_SIZE: - break; - default: - return(EINVAL); - } - - if (error = copyin(uctx.uc_mcontext, mactx, uctx.uc_mcsize)) { - return(error); - } - - if (uctx.uc_onstack & 01) - p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; - else - p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; - - ut->uu_sigmask = uctx.uc_sigmask & ~sigcantmask; - if (ut->uu_siglist & ~ut->uu_sigmask) - signal_setast(current_act()); - - vec_used = 0; - switch (uctx.uc_mcsize) { - case UC_FLAVOR64_VEC_SIZE : - vec_used = 1; - case UC_FLAVOR64_SIZE : { - p_64mctx = (struct mcontext64 *)mactx; - tsptr = (void *)&p_64mctx->ss; - fptr = (void *)&p_64mctx->fs; - vptr = (void *)&p_64mctx->vs; - state_flavor = PPC_THREAD_STATE64; - state_count = PPC_THREAD_STATE64_COUNT; - } - break; - case UC_FLAVOR_VEC_SIZE : - vec_used = 1; - case UC_FLAVOR_SIZE: - default: { - p_mctx = (struct mcontext *)mactx; - tsptr = (void *)&p_mctx->ss; - fptr = (void *)&p_mctx->fs; - vptr = (void *)&p_mctx->vs; - state_flavor = PPC_THREAD_STATE; - state_count = PPC_THREAD_STATE_COUNT; - } - break; - } /* switch () */ - - /* validate the thread state, set/reset appropriate mode bits in srr1 */ - (void)ppc_checkthreadstate(tsptr, state_flavor); + struct user_ucontext64 uctx; - if (thread_setstatus(th_act, state_flavor, tsptr, &state_count) != KERN_SUCCESS) { - return(EINVAL); - } - - state_count = PPC_FLOAT_STATE_COUNT; - if (thread_setstatus(th_act, PPC_FLOAT_STATE, fptr, &state_count) != KERN_SUCCESS) { - return(EINVAL); - } - - mask = sigmask(SIGFPE); - if (((ut->uu_sigmask & mask) == 0) && (p->p_sigcatch & mask) && ((p->p_sigignore & mask) == 0)) { - action = ps->ps_sigact[SIGFPE]; - if((action != SIG_DFL) && (action != SIG_IGN)) { - thread_enable_fpe(th_act, 1); - } - } - - if (vec_used) { - state_count = PPC_VECTOR_STATE_COUNT; - if (thread_setstatus(th_act, PPC_VECTOR_STATE, vptr, &state_count) != KERN_SUCCESS) { - return(EINVAL); - } - } - return (EJUSTRETURN); -} - -#endif /* FOR64_TRANSITION */ - -struct sigreturn_args { - struct ucontext *uctx; - int infostyle; -}; - -/* ARGSUSED */ -int -sigreturn(p, uap, retval) - struct proc *p; - struct sigreturn_args *uap; - int *retval; -{ - struct ucontext uctx; - struct ucontext *p_uctx; - char mactx[sizeof(struct mcontext64)]; - struct mcontext *p_mctx; + char mactx[sizeof(struct mcontext64)]; + struct mcontext *p_mctx; struct mcontext64 *p_64mctx; int error; - thread_act_t th_act; + 
thread_t th_act; struct sigacts *ps = p->p_sigacts; sigset_t mask; - register sig_t action; + user_addr_t action; unsigned long state_count; unsigned int state_flavor; struct uthread * ut; int vec_used = 0; void *tsptr, *fptr, *vptr; int infostyle = uap->infostyle; - void ppc_checkthreadstate(void *, int); - th_act = current_act(); + th_act = current_thread(); ut = (struct uthread *)get_bsdthread_info(th_act); - if (error = copyin(uap->uctx, &uctx, sizeof(struct ucontext))) { - return(error); + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->uctx, &uctx, sizeof(struct user_ucontext64)); + if (error) + return(error); + } else { + struct ucontext64 uctx32; + + /* + * struct ucontext and struct ucontext64 are identical in + * size and content; the only difference is the internal + * pointer type for the last element, which makes no + * difference for the copyin(). + */ + error = copyin(uap->uctx, &uctx32, sizeof(struct ucontext)); + if (error) + return(error); + ucontext_32to64(&uctx32, &uctx); } + /* validate the machine context size */ switch (uctx.uc_mcsize) { case UC_FLAVOR64_VEC_SIZE: @@ -590,18 +667,23 @@ sigreturn(p, uap, retval) default: return(EINVAL); } - if (error = copyin(uctx.uc_mcontext, mactx, uctx.uc_mcsize)) { + + /* + * The 64 bit process mcontext is identical to the mcontext64, so + * there is no conversion necessary. + */ + error = copyin(uctx.uc_mcontext64, mactx, uctx.uc_mcsize); + if (error) return(error); - } - if (uctx.uc_onstack & 01) + if ((uctx.uc_onstack & 01)) p->p_sigacts->ps_sigstk.ss_flags |= SA_ONSTACK; else p->p_sigacts->ps_sigstk.ss_flags &= ~SA_ONSTACK; ut->uu_sigmask = uctx.uc_sigmask & ~sigcantmask; if (ut->uu_siglist & ~ut->uu_sigmask) - signal_setast(current_act()); + signal_setast(current_thread()); vec_used = 0; switch (infostyle) { @@ -637,12 +719,12 @@ sigreturn(p, uap, retval) /* validate the thread state, set/reset appropriate mode bits in srr1 */ (void)ppc_checkthreadstate(tsptr, state_flavor); - if (thread_setstatus(th_act, state_flavor, tsptr, &state_count) != KERN_SUCCESS) { + if (thread_setstatus(th_act, state_flavor, tsptr, state_count) != KERN_SUCCESS) { return(EINVAL); } state_count = PPC_FLOAT_STATE_COUNT; - if (thread_setstatus(th_act, PPC_FLOAT_STATE, fptr, &state_count) != KERN_SUCCESS) { + if (thread_setstatus(th_act, PPC_FLOAT_STATE, fptr, state_count) != KERN_SUCCESS) { return(EINVAL); } @@ -656,7 +738,7 @@ sigreturn(p, uap, retval) if (vec_used) { state_count = PPC_VECTOR_STATE_COUNT; - if (thread_setstatus(th_act, PPC_VECTOR_STATE, vptr, &state_count) != KERN_SUCCESS) { + if (thread_setstatus(th_act, PPC_VECTOR_STATE, vptr, state_count) != KERN_SUCCESS) { return(EINVAL); } } @@ -672,7 +754,7 @@ boolean_t machine_exception( int exception, int code, - int subcode, + __unused int subcode, int *unix_signal, int *unix_code ) diff --git a/bsd/dev/ppc/xsumas.s b/bsd/dev/ppc/xsumas.s index c83a688f1..dae54fb13 100644 --- a/bsd/dev/ppc/xsumas.s +++ b/bsd/dev/ppc/xsumas.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,231 +19,377 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#define STANDALONE 0 - -#if STANDALONE -#include "asm.h" -#include "assym.h" -#include "proc_reg.h" /* For CACHE_LINE_SIZE */ - -#else - -#include -#if 0 -/* #include */ -#include /* For CACHE_LINE_SIZE */ -#endif 0 -#endif + +#define kShort 11 +#define cr1_gt 5 // bit 1 of cr1 /* - * Reg 3 - Pointer to data - * Reg 4 - Length of data - * Reg 5 - Accumulated sum value - * Reg 6 - Starting on odd boundary flag (relative to byte 0 of the checksumed data) - */ - -ENTRY(xsum_assym, TAG_NO_FRAME_USED) - - mr r11, r6 ; Swapped flag - addi r8, 0, 0 - addi r10, 0, 0x1f - addi r7, 0, 1 - addic r7, r7, 0 ; This clears the carry bit! - mr r12, r5 ; Save the passed-in checksum value - - /* - * Sum bytes before cache line boundary - */ - - cmpi cr0,0,r4,0 ; Check for length of 0 - beq Lleftovers - - and. r9, r3, r10 - beq Laligned32 ; 32 byte aligned - - andi. r9, r3, 0x3 - beq Laligned4 - - andi. r9, r3, 0x1 - beq Laligned2 ; 2 byte aligned - - addi r11, 0, 1 ; swap bytes at end - lbz r8, 0(r3) - add r3, r3, r7 - subf. r4, r7, r4 - beq Ldone - -Laligned2: - cmpi cr0,0,r4,2 ; If remaining length is less than two - go to wrap-up - blt Lleftovers - andi. r9, r3, 0x3 ; If aligned on a 4-byte boundary, go to that code - beq Laligned4 - lhz r5, 0(r3) ; Load and add a halfword to the checksum - adde r8, r8, r5 - slwi r7, r7, 1 - add r3, r3, r7 - subf. r4, r7, r4 - beq Ldone - - - /* - Add longwords up to the 32 byte boundary - */ - -Laligned4: - addi r7, 0, 4 -Lloop4: - cmpi cr0,0,r4,4 - blt Lleftovers - and. r9, r3, r10 - beq Laligned32 - lwz r5, 0(r3) - adde r8, r8, r5 - add r3, r3, r7 - subf. r4, r7, r4 - bne Lloop4 - b Ldone - - - /* - We're aligned on a 32 byte boundary now - add 8 longwords to checksum - until the remaining length is less than 32 - */ -Laligned32: - andis. r6, r4, 0xffff - bne Lmainloop - andi. r6, r4, 0xffe0 - beq Lleftovers - -Lmainloop: - addi r9, 0, 64 - addi r10, 0, 32 - cmpi cr0,0,r4,64 - blt Lnopretouch - dcbt r3, r10 ; Touch one cache-line ahead -Lnopretouch: - lwz r5, 0(r3) - - /* - * This is the main meat of the checksum. I attempted to arrange this code - * such that the processor would execute as many instructions as possible - * in parallel. - */ - -Lloop: - cmpi cr0,0,r4,96 - blt Lnotouch - dcbt r3, r9 ; Touch two cache lines ahead -Lnotouch: - adde r8, r8, r5 - lwz r5, 4(r3) - lwz r6, 8(r3) - lwz r7, 12(r3) - adde r8, r8, r5 - lwz r5, 16(r3) - adde r8, r8, r6 - lwz r6, 20(r3) - adde r8, r8, r7 - lwz r7, 24(r3) - adde r8, r8, r5 - lwz r5, 28(r3) - add r3, r3, r10 - adde r8, r8, r6 - adde r8, r8, r7 - adde r8, r8, r5 - subf r4, r10, r4 - andi. r6, r4, 0xffe0 - beq Lleftovers - lwz r5, 0(r3) - b Lloop - - /* - * Handle whatever bytes are left - */ - -Lleftovers: - /* - * Handle leftover bytes - */ - cmpi cr0,0,r4,0 - beq Ldone - - addi r7, 0, 1 - addi r10, 0, 0x7ffc - - and. r9, r4, r10 - bne Lfourormore - srw r10, r10, r7 - and. r9, r4, r10 - bne Ltwoormore - b Loneleft - -Lfourormore: - addi r10, 0, 4 - -Lfourloop: - lwz r5, 0(r3) - adde r8, r8, r5 - add r3, r3, r10 - subf r4, r10, r4 - andi. r6, r4, 0xfffc - bne Lfourloop - -Ltwoormore: - andi. r6, r4, 0xfffe - beq Loneleft - lhz r5, 0(r3) - adde r8, r8, r5 - addi r3, r3, 2 - subi r4, r4, 2 - -Loneleft: - cmpi cr0,0,r4,0 - beq Ldone - lbz r5, 0(r3) - slwi r5, r5, 8 - adde r8, r8, r5 - - /* - * Wrap the longword around, adding the two 16-bit portions - * to each other along with any previous and subsequent carries. 
- */ -Ldone: - addze r8, r8 ; Add the carry - addze r8, r8 ; Add the carry again (the last add may have carried) - andis. r6, r8, 0xffff ; Stuff r6 with the high order 16 bits of sum word - srwi r6, r6, 16 ; Shift it to the low order word - andi. r8, r8, 0xffff ; Zero out the high order word - add r8, r8, r6 ; Add the two halves - - andis. r6, r8, 0xffff ; Do the above again in case we carried into the - srwi r6, r6, 16 ; high order word with the last add. - andi. r8, r8, 0xffff - add r3, r8, r6 - - cmpi cr0,0,r11,0 ; Check to see if we need to swap the bytes - beq Ldontswap - - /* - * Our buffer began on an odd boundary, so we need to swap - * the checksum bytes. - */ - slwi r8, r3, 8 ; shift byte 0 to byte 1 - clrlwi r8, r8, 16 ; Clear top 16 bits - srwi r3, r3, 8 ; shift byte 1 to byte 0 - or r3, r8, r3 ; or them - -Ldontswap: - add r3, r3, r12 ; Add in the passed-in checksum - andis. r6, r3, 0xffff ; Wrap and add any carries into the top 16 bits - srwi r6, r6, 16 - andi. r3, r3, 0xffff - add r3, r3, r6 - - andis. r6, r3, 0xffff ; Do the above again in case we carried into the - srwi r6, r6, 16 ; high order word with the last add. - andi. r3, r3, 0xffff - add r3, r3, r6 - blr - - + * short xsum_assym( short *p, int len, short xsum, boolean odd); + * + * r3 - Pointer to data + * r4 - Length of data + * r5 - Accumulated sum value + * r6 - "Starting on odd address" flag (relative to byte 0 of the checksummed data) + * + * Note: If the "odd" flag is set, the address in r3 will be even. Nonetheless, we + * correctly handle the case where the flag is set and the address is odd. + * + * This is the internet (IP, TCP) checksum algorithm, which is the 1s-complement sum + * of the data, treated as an array of 16-bit integers. 1s-complement sums are done + * via "add with carry" operations on a 2s-complement machine like PPC. Note that + * the adds can be done in parallel on 32-bit (or 64-bit) registers, as long as the + * final sum is folded down to 16 bits. On 32-bit machines we use "adde", which is + * perfect except that it serializes the adds on the carry bit. On 64-bit machines + * we avoid this serialization by adding 32-bit words into 64-bit sums, then folding + * all 64 bits into a 16-bit sum at the end. We cannot use "adde" on 64-bit sums, + * because the kernel runs in 32-bit mode even on 64-bit machines (so the carry bit + * is set on the low 32 bits of the sum.) + * + * Using Altivec is tempting, but the performance impact of the greatly increased + * number of exceptions and register save/restore traffic probably makes it impractical + * for now. + */ + .globl _xsum_assym + .globl _xsum_nop_if_32bit + .text + .align 5 +_xsum_assym: + cmplwi cr0,r4,kShort ; too short to word align? + rlwinm r2,r3,0,0x3 ; get byte offset in word + dcbt 0,r3 ; touch in 1st cache line + cmpwi cr6,r2,0 ; is address word aligned? + ble cr0,Lshort ; skip if too short to bother aligning + + subfic r0,r2,4 ; get #bytes in partial word + cmplwi cr1,r6,0 ; set cr1_gt if "starting on odd address" flag is set + addic r0,r0,0 ; turn off carry + beq cr6,Laligned ; skip if already word aligned (r2==0 if aligned) + +; Partial word at start: zero filled on left, it becomes initial checksum.
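For reference, the arithmetic the comment block above describes, parallel lanes summed with end-around carry folding plus a byte-lane swap for buffers that start on an odd address, looks like this as a portable C model (a sketch for clarity, not the kernel routine):

    #include <stddef.h>
    #include <stdint.h>

    /* Sum 16-bit big-endian words into a 32-bit accumulator, then fold
     * the carries back in ("end-around carry") for a 1s-complement sum. */
    static uint16_t cksum_model(const uint8_t *p, size_t len,
                                uint16_t initial, int started_odd)
    {
        uint32_t sum = 0;

        while (len >= 2) {
            sum += (uint32_t)((p[0] << 8) | p[1]);
            p += 2;
            len -= 2;
        }
        if (len)                         /* trailing byte fills the high lane */
            sum += (uint32_t)p[0] << 8;

        /* Fold twice: the first fold can itself carry out of bit 15. */
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);

        if (started_odd)                 /* swap the two byte lanes */
            sum = ((sum << 8) & 0xff00) | ((sum >> 8) & 0x00ff);

        sum += initial;                  /* add caller's running checksum */
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

The paired folds mirror the paired addze/fold sequences in the assembly: a single fold is not enough, because the fold itself may generate one more carry.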
+ + rlwinm r3,r3,0,0,29 ; word align address + mtcrf 0x01,r2 ; move byte offset to cr7 + lwz r6,0(r3) ; get partial word + li r7,-1 ; start of mask for partial fill + slwi r8,r2,3 ; multiply byte offset by 8 + sub r4,r4,r0 ; adjust length for bytes in partial word + crxor cr1_gt,31,cr1_gt; set flag if byte-lane swap will be necessary + srw r7,r7,r8 ; get mask for bytes to keep in partial word + addi r3,r3,4 ; point to next word of input + and r2,r6,r7 ; zero fill on left + +; Address is now word aligned. Prepare for inner loop over 32-byte chunks. +; r2 = initial checksum +; r3 = word aligned address +; r4 = length remaining +; r5 = accumulated sum parameter +; carry = off +; cr1_gt = "starting on odd address" flag + +Laligned: + srwi. r0,r4,5 ; get count of 32-byte chunks + mtcrf 0x02,r4 ; move residual length to cr6 and cr7 + mtcrf 0x01,r4 + beq cr0,Lleftovers ; no chunks + + mtctr r0 ; set up loop count + li r4,32 ; offset to next chunk +_xsum_nop_if_32bit: + b L64BitPath ; use the 64-bit path (patched to nop on 32-bit machine) + dcbt r4,r3 ; touch in 2nd cache line + li r0,96 ; get touch offset + b LInnerLoop32 ; enter 32-bit loop + +; Inner loop for 32-bit machines. + + .align 4 +LInnerLoop32: + lwz r4,0(r3) + lwz r6,4(r3) + lwz r7,8(r3) + lwz r8,12(r3) + adde r2,r2,r4 + lwz r9,16(r3) + adde r2,r2,r6 + lwz r10,20(r3) + adde r2,r2,r7 + lwz r11,24(r3) + adde r2,r2,r8 + lwz r12,28(r3) + adde r2,r2,r9 + dcbt r3,r0 + adde r2,r2,r10 + addi r3,r3,32 + adde r2,r2,r11 + adde r2,r2,r12 + bdnz+ LInnerLoop32 + +; Handle leftover bytes. +; r2 = checksum so far +; r3 = word aligned address +; r5 = accumulated sum parameter +; carry = live +; cr1_gt = "starting on odd address" flag +; cr6,cr7 = residual length + +Lleftovers: + bf 27,Lleftover8 ; test 0x10 bit of residual length + lwz r4,0(r3) + lwz r6,4(r3) + lwz r7,8(r3) + lwz r8,12(r3) + addi r3,r3,16 + adde r2,r2,r4 + adde r2,r2,r6 + adde r2,r2,r7 + adde r2,r2,r8 +Lleftover8: + bf 28,Lleftover4 + lwz r4,0(r3) + lwz r6,4(r3) + addi r3,r3,8 + adde r2,r2,r4 + adde r2,r2,r6 +Lleftover4: + bf 29,Lleftover2 + lwz r4,0(r3) + addi r3,r3,4 + adde r2,r2,r4 +Lleftover2: + bf 30,Lleftover1 + lhz r4,0(r3) + addi r3,r3,2 + adde r2,r2,r4 +Lleftover1: + bf 31,Lwrapup + lbz r4,0(r3) + slwi r4,r4,8 ; shift last byte into proper lane + adde r2,r2,r4 + +; All data bytes checksummed. Wrap up. +; r2 = checksum so far (word parallel) +; r5 = accumulated sum parameter +; carry = live +; cr1_gt = "starting on odd address" flag + +Lwrapup: + addze r2,r2 ; add in last carry + addze r2,r2 ; in case the "addze" carries +Lwrapupx: ; here from short-operand case, with xer(ca) undefined + srwi r6,r2,16 ; top half of 32-bit checksum + rlwinm r7,r2,0,0xFFFF ; lower half + add r2,r6,r7 ; add them together + srwi r6,r2,16 ; then do it again, in case first carried + rlwinm r7,r2,0,0xFFFF + add r2,r6,r7 + bf cr1_gt,Lswapped ; test "starting on odd address" flag + +; The checksum began on an odd address, so swap bytes. + + rlwinm r6,r2,24,0x00FF ; move top byte to bottom + rlwinm r7,r2,8,0xFF00 ; bottom to top + or r2,r6,r7 ; rejoin + +; Finally, add in checksum passed in as a parameter. + +Lswapped: + add r2,r2,r5 ; add passed-in checksum + srwi r6,r2,16 ; top half of 32-bit checksum + rlwinm r7,r2,0,0xFFFF ; lower half + add r2,r6,r7 ; add them together + srwi r6,r2,16 ; then do it again, in case first carried + rlwinm r7,r2,0,0xFFFF + add r3,r6,r7 ; steer result into r3 + blr + +; Handle short operands. Do a halfword at a time. 
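The Lleftovers chain above never loops: mtcrf copies the low bits of the residual length into the condition register, and each bf tests one bit (bit 27 is the 0x10 bit, bit 28 is 0x8, down to bit 31 for the odd trailing byte). The control flow corresponds to this C shape (a sketch; add_span is an illustrative stand-in for the load/adde groups):

    #include <stddef.h>
    #include <stdint.h>

    /* Add 'n' bytes (n a power of two, 2..16) to the running word sum. */
    static void add_span(const uint8_t **p, size_t n, uint32_t *sum)
    {
        for (size_t i = 0; i < n; i += 2)
            *sum += (uint32_t)(((*p)[i] << 8) | (*p)[i + 1]);
        *p += n;
    }

    /* After the 32-byte chunks, at most 31 bytes remain; peel them off
     * in power-of-two pieces selected by the bits of the residual
     * length, mirroring the bf 27 / bf 28 / ... tests above. */
    static void handle_leftovers(const uint8_t **p, size_t resid, uint32_t *sum)
    {
        if (resid & 16) add_span(p, 16, sum);   /* bf 27 tests the 0x10 bit */
        if (resid & 8)  add_span(p, 8, sum);
        if (resid & 4)  add_span(p, 4, sum);
        if (resid & 2)  add_span(p, 2, sum);
        if (resid & 1)                          /* bf 31: odd trailing byte */
            *sum += (uint32_t)(**p) << 8;
    }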
+; r3 = address +; r4 = length (<= kShort) +; r5 = accumulated sum parameter +; r6 = "starting on odd byte" flag + +Lshort: + cmpwi cr6,r4,2 ; at least two bytes? + andi. r0,r4,1 ; odd length? + li r2,0 ; initialize checksum + cmplwi cr1,r6,0 ; set cr1_gt if "starting on odd address" flag is set + blt cr6,Lshort2 ; fewer than two bytes, so skip +Lshort1: + cmpwi cr6,r4,4 ; two more bytes (after we decrement)? + lhz r7,0(r3) + subi r4,r4,2 + addi r3,r3,2 + add r2,r2,r7 ; note no need for "adde" + bge cr6,Lshort1 ; loop for 2 more bytes +Lshort2: + beq Lwrapupx ; no byte at end, proceed to checkout with carry undefined + lbz r7,0(r3) + slwi r7,r7,8 ; shift last byte into proper lane + add r2,r2,r7 + b Lwrapupx + +; Handle 64-bit machine. The major improvement over the 32-bit path is that we use +; four parallel 32-bit accumulators, which carry into the upper half naturally so we +; do not have to use "adde", which serializes on the carry bit. Note that we cannot +; do 64-bit "adde"s, because we run in 32-bit mode so carry would not be set correctly. +; r2 = checksum so far (ie, the zero-filled partial first word) +; r3 = word aligned address +; r5 = accumulated sum parameter +; ctr = number of 32-byte chunks of input +; carry = unused in this code +; cr1_gt = "starting on odd address" flag +; cr6,cr7 = residual length + +L64BitPath: + stw r13,-4(r1) ; save a few nonvolatile regs in red zone so we can use them + stw r14,-8(r1) + stw r15,-12(r1) + stw r16,-16(r1) + li r0,128 ; to touch next line + li r13,0 ; r13-r15 are the accumulators, so initialize them + dcbt r3,r0 ; touch in next cache line, and keep loads away from the above stores + lwz r4,0(r3) ; start pipeline by loading first 32 bytes into r4, r6-r12 + lwz r6,4(r3) + lwz r7,8(r3) + mr r14,r2 ; just copy incoming partial word into one of the accumulators + li r15,0 + lwz r8,12(r3) + lwz r9,16(r3) + li r16,0 + li r0,256 ; get touch offset + lwz r10,20(r3) + lwz r11,24(r3) + lwz r12,28(r3) ; load last word of previous chunk + addi r3,r3,32 ; skip past the chunk + bdnz++ LInnerLoop64 ; enter loop if another chunk to go + + b LAddLastChunk ; only one chunk + +; Inner loop for 64-bit processors. This loop is scheduled for the 970. +; It is pipelined (loads are one iteration ahead of adds), and unrolled. +; It should take 9-10 cycles per iteration, which consumes 64 bytes of input. + + .align 5 +LInnerLoop64: ; 64 bytes/iteration + add r13,r13,r4 ; cycle 1 + add r14,r14,r6 + dcbt r3,r0 ; touch in 2 lines ahead + lwz r4,0(r3) + + add r15,r15,r7 ; cycle 2, etc + lwz r6,4(r3) + lwz r7,8(r3) + add r16,r16,r8 + + lwz r8,12(r3) + add r13,r13,r9 + add r14,r14,r10 + lwz r9,16(r3) + + add r15,r15,r11 + lwz r10,20(r3) + lwz r11,24(r3) + add r16,r16,r12 + bdz-- LEarlyExit ; early exit if no more chunks + + lwz r12,28(r3) + add r13,r13,r4 + add r14,r14,r6 + lwz r4,32(r3) + + add r15,r15,r7 + lwz r6,36(r3) + lwz r7,40(r3) + add r16,r16,r8 + + lwz r8,44(r3) + add r13,r13,r9 + add r14,r14,r10 + lwz r9,48(r3) + + add r15,r15,r11 + lwz r10,52(r3) + lwz r11,56(r3) + add r16,r16,r12 + + nop ; position last load in 2nd dispatch slot + lwz r12,60(r3) + addi r3,r3,64 + bdnz++ LInnerLoop64 + + b LAddLastChunk + +; Add in the last 32-byte chunk, and any leftover bytes. 
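The L64BitPath comment above is the key performance idea: since the kernel runs in 32-bit mode, adde would serialize on the carry bit, so the 970 path instead adds 32-bit loads into four independent 64-bit accumulators and folds once at the end. The equivalent scheme in C (a sketch under the same big-endian, word-aligned assumptions):

    #include <stddef.h>
    #include <stdint.h>

    /* Sum 32-bit words into four independent 64-bit accumulators (no
     * carry dependency between adds), then fold 64 -> 32 -> 16 bits. */
    static uint16_t cksum64_model(const uint32_t *w, size_t nwords)
    {
        uint64_t s0 = 0, s1 = 0, s2 = 0, s3 = 0;

        for (; nwords >= 4; nwords -= 4, w += 4) {
            s0 += w[0];          /* these four adds are independent,  */
            s1 += w[1];          /* so the core can issue them in     */
            s2 += w[2];          /* parallel                          */
            s3 += w[3];
        }
        while (nwords--)
            s0 += *w++;

        uint64_t sum = s0 + s1 + s2 + s3;
        sum = (sum & 0xffffffffu) + (sum >> 32);   /* fold 64 -> 33 bits */
        sum = (sum & 0xffffffffu) + (sum >> 32);   /* absorb the carry   */
        sum = (sum & 0xffff) + (sum >> 16);        /* fold 32 -> 17 bits */
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

Because each accumulator absorbs carries into its upper 32 bits naturally, the four adds per group have no data dependency on one another, which is exactly what the inner loop's scheduling comment relies on.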
+; r3 = word aligned address of next byte of data +; r5 = accumulated sum parameter +; r13-r16 = the four accumulators +; cr1_gt = "starting on odd address" flag +; cr6,cr7 = residual length + +LEarlyExit: ; here from middle of inner loop + lwz r12,28(r3) ; load last word of last chunk + addi r3,r3,32 +LAddLastChunk: ; last 32-byte chunk of input is in r4,r6-r12 + add r13,r13,r4 ; add in last chunk + add r14,r14,r6 ; these are 64-bit adds + add r15,r15,r7 + add r16,r16,r8 + add r13,r13,r9 + add r14,r14,r10 + add r15,r15,r11 + add r16,r16,r12 + +; Handle leftover bytes, if any. + + bf 27,Lleft1 ; test 0x10 bit of residual length + lwz r4,0(r3) + lwz r6,4(r3) + lwz r7,8(r3) + lwz r8,12(r3) + addi r3,r3,16 + add r13,r13,r4 + add r14,r14,r6 + add r15,r15,r7 + add r16,r16,r8 +Lleft1: + bf 28,Lleft2 + lwz r4,0(r3) + lwz r6,4(r3) + addi r3,r3,8 + add r13,r13,r4 + add r14,r14,r6 +Lleft2: + bf 29,Lleft3 + lwz r4,0(r3) + addi r3,r3,4 + add r14,r14,r4 +Lleft3: + bf 30,Lleft4 + lhz r4,0(r3) + addi r3,r3,2 + add r15,r15,r4 +Lleft4: + bf 31,Lleft5 + lbz r4,0(r3) + slwi r4,r4,8 ; shift last byte into proper lane + add r16,r16,r4 + +; All data bytes have been checksummed. Now we must add together the four +; accumulators and restore the regs from the red zone. +; r3 = word aligned address of next byte of data +; r5 = accumulated sum parameter +; r13-r16 = the four accumulators +; carry = not used so far +; cr1_gt = "starting on odd address" flag + +Lleft5: + add r8,r13,r14 ; add the four accumulators together + add r9,r15,r16 + lwz r13,-4(r1) ; start to restore nonvolatiles from red zone + lwz r14,-8(r1) + add r8,r8,r9 ; now r8 is 64-bit sum of the four accumulators + lwz r15,-12(r1) + lwz r16,-16(r1) + srdi r7,r8,32 ; get upper half of 64-bit sum + addc r2,r7,r8 ; finally, do a 32-bit add of the two halves of r8 (setting carry) + b Lwrapup ; merge r2, r5, and carry into a 16-bit checksum diff --git a/bsd/dev/random/YarrowCoreLib/port/smf.c b/bsd/dev/random/YarrowCoreLib/port/smf.c index 838a87601..297fe0f58 100644 --- a/bsd/dev/random/YarrowCoreLib/port/smf.c +++ b/bsd/dev/random/YarrowCoreLib/port/smf.c @@ -29,10 +29,12 @@ #include #include +#include -SMFAPI void mmInit() +SMFAPI void mmInit( void ) { + return; } SMFAPI MMPTR mmMalloc(DWORD request) @@ -62,8 +64,9 @@ SMFAPI LPVOID mmGetPtr(MMPTR ptrnum) return (LPVOID)ptrnum; } -SMFAPI void mmReturnPtr(MMPTR ptrnum) +SMFAPI void mmReturnPtr(__unused MMPTR ptrnum) { /* nothing */ + return; } diff --git a/bsd/dev/random/YarrowCoreLib/src/comp.c b/bsd/dev/random/YarrowCoreLib/src/comp.c index 91c1844a0..8d2faeea0 100644 --- a/bsd/dev/random/YarrowCoreLib/src/comp.c +++ b/bsd/dev/random/YarrowCoreLib/src/comp.c @@ -32,24 +32,26 @@ #ifdef YARROW_KERNEL /* null compression */ -comp_error_status comp_init(COMP_CTX* ctx) +comp_error_status comp_init(__unused COMP_CTX* ctx) { return COMP_SUCCESS; } -comp_error_status comp_add_data(COMP_CTX* ctx,Bytef* inp,uInt inplen) +comp_error_status comp_add_data( __unused COMP_CTX* ctx, + __unused Bytef* inp, + __unused uInt inplen ) { return COMP_SUCCESS; } -comp_error_status comp_get_ratio(COMP_CTX* ctx,float* out) +comp_error_status comp_get_ratio( __unused COMP_CTX* ctx,float* out ) { *out = 1.0; return COMP_SUCCESS; } -comp_error_status comp_end(COMP_CTX* ctx) +comp_error_status comp_end( __unused COMP_CTX* ctx ) { return COMP_SUCCESS; } diff --git a/bsd/dev/random/YarrowCoreLib/src/prng.c b/bsd/dev/random/YarrowCoreLib/src/prng.c index 5fe3bfd94..e2ba0a2ee 100644 --- a/bsd/dev/random/YarrowCoreLib/src/prng.c +++ 
b/bsd/dev/random/YarrowCoreLib/src/prng.c @@ -182,9 +182,10 @@ cleanup_slow_init: /* In-place modified bubble sort */ static void -bubbleSort(UINT *data,UINT len) +bubbleSort( UINT *data, LONG len ) { - UINT i,last,newlast,temp; + LONG i,last,newlast; + UINT temp; last = len-1; while(last!=-1) @@ -476,7 +477,7 @@ prngStretch(BYTE *inbuf,UINT inbuflen,BYTE *outbuf,UINT outbuflen) { /* Add entropy to the PRNG from a source */ prng_error_status -prngInput(PRNG *p, BYTE *inbuf,UINT inbuflen,UINT poolnum,UINT estbits) +prngInput(PRNG *p, BYTE *inbuf,UINT inbuflen,UINT poolnum, __unused UINT estbits) { #ifndef YARROW_KERNEL comp_error_status resp; @@ -513,13 +514,15 @@ prng_error_status prngAllowReseed(PRNG *p, LONGLONG ticks) { UINT temp[TOTAL_SOURCES]; - UINT i,sum; + LONG i; + UINT sum; #ifndef KERNEL_BUILD float ratio; #endif +#ifndef KERNEL_BUILD comp_error_status resp; - +#endif CHECKSTATE(p); diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c b/bsd/dev/random/YarrowCoreLib/src/sha1mod.c index 03dfa5a23..f58585865 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c +++ b/bsd/dev/random/YarrowCoreLib/src/sha1mod.c @@ -27,6 +27,9 @@ By Steve Reid */ /* Header portion split from main code for convenience (AYB 3/02/98) */ #include "sha1mod.h" +#ifdef SHA1HANDSOFF +#include +#endif #define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits)))) @@ -58,7 +61,7 @@ By Steve Reid /* Hash a single 512-bit block. This is the core of the algorithm. */ -void SHA1Transform(unsigned long state[5], unsigned char buffer[64]) +void SHA1Transform(unsigned long state[5], const unsigned char buffer[64]) { unsigned long a, b, c, d, e; typedef union { @@ -127,7 +130,7 @@ void SHA1Init(SHA1_CTX* context) /* Run your data through this. */ -void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int len) +void SHA1Update(SHA1_CTX* context, const unsigned char* data, unsigned int len) { unsigned int i, j; @@ -158,9 +161,9 @@ unsigned char finalcount[8]; finalcount[i] = (unsigned char)((context->count[(i >= 4 ?
0 : 1)] >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ } - SHA1Update(context, (unsigned char *)"\200", 1); + SHA1Update(context, "\200", 1); while ((context->count[0] & 504) != 448) { - SHA1Update(context, (unsigned char *)"\0", 1); + SHA1Update(context, "\0", 1); } SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */ for (i = 0; i < 20; i++) { diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h b/bsd/dev/random/YarrowCoreLib/src/sha1mod.h index 839168e8b..c066767bb 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h +++ b/bsd/dev/random/YarrowCoreLib/src/sha1mod.h @@ -53,9 +53,9 @@ typedef struct { } SHA1_CTX; //Function forward declarations -void SHA1Transform(unsigned long state[5], unsigned char buffer[64]); +void SHA1Transform(unsigned long state[5], const unsigned char buffer[64]); void SHA1Init(SHA1_CTX* context); -void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int len); +void SHA1Update(SHA1_CTX* context, const unsigned char* data, unsigned int len); void SHA1Final(unsigned char digest[20], SHA1_CTX* context); #endif /* __SHA1_H__ */ diff --git a/bsd/dev/random/YarrowCoreLib/src/smf.h b/bsd/dev/random/YarrowCoreLib/src/smf.h index 538b815f4..ad4fcf321 100644 --- a/bsd/dev/random/YarrowCoreLib/src/smf.h +++ b/bsd/dev/random/YarrowCoreLib/src/smf.h @@ -79,7 +79,7 @@ extern "C" { #define MM_NULL 0 /* Function forward declarations */ -SMFAPI void mmInit(); +SMFAPI void mmInit( void ); SMFAPI MMPTR mmMalloc(DWORD request); SMFAPI void mmFree(MMPTR ptrnum); SMFAPI LPVOID mmGetPtr(MMPTR ptrnum); diff --git a/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c b/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c index a441c2780..bd8b19794 100644 --- a/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c +++ b/bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c @@ -36,6 +36,7 @@ */ #include "dev/random/YarrowCoreLib/include/yarrowUtils.h" +#include void trashMemory(void* mem, int len) diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c index dfb37cf51..747656750 100644 --- a/bsd/dev/random/randomdev.c +++ b/bsd/dev/random/randomdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -49,9 +50,9 @@ static struct cdevsw random_cdevsw = random_close, /* close */ random_read, /* read */ random_write, /* write */ - random_ioctl, /* ioctl */ - nulldev, /* stop */ - nulldev, /* reset */ + random_ioctl, /* ioctl */ + (stop_fcn_t *)nulldev, /* stop */ + (reset_fcn_t *)nulldev, /* reset */ NULL, /* tty's */ eno_select, /* select */ eno_mmap, /* mmap */ @@ -69,10 +70,11 @@ static mutex_t *gYarrowMutex = 0; #define RESEED_TICKS 50 /* how long a reseed operation can take */ + /* *Initialize ONLY the Yarrow generator.
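For context on the SHA1Final() code above: the literal `\200` byte and the `(count[0] & 504) != 448` loop implement standard Merkle-Damgard padding. The constants are bit counts: append 0x80, zero-fill until the message length is 448 bits (56 bytes) mod 512, then append the 64-bit bit count. The same padding computed directly (a sketch; msglen is the message length in bytes):

    #include <stddef.h>
    #include <stdint.h>

    /* Compute SHA-1 style padding for a message of 'msglen' bytes:
     * one 0x80 byte, zeros up to 56 (mod 64), then the bit length,
     * big-endian, in the final 8 bytes.  Worst case is 72 bytes. */
    static size_t sha1_pad(uint64_t msglen, uint8_t pad[72])
    {
        size_t padlen = 1;
        pad[0] = 0x80;
        while ((msglen + padlen) % 64 != 56)
            pad[padlen++] = 0;
        uint64_t bits = msglen * 8;
        for (int i = 0; i < 8; i++)
            pad[padlen + i] = (uint8_t)(bits >> (56 - 8 * i));
        return padlen + 8;
    }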
*/ -void PreliminarySetup () +void PreliminarySetup( void ) { prng_error_status perr; struct timeval tt; @@ -120,7 +122,7 @@ void PreliminarySetup () * and to register ourselves with devfs */ void -random_init() +random_init( void ) { int ret; @@ -152,12 +154,8 @@ random_init() } int -random_ioctl(dev, cmd, data, flag, p) - dev_t dev; - u_long cmd; - caddr_t data; - int flag; - struct proc *p; +random_ioctl( __unused dev_t dev, u_long cmd, __unused caddr_t data, + __unused int flag, __unused struct proc *p ) { switch (cmd) { case FIONBIO: @@ -176,7 +174,7 @@ random_ioctl(dev, cmd, data, flag, p) */ int -random_open(dev_t dev, int flags, int devtype, struct proc *p) +random_open(__unused dev_t dev, int flags, __unused int devtype, __unused struct proc *p) { if (gRandomError != 0) { /* forget it, yarrow didn't come up */ @@ -191,7 +189,7 @@ random_open(dev_t dev, int flags, int devtype, struct proc *p) if (securelevel >= 2) return (EPERM); #ifndef __APPLE__ - if ((securelevel >= 1) && suser(p->p_ucred, &p->p_acflag)) + if ((securelevel >= 1) && proc_suser(p)) return (EPERM); #endif /* !__APPLE__ */ } @@ -205,7 +203,7 @@ random_open(dev_t dev, int flags, int devtype, struct proc *p) */ int -random_close(dev_t dev, int flags, int mode, struct proc *p) +random_close(__unused dev_t dev, __unused int flags, __unused int mode, __unused struct proc *p) { return (0); } @@ -216,7 +214,7 @@ random_close(dev_t dev, int flags, int mode, struct proc *p) * prng. */ int -random_write (dev_t dev, struct uio *uio, int ioflag) +random_write (__unused dev_t dev, struct uio *uio, __unused int ioflag) { int retCode = 0; char rdBuffer[256]; @@ -230,9 +228,10 @@ random_write (dev_t dev, struct uio *uio, int ioflag) /* Security server is sending us entropy */ - while (uio->uio_resid > 0 && retCode == 0) { + while (uio_resid(uio) > 0 && retCode == 0) { /* get the user's data */ - int bytesToInput = min(uio->uio_resid, sizeof (rdBuffer)); + // LP64todo - fix this! uio_resid may be 64-bit value + int bytesToInput = min(uio_resid(uio), sizeof (rdBuffer)); retCode = uiomove(rdBuffer, bytesToInput, uio); if (retCode != 0) goto /*ugh*/ error_exit; @@ -263,7 +262,7 @@ error_exit: /* do this to make sure the mutex unlocks. */ * return data to the caller. Results unpredictable. */ int -random_read(dev_t dev, struct uio *uio, int ioflag) +random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) { int retCode = 0; char wrBuffer[512]; @@ -274,9 +273,10 @@ random_read(dev_t dev, struct uio *uio, int ioflag) /* lock down the mutex */ mutex_lock(gYarrowMutex); - while (uio->uio_resid > 0 && retCode == 0) { + while (uio_resid(uio) > 0 && retCode == 0) { /* get the user's data */ - int bytesToRead = min(uio->uio_resid, sizeof (wrBuffer)); + // LP64todo - fix this! uio_resid may be 64-bit value + int bytesToRead = min(uio_resid(uio), sizeof (wrBuffer)); /* get the data from Yarrow */ if (prngOutput(gPrngRef, (BYTE *) wrBuffer, sizeof (wrBuffer)) != 0) { @@ -317,7 +317,7 @@ read_random(void* buffer, u_int numbytes) * Return an unsigned long pseudo-random number. 
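The random_read()/random_write() hunks above switch from the old uio_resid field to the uio_resid() accessor, and the LP64todo comments flag the remaining truncation: the residual is now a 64-bit user_ssize_t being assigned to an int. The defensive clamp they call for is something like this (an illustrative sketch, not the committed fix):

    #include <stddef.h>
    #include <stdint.h>

    /* Clamp a possibly 64-bit residual byte count to a small stack
     * buffer before truncating to int, so a huge uio cannot wrap the
     * per-pass byte count negative. */
    static int bytes_this_pass(int64_t resid, size_t bufsize)
    {
        if (resid <= 0)
            return 0;
        if ((uint64_t)resid > (uint64_t)bufsize)
            return (int)bufsize;
        return (int)resid;
    }

With this shape, the min(uio_resid(uio), sizeof(buffer)) computation stays well-defined even when a caller supplies a transfer larger than INT_MAX.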
*/ u_long -RandomULong() +RandomULong( void ) { u_long buf; read_random(&buf, sizeof (buf)); diff --git a/bsd/dev/random/randomdev.h b/bsd/dev/random/randomdev.h index efa6703e9..e5c65aea1 100644 --- a/bsd/dev/random/randomdev.h +++ b/bsd/dev/random/randomdev.h @@ -29,12 +29,14 @@ #include +void PreliminarySetup( void ); +void random_init( void ); int random_open(dev_t dev, int flags, int devtype, struct proc *pp); int random_close(dev_t dev, int flags, int mode, struct proc *pp); int random_read(dev_t dev, struct uio *uio, int ioflag); int random_write(dev_t dev, struct uio *uio, int ioflag); -u_long RandomULong(); +u_long RandomULong( void ); #endif /* __APPLE_API_PRIVATE */ #endif /* __DEV_RANDOMDEV_H__ */ diff --git a/bsd/dev/ppc/unix_startup.c b/bsd/dev/unix_startup.c similarity index 55% rename from bsd/dev/ppc/unix_startup.c rename to bsd/dev/unix_startup.c index 717fddc9f..33070c045 100644 --- a/bsd/dev/ppc/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -1,14 +1,14 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ - * + * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. - * + * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -16,13 +16,14 @@ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. - * + * * @APPLE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1992,7 NeXT Computer, Inc. * * Unix data structure initialization. + * */ #include @@ -31,41 +32,53 @@ #include #include -#include +#include #include #include #include #include #include -extern vm_map_t mb_map; +extern vm_map_t mb_map; + +extern u_long tcp_sendspace; +extern u_long tcp_recvspace; + +void bsd_bufferinit(void); +extern void md_prepare_for_shutdown(int, int, char *); /* * Declare these as initialized data so we can patch them. 
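A small but recurring change in these files is `f()` becoming `f( void )` (mmInit(), PreliminarySetup(), random_init(), RandomULong()). In C, unlike C++, an empty parameter list declares a function with unspecified arguments, so calls go unchecked; only `(void)` declares a true prototype. A standalone demonstration with hypothetical names:

    #include <stdio.h>

    /* Old style: empty parens leave the arguments unspecified, so the
     * compiler cannot check calls against the declaration. */
    unsigned long random_old();

    /* New style: (void) is a real prototype; mismatched calls are errors. */
    unsigned long random_new(void);

    unsigned long random_old() { return 4UL; }
    unsigned long random_new(void) { return 4UL; }

    int main(void)
    {
        printf("%lu\n", random_old());
        printf("%lu\n", random_new());
        /* random_old(1, 2);  -- accepted silently by C89/C99 compilers */
        /* random_new(1);     -- rejected: too many arguments           */
        return 0;
    }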
*/ #ifdef NBUF -int nbuf = NBUF; -int niobuf = NBUF/2; +int nbuf = NBUF; +int niobuf = NBUF / 2; + #else -int nbuf = 0; -int niobuf = 0; +int nbuf = 0; +int niobuf = 0; + #endif -int srv = 0; /* Flag indicates a server boot when set */ -int ncl = 0; +int srv = 0; /* Flag indicates a server boot when set */ +int ncl = 0; + +vm_map_t buffer_map; +vm_map_t bufferhdr_map; -vm_map_t bufferhdr_map; + +extern void bsd_startupearly(void); void -bsd_startupearly() +bsd_startupearly(void) { - vm_offset_t firstaddr; - vm_size_t size; - kern_return_t ret; + vm_offset_t firstaddr; + vm_size_t size; + kern_return_t ret; if (nbuf == 0) - nbuf = atop_64(sane_size / 100); /* Get 1% of ram, but no more than we can map */ + nbuf = atop(sane_size / 100); /* Get 1% of ram, but no more than we can map */ if (nbuf > 8192) nbuf = 8192; if (nbuf < 256) @@ -78,39 +91,37 @@ bsd_startupearly() if (niobuf < 128) niobuf = 128; - size = (nbuf + niobuf) * sizeof (struct buf); - size = round_page_32(size); + size = (nbuf + niobuf) * sizeof(struct buf); + size = round_page(size); ret = kmem_suballoc(kernel_map, - &firstaddr, - size, - FALSE, - TRUE, - &bufferhdr_map); + &firstaddr, + size, + FALSE, + VM_FLAGS_ANYWHERE, + &bufferhdr_map); - if (ret != KERN_SUCCESS) + if (ret != KERN_SUCCESS) panic("Failed to create bufferhdr_map"); - + ret = kernel_memory_allocate(bufferhdr_map, - &firstaddr, - size, - 0, - KMA_HERE | KMA_KOBJECT); + &firstaddr, + size, + 0, + KMA_HERE | KMA_KOBJECT); if (ret != KERN_SUCCESS) panic("Failed to allocate bufferhdr_map"); - buf = (struct buf * )firstaddr; - bzero(buf,size); + buf = (struct buf *) firstaddr; + bzero(buf, size); - if ((sane_size > (64 * 1024 * 1024)) || ncl) { - int scale; - extern u_long tcp_sendspace; - extern u_long tcp_recvspace; + if (sane_size > (64 * 1024 * 1024) || ncl) { + int scale; if ((nmbclusters = ncl) == 0) { - if ((nmbclusters = ((sane_size / 16) / MCLBYTES)) > 16384) - nmbclusters = 16384; + if ((nmbclusters = ((sane_size / 16)/MCLBYTES)) > 32768) + nmbclusters = 32768; } if ((scale = nmbclusters / NMBCLUSTERS) > 1) { tcp_sendspace *= scale; @@ -125,38 +136,26 @@ bsd_startupearly() } void -bsd_bufferinit() +bsd_bufferinit(void) { - kern_return_t ret; + kern_return_t ret; - cons.t_dev = makedev(12, 0); + cons.t_dev = makedev(12, 0); bsd_startupearly(); - ret = kmem_suballoc(kernel_map, - (vm_offset_t *) &mbutl, - (vm_size_t) (nmbclusters * MCLBYTES), - FALSE, - TRUE, - &mb_map); + ret = kmem_suballoc(kernel_map, + (vm_offset_t *) & mbutl, + (vm_size_t) (nmbclusters * MCLBYTES), + FALSE, + VM_FLAGS_ANYWHERE, + &mb_map); - if (ret != KERN_SUCCESS) + if (ret != KERN_SUCCESS) panic("Failed to allocate mb_map\n"); - - /* - * Set up buffers, so they can be used to read disk labels. - */ - bufinit(); -} - -void -md_prepare_for_shutdown(int paniced, int howto, char * command) -{ - extern void IOSystemShutdownNotification(); - /* - * Temporary hack to notify the power management root domain - * that the system will shut down. - */ - IOSystemShutdownNotification(); + /* + * Set up buffers, so they can be used to read disk labels. 
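The bsd_startupearly() logic above encodes a sizing policy: buffer headers get 1% of physical memory in pages, clamped to [256, 8192]; and on machines with more than 64 MB (or an explicit ncl), the mbuf cluster pool is sane_size/16 bytes capped at 32768 clusters (raised from 16384 in this patch), with TCP send/receive space scaled up proportionally. The policy in isolation (a sketch; the named constants are illustrative stand-ins, not xnu's values):

    #include <stdint.h>

    enum { PAGE_SZ = 4096, MCLBYTES_SZ = 2048, NMBCLUSTERS_BASE = 512 };
    /* The three constants above are stand-ins for the kernel's macros. */

    static void size_buffers(uint64_t sane_size, int ncl,
                             int *nbuf, int *nmbclusters,
                             unsigned long *tcp_sendspace,
                             unsigned long *tcp_recvspace)
    {
        if (*nbuf == 0)
            *nbuf = (int)((sane_size / 100) / PAGE_SZ); /* 1% of RAM, in pages */
        if (*nbuf > 8192) *nbuf = 8192;
        if (*nbuf < 256)  *nbuf = 256;

        if (sane_size > 64ULL * 1024 * 1024 || ncl) {
            int scale;
            if ((*nmbclusters = ncl) == 0) {
                *nmbclusters = (int)((sane_size / 16) / MCLBYTES_SZ);
                if (*nmbclusters > 32768)
                    *nmbclusters = 32768;      /* cap raised from 16384 */
            }
            if ((scale = *nmbclusters / NMBCLUSTERS_BASE) > 1) {
                *tcp_sendspace *= scale;       /* scale TCP windows with RAM */
                *tcp_recvspace *= scale;
            }
        }
    }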
+ */ + bufinit(); } diff --git a/bsd/dev/vn/shadow.c b/bsd/dev/vn/shadow.c index 12a20c725..cb5fbfd6d 100644 --- a/bsd/dev/vn/shadow.c +++ b/bsd/dev/vn/shadow.c @@ -62,6 +62,7 @@ #include #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK) #define my_free(a) FREE(a, M_TEMP) +#include #endif /* TEST_SHADOW */ #include "shadow.h" @@ -289,7 +290,7 @@ bitmap_get(u_char * map, u_long start_bit, u_long bit_count, } end: - for (i = start.bit; i < end.bit; i++) { + for (i = start.bit; i < (int)end.bit; i++) { boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE; if (this_is_set != is_set) { @@ -525,6 +526,15 @@ shadow_map_write(shadow_map_t * map, u_long block_offset, return (shadow_grew); } +boolean_t +shadow_map_is_written(shadow_map_t * map, u_long block_offset) +{ + bitmap_offset_t b; + + b = bitmap_offset(block_offset); + return ((map->block_bitmap[b.byte] & bit(b.bit)) ? TRUE : FALSE); +} + /* * Function: shadow_map_shadow_size * diff --git a/bsd/dev/vn/shadow.h b/bsd/dev/vn/shadow.h index 074ba9e4c..b610fd828 100644 --- a/bsd/dev/vn/shadow.h +++ b/bsd/dev/vn/shadow.h @@ -35,12 +35,17 @@ shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, boolean_t shadow_map_write(shadow_map_t * map, u_long block_offset, u_long block_count, u_long * incr_block_offset, u_long * incr_block_count); +boolean_t +shadow_map_is_written(shadow_map_t * map, u_long block_offset); + u_long shadow_map_shadow_size(shadow_map_t * map); shadow_map_t * shadow_map_create(off_t file_size, off_t shadow_size, unsigned long band_size, unsigned long block_size); +void +shadow_map_free(shadow_map_t * map); #endif /* __APPLE_API_PRIVATE */ #endif /* __VN_SHADOW_H__ */ diff --git a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c index 19f246616..265270a3a 100644 --- a/bsd/dev/vn/vn.c +++ b/bsd/dev/vn/vn.c @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /* * Copyright (c) 1988 University of Utah. @@ -48,7 +69,7 @@ * Block/character interface to a vnode. Allows one to treat a file * as a disk (e.g. build a filesystem in it, mount it, etc.). * - * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode + * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode * instead of a simple VOP_RDWR. We do this to avoid distorting the * local buffer cache. 
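The new shadow_map_is_written() above is a single bitmap probe: split the block number into a byte index and a bit index, then test that bit of the shadow's block bitmap. Standalone, the probe looks like this (a sketch; the MSB-first bit numbering is an assumption about the driver's bit() macro):

    #include <stdint.h>

    typedef struct {
        uint32_t byte;   /* which byte of the bitmap */
        uint32_t bit;    /* which bit within that byte */
    } bitmap_offset_like_t;

    /* Split a block number into byte/bit coordinates (8 bits per byte). */
    static bitmap_offset_like_t bitmap_offset_of(uint32_t block)
    {
        bitmap_offset_like_t b = { block / 8, block % 8 };
        return b;
    }

    /* Probe one bit: has this block ever been written to the shadow? */
    static int is_written(const uint8_t *block_bitmap, uint32_t block)
    {
        bitmap_offset_like_t b = bitmap_offset_of(block);
        return (block_bitmap[b.byte] & (0x80u >> b.bit)) != 0;
    }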
* @@ -71,35 +92,26 @@ #include #include #include +#include #include #include -#include +#include #include #include #include #include #include +#include #include #include #include -#include #include #include -extern void -vfs_io_maxsegsize(struct vnode *vp, - int flags, /* B_READ or B_WRITE */ - int *maxsegsize); - -extern void -vfs_io_attributes(struct vnode *vp, - int flags, /* B_READ or B_WRITE */ - int *iosize, - int *vectors); #include "shadow.h" @@ -152,13 +164,15 @@ struct vn_softc { u_int64_t sc_fsize; /* file size in bytes */ u_int64_t sc_size; /* size of vn, sc_secsize scale */ int sc_flags; /* flags */ - int sc_secsize; /* sector size */ + u_long sc_secsize; /* sector size */ struct vnode *sc_vp; /* vnode if not NULL */ + uint32_t sc_vid; int sc_open_flags; struct vnode *sc_shadow_vp; /* shadow vnode if not NULL */ + uint32_t sc_shadow_vid; shadow_map_t * sc_shadow_map; /* shadow map if not NULL */ - struct ucred *sc_cred; /* credentials */ - u_long sc_options; /* options */ + kauth_cred_t sc_cred; /* credentials */ + u_int32_t sc_options; /* options */ void * sc_bdev; void * sc_cdev; } vn_table[NVNDEVICE]; @@ -169,40 +183,45 @@ struct vn_softc { #define VNF_INITED 0x01 #define VNF_READONLY 0x02 -static u_long vn_options; +static u_int32_t vn_options; #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) -static int vnsetcred (struct vn_softc *vn, struct proc *p); -static void vnclear (struct vn_softc *vn); +static int setcred(struct vnode * vp, struct proc * p, + kauth_cred_t cred); +static void vnclear (struct vn_softc *vn, struct proc * p); +static void vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to); +void vndevice_init(void); +int vndevice_root_image(char * path, char devname[], dev_t * dev_p); static int vniocattach_file(struct vn_softc *vn, - struct vn_ioctl *vio, + struct user_vn_ioctl *vniop, dev_t dev, int in_kernel, struct proc *p); static int vniocattach_shadow(struct vn_softc * vn, - struct vn_ioctl *vio, + struct user_vn_ioctl *vniop, dev_t dev, int in_kernel, struct proc *p); -static __inline__ +static __inline__ int vnunit(dev_t dev) { return (minor(dev)); } static int -vnclose(dev_t dev, int flags, int devtype, struct proc *p) +vnclose(__unused dev_t dev, __unused int flags, + __unused int devtype, __unused struct proc *p) { return (0); } static int -vnopen(dev_t dev, int flags, int devtype, struct proc *p) +vnopen(dev_t dev, int flags, __unused int devtype, __unused struct proc *p) { struct vn_softc *vn; int unit; @@ -218,11 +237,260 @@ vnopen(dev_t dev, int flags, int devtype, struct proc *p) return(0); } +static int +file_io(struct vnode * vp, struct vfs_context * context_p, + enum uio_rw op, char * base, off_t offset, user_ssize_t count, + user_ssize_t * resid) +{ + uio_t auio; + int error; + char uio_buf[UIO_SIZEOF(1)]; + + auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(base), count); + if (op == UIO_READ) + error = VNOP_READ(vp, auio, IO_SYNC, context_p); + else + error = VNOP_WRITE(vp, auio, IO_SYNC, context_p); + + if (resid != NULL) { + *resid = uio_resid(auio); + } + return (error); +} + +static __inline__ off_t +block_round(off_t o, int blocksize) +{ + return ((o + blocksize - 1) / blocksize); +} + +static __inline__ off_t +block_truncate(off_t o, int blocksize) +{ + return (o / blocksize); +} + +static __inline__ int +block_remainder(off_t o, int blocksize) +{ + return (o % 
blocksize); +} + +static int +vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, + struct vfs_context * context_p) +{ + u_long blocksize = vn->sc_secsize; + int error = 0; + off_t offset; + user_ssize_t resid; + off_t orig_offset; + user_ssize_t orig_resid; + + orig_resid = resid = uio_resid(uio); + orig_offset = offset = uio_offset(uio); + + while (resid > 0) { + u_long remainder; + u_long this_block_number; + u_long this_block_count; + off_t this_offset; + user_ssize_t this_resid; + struct vnode * vp; + + /* figure out which blocks to read */ + remainder = block_remainder(offset, blocksize); + if (shadow_map_read(vn->sc_shadow_map, + block_truncate(offset, blocksize), + block_round(resid + remainder, blocksize), + &this_block_number, &this_block_count)) { + vp = vn->sc_shadow_vp; + } + else { + vp = vn->sc_vp; + } + + /* read the blocks (or parts thereof) */ + this_offset = (off_t)this_block_number * blocksize + remainder; + uio_setoffset(uio, this_offset); + this_resid = this_block_count * blocksize - remainder; + if (this_resid > resid) { + this_resid = resid; + } + uio_setresid(uio, this_resid); + error = VNOP_READ(vp, uio, ioflag, context_p); + if (error) { + break; + } + + /* figure out how much we actually read */ + this_resid -= uio_resid(uio); + if (this_resid == 0) { + printf("vn device: vnread_shadow zero length read\n"); + break; + } + resid -= this_resid; + offset += this_resid; + } + uio_setresid(uio, resid); + uio_setoffset(uio, offset); + return (error); +} + +static int +vncopy_block_to_shadow(struct vn_softc * vn, struct vfs_context * context_p, + u_long file_block, u_long shadow_block) +{ + int error; + char * tmpbuf; + + tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK); + if (tmpbuf == NULL) { + return (ENOMEM); + } + /* read one block from file at file_block offset */ + error = file_io(vn->sc_vp, context_p, UIO_READ, + tmpbuf, (off_t)file_block * vn->sc_secsize, + vn->sc_secsize, NULL); + if (error) { + goto done; + } + /* write one block to shadow file at shadow_block offset */ + error = file_io(vn->sc_shadow_vp, context_p, UIO_WRITE, + tmpbuf, (off_t)shadow_block * vn->sc_secsize, + vn->sc_secsize, NULL); + done: + FREE(tmpbuf, M_TEMP); + return (error); +} + +enum { + FLAGS_FIRST_BLOCK_PARTIAL = 0x1, + FLAGS_LAST_BLOCK_PARTIAL = 0x2 +}; + +static int +vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, + struct vfs_context * context_p) +{ + u_long blocksize = vn->sc_secsize; + int error = 0; + user_ssize_t resid; + off_t offset; + + resid = uio_resid(uio); + offset = uio_offset(uio); + + while (resid > 0) { + int flags = 0; + u_long offset_block_number; + u_long remainder; + u_long resid_block_count; + u_long shadow_block_count; + u_long shadow_block_number; + user_ssize_t this_resid; + + /* figure out which blocks to write */ + offset_block_number = block_truncate(offset, blocksize); + remainder = block_remainder(offset, blocksize); + resid_block_count = block_round(resid + remainder, blocksize); + /* figure out if the first or last blocks are partial writes */ + if (remainder > 0 + && !shadow_map_is_written(vn->sc_shadow_map, + offset_block_number)) { + /* the first block is a partial write */ + flags |= FLAGS_FIRST_BLOCK_PARTIAL; + } + if (resid_block_count > 1 + && !shadow_map_is_written(vn->sc_shadow_map, + offset_block_number + + resid_block_count - 1) + && block_remainder(offset + resid, blocksize) > 0) { + /* the last block is a partial write */ + flags |= FLAGS_LAST_BLOCK_PARTIAL; + } + if (shadow_map_write(vn->sc_shadow_map, 
+ offset_block_number, resid_block_count, + &shadow_block_number, + &shadow_block_count)) { + /* shadow file is growing */ +#if 0 + /* truncate the file to its new length before write */ + off_t size; + size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) + * vn->sc_secsize; + vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, + context_p); +#endif 0 + } + /* write the blocks (or parts thereof) */ + uio_setoffset(uio, (off_t) + shadow_block_number * blocksize + remainder); + this_resid = (off_t)shadow_block_count * blocksize - remainder; + if (this_resid >= resid) { + this_resid = resid; + if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) { + /* copy the last block to the shadow */ + u_long d; + u_long s; + + s = offset_block_number + + resid_block_count - 1; + d = shadow_block_number + + shadow_block_count - 1; + error = vncopy_block_to_shadow(vn, context_p, + s, d); + if (error) { + printf("vnwrite_shadow: failed to copy" + " block %d to shadow block %d\n", + s, d); + break; + } + } + } + uio_setresid(uio, this_resid); + if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) { + /* copy the first block to the shadow */ + error = vncopy_block_to_shadow(vn, context_p, + offset_block_number, + shadow_block_number); + if (error) { + printf("vnwrite_shadow: failed to" + " copy block %d to shadow block %d\n", + offset_block_number, + shadow_block_number); + break; + } + } + error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, context_p); + if (error) { + break; + } + /* figure out how much we actually wrote */ + this_resid -= uio_resid(uio); + if (this_resid == 0) { + printf("vn device: vnwrite_shadow zero length write\n"); + break; + } + resid -= this_resid; + offset += this_resid; + } + uio_setresid(uio, resid); + uio_setoffset(uio, offset); + return (error); +} + static int vnread(dev_t dev, struct uio *uio, int ioflag) { - struct proc * p = current_proc(); - int status; + struct vfs_context context; + int error = 0; + boolean_t funnel_state; + off_t offset; + struct proc * p; + user_ssize_t resid; struct vn_softc * vn; int unit; @@ -230,25 +498,74 @@ vnread(dev_t dev, struct uio *uio, int ioflag) if (vnunit(dev) >= NVNDEVICE) { return (ENXIO); } + p = current_proc(); + funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) { - return (ENXIO); + error = ENXIO; + goto done; } - if (vn->sc_shadow_vp != NULL) { - return (ENODEV); + error = vnode_getwithvid(vn->sc_vp, vn->sc_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnclear(vn, p); + goto done; + } + + resid = uio_resid(uio); + offset = uio_offset(uio); + + /* + * If out of bounds return an error. If at the EOF point, + * simply read less. + */ + if (offset >= (off_t)vn->sc_fsize) { + if (offset > (off_t)vn->sc_fsize) { + error = EINVAL; + } + goto done; + } + /* + * If the request crosses EOF, truncate the request. 
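The FLAGS_FIRST_BLOCK_PARTIAL / FLAGS_LAST_BLOCK_PARTIAL handling above is a read-modify-write dance: when a write only partially covers its first or last block and that block has never been pushed to the shadow, vncopy_block_to_shadow() must first copy the original block over, or the untouched remainder of the block would be lost. The edge detection by itself (a sketch of the same arithmetic):

    #include <stdint.h>

    /* A write's first block is partial when it starts mid-block; its
     * last block is partial when it ends mid-block and the write spans
     * more than one block. */
    static void partial_edges(int64_t offset, int64_t resid, int blocksize,
                              int *first_partial, int *last_partial)
    {
        int64_t remainder = offset % blocksize;
        int64_t nblocks   = (resid + remainder + blocksize - 1) / blocksize;

        *first_partial = (remainder > 0);
        *last_partial  = (nblocks > 1) && ((offset + resid) % blocksize != 0);
    }

In the driver both flags are additionally gated on shadow_map_is_written(), since a block already present in the shadow needs no copy before being overwritten.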
+ */ + if ((offset + resid) > (off_t)vn->sc_fsize) { + resid = vn->sc_fsize - offset; + uio_setresid(uio, resid); } - vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); - status = VOP_READ(vn->sc_vp, uio, ioflag, vn->sc_cred); - VOP_UNLOCK(vn->sc_vp, 0, p); - return (status); + context.vc_proc = p; + context.vc_ucred = vn->sc_cred; + if (vn->sc_shadow_vp != NULL) { + error = vnode_getwithvid(vn->sc_shadow_vp, + vn->sc_shadow_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnode_put(vn->sc_vp); + vnclear(vn, p); + goto done; + } + error = vnread_shadow(vn, uio, ioflag, &context); + vnode_put(vn->sc_shadow_vp); + } else { + error = VNOP_READ(vn->sc_vp, uio, ioflag, &context); + } + vnode_put(vn->sc_vp); + done: + (void) thread_funnel_set(kernel_flock, funnel_state); + return (error); } static int vnwrite(dev_t dev, struct uio *uio, int ioflag) { - struct proc * p = current_proc(); - int status; + struct vfs_context context; + int error; + boolean_t funnel_state; + off_t offset; + struct proc * p; + user_ssize_t resid; struct vn_softc * vn; int unit; @@ -256,83 +573,86 @@ vnwrite(dev_t dev, struct uio *uio, int ioflag) if (vnunit(dev) >= NVNDEVICE) { return (ENXIO); } + p = current_proc(); + funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) { - return (ENXIO); - } - if (vn->sc_shadow_vp != NULL) { - return (ENODEV); + error = ENXIO; + goto done; } if (vn->sc_flags & VNF_READONLY) { - return (EROFS); + error = EROFS; + goto done; } + error = vnode_getwithvid(vn->sc_vp, vn->sc_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnclear(vn, p); + goto done; + } + resid = uio_resid(uio); + offset = uio_offset(uio); - vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); - status = VOP_WRITE(vn->sc_vp, uio, ioflag, vn->sc_cred); - VOP_UNLOCK(vn->sc_vp, 0, p); - - return (status); -} - -static boolean_t -bp_is_mapped(struct buf * bp, vm_offset_t * vaddr) -{ - boolean_t is_mapped = FALSE; + /* + * If out of bounds return an error. If at the EOF point, + * simply write less. + */ + if (offset >= (off_t)vn->sc_fsize) { + if (offset > (off_t)vn->sc_fsize) { + error = EINVAL; + } + goto done; + } + /* + * If the request crosses EOF, truncate the request. 
+ */ + if ((offset + resid) > (off_t)vn->sc_fsize) { + resid = (off_t)vn->sc_fsize - offset; + uio_setresid(uio, resid); + } - if (bp->b_flags & B_NEED_IODONE) { - struct buf * real_bp = (struct buf *)bp->b_real_bp; + context.vc_proc = p; + context.vc_ucred = vn->sc_cred; - if (real_bp && real_bp->b_data) { - *vaddr = (vm_offset_t)real_bp->b_data; - is_mapped = TRUE; + if (vn->sc_shadow_vp != NULL) { + error = vnode_getwithvid(vn->sc_shadow_vp, + vn->sc_shadow_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnode_put(vn->sc_vp); + vnclear(vn, p); + goto done; } + error = vnwrite_shadow(vn, uio, ioflag, &context); + vnode_put(vn->sc_shadow_vp); + } else { + error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context); } - return (is_mapped); -} - -static __inline__ int -file_io(struct vnode * vp, struct ucred * cred, - enum uio_rw op, char * base, off_t offset, long count, - struct proc * p, long * resid) -{ - struct uio auio; - struct iovec aiov; - int error; - - bzero(&auio, sizeof(auio)); - aiov.iov_base = base; - aiov.iov_len = count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_offset = offset; - auio.uio_rw = op; - auio.uio_resid = count; - auio.uio_procp = p; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (op == UIO_READ) - error = VOP_READ(vp, &auio, IO_SYNC, cred); - else - error = VOP_WRITE(vp, &auio, IO_SYNC, cred); - VOP_UNLOCK(vp, 0, p); - *resid = auio.uio_resid; + vnode_put(vn->sc_vp); + done: + (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } static int shadow_read(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p) { + u_long blocksize = vn->sc_secsize; + struct vfs_context context; int error = 0; u_long offset; boolean_t read_shadow; u_long resid; u_long start = 0; - offset = bp->b_blkno; - resid = bp->b_bcount / vn->sc_secsize; - + context.vc_proc = p; + context.vc_ucred = vn->sc_cred; + offset = buf_blkno(bp); + resid = buf_resid(bp) / blocksize; while (resid > 0) { - u_long temp_resid; + user_ssize_t temp_resid; u_long this_offset; u_long this_resid; struct vnode * vp; @@ -346,23 +666,23 @@ shadow_read(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p) else { vp = vn->sc_vp; } - error = file_io(vp, vn->sc_cred, UIO_READ, base + start, - (off_t)this_offset * vn->sc_secsize, - this_resid * vn->sc_secsize, p, &temp_resid); - if (error) + error = file_io(vp, &context, UIO_READ, base + start, + (off_t)this_offset * blocksize, + (user_ssize_t)this_resid * blocksize, + &temp_resid); + if (error) { break; - temp_resid = this_resid - temp_resid / vn->sc_secsize; - if (temp_resid == 0) { - static int printed = 0; - printf("vn device: shadow_write zero length read (printed %d)\n", printed); - printed++; + } + this_resid -= (temp_resid / blocksize); + if (this_resid == 0) { + printf("vn device: shadow_read zero length read\n"); break; } - resid -= temp_resid; - offset += temp_resid; - start += temp_resid * vn->sc_secsize;; + resid -= this_resid; + offset += this_resid; + start += this_resid * blocksize; } - bp->b_resid = resid * vn->sc_secsize; + buf_setresid(bp, resid * blocksize); return (error); } @@ -370,20 +690,22 @@ static int shadow_write(struct vn_softc * vn, struct buf * bp, char * base, struct proc * p) { + u_long blocksize = vn->sc_secsize; + struct vfs_context context; int error = 0; u_long offset; boolean_t shadow_grew; u_long resid; u_long start = 0; - offset = bp->b_blkno; - resid = bp->b_bcount / vn->sc_secsize; - + 
context.vc_proc = p; + context.vc_ucred = vn->sc_cred; + offset = buf_blkno(bp); + resid = buf_resid(bp) / blocksize; while (resid > 0) { - u_long temp_resid; + user_ssize_t temp_resid; u_long this_offset; u_long this_resid; - struct vnode * vp; shadow_grew = shadow_map_write(vn->sc_shadow_map, offset, resid, @@ -393,80 +715,66 @@ shadow_write(struct vn_softc * vn, struct buf * bp, char * base, off_t size; /* truncate the file to its new length before write */ size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) - * vn->sc_secsize; - vn_lock(vn->sc_shadow_vp, LK_EXCLUSIVE | LK_RETRY, p); - VOP_TRUNCATE(vn->sc_shadow_vp, size, - IO_SYNC, vn->sc_cred, p); - VOP_UNLOCK(vn->sc_shadow_vp, 0, p); + * blocksize; + vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, + &context); #endif } - error = file_io(vn->sc_shadow_vp, vn->sc_cred, UIO_WRITE, + error = file_io(vn->sc_shadow_vp, &context, UIO_WRITE, base + start, - (off_t)this_offset * vn->sc_secsize, - this_resid * vn->sc_secsize, p, &temp_resid); + (off_t)this_offset * blocksize, + (user_ssize_t)this_resid * blocksize, + &temp_resid); if (error) { break; } - temp_resid = this_resid - temp_resid / vn->sc_secsize; - if (temp_resid == 0) { - static int printed = 0; - printf("vn device: shadow_write zero length write (printed %d)\n", printed); - printed++; + this_resid -= (temp_resid / blocksize); + if (this_resid == 0) { + printf("vn device: shadow_write zero length write\n"); break; } - resid -= temp_resid; - offset += temp_resid; - start += temp_resid * vn->sc_secsize;; + resid -= this_resid; + offset += this_resid; + start += this_resid * blocksize; } - bp->b_resid = resid * vn->sc_secsize; + buf_setresid(bp, resid * blocksize); return (error); } static int -vn_readwrite_io(struct vn_softc * vn, struct buf * bp) +vn_readwrite_io(struct vn_softc * vn, struct buf * bp, struct proc * p) { int error = 0; char * iov_base; - boolean_t need_unmap = FALSE; - struct proc * p = current_proc(); - vm_offset_t vaddr = NULL; + caddr_t vaddr; - if (bp->b_flags & B_VECTORLIST) { - if (bp_is_mapped(bp, &vaddr) == FALSE) { - if (ubc_upl_map(bp->b_pagelist, &vaddr) - != KERN_SUCCESS) { - panic("vn device: ubc_upl_map failed"); - } - else { - need_unmap = TRUE; - } - } - } - if (error) - return (error); - if (vaddr != NULL) - iov_base = (caddr_t)(vaddr + bp->b_uploffset); - else - iov_base = bp->b_data; + if (buf_map(bp, &vaddr)) + panic("vn device: buf_map failed"); + iov_base = (char *)vaddr; + if (vn->sc_shadow_vp == NULL) { - error = file_io(vn->sc_vp, vn->sc_cred, - bp->b_flags & B_READ ? UIO_READ : UIO_WRITE, - iov_base, (off_t)bp->b_blkno * vn->sc_secsize, - bp->b_bcount, p, &bp->b_resid); + struct vfs_context context; + user_ssize_t temp_resid; + + context.vc_proc = p; + context.vc_ucred = vn->sc_cred; + + error = file_io(vn->sc_vp, &context, + buf_flags(bp) & B_READ ? 
UIO_READ : UIO_WRITE, + iov_base, + (off_t)buf_blkno(bp) * vn->sc_secsize, + buf_resid(bp), &temp_resid); + buf_setresid(bp, temp_resid); } else { - if (bp->b_flags & B_READ) + if (buf_flags(bp) & B_READ) error = shadow_read(vn, bp, iov_base, p); else error = shadow_write(vn, bp, iov_base, p); - if (error == 0) - bp->b_resid = 0; - - } - if (need_unmap) { - ubc_upl_unmap(bp->b_pagelist); } + buf_unmap(bp); + return (error); } @@ -476,94 +784,123 @@ vnstrategy(struct buf *bp) struct vn_softc *vn; int error = 0; long sz; /* in sc_secsize chunks */ + daddr64_t blk_num; + boolean_t funnel_state; + struct proc * p = current_proc(); + struct vnode * shadow_vp = NULL; + struct vnode * vp = NULL; - vn = vn_table + vnunit(bp->b_dev); + funnel_state = thread_funnel_set(kernel_flock, TRUE); + vn = vn_table + vnunit(buf_device(bp)); if ((vn->sc_flags & VNF_INITED) == 0) { - bp->b_error = ENXIO; - bp->b_flags |= B_ERROR; - biodone(bp); - return; + error = ENXIO; + goto done; } - bp->b_resid = bp->b_bcount; + buf_setresid(bp, buf_count(bp)); /* * Check for required alignment. Transfers must be a valid * multiple of the sector size. */ - if (bp->b_bcount % vn->sc_secsize != 0 || - bp->b_blkno % (vn->sc_secsize / DEV_BSIZE) != 0) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR | B_INVAL; - biodone(bp); - return; + blk_num = buf_blkno(bp); + if (buf_count(bp) % vn->sc_secsize != 0) { + error = EINVAL; + goto done; } - sz = howmany(bp->b_bcount, vn->sc_secsize); + sz = howmany(buf_count(bp), vn->sc_secsize); /* * If out of bounds return an error. If at the EOF point, * simply read or write less. */ - if (bp->b_blkno >= vn->sc_size) { - if (bp->b_blkno > vn->sc_size) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR | B_INVAL; + if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) { + if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) { + error = EINVAL; } - biodone(bp); - return; + goto done; } /* * If the request crosses EOF, truncate the request. 
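vnstrategy() above now clamps every transfer against the virtual device's size, in sectors: a start past the end is EINVAL, a start exactly at the end completes with no data, and a transfer crossing the end is shortened. The clamp in isolation (a sketch):

    #include <stdint.h>

    /* Returns -1 for out-of-range, otherwise the (possibly shortened)
     * number of sectors to transfer.  'blk' and 'dev_size' in sectors. */
    static int64_t clamp_transfer(int64_t blk, int64_t nsectors,
                                  uint64_t dev_size)
    {
        if (blk < 0 || (uint64_t)blk > dev_size)
            return -1;                     /* starts beyond the device: EINVAL */
        if ((uint64_t)blk == dev_size)
            return 0;                      /* starts at EOF: nothing to do */
        if ((uint64_t)(blk + nsectors) > dev_size)
            nsectors = (int64_t)(dev_size - blk);  /* crosses EOF: truncate */
        return nsectors;
    }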
*/ - if ((bp->b_blkno + sz) > vn->sc_size) { - bp->b_bcount = (vn->sc_size - bp->b_blkno) * vn->sc_secsize; - bp->b_resid = bp->b_bcount; + if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) { + buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize); + buf_setresid(bp, buf_count(bp)); } - - if (vn->sc_vp) { - error = vn_readwrite_io(vn, bp); - if (error) { - bp->b_error = error; - bp->b_flags |= B_ERROR; + vp = vn->sc_vp; + if (vp == NULL) { + error = ENXIO; + goto done; + } + error = vnode_getwithvid(vp, vn->sc_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnclear(vn, p); + goto done; + } + shadow_vp = vn->sc_shadow_vp; + if (shadow_vp != NULL) { + error = vnode_getwithvid(shadow_vp, + vn->sc_shadow_vid); + if (error != 0) { + /* the vnode is no longer available, abort */ + error = ENXIO; + vnode_put(vn->sc_vp); + vnclear(vn, p); + goto done; } - biodone(bp); } - else { - bp->b_flags |= B_ERROR; - bp->b_error = EINVAL; - biodone(bp); + error = vn_readwrite_io(vn, bp, p); + vnode_put(vp); + if (shadow_vp != NULL) { + vnode_put(shadow_vp); } + + done: + (void) thread_funnel_set(kernel_flock, funnel_state); + if (error) { + buf_seterror(bp, error); + } + buf_biodone(bp); + return; } /* ARGSUSED */ static int -vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, +vnioctl(dev_t dev, u_long cmd, caddr_t data, + __unused int flag, struct proc *p, int is_char) { struct vn_softc *vn; - struct vn_ioctl *vio; + struct user_vn_ioctl *viop; int error; - u_long *f; - int num = 0; + u_int32_t *f; u_int64_t * o; int unit; - int size = 0; + struct vfsioattr ioattr; + struct user_vn_ioctl user_vnio; + boolean_t funnel_state; unit = vnunit(dev); if (vnunit(dev) >= NVNDEVICE) { return (ENXIO); } + + funnel_state = thread_funnel_set(kernel_flock, TRUE); vn = vn_table + unit; - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); + error = proc_suser(p); + if (error) { + goto done; + } - vio = (struct vn_ioctl *)data; - f = (u_long*)data; + viop = (struct user_vn_ioctl *)data; + f = (u_int32_t *)data; o = (u_int64_t *)data; switch (cmd) { case VNIOCDETACH: + case VNIOCDETACH64: case DKIOCGETBLOCKSIZE: - case DKIOCSETBLOCKSIZE: + case DKIOCSETBLOCKSIZE: case DKIOCGETMAXBLOCKCOUNTREAD: case DKIOCGETMAXBLOCKCOUNTWRITE: case DKIOCGETMAXSEGMENTCOUNTREAD: @@ -573,59 +910,67 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, case DKIOCGETBLOCKCOUNT: case DKIOCGETBLOCKCOUNT32: if ((vn->sc_flags & VNF_INITED) == 0) { - return (ENXIO); + error = ENXIO; + goto done; } break; default: break; } + + if (vn->sc_vp != NULL) + vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr); + else + bzero(&ioattr, sizeof(ioattr)); + switch (cmd) { + case DKIOCISVIRTUAL: + *f = 1; + break; case DKIOCGETMAXBLOCKCOUNTREAD: - vfs_io_attributes(vn->sc_vp, B_READ, &size, &num); - *o = size / vn->sc_secsize; + *o = ioattr.io_maxreadcnt / vn->sc_secsize; break; case DKIOCGETMAXBLOCKCOUNTWRITE: - vfs_io_attributes(vn->sc_vp, B_WRITE, &size, &num); - *o = size / vn->sc_secsize; + *o = ioattr.io_maxwritecnt / vn->sc_secsize; break; case DKIOCGETMAXBYTECOUNTREAD: - vfs_io_attributes(vn->sc_vp, B_READ, &size, &num); - *o = size; + *o = ioattr.io_maxreadcnt; break; case DKIOCGETMAXBYTECOUNTWRITE: - vfs_io_attributes(vn->sc_vp, B_WRITE, &size, &num); - *o = size; + *o = ioattr.io_maxwritecnt; break; case DKIOCGETMAXSEGMENTCOUNTREAD: - vfs_io_attributes(vn->sc_vp, B_READ, &size, &num); - *o = num; + *o = ioattr.io_segreadcnt; break; 
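The DKIOC cases above now answer from one vfs_ioattr() snapshot instead of per-query vfs_io_attributes()/vfs_io_maxsegsize() calls; byte limits are reported as-is, and block-count limits are the byte limits divided by the sector size. Schematically (a sketch; the struct mirrors only the fields vnioctl() consumes):

    #include <stdint.h>

    struct ioattr_like {           /* subset of the vfsioattr fields used */
        uint32_t io_maxreadcnt;    /* max bytes per read transfer */
        uint32_t io_maxwritecnt;   /* max bytes per write transfer */
        uint32_t io_segreadcnt;    /* max scatter/gather segments, read */
        uint32_t io_segwritecnt;   /* max scatter/gather segments, write */
    };

    /* DKIOCGETMAXBYTECOUNTREAD returns io_maxreadcnt directly;
     * DKIOCGETMAXBLOCKCOUNTREAD is the same limit in sector units. */
    static uint64_t max_blocks(uint32_t max_bytes, uint32_t secsize)
    {
        return (uint64_t)max_bytes / secsize;
    }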
case DKIOCGETMAXSEGMENTCOUNTWRITE: - vfs_io_attributes(vn->sc_vp, B_WRITE, &size, &num); - *o = num; + *o = ioattr.io_segwritecnt; break; case DKIOCGETMAXSEGMENTBYTECOUNTREAD: - vfs_io_maxsegsize(vn->sc_vp, B_READ, &size); - *o = size; + *o = ioattr.io_maxsegreadsize; break; case DKIOCGETMAXSEGMENTBYTECOUNTWRITE: - vfs_io_maxsegsize(vn->sc_vp, B_WRITE, &size); - *o = size; + *o = ioattr.io_maxsegwritesize; break; - case DKIOCGETBLOCKSIZE: - *f = vn->sc_secsize; + case DKIOCGETBLOCKSIZE: + *f = vn->sc_secsize; break; - case DKIOCSETBLOCKSIZE: + case DKIOCSETBLOCKSIZE: if (is_char) { /* can only set block size on block device */ - return (ENODEV); - } - if (vn->sc_shadow_vp != NULL) { - /* can't set the block size if already shadowing */ - return (EBUSY); + error = ENODEV; + break; } if (*f < DEV_BSIZE) { - return (EINVAL); + error = EINVAL; + break; + } + if (vn->sc_shadow_vp != NULL) { + if (*f == (unsigned)vn->sc_secsize) { + break; + } + /* can't change the block size if already shadowing */ + error = EBUSY; + break; } vn->sc_secsize = *f; /* recompute the size in terms of the new blocksize */ @@ -641,37 +986,57 @@ *o = vn->sc_size; break; case VNIOCSHADOW: + case VNIOCSHADOW64: if (vn->sc_shadow_vp != NULL) { - return (EBUSY); + error = EBUSY; + break; } if (vn->sc_vp == NULL) { /* must be attached before we can shadow */ - return (EINVAL); + error = EINVAL; + break; + } + if (!proc_is64bit(p)) { + /* downstream code expects LP64 version of vn_ioctl structure */ + vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio); + viop = &user_vnio; } - if (vio->vn_file == NULL) { - return (EINVAL); + if (viop->vn_file == USER_ADDR_NULL) { + error = EINVAL; + break; } - error = vniocattach_shadow(vn, vio, dev, 0, p); + error = vniocattach_shadow(vn, viop, dev, 0, p); break; case VNIOCATTACH: + case VNIOCATTACH64: if (is_char) { /* attach only on block device */ - return (ENODEV); + error = ENODEV; + break; } if (vn->sc_flags & VNF_INITED) { - return (EBUSY); + error = EBUSY; + break; + } + if (!proc_is64bit(p)) { + /* downstream code expects LP64 version of vn_ioctl structure */ + vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio); + viop = &user_vnio; } - if (vio->vn_file == NULL) { - return (EINVAL); + if (viop->vn_file == USER_ADDR_NULL) { + error = EINVAL; + break; } - error = vniocattach_file(vn, vio, dev, 0, p); + error = vniocattach_file(vn, viop, dev, 0, p); break; case VNIOCDETACH: + case VNIOCDETACH64: if (is_char) { /* detach only on block device */ - return (ENODEV); + error = ENODEV; + break; } /* Note: spec_open won't open a mounted block device */ @@ -683,7 +1048,7 @@ * How are these problems handled for removable and failing * hardware devices?
(Hint: They are not) */ - vnclear(vn); + vnclear(vn, p); break; case VNIOCGSET: @@ -710,6 +1075,8 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p, error = ENOTTY; break; } + done: + (void) thread_funnel_set(kernel_flock, funnel_state); return(error); } @@ -734,34 +1101,44 @@ vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) static int vniocattach_file(struct vn_softc *vn, - struct vn_ioctl *vio, + struct user_vn_ioctl *vniop, dev_t dev, int in_kernel, struct proc *p) { - struct vattr vattr; + dev_t cdev; + struct vfs_context context; + kauth_cred_t cred; struct nameidata nd; + off_t file_size; int error, flags; + + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); flags = FREAD|FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context); } else { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + vniop->vn_file, &context); } + /* vn_open gives both long- and short-term references */ error = vn_open(&nd, flags, 0); if (error) { if (error != EACCES && error != EPERM && error != EROFS) return (error); flags &= ~FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, - vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, + vniop->vn_file, &context); } else { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, - vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + vniop->vn_file, &context); } error = vn_open(&nd, flags, 0); if (error) @@ -770,99 +1147,93 @@ vniocattach_file(struct vn_softc *vn, if (nd.ni_vp->v_type != VREG) { error = EINVAL; } - else if (ubc_isinuse(nd.ni_vp, 1)) { - error = EBUSY; - } else { - error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); + error = vnode_size(nd.ni_vp, &file_size, &context); } if (error != 0) { - VOP_UNLOCK(nd.ni_vp, 0, p); - (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); + (void) vn_close(nd.ni_vp, flags, proc_ucred(p), p); + vnode_put(nd.ni_vp); return (error); } - vn->sc_vp = nd.ni_vp; - vn->sc_vp->v_flag |= VNOCACHE_DATA; - VOP_UNLOCK(nd.ni_vp, 0, p); - - vn->sc_open_flags = flags; - - /* - * If the size is specified, override the file attributes. Note that - * the vn_size argument is in PAGE_SIZE sized blocks. 
- */ -#if 0 - if (vio->vn_size) - vn->sc_size = (quad_t)vio->vn_size * PAGE_SIZE / vn->sc_secsize; - else - vn->sc_size = vattr.va_size / vn->sc_secsize; -#endif - vn->sc_secsize = DEV_BSIZE; - vn->sc_fsize = vattr.va_size; - vn->sc_size = vattr.va_size / vn->sc_secsize; - error = vnsetcred(vn, p); + cred = kauth_cred_proc_ref(p); + nd.ni_vp->v_flag |= VNOCACHE_DATA; + error = setcred(nd.ni_vp, p, cred); if (error) { - (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); + (void)vn_close(nd.ni_vp, flags, proc_ucred(p), p); + vnode_put(nd.ni_vp); + kauth_cred_rele(cred); return(error); } - { - dev_t cdev = makedev(vndevice_cdev_major, - minor(dev)); - vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR, - UID_ROOT, GID_OPERATOR, - 0600, "rvn%d", - minor(dev)); - } + vn->sc_secsize = DEV_BSIZE; + vn->sc_fsize = file_size; + vn->sc_size = file_size / vn->sc_secsize; + vn->sc_vp = nd.ni_vp; + vn->sc_vid = vnode_vid(nd.ni_vp); + vn->sc_open_flags = flags; + vn->sc_cred = cred; + cdev = makedev(vndevice_cdev_major, minor(dev)); + vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR, + UID_ROOT, GID_OPERATOR, + 0600, "rvn%d", + minor(dev)); vn->sc_flags |= VNF_INITED; if (flags == FREAD) vn->sc_flags |= VNF_READONLY; + /* lose the short-term reference */ + vnode_put(nd.ni_vp); return(0); } static int -vniocattach_shadow(vn, vio, dev, in_kernel, p) - struct vn_softc *vn; - struct vn_ioctl *vio; - dev_t dev; - int in_kernel; - struct proc *p; +vniocattach_shadow(struct vn_softc *vn, struct user_vn_ioctl *vniop, + __unused int dev, int in_kernel, struct proc *p) { - struct vattr vattr; + struct vfs_context context; struct nameidata nd; int error, flags; shadow_map_t * map; + off_t file_size; + + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); flags = FREAD|FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, &context); } else { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); + NDINIT(&nd, LOOKUP, FOLLOW, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + vniop->vn_file, &context); } + /* vn_open gives both long- and short-term references */ error = vn_open(&nd, flags, 0); if (error) { /* shadow MUST be writable! */ return (error); } - if (nd.ni_vp->v_type != VREG || - (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) { - VOP_UNLOCK(nd.ni_vp, 0, p); - (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); + if (nd.ni_vp->v_type != VREG + || (error = vnode_size(nd.ni_vp, &file_size, &context))) { + (void)vn_close(nd.ni_vp, flags, proc_ucred(p), p); + vnode_put(nd.ni_vp); return (error ? 
error : EINVAL); } - vn->sc_shadow_vp = nd.ni_vp; - vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA; - VOP_UNLOCK(nd.ni_vp, 0, p); - - map = shadow_map_create(vn->sc_fsize, vattr.va_size, + map = shadow_map_create(vn->sc_fsize, file_size, 0, vn->sc_secsize); if (map == NULL) { - (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); + (void)vn_close(nd.ni_vp, flags, proc_ucred(p), p); + vnode_put(nd.ni_vp); vn->sc_shadow_vp = NULL; return (ENOMEM); } + vn->sc_shadow_vp = nd.ni_vp; + vn->sc_shadow_vid = vnode_vid(nd.ni_vp); + vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA; vn->sc_shadow_map = map; vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */ + + /* lose the short-term reference */ + vnode_put(nd.ni_vp); return(0); } @@ -870,18 +1241,17 @@ int vndevice_root_image(char * path, char devname[], dev_t * dev_p) { int error = 0; - int flags; - struct vn_softc * vn; - struct vn_ioctl vio; + struct vn_softc * vn; + struct user_vn_ioctl vnio; - vio.vn_file = path; - vio.vn_size = 0; + vnio.vn_file = CAST_USER_ADDR_T(path); + vnio.vn_size = 0; vn = vn_table + ROOT_IMAGE_UNIT; *dev_p = makedev(vndevice_bdev_major, ROOT_IMAGE_UNIT); sprintf(devname, "vn%d", ROOT_IMAGE_UNIT); - error = vniocattach_file(vn, &vio, *dev_p, 1, current_proc()); + error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc()); return (error); } @@ -891,60 +1261,34 @@ vndevice_root_image(char * path, char devname[], dev_t * dev_p) * to this "disk" is essentially as root. Note that credentials may change * if some other uid can write directly to the mapped file (NFS). */ -int -vnsetcred(struct vn_softc *vn, struct proc * p) +static int +setcred(struct vnode * vp, struct proc * p, kauth_cred_t cred) { char *tmpbuf; int error = 0; - struct proc * current_proc(); - struct ucred * cred = p->p_ucred; - - /* - * Set credits in our softc - */ - - if (vn->sc_cred) - crfree(vn->sc_cred); - vn->sc_cred = crdup(cred); + struct vfs_context context; /* * Horrible kludge to establish credentials for NFS XXX. 
*/ - - if (vn->sc_vp) { - struct uio auio; - struct iovec aiov; - - tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK); - bzero(&auio, sizeof(auio)); - - aiov.iov_base = tmpbuf; - aiov.iov_len = vn->sc_secsize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_resid = aiov.iov_len; - vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); - VOP_UNLOCK(vn->sc_vp, 0, p); - FREE(tmpbuf, M_TEMP); - } + context.vc_proc = p; + context.vc_ucred = cred; + tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK); + error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL); + FREE(tmpbuf, M_TEMP); return (error); } void -vnclear(struct vn_softc *vn) +vnclear(struct vn_softc *vn, struct proc * p) { - int flags; - struct proc * p = current_proc(); /* XXX */ - if (vn->sc_vp != NULL) { + /* release long-term reference */ (void)vn_close(vn->sc_vp, vn->sc_open_flags, vn->sc_cred, p); vn->sc_vp = NULL; } if (vn->sc_shadow_vp != NULL) { + /* release long-term reference */ (void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE, vn->sc_cred, p); vn->sc_shadow_vp = NULL; @@ -953,9 +1297,9 @@ vnclear(struct vn_softc *vn) shadow_map_free(vn->sc_shadow_map); vn->sc_shadow_map = NULL; } - vn->sc_flags = ~(VNF_INITED | VNF_READONLY); + vn->sc_flags &= ~(VNF_INITED | VNF_READONLY); if (vn->sc_cred) { - crfree(vn->sc_cred); + kauth_cred_rele(vn->sc_cred); vn->sc_cred = NULL; } vn->sc_size = 0; @@ -969,19 +1313,24 @@ vnclear(struct vn_softc *vn) static int vnsize(dev_t dev) { + int secsize; struct vn_softc *vn; int unit; + boolean_t funnel_state; unit = vnunit(dev); if (vnunit(dev) >= NVNDEVICE) { - return (ENXIO); + return (-1); } - vn = vn_table + unit; + funnel_state = thread_funnel_set(kernel_flock, TRUE); + vn = vn_table + unit; if ((vn->sc_flags & VNF_INITED) == 0) - return(-1); - - return(vn->sc_secsize); + secsize = -1; + else + secsize = vn->sc_secsize; + (void) thread_funnel_set(kernel_flock, funnel_state); + return (secsize); } #define CDEV_MAJOR -1 @@ -989,7 +1338,7 @@ vnsize(dev_t dev) static int vndevice_inited = 0; void -vndevice_init() +vndevice_init(void) { int i; @@ -1019,4 +1368,13 @@ vndevice_init() printf("vninit: devfs_make_node failed!\n"); } } + +static void +vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to) +{ + to->vn_file = CAST_USER_ADDR_T(from->vn_file); + to->vn_size = from->vn_size; + to->vn_control = from->vn_control; +} + #endif /* NVNDEVICE */ diff --git a/bsd/hfs/MacOSStubs.c b/bsd/hfs/MacOSStubs.c index 1dd6e6860..1b0fee293 100644 --- a/bsd/hfs/MacOSStubs.c +++ b/bsd/hfs/MacOSStubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -43,7 +43,11 @@ struct timezone gTimeZone = {8*60,1}; */ UInt32 GetTimeUTC(void) { - return (time.tv_sec + MAC_GMT_FACTOR); + struct timeval tv; + + microtime(&tv); + + return (tv.tv_sec + MAC_GMT_FACTOR); } @@ -93,7 +97,7 @@ UInt32 UTCToLocal(UInt32 utcTime) * to_bsd_time - convert from Mac OS time (seconds since 1/1/1904) * to BSD time (seconds since 1/1/1970) */ -u_int32_t to_bsd_time(u_int32_t hfs_time) +time_t to_bsd_time(u_int32_t hfs_time) { u_int32_t gmt = hfs_time; @@ -102,16 +106,16 @@ u_int32_t to_bsd_time(u_int32_t hfs_time) else gmt = 0; /* don't let date go negative! 
*/ - return gmt; + return (time_t)gmt; } /* * to_hfs_time - convert from BSD time (seconds since 1/1/1970) * to Mac OS time (seconds since 1/1/1904) */ -u_int32_t to_hfs_time(u_int32_t bsd_time) +u_int32_t to_hfs_time(time_t bsd_time) { - u_int32_t hfs_time = bsd_time; + u_int32_t hfs_time = (u_int32_t)bsd_time; /* don't adjust zero - treat as uninitialzed */ if (hfs_time != 0) diff --git a/bsd/hfs/Makefile b/bsd/hfs/Makefile index bea8d9526..cdc1fb8ba 100644 --- a/bsd/hfs/Makefile +++ b/bsd/hfs/Makefile @@ -24,7 +24,7 @@ DATAFILES = \ PRIVATE_DATAFILES = \ hfs.h hfs_attrlist.h hfs_catalog.h hfs_cnode.h hfs_endian.h \ - hfs_lockf.h hfs_macos_defs.h hfs_quota.h rangelist.h + hfs_fsctl.h hfs_macos_defs.h hfs_quota.h rangelist.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index defa4dc72..e9b96c239 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,15 +30,15 @@ #ifdef KERNEL #ifdef __APPLE_API_PRIVATE #include -#include #include #include -#include #include #include #include #include +#include + #include #include @@ -48,10 +48,6 @@ #include #include - -struct uio; // This is more effective than #include in case KERNEL is undefined... -struct hfslockf; /* For advisory locking */ - /* * Just reported via MIG interface. */ @@ -72,17 +68,8 @@ enum { kMDBSize = 512 }; /* Size of I/O transfer to read entire MDB */ enum { kMasterDirectoryBlock = 2 }; /* MDB offset on disk in 512-byte blocks */ enum { kMDBOffset = kMasterDirectoryBlock * 512 }; /* MDB offset on disk in bytes */ -enum { - kUnknownID = 0, - kRootParID = 1, - kRootDirID = 2 -}; +#define kRootDirID kHFSRootFolderID -enum { - kDataFork, - kRsrcFork, - kDirectory -}; /* number of locked buffer caches to hold for b-tree meta data */ #define kMaxLockedMetaBuffers 32 @@ -131,103 +118,79 @@ extern struct timezone gTimeZone; /* Internal Data structures*/ -struct vcb_t { - u_int16_t vcbSigWord; - int16_t vcbAtrb; - int16_t vcbFlags; - int16_t vcbspare; - u_int32_t vcbJinfoBlock; - - u_int32_t vcbCrDate; - u_int32_t vcbLsMod; - u_int32_t vcbVolBkUp; - - int32_t vcbFilCnt; - int32_t vcbDirCnt; - u_int32_t blockSize; /* size of allocation blocks */ - u_int32_t totalBlocks; /* total allocation blocks */ - u_int32_t freeBlocks; /* free allocation blocks */ - u_int32_t nextAllocation; /* start of next allocation search */ - int32_t vcbClpSiz; - u_int32_t vcbNxtCNID; - u_int32_t vcbCNIDGen; - int32_t vcbWrCnt; - - int32_t vcbFndrInfo[8]; - - u_int64_t encodingsBitmap; /* HFS Plus only */ - - u_int16_t vcbNmFls; /* HFS only */ - u_int16_t vcbNmRtDirs; /* HFS only */ - int16_t vcbVBMSt; /* HFS only */ - int16_t vcbAlBlSt; /* HFS only */ - - struct vnode * extentsRefNum; - struct vnode * catalogRefNum; - struct vnode * allocationsRefNum; - - u_int8_t vcbVN[256]; /* volume name in UTF-8 */ - u_int32_t volumeNameEncodingHint; - u_int32_t hfsPlusIOPosOffset; /* Disk block where HFS+ starts */ - u_int32_t vcbVBMIOSize; /* volume bitmap I/O size */ - - /* cache of largest known free extents */ - u_int32_t vcbFreeExtCnt; - HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents]; - - u_int32_t reserveBlocks; /* free block reserve */ - u_int32_t loanedBlocks; /* blocks on loan for delayed allocations */ - - u_int32_t localCreateDate; /* creation times for HFS+ volumes are in local time */ - simple_lock_data_t vcbSimpleLock; /* simple lock to 
allow concurrent access to vcb data */ -}; -typedef struct vcb_t ExtendedVCB; - #define kHFS_DamagedVolume 0x1 /* This volume has errors, unmount dirty */ /* XXX */ #define MARK_VOLUMEDAMAGED(fcb) -/* - * NOTE: The code relies on being able to cast an ExtendedVCB* to a vfsVCB* in order - * to gain access to the mount point pointer from a pointer - * to an ExtendedVCB. DO NOT INSERT OTHER FIELDS BEFORE THE vcb FIELD!! - * - * vcbFlags, vcbLsMod, vcbFilCnt, vcbDirCnt, vcbNxtCNID, etc - * are locked by the hfs_lock simple lock. - */ -typedef struct vfsVCB { - ExtendedVCB vcb_vcb; - struct hfsmount *vcb_hfsmp; /* Pointer to hfsmount structure */ -} vfsVCB_t; - - /* This structure describes the HFS specific mount structure data. */ typedef struct hfsmount { - u_int32_t hfs_flags; /* see below */ - + u_int32_t hfs_flags; /* see below */ + /* Physical Description */ - u_long hfs_phys_block_count; /* Num of PHYSICAL blocks of volume */ - u_long hfs_phys_block_size; /* Always a multiple of 512 */ + u_long hfs_phys_block_size; /* Always a multiple of 512 */ + daddr64_t hfs_phys_block_count; /* Num of PHYSICAL blocks of volume */ + daddr64_t hfs_alt_id_sector; /* location of alternate VH/MDB */ /* Access to VFS and devices */ struct mount *hfs_mp; /* filesystem vfs structure */ struct vnode *hfs_devvp; /* block device mounted vnode */ - dev_t hfs_raw_dev; /* device mounted */ - struct netexport hfs_export; /* Export information */ - u_int32_t hfs_logBlockSize; /* Size of buffer cache buffer for I/O */ + struct vnode * hfs_extents_vp; + struct vnode * hfs_catalog_vp; + struct vnode * hfs_allocation_vp; + struct vnode * hfs_attribute_vp; + dev_t hfs_raw_dev; /* device mounted */ + u_int32_t hfs_logBlockSize; /* Size of buffer cache buffer for I/O */ /* Default values for HFS standard and non-init access */ - uid_t hfs_uid; /* uid to set as owner of the files */ - gid_t hfs_gid; /* gid to set as owner of the files */ - mode_t hfs_dir_mask; /* mask to and with directory protection bits */ - mode_t hfs_file_mask; /* mask to and with file protection bits */ - u_long hfs_encoding; /* Defualt encoding for non hfs+ volumes */ - - /* HFS Specific */ - struct vfsVCB hfs_vcb; + uid_t hfs_uid; /* uid to set as owner of the files */ + gid_t hfs_gid; /* gid to set as owner of the files */ + mode_t hfs_dir_mask; /* mask to and with directory protection bits */ + mode_t hfs_file_mask; /* mask to and with file protection bits */ + u_long hfs_encoding; /* Defualt encoding for non hfs+ volumes */ + + /* Persistent fields (on disk, dynamic) */ + time_t hfs_mtime; /* file system last modification time */ + u_int32_t hfs_filecount; /* number of files in file system */ + u_int32_t hfs_dircount; /* number of directories in file system */ + u_int32_t freeBlocks; /* free allocation blocks */ + u_int32_t nextAllocation; /* start of next allocation search */ + u_int32_t vcbNxtCNID; /* next unused catalog node ID */ + u_int32_t vcbWrCnt; /* file system write count */ + u_int64_t encodingsBitmap; /* in-use encodings */ + u_int16_t vcbNmFls; /* HFS Only - root dir file count */ + u_int16_t vcbNmRtDirs; /* HFS Only - root dir directory count */ + + /* Persistent fields (on disk, static) */ + u_int16_t vcbSigWord; + int16_t vcbFlags; + u_int32_t vcbAtrb; + u_int32_t vcbJinfoBlock; + time_t hfs_itime; /* file system creation time */ + time_t hfs_btime; /* file system last backup time */ + u_int32_t blockSize; /* size of allocation blocks */ + u_int32_t totalBlocks; /* total allocation blocks */ + int32_t vcbClpSiz; + u_int32_t 
vcbFndrInfo[8]; + int16_t vcbVBMSt; /* HFS only */ + int16_t vcbAlBlSt; /* HFS only */ + + /* vcb stuff */ + u_int8_t vcbVN[256]; /* volume name in UTF-8 */ + u_int32_t volumeNameEncodingHint; + u_int32_t hfsPlusIOPosOffset; /* Disk block where HFS+ starts */ + u_int32_t vcbVBMIOSize; /* volume bitmap I/O size */ + + /* cache of largest known free extents */ + u_int32_t vcbFreeExtCnt; + HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents]; + + u_int32_t reserveBlocks; /* free block reserve */ + u_int32_t loanedBlocks; /* blocks on loan for delayed allocations */ + + u_int32_t localCreateDate; /* creation times for HFS+ volumes are in local time */ struct cat_desc hfs_privdir_desc; struct cat_attr hfs_privdir_attr; u_int32_t hfs_metadata_createdate; @@ -244,14 +207,18 @@ typedef struct hfsmount { u_int32_t jnl_size; u_int32_t hfs_jnlfileid; u_int32_t hfs_jnlinfoblkid; - volatile int readers; - volatile int blocker; + lck_rw_t hfs_global_lock; + u_int32_t hfs_global_lock_nesting; /* Notification variables: */ unsigned long hfs_notification_conditions; u_int32_t hfs_freespace_notify_warninglimit; u_int32_t hfs_freespace_notify_desiredlevel; + /* time mounted and last mounted mod time "snapshot" */ + time_t hfs_mount_time; + time_t hfs_last_mounted_mtime; + /* Metadata allocation zone variables: */ u_int32_t hfs_metazone_start; u_int32_t hfs_metazone_end; @@ -263,6 +230,7 @@ typedef struct hfsmount { int hfs_catalog_maxblks; /* Hot File Clustering variables: */ + lck_mtx_t hfc_mutex; /* serialize hot file stages */ enum hfc_stage hfc_stage; /* what are we up to... */ time_t hfc_timebase; /* recording period start time */ time_t hfc_timeout; /* recording period stop time */ @@ -275,8 +243,24 @@ typedef struct hfsmount { struct vnode * hfs_backingfs_rootvp; int hfs_sparsebandblks; #endif + size_t hfs_max_inline_attrsize; + + lck_mtx_t hfs_mutex; /* protects access to hfsmount data */ + void *hfs_freezing_proc; /* who froze the fs */ } hfsmount_t; +typedef hfsmount_t ExtendedVCB; + +/* Aliases for legacy field names */ +#define vcbCrDate hfs_itime +#define vcbLsMod hfs_mtime +#define vcbVolBkUp hfs_btime +#define extentsRefNum hfs_extents_vp +#define catalogRefNum hfs_catalog_vp +#define allocationsRefNum hfs_allocation_vp +#define vcbFilCnt hfs_filecount +#define vcbDirCnt hfs_dircount + /* HFS mount point flags */ #define HFS_READ_ONLY 0x001 @@ -289,47 +273,24 @@ typedef struct hfsmount { #define HFS_METADATA_ZONE 0x080 #define HFS_FRAGMENTED_FREESPACE 0x100 #define HFS_NEED_JNL_RESET 0x200 - -#ifdef HFS_SPARSE_DEV #define HFS_HAS_SPARSE_DEVICE 0x400 -#endif -#define hfs_global_shared_lock_acquire(hfsmp) \ - do { \ - if (hfsmp->blocker) { \ - tsleep((caddr_t)&hfsmp->blocker, PRIBIO, "journal_blocker", 0); \ - continue; \ - } \ - hfsmp->readers++; \ - break; \ - } while (1) - -#define hfs_global_shared_lock_release(hfsmp) \ - do { \ - hfsmp->readers--; \ - if (hfsmp->readers == 0) { \ - wakeup((caddr_t)&hfsmp->readers); \ - } \ - } while (0) - -#define hfs_global_exclusive_lock_acquire(hfsmp) \ - do { \ - if (hfsmp->blocker) { \ - tsleep((caddr_t)&hfsmp->blocker, PRIBIO, "journal_blocker", 0); \ - continue; \ - } \ - if (hfsmp->readers != 0) { \ - tsleep((caddr_t)&hfsmp->readers, PRIBIO, "journal_enable/disble", 0); \ - continue; \ - } \ - hfsmp->blocker = 1; \ - break; \ - } while (1) - -#define hfs_global_exclusive_lock_release(hfsmp) \ - hfsmp->blocker = 0; \ - wakeup((caddr_t)&hfsmp->blocker) +#define HFS_MOUNT_LOCK(hfsmp, metadata) \ + { \ + if ((metadata) && 1) \ + 
lck_mtx_lock(&(hfsmp)->hfs_mutex); \ + } \ + +#define HFS_MOUNT_UNLOCK(hfsmp, metadata) \ + { \ + if ((metadata) && 1) \ + lck_mtx_unlock(&(hfsmp)->hfs_mutex); \ + } \ + +#define hfs_global_exclusive_lock_acquire(hfsmp) lck_rw_lock_exclusive(&(hfsmp)->hfs_global_lock) +#define hfs_global_exclusive_lock_release(hfsmp) lck_rw_done(&(hfsmp)->hfs_global_lock) + #define MAXHFSVNODELEN 31 @@ -341,28 +302,19 @@ typedef struct filefork FCB; (void) sprintf((name), "%s%d", HFS_INODE_PREFIX, (linkno)) -/* structure to hold a "." or ".." directory entry (12 bytes) */ -typedef struct hfsdotentry { - u_int32_t d_fileno; /* unique file number */ - u_int16_t d_reclen; /* length of this structure */ - u_int8_t d_type; /* dirent file type */ - u_int8_t d_namelen; /* len of filename */ - char d_name[4]; /* "." or ".." */ -} hfsdotentry; #define HFS_AVERAGE_NAME_SIZE 22 #define AVERAGE_HFSDIRENTRY_SIZE (8+HFS_AVERAGE_NAME_SIZE+4) -#define MAX_HFSDIRENTRY_SIZE sizeof(struct dirent) -#define DIRENTRY_SIZE(namlen) \ - ((sizeof(struct dirent) - (NAME_MAX+1)) + (((namlen)+1 + 3) &~ 3)) +#define STD_DIRENT_LEN(namlen) \ + ((sizeof(struct dirent) - (NAME_MAX+1)) + (((namlen)+1 + 3) &~ 3)) +#define EXT_DIRENT_LEN(namlen) \ + ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 3) & ~3) enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; -enum { kdirentMaxNameBytes = NAME_MAX }; - /* macro to determine if hfs or hfsplus */ #define ISHFSPLUS(VCB) ((VCB)->vcbSigWord == kHFSPlusSigWord) @@ -372,25 +324,25 @@ enum { kdirentMaxNameBytes = NAME_MAX }; /* * Various ways to acquire a VFS mount point pointer: */ -#define VTOVFS(VP) ((VP)->v_mount) +#define VTOVFS(VP) vnode_mount((VP)) #define HFSTOVFS(HFSMP) ((HFSMP)->hfs_mp) -#define VCBTOVFS(VCB) (((struct vfsVCB *)(VCB))->vcb_hfsmp->hfs_mp) +#define VCBTOVFS(VCB) HFSTOVFS(VCB) /* * Various ways to acquire an HFS mount point pointer: */ -#define VTOHFS(VP) ((struct hfsmount *)((VP)->v_mount->mnt_data)) -#define VFSTOHFS(MP) ((struct hfsmount *)(MP)->mnt_data) -#define VCBTOHFS(VCB) (((struct vfsVCB *)(VCB))->vcb_hfsmp) -#define FCBTOHFS(FCB) ((struct hfsmount *)(FCB)->ff_cp->c_vp->v_mount->mnt_data) +#define VTOHFS(VP) ((struct hfsmount *)vfs_fsprivate(vnode_mount((VP)))) +#define VFSTOHFS(MP) ((struct hfsmount *)vfs_fsprivate((MP))) +#define VCBTOHFS(VCB) (VCB) +#define FCBTOHFS(FCB) ((struct hfsmount *)vfs_fsprivate(vnode_mount((FCB)->ff_cp->c_vp))) /* - * Various ways to acquire a VCB pointer: + * Various ways to acquire a VCB (legacy) pointer: */ -#define VTOVCB(VP) (&(((struct hfsmount *)((VP)->v_mount->mnt_data))->hfs_vcb.vcb_vcb)) -#define VFSTOVCB(MP) (&(((struct hfsmount *)(MP)->mnt_data)->hfs_vcb.vcb_vcb)) -#define HFSTOVCB(HFSMP) (&(HFSMP)->hfs_vcb.vcb_vcb) -#define FCBTOVCB(FCB) (&(((struct hfsmount *)((FCB)->ff_cp->c_vp->v_mount->mnt_data))->hfs_vcb.vcb_vcb)) +#define VTOVCB(VP) VTOHFS(VP) +#define VFSTOVCB(MP) VFSTOHFS(MP) +#define HFSTOVCB(HFSMP) (HFSMP) +#define FCBTOVCB(FCB) FCBTOHFS(FCB) #define HFS_KNOTE(vp, hint) KNOTE(&VTOC(vp)->c_knotes, (hint)) @@ -408,6 +360,17 @@ enum { kdirentMaxNameBytes = NAME_MAX }; #define HFS_ALT_SECTOR(blksize, blkcnt) (((blkcnt) - 1) - (512 / (blksize))) #define HFS_ALT_OFFSET(blksize) ((blksize) > 1024 ? 
(blksize) - 1024 : 0) + +/* + * HFS specific fcntl()'s + */ +#define HFS_BULKACCESS (FCNTL_FS_SPECIFIC_BASE + 0x00001) +#define HFS_GET_MOUNT_TIME (FCNTL_FS_SPECIFIC_BASE + 0x00002) +#define HFS_GET_LAST_MTIME (FCNTL_FS_SPECIFIC_BASE + 0x00003) +#define HFS_GET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00004) +#define HFS_SET_BOOT_INFO (FCNTL_FS_SPECIFIC_BASE + 0x00005) + + /* * This is the straight GMT conversion constant: * 00:00:00 January 1, 1970 - 00:00:00 January 1, 1904 @@ -416,25 +379,18 @@ enum { kdirentMaxNameBytes = NAME_MAX }; #define MAC_GMT_FACTOR 2082844800UL -u_int32_t to_bsd_time(u_int32_t hfs_time); -u_int32_t to_hfs_time(u_int32_t bsd_time); +time_t to_bsd_time(u_int32_t hfs_time); +u_int32_t to_hfs_time(time_t bsd_time); int hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush); #define HFS_ALTFLUSH 1 extern int hfsUnmount(struct hfsmount *hfsmp, struct proc *p); - -extern int hfs_getcnode(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *descp, - int wantrsrc, struct cat_attr *attrp, struct cat_fork *forkp, - struct vnode **vpp); - -extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct cnode *cp, +extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, struct cat_desc *descp, int wantrsrc, struct cat_attr *attrp, struct cat_fork *forkp, struct vnode **vpp); -extern int hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct proc *p); - extern u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve); extern void hfs_remove_orphans(struct hfsmount *); @@ -464,13 +420,13 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, struct proc *p); OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args); + off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args, kauth_cred_t cred); extern int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - void *_args, int embeddedOffset, int mdb_offset, + void *_args, off_t embeddedOffset, daddr64_t mdb_offset, HFSMasterDirectoryBlock *mdbp, struct ucred *cred); -extern u_long GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name, - struct cat_attr *fattr, struct cat_fork *forkinfo); +extern u_long GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, + struct cat_attr *fattr, struct cat_fork *forkinfo); int hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname); @@ -505,6 +461,47 @@ extern int hfs_virtualmetafile(struct cnode *); void hfs_generate_volume_notifications(struct hfsmount *hfsmp); +__private_extern__ u_int32_t hfs_getencodingbias(void); +__private_extern__ void hfs_setencodingbias(u_int32_t bias); + +extern int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, + struct vnode **rvpp, struct proc *p); + +extern int hfs_update(struct vnode *, int); + +extern int hfs_truncate(struct vnode *, off_t, int, int, vfs_context_t); + +extern int hfs_fsync(struct vnode *, int, int, struct proc *); + +extern int hfs_access(struct vnode *, mode_t, struct ucred *, struct proc *); + +extern int hfs_vget(struct hfsmount *, cnid_t, struct vnode **, int); + +extern int hfs_bmap(struct vnode *, daddr_t, struct vnode **, daddr64_t *, int *); + +extern int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid); + +__private_extern__ int 
hfs_start_transaction(struct hfsmount *hfsmp); +__private_extern__ int hfs_end_transaction(struct hfsmount *hfsmp); + +extern int hfs_setextendedsecurity(struct hfsmount *hfsmp, int state); +extern void hfs_checkextendedsecurity(struct hfsmount *hfsmp); + +extern int hfs_extendfs(struct hfsmount *, u_int64_t, vfs_context_t); +extern int hfs_truncatefs(struct hfsmount *, u_int64_t, vfs_context_t); + +extern int hfs_isallocated(struct hfsmount *, u_long, u_long); + + +/* HFS System file locking */ +#define SFL_CATALOG 0x0001 +#define SFL_EXTENTS 0x0002 +#define SFL_BITMAP 0x0004 +#define SFL_ATTRIBUTE 0x0008 +#define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE) + +extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfslocktype); +extern void hfs_systemfile_unlock(struct hfsmount *, int); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c index 23084bcda..c3d29a9e8 100644 --- a/bsd/hfs/hfs_attrlist.c +++ b/bsd/hfs/hfs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -33,6 +33,10 @@ #include #include #include +#include +#include + +#include #include "hfs.h" #include "hfs_cnode.h" @@ -43,23 +47,23 @@ /* Routines that are shared by hfs_setattr: */ -extern int hfs_write_access(struct vnode *vp, struct ucred *cred, +extern int hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags); -extern int hfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, +extern int hfs_chflags(struct vnode *vp, uint32_t flags, kauth_cred_t cred, struct proc *p); -extern int hfs_chmod(struct vnode *vp, int mode, struct ucred *cred, +extern int hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p); -extern int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, +extern int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, struct proc *p); -extern char * hfs_getnamehint(struct cnode *dcp, int index); +__private_extern__ int hfs_vnop_readdirattr(struct vnop_readdirattr_args *ap); -extern void hfs_savenamehint(struct cnode *dcp, int index, const char * namehint); +__private_extern__ int hfs_vnop_setattrlist(struct vnop_setattrlist_args *ap); -extern void hfs_relnamehint(struct cnode *dcp, int index); +__private_extern__ int hfs_vnop_getattrlist(struct vnop_getattrlist_args *ap); /* Packing routines: */ @@ -68,7 +72,7 @@ static void packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct proc *p); static void packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, - struct vnode *vp, struct proc *p); + struct vnode *vp); static void packcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * cdp, @@ -76,60 +80,52 @@ static void packcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, static void packfileattr(struct attrblock *abp, struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, - struct cat_fork *rsrcfork, struct proc *p); + struct cat_fork *rsrcfork); static void packdirattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * descp, - struct cat_attr * cattrp, struct proc *p); + struct cat_attr * cattrp); -static void unpackattrblk(struct attrblock *abp, struct vnode *vp); + +#if 0 +static int unpackattrblk(struct attrblock *abp, struct 
vnode *vp); static void unpackcommonattr(struct attrblock *abp, struct vnode *vp); -static void unpackvolattr(struct attrblock *abp, struct hfsmount *hfsmp, - struct vnode *rootvp); +static int unpackvolattr(struct attrblock *abp, struct hfsmount *hfsmp, + struct vnode *root_vp); /* - -# -#% getattrlist vp = = = -# - vop_getattrlist { - IN struct vnode *vp; - IN struct attrlist *alist; - INOUT struct uio *uio; - IN struct ucred *cred; - IN struct proc *p; - }; - + * Get a list of attributes. */ __private_extern__ int -hfs_getattrlist(ap) - struct vop_getattrlist_args /* { +hfs_vnop_getattrlist(ap) + struct vnop_getattrlist_args /* { struct vnode *a_vp; struct attrlist *a_alist struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct hfsmount *hfsmp = VTOHFS(vp); + struct cnode *cp; + struct hfsmount *hfsmp; struct attrlist *alist = ap->a_alist; - struct timeval tv; + proc_t p = vfs_context_proc(ap->a_context); int fixedblocksize; int attrblocksize; int attrbufsize; - void *attrbufptr; + void *attrbufptr = NULL; void *attrptr; void *varptr; struct attrblock attrblk; struct cat_fork *datafp = NULL; struct cat_fork *rsrcfp = NULL; - struct cat_fork rsrcfork = {0}; + struct cat_fork rsrcfork; + int lockflags; int error = 0; if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || @@ -157,68 +153,75 @@ hfs_getattrlist(ap) return (EINVAL); } - /* Requesting volume information requires root vnode */ - if ((alist->volattr) && cp->c_fileid != kRootDirID) - return (EINVAL); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + /* Requesting volume information requires root vnode */ + if ((alist->volattr) && cp->c_fileid != kHFSRootFolderID) { + error = EINVAL; + goto exit; + } /* Asking for data fork attributes from the rsrc fork is not supported */ - if (VNODE_IS_RSRC(vp) && (alist->fileattr & ATTR_DATAFORK_MASK)) - return (EINVAL); - + if (VNODE_IS_RSRC(vp) && (alist->fileattr & ATTR_DATAFORK_MASK)) { + error = EINVAL; + goto exit; + } /* This file no longer exists! */ - if (cp->c_flag & (C_NOEXISTS | C_DELETED)) - return (ENOENT); - + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto exit; + } /* This file doesn't have a name! */ - if ((cp->c_desc.cd_namelen == 0) && (alist->commonattr & ATTR_CMN_NAME)) - return (ENOENT); + if ((cp->c_desc.cd_namelen == 0) && (alist->commonattr & ATTR_CMN_NAME)) { + error = ENOENT; + goto exit; + } /* Update cnode times if needed */ - tv = time; - CTIMES(cp, &tv, &tv); + hfs_touchtimes(hfsmp, cp); /* * If a File ID (ATTR_CMN_OBJPERMANENTID) is requested on * an HFS volume we must be sure to create the thread * record before returning it. (yikes) */ - if ((vp->v_type == VREG) && + if (vnode_isreg(vp) && (alist->commonattr & ATTR_CMN_OBJPERMANENTID) && (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord)) { - cat_cookie_t cookie = {0}; - - if (hfsmp->hfs_flags & HFS_READ_ONLY) - return (EROFS); - if ((error = hfs_write_access(vp, ap->a_cred, ap->a_p, false)) != 0) - return (error); + cat_cookie_t cookie; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + error = EROFS; + goto exit; + } + if ((error = hfs_write_access(vp, vfs_context_ucred(ap->a_context), + p, false)) != 0) { + goto exit; + } /* * Reserve some space in the Catalog file. 
*/ - error = cat_preflight(hfsmp, CAT_CREATE, &cookie, ap->a_p); - if (error) - return (error); - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, - LK_EXCLUSIVE, ap->a_p); + bzero(&cookie, sizeof(cookie)); + error = cat_preflight(hfsmp, CAT_CREATE, &cookie, p); if (error) { - cat_postflight(hfsmp, &cookie, ap->a_p); - return (error); - } + goto exit; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_insertfilethread(hfsmp, &cp->c_desc); - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, - ap->a_p); + hfs_systemfile_unlock(hfsmp, lockflags); - cat_postflight(hfsmp, &cookie, ap->a_p); + cat_postflight(hfsmp, &cookie, p); if (error) - return (error); + goto exit; } - + bzero(&rsrcfork, sizeof(rsrcfork)); /* Establish known fork data */ if (cp->c_datafork != NULL) { datafp = &cp->c_datafork->ff_data; @@ -235,25 +238,23 @@ hfs_getattrlist(ap) * fetched from the catalog. */ if ((alist->fileattr & ATTR_RSRCFORK_MASK) && (rsrcfp == NULL)) { - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, ap->a_p); - if (error) - return (error); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); /* Get resource fork data */ error = cat_lookup(hfsmp, &cp->c_desc, 1, - (struct cat_desc *)0, (struct cat_attr *)0, &rsrcfork); + (struct cat_desc *)0, (struct cat_attr *)0, &rsrcfork, NULL); + + hfs_systemfile_unlock(hfsmp, lockflags); - /* Unlock the Catalog */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, ap->a_p); if (error) - return (error); + goto exit; rsrcfp = &rsrcfork; } fixedblocksize = hfs_attrblksize(alist); - attrblocksize = fixedblocksize + (sizeof(u_long)); /* u_long for length longword */ + attrblocksize = fixedblocksize + (sizeof(uint32_t)); /* uint32_t for length word */ if (alist->commonattr & ATTR_CMN_NAME) attrblocksize += kHFSPlusMaxFileNameBytes + 1; if (alist->volattr & ATTR_VOL_MOUNTPOINT) @@ -266,11 +267,11 @@ hfs_getattrlist(ap) if (alist->fileattr & ATTR_FILE_FORKLIST) attrblocksize += 0; #endif - attrbufsize = MIN(ap->a_uio->uio_resid, attrblocksize); + attrbufsize = MIN(uio_resid(ap->a_uio), attrblocksize); MALLOC(attrbufptr, void *, attrblocksize, M_TEMP, M_WAITOK); attrptr = attrbufptr; - *((u_long *)attrptr) = 0; /* Set buffer length in case of errors */ - ++((u_long *)attrptr); /* Reserve space for length field */ + *((uint32_t *)attrptr) = 0; /* Set buffer length in case of errors */ + ++((uint32_t *)attrptr); /* Reserve space for length field */ varptr = ((char *)attrptr) + fixedblocksize; attrblk.ab_attrlist = alist; @@ -280,50 +281,41 @@ hfs_getattrlist(ap) attrblk.ab_blocksize = attrblocksize; hfs_packattrblk(&attrblk, hfsmp, vp, &cp->c_desc, &cp->c_attr, - datafp, rsrcfp, ap->a_p); + datafp, rsrcfp, p); /* Don't copy out more data than was generated */ - attrbufsize = MIN(attrbufsize, (u_int)varptr - (u_int)attrbufptr); + attrbufsize = MIN((u_int)attrbufsize, (u_int)varptr - (u_int)attrbufptr); /* Set actual buffer length for return to caller */ - *((u_long *)attrbufptr) = attrbufsize; + *((uint32_t *)attrbufptr) = attrbufsize; error = uiomove((caddr_t)attrbufptr, attrbufsize, ap->a_uio); - - FREE(attrbufptr, M_TEMP); +exit: + if (attrbufptr) + FREE(attrbufptr, M_TEMP); + hfs_unlock(cp); return (error); } /* - -# -#% setattrlist vp L L L -# - vop_setattrlist { - IN struct vnode *vp; - IN struct attrlist *alist; - INOUT struct uio *uio; - IN struct ucred *cred; - IN struct proc *p; - }; - + * Set a 
list of attributes. */ __private_extern__ int -hfs_setattrlist(ap) - struct vop_setattrlist_args /* { +hfs_vnop_setattrlist(ap) + struct vnop_setattrlist_args /* { struct vnode *a_vp; struct attrlist *a_alist struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct hfsmount * hfsmp = VTOHFS(vp); + struct cnode *cp; + struct hfsmount * hfsmp; struct attrlist *alist = ap->a_alist; - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); int attrblocksize; void *attrbufptr = NULL; void *attrptr; @@ -332,9 +324,11 @@ hfs_setattrlist(ap) uid_t saved_uid; gid_t saved_gid; mode_t saved_mode; - u_long saved_flags; + uint32_t saved_flags; int error = 0; + hfsmp = VTOHFS(vp); + if (hfsmp->hfs_flags & HFS_READ_ONLY) return (EROFS); if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || @@ -344,6 +338,10 @@ hfs_setattrlist(ap) ((alist->fileattr & ~ATTR_FILE_SETMASK) != 0)) { return (EINVAL); } + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); + cp = VTOC(vp); + /* * When setting volume attributes make sure * that ATTR_VOL_INFO is set and that all @@ -352,24 +350,27 @@ hfs_setattrlist(ap) if ((alist->volattr != 0) && (((alist->volattr & ATTR_VOL_INFO) == 0) || (alist->commonattr & ~ATTR_CMN_VOLSETMASK) || - (cp->c_fileid != kRootDirID))) { + (cp->c_fileid != kHFSRootFolderID))) { if ((alist->volattr & ATTR_VOL_INFO) == 0) printf("hfs_setattrlist: you forgot to set ATTR_VOL_INFO bit!\n"); else printf("hfs_setattrlist: you cannot set bits 0x%08X!\n", alist->commonattr & ~ATTR_CMN_VOLSETMASK); - return (EINVAL); + error = EINVAL; + goto ErrorExit; + } + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto ErrorExit; } - if (cp->c_flag & (C_NOEXISTS | C_DELETED)) - return (ENOENT); - // XXXdbg - don't allow modifying the journal or journal_info_block if (hfsmp->jnl && cp->c_datafork) { struct HFSPlusExtentDescriptor *extd; extd = &cp->c_datafork->ff_extents[0]; if (extd->startBlock == HFSTOVCB(hfsmp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - return EPERM; + error = EPERM; + goto ErrorExit; } } @@ -394,27 +395,30 @@ hfs_setattrlist(ap) * change so this check is sufficient for now. */ if ((error = hfs_owner_rights(hfsmp, cp->c_uid, cred, p, true)) != 0) - return (error); + goto ErrorExit; } /* * For any other attributes, check to see if the user has - * write access to the cnode in question [unlike VOP_ACCESS, + * write access to the cnode in question [unlike vn_access, * ignore IMMUTABLE here]: */ if (((alist->commonattr & ~ATTR_OWNERSHIP_SETMASK) != 0) || (alist->volattr != 0) || (alist->dirattr != 0) || (alist->fileattr != 0)) { if ((error = hfs_write_access(vp, cred, p, false)) != 0) - return (error); + goto ErrorExit; } /* * Allocate the buffer now to minimize the time we might * be blocked holding the catalog lock. 
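*/

The converted hfs_vnop_* entry points in these hunks take the cnode lock before validating state, and then unwind through a single label instead of returning early with the lock held. A minimal sketch of that shape, reusing hfs_lock()/hfs_unlock() and the flag names from the hunks above; the function name and body are illustrative:

	static int
	hfs_vnop_example(struct vnode *vp)
	{
		struct cnode *cp;
		int error;

		/* nothing is held yet, so an early return is still safe */
		if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
			return (error);
		cp = VTOC(vp);

		if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
			error = ENOENT;
			goto exit;	/* lock held: unwind through exit */
		}
		/* ... work on cp under the cnode lock ... */
	exit:
		hfs_unlock(cp);
		return (error);
	}
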
*/ - attrblocksize = ap->a_uio->uio_resid; - if (attrblocksize < hfs_attrblksize(alist)) - return (EINVAL); + // LP64todo - fix this + attrblocksize = uio_resid(ap->a_uio); + if (attrblocksize < hfs_attrblksize(alist)) { + error = EINVAL; + goto ErrorExit; + } MALLOC(attrbufptr, void *, attrblocksize, M_TEMP, M_WAITOK); @@ -434,7 +438,9 @@ hfs_setattrlist(ap) attrblk.ab_varbufpp = &varptr; attrblk.ab_flags = 0; attrblk.ab_blocksize = attrblocksize; - unpackattrblk(&attrblk, vp); + error = unpackattrblk(&attrblk, vp); + if (error) + goto ErrorExit; /* If unpacking changed the owner/group then call hfs_chown() */ if ((saved_uid != cp->c_uid) || (saved_gid != cp->c_gid)) { @@ -459,7 +465,7 @@ hfs_setattrlist(ap) } /* If unpacking changed the flags then call hfs_chflags() */ if (saved_flags !=cp->c_flags) { - u_long flags; + uint32_t flags; flags = cp->c_flags; cp->c_flags = saved_flags; @@ -470,13 +476,10 @@ hfs_setattrlist(ap) * If any cnode attributes changed then do an update. */ if (alist->volattr == 0) { - struct timeval tv; - cp->c_flag |= C_MODIFIED; - tv = time; - CTIMES(cp, &tv, &tv); - if ((error = VOP_UPDATE(vp, &tv, &tv, 1))) + if ((error = hfs_update(vp, TRUE))) { goto ErrorExit; + } } /* Volume Rename */ if (alist->volattr & ATTR_VOL_NAME) { @@ -489,32 +492,34 @@ hfs_setattrlist(ap) */ copystr(cp->c_desc.cd_nameptr, vcb->vcbVN, sizeof(vcb->vcbVN), NULL); } else { - struct cat_desc to_desc = {0}; - struct cat_desc todir_desc = {0}; - struct cat_desc new_desc = {0}; - cat_cookie_t cookie = {0}; + struct cat_desc to_desc; + struct cat_desc todir_desc; + struct cat_desc new_desc; + cat_cookie_t cookie; int catreserve = 0; int catlocked = 0; int started_tr = 0; + int lockflags; + + bzero(&to_desc, sizeof(to_desc)); + bzero(&todir_desc, sizeof(todir_desc)); + bzero(&new_desc, sizeof(new_desc)); + bzero(&cookie, sizeof(cookie)); - todir_desc.cd_parentcnid = kRootParID; - todir_desc.cd_cnid = kRootParID; + todir_desc.cd_parentcnid = kHFSRootParentID; + todir_desc.cd_cnid = kHFSRootFolderID; todir_desc.cd_flags = CD_ISDIR; to_desc.cd_nameptr = vcb->vcbVN; to_desc.cd_namelen = strlen(vcb->vcbVN); - to_desc.cd_parentcnid = kRootParID; + to_desc.cd_parentcnid = kHFSRootParentID; to_desc.cd_cnid = cp->c_cnid; to_desc.cd_flags = CD_ISDIR; - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl) != 0)) { - goto rename_out; - } - started_tr = 1; + if ((error = hfs_start_transaction(hfsmp) != 0)) { + goto rename_out; } + started_tr = 1; /* * Reserve some space in the Catalog file. 
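The rename path above brackets its catalog work with hfs_start_transaction()/hfs_end_transaction(), the wrappers this patch introduces in place of the journal_start_transaction() calls and the hand-rolled global-lock macros removed from hfs.h. A minimal sketch of the bracket, with an illustrative body and the cleanup gated on a started_tr flag as in the hunk above:

	static int
	hfs_metadata_change_example(struct hfsmount *hfsmp)
	{
		int started_tr = 0;
		int error;

		if ((error = hfs_start_transaction(hfsmp)) != 0)
			goto out;
		started_tr = 1;

		/* ... take system-file locks, update the catalog ... */
	out:
		if (started_tr)
			hfs_end_transaction(hfsmp);	/* ends the journaled transaction */
		return (error);
	}
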
@@ -525,25 +530,21 @@ hfs_setattrlist(ap) } catreserve = 1; - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) { - goto rename_out; - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); catlocked = 1; error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc); rename_out: if (catlocked) { - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); } if (catreserve) { cat_postflight(hfsmp, &cookie, p); } + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (started_tr) { - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); } - hfs_global_shared_lock_release(hfsmp); if (error) { /* Restore the old name in the VCB */ @@ -558,12 +559,12 @@ rename_out: cp->c_desc.cd_nameptr = 0; cp->c_desc.cd_namelen = 0; cp->c_desc.cd_flags &= ~CD_HASBUF; - remove_name(name); + vfs_removename(name); } /* Update cnode's catalog descriptor */ replace_desc(cp, &new_desc); vcb->volumeNameEncodingHint = new_desc.cd_encoding; - cp->c_flag |= C_CHANGE; + cp->c_touch_chgtime = TRUE; } } @@ -580,9 +581,10 @@ ErrorExit: if (attrbufptr) FREE(attrbufptr, M_TEMP); + hfs_unlock(cp); return (error); } - +#endif /* * readdirattr operation will return attributes for the items in the @@ -604,7 +606,7 @@ ErrorExit: # #% readdirattr vp L L L # -vop_readdirattr { +vnop_readdirattr { IN struct vnode *vp; IN struct attrlist *alist; INOUT struct uio *uio; @@ -614,13 +616,13 @@ vop_readdirattr { OUT int *eofflag; OUT u_long *actualCount; OUT u_long **cookies; - IN struct ucred *cred; + IN kauth_cred_t cred; }; */ __private_extern__ int -hfs_readdirattr(ap) - struct vop_readdirattr_args /* { +hfs_vnop_readdirattr(ap) + struct vnop_readdirattr_args /* { struct vnode *a_vp; struct attrlist *a_alist; struct uio *a_uio; @@ -629,49 +631,39 @@ hfs_readdirattr(ap) u_long *a_newstate; int *a_eofflag; u_long *a_actualcount; - u_long **a_cookies; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { struct vnode *dvp = ap->a_vp; - struct cnode *dcp = VTOC(dvp); - struct hfsmount * hfsmp = VTOHFS(dvp); + struct cnode *dcp; + struct hfsmount * hfsmp; struct attrlist *alist = ap->a_alist; - struct uio *uio = ap->a_uio; + uio_t uio = ap->a_uio; int maxcount = ap->a_maxcount; - struct proc *p = current_proc(); - u_long fixedblocksize; - u_long maxattrblocksize; - u_long currattrbufsize; + struct proc *p = vfs_context_proc(ap->a_context); + uint32_t fixedblocksize; + uint32_t maxattrblocksize; + uint32_t currattrbufsize; void *attrbufptr = NULL; void *attrptr; void *varptr; struct attrblock attrblk; int error = 0; int depleted = 0; - int index, startindex; + int index; int i, dir_entries; struct cat_desc *lastdescp = NULL; - struct cat_desc prevdesc; - char * prevnamebuf = NULL; struct cat_entrylist *ce_list = NULL; - - dir_entries = dcp->c_entries; - if (dcp->c_attr.ca_fileid == kHFSRootFolderID && hfsmp->jnl) { - dir_entries -= 3; - } + directoryhint_t *dirhint = NULL; + unsigned int tag; + int shared_cnode_lock = 0; *(ap->a_actualcount) = 0; *(ap->a_eofflag) = 0; - - if (ap->a_cookies != NULL) { - printf("readdirattr: no cookies!\n"); - return (EINVAL); - } /* Check for invalid options and buffer space. 
*/ if (((ap->a_options & ~(FSOPT_NOINMEMUPDATE | FSOPT_NOFOLLOW)) != 0) - || (uio->uio_resid <= 0) || (uio->uio_iovcnt > 1) || (maxcount <= 0)) + || (uio_resid(uio) <= 0) || (uio_iovcnt(uio) > 1) || (maxcount <= 0)) return (EINVAL); /* This call doesn't take volume attributes. */ @@ -682,17 +674,29 @@ hfs_readdirattr(ap) ((alist->fileattr & ~ATTR_FILE_VALIDMASK) != 0)) return (EINVAL); + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) + return (error); + dcp = VTOC(dvp); + hfsmp = VTOHFS(dvp); + /* Reject requests for unsupported options. */ if ((alist->commonattr & (ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_OBJPERMANENTID)) || (alist->fileattr & (ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST | ATTR_FILE_DATAEXTENTS | ATTR_FILE_RSRCEXTENTS))) { printf("readdirattr: unsupported attributes! (%s)\n", dcp->c_desc.cd_nameptr); - return (EINVAL); + error = EINVAL; + goto exit; + } + + dir_entries = dcp->c_entries; + if (dcp->c_attr.ca_fileid == kHFSRootFolderID && hfsmp->jnl) { + dir_entries -= 3; } /* Convert uio_offset into a directory index. */ - startindex = index = uio->uio_offset / sizeof(struct dirent); + index = uio_offset(uio) & HFS_INDEX_MASK; + tag = uio_offset(uio) & ~HFS_INDEX_MASK; if ((index + 1) > dir_entries) { *(ap->a_eofflag) = 1; error = 0; @@ -700,7 +704,7 @@ hfs_readdirattr(ap) } /* Get a buffer to hold packed attributes. */ - fixedblocksize = (sizeof(u_long) + hfs_attrblksize(alist)); /* u_long for length */ + fixedblocksize = (sizeof(uint32_t) + hfs_attrblksize(alist)); /* 4 bytes for length */ maxattrblocksize = fixedblocksize; if (alist->commonattr & ATTR_CMN_NAME) maxattrblocksize += kHFSPlusMaxFileNameBytes + 1; @@ -713,38 +717,48 @@ hfs_readdirattr(ap) bzero(ce_list, sizeof(*ce_list)); ce_list->maxentries = MAXCATENTRIES; - /* Initialize a starting descriptor. */ - bzero(&prevdesc, sizeof(prevdesc)); - prevdesc.cd_flags = CD_DECOMPOSED; - prevdesc.cd_hint = dcp->c_childhint; - prevdesc.cd_parentcnid = dcp->c_cnid; - prevdesc.cd_nameptr = hfs_getnamehint(dcp, index); - prevdesc.cd_namelen = prevdesc.cd_nameptr ? strlen(prevdesc.cd_nameptr) : 0; - + /* Get a directory hint (cnode must be locked exclusive) */ + dirhint = hfs_getdirhint(dcp, ((index - 1) & HFS_INDEX_MASK) | tag); + + /* Hide tag from catalog layer. */ + dirhint->dh_index &= HFS_INDEX_MASK; + if (dirhint->dh_index == HFS_INDEX_MASK) { + dirhint->dh_index = -1; + } + + /* + * An ATTR_CMN_USERACCESS attribute request can result in a + * call to kauth_cred_ismember_gid(). So when requesting + * this attribute we downgrade our exclusive lock on dcp to + * a shared lock in case kauth_cred_ismember_gid generates + * an indirect call back into the file system. + */ + if (alist->commonattr & ATTR_CMN_USERACCESS) { + lck_rw_lock_exclusive_to_shared(&dcp->c_rwlock); + dcp->c_lockowner = HFS_SHARED_OWNER; + shared_cnode_lock = 1; + } /* * Obtain a list of catalog entries and pack their attributes until * the output buffer is full or maxcount entries have been packed. */ while (!depleted) { int maxentries; + int lockflags; /* Constrain our list size. */ - maxentries = uio->uio_resid / (fixedblocksize + HFS_AVERAGE_NAME_SIZE); + maxentries = uio_resid(uio) / (fixedblocksize + HFS_AVERAGE_NAME_SIZE); maxentries = min(maxentries, dcp->c_entries - index); maxentries = min(maxentries, maxcount); ce_list->maxentries = min(maxentries, ce_list->maxentries); lastdescp = NULL; - /* Lock catalog b-tree. 
*/ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (error) - goto exit; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_getentriesattr(hfsmp, &prevdesc, index, ce_list); + error = cat_getentriesattr(hfsmp, dirhint, ce_list); /* Don't forget to release the descriptors later! */ - /* Unlock catalog b-tree. */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (error == ENOENT) { *(ap->a_eofflag) = TRUE; @@ -755,15 +769,16 @@ hfs_readdirattr(ap) break; /* Process the catalog entries. */ - for (i = 0; i < ce_list->realentries; ++i) { + for (i = 0; i < (int)ce_list->realentries; ++i) { struct cnode *cp = NULL; struct vnode *vp = NULL; - struct vnode *rvp = NULL; struct cat_desc * cdescp; struct cat_attr * cattrp; - struct cat_fork c_datafork = {0}; - struct cat_fork c_rsrcfork = {0}; + struct cat_fork c_datafork; + struct cat_fork c_rsrcfork; + bzero(&c_datafork, sizeof(c_datafork)); + bzero(&c_rsrcfork, sizeof(c_rsrcfork)); cdescp = &ce_list->entry[i].ce_desc; cattrp = &ce_list->entry[i].ce_attr; c_datafork.cf_size = ce_list->entry[i].ce_datasize; @@ -774,8 +789,10 @@ hfs_readdirattr(ap) * Get in memory cnode data (if any). */ if (!(ap->a_options & FSOPT_NOINMEMUPDATE)) { - cp = hfs_chashget(dcp->c_dev, cattrp->ca_fileid, 0, &vp, &rvp); - if (cp != NULL) { + vp = hfs_chash_getvnode(dcp->c_dev, cattrp->ca_fileid, 0, 0); + + if (vp != NULL) { + cp = VTOC(vp); /* Only use cnode's decriptor for non-hardlinks */ if (!(cp->c_flag & C_HARDLINK)) cdescp = &cp->c_desc; @@ -790,7 +807,7 @@ hfs_readdirattr(ap) } } } - *((u_long *)attrptr)++ = 0; /* move it past length */ + *((uint32_t *)attrptr)++ = 0; /* move it past length */ attrblk.ab_attrlist = alist; attrblk.ab_attrbufpp = &attrptr; attrblk.ab_varbufpp = &varptr; @@ -803,21 +820,20 @@ hfs_readdirattr(ap) currattrbufsize = ((char *)varptr - (char *)attrbufptr); /* All done with cnode. */ - if (vp) { - vput(vp); + if (vp != NULL) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); vp = NULL; - } else if (rvp) { - vput(rvp); - rvp = NULL; + cp = NULL; } - cp = NULL; /* Make sure there's enough buffer space remaining. */ - if (currattrbufsize > uio->uio_resid) { + // LP64todo - fix this! + if (uio_resid(uio) < 0 || currattrbufsize > (uint32_t)uio_resid(uio)) { depleted = 1; break; } else { - *((u_long *)attrbufptr) = currattrbufsize; + *((uint32_t *)attrbufptr) = currattrbufsize; error = uiomove((caddr_t)attrbufptr, currattrbufsize, ap->a_uio); if (error != E_NONE) { depleted = 1; @@ -832,7 +848,9 @@ hfs_readdirattr(ap) /* Termination checks */ if ((--maxcount <= 0) || - (uio->uio_resid < (fixedblocksize + HFS_AVERAGE_NAME_SIZE)) || + // LP64todo - fix this! 
+ uio_resid(uio) < 0 || + ((uint32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE)) || (index >= dir_entries)) { depleted = 1; break; @@ -844,46 +862,56 @@ hfs_readdirattr(ap) if (index < dir_entries && !(*(ap->a_eofflag)) && lastdescp != NULL) { - if (prevnamebuf == NULL) - MALLOC(prevnamebuf, char *, kHFSPlusMaxFileNameBytes + 1, M_TEMP, M_WAITOK); - bcopy(lastdescp->cd_nameptr, prevnamebuf, lastdescp->cd_namelen + 1); - if (!depleted) { - prevdesc.cd_hint = lastdescp->cd_hint; - prevdesc.cd_nameptr = prevnamebuf; - prevdesc.cd_namelen = lastdescp->cd_namelen + 1; + + /* Remember last entry */ + if (dirhint->dh_desc.cd_nameptr != NULL) { + vfs_removename(dirhint->dh_desc.cd_nameptr); } + dirhint->dh_desc.cd_namelen = lastdescp->cd_namelen; + dirhint->dh_desc.cd_nameptr = + vfs_addname(lastdescp->cd_nameptr, lastdescp->cd_namelen, 0, 0); + dirhint->dh_index = index - 1; + dirhint->dh_desc.cd_cnid = lastdescp->cd_cnid; + dirhint->dh_desc.cd_hint = lastdescp->cd_hint; + dirhint->dh_desc.cd_encoding = lastdescp->cd_encoding; } /* All done with the catalog descriptors. */ - for (i = 0; i < ce_list->realentries; ++i) + for (i = 0; i < (int)ce_list->realentries; ++i) cat_releasedesc(&ce_list->entry[i].ce_desc); ce_list->realentries = 0; } /* while not depleted */ *ap->a_newstate = dcp->c_mtime; - - /* All done with last name hint */ - hfs_relnamehint(dcp, startindex); - startindex = 0; - /* Convert directory index into uio_offset. */ - uio->uio_offset = index * sizeof(struct dirent); + /* Make sure dcp is locked exclusive before changing c_dirhinttag. */ + if (shared_cnode_lock) { + lck_rw_lock_shared_to_exclusive(&dcp->c_rwlock); + dcp->c_lockowner = current_thread(); + shared_cnode_lock = 0; + } - /* Save a name hint if there are more entries */ - if ((error == 0) && prevnamebuf && (index + 1) < dcp->c_entries) - hfs_savenamehint(dcp, index, prevnamebuf); -exit: - if (startindex > 0) - hfs_relnamehint(dcp, startindex); + /* Convert directory index back into a uio_offset. 
*/ + while (tag == 0) tag = (++dcp->c_dirhinttag) << HFS_INDEX_BITS; + uio_setoffset(uio, index | tag); + dirhint->dh_index |= tag; +exit: + /* Drop directory hint on error or if there are no more entries */ + if (dirhint && (error || index >= dir_entries)) { + if (shared_cnode_lock) { + lck_rw_lock_shared_to_exclusive(&dcp->c_rwlock); + dcp->c_lockowner = current_thread(); + } + hfs_reldirhint(dcp, dirhint); + } if (attrbufptr) FREE(attrbufptr, M_TEMP); if (ce_list) FREE(ce_list, M_TEMP); - if (prevnamebuf) - FREE(prevnamebuf, M_TEMP); + hfs_unlock(dcp); return (error); } @@ -911,16 +939,16 @@ hfs_packattrblk(struct attrblock *abp, packvolcommonattr(abp, hfsmp, vp, p); if (attrlistp->volattr & ~ATTR_VOL_INFO) - packvolattr(abp, hfsmp, vp, p); + packvolattr(abp, hfsmp, vp); } else { if (attrlistp->commonattr) packcommonattr(abp, hfsmp, vp, descp, attrp, p); if (attrlistp->dirattr && S_ISDIR(attrp->ca_mode)) - packdirattr(abp, hfsmp, vp, descp,attrp, p); + packdirattr(abp, hfsmp, vp, descp,attrp); if (attrlistp->fileattr && !S_ISDIR(attrp->ca_mode)) - packfileattr(abp, hfsmp, attrp, datafork, rsrcfork, p); + packfileattr(abp, hfsmp, attrp, datafork, rsrcfork); } } @@ -928,7 +956,7 @@ hfs_packattrblk(struct attrblock *abp, static char* mountpointname(struct mount *mp) { - size_t namelength = strlen(mp->mnt_stat.f_mntonname); + size_t namelength = strlen(mp->mnt_vfsstat.f_mntonname); int foundchars = 0; char *c; @@ -940,7 +968,7 @@ mountpointname(struct mount *mp) * the first slash encountered (which must precede the * last part of the pathname). */ - for (c = mp->mnt_stat.f_mntonname + namelength - 1; + for (c = mp->mnt_vfsstat.f_mntonname + namelength - 1; namelength > 0; --c, --namelength) { if (*c != '/') { foundchars = 1; @@ -949,7 +977,7 @@ mountpointname(struct mount *mp) } } - return (mp->mnt_stat.f_mntonname); + return (mp->mnt_vfsstat.f_mntonname); } @@ -958,14 +986,13 @@ packnameattr( struct attrblock *abp, struct vnode *vp, char *name, - int namelen, - struct proc *p) + int namelen) { void *varbufptr; struct attrreference * attr_refptr; char *mpname; size_t mpnamelen; - u_long attrlength; + uint32_t attrlength; char empty = 0; /* A cnode's name may be incorrect for the root of a mounted @@ -974,8 +1001,8 @@ packnameattr( * root directory, it's best to return the last element of the location where the volume's mounted: */ - if ((vp != NULL) && (vp->v_flag & VROOT) && - (mpname = mountpointname(vp->v_mount))) { + if ((vp != NULL) && vnode_isvroot(vp) && + (mpname = mountpointname(vnode_mount(vp)))) { mpnamelen = strlen(mpname); /* Trim off any trailing slashes: */ @@ -1023,12 +1050,13 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v struct cnode *cp = VTOC(vp); struct mount *mp = VTOVFS(vp); ExtendedVCB *vcb = HFSTOVCB(hfsmp); - u_long attrlength; + u_int32_t attrlength; + boolean_t is_64_bit = proc_is64bit(p); attr = abp->ab_attrlist->commonattr; if (ATTR_CMN_NAME & attr) { - packnameattr(abp, vp, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, p); + packnameattr(abp, vp, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen); attrbufptr = *abp->ab_attrbufpp; varbufptr = *abp->ab_varbufpp; } @@ -1036,7 +1064,11 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v *((dev_t *)attrbufptr)++ = hfsmp->hfs_raw_dev; } if (ATTR_CMN_FSID & attr) { - *((fsid_t *)attrbufptr) = mp->mnt_stat.f_fsid; + fsid_t fsid; + + fsid.val[0] = (long)hfsmp->hfs_raw_dev; + fsid.val[1] = (long)vfs_typenum(mp); + *((fsid_t *)attrbufptr) = fsid; ++((fsid_t 
*)attrbufptr); } if (ATTR_CMN_OBJTYPE & attr) { @@ -1061,7 +1093,7 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v ++((fsobj_id_t *)attrbufptr); } if (ATTR_CMN_SCRIPT & attr) { - u_long encoding; + uint32_t encoding; if (vcb->vcbSigWord == kHFSPlusSigWord) encoding = vcb->volumeNameEncodingHint; @@ -1070,29 +1102,64 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v *((text_encoding_t *)attrbufptr)++ = encoding; } if (ATTR_CMN_CRTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbCrDate; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = vcb->vcbCrDate; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbCrDate; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_MODTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_CHGTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_ACCTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbLsMod; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_BKUPTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbVolBkUp; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = vcb->vcbVolBkUp; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = vcb->vcbVolBkUp; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_FNDRINFO & attr) { bcopy (&vcb->vcbFndrInfo, attrbufptr, sizeof(vcb->vcbFndrInfo)); @@ -1100,13 +1167,14 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v } if (ATTR_CMN_OWNERID & attr) { if (cp->c_uid == UNKNOWNUID) - *((uid_t *)attrbufptr)++ = p->p_ucred->cr_uid; + *((uid_t *)attrbufptr)++ = kauth_cred_getuid(proc_ucred(p)); else *((uid_t *)attrbufptr)++ = cp->c_uid; } if (ATTR_CMN_GRPID & attr) { *((gid_t *)attrbufptr)++ = 
cp->c_gid; } + if (ATTR_CMN_ACCESSMASK & attr) { /* * [2856576] Since we are dynamically changing the owner, also @@ -1115,11 +1183,11 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v * a security hole where set-user-id programs run as whoever is * logged on (or root if nobody is logged in yet!) */ - *((u_long *)attrbufptr)++ = + *((uint32_t *)attrbufptr)++ = (cp->c_uid == UNKNOWNUID) ? cp->c_mode & ~(S_ISUID | S_ISGID) : cp->c_mode; } if (ATTR_CMN_NAMEDATTRCOUNT & attr) { - *((u_long *)attrbufptr)++ = 0; /* XXX PPD TBC */ + *((uint32_t *)attrbufptr)++ = 0; /* XXX PPD TBC */ } if (ATTR_CMN_NAMEDATTRLIST & attr) { attrlength = 0; @@ -1133,12 +1201,12 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v ++((struct attrreference *)attrbufptr); } if (ATTR_CMN_FLAGS & attr) { - *((u_long *)attrbufptr)++ = cp->c_flags; + *((uint32_t *)attrbufptr)++ = cp->c_flags; } if (ATTR_CMN_USERACCESS & attr) { - *((u_long *)attrbufptr)++ = + *((uint32_t *)attrbufptr)++ = DerivePermissionSummary(cp->c_uid, cp->c_gid, cp->c_mode, - VTOVFS(vp), current_proc()->p_ucred, current_proc()); + VTOVFS(vp), kauth_cred_get(), proc_self()); } *abp->ab_attrbufpp = attrbufptr; @@ -1147,7 +1215,7 @@ packvolcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *v static void -packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct proc *p) +packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp) { attrgroup_t attr; void *attrbufptr = *abp->ab_attrbufpp; @@ -1155,15 +1223,15 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str struct cnode *cp = VTOC(vp); struct mount *mp = VTOVFS(vp); ExtendedVCB *vcb = HFSTOVCB(hfsmp); - u_long attrlength; + uint32_t attrlength; attr = abp->ab_attrlist->volattr; if (ATTR_VOL_FSTYPE & attr) { - *((u_long *)attrbufptr)++ = (u_long)mp->mnt_vfc->vfc_typenum; + *((uint32_t *)attrbufptr)++ = (uint32_t)vfs_typenum(mp); } if (ATTR_VOL_SIGNATURE & attr) { - *((u_long *)attrbufptr)++ = (u_long)vcb->vcbSigWord; + *((uint32_t *)attrbufptr)++ = (uint32_t)vcb->vcbSigWord; } if (ATTR_VOL_SIZE & attr) { *((off_t *)attrbufptr)++ = @@ -1184,30 +1252,30 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str *((off_t *)attrbufptr)++ = (off_t)(vcb->vcbClpSiz); } if (ATTR_VOL_IOBLOCKSIZE & attr) { - *((u_long *)attrbufptr)++ = (u_long)hfsmp->hfs_logBlockSize; + *((uint32_t *)attrbufptr)++ = hfsmp->hfs_logBlockSize; } if (ATTR_VOL_OBJCOUNT & attr) { - *((u_long *)attrbufptr)++ = - (u_long)vcb->vcbFilCnt + (u_long)vcb->vcbDirCnt; + *((uint32_t *)attrbufptr)++ = + (uint32_t)vcb->vcbFilCnt + (uint32_t)vcb->vcbDirCnt; } if (ATTR_VOL_FILECOUNT & attr) { - *((u_long *)attrbufptr)++ = (u_long)vcb->vcbFilCnt; + *((uint32_t *)attrbufptr)++ = (uint32_t)vcb->vcbFilCnt; } if (ATTR_VOL_DIRCOUNT & attr) { - *((u_long *)attrbufptr)++ = (u_long)vcb->vcbDirCnt; + *((uint32_t *)attrbufptr)++ = (uint32_t)vcb->vcbDirCnt; } if (ATTR_VOL_MAXOBJCOUNT & attr) { - *((u_long *)attrbufptr)++ = 0xFFFFFFFF; + *((uint32_t *)attrbufptr)++ = 0xFFFFFFFF; } if (ATTR_VOL_MOUNTPOINT & attr) { ((struct attrreference *)attrbufptr)->attr_dataoffset = (char *)varbufptr - (char *)attrbufptr; ((struct attrreference *)attrbufptr)->attr_length = - strlen(mp->mnt_stat.f_mntonname) + 1; + strlen(mp->mnt_vfsstat.f_mntonname) + 1; attrlength = ((struct attrreference *)attrbufptr)->attr_length; /* round up to the next 4-byte boundary: */ attrlength = attrlength + ((4 - 
(attrlength & 3)) & 3); - (void) bcopy(mp->mnt_stat.f_mntonname, varbufptr, attrlength); + (void) bcopy(mp->mnt_vfsstat.f_mntonname, varbufptr, attrlength); /* Advance beyond the space just allocated: */ (char *)varbufptr += attrlength; @@ -1228,18 +1296,18 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str (char *)varbufptr += attrlength; ++((struct attrreference *)attrbufptr); } - if (ATTR_VOL_MOUNTFLAGS & attr) { - *((u_long *)attrbufptr)++ = (u_long)mp->mnt_flag; - } + if (ATTR_VOL_MOUNTFLAGS & attr) { + *((uint32_t *)attrbufptr)++ = (uint32_t)vfs_flags(mp); + } if (ATTR_VOL_MOUNTEDDEVICE & attr) { ((struct attrreference *)attrbufptr)->attr_dataoffset = (char *)varbufptr - (char *)attrbufptr; ((struct attrreference *)attrbufptr)->attr_length = - strlen(mp->mnt_stat.f_mntfromname) + 1; + strlen(mp->mnt_vfsstat.f_mntfromname) + 1; attrlength = ((struct attrreference *)attrbufptr)->attr_length; /* round up to the next 4-byte boundary: */ attrlength = attrlength + ((4 - (attrlength & 3)) & 3); - (void) bcopy(mp->mnt_stat.f_mntfromname, varbufptr, attrlength); + (void) bcopy(mp->mnt_vfsstat.f_mntfromname, varbufptr, attrlength); /* Advance beyond the space just allocated: */ (char *)varbufptr += attrlength; @@ -1255,13 +1323,13 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str vcapattrptr = (vol_capabilities_attr_t *)attrbufptr; if (vcb->vcbSigWord == kHFSPlusSigWord) { - u_int32_t journal_active; + u_int32_t journal_active_cap; u_int32_t case_sensitive; if (hfsmp->jnl) - journal_active = VOL_CAP_FMT_JOURNAL_ACTIVE; + journal_active_cap = VOL_CAP_FMT_JOURNAL_ACTIVE; else - journal_active = 0; + journal_active_cap = 0; if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) case_sensitive = VOL_CAP_FMT_CASE_SENSITIVE; @@ -1273,10 +1341,11 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str VOL_CAP_FMT_SYMBOLICLINKS | VOL_CAP_FMT_HARDLINKS | VOL_CAP_FMT_JOURNAL | - journal_active | + journal_active_cap | case_sensitive | VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS ; + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; } else { /* Plain HFS */ vcapattrptr->capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_PERSISTENTOBJECTIDS | @@ -1307,7 +1376,8 @@ packvolattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, str VOL_CAP_FMT_ZERO_RUNS | VOL_CAP_FMT_CASE_SENSITIVE | VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS ; + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; vcapattrptr->valid[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | @@ -1360,10 +1430,11 @@ packcommonattr( struct mount *mp = HFSTOVFS(hfsmp); void *attrbufptr = *abp->ab_attrbufpp; void *varbufptr = *abp->ab_varbufpp; - u_long attrlength = 0; + uint32_t attrlength = 0; + boolean_t is_64_bit = proc_is64bit(p); if (ATTR_CMN_NAME & attr) { - packnameattr(abp, vp, cdp->cd_nameptr, cdp->cd_namelen, p); + packnameattr(abp, vp, cdp->cd_nameptr, cdp->cd_namelen); attrbufptr = *abp->ab_attrbufpp; varbufptr = *abp->ab_varbufpp; } @@ -1371,7 +1442,11 @@ packcommonattr( *((dev_t *)attrbufptr)++ = hfsmp->hfs_raw_dev; } if (ATTR_CMN_FSID & attr) { - *((fsid_t *)attrbufptr) = mp->mnt_stat.f_fsid; + fsid_t fsid; + + fsid.val[0] = (long)hfsmp->hfs_raw_dev; + fsid.val[1] = (long)vfs_typenum(mp); + *((fsid_t *)attrbufptr) = fsid; ++((fsid_t *)attrbufptr); } if (ATTR_CMN_OBJTYPE & attr) { @@ -1392,7 +1467,7 @@ packcommonattr( * and Carbon APIs, which are hardlink-ignorant, will always * receive 
the c_cnid (from getattrlist). */ - if (ATTR_CMN_OBJID & attr) { + if (ATTR_CMN_OBJID & attr) { ((fsobj_id_t *)attrbufptr)->fid_objno = cdp->cd_cnid; ((fsobj_id_t *)attrbufptr)->fid_generation = 0; ++((fsobj_id_t *)attrbufptr); @@ -1411,29 +1486,64 @@ packcommonattr( *((text_encoding_t *)attrbufptr)++ = cdp->cd_encoding; } if (ATTR_CMN_CRTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_itime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_itime; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = cap->ca_itime; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_MODTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_mtime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_CHGTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_ctime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_ACCTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_atime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_BKUPTIME & attr) { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_btime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct user_timespec *)attrbufptr)->tv_nsec = 0; + ++((struct user_timespec *)attrbufptr); + } + else { + ((struct timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_FNDRINFO & attr) { bcopy(&cap->ca_finderinfo, attrbufptr, sizeof(u_int8_t) * 32); @@ -1441,7 +1551,7 @@ packcommonattr( } if (ATTR_CMN_OWNERID & attr) { *((uid_t *)attrbufptr)++ = - (cap->ca_uid == UNKNOWNUID) ? p->p_ucred->cr_uid : cap->ca_uid; + (cap->ca_uid == UNKNOWNUID) ? kauth_cred_getuid(proc_ucred(p)) : cap->ca_uid; } if (ATTR_CMN_GRPID & attr) { *((gid_t *)attrbufptr)++ = cap->ca_gid; @@ -1454,11 +1564,11 @@ packcommonattr( * a security hole where set-user-id programs run as whoever is * logged on (or root if nobody is logged in yet!) 
*/ - *((u_long *)attrbufptr)++ = + *((uint32_t *)attrbufptr)++ = (cap->ca_uid == UNKNOWNUID) ? cap->ca_mode & ~(S_ISUID | S_ISGID) : cap->ca_mode; } if (ATTR_CMN_NAMEDATTRCOUNT & attr) { - *((u_long *)attrbufptr)++ = 0; + *((uint32_t *)attrbufptr)++ = 0; } if (ATTR_CMN_NAMEDATTRLIST & attr) { attrlength = 0; @@ -1472,12 +1582,12 @@ packcommonattr( ++((struct attrreference *)attrbufptr); } if (ATTR_CMN_FLAGS & attr) { - *((u_long *)attrbufptr)++ = cap->ca_flags; + *((uint32_t *)attrbufptr)++ = cap->ca_flags; } if (ATTR_CMN_USERACCESS & attr) { - *((u_long *)attrbufptr)++ = + *((uint32_t *)attrbufptr)++ = DerivePermissionSummary(cap->ca_uid, cap->ca_gid, - cap->ca_mode, mp, current_proc()->p_ucred, + cap->ca_mode, mp, proc_ucred(current_proc()), current_proc()); } @@ -1491,31 +1601,30 @@ packdirattr( struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * descp, - struct cat_attr * cattrp, - struct proc *p) + struct cat_attr * cattrp) { attrgroup_t attr = abp->ab_attrlist->dirattr; void *attrbufptr = *abp->ab_attrbufpp; if (ATTR_DIR_LINKCOUNT & attr) - *((u_long *)attrbufptr)++ = cattrp->ca_nlink; + *((uint32_t *)attrbufptr)++ = cattrp->ca_nlink; if (ATTR_DIR_ENTRYCOUNT & attr) { - u_long entries = cattrp->ca_entries; + uint32_t entries = cattrp->ca_entries; - if (descp->cd_parentcnid == kRootParID) { + if (descp->cd_parentcnid == kHFSRootParentID) { if (hfsmp->hfs_privdir_desc.cd_cnid != 0) --entries; /* hide private dir */ if (hfsmp->jnl) entries -= 2; /* hide the journal files */ } - *((u_long *)attrbufptr)++ = entries; + *((uint32_t *)attrbufptr)++ = entries; } if (ATTR_DIR_MOUNTSTATUS & attr) { - if (vp != NULL && vp->v_mountedhere != NULL) - *((u_long *)attrbufptr)++ = DIR_MNTSTATUS_MNTPOINT; + if (vp != NULL && vnode_mountedhere(vp) != NULL) + *((uint32_t *)attrbufptr)++ = DIR_MNTSTATUS_MNTPOINT; else - *((u_long *)attrbufptr)++ = 0; + *((uint32_t *)attrbufptr)++ = 0; } *abp->ab_attrbufpp = attrbufptr; } @@ -1526,19 +1635,18 @@ packfileattr( struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, - struct cat_fork *rsrcfork, - struct proc *p) + struct cat_fork *rsrcfork) { attrgroup_t attr = abp->ab_attrlist->fileattr; void *attrbufptr = *abp->ab_attrbufpp; void *varbufptr = *abp->ab_varbufpp; - u_long attrlength; - u_long allocblksize; + uint32_t attrlength; + uint32_t allocblksize; allocblksize = HFSTOVCB(hfsmp)->blockSize; if (ATTR_FILE_LINKCOUNT & attr) { - *((u_long *)attrbufptr)++ = cattrp->ca_nlink; + *((uint32_t *)attrbufptr)++ = cattrp->ca_nlink; } if (ATTR_FILE_TOTALSIZE & attr) { *((off_t *)attrbufptr)++ = datafork->cf_size + rsrcfork->cf_size; @@ -1548,22 +1656,22 @@ packfileattr( (off_t)cattrp->ca_blocks * (off_t)allocblksize; } if (ATTR_FILE_IOBLOCKSIZE & attr) { - *((u_long *)attrbufptr)++ = hfsmp->hfs_logBlockSize; + *((uint32_t *)attrbufptr)++ = hfsmp->hfs_logBlockSize; } if (ATTR_FILE_CLUMPSIZE & attr) { - *((u_long *)attrbufptr)++ = HFSTOVCB(hfsmp)->vcbClpSiz; + *((uint32_t *)attrbufptr)++ = HFSTOVCB(hfsmp)->vcbClpSiz; } if (ATTR_FILE_DEVTYPE & attr) { if (S_ISBLK(cattrp->ca_mode) || S_ISCHR(cattrp->ca_mode)) - *((u_long *)attrbufptr)++ = (u_long)cattrp->ca_rdev; + *((uint32_t *)attrbufptr)++ = (uint32_t)cattrp->ca_rdev; else - *((u_long *)attrbufptr)++ = 0; + *((uint32_t *)attrbufptr)++ = 0; } if (ATTR_FILE_FILETYPE & attr) { - *((u_long *)attrbufptr)++ = 0; + *((uint32_t *)attrbufptr)++ = 0; } if (ATTR_FILE_FORKCOUNT & attr) { - *((u_long *)attrbufptr)++ = 2; + *((uint32_t *)attrbufptr)++ = 2; } if (ATTR_FILE_FORKLIST & attr) { attrlength = 
0; @@ -1602,16 +1710,21 @@ packfileattr( *abp->ab_varbufpp = varbufptr; } - -static void +#if 0 +static int unpackattrblk(struct attrblock *abp, struct vnode *vp) { struct attrlist *attrlistp = abp->ab_attrlist; + int error; - if (attrlistp->volattr) - unpackvolattr(abp, VTOHFS(vp), vp); - else if (attrlistp->commonattr) + if (attrlistp->volattr) { + error = unpackvolattr(abp, VTOHFS(vp), vp); + if (error) + return (error); + } else if (attrlistp->commonattr) { unpackcommonattr(abp, vp); + } + return (0); } @@ -1623,30 +1736,36 @@ unpackcommonattr( attrgroup_t attr = abp->ab_attrlist->commonattr; void *attrbufptr = *abp->ab_attrbufpp; struct cnode *cp = VTOC(vp); + boolean_t is_64_bit = proc_is64bit(current_proc()); if (ATTR_CMN_SCRIPT & attr) { cp->c_encoding = (u_int32_t)*((text_encoding_t *)attrbufptr)++; hfs_setencodingbits(VTOHFS(vp), cp->c_encoding); } if (ATTR_CMN_CRTIME & attr) { - cp->c_itime = ((struct timespec *)attrbufptr)->tv_sec; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + cp->c_itime = ((struct user_timespec *)attrbufptr)->tv_sec; + ++((struct user_timespec *)attrbufptr); + } + else { + cp->c_itime = ((struct timespec *)attrbufptr)->tv_sec; + ++((struct timespec *)attrbufptr); + } } if (ATTR_CMN_MODTIME & attr) { cp->c_mtime = ((struct timespec *)attrbufptr)->tv_sec; - cp->c_mtime_nsec = ((struct timespec *)attrbufptr)->tv_nsec; ++((struct timespec *)attrbufptr); - cp->c_flag &= ~C_UPDATE; + cp->c_touch_modtime = FALSE; } if (ATTR_CMN_CHGTIME & attr) { cp->c_ctime = ((struct timespec *)attrbufptr)->tv_sec; ++((struct timespec *)attrbufptr); - cp->c_flag &= ~C_CHANGE; + cp->c_touch_chgtime = FALSE; } if (ATTR_CMN_ACCTIME & attr) { cp->c_atime = ((struct timespec *)attrbufptr)->tv_sec; ++((struct timespec *)attrbufptr); - cp->c_flag &= ~C_ACCESS; + cp->c_touch_acctime = FALSE; } if (ATTR_CMN_BKUPTIME & attr) { cp->c_btime = ((struct timespec *)attrbufptr)->tv_sec; @@ -1674,7 +1793,7 @@ unpackcommonattr( } } if (ATTR_CMN_ACCESSMASK & attr) { - u_int16_t mode = (u_int16_t)*((u_long *)attrbufptr)++; + u_int16_t mode = (u_int16_t)*((uint32_t *)attrbufptr)++; if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { if (mode != (mode_t)VNOVAL) { cp->c_mode &= ~ALLPERMS; @@ -1683,7 +1802,7 @@ unpackcommonattr( } } if (ATTR_CMN_FLAGS & attr) { - u_long flags = *((u_long *)attrbufptr)++; + uint32_t flags = *((uint32_t *)attrbufptr)++; /* * Flags are settable only on HFS+ volumes. 
A special * exception is made for the IMMUTABLE flags @@ -1693,7 +1812,7 @@ unpackcommonattr( if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) || ((VTOVCB(vp)->vcbSigWord == kHFSSigWord) && ((flags & ~IMMUTABLE) == 0))) { - if (flags != (u_long)VNOVAL) { + if (flags != (uint32_t)VNOVAL) { cp->c_flags = flags; } } @@ -1702,47 +1821,56 @@ unpackcommonattr( } -static void +static int unpackvolattr( struct attrblock *abp, struct hfsmount *hfsmp, - struct vnode *rootvp) + struct vnode *root_vp) { void *attrbufptr = *abp->ab_attrbufpp; - ExtendedVCB *vcb = HFSTOVCB(hfsmp); attrgroup_t attr; + int error = 0; + boolean_t is_64_bit = proc_is64bit(current_proc()); + + HFS_MOUNT_LOCK(hfsmp, TRUE); attr = abp->ab_attrlist->commonattr; if (attr == 0) goto volattr; if (ATTR_CMN_SCRIPT & attr) { - vcb->volumeNameEncodingHint = + hfsmp->volumeNameEncodingHint = (u_int32_t)*(((text_encoding_t *)attrbufptr)++); } if (ATTR_CMN_CRTIME & attr) { - vcb->vcbCrDate = ((struct timespec *)attrbufptr)->tv_sec; - ++((struct timespec *)attrbufptr); + if (is_64_bit) { + hfsmp->vcbCrDate = ((struct user_timespec *)attrbufptr)->tv_sec; + ++((struct user_timespec *)attrbufptr); + } + else { + hfsmp->vcbCrDate = ((struct timespec *)attrbufptr)->tv_sec; + ++((struct timespec *)attrbufptr); + } /* The volume's create date comes from the root directory */ - VTOC(rootvp)->c_itime = vcb->vcbCrDate; - VTOC(rootvp)->c_flag |= C_MODIFIED; + VTOC(root_vp)->c_itime = hfsmp->vcbCrDate; + VTOC(root_vp)->c_flag |= C_MODIFIED; /* * XXX Should we also do a relative change to the * the volume header's create date in local time? */ } if (ATTR_CMN_MODTIME & attr) { - vcb->vcbLsMod = ((struct timespec *)attrbufptr)->tv_sec; + hfsmp->vcbLsMod = ((struct timespec *)attrbufptr)->tv_sec; ++((struct timespec *)attrbufptr); } if (ATTR_CMN_BKUPTIME & attr) { - vcb->vcbVolBkUp = ((struct timespec *)attrbufptr)->tv_sec; + hfsmp->vcbVolBkUp = ((struct timespec *)attrbufptr)->tv_sec; ++((struct timespec *)attrbufptr); } if (ATTR_CMN_FNDRINFO & attr) { - bcopy(attrbufptr, &vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo)); - (char *)attrbufptr += sizeof(vcb->vcbFndrInfo); + bcopy(attrbufptr, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); + (char *)attrbufptr += sizeof(hfsmp->vcbFndrInfo); } volattr: @@ -1752,14 +1880,22 @@ volattr: * It could be empty or garbage (bad UTF-8). */ if (ATTR_VOL_NAME & attr) { - copystr(((char *)attrbufptr) + *((u_long *)attrbufptr), - vcb->vcbVN, sizeof(vcb->vcbVN), NULL); - (char *)attrbufptr += sizeof(struct attrreference); + attrreference_t * attr_refp = (attrreference_t *) attrbufptr; + + error = copystr(((char *)attrbufptr) + attr_refp->attr_dataoffset, + hfsmp->vcbVN, MIN(attr_refp->attr_length, sizeof(hfsmp->vcbVN)), + NULL); + if (error == 0) + (char *)attrbufptr += sizeof(struct attrreference); } *abp->ab_attrbufpp = attrbufptr; - vcb->vcbFlags |= 0xFF00; + hfsmp->vcbFlags |= 0xFF00; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + + return (error); } +#endif /* * Calculate the total size of an attribute block. 
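/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * 32/64-bit timestamp packing pattern repeated throughout the hunks above.
 * A 64-bit caller of getattrlist() lays out its attribute buffer with
 * struct user_timespec (64-bit fields), while a 32-bit caller expects the
 * native struct timespec, so the packing code and hfs_attrblksize() must
 * agree on the element size. Assumes the kernel's user_timespec/timespec
 * definitions; the helper name pack_timestamp is hypothetical.
 */
static void *
pack_timestamp(void *attrbufptr, time_t secs, boolean_t is_64_bit)
{
	if (is_64_bit) {
		struct user_timespec *ts = (struct user_timespec *)attrbufptr;
		ts->tv_sec = secs;
		ts->tv_nsec = 0;
		return (ts + 1);	/* advance by the 64-bit layout */
	} else {
		struct timespec *ts = (struct timespec *)attrbufptr;
		ts->tv_sec = secs;
		ts->tv_nsec = 0;
		return (ts + 1);	/* advance by the 32-bit layout */
	}
}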
@@ -1770,7 +1906,14 @@ hfs_attrblksize(struct attrlist *attrlist) { int size; attrgroup_t a; + int sizeof_timespec; + boolean_t is_64_bit = proc_is64bit(current_proc()); + if (is_64_bit) + sizeof_timespec = sizeof(struct user_timespec); + else + sizeof_timespec = sizeof(struct timespec); + #if ((ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | ATTR_CMN_OBJTYPE | \ ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | \ ATTR_CMN_PAROBJID | ATTR_CMN_SCRIPT | ATTR_CMN_CRTIME | \ @@ -1828,55 +1971,55 @@ hfs_attrblksize(struct attrlist *attrlist) if (a & ATTR_CMN_OBJPERMANENTID) size += sizeof(fsobj_id_t); if (a & ATTR_CMN_PAROBJID) size += sizeof(fsobj_id_t); if (a & ATTR_CMN_SCRIPT) size += sizeof(text_encoding_t); - if (a & ATTR_CMN_CRTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_MODTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_CHGTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_ACCTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_BKUPTIME) size += sizeof(struct timespec); + if (a & ATTR_CMN_CRTIME) size += sizeof_timespec; + if (a & ATTR_CMN_MODTIME) size += sizeof_timespec; + if (a & ATTR_CMN_CHGTIME) size += sizeof_timespec; + if (a & ATTR_CMN_ACCTIME) size += sizeof_timespec; + if (a & ATTR_CMN_BKUPTIME) size += sizeof_timespec; if (a & ATTR_CMN_FNDRINFO) size += 32 * sizeof(u_int8_t); if (a & ATTR_CMN_OWNERID) size += sizeof(uid_t); if (a & ATTR_CMN_GRPID) size += sizeof(gid_t); - if (a & ATTR_CMN_ACCESSMASK) size += sizeof(u_long); - if (a & ATTR_CMN_NAMEDATTRCOUNT) size += sizeof(u_long); + if (a & ATTR_CMN_ACCESSMASK) size += sizeof(uint32_t); + if (a & ATTR_CMN_NAMEDATTRCOUNT) size += sizeof(uint32_t); if (a & ATTR_CMN_NAMEDATTRLIST) size += sizeof(struct attrreference); - if (a & ATTR_CMN_FLAGS) size += sizeof(u_long); - if (a & ATTR_CMN_USERACCESS) size += sizeof(u_long); + if (a & ATTR_CMN_FLAGS) size += sizeof(uint32_t); + if (a & ATTR_CMN_USERACCESS) size += sizeof(uint32_t); }; if ((a = attrlist->volattr) != 0) { - if (a & ATTR_VOL_FSTYPE) size += sizeof(u_long); - if (a & ATTR_VOL_SIGNATURE) size += sizeof(u_long); + if (a & ATTR_VOL_FSTYPE) size += sizeof(uint32_t); + if (a & ATTR_VOL_SIGNATURE) size += sizeof(uint32_t); if (a & ATTR_VOL_SIZE) size += sizeof(off_t); if (a & ATTR_VOL_SPACEFREE) size += sizeof(off_t); if (a & ATTR_VOL_SPACEAVAIL) size += sizeof(off_t); if (a & ATTR_VOL_MINALLOCATION) size += sizeof(off_t); if (a & ATTR_VOL_ALLOCATIONCLUMP) size += sizeof(off_t); - if (a & ATTR_VOL_IOBLOCKSIZE) size += sizeof(u_long); - if (a & ATTR_VOL_OBJCOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_FILECOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_DIRCOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_MAXOBJCOUNT) size += sizeof(u_long); + if (a & ATTR_VOL_IOBLOCKSIZE) size += sizeof(uint32_t); + if (a & ATTR_VOL_OBJCOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_FILECOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_DIRCOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_MAXOBJCOUNT) size += sizeof(uint32_t); if (a & ATTR_VOL_MOUNTPOINT) size += sizeof(struct attrreference); if (a & ATTR_VOL_NAME) size += sizeof(struct attrreference); - if (a & ATTR_VOL_MOUNTFLAGS) size += sizeof(u_long); + if (a & ATTR_VOL_MOUNTFLAGS) size += sizeof(uint32_t); if (a & ATTR_VOL_MOUNTEDDEVICE) size += sizeof(struct attrreference); if (a & ATTR_VOL_ENCODINGSUSED) size += sizeof(unsigned long long); if (a & ATTR_VOL_CAPABILITIES) size += sizeof(vol_capabilities_attr_t); if (a & ATTR_VOL_ATTRIBUTES) size += sizeof(vol_attributes_attr_t); 
}; if ((a = attrlist->dirattr) != 0) { - if (a & ATTR_DIR_LINKCOUNT) size += sizeof(u_long); - if (a & ATTR_DIR_ENTRYCOUNT) size += sizeof(u_long); - if (a & ATTR_DIR_MOUNTSTATUS) size += sizeof(u_long); + if (a & ATTR_DIR_LINKCOUNT) size += sizeof(uint32_t); + if (a & ATTR_DIR_ENTRYCOUNT) size += sizeof(uint32_t); + if (a & ATTR_DIR_MOUNTSTATUS) size += sizeof(uint32_t); }; if ((a = attrlist->fileattr) != 0) { - if (a & ATTR_FILE_LINKCOUNT) size += sizeof(u_long); + if (a & ATTR_FILE_LINKCOUNT) size += sizeof(uint32_t); if (a & ATTR_FILE_TOTALSIZE) size += sizeof(off_t); if (a & ATTR_FILE_ALLOCSIZE) size += sizeof(off_t); - if (a & ATTR_FILE_IOBLOCKSIZE) size += sizeof(size_t); - if (a & ATTR_FILE_CLUMPSIZE) size += sizeof(off_t); - if (a & ATTR_FILE_DEVTYPE) size += sizeof(u_long); - if (a & ATTR_FILE_FILETYPE) size += sizeof(u_long); - if (a & ATTR_FILE_FORKCOUNT) size += sizeof(u_long); + if (a & ATTR_FILE_IOBLOCKSIZE) size += sizeof(uint32_t); + if (a & ATTR_FILE_CLUMPSIZE) size += sizeof(uint32_t); + if (a & ATTR_FILE_DEVTYPE) size += sizeof(uint32_t); + if (a & ATTR_FILE_FILETYPE) size += sizeof(uint32_t); + if (a & ATTR_FILE_FORKCOUNT) size += sizeof(uint32_t); if (a & ATTR_FILE_FORKLIST) size += sizeof(struct attrreference); if (a & ATTR_FILE_DATALENGTH) size += sizeof(off_t); if (a & ATTR_FILE_DATAALLOCSIZE) size += sizeof(off_t); @@ -1897,17 +2040,15 @@ hfs_attrblksize(struct attrlist *attrlist) __private_extern__ unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, - struct mount *mp, struct ucred *cred, struct proc *p) + struct mount *mp, kauth_cred_t cred, struct proc *p) { - register gid_t *gp; unsigned long permissions; - int i; if (obj_uid == UNKNOWNUID) - obj_uid = p->p_ucred->cr_uid; + obj_uid = kauth_cred_getuid(proc_ucred(p)); /* User id 0 (root) always gets access. */ - if (cred->cr_uid == 0) { + if (!suser(cred, NULL)) { permissions = R_OK | W_OK | X_OK; goto Exit; }; @@ -1919,12 +2060,12 @@ DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, } /* Otherwise, check the groups. */ - if (! (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS)) { - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) { - if (obj_gid == *gp) { - permissions = ((unsigned long)obj_mode & S_IRWXG) >> 3; - goto Exit; - } + if (! (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)) { + int is_member; + + if (kauth_cred_ismember_gid(cred, obj_gid, &is_member) == 0 && is_member) { + permissions = ((unsigned long)obj_mode & S_IRWXG) >> 3; + goto Exit; } } diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 3791361fa..688983419 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -65,23 +66,26 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option OSStatus retval = E_NONE; struct buf *bp = NULL; - if (options & kGetEmptyBlock) - bp = getblk(vp, blockNum, block->blockSize, 0, 0, BLK_META); - else - retval = meta_bread(vp, blockNum, block->blockSize, NOCRED, &bp); - - DBG_ASSERT(bp != NULL); - DBG_ASSERT(bp->b_data != NULL); - DBG_ASSERT(bp->b_bcount == block->blockSize); - DBG_ASSERT(bp->b_lblkno == blockNum); + if (options & kGetEmptyBlock) { + daddr64_t blkno; + off_t offset; + offset = (daddr64_t)blockNum * (daddr64_t)block->blockSize; + bp = buf_getblk(vp, (daddr64_t)blockNum, block->blockSize, 0, 0, BLK_META); + if (bp && + VNOP_BLOCKMAP(vp, offset, block->blockSize, &blkno, NULL, NULL, 0, NULL) == 0) { + buf_setblkno(bp, blkno); + } + } else { + retval = buf_meta_bread(vp, (daddr64_t)blockNum, block->blockSize, NOCRED, &bp); + } if (bp == NULL) retval = -1; //XXX need better error if (retval == E_NONE) { block->blockHeader = bp; - block->buffer = bp->b_data; - block->blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; /* not found in cache ==> came from disk */ + block->buffer = (char *)buf_dataptr(bp); + block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */ // XXXdbg block->isModified = 0; @@ -91,8 +95,8 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option if (!(options & kGetEmptyBlock)) { /* This happens when we first open the b-tree, we might not have all the node data on hand */ if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && - (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != bp->b_bcount) && - (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != bp->b_bcount)) { + (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && + (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */ SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3); @@ -110,7 +114,7 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option #endif } else { if (bp) - brelse(bp); + buf_brelse(bp); block->blockHeader = NULL; block->buffer = NULL; } @@ -143,15 +147,15 @@ static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) { #if BYTE_ORDER == LITTLE_ENDIAN - struct vnode *vp = bp->b_vp; + struct vnode *vp = buf_vnode(bp); BlockDescriptor block; /* Prepare the block pointer */ block.blockHeader = bp; - block.buffer = bp->b_data; + block.buffer = (char *)buf_dataptr(bp); /* not found in cache ==> came from disk */ - block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; - block.blockSize = bp->b_bcount; + block.blockReadFromDisk = (buf_fromcache(bp) == 0); + block.blockSize = buf_count(bp); // XXXdbg have to swap the data before it goes in the journal SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1); @@ -177,11 +181,12 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } if (options & kTrashBlock) { - bp->b_flags |= B_INVAL; - if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) { + buf_markinvalid(bp); + + if (hfsmp->jnl && (buf_flags(bp) & B_LOCKED)) { journal_kill_block(hfsmp->jnl, bp); } else { - brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ 
+ buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } } else { if (options & kForceWriteBlock) { @@ -193,27 +198,29 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock retval = btree_journal_modify_block_end(hfsmp, bp); blockPtr->isModified = 0; } else { - retval = VOP_BWRITE(bp); + retval = VNOP_BWRITE(bp); } } else if (options & kMarkBlockDirty) { + struct timeval tv; + microuptime(&tv); if ((options & kLockTransaction) && hfsmp->jnl == NULL) { /* * - * Set the B_LOCKED flag and unlock the buffer, causing brelse to move + * Set the B_LOCKED flag and unlock the buffer, causing buf_brelse to move * the buffer onto the LOCKED free list. This is necessary, otherwise - * getnewbuf() would try to reclaim the buffers using bawrite, which + * getnewbuf() would try to reclaim the buffers using buf_bawrite, which * isn't going to work. * */ - extern int count_lock_queue __P((void)); + extern int count_lock_queue(void); + /* Don't hog all the buffers... */ if (count_lock_queue() > kMaxLockedMetaBuffers) { hfs_btsync(vp, HFS_SYNCTRANS); /* Rollback sync time to cause a sync on lock release... */ - (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1)); + (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1)); } - - bp->b_flags |= B_LOCKED; + buf_setflags(bp, B_LOCKED); } /* @@ -230,13 +237,14 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock } else if (bdwrite_internal(bp, 1) != 0) { hfs_btsync(vp, 0); /* Rollback sync time to cause a sync on lock release... */ - (void) BTSetLastSync(VTOF(vp), time.tv_sec - (kMaxSecsForFsync + 1)); - bp->b_flags &= ~B_LOCKED; - bawrite(bp); + (void) BTSetLastSync(VTOF(vp), tv.tv_sec - (kMaxSecsForFsync + 1)); + + buf_clearflags(bp, B_LOCKED); + buf_bawrite(bp); } } else { // check if we had previously called journal_modify_block_start() - // on this block and if so, abort it (which will call brelse()). + // on this block and if so, abort it (which will call buf_brelse()). if (hfsmp->jnl && blockPtr->isModified) { // XXXdbg - I don't want to call modify_block_abort() // because I think it may be screwing up the @@ -248,7 +256,7 @@ OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlock btree_journal_modify_block_end(hfsmp, bp); blockPtr->isModified = 0; } else { - brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ + buf_brelse(bp); /* note: B-tree code will clear blockPtr->blockHeader and blockPtr->buffer */ } }; }; @@ -263,7 +271,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) { #pragma unused (maxEOF) - OSStatus retval, ret; + OSStatus retval = 0, ret = 0; UInt64 actualBytesAdded, origSize; UInt64 bytesToAdd; u_int32_t startAllocation; @@ -272,7 +280,8 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) ExtendedVCB *vcb; FCB *filePtr; struct proc *p = NULL; - UInt64 trim = 0; + UInt64 trim = 0; + int lockflags = 0; filePtr = GetFileControlBlock(vp); @@ -295,17 +304,14 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) * return an error if an attempt is made to extend the Extents B-tree * when the resident extents are exhausted. 
*/ - /* XXX warning - this can leave the volume bitmap unprotected during ExtendFileC call */ - if(VTOC(vp)->c_fileid != kHFSExtentsFileID) - { - p = current_proc(); - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, p); - if (retval) - return (retval); - } - (void) BTGetInformation(filePtr, 0, &btInfo); + /* Protect allocation bitmap and extents overflow file. */ + lockflags = SFL_BITMAP; + if (VTOC(vp)->c_fileid != kHFSExtentsFileID) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(vcb, lockflags, HFS_EXCLUSIVE_LOCK); + + (void) BTGetInformation(filePtr, 0, &btInfo); #if 0 // XXXdbg /* @@ -350,7 +356,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) } } } while (retval == dskFulErr && actualBytesAdded == 0); - + /* * If a new extent was added then move the roving allocator * reference forward by the current b-tree file size so @@ -411,7 +417,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) // XXXdbg - this probably doesn't need to be a panic() panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %d)\n", filePtr->fcbEOF, trim, ret); - return ret; + goto out; } actualBytesAdded -= trim; } @@ -421,10 +427,10 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) * Get any extents overflow b-tree changes to disk ASAP! */ (void) BTFlushPath(VTOF(vcb->extentsRefNum)); - (void) VOP_FSYNC(vcb->extentsRefNum, NOCRED, MNT_WAIT, p); - - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + (void) hfs_fsync(vcb->extentsRefNum, MNT_WAIT, 0, p); } + hfs_systemfile_unlock(vcb, lockflags); + lockflags = 0; if ((filePtr->fcbEOF % btInfo.nodeSize) != 0) { panic("hfs: extendbtree: fcb 0x%x has eof 0x%llx not a multiple of 0x%x (trim %llx)\n", @@ -438,18 +444,22 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) (VTOC(vp)->c_fileid == kHFSCatalogFileID) || (VTOC(vp)->c_fileid == kHFSAttributesFileID) ) { + VTOC(vp)->c_flag |= C_MODIFIED; MarkVCBDirty( vcb ); ret = hfs_flushvolumeheader(VCBTOHFS(vcb), MNT_WAIT, HFS_ALTFLUSH); } else { - struct timeval tv = time; - - VTOC(vp)->c_flag |= C_CHANGE | C_UPDATE; - (void) VOP_UPDATE(vp, &tv, &tv, MNT_WAIT); + VTOC(vp)->c_touch_chgtime = TRUE; + VTOC(vp)->c_touch_modtime = TRUE; + (void) hfs_update(vp, TRUE); } ret = ClearBTNodes(vp, btInfo.nodeSize, filePtr->fcbEOF - actualBytesAdded, actualBytesAdded); - if (ret) - return (ret); +out: + if (retval == 0) + retval = ret; + + if (lockflags) + hfs_systemfile_unlock(vcb, lockflags); return retval; } @@ -463,14 +473,14 @@ ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) { struct hfsmount *hfsmp = VTOHFS(vp); struct buf *bp = NULL; - daddr_t blk; - daddr_t blkcnt; + daddr64_t blk; + daddr64_t blkcnt; blk = offset / blksize; blkcnt = amount / blksize; while (blkcnt > 0) { - bp = getblk(vp, blk, blksize, 0, 0, BLK_META); + bp = buf_getblk(vp, blk, blksize, 0, 0, BLK_META); if (bp == NULL) continue; @@ -480,9 +490,9 @@ ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) // become *way* too large //journal_modify_block_start(hfsmp->jnl, bp); } + bzero((char *)buf_dataptr(bp), blksize); - bzero((char *)bp->b_data, blksize); - bp->b_flags |= B_AGE; + buf_markaged(bp); // XXXdbg if (hfsmp->jnl) { @@ -493,15 +503,15 @@ ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) // XXXdbg - remove this once we decide what to do with the // 
writes to the journal if ((blk % 32) == 0) - VOP_BWRITE(bp); + VNOP_BWRITE(bp); else - bawrite(bp); + buf_bawrite(bp); } else { /* wait/yield every 32 blocks so we don't hog all the buffers */ if ((blk % 32) == 0) - VOP_BWRITE(bp); + VNOP_BWRITE(bp); else - bawrite(bp); + buf_bawrite(bp); } --blkcnt; ++blk; @@ -509,3 +519,166 @@ ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) return (0); } + + +extern char hfs_attrname[]; + +extern int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey); + +int hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecnt); + +/* + * Create an HFS+ Attribute B-tree File. + * + * A journal transaction must be already started. + */ +int +hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecnt) +{ + struct vnode* vp = NULL; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork cfork; + BlockDescriptor blkdesc; + BTNodeDescriptor *ndp; + BTHeaderRec *bthp; + BTreeControlBlockPtr btcb = NULL; + struct buf *bp = NULL; + void * buffer; + u_int16_t *index; + u_int16_t offset; + int result; + + printf("Creating HFS+ Attribute B-tree File (%d nodes) on %s\n", nodecnt, hfsmp->vcbVN); + + /* + * Set up Attribute B-tree vnode + */ + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_flags |= CD_ISMETA; + cndesc.cd_nameptr = hfs_attrname; + cndesc.cd_namelen = strlen(hfs_attrname); + cndesc.cd_cnid = kHFSAttributesFileID; + + bzero(&cnattr, sizeof(cnattr)); + cnattr.ca_nlink = 1; + cnattr.ca_mode = S_IFREG; + cnattr.ca_fileid = cndesc.cd_cnid; + + bzero(&cfork, sizeof(cfork)); + cfork.cf_clump = nodesize * nodecnt; + + result = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, &vp); + if (result) + return (result); + + /* + * Set up Attribute B-tree control block + */ + MALLOC(btcb, BTreeControlBlock *, sizeof(BTreeControlBlock), M_TEMP, M_WAITOK); + bzero(btcb, sizeof(BTreeControlBlock)); + + btcb->nodeSize = nodesize; + btcb->maxKeyLength = kHFSPlusAttrKeyMaximumLength; + btcb->btreeType = 0xFF; + btcb->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask; + btcb->version = kBTreeVersion; + btcb->writeCount = 1; + btcb->flags = 0; /* kBTHeaderDirty */ + btcb->fileRefNum = vp; + btcb->getBlockProc = GetBTreeBlock; + btcb->releaseBlockProc = ReleaseBTreeBlock; + btcb->setEndOfForkProc = ExtendBTreeFile; + btcb->keyCompareProc = (KeyCompareProcPtr)hfs_attrkeycompare; + VTOF(vp)->fcbBTCBPtr = btcb; + + /* + * Allocate some space + */ + result = ExtendBTreeFile(vp, nodesize, cfork.cf_clump); + if (result) + goto exit; + + btcb->totalNodes = VTOF(vp)->ff_size / nodesize; + btcb->freeNodes = btcb->totalNodes - 1; + + /* + * Initialize the b-tree header on disk + */ + bp = buf_getblk(vp, 0, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + if (buf_size(bp) != nodesize) + panic("hfs_create_attr_btree: bad buffer size (%d)\n", buf_size(bp)); + + bzero(buffer, nodesize); + index = (int16_t *)buffer; + + /* FILL IN THE NODE DESCRIPTOR: */ + ndp = (BTNodeDescriptor *)buffer; + ndp->kind = kBTHeaderNode; + ndp->numRecords = 3; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + /* FILL IN THE HEADER RECORD: */ + bthp = (BTHeaderRec *)((UInt8 *)buffer + offset); + 
bthp->nodeSize = nodesize; + bthp->totalNodes = btcb->totalNodes; + bthp->freeNodes = btcb->freeNodes; + bthp->clumpSize = cfork.cf_clump; + bthp->btreeType = 0xFF; + bthp->attributes = kBTVariableIndexKeysMask | kBTBigKeysMask; + bthp->maxKeyLength = kHFSPlusAttrKeyMaximumLength; + bthp->keyCompareType = kHFSBinaryCompare; + offset += sizeof(BTHeaderRec); + index[(nodesize / 2) - 2] = offset; + + /* FILL IN THE USER RECORD: */ + offset += kBTreeHeaderUserBytes; + index[(nodesize / 2) - 3] = offset; + + /* FILL IN THE MAP RECORD (only one node in use). */ + *((u_int8_t *)buffer + offset) = 0x80; + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); + index[(nodesize / 2) - 4] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + + /* Publish new btree file */ + hfsmp->hfs_attribute_vp = vp; + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + +exit: + hfs_unlock(VTOC(vp)); + if (result) { + if (btcb) { + FREE (btcb, M_TEMP); + } + vnode_put(vp); + // hfs_truncate(); /* XXX need to give back blocks */ + } + return (result); +} + + + diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 87a7e6bf0..64dd3c8fb 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -39,10 +38,8 @@ #include "hfs_endian.h" #include "hfscommon/headers/BTreesInternal.h" -#include "hfscommon/headers/CatalogPrivate.h" #include "hfscommon/headers/HFSUnicodeWrappers.h" -extern OSErr PositionIterator(CatalogIterator *cip, UInt32 offset, BTreeIterator *bip, UInt16 *op); /* * Initialization of an FSBufferDescriptor structure. 
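/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * buffer-cache accessor migration applied throughout the hfs_btreeio.c
 * hunks above. struct buf becomes opaque in this xnu release, so direct
 * field access is replaced by accessor calls; the helper name
 * read_meta_block is hypothetical, the buf_* calls are the ones the
 * patch itself uses.
 */
static int
read_meta_block(vnode_t vp, daddr64_t blkno, int size, BlockDescriptor *block)
{
	buf_t bp = NULL;
	int error;

	error = buf_meta_bread(vp, blkno, size, NOCRED, &bp);	/* was: meta_bread() */
	if (error) {
		if (bp)
			buf_brelse(bp);				/* was: brelse(bp) */
		return (error);
	}
	block->blockHeader = bp;
	block->buffer = (char *)buf_dataptr(bp);		/* was: bp->b_data */
	block->blockReadFromDisk = (buf_fromcache(bp) == 0);	/* was: !(bp->b_flags & B_CACHE) */
	block->blockSize = buf_count(bp);			/* was: bp->b_bcount */
	return (0);
}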
@@ -68,9 +65,26 @@ struct update_state { struct hfsmount * s_hfsmp; }; +struct position_state { + int error; + u_int32_t count; + u_int32_t index; + u_int32_t parentID; + struct hfsmount *hfsmp; +}; + +/* Map file mode type to directory entry types */ +u_char modetodirtype[16] = { + DT_REG, DT_FIFO, DT_CHR, DT_UNKNOWN, + DT_DIR, DT_UNKNOWN, DT_BLK, DT_UNKNOWN, + DT_REG, DT_UNKNOWN, DT_LNK, DT_UNKNOWN, + DT_SOCK, DT_UNKNOWN, DT_WHT, DT_UNKNOWN +}; +#define MODE_TO_DT(mode) (modetodirtype[((mode) & S_IFMT) >> 12]) + static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantrsrc, - struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp); + struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid); static int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp); @@ -84,7 +98,8 @@ extern int unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, /* Internal catalog support routines */ -int resolvelink(struct hfsmount *hfsmp, u_long linkref, struct HFSPlusCatalogFile *recp); +static int cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, + struct position_state *state); static int resolvelinkid(struct hfsmount *hfsmp, u_long linkref, ino_t *ino); @@ -97,7 +112,7 @@ static void buildthreadkey(HFSCatalogNodeID parentID, int std_hfs, CatalogKey *k static void buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding, CatalogRecord *crp, int *recordSize); -static int catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, struct update_state *state); +static int catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *state); static int builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_long hint, u_long encoding, int isdir, struct cat_desc *descp); @@ -122,21 +137,18 @@ int cat_preflight(struct hfsmount *hfsmp, catops_t ops, cat_cookie_t *cookie, struct proc *p) { FCB *fcb; + int lockflags; int result; - fcb = GetFileControlBlock(HFSTOVCB(hfsmp)->catalogRefNum); + fcb = GetFileControlBlock(hfsmp->hfs_catalog_vp); - /* Lock catalog b-tree */ - result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (result) - return (result); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); result = BTReserveSpace(fcb, ops, (void*)cookie); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); - MacToVFSError(result); + return MacToVFSError(result); } __private_extern__ @@ -144,15 +156,15 @@ void cat_postflight(struct hfsmount *hfsmp, cat_cookie_t *cookie, struct proc *p) { FCB *fcb; - int error; + int lockflags; - fcb = GetFileControlBlock(HFSTOVCB(hfsmp)->catalogRefNum); + fcb = GetFileControlBlock(hfsmp->hfs_catalog_vp); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); (void) BTReleaseReserve(fcb, (void*)cookie); - if (error == 0) { - hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - } + + hfs_systemfile_unlock(hfsmp, lockflags); } @@ -261,7 +273,7 @@ cat_releasedesc(struct cat_desc *descp) descp->cd_nameptr = NULL; descp->cd_namelen = 0; descp->cd_flags &= ~CD_HASBUF; - remove_name(name); + vfs_removename(name); } descp->cd_nameptr = NULL; descp->cd_namelen = 0; @@ -279,7 +291,7 @@ __private_extern__ int 
cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, struct cat_desc *outdescp, struct cat_attr *attrp, - struct cat_fork *forkp) + struct cat_fork *forkp, cnid_t *desc_cnid) { CatalogKey * keyp; int std_hfs; @@ -293,11 +305,23 @@ cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, if (result) goto exit; - result = cat_lookupbykey(hfsmp, keyp, descp->cd_hint, wantrsrc, outdescp, attrp, forkp); + result = cat_lookupbykey(hfsmp, keyp, descp->cd_hint, wantrsrc, outdescp, attrp, forkp, desc_cnid); if (result == ENOENT) { if (!std_hfs) { + struct cat_desc temp_desc; + if (outdescp == NULL) { + bzero(&temp_desc, sizeof(temp_desc)); + outdescp = &temp_desc; + } result = cat_lookupmangled(hfsmp, descp, wantrsrc, outdescp, attrp, forkp); + if (desc_cnid) { + *desc_cnid = outdescp->cd_cnid; + } + if (outdescp == &temp_desc) { + /* Release the local copy of desc */ + cat_releasedesc(outdescp); + } } else if (hfsmp->hfs_encoding != kTextEncodingMacRoman) { // make MacRoman key from utf-8 // result = cat_lookupbykey(hfsmp, keyp, descp->cd_hint, attrp, forkp); @@ -366,6 +390,78 @@ exit: } +/* + * cat_findname - obtain a descriptor from cnid + * + * Only a thread lookup is performed. + */ +__private_extern__ +int +cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp) +{ + struct BTreeIterator * iterator; + FSBufferDescriptor btdata; + CatalogKey * keyp; + CatalogRecord * recp; + int isdir; + int result; + int std_hfs; + + isdir = 0; + std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); + + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); + iterator->hint.nodeNum = 0; + + MALLOC(recp, CatalogRecord *, sizeof(CatalogRecord), M_TEMP, M_WAITOK); + BDINIT(btdata, recp); + + result = BTSearchRecord(VTOF(hfsmp->hfs_catalog_vp), iterator, &btdata, NULL, NULL); + if (result) + goto exit; + + /* Turn thread record into a cnode key (in place). 
*/ + switch (recp->recordType) { + case kHFSFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSFileThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + break; + + case kHFSPlusFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSPlusFileThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + break; + default: + result = ENOENT; + goto exit; + } + if (std_hfs) { + HFSPlusCatalogKey * pluskey = NULL; + u_long encoding; + + MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); + promotekey(hfsmp, &keyp->hfs, pluskey, &encoding); + builddesc(pluskey, cnid, 0, encoding, isdir, outdescp); + FREE(pluskey, M_TEMP); + + } else { + builddesc((HFSPlusCatalogKey *)keyp, cnid, 0, 0, isdir, outdescp); + } +exit: + FREE(recp, M_TEMP); + FREE(iterator, M_TEMP); + + return MacToVFSError(result); +} + /* * cat_idlookup - lookup a catalog node using a cnode id */ @@ -416,7 +512,7 @@ cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp, goto exit; } - result = cat_lookupbykey(hfsmp, keyp, 0, 0, outdescp, attrp, forkp); + result = cat_lookupbykey(hfsmp, keyp, 0, 0, outdescp, attrp, forkp, NULL); exit: FREE(recp, M_TEMP); FREE(iterator, M_TEMP); @@ -468,7 +564,7 @@ falsematch: */ static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantrsrc, - struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp) + struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid) { struct BTreeIterator * iterator; FSBufferDescriptor btdata; @@ -516,8 +612,8 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantr && (recp->recordType == kHFSPlusFileRecord) && (SWAP_BE32(recp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && (SWAP_BE32(recp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) - && ((to_bsd_time(recp->hfsPlusFile.createDate) == HFSTOVCB(hfsmp)->vcbCrDate) || - (to_bsd_time(recp->hfsPlusFile.createDate) == hfsmp->hfs_metadata_createdate))) { + && ((to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)HFSTOVCB(hfsmp)->vcbCrDate) || + (to_bsd_time(recp->hfsPlusFile.createDate) == (time_t)hfsmp->hfs_metadata_createdate))) { ilink = recp->hfsPlusFile.bsdInfo.special.iNodeNum; @@ -588,6 +684,10 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, u_long hint, int wantr FREE(pluskey, M_TEMP); } } + + if (desc_cnid != NULL) { + *desc_cnid = cnid; + } exit: FREE(iterator, M_TEMP); FREE(recp, M_TEMP); @@ -611,23 +711,46 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr u_int32_t nextCNID; u_int32_t datalen; int std_hfs; - int result; + int result = 0; u_long encoding; int modeformat; + int mntlock = 0; modeformat = attrp->ca_mode & S_IFMT; vcb = HFSTOVCB(hfsmp); fcb = GetFileControlBlock(vcb->catalogRefNum); - nextCNID = vcb->vcbNxtCNID; std_hfs = (vcb->vcbSigWord == kHFSSigWord); - if (std_hfs && nextCNID == 0xFFFFFFFF) - return (ENOSPC); + /* + * Atomically get the next CNID. If we have wrapped the CNIDs + * then keep the hfsmp lock held until we have found a CNID. 
+ */ + HFS_MOUNT_LOCK(hfsmp, TRUE); + mntlock = 1; + nextCNID = hfsmp->vcbNxtCNID; + if (nextCNID == 0xFFFFFFFF) { + if (std_hfs) { + result = ENOSPC; + } else { + hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; + hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; + } + } else { + hfsmp->vcbNxtCNID++; + } + hfsmp->vcbFlags |= 0xFF00; + /* OK to drop lock if CNIDs are not wrapping */ + if ((hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask) == 0) { + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + mntlock = 0; + if (result) + return (result); /* HFS only exit */ + } /* Get space for iterator, key and data */ MALLOC(bto, struct btobj *, sizeof(struct btobj), M_TEMP, M_WAITOK); - bzero(bto, sizeof(struct btobj)); + bto->iterator.hint.nodeNum = 0; result = buildkey(hfsmp, descp, &bto->key, 0); if (result) @@ -653,14 +776,11 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr buildthreadkey(nextCNID, std_hfs, (CatalogKey *) &bto->iterator.key); result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); - if (result == btExists && !std_hfs) { + if ((result == btExists) && !std_hfs && mntlock) { /* * Allow CNIDs on HFS Plus volumes to wrap around */ - ++nextCNID; - if (nextCNID < kHFSFirstUserCatalogNodeID) { - vcb->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; - vcb->vcbFlags |= 0xFF00; + if (++nextCNID < kHFSFirstUserCatalogNodeID) { nextCNID = kHFSFirstUserCatalogNodeID; } continue; @@ -669,6 +789,19 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr } if (result) goto exit; } + + /* + * CNID is now established. If we have wrapped then + * update the vcbNxtCNID and drop the vcb lock. + */ + if (mntlock) { + hfsmp->vcbNxtCNID = nextCNID + 1; + if (hfsmp->vcbNxtCNID < kHFSFirstUserCatalogNodeID) { + hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; + } + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + mntlock = 0; + } /* * Now insert the file/directory record @@ -716,18 +849,10 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr } attrp->ca_fileid = nextCNID; - /* Update parent stats */ - TrashCatalogIterator(vcb, descp->cd_parentcnid); - - /* Update volume stats */ - if (++nextCNID < kHFSFirstUserCatalogNodeID) { - vcb->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; - nextCNID = kHFSFirstUserCatalogNodeID; - } - vcb->vcbNxtCNID = nextCNID; - vcb->vcbFlags |= 0xFF00; - exit: + if (mntlock) + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + (void) BTFlushPath(fcb); FREE(bto, M_TEMP); @@ -796,7 +921,7 @@ cat_rename ( * When moving a directory, make sure its a valid move. */ if (directory && (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid)) { - struct BTreeIterator iterator = {0}; + struct BTreeIterator iterator; cnid_t cnid = from_cdp->cd_cnid; cnid_t pathcnid = todir_cdp->cd_parentcnid; @@ -807,7 +932,7 @@ cat_rename ( result = EINVAL; goto exit; } - + bzero(&iterator, sizeof(iterator)); /* * Traverese destination path all the way back to the root * making sure that source directory is not encountered. @@ -833,8 +958,33 @@ cat_rename ( */ result = BTSearchRecord(fcb, from_iterator, &btdata, &datasize, from_iterator); - if (result) - goto exit; + if (result) { + if (std_hfs || (result != btNotFound)) + goto exit; + + struct cat_desc temp_desc; + + /* Probably the node has mangled name */ + result = cat_lookupmangled(hfsmp, from_cdp, 0, &temp_desc, NULL, NULL); + if (result) + goto exit; + + /* The file has mangled name. 
Search the cnode data using the full name */ + bzero(from_iterator, sizeof(*from_iterator)); + result = buildkey(hfsmp, &temp_desc, (HFSPlusCatalogKey *)&from_iterator->key, 0); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + result = BTSearchRecord(fcb, from_iterator, &btdata, &datasize, from_iterator); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + cat_releasedesc(&temp_desc); + } /* Update the text encoding (on disk and in descriptor) */ if (!std_hfs) { @@ -862,11 +1012,6 @@ cat_rename ( goto exit; #endif - /* Trash the iterator caches */ - TrashCatalogIterator(vcb, from_cdp->cd_parentcnid); - if (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid) - TrashCatalogIterator(vcb, to_cdp->cd_parentcnid); - /* Step 2: Insert cnode at new location */ result = BTInsertRecord(fcb, to_iterator, &btdata, datasize); if (result == btExists) { @@ -1014,22 +1159,22 @@ cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr * A file must be zero length (no blocks) */ if (descp->cd_cnid < kHFSFirstUserCatalogNodeID || - descp->cd_parentcnid == kRootParID) + descp->cd_parentcnid == kHFSRootParentID) return (EINVAL); /* XXX Preflight Missing */ /* Get space for iterator */ MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); - bzero(iterator, sizeof(*iterator)); + iterator->hint.nodeNum = 0; /* * Derive a key from either the file ID (for a virtual inode) * or the descriptor. */ if (descp->cd_namelen == 0) { - result = getkey(hfsmp, attrp->ca_fileid, (CatalogKey *)&iterator->key); - cnid = attrp->ca_fileid; + result = getkey(hfsmp, attrp->ca_fileid, (CatalogKey *)&iterator->key); + cnid = attrp->ca_fileid; } else { result = buildkey(hfsmp, descp, (HFSPlusCatalogKey *)&iterator->key, 0); cnid = descp->cd_cnid; @@ -1039,15 +1184,39 @@ cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr /* Delete record */ result = BTDeleteRecord(fcb, iterator); - if (result) - goto exit; + if (result) { + if (std_hfs || (result != btNotFound)) + goto exit; + + struct cat_desc temp_desc; + + /* The node probably has a mangled name */ + result = cat_lookupmangled(hfsmp, descp, 0, &temp_desc, attrp, NULL); + if (result) + goto exit; + + /* The file has a mangled name. Delete the file using the full name */ + bzero(iterator, sizeof(*iterator)); + result = buildkey(hfsmp, &temp_desc, (HFSPlusCatalogKey *)&iterator->key, 0); + cnid = temp_desc.cd_cnid; + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + result = BTDeleteRecord(fcb, iterator); + if (result) { + cat_releasedesc(&temp_desc); + goto exit; + } + + cat_releasedesc(&temp_desc); + } /* Delete thread record, ignore errors */ buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); (void) BTDeleteRecord(fcb, iterator); - TrashCatalogIterator(vcb, descp->cd_parentcnid); - exit: (void) BTFlushPath(fcb); FREE(iterator, M_TEMP); @@ -1084,7 +1253,6 @@ cat_update(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr /* Get space for iterator */ MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); - bzero(iterator, sizeof(*iterator)); /* * For open-deleted files we need to do a lookup by cnid @@ -1124,8 +1292,7 @@ exit: * This is called from within BTUpdateRecord.
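Both cat_rename and cat_delete now share the same fallback shape: if the keyed B-tree operation misses with btNotFound on an HFS Plus volume, assume the cached name was a short mangled form, recover the full on-disk descriptor, rebuild the key, and retry once. A schematic sketch of that control flow, with hypothetical helper names standing in for BTSearchRecord, cat_lookupmangled, and buildkey:

#include <errno.h>

#define BT_NOT_FOUND (-1)               /* stands in for btNotFound */

struct desc { const char *name; };      /* stands in for struct cat_desc */

/* Hypothetical stand-ins for the real catalog primitives. */
extern int  search_by_desc(const struct desc *dp);
extern int  lookup_mangled(const struct desc *dp, struct desc *full);
extern void release_desc(struct desc *dp);

static int
search_with_mangled_fallback(const struct desc *dp, int std_hfs)
{
    int err = search_by_desc(dp);

    if (err == 0 || std_hfs || err != BT_NOT_FOUND)
        return err;                 /* hit, plain HFS, or a hard failure */

    /* The cached name was probably mangled; recover the on-disk name. */
    struct desc full = {0};
    if ((err = lookup_mangled(dp, &full)) != 0)
        return err;

    err = search_by_desc(&full);    /* retry once with the full name */
    release_desc(&full);
    return err;
}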
*/ static int -catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, - struct update_state *state) +catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *state) { struct cat_desc *descp; struct cat_attr *attrp; @@ -1199,15 +1366,18 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, dir = (struct HFSPlusCatalogFolder *)crp; /* Do a quick sanity check */ if ((ckp->hfsPlus.parentID != descp->cd_parentcnid) || - (dir->folderID != descp->cd_cnid)) + (dir->folderID != descp->cd_cnid)) return (btNotFound); + dir->flags = attrp->ca_recflags; dir->valence = attrp->ca_entries; dir->createDate = to_hfs_time(attrp->ca_itime); dir->contentModDate = to_hfs_time(attrp->ca_mtime); dir->backupDate = to_hfs_time(attrp->ca_btime); dir->accessDate = to_hfs_time(attrp->ca_atime); + attrp->ca_atimeondisk = attrp->ca_atime; dir->attributeModDate = to_hfs_time(attrp->ca_ctime); dir->textEncoding = descp->cd_encoding; + dir->attrBlocks = attrp->ca_attrblks; bcopy(&attrp->ca_finderinfo[0], &dir->userInfo, 32); /* * Update the BSD Info if it was already initialized on @@ -1237,8 +1407,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, ((attrp->ca_mode & ALLPERMS) != (hfsmp->hfs_dir_mask & ACCESSPERMS))) { if ((dir->bsdInfo.fileMode == 0) || - (HFSTOVFS(hfsmp)->mnt_flag & - MNT_UNKNOWNPERMISSIONS) == 0) { + (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) == 0) { dir->bsdInfo.ownerID = attrp->ca_uid; dir->bsdInfo.groupID = attrp->ca_gid; } @@ -1255,12 +1424,15 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, /* Do a quick sanity check */ if (file->fileID != attrp->ca_fileid) return (btNotFound); + file->flags = attrp->ca_recflags; file->createDate = to_hfs_time(attrp->ca_itime); file->contentModDate = to_hfs_time(attrp->ca_mtime); file->backupDate = to_hfs_time(attrp->ca_btime); file->accessDate = to_hfs_time(attrp->ca_atime); + attrp->ca_atimeondisk = attrp->ca_atime; file->attributeModDate = to_hfs_time(attrp->ca_ctime); file->textEncoding = descp->cd_encoding; + file->attrBlocks = attrp->ca_attrblks; bcopy(&attrp->ca_finderinfo[0], &file->userInfo, 32); /* * Update the BSD Info if it was already initialized on @@ -1290,8 +1462,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, ((attrp->ca_mode & ALLPERMS) != (hfsmp->hfs_file_mask & ACCESSPERMS))) { if ((file->bsdInfo.fileMode == 0) || - (HFSTOVFS(hfsmp)->mnt_flag & - MNT_UNKNOWNPERMISSIONS) == 0) { + (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) == 0) { file->bsdInfo.ownerID = attrp->ca_uid; file->bsdInfo.groupID = attrp->ca_gid; } @@ -1316,7 +1487,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, bcopy(&forkp->cf_extents[0], &file->dataFork.extents, sizeof(HFSPlusExtentRecord)); /* Push blocks read to disk */ - file->resourceFork.clumpSize = + file->dataFork.clumpSize = howmany(forkp->cf_bytesread, blksize); } @@ -1346,8 +1517,8 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, u_int16_t reclen, } /* - * catrec_readattr - - * This is called from within BTIterateRecords. + * Callback to collect directory entries. + * Called with readattr_state for each item in a directory. 
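The BTIterateRecords callbacks in this file all follow one convention: return nonzero to keep iterating, return zero to stop, and report any error through the state block rather than the return value. A minimal illustration of that contract with a toy iterator in place of the B-tree (names here are illustrative, not the kernel API):

#include <stddef.h>

struct walk_state {
    int error;      /* errors travel via the state block */
    int seen;
};

/* Callback: nonzero means continue, zero means stop. */
static int
count_cb(int item, struct walk_state *st)
{
    if (item < 0) {
        st->error = item;   /* record the error ... */
        return 0;           /* ... and stop the walk */
    }
    st->seen++;
    return 1;               /* continue */
}

/* Toy stand-in for BTIterateRecords(): walk until the callback stops it. */
static int
iterate(const int *items, size_t n,
        int (*cb)(int, struct walk_state *), struct walk_state *st)
{
    for (size_t i = 0; i < n; i++)
        if (!cb(items[i], st))
            break;
    return st->error;
}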
*/ struct readattr_state { struct hfsmount *hfsmp; @@ -1358,8 +1529,8 @@ struct readattr_state { }; static int -catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, - u_long node, struct readattr_state *state) +cat_readattr(const CatalogKey *key, const CatalogRecord *rec, + struct readattr_state *state) { struct cat_entrylist *list = state->list; struct hfsmount *hfsmp = state->hfsmp; @@ -1387,7 +1558,7 @@ catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, } /* Hide the private meta data directory and journal files */ - if (parentcnid == kRootDirID) { + if (parentcnid == kHFSRootFolderID) { if ((rec->recordType == kHFSPlusFolderRecord) && (rec->hfsPlusFolder.folderID == hfsmp->hfs_privdir_desc.cd_cnid)) { return (1); /* continue */ @@ -1401,7 +1572,6 @@ catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, } } - cep = &list->entry[list->realentries++]; if (state->stdhfs) { @@ -1414,7 +1584,7 @@ catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); promotekey(hfsmp, (HFSCatalogKey *)key, pluskey, &encoding); - builddesc(pluskey, getcnid(rec), node, encoding, isadir(rec), &cep->ce_desc); + builddesc(pluskey, getcnid(rec), 0, encoding, isadir(rec), &cep->ce_desc); FREE(pluskey, M_TEMP); if (rec->recordType == kHFSFileRecord) { @@ -1427,7 +1597,7 @@ catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, } } else { getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)rec, &cep->ce_attr); - builddesc((HFSPlusCatalogKey *)key, getcnid(rec), node, getencoding(rec), + builddesc((HFSPlusCatalogKey *)key, getcnid(rec), 0, getencoding(rec), isadir(rec), &cep->ce_desc); if (rec->recordType == kHFSPlusFileRecord) { @@ -1447,12 +1617,13 @@ catrec_readattr(const CatalogKey *key, const CatalogRecord *rec, } /* + * Pack a cat_entrylist buffer with attributes from the catalog + * * Note: index is zero relative */ __private_extern__ int -cat_getentriesattr(struct hfsmount *hfsmp, struct cat_desc *prevdesc, int index, - struct cat_entrylist *ce_list) +cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_entrylist *ce_list) { FCB* fcb; CatalogKey * key; @@ -1461,13 +1632,15 @@ cat_getentriesattr(struct hfsmount *hfsmp, struct cat_desc *prevdesc, int index, cnid_t parentcnid; int i; int std_hfs; + int index; + int have_key; int result = 0; ce_list->realentries = 0; fcb = GetFileControlBlock(HFSTOVCB(hfsmp)->catalogRefNum); std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); - parentcnid = prevdesc->cd_parentcnid; + parentcnid = dirhint->dh_desc.cd_parentcnid; state.hfsmp = hfsmp; state.list = ce_list; @@ -1478,37 +1651,63 @@ cat_getentriesattr(struct hfsmount *hfsmp, struct cat_desc *prevdesc, int index, MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); key = (CatalogKey *)&iterator->key; - iterator->hint.nodeNum = prevdesc->cd_hint; + have_key = 0; + iterator->hint.nodeNum = dirhint->dh_desc.cd_hint; + index = dirhint->dh_index + 1; /* - * If the last entry wasn't cached then establish the iterator + * Attempt to build a key from cached filename */ - if ((index == 0) || - (prevdesc->cd_namelen == 0) || - (buildkey(hfsmp, prevdesc, (HFSPlusCatalogKey *)key, 0) != 0)) { - int i; + if (dirhint->dh_desc.cd_namelen != 0) { + if (buildkey(hfsmp, &dirhint->dh_desc, (HFSPlusCatalogKey *)key, 0) == 0) { + have_key = 1; + } + } + + /* + * If the last entry wasn't cached then position the btree 
iterator + */ + if ((index == 0) || !have_key) { /* - * Position the iterator at the directory thread. - * (ie just before the first entry) + * Position the iterator at the directory's thread record. + * (i.e. just before the first entry) */ - buildthreadkey(parentcnid, std_hfs, key); + buildthreadkey(dirhint->dh_desc.cd_parentcnid, (hfsmp->hfs_flags & HFS_STANDARD), key); result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); - if (result) - goto exit; /* bad news */ + if (result) { + result = MacToVFSError(result); + goto exit; + } + /* * Iterate until we reach the entry just * before the one we want to start with. */ - for (i = 0; i < index; ++i) { - result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, NULL, NULL); - if (result) - goto exit; /* bad news */ + if (index > 0) { + struct position_state ps; + + ps.error = 0; + ps.count = 0; + ps.index = index; + ps.parentID = dirhint->dh_desc.cd_parentcnid; + ps.hfsmp = hfsmp; + + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)cat_findposition, &ps); + if (ps.error) + result = ps.error; + else + result = MacToVFSError(result); + if (result) { + result = MacToVFSError(result); + goto exit; + } } } - /* Fill list with entries. */ + /* Fill list with entries starting at iterator->key. */ result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, - (IterateCallBackProcPtr)catrec_readattr, &state); + (IterateCallBackProcPtr)cat_readattr, &state); if (state.error) result = state.error; @@ -1523,7 +1722,7 @@ cat_getentriesattr(struct hfsmount *hfsmp, struct cat_desc *prevdesc, int index, /* * Resolve any hard links. */ - for (i = 0; i < ce_list->realentries; ++i) { + for (i = 0; i < (int)ce_list->realentries; ++i) { struct FndrFileInfo *fip; struct cat_entry *cep; struct HFSPlusCatalogFile filerec; @@ -1539,8 +1738,8 @@ cat_getentriesattr(struct hfsmount *hfsmp, struct cat_desc *prevdesc, int index, if ((cep->ce_attr.ca_rdev != 0) && (SWAP_BE32(fip->fdType) == kHardLinkFileType) && (SWAP_BE32(fip->fdCreator) == kHFSPlusCreator) - && ((cep->ce_attr.ca_itime == HFSTOVCB(hfsmp)->vcbCrDate) || - (cep->ce_attr.ca_itime == hfsmp->hfs_metadata_createdate))) { + && ((cep->ce_attr.ca_itime == (time_t)HFSTOVCB(hfsmp)->vcbCrDate) || + (cep->ce_attr.ca_itime == (time_t)hfsmp->hfs_metadata_createdate))) { if (resolvelink(hfsmp, cep->ce_attr.ca_rdev, &filerec) != 0) continue; @@ -1558,109 +1757,113 @@ exit: return MacToVFSError(result); } -struct linkinfo { - u_long link_ref; - void * dirent_addr; -}; +#define SMALL_DIRENTRY_SIZE (int)(sizeof(struct dirent) - (MAXNAMLEN + 1) + 8) -struct read_state { - u_int32_t cbs_parentID; - u_int32_t cbs_hiddenDirID; - u_int32_t cbs_hiddenJournalID; - u_int32_t cbs_hiddenInfoBlkID; - off_t cbs_lastoffset; - struct uio * cbs_uio; - ExtendedVCB * cbs_vcb; - int8_t cbs_hfsPlus; - int8_t cbs_case_sensitive; - int16_t cbs_result; - int32_t cbs_numresults; - u_long *cbs_cookies; - int32_t cbs_ncookies; - int32_t cbs_nlinks; - int32_t cbs_maxlinks; - struct linkinfo *cbs_linkinfo; -}; +/* + * Callback to pack directory entries. + * Called with packdirentry_state for each item in a directory. + */ -/* Map file mode type to directory entry types */ -u_char modetodirtype[16] = { - DT_REG, DT_FIFO, DT_CHR, DT_UNKNOWN, - DT_DIR, DT_UNKNOWN, DT_BLK, DT_UNKNOWN, - DT_REG, DT_UNKNOWN, DT_LNK, DT_UNKNOWN, - DT_SOCK, DT_UNKNOWN, DT_WHT, DT_UNKNOWN +/* Hard link information collected during cat_getdirentries. 
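The MODE_TO_DT() macro used by the directory packer indexes a 16-entry table with the file-type bits of the mode: S_IFMT occupies the top nibble of the 16-bit mode, so (mode & S_IFMT) >> 12 yields an index in 0..15. A small self-contained check of that arithmetic, using the table as it appears in the source (assuming BSD-style dirent constants, including DT_WHT):

#include <assert.h>
#include <sys/stat.h>
#include <dirent.h>

static const unsigned char mode_to_dt[16] = {
    DT_REG,  DT_FIFO,    DT_CHR, DT_UNKNOWN,
    DT_DIR,  DT_UNKNOWN, DT_BLK, DT_UNKNOWN,
    DT_REG,  DT_UNKNOWN, DT_LNK, DT_UNKNOWN,
    DT_SOCK, DT_UNKNOWN, DT_WHT, DT_UNKNOWN
};
#define MODE_TO_DT(mode) (mode_to_dt[((mode) & S_IFMT) >> 12])

int main(void)
{
    /* S_IFDIR is 0040000 octal, i.e. 4 << 12, so index 4 -> DT_DIR. */
    assert(MODE_TO_DT(S_IFDIR | 0755) == DT_DIR);
    assert(MODE_TO_DT(S_IFREG | 0644) == DT_REG);
    assert(MODE_TO_DT(S_IFLNK | 0777) == DT_LNK);
    return 0;
}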
*/ +struct linkinfo { + u_long link_ref; + user_addr_t dirent_addr; +}; +typedef struct linkinfo linkinfo_t; + +/* State information for the cat_packdirentry callback function. */ +struct packdirentry_state { + int cbs_extended; + u_int32_t cbs_parentID; + u_int32_t cbs_index; + uio_t cbs_uio; + ExtendedVCB * cbs_hfsmp; + int cbs_result; + int32_t cbs_nlinks; + int32_t cbs_maxlinks; + linkinfo_t * cbs_linkinfo; + struct cat_desc * cbs_desc; +// struct dirent * cbs_stdentry; + struct direntry * cbs_direntry; }; - -#define MODE_TO_DT(mode) (modetodirtype[((mode) & S_IFMT) >> 12]) static int -catrec_read(const CatalogKey *ckp, const CatalogRecord *crp, - u_int16_t recordLen, struct read_state *state) +cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, + struct packdirentry_state *state) { struct hfsmount *hfsmp; CatalogName *cnp; - size_t utf8chars; - u_int32_t curID; + cnid_t curID; OSErr result; struct dirent catent; + struct direntry * entry = NULL; time_t itime; u_long ilinkref = 0; - void * uiobase; + cnid_t cnid; + int hide = 0; + u_int8_t type; + u_int8_t is_mangled = 0; + char *nameptr; + user_addr_t uiobase; + size_t namelen = 0; + size_t maxnamelen; + size_t uiosize = 0; + caddr_t uioaddr; - if (state->cbs_hfsPlus) - curID = ckp->hfsPlus.parentID; - else + hfsmp = state->cbs_hfsmp; + + if (hfsmp->hfs_flags & HFS_STANDARD) curID = ckp->hfs.parentID; + else + curID = ckp->hfsPlus.parentID; /* We're done when parent directory changes */ if (state->cbs_parentID != curID) { -lastitem: -/* - * The NSDirectoryList class chokes on empty records (it doesnt check d_reclen!) - * so remove padding for now... - */ -#if 0 - /* - * Pad the end of list with an empty record. - * This eliminates an extra call by readdir(3c). - */ - catent.d_fileno = 0; - catent.d_reclen = 0; - catent.d_type = 0; - catent.d_namlen = 0; - *(int32_t*)&catent.d_name[0] = 0; - - state->cbs_lastoffset = state->cbs_uio->uio_offset; - - state->cbs_result = uiomove((caddr_t) &catent, 12, state->cbs_uio); - if (state->cbs_result == 0) - state->cbs_result = ENOENT; -#else - state->cbs_lastoffset = state->cbs_uio->uio_offset; state->cbs_result = ENOENT; -#endif return (0); /* stop */ } - if (state->cbs_hfsPlus) { + if (state->cbs_extended) { + entry = state->cbs_direntry; + nameptr = &entry->d_name[0]; + maxnamelen = NAME_MAX; + } else { + nameptr = &catent.d_name[0]; + maxnamelen = NAME_MAX; + } + + if (!(hfsmp->hfs_flags & HFS_STANDARD)) { switch(crp->recordType) { case kHFSPlusFolderRecord: - catent.d_type = DT_DIR; - catent.d_fileno = crp->hfsPlusFolder.folderID; + type = DT_DIR; + cnid = crp->hfsPlusFolder.folderID; + /* Hide our private meta data directory */ + if ((curID == kHFSRootFolderID) && + (cnid == hfsmp->hfs_privdir_desc.cd_cnid)) { + hide = 1; + } + break; case kHFSPlusFileRecord: itime = to_bsd_time(crp->hfsPlusFile.createDate); - hfsmp = VCBTOHFS(state->cbs_vcb); /* * When a hardlink link is encountered save its link ref. 
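A hard-link stand-in is recognized here by three coincident marks: the Finder type and creator carry the link magic, and the record's creation date matches either the volume create date or the metadata directory's create date. A user-space sketch of that predicate (the struct and constants are simplified stand-ins for the real HFS Plus definitions; the four-char-code values are my reading of 'hlnk' and 'hfs+'):

#include <stdint.h>
#include <time.h>

#define K_HARDLINK_TYPE 0x686C6E6BU   /* 'hlnk' */
#define K_HFSPLUS_CREATOR 0x6866732BU /* 'hfs+' */

struct link_check {
    uint32_t fdType;        /* assumed already byte-swapped from disk */
    uint32_t fdCreator;
    time_t   createDate;    /* assumed already run through to_bsd_time() */
    time_t   vol_create;    /* models HFSTOVCB(hfsmp)->vcbCrDate */
    time_t   meta_create;   /* models hfsmp->hfs_metadata_createdate */
};

static int
is_hardlink_stand_in(const struct link_check *a)
{
    return (a->fdType == K_HARDLINK_TYPE &&
            a->fdCreator == K_HFSPLUS_CREATOR &&
            (a->createDate == a->vol_create ||
             a->createDate == a->meta_create));
}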
*/ if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) && - ((itime == state->cbs_vcb->vcbCrDate) || - (itime == hfsmp->hfs_metadata_createdate))) { + ((itime == (time_t)hfsmp->hfs_itime) || + (itime == (time_t)hfsmp->hfs_metadata_createdate))) { ilinkref = crp->hfsPlusFile.bsdInfo.special.iNodeNum; } - catent.d_type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode); - catent.d_fileno = crp->hfsPlusFile.fileID; + type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode); + cnid = crp->hfsPlusFile.fileID; + /* Hide the journal files */ + if ((curID == kHFSRootFolderID) && + (hfsmp->jnl) && + ((cnid == hfsmp->hfs_jnlfileid) || + (cnid == hfsmp->hfs_jnlinfoblkid))) { + hide = 1; + } break; default: return (0); /* stop */ @@ -1668,83 +1871,119 @@ lastitem: cnp = (CatalogName*) &ckp->hfsPlus.nodeName; result = utf8_encodestr(cnp->ustr.unicode, cnp->ustr.length * sizeof(UniChar), - catent.d_name, &utf8chars, kdirentMaxNameBytes + 1, ':', 0); + nameptr, &namelen, maxnamelen + 1, ':', 0); if (result == ENAMETOOLONG) { result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), - cnp->ustr.unicode, kdirentMaxNameBytes + 1, (ByteCount*)&utf8chars, catent.d_name, catent.d_fileno); + cnp->ustr.unicode, maxnamelen + 1, + (ByteCount*)&namelen, nameptr, + cnid); + is_mangled = 1; } } else { /* hfs */ switch(crp->recordType) { case kHFSFolderRecord: - catent.d_type = DT_DIR; - catent.d_fileno = crp->hfsFolder.folderID; + type = DT_DIR; + cnid = crp->hfsFolder.folderID; break; case kHFSFileRecord: - catent.d_type = DT_REG; - catent.d_fileno = crp->hfsFile.fileID; + type = DT_REG; + cnid = crp->hfsFile.fileID; break; default: return (0); /* stop */ }; cnp = (CatalogName*) ckp->hfs.nodeName; - result = hfs_to_utf8(state->cbs_vcb, cnp->pstr, kdirentMaxNameBytes + 1, - (ByteCount *)&utf8chars, catent.d_name); + result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1, + (ByteCount *)&namelen, nameptr); /* * When an HFS name cannot be encoded with the current * volume encoding we use MacRoman as a fallback. */ if (result) - result = mac_roman_to_utf8(cnp->pstr, kdirentMaxNameBytes + 1, - (ByteCount *)&utf8chars, catent.d_name); + result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1, + (ByteCount *)&namelen, nameptr); } - catent.d_namlen = utf8chars; - catent.d_reclen = DIRENTRY_SIZE(utf8chars); - - /* hide our private meta data directory */ - if (curID == kRootDirID && - catent.d_fileno == state->cbs_hiddenDirID && - catent.d_type == DT_DIR) { - if (state->cbs_case_sensitive) { - // This is how we skip over these entries. The next - // time we fill in a real item the uio_offset will - // point to the correct place in the "virtual" directory - // so that PositionIterator() will do the right thing - // when scanning to get to a particular position in the - // directory. 
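For extended entries, the packer shown below stuffs two facts into the 64-bit d_seekoff cookie: the 1-relative directory index (offset by 3 to account for "." and "..") in the low 32 bits, and the entry's cnid in the high 32 bits. A self-contained sketch of that encode/decode, assuming the layout described in the packer's comment:

#include <assert.h>
#include <stdint.h>

/* Low 32 bits: 1-relative index (+3 covers "." and ".."); high 32: cnid. */
static uint64_t
cookie_pack(uint32_t index, uint32_t cnid)
{
    return (uint64_t)(index + 3) | ((uint64_t)cnid << 32);
}

static void
cookie_unpack(uint64_t cookie, uint32_t *index, uint32_t *cnid)
{
    *index = (uint32_t)cookie - 3;
    *cnid  = (uint32_t)(cookie >> 32);
}

int main(void)
{
    uint32_t idx, cnid;
    cookie_unpack(cookie_pack(7, 0x1234), &idx, &cnid);
    assert(idx == 7 && cnid == 0x1234);
    return 0;
}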
- state->cbs_uio->uio_offset += catent.d_reclen; - state->cbs_lastoffset = state->cbs_uio->uio_offset; - - return (1); /* skip and continue */ - } else - goto lastitem; - } - - /* Hide the journal files */ - if ((curID == kRootDirID) && - (catent.d_type == DT_REG) && - ((catent.d_fileno == state->cbs_hiddenJournalID) || - (catent.d_fileno == state->cbs_hiddenInfoBlkID))) { - - // see comment up above for why this is here - state->cbs_uio->uio_offset += catent.d_reclen; - state->cbs_lastoffset = state->cbs_uio->uio_offset; + if (state->cbs_extended) { + entry->d_type = type; + entry->d_namlen = namelen; + entry->d_reclen = uiosize = EXT_DIRENT_LEN(namelen); + if (hide) + entry->d_fileno = 0; /* file number = 0 means skip entry */ + else + entry->d_fileno = cnid; - return (1); /* skip and continue */ + /* + * The index is 1 relative and includes "." and ".." + * + * Also stuff the cnid in the upper 32 bits of the cookie. + */ + entry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32); + uioaddr = (caddr_t) entry; + } else { + catent.d_type = type; + catent.d_namlen = namelen; + catent.d_reclen = uiosize = STD_DIRENT_LEN(namelen); + if (hide) + catent.d_fileno = 0; /* file number = 0 means skip entry */ + else + catent.d_fileno = cnid; + uioaddr = (caddr_t) &catent; } - state->cbs_lastoffset = state->cbs_uio->uio_offset; - uiobase = state->cbs_uio->uio_iov->iov_base; + /* Save current base address for post processing of hard-links. */ + uiobase = uio_curriovbase(state->cbs_uio); - /* if this entry won't fit then we're done */ - if (catent.d_reclen > state->cbs_uio->uio_resid || - (ilinkref != 0 && state->cbs_nlinks == state->cbs_maxlinks) || - (state->cbs_ncookies != 0 && state->cbs_numresults >= state->cbs_ncookies)) + /* If this entry won't fit then we're done */ + if ((uiosize > uio_resid(state->cbs_uio)) || + (ilinkref != 0 && state->cbs_nlinks == state->cbs_maxlinks)) { return (0); /* stop */ + } - state->cbs_result = uiomove((caddr_t) &catent, catent.d_reclen, state->cbs_uio); + state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio); + if (state->cbs_result == 0) { + ++state->cbs_index; + /* Remember previous entry */ + state->cbs_desc->cd_cnid = cnid; + if (type == DT_DIR) { + state->cbs_desc->cd_flags |= CD_ISDIR; + } else { + state->cbs_desc->cd_flags &= ~CD_ISDIR; + } + if (state->cbs_desc->cd_nameptr != NULL) { + vfs_removename(state->cbs_desc->cd_nameptr); + } +#if 0 + state->cbs_desc->cd_encoding = xxxx; +#endif + if (!is_mangled) { + state->cbs_desc->cd_namelen = namelen; + state->cbs_desc->cd_nameptr = vfs_addname(nameptr, namelen, 0, 0); + } else { + /* Store unmangled name for the directory hint else it will + * restart readdir at the last location again + */ + char *new_nameptr; + size_t bufsize; + + cnp = (CatalogName *)&ckp->hfsPlus.nodeName; + bufsize = 1 + utf8_encodelen(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + ':', 0); + MALLOC(new_nameptr, char *, bufsize, M_TEMP, M_WAITOK); + result = utf8_encodestr(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + new_nameptr, &namelen, + bufsize, ':', 0); + + state->cbs_desc->cd_namelen = namelen; + state->cbs_desc->cd_nameptr = vfs_addname(new_nameptr, namelen, 0, 0); + + FREE(new_nameptr, M_TEMP); + } + } /* * Record any hard links for post processing. 
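Hard links cannot be resolved to their real file IDs while the B-tree iteration is in progress, so the packer records each link's ref together with the address where the dirent landed, and a later pass overwrites just the inode field in place. A simplified user-space model of that two-pass scheme (resolve_link() is a hypothetical stand-in for resolvelinkid(); like the kernel code, it assumes d_ino is the first field of the dirent):

#include <stdint.h>
#include <string.h>

struct linkfix {
    uint32_t link_ref;      /* iNodeNum stashed during the iteration pass */
    char    *dirent_addr;   /* where the dirent landed in the output buffer */
};

/* Hypothetical stand-in for resolvelinkid(): maps link ref -> file id. */
extern int resolve_link(uint32_t link_ref, uint32_t *fileid);

/* Second pass: patch the inode field in place at each saved address. */
static void
fix_hardlinks(struct linkfix *fixes, int nfixes)
{
    for (int i = 0; i < nfixes; i++) {
        uint32_t fileid = 0;
        if (resolve_link(fixes[i].link_ref, &fileid) != 0)
            continue;       /* leave the stand-in id on failure */
        memcpy(fixes[i].dirent_addr, &fileid, sizeof(fileid));
    }
}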
*/ @@ -1756,161 +1995,211 @@ lastitem: state->cbs_nlinks++; } - if (state->cbs_cookies) { - state->cbs_cookies[state->cbs_numresults++] = state->cbs_uio->uio_offset; - } else { - state->cbs_numresults++; - } - - /* continue iteration if there's room */ + /* Continue iteration if there's room */ return (state->cbs_result == 0 && - state->cbs_uio->uio_resid >= AVERAGE_HFSDIRENTRY_SIZE); + uio_resid(state->cbs_uio) >= SMALL_DIRENTRY_SIZE); } -#define SMALL_DIRENTRY_SIZE (sizeof(struct dirent) - (MAXNAMLEN + 1) + 8) + /* - * + * Pack a uio buffer with directory entries from the catalog */ __private_extern__ int -cat_getdirentries(struct hfsmount *hfsmp, struct cat_desc *descp, int entrycnt, - struct uio *uio, int *eofflag, u_long *cookies, int ncookies) +cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint, + uio_t uio, int extended, int * items) { - ExtendedVCB *vcb = HFSTOVCB(hfsmp); + FCB* fcb; BTreeIterator * iterator; - CatalogIterator *cip; - u_int32_t diroffset; - u_int16_t op; - struct read_state state; - u_int32_t dirID = descp->cd_cnid; + CatalogKey * key; + struct packdirentry_state state; void * buffer; int bufsize; - int maxdirentries; + int maxlinks; int result; - - diroffset = uio->uio_offset; - *eofflag = 0; - maxdirentries = MIN(entrycnt, uio->uio_resid / SMALL_DIRENTRY_SIZE); + int index; + int have_key; + + fcb = GetFileControlBlock(hfsmp->hfs_catalog_vp); /* Get a buffer for collecting link info and for a btree iterator */ - bufsize = (maxdirentries * sizeof(struct linkinfo)) + sizeof(*iterator); + maxlinks = MIN(entrycnt, uio_resid(uio) / SMALL_DIRENTRY_SIZE); + bufsize = (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator); + if (extended) { + bufsize += sizeof(struct direntry); + } MALLOC(buffer, void *, bufsize, M_TEMP, M_WAITOK); bzero(buffer, bufsize); + state.cbs_extended = extended; state.cbs_nlinks = 0; - state.cbs_maxlinks = maxdirentries; - state.cbs_linkinfo = (struct linkinfo *) buffer; - iterator = (BTreeIterator *) ((char *)buffer + (maxdirentries * sizeof(struct linkinfo))); - - /* get an iterator and position it */ - cip = GetCatalogIterator(vcb, dirID, diroffset); + state.cbs_maxlinks = maxlinks; + state.cbs_linkinfo = (linkinfo_t *) buffer; - result = PositionIterator(cip, diroffset, iterator, &op); - if (result == cmNotFound) { - *eofflag = 1; - result = 0; - AgeCatalogIterator(cip); - goto cleanup; - } else if ((result = MacToVFSError(result))) - goto cleanup; + iterator = (BTreeIterator *) ((char *)buffer + (maxlinks * sizeof(linkinfo_t))); + key = (CatalogKey *)&iterator->key; + have_key = 0; + index = dirhint->dh_index + 1; + if (extended) { + state.cbs_direntry = (struct direntry *)((char *)buffer + sizeof(BTreeIterator)); + } + /* + * Attempt to build a key from cached filename + */ + if (dirhint->dh_desc.cd_namelen != 0) { + if (buildkey(hfsmp, &dirhint->dh_desc, (HFSPlusCatalogKey *)key, 0) == 0) { + have_key = 1; + } + } - state.cbs_hiddenDirID = hfsmp->hfs_privdir_desc.cd_cnid; - if (hfsmp->jnl) { - state.cbs_hiddenJournalID = hfsmp->hfs_jnlfileid; - state.cbs_hiddenInfoBlkID = hfsmp->hfs_jnlinfoblkid; + /* + * If the last entry wasn't cached then position the btree iterator + */ + if ((index == 0) || !have_key) { + /* + * Position the iterator at the directory's thread record. + * (i.e. 
just before the first entry) + */ + buildthreadkey(dirhint->dh_desc.cd_parentcnid, (hfsmp->hfs_flags & HFS_STANDARD), key); + result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + if (result) { + result = MacToVFSError(result); + goto cleanup; + } + + /* + * Iterate until we reach the entry just + * before the one we want to start with. + */ + if (index > 0) { + struct position_state ps; + + ps.error = 0; + ps.count = 0; + ps.index = index; + ps.parentID = dirhint->dh_desc.cd_parentcnid; + ps.hfsmp = hfsmp; + + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)cat_findposition, &ps); + if (ps.error) + result = ps.error; + else + result = MacToVFSError(result); + if (result) { + result = MacToVFSError(result); + goto cleanup; + } + } } - state.cbs_lastoffset = cip->currentOffset; - state.cbs_vcb = vcb; + state.cbs_index = index; + state.cbs_hfsmp = hfsmp; state.cbs_uio = uio; + state.cbs_desc = &dirhint->dh_desc; state.cbs_result = 0; - state.cbs_parentID = dirID; - if (diroffset <= 2*sizeof(struct hfsdotentry)) { - state.cbs_numresults = diroffset/sizeof(struct hfsdotentry); - } else { - state.cbs_numresults = 0; - } - state.cbs_cookies = cookies; - state.cbs_ncookies = ncookies; + state.cbs_parentID = dirhint->dh_desc.cd_parentcnid; - if (vcb->vcbSigWord == kHFSPlusSigWord) - state.cbs_hfsPlus = 1; - else - state.cbs_hfsPlus = 0; - - if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) - state.cbs_case_sensitive = 1; - else - state.cbs_case_sensitive = 0; - - /* process as many entries as possible... */ - result = BTIterateRecords(GetFileControlBlock(vcb->catalogRefNum), op, - iterator, (IterateCallBackProcPtr)catrec_read, &state); + /* + * Process as many entries as possible starting at iterator->key. + */ + result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + (IterateCallBackProcPtr)cat_packdirentry, &state); + /* Note that state.cbs_index is still valid on errors */ + *items = state.cbs_index - index; + index = state.cbs_index; + + /* Finish updating the catalog iterator. */ + dirhint->dh_desc.cd_hint = iterator->hint.nodeNum; + dirhint->dh_desc.cd_flags |= CD_DECOMPOSED; + dirhint->dh_index = index - 1; + /* * Post process any hard links to get the real file id. */ if (state.cbs_nlinks > 0) { - struct iovec aiov; - struct uio auio; - u_int32_t fileid; + u_int32_t fileid = 0; + user_addr_t address; int i; - u_int32_t tempid; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = uio->uio_segflg; - auio.uio_rw = UIO_READ; /* read kernel memory into user memory */ - auio.uio_procp = uio->uio_procp; for (i = 0; i < state.cbs_nlinks; ++i) { - fileid = 0; - if (resolvelinkid(hfsmp, state.cbs_linkinfo[i].link_ref, &fileid) != 0) continue; - - /* Update the file id in the user's buffer */ - aiov.iov_base = (char *) state.cbs_linkinfo[i].dirent_addr; - aiov.iov_len = sizeof(fileid); - auio.uio_offset = 0; - auio.uio_resid = aiov.iov_len; - (void) uiomove((caddr_t)&fileid, sizeof(fileid), &auio); + /* This assumes that d_ino is always first field. */ + address = state.cbs_linkinfo[i].dirent_addr; + if (address == (user_addr_t)0) + continue; + if (uio_isuserspace(uio)) { + (void) copyout(&fileid, address, + extended ? 
sizeof(ino64_t) : sizeof(ino_t)); + } else /* system space */ { + ino64_t *inoptr = (ino64_t *)CAST_DOWN(caddr_t, address); + *inoptr = fileid; + } } } + if (state.cbs_result) result = state.cbs_result; else result = MacToVFSError(result); if (result == ENOENT) { - *eofflag = 1; result = 0; } - if (result == 0) { - cip->currentOffset = state.cbs_lastoffset; - cip->nextOffset = uio->uio_offset; - UpdateCatalogIterator(iterator, cip); - } - cleanup: - if (result) { - cip->volume = 0; - cip->folderID = 0; - AgeCatalogIterator(cip); - } - - (void) ReleaseCatalogIterator(cip); FREE(buffer, M_TEMP); return (result); } +/* + * Callback to establish directory position. + * Called with position_state for each item in a directory. + */ +static int +cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, + struct position_state *state) +{ + cnid_t curID; + + if (state->hfsmp->hfs_flags & HFS_STANDARD) + curID = ckp->hfs.parentID; + else + curID = ckp->hfsPlus.parentID; + + /* Make sure parent directory didn't change */ + if (state->parentID != curID) { + state->error = EINVAL; + return (0); /* stop */ + } + + /* Count this entry */ + switch(crp->recordType) { + case kHFSPlusFolderRecord: + case kHFSPlusFileRecord: + case kHFSFolderRecord: + case kHFSFileRecord: + ++state->count; + break; + default: + printf("cat_findposition: invalid record type %d in dir %d\n", + crp->recordType, curID); + state->error = EINVAL; + return (0); /* stop */ + }; + + return (state->count < state->index); +} + + /* * cat_binarykeycompare - compare two HFS Plus catalog keys. - * The name portion of the key is comapred using a 16-bit binary comparison. + * The name portion of the key is compared using a 16-bit binary comparison. * This is called from the b-tree code. */ __private_extern__ @@ -1965,6 +2254,69 @@ cat_binarykeycompare(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) } +/* + * Compare two standard HFS catalog keys + * + * Result: +n search key > trial key + * 0 search key = trial key + * -n search key < trial key + */ +int +CompareCatalogKeys(HFSCatalogKey *searchKey, HFSCatalogKey *trialKey) +{ + cnid_t searchParentID, trialParentID; + int result; + + searchParentID = searchKey->parentID; + trialParentID = trialKey->parentID; + + if (searchParentID > trialParentID) + result = 1; + else if (searchParentID < trialParentID) + result = -1; + else /* parent dirID's are equal, compare names */ + result = FastRelString(searchKey->nodeName, trialKey->nodeName); + + return result; +} + + +/* + * Compare two HFS+ catalog keys + * + * Result: +n search key > trial key + * 0 search key = trial key + * -n search key < trial key + */ +int +CompareExtendedCatalogKeys(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) +{ + cnid_t searchParentID, trialParentID; + int result; + + searchParentID = searchKey->parentID; + trialParentID = trialKey->parentID; + + if (searchParentID > trialParentID) { + result = 1; + } + else if (searchParentID < trialParentID) { + result = -1; + } else { + /* parent node ID's are equal, compare names */ + if ( searchKey->nodeName.length == 0 || trialKey->nodeName.length == 0 ) + result = searchKey->nodeName.length - trialKey->nodeName.length; + else + result = FastUnicodeCompare(&searchKey->nodeName.unicode[0], + searchKey->nodeName.length, + &trialKey->nodeName.unicode[0], + trialKey->nodeName.length); + } + + return result; +} + + /* * buildkey - build a Catalog b-tree key from a cnode descriptor */ @@ -2146,6 +2498,26 @@ exit: return MacToVFSError(result); } +/* + * 
getkeyplusattr - From id, fetch the key and the bsd attrs for a file/dir (could pass + * null arguments to cat_idlookup instead, but we save around 10% by not building the + * cat_desc here). Both key and attrp must point to real structures. + */ +__private_extern__ +int +cat_getkeyplusattr(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key, struct cat_attr *attrp) +{ + int result; + + result = getkey(hfsmp, cnid, key); + + if (result == 0) { + result = cat_lookupbykey(hfsmp, key, 0, 0, NULL, attrp, NULL, NULL); + } + + return MacToVFSError(result); +} + /* * buildrecord - build a default catalog directory or file record @@ -2182,33 +2554,41 @@ buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding struct FndrFileInfo * fip = NULL; if (type == S_IFDIR) { - bzero(crp, sizeof(HFSPlusCatalogFolder)); crp->recordType = kHFSPlusFolderRecord; + crp->hfsPlusFolder.flags = 0; + crp->hfsPlusFolder.valence = 0; crp->hfsPlusFolder.folderID = cnid; crp->hfsPlusFolder.createDate = createtime; crp->hfsPlusFolder.contentModDate = createtime; - crp->hfsPlusFolder.accessDate = createtime; crp->hfsPlusFolder.attributeModDate = createtime; + crp->hfsPlusFolder.accessDate = createtime; + crp->hfsPlusFolder.backupDate = 0; crp->hfsPlusFolder.textEncoding = encoding; + crp->hfsPlusFolder.attrBlocks = 0; bcopy(attrp->ca_finderinfo, &crp->hfsPlusFolder.userInfo, 32); bsdp = &crp->hfsPlusFolder.bsdInfo; + bsdp->special.rawDevice = 0; *recordSize = sizeof(HFSPlusCatalogFolder); } else { - bzero(crp, sizeof(HFSPlusCatalogFile)); crp->recordType = kHFSPlusFileRecord; + crp->hfsPlusFile.flags = kHFSThreadExistsMask; + crp->hfsPlusFile.reserved1 = 0; crp->hfsPlusFile.fileID = cnid; crp->hfsPlusFile.createDate = createtime; crp->hfsPlusFile.contentModDate = createtime; crp->hfsPlusFile.accessDate = createtime; crp->hfsPlusFile.attributeModDate = createtime; - crp->hfsPlusFile.flags |= kHFSThreadExistsMask; + crp->hfsPlusFile.backupDate = 0; crp->hfsPlusFile.textEncoding = encoding; + crp->hfsPlusFile.attrBlocks = 0; bsdp = &crp->hfsPlusFile.bsdInfo; + bsdp->special.rawDevice = 0; switch(type) { case S_IFBLK: case S_IFCHR: /* BLK/CHR need to save the device info */ bsdp->special.rawDevice = attrp->ca_rdev; + bzero(&crp->hfsPlusFile.userInfo, 32); break; case S_IFREG: /* Hardlink links need to save the linkref */ @@ -2224,6 +2604,7 @@ buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding bcopy(attrp->ca_finderinfo, &crp->hfsPlusFile.userInfo, 32); break; } + bzero(&crp->hfsPlusFile.dataFork, 2*sizeof(HFSPlusForkData)); *recordSize = sizeof(HFSPlusCatalogFile); } bsdp->ownerID = attrp->ca_uid; @@ -2244,13 +2625,13 @@ builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_long hint, u_long encodin { int result = 0; char * nameptr; - long bufsize; + size_t bufsize; size_t utf8len; char tmpbuff[128]; /* guess a size... 
*/ bufsize = (3 * key->nodeName.length) + 1; - if (bufsize >= sizeof(tmpbuff)-1) { + if (bufsize >= sizeof(tmpbuff) - 1) { MALLOC(nameptr, char *, bufsize, M_TEMP, M_WAITOK); } else { nameptr = &tmpbuff[0]; @@ -2274,7 +2655,7 @@ builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_long hint, u_long encodin bufsize, ':', 0); } descp->cd_parentcnid = key->parentID; - descp->cd_nameptr = add_name(nameptr, utf8len, 0, 0); + descp->cd_nameptr = vfs_addname(nameptr, utf8len, 0, 0); descp->cd_namelen = utf8len; descp->cd_cnid = cnid; descp->cd_hint = hint; @@ -2299,10 +2680,11 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct int isDirectory = (crp->recordType == kHFSPlusFolderRecord); const struct HFSPlusBSDInfo *bsd = &crp->bsdInfo; + attrp->ca_recflags = crp->flags; attrp->ca_nlink = 1; attrp->ca_atime = to_bsd_time(crp->accessDate); + attrp->ca_atimeondisk = attrp->ca_atime; attrp->ca_mtime = to_bsd_time(crp->contentModDate); - attrp->ca_mtime_nsec = 0; attrp->ca_ctime = to_bsd_time(crp->attributeModDate); attrp->ca_itime = to_bsd_time(crp->createDate); attrp->ca_btime = to_bsd_time(crp->backupDate); @@ -2334,7 +2716,7 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct break; } - if (HFSTOVFS(hfsmp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) { + if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) { /* * Override the permissions as determined by the mount auguments * in ALMOST the same way unset permissions are treated but keep @@ -2354,6 +2736,7 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct } attrp->ca_nlink = 2 + ((HFSPlusCatalogFolder *)crp)->valence; attrp->ca_entries = ((HFSPlusCatalogFolder *)crp)->valence; + attrp->ca_attrblks = ((HFSPlusCatalogFolder *)crp)->attrBlocks; } else { /* Keep IMMUTABLE bits in sync with HFS locked flag */ if (crp->flags & kHFSFileLockedMask) { @@ -2367,6 +2750,7 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct } /* get total blocks (both forks) */ attrp->ca_blocks = crp->dataFork.totalBlocks + crp->resourceFork.totalBlocks; + attrp->ca_attrblks = crp->attrBlocks; } attrp->ca_fileid = crp->fileID; @@ -2485,7 +2869,7 @@ promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlus crp->attributeModDate = crp->contentModDate; crp->accessDate = crp->contentModDate; bzero(&crp->bsdInfo, sizeof(HFSPlusBSDInfo)); - crp->reserved2 = 0; + crp->attrBlocks = 0; } /* @@ -2590,7 +2974,7 @@ getcnid(const CatalogRecord *crp) cnid = crp->hfsPlusFile.fileID; break; default: - panic("hfs: getcnid: unknown recordType (crp @ 0x%x)\n", crp); + printf("hfs: getcnid: unknown recordType (crp @ 0x%x)\n", crp); break; } @@ -2633,4 +3017,3 @@ isadir(const CatalogRecord *crp) crp->recordType == kHFSPlusFolderRecord); } - diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index 0f67eadf8..991478558 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,9 +26,7 @@ #ifdef KERNEL #ifdef __APPLE_API_PRIVATE -#include #include -#include #include @@ -71,22 +69,24 @@ struct cat_desc { struct cat_attr { cnid_t ca_fileid; /* inode number (for stat) normally == cnid */ mode_t ca_mode; /* file access mode and type (16 bits) */ - nlink_t ca_nlink; /* file link count (16 bit integer) */ + u_int16_t ca_recflags; /* catalog record flags (16 bit integer) */ + u_int32_t ca_nlink; /* file link count */ uid_t ca_uid; /* file owner */ gid_t ca_gid; /* file group */ dev_t ca_rdev; /* device a special file represents */ time_t ca_atime; /* last access time */ + time_t ca_atimeondisk; /* access time value on disk */ time_t ca_mtime; /* last data modification time */ - int32_t ca_mtime_nsec; /* last data modification time nanosec */ time_t ca_ctime; /* last file status change */ time_t ca_itime; /* file initialization time */ time_t ca_btime; /* last backup time */ - u_long ca_flags; /* status flags (chflags) */ + u_int32_t ca_flags; /* status flags (chflags) */ union { u_int32_t cau_blocks; /* total file blocks used (rsrc + data) */ u_int32_t cau_entries; /* total directory entries (valence) */ } ca_union; u_int8_t ca_finderinfo[32]; /* Opaque Finder information */ + u_int32_t ca_attrblks; /* cached count of attribute data blocks */ }; /* Aliases for common fields */ #define ca_blocks ca_union.cau_blocks @@ -112,6 +112,26 @@ struct cat_fork { #define cf_bytesread cf_union.cfu_bytesread +/* + * Directory Hint + * Used to hold state across directory enumerations. + * + */ +struct directoryhint { + SLIST_ENTRY(directoryhint) dh_link; /* chain */ + int dh_index; /* index into directory (zero relative) */ + u_int32_t dh_time; + struct cat_desc dh_desc; /* entry's descriptor */ +}; +typedef struct directoryhint directoryhint_t; + +#define HFS_MAXDIRHINTS 32 +#define HFS_DIRHINT_TTL 45 + +#define HFS_INDEX_MASK 0x03ffffff +#define HFS_INDEX_BITS 26 + + /* * Catalog Node Entry * @@ -160,6 +180,26 @@ typedef struct cat_cookie_t { char opaque[24]; } cat_cookie_t; +/* Universal catalog key */ +union CatalogKey { + HFSCatalogKey hfs; + HFSPlusCatalogKey hfsPlus; +}; +typedef union CatalogKey CatalogKey; + +/* Universal catalog data record */ +union CatalogRecord { + int16_t recordType; + HFSCatalogFolder hfsFolder; + HFSCatalogFile hfsFile; + HFSCatalogThread hfsThread; + HFSPlusCatalogFolder hfsPlusFolder; + HFSPlusCatalogFile hfsPlusFile; + HFSPlusCatalogThread hfsPlusThread; +}; +typedef union CatalogRecord CatalogRecord; + + /* * Catalog Interface * @@ -186,7 +226,8 @@ extern int cat_lookup ( struct hfsmount *hfsmp, int wantrsrc, struct cat_desc *outdescp, struct cat_attr *attrp, - struct cat_fork *forkp); + struct cat_fork *forkp, + cnid_t *desc_cnid); extern int cat_idlookup (struct hfsmount *hfsmp, cnid_t cnid, @@ -194,10 +235,13 @@ extern int cat_idlookup (struct hfsmount *hfsmp, struct cat_attr *attrp, struct cat_fork *forkp); +extern int cat_findname (struct hfsmount *hfsmp, + cnid_t cnid, + struct cat_desc *outdescp); + extern int cat_getentriesattr( struct hfsmount *hfsmp, - struct cat_desc *prevdesc, - int index, + directoryhint_t *dirhint, struct cat_entrylist *ce_list); extern int cat_rename ( struct hfsmount * hfsmp, @@ -214,12 +258,11 @@ extern int cat_update ( struct hfsmount *hfsmp, extern int cat_getdirentries( struct hfsmount *hfsmp, - struct cat_desc *descp, int entrycnt, - struct uio *uio, - int *eofflag, - u_long *cookies, - int ncookies); + directoryhint_t *dirhint, + uio_t uio, + int extended, + int * 
items); extern int cat_insertfilethread ( struct hfsmount *hfsmp, @@ -240,6 +283,38 @@ extern int cat_binarykeycompare( HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey); +extern int CompareCatalogKeys( + HFSCatalogKey *searchKey, + HFSCatalogKey *trialKey); + +extern int CompareExtendedCatalogKeys( + HFSPlusCatalogKey *searchKey, + HFSPlusCatalogKey *trialKey); + +extern void cat_convertattr( + struct hfsmount *hfsmp, + CatalogRecord * recp, + struct cat_attr *attrp, + struct cat_fork *datafp, + struct cat_fork *rsrcfp); + +extern int cat_convertkey( + struct hfsmount *hfsmp, + CatalogKey *key, + CatalogRecord * recp, + struct cat_desc *descp); + +extern int resolvelink( + struct hfsmount *hfsmp, + u_long linkref, + struct HFSPlusCatalogFile *recp); + +extern int cat_getkeyplusattr( + struct hfsmount *hfsmp, + cnid_t cnid, + CatalogKey *key, + struct cat_attr *attrp); + #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ #endif /* __HFS_CATALOG__ */ diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c index 6a4b950ee..1cbaf8186 100644 --- a/bsd/hfs/hfs_chash.c +++ b/bsd/hfs/hfs_chash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,12 +59,22 @@ #include #include #include +#include #include #include #include + +#include "hfs.h" /* XXX bringup */ #include "hfs_cnode.h" +extern lck_attr_t * hfs_lock_attr; +extern lck_grp_t * hfs_mutex_group; +extern lck_grp_t * hfs_rwlock_group; + +lck_grp_t * chash_lck_grp; +lck_grp_attr_t * chash_lck_grp_attr; +lck_attr_t * chash_lck_attr; /* * Structures associated with cnode caching. @@ -72,7 +82,9 @@ LIST_HEAD(cnodehashhead, cnode) *cnodehashtbl; u_long cnodehash; /* size of hash table - 1 */ #define CNODEHASH(device, inum) (&cnodehashtbl[((device) + (inum)) & cnodehash]) -struct slock hfs_chash_slock; + +lck_mtx_t hfs_chash_mutex; + /* * Initialize cnode hash table. @@ -82,7 +94,15 @@ void hfs_chashinit() { cnodehashtbl = hashinit(desiredvnodes, M_HFSMNT, &cnodehash); - simple_lock_init(&hfs_chash_slock); + + chash_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(chash_lck_grp_attr); + chash_lck_grp = lck_grp_alloc_init("cnode_hash", chash_lck_grp_attr); + + chash_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(chash_lck_attr); + + lck_mtx_init(&hfs_chash_mutex, chash_lck_grp, chash_lck_attr); } @@ -90,123 +110,288 @@ hfs_chashinit() * Use the device, inum pair to find the incore cnode. * * If it is in core, but locked, wait for it. - * - * If the requested vnode (fork) is not available, then - * take a reference on the other vnode (fork) so that - * the upcoming getnewvnode can not aquire it. */ __private_extern__ -struct cnode * -hfs_chashget(dev_t dev, ino_t inum, int wantrsrc, - struct vnode **vpp, struct vnode **rvpp) +struct vnode * +hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) { - struct proc *p = current_proc(); struct cnode *cp; struct vnode *vp; int error; + uint32_t vid; - *vpp = NULLVP; - *rvpp = NULLVP; /* * Go through the hash list * If a cnode is in the process of being cleaned out or being * allocated, wait for it to be finished and then try again. */ loop: - simple_lock(&hfs_chash_slock); + lck_mtx_lock(&hfs_chash_mutex); for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { if ((cp->c_fileid != inum) || (cp->c_dev != dev)) continue; - if (ISSET(cp->c_flag, C_ALLOC)) { - /* - * cnode is being created. 
Wait for it to finish. - */ - SET(cp->c_flag, C_WALLOC); - simple_unlock(&hfs_chash_slock); - (void) tsleep((caddr_t)cp, PINOD, "hfs_chashget-1", 0); + /* Wait if cnode is being created or reclaimed. */ + if (ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { + SET(cp->c_hflag, H_WAITING); + + (void) msleep(cp, &hfs_chash_mutex, PDROP | PINOD, + "hfs_chash_getvnode", 0); goto loop; - } - if (ISSET(cp->c_flag, C_TRANSIT)) { - /* - * cnode is getting reclaimed wait for - * the operation to complete and return - * error + } + /* + * Skip cnodes that are not in the name space anymore + * note that this check is done outside of the proper + * lock to catch nodes already in this state... this + * state must be rechecked after we acquire the cnode lock + */ + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + continue; + } + /* Obtain the desired vnode. */ + vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; + if (vp == NULLVP) + goto exit; + + vid = vnode_vid(vp); + lck_mtx_unlock(&hfs_chash_mutex); + + if ((error = vnode_getwithvid(vp, vid))) { + /* + * If vnode is being reclaimed, or has + * already changed identity, no need to wait */ - SET(cp->c_flag, C_WTRANSIT); - simple_unlock(&hfs_chash_slock); - (void)tsleep((caddr_t)cp, PINOD, "hfs_chashget-2", 0); - goto loop; + return (NULL); } - if (cp->c_flag & (C_NOEXISTS | C_DELETED)) + if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK) != 0) { + vnode_put(vp); + return (NULL); + } + + /* + * Skip cnodes that are not in the name space anymore + * we need to check again with the cnode lock held + * because we may have blocked acquiring the vnode ref + * or the lock on the cnode which would allow the node + * to be unlinked + */ + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + if (!skiplock) + hfs_unlock(cp); + vnode_put(vp); + + return (NULL); + } + return (vp); + } +exit: + lck_mtx_unlock(&hfs_chash_mutex); + return (NULL); +} + + +/* + * Use the device, fileid pair to find the incore cnode. + * If no cnode is found, one is created. + * + * If it is in core, but locked, wait for it. + */ +__private_extern__ +int +hfs_chash_snoop(dev_t dev, ino_t inum, int (*callout)(const struct cat_desc *, + const struct cat_attr *, void *), void * arg) +{ + struct cnode *cp; + int result = ENOENT; + + /* + * Go through the hash list + * If a cnode is in the process of being cleaned out or being + * allocated, wait for it to be finished and then try again. + */ + lck_mtx_lock(&hfs_chash_mutex); + for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if ((cp->c_fileid != inum) || (cp->c_dev != dev)) + continue; + /* Skip cnodes being created or reclaimed. */ + if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { + result = callout(&cp->c_desc, &cp->c_attr, arg); + } + break; + } + lck_mtx_unlock(&hfs_chash_mutex); + return (result); +} + +/* + * Use the device, fileid pair to find the incore cnode. + * If no cnode is found, one is created. + * + * If it is in core, but locked, wait for it. + */ +__private_extern__ +struct cnode * +hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock) +{ + struct cnode *cp; + struct cnode *ncp = NULL; + vnode_t vp; + uint32_t vid; + + /* + * Go through the hash list + * If a cnode is in the process of being cleaned out or being + * allocated, wait for it to be finished and then try again.
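hfs_chash_getvnode and hfs_chash_getcnode both lean on the same VFS idiom: capture the vnode's identity generation (vid) while the hash mutex is held, drop the mutex, then take the reference with vnode_getwithvid() so the get fails cleanly if the vnode was recycled in the window. A schematic of that pattern with simplified types (the lock and get calls are stand-ins for the real lck_mtx and vnode KPIs):

#include <stdint.h>
#include <stddef.h>

struct vn { uint32_t vid; /* bumped each time the vnode is reused */ };

/* Stand-ins for the real primitives. */
extern void hash_lock(void);
extern void hash_unlock(void);
extern int  vn_getwithvid(struct vn *vp, uint32_t vid);  /* 0 on success */

static struct vn *
lookup_and_ref(struct vn *vp)
{
    hash_lock();
    uint32_t vid = vp->vid;     /* snapshot identity under the lock */
    hash_unlock();              /* cannot hold a mutex across a blocking get */

    if (vn_getwithvid(vp, vid) != 0)
        return NULL;    /* recycled or reused: caller restarts the lookup */
    return vp;
}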
+ */ +loop: + lck_mtx_lock(&hfs_chash_mutex); + +loop_with_lock: + for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if ((cp->c_fileid != inum) || (cp->c_dev != dev)) + continue; /* - * Try getting the desired vnode first. If - * it isn't available then take a reference - * on the other vnode. + * Wait if cnode is being created, attached to or reclaimed. */ - vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; - if (vp == NULLVP) - vp = wantrsrc ? cp->c_vp : cp->c_rsrc_vp; - if (vp == NULLVP) - panic("hfs_chashget: orphaned cnode in hash"); + if (ISSET(cp->c_hflag, H_ALLOC | H_ATTACH | H_TRANSIT)) { + SET(cp->c_hflag, H_WAITING); - simple_lock(&vp->v_interlock); - simple_unlock(&hfs_chash_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) - goto loop; - else if (cp->c_flag & C_NOEXISTS) { + (void) msleep(cp, &hfs_chash_mutex, PINOD, + "hfs_chash_getcnode", 0); + goto loop_with_lock; + } + /* + * Skip cnodes that are not in the name space anymore + * note that this check is done outside of the proper + * lock to catch nodes already in this state... this + * state must be rechecked after we acquire the cnode lock + */ + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + continue; + } + vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp; + if (vp == NULL) { /* - * While we were blocked the cnode got deleted. + * The desired vnode isn't there so tag the cnode. */ - vput(vp); - goto loop; + SET(cp->c_hflag, H_ATTACH); + + lck_mtx_unlock(&hfs_chash_mutex); + } else { + vid = vnode_vid(vp); + + lck_mtx_unlock(&hfs_chash_mutex); + + if (vnode_getwithvid(vp, vid)) + goto loop; + } + if (ncp) { + /* + * someone else won the race to create + * this cnode and add it to the hash + * just dump our allocation + */ + FREE_ZONE(ncp, sizeof(struct cnode), M_HFSNODE); + ncp = NULL; } + if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK) != 0) { + if (vp != NULLVP) + vnode_put(vp); + lck_mtx_lock(&hfs_chash_mutex); - if (VNODE_IS_RSRC(vp)) - *rvpp = vp; - else - *vpp = vp; + if (vp == NULLVP) + CLR(cp->c_hflag, H_ATTACH); + goto loop_with_lock; + } /* - * Note that vget can block before aquiring the - * cnode lock. So we need to check if the vnode - * we wanted was created while we blocked. 
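Because MALLOC_ZONE can block, hfs_chash_getcnode allocates its candidate cnode with the hash mutex dropped, rescans the hash, and dumps the candidate if another thread inserted the same (dev, inum) first. The same optimistic-insert shape in miniature (hypothetical names; the real code keys the hash on device and file ID and publishes the new node with H_ALLOC set):

#include <stdlib.h>

struct node { int key; struct node *next; };

extern void hash_lock(void);
extern void hash_unlock(void);
extern struct node *hash_find(int key);     /* caller holds the lock */
extern void hash_insert(struct node *n);    /* caller holds the lock */

static struct node *
get_or_create(int key)
{
    struct node *spare = NULL;

    for (;;) {
        hash_lock();
        struct node *n = hash_find(key);
        if (n != NULL) {
            hash_unlock();
            free(spare);        /* lost the race: dump our allocation */
            return n;
        }
        if (spare != NULL) {
            spare->key = key;
            hash_insert(spare); /* won the race: publish under the lock */
            hash_unlock();
            return spare;
        }
        hash_unlock();
        spare = malloc(sizeof(*spare)); /* may block, hence drop-and-retry */
        if (spare == NULL)
            return NULL;    /* the kernel's M_WAITOK variant cannot fail */
    }
}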
+ * Skip cnodes that are not in the name space anymore + * we need to check again with the cnode lock held + * because we may have blocked acquiring the vnode ref + * or the lock on the cnode which would allow the node + * to be unlinked */ - if (wantrsrc && *rvpp == NULL && cp->c_rsrc_vp) { - error = vget(cp->c_rsrc_vp, 0, p); - vrele(*vpp); /* ref no longer needed */ - *vpp = NULL; - if (error) - goto loop; - *rvpp = cp->c_rsrc_vp; - - } else if (!wantrsrc && *vpp == NULL && cp->c_vp) { - error = vget(cp->c_vp, 0, p); - vrele(*rvpp); /* ref no longer needed */ - *rvpp = NULL; - if (error) - goto loop; - *vpp = cp->c_vp; + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + if (!skiplock) + hfs_unlock(cp); + if (vp != NULLVP) + vnode_put(vp); + lck_mtx_lock(&hfs_chash_mutex); + + if (vp == NULLVP) + CLR(cp->c_hflag, H_ATTACH); + goto loop_with_lock; } + *vpp = vp; return (cp); } - simple_unlock(&hfs_chash_slock); - return (NULL); + + /* + * Allocate a new cnode + */ + if (skiplock) + panic("%s - should never get here when skiplock is set \n", __FUNCTION__); + + if (ncp == NULL) { + lck_mtx_unlock(&hfs_chash_mutex); + + MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK); + /* + * since we dropped the chash lock, + * we need to go back and re-verify + * that this node hasn't come into + * existence... + */ + goto loop; + } + bzero(ncp, sizeof(struct cnode)); + SET(ncp->c_hflag, H_ALLOC); + ncp->c_fileid = inum; + ncp->c_dev = dev; + + lck_rw_init(&ncp->c_rwlock, hfs_rwlock_group, hfs_lock_attr); + if (!skiplock) + (void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK); + + /* Insert the new cnode with its H_ALLOC flag set */ + LIST_INSERT_HEAD(CNODEHASH(dev, inum), ncp, c_hash); + lck_mtx_unlock(&hfs_chash_mutex); + + *vpp = NULL; + return (ncp); +} + + +__private_extern__ +void +hfs_chashwakeup(struct cnode *cp, int hflags) +{ + lck_mtx_lock(&hfs_chash_mutex); + + CLR(cp->c_hflag, hflags); + + if (ISSET(cp->c_hflag, H_WAITING)) { + CLR(cp->c_hflag, H_WAITING); + wakeup((caddr_t)cp); + } + lck_mtx_unlock(&hfs_chash_mutex); } /* - * Insert a cnode into the hash table. + * Re-hash two cnodes in the hash table. */ __private_extern__ void -hfs_chashinsert(struct cnode *cp) +hfs_chash_rehash(struct cnode *cp1, struct cnode *cp2) { - if (cp->c_fileid != 0) { - simple_lock(&hfs_chash_slock); - LIST_INSERT_HEAD(CNODEHASH(cp->c_dev, cp->c_fileid), cp, c_hash); - simple_unlock(&hfs_chash_slock); - } + lck_mtx_lock(&hfs_chash_mutex); + + LIST_REMOVE(cp1, c_hash); + LIST_REMOVE(cp2, c_hash); + LIST_INSERT_HEAD(CNODEHASH(cp1->c_dev, cp1->c_fileid), cp1, c_hash); + LIST_INSERT_HEAD(CNODEHASH(cp2->c_dev, cp2->c_fileid), cp2, c_hash); + + lck_mtx_unlock(&hfs_chash_mutex); } @@ -214,13 +399,56 @@ hfs_chashinsert(struct cnode *cp) * Remove a cnode from the hash table. */ __private_extern__ -void +int hfs_chashremove(struct cnode *cp) { - simple_lock(&hfs_chash_slock); + lck_mtx_lock(&hfs_chash_mutex); + + /* Check if a vnode is getting attached */ + if (ISSET(cp->c_hflag, H_ATTACH)) { + lck_mtx_unlock(&hfs_chash_mutex); + return (EBUSY); + } + LIST_REMOVE(cp, c_hash); + cp->c_hash.le_next = NULL; + cp->c_hash.le_prev = NULL; + + lck_mtx_unlock(&hfs_chash_mutex); + return (0); +} + +/* + * Remove a cnode from the hash table and wakeup any waiters.
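The hash flags amount to a small condition-variable protocol: a thread that finds H_ALLOC, H_ATTACH, or H_TRANSIT set marks H_WAITING and msleeps on the cnode; the owning thread clears its flag and issues a wakeup only if H_WAITING was set. A pthread rendition of the same handshake (illustrative only; the kernel sleeps and wakes on the cnode address with msleep/wakeup):

#include <pthread.h>

#define H_BUSY    0x1
#define H_WAITING 0x2

struct hnode {
    int             hflag;
    pthread_mutex_t mtx;
    pthread_cond_t  cv;
};

static void
wait_while_busy(struct hnode *n)    /* caller holds n->mtx */
{
    while (n->hflag & H_BUSY) {
        n->hflag |= H_WAITING;      /* tell the owner someone is sleeping */
        pthread_cond_wait(&n->cv, &n->mtx);
    }
}

static void
done_and_wakeup(struct hnode *n)
{
    pthread_mutex_lock(&n->mtx);
    n->hflag &= ~H_BUSY;
    if (n->hflag & H_WAITING) {     /* only broadcast if someone waited */
        n->hflag &= ~H_WAITING;
        pthread_cond_broadcast(&n->cv);
    }
    pthread_mutex_unlock(&n->mtx);
}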
+ */ +__private_extern__ +void +hfs_chash_abort(struct cnode *cp) +{ + lck_mtx_lock(&hfs_chash_mutex); + LIST_REMOVE(cp, c_hash); cp->c_hash.le_next = NULL; cp->c_hash.le_prev = NULL; - simple_unlock(&hfs_chash_slock); + + CLR(cp->c_hflag, H_ATTACH | H_ALLOC); + if (ISSET(cp->c_hflag, H_WAITING)) { + CLR(cp->c_hflag, H_WAITING); + wakeup((caddr_t)cp); + } + lck_mtx_unlock(&hfs_chash_mutex); } + +/* + * mark a cnode as in transition + */ +__private_extern__ +void +hfs_chash_mark_in_transit(struct cnode *cp) +{ + lck_mtx_lock(&hfs_chash_mutex); + + SET(cp->c_hflag, H_TRANSIT); + + lck_mtx_unlock(&hfs_chash_mutex); +} diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 3abf79b4f..1fb30d020 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,8 +26,12 @@ #include #include #include +#include #include #include +#include + +#include #include #include @@ -39,8 +43,21 @@ extern int prtactive; +extern lck_attr_t * hfs_lock_attr; +extern lck_grp_t * hfs_mutex_group; +extern lck_grp_t * hfs_rwlock_group; + +static int hfs_filedone(struct vnode *vp, vfs_context_t context); + +static void hfs_reclaim_cnode(struct cnode *); + +static int hfs_valid_cnode(struct hfsmount *, struct vnode *, struct componentname *, cnid_t); + +static int hfs_isordered(struct cnode *, struct cnode *); + +int hfs_vnop_inactive(struct vnop_inactive_args *); -extern void hfs_relnamehints(struct cnode *dcp); +int hfs_vnop_reclaim(struct vnop_reclaim_args *); /* @@ -48,35 +65,64 @@ extern void hfs_relnamehints(struct cnode *dcp); */ __private_extern__ int -hfs_inactive(ap) - struct vop_inactive_args /* { - struct vnode *a_vp; - } */ *ap; +hfs_vnop_inactive(struct vnop_inactive_args *ap) { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); + struct cnode *cp; struct hfsmount *hfsmp = VTOHFS(vp); - struct proc *p = ap->a_p; - struct timeval tv; + struct proc *p = vfs_context_proc(ap->a_context); int error = 0; int recycle = 0; int forkcount = 0; int truncated = 0; - int started_tr = 0, grabbed_lock = 0; + int started_tr = 0; + int took_trunc_lock = 0; cat_cookie_t cookie; int cat_reserve = 0; + int lockflags; + enum vtype v_type; - if (prtactive && vp->v_usecount != 0) - vprint("hfs_inactive: pushing active", vp); + v_type = vnode_vtype(vp); + cp = VTOC(vp); + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp)) { + return (0); + } /* * Ignore nodes related to stale file handles. */ - if (cp->c_mode == 0) - goto out; + if (cp->c_mode == 0) { + vnode_recycle(vp); + return (0); + } + + if ((v_type == VREG) && + (ISSET(cp->c_flag, C_DELETED) || VTOF(vp)->ff_blocks)) { + hfs_lock_truncate(cp, TRUE); + took_trunc_lock = 1; + } + + /* + * We do the ubc_setsize before we take the cnode + * lock and before the hfs_truncate (since we'll + * be inside a transaction).
+ */ + if ((v_type == VREG || v_type == VLNK) && + (cp->c_flag & C_DELETED) && + (VTOF(vp)->ff_blocks != 0)) { + ubc_setsize(vp, 0); + } + + (void) hfs_lock(cp, HFS_FORCE_LOCK); - if (hfsmp->hfs_flags & HFS_READ_ONLY) - goto out; + if (v_type == VREG && !ISSET(cp->c_flag, C_DELETED) && VTOF(vp)->ff_blocks) { + hfs_filedone(vp, ap->a_context); + } + /* + * Remove any directory hints + */ + if (v_type == VDIR) + hfs_reldirhints(cp, 0); if (cp->c_datafork) ++forkcount; @@ -84,9 +130,29 @@ hfs_inactive(ap) ++forkcount; /* If needed, get rid of any fork's data for a deleted file */ - if ((vp->v_type == VREG) && (cp->c_flag & C_DELETED)) { + if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) { if (VTOF(vp)->ff_blocks != 0) { - error = VOP_TRUNCATE(vp, (off_t)0, IO_NDELAY, NOCRED, p); + // start the transaction out here so that + // the truncate and the removal of the file + // are all in one transaction. otherwise + // because this cnode is marked for deletion + // the truncate won't cause the catalog entry + // to get updated which means that we could + // free blocks but still keep a reference to + // them in the catalog entry and then double + // free them later. + // + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + started_tr = 1; + + /* + * Since we're already inside a transaction, + * tell hfs_truncate to skip the ubc_setsize. + */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context); if (error) goto out; truncated = 1; @@ -103,21 +169,20 @@ hfs_inactive(ap) * Mark cnode in transit so that no one can get this * cnode from cnode hash. */ - SET(cp->c_flag, C_TRANSIT); + hfs_chash_mark_in_transit(cp); + cp->c_flag &= ~C_DELETED; + cp->c_flag |= C_NOEXISTS; // XXXdbg cp->c_rdev = 0; - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - error = EINVAL; - goto out; + if (started_tr == 0) { + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; } started_tr = 1; } - + /* * Reserve some space in the Catalog file. */ @@ -126,14 +191,21 @@ hfs_inactive(ap) } cat_reserve = 1; - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) goto out; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); if (cp->c_blocks > 0) printf("hfs_inactive: attempting to delete a non-empty file!"); + + // + // release the name pointer in the descriptor so that + // cat_delete() will use the file-id to do the deletion. + // in the case of hard links this is imperative (in the + // case of regular files the fileid and cnid are the + // same so it doesn't matter). + // + cat_releasedesc(&cp->c_desc); + /* * The descriptor name may be zero, * in which case the fileid is used. 
@@ -150,30 +222,32 @@ hfs_inactive(ap) &hfsmp->hfs_privdir_attr, NULL, NULL); } - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (error) goto out; + if (error == 0) { + /* Delete any attributes, ignore errors */ + (void) hfs_removeallattr(hfsmp, cp->c_fileid); + } + + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) + goto out; #if QUOTA (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ cp->c_mode = 0; - cp->c_flag |= C_NOEXISTS | C_CHANGE | C_UPDATE; + cp->c_flag |= C_NOEXISTS; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; if (error == 0) hfs_volupdate(hfsmp, VOL_RMFILE, 0); } - if (cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE)) { - tv = time; - // if the only thing being updated is the access time - // then set the modified bit too so that update will - // flush it to disk. otherwise it'll get dropped. - if ((cp->c_flag & C_CHANGEMASK) == C_ACCESS) { - cp->c_flag |= C_MODIFIED; - } - VOP_UPDATE(vp, &tv, &tv, 0); + if ((cp->c_flag & C_MODIFIED) || + cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { + hfs_update(vp, 0); } out: if (cat_reserve) @@ -181,424 +255,256 @@ out: // XXXdbg - have to do this because a goto could have come here if (started_tr) { - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); started_tr = 0; } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - } - VOP_UNLOCK(vp, 0, p); + hfs_unlock(cp); + + if (took_trunc_lock) + hfs_unlock_truncate(cp); + /* * If we are done with the vnode, reclaim it * so that it can be reused immediately. */ if (cp->c_mode == 0 || recycle) - vrecycle(vp, (struct slock *)0, p); + vnode_recycle(vp); return (error); } +/* + * File clean-up (zero fill and shrink peof). + */ +static int +hfs_filedone(struct vnode *vp, vfs_context_t context) +{ + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + off_t leof; + u_long blks, blocksize; + + cp = VTOC(vp); + fp = VTOF(vp); + hfsmp = VTOHFS(vp); + leof = fp->ff_size; + + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (fp->ff_blocks == 0)) + return (0); + + hfs_unlock(cp); + (void) cluster_push(vp, IO_CLOSE); + hfs_lock(cp, HFS_FORCE_LOCK); + + /* + * Explicitly zero out the areas of file + * that are currently marked invalid. + */ + while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { + struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); + off_t start = invalid_range->rl_start; + off_t end = invalid_range->rl_end; + + /* The range about to be written must be validated + * first, so that VNOP_BLOCKMAP() will return the + * appropriate mapping for the cluster code: + */ + rl_remove(start, end, &fp->ff_invalidranges); + + hfs_unlock(cp); + (void) cluster_write(vp, (struct uio *) 0, + leof, end + 1, start, (off_t)0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_flag |= C_MODIFIED; + } + cp->c_flag &= ~C_ZFWANTSYNC; + cp->c_zftimeout = 0; + blocksize = VTOVCB(vp)->blockSize; + blks = leof / blocksize; + if (((off_t)blks * (off_t)blocksize) != leof) + blks++; + /* + * Shrink the peof to the smallest size neccessary to contain the leof. 
+ */ + if (blks < fp->ff_blocks) + (void) hfs_truncate(vp, leof, IO_NDELAY, 0, context); + hfs_unlock(cp); + (void) cluster_push(vp, IO_CLOSE); + hfs_lock(cp, HFS_FORCE_LOCK); + + /* + * If the hfs_truncate didn't happen to flush the vnode's + * information out to disk, force it to be updated now that + * all invalid ranges have been zero-filled and validated: + */ + if (cp->c_flag & C_MODIFIED) { + hfs_update(vp, 0); + } + return (0); +} + /* * Reclaim a cnode so that it can be used for other purposes. */ __private_extern__ int -hfs_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; - } */ *ap; +hfs_vnop_reclaim(struct vnop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct vnode *devvp = NULL; + struct cnode *cp; struct filefork *fp = NULL; struct filefork *altfp = NULL; - int i; + int reclaim_cnode = 0; - if (prtactive && vp->v_usecount != 0) - vprint("hfs_reclaim(): pushing active", vp); + (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + cp = VTOC(vp); /* * Keep track of an inactive hot file. */ - (void) hfs_addhotfile(vp); + if (!vnode_isdir(vp) && !vnode_issystem(vp)) + (void) hfs_addhotfile(vp); - devvp = cp->c_devvp; /* For later releasing */ + vnode_removefsref(vp); /* * Find file fork for this vnode (if any) * Also check if another fork is active */ - if ((fp = cp->c_datafork) && (cp->c_vp == vp)) { + if (cp->c_vp == vp) { + fp = cp->c_datafork; + altfp = cp->c_rsrcfork; + cp->c_datafork = NULL; cp->c_vp = NULL; - altfp = cp->c_rsrcfork; - } else if ((fp = cp->c_rsrcfork) && (cp->c_rsrc_vp == vp)) { + } else if (cp->c_rsrc_vp == vp) { + fp = cp->c_rsrcfork; + altfp = cp->c_datafork; + cp->c_rsrcfork = NULL; cp->c_rsrc_vp = NULL; - if (VPARENT(vp) == cp->c_vp) { - cp->c_flag &= ~C_VPREFHELD; - } - altfp = cp->c_datafork; } else { - cp->c_vp = NULL; - fp = NULL; - altfp = NULL; + panic("hfs_vnop_reclaim: vp points to wrong cnode\n"); } - /* * On the last fork, remove the cnode from its hash chain. */ - if (altfp == NULL) - hfs_chashremove(cp); - - /* Release the file fork and related data (can block) */ + if (altfp == NULL) { + /* If we can't remove it then the cnode must persist! */ + if (hfs_chashremove(cp) == 0) + reclaim_cnode = 1; + /* + * Remove any directory hints + */ + if (vnode_isdir(vp)) { + hfs_reldirhints(cp, 0); + } + } + /* Release the file fork and related data */ if (fp) { - fp->ff_cp = NULL; /* Dump cached symlink data */ - if ((vp->v_type == VLNK) && (fp->ff_symlinkptr != NULL)) { + if (vnode_islnk(vp) && (fp->ff_symlinkptr != NULL)) { FREE(fp->ff_symlinkptr, M_TEMP); - fp->ff_symlinkptr = NULL; - } + } FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); - fp = NULL; } - /* - * Purge old data structures associated with the cnode. - */ - cache_purge(vp); - if (devvp && altfp == NULL) { - cp->c_devvp = NULL; - vrele(devvp); - } - - vp->v_data = NULL; - /* * If there was only one active fork then we can release the cnode. 
*/ - if (altfp == NULL) { -#if QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - if (cp->c_dquot[i] != NODQUOT) { - dqreclaim(vp, cp->c_dquot[i]); - cp->c_dquot[i] = NODQUOT; - } - } -#endif /* QUOTA */ - /* - * Free any left over directory indices - */ - if (vp->v_type == VDIR) - hfs_relnamehints(cp); - - /* - * If the descriptor has a name then release it - */ - if (cp->c_desc.cd_flags & CD_HASBUF) { - char *nameptr; - - nameptr = cp->c_desc.cd_nameptr; - cp->c_desc.cd_nameptr = 0; - cp->c_desc.cd_flags &= ~CD_HASBUF; - cp->c_desc.cd_namelen = 0; - remove_name(nameptr); - } - CLR(cp->c_flag, (C_ALLOC | C_TRANSIT)); - if (ISSET(cp->c_flag, C_WALLOC) || ISSET(cp->c_flag, C_WTRANSIT)) - wakeup(cp); - FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); - + if (reclaim_cnode) { + hfs_chashwakeup(cp, H_ALLOC | H_TRANSIT); + hfs_reclaim_cnode(cp); + } else /* cnode in use */ { + hfs_unlock(cp); } + vnode_clearfsnode(vp); return (0); } -/* - * get a cnode - * - * called by hfs_lookup and hfs_vget (descp == NULL) - * - * returns a locked vnode for cnode for given cnid/fileid - */ -__private_extern__ -int -hfs_getcnode(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *descp, int wantrsrc, - struct cat_attr *attrp, struct cat_fork *forkp, struct vnode **vpp) -{ - dev_t dev = hfsmp->hfs_raw_dev; - struct vnode *vp = NULL; - struct vnode *rvp = NULL; - struct vnode *new_vp = NULL; - struct cnode *cp = NULL; - struct proc *p = current_proc(); - int retval = E_NONE; - - /* Check if unmount in progress */ - if (HFSTOVFS(hfsmp)->mnt_kern_flag & MNTK_UNMOUNT) { - *vpp = NULL; - return (EPERM); - } - - /* - * Check the hash for an active cnode - */ - cp = hfs_chashget(dev, cnid, wantrsrc, &vp, &rvp); - if (cp != NULL) { - /* hide open files that have been deleted */ - if ((hfsmp->hfs_privdir_desc.cd_cnid != 0) - && (cp->c_parentcnid == hfsmp->hfs_privdir_desc.cd_cnid) - && (cp->c_nlink == 0)) { - retval = ENOENT; - goto exit; - } - - /* Hide private journal files */ - if (hfsmp->jnl && - (cp->c_parentcnid == kRootDirID) && - ((cp->c_cnid == hfsmp->hfs_jnlfileid) || - (cp->c_cnid == hfsmp->hfs_jnlinfoblkid))) { - retval = ENOENT; - goto exit; - } - - if (wantrsrc && rvp != NULL) { - vp = rvp; - rvp = NULL; - goto done; - } - if (!wantrsrc && vp != NULL) { - /* Hardlinks need an updated catalog descriptor */ - if (descp && cp->c_flag & C_HARDLINK) { - replace_desc(cp, descp); - } - /* We have a vnode so we're done. */ - goto done; - } - } - - /* - * There was no active vnode so get a new one. - * Use the existing cnode (if any). 
- */ - if (descp != NULL) { - /* - * hfs_lookup case, use descp, attrp and forkp - */ - retval = hfs_getnewvnode(hfsmp, cp, descp, wantrsrc, attrp, - forkp, &new_vp); - } else { - struct cat_desc cndesc = {0}; - struct cat_attr cnattr = {0}; - struct cat_fork cnfork = {0}; - - /* - * hfs_vget case, need to lookup entry (by file id) - */ - if (cnid == kRootParID) { - static char hfs_rootname[] = "/"; - - cndesc.cd_nameptr = &hfs_rootname[0]; - cndesc.cd_namelen = 1; - cndesc.cd_parentcnid = kRootParID; - cndesc.cd_cnid = kRootParID; - cndesc.cd_flags = CD_ISDIR; - - cnattr.ca_fileid = kRootParID; - cnattr.ca_nlink = 2; - cnattr.ca_entries = 1; - cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO); - } else { - /* Lock catalog b-tree */ - retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (retval) - goto exit; - - retval = cat_idlookup(hfsmp, cnid, &cndesc, &cnattr, &cnfork); - - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (retval) - goto exit; - - /* Hide open files that have been deleted */ - if ((hfsmp->hfs_privdir_desc.cd_cnid != 0) && - (cndesc.cd_parentcnid == hfsmp->hfs_privdir_desc.cd_cnid) && - (cnattr.ca_nlink == 0)) { - cat_releasedesc(&cndesc); - retval = ENOENT; - goto exit; - } - } - - retval = hfs_getnewvnode(hfsmp, cp, &cndesc, 0, &cnattr, &cnfork, &new_vp); - - /* Hardlinks may need an updated catalog descriptor */ - if (retval == 0 - && new_vp - && (VTOC(new_vp)->c_flag & C_HARDLINK) - && cndesc.cd_nameptr - && cndesc.cd_namelen > 0) { - replace_desc(VTOC(new_vp), &cndesc); - } - - cat_releasedesc(&cndesc); - } - -exit: - /* Release reference taken on opposite vnode (if any). */ - if (vp) - vrele(vp); - else if (rvp) - vrele(rvp); - - if (retval) { - *vpp = NULL; - return (retval); - } - vp = new_vp; -done: - /* The cnode's vnode should be in vp. 
*/ - if (vp == NULL) - panic("hfs_getcnode: missing vp!"); - - if (UBCISVALID(vp)) - UBCINFOCHECK("hfs_getcnode", vp); - *vpp = vp; - return (0); -} - +extern int (**hfs_vnodeop_p) (void *); +extern int (**hfs_specop_p) (void *); +extern int (**hfs_fifoop_p) (void *); /* * hfs_getnewvnode - get new default vnode * - * the vnode is returned locked + * The vnode is returned with an iocount and the cnode locked */ -extern int (**hfs_vnodeop_p) (void *); -extern int (**hfs_specop_p) (void *); -extern int (**hfs_fifoop_p) (void *); - __private_extern__ int -hfs_getnewvnode(struct hfsmount *hfsmp, struct cnode *cp, - struct cat_desc *descp, int wantrsrc, - struct cat_attr *attrp, struct cat_fork *forkp, +hfs_getnewvnode( + struct hfsmount *hfsmp, + struct vnode *dvp, + struct componentname *cnp, + struct cat_desc *descp, + int wantrsrc, + struct cat_attr *attrp, + struct cat_fork *forkp, struct vnode **vpp) { struct mount *mp = HFSTOVFS(hfsmp); struct vnode *vp = NULL; - struct vnode *rvp = NULL; - struct vnode *new_vp = NULL; - struct cnode *cp2 = NULL; + struct vnode **cvpp; + struct vnode *tvp = NULLVP; + struct cnode *cp = NULL; struct filefork *fp = NULL; - int allocated = 0; int i; int retval; - dev_t dev; - struct proc *p = current_proc(); -#if 0 - /* Bail when unmount is in progress */ - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { + int issystemfile; + struct vnode_fsparam vfsp; + enum vtype vtype; + + if (attrp->ca_fileid == 0) { *vpp = NULL; - return (EPERM); + return (ENOENT); } -#endif #if !FIFO if (IFTOVT(attrp->ca_mode) == VFIFO) { *vpp = NULL; - return (EOPNOTSUPP); + return (ENOTSUP); } #endif - dev = hfsmp->hfs_raw_dev; - - /* If no cnode was passed in then create one */ - if (cp == NULL) { - MALLOC_ZONE(cp2, struct cnode *, sizeof(struct cnode), - M_HFSNODE, M_WAITOK); - bzero(cp2, sizeof(struct cnode)); - allocated = 1; - SET(cp2->c_flag, C_ALLOC); - cp2->c_cnid = descp->cd_cnid; - cp2->c_fileid = attrp->ca_fileid; - if (cp2->c_fileid == 0) { - FREE_ZONE(cp2, sizeof(struct cnode), M_HFSNODE); - *vpp = NULL; - return (ENOENT); - } - cp2->c_dev = dev; - lockinit(&cp2->c_lock, PINOD, "cnode", 0, 0); - (void) lockmgr(&cp2->c_lock, LK_EXCLUSIVE, (struct slock *)0, p); - /* - * There were several blocking points since we first - * checked the hash. Now that we're through blocking, - * check the hash again in case we're racing for the - * same cnode. - */ - cp = hfs_chashget(dev, attrp->ca_fileid, wantrsrc, &vp, &rvp); - if (cp != NULL) { - /* We lost the race - use the winner's cnode */ - FREE_ZONE(cp2, sizeof(struct cnode), M_HFSNODE); - allocated = 0; - if (wantrsrc && rvp != NULL) { - *vpp = rvp; - return (0); - } - if (!wantrsrc && vp != NULL) { - *vpp = vp; - return (0); - } - } else /* allocated */ { - cp = cp2; - hfs_chashinsert(cp); - } + vtype = IFTOVT(attrp->ca_mode); + issystemfile = (descp->cd_flags & CD_ISMETA) && (vtype == VREG); + + /* + * Get a cnode (new or existing) + * skip getting the cnode lock if we are getting resource fork (wantrsrc == 2) + */ + cp = hfs_chash_getcnode(hfsmp->hfs_raw_dev, attrp->ca_fileid, vpp, wantrsrc, (wantrsrc == 2)); + + /* Hardlinks may need an updated catalog descriptor */ + if ((cp->c_flag & C_HARDLINK) && descp->cd_nameptr && descp->cd_namelen > 0) { + replace_desc(cp, descp); } + /* Check if we found a matching vnode */ + if (*vpp != NULL) + return (0); - /* Allocate a new vnode. 
If unsuccesful, leave after freeing memory */ - if ((retval = getnewvnode(VT_HFS, mp, hfs_vnodeop_p, &new_vp))) { - if (allocated) { - hfs_chashremove(cp); - if (ISSET(cp->c_flag, C_WALLOC)) { - CLR(cp->c_flag, C_WALLOC); - wakeup(cp); - } - FREE_ZONE(cp2, sizeof(struct cnode), M_HFSNODE); - allocated = 0; - } else if (rvp) { - vput(rvp); - } else if (vp) { - vput(vp); + /* + * If this is a new cnode then initialize it. + */ + if (ISSET(cp->c_hflag, H_ALLOC)) { + lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr); + + /* Make sure its still valid (ie exists on disk). */ + if (!hfs_valid_cnode(hfsmp, dvp, cnp, cp->c_fileid)) { + hfs_chash_abort(cp); + hfs_reclaim_cnode(cp); + *vpp = NULL; + return (ENOENT); } - *vpp = NULL; - return (retval); - } - if (allocated) { bcopy(attrp, &cp->c_attr, sizeof(struct cat_attr)); bcopy(descp, &cp->c_desc, sizeof(struct cat_desc)); - } - new_vp->v_data = cp; - if (wantrsrc && S_ISREG(cp->c_mode)) - cp->c_rsrc_vp = new_vp; - else - cp->c_vp = new_vp; - - /* Release reference taken on opposite vnode (if any). */ - if (rvp) - vrele(rvp); - if (vp) - vrele(vp); - - vp = new_vp; - vp->v_ubcinfo = UBC_NOINFO; - /* - * If this is a new cnode then initialize it using descp and attrp... - */ - if (allocated) { /* The name was inherited so clear descriptor state... */ descp->cd_namelen = 0; descp->cd_nameptr = NULL; @@ -613,7 +519,7 @@ hfs_getnewvnode(struct hfsmount *hfsmp, struct cnode *cp, /* Take one dev reference for each non-directory cnode */ if (IFTOVT(cp->c_mode) != VDIR) { cp->c_devvp = hfsmp->hfs_devvp; - VREF(cp->c_devvp); + vnode_ref(cp->c_devvp); } #if QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -621,7 +527,11 @@ hfs_getnewvnode(struct hfsmount *hfsmp, struct cnode *cp, #endif /* QUOTA */ } - if (IFTOVT(cp->c_mode) != VDIR) { + if (IFTOVT(cp->c_mode) == VDIR) { + if (cp->c_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (data)"); + cvpp = &cp->c_vp; + } else { if (forkp && attrp->ca_blocks < forkp->cf_blocks) panic("hfs_getnewvnode: bad ca_blocks (too small)"); /* @@ -629,89 +539,578 @@ hfs_getnewvnode(struct hfsmount *hfsmp, struct cnode *cp, */ MALLOC_ZONE(fp, struct filefork *, sizeof(struct filefork), M_HFSFORK, M_WAITOK); - bzero(fp, sizeof(struct filefork)); fp->ff_cp = cp; if (forkp) bcopy(forkp, &fp->ff_data, sizeof(struct cat_fork)); + else + bzero(&fp->ff_data, sizeof(struct cat_fork)); rl_init(&fp->ff_invalidranges); + fp->ff_sysfileinfo = 0; + if (wantrsrc) { if (cp->c_rsrcfork != NULL) - panic("stale rsrc fork"); + panic("hfs_getnewvnode: orphaned rsrc fork"); + if (cp->c_rsrc_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (rsrc)"); cp->c_rsrcfork = fp; + cvpp = &cp->c_rsrc_vp; + if ( (tvp = cp->c_vp) != NULLVP ) + cp->c_flag |= C_NEED_DVNODE_PUT; } else { if (cp->c_datafork != NULL) - panic("stale data fork"); + panic("hfs_getnewvnode: orphaned data fork"); + if (cp->c_vp != NULL) + panic("hfs_getnewvnode: orphaned vnode (data)"); cp->c_datafork = fp; + cvpp = &cp->c_vp; + if ( (tvp = cp->c_rsrc_vp) != NULLVP) + cp->c_flag |= C_NEED_RVNODE_PUT; } } + if (tvp != NULLVP) { + /* + * grab an iocount on the vnode we weren't + * interested in (i.e. we want the resource fork + * but the cnode already has the data fork) + * to prevent it from being + * recycled by us when we call vnode_create + * which will result in a deadlock when we + * try to take the cnode lock in hfs_vnop_fsync or + * hfs_vnop_reclaim... 
vnode_get can be called here + * because we already hold the cnode lock which will + * prevent the vnode from changing identity until + * we drop it.. vnode_get will not block waiting for + * a change of state... however, it will return an + * error if the current iocount == 0 and we've already + * started to terminate the vnode... we don't need/want to + * grab an iocount in the case since we can't cause + * the fileystem to be re-entered on this thread for this vp + * + * the matching vnode_put will happen in hfs_unlock + * after we've dropped the cnode lock + */ + if ( vnode_get(tvp) != 0) + cp->c_flag &= ~(C_NEED_RVNODE_PUT | C_NEED_DVNODE_PUT); + } + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "hfs"; + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_fsnode = cp; + vfsp.vnfs_cnp = cnp; + if (vtype == VFIFO ) + vfsp.vnfs_vops = hfs_fifoop_p; + else if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_vops = hfs_specop_p; + else + vfsp.vnfs_vops = hfs_vnodeop_p; + + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_rdev = attrp->ca_rdev; + else + vfsp.vnfs_rdev = 0; - /* - * Finish vnode initialization. - * Setting the v_type 'stamps' the vnode as 'complete', - * so should be done almost last. - * - * At this point the vnode should be locked and fully - * allocated. And ready to be used or accessed. (though - * having it locked prevents most of this, it can still - * be accessed through lists and hashes). - */ - vp->v_type = IFTOVT(cp->c_mode); + if (forkp) + vfsp.vnfs_filesize = forkp->cf_size; + else + vfsp.vnfs_filesize = 0; + + if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) + vfsp.vnfs_flags = 0; + else + vfsp.vnfs_flags = VNFS_NOCACHE; /* Tag system files */ - if ((descp->cd_flags & CD_ISMETA) && (vp->v_type == VREG)) - vp->v_flag |= VSYSTEM; + vfsp.vnfs_marksystem = issystemfile; + /* Tag root directory */ - if (cp->c_cnid == kRootDirID) - vp->v_flag |= VROOT; + if (descp->cd_cnid == kHFSRootFolderID) + vfsp.vnfs_markroot = 1; + else + vfsp.vnfs_markroot = 0; + + if ((retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, cvpp))) { + if (fp) { + if (fp == cp->c_datafork) + cp->c_datafork = NULL; + else + cp->c_rsrcfork = NULL; + + FREE_ZONE(fp, sizeof(struct filefork), M_HFSFORK); + } + /* + * If this is a newly created cnode or a vnode reclaim + * occurred during the attachment, then cleanup the cnode. + */ + if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { + hfs_chash_abort(cp); + hfs_reclaim_cnode(cp); + } else { + hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); + hfs_unlock(cp); + } + *vpp = NULL; + return (retval); + } + vp = *cvpp; + vnode_addfsref(vp); + vnode_settag(vp, VT_HFS); + if (cp->c_flag & C_HARDLINK) + vnode_set_hard_link(vp); + hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); + + /* + * Stop tracking an active hot file. 
+ */ + if (!vnode_isdir(vp) && !vnode_issystem(vp)) + (void) hfs_removehotfile(vp); + + *vpp = vp; + return (0); +} + + +static void +hfs_reclaim_cnode(struct cnode *cp) +{ +#if QUOTA + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if (cp->c_dquot[i] != NODQUOT) { + dqreclaim(cp->c_dquot[i]); + cp->c_dquot[i] = NODQUOT; + } + } +#endif /* QUOTA */ + + if (cp->c_devvp) { + struct vnode *tmp_vp = cp->c_devvp; + + cp->c_devvp = NULL; + vnode_rele(tmp_vp); + } + + /* + * If the descriptor has a name then release it + */ + if (cp->c_desc.cd_flags & CD_HASBUF) { + char *nameptr; + + nameptr = cp->c_desc.cd_nameptr; + cp->c_desc.cd_nameptr = 0; + cp->c_desc.cd_flags &= ~CD_HASBUF; + cp->c_desc.cd_namelen = 0; + vfs_removename(nameptr); + } + + lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group); + lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group); + bzero(cp, sizeof(struct cnode)); + FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); +} + + +static int +hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid) +{ + struct cat_attr attr; + struct cat_desc cndesc; + int stillvalid = 0; + int lockflags; - if ((vp->v_type == VREG) && !(vp->v_flag & VSYSTEM) - && (UBCINFOMISSING(vp) || UBCINFORECLAIMED(vp))) { - ubc_info_init(vp); + /* System files are always valid */ + if (cnid < kHFSFirstUserCatalogNodeID) + return (1); + + /* XXX optimization: check write count in dvp */ + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + if (dvp && cnp) { + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_nameptr = cnp->cn_nameptr; + cndesc.cd_namelen = cnp->cn_namelen; + cndesc.cd_parentcnid = VTOC(dvp)->c_cnid; + cndesc.cd_hint = VTOC(dvp)->c_childhint; + + if ((cat_lookup(hfsmp, &cndesc, 0, NULL, &attr, NULL, NULL) == 0) && + (cnid == attr.ca_fileid)) { + stillvalid = 1; + } } else { - vp->v_ubcinfo = UBC_NOINFO; + if (cat_idlookup(hfsmp, cnid, NULL, NULL, NULL) == 0) { + stillvalid = 1; + } + } + hfs_systemfile_unlock(hfsmp, lockflags); + + return (stillvalid); +} + +/* + * Touch cnode times based on c_touch_xxx flags + * + * cnode must be locked exclusive + * + * This will also update the volume modify time + */ +__private_extern__ +void +hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) +{ + /* HFS Standard doesn't support access times */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + cp->c_touch_acctime = FALSE; } - if (vp->v_type == VCHR || vp->v_type == VBLK) { - struct vnode *nvp; + if (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { + struct timeval tv; + int touchvol = 0; - vp->v_op = hfs_specop_p; - if ((nvp = checkalias(vp, cp->c_rdev, mp))) { + microtime(&tv); + + if (cp->c_touch_acctime) { + cp->c_atime = tv.tv_sec; /* - * Discard unneeded vnode, but save its cnode. - * Note that the lock is carried over in the - * cnode to the replacement vnode. + * When the access time is the only thing changing + * then make sure its sufficiently newer before + * committing it to disk. */ - nvp->v_data = vp->v_data; - vp->v_data = NULL; - vp->v_op = spec_vnodeop_p; - vrele(vp); - vgone(vp); + if ((((u_int32_t)cp->c_atime - (u_int32_t)(cp)->c_attr.ca_atimeondisk) > + ATIME_ONDISK_ACCURACY)) { + cp->c_flag |= C_MODIFIED; + } + cp->c_touch_acctime = FALSE; + } + if (cp->c_touch_modtime) { + cp->c_mtime = tv.tv_sec; + cp->c_touch_modtime = FALSE; + cp->c_flag |= C_MODIFIED; + touchvol = 1; +#if 1 /* - * Reinitialize aliased cnode. - * Assume its not a resource fork. 
+ * HFS dates that WE set must be adjusted for DST */ - cp->c_vp = nvp; - vp = nvp; - } - } else if (vp->v_type == VFIFO) { -#if FIFO - vp->v_op = hfs_fifoop_p; + if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { + cp->c_mtime += 3600; + } #endif + } + if (cp->c_touch_chgtime) { + cp->c_ctime = tv.tv_sec; + cp->c_touch_chgtime = FALSE; + cp->c_flag |= C_MODIFIED; + touchvol = 1; + } + + /* Touch the volume modtime if needed */ + if (touchvol) { + HFSTOVCB(hfsmp)->vcbFlags |= 0xFF00; + HFSTOVCB(hfsmp)->vcbLsMod = tv.tv_sec; + } } +} + +/* + * Lock a cnode. + */ +__private_extern__ +int +hfs_lock(struct cnode *cp, enum hfslocktype locktype) +{ + void * thread = current_thread(); + + /* System files need to keep track of owner */ + if ((cp->c_fileid < kHFSFirstUserCatalogNodeID) && + (cp->c_fileid > kHFSRootFolderID) && + (locktype != HFS_SHARED_LOCK)) { + /* + * The extents and bitmap file locks support + * recursion and are always taken exclusive. + */ + if (cp->c_fileid == kHFSExtentsFileID || + cp->c_fileid == kHFSAllocationFileID) { + if (cp->c_lockowner == thread) { + cp->c_syslockcount++; + } else { + lck_rw_lock_exclusive(&cp->c_rwlock); + cp->c_lockowner = thread; + cp->c_syslockcount = 1; + } + } else { + lck_rw_lock_exclusive(&cp->c_rwlock); + cp->c_lockowner = thread; + } + } else if (locktype == HFS_SHARED_LOCK) { + lck_rw_lock_shared(&cp->c_rwlock); + cp->c_lockowner = HFS_SHARED_OWNER; + } else { + lck_rw_lock_exclusive(&cp->c_rwlock); + cp->c_lockowner = thread; + } /* - * Stop tracking an active hot file. + * Skip cnodes that no longer exist (were deleted). */ - (void) hfs_removehotfile(vp); + if ((locktype != HFS_FORCE_LOCK) && + ((cp->c_desc.cd_flags & CD_ISMETA) == 0) && + (cp->c_flag & C_NOEXISTS)) { + hfs_unlock(cp); + return (ENOENT); + } + return (0); +} - /* Vnode is now initialized - see if anyone was waiting for it. */ - CLR(cp->c_flag, C_ALLOC); - if (ISSET(cp->c_flag, C_WALLOC)) { - CLR(cp->c_flag, C_WALLOC); - wakeup((caddr_t)cp); +/* + * Lock a pair of cnodes. + */ +__private_extern__ +int +hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) +{ + struct cnode *first, *last; + int error; + + /* + * If cnodes match then just lock one. + */ + if (cp1 == cp2) { + return hfs_lock(cp1, locktype); } - *vpp = vp; + /* + * Lock in cnode parent-child order (if there is a relationship); + * otherwise lock in cnode address order. + */ + if ((IFTOVT(cp1->c_mode) == VDIR) && (cp1->c_fileid == cp2->c_parentcnid)) { + first = cp1; + last = cp2; + } else if (cp1 < cp2) { + first = cp1; + last = cp2; + } else { + first = cp2; + last = cp1; + } + + if ( (error = hfs_lock(first, locktype))) { + return (error); + } + if ( (error = hfs_lock(last, locktype))) { + hfs_unlock(first); + return (error); + } return (0); } +/* + * Check ordering of two cnodes. Return true if they are are in-order. + */ +static int +hfs_isordered(struct cnode *cp1, struct cnode *cp2) +{ + if (cp1 == cp2) + return (0); + if (cp1 == NULL || cp2 == (struct cnode *)0xffffffff) + return (1); + if (cp2 == NULL || cp1 == (struct cnode *)0xffffffff) + return (0); + if (cp1->c_fileid == cp2->c_parentcnid) + return (1); /* cp1 is the parent and should go first */ + if (cp2->c_fileid == cp1->c_parentcnid) + return (0); /* cp1 is the child and should go last */ + + return (cp1 < cp2); /* fall-back is to use address order */ +} + +/* + * Acquire 4 cnode locks. 
+ * - locked in cnode parent-child order (if there is a relationship) + * otherwise lock in cnode address order (lesser address first). + * - all or none of the locks are taken + * - only one lock taken per cnode (dup cnodes are skipped) + * - some of the cnode pointers may be null + */ +__private_extern__ +int +hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, + struct cnode *cp4, enum hfslocktype locktype) +{ + struct cnode * a[3]; + struct cnode * b[3]; + struct cnode * list[4]; + struct cnode * tmp; + int i, j, k; + int error; + + if (hfs_isordered(cp1, cp2)) { + a[0] = cp1; a[1] = cp2; + } else { + a[0] = cp2; a[1] = cp1; + } + if (hfs_isordered(cp3, cp4)) { + b[0] = cp3; b[1] = cp4; + } else { + b[0] = cp4; b[1] = cp3; + } + a[2] = (struct cnode *)0xffffffff; /* sentinel value */ + b[2] = (struct cnode *)0xffffffff; /* sentinel value */ + + /* + * Build the lock list, skipping over duplicates + */ + for (i = 0, j = 0, k = 0; (i < 2 || j < 2); ) { + tmp = hfs_isordered(a[i], b[j]) ? a[i++] : b[j++]; + if (k == 0 || tmp != list[k-1]) + list[k++] = tmp; + } + + /* + * Now we can lock using list[0 - k]. + * Skip over NULL entries. + */ + for (i = 0; i < k; ++i) { + if (list[i]) + if ((error = hfs_lock(list[i], locktype))) { + /* Drop any locks we acquired. */ + while (--i >= 0) { + if (list[i]) + hfs_unlock(list[i]); + } + return (error); + } + } + return (0); +} + + +/* + * Unlock a cnode. + */ +__private_extern__ +void +hfs_unlock(struct cnode *cp) +{ + vnode_t rvp = NULLVP; + vnode_t dvp = NULLVP; + + /* System files need to keep track of owner */ + if ((cp->c_fileid < kHFSFirstUserCatalogNodeID) && + (cp->c_fileid > kHFSRootFolderID) && + (cp->c_datafork != NULL)) { + /* + * The extents and bitmap file locks support + * recursion and are always taken exclusive. + */ + if (cp->c_fileid == kHFSExtentsFileID || + cp->c_fileid == kHFSAllocationFileID) { + if (--cp->c_syslockcount > 0) { + return; + } + } + } + if (cp->c_flag & C_NEED_DVNODE_PUT) + dvp = cp->c_vp; + + if (cp->c_flag & C_NEED_RVNODE_PUT) + rvp = cp->c_rsrc_vp; + + cp->c_flag &= ~(C_NEED_DVNODE_PUT | C_NEED_RVNODE_PUT); + + cp-> c_lockowner = NULL; + lck_rw_done(&cp->c_rwlock); + + if (dvp) + vnode_put(dvp); + if (rvp) + vnode_put(rvp); +} + +/* + * Unlock a pair of cnodes. + */ +__private_extern__ +void +hfs_unlockpair(struct cnode *cp1, struct cnode *cp2) +{ + hfs_unlock(cp1); + if (cp2 != cp1) + hfs_unlock(cp2); +} + +/* + * Unlock a group of cnodes. + */ +__private_extern__ +void +hfs_unlockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode *cp4) +{ + struct cnode * list[4]; + int i, k = 0; + + if (cp1) { + hfs_unlock(cp1); + list[k++] = cp1; + } + if (cp2) { + for (i = 0; i < k; ++i) { + if (list[i] == cp2) + goto skip1; + } + hfs_unlock(cp2); + list[k++] = cp2; + } +skip1: + if (cp3) { + for (i = 0; i < k; ++i) { + if (list[i] == cp3) + goto skip2; + } + hfs_unlock(cp3); + list[k++] = cp3; + } +skip2: + if (cp4) { + for (i = 0; i < k; ++i) { + if (list[i] == cp4) + return; + } + hfs_unlock(cp4); + } +} + + +/* + * Protect a cnode against a truncation. + * + * Used mainly by read/write since they don't hold the + * cnode lock across calls to the cluster layer. + * + * The process doing a truncation must take the lock + * exclusive. The read/write processes can take it + * non-exclusive. 
+ */ +__private_extern__ +void +hfs_lock_truncate(struct cnode *cp, int exclusive) +{ + if (cp->c_lockowner == current_thread()) + panic("hfs_lock_truncate: cnode 0x%08x locked!", cp); + + if (exclusive) + lck_rw_lock_exclusive(&cp->c_truncatelock); + else + lck_rw_lock_shared(&cp->c_truncatelock); +} + +__private_extern__ +void +hfs_unlock_truncate(struct cnode *cp) +{ + lck_rw_done(&cp->c_truncatelock); +} + + + + diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index 38ca8996e..64d2fd70d 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,12 +27,13 @@ #ifdef KERNEL #ifdef __APPLE_API_PRIVATE #include -#include #include #include #include #include +#include + #include #include @@ -42,45 +43,35 @@ * Reading or writing any of these fields requires holding cnode lock. */ struct filefork { - struct cnode *ff_cp; /* cnode associated with this fork */ - struct rl_head ff_invalidranges; /* Areas of disk that should read back as zeroes */ - long ff_evtonly_refs; /* number of vnode references used solely for events (O_EVTONLY) */ + struct cnode *ff_cp; /* cnode associated with this fork */ + struct rl_head ff_invalidranges; /* Areas of disk that should read back as zeroes */ union { - struct hfslockf *ffu_lockf; /* Head of byte-level lock list. */ - void *ffu_sysdata; /* private data for system files */ - char *ffu_symlinkptr; /* symbolic link pathname */ - } ff_un; - struct cat_fork ff_data; + void *ffu_sysfileinfo; /* additional info for system files */ + char *ffu_symlinkptr; /* symbolic link pathname */ + } ff_union; + struct cat_fork ff_data; /* fork data (size, extents) */ }; typedef struct filefork filefork_t; /* Aliases for common fields */ -#define ff_size ff_data.cf_size -#define ff_clumpsize ff_data.cf_clump -#define ff_bytesread ff_data.cf_bytesread -#define ff_blocks ff_data.cf_blocks -#define ff_extents ff_data.cf_extents +#define ff_size ff_data.cf_size +#define ff_clumpsize ff_data.cf_clump +#define ff_bytesread ff_data.cf_bytesread +#define ff_blocks ff_data.cf_blocks +#define ff_extents ff_data.cf_extents #define ff_unallocblocks ff_data.cf_vblocks -#define ff_symlinkptr ff_un.ffu_symlinkptr -#define ff_lockf ff_un.ffu_lockf +#define ff_symlinkptr ff_union.ffu_symlinkptr +#define ff_sysfileinfo ff_union.ffu_sysfileinfo /* The btree code still needs these... */ -#define fcbEOF ff_size -#define fcbExtents ff_extents -#define fcbBTCBPtr ff_un.ffu_sysdata - - -/* - * Directory index entry - */ -struct hfs_index { - SLIST_ENTRY(hfs_index) hi_link; - int hi_index; - char hi_name[1]; -}; +#define fcbEOF ff_size +#define fcbExtents ff_extents +#define fcbBTCBPtr ff_sysfileinfo +typedef u_int8_t atomicflag_t; + /* * The cnode is used to represent each active (or recently active) * file or directory in the HFS filesystem. @@ -88,22 +79,32 @@ struct hfs_index { * Reading or writing any of these fields requires holding c_lock. 
*/ struct cnode { - struct lock__bsd__ c_lock; /* cnode's lock */ + lck_rw_t c_rwlock; /* cnode's lock */ + void * c_lockowner; /* cnode's lock owner (exclusive case only) */ + lck_rw_t c_truncatelock; /* protects file from truncation during read/write */ LIST_ENTRY(cnode) c_hash; /* cnode's hash chain */ u_int32_t c_flag; /* cnode's runtime flags */ + u_int32_t c_hflag; /* cnode's flags for maintaining hash - protected by global hash lock */ struct vnode *c_vp; /* vnode for data fork or dir */ struct vnode *c_rsrc_vp; /* vnode for resource fork */ struct vnode *c_devvp; /* vnode for block I/O */ dev_t c_dev; /* cnode's device */ struct dquot *c_dquot[MAXQUOTAS]; /* cnode's quota info */ struct klist c_knotes; /* knotes attached to this vnode */ - cnid_t c_childhint; /* catalog hint for children */ + u_long c_childhint; /* catalog hint for children */ struct cat_desc c_desc; /* cnode's descriptor */ struct cat_attr c_attr; /* cnode's attributes */ - SLIST_HEAD(hfs_indexhead, hfs_index) c_indexlist; /* directory index list */ - long c_evtonly_refs; /* number of vnode references used solely for events (O_EVTONLY) */ + SLIST_HEAD(hfs_hinthead, directoryhint) c_hintlist; /* directory hint list */ + int16_t c_dirhinttag; /* directory hint tag */ + union { + int16_t cu_dirhintcnt; /* directory hint count */ + int16_t cu_syslockcount; /* system file use only */ + } c_union; struct filefork *c_datafork; /* cnode's data fork */ struct filefork *c_rsrcfork; /* cnode's rsrc fork */ + atomicflag_t c_touch_acctime; + atomicflag_t c_touch_chgtime; + atomicflag_t c_touch_modtime; }; typedef struct cnode cnode_t; @@ -121,40 +122,40 @@ typedef struct cnode cnode_t; #define c_rdev c_attr.ca_rdev #define c_atime c_attr.ca_atime #define c_mtime c_attr.ca_mtime -#define c_mtime_nsec c_attr.ca_mtime_nsec #define c_ctime c_attr.ca_ctime #define c_itime c_attr.ca_itime #define c_btime c_attr.ca_btime #define c_flags c_attr.ca_flags #define c_finderinfo c_attr.ca_finderinfo #define c_blocks c_attr.ca_blocks +#define c_attrblks c_attr.ca_attrblks #define c_entries c_attr.ca_entries #define c_zftimeout c_childhint +#define c_dirhintcnt c_union.cu_dirhintcnt +#define c_syslockcount c_union.cu_syslockcount -/* Runtime cnode flags (kept in c_flag) */ -#define C_ACCESS 0x00001 /* Access time update request */ -#define C_CHANGE 0x00002 /* Change time update request */ -#define C_UPDATE 0x00004 /* Modification time update request */ -#define C_MODIFIED 0x00008 /* CNode has been modified */ -#define C_RELOCATING 0x00010 /* CNode's fork is being relocated */ -#define C_NOEXISTS 0x00020 /* CNode has been deleted, catalog entry is gone */ -#define C_DELETED 0x00040 /* CNode has been marked to be deleted */ -#define C_HARDLINK 0x00080 /* CNode is a hard link */ +/* hash maintenance flags kept in c_hflag and protected by hfs_chash_mutex */ +#define H_ALLOC 0x00001 /* CNode is being allocated */ +#define H_ATTACH 0x00002 /* CNode is being attached to by another vnode */ +#define H_TRANSIT 0x00004 /* CNode is getting recycled */ +#define H_WAITING 0x00008 /* CNode is being waited for */ + -#define C_ALLOC 0x00100 /* CNode is being allocated */ -#define C_WALLOC 0x00200 /* Waiting for allocation to finish */ -#define C_TRANSIT 0x00400 /* CNode is getting recycled */ -#define C_WTRANSIT 0x00800 /* Waiting for cnode getting recycled */ -#define C_NOBLKMAP 0x01000 /* CNode blocks cannot be mapped */ -#define C_WBLKMAP 0x02000 /* Waiting for block map */ +/* Runtime cnode flags (kept in c_flag) */ +#define C_NEED_RVNODE_PUT 0x00001 /* 
Need to do a vnode_put on c_rsrc_vp after the unlock */ +#define C_NEED_DVNODE_PUT 0x00002 /* Need to do a vnode_put on c_vp after the unlock */ +#define C_ZFWANTSYNC 0x00004 /* fsync requested and file has holes */ +#define C_FROMSYNC 0x00008 /* fsync was called from sync */ -#define C_ZFWANTSYNC 0x04000 /* fsync requested and file has holes */ -#define C_VPREFHELD 0x08000 /* resource fork has done a vget() on c_vp (for its parent ptr) */ +#define C_MODIFIED 0x00010 /* CNode has been modified */ +#define C_NOEXISTS 0x00020 /* CNode has been deleted, catalog entry is gone */ +#define C_DELETED 0x00040 /* CNode has been marked to be deleted */ +#define C_HARDLINK 0x00080 /* CNode is a hard link */ -#define C_FROMSYNC 0x10000 /* fsync was called from sync */ -#define C_FORCEUPDATE 0x20000 /* force the catalog entry update */ +#define C_FORCEUPDATE 0x00100 /* force the catalog entry update */ +#define C_HASXATTRS 0x00200 /* cnode has extended attributes */ #define ZFTIMELIMIT (5 * 60) @@ -162,7 +163,7 @@ typedef struct cnode cnode_t; /* * Convert between cnode pointers and vnode pointers */ -#define VTOC(vp) ((struct cnode *)(vp)->v_data) +#define VTOC(vp) ((struct cnode *)vnode_fsnode((vp))) #define CTOV(cp,rsrc) (((rsrc) && S_ISREG((cp)->c_mode)) ? \ (cp)->c_rsrc_vp : (cp)->c_vp) @@ -183,7 +184,6 @@ typedef struct cnode cnode_t; FTOC(fp)->c_rsrc_vp : \ FTOC(fp)->c_vp) -#define EVTONLYREFS(vp) ((vp->v_type == VREG) ? VTOF(vp)->ff_evtonly_refs : VTOC(vp)->c_evtonly_refs) /* * Test for a resource fork @@ -193,57 +193,71 @@ typedef struct cnode cnode_t; #define VNODE_IS_RSRC(vp) ((vp) == VTOC((vp))->c_rsrc_vp) -/* - * CTIMES should be an inline function... - */ -#define C_TIMEMASK (C_ACCESS | C_CHANGE | C_UPDATE) - -#define C_CHANGEMASK (C_ACCESS | C_CHANGE | C_UPDATE | C_MODIFIED) - -#define ATIME_ACCURACY 1 #define ATIME_ONDISK_ACCURACY 300 -#define CTIMES(cp, t1, t2) { \ - if ((cp)->c_flag & C_TIMEMASK) { \ - /* \ - * Only do the update if it is more than just \ - * the C_ACCESS field being updated. \ - */ \ - if (((cp)->c_flag & C_CHANGEMASK) != C_ACCESS) { \ - if ((cp)->c_flag & C_ACCESS) { \ - (cp)->c_atime = (t1)->tv_sec; \ - } \ - if ((cp)->c_flag & C_UPDATE) { \ - (cp)->c_mtime = (t2)->tv_sec; \ - (cp)->c_mtime_nsec = (t2)->tv_usec * 1000; \ - } \ - if ((cp)->c_flag & C_CHANGE) { \ - (cp)->c_ctime = time.tv_sec; \ - } \ - (cp)->c_flag |= C_MODIFIED; \ - (cp)->c_flag &= ~C_TIMEMASK; \ - } \ - } \ -} - -/* This overlays the fid structure (see mount.h). */ + +/* This overlays the FileID portion of NFS file handles. */ struct hfsfid { - u_int16_t hfsfid_len; /* Length of structure. */ - u_int16_t hfsfid_pad; /* Force 32-bit alignment. */ - /* The following data is filesystem-dependent, up to MAXFIDSZ (16) bytes: */ u_int32_t hfsfid_cnid; /* Catalog node ID. */ u_int32_t hfsfid_gen; /* Generation number (create date). */ }; +extern void hfs_touchtimes(struct hfsmount *, struct cnode *); + /* * HFS cnode hash functions. 
*/ extern void hfs_chashinit(void); extern void hfs_chashinsert(struct cnode *cp); -extern void hfs_chashremove(struct cnode *cp); -extern struct cnode * hfs_chashget(dev_t dev, ino_t inum, int wantrsrc, - struct vnode **vpp, struct vnode **rvpp); +extern int hfs_chashremove(struct cnode *cp); +extern void hfs_chash_abort(struct cnode *cp); +extern void hfs_chash_rehash(struct cnode *cp1, struct cnode *cp2); +extern void hfs_chashwakeup(struct cnode *cp, int flags); +extern void hfs_chash_mark_in_transit(struct cnode *cp); + +extern struct vnode * hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock); +extern struct cnode * hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock); +extern int hfs_chash_snoop(dev_t, ino_t, int (*)(const struct cat_desc *, + const struct cat_attr *, void *), void *); + +/* + * HFS directory hint functions. + */ +extern directoryhint_t * hfs_getdirhint(struct cnode *, int); +extern void hfs_reldirhint(struct cnode *, directoryhint_t *); +extern void hfs_reldirhints(struct cnode *, int); + +/* + * HFS cnode lock functions. + * + * HFS Locking Order: + * + * 1. cnode truncate lock (if needed) + * 2. cnode lock (in parent-child order if related, otherwise by address order) + * 3. journal (if needed) + * 4. system files (as needed) + * A. Catalog B-tree file + * B. Attributes B-tree file + * C. Allocation Bitmap file (always exclusive, supports recursion) + * D. Overflow Extents B-tree file (always exclusive, supports recursion) + * 5. hfs mount point (always last) + * + */ +enum hfslocktype {HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2, HFS_FORCE_LOCK = 3}; +#define HFS_SHARED_OWNER (void *)0xffffffff + +extern int hfs_lock(struct cnode *, enum hfslocktype); +extern int hfs_lockpair(struct cnode *, struct cnode *, enum hfslocktype); +extern int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, + enum hfslocktype); + +extern void hfs_unlock(struct cnode *); +extern void hfs_unlockpair(struct cnode *, struct cnode *); +extern void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *); + +extern void hfs_lock_truncate(struct cnode *, int); +extern void hfs_unlock_truncate(struct cnode *); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_dbg.h b/bsd/hfs/hfs_dbg.h index 0b4942f36..fe1d71c89 100644 --- a/bsd/hfs/hfs_dbg.h +++ b/bsd/hfs/hfs_dbg.h @@ -98,7 +98,7 @@ extern int hfs_dbg_err; #define DBG_ERR(x) { \ if(hfs_dbg_all || hfs_dbg_err) { \ - PRINTIT("%X: ", current_proc()->p_pid); \ + PRINTIT("%X: ", proc_selfpid()); \ PRINTIT("HFS ERROR: "); \ PRINTIT x; \ PRINT_DELAY; \ diff --git a/bsd/hfs/hfs_encodinghint.c b/bsd/hfs/hfs_encodinghint.c index 2aede276a..02a1fce32 100644 --- a/bsd/hfs/hfs_encodinghint.c +++ b/bsd/hfs/hfs_encodinghint.c @@ -22,6 +22,7 @@ #include #include +#include /* CJK Mac Encoding Bits */ @@ -42,6 +43,9 @@ u_int8_t cjk_lastunique = 0; u_int32_t hfs_encodingbias = 0; int hfs_islatinbias = 0; +extern lck_mtx_t encodinglst_mutex; + + /* Map CJK bits to Mac encoding */ u_int8_t cjk_encoding[] = { /* 0000 */ kTextEncodingMacUnicode, @@ -889,7 +893,7 @@ hfs_pickencoding(const u_int16_t *src, int len) __private_extern__ u_int32_t -hfs_getencodingbias() +hfs_getencodingbias(void) { return (hfs_encodingbias); } @@ -899,6 +903,8 @@ __private_extern__ void hfs_setencodingbias(u_int32_t bias) { + lck_mtx_lock(&encodinglst_mutex); + hfs_encodingbias = bias; switch (bias) { @@ -914,5 +920,7 @@ hfs_setencodingbias(u_int32_t bias) 
hfs_islatinbias = 0; break; } + + lck_mtx_unlock(&encodinglst_mutex); } diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c index 4c2229c95..94029ef73 100644 --- a/bsd/hfs/hfs_encodings.c +++ b/bsd/hfs/hfs_encodings.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -31,9 +30,16 @@ #include "hfs.h" +lck_grp_t * encodinglst_lck_grp; +lck_grp_attr_t * encodinglst_lck_grp_attr; +lck_attr_t * encodinglst_lck_attr; + + /* hfs encoding converter list */ SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0}; -decl_simple_lock_data(,hfs_encoding_list_slock); + +lck_mtx_t encodinglst_mutex; + /* hfs encoding converter entry */ @@ -61,7 +67,15 @@ void hfs_converterinit(void) { SLIST_INIT(&hfs_encoding_list); - simple_lock_init(&hfs_encoding_list_slock); + + encodinglst_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(encodinglst_lck_grp_attr); + encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr); + + encodinglst_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(encodinglst_lck_attr); + + lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr); /* * add resident MacRoman converter and take a reference @@ -87,7 +101,7 @@ hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, uni MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK); - simple_lock(&hfs_encoding_list_slock); + lck_mtx_lock(&encodinglst_mutex); encp->link.sle_next = NULL; encp->refcount = 0; @@ -97,7 +111,7 @@ hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, uni encp->kmod_id = id; SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link); - simple_unlock(&hfs_encoding_list_slock); + lck_mtx_unlock(&encodinglst_mutex); return (0); } @@ -117,9 +131,8 @@ int hfs_remconverter(int id, UInt32 encoding) { struct hfs_encoding *encp; - int busy = 0; - simple_lock(&hfs_encoding_list_slock); + lck_mtx_lock(&encodinglst_mutex); SLIST_FOREACH(encp, &hfs_encoding_list, link) { if (encp->encoding == encoding && encp->kmod_id == id) { encp->refcount--; @@ -127,16 +140,19 @@ hfs_remconverter(int id, UInt32 encoding) /* if converter is no longer in use, release it */ if (encp->refcount <= 0 && encp->kmod_id != 0) { SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); + lck_mtx_unlock(&encodinglst_mutex); FREE(encp, M_TEMP); + return (0); } else { - busy = 1; + lck_mtx_unlock(&encodinglst_mutex); + return (1); /* busy */ } break; } } - simple_unlock(&hfs_encoding_list_slock); + lck_mtx_unlock(&encodinglst_mutex); - return (busy); + return (0); } @@ -151,7 +167,7 @@ hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to struct hfs_encoding *encp; int found = 0; - simple_lock(&hfs_encoding_list_slock); + lck_mtx_lock(&encodinglst_mutex); SLIST_FOREACH(encp, &hfs_encoding_list, link) { if (encp->encoding == encoding) { found = 1; @@ -161,7 +177,7 @@ hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to break; } } - simple_unlock(&hfs_encoding_list_slock); + lck_mtx_unlock(&encodinglst_mutex); if (!found) { *get_unicode = NULL; @@ -182,12 +198,10 @@ int hfs_relconverter(UInt32 encoding) { struct hfs_encoding *encp; - int found = 0; - simple_lock(&hfs_encoding_list_slock); + lck_mtx_lock(&encodinglst_mutex); SLIST_FOREACH(encp, &hfs_encoding_list, link) { if (encp->encoding == encoding) { - found = 1; encp->refcount--; /* if converter is no longer in use, release it */ @@ -195,19 +209,19 @@ 
hfs_relconverter(UInt32 encoding) int id = encp->kmod_id; SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); - FREE(encp, M_TEMP); - encp = NULL; - - simple_unlock(&hfs_encoding_list_slock); - kmod_destroy((host_priv_t) host_priv_self(), id); - simple_lock(&hfs_encoding_list_slock); + lck_mtx_unlock(&encodinglst_mutex); + + FREE(encp, M_TEMP); + kmod_destroy((host_priv_t) host_priv_self(), id); + return (0); } - break; + lck_mtx_unlock(&encodinglst_mutex); + return (0); } } - simple_unlock(&hfs_encoding_list_slock); + lck_mtx_unlock(&encodinglst_mutex); - return (found ? 0 : EINVAL); + return (EINVAL); } diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index cd5843aaf..0341f15db 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -355,6 +355,55 @@ hfs_swap_HFSPlusBTInternalNode ( if (unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]); } + } else if (fileID == kHFSAttributesFileID) { + HFSPlusAttrKey *srcKey; + HFSPlusAttrRecord *srcRec; + + for (i = 0; i < srcDesc->numRecords; i++) { + srcKey = (HFSPlusAttrKey *)((char *)src->buffer + srcOffs[i]); + + if (!unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + srcRec = (HFSPlusAttrRecord *)((char *)srcKey + srcKey->keyLength + 2); + if (unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + + srcKey->fileID = SWAP_BE32(srcKey->fileID); + srcKey->startBlock = SWAP_BE32(srcKey->startBlock); + + if (!unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + for (j = 0; j < srcKey->attrNameLen; j++) + srcKey->attrName[j] = SWAP_BE16(srcKey->attrName[j]); + if (unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + + /* If this is an index node, just swap the child node number */ + if (srcDesc->kind == kBTIndexNode) { + *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec)); + continue; + } + + /* Swap the data record */ + if (!unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType); + switch (srcRec->recordType) { + case kHFSPlusAttrInlineData: + /* We're not swapping the reserved fields */ + srcRec->attrData.attrSize = SWAP_BE32(srcRec->attrData.attrSize); + /* Not swapping the attrData */ + break; + case kHFSPlusAttrForkData: + /* We're not swapping the reserved field */ + hfs_swap_HFSPlusForkData(&srcRec->forkData.theFork); + break; + case kHFSPlusAttrExtents: + /* We're not swapping the reserved field */ + for (j = 0; j < kHFSPlusExtentDensity; j++) { + srcRec->overflowExtents.extents[j].startBlock = + SWAP_BE32(srcRec->overflowExtents.extents[j].startBlock); + srcRec->overflowExtents.extents[j].blockCount = + SWAP_BE32(srcRec->overflowExtents.extents[j].blockCount); + } + break; + } + if (unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType); + } } else if (fileID > kHFSFirstUserCatalogNodeID) { HotFileKey *srcKey; UInt32 *srcRec; diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h index 5caa3d2c8..001206d45 100644 --- a/bsd/hfs/hfs_format.h +++ b/bsd/hfs/hfs_format.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,8 +22,6 @@ #ifndef __HFS_FORMAT__ #define __HFS_FORMAT__ -#ifndef __HFSVOLUMES__ - #include #include @@ -54,7 +52,8 @@ enum { kHFSXVersion = 0x0005, /* 'HX' volumes start with version 5 */ kHFSPlusMountVersion = 0x31302E30, /* '10.0' for Mac OS X */ - kHFSJMountVersion = 0x4846534a /* 'HFSJ' for journaled HFS+ on OS X */ + kHFSJMountVersion = 0x4846534a, /* 'HFSJ' for journaled HFS+ on OS X */ + kFSKMountVersion = 0x46534b21 /* 'FSK!' for failed journal replay */ }; @@ -91,7 +90,8 @@ enum { }; -#ifndef __FILES__ +#ifndef _HFSUNISTR255_DEFINED_ +#define _HFSUNISTR255_DEFINED_ /* Unicode strings are used for HFS Plus file and folder names */ struct HFSUniStr255 { u_int16_t length; /* number of unicode characters */ @@ -99,7 +99,7 @@ struct HFSUniStr255 { }; typedef struct HFSUniStr255 HFSUniStr255; typedef const HFSUniStr255 *ConstHFSUniStr255Param; -#endif /* __FILES__ */ +#endif /* _HFSUNISTR255_DEFINED_ */ enum { kHFSMaxVolumeNameChars = 27, @@ -242,7 +242,7 @@ struct HFSCatalogKey { u_int8_t keyLength; /* key length (in bytes) */ u_int8_t reserved; /* reserved (set to zero) */ u_int32_t parentID; /* parent folder ID */ - u_char nodeName[kHFSMaxFileNameChars + 1]; /* catalog node name */ + u_int8_t nodeName[kHFSMaxFileNameChars + 1]; /* catalog node name */ }; typedef struct HFSCatalogKey HFSCatalogKey; @@ -274,8 +274,15 @@ enum { enum { kHFSFileLockedBit = 0x0000, /* file is locked and cannot be written to */ kHFSFileLockedMask = 0x0001, + kHFSThreadExistsBit = 0x0001, /* a file thread record exists for this file */ - kHFSThreadExistsMask = 0x0002 + kHFSThreadExistsMask = 0x0002, + + kHFSHasAttributesBit = 0x0002, /* object has extended attributes */ + kHFSHasAttributesMask = 0x0004, + + kHFSHasSecurityBit = 0x0003, /* object has security data (ACLs) */ + kHFSHasSecurityMask = 0x0008 }; @@ -309,7 +316,7 @@ struct HFSPlusCatalogFolder { FndrDirInfo userInfo; /* Finder information */ FndrOpaqueInfo finderInfo; /* additional Finder information */ u_int32_t textEncoding; /* hint for name conversions */ - u_int32_t reserved; /* reserved - initialized as zero */ + u_int32_t attrBlocks; /* cached count of attribute data blocks */ }; typedef struct HFSPlusCatalogFolder HFSPlusCatalogFolder; @@ -352,7 +359,7 @@ struct HFSPlusCatalogFile { FndrFileInfo userInfo; /* Finder information */ FndrOpaqueInfo finderInfo; /* additional Finder information */ u_int32_t textEncoding; /* hint for name conversions */ - u_int32_t reserved2; /* reserved - initialized as zero */ + u_int32_t attrBlocks; /* cached count of attribute data blocks */ /* Note: these start on double long (64 bit) boundry */ HFSPlusForkData dataFork; /* size and block data for data fork */ @@ -365,7 +372,7 @@ struct HFSCatalogThread { int16_t recordType; /* == kHFSFolderThreadRecord or kHFSFileThreadRecord */ int32_t reserved[2]; /* reserved - initialized as zero */ u_int32_t parentID; /* parent ID for this catalog node */ - u_char nodeName[kHFSMaxFileNameChars + 1]; /* name of this catalog node */ + u_int8_t nodeName[kHFSMaxFileNameChars + 1]; /* name of this catalog node */ }; typedef struct HFSCatalogThread HFSCatalogThread; @@ -384,25 +391,10 @@ typedef struct HFSPlusCatalogThread HFSPlusCatalogThread; chosen so that they wouldn't conflict with the catalog record types. 
*/ enum { - kHFSPlusAttrInlineData = 0x10, /* if size < kAttrOverflowSize */ - kHFSPlusAttrForkData = 0x20, /* if size >= kAttrOverflowSize */ - kHFSPlusAttrExtents = 0x30 /* overflow extents for large attributes */ -}; - - -/* - HFSPlusAttrInlineData - For small attributes, whose entire value is stored within this one - B-tree record. - There would not be any other records for this attribute. -*/ -struct HFSPlusAttrInlineData { - u_int32_t recordType; /* == kHFSPlusAttrInlineData*/ - u_int32_t reserved; - u_int32_t logicalSize; /* size in bytes of userData*/ - u_int8_t userData[2]; /* variable length; space allocated is a multiple of 2 bytes*/ + kHFSPlusAttrInlineData = 0x10, /* if size < kAttrOverflowSize */ + kHFSPlusAttrForkData = 0x20, /* if size >= kAttrOverflowSize */ + kHFSPlusAttrExtents = 0x30 /* overflow extents for large attributes */ }; -typedef struct HFSPlusAttrInlineData HFSPlusAttrInlineData; /* @@ -430,15 +422,58 @@ struct HFSPlusAttrExtents { }; typedef struct HFSPlusAttrExtents HFSPlusAttrExtents; +/* + * Atrributes B-tree Data Record + * + * For small attributes, whose entire value is stored + * within a single B-tree record. + */ +struct HFSPlusAttrData { + u_int32_t recordType; /* == kHFSPlusAttrInlineData */ + u_int32_t reserved[2]; + u_int32_t attrSize; /* size of attribute data in bytes */ + u_int8_t attrData[2]; /* variable length */ +}; +typedef struct HFSPlusAttrData HFSPlusAttrData; + + +/* HFSPlusAttrInlineData is obsolete use HFSPlusAttrData instead */ +struct HFSPlusAttrInlineData { + u_int32_t recordType; + u_int32_t reserved; + u_int32_t logicalSize; + u_int8_t userData[2]; +}; +typedef struct HFSPlusAttrInlineData HFSPlusAttrInlineData; + + /* A generic Attribute Record*/ union HFSPlusAttrRecord { u_int32_t recordType; - HFSPlusAttrInlineData inlineData; + HFSPlusAttrInlineData inlineData; /* NOT USED */ + HFSPlusAttrData attrData; HFSPlusAttrForkData forkData; HFSPlusAttrExtents overflowExtents; }; typedef union HFSPlusAttrRecord HFSPlusAttrRecord; +/* Attribute key */ +struct HFSPlusAttrKey { + u_int16_t keyLength; /* key length (in bytes) */ + u_int16_t pad; /* set to zero */ + u_int32_t fileID; /* file associated with attribute */ + u_int32_t startBlock; /* first attribue allocation block number for extents */ + u_int16_t attrNameLen; /* number of unicode characters */ + u_int16_t attrName[127]; /* attribute name (Unicode) */ +}; +typedef struct HFSPlusAttrKey HFSPlusAttrKey; + +#define kHFSPlusAttrKeyMaximumLength (sizeof(HFSPlusAttrKey) - sizeof(u_int16_t)) +#define kHFSPlusAttrKeyMinimumLength (kHFSPlusAttrKeyMaximumLength - (127 * sizeof(u_int16_t))) + +#endif /* __APPLE_API_UNSTABLE */ + + /* Key and node lengths */ enum { kHFSPlusExtentKeyMaximumLength = sizeof(HFSPlusExtentKey) - sizeof(u_int16_t), @@ -451,7 +486,6 @@ enum { kHFSPlusExtentMinNodeSize = 512, kHFSPlusAttrMinNodeSize = 4096 }; -#endif /* __APPLE_API_UNSTABLE */ /* HFS and HFS Plus volume attribute bits */ enum { @@ -495,7 +529,7 @@ struct HFSMasterDirectoryBlock { u_int16_t drAlBlSt; /* first allocation block in volume */ u_int32_t drNxtCNID; /* next unused catalog node ID */ u_int16_t drFreeBks; /* number of unused allocation blocks */ - u_char drVN[kHFSMaxVolumeNameChars + 1]; /* volume name */ + u_int8_t drVN[kHFSMaxVolumeNameChars + 1]; /* volume name */ u_int32_t drVolBkUp; /* date and time of last backup */ u_int16_t drVSeqNum; /* volume backup sequence number */ u_int32_t drWrCnt; /* volume write count */ @@ -626,7 +660,7 @@ enum { /* Catalog Key Name Comparison Type */ 
enum { kHFSCaseFolding = 0xCF, /* case folding (case-insensitive) */ - kHFSBinaryCompare = 0xBC, /* binary compare (case-sensitive) */ + kHFSBinaryCompare = 0xBC /* binary compare (case-sensitive) */ }; /* JournalInfoBlock - Structure that describes where our journal lives */ @@ -652,8 +686,4 @@ enum { } #endif -#else -#warning hfs_format.h is not compatible with HFSVolumes.h (include only one) -#endif /* __HFSVOLUMES__ */ - #endif /* __HFS_FORMAT__ */ diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h new file mode 100644 index 000000000..573b0c9e0 --- /dev/null +++ b/bsd/hfs/hfs_fsctl.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _HFS_FSCTL_H_ +#define _HFS_FSCTL_H_ + +#include + +#include +#include + + +#ifdef __APPLE_API_UNSTABLE + +struct hfs_backingstoreinfo { + int signature; /* == 3419115 */ + int version; /* version of this struct (1) */ + int backingfd; /* disk image file (on backing fs) */ + int bandsize; /* sparse disk image band size */ +}; + + +/* HFS FS CONTROL COMMANDS */ + +#define HFSIOC_RESIZE_VOLUME _IOW('h', 2, u_int64_t) +#define HFS_RESIZE_VOLUME IOCBASECMD(HFSIOC_RESIZE_VOLUME) + +#define HFSIOC_CHANGE_NEXT_ALLOCATION _IOWR('h', 3, u_int32_t) +#define HFS_CHANGE_NEXT_ALLOCATION IOCBASECMD(HFSIOC_CHANGE_NEXT_ALLOCATION) + +#define HFSIOC_GETCREATETIME _IOR('h', 4, time_t) +#define HFS_GETCREATETIME IOCBASECMD(HFSIOC_GETCREATETIME) + +#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) +#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO) + +#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) +#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) + +#define HFSIOC_SETACLSTATE _IOW('h', 10, int32_t) +#define HFS_SETACLSTATE IOCBASECMD(HFSIOC_SETACLSTATE) + +#endif /* __APPLE_API_UNSTABLE */ + + +#endif /* ! _HFS_FSCTL_H_ */ diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index e5f94cd64..536136fe6 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. 
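The paired HFSIOC_*/HFS_* definitions in the new hfs_fsctl.h exist because the fsctl(2) path strips the ioctl size bits with IOCBASECMD() before the command reaches the filesystem, so the kernel switches on the HFS_* form while userland passes the full HFSIOC_* value. A hedged userland sketch of querying a volume's create time; the fsctl prototype is declared directly here rather than assuming a particular header:

#include <stdio.h>
#include <sys/types.h>
#include <sys/ioccom.h>
#include <time.h>

#define HFSIOC_GETCREATETIME _IOR('h', 4, time_t)	/* copied from the patch */

/* Assumed prototype of the Darwin fsctl(2) wrapper. */
extern int fsctl(const char *path, unsigned long request, void *data,
                 unsigned int options);

int main(int argc, char *argv[])
{
	time_t create_time = 0;

	/* argv[1] names any path on the HFS+ volume of interest. */
	if (argc > 1 &&
	    fsctl(argv[1], HFSIOC_GETCREATETIME, &create_time, 0) == 0)
		printf("volume created %s", ctime(&create_time));
	return 0;
}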
* * @APPLE_LICENSE_HEADER_START@ * @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -90,6 +92,8 @@ typedef struct hotfile_data { hotfile_entry_t entries[1]; } hotfile_data_t; +static int hfs_recording_start (struct hfsmount *); +static int hfs_recording_stop (struct hfsmount *); /* @@ -97,21 +101,27 @@ typedef struct hotfile_data { */ static void hf_insert (hotfile_data_t *, hotfile_entry_t *); static void hf_delete (hotfile_data_t *, u_int32_t, u_int32_t); -static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t); static hotfile_entry_t * hf_coldest (hotfile_data_t *); static hotfile_entry_t * hf_getnewentry (hotfile_data_t *); -static int hf_getsortedlist (hotfile_data_t *, hotfilelist_t *); -static void hf_printtree (hotfile_entry_t *); +static void hf_getsortedlist (hotfile_data_t *, hotfilelist_t *); + +#if HFC_DEBUG +static hotfile_entry_t * hf_lookup (hotfile_data_t *, u_int32_t, u_int32_t); +static void hf_maxdepth(hotfile_entry_t *, int, int *); +static void hf_printtree (hotfile_entry_t *); +#endif /* * Hot File misc support functions. */ -static int hotfiles_collect (struct hfsmount *, struct proc *); -static int hotfiles_age (struct hfsmount *, struct proc *); -static int hotfiles_adopt (struct hfsmount *, struct proc *); +static int hotfiles_collect (struct hfsmount *); +static int hotfiles_age (struct hfsmount *); +static int hotfiles_adopt (struct hfsmount *); static int hotfiles_evict (struct hfsmount *, struct proc *); -static int hotfiles_refine (struct hfsmount *, struct proc *); +static int hotfiles_refine (struct hfsmount *); static int hotextents(struct hfsmount *, HFSPlusExtentDescriptor *); +static int hfs_addhotfile_internal(struct vnode *); + /* * Hot File Cluster B-tree (on disk) functions. @@ -124,6 +134,9 @@ static int hfc_comparekeys (HotFileKey *, HotFileKey *); char hfc_tag[] = "CLUSTERED HOT FILES B-TREE "; +extern int UBCINFOEXISTS(struct vnode * vp); +extern int hfs_vnop_write(struct vnop_write_args *ap); + /* *======================================================================== @@ -134,12 +147,13 @@ char hfc_tag[] = "CLUSTERED HOT FILES B-TREE "; /* * Start recording the hotest files on a file system. * + * Requires that the hfc_mutex be held. */ -__private_extern__ -int -hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) +static int +hfs_recording_start(struct hfsmount *hfsmp) { hotfile_data_t *hotdata; + struct timeval tv; int maxentries; size_t size; int i; @@ -150,7 +164,7 @@ hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) (hfsmp->hfs_flags & HFS_METADATA_ZONE) == 0) { return (EPERM); } - if (HFSTOVCB(hfsmp)->freeBlocks < (2 * hfsmp->hfs_hotfile_maxblks)) { + if (HFSTOVCB(hfsmp)->freeBlocks < (2 * (u_int32_t)hfsmp->hfs_hotfile_maxblks)) { return (ENOSPC); } if (hfsmp->hfc_stage != HFC_IDLE) { @@ -169,6 +183,8 @@ hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) FREE(tmp, M_TEMP); } + microuptime(&tv); + /* * On first startup check for suspended recording. 
*/ @@ -182,14 +198,15 @@ hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) (SWAP_BE32 (hotfileinfo.timeleft) > 0) && (SWAP_BE32 (hotfileinfo.timebase) > 0)) { hfsmp->hfc_maxfiles = SWAP_BE32 (hotfileinfo.maxfilecnt); - hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + time.tv_sec ; + hfsmp->hfc_timeout = SWAP_BE32 (hotfileinfo.timeleft) + tv.tv_sec ; hfsmp->hfc_timebase = SWAP_BE32 (hotfileinfo.timebase); #if HFC_VERBOSE - printf("HFS: resume recording hot files (%d left)\n", SWAP_BE32 (hotfileinfo.timeleft)); + printf("Resume recording hot files on %s (%d secs left)\n", + hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft)); #endif } else { hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; - hfsmp->hfc_timebase = time.tv_sec + 1; + hfsmp->hfc_timebase = tv.tv_sec + 1; hfsmp->hfc_timeout = hfsmp->hfc_timebase + HFC_DEFAULT_DURATION; } (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); @@ -210,17 +227,16 @@ hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) return (error); } #if HFC_VERBOSE - printf("HFS: begin recording hot files\n"); + printf("HFS: begin recording hot files on %s\n", hfsmp->vcbVN); #endif hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; - hfsmp->hfc_timeout = time.tv_sec + HFC_DEFAULT_DURATION; + hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION; /* Reset time base. */ if (hfsmp->hfc_timebase == 0) { - hfsmp->hfc_timebase = time.tv_sec + 1; + hfsmp->hfc_timebase = tv.tv_sec + 1; } else { - u_int32_t cumulativebase; - u_int32_t oldbase = hfsmp->hfc_timebase; + time_t cumulativebase; cumulativebase = hfsmp->hfc_timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION); hfsmp->hfc_timebase = MAX(hfsmp->hfc_timebase, cumulativebase); @@ -249,7 +265,6 @@ hfs_recording_start(struct hfsmount *hfsmp, struct proc *p) hotdata->hfsmp = hfsmp; hfsmp->hfc_recdata = hotdata; -out: hfsmp->hfc_stage = HFC_RECORDING; wakeup((caddr_t)&hfsmp->hfc_stage); return (0); @@ -257,23 +272,23 @@ out: /* * Stop recording the hotest files on a file system. + * + * Requires that the hfc_mutex be held. */ -__private_extern__ -int -hfs_recording_stop(struct hfsmount *hfsmp, struct proc *p) +static int +hfs_recording_stop(struct hfsmount *hfsmp) { hotfile_data_t *hotdata; hotfilelist_t *listp; + struct timeval tv; size_t size; enum hfc_stage newstage = HFC_IDLE; - void * tmp; int error; - if (hfsmp->hfc_stage != HFC_RECORDING) return (EPERM); - hotfiles_collect(hfsmp, p); + hotfiles_collect(hfsmp); if (hfsmp->hfc_stage != HFC_RECORDING) return (0); @@ -286,7 +301,7 @@ hfs_recording_stop(struct hfsmount *hfsmp, struct proc *p) * then dump the sample data */ #if HFC_VERBOSE - printf("HFS: end of hot file recording\n"); + printf("HFS: end of hot file recording on %s\n", hfsmp->vcbVN); #endif hotdata = (hotfile_data_t *)hfsmp->hfc_recdata; if (hotdata == NULL) @@ -318,7 +333,7 @@ hfs_recording_stop(struct hfsmount *hfsmp, struct proc *p) /* * Age the previous set of clustered hot files. */ - error = hotfiles_age(hfsmp, p); + error = hotfiles_age(hfsmp); if (error) { (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); hfsmp->hfc_filevp = NULL; @@ -333,14 +348,15 @@ hfs_recording_stop(struct hfsmount *hfsmp, struct proc *p) MALLOC(listp, hotfilelist_t *, size, M_TEMP, M_WAITOK); bzero(listp, size); - hf_getsortedlist(hotdata, listp); - listp->hfl_duration = time.tv_sec - hfsmp->hfc_timebase; + hf_getsortedlist(hotdata, listp); /* NOTE: destroys hot file tree! */ + microuptime(&tv); + listp->hfl_duration = tv.tv_sec - hfsmp->hfc_timebase; hfsmp->hfc_recdata = listp; /* * Account for duplicates. 
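The time-base clamp earlier in this hunk is what keeps temperatures cumulative over at most HFC_CUMULATIVE_CYCLES recording periods: the epoch is pulled forward whenever it trails the timeout by more than that many periods, so old activity ages out of the window. A standalone sketch of the clamp; the duration constant here is an illustrative stand-in, the real values live in hfs_hotfiles.h:

#include <time.h>

#define HFC_CUMULATIVE_CYCLES	4
#define HFC_DEFAULT_DURATION	(3600 * 60)	/* illustrative stand-in */

static time_t clamp_timebase(time_t timebase, time_t timeout)
{
	time_t cumulativebase;

	/* Earliest epoch still inside the cumulative window. */
	cumulativebase = timeout - (HFC_CUMULATIVE_CYCLES * HFC_DEFAULT_DURATION);
	return (timebase > cumulativebase) ? timebase : cumulativebase;
}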
*/ - error = hotfiles_refine(hfsmp, p); + error = hotfiles_refine(hfsmp); if (error) { (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); hfsmp->hfc_filevp = NULL; @@ -389,24 +405,34 @@ out: */ __private_extern__ int -hfs_recording_suspend(struct hfsmount *hfsmp, struct proc *p) +hfs_recording_suspend(struct hfsmount *hfsmp) { HotFilesInfo hotfileinfo; - hotfile_data_t *hotdata; + hotfile_data_t *hotdata = NULL; + struct timeval tv; int error; - if (hfsmp->hfc_stage != HFC_RECORDING) + if (hfsmp->hfc_stage == HFC_DISABLED) return (0); + lck_mtx_lock(&hfsmp->hfc_mutex); + + /* + * XXX NOTE + * A suspend can occur during eval/evict/adopt stage. + * In that case we would need to write out info and + * flush our HFBT vnode. Currently we just bail. + */ + hotdata = (hotfile_data_t *)hfsmp->hfc_recdata; - if (hotdata == NULL) { - hfsmp->hfc_stage = HFC_DISABLED; - return (0); + if (hotdata == NULL || hfsmp->hfc_stage != HFC_RECORDING) { + error = 0; + goto out; } hfsmp->hfc_stage = HFC_BUSY; #if HFC_VERBOSE - printf("HFS: suspend hot file recording\n"); + printf("HFS: suspend hot file recording on %s\n", hfsmp->vcbVN); #endif error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); if (error) { @@ -414,78 +440,52 @@ hfs_recording_suspend(struct hfsmount *hfsmp, struct proc *p) goto out; } - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - error = EINVAL; - goto out; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + error = EPERM; + goto out; } - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); + microuptime(&tv); hotfileinfo.magic = SWAP_BE32 (HFC_MAGIC); hotfileinfo.version = SWAP_BE32 (HFC_VERSION); hotfileinfo.duration = SWAP_BE32 (HFC_DEFAULT_DURATION); hotfileinfo.timebase = SWAP_BE32 (hfsmp->hfc_timebase); - hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - time.tv_sec); + hotfileinfo.timeleft = SWAP_BE32 (hfsmp->hfc_timeout - tv.tv_sec); hotfileinfo.threshold = SWAP_BE32 (hotdata->threshold); hotfileinfo.maxfileblks = SWAP_BE32 (hotdata->maxblocks); hotfileinfo.maxfilecnt = SWAP_BE32 (HFC_DEFAULT_FILE_COUNT); strcpy(hotfileinfo.tag, hfc_tag); (void) BTSetUserData(VTOF(hfsmp->hfc_filevp), &hotfileinfo, sizeof(hotfileinfo)); - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - - (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); - hfsmp->hfc_filevp = NULL; + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + hfs_end_transaction(hfsmp); out: - FREE(hotdata, M_TEMP); - - hfsmp->hfc_stage = HFC_DISABLED; - wakeup((caddr_t)&hfsmp->hfc_stage); - return (error); -} - -/* - * Abort a hot file recording session. 
- */ -__private_extern__ -int -hfs_recording_abort(struct hfsmount *hfsmp, struct proc *p) -{ - void * tmp; - - if (hfsmp->hfc_stage == HFC_DISABLED) - return (0); - - if (hfsmp->hfc_stage == HFC_BUSY) { - (void) tsleep((caddr_t)&hfsmp->hfc_stage, PINOD, "hfs_recording_abort", 0); + if (hfsmp->hfc_filevp) { + (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; } - hfsmp->hfc_stage = HFC_BUSY; - - printf("HFS: terminate hot file recording\n"); - - if (hfsmp->hfc_recdata) { - tmp = hfsmp->hfc_recdata; + if (hotdata) { + FREE(hotdata, M_TEMP); hfsmp->hfc_recdata = NULL; - FREE(tmp, M_TEMP); } hfsmp->hfc_stage = HFC_DISABLED; wakeup((caddr_t)&hfsmp->hfc_stage); - return (0); +exit: + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (error); } + /* * */ __private_extern__ int -hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) +hfs_recording_init(struct hfsmount *hfsmp) { CatalogKey * keyp; CatalogRecord * datap; @@ -504,6 +504,14 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) int inserted = 0; /* debug variables */ int filecount = 0; + /* + * For now, only the boot volume is supported. + */ + if ((vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) == 0) { + hfsmp->hfc_stage = HFC_DISABLED; + return (EPERM); + } + /* * If the Hot File btree exists then metadata zone is ready. */ @@ -513,15 +521,11 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) hfsmp->hfc_stage = HFC_IDLE; return (0); } - /* - * For now, only the boot volume is supported. - */ - if ((HFSTOVFS(hfsmp)->mnt_flag & MNT_ROOTFS) == 0) { - hfsmp->hfs_flags &= ~HFS_METADATA_ZONE; - return (EPERM); - } error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT); if (error) { +#if HFC_VERBOSE + printf("Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif return (error); } /* @@ -531,6 +535,9 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) panic("hfs_recording_init: hfc_filevp exists (vp = 0x%08x)", hfsmp->hfc_filevp); error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); if (error) { +#if HFC_VERBOSE + printf("Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); +#endif return (error); } MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); @@ -557,15 +564,14 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) /* * The writes to Hot File B-tree file are journaled. 
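Throughout this patch the open-coded hfs_global_shared_lock_acquire / journal_start_transaction sequence visible in the removed lines below is replaced by the hfs_start_transaction / hfs_end_transaction pair, which handles the non-journaled case internally. A minimal sketch of the resulting bracket, assuming the declarations from the hfs headers:

/* Sketch only: body() stands in for the B-tree updates being journaled. */
static int with_transaction(struct hfsmount *hfsmp,
                            int (*body)(struct hfsmount *))
{
	int error;

	if (hfs_start_transaction(hfsmp) != 0)
		return (EINVAL);
	error = body(hfsmp);
	hfs_end_transaction(hfsmp);
	return (error);
}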
*/ - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - error = EINVAL; - goto out1; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out1; } - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + error = EPERM; + goto out1; + } filefork = VTOF(hfsmp->hfc_filevp); /* @@ -614,7 +620,7 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) key->fileID = cnid; key->forkType = 0; data = 0x3f3f3f3f; - error = BTInsertRecord(filefork, iterator, &record, sizeof(data)); + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); @@ -627,7 +633,7 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) key->fileID = cnid; key->forkType = 0; data = HFC_MINIMUM_TEMPERATURE; - error = BTInsertRecord(filefork, iterator, &record, sizeof(data)); + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hfs_recording_init: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); @@ -636,12 +642,9 @@ hfs_recording_init(struct hfsmount *hfsmp, struct proc *p) inserted++; } (void) BTFlushPath(filefork); - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); #if HFC_VERBOSE printf("%d files identified out of %d\n", inserted, filecount); #endif @@ -667,15 +670,20 @@ __private_extern__ int hfs_hotfilesync(struct hfsmount *hfsmp, struct proc *p) { - if ((HFSTOVFS(hfsmp)->mnt_kern_flag & MNTK_UNMOUNT) == 0 && hfsmp->hfc_stage) { + if (hfsmp->hfc_stage) { + struct timeval tv; + + lck_mtx_lock(&hfsmp->hfc_mutex); + switch (hfsmp->hfc_stage) { case HFC_IDLE: - (void) hfs_recording_start(hfsmp, p); + (void) hfs_recording_start(hfsmp); break; case HFC_RECORDING: - if (time.tv_sec > hfsmp->hfc_timeout) - (void) hfs_recording_stop(hfsmp, p); + microuptime(&tv); + if (tv.tv_sec > hfsmp->hfc_timeout) + (void) hfs_recording_stop(hfsmp); break; case HFC_EVICTION: @@ -683,9 +691,13 @@ hfs_hotfilesync(struct hfsmount *hfsmp, struct proc *p) break; case HFC_ADOPTION: - (void) hotfiles_adopt(hfsmp, p); + (void) hotfiles_adopt(hfsmp); + break; + default: break; } + + lck_mtx_unlock(&hfsmp->hfc_mutex); } return (0); } @@ -699,10 +711,27 @@ hfs_hotfilesync(struct hfsmount *hfsmp, struct proc *p) * NOTE: Since both the data and resource fork can be hot, * there can be two entries for the same file id. * + * Note: the cnode is locked on entry. 
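hfs_addhotfile(), which follows, scores a file by the same metric that hfs_removehotfile() later checks: bytes read during the recording period divided by file size, so one full sequential read scores a temperature of 1. A standalone sketch of the metric; the field names mirror ff_bytesread/ff_size in the patch, and the zero-size guard is the sketch's own addition:

#include <sys/types.h>

static u_int32_t hotfile_temperature(u_int64_t bytesread, u_int64_t size)
{
	if (size == 0)
		return (0);	/* guard added for the sketch */
	return (u_int32_t)(bytesread / size);
}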
*/ __private_extern__ int hfs_addhotfile(struct vnode *vp) +{ + hfsmount_t *hfsmp; + int error; + + hfsmp = VTOHFS(vp); + if (hfsmp->hfc_stage != HFC_RECORDING) + return (0); + + lck_mtx_lock(&hfsmp->hfc_mutex); + error = hfs_addhotfile_internal(vp); + lck_mtx_unlock(&hfsmp->hfc_mutex); + return (error); +} + +static int +hfs_addhotfile_internal(struct vnode *vp) { hotfile_data_t *hotdata; hotfile_entry_t *entry; @@ -714,9 +743,8 @@ hfs_addhotfile(struct vnode *vp) hfsmp = VTOHFS(vp); if (hfsmp->hfc_stage != HFC_RECORDING) return (0); - - if (!(vp->v_type == VREG || vp->v_type == VLNK) || - (vp->v_flag & (VSYSTEM | VSWAP))) { + + if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) { return (0); } /* Skip resource forks for now. */ @@ -763,13 +791,14 @@ hfs_addhotfile(struct vnode *vp) } /* - * Remove a hot file to the recording list. + * Remove a hot file from the recording list. * * This can happen when a hot file becomes * an active vnode (active hot files are * not kept in the recording list until the * end of the recording period). * + * Note: the cnode is locked on entry. */ __private_extern__ int @@ -785,12 +814,9 @@ hfs_removehotfile(struct vnode *vp) if (hfsmp->hfc_stage != HFC_RECORDING) return (0); - if (!(vp->v_type == VREG || vp->v_type == VLNK) || - (vp->v_flag & (VSYSTEM | VSWAP))) { + if ((!vnode_isreg(vp) && !vnode_islnk(vp)) || vnode_issystem(vp)) { return (0); } - if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) - return (0); ffp = VTOF(vp); cp = VTOC(vp); @@ -800,16 +826,23 @@ hfs_removehotfile(struct vnode *vp) return (0); } + lck_mtx_lock(&hfsmp->hfc_mutex); + if (hfsmp->hfc_stage != HFC_RECORDING) + goto out; + if ((hotdata = (hotfile_data_t *)hfsmp->hfc_recdata) == NULL) + goto out; + temperature = ffp->ff_bytesread / ffp->ff_size; if (temperature < hotdata->threshold) - return (0); + goto out; if (hotdata->coldest && (temperature >= hotdata->coldest->temperature)) { ++hotdata->refcount; hf_delete(hotdata, VTOC(vp)->c_fileid, temperature); --hotdata->refcount; } - +out: + lck_mtx_unlock(&hfsmp->hfc_mutex); return (0); } @@ -820,58 +853,35 @@ hfs_removehotfile(struct vnode *vp) *======================================================================== */ +static int +hotfiles_collect_callback(struct vnode *vp, __unused void *cargs) +{ + if ((vnode_isreg(vp) || vnode_islnk(vp)) && !vnode_issystem(vp)) + (void) hfs_addhotfile_internal(vp); + + return (VNODE_RETURNED); +} + /* * Add all active hot files to the recording list. 
*/ static int -hotfiles_collect(struct hfsmount *hfsmp, struct proc *p) +hotfiles_collect(struct hfsmount *hfsmp) { struct mount *mp = HFSTOVFS(hfsmp); - struct vnode *nvp, *vp; - struct cnode *cp; - int error; - if (vfs_busy(mp, LK_NOWAIT, 0, p)) + if (vfs_busy(mp, LK_NOWAIT)) return (0); -loop: - simple_lock(&mntvnode_slock); - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - goto loop; - } - simple_lock(&vp->v_interlock); - nvp = vp->v_mntvnodes.le_next; - - if ((vp->v_flag & VSYSTEM) || - !(vp->v_type == VREG || vp->v_type == VLNK)) { - simple_unlock(&vp->v_interlock); - continue; - } - cp = VTOC(vp); - if (cp == NULL || vp->v_flag & (VXLOCK|VORECLAIM)) { - simple_unlock(&vp->v_interlock); - continue; - } - - simple_unlock(&mntvnode_slock); - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { - if (error == ENOENT) - goto loop; - simple_lock(&mntvnode_slock); - continue; - } - (void) hfs_addhotfile(vp); - vput(vp); - - simple_lock(&mntvnode_slock); - } - - simple_unlock(&mntvnode_slock); + /* + * hotfiles_collect_callback will be called for each vnode + * hung off of this mount point; the vnode will be + * properly referenced and unreferenced around the callback. + */ + vnode_iterate(mp, 0, hotfiles_collect_callback, (void *)NULL); - vfs_unbusy(mp, p); + vfs_unbusy(mp); return (0); } @@ -882,7 +892,7 @@ loop: * This is called from within BTUpdateRecord. */ static int -update_callback(const HotFileKey *key, u_int32_t *data, u_int16_t datalen, u_int32_t *state) +update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state) { if (key->temperature == HFC_LOOKUPTAG) *data = *state; @@ -893,11 +903,10 @@ * Identify files already in hot area.
*/ static int -hotfiles_refine(struct hfsmount *hfsmp, struct proc *p) +hotfiles_refine(struct hfsmount *hfsmp) { BTreeIterator * iterator; struct mount *mp; - struct vnode *vp; filefork_t * filefork; hotfilelist_t *listp; FSBufferDescriptor record; @@ -920,15 +929,14 @@ hotfiles_refine(struct hfsmount *hfsmp, struct proc *p) record.itemSize = sizeof(u_int32_t); record.itemCount = 1; - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - error = EINVAL; - goto out; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; } - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + error = EPERM; + goto out; + } filefork = VTOF(hfsmp->hfc_filevp); for (i = 0; i < listp->hfl_count; ++i) { @@ -975,7 +983,7 @@ hotfiles_refine(struct hfsmount *hfsmp, struct proc *p) key->temperature = listp->hfl_hotfile[i].hf_temperature; key->fileID = listp->hfl_hotfile[i].hf_fileid; key->forkType = 0; - error = BTInsertRecord(filefork, iterator, &record, sizeof(data)); + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID); error = MacToVFSError(error); @@ -991,12 +999,9 @@ hotfiles_refine(struct hfsmount *hfsmp, struct proc *p) } /* end for */ (void) BTFlushPath(filefork); - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); out: FREE(iterator, M_TEMP); return (error); @@ -1004,12 +1009,13 @@ out: /* * Move new hot files into hot area. + * + * Requires that the hfc_mutex be held. */ static int -hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) +hotfiles_adopt(struct hfsmount *hfsmp) { BTreeIterator * iterator; - struct mount *mp; struct vnode *vp; filefork_t * filefork; hotfilelist_t *listp; @@ -1023,7 +1029,6 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) int last; int error = 0; int startedtrans = 0; - int aquiredlock = 0; if ((listp = (hotfilelist_t *)hfsmp->hfc_recdata) == NULL) return (0); @@ -1031,10 +1036,13 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) if (hfsmp->hfc_stage != HFC_ADOPTION) { return (EBUSY); } + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + return (EPERM); + } + stage = hfsmp->hfc_stage; hfsmp->hfc_stage = HFC_BUSY; - mp = HFSTOVFS(hfsmp); blksmoved = 0; last = listp->hfl_next + HFC_FILESPERSYNC; if (last > listp->hfl_count) @@ -1049,7 +1057,6 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) record.itemSize = sizeof(u_int32_t); record.itemCount = 1; - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); filefork = VTOF(hfsmp->hfc_filevp); for (i = listp->hfl_next; (i < last) && (blksmoved < HFC_BLKSPERSYNC); ++i) { @@ -1063,7 +1070,7 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) /* * Acquire a vnode for this file. 
*/ - error = VFS_VGET(mp, &listp->hfl_hotfile[i].hf_fileid, &vp); + error = hfs_vget(hfsmp, listp->hfl_hotfile[i].hf_fileid, &vp, 0); if (error) { if (error == ENOENT) { error = 0; @@ -1072,23 +1079,26 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) } break; } - if (vp->v_type != VREG && vp->v_type != VLNK) { + if (!vnode_isreg(vp) && !vnode_islnk(vp)) { printf("hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); - vput(vp); - listp->hfl_hotfile[i].hf_temperature == 0; + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; listp->hfl_next++; continue; /* stale entry, go to next */ } if (hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) { - vput(vp); - listp->hfl_hotfile[i].hf_temperature == 0; + hfs_unlock(VTOC(vp)); + vnode_put(vp); + listp->hfl_hotfile[i].hf_temperature = 0; listp->hfl_next++; listp->hfl_totalblocks -= listp->hfl_hotfile[i].hf_blocks; continue; /* stale entry, go to next */ } fileblocks = VTOF(vp)->ff_blocks; if (fileblocks > hfsmp->hfs_hotfile_freeblks) { - vput(vp); + hfs_unlock(VTOC(vp)); + vnode_put(vp); listp->hfl_next++; listp->hfl_totalblocks -= fileblocks; continue; /* entry too big, go to next */ @@ -1096,23 +1106,27 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) if ((blksmoved > 0) && (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { - vput(vp); - break; + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; /* adopt this entry the next time around */ } /* Start a new transaction. */ - hfs_global_shared_lock_acquire(hfsmp); - aquiredlock = 1; - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - error = EINVAL; - vput(vp); - break; - } - startedtrans = 1; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + hfs_unlock(VTOC(vp)); + vnode_put(vp); + break; + } + startedtrans = 1; + + if (VTOC(vp)->c_desc.cd_nameptr) + data = *(u_int32_t *)(VTOC(vp)->c_desc.cd_nameptr); + else + data = 0x3f3f3f3f; - error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, p->p_ucred, p); - vput(vp); + error = hfs_relocate(vp, hfsmp->hfs_hotfile_start, kauth_cred_get(), current_proc()); + hfs_unlock(VTOC(vp)); + vnode_put(vp); if (error) break; @@ -1125,12 +1139,8 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) key->temperature = listp->hfl_hotfile[i].hf_temperature; key->fileID = listp->hfl_hotfile[i].hf_fileid; key->forkType = 0; - if (VTOC(vp)->c_desc.cd_nameptr) - data = *(u_int32_t *)(VTOC(vp)->c_desc.cd_nameptr); - else - data = 0x3f3f3f3f; - error = BTInsertRecord(filefork, iterator, &record, sizeof(data)); + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); @@ -1144,7 +1154,7 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) key->fileID = listp->hfl_hotfile[i].hf_fileid; key->forkType = 0; data = listp->hfl_hotfile[i].hf_temperature; - error = BTInsertRecord(filefork, iterator, &record, sizeof(data)); + error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { printf("hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); @@ -1155,11 +1165,9 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) /* Transaction complete. 
*/ if (startedtrans) { - journal_end_transaction(hfsmp->jnl); - startedtrans = 0; + hfs_end_transaction(hfsmp); + startedtrans = 0; } - hfs_global_shared_lock_release(hfsmp); - aquiredlock = 0; blksmoved += fileblocks; listp->hfl_next++; @@ -1180,14 +1188,10 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) /* Finish any outstanding transactions. */ if (startedtrans) { (void) BTFlushPath(filefork); - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); startedtrans = 0; } - if (aquiredlock) { - hfs_global_shared_lock_release(hfsmp); - aquiredlock = 0; - } - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) { #if HFC_VERBOSE @@ -1209,12 +1213,13 @@ hotfiles_adopt(struct hfsmount *hfsmp, struct proc *p) /* * Reclaim space by evicting the coldest files. + * + * Requires that the hfc_mutex be held. */ static int hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) { BTreeIterator * iterator; - struct mount *mp; struct vnode *vp; HotFileKey * key; filefork_t * filefork; @@ -1225,7 +1230,6 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) int fileblocks; int error = 0; int startedtrans = 0; - int aquiredlock = 0; if (hfsmp->hfc_stage != HFC_EVICTION) { return (EBUSY); @@ -1234,17 +1238,19 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) if ((listp = (hotfilelist_t *)hfsmp->hfc_recdata) == NULL) return (0); + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + return (EPERM); + } + stage = hfsmp->hfc_stage; hfsmp->hfc_stage = HFC_BUSY; - mp = HFSTOVFS(hfsmp); filesmoved = blksmoved = 0; MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); key = (HotFileKey*) &iterator->key; - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); filefork = VTOF(hfsmp->hfc_filevp); while (listp->hfl_reclaimblks > 0 && @@ -1278,20 +1284,19 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) /* * Aquire the vnode for this file. */ - error = VFS_VGET(mp, &key->fileID, &vp); + error = hfs_vget(hfsmp, key->fileID, &vp, 0); /* Start a new transaction. 
*/ - hfs_global_shared_lock_acquire(hfsmp); - aquiredlock = 1; - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - if (error == 0) - vput(vp); - error = EINVAL; - break; - } - startedtrans = 1; - } + if (hfs_start_transaction(hfsmp) != 0) { + if (error == 0) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + } + error = EINVAL; + break; + } + startedtrans = 1; + if (error) { if (error == ENOENT) { (void) BTDeleteRecord(filefork, iterator); @@ -1299,14 +1304,15 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) (void) BTDeleteRecord(filefork, iterator); goto next; /* stale entry, go to next */ } else { - printf("hotfiles_evict: err %d getting file %d (%d)\n", + printf("hotfiles_evict: err %d getting file %d\n", error, key->fileID); } break; } - if (vp->v_type != VREG && vp->v_type != VLNK) { + if (!vnode_isreg(vp) && !vnode_islnk(vp)) { printf("hotfiles_evict: huh, not a file %d\n", key->fileID); - vput(vp); + hfs_unlock(VTOC(vp)); + vnode_put(vp); (void) BTDeleteRecord(filefork, iterator); key->temperature = HFC_LOOKUPTAG; (void) BTDeleteRecord(filefork, iterator); @@ -1315,7 +1321,8 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) fileblocks = VTOF(vp)->ff_blocks; if ((blksmoved > 0) && (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { - vput(vp); + hfs_unlock(VTOC(vp)); + vnode_put(vp); break; } /* @@ -1325,7 +1332,8 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) #if HFC_VERBOSE printf("hotfiles_evict: file %d isn't hot!\n", key->fileID); #endif - vput(vp); + hfs_unlock(VTOC(vp)); + vnode_put(vp); (void) BTDeleteRecord(filefork, iterator); key->temperature = HFC_LOOKUPTAG; (void) BTDeleteRecord(filefork, iterator); @@ -1335,16 +1343,23 @@ hotfiles_evict(struct hfsmount *hfsmp, struct proc *p) /* * Relocate file out of hot area. */ - error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, p->p_ucred, p); + error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, proc_ucred(p), p); if (error) { - /* XXX skip to next record here! */ - printf("hotfiles_evict: err % relocating file\n", error, key->fileID); - vput(vp); - break; + printf("hotfiles_evict: err %d relocating file %d\n", error, key->fileID); + hfs_unlock(VTOC(vp)); + vnode_put(vp); + goto next; /* go to next */ } - (void) VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); - vput(vp); + // + // We do not believe that this call to hfs_fsync() is + // necessary and it causes a journal transaction + // deadlock so we are removing it. + // + // (void) hfs_fsync(vp, MNT_WAIT, 0, p); + + hfs_unlock(VTOC(vp)); + vnode_put(vp); hfsmp->hfs_hotfile_freeblks += fileblocks; listp->hfl_reclaimblks -= fileblocks; @@ -1371,11 +1386,9 @@ next: /* Transaction complete. */ if (startedtrans) { - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); startedtrans = 0; } - hfs_global_shared_lock_release(hfsmp); - aquiredlock = 0; } /* end while */ @@ -1385,14 +1398,10 @@ next: /* Finish any outstanding transactions. */ if (startedtrans) { (void) BTFlushPath(filefork); - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); startedtrans = 0; } - if (aquiredlock) { - hfs_global_shared_lock_release(hfsmp); - aquiredlock = 0; - } - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); /* * Move to next stage when finished. @@ -1413,7 +1422,7 @@ next: * Age the existing records in the hot files b-tree. 
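hotfiles_age(), which follows, decays every record once per cycle: temperatures are halved with a floor of 4, so long-idle entries stay comparable and eventually lose out to newly recorded files without ever reaching zero. The decay step, mirroring the MAX(prev_key->temperature >> 1, 4) expression in the function body, as a standalone sketch:

#include <sys/types.h>

static u_int32_t age_temperature(u_int32_t temperature)
{
	u_int32_t newtemp = temperature >> 1;	/* halve each aging cycle */

	return (newtemp > 4) ? newtemp : 4;	/* floor of 4 */
}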
*/ static int -hotfiles_age(struct hfsmount *hfsmp, struct proc *p) +hotfiles_age(struct hfsmount *hfsmp) { BTreeInfoRec btinfo; BTreeIterator * iterator; @@ -1450,15 +1459,14 @@ hotfiles_age(struct hfsmount *hfsmp, struct proc *p) /* * Capture b-tree changes inside a transaction */ - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - error = EINVAL; - goto out2; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out2; } - vn_lock(hfsmp->hfc_filevp, LK_EXCLUSIVE | LK_RETRY, p); + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + error = EPERM; + goto out1; + } filefork = VTOF(hfsmp->hfc_filevp); error = BTGetInformation(filefork, 0, &btinfo); @@ -1527,7 +1535,7 @@ hotfiles_age(struct hfsmount *hfsmp, struct proc *p) newtemp = MAX(prev_key->temperature >> 1, 4); prev_key->temperature = newtemp; - error = BTInsertRecord(filefork, prev_iterator, &prev_record, sizeof(data)); + error = BTInsertRecord(filefork, prev_iterator, &prev_record, prev_record.itemSize); if (error) { printf("hfs_agehotfiles: BTInsertRecord failed %d (file %d)\n", error, prev_key->fileID); error = MacToVFSError(error); @@ -1559,13 +1567,9 @@ hotfiles_age(struct hfsmount *hfsmp, struct proc *p) #endif (void) BTFlushPath(filefork); out: - (void) VOP_UNLOCK(hfsmp->hfc_filevp, 0, p); - - if (hfsmp->jnl) { - // hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_unlock(VTOC(hfsmp->hfc_filevp)); +out1: + hfs_end_transaction(hfsmp); out2: FREE(iterator, M_TEMP); return (error); @@ -1608,36 +1612,34 @@ hotextents(struct hfsmount *hfsmp, HFSPlusExtentDescriptor * extents) /* * Open the hot files b-tree for writing. * - * On successful exit the vnode has a reference but is unlocked. + * On successful exit the vnode has a reference but not an iocount. 
*/ static int hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) { struct proc *p; struct vnode *vp; - struct cat_desc cdesc = {0}; + struct cat_desc cdesc; struct cat_attr cattr; struct cat_fork cfork; static char filename[] = HFC_FILENAME; int error; int retry = 0; + int lockflags; *vpp = NULL; p = current_proc(); + bzero(&cdesc, sizeof(cdesc)); cdesc.cd_parentcnid = kRootDirID; cdesc.cd_nameptr = filename; cdesc.cd_namelen = strlen(filename); - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (error) - return (error); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(hfsmp, &cdesc, 0, &cdesc, &cattr, &cfork); + error = cat_lookup(hfsmp, &cdesc, 0, &cdesc, &cattr, &cfork, NULL); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (error) { printf("hfc_btree_open: cat_lookup error %d\n", error); @@ -1645,18 +1647,19 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) } again: cdesc.cd_flags |= CD_ISMETA; - error = hfs_getnewvnode(hfsmp, NULL, &cdesc, 0, &cattr, &cfork, &vp); + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr, &cfork, &vp); if (error) { printf("hfc_btree_open: hfs_getnewvnode error %d\n", error); cat_releasedesc(&cdesc); return (error); } - if ((vp->v_flag & VSYSTEM) == 0) { + if (!vnode_issystem(vp)) { #if HFC_VERBOSE printf("hfc_btree_open: file has UBC, try again\n"); #endif - vput(vp); - vgone(vp); + hfs_unlock(VTOC(vp)); + vnode_recycle(vp); + vnode_put(vp); if (retry++ == 0) goto again; else @@ -1668,28 +1671,16 @@ again: if (error) { printf("hfc_btree_open: BTOpenPath error %d\n", error); error = MacToVFSError(error); - } else { -#if HFC_VERBOSE - struct BTreeInfoRec btinfo; - - if (BTGetInformation(VTOF(vp), 0, &btinfo) == 0) { - printf("btinfo: nodeSize %d\n", btinfo.nodeSize); - printf("btinfo: maxKeyLength %d\n", btinfo.maxKeyLength); - printf("btinfo: treeDepth %d\n", btinfo.treeDepth); - printf("btinfo: numRecords %d\n", btinfo.numRecords); - printf("btinfo: numNodes %d\n", btinfo.numNodes); - printf("btinfo: numFreeNodes %d\n", btinfo.numFreeNodes); - } -#endif } - VOP_UNLOCK(vp, 0, p); /* unlocked with a single reference */ - if (error) - vrele(vp); - else + hfs_unlock(VTOC(vp)); + if (error == 0) { *vpp = vp; + vnode_ref(vp); /* keep a reference while it's open */ + } + vnode_put(vp); - if ((vp->v_flag & VSYSTEM) == 0) + if (!vnode_issystem(vp)) panic("hfc_btree_open: not a system file (vp = 0x%08x)", vp); if (UBCINFOEXISTS(vp)) @@ -1701,31 +1692,32 @@ again: /* * Close the hot files b-tree. * - * On entry the vnode is not locked but has a reference. + * On entry the vnode has a reference.
*/ static int hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) { struct proc *p = current_proc(); - int error; + int error = 0; if (hfsmp->jnl) { journal_flush(hfsmp->jnl); } - if (vget(vp, LK_EXCLUSIVE, p) == 0) { - (void) VOP_FSYNC(vp, NOCRED, MNT_WAIT, p); - error = BTClosePath(VTOF(vp)); - if (error) - printf("hfc_btree_close: BTClosePath error %d\n", error); - vput(vp); + if (vnode_get(vp) == 0) { + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + if (error == 0) { + (void) hfs_fsync(vp, MNT_WAIT, 0, p); + error = BTClosePath(VTOF(vp)); + hfs_unlock(VTOC(vp)); + } + vnode_rele(vp); + vnode_recycle(vp); + vnode_put(vp); } - vrele(vp); - vgone(vp); - vp = NULL; - return (0); + return (error); } /* @@ -1735,34 +1727,65 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) static int hfc_btree_create(struct hfsmount *hfsmp, int nodesize, int entries) { - struct proc *p; - struct nameidata nd; - struct vnode *vp; - char path[128]; + struct vnode *dvp = NULL; + struct vnode *vp = NULL; + struct cnode *cp = NULL; + struct vfs_context context; + struct vnode_attr va; + struct componentname cname; + static char filename[] = HFC_FILENAME; int error; + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); if (hfsmp->hfc_filevp) panic("hfc_btree_create: hfc_filevp exists (vp = 0x%08x)", hfsmp->hfc_filevp); - p = current_proc(); - snprintf(path, sizeof(path), "%s/%s", - hfsmp->hfs_mp->mnt_stat.f_mntonname, HFC_FILENAME); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p); - if ((error = vn_open(&nd, O_CREAT | FWRITE, S_IRUSR | S_IWUSR)) != 0) { + error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, &context); + if (error) { return (error); } - vp = nd.ni_vp; - + cname.cn_nameiop = CREATE; + cname.cn_flags = ISLASTCN; + cname.cn_context = &context; + cname.cn_pnbuf = filename; + cname.cn_pnlen = sizeof(filename); + cname.cn_nameptr = filename; + cname.cn_namelen = strlen(filename); + cname.cn_hash = 0; + cname.cn_consume = 0; + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, S_IFREG | S_IRUSR | S_IWUSR); + VATTR_SET(&va, va_uid, 0); + VATTR_SET(&va, va_gid, 0); + + /* call ourselves directly, ignore the higher-level VFS file creation code */ + error = VNOP_CREATE(dvp, &vp, &cname, &va, &context); + if (error) { + printf("HFS: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + goto out; + } + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + goto out; + } + cp = VTOC(vp); + /* Don't use non-regular files or files with links. */ - if (vp->v_type != VREG || VTOC(vp)->c_nlink != 1) { + if (!vnode_isreg(vp) || cp->c_nlink != 1) { error = EFTYPE; goto out; } printf("HFS: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN); - if (VTOF(vp)->ff_size < nodesize) { + if (VTOF(vp)->ff_size < (u_int64_t)nodesize) { caddr_t buffer; u_int16_t *index; u_int16_t offset; @@ -1776,7 +1799,7 @@ hfc_btree_create(struct hfsmount *hfsmp, int nodesize, int entries) /* * Mark it invisible (truncate will pull these changes). 
*/ - ((FndrFileInfo *)&VTOC(vp)->c_finderinfo[0])->fdFlags |= + ((FndrFileInfo *)&cp->c_finderinfo[0])->fdFlags |= SWAP_BE16 (kIsInvisible + kNameLocked); if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, nodesize)) { @@ -1831,29 +1854,49 @@ hfc_btree_create(struct hfsmount *hfsmp, int nodesize, int entries) - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); index[(nodesize / 2) - 4] = SWAP_BE16 (offset); - vp->v_flag |= VNOFLUSH; - error = VOP_TRUNCATE(vp, (off_t)filesize, IO_NDELAY, NOCRED, p); + vnode_setnoflush(vp); + error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, &context); + if (error) { + printf("HFS: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + goto out; + } + cp->c_flag |= C_ZFWANTSYNC; + cp->c_zftimeout = 1; + if (error == 0) { - struct iovec aiov; - struct uio auio; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = buffer; - aiov.iov_len = filesize; - auio.uio_resid = nodesize; - auio.uio_offset = (off_t)(0); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(vp, &auio, 0, kernproc->p_ucred); + struct vnop_write_args args; + uio_t auio; + + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + uio_addiov(auio, (uintptr_t)buffer, nodesize); + + args.a_desc = &vnop_write_desc; + args.a_vp = vp; + args.a_uio = auio; + args.a_ioflag = 0; + args.a_context = &context; + + hfs_unlock(cp); + cp = NULL; + + error = hfs_vnop_write(&args); + if (error) + printf("HFS: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + + uio_free(auio); } kmem_free(kernel_map, (vm_offset_t)buffer, nodesize); } out: - (void) VOP_UNLOCK(vp, 0, p); - (void) vn_close(vp, FWRITE, kernproc->p_ucred, p); - vgone(vp); + if (dvp) { + vnode_put(dvp); + } + if (vp) { + if (cp) + hfs_unlock(cp); + vnode_recycle(vp); + vnode_put(vp); + } return (error); } @@ -1903,6 +1946,7 @@ hfc_comparekeys(HotFileKey *searchKey, HotFileKey *trialKey) /* * Lookup a hot file entry in the tree. */ +#if HFC_DEBUG static hotfile_entry_t * hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) { @@ -1923,6 +1967,7 @@ hf_lookup(hotfile_data_t *hotdata, u_int32_t fileid, u_int32_t temperature) } return (entry); } +#endif /* * Insert a hot file entry into the tree. @@ -1993,6 +2038,21 @@ hf_coldest(hotfile_data_t *hotdata) return (entry); } +/* + * Find the hottest entry in the tree. + */ +static hotfile_entry_t * +hf_hottest(hotfile_data_t *hotdata) +{ + hotfile_entry_t *entry = hotdata->rootentry; + + if (entry) { + while (entry->right) + entry = entry->right; + } + return (entry); +} + /* * Delete a hot file entry from the tree. */ @@ -2093,38 +2153,32 @@ hf_getnewentry(hotfile_data_t *hotdata) /* - * Visit the tree in desending order. + * Generate a sorted list of hot files (hottest to coldest). + * + * As a side effect, every node in the hot file tree will be + * deleted (moved to the free list). */ static void -hf_sortlist(hotfile_entry_t * root, int *index, hotfilelist_t *sortedlist) -{ - if (root) { - int i; - - hf_sortlist(root->right, index, sortedlist); - i = *index; - ++(*index); - sortedlist->hfl_hotfile[i].hf_fileid = root->fileid; - sortedlist->hfl_hotfile[i].hf_temperature = root->temperature; - sortedlist->hfl_hotfile[i].hf_blocks = root->blocks; - sortedlist->hfl_totalblocks += root->blocks; - hf_sortlist(root->left, index, sortedlist); - } -} - -/* - * Generate a sorted list of hot files. 
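hf_hottest() above and the rewritten hf_getsortedlist() that follows rely on a standard property of a binary search tree keyed by temperature: the hottest entry is the rightmost node, so repeatedly taking and deleting it drains the tree in descending order. A self-contained sketch of that selection step, with illustrative types:

struct entry {
	struct entry *left;
	struct entry *right;
	unsigned int temperature;
};

/* Rightmost node == maximum key in a binary search tree. */
static struct entry *hottest(struct entry *root)
{
	struct entry *e = root;

	while (e != NULL && e->right != NULL)
		e = e->right;
	return (e);
}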
- */ -static int hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist) { - int index = 0; - - hf_sortlist(hotdata->rootentry, &index, sortedlist); + int i = 0; + hotfile_entry_t *entry; + + while ((entry = hf_hottest(hotdata)) != NULL) { + sortedlist->hfl_hotfile[i].hf_fileid = entry->fileid; + sortedlist->hfl_hotfile[i].hf_temperature = entry->temperature; + sortedlist->hfl_hotfile[i].hf_blocks = entry->blocks; + sortedlist->hfl_totalblocks += entry->blocks; + ++i; - sortedlist->hfl_count = hotdata->activefiles; + hf_delete(hotdata, entry->fileid, entry->temperature); + } - return (index); + sortedlist->hfl_count = i; + +#if HFC_VERBOSE + printf("HFS: hf_getsortedlist returned %d entries\n", i); +#endif } diff --git a/bsd/hfs/hfs_hotfiles.h b/bsd/hfs/hfs_hotfiles.h index b1370b849..a9db6d619 100644 --- a/bsd/hfs/hfs_hotfiles.h +++ b/bsd/hfs/hfs_hotfiles.h @@ -39,7 +39,7 @@ #define HFC_CUMULATIVE_CYCLES 4 #define HFC_MAXIMUM_FILE_COUNT 5000 #define HFC_MAXIMUM_FILESIZE (10 * 1024 * 1024) -#define HFC_MINIMUM_TEMPERATURE 16 +#define HFC_MINIMUM_TEMPERATURE 24 /* @@ -107,15 +107,15 @@ struct vnode; */ int hfs_hotfilesync (struct hfsmount *, struct proc *); -int hfs_recording_init(struct hfsmount *, struct proc *); -int hfs_recording_start (struct hfsmount *, struct proc *); -int hfs_recording_stop (struct hfsmount *, struct proc *); -int hfs_recording_suspend (struct hfsmount *, struct proc *); -int hfs_recording_abort (struct hfsmount *, struct proc *); +int hfs_recording_init(struct hfsmount *); +int hfs_recording_suspend (struct hfsmount *); int hfs_addhotfile (struct vnode *); int hfs_removehotfile (struct vnode *); +int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); + + #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ #endif /* __HFS_HOTFILES__ */ diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c index 97dfde7bb..8ab33cf97 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,9 @@ #include "hfs_endian.h" +static int cur_link_id = 0; + + /* * Create a new indirect link * @@ -72,25 +74,12 @@ createindirectlink(struct hfsmount *hfsmp, u_int32_t linknum, fip->fdCreator = SWAP_BE32 (kHFSPlusCreator); /* 'hfs+' */ fip->fdFlags = SWAP_BE16 (kHasBeenInited); - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - return EINVAL; - } - } - /* Create the indirect link directly in the catalog */ result = cat_create(hfsmp, &desc, &attr, NULL); if (result == 0 && linkcnid != NULL) *linkcnid = attr.ca_fileid; - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - return (result); } @@ -107,15 +96,22 @@ static int hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp, struct componentname *cnp) { - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); u_int32_t indnodeno = 0; - char inodename[32]; + char inodename[32]; struct cat_desc to_desc; int newlink = 0; + int lockflags; int retval; - cat_cookie_t cookie = {0}; - + cat_cookie_t cookie; + cnid_t orig_cnid; + if (cur_link_id == 0) { + cur_link_id = ((random() & 0x3fffffff) + 100); + // printf("hfs: initializing cur link id to: 0x%.8x\n", cur_link_id); + } + /* We don't allow link nodes in our Private Meta Data folder! */ if (dcp->c_fileid == hfsmp->hfs_privdir_desc.cd_cnid) return (EPERM); @@ -123,16 +119,17 @@ hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp, if (hfs_freeblks(hfsmp, 0) == 0) return (ENOSPC); + bzero(&cookie, sizeof(cat_cookie_t)); /* Reserve some space in the Catalog file. */ if ((retval = cat_preflight(hfsmp, (2 * CAT_CREATE)+ CAT_RENAME, &cookie, p))) { return (retval); } - /* Lock catalog b-tree */ - retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (retval) { - goto out2; - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + // save off a copy of the current cnid so we can put + // it back if we get errors down below + orig_cnid = cp->c_desc.cd_cnid; /* * If this is a new hardlink then we need to create the data @@ -146,7 +143,12 @@ hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp, do { /* get a unique indirect node number */ - indnodeno = ((random() & 0x3fffffff) + 100); + if (retval == 0) { + indnodeno = cp->c_fileid; + } else { + indnodeno = cur_link_id++; + } + MAKE_INODE_NAME(inodename, indnodeno); /* move source file to data node directory */ @@ -156,6 +158,11 @@ hfs_makelink(struct hfsmount *hfsmp, struct cnode *cp, struct cnode *dcp, retval = cat_rename(hfsmp, &cp->c_desc, &hfsmp->hfs_privdir_desc, &to_desc, NULL); + if (retval != 0 && retval != EEXIST) { + printf("hfs_makelink: cat_rename to %s failed (%d). 
fileid %d\n", + inodename, retval, cp->c_fileid); + } + } while (retval == EEXIST); if (retval) goto out; @@ -164,19 +171,16 @@ retval = createindirectlink(hfsmp, indnodeno, cp->c_parentcnid, cp->c_desc.cd_nameptr, &cp->c_desc.cd_cnid); if (retval) { - /* put it source file back */ - // XXXdbg - #if 1 - { - int err; - err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - if (err) - panic("hfs_makelink: error %d from cat_rename backout 1", err); - } - #else - (void) cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - #endif - goto out; + /* put the source file back */ + int err; + + // Put this back to what it was before. + cp->c_desc.cd_cnid = orig_cnid; + + err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); + if (err) + panic("hfs_makelink: error %d from cat_rename backout 1", err); + goto out; } cp->c_rdev = indnodeno; } else { @@ -188,22 +192,20 @@ */ retval = createindirectlink(hfsmp, indnodeno, dcp->c_fileid, cnp->cn_nameptr, NULL); if (retval && newlink) { - /* Get rid of new link */ - (void) cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); - - /* Put the source file back */ - // XXXdbg - #if 1 - { - int err; - err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - if (err) - panic("hfs_makelink: error %d from cat_rename backout 2", err); - } - #else - (void) cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); - #endif - goto out; + int err; + + /* Get rid of new link */ + (void) cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + + // Put this back to what it was before. + cp->c_desc.cd_cnid = orig_cnid; + + /* Put the source file back */ + err = cat_rename(hfsmp, &to_desc, &dcp->c_desc, &cp->c_desc, NULL); + if (err) + panic("hfs_makelink: error %d from cat_rename backout 2", err); + + goto out; } /* @@ -212,17 +214,41 @@ * - mark the cnode as a hard link */ if (newlink) { + vnode_t vp; + + if (retval != 0) { + panic("hfs_makelink: retval %d but newlink = 1!\n", retval); + } + hfsmp->hfs_privdir_attr.ca_entries++; - (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); + retval = cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - + &hfsmp->hfs_privdir_attr, NULL, NULL); + if (retval != 0) { + panic("hfs_makelink: cat_update of privdir failed!
(%d)\n", + retval); + } hfs_volupdate(hfsmp, VOL_MKFILE, 0); - cp->c_flag |= (C_CHANGE | C_HARDLINK); + cp->c_flag |= C_HARDLINK; + if ((vp = cp->c_vp) != NULLVP) { + if (vnode_get(vp) == 0) { + vnode_set_hard_link(vp); + vnode_put(vp); + } + } + if ((vp = cp->c_rsrc_vp) != NULLVP) { + if (vnode_get(vp) == 0) { + vnode_set_hard_link(vp); + vnode_put(vp); + } + } + cp->c_touch_chgtime = TRUE; + cp->c_flag |= C_FORCEUPDATE; } + dcp->c_flag |= C_FORCEUPDATE; out: - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); -out2: + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); return (retval); } @@ -233,128 +259,106 @@ out2: #% link vp U U U #% link tdvp L U U # - vop_link { + vnop_link { IN WILLRELE struct vnode *vp; IN struct vnode *targetPar_vp; IN struct componentname *cnp; + IN vfs_context_t context; */ __private_extern__ int -hfs_link(ap) - struct vop_link_args /* { - struct vnode *a_vp; - struct vnode *a_tdvp; - struct componentname *a_cnp; - } */ *ap; +hfs_vnop_link(struct vnop_link_args *ap) { struct hfsmount *hfsmp; struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; struct cnode *cp; struct cnode *tdcp; - struct timeval tv; - int error; + enum vtype v_type; + int error, ret, lockflags; + struct cat_desc cndesc; - hfsmp = VTOHFS(vp); - -#if HFS_DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("hfs_link: no name"); -#endif - if (tdvp->v_mount != vp->v_mount) { - VOP_ABORTOP(tdvp, cnp); - error = EXDEV; - goto out2; - } - if (VTOVCB(tdvp)->vcbSigWord != kHFSPlusSigWord) + if (VTOVCB(tdvp)->vcbSigWord != kHFSPlusSigWord) { return err_link(ap); /* hfs disks don't support hard links */ - - if (hfsmp->hfs_privdir_desc.cd_cnid == 0) + } + if (VTOHFS(vp)->hfs_privdir_desc.cd_cnid == 0) { return err_link(ap); /* no private metadata dir, no links possible */ - - if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { - VOP_ABORTOP(tdvp, cnp); - goto out2; } - cp = VTOC(vp); + if (vnode_mount(tdvp) != vnode_mount(vp)) { + return (EXDEV); + } + if ((error = hfs_lockpair(VTOC(tdvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + return (error); + } tdcp = VTOC(tdvp); + cp = VTOC(vp); + hfsmp = VTOHFS(vp); if (cp->c_nlink >= HFS_LINK_MAX) { - VOP_ABORTOP(tdvp, cnp); error = EMLINK; - goto out1; + goto out; } if (cp->c_flags & (IMMUTABLE | APPEND)) { - VOP_ABORTOP(tdvp, cnp); error = EPERM; - goto out1; + goto out; } - if (vp->v_type == VBLK || vp->v_type == VCHR) { - VOP_ABORTOP(tdvp, cnp); + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + v_type = vnode_vtype(vp); + if (v_type == VBLK || v_type == VCHR) { error = EINVAL; /* cannot link to a special file */ - goto out1; + goto out; } - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - VOP_ABORTOP(tdvp, cnp); - error = EINVAL; /* cannot link to a special file */ - goto out1; - } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; /* cannot link to a special file */ + goto out; } cp->c_nlink++; - cp->c_flag |= C_CHANGE; - tv = time; + cp->c_touch_chgtime = TRUE; - error = VOP_UPDATE(vp, &tv, &tv, 1); - if (!error) { - error = hfs_makelink(hfsmp, cp, tdcp, cnp); - } + error = hfs_makelink(hfsmp, cp, tdcp, cnp); if (error) { cp->c_nlink--; - cp->c_flag |= C_CHANGE; + hfs_volupdate(hfsmp, VOL_UPDATE, 0); } else { + /* Invalidate negative cache entries in 
the destination directory */ + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + cache_purge_negatives(tdvp); + /* Update the target directory and volume stats */ tdcp->c_nlink++; tdcp->c_entries++; - tdcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(tdvp, &tv, &tv, 0); + tdcp->c_touch_chgtime = TRUE; + tdcp->c_touch_modtime = TRUE; + tdcp->c_flag |= C_FORCEUPDATE; + + error = hfs_update(tdvp, 0); + if (error) { + panic("hfs_vnop_link: error updating tdvp 0x%x\n", tdvp); + } hfs_volupdate(hfsmp, VOL_MKFILE, (tdcp->c_cnid == kHFSRootFolderID)); } - // XXXdbg - need to do this here as well because cp could have changed - (void) VOP_UPDATE(vp, &tv, &tv, 1); - + cp->c_flag |= C_FORCEUPDATE; // otherwise hfs_update() might skip the update - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - - /* free the pathname buffer */ - { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); + if ((ret = hfs_update(vp, TRUE)) != 0) { + panic("hfs_vnop_link: error %d updating vp @ 0x%x\n", ret, vp); } + + hfs_end_transaction(hfsmp); HFS_KNOTE(vp, NOTE_LINK); HFS_KNOTE(tdvp, NOTE_WRITE); - -out1: - if (tdvp != vp) - VOP_UNLOCK(vp, 0, p); -out2: - vput(tdvp); +out: + hfs_unlockpair(tdcp, cp); return (error); } diff --git a/bsd/hfs/hfs_lockf.c b/bsd/hfs/hfs_lockf.c deleted file mode 100644 index 63c1c0cb4..000000000 --- a/bsd/hfs/hfs_lockf.c +++ /dev/null @@ -1,707 +0,0 @@ -/* - * Copyright (c) 1999,2001-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -/* (c) 1997-1998,2001 Apple Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Scooter Morris at Genentech Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)hfs_lockf.c 1.0 - * derived from @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "hfs_cnode.h" -#include "hfs_lockf.h" - -/* - * This variable controls the maximum number of processes that will - * be checked in doing deadlock detection. - */ -int hfsmaxlockdepth = MAXDEPTH; - -#ifdef LOCKF_DEBUG -#include -#include -int lockf_debug = 0; -struct ctldebug debug4 = { "lockf_debug", &lockf_debug }; -#endif - -#define NOLOCKF (struct hfslockf *)0 -#define SELF 0x1 -#define OTHERS 0x2 - -/* - * Set a byte-range lock. - */ -int -hfs_setlock(lock) - register struct hfslockf *lock; -{ - register struct hfslockf *block; - struct filefork *fork = lock->lf_fork; - struct hfslockf **prev, *overlap, *ltmp; - static char lockstr[] = "hfslockf"; - int ovcase, priority, needtolink, error; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - hfs_lprint("hfs_setlock", lock); -#endif /* LOCKF_DEBUG */ - - /* - * Set the priority - */ - priority = PLOCK; - if (lock->lf_type == F_WRLCK) - priority += 4; - priority |= PCATCH; - /* - * Scan lock list for this file looking for locks that would block us. - */ - while ((block = hfs_getblock(lock))) { - /* - * Free the structure and return if nonblocking. - */ - if ((lock->lf_flags & F_WAIT) == 0) { - FREE(lock, M_LOCKF); - return (EAGAIN); - } - /* - * We are blocked. Since flock style locks cover - * the whole file, there is no chance for deadlock. - * For byte-range locks we must check for deadlock. - * - * Deadlock detection is done by looking through the - * wait channels to see if there are any cycles that - * involve us. MAXDEPTH is set just to make sure we - * do not go off into neverland. - */ - if ((lock->lf_flags & F_POSIX) && - (block->lf_flags & F_POSIX)) { - register struct proc *wproc; - register struct hfslockf *waitblock; - int i = 0; - - /* The block is waiting on something */ - wproc = (struct proc *)block->lf_id; - while (wproc->p_wchan && - (wproc->p_wmesg == lockstr) && - (i++ < hfsmaxlockdepth)) { - waitblock = (struct hfslockf *)wproc->p_wchan; - /* Get the owner of the blocking lock */ - waitblock = waitblock->lf_next; - if ((waitblock->lf_flags & F_POSIX) == 0) - break; - wproc = (struct proc *)waitblock->lf_id; - if (wproc == (struct proc *)lock->lf_id) { - _FREE(lock, M_LOCKF); - return (EDEADLK); - } - } - } - /* - * For flock type locks, we must first remove - * any shared locks that we hold before we sleep - * waiting for an exclusive lock. 
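 *
 * [Editor's aside -- added to this document, not part of the xnu-792
 * patch. For orientation, a hypothetical caller of this interface, as
 * an advisory-lock entry point might have used it; allocation and field
 * names follow the deleted code, and F_WAIT selects blocking mode:]
 *
 *      MALLOC(lock, struct hfslockf *, sizeof *lock, M_LOCKF, M_WAITOK);
 *      lock->lf_fork  = fork;                  /* fork being locked */
 *      lock->lf_id    = (caddr_t)p;            /* owner: the process */
 *      lock->lf_type  = F_WRLCK;
 *      lock->lf_flags = F_POSIX | F_WAIT;      /* sleep until granted */
 *      lock->lf_start = start;
 *      lock->lf_end   = end;                   /* -1 means to EOF */
 *      TAILQ_INIT(&lock->lf_blkhd);
 *      error = hfs_setlock(lock);  /* 0, EDEADLK, or interrupted sleep */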
- */ - if ((lock->lf_flags & F_FLOCK) && - lock->lf_type == F_WRLCK) { - lock->lf_type = F_UNLCK; - (void) hfs_clearlock(lock); - lock->lf_type = F_WRLCK; - } - /* - * Add our lock to the blocked list and sleep until we're free. - * Remember who blocked us (for deadlock detection). - */ - lock->lf_next = block; - TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) { - hfs_lprint("hfs_setlock: blocking on", block); - hfs_lprintlist("hfs_setlock", block); - } -#endif /* LOCKF_DEBUG */ - if ((error = tsleep((caddr_t)lock, priority, lockstr, 0))) { - /* - * We may have been awakened by a signal (in - * which case we must remove ourselves from the - * blocked list) and/or by another process - * releasing a lock (in which case we have already - * been removed from the blocked list and our - * lf_next field set to NOLOCKF). - */ - if (lock->lf_next) - TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, - lf_block); - _FREE(lock, M_LOCKF); - return (error); - } - } - /* - * No blocks!! Add the lock. Note that we will - * downgrade or upgrade any overlapping locks this - * process already owns. - * - * Skip over locks owned by other processes. - * Handle any locks that overlap and are owned by ourselves. - */ - prev = &fork->ff_lockf; - block = fork->ff_lockf; - needtolink = 1; - for (;;) { - if ((ovcase = hfs_findoverlap(block, lock, SELF, &prev, &overlap))) - block = overlap->lf_next; - /* - * Six cases: - * 0) no overlap - * 1) overlap == lock - * 2) overlap contains lock - * 3) lock contains overlap - * 4) overlap starts before lock - * 5) overlap ends after lock - */ - switch (ovcase) { - case 0: /* no overlap */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap; - } - break; - - case 1: /* overlap == lock */ - /* - * If downgrading lock, others may be - * able to acquire it. - */ - if (lock->lf_type == F_RDLCK && - overlap->lf_type == F_WRLCK) - hfs_wakelock(overlap); - overlap->lf_type = lock->lf_type; - FREE(lock, M_LOCKF); - lock = overlap; /* for debug output below */ - break; - - case 2: /* overlap contains lock */ - /* - * Check for common starting point and different types. - */ - if (overlap->lf_type == lock->lf_type) { - _FREE(lock, M_LOCKF); - lock = overlap; /* for debug output below */ - break; - } - if (overlap->lf_start == lock->lf_start) { - *prev = lock; - lock->lf_next = overlap; - overlap->lf_start = lock->lf_end + 1; - } else - hfs_split(overlap, lock); - hfs_wakelock(overlap); - break; - - case 3: /* lock contains overlap */ - /* - * If downgrading lock, others may be able to - * acquire it, otherwise take the list. - */ - if (lock->lf_type == F_RDLCK && - overlap->lf_type == F_WRLCK) { - hfs_wakelock(overlap); - } else { - while ((ltmp = overlap->lf_blkhd.tqh_first)) { - TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, - lf_block); - TAILQ_INSERT_TAIL(&lock->lf_blkhd, - ltmp, lf_block); - } - } - /* - * Add the new lock if necessary and delete the overlap. - */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap->lf_next; - prev = &lock->lf_next; - needtolink = 0; - } else - *prev = overlap->lf_next; - _FREE(overlap, M_LOCKF); - continue; - - case 4: /* overlap starts before lock */ - /* - * Add lock after overlap on the list. - */ - lock->lf_next = overlap->lf_next; - overlap->lf_next = lock; - overlap->lf_end = lock->lf_start - 1; - prev = &lock->lf_next; - hfs_wakelock(overlap); - needtolink = 0; - continue; - - case 5: /* overlap ends after lock */ - /* - * Add the new lock before overlap. 
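 *
 * [Editor's diagram -- added to this document, not part of the xnu-792
 * patch. The six cases above, with ==== the existing overlap and ----
 * the incoming lock:]
 *
 *      0) no overlap:            ====
 *                                        ----
 *      1) overlap == lock:       ========
 *                                --------
 *      2) overlap contains lock: ==========
 *                                  ------
 *      3) lock contains overlap:   ====
 *                                ----------
 *      4) overlap starts before: ======
 *                                   -------
 *      5) overlap ends after:       ======
 *                                -------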
- */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap; - } - overlap->lf_start = lock->lf_end + 1; - hfs_wakelock(overlap); - break; - } - break; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) { - hfs_lprint("hfs_setlock: got the lock", lock); - hfs_lprintlist("hfs_setlock", lock); - } -#endif /* LOCKF_DEBUG */ - return (0); -} - -/* - * Remove a file fork's byte-range lock. - * - * Generally, find the lock (or an overlap to that lock) - * and remove it (or shrink it), then wakeup anyone we can. - */ -int -hfs_clearlock(unlock) - register struct hfslockf *unlock; -{ - struct filefork *fork = unlock->lf_fork; - register struct hfslockf *lf = fork->ff_lockf; - struct hfslockf *overlap, **prev; - int ovcase; - - if (lf == NOLOCKF) - return (0); -#ifdef LOCKF_DEBUG - if (unlock->lf_type != F_UNLCK) - panic("hfs_clearlock: bad type"); - if (lockf_debug & 1) - hfs_lprint("hfs_clearlock", unlock); -#endif /* LOCKF_DEBUG */ - prev = &fork->ff_lockf; - while ((ovcase = hfs_findoverlap(lf, unlock, SELF, &prev, &overlap))) { - /* - * Wakeup the list of locks to be retried. - */ - hfs_wakelock(overlap); - - switch (ovcase) { - - case 1: /* overlap == lock */ - *prev = overlap->lf_next; - FREE(overlap, M_LOCKF); - break; - - case 2: /* overlap contains lock: split it */ - if (overlap->lf_start == unlock->lf_start) { - overlap->lf_start = unlock->lf_end + 1; - break; - } - hfs_split(overlap, unlock); - overlap->lf_next = unlock->lf_next; - break; - - case 3: /* lock contains overlap */ - *prev = overlap->lf_next; - lf = overlap->lf_next; - _FREE(overlap, M_LOCKF); - continue; - - case 4: /* overlap starts before lock */ - overlap->lf_end = unlock->lf_start - 1; - prev = &overlap->lf_next; - lf = overlap->lf_next; - continue; - - case 5: /* overlap ends after lock */ - overlap->lf_start = unlock->lf_end + 1; - break; - } - break; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - hfs_lprintlist("hfs_clearlock", unlock); -#endif /* LOCKF_DEBUG */ - return (0); -} - -/* - * Check whether there is a blocking lock, - * and if so return its process identifier. - */ -int -hfs_getlock(lock, fl) - register struct hfslockf *lock; - register struct flock *fl; -{ - register struct hfslockf *block; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - hfs_lprint("hfs_getlock", lock); -#endif /* LOCKF_DEBUG */ - - if ((block = hfs_getblock(lock))) { - fl->l_type = block->lf_type; - fl->l_whence = SEEK_SET; - fl->l_start = block->lf_start; - if (block->lf_end == -1) - fl->l_len = 0; - else - fl->l_len = block->lf_end - block->lf_start + 1; - if (block->lf_flags & F_POSIX) - fl->l_pid = ((struct proc *)(block->lf_id))->p_pid; - else - fl->l_pid = -1; - } else { - fl->l_type = F_UNLCK; - } - return (0); -} - -/* - * Walk a file fork's list of locks and - * return the first blocking lock. - */ -struct hfslockf * -hfs_getblock(lock) - register struct hfslockf *lock; -{ - struct hfslockf **prev, *overlap, *lf = lock->lf_fork->ff_lockf; - int ovcase; - - prev = &lock->lf_fork->ff_lockf; - while ((ovcase = hfs_findoverlap(lf, lock, OTHERS, &prev, &overlap))) { - /* - * We've found an overlap, see if it blocks us - */ - if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) - return (overlap); - /* - * Nope, point to the next one on the list and - * see if it blocks us - */ - lf = overlap->lf_next; - } - return (NOLOCKF); -} - -/* - * Walk a file fork's list of locks to - * find an overlapping lock (if any). - * - * NOTE: this returns only the FIRST overlapping lock. There - * may be more than one. 
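 *
 * [Editor's note -- added to this document, not part of the xnu-792
 * patch. Because only the first overlap is returned, every caller
 * drives the scan itself; the shape, condensed from hfs_clearlock:]
 *
 *      prev = &fork->ff_lockf;
 *      lf = fork->ff_lockf;
 *      while ((ovcase = hfs_findoverlap(lf, lock, SELF, &prev, &overlap))) {
 *              /* handle ovcase 1..5; *prev relinks, *overlap is the hit */
 *              lf = overlap->lf_next;          /* resume past this hit */
 *      }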
- */ -int -hfs_findoverlap(lf, lock, type, prev, overlap) - register struct hfslockf *lf; - struct hfslockf *lock; - int type; - struct hfslockf ***prev; - struct hfslockf **overlap; -{ - off_t start, end; - - *overlap = lf; - if (lf == NOLOCKF) - return (0); -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - hfs_lprint("hfs_findoverlap: looking for overlap in", lock); -#endif /* LOCKF_DEBUG */ - start = lock->lf_start; - end = lock->lf_end; - while (lf != NOLOCKF) { - if (((type & SELF) && lf->lf_id != lock->lf_id) || - ((type & OTHERS) && lf->lf_id == lock->lf_id)) { - *prev = &lf->lf_next; - *overlap = lf = lf->lf_next; - continue; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - hfs_lprint("\tchecking", lf); -#endif /* LOCKF_DEBUG */ - /* - * OK, check for overlap - * - * Six cases: - * 0) no overlap - * 1) overlap == lock - * 2) overlap contains lock - * 3) lock contains overlap - * 4) overlap starts before lock - * 5) overlap ends after lock - */ - if ((lf->lf_end != -1 && start > lf->lf_end) || - (end != -1 && lf->lf_start > end)) { - /* Case 0 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("no overlap\n"); -#endif /* LOCKF_DEBUG */ - if ((type & SELF) && end != -1 && lf->lf_start > end) - return (0); - *prev = &lf->lf_next; - *overlap = lf = lf->lf_next; - continue; - } - if ((lf->lf_start == start) && (lf->lf_end == end)) { - /* Case 1 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap == lock\n"); -#endif /* LOCKF_DEBUG */ - return (1); - } - if ((lf->lf_start <= start) && - (end != -1) && - ((lf->lf_end >= end) || (lf->lf_end == -1))) { - /* Case 2 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap contains lock\n"); -#endif /* LOCKF_DEBUG */ - return (2); - } - if (start <= lf->lf_start && - (end == -1 || - (lf->lf_end != -1 && end >= lf->lf_end))) { - /* Case 3 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("lock contains overlap\n"); -#endif /* LOCKF_DEBUG */ - return (3); - } - if ((lf->lf_start < start) && - ((lf->lf_end >= start) || (lf->lf_end == -1))) { - /* Case 4 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap starts before lock\n"); -#endif /* LOCKF_DEBUG */ - return (4); - } - if ((lf->lf_start > start) && - (end != -1) && - ((lf->lf_end > end) || (lf->lf_end == -1))) { - /* Case 5 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap ends after lock\n"); -#endif /* LOCKF_DEBUG */ - return (5); - } - panic("hfs_findoverlap: default"); - } - return (0); -} - -/* - * Split a lock and a contained region into - * two or three locks as necessary. - */ -void -hfs_split(lock1, lock2) - register struct hfslockf *lock1; - register struct hfslockf *lock2; -{ - register struct hfslockf *splitlock; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) { - hfs_lprint("hfs_split", lock1); - hfs_lprint("splitting from", lock2); - } -#endif /* LOCKF_DEBUG */ - /* - * Check to see if spliting into only two pieces. 
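 *
 * [Editor's diagram -- added to this document, not part of the xnu-792
 * patch. What hfs_split() produces when lock2 falls strictly inside
 * lock1, i.e. the three-piece case:]
 *
 *      before:  lock1  ================
 *               lock2       ------
 *      after:   lock1  ====                    (front remainder)
 *               lock2       ------             (carved-out range)
 *               split             ======       (new back remainder)
 *
 * The two-piece cases, where lock2 shares a start or an end with lock1,
 * are the early returns just below; only the three-piece case allocates.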
- */ - if (lock1->lf_start == lock2->lf_start) { - lock1->lf_start = lock2->lf_end + 1; - lock2->lf_next = lock1; - return; - } - if (lock1->lf_end == lock2->lf_end) { - lock1->lf_end = lock2->lf_start - 1; - lock2->lf_next = lock1->lf_next; - lock1->lf_next = lock2; - return; - } - /* - * Make a new lock consisting of the last part of - * the encompassing lock - */ - MALLOC(splitlock, struct hfslockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); - bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock); - splitlock->lf_start = lock2->lf_end + 1; - TAILQ_INIT(&splitlock->lf_blkhd); - lock1->lf_end = lock2->lf_start - 1; - /* - * OK, now link it in - */ - splitlock->lf_next = lock1->lf_next; - lock2->lf_next = splitlock; - lock1->lf_next = lock2; -} - -/* - * Wakeup a blocklist - */ -void -hfs_wakelock(listhead) - struct hfslockf *listhead; -{ - register struct hfslockf *wakelock; - - while ((wakelock = listhead->lf_blkhd.tqh_first)) { - TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); - wakelock->lf_next = NOLOCKF; -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - hfs_lprint("hfs_wakelock: awakening", wakelock); -#endif /* LOCKF_DEBUG */ - wakeup((caddr_t)wakelock); - } -} - -#ifdef LOCKF_DEBUG -/* - * Print out a lock. - */ -hfs_lprint(tag, lock) - char *tag; - register struct hfslockf *lock; -{ - - printf("%s: lock 0x%lx for ", tag, lock); - if (lock->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid); - else - printf("id 0x%x", lock->lf_id); - printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d", - FTOC(lock->lf_fork)->c_fileid, - major(FTOC(lock->lf_fork)->c_dev), - minor(FTOC(lock->lf_fork)->c_dev), - lock->lf_type == F_RDLCK ? "shared" : - lock->lf_type == F_WRLCK ? "exclusive" : - lock->lf_type == F_UNLCK ? "unlock" : - "unknown", lock->lf_start, lock->lf_end); - if (lock->lf_blkhd.tqh_first) - printf(" block 0x%x\n", lock->lf_blkhd.tqh_first); - else - printf("\n"); -} - -hfs_lprintlist(tag, lock) - char *tag; - struct hfslockf *lock; -{ - register struct hfslockf *lf, *blk; - - printf("%s: Lock list for ino %d on dev <%d, %d>:\n", - tag, FTOC(lock->lf_fork)->i_number, - major(FTOC(lock->lf_fork)->c_dev), - minor(FTOC(lock->lf_fork)->c_dev)); - for (lf = lock->lf_fork->ff_lockf; lf; lf = lf->lf_next) { - printf("\tlock 0x%lx for ", lf); - if (lf->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid); - else - printf("id 0x%x", lf->lf_id); - printf(", %s, start %d, end %d", - lf->lf_type == F_RDLCK ? "shared" : - lf->lf_type == F_WRLCK ? "exclusive" : - lf->lf_type == F_UNLCK ? "unlock" : - "unknown", lf->lf_start, lf->lf_end); - for (blk = lf->lf_blkhd.tqh_first; blk; - blk = blk->lf_block.tqe_next) { - printf("\n\t\tlock request 0x%lx for ", blk); - if (blk->lf_flags & F_POSIX) - printf("proc %d", - ((struct proc *)(blk->lf_id))->p_pid); - else - printf("id 0x%x", blk->lf_id); - printf(", %s, start %d, end %d", - blk->lf_type == F_RDLCK ? "shared" : - blk->lf_type == F_WRLCK ? "exclusive" : - blk->lf_type == F_UNLCK ? "unlock" : - "unknown", blk->lf_start, blk->lf_end); - if (blk->lf_blkhd.tqh_first) - panic("hfs_lprintlist: bad list"); - } - printf("\n"); - } -} -#endif /* LOCKF_DEBUG */ diff --git a/bsd/hfs/hfs_lockf.h b/bsd/hfs/hfs_lockf.h deleted file mode 100644 index 0ae8db758..000000000 --- a/bsd/hfs/hfs_lockf.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 1999,2001-2002 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -/* (c) 1997-1998,2001 Apple Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Scooter Morris at Genentech Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * - * @(#)hfs_lockf.h 1.0 5/5/98 - * derived from @(#)lockf.h 8.2 (Berkeley) 10/26/94 - */ - -#ifndef __HFS_LOCKF__ -#define __HFS_LOCKF__ - -#include - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -/* - * The hfslockf structure is a kernel structure which contains the information - * associated with a byte range lock. The hfslockf structures are linked into - * a cnode's file fork. Locks are sorted by the starting byte of the lock for - * efficiency. 
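 *
 * [Editor's sketch -- added to this document, not part of the xnu-792
 * patch. A hypothetical walk showing the invariant: following lf_next
 * from ff_lockf visits ranges in ascending lf_start order, so a scan
 * can stop at the first entry starting past the range of interest,
 * mirroring the early return in hfs_findoverlap's case 0:]
 *
 *      for (lf = fork->ff_lockf; lf != NULL; lf = lf->lf_next)
 *              if (end != -1 && lf->lf_start > end)
 *                      break;          /* no later entry can overlap */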
- */ -TAILQ_HEAD(locklist, hfslockf); - -struct hfslockf { - short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */ - short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ - off_t lf_start; /* Byte # of the start of the lock */ - off_t lf_end; /* Byte # of the end of the lock (-1=EOF) */ - caddr_t lf_id; /* Id of the resource holding the lock */ - struct filefork *lf_fork; /* Back pointer to the file fork */ - struct hfslockf *lf_next; /* Pointer to the next lock on this fork */ - struct locklist lf_blkhd; /* List of requests blocked on this lock */ - TAILQ_ENTRY(hfslockf) lf_block;/* A request waiting for a lock */ -}; - -/* Maximum length of sleep chains to traverse to try and detect deadlock. */ -#define MAXDEPTH 50 - -__BEGIN_DECLS -void hfs_addblock __P((struct hfslockf *, struct hfslockf *)); -int hfs_clearlock __P((struct hfslockf *)); -int hfs_findoverlap __P((struct hfslockf *, - struct hfslockf *, int, struct hfslockf ***, struct hfslockf **)); -struct hfslockf * - hfs_getblock __P((struct hfslockf *)); -int hfs_getlock __P((struct hfslockf *, struct flock *)); -int hfs_setlock __P((struct hfslockf *)); -void hfs_split __P((struct hfslockf *, struct hfslockf *)); -void hfs_wakelock __P((struct hfslockf *)); -__END_DECLS - -#ifdef LOCKF_DEBUG -extern int lockf_debug; - -__BEGIN_DECLS -void hfs_lprint __P((char *, struct hfslockf *)); -void hfs_lprintlist __P((char *, struct hfslockf *)); -__END_DECLS -#endif /* LOCKF_DEBUG */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ -#endif /* __HFS_LOCKF__ */ diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index d707d1b18..1942c91d0 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -65,27 +65,27 @@ * * hfs_lookup.c -- code to handle directory traversal on HFS/HFS+ volume */ -#define LEGACY_FORK_NAMES 0 #include -#include #include #include #include -#include #include #include +#include +#include #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" +#define LEGACY_FORK_NAMES 1 static int forkcomponent(struct componentname *cnp, int *rsrcfork); #define _PATH_DATAFORKSPEC "/..namedfork/data" -#ifdef LEGACY_FORK_NAMES +#if LEGACY_FORK_NAMES #define LEGACY_RSRCFORKSPEC "/rsrc" #endif @@ -102,13 +102,6 @@ static int forkcomponent(struct componentname *cnp, int *rsrcfork); * creating, renaming, or deleting a directory entry may be calculated. * Notice that these are the only operations that can affect the directory of the target. * - * If flag has LOCKPARENT or'ed into it and the target of the pathname - * exists, lookup returns both the target and its parent directory locked. - * When creating or renaming and LOCKPARENT is specified, the target may - * not be ".". When deleting and LOCKPARENT is specified, the target may - * be "."., but the caller must check to ensure it does an vrele and vput - * instead of two vputs. - * * LOCKPARENT and WANTPARENT actually refer to the parent of the last item, * so if ISLASTCN is not set, they should be ignored. Also they are mutually exclusive, or * WANTPARENT really implies DONTLOCKPARENT. 
Either of them set means that the calling @@ -117,10 +110,6 @@ static int forkcomponent(struct componentname *cnp, int *rsrcfork); * Keeping the parent locked as long as possible protects from other processes * looking up the same item, so it has to be locked until the cnode is totally finished * - * This routine is actually used as VOP_CACHEDLOOKUP method, and the - * filesystem employs the generic hfs_cache_lookup() as VOP_LOOKUP - * method. - * * hfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory @@ -130,7 +119,7 @@ static int forkcomponent(struct componentname *cnp, int *rsrcfork); * drop it * else * return name. - * return VOP_CACHEDLOOKUP() + * return hfs_lookup() * * Overall outline of hfs_lookup: * @@ -147,9 +136,10 @@ static int forkcomponent(struct componentname *cnp, int *rsrcfork); * nor deleting, add name to cache */ + /* - * Lookup *nm in directory *pvp, return it in *a_vpp. - * **a_vpp is held on exit. + * Lookup *cnp in directory *dvp, return it in *vpp. + * **vpp is held on exit. * We create a cnode for the file, but we do NOT open the file here. #% lookup dvp L ? ? @@ -162,65 +152,66 @@ static int forkcomponent(struct componentname *cnp, int *rsrcfork); * When should we lock parent_hp in here ?? */ - -__private_extern__ -int -hfs_lookup(ap) - struct vop_cachedlookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; +static int +hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, vfs_context_t context, int *cnode_locked) { - struct vnode *dvp; /* vnode for directory being searched */ struct cnode *dcp; /* cnode for directory being searched */ struct vnode *tvp; /* target vnode */ struct hfsmount *hfsmp; - struct componentname *cnp; - struct ucred *cred; + kauth_cred_t cred; struct proc *p; int wantrsrc = 0; int forknamelen = 0; int flags; - int wantparent; int nameiop; int retval = 0; int isDot; - struct cat_desc desc = {0}; + struct cat_desc desc; struct cat_desc cndesc; struct cat_attr attr; struct cat_fork fork; - struct vnode **vpp; + int lockflags; - vpp = ap->a_vpp; - cnp = ap->a_cnp; - dvp = ap->a_dvp; dcp = VTOC(dvp); hfsmp = VTOHFS(dvp); *vpp = NULL; + *cnode_locked = 0; isDot = FALSE; tvp = NULL; nameiop = cnp->cn_nameiop; - cred = cnp->cn_cred; - p = cnp->cn_proc; flags = cnp->cn_flags; - wantparent = flags & (LOCKPARENT|WANTPARENT); + bzero(&desc, sizeof(desc)); + + cred = vfs_context_ucred(context); + p = vfs_context_proc(context); /* * First check to see if it is a . or .., else look it up. */ if (flags & ISDOTDOT) { /* Wanting the parent */ + cnp->cn_flags &= ~MAKEENTRY; goto found; /* .. is always defined */ } else if ((cnp->cn_nameptr[0] == '.') && (cnp->cn_namelen == 1)) { isDot = TRUE; + cnp->cn_flags &= ~MAKEENTRY; goto found; /* We always know who we are */ } else { /* Check fork suffix to see if we want the resource fork */ forknamelen = forkcomponent(cnp, &wantrsrc); + + /* Resource fork names are not cached. 
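 *
 * [Editor's note -- added to this document, not part of the xnu-792
 * patch. The name cache is keyed by a single component, but a resource
 * fork lookup also consumes the "/..namedfork/rsrc" suffix via
 * forkcomponent(), so a cached entry could never round-trip; hence
 * MAKEENTRY is cleared here. A condensed form of the suffix test:]
 *
 *      suffix = cnp->cn_nameptr + cnp->cn_namelen;
 *      if (bcmp(suffix, _PATH_RSRCFORKSPEC, sizeof(_PATH_RSRCFORKSPEC)) == 0)
 *              wantrsrc = 1;           /* consume = sizeof(...) - 1 */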
*/ + if (wantrsrc) + cnp->cn_flags &= ~MAKEENTRY; + + if (hfs_lock(dcp, HFS_EXCLUSIVE_LOCK) != 0) { + goto notfound; + } /* No need to go to catalog if there are no children */ - if (dcp->c_entries == 0) + if (dcp->c_entries == 0) { + hfs_unlock(dcp); goto notfound; + } bzero(&cndesc, sizeof(cndesc)); cndesc.cd_nameptr = cnp->cn_nameptr; @@ -228,22 +219,27 @@ hfs_lookup(ap) cndesc.cd_parentcnid = dcp->c_cnid; cndesc.cd_hint = dcp->c_childhint; - /* Lock catalog b-tree */ - retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (retval) - goto exit; - retval = cat_lookup(hfsmp, &cndesc, wantrsrc, &desc, &attr, &fork); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + retval = cat_lookup(hfsmp, &cndesc, wantrsrc, &desc, &attr, &fork, NULL); - if (retval == 0 && S_ISREG(attr.ca_mode) && attr.ca_blocks < fork.cf_blocks) - panic("hfs_lookup: bad ca_blocks (too small)"); - - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); + if (retval == 0) { dcp->c_childhint = desc.cd_hint; + hfs_unlock(dcp); goto found; } + hfs_unlock(dcp); notfound: + /* ENAMETOOLONG supersedes other errors */ + if (((nameiop != CREATE) && (nameiop != RENAME)) && + (retval != ENAMETOOLONG) && + (cnp->cn_namelen > kHFSPlusMaxFileNameChars)) { + retval = ENAMETOOLONG; + } else if (retval == 0) { + retval = ENOENT; + } /* * This is a non-existing entry * @@ -253,34 +249,23 @@ notfound: */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && - (ap->a_cnp->cn_flags & DOWHITEOUT) && - (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN)) { - /* - * Access for write is interpreted as allowing - * creation of files in the directory. - */ - retval = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_proc); - if (retval) { - goto exit; - } - - cnp->cn_flags |= SAVENAME; - if (!(flags & LOCKPARENT)) - VOP_UNLOCK(dvp, 0, p); + (cnp->cn_flags & DOWHITEOUT) && + (cnp->cn_flags & ISWHITEOUT))) && + (flags & ISLASTCN) && + (retval == ENOENT)) { retval = EJUSTRETURN; goto exit; } - /* * Insert name into cache (as non-existent) if appropriate. * * Only done for case-sensitive HFS+ volumes. */ - if ((hfsmp->hfs_flags & HFS_CASE_SENSITIVE) && - (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) - cache_enter(dvp, *vpp, cnp); - retval = ENOENT; + if ((retval == ENOENT) && + (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) && + (cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) { + cache_enter(dvp, NULL, cnp); + } goto exit; } @@ -300,186 +285,57 @@ found: wantrsrc = 0; forknamelen = 0; } - - /* - * If deleting, and at end of pathname, return - * parameters which can be used to remove file. - */ - if (nameiop == DELETE && (flags & ISLASTCN)) { - /* - * Write access to directory required to delete files. - */ - if ((retval = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_proc))) - goto exit; - - if (isDot) { /* Want to return ourselves */ - VREF(dvp); - *vpp = dvp; - goto exit; - } else if (flags & ISDOTDOT) { - retval = hfs_getcnode(hfsmp, dcp->c_parentcnid, - NULL, 0, NULL, NULL, &tvp); - if (retval) - goto exit; - } else { - retval = hfs_getcnode(hfsmp, attr.ca_fileid, - &desc, wantrsrc, &attr, &fork, &tvp); - if (retval) - goto exit; - } - - /* - * If directory is "sticky", then user must own - * the directory, or the file in it, else she - * may not delete it (unless she's root). This - * implements append-only directories. 
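 *
 * [Editor's restatement -- added to this document, not part of the
 * xnu-792 patch. The check below, turned around: with the sticky bit
 * set, deletion proceeds only when at least one of these holds:]
 *
 *      allowed = (cred->cr_uid == 0)                   /* caller is root */
 *             || (cred->cr_uid == dcp->c_uid)          /* owns the dir */
 *             || (tvp->v_type == VLNK)                 /* target is symlink */
 *             || (hfs_owner_rights(hfsmp, VTOC(tvp)->c_uid,
 *                                  cred, p, false) == 0); /* owns the file */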
- */ - if ((dcp->c_mode & S_ISTXT) && - (cred->cr_uid != 0) && - (cred->cr_uid != dcp->c_uid) && - (tvp->v_type != VLNK) && - (hfs_owner_rights(hfsmp, VTOC(tvp)->c_uid, cred, p, false))) { - vput(tvp); - retval = EPERM; - goto exit; - } - - /* - * If this is a link node then we need to save the name - * (of the link) so we can delete it from the catalog b-tree. - * In this case, hfs_remove will then free the component name. - * - * DJB - IS THIS STILL NEEDED???? - */ - if (tvp && (VTOC(tvp)->c_flag & C_HARDLINK)) - cnp->cn_flags |= SAVENAME; - - if (!(flags & LOCKPARENT)) - VOP_UNLOCK(dvp, 0, p); - *vpp = tvp; - goto exit; - } - - /* - * If renaming, return the cnode and save the current name. - */ - if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { - if ((retval = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_proc)) != 0) - goto exit; - /* - * Careful about locking second cnode. - */ - if (isDot) { - retval = EISDIR; - goto exit; - } else if (flags & ISDOTDOT) { - retval = hfs_getcnode(hfsmp, dcp->c_parentcnid, - NULL, 0, NULL, NULL, &tvp); - if (retval) - goto exit; - } else { - retval = hfs_getcnode(hfsmp, attr.ca_fileid, - &desc, wantrsrc, &attr, &fork, &tvp); - if (retval) + if (flags & ISLASTCN) { + switch(nameiop) { + case DELETE: + cnp->cn_flags &= ~MAKEENTRY; + break; + + case RENAME: + cnp->cn_flags &= ~MAKEENTRY; + if (isDot) { + retval = EISDIR; goto exit; + } + break; } - cnp->cn_flags |= SAVENAME; - if (!(flags & LOCKPARENT)) - VOP_UNLOCK(dvp, 0, p); - *vpp = tvp; - goto exit; - } + } - /* - * We must get the target cnode before unlocking - * the directory to insure that the cnode will not be removed - * before we get it. We prevent deadlock by always fetching - * cnodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the VFS_VGET for the - * cnode associated with ".." returns. We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - */ - if (flags & ISDOTDOT) { - VOP_UNLOCK(dvp, 0, p); /* race to get the cnode */ - retval = hfs_getcnode(hfsmp, dcp->c_parentcnid, - NULL, 0, NULL, NULL, &tvp); - if (retval) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (isDot) { + if ((retval = vnode_get(dvp))) goto exit; - } - if ((flags & LOCKPARENT) && (flags & ISLASTCN) && (dvp != tvp) && - (retval = vn_lock(dvp, LK_EXCLUSIVE, p))) { - vput(tvp); + *vpp = dvp; + } else if (flags & ISDOTDOT) { + if ((retval = hfs_vget(hfsmp, dcp->c_parentcnid, &tvp, 0))) goto exit; - } + *cnode_locked = 1; *vpp = tvp; - } else if (isDot) { - VREF(dvp); /* we want ourself, ie "." */ - *vpp = dvp; } else { int type = (attr.ca_mode & S_IFMT); - if (!(flags & ISLASTCN) && type != S_IFDIR && type != S_IFLNK) { + if (!(flags & ISLASTCN) && (type != S_IFDIR) && (type != S_IFLNK)) { retval = ENOTDIR; goto exit; } - retval = hfs_getcnode(hfsmp, attr.ca_fileid, - &desc, wantrsrc, &attr, &fork, &tvp); - if (retval) - goto exit; + /* Names with composed chars are not cached. */ + if (cnp->cn_namelen != desc.cd_namelen) + cnp->cn_flags &= ~MAKEENTRY; - if (!(flags & LOCKPARENT) || !(flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); - *vpp = tvp; - } + /* Resource fork vnode names include the fork specifier. 
*/ + if (wantrsrc && (flags & ISLASTCN)) + cnp->cn_namelen += forknamelen; - /* - * Insert name in cache if appropriate. - * - "." and ".." are not cached. - * - Resource fork names are not cached. - * - Names with composed chars are not cached. - */ - if ((cnp->cn_flags & MAKEENTRY) - && !isDot - && !(flags & ISDOTDOT) - && !wantrsrc - && (cnp->cn_namelen == VTOC(*vpp)->c_desc.cd_namelen)) { - cache_enter(dvp, *vpp, cnp); - } + retval = hfs_getnewvnode(hfsmp, dvp, cnp, &desc, wantrsrc, &attr, &fork, &tvp); + if (wantrsrc && (flags & ISLASTCN)) + cnp->cn_namelen -= forknamelen; - // - // have to patch up the resource fork name because - // it won't happen properly in the layers above us. - // - if (wantrsrc) { - if (VTOC(*vpp)->c_vp == NULL) { - if (VNAME(*vpp) == NULL) { - VNAME(*vpp) = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); - } - if (VPARENT(*vpp) == NULL) { - vget(dvp, 0, p); - VPARENT(*vpp) = dvp; - } - } else { - if (VNAME(*vpp) == NULL) { - // the +1/-2 thing is to skip the leading "/" on the rsrc fork spec - // and to not count the trailing null byte at the end of the string. - VNAME(*vpp) = add_name(_PATH_RSRCFORKSPEC+1, sizeof(_PATH_RSRCFORKSPEC)-2, 0, 0); - } - if (VPARENT(*vpp) == NULL && *vpp != VTOC(*vpp)->c_vp) { - VPARENT(*vpp) = VTOC(*vpp)->c_vp; - VTOC(*vpp)->c_flag |= C_VPREFHELD; - vget(VTOC(*vpp)->c_vp, 0, p); - } - } + if (retval) + goto exit; + *cnode_locked = 1; + *vpp = tvp; } - exit: cat_releasedesc(&desc); return (retval); @@ -488,8 +344,6 @@ exit: /* - * Based on vn_cache_lookup (which is vfs_cache_lookup in FreeBSD 3.1) - * * Name caching works as follows: * * Names found by directory scans are retained in a cache @@ -511,65 +365,44 @@ exit: __private_extern__ int -hfs_cache_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; +hfs_vnop_lookup(struct vnop_lookup_args *ap) { - struct vnode *dvp; + struct vnode *dvp = ap->a_dvp; struct vnode *vp; struct cnode *cp; struct cnode *dcp; - int lockparent; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; int flags = cnp->cn_flags; - struct proc *p = cnp->cn_proc; - u_long vpid; /* capability number of vnode */ - - dvp = ap->a_dvp; - lockparent = flags & LOCKPARENT; + int cnode_locked; - /* - * Check accessiblity of directory. - */ - if (dvp->v_type != VDIR) - return (ENOTDIR); - if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { - error = EROFS; - goto err_exit; - } + *vpp = NULL; dcp = VTOC(dvp); - if (((dcp->c_mode & S_IXALL) != S_IXALL) && (cnp->cn_cred->cr_uid != 0)) { - if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) { - goto err_exit; - } - } /* * Lookup an entry in the cache - * If the lookup succeeds, the vnode is returned in *vpp, and a status of -1 is - * returned. If the lookup determines that the name does not exist - * (negative cacheing), a status of ENOENT is returned. If the lookup - * fails, a status of zero is returned. + * + * If the lookup succeeds, the vnode is returned in *vpp, + * and a status of -1 is returned. + * + * If the lookup determines that the name does not exist + * (negative cacheing), a status of ENOENT is returned. + * + * If the lookup fails, a status of zero is returned. 
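 *
 * [Editor's sketch -- added to this document, not part of the xnu-792
 * patch. The same three-way protocol as a dispatch:]
 *
 *      switch (cache_lookup(dvp, vpp, cnp)) {
 *      case -1:        /* hit: *vpp returned with an iocount held */
 *              break;
 *      case ENOENT:    /* negative entry: name known not to exist */
 *              goto exit;
 *      default:        /* 0: miss or stale, do the real lookup */
 *              goto lookup;
 *      }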
*/ error = cache_lookup(dvp, vpp, cnp); if (error != -1) { - if (error == 0) { /* Unsuccessfull */ - goto lookup; - } - - if (error == ENOENT) { - goto err_exit; - } + if (error == ENOENT) /* found a negative cache entry */ + goto exit; + goto lookup; /* did not find it in the cache */ } - /* We have a name that matched */ + /* + * We have a name that matched + * cache_lookup returns the vp with an iocount reference already taken + */ + error = 0; vp = *vpp; - vpid = vp->v_id; /* * If this is a hard-link vnode then we need to update @@ -578,40 +411,32 @@ hfs_cache_lookup(ap) * getattrlist calls to return the correct link info. */ cp = VTOC(vp); - if ((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK) && - ((cp->c_parentcnid != VTOC(ap->a_dvp)->c_cnid) || - (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0))) { - - struct cat_desc desc; - /* - * Get an updated descriptor - */ - bzero(&desc, sizeof(desc)); - desc.cd_nameptr = cnp->cn_nameptr; - desc.cd_namelen = cnp->cn_namelen; - desc.cd_parentcnid = VTOC(ap->a_dvp)->c_cnid; - desc.cd_hint = VTOC(ap->a_dvp)->c_childhint; - if (cat_lookup(VTOHFS(vp), &desc, 0, &desc, NULL, NULL) == 0) - replace_desc(cp, &desc); - } + if ((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) { + hfs_lock(cp, HFS_FORCE_LOCK); + if ((cp->c_parentcnid != VTOC(dvp)->c_cnid) || + (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) { + struct cat_desc desc; + int lockflags; - if (dvp == vp) { /* lookup on "." */ - VREF(vp); - error = 0; - } else if (flags & ISDOTDOT) { - /* - * Carefull on the locking policy, - * remember we always lock from parent to child, so have - * to release lock on child before trying to lock parent - * then regain lock if needed - */ - VOP_UNLOCK(dvp, 0, p); - error = vget(vp, LK_EXCLUSIVE, p); - if (!error && lockparent && (flags & ISLASTCN)) - error = vn_lock(dvp, LK_EXCLUSIVE, p); - } else { - if ((flags & ISLASTCN) == 0 && vp->v_type == VREG) { + /* + * Get an updated descriptor + */ + bzero(&desc, sizeof(desc)); + desc.cd_nameptr = cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = VTOC(dvp)->c_cnid; + desc.cd_hint = VTOC(dvp)->c_childhint; + + lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK); + if (cat_lookup(VTOHFS(vp), &desc, 0, &desc, NULL, NULL, NULL) == 0) + replace_desc(cp, &desc); + hfs_systemfile_unlock(VTOHFS(dvp), lockflags); + } + hfs_unlock(cp); + } + if (dvp != vp && !(flags & ISDOTDOT)) { + if ((flags & ISLASTCN) == 0 && vnode_isreg(vp)) { int wantrsrc = 0; cnp->cn_consume = forkcomponent(cnp, &wantrsrc); @@ -620,70 +445,51 @@ hfs_cache_lookup(ap) /* Fork names are only for lookups */ if (cnp->cn_nameiop != LOOKUP && cnp->cn_nameiop != CREATE) { + vnode_put(vp); error = EPERM; - - goto err_exit; + goto exit; } } - + /* + * Use cnode's rsrcfork vnode if possible. + */ if (wantrsrc) { - /* Use cnode's rsrcfork vnode (if available) */ - if (cp->c_rsrc_vp != NULL) { - *vpp = vp = cp->c_rsrc_vp; - if (VNAME(vp) == NULL) { - // the +1/-2 thing is to skip the leading "/" on the rsrc fork spec - // and to not count the trailing null byte at the end of the string. 
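/*
 * [Editor's note -- added to this document, not part of the xnu-792
 * patch. The replacement code just below retires this VNAME/VPARENT
 * patch-up in favor of the vid-revalidation idiom: snapshot the vnode
 * id first, then take an iocount only if the vnode has not been
 * recycled in between. In outline:]
 *
 *      int vid = vnode_vid(cp->c_rsrc_vp);     /* snapshot identity */
 *      if (vnode_getwithvid(cp->c_rsrc_vp, vid) == 0) {
 *              ... use the resource-fork vnode ...
 *              vnode_put(cp->c_rsrc_vp);
 *      } else {
 *              ... vnode was recycled; redo the lookup ...
 *      }
 */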
- VNAME(vp) = add_name(_PATH_RSRCFORKSPEC+1, sizeof(_PATH_RSRCFORKSPEC)-2, 0, 0); - } - if (VPARENT(vp) == NULL) { - vget(cp->c_vp, 0, p); - VPARENT(vp) = cp->c_vp; - } - vpid = vp->v_id; - } else { - goto lookup; + int vid; + + *vpp = NULL; + + if (cp->c_rsrc_vp == NULL) { + vnode_put(vp); + goto lookup; } + vid = vnode_vid(cp->c_rsrc_vp); + + error = vnode_getwithvid(cp->c_rsrc_vp, vid); + if (error) { + vnode_put(vp); + goto lookup; + } + *vpp = cp->c_rsrc_vp; + vnode_put(vp); + vp = *vpp; } } - error = vget(vp, 0, p); - if (error == 0) { - if (VTOC(vp) == NULL || vp->v_data != (void *)cp) { - panic("hfs: cache lookup: my cnode disappeared/went bad! vp 0x%x 0x%x 0x%x\n", - vp, vp->v_data, cp); - } - if (cnp->cn_nameiop == LOOKUP && - (!(flags & ISLASTCN) || (flags & SHAREDLEAF))) - error = lockmgr(&VTOC(vp)->c_lock, LK_SHARED, NULL, p); - else - error = lockmgr(&VTOC(vp)->c_lock, LK_EXCLUSIVE, NULL, p); - } - if (!lockparent || error || !(flags & ISLASTCN)) { - (void) lockmgr(&dcp->c_lock, LK_RELEASE, NULL, p); - } } + return (error); + +lookup: /* - * Check that the capability number did not change - * while we were waiting for the lock. + * The vnode was not in the name cache or it was stale. + * + * So we need to do a real lookup. */ - if (!error) { - if (vpid == vp->v_id) - return (0); - /* - * The above is the NORMAL exit, after this point is an error - * condition. - */ - vput(vp); - if (lockparent && (dvp != vp) && (flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); - } - - if ((error = vn_lock(dvp, LK_EXCLUSIVE, p))) - return (error); -lookup: - return (hfs_lookup(ap)); + cnode_locked = 0; -err_exit: - *vpp = NULL; + error = hfs_lookup(dvp, vpp, cnp, ap->a_context, &cnode_locked); + + if (cnode_locked) + hfs_unlock(VTOC(*vpp)); +exit: return (error); } @@ -714,10 +520,11 @@ forkcomponent(struct componentname *cnp, int *rsrcfork) consume = sizeof(_PATH_DATAFORKSPEC) - 1; } -#ifdef LEGACY_FORK_NAMES +#if LEGACY_FORK_NAMES else if (bcmp(suffix, LEGACY_RSRCFORKSPEC, sizeof(LEGACY_RSRCFORKSPEC)) == 0) { consume = sizeof(LEGACY_RSRCFORKSPEC) - 1; *rsrcfork = 1; + printf("HFS: /rsrc paths are deprecated (%s)\n", cnp->cn_nameptr); } #endif return (consume); diff --git a/bsd/hfs/hfs_macos_defs.h b/bsd/hfs/hfs_macos_defs.h index 33461cc17..56a0f2296 100644 --- a/bsd/hfs/hfs_macos_defs.h +++ b/bsd/hfs/hfs_macos_defs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -37,7 +37,6 @@ #include #include #include -#include #define TARGET_OS_MAC 0 diff --git a/bsd/hfs/hfs_mount.h b/bsd/hfs/hfs_mount.h index 28ee2fa90..d09334757 100644 --- a/bsd/hfs/hfs_mount.h +++ b/bsd/hfs/hfs_mount.h @@ -44,17 +44,18 @@ #ifdef __APPLE_API_UNSTABLE struct hfs_mount_args { +#ifndef KERNEL char *fspec; /* block special device to mount */ - struct export_args export; /* network export information */ +#endif uid_t hfs_uid; /* uid that owns hfs files (standard HFS only) */ gid_t hfs_gid; /* gid that owns hfs files (standard HFS only) */ mode_t hfs_mask; /* mask to be applied for hfs perms (standard HFS only) */ - u_long hfs_encoding; /* encoding for this volume (standard HFS only) */ + uint32_t hfs_encoding; /* encoding for this volume (standard HFS only) */ struct timezone hfs_timezone; /* user time zone info (standard HFS only) */ - int flags; /* mounting flags, see below */ + int flags; /* mounting flags, see below */ int journal_tbuffer_size; /* size in bytes of the journal transaction buffer */ - int journal_flags; /* flags to pass to journal_open/create */ - int journal_disable; /* don't use journaling (potentially dangerous) */ + int journal_flags; /* flags to pass to journal_open/create */ + int journal_disable; /* don't use journaling (potentially dangerous) */ }; #define HFSFSMNT_NOXONFILES 0x1 /* disable execute permissions for files */ diff --git a/bsd/hfs/hfs_notification.c b/bsd/hfs/hfs_notification.c index cb85a7ea8..60e96c5b1 100644 --- a/bsd/hfs/hfs_notification.c +++ b/bsd/hfs/hfs_notification.c @@ -25,11 +25,9 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -38,7 +36,6 @@ #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" -#include "hfs_lockf.h" #include "hfs_dbg.h" #include "hfs_mount.h" #include "hfs_quota.h" @@ -51,18 +48,22 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); + fsid_t fsid; + + fsid.val[0] = (long)hfsmp->hfs_raw_dev; + fsid.val[1] = (long)vfs_typenum(HFSTOVFS(hfsmp)); if (hfsmp->hfs_notification_conditions & VQ_LOWDISK) { /* Check to see whether the free space is back above the minimal level: */ if (hfs_freeblks(hfsmp, 1) > hfsmp->hfs_freespace_notify_desiredlevel) { hfsmp->hfs_notification_conditions &= ~VQ_LOWDISK; - vfs_event_signal(&HFSTOVFS(hfsmp)->mnt_stat.f_fsid, hfsmp->hfs_notification_conditions, NULL); + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, NULL); } } else { /* Check to see whether the free space fell below the requested limit: */ if (hfs_freeblks(hfsmp, 1) < hfsmp->hfs_freespace_notify_warninglimit) { hfsmp->hfs_notification_conditions |= VQ_LOWDISK; - vfs_event_signal(&HFSTOVFS(hfsmp)->mnt_stat.f_fsid, hfsmp->hfs_notification_conditions, NULL); + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, NULL); } }; } diff --git a/bsd/hfs/hfs_quota.c b/bsd/hfs/hfs_quota.c index cfe328de1..80b01d62c 100644 --- a/bsd/hfs/hfs_quota.c +++ b/bsd/hfs/hfs_quota.c @@ -62,12 +62,14 @@ #include #include #include -#include #include #include #include +#include #include #include +#include +#include #include #include @@ -77,7 +79,9 @@ /* * Quota name to error message mapping. */ +#if 0 static char *quotatypes[] = INITQFNAMES; +#endif /* * Set up the quotas for a cnode. @@ -96,14 +100,14 @@ hfs_getinoquota(cp) int error; vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; - hfsmp = VFSTOHFS(vp->v_mount); + hfsmp = VTOHFS(vp); /* * Set up the user quota based on file uid. 
* EINVAL means that quotas are not enabled. */ if (cp->c_dquot[USRQUOTA] == NODQUOT && (error = - dqget(vp, cp->c_uid, &hfsmp->hfs_qfiles[USRQUOTA], USRQUOTA, &cp->c_dquot[USRQUOTA])) && + dqget(cp->c_uid, &hfsmp->hfs_qfiles[USRQUOTA], USRQUOTA, &cp->c_dquot[USRQUOTA])) && error != EINVAL) return (error); /* @@ -112,7 +116,7 @@ hfs_getinoquota(cp) */ if (cp->c_dquot[GRPQUOTA] == NODQUOT && (error = - dqget(vp, cp->c_gid, &hfsmp->hfs_qfiles[GRPQUOTA], GRPQUOTA, &cp->c_dquot[GRPQUOTA])) && + dqget(cp->c_gid, &hfsmp->hfs_qfiles[GRPQUOTA], GRPQUOTA, &cp->c_dquot[GRPQUOTA])) && error != EINVAL) return (error); return (0); @@ -125,7 +129,7 @@ int hfs_chkdq(cp, change, cred, flags) register struct cnode *cp; int64_t change; - struct ucred *cred; + kauth_cred_t cred; int flags; { register struct dquot *dq; @@ -144,10 +148,8 @@ hfs_chkdq(cp, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + ncurbytes = dq->dq_curbytes + change; if (ncurbytes >= 0) dq->dq_curbytes = ncurbytes; @@ -155,13 +157,15 @@ hfs_chkdq(cp, change, cred, flags) dq->dq_curbytes = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } p = current_proc(); if (cred == NOCRED) - cred = kernproc->p_ucred; - if ((cred->cr_uid != 0) || (p->p_flag & P_FORCEQUOTA)) { + cred = proc_ucred(kernproc); + if (suser(cred, NULL) || proc_forcequota(p)) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; @@ -175,12 +179,12 @@ hfs_chkdq(cp, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + dq->dq_curbytes += change; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } } return (error); @@ -194,26 +198,30 @@ int hfs_chkdqchg(cp, change, cred, type) struct cnode *cp; int64_t change; - struct ucred *cred; + kauth_cred_t cred; int type; { register struct dquot *dq = cp->c_dquot[type]; - u_int64_t ncurbytes = dq->dq_curbytes + change; + u_int64_t ncurbytes; struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; - + + dqlock(dq); + + ncurbytes = dq->dq_curbytes + change; /* * If user would exceed their hard limit, disallow space allocation. */ if (ncurbytes >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && - cp->c_uid == cred->cr_uid) { + cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\n%s: write failed, %s disk limit reached\n", - vp->v_mount->mnt_stat.f_mntonname, + printf("\nwrite failed, %s disk limit reached\n", quotatypes[type]); #endif dq->dq_flags |= DQ_BLKS; } + dqunlock(dq); + return (EDQUOT); } /* @@ -221,31 +229,38 @@ hfs_chkdqchg(cp, change, cred, type) * allocation. Reset time limit as they cross their soft limit. 
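 *
 * [Editor's outline -- added to this document, not part of the xnu-792
 * patch. The policy implemented below, in brief:]
 *
 *      if (ncurbytes >= hard limit)            reject with EDQUOT
 *      else if (ncurbytes >= soft limit)
 *              if (previously under soft)      start grace timer, allow
 *              else if (grace timer expired)   reject with EDQUOT
 *              else                            allow
 *
 * The dqlock() now held across the whole check keeps the usage figure
 * and the timer update consistent.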
*/ if (ncurbytes >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { + struct timeval tv; + + microuptime(&tv); if (dq->dq_curbytes < dq->dq_bsoftlimit) { - dq->dq_btime = time.tv_sec + - VFSTOHFS(vp->v_mount)->hfs_qfiles[type].qf_btime; + dq->dq_btime = tv.tv_sec + + VTOHFS(vp)->hfs_qfiles[type].qf_btime; #if 0 - if (cp->c_uid == cred->cr_uid) - printf("\n%s: warning, %s %s\n", - vp->v_mount->mnt_stat.f_mntonname, + if (cp->c_uid == kauth_cred_getuid(cred)) + printf("\nwarning, %s %s\n", quotatypes[type], "disk quota exceeded"); #endif + dqunlock(dq); + return (0); } - if (time.tv_sec > dq->dq_btime) { + if (tv.tv_sec > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && - cp->c_uid == cred->cr_uid) { + cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\n%s: write failed, %s %s\n", - vp->v_mount->mnt_stat.f_mntonname, + printf("\nwrite failed, %s %s\n", quotatypes[type], "disk quota exceeded for too long"); #endif dq->dq_flags |= DQ_BLKS; } + dqunlock(dq); + return (EDQUOT); } } + dqunlock(dq); + return (0); } @@ -256,7 +271,7 @@ int hfs_chkiq(cp, change, cred, flags) register struct cnode *cp; long change; - struct ucred *cred; + kauth_cred_t cred; int flags; { register struct dquot *dq; @@ -274,10 +289,8 @@ hfs_chkiq(cp, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + ncurinodes = dq->dq_curinodes + change; if (ncurinodes >= 0) dq->dq_curinodes = ncurinodes; @@ -285,13 +298,15 @@ hfs_chkiq(cp, change, cred, flags) dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } p = current_proc(); if (cred == NOCRED) - cred = kernproc->p_ucred; - if ((cred->cr_uid != 0) || (p->p_flag & P_FORCEQUOTA)) { + cred = proc_ucred(kernproc); + if (suser(cred, NULL) || proc_forcequota(p)) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; @@ -305,12 +320,12 @@ hfs_chkiq(cp, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = cp->c_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } } return (error); @@ -324,26 +339,30 @@ int hfs_chkiqchg(cp, change, cred, type) struct cnode *cp; long change; - struct ucred *cred; + kauth_cred_t cred; int type; { register struct dquot *dq = cp->c_dquot[type]; - long ncurinodes = dq->dq_curinodes + change; + long ncurinodes; struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; + dqlock(dq); + + ncurinodes = dq->dq_curinodes + change; /* * If user would exceed their hard limit, disallow cnode allocation. */ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && - cp->c_uid == cred->cr_uid) { + cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\n%s: write failed, %s cnode limit reached\n", - vp->v_mount->mnt_stat.f_mntonname, + printf("\nwrite failed, %s cnode limit reached\n", quotatypes[type]); #endif dq->dq_flags |= DQ_INODS; } + dqunlock(dq); + return (EDQUOT); } /* @@ -351,31 +370,38 @@ hfs_chkiqchg(cp, change, cred, type) * allocation. Reset time limit as they cross their soft limit. 
*/ if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + struct timeval tv; + + microuptime(&tv); if (dq->dq_curinodes < dq->dq_isoftlimit) { - dq->dq_itime = time.tv_sec + - VFSTOHFS(vp->v_mount)->hfs_qfiles[type].qf_itime; + dq->dq_itime = tv.tv_sec + + VTOHFS(vp)->hfs_qfiles[type].qf_itime; #if 0 - if (cp->c_uid == cred->cr_uid) - printf("\n%s: warning, %s %s\n", - vp->v_mount->mnt_stat.f_mntonname, + if (cp->c_uid == kauth_cred_getuid(cred)) + printf("\nwarning, %s %s\n", quotatypes[type], "cnode quota exceeded"); #endif + dqunlock(dq); + return (0); } - if (time.tv_sec > dq->dq_itime) { + if (tv.tv_sec > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && - cp->c_uid == cred->cr_uid) { + cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\n%s: write failed, %s %s\n", - vp->v_mount->mnt_stat.f_mntonname, + printf("\nwrite failed, %s %s\n", quotatypes[type], "cnode quota exceeded for too long"); #endif dq->dq_flags |= DQ_INODS; } + dqunlock(dq); + return (EDQUOT); } } + dqunlock(dq); + return (0); } @@ -389,12 +415,11 @@ hfs_chkdquot(cp) register struct cnode *cp; { struct vnode *vp = cp->c_vp ? cp->c_vp : cp->c_rsrc_vp; - struct hfsmount *hfsmp = VFSTOHFS(vp->v_mount); + struct hfsmount *hfsmp = VTOHFS(vp); register int i; for (i = 0; i < MAXQUOTAS; i++) { - if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP || - (hfsmp->hfs_qfiles[i].qf_qflags & (QTF_OPENING|QTF_CLOSING))) + if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP) continue; if (cp->c_dquot[i] == NODQUOT) { vprint("chkdquot: missing dquot", vp); @@ -411,132 +436,181 @@ hfs_chkdquot(cp) /* * Q_QUOTAON - set up a quota file for a particular file system. */ +struct hfs_quotaon_cargs { + int error; +}; + +static int +hfs_quotaon_callback(struct vnode *vp, void *cargs) +{ + struct hfs_quotaon_cargs *args; + + args = (struct hfs_quotaon_cargs *)cargs; + + args->error = hfs_getinoquota(VTOC(vp)); + if (args->error) + return (VNODE_RETURNED_DONE); + + return (VNODE_RETURNED); +} + int -hfs_quotaon(p, mp, type, fname, segflg) +hfs_quotaon(p, mp, type, fnamep) struct proc *p; struct mount *mp; register int type; - caddr_t fname; - enum uio_seg segflg; + caddr_t fnamep; { struct hfsmount *hfsmp = VFSTOHFS(mp); - struct vnode *vp, **vpp; - struct vnode *nextvp; - struct dquot *dq; - int error; - struct nameidata nd; + struct quotafile *qfp; + struct vnode *vp; + int error = 0; + struct hfs_quotaon_cargs args; - vpp = &hfsmp->hfs_qfiles[type].qf_vp; - NDINIT(&nd, LOOKUP, FOLLOW, segflg, fname, p); - if (error = vn_open(&nd, FREAD|FWRITE, 0)) - return (error); - vp = nd.ni_vp; - VOP_UNLOCK(vp, 0, p); - if (vp->v_type != VREG) { - (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); - return (EACCES); + qfp = &hfsmp->hfs_qfiles[type]; + + if ( (qf_get(qfp, QTF_OPENING)) ) + return (0); + + error = vnode_open(fnamep, FREAD|FWRITE, 0, 0, &vp, NULL); + if (error) { + goto out; } - if (*vpp != vp) - hfs_quotaoff(p, mp, type); - hfsmp->hfs_qfiles[type].qf_qflags |= QTF_OPENING; - mp->mnt_flag |= MNT_QUOTA; - vp->v_flag |= VNOFLUSH; - *vpp = vp; + if (!vnode_isreg(vp)) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + error = EACCES; + goto out; + } + vfs_setflags(mp, (uint64_t)((unsigned int)MNT_QUOTA)); + vnode_setnoflush(vp); /* * Save the credential of the process that turned on quotas. 
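 *
 * [Editor's note -- added to this document, not part of the xnu-792
 * patch. The credential handling before and after this patch, side by
 * side; kauth treats credentials as opaque, reference-counted objects:]
 *
 *      old:    crhold(p->p_ucred);
 *              hfsmp->hfs_qfiles[type].qf_cred = p->p_ucred;
 *      new:    qfp->qf_cred = kauth_cred_proc_ref(p);
 *
 * The matching release moves from crfree() to kauth_cred_rele(), as in
 * the hfs_quotaoff() teardown further on.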
*/ - crhold(p->p_ucred); - hfsmp->hfs_qfiles[type].qf_cred = p->p_ucred; - /* Finish initializing the quota file */ - if (error = dqfileopen(&hfsmp->hfs_qfiles[type], type)) - goto exit; + qfp->qf_cred = kauth_cred_proc_ref(p); + qfp->qf_vp = vp; + /* + * Finish initializing the quota file + */ + error = dqfileopen(qfp, type); + if (error) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + + kauth_cred_rele(qfp->qf_cred); + qfp->qf_cred = NOCRED; + qfp->qf_vp = NULLVP; + goto out; + } + qf_put(qfp, QTF_OPENING); + /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for cnodes being modified. + * + * hfs_quota_callback will be called for each vnode open for + * 'write' (VNODE_WRITEABLE) hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback */ -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) - continue; - if (vget(vp, LK_EXCLUSIVE, p)) - goto again; - if (error = hfs_getinoquota(VTOC(vp))) { - vput(vp); - break; - } - vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } -exit: - hfsmp->hfs_qfiles[type].qf_qflags &= ~QTF_OPENING; - if (error) + args.error = 0; + + vnode_iterate(mp, VNODE_WRITEABLE | VNODE_WAIT, hfs_quotaon_callback, (void *)&args); + + error = args.error; + + if (error) { hfs_quotaoff(p, mp, type); + } + return (error); + +out: + qf_put(qfp, QTF_OPENING); + return (error); } + /* * Q_QUOTAOFF - turn off disk quotas for a filesystem. */ +struct hfs_quotaoff_cargs { + int type; +}; + +static int +hfs_quotaoff_callback(struct vnode *vp, void *cargs) +{ + struct hfs_quotaoff_cargs *args; + struct cnode *cp; + struct dquot *dq; + + args = (struct hfs_quotaoff_cargs *)cargs; + + cp = VTOC(vp); + + dq = cp->c_dquot[args->type]; + cp->c_dquot[args->type] = NODQUOT; + + dqrele(dq); + + return (VNODE_RETURNED); +} + int -hfs_quotaoff(p, mp, type) - struct proc *p; - struct mount *mp; - register int type; +hfs_quotaoff(__unused struct proc *p, struct mount *mp, register int type) { - struct vnode *vp; - struct vnode *qvp, *nextvp; + struct vnode *qvp; struct hfsmount *hfsmp = VFSTOHFS(mp); - struct dquot *dq; - struct cnode *cp; + struct quotafile *qfp; int error; - struct ucred *cred; + kauth_cred_t cred; + struct hfs_quotaoff_cargs args; + + qfp = &hfsmp->hfs_qfiles[type]; - if ((qvp = hfsmp->hfs_qfiles[type].qf_vp) == NULLVP) - return (0); - hfsmp->hfs_qfiles[type].qf_qflags |= QTF_CLOSING; + if ( (qf_get(qfp, QTF_CLOSING)) ) + return (0); + qvp = qfp->qf_vp; /* * Sync out any orpaned dirty dquot entries. */ - dqsync_orphans(&hfsmp->hfs_qfiles[type]); + dqsync_orphans(qfp); /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. 
+ * + * hfs_quotaoff_callback will be called for each vnode + * hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback */ -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (vget(vp, LK_EXCLUSIVE, p)) - goto again; - cp = VTOC(vp); - dq = cp->c_dquot[type]; - cp->c_dquot[type] = NODQUOT; - dqrele(vp, dq); - vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } + args.type = type; + + vnode_iterate(mp, VNODE_WAIT, hfs_quotaoff_callback, (void *)&args); + dqflush(qvp); /* Finish tearing down the quota file */ - dqfileclose(&hfsmp->hfs_qfiles[type], type); - qvp->v_flag &= ~VNOFLUSH; - error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); - hfsmp->hfs_qfiles[type].qf_vp = NULLVP; - cred = hfsmp->hfs_qfiles[type].qf_cred; + dqfileclose(qfp, type); + + vnode_clearnoflush(qvp); + error = vnode_close(qvp, FREAD|FWRITE, NULL); + + qfp->qf_vp = NULLVP; + cred = qfp->qf_cred; if (cred != NOCRED) { - hfsmp->hfs_qfiles[type].qf_cred = NOCRED; - crfree(cred); + qfp->qf_cred = NOCRED; + kauth_cred_rele(cred); } - hfsmp->hfs_qfiles[type].qf_qflags &= ~QTF_CLOSING; for (type = 0; type < MAXQUOTAS; type++) if (hfsmp->hfs_qfiles[type].qf_vp != NULLVP) break; if (type == MAXQUOTAS) - mp->mnt_flag &= ~MNT_QUOTA; + vfs_clearflags(mp, (uint64_t)((unsigned int)MNT_QUOTA)); + + qf_put(qfp, QTF_CLOSING); + return (error); } @@ -544,19 +618,25 @@ again: * Q_GETQUOTA - return current values in a dqblk structure. */ int -hfs_getquota(mp, id, type, addr) +hfs_getquota(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { struct dquot *dq; int error; - if (error = dqget(NULLVP, id, &VFSTOHFS(mp)->hfs_qfiles[type], type, &dq)) + error = dqget(id, &VFSTOHFS(mp)->hfs_qfiles[type], type, &dq); + if (error) return (error); - error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); - dqrele(NULLVP, dq); + dqlock(dq); + + bcopy(&dq->dq_dqb, datap, sizeof(dq->dq_dqb)); + + dqunlock(dq); + dqrele(dq); + return (error); } @@ -564,47 +644,47 @@ hfs_getquota(mp, id, type, addr) * Q_SETQUOTA - assign an entire dqblk structure. */ int -hfs_setquota(mp, id, type, addr) +hfs_setquota(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { - register struct dquot *dq; - struct dquot *ndq; + struct dquot *dq; struct hfsmount *hfsmp = VFSTOHFS(mp); - struct dqblk newlim; + struct dqblk * newlimp = (struct dqblk *) datap; + struct timeval tv; int error; - if (error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk))) - return (error); - if (error = dqget(NULLVP, id, &hfsmp->hfs_qfiles[type], type, &ndq)) + error = dqget(id, &hfsmp->hfs_qfiles[type], type, &dq); + if (error) return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were * under it, but now have a soft limit and are over it. 
*/ - newlim.dqb_curbytes = dq->dq_curbytes; - newlim.dqb_curinodes = dq->dq_curinodes; + newlimp->dqb_curbytes = dq->dq_curbytes; + newlimp->dqb_curinodes = dq->dq_curinodes; if (dq->dq_id != 0) { - newlim.dqb_btime = dq->dq_btime; - newlim.dqb_itime = dq->dq_itime; + newlimp->dqb_btime = dq->dq_btime; + newlimp->dqb_itime = dq->dq_itime; } - if (newlim.dqb_bsoftlimit && - dq->dq_curbytes >= newlim.dqb_bsoftlimit && - (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) - newlim.dqb_btime = time.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; - if (newlim.dqb_isoftlimit && - dq->dq_curinodes >= newlim.dqb_isoftlimit && - (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) - newlim.dqb_itime = time.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; - dq->dq_dqb = newlim; + if (newlimp->dqb_bsoftlimit && + dq->dq_curbytes >= newlimp->dqb_bsoftlimit && + (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) { + microuptime(&tv); + newlimp->dqb_btime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; + } + if (newlimp->dqb_isoftlimit && + dq->dq_curinodes >= newlimp->dqb_isoftlimit && + (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) { + microuptime(&tv); + newlimp->dqb_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + } + bcopy(newlimp, &dq->dq_dqb, sizeof(dq->dq_dqb)); if (dq->dq_curbytes < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) @@ -615,7 +695,10 @@ hfs_setquota(mp, id, type, addr) else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); + + dqunlock(dq); + dqrele(dq); + return (0); } @@ -623,60 +706,78 @@ hfs_setquota(mp, id, type, addr) * Q_SETUSE - set current cnode and byte usage. */ int -hfs_setuse(mp, id, type, addr) +hfs_setuse(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { - register struct dquot *dq; struct hfsmount *hfsmp = VFSTOHFS(mp); - struct dquot *ndq; - struct dqblk usage; + struct dquot *dq; + struct timeval tv; int error; + struct dqblk *quotablkp = (struct dqblk *) datap; + + error = dqget(id, &hfsmp->hfs_qfiles[type], type, &dq); + if (error) + return (error); + dqlock(dq); - if (error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk))) - return (error); - if (error = dqget(NULLVP, id, &hfsmp->hfs_qfiles[type], type, &ndq)) - return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. 
*/ if (dq->dq_bsoftlimit && dq->dq_curbytes < dq->dq_bsoftlimit && - usage.dqb_curbytes >= dq->dq_bsoftlimit) - dq->dq_btime = time.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; + quotablkp->dqb_curbytes >= dq->dq_bsoftlimit) { + microuptime(&tv); + dq->dq_btime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_btime; + } if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && - usage.dqb_curinodes >= dq->dq_isoftlimit) - dq->dq_itime = time.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; - dq->dq_curbytes = usage.dqb_curbytes; - dq->dq_curinodes = usage.dqb_curinodes; + quotablkp->dqb_curinodes >= dq->dq_isoftlimit) { + microuptime(&tv); + dq->dq_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + } + dq->dq_curbytes = quotablkp->dqb_curbytes; + dq->dq_curinodes = quotablkp->dqb_curinodes; if (dq->dq_curbytes < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); + + dqunlock(dq); + dqrele(dq); + return (0); } + /* * Q_SYNC - sync quota files to disk. */ +static int +hfs_qsync_callback(struct vnode *vp, __unused void *cargs) +{ + struct cnode *cp; + struct dquot *dq; + int i; + + cp = VTOC(vp); + + for (i = 0; i < MAXQUOTAS; i++) { + dq = cp->c_dquot[i]; + if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) + dqsync(dq); + } + return (VNODE_RETURNED); +} + int hfs_qsync(mp) struct mount *mp; { struct hfsmount *hfsmp = VFSTOHFS(mp); - struct proc *p = current_proc(); /* XXX */ - struct vnode *vp, *nextvp; - struct dquot *dq; - int i, error; + int i; /* * Check if the mount point has any quotas. @@ -698,44 +799,14 @@ hfs_qsync(mp) /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. + * + * hfs_qsync_callback will be called for each vnode + * hung off of this mount point + * the vnode will be + * properly referenced and unreferenced around the callback */ - simple_lock(&mntvnode_slock); -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - if (vp->v_mount != mp) - goto again; - nextvp = vp->v_mntvnodes.le_next; - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) - goto again; - continue; - } - - /* Make sure that this is really an hfs vnode. 
*/ - if (vp->v_mount != mp || - vp->v_type == VNON || - vp->v_tag != VT_HFS || - VTOC(vp) == NULL) { - vput(vp); - simple_lock(&mntvnode_slock); - goto again; - } + vnode_iterate(mp, 0, hfs_qsync_callback, (void *)NULL); - for (i = 0; i < MAXQUOTAS; i++) { - dq = VTOC(vp)->c_dquot[i]; - if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) - dqsync(vp, dq); - } - vput(vp); - simple_lock(&mntvnode_slock); - if (vp->v_mntvnodes.le_next != nextvp) - goto again; - } - simple_unlock(&mntvnode_slock); return (0); } @@ -743,21 +814,21 @@ again: * Q_QUOTASTAT - get quota on/off status */ int -hfs_quotastat(mp, type, addr) +hfs_quotastat(mp, type, datap) struct mount *mp; register int type; - caddr_t addr; + caddr_t datap; { struct hfsmount *hfsmp = VFSTOHFS(mp); int error = 0; int qstat; - if ((mp->mnt_flag & MNT_QUOTA) && (hfsmp->hfs_qfiles[type].qf_vp != NULLVP)) + if ((((unsigned int)vfs_flags(mp)) & MNT_QUOTA) && (hfsmp->hfs_qfiles[type].qf_vp != NULLVP)) qstat = 1; /* quotas are on for this type */ else qstat = 0; /* quotas are off for this type */ - error = copyout ((caddr_t)&qstat, addr, sizeof(qstat)); + *((int *)datap) = qstat; return (error); } diff --git a/bsd/hfs/hfs_quota.h b/bsd/hfs/hfs_quota.h index 2b4ded302..bde8fc5cd 100644 --- a/bsd/hfs/hfs_quota.h +++ b/bsd/hfs/hfs_quota.h @@ -74,23 +74,23 @@ struct mount; struct proc; struct ucred; __BEGIN_DECLS -int hfs_chkdq __P((struct cnode *, int64_t, struct ucred *, int)); -int hfs_chkdqchg __P((struct cnode *, int64_t, struct ucred *, int)); -int hfs_chkiq __P((struct cnode *, long, struct ucred *, int)); -int hfs_chkiqchg __P((struct cnode *, long, struct ucred *, int)); -int hfs_getinoquota __P((struct cnode *)); -int hfs_getquota __P((struct mount *, u_long, int, caddr_t)); -int hfs_qsync __P((struct mount *mp)); -int hfs_quotaoff __P((struct proc *, struct mount *, int)); -int hfs_quotaon __P((struct proc *, struct mount *, int, caddr_t, enum uio_seg)); -int hfs_setquota __P((struct mount *, u_long, int, caddr_t)); -int hfs_setuse __P((struct mount *, u_long, int, caddr_t)); -int hfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); +int hfs_chkdq(struct cnode *, int64_t, struct ucred *, int); +int hfs_chkdqchg(struct cnode *, int64_t, struct ucred *, int); +int hfs_chkiq(struct cnode *, long, struct ucred *, int); +int hfs_chkiqchg(struct cnode *, long, struct ucred *, int); +int hfs_getinoquota(struct cnode *); +int hfs_getquota(struct mount *, u_long, int, caddr_t); +int hfs_qsync(struct mount *mp); +int hfs_quotaoff(struct proc *, struct mount *, int); +int hfs_quotaon(struct proc *, struct mount *, int, caddr_t); +int hfs_quotastat(struct mount *, int, caddr_t); +int hfs_setquota(struct mount *, u_long, int, caddr_t); +int hfs_setuse(struct mount *, u_long, int, caddr_t); __END_DECLS #if DIAGNOSTIC __BEGIN_DECLS -void hfs_chkdquot __P((struct cnode *)); +void hfs_chkdquot(struct cnode *); __END_DECLS #endif #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index d49ca795c..3a54712da 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -36,18 +36,22 @@ #include #include #include +#include #include #include +#include #include #include #include +#include #include #include "hfs.h" #include "hfs_endian.h" +#include "hfs_fsctl.h" #include "hfs_quota.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" @@ -64,204 +68,186 @@ enum { extern u_int32_t GetLogicalBlockSize(struct vnode *vp); -static int hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *); -static int hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *); -static int hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *); +extern int hfs_setextendedsecurity(struct hfsmount *, int); + + +static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); +static int hfs_clonefile(struct vnode *, int, int, int); +static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); /***************************************************************************** * -* Operations on vnodes +* I/O Operations on vnodes * *****************************************************************************/ +int hfs_vnop_read(struct vnop_read_args *); +int hfs_vnop_write(struct vnop_write_args *); +int hfs_vnop_ioctl(struct vnop_ioctl_args *); +int hfs_vnop_select(struct vnop_select_args *); +int hfs_vnop_blktooff(struct vnop_blktooff_args *); +int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); +int hfs_vnop_blockmap(struct vnop_blockmap_args *); +int hfs_vnop_strategy(struct vnop_strategy_args *); +int hfs_vnop_allocate(struct vnop_allocate_args *); +int hfs_vnop_pagein(struct vnop_pagein_args *); +int hfs_vnop_pageout(struct vnop_pageout_args *); +int hfs_vnop_bwrite(struct vnop_bwrite_args *); -/* -#% read vp L L L -# - vop_read { - IN struct vnode *vp; - INOUT struct uio *uio; - IN int ioflag; - IN struct ucred *cred; - - */ +/* + * Read data from a file. + */ int -hfs_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +hfs_vnop_read(struct vnop_read_args *ap) { - register struct uio *uio = ap->a_uio; - register struct vnode *vp = ap->a_vp; + uio_t uio = ap->a_uio; + struct vnode *vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - int devBlockSize = 0; + struct hfsmount *hfsmp; + off_t filesize; + off_t filebytes; + off_t start_resid = uio_resid(uio); + off_t offset = uio_offset(uio); int retval = 0; - off_t filesize; - off_t filebytes; - off_t start_resid = uio->uio_resid; /* Preflight checks */ - if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) - return (EPERM); /* can only read regular files */ - if (uio->uio_resid == 0) + if (!vnode_isreg(vp)) { + /* can only read regular files */ + if (vnode_isdir(vp)) + return (EISDIR); + else + return (EPERM); + } + if (start_resid == 0) return (0); /* Nothing left to do */ - if (uio->uio_offset < 0) + if (offset < 0) return (EINVAL); /* cant read from a negative offset */ cp = VTOC(vp); fp = VTOF(vp); + hfsmp = VTOHFS(vp); + + /* Protect against a size change. 
*/ + hfs_lock_truncate(cp, 0); + filesize = fp->ff_size; - filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize; - if (uio->uio_offset > filesize) { - if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE)) - return (EFBIG); - else - return (0); + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + if (offset > filesize) { + if ((hfsmp->hfs_flags & HFS_STANDARD) && + (offset > (off_t)MAXHFSFILESIZE)) { + retval = EFBIG; + } + goto exit; } - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); + (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); - retval = cluster_read(vp, uio, filesize, devBlockSize, 0); + retval = cluster_read(vp, uio, filesize, 0); - cp->c_flag |= C_ACCESS; + cp->c_touch_acctime = TRUE; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); + (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); /* * Keep track blocks read */ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) { + int took_cnode_lock = 0; + off_t bytesread; + + bytesread = start_resid - uio_resid(uio); + + /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ + if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { + hfs_lock(cp, HFS_FORCE_LOCK); + took_cnode_lock = 1; + } /* * If this file hasn't been seen since the start of * the current sampling period then start over. */ if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { - fp->ff_bytesread = start_resid - uio->uio_resid; - cp->c_atime = time.tv_sec; + struct timeval tv; + + fp->ff_bytesread = bytesread; + microtime(&tv); + cp->c_atime = tv.tv_sec; } else { - fp->ff_bytesread += start_resid - uio->uio_resid; + fp->ff_bytesread += bytesread; } + if (took_cnode_lock) + hfs_unlock(cp); } - +exit: + hfs_unlock_truncate(cp); return (retval); } /* - * Write data to a file or directory. -#% write vp L L L -# - vop_write { - IN struct vnode *vp; - INOUT struct uio *uio; - IN int ioflag; - IN struct ucred *cred; - - */ + * Write data to a file. + */ int -hfs_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +hfs_vnop_write(struct vnop_write_args *ap) { + uio_t uio = ap->a_uio; struct vnode *vp = ap->a_vp; - struct uio *uio = ap->a_uio; struct cnode *cp; struct filefork *fp; - struct proc *p; - struct timeval tv; - ExtendedVCB *vcb; - int devBlockSize = 0; - off_t origFileSize, writelimit, bytesToAdd; + struct hfsmount *hfsmp; + kauth_cred_t cred = NULL; + off_t origFileSize; + off_t writelimit; + off_t bytesToAdd; off_t actualBytesAdded; - u_long resid; - int eflags, ioflag; - int retval; off_t filebytes; - struct hfsmount *hfsmp; - int started_tr = 0, grabbed_lock = 0; + off_t offset; + size_t resid; + int eflags; + int ioflag = ap->a_ioflag; + int retval = 0; + int lockflags; + int cnode_locked = 0; + // LP64todo - fix this! uio_resid may be 64-bit value + resid = uio_resid(uio); + offset = uio_offset(uio); - if (uio->uio_offset < 0) + if (offset < 0) return (EINVAL); - if (uio->uio_resid == 0) + if (resid == 0) return (E_NONE); - if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) - return (EPERM); /* Can only write regular files */ + if (!vnode_isreg(vp)) + return (EPERM); /* Can only write regular files */ + + /* Protect against a size change. 
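 *
 * The locking discipline on this write path, sketched from the calls that
 * follow (truncate lock first, then the cnode lock, both dropped around
 * blocking cluster I/O; a sketch, not part of the original change):
 *
 *     hfs_lock_truncate(cp, TRUE);         // 1. block concurrent truncates
 *     hfs_lock(cp, HFS_EXCLUSIVE_LOCK);    // 2. then take the cnode lock
 *     ...
 *     hfs_unlock(cp);                      // 3. drop it around cluster_write
 *     retval = cluster_write(...);
 *     hfs_lock(cp, HFS_FORCE_LOCK);        // 4. re-take to update ff_size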
*/ + hfs_lock_truncate(VTOC(vp), TRUE); - ioflag = ap->a_ioflag; + if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(VTOC(vp)); + return (retval); + } + cnode_locked = 1; cp = VTOC(vp); fp = VTOF(vp); - vcb = VTOVCB(vp); - filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - - if (ioflag & IO_APPEND) - uio->uio_offset = fp->ff_size; - if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size) - return (EPERM); - - // XXXdbg - don't allow modification of the journal or journal_info_block - if (VTOHFS(vp)->jnl && cp->c_datafork) { - struct HFSPlusExtentDescriptor *extd; + hfsmp = VTOHFS(vp); + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; - extd = &cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { - return EPERM; - } + if (ioflag & IO_APPEND) { + uio_setoffset(uio, fp->ff_size); + offset = fp->ff_size; } - - writelimit = uio->uio_offset + uio->uio_resid; - - /* - * Maybe this should be above the vnode op call, but so long as - * file servers have no limits, I don't think it matters. - */ - p = uio->uio_procp; - if (vp->v_type == VREG && p && - writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { - psignal(p, SIGXFSZ); - return (EFBIG); + if ((cp->c_flags & APPEND) && offset != fp->ff_size) { + retval = EPERM; + goto exit; } - p = current_proc(); - - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); - resid = uio->uio_resid; origFileSize = fp->ff_size; eflags = kEFDeferMask; /* defer file block allocations */ - filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); - retval = 0; - - /* Now test if we need to extend the file */ - /* Doing so will adjust the filebytes for us */ - -#if QUOTA - if(writelimit > filebytes) { - bytesToAdd = writelimit - filebytes; - - retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)), - ap->a_cred, 0); - if (retval) - return (retval); - } -#endif /* QUOTA */ - - hfsmp = VTOHFS(vp); #ifdef HFS_SPARSE_DEV /* @@ -276,66 +262,70 @@ hfs_write(ap) } #endif /* HFS_SPARSE_DEV */ - if (writelimit > filebytes) { - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - } - if (hfsmp->jnl && (writelimit > filebytes)) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - return EINVAL; - } - started_tr = 1; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, + (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); + + /* Now test if we need to extend the file */ + /* Doing so will adjust the filebytes for us */ + + writelimit = offset + resid; + if (writelimit <= filebytes) + goto sizeok; + + cred = vfs_context_ucred(ap->a_context); +#if QUOTA + bytesToAdd = writelimit - filebytes; + retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)), + cred, 0); + if (retval) + goto exit; +#endif /* QUOTA */ + + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto exit; } while (writelimit > filebytes) { bytesToAdd = writelimit - filebytes; - if (ap->a_cred && suser(ap->a_cred, NULL) != 0) + if (cred && suser(cred, NULL) != 0) eflags |= kEFReserveMask; - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc()); - if (retval != E_NONE) - break; + /* Protect extents b-tree and allocation bitmap */ + lockflags = 
SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); /* Files that are changing size are not hot file candidates. */ if (hfsmp->hfc_stage == HFC_RECORDING) { fp->ff_bytesread = 0; } - retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd, + retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd, 0, eflags, &actualBytesAdded)); - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); + if ((actualBytesAdded == 0) && (retval == E_NONE)) retval = ENOSPC; if (retval != E_NONE) break; - filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE, - (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); - } - - // XXXdbg - if (started_tr) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 1); - - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); - started_tr = 0; - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - grabbed_lock = 0; + (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); } + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + (void) hfs_end_transaction(hfsmp); +sizeok: if (retval == E_NONE) { off_t filesize; off_t zero_off; off_t tail_off; off_t inval_start; off_t inval_end; - off_t io_start, io_end; + off_t io_start; int lflag; struct rl_entry *invalid_range; @@ -346,15 +336,15 @@ hfs_write(ap) lflag = (ioflag & IO_SYNC); - if (uio->uio_offset <= fp->ff_size) { - zero_off = uio->uio_offset & ~PAGE_MASK_64; + if (offset <= fp->ff_size) { + zero_off = offset & ~PAGE_MASK_64; /* Check the area between the zero_offset and the start of the transfer to see whether it is invalid and should be zero-filled as part of the transfer: */ - if (uio->uio_offset > zero_off) { - if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) + if (offset > zero_off) { + if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP) lflag |= IO_HEADZEROFILL; } } else { @@ -373,7 +363,7 @@ hfs_write(ap) will be handled by the cluster_write of the actual data. */ inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; - inval_end = uio->uio_offset & ~PAGE_MASK_64; + inval_end = offset & ~PAGE_MASK_64; zero_off = fp->ff_size; if ((fp->ff_size & PAGE_MASK_64) && @@ -397,6 +387,7 @@ hfs_write(ap) }; if (inval_start < inval_end) { + struct timeval tv; /* There's some range of data that's going to be marked invalid */ if (zero_off < inval_start) { @@ -404,20 +395,26 @@ hfs_write(ap) and the actual write will start on a page past inval_end. 
Now's the last chance to zero-fill the page containing the EOF: */ - retval = cluster_write(vp, (struct uio *) 0, + hfs_unlock(cp); + cnode_locked = 0; + retval = cluster_write(vp, (uio_t) 0, fp->ff_size, inval_start, - zero_off, (off_t)0, devBlockSize, + zero_off, (off_t)0, lflag | IO_HEADZEROFILL | IO_NOZERODIRTY); + hfs_lock(cp, HFS_FORCE_LOCK); + cnode_locked = 1; if (retval) goto ioerr_exit; + offset = uio_offset(uio); }; /* Mark the remaining area of the newly allocated space as invalid: */ rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges); - cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT; + microuptime(&tv); + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; zero_off = fp->ff_size = inval_end; }; - if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL; + if (offset > zero_off) lflag |= IO_HEADZEROFILL; }; /* Check to see whether the area between the end of the write and the end of @@ -441,23 +438,32 @@ hfs_write(ap) * made readable (removed from the invalid ranges) before cluster_write * tries to write it: */ - io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset; - io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit; + io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset; if (io_start < fp->ff_size) { + off_t io_end; + + io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit; rl_remove(io_start, io_end - 1, &fp->ff_invalidranges); }; + + hfs_unlock(cp); + cnode_locked = 0; retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off, - tail_off, devBlockSize, lflag | IO_NOZERODIRTY); - - if (uio->uio_offset > fp->ff_size) { - fp->ff_size = uio->uio_offset; + tail_off, lflag | IO_NOZERODIRTY); + offset = uio_offset(uio); + if (offset > fp->ff_size) { + fp->ff_size = offset; ubc_setsize(vp, fp->ff_size); /* XXX check errors */ + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) + fp->ff_bytesread = 0; + } + if (resid > uio_resid(uio)) { + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; } - if (resid > uio->uio_resid) - cp->c_flag |= C_CHANGE | C_UPDATE; } - HFS_KNOTE(vp, NOTE_WRITE); ioerr_exit: @@ -466,109 +472,466 @@ ioerr_exit: * we clear the setuid and setgid bits as a precaution against * tampering. */ - if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) - cp->c_mode &= ~(S_ISUID | S_ISGID); - + if (cp->c_mode & (S_ISUID | S_ISGID)) { + cred = vfs_context_ucred(ap->a_context); + if (resid > uio_resid(uio) && cred && suser(cred, NULL)) { + if (!cnode_locked) { + hfs_lock(cp, HFS_FORCE_LOCK); + cnode_locked = 1; + } + cp->c_mode &= ~(S_ISUID | S_ISGID); + } + } if (retval) { if (ioflag & IO_UNIT) { - (void)VOP_TRUNCATE(vp, origFileSize, - ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); - uio->uio_offset -= resid - uio->uio_resid; - uio->uio_resid = resid; - filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + if (!cnode_locked) { + hfs_lock(cp, HFS_FORCE_LOCK); + cnode_locked = 1; + } + (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, + 0, ap->a_context); + // LP64todo - fix this! 
resid needs to by user_ssize_t + uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); + uio_setresid(uio, resid); + filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + } + } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) { + if (!cnode_locked) { + hfs_lock(cp, HFS_FORCE_LOCK); + cnode_locked = 1; } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { - tv = time; - retval = VOP_UPDATE(vp, &tv, &tv, 1); + retval = hfs_update(vp, TRUE); } - vcb->vcbWrCnt++; + /* Updating vcbWrCnt doesn't need to be atomic. */ + hfsmp->vcbWrCnt++; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); - + (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); +exit: + if (cnode_locked) + hfs_unlock(cp); + hfs_unlock_truncate(cp); return (retval); } +/* support for the "bulk-access" fcntl */ -#ifdef HFS_SPARSE_DEV -struct hfs_backingstoreinfo { - int signature; /* == 3419115 */ - int version; /* version of this struct (1) */ - int backingfd; /* disk image file (on backing fs) */ - int bandsize; /* sparse disk image band size */ +#define CACHE_ELEMS 64 +#define CACHE_LEVELS 16 +#define PARENT_IDS_FLAG 0x100 + +/* from hfs_attrlist.c */ +extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, + mode_t obj_mode, struct mount *mp, + kauth_cred_t cred, struct proc *p); + +/* from vfs/vfs_fsevents.c */ +extern char *get_pathbuff(void); +extern void release_pathbuff(char *buff); + +struct access_cache { + int numcached; + int cachehits; /* these two for statistics gathering */ + int lookups; + unsigned int *acache; + Boolean *haveaccess; }; -#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) -#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) +struct access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + int *file_ids; /* IN: array of file ids */ + gid_t *groups; /* IN: array of groups */ + short *access; /* OUT: access info for each file (0 for 'has access') */ +}; -#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO) -#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) +struct user_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user_addr_t file_ids; /* IN: array of file ids */ + user_addr_t groups; /* IN: array of groups */ + user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ +}; -#endif /* HFS_SPARSE_DEV */ +/* + * Perform a binary search for the given parent_id. Return value is + * found/not found boolean, and indexp will be the index of the item + * or the index at which to insert the item if it's not found. + */ +static int +lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) +{ + unsigned int lo, hi; + int index, matches = 0; + + if (cache->numcached == 0) { + *indexp = 0; + return 0; // table is empty, so insert at index=0 and report no match + } + + if (cache->numcached > CACHE_ELEMS) { + /*printf("EGAD! numcached is %d... 
cut our losses and trim to %d\n", + cache->numcached, CACHE_ELEMS);*/ + cache->numcached = CACHE_ELEMS; + } + + lo = 0; + hi = cache->numcached - 1; + index = -1; + + /* perform binary search for parent_id */ + do { + unsigned int mid = (hi - lo)/2 + lo; + unsigned int this_id = cache->acache[mid]; + + if (parent_id == this_id) { + index = mid; + break; + } + + if (parent_id < this_id) { + hi = mid; + continue; + } + + if (parent_id > this_id) { + lo = mid + 1; + continue; + } + } while(lo < hi); + + /* check if lo and hi converged on the match */ + if (parent_id == cache->acache[hi]) { + index = hi; + } + + /* if no existing entry found, find index for new one */ + if (index == -1) { + index = (parent_id < cache->acache[hi]) ? hi : hi + 1; + matches = 0; + } else { + matches = 1; + } + + *indexp = index; + return matches; +} + +/* + * Add a node to the access_cache at the given index (or do a lookup first + * to find the index if -1 is passed in). We currently do a replace rather + * than an insert if the cache is full. + */ +static void +add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) +{ + int lookup_index = -1; + + /* need to do a lookup first if -1 passed for index */ + if (index == -1) { + if (lookup_bucket(cache, &lookup_index, nodeID)) { + if (cache->haveaccess[lookup_index] != access) { + /* change access info for existing entry... should never happen */ + cache->haveaccess[lookup_index] = access; + } + + /* mission accomplished */ + return; + } else { + index = lookup_index; + } + + } + + /* if the cache is full, do a replace rather than an insert */ + if (cache->numcached >= CACHE_ELEMS) { + //printf("cache is full (%d). replace at index %d\n", cache->numcached, index); + cache->numcached = CACHE_ELEMS-1; + + if (index > cache->numcached) { + // printf("index %d pinned to %d\n", index, cache->numcached); + index = cache->numcached; + } + } else if (index >= 0 && index < cache->numcached) { + /* only do bcopy if we're inserting */ + bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) ); + bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) ); + } + + cache->acache[index] = nodeID; + cache->haveaccess[index] = access; + cache->numcached++; +} + + +struct cinfo { + uid_t uid; + gid_t gid; + mode_t mode; + cnid_t parentcnid; +}; + +static int +snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg) +{ + struct cinfo *cip = (struct cinfo *)arg; + + cip->uid = attrp->ca_uid; + cip->gid = attrp->ca_gid; + cip->mode = attrp->ca_mode; + cip->parentcnid = descp->cd_parentcnid; + + return (0); +} + +/* + * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item + * isn't incore, then go to the catalog. 
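 *
 * In outline, the fallback chain implemented below:
 *
 *     if (cnid == skip_cp->c_cnid)         use the fsctl target's own cnode
 *     else if (hfs_chash_snoop() == 0)     in-core cnode hash, no disk I/O
 *     else                                 cat_getkeyplusattr() under a
 *                                          shared SFL_CATALOG lock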
+ */ +static int +do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid, + struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p) +{ + int error = 0; + + /* if this id matches the one the fsctl was called with, skip the lookup */ + if (cnid == skip_cp->c_cnid) { + cnattrp->ca_uid = skip_cp->c_uid; + cnattrp->ca_gid = skip_cp->c_gid; + cnattrp->ca_mode = skip_cp->c_mode; + keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + } else { + struct cinfo c_info; + + /* otherwise, check the cnode hash in case the file/dir is in core */ + if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) { + cnattrp->ca_uid = c_info.uid; + cnattrp->ca_gid = c_info.gid; + cnattrp->ca_mode = c_info.mode; + keyp->hfsPlus.parentID = c_info.parentcnid; + } else { + int lockflags; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* lookup this cnid in the catalog */ + error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); + + hfs_systemfile_unlock(hfsmp, lockflags); + + cache->lookups++; + } + } + + return (error); +} /* + * Compute whether we have access to the given directory (nodeID) and all its parents. Cache + * up to CACHE_LEVELS as we progress towards the root. + */ +static int +do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, + struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev ) +{ + int myErr = 0; + int myResult; + HFSCatalogNodeID thisNodeID; + unsigned long myPerms; + struct cat_attr cnattr; + int cache_index = -1; + CatalogKey catkey; + + int i = 0, ids_to_cache = 0; + int parent_ids[CACHE_LEVELS]; + + /* root always has access */ + if (!suser(myp_ucred, NULL)) { + return (1); + } + + thisNodeID = nodeID; + while (thisNodeID >= kRootDirID) { + myResult = 0; /* default to "no access" */ + + /* check the cache before resorting to hitting the catalog */ + + /* ASSUMPTION: access info of cached entries is "final"... i.e. 
no need + * to look any further after hitting cached dir */ + + if (lookup_bucket(cache, &cache_index, thisNodeID)) { + cache->cachehits++; + myResult = cache->haveaccess[cache_index]; + goto ExitThisRoutine; + } + + /* remember which parents we want to cache */ + if (ids_to_cache < CACHE_LEVELS) { + parent_ids[ids_to_cache] = thisNodeID; + ids_to_cache++; + } + + /* do the lookup (checks the cnode hash, then the catalog) */ + myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr); + if (myErr) { + goto ExitThisRoutine; /* no access */ + } + + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, + cnattr.ca_mode, hfsmp->hfs_mp, + myp_ucred, theProcPtr); + + if ( (myPerms & X_OK) == 0 ) { + myResult = 0; + goto ExitThisRoutine; /* no access */ + } + + /* up the hierarchy we go */ + thisNodeID = catkey.hfsPlus.parentID; + } + + /* if here, we have access to this node */ + myResult = 1; + + ExitThisRoutine: + if (myErr) { + //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID); + myResult = 0; + } + *err = myErr; + + /* cache the parent directory(ies) */ + for (i = 0; i < ids_to_cache; i++) { + /* small optimization: get rid of double-lookup for all these */ + // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult); + add_node(cache, -1, parent_ids[i], myResult); + } + + return (myResult); +} +/* end "bulk-access" support */ -#% ioctl vp U U U -# - vop_ioctl { - IN struct vnode *vp; - IN u_long command; - IN caddr_t data; - IN int fflag; - IN struct ucred *cred; - IN struct proc *p; - */ +/* + * Callback for use with freeze ioctl. + */ +static int +hfs_freezewrite_callback(struct vnode *vp, void *cargs) +{ + vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze"); + + return 0; +} -/* ARGSUSED */ +/* + * Control filesystem operating characteristics. + */ int -hfs_ioctl(ap) - struct vop_ioctl_args /* { - struct vnode *a_vp; +hfs_vnop_ioctl( struct vnop_ioctl_args /* { + vnode_t a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { + struct vnode * vp = ap->a_vp; + struct hfsmount *hfsmp = VTOHFS(vp); + vfs_context_t context = ap->a_context; + kauth_cred_t cred = vfs_context_ucred(context); + proc_t p = vfs_context_proc(context); + struct vfsstatfs *vfsp; + boolean_t is64bit; + + is64bit = proc_is64bit(p); + switch (ap->a_command) { + case HFS_RESIZE_VOLUME: { + u_int64_t newsize; + u_int64_t cursize; + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + newsize = *(u_int64_t *)ap->a_data; + cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + + if (newsize > cursize) { + return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context); + } else if (newsize < cursize) { + return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context); + } else { + return (0); + } + } + case HFS_CHANGE_NEXT_ALLOCATION: { + u_int32_t location; + + if (vnode_vfsisrdonly(vp)) { + return (EROFS); + } + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { + return (EACCES); /* must be owner of file system */ + } + if (!vnode_isvroot(vp)) { + return (EINVAL); + } + location = *(u_int32_t *)ap->a_data; + if (location > hfsmp->totalBlocks - 1) { + return (EINVAL); + } + /* Return previous value. 
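 *
 * A hypothetical user-space invocation, assuming this selector is
 * reachable through fsctl(2) (names and path are illustrative only):
 *
 *     u_int32_t block = 0x1000;   // desired next allocation block
 *     if (fsctl("/Volumes/HFSVol", HFS_CHANGE_NEXT_ALLOCATION, &block, 0) == 0)
 *         printf("previous nextAllocation: %u\n", block);  // old value returned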
*/ + *(u_int32_t *)ap->a_data = hfsmp->nextAllocation; + HFS_MOUNT_LOCK(hfsmp, TRUE); + hfsmp->nextAllocation = location; + hfsmp->vcbFlags |= 0xFF00; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + return (0); + } + #ifdef HFS_SPARSE_DEV case HFS_SETBACKINGSTOREINFO: { - struct hfsmount * hfsmp; struct vnode * bsfs_rootvp; struct vnode * di_vp; - struct file * di_fp; struct hfs_backingstoreinfo *bsdata; int error = 0; - hfsmp = VTOHFS(ap->a_vp); if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { return (EALREADY); } - if (ap->a_p->p_ucred->cr_uid != 0 && - ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { return (EACCES); /* must be owner of file system */ } bsdata = (struct hfs_backingstoreinfo *)ap->a_data; if (bsdata == NULL) { return (EINVAL); } - if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) { + if ((error = file_vnode(bsdata->backingfd, &di_vp))) { return (error); } - if (fref(di_fp) == -1) { - return (EBADF); + if ((error = vnode_getwithref(di_vp))) { + file_drop(bsdata->backingfd); + return(error); } - if (di_fp->f_type != DTYPE_VNODE) { - frele(di_fp); - return (EINVAL); - } - di_vp = (struct vnode *)di_fp->f_data; - if (ap->a_vp->v_mount == di_vp->v_mount) { - frele(di_fp); + + if (vnode_mount(vp) == vnode_mount(di_vp)) { + (void)vnode_put(di_vp); + file_drop(bsdata->backingfd); return (EINVAL); } @@ -576,28 +939,30 @@ hfs_ioctl(ap) * Obtain the backing fs root vnode and keep a reference * on it. This reference will be dropped in hfs_unmount. */ - error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp); + error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */ if (error) { - frele(di_fp); + (void)vnode_put(di_vp); + file_drop(bsdata->backingfd); return (error); } - VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p); /* Hold on to the reference */ + vnode_ref(bsfs_rootvp); + vnode_put(bsfs_rootvp); hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; hfsmp->hfs_sparsebandblks *= 4; - frele(di_fp); + (void)vnode_put(di_vp); + file_drop(bsdata->backingfd); return (0); } case HFS_CLRBACKINGSTOREINFO: { - struct hfsmount * hfsmp; struct vnode * tmpvp; - hfsmp = VTOHFS(ap->a_vp); - if (ap->a_p->p_ucred->cr_uid != 0 && - ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); + if (suser(cred, NULL) && + kauth_cred_getuid(cred) != vfsp->f_owner) { return (EACCES); /* must be owner of file system */ } if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && @@ -607,166 +972,502 @@ hfs_ioctl(ap) tmpvp = hfsmp->hfs_backingfs_rootvp; hfsmp->hfs_backingfs_rootvp = NULLVP; hfsmp->hfs_sparsebandblks = 0; - vrele(tmpvp); + vnode_rele(tmpvp); } return (0); } #endif /* HFS_SPARSE_DEV */ - case 6: { + case F_FREEZE_FS: { + struct mount *mp; + task_t task; + + if (!is_suser()) + return (EACCES); + + mp = vnode_mount(vp); + hfsmp = VFSTOHFS(mp); + + if (!(hfsmp->jnl)) + return (ENOTSUP); + + task = current_task(); + task_working_set_disable(task); + + // flush things before we get started to try and prevent + // dirty data from being paged out while we're frozen. + // note: can't do this after taking the lock as it will + // deadlock against ourselves. 
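// A hypothetical caller's view of the freeze/thaw pair, assuming these
// commands are issued via fcntl(2) on the volume (illustrative only):
//
//     int fd = open("/Volumes/HFSVol", O_RDONLY);
//     fcntl(fd, F_FREEZE_FS, 0);   // journal flushed, new writes blocked
//     ...                          // e.g. snapshot the underlying device
//     fcntl(fd, F_THAW_FS, 0);     // only the freezing process may thaw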
+ vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); + hfs_global_exclusive_lock_acquire(hfsmp); + journal_flush(hfsmp->jnl); + // don't need to iterate on all vnodes, we just need to + // wait for writes to the system files and the device vnode + // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); + if (HFSTOVCB(hfsmp)->extentsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); + if (HFSTOVCB(hfsmp)->catalogRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze"); + if (HFSTOVCB(hfsmp)->allocationsRefNum) + vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze"); + if (hfsmp->hfs_attribute_vp) + vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze"); + vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze"); + + hfsmp->hfs_freezing_proc = current_proc(); + + return (0); + } + + case F_THAW_FS: { + if (!is_suser()) + return (EACCES); + + // if we're not the one who froze the fs then we + // can't thaw it. + if (hfsmp->hfs_freezing_proc != current_proc()) { + return EINVAL; + } + + // NOTE: if you add code here, also go check the + // code that "thaws" the fs in hfs_vnop_close() + // + hfsmp->hfs_freezing_proc = NULL; + hfs_global_exclusive_lock_release(hfsmp); + + return (0); + } + +#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t) +#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS) + + case HFS_BULKACCESS_FSCTL: + case HFS_BULKACCESS: { + /* + * NOTE: on entry, the vnode is locked. Incase this vnode + * happens to be in our list of file_ids, we'll note it + * avoid calling hfs_chashget_nowait() on that id as that + * will cause a "locking against myself" panic. + */ + Boolean check_leaf = true; + + struct user_access_t *user_access_structp; + struct user_access_t tmp_user_access_t; + struct access_cache cache; + + int error = 0, i; + + dev_t dev = VTOC(vp)->c_dev; + + short flags; + struct ucred myucred; /* XXX ILLEGAL */ + int num_files; + int *file_ids = NULL; + short *access = NULL; + + cnid_t cnid; + cnid_t prevParent_cnid = 0; + unsigned long myPerms; + short myaccess = 0; + struct cat_attr cnattr; + CatalogKey catkey; + struct cnode *skip_cp = VTOC(vp); + struct vfs_context my_context; + + /* first, return error if not run as root */ + if (cred->cr_ruid != 0) { + return EPERM; + } + + /* initialize the local cache and buffers */ + cache.numcached = 0; + cache.cachehits = 0; + cache.lookups = 0; + + file_ids = (int *) get_pathbuff(); + access = (short *) get_pathbuff(); + cache.acache = (int *) get_pathbuff(); + cache.haveaccess = (Boolean *) get_pathbuff(); + + if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) { + release_pathbuff((char *) file_ids); + release_pathbuff((char *) access); + release_pathbuff((char *) cache.acache); + release_pathbuff((char *) cache.haveaccess); + + return ENOMEM; + } + + /* struct copyin done during dispatch... 
need to copy file_id array separately */ + if (ap->a_data == NULL) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if (is64bit) { + user_access_structp = (struct user_access_t *)ap->a_data; + } + else { + struct access_t * accessp = (struct access_t *)ap->a_data; + tmp_user_access_t.uid = accessp->uid; + tmp_user_access_t.flags = accessp->flags; + tmp_user_access_t.num_groups = accessp->num_groups; + tmp_user_access_t.num_files = accessp->num_files; + tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids); + tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups); + tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access); + user_access_structp = &tmp_user_access_t; + } + + num_files = user_access_structp->num_files; + if (num_files < 1) { + goto err_exit_bulk_access; + } + if (num_files > 256) { + error = EINVAL; + goto err_exit_bulk_access; + } + + if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids, + num_files * sizeof(int)))) { + goto err_exit_bulk_access; + } + + /* fill in the ucred structure */ + flags = user_access_structp->flags; + if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) { + flags = R_OK; + } + + /* check if we've been passed leaf node ids or parent ids */ + if (flags & PARENT_IDS_FLAG) { + check_leaf = false; + } + + memset(&myucred, 0, sizeof(myucred)); + myucred.cr_ref = 1; + myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid; + myucred.cr_ngroups = user_access_structp->num_groups; + if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) { + myucred.cr_ngroups = 0; + } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups, + myucred.cr_ngroups * sizeof(gid_t)))) { + goto err_exit_bulk_access; + } + myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0]; + + my_context.vc_proc = p; + my_context.vc_ucred = &myucred; + + /* Check access to each file_id passed in */ + for (i = 0; i < num_files; i++) { +#if 0 + cnid = (cnid_t) file_ids[i]; + + /* root always has access */ + if (!suser(&myucred, NULL)) { + access[i] = 0; + continue; + } + + if (check_leaf) { + + /* do the lookup (checks the cnode hash, then the catalog) */ + error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p); + if (error) { + access[i] = (short) error; + continue; + } + + /* before calling CheckAccess(), check the target file for read access */ + myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, + cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p ); + + + /* fail fast if no access */ + if ((myPerms & flags) == 0) { + access[i] = EACCES; + continue; + } + } else { + /* we were passed an array of parent ids */ + catkey.hfsPlus.parentID = cnid; + } + + /* if the last guy had the same parent and had access, we're done */ + if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) { + cache.cachehits++; + access[i] = 0; + continue; + } + + myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, + skip_cp, p, &myucred, dev); + + if ( myaccess ) { + access[i] = 0; // have access.. no errors to report + } else { + access[i] = (error != 0 ? 
(short) error : EACCES); + } + + prevParent_cnid = catkey.hfsPlus.parentID; +#else + int myErr; + + cnid = (cnid_t)file_ids[i]; + + while (cnid >= kRootDirID) { + /* get the vnode for this cnid */ + myErr = hfs_vget(hfsmp, cnid, &vp, 0); + if ( myErr ) { + access[i] = EACCES; + break; + } + + cnid = VTOC(vp)->c_parentcnid; + + hfs_unlock(VTOC(vp)); + if (vnode_vtype(vp) == VDIR) { + myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &my_context); + if (myErr) { + // try again with just read-access + myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context); + } + } else { + myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context); + } + vnode_put(vp); + access[i] = myErr; + if (myErr) { + break; + } + } +#endif + } + + /* copyout the access array */ + if ((error = copyout((caddr_t)access, user_access_structp->access, + num_files * sizeof (short)))) { + goto err_exit_bulk_access; + } + + err_exit_bulk_access: + + //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); + + release_pathbuff((char *) cache.acache); + release_pathbuff((char *) cache.haveaccess); + release_pathbuff((char *) file_ids); + release_pathbuff((char *) access); + + return (error); + } /* HFS_BULKACCESS */ + + case HFS_SETACLSTATE: { + int state; + + if (!is_suser()) { + return (EPERM); + } + if (ap->a_data == NULL) { + return (EINVAL); + } + state = *(int *)ap->a_data; + if (state == 0 || state == 1) + return hfs_setextendedsecurity(hfsmp, state); + else + return (EINVAL); + } + + case F_FULLFSYNC: { int error; - ap->a_vp->v_flag |= VFULLFSYNC; - error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p); - ap->a_vp->v_flag &= ~VFULLFSYNC; + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + if (error == 0) { + error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p); + hfs_unlock(VTOC(vp)); + } return error; } - case 5: { - register struct vnode *vp; + + case F_CHKCLEAN: { register struct cnode *cp; - struct filefork *fp; int error; - vp = ap->a_vp; - cp = VTOC(vp); - fp = VTOF(vp); - - if (vp->v_type != VREG) + if (!vnode_isreg(vp)) return EINVAL; - VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ); - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - if (error) - return (error); - - /* - * used by regression test to determine if - * all the dirty pages (via write) have been cleaned - * after a call to 'fsync'. - */ - error = is_file_clean(vp, fp->ff_size); - VOP_UNLOCK(vp, 0, ap->a_p); - + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + if (error == 0) { + cp = VTOC(vp); + /* + * used by regression test to determine if + * all the dirty pages (via write) have been cleaned + * after a call to 'fsync'. + */ + error = is_file_clean(vp, VTOF(vp)->ff_size); + hfs_unlock(cp); + } return (error); } - case 1: { - register struct vnode *vp; + case F_RDADVISE: { register struct radvisory *ra; - register struct cnode *cp; struct filefork *fp; - int devBlockSize = 0; int error; - vp = ap->a_vp; - - if (vp->v_type != VREG) + if (!vnode_isreg(vp)) return EINVAL; - VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ); - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - if (error) - return (error); - ra = (struct radvisory *)(ap->a_data); - cp = VTOC(vp); fp = VTOF(vp); + /* Protect against a size change. 
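 *
 * A hypothetical user-space use of this advisory, assuming the radvisory
 * structure shown here (illustrative only):
 *
 *     struct radvisory ra;
 *     ra.ra_offset = next_offset;  // where the app expects to read next
 *     ra.ra_count  = nbytes;       // how much it expects to read
 *     fcntl(fd, F_RDADVISE, &ra);  // fails with EFBIG at or past EOF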
*/ + hfs_lock_truncate(VTOC(vp), TRUE); + if (ra->ra_offset >= fp->ff_size) { - VOP_UNLOCK(vp, 0, ap->a_p); - return (EFBIG); + error = EFBIG; + } else { + error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); } - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); - - error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize); - VOP_UNLOCK(vp, 0, ap->a_p); + hfs_unlock_truncate(VTOC(vp)); return (error); } - case 2: /* F_READBOOTBLOCKS */ - case 3: /* F_WRITEBOOTBLOCKS */ - { - struct vnode *vp = ap->a_vp; + case F_READBOOTSTRAP: + case F_WRITEBOOTSTRAP: + { struct vnode *devvp = NULL; - struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data; + user_fbootstraptransfer_t *user_bootstrapp; int devBlockSize; int error; - struct iovec aiov; - struct uio auio; - u_long blockNumber; + uio_t auio; + daddr64_t blockNumber; u_long blockOffset; u_long xfersize; struct buf *bp; + user_fbootstraptransfer_t user_bootstrap; - if ((vp->v_flag & VROOT) == 0) return EINVAL; - if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL; + if (!vnode_isvroot(vp)) + return (EINVAL); + /* LP64 - when caller is a 64 bit process then we are passed a pointer + * to a user_fbootstraptransfer_t else we get a pointer to a + * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t + */ + if (is64bit) { + user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data; + } + else { + fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data; + user_bootstrapp = &user_bootstrap; + user_bootstrap.fbt_offset = bootstrapp->fbt_offset; + user_bootstrap.fbt_length = bootstrapp->fbt_length; + user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer); + } + if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) + return EINVAL; devvp = VTOHFS(vp)->hfs_devvp; - aiov.iov_base = btd->fbt_buffer; - aiov.iov_len = btd->fbt_length; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = btd->fbt_offset; - auio.uio_resid = btd->fbt_length; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */ - auio.uio_procp = ap->a_p; - - VOP_DEVBLOCKSIZE(devvp, &devBlockSize); - - while (auio.uio_resid > 0) { - blockNumber = auio.uio_offset / devBlockSize; - error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp); - if (error) { - if (bp) brelse(bp); - return error; - }; - - blockOffset = auio.uio_offset % devBlockSize; - xfersize = devBlockSize - blockOffset; - error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio); - if (error) { - brelse(bp); - return error; - }; - if (auio.uio_rw == UIO_WRITE) { - error = VOP_BWRITE(bp); - if (error) return error; - } else { - brelse(bp); - }; - }; - }; - return 0; - - case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ - { - *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate); - return 0; - } - - default: - return (ENOTTY); - } + auio = uio_create(1, user_bootstrapp->fbt_offset, + is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32, + (ap->a_command == F_WRITEBOOTSTRAP) ? 
UIO_WRITE : UIO_READ); + uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length); + + devBlockSize = vfs_devblocksize(vnode_mount(vp)); + + while (uio_resid(auio) > 0) { + blockNumber = uio_offset(auio) / devBlockSize; + error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp); + if (error) { + if (bp) buf_brelse(bp); + uio_free(auio); + return error; + }; + + blockOffset = uio_offset(auio) % devBlockSize; + xfersize = devBlockSize - blockOffset; + error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio); + if (error) { + buf_brelse(bp); + uio_free(auio); + return error; + }; + if (uio_rw(auio) == UIO_WRITE) { + error = VNOP_BWRITE(bp); + if (error) { + uio_free(auio); + return error; + } + } else { + buf_brelse(bp); + }; + }; + uio_free(auio); + }; + return 0; + + case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ + { + if (is64bit) { + *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); + } + else { + *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate); + } + return 0; + } + + case HFS_GET_MOUNT_TIME: + return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time)); + break; + + case HFS_GET_LAST_MTIME: + return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime)); + break; + + case HFS_SET_BOOT_INFO: + if (!vnode_isvroot(vp)) + return(EINVAL); + if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) + return(EACCES); /* must be superuser or owner of filesystem */ + HFS_MOUNT_LOCK(hfsmp, TRUE); + bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + break; + + case HFS_GET_BOOT_INFO: + if (!vnode_isvroot(vp)) + return(EINVAL); + HFS_MOUNT_LOCK(hfsmp, TRUE); + bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + break; + + default: + return (ENOTTY); + } /* Should never get here */ return 0; } -/* ARGSUSED */ +/* + * select + */ int -hfs_select(ap) - struct vop_select_args /* { - struct vnode *a_vp; +hfs_vnop_select(__unused struct vnop_select_args *ap) +/* + struct vnop_select_args { + vnode_t a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void *a_wql; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + }; +*/ { /* * We should really check to see if I/O is possible. @@ -774,192 +1475,139 @@ hfs_select(ap) return (1); } -/* - * Bmap converts a the logical block number of a file to its physical block - * number on the disk. - */ - -/* - * vp - address of vnode file the file - * bn - which logical block to convert to a physical block number. - * vpp - returns the vnode for the block special file holding the filesystem - * containing the file of interest - * bnp - address of where to return the filesystem physical block number -#% bmap vp L L L -#% bmap vpp - U - -# - vop_bmap { - IN struct vnode *vp; - IN daddr_t bn; - OUT struct vnode **vpp; - IN daddr_t *bnp; - OUT int *runp; - */ /* * Converts a logical block number to a physical block, and optionally returns * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize. * The physical block number is based on the device block size, currently its 512. 
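 * For example, assuming a 4K logical block size on a 512-byte device:
 * logical block 10 is byte position 10 * 4096 = 40960, which is
 * 40960 / 512 = 80 device blocks from the start of the fork, and
 * MapFileBlockC() returns the device block where that extent actually
 * sits on disk plus how many contiguous bytes follow it.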
* The block run is returned in logical blocks, and is the REMAINING amount of blocks */ - int -hfs_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; +hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp) { - struct vnode *vp = ap->a_vp; struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); struct hfsmount *hfsmp = VTOHFS(vp); - int retval = E_NONE; - daddr_t logBlockSize; - size_t bytesContAvail = 0; - off_t blockposition; - struct proc *p = NULL; - int lockExtBtree; - struct rl_entry *invalid_range; - enum rl_overlaptype overlaptype; + int retval = E_NONE; + daddr_t logBlockSize; + size_t bytesContAvail = 0; + off_t blockposition; + int lockExtBtree; + int lockflags = 0; /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ - if (ap->a_vpp != NULL) - *ap->a_vpp = cp->c_devvp; - if (ap->a_bnp == NULL) + if (vpp != NULL) + *vpp = cp->c_devvp; + if (bnp == NULL) return (0); - /* Only clustered I/O should have delayed allocations. */ - DBG_ASSERT(fp->ff_unallocblocks == 0); - logBlockSize = GetLogicalBlockSize(vp); - blockposition = (off_t)ap->a_bn * (off_t)logBlockSize; + blockposition = (off_t)bn * (off_t)logBlockSize; lockExtBtree = overflow_extents(fp); - if (lockExtBtree) { - p = current_proc(); - retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, - LK_EXCLUSIVE | LK_CANRECURSE, p); - if (retval) - return (retval); - } + + if (lockExtBtree) + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK); retval = MacToVFSError( MapFileBlockC (HFSTOVCB(hfsmp), (FCB*)fp, MAXPHYSIO, blockposition, - ap->a_bnp, + bnp, &bytesContAvail)); - if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); - - if (retval == E_NONE) { - /* Adjust the mapping information for invalid file ranges: */ - overlaptype = rl_scan(&fp->ff_invalidranges, - blockposition, - blockposition + MAXPHYSIO - 1, - &invalid_range); - if (overlaptype != RL_NOOVERLAP) { - switch(overlaptype) { - case RL_MATCHINGOVERLAP: - case RL_OVERLAPCONTAINSRANGE: - case RL_OVERLAPSTARTSBEFORE: - /* There's no valid block for this byte offset: */ - *ap->a_bnp = (daddr_t)-1; - bytesContAvail = invalid_range->rl_end + 1 - blockposition; - break; - - case RL_OVERLAPISCONTAINED: - case RL_OVERLAPENDSAFTER: - /* The range of interest hits an invalid block before the end: */ - if (invalid_range->rl_start == blockposition) { - /* There's actually no valid information to be had starting here: */ - *ap->a_bnp = (daddr_t)-1; - if ((fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) { - bytesContAvail = invalid_range->rl_end + 1 - blockposition; - }; - } else { - bytesContAvail = invalid_range->rl_start - blockposition; - }; - break; - }; - if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO; - }; - - /* Figure out how many read ahead blocks there are */ - if (ap->a_runp != NULL) { - if (can_cluster(logBlockSize)) { - /* Make sure this result never goes negative: */ - *ap->a_runp = (bytesContAvail < logBlockSize) ? 
0 : (bytesContAvail / logBlockSize) - 1; - } else { - *ap->a_runp = 0; - }; - }; - }; - - return (retval); -} + if (lockExtBtree) + hfs_systemfile_unlock(hfsmp, lockflags); -/* blktooff converts logical block number to file offset */ + if (retval == E_NONE) { + /* Figure out how many read ahead blocks there are */ + if (runp != NULL) { + if (can_cluster(logBlockSize)) { + /* Make sure this result never goes negative: */ + *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1; + } else { + *runp = 0; + } + } + } + return (retval); +} +/* + * Convert logical block number to file offset. + */ int -hfs_blktooff(ap) - struct vop_blktooff_args /* { - struct vnode *a_vp; - daddr_t a_lblkno; +hfs_vnop_blktooff(struct vnop_blktooff_args *ap) +/* + struct vnop_blktooff_args { + vnode_t a_vp; + daddr64_t a_lblkno; off_t *a_offset; - } */ *ap; + }; +*/ { if (ap->a_vp == NULL) return (EINVAL); - *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64; + *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp); return(0); } +/* + * Convert file offset to logical block number. + */ int -hfs_offtoblk(ap) - struct vop_offtoblk_args /* { - struct vnode *a_vp; +hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap) +/* + struct vnop_offtoblk_args { + vnode_t a_vp; off_t a_offset; - daddr_t *a_lblkno; - } */ *ap; + daddr64_t *a_lblkno; + }; +*/ { if (ap->a_vp == NULL) return (EINVAL); - *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64; + *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp)); return(0); } +/* + * Map file offset to physical block number. + * + * System file cnodes are expected to be locked (shared or exclusive). + */ int -hfs_cmap(ap) - struct vop_cmap_args /* { - struct vnode *a_vp; +hfs_vnop_blockmap(struct vnop_blockmap_args *ap) +/* + struct vnop_blockmap_args { + vnode_t a_vp; off_t a_foffset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; - } */ *ap; + int a_flags; + vfs_context_t a_context; + }; +*/ { - struct hfsmount *hfsmp = VTOHFS(ap->a_vp); - struct filefork *fp = VTOF(ap->a_vp); - size_t bytesContAvail = 0; - int retval = E_NONE; - int lockExtBtree = 0; - struct proc *p = NULL; - struct rl_entry *invalid_range; - enum rl_overlaptype overlaptype; - int started_tr = 0, grabbed_lock = 0; - struct timeval tv; + struct vnode *vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; + struct hfsmount *hfsmp; + size_t bytesContAvail = 0; + int retval = E_NONE; + int syslocks = 0; + int lockflags = 0; + struct rl_entry *invalid_range; + enum rl_overlaptype overlaptype; + int started_tr = 0; + int tooklock = 0; /* * Check for underlying vnode requests and ensure that logical @@ -968,56 +1616,43 @@ hfs_cmap(ap) if (ap->a_bpn == NULL) return (0); - p = current_proc(); - - if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) { - /* - * File blocks are getting remapped. Wait until its finished. - */ - SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP); - (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0); - if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) - panic("hfs_cmap: no mappable blocks"); - } + if ( !vnode_issystem(vp) && !vnode_islnk(vp)) { + if (VTOC(vp)->c_lockowner != current_thread()) { + hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + tooklock = 1; + } else { + cp = VTOC(vp); + panic("blockmap: %s cnode lock already held!\n", + cp->c_desc.cd_nameptr ? 
cp->c_desc.cd_nameptr : ""); + } + } + hfsmp = VTOHFS(vp); + cp = VTOC(vp); + fp = VTOF(vp); - retry: +retry: if (fp->ff_unallocblocks) { - lockExtBtree = 1; - - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - return EINVAL; - } else { - started_tr = 1; - } - } - - if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) { - if (started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - } - return (retval); + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto exit; + } else { + started_tr = 1; } + syslocks = SFL_EXTENTS | SFL_BITMAP; + } else if (overflow_extents(fp)) { - lockExtBtree = 1; - if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) { - return retval; - } + syslocks = SFL_EXTENTS; } + + if (syslocks) + lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK); /* * Check for any delayed allocations. */ if (fp->ff_unallocblocks) { - SInt64 reqbytes, actbytes; + SInt64 actbytes; + u_int32_t loanedBlocks; // // Make sure we have a transaction. It's possible @@ -1026,345 +1661,144 @@ hfs_cmap(ap) // btree, ff_unallocblocks became non-zero and so we // will need to start a transaction. // - if (hfsmp->jnl && started_tr == 0) { - if (lockExtBtree) { - (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); - lockExtBtree = 0; - } - - goto retry; + if (started_tr == 0) { + if (syslocks) { + hfs_systemfile_unlock(hfsmp, lockflags); + syslocks = 0; + } + goto retry; } - reqbytes = (SInt64)fp->ff_unallocblocks * - (SInt64)HFSTOVCB(hfsmp)->blockSize; /* - * Release the blocks on loan and aquire some real ones. - * Note that we can race someone else for these blocks - * (and lose) so cmap needs to handle a failure here. - * Currently this race can't occur because all allocations - * are protected by an exclusive lock on the Extents - * Overflow file. + * Note: ExtendFileC will Release any blocks on loan and + * acquire real blocks. So we ask to extend by zero bytes + * since ExtendFileC will account for the virtual blocks. */ - HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks; - FTOC(fp)->c_blocks -= fp->ff_unallocblocks; - fp->ff_blocks -= fp->ff_unallocblocks; - fp->ff_unallocblocks = 0; - /* Files that are changing size are not hot file candidates. 
*/ - if (hfsmp->hfc_stage == HFC_RECORDING) { - fp->ff_bytesread = 0; - } - while (retval == 0 && reqbytes > 0) { - retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp), - (FCB*)fp, reqbytes, 0, - kEFAllMask | kEFNoClumpMask, &actbytes)); - if (retval == 0 && actbytes == 0) - retval = ENOSPC; - - if (retval) { - fp->ff_unallocblocks = - reqbytes / HFSTOVCB(hfsmp)->blockSize; - HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks; - FTOC(fp)->c_blocks += fp->ff_unallocblocks; - fp->ff_blocks += fp->ff_unallocblocks; - } - reqbytes -= actbytes; + loanedBlocks = fp->ff_unallocblocks; + retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0, + kEFAllMask | kEFNoClumpMask, &actbytes); + + if (retval) { + fp->ff_unallocblocks = loanedBlocks; + cp->c_blocks += loanedBlocks; + fp->ff_blocks += loanedBlocks; + + HFS_MOUNT_LOCK(hfsmp, TRUE); + hfsmp->loanedBlocks += loanedBlocks; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); } if (retval) { - (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); - VTOC(ap->a_vp)->c_flag |= C_MODIFIED; + hfs_systemfile_unlock(hfsmp, lockflags); + cp->c_flag |= C_MODIFIED; if (started_tr) { - tv = time; - VOP_UPDATE(ap->a_vp, &tv, &tv, 1); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); } - return (retval); + goto exit; } } - retval = MacToVFSError( - MapFileBlockC (HFSTOVCB(hfsmp), - (FCB *)fp, - ap->a_size, - ap->a_foffset, - ap->a_bpn, - &bytesContAvail)); - - if (lockExtBtree) - (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); + retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset, + ap->a_bpn, &bytesContAvail); + if (syslocks) { + hfs_systemfile_unlock(hfsmp, lockflags); + syslocks = 0; + } - // XXXdbg if (started_tr) { - tv = time; - retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1); - - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + hfs_end_transaction(hfsmp); started_tr = 0; - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - grabbed_lock = 0; - } - - if (retval == E_NONE) { - /* Adjust the mapping information for invalid file ranges: */ - overlaptype = rl_scan(&fp->ff_invalidranges, - ap->a_foffset, - ap->a_foffset + (off_t)bytesContAvail - 1, - &invalid_range); - if (overlaptype != RL_NOOVERLAP) { - switch(overlaptype) { - case RL_MATCHINGOVERLAP: - case RL_OVERLAPCONTAINSRANGE: - case RL_OVERLAPSTARTSBEFORE: - /* There's no valid block for this byte offset: */ - *ap->a_bpn = (daddr_t)-1; - - /* There's no point limiting the amount to be returned if the - invalid range that was hit extends all the way to the EOF - (i.e. 
there's no valid bytes between the end of this range - and the file's EOF): - */ - if ((fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { - bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; - }; - break; - - case RL_OVERLAPISCONTAINED: - case RL_OVERLAPENDSAFTER: - /* The range of interest hits an invalid block before the end: */ - if (invalid_range->rl_start == ap->a_foffset) { - /* There's actually no valid information to be had starting here: */ - *ap->a_bpn = (daddr_t)-1; - if ((fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { - bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; - }; - } else { - bytesContAvail = invalid_range->rl_start - ap->a_foffset; - }; - break; - }; - if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size; - }; - - if (ap->a_run) *ap->a_run = bytesContAvail; - }; - - if (ap->a_poff) - *(int *)ap->a_poff = 0; - - return (retval); -} - + } + if (retval) { + goto exit; + } -/* - * Read or write a buffer that is not contiguous on disk. We loop over - * each device block, copying to or from caller's buffer. - * - * We could be a bit more efficient by transferring as much data as is - * contiguous. But since this routine should rarely be called, and that - * would be more complicated; best to keep it simple. - */ -static int -hfs_strategy_fragmented(struct buf *bp) -{ - register struct vnode *vp = bp->b_vp; - register struct cnode *cp = VTOC(vp); - register struct vnode *devvp = cp->c_devvp; - caddr_t ioaddr; /* Address of fragment within bp */ - struct buf *frag = NULL; /* For reading or writing a single block */ - int retval = 0; - long remaining; /* Bytes (in bp) left to transfer */ - off_t offset; /* Logical offset of current fragment in vp */ - u_long block_size; /* Size of one device block (and one I/O) */ - - /* Make sure we redo this mapping for the next I/O */ - bp->b_blkno = bp->b_lblkno; - - /* Set up the logical position and number of bytes to read/write */ - offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp); - block_size = VTOHFS(vp)->hfs_phys_block_size; + /* Adjust the mapping information for invalid file ranges: */ + overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset, + ap->a_foffset + (off_t)bytesContAvail - 1, + &invalid_range); + if (overlaptype != RL_NOOVERLAP) { + switch(overlaptype) { + case RL_MATCHINGOVERLAP: + case RL_OVERLAPCONTAINSRANGE: + case RL_OVERLAPSTARTSBEFORE: + /* There's no valid block for this byte offset: */ + *ap->a_bpn = (daddr64_t)-1; + /* There's no point limiting the amount to be returned + * if the invalid range that was hit extends all the way + * to the EOF (i.e. 
there's no valid bytes between the + * end of this range and the file's EOF): + */ + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + break; - /* Get an empty buffer to do the deblocking */ - frag = geteblk(block_size); - if (ISSET(bp->b_flags, B_READ)) - SET(frag->b_flags, B_READ); - - for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0; - ioaddr += block_size, offset += block_size, - remaining -= block_size) { - frag->b_resid = frag->b_bcount; - CLR(frag->b_flags, B_DONE); - - /* Map the current position to a physical block number */ - retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno, - NULL, NULL); - if (retval != 0) + case RL_OVERLAPISCONTAINED: + case RL_OVERLAPENDSAFTER: + /* The range of interest hits an invalid block before the end: */ + if (invalid_range->rl_start == ap->a_foffset) { + /* There's actually no valid information to be had starting here: */ + *ap->a_bpn = (daddr64_t)-1; + if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && + (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; + } + } else { + bytesContAvail = invalid_range->rl_start - ap->a_foffset; + } break; - /* - * Did we try to read a hole? - * (Should never happen for metadata!) - */ - if ((long)frag->b_lblkno == -1) { - bzero(ioaddr, block_size); - continue; - } - - /* If writing, copy before I/O */ - if (!ISSET(bp->b_flags, B_READ)) - bcopy(ioaddr, frag->b_data, block_size); - - /* Call the device to do the I/O and wait for it */ - frag->b_blkno = frag->b_lblkno; - frag->b_vp = devvp; /* Used to dispatch via VOP_STRATEGY */ - frag->b_dev = devvp->v_rdev; - retval = VOP_STRATEGY(frag); - frag->b_vp = NULL; - if (retval != 0) - break; - retval = biowait(frag); - if (retval != 0) + case RL_NOOVERLAP: break; - - /* If reading, copy after the I/O */ - if (ISSET(bp->b_flags, B_READ)) - bcopy(frag->b_data, ioaddr, block_size); + } /* end switch */ + if (bytesContAvail > ap->a_size) + bytesContAvail = ap->a_size; } - - frag->b_vp = NULL; - // - // XXXdbg - in the case that this is a meta-data block, it won't affect - // the journal because this bp is for a physical disk block, - // not a logical block that is part of the catalog or extents - // files. - SET(frag->b_flags, B_INVAL); - brelse(frag); - - if ((bp->b_error = retval) != 0) - SET(bp->b_flags, B_ERROR); - - biodone(bp); /* This I/O is now complete */ - return retval; + if (ap->a_run) + *ap->a_run = bytesContAvail; + + if (ap->a_poff) + *(int *)ap->a_poff = 0; +exit: + if (tooklock) + hfs_unlock(cp); + + return (MacToVFSError(retval)); } /* - * Calculate the logical to physical mapping if not done already, - * then call the device strategy routine. 
-# -#vop_strategy { -# IN struct buf *bp; - */ + * prepare and issue the I/O + * buf_strategy knows how to deal + * with requests that require + * fragmented I/Os + */ int -hfs_strategy(ap) - struct vop_strategy_args /* { - struct buf *a_bp; - } */ *ap; +hfs_vnop_strategy(struct vnop_strategy_args *ap) { - register struct buf *bp = ap->a_bp; - register struct vnode *vp = bp->b_vp; - register struct cnode *cp = VTOC(vp); - int retval = 0; - off_t offset; - size_t bytes_contig; - - if ( !(bp->b_flags & B_VECTORLIST)) { - if (vp->v_type == VBLK || vp->v_type == VCHR) - panic("hfs_strategy: device vnode passed!"); - - if (bp->b_flags & B_PAGELIST) { - /* - * If we have a page list associated with this bp, - * then go through cluster_bp since it knows how to - * deal with a page request that might span non- - * contiguous physical blocks on the disk... - */ - retval = cluster_bp(bp); - vp = cp->c_devvp; - bp->b_dev = vp->v_rdev; - - return (retval); - } - - /* - * If we don't already know the filesystem relative block - * number then get it using VOP_BMAP(). If VOP_BMAP() - * returns the block number as -1 then we've got a hole in - * the file. Although HFS filesystems don't create files with - * holes, invalidating of subranges of the file (lazy zero - * filling) may create such a situation. - */ - if (bp->b_blkno == bp->b_lblkno) { - offset = (off_t) bp->b_lblkno * - (off_t) GetLogicalBlockSize(vp); - - if ((retval = VOP_CMAP(vp, offset, bp->b_bcount, - &bp->b_blkno, &bytes_contig, NULL))) { - bp->b_error = retval; - bp->b_flags |= B_ERROR; - biodone(bp); - return (retval); - } - if (bytes_contig < bp->b_bcount) - { - /* - * We were asked to read a block that wasn't - * contiguous, so we have to read each of the - * pieces and copy them into the buffer. - * Since ordinary file I/O goes through - * cluster_io (which won't ask us for - * discontiguous data), this is probably an - * attempt to read or write metadata. - */ - return hfs_strategy_fragmented(bp); - } - if ((long)bp->b_blkno == -1) - clrbuf(bp); - } - if ((long)bp->b_blkno == -1) { - biodone(bp); - return (0); - } - if (bp->b_validend == 0) { - /* - * Record the exact size of the I/O transfer about to - * be made: - */ - bp->b_validend = bp->b_bcount; - } - } - vp = cp->c_devvp; - bp->b_dev = vp->v_rdev; + buf_t bp = ap->a_bp; + vnode_t vp = buf_vnode(bp); + struct cnode *cp = VTOC(vp); - return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap); + return (buf_strategy(cp->c_devvp, ap)); } -static int do_hfs_truncate(ap) - struct vop_truncate_args /* { - struct vnode *a_vp; - off_t a_length; - int a_flags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +static int +do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context) { - register struct vnode *vp = ap->a_vp; register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); - off_t length; - long vflags; - struct timeval tv; + struct proc *p = vfs_context_proc(context); + kauth_cred_t cred = vfs_context_ucred(context); int retval; off_t bytesToAdd; off_t actualBytesAdded; @@ -1372,11 +1806,8 @@ static int do_hfs_truncate(ap) u_long fileblocks; int blksize; struct hfsmount *hfsmp; + int lockflags; - if (vp->v_type != VREG && vp->v_type != VLNK) - return (EISDIR); /* cannot truncate an HFS directory! 
*/ - - length = ap->a_length; blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; @@ -1392,7 +1823,6 @@ static int do_hfs_truncate(ap) hfsmp = VTOHFS(vp); - tv = time; retval = E_NONE; /* Files that are changing size are not hot file candidates. */ @@ -1405,7 +1835,7 @@ static int do_hfs_truncate(ap) * since there may be extra physical blocks that also need truncation. */ #if QUOTA - if (retval = hfs_getinoquota(cp)) + if ((retval = hfs_getinoquota(cp))) return(retval); #endif /* QUOTA */ @@ -1414,10 +1844,10 @@ static int do_hfs_truncate(ap) * last byte of the file is allocated. Since the smallest * value of ff_size is 0, length will be at least 1. */ - if (length > fp->ff_size) { + if (length > (off_t)fp->ff_size) { #if QUOTA retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)), - ap->a_cred, 0); + cred, 0); if (retval) goto Err_Exit; #endif /* QUOTA */ @@ -1432,7 +1862,7 @@ static int do_hfs_truncate(ap) /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; - if (ap->a_cred && suser(ap->a_cred, NULL) != 0) + if (cred && suser(cred, NULL) != 0) eflags |= kEFReserveMask; /* keep a reserve */ /* @@ -1444,25 +1874,16 @@ static int do_hfs_truncate(ap) eflags |= kEFMetadataMask; blockHint = hfsmp->hfs_metazone_start; } - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - retval = EINVAL; - goto Err_Exit; - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; } - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - - goto Err_Exit; - } + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); while ((length > filebytes) && (retval == E_NONE)) { bytesToAdd = length - filebytes; @@ -1481,17 +1902,14 @@ static int do_hfs_truncate(ap) } } /* endwhile */ - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); + hfs_systemfile_unlock(hfsmp, lockflags); - // XXXdbg if (hfsmp->jnl) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 1); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); if (retval) goto Err_Exit; @@ -1500,16 +1918,17 @@ static int do_hfs_truncate(ap) (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); } - if (!(ap->a_flags & IO_NOZEROFILL)) { + if (!(flags & IO_NOZEROFILL)) { if (UBCINFOEXISTS(vp) && retval == E_NONE) { struct rl_entry *invalid_range; - int devBlockSize; off_t zero_limit; zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64; if (length < zero_limit) zero_limit = length; - if (length > fp->ff_size) { + if (length > (off_t)fp->ff_size) { + struct timeval tv; + /* Extending the file: time to fill out the current last page w. zeroes? */ if ((fp->ff_size & PAGE_MASK_64) && (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64, @@ -1519,50 +1938,66 @@ static int do_hfs_truncate(ap) of the file, so zero out the remainder of that page to ensure the entire page contains valid data. 
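(For example, with 4 KB pages, extending a file whose EOF sits at offset 0x1200 zero-fills bytes 0x1200 through 0x1FFF so the whole page holds valid data.)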
Since there is no invalid range possible past the (current) eof, there's no need to remove anything - from the invalid range list before calling cluster_write(): */ - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); + from the invalid range list before calling cluster_write(): */ + hfs_unlock(cp); retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit, - fp->ff_size, (off_t)0, devBlockSize, - (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); + fp->ff_size, (off_t)0, + (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); + hfs_lock(cp, HFS_FORCE_LOCK); if (retval) goto Err_Exit; /* Merely invalidate the remaining area, if necessary: */ if (length > zero_limit) { + microuptime(&tv); rl_add(zero_limit, length - 1, &fp->ff_invalidranges); - cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT; + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; } } else { /* The page containing the (current) eof is invalid: just add the remainder of the page to the invalid list, along with the area being newly allocated: */ + microuptime(&tv); rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges); - cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT; + cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT; }; } } else { panic("hfs_truncate: invoked on non-UBC object?!"); }; } - cp->c_flag |= C_UPDATE; + cp->c_touch_modtime = TRUE; fp->ff_size = length; - if (UBCISVALID(vp)) - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ + /* Nested transactions will do their own ubc_setsize. */ + if (!skipsetsize) { + /* + * ubc_setsize can cause a pagein here + * so we need to drop cnode lock. + */ + hfs_unlock(cp); + ubc_setsize(vp, length); + hfs_lock(cp, HFS_FORCE_LOCK); + } } else { /* Shorten the size of the file */ - if (fp->ff_size > length) { + if ((off_t)fp->ff_size > length) { /* * Any buffers that are past the truncation point need to be - * invalidated (to maintain buffer cache consistency). For - * simplicity, we invalidate all the buffers by calling vinvalbuf. + * invalidated (to maintain buffer cache consistency). */ - if (UBCISVALID(vp)) - ubc_setsize(vp, length); /* XXX check errors */ - vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; - retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0); + /* Nested transactions will do their own ubc_setsize. */ + if (!skipsetsize) { + /* + * ubc_setsize can cause a pageout here + * so we need to drop cnode lock. 
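+ * (hfs_vnop_pageout takes the cnode lock itself and panics if the calling thread already owns it.)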
+ */ + hfs_unlock(cp); + ubc_setsize(vp, length); + hfs_lock(cp, HFS_FORCE_LOCK); + } /* Any space previously marked as invalid is now irrelevant: */ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); @@ -1574,28 +2009,28 @@ static int do_hfs_truncate(ap) */ if (fp->ff_unallocblocks > 0) { u_int32_t finalblks; + u_int32_t loanedBlocks; - /* lock extents b-tree */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, - LK_EXCLUSIVE, ap->a_p); - if (retval) - goto Err_Exit; + HFS_MOUNT_LOCK(hfsmp, TRUE); + + loanedBlocks = fp->ff_unallocblocks; + cp->c_blocks -= loanedBlocks; + fp->ff_blocks -= loanedBlocks; + fp->ff_unallocblocks = 0; - VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks; - cp->c_blocks -= fp->ff_unallocblocks; - fp->ff_blocks -= fp->ff_unallocblocks; - fp->ff_unallocblocks = 0; + hfsmp->loanedBlocks -= loanedBlocks; finalblks = (length + blksize - 1) / blksize; if (finalblks > fp->ff_blocks) { /* calculate required unmapped blocks */ - fp->ff_unallocblocks = finalblks - fp->ff_blocks; - VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks; - cp->c_blocks += fp->ff_unallocblocks; - fp->ff_blocks += fp->ff_unallocblocks; + loanedBlocks = finalblks - fp->ff_blocks; + hfsmp->loanedBlocks += loanedBlocks; + + fp->ff_unallocblocks = loanedBlocks; + cp->c_blocks += loanedBlocks; + fp->ff_blocks += loanedBlocks; } - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, - LK_RELEASE, ap->a_p); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); } /* @@ -1604,44 +2039,33 @@ static int do_hfs_truncate(ap) * truncate with the IO_NDELAY flag set. So when IO_NDELAY * isn't set, we make sure this isn't a TBE process. */ - if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) { + if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) { #if QUOTA off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); #endif /* QUOTA */ - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - retval = EINVAL; - goto Err_Exit; - } - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } + + if (fp->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - goto Err_Exit; - } - - if (fp->ff_unallocblocks == 0) retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, false)); - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); - - // XXXdbg + hfs_systemfile_unlock(hfsmp, lockflags); + } if (hfsmp->jnl) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 1); - - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); } - hfs_global_shared_lock_release(hfsmp); + + hfs_end_transaction(hfsmp); filebytes = (off_t)fp->ff_blocks * (off_t)blksize; if (retval) @@ -1652,12 +2076,12 @@ static int do_hfs_truncate(ap) #endif /* QUOTA */ } /* Only set update flag if the logical length changes */ - if (fp->ff_size != length) - cp->c_flag |= C_UPDATE; + if ((off_t)fp->ff_size != length) + cp->c_touch_modtime = TRUE; fp->ff_size = length; } - cp->c_flag |= 
C_CHANGE; - retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT); + cp->c_touch_chgtime = TRUE; + retval = hfs_update(vp, MNT_WAIT); if (retval) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, -1, -1, -1, retval, 0); @@ -1672,42 +2096,24 @@ Err_Exit: } + /* -# -#% truncate vp L L L -# -vop_truncate { - IN struct vnode *vp; - IN off_t length; - IN int flags; (IO_SYNC) - IN struct ucred *cred; - IN struct proc *p; -}; * Truncate a cnode to at most length size, freeing (or adding) the * disk blocks. */ -int hfs_truncate(ap) - struct vop_truncate_args /* { - struct vnode *a_vp; - off_t a_length; - int a_flags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +__private_extern__ +int +hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, + vfs_context_t context) { - register struct vnode *vp = ap->a_vp; - register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); - off_t length; off_t filebytes; u_long fileblocks; - int blksize, error; - u_int64_t nsize; + int blksize, error = 0; - if (vp->v_type != VREG && vp->v_type != VLNK) + if (vnode_isdir(vp)) return (EISDIR); /* cannot truncate an HFS directory! */ - length = ap->a_length; blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; @@ -1715,96 +2121,94 @@ int hfs_truncate(ap) // have to loop truncating or growing files that are // really big because otherwise transactions can get // enormous and consume too many kernel resources. - if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) { - while (filebytes > length) { - if ((filebytes - length) > HFS_BIGFILE_SIZE) { - filebytes -= HFS_BIGFILE_SIZE; - } else { - filebytes = length; - } - - ap->a_length = filebytes; - error = do_hfs_truncate(ap); - if (error) - break; - } - } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) { - while (filebytes < length) { - if ((length - filebytes) > HFS_BIGFILE_SIZE) { - filebytes += HFS_BIGFILE_SIZE; - } else { - filebytes = (length - filebytes); + + if (length < filebytes) { + while (filebytes > length) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context); + if (error) + break; + } + } else if (length > filebytes) { + while (filebytes < length) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { + filebytes += HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context); + if (error) + break; } + } else /* Same logical size */ { - ap->a_length = filebytes; - error = do_hfs_truncate(ap); - if (error) - break; - } - } else { - error = do_hfs_truncate(ap); + error = do_hfs_truncate(vp, length, flags, skipsetsize, context); + } + /* Files that are changing size are not hot file candidates. */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; } - return error; + return (error); } /* -# -#% allocate vp L L L -# -vop_allocate { - IN struct vnode *vp; - IN off_t length; - IN int flags; - OUT off_t *bytesallocated; - IN off_t offset; - IN struct ucred *cred; - IN struct proc *p; -}; - * allocate a cnode to at most length size + * Preallocate file storage space. 
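+ * The flags in ap->a_flags select the ExtendFileC strategy below: ALLOCATECONTIG maps to kEFContigMask and ALLOCATEALL to kEFAllMask.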
*/ -int hfs_allocate(ap) - struct vop_allocate_args /* { - struct vnode *a_vp; +int +hfs_vnop_allocate(struct vnop_allocate_args /* { + vnode_t a_vp; off_t a_length; u_int32_t a_flags; off_t *a_bytesallocated; off_t a_offset; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct filefork *fp = VTOF(vp); - ExtendedVCB *vcb = VTOVCB(vp); + struct cnode *cp; + struct filefork *fp; + ExtendedVCB *vcb; off_t length = ap->a_length; off_t startingPEOF; off_t moreBytesRequested; off_t actualBytesAdded; off_t filebytes; u_long fileblocks; - long vflags; - struct timeval tv; int retval, retval2; UInt32 blockHint; UInt32 extendFlags; /* For call to ExtendFileC */ struct hfsmount *hfsmp; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + int lockflags; + + *(ap->a_bytesallocated) = 0; + + if (!vnode_isreg(vp)) + return (EISDIR); + if (length < (off_t)0) + return (EINVAL); + if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (retval); + cp = VTOC(vp); + fp = VTOF(vp); hfsmp = VTOHFS(vp); + vcb = VTOVCB(vp); - *(ap->a_bytesallocated) = 0; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; - if (length < (off_t)0) - return (EINVAL); - if (vp->v_type != VREG) - return (EISDIR); - if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) - return (EINVAL); + if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) { + retval = EINVAL; + goto Err_Exit; + } /* Fill in the flags word for the call to Extend the file */ @@ -1813,10 +2217,9 @@ int hfs_allocate(ap) extendFlags |= kEFContigMask; if (ap->a_flags & ALLOCATEALL) extendFlags |= kEFAllMask; - if (ap->a_cred && suser(ap->a_cred, NULL) != 0) + if (cred && suser(cred, NULL) != 0) extendFlags |= kEFReserveMask; - tv = time; retval = E_NONE; blockHint = 0; startingPEOF = filebytes; @@ -1841,9 +2244,9 @@ int hfs_allocate(ap) #if QUOTA retval = hfs_chkdq(cp, (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), - ap->a_cred, 0); + cred, 0); if (retval) - return (retval); + goto Err_Exit; #endif /* QUOTA */ /* @@ -1865,24 +2268,16 @@ int hfs_allocate(ap) } } - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - retval = EINVAL; - goto Err_Exit; - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; } - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - goto Err_Exit; - } + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, @@ -1894,21 +2289,18 @@ int hfs_allocate(ap) *(ap->a_bytesallocated) = actualBytesAdded; filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); + hfs_systemfile_unlock(hfsmp, lockflags); - // XXXdbg if (hfsmp->jnl) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 1); - - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); } - hfs_global_shared_lock_release(hfsmp); + + 
hfs_end_transaction(hfsmp); /* * if we get an error and no changes were made then exit - * otherwise we must do the VOP_UPDATE to reflect the changes + * otherwise we must do the hfs_update to reflect the changes */ if (retval && (startingPEOF == filebytes)) goto Err_Exit; @@ -1929,55 +2321,38 @@ int hfs_allocate(ap) if (fp->ff_size > length) { /* * Any buffers that are past the truncation point need to be - * invalidated (to maintain buffer cache consistency). For - * simplicity, we invalidate all the buffers by calling vinvalbuf. + * invalidated (to maintain buffer cache consistency). */ - vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; - (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0); } - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - retval = EINVAL; - goto Err_Exit; - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; } - /* lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - goto Err_Exit; - } + retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false)); + + hfs_systemfile_unlock(hfsmp, lockflags); - retval = MacToVFSError( - TruncateFileC( - vcb, - (FCB*)fp, - length, - false)); - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; if (hfsmp->jnl) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 1); - - hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); } - hfs_global_shared_lock_release(hfsmp); + + hfs_end_transaction(hfsmp); /* * if we get an error and no changes were made then exit - * otherwise we must do the VOP_UPDATE to reflect the changes + * otherwise we must do the hfs_update to reflect the changes */ if (retval && (startingPEOF == filebytes)) goto Err_Exit; #if QUOTA @@ -1988,158 +2363,179 @@ int hfs_allocate(ap) if (fp->ff_size > filebytes) { fp->ff_size = filebytes; - if (UBCISVALID(vp)) - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ + hfs_unlock(cp); + ubc_setsize(vp, fp->ff_size); + hfs_lock(cp, HFS_FORCE_LOCK); } } Std_Exit: - cp->c_flag |= C_CHANGE | C_UPDATE; - retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT); + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + retval2 = hfs_update(vp, MNT_WAIT); if (retval == 0) retval = retval2; Err_Exit: + hfs_unlock(cp); return (retval); } /* - * pagein for HFS filesystem + * Pagein for HFS filesystem */ int -hfs_pagein(ap) - struct vop_pagein_args /* { - struct vnode *a_vp, +hfs_vnop_pagein(struct vnop_pagein_args *ap) +/* + struct vnop_pagein_args { + vnode_t a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags - } */ *ap; + vfs_context_t a_context; + }; +*/ { - register struct vnode *vp = ap->a_vp; - int devBlockSize = 0; + vnode_t vp = ap->a_vp; int error; - if (vp->v_type != VREG) - panic("hfs_pagein: vp not UBC type\n"); - - VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize); - error = cluster_pagein(vp, ap->a_pl, 
ap->a_pl_offset, ap->a_f_offset, - ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize, - ap->a_flags); + ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags); /* - * Keep track blocks read + * Keep track of blocks read. */ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { struct cnode *cp; + struct filefork *fp; + int bytesread; + int took_cnode_lock = 0; - cp = VTOC(vp); + cp = VTOC(vp); + fp = VTOF(vp); + + if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE) + bytesread = fp->ff_size; + else + bytesread = ap->a_size; + + /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ + if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { + hfs_lock(cp, HFS_FORCE_LOCK); + took_cnode_lock = 1; + } /* * If this file hasn't been seen since the start of * the current sampling period then start over. */ - if (cp->c_atime < VTOHFS(vp)->hfc_timebase) - VTOF(vp)->ff_bytesread = ap->a_size; - else - VTOF(vp)->ff_bytesread += ap->a_size; + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { + struct timeval tv; - cp->c_flag |= C_ACCESS; + fp->ff_bytesread = bytesread; + microtime(&tv); + cp->c_atime = tv.tv_sec; + } else { + fp->ff_bytesread += bytesread; + } + cp->c_touch_acctime = TRUE; + if (took_cnode_lock) + hfs_unlock(cp); } - return (error); } /* - * pageout for HFS filesystem. + * Pageout for HFS filesystem. */ int -hfs_pageout(ap) - struct vop_pageout_args /* { - struct vnode *a_vp, +hfs_vnop_pageout(struct vnop_pageout_args *ap) +/* + struct vnop_pageout_args { + vnode_t a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags - } */ *ap; + vfs_context_t a_context; + }; +*/ { - struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct filefork *fp = VTOF(vp); + vnode_t vp = ap->a_vp; + struct cnode *cp; + struct filefork *fp; int retval; - int devBlockSize = 0; off_t end_of_range; off_t filesize; - if (UBCINVALID(vp)) - panic("hfs_pageout: Not a VREG: vp=%x", vp); + cp = VTOC(vp); + if (cp->c_lockowner == current_thread()) { + panic("pageout: %s cnode lock already held!\n", + cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : ""); + } + if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + return (retval); + } + fp = VTOF(vp); - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); filesize = fp->ff_size; end_of_range = ap->a_f_offset + ap->a_size - 1; - if (cp->c_flag & C_RELOCATING) { - if (end_of_range < (filesize / 2)) { - return (EBUSY); - } - } - - if (end_of_range >= filesize) + if (end_of_range >= filesize) { end_of_range = (off_t)(filesize - 1); + } if (ap->a_f_offset < filesize) { rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); cp->c_flag |= C_MODIFIED; /* leof is dirty */ } + hfs_unlock(cp); - retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, - filesize, devBlockSize, ap->a_flags); + retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, + ap->a_size, filesize, ap->a_flags); /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. + * If data was written, and setuid or setgid bits are set and + * this process is not the superuser then clear the setuid and + * setgid bits as a precaution against tampering. 
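+ * (Writes that arrive through a memory mapping bypass the write() path, so the check must also be made here in the pageout path.)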
*/ - if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0) + if ((retval == 0) && + (cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + hfs_lock(cp, HFS_FORCE_LOCK); cp->c_mode &= ~(S_ISUID | S_ISGID); - + cp->c_touch_chgtime = TRUE; + hfs_unlock(cp); + } return (retval); } /* * Intercept B-Tree node writes to unswap them if necessary. -# -#vop_bwrite { -# IN struct buf *bp; */ int -hfs_bwrite(ap) - struct vop_bwrite_args /* { - struct buf *a_bp; - } */ *ap; +hfs_vnop_bwrite(struct vnop_bwrite_args *ap) { int retval = 0; register struct buf *bp = ap->a_bp; - register struct vnode *vp = bp->b_vp; + register struct vnode *vp = buf_vnode(bp); #if BYTE_ORDER == LITTLE_ENDIAN BlockDescriptor block; /* Trap B-Tree writes */ if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) || - (VTOC(vp)->c_fileid == kHFSCatalogFileID)) { + (VTOC(vp)->c_fileid == kHFSCatalogFileID) || + (VTOC(vp)->c_fileid == kHFSAttributesFileID)) { /* Swap if the B-Tree node is in native byte order */ - if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) { + if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { /* Prepare the block pointer */ block.blockHeader = bp; - block.buffer = bp->b_data; + block.buffer = (char *)buf_dataptr(bp); /* not found in cache ==> came from disk */ - block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0; - block.blockSize = bp->b_bcount; + block.blockReadFromDisk = (buf_fromcache(bp) == 0); + block.blockSize = buf_count(bp); /* Endian un-swap B-Tree node */ SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1); @@ -2149,13 +2545,12 @@ hfs_bwrite(ap) } #endif /* This buffer shouldn't be locked anymore but if it is clear it */ - if (ISSET(bp->b_flags, B_LOCKED)) { - // XXXdbg - if (VTOHFS(vp)->jnl) { - panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp); - } - CLR(bp->b_flags, B_LOCKED); - printf("hfs_bwrite: called with lock bit set\n"); + if ((buf_flags(bp) & B_LOCKED)) { + // XXXdbg + if (VTOHFS(vp)->jnl) { + panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp); + } + buf_clearflags(bp, B_LOCKED); } retval = vn_bwrite (ap); @@ -2198,30 +2593,29 @@ hfs_bwrite(ap) */ __private_extern__ int -hfs_relocate(vp, blockHint, cred, p) - struct vnode *vp; - u_int32_t blockHint; - struct ucred *cred; - struct proc *p; +hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, + struct proc *p) { + struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; - ExtendedVCB *vcb; - u_int32_t headblks; u_int32_t datablks; u_int32_t blksize; - u_int32_t realsize; u_int32_t growsize; u_int32_t nextallocsave; - u_int32_t sector_a; - u_int32_t sector_b; + daddr64_t sector_a, sector_b; + int disabled_caching = 0; int eflags; - u_int32_t oldstart; /* debug only */ off_t newbytes; - int retval, need_vinval=0; - - if (vp->v_type != VREG && vp->v_type != VLNK) { + int retval; + int lockflags = 0; + int took_trunc_lock = 0; + int started_tr = 0; + enum vtype vnodetype; + + vnodetype = vnode_vtype(vp); + if (vnodetype != VREG && vnodetype != VLNK) { return (EPERM); } @@ -2230,41 +2624,63 @@ hfs_relocate(vp, blockHint, cred, p) return (ENOSPC); } + cp = VTOC(vp); fp = VTOF(vp); if (fp->ff_unallocblocks) return (EINVAL); - vcb = VTOVCB(vp); - blksize = vcb->blockSize; + blksize = hfsmp->blockSize; if (blockHint == 0) - blockHint = vcb->nextAllocation; + blockHint = hfsmp->nextAllocation; if ((fp->ff_size > (u_int64_t)0x7fffffff) || - (vp->v_type == VLNK && fp->ff_size > blksize)) { + ((fp->ff_size > blksize) && 
vnodetype == VLNK)) { return (EFBIG); } + // + // We do not believe that this call to hfs_fsync() is + // necessary and it causes a journal transaction + // deadlock so we are removing it. + // + //if (vnodetype == VREG && !vnode_issystem(vp)) { + // retval = hfs_fsync(vp, MNT_WAIT, 0, p); + // if (retval) + // return (retval); + //} + + if (!vnode_issystem(vp) && (vnodetype != VLNK)) { + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(cp); + return (retval); + } + took_trunc_lock = 1; + } headblks = fp->ff_blocks; datablks = howmany(fp->ff_size, blksize); growsize = datablks * blksize; - realsize = fp->ff_size; eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask; if (blockHint >= hfsmp->hfs_metazone_start && blockHint <= hfsmp->hfs_metazone_end) eflags |= kEFMetadataMask; - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - return (EINVAL); - } + if (hfs_start_transaction(hfsmp) != 0) { + if (took_trunc_lock) + hfs_unlock_truncate(cp); + return (EINVAL); } + started_tr = 1; + /* + * Protect the extents b-tree and the allocation bitmap + * during MapFileBlockC and ExtendFileC operations. + */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - /* Lock extents b-tree (also protects volume bitmap) */ - retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p); - if (retval) - goto out2; - - retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL); + retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL); if (retval) { retval = MacToVFSError(retval); goto out; @@ -2273,14 +2689,23 @@ hfs_relocate(vp, blockHint, cred, p) /* * STEP 1 - acquire new allocation blocks. */ - nextallocsave = vcb->nextAllocation; - retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes); - if (eflags & kEFMetadataMask) - vcb->nextAllocation = nextallocsave; + if (!vnode_isnocache(vp)) { + vnode_setnocache(vp); + disabled_caching = 1; + + } + nextallocsave = hfsmp->nextAllocation; + retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes); + if (eflags & kEFMetadataMask) { + HFS_MOUNT_LOCK(hfsmp, TRUE); + hfsmp->nextAllocation = nextallocsave; + hfsmp->vcbFlags |= 0xFF00; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); + } retval = MacToVFSError(retval); if (retval == 0) { - VTOC(vp)->c_flag |= C_MODIFIED; + cp->c_flag |= C_MODIFIED; if (newbytes < growsize) { retval = ENOSPC; goto restore; @@ -2290,7 +2715,7 @@ hfs_relocate(vp, blockHint, cred, p) goto restore; } } - retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL); + retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL); if (retval) { retval = MacToVFSError(retval); } else if ((sector_a + 1) == sector_b) { @@ -2304,101 +2729,106 @@ hfs_relocate(vp, blockHint, cred, p) goto restore; } } + /* Done with system locks and journal for now. */ + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + hfs_end_transaction(hfsmp); + started_tr = 0; + if (retval) { /* * Check to see if failure is due to excessive fragmentation. 
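* (The allocation above asked for contiguous space via kEFContigMask, so ENOSPC while more than twice the file's data blocks remain free means the free space is too fragmented to use.)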
*/ - if (retval == ENOSPC && - hfs_freeblks(hfsmp, 0) > (datablks * 2)) { + if ((retval == ENOSPC) && + (hfs_freeblks(hfsmp, 0) > (datablks * 2))) { hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE; } goto out; } - - fp->ff_size = fp->ff_blocks * blksize; - if (UBCISVALID(vp)) - (void) ubc_setsize(vp, fp->ff_size); - /* - * STEP 2 - clone data into the new allocation blocks. + * STEP 2 - clone file data into the new allocation blocks. */ - // XXXdbg - unlock the extents overflow file because hfs_clonefile() - // calls vinvalbuf() which calls hfs_fsync() which can - // call hfs_metasync() which may need to lock the catalog - // file -- but the catalog file may be locked and blocked - // waiting for the extents overflow file if we're unlucky. - // see radar 3742973 for more details. - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); - - if (vp->v_type == VLNK) + if (vnodetype == VLNK) retval = hfs_clonelink(vp, blksize, cred, p); - else if (vp->v_flag & VSYSTEM) + else if (vnode_issystem(vp)) retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p); else - retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p); - - // XXXdbg - relock the extents overflow file - (void)hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p); + retval = hfs_clonefile(vp, headblks, datablks, blksize); + /* Start transaction for step 3 or for a restore. */ + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto out; + } + started_tr = 1; if (retval) goto restore; - - oldstart = fp->ff_extents[0].startBlock; /* - * STEP 3 - switch to clone and remove old blocks. + * STEP 3 - switch to cloned data and remove old blocks. */ - SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - retval = HeadTruncateFile(vcb, (FCB*)fp, headblks); + retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks); - CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */ - if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP)) - wakeup(VTOC(vp)); + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; if (retval) goto restore; - - fp->ff_size = realsize; - if (UBCISVALID(vp)) { - (void) ubc_setsize(vp, realsize); - need_vinval = 1; - } - - CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */ out: - (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + if (took_trunc_lock) + hfs_unlock_truncate(cp); - // XXXdbg - do this after unlocking the extents-overflow - // file to avoid deadlocks (see comment above by STEP 2) - if (need_vinval) { - (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; } - retval = VOP_FSYNC(vp, cred, MNT_WAIT, p); -out2: + // See comment up above about calls to hfs_fsync() + // + //if (retval == 0) + // retval = hfs_fsync(vp, MNT_WAIT, 0, p); + if (hfsmp->jnl) { - if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID) + if (cp->c_cnid < kHFSFirstUserCatalogNodeID) (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); else (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); - journal_end_transaction(hfsmp->jnl); } - hfs_global_shared_lock_release(hfsmp); +exit: + if (disabled_caching) { + vnode_clearnocache(vp); + } + if (started_tr) + hfs_end_transaction(hfsmp); return (retval); restore: + if (fp->ff_blocks == headblks) + goto exit; /* * Give back any newly allocated space. 
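* (Truncating back to ff_size releases the blocks that were added for the clone; the logical size itself never changed during relocation.)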
*/ - if (fp->ff_size != realsize) - fp->ff_size = realsize; - (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false); - if (UBCISVALID(vp)) - (void) ubc_setsize(vp, fp->ff_size); - CLR(VTOC(vp)->c_flag, C_RELOCATING); - goto out; + if (lockflags == 0) { + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + } + + (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false); + + hfs_systemfile_unlock(hfsmp, lockflags); + lockflags = 0; + + if (took_trunc_lock) + hfs_unlock_truncate(cp); + goto exit; } @@ -2407,30 +2837,30 @@ restore: * */ static int -hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p) +hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p) { struct buf *head_bp = NULL; struct buf *tail_bp = NULL; int error; - error = meta_bread(vp, 0, blksize, cred, &head_bp); + error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp); if (error) goto out; - tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META); + tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META); if (tail_bp == NULL) { error = EIO; goto out; } - bcopy(head_bp->b_data, tail_bp->b_data, blksize); - error = bwrite(tail_bp); + bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize); + error = (int)buf_bwrite(tail_bp); out: if (head_bp) { - head_bp->b_flags |= B_INVAL; - brelse(head_bp); + buf_markinvalid(head_bp); + buf_brelse(head_bp); } - (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); return (error); } @@ -2440,39 +2870,19 @@ out: * */ static int -hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize, - struct ucred *cred, struct proc *p) +hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) { caddr_t bufp; size_t writebase; size_t bufsize; size_t copysize; size_t iosize; - size_t filesize; + off_t filesize; size_t offset; - struct uio auio; - struct iovec aiov; - int devblocksize; - int didhold; - int error; - - - if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) { - printf("hfs_clonefile: vinvalbuf failed - %d\n", error); - return (error); - } - - if (!ubc_clean(vp, 1)) { - printf("hfs_clonefile: not ubc_clean\n"); - return (EIO); /* XXX error code */ - } - - /* - * Suspend page-outs for this file. 
- */ - SET(VTOC(vp)->c_flag, C_RELOCATING); + uio_t auio; + int error = 0; - filesize = VTOF(vp)->ff_size; + filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */ writebase = blkstart * blksize; copysize = blkcnt * blksize; iosize = bufsize = MIN(copysize, 4096 * 16); @@ -2481,71 +2891,54 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize, if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { return (ENOMEM); } + hfs_unlock(VTOC(vp)); - VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize); - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); while (offset < copysize) { iosize = MIN(copysize - offset, iosize); - aiov.iov_base = bufp; - aiov.iov_len = iosize; - auio.uio_resid = iosize; - auio.uio_offset = offset; - auio.uio_rw = UIO_READ; + uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_addiov(auio, (uintptr_t)bufp, iosize); - error = cluster_read(vp, &auio, copysize, devblocksize, 0); + error = cluster_read(vp, auio, copysize, 0); if (error) { printf("hfs_clonefile: cluster_read failed - %d\n", error); break; } - if (auio.uio_resid != 0) { - printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid); + if (uio_resid(auio) != 0) { + printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio)); error = EIO; break; } + uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE); + uio_addiov(auio, (uintptr_t)bufp, iosize); - aiov.iov_base = bufp; - aiov.iov_len = iosize; - auio.uio_resid = iosize; - auio.uio_offset = writebase + offset; - auio.uio_rw = UIO_WRITE; - - error = cluster_write(vp, &auio, filesize + offset, + error = cluster_write(vp, auio, filesize + offset, filesize + offset + iosize, - auio.uio_offset, 0, devblocksize, 0); + uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); if (error) { printf("hfs_clonefile: cluster_write failed - %d\n", error); break; } - if (auio.uio_resid != 0) { + if (uio_resid(auio) != 0) { printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n"); error = EIO; break; } offset += iosize; } - if (error == 0) { - /* Clean the pages in VM. */ - didhold = ubc_hold(vp); - if (didhold) - (void) ubc_clean(vp, 1); - - /* - * Clean out all associated buffers. - */ - (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); - - if (didhold) - ubc_rele(vp); - } + uio_free(auio); + + /* + * No need to call ubc_sync_range or hfs_invalbuf + * since the file was copied using IO_NOCACHE. 
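+ * The clone loop wrote with IO_NOCACHE | IO_SYNC, so the data is already on disk and nothing stale is left behind in the cache.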
+ */ + kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); - + + hfs_lock(VTOC(vp), HFS_FORCE_LOCK); return (error); } @@ -2555,15 +2948,17 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize, */ static int hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, - struct ucred *cred, struct proc *p) + kauth_cred_t cred, struct proc *p) { caddr_t bufp; char * offset; size_t bufsize; size_t iosize; struct buf *bp = NULL; - daddr_t blkno; - daddr_t blk; + daddr64_t blkno; + daddr64_t blk; + daddr64_t start_blk; + daddr64_t last_blk; int breadcnt; int i; int error = 0; @@ -2576,30 +2971,31 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { return (ENOMEM); } - blkstart = (blkstart * blksize) / iosize; - blkcnt = (blkcnt * blksize) / iosize; + start_blk = ((daddr64_t)blkstart * blksize) / iosize; + last_blk = ((daddr64_t)blkcnt * blksize) / iosize; blkno = 0; - while (blkno < blkcnt) { + while (blkno < last_blk) { /* * Read up to a megabyte */ offset = bufp; - for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) { - error = meta_bread(vp, blk, iosize, cred, &bp); + for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) { + error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp); if (error) { printf("hfs_clonesysfile: meta_bread error %d\n", error); goto out; } - if (bp->b_bcount != iosize) { - printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount); + if (buf_count(bp) != iosize) { + printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp)); goto out; } - - bcopy(bp->b_data, offset, iosize); - bp->b_flags |= B_INVAL; - brelse(bp); + bcopy((char *)buf_dataptr(bp), offset, iosize); + + buf_markinvalid(bp); + buf_brelse(bp); bp = NULL; + offset += iosize; } @@ -2607,15 +3003,15 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, * Write up to a megabyte */ offset = bufp; - for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) { - bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META); + for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) { + bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META); if (bp == NULL) { - printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno); + printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno); error = EIO; goto out; } - bcopy(offset, bp->b_data, iosize); - error = bwrite(bp); + bcopy(offset, (char *)buf_dataptr(bp), iosize); + error = (int)buf_bwrite(bp); bp = NULL; if (error) goto out; @@ -2624,13 +3020,12 @@ hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, } out: if (bp) { - brelse(bp); + buf_brelse(bp); } kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); - error = VOP_FSYNC(vp, cred, MNT_WAIT, p); + error = hfs_fsync(vp, MNT_WAIT, 0, p); return (error); } - diff --git a/bsd/hfs/hfs_search.c b/bsd/hfs/hfs_search.c index 83fef8f2e..930f9776c 100644 --- a/bsd/hfs/hfs_search.c +++ b/bsd/hfs/hfs_search.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1997-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -35,6 +34,8 @@ #include #include #include +#include +#include #include "hfs.h" #include "hfs_dbg.h" @@ -43,16 +44,15 @@ #include "hfs_endian.h" #include "hfscommon/headers/FileMgrInternal.h" -#include "hfscommon/headers/CatalogPrivate.h" #include "hfscommon/headers/HFSUnicodeWrappers.h" #include "hfscommon/headers/BTreesPrivate.h" #include "hfscommon/headers/BTreeScanner.h" - +#include "hfscommon/headers/CatalogPrivate.h" /* Search criterea. */ struct directoryInfoSpec { - u_long numFiles; + u_int32_t numFiles; }; struct fileInfoSpec @@ -75,7 +75,7 @@ struct searchinfospec struct timespec changeDate; struct timespec accessDate; struct timespec lastBackupDate; - u_long finderInfo[8]; + uint8_t finderInfo[32]; uid_t uid; gid_t gid; mode_t mask; @@ -87,7 +87,7 @@ typedef struct searchinfospec searchinfospec_t; static void ResolveHardlink(ExtendedVCB *vcb, HFSPlusCatalogFile *recp); -static int UnpackSearchAttributeBlock(struct vnode *vp, struct attrlist *alist, +static int UnpackSearchAttributeBlock(struct hfsmount *hfsmp, struct attrlist *alist, searchinfospec_t *searchInfo, void *attributeBuffer); static int CheckCriteria( ExtendedVCB *vcb, @@ -101,10 +101,10 @@ static int CheckCriteria( ExtendedVCB *vcb, static int CheckAccess(ExtendedVCB *vcb, u_long searchBits, CatalogKey *key, struct proc *p); -static int InsertMatch(struct vnode *vp, struct uio *a_uio, CatalogRecord *rec, +static int InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, CatalogKey *key, struct attrlist *returnAttrList, void *attributesBuffer, void *variableBuffer, - u_long bufferSize, u_long * nummatches ); + u_long * nummatches ); static Boolean CompareRange(u_long val, u_long low, u_long high); static Boolean CompareWideRange(u_int64_t val, u_int64_t low, u_int64_t high); @@ -124,21 +124,8 @@ static Boolean CompareWideRange( u_int64_t val, u_int64_t low, u_int64_t high ) static Boolean IsTargetName( searchinfospec_t * searchInfoPtr, Boolean isHFSPlus ); #endif // Installer workaround -extern int cat_convertkey( - struct hfsmount *hfsmp, - CatalogKey *key, - CatalogRecord * recp, - struct cat_desc *descp); +__private_extern__ int hfs_vnop_search(struct vnop_searchfs_args *ap); -extern void cat_convertattr( - struct hfsmount *hfsmp, - CatalogRecord * recp, - struct cat_attr *attrp, - struct cat_fork *datafp, - struct cat_fork *rsrcfp); - -extern int resolvelink(struct hfsmount *hfsmp, u_long linkref, - struct HFSPlusCatalogFile *recp); /************************************************************************/ /* Entry for searchfs() */ @@ -149,19 +136,19 @@ extern int resolvelink(struct hfsmount *hfsmp, u_long linkref, # #% searchfs vp L L L # -vop_searchfs { +vnop_searchfs { IN struct vnode *vp; IN off_t length; IN int flags; - IN struct ucred *cred; + IN kauth_cred_t cred; IN struct proc *p; }; */ __private_extern__ int -hfs_search( ap ) - struct vop_searchfs_args *ap; /* +hfs_vnop_search(ap) + struct vnop_searchfs_args *ap; /* struct vnodeop_desc *a_desc; struct vnode *a_vp; void *a_searchparams1; @@ -175,9 +162,11 @@ hfs_search( ap ) u_long a_options; struct uio *a_uio; struct searchstate *a_searchstate; + vfs_context_t a_context; */ { ExtendedVCB *vcb = VTOVCB(ap->a_vp); + struct hfsmount *hfsmp; FCB * catalogFCB; searchinfospec_t searchInfo1; searchinfospec_t searchInfo2; @@ -185,7 +174,7 @@ hfs_search( ap ) void *variableBuffer; u_long fixedBlockSize; u_long eachReturnBufferSize; 
- struct proc *p = current_proc(); + struct proc *p = proc_self(); int err = E_NONE; int isHFSPlus; int timerExpired = false; @@ -194,9 +183,10 @@ hfs_search( ap ) CatalogRecord * myCurrentDataPtr; CatPosition * myCatPositionPtr; BTScanState myBTScanState; - void *user_start = NULL; - int user_len; + user_addr_t user_start = 0; + user_size_t user_len = 0; int32_t searchTime; + int lockflags; /* XXX Parameter check a_searchattrs? */ @@ -216,14 +206,15 @@ hfs_search( ap ) attrs = ap->a_searchattrs->commonattr | ap->a_returnattrs->commonattr; if (attrs & (ATTR_CMN_NAME | ATTR_CMN_PAROBJID)) return (EINVAL); - if ((err = suser(p->p_ucred, &p->p_acflag))) + if ((err = suser(kauth_cred_get(), 0))) return (err); } - if (ap->a_uio->uio_resid <= 0) + if (uio_resid(ap->a_uio) <= 0) return (EINVAL); isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); + hfsmp = VTOHFS(ap->a_vp); searchTime = kMaxMicroSecsInKernel; if (ap->a_timelimit->tv_sec == 0 && @@ -233,14 +224,15 @@ hfs_search( ap ) } /* UnPack the search boundries, searchInfo1, searchInfo2 */ - err = UnpackSearchAttributeBlock(ap->a_vp, ap->a_searchattrs, + err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, &searchInfo1, ap->a_searchparams1); if (err) return err; - err = UnpackSearchAttributeBlock(ap->a_vp, ap->a_searchattrs, + err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, &searchInfo2, ap->a_searchparams2); if (err) return err; - fixedBlockSize = sizeof(u_long) + hfs_attrblksize(ap->a_returnattrs); /* u_long for length longword */ + fixedBlockSize = sizeof(uint32_t) + hfs_attrblksize(ap->a_returnattrs); /* uint32_t for length word */ + eachReturnBufferSize = fixedBlockSize; if ( ap->a_returnattrs->commonattr & ATTR_CMN_NAME ) /* XXX should be more robust! */ @@ -253,20 +245,17 @@ hfs_search( ap ) // while holding the shared catalog file lock. see the comment // in hfs_readdir() for more details. // - if (VTOHFS(ap->a_vp)->jnl && ap->a_uio->uio_segflg == UIO_USERSPACE) { - user_start = ap->a_uio->uio_iov->iov_base; - user_len = ap->a_uio->uio_iov->iov_len; + if (hfsmp->jnl && uio_isuserspace(ap->a_uio)) { + user_start = uio_curriovbase(ap->a_uio); + user_len = uio_curriovlen(ap->a_uio); if ((err = vslock(user_start, user_len)) != 0) { - user_start = NULL; + user_start = 0; goto ExitThisRoutine; } } - /* Lock catalog b-tree */ - err = hfs_metafilelocking(VTOHFS(ap->a_vp), kHFSCatalogFileID, LK_SHARED, p); - if (err) - goto ExitThisRoutine; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); catalogFCB = GetFileControlBlock(vcb->catalogRefNum); myCurrentKeyPtr = NULL; @@ -276,9 +265,11 @@ hfs_search( ap ) if (ap->a_options & SRCHFS_START) { /* Starting a new search. 
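The user_start/user_len dance above is worth calling out: on journaled volumes the caller's result buffer is wired with vslock() before the shared catalog lock is taken, so copying matches out cannot page-fault while the catalog (and hence the journal) is held; the hunk's comment defers to hfs_readdir() for the full rationale. The shape, condensed and illustrative:

        /* Sketch only; error handling abbreviated. */
        user_addr_t ustart = uio_curriovbase(uio);
        user_size_t ulen   = uio_curriovlen(uio);

        if ((err = vslock(ustart, ulen)) != 0)          /* wire user pages */
                return (err);
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
        /* ... b-tree scan; InsertMatch() copyouts are now fault-free ... */
        hfs_systemfile_unlock(hfsmp, lockflags);
        vsunlock(ustart, ulen, TRUE);                   /* unwire */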
*/ /* Make sure the on-disk Catalog file is current */ - (void) VOP_FSYNC(vcb->catalogRefNum, NOCRED, MNT_WAIT, p); - if (VTOHFS(ap->a_vp)->jnl) { - journal_flush(VTOHFS(ap->a_vp)->jnl); + (void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p); + if (hfsmp->jnl) { + hfs_systemfile_unlock(hfsmp, lockflags); + journal_flush(hfsmp->jnl); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); } ap->a_options &= ~SRCHFS_START; @@ -312,17 +303,20 @@ hfs_search( ap ) result = BTSearchRecord( catalogFCB, &iterator, &btrec, &reclen, &iterator ); if ( result == E_NONE ) { + // need to unlock since CheckAccess assumes no lock held + hfs_systemfile_unlock(hfsmp, lockflags); if (CheckCriteria(vcb, ap->a_options, ap->a_searchattrs, &rec, keyp, &searchInfo1, &searchInfo2, false) && - CheckAccess(vcb, ap->a_options, keyp, ap->a_uio->uio_procp)) { + CheckAccess(vcb, ap->a_options, keyp, p)) { - result = InsertMatch(ap->a_vp, ap->a_uio, &rec, + result = InsertMatch(hfsmp, ap->a_uio, &rec, keyp, ap->a_returnattrs, attributesBuffer, variableBuffer, - eachReturnBufferSize, ap->a_nummatches); + ap->a_nummatches); if (result == E_NONE && *(ap->a_nummatches) >= ap->a_maxmatches) doQuickExit = true; } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); } } #endif // Installer workaround @@ -340,9 +334,8 @@ hfs_search( ap ) err = EBUSY; /* catChangedErr */ } } + hfs_systemfile_unlock(hfsmp, lockflags); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(VTOHFS(ap->a_vp), kHFSCatalogFileID, LK_RELEASE, p); if (err) goto ExitThisRoutine; #if 1 // Installer workaround (2940423) @@ -365,16 +358,17 @@ hfs_search( ap ) break; /* Resolve any hardlinks */ - if (isHFSPlus && (ap->a_options & SRCHFS_SKIPLINKS) == 0) + if (isHFSPlus && (ap->a_options & SRCHFS_SKIPLINKS) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); ResolveHardlink(vcb, (HFSPlusCatalogFile *) myCurrentDataPtr); - + hfs_systemfile_unlock(hfsmp, lockflags); + } if (CheckCriteria( vcb, ap->a_options, ap->a_searchattrs, myCurrentDataPtr, myCurrentKeyPtr, &searchInfo1, &searchInfo2, true ) - && CheckAccess(vcb, ap->a_options, myCurrentKeyPtr, ap->a_uio->uio_procp)) { - err = InsertMatch(ap->a_vp, ap->a_uio, myCurrentDataPtr, + && CheckAccess(vcb, ap->a_options, myCurrentKeyPtr, p)) { + err = InsertMatch(hfsmp, ap->a_uio, myCurrentDataPtr, myCurrentKeyPtr, ap->a_returnattrs, - attributesBuffer, variableBuffer, - eachReturnBufferSize, ap->a_nummatches); + attributesBuffer, variableBuffer, ap->a_nummatches); if (err) { /* * The last match didn't fit so come back @@ -394,7 +388,7 @@ hfs_search( ap ) * The idea here is to throttle the amount of time we * spend in the kernel. 
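The check that follows throttles kernel time per searchfs() call rather than scanning to completion; note the switch from reading the old global `time` to calling microuptime(). Condensed (assuming, as the code's own comment does, that searchTime is under 1,000,000 microseconds):

        struct timeval now, elapsed;

        microuptime(&now);
        timersub(&now, &myBTScanState.startTime, &elapsed);
        if (elapsed.tv_sec > 0 || elapsed.tv_usec >= searchTime)
                timerExpired = true;    /* caller returns partial results */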
*/ - myCurrentTime = time; + microuptime(&myCurrentTime); timersub(&myCurrentTime, &myBTScanState.startTime, &myElapsedTime); /* Note: assumes kMaxMicroSecsInKernel is less than 1,000,000 */ if (myElapsedTime.tv_sec > 0 @@ -425,9 +419,9 @@ QuickExit: } ExitThisRoutine: - FREE( attributesBuffer, M_TEMP ); + FREE( attributesBuffer, M_TEMP ); - if (VTOHFS(ap->a_vp)->jnl && user_start) { + if (hfsmp->jnl && user_start) { vsunlock(user_start, user_len, TRUE); } @@ -514,100 +508,6 @@ ComparePartialPascalName ( register ConstStr31Param str, register ConstStr31Para } - -static char *extension_table=NULL; -static int nexts; -static int max_ext_width; - -static int -extension_cmp(void *a, void *b) -{ - return (strlen((char *)a) - strlen((char *)b)); -} - - -// -// This is the api LaunchServices uses to inform the kernel -// the list of package extensions to ignore. -// -// Internally we keep the list sorted by the length of the -// the extension (from longest to shortest). We sort the -// list of extensions so that we can speed up our searches -// when comparing file names -- we only compare extensions -// that could possibly fit into the file name, not all of -// them (i.e. a short 8 character name can't have an 8 -// character extension). -// -__private_extern__ int -set_package_extensions_table(void *data, int nentries, int maxwidth) -{ - char *new_exts, *ptr; - int error, i, len; - - if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { - return EINVAL; - } - - MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); - - error = copyin(data, new_exts, nentries * maxwidth); - if (error) { - FREE(new_exts, M_TEMP); - return error; - } - - if (extension_table) { - FREE(extension_table, M_TEMP); - } - extension_table = new_exts; - nexts = nentries; - max_ext_width = maxwidth; - - qsort(extension_table, nexts, maxwidth, extension_cmp); - - return 0; -} - - -static int -is_package_name(char *name, int len) -{ - int i, extlen; - char *ptr, *name_ext; - - if (len <= 3) { - return 0; - } - - name_ext = NULL; - for(ptr=name; *ptr != '\0'; ptr++) { - if (*ptr == '.') { - name_ext = ptr; - } - } - - // if there is no "." extension, it can't match - if (name_ext == NULL) { - return 0; - } - - // advance over the "." - name_ext++; - - // now iterate over all the extensions to see if any match - ptr = &extension_table[0]; - for(i=0; i < nexts; i++, ptr+=max_ext_width) { - extlen = strlen(ptr); - if (strncmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { - // aha, a match! - return 1; - } - } - - // if we get here, no extension matched - return 0; -} - // // Determine if a name is "inappropriate" where the definition // of "inappropriate" is up to higher level execs. 
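The set_package_extensions_table()/is_package_name() pair removed above evidently moves out of hfs_search.c rather than dying: CheckAccess() below still calls is_package_name(), so the implementation presumably now lives in shared VFS code. One nit in the removed comment: it claims the table is kept longest-to-shortest, but extension_cmp() as written sorts ascending by strlen(), i.e. shortest first. The matching idea itself, as a self-contained user-level sketch (the kernel version also enforces a minimum name length):

        #include <string.h>

        /* Match name's ".ext" suffix against fixed-width table rows;
         * illustrative reimplementation of the removed is_package_name(). */
        static int
        matches_extension(const char *name, const char *table, int nexts, int width)
        {
                const char *dot = strrchr(name, '.');
                int i;

                if (dot == NULL || dot[1] == '\0')
                        return 0;
                for (i = 0; i < nexts; i++) {
                        const char *ext = table + i * width;
                        size_t extlen = strlen(ext);
                        if (strncmp(dot + 1, ext, extlen) == 0 && dot[1 + extlen] == '\0')
                                return 1;
                }
                return 0;
        }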
Currently @@ -616,11 +516,11 @@ is_package_name(char *name, int len) static int is_inappropriate_name(char *name, int len) { - char *bad_names[] = { "System" }; + const char *bad_names[] = { "System" }; int bad_len[] = { 6 }; int i; - for(i=0; i < sizeof(bad_names) / sizeof(bad_names[0]); i++) { + for(i=0; i < (int) (sizeof(bad_names) / sizeof(bad_names[0])); i++) { if (len == bad_len[i] && strcmp(name, bad_names[i]) == 0) { return 1; } @@ -639,27 +539,25 @@ is_inappropriate_name(char *name, int len) static int CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, struct proc *theProcPtr) { - Boolean isHFSPlus; - int myErr; - int myResult; + Boolean isHFSPlus; + int myErr; + int myResult; HFSCatalogNodeID myNodeID; - unsigned long myPerms; - hfsmount_t * my_hfsmountPtr; - struct cat_desc my_cat_desc; - struct cat_attr my_cat_attr; - struct FndrDirInfo *finder_info; + hfsmount_t * hfsmp; + struct FndrDirInfo *finfop; + struct vnode * vp = NULL; + struct vfs_context my_context; - myResult = 0; /* default to "no access" */ - my_cat_desc.cd_nameptr = NULL; - my_cat_desc.cd_namelen = 0; + my_context.vc_proc = theProcPtr; + my_context.vc_ucred = kauth_cred_get(); - if ( theProcPtr->p_ucred->cr_uid == 0 ) { + if (!proc_suser(theProcPtr)) { myResult = 1; /* allow access */ goto ExitThisRoutine; /* root always has access */ } - my_hfsmountPtr = VCBTOHFS( theVCBPtr ); + hfsmp = VCBTOHFS( theVCBPtr ); isHFSPlus = ( theVCBPtr->vcbSigWord == kHFSPlusSigWord ); if ( isHFSPlus ) myNodeID = theKeyPtr->hfsPlus.parentID; @@ -667,54 +565,57 @@ CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, st myNodeID = theKeyPtr->hfs.parentID; while ( myNodeID >= kRootDirID ) { + cnode_t * cp; + /* now go get catalog data for this directory */ - myErr = hfs_metafilelocking( my_hfsmountPtr, kHFSCatalogFileID, LK_SHARED, theProcPtr ); - if ( myErr ) - goto ExitThisRoutine; /* no access */ - - myErr = cat_idlookup( my_hfsmountPtr, myNodeID, &my_cat_desc, &my_cat_attr, NULL ); - (void) hfs_metafilelocking( my_hfsmountPtr, kHFSCatalogFileID, LK_RELEASE, theProcPtr ); - if ( myErr ) + myErr = hfs_vget(hfsmp, myNodeID, &vp, 0); + if ( myErr ) { goto ExitThisRoutine; /* no access */ + } - if (searchBits & SRCHFS_SKIPPACKAGES) { - if (is_package_name(my_cat_desc.cd_nameptr, my_cat_desc.cd_namelen)) { - myResult = 0; - goto ExitThisRoutine; + cp = VTOC(vp); + finfop = (struct FndrDirInfo *)&cp->c_attr.ca_finderinfo[0]; + + if ( searchBits & SRCHFS_SKIPPACKAGES ) { + if ( (SWAP_BE16(finfop->frFlags) & kHasBundle) + || (cp->c_desc.cd_nameptr != NULL + && is_package_name(cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen)) ) { + myResult = 0; + goto ExitThisRoutine; } } - if (searchBits & SRCHFS_SKIPINAPPROPRIATE) { - if ( my_cat_desc.cd_parentcnid == kRootDirID - && is_inappropriate_name(my_cat_desc.cd_nameptr, my_cat_desc.cd_namelen)) { - myResult = 0; - goto ExitThisRoutine; + if ( searchBits & SRCHFS_SKIPINAPPROPRIATE ) { + if ( cp->c_parentcnid == kRootDirID && cp->c_desc.cd_nameptr != NULL && + is_inappropriate_name(cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) ) { + myResult = 0; + goto ExitThisRoutine; } } - finder_info = (struct FndrDirInfo *)&my_cat_attr.ca_finderinfo[0]; - if ( (searchBits & SRCHFS_SKIPINVISIBLE) - && (SWAP_BE16(finder_info->frFlags) & kIsInvisible)) { - + if ( (searchBits & SRCHFS_SKIPINVISIBLE) && + (SWAP_BE16(finfop->frFlags) & kIsInvisible) ) { myResult = 0; goto ExitThisRoutine; } - myNodeID = my_cat_desc.cd_parentcnid; /* move up the hierarchy */ - 
myPerms = DerivePermissionSummary(my_cat_attr.ca_uid, my_cat_attr.ca_gid, - my_cat_attr.ca_mode, my_hfsmountPtr->hfs_mp, - theProcPtr->p_ucred, theProcPtr ); - - cat_releasedesc( &my_cat_desc ); - - if ( (myPerms & X_OK) == 0 ) + myNodeID = cp->c_parentcnid; /* move up the hierarchy */ + hfs_unlock(VTOC(vp)); + myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH), &my_context); + //myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context); + vnode_put(vp); + vp = NULL; + if ( myErr ) { goto ExitThisRoutine; /* no access */ + } } - myResult = 1; /* allow access */ ExitThisRoutine: - cat_releasedesc( &my_cat_desc ); + if ( vp != NULL ) { + hfs_unlock(VTOC(vp)); + vnode_put(vp); + } return ( myResult ); } @@ -732,10 +633,11 @@ CheckCriteria( ExtendedVCB *vcb, Boolean matched, atleastone; Boolean isHFSPlus; attrgroup_t searchAttributes; - struct cat_attr c_attr = {0}; + struct cat_attr c_attr; struct cat_fork datafork; struct cat_fork rsrcfork; + bzero(&c_attr, sizeof(c_attr)); isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); switch (rec->recordType) { @@ -1056,9 +958,9 @@ CheckCriteria( ExtendedVCB *vcb, /* mode */ if ( searchAttributes & ATTR_CMN_ACCESSMASK ) { - matched = CompareRange((u_long)c_attr.ca_mode, - (u_long)searchInfo1->mask, - (u_long)searchInfo2->mask); + matched = CompareRange((uint32_t)c_attr.ca_mode, + (uint32_t)searchInfo1->mask, + (uint32_t)searchInfo2->mask); if (matched == false) goto TestDone; atleastone = true; } @@ -1084,28 +986,28 @@ TestDone: * Adds another record to the packed array for output */ static int -InsertMatch( struct vnode *root_vp, struct uio *a_uio, CatalogRecord *rec, - CatalogKey *key, struct attrlist *returnAttrList, void *attributesBuffer, - void *variableBuffer, u_long bufferSize, u_long * nummatches ) +InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, + CatalogKey *key, struct attrlist *returnAttrList, + void *attributesBuffer, void *variableBuffer, u_long * nummatches) { int err; void *rovingAttributesBuffer; void *rovingVariableBuffer; u_long packedBufferSize; - ExtendedVCB *vcb = VTOVCB(root_vp); - Boolean isHFSPlus = vcb->vcbSigWord == kHFSPlusSigWord; - u_long privateDir = VTOHFS(root_vp)->hfs_privdir_desc.cd_cnid; + u_long privateDir = hfsmp->hfs_privdir_desc.cd_cnid; struct attrblock attrblk; - struct cat_desc c_desc = {0}; - struct cat_attr c_attr = {0}; + struct cat_desc c_desc; + struct cat_attr c_attr; struct cat_fork datafork; struct cat_fork rsrcfork; + bzero(&c_desc, sizeof(c_desc)); + bzero(&c_attr, sizeof(c_attr)); rovingAttributesBuffer = (char*)attributesBuffer + sizeof(u_long); /* Reserve space for length field */ rovingVariableBuffer = variableBuffer; /* Convert catalog record into cat_attr format. 
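Stepping back to CheckAccess() above: instead of deriving a permission summary from raw catalog attributes (DerivePermissionSummary), it now materializes each ancestor with hfs_vget() and asks the kauth layer directly. The walk, condensed, with error paths trimmed; hfs_vget() hands back a locked cnode, hence the hfs_unlock() before vnode_authorize():

        while (myNodeID >= kRootDirID) {
                if (hfs_vget(hfsmp, myNodeID, &vp, 0))
                        return (0);                       /* no access */
                myNodeID = VTOC(vp)->c_parentcnid;        /* move up   */
                hfs_unlock(VTOC(vp));
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &my_context);
                vnode_put(vp);
                if (myErr)
                        return (0);                       /* no access */
        }
        return (1);                                       /* allow     */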
*/ - cat_convertattr(VTOHFS(root_vp), rec, &c_attr, &datafork, &rsrcfork); + cat_convertattr(hfsmp, rec, &c_attr, &datafork, &rsrcfork); /* hide our private meta data directory */ if ((privateDir != 0) && (c_attr.ca_fileid == privateDir)) { @@ -1114,21 +1016,21 @@ InsertMatch( struct vnode *root_vp, struct uio *a_uio, CatalogRecord *rec, } /* Hide the private journal files */ - if (VTOHFS(root_vp)->jnl && - ((c_attr.ca_fileid == VTOHFS(root_vp)->hfs_jnlfileid) || - (c_attr.ca_fileid == VTOHFS(root_vp)->hfs_jnlinfoblkid))) { + if (hfsmp->jnl && + ((c_attr.ca_fileid == hfsmp->hfs_jnlfileid) || + (c_attr.ca_fileid == hfsmp->hfs_jnlinfoblkid))) { err = 0; goto exit; } if (returnAttrList->commonattr & ATTR_CMN_NAME) { - cat_convertkey(VTOHFS(root_vp), key, rec, &c_desc); + cat_convertkey(hfsmp, key, rec, &c_desc); } else { c_desc.cd_cnid = c_attr.ca_fileid; - if (isHFSPlus) - c_desc.cd_parentcnid = key->hfsPlus.parentID; - else + if (hfsmp->hfs_flags & HFS_STANDARD) c_desc.cd_parentcnid = key->hfs.parentID; + else + c_desc.cd_parentcnid = key->hfsPlus.parentID; } attrblk.ab_attrlist = returnAttrList; @@ -1137,11 +1039,11 @@ InsertMatch( struct vnode *root_vp, struct uio *a_uio, CatalogRecord *rec, attrblk.ab_flags = 0; attrblk.ab_blocksize = 0; - hfs_packattrblk(&attrblk, VTOHFS(root_vp), NULL, &c_desc, &c_attr, &datafork, &rsrcfork, a_uio->uio_procp); + hfs_packattrblk(&attrblk, hfsmp, NULL, &c_desc, &c_attr, &datafork, &rsrcfork, current_proc()); packedBufferSize = (char*)rovingVariableBuffer - (char*)attributesBuffer; - if ( packedBufferSize > a_uio->uio_resid ) + if ( packedBufferSize > uio_resid(a_uio) ) return( errSearchBufferFull ); (* nummatches)++; @@ -1157,18 +1059,21 @@ exit: static int -UnpackSearchAttributeBlock( struct vnode *vp, struct attrlist *alist, searchinfospec_t *searchInfo, void *attributeBuffer ) +UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, searchinfospec_t *searchInfo, void *attributeBuffer ) { attrgroup_t a; u_long bufferSize; + boolean_t is_64_bit; DBG_ASSERT(searchInfo != NULL); + + is_64_bit = proc_is64bit(current_proc()); - bufferSize = *((u_long *)attributeBuffer); + bufferSize = *((uint32_t *)attributeBuffer); if (bufferSize == 0) return (EINVAL); /* XXX -DJB is a buffer size of zero ever valid for searchfs? 
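The long run of hunks below all make the same change: each timespec copied out of the attribute buffer must honor the caller's ABI, since a 64-bit process lays down a struct user_timespec (8-byte fields) where a 32-bit process lays down a struct timespec. A hypothetical helper capturing what each hunk open-codes per attribute:

        /* Hypothetical; the patch inlines this for every date field. */
        static void *
        unpack_timespec(void *buf, struct timespec *ts, boolean_t is_64_bit)
        {
                if (is_64_bit) {
                        struct user_timespec tmp = *(struct user_timespec *)buf;
                        ts->tv_sec  = (time_t)tmp.tv_sec;   /* narrow to time_t */
                        ts->tv_nsec = tmp.tv_nsec;
                        return ((struct user_timespec *)buf + 1);
                }
                *ts = *(struct timespec *)buf;
                return ((struct timespec *)buf + 1);
        }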
*/ - ++((u_long *)attributeBuffer); /* advance past the size */ + ++((uint32_t *)attributeBuffer); /* advance past the size */ /* * UnPack common attributes @@ -1176,39 +1081,41 @@ UnpackSearchAttributeBlock( struct vnode *vp, struct attrlist *alist, searchinfo a = alist->commonattr; if ( a != 0 ) { if ( a & ATTR_CMN_NAME ) { - char *s = (char*) attributeBuffer + ((attrreference_t *) attributeBuffer)->attr_dataoffset; - size_t len = ((attrreference_t *) attributeBuffer)->attr_length; + char *s; + u_int32_t len; + + s = (char*) attributeBuffer + ((attrreference_t *) attributeBuffer)->attr_dataoffset; + len = ((attrreference_t *) attributeBuffer)->attr_length; if (len > sizeof(searchInfo->name)) return (EINVAL); - if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { - size_t ucslen; - /* Convert name to Unicode to match HFS Plus B-Tree names */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + /* Convert name to pascal string to match HFS B-Tree names */ if (len > 0) { - if (utf8_decodestr(s, len-1, (UniChar*)searchInfo->name, &ucslen, - sizeof(searchInfo->name), ':', UTF_DECOMPOSED)) + if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, s, (u_char*)searchInfo->name) != 0) return (EINVAL); - searchInfo->nameLength = ucslen / sizeof(UniChar); + searchInfo->nameLength = searchInfo->name[0]; } else { - searchInfo->nameLength = 0; + searchInfo->name[0] = searchInfo->nameLength = 0; } - ++((attrreference_t *)attributeBuffer); - + ++((attrreference_t *)attributeBuffer); } else { - /* Convert name to pascal string to match HFS B-Tree names */ + size_t ucslen; + /* Convert name to Unicode to match HFS Plus B-Tree names */ if (len > 0) { - if (utf8_to_hfs(VTOVCB(vp), len-1, s, (u_char*)searchInfo->name) != 0) + if (utf8_decodestr(s, len-1, (UniChar*)searchInfo->name, &ucslen, + sizeof(searchInfo->name), ':', UTF_DECOMPOSED)) return (EINVAL); - searchInfo->nameLength = searchInfo->name[0]; + searchInfo->nameLength = ucslen / sizeof(UniChar); } else { - searchInfo->name[0] = searchInfo->nameLength = 0; + searchInfo->nameLength = 0; } - ++((attrreference_t *)attributeBuffer); + ++((attrreference_t *)attributeBuffer); } } if ( a & ATTR_CMN_OBJID ) { @@ -1220,28 +1127,73 @@ UnpackSearchAttributeBlock( struct vnode *vp, struct attrlist *alist, searchinfo ++((fsobj_id_t *)attributeBuffer); } if ( a & ATTR_CMN_CRTIME ) { - searchInfo->creationDate = *((struct timespec *)attributeBuffer); - ++((struct timespec *)attributeBuffer); + if (is_64_bit) { + struct user_timespec tmp; + tmp = *((struct user_timespec *)attributeBuffer); + searchInfo->creationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->creationDate.tv_nsec = tmp.tv_nsec; + ++((struct user_timespec *)attributeBuffer); + } + else { + searchInfo->creationDate = *((struct timespec *)attributeBuffer); + ++((struct timespec *)attributeBuffer); + } } if ( a & ATTR_CMN_MODTIME ) { - searchInfo->modificationDate = *((struct timespec *)attributeBuffer); - ++((struct timespec *)attributeBuffer); + if (is_64_bit) { + struct user_timespec tmp; + tmp = *((struct user_timespec *)attributeBuffer); + searchInfo->modificationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->modificationDate.tv_nsec = tmp.tv_nsec; + ++((struct user_timespec *)attributeBuffer); + } + else { + searchInfo->modificationDate = *((struct timespec *)attributeBuffer); + ++((struct timespec *)attributeBuffer); + } } if ( a & ATTR_CMN_CHGTIME ) { - searchInfo->changeDate = *((struct timespec *)attributeBuffer); - ++((struct timespec *)attributeBuffer); + if (is_64_bit) { + struct user_timespec tmp; + tmp = *((struct 
user_timespec *)attributeBuffer); + searchInfo->changeDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->changeDate.tv_nsec = tmp.tv_nsec; + ++((struct user_timespec *)attributeBuffer); + } + else { + searchInfo->changeDate = *((struct timespec *)attributeBuffer); + ++((struct timespec *)attributeBuffer); + } } if ( a & ATTR_CMN_ACCTIME ) { - searchInfo->accessDate = *((struct timespec *)attributeBuffer); - ++((struct timespec *)attributeBuffer); + if (is_64_bit) { + struct user_timespec tmp; + tmp = *((struct user_timespec *)attributeBuffer); + searchInfo->accessDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->accessDate.tv_nsec = tmp.tv_nsec; + ++((struct user_timespec *)attributeBuffer); + } + else { + searchInfo->accessDate = *((struct timespec *)attributeBuffer); + ++((struct timespec *)attributeBuffer); + } } if ( a & ATTR_CMN_BKUPTIME ) { - searchInfo->lastBackupDate = *((struct timespec *)attributeBuffer); - ++((struct timespec *)attributeBuffer); + if (is_64_bit) { + struct user_timespec tmp; + tmp = *((struct user_timespec *)attributeBuffer); + searchInfo->lastBackupDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->lastBackupDate.tv_nsec = tmp.tv_nsec; + ++((struct user_timespec *)attributeBuffer); + } + else { + searchInfo->lastBackupDate = *((struct timespec *)attributeBuffer); + ++((struct timespec *)attributeBuffer); + } } if ( a & ATTR_CMN_FNDRINFO ) { - bcopy( attributeBuffer, searchInfo->finderInfo, sizeof(u_long) * 8 ); - (u_long *)attributeBuffer += 8; + bcopy( attributeBuffer, searchInfo->finderInfo, sizeof(searchInfo->finderInfo) ); + (uint8_t *)attributeBuffer += 32; } if ( a & ATTR_CMN_OWNERID ) { searchInfo->uid = *((uid_t *)attributeBuffer); @@ -1260,8 +1212,8 @@ UnpackSearchAttributeBlock( struct vnode *vp, struct attrlist *alist, searchinfo a = alist->dirattr; if ( a != 0 ) { if ( a & ATTR_DIR_ENTRYCOUNT ) { - searchInfo->d.numFiles = *((u_long *)attributeBuffer); - ++((u_long *)attributeBuffer); + searchInfo->d.numFiles = *((u_int32_t *)attributeBuffer); + ++((u_int32_t *)attributeBuffer); } } diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index af66e398b..f6569bf71 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -66,21 +66,22 @@ */ #include #include +#include #include -#include -#include -#include +#include +#include #include #include #include -#include #include #include #include #include +#include + +#include -// XXXdbg #include #include @@ -91,6 +92,7 @@ #include "hfs_cnode.h" #include "hfs_dbg.h" #include "hfs_endian.h" +#include "hfs_hotfiles.h" #include "hfs_quota.h" #include "hfscommon/headers/FileMgrInternal.h" @@ -103,65 +105,60 @@ int hfs_dbg_err = 0; #endif +lck_grp_attr_t * hfs_group_attr; +lck_attr_t * hfs_lock_attr; +lck_grp_t * hfs_mutex_group; +lck_grp_t * hfs_rwlock_group; + + extern struct vnodeopv_desc hfs_vnodeop_opv_desc; extern void hfs_converterinit(void); -extern void inittodr( time_t base); +extern void inittodr(time_t base); +extern int hfs_write_access(struct vnode *, kauth_cred_t, struct proc *, Boolean); -static int hfs_changefs __P((struct mount *mp, struct hfs_mount_args *args, - struct proc *p)); -static int hfs_reload __P((struct mount *mp, struct ucred *cred, struct proc *p)); -static int hfs_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, - struct hfs_mount_args *args)); -static int hfs_statfs __P((struct mount *mp, register struct statfs *sbp, - struct proc *p)); -static int hfs_flushfiles __P((struct mount *, int, struct proc *)); +static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); +static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); +static int hfs_flushfiles(struct mount *, int, struct proc *); +static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); +static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp); +static int hfs_init(struct vfsconf *vfsp); +static int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); +static int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, vfs_context_t context); +static int hfs_reload(struct mount *mp, kauth_cred_t cred, struct proc *p); +static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context); +static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); +static int hfs_start(struct mount *mp, int flags, vfs_context_t context); +static int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context); +static int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context); +static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context); +static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); +static int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); +static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); + +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk); -static int hfs_extendfs __P((struct mount *, u_int64_t, struct proc *)); /* * Called by vfs_mountroot when mounting HFS Plus as root. */ + __private_extern__ int -hfs_mountroot() +hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) { - extern struct vnode *rootvp; - struct mount *mp; - struct proc *p = current_proc(); /* XXX */ struct hfsmount *hfsmp; ExtendedVCB *vcb; + struct vfsstatfs *vfsp; int error; - /* - * Get vnode for rootdev. 
- */ - if ((error = bdevvp(rootdev, &rootvp))) { - printf("hfs_mountroot: can't setup bdevvp"); + if ((error = hfs_mountfs(rvp, mp, NULL, context))) return (error); - } - if ((error = vfs_rootmountalloc("hfs", "root_device", &mp))) { - vrele(rootvp); /* release the reference from bdevvp() */ - return (error); - } - if ((error = hfs_mountfs(rootvp, mp, p, NULL))) { - mp->mnt_vfc->vfc_refcount--; - - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, p); - vrele(rootvp); /* release the reference from bdevvp() */ - FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); - return (error); - } - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - /* Init hfsmp */ hfsmp = VFSTOHFS(mp); @@ -175,10 +172,9 @@ hfs_mountroot() vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100; vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize); - (void)hfs_statfs(mp, &mp->mnt_stat, p); - - vfs_unbusy(mp, p); - inittodr(HFSTOVCB(hfsmp)->vcbLsMod); + vfsp = vfs_statfs(mp); + (void)hfs_statfs(mp, vfsp, NULL); + return (0); } @@ -190,57 +186,62 @@ hfs_mountroot() */ static int -hfs_mount(mp, path, data, ndp, p) - register struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context) { + struct proc *p = vfs_context_proc(context); struct hfsmount *hfsmp = NULL; - struct vnode *devvp; struct hfs_mount_args args; - size_t size; int retval = E_NONE; - int flags; - mode_t accessmode; - - if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) - goto error_exit; + uint32_t cmdflags; - /* - * If updating, check whether changing from read-only to - * read/write; if there is no device name, that's all we do. - */ - if (mp->mnt_flag & MNT_UPDATE) { - + if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) { + return (retval); + } + cmdflags = (uint32_t)vfs_flags(mp) & MNT_CMDFLAGS; + if (cmdflags & MNT_UPDATE) { hfsmp = VFSTOHFS(mp); + + /* Reload incore data after an fsck. */ + if (cmdflags & MNT_RELOAD) { + if (vfs_isrdonly(mp)) + return hfs_reload(mp, vfs_context_ucred(context), p); + else + return (EINVAL); + } + + /* Change to a read-only file system. 
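The branch that follows performs the read-write to read-only downgrade; the ordering matters and is easy to lose in the diff noise. A condensed summary (the step numbering is editorial):

        /*
         * 1. VFS_SYNC(mp, MNT_WAIT, context)     push dirty system (b-tree) files
         * 2. hfs_flushfiles(mp, WRITECLOSE, p)   close out user files
         *                                        (FORCECLOSE added under MNT_FORCE)
         * 3. set HFS_READ_ONLY, then
         *    hfs_flushvolumeheader(...)          write back the volume header
         * 4. fsync the device vnode              push the volume bitmap blocks
         * 5. on any failure, clear HFS_READ_ONLY and bail out
         */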
*/ if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && - (mp->mnt_flag & MNT_RDONLY)) { - + vfs_isrdonly(mp)) { + int flags; + /* use VFS_SYNC to push out System (btree) files */ - retval = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); - if (retval && ((mp->mnt_flag & MNT_FORCE) == 0)) - goto error_exit; + retval = VFS_SYNC(mp, MNT_WAIT, context); + if (retval && ((cmdflags & MNT_FORCE) == 0)) + goto out; flags = WRITECLOSE; - if (mp->mnt_flag & MNT_FORCE) + if (cmdflags & MNT_FORCE) flags |= FORCECLOSE; if ((retval = hfs_flushfiles(mp, flags, p))) - goto error_exit; + goto out; hfsmp->hfs_flags |= HFS_READ_ONLY; retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); /* also get the volume bitmap blocks */ - if (!retval) - retval = VOP_FSYNC(hfsmp->hfs_devvp, NOCRED, MNT_WAIT, p); - + if (!retval) { + if (vnode_mount(hfsmp->hfs_devvp) == mp) { + retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p); + } else { + vnode_get(hfsmp->hfs_devvp); + retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); + vnode_put(hfsmp->hfs_devvp); + } + } if (retval) { hfsmp->hfs_flags &= ~HFS_READ_ONLY; - goto error_exit; + goto out; } - if (hfsmp->jnl) { hfs_global_exclusive_lock_acquire(hfsmp); @@ -255,29 +256,11 @@ hfs_mount(mp, path, data, ndp, p) } } - if ((mp->mnt_flag & MNT_RELOAD) && - (retval = hfs_reload(mp, ndp->ni_cnd.cn_cred, p))) - goto error_exit; - - if ((hfsmp->hfs_flags & HFS_READ_ONLY) && - (mp->mnt_kern_flag & MNTK_WANTRDWR)) { - /* - * If upgrade to read-write by non-root, then verify - * that user has necessary permissions on the device. - */ - if (p->p_ucred->cr_uid != 0) { - devvp = hfsmp->hfs_devvp; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - if ((retval = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p))) { - VOP_UNLOCK(devvp, 0, p); - goto error_exit; - } - VOP_UNLOCK(devvp, 0, p); - } + /* Change to a writable file system. */ + if (vfs_iswriteupgrade(mp)) { retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); - if (retval != E_NONE) - goto error_exit; + goto out; // If the journal was shut-down previously because we were // asked to be read-only, let's start it back up again now @@ -285,12 +268,12 @@ hfs_mount(mp, path, data, ndp, p) if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && hfsmp->jnl == NULL && hfsmp->jvp != NULL) { - int flags; + int jflags; if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) { - flags = JOURNAL_RESET; + jflags = JOURNAL_RESET; } else { - flags = 0; + jflags = 0; } hfs_global_exclusive_lock_acquire(hfsmp); @@ -300,7 +283,7 @@ hfs_mount(mp, path, data, ndp, p) hfsmp->jnl_size, hfsmp->hfs_devvp, hfsmp->hfs_phys_block_size, - flags, + jflags, 0, hfs_sync_metadata, hfsmp->hfs_mp); @@ -308,7 +291,7 @@ hfs_mount(mp, path, data, ndp, p) if (hfsmp->jnl == NULL) { retval = EINVAL; - goto error_exit; + goto out; } else { hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET; } @@ -317,141 +300,125 @@ hfs_mount(mp, path, data, ndp, p) /* Only clear HFS_READ_ONLY after a successfull write */ hfsmp->hfs_flags &= ~HFS_READ_ONLY; - } - if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && - (HFSTOVCB(hfsmp)->vcbSigWord == kHFSPlusSigWord)) { - /* setup private/hidden directory for unlinked files */ - FindMetaDataDirectory(HFSTOVCB(hfsmp)); - if (hfsmp->jnl) + if (!(hfsmp->hfs_flags & (HFS_READ_ONLY & HFS_STANDARD))) { + /* setup private/hidden directory for unlinked files */ + FindMetaDataDirectory(HFSTOVCB(hfsmp)); hfs_remove_orphans(hfsmp); - - /* - * Allow hot file clustering if conditions allow. 
- */ - if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && - (mp->mnt_flag & MNT_RDONLY) && - (mp->mnt_kern_flag & MNTK_WANTRDWR)) { - (void) hfs_recording_init(hfsmp, p); + + /* + * Allow hot file clustering if conditions allow. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + (void) hfs_recording_init(hfsmp); + } } } - if (args.fspec == 0) { - /* - * Process export requests. - */ - return vfs_export(mp, &hfsmp->hfs_export, &args.export); - } - } + /* Update file system parameters. */ + retval = hfs_changefs(mp, &args); - /* - * Not an update, or updating the name: look up the name - * and verify that it refers to a sensible block device. - */ - NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); - retval = namei(ndp); - if (retval != E_NONE) { - DBG_ERR(("hfs_mount: CAN'T GET DEVICE: %s, %x\n", args.fspec, ndp->ni_vp->v_rdev)); - goto error_exit; - } + } else /* not an update request */ { - devvp = ndp->ni_vp; + /* Set the mount flag to indicate that we support volfs */ + vfs_setflags(mp, (uint64_t)((unsigned int)MNT_DOVOLFS)); - if (devvp->v_type != VBLK) { - vrele(devvp); - retval = ENOTBLK; - goto error_exit; + retval = hfs_mountfs(devvp, mp, &args, context); } - if (major(devvp->v_rdev) >= nblkdev) { - vrele(devvp); - retval = ENXIO; - goto error_exit; +out: + if (retval == 0) { + (void)hfs_statfs(mp, vfs_statfs(mp), context); } + return (retval); +} - /* - * If mount by non-root, then verify that user has necessary - * permissions on the device. - */ - if (p->p_ucred->cr_uid != 0) { - accessmode = VREAD; - if ((mp->mnt_flag & MNT_RDONLY) == 0) - accessmode |= VWRITE; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - if ((retval = VOP_ACCESS(devvp, accessmode, p->p_ucred, p))) { - vput(devvp); - goto error_exit; - } - VOP_UNLOCK(devvp, 0, p); - } - if ((mp->mnt_flag & MNT_UPDATE) == 0) { - retval = hfs_mountfs(devvp, mp, p, &args); - if (retval != E_NONE) - vrele(devvp); - } else { - if (devvp != hfsmp->hfs_devvp) - retval = EINVAL; /* needs translation */ - else - retval = hfs_changefs(mp, &args, p); - vrele(devvp); - } +struct hfs_changefs_cargs { + struct hfsmount *hfsmp; + int namefix; + int permfix; + int permswitch; +}; - if (retval != E_NONE) { - goto error_exit; - } +static int +hfs_changefs_callback(struct vnode *vp, void *cargs) +{ + ExtendedVCB *vcb; + struct cnode *cp; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct hfs_changefs_cargs *args; - /* Set the mount flag to indicate that we support volfs */ - mp->mnt_flag |= MNT_DOVOLFS; - if (VFSTOVCB(mp)->vcbSigWord == kHFSSigWord) { - /* HFS volumes only want roman-encoded names: */ - mp->mnt_flag |= MNT_FIXEDSCRIPTENCODING; - } - (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN-1, &size); + args = (struct hfs_changefs_cargs *)cargs; - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)hfs_statfs(mp, &mp->mnt_stat, p); - return (E_NONE); + cp = VTOC(vp); + vcb = HFSTOVCB(args->hfsmp); -error_exit: + if (cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL)) { + /* + * If we couldn't find this guy skip to the next one + */ + if (args->namefix) + cache_purge(vp); - return (retval); -} + return (VNODE_RETURNED); + } + /* + * Get the real uid/gid and perm mask from disk. 
+ */ + if (args->permswitch || args->permfix) { + cp->c_uid = cnattr.ca_uid; + cp->c_gid = cnattr.ca_gid; + cp->c_mode = cnattr.ca_mode; + } + /* + * If we're switching name converters then... + * Remove the existing entry from the namei cache. + * Update name to one based on new encoder. + */ + if (args->namefix) { + cache_purge(vp); + replace_desc(cp, &cndesc); + if (cndesc.cd_cnid == kHFSRootFolderID) { + strncpy(vcb->vcbVN, cp->c_desc.cd_nameptr, NAME_MAX); + cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding; + } + } else { + cat_releasedesc(&cndesc); + } + return (VNODE_RETURNED); +} /* Change fs mount parameters */ static int -hfs_changefs(mp, args, p) - struct mount *mp; - struct hfs_mount_args *args; - struct proc *p; +hfs_changefs(struct mount *mp, struct hfs_mount_args *args) { int retval = 0; int namefix, permfix, permswitch; struct hfsmount *hfsmp; - struct cnode *cp; ExtendedVCB *vcb; - register struct vnode *vp, *nvp; hfs_to_unicode_func_t get_unicode_func; unicode_to_hfs_func_t get_hfsname_func; - struct cat_desc cndesc; - struct cat_attr cnattr; - u_long old_encoding; + u_long old_encoding = 0; + struct hfs_changefs_cargs cargs; + uint32_t mount_flags; hfsmp = VFSTOHFS(mp); vcb = HFSTOVCB(hfsmp); + mount_flags = (unsigned int)vfs_flags(mp); + permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) && - ((mp->mnt_flag & MNT_UNKNOWNPERMISSIONS) == 0)) || + ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) || (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) && - (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS))); + (mount_flags & MNT_UNKNOWNPERMISSIONS))); /* The root filesystem must operate with actual permissions: */ - if (permswitch && (mp->mnt_flag & MNT_ROOTFS) && (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS)) { - mp->mnt_flag &= ~MNT_UNKNOWNPERMISSIONS; /* Just say "No". */ + if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) { + vfs_clearflags(mp, (uint64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */ return EINVAL; } - if (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS) + if (mount_flags & MNT_UNKNOWNPERMISSIONS) hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; else hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS; @@ -466,12 +433,12 @@ hfs_changefs(mp, args, p) /* Change the default uid, gid and/or mask */ if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) { hfsmp->hfs_uid = args->hfs_uid; - if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSPlusSigWord) + if (vcb->vcbSigWord == kHFSPlusSigWord) ++permfix; } if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) { hfsmp->hfs_gid = args->hfs_gid; - if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSPlusSigWord) + if (vcb->vcbSigWord == kHFSPlusSigWord) ++permfix; } if (args->hfs_mask != (mode_t)VNOVAL) { @@ -480,13 +447,13 @@ hfs_changefs(mp, args, p) hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS; if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES)) hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE); - if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSPlusSigWord) + if (vcb->vcbSigWord == kHFSPlusSigWord) ++permfix; } } /* Change the hfs encoding value (hfs only) */ - if ((HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) && + if ((vcb->vcbSigWord == kHFSSigWord) && (args->hfs_encoding != (u_long)VNOVAL) && (hfsmp->hfs_encoding != args->hfs_encoding)) { @@ -513,77 +480,30 @@ hfs_changefs(mp, args, p) if (!(namefix || permfix || permswitch)) goto exit; + /* XXX 3762912 hack to support HFS filesystem 'owner' */ + if (permfix) + vfs_setowner(mp, + hfsmp->hfs_uid == UNKNOWNUID ? 
KAUTH_UID_NONE : hfsmp->hfs_uid, + hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid); + /* * For each active vnode fix things that changed * * Note that we can visit a vnode more than once * and we can race with fsync. + * + * hfs_changefs_callback will be called for each vnode + * hung off of this mount point + * the vnode will be + * properly referenced and unreferenced around the callback */ - simple_lock(&mntvnode_slock); -loop: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - /* - * If the vnode that we are about to fix is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - - simple_lock(&vp->v_interlock); - nvp = vp->v_mntvnodes.le_next; - if (vp->v_flag & VSYSTEM) { - simple_unlock(&vp->v_interlock); - continue; - } - simple_unlock(&mntvnode_slock); - retval = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (retval) { - simple_lock(&mntvnode_slock); - if (retval == ENOENT) - goto loop; - continue; - } - - cp = VTOC(vp); - - retval = cat_lookup(hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL); - /* If we couldn't find this guy skip to the next one */ - if (retval) { - if (namefix) - cache_purge(vp); - vput(vp); - simple_lock(&mntvnode_slock); - continue; - } - - /* Get the real uid/gid and perm mask from disk. */ - if (permswitch || permfix) { - cp->c_uid = cnattr.ca_uid; - cp->c_gid = cnattr.ca_gid; - cp->c_mode = cnattr.ca_mode; - } - - /* - * If we're switching name converters then... - * Remove the existing entry from the namei cache. - * Update name to one based on new encoder. - */ - if (namefix) { - cache_purge(vp); - replace_desc(cp, &cndesc); + cargs.hfsmp = hfsmp; + cargs.namefix = namefix; + cargs.permfix = permfix; + cargs.permswitch = permswitch; - if (cndesc.cd_cnid == kHFSRootFolderID) { - strncpy(vcb->vcbVN, cp->c_desc.cd_nameptr, NAME_MAX); - cp->c_desc.cd_encoding = hfsmp->hfs_encoding; - } - } else { - cat_releasedesc(&cndesc); - } - vput(vp); - simple_lock(&mntvnode_slock); + vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs); - } /* end for (vp...) */ - simple_unlock(&mntvnode_slock); /* * If we're switching name converters we can now * connect the new hfs_get_hfsname converter and @@ -599,6 +519,51 @@ exit: } +struct hfs_reload_cargs { + struct hfsmount *hfsmp; + kauth_cred_t cred; + struct proc *p; + int error; +}; + +static int +hfs_reload_callback(struct vnode *vp, void *cargs) +{ + struct cnode *cp; + struct hfs_reload_cargs *args; + + args = (struct hfs_reload_cargs *)cargs; + /* + * flush all the buffers associated with this node + */ + (void) buf_invalidateblks(vp, 0, 0, 0); + + cp = VTOC(vp); + /* + * Remove any directory hints + */ + if (vnode_isdir(vp)) + hfs_reldirhints(cp, 0); + + /* + * Re-read cnode data for all active vnodes (non-metadata files). + */ + if (!VNODE_IS_RSRC(vp)) { + struct cat_fork *datafork; + struct cat_desc desc; + + datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL; + + /* lookup by fileID since name could have changed */ + if ((args->error = cat_idlookup(args->hfsmp, cp->c_fileid, &desc, &cp->c_attr, datafork))) + return (VNODE_RETURNED_DONE); + + /* update cnode's catalog descriptor */ + (void) replace_desc(cp, &desc); + } + return (VNODE_RETURNED); +} + /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must @@ -614,13 +579,9 @@ exit: * re-read cnode data for all active vnodes. 
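Both rewrites in this file follow the same Tiger-era shape: per-vnode loops that used to walk mnt_vnodelist under mntvnode_slock, complete with the fragile goto-restart, become a callback handed to vnode_iterate(), which takes care of referencing each vnode and of races with mount teardown. The skeleton, with hypothetical names:

        struct my_cargs {
                struct hfsmount *hfsmp;
                int error;
        };

        static int
        my_callback(struct vnode *vp, void *cargs)
        {
                struct my_cargs *args = (struct my_cargs *)cargs;

                /* ... per-vnode work on VTOC(vp) ... */
                if (args->error)
                        return (VNODE_RETURNED_DONE);   /* stop iterating */
                return (VNODE_RETURNED);                /* keep going     */
        }

        /* caller:  vnode_iterate(mp, flags, my_callback, &cargs);
         * with flags 0, or VNODE_RELOAD | VNODE_WAIT for the reload case. */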
*/ static int -hfs_reload(mountp, cred, p) - register struct mount *mountp; - struct ucred *cred; - struct proc *p; +hfs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) { - register struct vnode *vp, *nvp, *devvp; - struct cnode *cp; + register struct vnode *devvp; struct buf *bp; int sectorsize; int error, i; @@ -629,9 +590,8 @@ hfs_reload(mountp, cred, p) ExtendedVCB *vcb; struct filefork *forkp; struct cat_desc cndesc; - - if ((mountp->mnt_flag & MNT_RDONLY) == 0) - return (EINVAL); + struct hfs_reload_cargs args; + int lockflags; hfsmp = VFSTOHFS(mountp); vcb = HFSTOVCB(hfsmp); @@ -643,75 +603,42 @@ hfs_reload(mountp, cred, p) * Invalidate all cached meta-data. */ devvp = hfsmp->hfs_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + if (buf_invalidateblks(devvp, 0, 0, 0)) panic("hfs_reload: dirty1"); - InvalidateCatalogCache(vcb); - -loop: - simple_lock(&mntvnode_slock); - for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - if (vp->v_mount != mountp) { - simple_unlock(&mntvnode_slock); - goto loop; - } - nvp = vp->v_mntvnodes.le_next; - - /* - * Invalidate all inactive vnodes. - */ - if (vrecycle(vp, &mntvnode_slock, p)) - goto loop; - - /* - * Invalidate all cached file data. - */ - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { - goto loop; - } - if (vinvalbuf(vp, 0, cred, p, 0, 0)) - panic("hfs_reload: dirty2"); - - /* - * Re-read cnode data for all active vnodes (non-metadata files). - */ - cp = VTOC(vp); - if ((vp->v_flag & VSYSTEM) == 0 && !VNODE_IS_RSRC(vp)) { - struct cat_fork *datafork; - struct cat_desc desc; - datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL; - - /* lookup by fileID since name could have changed */ - if ((error = cat_idlookup(hfsmp, cp->c_fileid, &desc, &cp->c_attr, datafork))) { - vput(vp); - return (error); - } + args.hfsmp = hfsmp; + args.cred = cred; + args.p = p; + args.error = 0; + /* + * hfs_reload_callback will be called for each vnode + * hung off of this mount point that can't be recycled... + * vnode_iterate will recycle those that it can (the VNODE_RELOAD option) + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback + */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args); + hfs_systemfile_unlock(hfsmp, lockflags); - /* update cnode's catalog descriptor */ - (void) replace_desc(cp, &desc); - } - vput(vp); - simple_lock(&mntvnode_slock); - } - simple_unlock(&mntvnode_slock); + if (args.error) + return (args.error); /* * Re-read VolumeHeader from disk. 
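The re-read that follows shows where the volume header lives and how its fields are consumed; note that reload now also refreshes the attributes b-tree fork (hfs_attribute_vp), which appears to be new with this patch's extended-attribute support. Condensed from the code below (all names appear there):

        sectorsize = hfsmp->hfs_phys_block_size;
        error = (int)buf_meta_bread(hfsmp->hfs_devvp,
                    (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) +
                                HFS_PRI_SECTOR(sectorsize)),
                    sectorsize, NOCRED, &bp);
        /* the header sits a fixed 1024 bytes into the volume, scaled to the
         * device block size; fields are big-endian on disk, hence SWAP_BE*()
         * on every load into the in-core VCB */
        vhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
        vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));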
*/ sectorsize = hfsmp->hfs_phys_block_size; - error = meta_bread(hfsmp->hfs_devvp, - (vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize), + error = (int)buf_meta_bread(hfsmp->hfs_devvp, + (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize)), sectorsize, NOCRED, &bp); if (error) { if (bp != NULL) - brelse(bp); + buf_brelse(bp); return (error); } - vhp = (HFSPlusVolumeHeader *) (bp->b_data + HFS_PRI_OFFSET(sectorsize)); + vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize)); /* Do a quick sanity check */ if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord && @@ -719,12 +646,12 @@ loop: (SWAP_BE16(vhp->version) != kHFSPlusVersion && SWAP_BE16(vhp->version) != kHFSXVersion) || SWAP_BE32(vhp->blockSize) != vcb->blockSize) { - brelse(bp); + buf_brelse(bp); return (EIO); } vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate)); - vcb->vcbAtrb = (UInt16) SWAP_BE32 (vhp->attributes); /* VCB only uses lower 16 bits */ + vcb->vcbAtrb = SWAP_BE32 (vhp->attributes); vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock); vcb->vcbClpSiz = SWAP_BE32 (vhp->rsrcClumpSize); vcb->vcbNxtCNID = SWAP_BE32 (vhp->nextCatalogID); @@ -765,6 +692,18 @@ loop: forkp->ff_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks); forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize); + if (hfsmp->hfs_attribute_vp) { + forkp = VTOF(hfsmp->hfs_attribute_vp); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + forkp->ff_extents[i].startBlock = + SWAP_BE32 (vhp->attributesFile.extents[i].startBlock); + forkp->ff_extents[i].blockCount = + SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); + } + forkp->ff_size = SWAP_BE64 (vhp->attributesFile.logicalSize); + forkp->ff_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks); + forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize); + } forkp = VTOF((struct vnode *)vcb->allocationsRefNum); for (i = 0; i < kHFSPlusExtentDensity; i++) { @@ -777,20 +716,26 @@ loop: forkp->ff_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks); forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize); - brelse(bp); + buf_brelse(bp); vhp = NULL; /* * Re-load B-tree header data */ forkp = VTOF((struct vnode *)vcb->extentsRefNum); - if (error = MacToVFSError( BTReloadData((FCB*)forkp) )) + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) return (error); forkp = VTOF((struct vnode *)vcb->catalogRefNum); - if (error = MacToVFSError( BTReloadData((FCB*)forkp) )) + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) return (error); + if (hfsmp->hfs_attribute_vp) { + forkp = VTOF(hfsmp->hfs_attribute_vp); + if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) ) + return (error); + } + /* Reload the volume name */ if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, &cndesc, NULL, NULL))) return (error); @@ -808,91 +753,14 @@ loop: } -static int -get_raw_device(char *fspec, int is_user, int ronly, struct vnode **rvp, struct ucred *cred, struct proc *p) -{ - char *rawbuf; - char *dp; - size_t namelen; - struct nameidata nd; - int retval; - - *rvp = NULL; - - MALLOC(rawbuf, char *, MAXPATHLEN, M_HFSMNT, M_WAITOK); - if (rawbuf == NULL) { - retval = ENOMEM; - goto error_exit; - } - - if (is_user) { - retval = copyinstr(fspec, rawbuf, MAXPATHLEN - 1, &namelen); - if (retval != E_NONE) { - FREE(rawbuf, M_HFSMNT); - goto error_exit; - } - } else { - strcpy(rawbuf, fspec); - namelen = strlen(rawbuf); - } - - /* make sure it's null terminated */ - rawbuf[MAXPATHLEN-1] = '\0'; - - dp = &rawbuf[namelen-1]; - 
while(dp >= rawbuf && *dp != '/') { - dp--; - } - - if (dp != NULL) { - dp++; - } else { - dp = rawbuf; - } - - /* make room for and insert the 'r' for the raw device */ - memmove(dp+1, dp, strlen(dp)+1); - *dp = 'r'; - - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, rawbuf, p); - retval = namei(&nd); - if (retval != E_NONE) { - DBG_ERR(("hfs_mountfs: can't open raw device for journal: %s, %x\n", rawbuf, nd.ni_vp->v_rdev)); - FREE(rawbuf, M_HFSMNT); - goto error_exit; - } - - *rvp = nd.ni_vp; - if ((retval = VOP_OPEN(*rvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) { - *rvp = NULL; - goto error_exit; - } - - // don't need this any more - FREE(rawbuf, M_HFSMNT); - - return 0; - - error_exit: - if (*rvp) { - (void)VOP_CLOSE(*rvp, ronly ? FREAD : FREAD|FWRITE, cred, p); - } - - if (rawbuf) { - FREE(rawbuf, M_HFSMNT); - } - return retval; -} - - - /* * Common code for mount and mountroot */ static int -hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, - struct hfs_mount_args *args) +hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, + vfs_context_t context) { + struct proc *p = vfs_context_proc(context); int retval = E_NONE; struct hfsmount *hfsmp; struct buf *bp; @@ -901,41 +769,29 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, int ronly; int i; int mntwrapper; - struct ucred *cred; + kauth_cred_t cred; u_int64_t disksize; - u_int64_t blkcnt; + daddr64_t blkcnt; u_int32_t blksize; u_int32_t minblksize; u_int32_t iswritable; - daddr_t mdb_offset; + daddr64_t mdb_offset; - dev = devvp->v_rdev; - cred = p ? p->p_ucred : NOCRED; + ronly = vfs_isrdonly(mp); + dev = vnode_specrdev(devvp); + cred = p ? vfs_context_ucred(context) : NOCRED; mntwrapper = 0; - /* - * Disallow multiple mounts of the same device. - * Disallow mounting of a device that is currently in use - * (except for root, which might share swap device for miniroot). - * Flush out any old buffers remaining from a previous use. - */ - if ((retval = vfs_mountedon(devvp))) - return (retval); - if ((vcount(devvp) > 1) && (devvp != rootvp)) - return (EBUSY); - if ((retval = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))) - return (retval); - - ronly = (mp->mnt_flag & MNT_RDONLY) != 0; - if ((retval = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) - return (retval); bp = NULL; hfsmp = NULL; mdbp = NULL; minblksize = kHFSBlockSize; + /* Advisory locking should be handled at the VFS layer */ + vfs_setlocklocal(mp); + /* Get the real physical block size. */ - if (VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context)) { retval = ENXIO; goto error_exit; } @@ -943,90 +799,88 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, if (blksize > 512) { u_int32_t size512 = 512; - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) { retval = ENXIO; goto error_exit; } } /* Get the number of 512 byte physical blocks. */ - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } /* Compute an accurate disk size (i.e. within 512 bytes) */ - disksize = blkcnt * (u_int64_t)512; + disksize = (u_int64_t)blkcnt * (u_int64_t)512; /* - * There are only 31 bits worth of block count in - * the buffer cache. 
So for large volumes a 4K - * physical block size is needed. + * On Tiger it is not necessary to switch the device + * block size to be 4k if there are more than 31-bits + * worth of blocks, but to ensure compatibility with + * pre-Tiger systems we have to do it. */ if (blkcnt > (u_int64_t)0x000000007fffffff) { minblksize = blksize = 4096; } + /* Now switch to our preferred physical block size. */ if (blksize > 512) { - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } /* Get the count of physical blocks. */ - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } } - /* * At this point: * minblksize is the minimum physical block size * blksize has our preferred physical block size * blkcnt has the total number of physical blocks */ - devvp->v_specsize = blksize; - - /* cache the IO attributes */ - if ((retval = vfs_init_io_attributes(devvp, mp))) { - printf("hfs_mountfs: vfs_init_io_attributes returned %d\n", - retval); - return (retval); - } - mdb_offset = HFS_PRI_SECTOR(blksize); - if ((retval = meta_bread(devvp, HFS_PRI_SECTOR(blksize), blksize, cred, &bp))) { + mdb_offset = (daddr64_t)HFS_PRI_SECTOR(blksize); + if ((retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp))) { goto error_exit; } MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK); - bcopy(bp->b_data + HFS_PRI_OFFSET(blksize), mdbp, kMDBSize); - brelse(bp); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, kMDBSize); + buf_brelse(bp); bp = NULL; MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK); bzero(hfsmp, sizeof(struct hfsmount)); /* - * Init the volume information structure - */ - mp->mnt_data = (qaddr_t)hfsmp; + * Init the volume information structure + */ + + lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr); + lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr); + lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr); + + vfs_setfsprivate(mp, hfsmp); hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */ - hfsmp->hfs_vcb.vcb_hfsmp = hfsmp; /* Make VCBTOHFS work */ - hfsmp->hfs_raw_dev = devvp->v_rdev; + hfsmp->hfs_raw_dev = vnode_specrdev(devvp); hfsmp->hfs_devvp = devvp; hfsmp->hfs_phys_block_size = blksize; hfsmp->hfs_phys_block_count = blkcnt; hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; if (ronly) hfsmp->hfs_flags |= HFS_READ_ONLY; - if (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS) + if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS; for (i = 0; i < MAXQUOTAS; i++) - hfsmp->hfs_qfiles[i].qf_vp = NULLVP; + dqfileinit(&hfsmp->hfs_qfiles[i]); if (args) { hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid; if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID; hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ?
UNKNOWNGID : args->hfs_gid; if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID; + vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ if (args->hfs_mask != (mode_t)VNOVAL) { hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS; if (args->flags & HFSFSMNT_NOXONFILES) { @@ -1042,41 +896,48 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, mntwrapper = 1; } else { /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */ - if (mp->mnt_flag & MNT_UNKNOWNPERMISSIONS) { + if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) { hfsmp->hfs_uid = UNKNOWNUID; hfsmp->hfs_gid = UNKNOWNGID; + vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */ hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */ hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */ } } /* Find out if disk media is writable. */ - if (VOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, cred, p) == 0) { + if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) { if (iswritable) hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA; else hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; } + // record the current time at which we're mounting this volume + { + struct timeval tv; + microtime(&tv); + hfsmp->hfs_mount_time = tv.tv_sec; + } + /* Mount a standard HFS disk */ if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { - if (devvp == rootvp) { + if ((vfs_flags(mp) & MNT_ROOTFS)) { retval = EINVAL; /* Cannot root from HFS standard disks */ goto error_exit; } /* HFS disks can only use 512 byte physical blocks */ if (blksize > kHFSBlockSize) { blksize = kHFSBlockSize; - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } - devvp->v_specsize = blksize; hfsmp->hfs_phys_block_size = blksize; hfsmp->hfs_phys_block_count = blkcnt; } @@ -1119,18 +980,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, " a multiple of physical block size (%d);" " switching to 512\n", blksize); blksize = 512; - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&blksize, FWRITE, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, + (caddr_t)&blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, - (caddr_t)&blkcnt, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, + (caddr_t)&blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } - /* XXX do we need to call vfs_init_io_attributes again? 
*/ - devvp->v_specsize = blksize; /* Note: relative block count adjustment */ hfsmp->hfs_phys_block_count *= hfsmp->hfs_phys_block_size / blksize; @@ -1142,12 +1001,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, hfsmp->hfs_phys_block_count = disksize / blksize; - mdb_offset = (embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize); - retval = meta_bread(devvp, mdb_offset, blksize, cred, &bp); + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); if (retval) goto error_exit; - bcopy(bp->b_data + HFS_PRI_OFFSET(blksize), mdbp, 512); - brelse(bp); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512); + buf_brelse(bp); bp = NULL; vhp = (HFSPlusVolumeHeader*) mdbp; @@ -1181,42 +1040,45 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, // point as journaled. // if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { - mp->mnt_flag |= MNT_JOURNALED; + vfs_setflags(mp, (uint64_t)((unsigned int)MNT_JOURNALED)); } else { // if the journal failed to open, then set the lastMountedVersion // to be "FSK!" which fsck_hfs will see and force the fsck instead // of just bailing out because the volume is journaled. - if (ronly != 0 || devvp == rootvp) { - HFSPlusVolumeHeader *vhp; + if (!ronly) { + HFSPlusVolumeHeader *jvhp; hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; if (mdb_offset == 0) { - mdb_offset = (embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize); + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); } bp = NULL; - retval = meta_bread(devvp, mdb_offset, blksize, cred, &bp); + retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); if (retval == 0) { - vhp = (HFSPlusVolumeHeader *)(bp->b_data + HFS_PRI_OFFSET(blksize)); + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize)); - if (SWAP_BE16(vhp->signature) == kHFSPlusSigWord || SWAP_BE16(vhp->signature) == kHFSXSigWord) { - vhp->lastMountedVersion = SWAP_BE32('FSK!'); - bwrite(bp); + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); } else { - brelse(bp); + buf_brelse(bp); } bp = NULL; } else if (bp) { - brelse(bp); + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; } } // if this isn't the root device just bail out. - // if it is the root device we just continue on + // If it is the root device we just continue on // in the hopes that fsck_hfs will be able to // fix any damage that exists on the volume. - if (devvp != rootvp) { + if ( !(vfs_flags(mp) & MNT_ROOTFS)) { retval = EINVAL; goto error_exit; } @@ -1226,7 +1088,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname); - retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args); + retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); /* * If the backend didn't like our physical blocksize * then retry with physical blocksize of 512. 
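/*
 * Both journal-failure paths in hfs_mountfs() (the one above, and its twin
 * in the 512-byte-blocksize retry below) perform the same "FSK!" dance: if
 * journal replay fails on a writable volume, stamp lastMountedVersion with
 * kFSKMountVersion ('FSK!') so fsck_hfs forces a full check instead of
 * trusting the unreplayable journal. A hedged sketch of just that stamp,
 * reusing the buffer-cache calls this patch introduces; the helper name is
 * invented for illustration.
 */
static int
hfs_mark_for_fsck(vnode_t devvp, daddr64_t mdb_offset, u_int32_t blksize,
		kauth_cred_t cred)
{
	buf_t bp = NULL;
	HFSPlusVolumeHeader *jvhp;

	if (buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp) != 0) {
		if (bp)
			buf_brelse(bp);	/* the error path may still return a buffer */
		return (EIO);
	}
	jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize));
	if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord ||
	    SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
		jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
		return (buf_bwrite(bp));	/* synchronous write releases bp */
	}
	buf_brelse(bp);
	return (0);
}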
@@ -1235,11 +1097,11 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, printf("HFS Mount: could not use physical block size " "(%d) switching to 512\n", blksize); blksize = 512; - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) { retval = ENXIO; goto error_exit; } - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, cred, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) { retval = ENXIO; goto error_exit; } @@ -1253,25 +1115,71 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, journal_close(hfsmp->jnl); hfsmp->jnl = NULL; if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { - mp->mnt_flag |= MNT_JOURNALED; - } + vfs_setflags(mp, (uint64_t)((unsigned int)MNT_JOURNALED)); + } else { + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!ronly) { + HFSPlusVolumeHeader *jvhp; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + if (mdb_offset == 0) { + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); + } + + bp = NULL; + retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + // if this isn't the root device just bail out. + // If it is the root device we just continue on + // in the hopes that fsck_hfs will be able to + // fix any damage that exists on the volume. + if ( !(vfs_flags(mp) & MNT_ROOTFS)) { + retval = EINVAL; + goto error_exit; + } + } } /* Try again with a smaller block size... */ - retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args); + retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred); } if (retval) (void) hfs_relconverter(0); } + // save off a snapshot of the mtime from the previous mount + // (for matador). + hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime; + if ( retval ) { goto error_exit; } - mp->mnt_stat.f_fsid.val[0] = (long)dev; - mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; - mp->mnt_maxsymlinklen = 0; - devvp->v_specflags |= SI_MOUNTEDON; + mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; + mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); + vfs_setmaxsymlen(mp, 0); + mp->mnt_vtable->vfc_threadsafe = TRUE; + mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR; if (args) { /* @@ -1320,17 +1228,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, error_exit: if (bp) - brelse(bp); + buf_brelse(bp); if (mdbp) FREE(mdbp, M_TEMP); - (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); + if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - (void)VOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, cred, p); + (void)VNOP_CLOSE(hfsmp->jvp, ronly ? 
FREAD : FREAD|FWRITE, context); hfsmp->jvp = NULL; } if (hfsmp) { FREE(hfsmp, M_HFSMNT); - mp->mnt_data = (qaddr_t)0; + vfs_setfsprivate(mp, NULL); } return (retval); } @@ -1342,10 +1250,7 @@ error_exit: */ /* ARGSUSED */ static int -hfs_start(mp, flags, p) - struct mount *mp; - int flags; - struct proc *p; +hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) { return (0); } @@ -1355,16 +1260,14 @@ hfs_start(mp, flags, p) * unmount system call */ static int -hfs_unmount(mp, mntflags, p) - struct mount *mp; - int mntflags; - struct proc *p; +hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) { + struct proc *p = vfs_context_proc(context); struct hfsmount *hfsmp = VFSTOHFS(mp); int retval = E_NONE; int flags; int force; - int started_tr = 0, grabbed_lock = 0; + int started_tr = 0; flags = 0; force = 0; @@ -1377,47 +1280,48 @@ hfs_unmount(mp, mntflags, p) return (retval); if (hfsmp->hfs_flags & HFS_METADATA_ZONE) - (void) hfs_recording_suspend(hfsmp, p); + (void) hfs_recording_suspend(hfsmp); /* * Flush out the b-trees, volume bitmap and Volume Header */ if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - journal_start_transaction(hfsmp->jnl); - started_tr = 1; + hfs_start_transaction(hfsmp); + started_tr = 1; + + if (hfsmp->hfs_attribute_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK); + retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); + if (retval && !force) + goto err_exit; } - - retval = VOP_FSYNC(HFSTOVCB(hfsmp)->catalogRefNum, NOCRED, MNT_WAIT, p); + + (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK); + retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); if (retval && !force) goto err_exit; - retval = VOP_FSYNC(HFSTOVCB(hfsmp)->extentsRefNum, NOCRED, MNT_WAIT, p); + (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK); + retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); if (retval && !force) goto err_exit; - // if we have an allocation file, sync it too so we don't leave dirty - // blocks around - if (HFSTOVCB(hfsmp)->allocationsRefNum) { - if (retval = VOP_FSYNC(HFSTOVCB(hfsmp)->allocationsRefNum, NOCRED, MNT_WAIT, p)) { - if (!force) - goto err_exit; - } - } - - if (hfsmp->hfc_filevp && (hfsmp->hfc_filevp->v_flag & VSYSTEM)) { - retval = VOP_FSYNC(hfsmp->hfc_filevp, NOCRED, MNT_WAIT, p); + if (hfsmp->hfs_allocation_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK); + retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); if (retval && !force) goto err_exit; } - if (retval = VOP_FSYNC(hfsmp->hfs_devvp, NOCRED, MNT_WAIT, p)) { - if (!force) + if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) { + retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p); + if (retval && !force) goto err_exit; } - #if 0 /* See if this volume is damaged, is so do not unmount cleanly */ if (HFSTOVCB(hfsmp)->vcbFlags & kHFS_DamagedVolume) { @@ -1428,21 +1332,15 @@ hfs_unmount(mp, mntflags, p) #else HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; #endif - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (retval) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (!force) goto err_exit; /* could not flush everything 
*/ } - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - started_tr = 0; - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - grabbed_lock = 0; - } + hfs_end_transaction(hfsmp); + started_tr = 0; } if (hfsmp->jnl) { @@ -1468,11 +1366,13 @@ hfs_unmount(mp, mntflags, p) hfsmp->jnl = NULL; } + VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); + if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - retval = VOP_CLOSE(hfsmp->jvp, + retval = VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, - NOCRED, p); - vrele(hfsmp->jvp); + context); + vnode_put(hfsmp->jvp); hfsmp->jvp = NULL; } // XXXdbg @@ -1485,28 +1385,17 @@ hfs_unmount(mp, mntflags, p) hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; tmpvp = hfsmp->hfs_backingfs_rootvp; hfsmp->hfs_backingfs_rootvp = NULLVP; - vrele(tmpvp); + vnode_rele(tmpvp); } #endif /* HFS_SPARSE_DEV */ - - hfsmp->hfs_devvp->v_specflags &= ~SI_MOUNTEDON; - retval = VOP_CLOSE(hfsmp->hfs_devvp, - hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, - NOCRED, p); - if (retval && !force) - return(retval); - - vrele(hfsmp->hfs_devvp); + lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); FREE(hfsmp, M_HFSMNT); - mp->mnt_data = (qaddr_t)0; + return (0); err_exit: - if (hfsmp->jnl && started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + if (started_tr) { + hfs_end_transaction(hfsmp); } return retval; } @@ -1514,44 +1403,28 @@ hfs_unmount(mp, mntflags, p) /* * Return the root of a filesystem. - * - * OUT - vpp, should be locked and vget()'d (to increment usecount and lock) */ static int -hfs_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; +hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) { - struct vnode *nvp; - int retval; - UInt32 rootObjID = kRootDirID; - - if ((retval = VFS_VGET(mp, &rootObjID, &nvp))) - return (retval); - - *vpp = nvp; - return (0); + return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1); } /* * Do operations associated with quotas */ -int -hfs_quotactl(mp, cmds, uid, arg, p) - struct mount *mp; - int cmds; - uid_t uid; - caddr_t arg; - struct proc *p; +static int +hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context) { + struct proc *p = vfs_context_proc(context); int cmd, type, error; #if !QUOTA - return (EOPNOTSUPP); + return (ENOTSUP); #else if (uid == -1) - uid = p->p_cred->p_ruid; + uid = vfs_context_ucred(context)->cr_ruid; cmd = cmds >> SUBCMDSHIFT; switch (cmd) { @@ -1559,24 +1432,24 @@ hfs_quotactl(mp, cmds, uid, arg, p) case Q_QUOTASTAT: break; case Q_GETQUOTA: - if (uid == p->p_cred->p_ruid) + if (uid == vfs_context_ucred(context)->cr_ruid) break; /* fall through */ default: - if (error = suser(p->p_ucred, &p->p_acflag)) + if ( (error = vfs_context_suser(context)) ) return (error); } type = cmds & SUBCMDMASK; if ((u_int)type >= MAXQUOTAS) return (EINVAL); - if (vfs_busy(mp, LK_NOWAIT, 0, p)) + if (vfs_busy(mp, LK_NOWAIT)) return (0); switch (cmd) { case Q_QUOTAON: - error = hfs_quotaon(p, mp, type, arg, UIO_USERSPACE); + error = hfs_quotaon(p, mp, type, datap); break; case Q_QUOTAOFF: @@ -1584,15 +1457,15 @@ hfs_quotactl(mp, cmds, uid, arg, p) break; case Q_SETQUOTA: - error = hfs_setquota(mp, uid, type, arg); + error = hfs_setquota(mp, uid, type, datap); break; case Q_SETUSE: - error = hfs_setuse(mp, uid, type, arg); + error = hfs_setuse(mp, uid, type, datap); break; case Q_GETQUOTA: - error = hfs_getquota(mp, uid, type, arg); + error 
= hfs_getquota(mp, uid, type, datap); break; case Q_SYNC: @@ -1600,52 +1473,66 @@ hfs_quotactl(mp, cmds, uid, arg, p) break; case Q_QUOTASTAT: - error = hfs_quotastat(mp, type, arg); + error = hfs_quotastat(mp, type, datap); break; default: error = EINVAL; break; } - vfs_unbusy(mp, p); + vfs_unbusy(mp); + return (error); #endif /* QUOTA */ } - - +/* Subtype is composite of bits */ +#define HFS_SUBTYPE_JOURNALED 0x01 +#define HFS_SUBTYPE_CASESENSITIVE 0x02 +/* bits 2 - 6 reserved */ +#define HFS_SUBTYPE_STANDARDHFS 0x80 /* * Get file system statistics. */ static int -hfs_statfs(mp, sbp, p) - struct mount *mp; - register struct statfs *sbp; - struct proc *p; +hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context) { ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); u_long freeCNIDs; + uint16_t subtype = 0; freeCNIDs = (u_long)0xFFFFFFFF - (u_long)vcb->vcbNxtCNID; - sbp->f_bsize = vcb->blockSize; - sbp->f_iosize = hfsmp->hfs_logBlockSize; - sbp->f_blocks = vcb->totalBlocks; - sbp->f_bfree = hfs_freeblks(hfsmp, 0); - sbp->f_bavail = hfs_freeblks(hfsmp, 1); - sbp->f_files = vcb->totalBlocks - 2; /* max files is constrained by total blocks */ - sbp->f_ffree = MIN(freeCNIDs, sbp->f_bavail); - - sbp->f_type = 0; - if (sbp != &mp->mnt_stat) { - sbp->f_type = mp->mnt_vfc->vfc_typenum; - bcopy((caddr_t)mp->mnt_stat.f_mntonname, - (caddr_t)&sbp->f_mntonname[0], MNAMELEN); - bcopy((caddr_t)mp->mnt_stat.f_mntfromname, - (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + sbp->f_bsize = (uint32_t)vcb->blockSize; + sbp->f_iosize = (size_t)(MAX_UPL_TRANSFER * PAGE_SIZE); + sbp->f_blocks = (uint64_t)((unsigned long)vcb->totalBlocks); + sbp->f_bfree = (uint64_t)((unsigned long )hfs_freeblks(hfsmp, 0)); + sbp->f_bavail = (uint64_t)((unsigned long )hfs_freeblks(hfsmp, 1)); + sbp->f_files = (uint64_t)((unsigned long )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ + sbp->f_ffree = (uint64_t)((unsigned long )(MIN(freeCNIDs, sbp->f_bavail))); + + /* + * Subtypes (flavors) for HFS + * 0: Mac OS Extended + * 1: Mac OS Extended (Journaled) + * 2: Mac OS Extended (Case Sensitive) + * 3: Mac OS Extended (Case Sensitive, Journaled) + * 4 - 127: Reserved + * 128: Mac OS Standard + * + */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + subtype = HFS_SUBTYPE_STANDARDHFS; + } else /* HFS Plus */ { + if (hfsmp->jnl) + subtype |= HFS_SUBTYPE_JOURNALED; + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + subtype |= HFS_SUBTYPE_CASESENSITIVE; } + sbp->f_fssubtype = subtype; + return (0); } @@ -1663,57 +1550,81 @@ void hfs_sync_metadata(void *arg) { struct mount *mp = (struct mount *)arg; - struct cnode *cp; struct hfsmount *hfsmp; ExtendedVCB *vcb; - struct vnode *meta_vp[3]; - struct buf *bp; - int i, sectorsize, priIDSector, altIDSector, retval; - int error, allerror = 0; - + buf_t bp; + int sectorsize, retval; + daddr64_t priIDSector; hfsmp = VFSTOHFS(mp); vcb = HFSTOVCB(hfsmp); - bflushq(BQ_META, mp); - - -#if 1 // XXXdbg - I do not believe this is necessary... - // but if I pull it out, then the journal - // does not seem to get flushed properly - // when it is closed.... 
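/*
 * The superblock flush just below uses the standard "dirty and unlocked"
 * buffer test. The updated code collapses the old two-step check into a
 * single mask comparison that reads: write the buffer out only when
 * B_DELWRI (a delayed write is pending) is set and B_LOCKED (the journal
 * owns the buffer) is clear:
 *
 *	if ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)
 *		buf_bwrite(bp);		-- dirty and unowned: push to disk
 *	else
 *		buf_brelse(bp);		-- clean or journal-held: just release
 */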
- // now make sure the super block is flushed sectorsize = hfsmp->hfs_phys_block_size; - priIDSector = (vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_PRI_SECTOR(sectorsize); - retval = meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + + HFS_PRI_SECTOR(sectorsize)); + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); if (retval != 0) { panic("hfs: sync_metadata: can't read super-block?! (retval 0x%x, priIDSector)\n", retval, priIDSector); } - if (retval == 0 && (bp->b_flags & B_DELWRI) && (bp->b_flags & B_LOCKED) == 0) { - bwrite(bp); + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + buf_bwrite(bp); } else if (bp) { - brelse(bp); + buf_brelse(bp); } // the alternate super block... // XXXdbg - we probably don't need to do this each and every time. // hfs_btreeio.c:FlushAlternate() should flag when it was // written... - altIDSector = (vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); - retval = meta_bread(hfsmp->hfs_devvp, altIDSector, sectorsize, NOCRED, &bp); - if (retval == 0 && (bp->b_flags & B_DELWRI) && (bp->b_flags & B_LOCKED) == 0) { - bwrite(bp); - } else if (bp) { - brelse(bp); + if (hfsmp->hfs_alt_id_sector) { + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &bp); + if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) { + buf_bwrite(bp); + } else if (bp) { + buf_brelse(bp); + } } -#endif - } + +struct hfs_sync_cargs { + kauth_cred_t cred; + struct proc *p; + int waitfor; + int error; +}; + + +static int +hfs_sync_callback(struct vnode *vp, void *cargs) +{ + struct cnode *cp; + struct hfs_sync_cargs *args; + int error; + + args = (struct hfs_sync_cargs *)cargs; + + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { + return (VNODE_RETURNED); + } + cp = VTOC(vp); + + if ((cp->c_flag & C_MODIFIED) || + (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) || + vnode_hasdirtyblks(vp)) { + error = hfs_fsync(vp, args->waitfor, 0, args->p); + + if (error) + args->error = error; + } + hfs_unlock(cp); + return (VNODE_RETURNED); +} + + + /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; @@ -1722,155 +1633,88 @@ hfs_sync_metadata(void *arg) * Note: we are always called with the filesystem marked `MPBUSY'. */ static int -hfs_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; +hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) { - struct vnode *nvp, *vp; + struct proc *p = vfs_context_proc(context); struct cnode *cp; struct hfsmount *hfsmp; ExtendedVCB *vcb; - struct vnode *meta_vp[3]; + struct vnode *meta_vp[4]; int i; int error, allerror = 0; + struct hfs_sync_cargs args; /* * During MNT_UPDATE hfs_changefs might be manipulating * vnodes so back off */ - if (mp->mnt_flag & MNT_UPDATE) + if (((uint32_t)vfs_flags(mp)) & MNT_UPDATE) /* XXX MNT_UPDATE may not be visible here */ return (0); hfsmp = VFSTOHFS(mp); if (hfsmp->hfs_flags & HFS_READ_ONLY) return (EROFS); -#if 0 - // XXXdbg first go through and flush out any modified - // meta data blocks so they go out in order... 
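/*
 * The hand-rolled mntvnode list walk deleted in this hunk is replaced by
 * vnode_iterate() driving the hfs_sync_callback() defined above: VFS takes
 * and drops the per-vnode references, and the callback returns
 * VNODE_RETURNED to release the current vnode and keep iterating. A
 * minimal sketch of the pattern using the same kernel interfaces; the
 * names my_sync_args/my_sync_cb and the caller-supplied context are
 * illustrative assumptions.
 */
struct my_sync_args {
	int		waitfor;	/* MNT_WAIT or MNT_NOWAIT */
	int		error;		/* sticky error for the caller */
	vfs_context_t	ctx;
};

static int
my_sync_cb(vnode_t vp, void *arg)
{
	struct my_sync_args *ap = (struct my_sync_args *)arg;
	int error;

	if (vnode_hasdirtyblks(vp)) {
		error = VNOP_FSYNC(vp, ap->waitfor, ap->ctx);
		if (error)
			ap->error = error;
	}
	return (VNODE_RETURNED);	/* release vp, continue the walk */
}

/* usage: vnode_iterate(mp, 0, my_sync_cb, (void *)&args); */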
- bflushq(BQ_META, mp); - bflushq(BQ_LRU, mp); - // only flush locked blocks if we're not doing journaling - if (hfsmp->jnl == NULL) { - bflushq(BQ_LOCKED, mp); - } -#endif - + args.cred = vfs_context_proc(context); + args.waitfor = waitfor; + args.p = p; + args.error = 0; /* - * Write back each 'modified' vnode + * hfs_sync_callback will be called for each vnode + * hung off of this mount point... the vnode will be + * properly referenced and unreferenced around the callback */ + vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args); -loop: - simple_lock(&mntvnode_slock); - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - int didhold; - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - goto loop; - } - - simple_lock(&vp->v_interlock); - nvp = vp->v_mntvnodes.le_next; - - cp = VTOC(vp); - - // restart our whole search if this guy is locked - // or being reclaimed. - if (vp->v_tag != VT_HFS || cp == NULL || vp->v_flag & (VXLOCK|VORECLAIM)) { - simple_unlock(&vp->v_interlock); - continue; - } - - if ((vp->v_flag & VSYSTEM) || (vp->v_type == VNON) || - (((cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE)) == 0) && - (vp->v_dirtyblkhd.lh_first == NULL) && !(vp->v_flag & VHASDIRTY))) { - simple_unlock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); - simple_lock(&mntvnode_slock); - continue; - } - - simple_unlock(&mntvnode_slock); - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { - if (error == ENOENT) { - /* - * If vnode is being reclaimed, yield so - * that it can be removed from our list. - */ - if (UBCISVALID(vp)) - (void) tsleep((caddr_t)&lbolt, PINOD, "hfs_sync", 0); - goto loop; - } - simple_lock(&mntvnode_slock); - continue; - } - - didhold = ubc_hold(vp); - - // mark the cnode so that fsync won't flush - // the journal since we're going to do that... - cp->c_flag |= C_FROMSYNC; - if ((error = VOP_FSYNC(vp, cred, waitfor, p))) { - allerror = error; - }; - cp->c_flag &= ~C_FROMSYNC; - - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); - simple_lock(&mntvnode_slock); - }; + if (args.error) + allerror = args.error; vcb = HFSTOVCB(hfsmp); meta_vp[0] = vcb->extentsRefNum; meta_vp[1] = vcb->catalogRefNum; meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */ + meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */ /* Now sync our three metadata files */ - for (i = 0; i < 3; ++i) { + for (i = 0; i < 4; ++i) { struct vnode *btvp; - btvp = btvp = meta_vp[i];; - if ((btvp==0) || (btvp->v_type == VNON) || (btvp->v_mount != mp)) + btvp = meta_vp[i];; + if ((btvp==0) || (vnode_mount(btvp) != mp)) continue; - simple_lock(&btvp->v_interlock); + /* XXX use hfs_systemfile_lock instead ? 
*/ + (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK); cp = VTOC(btvp); - if (((cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE)) == 0) && - (btvp->v_dirtyblkhd.lh_first == NULL) && !(btvp->v_flag & VHASDIRTY)) { - simple_unlock(&btvp->v_interlock); + + if (((cp->c_flag & C_MODIFIED) == 0) && + (cp->c_touch_acctime == 0) && + (cp->c_touch_chgtime == 0) && + (cp->c_touch_modtime == 0) && + vnode_hasdirtyblks(btvp) == 0) { + hfs_unlock(VTOC(btvp)); continue; } - simple_unlock(&mntvnode_slock); - error = vget(btvp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); + error = vnode_get(btvp); if (error) { - simple_lock(&mntvnode_slock); + hfs_unlock(VTOC(btvp)); continue; } - if ((error = VOP_FSYNC(btvp, cred, waitfor, p))) + if ((error = hfs_fsync(btvp, waitfor, 0, p))) allerror = error; - VOP_UNLOCK(btvp, 0, p); - vrele(btvp); - simple_lock(&mntvnode_slock); - }; - simple_unlock(&mntvnode_slock); + hfs_unlock(cp); + vnode_put(btvp); + }; /* * Force stale file system control information to be flushed. */ if (vcb->vcbSigWord == kHFSSigWord) { - if ((error = VOP_FSYNC(hfsmp->hfs_devvp, cred, waitfor, p))) + if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) { allerror = error; + } } #if QUOTA hfs_qsync(mp); @@ -1882,12 +1726,6 @@ loop: */ if (IsVCBDirty(vcb)) { - // XXXdbg - debugging, remove - if (hfsmp->jnl) { - //printf("hfs: sync: strange, a journaled volume w/dirty VCB? jnl 0x%x hfsmp 0x%x\n", - // hfsmp->jnl, hfsmp); - } - error = hfs_flushvolumeheader(hfsmp, waitfor, 0); if (error) allerror = error; @@ -1897,7 +1735,6 @@ loop: journal_flush(hfsmp->jnl); } - err_exit: return (allerror); } @@ -1913,33 +1750,24 @@ loop: * those rights via. exflagsp and credanonp */ static int -hfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) - register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; +hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context) { struct hfsfid *hfsfhp; struct vnode *nvp; int result; - struct netcred *np; *vpp = NULL; hfsfhp = (struct hfsfid *)fhp; - /* - * Get the export permission structure for this tuple. - */ - np = vfs_export_lookup(mp, &VFSTOHFS(mp)->hfs_export, nam); - if (nam && (np == NULL)) { - return EACCES; - }; + if (fhlen < sizeof(struct hfsfid)) + return (EINVAL); - result = VFS_VGET(mp, &hfsfhp->hfsfid_cnid, &nvp); - if (result) return result; - if (nvp == NULL) return ESTALE; + result = hfs_vget(VFSTOHFS(mp), hfsfhp->hfsfid_cnid, &nvp, 0); + if (result) { + if (result == ENOENT) + result = ESTALE; + return result; + } /* The createtime can be changed by hfs_setattr or hfs_setattrlist. * For NFS, we are assuming that only if the createtime was moved @@ -1953,28 +1781,13 @@ hfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) * wrap number and use that for generation number. For now do this. */ if ((hfsfhp->hfsfid_gen < VTOC(nvp)->c_itime)) { - vput(nvp); + hfs_unlock(VTOC(nvp)); + vnode_put(nvp); return (ESTALE); - }; - - if (VNAME(nvp) == NULL) { - struct cnode *cp = VTOC(nvp); - - if (nvp == cp->c_rsrc_vp) { - // the +1/-2 thing is to skip the leading "/" on the rsrc fork spec - // and to not count the trailing null byte at the end of the string. 
- VNAME(nvp) = add_name(_PATH_RSRCFORKSPEC+1, sizeof(_PATH_RSRCFORKSPEC)-2, 0, 0); - } else { - VNAME(nvp) = add_name(cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen, 0, 0); - } } - *vpp = nvp; - if (np) { - *exflagsp = np->netc_exflags; - *credanonp = &np->netc_anon; - } - + + hfs_unlock(VTOC(nvp)); return (0); } @@ -1984,22 +1797,22 @@ hfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) */ /* ARGSUSED */ static int -hfs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; +hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context) { struct cnode *cp; struct hfsfid *hfsfhp; if (ISHFS(VTOVCB(vp))) - return (EOPNOTSUPP); /* hfs standard is not exportable */ + return (ENOTSUP); /* hfs standard is not exportable */ + + if (*fhlenp < (int)sizeof(struct hfsfid)) + return (EOVERFLOW); cp = VTOC(vp); hfsfhp = (struct hfsfid *)fhp; - hfsfhp->hfsfid_len = sizeof(struct hfsfid); - hfsfhp->hfsfid_pad = 0; hfsfhp->hfsfid_cnid = cp->c_fileid; hfsfhp->hfsfid_gen = cp->c_itime; + *fhlenp = sizeof(struct hfsfid); return (0); } @@ -2009,8 +1822,7 @@ hfs_vptofh(vp, fhp) * Initial HFS filesystems, done only once. */ static int -hfs_init(vfsp) - struct vfsconf *vfsp; +hfs_init(__unused struct vfsconf *vfsp) { static int done = 0; @@ -2024,11 +1836,16 @@ hfs_init(vfsp) #endif /* QUOTA */ BTReserveSetup(); + + + hfs_lock_attr = lck_attr_alloc_init(); + hfs_group_attr = lck_grp_attr_alloc_init(); + hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); + hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); + + /* Turn on lock debugging */ + //lck_attr_setdebug(hfs_lock_attr); - /* - * Allocate Catalog Iterator cache... - */ - (void) InitCatalogCache(); return (0); } @@ -2039,14 +1856,16 @@ hfs_getmountpoint(vp, hfsmpp) struct hfsmount **hfsmpp; { struct hfsmount * hfsmp; + char fstypename[MFSNAMELEN]; if (vp == NULL) return (EINVAL); - if ((vp->v_flag & VROOT) == 0) + if (!vnode_isvroot(vp)) return (EINVAL); - if (strcmp(vp->v_mount->mnt_stat.f_fstypename, "hfs") != 0) + vnode_vfsname(vp, fstypename); + if (strcmp(fstypename, "hfs") != 0) return (EINVAL); hfsmp = VTOHFS(vp); @@ -2062,29 +1881,16 @@ hfs_getmountpoint(vp, hfsmpp) // XXXdbg #include - /* * HFS filesystem related variables. 
*/ static int -hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context) { - extern u_int32_t hfs_getencodingbias(void); - extern void hfs_setencodingbias(u_int32_t); - + struct proc *p = vfs_context_proc(context); int error; - struct sysctl_req *req; - struct vfsidctl vc; - struct mount *mp; struct hfsmount *hfsmp; - struct vfsquery vq; /* all sysctl names at this level are terminal */ @@ -2098,17 +1904,18 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (error); } else if (name[0] == HFS_EXTEND_FS) { - u_int64_t newsize; - - if (newp == NULL) + u_int64_t newsize; + vnode_t vp = p->p_fd->fd_cdir; + + if (newp == USER_ADDR_NULL || vp == NULL) return (EINVAL); - if ((error = hfs_getmountpoint(p->p_fd->fd_cdir, &hfsmp))) + if ((error = hfs_getmountpoint(vp, &hfsmp))) return (error); error = sysctl_quad(oldp, oldlenp, newp, newlen, &newsize); if (error) return (error); - error = hfs_extendfs(HFSTOVFS(hfsmp), newsize, p); + error = hfs_extendfs(hfsmp, newsize, context); return (error); } else if (name[0] == HFS_ENCODINGHINT) { @@ -2128,7 +1935,7 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) &bytes, bufsize, 0, UTF_DECOMPOSED); if (error == 0) { hint = hfs_pickencoding(unicode_name, bytes / 2); - error = sysctl_int(oldp, oldlenp, NULL, NULL, &hint); + error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, &hint); } } FREE(unicode_name, M_TEMP); @@ -2139,15 +1946,17 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) // make the file system journaled... struct vnode *vp = p->p_fd->fd_cdir, *jvp; ExtendedVCB *vcb; - int retval; struct cat_attr jnl_attr, jinfo_attr; struct cat_fork jnl_fork, jinfo_fork; void *jnl = NULL; + int lockflags; /* Only root can enable journaling */ - if (current_proc()->p_ucred->cr_uid != 0) { + if (!is_suser()) { return (EPERM); } + if (vp == NULL) + return EINVAL; hfsmp = VTOHFS(vp); if (hfsmp->hfs_flags & HFS_READ_ONLY) { @@ -2159,27 +1968,29 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) } if (hfsmp->jnl) { - printf("hfs: volume @ mp 0x%x is already journaled!\n", vp->v_mount); + printf("hfs: volume @ mp 0x%x is already journaled!\n", vnode_mount(vp)); return EAGAIN; } vcb = HFSTOVCB(hfsmp); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 || BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) { printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n"); + hfs_systemfile_unlock(hfsmp, lockflags); return EINVAL; } + hfs_systemfile_unlock(hfsmp, lockflags); // make sure these both exist! 
- if ( GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0 - || GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, &jnl_fork) == 0) { + if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0 + || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) { return EINVAL; } - hfs_sync(hfsmp->hfs_mp, MNT_WAIT, FSCRED, p); - bflushq(BQ_META); + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context); printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", (off_t)name[2], (off_t)name[3]); @@ -2198,7 +2009,7 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) if (jnl == NULL) { printf("hfs: FAILED to create the journal!\n"); if (jvp && jvp != hfsmp->hfs_devvp) { - VOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, FSCRED, p); + VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); } jvp = NULL; @@ -2218,7 +2029,7 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid; hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid; - hfsmp->hfs_mp->mnt_flag |= MNT_JOURNALED; + vfs_setflags(hfsmp->hfs_mp, (uint64_t)((unsigned int)MNT_JOURNALED)); hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); @@ -2227,32 +2038,29 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) } else if (name[0] == HFS_DISABLE_JOURNALING) { // clear the journaling bit struct vnode *vp = p->p_fd->fd_cdir; - void *jnl; - int retval; /* Only root can disable journaling */ - if (current_proc()->p_ucred->cr_uid != 0) { + if (!is_suser()) { return (EPERM); } + if (vp == NULL) + return EINVAL; hfsmp = VTOHFS(vp); - printf("hfs: disabling journaling for mount @ 0x%x\n", vp->v_mount); + printf("hfs: disabling journaling for mount @ 0x%x\n", vnode_mount(vp)); - jnl = hfsmp->jnl; - hfs_global_exclusive_lock_acquire(hfsmp); // Lights out for you buddy! + journal_close(hfsmp->jnl); hfsmp->jnl = NULL; - journal_close(jnl); if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - VOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, FSCRED, p); + VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); } - hfsmp->jnl = NULL; hfsmp->jvp = NULL; - hfsmp->hfs_mp->mnt_flag &= ~MNT_JOURNALED; + vfs_clearflags(hfsmp->hfs_mp, (uint64_t)((unsigned int)MNT_JOURNALED)); hfsmp->jnl_start = 0; hfsmp->hfs_jnlinfoblkid = 0; hfsmp->hfs_jnlfileid = 0; @@ -2267,6 +2075,9 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) struct vnode *vp = p->p_fd->fd_cdir; off_t jnl_start, jnl_size; + if (vp == NULL) + return EINVAL; + hfsmp = VTOHFS(vp); if (hfsmp->jnl == NULL) { jnl_start = 0; @@ -2276,10 +2087,10 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) jnl_size = (off_t)hfsmp->jnl_size; } - if ((error = copyout((caddr_t)&jnl_start, (void *)name[1], sizeof(off_t))) != 0) { + if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) { return error; } - if ((error = copyout((caddr_t)&jnl_size, (void *)name[2], sizeof(off_t))) != 0) { + if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) { return error; } @@ -2289,13 +2100,29 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return set_package_extensions_table((void *)name[1], name[2], name[3]); } else if (name[0] == VFS_CTL_QUERY) { - req = oldp; /* we're new style vfs sysctl. 
*/ - - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) return (error); + struct sysctl_req *req; + struct vfsidctl vc; + struct user_vfsidctl user_vc; + struct mount *mp; + struct vfsquery vq; + boolean_t is_64_bit; + + is_64_bit = proc_is64bit(p); + req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ - mp = vfs_getvfs(&vc.vc_fsid); - if (mp == NULL) return (ENOENT); + if (is_64_bit) { + error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); + if (error) return (error); + + mp = vfs_getvfs(&user_vc.vc_fsid); + } + else { + error = SYSCTL_IN(req, &vc, sizeof(vc)); + if (error) return (error); + + mp = vfs_getvfs(&vc.vc_fsid); + } + if (mp == NULL) return (ENOENT); hfsmp = VFSTOHFS(mp); bzero(&vq, sizeof(vq)); @@ -2303,101 +2130,126 @@ hfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return SYSCTL_OUT(req, &vq, sizeof(vq));; }; - return (EOPNOTSUPP); + return (ENOTSUP); } -/* This will return a vnode of either a directory or a data vnode based on an object id. If - * it is a file id, its data fork will be returned. - */ static int -hfs_vget(mp, ino, vpp) - struct mount *mp; - void *ino; - struct vnode **vpp; +hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) +{ + return hfs_vget(VFSTOHFS(mp), (cnid_t)ino, vpp, 1); +} + + +/* + * Look up an HFS object by ID. + * + * The object is returned with an iocount reference and the cnode locked. + * + * If the object is a file then it will represent the data fork. + */ +__private_extern__ +int +hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) { - cnid_t cnid = *(cnid_t *)ino; + struct vnode *vp = NULL; + struct cat_desc cndesc; + struct cat_attr cnattr; + struct cat_fork cnfork; + struct componentname cn; + int error; /* Check for cnids that should't be exported. */ if ((cnid < kHFSFirstUserCatalogNodeID) && (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) return (ENOENT); + /* Don't export HFS Private Data dir. */ - if (cnid == VFSTOHFS(mp)->hfs_privdir_desc.cd_cnid) + if (cnid == hfsmp->hfs_privdir_desc.cd_cnid) return (ENOENT); - return (hfs_getcnode(VFSTOHFS(mp), cnid, NULL, 0, NULL, NULL, vpp)); -} + /* + * Check the hash first + */ + vp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, cnid, 0, skiplock); + if (vp) { + *vpp = vp; + return(0); + } -/* - * Check to see if a given vnode is only referenced for events: - * [ entered with vp->v_interlock locked ] - */ -static int -hfs_evtonly(struct vnode *vp) -{ - int ubc_refcount; + bzero(&cndesc, sizeof(cndesc)); + bzero(&cnattr, sizeof(cnattr)); + bzero(&cnfork, sizeof(cnfork)); - ubc_refcount = UBCINFOEXISTS(vp) ? 
1 : 0; - return (vp->v_usecount == (ubc_refcount + EVTONLYREFS(vp))); -} + /* + * Not in hash, lookup in catalog + */ + if (cnid == kHFSRootParentID) { + static char hfs_rootname[] = "/"; + + cndesc.cd_nameptr = &hfs_rootname[0]; + cndesc.cd_namelen = 1; + cndesc.cd_parentcnid = kHFSRootParentID; + cndesc.cd_cnid = kHFSRootFolderID; + cndesc.cd_flags = CD_ISDIR; + + cnattr.ca_fileid = kHFSRootFolderID; + cnattr.ca_nlink = 2; + cnattr.ca_entries = 1; + cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO); + } else { + int lockflags; -/* - * Check to see if all non-system vnodes for a given mountpoint are events-only - */ -static int -hfs_flush_evtonly(struct mount *mp, int flags, int dispose, struct proc *p) -{ - struct vnode *vp, *nvp; - int busy = 0; - - simple_lock(&mntvnode_slock); -loop: - for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { - if (vp->v_mount != mp) goto loop; - nvp = vp->v_mntvnodes.le_next; - - simple_lock(&vp->v_interlock); - /* - * Skip over a vnodes marked VSYSTEM or VNOFLUSH. - */ - if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) { - simple_unlock(&vp->v_interlock); - continue; - }; - /* - * Skip over a vnodes marked VSWAP. - */ - if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) { - simple_unlock(&vp->v_interlock); - continue; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_idlookup(hfsmp, cnid, &cndesc, &cnattr, &cnfork); + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error) { + *vpp = NULL; + return (error); } - if (hfs_evtonly(vp)) { - if (dispose) { - /* "dispose" implies "forcibly", a la "FORCECLOSE": */ - simple_unlock(&mntvnode_slock); - vgonel(vp, p); - simple_lock(&mntvnode_slock); - } else { - simple_unlock(&vp->v_interlock); - }; - continue; - }; - - simple_unlock(&vp->v_interlock); - ++busy; - /* If asked to dispose, keep trying. If only checking, the answer is now known. */ - if (dispose) { - continue; - } else { - break; - }; - } - simple_unlock(&mntvnode_slock); - - return (busy == 0); + + /* Hide open files that have been deleted */ + if ((hfsmp->hfs_privdir_desc.cd_cnid != 0) && + (cndesc.cd_parentcnid == hfsmp->hfs_privdir_desc.cd_cnid)) { + // XXXdbg - if this is a hardlink, we could call + // hfs_chash_snoop() to see if there is + // already a cnode and vnode present for + // this fileid. however I'd rather not + // risk it at this point in Tiger. + cat_releasedesc(&cndesc); + error = ENOENT; + *vpp = NULL; + return (error); + } + } + + /* + * Supply hfs_getnewvnode with a component name. + */ + MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + cn.cn_nameiop = LOOKUP; + cn.cn_flags = ISLASTCN | HASBUF; + cn.cn_context = NULL; + cn.cn_pnlen = MAXPATHLEN; + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_namelen = cndesc.cd_namelen; + cn.cn_hash = 0; + cn.cn_consume = 0; + bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1); + + /* XXX should we supply the parent as well... ? */ + error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); + FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); + + cat_releasedesc(&cndesc); + *vpp = vp; + if (vp && skiplock) + hfs_unlock(VTOC(vp)); + return (error); } + /* * Flush out all the files in a filesystem. 
*/ @@ -2406,7 +2258,6 @@ hfs_flushfiles(struct mount *mp, int flags, struct proc *p) { struct hfsmount *hfsmp; struct vnode *skipvp = NULLVP; - struct vnode *rsrcvp; int quotafilecnt; int i; int error; @@ -2420,7 +2271,7 @@ hfs_flushfiles(struct mount *mp, int flags, struct proc *p) * extra reference when doing the intial vflush. */ quotafilecnt = 0; - if (mp->mnt_flag & MNT_QUOTA) { + if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { /* Find out how many quota files we have open. */ for (i = 0; i < MAXQUOTAS; i++) { @@ -2429,37 +2280,30 @@ hfs_flushfiles(struct mount *mp, int flags, struct proc *p) } /* Obtain the root vnode so we can skip over it. */ - if (hfs_chashget(hfsmp->hfs_raw_dev, kRootDirID, 0, - &skipvp, &rsrcvp) == NULL) { - skipvp = NULLVP; - } + skipvp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, kHFSRootFolderID, 0, 0); } #endif /* QUOTA */ error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags); - /* - * If the vflush() call failed solely because there are - * some event-only vnodes in the list, then forcibly get - * rid of those vnodes before the final vflush() pass. - */ - if ((error == EBUSY) && hfs_flush_evtonly(mp, SKIPSYSTEM | SKIPSWAP, 0, p)) { - (void) hfs_flush_evtonly(mp, SKIPSYSTEM | SKIPSWAP, 1, p); - }; + if (error != 0) + return(error); + error = vflush(mp, skipvp, SKIPSYSTEM | flags); #if QUOTA - if (mp->mnt_flag & MNT_QUOTA) { + if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { if (skipvp) { /* * See if there are additional references on the * root vp besides the ones obtained from the open - * quota files and the hfs_chashget call above. + * quota files and the hfs_chash_getvnode call above. */ if ((error == 0) && - (skipvp->v_usecount > (1 + quotafilecnt))) { + (vnode_isinuse(skipvp, quotafilecnt))) { error = EBUSY; /* root directory is still open */ } - vput(skipvp); + hfs_unlock(VTOC(skipvp)); + vnode_put(skipvp); } if (error && (flags & FORCECLOSE) == 0) return (error); @@ -2501,53 +2345,62 @@ hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) } if (index < 64) { - HFSTOVCB(hfsmp)->encodingsBitmap |= (u_int64_t)(1ULL << index); - HFSTOVCB(hfsmp)->vcbFlags |= 0xFF00; + HFS_MOUNT_LOCK(hfsmp, TRUE) + hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index); + hfsmp->vcbFlags |= 0xFF00; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); } } /* * Update volume stats + * + * On journal volumes this will cause a volume header flush */ __private_extern__ int hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) { - ExtendedVCB *vcb; + struct timeval tv; - vcb = HFSTOVCB(hfsmp); - vcb->vcbFlags |= 0xFF00; - vcb->vcbLsMod = time.tv_sec; + microtime(&tv); + + lck_mtx_lock(&hfsmp->hfs_mutex); + + hfsmp->vcbFlags |= 0xFF00; + hfsmp->hfs_mtime = tv.tv_sec; switch (op) { case VOL_UPDATE: break; case VOL_MKDIR: - if (vcb->vcbDirCnt != 0xFFFFFFFF) - ++vcb->vcbDirCnt; - if (inroot && vcb->vcbNmRtDirs != 0xFFFF) - ++vcb->vcbNmRtDirs; + if (hfsmp->hfs_dircount != 0xFFFFFFFF) + ++hfsmp->hfs_dircount; + if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) + ++hfsmp->vcbNmRtDirs; break; case VOL_RMDIR: - if (vcb->vcbDirCnt != 0) - --vcb->vcbDirCnt; - if (inroot && vcb->vcbNmRtDirs != 0xFFFF) - --vcb->vcbNmRtDirs; + if (hfsmp->hfs_dircount != 0) + --hfsmp->hfs_dircount; + if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF) + --hfsmp->vcbNmRtDirs; break; case VOL_MKFILE: - if (vcb->vcbFilCnt != 0xFFFFFFFF) - ++vcb->vcbFilCnt; - if (inroot && vcb->vcbNmFls != 0xFFFF) - ++vcb->vcbNmFls; + if (hfsmp->hfs_filecount != 0xFFFFFFFF) + ++hfsmp->hfs_filecount; + if (inroot && hfsmp->vcbNmFls != 0xFFFF) + 
++hfsmp->vcbNmFls; break; case VOL_RMFILE: - if (vcb->vcbFilCnt != 0) - --vcb->vcbFilCnt; - if (inroot && vcb->vcbNmFls != 0xFFFF) - --vcb->vcbNmFls; + if (hfsmp->hfs_filecount != 0) + --hfsmp->hfs_filecount; + if (inroot && hfsmp->vcbNmFls != 0xFFFF) + --hfsmp->vcbNmFls; break; } + lck_mtx_unlock(&hfsmp->hfs_mutex); + if (hfsmp->jnl) { hfs_flushvolumeheader(hfsmp, 0, 0); } @@ -2568,22 +2421,16 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) ByteCount namelen; sectorsize = hfsmp->hfs_phys_block_size; - retval = bread(hfsmp->hfs_devvp, HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp); + retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp); if (retval) { if (bp) - brelse(bp); + buf_brelse(bp); return retval; } - DBG_ASSERT(bp != NULL); - DBG_ASSERT(bp->b_data != NULL); - DBG_ASSERT(bp->b_bcount == size); - - if (hfsmp->jnl) { - panic("hfs: standard hfs volumes should not be journaled!\n"); - } + lck_mtx_lock(&hfsmp->hfs_mutex); - mdb = (HFSMasterDirectoryBlock *)(bp->b_data + HFS_PRI_OFFSET(sectorsize)); + mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize)); mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbCrDate))); mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod))); @@ -2617,6 +2464,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; fp = VTOF(vcb->catalogRefNum); mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock); @@ -2627,28 +2475,28 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount); mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize); mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + + MarkVCBClean( vcb ); + + lck_mtx_unlock(&hfsmp->hfs_mutex); /* If requested, flush out the alternate MDB */ if (altflush) { struct buf *alt_bp = NULL; - u_long altIDSector; - - altIDSector = HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); - if (meta_bread(hfsmp->hfs_devvp, altIDSector, sectorsize, NOCRED, &alt_bp) == 0) { - bcopy(mdb, alt_bp->b_data + HFS_ALT_OFFSET(sectorsize), kMDBSize); + if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) { + bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize); - (void) VOP_BWRITE(alt_bp); + (void) VNOP_BWRITE(alt_bp); } else if (alt_bp) - brelse(alt_bp); + buf_brelse(alt_bp); } if (waitfor != MNT_WAIT) - bawrite(bp); + buf_bawrite(bp); else - retval = VOP_BWRITE(bp); - - MarkVCBClean( vcb ); + retval = VNOP_BWRITE(bp); return (retval); } @@ -2672,10 +2520,10 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) struct buf *bp; int i; int sectorsize; - int priIDSector; + daddr64_t priIDSector; int critical = 0; u_int16_t signature; - u_int16_t version; + u_int16_t hfsversion; if (hfsmp->hfs_flags & HFS_READ_ONLY) { return(0); @@ -2686,27 +2534,19 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) if (altflush) critical = 1; sectorsize = hfsmp->hfs_phys_block_size; - priIDSector = (vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_PRI_SECTOR(sectorsize); + priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + + 
HFS_PRI_SECTOR(sectorsize)); - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); - return EINVAL; - } + if (hfs_start_transaction(hfsmp) != 0) { + return EINVAL; } - retval = meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp); if (retval) { if (bp) - brelse(bp); + buf_brelse(bp); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); printf("HFS: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); return (retval); @@ -2716,23 +2556,23 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) journal_modify_block_start(hfsmp->jnl, bp); } - volumeHeader = (HFSPlusVolumeHeader *)((char *)bp->b_data + HFS_PRI_OFFSET(sectorsize)); + volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize)); /* * Sanity check what we just read. */ signature = SWAP_BE16 (volumeHeader->signature); - version = SWAP_BE16 (volumeHeader->version); + hfsversion = SWAP_BE16 (volumeHeader->version); if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || - (version < kHFSPlusVersion) || (version > 100) || + (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { #if 1 panic("HFS: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d", - vcb->vcbVN, signature, version, + vcb->vcbVN, signature, hfsversion, SWAP_BE32 (volumeHeader->blockSize)); #endif printf("HFS: corrupt VH blk (%s)\n", vcb->vcbVN); - brelse(bp); + buf_brelse(bp); return (EIO); } @@ -2745,42 +2585,44 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) struct buf *bp2; HFSMasterDirectoryBlock *mdb; - retval = meta_bread(hfsmp->hfs_devvp, HFS_PRI_SECTOR(sectorsize), + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp2); if (retval) { if (bp2) - brelse(bp2); + buf_brelse(bp2); retval = 0; } else { - mdb = (HFSMasterDirectoryBlock *)(bp2->b_data + + mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) + HFS_PRI_OFFSET(sectorsize)); if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate ) { - // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, bp2); } mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */ - // XXXdbg if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp2); } else { - (void) VOP_BWRITE(bp2); /* write out the changes */ + (void) VNOP_BWRITE(bp2); /* write out the changes */ } } else { - brelse(bp2); /* just release it */ + buf_brelse(bp2); /* just release it */ } } } + if (1 /* hfsmp->jnl == 0 */) { + lck_mtx_lock(&hfsmp->hfs_mutex); + } + /* Note: only update the lower 16 bits worth of attributes */ - volumeHeader->attributes = SWAP_BE32 ((SWAP_BE32 (volumeHeader->attributes) & 0xFFFF0000) + (UInt16) vcb->vcbAtrb); - volumeHeader->journalInfoBlock = SWAP_BE32(vcb->vcbJinfoBlock); + volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb); + volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock); if (hfsmp->jnl) { volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion); } else { @@ -2791,6 +2633,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp)); volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt); 
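/*
 * Every field stored into the volume header in this block is big-endian on
 * disk, hence the SWAP_BE16/SWAP_BE32/SWAP_BE64 wrappers (no-ops on
 * big-endian PowerPC, byte swaps on little-endian i386). A small
 * illustration of the 32-bit store semantics; this is plain C for clarity,
 * not the kernel's actual macro definition:
 */
static inline void
put_be32(unsigned char disk[4], u_int32_t host)
{
	disk[0] = (host >> 24) & 0xff;	/* most significant byte first */
	disk[1] = (host >> 16) & 0xff;
	disk[2] = (host >>  8) & 0xff;
	disk[3] =  host        & 0xff;
}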
volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt); + volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks); volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks); volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation); volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz); @@ -2799,92 +2642,113 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt); volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap); - if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) + if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) { + bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)); critical = 1; - bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)); + } /* Sync Extents over-flow file meta data */ fp = VTOF(vcb->extentsRefNum); - for (i = 0; i < kHFSPlusExtentDensity; i++) { - volumeHeader->extentsFile.extents[i].startBlock = - SWAP_BE32 (fp->ff_extents[i].startBlock); - volumeHeader->extentsFile.extents[i].blockCount = - SWAP_BE32 (fp->ff_extents[i].blockCount); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->extentsFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->extentsFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; } - FTOC(fp)->c_flag &= ~C_MODIFIED; - volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size); - volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); - volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); /* Sync Catalog file meta data */ fp = VTOF(vcb->catalogRefNum); - for (i = 0; i < kHFSPlusExtentDensity; i++) { - volumeHeader->catalogFile.extents[i].startBlock = - SWAP_BE32 (fp->ff_extents[i].startBlock); - volumeHeader->catalogFile.extents[i].blockCount = - SWAP_BE32 (fp->ff_extents[i].blockCount); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->catalogFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->catalogFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; } - FTOC(fp)->c_flag &= ~C_MODIFIED; - volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size); - volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); - volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); /* Sync Allocation file meta data */ fp = VTOF(vcb->allocationsRefNum); - for (i = 0; i < kHFSPlusExtentDensity; i++) { - volumeHeader->allocationFile.extents[i].startBlock = - SWAP_BE32 (fp->ff_extents[i].startBlock); - volumeHeader->allocationFile.extents[i].blockCount = - SWAP_BE32 (fp->ff_extents[i].blockCount); + if (FTOC(fp)->c_flag & C_MODIFIED) { + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->allocationFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); 
+ volumeHeader->allocationFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + FTOC(fp)->c_flag &= ~C_MODIFIED; + } + + /* Sync Attribute file meta data */ + if (hfsmp->hfs_attribute_vp) { + fp = VTOF(hfsmp->hfs_attribute_vp); + for (i = 0; i < kHFSPlusExtentDensity; i++) { + volumeHeader->attributesFile.extents[i].startBlock = + SWAP_BE32 (fp->ff_extents[i].startBlock); + volumeHeader->attributesFile.extents[i].blockCount = + SWAP_BE32 (fp->ff_extents[i].blockCount); + } + FTOC(fp)->c_flag &= ~C_MODIFIED; + volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size); + volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); + volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); + } + + vcb->vcbFlags &= 0x00FF; + + if (1 /* hfsmp->jnl == 0 */) { + lck_mtx_unlock(&hfsmp->hfs_mutex); } - FTOC(fp)->c_flag &= ~C_MODIFIED; - volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size); - volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks); - volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize); /* If requested, flush out the alternate volume header */ - if (altflush) { + if (altflush && hfsmp->hfs_alt_id_sector) { struct buf *alt_bp = NULL; - u_long altIDSector; - - altIDSector = (vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); - if (meta_bread(hfsmp->hfs_devvp, altIDSector, sectorsize, NOCRED, &alt_bp) == 0) { + if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) { if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, alt_bp); } - bcopy(volumeHeader, alt_bp->b_data + HFS_ALT_OFFSET(sectorsize), kMDBSize); + bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize); if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, alt_bp); } else { - (void) VOP_BWRITE(alt_bp); + (void) VNOP_BWRITE(alt_bp); } } else if (alt_bp) - brelse(alt_bp); + buf_brelse(alt_bp); } - // XXXdbg if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp); - journal_end_transaction(hfsmp->jnl); } else { if (waitfor != MNT_WAIT) - bawrite(bp); + buf_bawrite(bp); else { - retval = VOP_BWRITE(bp); + retval = VNOP_BWRITE(bp); /* When critical data changes, flush the device cache */ if (critical && (retval == 0)) { - (void) VOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, - NULL, FWRITE, NOCRED, current_proc()); + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, + NULL, FWRITE, NULL); } } } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); - vcb->vcbFlags &= 0x00FF; return (retval); } @@ -2892,26 +2756,29 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) /* * Extend a file system. 
*/ -static int -hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) +__private_extern__ +int +hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) { + struct proc *p = vfs_context_proc(context); + kauth_cred_t cred = vfs_context_ucred(context); struct vnode *vp; struct vnode *devvp; struct buf *bp; - struct hfsmount *hfsmp; struct filefork *fp = NULL; ExtendedVCB *vcb; struct cat_fork forkdata; u_int64_t oldsize; u_int64_t newblkcnt; + u_int64_t prev_phys_block_count; u_int32_t addblks; u_int64_t sectorcnt; u_int32_t sectorsize; - daddr_t prev_alt_sector; - daddr_t bitmapblks; + daddr64_t prev_alt_sector; + daddr_t bitmapblks; + int lockflags; int error; - hfsmp = VFSTOHFS(mp); devvp = hfsmp->hfs_devvp; vcb = HFSTOVCB(hfsmp); @@ -2929,44 +2796,43 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) * If extending file system by non-root, then verify * ownership and check permissions. */ - if (p->p_ucred->cr_uid != 0) { - error = hfs_root(mp, &vp); + if (suser(cred, NULL)) { + error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0); + if (error) return (error); - error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, p->p_ucred, p, 0); + error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0); if (error == 0) { - error = hfs_write_access(vp, p->p_ucred, p, false); + error = hfs_write_access(vp, cred, p, false); } - vput(vp); + hfs_unlock(VTOC(vp)); + vnode_put(vp); if (error) return (error); - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); - VOP_UNLOCK(devvp, 0, p); + error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context); if (error) return (error); } - if (VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, FSCRED, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) { return (ENXIO); } if (sectorsize != hfsmp->hfs_phys_block_size) { return (ENXIO); } - if (VOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, FSCRED, p)) { + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) { return (ENXIO); } if ((sectorsize * sectorcnt) < newsize) { printf("hfs_extendfs: not enough space on device\n"); return (ENOSPC); } - oldsize = (u_int64_t)hfsmp->hfs_phys_block_count * - (u_int64_t)hfsmp->hfs_phys_block_size; + oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; /* * Validate new size. */ - if ((newsize <= oldsize) || (newsize % vcb->blockSize)) { + if ((newsize <= oldsize) || (newsize % sectorsize)) { printf("hfs_extendfs: invalid size\n"); return (EINVAL); } @@ -2980,23 +2846,12 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) /* * Enclose changes inside a transaction. */ - hfs_global_shared_lock_acquire(hfsmp); - if (journal_start_transaction(hfsmp->jnl) != 0) { - hfs_global_shared_lock_release(hfsmp); + if (hfs_start_transaction(hfsmp) != 0) { return (EINVAL); } - /* - * Remember the location of existing alternate VH. - */ - prev_alt_sector = (vcb->hfsPlusIOPosOffset / sectorsize) + - HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); - + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); vp = vcb->allocationsRefNum; - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) { - goto out2; - } fp = VTOF(vp); bcopy(&fp->ff_data, &forkdata, sizeof(forkdata)); @@ -3004,13 +2859,13 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) * Calculate additional space required (if any) by allocation bitmap.
*/ bitmapblks = roundup(newblkcnt / 8, vcb->vcbVBMIOSize) / vcb->blockSize; - if (bitmapblks > fp->ff_blocks) + if (bitmapblks > (daddr_t)fp->ff_blocks) bitmapblks -= fp->ff_blocks; else bitmapblks = 0; if (bitmapblks > 0) { - daddr_t blkno; + daddr64_t blkno; daddr_t blkcnt; /* @@ -3022,7 +2877,7 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) goto out; } blkcnt = bitmapblks; - blkno = fp->ff_blocks; + blkno = (daddr64_t)fp->ff_blocks; fp->ff_blocks += bitmapblks; fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; VTOC(vp)->c_blocks = fp->ff_blocks; @@ -3033,16 +2888,16 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) bp = NULL; while (blkcnt > 0) { - error = meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); + error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp); if (error) { if (bp) { - brelse(bp); + buf_brelse(bp); } break; } - bzero((char *)bp->b_data, vcb->blockSize); - bp->b_flags |= B_AGE; - error = bwrite(bp); + bzero((char *)buf_dataptr(bp), vcb->blockSize); + buf_markaged(bp); + error = (int)buf_bwrite(bp); if (error) break; --blkcnt; @@ -3080,14 +2935,17 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2); else (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1); - /* * Adjust file system variables for new space. */ + prev_phys_block_count = hfsmp->hfs_phys_block_count; + prev_alt_sector = hfsmp->hfs_alt_id_sector; + vcb->totalBlocks += addblks; vcb->freeBlocks += addblks - bitmapblks; hfsmp->hfs_phys_block_count = newsize / sectorsize; - + hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) + + HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count); MarkVCBDirty(vcb); error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); if (error) { @@ -3098,7 +2956,8 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize; vcb->totalBlocks -= addblks; vcb->freeBlocks -= addblks - bitmapblks; - hfsmp->hfs_phys_block_count = oldsize / sectorsize; + hfsmp->hfs_phys_block_count = prev_phys_block_count; + hfsmp->hfs_alt_id_sector = prev_alt_sector; MarkVCBDirty(vcb); if (vcb->blockSize == 512) (void) BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2); @@ -3110,13 +2969,17 @@ hfs_extendfs(struct mount *mp, u_int64_t newsize, struct proc *p) * Invalidate the old alternate volume header. */ bp = NULL; - if (meta_bread(hfsmp->hfs_devvp, prev_alt_sector, sectorsize, - NOCRED, &bp) == 0) { - journal_modify_block_start(hfsmp->jnl, bp); - bzero(bp->b_data + HFS_ALT_OFFSET(sectorsize), kMDBSize); - journal_modify_block_end(hfsmp->jnl, bp); - } else if (bp) { - brelse(bp); + if (prev_alt_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, prev_alt_sector, sectorsize, + NOCRED, &bp) == 0) { + journal_modify_block_start(hfsmp->jnl, bp); + + bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize); + + journal_modify_block_end(hfsmp->jnl, bp); + } else if (bp) { + buf_brelse(bp); + } } out: if (error && fp) { @@ -3125,14 +2988,601 @@ out: VTOC(vp)->c_blocks = fp->ff_blocks; } - VOP_UNLOCK(vp, 0, p); -out2: - journal_end_transaction(hfsmp->jnl); - hfs_global_shared_lock_release(hfsmp); + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + + return (error); +} + +#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL) + +/* + * Truncate a file system (while still mounted). 
+ */ +__private_extern__ +int +hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, __unused vfs_context_t context) +{ + struct vnode* rvp = NULL; + struct buf *bp = NULL; + u_int64_t oldsize; + u_int32_t newblkcnt; + u_int32_t reclaimblks; + int lockflags = 0; + int transaction_begun = 0; + int error; + + /* + * Grab the root vnode to serialize with another hfs_truncatefs call. + */ + error = hfs_vget(hfsmp, kHFSRootFolderID, &rvp, 0); + if (error) { + return (error); + } + /* + * - HFS Plus file systems only. + * - Journaling must be enabled. + * - No embedded volumes. + */ + if ((hfsmp->hfs_flags & HFS_STANDARD) || + (hfsmp->jnl == NULL) || + (hfsmp->hfsPlusIOPosOffset != 0)) { + error = EPERM; + goto out; + } + oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; + newblkcnt = newsize / hfsmp->blockSize; + reclaimblks = hfsmp->totalBlocks - newblkcnt; + + /* Make sure new size is valid. */ + if ((newsize < HFS_MIN_SIZE) || + (newsize >= oldsize) || + (newsize % hfsmp->hfs_phys_block_size)) { + error = EINVAL; + goto out; + } + /* Make sure there's enough space to work with. */ + if (reclaimblks > (hfsmp->freeBlocks / 4)) { + error = ENOSPC; + goto out; + } + + printf("hfs_truncatefs: shrinking %s by %d blocks out of %d\n", + hfsmp->vcbVN, reclaimblks, hfsmp->totalBlocks); + + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + transaction_begun = 1; + + /* + * Look for files that have blocks beyond newblkcnt. + */ + if (hfs_isallocated(hfsmp, newblkcnt, reclaimblks - 1)) { + /* + * hfs_reclaimspace will use separate transactions when + * relocating files (so we don't overwhelm the journal). + */ + hfs_end_transaction(hfsmp); + transaction_begun = 0; + + /* Attempt to reclaim some space. */ + if (hfs_reclaimspace(hfsmp, newblkcnt) != 0) { + printf("hfs_truncatefs: couldn't reclaim space on %s\n", hfsmp->vcbVN); + error = ENOSPC; + goto out; + } + if (hfs_start_transaction(hfsmp) != 0) { + error = EINVAL; + goto out; + } + transaction_begun = 1; + + /* Check if we're clear now. */ + if (hfs_isallocated(hfsmp, newblkcnt, reclaimblks - 1)) { + printf("hfs_truncatefs: didn't reclaim enough space on %s\n", hfsmp->vcbVN); + error = ENOSPC; + goto out; + } + } + lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* + * Mark the old alternate volume header as free. + * We don't bother shrinking allocation bitmap file. + */ + if (hfsmp->blockSize == 512) + (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2); + else + (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1); + + /* + * Allocate last block for alternate volume header. + */ + if (hfsmp->blockSize == 512) + error = BlockMarkAllocated(hfsmp, newblkcnt - 2, 2); + else + error = BlockMarkAllocated(hfsmp, newblkcnt - 1, 1); + + if (error) { + goto out; + } + /* + * Invalidate the existing alternate volume header. + */ + if (hfsmp->hfs_alt_id_sector) { + if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, + hfsmp->hfs_phys_block_size, NOCRED, &bp) == 0) { + journal_modify_block_start(hfsmp->jnl, bp); + + bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_phys_block_size)), kMDBSize); + + journal_modify_block_end(hfsmp->jnl, bp); + } else if (bp) { + buf_brelse(bp); + } + bp = NULL; + } + + /* + * Adjust file system variables and flush them to disk. 
+ */ + hfsmp->freeBlocks -= hfsmp->totalBlocks - newblkcnt; + hfsmp->totalBlocks = newblkcnt; + hfsmp->hfs_phys_block_count = newsize / hfsmp->hfs_phys_block_size; + hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, hfsmp->hfs_phys_block_count); + MarkVCBDirty(hfsmp); + error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + if (error) + panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); +out: + if (lockflags) { + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (transaction_begun) { + hfs_end_transaction(hfsmp); + } + if (rvp) { + hfs_unlock(VTOC(rvp)); + vnode_put(rvp); + } return (error); } +/* + * Reclaim space at the end of a file system. + */ +static int +hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk) +{ + struct vnode *vp = NULL; + FCB *fcb; + struct BTreeIterator * iterator = NULL; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile filerec; + u_int32_t saved_next_allocation; + cnid_t * cnidbufp; + size_t cnidbufsize; + int filecnt; + int maxfilecnt; + u_long block; + int lockflags; + int i; + int error; + + /* + * Check if Attributes file overlaps. + */ + if (hfsmp->hfs_attribute_vp) { + struct filefork *fp; + + fp = VTOF(hfsmp->hfs_attribute_vp); + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + block = fp->ff_extents[i].startBlock + + fp->ff_extents[i].blockCount; + if (block >= startblk) { + printf("hfs_reclaimspace: Attributes file can't move\n"); + return (EPERM); + } + } + } + + /* For now we'll move a maximum of 16,384 files. */ + maxfilecnt = MIN(hfsmp->hfs_filecount, 16384); + cnidbufsize = maxfilecnt * sizeof(cnid_t); + if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) { + return (ENOMEM); + } + if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) { + kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize); + return (ENOMEM); + } + + saved_next_allocation = hfsmp->nextAllocation; + hfsmp->nextAllocation = hfsmp->hfs_metazone_start; + + fcb = VTOF(hfsmp->hfs_catalog_vp); + bzero(iterator, sizeof(*iterator)); + + btdata.bufferAddress = &filerec; + btdata.itemSize = sizeof(filerec); + btdata.itemCount = 1; + + /* Keep the Catalog file locked during iteration. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = BTIterateRecord(fcb, kBTreeFirstRecord, iterator, NULL, NULL); + if (error) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; + } + + /* + * Iterate over all the catalog records looking for files + * that overlap into the space we're trying to free up. + */ + for (filecnt = 0; filecnt < maxfilecnt; ) { + error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); + if (error) { + if (error == btNotFound) + error = 0; + break; + } + if (filerec.recordType != kHFSPlusFileRecord || + filerec.fileID == hfsmp->hfs_jnlfileid) + continue; + /* + * Check if either fork overlaps target space. + */ + for (i = 0; i < kHFSPlusExtentDensity; ++i) { + block = filerec.dataFork.extents[i].startBlock + + filerec.dataFork.extents[i].blockCount; + if (block >= startblk) { + if (filerec.fileID == hfsmp->hfs_jnlfileid) { + printf("hfs_reclaimspace: cannot move active journal\n"); + error = EPERM; + break; + } + cnidbufp[filecnt++] = filerec.fileID; + break; + } + block = filerec.resourceFork.extents[i].startBlock + + filerec.resourceFork.extents[i].blockCount; + if (block >= startblk) { + cnidbufp[filecnt++] = filerec.fileID; + break; + } + } + } + /* All done with catalog. 
*/ + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) + goto out; + + /* Now move any files that are in the way. */ + for (i = 0; i < filecnt; ++i) { + struct vnode * rvp; + + if (hfs_vget(hfsmp, cnidbufp[i], &vp, 0) != 0) + continue; + + /* Relocate any data fork blocks. */ + if (VTOF(vp)->ff_blocks > 0) { + error = hfs_relocate(vp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); + } + hfs_unlock(VTOC(vp)); + if (error) + break; + + /* Relocate any resource fork blocks. */ + if ((VTOC((vp))->c_blocks - VTOF((vp))->ff_blocks) > 0) { + error = hfs_vgetrsrc(hfsmp, vp, &rvp, current_proc()); + if (error) + break; + hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK); + error = hfs_relocate(rvp, hfsmp->hfs_metazone_end + 1, kauth_cred_get(), current_proc()); + hfs_unlock(VTOC(rvp)); + vnode_put(rvp); + if (error) + break; + } + vnode_put(vp); + vp = NULL; + } + if (vp) { + vnode_put(vp); + vp = NULL; + } + + /* + * Note: this implementation doesn't handle overflow extents. + */ +out: + kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator)); + kmem_free(kernel_map, (vm_offset_t)cnidbufp, cnidbufsize); + + /* On errors restore the roving allocation pointer. */ + if (error) { + hfsmp->nextAllocation = saved_next_allocation; + } + return (error); +} + + +/* + * Get file system attributes. + */ +static int +hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) +{ + ExtendedVCB *vcb = VFSTOVCB(mp); + struct hfsmount *hfsmp = VFSTOHFS(mp); + u_long freeCNIDs; + + freeCNIDs = (u_long)0xFFFFFFFF - (u_long)hfsmp->vcbNxtCNID; + + VFSATTR_RETURN(fsap, f_objcount, (uint64_t)hfsmp->vcbFilCnt + (uint64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_filecount, (uint64_t)hfsmp->vcbFilCnt); + VFSATTR_RETURN(fsap, f_dircount, (uint64_t)hfsmp->vcbDirCnt); + VFSATTR_RETURN(fsap, f_maxobjcount, (uint64_t)0xFFFFFFFF); + VFSATTR_RETURN(fsap, f_iosize, (size_t)(MAX_UPL_TRANSFER * PAGE_SIZE)); + VFSATTR_RETURN(fsap, f_blocks, (uint64_t)hfsmp->totalBlocks); + VFSATTR_RETURN(fsap, f_bfree, (uint64_t)hfs_freeblks(hfsmp, 0)); + VFSATTR_RETURN(fsap, f_bavail, (uint64_t)hfs_freeblks(hfsmp, 1)); + VFSATTR_RETURN(fsap, f_bsize, (uint32_t)vcb->blockSize); + /* XXX needs clarification */ + VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1)); + /* Maximum files is constrained by total blocks. */ + VFSATTR_RETURN(fsap, f_files, (uint64_t)(hfsmp->totalBlocks - 2)); + VFSATTR_RETURN(fsap, f_ffree, MIN((uint64_t)freeCNIDs, (uint64_t)hfs_freeblks(hfsmp, 1))); + + fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev; + fsap->f_fsid.val[1] = vfs_typenum(mp); + VFSATTR_SET_SUPPORTED(fsap, f_fsid); + + VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord); + VFSATTR_RETURN(fsap, f_carbon_fsid, 0); + + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + vol_capabilities_attr_t *cap; + + cap = &fsap->f_capabilities; + + if (hfsmp->hfs_flags & HFS_STANDARD) { + cap->capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS; + } else { + cap->capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + (hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) | + (hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? 
VOL_CAP_FMT_CASE_SENSITIVE : 0) | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; + } + cap->capabilities[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK; + cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0; + cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0; + + cap->valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; + cap->valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK; + cap->valid[VOL_CAPABILITIES_RESERVED1] = 0; + cap->valid[VOL_CAPABILITIES_RESERVED2] = 0; + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + vol_attributes_attr_t *attrp = &fsap->f_attributes; + + attrp->validattr.commonattr = ATTR_CMN_VALIDMASK; + attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; + attrp->validattr.dirattr = ATTR_DIR_VALIDMASK; + attrp->validattr.fileattr = ATTR_FILE_VALIDMASK; + attrp->validattr.forkattr = 0; + + attrp->nativeattr.commonattr = ATTR_CMN_VALIDMASK; + attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO; + attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK; + attrp->nativeattr.fileattr = ATTR_FILE_VALIDMASK; + attrp->nativeattr.forkattr = 0; + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + fsap->f_create_time.tv_sec = hfsmp->vcbCrDate; + fsap->f_create_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_create_time); + fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod; + fsap->f_modify_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_modify_time); + + fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp; + fsap->f_backup_time.tv_nsec = 0; + VFSATTR_SET_SUPPORTED(fsap, f_backup_time); + if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) { + uint16_t subtype = 0; + + /* + * Subtypes (flavors) for HFS + * 0: Mac OS Extended + * 1: Mac OS Extended (Journaled) + * 2: Mac OS Extended (Case Sensitive) + * 3: Mac OS Extended (Case Sensitive, Journaled) + * 4 - 127: Reserved + * 128: Mac OS Standard + * + */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + subtype = HFS_SUBTYPE_STANDARDHFS; + } else /* HFS Plus */ { + if (hfsmp->jnl) + subtype |= HFS_SUBTYPE_JOURNALED; + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + subtype |= HFS_SUBTYPE_CASESENSITIVE; + } + fsap->f_fssubtype = subtype; + VFSATTR_SET_SUPPORTED(fsap, f_fssubtype); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + strncpy(fsap->f_vol_name, hfsmp->vcbVN, MAXPATHLEN); + fsap->f_vol_name[MAXPATHLEN - 1] = 0; + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + } + return (0); +} + +/* + * Perform a volume rename. Requires the FS' root vp. 
+ */ +static int +hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) +{ + ExtendedVCB *vcb = VTOVCB(vp); + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + struct cat_desc to_desc; + struct cat_desc todir_desc; + struct cat_desc new_desc; + cat_cookie_t cookie; + int lockflags; + int error = 0; + + /* + * Ignore attempts to rename a volume to a zero-length name. + */ + if (name[0] == 0) + return(0); + + bzero(&to_desc, sizeof(to_desc)); + bzero(&todir_desc, sizeof(todir_desc)); + bzero(&new_desc, sizeof(new_desc)); + bzero(&cookie, sizeof(cookie)); + + todir_desc.cd_parentcnid = kHFSRootParentID; + todir_desc.cd_cnid = kHFSRootFolderID; + todir_desc.cd_flags = CD_ISDIR; + + to_desc.cd_nameptr = name; + to_desc.cd_namelen = strlen(name); + to_desc.cd_parentcnid = kHFSRootParentID; + to_desc.cd_cnid = cp->c_cnid; + to_desc.cd_flags = CD_ISDIR; + + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) { + if ((error = hfs_start_transaction(hfsmp)) == 0) { + if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc); + + /* + * If successful, update the name in the VCB, ensure it's terminated. + */ + if (!error) { + strncpy(vcb->vcbVN, name, sizeof(vcb->vcbVN)); + vcb->vcbVN[sizeof(vcb->vcbVN) - 1] = 0; + } + + hfs_systemfile_unlock(hfsmp, lockflags); + cat_postflight(hfsmp, &cookie, p); + + if (error) + vcb->vcbFlags |= 0xFF00; + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); + } + hfs_end_transaction(hfsmp); + } + if (!error) { + /* Release old allocated name buffer */ + if (cp->c_desc.cd_flags & CD_HASBUF) { + char *name = cp->c_desc.cd_nameptr; + + cp->c_desc.cd_nameptr = 0; + cp->c_desc.cd_namelen = 0; + cp->c_desc.cd_flags &= ~CD_HASBUF; + vfs_removename(name); + } + /* Update cnode's catalog descriptor */ + replace_desc(cp, &new_desc); + vcb->volumeNameEncodingHint = new_desc.cd_encoding; + cp->c_touch_chgtime = TRUE; + } + + hfs_unlock(cp); + } + + return(error); +} + +/* + * Get file system attributes. + */ +static int +hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) +{ + kauth_cred_t cred = vfs_context_ucred(context); + int error = 0; + + /* + * Must be superuser or owner of filesystem to change volume attributes + */ + if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner)) + return(EACCES); + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + vnode_t root_vp; + + error = hfs_vfs_root(mp, &root_vp, context); + if (error) + goto out; + + error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context)); + (void) vnode_put(root_vp); + if (error) + goto out; + + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + } + +out: + return error; +} + /* * hfs vfs operations. @@ -3141,13 +3591,14 @@ struct vfsops hfs_vfsops = { hfs_mount, hfs_start, hfs_unmount, - hfs_root, + hfs_vfs_root, hfs_quotactl, - hfs_statfs, + hfs_vfs_getattr, /* was hfs_statfs */ hfs_sync, - hfs_vget, + hfs_vfs_vget, hfs_fhtovp, hfs_vptofh, hfs_init, - hfs_sysctl + hfs_sysctl, + hfs_vfs_setattr }; diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 68263e6b4..c35236e69 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -32,11 +32,11 @@ #include #include #include -#include -#include #include #include #include +#include +#include #include "hfs.h" #include "hfs_catalog.h" @@ -50,7 +50,7 @@ #include "hfscommon/headers/HFSUnicodeWrappers.h" -extern int count_lock_queue __P((void)); +extern int count_lock_queue(void); static void ReleaseMetaFileVNode(struct vnode *vp); @@ -63,11 +63,8 @@ static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); u_int32_t GetLogicalBlockSize(struct vnode *vp); -/* BTree accessor routines */ -extern OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions options, BlockDescriptor *block); -extern OSStatus SetBTreeBlockSize(FileReference vp, ByteCount blockSize, ItemCount minBlockCount); -extern OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF); -extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, ReleaseBlockOptions options); +extern int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey); + //******************************************************************************* // Note: Finder information in the HFS/HFS+ metadata are considered opaque and @@ -83,6 +80,7 @@ extern OSStatus ReleaseBTreeBlock(FileReference vp, BlockDescPtr blockPtr, Relea char hfs_catname[] = "Catalog B-tree"; char hfs_extname[] = "Extents B-tree"; char hfs_vbmname[] = "Volume Bitmap"; +char hfs_attrname[] = "Attribute B-tree"; char hfs_privdirname[] = "\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80\xE2\x90\x80HFS+ Private Data"; @@ -149,10 +147,11 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size); vcb->vcbVBMIOSize = kHFSBlockSize; - VCB_LOCK_INIT(vcb); + hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, + hfsmp->hfs_phys_block_count); bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_parentcnid = kRootParID; + cndesc.cd_parentcnid = kHFSRootParentID; cndesc.cd_flags |= CD_ISMETA; bzero(&cnattr, sizeof(cnattr)); cnattr.ca_nlink = 1; @@ -177,13 +176,13 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount); cnattr.ca_blocks = fork.cf_blocks; - error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork, - &vcb->extentsRefNum); + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_extents_vp); if (error) goto MtVolErr; - error = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum), + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), (KeyCompareProcPtr)CompareExtentKeys)); if (error) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } @@ -205,17 +204,34 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount); cnattr.ca_blocks = fork.cf_blocks; - error = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &fork, - &vcb->catalogRefNum); + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_catalog_vp); if (error) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } - error = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum), + error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), (KeyCompareProcPtr)CompareCatalogKeys)); if (error) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - 
VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto MtVolErr; + } + + /* + * Set up dummy Allocation file vnode (used only for locking bitmap) + */ + cndesc.cd_nameptr = hfs_vbmname; + cndesc.cd_namelen = strlen(hfs_vbmname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID; + bzero(&fork, sizeof(fork)); + cnattr.ca_blocks = 0; + + error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork, + &hfsmp->hfs_allocation_vp); + if (error) { + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto MtVolErr; } @@ -223,10 +239,11 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; /* - * all done with b-trees so we can unlock now... + * all done with system files so we can unlock now... */ - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); if ( error == noErr ) { @@ -239,8 +256,8 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, //-- Release any resources allocated so far before exiting with an error: MtVolErr: - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); CmdDone: return (error); @@ -254,14 +271,14 @@ CmdDone: __private_extern__ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args) + off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args, kauth_cred_t cred) { register ExtendedVCB *vcb; struct cat_desc cndesc; struct cat_attr cnattr; struct cat_fork cfork; UInt32 blockSize; - u_int64_t volumesize; + daddr64_t spare_sectors; struct BTreeInfoRec btinfo; u_int16_t signature; u_int16_t version; @@ -285,7 +302,9 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, signature = kHFSPlusSigWord; hfsmp->hfs_flags |= HFS_X; } else { - printf("hfs_mount: invalid HFS+ sig 0x%04x\n", signature); + /* Removed printf for invalid HFS+ signature because it gives + * false error for UFS root volume + */ return (EINVAL); } @@ -314,7 +333,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, vcb->vcbSigWord = signature; vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock); vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate)); - vcb->vcbAtrb = (UInt16)SWAP_BE32(vhp->attributes); + vcb->vcbAtrb = SWAP_BE32(vhp->attributes); vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize); vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID); vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate)); @@ -329,8 +348,6 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) vcb->vcbWrCnt++; /* compensate for write of Volume Header on last flush */ - VCB_LOCK_INIT(vcb); - /* Now fill in the Extended VCB info */ vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation); vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks); @@ -352,8 +369,23 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size); vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO); + /* + * Validate and 
initialize the location of the alternate volume header. + */ + spare_sectors = hfsmp->hfs_phys_block_count - + (((daddr64_t)vcb->totalBlocks * blockSize) / + hfsmp->hfs_phys_block_size); + + if (spare_sectors > (blockSize / hfsmp->hfs_phys_block_size)) { + hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */ + } else { + hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size) + + HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, + hfsmp->hfs_phys_block_count); + } + bzero(&cndesc, sizeof(cndesc)); - cndesc.cd_parentcnid = kRootParID; + cndesc.cd_parentcnid = kHFSRootParentID; cndesc.cd_flags |= CD_ISMETA; bzero(&cnattr, sizeof(cnattr)); cnattr.ca_nlink = 1; @@ -377,14 +409,14 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, cfork.cf_extents[i].blockCount = SWAP_BE32 (vhp->extentsFile.extents[i].blockCount); } - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork, - &vcb->extentsRefNum); + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_extents_vp); if (retval) goto ErrorExit; - retval = MacToVFSError(BTOpenPath(VTOF(vcb->extentsRefNum), + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp), (KeyCompareProcPtr) CompareExtentKeysPlus)); if (retval) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto ErrorExit; } @@ -406,25 +438,25 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, cfork.cf_extents[i].blockCount = SWAP_BE32 (vhp->catalogFile.extents[i].blockCount); } - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork, - &vcb->catalogRefNum); + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_catalog_vp); if (retval) { - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto ErrorExit; } - retval = MacToVFSError(BTOpenPath(VTOF(vcb->catalogRefNum), + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), (KeyCompareProcPtr) CompareExtendedCatalogKeys)); if (retval) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto ErrorExit; } if ((hfsmp->hfs_flags & HFS_X) && - BTGetInformation(VTOF(vcb->catalogRefNum), 0, &btinfo) == 0) { + BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) { if (btinfo.keyCompareType == kHFSBinaryCompare) { hfsmp->hfs_flags |= HFS_CASE_SENSITIVE; /* Install a case-sensitive key compare */ - (void) BTOpenPath(VTOF(vcb->catalogRefNum), + (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp), (KeyCompareProcPtr)cat_binarykeycompare); } } @@ -447,20 +479,59 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, cfork.cf_extents[i].blockCount = SWAP_BE32 (vhp->allocationFile.extents[i].blockCount); } - retval = hfs_getnewvnode(hfsmp, NULL, &cndesc, 0, &cnattr, &cfork, - &vcb->allocationsRefNum); + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_allocation_vp); if (retval) { - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto ErrorExit; } + /* + * Set up Attribute B-tree vnode + */ + if (vhp->attributesFile.totalBlocks != 0) { + cndesc.cd_nameptr = hfs_attrname; + cndesc.cd_namelen = strlen(hfs_attrname); + cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID; + + cfork.cf_size = SWAP_BE64 
(vhp->attributesFile.logicalSize); + cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize); + cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks); + cfork.cf_vblocks = 0; + cnattr.ca_blocks = cfork.cf_blocks; + for (i = 0; i < kHFSPlusExtentDensity; i++) { + cfork.cf_extents[i].startBlock = + SWAP_BE32 (vhp->attributesFile.extents[i].startBlock); + cfork.cf_extents[i].blockCount = + SWAP_BE32 (vhp->attributesFile.extents[i].blockCount); + } + retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork, + &hfsmp->hfs_attribute_vp); + if (retval) { + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto ErrorExit; + } + retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp), + (KeyCompareProcPtr) hfs_attrkeycompare)); + if (retval) { + hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); + goto ErrorExit; + } + } + + /* Pick up volume name and create date */ retval = cat_idlookup(hfsmp, kHFSRootFolderID, &cndesc, &cnattr, NULL); if (retval) { - VOP_UNLOCK(vcb->allocationsRefNum, 0, p); - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); goto ErrorExit; } vcb->vcbCrDate = cnattr.ca_itime; @@ -471,15 +542,17 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfs_flushvolumeheader(hfsmp, TRUE, TRUE); + hfs_flushvolumeheader(hfsmp, TRUE, 0); } /* * all done with metadata files so we can unlock now... */ - VOP_UNLOCK(vcb->allocationsRefNum, 0, p); - VOP_UNLOCK(vcb->catalogRefNum, 0, p); - VOP_UNLOCK(vcb->extentsRefNum, 0, p); + if (hfsmp->hfs_attribute_vp) + hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); // // Check if we need to do late journal initialization. This only @@ -494,9 +567,42 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = hfs_late_journal_init(hfsmp, vhp, args); if (retval != 0) { hfsmp->jnl = NULL; + + // if the journal failed to open, then set the lastMountedVersion + // to be "FSK!" which fsck_hfs will see and force the fsck instead + // of just bailing out because the volume is journaled. + if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) { + HFSPlusVolumeHeader *jvhp; + daddr64_t mdb_offset; + struct buf *bp = NULL; + + hfsmp->hfs_flags |= HFS_NEED_JNL_RESET; + + mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize)); + + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, mdb_offset, blockSize, cred, &bp); + if (retval == 0) { + jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blockSize)); + + if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) { + printf ("hfs(3): Journal replay fail. 
Writing lastMountVersion as FSK!\n"); + jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion); + buf_bwrite(bp); + } else { + buf_brelse(bp); + } + bp = NULL; + } else if (bp) { + buf_brelse(bp); + // clear this so the error exit path won't try to use it + bp = NULL; + } + } + + retval = EINVAL; goto ErrorExit; } else if (hfsmp->jnl) { - hfsmp->hfs_mp->mnt_flag |= MNT_JOURNALED; + vfs_setflags(hfsmp->hfs_mp, (uint64_t)((unsigned int)MNT_JOURNALED)); } } else if (hfsmp->jnl) { struct cat_attr jinfo_attr, jnl_attr; @@ -529,7 +635,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* setup private/hidden directory for unlinked files */ FindMetaDataDirectory(vcb); - if (hfsmp->jnl && ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) hfs_remove_orphans(hfsmp); if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected @@ -537,27 +643,28 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, MarkVCBDirty( vcb ); // mark VCB dirty so it will be written } - /* * Allow hot file clustering if conditions allow. */ if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { - (void) hfs_recording_init(hfsmp, p); + (void) hfs_recording_init(hfsmp); } + hfs_checkextendedsecurity(hfsmp); + return (0); ErrorExit: /* - * A fatal error occured and the volume cannot be mounted + * A fatal error occurred and the volume cannot be mounted * release any resources that we aquired... */ - - InvalidateCatalogCache(vcb); - ReleaseMetaFileVNode(vcb->allocationsRefNum); - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); + if (hfsmp->hfs_attribute_vp) + ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); + ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); return (retval); } @@ -573,12 +680,15 @@ static void ReleaseMetaFileVNode(struct vnode *vp) struct filefork *fp; if (vp && (fp = VTOF(vp))) { - if (fp->fcbBTCBPtr != NULL) + if (fp->fcbBTCBPtr != NULL) { + (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); (void) BTClosePath(fp); + hfs_unlock(VTOC(vp)); + } /* release the node even if BTClosePath fails */ - vrele(vp); - vgone(vp); + vnode_recycle(vp); + vnode_put(vp); } } @@ -594,28 +704,21 @@ __private_extern__ int hfsUnmount( register struct hfsmount *hfsmp, struct proc *p) { - ExtendedVCB *vcb = HFSTOVCB(hfsmp); - int retval = E_NONE; - - InvalidateCatalogCache( vcb ); + if (hfsmp->hfs_allocation_vp) + ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp); - if (hfsmp->hfc_filevp) { - ReleaseMetaFileVNode(hfsmp->hfc_filevp); - hfsmp->hfc_filevp = NULL; - } - - if (vcb->vcbSigWord == kHFSPlusSigWord) - ReleaseMetaFileVNode(vcb->allocationsRefNum); + if (hfsmp->hfs_attribute_vp) + ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp); - ReleaseMetaFileVNode(vcb->catalogRefNum); - ReleaseMetaFileVNode(vcb->extentsRefNum); + ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp); + ReleaseMetaFileVNode(hfsmp->hfs_extents_vp); - return (retval); + return (0); } /* - * Test is fork has overflow extents. + * Test if fork has overflow extents. */ __private_extern__ int @@ -649,55 +752,128 @@ overflow_extents(struct filefork *fp) /* - * Lock/Unlock a metadata file. + * Lock HFS system file(s). 
*/ __private_extern__ int -hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct proc *p) +hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype) { - ExtendedVCB *vcb; - struct vnode *vp = NULL; - int numOfLockedBuffs; - int retval = 0; - - vcb = HFSTOVCB(hfsmp); - - switch (fileID) { - case kHFSExtentsFileID: - vp = vcb->extentsRefNum; - break; + if (flags & ~SFL_VALIDMASK) + panic("hfs_systemfile_lock: invalid lock request (0x%x)", (unsigned long) flags); + /* + * Locking order is Catalog file, Attributes file, Bitmap file, Extents file + */ + if (flags & SFL_CATALOG) { + (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), locktype); + /* + * When the catalog file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if ((flags & SFL_EXTENTS) == 0 && + overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) { + flags |= SFL_EXTENTS; + } + } + if (flags & SFL_ATTRIBUTE) { + if (hfsmp->hfs_attribute_vp) { + (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), locktype); + /* + * When the attribute file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if ((flags & SFL_EXTENTS) == 0 && + overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) { + flags |= SFL_EXTENTS; + } + } else { + flags &= ~SFL_ATTRIBUTE; + } + } + if (flags & SFL_BITMAP) { + /* + * Since the only bitmap operations are clearing and + * setting bits we always need exclusive access. And + * when we have a journal, we can "hide" behind that + * lock since we can only change the bitmap from + * within a transaction. + */ + if (hfsmp->jnl) { + flags &= ~SFL_BITMAP; + } else { + (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK); + } + } + if (flags & SFL_EXTENTS) { + /* + * Since the extents btree lock is recursive we always + * need exclusive access. + */ + (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK); + } + return (flags); +} - case kHFSCatalogFileID: - vp = vcb->catalogRefNum; - break; +/* + * unlock HFS system file(s). 
+ */ +__private_extern__ +void +hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags) +{ + struct timeval tv; + u_int32_t lastfsync; + int numOfLockedBuffs; - case kHFSAllocationFileID: - /* bitmap is covered by Extents B-tree locking */ - /* FALL THROUGH */ - default: - panic("hfs_lockmetafile: invalid fileID"); + microuptime(&tv); + lastfsync = tv.tv_sec; + + if (flags & ~SFL_VALIDMASK) + panic("hfs_systemfile_unlock: invalid lock request (0x%x)", (unsigned long) flags); + + if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_vp) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); } - - if ((flags & LK_TYPE_MASK) != LK_RELEASE) { - flags |= LK_RETRY; - } else if (hfsmp->jnl == NULL) { - struct timeval tv = time; - u_int32_t lastfsync = tv.tv_sec; - - (void) BTGetLastSync((FCB*)VTOF(vp), &lastfsync); - - numOfLockedBuffs = count_lock_queue(); - if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || - ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > kMaxSecsForFsync))) { - hfs_btsync(vp, HFS_SYNCTRANS); + if (flags & SFL_CATALOG) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS); + } } + hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); + } + if (flags & SFL_BITMAP) { + hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); + } + if (flags & SFL_EXTENTS) { + if (hfsmp->jnl == NULL) { + BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync); + numOfLockedBuffs = count_lock_queue(); + if ((numOfLockedBuffs > kMaxLockedMetaBuffers) || + ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) > + kMaxSecsForFsync))) { + hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS); + } + } + hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); } - - retval = lockmgr(&VTOC(vp)->c_lock, flags, &vp->v_interlock, p); - - return (retval); } + /* * RequireFileLock * @@ -707,37 +883,32 @@ hfs_metafilelocking(struct hfsmount *hfsmp, u_long fileID, u_int flags, struct p #if HFS_DIAGNOSTIC void RequireFileLock(FileReference vp, int shareable) { - struct lock__bsd__ *lkp; - int locked = false; - pid_t pid; - void * self; - - pid = current_proc()->p_pid; - self = (void *) current_act(); - lkp = &VTOC(vp)->c_lock; + int locked; - simple_lock(&lkp->lk_interlock); + /* The extents btree and allocation bitmap are always exclusive. */ + if (VTOC(vp)->c_fileid == kHFSExtentsFileID || + VTOC(vp)->c_fileid == kHFSAllocationFileID) { + shareable = 0; + } - if (shareable && (lkp->lk_sharecount > 0) && (lkp->lk_lockholder == LK_NOPROC)) - locked = true; - else if ((lkp->lk_exclusivecount > 0) && (lkp->lk_lockholder == pid) && (lkp->lk_lockthread == self)) - locked = true; - - simple_unlock(&lkp->lk_interlock); + locked = VTOC(vp)->c_lockowner == (void *)current_thread(); - if (!locked) { + if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { - case 3: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: extent btree vnode not locked! v: 0x%08X\n #\n", (u_int)vp)); - break; - - case 4: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: catalog btree vnode not locked! 
v: 0x%08X\n #\n", (u_int)vp)); - break; - - default: - DEBUG_BREAK_MSG((" #\n # RequireFileLock: file (%d) not locked! v: 0x%08X\n #\n", VTOC(vp)->c_fileid, (u_int)vp)); - break; + case kHFSExtentsFileID: + panic("extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSCatalogFileID: + panic("catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSAllocationFileID: + /* The allocation file can hide behind the jornal lock. */ + if (VTOHFS(vp)->jnl == NULL) + panic("allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); + break; + case kHFSAttributesFileID: + panic("attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); + break; } } } @@ -757,15 +928,15 @@ void RequireFileLock(FileReference vp, int shareable) * */ int -hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, struct ucred *cred, +hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, struct proc *p, int invokesuperuserstatus) { - if ((cred->cr_uid == cnode_uid) || /* [1a] */ + if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */ (cnode_uid == UNKNOWNUID) || /* [1b] */ - ((HFSTOVFS(hfsmp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) && /* [2] */ - ((cred->cr_uid == hfsmp->hfs_uid) || /* [2a] */ + ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */ + ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */ (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */ - (invokesuperuserstatus && (suser(cred, &p->p_acflag) == 0))) { /* [3] */ + (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */ return (0); } else { return (EPERM); @@ -834,6 +1005,7 @@ FindMetaDataDirectory(ExtendedVCB *vcb) struct proc *p = current_proc(); struct timeval tv; cat_cookie_t cookie; + int lockflags; int error; if (vcb->vcbSigWord != kHFSPlusSigWord) @@ -848,15 +1020,12 @@ FindMetaDataDirectory(ExtendedVCB *vcb) hfsmp->hfs_privdir_desc.cd_flags = CD_ISDIR; } - /* Lock catalog b-tree */ - if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p) != 0) - return (0); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); error = cat_lookup(hfsmp, &hfsmp->hfs_privdir_desc, 0, NULL, - &hfsmp->hfs_privdir_attr, NULL); + &hfsmp->hfs_privdir_attr, NULL, NULL); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { hfsmp->hfs_metadata_createdate = hfsmp->hfs_privdir_attr.ca_itime; @@ -868,22 +1037,16 @@ FindMetaDataDirectory(ExtendedVCB *vcb) (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { hfsmp->hfs_privdir_attr.ca_flags &= ~SF_IMMUTABLE; - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - hfs_global_shared_lock_release(hfsmp); - return (hfsmp->hfs_privdir_attr.ca_fileid); - } + if ((error = hfs_start_transaction(hfsmp)) != 0) { + return (hfsmp->hfs_privdir_attr.ca_fileid); } - if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p) == 0) { - (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - } - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + (void) cat_update(hfsmp, &hfsmp->hfs_privdir_desc, + &hfsmp->hfs_privdir_attr, NULL, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + + hfs_end_transaction(hfsmp); } return 
(hfsmp->hfs_privdir_attr.ca_fileid); @@ -897,7 +1060,8 @@ FindMetaDataDirectory(ExtendedVCB *vcb) hfsmp->hfs_privdir_attr.ca_mode = S_IFDIR; hfsmp->hfs_privdir_attr.ca_nlink = 2; hfsmp->hfs_privdir_attr.ca_itime = vcb->vcbCrDate; - hfsmp->hfs_privdir_attr.ca_mtime = time.tv_sec; + microtime(&tv); + hfsmp->hfs_privdir_attr.ca_mtime = tv.tv_sec; /* hidden and off the desktop view */ fndrinfo = (struct FndrDirInfo *)&hfsmp->hfs_privdir_attr.ca_finderinfo; @@ -905,61 +1069,51 @@ FindMetaDataDirectory(ExtendedVCB *vcb) fndrinfo->frLocation.h = SWAP_BE16 (22460); fndrinfo->frFlags |= SWAP_BE16 (kIsInvisible + kNameLocked); - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - hfs_global_shared_lock_release(hfsmp); - return (0); - } + if ((error = hfs_start_transaction(hfsmp)) != 0) { + return (0); } /* Reserve some space in the Catalog file. */ if (cat_preflight(hfsmp, CAT_CREATE, &cookie, p) != 0) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - return (0); + hfs_end_transaction(hfsmp); + + return (0); } - if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p) == 0) { + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + error = cat_create(hfsmp, &hfsmp->hfs_privdir_desc, &hfsmp->hfs_privdir_attr, &out_desc); - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - } + hfs_systemfile_unlock(hfsmp, lockflags); cat_postflight(hfsmp, &cookie, p); if (error) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); - return (0); + hfs_end_transaction(hfsmp); + + return (0); } hfsmp->hfs_privdir_desc.cd_hint = out_desc.cd_hint; hfsmp->hfs_privdir_desc.cd_cnid = out_desc.cd_cnid; hfsmp->hfs_privdir_attr.ca_fileid = out_desc.cd_cnid; hfsmp->hfs_metadata_createdate = vcb->vcbCrDate; - - if (VFS_ROOT(HFSTOVFS(hfsmp), &dvp) == 0) { + + if (hfs_vget(hfsmp, kRootDirID, &dvp, 0) == 0) { dcp = VTOC(dvp); dcp->c_childhint = out_desc.cd_hint; dcp->c_nlink++; dcp->c_entries++; - dcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(dvp, &tv, &tv, 0); - vput(dvp); + dcp->c_touch_chgtime = TRUE; + dcp->c_touch_modtime = TRUE; + (void) hfs_update(dvp, 0); + hfs_unlock(dcp); + vnode_put(dvp); } hfs_volupdate(hfsmp, VOL_MKDIR, 1); - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); cat_releasedesc(&out_desc); @@ -968,7 +1122,7 @@ FindMetaDataDirectory(ExtendedVCB *vcb) __private_extern__ u_long -GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name, +GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, struct cat_attr *fattr, struct cat_fork *forkinfo) { struct hfsmount * hfsmp; @@ -976,7 +1130,7 @@ GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name, struct cnode * dcp = NULL; struct FndrDirInfo * fndrinfo; struct cat_desc jdesc; - struct timeval tv; + int lockflags; int error; if (vcb->vcbSigWord != kHFSPlusSigWord) @@ -989,25 +1143,22 @@ GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, char *name, jdesc.cd_nameptr = name; jdesc.cd_namelen = strlen(name); - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, current_proc()); - if (error) - return (0); - - error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo); - - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, 
LK_RELEASE, current_proc()); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { return (fattr->ca_fileid); } else if (hfsmp->hfs_flags & HFS_READ_ONLY) { return (0); } + + return (0); /* XXX what callers expect on an error */ } /* - * On Journaled HFS, there can be orphaned files. These + * On an HFS Plus volume, there can be orphaned files. These * are files that were unlinked while busy. If the volume * was not cleanly unmounted then some of these files may * have persisted and need to be removed. @@ -1026,18 +1177,21 @@ hfs_remove_orphans(struct hfsmount * hfsmp) char filename[32]; char tempname[32]; size_t namelen; - cat_cookie_t cookie = {0}; + cat_cookie_t cookie; int catlock = 0; int catreserve = 0; int started_tr = 0; - int shared_lock = 0; + int lockflags; int result; - + int orphanedlinks = 0; + + bzero(&cookie, sizeof(cookie)); + if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS) return; vcb = HFSTOVCB(hfsmp); - fcb = VTOF(vcb->catalogRefNum); + fcb = VTOF(hfsmp->hfs_catalog_vp); btdata.bufferAddress = &filerec; btdata.itemSize = sizeof(filerec); @@ -1045,34 +1199,31 @@ hfs_remove_orphans(struct hfsmount * hfsmp) MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); + + /* Build a key to "temp" */ keyp = (HFSPlusCatalogKey*)&iterator->key; keyp->parentID = hfsmp->hfs_privdir_desc.cd_cnid; + keyp->nodeName.length = 4; /* "temp" */ + keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2; + keyp->nodeName.unicode[0] = 't'; + keyp->nodeName.unicode[1] = 'e'; + keyp->nodeName.unicode[2] = 'm'; + keyp->nodeName.unicode[3] = 'p'; - result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (result) - goto exit; /* - * Position the iterator at the folder thread record. - * (i.e. one record before first child) + * Position the iterator just before the first real temp file. */ - result = BTSearchRecord(fcb, iterator, NULL, NULL, iterator); - - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (result) - goto exit; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator); + hfs_systemfile_unlock(hfsmp, lockflags); - /* Visit all the children in the HFS+ private directory. */ + /* Visit all the temp files in the HFS+ private directory.
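+ *
+ * (Editor's note: a minimal sketch, not part of the original change, of
+ * the hfs_systemfile_lock/hfs_systemfile_unlock pairing this loop relies
+ * on; error handling is elided.
+ *
+ *	int lockflags;
+ *
+ *	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
+ *	... operate on the catalog b-tree ...
+ *	hfs_systemfile_unlock(hfsmp, lockflags);
+ *
+ * The value returned by hfs_systemfile_lock() is handed back unchanged
+ * to hfs_systemfile_unlock().)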
*/ for (;;) { - result = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (result) - goto exit; - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL); - - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (result) break; - if (keyp->parentID != hfsmp->hfs_privdir_desc.cd_cnid) break; if (filerec.recordType != kHFSPlusFileRecord) @@ -1089,33 +1240,30 @@ hfs_remove_orphans(struct hfsmount * hfsmp) * */ if (bcmp(tempname, filename, namelen) == 0) { - struct filefork dfork = {0}; - struct filefork rfork = {0}; - struct cnode cnode = {0}; - - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - shared_lock = 1; - if (hfsmp->jnl) { - if (journal_start_transaction(hfsmp->jnl) != 0) { - goto exit; - } - started_tr = 1; + struct filefork dfork; + struct filefork rfork; + struct cnode cnode; + + bzero(&dfork, sizeof(dfork)); + bzero(&rfork, sizeof(rfork)); + bzero(&cnode, sizeof(cnode)); + + if (hfs_start_transaction(hfsmp) != 0) { + printf("hfs_remove_orphans: failed to start transaction\n"); + goto exit; } + started_tr = 1; /* * Reserve some space in the Catalog file. */ if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) { + printf("hfs_remove_orphans: cat_preflight failed\n"); goto exit; } catreserve = 1; - /* Lock catalog b-tree */ - if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, - LK_EXCLUSIVE, p) != 0) { - goto exit; - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); catlock = 1; /* Build a fake cnode */ @@ -1159,8 +1307,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) // that no one transaction gets too big. 
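// (Editor's note: a sketch, not part of the original change, of the
// end/start pairing used below to split long-running work across
// journal transactions; "do_more_work" is a hypothetical placeholder.)
//
//	hfs_end_transaction(hfsmp);
//	if (hfs_start_transaction(hfsmp) != 0)
//		goto exit;
//	do_more_work(hfsmp);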
// if (fsize > 0 && started_tr) { - journal_end_transaction(hfsmp->jnl); - if (journal_start_transaction(hfsmp->jnl) != 0) { + hfs_end_transaction(hfsmp); + if (hfs_start_transaction(hfsmp) != 0) { started_tr = 0; break; } @@ -1180,9 +1328,14 @@ hfs_remove_orphans(struct hfsmount * hfsmp) /* Remove the file record from the Catalog */ if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) { - printf("error deleting cat rec!\n"); + printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid); + hfs_volupdate(hfsmp, VOL_UPDATE, 0); break; } + ++orphanedlinks; + + /* Delete any attributes, ignore errors */ + (void) hfs_removeallattr(hfsmp, cnode.c_fileid); /* Update parent and volume counts */ hfsmp->hfs_privdir_attr.ca_entries--; @@ -1191,31 +1344,27 @@ hfs_remove_orphans(struct hfsmount * hfsmp) hfs_volupdate(hfsmp, VOL_RMFILE, 0); /* Drop locks and end the transaction */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); cat_postflight(hfsmp, &cookie, p); catlock = catreserve = 0; if (started_tr) { - journal_end_transaction(hfsmp->jnl); + hfs_end_transaction(hfsmp); started_tr = 0; } - hfs_global_shared_lock_release(hfsmp); - shared_lock = 0; } /* end if */ } /* end for */ - + if (orphanedlinks > 0) + printf("HFS: Removed %d orphaned unlinked files\n", orphanedlinks); exit: if (catlock) { - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); } if (catreserve) { cat_postflight(hfsmp, &cookie, p); } if (started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (shared_lock) { - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); } FREE(iterator, M_TEMP); @@ -1238,7 +1387,7 @@ u_int32_t logBlockSize; /* start with default */ logBlockSize = VTOHFS(vp)->hfs_logBlockSize; - if (vp->v_flag & VSYSTEM) { + if (vnode_issystem(vp)) { if (VTOF(vp)->fcbBTCBPtr != NULL) { BTreeInfoRec bTreeInfo; @@ -1268,9 +1417,10 @@ __private_extern__ u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) { - struct vcb_t *vcb = HFSTOVCB(hfsmp); + ExtendedVCB *vcb = HFSTOVCB(hfsmp); u_int32_t freeblks; + HFS_MOUNT_LOCK(hfsmp, TRUE); freeblks = vcb->freeBlocks; if (wantreserve) { if (freeblks > vcb->reserveBlocks) @@ -1282,6 +1432,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) freeblks -= vcb->loanedBlocks; else freeblks = 0; + HFS_MOUNT_UNLOCK(hfsmp, TRUE); #ifdef HFS_SPARSE_DEV /* * When the underlying device is sparse, check the * available space on the backing store volume. */ if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { - struct statfs statbuf; /* 272 bytes */ + struct vfsstatfs *vfsp; /* 272 bytes */ u_int32_t vfreeblks; u_int32_t loanedblks; struct mount * backingfs_mp; - backingfs_mp = hfsmp->hfs_backingfs_rootvp->v_mount; + backingfs_mp = vnode_mount(hfsmp->hfs_backingfs_rootvp); - if (VFS_STATFS(backingfs_mp, &statbuf, current_proc()) == 0) { - vfreeblks = statbuf.f_bavail; + if (vfsp = vfs_statfs(backingfs_mp)) { + HFS_MOUNT_LOCK(hfsmp, TRUE); + vfreeblks = (u_int32_t)vfsp->f_bavail; /* Normalize block count if needed.
*/ - if (statbuf.f_bsize != vcb->blockSize) { - vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)statbuf.f_bsize) / vcb->blockSize; + if (vfsp->f_bsize != vcb->blockSize) { + vfreeblks = ((u_int64_t)vfreeblks * (u_int64_t)(vfsp->f_bsize)) / vcb->blockSize; } if (vfreeblks > hfsmp->hfs_sparsebandblks) vfreeblks -= hfsmp->hfs_sparsebandblks; @@ -1315,6 +1467,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) vfreeblks = 0; freeblks = MIN(vfreeblks, freeblks); + HFS_MOUNT_UNLOCK(hfsmp, TRUE); } } #endif /* HFS_SPARSE_DEV */ @@ -1378,87 +1531,121 @@ short MacToVFSError(OSErr err) /* - * Get the directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. + * Find the current thread's directory hint for a given index. + * + * Requires an exclusive lock on directory cnode. */ __private_extern__ -char * -hfs_getnamehint(struct cnode *dcp, int index) +directoryhint_t * +hfs_getdirhint(struct cnode *dcp, int index) { - struct hfs_index *entry; - - if (index > 0) { - SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) { - if (entry->hi_index == index) - return (entry->hi_name); + struct timeval tv; + directoryhint_t *hint, *next, *oldest; + char * name; + + oldest = NULL; + microuptime(&tv); + + /* Look for an existing hint first */ + for(hint = dcp->c_hintlist.slh_first; hint != NULL; hint = next) { + next = hint->dh_link.sle_next; + if (hint->dh_index == index) { + goto out; + } else if (oldest == NULL || (hint->dh_time < oldest->dh_time)) { + oldest = hint; } } - - return (NULL); -} - -/* - * Save a directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. - */ -__private_extern__ -void -hfs_savenamehint(struct cnode *dcp, int index, const char * namehint) -{ - struct hfs_index *entry; - int len; - - if (index > 0) { - len = strlen(namehint); - MALLOC(entry, struct hfs_index *, len + sizeof(struct hfs_index), - M_TEMP, M_WAITOK); - entry->hi_index = index; - bcopy(namehint, entry->hi_name, len + 1); - SLIST_INSERT_HEAD(&dcp->c_indexlist, entry, hi_link); + /* Recycle one if we have too many already. */ + if ((dcp->c_dirhintcnt >= HFS_MAXDIRHINTS) && (oldest != NULL)) { + hint = oldest; + if ((name = hint->dh_desc.cd_nameptr)) { + hint->dh_desc.cd_nameptr = NULL; + vfs_removename(name); + } + goto init; } + + /* Create a default directory hint */ + MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK); + SLIST_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link); + ++dcp->c_dirhintcnt; +init: + hint->dh_index = index; + hint->dh_desc.cd_flags = 0; + hint->dh_desc.cd_encoding = 0; + hint->dh_desc.cd_namelen = 0; + hint->dh_desc.cd_nameptr = NULL; + hint->dh_desc.cd_parentcnid = dcp->c_cnid; + hint->dh_desc.cd_hint = dcp->c_childhint; + hint->dh_desc.cd_cnid = 0; +out: + hint->dh_time = tv.tv_sec; + return (hint); } /* - * Release the directory entry name hint for a given index. - * The directory cnode (dcp) must be locked. + * Release a single directory hint. + * + * Requires an exclusive lock on directory cnode. 
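+ *
+ * (Editor's sketch, not part of the original change, assuming a caller
+ * that does not cache the hint across calls; "dcp" and "index" as in
+ * hfs_getdirhint() above:
+ *
+ *	directoryhint_t *dirhint;
+ *
+ *	if (hfs_lock(dcp, HFS_EXCLUSIVE_LOCK) == 0) {
+ *		dirhint = hfs_getdirhint(dcp, index);
+ *		... resume the catalog scan from dirhint->dh_desc ...
+ *		hfs_reldirhint(dcp, dirhint);
+ *		hfs_unlock(dcp);
+ *	}
+ * )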
*/ __private_extern__ void -hfs_relnamehint(struct cnode *dcp, int index) +hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint) { - struct hfs_index *entry; - - if (index > 0) { - SLIST_FOREACH(entry, &dcp->c_indexlist, hi_link) { - if (entry->hi_index == index) { - SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index, - hi_link); - FREE(entry, M_TEMP); - break; + directoryhint_t *hint; + char * name; + + SLIST_FOREACH(hint, &dcp->c_hintlist, dh_link) { + if (hint == relhint) { + SLIST_REMOVE(&dcp->c_hintlist, hint, directoryhint, dh_link); + name = hint->dh_desc.cd_nameptr; + if (name != NULL) { + hint->dh_desc.cd_nameptr = NULL; + vfs_removename(name); } + FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT); + --dcp->c_dirhintcnt; + break; } } } /* - * Release all directory entry name hints. + * Release directory hints for given directory + * + * Requires an exclusive lock on directory cnode. */ __private_extern__ void -hfs_relnamehints(struct cnode *dcp) +hfs_reldirhints(struct cnode *dcp, int stale_hints_only) { - struct hfs_index *entry; - struct hfs_index *next; - - if (!SLIST_EMPTY(&dcp->c_indexlist)) { - for(entry = SLIST_FIRST(&dcp->c_indexlist); - entry != NULL; - entry = next) { - next = SLIST_NEXT(entry, hi_link); - SLIST_REMOVE(&dcp->c_indexlist, entry, hfs_index, hi_link); - FREE(entry, M_TEMP); + struct timeval tv; + directoryhint_t *hint, *next; + char * name; + + if (stale_hints_only) + microuptime(&tv); + else + tv.tv_sec = 0; + + for (hint = dcp->c_hintlist.slh_first; hint != NULL; hint = next) { + next = hint->dh_link.sle_next; + if (stale_hints_only) { + /* Skip over newer entries. */ + if ((tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL) + continue; + SLIST_REMOVE(&dcp->c_hintlist, hint, directoryhint, dh_link); + } + name = hint->dh_desc.cd_nameptr; + if (name != NULL) { + hint->dh_desc.cd_nameptr = NULL; + vfs_removename(name); } + FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT); + --dcp->c_dirhintcnt; } + if (!stale_hints_only) + dcp->c_hintlist.slh_first = NULL; } @@ -1498,8 +1685,8 @@ out: __private_extern__ int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, - void *_args, int embeddedOffset, int mdb_offset, - HFSMasterDirectoryBlock *mdbp, struct ucred *cred) + void *_args, off_t embeddedOffset, daddr64_t mdb_offset, + HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred) { JournalInfoBlock *jibp; struct buf *jinfo_bp, *bp; @@ -1517,14 +1704,14 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize; - retval = meta_bread(devvp, - embeddedOffset/blksize + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock), + retval = (int)buf_meta_bread(devvp, + (daddr64_t)((embeddedOffset/blksize) + + (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), SWAP_BE32(vhp->blockSize), cred, &jinfo_bp); if (retval) return retval; - jibp = (JournalInfoBlock *)jinfo_bp->b_data; + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); jibp->flags = SWAP_BE32(jibp->flags); jibp->offset = SWAP_BE64(jibp->offset); jibp->size = SWAP_BE64(jibp->size); @@ -1533,7 +1720,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfsmp->jvp = hfsmp->hfs_devvp; } else { printf("hfs: journal not stored in fs! 
don't know what to do.\n"); - brelse(jinfo_bp); + buf_brelse(jinfo_bp); return EINVAL; } @@ -1543,9 +1730,9 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (jibp->flags & kJIJournalNeedInitMask) { printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", - jibp->offset + (off_t)embeddedOffset, jibp->size); + jibp->offset + embeddedOffset, jibp->size); hfsmp->jnl = journal_create(hfsmp->jvp, - jibp->offset + (off_t)embeddedOffset, + jibp->offset + embeddedOffset, jibp->size, devvp, blksize, @@ -1559,16 +1746,16 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, jibp->flags = SWAP_BE32(jibp->flags); jibp->offset = SWAP_BE64(jibp->offset); jibp->size = SWAP_BE64(jibp->size); - bwrite(jinfo_bp); + buf_bwrite(jinfo_bp); jinfo_bp = NULL; jibp = NULL; } else { //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", - // jibp->offset + (off_t)embeddedOffset, + // jibp->offset + embeddedOffset, // jibp->size, SWAP_BE32(vhp->blockSize)); hfsmp->jnl = journal_open(hfsmp->jvp, - jibp->offset + (off_t)embeddedOffset, + jibp->offset + embeddedOffset, jibp->size, devvp, blksize, @@ -1576,7 +1763,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, arg_tbufsz, hfs_sync_metadata, hfsmp->hfs_mp); - brelse(jinfo_bp); + buf_brelse(jinfo_bp); jinfo_bp = NULL; jibp = NULL; @@ -1584,17 +1771,17 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, // reload the mdb because it could have changed // if the journal had to be replayed. if (mdb_offset == 0) { - mdb_offset = (embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize); + mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); } - retval = meta_bread(devvp, mdb_offset, blksize, cred, &bp); + retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp); if (retval) { - brelse(bp); + buf_brelse(bp); printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", retval); return retval; } - bcopy(bp->b_data + HFS_PRI_OFFSET(blksize), mdbp, 512); - brelse(bp); + bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512); + buf_brelse(bp); bp = NULL; } } @@ -1673,9 +1860,9 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_phys_block_size; - retval = meta_bread(devvp, - vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock), + retval = (int)buf_meta_bread(devvp, + (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size + + (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp); if (retval) { printf("hfs: can't read journal info block. disabling journaling.\n"); @@ -1683,7 +1870,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a return 0; } - jibp = (JournalInfoBlock *)jinfo_bp->b_data; + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); jibp->flags = SWAP_BE32(jibp->flags); jibp->offset = SWAP_BE64(jibp->offset); jibp->size = SWAP_BE64(jibp->size); @@ -1692,7 +1879,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) { printf("hfs: can't find the journal file! 
disabling journaling (start: %d)\n", jfork.cf_extents[0].startBlock); - brelse(jinfo_bp); + buf_brelse(jinfo_bp); vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; return 0; } @@ -1720,7 +1907,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->jvp = hfsmp->hfs_devvp; } else { printf("hfs: journal not stored in fs! don't know what to do.\n"); - brelse(jinfo_bp); + buf_brelse(jinfo_bp); return EINVAL; } @@ -1776,9 +1963,9 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a jibp->offset = SWAP_BE64(jibp->offset); jibp->size = SWAP_BE64(jibp->size); - bwrite(jinfo_bp); + buf_bwrite(jinfo_bp); } else { - brelse(jinfo_bp); + buf_brelse(jinfo_bp); } jinfo_bp = NULL; jibp = NULL; @@ -1828,7 +2015,6 @@ static void hfs_metadatazone_init(struct hfsmount *hfsmp) { ExtendedVCB *vcb; - struct BTreeInfoRec btinfo; u_int64_t fs_size; u_int64_t zonesize; u_int64_t temp; @@ -1931,7 +2117,6 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) } } filesize += (items + 1) * sizeof(struct dqblk); - hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; zonesize += filesize; /* @@ -1944,6 +2129,8 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) filesize += temp / 3; hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; + hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize; + /* Convert to allocation blocks. */ blk = zonesize / vcb->blockSize; @@ -1968,15 +2155,19 @@ static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *hfsmp) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); + int lockflags; int freeblocks; + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); freeblocks = MetaZoneFreeBlocks(vcb); + hfs_systemfile_unlock(hfsmp, lockflags); + /* Minus Extents overflow file reserve. */ freeblocks -= - hfsmp->hfs_overflow_maxblks - VTOF(vcb->extentsRefNum)->ff_blocks; + hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks; /* Minus catalog file reserve. */ freeblocks -= - hfsmp->hfs_catalog_maxblks - VTOF(vcb->catalogRefNum)->ff_blocks; + hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks; if (freeblocks < 0) freeblocks = 0; @@ -2011,3 +2202,55 @@ hfs_virtualmetafile(struct cnode *cp) return (0); } + +__private_extern__ +int +hfs_start_transaction(struct hfsmount *hfsmp) +{ + int ret; + + if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != current_thread()) { + lck_rw_lock_shared(&hfsmp->hfs_global_lock); + } + + if (hfsmp->jnl) { + ret = journal_start_transaction(hfsmp->jnl); + if (ret == 0) { + OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting); + } + } else { + ret = 0; + } + + if (ret != 0) { + lck_rw_done(&hfsmp->hfs_global_lock); + } + + return ret; +} + +__private_extern__ +int +hfs_end_transaction(struct hfsmount *hfsmp) +{ + int need_unlock=0, ret; + + if ( hfsmp->jnl == NULL + || ( journal_owner(hfsmp->jnl) == current_thread() + && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) { + + need_unlock = 1; + } + + if (hfsmp->jnl) { + ret = journal_end_transaction(hfsmp->jnl); + } else { + ret = 0; + } + + if (need_unlock) { + lck_rw_done(&hfsmp->hfs_global_lock); + } + + return ret; +} diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 0f4848c24..873ff095c 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,18 +22,20 @@ #include #include -#include +#include #include #include #include #include -#include +#include #include -#include #include +#include #include #include #include +#include +#include #include #include @@ -45,7 +47,6 @@ #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" -#include "hfs_lockf.h" #include "hfs_dbg.h" #include "hfs_mount.h" #include "hfs_quota.h" @@ -67,37 +68,45 @@ extern unsigned long strtoul(const char *, char **, int); -extern int groupmember(gid_t gid, struct ucred *cred); +static int hfs_makenode(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp, struct vnode_attr *vap, + vfs_context_t ctx); -static int hfs_makenode(int mode, struct vnode *dvp, struct vnode **vpp, - struct componentname *cnp); - -static int hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, - struct vnode **rvpp, struct proc *p); - -static int hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p); +static int hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, struct proc *p); static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *, int); static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, - int); + int, int); + +static int hfs_vnop_close(struct vnop_close_args*); +static int hfs_vnop_create(struct vnop_create_args*); +static int hfs_vnop_exchange(struct vnop_exchange_args*); +static int hfs_vnop_fsync(struct vnop_fsync_args*); +static int hfs_vnop_mkdir(struct vnop_mkdir_args*); +static int hfs_vnop_mknod(struct vnop_mknod_args*); +static int hfs_vnop_getattr(struct vnop_getattr_args*); +static int hfs_vnop_open(struct vnop_open_args*); +static int hfs_vnop_readdir(struct vnop_readdir_args*); +static int hfs_vnop_remove(struct vnop_remove_args*); +static int hfs_vnop_rename(struct vnop_rename_args*); +static int hfs_vnop_rmdir(struct vnop_rmdir_args*); +static int hfs_vnop_symlink(struct vnop_symlink_args*); +static int hfs_vnop_setattr(struct vnop_setattr_args*); /* Options for hfs_removedir and hfs_removefile */ -#define HFSRM_PARENT_LOCKED 0x01 -#define HFSRM_SKIP_RESERVE 0x02 -#define HFSRM_SAVE_NAME 0x04 -#define HFSRM_RENAMEOPTS 0x07 +#define HFSRM_SKIP_RESERVE 0x01 -int hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags); +int hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags); -int hfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, +int hfs_chflags(struct vnode *vp, uint32_t flags, kauth_cred_t cred, struct proc *p); -int hfs_chmod(struct vnode *vp, int mode, struct ucred *cred, +int hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p); int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, - struct ucred *cred, struct proc *p); + kauth_cred_t cred, struct proc *p); /***************************************************************************** * @@ -106,140 +115,83 @@ int hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, *****************************************************************************/ /* - * Create a regular file -#% create dvp L U U -#% create vpp - L - -# - vop_create { - IN WILLRELE struct vnode *dvp; - OUT struct vnode **vpp; - IN struct componentname *cnp; - IN struct vattr *vap; - - We are responsible for freeing the namei buffer, - it is done in hfs_makenode() -*/ - + * Create a regular file. 
+ */ static int -hfs_create(ap) - struct vop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; +hfs_vnop_create(struct vnop_create_args *ap) { - struct vattr *vap = ap->a_vap; - - return (hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, ap->a_vpp, ap->a_cnp)); + return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); } - /* - * Mknod vnode call - -#% mknod dvp L U U -#% mknod vpp - X - -# - vop_mknod { - IN WILLRELE struct vnode *dvp; - OUT WILLRELE struct vnode **vpp; - IN struct componentname *cnp; - IN struct vattr *vap; - */ -/* ARGSUSED */ - + * Make device special file. + */ static int -hfs_mknod(ap) - struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; +hfs_vnop_mknod(struct vnop_mknod_args *ap) { - struct vattr *vap = ap->a_vap; + struct vnode_attr *vap = ap->a_vap; + struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct cnode *cp; int error; - if (VTOVCB(ap->a_dvp)->vcbSigWord != kHFSPlusSigWord) { - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); - return (EOPNOTSUPP); + if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) { + return (ENOTSUP); } /* Create the vnode */ - error = hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, vpp, ap->a_cnp); + error = hfs_makenode(dvp, vpp, ap->a_cnp, vap, ap->a_context); if (error) return (error); + cp = VTOC(*vpp); - cp->c_flag |= C_ACCESS | C_CHANGE | C_UPDATE; + cp->c_touch_acctime = TRUE; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + if ((vap->va_rdev != VNOVAL) && (vap->va_type == VBLK || vap->va_type == VCHR)) cp->c_rdev = vap->va_rdev; - /* - * Remove cnode so that it will be reloaded by lookup and - * checked to see if it is an alias of an existing vnode. - * Note: unlike UFS, we don't bash v_type here. - */ - vput(*vpp); - vgone(*vpp); - *vpp = 0; + return (0); } - /* - * Open called. -#% open vp L L L -# - vop_open { - IN struct vnode *vp; - IN int mode; - IN struct ucred *cred; - IN struct proc *p; - */ - - + * Open a file/directory. + */ static int -hfs_open(ap) - struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +hfs_vnop_open(struct vnop_open_args *ap) { struct vnode *vp = ap->a_vp; - struct filefork *fp = VTOF(vp); + struct filefork *fp; struct timeval tv; + int error; /* * Files marked append-only must be opened for appending. */ - if ((vp->v_type != VDIR) && (VTOC(vp)->c_flags & APPEND) && + if ((VTOC(vp)->c_flags & APPEND) && !vnode_isdir(vp) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); - if (ap->a_mode & O_EVTONLY) { - if (vp->v_type == VREG) { - ++VTOF(vp)->ff_evtonly_refs; - } else { - ++VTOC(vp)->c_evtonly_refs; - }; - }; + if (vnode_isreg(vp) && !UBCINFOEXISTS(vp)) + return (EBUSY); /* file is in use by the kernel */ + /* Don't allow journal file to be opened externally. */ + if (VTOC(vp)->c_fileid == VTOHFS(vp)->hfs_jnlfileid) + return (EPERM); /* * On the first (non-busy) open of a fragmented * file attempt to de-frag it (if its less than 20MB). 
*/ if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) || - !UBCISVALID(vp) || ubc_isinuse(vp, 1)) { + (VTOHFS(vp)->jnl == NULL) || + !vnode_isreg(vp) || vnode_isinuse(vp, 0)) { return (0); } + + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); fp = VTOF(vp); if (fp->ff_blocks && fp->ff_extents[7].blockCount != 0 && @@ -248,299 +200,219 @@ hfs_open(ap) * Wait until system bootup is done (3 min). */ microuptime(&tv); - if (tv.tv_sec < (60 * 3)) { - return (0); + if (tv.tv_sec > (60 * 3)) { + (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); } - (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, ap->a_cred, ap->a_p); } + hfs_unlock(VTOC(vp)); return (0); } -/* - * Close called. - * - * Update the times on the cnode. -#% close vp U U U -# - vop_close { - IN struct vnode *vp; - IN int fflag; - IN struct ucred *cred; - IN struct proc *p; - */ - +/* + * Close a file/directory. + */ static int -hfs_close(ap) - struct vop_close_args /* { +hfs_vnop_close(ap) + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; - register struct cnode *cp = VTOC(vp); - register struct filefork *fp = VTOF(vp); - struct proc *p = ap->a_p; - struct timeval tv; - off_t leof; - u_long blks, blocksize; - int devBlockSize; - int error; - - simple_lock(&vp->v_interlock); - if ((!UBCISVALID(vp) && vp->v_usecount > 1) - || (UBCISVALID(vp) && ubc_isinuse(vp, 1))) { - tv = time; - CTIMES(cp, &tv, &tv); - } - simple_unlock(&vp->v_interlock); - - if (ap->a_fflag & O_EVTONLY) { - if (vp->v_type == VREG) { - --VTOF(vp)->ff_evtonly_refs; - } else { - --VTOC(vp)->c_evtonly_refs; - }; - }; + register struct cnode *cp; + struct proc *p = vfs_context_proc(ap->a_context); + struct hfsmount *hfsmp; + int busy; - /* - * VOP_CLOSE can be called with vp locked (from vclean). - * We check for this case using VOP_ISLOCKED and bail. - * - * XXX During a force unmount we won't do the cleanup below! - */ - if (vp->v_type == VDIR || VOP_ISLOCKED(vp)) + if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) return (0); + cp = VTOC(vp); + hfsmp = VTOHFS(vp); - leof = fp->ff_size; - - if ((fp->ff_blocks > 0) && - !ISSET(cp->c_flag, C_DELETED) && - ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) == 0)) { - enum vtype our_type = vp->v_type; - u_long our_id = vp->v_id; - int was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA); - - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) - return (0); - /* - * Since we can context switch in vn_lock our vnode - * could get recycled (eg umount -f). Double check - * that its still ours. 
- */ - if (vp->v_type != our_type || vp->v_id != our_id - || cp != VTOC(vp) || !UBCINFOEXISTS(vp)) { - VOP_UNLOCK(vp, 0, p); - return (0); - } - - /* - * Last chance to explicitly zero out the areas - * that are currently marked invalid: - */ - VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize); - (void) cluster_push(vp); - SET(vp->v_flag, VNOCACHE_DATA); /* Don't cache zeros */ - while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { - struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; - - /* The range about to be written must be validated - * first, so that VOP_CMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); - - (void) cluster_write(vp, (struct uio *) 0, leof, - invalid_range->rl_end + 1, invalid_range->rl_start, - (off_t)0, devBlockSize, IO_HEADZEROFILL | IO_NOZERODIRTY); - - if (ISSET(vp->v_flag, VHASDIRTY)) - (void) cluster_push(vp); + // if we froze the fs and we're exiting, then "thaw" the fs + if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { + hfsmp->hfs_freezing_proc = NULL; + hfs_global_exclusive_lock_release(hfsmp); + } - cp->c_flag |= C_MODIFIED; - } - cp->c_flag &= ~C_ZFWANTSYNC; - cp->c_zftimeout = 0; - blocksize = VTOVCB(vp)->blockSize; - blks = leof / blocksize; - if (((off_t)blks * (off_t)blocksize) != leof) - blks++; - /* - * Shrink the peof to the smallest size neccessary to contain the leof. - */ - if (blks < fp->ff_blocks) - (void) VOP_TRUNCATE(vp, leof, IO_NDELAY, ap->a_cred, p); - (void) cluster_push(vp); + busy = vnode_isinuse(vp, 1); - if (!was_nocache) - CLR(vp->v_flag, VNOCACHE_DATA); - - /* - * If the VOP_TRUNCATE didn't happen to flush the vnode's - * information out to disk, force it to be updated now that - * all invalid ranges have been zero-filled and validated: - */ - if (cp->c_flag & C_MODIFIED) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 0); - } - VOP_UNLOCK(vp, 0, p); + if (busy) { + hfs_touchtimes(VTOHFS(vp), cp); + } + if (vnode_isdir(vp)) { + hfs_reldirhints(cp, busy); + } else if (vnode_issystem(vp) && !busy) { + vnode_recycle(vp); } - if ((vp->v_flag & VSYSTEM) && (vp->v_usecount == 1)) - vgone(vp); + + hfs_unlock(cp); return (0); } /* -#% access vp L L L -# - vop_access { - IN struct vnode *vp; - IN int mode; - IN struct ucred *cred; - IN struct proc *p; - - */ - + * Get basic attributes. + */ static int -hfs_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +hfs_vnop_getattr(struct vnop_getattr_args *ap) { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct ucred *cred = ap->a_cred; - register gid_t *gp; - mode_t mode = ap->a_mode; - mode_t mask = 0; - int i; - int error; + struct vnode_attr *vap = ap->a_vap; + struct vnode *rvp = NULL; + struct hfsmount *hfsmp; + struct cnode *cp; + enum vtype v_type; + int error = 0; - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); -#if QUOTA - if ((error = hfs_getinoquota(cp))) - return (error); -#endif /* QUOTA */ - break; - } - /* If immutable bit set, nobody gets to write it. 
*/ - if (cp->c_flags & IMMUTABLE) - return (EPERM); + if ((error = hfs_lock(VTOC(vp), HFS_SHARED_LOCK))) { + return (error); } + cp = VTOC(vp); + hfsmp = VTOHFS(vp); + hfs_touchtimes(hfsmp, cp); + v_type = vnode_vtype(vp); + VATTR_RETURN(vap, va_rdev, (v_type == VBLK || v_type == VCHR) ? cp->c_rdev : 0); + if (v_type == VDIR) { + if (VATTR_IS_ACTIVE(vap, va_nlink)) { + int entries; + + entries = cp->c_nlink; + if (vnode_isvroot(vp)) { + if (hfsmp->hfs_privdir_desc.cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->jnl) + entries -= 2; /* hide the journal files */ + } + VATTR_RETURN(vap, va_nlink, (uint64_t)entries); + } + + if (VATTR_IS_ACTIVE(vap, va_nchildren)) { + int entries; + + entries = cp->c_entries; + if (vnode_isvroot(vp)) { + if (hfsmp->hfs_privdir_desc.cd_cnid != 0) + --entries; /* hide private dir */ + if (hfsmp->jnl) + entries -= 2; /* hide the journal files */ + } + VATTR_RETURN(vap, va_nchildren, entries); + } + } else { + VATTR_RETURN(vap, va_nlink, (uint64_t)cp->c_nlink); + } - /* Otherwise, user id 0 always gets access. */ - if (cred->cr_uid == 0) - return (0); + /* conditional because 64-bit arithmetic can be expensive */ + if (VATTR_IS_ACTIVE(vap, va_total_size)) { + if (v_type == VDIR) { + VATTR_RETURN(vap, va_total_size, cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE); + } else { + uint64_t total_size = 0; + struct cnode *rcp; + + if (cp->c_datafork) { + total_size = cp->c_datafork->ff_size; + } - mask = 0; + if (cp->c_blocks - VTOF(vp)->ff_blocks) { + /* hfs_vgetrsrc does not use struct proc - therefore passing NULL */ + error = hfs_vgetrsrc(hfsmp, vp, &rvp, NULL); + if (error) { + goto out; + } + + rcp = VTOC(rvp); + if (rcp && rcp->c_rsrcfork) { + total_size += rcp->c_rsrcfork->ff_size; + } + } - /* Otherwise, check the owner. */ - if ( (cp->c_uid == cred->cr_uid) || (cp->c_uid == UNKNOWNUID) ) { - if (mode & VEXEC) - mask |= S_IXUSR; - if (mode & VREAD) - mask |= S_IRUSR; - if (mode & VWRITE) - mask |= S_IWUSR; - return ((cp->c_mode & mask) == mask ? 0 : EACCES); + VATTR_RETURN(vap, va_total_size, total_size); + /* Include size of attribute data (extents), if any */ + if (cp->c_attrblks) { + vap->va_total_size += (uint64_t)cp->c_attrblks * (uint64_t)hfsmp->blockSize; + } + } } - - /* Otherwise, check the groups. */ - if (! (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS)) { - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (cp->c_gid == *gp) { - if (mode & VEXEC) - mask |= S_IXGRP; - if (mode & VREAD) - mask |= S_IRGRP; - if (mode & VWRITE) - mask |= S_IWGRP; - return ((cp->c_mode & mask) == mask ? 0 : EACCES); + if (VATTR_IS_ACTIVE(vap, va_total_alloc)) { + if (v_type == VDIR) { + VATTR_RETURN(vap, va_total_alloc, 0); + } else { + VATTR_RETURN(vap, va_total_alloc, (uint64_t)cp->c_blocks * (uint64_t)hfsmp->blockSize); + /* Include size of attribute data (extents), if any */ + if (cp->c_attrblks) { + vap->va_total_alloc += (uint64_t)cp->c_attrblks * (uint64_t)hfsmp->blockSize; } + } } + /* XXX broken... if you ask for "data size" of a rsrc fork vp you get the rsrc fork size! */ + if (v_type == VDIR) { + VATTR_RETURN(vap, va_data_size, cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE); + } else { + VATTR_RETURN(vap, va_data_size, VTOF(vp)->ff_size); + } + if (VATTR_IS_ACTIVE(vap, va_data_alloc) && (v_type != VDIR)) { + /* XXX do we need to account for ff_unallocblocks ? */ + VATTR_RETURN(vap, va_data_alloc, (uint64_t)VTOF(vp)->ff_blocks * (uint64_t)hfsmp->blockSize); + } + /* XXX is this really a good 'optimal I/O size'?
*/ + VATTR_RETURN(vap, va_iosize, hfsmp->hfs_logBlockSize); + VATTR_RETURN(vap, va_uid, cp->c_uid); + VATTR_RETURN(vap, va_gid, cp->c_gid); + VATTR_RETURN(vap, va_mode, cp->c_mode); +#if 0 + /* XXX is S_IFXATTR still needed ??? */ + if (VNODE_IS_RSRC(vp)) + vap->va_mode |= S_IFXATTR; +#endif + VATTR_RETURN(vap, va_flags, cp->c_flags); - /* Otherwise, check everyone else. */ - if (mode & VEXEC) - mask |= S_IXOTH; - if (mode & VREAD) - mask |= S_IROTH; - if (mode & VWRITE) - mask |= S_IWOTH; - return ((cp->c_mode & mask) == mask ? 0 : EACCES); -} - - - -/* -#% getattr vp = = = -# - vop_getattr { - IN struct vnode *vp; - IN struct vattr *vap; - IN struct ucred *cred; - IN struct proc *p; - - */ - - -/* ARGSUSED */ -static int -hfs_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct vattr *vap = ap->a_vap; - struct timeval tv; - - tv = time; - CTIMES(cp, &tv, &tv); - - vap->va_type = vp->v_type; - vap->va_mode = cp->c_mode; - vap->va_nlink = cp->c_nlink; /* - * [2856576] Since we are dynamically changing the owner, also - * effectively turn off the set-user-id and set-group-id bits, - * just like chmod(2) would when changing ownership. This prevents - * a security hole where set-user-id programs run as whoever is - * logged on (or root if nobody is logged in yet!) + * If the VFS wants extended security data, and we know that we + * don't have any (because it never told us it was setting any) + * then we can return the supported bit and no data. If we do + * have extended security, we can just leave the bit alone and + * the VFS will use the fallback path to fetch it. */ - if (cp->c_uid == UNKNOWNUID) { - vap->va_mode &= ~(S_ISUID | S_ISGID); - vap->va_uid = ap->a_cred->cr_uid; - } else { - vap->va_uid = cp->c_uid; + if (VATTR_IS_ACTIVE(vap, va_acl)) { + if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { + vap->va_acl = KAUTH_FILESEC_NONE; + VATTR_SET_SUPPORTED(vap, va_acl); + } } - vap->va_gid = cp->c_gid; - vap->va_fsid = cp->c_dev; + vap->va_create_time.tv_sec = cp->c_itime; + vap->va_create_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_create_time); + + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + /* Access times are lazily updated, get current time if needed */ + if (cp->c_touch_acctime) { + struct timeval tv; + + microtime(&tv); + vap->va_access_time.tv_sec = tv.tv_sec; + } else { + vap->va_access_time.tv_sec = cp->c_atime; + } + vap->va_access_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_access_time); + } + vap->va_modify_time.tv_sec = cp->c_mtime; + vap->va_modify_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_modify_time); + vap->va_change_time.tv_sec = cp->c_ctime; + vap->va_change_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_change_time); + vap->va_backup_time.tv_sec = cp->c_btime; + vap->va_backup_time.tv_nsec = 0; + VATTR_SET_SUPPORTED(vap, va_backup_time); + /* * Exporting file IDs from HFS Plus: * @@ -549,175 +421,204 @@ hfs_getattr(ap) * For "normal" files the c_fileid is the same as the * c_cnid. But for hard link files, they are different - the * c_cnid belongs to the active directory entry (ie the link) * and the c_fileid is for the actual inode (ie the data file). * - * The stat call (getattr) will always return the c_fileid - * and Carbon APIs, which are hardlink-ignorant, will always - * receive the c_cnid (from getattrlist). + * The stat call (getattr) uses va_fileid, and the Carbon APIs, + * which are hardlink-ignorant, will ask for va_linkid.
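+ *
+ * (Editor's illustration, not part of the original change: for a hard
+ * link the two IDs diverge, so the returns below give
+ *
+ *	VATTR_RETURN(vap, va_fileid, (uint64_t)cp->c_fileid);	// the inode
+ *	VATTR_RETURN(vap, va_linkid, (uint64_t)cp->c_cnid);	// this link
+ *
+ * and stat(2) reports the shared inode while getattrlist() callers can
+ * still tell individual links apart.)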
*/ - vap->va_fileid = cp->c_fileid; - vap->va_atime.tv_sec = cp->c_atime; - vap->va_atime.tv_nsec = 0; - vap->va_mtime.tv_sec = cp->c_mtime; - vap->va_mtime.tv_nsec = cp->c_mtime_nsec; - vap->va_ctime.tv_sec = cp->c_ctime; - vap->va_ctime.tv_nsec = 0; - vap->va_gen = 0; - vap->va_flags = cp->c_flags; - vap->va_rdev = 0; - vap->va_blocksize = VTOVFS(vp)->mnt_stat.f_iosize; - vap->va_filerev = 0; - if (vp->v_type == VDIR) { - vap->va_size = cp->c_nlink * AVERAGE_HFSDIRENTRY_SIZE; - vap->va_bytes = 0; - } else { - vap->va_size = VTOF(vp)->ff_size; - vap->va_bytes = (u_quad_t)cp->c_blocks * - (u_quad_t)VTOVCB(vp)->blockSize; - if (vp->v_type == VBLK || vp->v_type == VCHR) - vap->va_rdev = cp->c_rdev; + VATTR_RETURN(vap, va_fileid, (uint64_t)cp->c_fileid); + VATTR_RETURN(vap, va_linkid, (uint64_t)cp->c_cnid); + VATTR_RETURN(vap, va_parentid, (uint64_t)cp->c_parentcnid); + VATTR_RETURN(vap, va_fsid, cp->c_dev); + VATTR_RETURN(vap, va_filerev, 0); + + VATTR_RETURN(vap, va_encoding, cp->c_encoding); + + /* if this is the root, let the VFS find out the mount name, which may be different from the real name */ + if (VATTR_IS_ACTIVE(vap, va_name) && !vnode_isvroot(vp)) { + /* Return the name for ATTR_CMN_NAME */ + if (cp->c_desc.cd_namelen == 0) { + error = ENOENT; + goto out; + } + + strncpy(vap->va_name, cp->c_desc.cd_nameptr, MAXPATHLEN); + vap->va_name[MAXPATHLEN-1] = '\0'; + VATTR_SET_SUPPORTED(vap, va_name); } - return (0); -} -/* - * Set attribute vnode op. called from several syscalls -#% setattr vp L L L -# - vop_setattr { - IN struct vnode *vp; - IN struct vattr *vap; - IN struct ucred *cred; - IN struct proc *p; - - */ +out: + hfs_unlock(cp); + if (rvp) { + vnode_put(rvp); + } + return (error); +} static int -hfs_setattr(ap) - struct vop_setattr_args /* { +hfs_vnop_setattr(ap) + struct vnop_setattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - struct vattr *vap = ap->a_vap; + struct vnode_attr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; - struct timeval atimeval, mtimeval; - int error; + struct cnode *cp = NULL; + struct hfsmount *hfsmp; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); + int error = 0; + uid_t nuid; + gid_t ngid; - /* - * Check for unsettable attributes. - */ - if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || - (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || - (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || - ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { - return (EINVAL); - } + hfsmp = VTOHFS(vp); - // XXXdbg - // don't allow people to set the attributes of symlinks - // (nfs has a bad habit of doing ths and it can cause - // problems for journaling). - // - if (vp->v_type == VLNK) { - return 0; + /* Don't allow modification of the journal file. */ + if (hfsmp->hfs_jnlfileid == VTOC(vp)->c_fileid) { + return (EPERM); } + /* + * File size change request. + * We are guaranteed that this is not a directory, and that + * the filesystem object is writeable. + */ + VATTR_SET_SUPPORTED(vap, va_data_size); + if (VATTR_IS_ACTIVE(vap, va_data_size) && !vnode_islnk(vp)) { + /* Take truncate lock before taking cnode lock.
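+	 *
+	 * (Editor's note: lock-order sketch of the code below, not part of
+	 * the original change; "size" stands in for vap->va_data_size:
+	 *
+	 *	hfs_lock_truncate(cp, TRUE);
+	 *	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
+	 *		hfs_unlock_truncate(cp);
+	 *		return (error);
+	 *	}
+	 *	error = hfs_truncate(vp, size, 0, 0, ap->a_context);
+	 *	hfs_unlock_truncate(cp);
+	 * )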
*/ + hfs_lock_truncate(VTOC(vp), TRUE); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + hfs_unlock_truncate(VTOC(vp)); + return (error); + } + cp = VTOC(vp); - if (vap->va_flags != VNOVAL) { - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); - if ((error = hfs_chflags(vp, vap->va_flags, cred, p))) + error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 0, ap->a_context); + + hfs_unlock_truncate(cp); + if (error) + goto out; + } + if (cp == NULL) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) return (error); - if (vap->va_flags & (IMMUTABLE | APPEND)) - return (0); + cp = VTOC(vp); } - if (cp->c_flags & (IMMUTABLE | APPEND)) - return (EPERM); + /* + * Owner/group change request. + * We are guaranteed that the new owner/group is valid and legal. + */ + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + nuid = VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uid_t)VNOVAL; + ngid = VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (gid_t)VNOVAL; + if (((nuid != (uid_t)VNOVAL) || (ngid != (gid_t)VNOVAL)) && + ((error = hfs_chown(vp, nuid, ngid, cred, p)) != 0)) + goto out; - // XXXdbg - don't allow modification of the journal or journal_info_block - if (VTOHFS(vp)->jnl && cp->c_datafork) { - struct HFSPlusExtentDescriptor *extd; + /* + * Mode change request. + * We are guaranteed that the mode value is valid and that in + * conjunction with the owner and group, this change is legal. + */ + VATTR_SET_SUPPORTED(vap, va_mode); + if (VATTR_IS_ACTIVE(vap, va_mode) && + ((error = hfs_chmod(vp, (int)vap->va_mode, cred, p)) != 0)) + goto out; - extd = &cp->c_datafork->ff_extents[0]; - if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { - return EPERM; - } - } + /* + * File flags change. + * We are guaranteed that only flags allowed to change given the + * current securelevel are being changed. + */ + VATTR_SET_SUPPORTED(vap, va_flags); + if (VATTR_IS_ACTIVE(vap, va_flags) && + ((error = hfs_chflags(vp, vap->va_flags, cred, p)) != 0)) + goto out; /* - * Go through the fields and update iff not VNOVAL. + * If the file's extended security data is being changed, we + * need to note the change. Note that because we don't store + * the data, we do not set the SUPPORTED bit; this will cause + * the VFS to use a fallback strategy. */ - if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); - if ((error = hfs_chown(vp, vap->va_uid, vap->va_gid, cred, p))) - return (error); - } - if (vap->va_size != VNOVAL) { - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - switch (vp->v_type) { - case VDIR: - return (EISDIR); - case VLNK: - case VREG: - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); - break; - default: - break; + if (VATTR_IS_ACTIVE(vap, va_acl)) { + /* Remember if any ACL data was set or cleared. 
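+	 *
+	 * (Editor's note: the read side of this bookkeeping is in
+	 * hfs_vnop_getattr() above; when kHFSHasSecurityMask is clear, an
+	 * ACL query can be answered without touching the attribute b-tree:
+	 *
+	 *	if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0)
+	 *		vap->va_acl = KAUTH_FILESEC_NONE;
+	 * )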
*/ + if (vap->va_acl == NULL) { + /* being cleared */ + if (cp->c_attr.ca_recflags & kHFSHasSecurityMask) { + cp->c_attr.ca_recflags &= ~kHFSHasSecurityMask; + cp->c_touch_chgtime = TRUE; + } + } else { + /* being set */ + if ((cp->c_attr.ca_recflags & kHFSHasSecurityMask) == 0) { + cp->c_attr.ca_recflags |= kHFSHasSecurityMask; + cp->c_touch_chgtime = TRUE; + } } - if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p))) - return (error); } - cp = VTOC(vp); - if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); - if (((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) && - ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || - (error = VOP_ACCESS(vp, VWRITE, cred, p)))) { - return (error); + + /* + * Timestamp updates. + */ + VATTR_SET_SUPPORTED(vap, va_create_time); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + VATTR_SET_SUPPORTED(vap, va_backup_time); + VATTR_SET_SUPPORTED(vap, va_change_time); + if (VATTR_IS_ACTIVE(vap, va_create_time) || + VATTR_IS_ACTIVE(vap, va_access_time) || + VATTR_IS_ACTIVE(vap, va_modify_time) || + VATTR_IS_ACTIVE(vap, va_backup_time)) { + if (vnode_islnk(vp)) + goto done; + if (VATTR_IS_ACTIVE(vap, va_create_time)) + cp->c_itime = vap->va_create_time.tv_sec; + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + cp->c_atime = vap->va_access_time.tv_sec; + cp->c_touch_acctime = FALSE; } - if (vap->va_atime.tv_sec != VNOVAL) - cp->c_flag |= C_ACCESS; - if (vap->va_mtime.tv_sec != VNOVAL) { - cp->c_flag |= C_CHANGE | C_UPDATE; + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + cp->c_mtime = vap->va_modify_time.tv_sec; + cp->c_touch_modtime = FALSE; + cp->c_touch_chgtime = TRUE; + /* * The utimes system call can reset the modification * time but it doesn't know about HFS create times. - * So we need to insure that the creation time is + * So we need to ensure that the creation time is * always at least as old as the modification time. */ if ((VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) && - (cp->c_cnid != kRootDirID) && - (vap->va_mtime.tv_sec < cp->c_itime)) { - cp->c_itime = vap->va_mtime.tv_sec; + (cp->c_cnid != kHFSRootFolderID) && + (cp->c_mtime < cp->c_itime)) { + cp->c_itime = cp->c_mtime; } } - atimeval.tv_sec = vap->va_atime.tv_sec; - atimeval.tv_usec = 0; - mtimeval.tv_sec = vap->va_mtime.tv_sec; - mtimeval.tv_usec = 0; - if ((error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1))) - return (error); + if (VATTR_IS_ACTIVE(vap, va_backup_time)) + cp->c_btime = vap->va_backup_time.tv_sec; + cp->c_flag |= C_MODIFIED; } - error = 0; - if (vap->va_mode != (mode_t)VNOVAL) { - if (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) - return (EROFS); - error = hfs_chmod(vp, (int)vap->va_mode, cred, p); + + /* + * Set name encoding. 
+ */ + VATTR_SET_SUPPORTED(vap, va_encoding); + if (VATTR_IS_ACTIVE(vap, va_encoding)) { + cp->c_encoding = vap->va_encoding; + hfs_setencodingbits(hfsmp, cp->c_encoding); } + +done: + if ((error = hfs_update(vp, TRUE)) != 0) + goto out; HFS_KNOTE(vp, NOTE_ATTRIB); +out: + if (cp) + hfs_unlock(cp); return (error); } @@ -728,11 +629,7 @@ hfs_setattr(ap) */ __private_extern__ int -hfs_chmod(vp, mode, cred, p) - register struct vnode *vp; - register int mode; - register struct ucred *cred; - struct proc *p; +hfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) { register struct cnode *cp = VTOC(vp); int error; @@ -751,40 +648,31 @@ hfs_chmod(vp, mode, cred, p) } #if OVERRIDE_UNKNOWN_PERMISSIONS - if (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) { + if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS) { return (0); }; #endif - if ((error = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) - return (error); - if (cred->cr_uid) { - if (vp->v_type != VDIR && (mode & S_ISTXT)) - return (EFTYPE); - if (!groupmember(cp->c_gid, cred) && (mode & S_ISGID)) - return (EPERM); - } cp->c_mode &= ~ALLPERMS; cp->c_mode |= (mode & ALLPERMS); - cp->c_flag |= C_CHANGE; + cp->c_touch_chgtime = TRUE; return (0); } __private_extern__ int -hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean considerFlags) +hfs_write_access(struct vnode *vp, kauth_cred_t cred, struct proc *p, Boolean considerFlags) { struct cnode *cp = VTOC(vp); - gid_t *gp; int retval = 0; - int i; + int is_member; /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ - switch (vp->v_type) { + switch (vnode_vtype(vp)) { case VDIR: case VLNK: case VREG: @@ -800,7 +688,7 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c return (EPERM); /* Otherwise, user id 0 always gets access. */ - if (cred->cr_uid == 0) + if (!suser(cred, NULL)) return (0); /* Otherwise, check the owner. */ @@ -808,9 +696,8 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c return ((cp->c_mode & S_IWUSR) == S_IWUSR ? 0 : EACCES); /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) { - if (cp->c_gid == *gp) - return ((cp->c_mode & S_IWGRP) == S_IWGRP ? 0 : EACCES); + if (kauth_cred_ismember_gid(cred, cp->c_gid, &is_member) == 0 && is_member) { + return ((cp->c_mode & S_IWGRP) == S_IWGRP ? 0 : EACCES); } /* Otherwise, check everyone else. 
*/ @@ -825,38 +712,13 @@ hfs_write_access(struct vnode *vp, struct ucred *cred, struct proc *p, Boolean c */ __private_extern__ int -hfs_chflags(vp, flags, cred, p) - register struct vnode *vp; - register u_long flags; - register struct ucred *cred; - struct proc *p; +hfs_chflags(struct vnode *vp, uint32_t flags, __unused kauth_cred_t cred, __unused struct proc *p) { register struct cnode *cp = VTOC(vp); - int retval; - if (VTOVCB(vp)->vcbSigWord == kHFSSigWord) { - if ((retval = hfs_write_access(vp, cred, p, false)) != 0) { - return retval; - }; - } else if ((retval = hfs_owner_rights(VTOHFS(vp), cp->c_uid, cred, p, true)) != 0) { - return retval; - }; - - if (cred->cr_uid == 0) { - if ((cp->c_flags & (SF_IMMUTABLE | SF_APPEND)) && - securelevel > 0) { - return EPERM; - }; - cp->c_flags = flags; - } else { - if (cp->c_flags & (SF_IMMUTABLE | SF_APPEND) || - (flags & UF_SETTABLE) != flags) { - return EPERM; - }; - cp->c_flags &= SF_SETTABLE; - cp->c_flags |= (flags & UF_SETTABLE); - } - cp->c_flag |= C_CHANGE; + cp->c_flags &= SF_SETTABLE; + cp->c_flags |= (flags & UF_SETTABLE); + cp->c_touch_chgtime = TRUE; return (0); } @@ -868,41 +730,42 @@ hfs_chflags(vp, flags, cred, p) */ __private_extern__ int -hfs_chown(vp, uid, gid, cred, p) - register struct vnode *vp; - uid_t uid; - gid_t gid; - struct ucred *cred; - struct proc *p; +hfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, + struct proc *p) { register struct cnode *cp = VTOC(vp); uid_t ouid; gid_t ogid; int error = 0; + int is_member; #if QUOTA register int i; int64_t change; #endif /* QUOTA */ if (VTOVCB(vp)->vcbSigWord != kHFSPlusSigWord) - return (EOPNOTSUPP); + return (ENOTSUP); - if (VTOVFS(vp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) + if (((unsigned int)vfs_flags(VTOVFS(vp))) & MNT_UNKNOWNPERMISSIONS) return (0); if (uid == (uid_t)VNOVAL) uid = cp->c_uid; if (gid == (gid_t)VNOVAL) gid = cp->c_gid; + +#if 0 /* we are guaranteed that this is already the case */ /* * If we don't own the file, are trying to change the owner * of the file, or are not a member of the target group, * the caller must be superuser or the call fails. 
*/ - if ((cred->cr_uid != cp->c_uid || uid != cp->c_uid || - (gid != cp->c_gid && !groupmember((gid_t)gid, cred))) && - (error = suser(cred, &p->p_acflag))) + if ((kauth_cred_getuid(cred) != cp->c_uid || uid != cp->c_uid || + (gid != cp->c_gid && + (kauth_cred_ismember_gid(cred, gid, &is_member) || !is_member))) && + (error = suser(cred, 0))) return (error); +#endif ogid = cp->c_gid; ouid = cp->c_uid; @@ -910,26 +773,26 @@ hfs_chown(vp, uid, gid, cred, p) if ((error = hfs_getinoquota(cp))) return (error); if (ouid == uid) { - dqrele(vp, cp->c_dquot[USRQUOTA]); + dqrele(cp->c_dquot[USRQUOTA]); cp->c_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, cp->c_dquot[GRPQUOTA]); + dqrele(cp->c_dquot[GRPQUOTA]); cp->c_dquot[GRPQUOTA] = NODQUOT; } /* * Eventually need to account for (fake) a block per directory - *if (vp->v_type == VDIR) - *change = VTOVCB(vp)->blockSize; - *else + * if (vnode_isdir(vp)) + * change = VTOHFS(vp)->blockSize; + * else */ change = (int64_t)(cp->c_blocks) * (int64_t)VTOVCB(vp)->blockSize; (void) hfs_chkdq(cp, -change, cred, CHOWN); (void) hfs_chkiq(cp, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, cp->c_dquot[i]); + dqrele(cp->c_dquot[i]); cp->c_dquot[i] = NODQUOT; } #endif /* QUOTA */ @@ -938,11 +801,11 @@ hfs_chown(vp, uid, gid, cred, p) #if QUOTA if ((error = hfs_getinoquota(cp)) == 0) { if (ouid == uid) { - dqrele(vp, cp->c_dquot[USRQUOTA]); + dqrele(cp->c_dquot[USRQUOTA]); cp->c_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, cp->c_dquot[GRPQUOTA]); + dqrele(cp->c_dquot[GRPQUOTA]); cp->c_dquot[GRPQUOTA] = NODQUOT; } if ((error = hfs_chkdq(cp, change, cred, CHOWN)) == 0) { @@ -952,7 +815,7 @@ hfs_chown(vp, uid, gid, cred, p) (void) hfs_chkdq(cp, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, cp->c_dquot[i]); + dqrele(cp->c_dquot[i]); cp->c_dquot[i] = NODQUOT; } } @@ -960,11 +823,11 @@ hfs_chown(vp, uid, gid, cred, p) cp->c_uid = ouid; if (hfs_getinoquota(cp) == 0) { if (ouid == uid) { - dqrele(vp, cp->c_dquot[USRQUOTA]); + dqrele(cp->c_dquot[USRQUOTA]); cp->c_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, cp->c_dquot[GRPQUOTA]); + dqrele(cp->c_dquot[GRPQUOTA]); cp->c_dquot[GRPQUOTA] = NODQUOT; } (void) hfs_chkdq(cp, change, cred, FORCE|CHOWN); @@ -978,57 +841,59 @@ good: #endif /* QUOTA */ if (ouid != uid || ogid != gid) - cp->c_flag |= C_CHANGE; - if (ouid != uid && cred->cr_uid != 0) - cp->c_mode &= ~S_ISUID; - if (ogid != gid && cred->cr_uid != 0) - cp->c_mode &= ~S_ISGID; + cp->c_touch_chgtime = TRUE; return (0); } /* -# -#% exchange fvp L L L -#% exchange tvp L L L -# + * The hfs_exchange routine swaps the fork data in two files by + * exchanging some of the information in the cnode. It is used + * to preserve the file ID when updating an existing file, in + * case the file is being tracked through its file ID. Typically + * its used after creating a new file during a safe-save. */ - /* - * The hfs_exchange routine swaps the fork data in two files by - * exchanging some of the information in the cnode. It is used - * to preserve the file ID when updating an existing file, in - * case the file is being tracked through its file ID. Typically - * its used after creating a new file during a safe-save. 
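[The quota juggling in hfs_chown() follows a back-out/recharge pattern: the file's usage is credited back to the old uid/gid, ownership changes, the same usage is charged to the new ids, and on failure everything is rolled back. A userspace model under that reading, with a hypothetical chkdq() standing in for hfs_chkdq()/hfs_chkiq():

#include <stdint.h>
#include <sys/types.h>

struct qnode { uid_t uid; gid_t gid; int64_t bytes; };

/* Stand-in for hfs_chkdq()/hfs_chkiq(): charge (delta > 0) or credit
 * (delta < 0) 'delta' bytes against the given ids. */
static int
chkdq(uid_t uid, gid_t gid, int64_t delta)
{
    (void)uid; (void)gid; (void)delta;
    return 0;
}

static int
model_chown_quota(struct qnode *np, uid_t uid, gid_t gid)
{
    uid_t ouid = np->uid;
    gid_t ogid = np->gid;
    int error;

    (void) chkdq(ouid, ogid, -np->bytes);   /* back usage out of old ids */
    np->uid = uid;
    np->gid = gid;
    if ((error = chkdq(uid, gid, np->bytes)) == 0)
        return 0;                           /* charged to the new ids */
    np->uid = ouid;                         /* roll back on failure */
    np->gid = ogid;
    (void) chkdq(ouid, ogid, np->bytes);
    return error;
}
]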
- */ - static int -hfs_exchange(ap) - struct vop_exchange_args /* { +hfs_vnop_exchange(ap) + struct vnop_exchange_args /* { struct vnode *a_fvp; struct vnode *a_tvp; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; } */ *ap; { struct vnode *from_vp = ap->a_fvp; struct vnode *to_vp = ap->a_tvp; - struct cnode *from_cp = VTOC(from_vp); - struct cnode *to_cp = VTOC(to_vp); - struct hfsmount *hfsmp = VTOHFS(from_vp); + struct cnode *from_cp; + struct cnode *to_cp; + struct hfsmount *hfsmp; struct cat_desc tempdesc; struct cat_attr tempattr; - int error = 0, started_tr = 0, grabbed_lock = 0; - cat_cookie_t cookie = {0}; + int lockflags; + int error = 0, started_tr = 0, got_cookie = 0; + cat_cookie_t cookie; /* The files must be on the same volume. */ - if (from_vp->v_mount != to_vp->v_mount) + if (vnode_mount(from_vp) != vnode_mount(to_vp)) return (EXDEV); + if (from_vp == to_vp) + return (EINVAL); + + if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK))) + return (error); + + from_cp = VTOC(from_vp); + to_cp = VTOC(to_vp); + hfsmp = VTOHFS(from_vp); + /* Only normal files can be exchanged. */ - if ((from_vp->v_type != VREG) || (to_vp->v_type != VREG) || + if (!vnode_isreg(from_vp) || !vnode_isreg(to_vp) || (from_cp->c_flag & C_HARDLINK) || (to_cp->c_flag & C_HARDLINK) || - VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) - return (EINVAL); + VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { + error = EINVAL; + goto exit; + } // XXXdbg - don't allow modification of the journal or journal_info_block if (hfsmp->jnl) { @@ -1037,60 +902,58 @@ hfs_exchange(ap) if (from_cp->c_datafork) { extd = &from_cp->c_datafork->ff_extents[0]; if (extd->startBlock == VTOVCB(from_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - return EPERM; + error = EPERM; + goto exit; } } if (to_cp->c_datafork) { extd = &to_cp->c_datafork->ff_extents[0]; if (extd->startBlock == VTOVCB(to_vp)->vcbJinfoBlock || extd->startBlock == hfsmp->jnl_start) { - return EPERM; + error = EPERM; + goto exit; } } } - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - goto Err_Exit; - } - started_tr = 1; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto exit; } + started_tr = 1; /* * Reserve some space in the Catalog file. */ - if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, ap->a_p))) { - goto Err_Exit; + bzero(&cookie, sizeof(cookie)); + if ((error = cat_preflight(hfsmp, CAT_EXCHANGE, &cookie, vfs_context_proc(ap->a_context)))) { + goto exit; } - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, ap->a_p); - if (error) goto Err_Exit; + got_cookie = 1; /* The backend code always tries to delete the virtual * extent id for exchanging files so we neeed to lock * the extents b-tree. 
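[hfs_vnop_exchange() now takes both cnode locks up front via hfs_lockpair() before it dereferences either cnode. The essential property is a single global lock ordering, so two threads exchanging the same pair cannot deadlock each other. A pthreads sketch of the idea; address order is used here for illustration, the real ordering key inside hfs_lockpair() is an HFS implementation detail:

#include <pthread.h>

static void
lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a == b) {                 /* same node: take the lock once */
        pthread_mutex_lock(a);
        return;
    }
    if (a > b) {                  /* one fixed order avoids deadlock */
        pthread_mutex_t *t = a; a = b; b = t;
    }
    pthread_mutex_lock(a);
    pthread_mutex_lock(b);
}
]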
*/ - error = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (error) { - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, ap->a_p); - goto Err_Exit; - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); /* Do the exchange */ - error = MacToVFSError(ExchangeFileIDs(HFSTOVCB(hfsmp), - from_cp->c_desc.cd_nameptr, to_cp->c_desc.cd_nameptr, - from_cp->c_parentcnid, to_cp->c_parentcnid, - from_cp->c_hint, to_cp->c_hint)); + error = ExchangeFileIDs(hfsmp, + from_cp->c_desc.cd_nameptr, + to_cp->c_desc.cd_nameptr, + from_cp->c_parentcnid, + to_cp->c_parentcnid, + from_cp->c_hint, + to_cp->c_hint); + hfs_systemfile_unlock(hfsmp, lockflags); - (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, ap->a_p); - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, ap->a_p); + /* + * Note that we don't need to exchange any extended attributes + * since the attributes are keyed by file ID. + */ if (error != E_NONE) { - goto Err_Exit; + error = MacToVFSError(error); + goto exit; } /* Purge the vnodes from the name cache */ @@ -1134,12 +997,8 @@ hfs_exchange(ap) to_cp->c_mode = tempattr.ca_mode; bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); - /* Reinsert into the cnode hash under new file IDs*/ - hfs_chashremove(from_cp); - hfs_chashremove(to_cp); - - hfs_chashinsert(from_cp); - hfs_chashinsert(to_cp); + /* Rehash the cnodes using their new file IDs */ + hfs_chash_rehash(from_cp, to_cp); /* * When a file moves out of "Cleanup At Startup" @@ -1148,84 +1007,73 @@ hfs_exchange(ap) if ((from_cp->c_flags & UF_NODUMP) && (from_cp->c_parentcnid != to_cp->c_parentcnid)) { from_cp->c_flags &= ~UF_NODUMP; - from_cp->c_flag |= C_CHANGE; + from_cp->c_touch_chgtime = TRUE; } if ((to_cp->c_flags & UF_NODUMP) && (to_cp->c_parentcnid != from_cp->c_parentcnid)) { to_cp->c_flags &= ~UF_NODUMP; - to_cp->c_flag |= C_CHANGE; + to_cp->c_touch_chgtime = TRUE; } HFS_KNOTE(from_vp, NOTE_ATTRIB); HFS_KNOTE(to_vp, NOTE_ATTRIB); -Err_Exit: - cat_postflight(hfsmp, &cookie, ap->a_p); - - // XXXdbg - if (started_tr) { - journal_end_transaction(hfsmp->jnl); +exit: + if (got_cookie) { + cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + if (started_tr) { + hfs_end_transaction(hfsmp); } + hfs_unlockpair(from_cp, to_cp); return (error); } /* - -#% fsync vp L L L -# - vop_fsync { - IN struct vnode *vp; - IN struct ucred *cred; - IN int waitfor; - IN struct proc *p; - - */ -static int -hfs_fsync(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct proc *a_p; - } */ *ap; + * cnode must be locked + */ +__private_extern__ +int +hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) { - struct vnode *vp = ap->a_vp; struct cnode *cp = VTOC(vp); struct filefork *fp = NULL; int retval = 0; - register struct buf *bp; + struct hfsmount *hfsmp = VTOHFS(vp); struct timeval tv; - struct buf *nbp; - struct hfsmount *hfsmp = VTOHFS(ap->a_vp); - int s; int wait; - int retry = 0; + int lockflag; + int took_trunc_lock = 0; - wait = (ap->a_waitfor == MNT_WAIT); + wait = (waitfor == MNT_WAIT); /* HFS directories don't have any data blocks. 
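[Under the systemfile locks the on-disk swap is done by ExchangeFileIDs(); in memory it is a plain three-way swap of descriptor and attributes through temporaries (tempdesc/tempattr in the patch), after which both cnodes must be rehashed so lookups find them under their new file IDs. The shape of that step, modeled on a hypothetical node with an integer key:

struct xnode { unsigned key; int payload; };

static void
rehash_pair(struct xnode *a, struct xnode *b)
{
    (void)a; (void)b;           /* stand-in for hfs_chash_rehash() */
}

static void
model_exchange(struct xnode *from, struct xnode *to)
{
    struct xnode tmp = *from;   /* tempdesc/tempattr in the real code */
    *from = *to;
    *to = tmp;
    rehash_pair(from, to);      /* must follow the swap */
}
]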
*/ - if (vp->v_type == VDIR) + if (vnode_isdir(vp)) goto metasync; /* * For system files flush the B-tree header and * for regular files write out any clusters */ - if (vp->v_flag & VSYSTEM) { + if (vnode_issystem(vp)) { if (VTOF(vp)->fcbBTCBPtr != NULL) { // XXXdbg if (hfsmp->jnl == NULL) { BTFlushPath(VTOF(vp)); } } - } else if (UBCINFOEXISTS(vp)) - (void) cluster_push(vp); + } else if (UBCINFOEXISTS(vp)) { + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + took_trunc_lock = 1; + /* Don't hold cnode lock when calling into cluster layer. */ + (void) cluster_push(vp, 0); + + hfs_lock(cp, HFS_FORCE_LOCK); + } /* * When MNT_WAIT is requested and the zero fill timeout * has expired then we must explicitly zero out any areas @@ -1237,17 +1085,18 @@ hfs_fsync(ap) ((cp->c_flags & UF_NODUMP) == 0) && UBCINFOEXISTS(vp) && (fp = VTOF(vp)) && cp->c_zftimeout != 0) { - int devblksize; - int was_nocache; - - if (time.tv_sec < cp->c_zftimeout) { + microuptime(&tv); + if (tv.tv_sec < cp->c_zftimeout) { /* Remember that a force sync was requested. */ cp->c_flag |= C_ZFWANTSYNC; - goto loop; - } - VOP_DEVBLOCKSIZE(cp->c_devvp, &devblksize); - was_nocache = ISSET(vp->v_flag, VNOCACHE_DATA); - SET(vp->v_flag, VNOCACHE_DATA); /* Don't cache zeros */ + goto datasync; + } + if (!took_trunc_lock) { + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + hfs_lock(cp, HFS_FORCE_LOCK); + took_trunc_lock = 1; + } while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); @@ -1255,125 +1104,78 @@ hfs_fsync(ap) off_t end = invalid_range->rl_end; /* The range about to be written must be validated - * first, so that VOP_CMAP() will return the + * first, so that VNOP_BLOCKMAP() will return the * appropriate mapping for the cluster code: */ rl_remove(start, end, &fp->ff_invalidranges); + /* Don't hold cnode lock when calling into cluster layer. */ + hfs_unlock(cp); (void) cluster_write(vp, (struct uio *) 0, - fp->ff_size, - invalid_range->rl_end + 1, - invalid_range->rl_start, - (off_t)0, devblksize, - IO_HEADZEROFILL | IO_NOZERODIRTY); + fp->ff_size, end + 1, start, (off_t)0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + hfs_lock(cp, HFS_FORCE_LOCK); cp->c_flag |= C_MODIFIED; } - (void) cluster_push(vp); - if (!was_nocache) - CLR(vp->v_flag, VNOCACHE_DATA); + hfs_unlock(cp); + (void) cluster_push(vp, 0); + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_flag &= ~C_ZFWANTSYNC; cp->c_zftimeout = 0; } +datasync: + if (took_trunc_lock) + hfs_unlock_truncate(cp); + + /* + * if we have a journal and if journal_active() returns != 0 then the + * we shouldn't do anything to a locked block (because it is part + * of a transaction). otherwise we'll just go through the normal + * code path and flush the buffer. note journal_active() can return + * -1 if the journal is invalid -- however we still need to skip any + * locked blocks as they get cleaned up when we finish the transaction + * or close the journal. + */ + // if (hfsmp->jnl && journal_active(hfsmp->jnl) >= 0) + if (hfsmp->jnl) + lockflag = BUF_SKIP_LOCKED; + else + lockflag = 0; /* * Flush all dirty buffers associated with a vnode. 
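[The MNT_WAIT zero-fill loop above validates each never-written range first, by removing it from the list with rl_remove(), and only then writes zeroes over it, dropping the cnode lock around the cluster call. A userspace model of the drain, with a simple singly linked list in place of the rl_entry CIRCLEQ and a stub in place of cluster_write():

#include <stdio.h>
#include <stdlib.h>

struct range { long long start, end; struct range *next; };

static void
zero_fill(long long start, long long end)
{
    printf("zeroing [%lld..%lld]\n", start, end);  /* cluster_write stand-in */
}

static void
drain_invalid_ranges(struct range **head)
{
    while (*head) {
        struct range *r = *head;
        *head = r->next;              /* validate first (rl_remove) ... */
        zero_fill(r->start, r->end);  /* ... then zero the range */
        free(r);
    }
}
]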
*/ -loop: - s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("hfs_fsync: bp 0x% not dirty (hfsmp 0x%x)", bp, hfsmp); - // XXXdbg - if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) { - if ((bp->b_flags & B_META) == 0) { - panic("hfs: bp @ 0x%x is locked but not meta! jnl 0x%x\n", - bp, hfsmp->jnl); - } - // if journal_active() returns >= 0 then the journal is ok and we - // shouldn't do anything to this locked block (because it is part - // of a transaction). otherwise we'll just go through the normal - // code path and flush the buffer. - if (journal_active(hfsmp->jnl) >= 0) { - continue; - } - } - - bremfree(bp); - bp->b_flags |= B_BUSY; - /* Clear B_LOCKED, should only be set on meta files */ - bp->b_flags &= ~B_LOCKED; - - splx(s); - /* - * Wait for I/O associated with indirect blocks to complete, - * since there is no way to quickly wait for them below. - */ - if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) - (void) bawrite(bp); - else - (void) VOP_BWRITE(bp); - goto loop; - } - - if (wait) { - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "hfs_fsync", 0); - } - - // XXXdbg -- is checking for hfsmp->jnl == NULL the right - // thing to do? - if (hfsmp->jnl == NULL && vp->v_dirtyblkhd.lh_first) { - /* still have some dirty buffers */ - if (retry++ > 10) { - vprint("hfs_fsync: dirty", vp); - splx(s); - /* - * Looks like the requests are not - * getting queued to the driver. - * Retrying here causes a cpu bound loop. - * Yield to the other threads and hope - * for the best. - */ - (void)tsleep((caddr_t)&vp->v_numoutput, - PRIBIO + 1, "hfs_fsync", hz/10); - retry = 0; - } else { - splx(s); - } - /* try again */ - goto loop; - } - } - splx(s); + buf_flushdirtyblks(vp, wait, lockflag, "hfs_fsync"); metasync: - tv = time; - if (vp->v_flag & VSYSTEM) { - if (VTOF(vp)->fcbBTCBPtr != NULL) + if (vnode_isreg(vp) && vnode_issystem(vp)) { + if (VTOF(vp)->fcbBTCBPtr != NULL) { + microuptime(&tv); BTSetLastSync(VTOF(vp), tv.tv_sec); - cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE); + } + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; } else /* User file */ { - retval = VOP_UPDATE(ap->a_vp, &tv, &tv, wait); + retval = hfs_update(vp, wait); /* When MNT_WAIT is requested push out any delayed meta data */ if ((retval == 0) && wait && cp->c_hint && !ISSET(cp->c_flag, C_DELETED | C_NOEXISTS)) { - hfs_metasync(VTOHFS(vp), cp->c_hint, ap->a_p); + hfs_metasync(VTOHFS(vp), (daddr64_t)cp->c_hint, p); } // make sure that we've really been called from the user // fsync() and if so push out any pending transactions // that this file might is a part of (and get them on // stable storage). - if (vp->v_flag & VFULLFSYNC) { + if (fullsync) { if (hfsmp->jnl) { journal_flush(hfsmp->jnl); } else { - VOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NOCRED, ap->a_p); + /* XXX need to pass context! 
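[With a journal active, buf_flushdirtyblks() is passed BUF_SKIP_LOCKED because B_LOCKED buffers belong to an open transaction and will reach disk when that transaction commits. A model of the filter that replaces the old hand-rolled dirty-buffer loop; flags and types here are illustrative:

#include <stddef.h>

#define DB_DIRTY  0x1   /* model of B_DELWRI */
#define DB_LOCKED 0x2   /* model of B_LOCKED: pinned by a transaction */

struct dbuf { int flags; struct dbuf *next; };

static void
dbuf_write(struct dbuf *bp)
{
    bp->flags &= ~DB_DIRTY;   /* stand-in for buf_bawrite()/buf_bwrite() */
}

static void
model_flushdirty(struct dbuf *list, int skip_locked)
{
    for (struct dbuf *bp = list; bp != NULL; bp = bp->next) {
        if (!(bp->flags & DB_DIRTY))
            continue;
        if (skip_locked && (bp->flags & DB_LOCKED))
            continue;         /* the journal writes it at commit */
        dbuf_write(bp);
    }
}
]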
*/ + VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); } } } @@ -1381,14 +1183,14 @@ metasync: return (retval); } + /* Sync an hfs catalog b-tree node */ static int -hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p) +hfs_metasync(struct hfsmount *hfsmp, daddr64_t node, struct proc *p) { - struct vnode *vp; - struct buf *bp; - struct buf *nbp; - int s; + vnode_t vp; + buf_t bp; + int lockflags; vp = HFSTOVCB(hfsmp)->catalogRefNum; @@ -1397,168 +1199,143 @@ hfs_metasync(struct hfsmount *hfsmp, daddr_t node, struct proc *p) return 0; } - if (hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p) != 0) - return (0); - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); /* * Look for a matching node that has been delayed * but is not part of a set (B_LOCKED). + * + * BLK_ONLYVALID causes buf_getblk to return a + * buf_t for the daddr64_t specified only if it's + * currently resident in the cache... the size + * parameter to buf_getblk is ignored when this flag + * is set */ - s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if (bp->b_flags & B_BUSY) - continue; - if (bp->b_lblkno == node) { - if (bp->b_flags & B_LOCKED) - break; - - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - (void) VOP_BWRITE(bp); - goto exit; - } + bp = buf_getblk(vp, node, 0, 0, 0, BLK_META | BLK_ONLYVALID); + + if (bp) { + if ((buf_flags(bp) & (B_LOCKED | B_DELWRI)) == B_DELWRI) + (void) VNOP_BWRITE(bp); + else + buf_brelse(bp); } - splx(s); -exit: - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + + hfs_systemfile_unlock(hfsmp, lockflags); return (0); } + +/*ARGSUSED 1*/ +static int +hfs_btsync_callback(struct buf *bp, void *dummy) +{ + buf_clearflags(bp, B_LOCKED); + (void) buf_bawrite(bp); + + return(BUF_CLAIMED); +} + + __private_extern__ int hfs_btsync(struct vnode *vp, int sync_transaction) { struct cnode *cp = VTOC(vp); - register struct buf *bp; struct timeval tv; - struct buf *nbp; - struct hfsmount *hfsmp = VTOHFS(vp); - int s; + int flags = 0; + if (sync_transaction) + flags |= BUF_SKIP_NONLOCKED; /* * Flush all dirty buffers associated with b-tree. */ -loop: - s = splbio(); - - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("hfs_btsync: not dirty (bp 0x%x hfsmp 0x%x)", bp, hfsmp); - - // XXXdbg - if (hfsmp->jnl && (bp->b_flags & B_LOCKED)) { - if ((bp->b_flags & B_META) == 0) { - panic("hfs: bp @ 0x%x is locked but not meta! jnl 0x%x\n", - bp, hfsmp->jnl); - } - // if journal_active() returns >= 0 then the journal is ok and we - // shouldn't do anything to this locked block (because it is part - // of a transaction). otherwise we'll just go through the normal - // code path and flush the buffer. 
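[hfs_btsync_callback() above shows the buf_iterate() contract: the iterator walks a vnode's buffers and the callback reports whether it claimed each one (here, clearing B_LOCKED and issuing an async write). A minimal model of such an iterator; the constants are hypothetical stand-ins for BUF_CLAIMED/BUF_RETURNED:

#define MODEL_CLAIMED  1
#define MODEL_RETURNED 2

struct mbufq { int flags; struct mbufq *next; };

typedef int (*iter_cb_t)(struct mbufq *, void *);

static int
write_cb(struct mbufq *bp, void *arg)
{
    (void)arg;
    bp->flags &= ~0x2;        /* model of buf_clearflags(bp, B_LOCKED) */
    return MODEL_CLAIMED;     /* callback took responsibility for bp */
}

static void
model_iterate(struct mbufq *list, iter_cb_t cb, void *arg)
{
    struct mbufq *next;

    for (struct mbufq *bp = list; bp != NULL; bp = next) {
        next = bp->next;      /* cb may write out or requeue bp */
        (void) cb(bp, arg);
    }
}
]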
- if (journal_active(hfsmp->jnl) >= 0) { - continue; - } - } - - if (sync_transaction && !(bp->b_flags & B_LOCKED)) - continue; - - bremfree(bp); - bp->b_flags |= B_BUSY; - bp->b_flags &= ~B_LOCKED; + buf_iterate(vp, hfs_btsync_callback, flags, 0); - splx(s); - - (void) bawrite(bp); - - goto loop; - } - splx(s); - - tv = time; - if ((vp->v_flag & VSYSTEM) && (VTOF(vp)->fcbBTCBPtr != NULL)) + microuptime(&tv); + if (vnode_issystem(vp) && (VTOF(vp)->fcbBTCBPtr != NULL)) (void) BTSetLastSync(VTOF(vp), tv.tv_sec); - cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE); + cp->c_touch_acctime = FALSE; + cp->c_touch_chgtime = FALSE; + cp->c_touch_modtime = FALSE; return 0; } /* - * Rmdir system call. -#% rmdir dvp L U U -#% rmdir vp L U U -# - vop_rmdir { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; - IN struct componentname *cnp; - + * Remove a directory. */ static int -hfs_rmdir(ap) - struct vop_rmdir_args /* { +hfs_vnop_rmdir(ap) + struct vnop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - return (hfs_removedir(ap->a_dvp, ap->a_vp, ap->a_cnp, 0)); + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + int error; + + if (!vnode_isdir(vp)) { + return (ENOTDIR); + } + if (dvp == vp) { + return (EINVAL); + } + if ((error = hfs_lockpair(VTOC(dvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); + + error = hfs_removedir(dvp, vp, ap->a_cnp, 0); + + hfs_unlockpair(VTOC(dvp), VTOC(vp)); + + return (error); } /* - * hfs_removedir + * Remove a directory + * + * Both dvp and vp cnodes are locked */ static int -hfs_removedir(dvp, vp, cnp, options) - struct vnode *dvp; - struct vnode *vp; - struct componentname *cnp; - int options; +hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, + int skip_reserve) { - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); struct cnode *cp; struct cnode *dcp; struct hfsmount * hfsmp; - struct timeval tv; - cat_cookie_t cookie = {0}; - int error = 0, started_tr = 0, grabbed_lock = 0; + struct cat_desc desc; + cat_cookie_t cookie; + int lockflags; + int error = 0, started_tr = 0, got_cookie = 0; cp = VTOC(vp); dcp = VTOC(dvp); hfsmp = VTOHFS(vp); - if (dcp == cp) { - vrele(dvp); - vput(vp); + if (dcp == cp) return (EINVAL); /* cannot remove "." */ - } #if QUOTA (void)hfs_getinoquota(cp); #endif - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - goto out; - } - started_tr = 1; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; } + started_tr = 1; - if (!(options & HFSRM_SKIP_RESERVE)) { + if (!skip_reserve) { /* * Reserve some space in the Catalog file. */ + bzero(&cookie, sizeof(cookie)); if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { goto out; } + got_cookie = 1; } /* @@ -1577,21 +1354,34 @@ hfs_removedir(dvp, vp, cnp, options) goto out; } + if (cp->c_entries > 0) + panic("hfs_rmdir: attempting to delete a non-empty directory!"); + /* Remove the entry from the namei cache: */ cache_purge(vp); - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) goto out; + /* + * Protect against a race with rename by using the component + * name passed in and parent id from dvp (instead of using + * the cp->c_desc which may have changed). 
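[Both hfs_removedir() and hfs_removefile() now follow the same fixed bracket: start a transaction, reserve catalog space with cat_preflight(), do the work, then release the reservation and end the transaction on every exit path. A skeleton of that goto-out error-handling shape, with trivial stand-in functions:

static int  start_tr(void *fs)             { (void)fs; return 0; }
static void end_tr(void *fs)               { (void)fs; }
static int  preflight(void *fs, void *ck)  { (void)fs; (void)ck; return 0; }
static void postflight(void *fs, void *ck) { (void)fs; (void)ck; }
static int  do_delete(void *fs)            { (void)fs; return 0; }

static int
model_remove(void *fs)
{
    char cookie[16] = {0};
    int started = 0, reserved = 0, error;

    if ((error = start_tr(fs)) != 0)
        goto out;
    started = 1;
    if ((error = preflight(fs, cookie)) != 0)
        goto out;
    reserved = 1;

    error = do_delete(fs);
out:
    if (reserved)
        postflight(fs, cookie);
    if (started)
        end_tr(fs);
    return error;
}
]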
+ */ + bzero(&desc, sizeof(desc)); + desc.cd_nameptr = cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = dcp->c_cnid; + desc.cd_cnid = cp->c_cnid; - if (cp->c_entries > 0) - panic("hfs_rmdir: attempting to delete a non-empty directory!"); /* Remove entry from catalog */ - error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + error = cat_delete(hfsmp, &desc, &cp->c_attr); + if (error == 0) { + /* Delete any attributes, ignore errors */ + (void) hfs_removeallattr(hfsmp, cp->c_fileid); + } + hfs_systemfile_unlock(hfsmp, lockflags); - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (error) goto out; + if (error) + goto out; #if QUOTA (void)hfs_chkiq(cp, -1, NOCRED, 0); @@ -1602,9 +1392,12 @@ hfs_removedir(dvp, vp, cnp, options) dcp->c_entries--; if (dcp->c_nlink > 0) dcp->c_nlink--; - dcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(dvp, &tv, &tv, 0); + dcp->c_touch_chgtime = TRUE; + dcp->c_touch_modtime = TRUE; + + dcp->c_flag |= C_FORCEUPDATE; // XXXdbg - don't screw around, force this guy out + + (void) hfs_update(dvp, 0); HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID)); @@ -1612,106 +1405,129 @@ hfs_removedir(dvp, vp, cnp, options) cp->c_mode = 0; /* Makes the vnode go away...see inactive */ cp->c_flag |= C_NOEXISTS; out: - if (!(options & HFSRM_PARENT_LOCKED)) { - vput(dvp); - } HFS_KNOTE(vp, NOTE_DELETE); - vput(vp); - if (!(options & HFSRM_SKIP_RESERVE)) { + if (got_cookie) { cat_postflight(hfsmp, &cookie, p); } - // XXXdbg if (started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); } return (error); } -/* - -#% remove dvp L U U -#% remove vp L U U -# - vop_remove { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; - IN struct componentname *cnp; - - */ +/* + * Remove a file or link. + */ static int -hfs_remove(ap) - struct vop_remove_args /* { +hfs_vnop_remove(ap) + struct vnop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; } */ *ap; { - return (hfs_removefile(ap->a_dvp, ap->a_vp, ap->a_cnp, 0)); + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + int error; + + if (dvp == vp) { + return (EINVAL); + } + + hfs_lock_truncate(VTOC(vp), TRUE); + + if ((error = hfs_lockpair(VTOC(dvp), VTOC(vp), HFS_EXCLUSIVE_LOCK))) + goto out; + + error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0); + + hfs_unlockpair(VTOC(dvp), VTOC(vp)); +out: + hfs_unlock_truncate(VTOC(vp)); + return (error); } +static int +hfs_removefile_callback(struct buf *bp, void *hfsmp) { + + if ( !(buf_flags(bp) & B_META)) + panic("hfs: symlink bp @ 0x%x is not marked meta-data!\n", bp); + /* + * it's part of the current transaction, kill it. + */ + journal_kill_block(((struct hfsmount *)hfsmp)->jnl, bp); + + return (BUF_CLAIMED); +} /* * hfs_removefile * - * Similar to hfs_remove except there are additional options. + * Similar to hfs_vnop_remove except there are additional options. + * + * Requires cnode and truncate locks to be held. 
*/ static int -hfs_removefile(dvp, vp, cnp, options) - struct vnode *dvp; - struct vnode *vp; - struct componentname *cnp; - int options; +hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, + int flags, int skip_reserve) { struct vnode *rvp = NULL; struct cnode *cp; struct cnode *dcp; struct hfsmount *hfsmp; - struct proc *p = cnp->cn_proc; + struct cat_desc desc; + struct timeval tv; + vfs_context_t ctx = cnp->cn_context; int dataforkbusy = 0; int rsrcforkbusy = 0; int truncated = 0; - struct timeval tv; - cat_cookie_t cookie = {0}; + cat_cookie_t cookie; + int lockflags; int error = 0; - int started_tr = 0, grabbed_lock = 0; - int refcount, isbigfile = 0; + int started_tr = 0, got_cookie = 0; + int isbigfile = 0; + cnid_t real_cnid = 0; /* Directories should call hfs_rmdir! */ - if (vp->v_type == VDIR) { - error = EISDIR; - goto out; + if (vnode_isdir(vp)) { + return (EISDIR); } cp = VTOC(vp); dcp = VTOC(dvp); hfsmp = VTOHFS(vp); + + if (cp->c_flag & (C_NOEXISTS | C_DELETED)) { + return 0; + } - if (cp->c_parentcnid != dcp->c_cnid) { + // if it's not a hardlink, check that the parent + // cnid is the same as the directory cnid + if ( (cp->c_flag & C_HARDLINK) == 0 + && (cp->c_parentcnid != hfsmp->hfs_privdir_desc.cd_cnid) + && (cp->c_parentcnid != dcp->c_cnid)) { error = EINVAL; goto out; } /* Make sure a remove is permitted */ - if ((cp->c_flags & (IMMUTABLE | APPEND)) || - (VTOC(dvp)->c_flags & APPEND) || - VNODE_IS_RSRC(vp)) { + if (VNODE_IS_RSRC(vp)) { error = EPERM; goto out; } /* * Aquire a vnode for a non-empty resource fork. - * (needed for VOP_TRUNCATE) + * (needed for hfs_truncate) */ if (cp->c_blocks - VTOF(vp)->ff_blocks) { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, p); + error = hfs_vgetrsrc(hfsmp, vp, &rvp, 0); if (error) goto out; } @@ -1729,19 +1545,10 @@ hfs_removefile(dvp, vp, cnp, options) /* * Check if this file is being used. - * - * The namei done for the remove took a reference on the - * vnode (vp). And we took a ref on the resource vnode (rvp). - * Hence set 1 in the tookref parameter of ubc_isinuse(). */ - if (VTOC(vp)->c_flag & C_VPREFHELD) { - refcount = 2; - } else { - refcount = 1; - } - if (UBCISVALID(vp) && ubc_isinuse(vp, refcount)) + if (vnode_isinuse(vp, 0)) dataforkbusy = 1; - if (rvp && UBCISVALID(rvp) && ubc_isinuse(rvp, 1)) + if (rvp && vnode_isinuse(rvp, 0)) rsrcforkbusy = 1; // need this to check if we have to break the deletion @@ -1750,96 +1557,134 @@ hfs_removefile(dvp, vp, cnp, options) /* * Carbon semantics prohibit deleting busy files. - * (enforced when NODELETEBUSY is requested) + * (enforced when VNODE_REMOVE_NODELETEBUSY is requested) */ - if ((dataforkbusy || rsrcforkbusy) && - ((cnp->cn_flags & NODELETEBUSY) || - (hfsmp->hfs_privdir_desc.cd_cnid == 0))) { - error = EBUSY; - goto out; + if (dataforkbusy || rsrcforkbusy) { + if ((flags & VNODE_REMOVE_NODELETEBUSY) || + (hfsmp->hfs_privdir_desc.cd_cnid == 0)) { + error = EBUSY; + goto out; + } } #if QUOTA (void)hfs_getinoquota(cp); #endif /* QUOTA */ - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - goto out; + /* + * We do the ubc_setsize before the hfs_truncate + * since we'll be inside a transaction. + */ + if ((cp->c_flag & C_HARDLINK) == 0 && + (!dataforkbusy || !rsrcforkbusy)) { + /* + * A ubc_setsize can cause a pagein here + * so we need to the drop cnode lock. Note + * that we still hold the truncate lock. 
+ */ + hfs_unlock(cp); + if (!dataforkbusy && cp->c_datafork->ff_blocks && !isbigfile) { + ubc_setsize(vp, 0); + } + if (!rsrcforkbusy && rvp) { + ubc_setsize(rvp, 0); + } + hfs_lock(cp, HFS_FORCE_LOCK); + } else { + struct cat_desc cndesc; + + // for hard links, re-lookup the name that was passed + // in so we get the correct cnid for the name (as + // opposed to the c_cnid in the cnode which could have + // been changed before this node got locked). + bzero(&cndesc, sizeof(cndesc)); + cndesc.cd_nameptr = cnp->cn_nameptr; + cndesc.cd_namelen = cnp->cn_namelen; + cndesc.cd_parentcnid = VTOC(dvp)->c_cnid; + cndesc.cd_hint = VTOC(dvp)->c_childhint; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + if (cat_lookup(hfsmp, &cndesc, 0, NULL, NULL, NULL, &real_cnid) != 0) { + hfs_systemfile_unlock(hfsmp, lockflags); + error = ENOENT; + goto out; } - started_tr = 1; + + hfs_systemfile_unlock(hfsmp, lockflags); + } + + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; } + started_tr = 1; - if (!(options & HFSRM_SKIP_RESERVE)) { + if (!skip_reserve) { /* * Reserve some space in the Catalog file. */ - if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, p))) { + if ((error = cat_preflight(hfsmp, CAT_DELETE, &cookie, 0))) { goto out; } + got_cookie = 1; } /* Remove our entry from the namei cache. */ cache_purge(vp); // XXXdbg - if we're journaled, kill any dirty symlink buffers - if (hfsmp->jnl && vp->v_type == VLNK && vp->v_dirtyblkhd.lh_first) { - struct buf *bp, *nbp; - - recheck: - for (bp=vp->v_dirtyblkhd.lh_first; bp; bp=nbp) { - nbp = bp->b_vnbufs.le_next; - - if ((bp->b_flags & B_BUSY)) { - // if it was busy, someone else must be dealing - // with it so just move on. - continue; - } - - if (!(bp->b_flags & B_META)) { - panic("hfs: symlink bp @ 0x%x is not marked meta-data!\n", bp); - } - - // if it's part of the current transaction, kill it. - if (bp->b_flags & B_LOCKED) { - bremfree(bp); - bp->b_flags |= B_BUSY; - journal_kill_block(hfsmp->jnl, bp); - goto recheck; - } - } - } - // XXXdbg + if (hfsmp->jnl && vnode_islnk(vp)) + buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); /* * Truncate any non-busy forks. Busy forks will * get trucated when their vnode goes inactive. * + * Since we're already inside a transaction, + * tell hfs_truncate to skip the ubc_setsize. + * * (Note: hard links are truncated in VOP_INACTIVE) */ if ((cp->c_flag & C_HARDLINK) == 0) { int mode = cp->c_mode; if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) { - cp->c_mode = 0; /* Suppress VOP_UPDATES */ - error = VOP_TRUNCATE(vp, (off_t)0, IO_NDELAY, NOCRED, p); + cp->c_mode = 0; /* Suppress hfs_update */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ctx); cp->c_mode = mode; if (error) goto out; truncated = 1; } if (!rsrcforkbusy && rvp) { - cp->c_mode = 0; /* Suppress VOP_UPDATES */ - error = VOP_TRUNCATE(rvp, (off_t)0, IO_NDELAY, NOCRED, p); + cp->c_mode = 0; /* Suppress hfs_update */ + error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ctx); cp->c_mode = mode; if (error) goto out; truncated = 1; } } + + /* + * Protect against a race with rename by using the component + * name passed in and parent id from dvp (instead of using + * the cp->c_desc which may have changed). 
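[For a hard link, the cnode caches the cnid of whichever link happened to be looked up last, so the remove path above re-resolves the passed-in name under a shared catalog lock to obtain the cnid that belongs to this particular (parent, name) pair. The decision, modeled with a stub lookup in place of cat_lookup():

static int
model_lookup(unsigned parent, const char *name, unsigned *out_cnid)
{
    (void)parent; (void)name;
    *out_cnid = 0;                 /* stand-in for cat_lookup() */
    return 0;
}

static int
pick_link_cnid(unsigned parent_cnid, const char *name,
    int is_hardlink, unsigned cached_cnid, unsigned *use_cnid)
{
    if (!is_hardlink) {
        *use_cnid = cached_cnid;   /* only one name maps to this cnode */
        return 0;
    }
    /* re-resolve by (parent, name) to get this link's own cnid */
    return model_lookup(parent_cnid, name, use_cnid);
}
]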
+ */ + desc.cd_flags = 0; + desc.cd_encoding = cp->c_desc.cd_encoding; + desc.cd_nameptr = cnp->cn_nameptr; + desc.cd_namelen = cnp->cn_namelen; + desc.cd_parentcnid = dcp->c_cnid; + desc.cd_hint = cp->c_desc.cd_hint; + if (real_cnid) { + // if it was a hardlink we had to re-lookup the cnid + desc.cd_cnid = real_cnid; + } else { + desc.cd_cnid = cp->c_cnid; + } + microtime(&tv); + /* * There are 3 remove cases to consider: * 1. File is a hardlink ==> remove the link @@ -1848,73 +1693,70 @@ hfs_removefile(dvp, vp, cnp, options) */ if (cp->c_flag & C_HARDLINK) { - struct cat_desc desc; - - if ((cnp->cn_flags & HASBUF) == 0 || - cnp->cn_nameptr[0] == '\0') { - error = ENOENT; /* name missing! */ - goto out; - } - - /* Setup a descriptor for the link */ - bzero(&desc, sizeof(desc)); - desc.cd_nameptr = cnp->cn_nameptr; - desc.cd_namelen = cnp->cn_namelen; - desc.cd_parentcnid = dcp->c_cnid; - /* XXX - if cnid is out of sync then the wrong thread rec will get deleted. */ - desc.cd_cnid = cp->c_cnid; - - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) - goto out; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); /* Delete the link record */ error = cat_delete(hfsmp, &desc, &cp->c_attr); + if (error == 0) { + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + if (dcp->c_nlink > 0) + dcp->c_nlink--; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void ) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + + if (--cp->c_nlink < 1) { + char inodename[32]; + char delname[32]; + struct cat_desc to_desc; + struct cat_desc from_desc; - if ((error == 0) && (--cp->c_nlink < 1)) { - char inodename[32]; - char delname[32]; - struct cat_desc to_desc; - struct cat_desc from_desc; - - /* - * This is now esentially an open deleted file. - * Rename it to reflect this state which makes - * orphan file cleanup easier (see hfs_remove_orphans). - * Note: a rename failure here is not fatal. - */ - MAKE_INODE_NAME(inodename, cp->c_rdev); - bzero(&from_desc, sizeof(from_desc)); - from_desc.cd_nameptr = inodename; - from_desc.cd_namelen = strlen(inodename); - from_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; - from_desc.cd_flags = 0; - from_desc.cd_cnid = cp->c_fileid; - - MAKE_DELETED_NAME(delname, cp->c_fileid); - bzero(&to_desc, sizeof(to_desc)); - to_desc.cd_nameptr = delname; - to_desc.cd_namelen = strlen(delname); - to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; - to_desc.cd_flags = 0; - to_desc.cd_cnid = cp->c_fileid; + /* + * This is now esentially an open deleted file. + * Rename it to reflect this state which makes + * orphan file cleanup easier (see hfs_remove_orphans). + * Note: a rename failure here is not fatal. 
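[The comment above enumerates the three remove cases (hardlink, busy, not in use); the body of hfs_removefile() is essentially this dispatch, shown here with stand-in handlers for the catalog operations in the patch:

static int remove_link(void *cp)      { (void)cp; return 0; }  /* case 1 */
static int rename_to_orphan(void *cp) { (void)cp; return 0; }  /* case 2 */
static int delete_file(void *cp)      { (void)cp; return 0; }  /* case 3 */

static int
model_removefile(void *cp, int is_hardlink, int busy)
{
    if (is_hardlink)
        return remove_link(cp);      /* drop one name, maybe orphan inode */
    if (busy)
        return rename_to_orphan(cp); /* hide until the last close */
    return delete_file(cp);          /* delete outright */
}
]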
+ */ + MAKE_INODE_NAME(inodename, cp->c_rdev); + bzero(&from_desc, sizeof(from_desc)); + from_desc.cd_nameptr = inodename; + from_desc.cd_namelen = strlen(inodename); + from_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; + from_desc.cd_flags = 0; + from_desc.cd_cnid = cp->c_fileid; + + MAKE_DELETED_NAME(delname, cp->c_fileid); + bzero(&to_desc, sizeof(to_desc)); + to_desc.cd_nameptr = delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_privdir_desc.cd_cnid; + to_desc.cd_flags = 0; + to_desc.cd_cnid = cp->c_fileid; - (void) cat_rename(hfsmp, &from_desc, &hfsmp->hfs_privdir_desc, - &to_desc, (struct cat_desc *)NULL); - cp->c_flag |= C_DELETED; + error = cat_rename(hfsmp, &from_desc, &hfsmp->hfs_privdir_desc, + &to_desc, (struct cat_desc *)NULL); + if (error != 0) { + panic("hfs_removefile: error %d from cat_rename(%s %s) cp 0x%x\n", + inodename, delname, cp); + } + if (error == 0) { + /* Update the file's state */ + cp->c_flag |= C_DELETED; + cp->c_ctime = tv.tv_sec; + (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL); + } + } else { + /* Update the file's state */ + cp->c_ctime = tv.tv_sec; + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + } } - - /* Unlock the Catalog */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - + hfs_systemfile_unlock(hfsmp, lockflags); if (error != 0) goto out; - cp->c_flag |= C_CHANGE; - tv = time; - (void) VOP_UPDATE(vp, &tv, &tv, 0); - hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); } else if (dataforkbusy || rsrcforkbusy || isbigfile) { @@ -1936,49 +1778,47 @@ hfs_removefile(dvp, vp, cnp, options) to_desc.cd_flags = 0; to_desc.cd_cnid = cp->c_cnid; - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) - goto out; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); - error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, + error = cat_rename(hfsmp, &desc, &todir_desc, &to_desc, (struct cat_desc *)NULL); - // XXXdbg - only bump this count if we were successful if (error == 0) { hfsmp->hfs_privdir_attr.ca_entries++; + (void) cat_update(hfsmp, &hfsmp->hfs_privdir_desc, + &hfsmp->hfs_privdir_attr, NULL, NULL); + + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + if (dcp->c_nlink > 0) + dcp->c_nlink--; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + + /* Update the file's state */ + cp->c_flag |= C_DELETED; + cp->c_ctime = tv.tv_sec; + --cp->c_nlink; + (void) cat_update(hfsmp, &to_desc, &cp->c_attr, NULL, NULL); } - (void)cat_update(hfsmp, &hfsmp->hfs_privdir_desc, - &hfsmp->hfs_privdir_attr, NULL, NULL); - - /* Unlock the Catalog */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (error) goto out; - - cp->c_flag |= C_CHANGE | C_DELETED | C_NOEXISTS; - --cp->c_nlink; - tv = time; - (void) VOP_UPDATE(vp, &tv, &tv, 0); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) + goto out; } else /* Not busy */ { if (cp->c_blocks > 0) { -#if 0 - panic("hfs_remove: attempting to delete a non-empty file!"); -#else printf("hfs_remove: attempting to delete a non-empty file %s\n", cp->c_desc.cd_nameptr); error = EBUSY; goto out; -#endif } - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) - goto out; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, 
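[The orphan rename above gives the open-but-deleted file a deterministic name derived from its file ID, so that crash recovery (hfs_remove_orphans) can find and reap it later. The exact formats behind MAKE_INODE_NAME/MAKE_DELETED_NAME are not shown in this hunk; the "temp<fileid>" form below is an assumption for illustration only:

#include <stdint.h>
#include <stdio.h>

static void
make_deleted_name(char buf[32], uint32_t fileid)
{
    /* hypothetical format: real prefix comes from MAKE_DELETED_NAME */
    (void) snprintf(buf, 32, "temp%u", (unsigned)fileid);
}
]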
HFS_EXCLUSIVE_LOCK); - error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); + error = cat_delete(hfsmp, &desc, &cp->c_attr); if (error && error != ENXIO && error != ENOENT && truncated) { if ((cp->c_datafork && cp->c_datafork->ff_size != 0) || @@ -1990,10 +1830,22 @@ hfs_removefile(dvp, vp, cnp, options) cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); } } - - /* Unlock the Catalog */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - if (error) goto out; + if (error == 0) { + /* Delete any attributes, ignore errors */ + (void) hfs_removeallattr(hfsmp, cp->c_fileid); + + /* Update the parent directory */ + if (dcp->c_entries > 0) + dcp->c_entries--; + if (dcp->c_nlink > 0) + dcp->c_nlink--; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + } + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) + goto out; #if QUOTA (void)hfs_chkiq(cp, -1, NOCRED, 0); @@ -2001,8 +1853,10 @@ hfs_removefile(dvp, vp, cnp, options) cp->c_mode = 0; truncated = 0; // because the catalog entry is gone - cp->c_flag |= C_CHANGE | C_NOEXISTS; + cp->c_flag |= C_NOEXISTS; + cp->c_touch_chgtime = TRUE; /* XXX needed ? */ --cp->c_nlink; + hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); } @@ -2016,66 +1870,32 @@ hfs_removefile(dvp, vp, cnp, options) */ cat_releasedesc(&cp->c_desc); - /* In all three cases the parent lost a child */ - if (dcp->c_entries > 0) - dcp->c_entries--; - if (dcp->c_nlink > 0) - dcp->c_nlink--; - dcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(dvp, &tv, &tv, 0); HFS_KNOTE(dvp, NOTE_WRITE); out: - /* All done with component name... */ - if ((options & HFSRM_SAVE_NAME) == 0 && - (cnp != 0) && - (cnp->cn_flags & (HASBUF | SAVENAME)) == (HASBUF | SAVENAME)) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } - - if (!(options & HFSRM_SKIP_RESERVE)) { - cat_postflight(hfsmp, &cookie, p); + if (got_cookie) { + cat_postflight(hfsmp, &cookie, 0); } /* Commit the truncation to the catalog record */ if (truncated) { - cp->c_flag |= C_CHANGE | C_UPDATE | C_FORCEUPDATE; - tv = time; - (void) VOP_UPDATE(vp, &tv, &tv, 0); + cp->c_flag |= C_FORCEUPDATE; + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + (void) hfs_update(vp, 0); } - // XXXdbg if (started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); } HFS_KNOTE(vp, NOTE_DELETE); if (rvp) { HFS_KNOTE(rvp, NOTE_DELETE); - vrele(rvp); + /* Defer the vnode_put on rvp until the hfs_unlock(). */ + cp->c_flag |= C_NEED_RVNODE_PUT; }; - if (error) { - vput(vp); - } else { - VOP_UNLOCK(vp, 0, p); - // XXXdbg - try to prevent the lost ubc_info panic - if ((cp->c_flag & C_HARDLINK) == 0 || cp->c_nlink == 0) { - (void) ubc_uncache(vp); - } - vrele(vp); - } - if (!(options & HFSRM_PARENT_LOCKED)) { - vput(dvp); - } - return (error); } @@ -2090,7 +1910,7 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp) cp->c_desc.cd_nameptr = 0; cp->c_desc.cd_namelen = 0; cp->c_desc.cd_flags &= ~CD_HASBUF; - remove_name(name); + vfs_removename(name); } bcopy(cdp, &cp->c_desc, sizeof(cp->c_desc)); @@ -2101,36 +1921,26 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp) } -/* -# -#% rename fdvp U U U -#% rename fvp U U U -#% rename tdvp L U U -#% rename tvp X U U -# -*/ /* * Rename a cnode. 
* - * The VFS layer guarantees that source and destination will - * either both be directories, or both not be directories. - * - * When the target is a directory, hfs_rename must ensure - * that it is empty. + * The VFS layer guarantees that: + * - source and destination will either both be directories, or + * both not be directories. + * - all the vnodes are from the same file system * - * The rename system call is responsible for freeing - * the pathname buffers (ie no need to call VOP_ABORTOP). + * When the target is a directory, HFS must ensure that its empty. */ - static int -hfs_rename(ap) - struct vop_rename_args /* { +hfs_vnop_rename(ap) + struct vnop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ *ap; { struct vnode *tvp = ap->a_tvp; @@ -2139,70 +1949,48 @@ hfs_rename(ap) struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; - struct proc *p = fcnp->cn_proc; - struct cnode *fcp = NULL; - struct cnode *fdcp = NULL; - struct cnode *tdcp = VTOC(tdvp); + struct proc *p = vfs_context_proc(ap->a_context); + struct cnode *fcp; + struct cnode *fdcp; + struct cnode *tdcp; + struct cnode *tcp; struct cat_desc from_desc; struct cat_desc to_desc; struct cat_desc out_desc; - struct hfsmount *hfsmp = NULL; - struct timeval tv; - cat_cookie_t cookie = {0}; - int fdvp_locked, fvp_locked, tdvp_locked, tvp_locked; - int tvp_deleted; - int started_tr = 0, grabbed_lock = 0; - int error = 0; - + struct hfsmount *hfsmp; + cat_cookie_t cookie; + int tvp_deleted = 0; + int started_tr = 0, got_cookie = 0; + int took_trunc_lock = 0; + int lockflags; + int error; - /* Establish our vnode lock state. */ - tdvp_locked = 1; - tvp_locked = (tvp != 0); - fdvp_locked = 0; - fvp_locked = 0; - tvp_deleted = 0; + /* When tvp exist, take the truncate lock for the hfs_removefile(). */ + if (tvp && vnode_isreg(tvp)) { + hfs_lock_truncate(VTOC(tvp), TRUE); + took_trunc_lock = 1; + } - /* - * Check for cross-device rename. - */ - if ((fvp->v_mount != tdvp->v_mount) || - (tvp && (fvp->v_mount != tvp->v_mount))) { - error = EXDEV; - goto out; + error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL, + HFS_EXCLUSIVE_LOCK); + if (error) { + if (took_trunc_lock) + hfs_unlock_truncate(VTOC(tvp)); + return (error); } - /* - * When fvp matches tvp they must be case variants - * or hard links. - * - * In some cases tvp will be locked in other cases - * it be unlocked with no reference. Normalize the - * state here (unlocked with a reference) so that - * we can exit in a known state. - */ - if (fvp == tvp) { - if (VOP_ISLOCKED(tvp) && - (VTOC(tvp)->c_lock.lk_lockholder == p->p_pid) && - (VTOC(tvp)->c_lock.lk_lockthread == current_thread())) { - vput(tvp); - } - tvp = NULL; - tvp_locked = 0; + fdcp = VTOC(fdvp); + fcp = VTOC(fvp); + tdcp = VTOC(tdvp); + tcp = tvp ? VTOC(tvp) : NULL; + hfsmp = VTOHFS(tdvp); - /* - * If this a hard link with different parents - * and its not a case variant then keep tvp - * around for removal. - */ - if ((VTOC(fvp)->c_flag & C_HARDLINK) && - ((fdvp != tdvp) || - (hfs_namecmp(fcnp->cn_nameptr, fcnp->cn_namelen, - tcnp->cn_nameptr, tcnp->cn_namelen) != 0))) { - tvp = fvp; - vref(tvp); - } + /* Check for a race against unlink. 
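[hfs_vnop_rename() now delegates all of the old hand-rolled lock juggling to hfs_lockfour(), which must sort and dedupe up to four cnodes before locking so that rename("a/b","c/d") and rename("c/d","a/b") cannot deadlock each other. A pthreads model of that idea; address order is illustrative, the real ordering key is an implementation detail:

#include <pthread.h>
#include <stdint.h>

static void
lock_four(pthread_mutex_t *l[4])
{
    /* sort by address (tiny insertion sort); NULLs drift to the end */
    for (int i = 1; i < 4; i++)
        for (int j = i; j > 0 && l[j] != NULL &&
            (l[j-1] == NULL || (uintptr_t)l[j] < (uintptr_t)l[j-1]); j--) {
            pthread_mutex_t *t = l[j]; l[j] = l[j-1]; l[j-1] = t;
        }
    for (int i = 0; i < 4; i++)
        if (l[i] != NULL && (i == 0 || l[i] != l[i-1]))
            pthread_mutex_lock(l[i]);   /* skip NULLs and duplicates */
}
]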
*/ + if (fcp->c_flag & C_NOEXISTS) { + error = ENOENT; + goto out; } - + /* * The following edge case is caught here: * (to cannot be a descendent of from) @@ -2218,7 +2006,7 @@ hfs_rename(ap) * / * o tvp */ - if (tdcp->c_parentcnid == VTOC(fvp)->c_cnid) { + if (tdcp->c_parentcnid == fcp->c_cnid) { error = EINVAL; goto out; } @@ -2238,7 +2026,7 @@ hfs_rename(ap) * / * o fvp */ - if (tvp && (tvp->v_type == VDIR) && (VTOC(tvp)->c_entries != 0)) { + if (tvp && vnode_isdir(tvp) && (tcp->c_entries != 0) && fvp != tvp) { error = ENOTEMPTY; goto out; } @@ -2260,14 +2048,11 @@ hfs_rename(ap) /* * Make sure "from" vnode and its parent are changeable. */ - if ((VTOC(fvp)->c_flags & (IMMUTABLE | APPEND)) || - (VTOC(fdvp)->c_flags & APPEND)) { + if ((fcp->c_flags & (IMMUTABLE | APPEND)) || (fdcp->c_flags & APPEND)) { error = EPERM; goto out; } - hfsmp = VTOHFS(tdvp); - /* * If the destination parent directory is "sticky", then the * user must own the parent directory, or the destination of @@ -2275,120 +2060,21 @@ hfs_rename(ap) * (except by root). This implements append-only directories. * * Note that checks for immutable and write access are done - * by the call to VOP_REMOVE. + * by the call to hfs_removefile. */ if (tvp && (tdcp->c_mode & S_ISTXT) && - (tcnp->cn_cred->cr_uid != 0) && - (tcnp->cn_cred->cr_uid != tdcp->c_uid) && - (hfs_owner_rights(hfsmp, VTOC(tvp)->c_uid, tcnp->cn_cred, p, false)) ) { + (suser(vfs_context_ucred(tcnp->cn_context), NULL)) && + (kauth_cred_getuid(vfs_context_ucred(tcnp->cn_context)) != tdcp->c_uid) && + (hfs_owner_rights(hfsmp, tcp->c_uid, vfs_context_ucred(tcnp->cn_context), p, false)) ) { error = EPERM; goto out; } #if QUOTA if (tvp) - (void)hfs_getinoquota(VTOC(tvp)); + (void)hfs_getinoquota(tcp); #endif - - /* - * Lock all the vnodes before starting a journal transaction. - */ - - /* - * Simple case (same parent) - just lock child (fvp). - */ - if (fdvp == tdvp) { - if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)) - goto out; - fvp_locked = 1; - goto vnlocked; - } - - /* - * If fdvp is the parent of tdvp then we'll need to - * drop tdvp's lock before acquiring a lock on fdvp. - * - * fdvp - * o - * / \ - * / \ - * tdvp o o fvp - * \ - * \ - * o tvp - * - * - * If the parent directories are unrelated then we'll - * need to aquire their vnode locks in vnode address - * order. Otherwise we can race with another rename - * call that involves the same vnodes except that to - * and from are switched and potentially deadlock. - * [ie rename("a/b", "c/d") vs rename("c/d", "a/b")] - * - * If its not either of the two above cases then we - * can safely lock fdvp and fvp. - */ - if ((VTOC(fdvp)->c_cnid == VTOC(tdvp)->c_parentcnid) || - ((VTOC(tdvp)->c_cnid != VTOC(fdvp)->c_parentcnid) && - (fdvp < tdvp))) { - - /* Drop locks on tvp and tdvp */ - if (tvp_locked) { - VOP_UNLOCK(tvp, 0, p); - tvp_locked = 0; - } - VOP_UNLOCK(tdvp, 0, p); - tdvp_locked = 0; - - /* Aquire locks in correct order */ - if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - fdvp_locked = 1; - if ((error = vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - tdvp_locked = 1; - - /* - * Now that the parents are locked only one thread - * can continue. 
So the lock order of the children - * doesn't really matter - */ - if (tvp == fvp) { - if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - tvp_locked = 1; - } else { - if (tvp) { - if ((error = vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - tvp_locked = 1; - } - if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - fvp_locked = 1; - } - - } else /* OK to lock fdvp and fvp */ { - if ((error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, p))) - goto out; - fdvp_locked = 1; - if (error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p)) - goto out; - if (tvp == fvp) - tvp_locked = 1; - else - fvp_locked = 1; - } - -vnlocked: - fdcp = VTOC(fdvp); - fcp = VTOC(fvp); - - /* - * While fvp is still locked, purge it from the name cache and - * grab it's c_cnid value. Note that the removal of tvp (below) - * can drop fvp's lock when fvp == tvp. - */ + /* Preflighting done, take fvp out of the name space. */ cache_purge(fvp); /* @@ -2396,14 +2082,13 @@ vnlocked: * we can drop its NODUMP status. */ if ((fcp->c_flags & UF_NODUMP) && - (fvp->v_type == VREG) && + vnode_isreg(fvp) && (fdvp != tdvp) && (fdcp->c_desc.cd_nameptr != NULL) && (strcmp(fdcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) { fcp->c_flags &= ~UF_NODUMP; - fcp->c_flag |= C_CHANGE; - tv = time; - (void) VOP_UPDATE(fvp, &tv, &tv, 0); + fcp->c_touch_chgtime = TRUE; + (void) hfs_update(fvp, 0); } bzero(&from_desc, sizeof(from_desc)); @@ -2420,80 +2105,108 @@ vnlocked: to_desc.cd_flags = fcp->c_desc.cd_flags & ~(CD_HASBUF | CD_DECOMPOSED); to_desc.cd_cnid = fcp->c_cnid; - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - goto out; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; + } + started_tr = 1; + + // if it's a hardlink then re-lookup the name so + // that we get the correct cnid in from_desc (see + // the comment in hfs_removefile for more details) + // + if (fcp->c_flag & C_HARDLINK) { + struct cat_desc tmpdesc; + cnid_t real_cnid; + + bzero(&tmpdesc, sizeof(tmpdesc)); + tmpdesc.cd_nameptr = fcnp->cn_nameptr; + tmpdesc.cd_namelen = fcnp->cn_namelen; + tmpdesc.cd_parentcnid = fdcp->c_cnid; + tmpdesc.cd_hint = fdcp->c_childhint; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + if (cat_lookup(hfsmp, &tmpdesc, 0, NULL, NULL, NULL, &real_cnid) != 0) { + hfs_systemfile_unlock(hfsmp, lockflags); + goto out; } - started_tr = 1; + + // use the real cnid instead of whatever happened to be there + from_desc.cd_cnid = real_cnid; + hfs_systemfile_unlock(hfsmp, lockflags); } /* * Reserve some space in the Catalog file. */ + bzero(&cookie, sizeof(cookie)); if ((error = cat_preflight(hfsmp, CAT_RENAME + CAT_DELETE, &cookie, p))) { goto out; } + got_cookie = 1; /* - * If the destination exists then it needs to be removed. + * If the destination exists then it may need to be removed. */ - if (tvp) { - if (tvp != fvp) - cache_purge(tvp); /* - * Note that hfs_removedir and hfs_removefile - * will keep tdvp locked with a reference. - * But tvp will lose its lock and reference. + * When fvp matches tvp they must be case variants + * or hard links. */ - if (tvp->v_type == VDIR) - error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS); - else - error = hfs_removefile(tdvp, tvp, tcnp, HFSRM_RENAMEOPTS); + if (fvp == tvp) { + /* + * If this a hard link with different parents + * and its not a case variant then tvp should + * be removed. 
+ */ + if (!((fcp->c_flag & C_HARDLINK) && + ((fdvp != tdvp) || + (hfs_namecmp(fcnp->cn_nameptr, fcnp->cn_namelen, + tcnp->cn_nameptr, tcnp->cn_namelen) != 0)))) { + goto skip; + } + } else { + cache_purge(tvp); + } + + if (vnode_isdir(tvp)) + error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE); + else { + error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE); + } - if (tvp == fvp) - fvp_locked = 0; - tvp = NULL; - tvp_locked = 0; - tvp_deleted = 1; if (error) goto out; + tvp_deleted = 1; } - +skip: /* * All done with tvp and fvp */ - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) - goto out; - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc); - - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (error) { goto out; } + /* Invalidate negative cache entries in the destination directory */ + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + cache_purge_negatives(tdvp); + /* Update cnode's catalog descriptor */ - if (fvp_locked) { - replace_desc(fcp, &out_desc); - fcp->c_parentcnid = tdcp->c_cnid; - fcp->c_hint = 0; - } + replace_desc(fcp, &out_desc); + fcp->c_parentcnid = tdcp->c_cnid; + fcp->c_hint = 0; - hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_RMDIR : VOL_RMFILE, + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_RMDIR : VOL_RMFILE, (fdcp->c_cnid == kHFSRootFolderID)); - hfs_volupdate(hfsmp, fvp->v_type == VDIR ? VOL_MKDIR : VOL_MKFILE, + hfs_volupdate(hfsmp, vnode_isdir(fvp) ? VOL_MKDIR : VOL_MKFILE, (tdcp->c_cnid == kHFSRootFolderID)); /* Update both parent directories. */ - tv = time; if (fdvp != tdvp) { tdcp->c_nlink++; tdcp->c_entries++; @@ -2501,22 +2214,24 @@ vnlocked: fdcp->c_nlink--; if (fdcp->c_entries > 0) fdcp->c_entries--; - fdcp->c_flag |= C_CHANGE | C_UPDATE; - (void) VOP_UPDATE(fdvp, &tv, &tv, 0); + fdcp->c_touch_chgtime = TRUE; + fdcp->c_touch_modtime = TRUE; + + fdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! + (void) hfs_update(fdvp, 0); } tdcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ - tdcp->c_flag |= C_CHANGE | C_UPDATE; - (void) VOP_UPDATE(tdvp, &tv, &tv, 0); + tdcp->c_touch_chgtime = TRUE; + tdcp->c_touch_modtime = TRUE; + tdcp->c_flag |= C_FORCEUPDATE; // XXXdbg - force it out! 
+ (void) hfs_update(tdvp, 0); out: - if (hfsmp) { + if (got_cookie) { cat_postflight(hfsmp, &cookie, p); } if (started_tr) { - journal_end_transaction(hfsmp->jnl); - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); } /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have @@ -2527,24 +2242,11 @@ out: HFS_KNOTE(fdvp, NOTE_WRITE); if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE); }; - if (fvp_locked) { - VOP_UNLOCK(fvp, 0, p); - } - if (fdvp_locked) { - VOP_UNLOCK(fdvp, 0, p); - } - if (tdvp_locked) { - VOP_UNLOCK(tdvp, 0, p); - } - if (tvp_locked) { - VOP_UNLOCK(tvp, 0, p); - } - vrele(fvp); - vrele(fdvp); - if (tvp) - vrele(tvp); - vrele(tdvp); + if (took_trunc_lock) + hfs_unlock_truncate(VTOC(tvp)); + + hfs_unlockfour(fdcp, fcp, tdcp, tcp); /* After tvp is removed the only acceptable error is EIO */ if (error && tvp_deleted) @@ -2554,239 +2256,276 @@ out: } - /* - * Mkdir system call -#% mkdir dvp L U U -#% mkdir vpp - L - -# - vop_mkdir { - IN WILLRELE struct vnode *dvp; - OUT struct vnode **vpp; - IN struct componentname *cnp; - IN struct vattr *vap; - - We are responsible for freeing the namei buffer, - it is done in hfs_makenode() -*/ - + * Make a directory. + */ static int -hfs_mkdir(ap) - struct vop_mkdir_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; +hfs_vnop_mkdir(struct vnop_mkdir_args *ap) { - struct vattr *vap = ap->a_vap; - - return (hfs_makenode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, ap->a_vpp, ap->a_cnp)); + /***** HACK ALERT ********/ + ap->a_cnp->cn_flags |= MAKEENTRY; + return hfs_makenode(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, ap->a_context); } /* - * symlink -- make a symbolic link -#% symlink dvp L U U -#% symlink vpp - U - -# -# XXX - note that the return vnode has already been VRELE'ed -# by the filesystem layer. To use it you must use vget, -# possibly with a further namei. -# - vop_symlink { - IN WILLRELE struct vnode *dvp; - OUT WILLRELE struct vnode **vpp; - IN struct componentname *cnp; - IN struct vattr *vap; - IN char *target; - - We are responsible for freeing the namei buffer, - it is done in hfs_makenode(). - -*/ - + * Create a symbolic link. 
+ */ static int -hfs_symlink(ap) - struct vop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - char *a_target; - } */ *ap; +hfs_vnop_symlink(struct vnop_symlink_args *ap) { - register struct vnode *vp, **vpp = ap->a_vpp; + struct vnode **vpp = ap->a_vpp; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = NULL; struct hfsmount *hfsmp; struct filefork *fp; - int len, error; struct buf *bp = NULL; + char *datap; + int started_tr = 0; + int len, error; /* HFS standard disks don't support symbolic links */ - if (VTOVCB(ap->a_dvp)->vcbSigWord != kHFSPlusSigWord) { - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); - return (EOPNOTSUPP); - } + if (VTOVCB(dvp)->vcbSigWord != kHFSPlusSigWord) + return (ENOTSUP); /* Check for empty target name */ - if (ap->a_target[0] == 0) { - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); + if (ap->a_target[0] == 0) return (EINVAL); - } - - - hfsmp = VTOHFS(ap->a_dvp); /* Create the vnode */ - if ((error = hfs_makenode(S_IFLNK | ap->a_vap->va_mode, - ap->a_dvp, vpp, ap->a_cnp))) { - return (error); + ap->a_vap->va_mode |= S_IFLNK; + if ((error = hfs_makenode(dvp, vpp, ap->a_cnp, ap->a_vap, ap->a_context))) { + goto out; } - vp = *vpp; - len = strlen(ap->a_target); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); fp = VTOF(vp); + hfsmp = VTOHFS(dvp); + len = strlen(ap->a_target); #if QUOTA (void)hfs_getinoquota(VTOC(vp)); #endif /* QUOTA */ - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - hfs_global_shared_lock_release(hfsmp); - vput(vp); - return error; - } + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto out; } + started_tr = 1; - /* Allocate space for the link */ - error = VOP_TRUNCATE(vp, len, IO_NOZEROFILL, - ap->a_cnp->cn_cred, ap->a_cnp->cn_proc); + /* + * Allocate space for the link. + * + * Since we're already inside a transaction, + * tell hfs_truncate to skip the ubc_setsize. + * + * Don't need truncate lock since a symlink is treated as a system file. + */ + error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, ap->a_context); if (error) goto out; /* XXX need to remove link */ /* Write the link to disk */ - bp = getblk(vp, 0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size), + bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size), 0, 0, BLK_META); if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, bp); } - bzero(bp->b_data, bp->b_bufsize); - bcopy(ap->a_target, bp->b_data, len); + datap = (char *)buf_dataptr(bp); + bzero(datap, buf_size(bp)); + bcopy(ap->a_target, datap, len); + if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp); } else { - bawrite(bp); + buf_bawrite(bp); } + /* + * We deferred the ubc_setsize for hfs_truncate + * since we were inside a transaction. + * + * We don't need to drop the cnode lock here + * since this is a symlink. + */ + ubc_setsize(vp, len); out: - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); + if (started_tr) + hfs_end_transaction(hfsmp); + if (vp) { + hfs_unlock(VTOC(vp)); } - hfs_global_shared_lock_release(hfsmp); - vput(vp); return (error); } -/* - * Dummy dirents to simulate the "." and ".." entries of the directory - * in a hfs filesystem. HFS doesn't provide these on disk. Note that - * the size of these entries is the smallest needed to represent them - * (only 12 byte each).
- */ -static hfsdotentry rootdots[2] = { - { - 1, /* d_fileno */ - sizeof(struct hfsdotentry), /* d_reclen */ - DT_DIR, /* d_type */ - 1, /* d_namlen */ - "." /* d_name */ - }, - { - 1, /* d_fileno */ - sizeof(struct hfsdotentry), /* d_reclen */ - DT_DIR, /* d_type */ - 2, /* d_namlen */ - ".." /* d_name */ - } +/* structures to hold a "." or ".." directory entry */ +struct hfs_stddotentry { + u_int32_t d_fileno; /* unique file number */ + u_int16_t d_reclen; /* length of this structure */ + u_int8_t d_type; /* dirent file type */ + u_int8_t d_namlen; /* len of filename */ + char d_name[4]; /* "." or ".." */ }; -/* 4.3 Note: -* There is some confusion as to what the semantics of uio_offset are. -* In ufs, it represents the actual byte offset within the directory -* "file." HFS, however, just uses it as an entry counter - essentially -* assuming that it has no meaning except to the hfs_readdir function. -* This approach would be more efficient here, but some callers may -* assume the uio_offset acts like a byte offset. NFS in fact -* monkeys around with the offset field a lot between readdir calls. -* -* The use of the resid uiop->uio_resid and uiop->uio_iov->iov_len -* fields is a mess as well. The libc function readdir() returns -* NULL (indicating the end of a directory) when either -* the getdirentries() syscall (which calls this and returns -* the size of the buffer passed in less the value of uiop->uio_resid) -* returns 0, or a direct record with a d_reclen of zero. -* nfs_server.c:rfs_readdir(), on the other hand, checks for the end -* of the directory by testing uiop->uio_resid == 0. The solution -* is to pad the size of the last struct direct in a given -* block to fill the block if we are not at the end of the directory. -*/ +struct hfs_extdotentry { + u_int64_t d_fileno; /* unique file number */ + u_int64_t d_seekoff; /* seek offset (optional, used by servers) */ + u_int16_t d_reclen; /* length of this structure */ + u_int16_t d_namlen; /* len of filename */ + u_int8_t d_type; /* dirent file type */ + u_char d_name[3]; /* "." or ".." */ +}; +typedef union { + struct hfs_stddotentry std; + struct hfs_extdotentry ext; +} hfs_dotentry_t; /* - * NOTE: We require a minimal buffer size of DIRBLKSIZ for two reasons. One, it is the same value - * returned be stat() call as the block size. This is mentioned in the man page for getdirentries(): - * "Nbytes must be greater than or equal to the block size associated with the file, - * see stat(2)". Might as well settle on the same size of ufs. Second, this makes sure there is enough - * room for the . and .. entries that have to added manually. + * hfs_vnop_readdir reads directory entries into the buffer pointed + * to by uio, in a filesystem independent format. Up to uio_resid + * bytes of data can be transferred. The data in the buffer is a + * series of packed dirent structures where each one contains the + * following entries: + * + * u_int32_t d_fileno; // file number of entry + * u_int16_t d_reclen; // length of this record + * u_int8_t d_type; // file type + * u_int8_t d_namlen; // length of string in d_name + * char d_name[MAXNAMELEN+1]; // null terminated file name + * + * The current position (uio_offset) refers to the next block of + * entries. The offset can only be set to a value previously + * returned by hfs_vnop_readdir or zero. This offset does not have + * to match the number of bytes returned (in uio_resid). + * + * In fact, the offset used by HFS is essentially an index (26 bits) + * with a tag (6 bits). 
The tag is for associating the next request + * with the current request. This enables us to have multiple threads + * reading the directory while the directory is also being modified. + * + * Each tag/index pair is tied to a unique directory hint. The hint + * contains information (filename) needed to build the catalog b-tree + * key for finding the next set of entries. */ - -/* -#% readdir vp L L L -# -vop_readdir { - IN struct vnode *vp; - INOUT struct uio *uio; - IN struct ucred *cred; - INOUT int *eofflag; - OUT int *ncookies; - INOUT u_long **cookies; - */ static int -hfs_readdir(ap) - struct vop_readdir_args /* { - struct vnode *vp; - struct uio *uio; - struct ucred *cred; - int *eofflag; - int *ncookies; - u_long **cookies; +hfs_vnop_readdir(ap) + struct vnop_readdir_args /* { + vnode_t a_vp; + uio_t a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; } */ *ap; { - register struct uio *uio = ap->a_uio; - struct cnode *cp = VTOC(ap->a_vp); - struct hfsmount *hfsmp = VTOHFS(ap->a_vp); - struct proc *p = current_proc(); - off_t off = uio->uio_offset; - int retval = 0; + struct vnode *vp = ap->a_vp; + uio_t uio = ap->a_uio; + struct cnode *cp; + struct hfsmount *hfsmp; + directoryhint_t *dirhint = NULL; + directoryhint_t localhint; + off_t offset; + off_t startoffset; + int error = 0; int eofflag = 0; - void *user_start = NULL; - int user_len; + user_addr_t user_start = 0; + user_size_t user_len = 0; + int index; + unsigned int tag; + int items; + int lockflags; + int extended; + int nfs_cookies; + caddr_t bufstart; + cnid_t cnid_hint = 0; + + items = 0; + startoffset = offset = uio_offset(uio); + bufstart = CAST_DOWN(caddr_t, uio_iov_base(uio)); + extended = (ap->a_flags & VNODE_READDIR_EXTENDED); + nfs_cookies = extended && (ap->a_flags & VNODE_READDIR_REQSEEKOFF); + + /* Sanity check the uio data. */ + if ((uio_iovcnt(uio) > 1) || + (uio_resid(uio) < (int)sizeof(struct dirent))) { + return (EINVAL); + } + /* Note that the dirhint calls require an exclusive lock. */ + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); + cp = VTOC(vp); + hfsmp = VTOHFS(vp); - int ncookies=0; - u_long *cookies=NULL; - u_long *cookiep=NULL; - - /* We assume it's all one big buffer... */ - if (uio->uio_iovcnt > 1 || uio->uio_resid < AVERAGE_HFSDIRENTRY_SIZE) - return EINVAL; + /* Pick up cnid hint (if any). */ + if (nfs_cookies) { + cnid_hint = (cnid_t)(uio_offset(uio) >> 32); + uio_setoffset(uio, uio_offset(uio) & 0x00000000ffffffffLL); + } + /* + * Synthesize entries for "." and ".." 
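+ *
+ * The fixed-size entries declared above pack with no padding; a
+ * compile-time check one could add (C11, user space):
+ *
+ *	_Static_assert(sizeof(struct hfs_stddotentry) == 12, "std dot entry");
+ *	_Static_assert(sizeof(struct hfs_extdotentry) == 24, "ext dot entry");
+ *
+ * The 4-byte d_name in the standard form keeps the record a multiple
+ * of 4, and zeroing it below also null-terminates the name.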
+ */ + if (offset == 0) { + hfs_dotentry_t dotentry[2]; + size_t uiosize; + + if (extended) { + struct hfs_extdotentry *entry = &dotentry[0].ext; + + entry->d_fileno = cp->c_cnid; + entry->d_reclen = sizeof(struct hfs_extdotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 1; + entry->d_name[0] = '.'; + entry->d_name[1] = '\0'; + entry->d_name[2] = '\0'; + entry->d_seekoff = 1; + + ++entry; + entry->d_fileno = cp->c_parentcnid; + entry->d_reclen = sizeof(struct hfs_extdotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 2; + entry->d_name[0] = '.'; + entry->d_name[1] = '.'; + entry->d_name[2] = '\0'; + entry->d_seekoff = 2; + uiosize = 2 * sizeof(struct hfs_extdotentry); + } else { + struct hfs_stddotentry *entry = &dotentry[0].std; + + entry->d_fileno = cp->c_cnid; + entry->d_reclen = sizeof(struct hfs_stddotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 1; + *(int *)&entry->d_name[0] = 0; + entry->d_name[0] = '.'; + + ++entry; + entry->d_fileno = cp->c_parentcnid; + entry->d_reclen = sizeof(struct hfs_stddotentry); + entry->d_type = DT_DIR; + entry->d_namlen = 2; + *(int *)&entry->d_name[0] = 0; + entry->d_name[0] = '.'; + entry->d_name[1] = '.'; + uiosize = 2 * sizeof(struct hfs_stddotentry); + } + if ((error = uiomove((caddr_t)&dotentry, uiosize, uio))) { + goto out; + } + offset += 2; + } - // XXXdbg + /* If there are no real entries then we're done. */ + if (cp->c_entries == 0) { + error = 0; + eofflag = 1; + uio_setoffset(uio, offset); + goto seekoffcalc; + } + + // // We have to lock the user's buffer here so that we won't // fault on it after we've acquired a shared lock on the // catalog file. The issue is that you can get a 3-way @@ -2804,168 +2543,129 @@ hfs_readdir(ap) // currently (10/30/02) that can fault on user data with a // shared lock on the catalog file. // - if (hfsmp->jnl && uio->uio_segflg == UIO_USERSPACE) { - user_start = uio->uio_iov->iov_base; - user_len = uio->uio_iov->iov_len; + if (hfsmp->jnl && uio_isuserspace(uio)) { + user_start = uio_curriovbase(uio); + user_len = uio_curriovlen(uio); - if ((retval = vslock(user_start, user_len)) != 0) { - return retval; + if ((error = vslock(user_start, user_len)) != 0) { + user_start = 0; + goto out; } } - - /* Create the entries for . and .. */ - if (uio->uio_offset < sizeof(rootdots)) { - caddr_t dep; - size_t dotsize; - - rootdots[0].d_fileno = cp->c_cnid; - rootdots[1].d_fileno = cp->c_parentcnid; - - if (uio->uio_offset == 0) { - dep = (caddr_t) &rootdots[0]; - dotsize = 2* sizeof(struct hfsdotentry); - } else if (uio->uio_offset == sizeof(struct hfsdotentry)) { - dep = (caddr_t) &rootdots[1]; - dotsize = sizeof(struct hfsdotentry); - } else { - retval = EINVAL; - goto Exit; + /* Convert offset into a catalog directory index. */ + index = (offset & HFS_INDEX_MASK) - 2; + tag = offset & ~HFS_INDEX_MASK; + + /* Lock catalog during cat_findname and cat_getdirentries. */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + /* When called from NFS, try and resolve a cnid hint. 
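+ * The hint was peeled off the 64-bit offset above: the upper 32 bits
+ * carry a cnid, the lower 32 bits the tag|index word. As a sketch
+ * (helper names illustrative):
+ *
+ *	static cnid_t    cookie_cnid(u_int64_t off) { return (cnid_t)(off >> 32); }
+ *	static u_int32_t cookie_word(u_int64_t off) { return (u_int32_t)off; }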
*/ + if (nfs_cookies && cnid_hint != 0) { + if (cat_findname(hfsmp, cnid_hint, &localhint.dh_desc) == 0) { + if ( localhint.dh_desc.cd_parentcnid == cp->c_cnid) { + localhint.dh_index = index - 1; + localhint.dh_time = 0; + localhint.dh_link.sle_next = 0; + dirhint = &localhint; /* don't forget to release the descriptor */ + } else { + cat_releasedesc(&localhint.dh_desc); + } } - - retval = uiomove(dep, dotsize, uio); - if (retval != 0) - goto Exit; } - if (ap->a_ncookies != NULL) { - /* - * These cookies are handles that allow NFS to restart - * scanning through a directory. If a directory is large - * enough, NFS will issue a successive readdir() with a - * uio->uio_offset that is equal to one of these cookies. - * - * The cookies that we generate are synthesized byte-offsets. - * The offset is where the dirent the dirent would be if the - * directory were an array of packed dirent structs. It is - * synthetic because that's not how directories are stored in - * HFS but other code expects that the cookie is a byte offset. - * - * We have to pre-allocate the cookies because cat_getdirentries() - * is the only one that can properly synthesize the offsets (since - * it may have to skip over entries and only it knows the true - * virtual offset of any particular directory entry). So we allocate - * a cookie table here and pass it in to cat_getdirentries(). - * - * Note that the handling of "." and ".." is mostly done here but - * cat_getdirentries() is aware of. - * - * Only the NFS server uses cookies so fortunately this code is - * not executed unless the NFS server is issuing the readdir - * request. - * - * Also note that the NFS server is the one responsible for - * free'ing the cookies even though we allocated them. Ick. - * - * We allocate a reasonable number of entries for the size of - * the buffer that we're going to fill in. cat_getdirentries() - * is smart enough to not overflow if there's more room in the - * buffer but not enough room in the cookie table. - */ - if (uio->uio_segflg != UIO_SYSSPACE) - panic("hfs_readdir: unexpected uio from NFS server"); - - ncookies = uio->uio_iov->iov_len / (AVERAGE_HFSDIRENTRY_SIZE/2); - MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); + /* Get a directory hint (cnode must be locked exclusive) */ + if (dirhint == NULL) { + dirhint = hfs_getdirhint(cp, ((index - 1) & HFS_INDEX_MASK) | tag); - *ap->a_ncookies = ncookies; - *ap->a_cookies = cookies; - - /* handle cookies for "." and ".." */ - if (off == 0) { - cookies[0] = 0; - cookies[1] = sizeof(struct hfsdotentry); - } else if (off == sizeof(struct hfsdotentry)) { - cookies[0] = sizeof(struct hfsdotentry); + /* Hide tag from catalog layer. */ + dirhint->dh_index &= HFS_INDEX_MASK; + if (dirhint->dh_index == HFS_INDEX_MASK) { + dirhint->dh_index = -1; } } + + /* Pack the buffer with dirent entries. */ + error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items); - /* If there are no children then we're done */ - if (cp->c_entries == 0) { + hfs_systemfile_unlock(hfsmp, lockflags); + + if (error != 0) { + goto out; + } + + /* Get index to the next item */ + index += items; + + if (items >= (int)cp->c_entries) { eofflag = 1; - retval = 0; - if (cookies) { - cookies[0] = 0; - cookies[1] = sizeof(struct hfsdotentry); - } - goto Exit; } - /* Lock catalog b-tree */ - retval = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (retval) goto Exit; + /* Convert catalog directory index back into an offset. 
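+ * The tag must come out nonzero so no returned offset collides with
+ * the reserved start offset 0; with 6 tag bits it cycles through
+ * 1..63. A model of the round trip, assuming the 26-bit mask
+ * described earlier (constants illustrative):
+ *
+ *	#define IDX_BITS 26
+ *	#define IDX_MASK ((1u << IDX_BITS) - 1)		// 0x03ffffff
+ *
+ *	u_int32_t off = ((tag & 0x3fu) << IDX_BITS) | ((index + 2) & IDX_MASK);
+ *	// and back: index = (off & IDX_MASK) - 2; tag bits = off >> IDX_BITS
+ *
+ * where the "+ 2" accounts for the synthesized "." and ".." entries.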
*/ + while (tag == 0) + tag = (++cp->c_dirhinttag) << HFS_INDEX_BITS; + uio_setoffset(uio, (index + 2) | tag); + dirhint->dh_index |= tag; - retval = cat_getdirentries(hfsmp, &cp->c_desc, cp->c_entries, uio, &eofflag, cookies, ncookies); +seekoffcalc: + cp->c_touch_acctime = TRUE; - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - - if (retval != E_NONE) { - goto Exit; - } - - /* were we already past eof ? */ - if (uio->uio_offset == off) { - retval = E_NONE; - goto Exit; + if (ap->a_numdirent) { + if (startoffset == 0) + items += 2; + *ap->a_numdirent = items; } - - cp->c_flag |= C_ACCESS; -Exit:; +out: if (hfsmp->jnl && user_start) { vsunlock(user_start, user_len, TRUE); } - - if (ap->a_eofflag) + /* If we didn't do anything then go ahead and dump the hint. */ + if ((dirhint != NULL) && + (dirhint != &localhint) && + (uio_offset(uio) == startoffset)) { + hfs_reldirhint(cp, dirhint); + eofflag = 1; + } + if (ap->a_eofflag) { *ap->a_eofflag = eofflag; - - return (retval); + } + if (dirhint == &localhint) { + cat_releasedesc(&localhint.dh_desc); + } + hfs_unlock(cp); + return (error); } /* - * Return target name of a symbolic link -#% readlink vp L L L -# - vop_readlink { - IN struct vnode *vp; - INOUT struct uio *uio; - IN struct ucred *cred; - */ - + * Read contents of a symbolic link. + */ static int -hfs_readlink(ap) - struct vop_readlink_args /* { +hfs_vnop_readlink(ap) + struct vnop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { - int retval; struct vnode *vp = ap->a_vp; struct cnode *cp; struct filefork *fp; + int error; - if (vp->v_type != VLNK) + if (!vnode_islnk(vp)) return (EINVAL); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); cp = VTOC(vp); fp = VTOF(vp); /* Zero length sym links are not allowed */ if (fp->ff_size == 0 || fp->ff_size > MAXPATHLEN) { VTOVCB(vp)->vcbFlags |= kHFS_DamagedVolume; - return (EINVAL); + error = EINVAL; + goto exit; } /* Cache the path so we don't waste buffer cache resources */ @@ -2973,182 +2673,79 @@ hfs_readlink(ap) struct buf *bp = NULL; MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK); - retval = meta_bread(vp, 0, - roundup((int)fp->ff_size, - VTOHFS(vp)->hfs_phys_block_size), - ap->a_cred, &bp); - if (retval) { + error = (int)buf_meta_bread(vp, (daddr64_t)0, + roundup((int)fp->ff_size, + VTOHFS(vp)->hfs_phys_block_size), + vfs_context_ucred(ap->a_context), &bp); + if (error) { if (bp) - brelse(bp); + buf_brelse(bp); if (fp->ff_symlinkptr) { FREE(fp->ff_symlinkptr, M_TEMP); fp->ff_symlinkptr = NULL; } - return (retval); - } - bcopy(bp->b_data, fp->ff_symlinkptr, (size_t)fp->ff_size); - if (bp) { - if (VTOHFS(vp)->jnl && (bp->b_flags & B_LOCKED) == 0) { - bp->b_flags |= B_INVAL; /* data no longer needed */ - } - brelse(bp); + goto exit; } - } - retval = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio); -#if 1 - /* - * Keep track blocks read - */ - if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (retval == 0)) { - - /* - * If this file hasn't been seen since the start of - * the current sampling period then start over. - */ - if (cp->c_atime < VTOHFS(vp)->hfc_timebase) - VTOF(vp)->ff_bytesread = fp->ff_size; - else - VTOF(vp)->ff_bytesread += fp->ff_size; - - // if (VTOF(vp)->ff_bytesread > fp->ff_size) - // cp->c_flag |= C_ACCESS; - } -#endif - return (retval); -} - -/* - * Lock an cnode. If its already locked, set the WANT bit and sleep. 
-#% lock vp U L U -# - vop_lock { - IN struct vnode *vp; - IN int flags; - IN struct proc *p; - */ - -static int -hfs_lock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - - return (lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p)); -} + bcopy((char *)buf_dataptr(bp), fp->ff_symlinkptr, (size_t)fp->ff_size); -/* - * Unlock an cnode. -#% unlock vp L U L -# - vop_unlock { - IN struct vnode *vp; - IN int flags; - IN struct proc *p; - - */ -static int -hfs_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); -#if 0 - if (!lockstatus(&cp->c_lock)) { - printf("hfs_unlock: vnode %s wasn't locked!\n", - cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : ""); + if (VTOHFS(vp)->jnl && (buf_flags(bp) & B_LOCKED) == 0) { + buf_markinvalid(bp); /* data no longer needed */ + } + buf_brelse(bp); } -#endif - return (lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE, - &vp->v_interlock, ap->a_p)); -} + error = uiomove((caddr_t)fp->ff_symlinkptr, (int)fp->ff_size, ap->a_uio); + /* + * Keep track of blocks read + */ + if ((VTOHFS(vp)->hfc_stage == HFC_RECORDING) && (error == 0)) { + + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) + VTOF(vp)->ff_bytesread = fp->ff_size; + else + VTOF(vp)->ff_bytesread += fp->ff_size; + + // if (VTOF(vp)->ff_bytesread > fp->ff_size) + // cp->c_touch_acctime = TRUE; + } -/* - * Print out the contents of a cnode. -#% print vp = = = -# - vop_print { - IN struct vnode *vp; - */ -static int -hfs_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - struct vnode * vp = ap->a_vp; - struct cnode *cp = VTOC(vp); - - printf("tag VT_HFS, cnid %d, on dev %d, %d", cp->c_cnid, - major(cp->c_dev), minor(cp->c_dev)); -#if FIFO - if (vp->v_type == VFIFO) - fifo_printinfo(vp); -#endif /* FIFO */ - lockmgr_printinfo(&cp->c_lock); - printf("\n"); - return (0); +exit: + hfs_unlock(cp); + return (error); } /* - * Check for a locked cnode. -#% islocked vp = = = -# - vop_islocked { - IN struct vnode *vp; - - */ -static int -hfs_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - return (lockstatus(&VTOC(ap->a_vp)->c_lock)); -} - -/* - -#% pathconf vp L L L -# - vop_pathconf { - IN struct vnode *vp; - IN int name; - OUT register_t *retval; - - */ + * Get configurable pathname variables.
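+ *
+ * These surface through pathconf(2); from user space, e.g.:
+ *
+ *	#include <unistd.h>
+ *	long name_max = pathconf("/Volumes/SomeHFS", _PC_NAME_MAX);
+ *
+ * which yields 255 on HFS Plus and 31 on HFS standard per the cases
+ * below (the path is illustrative).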
+ */ static int -hfs_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { - int retval = 0; - switch (ap->a_name) { case _PC_LINK_MAX: - if (VTOVCB(ap->a_vp)->vcbSigWord == kHFSPlusSigWord) - *ap->a_retval = HFS_LINK_MAX; - else + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) *ap->a_retval = 1; + else + *ap->a_retval = HFS_LINK_MAX; break; case _PC_NAME_MAX: - *ap->a_retval = kHFSPlusMaxFileNameBytes; /* max # of characters x max utf8 representation */ + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + else + *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ break; case _PC_PATH_MAX: - *ap->a_retval = PATH_MAX; /* 1024 */ + *ap->a_retval = PATH_MAX; /* 1024 */ break; case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; @@ -3172,214 +2769,60 @@ hfs_pathconf(ap) *ap->a_retval = 1; break; default: - retval = EINVAL; - } - - return (retval); -} - - -/* - * Advisory record locking support -#% advlock vp U U U -# - vop_advlock { - IN struct vnode *vp; - IN caddr_t id; - IN int op; - IN struct flock *fl; - IN int flags; - - */ -static int -hfs_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct flock *fl = ap->a_fl; - struct hfslockf *lock; - struct filefork *fork; - off_t start, end; - int retval; - - /* Only regular files can have locks */ - if (vp->v_type != VREG) - return (EISDIR); - - fork = VTOF(ap->a_vp); - /* - * Avoid the common case of unlocking when cnode has no locks. - */ - if (fork->ff_lockf == (struct hfslockf *)0) { - if (ap->a_op != F_SETLK) { - fl->l_type = F_UNLCK; - return (0); - } - } - /* - * Convert the flock structure into a start and end. - */ - start = 0; - switch (fl->l_whence) { - case SEEK_SET: - case SEEK_CUR: - /* - * Caller is responsible for adding any necessary offset - * when SEEK_CUR is used. - */ - start = fl->l_start; - break; - case SEEK_END: - start = fork->ff_size + fl->l_start; - break; - default: - return (EINVAL); - } - - if (fl->l_len == 0) - end = -1; - else if (fl->l_len > 0) - end = start + fl->l_len - 1; - else { /* l_len is negative */ - end = start - 1; - start += fl->l_len; - } - if (start < 0) return (EINVAL); - - /* - * Create the hfslockf structure - */ - MALLOC(lock, struct hfslockf *, sizeof *lock, M_LOCKF, M_WAITOK); - lock->lf_start = start; - lock->lf_end = end; - lock->lf_id = ap->a_id; - lock->lf_fork = fork; - lock->lf_type = fl->l_type; - lock->lf_next = (struct hfslockf *)0; - TAILQ_INIT(&lock->lf_blkhd); - lock->lf_flags = ap->a_flags; - /* - * Do the requested operation. - */ - switch(ap->a_op) { - case F_SETLK: - retval = hfs_setlock(lock); - break; - case F_UNLCK: - retval = hfs_clearlock(lock); - FREE(lock, M_LOCKF); - break; - case F_GETLK: - retval = hfs_getlock(lock, fl); - FREE(lock, M_LOCKF); - break; - default: - retval = EINVAL; - _FREE(lock, M_LOCKF); - break; } - return (retval); + return (0); } - /* - * Update the access, modified, and node change times as specified - * by the C_ACCESS, C_UPDATE, and C_CHANGE flags respectively. The - * C_MODIFIED flag is used to specify that the node needs to be - * updated but that the times have already been set. The access and - * modified times are input parameters but the node change time is - * always taken from the current time.
If waitfor is set, then wait - * for the disk write of the node to complete. + * Update a cnode's on-disk metadata. + * + * If waitfor is set, then wait for the disk write of + * the node to complete. + * + * The cnode must be locked exclusive */ -/* -#% update vp L L L - IN struct vnode *vp; - IN struct timeval *access; - IN struct timeval *modify; - IN int waitfor; -*/ -static int -hfs_update(ap) - struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; - } */ *ap; +__private_extern__ +int +hfs_update(struct vnode *vp, __unused int waitfor) { - struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(ap->a_vp); + struct cnode *cp = VTOC(vp); struct proc *p; struct cat_fork *dataforkp = NULL; struct cat_fork *rsrcforkp = NULL; struct cat_fork datafork; - int updateflag; struct hfsmount *hfsmp; + int lockflags; int error; + p = current_proc(); hfsmp = VTOHFS(vp); - /* XXX do we really want to clear the sytem cnode flags here???? */ - if (((vp->v_flag & VSYSTEM) && (cp->c_cnid < kHFSFirstUserCatalogNodeID))|| - (VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) || - (cp->c_mode == 0)) { - cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE); + if (vnode_issystem(vp) && (cp->c_cnid < kHFSFirstUserCatalogNodeID)) { + return (0); + } + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || (cp->c_mode == 0)) { + cp->c_flag &= ~C_MODIFIED; + cp->c_touch_acctime = 0; + cp->c_touch_chgtime = 0; + cp->c_touch_modtime = 0; return (0); } - updateflag = cp->c_flag & (C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE); + hfs_touchtimes(hfsmp, cp); /* Nothing to update. */ - if (updateflag == 0) { + if ((cp->c_flag & (C_MODIFIED | C_FORCEUPDATE)) == 0) { return (0); } - /* HFS standard doesn't have access times. */ - if ((updateflag == C_ACCESS) && (VTOVCB(vp)->vcbSigWord == kHFSSigWord)) { - return (0); - } - if (updateflag & C_ACCESS) { - /* - * When the access time is the only thing changing - * then make sure its sufficiently newer before - * committing it to disk. 
- */ - if ((updateflag == C_ACCESS) && - (ap->a_access->tv_sec < (cp->c_atime + ATIME_ONDISK_ACCURACY))) { - return (0); - } - cp->c_atime = ap->a_access->tv_sec; - } - if (updateflag & C_UPDATE) { - cp->c_mtime = ap->a_modify->tv_sec; - cp->c_mtime_nsec = ap->a_modify->tv_usec * 1000; - } - if (updateflag & C_CHANGE) { - cp->c_ctime = time.tv_sec; - /* - * HFS dates that WE set must be adjusted for DST - */ - if ((VTOVCB(vp)->vcbSigWord == kHFSSigWord) && gTimeZone.tz_dsttime) { - cp->c_ctime += 3600; - cp->c_mtime = cp->c_ctime; - } - } if (cp->c_datafork) dataforkp = &cp->c_datafork->ff_data; if (cp->c_rsrcfork) rsrcforkp = &cp->c_rsrcfork->ff_data; - p = current_proc(); - /* * For delayed allocations updates are * postponed until an fsync or the file @@ -3394,9 +2837,7 @@ hfs_update(ap) (ISSET(cp->c_flag, C_DELETED) || (dataforkp && cp->c_datafork->ff_unallocblocks) || (rsrcforkp && cp->c_rsrcfork->ff_unallocblocks))) { - if (updateflag & (C_CHANGE | C_UPDATE)) - hfs_volupdate(hfsmp, VOL_UPDATE, 0); - cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); + // cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); cp->c_flag |= C_MODIFIED; HFS_KNOTE(vp, NOTE_ATTRIB); @@ -3404,16 +2845,9 @@ hfs_update(ap) return (0); } - - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - hfs_global_shared_lock_release(hfsmp); - return error; - } + if ((error = hfs_start_transaction(hfsmp)) != 0) { + return error; } - /* * For files with invalid ranges (holes) the on-disk @@ -3444,32 +2878,17 @@ hfs_update(ap) * A shared lock is sufficient since an update doesn't change * the tree and the lock on vp protects the cnode. */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (error) { - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); - return (error); - } + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); /* XXX - waitfor is not enforced */ error = cat_update(hfsmp, &cp->c_desc, &cp->c_attr, dataforkp, rsrcforkp); - /* Unlock the Catalog b-tree file. */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); - - if (updateflag & (C_CHANGE | C_UPDATE | C_FORCEUPDATE)) - hfs_volupdate(hfsmp, VOL_UPDATE, 0); + hfs_systemfile_unlock(hfsmp, lockflags); /* After the updates are finished, clear the flags */ - cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_MODIFIED | C_UPDATE | C_FORCEUPDATE); + cp->c_flag &= ~(C_MODIFIED | C_FORCEUPDATE); - // XXXdbg - if (hfsmp->jnl) { - journal_end_transaction(hfsmp->jnl); - } - hfs_global_shared_lock_release(hfsmp); + hfs_end_transaction(hfsmp); HFS_KNOTE(vp, NOTE_ATTRIB); @@ -3478,78 +2897,63 @@ hfs_update(ap) /* * Allocate a new node - * - * Upon leaving, namei buffer must be freed. 
- * */ static int -hfs_makenode(mode, dvp, vpp, cnp) - int mode; - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; +hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, + struct vnode_attr *vap, vfs_context_t ctx) { - struct cnode *cp; + struct cnode *cp = NULL; struct cnode *dcp; struct vnode *tvp; struct hfsmount *hfsmp; - struct timeval tv; - struct proc *p; struct cat_desc in_desc, out_desc; struct cat_attr attr; - cat_cookie_t cookie = {0}; - int error, started_tr = 0, grabbed_lock = 0; + struct timeval tv; + cat_cookie_t cookie; + int lockflags; + int error, started_tr = 0, got_cookie = 0; enum vtype vnodetype; + int mode; - p = cnp->cn_proc; + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) + return (error); dcp = VTOC(dvp); hfsmp = VTOHFS(dvp); *vpp = NULL; tvp = NULL; - bzero(&out_desc, sizeof(out_desc)); + out_desc.cd_flags = 0; + out_desc.cd_nameptr = NULL; + + mode = MAKEIMODE(vap->va_type, vap->va_mode); if ((mode & S_IFMT) == 0) mode |= S_IFREG; vnodetype = IFTOVT(mode); - /* Check if unmount in progress */ - if (VTOVFS(dvp)->mnt_kern_flag & MNTK_UNMOUNT) { - error = EPERM; - goto exit; - } /* Check if were out of usable disk space. */ - if ((suser(cnp->cn_cred, NULL) != 0) && (hfs_freeblks(hfsmp, 1) <= 0)) { + if ((hfs_freeblks(hfsmp, 1) <= 0) && (suser(vfs_context_ucred(ctx), NULL) != 0)) { error = ENOSPC; goto exit; } + microtime(&tv); + /* Setup the default attributes */ bzero(&attr, sizeof(attr)); attr.ca_mode = mode; attr.ca_nlink = vnodetype == VDIR ? 2 : 1; - attr.ca_mtime = time.tv_sec; - attr.ca_mtime_nsec = time.tv_usec * 1000; + attr.ca_mtime = tv.tv_sec; if ((VTOVCB(dvp)->vcbSigWord == kHFSSigWord) && gTimeZone.tz_dsttime) { attr.ca_mtime += 3600; /* Same as what hfs_update does */ } attr.ca_atime = attr.ca_ctime = attr.ca_itime = attr.ca_mtime; - if (VTOVFS(dvp)->mnt_flag & MNT_UNKNOWNPERMISSIONS) { - attr.ca_uid = hfsmp->hfs_uid; - attr.ca_gid = hfsmp->hfs_gid; - } else { - if (vnodetype == VLNK) - attr.ca_uid = dcp->c_uid; - else - attr.ca_uid = cnp->cn_cred->cr_uid; - attr.ca_gid = dcp->c_gid; - } - /* - * Don't tag as a special file (BLK or CHR) until *after* - * hfs_getnewvnode is called. This insures that any - * alias checking is defered until hfs_mknod completes. - */ - if (vnodetype == VBLK || vnodetype == VCHR) - attr.ca_mode = (attr.ca_mode & ~S_IFMT) | S_IFREG; + attr.ca_atimeondisk = attr.ca_atime; + + attr.ca_uid = vap->va_uid; + attr.ca_gid = vap->va_gid; + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); /* Tag symlinks with a type and creator. */ if (vnodetype == VLNK) { @@ -3559,30 +2963,21 @@ hfs_makenode(mode, dvp, vpp, cnp) fip->fdType = SWAP_BE32(kSymLinkFileType); fip->fdCreator = SWAP_BE32(kSymLinkCreator); } - if ((attr.ca_mode & S_ISGID) && - !groupmember(dcp->c_gid, cnp->cn_cred) && - suser(cnp->cn_cred, NULL)) { - attr.ca_mode &= ~S_ISGID; - } if (cnp->cn_flags & ISWHITEOUT) attr.ca_flags |= UF_OPAQUE; /* Setup the descriptor */ - bzero(&in_desc, sizeof(in_desc)); in_desc.cd_nameptr = cnp->cn_nameptr; in_desc.cd_namelen = cnp->cn_namelen; in_desc.cd_parentcnid = dcp->c_cnid; in_desc.cd_flags = S_ISDIR(mode) ? 
CD_ISDIR : 0; + in_desc.cd_hint = dcp->c_childhint; + in_desc.cd_encoding = 0; - // XXXdbg - hfs_global_shared_lock_acquire(hfsmp); - grabbed_lock = 1; - if (hfsmp->jnl) { - if ((error = journal_start_transaction(hfsmp->jnl)) != 0) { - goto exit; - } - started_tr = 1; + if ((error = hfs_start_transaction(hfsmp)) != 0) { + goto exit; } + started_tr = 1; /* * Reserve some space in the Catalog file. @@ -3591,29 +2986,31 @@ hfs_makenode(mode, dvp, vpp, cnp) * request can cause an hfs_inactive call to * delete an unlinked file) */ - if ((error = cat_preflight(hfsmp, CAT_CREATE | CAT_DELETE, &cookie, p))) { + if ((error = cat_preflight(hfsmp, CAT_CREATE | CAT_DELETE, &cookie, 0))) { goto exit; } + got_cookie = 1; - /* Lock catalog b-tree */ - error = hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_EXCLUSIVE, p); - if (error) - goto exit; - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_create(hfsmp, &in_desc, &attr, &out_desc); - - /* Unlock catalog b-tree */ - (void) hfs_metafilelocking(VTOHFS(dvp), kHFSCatalogFileID, LK_RELEASE, p); + if (error == 0) { + /* Update the parent directory */ + dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ + dcp->c_nlink++; + dcp->c_entries++; + dcp->c_ctime = tv.tv_sec; + dcp->c_mtime = tv.tv_sec; + (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); + HFS_KNOTE(dvp, NOTE_ATTRIB); + } + hfs_systemfile_unlock(hfsmp, lockflags); if (error) goto exit; - /* Update the parent directory */ - dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ - dcp->c_nlink++; - dcp->c_entries++; - dcp->c_flag |= C_CHANGE | C_UPDATE; - tv = time; - (void) VOP_UPDATE(dvp, &tv, &tv, 0); + /* Invalidate negative cache entries in the directory */ + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + cache_purge_negatives(dvp); + if (vnodetype == VDIR) { HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); } else { @@ -3630,77 +3027,74 @@ hfs_makenode(mode, dvp, vpp, cnp) // deadlock with someone on that other file system (since we could be // holding two transaction locks as well as various vnodes and we did // not obtain the locks on them in the proper order). - // + // // NOTE: this means that if the quota check fails or we have to update // the change time on a block-special device that those changes // will happen as part of independent transactions. // if (started_tr) { - journal_end_transaction(hfsmp->jnl); - started_tr = 0; - } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - grabbed_lock = 0; + hfs_end_transaction(hfsmp); + started_tr = 0; } - /* Create a vnode for the object just created: */ - error = hfs_getnewvnode(hfsmp, NULL, &out_desc, 0, &attr, NULL, &tvp); + /* + * Create a vnode for the object just created. + * + * The cnode is locked on successful return. 
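+ *
+ * The started_tr teardown just above is the idiom used throughout
+ * this file; schematically:
+ *
+ *	int started_tr = 0, error;
+ *	if ((error = hfs_start_transaction(hfsmp)) != 0) goto exit;
+ *	started_tr = 1;
+ *	// ... catalog work ...
+ *	exit:
+ *	if (started_tr) { hfs_end_transaction(hfsmp); started_tr = 0; }
+ *
+ * so no error path ends a transaction it did not start.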
+ */ + error = hfs_getnewvnode(hfsmp, dvp, cnp, &out_desc, 0, &attr, NULL, &tvp); if (error) goto exit; // XXXdbg - cache_enter(dvp, tvp, cnp); + //cache_enter(dvp, tvp, cnp); -#if QUOTA cp = VTOC(tvp); +#if QUOTA /* * We call hfs_chkiq with FORCE flag so that if we * fall through to the rmdir we actually have * accounted for the inode */ - if ((error = hfs_getinoquota(cp)) || - (error = hfs_chkiq(cp, 1, cnp->cn_cred, FORCE))) { - if (tvp->v_type == VDIR) - VOP_RMDIR(dvp,tvp, cnp); - else - VOP_REMOVE(dvp,tvp, cnp); - - // because VOP_RMDIR and VOP_REMOVE already - // have done the vput() - dvp = NULL; - goto exit; - } -#endif /* QUOTA */ - - /* - * restore vtype and mode for VBLK and VCHR - */ - if (vnodetype == VBLK || vnodetype == VCHR) { - struct cnode *cp; - - cp = VTOC(tvp); - cp->c_mode = mode; - tvp->v_type = IFTOVT(mode); - cp->c_flag |= C_CHANGE; - tv = time; - if ((error = VOP_UPDATE(tvp, &tv, &tv, 1))) { - vput(tvp); + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_QUOTA) { + if ((error = hfs_getinoquota(cp)) || + (error = hfs_chkiq(cp, 1, vfs_context_ucred(ctx), FORCE))) { + + if (vnode_isdir(tvp)) + (void) hfs_removedir(dvp, tvp, cnp, 0); + else { + hfs_unlock(cp); + hfs_lock_truncate(cp, TRUE); + hfs_lock(cp, HFS_FORCE_LOCK); + (void) hfs_removefile(dvp, tvp, cnp, 0, 0); + hfs_unlock_truncate(cp); + } + /* + * we successfully allocated a new vnode, but + * the quota check is telling us we're beyond + * our limit, so we need to dump our lock + reference + */ + hfs_unlock(cp); + vnode_put(tvp); + goto exit; } } +#endif /* QUOTA */ + /* Remember if any ACL data was set. */ + if (VATTR_IS_ACTIVE(vap, va_acl) && + (vap->va_acl != NULL)) { + cp->c_attr.ca_recflags |= kHFSHasSecurityMask; + cp->c_touch_chgtime = TRUE; + (void) hfs_update(tvp, TRUE); + } *vpp = tvp; exit: cat_releasedesc(&out_desc); - cat_postflight(hfsmp, &cookie, p); - - if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); + if (got_cookie) { + cat_postflight(hfsmp, &cookie, 0); } /* * Check if a file is located in the "Cleanup At Startup" @@ -3711,55 +3105,61 @@ exit: (dcp->c_desc.cd_nameptr != NULL) && (strcmp(dcp->c_desc.cd_nameptr, CARBON_TEMP_DIR_NAME) == 0)) { struct vnode *ddvp; - cnid_t parid; - parid = dcp->c_parentcnid; - vput(dvp); + hfs_unlock(dcp); dvp = NULL; /* * The parent of "Cleanup At Startup" should * have the ASCII name of the userid. 
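 *
 * (i.e. a parent folder literally named "501" maps to uid 501; the
 * conversion below is plain strtoul, e.g.:
 *
 *	uid_t uid = (uid_t)strtoul("501", NULL, 0);	// -> 501
 *
 * the name shown is illustrative.)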
*/ - if (VFS_VGET(HFSTOVFS(hfsmp), &parid, &ddvp) == 0) { - if (VTOC(ddvp)->c_desc.cd_nameptr) { - uid_t uid; - - uid = strtoul(VTOC(ddvp)->c_desc.cd_nameptr, 0, 0); - if (uid == cp->c_uid || uid == cnp->cn_cred->cr_uid) { - cp->c_flags |= UF_NODUMP; - cp->c_flag |= C_CHANGE; - } + if (hfs_vget(hfsmp, dcp->c_parentcnid, &ddvp, 0) == 0) { + if (VTOC(ddvp)->c_desc.cd_nameptr) { + uid_t uid; + + uid = strtoul(VTOC(ddvp)->c_desc.cd_nameptr, 0, 0); + if ((uid == cp->c_uid) || + (uid == vfs_context_ucred(ctx)->cr_uid)) { + cp->c_flags |= UF_NODUMP; + cp->c_touch_chgtime = TRUE; + } } - vput(ddvp); + hfs_unlock(VTOC(ddvp)); + vnode_put(ddvp); } } - if (dvp) - vput(dvp); - - if (started_tr) { - journal_end_transaction(hfsmp->jnl); - started_tr = 0; + if (dvp) { + hfs_unlock(dcp); + } + if (error == 0 && cp != NULL) { + hfs_unlock(cp); } - if (grabbed_lock) { - hfs_global_shared_lock_release(hfsmp); - grabbed_lock = 0; + if (started_tr) { + hfs_end_transaction(hfsmp); + started_tr = 0; } return (error); } -static int -hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, struct proc *p) +/* + * WARNING - assumes caller has cnode lock. + */ +__private_extern__ +int +hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, __unused struct proc *p) { struct vnode *rvp; struct cnode *cp = VTOC(vp); int error; + int vid; if ((rvp = cp->c_rsrc_vp)) { + vid = vnode_vid(rvp); + /* Use existing vnode */ - error = vget(rvp, 0, p); + error = vnode_getwithvid(rvp, vid); if (error) { char * name = VTOC(vp)->c_desc.cd_nameptr; @@ -3770,23 +3170,38 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, stru } } else { struct cat_fork rsrcfork; + struct componentname cn; + int lockflags; - /* Lock catalog b-tree */ - error = hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_SHARED, p); - if (error) - return (error); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); /* Get resource fork data */ error = cat_lookup(hfsmp, &cp->c_desc, 1, (struct cat_desc *)0, - (struct cat_attr *)0, &rsrcfork); + (struct cat_attr *)0, &rsrcfork, NULL); - /* Unlock the Catalog */ - (void) hfs_metafilelocking(hfsmp, kHFSCatalogFileID, LK_RELEASE, p); + hfs_systemfile_unlock(hfsmp, lockflags); if (error) return (error); - error = hfs_getnewvnode(hfsmp, cp, &cp->c_desc, 1, &cp->c_attr, - &rsrcfork, &rvp); + /* + * Supply hfs_getnewvnode with a component name. + */ + cn.cn_pnbuf = NULL; + if (cp->c_desc.cd_nameptr) { + MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + cn.cn_nameiop = LOOKUP; + cn.cn_flags = ISLASTCN | HASBUF; + cn.cn_context = NULL; + cn.cn_pnlen = MAXPATHLEN; + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_hash = 0; + cn.cn_consume = 0; + cn.cn_namelen = sprintf(cn.cn_nameptr, "%s%s", cp->c_desc.cd_nameptr, _PATH_RSRCFORKSPEC); + } + error = hfs_getnewvnode(hfsmp, vnode_parent(vp), cn.cn_pnbuf ? &cn : NULL, + &cp->c_desc, 2, &cp->c_attr, &rsrcfork, &rvp); + if (cn.cn_pnbuf) + FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); if (error) return (error); } @@ -3800,28 +3215,34 @@ static void filt_hfsdetach(struct knote *kn) { struct vnode *vp; - int result; - struct proc *p = current_proc(); vp = (struct vnode *)kn->kn_hook; - if (1) { /* ! KNDETACH_VNLOCKED */ - result = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (result) return; - }; + if (vnode_getwithvid(vp, kn->kn_hookid)) + return; - result = KNOTE_DETACH(&VTOC(vp)->c_knotes, kn); - - if (1) { /* ! KNDETACH_VNLOCKED */ - VOP_UNLOCK(vp, 0, p); - }; + if (1) { /* !
KNDETACH_VNLOCKED */ + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + (void) KNOTE_DETACH(&VTOC(vp)->c_knotes, kn); + hfs_unlock(VTOC(vp)); + } + } + + vnode_put(vp); } /*ARGSUSED*/ static int filt_hfsread(struct knote *kn, long hint) { - struct vnode *vp = (struct vnode *)kn->kn_fp->f_data; + struct vnode *vp = (struct vnode *)kn->kn_hook; + int dropvp = 0; + if (hint == 0) { + if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + dropvp = 1; + } if (hint == NOTE_REVOKE) { /* * filesystem is gone, so set the EOF flag and schedule @@ -3831,7 +3252,13 @@ filt_hfsread(struct knote *kn, long hint) return (1); } - kn->kn_data = VTOF(vp)->ff_size - kn->kn_fp->f_offset; + /* poll(2) semantics dictate always saying there is data */ + kn->kn_data = (!(kn->kn_flags & EV_POLL)) ? + VTOF(vp)->ff_size - kn->kn_fp->f_fglob->fg_offset : 1; + + if (dropvp) + vnode_put(vp); + return (kn->kn_data != 0); } @@ -3839,14 +3266,23 @@ filt_hfsread(struct knote *kn, long hint) static int filt_hfswrite(struct knote *kn, long hint) { + int dropvp = 0; + + if (hint == 0) { + if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + vnode_put(kn->kn_hook); + } if (hint == NOTE_REVOKE) { /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ + kn->kn_data = 0; kn->kn_flags |= (EV_EOF | EV_ONESHOT); + return (1); } - kn->kn_data = 0; return (1); } @@ -3855,12 +3291,19 @@ static int filt_hfsvnode(struct knote *kn, long hint) { + if (hint == 0) { + if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + vnode_put(kn->kn_hook); + } if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; - if (hint == NOTE_REVOKE) { - kn->kn_flags |= EV_EOF; + if ((hint == NOTE_REVOKE)) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } + return (kn->kn_fflags != 0); } @@ -3872,35 +3315,31 @@ static struct filterops hfsvnode_filtops = { 1, NULL, filt_hfsdetach, filt_hfsvnode }; /* - # - #% kqfilt_add vp L L L - # - vop_kqfilt_add - IN struct vnode *vp; - IN struct knote *kn; - IN struct proc *p; + * Add a kqueue filter. 
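+ *
+ * These back kevent(2) registrations on HFS vnodes; minimal user-space
+ * use, with fd an open file on the volume:
+ *
+ *	#include <sys/event.h>
+ *	int kq = kqueue();
+ *	struct kevent kev;
+ *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE, 0, NULL);
+ *	kevent(kq, &kev, 1, NULL, 0, NULL);
+ *
+ * EVFILT_READ/EVFILT_WRITE on regular files route here as well.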
*/ static int -hfs_kqfilt_add(ap) - struct vop_kqfilt_add_args /* { +hfs_vnop_kqfiltadd( + struct vnop_kqfilt_add_args /* { struct vnode *a_vp; struct knote *a_kn; struct proc *p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; + int error; switch (kn->kn_filter) { case EVFILT_READ: - if (vp->v_type == VREG) { + if (vnode_isreg(vp)) { kn->kn_fop = &hfsread_filtops; } else { return EINVAL; }; break; case EVFILT_WRITE: - if (vp->v_type == VREG) { + if (vnode_isreg(vp)) { kn->kn_fop = &hfswrite_filtops; } else { return EINVAL; @@ -3914,33 +3353,27 @@ hfs_kqfilt_add(ap) } kn->kn_hook = (caddr_t)vp; + kn->kn_hookid = vnode_vid(vp); - /* simple_lock(&vp->v_pollinfo.vpi_lock); */ + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + return (error); KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn); - /* simple_unlock(&vp->v_pollinfo.vpi_lock); */ + hfs_unlock(VTOC(vp)); return (0); } /* - # - #% kqfilt_remove vp L L L - # - vop_kqfilt_remove - IN struct vnode *vp; - IN uintptr_t ident; - IN struct proc *p; + * Remove a kqueue filter */ static int -hfs_kqfilt_remove(ap) - struct vop_kqfilt_remove_args /* { +hfs_vnop_kqfiltremove(ap) + struct vnop_kqfilt_remove_args /* { struct vnode *a_vp; uintptr_t ident; - struct proc *p; + vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - uintptr_t ident = ap->a_ident; int result; result = ENOTSUP; /* XXX */ @@ -3953,18 +3386,18 @@ hfs_kqfilt_remove(ap) */ static int hfsspec_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { /* * Set access flag. */ - VTOC(ap->a_vp)->c_flag |= C_ACCESS; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); + VTOC(ap->a_vp)->c_touch_acctime = TRUE; + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap)); } /* @@ -3972,18 +3405,19 @@ hfsspec_read(ap) */ static int hfsspec_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { /* * Set update and change flags. 
*/ - VTOC(ap->a_vp)->c_flag |= C_CHANGE | C_UPDATE; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); + VTOC(ap->a_vp)->c_touch_chgtime = TRUE; + VTOC(ap->a_vp)->c_touch_modtime = TRUE; + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -3993,21 +3427,23 @@ hfsspec_write(ap) */ static int hfsspec_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); + struct cnode *cp; - simple_lock(&vp->v_interlock); - if (ap->a_vp->v_usecount > 1) - CTIMES(cp, &time, &time); - simple_unlock(&vp->v_interlock); - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); + if (vnode_isinuse(ap->a_vp, 1)) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + cp = VTOC(vp); + hfs_touchtimes(VTOHFS(vp), cp); + hfs_unlock(cp); + } + } + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap)); } #if FIFO @@ -4016,11 +3452,11 @@ hfsspec_close(ap) */ static int hfsfifo_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { extern int (**fifo_vnodeop_p)(void *); @@ -4028,8 +3464,8 @@ hfsfifo_read(ap) /* * Set access flag. */ - VTOC(ap->a_vp)->c_flag |= C_ACCESS; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); + VTOC(ap->a_vp)->c_touch_acctime = TRUE; + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_read), ap)); } /* @@ -4037,11 +3473,11 @@ hfsfifo_read(ap) */ static int hfsfifo_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { extern int (**fifo_vnodeop_p)(void *); @@ -4049,8 +3485,9 @@ hfsfifo_write(ap) /* * Set update and change flags. 
*/ - VTOC(ap->a_vp)->c_flag |= C_CHANGE | C_UPDATE; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); + VTOC(ap->a_vp)->c_touch_chgtime = TRUE; + VTOC(ap->a_vp)->c_touch_modtime = TRUE; + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -4060,22 +3497,24 @@ hfsfifo_write(ap) */ static int hfsfifo_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { extern int (**fifo_vnodeop_p)(void *); struct vnode *vp = ap->a_vp; - struct cnode *cp = VTOC(vp); + struct cnode *cp; - simple_lock(&vp->v_interlock); - if (ap->a_vp->v_usecount > 1) - CTIMES(cp, &time, &time); - simple_unlock(&vp->v_interlock); - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); + if (vnode_isinuse(ap->a_vp, 1)) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + cp = VTOC(vp); + hfs_touchtimes(VTOHFS(vp), cp); + hfs_unlock(cp); + } + } + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } /* @@ -4085,14 +3524,14 @@ hfsfifo_close(ap) */ int hfsfifo_kqfilt_add(ap) - struct vop_kqfilt_add_args *ap; + struct vnop_kqfilt_add_args *ap; { extern int (**fifo_vnodeop_p)(void *); int error; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_add), ap); + error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap); if (error) - error = hfs_kqfilt_add(ap); + error = hfs_vnop_kqfiltadd(ap); return (error); } @@ -4103,109 +3542,126 @@ hfsfifo_kqfilt_add(ap) */ int hfsfifo_kqfilt_remove(ap) - struct vop_kqfilt_remove_args *ap; + struct vnop_kqfilt_remove_args *ap; { extern int (**fifo_vnodeop_p)(void *); int error; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_remove), ap); + error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap); if (error) - error = hfs_kqfilt_remove(ap); + error = hfs_vnop_kqfiltremove(ap); return (error); } #endif /* FIFO */ +/* + * Synchronize a file's in-core state with that on disk. + */ +static int +hfs_vnop_fsync(ap) + struct vnop_fsync_args /* { + struct vnode *a_vp; + int a_waitfor; + vfs_context_t a_context; + } */ *ap; +{ + struct vnode* vp = ap->a_vp; + int error; + + /* + * We need to allow ENOENT lock errors since unlink + * systenm call can call VNOP_FSYNC during vclean. 
+ */ + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + if (error) + return (0); + + error = hfs_fsync(vp, ap->a_waitfor, 0, vfs_context_proc(ap->a_context)); + + hfs_unlock(VTOC(vp)); + return (error); +} /***************************************************************************** * * VOP Tables * *****************************************************************************/ -int hfs_cache_lookup(); /* in hfs_lookup.c */ -int hfs_lookup(); /* in hfs_lookup.c */ -int hfs_read(); /* in hfs_readwrite.c */ -int hfs_write(); /* in hfs_readwrite.c */ -int hfs_ioctl(); /* in hfs_readwrite.c */ -int hfs_select(); /* in hfs_readwrite.c */ -int hfs_bmap(); /* in hfs_readwrite.c */ -int hfs_strategy(); /* in hfs_readwrite.c */ -int hfs_truncate(); /* in hfs_readwrite.c */ -int hfs_allocate(); /* in hfs_readwrite.c */ -int hfs_pagein(); /* in hfs_readwrite.c */ -int hfs_pageout(); /* in hfs_readwrite.c */ -int hfs_search(); /* in hfs_search.c */ -int hfs_bwrite(); /* in hfs_readwrite.c */ -int hfs_link(); /* in hfs_link.c */ -int hfs_blktooff(); /* in hfs_readwrite.c */ -int hfs_offtoblk(); /* in hfs_readwrite.c */ -int hfs_cmap(); /* in hfs_readwrite.c */ -int hfs_getattrlist(); /* in hfs_attrlist.c */ -int hfs_setattrlist(); /* in hfs_attrlist.c */ -int hfs_readdirattr(); /* in hfs_attrlist.c */ -int hfs_inactive(); /* in hfs_cnode.c */ -int hfs_reclaim(); /* in hfs_cnode.c */ +int hfs_vnop_readdirattr(struct vnop_readdirattr_args *); /* in hfs_attrlist.c */ +int hfs_vnop_inactive(struct vnop_inactive_args *); /* in hfs_cnode.c */ +int hfs_vnop_reclaim(struct vnop_reclaim_args *); /* in hfs_cnode.c */ +int hfs_vnop_link(struct vnop_link_args *); /* in hfs_link.c */ +int hfs_vnop_lookup(struct vnop_lookup_args *); /* in hfs_lookup.c */ +int hfs_vnop_search(struct vnop_searchfs_args *); /* in hfs_search.c */ + +int hfs_vnop_read(struct vnop_read_args *); /* in hfs_readwrite.c */ +int hfs_vnop_write(struct vnop_write_args *); /* in hfs_readwrite.c */ +int hfs_vnop_ioctl(struct vnop_ioctl_args *); /* in hfs_readwrite.c */ +int hfs_vnop_select(struct vnop_select_args *); /* in hfs_readwrite.c */ +int hfs_vnop_strategy(struct vnop_strategy_args *); /* in hfs_readwrite.c */ +int hfs_vnop_allocate(struct vnop_allocate_args *); /* in hfs_readwrite.c */ +int hfs_vnop_pagein(struct vnop_pagein_args *); /* in hfs_readwrite.c */ +int hfs_vnop_pageout(struct vnop_pageout_args *); /* in hfs_readwrite.c */ +int hfs_vnop_bwrite(struct vnop_bwrite_args *); /* in hfs_readwrite.c */ +int hfs_vnop_blktooff(struct vnop_blktooff_args *); /* in hfs_readwrite.c */ +int hfs_vnop_offtoblk(struct vnop_offtoblk_args *); /* in hfs_readwrite.c */ +int hfs_vnop_blockmap(struct vnop_blockmap_args *); /* in hfs_readwrite.c */ +int hfs_vnop_getxattr(struct vnop_getxattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_setxattr(struct vnop_setxattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_removexattr(struct vnop_removexattr_args *); /* in hfs_xattr.c */ +int hfs_vnop_listxattr(struct vnop_listxattr_args *); /* in hfs_xattr.c */ int (**hfs_vnodeop_p)(void *); #define VOPFUNC int (*)(void *) struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)hfs_cache_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)hfs_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)hfs_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)hfs_open }, /* open */ - { &vop_close_desc, (VOPFUNC)hfs_close }, /* close */ - { &vop_access_desc, (VOPFUNC)hfs_access }, /* access */ - { 
&vop_getattr_desc, (VOPFUNC)hfs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)hfs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)hfs_read }, /* read */ - { &vop_write_desc, (VOPFUNC)hfs_write }, /* write */ - { &vop_ioctl_desc, (VOPFUNC)hfs_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)hfs_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ - { &vop_exchange_desc, (VOPFUNC)hfs_exchange }, /* exchange */ - { &vop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)hfs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)nop_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)hfs_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)hfs_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)hfs_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)hfs_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)hfs_rmdir }, /* rmdir */ - { &vop_mkcomplex_desc, (VOPFUNC)err_mkcomplex }, /* mkcomplex */ - { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist }, /* getattrlist */ - { &vop_setattrlist_desc, (VOPFUNC)hfs_setattrlist }, /* setattrlist */ - { &vop_symlink_desc, (VOPFUNC)hfs_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)hfs_readdir }, /* readdir */ - { &vop_readdirattr_desc, (VOPFUNC)hfs_readdirattr }, /* readdirattr */ - { &vop_readlink_desc, (VOPFUNC)hfs_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)nop_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)hfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)hfs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)hfs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)hfs_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)hfs_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)hfs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)hfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)hfs_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)hfs_advlock }, /* advlock */ - { &vop_reallocblks_desc, (VOPFUNC)err_reallocblks }, /* reallocblks */ - { &vop_truncate_desc, (VOPFUNC)hfs_truncate }, /* truncate */ - { &vop_allocate_desc, (VOPFUNC)hfs_allocate }, /* allocate */ - { &vop_update_desc, (VOPFUNC)hfs_update }, /* update */ - { &vop_searchfs_desc, (VOPFUNC)hfs_search }, /* search fs */ - { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite }, /* bwrite */ - { &vop_pagein_desc, (VOPFUNC)hfs_pagein }, /* pagein */ - { &vop_pageout_desc,(VOPFUNC) hfs_pageout }, /* pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ - { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)hfs_cmap }, /* cmap */ - { &vop_kqfilt_add_desc, (VOPFUNC)hfs_kqfilt_add }, /* kqfilt_add */ - { &vop_kqfilt_remove_desc, (VOPFUNC)hfs_kqfilt_remove }, /* kqfilt_remove */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)hfs_vnop_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)hfs_vnop_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)hfs_vnop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfs_vnop_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfs_vnop_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfs_vnop_write }, /* write */ + { &vnop_ioctl_desc, 
(VOPFUNC)hfs_vnop_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_exchange_desc, (VOPFUNC)hfs_vnop_exchange }, /* exchange */ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)hfs_vnop_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)hfs_vnop_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)hfs_vnop_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)hfs_vnop_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)hfs_vnop_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)hfs_vnop_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir }, /* readdir */ + { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */ + { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_allocate_desc, (VOPFUNC)hfs_vnop_allocate }, /* allocate */ + { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ + { &vnop_pageout_desc,(VOPFUNC) hfs_vnop_pageout }, /* pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_kqfilt_add_desc, (VOPFUNC)hfs_vnop_kqfiltadd }, /* kqfilt_add */ + { &vnop_kqfilt_remove_desc, (VOPFUNC)hfs_vnop_kqfiltremove }, /* kqfilt_remove */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, { NULL, (VOPFUNC)NULL } }; @@ -4214,57 +3670,41 @@ struct vnodeopv_desc hfs_vnodeop_opv_desc = int (**hfs_specop_p)(void *); struct vnodeopv_entry_desc hfs_specop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)hfsspec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)hfs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)hfs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)hfs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)hfsspec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)hfsspec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)spec_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)hfs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)spec_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ - { &vop_link_desc, 
(VOPFUNC)spec_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist }, - { &vop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)spec_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)hfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)hfs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)hfs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)hfs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)hfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)spec_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)spec_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)spec_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)spec_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)hfs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite }, - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)hfs_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)hfs_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ - { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk }, /* offtoblk */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfsspec_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfsspec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfsspec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ 
+ { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, + { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_specop_opv_desc = @@ -4273,59 +3713,43 @@ struct vnodeopv_desc hfs_specop_opv_desc = #if FIFO int (**hfs_fifoop_p)(void *); struct vnodeopv_entry_desc hfs_fifoop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)fifo_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vop_close_desc, (VOPFUNC)hfsfifo_close }, /* close */ - { &vop_access_desc, (VOPFUNC)hfs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)hfs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)hfs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)hfsfifo_read }, /* read */ - { &vop_write_desc, (VOPFUNC)hfsfifo_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)fifo_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)hfs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)fifo_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)fifo_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ - { &vop_getattrlist_desc, (VOPFUNC)hfs_getattrlist }, - { &vop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)fifo_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)hfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)hfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)hfs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)hfs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)fifo_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)hfs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)hfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)fifo_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)fifo_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)fifo_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)fifo_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)hfs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)hfs_bwrite }, - { &vop_pagein_desc, (VOPFUNC)hfs_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)hfs_pageout }, /* Pageout */ - { 
&vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ - { &vop_blktooff_desc, (VOPFUNC)hfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)hfs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)hfs_cmap }, /* cmap */ - { &vop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add }, /* kqfilt_add */ - { &vop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove }, /* kqfilt_remove */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfsfifo_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_vnop_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfsfifo_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfsfifo_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_vnop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)fifo_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add }, /* kqfilt_add */ + { &vnop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove }, /* kqfilt_remove */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_fifoop_opv_desc = diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c new file mode 100644 index 000000000..5030db050 --- /dev/null +++ b/bsd/hfs/hfs_xattr.c @@ -0,0 +1,1062 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <sys/systm.h>
+#include <sys/kauth.h>
+#include <sys/malloc.h>
+#include <sys/uio.h>
+#include <sys/utfconv.h>
+#include <sys/vnode.h>
+#include <sys/xattr.h>
+
+#include "hfs.h"
+#include "hfs_cnode.h"
+#include "hfs_mount.h"
+#include "hfs_format.h"
+#include "hfs_endian.h"
+
+#include "hfscommon/headers/BTreesInternal.h"
+
+
+#define ATTRIBUTE_FILE_NODE_SIZE 8192
+
+
+/* State information for the listattr_callback callback function. */
+struct listattr_callback_state {
+	u_int32_t fileID;
+	int result;
+	uio_t uio;
+	size_t size;
+};
+
+#define HFS_MAXATTRIBUTESIZE (1024*1024)
+
+/* HFS Internal Names */
+#define XATTR_EXTENDEDSECURITY_NAME "system.extendedsecurity"
+
+
+#define RESOURCE_FORK_EXISTS(VP) \
+	((VTOC((VP))->c_blocks - VTOF((VP))->ff_blocks) > 0)
+
+static u_int32_t emptyfinfo[8] = {0};
+
+
+extern int hfs_create_attr_btree(struct hfsmount *hfsmp, uint32_t nodesize, uint32_t nodecnt);
+
+
+int hfs_vnop_getxattr(struct vnop_getxattr_args *ap);
+int hfs_vnop_setxattr(struct vnop_setxattr_args *ap);
+int hfs_vnop_removexattr(struct vnop_removexattr_args *ap);
+int hfs_vnop_listxattr(struct vnop_listxattr_args *ap);
+int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey);
+
+
+
+static int listattr_callback(const HFSPlusAttrKey *key, const HFSPlusAttrData *data,
+                             struct listattr_callback_state *state);
+
+static int buildkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key);
+
+static int getnodecount(struct hfsmount *hfsmp, size_t nodesize);
+
+static size_t getmaxinlineattrsize(struct vnode * attrvp);
+
+/*
+ * Retrieve the data of an extended attribute.
+ */
+__private_extern__
+int
+hfs_vnop_getxattr(struct vnop_getxattr_args *ap)
+/*
+	struct vnop_getxattr_args {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		char * a_name;
+		uio_t a_uio;
+		size_t *a_size;
+		int a_options;
+		vfs_context_t a_context;
+	};
+*/
+{
+	struct vnode *vp = ap->a_vp;
+	struct hfsmount *hfsmp;
+	uio_t uio = ap->a_uio;
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile;
+	FSBufferDescriptor btdata;
+	HFSPlusAttrData * datap = NULL;
+	size_t bufsize;
+	UInt16 datasize;
+	int lockflags;
+	int result;
+
+	if (ap->a_name == NULL || ap->a_name[0] == '\0') {
+		return (EINVAL);	/* invalid name */
+	}
+	hfsmp = VTOHFS(vp);
+
+	if (!VNODE_IS_RSRC(vp)) {
+		/* Get the Finder Info. */
+		if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
+			bufsize = 32;
+
+			/* If Finder Info is empty then it doesn't exist. */
+			if (bcmp(VTOC(vp)->c_finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) {
+				return (ENOATTR);
+			}
+			if (uio == NULL) {
+				*ap->a_size = bufsize;
+				return (0);
+			}
+			if (uio_resid(uio) < bufsize)
+				return (ERANGE);
+
+			result = uiomove((caddr_t) &VTOC(vp)->c_finderinfo, bufsize, uio);
+
+			return (result);
+		}
+		/* Read the Resource Fork. */
+		if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) {
+			struct vnode *rvp = NULL;
+
+			if ( !vnode_isreg(vp) ) {
+				return (EPERM);
+			}
+			if ( !RESOURCE_FORK_EXISTS(vp)) {
+				return (ENOATTR);
+			}
+			if ((result = hfs_vgetrsrc(hfsmp, vp, &rvp, vfs_context_proc(ap->a_context)))) {
+				return (result);
+			}
+			if (uio == NULL) {
+				*ap->a_size = (size_t)VTOF(rvp)->ff_size;
+			} else {
+				result = VNOP_READ(rvp, uio, 0, ap->a_context);
+			}
+			vnode_put(rvp);
+			return (result);
+		}
+	}
+	/*
+	 * Standard HFS only supports native FinderInfo and Resource Forks.
+	 */
+	if (hfsmp->hfs_flags & HFS_STANDARD) {
+		return (EPERM);
+	}
+	/* Bail if we don't have any extended attributes. */
+	if ((hfsmp->hfs_attribute_vp == NULL) ||
+	    (VTOC(vp)->c_attr.ca_recflags & kHFSHasAttributesMask) == 0) {
+		return (ENOATTR);
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
+	bufsize = sizeof(HFSPlusAttrData) - 2;
+	if (uio)
+		bufsize += uio_resid(uio);
+	MALLOC(datap, HFSPlusAttrData *, bufsize, M_TEMP, M_WAITOK);
+	btdata.bufferAddress = datap;
+	btdata.itemSize = bufsize;
+	btdata.itemCount = 1;
+
+	result = buildkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+	if (result)
+		goto exit;
+
+	/* Lookup the attribute. */
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+	result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL);
+	hfs_systemfile_unlock(hfsmp, lockflags);
+
+	if (result) {
+		if (result == btNotFound)
+			result = ENOATTR;
+		goto exit;
+	}
+
+	*ap->a_size = datap->attrSize;
+
+	/* Copy out the attribute data. */
+	if (uio) {
+		if (datap->attrSize > uio_resid(uio))
+			result = ERANGE;
+		else
+			result = uiomove((caddr_t) &datap->attrData, datap->attrSize, uio);
+	}
+exit:
+	FREE(datap, M_TEMP);
+	FREE(iterator, M_TEMP);
+
+	return MacToVFSError(result);
+}
+
+/*
+ * Set the data of an extended attribute.
+ */
+__private_extern__
+int
+hfs_vnop_setxattr(struct vnop_setxattr_args *ap)
+/*
+	struct vnop_setxattr_args {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		char * a_name;
+		uio_t a_uio;
+		int a_options;
+		vfs_context_t a_context;
+	};
+*/
+{
+	struct vnode *vp = ap->a_vp;
+	struct hfsmount *hfsmp;
+	uio_t uio = ap->a_uio;
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile;
+	size_t attrsize;
+	FSBufferDescriptor btdata;
+	HFSPlusAttrData * datap = NULL;
+	UInt16 datasize;
+	int lockflags;
+	int result;
+
+	if (ap->a_name == NULL || ap->a_name[0] == '\0') {
+		return (EINVAL);	/* invalid name */
+	}
+	hfsmp = VTOHFS(vp);
+	if (VNODE_IS_RSRC(vp)) {
+		return (EPERM);
+	}
+	/* Set the Finder Info. */
+	if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
+		attrsize = 32;
+
+		if (bcmp(VTOC(vp)->c_finderinfo, emptyfinfo, sizeof(emptyfinfo))) {
+			/* attr exists and "create" was specified. */
+			if (ap->a_options & XATTR_CREATE) {
+				return (EEXIST);
+			}
+		} else {
+			/* attr doesn't exist and "replace" was specified. */
+			if (ap->a_options & XATTR_REPLACE) {
+				return (ENOATTR);
+			}
+		}
+		if (uio_resid(uio) != attrsize)
+			return (ERANGE);
+
+		result = uiomove((caddr_t) &VTOC(vp)->c_finderinfo, attrsize, uio);
+		if (result == 0) {
+			VTOC(vp)->c_touch_chgtime = TRUE;
+			VTOC(vp)->c_flag |= C_MODIFIED;
+			result = hfs_update(vp, FALSE);
+		}
+		return (result);
+	}
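For context, hfs_vnop_getxattr() above follows the standard Darwin size-probe convention: when the caller supplies no buffer (uio == NULL), only *a_size is filled in. A minimal userland sketch of the matching two-call pattern, assuming the Tiger-era <sys/xattr.h> interface (the helper name and error handling here are illustrative only):

#include <stdio.h>
#include <stdlib.h>
#include <sys/xattr.h>

/* Probe for the attribute size with a NULL buffer, then fetch it. */
static int
read_xattr(const char *path, const char *name)
{
	ssize_t size;
	char *buf;

	size = getxattr(path, name, NULL, 0, 0, XATTR_NOFOLLOW);
	if (size < 0)
		return (-1);		/* errno is ENOATTR if it doesn't exist */
	if ((buf = malloc(size + 1)) == NULL)
		return (-1);
	size = getxattr(path, name, buf, size, 0, XATTR_NOFOLLOW);
	if (size >= 0)
		printf("%s: %ld bytes\n", name, (long)size);
	free(buf);
	return (size < 0 ? -1 : 0);
}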
+	/* Write the Resource Fork. */
+	if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) {
+		struct vnode *rvp = NULL;
+
+		if (!vnode_isreg(vp)) {
+			return (EPERM);
+		}
+		if (RESOURCE_FORK_EXISTS(vp)) {
+			/* attr exists and "create" was specified. */
+			if (ap->a_options & XATTR_CREATE) {
+				return (EEXIST);
+			}
+		} else {
+			/* attr doesn't exist and "replace" was specified. */
+			if (ap->a_options & XATTR_REPLACE) {
+				return (ENOATTR);
+			}
+		}
+		if ((result = hfs_vgetrsrc(hfsmp, vp, &rvp, vfs_context_proc(ap->a_context)))) {
+			return (result);
+		}
+		result = VNOP_WRITE(rvp, uio, 0, ap->a_context);
+		vnode_put(rvp);
+		return (result);
+	}
+	/*
+	 * Standard HFS only supports native FinderInfo and Resource Forks.
+	 */
+	if (hfsmp->hfs_flags & HFS_STANDARD) {
+		return (EPERM);
+	}
+	if (hfsmp->hfs_max_inline_attrsize == 0) {
+		hfsmp->hfs_max_inline_attrsize = getmaxinlineattrsize(hfsmp->hfs_attribute_vp);
+	}
+	attrsize = uio_resid(uio);
+	if (attrsize > hfsmp->hfs_max_inline_attrsize) {
+		/*
+		 * XXX Need to support extent-based attributes XXX
+		 */
+		return (E2BIG);
+	}
+	/* Calculate size of record rounded up to multiple of 2 bytes. */
+	datasize = sizeof(HFSPlusAttrData) - 2 + attrsize + ((attrsize & 1) ? 1 : 0);
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
+	MALLOC(datap, HFSPlusAttrData *, datasize, M_TEMP, M_WAITOK);
+	btdata.bufferAddress = datap;
+	btdata.itemSize = datasize;
+	btdata.itemCount = 1;
+	datap->recordType = kHFSPlusAttrInlineData;
+	datap->reserved[0] = 0;
+	datap->reserved[1] = 0;
+	datap->attrSize = attrsize;
+
+	/* Copy in the attribute data. */
+	result = uiomove((caddr_t) &datap->attrData, attrsize, uio);
+	if (result) {
+		goto exit2;
+	}
+	/* Build a b-tree key. */
+	result = buildkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+	if (result) {
+		goto exit2;
+	}
+	/* Start a transaction for our changes. */
+	if (hfs_start_transaction(hfsmp) != 0) {
+		result = EINVAL;
+		goto exit2;
+	}
+
+	/* once we started the transaction, nobody can compete with us, so make sure this file is still there */
+	struct cnode *cp;
+	cp = VTOC(vp);
+	if (cp->c_flag & C_NOEXISTS) {	/* this file has already been removed */
+		result = ENOENT;
+		goto exit1;
+	}
+
+	/*
+	 * If there isn't an attributes b-tree then create one.
+	 */
+	if (hfsmp->hfs_attribute_vp == NULL) {
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
+		result = hfs_create_attr_btree(hfsmp, ATTRIBUTE_FILE_NODE_SIZE,
+		                               getnodecount(hfsmp, ATTRIBUTE_FILE_NODE_SIZE));
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (result) {
+			goto exit1;
+		}
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+	if (ap->a_options & XATTR_REPLACE) {
+		result = BTReplaceRecord(btfile, iterator, &btdata, datasize);
+		if (result)
+			goto exit0;
+		else
+			goto exit;
+	}
+
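The XATTR_CREATE / XATTR_REPLACE handling above (EEXIST when the attribute already exists, ENOATTR when it does not) is what gives userland atomic create-or-fail semantics. A hedged sketch, again assuming the <sys/xattr.h> userland API (function name illustrative):

#include <string.h>
#include <sys/xattr.h>

/* Create the attribute only if it is not already present; the kernel
 * returns EEXIST otherwise, per the b-tree insert logic above. */
static int
set_xattr_once(const char *path, const char *name, const char *value)
{
	return setxattr(path, name, value, strlen(value), 0, XATTR_CREATE);
}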
+	/* Insert the attribute. */
+	result = BTInsertRecord(btfile, iterator, &btdata, datasize);
+	if (result) {
+		if (result != btExists) {
+			goto exit0;
+		}
+
+		// if it exists and XATTR_CREATE was specified,
+		// the spec says to return EEXIST
+		if (ap->a_options & XATTR_CREATE) {
+			result = EEXIST;
+			goto exit0;
+		}
+		/* XXX need to account for old size in c_attrblks */
+		result = BTReplaceRecord(btfile, iterator, &btdata, datasize);
+	}
+exit:
+	(void) BTFlushPath(btfile);
+exit0:
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	if (result == 0) {
+		struct cnode * cp;
+
+		cp = VTOC(vp);
+		cp->c_touch_chgtime = TRUE;
+		if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) == 0) {
+			cp->c_attr.ca_recflags |= kHFSHasAttributesMask;
+			(void) hfs_update(vp, 0);
+		}
+		HFS_KNOTE(vp, NOTE_ATTRIB);
+	}
+exit1:
+	/* Finish the transaction of our changes. */
+	hfs_end_transaction(hfsmp);
+exit2:
+	FREE(datap, M_TEMP);
+	FREE(iterator, M_TEMP);
+
+	if (result == btNotFound)
+		result = ENOATTR;
+	else
+		result = MacToVFSError(result);
+
+	return (result);
+}
+
+/*
+ * Remove an extended attribute.
+ */
+__private_extern__
+int
+hfs_vnop_removexattr(struct vnop_removexattr_args *ap)
+/*
+	struct vnop_removexattr_args {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		char * a_name;
+		int a_options;
+		vfs_context_t a_context;
+	};
+*/
+{
+	struct vnode *vp = ap->a_vp;
+	struct hfsmount *hfsmp;
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile;
+	struct proc *p = vfs_context_proc(ap->a_context);
+	FSBufferDescriptor btdata;
+	HFSPlusAttrData attrdata;
+	int lockflags;
+	int result;
+
+	if (ap->a_name == NULL || ap->a_name[0] == '\0') {
+		return (EINVAL);	/* invalid name */
+	}
+	hfsmp = VTOHFS(vp);
+	if (VNODE_IS_RSRC(vp)) {
+		return (EPERM);
+	}
+
+	/* If Resource Fork is non-empty then truncate it. */
+	if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) {
+		struct vnode *rvp = NULL;
+
+		if ( !vnode_isreg(vp) ) {
+			return (EPERM);
+		}
+		if ( !RESOURCE_FORK_EXISTS(vp) ) {
+			return (ENOATTR);
+		}
+		if ((result = hfs_vgetrsrc(hfsmp, vp, &rvp, p))) {
+			return (result);
+		}
+		hfs_lock_truncate(VTOC(rvp), TRUE);
+		if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK))) {
+			hfs_unlock_truncate(VTOC(rvp));	/* was VTOC(vp): unlock the cnode we locked */
+			vnode_put(rvp);
+			return (result);
+		}
+		result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, ap->a_context);
+
+		hfs_unlock_truncate(VTOC(rvp));
+		hfs_unlock(VTOC(rvp));
+
+		vnode_put(rvp);
+		return (result);
+	}
+	/* Clear out the Finder Info. */
+	if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
+		if (bcmp(VTOC(vp)->c_finderinfo, emptyfinfo, sizeof(emptyfinfo)) == 0) {
+			return (ENOATTR);
+		}
+		bzero(VTOC(vp)->c_finderinfo, sizeof(emptyfinfo));
+		return (0);
+	}
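Note that removing com.apple.ResourceFork does not delete a b-tree record at all; as the code above shows, it truncates the fork to zero length under the truncate lock. A hypothetical userland call that exercises that path (XATTR_RESOURCEFORK_NAME is defined in <sys/xattr.h>):

#include <sys/xattr.h>

/* On HFS+ this lands in the truncate branch of hfs_vnop_removexattr(). */
static int
drop_resource_fork(const char *path)
{
	return removexattr(path, XATTR_RESOURCEFORK_NAME, XATTR_NOFOLLOW);
}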
+	/*
+	 * Standard HFS only supports native FinderInfo and Resource Forks.
+	 */
+	if (hfsmp->hfs_flags & HFS_STANDARD) {
+		return (EPERM);
+	}
+	if (hfsmp->hfs_attribute_vp == NULL) {
+		return (ENOATTR);
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
+	if (hfs_start_transaction(hfsmp) != 0) {
+		result = EINVAL;
+		goto exit2;
+	}
+
+	result = buildkey(VTOC(vp)->c_fileid, ap->a_name, (HFSPlusAttrKey *)&iterator->key);
+	if (result)
+		goto exit2;
+
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+	btdata.bufferAddress = &attrdata;
+	btdata.itemSize = sizeof(attrdata);
+	btdata.itemCount = 1;
+	result = BTSearchRecord(btfile, iterator, &btdata, NULL, NULL);
+	if (result)
+		goto exit1;
+
+	result = BTDeleteRecord(btfile, iterator);
+	(void) BTFlushPath(btfile);
+exit1:
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	if (result == 0) {
+		VTOC(vp)->c_touch_chgtime = TRUE;
+		HFS_KNOTE(vp, NOTE_ATTRIB);
+	}
+exit2:
+	if (result == btNotFound) {
+		result = ENOATTR;
+	}
+	hfs_end_transaction(hfsmp);
+
+	FREE(iterator, M_TEMP);
+
+	return MacToVFSError(result);
+}
+
+
+/*
+ * Retrieve the list of extended attribute names.
+ */
+__private_extern__
+int
+hfs_vnop_listxattr(struct vnop_listxattr_args *ap)
+/*
+	struct vnop_listxattr_args {
+		struct vnodeop_desc *a_desc;
+		vnode_t a_vp;
+		uio_t a_uio;
+		size_t *a_size;
+		int a_options;
+		vfs_context_t a_context;
+*/
+{
+	struct vnode *vp = ap->a_vp;
+	struct hfsmount *hfsmp;
+	uio_t uio = ap->a_uio;
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile;
+	struct listattr_callback_state state;
+	int lockflags;
+	int result;
+
+	if (VNODE_IS_RSRC(vp)) {
+		return (EPERM);
+	}
+	hfsmp = VTOHFS(vp);
+	*ap->a_size = 0;
+
+	/* If Finder Info is non-empty then export it. */
+	if (bcmp(VTOC(vp)->c_finderinfo, emptyfinfo, sizeof(emptyfinfo)) != 0) {
+		if (uio == NULL) {
+			*ap->a_size += sizeof(XATTR_FINDERINFO_NAME);
+		} else if (uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) {
+			return (ERANGE);
+		} else {
+			result = uiomove((caddr_t)XATTR_FINDERINFO_NAME,
+			                 sizeof(XATTR_FINDERINFO_NAME), uio);
+			if (result)
+				return (result);
+		}
+	}
+	/* If Resource Fork is non-empty then export it. */
+	if (vnode_isreg(vp) && RESOURCE_FORK_EXISTS(vp)) {
+		if (uio == NULL) {
+			*ap->a_size += sizeof(XATTR_RESOURCEFORK_NAME);
+		} else if (uio_resid(uio) < sizeof(XATTR_RESOURCEFORK_NAME)) {
+			return (ERANGE);
+		} else {
+			result = uiomove((caddr_t)XATTR_RESOURCEFORK_NAME,
+			                 sizeof(XATTR_RESOURCEFORK_NAME), uio);
+			if (result)
+				return (result);
+		}
+	}
+	/*
+	 * Standard HFS only supports native FinderInfo and Resource Forks.
+	 * Return at this point.
+	 */
+	if (hfsmp->hfs_flags & HFS_STANDARD) {
+		return (0);
+	}
+	/* Bail if we don't have any extended attributes. */
+	if ((hfsmp->hfs_attribute_vp == NULL) ||
+	    (VTOC(vp)->c_attr.ca_recflags & kHFSHasAttributesMask) == 0) {
+		return (0);
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+	state.result = 0;	/* the exit path below reads this even if buildkey fails */
+	result = buildkey(VTOC(vp)->c_fileid, NULL, (HFSPlusAttrKey *)&iterator->key);
+	if (result)
+		goto exit;
+
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
+
+	result = BTSearchRecord(btfile, iterator, NULL, NULL, NULL);
+	if (result && result != btNotFound) {
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		goto exit;
+	}
+
+	state.fileID = VTOC(vp)->c_fileid;
+	state.result = 0;
+	state.uio = uio;
+	state.size = 0;
+
+	/*
+	 * Process entries starting just after iterator->key.
+	 */
+	result = BTIterateRecords(btfile, kBTreeNextRecord, iterator,
+	                          (IterateCallBackProcPtr)listattr_callback, &state);
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	if (uio == NULL) {
+		*ap->a_size += state.size;
+	}
+exit:
+	FREE(iterator, M_TEMP);
+
+	if (state.result || result == btNotFound)
+		result = state.result;
+
+	return MacToVFSError(result);
+}
+
+
+/*
+ * Callback - called for each attribute
+ */
+static int
+listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *data, struct listattr_callback_state *state)
+{
+	char attrname[XATTR_MAXNAMELEN + 1];
+	size_t bytecount;
+	int result;
+
+	if (state->fileID != key->fileID) {
+		state->result = 0;
+		return (0);	/* stop */
+	}
+	/*
+	 * Skip over non-primary keys
+	 */
+	if (key->startBlock != 0) {
+		return (1);	/* continue */
+	}
+
+	result = utf8_encodestr(key->attrName, key->attrNameLen * sizeof(UniChar),
+	                        attrname, &bytecount, sizeof(attrname), 0, 0);
+	if (result) {
+		state->result = result;
+		return (0);	/* stop */
+	}
+	bytecount++;	/* account for null termination char */
+
+	if (xattr_protected(attrname))
+		return (1);	/* continue */
+
+	if (state->uio == NULL) {
+		state->size += bytecount;
+	} else {
+		if (bytecount > uio_resid(state->uio)) {
+			state->result = ERANGE;
+			return (0);	/* stop */
+		}
+		result = uiomove((caddr_t) attrname, bytecount, state->uio);
+		if (result) {
+			state->result = result;
+			return (0);	/* stop */
+		}
+	}
+	return (1);	/* continue */
+}
+
+
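listattr_callback() above counts the terminating NUL of each UTF-8 name (bytecount++), because the syscall contract is a packed sequence of NUL-terminated names. A sketch of how a userland consumer would walk that buffer, assuming the <sys/xattr.h> interface (helper name illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

/* Size-probe, fetch, then walk the NUL-separated name list. */
static void
print_xattr_names(const char *path)
{
	ssize_t len;
	char *names, *cur;

	len = listxattr(path, NULL, 0, XATTR_NOFOLLOW);
	if (len <= 0)
		return;
	if ((names = malloc(len)) == NULL)
		return;
	len = listxattr(path, names, len, XATTR_NOFOLLOW);
	for (cur = names; len > 0 && cur < names + len; cur += strlen(cur) + 1)
		printf("%s\n", cur);
	free(names);
}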
+/*
+ * Remove all the attributes from a cnode.
+ *
+ * A journal transaction must already be started.
+ * The attributes b-tree must have an exclusive lock held.
+ */
+__private_extern__
+int
+hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid)
+{
+	BTreeIterator *next_iterator, *del_iterator;
+	HFSPlusAttrKey *next_key;
+	struct filefork *btfile;
+	int result, iter_result;
+
+	if (hfsmp->hfs_attribute_vp == NULL) {
+		return (0);
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	MALLOC(next_iterator, BTreeIterator *, sizeof(BTreeIterator) * 2, M_TEMP, M_WAITOK);
+	bzero(next_iterator, sizeof(BTreeIterator) * 2);
+	del_iterator = &next_iterator[1];
+	next_key = (HFSPlusAttrKey *)&next_iterator->key;
+
+	/*
+	 * Go to first possible attribute key/record pair
+	 */
+	(void) buildkey(fileid, NULL, next_key);
+	result = BTIterateRecord(btfile, kBTreeNextRecord, next_iterator, NULL, NULL);
+	if (result || next_key->fileID != fileid) {
+		goto exit;
+	}
+	/* Remember iterator of attribute to delete */
+	bcopy(next_iterator, del_iterator, sizeof(BTreeIterator));
+
+	/* Loop until there are no more attributes for this file id */
+	for(;;) {
+		iter_result = BTIterateRecord(btfile, kBTreeNextRecord, next_iterator, NULL, NULL);
+
+		/* XXX need to free any extents for record types 0x20 and 0x30 */
+		result = BTDeleteRecord(btfile, del_iterator);
+		if (result) {
+			goto exit;
+		}
+		if (iter_result) {
+			result = iter_result;
+			break;
+		}
+		if (iter_result || next_key->fileID != fileid) {
+			break;	/* end of attributes for this file id */
+		}
+		bcopy(next_iterator, del_iterator, sizeof(BTreeIterator));
+	}
+exit:
+	(void) BTFlushPath(btfile);
+
+	if (result == btNotFound) {
+		result = 0;
+	}
+	FREE(next_iterator, M_TEMP);
+	return (result);
+}
+
+/*
+ * Enable/Disable extended security (ACLs).
+ */
+__private_extern__
+int
+hfs_setextendedsecurity(struct hfsmount *hfsmp, int state)
+{
+	struct BTreeIterator * iterator = NULL;
+	struct filefork *btfile;
+	int lockflags;
+	int result;
+
+	if (hfsmp->hfs_flags & HFS_STANDARD) {
+		return (ENOTSUP);
+	}
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
+	/*
+	 * Build a b-tree key.
+	 * We use the root's parent id (1) to hold this volume attribute.
+	 */
+	(void) buildkey(kHFSRootParentID, XATTR_EXTENDEDSECURITY_NAME,
+	                (HFSPlusAttrKey *)&iterator->key);
+
+	/* Start a transaction for our changes. */
+	if (hfs_start_transaction(hfsmp) != 0) {
+		result = EINVAL;
+		goto exit2;
+	}
+	/*
+	 * If there isn't an attributes b-tree then create one.
+	 */
+	if (hfsmp->hfs_attribute_vp == NULL) {
+		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
+		result = hfs_create_attr_btree(hfsmp, ATTRIBUTE_FILE_NODE_SIZE,
+		                               getnodecount(hfsmp, ATTRIBUTE_FILE_NODE_SIZE));
+		hfs_systemfile_unlock(hfsmp, lockflags);
+		if (result) {
+			goto exit1;
+		}
+	}
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+	if (state == 0) {
+		/* Remove the attribute. */
+		result = BTDeleteRecord(btfile, iterator);
+		if (result == btNotFound)
+			result = 0;
+	} else {
+		FSBufferDescriptor btdata;
+		HFSPlusAttrData attrdata;
+		UInt16 datasize;
+
+		datasize = sizeof(attrdata);
+		btdata.bufferAddress = &attrdata;
+		btdata.itemSize = datasize;
+		btdata.itemCount = 1;
+		attrdata.recordType = kHFSPlusAttrInlineData;
+		attrdata.reserved[0] = 0;
+		attrdata.reserved[1] = 0;
+		attrdata.attrSize = 2;
+		attrdata.attrData[0] = 0;
+		attrdata.attrData[1] = 0;
+
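Stepping back to hfs_removeallattr() above: it keeps two iterators so that the record being deleted is never the one the traversal is standing on. The same look-ahead idea, reduced to a singly linked list (a hypothetical analog; it assumes the matching records are contiguous at the head, the way a fileID run is in the sorted b-tree):

#include <stdlib.h>

struct rec { int fileid; struct rec *next; };

/* Advance a "next" cursor past the record first, then free the
 * remembered "del" cursor: deletion never invalidates the walk. */
static void
remove_all_recs(struct rec **headp, int fileid)
{
	struct rec *next_it = *headp;	/* plays the role of next_iterator */
	struct rec *del_it;		/* plays the role of del_iterator  */

	while (next_it != NULL && next_it->fileid == fileid) {
		del_it = next_it;
		next_it = next_it->next;	/* step ahead before deleting */
		free(del_it);
	}
	*headp = next_it;
}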
+		/* Insert the attribute. */
+		result = BTInsertRecord(btfile, iterator, &btdata, datasize);
+		if (result == btExists)
+			result = 0;
+	}
+	(void) BTFlushPath(btfile);
+
+	hfs_systemfile_unlock(hfsmp, lockflags);
+exit1:
+	/* Finish the transaction of our changes. */
+	hfs_end_transaction(hfsmp);
+exit2:
+	FREE(iterator, M_TEMP);
+
+	if (result == 0) {
+		if (state == 0)
+			vfs_clearextendedsecurity(HFSTOVFS(hfsmp));
+		else
+			vfs_setextendedsecurity(HFSTOVFS(hfsmp));
+		printf("hfs: %s extended security on %s\n",
+		       state == 0 ? "disabling" : "enabling", hfsmp->vcbVN);
+	}
+
+	return MacToVFSError(result);
+}
+
+/*
+ * Check for extended security (ACLs).
+ */
+__private_extern__
+void
+hfs_checkextendedsecurity(struct hfsmount *hfsmp)
+{
+	struct BTreeIterator * iterator;
+	struct filefork *btfile;
+	int lockflags;
+	int result;
+
+	if (hfsmp->hfs_flags & HFS_STANDARD ||
+	    hfsmp->hfs_attribute_vp == NULL) {
+		return;
+	}
+
+	MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
+	bzero(iterator, sizeof(*iterator));
+
+	/*
+	 * Build a b-tree key.
+	 * We use the root's parent id (1) to hold this volume attribute.
+	 */
+	(void) buildkey(kHFSRootParentID, XATTR_EXTENDEDSECURITY_NAME,
+	                (HFSPlusAttrKey *)&iterator->key);
+
+	btfile = VTOF(hfsmp->hfs_attribute_vp);
+
+	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK);
+
+	/* Check for our attribute. */
+	result = BTSearchRecord(btfile, iterator, NULL, NULL, NULL);
+
+	hfs_systemfile_unlock(hfsmp, lockflags);
+	FREE(iterator, M_TEMP);
+
+	if (result == 0) {
+		vfs_setextendedsecurity(HFSTOVFS(hfsmp));
+		printf("hfs mount: enabling extended security on %s\n", hfsmp->vcbVN);
+	}
+}
+
+
+/*
+ * hfs_attrkeycompare - compare two attribute b-tree keys.
+ *
+ * The name portion of the key is compared using a 16-bit binary comparison.
+ * This is called from the b-tree code.
+ */
+__private_extern__
+int
+hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey)
+{
+	u_int32_t searchFileID, trialFileID;
+	int result;
+
+	searchFileID = searchKey->fileID;
+	trialFileID = trialKey->fileID;
+	result = 0;
+
+	if (searchFileID > trialFileID) {
+		++result;
+	} else if (searchFileID < trialFileID) {
+		--result;
+	} else {
+		u_int16_t * str1 = &searchKey->attrName[0];
+		u_int16_t * str2 = &trialKey->attrName[0];
+		int length1 = searchKey->attrNameLen;
+		int length2 = trialKey->attrNameLen;
+		u_int16_t c1, c2;
+		int length;
+
+		if (length1 < length2) {
+			length = length1;
+			--result;
+		} else if (length1 > length2) {
+			length = length2;
+			++result;
+		} else {
+			length = length1;
+		}
+
+		while (length--) {
+			c1 = *(str1++);
+			c2 = *(str2++);
+
+			if (c1 > c2) {
+				result = 1;
+				break;
+			}
+			if (c1 < c2) {
+				result = -1;
+				break;
+			}
+		}
+		if (result)
+			return (result);
+		/*
+		 * Names are equal; compare startBlock
+		 */
+		if (searchKey->startBlock == trialKey->startBlock)
+			return (0);
+		else
+			return (searchKey->startBlock < trialKey->startBlock ? -1 : 1);
+	}
+
+	return result;
+}
+
+
+/*
+ * buildkey - build an Attribute b-tree key
+ */
+static int
+buildkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key)
+{
+	int result = 0;
+	size_t unicodeBytes = 0;
+
+	if (attrname != NULL) {
+		/*
+		 * Convert the attribute name from UTF-8 into Unicode
+		 */
+		result = utf8_decodestr(attrname, strlen(attrname), key->attrName,
+		                        &unicodeBytes, sizeof(key->attrName), 0, 0);
+		if (result) {
+			if (result != ENAMETOOLONG)
+				result = EINVAL;	/* name has invalid characters */
+			return (result);
+		}
+		key->attrNameLen = unicodeBytes / sizeof(UniChar);
+		key->keyLength = kHFSPlusAttrKeyMinimumLength + unicodeBytes;
+	} else {
+		key->attrNameLen = 0;
+		key->keyLength = kHFSPlusAttrKeyMinimumLength;
+	}
+	key->pad = 0;
+	key->fileID = fileID;
+	key->startBlock = 0;
+
+	return (0);
+}
+
+/*
+ * getnodecount - calculate starting node count for attributes b-tree.
+ */
+static int
+getnodecount(struct hfsmount *hfsmp, size_t nodesize)
+{
+	int avedatasize;
+	int recpernode;
+	int count;
+
+	avedatasize = sizeof(u_int16_t);	/* index slot */
+	avedatasize += kHFSPlusAttrKeyMinimumLength + HFS_AVERAGE_NAME_SIZE * sizeof(u_int16_t);
+	avedatasize += sizeof(HFSPlusAttrData) + 32;
+
+	recpernode = (nodesize - sizeof(BTNodeDescriptor)) / avedatasize;
+
+	count = (hfsmp->hfs_filecount + hfsmp->hfs_dircount) / 8;
+	count /= recpernode;
+
+	/* XXX should also consider volume size XXX */
+
+	return (MAX(count, (int)(1024 * 1024) / (int)nodesize));
+}
+
+
+/*
+ * getmaxinlineattrsize - calculate maximum inline attribute size.
+ *
+ * This yields 3,802 bytes for an 8K node size.
+ */
+static size_t
+getmaxinlineattrsize(struct vnode * attrvp)
+{
+	struct BTreeInfoRec btinfo;
+	size_t nodesize = ATTRIBUTE_FILE_NODE_SIZE;
+	size_t maxsize;
+
+	if (attrvp != NULL) {
+		(void) hfs_lock(VTOC(attrvp), HFS_SHARED_LOCK);
+		if (BTGetInformation(VTOF(attrvp), 0, &btinfo) == 0)
+			nodesize = btinfo.nodeSize;
+		hfs_unlock(VTOC(attrvp));
+	}
+	maxsize = nodesize;
+	maxsize -= sizeof(BTNodeDescriptor);	/* minus node descriptor */
+	maxsize -= 3 * sizeof(UInt16);		/* minus 3 index slots */
+	maxsize /= 2;				/* 2 key/rec pairs minimum */
+	maxsize -= sizeof(HFSPlusAttrKey);	/* minus maximum key size */
+	maxsize -= sizeof(HFSPlusAttrData) - 2;	/* minus data header */
+	maxsize &= 0xFFFFFFFE;			/* multiple of 2 bytes */
+
+	return (maxsize);
+}
+
+
diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c
index dc6c30940..f11af332a 100644
--- a/bsd/hfs/hfscommon/BTree/BTree.c
+++ b/bsd/hfs/hfscommon/BTree/BTree.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
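Referring back to getmaxinlineattrsize() in the hfs_xattr.c hunk above: the quoted 3,802-byte figure checks out arithmetically, assuming the packed xnu-792 on-disk sizes (BTNodeDescriptor = 14 bytes, HFSPlusAttrKey = 268 bytes, HFSPlusAttrData = 18 bytes). A worked sketch under those assumptions:

#include <stddef.h>

/* Worked check of the 3,802-byte figure for an 8K attribute node. */
static size_t
max_inline_8k(void)
{
	size_t maxsize = 8192;

	maxsize -= 14;			/* BTNodeDescriptor		-> 8178 */
	maxsize -= 3 * 2;		/* three 16-bit index slots	-> 8172 */
	maxsize /= 2;			/* two key/rec pairs minimum	-> 4086 */
	maxsize -= 268;			/* sizeof(HFSPlusAttrKey)	-> 3818 */
	maxsize -= 18 - 2;		/* inline data header		-> 3802 */
	return (maxsize & ~(size_t)1);	/* already even			-> 3802 */
}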
* * @APPLE_LICENSE_HEADER_START@ * @@ -1120,7 +1120,7 @@ ProcessData: } while (err == 0) { - if (callBackProc(keyPtr, recordPtr, len, callBackState) == 0) + if (callBackProc(keyPtr, recordPtr, callBackState) == 0) break; if ((index+1) < ((NodeDescPtr)node.buffer)->numRecords) { @@ -1548,7 +1548,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator, btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; - REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); ////////////////////////////// Take A Hint ////////////////////////////////// @@ -1571,7 +1571,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator, // XXXdbg ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); - err = callBackProc(keyPtr, recordPtr, recordLen, callBackState); + err = callBackProc(keyPtr, recordPtr, callBackState); M_ExitOnError (err); err = UpdateNode (btreePtr, &nodeRec, 0, 0); @@ -1606,7 +1606,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator, // XXXdbg ModifyBlockStart(btreePtr->fileRefNum, &nodeRec); - err = callBackProc(keyPtr, recordPtr, recordLen, callBackState); + err = callBackProc(keyPtr, recordPtr, callBackState); M_ExitOnError (err); err = UpdateNode (btreePtr, &nodeRec, 0, 0); @@ -1786,7 +1786,7 @@ OSStatus BTFlushPath (FCB *filePtr) M_ReturnErrorIf (btreePtr == nil, fsBTInvalidFileErr); - REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false); + REQUIRE_FILE_LOCK(btreePtr->fileRefNum, true); err = UpdateHeader (btreePtr, false); diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c index 980541c3f..7baf03fb4 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -21,6 +21,7 @@ */ #include "../headers/BTreesPrivate.h" #include "sys/malloc.h" +#include /* @@ -53,7 +54,7 @@ struct nreserve { void *nr_tag; /* unique tag (per thread) */ }; -#define NR_GET_TAG() (current_act()) +#define NR_GET_TAG() (current_thread()) #define NR_CACHE 17 @@ -64,6 +65,11 @@ LIST_HEAD(nodereserve, nreserve) *nr_hashtbl; u_long nr_hashmask; +lck_grp_t * nr_lck_grp; +lck_grp_attr_t * nr_lck_grp_attr; +lck_attr_t * nr_lck_attr; + +lck_mtx_t nr_mutex; /* Internal Node Reserve Hash Routines (private) */ static void nr_insert (struct vnode *, struct nreserve *nrp, int); @@ -83,6 +89,15 @@ BTReserveSetup() panic("BTReserveSetup: nreserve size != opaque struct size"); nr_hashtbl = hashinit(NR_CACHE, M_HFSMNT, &nr_hashmask); + + nr_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nr_lck_grp_attr); + nr_lck_grp = lck_grp_alloc_init("btree_node_reserve", nr_lck_grp_attr); + + nr_lck_attr = lck_attr_alloc_init(); + lck_attr_setdebug(nr_lck_attr); + + lck_mtx_init(&nr_mutex, nr_lck_grp, nr_lck_attr); } @@ -142,7 +157,7 @@ BTReserveSpace(FCB *file, int operations, void* data) totalNodes = rsrvNodes + btree->totalNodes - availNodes; /* See if we also need a map node */ - if (totalNodes > CalcMapBits(btree)) + if (totalNodes > (int)CalcMapBits(btree)) ++totalNodes; if ((err = ExtendBTree(btree, totalNodes))) return (err); @@ -179,7 +194,7 @@ BTReleaseReserve(FCB *file, void* data) } /* - * BTUpdateReserve - update a node reserve for allocations that occured. + * BTUpdateReserve - update a node reserve for allocations that occurred. 
*/ __private_extern__ void @@ -209,11 +224,13 @@ nr_insert(struct vnode * btvp, struct nreserve *nrp, int nodecnt) /* * Check the cache - there may already be a reserve */ + lck_mtx_lock(&nr_mutex); nrhead = NR_HASH(btvp, tag); for (tmp_nrp = nrhead->lh_first; tmp_nrp; tmp_nrp = tmp_nrp->nr_hash.le_next) { if ((tmp_nrp->nr_tag == tag) && (tmp_nrp->nr_btvp == btvp)) { nrp->nr_tag = 0; + lck_mtx_unlock(&nr_mutex); return; } } @@ -224,6 +241,7 @@ nr_insert(struct vnode * btvp, struct nreserve *nrp, int nodecnt) nrp->nr_tag = tag; LIST_INSERT_HEAD(nrhead, nrp, nr_hash); ++nrinserts; + lck_mtx_unlock(&nr_mutex); } /* @@ -234,6 +252,7 @@ nr_delete(struct vnode * btvp, struct nreserve *nrp, int *nodecnt) { void * tag = NR_GET_TAG(); + lck_mtx_lock(&nr_mutex); if (nrp->nr_tag) { if ((nrp->nr_tag != tag) || (nrp->nr_btvp != btvp)) panic("nr_delete: invalid NR (%08x)", nrp); @@ -244,6 +263,7 @@ nr_delete(struct vnode * btvp, struct nreserve *nrp, int *nodecnt) } else { *nodecnt = 0; } + lck_mtx_unlock(&nr_mutex); } /* @@ -256,16 +276,21 @@ nr_lookup(struct vnode * btvp) struct nreserve *nrp; void* tag = NR_GET_TAG(); + lck_mtx_lock(&nr_mutex); + nrhead = NR_HASH(btvp, tag); for (nrp = nrhead->lh_first; nrp; nrp = nrp->nr_hash.le_next) { - if ((nrp->nr_tag == tag) && (nrp->nr_btvp == btvp)) + if ((nrp->nr_tag == tag) && (nrp->nr_btvp == btvp)) { + lck_mtx_unlock(&nr_mutex); return (nrp->nr_nodecnt - nrp->nr_newnodes); + } } + lck_mtx_unlock(&nr_mutex); return (0); } /* - * Update a node reserve for any allocations that occured. + * Update a node reserve for any allocations that occurred. */ static void nr_update(struct vnode * btvp, int nodecnt) @@ -274,6 +299,8 @@ nr_update(struct vnode * btvp, int nodecnt) struct nreserve *nrp; void* tag = NR_GET_TAG(); + lck_mtx_lock(&nr_mutex); + nrhead = NR_HASH(btvp, tag); for (nrp = nrhead->lh_first; nrp; nrp = nrp->nr_hash.le_next) { if ((nrp->nr_tag == tag) && (nrp->nr_btvp == btvp)) { @@ -281,4 +308,5 @@ nr_update(struct vnode * btvp, int nodecnt) break; } } + lck_mtx_unlock(&nr_mutex); } diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c index 06e15a807..66521dbbd 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeScanner.c +++ b/bsd/hfs/hfscommon/BTree/BTreeScanner.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1996-2005 Apple Computer, Inc. All rights reserved. 
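The BTreeNodeReserve.c hunks above convert the node-reserve hash from the old funnel model to an explicit lck_mtx_t. Condensed, the lifecycle pattern they introduce looks like this (a sketch using the same xnu lock API that the diff itself calls; the group and function names are illustrative):

#include <kern/locks.h>

static lck_grp_t *res_lck_grp;
static lck_mtx_t res_mutex;

/* One-time setup, as in BTReserveSetup(). */
static void
res_lock_setup(void)
{
	lck_grp_attr_t *grp_attr = lck_grp_attr_alloc_init();

	res_lck_grp = lck_grp_alloc_init("node_reserve_demo", grp_attr);
	lck_mtx_init(&res_mutex, res_lck_grp, lck_attr_alloc_init());
}

/* Every hash-list traversal is now bracketed, as in nr_lookup(). */
static int
res_lookup_locked(void)
{
	int found = 0;

	lck_mtx_lock(&res_mutex);
	/* ... walk the shared list and set found here ... */
	lck_mtx_unlock(&res_mutex);
	return (found);
}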
* * @APPLE_LICENSE_HEADER_START@ * @@ -230,7 +230,7 @@ static int ReadMultipleNodes( BTScanState *theScanStatePtr ) { int myErr = E_NONE; BTreeControlBlockPtr myBTreeCBPtr; - daddr_t myPhyBlockNum; + daddr64_t myPhyBlockNum; u_int32_t myBufferSize; struct vnode * myDevPtr; int myBlockRun; @@ -239,8 +239,8 @@ static int ReadMultipleNodes( BTScanState *theScanStatePtr ) // release old buffer if we have one if ( theScanStatePtr->bufferPtr != NULL ) { - theScanStatePtr->bufferPtr->b_flags |= (B_INVAL | B_AGE); - brelse( theScanStatePtr->bufferPtr ); + buf_markinvalid(theScanStatePtr->bufferPtr); + buf_brelse( theScanStatePtr->bufferPtr ); theScanStatePtr->bufferPtr = NULL; theScanStatePtr->currentNodePtr = NULL; } @@ -248,8 +248,8 @@ static int ReadMultipleNodes( BTScanState *theScanStatePtr ) myBTreeCBPtr = theScanStatePtr->btcb; // map logical block in catalog btree file to physical block on volume - myErr = VOP_BMAP( myBTreeCBPtr->fileRefNum, theScanStatePtr->nodeNum, - &myDevPtr, &myPhyBlockNum, &myBlockRun ); + myErr = hfs_bmap(myBTreeCBPtr->fileRefNum, theScanStatePtr->nodeNum, + &myDevPtr, &myPhyBlockNum, &myBlockRun); if ( myErr != E_NONE ) { goto ExitThisRoutine; @@ -266,18 +266,18 @@ static int ReadMultipleNodes( BTScanState *theScanStatePtr ) } // now read blocks from the device - myErr = bread( myDevPtr, - myPhyBlockNum, - myBufferSize, - NOCRED, - &theScanStatePtr->bufferPtr ); + myErr = (int)buf_bread(myDevPtr, + myPhyBlockNum, + myBufferSize, + NOCRED, + &theScanStatePtr->bufferPtr ); if ( myErr != E_NONE ) { goto ExitThisRoutine; } - theScanStatePtr->nodesLeftInBuffer = theScanStatePtr->bufferPtr->b_bcount / theScanStatePtr->btcb->nodeSize; - theScanStatePtr->currentNodePtr = (BTNodeDescriptor *) theScanStatePtr->bufferPtr->b_data; + theScanStatePtr->nodesLeftInBuffer = buf_count(theScanStatePtr->bufferPtr) / theScanStatePtr->btcb->nodeSize; + theScanStatePtr->currentNodePtr = (BTNodeDescriptor *) buf_dataptr(theScanStatePtr->bufferPtr); ExitThisRoutine: return myErr; @@ -357,7 +357,7 @@ int BTScanInitialize( const FCB * btreeFile, scanState->currentNodePtr = NULL; scanState->nodesLeftInBuffer = 0; // no nodes currently in buffer scanState->recordsFound = recordsFound; - scanState->startTime = time; // initialize our throttle + microuptime(&scanState->startTime); // initialize our throttle return noErr; @@ -391,8 +391,8 @@ int BTScanTerminate( BTScanState * scanState, if ( scanState->bufferPtr != NULL ) { - scanState->bufferPtr->b_flags |= (B_INVAL | B_AGE); - brelse( scanState->bufferPtr ); + buf_markinvalid(scanState->bufferPtr); + buf_brelse( scanState->bufferPtr ); scanState->bufferPtr = NULL; scanState->currentNodePtr = NULL; } diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c index 3a8463911..777e6f0fc 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c @@ -537,7 +537,7 @@ ErrorExit: (void) ReleaseNode (btreePtr, targetNode); (void) ReleaseNode (btreePtr, &leftNode); - Panic ("\p InsertLevel: an error occured!"); + Panic ("\p InsertLevel: an error occurred!"); return err; diff --git a/bsd/hfs/hfscommon/Catalog/Catalog.c b/bsd/hfs/hfscommon/Catalog/Catalog.c deleted file mode 100644 index e7134028f..000000000 --- a/bsd/hfs/hfscommon/Catalog/Catalog.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. 
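The BTreeScanner.c hunk above is part of the same KPI cleanup: direct struct buf field pokes (b_flags |= B_INVAL, b_data, b_bcount) are replaced with the opaque accessors buf_markinvalid(), buf_dataptr(), buf_count() and buf_brelse(). The release half of that pattern, isolated as a sketch (the helper name is hypothetical; the buf_* calls are the ones used in the diff):

#include <sys/buf.h>

/* Invalidate and release a cached node buffer, as ReadMultipleNodes()
 * and BTScanTerminate() now do. */
static void
release_scan_buffer(buf_t bp)
{
	if (bp != NULL) {
		buf_markinvalid(bp);	/* stale b-tree nodes must not be recycled */
		buf_brelse(bp);
	}
}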
- * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -#pragma segment Catalog - -#include -#include - -#include "../../hfs_endian.h" - -#include "../headers/FileMgrInternal.h" -#include "../headers/BTreesInternal.h" -#include "../headers/CatalogPrivate.h" -#include "../headers/HFSUnicodeWrappers.h" - - -// External routines - -extern SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 ); - - -//_________________________________________________________________________________ -// Exported Routines -// -// CompareCatalogKeys - Compares two catalog keys. -// -//_________________________________________________________________________________ - - - -UInt32 -GetDirEntrySize(BTreeIterator *bip, ExtendedVCB * vol) -{ - CatalogKey * ckp; - CatalogName * cnp; - ByteCount utf8chars; - UInt8 name[kdirentMaxNameBytes + 1]; - OSErr result; - - ckp = (CatalogKey*) &bip->key; - - if (vol->vcbSigWord == kHFSPlusSigWord) { - cnp = (CatalogName*) &ckp->hfsPlus.nodeName; - utf8chars = utf8_encodelen(cnp->ustr.unicode, - cnp->ustr.length * sizeof(UniChar), ':', 0); - if (utf8chars > kdirentMaxNameBytes) - utf8chars = kdirentMaxNameBytes; - } else { /* hfs */ - cnp = (CatalogName*) ckp->hfs.nodeName; - result = hfs_to_utf8(vol, cnp->pstr, kdirentMaxNameBytes + 1, - &utf8chars, name); - if (result) { - /* - * When an HFS name cannot be encoded with the current - * volume encoding we use MacRoman as a fallback. - */ - result = mac_roman_to_utf8(cnp->pstr, MAXHFSVNODELEN + 1, - &utf8chars, name); - } - } - - return DIRENTRY_SIZE(utf8chars); -} -/* - * NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE - * - * This is assuming maxinum size of a name is 255 (kdirentMaxNameBytes), which is incorrect. - * Any caller of this has to make sure names > 255 are mangled!!!!!!!! - */ - -OSErr -PositionIterator(CatalogIterator *cip, UInt32 offset, BTreeIterator *bip, UInt16 *op) -{ -#define CAT_START_OFFSET (2 * sizeof(struct hfsdotentry)) - ExtendedVCB * vol; - FCB * fcb; - OSErr result = 0; - - /* are we past the end of a directory? 
*/ - if (cip->folderID != cip->parentID) - return(cmNotFound); - - vol = cip->volume; - fcb = GetFileControlBlock(vol->catalogRefNum); - - /* make a btree iterator from catalog iterator */ - UpdateBtreeIterator(cip, bip); - - if (cip->currentOffset == offset) { - *op = kBTreeCurrentRecord; - - } else if (cip->nextOffset == offset) { - *op = kBTreeNextRecord; - - } else { /* start from beginning */ - *op = kBTreeNextRecord; - - /* Position iterator at the folder's thread record */ - result = BTSearchRecord(fcb, bip, NULL, NULL, bip); - if (result) - goto exit; - - /* find offset (note: n^2 / 2) */ - if (offset > CAT_START_OFFSET) { - HFSCatalogNodeID pid, *idp; - UInt32 curOffset, nextOffset; - - /* get first record (ie offset 24) */ - result = BTIterateRecord( fcb, kBTreeNextRecord, bip, NULL, NULL ); - if (result) - goto exit; - - if (vol->vcbSigWord == kHFSPlusSigWord) - idp = &((CatalogKey*) &bip->key)->hfsPlus.parentID; - else - idp = &((CatalogKey*) &bip->key)->hfs.parentID; - - pid = *idp; - - curOffset = CAT_START_OFFSET; - nextOffset = CAT_START_OFFSET + GetDirEntrySize(bip, vol); - - while (nextOffset < offset) { - result = BTIterateRecord( fcb, kBTreeNextRecord, bip, NULL, NULL ); - if (result) - goto exit; - - /* check for parent change */ - if (pid != *idp) { - result = cmNotFound; /* offset past end of directory */ - goto exit; - } - - curOffset = nextOffset; - nextOffset += GetDirEntrySize(bip, vol); - }; - - if (nextOffset != offset) { - result = cmNotFound; - goto exit; - } - - UpdateCatalogIterator(bip, cip); - cip->currentOffset = curOffset; - cip->nextOffset = nextOffset; - } - } - -exit: - if (result == btNotFound) - result = cmNotFound; - - return result; - -} /* end PositionIterator */ - - -//_________________________________________________________________________________ -// Routine: CompareCatalogKeys -// -// Function: Compares two catalog keys (a search key and a trial key). -// -// Result: +n search key > trial key -// 0 search key = trial key -// -n search key < trial key -//_________________________________________________________________________________ - -SInt32 -CompareCatalogKeys(HFSCatalogKey *searchKey, HFSCatalogKey *trialKey) -{ - HFSCatalogNodeID searchParentID, trialParentID; - SInt32 result; - - searchParentID = searchKey->parentID; - trialParentID = trialKey->parentID; - - if ( searchParentID > trialParentID ) // parent dirID is unsigned - result = 1; - else if ( searchParentID < trialParentID ) - result = -1; - else // parent dirID's are equal, compare names - result = FastRelString(searchKey->nodeName, trialKey->nodeName); - - return result; -} - - -//_________________________________________________________________________________ -// Routine: CompareExtendedCatalogKeys -// -// Function: Compares two large catalog keys (a search key and a trial key). 
-// -// Result: +n search key > trial key -// 0 search key = trial key -// -n search key < trial key -//_________________________________________________________________________________ - -SInt32 -CompareExtendedCatalogKeys(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) -{ - SInt32 result; - HFSCatalogNodeID searchParentID, trialParentID; - - searchParentID = searchKey->parentID; - trialParentID = trialKey->parentID; - - if ( searchParentID > trialParentID ) // parent node IDs are unsigned - { - result = 1; - } - else if ( searchParentID < trialParentID ) - { - result = -1; - } - else // parent node ID's are equal, compare names - { - if ( searchKey->nodeName.length == 0 || trialKey->nodeName.length == 0 ) - result = searchKey->nodeName.length - trialKey->nodeName.length; - else - result = FastUnicodeCompare(&searchKey->nodeName.unicode[0], searchKey->nodeName.length, - &trialKey->nodeName.unicode[0], trialKey->nodeName.length); - } - - return result; -} - diff --git a/bsd/hfs/hfscommon/Catalog/CatalogIterators.c b/bsd/hfs/hfscommon/Catalog/CatalogIterators.c deleted file mode 100644 index ddca514d4..000000000 --- a/bsd/hfs/hfscommon/Catalog/CatalogIterators.c +++ /dev/null @@ -1,643 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - File: CatalogIterators.c - - Contains: Catalog Iterator Implementation - - Version: HFS Plus 1.0 - - Copyright: © 1997-1998 by Apple Computer, Inc., all rights reserved. - - File Ownership: - - DRI: Don Brady - - Other Contact: Mark Day - - Technology: Mac OS File System - - Writers: - - (msd) Mark Day - (djb) Don Brady - - Change History (most recent first): - 4/23/98 djb Re-enable InvalidateCatalogCache (was commented out). - 4/6/98 djb Add locking for cache globals (list) and iterators. - 4/2/98 djb Define gCatalogCacheGlobals here instead of FSVars. - 3/31/98 djb Sync up with final HFSVolumes.h header file. - - 11/13/97 djb Radar #1683572 - Fix for indexed GetFileInfo. - 10/17/97 msd Bug 1683506. Add support for long Unicode names in - CatalogIterators. Added a single global buffer for long Unicode - names; it is used by at most one CatalogIterator at a time. 
- 10/1/97 djb first checked in -*/ - - -#include "../../hfs_macos_defs.h" -#include "../../hfs.h" -#include "../../hfs_dbg.h" -#include "../../hfs_format.h" - -#include "../headers/FileMgrInternal.h" -#include "../headers/BTreesInternal.h" -#include "../headers/CatalogPrivate.h" - - -#include -#include -#include -#include - -static void InsertCatalogIteratorAsMRU( CatalogCacheGlobals *cacheGlobals, CatalogIterator *iterator ); - -static void InsertCatalogIteratorAsLRU( CatalogCacheGlobals *cacheGlobals, CatalogIterator *iterator ); - -static void PrepareForLongName( CatalogIterator *iterator ); - - -#if TARGET_API_MACOS_X - CatalogCacheGlobals *gCatalogCacheGlobals; - - #define GetCatalogCacheGlobals() (gCatalogCacheGlobals) - - #define CATALOG_ITER_LIST_LOCK(g) simple_lock(&(g)->simplelock) - - #define CATALOG_ITER_LIST_UNLOCK(g) simple_unlock(&(g)->simplelock) - - #define CI_LOCK(i) lockmgr(&(i)->iterator_lock, LK_EXCLUSIVE, (simple_lock_t) 0, current_proc()) - -#define CI_UNLOCK(i) lockmgr(&(i)->iterator_lock, LK_RELEASE, (simple_lock_t) 0, current_proc()) - -#define CI_SLEEPLESS_LOCK(i) lockmgr(&(i)->iterator_lock, LK_EXCLUSIVE | LK_NOWAIT, (simple_lock_t) 0, current_proc()) - -#define CI_LOCK_FROM_LIST(g,i) lockmgr(&(i)->iterator_lock, LK_EXCLUSIVE | LK_INTERLOCK, &(g)->simplelock, current_proc()) - -#else /* TARGET_API_MACOS_X */ - - #define GetCatalogCacheGlobals() ((CatalogCacheGlobals*) ((FSVarsRec*) LMGetFSMVars()->gCatalogCacheGlobals)) - - #define CATALOG_ITER_LIST_LOCK(g) - - #define CATALOG_ITER_LIST_UNLOCK(g) - - #define CI_LOCK(i) 0 - - #define CI_UNLOCK(i) 0 - - #define CI_SLEEPLESS_LOCK(i) 0 - - #define CI_LOCK_FROM_LIST(g,i) 0 - -#endif - - -//_______________________________________________________________________________ -// Routine: InitCatalogCache -// -// Function: Allocates cache, and initializes all the cache structures. 
-// -//_______________________________________________________________________________ -OSErr -InitCatalogCache(void) -{ - CatalogCacheGlobals * cacheGlobals; - CatalogIterator * iterator; - UInt32 cacheSize; - UInt16 i; - UInt16 lastIterator; - OSErr err; - - - cacheSize = sizeof(CatalogCacheGlobals) + ( kCatalogIteratorCount * sizeof(CatalogIterator) ); - cacheGlobals = (CatalogCacheGlobals *) NewPtrSysClear( cacheSize ); - - cacheGlobals->iteratorCount = kCatalogIteratorCount; - - lastIterator = kCatalogIteratorCount - 1; // last iterator number, since they start at 0 - - // Initialize the MRU order for the cache - cacheGlobals->mru = (CatalogIterator *) ( (Ptr)cacheGlobals + sizeof(CatalogCacheGlobals) ); - - // Initialize the LRU order for the cache - cacheGlobals->lru = (CatalogIterator *) ( (Ptr)(cacheGlobals->mru) + (lastIterator * sizeof(CatalogIterator)) ); - - - // Traverse iterators, setting initial mru, lru, and default values - for ( i = 0, iterator = cacheGlobals->mru; i < kCatalogIteratorCount ; i++, iterator = iterator->nextMRU ) - { - if ( i == lastIterator ) - iterator->nextMRU = nil; // terminate the list - else - iterator->nextMRU = (CatalogIterator *) ( (Ptr)iterator + sizeof(CatalogIterator) ); - - if ( i == 0 ) - iterator->nextLRU = nil; // terminate the list - else - iterator->nextLRU = (CatalogIterator *) ( (Ptr)iterator - sizeof(CatalogIterator) ); - - #if TARGET_API_MACOS_X - lockinit(&iterator->iterator_lock, PINOD, "hfs_catalog_iterator", 0, 0); - #endif - } - - #if TARGET_API_MAC_OS8 - (FSVarsRec*) LMGetFSMVars()->gCatalogCacheGlobals = (Ptr) cacheGlobals; - #endif - - #if TARGET_API_MACOS_X - gCatalogCacheGlobals = cacheGlobals; - simple_lock_init(&cacheGlobals->simplelock); - #endif - - return noErr; -} - - -//_______________________________________________________________________________ -// Routine: InvalidateCatalogCache -// -// Function: Trash any interators matching volume parameter -// -//_______________________________________________________________________________ -void PrintCatalogIterator( void ); - -void -InvalidateCatalogCache( ExtendedVCB *volume ) -{ - TrashCatalogIterator( volume, 0 ); -} - - -//_______________________________________________________________________________ -// Routine: PrintCatalogIterator -// -// Function: Prints all interators -// -//_______________________________________________________________________________ -#if HFS_DIAGNOSTIC -void -PrintCatalogIterator( void ) -{ - CatalogIterator *iterator; - CatalogCacheGlobals *cacheGlobals = GetCatalogCacheGlobals(); - int i; - - PRINTIT("CatalogCacheGlobals @ 0x%08lX are:\n", (unsigned long)cacheGlobals); - PRINTIT("\titeratorCount: %ld \n", cacheGlobals->iteratorCount); - PRINTIT("\tmru: 0x%08lX \n", (unsigned long)cacheGlobals->mru); - PRINTIT("\tlru: 0x%08lX \n", (unsigned long)cacheGlobals->lru); - - for ( iterator = cacheGlobals->mru, i=0 ; iterator != nil && i<32 ; iterator = iterator->nextMRU, i++) - { - PRINTIT("%d: ", i); - PRINTIT(" i: 0x%08lX", (unsigned long)iterator); - PRINTIT(" M: 0x%08lX", (unsigned long)iterator->nextMRU); - PRINTIT(" L: 0x%08lX", (unsigned long)iterator->nextLRU); - PRINTIT("\n"); - } -} -#endif - -//_______________________________________________________________________________ -// Routine: TrashCatalogIterator -// -// Function: Trash any interators matching volume and folder parameters -// -//_______________________________________________________________________________ -void -TrashCatalogIterator( const ExtendedVCB *volume, 
HFSCatalogNodeID folderID ) -{ - CatalogIterator *iterator; - CatalogCacheGlobals *cacheGlobals = GetCatalogCacheGlobals(); - - CATALOG_ITER_LIST_LOCK(cacheGlobals); - - for ( iterator = cacheGlobals->mru ; iterator != nil ; iterator = iterator->nextMRU ) - { - top: - - // first match the volume - if ( iterator->volume != volume ) - continue; - - // now match the folder (or all folders if 0) - if ( (folderID == 0) || (folderID == iterator->folderID) ) - { - CatalogIterator *next; - - iterator->volume = 0; // trash it - iterator->folderID = 0; - - next = iterator->nextMRU; // remember the next iterator - - // if iterator is not already last then make it last - if ( next != nil ) - { - InsertCatalogIteratorAsLRU( cacheGlobals, iterator ); - - // iterator->nextMRU will always be zero (since we moved it to the end) - // so set up the next iterator manually (we know its not nil) - iterator = next; - goto top; // process the next iterator - } - } - } - - CATALOG_ITER_LIST_UNLOCK(cacheGlobals); -} - - -//_______________________________________________________________________________ -// Routine: AgeCatalogIterator -// -// Function: Move iterator to the end of the list... -// -//_______________________________________________________________________________ -void -AgeCatalogIterator ( CatalogIterator *catalogIterator ) -{ - CatalogCacheGlobals * cacheGlobals = GetCatalogCacheGlobals(); - - CATALOG_ITER_LIST_LOCK(cacheGlobals); - - //PRINTIT(" AgeCatalogIterator: v=%d, d=%ld, i=%d\n", catalogIterator->volRefNum, catalogIterator->folderID, catalogIterator->currentIndex); - - InsertCatalogIteratorAsLRU( cacheGlobals, catalogIterator ); - - CATALOG_ITER_LIST_UNLOCK(cacheGlobals); -} - - -//_______________________________________________________________________________ -// Routine: GetCatalogIterator -// -// Function: Release interest in Catalog iterator -// -//_______________________________________________________________________________ -OSErr -ReleaseCatalogIterator( CatalogIterator* catalogIterator) -{ -#if TARGET_API_MACOS_X - //PRINTIT(" ReleaseCatalogIterator: v=%d, d=%ld, i=%d\n", catalogIterator->volRefNum, catalogIterator->folderID, catalogIterator->currentIndex); - return CI_UNLOCK(catalogIterator); -#else - return noErr; -#endif -} - - -//_______________________________________________________________________________ -// Routine: GetCatalogIterator -// -// Function: Returns an iterator associated with the volume, folderID, index, -// and iterationType (kIterateFilesOnly or kIterateAll). -// Searches the cache in MRU order. -// Inserts the resulting iterator at the head of mru automatically -// -// Note: The returned iterator is locked and ReleaseCatalogIterator must -// be called to unlock it. 
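/*
 * Hypothetical call pattern, sketched against the declarations above and
 * not taken from the original sources.  GetCatalogIterator recycles the
 * LRU cache entry on a miss, so it never returns nil, and the iterator
 * comes back locked; every call must therefore be paired with
 * ReleaseCatalogIterator:
 */
    CatalogIterator *ci;

    ci = GetCatalogIterator(volume, folderID, offset);  /* locked on return */
    /* ... resume the directory scan using ci->btreeNodeHint and
           ci->btreeIndexHint ... */
    (void) ReleaseCatalogIterator(ci);                  /* drops the CI lock */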
-// -//_______________________________________________________________________________ - -CatalogIterator* -GetCatalogIterator(ExtendedVCB *volume, HFSCatalogNodeID folderID, UInt32 offset) -{ - CatalogCacheGlobals *cacheGlobals = GetCatalogCacheGlobals(); - CatalogIterator *iterator; - CatalogIterator *bestIterator; - - bestIterator = NULL; - - CATALOG_ITER_LIST_LOCK(cacheGlobals); - - for (iterator = cacheGlobals->mru ; iterator != nil ; iterator = iterator->nextMRU) { - - /* first make sure volume and folder id match */ - if ((iterator->volume != volume) || (iterator->folderID != folderID)) { - continue; - } - - /* ignore busy iterators */ - if ( CI_SLEEPLESS_LOCK(iterator) == EBUSY ) { - //PRINTIT(" GetCatalogIterator: busy v=%d, d=%ld, i=%d\n", volume, folderID, iterator->currentIndex); - continue; - } - - /* we matched volume, folder id, now check the offset */ - if ( iterator->currentOffset == offset || iterator->nextOffset == offset) { - bestIterator = iterator; // we scored! - so get out of this loop - break; // break with iterator locked - } - - (void) CI_UNLOCK(iterator); // unlock iterator before moving to the next one - } - - // check if we didn't get one or if the one we got is too far away... - if (bestIterator == NULL) - { - bestIterator = cacheGlobals->lru; // start over with a new iterator - - //PRINTIT(" GetCatalogIterator: recycle v=%d, d=%ld, i=%d\n", bestIterator->volume, bestIterator->folderID, bestIterator->currentIndex); - (void) CI_LOCK_FROM_LIST(cacheGlobals, bestIterator); // XXX we should not eat the error! - - CATALOG_ITER_LIST_LOCK(cacheGlobals); // grab the lock again for MRU Insert below... - - bestIterator->volume = volume; // update the iterator's volume - bestIterator->folderID = folderID; // ... and folderID - bestIterator->currentIndex = 0xFFFF; // ... and offspring index marker - bestIterator->currentOffset = 0xFFFFFFFF; - bestIterator->nextOffset = 0xFFFFFFFF; - - bestIterator->btreeNodeHint = 0; - bestIterator->btreeIndexHint = 0; - bestIterator->parentID = folderID; // set key to folderID + empty name - bestIterator->folderName.unicodeName.length = 0; // clear pascal/unicode name - - if ( volume->vcbSigWord == kHFSPlusSigWord ) - bestIterator->nameType = kShortUnicodeName; - else - bestIterator->nameType = kShortPascalName; - } - else { - //PRINTIT(" GetCatalogIterator: found v=%d, d=%ld, i=%d\n", bestIterator->volume, bestIterator->folderID, bestIterator->currentIndex); - } - - // put this iterator at the front of the list - InsertCatalogIteratorAsMRU( cacheGlobals, bestIterator ); - - CATALOG_ITER_LIST_UNLOCK(cacheGlobals); - - return bestIterator; // return our best shot - -} /* GetCatalogIterator */ - - -//_______________________________________________________________________________ -// Routine: UpdateBtreeIterator -// -// Function: Fills in a BTreeIterator from a CatalogIterator -// -// Assumes: catalogIterator->nameType is correctly initialized! 
-// catalogIterator is locked (MacOS X) -//_______________________________________________________________________________ -void -UpdateBtreeIterator(const CatalogIterator *catalogIterator, BTreeIterator *btreeIterator) -{ - CatalogName * nodeName; - Boolean isHFSPlus; - - - btreeIterator->hint.writeCount = 0; - btreeIterator->hint.nodeNum = catalogIterator->btreeNodeHint; - btreeIterator->hint.index = catalogIterator->btreeIndexHint; - - switch (catalogIterator->nameType) - { - case kShortPascalName: - if ( catalogIterator->folderName.pascalName[0] > 0 ) - nodeName = (CatalogName *) catalogIterator->folderName.pascalName; - else - nodeName = NULL; - - isHFSPlus = false; - break; - - case kShortUnicodeName: - if ( catalogIterator->folderName.unicodeName.length > 0 ) - nodeName = (CatalogName *) &catalogIterator->folderName.unicodeName; - else - nodeName = NULL; - - isHFSPlus = true; - break; - - case kLongUnicodeName: - if ( catalogIterator->folderName.longNamePtr->length > 0 ) - nodeName = (CatalogName *) catalogIterator->folderName.longNamePtr; - else - nodeName = NULL; - - isHFSPlus = true; - break; - - default: - return; - } - - BuildCatalogKey(catalogIterator->parentID, nodeName, isHFSPlus, (CatalogKey*) &btreeIterator->key); -} - - -//_______________________________________________________________________________ -// Routine: UpdateCatalogIterator -// -// Function: Updates a CatalogIterator from a BTreeIterator -// -// Assumes: catalogIterator->nameType is correctly initialized! -// catalogIterator is locked (MacOS X) -//_______________________________________________________________________________ -void -UpdateCatalogIterator (const BTreeIterator *btreeIterator, CatalogIterator *catalogIterator) -{ - void * srcName; - void * dstName; - UInt16 nameSize; - CatalogKey * catalogKey; - - - catalogIterator->btreeNodeHint = btreeIterator->hint.nodeNum; - catalogIterator->btreeIndexHint = btreeIterator->hint.index; - - catalogKey = (CatalogKey*) &btreeIterator->key; - - switch (catalogIterator->nameType) - { - case kShortPascalName: - catalogIterator->parentID = catalogKey->hfs.parentID; - - dstName = catalogIterator->folderName.pascalName; - srcName = catalogKey->hfs.nodeName; - nameSize = catalogKey->hfs.nodeName[0] + sizeof(UInt8); - break; - - case kShortUnicodeName: - catalogIterator->parentID = catalogKey->hfsPlus.parentID; - - dstName = &catalogIterator->folderName.unicodeName; - srcName = &catalogKey->hfsPlus.nodeName; - nameSize = (catalogKey->hfsPlus.nodeName.length + 1) * sizeof(UInt16); - - // See if we need to make this iterator use long names - if ( nameSize > sizeof(catalogIterator->folderName.unicodeName) ) - { - PrepareForLongName(catalogIterator); // Find a long name buffer to use - dstName = catalogIterator->folderName.longNamePtr; - } - break; - - case kLongUnicodeName: - catalogIterator->parentID = catalogKey->hfsPlus.parentID; - - dstName = catalogIterator->folderName.longNamePtr; - srcName = &catalogKey->hfsPlus.nodeName; - nameSize = (catalogKey->hfsPlus.nodeName.length + 1) * sizeof(UInt16); - break; - - default: - return; - } - - if (catalogIterator->parentID != catalogIterator->folderID) - catalogIterator->nextOffset = 0xFFFFFFFF; - - BlockMoveData(srcName, dstName, nameSize); - -} // end UpdateCatalogIterator - - -//_______________________________________________________________________________ -// Routine: InsertCatalogIteratorAsMRU -// -// Function: Moves catalog iterator to head of mru order in double linked list -// -// Assumes list simple lock is held 
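/*
 * Illustrative note, not part of the original sources: the nameSize
 * arithmetic in UpdateCatalogIterator above follows from the layout of a
 * unicode name record, a UInt16 length word followed by that many UTF-16
 * code units, so the bytes to copy are (length + 1) * sizeof(UInt16).
 * A standalone restatement with simplified types:
 */
#include <stddef.h>
#include <stdint.h>

struct demo_unistr63 {
    uint16_t length;        /* number of UTF-16 code units that follow */
    uint16_t unicode[63];
};

static size_t
demo_unistr_copy_size(const struct demo_unistr63 *name)
{
    /* the extra UInt16 accounts for the length word itself */
    return ((size_t)name->length + 1) * sizeof(uint16_t);
}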
-//_______________________________________________________________________________ -static void -InsertCatalogIteratorAsMRU ( CatalogCacheGlobals *cacheGlobals, CatalogIterator *iterator ) -{ - CatalogIterator *swapIterator; - - if ( cacheGlobals->mru != iterator ) // if it's not already the mru iterator - { - swapIterator = cacheGlobals->mru; // put it in the front of the double queue - cacheGlobals->mru = iterator; - iterator->nextLRU->nextMRU = iterator->nextMRU; - if ( iterator->nextMRU != nil ) - iterator->nextMRU->nextLRU = iterator->nextLRU; - else - cacheGlobals->lru= iterator->nextLRU; - iterator->nextMRU = swapIterator; - iterator->nextLRU = nil; - swapIterator->nextLRU = iterator; - } -} - - -//________________________________________________________________________________ -// Routine: InsertCatalogIteratorAsLRU -// -// Function: Moves catalog iterator to head of lru order in double linked list -// -// Assumes list simple lock is held -//_______________________________________________________________________________ -static void -InsertCatalogIteratorAsLRU ( CatalogCacheGlobals *cacheGlobals, CatalogIterator *iterator ) -{ - CatalogIterator *swapIterator; - - if ( cacheGlobals->lru != iterator ) - { - swapIterator = cacheGlobals->lru; - cacheGlobals->lru = iterator; - iterator->nextMRU->nextLRU = iterator->nextLRU; - if ( iterator->nextLRU != nil ) - iterator->nextLRU->nextMRU = iterator->nextMRU; - else - cacheGlobals->mru= iterator->nextMRU; - iterator->nextLRU = swapIterator; - iterator->nextMRU = nil; - swapIterator->nextMRU = iterator; - } -} - - - -//_______________________________________________________________________________ -// Routine: PrepareForLongName -// -// Function: Takes a CatalogIterator whose nameType is kShortUnicodeName, and -// changes the nameType to kLongUnicodeName. -// -// Since long Unicode names aren't stored in the CatalogIterator itself, we have -// to point to an HFSUniStr255 for storage. In the current implementation, we have -// just one such global buffer in the cache globals. We'll set the iterator to -// point to the global buffer and invalidate the iterator that was using it -// (i.e. the iterator whose nameType is kLongUnicodeName). -// -// Eventually, we might want to have a list of long name buffers which we recycle -// using an LRU algorithm. Or perhaps, some other way.... -// -// Assumes: catalogIterator is locked (MacOS X) -//_______________________________________________________________________________ -static void -PrepareForLongName ( CatalogIterator *iterator ) -{ - CatalogCacheGlobals *cacheGlobals = GetCatalogCacheGlobals(); - CatalogIterator *iter; - - if (DEBUG_BUILD && iterator->nameType != kShortUnicodeName) - DebugStr("\p PrepareForLongName: nameType is wrong!"); - - // - // Walk through all the iterators. The first iterator whose nameType - // is kLongUnicodeName is invalidated (because it is using the global - // long name buffer). 
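/*
 * Standalone restatement, not part of the original sources, of the
 * move-to-front step that the two list routines above perform on the
 * shared MRU/LRU chain.  The chain is doubly linked through nextMRU
 * (toward the least recently used end) and nextLRU (toward the most
 * recently used end), and both list heads may need updating when the
 * moved node was the tail:
 */
#include <stddef.h>

struct demo_node {
    struct demo_node *nextMRU;  /* toward the LRU end */
    struct demo_node *nextLRU;  /* toward the MRU end */
};

struct demo_list {
    struct demo_node *mru;      /* most recently used (head) */
    struct demo_node *lru;      /* least recently used (tail) */
};

static void
demo_move_to_front(struct demo_list *l, struct demo_node *n)
{
    if (l->mru == n)
        return;                         /* already at the front */

    /* unlink: n is not the head, so it has a predecessor */
    n->nextLRU->nextMRU = n->nextMRU;
    if (n->nextMRU != NULL)
        n->nextMRU->nextLRU = n->nextLRU;
    else
        l->lru = n->nextLRU;            /* n was the tail */

    /* relink at the head */
    n->nextMRU = l->mru;
    n->nextLRU = NULL;
    l->mru->nextLRU = n;
    l->mru = n;
}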
- // - - CATALOG_ITER_LIST_LOCK(cacheGlobals); - - for ( iter = cacheGlobals->mru ; iter != nil ; iter = iter->nextMRU ) - { - if (iter->nameType == kLongUnicodeName) - { - // if iterator is not already last then make it last - if ( iter->nextMRU != nil ) - InsertCatalogIteratorAsLRU( cacheGlobals, iter ); - - (void) CI_LOCK_FROM_LIST(cacheGlobals,iter); - iter->volume = 0; // trash it - iter->folderID = 0; - (void) CI_UNLOCK(iter); - - #if TARGET_API_MACOS_X - break; - #endif - } - } - - /* - * if iter is nil then none of the iterators was using the LongUnicodeName buffer - */ - if (iter == nil) - CATALOG_ITER_LIST_UNLOCK(cacheGlobals); - - // - // Change the nameType of this iterator and point to the global - // long name buffer. Note - this iterator is already locked - // - iterator->nameType = kLongUnicodeName; - iterator->folderName.longNamePtr = &cacheGlobals->longName; -} - diff --git a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c index d1a43afb8..cad8b871e 100644 --- a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c +++ b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c @@ -291,10 +291,10 @@ FlushCatalog(ExtendedVCB *volume) if ( 0 /*fcb->fcbFlags & fcbModifiedMask*/ ) { - VCB_LOCK(volume); + HFS_MOUNT_LOCK(volume, TRUE); volume->vcbFlags |= 0xFF00; // Mark the VCB dirty volume->vcbLsMod = GetTimeUTC(); // update last modified date - VCB_UNLOCK(volume); + HFS_MOUNT_UNLOCK(volume, TRUE); // result = FlushVolumeControlBlock(volume); } diff --git a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c index 80d7da83b..812f3e58c 100644 --- a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c +++ b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -35,12 +35,12 @@ struct ExtentsRecBuffer { typedef struct ExtentsRecBuffer ExtentsRecBuffer; -UInt32 CheckExtents( void *extents, UInt32 blocks, Boolean isHFSPlus ); -OSErr DeleteExtents( ExtendedVCB *vcb, UInt32 fileNumber, Boolean isHFSPlus ); -OSErr MoveExtents( ExtendedVCB *vcb, UInt32 srcFileID, UInt32 destFileID, Boolean isHFSPlus ); -void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); -void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); -void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, UInt16 bufferCount ); +static UInt32 CheckExtents( void *extents, UInt32 blocks, Boolean isHFSPlus ); +static OSErr DeleteExtents( ExtendedVCB *vcb, UInt32 fileNumber, Boolean isHFSPlus ); +static OSErr MoveExtents( ExtendedVCB *vcb, UInt32 srcFileID, UInt32 destFileID, Boolean isHFSPlus ); +static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); +static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); +static void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, UInt16 bufferCount ); @@ -56,9 +56,6 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param OSErr err; Boolean isHFSPlus = ( vcb->vcbSigWord == kHFSPlusSigWord ); - TrashCatalogIterator(vcb, srcID); // invalidate any iterators for this parentID - TrashCatalogIterator(vcb, destID); // invalidate any iterators for this parentID - err = BuildCatalogKeyUTF8(vcb, srcID, srcName, kUndefinedStrLen, &srcKey, NULL); ReturnIfError(err); @@ -351,7 +348,7 @@ FlushAndReturn: } -void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) +static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) { dest->hfsFile.dataLogicalSize = src->hfsFile.dataLogicalSize; dest->hfsFile.dataPhysicalSize = src->hfsFile.dataPhysicalSize; @@ -362,7 +359,7 @@ void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) BlockMoveData( src->hfsFile.rsrcExtents, dest->hfsFile.rsrcExtents, sizeof(HFSExtentRecord) ); } -void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) +static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) { BlockMoveData( &src->hfsPlusFile.dataFork, &dest->hfsPlusFile.dataFork, sizeof(HFSPlusForkData) ); BlockMoveData( &src->hfsPlusFile.resourceFork, &dest->hfsPlusFile.resourceFork, sizeof(HFSPlusForkData) ); @@ -370,7 +367,7 @@ void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) } -OSErr MoveExtents( ExtendedVCB *vcb, UInt32 srcFileID, UInt32 destFileID, Boolean isHFSPlus ) +static OSErr MoveExtents( ExtendedVCB *vcb, UInt32 srcFileID, UInt32 destFileID, Boolean isHFSPlus ) { FCB * fcb; ExtentsRecBuffer extentsBuffer[kNumExtentsToCache]; @@ -528,7 +525,7 @@ OSErr MoveExtents( ExtendedVCB *vcb, UInt32 srcFileID, UInt32 destFileID, Boolea } -void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, UInt16 bufferCount ) +static void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, UInt16 bufferCount ) { BlockMoveData( key, &(buffer[bufferCount].extentKey), sizeof( ExtentKey ) ); BlockMoveData( data, &(buffer[bufferCount].extentData), sizeof( ExtentRecord ) ); @@ -536,7 +533,7 @@ void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffe //-- Delete all extents in extent file that have the ID given. 
-OSErr DeleteExtents( ExtendedVCB *vcb, UInt32 fileID, Boolean isHFSPlus ) +static OSErr DeleteExtents( ExtendedVCB *vcb, UInt32 fileID, Boolean isHFSPlus ) { FCB * fcb; ExtentKey * extentKeyPtr; @@ -614,7 +611,7 @@ OSErr DeleteExtents( ExtendedVCB *vcb, UInt32 fileID, Boolean isHFSPlus ) // Check if there are extents represented in the extents overflow file. -UInt32 CheckExtents( void *extents, UInt32 totalBlocks, Boolean isHFSPlus ) +static UInt32 CheckExtents( void *extents, UInt32 totalBlocks, Boolean isHFSPlus ) { UInt32 extentAllocationBlocks; UInt16 i; diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index 6ac3df68d..76c6a407a 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,136 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - File: FileExtentMapping.c - - Contains: xxx put contents here xxx - - Version: HFS Plus 1.0 - - Written by: Dave Heller, Mark Day - - Copyright: © 1996-1999 by Apple Computer, Inc., all rights reserved. - - File Ownership: - - DRI: Mark Day - - Other Contact: xxx put other contact here xxx - - Technology: xxx put technology here xxx - - Writers: - - (DSH) Deric Horn - (msd) Mark Day - (djb) Don Brady - - Change History (most recent first): - 9/9/99 djb Fix fcbModifiedMask flag testing logic. - 8/25/98 djb Flush extents b-tree header if dirty (2371088). - 6/30/98 djb Add functions NodesAreContiguous and ExtentsAreIntegral (for radar #2249539). - 6/23/98 djb Changed DeallocFile to DeleteFile which now deletes the catalog record. - Fixed UpdateExtentRecord to pass correct fcb to Btree routines. Fixed - hfs+ bug in CreateExtentRecord (double dereference). - 5/20/98 djb In ExtendFileC don't lie about the peof! (radar #2230094). - 4/17/98 djb Add VCB locking. - 4/2/98 djb Switch over to real BTree interface (no more BTreeWrapper.c). - 3/31/98 djb Sync up with final HFSVolumes.h header file. - - 1/23/98 msd Bug 2208024: AllocContig is actually allocating one extent even - though there is not enough contiguous space. - 12/2/97 DSH GetFCBExtentRecord no longer static so DFA can use it. - 10/20/97 msd When allocating more space for a file, do the clump size - calculations in ExtendFileC, not BlockAllocate. Undo change from - . - 10/17/97 msd Conditionalize DebugStrs. - 10/16/97 msd Simplify the code path for MapFileBlockC (logical to physical - block mapping) in the typical case where the file isn't - fragmented so badly that it has extents in the extents B-tree. - Simplified some of the calculations for all cases. - 10/13/97 DSH FindExtentRecord & DeleteExtentRecord are also being used by DFA - no longer static. - 10/6/97 msd When extending a file, set the physical EOF to include any extra - space allocated due to a file's clump size. - 9/19/97 msd Remove the MapLogicalToPhysical SPI. It was never used and is - not being tested anyway. - 9/5/97 msd In CompareExtentKeys and CompareExtentKeysPlus, use the symbolic - constants for key length. Don't DebugStr unless DEBUG_BUILD is - set. - 7/24/97 djb Add instrumentation to MapFileBlockC - 7/16/97 DSH FilesInternal.i renamed FileMgrInternal.i to avoid name - collision - 7/15/97 DSH AdjEOF() mark the FCB as modified. 
(1664389) - 7/8/97 DSH Loading PrecompiledHeaders from define passed in on C line - 7/3/97 msd Bug #1663518. Remove DebugStr when setting the FCB extent record - for a volume control file. - 6/27/97 msd Moved enum kFirstFileRefnum to FilesInternal. - 6/24/97 djb Include "CatalogPrivate.h" - 6/16/97 msd Finish implementation of CreateLargeFile SPI. - 6/12/97 msd Add stub for CreateLargeFile SPI. - 6/5/97 msd Add MapLogicalToPhysical. - 6/2/97 msd In TruncateFileC, don't update the extent record unless it was - actually changed (prevents extra updates when truncating to the - end of the extent, and it is the last extent of the file.) Added - an AdjustEOF routine called by the assembly AdjEOF routine. It - copies the EOF, physical length, and extent information from one - FCB to all other FCBs for that fork. - 5/20/97 DSH Removed const declaration in MapFileBlocC, const is benign when - passing by value, and SC requires it to match prototype. - 5/15/97 msd Change enum kResourceForkType from -1 to 0xFF since it is now - unsigned. Change all forkType parameters to UInt8. - 5/7/97 msd When checking for an unused extent descriptor, check the length, - not the starting block. - 4/24/97 djb first checked in - 4/11/97 DSH use extended VCB fields catalogRefNum, and extentsRefNum. - 4/4/97 djb Get in sync with volume format changes. - 3/17/97 DSH Casting to compile with SC. - 2/26/97 msd Add instrumentation in ExtendFileC and TruncateFileC. In - CompareExtentKeys and CompareExtentKeysPlus, make sure the key - lengths are correct. - 2/5/97 msd The comparison with fsBTStartOfIterationErr didn't work because - the enum is an unsigned long; it is now casted to an OSErr - before comparing. - 1/31/97 msd In FindExtentRecord, turn an fsBTStartOfIterationErr error into - btNotFound. - 1/28/97 msd Fixed bug in MapFileBlockC where it returned the wrong number of - bytes available at the given block number. This could - potentially cause programs to read or write over other files. - 1/16/97 djb Extent key compare procs now return SInt32. Fixed - UpdateExtentRecord - it was passing a pointer to an ExtentKey - pointer. - 1/10/97 msd Change TruncateFileC to call DellocateFork when the new PEOF is - 0. Fixes a fxRangeErr returned when no extents existed. - 1/6/97 msd Previous change prevents extent records from being removed if - the files new PEOF is in the local (FCB/catalog) extents. - 1/3/97 djb Temp fix in TruncateFileC to prevent unwanted calls to - TruncateExtents. - 12/23/96 msd Previous change to SearchExtentFile didn't set up the outputs - for hint and key when the FCB extent record wasn't full. - 12/20/96 msd In SearchExtentFile, don't bother searching the extents file if - the FCB's extent record wasn't full, or if the FCB was for the - extents file itself. Modified SearchExtentRecord to return a - Boolean to indicate that the record was not full. - 12/19/96 DSH Changed refs from VCB to ExtendedVCB - 12/19/96 djb Updated for new B-tree Manager interface. - 12/12/96 djb Really use new SPI for GetCatalogNode. - 12/12/96 djb Use new Catalog SPI for GetCatalogNode. Added Mark's changes to - MapFileBlockC. - 12/11/96 msd TruncateFileC must always release extents, even if PEOF hasn't - changed (since allocation may have been rounded up due to clump - size). - 12/10/96 msd Check PRAGMA_LOAD_SUPPORTED before loading precompiled headers. - 12/4/96 DSH Precompiled headers - 11/26/96 msd Add an exported routine to grow the parallel FCB table to - accomodate the HFS+ ExtentRecord. 
- 11/26/96 msd Convert internal routines to use ExtentKey and ExtentRecord - (instead of the raw HFS structures). - 11/21/96 msd Added CompareExtentKeysPlus(). - 11/20/96 msd Finish porting FXM to C. - 11/6/96 DKH first checked in - -*/ #include "../../hfs.h" @@ -157,7 +27,6 @@ #include "../headers/FileMgrInternal.h" #include "../headers/BTreesInternal.h" -#include "../headers/CatalogPrivate.h" // calling a private catalog routine (LocateCatalogNode) #include @@ -165,8 +34,6 @@ ============================================================ Public (Exported) Routines: ============================================================ - DeAllocFile Deallocate all disk space allocated to a specified file. - Both forks are deallocated. ExtendFileC Allocate more space to a given file. @@ -193,21 +60,8 @@ Public (Exported) Routines: FlushExtentFile Flush the extents file for a given volume. - GrowParallelFCBs - Make sure the parallel FCB entries are big enough to support - the HFS+ ExtentRecord. If not, the array is grown and the - pre-existing data copied over. - AdjustEOF - Copy EOF, physical length, and extent records from one FCB - to all other FCBs for that fork. This is used when a file is - grown or shrunk as the result of a Write, SetEOF, or Allocate. - MapLogicalToPhysical - Map some position in a file to a volume block number. Also - returns the number of contiguous bytes that are mapped there. - This is a queued HFSDispatch call that does the equivalent of - MapFileBlockC, using a parameter block. ============================================================ Internal Routines: @@ -269,7 +123,7 @@ static OSErr DeleteExtentRecord( UInt32 startBlock); static OSErr CreateExtentRecord( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, HFSPlusExtentKey *key, HFSPlusExtentRecord extents, UInt32 *hint); @@ -280,7 +134,7 @@ static OSErr GetFCBExtentRecord( HFSPlusExtentRecord extents); static OSErr SearchExtentFile( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, const FCB *fcb, SInt64 filePosition, HFSPlusExtentKey *foundExtentKey, @@ -290,7 +144,7 @@ static OSErr SearchExtentFile( UInt32 *endingFABNPlusOne ); static OSErr SearchExtentRecord( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, UInt32 searchFABN, const HFSPlusExtentRecord extentData, UInt32 extentDataStartFABN, @@ -319,7 +173,7 @@ static OSErr TruncateExtents( Boolean * recordDeleted); static OSErr UpdateExtentRecord ( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, FCB *fcb, const HFSPlusExtentKey *extentFileKey, const HFSPlusExtentRecord extentData, @@ -484,22 +338,32 @@ static OSErr FindExtentRecord( static OSErr CreateExtentRecord( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, HFSPlusExtentKey *key, HFSPlusExtentRecord extents, UInt32 *hint) { BTreeIterator * btIterator; FSBufferDescriptor btRecord; - UInt16 btRecordSize; - OSErr err; + UInt16 btRecordSize; + int lockflags; + OSErr err; err = noErr; *hint = 0; MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); bzero(btIterator, sizeof(*btIterator)); - + + /* + * The lock taken by callers of ExtendFileC is speculative and + * only occurs when the file already has overflow extents. So + * We need to make sure we have the lock here. The extents + * btree lock can be nested (its recursive) so we always take + * it here. 
+ */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + if (vcb->vcbSigWord == kHFSSigWord) { HFSExtentKey * keyPtr; HFSExtentRecord data; @@ -534,6 +398,8 @@ static OSErr CreateExtentRecord( (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); + hfs_systemfile_unlock(vcb, lockflags); + FREE(btIterator, M_TEMP); return err; } @@ -588,20 +454,6 @@ static OSErr DeleteExtentRecord( // // Function: Maps a file position into a physical disk address. // -// Input: A2.L - VCB pointer -// (A1,D1.W) - FCB pointer -// D4.L - number of bytes desired -// D5.L - file position (byte address) -// -// Output: D3.L - physical start block -// D6.L - number of contiguous bytes available (up to D4 bytes) -// D0.L - result code <01Oct85> -// 0 = ok -// FXRangeErr = file position beyond mapped range <17Oct85> -// FXOvFlErr = extents file overflow <17Oct85> -// other = error <17Oct85> -// -// Called By: Log2Phys (read/write in place), Cache (map a file block). //_________________________________________________________________________________ __private_extern__ @@ -610,7 +462,7 @@ OSErr MapFileBlockC ( FCB *fcb, // FCB of file size_t numberOfBytes, // number of contiguous bytes desired off_t offset, // starting offset within file (in bytes) - daddr_t *startSector, // first sector (NOT an allocation block) + daddr64_t *startSector, // first sector (NOT an allocation block) size_t *availableBytes) // number of contiguous bytes (up to numberOfBytes) { OSErr err; @@ -625,12 +477,12 @@ OSErr MapFileBlockC ( off_t dataEnd; // (offset) end of range that is contiguous UInt32 sectorsPerBlock; // Number of sectors per allocation block UInt32 startBlock; // volume allocation block corresponding to firstFABN - daddr_t temp; + daddr64_t temp; off_t tmpOff; allocBlockSize = vcb->blockSize; sectorSize = VCBTOHFS(vcb)->hfs_phys_block_size; - + err = SearchExtentFile(vcb, fcb, offset, &foundKey, foundData, &foundIndex, &hint, &nextFABN); if (err == noErr) { startBlock = foundData[foundIndex].startBlock; @@ -658,7 +510,7 @@ OSErr MapFileBlockC ( // offset in sectors from start of the extent + // offset in sectors from start of allocation block space // - temp = (daddr_t)((offset - (off_t)((off_t)(firstFABN) * (off_t)(allocBlockSize)))/sectorSize); + temp = (daddr64_t)((offset - (off_t)((off_t)(firstFABN) * (off_t)(allocBlockSize)))/sectorSize); temp += startBlock * sectorsPerBlock; /* Add in any volume offsets */ @@ -682,6 +534,7 @@ OSErr MapFileBlockC ( else *availableBytes = tmpOff; } + return noErr; } @@ -762,6 +615,16 @@ static OSErr TruncateExtents( UInt32 hint; HFSPlusExtentKey key; HFSPlusExtentRecord extents; + int lockflags; + + /* + * The lock taken by callers of TruncateFileC is speculative and + * only occurs when the file already has overflow extents. So + * We need to make sure we have the lock here. The extents + * btree lock can be nested (its recursive) so we always take + * it here. 
+ */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); while (true) { err = FindExtentRecord(vcb, forkType, fileID, startBlock, false, &key, extents, &hint); @@ -780,6 +643,7 @@ static OSErr TruncateExtents( *recordDeleted = true; startBlock += numberExtentsReleased; } + hfs_systemfile_unlock(vcb, lockflags); return err; } @@ -823,9 +687,14 @@ OSErr FlushExtentFile( ExtendedVCB *vcb ) { FCB * fcb; OSErr err; + int lockflags; fcb = GetFileControlBlock(vcb->extentsRefNum); + + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); err = BTFlushPath(fcb); + hfs_systemfile_unlock(vcb, lockflags); + if ( err == noErr ) { // If the FCB for the extent "file" is dirty, mark the VCB as dirty. @@ -1041,23 +910,6 @@ AddFileExtent(ExtendedVCB *vcb, FCB *fcb, UInt32 startBlock, UInt32 blockCount) // // Function: Extends the disk space allocated to a file. // -// Input: A2.L - VCB pointer -// A1.L - pointer to FCB array -// D1.W - file refnum -// D3.B - option flags -// kEFContigMask - force contiguous allocation -// kEFAllMask - allocate all requested bytes or none -// NOTE: You may not set both options. -// D4.L - number of additional bytes to allocate -// -// Output: D0.W - result code -// 0 = ok -// -n = IO error -// D6.L - number of bytes allocated -// -// Called by: FileAloc,FileWrite,SetEof -// -// Note: ExtendFile updates the PEOF in the FCB. //_________________________________________________________________________________ __private_extern__ @@ -1127,8 +979,11 @@ OSErr ExtendFileC ( && (vcb->vcbSigWord == kHFSPlusSigWord) && (bytesToAdd < (SInt64)HFS_MAX_DEFERED_ALLOC) && (blocksToAdd < hfs_freeblks(VCBTOHFS(vcb), 1))) { + HFS_MOUNT_LOCK(vcb, TRUE); + vcb->loanedBlocks += blocksToAdd; + HFS_MOUNT_UNLOCK(vcb, TRUE); + fcb->ff_unallocblocks += blocksToAdd; - vcb->loanedBlocks += blocksToAdd; FTOC(fcb)->c_blocks += blocksToAdd; fcb->ff_blocks += blocksToAdd; @@ -1140,13 +995,18 @@ OSErr ExtendFileC ( * Give back any unallocated blocks before doing real allocations. */ if (fcb->ff_unallocblocks > 0) { - blocksToAdd += fcb->ff_unallocblocks; - bytesToAdd = (SInt64)blocksToAdd * (SInt64)volumeBlockSize; + u_int32_t loanedBlocks; - vcb->loanedBlocks -= fcb->ff_unallocblocks; - FTOC(fcb)->c_blocks -= fcb->ff_unallocblocks; - fcb->ff_blocks -= fcb->ff_unallocblocks; + loanedBlocks = fcb->ff_unallocblocks; + blocksToAdd += loanedBlocks; + bytesToAdd = (SInt64)blocksToAdd * (SInt64)volumeBlockSize; + FTOC(fcb)->c_blocks -= loanedBlocks; + fcb->ff_blocks -= loanedBlocks; fcb->ff_unallocblocks = 0; + + HFS_MOUNT_LOCK(vcb, TRUE); + vcb->loanedBlocks -= loanedBlocks; + HFS_MOUNT_UNLOCK(vcb, TRUE); } // @@ -1154,7 +1014,7 @@ OSErr ExtendFileC ( // then set the maximum number of bytes to the requested number of bytes // rounded up to a multiple of the clump size. // - if ((vcb->vcbClpSiz > volumeBlockSize) + if ((vcb->vcbClpSiz > (int32_t)volumeBlockSize) && (bytesToAdd < (SInt64)HFS_MAX_DEFERED_ALLOC) && (flags & kEFNoClumpMask) == 0) { maximumBytes = (SInt64)howmany(bytesToAdd, vcb->vcbClpSiz); @@ -1166,13 +1026,15 @@ OSErr ExtendFileC ( // // Compute new physical EOF, rounded up to a multiple of a block. // - if ((vcb->vcbSigWord == kHFSSigWord) && ((((SInt64)fcb->ff_blocks * (SInt64)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes)) // Too big? + if ( (vcb->vcbSigWord == kHFSSigWord) && // Too big? + ((((SInt64)fcb->ff_blocks * (SInt64)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes) ) { if (allOrNothing) // Yes, must they have it all? 
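/*
 * Simplified sketch, not part of the original sources, of the block-loan
 * bookkeeping in the ExtendFileC hunks above.  A small allocation can be
 * deferred by "loaning" free blocks to the fork, and the loan must be
 * repaid before a real allocation is attempted.  Invariant: every block
 * counted in the fork's unallocated-block count is also counted in the
 * volume's loanedBlocks (both are updated under the mount lock in the
 * real code):
 */
#include <stdint.h>

struct demo_vol  { uint32_t loanedBlocks; };
struct demo_fork { uint32_t unallocblocks; uint32_t blocks; };

static void
demo_loan_blocks(struct demo_vol *v, struct demo_fork *f, uint32_t n)
{
    v->loanedBlocks  += n;
    f->unallocblocks += n;
    f->blocks        += n;      /* the fork looks bigger immediately */
}

static uint32_t
demo_repay_loan(struct demo_vol *v, struct demo_fork *f)
{
    uint32_t n = f->unallocblocks;

    f->unallocblocks = 0;
    f->blocks       -= n;
    v->loanedBlocks -= n;
    return n;                   /* caller folds n back into blocksToAdd */
}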
goto Overflow; // Yes, can't have it else { --blocksToAdd; // No, give give 'em one block less bytesToAdd -= volumeBlockSize; } + } // // If allocation is all-or-nothing, make sure there are @@ -1379,10 +1241,17 @@ Exit: /* Keep the roving allocator out of the metadata zone. */ if (vcb->nextAllocation >= VCBTOHFS(vcb)->hfs_metazone_start && vcb->nextAllocation <= VCBTOHFS(vcb)->hfs_metazone_end) { + HFS_MOUNT_LOCK(vcb, TRUE); vcb->nextAllocation = VCBTOHFS(vcb)->hfs_metazone_end + 1; + vcb->vcbFlags |= 0xFF00; + HFS_MOUNT_UNLOCK(vcb, TRUE); } } - *actualBytesAdded = (SInt64)(fcb->ff_blocks - prevblocks) * (SInt64)volumeBlockSize; + if (prevblocks < fcb->ff_blocks) { + *actualBytesAdded = (SInt64)(fcb->ff_blocks - prevblocks) * (SInt64)volumeBlockSize; + } else { + *actualBytesAdded = 0; + } if (needsFlush) (void) FlushExtentFile(vcb); @@ -1405,18 +1274,6 @@ Overflow: // block boundry. If the 'TFTrunExt' option is specified, the file is // truncated to the end of the extent containing the new PEOF. // -// Input: A2.L - VCB pointer -// A1.L - pointer to FCB array -// D1.W - file refnum -// D2.B - option flags -// TFTrunExt - truncate to the extent containing new PEOF -// D3.L - new PEOF -// -// Output: D0.W - result code -// 0 = ok -// -n = IO error -// -// Note: TruncateFile updates the PEOF in the FCB. //_________________________________________________________________________________ __private_extern__ @@ -1441,7 +1298,6 @@ OSErr TruncateFileC ( UInt8 forkType; Boolean extentChanged; // true if we actually changed an extent Boolean recordDeleted; // true if an extent record got deleted - recordDeleted = false; @@ -1585,7 +1441,6 @@ OSErr TruncateFileC ( Done: ErrorExit: - if (recordDeleted) (void) FlushExtentFile(vcb); @@ -1611,7 +1466,8 @@ OSErr HeadTruncateFile ( UInt32 startblk; UInt32 blksfreed; int i, j; - int error; + int error = 0; + int lockflags; if (vcb->vcbSigWord != kHFSPlusSigWord) @@ -1663,6 +1519,8 @@ OSErr HeadTruncateFile ( if (blkcnt == 0) goto CopyExtents; + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + /* * Process overflow extents */ @@ -1716,6 +1574,7 @@ OSErr HeadTruncateFile ( startblk += extblks; } + hfs_systemfile_unlock(vcb, lockflags); CopyExtents: if (blksfreed) { @@ -1724,7 +1583,8 @@ CopyExtents: FTOC(fcb)->c_blocks -= blkcnt; fcb->ff_blocks = blkcnt; - FTOC(fcb)->c_flag |= C_CHANGE | C_FORCEUPDATE; + FTOC(fcb)->c_flag |= C_FORCEUPDATE; + FTOC(fcb)->c_touch_chgtime = TRUE; (void) FlushExtentFile(vcb); } @@ -1758,7 +1618,7 @@ ErrorExit: //‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ static OSErr SearchExtentRecord( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, UInt32 searchFABN, const HFSPlusExtentRecord extentData, UInt32 extentDataStartFABN, @@ -1859,7 +1719,7 @@ static OSErr SearchExtentRecord( //‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ static OSErr SearchExtentFile( - const ExtendedVCB *vcb, + ExtendedVCB *vcb, const FCB *fcb, SInt64 filePosition, HFSPlusExtentKey *foundExtentKey, @@ -1872,6 +1732,7 @@ static OSErr SearchExtentFile( UInt32 filePositionBlock; SInt64 temp64; Boolean noMoreExtents; + int lockflags; temp64 = filePosition / (SInt64)vcb->blockSize; filePositionBlock = (UInt32)temp64; @@ -1904,8 +1765,11 @@ static OSErr SearchExtentFile( // // Find the desired record, or the previous record if it is the same fork // + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + err = FindExtentRecord(vcb, FORK_IS_RSRC(fcb) ? 
kResourceForkType : kDataForkType, FTOC(fcb)->c_fileid, filePositionBlock, true, foundExtentKey, foundExtentData, extentBTreeHint); + hfs_systemfile_unlock(vcb, lockflags); if (err == btNotFound) { // @@ -1938,7 +1802,7 @@ Exit: -//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +//============================================================================ // Routine: UpdateExtentRecord // // Function: Write new extent data to an existing extent record with a given key. @@ -1955,14 +1819,14 @@ Exit: // // Result: noErr = ok // (other) = error from BTree -//‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ +//============================================================================ static OSErr UpdateExtentRecord ( - const ExtendedVCB *vcb, - FCB *fcb, - const HFSPlusExtentKey *extentFileKey, - const HFSPlusExtentRecord extentData, - UInt32 extentBTreeHint) + ExtendedVCB *vcb, + FCB *fcb, + const HFSPlusExtentKey *extentFileKey, + const HFSPlusExtentRecord extentData, + UInt32 extentBTreeHint) { OSErr err = noErr; @@ -1975,6 +1839,7 @@ static OSErr UpdateExtentRecord ( FSBufferDescriptor btRecord; UInt16 btRecordSize; FCB * btFCB; + int lockflags; // // Need to find and change a record in Extents BTree @@ -1984,6 +1849,15 @@ static OSErr UpdateExtentRecord ( MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); bzero(btIterator, sizeof(*btIterator)); + /* + * The lock taken by callers of ExtendFileC/TruncateFileC is + * speculative and only occurs when the file already has + * overflow extents. So we need to make sure we have the lock + * here. The extents btree lock can be nested (its recursive) + * so we always take it here. + */ + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + if (vcb->vcbSigWord == kHFSSigWord) { HFSExtentKey * key; // Actual extent key used on disk in HFS HFSExtentRecord foundData; // The extent data actually found @@ -2030,6 +1904,7 @@ static OSErr UpdateExtentRecord ( } (void) BTFlushPath(btFCB); } + hfs_systemfile_unlock(vcb, lockflags); FREE(btIterator, M_TEMP); } @@ -2141,6 +2016,7 @@ Boolean NodesAreContiguous( HFSPlusExtentRecord extents; OSErr result; Boolean lastExtentReached; + int lockflags; if (vcb->blockSize >= nodeSize) @@ -2153,23 +2029,27 @@ Boolean NodesAreContiguous( if ( !ExtentsAreIntegral(extents, mask, &blocksChecked, &lastExtentReached) ) return FALSE; - if (lastExtentReached || (SInt64)((SInt64)blocksChecked * (SInt64)vcb->blockSize) >= fcb->ff_size) + if ( lastExtentReached || + (SInt64)((SInt64)blocksChecked * (SInt64)vcb->blockSize) >= (SInt64)fcb->ff_size) return TRUE; startBlock = blocksChecked; + lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); + // check the overflow extents (if any) while ( !lastExtentReached ) { result = FindExtentRecord(vcb, kDataForkType, fcb->ff_cp->c_fileid, startBlock, FALSE, &key, extents, &hint); if (result) break; - if ( !ExtentsAreIntegral(extents, mask, &blocksChecked, &lastExtentReached) ) + if ( !ExtentsAreIntegral(extents, mask, &blocksChecked, &lastExtentReached) ) { + hfs_systemfile_unlock(vcb, lockflags); return FALSE; - + } startBlock += blocksChecked; } - + hfs_systemfile_unlock(vcb, lockflags); return TRUE; } diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index 1fa82a921..157b7fb57 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -240,9 +240,9 @@ OSErr 
BlockAllocate ( // next block to allocate from. // if (startingBlock == 0) { - VCB_LOCK(vcb); + HFS_MOUNT_LOCK(vcb, TRUE); startingBlock = vcb->nextAllocation; - VCB_UNLOCK(vcb); + HFS_MOUNT_UNLOCK(vcb, TRUE); updateAllocPtr = true; } if (startingBlock >= vcb->totalBlocks) { @@ -264,7 +264,9 @@ OSErr BlockAllocate ( (*actualStartBlock > startingBlock) && ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { - vcb->nextAllocation = *actualStartBlock; /* XXX */ + HFS_MOUNT_LOCK(vcb, TRUE); + vcb->nextAllocation = *actualStartBlock; + HFS_MOUNT_UNLOCK(vcb, TRUE); } } else { /* @@ -285,7 +287,13 @@ OSErr BlockAllocate ( actualNumBlocks); } - if (err == noErr) { +Exit: + // if we actually allocated something then go update the + // various bits of state that we maintain regardless of + // whether there was an error (i.e. partial allocations + // still need to update things like the free block count). + // + if (*actualNumBlocks != 0) { // // If we used the volume's roving allocation pointer, then we need to update it. // Adding in the length of the current allocation might reduce the next allocate @@ -294,7 +302,7 @@ OSErr BlockAllocate ( // the file is closed or its EOF changed. Leaving the allocation pointer at the // start of the last allocation will avoid unnecessary fragmentation in this case. // - VCB_LOCK(vcb); + HFS_MOUNT_LOCK(vcb, TRUE); if (updateAllocPtr && ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || @@ -305,14 +313,12 @@ OSErr BlockAllocate ( // Update the number of free blocks on the volume // vcb->freeBlocks -= *actualNumBlocks; - hfs_generate_volume_notifications(VCBTOHFS(vcb)); - VCB_UNLOCK(vcb); - MarkVCBDirty(vcb); + HFS_MOUNT_UNLOCK(vcb, TRUE); + + hfs_generate_volume_notifications(VCBTOHFS(vcb)); } -Exit: - return err; } @@ -363,14 +369,14 @@ OSErr BlockDeallocate ( // // Update the volume's free block count, and mark the VCB as dirty. 
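/*
 * Fragment restating the locking discipline of the hunks above; it is
 * illustrative only and assumes the surrounding HFS context (vcb,
 * actualNumBlocks).  Mount-wide fields such as nextAllocation and
 * freeBlocks are mutated under HFS_MOUNT_LOCK, while the volume
 * notifications are deliberately generated after the lock is dropped:
 */
    HFS_MOUNT_LOCK(vcb, TRUE);
    vcb->freeBlocks -= actualNumBlocks;     /* shared state, under lock */
    MarkVCBDirty(vcb);
    HFS_MOUNT_UNLOCK(vcb, TRUE);

    /* slower, potentially re-entrant work runs outside the lock */
    hfs_generate_volume_notifications(VCBTOHFS(vcb));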
// - VCB_LOCK(vcb); + HFS_MOUNT_LOCK(vcb, TRUE); vcb->freeBlocks += numBlocks; - hfs_generate_volume_notifications(VCBTOHFS(vcb)); if (vcb->nextAllocation == (firstBlock + numBlocks)) vcb->nextAllocation -= numBlocks; - VCB_UNLOCK(vcb); MarkVCBDirty(vcb); - + HFS_MOUNT_UNLOCK(vcb, TRUE); + + hfs_generate_volume_notifications(VCBTOHFS(vcb)); Exit: return err; @@ -395,8 +401,10 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb) int bytesperblock; UInt8 byte; UInt8 *buffer; + blockRef = 0; bytesleft = freeblocks = 0; + buffer = NULL; bit = VCBTOHFS(vcb)->hfs_metazone_start; if (bit == 1) bit = 0; @@ -484,35 +492,35 @@ static OSErr ReadBitmapBlock( OSErr err; struct buf *bp = NULL; struct vnode *vp = NULL; - UInt32 block; + daddr64_t block; UInt32 blockSize; /* - * volume bitmap blocks are protected by the Extents B-tree lock + * volume bitmap blocks are protected by the allocation file lock */ - REQUIRE_FILE_LOCK(vcb->extentsRefNum, false); + REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); blockSize = (UInt32)vcb->vcbVBMIOSize; - block = bit / (blockSize * kBitsPerByte); + block = (daddr64_t)(bit / (blockSize * kBitsPerByte)); if (vcb->vcbSigWord == kHFSPlusSigWord) { - vp = vcb->allocationsRefNum; /* use allocation file vnode */ + vp = vcb->hfs_allocation_vp; /* use allocation file vnode */ } else /* hfs */ { vp = VCBTOHFS(vcb)->hfs_devvp; /* use device I/O vnode */ block += vcb->vcbVBMSt; /* map to physical block */ } - err = meta_bread(vp, block, blockSize, NOCRED, &bp); + err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp); if (bp) { if (err) { - brelse(bp); + buf_brelse(bp); *blockRef = NULL; *buffer = NULL; } else { *blockRef = (UInt32)bp; - *buffer = (UInt32 *)bp->b_data; + *buffer = (UInt32 *)buf_dataptr(bp); } } @@ -554,10 +562,10 @@ static OSErr ReleaseBitmapBlock( if (hfsmp->jnl) { journal_modify_block_end(hfsmp->jnl, bp); } else { - bdwrite(bp); + buf_bdwrite(bp); } } else { - brelse(bp); + buf_brelse(bp); } } @@ -1616,4 +1624,125 @@ ErrorExit: return err; } +/* + * Test to see if any blocks in a range are allocated. + * + * The journal or allocation file lock must be held. + */ +__private_extern__ +int +hfs_isallocated(struct hfsmount *hfsmp, u_long startingBlock, u_long numBlocks) +{ + UInt32 *currentWord; // Pointer to current word within bitmap block + UInt32 wordsLeft; // Number of words left in this bitmap block + UInt32 bitMask; // Word with given bits already set (ready to test) + UInt32 firstBit; // Bit index within word of first bit to allocate + UInt32 numBits; // Number of bits in word to allocate + UInt32 *buffer = NULL; + UInt32 blockRef; + UInt32 bitsPerBlock; + UInt32 wordsPerBlock; + int inuse = 0; + int error; + + /* + * Pre-read the bitmap block containing the first word of allocation + */ + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + if (error) + return (error); + + /* + * Initialize currentWord, and wordsLeft. + */ + { + UInt32 wordIndexInBlock; + + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; + wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; + + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; + currentWord = buffer + wordIndexInBlock; + wordsLeft = wordsPerBlock - wordIndexInBlock; + } + + /* + * First test any non word aligned bits. 
+ */ + firstBit = startingBlock % kBitsPerWord; + if (firstBit != 0) { + bitMask = kAllBitsSetInWord >> firstBit; + numBits = kBitsPerWord - firstBit; + if (numBits > numBlocks) { + numBits = numBlocks; + bitMask &= ~(kAllBitsSetInWord >> (firstBit + numBits)); + } + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + inuse = 1; + goto Exit; + } + numBlocks -= numBits; + ++currentWord; + --wordsLeft; + } + + /* + * Test whole words (32 blocks) at a time. + */ + while (numBlocks >= kBitsPerWord) { + if (wordsLeft == 0) { + /* Read in the next bitmap block. */ + startingBlock += bitsPerBlock; + + buffer = NULL; + error = ReleaseBitmapBlock(hfsmp, blockRef, false); + if (error) goto Exit; + + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + if (error) goto Exit; + + /* Readjust currentWord and wordsLeft. */ + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + if (*currentWord != 0) { + inuse = 1; + goto Exit; + } + numBlocks -= kBitsPerWord; + ++currentWord; + --wordsLeft; + } + + /* + * Test any remaining blocks. + */ + if (numBlocks != 0) { + bitMask = ~(kAllBitsSetInWord >> numBlocks); + if (wordsLeft == 0) { + /* Read in the next bitmap block */ + startingBlock += bitsPerBlock; + + buffer = NULL; + error = ReleaseBitmapBlock(hfsmp, blockRef, false); + if (error) goto Exit; + + error = ReadBitmapBlock(hfsmp, startingBlock, &buffer, &blockRef); + if (error) goto Exit; + + currentWord = buffer; + wordsLeft = wordsPerBlock; + } + if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { + inuse = 1; + goto Exit; + } + } +Exit: + if (buffer) { + (void)ReleaseBitmapBlock(hfsmp, blockRef, false); + } + return (inuse); +} + diff --git a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c index 1e02d0932..91b3e7a98 100644 --- a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c +++ b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
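/*
 * Standalone restatement, not part of the original sources, of the
 * partial-word masking that hfs_isallocated uses above.  The bitmap
 * packs 32 big-endian bits per word; a range beginning at bit firstBit
 * is covered by kAllBitsSetInWord >> firstBit, trimmed on the right
 * when the range ends inside the same word:
 */
#include <stdint.h>

#define DEMO_BITS_PER_WORD  32u
#define DEMO_ALL_BITS_SET   0xFFFFFFFFu

/* mask covering numBits bits starting at firstBit within a single word */
static uint32_t
demo_range_mask(uint32_t firstBit, uint32_t numBits)
{
    uint32_t mask = DEMO_ALL_BITS_SET >> firstBit;

    if (firstBit + numBits < DEMO_BITS_PER_WORD)
        mask &= ~(DEMO_ALL_BITS_SET >> (firstBit + numBits));
    return mask;
}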
* * @APPLE_LICENSE_HEADER_START@ * @@ -51,36 +51,10 @@ enum { static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr ); -static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr ); static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr ); - -/* - * Convert file ID into a hexidecimal string with no leading zeros - */ -static void -GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr ) -{ - SInt32 i, b; - UInt8 *translate = (UInt8 *) "0123456789ABCDEF"; - UInt8 c; - - fileIDStr[0] = '#'; - - for ( i = 0, b = 28; b >= 0; b -= 4 ) { - c = *(translate + ((fileID >> b) & 0x0000000F)); - - /* if its not a leading zero add it to our string */ - if ( (c != (UInt8) '0') || (i > 1) || (b == 0) ) - fileIDStr[++i] = c; - } - - fileIDStr[++i] = '\0'; -} - - /* * Get filename extension (if any) as a C string */ @@ -235,7 +209,7 @@ static UInt32 HexStringToInteger(UInt32 length, const UInt8 *hexStr) { UInt32 value; - short i; + UInt32 i; UInt8 c; const UInt8 *p; @@ -448,7 +422,7 @@ ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteC char fileIDStr[15]; char extStr[15]; - GetFileIDString(cnid, fileIDStr); + sprintf(fileIDStr, "#%X", cnid); GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr); /* remove extension chars from source */ diff --git a/bsd/hfs/hfscommon/headers/BTreeScanner.h b/bsd/hfs/hfscommon/headers/BTreeScanner.h index ce9cf3002..368dd18c1 100644 --- a/bsd/hfs/hfscommon/headers/BTreeScanner.h +++ b/bsd/hfs/hfscommon/headers/BTreeScanner.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1996-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,7 +36,6 @@ // amount of time we are allowed to process a catalog search (in µ secs) // NOTE - code assumes kMaxMicroSecsInKernel is less than 1,000,000 -// jertodo - what should we set this to? enum { kMaxMicroSecsInKernel = (1000 * 100) }; // 1 tenth of a second // btree node scanner buffer size. at 32K we get 8 nodes. this is the size used diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index 650d82099..0cce7eb23 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -280,7 +280,7 @@ typedef BTreeIterator *BTreeIteratorPtr; //typedef SInt32 (* KeyCompareProcPtr)(BTreeKeyPtr a, BTreeKeyPtr b); -typedef SInt32 (* IterateCallBackProcPtr)(BTreeKeyPtr key, void * record, UInt16 recordLen, void * state); +typedef SInt32 (* IterateCallBackProcPtr)(BTreeKeyPtr key, void * record, void * state); extern OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc); @@ -323,7 +323,7 @@ extern OSStatus BTDeleteRecord (FCB *filePtr, BTreeIterator *iterator ); extern OSStatus BTGetInformation (FCB *filePtr, - UInt16 version, + UInt16 vers, BTreeInfoRec *info ); extern OSStatus BTFlushPath (FCB *filePtr ); diff --git a/bsd/hfs/hfscommon/headers/CatalogPrivate.h b/bsd/hfs/hfscommon/headers/CatalogPrivate.h index e18592ebc..fcf12ac7c 100644 --- a/bsd/hfs/hfscommon/headers/CatalogPrivate.h +++ b/bsd/hfs/hfscommon/headers/CatalogPrivate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2002 Apple Computer, Inc. All rights reserved. 
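/*
 * Illustrative check, not part of the original sources: the sprintf call
 * substituted above produces the same mangled-name tag that the deleted
 * GetFileIDString built by hand, a '#' followed by uppercase hex digits
 * with no leading zeros:
 */
#include <stdio.h>

static void
demo_mangle_tag(unsigned int cnid, char tag[15])
{
    sprintf(tag, "#%X", cnid);  /* cnid 0x1A2B3C yields "#1A2B3C" */
}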
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -79,73 +79,6 @@ #include "FileMgrInternal.h" #include "BTreesInternal.h" - #include - -// private catalog data cache - - - -enum { - kCatalogIteratorCount = 16 // total number of Catalog iterators (shared by all HFS/HFS Plus volumes) -}; - - -// Catalog Iterator Name Types -enum { - kShortPascalName, - kShortUnicodeName, - kLongUnicodeName // non-local name -}; - - -// short unicode name (used by CatalogIterator) -struct UniStr63 { - UInt16 length; /* number of unicode characters */ - UniChar unicode[63]; /* unicode characters */ -}; -typedef struct UniStr63 UniStr63; - - -struct CatalogIterator -{ - struct CatalogIterator *nextMRU; // next iterator in MRU order - struct CatalogIterator *nextLRU; // next iterator in LRU order - - ExtendedVCB *volume; - SInt16 currentIndex; - SInt16 reserved; - UInt32 currentOffset; - UInt32 nextOffset; - HFSCatalogNodeID folderID; - - UInt32 btreeNodeHint; // node the key was last seen in - UInt16 btreeIndexHint; // index the key was last seen at - UInt16 nameType; // { 0 = Pascal, 1 = Unicode, 3 = long name} - HFSCatalogNodeID parentID; // parent folder ID - union - { - Str31 pascalName; - UniStr63 unicodeName; - HFSUniStr255 * longNamePtr; - } folderName; - - struct lock__bsd__ iterator_lock; -}; -typedef struct CatalogIterator CatalogIterator; - - -struct CatalogCacheGlobals { - UInt32 iteratorCount; // Number of iterators in cache - CatalogIterator * mru; - CatalogIterator * lru; - UInt32 reserved; - HFSUniStr255 longName; // used by a single kLongUnicodeName iterator - - simple_lock_data_t simplelock; -}; -typedef struct CatalogCacheGlobals CatalogCacheGlobals; - - // // Private Catalog Manager Routines (for use only by Catalog Manager, CatSearch and FileID Services) // @@ -188,21 +121,6 @@ extern OSErr ExchangeFiles( FIDParam *filePB, WDCBRecPtr *wdcbPtr ); extern void UpdateCatalogName( ConstStr31Param srcName, Str31 destName ); -// Catalog Iterator Routines - -extern CatalogIterator* GetCatalogIterator(ExtendedVCB *volume, HFSCatalogNodeID folderID, UInt32 offset); - -extern OSErr ReleaseCatalogIterator( CatalogIterator *catalogIterator ); - -extern void TrashCatalogIterator( const ExtendedVCB *volume, HFSCatalogNodeID folderID ); - -void AgeCatalogIterator( CatalogIterator *catalogIterator ); - -extern void UpdateBtreeIterator( const CatalogIterator *catalogIterator, BTreeIterator *btreeIterator ); - -extern void UpdateCatalogIterator( const BTreeIterator *btreeIterator, CatalogIterator *catalogIterator ); - - #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ #endif //__CATALOGPRIVATE__ diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h index 8ed75e35f..15ccb6e63 100644 --- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h +++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -151,25 +151,6 @@ union ExtentRecord { HFSPlusExtentRecord hfsPlus; }; typedef union ExtentRecord ExtentRecord; -/* Universal catalog key */ - -union CatalogKey { - HFSCatalogKey hfs; - HFSPlusCatalogKey hfsPlus; -}; -typedef union CatalogKey CatalogKey; -/* Universal catalog data record */ - -union CatalogRecord { - SInt16 recordType; - HFSCatalogFolder hfsFolder; - HFSCatalogFile hfsFile; - HFSCatalogThread hfsThread; - HFSPlusCatalogFolder hfsPlusFolder; - HFSPlusCatalogFile hfsPlusFile; - HFSPlusCatalogThread hfsPlusThread; -}; -typedef union CatalogRecord CatalogRecord; enum { @@ -205,10 +186,6 @@ EXTERN_API_C( Boolean ) IsVCBDirty (ExtendedVCB *vcb); -#define VCB_LOCK_INIT(vcb) simple_lock_init(&vcb->vcbSimpleLock) -#define VCB_LOCK(vcb) simple_lock(&vcb->vcbSimpleLock) -#define VCB_UNLOCK(vcb) simple_unlock(&vcb->vcbSimpleLock) - #define MarkVCBDirty(vcb) { ((vcb)->vcbFlags |= 0xFF00); } #define MarkVCBClean(vcb) { ((vcb)->vcbFlags &= 0x00FF); } #define IsVCBDirty(vcb) ((Boolean) ((vcb->vcbFlags & 0xFF00) != 0)) @@ -219,12 +196,7 @@ EXTERN_API_C( void ) ReturnIfError (OSErr result); #define ReturnIfError(result) if ( (result) != noErr ) return (result); else ; -/* Test for passed condition and return if true*/ -EXTERN_API_C( void ) -ReturnErrorIf (Boolean condition, - OSErr result); -#define ReturnErrorIf(condition, error) if ( (condition) ) return( (error) ); /* Exit function on error*/ EXTERN_API_C( void ) ExitOnError (OSErr result); @@ -244,21 +216,6 @@ ExchangeFileIDs (ExtendedVCB * volume, UInt32 srcHint, UInt32 destHint ); -EXTERN_API_C( SInt32 ) -CompareCatalogKeys (HFSCatalogKey * searchKey, - HFSCatalogKey * trialKey); - -EXTERN_API_C( SInt32 ) -CompareExtendedCatalogKeys (HFSPlusCatalogKey * searchKey, - HFSPlusCatalogKey * trialKey); - -EXTERN_API_C( OSErr ) -InitCatalogCache (void); - -EXTERN_API_C( void ) -InvalidateCatalogCache (ExtendedVCB * volume); - - /* BTree Manager Routines*/ @@ -305,10 +262,6 @@ BlockMarkAllocated(ExtendedVCB *vcb, UInt32 startingBlock, UInt32 numBlocks); EXTERN_API_C( OSErr ) BlockMarkFree( ExtendedVCB *vcb, UInt32 startingBlock, UInt32 numBlocks); -EXTERN_API_C( UInt32 ) -FileBytesToBlocks (SInt64 numerator, - UInt32 denominator); - EXTERN_API_C( UInt32 ) MetaZoneFreeBlocks(ExtendedVCB *vcb); @@ -343,9 +296,11 @@ MapFileBlockC (ExtendedVCB * vcb, FCB * fcb, size_t numberOfBytes, off_t offset, - daddr_t * startBlock, + daddr64_t * startBlock, size_t * availableBytes); +OSErr HeadTruncateFile(ExtendedVCB *vcb, FCB *fcb, UInt32 headblks); + EXTERN_API_C( int ) AddFileExtent (ExtendedVCB *vcb, FCB *fcb, UInt32 startBlock, UInt32 blockCount); @@ -356,10 +311,6 @@ NodesAreContiguous (ExtendedVCB * vcb, UInt32 nodeSize); #endif -/* Utility routines*/ - -EXTERN_API_C( OSErr ) -VolumeWritable (ExtendedVCB * vcb); /* Get the current time in UTC (GMT)*/ diff --git a/bsd/i386/Makefile b/bsd/i386/Makefile index e3f4901de..bb3988af6 100644 --- a/bsd/i386/Makefile +++ b/bsd/i386/Makefile @@ -8,16 +8,22 @@ include $(MakeInc_cmd) include $(MakeInc_def) DATAFILES = \ - cpu.h disklabel.h endian.h exec.h label_t.h param.h \ - profile.h psl.h ptrace.h reboot.h setjmp.h signal.h \ - spl.h table.h types.h ucontext.h user.h vmparam.h + endian.h param.h \ + profile.h setjmp.h signal.h \ + types.h ucontext.h vmparam.h _types.h + +KERNELFILES = \ + endian.h param.h \ + profile.h setjmp.h signal.h \ + types.h vmparam.h _types.h INSTALL_MD_LIST = ${DATAFILES} +INSTALL_MD_LCL_LIST = ${DATAFILES} disklabel.h INSTALL_MD_DIR = i386 
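For context on the CatalogKey/CatalogRecord unions deleted from FileMgrInternal.h above: they follow the classic on-disk-variant pattern, where every variant begins with the same type field, so a reader can inspect the tag through any member before picking a variant. A toy model of that pattern (hypothetical types, not the real HFS definitions):

#include <stdint.h>
#include <stdio.h>

enum { kFolderRecord = 1, kFileRecord = 2 };

struct folder { int16_t recordType; uint32_t folderID; };
struct file   { int16_t recordType; uint32_t fileID; uint64_t size; };

union catrec {
    int16_t       recordType;   /* common initial member: the tag */
    struct folder fol;
    struct file   fil;
};

static void
dump(const union catrec *rp)
{
    switch (rp->recordType) {   /* safe: common initial sequence */
    case kFolderRecord:
        printf("folder id %u\n", (unsigned)rp->fol.folderID);
        break;
    case kFileRecord:
        printf("file id %u, %llu bytes\n", (unsigned)rp->fil.fileID,
               (unsigned long long)rp->fil.size);
        break;
    }
}

int
main(void)
{
    union catrec r = { .fil = { kFileRecord, 16, 4096 } };
    dump(&r);
    return 0;
}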
-EXPORT_MD_LIST = ${DATAFILES} +EXPORT_MD_LIST = ${KERNELFILES} EXPORT_MD_DIR = i386 diff --git a/bsd/i386/_types.h b/bsd/i386/_types.h new file mode 100644 index 000000000..2a69df571 --- /dev/null +++ b/bsd/i386/_types.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _BSD_I386__TYPES_H_ +#define _BSD_I386__TYPES_H_ + +/* + * This header file contains integer types. It's intended to also contain + * floating point and other arithmetic types, as needed, later. + */ + +#ifdef __GNUC__ +typedef __signed char __int8_t; +#else /* !__GNUC__ */ +typedef char __int8_t; +#endif /* !__GNUC__ */ +typedef unsigned char __uint8_t; +typedef short __int16_t; +typedef unsigned short __uint16_t; +typedef int __int32_t; +typedef unsigned int __uint32_t; +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + +typedef long __darwin_intptr_t; +typedef unsigned int __darwin_natural_t; + +/* + * The rune type below is declared to be an ``int'' instead of the more natural + * ``unsigned long'' or ``long''. Two things are happening here. It is not + * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, + * it looks like 10646 will be a 31 bit standard. This means that if your + * ints cannot hold 32 bits, you will be in trouble. The reason an int was + * chosen over a long is that the is*() and to*() routines take ints (says + * ANSI C), but they use __darwin_ct_rune_t instead of int. By changing it + * here, you lose a bit of ANSI conformance, but your programs will still + * work. + * + * NOTE: rune_t is not covered by ANSI nor other standards, and should not + * be instantiated outside of lib/libc/locale. Use wchar_t. wchar_t and + * rune_t must be the same type. Also wint_t must be no narrower than + * wchar_t, and should also be able to hold all members of the largest + * character set plus one extra value (WEOF). wint_t must be at least 16 bits. + */ + +typedef int __darwin_ct_rune_t; /* ct_rune_t */ + +/* + * mbstate_t is an opaque object to keep conversion state, during multibyte + * stream conversions. The content must not be referenced by user programs.
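The width-specific typedefs in the new _types.h carry exact-size and signedness contracts that the header meets by choosing the right base C type for the i386 ABI. Under C11 those contracts can be stated as compile-time checks; a sketch with stand-in names:

/* Assumed shapes only; the real header spells these per architecture. */
typedef signed char my_int8_t;
typedef short       my_int16_t;
typedef int         my_int32_t;
typedef long long   my_int64_t;

_Static_assert(sizeof(my_int8_t)  == 1 && (my_int8_t)-1  < 0, "int8");
_Static_assert(sizeof(my_int16_t) == 2 && (my_int16_t)-1 < 0, "int16");
_Static_assert(sizeof(my_int32_t) == 4 && (my_int32_t)-1 < 0, "int32");
_Static_assert(sizeof(my_int64_t) == 8 && (my_int64_t)-1 < 0, "int64");

int main(void) { return 0; }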
+ */ +typedef union { + char __mbstate8[128]; + long long _mbstateL; /* for alignment */ +} __mbstate_t; + +typedef __mbstate_t __darwin_mbstate_t; /* mbstate_t */ + +#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__) +typedef __PTRDIFF_TYPE__ __darwin_ptrdiff_t; /* ptr1 - ptr2 */ +#else +typedef int __darwin_ptrdiff_t; /* ptr1 - ptr2 */ +#endif /* __GNUC__ */ + +#if defined(__GNUC__) && defined(__SIZE_TYPE__) +typedef __SIZE_TYPE__ __darwin_size_t; /* sizeof() */ +#else +typedef unsigned long __darwin_size_t; /* sizeof() */ +#endif + +#ifdef KERNEL +typedef void * __darwin_va_list; /* va_list */ +#else /* !KERNEL */ +#if (__GNUC__ > 2) +typedef __builtin_va_list __darwin_va_list; /* va_list */ +#else +typedef void * __darwin_va_list; /* va_list */ +#endif +#endif /* KERNEL */ + +#if defined(__GNUC__) && defined(__WCHAR_TYPE__) +typedef __WCHAR_TYPE__ __darwin_wchar_t; /* wchar_t */ +#else +typedef __darwin_ct_rune_t __darwin_wchar_t; /* wchar_t */ +#endif + +typedef __darwin_wchar_t __darwin_rune_t; /* rune_t */ + +#if defined(__GNUC__) && defined(__WINT_TYPE__) +typedef __WINT_TYPE__ __darwin_wint_t; /* wint_t */ +#else +typedef __darwin_ct_rune_t __darwin_wint_t; /* wint_t */ +#endif + +typedef unsigned long __darwin_clock_t; /* clock() */ +typedef __uint32_t __darwin_socklen_t; /* socklen_t (duh) */ +typedef long __darwin_ssize_t; /* byte count or error */ +typedef long __darwin_time_t; /* time() */ + +#endif /* _BSD_I386__TYPES_H_ */ diff --git a/bsd/i386/cpu.h b/bsd/i386/cpu.h deleted file mode 100644 index fbace41fc..000000000 --- a/bsd/i386/cpu.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * HISTORY - * - */ - -#ifndef _BSD_I386_CPU_H_ -#define _BSD_I386_CPU_H_ - -#include - -#ifdef __APPLE_API_OBSOLETE -#define cpu_number() (0) -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* _BSD_I386_CPU_H_ */ diff --git a/bsd/i386/endian.h b/bsd/i386/endian.h index 2d9a1d3a1..3e42f8a8b 100644 --- a/bsd/i386/endian.h +++ b/bsd/i386/endian.h @@ -71,52 +71,25 @@ #define _QUAD_HIGHWORD 1 #define _QUAD_LOWWORD 0 -#if defined(KERNEL) || !defined(_POSIX_SOURCE) /* * Definitions for byte order, according to byte significance from low * address to high. 
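Back in _types.h, the __mbstate_t union at the top of the hunk above keeps an opaque 128-byte buffer whose only other member, _mbstateL, exists purely to force long-long alignment without changing the size. The trick in isolation (C11 _Alignof, used here just to demonstrate):

#include <stdio.h>

typedef union {
    char      __mbstate8[128];  /* the real, opaque state */
    long long _mbstateL;        /* for alignment only */
} mbstate_like_t;

int
main(void)
{
    printf("size  = %zu\n", sizeof(mbstate_like_t));    /* 128 */
    printf("align = %zu\n", _Alignof(mbstate_like_t));  /* typically 8 */
    return 0;
}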
*/ -#define LITTLE_ENDIAN 1234 /* LSB first: i386, vax */ -#define BIG_ENDIAN 4321 /* MSB first: 68000, ibm, net */ -#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long */ +#define __DARWIN_LITTLE_ENDIAN 1234 /* LSB first: i386, vax */ +#define __DARWIN_BIG_ENDIAN 4321 /* MSB first: 68000, ibm, net */ +#define __DARWIN_PDP_ENDIAN 3412 /* LSB first in word, MSW first in long */ -#define BYTE_ORDER LITTLE_ENDIAN +#define __DARWIN_BYTE_ORDER __DARWIN_LITTLE_ENDIAN -#include +#if defined(KERNEL) || !defined(_POSIX_C_SOURCE) -__BEGIN_DECLS -unsigned long htonl __P((unsigned long)); -unsigned short htons __P((unsigned short)); -unsigned long ntohl __P((unsigned long)); -unsigned short ntohs __P((unsigned short)); -__END_DECLS +#define LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN +#define BIG_ENDIAN __DARWIN_BIG_ENDIAN +#define PDP_ENDIAN __DARWIN_PDP_ENDIAN -/* - * Macros for network/external number representation conversion. - */ -#if BYTE_ORDER == BIG_ENDIAN && !defined(lint) -#define ntohl(x) (x) -#define ntohs(x) (x) -#define htonl(x) (x) -#define htons(x) (x) - -#define NTOHL(x) (x) -#define NTOHS(x) (x) -#define HTONL(x) (x) -#define HTONS(x) (x) +#define BYTE_ORDER __DARWIN_BYTE_ORDER -#else -#include - -#define ntohl(x) NXSwapBigLongToHost(x) -#define ntohs(x) NXSwapBigShortToHost(x) -#define htonl(x) NXSwapHostLongToBig(x) -#define htons(x) NXSwapHostShortToBig(x) +#include -#define NTOHL(x) (x) = ntohl((u_long)x) -#define NTOHS(x) (x) = ntohs((u_short)x) -#define HTONL(x) (x) = htonl((u_long)x) -#define HTONS(x) (x) = htons((u_short)x) -#endif -#endif /* defined(KERNEL) || !defined(_POSIX_SOURCE) */ +#endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ #endif /* !_I386__ENDIAN_H_ */ diff --git a/bsd/i386/exec.h b/bsd/i386/exec.h index 237ecd5fc..882e9cd79 100644 --- a/bsd/i386/exec.h +++ b/bsd/i386/exec.h @@ -58,9 +58,7 @@ #define _BSD_I386_EXEC_H_ -#include - -#ifdef __APPLE_API_OBSOLETE +#ifdef BSD_KERNEL_PRIVATE /* Size of a page in an object file. */ #define __LDPGSZ 4096 @@ -111,11 +109,6 @@ struct exec { unsigned int a_drsize; /* data relocation size */ }; -/* - * Address of ps_strings structure (in user space). - */ -#define PS_STRINGS \ - ((struct ps_strings *)(USRSTACK - sizeof(struct ps_strings))) -#endif /* __APPLE_API_OBSOLETE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _BSD_I386_EXEC_H_ */ diff --git a/bsd/i386/label_t.h b/bsd/i386/label_t.h deleted file mode 100644 index f47065aff..000000000 --- a/bsd/i386/label_t.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. 
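The endian.h hunk above renames the byte-order constants with a __DARWIN_ prefix and exposes the traditional LITTLE_ENDIAN/BIG_ENDIAN/BYTE_ORDER names only outside strict POSIX compiles, with the swap routines now deferred to the architecture header. The compile-time dispatch such constants enable, in miniature (all names made up):

#include <stdint.h>
#include <stdio.h>

#define MY_LITTLE_ENDIAN 1234
#define MY_BIG_ENDIAN    4321
#define MY_BYTE_ORDER    MY_LITTLE_ENDIAN   /* i386 */

static uint32_t
swap32(uint32_t x)
{
    return (x >> 24) | ((x >> 8) & 0xff00) |
           ((x << 8) & 0xff0000) | (x << 24);
}

static uint32_t
my_htonl(uint32_t x)
{
#if MY_BYTE_ORDER == MY_BIG_ENDIAN
    return x;           /* already network (big-endian) order */
#else
    return swap32(x);   /* LSB-first host must swap */
#endif
}

int
main(void)
{
    printf("0x%08x\n", (unsigned)my_htonl(0x11223344)); /* 0x44332211 */
    return 0;
}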
- * - * Intel386 Family: For setjmp/longjmp (kernel version). - * - */ - -#ifndef _BSD_I386_LABEL_T_H_ -#define _BSD_I386_LABEL_T_H_ - -#include - -#ifdef __APPLE_API_OBSOLETE -typedef struct label_t { - int val[14]; -} label_t; -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* _BSD_I386_LABEL_T_H_ */ diff --git a/bsd/i386/param.h b/bsd/i386/param.h index f5e43d3f6..6be5ae90a 100644 --- a/bsd/i386/param.h +++ b/bsd/i386/param.h @@ -68,11 +68,11 @@ /* * Round p (pointer or byte index) up to a correctly-aligned value for all - * data types (int, long, ...). The result is u_int and must be cast to - * any desired pointer type. + * data types (int, long, ...). The result is unsigned int and must be + * cast to any desired pointer type. */ #define ALIGNBYTES 3 -#define ALIGN(p) (((u_int)(p) + ALIGNBYTES) &~ ALIGNBYTES) +#define ALIGN(p) (((unsigned int)(p) + ALIGNBYTES) &~ ALIGNBYTES) #define NBPG 4096 /* bytes/page */ #define PGOFSET (NBPG-1) /* byte offset into page */ @@ -83,8 +83,6 @@ #define BLKDEV_IOSIZE 2048 #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ -#define STACK_GROWS_UP 0 /* stack grows to lower addresses */ - #define CLSIZE 1 #define CLSIZELOG2 0 diff --git a/bsd/i386/reboot.h b/bsd/i386/reboot.h index dad563257..0724538c9 100644 --- a/bsd/i386/reboot.h +++ b/bsd/i386/reboot.h @@ -29,8 +29,7 @@ #include -#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Use most significant 16 bits to avoid collisions with @@ -46,7 +45,6 @@ #define RB_BOOTDOS 0x00800000 /* reboot into DOS */ #define RB_PRETTY 0x01000000 /* shutdown with pretty graphics */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _BSD_I386_REBOOT_H_ */ diff --git a/bsd/i386/setjmp.h b/bsd/i386/setjmp.h index b2584eb47..4d7ba1573 100644 --- a/bsd/i386/setjmp.h +++ b/bsd/i386/setjmp.h @@ -29,26 +29,39 @@ #define _BSD_I386_SETJMP_H #include -#include +#include -typedef struct sigcontext jmp_buf[1]; +/* + * _JBLEN is number of ints required to save the following: + * eax, ebx, ecx, edx, edi, esi, ebp, esp, ss, eflags, eip, + * cs, de, es, fs, gs == 16 ints + * onstack, mask = 2 ints + */ +#if defined(KERNEL) +typedef struct sigcontext jmp_buf[1]; #define _JBLEN ((sizeof(struct sigcontext)) / sizeof(int)) typedef int sigjmp_buf[_JBLEN+1]; +#else +#define _JBLEN (18) +typedef int jmp_buf[_JBLEN]; +typedef int sigjmp_buf[_JBLEN + 1]; +#endif + __BEGIN_DECLS -extern int setjmp __P((jmp_buf env)); -extern void longjmp __P((jmp_buf env, int val)); +extern int setjmp(jmp_buf env); +extern void longjmp(jmp_buf env, int val); #ifndef _ANSI_SOURCE -int sigsetjmp __P((sigjmp_buf env, int val)); -void siglongjmp __P((sigjmp_buf env, int val)); +int _setjmp(jmp_buf env); +void _longjmp(jmp_buf, int val); +int sigsetjmp(sigjmp_buf env, int val); +void siglongjmp(sigjmp_buf env, int val); #endif /* _ANSI_SOURCE */ -#if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE) -int _setjmp __P((jmp_buf env)); -void _longjmp __P((jmp_buf, int val)); -void longjmperror __P((void)); +#if !defined(_ANSI_SOURCE) && !defined(_POSIX_C_SOURCE) +void longjmperror(void); #endif /* neither ANSI nor POSIX */ __END_DECLS #endif /* !_BSD_I386_SETJMP_H */ diff --git a/bsd/i386/signal.h b/bsd/i386/signal.h index 3c73b16a9..d1316b5df 100644 --- a/bsd/i386/signal.h +++ b/bsd/i386/signal.h @@ -27,10 +27,14 @@ #ifndef _i386_SIGNAL_ #define _i386_SIGNAL_ 1 +#ifndef _ANSI_SOURCE +typedef int sig_atomic_t; + +#ifndef _POSIX_C_SOURCE + #include #ifdef __APPLE_API_OBSOLETE 
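Back in setjmp.h, the new user-space _JBLEN of 18 ints is accounted for in the comment above: 16 register and segment slots plus two for the signal state (onstack, mask), while kernel builds keep the sigcontext-sized buffer. Exercising the resulting jmp_buf from user space:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf env;

static void
fail(void)
{
    longjmp(env, 42);       /* unwind back to the setjmp() below */
}

int
main(void)
{
    int rc = setjmp(env);   /* 0 on the first pass, longjmp's value later */

    if (rc == 0) {
        fail();
        return 1;           /* never reached */
    }
    printf("resumed with %d\n", rc);
    return 0;
}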
-typedef int sig_atomic_t; /* * Information pushed on stack when a signal is delivered. @@ -61,6 +65,8 @@ struct sigcontext { }; #endif /* __APPLE_API_OBSOLETE */ +#endif /* ! _POSIX_C_SOURCE */ +#endif /* ! _ANSI_SOURCE */ #endif /* _i386_SIGNAL_ */ diff --git a/bsd/i386/spl.h b/bsd/i386/spl.h deleted file mode 100644 index 0f6be5565..000000000 --- a/bsd/i386/spl.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _BSD_I386_SPL_H_ -#define _BSD_I386_SPL_H_ - -#ifdef KERNEL -#ifndef __ASSEMBLER__ -/* - * Machine-dependent SPL definitions. - * - */ -typedef unsigned spl_t; - -extern unsigned sploff(void); -extern unsigned splhigh(void); -extern unsigned splsched(void); -extern unsigned splclock(void); -extern unsigned splpower(void); -extern unsigned splvm(void); -extern unsigned splbio(void); -extern unsigned splimp(void); -extern unsigned spltty(void); -extern unsigned splnet(void); -extern unsigned splsoftclock(void); - -extern void spllo(void); -extern void splon(unsigned level); -extern void splx(unsigned level); -extern void spln(unsigned level); -#define splstatclock() splhigh() - -#endif /* __ASSEMBLER__ */ - -#endif - -#endif /* _BSD_I386_SPL_H_ */ diff --git a/bsd/i386/table.h b/bsd/i386/table.h deleted file mode 100644 index f55484ddc..000000000 --- a/bsd/i386/table.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1989 Next, Inc. - */ - -#ifndef _BSD_I386_TABLE_H_ -#define _BSD_I386_TABLE_H_ - -/* - * Empty file. 
- */ - -#endif /* _BSD_I386_TABLE_H_ */ diff --git a/bsd/i386/types.h b/bsd/i386/types.h index f370e9bf1..ab1c10837 100644 --- a/bsd/i386/types.h +++ b/bsd/i386/types.h @@ -61,26 +61,59 @@ #define _MACHTYPES_H_ #ifndef __ASSEMBLER__ +#include #include /* * Basic integral types. Omit the typedef if * not possible for a machine/compiler combination. */ +#ifndef _INT8_T +#define _INT8_T typedef __signed char int8_t; +#endif typedef unsigned char u_int8_t; +#ifndef _INT16_T +#define _INT16_T typedef short int16_t; +#endif typedef unsigned short u_int16_t; +#ifndef _INT32_T +#define _INT32_T typedef int int32_t; +#endif typedef unsigned int u_int32_t; +#ifndef _INT64_T +#define _INT64_T typedef long long int64_t; +#endif typedef unsigned long long u_int64_t; typedef int32_t register_t; -typedef long int intptr_t; +#ifndef _INTPTR_T +#define _INTPTR_T +typedef __darwin_intptr_t intptr_t; +#endif +#ifndef _UINTPTR_T +#define _UINTPTR_T typedef unsigned long int uintptr_t; +#endif +/* These types are used for reserving the largest possible size. */ +// LP64todo - typedef mach_vm_address_t user_addr_t; /* varying length pointers from user space */ +// LP64todo - typedef mach_vm_size_t user_size_t; /* varying length values from user space (unsigned) */ +typedef u_int32_t user_addr_t; +typedef u_int32_t user_size_t; +typedef int32_t user_ssize_t; +typedef int32_t user_long_t; +typedef u_int32_t user_ulong_t; +typedef int32_t user_time_t; +#define USER_ADDR_NULL ((user_addr_t) 0) +#define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)(a_ptr)) + +#ifndef __offsetof #define __offsetof(type, field) ((size_t)(&((type *)0)->field)) +#endif #endif /* __ASSEMBLER__ */ #endif /* _MACHTYPES_H_ */ diff --git a/bsd/i386/ucontext.h b/bsd/i386/ucontext.h index 8ba671a2f..7f7a04474 100644 --- a/bsd/i386/ucontext.h +++ b/bsd/i386/ucontext.h @@ -26,26 +26,42 @@ #include #include -#include - #ifdef __APPLE_API_UNSTABLE /* WARNING: THIS WILL CHANGE; DO NOT COUNT ON THIS */ /* Needs to be finalized as to what it should contain */ -struct mcontext { +#ifndef _POSIX_C_SOURCE +struct mcontext +#else /* _POSIX_C_SOURCE */ +struct __darwin_mcontext +#endif /* _POSIX_C_SOURCE */ +{ struct sigcontext sc; }; +#ifndef _POSIX_C_SOURCE #define I386_MCONTEXT_SIZE sizeof(struct mcontext) +#endif /* _POSIX_C_SOURCE */ + +#ifndef _MCONTEXT_T +#define _MCONTEXT_T +typedef __darwin_mcontext_t mcontext_t; +#endif -typedef struct mcontext * mcontext_t; +#ifndef _POSIX_C_SOURCE -struct mcontext64 { +struct mcontext64 +{ struct sigcontext sc; }; #define I386_MCONTEXT64_SIZE sizeof(struct mcontext64) +#ifndef _MCONTEXT64_T +#define _MCONTEXT64_T typedef struct mcontext64 * mcontext64_t; +#endif + +#endif /* _POSIX_C_SOURCE */ #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/i386/vmparam.h b/bsd/i386/vmparam.h index cba3d8b69..ffb77f55b 100644 --- a/bsd/i386/vmparam.h +++ b/bsd/i386/vmparam.h @@ -25,7 +25,7 @@ #include -#define USRSTACK 0xbfff9000 +#define USRSTACK (0xC0000000) /* * Virtual memory related constants, all in bytes diff --git a/bsd/isofs/cd9660/Makefile b/bsd/isofs/cd9660/Makefile index 27f7df03a..fe4e6d369 100644 --- a/bsd/isofs/cd9660/Makefile +++ b/bsd/isofs/cd9660/Makefile @@ -26,9 +26,9 @@ INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = isofs/cd9660 -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = -EXPORT_MI_DIR = isofs/cd9660 +EXPORT_MI_DIR = include $(MakeInc_rule) diff --git a/bsd/isofs/cd9660/cd9660_bmap.c b/bsd/isofs/cd9660/cd9660_bmap.c index f01787f5f..53cbb55d9 100644 --- a/bsd/isofs/cd9660/cd9660_bmap.c +++ 
b/bsd/isofs/cd9660/cd9660_bmap.c @@ -65,84 +65,15 @@ #include #include #include -#include #include #include #include -/* - * Bmap converts the logical block number of a file to its physical block - * number on the disk. The conversion is done by using the logical block - * number to index into the data block (extent) for the file. - */ -int -cd9660_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - struct iso_node *ip = VTOI(ap->a_vp); - daddr_t lblkno = ap->a_bn; - int bshift; - - /* - * Check for underlying vnode requests and ensure that logical - * to physical mapping is requested. - */ - if (ap->a_vpp != NULL) - *ap->a_vpp = ip->i_devvp; - if (ap->a_bnp == NULL) - return (0); - - /* - * Associated files have an Apple Double header - */ - if ((ip->i_flag & ISO_ASSOCIATED) && (lblkno > (ADH_BLKS - 1))) { - lblkno -= ADH_BLKS; - *ap->a_bnp = (ip->iso_start + lblkno); - if (ap->a_runp) - *ap->a_runp = 0; - return (0); - } - - /* - * Compute the requested block number - */ - bshift = ip->i_mnt->im_bshift; - *ap->a_bnp = (ip->iso_start + lblkno); - - /* - * Determine maximum number of readahead blocks following the - * requested block. - */ - if (ap->a_runp) { - int nblk; - - nblk = (ip->i_size >> bshift) - (lblkno + 1); - if (nblk <= 0) - *ap->a_runp = 0; - else if (nblk >= (MAXBSIZE >> bshift)) - *ap->a_runp = (MAXBSIZE >> bshift) - 1; - else - *ap->a_runp = nblk; - } - - return (0); -} /* blktooff converts a logical block number to a file offset */ int -cd9660_blktooff(ap) - struct vop_blktooff_args /* { - struct vnode *a_vp; - daddr_t a_lblkno; - off_t *a_offset; - } */ *ap; +cd9660_blktooff(struct vnop_blktooff_args *ap) { register struct iso_node *ip; register struct iso_mnt *imp; @@ -159,12 +90,7 @@ cd9660_blktooff(ap) /* offtoblk converts a file offset to a logical block number */ int -cd9660_offtoblk(ap) -struct vop_offtoblk_args /* { - struct vnode *a_vp; - off_t a_offset; - daddr_t *a_lblkno; - } */ *ap; +cd9660_offtoblk(struct vnop_offtoblk_args *ap) { register struct iso_node *ip; register struct iso_mnt *imp; @@ -175,20 +101,12 @@ struct vop_offtoblk_args /* { ip = VTOI(ap->a_vp); imp = ip->i_mnt; - *ap->a_lblkno = (daddr_t)lblkno(imp, ap->a_offset); + *ap->a_lblkno = (daddr64_t)lblkno(imp, ap->a_offset); return (0); } int -cd9660_cmap(ap) -struct vop_cmap_args /* { - struct vnode *a_vp; - off_t a_offset; - size_t a_size; - daddr_t *a_bpn; - size_t *a_run; - void *a_poff; -} */ *ap; +cd9660_blockmap(struct vnop_blockmap_args *ap) { struct iso_node *ip = VTOI(ap->a_vp); size_t cbytes; @@ -202,7 +120,7 @@ struct vop_cmap_args /* { if (ap->a_bpn == NULL) return (0); - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(ap->a_vp)); /* * Associated files have an Apple Double header @@ -211,14 +129,14 @@ struct vop_cmap_args /* { if (offset < ADH_SIZE) { if (ap->a_run) *ap->a_run = 0; - *ap->a_bpn = -1; + *ap->a_bpn = (daddr64_t)-1; goto out; } else { offset -= ADH_SIZE; } } - *ap->a_bpn = (daddr_t)(ip->iso_start + lblkno(ip->i_mnt, offset)); + *ap->a_bpn = (daddr64_t)(ip->iso_start + lblkno(ip->i_mnt, offset)); /* * Determine maximum number of contiguous bytes following the diff --git a/bsd/isofs/cd9660/cd9660_lookup.c b/bsd/isofs/cd9660/cd9660_lookup.c index 34aaadd58..2ecf2568e 100644 --- a/bsd/isofs/cd9660/cd9660_lookup.c +++ b/bsd/isofs/cd9660/cd9660_lookup.c @@ -115,12 +115,7 @@ struct nchstats iso_nchstats; * NOTE: (LOOKUP | LOCKPARENT) 
currently returns the parent inode unlocked. */ int -cd9660_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; +cd9660_lookup(struct vnop_lookup_args *ap) { register struct vnode *vdp; /* vnode for directory being searched */ register struct iso_node *dp; /* inode for directory being searched */ @@ -150,8 +145,8 @@ cd9660_lookup(ap) struct componentname *cnp = ap->a_cnp; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; - struct proc *p = cnp->cn_proc; - int devBlockSize=0; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); size_t altlen; bp = NULL; @@ -164,14 +159,6 @@ cd9660_lookup(ap) wantassoc = 0; - /* - * Check accessiblity of directory. - */ - if (vdp->v_type != VDIR) - return (ENOTDIR); - if ( (error = VOP_ACCESS(vdp, VEXEC, cnp->cn_cred, p)) ) - return (error); - /* * We now have a segment name to search for, and a directory to search. * @@ -180,48 +167,9 @@ cd9660_lookup(ap) * we are looking for is known already. */ if ((error = cache_lookup(vdp, vpp, cnp))) { - int vpid; /* capability number of vnode */ - if (error == ENOENT) return (error); - /* - * Get the next vnode in the path. - * See comment below starting `Step through' for - * an explaination of the locking protocol. - */ - pdp = vdp; - dp = VTOI(*vpp); - vdp = *vpp; - vpid = vdp->v_id; - if (pdp == vdp) { - VREF(vdp); - error = 0; - } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp, 0, p); - error = vget(vdp, LK_EXCLUSIVE | LK_RETRY, p); - if (!error && lockparent && (flags & ISLASTCN)) - error = VOP_LOCK(pdp, LK_EXCLUSIVE | LK_RETRY, p); - } else { - error = vget(vdp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); - } - /* - * Check that the capability number did not change - * while we were waiting for the lock. - */ - if (!error) { - if (vpid == vdp->v_id) - return (0); - vput(vdp); - if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); - } - if ( (error = VOP_LOCK(pdp, LK_EXCLUSIVE | LK_RETRY, p)) ) - return (error); - vdp = pdp; - dp = VTOI(pdp); - *vpp = NULL; + return (0); } len = cnp->cn_namelen; @@ -241,13 +189,13 @@ cd9660_lookup(ap) */ if ((imp->iso_ftype == ISO_FTYPE_JOLIET) && !((len == 1 && *name == '.') || (flags & ISDOTDOT))) { - int flags = UTF_PRECOMPOSED; + int flags1 = UTF_PRECOMPOSED; if (BYTE_ORDER != BIG_ENDIAN) - flags |= UTF_REVERSE_ENDIAN; + flags1 |= UTF_REVERSE_ENDIAN; (void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen, - sizeof(altname), 0, flags); + sizeof(altname), 0, flags1); name = altname; len = altlen; } @@ -272,7 +220,7 @@ cd9660_lookup(ap) dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = VOP_BLKATOFF(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp))) + (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp))) return (error); numdirpasses = 2; iso_nchstats.ncs_2passes++; @@ -288,8 +236,8 @@ searchloop: */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) - brelse(bp); - if ( (error = VOP_BLKATOFF(vdp, SECTOFF(imp,dp->i_offset), NULL, &bp)) ) + buf_brelse(bp); + if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp,dp->i_offset), NULL, &bp)) ) return (error); entryoffsetinblock = 0; } @@ -297,7 +245,7 @@ searchloop: * Get pointer to next entry. 
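The scan in this hunk steps through ISO-9660 directory entries, variable-length records introduced by a one-byte length (read via isonum_711), where a zero length means the rest of the logical block is padding. The walk reduced to user space (toy record layout, not the full iso_directory_record):

#include <stdint.h>
#include <stdio.h>

struct dirrec {             /* stand-in for iso_directory_record */
    uint8_t length;         /* total record length; 0 = block padding */
    uint8_t name_len;
    char    name[];
};

static void
walk(const uint8_t *block, size_t blksize)
{
    size_t off = 0;

    while (off < blksize) {
        const struct dirrec *ep = (const struct dirrec *)(block + off);

        if (ep->length == 0)    /* rest of the block is padding */
            break;
        printf("entry at %zu: %.*s\n", off, ep->name_len, ep->name);
        off += ep->length;
    }
}

int
main(void)
{
    /* two records, then a zero length byte that ends the block */
    uint8_t blk[32] = { 6, 4, 'B', 'O', 'O', 'T',
                        8, 6, 'K', 'E', 'R', 'N', 'E', 'L', 0 };
    walk(blk, sizeof blk);
    return 0;
}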
*/ ep = (struct iso_directory_record *) - ((char *)bp->b_data + entryoffsetinblock); + ((char *)buf_dataptr(bp) + entryoffsetinblock); reclen = isonum_711(ep->length); if (reclen == 0) { @@ -328,7 +276,7 @@ searchloop: if (isoflags & directoryBit) ino = isodirino(ep, imp); else - ino = (bp->b_blkno << imp->im_bshift) + entryoffsetinblock; + ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; dp->i_ino = ino; cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp); if (namelen == cnp->cn_namelen @@ -357,14 +305,14 @@ searchloop: if ( isoflags & directoryBit ) ino = isodirino(ep, imp); else - ino = (bp->b_blkno << imp->im_bshift) + entryoffsetinblock; + ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; saveoffset = dp->i_offset; } else if (imp->iso_ftype == ISO_FTYPE_JOLIET && !(res = ucsfncmp((u_int16_t*)name, len, (u_int16_t*) ep->name, namelen))) { if ( isoflags & directoryBit ) ino = isodirino(ep, imp); else - ino = (bp->b_blkno << imp->im_bshift) + entryoffsetinblock; + ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; saveoffset = dp->i_offset; } else if (ino) goto foundino; @@ -387,13 +335,13 @@ foundino: if (lblkno(imp, dp->i_offset) != lblkno(imp, saveoffset)) { if (bp != NULL) - brelse(bp); - if ( (error = VOP_BLKATOFF(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) ) + buf_brelse(bp); + if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) ) return (error); } entryoffsetinblock = saveoffset & bmask; ep = (struct iso_directory_record *) - ((char *)bp->b_data + entryoffsetinblock); + ((char *)buf_dataptr(bp) + entryoffsetinblock); dp->i_offset = saveoffset; } goto found; @@ -410,20 +358,13 @@ notfound: goto searchloop; } if (bp != NULL) - brelse(bp); + buf_brelse(bp); /* * Insert name into cache (as non-existent) if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); - if (nameiop == CREATE || nameiop == RENAME) { - /* - * return EROFS (NOT EJUSTRETURN). The caller will then unlock - * the parent for us. - */ - return (EROFS); - } return (ENOENT); found: @@ -463,44 +404,26 @@ found: * it's a relocated directory. */ if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ - error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp, + error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, NULL, NULL, dp->i_ino != ino, ep, p); VTOI(tdp)->i_parent = VTOI(pdp)->i_number; - brelse(bp); - if (error) { - VOP_LOCK(pdp, LK_EXCLUSIVE | LK_RETRY, p); - return (error); - } - if (lockparent && (flags & ISLASTCN) && - (error = VOP_LOCK(pdp, LK_EXCLUSIVE | LK_RETRY, p))) { - vput(tdp); - return (error); - } + buf_brelse(bp); + *vpp = tdp; } else if (dp->i_number == dp->i_ino) { - brelse(bp); - VREF(vdp); /* we want ourself, ie "." */ + buf_brelse(bp); + vnode_get(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { - error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp, + error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, vdp, cnp, dp->i_ino != ino, ep, p); /* save parent inode number */ VTOI(tdp)->i_parent = VTOI(pdp)->i_number; - brelse(bp); + buf_brelse(bp); if (error) return (error); - if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } - - /* - * Insert name into cache if appropriate. - */ - if (cnp->cn_flags & MAKEENTRY) - cache_enter(vdp, *vpp, cnp); - return (0); } @@ -511,37 +434,32 @@ found: * remaining space in the directory. 
*/ int -cd9660_blkatoff(ap) - struct vop_blkatoff_args /* { - struct vnode *a_vp; - off_t a_offset; - char **a_res; - struct buf **a_bpp; - } */ *ap; +cd9660_blkatoff(vnode_t vp, off_t offset, char **res, buf_t *bpp) { struct iso_node *ip; register struct iso_mnt *imp; - struct buf *bp; + buf_t bp; daddr_t lbn; int bsize, error; - ip = VTOI(ap->a_vp); + ip = VTOI(vp); imp = ip->i_mnt; - lbn = lblkno(imp, ap->a_offset); + lbn = lblkno(imp, offset); bsize = blksize(imp, ip, lbn); + if ((bsize != imp->im_sector_size) && - (ap->a_offset & (imp->im_sector_size - 1)) == 0) { + (offset & (imp->im_sector_size - 1)) == 0) { bsize = imp->im_sector_size; } - if ( (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) ) { - brelse(bp); - *ap->a_bpp = NULL; + if ( (error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), bsize, NOCRED, &bp)) ) { + buf_brelse(bp); + *bpp = NULL; return (error); } - if (ap->a_res) - *ap->a_res = (char *)bp->b_data + blkoff(imp, ap->a_offset); - *ap->a_bpp = bp; + if (res) + *res = (char *)buf_dataptr(bp) + blkoff(imp, offset); + *bpp = bp; return (0); } diff --git a/bsd/isofs/cd9660/cd9660_mount.h b/bsd/isofs/cd9660/cd9660_mount.h index 462b612ae..c673db1b1 100644 --- a/bsd/isofs/cd9660/cd9660_mount.h +++ b/bsd/isofs/cd9660/cd9660_mount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,6 +63,7 @@ #define __ISOFS_CD9660_CD9660_MOUNT_H__ #include +#include #ifdef __APPLE_API_UNSTABLE /* @@ -70,8 +71,9 @@ */ struct CDTOC; struct iso_args { +#ifndef KERNEL char *fspec; /* block special device to mount */ - struct export_args export; /* network export info */ +#endif int flags; /* mounting flags, see below */ int ssector; /* starting sector, 0 for 1st session */ int toc_length; /* Size of *toc, including the toc.length field */ @@ -83,5 +85,29 @@ struct iso_args { #define ISOFSMNT_NOJOLIET 0x00000008 /* disable Joliet Ext.*/ #define ISOFSMNT_TOC 0x00000010 /* iso_args.toc is valid */ +#ifdef KERNEL +/* LP64 version of iso_args. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with iso_args + */ +/* LP64todo - should this move? */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_iso_args { + int flags; /* mounting flags, see below */ + int ssector; /* starting sector, 0 for 1st session */ + int toc_length; /* Size of *toc, including the toc.length field */ + user_addr_t toc; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL */ + #endif /* __APPLE_API_UNSTABLE */ #endif /* __ISOFS_CD9660_CD9660_MOUNT_H__ */ diff --git a/bsd/isofs/cd9660/cd9660_node.c b/bsd/isofs/cd9660/cd9660_node.c index 5ff664592..4418c9147 100644 --- a/bsd/isofs/cd9660/cd9660_node.c +++ b/bsd/isofs/cd9660/cd9660_node.c @@ -107,7 +107,7 @@ extern u_char isonullname[]; * Initialize hash links for inodes and dnodes. 
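A few hunks up, cd9660_mount.h gained user_iso_args, the LP64 idiom this patch applies broadly: the kernel keeps a mirror of each user-supplied argument struct in which every pointer field is widened to the fixed-size user_addr_t, so the kernel-side layout no longer depends on the calling process's pointer width. A self-contained illustration (all names hypothetical):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t user_addr_like_t;  /* fixed width, like user_addr_t */

struct args_like {                  /* what a 32-bit process passes */
    int   flags;
    int   ssector;
    int   toc_length;
    void *toc;
};

struct user_args_like {             /* fixed-layout kernel-side mirror */
    int              flags;
    int              ssector;
    int              toc_length;
    user_addr_like_t toc;           /* pointer widened to 64 bits */
};

int
main(void)
{
    struct args_like a = { 1, 0, 0, 0 };
    struct user_args_like ua;

    /* munge the native layout into the fixed kernel layout */
    ua.flags      = a.flags;
    ua.ssector    = a.ssector;
    ua.toc_length = a.toc_length;
    ua.toc        = (user_addr_like_t)(uintptr_t)a.toc;

    printf("mirror is %zu bytes regardless of caller\n", sizeof ua);
    return 0;
}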
*/ int -cd9660_init() +cd9660_init(__unused struct vfsconf *cp) { isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash); @@ -122,10 +122,7 @@ cd9660_init() * Enter a new node into the device hash list */ struct iso_dnode * -iso_dmap(device, inum, create) - dev_t device; - ino_t inum; - int create; +iso_dmap(dev_t device, ino_t inum, int create) { register struct iso_dnode **dpp, *dp, *dq; @@ -154,8 +151,7 @@ iso_dmap(device, inum, create) } void -iso_dunmap(device) - dev_t device; +iso_dunmap(dev_t device) { struct iso_dnode **dpp, *dp, *dq; @@ -178,48 +174,60 @@ iso_dunmap(device) * to it. If it is in core, but locked, wait for it. */ struct vnode * -cd9660_ihashget(device, inum, p) - dev_t device; - ino_t inum; - struct proc *p; +cd9660_ihashget(dev_t device, ino_t inum, struct proc *p) { register struct iso_node *ip; struct vnode *vp; - - for (;;) - for (ip = isohashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) { - /* - * This is my most dangerous change. I am not waiting for - * the inode lock anymore (ufs doesn't, why should we) and - * I'm worried because there is not lock on the hashtable, - * but there wasn't before so I'll let it go for now. - * -- chw -- + uint32_t vid; + +retry: + for (ip = isohashtbl[INOHASH(device, inum)]; ip; ip = ip->i_next) { + if (inum == ip->i_number && device == ip->i_dev) { + + if (ISSET(ip->i_flag, ISO_INALLOC)) { + /* + * inode is being created... wait for it + * to be ready for consumption */ - vp = ITOV(ip); - simple_lock(&vp->v_interlock); - if (!vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p)) - return (vp); - break; + SET(ip->i_flag, ISO_INWALLOC); + tsleep((caddr_t)ip, PINOD, "cd9960_ihashget", 0); + goto retry; } + vp = ITOV(ip); + /* + * the vid needs to be grabbed before we drop + * lock protecting the hash + */ + vid = vnode_vid(vp); + + /* + * we currently depend on running under the FS funnel + * when we do proper locking and advertise ourselves + * as thread safe, we'll need a lock to protect the + * hash lookup... this is where we would drop it + */ + if (vnode_getwithvid(vp, vid)) { + /* + * If vnode is being reclaimed, or has + * already changed identity, no need to wait + */ + return (NULL); + } + return (vp); } - /* NOTREACHED */ + } + return (NULL); } /* * Insert the inode into the hash table, and return it locked. */ void -cd9660_ihashins(ip) - struct iso_node *ip; +cd9660_ihashins(struct iso_node *ip) { struct iso_node **ipp, *iq; - struct proc *p = current_proc(); /* lock the inode, then put it on the appropriate hash list */ - lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p); ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)]; if ((iq = *ipp)) @@ -227,14 +235,13 @@ cd9660_ihashins(ip) ip->i_next = iq; ip->i_prev = ipp; *ipp = ip; - } +} /* * Remove the inode from the hash table. */ void -cd9660_ihashrem(ip) - register struct iso_node *ip; +cd9660_ihashrem(register struct iso_node *ip) { register struct iso_node *iq; @@ -248,73 +255,53 @@ cd9660_ihashrem(ip) } /* - * Last reference to an inode, write the inode out and if necessary, - * truncate and deallocate the file. + * Last reference to an inode... 
if we're done with + * it, go ahead and recycle it for other use */ int -cd9660_inactive(ap) - struct vop_inactive_args /* { - struct vnode *a_vp; - struct proc *a_p; - } */ *ap; +cd9660_inactive(struct vnop_inactive_args *ap) { - struct vnode *vp = ap->a_vp; - struct proc *p = ap->a_p; - register struct iso_node *ip = VTOI(vp); - int error = 0; + vnode_t vp = ap->a_vp; + struct iso_node *ip = VTOI(vp); - if (prtactive && vp->v_usecount != 0) - vprint("cd9660_inactive: pushing active", vp); - /* - * We need to unlock the inode here. If we don't panics or - * hangs will ensue. Our callers expect us to take care of this. - */ - - VOP_UNLOCK(vp,0,p); - /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (vp->v_usecount == 0 && ip->inode.iso_mode == 0) - vgone(vp); + if (ip->inode.iso_mode == 0) + vnode_recycle(vp); - return error; + return 0; } /* * Reclaim an inode so that it can be used for other purposes. */ int -cd9660_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; - } */ *ap; +cd9660_reclaim(struct vnop_reclaim_args *ap) { - register struct vnode *vp = ap->a_vp; - register struct iso_node *ip = VTOI(vp); + vnode_t vp = ap->a_vp; + struct iso_node *ip = VTOI(vp); - if (prtactive && vp->v_usecount != 0) - vprint("cd9660_reclaim: pushing active", vp); + vnode_removefsref(vp); /* * Remove the inode from its hash chain. */ cd9660_ihashrem(ip); - /* - * Purge old data structures associated with the inode. - */ - cache_purge(vp); + if (ip->i_devvp) { - struct vnode *tvp = ip->i_devvp; + vnode_t devvp = ip->i_devvp; ip->i_devvp = NULL; - vrele(tvp); + vnode_rele(devvp); } + vnode_clearfsnode(vp); + if (ip->i_namep != isonullname) FREE(ip->i_namep, M_TEMP); if (ip->i_riff != NULL) FREE(ip->i_riff, M_TEMP); - FREE_ZONE(vp->v_data, sizeof(struct iso_node), M_ISOFSNODE); - vp->v_data = NULL; + FREE_ZONE(ip, sizeof(struct iso_node), M_ISOFSNODE); + return (0); } @@ -322,10 +309,8 @@ cd9660_reclaim(ap) * File attributes */ void -cd9660_defattr(isodir, inop, bp) - struct iso_directory_record *isodir; - struct iso_node *inop; - struct buf *bp; +cd9660_defattr(struct iso_directory_record *isodir, struct iso_node *inop, + struct buf *bp) { struct buf *bp2 = NULL; struct iso_mnt *imp; @@ -346,12 +331,11 @@ cd9660_defattr(isodir, inop, bp) if (!bp && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) { - VOP_BLKATOFF(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, - &bp2); + cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, &bp2); bp = bp2; } if (bp) { - ap = (struct iso_extended_attributes *)bp->b_data; + ap = (struct iso_extended_attributes *)buf_dataptr(bp); if (isonum_711(ap->version) == 1) { if (!(ap->perm[0]&0x40)) @@ -372,22 +356,20 @@ cd9660_defattr(isodir, inop, bp) ap = NULL; } if (!ap) { - inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6; - inop->inode.iso_uid = (uid_t)0; - inop->inode.iso_gid = (gid_t)0; + inop->inode.iso_mode |= VREAD|VWRITE|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6; + inop->inode.iso_uid = ISO_UNKNOWNUID; + inop->inode.iso_gid = ISO_UNKNOWNGID; } if (bp2) - brelse(bp2); + buf_brelse(bp2); } /* * Time stamps */ void -cd9660_deftstamp(isodir,inop,bp) - struct iso_directory_record *isodir; - struct iso_node *inop; - struct buf *bp; +cd9660_deftstamp(struct iso_directory_record *isodir, struct iso_node *inop, + struct buf *bp) { struct buf *bp2 = NULL; struct iso_mnt *imp; @@ -398,11 +380,11 @@ cd9660_deftstamp(isodir,inop,bp) && ((imp 
= inop->i_mnt)->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) { - VOP_BLKATOFF(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, &bp2); + cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, &bp2); bp = bp2; } if (bp) { - ap = (struct iso_extended_attributes *)bp->b_data; + ap = (struct iso_extended_attributes *)buf_dataptr(bp); if (isonum_711(ap->version) == 1) { if (!cd9660_tstamp_conv17(ap->ftime,&inop->inode.iso_atime)) @@ -420,16 +402,14 @@ cd9660_deftstamp(isodir,inop,bp) inop->inode.iso_mtime = inop->inode.iso_ctime; } if (bp2) - brelse(bp2); + buf_brelse(bp2); } int -cd9660_tstamp_conv7(pi,pu) - u_char *pi; - struct timespec *pu; +cd9660_tstamp_conv7(u_char *pi, struct timespec *pu) { int crtime, days; - int y, m, d, hour, minute, second, tz; + int y, m, d, hour, minute, second, mytz; y = pi[0] + 1900; m = pi[1]; @@ -437,7 +417,7 @@ cd9660_tstamp_conv7(pi,pu) hour = pi[3]; minute = pi[4]; second = pi[5]; - tz = pi[6]; + mytz = pi[6]; if (y < 1970) { pu->tv_sec = 0; @@ -458,8 +438,8 @@ cd9660_tstamp_conv7(pi,pu) crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second; /* timezone offset is unreliable on some disks */ - if (-48 <= tz && tz <= 52) - crtime -= tz * 15 * 60; + if (-48 <= mytz && mytz <= 52) + crtime -= mytz * 15 * 60; } pu->tv_sec = crtime; pu->tv_nsec = 0; @@ -467,9 +447,7 @@ cd9660_tstamp_conv7(pi,pu) } static u_int -cd9660_chars2ui(begin,len) - u_char *begin; - int len; +cd9660_chars2ui(u_char *begin, int len) { u_int rc; @@ -481,9 +459,7 @@ cd9660_chars2ui(begin,len) } int -cd9660_tstamp_conv17(pi,pu) - u_char *pi; - struct timespec *pu; +cd9660_tstamp_conv17(u_char *pi, struct timespec *pu) { u_char buf[7]; @@ -512,9 +488,7 @@ cd9660_tstamp_conv17(pi,pu) } ino_t -isodirino(isodir, imp) - struct iso_directory_record *isodir; - struct iso_mnt *imp; +isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp) { ino_t ino; diff --git a/bsd/isofs/cd9660/cd9660_node.h b/bsd/isofs/cd9660/cd9660_node.h index 68d471496..faa7450dd 100644 --- a/bsd/isofs/cd9660/cd9660_node.h +++ b/bsd/isofs/cd9660/cd9660_node.h @@ -75,6 +75,7 @@ */ #include +#include #include #ifndef doff_t @@ -121,7 +122,7 @@ struct iso_node { doff_t i_diroff; /* offset in dir, where we found last entry */ doff_t i_offset; /* offset of free space in directory */ ino_t i_ino; /* inode number of found directory */ - struct lock__bsd__ i_lock; /* Inode lock. */ + daddr_t i_lastr; /* last read (read ahead) */ long iso_extent; /* extent of file */ long i_size; long iso_start; /* actual start of data of file (may be different */ @@ -147,54 +148,65 @@ struct iso_node { /* These flags are kept in i_flag. */ #define ISO_ASSOCIATED 0x0001 /* node is an associated file. 
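The cd9660_tstamp_conv7() above decodes the 7-byte ISO-9660 timestamp: year offset from 1900, month, day, hour, minute, second, and a timezone offset counted in 15-minute units that is honored only in the sane -48..52 range. Approximately, in user space (leap handling simplified to the 1970-2099 window):

#include <stdio.h>

static long
iso7_to_unix(const unsigned char p[7])
{
    static const int before[] =
        { 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
    int y = p[0] + 1900, m = p[1], d = p[2];
    signed char tz = (signed char)p[6];
    long days, secs;

    if (y < 1970)
        return 0;
    days = (y - 1970) * 365L + (y - 1969) / 4;  /* leap days before year y */
    days += before[m] + (d - 1);
    if (m > 2 && y % 4 == 0)                    /* leap day this year */
        days += 1;
    secs = ((days * 24 + p[3]) * 60 + p[4]) * 60 + p[5];
    if (-48 <= tz && tz <= 52)                  /* unreliable on some disks */
        secs -= tz * 15L * 60;
    return secs;
}

int
main(void)
{
    unsigned char t[7] = { 105, 4, 28, 0, 52, 30, 0 };
    printf("%ld\n", iso7_to_unix(t));   /* 2005-04-28 00:52:30Z -> 1114649550 */
    return 0;
}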
*/ +#define ISO_INALLOC 0x0002 +#define ISO_INWALLOC 0x0004 + /* defines VTOI and ITOV macros */ #undef VTOI #undef ITOV -#define VTOI(vp) ((struct iso_node *)(vp)->v_data) +#define VTOI(vp) ((struct iso_node *)(vnode_fsnode(vp))) #define ITOV(ip) ((ip)->i_vnode) +/* similar in as default UID and GID */ +#define ISO_UNKNOWNUID ((uid_t)99) +#define ISO_UNKNOWNGID ((gid_t)99) + +int cd9660_access_internal(vnode_t, mode_t, kauth_cred_t); + /* * Prototypes for ISOFS vnode operations */ -int cd9660_lookup __P((struct vop_lookup_args *)); -int cd9660_open __P((struct vop_open_args *)); -int cd9660_close __P((struct vop_close_args *)); -int cd9660_access __P((struct vop_access_args *)); -int cd9660_getattr __P((struct vop_getattr_args *)); -int cd9660_read __P((struct vop_read_args *)); -int cd9660_xa_read __P((struct vop_read_args *)); -int cd9660_ioctl __P((struct vop_ioctl_args *)); -int cd9660_select __P((struct vop_select_args *)); -int cd9660_mmap __P((struct vop_mmap_args *)); -int cd9660_seek __P((struct vop_seek_args *)); -int cd9660_readdir __P((struct vop_readdir_args *)); -int cd9660_readlink __P((struct vop_readlink_args *)); -int cd9660_inactive __P((struct vop_inactive_args *)); -int cd9660_reclaim __P((struct vop_reclaim_args *)); -int cd9660_bmap __P((struct vop_bmap_args *)); -int cd9660_lock __P((struct vop_lock_args *)); -int cd9660_unlock __P((struct vop_unlock_args *)); -int cd9660_strategy __P((struct vop_strategy_args *)); -int cd9660_print __P((struct vop_print_args *)); -int cd9660_islocked __P((struct vop_islocked_args *)); -int cd9660_pathconf __P((struct vop_pathconf_args *)); -int cd9660_blkatoff __P((struct vop_blkatoff_args *)); - -void cd9660_defattr __P((struct iso_directory_record *, - struct iso_node *, struct buf *)); -void cd9660_deftstamp __P((struct iso_directory_record *, - struct iso_node *, struct buf *)); -struct vnode *cd9660_ihashget __P((dev_t, ino_t, struct proc *)); -void cd9660_ihashins __P((struct iso_node *)); -void cd9660_ihashrem __P((struct iso_node *)); -int cd9660_tstamp_conv7 __P((u_char *, struct timespec *)); -int cd9660_tstamp_conv17 __P((u_char *, struct timespec *)); -ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *)); +int cd9660_lookup (struct vnop_lookup_args *); +int cd9660_open (struct vnop_open_args *); +int cd9660_close (struct vnop_close_args *); +int cd9660_access (struct vnop_access_args *); +int cd9660_getattr (struct vnop_getattr_args *); +int cd9660_read (struct vnop_read_args *); +int cd9660_xa_read (struct vnop_read_args *); +int cd9660_ioctl (struct vnop_ioctl_args *); +int cd9660_select (struct vnop_select_args *); +int cd9660_mmap (struct vnop_mmap_args *); +int cd9660_readdir (struct vnop_readdir_args *); +int cd9660_readlink (struct vnop_readlink_args *); +int cd9660_inactive (struct vnop_inactive_args *); +int cd9660_reclaim (struct vnop_reclaim_args *); +int cd9660_strategy (struct vnop_strategy_args *); +int cd9660_pathconf (struct vnop_pathconf_args *); +int cd9660_enotsupp(void); +int cd9660_pagein(struct vnop_pagein_args *ap); +int cd9660_remove(struct vnop_remove_args *ap); +int cd9660_rmdir(struct vnop_rmdir_args *ap); +int cd9660_getattrlist(struct vnop_getattrlist_args *ap); + +__private_extern__ void cd9660_xa_init(struct iso_node *ip, + struct iso_directory_record *isodir); +__private_extern__ int cd9660_blkatoff (vnode_t, off_t, char **, buf_t *); + +void cd9660_defattr (struct iso_directory_record *, + struct iso_node *, struct buf *); +void cd9660_deftstamp (struct 
iso_directory_record *, + struct iso_node *, struct buf *); +struct vnode *cd9660_ihashget (dev_t, ino_t, struct proc *); +void cd9660_ihashins (struct iso_node *); +void cd9660_ihashrem (struct iso_node *); +int cd9660_tstamp_conv7 (u_char *, struct timespec *); +int cd9660_tstamp_conv17 (u_char *, struct timespec *); +ino_t isodirino (struct iso_directory_record *, struct iso_mnt *); #ifdef ISODEVMAP -struct iso_dnode *iso_dmap __P((dev_t, ino_t, int)); -void iso_dunmap __P((dev_t)); +struct iso_dnode *iso_dmap (dev_t, ino_t, int); +void iso_dunmap (dev_t); #endif #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/isofs/cd9660/cd9660_rrip.c b/bsd/isofs/cd9660/cd9660_rrip.c index 1481c728c..66d8e231f 100644 --- a/bsd/isofs/cd9660/cd9660_rrip.c +++ b/bsd/isofs/cd9660/cd9660_rrip.c @@ -89,9 +89,7 @@ * POSIX file attribute */ static int -cd9660_rrip_attr(p,ana) - ISO_RRIP_ATTR *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_attr(ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana) { ana->inop->inode.iso_mode = isonum_733(p->mode); ana->inop->inode.iso_uid = isonum_733(p->uid); @@ -102,9 +100,7 @@ cd9660_rrip_attr(p,ana) } static void -cd9660_rrip_defattr(isodir,ana) - struct iso_directory_record *isodir; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_defattr(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana) { /* But this is a required field! */ printf("RRIP without PX field?\n"); @@ -115,9 +111,7 @@ cd9660_rrip_defattr(isodir,ana) * Symbolic Links */ static int -cd9660_rrip_slink(p,ana) - ISO_RRIP_SLINK *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_slink(ISO_RRIP_SLINK *p, ISO_RRIP_ANALYZE *ana) { register ISO_RRIP_SLINK_COMPONENT *pcomp; register ISO_RRIP_SLINK_COMPONENT *pcompe; @@ -173,7 +167,7 @@ cd9660_rrip_slink(p,ana) /* same as above */ outbuf -= len; len = 0; - inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname; + inbuf = &(vfs_statfs(ana->imp->im_mountp)->f_mntonname); wlen = strlen(inbuf); break; @@ -226,9 +220,7 @@ cd9660_rrip_slink(p,ana) * Alternate name */ static int -cd9660_rrip_altname(p,ana) - ISO_RRIP_ALTNAME *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_altname(ISO_RRIP_ALTNAME *p, ISO_RRIP_ANALYZE *ana) { char *inbuf; int wlen; @@ -289,9 +281,7 @@ cd9660_rrip_altname(p,ana) } static void -cd9660_rrip_defname(isodir,ana) - struct iso_directory_record *isodir; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_defname(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana) { strcpy(ana->outbuf,".."); switch (*isodir->name) { @@ -313,9 +303,7 @@ cd9660_rrip_defname(isodir,ana) * Parent or Child Link */ static int -cd9660_rrip_pclink(p,ana) - ISO_RRIP_CLINK *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_pclink(ISO_RRIP_CLINK *p, ISO_RRIP_ANALYZE *ana) { *ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift; ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK); @@ -325,10 +313,9 @@ cd9660_rrip_pclink(p,ana) /* * Relocated directory */ +/* ARGSUSED */ static int -cd9660_rrip_reldir(p,ana) - ISO_RRIP_RELDIR *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_reldir(__unused ISO_RRIP_RELDIR *p, ISO_RRIP_ANALYZE *ana) { /* special hack to make caller aware of RE field */ *ana->outlen = 0; @@ -337,9 +324,7 @@ cd9660_rrip_reldir(p,ana) } static int -cd9660_rrip_tstamp(p,ana) - ISO_RRIP_TSTAMP *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_tstamp(ISO_RRIP_TSTAMP *p, ISO_RRIP_ANALYZE *ana) { u_char *ptime; @@ -394,9 +379,8 @@ cd9660_rrip_tstamp(p,ana) } static void -cd9660_rrip_deftstamp(isodir,ana) - struct iso_directory_record *isodir; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_deftstamp(struct iso_directory_record *isodir, + 
ISO_RRIP_ANALYZE *ana) { cd9660_deftstamp(isodir,ana->inop,NULL); } @@ -405,9 +389,7 @@ cd9660_rrip_deftstamp(isodir,ana) * POSIX device modes */ static int -cd9660_rrip_device(p,ana) - ISO_RRIP_DEVICE *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_device(ISO_RRIP_DEVICE *p, ISO_RRIP_ANALYZE *ana) { u_int high, low; @@ -426,9 +408,7 @@ cd9660_rrip_device(p,ana) * Flag indicating */ static int -cd9660_rrip_idflag(p,ana) - ISO_RRIP_IDFLAG *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_idflag(ISO_RRIP_IDFLAG *p, ISO_RRIP_ANALYZE *ana) { ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */ /* special handling of RE field */ @@ -442,9 +422,7 @@ cd9660_rrip_idflag(p,ana) * Continuation pointer */ static int -cd9660_rrip_cont(p,ana) - ISO_RRIP_CONT *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_cont(ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana) { ana->iso_ce_blk = isonum_733(p->location); ana->iso_ce_off = isonum_733(p->offset); @@ -455,10 +433,9 @@ cd9660_rrip_cont(p,ana) /* * System Use end */ +/* ARGSUSED */ static int -cd9660_rrip_stop(p,ana) - ISO_SUSP_HEADER *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_stop(__unused ISO_SUSP_HEADER *p, __unused ISO_RRIP_ANALYZE *ana) { return ISO_SUSP_STOP; } @@ -467,9 +444,7 @@ cd9660_rrip_stop(p,ana) * Extension reference */ static int -cd9660_rrip_extref(p,ana) - ISO_RRIP_EXTREF *p; - ISO_RRIP_ANALYZE *ana; +cd9660_rrip_extref(ISO_RRIP_EXTREF *p, ISO_RRIP_ANALYZE *ana) { if (isonum_711(p->len_id) != 10 || bcmp((char *)p + 8,"RRIP_1991A",10) @@ -479,18 +454,19 @@ cd9660_rrip_extref(p,ana) return ISO_SUSP_EXTREF; } +typedef int (*rrip_table_func)(ISO_SUSP_HEADER *phead, ISO_RRIP_ANALYZE *ana); +typedef int (*rrip_table_func2)(struct iso_directory_record *isodir, + ISO_RRIP_ANALYZE *ana); typedef struct { char type[2]; - int (*func)(); - void (*func2)(); + rrip_table_func func; + rrip_table_func2 func2; int result; } RRIP_TABLE; static int -cd9660_rrip_loop(isodir,ana,table) - struct iso_directory_record *isodir; - ISO_RRIP_ANALYZE *ana; - RRIP_TABLE *table; +cd9660_rrip_loop(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana, + RRIP_TABLE *table) { register RRIP_TABLE *ptable; register ISO_SUSP_HEADER *phead; @@ -528,7 +504,7 @@ cd9660_rrip_loop(isodir,ana,table) for (ptable = table; ptable->func; ptable++) { if (*phead->type == *ptable->type && phead->type[1] == ptable->type[1]) { - result |= ptable->func(phead,ana); + result |= (ptable->func(phead,ana)); break; } } @@ -552,22 +528,22 @@ cd9660_rrip_loop(isodir,ana,table) if (ana->fields && ana->iso_ce_len) { if (ana->iso_ce_blk >= ana->imp->volume_space_size || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size - || bread(ana->imp->im_devvp, + || buf_bread(ana->imp->im_devvp, #if 1 // radar 1669467 - logical and physical blocksize are the same - ana->iso_ce_blk, + (daddr64_t)((unsigned)ana->iso_ce_blk), #else - ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT), + (daddr64_t)((unsigned)(ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT))), #endif // radar 1669467 ana->imp->logical_block_size, NOCRED, &bp)) /* what to do now? */ break; - phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off); + phead = (ISO_SUSP_HEADER *)((char *)buf_dataptr(bp) + ana->iso_ce_off); pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len); } else break; } if (bp) - brelse(bp); + buf_brelse(bp); /* * If we don't find the Basic SUSP stuffs, just set default value * (attribute/time stamp) @@ -583,20 +559,25 @@ cd9660_rrip_loop(isodir,ana,table) * Get Attributes. 
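The rrip_table_func typedefs added above give every System Use handler one prototype, so the scan in cd9660_rrip_loop() can compare the two-character type codes and call through a single pointer type; the casts in the tables just below satisfy that. The dispatch shape, reduced to essentials (hypothetical handlers):

#include <stdio.h>
#include <string.h>

typedef int (*susp_func)(const char *payload);

static int handle_px(const char *p) { printf("PX: %s\n", p); return 0x01; }
static int handle_tf(const char *p) { printf("TF: %s\n", p); return 0x02; }

static const struct {
    char      type[3];      /* two-character SUSP code */
    susp_func func;
} table[] = {
    { "PX", handle_px },
    { "TF", handle_tf },
    { "",   0 },            /* terminator, like the "" rows in the tables */
};

static int
dispatch(const char *type, const char *payload)
{
    int i;

    for (i = 0; table[i].func != 0; i++)
        if (memcmp(type, table[i].type, 2) == 0)
            return table[i].func(payload);  /* result bits get OR-ed */
    return 0;                               /* unknown field: skip */
}

int
main(void)
{
    int fields = dispatch("PX", "mode/uid/gid") | dispatch("TF", "stamps");
    printf("fields = 0x%x\n", fields);
    return 0;
}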
*/ static RRIP_TABLE rrip_table_analyze[] = { - { "PX", cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR }, - { "TF", cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP }, - { "PN", cd9660_rrip_device, 0, ISO_SUSP_DEVICE }, - { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, - { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "PX", (rrip_table_func)cd9660_rrip_attr, + (rrip_table_func2)cd9660_rrip_defattr, + ISO_SUSP_ATTR }, + { "TF", (rrip_table_func)cd9660_rrip_tstamp, + (rrip_table_func2)cd9660_rrip_deftstamp, + ISO_SUSP_TSTAMP }, + { "PN", (rrip_table_func)cd9660_rrip_device, + 0, ISO_SUSP_DEVICE }, + { "RR", (rrip_table_func)cd9660_rrip_idflag, + 0, ISO_SUSP_IDFLAG }, + { "CE", (rrip_table_func)cd9660_rrip_cont, + 0, ISO_SUSP_CONT }, { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int -cd9660_rrip_analyze(isodir,inop,imp) - struct iso_directory_record *isodir; - struct iso_node *inop; - struct iso_mnt *imp; +cd9660_rrip_analyze(struct iso_directory_record *isodir, struct iso_node *inop, + struct iso_mnt *imp) { ISO_RRIP_ANALYZE analyze; @@ -611,23 +592,26 @@ cd9660_rrip_analyze(isodir,inop,imp) * Get Alternate Name. */ static RRIP_TABLE rrip_table_getname[] = { - { "NM", cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME }, - { "CL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, - { "PL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, - { "RE", cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR }, - { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, - { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "NM", (rrip_table_func)cd9660_rrip_altname, + (rrip_table_func2)cd9660_rrip_defname, + ISO_SUSP_ALTNAME }, + { "CL", (rrip_table_func)cd9660_rrip_pclink, + 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, + { "PL", (rrip_table_func)cd9660_rrip_pclink, + 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, + { "RE", (rrip_table_func)cd9660_rrip_reldir, + 0, ISO_SUSP_RELDIR }, + { "RR", (rrip_table_func)cd9660_rrip_idflag, + 0, ISO_SUSP_IDFLAG }, + { "CE", (rrip_table_func)cd9660_rrip_cont, + 0, ISO_SUSP_CONT }, { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int -cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp) - struct iso_directory_record *isodir; - char *outbuf; - u_short *outlen; - ino_t *inump; - struct iso_mnt *imp; +cd9660_rrip_getname(struct iso_directory_record *isodir, char *outbuf, + u_short *outlen, ino_t *inump, struct iso_mnt *imp) { ISO_RRIP_ANALYZE analyze; RRIP_TABLE *tab; @@ -656,19 +640,19 @@ cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp) * Get Symbolic Link. 
*/ static RRIP_TABLE rrip_table_getsymname[] = { - { "SL", cd9660_rrip_slink, 0, ISO_SUSP_SLINK }, - { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, - { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "SL", (rrip_table_func)cd9660_rrip_slink, + 0, ISO_SUSP_SLINK }, + { "RR", (rrip_table_func)cd9660_rrip_idflag, + 0, ISO_SUSP_IDFLAG }, + { "CE", (rrip_table_func)cd9660_rrip_cont, + 0, ISO_SUSP_CONT }, { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int -cd9660_rrip_getsymname(isodir,outbuf,outlen,imp) - struct iso_directory_record *isodir; - char *outbuf; - u_short *outlen; - struct iso_mnt *imp; +cd9660_rrip_getsymname(struct iso_directory_record *isodir, char *outbuf, + u_short *outlen, struct iso_mnt *imp) { ISO_RRIP_ANALYZE analyze; @@ -684,8 +668,10 @@ cd9660_rrip_getsymname(isodir,outbuf,outlen,imp) } static RRIP_TABLE rrip_table_extref[] = { - { "ER", cd9660_rrip_extref, 0, ISO_SUSP_EXTREF }, - { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "ER", (rrip_table_func)cd9660_rrip_extref, + 0, ISO_SUSP_EXTREF }, + { "CE", (rrip_table_func)cd9660_rrip_cont, + 0, ISO_SUSP_CONT }, { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; @@ -695,9 +681,7 @@ static RRIP_TABLE rrip_table_extref[] = { * Note: We insist on the ER field. */ int -cd9660_rrip_offset(isodir,imp) - struct iso_directory_record *isodir; - struct iso_mnt *imp; +cd9660_rrip_offset(struct iso_directory_record *isodir, struct iso_mnt *imp) { ISO_RRIP_OFFSET *p; ISO_RRIP_ANALYZE analyze; diff --git a/bsd/isofs/cd9660/cd9660_util.c b/bsd/isofs/cd9660/cd9660_util.c index 2a3798e1d..a858d848b 100644 --- a/bsd/isofs/cd9660/cd9660_util.c +++ b/bsd/isofs/cd9660/cd9660_util.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include /* XXX */ @@ -95,9 +96,7 @@ * Note: Version number plus ';' may be omitted. 
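[Editor's note: the comment above describes the contract isofncmp implements, which is easy to get wrong: the on-disc name may carry a ";version" tail and is stored upper case. A toy model of that rule, for illustration only; it is not the kernel routine and skips its trailing-dot corner cases.]

/*
 * Toy model of the isofncmp rule: the directory record may store
 * "FOO.TXT;1" while the caller asks for "foo.txt".  Case is folded
 * and everything from ';' onward is treated as a version number.
 * Returns 0 on a match, non-zero otherwise.
 */
static int
toy_isofncmp(const unsigned char *fn, int fnlen,
             const unsigned char *isofn, int isolen)
{
    int i = 0, j = 0;
    unsigned char c;

    while (i < fnlen && j < isolen) {
        c = isofn[j];
        if (c == ';')                   /* version tail: stop comparing */
            break;
        if (c >= 'A' && c <= 'Z')
            c += 'a' - 'A';             /* ISO names are stored upper case */
        if (c != fn[i])
            return fn[i] - c;
        i++;
        j++;
    }
    if (i < fnlen)
        return 1;                       /* caller's name has characters left */
    if (j < isolen && isofn[j] != ';')
        return -1;                      /* disc name has characters left */
    return 0;                           /* equal; any ";N" version ignored */
}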
*/ int -isofncmp(fn, fnlen, isofn, isolen) - u_char *fn, *isofn; - int fnlen, isolen; +isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen) { int i, j; char c; @@ -160,11 +159,7 @@ isofncmp(fn, fnlen, isofn, isolen) */ int -ucsfncmp(fn, fnlen, ucsfn, ucslen) - u_int16_t *fn; - int fnlen; - u_int16_t *ucsfn; - int ucslen; +ucsfncmp(u_int16_t *fn, int fnlen, u_int16_t *ucsfn, int ucslen) { int i, j; u_int16_t c; @@ -216,12 +211,8 @@ ucsfncmp(fn, fnlen, ucsfn, ucslen) * translate a filename */ void -isofntrans(infn, infnlen, outfn, outfnlen, original, assoc) - u_char *infn, *outfn; - int infnlen; - u_short *outfnlen; - int original; - int assoc; +isofntrans(u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen, + int original, int assoc) { int fnidx = 0; @@ -266,13 +257,8 @@ isofntrans(infn, infnlen, outfn, outfnlen, original, assoc) * translate a UCS-2 filename to UTF-8 */ void -ucsfntrans(infn, infnlen, outfn, outfnlen, dir, assoc) - u_int16_t *infn; - int infnlen; - u_char *outfn; - u_short *outfnlen; - int dir; - int assoc; +ucsfntrans(u_int16_t *infn, int infnlen, u_char *outfn, u_short *outfnlen, + int dir, int assoc) { if (infnlen == 1) { strcpy(outfn, ".."); @@ -325,22 +311,19 @@ ucsfntrans(infn, infnlen, outfn, outfnlen, dir, assoc) * count the number of children by enumerating the directory */ static int -isochildcount(vdp, dircnt, filcnt) - struct vnode *vdp; - int *dircnt; - int *filcnt; +isochildcount(struct vnode *vdp, int *dircnt, int *filcnt) { struct iso_node *dp; struct buf *bp = NULL; struct iso_mnt *imp; struct iso_directory_record *ep; - u_long bmask; + uint32_t bmask; int error = 0; int reclen; int dirs, files; int blkoffset; int logblksize; - long diroffset; + int32_t diroffset; dp = VTOI(vdp); imp = dp->i_mnt; @@ -356,14 +339,14 @@ isochildcount(vdp, dircnt, filcnt) */ if ((diroffset & bmask) == 0) { if (bp != NULL) - brelse(bp); - if ( (error = VOP_BLKATOFF(vdp, SECTOFF(imp, diroffset), NULL, &bp)) ) + buf_brelse(bp); + if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, diroffset), NULL, &bp)) ) break; blkoffset = 0; } ep = (struct iso_directory_record *) - ((char *)bp->b_data + blkoffset); + (buf_dataptr(bp) + blkoffset); reclen = isonum_711(ep->length); if (reclen == 0) { @@ -399,7 +382,7 @@ isochildcount(vdp, dircnt, filcnt) } if (bp) - brelse (bp); + buf_brelse (bp); *dircnt = dirs; *filcnt = files; @@ -408,47 +391,33 @@ isochildcount(vdp, dircnt, filcnt) } -/* - * There are two ways to qualify for ownership rights on an object: - * - * 1. Your UID matches the UID of the vnode - * 2. You are root - * - */ -static int cd9660_owner_rights(uid_t owner, struct iso_mnt *imp, struct ucred *cred, struct proc *p, int invokesuperuserstatus) { - return ((cred->cr_uid == owner) || /* [1] */ - (invokesuperuserstatus && (suser(cred, &p->p_acflag) == 0))) ? 0 : EPERM; /* [2] */ -} - - - -static unsigned long DerivePermissionSummary(uid_t owner, gid_t group, mode_t obj_mode, struct iso_mnt *imp, struct ucred *cred, struct proc *p) { - register gid_t *gp; - unsigned long permissions; - int i; +static uint32_t +DerivePermissionSummary(uid_t owner, gid_t group, mode_t obj_mode, __unused struct iso_mnt *imp) +{ + kauth_cred_t cred = kauth_cred_get(); + uint32_t permissions; + int is_member; /* User id 0 (root) always gets access. */ - if (cred->cr_uid == 0) { + if (!suser(cred, NULL)) { permissions = R_OK | X_OK; goto Exit; }; /* Otherwise, check the owner. 
*/ - if (cd9660_owner_rights(owner, imp, cred, p, 0) == 0) { - permissions = ((unsigned long)obj_mode & S_IRWXU) >> 6; + if (owner == kauth_cred_getuid(cred)) { + permissions = ((uint32_t)obj_mode & S_IRWXU) >> 6; goto Exit; } /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) { - if (group == *gp) { - permissions = ((unsigned long)obj_mode & S_IRWXG) >> 3; + if (kauth_cred_ismember_gid(cred, group, &is_member) == 0 && is_member) { + permissions = ((uint32_t)obj_mode & S_IRWXG) >> 3; goto Exit; - } - }; + } /* Otherwise, settle for 'others' access. */ - permissions = (unsigned long)obj_mode & S_IRWXO; + permissions = (uint32_t)obj_mode & S_IRWXO; Exit: return permissions & ~W_OK; /* Write access is always impossible */ @@ -460,6 +429,7 @@ attrcalcsize(struct attrlist *attrlist) { int size; attrgroup_t a; + boolean_t is_64_bit = proc_is64bit(current_proc()); #if ((ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | ATTR_CMN_OBJTYPE | \ ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | ATTR_CMN_PAROBJID | \ @@ -509,55 +479,80 @@ attrcalcsize(struct attrlist *attrlist) if (a & ATTR_CMN_OBJPERMANENTID) size += sizeof(fsobj_id_t); if (a & ATTR_CMN_PAROBJID) size += sizeof(fsobj_id_t); if (a & ATTR_CMN_SCRIPT) size += sizeof(text_encoding_t); - if (a & ATTR_CMN_CRTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_MODTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_CHGTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_ACCTIME) size += sizeof(struct timespec); - if (a & ATTR_CMN_BKUPTIME) size += sizeof(struct timespec); + if (a & ATTR_CMN_CRTIME) { + if (is_64_bit) + size += sizeof(struct user_timespec); + else + size += sizeof(struct timespec); + } + if (a & ATTR_CMN_MODTIME) { + if (is_64_bit) + size += sizeof(struct user_timespec); + else + size += sizeof(struct timespec); + } + if (a & ATTR_CMN_CHGTIME) { + if (is_64_bit) + size += sizeof(struct user_timespec); + else + size += sizeof(struct timespec); + } + if (a & ATTR_CMN_ACCTIME) { + if (is_64_bit) + size += sizeof(struct user_timespec); + else + size += sizeof(struct timespec); + } + if (a & ATTR_CMN_BKUPTIME) { + if (is_64_bit) + size += sizeof(struct user_timespec); + else + size += sizeof(struct timespec); + } if (a & ATTR_CMN_FNDRINFO) size += 32 * sizeof(u_int8_t); if (a & ATTR_CMN_OWNERID) size += sizeof(uid_t); if (a & ATTR_CMN_GRPID) size += sizeof(gid_t); - if (a & ATTR_CMN_ACCESSMASK) size += sizeof(u_long); - if (a & ATTR_CMN_NAMEDATTRCOUNT) size += sizeof(u_long); + if (a & ATTR_CMN_ACCESSMASK) size += sizeof(uint32_t); + if (a & ATTR_CMN_NAMEDATTRCOUNT) size += sizeof(uint32_t); if (a & ATTR_CMN_NAMEDATTRLIST) size += sizeof(struct attrreference); - if (a & ATTR_CMN_FLAGS) size += sizeof(u_long); - if (a & ATTR_CMN_USERACCESS) size += sizeof(u_long); + if (a & ATTR_CMN_FLAGS) size += sizeof(uint32_t); + if (a & ATTR_CMN_USERACCESS) size += sizeof(uint32_t); }; if ((a = attrlist->volattr) != 0) { - if (a & ATTR_VOL_FSTYPE) size += sizeof(u_long); - if (a & ATTR_VOL_SIGNATURE) size += sizeof(u_long); + if (a & ATTR_VOL_FSTYPE) size += sizeof(uint32_t); + if (a & ATTR_VOL_SIGNATURE) size += sizeof(uint32_t); if (a & ATTR_VOL_SIZE) size += sizeof(off_t); if (a & ATTR_VOL_SPACEFREE) size += sizeof(off_t); if (a & ATTR_VOL_SPACEAVAIL) size += sizeof(off_t); if (a & ATTR_VOL_MINALLOCATION) size += sizeof(off_t); if (a & ATTR_VOL_ALLOCATIONCLUMP) size += sizeof(off_t); - if (a & ATTR_VOL_IOBLOCKSIZE) size += sizeof(size_t); - if (a & 
ATTR_VOL_OBJCOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_FILECOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_DIRCOUNT) size += sizeof(u_long); - if (a & ATTR_VOL_MAXOBJCOUNT) size += sizeof(u_long); + if (a & ATTR_VOL_IOBLOCKSIZE) size += sizeof(uint32_t); + if (a & ATTR_VOL_OBJCOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_FILECOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_DIRCOUNT) size += sizeof(uint32_t); + if (a & ATTR_VOL_MAXOBJCOUNT) size += sizeof(uint32_t); if (a & ATTR_VOL_MOUNTPOINT) size += sizeof(struct attrreference); if (a & ATTR_VOL_NAME) size += sizeof(struct attrreference); - if (a & ATTR_VOL_MOUNTFLAGS) size += sizeof(u_long); + if (a & ATTR_VOL_MOUNTFLAGS) size += sizeof(uint32_t); if (a & ATTR_VOL_MOUNTEDDEVICE) size += sizeof(struct attrreference); if (a & ATTR_VOL_ENCODINGSUSED) size += sizeof(unsigned long long); if (a & ATTR_VOL_CAPABILITIES) size += sizeof(vol_capabilities_attr_t); if (a & ATTR_VOL_ATTRIBUTES) size += sizeof(vol_attributes_attr_t); }; if ((a = attrlist->dirattr) != 0) { - if (a & ATTR_DIR_LINKCOUNT) size += sizeof(u_long); - if (a & ATTR_DIR_ENTRYCOUNT) size += sizeof(u_long); - if (a & ATTR_DIR_MOUNTSTATUS) size += sizeof(u_long); + if (a & ATTR_DIR_LINKCOUNT) size += sizeof(uint32_t); + if (a & ATTR_DIR_ENTRYCOUNT) size += sizeof(uint32_t); + if (a & ATTR_DIR_MOUNTSTATUS) size += sizeof(uint32_t); }; if ((a = attrlist->fileattr) != 0) { - if (a & ATTR_FILE_LINKCOUNT) size += sizeof(u_long); + if (a & ATTR_FILE_LINKCOUNT) size += sizeof(uint32_t); if (a & ATTR_FILE_TOTALSIZE) size += sizeof(off_t); if (a & ATTR_FILE_ALLOCSIZE) size += sizeof(off_t); - if (a & ATTR_FILE_IOBLOCKSIZE) size += sizeof(size_t); - if (a & ATTR_FILE_CLUMPSIZE) size += sizeof(off_t); - if (a & ATTR_FILE_DEVTYPE) size += sizeof(u_long); - if (a & ATTR_FILE_FILETYPE) size += sizeof(u_long); - if (a & ATTR_FILE_FORKCOUNT) size += sizeof(u_long); + if (a & ATTR_FILE_IOBLOCKSIZE) size += sizeof(uint32_t); + if (a & ATTR_FILE_CLUMPSIZE) size += sizeof(uint32_t); + if (a & ATTR_FILE_DEVTYPE) size += sizeof(uint32_t); + if (a & ATTR_FILE_FILETYPE) size += sizeof(uint32_t); + if (a & ATTR_FILE_FORKCOUNT) size += sizeof(uint32_t); if (a & ATTR_FILE_FORKLIST) size += sizeof(struct attrreference); if (a & ATTR_FILE_DATALENGTH) size += sizeof(off_t); if (a & ATTR_FILE_DATAALLOCSIZE) size += sizeof(off_t); @@ -576,7 +571,7 @@ attrcalcsize(struct attrlist *attrlist) -void +static void packvolattr (struct attrlist *alist, struct iso_node *ip, /* ip for root directory */ void **attrbufptrptr, @@ -587,7 +582,8 @@ packvolattr (struct attrlist *alist, struct iso_mnt *imp; struct mount *mp; attrgroup_t a; - u_long attrlength; + uint32_t attrlength; + boolean_t is_64_bit = proc_is64bit(current_proc()); attrbufptr = *attrbufptrptr; varbufptr = *varbufptrptr; @@ -605,8 +601,8 @@ packvolattr (struct attrlist *alist, (u_int8_t *)varbufptr += attrlength + ((4 - (attrlength & 3)) & 3); ++((struct attrreference *)attrbufptr); }; - if (a & ATTR_CMN_DEVID) *((dev_t *)attrbufptr)++ = imp->im_devvp->v_rdev; - if (a & ATTR_CMN_FSID) *((fsid_t *)attrbufptr)++ = ITOV(ip)->v_mount->mnt_stat.f_fsid; + if (a & ATTR_CMN_DEVID) *((dev_t *)attrbufptr)++ = vnode_specrdev(imp->im_devvp); + if (a & ATTR_CMN_FSID) *((fsid_t *)attrbufptr)++ = vfs_statfs(vnode_mount(ITOV(ip)))->f_fsid; if (a & ATTR_CMN_OBJTYPE) *((fsobj_type_t *)attrbufptr)++ = 0; if (a & ATTR_CMN_OBJTAG) *((fsobj_tag_t *)attrbufptr)++ = VT_ISOFS; if (a & ATTR_CMN_OBJID) { @@ -625,10 +621,46 @@ packvolattr (struct attrlist *alist, 
++((fsobj_id_t *)attrbufptr); }; if (a & ATTR_CMN_SCRIPT) *((text_encoding_t *)attrbufptr)++ = 0; - if (a & ATTR_CMN_CRTIME) *((struct timespec *)attrbufptr)++ = imp->creation_date; - if (a & ATTR_CMN_MODTIME) *((struct timespec *)attrbufptr)++ = imp->modification_date; - if (a & ATTR_CMN_CHGTIME) *((struct timespec *)attrbufptr)++ = imp->modification_date; - if (a & ATTR_CMN_ACCTIME) *((struct timespec *)attrbufptr)++ = imp->modification_date; + if (a & ATTR_CMN_CRTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) imp->creation_date.tv_sec; + tmpp->tv_nsec = imp->creation_date.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = imp->creation_date; + } + } + if (a & ATTR_CMN_MODTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) imp->modification_date.tv_sec; + tmpp->tv_nsec = imp->modification_date.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = imp->modification_date; + } + } + if (a & ATTR_CMN_CHGTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) imp->modification_date.tv_sec; + tmpp->tv_nsec = imp->modification_date.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = imp->modification_date; + } + } + if (a & ATTR_CMN_ACCTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) imp->modification_date.tv_sec; + tmpp->tv_nsec = imp->modification_date.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = imp->modification_date; + } + } if (a & ATTR_CMN_BKUPTIME) { ((struct timespec *)attrbufptr)->tv_sec = 0; ((struct timespec *)attrbufptr)->tv_nsec = 0; @@ -640,34 +672,32 @@ packvolattr (struct attrlist *alist, }; if (a & ATTR_CMN_OWNERID) *((uid_t *)attrbufptr)++ = ip->inode.iso_uid; if (a & ATTR_CMN_GRPID) *((gid_t *)attrbufptr)++ = ip->inode.iso_gid; - if (a & ATTR_CMN_ACCESSMASK) *((u_long *)attrbufptr)++ = (u_long)ip->inode.iso_mode; - if (a & ATTR_CMN_FLAGS) *((u_long *)attrbufptr)++ = 0; + if (a & ATTR_CMN_ACCESSMASK) *((uint32_t *)attrbufptr)++ = (uint32_t)ip->inode.iso_mode; + if (a & ATTR_CMN_FLAGS) *((uint32_t *)attrbufptr)++ = 0; if (a & ATTR_CMN_USERACCESS) { - *((u_long *)attrbufptr)++ = + *((uint32_t *)attrbufptr)++ = DerivePermissionSummary(ip->inode.iso_uid, ip->inode.iso_gid, ip->inode.iso_mode, - imp, - current_proc()->p_ucred, - current_proc()); + imp); }; }; if ((a = alist->volattr) != 0) { off_t blocksize = (off_t)imp->logical_block_size; - if (a & ATTR_VOL_FSTYPE) *((u_long *)attrbufptr)++ = (u_long)imp->im_mountp->mnt_vfc->vfc_typenum; - if (a & ATTR_VOL_SIGNATURE) *((u_long *)attrbufptr)++ = (u_long)ISO9660SIGNATURE; + if (a & ATTR_VOL_FSTYPE) *((uint32_t *)attrbufptr)++ = (uint32_t)vfs_typenum(mp); + if (a & ATTR_VOL_SIGNATURE) *((uint32_t *)attrbufptr)++ = (uint32_t)ISO9660SIGNATURE; if (a & ATTR_VOL_SIZE) *((off_t *)attrbufptr)++ = (off_t)imp->volume_space_size * blocksize; if (a & ATTR_VOL_SPACEFREE) *((off_t *)attrbufptr)++ = 0; if (a & ATTR_VOL_SPACEAVAIL) *((off_t *)attrbufptr)++ = 0; if (a & ATTR_VOL_MINALLOCATION) *((off_t *)attrbufptr)++ = blocksize; if (a & ATTR_VOL_ALLOCATIONCLUMP) *((off_t *)attrbufptr)++ = blocksize; - if (a & ATTR_VOL_IOBLOCKSIZE) *((size_t *)attrbufptr)++ = blocksize; - if (a & ATTR_VOL_OBJCOUNT) *((u_long *)attrbufptr)++ = 0; - if (a & ATTR_VOL_FILECOUNT) *((u_long *)attrbufptr)++ = 0; - if (a & 
ATTR_VOL_DIRCOUNT) *((u_long *)attrbufptr)++ = 0; - if (a & ATTR_VOL_MAXOBJCOUNT) *((u_long *)attrbufptr)++ = 0xFFFFFFFF; + if (a & ATTR_VOL_IOBLOCKSIZE) *((uint32_t *)attrbufptr)++ = (uint32_t)blocksize; + if (a & ATTR_VOL_OBJCOUNT) *((uint32_t *)attrbufptr)++ = 0; + if (a & ATTR_VOL_FILECOUNT) *((uint32_t *)attrbufptr)++ = 0; + if (a & ATTR_VOL_DIRCOUNT) *((uint32_t *)attrbufptr)++ = 0; + if (a & ATTR_VOL_MAXOBJCOUNT) *((uint32_t *)attrbufptr)++ = 0xFFFFFFFF; if (a & ATTR_VOL_NAME) { attrlength = strlen( imp->volume_id ) + 1; ((struct attrreference *)attrbufptr)->attr_dataoffset = (u_int8_t *)varbufptr - (u_int8_t *)attrbufptr; @@ -678,13 +708,15 @@ packvolattr (struct attrlist *alist, (u_int8_t *)varbufptr += attrlength + ((4 - (attrlength & 3)) & 3); ++((struct attrreference *)attrbufptr); }; - if (a & ATTR_VOL_MOUNTFLAGS) *((u_long *)attrbufptr)++ = (u_long)imp->im_mountp->mnt_flag; + if (a & ATTR_VOL_MOUNTFLAGS) { + *((uint32_t *)attrbufptr)++ = (uint32_t)vfs_flags(mp); + } if (a & ATTR_VOL_MOUNTEDDEVICE) { ((struct attrreference *)attrbufptr)->attr_dataoffset = (u_int8_t *)varbufptr - (u_int8_t *)attrbufptr; - ((struct attrreference *)attrbufptr)->attr_length = strlen(mp->mnt_stat.f_mntfromname) + 1; + ((struct attrreference *)attrbufptr)->attr_length = strlen(vfs_statfs(mp)->f_mntfromname) + 1; attrlength = ((struct attrreference *)attrbufptr)->attr_length; attrlength = attrlength + ((4 - (attrlength & 3)) & 3); /* round up to the next 4-byte boundary: */ - (void) bcopy(mp->mnt_stat.f_mntfromname, varbufptr, attrlength); + (void) bcopy(vfs_statfs(mp)->f_mntfromname, varbufptr, attrlength); /* Advance beyond the space just allocated: */ (u_int8_t *)varbufptr += attrlength; @@ -716,7 +748,8 @@ packvolattr (struct attrlist *alist, VOL_CAP_FMT_ZERO_RUNS | VOL_CAP_FMT_CASE_SENSITIVE | VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS; + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; ((vol_capabilities_attr_t *)attrbufptr)->valid[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | @@ -764,7 +797,8 @@ packcommonattr (struct attrlist *alist, void *attrbufptr; void *varbufptr; attrgroup_t a; - u_long attrlength; + uint32_t attrlength; + boolean_t is_64_bit = proc_is64bit(current_proc()); attrbufptr = *attrbufptrptr; varbufptr = *varbufptrptr; @@ -774,7 +808,7 @@ packcommonattr (struct attrlist *alist, if (a & ATTR_CMN_NAME) { /* special case root since we know how to get it's name */ - if (ITOV(ip)->v_flag & VROOT) { + if (vnode_isvroot(ITOV(ip))) { attrlength = strlen( imp->volume_id ) + 1; (void) strncpy((unsigned char *)varbufptr, imp->volume_id, attrlength); } else { @@ -789,11 +823,11 @@ packcommonattr (struct attrlist *alist, ++((struct attrreference *)attrbufptr); }; if (a & ATTR_CMN_DEVID) *((dev_t *)attrbufptr)++ = ip->i_dev; - if (a & ATTR_CMN_FSID) *((fsid_t *)attrbufptr)++ = ITOV(ip)->v_mount->mnt_stat.f_fsid; - if (a & ATTR_CMN_OBJTYPE) *((fsobj_type_t *)attrbufptr)++ = ITOV(ip)->v_type; - if (a & ATTR_CMN_OBJTAG) *((fsobj_tag_t *)attrbufptr)++ = ITOV(ip)->v_tag; + if (a & ATTR_CMN_FSID) *((fsid_t *)attrbufptr)++ = vfs_statfs(vnode_mount(ITOV(ip)))->f_fsid; + if (a & ATTR_CMN_OBJTYPE) *((fsobj_type_t *)attrbufptr)++ = vnode_vtype(ITOV(ip)); + if (a & ATTR_CMN_OBJTAG) *((fsobj_tag_t *)attrbufptr)++ = vnode_tag(ITOV(ip)); if (a & ATTR_CMN_OBJID) { - if (ITOV(ip)->v_flag & VROOT) + if (vnode_isvroot(ITOV(ip))) ((fsobj_id_t *)attrbufptr)->fid_objno = 2; /* force root to be 2 */ else ((fsobj_id_t *)attrbufptr)->fid_objno = ip->i_number; @@ 
-801,7 +835,7 @@ packcommonattr (struct attrlist *alist, ++((fsobj_id_t *)attrbufptr); }; if (a & ATTR_CMN_OBJPERMANENTID) { - if (ITOV(ip)->v_flag & VROOT) + if (vnode_isvroot(ITOV(ip))) ((fsobj_id_t *)attrbufptr)->fid_objno = 2; /* force root to be 2 */ else ((fsobj_id_t *)attrbufptr)->fid_objno = ip->i_number; @@ -822,22 +856,67 @@ packcommonattr (struct attrlist *alist, ++((fsobj_id_t *)attrbufptr); }; if (a & ATTR_CMN_SCRIPT) *((text_encoding_t *)attrbufptr)++ = 0; - if (a & ATTR_CMN_CRTIME) *((struct timespec *)attrbufptr)++ = ip->inode.iso_mtime; - if (a & ATTR_CMN_MODTIME) *((struct timespec *)attrbufptr)++ = ip->inode.iso_mtime; - if (a & ATTR_CMN_CHGTIME) *((struct timespec *)attrbufptr)++ = ip->inode.iso_ctime; - if (a & ATTR_CMN_ACCTIME) *((struct timespec *)attrbufptr)++ = ip->inode.iso_atime; + if (a & ATTR_CMN_CRTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) ip->inode.iso_mtime.tv_sec; + tmpp->tv_nsec = ip->inode.iso_mtime.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = ip->inode.iso_mtime; + } + } + if (a & ATTR_CMN_MODTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) ip->inode.iso_mtime.tv_sec; + tmpp->tv_nsec = ip->inode.iso_mtime.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = ip->inode.iso_mtime; + } + } + if (a & ATTR_CMN_CHGTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) ip->inode.iso_ctime.tv_sec; + tmpp->tv_nsec = ip->inode.iso_ctime.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = ip->inode.iso_ctime; + } + } + if (a & ATTR_CMN_ACCTIME) { + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) ip->inode.iso_atime.tv_sec; + tmpp->tv_nsec = ip->inode.iso_atime.tv_nsec; + } + else { + *((struct timespec *)attrbufptr)++ = ip->inode.iso_atime; + } + } if (a & ATTR_CMN_BKUPTIME) { - ((struct timespec *)attrbufptr)->tv_sec = 0; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - ++((struct timespec *)attrbufptr); - }; + if (is_64_bit) { + struct user_timespec *tmpp = ((struct user_timespec *)attrbufptr)++; + tmpp->tv_sec = (user_time_t) 0; + tmpp->tv_nsec = 0; + } + else { + ((struct timespec *)attrbufptr)->tv_sec = 0; + ((struct timespec *)attrbufptr)->tv_nsec = 0; + ++((struct timespec *)attrbufptr); + *((struct timespec *)attrbufptr)++ = ip->inode.iso_atime; + } + } if (a & ATTR_CMN_FNDRINFO) { - struct finder_info finfo = {0}; + struct finder_info finfo; + bzero(&finfo, sizeof(finfo)); finfo.fdFlags = ip->i_FinderFlags; finfo.fdLocation.v = -1; finfo.fdLocation.h = -1; - if (ITOV(ip)->v_type == VREG) { + if (vnode_isreg(ITOV(ip))) { finfo.fdType = ip->i_FileType; finfo.fdCreator = ip->i_Creator; } @@ -848,16 +927,14 @@ packcommonattr (struct attrlist *alist, }; if (a & ATTR_CMN_OWNERID) *((uid_t *)attrbufptr)++ = ip->inode.iso_uid; if (a & ATTR_CMN_GRPID) *((gid_t *)attrbufptr)++ = ip->inode.iso_gid; - if (a & ATTR_CMN_ACCESSMASK) *((u_long *)attrbufptr)++ = (u_long)ip->inode.iso_mode; - if (a & ATTR_CMN_FLAGS) *((u_long *)attrbufptr)++ = 0; /* could also use ip->i_flag */ + if (a & ATTR_CMN_ACCESSMASK) *((uint32_t *)attrbufptr)++ = (uint32_t)ip->inode.iso_mode; + if (a & ATTR_CMN_FLAGS) *((uint32_t *)attrbufptr)++ = 0; /* could also use ip->i_flag */ if (a & ATTR_CMN_USERACCESS) { - *((u_long *)attrbufptr)++ = + *((uint32_t 
*)attrbufptr)++ = DerivePermissionSummary(ip->inode.iso_uid, ip->inode.iso_gid, ip->inode.iso_mode, - imp, - current_proc()->p_ucred, - current_proc()); + imp); }; }; @@ -870,7 +947,7 @@ void packdirattr(struct attrlist *alist, struct iso_node *ip, void **attrbufptrptr, - void **varbufptrptr) + __unused void **varbufptrptr) { void *attrbufptr; attrgroup_t a; @@ -880,7 +957,7 @@ packdirattr(struct attrlist *alist, filcnt = dircnt = 0; a = alist->dirattr; - if ((ITOV(ip)->v_type == VDIR) && (a != 0)) { + if (vnode_isdir(ITOV(ip)) && (a != 0)) { /* * if we haven't counted our children yet, do it now... */ @@ -895,17 +972,17 @@ packdirattr(struct attrlist *alist, } if (a & ATTR_DIR_LINKCOUNT) { - *((u_long *)attrbufptr)++ = ip->inode.iso_links; + *((uint32_t *)attrbufptr)++ = ip->inode.iso_links; } if (a & ATTR_DIR_ENTRYCOUNT) { /* exclude '.' and '..' from total caount */ - *((u_long *)attrbufptr)++ = ((ip->i_entries <= 2) ? 0 : (ip->i_entries - 2)); + *((uint32_t *)attrbufptr)++ = ((ip->i_entries <= 2) ? 0 : (ip->i_entries - 2)); } if (a & ATTR_DIR_MOUNTSTATUS) { - if (ITOV(ip)->v_mountedhere) { - *((u_long *)attrbufptr)++ = DIR_MNTSTATUS_MNTPOINT; + if (vnode_mountedhere(ITOV(ip))) { + *((uint32_t *)attrbufptr)++ = DIR_MNTSTATUS_MNTPOINT; } else { - *((u_long *)attrbufptr)++ = 0; + *((uint32_t *)attrbufptr)++ = 0; }; }; }; @@ -924,19 +1001,19 @@ packfileattr(struct attrlist *alist, void *varbufptr = *varbufptrptr; attrgroup_t a = alist->fileattr; - if ((ITOV(ip)->v_type == VREG) && (a != 0)) { + if (vnode_isreg(ITOV(ip)) && (a != 0)) { if (a & ATTR_FILE_LINKCOUNT) - *((u_long *)attrbufptr)++ = ip->inode.iso_links; + *((uint32_t *)attrbufptr)++ = ip->inode.iso_links; if (a & ATTR_FILE_TOTALSIZE) *((off_t *)attrbufptr)++ = (off_t)ip->i_size; if (a & ATTR_FILE_ALLOCSIZE) *((off_t *)attrbufptr)++ = (off_t)ip->i_size; if (a & ATTR_FILE_IOBLOCKSIZE) - *((u_long *)attrbufptr)++ = ip->i_mnt->logical_block_size; + *((uint32_t *)attrbufptr)++ = ip->i_mnt->logical_block_size; if (a & ATTR_FILE_CLUMPSIZE) - *((u_long *)attrbufptr)++ = ip->i_mnt->logical_block_size; + *((uint32_t *)attrbufptr)++ = ip->i_mnt->logical_block_size; if (a & ATTR_FILE_DEVTYPE) - *((u_long *)attrbufptr)++ = (u_long)ip->inode.iso_rdev; + *((uint32_t *)attrbufptr)++ = (uint32_t)ip->inode.iso_rdev; if (a & ATTR_FILE_DATALENGTH) *((off_t *)attrbufptr)++ = (off_t)ip->i_size; if (a & ATTR_FILE_DATAALLOCSIZE) @@ -965,7 +1042,7 @@ packattrblk(struct attrlist *alist, } else { packcommonattr(alist, ip, attrbufptrptr, varbufptrptr); - switch (ITOV(ip)->v_type) { + switch (vnode_vtype(ITOV(ip))) { case VDIR: packdirattr(alist, ip, attrbufptrptr, varbufptrptr); break; diff --git a/bsd/isofs/cd9660/cd9660_vfsops.c b/bsd/isofs/cd9660/cd9660_vfsops.c index a6e51cc5c..f026c811f 100644 --- a/bsd/isofs/cd9660/cd9660_vfsops.c +++ b/bsd/isofs/cd9660/cd9660_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,10 +63,11 @@ #include #include -#include +#include #include #include #include +#include #include #include #include @@ -119,15 +120,13 @@ struct CDTOC { u_char isonullname[] = "\0"; -extern int enodev (); - struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, - cd9660_quotactl, - cd9660_statfs, + NULL, /* quotactl */ + cd9660_vfs_getattr, cd9660_sync, cd9660_vget, cd9660_fhtovp, @@ -143,8 +142,8 @@ struct vfsops cd9660_vfsops = { */ #define ROOTNAME "root_device" -static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, - struct proc *p, struct iso_args *argp)); +static int iso_mountfs(struct vnode *devvp, struct mount *mp, struct user_iso_args *argp, + vfs_context_t context); static void DRGetTypeCreatorAndFlags( struct iso_mnt * theMountPointPtr, @@ -153,67 +152,22 @@ static void DRGetTypeCreatorAndFlags( u_int32_t * theCreatorPtr, u_int16_t * theFlagsPtr); -int cd9660_vget_internal( - struct mount *mp, - ino_t ino, - struct vnode **vpp, - int relocated, - struct iso_directory_record *isodir, - struct proc *p); - int -cd9660_mountroot() +cd9660_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) { - register struct mount *mp; - extern struct vnode *rootvp; - struct proc *p = current_proc(); /* XXX */ - struct iso_mnt *imp; - size_t size; - int error; - struct iso_args args; - - /* - * Get vnodes for swapdev and rootdev. - */ - if ( bdevvp(rootdev, &rootvp)) - panic("cd9660_mountroot: can't setup bdevvp's"); + int error; + struct user_iso_args args; - MALLOC_ZONE(mp, struct mount *, - sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - - /* Initialize the default IO constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - - mp->mnt_op = &cd9660_vfsops; - mp->mnt_flag = MNT_RDONLY; - LIST_INIT(&mp->mnt_vnodelist); args.flags = ISOFSMNT_ROOT; args.ssector = 0; - args.fspec = 0; args.toc_length = 0; - args.toc = 0; - if ((error = iso_mountfs(rootvp, mp, p, &args))) { - vrele(rootvp); /* release the reference from bdevvp() */ + args.toc = USER_ADDR_NULL; - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); + if ((error = iso_mountfs(rvp, mp, &args, context))) return (error); - } - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - mp->mnt_vnodecovered = NULLVP; - imp = VFSTOISOFS(mp); - (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)cd9660_statfs(mp, &mp->mnt_stat, p); + + (void)cd9660_statfs(mp, vfs_statfs(mp), context); + return (0); } @@ -223,72 +177,51 @@ cd9660_mountroot() * mount system call */ int -cd9660_mount(mp, path, data, ndp, p) - register struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +cd9660_mount(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t context) { - struct vnode *devvp; - struct iso_args args; - size_t size; + struct user_iso_args args; int error; struct iso_mnt *imp = NULL; - - if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) + + if (vfs_context_is64bit(context)) { + error = copyin(data, (caddr_t)&args, sizeof (args)); + } + else { + 
struct iso_args temp; + error = copyin(data, (caddr_t)&temp, sizeof (temp)); + args.flags = temp.flags; + args.ssector = temp.ssector; + args.toc_length = temp.toc_length; + args.toc = CAST_USER_ADDR_T(temp.toc); + } + if (error) return (error); - if ((mp->mnt_flag & MNT_RDONLY) == 0) + if (vfs_isrdwr(mp)) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ - if (mp->mnt_flag & MNT_UPDATE) { + if (vfs_isupdate(mp)) { imp = VFSTOISOFS(mp); - if (args.fspec == 0) - return (vfs_export(mp, &imp->im_export, &args.export)); - } - /* - * Not an update, or updating the name: look up the name - * and verify that it refers to a sensible block device. - */ - NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); - if ((error = namei(ndp))) - return (error); - devvp = ndp->ni_vp; - - if (devvp->v_type != VBLK) { - vrele(devvp); - return (ENOTBLK); - } - if (major(devvp->v_rdev) >= nblkdev) { - vrele(devvp); - return (ENXIO); + if (devvp == 0) + return (0); } - if ((mp->mnt_flag & MNT_UPDATE) == 0) - error = iso_mountfs(devvp, mp, p, &args); + if ( !vfs_isupdate(mp)) + error = iso_mountfs(devvp, mp, &args, context); else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ - else - vrele(devvp); } if (error) { - vrele(devvp); return (error); } /* Indicate that we don't support volfs */ - mp->mnt_flag &= ~MNT_DOVOLFS; + vfs_clearflags(mp, MNT_DOVOLFS); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); return (0); } @@ -305,23 +238,23 @@ cd9660_mount(mp, path, data, ndp, p) * device's path. It is assumed to be in user memory. */ static struct vnode * -cd9660_phys_device(char *path, struct proc *p) +cd9660_phys_device(mount_t mp, vfs_context_t context) { int err; - char *whole_path = NULL; // path to "whole" device + char whole_path[64]; // path to "whole" device char *s, *saved; struct nameidata nd; struct vnode *result; - size_t actual_size; + struct vfsstatfs * sfs; - if (path == NULL) - return NULL; - + sfs = vfs_statfs(mp); result = NULL; + if (strlen(sfs->f_mntfromname) >= sizeof(whole_path)) + return (NULL); + /* Make a copy of the mount from name, then remove trailing "s...". */ - MALLOC(whole_path, char *, MNAMELEN, M_ISOFSMNT, M_WAITOK); - copyinstr(path, whole_path, MNAMELEN-1, &actual_size); + strncpy(whole_path, sfs->f_mntfromname, sizeof(whole_path)-1); /* * I would use strrchr or rindex here, but those are declared __private_extern__, @@ -333,25 +266,23 @@ cd9660_phys_device(char *path, struct proc *p) *saved = '\0'; /* Lookup the "whole" device. */ - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, whole_path, p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(whole_path), context); err = namei(&nd); if (err) { printf("isofs: Cannot find physical device: %s\n", whole_path); goto done; } - + nameidone(&nd); + /* Open the "whole" device. 
*/ - err = VOP_OPEN(nd.ni_vp, FREAD, FSCRED, p); + err = VNOP_OPEN(nd.ni_vp, FREAD, context); if (err) { - vrele(nd.ni_vp); + vnode_put(nd.ni_vp); printf("isofs: Cannot open physical device: %s\n", whole_path); goto done; } - result = nd.ni_vp; - done: - FREE(whole_path, M_ISOFSMNT); return result; } @@ -366,41 +297,42 @@ static int cd9660_find_video_dir(struct iso_mnt *isomp) { int result, err; - struct vnode *rootvp = NULL; + struct vnode *rvp = NULL; struct vnode *videovp = NULL; struct componentname cn; + struct vfs_context context; char dirname[] = "MPEGAV"; result = 0; /* Assume not a video CD */ - err = cd9660_root(isomp->im_mountp, &rootvp); + err = cd9660_root(isomp->im_mountp, &rvp, NULL); if (err) { printf("cd9660_find_video_dir: cd9660_root failed (%d)\n", err); return 0; /* couldn't find video dir */ } + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + cn.cn_nameiop = LOOKUP; - cn.cn_flags = LOCKPARENT|ISLASTCN; - cn.cn_proc = current_proc(); - cn.cn_cred = cn.cn_proc->p_ucred; + cn.cn_flags = ISLASTCN; + cn.cn_context = &context; cn.cn_pnbuf = dirname; cn.cn_pnlen = sizeof(dirname)-1; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_namelen = cn.cn_pnlen; - err = VOP_LOOKUP(rootvp, &videovp, &cn); + err = VNOP_LOOKUP(rvp, &videovp, &cn, &context); if (err == 0) { struct iso_node *ip = VTOI(videovp); result = 1; /* Looks like video CD */ isomp->video_dir_start = ip->iso_start; isomp->video_dir_end = ip->iso_start + (ip->i_size >> isomp->im_bshift); isomp->im_flags2 |= IMF2_IS_VCD; - } - if (videovp != NULL) - vput(videovp); - if (rootvp != NULL) - vput(rootvp); + vnode_put(videovp); + } + vnode_put(rvp); return result; } @@ -409,20 +341,19 @@ cd9660_find_video_dir(struct iso_mnt *isomp) * Common code for mount and mountroot */ static int -iso_mountfs(devvp, mp, p, argp) +iso_mountfs(devvp, mp, argp, context) register struct vnode *devvp; struct mount *mp; - struct proc *p; - struct iso_args *argp; + struct user_iso_args *argp; + vfs_context_t context; { + struct proc *p; register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; struct buf *pribp = NULL, *supbp = NULL; - dev_t dev = devvp->v_rdev; + dev_t dev = vnode_specrdev(devvp); int error = EINVAL; int breaderr = 0; - int needclose = 0; - extern struct vnode *rootvp; u_long iso_bsize; int iso_blknum; int joliet_level; @@ -434,26 +365,9 @@ iso_mountfs(devvp, mp, p, argp) u_int8_t vdtype; int blkoff = argp->ssector; - if (!(mp->mnt_flag & MNT_RDONLY)) + if (vfs_isrdwr(mp)) return (EROFS); - /* - * Disallow multiple mounts of the same device. - * Disallow mounting of a device that is currently in use - * (except for root, which might share swap device for miniroot). - * Flush out any old buffers remaining from a previous use. - */ - if ((error = vfs_mountedon(devvp))) - return (error); - if (vcount(devvp) > 1 && devvp != rootvp) - return (EBUSY); - if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) - return (error); - - if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p))) - return (error); - needclose = 1; - /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. 
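[Editor's note: the iso_mountfs hunk below scans for volume descriptors the standard way: they begin at sector 16 (plus any session offset) and the set ends with a terminator. A compact model of that scan under stated assumptions; toy_vd, read_sector_fn, and the TOY_ constants are invented for the sketch, while the sector numbers and the "CD001" signature come from the code below.]

#include <string.h>

#define TOY_SECTOR_SIZE   2048  /* ISO_DEFAULT_BLOCK_SIZE in the kernel */
#define TOY_VD_PRIMARY       1
#define TOY_VD_TERMINATOR  255

/* First bytes of an ISO 9660 volume descriptor (simplified layout). */
struct toy_vd {
    unsigned char   type;               /* 1 = primary, 255 = set terminator */
    char            id[5];              /* always "CD001" */
    unsigned char   version;
    unsigned char   data[TOY_SECTOR_SIZE - 7];
};

/* Placeholder for the caller's block I/O (buf_bread in the kernel). */
typedef int (*read_sector_fn)(unsigned int blkno, void *buf);

/*
 * Walk the descriptor set the way the loop below does: start at
 * sector 16 plus the session offset, stop at the terminator or
 * after a sanity bound of 100 sectors.
 */
static int
toy_find_pvd(read_sector_fn read_sector, unsigned int blkoff,
             struct toy_vd *out)
{
    unsigned int blk;
    struct toy_vd vd;

    for (blk = 16 + blkoff; blk < 100 + blkoff; blk++) {
        if (read_sector(blk, &vd) != 0)
            continue;                   /* tolerate unreadable sectors */
        if (memcmp(vd.id, "CD001", 5) != 0)
            break;                      /* not an ISO volume descriptor */
        if (vd.type == TOY_VD_TERMINATOR)
            break;                      /* end of the descriptor set */
        if (vd.type == TOY_VD_PRIMARY) {
            *out = vd;                  /* found the primary descriptor */
            return 0;
        }
    }
    return -1;
}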
@@ -461,24 +375,24 @@ iso_mountfs(devvp, mp, p, argp) iso_bsize = ISO_DEFAULT_BLOCK_SIZE; /* tell IOKit that we're assuming 2K sectors */ - if ((error = VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&iso_bsize, FWRITE, p->p_ucred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, + (caddr_t)&iso_bsize, FWRITE, context))) return (error); - devvp->v_specsize = iso_bsize; + joliet_level = 0; for (iso_blknum = 16 + blkoff; iso_blknum < (100 + blkoff); iso_blknum++) { - if ((error = bread(devvp, iso_blknum, iso_bsize, NOCRED, &bp))) { + if ((error = (int)buf_bread(devvp, (daddr64_t)((unsigned)iso_blknum), iso_bsize, NOCRED, &bp))) { if (bp) { - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); bp = NULL; } breaderr = error; - printf("iso_mountfs: bread error %d reading block %d\n", error, iso_blknum); + printf("iso_mountfs: buf_bread error %d reading block %d\n", error, iso_blknum); continue; } - vdp = (struct iso_volume_descriptor *)bp->b_data; + vdp = (struct iso_volume_descriptor *)buf_dataptr(bp); if (bcmp (vdp->volume_desc_id, ISO_STANDARD_ID, sizeof(vdp->volume_desc_id)) != 0) { #ifdef DEBUG printf("cd9660_vfsops.c: iso_mountfs: " @@ -531,15 +445,15 @@ iso_mountfs(devvp, mp, p, argp) } if (bp) { - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); bp = NULL; } } if (bp) { - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); bp = NULL; } @@ -612,22 +526,20 @@ iso_mountfs(devvp, mp, p, argp) while ((1 << isomp->im_bshift) < isomp->logical_block_size) isomp->im_bshift++; - pribp->b_flags |= B_AGE; - brelse(pribp); + buf_markaged(pribp); + buf_brelse(pribp); pribp = NULL; - mp->mnt_data = (qaddr_t)isomp; - mp->mnt_stat.f_fsid.val[0] = (long)dev; - mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; - mp->mnt_maxsymlinklen = 0; - mp->mnt_flag |= MNT_LOCAL; + vfs_setfsprivate(mp, (void *)isomp); + vfs_statfs(mp)->f_fsid.val[0] = (long)dev; + vfs_statfs(mp)->f_fsid.val[1] = vfs_typenum(mp); + vfs_setmaxsymlen(mp, 0); + vfs_setflags(mp, MNT_LOCAL); isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; - /* * If the logical block size is not 2K then we must * set the block device's physical block size to this @@ -636,24 +548,23 @@ iso_mountfs(devvp, mp, p, argp) */ if (logical_block_size != iso_bsize) { iso_bsize = logical_block_size; - if ((error = VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&iso_bsize, FWRITE, p->p_ucred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, + (caddr_t)&iso_bsize, FWRITE, context))) goto out; - devvp->v_specsize = iso_bsize; } /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { - if ( (error = bread(isomp->im_devvp, - (isomp->root_extent + isonum_711(rootp->ext_attr_length)), - isomp->logical_block_size, NOCRED, &bp)) ) { + if ( (error = (int)buf_bread(isomp->im_devvp, + (daddr64_t)((unsigned)((isomp->root_extent + isonum_711(rootp->ext_attr_length)))), + isomp->logical_block_size, NOCRED, &bp)) ) { - printf("iso_mountfs: bread error %d reading block %d\n", + printf("iso_mountfs: buf_bread error %d reading block %d\n", error, isomp->root_extent + isonum_711(rootp->ext_attr_length)); argp->flags |= ISOFSMNT_NORRIP; goto skipRRIP; } - rootp = (struct iso_directory_record *)bp->b_data; + rootp = (struct iso_directory_record *)buf_dataptr(bp); if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; @@ -665,8 +576,8 @@ iso_mountfs(devvp, mp, p, argp) * The 
contents are valid, * but they will get reread as part of another vnode, so... */ - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); bp = NULL; } skipRRIP: @@ -721,12 +632,12 @@ skipRRIP: bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); - supbp->b_flags |= B_AGE; + buf_markaged(supbp); isomp->iso_ftype = ISO_FTYPE_JOLIET; } if (supbp) { - brelse(supbp); + buf_brelse(supbp); supbp = NULL; } @@ -740,34 +651,28 @@ skipRRIP: /* See if this could be a Video CD */ if ((isomp->im_flags2 & IMF2_IS_CDXA) && cd9660_find_video_dir(isomp)) { /* Get the 2352-bytes-per-block device. */ - isomp->phys_devvp = cd9660_phys_device(argp->fspec, p); + isomp->phys_devvp = cd9660_phys_device(mp, context); } + /* Fill the default statfs information */ + (void) cd9660_statfs(mp, vfs_statfs(mp), context); + return (0); out: if (bp) - brelse(bp); + buf_brelse(bp); if (pribp) - brelse(pribp); + buf_brelse(pribp); if (supbp) - brelse(supbp); - if (needclose) - (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); + buf_brelse(supbp); + if (isomp) { if (isomp->toc) FREE((caddr_t)isomp->toc, M_ISOFSMNT); FREE((caddr_t)isomp, M_ISOFSMNT); - mp->mnt_data = (qaddr_t)0; - } - - /* Clear the mounted on bit in the devvp If it */ - /* not set, this is a nop and there is no way to */ - /* get here with it set unless we did it. If you*/ - /* are making code changes which makes the above */ - /* assumption not true, change this code. */ - - devvp->v_specflags &= ~SI_MOUNTEDON; + vfs_setfsprivate(mp, (void *)0); + } return (error); } @@ -777,10 +682,8 @@ out: */ /* ARGSUSED */ int -cd9660_start(mp, flags, p) - struct mount *mp; - int flags; - struct proc *p; +cd9660_start(__unused struct mount *mp, __unused int flags, + __unused vfs_context_t context) { return (0); } @@ -789,10 +692,7 @@ cd9660_start(mp, flags, p) * unmount system call */ int -cd9660_unmount(mp, mntflags, p) - struct mount *mp; - int mntflags; - struct proc *p; +cd9660_unmount(struct mount *mp, int mntflags, vfs_context_t context) { register struct iso_mnt *isomp; int error, flags = 0; @@ -812,27 +712,17 @@ cd9660_unmount(mp, mntflags, p) if (isomp->iso_ftype == ISO_FTYPE_RRIP) iso_dunmap(isomp->im_dev); #endif - - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; - error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); - if (error && !force ) - return(error); - - vrele(isomp->im_devvp); - if (isomp->phys_devvp) { - error = VOP_CLOSE(isomp->phys_devvp, FREAD, FSCRED, p); + error = VNOP_CLOSE(isomp->phys_devvp, FREAD, context); if (error && !force) return error; - vrele(isomp->phys_devvp); + vnode_put(isomp->phys_devvp); } if (isomp->toc) FREE((caddr_t)isomp->toc, M_ISOFSMNT); - FREE((caddr_t)isomp, M_ISOFSMNT); - mp->mnt_data = (qaddr_t)0; - mp->mnt_flag &= ~MNT_LOCAL; + return (0); } @@ -840,9 +730,7 @@ cd9660_unmount(mp, mntflags, p) * Return root of a filesystem */ int -cd9660_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; +cd9660_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = @@ -853,58 +741,50 @@ cd9660_root(mp, vpp) * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. 
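[Editor's note: the cd9660_statfs hunk below converts to the vfsstatfs structure and starts reporting f_fssubtype so user space can tell the ISO flavors apart. A sketch of that mapping; the TOY_FTYPE_ values are assumptions standing in for the ISO_FTYPE_ constants in iso.h, while the subtype numbers come from the comment in the hunk itself.]

#include <stdint.h>

/* Assumed values; the kernel defines the real ISO_FTYPE_ constants. */
#define TOY_FTYPE_9660    0
#define TOY_FTYPE_RRIP    1
#define TOY_FTYPE_JOLIET  3

/*
 * f_fssubtype values per the comment below:
 * 0 = plain ISO-9660, 1 = Joliet, 2 = Rock Ridge.
 */
static uint32_t
toy_iso_fssubtype(int iso_ftype)
{
    if (iso_ftype == TOY_FTYPE_JOLIET)
        return 1;
    if (iso_ftype == TOY_FTYPE_RRIP)
        return 2;
    return 0;
}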
*/ - return (cd9660_vget_internal(mp, ino, vpp, + return (cd9660_vget_internal(mp, ino, vpp, NULL, NULL, imp->iso_ftype == ISO_FTYPE_RRIP, dp, current_proc())); } -/* - * Do operations associated with quotas, not supported - */ -/* ARGSUSED */ -int -cd9660_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - /* * Get file system statistics. */ +/* ARGSUSED */ int -cd9660_statfs(mp, sbp, p) - struct mount *mp; - register struct statfs *sbp; - struct proc *p; +cd9660_statfs(struct mount *mp, register struct vfsstatfs *sbp, + __unused vfs_context_t context) { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); +#if 0 #ifdef COMPAT_09 sbp->f_type = 5; #else sbp->f_type = 0; #endif - sbp->f_bsize = isomp->logical_block_size; - sbp->f_iosize = sbp->f_bsize; /* XXX */ - sbp->f_blocks = isomp->volume_space_size; - sbp->f_bfree = 0; /* total free blocks */ - sbp->f_bavail = 0; /* blocks free for non superuser */ - sbp->f_files = 0; /* total files */ - sbp->f_ffree = 0; /* free file nodes */ - if (sbp != &mp->mnt_stat) { - bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); - bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); - } +#endif + sbp->f_bsize = (uint32_t)isomp->logical_block_size; + sbp->f_iosize = (size_t)sbp->f_bsize; /* XXX */ + sbp->f_blocks = (uint64_t)((unsigned long)isomp->volume_space_size); + sbp->f_bfree = (uint64_t)0; /* total free blocks */ + sbp->f_bavail = (uint64_t)0; /* blocks free for non superuser */ + sbp->f_files = (uint64_t)0; /* total files */ + sbp->f_ffree = (uint64_t)0; /* free file nodes */ + sbp->f_fstypename[(MFSTYPENAMELEN - 1)] = '\0'; - strncpy( sbp->f_fstypename, mp->mnt_vfc->vfc_name, (MFSNAMELEN - 1) ); - sbp->f_fstypename[(MFSNAMELEN - 1)] = '\0'; + /* + * Subtypes (flavors) for ISO 9660 + * 0: ISO-9660 + * 1: ISO-9660 (Joliet) + * 2: ISO-9660 (Rockridge) + */ + if (isomp->iso_ftype == ISO_FTYPE_JOLIET) + sbp->f_fssubtype = 1; + else if (isomp->iso_ftype == ISO_FTYPE_RRIP) + sbp->f_fssubtype = 2; + else + sbp->f_fssubtype = 0; /* DO NOT use the first spare for flags; it's been reassigned for another use: */ /* sbp->f_spare[0] = isomp->im_flags; */ @@ -912,13 +792,109 @@ cd9660_statfs(mp, sbp, p) return (0); } +int cd9660_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, vfs_context_t context) +{ + struct iso_mnt *imp; + struct vfsstatfs *stats = vfs_statfs(mp); + + imp = VFSTOISOFS(mp); + + /* + * We don't know reasonable values for f_objcount, f_filecount, + * f_dircount, f_maxobjcount so don't bother making up (poor) + * numbers like 10.3.x and earlier did. + */ + + VFSATTR_RETURN(fsap, f_iosize, stats->f_iosize); + VFSATTR_RETURN(fsap, f_blocks, stats->f_blocks); + VFSATTR_RETURN(fsap, f_bfree, stats->f_bfree); + VFSATTR_RETURN(fsap, f_bavail, stats->f_bavail); + VFSATTR_RETURN(fsap, f_bused, stats->f_blocks); + + /* We don't have file counts, so don't return them */ + + /* f_fsid and f_owner should be handled by VFS */ + + /* We don't have a value for f_uuid */ + + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = + (imp->iso_ftype == ISO_FTYPE_RRIP ? VOL_CAP_FMT_SYMBOLICLINKS : 0) | + (imp->iso_ftype == ISO_FTYPE_RRIP ? VOL_CAP_FMT_HARDLINKS : 0) | + (imp->iso_ftype == ISO_FTYPE_RRIP || imp->iso_ftype == ISO_FTYPE_JOLIET + ? 
VOL_CAP_FMT_CASE_SENSITIVE : 0) | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0; + + fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; + fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + /* + * VFS should really set these based on the vfs_attr and vnop_attr + * fields the file system supports, combined with the conversions + * VFS has implemented. + */ + + fsap->f_attributes.validattr.commonattr = ATTR_CMN_VALIDMASK; + fsap->f_attributes.validattr.volattr = ATTR_VOL_VALIDMASK; + fsap->f_attributes.validattr.dirattr = ATTR_DIR_VALIDMASK; + fsap->f_attributes.validattr.fileattr = ATTR_FILE_VALIDMASK; + fsap->f_attributes.validattr.forkattr = ATTR_FORK_VALIDMASK; + + fsap->f_attributes.nativeattr.commonattr = ATTR_CMN_VALIDMASK; + fsap->f_attributes.nativeattr.volattr = ATTR_VOL_VALIDMASK; + fsap->f_attributes.nativeattr.dirattr = ATTR_DIR_VALIDMASK; + fsap->f_attributes.nativeattr.fileattr = ATTR_FILE_VALIDMASK; + fsap->f_attributes.nativeattr.forkattr = ATTR_FORK_VALIDMASK; + + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + + VFSATTR_RETURN(fsap, f_create_time, imp->creation_date); + VFSATTR_RETURN(fsap, f_modify_time, imp->modification_date); + /* No explicit access time, so let VFS pick a default value */ + /* No explicit backup time, so let VFS pick a default value */ + + return 0; +} + /* ARGSUSED */ int -cd9660_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; +cd9660_sync(__unused struct mount *mp, __unused int waitfor, + __unused vfs_context_t context) { return (0); @@ -935,56 +911,38 @@ cd9660_sync(mp, waitfor, cred, p) */ struct ifid { - ushort ifid_len; - ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int -cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) - register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; +cd9660_fhtovp(mount_t mp, int fhlen, unsigned char *fhp, vnode_t *vpp, vfs_context_t context) { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; - register struct netcred *np; - register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; + if (fhlen < (int)sizeof(struct ifid)) + return (EINVAL); + #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif - /* - * Get the export permission structure for this tuple. 
- */ - np = vfs_export_lookup(mp, &imp->im_export, nam); - if (nam && (np == NULL)) - return (EACCES); - - if ( (error = VFS_VGET(mp, &ifhp->ifid_ino, &nvp)) ) { + if ( (error = VFS_VGET(mp, (ino64_t)ifhp->ifid_ino, &nvp, context)) ) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { - vput(nvp); + vnode_put(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; - if (np) { - *exflagsp = np->netc_exflags; - *credanonp = &np->netc_anon; - } return (0); } @@ -1075,10 +1033,7 @@ cd9660_is_video_file(struct iso_node *ip, struct iso_mnt *imp) } int -cd9660_vget(mp, ino, vpp) - struct mount *mp; - void *ino; - struct vnode **vpp; +cd9660_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { /* * XXXX @@ -1087,55 +1042,56 @@ cd9660_vget(mp, ino, vpp) * that right now. */ - return ( cd9660_vget_internal( mp, *(ino_t*)ino, vpp, 0, - (struct iso_directory_record *) 0, - current_proc()) ); + return ( cd9660_vget_internal( mp, (ino_t)ino, vpp, NULL, NULL, + 0, (struct iso_directory_record *) 0, current_proc()) ); } int -cd9660_vget_internal(mp, ino, vpp, relocated, isodir, p) - struct mount *mp; - ino_t ino; - struct vnode **vpp; - int relocated; - struct iso_directory_record *isodir; - struct proc *p; +cd9660_vget_internal(mount_t mp, ino_t ino, vnode_t *vpp, vnode_t dvp, + struct componentname *cnp, int relocated, + struct iso_directory_record *isodir, proc_t p) { register struct iso_mnt *imp; struct iso_node *ip; - struct buf *bp; - struct vnode *vp, *nvp; - dev_t dev; - int error; - - imp = VFSTOISOFS(mp); - dev = imp->im_dev; - + buf_t bp = NULL; + vnode_t vp; + dev_t dev; + int error; + struct vnode_fsparam vfsp; + enum vtype vtype; + int is_video_file = 0; + + *vpp = NULLVP; + imp = VFSTOISOFS(mp); + dev = imp->im_dev; +#if 0 /* Check for unmount in progress */ - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - *vpp = NULLVP; - return (EPERM); - } - - if ((*vpp = cd9660_ihashget(dev, ino, p)) != NULLVP) - return (0); + if (mp->mnt_kern_flag & MNTK_UNMOUNT) + return (EPERM); +#endif MALLOC_ZONE(ip, struct iso_node *, sizeof(struct iso_node), - M_ISOFSNODE, M_WAITOK); - /* Allocate a new vnode/iso_node. */ - if ( (error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) ) { - FREE_ZONE(ip,sizeof(struct iso_node), M_ISOFSNODE); - *vpp = NULLVP; - return (error); + M_ISOFSNODE, M_WAITOK); + /* + * MALLOC_ZONE may block, so check for the inode being + * present in the hash after we get back... + * we also assume that we're under a filesystem lock + * so that we're not reentered between the ihashget and + * the ihashins... 
+ */ + if ((*vpp = cd9660_ihashget(dev, ino, p)) != NULLVP) { + FREE_ZONE(ip, sizeof(struct iso_node), M_ISOFSNODE); + return (0); } bzero((caddr_t)ip, sizeof(struct iso_node)); - lockinit(&ip->i_lock, PINOD,"isonode",0,0); - vp->v_data = ip; - ip->i_vnode = vp; + ip->i_dev = dev; ip->i_number = ino; ip->i_namep = &isonullname[0]; + ip->i_mnt = imp; + ip->i_devvp = imp->im_devvp; + SET(ip->i_flag, ISO_INALLOC); /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting @@ -1148,40 +1104,36 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir, p) int lbn, off; lbn = lblkno(imp, ino); + if (lbn >= imp->volume_space_size) { - vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); - return (ESTALE); + error = ESTALE; + goto errout; } - off = blkoff(imp, ino); + if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { - vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); - return (ESTALE); + error = ESTALE; + goto errout; } - error = bread(imp->im_devvp, lbn, - imp->logical_block_size, NOCRED, &bp); + error = (int)buf_bread(imp->im_devvp, (daddr64_t)((unsigned)lbn), + imp->logical_block_size, NOCRED, &bp); if (error) { - vput(vp); - brelse(bp); - printf("fhtovp: bread error %d\n",error); - return (error); + printf("fhtovp: buf_bread error %d\n",error); + goto errout; } - isodir = (struct iso_directory_record *)(bp->b_data + off); + isodir = (struct iso_directory_record *)(buf_dataptr(bp) + off); - if (off + isonum_711(isodir->length) > - imp->logical_block_size) { - vput(vp); - if (bp != 0) - brelse(bp); + if (off + isonum_711(isodir->length) > imp->logical_block_size) { printf("fhtovp: directory crosses block boundary " "%d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); - return (ESTALE); + error = ESTALE; + goto errout; } /* @@ -1193,31 +1145,40 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir, p) struct iso_directory_record *pdp; pdp = (struct iso_directory_record *) - ((char *)bp->b_data + isonum_711(isodir->length)); + ((char *)buf_dataptr(bp) + isonum_711(isodir->length)); if ((isonum_711(pdp->flags) & directoryBit) && (pdp->name[0] == 1)) ip->i_parent = isodirino(pdp, imp); } - } else - bp = 0; - - ip->i_mnt = imp; - ip->i_devvp = imp->im_devvp; - VREF(ip->i_devvp); - + } if (relocated) { + daddr64_t lbn; + + if (bp) { + buf_brelse(bp); + bp = NULL; + } /* * On relocated directories we must * read the `.' entry out of a dir. */ ip->iso_start = ino >> imp->im_bshift; - if (bp != 0) - brelse(bp); - if ( (error = VOP_BLKATOFF(vp, (off_t)0, NULL, &bp)) ) { - vput(vp); - return (error); - } - isodir = (struct iso_directory_record *)bp->b_data; + /* + * caclulate the correct lbn to read block 0 + * of this node... this used to be a cd9660_blkatoff, but + * that requires the vnode to already be 'cooked'... in + * the new world, we don't create a vnode until the inode + * has been fully initialized... 
cd9660_blkatoff generates + * a buf_bread for im_sector_size associated with the node's vp + * I'm replacing it with a buf_bread for the same size and from + * the same location on the disk, but associated with the devvp + */ + lbn = (daddr64_t)((unsigned)ip->iso_start) + 0; + + if ((error = (int)buf_bread(imp->im_devvp, lbn, imp->im_sector_size, NOCRED, &bp))) + goto errout; + + isodir = (struct iso_directory_record *)buf_dataptr(bp); } /* @@ -1287,32 +1248,40 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir, p) /* * Setup time stamp, attribute */ - vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { - struct buf *bp2; - int off; - if ((imp->im_flags & ISOFSMNT_EXTATT) - && (off = isonum_711(isodir->ext_attr_length))) - VOP_BLKATOFF(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); - else + buf_t bp2 = NULL; + daddr64_t lbn; + int off; + + if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) { + + lbn = (daddr64_t)((unsigned)ip->iso_start - off); + + if ((error = (int)buf_bread(imp->im_devvp, lbn, imp->im_sector_size, NOCRED, &bp2))) { + if (bp2) + buf_brelse(bp2); + goto errout; + } + } else bp2 = NULL; + cd9660_defattr(isodir, ip, bp2); cd9660_deftstamp(isodir, ip, bp2); + if (bp2) - brelse(bp2); + buf_brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } - /* * See if this is a Video CD file. If so, we must adjust the * length to account for larger sectors plus the RIFF header. - * We also must substitute the VOP_READ and VOP_PAGEIN functions. + * We also must substitute the vnop_read and vnop_pagein functions. * * The cd9660_is_video_file routine assumes that the inode has * been completely set up; it refers to several fields. @@ -1322,75 +1291,101 @@ cd9660_vget_internal(mp, ino, vpp, relocated, isodir, p) */ if (cd9660_is_video_file(ip, imp)) { - cd9660_xa_init(vp, isodir); + cd9660_xa_init(ip, isodir); + + is_video_file = 1; } - - if (bp != 0) - brelse(bp); - - /* - * Initialize the associated vnode - */ - if (ip->iso_extent == imp->root_extent) { - vp->v_flag |= VROOT; ip->i_parent = 1; /* root's parent is always 1 by convention */ /* mode type must be S_IFDIR */ ip->inode.iso_mode = (ip->inode.iso_mode & ~S_IFMT) | S_IFDIR; } - - switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { - case VFIFO: -#if FIFO - vp->v_op = cd9660_fifoop_p; - break; -#else - vput(vp); - return (EOPNOTSUPP); -#endif /* FIFO */ - case VCHR: - case VBLK: - /* - * if device, look at device number table for translation - */ -#ifdef ISODEVMAP - if (dp = iso_dmap(dev, ino, 0)) - ip->inode.iso_rdev = dp->d_dev; + vtype = IFTOVT(ip->inode.iso_mode); +#if !FIFO + if (vtype == VFIFO) { + error = ENOTSUP; + goto errout; + } #endif - vp->v_op = cd9660_specop_p; - if ( (nvp = checkalias(vp, ip->inode.iso_rdev, mp)) ) { - /* - * Discard unneeded vnode, but save its iso_node. - */ - cd9660_ihashrem(ip); - VOP_UNLOCK(vp, 0, p); - nvp->v_data = vp->v_data; - vp->v_data = NULL; - vp->v_op = spec_vnodeop_p; - vrele(vp); - vgone(vp); - /* - * Reinitialize aliased inode. - */ - vp = nvp; - ip->i_vnode = vp; - cd9660_ihashins(ip); - } - break; - case VREG: - ubc_info_init(vp); - break; - default: - break; +#ifdef ISODEVMAP + if (vtype == VCHR || vtype == VBLK) { + struct iso_dnode *dp; + + if (dp = iso_dmap(dev, ino, 0)) + ip->inode.iso_rdev = dp->d_dev; } - +#endif /* - * XXX need generation number? 
+ * create the associated vnode */ + //bzero(&vfsp, sizeof(struct vnode_fsparam)); + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "cd9660"; + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_fsnode = ip; + vfsp.vnfs_cnp = cnp; + + if (is_video_file) + vfsp.vnfs_vops = cd9660_cdxaop_p; + else if (vtype == VFIFO ) + vfsp.vnfs_vops = cd9660_fifoop_p; + else if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_vops = cd9660_specop_p; + else + vfsp.vnfs_vops = cd9660_vnodeop_p; + + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_rdev = ip->inode.iso_rdev; + else + vfsp.vnfs_rdev = 0; + + vfsp.vnfs_filesize = ip->i_size; + if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) + vfsp.vnfs_flags = 0; + else + vfsp.vnfs_flags = VNFS_NOCACHE; + + /* Tag root directory */ + if (ip->iso_extent == imp->root_extent) + vfsp.vnfs_markroot = 1; + else + vfsp.vnfs_markroot = 0; + + vfsp.vnfs_marksystem = 0; + + if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)) ) + goto errout; + + ip->i_vnode = vp; + + vnode_ref(ip->i_devvp); + vnode_addfsref(vp); + vnode_settag(vp, VT_ISOFS); + + if (bp) + buf_brelse(bp); *vpp = vp; + CLR(ip->i_flag, ISO_INALLOC); + + if (ISSET(ip->i_flag, ISO_INWALLOC)) + wakeup(ip); + return (0); + +errout: + if (bp) + buf_brelse(bp); + cd9660_ihashrem(ip); + + if (ISSET(ip->i_flag, ISO_INWALLOC)) + wakeup(ip); + + FREE_ZONE(ip, sizeof(struct iso_node), M_ISOFSNODE); + + return (error); } @@ -1587,18 +1582,19 @@ DoneLooking: */ /* ARGSUSED */ int -cd9660_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; +cd9660_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context) { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; + + if (*fhlenp < (int)sizeof(struct ifid)) + return (EOVERFLOW); ifhp = (struct ifid *)fhp; - ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; + *fhlenp = sizeof(struct ifid); #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", @@ -1611,15 +1607,10 @@ cd9660_vptofh(vp, fhp) * Fast-FileSystem only? */ int -cd9660_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int * name; - u_int namelen; - void* oldp; - size_t * oldlenp; - void * newp; - size_t newlen; - struct proc * p; +cd9660_sysctl(__unused int *name, __unused u_int namelen, __unused user_addr_t oldp, + __unused size_t *oldlenp, __unused user_addr_t newp, + __unused size_t newlen, __unused vfs_context_t context) { - return (EOPNOTSUPP); + return (ENOTSUP); } diff --git a/bsd/isofs/cd9660/cd9660_vnops.c b/bsd/isofs/cd9660/cd9660_vnops.c index 9484b5084..6789bfc1b 100644 --- a/bsd/isofs/cd9660/cd9660_vnops.c +++ b/bsd/isofs/cd9660/cd9660_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -87,10 +88,16 @@ #include #include #include +#include #include #include +#include +#include #include +#include +#include /* kmem_alloc, kmem_free */ + #include #include #include @@ -100,15 +107,8 @@ * * Nothing to do. */ -/* ARGSUSED */ int -cd9660_open(ap) - struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +cd9660_open(__unused struct vnop_open_args *ap) { return (0); } @@ -118,157 +118,55 @@ cd9660_open(ap) * * Update the times on the inode on writeable file systems. 
*/ -/* ARGSUSED */ int -cd9660_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +cd9660_close(__unused struct vnop_close_args *ap) { return (0); } -/* - * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. - * The mode is shifted to select the owner/group/other fields. The - * super user is granted all permissions. - */ -/* ARGSUSED */ -int -cd9660_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct iso_node *ip = VTOI(vp); - struct ucred *cred = ap->a_cred; - mode_t mask, mode = ap->a_mode; - register gid_t *gp; - int i, error; - - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - return (EROFS); - /* NOT REACHED */ - default: - break; - } - } - - /* If immutable bit set, nobody gets to write it. */ -#if 0 - if ((mode & VWRITE) && (ip->i_flag & IMMUTABLE)) - return (EPERM); -#endif - /* Otherwise, user id 0 always gets access. */ - if (cred->cr_uid == 0) - return (0); - - mask = 0; - - /* Otherwise, check the owner. */ - if (cred->cr_uid == ip->inode.iso_uid) { - if (mode & VEXEC) - mask |= S_IXUSR; - if (mode & VREAD) - mask |= S_IRUSR; - if (mode & VWRITE) - mask |= S_IWUSR; - return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); - } - - /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (ip->inode.iso_gid == *gp) { - if (mode & VEXEC) - mask |= S_IXGRP; - if (mode & VREAD) - mask |= S_IRGRP; - if (mode & VWRITE) - mask |= S_IWGRP; - return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); - } - - /* Otherwise, check everyone else. */ - if (mode & VEXEC) - mask |= S_IXOTH; - if (mode & VREAD) - mask |= S_IROTH; - if (mode & VWRITE) - mask |= S_IWOTH; - return ((ip->inode.iso_mode & mask) == mask ? 
0 : EACCES); -} - int -cd9660_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; - +cd9660_getattr(struct vnop_getattr_args *ap) { struct vnode *vp = ap->a_vp; - register struct vattr *vap = ap->a_vap; + register struct vnode_attr *vap = ap->a_vap; register struct iso_node *ip = VTOI(vp); - vap->va_fsid = ip->i_dev; - vap->va_fileid = ip->i_number; + VATTR_RETURN(vap, va_fsid, ip->i_dev); + VATTR_RETURN(vap, va_fileid, ip->i_number); - vap->va_mode = ip->inode.iso_mode; - vap->va_nlink = ip->inode.iso_links; - vap->va_uid = ip->inode.iso_uid; - vap->va_gid = ip->inode.iso_gid; - vap->va_atime = ip->inode.iso_atime; - vap->va_mtime = ip->inode.iso_mtime; - vap->va_ctime = ip->inode.iso_ctime; - vap->va_rdev = ip->inode.iso_rdev; + VATTR_RETURN(vap, va_mode, ip->inode.iso_mode); + VATTR_RETURN(vap, va_nlink, ip->inode.iso_links); + VATTR_RETURN(vap, va_uid, ip->inode.iso_uid); + VATTR_RETURN(vap, va_gid, ip->inode.iso_gid); + VATTR_RETURN(vap, va_access_time, ip->inode.iso_atime); + VATTR_RETURN(vap, va_modify_time, ip->inode.iso_mtime); + VATTR_RETURN(vap, va_change_time, ip->inode.iso_ctime); + VATTR_RETURN(vap, va_rdev, ip->inode.iso_rdev); - vap->va_size = (u_quad_t) ip->i_size; + VATTR_RETURN(vap, va_data_size, (off_t)ip->i_size); if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { - struct vop_readlink_args rdlnk; - struct iovec aiov; - struct uio auio; + struct vnop_readlink_args rdlnk; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; char *cp; MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK); - aiov.iov_base = cp; - aiov.iov_len = MAXPATHLEN; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = ap->a_p; - auio.uio_resid = MAXPATHLEN; - rdlnk.a_uio = &auio; + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); + + rdlnk.a_uio = auio; rdlnk.a_vp = ap->a_vp; - rdlnk.a_cred = ap->a_cred; + rdlnk.a_context = ap->a_context; if (cd9660_readlink(&rdlnk) == 0) - vap->va_size = MAXPATHLEN - auio.uio_resid; + // LP64todo - fix this! + VATTR_RETURN(vap, va_data_size, MAXPATHLEN - uio_resid(auio)); FREE(cp, M_TEMP); } - vap->va_flags = 0; - vap->va_gen = 1; - vap->va_blocksize = ip->i_mnt->logical_block_size; - vap->va_bytes = (u_quad_t) (ip->i_size + ip->i_rsrcsize); - vap->va_type = vp->v_type; + VATTR_RETURN(vap, va_flags, 0); + VATTR_RETURN(vap, va_gen, 1); + VATTR_RETURN(vap, va_iosize, ip->i_mnt->logical_block_size); + VATTR_RETURN(vap, va_total_size, ip->i_size + ip->i_rsrcsize); return (0); } @@ -278,34 +176,27 @@ cd9660_getattr(ap) * Vnode op for reading. 
*/ int -cd9660_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +cd9660_read(struct vnop_read_args *ap) { struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; register struct iso_node *ip = VTOI(vp); register struct iso_mnt *imp; struct buf *bp; - daddr_t lbn, rablock; + daddr_t lbn; + daddr64_t rablock; off_t diff; int rasize, error = 0; - long size, n, on; - int devBlockSize = 0; + int32_t size, n, on; - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); imp = ip->i_mnt; - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - if (UBCISVALID(vp)) { + if (UBCINFOEXISTS(vp)) { /* * Copy any part of the Apple Double header. */ @@ -330,93 +221,75 @@ cd9660_read(ap) header.finfo.fdLocation.h = -1; header.finfo.fdReserved = 0; - bytes = min(uio->uio_resid, sizeof(apple_double_header_t) - uio->uio_offset); + bytes = min(uio_resid(uio), sizeof(apple_double_header_t) - uio->uio_offset); error = uiomove(((char *) &header) + uio->uio_offset, bytes, uio); if (error) return error; } - if (uio->uio_resid && uio->uio_offset < ADH_SIZE) { + if (uio_resid(uio) && uio->uio_offset < ADH_SIZE) { caddr_t buffer; if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, ADH_SIZE)) { return (ENOMEM); } - bytes = min(uio->uio_resid, ADH_SIZE - uio->uio_offset); + bytes = min(uio_resid(uio), ADH_SIZE - uio->uio_offset); error = uiomove(((char *) buffer) + uio->uio_offset, bytes, uio); kmem_free(kernel_map, (vm_offset_t)buffer, ADH_SIZE); if (error) return error; } } - if (uio->uio_resid > 0) - error = cluster_read(vp, uio, (off_t)ip->i_size, devBlockSize, 0); + if (uio_resid(uio) > 0) + error = cluster_read(vp, uio, (off_t)ip->i_size, 0); } else { do { lbn = lblkno(imp, uio->uio_offset); on = blkoff(imp, uio->uio_offset); n = min((u_int)(imp->logical_block_size - on), - uio->uio_resid); + uio_resid(uio)); diff = (off_t)ip->i_size - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; size = blksize(imp, ip, lbn); - rablock = lbn + 1; + rablock = (daddr64_t)lbn + 1; - if (vp->v_lastr + 1 == lbn && + if (ip->i_lastr + 1 == lbn && lblktosize(imp, rablock) < ip->i_size) { - rasize = blksize(imp, ip, rablock); - error = breadn(vp, lbn, size, &rablock, + rasize = blksize(imp, ip, (daddr_t)rablock); + error = (int)buf_breadn(vp, (daddr64_t)((unsigned)lbn), size, &rablock, &rasize, 1, NOCRED, &bp); } else - error = bread(vp, lbn, size, NOCRED, &bp); + error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), size, NOCRED, &bp); - vp->v_lastr = lbn; - n = min(n, size - bp->b_resid); + ip->i_lastr = lbn; + n = min(n, size - buf_resid(bp)); if (error) { - brelse(bp); + buf_brelse(bp); return (error); } - error = uiomove(bp->b_data + on, (int)n, uio); + error = uiomove((caddr_t)(buf_dataptr(bp) + on), (int)n, uio); if (n + on == imp->logical_block_size || uio->uio_offset == (off_t)ip->i_size) - bp->b_flags |= B_AGE; - brelse(bp); - } while (error == 0 && uio->uio_resid > 0 && n != 0); + buf_markaged(bp); + buf_brelse(bp); + } while (error == 0 && uio_resid(uio) > 0 && n != 0); } return (error); } -/* ARGSUSED */ int -cd9660_ioctl(ap) - struct vop_ioctl_args /* { - struct vnode *a_vp; - u_long a_command; - caddr_t a_data; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +cd9660_ioctl(__unused struct vnop_ioctl_args *ap) { return (ENOTTY); } -/* ARGSUSED */ int -cd9660_select(ap) - struct vop_select_args /* { - struct vnode *a_vp; - int a_which; - int 
a_fflags; - struct ucred *a_cred; - void *a_wql; - struct proc *a_p; - } */ *ap; +cd9660_select(__unused struct vnop_select_args *ap) { /* * We should really check to see if I/O is possible. @@ -429,39 +302,13 @@ cd9660_select(ap) * * NB Currently unsupported. */ -/* ARGSUSED */ int -cd9660_mmap(ap) - struct vop_mmap_args /* { - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +cd9660_mmap(__unused struct vnop_mmap_args *ap) { return (EINVAL); } -/* - * Seek on a file - * - * Nothing to do, so just return. - */ -/* ARGSUSED */ -int -cd9660_seek(ap) - struct vop_seek_args /* { - struct vnode *a_vp; - off_t a_oldoff; - off_t a_newoff; - struct ucred *a_cred; - } */ *ap; -{ - - return (0); -} - /* * Structure for reading directories */ @@ -478,17 +325,14 @@ struct isoreaddir { }; static int -iso_uiodir(idp,dp,off) - struct isoreaddir *idp; - struct dirent *dp; - off_t off; +iso_uiodir(struct isoreaddir *idp, struct dirent *dp, off_t off) { int error; dp->d_name[dp->d_namlen] = 0; dp->d_reclen = DIRSIZ(dp); - if (idp->uio->uio_resid < dp->d_reclen) { + if (uio_resid(idp->uio) < dp->d_reclen) { idp->eofflag = 0; return (-1); } @@ -512,8 +356,7 @@ iso_uiodir(idp,dp,off) } static int -iso_shipdir(idp) - struct isoreaddir *idp; +iso_shipdir(struct isoreaddir *idp) { struct dirent *dp; int cl, sl; @@ -550,16 +393,7 @@ iso_shipdir(idp) * a sector. */ int -cd9660_readdir(ap) - struct vop_readdir_args /* { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - int *a_eofflag; - int *a_ncookies; - u_long **a_cookies; - } */ *ap; +cd9660_readdir(struct vnop_readdir_args *ap) { register struct uio *uio = ap->a_uio; off_t startingOffset = uio->uio_offset; @@ -572,11 +406,14 @@ cd9660_readdir(ap) struct iso_directory_record *ep; int entryoffsetinblock; doff_t endsearch; - u_long bmask; + uint32_t bmask; int error = 0; int reclen; u_short namelen; + if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) + return (EINVAL); + dp = VTOI(vdp); imp = dp->i_mnt; bmask = imp->im_sector_size - 1; @@ -593,7 +430,7 @@ cd9660_readdir(ap) idp->curroff = uio->uio_offset; if ((entryoffsetinblock = idp->curroff & bmask) && - (error = VOP_BLKATOFF(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) { + (error = cd9660_blkatoff(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) { FREE(idp, M_TEMP); return (error); } @@ -607,8 +444,8 @@ cd9660_readdir(ap) */ if ((idp->curroff & bmask) == 0) { if (bp != NULL) - brelse(bp); - if ((error = VOP_BLKATOFF(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) + buf_brelse(bp); + if ((error = cd9660_blkatoff(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) break; entryoffsetinblock = 0; } @@ -616,7 +453,7 @@ cd9660_readdir(ap) * Get pointer to next entry. 
*/ ep = (struct iso_directory_record *) - ((char *)bp->b_data + entryoffsetinblock); + (buf_dataptr(bp) + entryoffsetinblock); reclen = isonum_711(ep->length); if (reclen == 0) { @@ -658,7 +495,7 @@ cd9660_readdir(ap) if ( isonum_711(ep->flags) & directoryBit ) idp->current.d_fileno = isodirino(ep, imp); else { - idp->current.d_fileno = (bp->b_blkno << imp->im_bshift) + + idp->current.d_fileno = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; } @@ -717,9 +554,9 @@ cd9660_readdir(ap) idp->current.d_namlen = 0; error = iso_shipdir(idp); } - +#if 0 if (!error && ap->a_ncookies) { - struct dirent *dp, *dpstart; + struct dirent *dirp, *dpstart; off_t bufferOffset; u_long *cookies; int ncookies; @@ -731,21 +568,23 @@ cd9660_readdir(ap) * * We assume the entire transfer is done to a single contiguous buffer. */ - if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg) || uio->uio_iovcnt != 1) panic("ufs_readdir: lost in space"); /* * Make a first pass over the buffer just generated, * counting the number of entries: */ - dpstart = (struct dirent *) (uio->uio_iov->iov_base - (uio->uio_offset - startingOffset)); - for (dp = dpstart, bufferOffset = startingOffset, ncookies = 0; + // LP64todo - fix this! + dpstart = (struct dirent *) + CAST_DOWN(caddr_t, (uio_iov_base(uio) - (uio->uio_offset - startingOffset))); + for (dirp = dpstart, bufferOffset = startingOffset, ncookies = 0; bufferOffset < uio->uio_offset; ) { - if (dp->d_reclen == 0) + if (dirp->d_reclen == 0) break; - bufferOffset += dp->d_reclen; + bufferOffset += dirp->d_reclen; ncookies++; - dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + dirp = (struct dirent *)((caddr_t)dirp + dirp->d_reclen); } lost += uio->uio_offset - bufferOffset; uio->uio_offset = bufferOffset; @@ -760,18 +599,18 @@ cd9660_readdir(ap) /* * Fill in the offsets for each entry in the buffer just allocated: */ - for (bufferOffset = startingOffset, dp = dpstart; bufferOffset < uio->uio_offset; ) { + for (bufferOffset = startingOffset, dirp = dpstart; bufferOffset < uio->uio_offset; ) { *(cookies++) = bufferOffset; - bufferOffset += dp->d_reclen; - dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + bufferOffset += dirp->d_reclen; + dirp = (struct dirent *)((caddr_t)dirp + dirp->d_reclen); } } - +#endif if (error < 0) error = 0; if (bp) - brelse (bp); + buf_brelse (bp); uio->uio_offset = idp->uio_off; *ap->a_eofflag = idp->eofflag; @@ -791,12 +630,7 @@ typedef struct iso_directory_record ISODIR; typedef struct iso_node ISONODE; typedef struct iso_mnt ISOMNT; int -cd9660_readlink(ap) - struct vop_readlink_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - } */ *ap; +cd9660_readlink(struct vnop_readlink_args *ap) { ISONODE *ip; ISODIR *dirp; @@ -817,18 +651,18 @@ cd9660_readlink(ap) /* * Get parents directory record block that this inode included. */ - error = bread(imp->im_devvp, - (ip->i_number >> imp->im_bshift), + error = (int)buf_bread(imp->im_devvp, + (daddr64_t)((unsigned)(ip->i_number >> imp->im_bshift)), imp->logical_block_size, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (EINVAL); } /* * Setup the directory pointer for this inode */ - dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask)); + dirp = (ISODIR *)(buf_dataptr(bp) + (ip->i_number & imp->im_bmask)); /* * Just make sure, we have a right one.... 
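The readdir and readlink hunks above and below all make the same move: direct pokes at uio_resid and uio_iov give way to the opaque uio accessor KPI (uio_resid(), uio_iov_base(), UIO_SEG_IS_USER_SPACE()), and bp->b_data gives way to buf_dataptr(). A minimal sketch of the new uio idiom, modeled on the getattr hunk earlier in this file -- the cp buffer and its MAXPATHLEN size come from that hunk, while the xfered variable is purely illustrative:

	uio_t	auio;
	char	uio_buf[UIO_SIZEOF(1)];		/* backing store for one iovec */
	char	*cp;
	int	xfered;

	MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK);

	/* build a kernel-space uio at offset 0 that reads into cp */
	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
	                            &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN);

	/* ... hand auio to the I/O routine, e.g. cd9660_readlink() ... */

	/* uio_resid() reports what was NOT transferred */
	xfered = MAXPATHLEN - uio_resid(auio);

Because callers no longer reach into the uio's internal layout, the same filesystem code can serve 32-bit and 64-bit user processes without knowing which one it was handed.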
@@ -836,7 +670,7 @@ cd9660_readlink(ap) */ if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length) > imp->logical_block_size) { - brelse(bp); + buf_brelse(bp); return (EINVAL); } @@ -844,151 +678,65 @@ cd9660_readlink(ap) * Now get a buffer * Abuse a namei buffer for now. */ - if (uio->uio_segflg == UIO_SYSSPACE) - symname = uio->uio_iov->iov_base; - else + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) MALLOC_ZONE(symname, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + else + // LP64todo - fix this! + symname = CAST_DOWN(caddr_t, uio_iov_base(uio)); /* * Ok, we just gathering a symbolic name in SL record. */ if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) { - if (uio->uio_segflg != UIO_SYSSPACE) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) FREE_ZONE(symname, MAXPATHLEN, M_NAMEI); - brelse(bp); + buf_brelse(bp); return (EINVAL); } /* * Don't forget before you leave from home ;-) */ - brelse(bp); + buf_brelse(bp); /* * return with the symbolic name to caller's. */ - if (uio->uio_segflg != UIO_SYSSPACE) { + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { error = uiomove(symname, symlen, uio); FREE_ZONE(symname, MAXPATHLEN, M_NAMEI); return (error); } - uio->uio_resid -= symlen; - uio->uio_iov->iov_base += symlen; - uio->uio_iov->iov_len -= symlen; +#if LP64KERN + uio_setresid(uio, (uio_resid(uio) - symlen)); + uio_iov_len_add(uio, -((int64_t)symlen)); +#else + uio_setresid(uio, (uio_resid(uio) - symlen)); + uio_iov_len_add(uio, -((int)symlen)); +#endif + uio_iov_base_add(uio, symlen); return (0); } -/* - * Lock an inode. - */ - -int -cd9660_lock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - if (VTOI(vp) == (struct iso_node *) NULL) - panic ("cd9660_lock: null inode"); - return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock,ap->a_p)); -} - -/* - * Unlock an inode. - */ - -int -cd9660_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE, &vp->v_interlock,ap->a_p)); - -} /* - * Calculate the logical to physical mapping if not done already, - * then call the device strategy routine. + * prepare and issue the I/O */ int -cd9660_strategy(ap) - struct vop_strategy_args /* { - struct buf *a_bp; - } */ *ap; -{ - register struct buf *bp = ap->a_bp; - register struct vnode *vp = bp->b_vp; - register struct iso_node *ip; - int error; - - ip = VTOI(vp); - if (vp->v_type == VBLK || vp->v_type == VCHR) - panic("cd9660_strategy: spec"); - if (bp->b_blkno == bp->b_lblkno) { - if ( (error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL)) ) { - bp->b_error = error; - bp->b_flags |= B_ERROR; - biodone(bp); - return (error); - } - if ((long)bp->b_blkno == -1) - clrbuf(bp); - } - if ((long)bp->b_blkno == -1) { - biodone(bp); - return (0); - } - vp = ip->i_devvp; - bp->b_dev = vp->v_rdev; - VOCALL (vp->v_op, VOFFSET(vop_strategy), ap); - return (0); -} - -/* - * Print out the contents of an inode. - */ -int -cd9660_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; +cd9660_strategy(struct vnop_strategy_args *ap) { + buf_t bp = ap->a_bp; + vnode_t vp = buf_vnode(bp); + struct iso_node *ip = VTOI(vp); - printf("tag VT_ISOFS, isofs vnode\n"); - return (0); + return (buf_strategy(ip->i_devvp, ap)); } -/* - * Check for a locked inode. 
- */ -int -cd9660_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return (lockstatus(&VTOI(ap->a_vp)->i_lock)); -} /* * Return POSIX pathconf information applicable to cd9660 filesystems. */ int -cd9660_pathconf(ap) - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - register_t *a_retval; - } */ *ap; +cd9660_pathconf(struct vnop_pathconf_args *ap) { switch (ap->a_name) { @@ -1029,23 +777,13 @@ cd9660_pathconf(ap) * Unsupported operation */ int -cd9660_enotsupp() +cd9660_enotsupp(void) { - - return (EOPNOTSUPP); + return (ENOTSUP); } /* Pagein. similar to read */ int -cd9660_pagein(ap) - struct vop_pagein_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t a_pl_offset, - off_t a_f_offset, - size_t a_size, - struct ucred *a_cred, - int a_flags - } */ *ap; +cd9660_pagein(struct vnop_pagein_args *ap) { struct vnode *vp = ap->a_vp; upl_t pl = ap->a_pl; @@ -1096,17 +834,13 @@ cd9660_pagein(ap) ubc_upl_commit_range(pl, pl_offset, size, UPL_COMMIT_FREE_ON_EMPTY); } } else { - int devBlockSize = 0; - /* check pageouts are for reg file only and ubc info is present*/ if (UBCINVALID(vp)) panic("cd9660_pagein: Not a VREG"); UBCINFOCHECK("cd9660_pagein", vp); - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - error = cluster_pagein(vp, pl, pl_offset, f_offset, size, - (off_t)ip->i_size, devBlockSize, flags); + (off_t)ip->i_size, flags); } return (error); } @@ -1117,16 +851,8 @@ cd9660_pagein(ap) * Locking policy: a_dvp and vp locked on entry, unlocked on exit */ int -cd9660_remove(ap) - struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; - struct componentname *a_cnp; } */ *ap; +cd9660_remove(__unused struct vnop_remove_args *ap) { - if (ap->a_dvp == ap->a_vp) - vrele(ap->a_vp); - else - vput(ap->a_vp); - vput(ap->a_dvp); - return (EROFS); } @@ -1137,9 +863,7 @@ cd9660_remove(ap) * Locking policy: a_dvp and vp locked on entry, unlocked on exit */ int -cd9660_rmdir(ap) - struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; - struct componentname *a_cnp; } */ *ap; +cd9660_rmdir(struct vnop_rmdir_args *ap) { (void) nop_rmdir(ap); return (EROFS); @@ -1150,24 +874,16 @@ cd9660_rmdir(ap) # #% getattrlist vp = = = # - vop_getattrlist { + vnop_getattrlist { IN struct vnode *vp; IN struct attrlist *alist; INOUT struct uio *uio; - IN struct ucred *cred; - IN struct proc *p; + IN vfs_context_t context; }; */ int -cd9660_getattrlist(ap) - struct vop_getattrlist_args /* { - struct vnode *a_vp; - struct attrlist *a_alist - struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +cd9660_getattrlist(struct vnop_getattrlist_args *ap) { struct attrlist *alist = ap->a_alist; int fixedblocksize; @@ -1215,24 +931,24 @@ cd9660_getattrlist(ap) fixedblocksize = attrcalcsize(alist); - attrblocksize = fixedblocksize + (sizeof(u_long)); /* u_long for length longword */ + attrblocksize = fixedblocksize + (sizeof(uint32_t)); /* uint32_t for length word */ if (alist->commonattr & ATTR_CMN_NAME) attrblocksize += NAME_MAX; if (alist->commonattr & ATTR_CMN_NAMEDATTRLIST) attrblocksize += 0; /* XXX PPD */ if (alist->volattr & ATTR_VOL_MOUNTPOINT) attrblocksize += PATH_MAX; if (alist->volattr & ATTR_VOL_NAME) attrblocksize += NAME_MAX; if (alist->fileattr & ATTR_FILE_FORKLIST) attrblocksize += 0; /* XXX PPD */ - attrbufsize = MIN(ap->a_uio->uio_resid, attrblocksize); + attrbufsize = MIN(uio_resid(ap->a_uio), attrblocksize); MALLOC(attrbufptr, void *, attrblocksize, M_TEMP, M_WAITOK); attrptr = attrbufptr; - *((u_long 
*)attrptr) = 0; /* Set buffer length in case of errors */ - ++((u_long *)attrptr); /* Reserve space for length field */ + *((uint32_t *)attrptr) = 0; /* Set buffer length in case of errors */ + ++((uint32_t *)attrptr); /* Reserve space for length field */ varptr = ((char *)attrptr) + fixedblocksize; /* Point to variable-length storage */ packattrblk(alist, ap->a_vp, &attrptr, &varptr); /* Store length of fixed + var block */ - *((u_long *)attrbufptr) = ((char*)varptr - (char*)attrbufptr); + *((uint32_t *)attrbufptr) = ((char*)varptr - (char*)attrbufptr); /* Don't copy out more data than was generated */ attrbufsize = MIN(attrbufsize, (char*)varptr - (char*)attrbufptr); @@ -1247,10 +963,9 @@ cd9660_getattrlist(ap) * Make a RIFF file header for a CD-ROM XA media file. */ __private_extern__ void -cd9660_xa_init(struct vnode *vp, struct iso_directory_record *isodir) +cd9660_xa_init(struct iso_node *ip, struct iso_directory_record *isodir) { - u_long sectors; - struct iso_node *ip = VTOI(vp); + uint32_t sectors; struct riff_header *header; u_char name_len; char *cdxa; @@ -1285,17 +1000,14 @@ cd9660_xa_init(struct vnode *vp, struct iso_directory_record *isodir) * device. This allows cd9660_strategy to be ignorant of the block * (sector) size. */ - vrele(ip->i_devvp); ip->i_devvp = ip->i_mnt->phys_devvp; - VREF(ip->i_devvp); ip->i_size = sectors * CDXA_SECTOR_SIZE + sizeof(struct riff_header); ip->i_riff = header; - vp->v_op = cd9660_cdxaop_p; } /* - * Helper routine for VOP_READ and VOP_PAGEIN of CD-ROM XA multimedia files. + * Helper routine for vnop_read and vnop_pagein of CD-ROM XA multimedia files. * This routine determines the physical location of the file, then reads * sectors directly from the device into a buffer. It also handles inserting * the RIFF header at the beginning of the file. @@ -1303,7 +1015,7 @@ cd9660_xa_init(struct vnode *vp, struct iso_directory_record *isodir) * Exactly one of buffer or uio must be non-zero. It will either bcopy to * buffer, or uiomove via uio. * - * XXX Should this code be using breadn and vp->v_lastr to support single-block + * XXX Should this code be using buf_breadn and ip->i_lastr to support single-block * read-ahead? Should we try more aggressive read-ahead like cluster_io does? * * XXX This could be made to do larger I/O to the device (reading all the @@ -1361,13 +1073,13 @@ cd9660_xa_read_common( /* Get a block from the underlying device */ block = ip->iso_start + (offset - sizeof(struct riff_header))/CDXA_SECTOR_SIZE; - error = bread(ip->i_devvp, block, CDXA_SECTOR_SIZE, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)block), CDXA_SECTOR_SIZE, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return error; } - if (bp->b_resid) { - printf("isofs: cd9660_xa_read_common: bread didn't read full sector\n"); + if (buf_resid(bp)) { + printf("isofs: cd9660_xa_read_common: buf_bread didn't read full sector\n"); return EIO; } @@ -1378,10 +1090,10 @@ cd9660_xa_read_common( count = diff; if (buffer) { - bcopy(bp->b_data+sect_off, buffer, count); + bcopy(CAST_DOWN(caddr_t, (buf_dataptr(bp)+sect_off)), buffer, count); buffer += count; } else { - error = uiomove(bp->b_data+sect_off, count, uio); + error = uiomove(CAST_DOWN(caddr_t, (buf_dataptr(bp)+sect_off)), count, uio); } amount -= count; offset += count; @@ -1391,8 +1103,8 @@ cd9660_xa_read_common( * age the device block. This is optimized for sequential access. 
*/ if (sect_off+count == CDXA_SECTOR_SIZE || offset == (off_t)ip->i_size) - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); } return error; @@ -1416,19 +1128,14 @@ cd9660_xa_read_common( * important. */ int -cd9660_xa_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +cd9660_xa_read(struct vnop_read_args *ap) { struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; register struct iso_node *ip = VTOI(vp); off_t offset = uio->uio_offset; - size_t size = uio->uio_resid; + // LP64todo - fix this! + size_t size = uio_resid(uio); /* Check for some obvious parameter problems */ if (offset < 0) @@ -1452,16 +1159,7 @@ cd9660_xa_read(ap) * cluster_pagein. Instead, we have to map the page and read into it. */ static int -cd9660_xa_pagein(ap) - struct vop_pagein_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t a_pl_offset, - off_t a_f_offset, - size_t a_size, - struct ucred *a_cred, - int a_flags - } */ *ap; +cd9660_xa_pagein(struct vnop_pagein_args *ap) { struct vnode *vp = ap->a_vp; upl_t pl = ap->a_pl; @@ -1518,45 +1216,27 @@ cd9660_xa_pagein(ap) * Global vfs data structures for isofs */ #define cd9660_create \ - ((int (*) __P((struct vop_create_args *)))err_create) -#define cd9660_mknod ((int (*) __P((struct vop_mknod_args *)))err_mknod) -#define cd9660_setattr \ - ((int (*) __P((struct vop_setattr_args *)))cd9660_enotsupp) -#define cd9660_write ((int (*) __P((struct vop_write_args *)))cd9660_enotsupp) -#if NFSSERVER -int lease_check __P((struct vop_lease_args *)); -#define cd9660_lease_check lease_check -#else -#define cd9660_lease_check ((int (*) __P((struct vop_lease_args *)))nullop) -#endif -#define cd9660_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) + ((int (*)(struct vnop_create_args *))err_create) +#define cd9660_mknod ((int (*)(struct vnop_mknod_args *))err_mknod) +#define cd9660_write ((int (*)(struct vnop_write_args *))cd9660_enotsupp) +#define cd9660_fsync ((int (*)(struct vnop_fsync_args *))nullop) #define cd9660_rename \ - ((int (*) __P((struct vop_rename_args *)))err_rename) + ((int (*)(struct vnop_rename_args *))err_rename) #define cd9660_copyfile \ - ((int (*) __P((struct vop_copyfile_args *)))err_copyfile) -#define cd9660_link ((int (*) __P((struct vop_link_args *)))err_link) -#define cd9660_mkdir ((int (*) __P((struct vop_mkdir_args *)))err_mkdir) + ((int (*)(struct vnop_copyfile_args *))err_copyfile) +#define cd9660_link ((int (*)(struct vnop_link_args *))err_link) +#define cd9660_mkdir ((int (*)(struct vnop_mkdir_args *))err_mkdir) #define cd9660_symlink \ - ((int (*) __P((struct vop_symlink_args *)))err_symlink) + ((int (*)(struct vnop_symlink_args *))err_symlink) #define cd9660_advlock \ - ((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp) -#define cd9660_valloc ((int(*) __P(( \ - struct vnode *pvp, \ - int mode, \ - struct ucred *cred, \ - struct vnode **vpp))) cd9660_enotsupp) -#define cd9660_vfree ((int (*) __P((struct vop_vfree_args *)))cd9660_enotsupp) -#define cd9660_truncate \ - ((int (*) __P((struct vop_truncate_args *)))cd9660_enotsupp) -#define cd9660_update \ - ((int (*) __P((struct vop_update_args *)))cd9660_enotsupp) + ((int (*)(struct vnop_advlock_args *))cd9660_enotsupp) #define cd9660_bwrite \ - ((int (*) __P((struct vop_bwrite_args *)))cd9660_enotsupp) + ((int (*)(struct vnop_bwrite_args *))cd9660_enotsupp) #define cd9660_pageout \ - ((int (*) __P((struct vop_pageout_args *)))cd9660_enotsupp) -int 
cd9660_blktooff(struct vop_blktooff_args *ap); -int cd9660_offtoblk(struct vop_offtoblk_args *ap); -int cd9660_cmap(struct vop_cmap_args *ap); + ((int (*)(struct vnop_pageout_args *))cd9660_enotsupp) +int cd9660_blktooff(struct vnop_blktooff_args *ap); +int cd9660_offtoblk(struct vnop_offtoblk_args *ap); +int cd9660_blockmap(struct vnop_blockmap_args *ap); #define VOPFUNC int (*)(void *) /* @@ -1564,55 +1244,40 @@ int cd9660_cmap(struct vop_cmap_args *ap); */ int (**cd9660_vnodeop_p)(void *); struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)cd9660_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)cd9660_open }, /* open */ - { &vop_close_desc, (VOPFUNC)cd9660_close }, /* close */ - { &vop_access_desc, (VOPFUNC)cd9660_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)cd9660_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)cd9660_read }, /* read */ - { &vop_write_desc, (VOPFUNC)cd9660_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)cd9660_lease_check },/* lease */ - { &vop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)cd9660_select }, /* select */ - { &vop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)cd9660_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)cd9660_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ - { &vop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ - { &vop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ - { &vop_abortop_desc, (VOPFUNC)nop_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)cd9660_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)cd9660_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)cd9660_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ - { &vop_print_desc, (VOPFUNC)cd9660_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)cd9660_islocked },/* islocked */ - { &vop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ - { &vop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)cd9660_blkatoff },/* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)cd9660_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)cd9660_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)cd9660_truncate },/* truncate */ - { &vop_update_desc, (VOPFUNC)cd9660_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vop_getattrlist_desc, (VOPFUNC)cd9660_getattrlist }, /* getattrlist */ - { &vop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)cd9660_cmap }, /* cmap */ + { 
&vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)cd9660_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)cd9660_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)cd9660_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ + { &vnop_read_desc, (VOPFUNC)cd9660_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)cd9660_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)cd9660_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)cd9660_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ + { &vnop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ + { &vnop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ + { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ + { &vnop_getattrlist_desc, (VOPFUNC)cd9660_getattrlist }, /* getattrlist */ + { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)cd9660_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc cd9660_vnodeop_opv_desc = @@ -1621,57 +1286,44 @@ struct vnodeopv_desc cd9660_vnodeop_opv_desc = /* * The VOP table for CD-ROM XA (media) files is almost the same * as for ordinary files, except for read, and pagein. - * Note that cd9660_xa_read doesn't use cluster I/O, so cmap + * Note that cd9660_xa_read doesn't use cluster I/O, so blockmap * isn't needed, and isn't implemented. Similarly, it doesn't - * do bread() on CD XA vnodes, so bmap, blktooff, offtoblk + * do buf_bread() on CD XA vnodes, so bmap, blktooff, offtoblk * aren't needed. 
*/ int (**cd9660_cdxaop_p)(void *); struct vnodeopv_entry_desc cd9660_cdxaop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)cd9660_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)cd9660_open }, /* open */ - { &vop_close_desc, (VOPFUNC)cd9660_close }, /* close */ - { &vop_access_desc, (VOPFUNC)cd9660_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)cd9660_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)cd9660_xa_read }, /* read */ - { &vop_write_desc, (VOPFUNC)cd9660_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)cd9660_lease_check },/* lease */ - { &vop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)cd9660_select }, /* select */ - { &vop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)cd9660_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)cd9660_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ - { &vop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ - { &vop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ - { &vop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)cd9660_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)cd9660_unlock }, /* unlock */ - { &vop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ - { &vop_print_desc, (VOPFUNC)cd9660_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)cd9660_islocked },/* islocked */ - { &vop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ - { &vop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)cd9660_blkatoff },/* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)cd9660_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)cd9660_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)cd9660_truncate },/* truncate */ - { &vop_update_desc, (VOPFUNC)cd9660_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_pagein_desc, (VOPFUNC)cd9660_xa_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vop_getattrlist_desc, (VOPFUNC)cd9660_getattrlist }, /* getattrlist */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)cd9660_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)cd9660_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)cd9660_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ + { &vnop_read_desc, (VOPFUNC)cd9660_xa_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)cd9660_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)cd9660_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ + { 
&vnop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)cd9660_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ + { &vnop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ + { &vnop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ + { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)cd9660_xa_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ + { &vnop_getattrlist_desc, (VOPFUNC)cd9660_getattrlist }, /* getattrlist */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc cd9660_cdxaop_opv_desc = @@ -1682,54 +1334,39 @@ struct vnodeopv_desc cd9660_cdxaop_opv_desc = */ int (**cd9660_specop_p)(void *); struct vnodeopv_entry_desc cd9660_specop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)spec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)cd9660_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)cd9660_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)spec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)spec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)spec_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)spec_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)spec_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)spec_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)cd9660_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)cd9660_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)cd9660_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)cd9660_islocked },/* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { &vop_blkatoff_desc, 
(VOPFUNC)spec_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)spec_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)spec_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)spec_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)cd9660_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)cd9660_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ + { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ + { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ + { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)cd9660_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc cd9660_specop_opv_desc = @@ -1738,52 +1375,37 @@ struct vnodeopv_desc cd9660_specop_opv_desc = #if FIFO int (**cd9660_fifoop_p)(void *); struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)fifo_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vop_close_desc, (VOPFUNC)fifo_close }, /* close */ - { &vop_access_desc, (VOPFUNC)cd9660_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)cd9660_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)fifo_read }, /* 
read */ - { &vop_write_desc, (VOPFUNC)fifo_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)fifo_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)fifo_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)fifo_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)fifo_link } , /* link */ - { &vop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)fifo_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)cd9660_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)cd9660_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)fifo_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)cd9660_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)cd9660_islocked },/* islocked */ - { &vop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)fifo_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)fifo_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)fifo_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)fifo_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)cd9660_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)fifo_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ + { &vnop_read_desc, (VOPFUNC)fifo_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)fifo_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)fifo_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)fifo_link } , /* link */ + { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ + { &vnop_strategy_desc, 
(VOPFUNC)fifo_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ + { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc cd9660_fifoop_opv_desc = diff --git a/bsd/isofs/cd9660/iso.h b/bsd/isofs/cd9660/iso.h index b97e89154..683f9f0e1 100644 --- a/bsd/isofs/cd9660/iso.h +++ b/bsd/isofs/cd9660/iso.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -261,7 +261,6 @@ struct iso_mnt { int im_sector_size; int volume_space_size; - struct netexport im_export; char root[ISODCL (157, 190)]; int root_extent; @@ -291,7 +290,7 @@ struct iso_mnt { /* CD is Video CD (version < 2.0) */ #define IMF2_IS_VCD 0x00000002 -#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data)) +#define VFSTOISOFS(mp) ((struct iso_mnt *)(vfs_fsprivate(mp))) #define blkoff(imp, loc) ((loc) & (imp)->im_bmask) #define lblktosize(imp, blk) ((blk) << (imp)->im_bshift) @@ -302,23 +301,22 @@ struct iso_mnt { (off_t)(((off) / (imp)->im_sector_size) * (imp)->im_sector_size) -int cd9660_mount __P((struct mount *, - char *, caddr_t, struct nameidata *, struct proc *)); -int cd9660_start __P((struct mount *, int, struct proc *)); -int cd9660_unmount __P((struct mount *, int, struct proc *)); -int cd9660_root __P((struct mount *, struct vnode **)); -int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); -int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); -int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int cd9660_vget __P((struct mount *, void *, struct vnode **)); -int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int cd9660_vptofh __P((struct vnode *, struct fid *)); -int cd9660_init __P(()); +int cd9660_mount(struct mount *, vnode_t, user_addr_t, vfs_context_t); +int cd9660_start(struct mount *, int, vfs_context_t); +int cd9660_unmount(struct mount *, int, vfs_context_t); +int cd9660_root(struct mount *, struct vnode **, vfs_context_t); +int cd9660_statfs(struct mount *, struct vfsstatfs *, vfs_context_t); +int cd9660_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, vfs_context_t context); +int cd9660_sync(struct mount *, int, vfs_context_t); +int cd9660_vget(struct mount *, ino64_t, struct vnode **, vfs_context_t); +int cd9660_fhtovp(struct mount *, int, unsigned char *, struct vnode **, vfs_context_t); +int cd9660_vptofh(struct vnode *, int *, unsigned char *, vfs_context_t); +int cd9660_init(struct vfsconf *); +int cd9660_mountroot(mount_t, vnode_t, vfs_context_t); +int cd9660_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); -int cd9660_mountroot __P((void)); - -int cd9660_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); +int cd9660_vget_internal(mount_t, ino_t, vnode_t *, vnode_t, struct componentname *, + int, struct iso_directory_record *, proc_t); extern int (**cd9660_vnodeop_p)(void *); extern int (**cd9660_specop_p)(void *); @@ -328,15 +326,13 @@ extern int 
(**cd9660_fifoop_p)(void *); extern int (**cd9660_cdxaop_p)(void *); static __inline int -isonum_711(p) - u_char *p; +isonum_711(u_char *p) { return *p; } static __inline int -isonum_712(p) - char *p; +isonum_712(char *p) { return *p; } @@ -344,15 +340,13 @@ isonum_712(p) #ifndef UNALIGNED_ACCESS static __inline int -isonum_723(p) - u_char *p; +isonum_723(u_char *p) { return *p|(p[1] << 8); } static __inline int -isonum_733(p) - u_char *p; +isonum_733(u_char *p) { return *p|(p[1] << 8)|(p[2] << 16)|(p[3] << 24); } @@ -362,15 +356,13 @@ isonum_733(p) #if BYTE_ORDER == LITTLE_ENDIAN static __inline int -isonum_723(p) - u_char *p +isonum_723(u_char *p) { return *(u_int16t *)p; } static __inline int -isonum_733(p) - u_char *p; +isonum_733(u_char *p) { return *(u_int32t *)p; } @@ -380,15 +372,13 @@ isonum_733(p) #if BYTE_ORDER == BIG_ENDIAN static __inline int -isonum_723(p) - u_char *p +isonum_723(u_char *p) { return *(u_int16t *)(p + 2); } static __inline int -isonum_733(p) - u_char *p; +isonum_733(u_char *p) { return *(u_int32t *)(p + 4); } @@ -397,14 +387,21 @@ isonum_733(p) #endif /* UNALIGNED_ACCESS */ -int isofncmp __P((u_char *, int, u_char *, int)); -int ucsfncmp __P((u_int16_t *, int, u_int16_t *, int)); -void isofntrans __P((u_char *, int, u_char *, u_short *, int, int)); -void ucsfntrans __P((u_int16_t *, int, u_char *, u_short *, int, int)); -ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *)); -int attrcalcsize __P((struct attrlist *attrlist)); -void packattrblk __P((struct attrlist *alist, struct vnode *vp, - void **attrbufptrptr, void **varbufptrptr)); +int isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen); +int ucsfncmp(u_int16_t *, int, u_int16_t *, int); +void isofntrans(u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen, + int original, int assoc); +void ucsfntrans(u_int16_t *, int, u_char *, u_short *, int, int); +int attrcalcsize(struct attrlist *attrlist); +struct iso_node; +void packcommonattr(struct attrlist *alist, struct iso_node *ip, + void **attrbufptrptr, void **varbufptrptr); +void packdirattr(struct attrlist *alist, struct iso_node *ip, + void **attrbufptrptr, void **varbufptrptr); +void packfileattr(struct attrlist *alist, struct iso_node *ip, + void **attrbufptrptr, void **varbufptrptr); +void packattrblk(struct attrlist *alist, struct vnode *vp, + void **attrbufptrptr, void **varbufptrptr); /* diff --git a/bsd/isofs/cd9660/iso_rrip.h b/bsd/isofs/cd9660/iso_rrip.h index 5081d1bdc..cfc5f1397 100644 --- a/bsd/isofs/cd9660/iso_rrip.h +++ b/bsd/isofs/cd9660/iso_rrip.h @@ -99,15 +99,15 @@ typedef struct { int cont; /* continuation of above */ } ISO_RRIP_ANALYZE; -int cd9660_rrip_analyze __P((struct iso_directory_record *isodir, - struct iso_node *inop, struct iso_mnt *imp)); -int cd9660_rrip_getname __P((struct iso_directory_record *isodir, +int cd9660_rrip_analyze(struct iso_directory_record *isodir, + struct iso_node *inop, struct iso_mnt *imp); +int cd9660_rrip_getname(struct iso_directory_record *isodir, char *outbuf, u_short *outlen, - ino_t *inump, struct iso_mnt *imp)); -int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir, + ino_t *inump, struct iso_mnt *imp); +int cd9660_rrip_getsymname(struct iso_directory_record *isodir, char *outbuf, u_short *outlen, - struct iso_mnt *imp)); -int cd9660_rrip_offset __P((struct iso_directory_record *isodir, - struct iso_mnt *imp)); + struct iso_mnt *imp); +int cd9660_rrip_offset(struct iso_directory_record *isodir, + struct iso_mnt *imp); #endif /* __APPLE_API_PRIVATE */ 
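The iso.h and iso_rrip.h hunks above are largely mechanical: every __P((...)) wrapper and K&R-style parameter list is rewritten as a plain ANSI C prototype. For context, a minimal sketch of what the __P macro did; this is the classic BSD definition (as in sys/cdefs.h), shown here as background rather than quoted from this patch:

#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos)	protos	/* ANSI compiler: keep the parameter list */
#else
#define __P(protos)	()	/* K&R compiler: strip it */
#endif

/* The two spellings declare the same function; once K&R support is
 * dropped, the wrapper is pure noise, which is why this patch removes it. */
int isofncmp __P((u_char *, int, u_char *, int));		/* old */
int isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen);	/* new */

The isonum_7xx helpers converted a few hunks up are ECMA-119 numeric-field readers: isonum_733(), for example, assembles its 32-bit result byte by byte from the little-endian half of the on-disc both-byte-order field, which is why it works on either endianness.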
#endif /* __ISOFS_CD9660_ISO_RRIP_H__ */ diff --git a/bsd/kern/ast.h b/bsd/kern/ast.h index f2b6aa07e..d320003a9 100644 --- a/bsd/kern/ast.h +++ b/bsd/kern/ast.h @@ -28,18 +28,10 @@ #ifndef _KERN_AST_H_ #define _KERN_AST_H_ -#include - -#ifdef BSD_USE_APC - -extern thread_apc_handler_t bsd_ast; - -#else /* !BSD_USE_APC */ +#include extern void astbsd_on(void); -extern void act_set_astbsd(thread_act_t); -extern void bsd_ast(thread_act_t); - -#endif /* !BSD_USE_APC */ +extern void act_set_astbsd(thread_t); +extern void bsd_ast(thread_t); #endif /* _KERN_AST_H_ */ diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 2864a6cfd..a0d66765f 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -70,23 +70,25 @@ #include #include #include -#include -#include +#include +#include +#include #include -#include +#include #include -#include +#include #include #include #include #include +#include #include #include #include -#include +#include #include #include #include @@ -113,6 +115,8 @@ #include #include +#include + extern int app_profile; /* on/off switch for pre-heat cache */ char copyright[] = @@ -126,7 +130,6 @@ extern void ux_handler(); struct proc proc0; struct session session0; struct pgrp pgrp0; -struct pcred cred0; struct filedesc filedesc0; struct plimit limit0; struct pstats pstats0; @@ -138,6 +141,7 @@ long tk_nin; long tk_nout; long tk_rawcc; +int lock_trace = 0; /* Global variables to make pstat happy. We do swapping differently */ int nswdev, nswap; int nswapmap; @@ -153,11 +157,10 @@ int hostnamelen; char domainname[MAXDOMNAMELEN]; int domainnamelen; char classichandler[32] = {0}; -long classichandler_fsid = -1L; +uint32_t classichandler_fsid = -1L; long classichandler_fileid = -1L; char rootdevice[16]; /* hfs device names have at least 9 chars */ -struct timeval boottime; /* GRODY! This has to go... */ #ifdef KMEMSTATS struct kmemstats kmemstats[M_LAST]; @@ -179,11 +182,17 @@ extern int bsd_hardclockinit; extern task_t bsd_init_task; extern char init_task_failure_data[]; extern void time_zone_slock_init(void); +static void process_name(char *, struct proc *); + +static void setconf(void); funnel_t *kernel_flock; -funnel_t *network_flock; -int disable_funnel = 0; /* disables split funnel */ -int enable_funnel = 0; /* disables split funnel */ + +extern void sysv_shm_lock_init(void); +extern void sysv_sem_lock_init(void); +extern void sysv_msg_lock_init(void); +extern void pshm_lock_init(); +extern void psem_lock_init(); /* * Initialization code. @@ -200,8 +209,8 @@ int enable_funnel = 0; /* disables split funnel */ /* * Sets the name for the given task. 
*/ -void -proc_name(s, p) +static void +process_name(s, p) char *s; struct proc *p; { @@ -218,31 +227,47 @@ struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ }; struct rlimit vm_initial_limit_data = { DFLDSIZ, MAXDSIZ }; struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ }; -extern thread_t first_thread; -extern thread_act_t cloneproc(struct proc *, int); -extern int (*mountroot) __P((void)); +extern thread_t cloneproc(struct proc *, int); +extern int (*mountroot)(void); extern int netboot_mountroot(); /* netboot.c */ extern int netboot_setup(struct proc * p); +lck_grp_t * proc_lck_grp; +lck_grp_attr_t * proc_lck_grp_attr; +lck_attr_t * proc_lck_attr; + /* hook called after root is mounted XXX temporary hack */ void (*mountroot_post_hook)(void); +/* + * This function is called very early on in the Mach startup, from the + * function start_kernel_threads() in osfmk/kern/startup.c. It's called + * in the context of the current (startup) task using a call to the + * function kernel_thread_create() to jump into start_kernel_threads(). + * Internally, kernel_thread_create() calls thread_create_internal(), + * which calls uthread_alloc(). The function of uthread_alloc() is + * normally to allocate a uthread structure, and fill out the uu_sigmask, + * uu_act, and uu_ucred fields. It skips filling these out in the case + * of the "task" being "kernel_task", because the order of operation is + * inverted. To account for that, we need to manually fill in at least + * the uu_cred field so that the uthread structure can be used like any + * other. + */ void bsd_init() { register struct proc *p; - extern struct ucred *rootcred; + struct uthread *ut; + extern kauth_cred_t rootcred; register int i; int s; thread_t th; + struct vfs_context context; void lightning_bolt(void ); kern_return_t ret; boolean_t funnel_state; - extern void uthread_zone_init(); - - - /* split funnel is enabled by default */ - PE_parse_boot_arg("dfnl", &disable_funnel); + struct ucred temp_cred; + extern void file_lock_init(void); kernel_flock = funnel_alloc(KERNEL_FUNNEL); if (kernel_flock == (funnel_t *)0 ) { @@ -251,29 +276,19 @@ bsd_init() funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (!disable_funnel) { - network_flock = funnel_alloc(NETWORK_FUNNEL); - if (network_flock == (funnel_t *)0 ) { - panic("bsd_init: Failed to allocate network funnel"); - } - } else { - network_flock = kernel_flock; - } - printf(copyright); - + kmeminit(); parse_bsd_args(); - bsd_bufferinit(); - /* Initialize the uthread zone */ - uthread_zone_init(); + //uthread_zone_init(); /* XXX redundant: previous uthread_alloc() */ - /* - * Initialize process and pgrp structures. - */ + /* Initialize kauth subsystem before instancing the first credential */ + kauth_init(); + + /* Initialize process and pgrp structures. 
*/ procinit(); kernproc = &proc0; @@ -285,12 +300,29 @@ bsd_init() p->p_pid = 0; /* give kernproc a name */ - proc_name("kernel_task", p); + process_name("kernel_task", p); + + + /* allocate proc lock group attribute and group */ + proc_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(proc_lck_grp_attr); + + proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); + + + /* Allocate proc lock attribute */ + proc_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(proc_lck_attr); + + lck_mtx_init(&p->p_mlock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&p->p_fdmlock, proc_lck_grp, proc_lck_attr); if (current_task() != kernel_task) printf("bsd_init: We have a problem, " "current task is not kernel task\n"); + ut = (uthread_t)get_bsdthread_info(current_thread()); + /* * Create process 0. */ @@ -307,7 +339,7 @@ bsd_init() p->task = kernel_task; p->p_stat = SRUN; - p->p_flag = P_INMEM|P_SYSTEM; + p->p_flag = P_SYSTEM; p->p_nice = NZERO; p->p_pptr = p; lockinit(&p->signal_lock, PVM, "signal", 0, 0); @@ -316,20 +348,26 @@ bsd_init() p->sigwait_thread = THREAD_NULL; p->exit_thread = THREAD_NULL; - /* Create credentials. */ - lockinit(&cred0.pc_lock, PLOCK, "proc0 cred", 0, 0); - cred0.p_refcnt = 1; - p->p_cred = &cred0; - p->p_ucred = crget(); - p->p_ucred->cr_ngroups = 1; /* group 0 */ + /* + * Create credential. This also initializes the audit information. + * XXX It is not clear what the initial values should be for audit ID, + * XXX session ID, etc. + */ + bzero(&temp_cred, sizeof(temp_cred)); + temp_cred.cr_ngroups = 1; + + p->p_ucred = kauth_cred_create(&temp_cred); + + /* give the (already existing) initial thread a reference on it */ + kauth_cred_ref(p->p_ucred); + ut->uu_ucred = p->p_ucred; TAILQ_INIT(&p->aio_activeq); TAILQ_INIT(&p->aio_doneq); p->aio_active_count = 0; p->aio_done_count = 0; - /* Set the audit info for this process */ - audit_proc_init(p); + file_lock_init(); /* Create the file descriptor table. */ filedesc0.fd_refcnt = 1+1; /* +1 so shutdown will not _FREE_ZONE */ @@ -357,7 +395,7 @@ bsd_init() p->p_sigacts = &sigacts0; /* - * Charge root for one process. + * Charge root for two processes: init and mach_init. */ (void)chgproccnt(0, 1); @@ -372,12 +410,21 @@ bsd_init() &min, (vm_size_t)BSD_PAGABLE_MAP_SIZE, TRUE, - TRUE, + VM_FLAGS_ANYWHERE, &bsd_pageable_map); if (ret != KERN_SUCCESS) panic("bsd_init: Failed to allocate bsd pageable map"); } + /* + * Initialize buffers and hash links for buffers + * + * SIDE EFFECT: Starts a thread for bcleanbuf_thread(), so must + * happen after a credential has been associated with + * the kernel task. + */ + bsd_bufferinit(); + /* Initialize the execve() semaphore */ ret = semaphore_create(kernel_task, &execve_semaphore, SYNC_POLICY_FIFO, (BSD_PAGABLE_MAP_SIZE / NCARGS)); @@ -397,9 +444,6 @@ bsd_init() /* Initialize mbuf's. */ mbinit(); - /* Initialize syslog */ - log_init(); - /* * Initializes security event auditing. * XXX: Should/could this occur later? @@ -412,6 +456,18 @@ bsd_init() /* Initialize for async IO */ aio_init(); + /* Initialize pipes */ + pipeinit(); + + /* Initialize SysV shm subsystem locks; the subsystem proper is + * initialized through a sysctl. + */ + sysv_shm_lock_init(); + sysv_sem_lock_init(); + sysv_msg_lock_init(); + pshm_lock_init(); + psem_lock_init(); + /* POSIX Shm and Sem */ pshm_cache_init(); psem_cache_init(); @@ -421,13 +477,12 @@ bsd_init() * Initialize protocols. Block reception of incoming packets * until everything is ready.
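The bsd_init() changes above are mostly about ordering: the old pcred/crget() bootstrap is replaced by a kauth credential, and bsd_bufferinit() moves below it because of the side effect called out in the new comment. Condensed from the patch into a reading aid (these lines paraphrase calls shown above, in patch order; they are not standalone code):

kauth_init();			/* kauth must exist before the first credential */
bzero(&temp_cred, sizeof(temp_cred));
temp_cred.cr_ngroups = 1;			/* group 0 */
p->p_ucred = kauth_cred_create(&temp_cred);	/* proc0's credential */
kauth_cred_ref(p->p_ucred);			/* extra ref for the startup thread */
ut->uu_ucred = p->p_ucred;			/* hand it to the startup uthread */
/* ... */
bsd_bufferinit();	/* spawns bcleanbuf_thread(), which needs that credential */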
*/ - s = splimp(); sysctl_register_fixed(); sysctl_mib_init(); dlil_init(); + proto_kpi_init(); socketinit(); domaininit(); - splx(s); p->p_fd->fd_cdir = NULL; p->p_fd->fd_rdir = NULL; @@ -456,42 +511,53 @@ bsd_init() /* Register the built-in dlil ethernet interface family */ ether_family_init(); + /* Call any kext code that wants to run just after network init */ + net_init_run(); + vnode_pager_bootstrap(); +#if 0 + /* XXX Hack for early debug stop */ + printf("\nabout to sleep for 10 seconds\n"); + IOSleep( 10 * 1000 ); + /* Debugger("hello"); */ +#endif + + inittodr(0); /* Mount the root file system. */ while( TRUE) { int err; setconf(); - /* - * read the time after clock_initialize_calendar() - * and before nfs mount - */ - microtime((struct timeval *)&time); - bsd_hardclockinit = -1; /* start ticking */ if (0 == (err = vfs_mountroot())) break; +#if NFSCLIENT if (mountroot == netboot_mountroot) { printf("cannot mount network root, errno = %d\n", err); mountroot = NULL; if (0 == (err = vfs_mountroot())) break; } +#endif printf("cannot mount root, errno = %d\n", err); boothowto |= RB_ASKNAME; } - mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; + mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ - if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) + if (VFS_ROOT(mountlist.tqh_first, &rootvnode, &context)) panic("bsd_init: cannot find root vnode"); - VREF(rootvnode); + rootvnode->v_flag |= VROOT; + (void)vnode_ref(rootvnode); + (void)vnode_put(rootvnode); filedesc0.fd_cdir = rootvnode; - VOP_UNLOCK(rootvnode, 0, p); +#if NFSCLIENT if (mountroot == netboot_mountroot) { int err; /* post mount setup */ @@ -499,14 +565,10 @@ bsd_init() panic("bsd_init: NetBoot could not find root, %d", err); } } +#endif - /* - * Now can look at time, having had a chance to verify the time - * from the file system. Reset p->p_rtime as it may have been - * munched in mi_switch() after the time got set. - */ - p->p_stats->p_start = boottime = time; + microtime(&p->p_stats->p_start); p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; #if DEVFS @@ -536,14 +598,14 @@ bsdinit_task(void) struct proc *p = current_proc(); struct uthread *ut; kern_return_t kr; - thread_act_t th_act; + thread_t th_act; shared_region_mapping_t system_region; - proc_name("init", p); + process_name("init", p); ux_handler_init(); - th_act = current_act(); + th_act = current_thread(); (void) host_set_exception_ports(host_priv_self(), EXC_MASK_ALL & ~(EXC_MASK_SYSCALL | EXC_MASK_MACH_SYSCALL | @@ -567,17 +629,16 @@ bsdinit_task(void) bsd_hardclockinit = 1; /* Start bsd hardclock */ bsd_init_task = get_threadtask(th_act); init_task_failure_data[0] = 0; - system_region = lookup_default_shared_region(ENV_DEFAULT_ROOT, - machine_slot[cpu_number()].cpu_type); + system_region = lookup_default_shared_region(ENV_DEFAULT_ROOT, cpu_type()); if (system_region == NULL) { - shared_file_boot_time_init(ENV_DEFAULT_ROOT, - machine_slot[cpu_number()].cpu_type); + shared_file_boot_time_init(ENV_DEFAULT_ROOT, cpu_type()); } else { vm_set_shared_region(get_threadtask(th_act), system_region); } load_init_program(p); /* turn on app-profiling i.e. 
pre-heating */ app_profile = 1; + lock_trace = 1; } void @@ -617,7 +678,8 @@ bsd_autoconf() #include /* for MAXPARTITIONS */ -setconf() +static void +setconf(void) { extern kern_return_t IOFindBSDRoot( char * rootName, dev_t * root, u_int32_t * flags ); @@ -640,25 +702,29 @@ setconf() flags = 0; } +#if NFSCLIENT if( flags & 1 ) { /* network device */ mountroot = netboot_mountroot; } else { +#endif /* otherwise have vfs determine root filesystem */ mountroot = NULL; +#if NFSCLIENT } +#endif } bsd_utaskbootstrap() { - thread_act_t th_act; + thread_t th_act; struct uthread *ut; th_act = cloneproc(kernproc, 0); initproc = pfind(1); /* Set the launch time for init */ - initproc->p_stats->p_start = time; + microtime(&initproc->p_stats->p_start); ut = (struct uthread *)get_bsdthread_info(th_act); ut->uu_sigmask = 0; @@ -733,56 +799,10 @@ parse_bsd_args() return 0; } -boolean_t -thread_funnel_switch( - int oldfnl, - int newfnl) +#if !NFSCLIENT +int +netboot_root(void) { - boolean_t funnel_state_prev; - int curfnl; - funnel_t * curflock; - funnel_t * oldflock; - funnel_t * newflock; - funnel_t * exist_funnel; - extern int disable_funnel; - - - if (disable_funnel) - return(TRUE); - - if(oldfnl == newfnl) { - panic("thread_funnel_switch: can't switch to same funnel"); - } - - if ((oldfnl != NETWORK_FUNNEL) && (oldfnl != KERNEL_FUNNEL)) { - panic("thread_funnel_switch: invalid oldfunnel"); - } - if ((newfnl != NETWORK_FUNNEL) && (newfnl != KERNEL_FUNNEL)) { - panic("thread_funnel_switch: invalid newfunnel"); - } - - if((curflock = thread_funnel_get()) == THR_FUNNEL_NULL) { - panic("thread_funnel_switch: no funnel held"); - } - - if ((oldfnl == NETWORK_FUNNEL) && (curflock != network_flock)) - panic("thread_funnel_switch: network funnel not held"); - - if ((oldfnl == KERNEL_FUNNEL) && (curflock != kernel_flock)) - panic("thread_funnel_switch: kernel funnel not held"); - - if(oldfnl == NETWORK_FUNNEL) { - oldflock = network_flock; - newflock = kernel_flock; - } else { - oldflock = kernel_flock; - newflock = network_flock; - } - KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE, oldflock, 1, 0, 0, 0); - thread_funnel_set(oldflock, FALSE); - KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE, newflock, 1, 0, 0, 0); - thread_funnel_set(newflock, TRUE); - KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE, newflock, 1, 0, 0, 0); - - return(TRUE); + return(0); } +#endif diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index 0597434fa..9a0c06054 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -28,12 +28,14 @@ #include #include #include +#include #include /* for SET */ #include /* Just to satisfy pstat command */ int dmmin, dmmax, dmtext; +vm_offset_t kmem_mb_alloc(vm_map_t mbmap, int size) { vm_offset_t addr; @@ -46,7 +48,13 @@ kmem_mb_alloc(vm_map_t mbmap, int size) } -pcb_synch() {} +/* + * XXX this function only exists to be exported and do nothing. 
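The bsd_stubs.c hunk above gives kmem_mb_alloc() an explicit return type but elides most of its body. From the visible shape it is a thin wrapper over kmem_alloc(): wire size bytes in the caller's submap and hand back the address, or 0 on failure. A sketch under that assumption (kmem_alloc(map, &addr, size) returning kern_return_t is the standard Mach interface):

vm_offset_t
kmem_mb_alloc(vm_map_t mbmap, int size)
{
	vm_offset_t addr;

	/* sketch: allocate `size` wired bytes in the mbuf submap */
	if (kmem_alloc(mbmap, &addr, (vm_size_t)size) == KERN_SUCCESS)
		return (addr);
	return (0);
}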
+ */ +void +pcb_synch(void) +{ +} struct proc * current_proc(void) @@ -54,10 +62,10 @@ current_proc(void) /* Never returns a NULL */ struct uthread * ut; struct proc *p; - thread_act_t thr_act = current_act(); + thread_t thr_act = current_thread(); ut = (struct uthread *)get_bsdthread_info(thr_act); - if (ut && (ut->uu_flag & P_VFORK) && ut->uu_proc) { + if (ut && (ut->uu_flag & UT_VFORK) && ut->uu_proc) { p = ut->uu_proc; if ((p->p_flag & P_INVFORK) == 0) panic("returning child proc not under vfork"); diff --git a/bsd/kern/init_sysent.c b/bsd/kern/init_sysent.c index 431063091..d0ea18c24 100644 --- a/bsd/kern/init_sysent.c +++ b/bsd/kern/init_sysent.c @@ -1,851 +1,463 @@ /* - * Copyright (c) 1995-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * @APPLE_LICENSE_HEADER_END@ * - * @APPLE_LICENSE_HEADER_END@ + * + * System call switch table. + * + * DO NOT EDIT-- this file is automatically generated. 
+ * created from syscalls.master */ #include #include #include -#include -#include - -/* serial or parallel system call */ -#define syss(fn,no) {no, 0, KERNEL_FUNNEL, fn} -#define sysp(fn,no) {no, 1, KERNEL_FUNNEL, fn} -#define sysnets(fn,no) {no, 0, NETWORK_FUNNEL, fn} -#define sysnetp(fn,no) {no, 1, NETWORK_FUNNEL, fn} -#define sysnofnl(fn,no) {no, 0, NO_FUNNEL, fn} - -/* - * definitions - */ -int nosys(); -int exit(); -int fork(); -int read(); -int write(); -int open(); -int close(); -int wait4(); -int link(); -int unlink(); -int chdir(); -int fchdir(); -int mknod(); -int chmod(); -int chown(); -int obreak(); -int getfsstat(); -#if COMPAT_GETFSSTAT -int ogetfsstat(); -#endif -int getpid(); -int mount(); -int unmount(); -int setuid(); -int getuid(); -int geteuid(); -int ptrace(); -int recvmsg(); -int sendmsg(); -int recvfrom(); -int accept(); -int getpeername(); -int getsockname(); -int access(); -int chflags(); -int fchflags(); -int sync(); -int kill(); -int getppid(); -int dup(); -int pipe(); -int getegid(); -int profil(); -int load_shared_file(); -int reset_shared_file(); -int new_system_shared_regions(); -int ktrace(); -int sigaction(); -int getgid(); -int sigprocmask(); -int getlogin(); -int setlogin(); -int acct(); -int sigpending(); -int sigaltstack(); -int ioctl(); -int reboot(); -int revoke(); -int symlink(); -int readlink(); -int execve(); -int umask(); -int chroot(); -int msync(); -int vfork(); -int sbrk(); -int sstk(); -int ovadvise(); -int munmap(); -int mprotect(); -int madvise(); -int mincore(); -int getgroups(); -int setgroups(); -int getpgrp(); -int setpgid(); -int setitimer(); -int swapon(); -int getitimer(); -int getdtablesize(); -int dup2(); -int fcntl(); -int select(); -int fsync(); -int setpriority(); -int socket(); -int connect(); -int getpriority(); +#include +#include +#include #ifdef __ppc__ -int osigreturn(); -#endif -int sigreturn(); -int bind(); -int setsockopt(); -int listen(); -int sigsuspend(); -#if TRACE -int vtrace(); +#define AC(name) (sizeof(struct name) / sizeof(uint64_t)) #else +#define AC(name) (sizeof(struct name) / sizeof(register_t)) #endif -int gettimeofday(); -#ifdef __ppc__ -int ppc_gettimeofday(); -#endif -int getrusage(); -int getsockopt(); -int readv(); -int writev(); -int settimeofday(); -int fchown(); -int fchmod(); -int rename(); -int flock(); -int mkfifo(); -int sendto(); -int shutdown(); -int socketpair(); -int mkdir(); -int rmdir(); -int utimes(); -int futimes(); -int adjtime(); -int setsid(); -int quotactl(); -int nfssvc(); -int statfs(); -int fstatfs(); -int getfh(); -int setgid(); -int setegid(); -int seteuid(); -int stat(); -int fstat(); -int lstat(); -int pathconf(); -int fpathconf(); -int getrlimit(); -int setrlimit(); -int getdirentries(); -int mmap(); -int nosys(); -int lseek(); -int truncate(); -int ftruncate(); -int __sysctl(); -int undelete(); -int setprivexec(); -int add_profil(); - -int kdebug_trace(); - -int mlock(); -int munlock(); -int minherit(); -int mlockall(); -int munlockall(); -#if COMPAT_43 -#define compat(name,n) syss(__CONCAT(o,name),n) -#define compatp(name,n) sysp(__CONCAT(o,name),n) -#define comaptnet(name,n) sysnets(__CONCAT(o,name),n) -#define comaptnetp(name,n) sysnetp(__CONCAT(o,name),n) - -int ocreat(); -int olseek(); -int ostat(); -int olstat(); -int ofstat(); -int ogetkerninfo(); -int osmmap(); -int ogetpagesize(); -int ommap(); -int owait(); -int ogethostname(); -int osethostname(); -int oaccept(); -int osend(); -int orecv(); -int osigvec(); -int osigblock(); -int osigsetmask(); -int osigstack(); 
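The AC() macro defined above is where the regenerated table gets its first field: not the number of C arguments, but the size of the generated *_args structure measured in register-sized words (uint64_t on ppc, register_t elsewhere). That matches the old table's N.B. warning, retained in the removed lines below, that the counts are 32-bit words rather than arguments. A standalone sketch with a hypothetical three-word args struct (all names invented for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t reg_t;			/* stand-in for register_t / uint64_t */

struct read_args_sketch {		/* hypothetical 3-argument syscall */
	reg_t	fd;
	reg_t	cbuf;
	reg_t	nbyte;
};

#define AC_SKETCH(name)	(sizeof(struct name) / sizeof(reg_t))

int
main(void)
{
	/* prints 3: words occupied by the arguments, not argument count */
	printf("%zu\n", AC_SKETCH(read_args_sketch));
	return 0;
}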
-int orecvmsg(); -int osendmsg(); -int orecvfrom(); -int osetreuid(); -int osetregid(); -int otruncate(); -int oftruncate(); -int ogetpeername(); -int ogethostid(); -int osethostid(); -int ogetrlimit(); -int osetrlimit(); -int okillpg(); -int oquota(); -int ogetsockname(); -int ogetdomainname(); -int osetdomainname(); -int owait3(); -int ogetdirentries(); -#if NETAT -int ATsocket(); -int ATgetmsg(); -int ATputmsg(); -int ATPsndreq(); -int ATPsndrsp(); -int ATPgetreq(); -int ATPgetrsp(); -#endif /* NETAT */ - -/* Calls for supporting HFS Semantics */ - -int mkcomplex(); -int statv(); -int lstatv(); -int fstatv(); -int getattrlist(); -int setattrlist(); -int getdirentriesattr(); -int exchangedata(); -int checkuseraccess(); -int searchfs(); -int delete(); -int copyfile(); - -/* end of HFS calls */ - -#else /* COMPAT_43 */ -#define compat(n, name) syss(nosys,0) -#define compatp(n, name) sysp(nosys,0) -#define comaptnet(n, name) sysnets(nosys,0) -#define comaptnetp(n, name) sysnetp(nosys,0) -#endif /* COMPAT_43 */ - -int watchevent(); -int waitevent(); -int modwatch(); -int fsctl(); -int semsys(); -int msgsys(); -int shmsys(); -int semctl(); -int semget(); -int semop(); -int semconfig(); -int msgctl(); -int msgget(); -int msgsnd(); -int msgrcv(); -int shmat(); -int shmctl(); -int shmdt(); -int shmget(); -int shm_open(); -int shm_unlink(); -int sem_open(); -int sem_close(); -int sem_unlink(); -int sem_wait(); -int sem_trywait(); -int sem_post(); -int sem_getvalue(); -int sem_init(); -int sem_destroy(); - -int fmod_watch_enable(); -int fmod_watch(); - -int issetugid(); -int utrace(); -int pread(); -int pwrite(); -int getsid(); -int getpgid(); - -int __pthread_kill(); -int sigwait(); -int pthread_sigmask(); -int __disable_threadsignal(); - -int nfsclnt(); -int fhopen(); - -int aio_cancel(); -int aio_error(); -int aio_fsync(); -int aio_read(); -int aio_return(); -int aio_suspend(); -int aio_write(); -int lio_listio(); - -int kqueue(); -int kqueue_portset_np(); -int kqueue_from_portset_np(); -int kevent(); - -int audit(); -int auditon(); -int getauid(); -int setauid(); -int getaudit(); -int setaudit(); -int getaudit_addr(); -int setaudit_addr(); -int auditctl(); - -/* - * System call switch table. - */ - -/* - * N.B. - * The argument count numbers in this table are actually - * the number of UInt32 words that comprise the arguments - * not the number of arguments - * - * This value is not currently used on PPC but Intel Darwin - * does use it and will not work correctly if the values - * are wrong - */ -struct sysent sysent[] = { - syss(nosys,0), /* 0 = indir */ - syss(exit,1), /* 1 = exit */ - syss(fork,0), /* 2 = fork */ - sysp(read,3), /* 3 = read */ - sysp(write,3), /* 4 = write */ - syss(open,3), /* 5 = open */ - syss(close,1), /* 6 = close */ - syss(wait4, 4), /* 7 = wait4 */ - compat(creat,2), /* 8 = old creat */ - syss(link,2), /* 9 = link */ - syss(unlink,1), /* 10 = unlink */ - syss(nosys, 0), /* 11 was obsolete execv */ - syss(chdir,1), /* 12 = chdir */ - syss(fchdir,1), /* 13 = fchdir */ - syss(mknod,3), /* 14 = mknod */ - syss(chmod,2), /* 15 = chmod */ - syss(chown,3), /* 16 = chown; now 3 args */ - syss(obreak,1), /* 17 = old break */ +/* The casts are bogus but will do for now. 
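Before the regenerated table begins below, it helps to decode the new entry shape: seven fields instead of the old four, covering argument words, a cancellation class, a funnel tag, the handler, two argument mungers (one for 32-bit and one for 64-bit user processes), and a return-type code. The field names and typedefs in this sketch are assumptions read off the initializers, not taken from a header:

#include <stdint.h>

typedef int32_t	sy_call_t(void *, void *, int *);	/* guessed signature */
typedef void	sy_munge_t(const void *, void *);	/* guessed signature */

struct sysent_sketch {
	int16_t		sy_narg;		/* AC(...) argument words */
	int8_t		sy_cancel;		/* _SYSCALL_CANCEL_{NONE,PRE} */
	int8_t		sy_funnel;		/* NO_FUNNEL / KERNEL_FUNNEL (+ flags) */
	sy_call_t	*sy_call;		/* handler, e.g. (sy_call_t *)read */
	sy_munge_t	*sy_arg_munge32;	/* munge_w, munge_www, ... */
	sy_munge_t	*sy_arg_munge64;	/* munge_d, munge_ddd, ... */
	int32_t		sy_return_type;		/* _SYSCALL_RET_INT_T, ... */
};

The munge names appear to encode one letter per argument word: w for a 32-bit word, l for a 64-bit quantity (pread's munge_wwwl), s for a sign-extended word (fcntl's munge_wws), with d used throughout on the 64-bit side. That reading is inferred from the entries that follow, not from documentation.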
*/ +__private_extern__ struct sysent sysent[] = { + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 0 = nosys indirect syscall */ + {AC(exit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)exit, munge_w, munge_d, _SYSCALL_RET_NONE}, /* 1 = exit */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)fork, NULL, NULL, _SYSCALL_RET_INT_T}, /* 2 = fork */ + {AC(read_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)read, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T}, /* 3 = read */ + {AC(write_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)write, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T}, /* 4 = write */ + {AC(open_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)open, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 5 = open */ + {AC(close_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)close, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 6 = close */ + {AC(wait4_args), _SYSCALL_CANCEL_PRE, KERNEL_FUNNEL, (sy_call_t *)wait4, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 7 = wait4 */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 8 = nosys old creat */ + {AC(link_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)link, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 9 = link */ + {AC(unlink_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)unlink, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 10 = unlink */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 11 = nosys old execv */ + {AC(chdir_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)chdir, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 12 = chdir */ + {AC(fchdir_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fchdir, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 13 = fchdir */ + {AC(mknod_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mknod, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 14 = mknod */ + {AC(chmod_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)chmod, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 15 = chmod */ + {AC(chown_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)chown, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 16 = chown */ + {AC(obreak_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)obreak, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 17 = obreak old break */ #if COMPAT_GETFSSTAT - syss(ogetfsstat, 3), /* 18 = ogetfsstat */ -#else - syss(getfsstat, 3), /* 18 = getfsstat */ -#endif - compat(lseek,3), /* 19 = old lseek */ - sysp(getpid,0), /* 20 = getpid */ - syss(nosys, 0), /* 21 was obsolete mount */ - syss(nosys, 0), /* 22 was obsolete umount */ - syss(setuid,1), /* 23 = setuid */ - sysp(getuid,0), /* 24 = getuid */ - sysp(geteuid,0), /* 25 = geteuid */ - syss(ptrace,4), /* 26 = ptrace */ - sysnets(recvmsg,3), /* 27 = recvmsg */ - sysnets(sendmsg,3), /* 28 = sendmsg */ - sysnets(recvfrom,6), /* 29 = recvfrom */ - sysnets(accept,3), /* 30 = accept */ - sysnets(getpeername,3), /* 31 = getpeername */ - sysnets(getsockname,3), /* 32 = getsockname */ - syss(access,2), /* 33 = access */ - syss(chflags,2), /* 34 = chflags */ - syss(fchflags,2), /* 35 = fchflags */ - syss(sync,0), /* 36 = sync */ - syss(kill,2), /* 37 = kill */ - compat(stat,2), /* 38 = old stat */ - sysp(getppid,0), /* 39 = getppid */ - compat(lstat,2), /* 40 = old lstat */ - syss(dup,1), /* 41 = dup */ - syss(pipe,0), /* 42 = pipe */ - sysp(getegid,0), /* 43 = getegid */ - syss(profil,4), /* 44 = profil */ - syss(ktrace,4), /* 45 = ktrace */ - syss(sigaction,3), /* 
46 = sigaction */ - sysp(getgid,0), /* 47 = getgid */ - syss(sigprocmask,3), /* 48 = sigprocmask */ - syss(getlogin,2), /* 49 = getlogin */ - syss(setlogin,1), /* 50 = setlogin */ - syss(acct,1), /* 51 = turn acct off/on */ - syss(sigpending,1), /* 52 = sigpending */ - syss(sigaltstack,2), /* 53 = sigaltstack */ - syss(ioctl,3), /* 54 = ioctl */ - syss(reboot,2), /* 55 = reboot */ - syss(revoke,1), /* 56 = revoke */ - syss(symlink,2), /* 57 = symlink */ - syss(readlink,3), /* 58 = readlink */ - syss(execve,3), /* 59 = execve */ - syss(umask,1), /* 60 = umask */ - syss(chroot,1), /* 61 = chroot */ - compat(fstat,2), /* 62 = old fstat */ - syss(nosys,0), /* 63 = used internally, reserved */ - compat(getpagesize,0), /* 64 = old getpagesize */ - syss(msync,3), /* 65 = msync */ - syss(vfork,0), /* 66 = vfork */ - syss(nosys,0), /* 67 was obsolete vread */ - syss(nosys,0), /* 68 was obsolete vwrite */ - syss(sbrk,1), /* 69 = sbrk */ - syss(sstk,1), /* 70 = sstk */ - compat(smmap,6), /* 71 = old mmap */ - syss(ovadvise,1), /* 72 = old vadvise */ - sysnofnl(munmap,2), /* 73 = munmap */ - syss(mprotect,3), /* 74 = mprotect */ - syss(madvise,3), /* 75 = madvise */ - syss(nosys,0), /* 76 was obsolete vhangup */ - syss(nosys,0), /* 77 was obsolete vlimit */ - syss(mincore,3), /* 78 = mincore */ - sysp(getgroups,2), /* 79 = getgroups */ - sysp(setgroups,2), /* 80 = setgroups */ - sysp(getpgrp,0), /* 81 = getpgrp */ - sysp(setpgid,2), /* 82 = setpgid */ - syss(setitimer,3), /* 83 = setitimer */ - compat(wait,1), /* 84 = old wait */ - syss(swapon,1), /* 85 = swapon */ - syss(getitimer,2), /* 86 = getitimer */ - compat(gethostname,2), /* 87 = old gethostname */ - compat(sethostname,2), /* 88 = old sethostname */ - sysp(getdtablesize, 0), /* 89 getdtablesize */ - syss(dup2,2), /* 90 = dup2 */ - syss(nosys,0), /* 91 was obsolete getdopt */ - syss(fcntl,3), /* 92 = fcntl */ - syss(select,5), /* 93 = select */ - syss(nosys,0), /* 94 was obsolete setdopt */ - syss(fsync,1), /* 95 = fsync */ - sysp(setpriority,3), /* 96 = setpriority */ - sysnets(socket,3), /* 97 = socket */ - sysnets(connect,3), /* 98 = connect */ - comaptnet(accept,3), /* 99 = accept */ - sysp(getpriority,2), /* 100 = getpriority */ - comaptnet(send,4), /* 101 = old send */ - comaptnet(recv,4), /* 102 = old recv */ -#ifdef __ppc__ - syss(osigreturn,1), /* 103 = sigreturn ; compat for jaguar*/ + {AC(ogetfsstat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ogetfsstat, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 18 = ogetfsstat */ #else - syss(sigreturn,1), /* 103 = sigreturn */ + {AC(getfsstat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getfsstat, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 18 = getfsstat */ #endif - sysnets(bind,3), /* 104 = bind */ - sysnets(setsockopt,5), /* 105 = setsockopt */ - sysnets(listen,2), /* 106 = listen */ - syss(nosys,0), /* 107 was vtimes */ - compat(sigvec,3), /* 108 = sigvec */ - compat(sigblock,1), /* 109 = sigblock */ - compat(sigsetmask,1), /* 110 = sigsetmask */ - syss(sigsuspend,1), /* 111 = sigpause */ - compat(sigstack,2), /* 112 = sigstack */ - comaptnet(recvmsg,3), /* 113 = recvmsg */ - comaptnet(sendmsg,3), /* 114 = sendmsg */ - syss(nosys,0), /* 115 = old vtrace */ - -/* - * N.B. 
- * The argument count numbers in this table are actually - * the number of UInt32 words that comprise the arguments - * not the number of arguments - * - * This value is not currently used on PPC but Intel Darwin - * does use it and will not work correctly if the values - * are wrong - */ - + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 19 = nosys old lseek */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getpid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 20 = getpid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 21 = nosys old mount */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 22 = nosys old umount */ + {AC(setuid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setuid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 23 = setuid */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getuid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 24 = getuid */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)geteuid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 25 = geteuid */ + {AC(ptrace_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)ptrace, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 26 = ptrace */ + {AC(recvmsg_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)recvmsg, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 27 = recvmsg */ + {AC(sendmsg_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)sendmsg, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 28 = sendmsg */ + {AC(recvfrom_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)recvfrom, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 29 = recvfrom */ + {AC(accept_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)accept, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 30 = accept */ + {AC(getpeername_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getpeername, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 31 = getpeername */ + {AC(getsockname_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getsockname, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 32 = getsockname */ + {AC(access_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)access, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 33 = access */ + {AC(chflags_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)chflags, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 34 = chflags */ + {AC(fchflags_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fchflags, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 35 = fchflags */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sync, NULL, NULL, _SYSCALL_RET_INT_T}, /* 36 = sync */ + {AC(kill_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)kill, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 37 = kill */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 38 = nosys old stat */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getppid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 39 = getppid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 40 = nosys old lstat */ + {AC(dup_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)dup, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 41 = dup */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)pipe, NULL, NULL, _SYSCALL_RET_INT_T}, /* 42 = pipe */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getegid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 43 = getegid */ + {AC(profil_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)profil, 
munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 44 = profil */ + {AC(ktrace_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)ktrace, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 45 = ktrace */ + {AC(sigaction_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigaction, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 46 = sigaction */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getgid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 47 = getgid */ + {AC(sigprocmask_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigprocmask, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 48 = sigprocmask */ + {AC(getlogin_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getlogin, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 49 = getlogin */ + {AC(setlogin_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setlogin, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 50 = setlogin */ + {AC(acct_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)acct, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 51 = acct */ + {AC(sigpending_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigpending, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 52 = sigpending */ + {AC(sigaltstack_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigaltstack, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 53 = sigaltstack */ + {AC(ioctl_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ioctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 54 = ioctl */ + {AC(reboot_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)reboot, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 55 = reboot */ + {AC(revoke_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)revoke, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 56 = revoke */ + {AC(symlink_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)symlink, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 57 = symlink */ + {AC(readlink_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)readlink, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 58 = readlink */ + {AC(execve_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)execve, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 59 = execve */ + {AC(umask_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)umask, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 60 = umask */ + {AC(chroot_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)chroot, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 61 = chroot */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 62 = nosys old fstat */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 63 = nosys used internally , reserved */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 64 = nosys old getpagesize */ + {AC(msync_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)msync, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 65 = msync */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)vfork, NULL, NULL, _SYSCALL_RET_INT_T}, /* 66 = vfork */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 67 = nosys old vread */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 68 = nosys old vwrite */ + {AC(sbrk_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sbrk, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 69 = sbrk */ + {AC(sstk_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sstk, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 70 = sstk */ + {0, 
_SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 71 = nosys old mmap */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ovadvise, NULL, NULL, _SYSCALL_RET_INT_T}, /* 72 = ovadvise old vadvise */ + {AC(munmap_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)munmap, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 73 = munmap */ + {AC(mprotect_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mprotect, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 74 = mprotect */ + {AC(madvise_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)madvise, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 75 = madvise */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 76 = nosys old vhangup */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 77 = nosys old vlimit */ + {AC(mincore_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mincore, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 78 = mincore */ + {AC(getgroups_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 79 = getgroups */ + {AC(setgroups_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 80 = setgroups */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getpgrp, NULL, NULL, _SYSCALL_RET_INT_T}, /* 81 = getpgrp */ + {AC(setpgid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setpgid, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 82 = setpgid */ + {AC(setitimer_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setitimer, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 83 = setitimer */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 84 = nosys old wait */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)swapon, NULL, NULL, _SYSCALL_RET_INT_T}, /* 85 = swapon */ + {AC(getitimer_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getitimer, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 86 = getitimer */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 87 = nosys old gethostname */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 88 = nosys old sethostname */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getdtablesize, NULL, NULL, _SYSCALL_RET_INT_T}, /* 89 = getdtablesize */ + {AC(dup2_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)dup2, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 90 = dup2 */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 91 = nosys old getdopt */ + {AC(fcntl_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)fcntl, munge_wws, munge_ddd, _SYSCALL_RET_INT_T}, /* 92 = fcntl */ + {AC(select_args), _SYSCALL_CANCEL_PRE, KERNEL_FUNNEL, (sy_call_t *)select, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 93 = select */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 94 = nosys old setdopt */ + {AC(fsync_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)fsync, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 95 = fsync */ + {AC(setpriority_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setpriority, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 96 = setpriority */ + {AC(socket_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)socket, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 97 = socket */ + 
{AC(connect_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)connect, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 98 = connect */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 99 = nosys old accept */ + {AC(getpriority_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getpriority, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 100 = getpriority */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 101 = nosys old send */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 102 = nosys old recv */ #ifdef __ppc__ - sysnofnl(ppc_gettimeofday,2), /* 116 = gettimeofday */ -#else - sysnofnl(gettimeofday,2), /* 116 = gettimeofday */ -#endif - sysp(getrusage,2), /* 117 = getrusage */ - sysnets(getsockopt,5), /* 118 = getsockopt */ - syss(nosys,0), /* 119 = old resuba */ - sysp(readv,3), /* 120 = readv */ - sysp(writev,3), /* 121 = writev */ - syss(settimeofday,2), /* 122 = settimeofday */ - syss(fchown,3), /* 123 = fchown */ - syss(fchmod,2), /* 124 = fchmod */ - comaptnet(recvfrom,6), /* 125 = recvfrom */ - compat(setreuid,2), /* 126 = setreuid */ - compat(setregid,2), /* 127 = setregid */ - syss(rename,2), /* 128 = rename */ - compat(truncate,2), /* 129 = old truncate */ - compat(ftruncate,2), /* 130 = ftruncate */ - syss(flock,2), /* 131 = flock */ - syss(mkfifo,2), /* 132 = mkfifo */ - sysnets(sendto,6), /* 133 = sendto */ - sysnets(shutdown,2), /* 134 = shutdown */ - sysnets(socketpair,4), /* 135 = socketpair */ - syss(mkdir,2), /* 136 = mkdir */ - syss(rmdir,1), /* 137 = rmdir */ - syss(utimes,2), /* 138 = utimes */ - syss(futimes,2), /* 139 = futimes */ - syss(adjtime,2), /* 140 = adjtime */ - comaptnet(getpeername,3),/* 141 = getpeername */ - compat(gethostid,0), /* 142 = old gethostid */ - sysp(nosys,0), /* 143 = old sethostid */ - compat(getrlimit,2), /* 144 = old getrlimit */ - compat(setrlimit,2), /* 145 = old setrlimit */ - compat(killpg,2), /* 146 = old killpg */ - syss(setsid,0), /* 147 = setsid */ - syss(nosys,0), /* 148 was setquota */ - syss(nosys,0), /* 149 was qquota */ - comaptnet(getsockname,3),/* 150 = getsockname */ - syss(getpgid,1), /* 151 = getpgid */ - sysp(setprivexec,1),/* 152 = setprivexec */ -#ifdef DOUBLE_ALIGN_PARAMS - syss(pread,6), /* 153 = pread */ - syss(pwrite,6), /* 154 = pwrite */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 103 = nosys old sigreturn */ #else - syss(pread,5), /* 153 = pread */ - syss(pwrite,5), /* 154 = pwrite */ + {AC(sigreturn_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)sigreturn, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 103 = sigreturn */ #endif - syss(nfssvc,2), /* 155 = nfs_svc */ - compat(getdirentries,4), /* 156 = old getdirentries */ - syss(statfs, 2), /* 157 = statfs */ - syss(fstatfs, 2), /* 158 = fstatfs */ - syss(unmount, 2), /* 159 = unmount */ - syss(nosys,0), /* 160 was async_daemon */ - syss(getfh,2), /* 161 = get file handle */ - compat(getdomainname,2), /* 162 = getdomainname */ - compat(setdomainname,2), /* 163 = setdomainname */ - syss(nosys,0), /* 164 */ -#if QUOTA - syss(quotactl, 4), /* 165 = quotactl */ -#else /* QUOTA */ - syss(nosys, 0), /* 165 = not configured */ -#endif /* QUOTA */ - syss(nosys,0), /* 166 was exportfs */ - syss(mount, 4), /* 167 = mount */ - syss(nosys,0), /* 168 was ustat */ - syss(nosys,0), /* 169 = nosys */ - syss(nosys,0), /* 170 was table */ - compat(wait3,3), /* 171 = old 
wait3 */ - syss(nosys,0), /* 172 was rpause */ - syss(nosys,0), /* 173 = nosys */ - syss(nosys,0), /* 174 was getdents */ - syss(nosys,0), /* 175 was gc_control */ - syss(add_profil,4), /* 176 = add_profil */ - syss(nosys,0), /* 177 */ - syss(nosys,0), /* 178 */ - syss(nosys,0), /* 179 */ - sysnofnl(kdebug_trace,6), /* 180 */ - syss(setgid,1), /* 181 */ - syss(setegid,1), /* 182 */ - syss(seteuid,1), /* 183 */ + {AC(bind_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)bind, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 104 = bind */ + {AC(setsockopt_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 105 = setsockopt */ + {AC(listen_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)listen, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 106 = listen */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 107 = nosys old vtimes */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 108 = nosys old sigvec */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 109 = nosys old sigblock */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 110 = nosys old sigsetmask */ + {AC(sigsuspend_args), _SYSCALL_CANCEL_PRE, KERNEL_FUNNEL, (sy_call_t *)sigsuspend, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 111 = sigsuspend */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 112 = nosys old sigstack */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 113 = nosys old recvmsg */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 114 = nosys old sendmsg */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 115 = nosys old vtrace */ #ifdef __ppc__ - syss(sigreturn, 2), /* 184 = nosys */ + {AC(ppc_gettimeofday_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ppc_gettimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 116 = ppc_gettimeofday */ #else - syss(nosys,0), /* 184 = nosys */ + {AC(gettimeofday_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)gettimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 116 = gettimeofday */ #endif - syss(nosys,0), /* 185 = nosys */ - syss(nosys,0), /* 186 = nosys */ - syss(nosys,0), /* 187 = nosys */ - syss(stat,2), /* 188 = stat */ - syss(fstat,2), /* 189 = fstat */ - syss(lstat,2), /* 190 = lstat */ - syss(pathconf,2), /* 191 = pathconf */ - syss(fpathconf,2), /* 192 = fpathconf */ - -/* - * N.B. 
- * The argument count numbers in this table are actually - * the number of UInt32 words that comprise the arguments - * not the number of arguments - * - * This value is not currently used on PPC but Intel Darwin - * does use it and will not work correctly if the values - * are wrong - */ - -#if COMPAT_GETFSSTAT - syss(getfsstat,3), /* 193 = getfsstat */ + {AC(getrusage_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getrusage, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 117 = getrusage */ + {AC(getsockopt_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 118 = getsockopt */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 119 = nosys old resuba */ + {AC(readv_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)readv, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T}, /* 120 = readv */ + {AC(writev_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)writev, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T}, /* 121 = writev */ + {AC(settimeofday_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)settimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 122 = settimeofday */ + {AC(fchown_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 123 = fchown */ + {AC(fchmod_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fchmod, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 124 = fchmod */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 125 = nosys old recvfrom */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 126 = nosys old setreuid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 127 = nosys old setregid */ + {AC(rename_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)rename, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 128 = rename */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 129 = nosys old truncate */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 130 = nosys old ftruncate */ + {AC(flock_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)flock, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 131 = flock */ + {AC(mkfifo_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mkfifo, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 132 = mkfifo */ + {AC(sendto_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)sendto, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 133 = sendto */ + {AC(shutdown_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shutdown, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 134 = shutdown */ + {AC(socketpair_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)socketpair, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 135 = socketpair */ + {AC(mkdir_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mkdir, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 136 = mkdir */ + {AC(rmdir_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)rmdir, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 137 = rmdir */ + {AC(utimes_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)utimes, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 138 = utimes */ + {AC(futimes_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)futimes, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 139 = futimes */ + {AC(adjtime_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)adjtime, munge_ww, munge_dd, 
_SYSCALL_RET_INT_T}, /* 140 = adjtime */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 141 = nosys old getpeername */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 142 = nosys old gethostid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 143 = nosys old sethostid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 144 = nosys old getrlimit */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 145 = nosys old setrlimit */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 146 = nosys old killpg */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setsid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 147 = setsid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 148 = nosys old setquota */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 149 = nosys old qquota */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 150 = nosys old getsockname */ + {AC(getpgid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getpgid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 151 = getpgid */ + {AC(setprivexec_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setprivexec, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 152 = setprivexec */ + {AC(pread_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)pread, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T}, /* 153 = pread */ + {AC(pwrite_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)pwrite, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T}, /* 154 = pwrite */ +#if NFSSERVER + {AC(nfssvc_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)nfssvc, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 155 = nfssvc */ #else - syss(nosys,0), /* 193 is unused */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 155 = nosys */ #endif - syss(getrlimit,2), /* 194 = getrlimit */ - syss(setrlimit,2), /* 195 = setrlimit */ - syss(getdirentries,4), /* 196 = getdirentries */ -#ifdef DOUBLE_ALIGN_PARAMS - syss(mmap,8), /* 197 = mmap */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 156 = nosys old getdirentries */ + {AC(statfs_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)statfs, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 157 = statfs */ + {AC(fstatfs_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fstatfs, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 158 = fstatfs */ + {AC(unmount_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)unmount, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 159 = unmount */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 160 = nosys old async_daemon */ +#if NFSCLIENT + {AC(getfh_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getfh, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 161 = getfh */ #else - syss(mmap,7), /* 197 = mmap */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 161 = nosys */ #endif - syss(nosys,0), /* 198 = __syscall */ -#ifdef DOUBLE_ALIGN_PARAMS - syss(lseek,5), /* 199 = lseek */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 162 = nosys old getdomainname */ + {0, 
_SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 163 = nosys old setdomainname */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 164 = nosys */ + {AC(quotactl_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)quotactl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 165 = quotactl */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 166 = nosys old exportfs */ + {AC(mount_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mount, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 167 = mount */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 168 = nosys old ustat */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 169 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 170 = table old table */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 171 = nosys old wait3 */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 172 = nosys old rpause */ + {AC(waitid_args), _SYSCALL_CANCEL_PRE, KERNEL_FUNNEL, (sy_call_t *)waitid, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 173 = waitid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 174 = nosys old getdents */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 175 = nosys old gc_control */ + {AC(add_profil_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)add_profil, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 176 = add_profil */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 177 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 178 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 179 = nosys */ + {AC(kdebug_trace_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)kdebug_trace, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 180 = kdebug_trace */ + {AC(setgid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setgid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 181 = setgid */ + {AC(setegid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setegid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 182 = setegid */ + {AC(seteuid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)seteuid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 183 = seteuid */ +#ifdef __ppc__ + {AC(sigreturn_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)sigreturn, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 184 = sigreturn */ #else - syss(lseek,4), /* 199 = lseek */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 184 = nosys */ #endif -#ifdef DOUBLE_ALIGN_PARAMS - syss(truncate,4), /* 200 = truncate */ - syss(ftruncate,4), /* 201 = ftruncate */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 185 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 186 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 187 = nosys */ + {AC(stat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)stat, munge_ww, munge_dd, 
_SYSCALL_RET_INT_T}, /* 188 = stat */ + {AC(fstat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fstat, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 189 = fstat */ + {AC(lstat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lstat, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 190 = lstat */ + {AC(pathconf_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)pathconf, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 191 = pathconf */ + {AC(fpathconf_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fpathconf, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 192 = fpathconf */ +#if COMPAT_GETFSSTAT + {AC(getfsstat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getfsstat, munge_wsw, munge_ddd, _SYSCALL_RET_INT_T}, /* 193 = getfsstat */ #else - syss(truncate,3), /* 200 = truncate */ - syss(ftruncate,3), /* 201 = ftruncate */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 193 = nosys */ #endif - syss(__sysctl,6), /* 202 = __sysctl */ - sysp(mlock, 2), /* 203 = mlock */ - syss(munlock, 2), /* 204 = munlock */ - syss(undelete,1), /* 205 = undelete */ -#if NETAT - sysnets(ATsocket,1), /* 206 = ATsocket */ - sysnets(ATgetmsg,4), /* 207 = ATgetmsg*/ - sysnets(ATputmsg,4), /* 208 = ATputmsg*/ - sysnets(ATPsndreq,4), /* 209 = ATPsndreq*/ - sysnets(ATPsndrsp,4), /* 210 = ATPsndrsp*/ - sysnets(ATPgetreq,3), /* 211 = ATPgetreq*/ - sysnets(ATPgetrsp,2), /* 212 = ATPgetrsp*/ - syss(nosys,0), /* 213 = Reserved for AppleTalk */ - syss(kqueue_from_portset_np,1), /* 214 = kqueue_from_portset_np */ - syss(kqueue_portset_np,1), /* 215 = kqueue_portset_np */ + {AC(getrlimit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getrlimit, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 194 = getrlimit */ + {AC(setrlimit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setrlimit, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 195 = setrlimit */ + {AC(getdirentries_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getdirentries, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 196 = getdirentries */ + {AC(mmap_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mmap, munge_wwwwwl, munge_dddddd, _SYSCALL_RET_ADDR_T}, /* 197 = mmap */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 198 = nosys __syscall */ + {AC(lseek_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lseek, munge_wlw, munge_ddd, _SYSCALL_RET_OFF_T}, /* 199 = lseek */ + {AC(truncate_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)truncate, munge_wl, munge_dd, _SYSCALL_RET_INT_T}, /* 200 = truncate */ + {AC(ftruncate_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ftruncate, munge_wl, munge_dd, _SYSCALL_RET_INT_T}, /* 201 = ftruncate */ + {AC(__sysctl_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)__sysctl, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 202 = __sysctl */ + {AC(mlock_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 203 = mlock */ + {AC(munlock_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)munlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 204 = munlock */ + {AC(undelete_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)undelete, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 205 = undelete */ +#ifdef __ppc__ + {AC(ATsocket_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)ATsocket, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 206 = ATsocket */ + {AC(ATgetmsg_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATgetmsg, 
munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 207 = ATgetmsg */ + {AC(ATputmsg_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATputmsg, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 208 = ATputmsg */ + {AC(ATPsndreq_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPsndreq, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 209 = ATPsndreq */ + {AC(ATPsndrsp_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPsndrsp, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 210 = ATPsndrsp */ + {AC(ATPgetreq_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPgetreq, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 211 = ATPgetreq */ + {AC(ATPgetrsp_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)ATPgetrsp, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 212 = ATPgetrsp */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 213 = nosys Reserved for AppleTalk */ #else - syss(nosys,0), /* 206 = Reserved for AppleTalk */ - syss(nosys,0), /* 207 = Reserved for AppleTalk */ - syss(nosys,0), /* 208 = Reserved for AppleTalk */ - syss(nosys,0), /* 209 = Reserved for AppleTalk */ - syss(nosys,0), /* 210 = Reserved for AppleTalk */ - syss(nosys,0), /* 211 = Reserved for AppleTalk */ - syss(nosys,0), /* 212 = Reserved for AppleTalk */ - syss(nosys,0), /* 213 = Reserved for AppleTalk */ - syss(nosys,0), /* 214 = Reserved for AppleTalk */ - syss(nosys,0), /* 215 = Reserved for AppleTalk */ -#endif /* NETAT */ - -/* - * System Calls 216 - 230 are reserved for calls to support HFS/HFS Plus - * file system semantics. Currently, we only use 215-227. The rest is - * for future expansion in anticipation of new MacOS APIs for HFS Plus. - * These calls are not conditionalized becuase while they are specific - * to HFS semantics, they are not specific to the HFS filesystem. - * We expect all filesystems to recognize the call and report that it is - * not supported or to actually implement it. - */ - -/* - * N.B. 
- * The argument count numbers in this table are actually - * the number of UInt32 words that comprise the arguments - * not the number of arguments - * - * This value is not currently used on PPC but Intel Darwin - * does use it and will not work correctly if the values - * are wrong - */ - - syss(nosys,3), /* 216 = HFS make complex file call (multipel forks */ - syss(nosys,2), /* 217 = HFS statv extended stat call for HFS */ - syss(nosys,2), /* 218 = HFS lstatv extended lstat call for HFS */ - syss(nosys,2), /* 219 = HFS fstatv extended fstat call for HFS */ - syss(getattrlist,5), /* 220 = HFS getarrtlist get attribute list cal */ - syss(setattrlist,5), /* 221 = HFS setattrlist set attribute list */ - syss(getdirentriesattr,8), /* 222 = HFS getdirentriesattr get directory attributes */ - syss(exchangedata,3), /* 223 = HFS exchangedata exchange file contents */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 206 = ATsocket */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 207 = ATgetmsg */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 208 = ATputmsg */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 209 = ATPsndreq */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 210 = ATPsndrsp */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 211 = ATPgetreq */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 212 = ATPgetrsp */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 213 = nosys Reserved for AppleTalk */ +#endif /* __ppc__ */ + {AC(kqueue_from_portset_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)kqueue_from_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 214 = kqueue_from_portset_np */ + {AC(kqueue_portset_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)kqueue_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 215 = kqueue_portset_np */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 216 = mkcomplex soon to be obsolete */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 217 = statv soon to be obsolete */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 218 = lstatv soon to be obsolete */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE}, /* 219 = fstatv soon to be obsolete */ + {AC(getattrlist_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getattrlist, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 220 = getattrlist */ + {AC(setattrlist_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setattrlist, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 221 = setattrlist */ + {AC(getdirentriesattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getdirentriesattr, munge_wwwwwwww, munge_dddddddd, _SYSCALL_RET_INT_T}, /* 222 = getdirentriesattr */ + {AC(exchangedata_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)exchangedata, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 223 = exchangedata */ #ifdef __APPLE_API_OBSOLETE - syss(checkuseraccess,6),/* 224 = HFS checkuseraccess check 
access to a file */ + {AC(checkuseraccess_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)checkuseraccess, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 224 = checkuseraccess */ #else - syss(nosys,6),/* 224 = HFS checkuseraccess check access to a file */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 224 = nosys HFS checkuseraccess check access to a file */ #endif /* __APPLE_API_OBSOLETE */ - syss(searchfs,6), /* 225 = HFS searchfs to implement catalog searching */ - syss(delete,1), /* 226 = private delete (Carbon semantics) */ - syss(copyfile,6), /* 227 = copyfile - orignally for AFP */ - syss(nosys,0), /* 228 */ - syss(nosys,0), /* 229 */ - syss(nosys,0), /* 230 */ - sysnets(watchevent,2), /* 231 */ - sysnets(waitevent,2), /* 232 */ - sysnets(modwatch,2), /* 233 */ - syss(nosys,0), /* 234 */ - syss(nosys,0), /* 235 */ - syss(nosys,0), /* 236 */ - syss(nosys,0), /* 237 */ - syss(nosys,0), /* 238 */ - syss(nosys,0), /* 239 */ - syss(nosys,0), /* 240 */ - syss(nosys,0), /* 241 */ - syss(fsctl,4), /* 242 = fsctl */ - syss(nosys,0), /* 243 */ - syss(nosys,0), /* 244 */ - syss(nosys,0), /* 245 */ - syss(nosys,0), /* 246 */ - syss(nfsclnt,2), /* 247 = nfsclnt*/ - syss(fhopen,2), /* 248 = fhopen */ - syss(nosys,0), /* 249 */ - syss(minherit,3), /* 250 = minherit */ - syss(semsys,5), /* 251 = semsys */ - syss(msgsys,6), /* 252 = msgsys */ - syss(shmsys,4), /* 253 = shmsys */ - syss(semctl,4), /* 254 = semctl */ - syss(semget,3), /* 255 = semget */ - syss(semop,3), /* 256 = semop */ - syss(semconfig,1), /* 257 = semconfig */ - syss(msgctl,3), /* 258 = msgctl */ - syss(msgget,2), /* 259 = msgget */ - syss(msgsnd,4), /* 260 = msgsnd */ - syss(msgrcv,5), /* 261 = msgrcv */ - syss(shmat,3), /* 262 = shmat */ - syss(shmctl,3), /* 263 = shmctl */ - syss(shmdt,1), /* 264 = shmdt */ - syss(shmget,3), /* 265 = shmget */ - syss(shm_open,3), /* 266 = shm_open */ - syss(shm_unlink,1), /* 267 = shm_unlink */ - syss(sem_open,4), /* 268 = sem_open */ - syss(sem_close,1), /* 269 = sem_close */ - syss(sem_unlink,1), /* 270 = sem_unlink */ - syss(sem_wait,1), /* 271 = sem_wait */ - syss(sem_trywait,1), /* 272 = sem_trywait */ - syss(sem_post,1), /* 273 = sem_post */ - syss(sem_getvalue,2), /* 274 = sem_getvalue */ - syss(sem_init,3), /* 275 = sem_init */ - syss(sem_destroy,1), /* 276 = sem_destroy */ - syss(nosys,0), /* 277 */ - syss(nosys,0), /* 278 */ - syss(nosys,0), /* 279 */ - syss(nosys,0), /* 280 */ - syss(nosys,0), /* 281 */ - syss(nosys,0), /* 282 */ - syss(nosys,0), /* 283 */ - syss(nosys,0), /* 284 */ - syss(nosys,0), /* 285 */ - syss(nosys,0), /* 286 */ - syss(nosys,0), /* 287 */ - syss(nosys,0), /* 288 */ - syss(fmod_watch_enable, 1), /* 289 = fmod_watching */ - syss(fmod_watch, 4), /* 290 = fmod_watch */ - syss(nosys,0), /* 291 */ - syss(nosys,0), /* 292 */ - syss(nosys,0), /* 293 */ - syss(nosys,0), /* 294 */ - syss(nosys,0), /* 295 */ - syss(load_shared_file,7), /* 296 = load_shared_file */ - syss(reset_shared_file,3), /* 297 = reset_shared_file */ - syss(new_system_shared_regions,0), /* 298 = new_system_shared_regions */ - syss(nosys,0), /* 299 */ - syss(nosys,0), /* 300 */ - syss(nosys,0), /* 301 */ - syss(nosys,0), /* 302 */ - syss(nosys,0), /* 303 */ - syss(nosys,0), /* 304 */ - syss(nosys,0), /* 305 */ - syss(nosys,0), /* 306 */ - syss(nosys,0), /* 307 */ - syss(nosys,0), /* 308 */ - syss(nosys,0), /* 309 */ - syss(getsid,1), /* 310 = getsid */ - syss(nosys,0), /* 311 */ - syss(nosys,0), /* 312 */ - sysnofnl(aio_fsync,1), 
/* 313 = aio_fsync */ - sysnofnl(aio_return,1), /* 314 = aio_return */ - sysnofnl(aio_suspend,3), /* 315 = aio_suspend */ - sysnofnl(aio_cancel,2), /* 316 = aio_cancel */ - sysnofnl(aio_error,1), /* 317 = aio_error */ - sysnofnl(aio_read,1), /* 318 = aio_read */ - sysnofnl(aio_write,1), /* 319 = aio_write */ - sysnofnl(lio_listio,4), /* 320 = lio_listio */ - syss(nosys,0), /* 321 */ - syss(nosys,0), /* 322 */ - syss(nosys,0), /* 323 */ - syss(mlockall,1), /* 324 = mlockall*/ - syss(munlockall,1), /* 325 = munlockall*/ - syss(nosys,0), /* 326 */ - sysp(issetugid,0), /* 327 = issetugid */ - syss(__pthread_kill,2), /* 328 */ - syss(pthread_sigmask,3), /* 329 */ - syss(sigwait,2), /* 330 */ - syss(__disable_threadsignal,1), /* 331 */ - syss(nosys,0), /* 332 */ - syss(nosys,0), /* 333 */ - syss(nosys,0), /* 334 */ - syss(utrace,2), /* 335 = utrace */ - syss(nosys,0), /* 336 */ - syss(nosys,0), /* 337 */ - syss(nosys,0), /* 338 */ - syss(nosys,0), /* 339 */ - syss(nosys,0), /* 340 */ - syss(nosys,0), /* 341 */ - syss(nosys,0), /* 342 */ - syss(nosys,0), /* 343 */ - syss(nosys,0), /* 344 */ - syss(nosys,0), /* 345 */ - syss(nosys,0), /* 346 */ - syss(nosys,0), /* 347 */ - syss(nosys,0), /* 348 */ - syss(nosys,0), /* 349 */ - syss(audit,2), /* 350 */ - syss(auditon,3), /* 351 */ - syss(nosys,0), /* 352 */ - syss(getauid,1), /* 353 */ - syss(setauid,1), /* 354 */ - syss(getaudit,1), /* 355 */ - syss(setaudit,1), /* 356 */ - syss(getaudit_addr,2), /* 357 */ - syss(setaudit_addr,2), /* 358 */ - syss(auditctl,1), /* 359 */ - syss(nosys,0), /* 360 */ - syss(nosys,0), /* 361 */ - syss(kqueue,0), /* 362 = kqueue */ - syss(kevent,6), /* 363 = kevent */ - syss(nosys,0), /* 364 */ - syss(nosys,0), /* 365 */ - syss(nosys,0), /* 366 */ - syss(nosys,0), /* 367 */ - syss(nosys,0), /* 368 */ - syss(nosys,0) /* 369 */ - -/* - * N.B. 
- * The argument count numbers in this table are actually - * the number of UInt32 words that comprise the arguments - * not the number of arguments - * - * This value is not currently used on PPC but Intel Darwin - * does use it and will not work correctly if the values - * are wrong - */ - + {AC(searchfs_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)searchfs, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 225 = searchfs */ + {AC(delete_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)delete, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 226 = delete private delete ( Carbon semantics ) */ + {AC(copyfile_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)copyfile, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 227 = copyfile */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 228 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 229 = nosys */ + {AC(poll_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)poll, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 230 = poll */ + {AC(watchevent_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)watchevent, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 231 = watchevent */ + {AC(waitevent_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)waitevent, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 232 = waitevent */ + {AC(modwatch_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL|UNSAFE_64BIT, (sy_call_t *)modwatch, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 233 = modwatch */ + {AC(getxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_SSIZE_T}, /* 234 = getxattr */ + {AC(fgetxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fgetxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_SSIZE_T}, /* 235 = fgetxattr */ + {AC(setxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 236 = setxattr */ + {AC(fsetxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fsetxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 237 = fsetxattr */ + {AC(removexattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)removexattr, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 238 = removexattr */ + {AC(fremovexattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fremovexattr, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 239 = fremovexattr */ + {AC(listxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)listxattr, munge_wwww, munge_dddd, _SYSCALL_RET_SSIZE_T}, /* 240 = listxattr */ + {AC(flistxattr_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)flistxattr, munge_wwww, munge_dddd, _SYSCALL_RET_SSIZE_T}, /* 241 = flistxattr */ + {AC(fsctl_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)fsctl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 242 = fsctl */ + {AC(initgroups_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)initgroups, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 243 = initgroups */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 244 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 245 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 246 = nosys */ +#if NFSCLIENT + {AC(nfsclnt_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)nfsclnt, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 247 = 
nfsclnt */ + {AC(fhopen_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)fhopen, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 248 = fhopen */ +#else + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 247 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 248 = nosys */ +#endif + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 249 = nosys */ + {AC(minherit_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)minherit, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 250 = minherit */ + {AC(semsys_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)semsys, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 251 = semsys */ + {AC(msgsys_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)msgsys, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 252 = msgsys */ + {AC(shmsys_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shmsys, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 253 = shmsys */ + {AC(semctl_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)semctl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 254 = semctl */ + {AC(semget_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)semget, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 255 = semget */ + {AC(semop_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)semop, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 256 = semop */ + {AC(semconfig_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)semconfig, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 257 = semconfig */ + {AC(msgctl_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)msgctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 258 = msgctl */ + {AC(msgget_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)msgget, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 259 = msgget */ + {AC(msgsnd_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)msgsnd, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 260 = msgsnd */ + {AC(msgrcv_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)msgrcv, munge_wwwsw, munge_ddddd, _SYSCALL_RET_SSIZE_T}, /* 261 = msgrcv */ + {AC(shmat_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shmat, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 262 = shmat */ + {AC(shmctl_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shmctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 263 = shmctl */ + {AC(shmdt_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shmdt, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 264 = shmdt */ + {AC(shmget_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shmget, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 265 = shmget */ + {AC(shm_open_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shm_open, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 266 = shm_open */ + {AC(shm_unlink_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)shm_unlink, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 267 = shm_unlink */ + {AC(sem_open_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_open, munge_wwww, munge_dddd, _SYSCALL_RET_ADDR_T}, /* 268 = sem_open */ + {AC(sem_close_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_close, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 269 = sem_close */ + {AC(sem_unlink_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_unlink, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 270 = sem_unlink */ + {AC(sem_wait_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)sem_wait, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 271 = sem_wait */ + 
{AC(sem_trywait_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_trywait, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 272 = sem_trywait */ + {AC(sem_post_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_post, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 273 = sem_post */ + {AC(sem_getvalue_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_getvalue, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 274 = sem_getvalue */ + {AC(sem_init_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_init, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 275 = sem_init */ + {AC(sem_destroy_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)sem_destroy, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 276 = sem_destroy */ + {AC(open_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)open_extended, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 277 = open_extended */ + {AC(umask_extended_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)umask_extended, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 278 = umask_extended */ + {AC(stat_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)stat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 279 = stat_extended */ + {AC(lstat_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lstat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 280 = lstat_extended */ + {AC(fstat_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fstat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 281 = fstat_extended */ + {AC(chmod_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)chmod_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 282 = chmod_extended */ + {AC(fchmod_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)fchmod_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 283 = fchmod_extended */ + {AC(access_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)access_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 284 = access_extended */ + {AC(settid_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)settid, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 285 = settid */ + {AC(gettid_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)gettid, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 286 = gettid */ + {AC(setsgroups_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setsgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 287 = setsgroups */ + {AC(getsgroups_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getsgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 288 = getsgroups */ + {AC(setwgroups_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)setwgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 289 = setwgroups */ + {AC(getwgroups_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)getwgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 290 = getwgroups */ + {AC(mkfifo_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mkfifo_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 291 = mkfifo_extended */ + {AC(mkdir_extended_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mkdir_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T}, /* 292 = mkdir_extended */ + {AC(identitysvc_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)identitysvc, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 293 = identitysvc */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 294 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, 
_SYSCALL_RET_INT_T}, /* 295 = nosys */ + {AC(load_shared_file_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)load_shared_file, munge_wwwwwww, munge_ddddddd, _SYSCALL_RET_INT_T}, /* 296 = load_shared_file */ + {AC(reset_shared_file_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)reset_shared_file, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 297 = reset_shared_file */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)new_system_shared_regions, NULL, NULL, _SYSCALL_RET_INT_T}, /* 298 = new_system_shared_regions */ + {AC(shared_region_map_file_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)shared_region_map_file_np, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 299 = shared_region_map_file_np */ + {AC(shared_region_make_private_np_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL|UNSAFE_64BIT, (sy_call_t *)shared_region_make_private_np, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 300 = shared_region_make_private_np */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 301 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 302 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 303 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 304 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 305 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 306 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 307 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 308 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 309 = nosys */ + {AC(getsid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getsid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 310 = getsid */ + {AC(settid_with_pid_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)settid_with_pid, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 311 = settid_with_pid */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 312 = nosys */ + {AC(aio_fsync_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_fsync, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 313 = aio_fsync */ + {AC(aio_return_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_return, munge_w, munge_d, _SYSCALL_RET_SSIZE_T}, /* 314 = aio_return */ + {AC(aio_suspend_args), _SYSCALL_CANCEL_PRE, NO_FUNNEL, (sy_call_t *)aio_suspend, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 315 = aio_suspend */ + {AC(aio_cancel_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_cancel, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 316 = aio_cancel */ + {AC(aio_error_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_error, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 317 = aio_error */ + {AC(aio_read_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_read, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 318 = aio_read */ + {AC(aio_write_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)aio_write, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 319 = aio_write */ + {AC(lio_listio_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lio_listio, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T}, /* 320 = lio_listio */ 
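/*
 * [Editorial sketch -- not part of the patch.]  The seven-field rows in
 * this table read as: argument-block size, cancellation class, funnel
 * requirement, handler, 32-bit argument munger, 64-bit argument munger,
 * and return type.  Taking the lseek row shown earlier,
 *
 *   {AC(lseek_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lseek,
 *    munge_wlw, munge_ddd, _SYSCALL_RET_OFF_T}
 *
 * lseek is not a cancellation point (_SYSCALL_CANCEL_PRE/_POST appear to
 * mark calls that honor pthread cancellation before or after the handler
 * runs), needs no kernel funnel, and takes a 32-bit word, a 64-bit long,
 * and a 32-bit word ("wlw": fd, off_t, whence) from 32-bit callers, or
 * three direct 64-bit values ("ddd") from 64-bit callers.  The struct and
 * munger below are illustrative assumptions, not the kernel's real
 * definitions; the field names, types, and register layout are guesses
 * made only to show the mechanism.
 */
#include <stdint.h>

typedef int32_t sy_call_t(void *proc, void *uap, int32_t *retval);
typedef void    sy_munge_t(const void *in_regs, void *out_uap);

struct sysent_sketch {
	int16_t     sy_narg;		/* AC(x), derived from sizeof(struct x) */
	int8_t      sy_cancel;		/* _SYSCALL_CANCEL_{NONE,PRE,POST} */
	int8_t      sy_funnel;		/* KERNEL_FUNNEL or NO_FUNNEL,
					 * possibly OR-ed with UNSAFE_64BIT */
	sy_call_t  *sy_call;		/* the syscall handler itself */
	sy_munge_t *sy_arg_munge32;	/* munge_w...: 32-bit caller */
	sy_munge_t *sy_arg_munge64;	/* munge_d...: 64-bit caller */
	int32_t     sy_return_type;	/* _SYSCALL_RET_* */
};

/*
 * What a "munge_wlw" must accomplish, expressed in portable C: repack a
 * 32-bit process's trap arguments (one 32-bit cell per 'w', two cells per
 * 'l', high word presumably first on big-endian PPC) into the uniform
 * 64-bit slots the handler dereferences.  The kernel's real mungers may
 * work in place, in assembly, and with different calling conventions.
 */
static void
munge_wlw_sketch(const uint32_t in[4], uint64_t out[3])
{
	out[0] = in[0];					/* w: e.g. fd */
	out[1] = ((uint64_t)in[1] << 32) | in[2];	/* l: 64-bit off_t */
	out[2] = in[3];					/* w: e.g. whence */
}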
+ {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 321 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 322 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 323 = nosys */ + {AC(mlockall_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)mlockall, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 324 = mlockall */ + {AC(munlockall_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)munlockall, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 325 = munlockall */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 326 = nosys */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)issetugid, NULL, NULL, _SYSCALL_RET_INT_T}, /* 327 = issetugid */ + {AC(__pthread_kill_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)__pthread_kill, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 328 = __pthread_kill */ + {AC(pthread_sigmask_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)pthread_sigmask, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 329 = pthread_sigmask */ + {AC(sigwait_args), _SYSCALL_CANCEL_PRE, KERNEL_FUNNEL, (sy_call_t *)sigwait, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 330 = sigwait */ + {AC(__disable_threadsignal_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)__disable_threadsignal, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 331 = __disable_threadsignal */ + {AC(__pthread_markcancel_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)__pthread_markcancel, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 332 = __pthread_markcancel */ + {AC(__pthread_canceled_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)__pthread_canceled, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 333 = __pthread_canceled */ + {AC(__semwait_signal_args), _SYSCALL_CANCEL_POST, NO_FUNNEL, (sy_call_t *)__semwait_signal, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 334 = __semwait_signal */ + {AC(utrace_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)utrace, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 335 = utrace */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 336 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 337 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 338 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 339 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 340 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 341 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 342 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 343 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 344 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 345 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 346 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 347 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, 
_SYSCALL_RET_INT_T}, /* 348 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 349 = nosys */ + {AC(audit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)audit, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 350 = audit */ + {AC(auditon_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)auditon, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 351 = auditon */ + {0, _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 352 = nosys */ + {AC(getauid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getauid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 353 = getauid */ + {AC(setauid_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setauid, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 354 = setauid */ + {AC(getaudit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getaudit, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 355 = getaudit */ + {AC(setaudit_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setaudit, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 356 = setaudit */ + {AC(getaudit_addr_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)getaudit_addr, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 357 = getaudit_addr */ + {AC(setaudit_addr_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)setaudit_addr, munge_ww, munge_dd, _SYSCALL_RET_INT_T}, /* 358 = setaudit_addr */ + {AC(auditctl_args), _SYSCALL_CANCEL_NONE, KERNEL_FUNNEL, (sy_call_t *)auditctl, munge_w, munge_d, _SYSCALL_RET_INT_T}, /* 359 = auditctl */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 360 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 361 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)kqueue, NULL, NULL, _SYSCALL_RET_INT_T}, /* 362 = kqueue */ + {AC(kevent_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)kevent, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T}, /* 363 = kevent */ + {AC(lchown_args), _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)lchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T}, /* 364 = lchown */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 365 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 366 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 367 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 368 = nosys */ + {0, _SYSCALL_CANCEL_NONE, NO_FUNNEL, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T}, /* 369 = nosys */ }; int nsysent = sizeof(sysent) / sizeof(sysent[0]); diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index afa4305d1..381b74fe2 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -1,7 +1,7 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* - * @APPLE_LICENSE_HEADER_START@ + * @Apple_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the @@ -22,19 +22,20 @@ #include +#include +#include +#include +#include +#include +#include +#include + #define HZ 100 #include #include #include #include -#include -#include -#include -#include -#include -#include - #include #include #include @@ -50,6 +51,13 @@ unsigned int kd_entropy_count = 0; unsigned int kd_entropy_indx = 0; unsigned int kd_entropy_buftomem = 0; + +#define SLOW_NOLOG 0x01 +#define SLOW_CHECKS 0x02 +#define SLOW_ENTROPY 0x04 + +unsigned int kdebug_slowcheck=SLOW_NOLOG; + /* kd_buf kd_buffer[kd_bufsize/sizeof(kd_buf)]; */ kd_buf * kd_bufptr; unsigned int kd_buftomem=0; @@ -59,7 +67,6 @@ kd_buf * kd_readlast; unsigned int nkdbufs = 8192; unsigned int kd_bufsize = 0; unsigned int kdebug_flags = 0; -unsigned int kdebug_nolog=1; unsigned int kdlog_beg=0; unsigned int kdlog_end=0; unsigned int kdlog_value1=0; @@ -68,7 +75,16 @@ unsigned int kdlog_value3=0; unsigned int kdlog_value4=0; unsigned long long kd_prev_timebase = 0LL; -decl_simple_lock_data(,kd_trace_lock); + +static lck_mtx_t * kd_trace_mtx; +static lck_grp_t * kd_trace_mtx_grp; +static lck_attr_t * kd_trace_mtx_attr; +static lck_grp_attr_t *kd_trace_mtx_grp_attr; + +static lck_spin_t * kd_trace_lock; +static lck_grp_t * kd_trace_lock_grp; +static lck_attr_t * kd_trace_lock_attr; +static lck_grp_attr_t *kd_trace_lock_grp_attr; kd_threadmap *kd_mapptr = 0; unsigned int kd_mapsize = 0; @@ -83,15 +99,6 @@ pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer extern natural_t rtclock_decrementer_min; #endif /* ppc */ -struct kdebug_args { - int code; - int arg1; - int arg2; - int arg3; - int arg4; - int arg5; -}; - /* task to string structure */ struct tts { @@ -119,17 +126,18 @@ typedef void (*kd_chudhook_fn) (unsigned int debugid, unsigned int arg1, kd_chudhook_fn kdebug_chudhook = 0; /* pointer to CHUD toolkit function */ + /* Support syscall SYS_kdebug_trace */ kdebug_trace(p, uap, retval) struct proc *p; - struct kdebug_args *uap; + struct kdebug_trace_args *uap; register_t *retval; { - if (kdebug_nolog) - return(EINVAL); + if ( (kdebug_enable == 0) ) + return(EINVAL); - kernel_debug(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, 0); - return(0); + kernel_debug(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, 0); + return(0); } @@ -141,18 +149,20 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; struct proc *curproc; int s; unsigned long long now; - mach_timespec_t *tsp; + if (kdebug_enable & KDEBUG_ENABLE_CHUD) { - if (kdebug_chudhook) - kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); + if (kdebug_chudhook) + kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); - if (!((kdebug_enable & KDEBUG_ENABLE_ENTROPY) || - (kdebug_enable & KDEBUG_ENABLE_TRACE))) - return; + if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) + return; } - s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kd_trace_lock); + + if (kdebug_slowcheck == 0) + goto record_trace; if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) { @@ -166,16 +176,17 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; { /* Disable entropy collection */ kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck &= ~SLOW_ENTROPY; } } - if (kdebug_nolog) + if ( (kdebug_slowcheck & SLOW_NOLOG) ) { + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } - - 
usimple_lock(&kd_trace_lock); + if (kdebug_flags & KDBG_PIDCHECK) { /* If kdebug flag is not set for current proc, return */ @@ -183,7 +194,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if ((curproc && !(curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -195,7 +206,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if ((curproc && (curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -203,10 +214,10 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if (kdebug_flags & KDBG_RANGECHECK) { - if ((debugid < kdlog_beg) || (debugid > kdlog_end) + if ((debugid < kdlog_beg) || (debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE)) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -219,35 +230,35 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; (debugid & DBG_FUNC_MASK) != kdlog_value4 && (debugid >> 24 != DBG_TRACE)) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } } + +record_trace: kd = kd_bufptr; kd->debugid = debugid; kd->arg1 = arg1; kd->arg2 = arg2; kd->arg3 = arg3; kd->arg4 = arg4; - kd->arg5 = (int)current_act(); - if (cpu_number()) - kd->arg5 |= KDBG_CPU_MASK; + kd->arg5 = (int)current_thread(); - now = kd->timestamp = mach_absolute_time(); + now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; /* Watch for out of order timestamps */ if (now < kd_prev_timebase) { - kd->timestamp = ++kd_prev_timebase; + now = ++kd_prev_timebase & KDBG_TIMESTAMP_MASK; } else { /* Then just store the previous timestamp */ kd_prev_timebase = now; } - + kd->timestamp = now | (((uint64_t)cpu_number()) << KDBG_CPU_SHIFT); kd_bufptr++; @@ -255,10 +266,10 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; kd_bufptr = kd_buffer; if (kd_bufptr == kd_readlast) { if (kdebug_flags & KDBG_NOWRAP) - kdebug_nolog = 1; + kdebug_slowcheck |= SLOW_NOLOG; kdebug_flags |= KDBG_WRAPPED; } - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); } @@ -270,26 +281,27 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; struct proc *curproc; int s; unsigned long long now; - mach_timespec_t *tsp; if (kdebug_enable & KDEBUG_ENABLE_CHUD) { - if (kdebug_chudhook) - (void)kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); + if (kdebug_chudhook) + (void)kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); - if (!((kdebug_enable & KDEBUG_ENABLE_ENTROPY) || - (kdebug_enable & KDEBUG_ENABLE_TRACE))) - return; + if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) + return; } - s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kd_trace_lock); - if (kdebug_nolog) + if (kdebug_slowcheck == 0) + goto record_trace1; + + if ( (kdebug_slowcheck & SLOW_NOLOG) ) { + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } - usimple_lock(&kd_trace_lock); if (kdebug_flags & KDBG_PIDCHECK) { /* If kdebug flag is not set for current proc, return */ @@ -297,7 +309,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if ((curproc && !(curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) { - usimple_unlock(&kd_trace_lock); + 
lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -309,7 +321,7 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if ((curproc && (curproc->p_flag & P_KDEBUG)) && ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -317,10 +329,10 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; if (kdebug_flags & KDBG_RANGECHECK) { - if ((debugid < kdlog_beg) || (debugid > kdlog_end) + if ((debugid < kdlog_beg) || (debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE)) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } @@ -333,12 +345,13 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; (debugid & DBG_FUNC_MASK) != kdlog_value4 && (debugid >> 24 != DBG_TRACE)) { - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); return; } } +record_trace1: kd = kd_bufptr; kd->debugid = debugid; kd->arg1 = arg1; @@ -346,20 +359,21 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; kd->arg3 = arg3; kd->arg4 = arg4; kd->arg5 = arg5; - now = kd->timestamp = mach_absolute_time(); + + now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; /* Watch for out of order timestamps */ if (now < kd_prev_timebase) { - /* timestamps are out of order -- adjust */ - kd->timestamp = ++kd_prev_timebase; + now = ++kd_prev_timebase & KDBG_TIMESTAMP_MASK; } else { /* Then just store the previous timestamp */ kd_prev_timebase = now; } + kd->timestamp = now | (((uint64_t)cpu_number()) << KDBG_CPU_SHIFT); kd_bufptr++; @@ -367,24 +381,65 @@ unsigned int debugid, arg1, arg2, arg3, arg4, arg5; kd_bufptr = kd_buffer; if (kd_bufptr == kd_readlast) { if (kdebug_flags & KDBG_NOWRAP) - kdebug_nolog = 1; + kdebug_slowcheck |= SLOW_NOLOG; kdebug_flags |= KDBG_WRAPPED; } - usimple_unlock(&kd_trace_lock); + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); } +static void +kdbg_lock_init() +{ + + if (kdebug_flags & KDBG_LOCKINIT) + return; + /* + * allocate lock group attribute and group + */ + kd_trace_lock_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(kd_trace_lock_grp_attr); + kd_trace_lock_grp = lck_grp_alloc_init("kdebug", kd_trace_lock_grp_attr); + + kd_trace_mtx_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(kd_trace_mtx_grp_attr); + kd_trace_mtx_grp = lck_grp_alloc_init("kdebug", kd_trace_mtx_grp_attr); + + /* + * allocate the lock attribute + */ + kd_trace_lock_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(kd_trace_lock_attr); + + kd_trace_mtx_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(kd_trace_mtx_attr); + + + /* + * allocate and initialize spin lock and mutex + */ + kd_trace_lock = lck_spin_alloc_init(kd_trace_lock_grp, kd_trace_lock_attr); + kd_trace_mtx = lck_mtx_alloc_init(kd_trace_mtx_grp, kd_trace_mtx_attr); + + kdebug_flags |= KDBG_LOCKINIT; +} + + +int kdbg_bootstrap() { + kd_bufsize = nkdbufs * sizeof(kd_buf); + if (kmem_alloc(kernel_map, &kd_buftomem, (vm_size_t)kd_bufsize) == KERN_SUCCESS) - kd_buffer = (kd_buf *) kd_buftomem; - else kd_buffer= (kd_buf *) 0; + kd_buffer = (kd_buf *) kd_buftomem; + else + kd_buffer= (kd_buf *) 0; kdebug_flags &= ~KDBG_WRAPPED; + if (kd_buffer) { - simple_lock_init(&kd_trace_lock); kdebug_flags |= (KDBG_INIT | KDBG_BUFINIT); kd_bufptr = kd_buffer; kd_buflast = &kd_bufptr[nkdbufs]; @@ -401,12 +456,22 @@ kdbg_bootstrap() kdbg_reinit() { - int x; + int s; int ret=0; - /* 
Disable trace collecting */ + /* + * Disable trace collecting + * First make sure we're not in + * the middle of cutting a trace + */ + s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kd_trace_lock); + kdebug_enable &= ~KDEBUG_ENABLE_TRACE; - kdebug_nolog = 1; + kdebug_slowcheck |= SLOW_NOLOG; + + lck_spin_unlock(kd_trace_lock); + ml_set_interrupts_enabled(s); if ((kdebug_flags & KDBG_INIT) && (kdebug_flags & KDBG_BUFINIT) && kd_bufsize && kd_buffer) kmem_free(kernel_map, (vm_offset_t)kd_buffer, kd_bufsize); @@ -476,7 +541,8 @@ void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, lo *arg4=dbg_parms[3]; } -kdbg_resolve_map(thread_act_t th_act, krt_t *t) +static void +kdbg_resolve_map(thread_t th_act, krt_t *t) { kd_threadmap *mapptr; @@ -565,11 +631,12 @@ void kdbg_mapinit() if (p->p_flag & P_WEXIT) continue; - if (task_reference_try(p->task)) { - tts_mapptr[i].task = p->task; + if (p->task) { + task_reference(p->task); + tts_mapptr[i].task = p->task; tts_mapptr[i].pid = p->p_pid; - (void)strncpy(&tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm) - 1); - i++; + (void)strncpy(&tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm) - 1); + i++; } } tts_count = i; @@ -594,14 +661,29 @@ void kdbg_mapinit() } } -kdbg_clear() +static void +kdbg_clear(void) { -int x; + int s; + + /* + * Clean up the trace buffer + * First make sure we're not in + * the middle of cutting a trace + */ + s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kd_trace_lock); - /* Clean up the trace buffer */ - global_state_pid = -1; kdebug_enable &= ~KDEBUG_ENABLE_TRACE; - kdebug_nolog = 1; + kdebug_slowcheck = SLOW_NOLOG; + + if (kdebug_enable & KDEBUG_ENABLE_ENTROPY) + kdebug_slowcheck |= SLOW_ENTROPY; + + lck_spin_unlock(kd_trace_lock); + ml_set_interrupts_enabled(s); + + global_state_pid = -1; kdebug_flags &= ~KDBG_BUFINIT; kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK); @@ -638,6 +720,8 @@ kdbg_setpid(kd_regtype *kdr) { kdebug_flags |= KDBG_PIDCHECK; kdebug_flags &= ~KDBG_PIDEXCLUDE; + kdebug_slowcheck |= SLOW_CHECKS; + p->p_flag |= P_KDEBUG; } else /* turn off pid check for this pid value */ @@ -673,6 +757,8 @@ kdbg_setpidex(kd_regtype *kdr) { kdebug_flags |= KDBG_PIDEXCLUDE; kdebug_flags &= ~KDBG_PIDCHECK; + kdebug_slowcheck |= SLOW_CHECKS; + p->p_flag |= P_KDEBUG; } else /* turn off pid exclusion for this pid value */ @@ -703,7 +789,7 @@ kdbg_setrtcdec(kd_regtype *kdr) rtclock_decrementer_min = decval; #else else - ret = EOPNOTSUPP; + ret = ENOTSUP; #endif /* ppc */ return(ret); @@ -723,6 +809,7 @@ kdbg_setreg(kd_regtype * kdr) kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */ kdebug_flags |= (KDBG_RANGECHECK | KDBG_CLASSTYPE); + kdebug_slowcheck |= SLOW_CHECKS; break; case KDBG_SUBCLSTYPE : val_1 = (kdr->value1 & 0xff); @@ -733,6 +820,7 @@ kdbg_setreg(kd_regtype * kdr) kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */ kdebug_flags |= (KDBG_RANGECHECK | KDBG_SUBCLSTYPE); + kdebug_slowcheck |= SLOW_CHECKS; break; case KDBG_RANGETYPE : kdlog_beg = (kdr->value1); @@ -740,6 +828,7 @@ kdbg_setreg(kd_regtype * kdr) kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~KDBG_VALCHECK; /* Turn off specific value check */ kdebug_flags |= (KDBG_RANGECHECK | KDBG_RANGETYPE); + kdebug_slowcheck |= SLOW_CHECKS; break; case KDBG_VALCHECK: kdlog_value1 = (kdr->value1); @@ 
-749,9 +838,16 @@ kdbg_setreg(kd_regtype * kdr) kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kdebug_flags &= ~KDBG_RANGECHECK; /* Turn off range check */ kdebug_flags |= KDBG_VALCHECK; /* Turn on specific value check */ + kdebug_slowcheck |= SLOW_CHECKS; break; case KDBG_TYPENONE : kdebug_flags &= (unsigned int)~KDBG_CKTYPES; + + if ( (kdebug_flags & (KDBG_RANGECHECK | KDBG_VALCHECK | KDBG_PIDCHECK | KDBG_PIDEXCLUDE)) ) + kdebug_slowcheck |= SLOW_CHECKS; + else + kdebug_slowcheck &= ~SLOW_CHECKS; + kdlog_beg = 0; kdlog_end = 0; break; @@ -805,8 +901,8 @@ kdbg_getreg(kd_regtype * kdr) } - -kdbg_readmap(kd_threadmap *buffer, size_t *number) +int +kdbg_readmap(user_addr_t buffer, size_t *number) { int avail = *number; int ret = 0; @@ -844,7 +940,8 @@ kdbg_readmap(kd_threadmap *buffer, size_t *number) return(ret); } -kdbg_getentropy (mach_timespec_t * buffer, size_t *number, int ms_timeout) +int +kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout) { int avail = *number; int ret = 0; @@ -878,11 +975,13 @@ kdbg_getentropy (mach_timespec_t * buffer, size_t *number, int ms_timeout) /* Enable entropy sampling */ kdebug_enable |= KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck |= SLOW_ENTROPY; ret = tsleep (kdbg_getentropy, PRIBIO | PCATCH, "kd_entropy", (ms_timeout/(1000/HZ))); /* Disable entropy sampling */ kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck &= ~SLOW_ENTROPY; *number = 0; ret = 0; @@ -919,8 +1018,8 @@ void kdbg_control_chud(int val, void *fn) { if (val) { /* enable chudhook */ - kdebug_enable |= KDEBUG_ENABLE_CHUD; kdebug_chudhook = fn; + kdebug_enable |= KDEBUG_ENABLE_CHUD; } else { /* disable chudhook */ @@ -930,84 +1029,103 @@ void kdbg_control_chud(int val, void *fn) } -kdbg_control(name, namelen, where, sizep) -int *name; -u_int namelen; -char *where; -size_t *sizep; +kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) { -int ret=0; -int size=*sizep; -int max_entries; -unsigned int value = name[1]; -kd_regtype kd_Reg; -kbufinfo_t kd_bufinfo; - -pid_t curpid; -struct proc *p, *curproc; - - if (name[0] == KERN_KDGETBUF) { - /* - Does not alter the global_state_pid - This is a passive request. - */ - if (size < sizeof(kd_bufinfo.nkdbufs)) { - /* - There is not enough room to return even - the first element of the info structure. + int ret=0; + int size=*sizep; + int max_entries; + unsigned int value = name[1]; + kd_regtype kd_Reg; + kbufinfo_t kd_bufinfo; + pid_t curpid; + struct proc *p, *curproc; + + + kdbg_lock_init(); + lck_mtx_lock(kd_trace_mtx); + + if (name[0] == KERN_KDGETBUF) { + /* + * Does not alter the global_state_pid + * This is a passive request. */ - return(EINVAL); - } - - kd_bufinfo.nkdbufs = nkdbufs; - kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap); - kd_bufinfo.nolog = kdebug_nolog; - kd_bufinfo.flags = kdebug_flags; - kd_bufinfo.bufid = global_state_pid; + if (size < sizeof(kd_bufinfo.nkdbufs)) { + /* + * There is not enough room to return even + * the first element of the info structure. 
+ */ + lck_mtx_unlock(kd_trace_mtx); + + return(EINVAL); + } + kd_bufinfo.nkdbufs = nkdbufs; + kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap); + + if ( (kdebug_slowcheck & SLOW_NOLOG) ) + kd_bufinfo.nolog = 1; + else + kd_bufinfo.nolog = 0; + kd_bufinfo.flags = kdebug_flags; + kd_bufinfo.bufid = global_state_pid; - if(size >= sizeof(kbufinfo_t)) { - /* Provide all the info we have */ - if(copyout (&kd_bufinfo, where, sizeof(kbufinfo_t))) - return(EINVAL); - } - else { - /* - For backwards compatibility, only provide - as much info as there is room for. - */ - if(copyout (&kd_bufinfo, where, size)) - return(EINVAL); - } - return(0); - } - else if (name[0] == KERN_KDGETENTROPY) { - if (kd_entropy_buffer) - return(EBUSY); - else - ret = kdbg_getentropy((mach_timespec_t *)where, sizep, value); - return (ret); - } - - if(curproc = current_proc()) - curpid = curproc->p_pid; - else - return (ESRCH); + if (size >= sizeof(kd_bufinfo)) { + /* + * Provide all the info we have + */ + if (copyout (&kd_bufinfo, where, sizeof(kd_bufinfo))) { + lck_mtx_unlock(kd_trace_mtx); + + return(EINVAL); + } + } + else { + /* + * For backwards compatibility, only provide + * as much info as there is room for. + */ + if (copyout (&kd_bufinfo, where, size)) { + lck_mtx_unlock(kd_trace_mtx); + + return(EINVAL); + } + } + lck_mtx_unlock(kd_trace_mtx); + return(0); + } else if (name[0] == KERN_KDGETENTROPY) { + if (kd_entropy_buffer) + ret = EBUSY; + else + ret = kdbg_getentropy(where, sizep, value); + lck_mtx_unlock(kd_trace_mtx); + + return (ret); + } + + if (curproc = current_proc()) + curpid = curproc->p_pid; + else { + lck_mtx_unlock(kd_trace_mtx); + + return (ESRCH); + } if (global_state_pid == -1) global_state_pid = curpid; - else if (global_state_pid != curpid) - { - if((p = pfind(global_state_pid)) == NULL) - { - /* The global pid no longer exists */ - global_state_pid = curpid; - } - else - { - /* The global pid exists, deny this request */ + else if (global_state_pid != curpid) { + if ((p = pfind(global_state_pid)) == NULL) { + /* + * The global pid no longer exists + */ + global_state_pid = curpid; + } else { + /* + * The global pid exists, deny this request + */ + lck_mtx_unlock(kd_trace_mtx); + return(EBUSY); - } - } + } + } switch(name[0]) { case KERN_KDEFLAGS: @@ -1027,17 +1145,15 @@ struct proc *p, *curproc; ret=EINVAL; break; } + kdebug_enable |= KDEBUG_ENABLE_TRACE; + kdebug_slowcheck &= ~SLOW_NOLOG; } - - if (value) - kdebug_enable |= KDEBUG_ENABLE_TRACE; else - kdebug_enable &= ~KDEBUG_ENABLE_TRACE; - - kdebug_nolog = (value)?0:1; - - if (kdebug_enable & KDEBUG_ENABLE_TRACE) - kdbg_mapinit(); + { + kdebug_enable &= ~KDEBUG_ENABLE_TRACE; + kdebug_slowcheck |= SLOW_NOLOG; + } + kdbg_mapinit(); break; case KERN_KDSETBUF: /* We allow a maximum buffer size of 25% of either ram or max mapped address, whichever is smaller */ @@ -1101,7 +1217,7 @@ struct proc *p, *curproc; ret = kdbg_setpidex(&kd_Reg); break; case KERN_KDTHRMAP: - ret = kdbg_readmap((kd_threadmap *)where, sizep); + ret = kdbg_readmap(where, sizep); break; case KERN_KDSETRTCDEC: if (size < sizeof(kd_regtype)) { @@ -1118,10 +1234,12 @@ struct proc *p, *curproc; default: ret= EINVAL; } + lck_mtx_unlock(kd_trace_mtx); + return(ret); } -kdbg_read(kd_buf * buffer, size_t *number) +kdbg_read(user_addr_t buffer, size_t *number) { int avail=*number; int count=0; @@ -1132,89 +1250,85 @@ unsigned int my_kdebug_flags; kd_buf * my_kd_bufptr; s = ml_set_interrupts_enabled(FALSE); - usimple_lock(&kd_trace_lock); + lck_spin_lock(kd_trace_lock); + 
my_kdebug_flags = kdebug_flags; my_kd_bufptr = kd_bufptr; - usimple_unlock(&kd_trace_lock); + + lck_spin_unlock(kd_trace_lock); ml_set_interrupts_enabled(s); count = avail/sizeof(kd_buf); + if (count) { if ((my_kdebug_flags & KDBG_BUFINIT) && kd_bufsize && kd_buffer) { if (count > nkdbufs) count = nkdbufs; - if (!(my_kdebug_flags & KDBG_WRAPPED) && (my_kd_bufptr > kd_readlast)) - { - copycount = my_kd_bufptr-kd_readlast; - if (copycount > count) - copycount = count; - - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) - { - *number = 0; - return(EINVAL); - } - kd_readlast += copycount; - *number = copycount; - return(0); - } - else if (!(my_kdebug_flags & KDBG_WRAPPED) && (my_kd_bufptr == kd_readlast)) - { - *number = 0; - return(0); - } - else - { - if (my_kdebug_flags & KDBG_WRAPPED) - { - kd_readlast = my_kd_bufptr; + + if (!(my_kdebug_flags & KDBG_WRAPPED)) { + if (my_kd_bufptr == kd_readlast) { + *number = 0; + return(0); + } + if (my_kd_bufptr > kd_readlast) { + copycount = my_kd_bufptr - kd_readlast; + if (copycount > count) + copycount = count; + + if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) { + *number = 0; + return(EINVAL); + } + kd_readlast += copycount; + *number = copycount; + return(0); + } + } + if ( (my_kdebug_flags & KDBG_WRAPPED) ) { + /* Note that by setting kd_readlast equal to my_kd_bufptr, + * we now treat the kd_buffer read the same as if we weren't + * wrapped and my_kd_bufptr was less than kd_readlast. + */ + kd_readlast = my_kd_bufptr; kdebug_flags &= ~KDBG_WRAPPED; - } - - /* Note that by setting kd_readlast equal to my_kd_bufptr, - we now treat the kd_buffer read the same as if we weren't - wrapped and my_kd_bufptr was less than kd_readlast. - */ - - /* first copyout from readlast to end of kd_buffer */ - copycount = kd_buflast - kd_readlast; - if (copycount > count) - copycount = count; - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) - { - *number = 0; + } + /* + * first copyout from readlast to end of kd_buffer + */ + copycount = kd_buflast - kd_readlast; + if (copycount > count) + copycount = count; + if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) { + *number = 0; return(EINVAL); - } - buffer += copycount; - count -= copycount; - totalcount = copycount; - kd_readlast += copycount; - if (kd_readlast == kd_buflast) - kd_readlast = kd_buffer; - if (count == 0) - { + } + buffer += (copycount * sizeof(kd_buf)); + count -= copycount; + totalcount = copycount; + kd_readlast += copycount; + + if (kd_readlast == kd_buflast) + kd_readlast = kd_buffer; + if (count == 0) { *number = totalcount; return(0); - } - - /* second copyout from top of kd_buffer to bufptr */ - copycount = my_kd_bufptr - kd_readlast; - if (copycount > count) - copycount = count; - if (copycount == 0) - { + } + /* second copyout from top of kd_buffer to bufptr */ + copycount = my_kd_bufptr - kd_readlast; + if (copycount > count) + copycount = count; + if (copycount == 0) { *number = totalcount; return(0); - } - if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) - { + } + if (copyout(kd_readlast, buffer, copycount * sizeof(kd_buf))) return(EINVAL); - } - kd_readlast += copycount; - totalcount += copycount; - *number = totalcount; - return(0); - } + + kd_readlast += copycount; + totalcount += copycount; + *number = totalcount; + return(0); + } /* end if KDBG_BUFINIT */ } /* end if count */ return (EINVAL); diff --git a/bsd/kern/kern_acct.c b/bsd/kern/kern_acct.c index 3654a9dc8..0b3168147 100644 --- a/bsd/kern/kern_acct.c +++ 
b/bsd/kern/kern_acct.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -67,10 +67,11 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include @@ -79,6 +80,8 @@ #include #include #include +#include +#include /* * The routines implemented in this file are described in: @@ -96,15 +99,23 @@ * The former's operation is described in Leffler, et al., and the latter * was provided by UCB with the 4.4BSD-Lite release */ -comp_t encode_comp_t __P((u_long, u_long)); -void acctwatch __P((void *)); -void acctwatch_funnel __P((void *)); +comp_t encode_comp_t(u_long, u_long); +void acctwatch(void *); +void acctwatch_funnel(void *); /* - * Accounting vnode pointer, and saved vnode pointer. + * Accounting vnode pointer, and suspended accounting vnode pointer. States + * are as follows: + * + * acctp suspend_acctp state + * ------------- ------------ ------------------------------ + * NULL NULL Accounting disabled + * !NULL NULL Accounting enabled + * NULL !NULL Accounting enabled, but suspended + * !NULL !NULL */ struct vnode *acctp; -struct vnode *savacctp; +struct vnode *suspend_acctp; /* * Values associated with enabling and disabling accounting @@ -117,32 +128,32 @@ int acctchkfreq = 15; /* frequency (in seconds) to check space */ * Accounting system call. Written based on the specification and * previous implementation done by Mark Tinguely. */ -struct acct_args { - char *path; -}; -acct(p, uap, retval) - struct proc *p; - struct acct_args *uap; - int *retval; +int +acct(struct proc *p, struct acct_args *uap, __unused int *retval) { struct nameidata nd; int error; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* Make sure that the caller is root. */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); /* * If accounting is to be started to a file, open that file for * writing and make sure it's a 'normal'. */ - if (uap->path != NULL) { - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, p); - if (error = vn_open(&nd, FWRITE, 0)) + if (uap->path != USER_ADDR_NULL) { + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, &context); + if ((error = vn_open(&nd, FWRITE, 0))) return (error); - VOP_UNLOCK(nd.ni_vp, 0, p); + vnode_put(nd.ni_vp); + if (nd.ni_vp->v_type != VREG) { - vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); + vn_close(nd.ni_vp, FWRITE, kauth_cred_get(), p); return (EACCES); } } @@ -151,13 +162,14 @@ acct(p, uap, retval) * If accounting was previously enabled, kill the old space-watcher, * close the file, and (if no new file was specified, leave). */ - if (acctp != NULLVP || savacctp != NULLVP) { + if (acctp != NULLVP || suspend_acctp != NULLVP) { untimeout(acctwatch_funnel, NULL); - error = vn_close((acctp != NULLVP ? acctp : savacctp), FWRITE, - p->p_ucred, p); - acctp = savacctp = NULLVP; + error = vn_close((acctp != NULLVP ? acctp : suspend_acctp), FWRITE, + kauth_cred_get(), p); + + acctp = suspend_acctp = NULLVP; } - if (uap->path == NULL) + if (uap->path == USER_ADDR_NULL) return (error); /* @@ -175,13 +187,15 @@ acct(p, uap, retval) * and are enumerated below. (They're also noted in the system * "acct.h" header file.) 
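[Note: the two-pointer encoding introduced above (acctp / suspend_acctp) makes the accounting state machine explicit. A tiny self-contained rendering of the same state table, with opaque handles standing in for the vnode pointers:

    #include <stdio.h>

    struct acct_state {
        void *active;      /* models acctp */
        void *suspended;   /* models suspend_acctp */
    };

    static const char *acct_describe(const struct acct_state *s)
    {
        if (s->active == NULL && s->suspended == NULL)
            return "accounting disabled";
        if (s->active != NULL && s->suspended == NULL)
            return "accounting enabled";
        if (s->active == NULL && s->suspended != NULL)
            return "accounting enabled, but suspended";
        return "both set";   /* the fourth row; not a state the code reaches */
    }
]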
*/ +int acct_process(p) struct proc *p; { - struct acct acct; + struct acct an_acct; struct rusage *r; struct timeval ut, st, tmp; - int s, t; + int t; + int error; struct vnode *vp; /* If accounting isn't enabled, don't bother */ @@ -194,20 +208,18 @@ acct_process(p) */ /* (1) The name of the command that ran */ - bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm); + bcopy(p->p_comm, an_acct.ac_comm, sizeof an_acct.ac_comm); /* (2) The amount of user and system time that was used */ calcru(p, &ut, &st, NULL); - acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec); - acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec); + an_acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec); + an_acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec); /* (3) The elapsed time the commmand ran (and its starting time) */ - acct.ac_btime = p->p_stats->p_start.tv_sec; - s = splclock(); - tmp = time; - splx(s); + an_acct.ac_btime = p->p_stats->p_start.tv_sec; + microtime(&tmp); timevalsub(&tmp, &p->p_stats->p_start); - acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec); + an_acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec); /* (4) The average amount of memory used */ r = &p->p_stats->p_ru; @@ -215,33 +227,36 @@ acct_process(p) timevaladd(&tmp, &st); t = tmp.tv_sec * hz + tmp.tv_usec / tick; if (t) - acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; + an_acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; else - acct.ac_mem = 0; + an_acct.ac_mem = 0; /* (5) The number of disk I/O operations done */ - acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); + an_acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); /* (6) The UID and GID of the process */ - acct.ac_uid = p->p_cred->p_ruid; - acct.ac_gid = p->p_cred->p_rgid; + an_acct.ac_uid = p->p_ucred->cr_ruid; + an_acct.ac_gid = p->p_ucred->cr_rgid; /* (7) The terminal from which the process was started */ if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp) - acct.ac_tty = p->p_pgrp->pg_session->s_ttyp->t_dev; + an_acct.ac_tty = p->p_pgrp->pg_session->s_ttyp->t_dev; else - acct.ac_tty = NODEV; + an_acct.ac_tty = NODEV; /* (8) The boolean flags that tell how the process terminated, etc. */ - acct.ac_flag = p->p_acflag; + an_acct.ac_flag = p->p_acflag; /* * Now, just write the accounting information to the file. 
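[Note: encode_comp_t(), used above for the user, system, and elapsed times, packs a time into the classic comp_t format: a 13-bit fraction counting 64ths of a second with a 3-bit base-8 exponent. A sketch following the 4.4BSD convention the surrounding comments cite (illustrative, not this file's exact code):

    #define MANTSIZE  13                      /* mantissa bits */
    #define EXPSIZE   3                       /* base-8 exponent bits */
    #define MAXFRACT  ((1 << MANTSIZE) - 1)

    static unsigned long encode_time(unsigned long sec, unsigned long usec)
    {
        int exp = 0;
        unsigned long t = sec * 64 + usec / (1000000 / 64);  /* 64ths of a second */

        while (t > MAXFRACT) {                /* shift out three bits at a time */
            t >>= EXPSIZE;
            exp++;
        }
        return ((unsigned long)exp << MANTSIZE) | t;
    }

The 3-bit exponent caps the representable range, which is why the format is only used for coarse per-process accounting totals.]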
*/ - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - return (vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct), - (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, - (int *)0, p)); + if ((error = vnode_getwithref(vp)) == 0) { + error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&an_acct, sizeof (an_acct), + (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, p->p_ucred, + (int *)0, p); + vnode_put(vp); + } + return (error); } /* @@ -301,32 +316,48 @@ acctwatch_funnel(a) */ /* ARGSUSED */ void -acctwatch(a) - void *a; +acctwatch(__unused void *a) { - struct statfs sb; - - if (savacctp != NULLVP) { - if (savacctp->v_type == VBAD) { - (void) vn_close(savacctp, FWRITE, NOCRED, NULL); - savacctp = NULLVP; + struct vfs_context context; + struct vfs_attr va; + + VFSATTR_INIT(&va); + VFSATTR_WANTED(&va, f_blocks); + VFSATTR_WANTED(&va, f_bavail); + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + + if (suspend_acctp != NULLVP) { + /* + * Resuming accounting when accounting is suspended, and the + * filesystem containing the suspended accounting file goes + * below a low watermark + */ + if (suspend_acctp->v_type == VBAD) { + (void) vn_close(suspend_acctp, FWRITE, NOCRED, NULL); + suspend_acctp = NULLVP; return; } - (void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0); - if (sb.f_bavail > acctresume * sb.f_blocks / 100) { - acctp = savacctp; - savacctp = NULLVP; + (void)vfs_getattr(suspend_acctp->v_mount, &va, &context); + if (va.f_bavail > acctresume * va.f_blocks / 100) { + acctp = suspend_acctp; + suspend_acctp = NULLVP; log(LOG_NOTICE, "Accounting resumed\n"); } } else if (acctp != NULLVP) { + /* + * Suspending accounting when accounting is currently active, + * and the filesystem containing the active accounting file + * goes over a high watermark + */ if (acctp->v_type == VBAD) { (void) vn_close(acctp, FWRITE, NOCRED, NULL); acctp = NULLVP; return; } - (void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0); - if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) { - savacctp = acctp; + (void)vfs_getattr(acctp->v_mount, &va, &context); + if (va.f_bavail <= acctsuspend * va.f_blocks / 100) { + suspend_acctp = acctp; acctp = NULLVP; log(LOG_NOTICE, "Accounting suspended\n"); } diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index f618a08eb..386774f05 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -37,25 +37,31 @@ */ #include -#include #include -#include +#include #include #include -#include +#include #include -#include +#include #include -#include +#include #include #include #include #include +#include #include + +#include +#include #include #include +#include + +#include #include #define AIO_work_queued 1 @@ -130,8 +136,8 @@ typedef struct aio_anchor_cb aio_anchor_cb; /* * aysnc IO locking macros used to protect critical sections. 
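[Note, before the AIO changes begin: the acctwatch() rewrite above implements simple hysteresis — accounting is suspended when free space on the log's filesystem falls to acctsuspend percent of total blocks, and resumed only once it climbs past acctresume percent. The two comparisons, condensed into portable C with illustrative names:

    static int suspended;   /* models acctp vs. suspend_acctp being the live pointer */

    static void watch(long long bavail, long long blocks,
                      int suspend_pct, int resume_pct)
    {
        if (!suspended && bavail <= suspend_pct * blocks / 100)
            suspended = 1;          /* "Accounting suspended" */
        else if (suspended && bavail > resume_pct * blocks / 100)
            suspended = 0;          /* "Accounting resumed" */
    }

Keeping the resume threshold above the suspend threshold prevents flapping when free space hovers near one watermark.]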
*/ -#define AIO_LOCK usimple_lock( &aio_lock ) -#define AIO_UNLOCK usimple_unlock( &aio_lock ) +#define AIO_LOCK lck_mtx_lock(aio_lock) +#define AIO_UNLOCK lck_mtx_unlock(aio_lock) /* @@ -146,45 +152,44 @@ static aio_workq_entry * aio_get_some_work( void ); static boolean_t aio_last_group_io( aio_workq_entry *entryp ); static void aio_mark_requests( aio_workq_entry *entryp ); static int aio_queue_async_request( struct proc *procp, - struct aiocb *aiocbp, + user_addr_t aiocbp, int kindOfIO ); static int aio_validate( aio_workq_entry *entryp ); static void aio_work_thread( void ); static int do_aio_cancel( struct proc *p, int fd, - struct aiocb *aiocbp, + user_addr_t aiocbp, boolean_t wait_for_completion, boolean_t disable_notification ); static void do_aio_completion( aio_workq_entry *entryp ); static int do_aio_fsync( aio_workq_entry *entryp ); static int do_aio_read( aio_workq_entry *entryp ); static int do_aio_write( aio_workq_entry *entryp ); +static void do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ); static boolean_t is_already_queued( struct proc *procp, - struct aiocb *aiocbp ); + user_addr_t aiocbp ); static int lio_create_async_entry( struct proc *procp, - struct aiocb *aiocbp, - struct sigevent *sigp, + user_addr_t aiocbp, + user_addr_t sigp, long group_tag, aio_workq_entry **entrypp ); static int lio_create_sync_entry( struct proc *procp, - struct aiocb *aiocbp, + user_addr_t aiocbp, long group_tag, aio_workq_entry **entrypp ); + /* * EXTERNAL PROTOTYPES */ /* in ...bsd/kern/sys_generic.c */ -extern struct file* holdfp( struct filedesc* fdp, int fd, int flag ); -extern int dofileread( struct proc *p, struct file *fp, int fd, - void *buf, size_t nbyte, off_t offset, - int flags, int *retval ); -extern int dofilewrite( struct proc *p, struct file *fp, int fd, - const void *buf, size_t nbyte, off_t offset, - int flags, int *retval ); -extern vm_map_t vm_map_switch( vm_map_t map ); - +extern int dofileread( struct proc *p, struct fileproc *fp, int fd, + user_addr_t bufp, user_size_t nbyte, + off_t offset, int flags, user_ssize_t *retval ); +extern int dofilewrite( struct proc *p, struct fileproc *fp, int fd, + user_addr_t bufp, user_size_t nbyte, off_t offset, + int flags, user_ssize_t *retval ); /* * aio external global variables. @@ -198,55 +203,13 @@ extern int aio_worker_threads; /* AIO_THREAD_COUNT - configurable */ * aio static variables. 
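[Note on a change that recurs through this file: prototypes that took struct aiocb * (a pointer in the caller's address space) now take user_addr_t, a fixed-width integer wide enough to hold a pointer from either a 32- or 64-bit process, so one kernel code path serves both. A self-contained model of the type and cast; the x-prefixed names are stand-ins for the kernel's user_addr_t / CAST_USER_ADDR_T:

    #include <stdint.h>

    typedef uint64_t xuser_addr_t;
    #define XUSER_ADDR_NULL  ((xuser_addr_t)0)

    /* routing a native pointer through uintptr_t widens it without sign-extension */
    #define XCAST_USER_ADDR_T(p) ((xuser_addr_t)(uintptr_t)(p))
]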
*/ static aio_anchor_cb aio_anchor; -static simple_lock_data_t aio_lock; +static lck_mtx_t * aio_lock; +static lck_grp_t * aio_lock_grp; +static lck_attr_t * aio_lock_attr; +static lck_grp_attr_t * aio_lock_grp_attr; static struct zone *aio_workq_zonep; -/* - * syscall input parameters - */ -#ifndef _SYS_SYSPROTO_H_ - -struct aio_cancel_args { - int fd; - struct aiocb *aiocbp; -}; - -struct aio_error_args { - struct aiocb *aiocbp; -}; - -struct aio_fsync_args { - int op; - struct aiocb *aiocbp; -}; - -struct aio_read_args { - struct aiocb *aiocbp; -}; - -struct aio_return_args { - struct aiocb *aiocbp; -}; - -struct aio_suspend_args { - struct aiocb *const *aiocblist; - int nent; - const struct timespec *timeoutp; -}; - -struct aio_write_args { - struct aiocb *aiocbp; -}; - -struct lio_listio_args { - int mode; - struct aiocb *const *aiocblist; - int nent; - struct sigevent *sigp; -}; - -#endif /* _SYS_SYSPROTO_H_ */ /* @@ -260,9 +223,8 @@ struct lio_listio_args { int aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval ) { - struct aiocb my_aiocb; + struct user_aiocb my_aiocb; int result; - boolean_t funnel_state; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START, (int)p, (int)uap->aiocbp, 0, 0, 0 ); @@ -277,8 +239,16 @@ aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval ) } *retval = -1; - if ( uap->aiocbp != NULL ) { - result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) ); + if ( uap->aiocbp != USER_ADDR_NULL ) { + if ( !IS_64BIT_PROCESS(p) ) { + struct aiocb aiocb32; + + result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) ); + if ( result == 0 ) + do_munge_aiocb( &aiocb32, &my_aiocb ); + } else + result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) ); + if ( result != 0 ) { result = EAGAIN; goto ExitRoutine; @@ -293,11 +263,7 @@ aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval ) goto ExitRoutine; } } - - /* current BSD code assumes funnel lock is held */ - funnel_state = thread_funnel_set( kernel_flock, TRUE ); result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE ); - (void) thread_funnel_set( kernel_flock, funnel_state ); if ( result != -1 ) { *retval = result; @@ -319,7 +285,6 @@ ExitRoutine: /* * _aio_close - internal function used to clean up async IO requests for * a file descriptor that is closing. - * NOTE - kernel funnel lock is held when we get called. * THIS MAY BLOCK. */ @@ -339,7 +304,7 @@ _aio_close( struct proc *p, int fd ) (int)p, fd, 0, 0, 0 ); /* cancel all async IO requests on our todo queues for this file descriptor */ - error = do_aio_cancel( p, fd, NULL, TRUE, FALSE ); + error = do_aio_cancel( p, fd, 0, TRUE, FALSE ); if ( error == AIO_NOTCANCELED ) { /* * AIO_NOTCANCELED is returned when we find an aio request for this process @@ -450,7 +415,8 @@ aio_fsync( struct proc *p, struct aio_fsync_args *uap, int *retval ) (int)p, (int)uap->aiocbp, uap->op, 0, 0 ); *retval = 0; - if ( uap->op == O_SYNC ) + /* 0 := O_SYNC for binary backward compatibility with Panther */ + if (uap->op == O_SYNC || uap->op == 0) fsync_kind = AIO_FSYNC; #if 0 // we don't support fdatasync() call yet else if ( uap->op == O_DSYNC ) @@ -511,7 +477,7 @@ aio_read( struct proc *p, struct aio_read_args *uap, int *retval ) */ int -aio_return( struct proc *p, struct aio_return_args *uap, register_t *retval ) +aio_return( struct proc *p, struct aio_return_args *uap, user_ssize_t *retval ) { aio_workq_entry *entryp; int error; @@ -596,7 +562,6 @@ ExitRoutine: * a process that is going away due to exec(). 
We cancel any async IOs * we can and wait for those already active. We also disable signaling * for cancelled or active aio requests that complete. - * NOTE - kernel funnel lock is held when we get called. * This routine MAY block! */ @@ -622,7 +587,6 @@ _aio_exec( struct proc *p ) * a process that is terminating (via exit() or exec() ). We cancel any async IOs * we can and wait for those already active. We also disable signaling * for cancelled or active aio requests that complete. This routine MAY block! - * NOTE - kernel funnel lock is held when we get called. */ __private_extern__ void @@ -646,7 +610,7 @@ _aio_exit( struct proc *p ) * cancel async IO requests on the todo work queue and wait for those * already active to complete. */ - error = do_aio_cancel( p, 0, NULL, TRUE, TRUE ); + error = do_aio_cancel( p, 0, 0, TRUE, TRUE ); if ( error == AIO_NOTCANCELED ) { /* * AIO_NOTCANCELED is returned when we find an aio request for this process @@ -696,7 +660,6 @@ _aio_exit( struct proc *p ) } AIO_UNLOCK; -ExitRoutine: KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END, (int)p, 0, 0, 0, 0 ); @@ -718,11 +681,10 @@ ExitRoutine: * were already complete. * WARNING - do not deference aiocbp in this routine, it may point to user * land data that has not been copied in (when called from aio_cancel() ) - * NOTE - kernel funnel lock is held when we get called. */ static int -do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, +do_aio_cancel( struct proc *p, int fd, user_addr_t aiocbp, boolean_t wait_for_completion, boolean_t disable_notification ) { aio_workq_entry *entryp; @@ -738,9 +700,9 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, next_entryp = TAILQ_NEXT( entryp, aio_workq_link ); if ( p == entryp->procp ) { - if ( (aiocbp == NULL && fd == 0) || - (aiocbp != NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == NULL && fd == entryp->aiocb.aio_fildes) ) { + if ( (aiocbp == USER_ADDR_NULL && fd == 0) || + (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || + (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { /* we found a match so we remove the entry from the */ /* todo work queue and place it on the done queue */ TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); @@ -776,7 +738,7 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, else AIO_UNLOCK; - if ( aiocbp != NULL ) { + if ( aiocbp != USER_ADDR_NULL ) { return( result ); } @@ -801,9 +763,9 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, next_entryp = TAILQ_NEXT( entryp, aio_workq_link ); if ( p == entryp->procp ) { - if ( (aiocbp == NULL && fd == 0) || - (aiocbp != NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == NULL && fd == entryp->aiocb.aio_fildes) ) { + if ( (aiocbp == USER_ADDR_NULL && fd == 0) || + (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || + (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { /* we found a match so we remove the entry from the */ /* todo work queue and place it on the done queue */ TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link ); @@ -820,7 +782,7 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link ); aio_anchor.aio_done_count++; p->aio_done_count++; - if ( aiocbp != NULL ) { + if ( aiocbp != USER_ADDR_NULL ) { AIO_UNLOCK; return( result ); } @@ -834,9 +796,9 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, * return AIO_NOTCANCELED result. 
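[Note: the three-clause match that do_aio_cancel() repeats on each queue encodes its calling conventions — a null aiocbp with fd 0 cancels everything for the process, a non-null aiocbp cancels one specific request, and a null aiocbp with a nonzero fd cancels everything on that descriptor. Factored into a standalone helper for clarity (hypothetical names):

    #include <stdint.h>

    typedef uint64_t xuser_addr_t;   /* stands in for user_addr_t */

    static int cancel_match(xuser_addr_t aiocbp, int fd,
                            xuser_addr_t entry_aiocbp, int entry_fd)
    {
        if (aiocbp == 0 && fd == 0)
            return 1;                        /* cancel all requests */
        if (aiocbp != 0)
            return entry_aiocbp == aiocbp;   /* cancel one request */
        return fd == entry_fd;               /* cancel all on this fd */
    }
]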
*/ TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) { - if ( (aiocbp == NULL && fd == 0) || - (aiocbp != NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == NULL && fd == entryp->aiocb.aio_fildes) ) { + if ( (aiocbp == USER_ADDR_NULL && fd == 0) || + (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || + (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { result = AIO_NOTCANCELED; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE, @@ -846,7 +808,7 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, entryp->flags |= AIO_WAITING; /* flag for special completion processing */ if ( disable_notification ) entryp->flags |= AIO_DISABLE; /* flag for special completion processing */ - if ( aiocbp != NULL ) { + if ( aiocbp != USER_ADDR_NULL ) { AIO_UNLOCK; return( result ); } @@ -860,15 +822,15 @@ do_aio_cancel( struct proc *p, int fd, struct aiocb *aiocbp, */ if ( result == -1 ) { TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { - if ( (aiocbp == NULL && fd == 0) || - (aiocbp != NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == NULL && fd == entryp->aiocb.aio_fildes) ) { + if ( (aiocbp == USER_ADDR_NULL && fd == 0) || + (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || + (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { result = AIO_ALLDONE; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); - if ( aiocbp != NULL ) { + if ( aiocbp != USER_ADDR_NULL ) { AIO_UNLOCK; return( result ); } @@ -898,10 +860,9 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) int error; int i, count; uint64_t abstime; - struct timespec ts; - struct timeval tv; + struct user_timespec ts; aio_workq_entry *entryp; - struct aiocb * *aiocbpp; + user_addr_t *aiocbpp; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START, (int)p, uap->nent, 0, 0, 0 ); @@ -919,13 +880,23 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) goto ExitThisRoutine; } - if ( uap->nent < 1 || uap->nent > AIO_LISTIO_MAX ) { + if ( uap->nent < 1 || uap->nent > aio_max_requests_per_process ) { error = EINVAL; goto ExitThisRoutine; } - if ( uap->timeoutp != NULL ) { - error = copyin( (void *)uap->timeoutp, &ts, sizeof(ts) ); + if ( uap->timeoutp != USER_ADDR_NULL ) { + if ( proc_is64bit(p) ) { + error = copyin( uap->timeoutp, &ts, sizeof(ts) ); + } + else { + struct timespec temp; + error = copyin( uap->timeoutp, &temp, sizeof(temp) ); + if ( error == 0 ) { + ts.tv_sec = temp.tv_sec; + ts.tv_nsec = temp.tv_nsec; + } + } if ( error != 0 ) { error = EAGAIN; goto ExitThisRoutine; @@ -941,30 +912,44 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) clock_absolutetime_interval_to_deadline( abstime, &abstime ); } - MALLOC( aiocbpp, void *, (uap->nent * sizeof(struct aiocb *)), M_TEMP, M_WAITOK ); + /* we reserve enough space for largest possible pointer size */ + MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK ); if ( aiocbpp == NULL ) { error = EAGAIN; goto ExitThisRoutine; } - /* check list of aio requests to see if any have completed */ - for ( i = 0; i < uap->nent; i++ ) { - struct aiocb *aiocbp; + /* copyin our aiocb pointers from list */ + error = copyin( uap->aiocblist, aiocbpp, + proc_is64bit(p) ? 
(uap->nent * sizeof(user_addr_t)) + : (uap->nent * sizeof(uintptr_t)) ); + if ( error != 0 ) { + error = EAGAIN; + goto ExitThisRoutine; + } - /* copyin in aiocb pointer from list */ - error = copyin( (void *)(uap->aiocblist + i), (aiocbpp + i), sizeof(aiocbp) ); - if ( error != 0 ) { - error = EAGAIN; - goto ExitThisRoutine; + /* we depend on a list of user_addr_t's so we need to munge and expand */ + /* when these pointers came from a 32-bit process */ + if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) { + /* position to the last entry and work back from there */ + uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1); + user_addr_t *my_addrp = aiocbpp + (uap->nent - 1); + for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) { + *my_addrp = (user_addr_t) (*my_ptrp); } + } + /* check list of aio requests to see if any have completed */ + AIO_LOCK; + for ( i = 0; i < uap->nent; i++ ) { + user_addr_t aiocbp; + /* NULL elements are legal so check for 'em */ aiocbp = *(aiocbpp + i); - if ( aiocbp == NULL ) + if ( aiocbp == USER_ADDR_NULL ) continue; - + /* return immediately if any aio request in the list is done */ - AIO_LOCK; TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { if ( entryp->uaiocbp == aiocbp ) { *retval = 0; @@ -973,7 +958,6 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) goto ExitThisRoutine; } } - AIO_UNLOCK; } /* for ( ; i < uap->nent; ) */ KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE, @@ -983,19 +967,15 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) * wait for an async IO to complete or a signal fires or timeout expires. * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal * interrupts us. If an async IO completes before a signal fires or our - * timeout expires, we get a wakeup call from aio_work_thread(). We do not - * use tsleep() here in order to avoid getting kernel funnel lock. + * timeout expires, we get a wakeup call from aio_work_thread(). */ - assert_wait( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE ); - if ( abstime > 0 ) { - thread_set_timer_deadline( abstime ); - } + assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime ); + AIO_UNLOCK; + error = thread_block( THREAD_CONTINUE_NULL ); + if ( error == THREAD_AWAKENED ) { /* got our wakeup call from aio_work_thread() */ - if ( abstime > 0 ) { - thread_cancel_timer(); - } *retval = 0; error = 0; } @@ -1005,9 +985,6 @@ aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval ) } else { /* we were interrupted */ - if ( abstime > 0 ) { - thread_cancel_timer(); - } error = EINTR; } @@ -1066,11 +1043,13 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) int result; long group_tag; aio_workq_entry * *entryp_listp; + user_addr_t *aiocbpp; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START, (int)p, uap->nent, uap->mode, 0, 0 ); entryp_listp = NULL; + aiocbpp = NULL; call_result = -1; *retval = -1; if ( !(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT) ) { @@ -1095,27 +1074,48 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) * allocate a list of aio_workq_entry pointers that we will use to queue * up all our requests at once while holding our lock. 
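[Note: the backwards walk in the munge loop above is load-bearing. The copyin packed nent 32-bit pointers at the front of a buffer sized for 64-bit entries, so expansion must start at the last element; walking forward would overwrite narrow entries that have not been read yet. Condensed (the raw aliasing mirrors the kernel code; a strict-aliasing-clean version would go through memcpy):

    #include <stdint.h>

    static void widen_in_place(uint64_t *buf, int nent)
    {
        uint32_t *narrow = (uint32_t *)buf;   /* the packed 32-bit pointers */
        int i;

        for (i = nent - 1; i >= 0; i--)
            buf[i] = (uint64_t)narrow[i];     /* slot i is read before any write reaches it */
    }

Writing wide slot i touches narrow slots 2i and 2i+1, both at or past index i, so back-to-front never clobbers unread data.]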
*/ - MALLOC( entryp_listp, void *, (uap->nent * sizeof(struct aiocb *)), M_TEMP, M_WAITOK ); + MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK ); if ( entryp_listp == NULL ) { call_result = EAGAIN; goto ExitRoutine; } + /* we reserve enough space for largest possible pointer size */ + MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK ); + if ( aiocbpp == NULL ) { + call_result = EAGAIN; + goto ExitRoutine; + } + + /* copyin our aiocb pointers from list */ + result = copyin( uap->aiocblist, aiocbpp, + IS_64BIT_PROCESS(p) ? (uap->nent * sizeof(user_addr_t)) + : (uap->nent * sizeof(uintptr_t)) ); + if ( result != 0 ) { + call_result = EAGAIN; + goto ExitRoutine; + } + + /* we depend on a list of user_addr_t's so we need to munge and expand */ + /* when these pointers came from a 32-bit process */ + if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) { + /* position to the last entry and work back from there */ + uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1); + user_addr_t *my_addrp = aiocbpp + (uap->nent - 1); + for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) { + *my_addrp = (user_addr_t) (*my_ptrp); + } + } + /* process list of aio requests */ for ( i = 0; i < uap->nent; i++ ) { - struct aiocb *my_aiocbp; + user_addr_t my_aiocbp; *(entryp_listp + i) = NULL; + my_aiocbp = *(aiocbpp + i); - /* copyin in aiocb pointer from list */ - result = copyin( (void *)(uap->aiocblist + i), &my_aiocbp, sizeof(my_aiocbp) ); - if ( result != 0 ) { - call_result = EAGAIN; - continue; - } - /* NULL elements are legal so check for 'em */ - if ( my_aiocbp == NULL ) + if ( my_aiocbp == USER_ADDR_NULL ) continue; if ( uap->mode == LIO_NOWAIT ) @@ -1150,7 +1150,8 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) my_map = entryp->aio_map; entryp->aio_map = VM_MAP_NULL; - result = EAGAIN; + if ( call_result == -1 ) + call_result = EAGAIN; AIO_UNLOCK; aio_free_request( entryp, my_map ); AIO_LOCK; @@ -1170,11 +1171,11 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) aio_anchor.lio_sync_workq_count++; } } - AIO_UNLOCK; - if ( uap->mode == LIO_NOWAIT ) + if ( uap->mode == LIO_NOWAIT ) { /* caller does not want to wait so we'll fire off a worker thread and return */ - wakeup_one( &aio_anchor.aio_async_workq ); + wakeup_one( (caddr_t) &aio_anchor.aio_async_workq ); + } else { aio_workq_entry *entryp; int error; @@ -1182,18 +1183,14 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) /* * mode is LIO_WAIT - handle the IO requests now. 
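[Note: throughout lio_listio(), call_result uses -1 as a sentinel for "no failure recorded yet" — which is why the queuing-failure path above now checks it before overwriting — so the first error is preserved and later successes cannot mask it:

    static int call_result = -1;   /* -1: nothing has failed yet */

    static void record_result(int error)
    {
        if (error != 0 && call_result == -1)
            call_result = error;   /* first failure wins */
    }
]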
*/ - AIO_LOCK; entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq ); while ( entryp != NULL ) { if ( p == entryp->procp && group_tag == entryp->group_tag ) { - boolean_t funnel_state; TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link ); aio_anchor.lio_sync_workq_count--; AIO_UNLOCK; - // file system IO code path requires kernel funnel lock - funnel_state = thread_funnel_set( kernel_flock, TRUE ); if ( (entryp->flags & AIO_READ) != 0 ) { error = do_aio_read( entryp ); } @@ -1211,7 +1208,6 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) entryp->errorval = error; if ( error != 0 && call_result == -1 ) call_result = EIO; - (void) thread_funnel_set( kernel_flock, funnel_state ); AIO_LOCK; /* we're done with the IO request so move it on the done queue */ @@ -1227,8 +1223,8 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) entryp = TAILQ_NEXT( entryp, aio_workq_link ); } /* while ( entryp != NULL ) */ - AIO_UNLOCK; } /* uap->mode == LIO_WAIT */ + AIO_UNLOCK; /* call_result == -1 means we had no trouble queueing up requests */ if ( call_result == -1 ) { @@ -1239,6 +1235,8 @@ lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval ) ExitRoutine: if ( entryp_listp != NULL ) FREE( entryp_listp, M_TEMP ); + if ( aiocbpp != NULL ) + FREE( aiocbpp, M_TEMP ); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END, (int)p, call_result, 0, 0, 0 ); @@ -1258,30 +1256,31 @@ static void aio_work_thread( void ) { aio_workq_entry *entryp; - struct uthread *uthread = (struct uthread *)get_bsdthread_info(current_act()); for( ;; ) { + AIO_LOCK; entryp = aio_get_some_work(); if ( entryp == NULL ) { /* * aio worker threads wait for some work to get queued up * by aio_queue_async_request. Once some work gets queued * it will wake up one of these worker threads just before - * returning to our caller in user land. We do not use - * tsleep() here in order to avoid getting kernel funnel lock. + * returning to our caller in user land. */ assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT ); - thread_block( THREAD_CONTINUE_NULL ); + AIO_UNLOCK; - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_wake)) | DBG_FUNC_NONE, - 0, 0, 0, 0, 0 ); + thread_block( (thread_continue_t)aio_work_thread ); + /* NOT REACHED */ } else { int error; - boolean_t funnel_state; vm_map_t currentmap; vm_map_t oldmap = VM_MAP_NULL; task_t oldaiotask = TASK_NULL; + struct uthread *uthreadp = NULL; + + AIO_UNLOCK; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START, (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 ); @@ -1290,12 +1289,11 @@ aio_work_thread( void ) * Assume the target's address space identity for the duration * of the IO. 
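[Note: aio_work_thread() above now holds AIO_LOCK across the queue check, sleeps when the queue is empty, and restarts itself through a thread continuation passed to thread_block(). The lock / check / sleep / retry shape in a portable pthread analogue — a loop stands in for the continuation, and names are illustrative:

    #include <pthread.h>
    #include <stddef.h>

    struct work { struct work *next; };

    static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  q_cv   = PTHREAD_COND_INITIALIZER;
    static struct work    *q_head;

    static void process(struct work *w) { (void)w; /* the IO happens here */ }

    static void *worker(void *arg)
    {
        (void)arg;
        for (;;) {
            pthread_mutex_lock(&q_lock);
            while (q_head == NULL)                 /* assert_wait + thread_block */
                pthread_cond_wait(&q_cv, &q_lock);
            struct work *w = q_head;
            q_head = w->next;
            pthread_mutex_unlock(&q_lock);
            process(w);                            /* IO runs outside the lock */
        }
        return NULL;
    }
]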
*/ - funnel_state = thread_funnel_set( kernel_flock, TRUE ); - currentmap = get_task_map( (current_proc())->task ); if ( currentmap != entryp->aio_map ) { - oldaiotask = uthread->uu_aio_task; - uthread->uu_aio_task = entryp->procp->task; + uthreadp = (struct uthread *) get_bsdthread_info(current_thread()); + oldaiotask = uthreadp->uu_aio_task; + uthreadp->uu_aio_task = entryp->procp->task; oldmap = vm_map_switch( entryp->aio_map ); } @@ -1316,7 +1314,7 @@ aio_work_thread( void ) entryp->errorval = error; if ( currentmap != entryp->aio_map ) { (void) vm_map_switch( oldmap ); - uthread->uu_aio_task = oldaiotask; + uthreadp->uu_aio_task = oldaiotask; } /* we're done with the IO request so pop it off the active queue and */ @@ -1344,7 +1342,6 @@ aio_work_thread( void ) } do_aio_completion( entryp ); - (void) thread_funnel_set( kernel_flock, funnel_state ); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END, (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval, @@ -1374,16 +1371,15 @@ aio_work_thread( void ) * aio_get_some_work - get the next async IO request that is ready to be executed. * aio_fsync complicates matters a bit since we cannot do the fsync until all async * IO requests at the time the aio_fsync call came in have completed. + * NOTE - AIO_LOCK must be held by caller */ static aio_workq_entry * aio_get_some_work( void ) { aio_workq_entry *entryp; - int skip_count = 0; /* pop some work off the work queue and add to our active queue */ - AIO_LOCK; for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq ); entryp != NULL; entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) { @@ -1408,7 +1404,6 @@ aio_get_some_work( void ) aio_anchor.aio_active_count++; entryp->procp->aio_active_count++; } - AIO_UNLOCK; return( entryp ); @@ -1427,7 +1422,7 @@ aio_delay_fsync_request( aio_workq_entry *entryp ) aio_workq_entry *my_entryp; TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) { - if ( my_entryp->fsyncp != NULL && + if ( my_entryp->fsyncp != USER_ADDR_NULL && entryp->uaiocbp == my_entryp->fsyncp && entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) { return( TRUE ); @@ -1447,7 +1442,7 @@ aio_delay_fsync_request( aio_workq_entry *entryp ) */ static int -aio_queue_async_request( struct proc *procp, struct aiocb *aiocbp, int kindOfIO ) +aio_queue_async_request( struct proc *procp, user_addr_t aiocbp, int kindOfIO ) { aio_workq_entry *entryp; int result; @@ -1464,7 +1459,16 @@ aio_queue_async_request( struct proc *procp, struct aiocb *aiocbp, int kindOfIO entryp->uaiocbp = aiocbp; entryp->flags |= kindOfIO; entryp->aio_map = VM_MAP_NULL; - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + + if ( !IS_64BIT_PROCESS(procp) ) { + struct aiocb aiocb32; + + result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); + if ( result == 0 ) + do_munge_aiocb( &aiocb32, &entryp->aiocb ); + } else + result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + if ( result != 0 ) { result = EAGAIN; goto error_exit; @@ -1510,13 +1514,12 @@ aio_queue_async_request( struct proc *procp, struct aiocb *aiocbp, int kindOfIO TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); aio_anchor.aio_async_workq_count++; - AIO_UNLOCK; + wakeup_one( (caddr_t) &aio_anchor.aio_async_workq ); + AIO_UNLOCK; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE, (int)procp, (int)aiocbp, 0, 0, 0 ); - - wakeup_one( &aio_anchor.aio_async_workq ); - + return( 0 ); error_exit: @@ -1542,8 +1545,8 @@ error_exit: */ static int 
-lio_create_async_entry( struct proc *procp, struct aiocb *aiocbp, - struct sigevent *sigp, long group_tag, +lio_create_async_entry( struct proc *procp, user_addr_t aiocbp, + user_addr_t sigp, long group_tag, aio_workq_entry **entrypp ) { aio_workq_entry *entryp; @@ -1562,7 +1565,16 @@ lio_create_async_entry( struct proc *procp, struct aiocb *aiocbp, entryp->flags |= AIO_LIO; entryp->group_tag = group_tag; entryp->aio_map = VM_MAP_NULL; - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + + if ( !IS_64BIT_PROCESS(procp) ) { + struct aiocb aiocb32; + + result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); + if ( result == 0 ) + do_munge_aiocb( &aiocb32, &entryp->aiocb ); + } else + result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + if ( result != 0 ) { result = EAGAIN; goto error_exit; @@ -1577,8 +1589,32 @@ lio_create_async_entry( struct proc *procp, struct aiocb *aiocbp, /* use sigevent passed in to lio_listio for each of our calls, but only */ /* do completion notification after the last request completes. */ - if ( sigp != NULL ) { - result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) ); + if ( sigp != USER_ADDR_NULL ) { + if ( !IS_64BIT_PROCESS(procp) ) { + struct sigevent sigevent32; + + result = copyin( sigp, &sigevent32, sizeof(sigevent32) ); + if ( result == 0 ) { + /* also need to munge aio_sigevent since it contains pointers */ + /* special case here. since we do not know if sigev_value is an */ + /* int or a ptr we do NOT cast the ptr to a user_addr_t. This */ + /* means if we send this info back to user space we need to remember */ + /* sigev_value was not expanded for the 32-bit case. */ + /* NOTE - this does NOT affect us since we don't support sigev_value */ + /* yet in the aio context. 
*/ + //LP64 + entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify; + entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo; + entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int = + sigevent32.sigev_value.sival_int; + entryp->aiocb.aio_sigevent.sigev_notify_function = + CAST_USER_ADDR_T(sigevent32.sigev_notify_function); + entryp->aiocb.aio_sigevent.sigev_notify_attributes = + CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes); + } + } else + result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) ); + if ( result != 0 ) { result = EAGAIN; goto error_exit; @@ -1599,7 +1635,7 @@ lio_create_async_entry( struct proc *procp, struct aiocb *aiocbp, error_exit: if ( entryp != NULL ) - zfree( aio_workq_zonep, (vm_offset_t) entryp ); + zfree( aio_workq_zonep, entryp ); return( result ); @@ -1645,7 +1681,7 @@ aio_mark_requests( aio_workq_entry *entryp ) */ static int -lio_create_sync_entry( struct proc *procp, struct aiocb *aiocbp, +lio_create_sync_entry( struct proc *procp, user_addr_t aiocbp, long group_tag, aio_workq_entry **entrypp ) { aio_workq_entry *entryp; @@ -1664,7 +1700,16 @@ lio_create_sync_entry( struct proc *procp, struct aiocb *aiocbp, entryp->flags |= AIO_LIO; entryp->group_tag = group_tag; entryp->aio_map = VM_MAP_NULL; - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + + if ( !IS_64BIT_PROCESS(procp) ) { + struct aiocb aiocb32; + + result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); + if ( result == 0 ) + do_munge_aiocb( &aiocb32, &entryp->aiocb ); + } else + result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + if ( result != 0 ) { result = EAGAIN; goto error_exit; @@ -1687,7 +1732,7 @@ lio_create_sync_entry( struct proc *procp, struct aiocb *aiocbp, error_exit: if ( entryp != NULL ) - zfree( aio_workq_zonep, (vm_offset_t) entryp ); + zfree( aio_workq_zonep, entryp ); return( result ); @@ -1709,7 +1754,7 @@ aio_free_request( aio_workq_entry *entryp, vm_map_t the_map ) vm_map_deallocate( the_map ); } - zfree( aio_workq_zonep, (vm_offset_t) entryp ); + zfree( aio_workq_zonep, entryp ); return( 0 ); @@ -1722,8 +1767,7 @@ aio_free_request( aio_workq_entry *entryp, vm_map_t the_map ) static int aio_validate( aio_workq_entry *entryp ) { - boolean_t funnel_state; - struct file *fp; + struct fileproc *fp; int flag; int result; @@ -1746,10 +1790,10 @@ aio_validate( aio_workq_entry *entryp ) } if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) { - if ( entryp->aiocb.aio_offset < 0 || - entryp->aiocb.aio_nbytes < 0 || - entryp->aiocb.aio_nbytes > INT_MAX || - entryp->aiocb.aio_buf == NULL ) + // LP64todo - does max value for aio_nbytes need to grow? + if ( entryp->aiocb.aio_nbytes > INT_MAX || + entryp->aiocb.aio_buf == USER_ADDR_NULL || + entryp->aiocb.aio_offset < 0 ) return( EINVAL ); } @@ -1769,27 +1813,29 @@ aio_validate( aio_workq_entry *entryp ) return (EINVAL); /* validate the file descriptor and that the file was opened - * for the appropriate read / write access. This section requires - * kernel funnel lock. + * for the appropriate read / write access. 
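[Note: the descriptor check that follows (fp_lookup, test fg_flag against the needed FREAD/FWRITE access, fp_drop) has a user-space analogue built from only standard POSIX calls. A sketch of the same access-mode validation:

    #include <fcntl.h>
    #include <errno.h>

    /* need_write: 1 for a write request, 0 for a read request */
    static int validate_fd_access(int fd, int need_write)
    {
        int fl = fcntl(fd, F_GETFL);
        if (fl == -1)
            return EBADF;                     /* not an open descriptor */

        int acc = fl & O_ACCMODE;
        if (need_write && acc == O_RDONLY)
            return EBADF;
        if (!need_write && acc == O_WRONLY)
            return EBADF;
        return 0;
    }
]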
*/ - funnel_state = thread_funnel_set( kernel_flock, TRUE ); + proc_fdlock(entryp->procp); - result = fdgetf( entryp->procp, entryp->aiocb.aio_fildes, &fp ); + result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp , 1); if ( result == 0 ) { - if ( (fp->f_flag & flag) == 0 ) { + if ( (fp->f_fglob->fg_flag & flag) == 0 ) { /* we don't have read or write access */ result = EBADF; } - else if ( fp->f_type != DTYPE_VNODE ) { + else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) { /* this is not a file */ result = ESPIPE; - } + } else + fp->f_flags |= FP_AIOISSUED; + + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp , 1); } else { result = EBADF; } - (void) thread_funnel_set( kernel_flock, funnel_state ); + proc_fdunlock(entryp->procp); return( result ); @@ -1807,7 +1853,6 @@ static int aio_get_process_count( struct proc *procp ) { aio_workq_entry *entryp; - int error; int count; /* begin with count of completed async IO requests for this process */ @@ -1898,15 +1943,15 @@ do_aio_completion( aio_workq_entry *entryp ) AIO_LOCK; active_requests = aio_active_requests_for_process( entryp->procp ); - AIO_UNLOCK; + //AIO_UNLOCK; if ( active_requests < 1 ) { /* no active aio requests for this process, continue exiting */ + wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN ); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); - - wakeup_one( &entryp->procp->AIO_CLEANUP_SLEEP_CHAN ); } + AIO_UNLOCK; return; } @@ -1920,10 +1965,12 @@ do_aio_completion( aio_workq_entry *entryp ) * call wakeup for them. If we do mark them we should unmark them after * the aio_suspend wakes up. */ + AIO_LOCK; + wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN ); + AIO_UNLOCK; + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); - - wakeup_one( &entryp->procp->AIO_SUSPEND_SLEEP_CHAN ); return; @@ -1971,20 +2018,27 @@ aio_last_group_io( aio_workq_entry *entryp ) static int do_aio_read( aio_workq_entry *entryp ) { - struct file *fp; + struct fileproc *fp; int error; - fp = holdfp( entryp->procp->p_fd, entryp->aiocb.aio_fildes, FREAD ); + if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp , 0)) ) + return(error); + if ( (fp->f_fglob->fg_flag & FREAD) == 0 ) { + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); + return(EBADF); + } if ( fp != NULL ) { error = dofileread( entryp->procp, fp, entryp->aiocb.aio_fildes, - (void *)entryp->aiocb.aio_buf, + entryp->aiocb.aio_buf, entryp->aiocb.aio_nbytes, entryp->aiocb.aio_offset, FOF_OFFSET, &entryp->returnval ); - frele( fp ); + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); } - else + else { + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); error = EBADF; + } return( error ); @@ -1997,20 +2051,28 @@ do_aio_read( aio_workq_entry *entryp ) static int do_aio_write( aio_workq_entry *entryp ) { - struct file *fp; + struct fileproc *fp; int error; - fp = holdfp( entryp->procp->p_fd, entryp->aiocb.aio_fildes, FWRITE ); + if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp , 0)) ) + return(error); + if ( (fp->f_fglob->fg_flag & FWRITE) == 0 ) { + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); + return(EBADF); + } if ( fp != NULL ) { error = dofilewrite( entryp->procp, fp, entryp->aiocb.aio_fildes, - (const void *)entryp->aiocb.aio_buf, + entryp->aiocb.aio_buf, entryp->aiocb.aio_nbytes, entryp->aiocb.aio_offset, 
FOF_OFFSET, &entryp->returnval ); - frele( fp ); + + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); } - else + else { + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); error = EBADF; + } return( error ); @@ -2038,21 +2100,32 @@ aio_active_requests_for_process( struct proc *procp ) static int do_aio_fsync( aio_workq_entry *entryp ) { - register struct vnode *vp; - struct file *fp; - int error; + struct vfs_context context; + struct vnode *vp; + struct fileproc *fp; + int error; /* * NOTE - we will not support AIO_DSYNC until fdatasync() is supported. * AIO_DSYNC is caught before we queue up a request and flagged as an error. * The following was shamelessly extracted from fsync() implementation. */ - error = getvnode( entryp->procp, entryp->aiocb.aio_fildes, &fp ); + + error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp); if ( error == 0 ) { - vp = (struct vnode *)fp->f_data; - vn_lock( vp, LK_EXCLUSIVE | LK_RETRY, entryp->procp ); - error = VOP_FSYNC( vp, fp->f_cred, MNT_WAIT, entryp->procp ); - VOP_UNLOCK( vp, 0, entryp->procp ); + if ( (error = vnode_getwithref(vp)) ) { + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); + entryp->returnval = -1; + return(error); + } + context.vc_proc = entryp->procp; + context.vc_ucred = fp->f_fglob->fg_cred; + + error = VNOP_FSYNC( vp, MNT_WAIT, &context); + + (void)vnode_put(vp); + + fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); } if ( error != 0 ) entryp->returnval = -1; @@ -2071,7 +2144,7 @@ do_aio_fsync( aio_workq_entry *entryp ) static boolean_t is_already_queued( struct proc *procp, - struct aiocb *aiocbp ) + user_addr_t aiocbp ) { aio_workq_entry *entryp; boolean_t result; @@ -2124,7 +2197,13 @@ aio_init( void ) { int i; - simple_lock_init( &aio_lock ); + aio_lock_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(aio_lock_grp_attr); + aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr); + aio_lock_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(aio_lock_attr); + + aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr); AIO_LOCK; TAILQ_INIT( &aio_anchor.aio_async_workq ); @@ -2173,5 +2252,39 @@ _aio_create_worker_threads( int num ) task_t get_aiotask(void) { - return ((struct uthread *)get_bsdthread_info(current_act()))->uu_aio_task; + return ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task; +} + + +/* + * In the case of an aiocb from a + * 32-bit process we need to expand some longs and pointers to the correct + * sizes in order to let downstream code always work on the same type of + * aiocb (in our case that is a user_aiocb) + */ +static void +do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ) +{ + the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes; + the_user_aiocbp->aio_offset = my_aiocbp->aio_offset; + the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf); + the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes; + the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio; + the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode; + + /* special case here. since we do not know if sigev_value is an */ + /* int or a ptr we do NOT cast the ptr to a user_addr_t. This */ + /* means if we send this info back to user space we need to remember */ + /* sigev_value was not expanded for the 32-bit case. */ + /* NOTE - this does NOT affect us since we don't support sigev_value */ + /* yet in the aio context. 
*/ + //LP64 + the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify; + the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo; + the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int = + my_aiocbp->aio_sigevent.sigev_value.sival_int; + the_user_aiocbp->aio_sigevent.sigev_notify_function = + CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function); + the_user_aiocbp->aio_sigevent.sigev_notify_attributes = + CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes); } diff --git a/bsd/kern/kern_audit.c b/bsd/kern/kern_audit.c index 2fa7b1d50..131047494 100644 --- a/bsd/kern/kern_audit.c +++ b/bsd/kern/kern_audit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,49 +19,52 @@ * * @APPLE_LICENSE_HEADER_END@ */ - #include -#include #include #include #include #include -#include +#include +#include #include #include #include #include #include #include -#include +#include +#include #include #include #include #include -#include -#include -#include +#include +#include +#include #include -#include -#include -#include +#include +#include #include #include #include #include -#include -#include +#include #include -#include +#include +#include +#include +#include #include #include -#include -#include +#include + +#include -#include +#include +#include #ifdef AUDIT @@ -73,8 +76,10 @@ */ /* #define AUDIT_EXCESSIVELY_VERBOSE */ #ifdef AUDIT_EXCESSIVELY_VERBOSE +#define AUDIT_PRINTF_ONLY #define AUDIT_PRINTF(x) printf x #else +#define AUDIT_PRINTF_ONLY __unused #define AUDIT_PRINTF(X) #endif @@ -108,9 +113,9 @@ static mutex_t *audit_mtx; * not yet in the queue, which is needed to estimate the total * size of the combined set of records outstanding in the system. */ -static TAILQ_HEAD(, kaudit_record) audit_q; -static int audit_q_len; -static int audit_pre_q_len; +static TAILQ_HEAD(, kaudit_record) audit_q; +static size_t audit_q_len; +static size_t audit_pre_q_len; static wait_queue_t audit_wait_queue; static zone_t audit_zone; @@ -123,6 +128,12 @@ static zone_t audit_zone; static int audit_worker_event; #define AUDIT_WORKER_EVENT ((event_t)&audit_worker_event) +/* + * The audit worker thread (which is lazy started when we first + * rotate the audit log. + */ +static thread_t audit_worker_thread = THREAD_NULL; + /* * When an audit log is rotated, the actual rotation must be performed * by the audit worker thread, as it may have outstanding writes on the @@ -140,9 +151,9 @@ static int audit_worker_event; static int audit_replacement_event; #define AUDIT_REPLACEMENT_EVENT ((event_t)&audit_replacement_event) -static int audit_replacement_flag; +static int audit_replacement_flag; static struct vnode *audit_replacement_vp; -static struct ucred *audit_replacement_cred; +static kauth_cred_t audit_replacement_cred; /* * Wait queue for auditing threads that cannot commit the audit @@ -157,8 +168,8 @@ static struct au_qctrl audit_qctrl; /* * Flags to use on audit files when opening and closing. */ -const static int audit_open_flags = FWRITE | O_APPEND; -const static int audit_close_flags = FWRITE | O_APPEND; +static const int audit_open_flags = FWRITE | O_APPEND; +static const int audit_close_flags = FWRITE | O_APPEND; /* * Global audit statistiscs. 
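[Note, closing out the AIO changes above: do_munge_aiocb() is the one place the 32-bit aiocb layout is widened into the kernel's user_aiocb, field by field, with pointers expanded explicitly; sigev_value is deliberately left unexpanded since it may hold either an int or a pointer. A condensed model with illustrative layouts, not the real aiocb/user_aiocb:

    #include <stdint.h>

    struct xaiocb32 { int fd; uint32_t buf; uint32_t nbytes; int lio_opcode; };
    struct xaiocb64 { int fd; uint64_t buf; uint64_t nbytes; int lio_opcode; };

    static void munge_aiocb(const struct xaiocb32 *in, struct xaiocb64 *out)
    {
        out->fd         = in->fd;
        out->buf        = (uint64_t)in->buf;    /* pointer widened unsigned */
        out->nbytes     = (uint64_t)in->nbytes;
        out->lio_opcode = in->lio_opcode;
        /* a sigev_value-like union would stay unexpanded: int or pointer */
    }
]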
@@ -203,54 +214,61 @@ static void audit_free(struct kaudit_record *ar) { if (ar->k_ar.ar_arg_upath1 != NULL) { - kfree((vm_offset_t)ar->k_ar.ar_arg_upath1, MAXPATHLEN); + kfree(ar->k_ar.ar_arg_upath1, MAXPATHLEN); } if (ar->k_ar.ar_arg_upath2 != NULL) { - kfree((vm_offset_t)ar->k_ar.ar_arg_upath2, MAXPATHLEN); + kfree(ar->k_ar.ar_arg_upath2, MAXPATHLEN); + } if (ar->k_ar.ar_arg_kpath1 != NULL) { - kfree((vm_offset_t)ar->k_ar.ar_arg_kpath1, MAXPATHLEN); + kfree(ar->k_ar.ar_arg_kpath1, MAXPATHLEN); + } if (ar->k_ar.ar_arg_kpath2 != NULL) { - kfree((vm_offset_t)ar->k_ar.ar_arg_kpath2, MAXPATHLEN); + kfree(ar->k_ar.ar_arg_kpath2, MAXPATHLEN); + } if (ar->k_ar.ar_arg_text != NULL) { - kfree((vm_offset_t)ar->k_ar.ar_arg_text, MAXPATHLEN); + kfree(ar->k_ar.ar_arg_text, MAXPATHLEN); + } if (ar->k_udata != NULL) { - kfree((vm_offset_t)ar->k_udata, (vm_size_t)ar->k_ulen); + kfree(ar->k_udata, ar->k_ulen); + } - zfree(audit_zone, (vm_offset_t)ar); + zfree(audit_zone, ar); } static int -audit_write(struct vnode *vp, struct kaudit_record *ar, struct ucred *cred, +audit_write(struct vnode *vp, struct kaudit_record *ar, kauth_cred_t cred, struct proc *p) { - struct statfs *mnt_stat = &vp->v_mount->mnt_stat; + struct vfsstatfs *mnt_stat = &vp->v_mount->mnt_vfsstat; int ret; struct au_record *bsm; - struct vattr vattr; + /* KVV maybe we should take a context as a param to audit_write? */ + struct vfs_context context; + off_t file_size; mach_port_t audit_port; - /* + /* * First, gather statistics on the audit log file and file system * so that we know how we're doing on space. In both cases, * if we're unable to perform the operation, we drop the record * and return. However, this is arguably an assertion failure. */ - ret = VFS_STATFS(vp->v_mount, mnt_stat, p); - if (ret) - goto out; - - ret = VOP_GETATTR(vp, &vattr, cred, p); + context.vc_proc = p; + context.vc_ucred = cred; + ret = vfs_update_vfsstat(vp->v_mount, &context); if (ret) goto out; /* update the global stats struct */ - audit_fstat.af_currsz = vattr.va_size; - + if ((ret = vnode_size(vp, &file_size, &context)) != 0) + goto out; + audit_fstat.af_currsz = file_size; + /* * Send a message to the audit daemon when disk space is getting * low. @@ -262,7 +280,7 @@ audit_write(struct vnode *vp, struct kaudit_record *ar, struct ucred *cred, printf("Cannot get audit control port\n"); if (audit_port != MACH_PORT_NULL) { - long temp; + uint64_t temp; /* * If we fall below percent free blocks, then trigger the @@ -290,7 +308,7 @@ audit_write(struct vnode *vp, struct kaudit_record *ar, struct ucred *cred, */ if ((audit_fstat.af_filesz != 0) && (audit_file_rotate_wait == 0) && - (vattr.va_size >= audit_fstat.af_filesz)) { + (file_size >= audit_fstat.af_filesz)) { audit_file_rotate_wait = 1; ret = audit_triggers(audit_port, AUDIT_TRIGGER_FILE_FULL); @@ -334,10 +352,15 @@ audit_write(struct vnode *vp, struct kaudit_record *ar, struct ucred *cred, * we ignore errors. */ if (ar->k_ar_commit & AR_COMMIT_USER) { - ret = vn_rdwr(UIO_WRITE, vp, (void *)ar->k_udata, ar->k_ulen, - (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, cred, NULL, p); - if (ret) + if (vnode_getwithref(vp) == 0) { + ret = vn_rdwr(UIO_WRITE, vp, (void *)ar->k_udata, ar->k_ulen, + (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, cred, NULL, p); + vnode_put(vp); + if (ret) + goto out; + } else { goto out; + } } /* @@ -371,9 +394,11 @@ audit_write(struct vnode *vp, struct kaudit_record *ar, struct ucred *cred, * done before this function is called. 
This function will then * take the BSM record as a parameter. */ - ret = (vn_rdwr(UIO_WRITE, vp, (void *)bsm->data, bsm->len, - (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, cred, NULL, p)); - + if ((ret = vnode_getwithref(vp)) == 0) { + ret = (vn_rdwr(UIO_WRITE, vp, (void *)bsm->data, bsm->len, + (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, cred, NULL, p)); + vnode_put(vp); + } kau_free(bsm); out: @@ -385,9 +410,7 @@ out: */ if (audit_in_failure && audit_q_len == 0 && audit_pre_q_len == 0) { - VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); - (void)VOP_FSYNC(vp, cred, MNT_WAIT, p); - VOP_UNLOCK(vp, 0, p); + (void)VNOP_FSYNC(vp, MNT_WAIT, &context); panic("Audit store overflow; record queue drained."); } @@ -395,14 +418,14 @@ out: } static void -audit_worker() +audit_worker(void) { int do_replacement_signal, error, release_funnel; TAILQ_HEAD(, kaudit_record) ar_worklist; - struct kaudit_record *ar, *ar_start, *ar_stop; + struct kaudit_record *ar; struct vnode *audit_vp, *old_vp; - - struct ucred *audit_cred, *old_cred; + kauth_cred_t audit_cred; + kauth_cred_t old_cred; struct proc *audit_p; AUDIT_PRINTF(("audit_worker starting\n")); @@ -456,8 +479,8 @@ audit_worker() AUDIT_PRINTF(("Closing old audit file\n")); vn_close(old_vp, audit_close_flags, old_cred, audit_p); - crfree(old_cred); - old_cred = NULL; + kauth_cred_rele(old_cred); + old_cred = NOCRED; old_vp = NULL; AUDIT_PRINTF(("Audit file closed\n")); } @@ -492,7 +515,8 @@ audit_worker() AUDIT_PRINTF(("audit_worker waiting\n")); ret = wait_queue_assert_wait(audit_wait_queue, AUDIT_WORKER_EVENT, - THREAD_UNINT); + THREAD_UNINT, + 0); mutex_unlock(audit_mtx); assert(ret == THREAD_WAITING); @@ -501,6 +525,7 @@ audit_worker() AUDIT_PRINTF(("audit_worker woken up\n")); AUDIT_PRINTF(("audit_worker: new vp = %p; value of flag %d\n", audit_replacement_vp, audit_replacement_flag)); + mutex_lock(audit_mtx); continue; } @@ -561,7 +586,6 @@ audit_worker() TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q); } - mutex_unlock(audit_mtx); release_funnel = 0; while ((ar = TAILQ_FIRST(&ar_worklist))) { @@ -575,17 +599,16 @@ audit_worker() thread_funnel_set(kernel_flock, TRUE); release_funnel = 1; } - VOP_LEASE(audit_vp, audit_p, audit_cred, - LEASE_WRITE); error = audit_write(audit_vp, ar, audit_cred, audit_p); - if (error && audit_panic_on_write_fail) + if (error && audit_panic_on_write_fail) { panic("audit_worker: write error %d\n", error); - else if (error) + } else if (error) { printf("audit_worker: write error %d\n", error); } + } audit_free(ar); } if (release_funnel) @@ -623,7 +646,7 @@ audit_init(void) audit_qctrl.aq_bufsz = AQ_BUFSZ; audit_qctrl.aq_minfree = AU_FS_MINFREE; - audit_mtx = mutex_alloc(ETAP_NO_TRACE); + audit_mtx = mutex_alloc(0); audit_wait_queue = wait_queue_alloc(SYNC_POLICY_FIFO); audit_zone = zinit(sizeof(struct kaudit_record), AQ_HIWATER*sizeof(struct kaudit_record), @@ -632,12 +655,10 @@ audit_init(void) /* Initialize the BSM audit subsystem. 
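[Note: audit_worker() above distinguishes two policies when audit_write() fails — panic (halt rather than lose records, per the AHLT policy bit) or log the error and drop the record. Condensed, with abort() standing in for panic() and names illustrative:

    #include <stdio.h>
    #include <stdlib.h>

    static int panic_on_write_fail;    /* models audit_panic_on_write_fail */

    static void on_audit_write_error(int error)
    {
        if (error == 0)
            return;
        if (panic_on_write_fail) {
            fprintf(stderr, "audit_worker: write error %d\n", error);
            abort();                   /* stands in for panic() */
        }
        fprintf(stderr, "audit_worker: write error %d (record dropped)\n", error);
    }
]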
*/ kau_init(); - - kernel_thread(kernel_task, audit_worker); } static void -audit_rotate_vnode(struct ucred *cred, struct vnode *vp) +audit_rotate_vnode(kauth_cred_t cred, struct vnode *vp) { int ret; @@ -652,7 +673,8 @@ audit_rotate_vnode(struct ucred *cred, struct vnode *vp) "flag\n")); ret = wait_queue_assert_wait(audit_wait_queue, AUDIT_REPLACEMENT_EVENT, - THREAD_UNINT); + THREAD_UNINT, + 0); mutex_unlock(audit_mtx); assert(ret == THREAD_WAITING); @@ -668,10 +690,16 @@ audit_rotate_vnode(struct ucred *cred, struct vnode *vp) audit_replacement_vp = vp; /* - * Wake up the audit worker to perform the exchange once we - * release the mutex. + * Start or wake up the audit worker to perform the exchange. + * It will have to wait until we release the mutex. */ - wait_queue_wakeup_one(audit_wait_queue, AUDIT_WORKER_EVENT, THREAD_AWAKENED); + if (audit_worker_thread == THREAD_NULL) + audit_worker_thread = kernel_thread(kernel_task, + audit_worker); + else + wait_queue_wakeup_one(audit_wait_queue, + AUDIT_WORKER_EVENT, + THREAD_AWAKENED); /* * Wait for the audit_worker to broadcast that a replacement has @@ -682,7 +710,8 @@ audit_rotate_vnode(struct ucred *cred, struct vnode *vp) "replacement\n")); ret = wait_queue_assert_wait(audit_wait_queue, AUDIT_REPLACEMENT_EVENT, - THREAD_UNINT); + THREAD_UNINT, + 0); mutex_unlock(audit_mtx); assert(ret == THREAD_WAITING); @@ -706,7 +735,7 @@ audit_shutdown(void) static __inline__ struct uthread * curuthread(void) { - return (get_bsdthread_info(current_act())); + return (get_bsdthread_info(current_thread())); } static __inline__ struct kaudit_record * @@ -727,25 +756,20 @@ currecord(void) * work, since we pre-select only based on the AUE_audit event type, * not the event type submitted as part of the user audit data. */ -struct audit_args { - void * record; - int length; -}; /* ARGSUSED */ int -audit(struct proc *p, struct audit_args *uap, register_t *retval) +audit(struct proc *p, struct audit_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int error; void * rec; struct kaudit_record *ar; struct uthread *uthr; - error = suser(pc->pc_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); - if ((uap->length <= 0) || (uap->length > audit_qctrl.aq_bufsz)) + if ((uap->length <= 0) || (uap->length > (int)audit_qctrl.aq_bufsz)) return (EINVAL); ar = currecord(); @@ -756,7 +780,7 @@ audit(struct proc *p, struct audit_args *uap, register_t *retval) if (ar == NULL) { uthr = curuthread(); if (uthr == NULL) /* can this happen? */ - return (ENOTSUP); + return (ENOTSUP); /* This is not very efficient; we're required to allocate * a complete kernel audit record just so the user record @@ -796,35 +820,29 @@ free_out: /* audit_syscall_exit() will free the audit record on the thread * even if we allocated it above. */ - kfree((vm_offset_t)rec, (vm_size_t)uap->length); + kfree(rec, uap->length); return (error); } /* * System call to manipulate auditing. 
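The rotate-vnode hunk above starts the worker thread lazily: audit_init() no longer spawns it at boot, and the first vnode exchange creates it on demand. Since this appears to run with audit_mtx held, the THREAD_NULL test cannot race with a second creator. The start-on-first-use pattern in isolation, mirroring the patch:

        /* called with audit_mtx held */
        if (audit_worker_thread == THREAD_NULL) {
                /* first use: spawn the worker instead of waking it */
                audit_worker_thread = kernel_thread(kernel_task,
                    audit_worker);
        } else {
                wait_queue_wakeup_one(audit_wait_queue,
                    AUDIT_WORKER_EVENT, THREAD_AWAKENED);
        }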
*/ -struct auditon_args { - int cmd; - void * data; - int length; -}; /* ARGSUSED */ int -auditon(struct proc *p, struct auditon_args *uap, register_t *retval) +auditon(struct proc *p, __unused struct auditon_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int ret; int len; union auditon_udata udata; struct proc *tp; AUDIT_ARG(cmd, uap->cmd); - ret = suser(pc->pc_ucred, &p->p_acflag); + ret = suser(kauth_cred_get(), &p->p_acflag); if (ret) return (ret); len = uap->length; - if ((len <= 0) || (len > sizeof(union auditon_udata))) + if ((len <= 0) || (len > (int)sizeof(union auditon_udata))) return (EINVAL); memset((void *)&udata, 0, sizeof(udata)); @@ -850,7 +868,7 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) return (ret); AUDIT_ARG(auditon, &udata); break; - } +} /* XXX Need to implement these commands by accessing the global * values associated with the commands. @@ -865,9 +883,9 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) case A_SETPOLICY: if (udata.au_policy & ~(AUDIT_CNT|AUDIT_AHLT)) return (EINVAL); - /* +/* * XXX - Need to wake up waiters if the policy relaxes? - */ + */ audit_fail_stop = ((udata.au_policy & AUDIT_CNT) == 0); audit_panic_on_write_fail = (udata.au_policy & AUDIT_AHLT); break; @@ -940,16 +958,16 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) if ((tp = pfind(udata.au_aupinfo.ap_pid)) == NULL) return (EINVAL); - udata.au_aupinfo.ap_auid = tp->p_au->ai_auid; + udata.au_aupinfo.ap_auid = tp->p_ucred->cr_au.ai_auid; udata.au_aupinfo.ap_mask.am_success = - tp->p_au->ai_mask.am_success; + tp->p_ucred->cr_au.ai_mask.am_success; udata.au_aupinfo.ap_mask.am_failure = - tp->p_au->ai_mask.am_failure; + tp->p_ucred->cr_au.ai_mask.am_failure; udata.au_aupinfo.ap_termid.machine = - tp->p_au->ai_termid.machine; + tp->p_ucred->cr_au.ai_termid.machine; udata.au_aupinfo.ap_termid.port = - tp->p_au->ai_termid.port; - udata.au_aupinfo.ap_asid = tp->p_au->ai_asid; + tp->p_ucred->cr_au.ai_termid.port; + udata.au_aupinfo.ap_asid = tp->p_ucred->cr_au.ai_asid; break; case A_SETPMASK: if (udata.au_aupinfo.ap_pid < 1) @@ -957,10 +975,49 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) if ((tp = pfind(udata.au_aupinfo.ap_pid)) == NULL) return (EINVAL); - tp->p_au->ai_mask.am_success = - udata.au_aupinfo.ap_mask.am_success; - tp->p_au->ai_mask.am_failure = - udata.au_aupinfo.ap_mask.am_failure; + /* + * we are modifying the audit info in a credential so we need a new + * credential (or take another reference on an existing credential that + * matches our new one). We must do this because the audit info in the + * credential is used as part of our hash key. Get current credential + * in the target process and take a reference while we muck with it. + */ + for (;;) { + kauth_cred_t my_cred, my_new_cred; + struct auditinfo temp_auditinfo; + + my_cred = kauth_cred_proc_ref(tp); + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + temp_auditinfo = my_cred->cr_au; + temp_auditinfo.ai_mask.am_success = + udata.au_aupinfo.ap_mask.am_success; + temp_auditinfo.ai_mask.am_failure = + udata.au_aupinfo.ap_mask.am_failure; + my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); + + if (my_cred != my_new_cred) { + proc_lock(tp); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. 
If p_ucred has + * changed then we should restart this again with the new cred. + */ + if (tp->p_ucred != my_cred) { + proc_unlock(tp); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + tp->p_ucred = my_new_cred; + proc_unlock(tp); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } break; case A_SETFSIZE: if ((udata.au_fstat.af_filesz != 0) && @@ -979,9 +1036,9 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) return (ENOSYS); break; case A_SETKAUDIT: - return (ENOSYS); + return (ENOSYS); break; - } +} /* Copy data back to userspace for the GET comands */ switch (uap->cmd) { case A_GETPOLICY: @@ -1009,47 +1066,82 @@ auditon(struct proc *p, struct auditon_args *uap, register_t *retval) * System calls to manage the user audit information. * XXXAUDIT May need to lock the proc structure. */ -struct getauid_args { - au_id_t *auid; -}; /* ARGSUSED */ int -getauid(struct proc *p, struct getauid_args *uap, register_t *retval) +getauid(struct proc *p, struct getauid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int error; - error = copyout((void *)&p->p_au->ai_auid, (void *)uap->auid, - sizeof(*uap->auid)); + error = copyout((void *)&kauth_cred_get()->cr_au.ai_auid, + uap->auid, sizeof(au_id_t)); if (error) return (error); return (0); } -struct setauid_args { - au_id_t *auid; -}; /* ARGSUSED */ int -setauid(struct proc *p, struct setauid_args *uap, register_t *retval) +setauid(struct proc *p, struct setauid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int error; + au_id_t temp_au_id; - error = suser(pc->pc_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); - error = copyin((void *)uap->auid, (void *)&p->p_au->ai_auid, - sizeof(p->p_au->ai_auid)); + error = copyin(uap->auid, + (void *)&temp_au_id, + sizeof(au_id_t)); if (error) return (error); + /* + * we are modifying the audit info in a credential so we need a new + * credential (or take another reference on an existing credential that + * matches our new one). We must do this because the audit info in the + * credential is used as part of our hash key. Get current credential + * in the target process and take a reference while we muck with it. + */ + for (;;) { + kauth_cred_t my_cred, my_new_cred; + struct auditinfo temp_auditinfo; + + my_cred = kauth_cred_proc_ref(p); + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + temp_auditinfo = my_cred->cr_au; + temp_auditinfo.ai_auid = temp_au_id; + my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); + + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. 
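A_SETPMASK above, and setauid()/setaudit() below, all instantiate the same compare-and-swap loop: the audit info lives inside the shared, refcounted credential and is part of its hash key, so it cannot be mutated in place — a new credential is built and swapped into p_ucred, retrying if another thread changed the credential first. The loop reduced to its skeleton (the "modify fields" step stands in for each call site's specific updates):

        for (;;) {
                kauth_cred_t my_cred, my_new_cred;
                struct auditinfo temp_auditinfo;

                my_cred = kauth_cred_proc_ref(p);   /* ref current cred */
                temp_auditinfo = my_cred->cr_au;
                /* ...modify temp_auditinfo fields for this call site... */
                my_new_cred = kauth_cred_setauditinfo(my_cred,
                    &temp_auditinfo);

                if (my_cred != my_new_cred) {
                        proc_lock(p);
                        if (p->p_ucred != my_cred) {
                                /* lost the race; retry with the new cred */
                                proc_unlock(p);
                                kauth_cred_rele(my_cred);
                                kauth_cred_rele(my_new_cred);
                                continue;
                        }
                        p->p_ucred = my_new_cred;
                        proc_unlock(p);
                }
                kauth_cred_rele(my_cred);   /* drop our extra reference */
                break;
        }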
+ */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + /* propagate the change from the process to Mach task */ set_security_token(p); - audit_arg_auid(p->p_au->ai_auid); + audit_arg_auid(kauth_cred_get()->cr_au.ai_auid); return (0); } @@ -1060,80 +1152,106 @@ setauid(struct proc *p, struct setauid_args *uap, register_t *retval) * filtered out - but the rest of the information is * returned. */ -struct getaudit_args { - struct auditinfo *auditinfo; -}; /* ARGSUSED */ int -getaudit(struct proc *p, struct getaudit_args *uap, register_t *retval) +getaudit(struct proc *p, struct getaudit_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; - struct auditinfo ai = *p->p_au; + struct auditinfo ai; int error; + ai = kauth_cred_get()->cr_au; + /* only superuser gets to see the real mask */ - error = suser(pc->pc_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) { ai.ai_mask.am_success = ~0; ai.ai_mask.am_failure = ~0; } - error = copyout((void *)&ai, (void *)uap->auditinfo, sizeof(ai)); + error = copyout(&ai, uap->auditinfo, sizeof(ai)); if (error) return (error); return (0); } -struct setaudit_args { - struct auditinfo *auditinfo; -}; /* ARGSUSED */ int -setaudit(struct proc *p, struct setaudit_args *uap, register_t *retval) +setaudit(struct proc *p, struct setaudit_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int error; + struct auditinfo temp_auditinfo; - error = suser(pc->pc_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); - error = copyin((void *)uap->auditinfo, (void *)p->p_au, - sizeof(*p->p_au)); + + error = copyin(uap->auditinfo, + (void *)&temp_auditinfo, + sizeof(temp_auditinfo)); if (error) return (error); + /* + * we are modifying the audit info in a credential so we need a new + * credential (or take another reference on an existing credential that + * matches our new one). We must do this because the audit info in the + * credential is used as part of our hash key. Get current credential + * in the target process and take a reference while we muck with it. + */ + for (;;) { + kauth_cred_t my_cred, my_new_cred; + + my_cred = kauth_cred_proc_ref(p); + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); + + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. 
+ */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + /* propagate the change from the process to Mach task */ set_security_token(p); - audit_arg_auditinfo(p->p_au); + audit_arg_auditinfo(&p->p_ucred->cr_au); return (0); } -struct getaudit_addr_args { - struct auditinfo_addr *auditinfo_addr; - int length; -}; /* ARGSUSED */ int -getaudit_addr(struct proc *p, struct getaudit_addr_args *uap, register_t *retval) +getaudit_addr(struct proc *p, __unused struct getaudit_addr_args *uap, __unused register_t *retval) { return (ENOSYS); } -struct setaudit_addr_args { - struct auditinfo_addr *auditinfo_addr; - int length; -}; /* ARGSUSED */ int -setaudit_addr(struct proc *p, struct setaudit_addr_args *uap, register_t *retval) +setaudit_addr(struct proc *p, __unused struct setaudit_addr_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; int error; - error = suser(pc->pc_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); return (ENOSYS); @@ -1143,20 +1261,20 @@ setaudit_addr(struct proc *p, struct setaudit_addr_args *uap, register_t *retval * Syscall to manage audit files. * */ -struct auditctl_args { - char *path; -}; /* ARGSUSED */ int -auditctl(struct proc *p, struct auditctl_args *uap) +auditctl(struct proc *p, struct auditctl_args *uap, __unused register_t *retval) { - struct kaudit_record *ar; struct nameidata nd; - struct ucred *cred; + kauth_cred_t cred; struct vnode *vp; - int error, flags, ret; + int error, flags; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - error = suser(p->p_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); @@ -1168,26 +1286,31 @@ auditctl(struct proc *p, struct auditctl_args *uap) * validity checks, and grab another reference to the current * credential. */ - if (uap->path != NULL) { + if (uap->path != 0) { NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - UIO_USERSPACE, uap->path, p); + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + uap->path, &context); flags = audit_open_flags; error = vn_open(&nd, flags, 0); if (error) goto out; - VOP_UNLOCK(nd.ni_vp, 0, p); vp = nd.ni_vp; if (vp->v_type != VREG) { - vn_close(vp, audit_close_flags, p->p_ucred, p); + vn_close(vp, audit_close_flags, kauth_cred_get(), p); + vnode_put(vp); error = EINVAL; goto out; } - cred = p->p_ucred; - crhold(cred); + cred = kauth_cred_get_with_ref(); audit_suspended = 0; } - + /* + * a vp and cred of NULL is valid at this point + * and indicates we're to turn off auditing... + */ audit_rotate_vnode(cred, vp); + if (vp) + vnode_put(vp); out: return (error); } @@ -1200,7 +1323,7 @@ out: * MPSAFE */ struct kaudit_record * -audit_new(int event, struct proc *p, struct uthread *uthread) +audit_new(int event, struct proc *p, __unused struct uthread *uthread) { struct kaudit_record *ar; int no_record; @@ -1248,14 +1371,14 @@ audit_new(int event, struct proc *p, struct uthread *uthread) /* Export the subject credential. 
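auditctl() above also shows a recurring idiom in this patch: user pointers are sized by the caller's address-space width, so the segment flag handed to NDINIT() must match how the address was produced. And under the reworked VFS KPI, vn_open() appears to return the vnode with an iocount rather than a vnode lock, which is why the VOP_UNLOCK() disappears and vnode_put() pairs with the open. In isolation:

        /* pick the user segment flag to match the caller's bitness */
        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
            (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
            uap->path, &context);
        error = vn_open(&nd, audit_open_flags, 0);
        if (error == 0) {
                /* nd.ni_vp carries an iocount; release it with
                 * vnode_put() when done, as the hunk above does */
                vp = nd.ni_vp;
        }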
*/ cru2x(p->p_ucred, &ar->k_ar.ar_subj_cred); - ar->k_ar.ar_subj_ruid = p->p_cred->p_ruid; - ar->k_ar.ar_subj_rgid = p->p_cred->p_rgid; + ar->k_ar.ar_subj_ruid = p->p_ucred->cr_ruid; + ar->k_ar.ar_subj_rgid = p->p_ucred->cr_rgid; ar->k_ar.ar_subj_egid = p->p_ucred->cr_groups[0]; - ar->k_ar.ar_subj_auid = p->p_au->ai_auid; - ar->k_ar.ar_subj_asid = p->p_au->ai_asid; + ar->k_ar.ar_subj_auid = p->p_ucred->cr_au.ai_auid; + ar->k_ar.ar_subj_asid = p->p_ucred->cr_au.ai_asid; ar->k_ar.ar_subj_pid = p->p_pid; - ar->k_ar.ar_subj_amask = p->p_au->ai_mask; - ar->k_ar.ar_subj_term = p->p_au->ai_termid; + ar->k_ar.ar_subj_amask = p->p_ucred->cr_au.ai_mask; + ar->k_ar.ar_subj_term = p->p_ucred->cr_au.ai_termid; bcopy(p->p_comm, ar->k_ar.ar_subj_comm, MAXCOMLEN); return (ar); @@ -1268,7 +1391,6 @@ audit_new(int event, struct proc *p, struct uthread *uthread) void audit_abort(struct kaudit_record *ar) { - mutex_lock(audit_mtx); audit_pre_q_len--; mutex_unlock(audit_mtx); @@ -1326,7 +1448,7 @@ audit_commit(struct kaudit_record *ar, int error, int retval) if (au_preselect(ar->k_ar.ar_event, aumask, sorf) != 0) ar->k_ar_commit |= AR_COMMIT_KERNEL; - if (ar->k_ar_commit & (AR_COMMIT_USER | AR_COMMIT_KERNEL) == 0) { + if ((ar->k_ar_commit & (AR_COMMIT_USER | AR_COMMIT_KERNEL)) == 0) { mutex_lock(audit_mtx); audit_pre_q_len--; mutex_unlock(audit_mtx); @@ -1348,7 +1470,6 @@ audit_commit(struct kaudit_record *ar, int error, int retval) nanotime(&ar->k_ar.ar_endtime); mutex_lock(audit_mtx); - /* * Note: it could be that some records initiated while audit was * enabled should still be committed? @@ -1359,7 +1480,7 @@ audit_commit(struct kaudit_record *ar, int error, int retval) audit_free(ar); return; } - + /* * Constrain the number of committed audit records based on * the configurable parameter. @@ -1368,7 +1489,8 @@ audit_commit(struct kaudit_record *ar, int error, int retval) ret = wait_queue_assert_wait(audit_wait_queue, AUDIT_COMMIT_EVENT, - THREAD_UNINT); + THREAD_UNINT, + 0); mutex_unlock(audit_mtx); assert(ret == THREAD_WAITING); @@ -1405,39 +1527,41 @@ audit_syscall_enter(unsigned short code, struct proc *proc, /* Check which audit mask to use; either the kernel non-attributable * event mask or the process audit mask. */ - if (proc->p_au->ai_auid == AU_DEFAUDITID) + if (proc->p_ucred->cr_au.ai_auid == AU_DEFAUDITID) aumask = &audit_nae_mask; else - aumask = &proc->p_au->ai_mask; - + aumask = &proc->p_ucred->cr_au.ai_mask; + /* * Allocate an audit record, if preselection allows it, and store * in the BSD thread for later use. */ if (au_preselect(audit_event, aumask, - AU_PRS_FAILURE | AU_PRS_SUCCESS)) { + AU_PRS_FAILURE | AU_PRS_SUCCESS)) { /* * If we're out of space and need to suspend unprivileged * processes, do that here rather than trying to allocate * another audit record. 
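One hunk above is a pure operator-precedence fix: in C, '==' binds tighter than '&', so the old test "k_ar_commit & (AR_COMMIT_USER | AR_COMMIT_KERNEL) == 0" parsed as "k_ar_commit & ((USER|KERNEL) == 0)", i.e. "k_ar_commit & 0", which is never true — so unselected records were never discarded on that path. The same fix recurs later for audit_arg_upath() and audit_arg_vnpath(). A standalone userland demonstration with stand-in flag values:

#include <assert.h>

#define AR_USER   0x1
#define AR_KERNEL 0x2

int
main(void)
{
        int flags = 0;  /* no commit bits set */

        /* buggy form: parses as flags & 0, so it is always false */
        assert(!(flags & (AR_USER | AR_KERNEL) == 0));
        /* fixed form: true exactly when neither bit is set */
        assert((flags & (AR_USER | AR_KERNEL)) == 0);
        return 0;
}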
*/ if (audit_in_failure && - suser(proc->p_ucred, &proc->p_acflag) != 0) { + suser(kauth_cred_get(), &proc->p_acflag) != 0) { int ret; + assert(audit_worker_thread != THREAD_NULL); ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_FAILURE_EVENT, THREAD_UNINT); + AUDIT_FAILURE_EVENT, THREAD_UNINT, 0); assert(ret == THREAD_WAITING); (void)thread_block(THREAD_CONTINUE_NULL); panic("audit_failing_stop: thread continued"); } - uthread->uu_ar = audit_new(audit_event, proc, uthread); - } else - uthread->uu_ar = NULL; -} + uthread->uu_ar = audit_new(audit_event, proc, uthread); + } else { + uthread->uu_ar = NULL; + } + } void -audit_syscall_exit(int error, struct proc *proc, struct uthread *uthread) +audit_syscall_exit(int error, AUDIT_PRINTF_ONLY struct proc *proc, struct uthread *uthread) { int retval; @@ -1455,8 +1579,9 @@ audit_syscall_exit(int error, struct proc *proc, struct uthread *uthread) retval = uthread->uu_rval[0]; audit_commit(uthread->uu_ar, error, retval); - if (uthread->uu_ar != NULL) + if (uthread->uu_ar != NULL) { AUDIT_PRINTF(("audit record committed by pid %d\n", proc->p_pid)); + } uthread->uu_ar = NULL; } @@ -1488,10 +1613,10 @@ audit_mach_syscall_enter(unsigned short audit_event) /* Check which audit mask to use; either the kernel non-attributable * event mask or the process audit mask. */ - if (proc->p_au->ai_auid == AU_DEFAUDITID) + if (proc->p_ucred->cr_au.ai_auid == AU_DEFAUDITID) aumask = &audit_nae_mask; else - aumask = &proc->p_au->ai_mask; + aumask = &proc->p_ucred->cr_au.ai_mask; /* * Allocate an audit record, if desired, and store in the BSD @@ -1526,7 +1651,7 @@ audit_mach_syscall_exit(int retval, struct uthread *uthread) * record for this event. */ void -audit_arg_addr(void * addr) +audit_arg_addr(user_addr_t addr) { struct kaudit_record *ar; @@ -1534,12 +1659,12 @@ audit_arg_addr(void * addr) if (ar == NULL) return; - ar->k_ar.ar_arg_addr = addr; + ar->k_ar.ar_arg_addr = CAST_DOWN(void *, addr); /* XXX */ ar->k_ar.ar_valid_arg |= ARG_ADDR; } void -audit_arg_len(int len) +audit_arg_len(user_size_t len) { struct kaudit_record *ar; @@ -1547,7 +1672,7 @@ audit_arg_len(int len) if (ar == NULL) return; - ar->k_ar.ar_arg_len = len; + ar->k_ar.ar_arg_len = CAST_DOWN(int, len); /* XXX */ ar->k_ar.ar_valid_arg |= ARG_LEN; } @@ -1610,9 +1735,9 @@ audit_arg_uid(uid_t uid, uid_t euid, uid_t ruid, uid_t suid) } void -audit_arg_groupset(gid_t *gidset, u_int gidset_size) +audit_arg_groupset(const gid_t *gidset, u_int gidset_size) { - int i; + uint i; struct kaudit_record *ar; ar = currecord(); @@ -1626,7 +1751,7 @@ audit_arg_groupset(gid_t *gidset, u_int gidset_size) } void -audit_arg_login(char *login) +audit_arg_login(const char *login) { struct kaudit_record *ar; @@ -1647,7 +1772,7 @@ audit_arg_login(char *login) } void -audit_arg_ctlname(int *name, int namelen) +audit_arg_ctlname(const int *name, int namelen) { struct kaudit_record *ar; @@ -1730,7 +1855,6 @@ void audit_arg_pid(pid_t pid) { struct kaudit_record *ar; - struct proc *p; ar = currecord(); if (ar == NULL) @@ -1738,7 +1862,6 @@ audit_arg_pid(pid_t pid) ar->k_ar.ar_arg_pid = pid; ar->k_ar.ar_valid_arg |= ARG_PID; - } void @@ -1750,15 +1873,13 @@ audit_arg_process(struct proc *p) if ((ar == NULL) || (p == NULL)) return; - /* XXX May need to lock the credentials structures */ - ar->k_ar.ar_arg_auid = p->p_au->ai_auid; + ar->k_ar.ar_arg_auid = p->p_ucred->cr_au.ai_auid; ar->k_ar.ar_arg_euid = p->p_ucred->cr_uid; ar->k_ar.ar_arg_egid = p->p_ucred->cr_groups[0]; - ar->k_ar.ar_arg_ruid = p->p_cred->p_ruid; - 
ar->k_ar.ar_arg_rgid = p->p_cred->p_rgid; - ar->k_ar.ar_arg_asid = p->p_au->ai_asid; - - ar->k_ar.ar_arg_termid = p->p_au->ai_termid; + ar->k_ar.ar_arg_ruid = p->p_ucred->cr_ruid; + ar->k_ar.ar_arg_rgid = p->p_ucred->cr_rgid; + ar->k_ar.ar_arg_asid = p->p_ucred->cr_au.ai_asid; + ar->k_ar.ar_arg_termid = p->p_ucred->cr_au.ai_termid; ar->k_ar.ar_valid_arg |= ARG_AUID | ARG_EUID | ARG_EGID | ARG_RUID | ARG_RGID | ARG_ASID | ARG_TERMID | ARG_PROCESS; @@ -1832,7 +1953,7 @@ audit_arg_auid(uid_t auid) } void -audit_arg_auditinfo(struct auditinfo *au_info) +audit_arg_auditinfo(const struct auditinfo *au_info) { struct kaudit_record *ar; @@ -1850,7 +1971,7 @@ audit_arg_auditinfo(struct auditinfo *au_info) } void -audit_arg_text(char *text) +audit_arg_text(const char *text) { struct kaudit_record *ar; @@ -1900,7 +2021,7 @@ audit_arg_svipc_cmd(int cmd) } void -audit_arg_svipc_perm(struct ipc_perm *perm) +audit_arg_svipc_perm(const struct ipc_perm *perm) { struct kaudit_record *ar; @@ -1955,7 +2076,7 @@ audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode) } void -audit_arg_auditon(union auditon_udata *udata) +audit_arg_auditon(const union auditon_udata *udata) { struct kaudit_record *ar; @@ -1963,32 +2084,32 @@ audit_arg_auditon(union auditon_udata *udata) if (ar == NULL) return; - bcopy((void *)udata, &ar->k_ar.ar_arg_auditon, + bcopy((const void *)udata, &ar->k_ar.ar_arg_auditon, sizeof(ar->k_ar.ar_arg_auditon)); ar->k_ar.ar_valid_arg |= ARG_AUDITON; } -/* +/* * Audit information about a file, either the file's vnode info, or its * socket address info. */ void -audit_arg_file(struct proc *p, struct file *fp) +audit_arg_file(__unused struct proc *p, const struct fileproc *fp) { struct kaudit_record *ar; struct socket *so; struct inpcb *pcb; - if (fp->f_type == DTYPE_VNODE) { - audit_arg_vnpath((struct vnode *)fp->f_data, ARG_VNODE1); + if (fp->f_fglob->fg_type == DTYPE_VNODE) { + audit_arg_vnpath_withref((struct vnode *)fp->f_fglob->fg_data, ARG_VNODE1); return; } - if (fp->f_type == DTYPE_SOCKET) { + if (fp->f_fglob->fg_type == DTYPE_SOCKET) { ar = currecord(); if (ar == NULL) return; - so = (struct socket *)fp->f_data; + so = (struct socket *)fp->f_fglob->fg_data; if (INP_CHECK_SOCKAF(so, PF_INET)) { if (so->so_pcb == NULL) return; @@ -2013,51 +2134,6 @@ audit_arg_file(struct proc *p, struct file *fp) } -/* - * Initialize the audit information for the a process, presumably the first - * process in the system. - * XXX It is not clear what the initial values should be for session ID, - * terminal ID etc. - */ -void -audit_proc_init(struct proc *p) -{ - MALLOC_ZONE(p->p_au, struct auditinfo *, sizeof(*p->p_au), - M_SUBPROC, M_WAITOK); - - bzero((void *)p->p_au, sizeof(*p->p_au)); - - p->p_au->ai_auid = AU_DEFAUDITID; -} - -/* - * Copy the audit info from the parent process to the child process when - * a fork takes place. - * XXX Need to check for failure from the memory allocation, in here - * as well as in any functions that use the process auditing info. - */ -void -audit_proc_fork(struct proc *parent, struct proc *child) -{ - /* Always set up the audit information pointer as this function - * should only be called when the proc is new. If proc structures - * are ever cached and reused, then this behavior will leak memory. - */ - MALLOC_ZONE(child->p_au, struct auditinfo *, sizeof(*child->p_au), - M_SUBPROC, M_WAITOK); - - bcopy(parent->p_au, child->p_au, sizeof(*child->p_au)); -} - -/* - * Free the auditing structure for the process. 
- */ -void -audit_proc_free(struct proc *p) -{ - FREE_ZONE((void *)p->p_au, sizeof(*p->p_au), M_SUBPROC); - p->p_au = NULL; -} /* * Store a path as given by the user process for auditing into the audit @@ -2074,7 +2150,7 @@ audit_arg_upath(struct proc *p, char *upath, u_int64_t flags) if (p == NULL || upath == NULL) return; /* nothing to do! */ - if (flags & (ARG_UPATH1 | ARG_UPATH2) == 0) + if ((flags & (ARG_UPATH1 | ARG_UPATH2)) == 0) return; ar = currecord(); @@ -2101,9 +2177,9 @@ audit_arg_upath(struct proc *p, char *upath, u_int64_t flags) ar->k_ar.ar_valid_arg |= ARG_UPATH1; else ar->k_ar.ar_valid_arg |= ARG_UPATH2; - } else { - kfree((vm_offset_t)*pathp, MAXPATHLEN); - *pathp = NULL; + } else { + kfree(*pathp, MAXPATHLEN); + *pathp = NULL; } } @@ -2112,7 +2188,7 @@ audit_arg_upath(struct proc *p, char *upath, u_int64_t flags) * record. * * It is assumed that the caller will hold any vnode locks necessary to - * perform a VOP_GETATTR() on the passed vnode. + * perform a VNOP_GETATTR() on the passed vnode. * * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but * always provides access to the generation number as we need that @@ -2125,12 +2201,13 @@ void audit_arg_vnpath(struct vnode *vp, u_int64_t flags) { struct kaudit_record *ar; - struct vattr vattr; + struct vnode_attr va; int error; int len; char **pathp; struct vnode_au_info *vnp; struct proc *p; + struct vfs_context context; if (vp == NULL) return; @@ -2139,7 +2216,7 @@ audit_arg_vnpath(struct vnode *vp, u_int64_t flags) if (ar == NULL) /* This will be the case for unaudited system calls */ return; - if (flags & (ARG_VNODE1 | ARG_VNODE2) == 0) + if ((flags & (ARG_VNODE1 | ARG_VNODE2)) == 0) return; p = current_proc(); @@ -2170,32 +2247,40 @@ audit_arg_vnpath(struct vnode *vp, u_int64_t flags) */ len = MAXPATHLEN; if (vn_getpath(vp, *pathp, &len) == 0) { - if (flags & ARG_VNODE1) - ar->k_ar.ar_valid_arg |= ARG_KPATH1; - else - ar->k_ar.ar_valid_arg |= ARG_KPATH2; + if (flags & ARG_VNODE1) + ar->k_ar.ar_valid_arg |= ARG_KPATH1; + else + ar->k_ar.ar_valid_arg |= ARG_KPATH2; } else { - kfree((vm_offset_t)*pathp, MAXPATHLEN); + kfree(*pathp, MAXPATHLEN); *pathp = NULL; } - /* - * XXX: We'd assert the vnode lock here, only Darwin doesn't - * appear to have vnode locking assertions. - */ - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_rdev); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_gen); + error = vnode_getattr(vp, &va, &context); if (error) { /* XXX: How to handle this case? */ return; } - vnp->vn_mode = vattr.va_mode; - vnp->vn_uid = vattr.va_uid; - vnp->vn_gid = vattr.va_gid; - vnp->vn_dev = vattr.va_rdev; - vnp->vn_fsid = vattr.va_fsid; - vnp->vn_fileid = vattr.va_fileid; - vnp->vn_gen = vattr.va_gen; + /* XXX do we want to fall back here when these aren't supported? 
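The rewrite above swaps VOP_GETATTR()'s fill-everything struct vattr for the request-list struct vnode_attr: the caller declares which attributes it wants, and VATTR_IS_SUPPORTED() reports which ones the filesystem actually returned. That macro is also the natural answer to the XXX just below — consume an attribute only if it was supplied. A sketch of the guarded form (the zero fallbacks are illustrative, not from this patch):

        struct vnode_attr va;

        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_uid);
        VATTR_WANTED(&va, va_gen);
        if (vnode_getattr(vp, &va, &context) == 0) {
                /* take an attribute only if the FS supplied it */
                vnp->vn_uid = VATTR_IS_SUPPORTED(&va, va_uid) ?
                    va.va_uid : 0;
                vnp->vn_gen = VATTR_IS_SUPPORTED(&va, va_gen) ?
                    va.va_gen : 0;
        }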
*/ + vnp->vn_mode = va.va_mode; + vnp->vn_uid = va.va_uid; + vnp->vn_gid = va.va_gid; + vnp->vn_dev = va.va_rdev; + vnp->vn_fsid = va.va_fsid; + vnp->vn_fileid = (u_long)va.va_fileid; + vnp->vn_gen = va.va_gen; if (flags & ARG_VNODE1) ar->k_ar.ar_valid_arg |= ARG_VNODE1; else @@ -2204,7 +2289,16 @@ audit_arg_vnpath(struct vnode *vp, u_int64_t flags) } void -audit_arg_mach_port1(mach_port_t port) +audit_arg_vnpath_withref(struct vnode *vp, u_int64_t flags) +{ + if (vp == NULL || vnode_getwithref(vp)) + return; + audit_arg_vnpath(vp, flags); + (void)vnode_put(vp); +} + +void +audit_arg_mach_port1(mach_port_name_t port) { struct kaudit_record *ar; @@ -2217,7 +2311,7 @@ audit_arg_mach_port1(mach_port_t port) } void -audit_arg_mach_port2(mach_port_t port) +audit_arg_mach_port2(mach_port_name_t port) { struct kaudit_record *ar; @@ -2237,15 +2331,16 @@ audit_arg_mach_port2(mach_port_t port) void audit_sysclose(struct proc *p, int fd) { - struct file *fp; + struct fileproc *fp; + struct vnode *vp; audit_arg_fd(fd); - if (getvnode(p, fd, &fp) != 0) + if (fp_getfvp(p, fd, &fp, &vp) != 0) return; - audit_arg_vnpath((struct vnode *)fp->f_data, ARG_VNODE1); - + audit_arg_vnpath_withref((struct vnode *)fp->f_fglob->fg_data, ARG_VNODE1); + file_drop(fd); } #else /* !AUDIT */ @@ -2316,22 +2411,4 @@ auditctl(struct proc *p, struct auditctl_args *uap, register_t *retval) return (ENOSYS); } -void -audit_proc_init(struct proc *p) -{ - -} - -void -audit_proc_fork(struct proc *parent, struct proc *child) -{ - -} - -void -audit_proc_free(struct proc *p) -{ - -} - #endif /* AUDIT */ diff --git a/bsd/kern/kern_authorization.c b/bsd/kern/kern_authorization.c new file mode 100644 index 000000000..b5dbe6706 --- /dev/null +++ b/bsd/kern/kern_authorization.c @@ -0,0 +1,1014 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Centralized authorisation framework. + */ + +#include +#include /* XXX trim includes */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + + +/* + * Authorization scopes. + */ + +lck_grp_t *kauth_lck_grp; +static lck_mtx_t *kauth_scope_mtx; +#define KAUTH_SCOPELOCK() lck_mtx_lock(kauth_scope_mtx); +#define KAUTH_SCOPEUNLOCK() lck_mtx_unlock(kauth_scope_mtx); + +/* + * We support listeners for scopes that have not been registered yet. + * If a listener comes in for a scope that is not active we hang the listener + * off our kauth_dangling_listeners list and once the scope becomes active we + * remove it from kauth_dangling_listeners and add it to the active scope. 
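A usage sketch of the dangling-listener behaviour just described: a client may install a listener before its scope is registered, and the framework attaches it once the scope appears. The callback name and its trivial body are hypothetical; the signature matches kauth_scope_callback_t as used throughout this file:

static int
my_fileop_listener(kauth_cred_t cred, void *idata, kauth_action_t action,
    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
        /* observe only; the fileop scope is a notification scope */
        return (KAUTH_RESULT_DEFER);
}

/* If KAUTH_SCOPE_FILEOP is not registered yet, the listener parks on
 * kauth_dangling_listeners and is moved onto the scope at register time. */
kauth_listener_t l = kauth_listen_scope(KAUTH_SCOPE_FILEOP,
    my_fileop_listener, NULL);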
+ */ +struct kauth_listener { + TAILQ_ENTRY(kauth_listener) kl_link; + const char * kl_identifier; + kauth_scope_callback_t kl_callback; + void * kl_idata; +}; + +/* XXX - kauth_todo - there is a race if a scope listener is removed while we + * we are in the kauth_authorize_action code path. We intentionally do not take + * a scope lock in order to get the best possible performance. we will fix this + * post Tiger. + * Until the race is fixed our kext clients are responsible for all active + * requests that may be in their callback code or on the way to their callback + * code before they free kauth_listener.kl_callback or kauth_listener.kl_idata. + * We keep copies of these in our kauth_local_listener in an attempt to limit + * our expose to unlisten race. + */ +struct kauth_local_listener { + kauth_listener_t kll_listenerp; + kauth_scope_callback_t kll_callback; + void * kll_idata; +}; +typedef struct kauth_local_listener *kauth_local_listener_t; + +static TAILQ_HEAD(,kauth_listener) kauth_dangling_listeners; + +/* + * Scope listeners need to be reworked to be dynamic. + * We intentionally used a static table to avoid locking issues with linked + * lists. The listeners may be called quite often. + * XXX - kauth_todo + */ +#define KAUTH_SCOPE_MAX_LISTENERS 15 + +struct kauth_scope { + TAILQ_ENTRY(kauth_scope) ks_link; + volatile struct kauth_local_listener ks_listeners[KAUTH_SCOPE_MAX_LISTENERS]; + const char * ks_identifier; + kauth_scope_callback_t ks_callback; + void * ks_idata; + u_int ks_flags; +}; + +/* values for kauth_scope.ks_flags */ +#define KS_F_HAS_LISTENERS (1 << 0) + +static TAILQ_HEAD(,kauth_scope) kauth_scopes; + +static int kauth_add_callback_to_scope(kauth_scope_t sp, kauth_listener_t klp); +static void kauth_scope_init(void); +static kauth_scope_t kauth_alloc_scope(const char *identifier, kauth_scope_callback_t callback, void *idata); +static kauth_listener_t kauth_alloc_listener(const char *identifier, kauth_scope_callback_t callback, void *idata); +#if 0 +static int kauth_scope_valid(kauth_scope_t scope); +#endif + +kauth_scope_t kauth_scope_process; +static int kauth_authorize_process_callback(kauth_cred_t _credential, void *_idata, kauth_action_t _action, + uintptr_t arg0, uintptr_t arg1, __unused uintptr_t arg2, __unused uintptr_t arg3); +kauth_scope_t kauth_scope_generic; +static int kauth_authorize_generic_callback(kauth_cred_t _credential, void *_idata, kauth_action_t _action, + uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3); +kauth_scope_t kauth_scope_fileop; + +extern int cansignal(struct proc *, kauth_cred_t, struct proc *, int); +extern char * get_pathbuff(void); +extern void release_pathbuff(char *path); + +/* + * Initialization. 
+ */ +void +kauth_init(void) +{ + lck_grp_attr_t *grp_attributes; + + TAILQ_INIT(&kauth_scopes); + TAILQ_INIT(&kauth_dangling_listeners); + + /* set up our lock group */ + grp_attributes = lck_grp_attr_alloc_init(); + kauth_lck_grp = lck_grp_alloc_init("kauth", grp_attributes); + lck_grp_attr_free(grp_attributes); + + /* bring up kauth subsystem components */ + kauth_cred_init(); + kauth_identity_init(); + kauth_groups_init(); + kauth_scope_init(); + kauth_resolver_init(); + + /* can't alloc locks after this */ + lck_grp_free(kauth_lck_grp); + kauth_lck_grp = NULL; +} + +static void +kauth_scope_init(void) +{ + kauth_scope_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0 /*LCK_ATTR_NULL*/); + kauth_scope_process = kauth_register_scope(KAUTH_SCOPE_PROCESS, kauth_authorize_process_callback, NULL); + kauth_scope_generic = kauth_register_scope(KAUTH_SCOPE_GENERIC, kauth_authorize_generic_callback, NULL); + kauth_scope_fileop = kauth_register_scope(KAUTH_SCOPE_FILEOP, NULL, NULL); +} + +/* + * Scope registration. + */ + +static kauth_scope_t +kauth_alloc_scope(const char *identifier, kauth_scope_callback_t callback, void *idata) +{ + kauth_scope_t sp; + + /* + * Allocate and populate the scope structure. + */ + MALLOC(sp, kauth_scope_t, sizeof(*sp), M_KAUTH, M_WAITOK); + if (sp == NULL) + return(NULL); + bzero(&sp->ks_listeners, sizeof(sp->ks_listeners)); + sp->ks_flags = 0; + sp->ks_identifier = identifier; + sp->ks_idata = idata; + sp->ks_callback = callback; + return(sp); +} + +static kauth_listener_t +kauth_alloc_listener(const char *identifier, kauth_scope_callback_t callback, void *idata) +{ + kauth_listener_t lsp; + + /* + * Allocate and populate the listener structure. + */ + MALLOC(lsp, kauth_listener_t, sizeof(*lsp), M_KAUTH, M_WAITOK); + if (lsp == NULL) + return(NULL); + lsp->kl_identifier = identifier; + lsp->kl_idata = idata; + lsp->kl_callback = callback; + return(lsp); +} + +kauth_scope_t +kauth_register_scope(const char *identifier, kauth_scope_callback_t callback, void *idata) +{ + kauth_scope_t sp, tsp; + kauth_listener_t klp; + + if ((sp = kauth_alloc_scope(identifier, callback, idata)) == NULL) + return(NULL); + + /* + * Lock the list and insert. + */ + KAUTH_SCOPELOCK(); + TAILQ_FOREACH(tsp, &kauth_scopes, ks_link) { + /* duplicate! */ + if (strcmp(tsp->ks_identifier, identifier) == 0) { + KAUTH_SCOPEUNLOCK(); + FREE(sp, M_KAUTH); + return(NULL); + } + } + TAILQ_INSERT_TAIL(&kauth_scopes, sp, ks_link); + + /* + * Look for listeners waiting for this scope, move them to the active scope + * listener table. + * Note that we have to restart the scan every time we remove an entry + * from the list, since we can't remove the current item from the list. + */ +restart: + TAILQ_FOREACH(klp, &kauth_dangling_listeners, kl_link) { + if (strcmp(klp->kl_identifier, sp->ks_identifier) == 0) { + /* found a match on the dangling listener list. add it to the + * the active scope. 
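kauth_register_scope() below uses the classic restart idiom: TAILQ_FOREACH() does not tolerate removing the element it is standing on, so after each removal the scan starts over from the head. Reduced to its shape — matches() and attach() are hypothetical stand-ins:

restart:
        TAILQ_FOREACH(klp, &kauth_dangling_listeners, kl_link) {
                if (matches(klp)) {
                        TAILQ_REMOVE(&kauth_dangling_listeners, klp,
                            kl_link);
                        attach(klp);
                        goto restart;  /* iterator invalid after removal */
                }
        }

Where a TAILQ_FOREACH_SAFE variant is available it avoids the restart by pre-fetching the next element, at the cost of slightly different semantics for concurrent insertions.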
+ */ + if (kauth_add_callback_to_scope(sp, klp) == 0) { + TAILQ_REMOVE(&kauth_dangling_listeners, klp, kl_link); + } + else { +#if 0 + printf("%s - failed to add listener to scope \"%s\" \n", __FUNCTION__, sp->ks_identifier); +#endif + break; + } + goto restart; + } + } + + KAUTH_SCOPEUNLOCK(); + return(sp); +} + + + +void +kauth_deregister_scope(kauth_scope_t scope) +{ + int i; + + KAUTH_SCOPELOCK(); + + TAILQ_REMOVE(&kauth_scopes, scope, ks_link); + + /* relocate listeners back to the waiting list */ + for (i = 0; i < KAUTH_SCOPE_MAX_LISTENERS; i++) { + if (scope->ks_listeners[i].kll_listenerp != NULL) { + TAILQ_INSERT_TAIL(&kauth_dangling_listeners, scope->ks_listeners[i].kll_listenerp, kl_link); + scope->ks_listeners[i].kll_listenerp = NULL; + /* + * XXX - kauth_todo - WARNING, do not clear kll_callback or + * kll_idata here. they are part of our scope unlisten race hack + */ + } + } + KAUTH_SCOPEUNLOCK(); + FREE(scope, M_KAUTH); + + return; +} + +kauth_listener_t +kauth_listen_scope(const char *identifier, kauth_scope_callback_t callback, void *idata) +{ + kauth_listener_t klp; + kauth_scope_t sp; + + if ((klp = kauth_alloc_listener(identifier, callback, idata)) == NULL) + return(NULL); + + /* + * Lock the scope list and check to see whether this scope already exists. + */ + KAUTH_SCOPELOCK(); + TAILQ_FOREACH(sp, &kauth_scopes, ks_link) { + if (strcmp(sp->ks_identifier, identifier) == 0) { + /* scope exists, add it to scope listener table */ + if (kauth_add_callback_to_scope(sp, klp) == 0) { + KAUTH_SCOPEUNLOCK(); + return(klp); + } + /* table already full */ + KAUTH_SCOPEUNLOCK(); + FREE(klp, M_KAUTH); + return(NULL); + } + } + + /* scope doesn't exist, put on waiting list. */ + TAILQ_INSERT_TAIL(&kauth_dangling_listeners, klp, kl_link); + + KAUTH_SCOPEUNLOCK(); + + return(klp); +} + +void +kauth_unlisten_scope(kauth_listener_t listener) +{ + kauth_scope_t sp; + kauth_listener_t klp; + int i, listener_count, do_free; + + KAUTH_SCOPELOCK(); + + /* search the active scope for this listener */ + TAILQ_FOREACH(sp, &kauth_scopes, ks_link) { + do_free = 0; + if ((sp->ks_flags & KS_F_HAS_LISTENERS) != 0) { + listener_count = 0; + for (i = 0; i < KAUTH_SCOPE_MAX_LISTENERS; i++) { + if (sp->ks_listeners[i].kll_listenerp == listener) { + sp->ks_listeners[i].kll_listenerp = NULL; + do_free = 1; + /* + * XXX - kauth_todo - WARNING, do not clear kll_callback or + * kll_idata here. they are part of our scope unlisten race hack + */ + } + else if (sp->ks_listeners[i].kll_listenerp != NULL) { + listener_count++; + } + } + if (do_free) { + if (listener_count == 0) { + sp->ks_flags &= ~KS_F_HAS_LISTENERS; + } + KAUTH_SCOPEUNLOCK(); + FREE(listener, M_KAUTH); + return; + } + } + } + + /* if not active, check the dangling list */ + TAILQ_FOREACH(klp, &kauth_dangling_listeners, kl_link) { + if (klp == listener) { + TAILQ_REMOVE(&kauth_dangling_listeners, klp, kl_link); + KAUTH_SCOPEUNLOCK(); + FREE(listener, M_KAUTH); + return; + } + } + + KAUTH_SCOPEUNLOCK(); + return; +} + +/* + * Authorization requests. 
+ */ +int +kauth_authorize_action(kauth_scope_t scope, kauth_cred_t credential, kauth_action_t action, + uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) +{ + int result, ret, i; + + /* ask the scope */ + if (scope->ks_callback != NULL) + result = scope->ks_callback(credential, scope->ks_idata, action, arg0, arg1, arg2, arg3); + else + result = KAUTH_RESULT_DEFER; + + /* check with listeners */ + if ((scope->ks_flags & KS_F_HAS_LISTENERS) != 0) { + for (i = 0; i < KAUTH_SCOPE_MAX_LISTENERS; i++) { + /* XXX - kauth_todo - there is a race here if listener is removed - we will fix this post Tiger. + * Until the race is fixed our kext clients are responsible for all active requests that may + * be in their callbacks or on the way to their callbacks before they free kl_callback or kl_idata. + * We keep copies of these in our kauth_local_listener in an attempt to limit our expose to + * unlisten race. + */ + if (scope->ks_listeners[i].kll_listenerp == NULL || + scope->ks_listeners[i].kll_callback == NULL) + continue; + + ret = scope->ks_listeners[i].kll_callback( + credential, scope->ks_listeners[i].kll_idata, + action, arg0, arg1, arg2, arg3); + if ((ret == KAUTH_RESULT_DENY) || + (result == KAUTH_RESULT_DEFER)) + result = ret; + } + } + + /* we need an explicit allow, or the auth fails */ + /* XXX need a mechanism for auth failure to be signalled vs. denial */ + return(result == KAUTH_RESULT_ALLOW ? 0 : EPERM); +} + +/* + * Default authorization handlers. + */ +int +kauth_authorize_allow(__unused kauth_cred_t credential, __unused void *idata, __unused kauth_action_t action, + __unused uintptr_t arg0, __unused uintptr_t arg1, __unused uintptr_t arg2, __unused uintptr_t arg3) +{ + + return(KAUTH_RESULT_ALLOW); +} + +#if 0 +/* + * Debugging support. + */ +static int +kauth_scope_valid(kauth_scope_t scope) +{ + kauth_scope_t sp; + + KAUTH_SCOPELOCK(); + TAILQ_FOREACH(sp, &kauth_scopes, ks_link) { + if (sp == scope) + break; + } + KAUTH_SCOPEUNLOCK(); + return((sp == NULL) ? 0 : 1); +} +#endif + +/* + * Process authorization scope. + */ + +int +kauth_authorize_process(kauth_cred_t credential, kauth_action_t action, struct proc *process, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) +{ + return(kauth_authorize_action(kauth_scope_process, credential, action, (uintptr_t)process, arg1, arg2, arg3)); +} + +static int +kauth_authorize_process_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action, + uintptr_t arg0, uintptr_t arg1, __unused uintptr_t arg2, __unused uintptr_t arg3) +{ + switch(action) { + case KAUTH_PROCESS_CANSIGNAL: + panic("KAUTH_PROCESS_CANSIGNAL not implemented"); + /* XXX credential wrong here */ + /* arg0 - process to signal + * arg1 - signal to send the process + */ + if (cansignal(current_proc(), credential, (struct proc *)arg0, (int)arg1)) + return(KAUTH_RESULT_ALLOW); + break; + case KAUTH_PROCESS_CANTRACE: + /* current_proc() - process that will do the tracing + * arg0 - process to be traced + * arg1 - pointer to int - reason (errno) for denial + */ + if (cantrace(current_proc(), credential, (proc_t)arg0, (int *)arg1)) + return(KAUTH_RESULT_ALLOW); + break; + } + + /* no explicit result, so defer to others in the chain */ + return(KAUTH_RESULT_DEFER); +} + +/* + * File system operation authorization scope. This is really only a notification + * of the file system operation, not an authorization check. Thus the result is + * not relevant. 
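The verdict folding in kauth_authorize_action() above reduces to three rules: a DENY from any party is sticky, a listener's answer replaces a DEFER, and the request succeeds only on an explicit ALLOW — anything else fails with EPERM. Factored into a hypothetical helper:

static int
fold_result(int result, int listener_ret)
{
        /* deny always wins; otherwise a real verdict replaces a defer */
        if (listener_ret == KAUTH_RESULT_DENY ||
            result == KAUTH_RESULT_DEFER)
                result = listener_ret;
        return (result);
}

/* final disposition, as in the function above:
 *   return (result == KAUTH_RESULT_ALLOW ? 0 : EPERM);
 */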
+ * arguments passed to KAUTH_FILEOP_OPEN listeners + * arg0 is pointer to vnode (vnode *) for given user path. + * arg1 is pointer to path (char *) passed in to open. + * arguments passed to KAUTH_FILEOP_CLOSE listeners + * arg0 is pointer to vnode (vnode *) for file to be closed. + * arg1 is pointer to path (char *) of file to be closed. + * arg2 is close flags. + * arguments passed to KAUTH_FILEOP_RENAME listeners + * arg0 is pointer to "from" path (char *). + * arg1 is pointer to "to" path (char *). + * arguments passed to KAUTH_FILEOP_EXCHANGE listeners + * arg0 is pointer to file 1 path (char *). + * arg1 is pointer to file 2 path (char *). + * arguments passed to KAUTH_FILEOP_EXEC listeners + * arg0 is pointer to vnode (vnode *) for executable. + * arg1 is pointer to path (char *) to executable. + */ + +int +kauth_authorize_fileop_has_listeners(void) +{ + /* + * return 1 if we have any listeners for the fileop scope + * otherwize return 0 + */ + if ((kauth_scope_fileop->ks_flags & KS_F_HAS_LISTENERS) != 0) { + return(1); + } + return (0); +} + +int +kauth_authorize_fileop(kauth_cred_t credential, kauth_action_t action, uintptr_t arg0, uintptr_t arg1) +{ + char *namep = NULL; + int name_len; + uintptr_t arg2 = 0; + + /* we do not have a primary handler for the fileop scope so bail out if + * there are no listeners. + */ + if ((kauth_scope_fileop->ks_flags & KS_F_HAS_LISTENERS) == 0) { + return(0); + } + + if (action == KAUTH_FILEOP_OPEN || action == KAUTH_FILEOP_CLOSE || action == KAUTH_FILEOP_EXEC) { + /* get path to the given vnode as a convenience to our listeners. + */ + namep = get_pathbuff(); + name_len = MAXPATHLEN; + if (vn_getpath((vnode_t)arg0, namep, &name_len) != 0) { + release_pathbuff(namep); + return(0); + } + if (action == KAUTH_FILEOP_CLOSE) { + arg2 = arg1; /* close has some flags that come in via arg1 */ + } + arg1 = (uintptr_t)namep; + } + kauth_authorize_action(kauth_scope_fileop, credential, action, arg0, arg1, arg2, 0); + + if (namep != NULL) { + release_pathbuff(namep); + } + + return(0); +} + +/* + * Generic authorization scope. + */ + +int +kauth_authorize_generic(kauth_cred_t credential, kauth_action_t action) +{ + if (credential == NULL) + panic("auth against NULL credential"); + + return(kauth_authorize_action(kauth_scope_generic, credential, action, 0, 0, 0, 0)); + +} + +static int +kauth_authorize_generic_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action, + __unused uintptr_t arg0, __unused uintptr_t arg1, __unused uintptr_t arg2, __unused uintptr_t arg3) +{ + switch(action) { + case KAUTH_GENERIC_ISSUSER: + /* XXX == 0 ? */ + return((kauth_cred_getuid(credential) == 0) ? + KAUTH_RESULT_ALLOW : KAUTH_RESULT_DENY); + break; + } + + /* no explicit result, so defer to others in the chain */ + return(KAUTH_RESULT_DEFER); +} + +/* + * ACL evaluator. + * + * Determines whether the credential has the requested rights for an object secured by the supplied + * ACL. + * + * Evaluation proceeds from the top down, with access denied if any ACE denies any of the requested + * rights, or granted if all of the requested rights are satisfied by the ACEs so far. 
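Before the full function below, the evaluation rule in skeleton form: PERMIT entries whittle rights out of ae_residual until none remain (ALLOW), a DENY entry that intersects the requested rights ends evaluation immediately, and an exhausted list defers to other authorization modes. Generic-rights expansion and the applicability logic are elided here; ace_applies() is a hypothetical stand-in:

static int
acl_eval_sketch(kauth_acl_eval_t eval)
{
        kauth_ace_t ace;
        int i;

        eval->ae_residual = eval->ae_requested;
        for (i = 0, ace = eval->ae_acl; i < eval->ae_count; i++, ace++) {
                if (!ace_applies(ace, eval))    /* hypothetical helper */
                        continue;
                switch (ace->ace_flags & KAUTH_ACE_KINDMASK) {
                case KAUTH_ACE_PERMIT:
                        /* satisfy whatever this entry grants */
                        eval->ae_residual &= ~ace->ace_rights;
                        if (eval->ae_residual == 0) {
                                eval->ae_result = KAUTH_RESULT_ALLOW;
                                return (0);
                        }
                        break;
                case KAUTH_ACE_DENY:
                        /* any denied requested right is fatal */
                        if (eval->ae_requested & ace->ace_rights) {
                                eval->ae_result = KAUTH_RESULT_DENY;
                                return (0);
                        }
                        break;
                }
        }
        eval->ae_result = KAUTH_RESULT_DEFER;   /* let other modes decide */
        return (0);
}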
+ */ +int +kauth_acl_evaluate(kauth_cred_t cred, kauth_acl_eval_t eval) +{ + int applies, error, i; + kauth_ace_t ace; + guid_t guid; + uint32_t rights; + int wkguid; + + /* always allowed to do nothing */ + if (eval->ae_requested == 0) { + eval->ae_result = KAUTH_RESULT_ALLOW; + return(0); + } + + eval->ae_residual = eval->ae_requested; + + /* + * Get our guid for comparison purposes. + */ + if ((error = kauth_cred_getguid(cred, &guid)) != 0) { + eval->ae_result = KAUTH_RESULT_DENY; + KAUTH_DEBUG(" ACL - can't get credential GUID (%d), ACL denied", error); + return(error); + } + + KAUTH_DEBUG(" ACL - %d entries, initial residual %x", eval->ae_count, eval->ae_residual); + for (i = 0, ace = eval->ae_acl; i < eval->ae_count; i++, ace++) { + + /* + * Skip inherit-only entries. + */ + if (ace->ace_flags & KAUTH_ACE_ONLY_INHERIT) + continue; + + /* + * Expand generic rights, if appropriate. + */ + rights = ace->ace_rights; + if (rights & KAUTH_ACE_GENERIC_ALL) + rights |= eval->ae_exp_gall; + if (rights & KAUTH_ACE_GENERIC_READ) + rights |= eval->ae_exp_gread; + if (rights & KAUTH_ACE_GENERIC_WRITE) + rights |= eval->ae_exp_gwrite; + if (rights & KAUTH_ACE_GENERIC_EXECUTE) + rights |= eval->ae_exp_gexec; + + /* + * Determine whether this entry applies to the current request. This + * saves us checking the GUID if the entry has nothing to do with what + * we're currently doing. + */ + switch(ace->ace_flags & KAUTH_ACE_KINDMASK) { + case KAUTH_ACE_PERMIT: + if (!(eval->ae_residual & rights)) + continue; + break; + case KAUTH_ACE_DENY: + if (!(eval->ae_requested & rights)) + continue; + break; + default: + /* we don't recognise this ACE, skip it */ + continue; + } + + /* + * Verify whether this entry applies to the credential. + */ + wkguid = kauth_wellknown_guid(&ace->ace_applicable); + switch(wkguid) { + case KAUTH_WKG_OWNER: + applies = eval->ae_options & KAUTH_AEVAL_IS_OWNER; + break; + case KAUTH_WKG_GROUP: + applies = eval->ae_options & KAUTH_AEVAL_IN_GROUP; + break; + /* we short-circuit these here rather than wasting time calling the group membership code */ + case KAUTH_WKG_EVERYBODY: + applies = 1; + break; + case KAUTH_WKG_NOBODY: + applies = 0; + break; + + default: + /* check to see whether it's exactly us, or a group we are a member of */ + applies = kauth_guid_equal(&guid, &ace->ace_applicable); + KAUTH_DEBUG(" ACL - ACE applicable " K_UUID_FMT " caller " K_UUID_FMT " %smatched", + K_UUID_ARG(ace->ace_applicable), K_UUID_ARG(guid), applies ? "" : "not "); + + if (!applies) { + error = kauth_cred_ismember_guid(cred, &ace->ace_applicable, &applies); + /* + * If we can't resolve group membership, we have to limit misbehaviour. + * If the ACE is an 'allow' ACE, assume the cred is not a member (avoid + * granting excess access). If the ACE is a 'deny' ACE, assume the cred + * is a member (avoid failing to deny). + */ + if (error != 0) { + KAUTH_DEBUG(" ACL[%d] - can't get membership, making pessimistic assumption", i); + switch(ace->ace_flags & KAUTH_ACE_KINDMASK) { + case KAUTH_ACE_PERMIT: + applies = 0; + break; + case KAUTH_ACE_DENY: + applies = 1; + break; + } + } else { + KAUTH_DEBUG(" ACL - %s group member", applies ? "is" : "not"); + } + } else { + KAUTH_DEBUG(" ACL - entry matches caller"); + } + } + if (!applies) + continue; + + /* + * Apply ACE to outstanding rights. 
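The membership-failure handling above is a fail-closed rule: if kauth_cred_ismember_guid() cannot resolve membership, assume non-membership for PERMIT entries (never grant by accident) and membership for DENY entries (never fail to deny). Because unknown ACE kinds were already skipped earlier in the loop, the pessimistic switch collapses to a single line:

        if (kauth_cred_ismember_guid(cred, &ace->ace_applicable,
            &applies) != 0)
                applies = ((ace->ace_flags & KAUTH_ACE_KINDMASK) ==
                    KAUTH_ACE_DENY);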
+ */ + switch(ace->ace_flags & KAUTH_ACE_KINDMASK) { + case KAUTH_ACE_PERMIT: + /* satisfy any rights that this ACE grants */ + eval->ae_residual = eval->ae_residual & ~rights; + KAUTH_DEBUG(" ACL[%d] - rights %x leave residual %x", i, rights, eval->ae_residual); + /* all rights satisfied? */ + if (eval->ae_residual == 0) { + eval->ae_result = KAUTH_RESULT_ALLOW; + return(0); + } + break; + case KAUTH_ACE_DENY: + /* deny the request if any of the requested rights is denied */ + if (eval->ae_requested & rights) { + KAUTH_DEBUG(" ACL[%d] - denying based on %x", i, rights); + eval->ae_result = KAUTH_RESULT_DENY; + return(0); + } + break; + default: + KAUTH_DEBUG(" ACL - unknown entry kind %d", ace->ace_flags & KAUTH_ACE_KINDMASK); + break; + } + } + /* if not permitted, defer to other modes of authorisation */ + eval->ae_result = KAUTH_RESULT_DEFER; + return(0); +} + +/* + * Perform ACL inheritance and umask-ACL handling. + * + * Entries are inherited from the ACL on dvp. A caller-supplied + * ACL is in initial, and the result is output into product. + * If the process has a umask ACL and one is not supplied, we use + * the umask ACL. + * If isdir is set, the resultant ACL is for a directory, otherwise it is for a file. + */ +int +kauth_acl_inherit(vnode_t dvp, kauth_acl_t initial, kauth_acl_t *product, int isdir, vfs_context_t ctx) +{ + int entries, error, index; + unsigned int i; + struct vnode_attr dva; + kauth_acl_t inherit, result; + + /* + * Fetch the ACL from the directory. This should never fail. Note that we don't + * manage inheritance when the remote server is doing authorization; we just + * want to compose the umask-ACL and any initial ACL. + */ + inherit = NULL; + if ((dvp != NULL) && !vfs_authopaque(vnode_mount(dvp))) { + VATTR_INIT(&dva); + VATTR_WANTED(&dva, va_acl); + if ((error = vnode_getattr(dvp, &dva, ctx)) != 0) { + KAUTH_DEBUG(" ERROR - could not get parent directory ACL for inheritance"); + return(error); + } + if (VATTR_IS_SUPPORTED(&dva, va_acl)) + inherit = dva.va_acl; + } + + /* + * Compute the number of entries in the result ACL by scanning the input lists. + */ + entries = 0; + if (inherit != NULL) { + for (i = 0; i < inherit->acl_entrycount; i++) { + if (inherit->acl_ace[i].ace_flags & (isdir ? KAUTH_ACE_DIRECTORY_INHERIT : KAUTH_ACE_FILE_INHERIT)) + entries++; + } + } + + if (initial == NULL) { + /* XXX 3634665 TODO: fetch umask ACL from the process, set in initial */ + } + + if (initial != NULL) { + entries += initial->acl_entrycount; + } + + /* + * If there is no initial ACL, and no inheritable entries, the + * object should have no ACL at all. + * Note that this differs from the case where the initial ACL + * is empty, in which case the object must also have an empty ACL. + */ + if ((entries == 0) && (initial == NULL)) { + *product = NULL; + error = 0; + goto out; + } + + /* + * Allocate the result buffer. + */ + if ((result = kauth_acl_alloc(entries)) == NULL) { + KAUTH_DEBUG(" ERROR - could not allocate %d-entry result buffer for inherited ACL"); + error = ENOMEM; + goto out; + } + + /* + * Composition is simply: + * - initial + * - inherited + */ + index = 0; + if (initial != NULL) { + for (i = 0; i < initial->acl_entrycount; i++) + result->acl_ace[index++] = initial->acl_ace[i]; + KAUTH_DEBUG(" INHERIT - applied %d initial entries", index); + } + if (inherit != NULL) { + for (i = 0; i < inherit->acl_entrycount; i++) { + /* inherit onto this object? */ + if (inherit->acl_ace[i].ace_flags & (isdir ? 
KAUTH_ACE_DIRECTORY_INHERIT : KAUTH_ACE_FILE_INHERIT)) { + result->acl_ace[index] = inherit->acl_ace[i]; + result->acl_ace[index].ace_flags |= KAUTH_ACE_INHERITED; + /* don't re-inherit? */ + if (result->acl_ace[index].ace_flags & KAUTH_ACE_LIMIT_INHERIT) + result->acl_ace[index].ace_flags &= + ~(KAUTH_ACE_DIRECTORY_INHERIT | KAUTH_ACE_FILE_INHERIT | KAUTH_ACE_LIMIT_INHERIT); + index++; + } + } + } + result->acl_entrycount = index; + *product = result; + KAUTH_DEBUG(" INHERIT - product ACL has %d entries", index); + error = 0; +out: + if (inherit != NULL) + kauth_acl_free(inherit); + return(error); +} + +/* + * Optimistically copy in a kauth_filesec structure + * Parameters: xsecurity user space kauth_filesec_t + * xsecdstpp pointer to kauth_filesec_t + * + * Returns: 0 on success, EINVAL or EFAULT depending on failure mode. + * Modifies: xsecdestpp, which contains a pointer to an allocated + * and copied-in kauth_filesec_t + */ + +int +kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) +{ + user_addr_t uaddr, known_bound; + int error; + kauth_filesec_t fsec; + u_int32_t count; + size_t copysize; + + error = 0; + fsec = NULL; + + /* + * Make a guess at the size of the filesec. We start with the base + * pointer, and look at how much room is left on the page, clipped + * to a sensible upper bound. If it turns out this isn't enough, + * we'll size based on the actual ACL contents and come back again. + * + * The upper bound must be less than KAUTH_ACL_MAX_ENTRIES. The + * value here is fairly arbitrary. It's ok to have a zero count. + */ + known_bound = xsecurity + sizeof(struct kauth_filesec); + uaddr = mach_vm_round_page(known_bound); + count = (uaddr - known_bound) / sizeof(struct kauth_ace); + if (count > 32) + count = 32; +restart: + if ((fsec = kauth_filesec_alloc(count)) == NULL) { + error = ENOMEM; + goto out; + } + copysize = KAUTH_FILESEC_SIZE(count); + if ((error = copyin(xsecurity, (caddr_t)fsec, copysize)) != 0) + goto out; + + /* validate the filesec header */ + if (fsec->fsec_magic != KAUTH_FILESEC_MAGIC) { + error = EINVAL; + goto out; + } + + /* + * Is there an ACL payload, and is it too big? + */ + if ((fsec->fsec_entrycount != KAUTH_FILESEC_NOACL) && + (fsec->fsec_entrycount > count)) { + if (fsec->fsec_entrycount > KAUTH_ACL_MAX_ENTRIES) { + error = EINVAL; + goto out; + } + count = fsec->fsec_entrycount; + kauth_filesec_free(fsec); + goto restart; + } + +out: + if (error) { + if (fsec) + kauth_filesec_free(fsec); + } else { + *xsecdestpp = fsec; + } + return(error); +} + +/* + * Allocate a filesec structure. + */ +kauth_filesec_t +kauth_filesec_alloc(int count) +{ + kauth_filesec_t fsp; + + /* if the caller hasn't given us a valid size hint, assume the worst */ + if ((count < 0) || (count > KAUTH_ACL_MAX_ENTRIES)) + return(NULL); + + MALLOC(fsp, kauth_filesec_t, KAUTH_FILESEC_SIZE(count), M_KAUTH, M_WAITOK); + if (fsp != NULL) { + fsp->fsec_magic = KAUTH_FILESEC_MAGIC; + fsp->fsec_owner = kauth_null_guid; + fsp->fsec_group = kauth_null_guid; + fsp->fsec_entrycount = KAUTH_FILESEC_NOACL; + fsp->fsec_flags = 0; + } + return(fsp); +} + +void +kauth_filesec_free(kauth_filesec_t fsp) +{ +#ifdef KAUTH_DEBUG_ENABLE + if (fsp == KAUTH_FILESEC_NONE) + panic("freeing KAUTH_FILESEC_NONE"); + if (fsp == KAUTH_FILESEC_WANTED) + panic("freeing KAUTH_FILESEC_WANTED"); +#endif + FREE(fsp, M_KAUTH); +} + + +/* + * Allocate an ACL buffer. 
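kauth_copyinfilesec() above sizes its first copyin optimistically: it reads from the user pointer up to the next page boundary (clipped to 32 ACEs), and only if the validated header advertises a larger ACL does it free the buffer and retry with the exact count. One round-trip in the common case, two in the worst. The sizing step in isolation, using the function's own locals:

        known_bound = xsecurity + sizeof(struct kauth_filesec);
        uaddr = mach_vm_round_page(known_bound);
        count = (uaddr - known_bound) / sizeof(struct kauth_ace);
        if (count > 32)     /* arbitrary clip, < KAUTH_ACL_MAX_ENTRIES */
                count = 32;
        copysize = KAUTH_FILESEC_SIZE(count);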
+ */ +kauth_acl_t +kauth_acl_alloc(int count) +{ + kauth_acl_t aclp; + + /* if the caller hasn't given us a valid size hint, assume the worst */ + if ((count < 0) || (count > KAUTH_ACL_MAX_ENTRIES)) + return(NULL); + + MALLOC(aclp, kauth_acl_t, KAUTH_ACL_SIZE(count), M_KAUTH, M_WAITOK); + if (aclp != NULL) { + aclp->acl_entrycount = 0; + aclp->acl_flags = 0; + } + return(aclp); +} + +void +kauth_acl_free(kauth_acl_t aclp) +{ + FREE(aclp, M_KAUTH); +} + + +/* + * WARNING - caller must hold KAUTH_SCOPELOCK + */ +static int kauth_add_callback_to_scope(kauth_scope_t sp, kauth_listener_t klp) +{ + int i; + + for (i = 0; i < KAUTH_SCOPE_MAX_LISTENERS; i++) { + if (sp->ks_listeners[i].kll_listenerp == NULL) { + sp->ks_listeners[i].kll_callback = klp->kl_callback; + sp->ks_listeners[i].kll_idata = klp->kl_idata; + sp->ks_listeners[i].kll_listenerp = klp; + sp->ks_flags |= KS_F_HAS_LISTENERS; + return(0); + } + } + return(ENOSPC); +} diff --git a/bsd/kern/kern_bsm_audit.c b/bsd/kern/kern_bsm_audit.c index 44367bf9d..b4ddb4064 100644 --- a/bsd/kern/kern_bsm_audit.c +++ b/bsd/kern/kern_bsm_audit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,9 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ - #include -#include +#include #include #include #include @@ -30,6 +29,7 @@ #include #include +#include #include #include #include @@ -41,6 +41,7 @@ #include #include +#include /* The number of BSM records allocated. */ static int bsm_rec_count = 0; @@ -58,6 +59,8 @@ LIST_HEAD(, au_record) bsm_free_q; */ static mutex_t *bsm_audit_mutex; +static void audit_sys_auditon(struct audit_record *ar, struct au_record *rec); + /* * Initialize the BSM auditing subsystem. */ @@ -66,7 +69,7 @@ kau_init(void) { printf("BSM auditing present\n"); LIST_INIT(&bsm_free_q); - bsm_audit_mutex = mutex_alloc(ETAP_NO_TRACE); + bsm_audit_mutex = mutex_alloc(0); au_evclassmap_init(); } @@ -111,7 +114,7 @@ kau_open(void) } rec->data = (u_char *)kalloc(MAX_AUDIT_RECORD_SIZE * sizeof(u_char)); if((rec->data) == NULL) { - kfree((vm_offset_t)rec, (vm_size_t)sizeof(*rec)); + kfree(rec, sizeof(*rec)); return NULL; } mutex_lock(bsm_audit_mutex); @@ -153,7 +156,8 @@ int kau_write(struct au_record *rec, struct au_token *tok) * Close out the audit record by adding the header token, identifying * any missing tokens. Write out the tokens to the record memory. */ -int kau_close(struct au_record *rec, struct timespec *ctime, short event) +int +kau_close(struct au_record *rec, struct timespec *ctime, short event) { u_char *dptr; size_t tot_rec_size; @@ -183,6 +187,8 @@ int kau_close(struct au_record *rec, struct timespec *ctime, short event) dptr += cur->len; } } + + return(retval); } /* @@ -196,7 +202,7 @@ void kau_free(struct au_record *rec) /* Free the token list */ while ((tok = TAILQ_FIRST(&rec->token_q))) { TAILQ_REMOVE(&rec->token_q, tok, tokens); - kfree((vm_offset_t)tok, sizeof(*tok) + tok->len); + kfree(tok, sizeof(*tok) + tok->len); } rec->used = 0; @@ -246,7 +252,7 @@ void kau_free(struct au_record *rec) kau_write(rec, tok); \ } \ } while (0) - + #define KPATH1_VNODE1_TOKENS \ do { \ if (ar->ar_valid_arg & ARG_KPATH1) { \ @@ -307,13 +313,13 @@ void kau_free(struct au_record *rec) * auditon() system call. 
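 * Each auditon() command is recorded as one or more BSM "arg" tokens;
 * the token width follows the argument width, so e.g. A_SETPOLICY
 * below selects au_to_arg64() over au_to_arg32() when
 * sizeof(au_flags) is wider than four bytes.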
* */ -void +static void audit_sys_auditon(struct audit_record *ar, struct au_record *rec) { struct au_token *tok; switch (ar->ar_arg_cmd) { - case A_SETPOLICY: + case A_SETPOLICY: if (sizeof(ar->ar_arg_auditon.au_flags) > 4) tok = au_to_arg64(1, "policy", ar->ar_arg_auditon.au_flags); @@ -322,7 +328,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_flags); kau_write(rec, tok); break; - case A_SETKMASK: + case A_SETKMASK: tok = au_to_arg32(2, "setkmask:as_success", ar->ar_arg_auditon.au_mask.am_success); kau_write(rec, tok); @@ -330,7 +336,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_mask.am_failure); kau_write(rec, tok); break; - case A_SETQCTRL: + case A_SETQCTRL: tok = au_to_arg32(3, "setqctrl:aq_hiwater", ar->ar_arg_auditon.au_qctrl.aq_hiwater); kau_write(rec, tok); @@ -347,7 +353,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_qctrl.aq_minfree); kau_write(rec, tok); break; - case A_SETUMASK: + case A_SETUMASK: tok = au_to_arg32(3, "setumask:as_success", ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); kau_write(rec, tok); @@ -355,7 +361,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); kau_write(rec, tok); break; - case A_SETSMASK: + case A_SETSMASK: tok = au_to_arg32(3, "setsmask:as_success", ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); kau_write(rec, tok); @@ -363,7 +369,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); kau_write(rec, tok); break; - case A_SETCOND: + case A_SETCOND: if (sizeof(ar->ar_arg_auditon.au_cond) > 4) tok = au_to_arg64(3, "setcond", ar->ar_arg_auditon.au_cond); @@ -372,7 +378,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_cond); kau_write(rec, tok); break; - case A_SETCLASS: + case A_SETCLASS: tok = au_to_arg32(2, "setclass:ec_event", ar->ar_arg_auditon.au_evclass.ec_number); kau_write(rec, tok); @@ -380,7 +386,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_evclass.ec_class); kau_write(rec, tok); break; - case A_SETPMASK: + case A_SETPMASK: tok = au_to_arg32(2, "setpmask:as_success", ar->ar_arg_auditon.au_aupinfo.ap_mask.am_success); kau_write(rec, tok); @@ -388,7 +394,7 @@ audit_sys_auditon(struct audit_record *ar, struct au_record *rec) ar->ar_arg_auditon.au_aupinfo.ap_mask.am_failure); kau_write(rec, tok); break; - case A_SETFSIZE: + case A_SETFSIZE: tok = au_to_arg32(2, "setfsize:filesize", ar->ar_arg_auditon.au_fstat.af_filesz); kau_write(rec, tok); @@ -608,6 +614,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) break; case AUE_CHOWN: + case AUE_LCHOWN: tok = au_to_arg32(2, "new file uid", ar->ar_arg_uid); kau_write(rec, tok); tok = au_to_arg32(3, "new file gid", ar->ar_arg_gid); @@ -729,7 +736,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) kau_write(rec, tok); UPATH1_KPATH1_VNODE1_TOKENS; break; - + case AUE_MKDIR: tok = au_to_arg32(2, "mode", ar->ar_arg_mode); kau_write(rec, tok); @@ -750,9 +757,9 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_MLOCK: case AUE_MUNLOCK: case AUE_MINHERIT: - tok = au_to_arg32(1, "addr", (u_int32_t)ar->ar_arg_addr); + tok = au_to_arg32(1, "addr", (u_int32_t)ar->ar_arg_addr); /* LP64todo */ kau_write(rec, tok); - tok = au_to_arg32(2, "len", ar->ar_arg_len); + tok = au_to_arg32(2, 
"len", ar->ar_arg_len); /* LP64todo */ kau_write(rec, tok); if (ar->ar_event == AUE_MMAP) FD_KPATH1_VNODE1_TOKENS; @@ -829,7 +836,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) case AUE_PTRACE: tok = au_to_arg32(1, "request", ar->ar_arg_cmd); kau_write(rec, tok); - tok = au_to_arg32(3, "addr", (u_int32_t)ar->ar_arg_addr); + tok = au_to_arg32(3, "addr", (u_int32_t)ar->ar_arg_addr); /* LP64todo */ kau_write(rec, tok); tok = au_to_arg32(4, "data", ar->ar_arg_value); kau_write(rec, tok); @@ -886,7 +893,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) if (ar->ar_valid_arg & ARG_GROUPSET) { for(ctr = 0; ctr < ar->ar_arg_groups.gidset_size; ctr++) { - tok = au_to_arg32(1, "setgroups", ar->ar_arg_groups.gidset[ctr]); + tok = au_to_arg32(1, "setgroups", ar->ar_arg_groups.gidset[ctr]); kau_write(rec, tok); } } @@ -1140,7 +1147,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) * */ int -bsm_rec_verify(void *rec) +bsm_rec_verify(void* rec) { char c = *(char *)rec; /* diff --git a/bsd/kern/kern_bsm_klib.c b/bsd/kern/kern_bsm_klib.c index b3e33f193..1aacd0dd0 100644 --- a/bsd/kern/kern_bsm_klib.c +++ b/bsd/kern/kern_bsm_klib.c @@ -20,8 +20,10 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include #include -#include +#include +#include #include #include #include @@ -352,7 +354,7 @@ au_event_t sys_au_event[] = { AUE_NULL, /* 295 */ AUE_LOADSHFILE, /* 296 = load_shared_file */ AUE_RESETSHFILE, /* 297 = reset_shared_file */ - AUE_NEWSYSTEMSHREG, /* 298 = new_system_shared_regions */ + AUE_NEWSYSTEMSHREG, /* 298 = new_system_shared_regions */ AUE_NULL, /* 299 */ AUE_NULL, /* 300 */ AUE_NULL, /* 301 */ @@ -418,7 +420,7 @@ au_event_t sys_au_event[] = { AUE_NULL, /* 361 */ AUE_NULL, /* 362 = kqueue */ AUE_NULL, /* 363 = kevent */ - AUE_NULL, /* 364 */ + AUE_LCHOWN, /* 364 = lchown */ AUE_NULL, /* 365 */ AUE_NULL, /* 366 */ AUE_NULL, /* 367 */ @@ -459,12 +461,12 @@ au_class_t au_event_class(au_event_t event) return (AU_NULL); } -/* + /* * Insert a event to class mapping. If the event already exists in the * mapping, then replace the mapping with the new one. * XXX There is currently no constraints placed on the number of mappings. * May want to either limit to a number, or in terms of memory usage. - */ + */ void au_evclassmap_insert(au_event_t event, au_class_t class) { struct evclass_list *evcl; @@ -478,14 +480,13 @@ void au_evclassmap_insert(au_event_t event, au_class_t class) return; } } - kmem_alloc(kernel_map, &evc, sizeof(*evc)); + kmem_alloc(kernel_map, (vm_offset_t *)&evc, sizeof(*evc)); if (evc == NULL) { return; } evc->event = event; evc->class = class; LIST_INSERT_HEAD(&evcl->head, evc, entry); - } void au_evclassmap_init() @@ -499,7 +500,7 @@ void au_evclassmap_init() for (i = 0; i < nsys_au_event; i++) { if (sys_au_event[i] != AUE_NULL) { au_evclassmap_insert(sys_au_event[i], AU_NULL); - } + } } /* Add the Mach system call events */ au_evclassmap_insert(AUE_TASKFORPID, AU_NULL); @@ -508,27 +509,26 @@ void au_evclassmap_init() au_evclassmap_insert(AUE_SWAPOFF, AU_NULL); au_evclassmap_insert(AUE_MAPFD, AU_NULL); au_evclassmap_insert(AUE_INITPROCESS, AU_NULL); - + /* Add the specific open events to the mapping. 
*/ au_evclassmap_insert(AUE_OPEN_R, AU_FREAD); - au_evclassmap_insert(AUE_OPEN_RC, AU_FREAD|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_RTC, AU_FREAD|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RT, AU_FREAD|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RW, AU_FREAD|AU_FWRITE); - au_evclassmap_insert(AUE_OPEN_RWC, AU_FREAD|AU_FWRITE|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_RWTC, AU_FREAD|AU_FWRITE|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RWT, AU_FREAD|AU_FWRITE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_W, AU_FWRITE); - au_evclassmap_insert(AUE_OPEN_WC, AU_FWRITE|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_WTC, AU_FWRITE|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_WT, AU_FWRITE|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_RC, AU_FREAD|AU_FCREATE); + au_evclassmap_insert(AUE_OPEN_RTC, AU_FREAD|AU_FCREATE|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_RT, AU_FREAD|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_RW, AU_FREAD|AU_FWRITE); + au_evclassmap_insert(AUE_OPEN_RWC, AU_FREAD|AU_FWRITE|AU_FCREATE); + au_evclassmap_insert(AUE_OPEN_RWTC, AU_FREAD|AU_FWRITE|AU_FCREATE|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_RWT, AU_FREAD|AU_FWRITE|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_W, AU_FWRITE); + au_evclassmap_insert(AUE_OPEN_WC, AU_FWRITE|AU_FCREATE); + au_evclassmap_insert(AUE_OPEN_WTC, AU_FWRITE|AU_FCREATE|AU_FDELETE); + au_evclassmap_insert(AUE_OPEN_WT, AU_FWRITE|AU_FDELETE); } -/* + /* * Check whether an event is aditable by comparing the mask of classes this * event is part of against the given mask. - * - */ + */ int au_preselect(au_event_t event, au_mask_t *mask_p, int sorf) { au_class_t effmask = 0; @@ -538,10 +538,10 @@ int au_preselect(au_event_t event, au_mask_t *mask_p, int sorf) return (-1); ae_class = au_event_class(event); - /* + /* * Perform the actual check of the masks against the event. */ - if (sorf & AU_PRS_SUCCESS) { + if(sorf & AU_PRS_SUCCESS) { effmask |= (mask_p->am_success & ae_class); } @@ -580,6 +580,7 @@ au_event_t ctlname_to_sysctlevent(int name[], uint64_t valid_arg) { case KERN_SAVED_IDS: case KERN_NETBOOT: case KERN_SYMFILE: + case KERN_SHREG_PRIVATIZABLE: return AUE_SYSCTL_NONADMIN; /* only treat the sets as admin */ @@ -656,13 +657,13 @@ au_event_t flags_and_error_to_openevent(int oflags, int error) { default: aevent = AUE_OPEN; break; - } +} - /* +/* * Convert chatty errors to better matching events. * Failures to find a file are really just attribute * events - so recast them as such. - */ +*/ switch (aevent) { case AUE_OPEN_R: case AUE_OPEN_RT: @@ -672,12 +673,12 @@ au_event_t flags_and_error_to_openevent(int oflags, int error) { case AUE_OPEN_WT: if (error == ENOENT) aevent = AUE_OPEN; - } +} return aevent; } /* Convert a MSGCTL command to a specific event. */ -int msgctl_to_event(int cmd) +au_event_t msgctl_to_event(int cmd) { switch (cmd) { case IPC_RMID: @@ -693,7 +694,7 @@ int msgctl_to_event(int cmd) } /* Convert a SEMCTL command to a specific event. */ -int semctl_to_event(int cmd) +au_event_t semctl_to_event(int cmd) { switch (cmd) { case GETALL: @@ -829,12 +830,9 @@ int canon_path(struct proc *p, char *path, char *cpath) cpath[0] = '\0'; return (ret); } - /* The length returned by vn_getpath() is two greater than the - * number of characters in the string. 
- */ if (len < MAXPATHLEN) - cpath[len-2] = '/'; - strncpy(cpath + len-1, bufp, MAXPATHLEN - len); + cpath[len-1] = '/'; + strncpy(cpath + len, bufp, MAXPATHLEN - len); } else { strncpy(cpath, bufp, MAXPATHLEN); } diff --git a/bsd/kern/kern_bsm_token.c b/bsd/kern/kern_bsm_token.c index cceb7c6df..7be61356e 100644 --- a/bsd/kern/kern_bsm_token.c +++ b/bsd/kern/kern_bsm_token.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,13 +23,18 @@ #include #include #include +#include +#include #include #include #include #include #include +#include + +#include #define GET_TOKEN_AREA(tok, dptr, length) \ do {\ @@ -38,8 +43,8 @@ {\ tok->len = length;\ dptr = tok->t_data = (u_char *)&tok[1];\ - memset(dptr, 0, length);\ - }\ + memset(dptr, 0, length);\ + }\ }while(0) @@ -131,7 +136,7 @@ token_t *au_to_arg(char n, char *text, u_int32_t v) * node ID 8 bytes * device 4 bytes/8 bytes (32-bit/64-bit) */ -token_t *au_to_attr32(struct vattr *attr) +token_t *au_to_attr32(__unused struct vnode_attr *attr) { return NULL; } @@ -180,16 +185,17 @@ token_t *kau_to_attr32(struct vnode_au_info *vni) return t; } -token_t *au_to_attr64(struct vattr *attr) +token_t *au_to_attr64(__unused struct vnode_attr *attr) { + return NULL; } - -token_t *kau_to_attr64(struct vnode_au_info *vni) + +token_t *kau_to_attr64(__unused struct vnode_au_info *vni) { return NULL; } -token_t *au_to_attr(struct vattr *attr) +token_t *au_to_attr(struct vnode_attr *attr) { return au_to_attr32(attr); @@ -519,7 +525,7 @@ token_t *au_to_opaque(char *data, u_int16_t bytes) * file name len 2 bytes * file pathname N bytes + 1 terminating NULL byte */ -token_t *kau_to_file(char *file, struct timeval *tv) +token_t *kau_to_file(const char *file, const struct timeval *tv) { token_t *t; u_char *dptr; @@ -666,12 +672,17 @@ token_t *au_to_process32(au_id_t auid, uid_t euid, gid_t egid, return t; } -token_t *au_to_process64(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) +token_t *au_to_process64(__unused au_id_t auid, + __unused uid_t euid, + __unused gid_t egid, + __unused uid_t ruid, + __unused gid_t rgid, + __unused pid_t pid, + __unused au_asid_t sid, + __unused au_tid_t *tid) { - return NULL; -} + return NULL; + } token_t *au_to_process(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, @@ -730,13 +741,19 @@ token_t *au_to_process32_ex(au_id_t auid, uid_t euid, gid_t egid, return t; } -token_t *au_to_process64_ex(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) +token_t *au_to_process64_ex( + __unused au_id_t auid, + __unused uid_t euid, + __unused gid_t egid, + __unused uid_t ruid, + __unused gid_t rgid, + __unused pid_t pid, + __unused au_asid_t sid, + __unused au_tid_addr_t *tid) { return NULL; } - + token_t *au_to_process_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) @@ -820,7 +837,7 @@ token_t *au_to_seq(long audit_count) * remote port 2 bytes * remote Internet address 4 bytes */ -token_t *au_to_socket(struct socket *so) +token_t *au_to_socket(__unused struct socket *so) { return NULL; } @@ -865,14 +882,20 @@ token_t *kau_to_socket(struct socket_au_info *soi) * address type/length 4 bytes * remote Internet address 4 bytes/16 bytes (IPv4/IPv6 address) */ -token_t *au_to_socket_ex_32(u_int16_t lp, 
u_int16_t rp, - struct sockaddr *la, struct sockaddr *ra) +token_t *au_to_socket_ex_32( + __unused u_int16_t lp, + __unused u_int16_t rp, + __unused struct sockaddr *la, + __unused struct sockaddr *ra) { return NULL; } -token_t *au_to_socket_ex_128(u_int16_t lp, u_int16_t rp, - struct sockaddr *la, struct sockaddr *ra) +token_t *au_to_socket_ex_128( + __unused u_int16_t lp, + __unused u_int16_t rp, + __unused struct sockaddr *la, + __unused struct sockaddr *ra) { return NULL; } @@ -1019,13 +1042,19 @@ token_t *au_to_subject32(au_id_t auid, uid_t euid, gid_t egid, return t; } -token_t *au_to_subject64(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) +token_t *au_to_subject64( + __unused au_id_t auid, + __unused uid_t euid, + __unused gid_t egid, + __unused uid_t ruid, + __unused gid_t rgid, + __unused pid_t pid, + __unused au_asid_t sid, + __unused au_tid_t *tid) { - return NULL; -} - + return NULL; + } + token_t *au_to_subject(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid) @@ -1083,9 +1112,15 @@ token_t *au_to_subject32_ex(au_id_t auid, uid_t euid, return t; } -token_t *au_to_subject64_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) +token_t *au_to_subject64_ex( + __unused au_id_t auid, + __unused uid_t euid, + __unused gid_t egid, + __unused uid_t ruid, + __unused gid_t rgid, + __unused pid_t pid, + __unused au_asid_t sid, + __unused au_tid_addr_t *tid) { return NULL; } @@ -1211,7 +1246,7 @@ token_t *au_to_exec_env(const char **env) * seconds of time 4 bytes/8 bytes (32-bit/64-bit value) * milliseconds of time 4 bytes/8 bytes (32-bit/64-bit value) */ -token_t *kau_to_header32(struct timespec *ctime, int rec_size, +token_t *kau_to_header32(const struct timespec *ctime, int rec_size, au_event_t e_type, au_emod_t e_mod) { token_t *t; @@ -1236,13 +1271,16 @@ token_t *kau_to_header32(struct timespec *ctime, int rec_size, return t; } -token_t *kau_to_header64(struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod) +token_t *kau_to_header64( + __unused const struct timespec *ctime, + __unused int rec_size, + __unused au_event_t e_type, + __unused au_emod_t e_mod) { return NULL; } - -token_t *kau_to_header(struct timespec *ctime, int rec_size, + +token_t *kau_to_header(const struct timespec *ctime, int rec_size, au_event_t e_type, au_emod_t e_mod) { return kau_to_header32(ctime, rec_size, e_type, e_mod); diff --git a/bsd/kern/kern_clock.c b/bsd/kern/kern_clock.c index 8e34ca9e2..76c5353de 100644 --- a/bsd/kern/kern_clock.c +++ b/bsd/kern/kern_clock.c @@ -71,8 +71,9 @@ #include #include #include -#include +#include #include +#include #ifdef GPROF #include @@ -85,6 +86,14 @@ #include +void bsd_uprofil(struct time_value *syst, user_addr_t pc); +void get_procrustime(time_value_t *tv); +int sysctl_clockrate(user_addr_t where, size_t *sizep); +int tvtohz(struct timeval *tv); +extern void psignal_sigprof(struct proc *); +extern void psignal_vtalarm(struct proc *); +extern void psignal_xcpu(struct proc *); + /* * Clock handling routines. * @@ -107,13 +116,21 @@ * we run through the statistics gathering routine as well. */ +int hz = 100; /* GET RID OF THIS !!! */ +int tick = (1000000 / 100); /* GET RID OF THIS !!! 
*/ + int bsd_hardclockinit = 0; /*ARGSUSED*/ void -bsd_hardclock(usermode, pc, numticks) - boolean_t usermode; - caddr_t pc; - int numticks; +bsd_hardclock( + boolean_t usermode, +#ifdef GPROF + caddr_t pc, +#else + __unused caddr_t pc, +#endif + int numticks + ) { register struct proc *p; register thread_t thread; @@ -123,17 +140,11 @@ bsd_hardclock(usermode, pc, numticks) if (!bsd_hardclockinit) return; - /* - * Increment the time-of-day. - */ - microtime(&tv); - time = tv; - if (bsd_hardclockinit < 0) { return; } - thread = current_act(); + thread = current_thread(); /* * Charge the time out based on the mode the cpu is in. * Here again we fudge for the lack of proper interval timers @@ -141,7 +152,7 @@ bsd_hardclock(usermode, pc, numticks) * one tick. */ p = (struct proc *)current_proc(); - if (p && ((p->p_flag & P_WEXIT) == NULL)) { + if (p && ((p->p_flag & P_WEXIT) == 0)) { if (usermode) { if (p->p_stats && p->p_stats->p_prof.pr_scale) { p->p_flag |= P_OWEUPC; @@ -156,7 +167,6 @@ bsd_hardclock(usermode, pc, numticks) if (p->p_stats && timerisset(&p->p_stats->p_timer[ITIMER_VIRTUAL].it_value) && !itimerdecr(&p->p_stats->p_timer[ITIMER_VIRTUAL], nusecs)) { - extern void psignal_vtalarm(struct proc *); /* does psignal(p, SIGVTALRM) in a thread context */ thread_call_func((thread_call_func_t)psignal_vtalarm, p, FALSE); @@ -179,7 +189,6 @@ bsd_hardclock(usermode, pc, numticks) thread_read_times(thread, &user_time, &sys_time); if ((sys_time.seconds + user_time.seconds + 1) > p->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur) { - extern void psignal_xcpu(struct proc *); /* does psignal(p, SIGXCPU) in a thread context */ thread_call_func((thread_call_func_t)psignal_xcpu, p, FALSE); @@ -191,7 +200,6 @@ bsd_hardclock(usermode, pc, numticks) } if (timerisset(&p->p_stats->p_timer[ITIMER_PROF].it_value) && !itimerdecr(&p->p_stats->p_timer[ITIMER_PROF], nusecs)) { - extern void psignal_sigprof(struct proc *); /* does psignal(p, SIGPROF) in a thread context */ thread_call_func((thread_call_func_t)psignal_sigprof, p, FALSE); @@ -213,8 +221,15 @@ bsd_hardclock(usermode, pc, numticks) /*ARGSUSED*/ void gatherstats( - boolean_t usermode, - caddr_t pc) +#ifdef GPROF + boolean_t usermode, + caddr_t pc +#else + __unused boolean_t usermode, + __unused caddr_t pc +#endif + ) + { #ifdef GPROF if (!usermode) { @@ -269,12 +284,46 @@ untimeout( } +/* + * Set a timeout. + * + * fcn: function to call + * param: parameter to pass to function + * ts: timeout interval, in timespec + */ +void +bsd_timeout( + timeout_fcn_t fcn, + void *param, + struct timespec *ts) +{ + uint64_t deadline = 0; + + if (ts && (ts->tv_sec || ts->tv_nsec)) { + nanoseconds_to_absolutetime((uint64_t)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec, &deadline ); + clock_absolutetime_interval_to_deadline( deadline, &deadline ); + } + thread_call_func_delayed((thread_call_func_t)fcn, param, deadline); +} + +/* + * Cancel a timeout. + */ +void +bsd_untimeout( + register timeout_fcn_t fcn, + register void *param) +{ + thread_call_func_cancel((thread_call_func_t)fcn, param, FALSE); +} + /* * Compute number of hz until specified time. * Used to compute third argument to timeout() from an * absolute time. */ +int hzto(tv) struct timeval *tv; { @@ -309,9 +358,7 @@ hzto(tv) * Return information about system clocks. 
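 * The reply is a struct clockinfo carrying hz, tick, profhz and
 * stathz.  A hypothetical userland consumer (illustrative only, not
 * part of this change) would read it as:
 *
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *
 *	if (sysctl(mib, 2, &ci, &len, NULL, 0) == 0)
 *		printf("hz = %d, tick = %d\n", ci.hz, ci.tick);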
*/ int -sysctl_clockrate(where, sizep) - register char *where; - size_t *sizep; +sysctl_clockrate(user_addr_t where, size_t *sizep) { struct clockinfo clkinfo; @@ -322,7 +369,7 @@ sysctl_clockrate(where, sizep) clkinfo.tick = tick; clkinfo.profhz = hz; clkinfo.stathz = hz; - return sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)); + return sysctl_rdstruct(where, sizep, USER_ADDR_NULL, &clkinfo, sizeof(clkinfo)); } @@ -330,8 +377,7 @@ sysctl_clockrate(where, sizep) * Compute number of ticks in the specified amount of time. */ int -tvtohz(tv) - struct timeval *tv; +tvtohz(struct timeval *tv) { register unsigned long ticks; register long sec, usec; @@ -412,7 +458,7 @@ stopprofclock(p) } void -bsd_uprofil(struct time_value *syst, unsigned int pc) +bsd_uprofil(struct time_value *syst, user_addr_t pc) { struct proc *p = current_proc(); int ticks; diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index d57e83851..955bbd375 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,12 +19,12 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (C) 1999 Apple Computer, Inc. */ /* - * NKE management domain - allows control connections to - * an NKE and to read/write data. + * Kernel Control domain - allows control connections to + * and to read/write data. * + * Vincent Lubet, 040506 * Christophe Allie, 010928 * Justin C. Walker, 990319 */ @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -50,7 +49,6 @@ #include - /* * Definitions and vars for we support */ @@ -59,390 +57,842 @@ #define CTL_RECVSIZE (8 * 1024) /* default buffer size */ /* - internal structure maintained for each register controller -*/ -struct ctl -{ - TAILQ_ENTRY(ctl) next; /* controller chain */ - struct socket *skt; /* current controlling socket */ + * Definitions and vars for we support + */ - /* controller information provided when registering */ - u_int32_t id; /* unique nke identifier, provided by DTS */ - u_int32_t unit; /* unit number for use by the nke */ - void *userdata; /* for private use by nke */ - - /* misc communication information */ - u_int32_t flags; /* support flags */ - u_int32_t recvbufsize; /* request more than the default buffer size */ - u_int32_t sendbufsize; /* request more than the default buffer size */ - - /* Dispatch functions */ - int (*connect)(kern_ctl_ref, void *); /* Make contact */ - void (*disconnect)(kern_ctl_ref, void *); /* Break contact */ - int (*write) (kern_ctl_ref, void *, struct mbuf *); /* Send data to nke */ - int (*set)(kern_ctl_ref, void *, int, void *, size_t ); /* set ctl configuration */ - int (*get)(kern_ctl_ref, void *, int, void *, size_t *); /* get ctl configuration */ +static u_int32_t ctl_last_id = 0; +static u_int32_t ctl_max = 256; +static u_int32_t ctl_maxunit = 65536; +static lck_grp_attr_t *ctl_lck_grp_attr = 0; +static lck_attr_t *ctl_lck_attr = 0; +static lck_grp_t *ctl_lck_grp = 0; +static lck_mtx_t *ctl_mtx; + +/* + * internal structure maintained for each register controller + */ + +struct ctl_cb; + +struct kctl +{ + TAILQ_ENTRY(kctl) next; /* controller chain */ + + /* controller information provided when registering */ + char name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ + u_int32_t id; + u_int32_t reg_unit; + + /* misc communication information */ + u_int32_t flags; /* support 
flags */ + u_int32_t recvbufsize; /* request more than the default buffer size */ + u_int32_t sendbufsize; /* request more than the default buffer size */ + + /* Dispatch functions */ + ctl_connect_func connect; /* Make contact */ + ctl_disconnect_func disconnect; /* Break contact */ + ctl_send_func send; /* Send data to nke */ + ctl_setopt_func setopt; /* set kctl configuration */ + ctl_getopt_func getopt; /* get kctl configuration */ + + TAILQ_HEAD(, ctl_cb) kcb_head; + u_int32_t lastunit; }; +struct ctl_cb { + TAILQ_ENTRY(ctl_cb) next; /* controller chain */ + lck_mtx_t *mtx; + struct socket *so; /* controlling socket */ + struct kctl *kctl; /* back pointer to controller */ + u_int32_t unit; + void *userdata; +}; /* all the controllers are chained */ -TAILQ_HEAD(, ctl) ctl_head; - -int ctl_attach(struct socket *, int, struct proc *); -int ctl_connect(struct socket *, struct sockaddr *, struct proc *); -int ctl_disconnect(struct socket *); -int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, +TAILQ_HEAD(, kctl) ctl_head; + +static int ctl_attach(struct socket *, int, struct proc *); +static int ctl_detach(struct socket *); +static int ctl_sofreelastref(struct socket *so); +static int ctl_connect(struct socket *, struct sockaddr *, struct proc *); +static int ctl_disconnect(struct socket *); +static int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p); -int ctl_send(struct socket *, int, struct mbuf *, +static int ctl_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); -int ctl_ctloutput(struct socket *, struct sockopt *); +static int ctl_ctloutput(struct socket *, struct sockopt *); +static int ctl_peeraddr(struct socket *so, struct sockaddr **nam); + +static struct kctl *ctl_find_by_id(u_int32_t); +static struct kctl *ctl_find_by_name(const char *); +static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit); -struct ctl *ctl_find(u_int32_t, u_int32_t unit); -void ctl_post_msg(u_long event_code, u_int32_t id, u_int32_t unit); +static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit); +static void ctl_post_msg(u_long event_code, u_int32_t id); +static int ctl_lock(struct socket *, int, int); +static int ctl_unlock(struct socket *, int, int); +static lck_mtx_t * ctl_getlock(struct socket *, int); -struct pr_usrreqs ctl_usrreqs = +static struct pr_usrreqs ctl_usrreqs = { pru_abort_notsupp, pru_accept_notsupp, ctl_attach, pru_bind_notsupp, - ctl_connect, pru_connect2_notsupp, ctl_ioctl, pru_detach_notsupp, - ctl_disconnect, pru_listen_notsupp, pru_peeraddr_notsupp, + ctl_connect, pru_connect2_notsupp, ctl_ioctl, ctl_detach, + ctl_disconnect, pru_listen_notsupp, ctl_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, ctl_send, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp, - sosend, soreceive, sopoll + sosend, soreceive, pru_sopoll_notsupp +}; + +static struct protosw kctlswk_dgram = +{ + SOCK_DGRAM, &systemdomain, SYSPROTO_CONTROL, + PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK, + NULL, NULL, NULL, ctl_ctloutput, + NULL, NULL, + NULL, NULL, NULL, NULL, &ctl_usrreqs, + ctl_lock, ctl_unlock, ctl_getlock, { 0, 0 } , 0, { 0 } }; -struct protosw ctlsw = +static struct protosw kctlswk_stream = { - SOCK_DGRAM, &systemdomain, SYSPROTO_CONTROL, PR_ATOMIC|PR_CONNREQUIRED, + SOCK_STREAM, &systemdomain, SYSPROTO_CONTROL, + PR_CONNREQUIRED|PR_PCBLOCK, NULL, NULL, NULL, ctl_ctloutput, NULL, NULL, - NULL, NULL, NULL, NULL, &ctl_usrreqs + NULL, NULL, NULL, NULL, &ctl_usrreqs, + ctl_lock, ctl_unlock, 
ctl_getlock, { 0, 0 } , 0, { 0 }
+};
+
 /*
- * Install the protosw's for the NKE manager.
+ * Install the protosw's for the Kernel Control manager.
  */
-int
+__private_extern__ int
 kern_control_init(void)
 {
-	int retval;
-
-	retval = net_add_proto(&ctlsw, &systemdomain);
-	if (retval) {
-		log(LOG_WARNING, "Can't install Kernel Controller Manager (%d)\n", retval);
-		return retval;
-	}
+	int error = 0;
+
+	ctl_lck_grp_attr = lck_grp_attr_alloc_init();
+	if (ctl_lck_grp_attr == 0) {
+		printf("kern_control_init: lck_grp_attr_alloc_init failed\n");
+		error = ENOMEM;
+		goto done;
+	}
+	lck_grp_attr_setdefault(ctl_lck_grp_attr);
+
+	ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol", ctl_lck_grp_attr);
+	if (ctl_lck_grp == 0) {
+		printf("kern_control_init: lck_grp_alloc_init failed\n");
+		error = ENOMEM;
+		goto done;
+	}
+
+	ctl_lck_attr = lck_attr_alloc_init();
+	if (ctl_lck_attr == 0) {
+		printf("kern_control_init: lck_attr_alloc_init failed\n");
+		error = ENOMEM;
+		goto done;
+	}
+	lck_attr_setdefault(ctl_lck_attr);
+
+	ctl_mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
+	if (ctl_mtx == 0) {
+		printf("kern_control_init: lck_mtx_alloc_init failed\n");
+		error = ENOMEM;
+		goto done;
+	}
+	TAILQ_INIT(&ctl_head);
+
+	error = net_add_proto(&kctlswk_dgram, &systemdomain);
+	if (error) {
+		log(LOG_WARNING, "kern_control_init: net_add_proto dgram failed (%d)\n", error);
+	}
+	error = net_add_proto(&kctlswk_stream, &systemdomain);
+	if (error) {
+		log(LOG_WARNING, "kern_control_init: net_add_proto stream failed (%d)\n", error);
+	}
+
+	done:
+	if (error != 0) {
+		if (ctl_mtx) {
+			lck_mtx_free(ctl_mtx, ctl_lck_grp);
+			ctl_mtx = 0;
+		}
+		if (ctl_lck_grp) {
+			lck_grp_free(ctl_lck_grp);
+			ctl_lck_grp = 0;
+		}
+		if (ctl_lck_grp_attr) {
+			lck_grp_attr_free(ctl_lck_grp_attr);
+			ctl_lck_grp_attr = 0;
+		}
+		if (ctl_lck_attr) {
+			lck_attr_free(ctl_lck_attr);
+			ctl_lck_attr = 0;
+		}
+	}
+	return error;
+}
 
-	TAILQ_INIT(&ctl_head);
-
-	return(KERN_SUCCESS);
+static void
+kcb_delete(struct ctl_cb *kcb)
+{
+	if (kcb != 0) {
+		if (kcb->mtx != 0)
+			lck_mtx_free(kcb->mtx, ctl_lck_grp);
+		FREE(kcb, M_TEMP);
+	}
 }
 
 /*
  * Kernel Controller user-request functions
+ * attach function must exist and succeed
+ * detach not necessary
+ * we need a pcb for the per socket mutex
  */
-int
-ctl_attach (struct socket *so, int proto, struct proc *p)
+static int
+ctl_attach(__unused struct socket *so, __unused int proto, __unused struct proc *p)
 {
-	/*
-	 * attach function must exist and succeed
-	 * detach not necessary since we use
-	 * connect/disconnect to handle so_pcb
-	 */
+	int error = 0;
+	struct ctl_cb *kcb = 0;
+
+	MALLOC(kcb, struct ctl_cb *, sizeof(struct ctl_cb), M_TEMP, M_WAITOK);
+	if (kcb == NULL) {
+		error = ENOMEM;
+		goto quit;
+	}
+	bzero(kcb, sizeof(struct ctl_cb));
+
+	kcb->mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr);
+	if (kcb->mtx == NULL) {
+		error = ENOMEM;
+		goto quit;
+	}
+	kcb->so = so;
+	so->so_pcb = (caddr_t)kcb;
+
+quit:
+	if (error != 0) {
+		kcb_delete(kcb);
+		kcb = 0;
+	}
+	return error;
+}
+
+static int
+ctl_sofreelastref(struct socket *so)
+{
+	struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
+
+	so->so_pcb = 0;
+
+	if (kcb != 0) {
+		struct kctl *kctl;
+		if ((kctl = kcb->kctl) != 0) {
+			lck_mtx_lock(ctl_mtx);
+			TAILQ_REMOVE(&kctl->kcb_head, kcb, next);
+			lck_mtx_unlock(ctl_mtx);
+		}
+		kcb_delete(kcb);
+	}
+	return 0;
+}
+
+static int
+ctl_detach(struct socket *so)
+{
+	struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb;
+	if (kcb == 0)
+		return 0;
+
+	soisdisconnected(so);
+	so->so_flags |= SOF_PCBCLEARING;
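+	/*
+	 * Note: the control block is not freed here.  SOF_PCBCLEARING
+	 * defers the teardown to ctl_sofreelastref(), which ctl_unlock()
+	 * invokes once so_usecount drains to zero.
+	 */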
return 0; } -int -ctl_connect(struct socket *so, struct sockaddr *nam, struct proc *p) + +static int +ctl_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { - struct ctl *ctl; - int error = 0; - struct sockaddr_ctl *sa = (struct sockaddr_ctl *)nam; + struct kctl *kctl; + int error = 0; + struct sockaddr_ctl sa; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if (kcb == 0) + panic("ctl_connect so_pcb null\n"); + + if (nam->sa_len != sizeof(struct sockaddr_ctl)) + return(EINVAL); + + bcopy(nam, &sa, sizeof(struct sockaddr_ctl)); + + lck_mtx_lock(ctl_mtx); + kctl = ctl_find_by_id_unit(sa.sc_id, sa.sc_unit); + if (kctl == NULL) { + lck_mtx_unlock(ctl_mtx); + return ENOENT; + } - ctl = ctl_find(sa->sc_id, sa->sc_unit); - if (ctl == NULL) - return(EADDRNOTAVAIL); + if (((kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && (so->so_type != SOCK_STREAM)) || + (!(kctl->flags & CTL_FLAG_REG_SOCK_STREAM) && (so->so_type != SOCK_DGRAM))) { + lck_mtx_unlock(ctl_mtx); + return EPROTOTYPE; + } - if (ctl->flags & CTL_FLAG_PRIVILEGED) { - if (p == 0) + if (kctl->flags & CTL_FLAG_PRIVILEGED) { + if (p == 0) { + lck_mtx_unlock(ctl_mtx); return(EINVAL); - if (error = suser(p->p_ucred, &p->p_acflag)) + } + if ((error = proc_suser(p))) { + lck_mtx_unlock(ctl_mtx); return error; + } + } + + if ((kctl->flags & CTL_FLAG_REG_ID_UNIT) || sa.sc_unit != 0) { + if (kcb_find(kctl, sa.sc_unit) != NULL) { + lck_mtx_unlock(ctl_mtx); + return EBUSY; + } + } else { + u_int32_t unit = kctl->lastunit + 1; + + while (1) { + if (unit == ctl_maxunit) + unit = 1; + if (kcb_find(kctl, unit) == NULL) { + kctl->lastunit = sa.sc_unit = unit; + break; + } + if (unit++ == kctl->lastunit) { + lck_mtx_unlock(ctl_mtx); + return EBUSY; + } + } } - if (ctl->skt != NULL) - return(EBUSY); + kcb->unit = sa.sc_unit; + kcb->kctl = kctl; + TAILQ_INSERT_TAIL(&kctl->kcb_head, kcb, next); + lck_mtx_unlock(ctl_mtx); - error = soreserve(so, - ctl->sendbufsize ? ctl->sendbufsize : CTL_SENDSIZE, - ctl->recvbufsize ? 
ctl->recvbufsize : CTL_RECVSIZE); + error = soreserve(so, kctl->sendbufsize, kctl->recvbufsize); if (error) - return error; + goto done; + soisconnecting(so); - ctl->skt = so; + socket_unlock(so, 0); + error = (*kctl->connect)(kctl, &sa, &kcb->userdata); + socket_lock(so, 0); + if (error) + goto done; - if (ctl->connect) - error = (*ctl->connect)(ctl, ctl->userdata); + soisconnected(so); + +done: if (error) { - ctl->skt = NULL; - return error; + soisdisconnected(so); + lck_mtx_lock(ctl_mtx); + kcb->kctl = 0; + kcb->unit = 0; + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); + lck_mtx_unlock(ctl_mtx); } - - so->so_pcb = (caddr_t)ctl; - soisconnected(so); - return error; } -int +static int ctl_disconnect(struct socket *so) { - struct ctl *ctl; - - if ((ctl = (struct ctl *)so->so_pcb)) - { - if (ctl->disconnect) - (*ctl->disconnect)(ctl, ctl->userdata); - ctl->skt = NULL; - so->so_pcb = NULL; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if ((kcb = (struct ctl_cb *)so->so_pcb)) { + struct kctl *kctl = kcb->kctl; + + if (kctl && kctl->disconnect) { + socket_unlock(so, 0); + (*kctl->disconnect)(kctl, kcb->unit, kcb->userdata); + socket_lock(so, 0); + } + lck_mtx_lock(ctl_mtx); + kcb->kctl = 0; + kcb->unit = 0; + TAILQ_REMOVE(&kctl->kcb_head, kcb, next); soisdisconnected(so); + lck_mtx_unlock(ctl_mtx); } return 0; } -int -ctl_send(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p) +static int +ctl_peeraddr(struct socket *so, struct sockaddr **nam) { - struct ctl *ctl = (struct ctl *)so->so_pcb; - int error = 0; - - if (ctl == NULL) - return(ENOTCONN); - - if (ctl->write) - error = (*ctl->write)(ctl, ctl->userdata, m); - - return error; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kctl *kctl; + struct sockaddr_ctl sc; + + if (kcb == NULL) /* sanity check */ + return(ENOTCONN); + + if ((kctl = kcb->kctl) == NULL) + return(EINVAL); + + bzero(&sc, sizeof(struct sockaddr_ctl)); + sc.sc_len = sizeof(struct sockaddr_ctl); + sc.sc_family = AF_SYSTEM; + sc.ss_sysaddr = AF_SYS_CONTROL; + sc.sc_id = kctl->id; + sc.sc_unit = kcb->unit; + + *nam = dup_sockaddr((struct sockaddr *)&sc, 1); + + return 0; } -int -ctl_enqueuembuf(void *ctlref, struct mbuf *m, u_int32_t flags) +static int +ctl_send(struct socket *so, int flags, struct mbuf *m, + __unused struct sockaddr *addr, __unused struct mbuf *control, + __unused struct proc *p) { - struct ctl *ctl = (struct ctl *)ctlref; - struct socket *so = (struct socket *)ctl->skt; - - if (ctl == NULL) /* sanity check */ - return(EINVAL); - - if (so == NULL) - return(ENOTCONN); - - if (sbspace(&so->so_rcv) < m->m_pkthdr.len) - return(ENOBUFS); - - sbappend(&so->so_rcv, m); - if ((flags & CTL_DATA_NOWAKEUP) == 0) - sorwakeup(so); - return 0; + int error = 0; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kctl *kctl; + + if (kcb == NULL) /* sanity check */ + return(ENOTCONN); + + if ((kctl = kcb->kctl) == NULL) + return(EINVAL); + + if (kctl->send) { + socket_unlock(so, 0); + error = (*kctl->send)(kctl, kcb->unit, kcb->userdata, m, flags); + socket_lock(so, 0); + } + return error; } -int -ctl_enqueuedata(void *ctlref, void *data, size_t len, u_int32_t flags) +errno_t +ctl_enqueuembuf(void *kctlref, u_int32_t unit, struct mbuf *m, u_int32_t flags) { - struct ctl *ctl = (struct ctl *)ctlref; - struct socket *so = (struct socket *)ctl->skt; - struct mbuf *m; - - if (ctl == NULL) /* sanity check */ - return(EINVAL); - - if (so == NULL) - return(ENOTCONN); - - if (len > MCLBYTES) - 
return(EMSGSIZE); + struct ctl_cb *kcb; + struct socket *so; + errno_t error = 0; + struct kctl *kctl = (struct kctl *)kctlref; + + if (kctl == NULL) + return EINVAL; + + kcb = kcb_find(kctl, unit); + if (kcb == NULL) + return EINVAL; + + so = (struct socket *)kcb->so; + if (so == NULL) + return EINVAL; + + socket_lock(so, 1); + if (sbspace(&so->so_rcv) < m->m_pkthdr.len) { + error = ENOBUFS; + goto bye; + } + if ((flags & CTL_DATA_EOR)) + m->m_flags |= M_EOR; + if (sbappend(&so->so_rcv, m) && (flags & CTL_DATA_NOWAKEUP) == 0) + sorwakeup(so); +bye: + socket_unlock(so, 1); + return error; +} - if (sbspace(&so->so_rcv) < len) - return(ENOBUFS); - - if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return (ENOBUFS); - - if (len > MHLEN) { - MCLGET(m, M_NOWAIT); - if (!(m->m_flags & M_EXT)) { - m_freem(m); - return(ENOBUFS); - } - } +errno_t +ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t flags) +{ + struct ctl_cb *kcb; + struct socket *so; + struct mbuf *m; + errno_t error = 0; + struct kctl *kctl = (struct kctl *)kctlref; + unsigned int num_needed; + struct mbuf *n; + size_t curlen = 0; + + if (kctlref == NULL) + return EINVAL; + + kcb = kcb_find(kctl, unit); + if (kcb == NULL) + return EINVAL; + + so = (struct socket *)kcb->so; + if (so == NULL) + return EINVAL; + + socket_lock(so, 1); + if ((size_t)sbspace(&so->so_rcv) < len) { + error = ENOBUFS; + goto bye; + } + + num_needed = 1; + m = m_allocpacket_internal(&num_needed, len, NULL, M_NOWAIT, 1, 0); + if (m == NULL) { + printf("ctl_enqueuedata: m_allocpacket_internal(%lu) failed\n", len); + error = ENOBUFS; + goto bye; + } + + for (n = m; n != NULL; n = n->m_next) { + size_t mlen = mbuf_maxlen(n); + + if (mlen + curlen > len) + mlen = len - curlen; + n->m_len = mlen; + bcopy((char *)data + curlen, n->m_data, mlen); + curlen += mlen; + } + mbuf_pkthdr_setlen(m, curlen); + + if ((flags & CTL_DATA_EOR)) + m->m_flags |= M_EOR; + if (sbappend(&so->so_rcv, m) && (flags & CTL_DATA_NOWAKEUP) == 0) + sorwakeup(so); +bye: + socket_unlock(so, 1); + return error; +} - bcopy(data, mtod(m, void *), len); - m->m_pkthdr.len = m->m_len = len; - sbappend(&so->so_rcv, m); - if ((flags & CTL_DATA_NOWAKEUP) == 0) - sorwakeup(so); - return 0; +errno_t +ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space) +{ + struct ctl_cb *kcb; + struct kctl *kctl = (struct kctl *)kctlref; + struct socket *so; + + if (kctlref == NULL || space == NULL) + return EINVAL; + + kcb = kcb_find(kctl, unit); + if (kcb == NULL) + return EINVAL; + + so = (struct socket *)kcb->so; + if (so == NULL) + return EINVAL; + + socket_lock(so, 1); + *space = sbspace(&so->so_rcv); + socket_unlock(so, 1); + + return 0; } -int +static int ctl_ctloutput(struct socket *so, struct sockopt *sopt) { - struct ctl *ctl = (struct ctl *)so->so_pcb; - int error = 0, s; - void *data; - size_t len; + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kctl *kctl; + int error = 0; + void *data; + size_t len; + + if (sopt->sopt_level != SYSPROTO_CONTROL) { + return(EINVAL); + } + + if (kcb == NULL) /* sanity check */ + return(ENOTCONN); + + if ((kctl = kcb->kctl) == NULL) + return(EINVAL); + + switch (sopt->sopt_dir) { + case SOPT_SET: + if (kctl->setopt == NULL) + return(ENOTSUP); + MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); + if (data == NULL) + return(ENOMEM); + error = sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); + if (error == 0) { + socket_unlock(so, 0); + error = (*kctl->setopt)(kcb->kctl, kcb->unit, 
kcb->userdata, sopt->sopt_name, + data, sopt->sopt_valsize); + socket_lock(so, 0); + } + FREE(data, M_TEMP); + break; + + case SOPT_GET: + if (kctl->getopt == NULL) + return(ENOTSUP); + data = NULL; + if (sopt->sopt_valsize && sopt->sopt_val) { + MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); + if (data == NULL) + return(ENOMEM); + } + len = sopt->sopt_valsize; + socket_unlock(so, 0); + error = (*kctl->getopt)(kcb->kctl, kcb->unit, kcb->userdata, sopt->sopt_name, + data, &len); + socket_lock(so, 0); + if (error == 0) { + if (data != NULL) + error = sooptcopyout(sopt, data, len); + else + sopt->sopt_valsize = len; + } + if (data != NULL) + FREE(data, M_TEMP); + break; + } + return error; +} - if (sopt->sopt_level != SYSPROTO_CONTROL) { - return(EINVAL); - } +static int +ctl_ioctl(__unused struct socket *so, u_long cmd, caddr_t data, + __unused struct ifnet *ifp, __unused struct proc *p) +{ + int error = ENOTSUP; + + switch (cmd) { + /* get the number of controllers */ + case CTLIOCGCOUNT: { + struct kctl *kctl; + int n = 0; + + lck_mtx_lock(ctl_mtx); + TAILQ_FOREACH(kctl, &ctl_head, next) + n++; + lck_mtx_unlock(ctl_mtx); + + *(u_int32_t *)data = n; + error = 0; + break; + } + case CTLIOCGINFO: { + struct ctl_info *ctl_info = (struct ctl_info *)data; + struct kctl *kctl = 0; + size_t name_len = strlen(ctl_info->ctl_name); + + if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME) { + error = EINVAL; + break; + } + lck_mtx_lock(ctl_mtx); + kctl = ctl_find_by_name(ctl_info->ctl_name); + lck_mtx_unlock(ctl_mtx); + if (kctl == 0) { + error = ENOENT; + break; + } + ctl_info->ctl_id = kctl->id; + error = 0; + break; + } + + /* add controls to get list of NKEs */ + + } + + return error; +} - if (ctl == NULL) - return(ENOTCONN); - - switch (sopt->sopt_dir) { - case SOPT_SET: - if (ctl->set == NULL) - return(ENOTSUP); - MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); - if (data == NULL) - return(ENOMEM); - error = sooptcopyin(sopt, data, sopt->sopt_valsize, sopt->sopt_valsize); - if (error == 0) - error = (*ctl->set)(ctl, ctl->userdata, sopt->sopt_name, data, sopt->sopt_valsize); - FREE(data, M_TEMP); - break; - - case SOPT_GET: - if (ctl->get == NULL) - return(ENOTSUP); - data = NULL; - if (sopt->sopt_valsize && sopt->sopt_val) { - MALLOC(data, void *, sopt->sopt_valsize, M_TEMP, M_WAITOK); - if (data == NULL) - return(ENOMEM); - } - len = sopt->sopt_valsize; - error = (*ctl->get)(ctl, ctl->userdata, sopt->sopt_name, data, &len); - if (error == 0) { - if (data != NULL) - error = sooptcopyout(sopt, data, len); - else - sopt->sopt_valsize = len; - } - if (data != NULL) - FREE(data, M_TEMP); - break; - } - return error; +/* + * Register/unregister a NKE + */ +errno_t +ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) +{ + struct kctl *kctl = 0; + u_int32_t id = -1; + u_int32_t n; + size_t name_len; + + if (userkctl == NULL) /* sanity check */ + return(EINVAL); + if (userkctl->ctl_connect == NULL) + return(EINVAL); + name_len = strlen(userkctl->ctl_name); + if (name_len == 0 || name_len + 1 > MAX_KCTL_NAME) + return(EINVAL); + + MALLOC(kctl, struct kctl *, sizeof(*kctl), M_TEMP, M_WAITOK); + if (kctl == NULL) + return(ENOMEM); + bzero((char *)kctl, sizeof(*kctl)); + + lck_mtx_lock(ctl_mtx); + + if ((userkctl->ctl_flags & CTL_FLAG_REG_ID_UNIT) == 0) { + if (ctl_find_by_name(userkctl->ctl_name) != NULL) { + lck_mtx_unlock(ctl_mtx); + FREE(kctl, M_TEMP); + return(EEXIST); + } + for (n = 0, id = ctl_last_id + 1; n < ctl_max; id++, n++) { + if (id == 0) { + n--; + 
continue; + } + if (ctl_find_by_id(id) == 0) + break; + } + if (id == ctl_max) { + lck_mtx_unlock(ctl_mtx); + FREE(kctl, M_TEMP); + return(ENOBUFS); + } + userkctl->ctl_id =id; + kctl->id = id; + kctl->reg_unit = -1; + } else { + if (ctl_find_by_id_unit(userkctl->ctl_id, userkctl->ctl_unit) != NULL) { + lck_mtx_unlock(ctl_mtx); + FREE(kctl, M_TEMP); + return(EEXIST); + } + kctl->id = userkctl->ctl_id; + kctl->reg_unit = userkctl->ctl_unit; + } + strcpy(kctl->name, userkctl->ctl_name); + kctl->flags = userkctl->ctl_flags; + + /* Let the caller know the default send and receive sizes */ + if (userkctl->ctl_sendsize == 0) + userkctl->ctl_sendsize = CTL_SENDSIZE; + kctl->sendbufsize = userkctl->ctl_sendsize; + + if (kctl->recvbufsize == 0) + userkctl->ctl_recvsize = CTL_RECVSIZE; + kctl->recvbufsize = userkctl->ctl_recvsize; + + kctl->connect = userkctl->ctl_connect; + kctl->disconnect = userkctl->ctl_disconnect; + kctl->send = userkctl->ctl_send; + kctl->setopt = userkctl->ctl_setopt; + kctl->getopt = userkctl->ctl_getopt; + + TAILQ_INIT(&kctl->kcb_head); + + TAILQ_INSERT_TAIL(&ctl_head, kctl, next); + ctl_max++; + + lck_mtx_unlock(ctl_mtx); + + *kctlref = kctl; + + ctl_post_msg(KEV_CTL_REGISTERED, kctl->id); + return(0); } -int ctl_ioctl(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p) -{ - int error = ENOTSUP, s, n; - struct ctl *ctl = (struct ctl *)so->so_pcb; - - switch (cmd) { - /* get the number of controllers */ - case CTLIOCGCOUNT: - n = 0; - TAILQ_FOREACH(ctl, &ctl_head, next) - n++; - *(u_int32_t *)data = n; - error = 0; - break; - +errno_t +ctl_deregister(void *kctlref) +{ + struct kctl *kctl; - /* add controls to get list of NKEs */ + if (kctlref == NULL) /* sanity check */ + return(EINVAL); + lck_mtx_lock(ctl_mtx); + TAILQ_FOREACH(kctl, &ctl_head, next) { + if (kctl == (struct kctl *)kctlref) + break; } + if (kctl != (struct kctl *)kctlref) { + lck_mtx_unlock(ctl_mtx); + return EINVAL; + } + if (!TAILQ_EMPTY(&kctl->kcb_head)) { + lck_mtx_unlock(ctl_mtx); + return EBUSY; + } + + TAILQ_REMOVE(&ctl_head, kctl, next); + ctl_max--; + + lck_mtx_unlock(ctl_mtx); - return error; + ctl_post_msg(KEV_CTL_DEREGISTERED, kctl->id); + FREE(kctl, M_TEMP); + return(0); } /* - * Register/unregister a NKE + * Must be called with global lock taked */ -int -ctl_register(struct kern_ctl_reg *userctl, void *userdata, kern_ctl_ref *ctlref) +static struct kctl * +ctl_find_by_id(u_int32_t id) { - struct ctl *ctl; + struct kctl *kctl; - if (userctl == NULL) /* sanity check */ - return(EINVAL); - - ctl = ctl_find(userctl->ctl_id, userctl->ctl_unit); - if (ctl != NULL) - return(EEXIST); - - MALLOC(ctl, struct ctl *, sizeof(*ctl), M_TEMP, M_WAITOK); - if (ctl == NULL) - return(ENOMEM); - - bzero((char *)ctl, sizeof(*ctl)); - - ctl->id = userctl->ctl_id; - ctl->unit = userctl->ctl_unit; - ctl->flags = userctl->ctl_flags; - ctl->sendbufsize = userctl->ctl_sendsize; - ctl->recvbufsize = userctl->ctl_recvsize; - ctl->userdata = userdata; - ctl->connect = userctl->ctl_connect; - ctl->disconnect = userctl->ctl_disconnect; - ctl->write = userctl->ctl_write; - ctl->set = userctl->ctl_set; - ctl->get = userctl->ctl_get; - - TAILQ_INSERT_TAIL(&ctl_head, ctl, next); - - *ctlref = ctl; + TAILQ_FOREACH(kctl, &ctl_head, next) + if (kctl->id == id) + return kctl; - ctl_post_msg(KEV_CTL_REGISTERED, ctl->id, ctl->unit); - return(0); + return NULL; } -int -ctl_deregister(void *ctlref) +/* + * Must be called with global ctl_mtx lock taked + */ +static struct kctl * +ctl_find_by_name(const char 
*name) { - struct ctl *ctl = (struct ctl *)ctlref; - struct socket *so; + struct kctl *kctl; - if (ctl == NULL) /* sanity check */ - return(EINVAL); + TAILQ_FOREACH(kctl, &ctl_head, next) + if (strcmp(kctl->name, name) == 0) + return kctl; - TAILQ_REMOVE(&ctl_head, ctl, next); + return NULL; +} - if (ctl->skt) { - ctl->skt->so_pcb = 0; - soisdisconnected(ctl->skt); +/* + * Must be called with global ctl_mtx lock taked + * + */ +static struct kctl * +ctl_find_by_id_unit(u_int32_t id, u_int32_t unit) +{ + struct kctl *kctl; + + TAILQ_FOREACH(kctl, &ctl_head, next) { + if (kctl->id == id && (kctl->flags & CTL_FLAG_REG_ID_UNIT) == 0) + return kctl; + else if (kctl->id == id && kctl->reg_unit == unit) + return kctl; } - - ctl_post_msg(KEV_CTL_DEREGISTERED, ctl->id, ctl->unit); - FREE(ctl, M_TEMP); - return(0); + return NULL; } /* - * Locate a NKE + * Must be called with kernel controller lock taken */ -struct ctl * -ctl_find(u_int32_t id, u_int32_t unit) +static struct ctl_cb * +kcb_find(struct kctl *kctl, u_int32_t unit) { - struct ctl *ctl; + struct ctl_cb *kcb; - TAILQ_FOREACH(ctl, &ctl_head, next) - if ((ctl->id == id) && (ctl->unit == unit)) - return ctl; + TAILQ_FOREACH(kcb, &kctl->kcb_head, next) + if ((kcb->unit == unit)) + return kcb; return NULL; } -void ctl_post_msg(u_long event_code, u_int32_t id, u_int32_t unit) +/* + * Must be called witout lock + */ +static void +ctl_post_msg(u_long event_code, u_int32_t id) { struct ctl_event_data ctl_ev_data; struct kev_msg ev_msg; @@ -456,7 +906,6 @@ void ctl_post_msg(u_long event_code, u_int32_t id, u_int32_t unit) /* common nke subclass data */ bzero(&ctl_ev_data, sizeof(ctl_ev_data)); ctl_ev_data.ctl_id = id; - ctl_ev_data.ctl_unit = unit; ev_msg.dv[0].data_ptr = &ctl_ev_data; ev_msg.dv[0].data_length = sizeof(ctl_ev_data); @@ -465,3 +914,83 @@ void ctl_post_msg(u_long event_code, u_int32_t id, u_int32_t unit) kev_post_msg(&ev_msg); } +static int +ctl_lock(struct socket *so, int refcount, int lr) + { + int lr_saved; +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; +#endif + + if (so->so_pcb) { + lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx); + } else { + panic("ctl_lock: so=%x NO PCB! 
lr=%x\n", so, lr_saved); + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + } + + if (so->so_usecount < 0) + panic("ctl_lock: so=%x so_pcb=%x lr=%x ref=%x\n", + so, so->so_pcb, lr_saved, so->so_usecount); + + if (refcount) + so->so_usecount++; + so->reserved3 = (void *)lr_saved; + return (0); +} + +static int +ctl_unlock(struct socket *so, int refcount, int lr) +{ + int lr_saved; + lck_mtx_t * mutex_held; + +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; +#endif + +#ifdef MORE_KCTLLOCK_DEBUG + printf("ctl_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", + so, so->so_pcb, ((struct ctl_cb *)so->so_pcb)->mtx, so->so_usecount, lr_saved); +#endif + if (refcount) + so->so_usecount--; + + if (so->so_usecount < 0) + panic("ctl_unlock: so=%x usecount=%x\n", so, so->so_usecount); + if (so->so_pcb == NULL) { + panic("ctl_unlock: so=%x NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); + mutex_held = so->so_proto->pr_domain->dom_mtx; + } else { + mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx; + } + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(mutex_held); + so->reserved4 = (void *)lr_saved; + + if (so->so_usecount == 0) + ctl_sofreelastref(so); + + return (0); +} + +static lck_mtx_t * +ctl_getlock(struct socket *so, __unused int locktype) +{ + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + + if (so->so_pcb) { + if (so->so_usecount < 0) + panic("ctl_getlock: so=%x usecount=%x\n", so, so->so_usecount); + return(kcb->mtx); + } else { + panic("ctl_getlock: so=%x NULL so_pcb\n", so); + return (so->so_proto->pr_domain->dom_mtx); + } +} diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index fe156fcbf..d17444fd6 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,13 +35,13 @@ #include #include #include -#include -#include +#include +#include +#include #include #include -#include #include -#include +#include #include #include #include @@ -51,6 +51,11 @@ #include #include +#include /* last */ +#include /* current_map() */ +#include /* mach_vm_region_recurse() */ +#include /* task_suspend() */ +#include /* get_task_numacts() */ typedef struct { int flavor; /* the number for this flavor */ @@ -93,23 +98,26 @@ typedef struct { } tir_t; /* XXX should be static */ -void collectth_state(thread_act_t th_act, tir_t *t); +void collectth_state(thread_t th_act, void *tirp); /* XXX not in a Mach header anywhere */ -kern_return_t thread_getstatus(register thread_act_t act, int flavor, +kern_return_t thread_getstatus(register thread_t act, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); +void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); -__private_extern__ do_coredump = 1; /* default: dump cores */ -__private_extern__ sugid_coredump = 0; /* deafult: but not on SGUID binaries */ +__private_extern__ int do_coredump = 1; /* default: dump cores */ +__private_extern__ int sugid_coredump = 0; /* default: but not SGUID binaries */ void -collectth_state(thread_act_t th_act, tir_t *t) +collectth_state(thread_t th_act, void *tirp) { vm_offset_t header; int hoffset, i ; mythread_state_flavor_t *flavors; struct thread_command *tc; + tir_t *t = (tir_t *)tirp; + /* * Fill in thread command structure. 
*/ @@ -140,9 +148,6 @@ collectth_state(thread_act_t th_act, tir_t *t) t->hoffset = hoffset; } -extern boolean_t coredumpok(vm_map_t map, vm_offset_t va); /* temp fix */ -extern task_t current_task(void); /* XXX */ - /* * Create a core image on the file "core". */ @@ -151,19 +156,17 @@ int coredump(struct proc *p) { int error=0; - register struct pcred *pcred = p->p_cred; - register struct ucred *cred = pcred->pc_ucred; - struct nameidata nd; - struct vattr vattr; + kauth_cred_t cred = kauth_cred_get(); + struct vnode_attr va; + struct vfs_context context; vm_map_t map; int thread_count, segment_count; int command_size, header_size, tstate_size; - int hoffset, foffset, vmoffset; + int hoffset; + off_t foffset; + vm_map_offset_t vmoffset; vm_offset_t header; - struct machine_slot *ms; - struct mach_header *mh; - struct segment_command *sc; - vm_size_t size; + vm_map_size_t vmsize; vm_prot_t prot; vm_prot_t maxprot; vm_inherit_t inherit; @@ -180,15 +183,26 @@ coredump(struct proc *p) int vbrcount=0; tir_t tir1; struct vnode * vp; + struct mach_header *mh; + struct mach_header_64 *mh64; + int is_64 = 0; + size_t mach_header_sz = sizeof(struct mach_header); + size_t segment_command_sz = sizeof(struct segment_command); if (do_coredump == 0 || /* Not dumping at all */ ( (sugid_coredump == 0) && /* Not dumping SUID/SGID binaries */ - ( (pcred->p_svuid != pcred->p_ruid) || - (pcred->p_svgid != pcred->p_rgid)))) { + ( (cred->cr_svuid != cred->cr_ruid) || + (cred->cr_svgid != cred->cr_rgid)))) { return (EFAULT); } + if (IS_64BIT_PROCESS(p)) { + is_64 = 1; + mach_header_sz = sizeof(struct mach_header_64); + segment_command_sz = sizeof(struct segment_command_64); + } + task = current_task(); map = current_map(); mapsize = get_vmmap_size(map); @@ -198,30 +212,31 @@ coredump(struct proc *p) (void) task_suspend(task); /* create name according to sysctl'able format string */ - name = proc_core_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid); + name = proc_core_name(p->p_comm, kauth_cred_getuid(cred), p->p_pid); /* if name creation fails, fall back to historical behaviour... */ if (name == NULL) { - sprintf(core_name, "/cores/core.%d", p->p_pid); + sprintf(core_name, "/cores/core.%d", p->p_pid); name = core_name; } + context.vc_proc = p; + context.vc_ucred = cred; - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p); - if((error = vn_open(&nd, O_CREAT | FWRITE | O_NOFOLLOW, S_IRUSR )) != 0) - return (error); - vp = nd.ni_vp; - + if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, 0, &vp, &context))) + return (error); + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_nlink); /* Don't dump to non-regular files or files with links. 
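    A core path that has been replaced with a symlink is rejected at
    open time by O_NOFOLLOW above; the va_nlink == 1 check below
    rejects the hard-link variant of the same attack.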
*/ if (vp->v_type != VREG || - VOP_GETATTR(vp, &vattr, cred, p) || vattr.va_nlink != 1) { + vnode_getattr(vp, &va, &context) || va.va_nlink != 1) { error = EFAULT; goto out; } - VATTR_NULL(&vattr); - vattr.va_size = 0; - VOP_LEASE(vp, p, cred, LEASE_WRITE); - VOP_SETATTR(vp, &vattr, cred, p); + VATTR_INIT(&va); /* better to do it here than waste more stack in vnode_setsize */ + VATTR_SET(&va, va_data_size, 0); + vnode_setattr(vp, &va, &context); p->p_acflag |= ACORE; /* @@ -238,45 +253,71 @@ coredump(struct proc *p) tstate_size += sizeof(mythread_state_flavor_t) + (flavors[i].count * sizeof(int)); - command_size = segment_count*sizeof(struct segment_command) + + command_size = segment_count * segment_command_sz + thread_count*sizeof(struct thread_command) + tstate_size*thread_count; - header_size = command_size + sizeof(struct mach_header); + header_size = command_size + mach_header_sz; - (void) kmem_alloc_wired(kernel_map, + (void) kmem_alloc(kernel_map, (vm_offset_t *)&header, (vm_size_t)header_size); /* * Set up Mach-O header. */ - mh = (struct mach_header *) header; - ms = &machine_slot[cpu_number()]; - mh->magic = MH_MAGIC; - mh->cputype = ms->cpu_type; - mh->cpusubtype = ms->cpu_subtype; - mh->filetype = MH_CORE; - mh->ncmds = segment_count + thread_count; - mh->sizeofcmds = command_size; - - hoffset = sizeof(struct mach_header); /* offset into header */ - foffset = round_page_32(header_size); /* offset into file */ - vmoffset = VM_MIN_ADDRESS; /* offset into VM */ + if (is_64) { + mh64 = (struct mach_header_64 *)header; + mh64->magic = MH_MAGIC_64; + mh64->cputype = cpu_type(); + mh64->cpusubtype = cpu_subtype(); + mh64->filetype = MH_CORE; + mh64->ncmds = segment_count + thread_count; + mh64->sizeofcmds = command_size; + mh64->reserved = 0; /* 8 byte alignment */ + } else { + mh = (struct mach_header *)header; + mh->magic = MH_MAGIC; + mh->cputype = cpu_type(); + mh->cpusubtype = cpu_subtype(); + mh->filetype = MH_CORE; + mh->ncmds = segment_count + thread_count; + mh->sizeofcmds = command_size; + } + + hoffset = mach_header_sz; /* offset into header */ + foffset = round_page(header_size); /* offset into file */ + vmoffset = MACH_VM_MIN_ADDRESS; /* offset into VM */ + /* * We use to check for an error, here, now we try and get * as much as we can */ - while (segment_count > 0){ + while (segment_count > 0) { + struct segment_command *sc; + struct segment_command_64 *sc64; + /* * Get region information for next region. */ while (1) { vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64; - if((kret = vm_region_recurse_64(map, - &vmoffset, &size, &nesting_depth, - &vbr, &vbrcount)) != KERN_SUCCESS) { + if((kret = mach_vm_region_recurse(map, + &vmoffset, &vmsize, &nesting_depth, + (vm_region_recurse_info_t)&vbr, + &vbrcount)) != KERN_SUCCESS) { + break; + } + /* + * If we get a valid mapping back, but we're dumping + * a 32 bit process, and it's over the allowable + * address space of a 32 bit process, it's the same + * as if mach_vm_region_recurse() failed. + */ + if (!(is_64) && + (vmoffset + vmsize > VM_MAX_ADDRESS)) { + kret = KERN_INVALID_ADDRESS; break; } if(vbr.is_submap) { @@ -295,26 +336,41 @@ coredump(struct proc *p) /* * Fill in segment command structure. 
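+ *
+ * For reference, the finished core file is laid out like this (a sketch of
+ * what the code below produces):
+ *
+ *	offset 0:			mach_header or mach_header_64
+ *	hoffset:			LC_SEGMENT{,_64} commands, one per
+ *					VM region, then the LC_THREAD data
+ *	round_page(header_size):	segment contents in region order;
+ *					foffset/vmoffset advance by vmsize
+ *					for each segment written below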
*/ - sc = (struct segment_command *) (header + hoffset); - sc->cmd = LC_SEGMENT; - sc->cmdsize = sizeof(struct segment_command); - /* segment name is zerod by kmem_alloc */ - sc->segname[0] = 0; - sc->vmaddr = vmoffset; - sc->vmsize = size; - sc->fileoff = foffset; - sc->filesize = size; - sc->maxprot = maxprot; - sc->initprot = prot; - sc->nsects = 0; + if (is_64) { + sc64 = (struct segment_command_64 *)(header + hoffset); + sc64->cmd = LC_SEGMENT_64; + sc64->cmdsize = sizeof(struct segment_command_64); + /* segment name is zeroed by kmem_alloc */ + sc64->segname[0] = 0; + sc64->vmaddr = vmoffset; + sc64->vmsize = vmsize; + sc64->fileoff = foffset; + sc64->filesize = vmsize; + sc64->maxprot = maxprot; + sc64->initprot = prot; + sc64->nsects = 0; + } else { + sc = (struct segment_command *) (header + hoffset); + sc->cmd = LC_SEGMENT; + sc->cmdsize = sizeof(struct segment_command); + /* segment name is zeroed by kmem_alloc */ + sc->segname[0] = 0; + sc->vmaddr = CAST_DOWN(vm_offset_t,vmoffset); + sc->vmsize = CAST_DOWN(vm_size_t,vmsize); + sc->fileoff = CAST_DOWN(uint32_t,foffset); + sc->filesize = CAST_DOWN(uint32_t,vmsize); + sc->maxprot = maxprot; + sc->initprot = prot; + sc->nsects = 0; + } /* * Write segment out. Try as hard as possible to * get read access to the data. */ if ((prot & VM_PROT_READ) == 0) { - vm_protect(map, vmoffset, size, FALSE, - prot|VM_PROT_READ); + mach_vm_protect(map, vmoffset, vmsize, FALSE, + prot|VM_PROT_READ); } /* * Only actually perform write if we can read. @@ -324,16 +380,42 @@ if ((maxprot & VM_PROT_READ) == VM_PROT_READ && vbr.user_tag != VM_MEMORY_IOKIT && coredumpok(map,vmoffset)) { - error = vn_rdwr(UIO_WRITE, vp, (caddr_t)vmoffset, size, foffset, - UIO_USERSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); + vm_map_size_t tmp_vmsize = vmsize; + off_t xfer_foffset = foffset; + + //LP64todo - works around vn_rdwr_64() 2G limit + while (tmp_vmsize > 0) { + vm_map_size_t xfer_vmsize = tmp_vmsize; + if (xfer_vmsize > INT_MAX) + xfer_vmsize = INT_MAX; + /* advance the source address along with the file offset */ + error = vn_rdwr_64(UIO_WRITE, vp, + vmoffset + (vmsize - tmp_vmsize), xfer_vmsize, xfer_foffset, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); + tmp_vmsize -= xfer_vmsize; + xfer_foffset += xfer_vmsize; + } } - hoffset += sizeof(struct segment_command); - foffset += size; - vmoffset += size; + hoffset += segment_command_sz; + foffset += vmsize; + vmoffset += vmsize; segment_count--; } + /* + * If there are remaining segments which have not been written + * out because of a break in the loop above, then they were not counted + * because they exceed the real address space of the executable + * type: remove them from the header's count. This is OK, since + * we are allowed to have a sparse area following the segments. + */ + if (is_64) { + mh64->ncmds -= segment_count; + } else { + mh->ncmds -= segment_count; + } + tir1.header = header; tir1.hoffset = hoffset; tir1.flavors = flavors; @@ -342,15 +424,15 @@ /* * Write out the Mach header at the beginning of the - * file. + * file. OK to use a 32 bit write for this.
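+ * (header_size covers only the Mach header plus the load commands built
+ * above, so unlike the segment data it cannot approach the 2GB limit.)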
*/ error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, header_size, (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); + UIO_SYSSPACE32, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); kmem_free(kernel_map, header, header_size); out: - VOP_UNLOCK(vp, 0, p); - error1 = vn_close(vp, FWRITE, cred, p); + error1 = vnode_close(vp, FWRITE, &context); if (error == 0) error = error1; + return (error); } diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c new file mode 100644 index 000000000..0a917310f --- /dev/null +++ b/bsd/kern/kern_credential.c @@ -0,0 +1,2268 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Kernel Authorization framework: Management of process/thread credentials and identity information. + */ + + +#include /* XXX trim includes */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#ifdef MACH_ASSERT +# undef MACH_ASSERT +#endif +#define MACH_ASSERT 1 /* XXX so bogus */ +#include + +#define CRED_DIAGNOSTIC 1 + +# define NULLCRED_CHECK(_c) do {if (((_c) == NOCRED) || ((_c) == FSCRED)) panic("bad credential %p", _c);} while(0) + +/* + * Interface to external identity resolver. + * + * The architecture of the interface is simple; the external resolver calls in to + * get work, then calls back with completed work. It also calls us to let us know + * that it's (re)started, so that we can resubmit work if it times out. 
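+ *
+ * From the resolver's side the conversation looks roughly like this
+ * (a hypothetical sketch of a daemon main loop, not a supported API):
+ *
+ *	identitysvc(KAUTH_EXTLOOKUP_REGISTER, 0);
+ *	for (;;) {
+ *		identitysvc(KAUTH_EXTLOOKUP_WORKER, msg);	-- blocks for work
+ *		-- ... resolve the request described by msg ...
+ *		msg->el_result = KAUTH_EXTLOOKUP_SUCCESS;
+ *		identitysvc(KAUTH_EXTLOOKUP_RESULT, msg);
+ *	}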
+ */ + +static lck_mtx_t *kauth_resolver_mtx; +#define KAUTH_RESOLVER_LOCK() lck_mtx_lock(kauth_resolver_mtx); +#define KAUTH_RESOLVER_UNLOCK() lck_mtx_unlock(kauth_resolver_mtx); + +static volatile pid_t kauth_resolver_identity; +static int kauth_resolver_registered; +static uint32_t kauth_resolver_sequence; + +struct kauth_resolver_work { + TAILQ_ENTRY(kauth_resolver_work) kr_link; + struct kauth_identity_extlookup kr_work; + uint32_t kr_seqno; + int kr_refs; + int kr_flags; +#define KAUTH_REQUEST_UNSUBMITTED (1<<0) +#define KAUTH_REQUEST_SUBMITTED (1<<1) +#define KAUTH_REQUEST_DONE (1<<2) + int kr_result; +}; + +TAILQ_HEAD(kauth_resolver_unsubmitted_head, kauth_resolver_work) kauth_resolver_unsubmitted; +TAILQ_HEAD(kauth_resolver_submitted_head, kauth_resolver_work) kauth_resolver_submitted; +TAILQ_HEAD(kauth_resolver_done_head, kauth_resolver_work) kauth_resolver_done; + +static int kauth_resolver_submit(struct kauth_identity_extlookup *lkp); +static int kauth_resolver_complete(user_addr_t message); +static int kauth_resolver_getwork(user_addr_t message); + +#define KAUTH_CRED_PRIMES_COUNT 7 +static const int kauth_cred_primes[KAUTH_CRED_PRIMES_COUNT] = {97, 241, 397, 743, 1499, 3989, 7499}; +static int kauth_cred_primes_index = 0; +static int kauth_cred_table_size = 0; + +TAILQ_HEAD(kauth_cred_entry_head, ucred); +static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL; + +#define KAUTH_CRED_HASH_DEBUG 0 + +static int kauth_cred_add(kauth_cred_t new_cred); +static void kauth_cred_remove(kauth_cred_t cred); +static inline u_long kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key); +static u_long kauth_cred_get_hashkey(kauth_cred_t cred); +static kauth_cred_t kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t new_cred, boolean_t retain_auditinfo); + +#if KAUTH_CRED_HASH_DEBUG +static int kauth_cred_count = 0; +static void kauth_cred_hash_print(void); +static void kauth_cred_print(kauth_cred_t cred); +#endif + +void +kauth_resolver_init(void) +{ + TAILQ_INIT(&kauth_resolver_unsubmitted); + TAILQ_INIT(&kauth_resolver_submitted); + TAILQ_INIT(&kauth_resolver_done); + kauth_resolver_sequence = 31337; + kauth_resolver_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); +} + +/* + * Allocate a work queue entry, submit the work and wait for completion. + * + * XXX do we want an 'interruptible' flag vs. always being interruptible? + */ +static int +kauth_resolver_submit(struct kauth_identity_extlookup *lkp) +{ + struct kauth_resolver_work *workp, *killp; + struct timespec ts; + int error, shouldfree; + + /* no point actually blocking if the resolver isn't up yet */ + if (kauth_resolver_identity == 0) { + /* + * We've already waited an initial 30 seconds with no result. + * Sleep on a stack address so no one wakes us before timeout; + * we sleep a half a second in case we are a high priority + * process, so that memberd doesn't starve while we are in a + * tight loop between user and kernel, eating all the CPU. + */ + error = tsleep(&ts, PZERO | PCATCH, "kr_submit", hz/2); + if (kauth_resolver_identity == 0) { + /* + * if things haven't changed while we were asleep, + * tell the caller we couldn't get an authoritative + * answer. 
+ */ + return(EWOULDBLOCK); + } + } + + MALLOC(workp, struct kauth_resolver_work *, sizeof(*workp), M_KAUTH, M_WAITOK); + if (workp == NULL) + return(ENOMEM); + + workp->kr_work = *lkp; + workp->kr_refs = 1; + workp->kr_flags = KAUTH_REQUEST_UNSUBMITTED; + workp->kr_result = 0; + + /* + * We insert the request onto the unsubmitted queue, the call in from the + * resolver will move it to the submitted queue when appropriate. + */ + KAUTH_RESOLVER_LOCK(); + workp->kr_seqno = workp->kr_work.el_seqno = kauth_resolver_sequence++; + workp->kr_work.el_result = KAUTH_EXTLOOKUP_INPROG; + + /* XXX as an optimisation, we could check the queue for identical items and coalesce */ + TAILQ_INSERT_TAIL(&kauth_resolver_unsubmitted, workp, kr_link); + + wakeup_one((caddr_t)&kauth_resolver_unsubmitted); + for (;;) { + /* we could compute a better timeout here */ + ts.tv_sec = 30; + ts.tv_nsec = 0; + error = msleep(workp, kauth_resolver_mtx, PCATCH, "kr_submit", &ts); + /* request has been completed? */ + if ((error == 0) && (workp->kr_flags & KAUTH_REQUEST_DONE)) + break; + /* woken because the resolver has died? */ + if (kauth_resolver_identity == 0) { + error = EIO; + break; + } + /* an error? */ + if (error != 0) + break; + } + /* if the request was processed, copy the result */ + if (error == 0) + *lkp = workp->kr_work; + + /* + * If the request timed out and was never collected, the resolver is dead and + * probably not coming back anytime soon. In this case we revert to no-resolver + * behaviour, and punt all the other sleeping requests to clear the backlog. + */ + if ((error == EWOULDBLOCK) && (workp->kr_flags & KAUTH_REQUEST_UNSUBMITTED)) { + KAUTH_DEBUG("RESOLVER - request timed out without being collected for processing, resolver dead"); + kauth_resolver_identity = 0; + /* kill all the other requests that are waiting as well */ + TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) + wakeup(killp); + TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) + wakeup(killp); + } + + /* drop our reference on the work item, and note whether we should free it or not */ + if (--workp->kr_refs <= 0) { + /* work out which list we have to remove it from */ + if (workp->kr_flags & KAUTH_REQUEST_DONE) { + TAILQ_REMOVE(&kauth_resolver_done, workp, kr_link); + } else if (workp->kr_flags & KAUTH_REQUEST_SUBMITTED) { + TAILQ_REMOVE(&kauth_resolver_submitted, workp, kr_link); + } else if (workp->kr_flags & KAUTH_REQUEST_UNSUBMITTED) { + TAILQ_REMOVE(&kauth_resolver_unsubmitted, workp, kr_link); + } else { + KAUTH_DEBUG("RESOLVER - completed request has no valid queue"); + } + shouldfree = 1; + } else { + /* someone else still has a reference on this request */ + shouldfree = 0; + } + /* collect request result */ + if (error == 0) + error = workp->kr_result; + KAUTH_RESOLVER_UNLOCK(); + /* + * If we dropped the last reference, free the request. + */ + if (shouldfree) + FREE(workp, M_KAUTH); + + KAUTH_DEBUG("RESOLVER - returning %d", error); + return(error); +} + +/* + * System call interface for the external identity resolver. + */ +int +identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused register_t *retval) +{ + int opcode = uap->opcode; + user_addr_t message = uap->message; + struct kauth_resolver_work *workp; + int error; + pid_t new_id; + + /* + * New server registering itself.
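+ * Re-registration is also the recovery path for a crashed resolver: when
+ * a new instance (e.g. a restarted memberd) registers below, everything on
+ * the submitted queue is moved back to the unsubmitted queue, so requests
+ * that the old instance had already collected are not lost.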
+ */ + if (opcode == KAUTH_EXTLOOKUP_REGISTER) { + new_id = current_proc()->p_pid; + if ((error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER)) != 0) { + KAUTH_DEBUG("RESOLVER - pid %d refused permission to become identity resolver", new_id); + return(error); + } + KAUTH_RESOLVER_LOCK(); + if (kauth_resolver_identity != new_id) { + KAUTH_DEBUG("RESOLVER - new resolver %d taking over from old %d", new_id, kauth_resolver_identity); + /* + * We have a new server, so assume that all the old requests have been lost. + */ + while ((workp = TAILQ_LAST(&kauth_resolver_submitted, kauth_resolver_submitted_head)) != NULL) { + TAILQ_REMOVE(&kauth_resolver_submitted, workp, kr_link); + workp->kr_flags &= ~KAUTH_REQUEST_SUBMITTED; + workp->kr_flags |= KAUTH_REQUEST_UNSUBMITTED; + TAILQ_INSERT_HEAD(&kauth_resolver_unsubmitted, workp, kr_link); + } + kauth_resolver_identity = new_id; + kauth_resolver_registered = 1; + wakeup(&kauth_resolver_unsubmitted); + } + KAUTH_RESOLVER_UNLOCK(); + return(0); + } + + /* + * Beyond this point, we must be the resolver process. + */ + if (current_proc()->p_pid != kauth_resolver_identity) { + KAUTH_DEBUG("RESOLVER - call from bogus resolver %d\n", current_proc()->p_pid); + return(EPERM); + } + + /* + * Got a result returning? + */ + if (opcode & KAUTH_EXTLOOKUP_RESULT) { + if ((error = kauth_resolver_complete(message)) != 0) + return(error); + } + + /* + * Caller wants to take more work? + */ + if (opcode & KAUTH_EXTLOOKUP_WORKER) { + if ((error = kauth_resolver_getwork(message)) != 0) + return(error); + } + + return(0); +} + +/* + * Get work for a caller. + */ +static int +kauth_resolver_getwork(user_addr_t message) +{ + struct kauth_resolver_work *workp; + int error; + + KAUTH_RESOLVER_LOCK(); + error = 0; + while ((workp = TAILQ_FIRST(&kauth_resolver_unsubmitted)) == NULL) { + error = msleep(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0); + if (error != 0) + break; + } + if (workp != NULL) { + if ((error = copyout(&workp->kr_work, message, sizeof(workp->kr_work))) != 0) { + KAUTH_DEBUG("RESOLVER - error submitting work to resolve"); + goto out; + } + TAILQ_REMOVE(&kauth_resolver_unsubmitted, workp, kr_link); + workp->kr_flags &= ~KAUTH_REQUEST_UNSUBMITTED; + workp->kr_flags |= KAUTH_REQUEST_SUBMITTED; + TAILQ_INSERT_TAIL(&kauth_resolver_submitted, workp, kr_link); + } + +out: + KAUTH_RESOLVER_UNLOCK(); + return(error); +} + +/* + * Return a result from userspace. + */ +static int +kauth_resolver_complete(user_addr_t message) +{ + struct kauth_identity_extlookup extl; + struct kauth_resolver_work *workp; + int error, result; + + if ((error = copyin(message, &extl, sizeof(extl))) != 0) { + KAUTH_DEBUG("RESOLVER - error getting completed work\n"); + return(error); + } + + KAUTH_RESOLVER_LOCK(); + + error = 0; + result = 0; + switch (extl.el_result) { + case KAUTH_EXTLOOKUP_INPROG: + { + static int once = 0; + + /* XXX this should go away once memberd is updated */ + if (!once) { + printf("kauth_resolver: memberd is not setting valid result codes (assuming always successful)\n"); + once = 1; + } + } + /* FALLTHROUGH */ + case KAUTH_EXTLOOKUP_SUCCESS: + break; + + case KAUTH_EXTLOOKUP_FATAL: + /* fatal error means the resolver is dead */ + KAUTH_DEBUG("RESOLVER - resolver %d died, waiting for a new one", kauth_resolver_identity); + kauth_resolver_identity = 0; + /* XXX should we terminate all outstanding requests? 
*/ + error = EIO; + break; + case KAUTH_EXTLOOKUP_BADRQ: + KAUTH_DEBUG("RESOLVER - resolver reported invalid request %d", extl.el_seqno); + result = EINVAL; + break; + case KAUTH_EXTLOOKUP_FAILURE: + KAUTH_DEBUG("RESOLVER - resolver reported transient failure for request %d", extl.el_seqno); + result = EIO; + break; + default: + KAUTH_DEBUG("RESOLVER - resolver returned unexpected status %d", extl.el_result); + result = EIO; + break; + } + + /* + * In the case of a fatal error, we assume that the resolver will restart + * quickly and re-collect all of the outstanding requests. Thus, we don't + * complete the request which returned the fatal error status. + */ + if (extl.el_result != KAUTH_EXTLOOKUP_FATAL) { + /* scan our list for this request */ + TAILQ_FOREACH(workp, &kauth_resolver_submitted, kr_link) { + /* found it? */ + if (workp->kr_seqno == extl.el_seqno) { + /* copy result */ + workp->kr_work = extl; + /* move onto completed list and wake up requester(s) */ + TAILQ_REMOVE(&kauth_resolver_submitted, workp, kr_link); + workp->kr_flags &= ~KAUTH_REQUEST_SUBMITTED; + workp->kr_flags |= KAUTH_REQUEST_DONE; + workp->kr_result = result; + TAILQ_INSERT_TAIL(&kauth_resolver_done, workp, kr_link); + wakeup(workp); + break; + } + } + } + /* + * Note that it's OK for us not to find anything; if the request has + * timed out the work record will be gone. + */ + KAUTH_RESOLVER_UNLOCK(); + + return(error); +} + + +/* + * Identity cache. + */ + +struct kauth_identity { + TAILQ_ENTRY(kauth_identity) ki_link; + int ki_valid; +#define KI_VALID_UID (1<<0) /* UID and GID are mutually exclusive */ +#define KI_VALID_GID (1<<1) +#define KI_VALID_GUID (1<<2) +#define KI_VALID_NTSID (1<<3) + uid_t ki_uid; + gid_t ki_gid; + guid_t ki_guid; + ntsid_t ki_ntsid; + /* + * Expiry times are the earliest time at which we will disregard the cached state and go to + * userland. Before then if the valid bit is set, we will return the cached value. If it's + * not set, we will not go to userland to resolve, just assume that there is no answer + * available. + */ + time_t ki_guid_expiry; + time_t ki_ntsid_expiry; +}; + +static TAILQ_HEAD(kauth_identity_head, kauth_identity) kauth_identities; +#define KAUTH_IDENTITY_CACHEMAX 100 /* XXX sizing? 
*/ +static int kauth_identity_count; + +static lck_mtx_t *kauth_identity_mtx; +#define KAUTH_IDENTITY_LOCK() lck_mtx_lock(kauth_identity_mtx); +#define KAUTH_IDENTITY_UNLOCK() lck_mtx_unlock(kauth_identity_mtx); + + +static struct kauth_identity *kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, + ntsid_t *ntsidp, time_t ntsid_expiry); +static void kauth_identity_register(struct kauth_identity *kip); +static void kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip); +static void kauth_identity_lru(struct kauth_identity *kip); +static int kauth_identity_guid_expired(struct kauth_identity *kip); +static int kauth_identity_ntsid_expired(struct kauth_identity *kip); +static int kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir); +static int kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir); +static int kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir); +static int kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir); + +void +kauth_identity_init(void) +{ + TAILQ_INIT(&kauth_identities); + kauth_identity_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); +} + +static int +kauth_identity_resolve(__unused struct kauth_identity_extlookup *el) +{ + return(kauth_resolver_submit(el)); +} + +static struct kauth_identity * +kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, ntsid_t *ntsidp, time_t ntsid_expiry) +{ + struct kauth_identity *kip; + + /* get and fill in a new identity */ + MALLOC(kip, struct kauth_identity *, sizeof(*kip), M_KAUTH, M_WAITOK | M_ZERO); + if (kip != NULL) { + if (gid != KAUTH_GID_NONE) { + kip->ki_gid = gid; + kip->ki_valid = KI_VALID_GID; + } + if (uid != KAUTH_UID_NONE) { + if (kip->ki_valid & KI_VALID_GID) + panic("can't allocate kauth identity with both uid and gid"); + kip->ki_uid = uid; + kip->ki_valid = KI_VALID_UID; + } + if (guidp != NULL) { + kip->ki_guid = *guidp; + kip->ki_valid |= KI_VALID_GUID; + } + kip->ki_guid_expiry = guid_expiry; + if (ntsidp != NULL) { + kip->ki_ntsid = *ntsidp; + kip->ki_valid |= KI_VALID_NTSID; + } + kip->ki_ntsid_expiry = ntsid_expiry; + } + return(kip); +} + +/* + * Register an association between identity tokens. + */ +static void +kauth_identity_register(struct kauth_identity *kip) +{ + struct kauth_identity *ip; + + /* + * We search the cache for the UID listed in the incoming association. If we + * already have an entry, the new information is merged. 
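+ * For example (a sketch of the merge below): if uid 501 is already cached
+ * with a GUID and a second identity for uid 501 arrives carrying an NTSID,
+ * the NTSID (and its expiry) is merged onto the existing entry and the
+ * incoming record is freed rather than inserted.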
+ */ + ip = NULL; + KAUTH_IDENTITY_LOCK(); + if (kip->ki_valid & KI_VALID_UID) { + if (kip->ki_valid & KI_VALID_GID) + panic("kauth_identity: can't insert record with both UID and GID as key"); + TAILQ_FOREACH(ip, &kauth_identities, ki_link) + if ((ip->ki_valid & KI_VALID_UID) && (ip->ki_uid == kip->ki_uid)) + break; + } else if (kip->ki_valid & KI_VALID_GID) { + TAILQ_FOREACH(ip, &kauth_identities, ki_link) + if ((ip->ki_valid & KI_VALID_GID) && (ip->ki_gid == kip->ki_gid)) + break; + } else { + panic("kauth_identity: can't insert record without UID or GID as key"); + } + + if (ip != NULL) { + /* we already have an entry, merge/overwrite */ + if (kip->ki_valid & KI_VALID_GUID) { + ip->ki_guid = kip->ki_guid; + ip->ki_valid |= KI_VALID_GUID; + } + ip->ki_guid_expiry = kip->ki_guid_expiry; + if (kip->ki_valid & KI_VALID_NTSID) { + ip->ki_ntsid = kip->ki_ntsid; + ip->ki_valid |= KI_VALID_NTSID; + } + ip->ki_ntsid_expiry = kip->ki_ntsid_expiry; + /* and discard the incoming identity */ + FREE(kip, M_KAUTH); + ip = NULL; + } else { + /* don't have any information on this identity, so just add it */ + TAILQ_INSERT_HEAD(&kauth_identities, kip, ki_link); + if (++kauth_identity_count > KAUTH_IDENTITY_CACHEMAX) { + ip = TAILQ_LAST(&kauth_identities, kauth_identity_head); + TAILQ_REMOVE(&kauth_identities, ip, ki_link); + kauth_identity_count--; + } + } + KAUTH_IDENTITY_UNLOCK(); + /* have to drop lock before freeing expired entry */ + if (ip != NULL) + FREE(ip, M_KAUTH); +} + +/* + * Given a lookup result, add any associations that we don't + * currently have. + */ +static void +kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *rkip) +{ + struct timeval tv; + struct kauth_identity *kip; + + microuptime(&tv); + + /* user identity? */ + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UID) { + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + /* matching record */ + if ((kip->ki_valid & KI_VALID_UID) && (kip->ki_uid == elp->el_uid)) { + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UGUID) { + kip->ki_guid = elp->el_uguid; + kip->ki_valid |= KI_VALID_GUID; + } + kip->ki_guid_expiry = tv.tv_sec + elp->el_uguid_valid; + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) { + kip->ki_ntsid = elp->el_usid; + kip->ki_valid |= KI_VALID_NTSID; + } + kip->ki_ntsid_expiry = tv.tv_sec + elp->el_usid_valid; + kauth_identity_lru(kip); + if (rkip != NULL) + *rkip = *kip; + KAUTH_DEBUG("CACHE - refreshed %d is " K_UUID_FMT, kip->ki_uid, K_UUID_ARG(kip->ki_guid)); + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + /* not found in cache, add new record */ + if (kip == NULL) { + kip = kauth_identity_alloc(elp->el_uid, KAUTH_GID_NONE, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UGUID) ? &elp->el_uguid : NULL, + tv.tv_sec + elp->el_uguid_valid, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) ? &elp->el_usid : NULL, + tv.tv_sec + elp->el_usid_valid); + if (kip != NULL) { + if (rkip != NULL) + *rkip = *kip; + KAUTH_DEBUG("CACHE - learned %d is " K_UUID_FMT, kip->ki_uid, K_UUID_ARG(kip->ki_guid)); + kauth_identity_register(kip); + } + } + } + + /* group identity? 
*/ + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GID) { + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + /* matching record */ + if ((kip->ki_valid & KI_VALID_GID) && (kip->ki_gid == elp->el_gid)) { + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GGUID) { + kip->ki_guid = elp->el_gguid; + kip->ki_valid |= KI_VALID_GUID; + } + kip->ki_guid_expiry = tv.tv_sec + elp->el_gguid_valid; + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) { + kip->ki_ntsid = elp->el_gsid; + kip->ki_valid |= KI_VALID_NTSID; + } + kip->ki_ntsid_expiry = tv.tv_sec + elp->el_gsid_valid; + kauth_identity_lru(kip); + if (rkip != NULL) + *rkip = *kip; + KAUTH_DEBUG("CACHE - refreshed %d is " K_UUID_FMT, kip->ki_gid, K_UUID_ARG(kip->ki_guid)); + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + /* not found in cache, add new record */ + if (kip == NULL) { + kip = kauth_identity_alloc(KAUTH_UID_NONE, elp->el_gid, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GGUID) ? &elp->el_gguid : NULL, + tv.tv_sec + elp->el_gguid_valid, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) ? &elp->el_gsid : NULL, + tv.tv_sec + elp->el_gsid_valid); + if (kip != NULL) { + if (rkip != NULL) + *rkip = *kip; + KAUTH_DEBUG("CACHE - learned %d is " K_UUID_FMT, kip->ki_gid, K_UUID_ARG(kip->ki_guid)); + kauth_identity_register(kip); + } + } + } + +} + +/* + * Promote the entry to the head of the LRU, assumes the cache is locked. + * + * This is called even if the entry has expired; typically an expired entry + * that's been looked up is about to be revalidated, and having it closer to + * the head of the LRU means finding it quickly again when the revalidation + * comes through. + */ +static void +kauth_identity_lru(struct kauth_identity *kip) +{ + if (kip != TAILQ_FIRST(&kauth_identities)) { + TAILQ_REMOVE(&kauth_identities, kip, ki_link); + TAILQ_INSERT_HEAD(&kauth_identities, kip, ki_link); + } +} + +/* + * Handle lazy expiration of translations. + */ +static int +kauth_identity_guid_expired(struct kauth_identity *kip) +{ + struct timeval tv; + + microuptime(&tv); + KAUTH_DEBUG("CACHE - GUID expires @ %d now %d", kip->ki_guid_expiry, tv.tv_sec); + return((kip->ki_guid_expiry <= tv.tv_sec) ? 1 : 0); +} + +static int +kauth_identity_ntsid_expired(struct kauth_identity *kip) +{ + struct timeval tv; + + microuptime(&tv); + KAUTH_DEBUG("CACHE - NTSID expires @ %d now %d", kip->ki_ntsid_expiry, tv.tv_sec); + return((kip->ki_ntsid_expiry <= tv.tv_sec) ? 1 : 0); +} + +/* + * Search for an entry by UID. Returns a copy of the entry, ENOENT if no valid + * association exists for the UID. + */ +static int +kauth_identity_find_uid(uid_t uid, struct kauth_identity *kir) +{ + struct kauth_identity *kip; + + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + if ((kip->ki_valid & KI_VALID_UID) && (uid == kip->ki_uid)) { + kauth_identity_lru(kip); + *kir = *kip; + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + return((kip == NULL) ? ENOENT : 0); +} + + +/* + * Search for an entry by GID. Returns a copy of the entry, ENOENT if no valid + * association exists for the GID. + */ +static int +kauth_identity_find_gid(gid_t gid, struct kauth_identity *kir) +{ + struct kauth_identity *kip; + + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + if ((kip->ki_valid & KI_VALID_GID) && (gid == kip->ki_gid)) { + kauth_identity_lru(kip); + *kir = *kip; + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + return((kip == NULL) ? ENOENT : 0); +} + + +/* + * Search for an entry by GUID.
Returns a copy of the entry, ENOENT if no valid + * association exists for the GUID. Note that the association may be expired, + * in which case the caller may elect to call out to userland to revalidate. + */ +static int +kauth_identity_find_guid(guid_t *guidp, struct kauth_identity *kir) +{ + struct kauth_identity *kip; + + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + if ((kip->ki_valid & KI_VALID_GUID) && (kauth_guid_equal(guidp, &kip->ki_guid))) { + kauth_identity_lru(kip); + *kir = *kip; + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + return((kip == NULL) ? ENOENT : 0); +} + +/* + * Search for an entry by NT Security ID. Returns a copy of the entry, ENOENT if no valid + * association exists for the SID. Note that the association may be expired, + * in which case the caller may elect to call out to userland to revalidate. + */ +static int +kauth_identity_find_ntsid(ntsid_t *ntsid, struct kauth_identity *kir) +{ + struct kauth_identity *kip; + + KAUTH_IDENTITY_LOCK(); + TAILQ_FOREACH(kip, &kauth_identities, ki_link) { + if ((kip->ki_valid & KI_VALID_NTSID) && (kauth_ntsid_equal(ntsid, &kip->ki_ntsid))) { + kauth_identity_lru(kip); + *kir = *kip; + break; + } + } + KAUTH_IDENTITY_UNLOCK(); + return((kip == NULL) ? ENOENT : 0); +} + +/* + * GUID handling. + */ +guid_t kauth_null_guid; + +int +kauth_guid_equal(guid_t *guid1, guid_t *guid2) +{ + return(!bcmp(guid1, guid2, sizeof(*guid1))); +} + +/* + * Look for well-known GUIDs. + */ +int +kauth_wellknown_guid(guid_t *guid) +{ + static char fingerprint[] = {0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef}; + int code; + /* + * All WKGs begin with the same 12 bytes. + */ + if (!bcmp((void *)guid, fingerprint, 12)) { + /* + * The final 4 bytes are our code. + */ + code = *(u_int32_t *)&guid->g_guid[12]; + switch(code) { + case 0x0000000c: + return(KAUTH_WKG_EVERYBODY); + case 0xfffffffe: + return(KAUTH_WKG_NOBODY); + case 0x0000000a: + return(KAUTH_WKG_OWNER); + case 0x00000010: + return(KAUTH_WKG_GROUP); + } + } + return(KAUTH_WKG_NOT); +} + + +/* + * NT Security Identifier handling. + */ +int +kauth_ntsid_equal(ntsid_t *sid1, ntsid_t *sid2) +{ + /* check sizes for equality, also sanity-check size while we're at it */ + if ((KAUTH_NTSID_SIZE(sid1) == KAUTH_NTSID_SIZE(sid2)) && + (KAUTH_NTSID_SIZE(sid1) <= sizeof(*sid1)) && + !bcmp(sid1, sid2, KAUTH_NTSID_SIZE(sid1))) + return(1); + return(0); +} + +/* + * Identity KPI + * + * We support four tokens representing identity: + * - Credential reference + * - UID + * - GUID + * - NT security identifier + * + * Of these, the UID is the ubiquitous identifier; cross-referencing should + * be done using it. + */ + +static int kauth_cred_cache_lookup(int from, int to, void *src, void *dst); + +/* + * Fetch UID from credential. + */ +uid_t +kauth_cred_getuid(kauth_cred_t cred) +{ + NULLCRED_CHECK(cred); + return(cred->cr_uid); +} + +/* + * Fetch GID from credential. + */ +uid_t +kauth_cred_getgid(kauth_cred_t cred) +{ + NULLCRED_CHECK(cred); + return(cred->cr_gid); +} + +/* + * Fetch UID from GUID. + */ +int +kauth_cred_guid2uid(guid_t *guidp, uid_t *uidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_GUID, KI_VALID_UID, guidp, uidp)); +} + +/* + * Fetch GID from GUID. + */ +int +kauth_cred_guid2gid(guid_t *guidp, gid_t *gidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_GUID, KI_VALID_GID, guidp, gidp)); +} + +/* + * Fetch UID from NT SID. 
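+ * Like the other translators here it funnels into
+ * kauth_cred_cache_lookup(); typical use is (a sketch, with 'sid' as a
+ * placeholder for a caller-supplied ntsid_t):
+ *
+ *	uid_t uid;
+ *	if (kauth_cred_ntsid2uid(&sid, &uid) == 0)
+ *		-- uid is valid (possibly served from the identity cache)
+ *	else
+ *		-- no translation available; commonly ENOENT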
+ */ +int +kauth_cred_ntsid2uid(ntsid_t *sidp, uid_t *uidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_NTSID, KI_VALID_UID, sidp, uidp)); +} + +/* + * Fetch GID from NT SID. + */ +int +kauth_cred_ntsid2gid(ntsid_t *sidp, gid_t *gidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_NTSID, KI_VALID_GID, sidp, gidp)); +} + +/* + * Fetch GUID from NT SID. + */ +int +kauth_cred_ntsid2guid(ntsid_t *sidp, guid_t *guidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_NTSID, KI_VALID_GUID, sidp, guidp)); +} + +/* + * Fetch GUID from UID. + */ +int +kauth_cred_uid2guid(uid_t uid, guid_t *guidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_UID, KI_VALID_GUID, &uid, guidp)); +} + +/* + * Fetch user GUID from credential. + */ +int +kauth_cred_getguid(kauth_cred_t cred, guid_t *guidp) +{ + NULLCRED_CHECK(cred); + return(kauth_cred_uid2guid(kauth_cred_getuid(cred), guidp)); +} + +/* + * Fetch GUID from GID. + */ +int +kauth_cred_gid2guid(gid_t gid, guid_t *guidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_GID, KI_VALID_GUID, &gid, guidp)); +} + +/* + * Fetch NT SID from UID. + */ +int +kauth_cred_uid2ntsid(uid_t uid, ntsid_t *sidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_UID, KI_VALID_NTSID, &uid, sidp)); +} + +/* + * Fetch NT SID from credential. + */ +int +kauth_cred_getntsid(kauth_cred_t cred, ntsid_t *sidp) +{ + NULLCRED_CHECK(cred); + return(kauth_cred_uid2ntsid(kauth_cred_getuid(cred), sidp)); +} + +/* + * Fetch NT SID from GID. + */ +int +kauth_cred_gid2ntsid(gid_t gid, ntsid_t *sidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_GID, KI_VALID_NTSID, &gid, sidp)); +} + +/* + * Fetch NT SID from GUID. + */ +int +kauth_cred_guid2ntsid(guid_t *guidp, ntsid_t *sidp) +{ + return(kauth_cred_cache_lookup(KI_VALID_GUID, KI_VALID_NTSID, guidp, sidp)); +} + + + +/* + * Lookup a translation in the cache. + */ +static int +kauth_cred_cache_lookup(int from, int to, void *src, void *dst) +{ + struct kauth_identity ki; + struct kauth_identity_extlookup el; + int error; + int (* expired)(struct kauth_identity *kip); + + KAUTH_DEBUG("CACHE - translate %d to %d", from, to); + + /* + * Look for an existing cache entry for this association. + * If the entry has not expired, return the cached information. + */ + ki.ki_valid = 0; + switch(from) { + case KI_VALID_UID: + error = kauth_identity_find_uid(*(uid_t *)src, &ki); + break; + case KI_VALID_GID: + error = kauth_identity_find_gid(*(gid_t *)src, &ki); + break; + case KI_VALID_GUID: + error = kauth_identity_find_guid((guid_t *)src, &ki); + break; + case KI_VALID_NTSID: + error = kauth_identity_find_ntsid((ntsid_t *)src, &ki); + break; + default: + return(EINVAL); + } + /* lookup failure or error */ + if (error != 0) { + /* any other error is fatal */ + if (error != ENOENT) { + KAUTH_DEBUG("CACHE - cache search error %d", error); + return(error); + } + } else { + /* do we have a translation? */ + if (ki.ki_valid & to) { + /* found a valid cached entry, check expiry */ + switch(to) { + case KI_VALID_GUID: + expired = kauth_identity_guid_expired; + break; + case KI_VALID_NTSID: + expired = kauth_identity_ntsid_expired; + break; + default: + switch(from) { + case KI_VALID_GUID: + expired = kauth_identity_guid_expired; + break; + case KI_VALID_NTSID: + expired = kauth_identity_ntsid_expired; + break; + default: + expired = NULL; + } + } + KAUTH_DEBUG("CACHE - found matching entry with valid %d", ki.ki_valid); + /* + * If no expiry function, or not expired, we have found + * a hit. 
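+ * (The expiry function is keyed off the 'to' token when that is a GUID or
+ * NTSID, otherwise off the 'from' token; plain UID/GID values carry no
+ * TTL of their own, so a lookup between them never expires here.)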
+ */ + if (!expired) { + KAUTH_DEBUG("CACHE - no expiry function"); + goto found; + } + if (!expired(&ki)) { + KAUTH_DEBUG("CACHE - entry valid, unexpired"); + goto found; + } + /* + * We leave ki_valid set here; it contains a translation but the TTL has + * expired. If we can't get a result from the resolver, we will + * use it as a better-than nothing alternative. + */ + KAUTH_DEBUG("CACHE - expired entry found"); + } + } + + /* + * Call the resolver. We ask for as much data as we can get. + */ + switch(from) { + case KI_VALID_UID: + el.el_flags = KAUTH_EXTLOOKUP_VALID_UID; + el.el_uid = *(uid_t *)src; + break; + case KI_VALID_GID: + el.el_flags = KAUTH_EXTLOOKUP_VALID_GID; + el.el_gid = *(gid_t *)src; + break; + case KI_VALID_GUID: + el.el_flags = KAUTH_EXTLOOKUP_VALID_UGUID | KAUTH_EXTLOOKUP_VALID_GGUID; + el.el_uguid = *(guid_t *)src; + el.el_gguid = *(guid_t *)src; + break; + case KI_VALID_NTSID: + el.el_flags = KAUTH_EXTLOOKUP_VALID_USID | KAUTH_EXTLOOKUP_VALID_GSID; + el.el_usid = *(ntsid_t *)src; + el.el_gsid = *(ntsid_t *)src; + break; + default: + return(EINVAL); + } + /* + * Here we ask for everything all at once, to avoid having to work + * out what we really want now, or might want soon. + * + * Asking for SID translations when we don't know we need them right + * now is going to cause excess work to be done if we're connected + * to a network that thinks it can translate them. This list needs + * to get smaller/smarter. + */ + el.el_flags |= KAUTH_EXTLOOKUP_WANT_UID | KAUTH_EXTLOOKUP_WANT_GID | + KAUTH_EXTLOOKUP_WANT_UGUID | KAUTH_EXTLOOKUP_WANT_GGUID | + KAUTH_EXTLOOKUP_WANT_USID | KAUTH_EXTLOOKUP_WANT_GSID; + KAUTH_DEBUG("CACHE - calling resolver for %x", el.el_flags); + error = kauth_identity_resolve(&el); + KAUTH_DEBUG("CACHE - resolver returned %d", error); + /* was the lookup successful? */ + if (error == 0) { + /* + * Save the results from the lookup - may have other information even if we didn't + * get a guid. + */ + kauth_identity_updatecache(&el, &ki); + } + /* + * Check to see if we have a valid result. + */ + if (!error && !(ki.ki_valid & to)) + error = ENOENT; + if (error) + return(error); +found: + switch(to) { + case KI_VALID_UID: + *(uid_t *)dst = ki.ki_uid; + break; + case KI_VALID_GID: + *(gid_t *)dst = ki.ki_gid; + break; + case KI_VALID_GUID: + *(guid_t *)dst = ki.ki_guid; + break; + case KI_VALID_NTSID: + *(ntsid_t *)dst = ki.ki_ntsid; + break; + default: + return(EINVAL); + } + KAUTH_DEBUG("CACHE - returned successfully"); + return(0); +} + + +/* + * Group membership cache. + * + * XXX the linked-list implementation here needs to be optimized. + */ + +struct kauth_group_membership { + TAILQ_ENTRY(kauth_group_membership) gm_link; + uid_t gm_uid; /* the identity whose membership we're recording */ + gid_t gm_gid; /* group of which they are a member */ + time_t gm_expiry; /* TTL for the membership */ + int gm_flags; +#define KAUTH_GROUP_ISMEMBER (1<<0) +}; + +TAILQ_HEAD(kauth_groups_head, kauth_group_membership) kauth_groups; +#define KAUTH_GROUPS_CACHEMAX 100 /* XXX sizing? 
*/ +static int kauth_groups_count; + +static lck_mtx_t *kauth_groups_mtx; +#define KAUTH_GROUPS_LOCK() lck_mtx_lock(kauth_groups_mtx); +#define KAUTH_GROUPS_UNLOCK() lck_mtx_unlock(kauth_groups_mtx); + +static int kauth_groups_expired(struct kauth_group_membership *gm); +static void kauth_groups_lru(struct kauth_group_membership *gm); +static void kauth_groups_updatecache(struct kauth_identity_extlookup *el); + +void +kauth_groups_init(void) +{ + TAILQ_INIT(&kauth_groups); + kauth_groups_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); +} + +static int +kauth_groups_expired(struct kauth_group_membership *gm) +{ + struct timeval tv; + + microuptime(&tv); + return((gm->gm_expiry <= tv.tv_sec) ? 1 : 0); +} + +static void +kauth_groups_lru(struct kauth_group_membership *gm) +{ + if (gm != TAILQ_FIRST(&kauth_groups)) { + TAILQ_REMOVE(&kauth_groups, gm, gm_link); + TAILQ_INSERT_HEAD(&kauth_groups, gm, gm_link); + } +} + +static void +kauth_groups_updatecache(struct kauth_identity_extlookup *el) +{ + struct kauth_group_membership *gm; + struct timeval tv; + + /* need a valid response if we are to cache anything */ + if ((el->el_flags & + (KAUTH_EXTLOOKUP_VALID_UID | KAUTH_EXTLOOKUP_VALID_GID | KAUTH_EXTLOOKUP_VALID_MEMBERSHIP)) != + (KAUTH_EXTLOOKUP_VALID_UID | KAUTH_EXTLOOKUP_VALID_GID | KAUTH_EXTLOOKUP_VALID_MEMBERSHIP)) + return; + + microuptime(&tv); + + /* search for an existing record for this association before inserting */ + KAUTH_GROUPS_LOCK(); + TAILQ_FOREACH(gm, &kauth_groups, gm_link) { + if ((el->el_uid == gm->gm_uid) && + (el->el_gid == gm->gm_gid)) { + if (el->el_flags & KAUTH_EXTLOOKUP_ISMEMBER) { + gm->gm_flags |= KAUTH_GROUP_ISMEMBER; + } else { + gm->gm_flags &= ~KAUTH_GROUP_ISMEMBER; + } + gm->gm_expiry = el->el_member_valid + tv.tv_sec; + kauth_groups_lru(gm); + break; + } + } + KAUTH_GROUPS_UNLOCK(); + + /* if we found an entry to update, stop here */ + if (gm != NULL) + return; + + /* allocate a new record */ + MALLOC(gm, struct kauth_group_membership *, sizeof(*gm), M_KAUTH, M_WAITOK); + if (gm != NULL) { + gm->gm_uid = el->el_uid; + gm->gm_gid = el->el_gid; + if (el->el_flags & KAUTH_EXTLOOKUP_ISMEMBER) { + gm->gm_flags |= KAUTH_GROUP_ISMEMBER; + } else { + gm->gm_flags &= ~KAUTH_GROUP_ISMEMBER; + } + gm->gm_expiry = el->el_member_valid + tv.tv_sec; + } + + /* + * Insert the new entry. Note that it's possible to race ourselves here + * and end up with duplicate entries in the list. Wasteful, but harmless + * since the first into the list will never be looked up, and thus will + * eventually just fall off the end. + */ + KAUTH_GROUPS_LOCK(); + TAILQ_INSERT_HEAD(&kauth_groups, gm, gm_link); + if (kauth_groups_count++ > KAUTH_GROUPS_CACHEMAX) { + gm = TAILQ_LAST(&kauth_groups, kauth_groups_head); + TAILQ_REMOVE(&kauth_groups, gm, gm_link); + kauth_groups_count--; + } else { + gm = NULL; + } + KAUTH_GROUPS_UNLOCK(); + + /* free expired cache entry */ + if (gm != NULL) + FREE(gm, M_KAUTH); +} + +/* + * Group membership KPI + */ +/* + * This function guarantees not to modify resultp when returning an error. + */ +int +kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp) +{ + struct kauth_group_membership *gm; + struct kauth_identity_extlookup el; + int i, error; + + /* + * Check the per-credential list of override groups. + * + * We can conditionalise this on cred->cr_gmuid == KAUTH_UID_NONE since + * the cache should be used for that case. 
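+ *
+ * Overall the membership check below proceeds in order (sketch):
+ *	1. the credential's own cr_groups[] list;
+ *	2. authoritative shortcuts (no gmuid, or no resolver registered yet);
+ *	3. the group membership cache;
+ *	4. an external resolver round trip, whose answer is then cached.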
+ */ + for (i = 0; i < cred->cr_ngroups; i++) { + if (gid == cred->cr_groups[i]) { + *resultp = 1; + return(0); + } + } + + /* + * If we don't have a UID for group membership checks, the in-cred list + * was authoritative and we can stop here. + */ + if (cred->cr_gmuid == KAUTH_UID_NONE) { + *resultp = 0; + return(0); + } + + + /* + * If the resolver hasn't checked in yet, we are early in the boot phase and + * the local group list is complete and authoritative. + */ + if (!kauth_resolver_registered) { + *resultp = 0; + return(0); + } + + /* TODO: */ + /* XXX check supplementary groups */ + /* XXX check whiteout groups */ + /* XXX nesting of supplementary/whiteout groups? */ + + /* + * Check the group cache. + */ + KAUTH_GROUPS_LOCK(); + TAILQ_FOREACH(gm, &kauth_groups, gm_link) { + if ((gm->gm_uid == cred->cr_gmuid) && (gm->gm_gid == gid) && !kauth_groups_expired(gm)) { + kauth_groups_lru(gm); + break; + } + } + + /* did we find a membership entry? */ + if (gm != NULL) + *resultp = (gm->gm_flags & KAUTH_GROUP_ISMEMBER) ? 1 : 0; + KAUTH_GROUPS_UNLOCK(); + + /* if we did, we can return now */ + if (gm != NULL) + return(0); + + /* nothing in the cache, need to go to userland */ + el.el_flags = KAUTH_EXTLOOKUP_VALID_UID | KAUTH_EXTLOOKUP_VALID_GID | KAUTH_EXTLOOKUP_WANT_MEMBERSHIP; + el.el_uid = cred->cr_gmuid; + el.el_gid = gid; + error = kauth_identity_resolve(&el); + if (error != 0) + return(error); + /* save the results from the lookup */ + kauth_groups_updatecache(&el); + + /* if we successfully ascertained membership, report */ + if (el.el_flags & KAUTH_EXTLOOKUP_VALID_MEMBERSHIP) { + *resultp = (el.el_flags & KAUTH_EXTLOOKUP_ISMEMBER) ? 1 : 0; + return(0); + } + + return(ENOENT); +} + +/* + * Determine whether the supplied credential is a member of the + * group nominated by GUID. + */ +int +kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp) +{ + gid_t gid; + int error, wkg; + + error = 0; + wkg = kauth_wellknown_guid(guidp); + switch(wkg) { + case KAUTH_WKG_NOBODY: + *resultp = 0; + break; + case KAUTH_WKG_EVERYBODY: + *resultp = 1; + break; + default: + /* translate guid to gid */ + if ((error = kauth_cred_guid2gid(guidp, &gid)) != 0) { + /* + * If we have no guid -> gid translation, it's not a group and + * thus the cred can't be a member. + */ + if (error == ENOENT) { + *resultp = 0; + error = 0; + } + } else { + error = kauth_cred_ismember_gid(cred, gid, resultp); + } + } + return(error); +} + +/* + * Fast replacement for issuser() + */ +int +kauth_cred_issuser(kauth_cred_t cred) +{ + return(cred->cr_uid == 0); +} + +/* + * Credential KPI + */ + +/* lock protecting credential hash table */ +static lck_mtx_t *kauth_cred_hash_mtx; +#define KAUTH_CRED_HASH_LOCK() lck_mtx_lock(kauth_cred_hash_mtx); +#define KAUTH_CRED_HASH_UNLOCK() lck_mtx_unlock(kauth_cred_hash_mtx); + +void +kauth_cred_init(void) +{ + int i; + + kauth_cred_hash_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); + kauth_cred_table_size = kauth_cred_primes[kauth_cred_primes_index]; + + /*allocate credential hash table */ + MALLOC(kauth_cred_table_anchor, struct kauth_cred_entry_head *, + (sizeof(struct kauth_cred_entry_head) * kauth_cred_table_size), + M_KAUTH, M_WAITOK | M_ZERO); + for (i = 0; i < kauth_cred_table_size; i++) { + TAILQ_INIT(&kauth_cred_table_anchor[i]); + } +} + +/* + * Return the current thread's effective UID. + */ +uid_t +kauth_getuid(void) +{ + return(kauth_cred_get()->cr_uid); +} + +/* + * Return the current thread's real UID. 
+ */ +uid_t +kauth_getruid(void) +{ + return(kauth_cred_get()->cr_ruid); +} + +/* + * Return the current thread's effective GID. + */ +gid_t +kauth_getgid(void) +{ + return(kauth_cred_get()->cr_groups[0]); +} + +/* + * Return the current thread's real GID. + */ +gid_t +kauth_getrgid(void) +{ + return(kauth_cred_get()->cr_rgid); +} + +/* + * Returns a pointer to the current thread's credential, does not take a + * reference (so the caller must not do anything that would let the thread's + * credential change while using the returned value). + */ +kauth_cred_t +kauth_cred_get(void) +{ + struct proc *p; + struct uthread *uthread; + + uthread = get_bsdthread_info(current_thread()); + /* sanity */ + if (uthread == NULL) + panic("thread wants credential but has no BSD thread info"); + /* + * We can lazy-bind credentials to threads, as long as their processes have them. + * If we later inline this function, the code in this block should probably be + * called out in a function. + */ + if (uthread->uu_ucred == NOCRED) { + if ((p = (proc_t) get_bsdtask_info(get_threadtask(current_thread()))) == NULL) + panic("thread wants credential but has no BSD process"); + proc_lock(p); + kauth_cred_ref(uthread->uu_ucred = p->p_ucred); + proc_unlock(p); + } + return(uthread->uu_ucred); +} + +/* + * Returns a pointer to the current thread's credential, takes a reference. + */ +kauth_cred_t +kauth_cred_get_with_ref(void) +{ + struct proc *procp; + struct uthread *uthread; + + uthread = get_bsdthread_info(current_thread()); + /* sanity checks */ + if (uthread == NULL) + panic("%s - thread wants credential but has no BSD thread info", __FUNCTION__); + if ((procp = (proc_t) get_bsdtask_info(get_threadtask(current_thread()))) == NULL) + panic("%s - thread wants credential but has no BSD process", __FUNCTION__); + + /* + * We can lazy-bind credentials to threads, as long as their processes have them. + * If we later inline this function, the code in this block should probably be + * called out in a function. + */ + proc_lock(procp); + if (uthread->uu_ucred == NOCRED) { + /* take reference for new cred in thread */ + kauth_cred_ref(uthread->uu_ucred = proc_ucred(procp)); + } + /* take a reference for our caller */ + kauth_cred_ref(uthread->uu_ucred); + proc_unlock(procp); + return(uthread->uu_ucred); +} + +/* + * Returns a pointer to the given process's credential, takes a reference. + */ +kauth_cred_t +kauth_cred_proc_ref(proc_t procp) +{ + kauth_cred_t cred; + + proc_lock(procp); + cred = proc_ucred(procp); + kauth_cred_ref(cred); + proc_unlock(procp); + return(cred); +} + +/* + * Allocates a new credential. + */ +kauth_cred_t +kauth_cred_alloc(void) +{ + kauth_cred_t newcred; + + MALLOC(newcred, kauth_cred_t, sizeof(*newcred), M_KAUTH, M_WAITOK | M_ZERO); + if (newcred != 0) { + newcred->cr_ref = 1; + /* must do this, or cred has same group membership as uid 0 */ + newcred->cr_gmuid = KAUTH_UID_NONE; +#if CRED_DIAGNOSTIC + } else { + panic("kauth_cred_alloc: couldn't allocate credential"); +#endif + } + +#if KAUTH_CRED_HASH_DEBUG + kauth_cred_count++; +#endif + + return(newcred); +} + +/* + * Looks to see if we already have a known credential and if found bumps the + * reference count and returns it. If there are no credentials that match + * the given credential then we allocate a new credential. + * + * Note that the gmuid is hard-defaulted to the UID specified. Since we maintain + * this field, we can't expect callers to know how it needs to be set. Callers + * should be prepared for this field to be overwritten. 
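+ *
+ * Typical use is to fill in a throwaway template and exchange it for the
+ * interned copy (a sketch; 'uid' and 'gid' are placeholders):
+ *
+ *	struct ucred template;
+ *
+ *	bzero(&template, sizeof(template));
+ *	template.cr_uid = template.cr_ruid = template.cr_svuid = uid;
+ *	template.cr_ngroups = 1;
+ *	template.cr_groups[0] = template.cr_rgid = template.cr_svgid = gid;
+ *	cred = kauth_cred_create(&template);	-- returns a held reference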
+ */ +kauth_cred_t +kauth_cred_create(kauth_cred_t cred) +{ + kauth_cred_t found_cred, new_cred = NULL; + + cred->cr_gmuid = cred->cr_uid; + + for (;;) { + KAUTH_CRED_HASH_LOCK(); + found_cred = kauth_cred_find(cred); + if (found_cred != NULL) { + /* found an existing credential so we'll bump reference count and return */ + kauth_cred_ref(found_cred); + KAUTH_CRED_HASH_UNLOCK(); + return(found_cred); + } + KAUTH_CRED_HASH_UNLOCK(); + + /* no existing credential found. create one and add it to our hash table */ + new_cred = kauth_cred_alloc(); + if (new_cred != NULL) { + int err; + new_cred->cr_uid = cred->cr_uid; + new_cred->cr_ruid = cred->cr_ruid; + new_cred->cr_svuid = cred->cr_svuid; + new_cred->cr_rgid = cred->cr_rgid; + new_cred->cr_svgid = cred->cr_svgid; + new_cred->cr_gmuid = cred->cr_gmuid; + new_cred->cr_ngroups = cred->cr_ngroups; + bcopy(&cred->cr_groups[0], &new_cred->cr_groups[0], sizeof(new_cred->cr_groups)); + KAUTH_CRED_HASH_LOCK(); + err = kauth_cred_add(new_cred); + KAUTH_CRED_HASH_UNLOCK(); + + /* retry if kauth_cred_add returns non zero value */ + if (err == 0) + break; + FREE(new_cred, M_KAUTH); + new_cred = NULL; + } + } + + return(new_cred); +} + +/* + * Update the given credential using the uid argument. The given uid is used + * to set the effective user ID, real user ID, and saved user ID. We only + * allocate a new credential when the given uid actually results in changes to + * the existing credential. + */ +kauth_cred_t +kauth_cred_setuid(kauth_cred_t cred, uid_t uid) +{ + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the effective, real and saved user IDs are + * already the same as the user ID passed in + */ + if (cred->cr_uid == uid && cred->cr_ruid == uid && cred->cr_svuid == uid) { + /* no change needed */ + return(cred); + } + + /* look up in cred hash table to see if we have a matching credential + * with new values. + */ + bcopy(cred, &temp_cred, sizeof(temp_cred)); + temp_cred.cr_uid = uid; + temp_cred.cr_ruid = uid; + temp_cred.cr_svuid = uid; + temp_cred.cr_gmuid = uid; + + return(kauth_cred_update(cred, &temp_cred, TRUE)); +} + +/* + * Update the given credential using the euid argument. The given uid is used + * to set the effective user ID. We only allocate a new credential when the given + * uid actually results in changes to the existing credential. + */ +kauth_cred_t +kauth_cred_seteuid(kauth_cred_t cred, uid_t euid) +{ + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the given effective user ID is already the + * same as the effective user ID in the credential. + */ + if (cred->cr_uid == euid) { + /* no change needed */ + return(cred); + } + + /* look up in cred hash table to see if we have a matching credential + * with new values. + */ + bcopy(cred, &temp_cred, sizeof(temp_cred)); + temp_cred.cr_uid = euid; + + return(kauth_cred_update(cred, &temp_cred, TRUE)); +} + +/* + * Update the given credential using the gid argument. The given gid is used + * to set the effective group ID, real group ID, and saved group ID. We only + * allocate a new credential when the given gid actually results in changes to + * the existing credential. + */ +kauth_cred_t +kauth_cred_setgid(kauth_cred_t cred, gid_t gid) +{ + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the given group ID is already the + * same as the group ID in the credential.
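+ * (As with the other kauth_cred_set*() updaters: on the no-change path the
+ * original credential is returned untouched, while on the change path
+ * kauth_cred_update() drops the caller's reference on the old credential
+ * and returns a different, already-referenced one, so callers must always
+ * use the return value.)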
+ */ + if (cred->cr_groups[0] == gid && cred->cr_rgid == gid && cred->cr_svgid == gid) { + /* no change needed */ + return(cred); + } + + /* look up in cred hash table to see if we have a matching credential + * with new values. + */ + bcopy(cred, &temp_cred, sizeof(temp_cred)); + temp_cred.cr_groups[0] = gid; + temp_cred.cr_rgid = gid; + temp_cred.cr_svgid = gid; + + return(kauth_cred_update(cred, &temp_cred, TRUE)); +} + +/* + * Update the given credential using the egid argument. The given gid is used + * to set the effective group ID. We only allocate a new credential when the given + * gid actually results in changes to the existing credential. + */ +kauth_cred_t +kauth_cred_setegid(kauth_cred_t cred, gid_t egid) +{ + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the given group ID is already the + * same as the group ID in the credential. + */ + if (cred->cr_groups[0] == egid) { + /* no change needed */ + return(cred); + } + + /* look up in cred hash table to see if we have a matching credential + * with new values. + */ + bcopy(cred, &temp_cred, sizeof(temp_cred)); + temp_cred.cr_groups[0] = egid; + + return(kauth_cred_update(cred, &temp_cred, TRUE)); +} + +/* + * Update the given credential with the given groups. We only allocate a new + * credential when the given groups actually result in changes to the existing + * credential. + * The gmuid argument supplies a new uid (or KAUTH_UID_NONE to opt out) + * which will be used for group membership checking. + */ +kauth_cred_t +kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmuid) +{ + int i; + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the given list of groups does not change. + */ + if ((cred->cr_gmuid == gmuid) && (cred->cr_ngroups == groupcount)) { + for (i = 0; i < groupcount; i++) { + if (cred->cr_groups[i] != groups[i]) + break; + } + if (i == groupcount) { + /* no change needed */ + return(cred); + } + } + + /* look up in cred hash table to see if we have a matching credential + * with new values. + */ + bcopy(cred, &temp_cred, sizeof(temp_cred)); + temp_cred.cr_ngroups = groupcount; + bcopy(groups, temp_cred.cr_groups, sizeof(temp_cred.cr_groups)); + temp_cred.cr_gmuid = gmuid; + + return(kauth_cred_update(cred, &temp_cred, TRUE)); +} + +/* + * Update the given credential using the uid and gid arguments. The given uid + * is used to set the effective user ID, real user ID, and saved user ID. + * The given gid is used to set the effective group ID, real group ID, and saved + * group ID. + * We only allocate a new credential when the given uid and gid actually result + * in changes to the existing credential. + */ +kauth_cred_t +kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid) +{ + struct ucred temp_cred; + + NULLCRED_CHECK(cred); + + /* don't need to do anything if the effective, real and saved user and group + * IDs are already the same as those passed in + */ + if (cred->cr_uid == uid && cred->cr_ruid == uid && cred->cr_svuid == uid && + cred->cr_groups[0] == gid && cred->cr_rgid == gid && cred->cr_svgid == gid) { + /* no change needed */ + return(cred); + } + + /* look up in cred hash table to see if we have a matching credential + * with new values.
+	 */
+	bzero(&temp_cred, sizeof(temp_cred));
+	temp_cred.cr_uid = uid;
+	temp_cred.cr_ruid = uid;
+	temp_cred.cr_svuid = uid;
+	temp_cred.cr_gmuid = uid;
+	temp_cred.cr_ngroups = 1;
+	temp_cred.cr_groups[0] = gid;
+	temp_cred.cr_rgid = gid;
+	temp_cred.cr_svgid = gid;
+
+	return(kauth_cred_update(cred, &temp_cred, TRUE));
+}
+
+/*
+ * Update the given credential using the uid and gid arguments.  The given uid
+ * is used to set the saved user ID.  The given gid is used to set the
+ * saved group ID.
+ * We only allocate a new credential when the given uid and gid actually result
+ * in changes to the existing credential.
+ */
+kauth_cred_t
+kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid)
+{
+	struct ucred temp_cred;
+
+	NULLCRED_CHECK(cred);
+
+	/* don't need to do anything if the saved user and group IDs are
+	 * already the same as the ones passed in
+	 */
+	if (cred->cr_svuid == uid && cred->cr_svgid == gid) {
+		/* no change needed */
+		return(cred);
+	}
+
+	/* look up in cred hash table to see if we have a matching credential
+	 * with new values.
+	 */
+	bcopy(cred, &temp_cred, sizeof(temp_cred));
+	temp_cred.cr_svuid = uid;
+	temp_cred.cr_svgid = gid;
+
+	return(kauth_cred_update(cred, &temp_cred, TRUE));
+}
+
+/*
+ * Update the given credential using the given auditinfo_t.
+ * We only allocate a new credential when the given auditinfo_t actually results
+ * in changes to the existing credential.
+ */
+kauth_cred_t
+kauth_cred_setauditinfo(kauth_cred_t cred, auditinfo_t *auditinfo_p)
+{
+	struct ucred temp_cred;
+
+	NULLCRED_CHECK(cred);
+
+	/* don't need to do anything if the audit info is already the same as the
+	 * audit info in the credential passed in
+	 */
+	if (bcmp(&cred->cr_au, auditinfo_p, sizeof(cred->cr_au)) == 0) {
+		/* no change needed */
+		return(cred);
+	}
+
+	/* look up in cred hash table to see if we have a matching credential
+	 * with new values.
+	 */
+	bcopy(cred, &temp_cred, sizeof(temp_cred));
+	bcopy(auditinfo_p, &temp_cred.cr_au, sizeof(temp_cred.cr_au));
+
+	return(kauth_cred_update(cred, &temp_cred, FALSE));
+}
+
+/*
+ * Add a reference to the passed credential.
+ */
+void
+kauth_cred_ref(kauth_cred_t cred)
+{
+	int		old_value;
+
+	NULLCRED_CHECK(cred);
+
+	old_value = OSAddAtomic(1, &cred->cr_ref);
+
+	if (old_value < 1)
+		panic("kauth_cred_ref: trying to take a reference on a cred with no references");
+
+	return;
+}
+
+/*
+ * Drop a reference from the passed credential, potentially destroying it.
+ */
+void
+kauth_cred_rele(kauth_cred_t cred)
+{
+	int		old_value;
+
+	NULLCRED_CHECK(cred);
+
+	KAUTH_CRED_HASH_LOCK();
+	old_value = OSAddAtomic(-1, &cred->cr_ref);
+
+#if DIAGNOSTIC
+	if (old_value == 0)
+		panic("kauth_cred_rele: dropping a reference on a cred with no references");
+#endif
+
+	if (old_value < 3) {
+		/* the last reference is our credential hash table */
+		kauth_cred_remove(cred);
+	}
+	KAUTH_CRED_HASH_UNLOCK();
+}
+
+/*
+ * Duplicate a credential.
+ * NOTE - caller should call kauth_cred_add after any credential changes are made.
+ */
+kauth_cred_t
+kauth_cred_dup(kauth_cred_t cred)
+{
+	kauth_cred_t newcred;
+
+#if CRED_DIAGNOSTIC
+	if (cred == NOCRED || cred == FSCRED)
+		panic("kauth_cred_dup: bad credential");
+#endif
+	newcred = kauth_cred_alloc();
+	if (newcred != NULL) {
+		bcopy(cred, newcred, sizeof(*newcred));
+		newcred->cr_ref = 1;
+	}
+	return(newcred);
+}
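Reference counting here is asymmetric by design: kauth_cred_ref() is a single
atomic increment, while kauth_cred_rele() takes the hash lock because it may
have to unhash and free the credential once only the table's own reference
would remain.  A short consumer-side sketch (example_hold_cred is an invented
name; the real callers are the proc and VFS layers):

	/*
	 * Illustrative sketch, not part of this patch: pinning a shared
	 * credential across a blocking operation.
	 */
	static void
	example_hold_cred(kauth_cred_t cred)
	{
		kauth_cred_ref(cred);	/* cheap: one atomic increment */
		/* ... block, then inspect cred->cr_uid, cred->cr_groups ... */
		kauth_cred_rele(cred);	/* may unhash and free the cred */
	}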
+
+/*
+ * Returns a credential based on the passed credential but which
+ * reflects the real rather than effective UID and GID.
+ * NOTE - we do NOT decrement the reference count on the passed-in credential.
+ */
+kauth_cred_t
+kauth_cred_copy_real(kauth_cred_t cred)
+{
+	kauth_cred_t newcred = NULL, found_cred;
+	struct ucred temp_cred;
+
+	/* if the credential is already 'real', just take a reference */
+	if ((cred->cr_ruid == cred->cr_uid) &&
+	    (cred->cr_rgid == cred->cr_gid)) {
+		kauth_cred_ref(cred);
+		return(cred);
+	}
+
+	/* look up in cred hash table to see if we have a matching credential
+	 * with new values.
+	 */
+	bcopy(cred, &temp_cred, sizeof(temp_cred));
+	temp_cred.cr_uid = cred->cr_ruid;
+	temp_cred.cr_groups[0] = cred->cr_rgid;
+	/* if the cred is not opted out, make sure we are using the r/euid for group checks */
+	if (temp_cred.cr_gmuid != KAUTH_UID_NONE)
+		temp_cred.cr_gmuid = cred->cr_ruid;
+
+	for (;;) {
+		int		err;
+
+		KAUTH_CRED_HASH_LOCK();
+		found_cred = kauth_cred_find(&temp_cred);
+		if (found_cred == cred) {
+			/* same cred so just bail */
+			KAUTH_CRED_HASH_UNLOCK();
+			return(cred);
+		}
+		if (found_cred != NULL) {
+			/* found a match, so take a reference on it and return it;
+			 * unlike kauth_cred_update, the caller's reference on the
+			 * passed-in credential is left untouched.
+			 */
+			kauth_cred_ref(found_cred);
+			KAUTH_CRED_HASH_UNLOCK();
+			return(found_cred);
+		}
+
+		/* must allocate a new credential, copy in old credential data and update
+		 * with real user and group IDs.
+		 */
+		newcred = kauth_cred_dup(&temp_cred);
+		err = kauth_cred_add(newcred);
+		KAUTH_CRED_HASH_UNLOCK();
+
+		/* retry if kauth_cred_add returns a non-zero value */
+		if (err == 0)
+			break;
+		FREE(newcred, M_KAUTH);
+		newcred = NULL;
+	}
+
+	return(newcred);
+}
+
+/*
+ * Common code to update a credential.  model_cred is a temporary,
+ * non-reference-counted credential used only for comparison and modeling
+ * purposes.  old_cred is a live reference-counted credential that we intend
+ * to update using model_cred as our model.
+ */
+static kauth_cred_t kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t model_cred, boolean_t retain_auditinfo)
+{
+	kauth_cred_t found_cred, new_cred = NULL;
+
+	/* make sure we carry the auditinfo forward to the new credential unless
+	 * we are actually updating the auditinfo.
+	 */
+	if (retain_auditinfo)
+		bcopy(&old_cred->cr_au, &model_cred->cr_au, sizeof(model_cred->cr_au));
+
+	for (;;) {
+		int		err;
+
+		KAUTH_CRED_HASH_LOCK();
+		found_cred = kauth_cred_find(model_cred);
+		if (found_cred == old_cred) {
+			/* same cred so just bail */
+			KAUTH_CRED_HASH_UNLOCK();
+			return(old_cred);
+		}
+		if (found_cred != NULL) {
+			/* found a match so we bump reference count on new one and decrement
+			 * reference count on the old one.
+			 */
+			kauth_cred_ref(found_cred);
+			KAUTH_CRED_HASH_UNLOCK();
+			kauth_cred_rele(old_cred);
+			return(found_cred);
+		}
+
+		/* must allocate a new credential using the model.  also
+		 * adds the new credential to the credential hash table.
+		 */
+		new_cred = kauth_cred_dup(model_cred);
+		err = kauth_cred_add(new_cred);
+		KAUTH_CRED_HASH_UNLOCK();
+
+		/* retry if kauth_cred_add returns a non-zero value */
+		if (err == 0)
+			break;
+		FREE(new_cred, M_KAUTH);
+		new_cred = NULL;
+	}
+
+	kauth_cred_rele(old_cred);
+	return(new_cred);
+}
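kauth_cred_copy_real() and kauth_cred_update() share the same insert-or-find
idiom: search under the hash lock, drop the lock to allocate (allocation can
block), then re-take the lock and let kauth_cred_add() report any insert that
raced in between.  Reduced to its skeleton, with every name below a generic
placeholder rather than a kernel API:

	/*
	 * Illustrative skeleton, not part of this patch: insert-or-find
	 * when the allocation cannot happen under the lock.
	 */
	for (;;) {
		lock();
		existing = table_find(&key);
		if (existing != NULL) {
			take_ref(existing);	/* reuse the winner */
			unlock();
			return existing;
		}
		unlock();
		candidate = alloc_entry(&key);	/* may block */
		lock();
		err = table_add(candidate);	/* re-checks for a racing insert */
		unlock();
		if (err == 0)
			return candidate;	/* we won the race */
		free_entry(candidate);		/* lost the race; retry */
	}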
+
+/*
+ * Add the given credential to our credential hash table and take an additional
+ * reference to account for our use of the credential in the hash table.
+ * NOTE - expects caller to hold KAUTH_CRED_HASH_LOCK!
+ */
+static int kauth_cred_add(kauth_cred_t new_cred)
+{
+	u_long		hash_key;
+
+	hash_key = kauth_cred_get_hashkey(new_cred);
+	hash_key %= kauth_cred_table_size;
+
+	/* race fix - there is a window where another matching credential
+	 * could have been inserted between the time this one was created and we
+	 * got the hash lock.  If we find a match return an error and have the
+	 * caller retry.
+	 */
+	if (kauth_cred_find(new_cred) != NULL) {
+		return(-1);
+	}
+
+	/* take a reference for our use in credential hash table */
+	kauth_cred_ref(new_cred);
+
+	/* insert the credential into the hash table */
+	TAILQ_INSERT_HEAD(&kauth_cred_table_anchor[hash_key], new_cred, cr_link);
+
+	return(0);
+}
+
+/*
+ * Remove the given credential from our credential hash table.
+ * NOTE - expects caller to hold KAUTH_CRED_HASH_LOCK!
+ */
+static void kauth_cred_remove(kauth_cred_t cred)
+{
+	u_long		hash_key;
+	kauth_cred_t	found_cred;
+
+	hash_key = kauth_cred_get_hashkey(cred);
+	hash_key %= kauth_cred_table_size;
+
+	/* avoid race */
+	if (cred->cr_ref < 1)
+		panic("cred reference underflow");
+	if (cred->cr_ref > 1)
+		return;		/* someone else got a ref */
+
+	/* find cred in the credential hash table */
+	TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) {
+		if (found_cred == cred) {
+			/* found a match, remove it from the hash table */
+			TAILQ_REMOVE(&kauth_cred_table_anchor[hash_key], found_cred, cr_link);
+			FREE(cred, M_KAUTH);
+#if KAUTH_CRED_HASH_DEBUG
+			kauth_cred_count--;
+#endif
+			return;
+		}
+	}
+
+	/* did not find a match.  this should not happen! */
+	printf("%s - %d - %s - did not find a match \n", __FILE__, __LINE__, __FUNCTION__);
+	return;
+}
+
+/*
+ * Using the given credential data, look for a match in our credential hash
+ * table.
+ * NOTE - expects caller to hold KAUTH_CRED_HASH_LOCK!
+ */
+kauth_cred_t kauth_cred_find(kauth_cred_t cred)
+{
+	u_long		hash_key;
+	kauth_cred_t	found_cred;
+
+#if KAUTH_CRED_HASH_DEBUG
+	static int	test_count = 0;
+
+	test_count++;
+	if ((test_count % 200) == 0) {
+		kauth_cred_hash_print();
+	}
+#endif
+
+	hash_key = kauth_cred_get_hashkey(cred);
+	hash_key %= kauth_cred_table_size;
+
+	/* find cred in the credential hash table */
+	TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) {
+		if (bcmp(&found_cred->cr_uid, &cred->cr_uid, (sizeof(struct ucred) - offsetof(struct ucred, cr_uid))) == 0) {
+			/* found a match */
+			return(found_cred);
+		}
+	}
+	/* no match found */
+	return(NULL);
+}
+
+/*
+ * Generates a hash key using data that makes up a credential.  Based on ElfHash.
+ */
+static u_long kauth_cred_get_hashkey(kauth_cred_t cred)
+{
+	u_long	hash_key = 0;
+
+	hash_key = kauth_cred_hash((uint8_t *)&cred->cr_uid,
+			(sizeof(struct ucred) - offsetof(struct ucred, cr_uid)),
+			hash_key);
+	return(hash_key);
+}
+
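Both the comparison in kauth_cred_find() and the hash below deliberately start
at cr_uid and run to the end of the structure: the list linkage and reference
count that precede it are per-instance bookkeeping and must not influence
matching.  Sketched for illustration (field order inferred from the
offsetof() arithmetic above, not copied verbatim from the header):

	/*
	 * Illustrative only: why bcmp() and the hash start at cr_uid.
	 */
	struct ucred_layout_sketch {
		TAILQ_ENTRY(ucred)	cr_link;	/* hash chain - skipped */
		u_long			cr_ref;		/* refcount - skipped */
		uid_t			cr_uid;		/* first byte compared/hashed */
		/* ... everything from here on identifies the credential ... */
	};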
+/*
+ * Hashes an arbitrary run of bytes into a hash key.  Based on ElfHash; this
+ * is the core used by kauth_cred_get_hashkey().
+ */
+static inline u_long kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key)
+{
+	u_long	hash_key = start_key;
+	u_long	temp;
+
+	while (data_len > 0) {
+		hash_key = (hash_key << 4) + *datap++;
+		temp = hash_key & 0xF0000000;
+		if (temp) {
+			hash_key ^= temp >> 24;
+		}
+		hash_key &= ~temp;
+		data_len--;
+	}
+	return(hash_key);
+}
+
+#if KAUTH_CRED_HASH_DEBUG
+static void kauth_cred_hash_print(void)
+{
+	int		i, j;
+	kauth_cred_t	found_cred;
+
+	printf("\n\t kauth credential hash table statistics - current cred count %d \n", kauth_cred_count);
+	/* count slot hits, misses, collisions, and max depth */
+	for (i = 0; i < kauth_cred_table_size; i++) {
+		printf("[%02d] ", i);
+		j = 0;
+		TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[i], cr_link) {
+			if (j > 0) {
+				printf("---- ");
+			}
+			j++;
+			kauth_cred_print(found_cred);
+			printf("\n");
+		}
+		if (j == 0) {
+			printf("NOCRED \n");
+		}
+	}
+}
+
+
+static void kauth_cred_print(kauth_cred_t cred)
+{
+	int	i;
+
+	printf("%p - refs %d uids %d %d %d ", cred, cred->cr_ref, cred->cr_uid, cred->cr_ruid, cred->cr_svuid);
+	printf("group count %d gids ", cred->cr_ngroups);
+	for (i = 0; i < NGROUPS; i++) {
+		printf("%d ", cred->cr_groups[i]);
+	}
+	printf("%d %d %d ", cred->cr_rgid, cred->cr_svgid, cred->cr_gmuid);
+	printf("auditinfo %d %d %d %d %d %d ",
+		cred->cr_au.ai_auid, cred->cr_au.ai_mask.am_success, cred->cr_au.ai_mask.am_failure,
+		cred->cr_au.ai_termid.port, cred->cr_au.ai_termid.machine, cred->cr_au.ai_asid);
+
+}
+#endif
diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c
index d72fb8932..b0c759539 100644
--- a/bsd/kern/kern_descrip.c
+++ b/bsd/kern/kern_descrip.c
@@ -64,61 +64,145 @@
 #include
 #include
 #include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
-#include
-
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct psemnode;
+struct pshmnode;
+
+int fdopen(dev_t dev, int mode, int type, struct proc *p);
+int ogetdtablesize(struct proc *p, void *uap, register_t *retval);
+int finishdup(struct proc * p, struct filedesc *fdp, int old, int new, register_t *retval);
+
+int closef(struct fileglob *fg, struct proc *p);
+int falloc_locked(struct proc *p, struct fileproc **resultfp, int *resultfd, int locked);
+void fddrop(struct proc *p, int fd);
+int fdgetf_noref(struct proc *p, int fd, struct fileproc **resultfp);
+void fg_drop(struct fileproc * fp);
+void fg_free(struct fileglob *fg);
+void fg_ref(struct fileproc * fp);
+int fp_getfpshm(struct proc *p, int fd, struct fileproc **resultfp, struct pshmnode **resultpshm);
+
+static int closef_finish(struct fileproc *fp, struct fileglob *fg, struct proc *p);
+
+extern void file_lock_init(void);
+extern int is_suser(void);
+extern int kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p);
+extern int soo_stat(struct socket *so, struct stat *ub);
+extern int vn_path_package_check(vnode_t vp, char *path, int pathlen, int *component);
+
+extern kauth_scope_t kauth_scope_fileop;
+
+#define f_flag f_fglob->fg_flag
+#define f_type f_fglob->fg_type
+#define f_msgcount f_fglob->fg_msgcount
+#define f_cred f_fglob->fg_cred
+#define f_ops f_fglob->fg_ops
+#define f_offset f_fglob->fg_offset
+#define f_data f_fglob->fg_data
 /*
  * Descriptor management.
*/ struct filelist filehead; /* head of list of open files */ +struct fmsglist fmsghead; /* head of list of open files */ +struct fmsglist fmsg_ithead; /* head of list of open files */ int nfiles; /* actual number of open files */ -static int frele_internal(struct file *); + +lck_grp_attr_t * file_lck_grp_attr; +lck_grp_t * file_lck_grp; +lck_attr_t * file_lck_attr; + +lck_mtx_t * uipc_lock; +lck_mtx_t * file_iterate_lcok; +lck_mtx_t * file_flist_lock; + + +void +file_lock_init(void) +{ + + /* allocate file lock group attribute and group */ + file_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(file_lck_grp_attr); + + file_lck_grp = lck_grp_alloc_init("file", file_lck_grp_attr); + + /* Allocate file lock attribute */ + file_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(file_lck_attr); + + uipc_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr); + file_iterate_lcok = lck_mtx_alloc_init(file_lck_grp, file_lck_attr); + file_flist_lock = lck_mtx_alloc_init(file_lck_grp, file_lck_attr); + + + +} + + +void +proc_fdlock(struct proc *p) +{ + lck_mtx_lock(&p->p_fdmlock); +} + +void +proc_fdunlock(struct proc *p) +{ + lck_mtx_unlock(&p->p_fdmlock); +} /* * System calls on descriptors. */ -/* ARGSUSED */ + int -getdtablesize(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +getdtablesize(struct proc *p, __unused struct getdtablesize_args *uap, register_t *retval) { + proc_fdlock(p); *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); + proc_fdunlock(p); + return (0); } -/* ARGSUSED */ int -ogetdtablesize(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +ogetdtablesize(struct proc *p, __unused void *uap, register_t *retval) { + proc_fdlock(p); *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, NOFILE); + proc_fdunlock(p); + return (0); } -static __inline__ -void _fdrelse(fdp, fd) - register struct filedesc *fdp; - register int fd; +static __inline__ void +_fdrelse(struct filedesc *fdp, int fd) { if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; @@ -128,6 +212,7 @@ void _fdrelse(fdp, fd) #endif fdp->fd_ofiles[fd] = NULL; fdp->fd_ofileflags[fd] = 0; + while ((fd = fdp->fd_lastfile) > 0 && fdp->fd_ofiles[fd] == NULL && !(fdp->fd_ofileflags[fd] & UF_RESERVED)) @@ -137,9 +222,6 @@ void _fdrelse(fdp, fd) /* * Duplicate a file descriptor. */ -struct dup_args { - u_int fd; -}; /* ARGSUSED */ int dup(p, uap, retval) @@ -150,23 +232,28 @@ dup(p, uap, retval) register struct filedesc *fdp = p->p_fd; register int old = uap->fd; int new, error; + struct fileproc *fp; - if ((u_int)old >= fdp->fd_nfiles || - fdp->fd_ofiles[old] == NULL || - (fdp->fd_ofileflags[old] & UF_RESERVED)) - return (EBADF); - if (error = fdalloc(p, 0, &new)) + proc_fdlock(p); + if ( (error = fp_lookup(p, old, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + if ( (error = fdalloc(p, 0, &new)) ) { + fp_drop(p, old, fp, 1); + proc_fdunlock(p); return (error); - return (finishdup(fdp, old, new, retval)); + } + error = finishdup(p, fdp, old, new, retval); + fp_drop(p, old, fp, 1); + proc_fdunlock(p); + + return (error); } /* * Duplicate a file descriptor to a particular value. 
*/ -struct dup2_args { - u_int from; - u_int to; -}; /* ARGSUSED */ int dup2(p, uap, retval) @@ -177,224 +264,308 @@ dup2(p, uap, retval) register struct filedesc *fdp = p->p_fd; register int old = uap->from, new = uap->to; int i, error; + struct fileproc *fp; - if ((u_int)old >= fdp->fd_nfiles || - fdp->fd_ofiles[old] == NULL || - (fdp->fd_ofileflags[old] & UF_RESERVED) || - (u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || - (u_int)new >= maxfiles) + proc_fdlock(p); + + if ( (error = fp_lookup(p, old, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + if (new < 0 || + new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || + new >= maxfiles) { + fp_drop(p, old, fp, 1); + proc_fdunlock(p); return (EBADF); + } if (old == new) { + fp_drop(p, old, fp, 1); *retval = new; + proc_fdunlock(p); return (0); } - if ((u_int)new >= fdp->fd_nfiles) { - if (error = fdalloc(p, new, &i)) + if (new < 0 || new >= fdp->fd_nfiles) { + if ( (error = fdalloc(p, new, &i)) ) { + fp_drop(p, old, fp, 1); + proc_fdunlock(p); return (error); + } if (new != i) { _fdrelse(fdp, i); goto closeit; } } else { - struct file **fpp; + struct fileproc **fpp; char flags; closeit: - if ((flags = fdp->fd_ofileflags[new]) & UF_RESERVED) + flags = fdp->fd_ofileflags[new]; + if ((flags & (UF_RESERVED | UF_CLOSING)) == UF_RESERVED) { + fp_drop(p, old, fp, 1); + proc_fdunlock(p); return (EBADF); - fdp->fd_ofileflags[new] = (flags & ~UF_MAPPED) | UF_RESERVED; + } + /* * dup2() must succeed even if the close has an error. */ if (*(fpp = &fdp->fd_ofiles[new])) { - struct file *fp = *fpp; + struct fileproc *nfp = *fpp; + close_internal(p, new, nfp, (CLOSEINT_LOCKED | CLOSEINT_WAITONCLOSE | CLOSEINT_NOFDRELSE | CLOSEINT_NOFDNOREF)); *fpp = NULL; - (void) closef(fp, p); } } - return (finishdup(fdp, old, new, retval)); + error = finishdup(p, fdp, old, new, retval); + fp_drop(p, old, fp, 1); + proc_fdunlock(p); + + return(error); } /* * The file control system call. */ -struct fcntl_args { - int fd; - int cmd; - int arg; -}; -/* ARGSUSED */ int fcntl(p, uap, retval) struct proc *p; - register struct fcntl_args *uap; + struct fcntl_args *uap; register_t *retval; { int fd = uap->fd; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - register char *pop; - struct vnode *vp, *devvp; + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + char *pop; + struct vnode *vp; int i, tmp, error, error2, flg = F_POSIX; struct flock fl; - fstore_t alloc_struct; /* structure for allocate command */ - u_int32_t alloc_flags = 0; - off_t offset; /* used for F_SETSIZE */ + struct vfs_context context; + off_t offset; int newmin; - struct radvisory ra_struct; - fbootstraptransfer_t fbt_struct; /* for F_READBOOTSTRAP and F_WRITEBOOTSTRAP */ - struct log2phys l2p_struct; /* structure for allocate command */ - daddr_t lbn, bn; + daddr64_t lbn, bn; int devBlockSize = 0; + unsigned int fflag; + user_addr_t argp; AUDIT_ARG(fd, uap->fd); AUDIT_ARG(cmd, uap->cmd); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); + + proc_fdlock(p); + if ( (error = fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + context.vc_proc = p; + context.vc_ucred = fp->f_cred; + if (proc_is64bit(p)) { + argp = uap->arg; + } + else { + /* since the arg parameter is defined as a long but may be either + * a long or a pointer we must take care to handle sign extension + * issues. Our sys call munger will sign extend a long when we are + * called from a 32-bit process. 
Since we can never have an address + * greater than 32-bits from a 32-bit process we lop off the top + * 32-bits to avoid getting the wrong address + */ + argp = CAST_USER_ADDR_T(uap->arg); + } + pop = &fdp->fd_ofileflags[fd]; switch (uap->cmd) { case F_DUPFD: - newmin = (long)uap->arg; + newmin = CAST_DOWN(int, uap->arg); if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || - (u_int)newmin >= maxfiles) - return (EINVAL); - if (error = fdalloc(p, newmin, &i)) - return (error); - return (finishdup(fdp, fd, i, retval)); + newmin >= maxfiles) { + error = EINVAL; + goto out; + } + if ( (error = fdalloc(p, newmin, &i)) ) + goto out; + error = finishdup(p, fdp, fd, i, retval); + goto out; case F_GETFD: *retval = (*pop & UF_EXCLOSE)? 1 : 0; - return (0); + error = 0; + goto out; case F_SETFD: *pop = (*pop &~ UF_EXCLOSE) | - ((long)(uap->arg) & 1)? UF_EXCLOSE : 0; - return (0); + (uap->arg & 1)? UF_EXCLOSE : 0; + error = 0; + goto out; case F_GETFL: *retval = OFLAGS(fp->f_flag); - return (0); + error = 0; + goto out; case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; - fp->f_flag |= FFLAGS((long)uap->arg) & FCNTLFLAGS; + tmp = CAST_DOWN(int, uap->arg); + fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); if (error) - return (error); + goto out; tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) - return (0); + goto out; fp->f_flag &= ~FNONBLOCK; tmp = 0; (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); - return (error); + goto out; case F_GETOWN: if (fp->f_type == DTYPE_SOCKET) { *retval = ((struct socket *)fp->f_data)->so_pgid; - return (0); + error = 0; + goto out; } error = fo_ioctl(fp, (int)TIOCGPGRP, (caddr_t)retval, p); *retval = -*retval; - return (error); + goto out; case F_SETOWN: + tmp = CAST_DOWN(pid_t, uap->arg); if (fp->f_type == DTYPE_SOCKET) { - ((struct socket *)fp->f_data)->so_pgid = - (long)uap->arg; - return (0); + ((struct socket *)fp->f_data)->so_pgid = tmp; + error =0; + goto out; } - if ((long)uap->arg <= 0) { - uap->arg = (int)(-(long)(uap->arg)); + if (fp->f_type == DTYPE_PIPE) { + error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); + goto out; + } + + if (tmp <= 0) { + tmp = -tmp; } else { - struct proc *p1 = pfind((long)uap->arg); - if (p1 == 0) - return (ESRCH); - uap->arg = (int)p1->p_pgrp->pg_id; + struct proc *p1 = pfind(tmp); + if (p1 == 0) { + error = ESRCH; + goto out; + } + tmp = (int)p1->p_pgrp->pg_id; } - return (fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p)); + error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); + goto out; case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + fflag = fp->f_flag; + offset = fp->f_offset; + proc_fdunlock(p); + /* Copy in the lock structure */ - error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl)); - if (error) - break; + error = copyin(argp, (caddr_t)&fl, sizeof (fl)); + if (error) { + goto outdrop; + } + if ( (error = vnode_getwithref(vp)) ) { + goto outdrop; + } if (fl.l_whence == SEEK_CUR) - fl.l_start += fp->f_offset; + fl.l_start += offset; + switch (fl.l_type) { case F_RDLCK: - if ((fp->f_flag & FREAD) != 0) { - p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg); - } else + if ((fflag & FREAD) == 0) { + (void)vnode_put(vp); error = EBADF; - break; + goto outdrop; + } + p->p_flag |= P_ADVLOCK; + error = 
VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); + (void)vnode_put(vp); + goto outdrop; case F_WRLCK: - if ((fp->f_flag & FWRITE) != 0) { - p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg); - } else + if ((fflag & FWRITE) == 0) { + (void)vnode_put(vp); error = EBADF; - break; + goto outdrop; + } + p->p_flag |= P_ADVLOCK; + error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); + (void)vnode_put(vp); + goto outdrop; case F_UNLCK: - error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, F_POSIX); - break; + error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, + F_POSIX, &context); + (void)vnode_put(vp); + goto outdrop; default: + (void)vnode_put(vp); error = EINVAL; - break; + goto outdrop; } - break; case F_GETLK: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + offset = fp->f_offset; + proc_fdunlock(p); + /* Copy in the lock structure */ - error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl)); + error = copyin(argp, (caddr_t)&fl, sizeof (fl)); if (error) - break; - if (fl.l_whence == SEEK_CUR) - fl.l_start += fp->f_offset; - error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX); - if (error) - break; - error = copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)); - break; + goto outdrop; + + if ( (error = vnode_getwithref(vp)) == 0 ) { + if (fl.l_whence == SEEK_CUR) + fl.l_start += offset; + + error = VNOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX, &context); + + (void)vnode_put(vp); + + if (error == 0) + error = copyout((caddr_t)&fl, argp, sizeof (fl)); + } + goto outdrop; + + case F_PREALLOCATE: { + fstore_t alloc_struct; /* structure for allocate command */ + u_int32_t alloc_flags = 0; + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } - case F_PREALLOCATE: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); /* make sure that we have write permission */ if ((fp->f_flag & FWRITE) == 0) { error = EBADF; - break; + goto outdrop; } - error = copyin((caddr_t)uap->arg, (caddr_t)&alloc_struct, - sizeof (alloc_struct)); + error = copyin(argp, (caddr_t)&alloc_struct, sizeof (alloc_struct)); if (error) - break; + goto outdrop; /* now set the space allocated to 0 */ alloc_struct.fst_bytesalloc = 0; @@ -421,53 +592,55 @@ fcntl(p, uap, retval) switch (alloc_struct.fst_posmode) { case F_PEOFPOSMODE: - if (alloc_struct.fst_offset == 0) - alloc_flags |= ALLOCATEFROMPEOF; - else + if (alloc_struct.fst_offset != 0) { error = EINVAL; + goto outdrop; + } + + alloc_flags |= ALLOCATEFROMPEOF; break; case F_VOLPOSMODE: - if (alloc_struct.fst_offset > 0) - alloc_flags |= ALLOCATEFROMVOL; - else + if (alloc_struct.fst_offset <= 0) { error = EINVAL; + goto outdrop; + } + + alloc_flags |= ALLOCATEFROMVOL; break; - default: + default: { error = EINVAL; - break; + goto outdrop; + } } + if ( (error = vnode_getwithref(vp)) == 0 ) { + /* + * call allocate to get the space + */ + error = VNOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags, + &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset, + &context); + (void)vnode_put(vp); - if (error) - break; + error2 = copyout((caddr_t)&alloc_struct, argp, sizeof (alloc_struct)); - /* lock the vnode and call allocate to get the space */ - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) - break; - error = VOP_ALLOCATE(vp,alloc_struct.fst_length,alloc_flags, - &alloc_struct.fst_bytesalloc, alloc_struct.fst_offset, - fp->f_cred, 
p); - VOP_UNLOCK(vp, 0, p); - - if (error2 = copyout((caddr_t)&alloc_struct, - (caddr_t)uap->arg, - sizeof (alloc_struct))) { - if (!error) + if (error == 0) error = error2; } - break; + goto outdrop; + } case F_SETSIZE: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); - vp = (struct vnode *)fp->f_data; + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + proc_fdunlock(p); - error = copyin((caddr_t)uap->arg, (caddr_t)&offset, - sizeof (off_t)); + error = copyin(argp, (caddr_t)&offset, sizeof (off_t)); if (error) - break; + goto outdrop; /* * Make sure that we are root. Growing a file @@ -477,390 +650,613 @@ fcntl(p, uap, retval) if (!is_suser()) { error = EACCES; - break; + goto outdrop; } + vp = (struct vnode *)fp->f_data; - /* lock the vnode and call allocate to get the space */ - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) - break; - error = VOP_TRUNCATE(vp,offset,IO_NOZEROFILL,fp->f_cred,p); - VOP_UNLOCK(vp,0,p); - break; + if ( (error = vnode_getwithref(vp)) == 0 ) { + /* + * set the file size + */ + error = vnode_setsize(vp, offset, IO_NOZEROFILL, &context); + + (void)vnode_put(vp); + } + goto outdrop; case F_RDAHEAD: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - simple_lock(&vp->v_interlock); - if (uap->arg) - vp->v_flag &= ~VRAOFF; - else - vp->v_flag |= VRAOFF; - simple_unlock(&vp->v_interlock); - error = 0; - break; + if ( (error = vnode_getwithref(vp)) == 0) { + if (uap->arg) + vnode_clearnoreadahead(vp); + else + vnode_setnoreadahead(vp); + + (void)vnode_put(vp); + } + goto outdrop; case F_NOCACHE: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - simple_lock(&vp->v_interlock); - if (uap->arg) - vp->v_flag |= VNOCACHE_DATA; - else - vp->v_flag &= ~VNOCACHE_DATA; - simple_unlock(&vp->v_interlock); - error = 0; - break; + if ( (error = vnode_getwithref(vp)) == 0 ) { + if (uap->arg) + vnode_setnocache(vp); + else + vnode_clearnocache(vp); - case F_RDADVISE: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); - vp = (struct vnode *)fp->f_data; + (void)vnode_put(vp); + } + goto outdrop; - if (error = copyin((caddr_t)uap->arg, - (caddr_t)&ra_struct, sizeof (ra_struct))) - break; - error = VOP_IOCTL(vp, 1, (caddr_t)&ra_struct, 0, fp->f_cred, p); - break; + case F_RDADVISE: { + struct radvisory ra_struct; - case F_CHKCLEAN: - /* - * used by regression test to determine if - * all the dirty pages (via write) have been cleaned - * after a call to 'fsysnc'. 
- */ - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - error = VOP_IOCTL(vp, 5, 0, 0, fp->f_cred, p); - break; + if ( (error = copyin(argp, (caddr_t)&ra_struct, sizeof (ra_struct))) ) + goto outdrop; + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = VNOP_IOCTL(vp, F_RDADVISE, (caddr_t)&ra_struct, 0, &context); + + (void)vnode_put(vp); + } + goto outdrop; + } case F_READBOOTSTRAP: - case F_WRITEBOOTSTRAP: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + case F_WRITEBOOTSTRAP: { + fbootstraptransfer_t fbt_struct; + user_fbootstraptransfer_t user_fbt_struct; + int sizeof_struct; + caddr_t boot_structp; + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - error = copyin((caddr_t)uap->arg, (caddr_t)&fbt_struct, - sizeof (fbt_struct)); + if (IS_64BIT_PROCESS(p)) { + sizeof_struct = sizeof(user_fbt_struct); + boot_structp = (caddr_t) &user_fbt_struct; + } + else { + sizeof_struct = sizeof(fbt_struct); + boot_structp = (caddr_t) &fbt_struct; + } + error = copyin(argp, boot_structp, sizeof_struct); if (error) - break; - + goto outdrop; + if ( (error = vnode_getwithref(vp)) ) { + goto outdrop; + } if (uap->cmd == F_WRITEBOOTSTRAP) { - /* - * Make sure that we are root. Updating the - * bootstrap on a disk could be a security hole - */ + /* + * Make sure that we are root. Updating the + * bootstrap on a disk could be a security hole + */ if (!is_suser()) { + (void)vnode_put(vp); error = EACCES; - break; + goto outdrop; } } - - if (vp->v_tag != VT_HFS) /* XXX */ + if (strcmp(vnode_mount(vp)->mnt_vfsstat.f_fstypename, "hfs") != 0) { error = EINVAL; - else { - /* lock the vnode and call VOP_IOCTL to handle the I/O */ - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) - break; - error = VOP_IOCTL(vp, (uap->cmd == F_WRITEBOOTSTRAP) ? 
3 : 2, - (caddr_t)&fbt_struct, 0, fp->f_cred, p); - VOP_UNLOCK(vp,0,p); + } else { + /* + * call vnop_ioctl to handle the I/O + */ + error = VNOP_IOCTL(vp, uap->cmd, boot_structp, 0, &context); } - break; + (void)vnode_put(vp); + goto outdrop; + } + case F_LOG2PHYS: { + struct log2phys l2p_struct; /* structure for allocate command */ - case F_LOG2PHYS: - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + if ( (error = vnode_getwithref(vp)) ) { + goto outdrop; + } + error = VNOP_OFFTOBLK(vp, fp->f_offset, &lbn); + if (error) { + (void)vnode_put(vp); + goto outdrop; + } + error = VNOP_BLKTOOFF(vp, lbn, &offset); + if (error) { + (void)vnode_put(vp); + goto outdrop; + } + devBlockSize = vfs_devblocksize(vnode_mount(vp)); + + error = VNOP_BLOCKMAP(vp, offset, devBlockSize, &bn, NULL, NULL, 0, &context); + + (void)vnode_put(vp); - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) - break; - error = VOP_OFFTOBLK(vp, fp->f_offset, &lbn); - if (error) - break; - error = VOP_BLKTOOFF(vp, lbn, &offset); - if (error) - break; - error = VOP_BMAP(vp, lbn, &devvp, &bn, 0); - VOP_DEVBLOCKSIZE(devvp, &devBlockSize); - VOP_UNLOCK(vp, 0, p); if (!error) { l2p_struct.l2p_flags = 0; /* for now */ l2p_struct.l2p_contigbytes = 0; /* for now */ l2p_struct.l2p_devoffset = bn * devBlockSize; l2p_struct.l2p_devoffset += fp->f_offset - offset; - error = copyout((caddr_t)&l2p_struct, - (caddr_t)uap->arg, - sizeof (l2p_struct)); + error = copyout((caddr_t)&l2p_struct, argp, sizeof (l2p_struct)); + } + goto outdrop; } - break; - case F_GETPATH: { - char *pathbuf; - int len; - extern int vn_getpath(struct vnode *vp, char *pathbuf, int *len); + char *pathbufp; + int pathlen; - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - len = MAXPATHLEN; - MALLOC(pathbuf, char *, len, M_TEMP, M_WAITOK); + pathlen = MAXPATHLEN; + MALLOC(pathbufp, char *, pathlen, M_TEMP, M_WAITOK); + if (pathbufp == NULL) { + error = ENOMEM; + goto outdrop; + } + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = vn_getpath(vp, pathbufp, &pathlen); + (void)vnode_put(vp); - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) { - FREE(pathbuf, M_TEMP); - break; - } - error = vn_getpath(vp, pathbuf, &len); - if (error == 0) - error = copyout((caddr_t)pathbuf, (caddr_t)uap->arg, len); - VOP_UNLOCK(vp, 0, p); - FREE(pathbuf, M_TEMP); - break; + if (error == 0) + error = copyout((caddr_t)pathbufp, argp, pathlen); + } + FREE(pathbufp, M_TEMP); + goto outdrop; } - case F_FULLFSYNC: { - if (fp->f_type != DTYPE_VNODE) - return (EBADF); + case F_PATHPKG_CHECK: { + char *pathbufp; + size_t pathlen; + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - error = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - if (error) - break; + pathlen = MAXPATHLEN; + pathbufp = kalloc(MAXPATHLEN); + + if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) { + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = vn_path_package_check(vp, pathbufp, pathlen, retval); + + (void)vnode_put(vp); + } + } + kfree(pathbufp, MAXPATHLEN); + goto outdrop; + } + + case F_CHKCLEAN: // used by regression tests to see if all dirty pages got cleaned by fsync() + case F_FULLFSYNC: // fsync + flush the journal + DKIOCSYNCHRONIZECACHE + case F_FREEZE_FS: // 
freeze all other fs operations for the fs of this fd + case F_THAW_FS: { // thaw all frozen fs operations for the fs of this fd + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); - error = VOP_IOCTL(vp, 6, (caddr_t)NULL, 0, fp->f_cred, p); - VOP_UNLOCK(vp, 0, p); + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)NULL, 0, &context); + + (void)vnode_put(vp); + } break; } default: - return (EINVAL); + if (uap->cmd < FCNTL_FS_SPECIFIC_BASE) { + error = EINVAL; + goto out; + } + + // if it's a fs-specific fcntl() then just pass it through + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, argp), 0, &context); + + (void)vnode_put(vp); + } + break; + } - /* - * Fall thru to here for all vnode operations. - * We audit the path after the call to avoid - * triggering file table state changes during - * the audit pathname allocation. - */ - AUDIT_ARG(vnpath, vp, ARG_VNODE1); - return error; +outdrop: + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + fp_drop(p, fd, fp, 0); + return(error); +out: + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return(error); } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ int -finishdup(fdp, old, new, retval) - register struct filedesc *fdp; - register int old, new; - register_t *retval; +finishdup(struct proc * p, struct filedesc *fdp, int old, int new, register_t *retval) { - register struct file *fp; + struct fileproc *nfp; + struct fileproc *ofp; - if ((fp = fdp->fd_ofiles[old]) == NULL || + if ((ofp = fdp->fd_ofiles[old]) == NULL || (fdp->fd_ofileflags[old] & UF_RESERVED)) { _fdrelse(fdp, new); return (EBADF); } - fdp->fd_ofiles[new] = fp; + fg_ref(ofp); + proc_fdunlock(p); + + MALLOC_ZONE(nfp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + bzero(nfp, sizeof(struct fileproc)); + + proc_fdlock(p); + nfp->f_flags = ofp->f_flags; + nfp->f_fglob = ofp->f_fglob; + nfp->f_iocount = 0; + + fdp->fd_ofiles[new] = nfp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; - (void)fref(fp); if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; return (0); } -/* - * Close a file descriptor. - */ -struct close_args { - int fd; -}; -/* ARGSUSED */ + int -close(p, uap, retval) - struct proc *p; - struct close_args *uap; - register_t *retval; +close(struct proc *p, struct close_args *uap, __unused register_t *retval) { + struct fileproc *fp; int fd = uap->fd; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; + int error =0; AUDIT_SYSCLOSE(p, fd); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); + + proc_fdlock(p); + + if ( (error = fp_lookup(p,fd,&fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + + error = close_internal(p, fd, fp, CLOSEINT_LOCKED | CLOSEINT_WAITONCLOSE); + + proc_fdunlock(p); + + return(error); +} + + +/* + * Close a file descriptor. 
+ */ +int +close_internal(struct proc *p, int fd, struct fileproc *fp, int flags) +{ + struct filedesc *fdp = p->p_fd; + int error =0; + int locked = flags & CLOSEINT_LOCKED; + int waitonclose = flags & CLOSEINT_WAITONCLOSE; + int norelse = flags & CLOSEINT_NOFDRELSE; + int nofdref = flags & CLOSEINT_NOFDNOREF; + int slpstate = PRIBIO; + + if (!locked) + proc_fdlock(p); /* Keep people from using the filedesc while we are closing it */ fdp->fd_ofileflags[fd] |= UF_RESERVED; - - /* cancel all async IO requests that can be cancelled. */ - _aio_close( p, fd ); - if (fd < fdp->fd_knlistsize) + fdp->fd_ofileflags[fd] |= UF_CLOSING; + + + if ((waitonclose && ((fp->f_flags & FP_CLOSING) == FP_CLOSING))) { + if (nofdref == 0) + fp_drop(p, fd, fp, 1); + fp->f_flags |= FP_WAITCLOSE; + if (!locked) + slpstate |= PDROP; + msleep(&fp->f_flags, &p->p_fdmlock, slpstate, "close wait",0) ; + return(EBADF); + } + + fp->f_flags |= FP_CLOSING; + if (nofdref) + fp->f_iocount++; + + if ( (fp->f_flags & FP_AIOISSUED) || kauth_authorize_fileop_has_listeners() ) { + + proc_fdunlock(p); + + if ( (fp->f_type == DTYPE_VNODE) && kauth_authorize_fileop_has_listeners() ) { + /* + * call out to allow 3rd party notification of close. + * Ignore result of kauth_authorize_fileop call. + */ + if (vnode_getwithref((vnode_t)fp->f_data) == 0) { + u_int fileop_flags = 0; + if ((fp->f_flags & FP_WRITTEN) != 0) + fileop_flags |= KAUTH_FILEOP_CLOSE_MODIFIED; + kauth_authorize_fileop(fp->f_fglob->fg_cred, KAUTH_FILEOP_CLOSE, + (uintptr_t)fp->f_data, (uintptr_t)fileop_flags); + vnode_put((vnode_t)fp->f_data); + } + } + if (fp->f_flags & FP_AIOISSUED) + /* + * cancel all async IO requests that can be cancelled. + */ + _aio_close( p, fd ); + + proc_fdlock(p); + } + + if (fd < fdp->fd_knlistsize) knote_fdclose(p, fd); - _fdrelse(fdp, fd); - return (closef(fp, p)); + if (fp->f_flags & FP_WAITEVENT) + (void)waitevent_close(p, fp); + + if ((fp->f_flags & FP_INCHRREAD) == 0) + fileproc_drain(p, fp); + if (norelse == 0) + _fdrelse(fdp, fd); + error = closef_locked(fp, fp->f_fglob, p); + if ((fp->f_flags & FP_WAITCLOSE) == FP_WAITCLOSE) + wakeup(&fp->f_flags); + fp->f_flags &= ~(FP_WAITCLOSE | FP_CLOSING); + + if (!locked) + proc_fdunlock(p); + + FREE_ZONE(fp, sizeof *fp, M_FILEPROC); + return(error); } /* * Return status information about a file descriptor. + * + * XXX switch on node type is bogus; need a stat in struct fileops instead. 
*/ -struct fstat_args { - int fd; - struct stat *sb; -}; -/* ARGSUSED */ -int -fstat(p, uap, retval) - struct proc *p; - register struct fstat_args *uap; - register_t *retval; +static int +fstat1(struct proc *p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) { - int fd = uap->fd; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - struct stat ub; - int error; + struct fileproc *fp; + struct stat sb; + struct user_stat user_sb; + int error, my_size; + int funnel_state; + short type; + caddr_t data; + kauth_filesec_t fsec; + ssize_t xsecurity_bufsize; + int entrycount; + struct vfs_context context; - AUDIT_ARG(fd, uap->fd); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - switch (fp->f_type) { + + AUDIT_ARG(fd, fd); + + if ((error = fp_lookup(p, fd, &fp, 0)) != 0) + return(error); + type = fp->f_type; + data = fp->f_data; + fsec = KAUTH_FILESEC_NONE; + + switch (type) { case DTYPE_VNODE: - error = vn_stat((struct vnode *)fp->f_data, &ub, p); - if (error == 0) { - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + if ((error = vnode_getwithref((vnode_t)data)) == 0) { + /* + * If the caller has the file open, and is not requesting extended security, + * we are going to let them get the basic stat information. + */ + if (xsecurity == USER_ADDR_NULL) { + error = vn_stat_noauth((vnode_t)data, &sb, NULL, &context); + } else { + error = vn_stat((vnode_t)data, &sb, &fsec, &context); + } + + AUDIT_ARG(vnpath, (struct vnode *)data, ARG_VNODE1); + (void)vnode_put((vnode_t)data); } break; case DTYPE_SOCKET: - error = soo_stat((struct socket *)fp->f_data, &ub); + error = soo_stat((struct socket *)data, &sb); + break; + + case DTYPE_PIPE: + error = pipe_stat((void *)data, &sb); break; case DTYPE_PSXSHM: - error = pshm_stat((void *)fp->f_data, &ub); + error = pshm_stat((void *)data, &sb); break; case DTYPE_KQUEUE: - error = kqueue_stat(fp, &ub, p); - break; + funnel_state = thread_funnel_set(kernel_flock, TRUE); + error = kqueue_stat(fp, &sb, p); + thread_funnel_set(kernel_flock, funnel_state); + break; default: - panic("fstat"); - /*NOTREACHED*/ + error = EBADF; + goto out; + } + /* Zap spare fields */ + sb.st_lspare = 0; + sb.st_qspare[0] = 0LL; + sb.st_qspare[1] = 0LL; + if (error == 0) { + caddr_t sbp; + if (IS_64BIT_PROCESS(current_proc())) { + munge_stat(&sb, &user_sb); + my_size = sizeof(user_sb); + sbp = (caddr_t)&user_sb; + } + else { + my_size = sizeof(sb); + sbp = (caddr_t)&sb; + } + error = copyout(sbp, ub, my_size); } - if (error == 0) - error = copyout((caddr_t)&ub, (caddr_t)uap->sb, - sizeof (ub)); - return (error); -} - -#if COMPAT_43 -/* - * Return status information about a file descriptor. - */ -struct ofstat_args { - int fd; - struct ostat *sb; -}; -/* ARGSUSED */ -ofstat(p, uap, retval) - struct proc *p; - register struct ofstat_args *uap; - register_t *retval; -{ - int fd = uap->fd; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - struct stat ub; - struct ostat oub; - int error; - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - switch (fp->f_type) { + /* caller wants extended security information? */ + if (xsecurity != USER_ADDR_NULL) { - case DTYPE_VNODE: - error = vn_stat((struct vnode *)fp->f_data, &ub, p); - break; + /* did we get any? 
*/ + if (fsec == KAUTH_FILESEC_NONE) { + if (susize(xsecurity_size, 0) != 0) { + error = EFAULT; + goto out; + } + } else { + /* find the user buffer size */ + xsecurity_bufsize = fusize(xsecurity_size); - case DTYPE_SOCKET: - error = soo_stat((struct socket *)fp->f_data, &ub); - break; + /* copy out the actual data size */ + if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) { + error = EFAULT; + goto out; + } - default: - panic("ofstat"); - /*NOTREACHED*/ + /* if the caller supplied enough room, copy out to it */ + if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) + error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec)); + } } - cvtstat(&ub, &oub); - if (error == 0) - error = copyout((caddr_t)&oub, (caddr_t)uap->sb, - sizeof (oub)); +out: + fp_drop(p, fd, fp, 0); + if (fsec != NULL) + kauth_filesec_free(fsec); return (error); } -#endif /* COMPAT_43 */ + +int +fstat_extended(struct proc *p, struct fstat_extended_args *uap, __unused register_t *retval) +{ + return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size)); +} + +int +fstat(struct proc *p, register struct fstat_args *uap, __unused register_t *retval) +{ + return(fstat1(p, uap->fd, uap->ub, 0, 0)); +} /* * Return pathconf information about a file descriptor. */ -struct fpathconf_args { - int fd; - int name; -}; -/* ARGSUSED */ +int fpathconf(p, uap, retval) struct proc *p; register struct fpathconf_args *uap; register_t *retval; { int fd = uap->fd; - struct filedesc *fdp = p->p_fd; - struct file *fp; + struct fileproc *fp; struct vnode *vp; + struct vfs_context context; + int error = 0; + short type; + caddr_t data; + AUDIT_ARG(fd, uap->fd); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - switch (fp->f_type) { + if ( (error = fp_lookup(p, fd, &fp, 0)) ) + return(error); + type = fp->f_type; + data = fp->f_data; + + switch (type) { case DTYPE_SOCKET: - if (uap->name != _PC_PIPE_BUF) - return (EINVAL); + if (uap->name != _PC_PIPE_BUF) { + error = EINVAL; + goto out; + } *retval = PIPE_BUF; - return (0); + error = 0; + goto out; + + case DTYPE_PIPE: + *retval = PIPE_BUF; + error = 0; + goto out; case DTYPE_VNODE: - vp = (struct vnode *)fp->f_data; - AUDIT_ARG(vnpath, vp, ARG_VNODE1); + vp = (struct vnode *)data; + + if ( (error = vnode_getwithref(vp)) == 0) { + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + error = vn_pathconf(vp, uap->name, retval, &context); + + (void)vnode_put(vp); + } + goto out; - return (VOP_PATHCONF(vp, uap->name, retval)); + case DTYPE_PSXSHM: + case DTYPE_KQUEUE: + error = EINVAL; + goto out; default: - panic("fpathconf"); + panic("fpathconf (unrecognized - %d)", type); } /*NOTREACHED*/ +out: + fp_drop(p, fd, fp, 0); + return(error); } /* @@ -876,8 +1272,8 @@ fdalloc(p, want, result) { register struct filedesc *fdp = p->p_fd; register int i; - int lim, last, nfiles, oldnfiles; - struct file **newofiles, **ofiles; + int lim, last, numfiles, oldnfiles; + struct fileproc **newofiles, **ofiles; char *newofileflags, *ofileflags; /* @@ -911,19 +1307,24 @@ fdalloc(p, want, result) if (fdp->fd_nfiles >= lim) return (EMFILE); if (fdp->fd_nfiles < NDEXTENT) - nfiles = NDEXTENT; + numfiles = NDEXTENT; else - nfiles = 2 * fdp->fd_nfiles; + numfiles = 2 * fdp->fd_nfiles; /* Enforce lim */ - if (nfiles > lim) - nfiles = lim; - MALLOC_ZONE(newofiles, struct file **, - nfiles * OFILESIZE, M_OFILETABL, M_WAITOK); - if (fdp->fd_nfiles >= nfiles) 
{
-		FREE_ZONE(newofiles, nfiles * OFILESIZE, M_OFILETABL);
+		if (numfiles > lim)
+			numfiles = lim;
+		proc_fdunlock(p);
+		MALLOC_ZONE(newofiles, struct fileproc **,
+				numfiles * OFILESIZE, M_OFILETABL, M_WAITOK);
+		proc_fdlock(p);
+		if (newofiles == NULL) {
+			return (ENOMEM);
+		}
+		if (fdp->fd_nfiles >= numfiles) {
+			FREE_ZONE(newofiles, numfiles * OFILESIZE, M_OFILETABL);
 			continue;
 		}
-		newofileflags = (char *) &newofiles[nfiles];
+		newofileflags = (char *) &newofiles[numfiles];
 		/*
 		 * Copy the existing ofile and ofileflags arrays
 		 * and zero the new portion of each array.
@@ -932,90 +1333,517 @@ fdalloc(p, want, result)
 		(void) memcpy(newofiles, fdp->fd_ofiles,
 				oldnfiles * sizeof *fdp->fd_ofiles);
 		(void) memset(&newofiles[oldnfiles], 0,
-				(nfiles - oldnfiles) * sizeof *fdp->fd_ofiles);
+				(numfiles - oldnfiles) * sizeof *fdp->fd_ofiles);
 		(void) memcpy(newofileflags, fdp->fd_ofileflags,
 				oldnfiles * sizeof *fdp->fd_ofileflags);
 		(void) memset(&newofileflags[oldnfiles], 0,
-				(nfiles - oldnfiles) *
+				(numfiles - oldnfiles) *
 				sizeof *fdp->fd_ofileflags);
 		ofiles = fdp->fd_ofiles;
 		fdp->fd_ofiles = newofiles;
 		fdp->fd_ofileflags = newofileflags;
-		fdp->fd_nfiles = nfiles;
+		fdp->fd_nfiles = numfiles;
 		FREE_ZONE(ofiles, oldnfiles * OFILESIZE, M_OFILETABL);
 		fdexpand++;
 	}
 }
 
-/*
- * Check to see whether n user file descriptors
- * are available to the process p.
- */
+/*
+ * Check to see whether n user file descriptors
+ * are available to the process p.
+ */
+int
+fdavail(p, n)
+	struct proc *p;
+	int n;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct fileproc **fpp;
+	char *flags;
+	int i, lim;
+
+	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
+	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+		return (1);
+	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+	flags = &fdp->fd_ofileflags[fdp->fd_freefile];
+	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++)
+		if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0)
+			return (1);
+	return (0);
+}
+
+void
+fdrelse(p, fd)
+	struct proc *p;
+	int fd;
+{
+	_fdrelse(p->p_fd, fd);
+}
+
+void
+fddrop(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct fileproc *fp;
+
+	if (fd < fdp->fd_freefile)
+		fdp->fd_freefile = fd;
+#if DIAGNOSTIC
+	if (fd > fdp->fd_lastfile)
+		panic("fddrop: fd_lastfile inconsistent");
+#endif
+	fp = fdp->fd_ofiles[fd];
+	fdp->fd_ofiles[fd] = NULL;
+	fdp->fd_ofileflags[fd] = 0;
+
+	while ((fd = fdp->fd_lastfile) > 0 &&
+			fdp->fd_ofiles[fd] == NULL &&
+			!(fdp->fd_ofileflags[fd] & UF_RESERVED))
+		fdp->fd_lastfile--;
+	FREE_ZONE(fp, sizeof *fp, M_FILEPROC);
+}
+
+
+int
+fdgetf_noref(p, fd, resultfp)
+	struct proc *p;
+	int fd;
+	struct fileproc **resultfp;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct fileproc *fp;
+
+	if (fd < 0 || fd >= fdp->fd_nfiles ||
+			(fp = fdp->fd_ofiles[fd]) == NULL ||
+			(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
+		return (EBADF);
+	}
+	if (resultfp)
+		*resultfp = fp;
+	return (0);
+}
+
+
+/* fp_setflags and fp_clearflags take proc_fdlock themselves; do not call them with it held */
+void
+fp_setflags(proc_t p, struct fileproc * fp, int flags)
+{
+	proc_fdlock(p);
+	fp->f_flags |= flags;
+	proc_fdunlock(p);
+}
+
+void
+fp_clearflags(proc_t p, struct fileproc * fp, int flags)
+{
+
+	proc_fdlock(p);
+	if (fp)
+		fp->f_flags &= ~flags;
+	proc_fdunlock(p);
+}
+
+int
+fp_getfvp(p, fd, resultfp, resultvp)
+	struct proc *p;
+	int fd;
+	struct fileproc **resultfp;
+	struct vnode **resultvp;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct fileproc *fp;
+
+	proc_fdlock(p);
+	if (fd < 0 || fd >= fdp->fd_nfiles ||
+
(fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_VNODE) { + proc_fdunlock(p); + return(ENOTSUP); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultvp) + *resultvp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + + +/* + * Returns: EBADF The file descriptor is invalid + * EOPNOTSUPP The file descriptor is not a socket + * 0 Success + * + * Notes: EOPNOTSUPP should probably be ENOTSOCK; this function is only + * ever called from accept1(). + */ +int +fp_getfsock(p, fd, resultfp, results) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct socket **results; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_SOCKET) { + proc_fdunlock(p); + return(EOPNOTSUPP); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (results) + *results = (struct socket *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + + +int +fp_getfkq(p, fd, resultfp, resultkq) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct kqueue **resultkq; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if ( fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_KQUEUE) { + proc_fdunlock(p); + return(EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultkq) + *resultkq = (struct kqueue *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + +int +fp_getfpshm(p, fd, resultfp, resultpshm) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct pshmnode **resultpshm; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_PSXSHM) { + + proc_fdunlock(p); + return(EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultpshm) + *resultpshm = (struct pshmnode *)fp->f_data; + proc_fdunlock(p); + + return (0); +} + + +int +fp_getfpsem(p, fd, resultfp, resultpsem) + struct proc *p; + int fd; + struct fileproc **resultfp; + struct psemnode **resultpsem; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } + if (fp->f_type != DTYPE_PSXSEM) { + proc_fdunlock(p); + return(EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (resultpsem) + *resultpsem = (struct psemnode *)fp->f_data; + proc_fdunlock(p); + + return (0); +} +int +fp_lookup(p, fd, resultfp, locked) + struct proc *p; + int fd; + struct fileproc **resultfp; + int locked; +{ + struct filedesc *fdp = p->p_fd; + struct fileproc *fp; + + if (!locked) + proc_fdlock(p); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + (fdp->fd_ofileflags[fd] & UF_RESERVED)) { + if (!locked) + proc_fdunlock(p); + return (EBADF); + } + fp->f_iocount++; + + if (resultfp) + *resultfp = fp; + if (!locked) + proc_fdunlock(p); + + return (0); +} + +int 
+fp_drop_written(proc_t p, int fd, struct fileproc *fp) +{ + int error; + + proc_fdlock(p); + + fp->f_flags |= FP_WRITTEN; + + error = fp_drop(p, fd, fp, 1); + + proc_fdunlock(p); + + return (error); +} + + +int +fp_drop_event(proc_t p, int fd, struct fileproc *fp) +{ + int error; + + proc_fdlock(p); + + fp->f_flags |= FP_WAITEVENT; + + error = fp_drop(p, fd, fp, 1); + + proc_fdunlock(p); + + return (error); +} + int -fdavail(p, n) +fp_drop(p, fd, fp, locked) struct proc *p; - register int n; + int fd; + struct fileproc *fp; + int locked; { - register struct filedesc *fdp = p->p_fd; - register struct file **fpp; - register char *flags; - register int i, lim; + struct filedesc *fdp = p->p_fd; - lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); - if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) - return (1); - fpp = &fdp->fd_ofiles[fdp->fd_freefile]; - flags = &fdp->fd_ofileflags[fdp->fd_freefile]; - for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++, flags++) - if (*fpp == NULL && !(*flags & UF_RESERVED) && --n <= 0) - return (1); + if (!locked) + proc_fdlock(p); + if ((fp == FILEPROC_NULL) && (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + ((fdp->fd_ofileflags[fd] & UF_RESERVED) && + !(fdp->fd_ofileflags[fd] & UF_CLOSING)))) { + if (!locked) + proc_fdunlock(p); + return (EBADF); + } + fp->f_iocount--; + + if (p->p_fpdrainwait && fp->f_iocount == 0) { + p->p_fpdrainwait = 0; + wakeup(&p->p_fpdrainwait); + } + if (!locked) + proc_fdunlock(p); + return (0); } -void -fdrelse(p, fd) - struct proc *p; - int fd; +int +file_vnode(int fd, struct vnode **vpp) { - _fdrelse(p->p_fd, fd); + struct proc * p = current_proc(); + struct fileproc *fp; + int error; + + proc_fdlock(p); + if ( (error = fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + if (fp->f_type != DTYPE_VNODE) { + fp_drop(p, fd, fp,1); + proc_fdunlock(p); + return(EINVAL); + } + *vpp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + return(0); } + int -fdgetf(p, fd, resultfp) - register struct proc *p; - register int fd; - struct file **resultfp; +file_socket(int fd, struct socket **sp) { - register struct filedesc *fdp = p->p_fd; - struct file *fp; + struct proc * p = current_proc(); + struct fileproc *fp; + int error; + + proc_fdlock(p); + if ( (error = fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + if (fp->f_type != DTYPE_SOCKET) { + fp_drop(p, fd, fp,1); + proc_fdunlock(p); + return(ENOTSOCK); + } + *sp = (struct socket *)fp->f_data; + proc_fdunlock(p); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) + return(0); +} + +int +file_flags(int fd, int * flags) +{ + + struct proc * p = current_proc(); + struct fileproc *fp; + int error; + + proc_fdlock(p); + if ( (error = fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + *flags = (int)fp->f_flag; + fp_drop(p, fd, fp,1); + proc_fdunlock(p); + + return(0); +} + + +int +file_drop(int fd) +{ + struct fileproc *fp; + struct proc *p = current_proc(); + + proc_fdlock(p); + if (fd < 0 || fd >= p->p_fd->fd_nfiles || + (fp = p->p_fd->fd_ofiles[fd]) == NULL || + ((p->p_fd->fd_ofileflags[fd] & UF_RESERVED) && + !(p->p_fd->fd_ofileflags[fd] & UF_CLOSING))) { + proc_fdunlock(p); return (EBADF); + } + fp->f_iocount --; + + if (p->p_fpdrainwait && fp->f_iocount == 0) { + p->p_fpdrainwait = 0; + wakeup(&p->p_fpdrainwait); + } + proc_fdunlock(p); + return(0); + - if (resultfp) - *resultfp = fp; - return (0); } 
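Every helper above follows the same discipline: fp_lookup() (or one of the
typed fp_getf*() wrappers) raises f_iocount so close_internal() cannot free
the fileproc mid-operation, and fp_drop() lowers it again, waking any thread
draining the descriptor.  A sketch of the pattern for a hypothetical in-kernel
consumer (example_with_fd_vnode is an invented name):

	/*
	 * Illustrative sketch, not part of this patch: borrow the vnode
	 * behind a descriptor without holding the proc fd lock across
	 * the actual work.
	 */
	static int
	example_with_fd_vnode(struct proc *p, int fd)
	{
		struct fileproc *fp;
		struct vnode *vp;
		int error;

		if ((error = fp_getfvp(p, fd, &fp, &vp)))
			return (error);		/* EBADF or ENOTSUP */
		if ((error = vnode_getwithref(vp)) == 0) {
			/* ... operate on vp ... */
			(void)vnode_put(vp);
		}
		fp_drop(p, fd, fp, 0);		/* release the iocount */
		return (error);
	}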
+int +falloc(p, resultfp, resultfd ) + struct proc *p; + struct fileproc **resultfp; + int *resultfd; +{ + int error; + + proc_fdlock(p); + error = falloc_locked(p, resultfp, resultfd, 1); + proc_fdunlock(p); + + return(error); +} /* * Create a new open file structure and allocate * a file decriptor for the process that refers to it. */ int -falloc(p, resultfp, resultfd) - register struct proc *p; - struct file **resultfp; +falloc_locked(p, resultfp, resultfd, locked) + struct proc *p; + struct fileproc **resultfp; int *resultfd; + int locked; { - register struct file *fp, *fq; - int error, i; - - if (error = fdalloc(p, 0, &i)) + struct fileproc *fp, *fq; + struct fileglob *fg; + int error, nfd; + + if (!locked) + proc_fdlock(p); + if ( (error = fdalloc(p, 0, &nfd)) ) { + if (!locked) + proc_fdunlock(p); return (error); + } if (nfiles >= maxfiles) { + if (!locked) + proc_fdunlock(p); tablefull("file"); return (ENFILE); } @@ -1025,22 +1853,43 @@ falloc(p, resultfp, resultfd) * of open files at that point, otherwise put it at the front of * the list of open files. */ + proc_fdunlock(p); + + MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK); + bzero(fp, sizeof(struct fileproc)); + bzero(fg, sizeof(struct fileglob)); + lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr); + + fp->f_iocount = 1; + fg->fg_count = 1; + fp->f_fglob = fg; + + proc_fdlock(p); + + fp->f_cred = kauth_cred_proc_ref(p); + + lck_mtx_lock(file_flist_lock); + nfiles++; - MALLOC_ZONE(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); - bzero(fp, sizeof(struct file)); - p->p_fd->fd_ofiles[i] = fp; - fp->f_count = 1; - fp->f_cred = p->p_ucred; - crhold(fp->f_cred); + + if ( (fq = p->p_fd->fd_ofiles[0]) ) { + LIST_INSERT_AFTER(fq->f_fglob, fg, f_list); + } else { + LIST_INSERT_HEAD(&filehead, fg, f_list); + } + lck_mtx_unlock(file_flist_lock); + + p->p_fd->fd_ofiles[nfd] = fp; + + if (!locked) + proc_fdunlock(p); + if (resultfp) *resultfp = fp; if (resultfd) - *resultfd = i; - if (fq = p->p_fd->fd_ofiles[0]) { - LIST_INSERT_AFTER(fq, fp, f_list); - } else { - LIST_INSERT_HEAD(&filehead, fp, f_list); - } + *resultfd = nfd; + return (0); } @@ -1048,38 +1897,42 @@ falloc(p, resultfp, resultfd) * Free a file structure. 
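+ * A fileglob holds the per-open state shared by every descriptor
+ * dup'd from the same open; it is freed only once its last reference
+ * is released (see closef_locked and fp_free below).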
*/ void -ffree(fp) - register struct file *fp; +fg_free(fg) + struct fileglob *fg; { - register struct file *fq; - struct ucred *cred; + kauth_cred_t cred; + + lck_mtx_lock(file_flist_lock); + LIST_REMOVE(fg, f_list); + nfiles--; + lck_mtx_unlock(file_flist_lock); - LIST_REMOVE(fp, f_list); - cred = fp->f_cred; + cred = fg->fg_cred; if (cred != NOCRED) { - fp->f_cred = NOCRED; - crfree(cred); + fg->fg_cred = NOCRED; + kauth_cred_rele(cred); } + lck_mtx_destroy(&fg->fg_lock, file_lck_grp); - nfiles--; - memset(fp, 0xff, sizeof *fp); - fp->f_count = (short)0xffff; - - FREE_ZONE(fp, sizeof *fp, M_FILE); + FREE_ZONE(fg, sizeof *fg, M_FILEGLOB); } void fdexec(p) struct proc *p; { - register struct filedesc *fdp = p->p_fd; - register int i = fdp->fd_lastfile; - register struct file **fpp = &fdp->fd_ofiles[i]; - register char *flags = &fdp->fd_ofileflags[i]; + struct filedesc *fdp = p->p_fd; + int i = fdp->fd_lastfile; + struct fileproc **fpp = &fdp->fd_ofiles[i]; + char *flags = &fdp->fd_ofileflags[i]; + int funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, FALSE); + proc_fdlock(p); while (i >= 0) { if ((*flags & (UF_RESERVED|UF_EXCLOSE)) == UF_EXCLOSE) { - register struct file *fp = *fpp; + struct fileproc *fp = *fpp; if (i < fdp->fd_knlistsize) knote_fdclose(p, i); @@ -1087,13 +1940,14 @@ fdexec(p) *fpp = NULL; *flags = 0; if (i == fdp->fd_lastfile && i > 0) fdp->fd_lastfile--; - closef(fp, p); + closef_locked(fp, fp->f_fglob, p); + FREE_ZONE(fp, sizeof *fp, M_FILEPROC); } - else - *flags &= ~UF_MAPPED; i--; fpp--; flags--; } + proc_fdunlock(p); + thread_funnel_set(kernel_flock, funnel_state); } /* @@ -1103,15 +1957,74 @@ struct filedesc * fdcopy(p) struct proc *p; { - register struct filedesc *newfdp, *fdp = p->p_fd; - register int i; + struct filedesc *newfdp, *fdp = p->p_fd; + int i; + struct fileproc *ofp, *fp; + vnode_t v_dir; MALLOC_ZONE(newfdp, struct filedesc *, sizeof *newfdp, M_FILEDESC, M_WAITOK); + if (newfdp == NULL) + return(NULL); + + proc_fdlock(p); + + /* + * the FD_CHROOT flag will be inherited via this copy + */ (void) memcpy(newfdp, fdp, sizeof *newfdp); - VREF(newfdp->fd_cdir); - if (newfdp->fd_rdir) - VREF(newfdp->fd_rdir); + + /* + * for both fd_cdir and fd_rdir make sure we get + * a valid reference... if we can't, than set + * set the pointer(s) to NULL in the child... this + * will keep us from using a non-referenced vp + * and allows us to do the vnode_rele only on + * a properly referenced vp + */ + if ( (v_dir = newfdp->fd_cdir) ) { + if (vnode_getwithref(v_dir) == 0) { + if ( (vnode_ref(v_dir)) ) + newfdp->fd_cdir = NULL; + vnode_put(v_dir); + } else + newfdp->fd_cdir = NULL; + } + if (newfdp->fd_cdir == NULL && fdp->fd_cdir) { + /* + * we couldn't get a new reference on + * the current working directory being + * inherited... we might as well drop + * our reference from the parent also + * since the vnode has gone DEAD making + * it useless... by dropping it we'll + * be that much closer to recyling it + */ + vnode_rele(fdp->fd_cdir); + fdp->fd_cdir = NULL; + } + + if ( (v_dir = newfdp->fd_rdir) ) { + if (vnode_getwithref(v_dir) == 0) { + if ( (vnode_ref(v_dir)) ) + newfdp->fd_rdir = NULL; + vnode_put(v_dir); + } else + newfdp->fd_rdir = NULL; + } + if (newfdp->fd_rdir == NULL && fdp->fd_rdir) { + /* + * we couldn't get a new reference on + * the root directory being + * inherited... we might as well drop + * our reference from the parent also + * since the vnode has gone DEAD making + * it useless... 
by dropping it we'll + * be that much closer to recyling it + */ + vnode_rele(fdp->fd_rdir); + fdp->fd_rdir = NULL; + } newfdp->fd_refcnt = 1; /* @@ -1132,13 +2045,27 @@ fdcopy(p) while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; } - MALLOC_ZONE(newfdp->fd_ofiles, struct file **, + proc_fdunlock(p); + + MALLOC_ZONE(newfdp->fd_ofiles, struct fileproc **, i * OFILESIZE, M_OFILETABL, M_WAITOK); + if (newfdp->fd_ofiles == NULL) { + if (newfdp->fd_cdir) + vnode_rele(newfdp->fd_cdir); + if (newfdp->fd_rdir) + vnode_rele(newfdp->fd_rdir); + + FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC); + return(NULL); + } + proc_fdlock(p); + newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; newfdp->fd_nfiles = i; + if (fdp->fd_nfiles > 0) { - register struct file **fpp; - register char *flags; + struct fileproc **fpp; + char *flags; (void) memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof *fdp->fd_ofiles); @@ -1164,12 +2091,19 @@ fdcopy(p) newfdp->fd_knhash = NULL; newfdp->fd_knhashmask = 0; } - fpp = newfdp->fd_ofiles; flags = newfdp->fd_ofileflags; + for (i = newfdp->fd_lastfile; i-- >= 0; fpp++, flags++) - if (*fpp != NULL && !(*flags & UF_RESERVED)) { - (void)fref(*fpp); + if ((ofp = *fpp) != NULL && !(*flags & UF_RESERVED)) { + MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + bzero(fp, sizeof(struct fileproc)); + fp->f_flags = ofp->f_flags; + //fp->f_iocount = ofp->f_iocount; + fp->f_iocount = 0; + fp->f_fglob = ofp->f_fglob; + (void)fg_ref(fp); + *fpp = fp; } else { *fpp = NULL; *flags = 0; @@ -1177,6 +2111,7 @@ fdcopy(p) } else (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE); + proc_fdunlock(p); return (newfdp); } @@ -1188,63 +2123,59 @@ fdfree(p) struct proc *p; { struct filedesc *fdp; - struct file *fp; + struct fileproc *fp; int i; - struct vnode *tvp; + + proc_fdlock(p); /* Certain daemons might not have file descriptors */ - if ((fdp = p->p_fd) == NULL) - return; + fdp = p->p_fd; - if (--fdp->fd_refcnt > 0) + if ((fdp == NULL) || (--fdp->fd_refcnt > 0)) { + proc_fdunlock(p); return; + } + if (fdp->fd_refcnt == 0xffff) + panic("fdfree: bad fd_refcnt"); /* Last reference: the structure can't change out from under us */ - if (fdp->fd_nfiles > 0) { - for (i = fdp->fd_lastfile; i >= 0; i--) -#if 1 /* WORKAROUND */ - /* - * Merlot: need to remove the bogus f_data check - * from the following "if" statement. It's there - * because of the network/kernel funnel race on a - * close of a socket vs. fdfree on exit. See - * Radar rdar://problem/3365650 for details, but - * the sort version is the commment before the "if" - * above is wrong under certain circumstances. - * - * We have to do this twice, in case knote_fdclose() - * results in a block. - * - * This works because an fdfree() will set all fields - * in the struct file to -1. - */ - if ((fp = fdp->fd_ofiles[i]) != NULL && - fp->f_data != (caddr_t)-1) { - if (i < fdp->fd_knlistsize) - knote_fdclose(p, i); - if (fp->f_data != (caddr_t)-1) - (void) closef(fp, p); - } -#else /* !WORKAROUND */ + + if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) { + for (i = fdp->fd_lastfile; i >= 0; i--) { if ((fp = fdp->fd_ofiles[i]) != NULL) { + + if (fdp->fd_ofileflags[i] & UF_RESERVED) + panic("fdfree: found fp with UF_RESERVED\n"); + + /* closef drops the iocount ... 
*/ + if ((fp->f_flags & FP_INCHRREAD) != 0) + fp->f_iocount++; + fdp->fd_ofiles[i] = NULL; + fdp->fd_ofileflags[i] |= UF_RESERVED; + if (i < fdp->fd_knlistsize) knote_fdclose(p, i); - (void) closef(fp, p); + if (fp->f_flags & FP_WAITEVENT) + (void)waitevent_close(p, fp); + (void) closef_locked(fp, fp->f_fglob, p); + FREE_ZONE(fp, sizeof *fp, M_FILEPROC); } -#endif /* !WORKAROUND */ - FREE_ZONE(fdp->fd_ofiles, - fdp->fd_nfiles * OFILESIZE, M_OFILETABL); - } + } + FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL); + fdp->fd_ofiles = NULL; + fdp->fd_nfiles = 0; + } - tvp = fdp->fd_cdir; - fdp->fd_cdir = NULL; - vrele(tvp); + proc_fdunlock(p); + + if (fdp->fd_cdir) + vnode_rele(fdp->fd_cdir); + if (fdp->fd_rdir) + vnode_rele(fdp->fd_rdir); - if (fdp->fd_rdir) { - tvp = fdp->fd_rdir; - fdp->fd_rdir = NULL; - vrele(tvp); - } + proc_fdlock(p); + p->p_fd = NULL; + proc_fdunlock(p); if (fdp->fd_knlist) FREE(fdp->fd_knlist, M_KQUEUE); @@ -1252,39 +2183,60 @@ fdfree(p) FREE(fdp->fd_knhash, M_KQUEUE); FREE_ZONE(fdp, sizeof *fdp, M_FILEDESC); - - // XXXdbg - { - void clean_up_fmod_watch(struct proc *p); - clean_up_fmod_watch(p); - } } static int -closef_finish(fp, p) - register struct file *fp; - register struct proc *p; +closef_finish(fp, fg, p) + struct fileproc *fp; + struct fileglob *fg; + struct proc *p; { struct vnode *vp; struct flock lf; int error; + struct vfs_context context; - if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { + if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = (struct vnode *)fp->f_data; - (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); + vp = (struct vnode *)fg->fg_data; + context.vc_proc = p; + context.vc_ucred = fg->fg_cred; + + (void) VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, &context); } - if (fp->f_ops) - error = fo_close(fp, p); + if (fg->fg_ops) + error = fo_close(fg, p); else error = 0; - ffree(fp); + + if (((fp != (struct fileproc *)0) && ((fp->f_flags & FP_INCHRREAD) != 0))) { + proc_fdlock(p); + if ( ((fp->f_flags & FP_INCHRREAD) != 0) ) { + fileproc_drain(p, fp); + } + proc_fdunlock(p); + } + fg_free(fg); + return (error); } +int +closef(fg, p) + struct fileglob *fg; + struct proc *p; +{ + int error; + + proc_fdlock(p); + error = closef_locked((struct fileproc *)0, fg, p); + proc_fdunlock(p); + + return(error); +} /* * Internal form of close. * Decrement reference count on file structure. @@ -1292,16 +2244,19 @@ closef_finish(fp, p) * that was being passed in a message. */ int -closef(fp, p) - register struct file *fp; - register struct proc *p; +closef_locked(fp, fg, p) + struct fileproc *fp; + struct fileglob *fg; + struct proc *p; { struct vnode *vp; struct flock lf; + struct vfs_context context; int error; - if (fp == NULL) + if (fg == NULL) { return (0); + } /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting @@ -1310,70 +2265,140 @@ closef(fp, p) * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. 
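+ * (hence the unlock request below is issued only when a process was
+ * supplied and it has P_ADVLOCK set)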
*/ - if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { + if (p && (p->p_flag & P_ADVLOCK) && fg->fg_type == DTYPE_VNODE) { + proc_fdunlock(p); + lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = (struct vnode *)fp->f_data; - (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX); + vp = (struct vnode *)fg->fg_data; + + if ( (error = vnode_getwithref(vp)) == 0 ) { + context.vc_proc = p; + context.vc_ucred = fg->fg_cred; + (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context); + + (void)vnode_put(vp); + } + proc_fdlock(p); } - if (frele_internal(fp) > 0) + lck_mtx_lock(&fg->fg_lock); + fg->fg_count--; + + if (fg->fg_count > 0) { + lck_mtx_unlock(&fg->fg_lock); return (0); - return(closef_finish(fp, p)); + } + if (fg->fg_count != 0) + panic("fg: being freed with bad fg_count (%d)", fg, fg->fg_count); + + if (fp && (fp->f_flags & FP_WRITTEN)) + fg->fg_flag |= FWASWRITTEN; + + fg->fg_lflags |= FG_TERM; + lck_mtx_unlock(&fg->fg_lock); + + proc_fdunlock(p); + error = closef_finish(fp, fg, p); + proc_fdlock(p); + + return(error); +} + + +extern int selwait; +void +fileproc_drain(struct proc *p, struct fileproc * fp) +{ + fp->f_iocount-- ; /* (the one the close holds) */ + + while (fp->f_iocount) { + if (((fp->f_flags & FP_INSELECT)== FP_INSELECT)) { + wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, &selwait, THREAD_INTERRUPTED); + } else { + if (fp->f_fglob->fg_ops->fo_drain) { + (*fp->f_fglob->fg_ops->fo_drain)(fp, p); + } + } + p->p_fpdrainwait = 1; + + msleep(&p->p_fpdrainwait, &p->p_fdmlock, PRIBIO, "fpdrain",0); + + //panic("successful wait after drain\n"); + } +} + +int +fp_free(struct proc * p, int fd, struct fileproc * fp) +{ + proc_fdlock(p); + fdrelse(p, fd); + proc_fdunlock(p); + + fg_free(fp->f_fglob); + FREE_ZONE(fp, sizeof *fp, M_FILEPROC); } + /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 
*/ -struct flock_args { - int fd; - int how; -}; -/* ARGSUSED */ int -flock(p, uap, retval) - struct proc *p; - register struct flock_args *uap; - register_t *retval; +flock(struct proc *p, register struct flock_args *uap, __unused register_t *retval) { int fd = uap->fd; int how = uap->how; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; + struct fileproc *fp; struct vnode *vp; struct flock lf; + struct vfs_context context; + int error=0; AUDIT_ARG(fd, uap->fd); - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (EOPNOTSUPP); - vp = (struct vnode *)fp->f_data; + if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) { + return(error); + } + if ( (error = vnode_getwithref(vp)) ) { + goto out1; + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + context.vc_proc = p; + context.vc_ucred = fp->f_cred; + lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; - return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, &context); + goto out; } if (how & LOCK_EX) lf.l_type = F_WRLCK; else if (how & LOCK_SH) lf.l_type = F_RDLCK; - else - return (EBADF); + else { + error = EBADF; + goto out; + } fp->f_flag |= FHASLOCK; - if (how & LOCK_NB) - return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); - return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); + if (how & LOCK_NB) { + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, &context); + goto out; + } + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, &context); +out: + (void)vnode_put(vp); +out1: + fp_drop(p, fd, fp, 0); + return(error); + } /* @@ -1384,12 +2409,8 @@ flock(p, uap, retval) * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ -/* ARGSUSED */ int -fdopen(dev, mode, type, p) - dev_t dev; - int mode, type; - struct proc *p; +fdopen(dev_t dev, __unused int mode, __unused int type, struct proc *p) { /* @@ -1397,7 +2418,7 @@ fdopen(dev, mode, type, p) * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the - * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN + * actions in dupfdopen below. Other callers of vn_open or vnop_open * will simply report the error. */ p->p_dupfd = minor(dev); @@ -1414,8 +2435,9 @@ dupfdopen(fdp, indx, dfd, mode, error) int mode; int error; { - register struct file *wfp; - struct file *fp; + struct fileproc *wfp; + struct fileproc *fp; + struct proc * p = current_proc(); /* * If the to-be-dup'd fd number is greater than the allowed number @@ -1424,12 +2446,16 @@ dupfdopen(fdp, indx, dfd, mode, error) * falloc could allocate an already closed to-be-dup'd descriptor * as the new descriptor. */ + proc_fdlock(p); + fp = fdp->fd_ofiles[indx]; - if ((u_int)dfd >= fdp->fd_nfiles || + if (dfd < 0 || dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL || wfp == fp || - (fdp->fd_ofileflags[dfd] & UF_RESERVED)) - return (EBADF); + (fdp->fd_ofileflags[dfd] & UF_RESERVED)) { + proc_fdunlock(p); + return (EBADF); + } /* * There are two cases of interest here. 
* @@ -1448,13 +2474,21 @@ dupfdopen(fdp, indx, dfd, mode, error) * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ - if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) + if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { + proc_fdunlock(p); return (EACCES); - (void)fref(wfp); + } if (indx > fdp->fd_lastfile) - fdp->fd_lastfile = indx;; - fdp->fd_ofiles[indx] = wfp; + fdp->fd_lastfile = indx; + (void)fg_ref(wfp); + + if (fp->f_fglob) + fg_free(fp->f_fglob); + fp->f_fglob = wfp->f_fglob; + fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; + + proc_fdunlock(p); return (0); case ENXIO: @@ -1462,72 +2496,157 @@ dupfdopen(fdp, indx, dfd, mode, error) * Steal away the file pointer from dfd, and stuff it into indx. */ if (indx > fdp->fd_lastfile) - fdp->fd_lastfile = indx;; - fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + fdp->fd_lastfile = indx; + + if (fp->f_fglob) + fg_free(fp->f_fglob); + fp->f_fglob = wfp->f_fglob; + fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; _fdrelse(fdp, dfd); + + proc_fdunlock(p); + + FREE_ZONE(wfp, sizeof *fp, M_FILEPROC); + return (0); default: + proc_fdunlock(p); return (error); } /* NOTREACHED */ } -/* Reference manipulation routines for the file structure */ - -int -fref(struct file *fp) +void +fg_ref(struct fileproc * fp) { - if (fp->f_count == (short)0xffff) - return (-1); - if (++fp->f_count <= 0) - panic("fref: f_count"); - return ((int)fp->f_count); + struct fileglob *fg; + + fg = fp->f_fglob; + + lck_mtx_lock(&fg->fg_lock); + fg->fg_count++; + lck_mtx_unlock(&fg->fg_lock); } -static int -frele_internal(struct file *fp) +void +fg_drop(struct fileproc * fp) { - if (fp->f_count == (short)0xffff) - panic("frele: stale"); - if (--fp->f_count < 0) - panic("frele: count < 0"); - return ((int)fp->f_count); + struct fileglob *fg; + + fg = fp->f_fglob; + lck_mtx_lock(&fg->fg_lock); + fg->fg_count--; + lck_mtx_unlock(&fg->fg_lock); } -int -frele(struct file *fp) +void +fg_insertuipc(struct fileglob * fg) { - int count; - funnel_t * fnl; - extern int disable_funnel; +int insertque = 0; - fnl = thread_funnel_get(); - /* - * If the funnels are merged then atleast a funnel should be held - * else frele should come in with kernel funnel only - */ - if (!disable_funnel && (fnl != kernel_flock)) { - panic("frele: kernel funnel not held"); + lck_mtx_lock(&fg->fg_lock); + + while (fg->fg_lflags & FG_RMMSGQ) { + fg->fg_lflags |= FG_WRMMSGQ; + msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", 0); + } - } else if (fnl == THR_FUNNEL_NULL) { - panic("frele: no funnel held"); + fg->fg_count++; + fg->fg_msgcount++; + if (fg->fg_msgcount == 1) { + fg->fg_lflags |= FG_INSMSGQ; + insertque=1; } + lck_mtx_unlock(&fg->fg_lock); + + if (insertque) { + lck_mtx_lock(uipc_lock); + LIST_INSERT_HEAD(&fmsghead, fg, f_msglist); + lck_mtx_unlock(uipc_lock); + lck_mtx_lock(&fg->fg_lock); + fg->fg_lflags &= ~FG_INSMSGQ; + if (fg->fg_lflags & FG_WINSMSGQ) { + fg->fg_lflags &= ~FG_WINSMSGQ; + wakeup(&fg->fg_lflags); + } + lck_mtx_unlock(&fg->fg_lock); + } + +} - if ((count = frele_internal(fp)) == 0) { - /* some one closed the fd while we were blocked */ - (void)closef_finish(fp, current_proc()); +void +fg_removeuipc(struct fileglob * fg) +{ +int removeque = 0; + + lck_mtx_lock(&fg->fg_lock); + while (fg->fg_lflags & FG_INSMSGQ) { + fg->fg_lflags |= FG_WINSMSGQ; + msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", 0); + } + fg->fg_msgcount--; + if (fg->fg_msgcount == 0) { + fg->fg_lflags |= FG_RMMSGQ; + 
removeque=1; } - return(count); + lck_mtx_unlock(&fg->fg_lock); + + if (removeque) { + lck_mtx_lock(uipc_lock); + LIST_REMOVE(fg, f_msglist); + lck_mtx_unlock(uipc_lock); + lck_mtx_lock(&fg->fg_lock); + fg->fg_lflags &= ~FG_RMMSGQ; + if (fg->fg_lflags & FG_WRMMSGQ) { + fg->fg_lflags &= ~FG_WRMMSGQ; + wakeup(&fg->fg_lflags); + } + lck_mtx_unlock(&fg->fg_lock); + } +} + + +int +fo_read(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, int flags, struct proc *p) +{ + return ((*fp->f_ops->fo_read)(fp, uio, cred, flags, p)); +} + +int +fo_write(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, int flags, struct proc *p) +{ + return((*fp->f_ops->fo_write)(fp, uio, cred, flags, p)); +} + +int +fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, struct proc *p) +{ +int error; + + proc_fdunlock(p); + error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); + proc_fdlock(p); + return(error); +} + +int +fo_select(struct fileproc *fp, int which, void *wql, struct proc *p) +{ + return((*fp->f_ops->fo_select)(fp, which, wql, p)); +} + +int +fo_close(struct fileglob *fg, struct proc *p) +{ + return((*fg->fg_ops->fo_close)(fg, p)); } int -fcount(struct file *fp) +fo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p) { - if (fp->f_count == (short)0xffff) - panic("fcount: stale"); - return ((int)fp->f_count); + return ((*fp->f_ops->fo_kqfilter)(fp, kn, p)); } diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 55c2fab03..1bf948822 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -48,15 +48,17 @@ /* * @(#)kern_event.c 1.0 (3/31/2000) */ +#include #include #include #include #include -#include +#include +#include #include #include -#include +#include #include #include #include @@ -68,26 +70,43 @@ #include #include #include - +#include +#include +#include + +#include +#include +#include +#include #include +#include + +#include + +extern void unix_syscall_return(int); MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); -static int kqueue_scan(struct file *fp, int maxevents, - struct kevent *ulistp, const struct timespec *timeout, - register_t *retval, struct proc *p); -static void kqueue_wakeup(struct kqueue *kq); +static inline void kqlock(struct kqueue *kq); +static inline void kqunlock(struct kqueue *kq); + +static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn); +static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn); +static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn); +static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn); -static int kqueue_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int kqueue_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int kqueue_ioctl __P((struct file *fp, u_long com, caddr_t data, - struct proc *p)); -static int kqueue_select __P((struct file *fp, int which, void *wql, - struct proc *p)); -static int kqueue_close __P((struct file *fp, struct proc *p)); -static int kqueue_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p)); +static void kqueue_wakeup(struct kqueue *kq); +static int kqueue_read(struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int kqueue_write(struct fileproc *fp, struct uio *uio, + kauth_cred_t 
cred, int flags, struct proc *p); +static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, + struct proc *p); +static int kqueue_select(struct fileproc *fp, int which, void *wql, + struct proc *p); +static int kqueue_close(struct fileglob *fp, struct proc *p); +static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); +extern int kqueue_stat(struct fileproc *fp, struct stat *st, struct proc *p); static struct fileops kqueueops = { kqueue_read, @@ -95,15 +114,28 @@ static struct fileops kqueueops = { kqueue_ioctl, kqueue_select, kqueue_close, - kqueue_kqfilter + kqueue_kqfilter, + 0 }; -static void knote_fdpattach(struct knote *kn, struct filedesc *fdp); +static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p); +static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p); + +static int kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data); +static void kevent_continue(struct kqueue *kq, void *data, int error); +static void kevent_scan_continue(void *contp, wait_result_t wait_result); +static int kevent_process(struct kqueue *kq, kevent_callback_t callback, + void *data, int *countp, struct proc *p); +static void knote_put(struct knote *kn); +static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p); static void knote_drop(struct knote *kn, struct proc *p); +static void knote_activate(struct knote *kn); +static void knote_deactivate(struct knote *kn); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static struct knote *knote_alloc(void); static void knote_free(struct knote *kn); +extern void knote_init(void); static int filt_fileattach(struct knote *kn); static struct filterops file_filtops = @@ -115,7 +147,7 @@ static struct filterops kqread_filtops = { 1, NULL, filt_kqdetach, filt_kqueue }; /* - * JMM - placeholder for not-yet-implemented filters + * placeholder for not-yet-implemented filters */ static int filt_badattach(struct knote *kn); static struct filterops bad_filtops = @@ -132,9 +164,10 @@ extern struct filterops fs_filtops; extern struct filterops sig_filtops; -#if 0 -/* JMM - We don't implement these now */ -static void filt_timerexpire(void *knx); + +/* Timer filter */ +static int filt_timercompute(struct knote *kn, uint64_t *abs_time); +static void filt_timerexpire(void *knx, void *param1); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static int filt_timer(struct knote *kn, long hint); @@ -142,20 +175,21 @@ static int filt_timer(struct knote *kn, long hint); static struct filterops timer_filtops = { 0, filt_timerattach, filt_timerdetach, filt_timer }; -static int kq_ncallouts = 0; -static int kq_calloutmax = (4 * 1024); +/* to avoid arming timers that fire quicker than we can handle */ +static uint64_t filt_timerfloor = 0; -SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, - &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); -#endif /* 0 */ +static lck_mtx_t _filt_timerlock; +static void filt_timerlock(void); +static void filt_timerunlock(void); -static zone_t knote_zone; +/* + * Sentinel marker for a thread scanning through the list of + * active knotes. 
+ */ +static struct filterops threadmarker_filtops = + { 0, filt_badattach, 0, 0 }; -#define KNOTE_ACTIVATE(kn) do { \ - kn->kn_status |= KN_ACTIVE; \ - if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ - knote_enqueue(kn); \ -} while(0) +static zone_t knote_zone; #define KN_HASHSIZE 64 /* XXX should be tunable */ #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) @@ -178,15 +212,155 @@ static struct filterops *sysfilt_ops[] = { &file_filtops, /* EVFILT_VNODE */ &proc_filtops, /* EVFILT_PROC */ &sig_filtops, /* EVFILT_SIGNAL */ -#if 0 &timer_filtops, /* EVFILT_TIMER */ -#else - &bad_filtops, /* EVFILT_TIMER */ -#endif &bad_filtops, /* EVFILT_MACHPORT */ - &fs_filtops /* EVFILT_FS */ + &fs_filtops /* EVFILT_FS */ }; +/* + * kqueue/note lock attributes and implementations + * + * kqueues have locks, while knotes have use counts. + * Most of the knote state is guarded by the object lock. + * The knote "inuse" count and status use the kqueue lock. + */ +lck_grp_attr_t * kq_lck_grp_attr; +lck_grp_t * kq_lck_grp; +lck_attr_t * kq_lck_attr; + +static inline void +kqlock(struct kqueue *kq) +{ + lck_spin_lock(&kq->kq_lock); +} + +static inline void +kqunlock(struct kqueue *kq) +{ + lck_spin_unlock(&kq->kq_lock); +} + +/* + * Convert a kq lock to a knote use reference. + * + * If the knote is being dropped, we can't get + * a use reference, so just return with it + * still locked. + * + * - kq locked at entry + * - unlock on exit if we get the use reference + */ +static int +kqlock2knoteuse(struct kqueue *kq, struct knote *kn) +{ + if (kn->kn_status & KN_DROPPING) + return 0; + kn->kn_inuse++; + kqunlock(kq); + return 1; + } + +/* + * Convert a kq lock to a knote use reference, + * waiting out any concurrent drop. + * + * If the knote is being dropped, wait for the + * drop to complete and then return 0 without + * taking a use reference. + * + * - kq locked at entry + * - kq always unlocked on exit + */ +static int +kqlock2knoteusewait(struct kqueue *kq, struct knote *kn) +{ + if (!kqlock2knoteuse(kq, kn)) { + kn->kn_status |= KN_DROPWAIT; + assert_wait(&kn->kn_status, THREAD_UNINT); + kqunlock(kq); + thread_block(THREAD_CONTINUE_NULL); + return 0; + } + return 1; + } + +/* + * Convert from a knote use reference back to kq lock. + * + * Drop a use reference and wake any waiters if + * this is the last one. + * + * The exit return indicates if the knote is + * still alive - but the kqueue lock is taken + * unconditionally. + */ +static int +knoteuse2kqlock(struct kqueue *kq, struct knote *kn) +{ + kqlock(kq); + if ((--kn->kn_inuse == 0) && + (kn->kn_status & KN_USEWAIT)) { + kn->kn_status &= ~KN_USEWAIT; + thread_wakeup(&kn->kn_inuse); + } + return ((kn->kn_status & KN_DROPPING) == 0); + } + +/* + * Convert a kq lock to a knote drop reference. + * + * If the knote is in use, wait for the use count + * to subside. We first mark our intention to drop + * it - keeping other users from "piling on." + * If we are too late, we have to wait for the + * other drop to complete. + * + * - kq locked at entry + * - always unlocked on exit. + * - caller can't hold any locks that would prevent + * the other dropper from completing.
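+ *
+ * Returns 1 when the caller has become the dropper and must finish
+ * detaching and freeing the knote; returns 0 when another thread was
+ * already dropping it and that drop has been waited out.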
+ */ +static int +kqlock2knotedrop(struct kqueue *kq, struct knote *kn) +{ + + if ((kn->kn_status & KN_DROPPING) == 0) { + kn->kn_status |= KN_DROPPING; + if (kn->kn_inuse > 0) { + kn->kn_status |= KN_USEWAIT; + assert_wait(&kn->kn_inuse, THREAD_UNINT); + kqunlock(kq); + thread_block(THREAD_CONTINUE_NULL); + } else + kqunlock(kq); + return 1; + } else { + kn->kn_status |= KN_DROPWAIT; + assert_wait(&kn->kn_status, THREAD_UNINT); + kqunlock(kq); + thread_block(THREAD_CONTINUE_NULL); + return 0; + } +} + +/* + * Release a knote use count reference. + */ +static void +knote_put(struct knote *kn) +{ + struct kqueue *kq = kn->kn_kq; + + kqlock(kq); + if ((--kn->kn_inuse == 0) && + (kn->kn_status & KN_USEWAIT)) { + kn->kn_status &= ~KN_USEWAIT; + thread_wakeup(&kn->kn_inuse); + } + kqunlock(kq); + } + + + static int filt_fileattach(struct knote *kn) { @@ -194,20 +368,27 @@ filt_fileattach(struct knote *kn) return (fo_kqfilter(kn->kn_fp, kn, current_proc())); } +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data + static void filt_kqdetach(struct knote *kn) { struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; - if (kq->kq_state & KQ_SEL) - return; - + kqlock(kq); KNOTE_DETACH(&kq->kq_sel.si_note, kn); + kqunlock(kq); } /*ARGSUSED*/ static int -filt_kqueue(struct knote *kn, long hint) +filt_kqueue(struct knote *kn, __unused long hint) { struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; @@ -219,21 +400,23 @@ static int filt_procattach(struct knote *kn) { struct proc *p; + int funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); p = pfind(kn->kn_id); - if (p == NULL) + if (p == NULL) { + thread_funnel_set(kernel_flock, funnel_state); return (ESRCH); - if (! PRISON_CHECK(current_proc(), p)) - return (EACCES); + } - kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * internal flag indicating registration done by kernel */ if (kn->kn_flags & EV_FLAG1) { - kn->kn_data = kn->kn_sdata; /* ppid */ + kn->kn_data = (int)kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_flags &= ~EV_FLAG1; } @@ -241,6 +424,8 @@ filt_procattach(struct knote *kn) /* XXX lock the proc here while adding to the list? */ KNOTE_ATTACH(&p->p_klist, kn); + thread_funnel_set(kernel_flock, funnel_state); + return (0); } @@ -255,19 +440,25 @@ filt_procattach(struct knote *kn) static void filt_procdetach(struct knote *kn) { - struct proc *p = kn->kn_ptr.p_proc; + struct proc *p; + int funnel_state; - if (kn->kn_status & KN_DETACHED) - return; + funnel_state = thread_funnel_set(kernel_flock, TRUE); + p = pfind(kn->kn_id); + + if (p != (struct proc *)NULL) + KNOTE_DETACH(&p->p_klist, kn); - /* XXX locking? this might modify another process. */ - KNOTE_DETACH(&p->p_klist, kn); + thread_funnel_set(kernel_flock, funnel_state); } static int filt_proc(struct knote *kn, long hint) { u_int event; + int funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); /* * mask off extra data @@ -284,8 +475,8 @@ filt_proc(struct knote *kn, long hint) * process is gone, so flag the event as finished. 
*/ if (event == NOTE_EXIT) { - kn->kn_status |= KN_DETACHED; kn->kn_flags |= (EV_EOF | EV_ONESHOT); + thread_funnel_set(kernel_flock, funnel_state); return (1); } @@ -307,240 +498,654 @@ filt_proc(struct knote *kn, long hint) kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata; /* preserve udata */ - error = kqueue_register(kn->kn_kq, &kev, NULL); + error = kevent_register(kn->kn_kq, &kev, NULL); if (error) kn->kn_fflags |= NOTE_TRACKERR; } + event = kn->kn_fflags; + thread_funnel_set(kernel_flock, funnel_state); - return (kn->kn_fflags != 0); + return (event != 0); } -#if 0 +/* + * filt_timercompute - compute absolute timeout + * + * The saved-data field in the knote contains the + * time value. The saved filter-flags indicates + * the unit of measurement. + * + * If the timeout is not absolute, adjust it for + * the current time. + */ +static int +filt_timercompute(struct knote *kn, uint64_t *abs_time) +{ + uint64_t multiplier; + uint64_t raw; + + switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) { + case NOTE_SECONDS: + multiplier = NSEC_PER_SEC; + break; + case NOTE_USECONDS: + multiplier = NSEC_PER_USEC; + break; + case NOTE_NSECONDS: + multiplier = 1; + break; + case 0: /* milliseconds (default) */ + multiplier = NSEC_PER_SEC / 1000; + break; + default: + return EINVAL; + } + nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw); + if (raw <= filt_timerfloor) { + *abs_time = 0; + return 0; + } + if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) { + uint32_t seconds, nanoseconds; + uint64_t now; + + clock_get_calendar_nanotime(&seconds, &nanoseconds); + nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds, + &now); + if (now >= raw + filt_timerfloor) { + *abs_time = 0; + return 0; + } + raw -= now; + } + clock_absolutetime_interval_to_deadline(raw, abs_time); + return 0; +} + +/* + * filt_timerexpire - the timer callout routine + * + * Just propagate the timer event into the knote + * filter routine (by going through the knote + * synchronization point). Pass a hint to + * indicate this is a real event, not just a + * query from above. + */ static void -filt_timerexpire(void *knx) +filt_timerexpire(void *knx, __unused void *spare) { + struct klist timer_list; struct knote *kn = knx; - struct callout *calloutp; - struct timeval tv; - int tticks; - - kn->kn_data++; - KNOTE_ACTIVATE(kn); - - if ((kn->kn_flags & EV_ONESHOT) == 0) { - tv.tv_sec = kn->kn_sdata / 1000; - tv.tv_usec = (kn->kn_sdata % 1000) * 1000; - tticks = tvtohz(&tv); - calloutp = (struct callout *)kn->kn_hook; - callout_reset(calloutp, tticks, filt_timerexpire, kn); - } + + /* no "object" for timers, so fake a list */ + SLIST_INIT(&timer_list); + SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext); + KNOTE(&timer_list, 1); } /* - * data contains amount of time to sleep, in milliseconds + * data contains amount of time to sleep, in milliseconds, + * or a pointer to a timespec structure. 
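+ * The unit is selected by NOTE_SECONDS, NOTE_USECONDS or NOTE_NSECONDS
+ * in fflags (milliseconds when none is set); NOTE_ABSOLUTE marks the
+ * value as an absolute deadline rather than an interval (see
+ * filt_timercompute above).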
*/ static int filt_timerattach(struct knote *kn) { - struct callout *calloutp; - struct timeval tv; - int tticks; + thread_call_t callout; + uint64_t deadline; + int error; - if (kq_ncallouts >= kq_calloutmax) - return (ENOMEM); - kq_ncallouts++; + error = filt_timercompute(kn, &deadline); + if (error) + return (error); - tv.tv_sec = kn->kn_sdata / 1000; - tv.tv_usec = (kn->kn_sdata % 1000) * 1000; - tticks = tvtohz(&tv); + if (deadline) { + callout = thread_call_allocate(filt_timerexpire, kn); + if (NULL == callout) + return (ENOMEM); + } else { + /* handle as immediate */ + kn->kn_sdata = 0; + callout = NULL; + } - kn->kn_flags |= EV_CLEAR; /* automatically set */ - MALLOC(calloutp, struct callout *, sizeof(*calloutp), - M_KQUEUE, M_WAITOK); - callout_init(calloutp); - callout_reset(calloutp, tticks, filt_timerexpire, kn); - kn->kn_hook = (caddr_t)calloutp; + filt_timerlock(); + kn->kn_hook = (caddr_t)callout; + /* absolute=EV_ONESHOT */ + if (kn->kn_sfflags & NOTE_ABSOLUTE) + kn->kn_flags |= EV_ONESHOT; + + if (deadline) { + /* all others - if not faking immediate */ + kn->kn_flags |= EV_CLEAR; + thread_call_enter_delayed(callout, deadline); + kn->kn_hookid = 0; + } else { + /* fake immediate */ + kn->kn_hookid = 1; + } + filt_timerunlock(); return (0); } static void filt_timerdetach(struct knote *kn) { - struct callout *calloutp; - - calloutp = (struct callout *)kn->kn_hook; - callout_stop(calloutp); - FREE(calloutp, M_KQUEUE); - kq_ncallouts--; + thread_call_t callout; + + filt_timerlock(); + callout = (thread_call_t)kn->kn_hook; + if (callout != NULL) { + boolean_t cancelled; + + /* cancel the callout if we can */ + cancelled = thread_call_cancel(callout); + if (cancelled) { + /* got it, just free it */ + kn->kn_hook = NULL; + filt_timerunlock(); + thread_call_free(callout); + return; + } + /* we have to wait for the expire routine. 
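+ * filt_timer() wakes us through kn->kn_hook once the callout has
+ * fired and the knote is safe to tear down.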
*/ + kn->kn_hookid = -1; /* we are detaching */ + assert_wait(&kn->kn_hook, THREAD_UNINT); + filt_timerunlock(); + thread_block(THREAD_CONTINUE_NULL); + assert(kn->kn_hook == NULL); + return; + } + /* nothing to do */ + filt_timerunlock(); } + + static int -filt_timer(struct knote *kn, long hint) +filt_timer(struct knote *kn, __unused long hint) { + int result; + + if (hint) { + /* real timer pop */ + thread_call_t callout; + boolean_t detaching; + + filt_timerlock(); + + kn->kn_data++; + + detaching = (kn->kn_hookid < 0); + callout = (thread_call_t)kn->kn_hook; - return (kn->kn_data != 0); + if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) { + uint64_t deadline; + int error; + + /* user input data may have changed - deal */ + error = filt_timercompute(kn, &deadline); + if (error) { + kn->kn_flags |= EV_ERROR; + kn->kn_data = error; + } else if (deadline == 0) { + /* revert to fake immediate */ + kn->kn_flags &= ~EV_CLEAR; + kn->kn_sdata = 0; + kn->kn_hookid = 1; + } else { + /* keep the callout and re-arm */ + thread_call_enter_delayed(callout, deadline); + filt_timerunlock(); + return 1; + } + } + kn->kn_hook = NULL; + filt_timerunlock(); + thread_call_free(callout); + + /* if someone is waiting for timer to pop */ + if (detaching) + thread_wakeup(&kn->kn_hook); + + return 1; + } + + /* user-query */ + filt_timerlock(); + + /* change fake timer to real if needed */ + while (kn->kn_hookid > 0 && kn->kn_sdata > 0) { + int error; + + /* update the fake timer (make real) */ + kn->kn_hookid = 0; + kn->kn_data = 0; + filt_timerunlock(); + error = filt_timerattach(kn); + filt_timerlock(); + if (error) { + kn->kn_flags |= EV_ERROR; + kn->kn_data = error; + filt_timerunlock(); + return 1; + } + } + + /* if still fake, pretend it fired */ + if (kn->kn_hookid > 0) + kn->kn_data = 1; + + result = (kn->kn_data != 0); + filt_timerunlock(); + return result; +} + +static void +filt_timerlock(void) +{ + lck_mtx_lock(&_filt_timerlock); +} + +static void +filt_timerunlock(void) +{ + lck_mtx_unlock(&_filt_timerlock); } -#endif /* 0 */ /* * JMM - placeholder for not-yet-implemented filters */ static int -filt_badattach(struct knote *kn) +filt_badattach(__unused struct knote *kn) { - return(EOPNOTSUPP); + return(ENOTSUP); } -#ifndef _SYS_SYSPROTO_H_ -struct kqueue_args { - int dummy; -}; -#endif -int -kqueue(struct proc *p, struct kqueue_args *uap, register_t *retval) +struct kqueue * +kqueue_alloc(struct proc *p) +{ + struct filedesc *fdp = p->p_fd; + struct kqueue *kq; + + MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK); + if (kq != NULL) { + bzero(kq, sizeof(struct kqueue)); + lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); + TAILQ_INIT(&kq->kq_head); + TAILQ_INIT(&kq->kq_inprocess); + kq->kq_fdp = fdp; + } + + if (fdp->fd_knlistsize < 0) { + proc_fdlock(p); + if (fdp->fd_knlistsize < 0) + fdp->fd_knlistsize = 0; /* this process has had a kq */ + proc_fdunlock(p); + } + + return kq; +} + + +/* + * kqueue_dealloc - detach all knotes from a kqueue and free it + * + * We walk each list looking for knotes referencing this + * this kqueue. If we find one, we try to drop it. But + * if we fail to get a drop reference, that will wait + * until it is dropped. So, we can just restart again + * safe in the assumption that the list will eventually + * not contain any more references to this kqueue (either + * we dropped them all, or someone else did). + * + * Assumes no new events are being added to the kqueue. + * Nothing locked on entry or exit. 
+ */ +void +kqueue_dealloc(struct kqueue *kq, struct proc *p) { struct filedesc *fdp = p->p_fd; + struct knote *kn; + int i; + + proc_fdlock(p); + for (i = 0; i < fdp->fd_knlistsize; i++) { + kn = SLIST_FIRST(&fdp->fd_knlist[i]); + while (kn != NULL) { + if (kq == kn->kn_kq) { + kqlock(kq); + proc_fdunlock(p); + /* drop it ourselves or wait */ + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } + proc_fdlock(p); + /* start over at beginning of list */ + kn = SLIST_FIRST(&fdp->fd_knlist[i]); + continue; + } + kn = SLIST_NEXT(kn, kn_link); + } + } + if (fdp->fd_knhashmask != 0) { + for (i = 0; i < (int)fdp->fd_knhashmask + 1; i++) { + kn = SLIST_FIRST(&fdp->fd_knhash[i]); + while (kn != NULL) { + if (kq == kn->kn_kq) { + kqlock(kq); + proc_fdunlock(p); + /* drop it ourselves or wait */ + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } + proc_fdlock(p); + /* start over at beginning of list */ + kn = SLIST_FIRST(&fdp->fd_knhash[i]); + continue; + } + kn = SLIST_NEXT(kn, kn_link); + } + } + } + proc_fdunlock(p); + lck_spin_destroy(&kq->kq_lock, kq_lck_grp); + FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE); +} + +int +kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval) +{ struct kqueue *kq; - struct file *fp; + struct fileproc *fp; int fd, error; error = falloc(p, &fp, &fd); - if (error) + if (error) { return (error); + } + + kq = kqueue_alloc(p); + if (kq == NULL) { + fp_free(p, fd, fp); + return (ENOMEM); + } + fp->f_flag = FREAD | FWRITE; fp->f_type = DTYPE_KQUEUE; fp->f_ops = &kqueueops; - kq = (struct kqueue *)_MALLOC(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); - TAILQ_INIT(&kq->kq_head); fp->f_data = (caddr_t)kq; + + proc_fdlock(p); + *fdflags(p, fd) &= ~UF_RESERVED; + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + *retval = fd; - if (fdp->fd_knlistsize < 0) - fdp->fd_knlistsize = 0; /* this process has a kq */ - kq->kq_fdp = fdp; return (error); } -#ifndef _SYS_SYSPROTO_H_ -struct kqueue_portset_np_args { - int fd; -}; -#endif int -kqueue_portset_np(struct proc *p, struct kqueue_portset_np_args *uap, register_t *retval) +kqueue_portset_np(__unused struct proc *p, + __unused struct kqueue_portset_np_args *uap, + __unused register_t *retval) { /* JMM - Placeholder for now */ - return (EOPNOTSUPP); + return (ENOTSUP); } -#ifndef _SYS_SYSPROTO_H_ -struct kqueue_from_portset_np_args { - int fd; -}; -#endif int -kqueue_from_portset_np(struct proc *p, struct kqueue_from_portset_np_args *uap, register_t *retval) +kqueue_from_portset_np(__unused struct proc *p, + __unused struct kqueue_from_portset_np_args *uap, + __unused register_t *retval) { /* JMM - Placeholder for now */ - return (EOPNOTSUPP); + return (ENOTSUP); } -#if !0 -/* JMM - We don't implement this yet */ -#define fhold(fp) -#define fdrop(fp, p) -#endif /* !0 */ - -#ifndef _SYS_SYSPROTO_H_ -struct kevent_args { - int fd; - const struct kevent *changelist; - int nchanges; - struct kevent *eventlist; - int nevents; - const struct timespec *timeout; -}; -#endif -int -kevent(struct proc *p, struct kevent_args *uap, register_t *retval) +static int +kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p) { - struct filedesc* fdp = p->p_fd; - struct kqueue *kq; - struct file *fp = NULL; - struct timespec ts; - int i, nerrors, error; + int advance; + int error; - if (uap->timeout != NULL) { - error = copyin((caddr_t)uap->timeout, (caddr_t)&ts, sizeof(ts)); + if (IS_64BIT_PROCESS(p)) { + struct user_kevent kev64; + + advance = 
sizeof(kev64); + error = copyin(*addrp, (caddr_t)&kev64, advance); if (error) - goto done; - uap->timeout = &ts; + return error; + kevp->ident = CAST_DOWN(uintptr_t, kev64.ident); + kevp->filter = kev64.filter; + kevp->flags = kev64.flags; + kevp->fflags = kev64.fflags; + kevp->data = CAST_DOWN(intptr_t, kev64.data); + kevp->udata = kev64.udata; + } else { + /* + * compensate for legacy in-kernel kevent layout + * where the udata field is alredy 64-bit. + */ + advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t); + error = copyin(*addrp, (caddr_t)kevp, advance); } + if (!error) + *addrp += advance; + return error; +} - if (((u_int)uap->fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL || - (fp->f_type != DTYPE_KQUEUE)) - return (EBADF); +static int +kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p) +{ + int advance; + int error; + + if (IS_64BIT_PROCESS(p)) { + struct user_kevent kev64; + + kev64.ident = (uint64_t) kevp->ident; + kev64.filter = kevp->filter; + kev64.flags = kevp->flags; + kev64.fflags = kevp->fflags; + kev64.data = (int64_t) kevp->data; + kev64.udata = kevp->udata; + advance = sizeof(kev64); + error = copyout((caddr_t)&kev64, *addrp, advance); + } else { + /* + * compensate for legacy in-kernel kevent layout + * where the udata field is alredy 64-bit. + */ + advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t); + error = copyout((caddr_t)kevp, *addrp, advance); + } + if (!error) + *addrp += advance; + return error; +} - fhold(fp); +/* + * kevent_continue - continue a kevent syscall after blocking + * + * assume we inherit a use count on the kq fileglob. + */ - kq = (struct kqueue *)fp->f_data; - nerrors = 0; +static void +kevent_continue(__unused struct kqueue *kq, void *data, int error) +{ + struct _kevent *cont_args; + struct fileproc *fp; + register_t *retval; + int noutputs; + int fd; + struct proc *p = current_proc(); + + cont_args = (struct _kevent *)data; + noutputs = cont_args->eventout; + retval = cont_args->retval; + fd = cont_args->fd; + fp = cont_args->fp; + + fp_drop(p, fd, fp, 0); + + /* don't restart after signals... */ + if (error == ERESTART) + error = EINTR; + else if (error == EWOULDBLOCK) + error = 0; + if (error == 0) + *retval = noutputs; + unix_syscall_return(error); +} - while (uap->nchanges > 0) { - int i; - int n = uap->nchanges > KQ_NEVENTS ? 
KQ_NEVENTS : uap->nchanges; - struct kevent kq_kev[n]; +/* + * kevent - [syscall] register and wait for kernel events + * + */ - error = copyin((caddr_t)uap->changelist, (caddr_t)kq_kev, - n * sizeof(struct kevent)); +int +kevent(struct proc *p, struct kevent_args *uap, register_t *retval) +{ + user_addr_t changelist = uap->changelist; + user_addr_t ueventlist = uap->eventlist; + int nchanges = uap->nchanges; + int nevents = uap->nevents; + int fd = uap->fd; + + struct _kevent *cont_args; + uthread_t ut; + struct kqueue *kq; + struct fileproc *fp; + struct kevent kev; + int error, noutputs; + struct timeval atv; + + /* convert timeout to absolute - if we have one */ + if (uap->timeout != USER_ADDR_NULL) { + struct timeval rtv; + if ( IS_64BIT_PROCESS(p) ) { + struct user_timespec ts; + error = copyin( uap->timeout, &ts, sizeof(ts) ); + if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0) + error = EINVAL; + else + TIMESPEC_TO_TIMEVAL(&rtv, &ts); + } else { + struct timespec ts; + error = copyin( uap->timeout, &ts, sizeof(ts) ); + TIMESPEC_TO_TIMEVAL(&rtv, &ts); + } if (error) - goto done; - for (i = 0; i < n; i++) { - struct kevent *kevp = &kq_kev[i]; + return error; + if (itimerfix(&rtv)) + return EINVAL; + getmicrouptime(&atv); + timevaladd(&atv, &rtv); + } else { + atv.tv_sec = 0; + atv.tv_usec = 0; + } - kevp->flags &= ~EV_SYSFLAGS; - error = kqueue_register(kq, kevp, p); - if (error) { - if (uap->nevents != 0) { - kevp->flags = EV_ERROR; - kevp->data = error; - (void) copyout((caddr_t)kevp, - (caddr_t)uap->eventlist, - sizeof(*kevp)); - uap->eventlist++; - uap->nevents--; - nerrors++; - } else { - goto done; - } - } + /* get a usecount for the kq itself */ + if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) + return(error); + + /* register all the change requests the user provided... */ + noutputs = 0; + while (nchanges > 0) { + error = kevent_copyin(&changelist, &kev, p); + if (error) + break; + + kev.flags &= ~EV_SYSFLAGS; + error = kevent_register(kq, &kev, p); + if (error) { + if (nevents == 0) + break; + kev.flags = EV_ERROR; + kev.data = error; + (void) kevent_copyout(&kev, &ueventlist, p); + nevents--; + noutputs++; } - uap->nchanges -= n; - uap->changelist += n; - } - if (nerrors) { - *retval = nerrors; - error = 0; - goto done; + nchanges--; } - error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, retval, p); -done: - if (fp != NULL) - fdrop(fp, p); - return (error); + /* store the continuation/completion data in the uthread */ + ut = (uthread_t)get_bsdthread_info(current_thread()); + cont_args = (struct _kevent *)&ut->uu_state.ss_kevent; + cont_args->fp = fp; + cont_args->fd = fd; + cont_args->retval = retval; + cont_args->eventlist = ueventlist; + cont_args->eventcount = nevents; + cont_args->eventout = noutputs; + + if (nevents > 0 && noutputs == 0 && error == 0) + error = kevent_scan(kq, kevent_callback, + kevent_continue, cont_args, + &atv, p); + kevent_continue(kq, cont_args, error); + /* NOTREACHED */ + return error; +} + + +/* + * kevent_callback - callback for each individual event + * + * called with nothing locked + * caller holds a reference on the kqueue + */ + +static int +kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) +{ + struct _kevent *cont_args; + int error; + + cont_args = (struct _kevent *)data; + assert(cont_args->eventout < cont_arg->eventcount); + + /* + * Copy out the appropriate amount of event data for this user. 
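+ * (a 64-bit process receives the wider struct user_kevent layout;
+ * see kevent_copyout above)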
+ */ + error = kevent_copyout(kevp, &cont_args->eventlist, current_proc()); + + /* + * If there isn't space for additional events, return + * a harmless error to stop the processing here + */ + if (error == 0 && ++cont_args->eventout == cont_args->eventcount) + error = EWOULDBLOCK; + return error; } +/* + * kevent_register - add a new event to a kqueue + * + * Creates a mapping between the event source and + * the kqueue via a knote data structure. + * + * Because many/most the event sources are file + * descriptor related, the knote is linked off + * the filedescriptor table for quick access. + * + * called with nothing locked + * caller holds a reference on the kqueue + */ + int -kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) +kevent_register(struct kqueue *kq, struct kevent *kev, struct proc *p) { struct filedesc *fdp = kq->kq_fdp; struct filterops *fops; - struct file *fp = NULL; + struct fileproc *fp = NULL; struct knote *kn = NULL; - int s, error = 0; + int error = 0; if (kev->filter < 0) { if (kev->filter + EVFILT_SYSCOUNT < 0) @@ -556,20 +1161,22 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) return (EINVAL); } - if (fops->f_isfd) { - /* validate descriptor */ - if ((u_int)kev->ident >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[kev->ident]) == NULL) - return (EBADF); - fhold(fp); + /* this iocount needs to be dropped if it is not registered */ + if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0) + return(error); - if (kev->ident < fdp->fd_knlistsize) { + restart: + proc_fdlock(p); + if (fops->f_isfd) { + /* fd-based knotes are linked off the fd table */ + if (kev->ident < (u_int)fdp->fd_knlistsize) { SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) if (kq == kn->kn_kq && kev->filter == kn->kn_filter) break; } } else { + /* hash non-fd knotes here too */ if (fdp->fd_knhashmask != 0) { struct klist *list; @@ -583,329 +1190,497 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) } } - if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { - error = ENOENT; - goto done; + /* + * kn now contains the matching knote, or NULL if no match + */ + if (kn == NULL) { + if ((kev->flags & (EV_ADD|EV_DELETE)) == EV_ADD) { + kn = knote_alloc(); + if (kn == NULL) { + proc_fdunlock(p); + error = ENOMEM; + goto done; + } + kn->kn_fp = fp; + kn->kn_kq = kq; + kn->kn_tq = &kq->kq_head; + kn->kn_fop = fops; + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + kev->fflags = 0; + kev->data = 0; + kn->kn_kevent = *kev; + kn->kn_inuse = 1; /* for f_attach() */ + kn->kn_status = 0; + + /* before anyone can find it */ + if (kev->flags & EV_DISABLE) + kn->kn_status |= KN_DISABLED; + + error = knote_fdpattach(kn, fdp, p); + proc_fdunlock(p); + + if (error) { + knote_free(kn); + goto done; + } + + /* + * apply reference count to knote structure, and + * do not release it at the end of this routine. + */ + fp = NULL; + + /* + * If the attach fails here, we can drop it knowing + * that nobody else has a reference to the knote. 
+ */ + if ((error = fops->f_attach(kn)) != 0) { + knote_drop(kn, p); + goto done; + } + } else { + proc_fdunlock(p); + error = ENOENT; + goto done; + } + } else { + /* existing knote - get kqueue lock */ + kqlock(kq); + proc_fdunlock(p); + + if (kev->flags & EV_DELETE) { + knote_dequeue(kn); + kn->kn_status |= KN_DISABLED; + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } + goto done; + } + + /* update status flags for existing knote */ + if (kev->flags & EV_DISABLE) { + knote_dequeue(kn); + kn->kn_status |= KN_DISABLED; + } else if (kev->flags & EV_ENABLE) { + kn->kn_status &= ~KN_DISABLED; + if (kn->kn_status & KN_ACTIVE) + knote_enqueue(kn); + } + + /* + * If somebody is in the middle of dropping this + * knote - go find/insert a new one. But we have + * wait for this one to go away first. + */ + if (!kqlock2knoteusewait(kq, kn)) + /* kqueue unlocked */ + goto restart; + + /* + * The user may change some filter values after the + * initial EV_ADD, but doing so will not reset any + * filter which have already been triggered. + */ + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + kn->kn_kevent.udata = kev->udata; + } + + /* still have use ref on knote */ + if (kn->kn_fop->f_event(kn, 0)) { + if (knoteuse2kqlock(kq, kn)) + knote_activate(kn); + kqunlock(kq); + } else { + knote_put(kn); + } + +done: + if (fp != NULL) + fp_drop(p, kev->ident, fp, 0); + return (error); +} + +/* + * kevent_process - process the triggered events in a kqueue + * + * Walk the queued knotes and validate that they are + * really still triggered events by calling the filter + * routines (if necessary). Hold a use reference on + * the knote to avoid it being detached. For each event + * that is still considered triggered, invoke the + * callback routine provided. + * + * caller holds a reference on the kqueue. + * kqueue locked on entry and exit - but may be dropped + */ + +static int +kevent_process(struct kqueue *kq, + kevent_callback_t callback, + void *data, + int *countp, + struct proc *p) +{ + struct knote *kn; + struct kevent kev; + int nevents; + int error; + + restart: + if (kq->kq_count == 0) { + *countp = 0; + return 0; + } + + /* if someone else is processing the queue, wait */ + if (!TAILQ_EMPTY(&kq->kq_inprocess)) { + assert_wait(&kq->kq_inprocess, THREAD_UNINT); + kq->kq_state |= KQ_PROCWAIT; + kqunlock(kq); + thread_block(THREAD_CONTINUE_NULL); + kqlock(kq); + goto restart; } - /* - * kn now contains the matching knote, or NULL if no match - */ - if (kev->flags & EV_ADD) { + error = 0; + nevents = 0; + while (error == 0 && + (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { - if (kn == NULL) { - kn = knote_alloc(); - if (kn == NULL) { - error = ENOMEM; - goto done; - } - kn->kn_fp = fp; - kn->kn_kq = kq; - kn->kn_fop = fops; + /* + * move knote to the processed queue. + * this is also protected by the kq lock. + */ + assert(kn->kn_tq == &kq->kq_head); + TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); + kn->kn_tq = &kq->kq_inprocess; + TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe); - /* - * apply reference count to knote structure, and - * do not release it at the end of this routine. - */ - fp = NULL; + /* + * Non-EV_ONESHOT events must be re-validated. + * + * Convert our lock to a use-count and call the event's + * filter routine to update. + * + * If the event is dropping (or no longer valid), we + * already have it off the active queue, so just + * finish the job of deactivating it. 
+ */ + if ((kn->kn_flags & EV_ONESHOT) == 0) { + int result; - kn->kn_sfflags = kev->fflags; - kn->kn_sdata = kev->data; - kev->fflags = 0; - kev->data = 0; - kn->kn_kevent = *kev; + if (kqlock2knoteuse(kq, kn)) { + + /* call the filter with just a ref */ + result = kn->kn_fop->f_event(kn, 0); - knote_fdpattach(kn, fdp); - if ((error = fops->f_attach(kn)) != 0) { + if (!knoteuse2kqlock(kq, kn) || result == 0) { + knote_deactivate(kn); + continue; + } + } else { + knote_deactivate(kn); + continue; + } + } + + /* + * Got a valid triggered knote with the kqueue + * still locked. Snapshot the data, and determine + * how to dispatch the knote for future events. + */ + kev = kn->kn_kevent; + + /* now what happens to it? */ + if (kn->kn_flags & EV_ONESHOT) { + knote_deactivate(kn); + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); knote_drop(kn, p); - goto done; } + } else if (kn->kn_flags & EV_CLEAR) { + knote_deactivate(kn); + kn->kn_data = 0; + kn->kn_fflags = 0; + kqunlock(kq); } else { /* - * The user may change some filter values after the - * initial EV_ADD, but doing so will not reset any - * filter which have already been triggered. + * leave on in-process queue. We'll + * move all the remaining ones back + * the kq queue and wakeup any + * waiters when we are done. */ - kn->kn_sfflags = kev->fflags; - kn->kn_sdata = kev->data; - kn->kn_kevent.udata = kev->udata; + kqunlock(kq); } - s = splhigh(); - if (kn->kn_fop->f_event(kn, 0)) - KNOTE_ACTIVATE(kn); - splx(s); + /* callback to handle each event as we find it */ + error = (callback)(kq, &kev, data); + nevents++; - } else if (kev->flags & EV_DELETE) { - kn->kn_fop->f_detach(kn); - knote_drop(kn, p); - goto done; + kqlock(kq); } - if ((kev->flags & EV_DISABLE) && - ((kn->kn_status & KN_DISABLED) == 0)) { - s = splhigh(); - kn->kn_status |= KN_DISABLED; - splx(s); + /* + * With the kqueue still locked, move any knotes + * remaining on the in-process queue back to the + * kq's queue and wake up any waiters. 
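+	 *
+	 * (Threads that found the queue busy parked in kevent_process
+	 * above after setting KQ_PROCWAIT; the thread_wakeup() below
+	 * releases them to retry their own scan.)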
+ */ + while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) { + assert(kn->kn_tq == &kq->kq_inprocess); + TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe); + kn->kn_tq = &kq->kq_head; + TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); } - - if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { - s = splhigh(); - kn->kn_status &= ~KN_DISABLED; - if ((kn->kn_status & KN_ACTIVE) && - ((kn->kn_status & KN_QUEUED) == 0)) - knote_enqueue(kn); - splx(s); + if (kq->kq_state & KQ_PROCWAIT) { + kq->kq_state &= ~KQ_PROCWAIT; + thread_wakeup(&kq->kq_inprocess); } -done: - if (fp != NULL) - fdrop(fp, p); - return (error); + *countp = nevents; + return error; } -static int -kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp, - const struct timespec *tsp, register_t *retval, struct proc *p) + +static void +kevent_scan_continue(void *data, wait_result_t wait_result) { - struct kqueue *kq = (struct kqueue *)fp->f_data; - struct timeval atv, rtv, ttv; - int s, count, timeout, error = 0; - struct knote marker; - - count = maxevents; - if (count == 0) - goto done; - - if (tsp != NULL) { - TIMESPEC_TO_TIMEVAL(&atv, tsp); - if (itimerfix(&atv)) { - error = EINVAL; - goto done; + uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); + struct _kevent_scan * cont_args = &ut->uu_state.ss_kevent_scan; + struct kqueue *kq = (struct kqueue *)data; + int error; + int count; + + /* convert the (previous) wait_result to a proper error */ + switch (wait_result) { + case THREAD_AWAKENED: + kqlock(kq); + error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc()); + if (error == 0 && count == 0) { + assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline); + kq->kq_state |= KQ_SLEEP; + kqunlock(kq); + thread_block_parameter(kevent_scan_continue, kq); + /* NOTREACHED */ } - if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) - timeout = -1; - else - timeout = atv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&atv); - getmicrouptime(&rtv); - timevaladd(&atv, &rtv); - } else { - atv.tv_sec = 0; - atv.tv_usec = 0; - timeout = 0; + kqunlock(kq); + break; + case THREAD_TIMED_OUT: + error = EWOULDBLOCK; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result); + error = 0; } - goto start; + + /* call the continuation with the results */ + assert(cont_args->cont != NULL); + (cont_args->cont)(kq, cont_args->data, error); +} -retry: - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) - goto done; - ttv = atv; - timevalsub(&ttv, &rtv); - timeout = ttv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&ttv); - } -start: - s = splhigh(); - if (kq->kq_count == 0) { - if (timeout < 0) { - error = EWOULDBLOCK; - } else { - kq->kq_state |= KQ_SLEEP; - error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout); - } - splx(s); - if (error == 0) - goto retry; - /* don't restart after signals... */ - if (error == ERESTART) - error = EINTR; - else if (error == EWOULDBLOCK) - error = 0; - goto done; - } +/* + * kevent_scan - scan and wait for events in a kqueue + * + * Process the triggered events in a kqueue. + * + * If there are no events triggered arrange to + * wait for them. If the caller provided a + * continuation routine, then kevent_scan will + * also. + * + * The callback routine must be valid. + * The caller must hold a use-count reference on the kq. 
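+ *
+ *	(That is: when a continuation is supplied, kevent_scan does not
+ *	return to its caller on a block; kevent_scan_continue runs when
+ *	the thread resumes and hands the result to the continuation.)
+ *
+ *	Illustrative call sketch; "my_callback" is a hypothetical
+ *	kevent_callback_t, and the real caller is the kevent() path:
+ *
+ *		struct timeval atv = { 0, 0 };	(zero means block forever)
+ *		error = kevent_scan(kq, my_callback, NULL, data, &atv, p);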
+ */ - /* JMM - This marker trick doesn't work with multiple threads */ - TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe); - while (count) { - int maxkev = (count > KQ_NEVENTS) ? KQ_NEVENTS : count; - struct kevent kq_kev[maxkev]; - struct kevent *kevp = kq_kev; - struct knote *kn; - int nkev = 0; - - while (nkev < maxkev) { - kn = TAILQ_FIRST(&kq->kq_head); - TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); - if (kn == &marker) { - if (count == maxevents) - goto retry; - break; - } else if (kn->kn_status & KN_DISABLED) { - kn->kn_status &= ~KN_QUEUED; - kq->kq_count--; - continue; - } else if ((kn->kn_flags & EV_ONESHOT) == 0 && - kn->kn_fop->f_event(kn, 0) == 0) { - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); - kq->kq_count--; - continue; - } +int +kevent_scan(struct kqueue *kq, + kevent_callback_t callback, + kevent_continue_t continuation, + void *data, + struct timeval *atvp, + struct proc *p) +{ + thread_continue_t cont = THREAD_CONTINUE_NULL; + uint64_t deadline; + int error; + int first; - *kevp = kn->kn_kevent; - kevp++; - nkev++; - count--; + assert(callback != NULL); - if (kn->kn_flags & EV_ONESHOT) { - kn->kn_status &= ~KN_QUEUED; - kq->kq_count--; - splx(s); - kn->kn_fop->f_detach(kn); - knote_drop(kn, p); - s = splhigh(); - } else if (kn->kn_flags & EV_CLEAR) { - kn->kn_data = 0; - kn->kn_fflags = 0; - kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); - kq->kq_count--; + first = 1; + for (;;) { + wait_result_t wait_result; + int count; + + /* + * Make a pass through the kq to find events already + * triggered. + */ + kqlock(kq); + error = kevent_process(kq, callback, data, &count, p); + if (error || count) + break; /* lock still held */ + + /* looks like we have to consider blocking */ + if (first) { + first = 0; + /* convert the timeout to a deadline once */ + if (atvp->tv_sec || atvp->tv_usec) { + uint32_t seconds, nanoseconds; + uint64_t now; + + clock_get_uptime(&now); + nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC + + atvp->tv_usec * NSEC_PER_USEC, + &deadline); + if (now >= deadline) { + /* non-blocking call */ + error = EWOULDBLOCK; + break; /* lock still held */ + } + deadline -= now; + clock_absolutetime_interval_to_deadline(deadline, &deadline); } else { - TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); + deadline = 0; /* block forever */ + } + + if (continuation) { + uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); + struct _kevent_scan *cont_args = &ut->uu_state.ss_kevent_scan; + + cont_args->call = callback; + cont_args->cont = continuation; + cont_args->deadline = deadline; + cont_args->data = data; + cont = kevent_scan_continue; } } - splx(s); - error = copyout((caddr_t)kq_kev, (caddr_t)ulistp, - sizeof(struct kevent) * nkev); - if (kn == &marker) - goto done; - ulistp += nkev; - s = splhigh(); - if (error) - break; + + /* go ahead and wait */ + assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline); + kq->kq_state |= KQ_SLEEP; + kqunlock(kq); + wait_result = thread_block_parameter(cont, kq); + /* NOTREACHED if (continuation != NULL) */ + + switch (wait_result) { + case THREAD_AWAKENED: + continue; + case THREAD_TIMED_OUT: + return EWOULDBLOCK; + case THREAD_INTERRUPTED: + return EINTR; + default: + panic("kevent_scan - bad wait_result (%d)", + wait_result); + error = 0; + } } - TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe); - splx(s); -done: - *retval = maxevents - count; - return (error); + kqunlock(kq); + return error; } + /* * XXX * This could be expanded to call kqueue_scan, if desired. 
 */
 
 /*ARGSUSED*/
 static int
-kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred,
-	int flags, struct proc *p)
+kqueue_read(__unused struct fileproc *fp,
+	__unused struct uio *uio,
+	__unused kauth_cred_t cred,
+	__unused int flags,
+	__unused struct proc *p)
 {
 	return (ENXIO);
 }
 
 /*ARGSUSED*/
 static int
-kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred,
-	int flags, struct proc *p)
+kqueue_write(__unused struct fileproc *fp,
+	__unused struct uio *uio,
+	__unused kauth_cred_t cred,
+	__unused int flags,
+	__unused struct proc *p)
 {
 	return (ENXIO);
 }
 
 /*ARGSUSED*/
 static int
-kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
+kqueue_ioctl(__unused struct fileproc *fp,
+	__unused u_long com,
+	__unused caddr_t data,
+	__unused struct proc *p)
 {
 	return (ENOTTY);
 }
 
 /*ARGSUSED*/
 static int
-kqueue_select(struct file *fp, int which, void *wql, struct proc *p)
+kqueue_select(struct fileproc *fp, int which, void *wql, struct proc *p)
 {
 	struct kqueue *kq = (struct kqueue *)fp->f_data;
 	int retnum = 0;
-	int s = splnet();
 
 	if (which == FREAD) {
+		kqlock(kq);
 		if (kq->kq_count) {
 			retnum = 1;
 		} else {
-		        selrecord(p, &kq->kq_sel, wql);
+			selrecord(p, &kq->kq_sel, wql);
 			kq->kq_state |= KQ_SEL;
 		}
+		kqunlock(kq);
 	}
-	splx(s);
 	return (retnum);
 }
 
+/*
+ * kqueue_close - tear down the kqueue when the last file reference
+ * to it goes away (via kqueue_dealloc() below).
+ */
 /*ARGSUSED*/
 static int
-kqueue_close(struct file *fp, struct proc *p)
+kqueue_close(struct fileglob *fg, struct proc *p)
 {
-	struct kqueue *kq = (struct kqueue *)fp->f_data;
-	struct filedesc *fdp = p->p_fd;
-	struct knote **knp, *kn, *kn0;
-	int i;
-
-	for (i = 0; i < fdp->fd_knlistsize; i++) {
-		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
-		kn = *knp;
-		while (kn != NULL) {
-			kn0 = SLIST_NEXT(kn, kn_link);
-			if (kq == kn->kn_kq) {
-				kn->kn_fop->f_detach(kn);
-				fdrop(kn->kn_fp, p);
-				knote_free(kn);
-				*knp = kn0;
-			} else {
-				knp = &SLIST_NEXT(kn, kn_link);
-			}
-			kn = kn0;
-		}
-	}
-	if (fdp->fd_knhashmask != 0) {
-		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
-			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
-			kn = *knp;
-			while (kn != NULL) {
-				kn0 = SLIST_NEXT(kn, kn_link);
-				if (kq == kn->kn_kq) {
-					kn->kn_fop->f_detach(kn);
-					/* XXX non-fd release of kn->kn_ptr */
-					knote_free(kn);
-					*knp = kn0;
-				} else {
-					knp = &SLIST_NEXT(kn, kn_link);
-				}
-				kn = kn0;
-			}
-		}
-	}
-	_FREE(kq, M_KQUEUE);
-	fp->f_data = NULL;
+	struct kqueue *kq = (struct kqueue *)fg->fg_data;
+	kqueue_dealloc(kq, p);
+	fg->fg_data = NULL;
 	return (0);
 }
 
 /*ARGSUSED*/
+/*
+ * The caller has taken a use-count reference on this kqueue and will donate it
+ * to the kqueue we are being added to.  This keeps the kqueue from closing until
+ * that relationship is torn down.
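+ *
+ * A minimal user space sketch of the case this filter supports,
+ * watching one kqueue from another (illustrative only):
+ *
+ *	int inner = kqueue(), outer = kqueue();
+ *	struct kevent kev;
+ *	EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, 0);
+ *	kevent(outer, &kev, 1, NULL, 0, NULL);
+ *
+ * Only EVFILT_READ is accepted below; anything else fails the attach.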
+ */
 static int
-kqueue_kqfilter(struct file *fp, struct knote *kn, struct proc *p)
+kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p)
 {
 	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
 
-	if (kn->kn_filter != EVFILT_READ || (kq->kq_state & KQ_SEL))
+	if (kn->kn_filter != EVFILT_READ)
 		return (1);
 
 	kn->kn_fop = &kqread_filtops;
+	kqlock(kq);
 	KNOTE_ATTACH(&kq->kq_sel.si_note, kn);
+	kqunlock(kq);
 	return (0);
 }
 
 /*ARGSUSED*/
 int
-kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
+kqueue_stat(struct fileproc *fp, struct stat *st, __unused struct proc *p)
 {
 	struct kqueue *kq = (struct kqueue *)fp->f_data;
 
@@ -916,19 +1691,22 @@ kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
 	return (0);
 }
 
+/*
+ * Called with the kqueue locked
+ */
 static void
 kqueue_wakeup(struct kqueue *kq)
 {
 	if (kq->kq_state & KQ_SLEEP) {
 		kq->kq_state &= ~KQ_SLEEP;
-		wakeup(kq);
+		thread_wakeup(kq);
 	}
 	if (kq->kq_state & KQ_SEL) {
-		// kq->kq_state &= ~KQ_SEL;	/* remove for now */
+		kq->kq_state &= ~KQ_SEL;
 		selwakeup(&kq->kq_sel);
-	} else
-		KNOTE(&kq->kq_sel.si_note, 0);
+	}
+	KNOTE(&kq->kq_sel.si_note, 0);
 }
 
 void
@@ -937,21 +1715,46 @@ klist_init(struct klist *list)
 {
 	SLIST_INIT(list);
 }
 
+
 /*
- * walk down a list of knotes, activating them if their event has triggered.
+ * Query/Post each knote in the object's list
+ *
+ * The object lock protects the list. It is assumed
+ * that the filter/event routine for the object can
+ * determine that the object is already locked (via
+ * the hint) and not deadlock itself.
+ *
+ * The object lock should also hold off pending
+ * detach/drop operations.  But we'll prevent it here
+ * too - just in case.
 */
 void
 knote(struct klist *list, long hint)
 {
 	struct knote *kn;
 
-	SLIST_FOREACH(kn, list, kn_selnext)
-		if (kn->kn_fop->f_event(kn, hint))
-			KNOTE_ACTIVATE(kn);
+	SLIST_FOREACH(kn, list, kn_selnext) {
+		struct kqueue *kq = kn->kn_kq;
+
+		kqlock(kq);
+		if (kqlock2knoteuse(kq, kn)) {
+			int result;
+
+			/* call the event with only a use count */
+			result = kn->kn_fop->f_event(kn, hint);
+
+			/* if it's not going away and triggered */
+			if (knoteuse2kqlock(kq, kn) && result)
+				knote_activate(kn);
+			/* lock held again */
+		}
+		kqunlock(kq);
+	}
 }
 
 /*
  * attach a knote to the specified list.  Return true if this is the first entry.
+ * The list is protected by whatever lock the object it is associated with uses.
  */
 int
 knote_attach(struct klist *list, struct knote *kn)
@@ -963,6 +1766,7 @@ knote_attach(struct klist *list, struct knote *kn)
 
 /*
  * detach a knote from the specified list.  Return true if that was the last entry.
+ * The list is protected by whatever lock the object it is associated with uses.
 */
 int
 knote_detach(struct klist *list, struct knote *kn)
@@ -972,67 +1776,88 @@ knote_detach(struct klist *list, struct knote *kn)
 }
 
 /*
- * remove all knotes from a specified klist
+ * remove all knotes referencing a specified fd
+ *
+ * Essentially an inlined knote_remove & knote_drop
+ * when we know for sure that the thing is a file
+ *
+ * Entered with the proc_fd lock already held.
+ * It returns the same way, but may drop it temporarily.
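+ *
+ * Assumed caller sketch (illustrative only - e.g. the file close
+ * path, which already holds the proc_fd lock):
+ *
+ *	if (fd < fdp->fd_knlistsize)
+ *		knote_fdclose(p, fd);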
*/ void -knote_remove(struct proc *p, struct klist *list) +knote_fdclose(struct proc *p, int fd) { + struct filedesc *fdp = p->p_fd; + struct klist *list; struct knote *kn; + list = &fdp->fd_knlist[fd]; while ((kn = SLIST_FIRST(list)) != NULL) { - kn->kn_fop->f_detach(kn); - knote_drop(kn, p); - } -} + struct kqueue *kq = kn->kn_kq; -/* - * remove all knotes referencing a specified fd - */ -void -knote_fdclose(struct proc *p, int fd) -{ - struct filedesc *fdp = p->p_fd; - struct klist *list = &fdp->fd_knlist[fd]; + kqlock(kq); + proc_fdunlock(p); + + /* + * Convert the lock to a drop ref. + * If we get it, go ahead and drop it. + * Otherwise, we waited for it to + * be dropped by the other guy, so + * it is safe to move on in the list. + */ + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } + + proc_fdlock(p); - knote_remove(p, list); + /* the fd tables may have changed - start over */ + list = &fdp->fd_knlist[fd]; + } } -static void -knote_fdpattach(struct knote *kn, struct filedesc *fdp) +/* proc_fdlock held on entry (and exit) */ +static int +knote_fdpattach(struct knote *kn, struct filedesc *fdp, __unused struct proc *p) { - struct klist *list; - int size; + struct klist *list = NULL; if (! kn->kn_fop->f_isfd) { if (fdp->fd_knhashmask == 0) fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, &fdp->fd_knhashmask); list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; - goto done; - } - - if (fdp->fd_knlistsize <= kn->kn_id) { - size = fdp->fd_knlistsize; - while (size <= kn->kn_id) - size += KQEXTENT; - MALLOC(list, struct klist *, - size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); - bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, - fdp->fd_knlistsize * sizeof(struct klist *)); - bzero((caddr_t)list + - fdp->fd_knlistsize * sizeof(struct klist *), - (size - fdp->fd_knlistsize) * sizeof(struct klist *)); - if (fdp->fd_knlist != NULL) + } else { + if ((u_int)fdp->fd_knlistsize <= kn->kn_id) { + u_int size = 0; + + /* have to grow the fd_knlist */ + size = fdp->fd_knlistsize; + while (size <= kn->kn_id) + size += KQEXTENT; + MALLOC(list, struct klist *, + size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); + if (list == NULL) + return (ENOMEM); + + bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, + fdp->fd_knlistsize * sizeof(struct klist *)); + bzero((caddr_t)list + + fdp->fd_knlistsize * sizeof(struct klist *), + (size - fdp->fd_knlistsize) * sizeof(struct klist *)); FREE(fdp->fd_knlist, M_KQUEUE); - fdp->fd_knlistsize = size; - fdp->fd_knlist = list; + fdp->fd_knlist = list; + fdp->fd_knlistsize = size; + } + list = &fdp->fd_knlist[kn->kn_id]; } - list = &fdp->fd_knlist[kn->kn_id]; -done: SLIST_INSERT_HEAD(list, kn, kn_link); - kn->kn_status = 0; + return (0); } + + /* * should be called at spl == 0, since we don't want to hold spl * while calling fdrop and free. 
@@ -1041,55 +1866,97 @@ static void knote_drop(struct knote *kn, struct proc *p) { struct filedesc *fdp = p->p_fd; + struct kqueue *kq = kn->kn_kq; struct klist *list; + proc_fdlock(p); if (kn->kn_fop->f_isfd) list = &fdp->fd_knlist[kn->kn_id]; else list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; SLIST_REMOVE(list, kn, knote, kn_link); - if (kn->kn_status & KN_QUEUED) - knote_dequeue(kn); + kqlock(kq); + knote_dequeue(kn); + if (kn->kn_status & KN_DROPWAIT) + thread_wakeup(&kn->kn_status); + kqunlock(kq); + proc_fdunlock(p); + if (kn->kn_fop->f_isfd) - fdrop(kn->kn_fp, p); + fp_drop(p, kn->kn_id, kn->kn_fp, 0); + knote_free(kn); } +/* called with kqueue lock held */ +static void +knote_activate(struct knote *kn) +{ + struct kqueue *kq = kn->kn_kq; + + kn->kn_status |= KN_ACTIVE; + knote_enqueue(kn); + kqueue_wakeup(kq); + } + +/* called with kqueue lock held */ +static void +knote_deactivate(struct knote *kn) +{ + kn->kn_status &= ~KN_ACTIVE; + knote_dequeue(kn); +} +/* called with kqueue lock held */ static void knote_enqueue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; - int s = splhigh(); - KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); + if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) { + struct kqtailq *tq = kn->kn_tq; - TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kn->kn_status |= KN_QUEUED; - kq->kq_count++; - splx(s); - kqueue_wakeup(kq); + TAILQ_INSERT_TAIL(tq, kn, kn_tqe); + kn->kn_status |= KN_QUEUED; + kq->kq_count++; + } } +/* called with kqueue lock held */ static void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; - int s = splhigh(); - KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); + assert((kn->kn_status & KN_DISABLED) == 0); + if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) { + struct kqtailq *tq = kn->kn_tq; - TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); - kn->kn_status &= ~KN_QUEUED; - kq->kq_count--; - splx(s); + TAILQ_REMOVE(tq, kn, kn_tqe); + kn->kn_tq = &kq->kq_head; + kn->kn_status &= ~KN_QUEUED; + kq->kq_count--; + } } void knote_init(void) { knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone"); + + /* allocate kq lock group attribute and group */ + kq_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(kq_lck_grp_attr); + + kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr); + + /* Allocate kq lock attribute */ + kq_lck_attr = lck_attr_alloc_init(); + lck_attr_setdefault(kq_lck_attr); + + /* Initialize the timer filter lock */ + lck_mtx_init(&_filt_timerlock, kq_lck_grp, kq_lck_attr); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL) @@ -1102,7 +1969,7 @@ knote_alloc(void) static void knote_free(struct knote *kn) { - zfree(knote_zone, (vm_offset_t)kn); + zfree(knote_zone, kn); } #include @@ -1116,8 +1983,17 @@ knote_free(struct knote *kn) #include -int raw_usrreq(); -struct pr_usrreqs event_usrreqs; +static int kev_attach(struct socket *so, int proto, struct proc *p); +static int kev_detach(struct socket *so); +static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p); + +struct pr_usrreqs event_usrreqs = { + pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp, + pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp, + pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp, + pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp, + pru_sosend_notsupp, soreceive, pru_sopoll_notsupp 
+}; struct protosw eventsw[] = { { @@ -1125,7 +2001,14 @@ struct protosw eventsw[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &event_usrreqs +#if __APPLE__ + 0, +#endif + &event_usrreqs, + 0, 0, 0, +#if __APPLE__ + {0, 0}, 0, {0} +#endif } }; @@ -1133,7 +2016,12 @@ static struct kern_event_head kern_event_head; static u_long static_event_id = 0; +struct domain *sysdom = &systemdomain; +static lck_grp_t *evt_mtx_grp; +static lck_attr_t *evt_mtx_attr; +static lck_grp_attr_t *evt_mtx_grp_attr; +lck_mtx_t *evt_mutex; /* * Install the protosw's for the NKE manager. Invoked at * extension load time @@ -1143,14 +2031,32 @@ kern_event_init(void) { int retval; - if ((retval = net_add_proto(eventsw, &systemdomain)) == 0) - return(KERN_SUCCESS); + if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) { + log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval); + return(retval); + } - log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval); - return(retval); + /* + * allocate lock group attribute and group for kern event + */ + evt_mtx_grp_attr = lck_grp_attr_alloc_init(); + + evt_mtx_grp = lck_grp_alloc_init("eventlist", evt_mtx_grp_attr); + + /* + * allocate the lock attribute for mutexes + */ + evt_mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(evt_mtx_attr); + evt_mutex = lck_mtx_alloc_init(evt_mtx_grp, evt_mtx_attr); + if (evt_mutex == NULL) + return (ENOMEM); + + return(KERN_SUCCESS); } -int kev_attach(struct socket *so, int proto, struct proc *p) +static int +kev_attach(struct socket *so, __unused int proto, __unused struct proc *p) { int error; struct kern_event_pcb *ev_pcb; @@ -1159,7 +2065,7 @@ int kev_attach(struct socket *so, int proto, struct proc *p) if (error) return error; - ev_pcb = _MALLOC(sizeof(struct kern_event_pcb), M_PCB, M_WAITOK); + MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK); if (ev_pcb == 0) return ENOBUFS; @@ -1167,25 +2073,69 @@ int kev_attach(struct socket *so, int proto, struct proc *p) ev_pcb->vendor_code_filter = 0xffffffff; so->so_pcb = (caddr_t) ev_pcb; + lck_mtx_lock(evt_mutex); LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link); + lck_mtx_unlock(evt_mutex); return 0; } -int kev_detach(struct socket *so) +static int +kev_detach(struct socket *so) { struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb; if (ev_pcb != 0) { - LIST_REMOVE(ev_pcb, ev_link); - FREE(ev_pcb, M_PCB); - so->so_pcb = 0; + lck_mtx_lock(evt_mutex); + LIST_REMOVE(ev_pcb, ev_link); + lck_mtx_unlock(evt_mutex); + FREE(ev_pcb, M_PCB); + so->so_pcb = 0; + so->so_flags |= SOF_PCBCLEARING; } return 0; } +/* + * For now, kev_vender_code and mbuf_tags use the same + * mechanism. 
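+ *
+ * Illustrative use (the vendor string is hypothetical):
+ *
+ *	u_long code;
+ *	if (kev_vendor_code_find("com.example.kext", &code) == 0)
+ *		post events with kev_msg.vendor_code set to code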
+ */ +extern errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id, + int create); + +errno_t kev_vendor_code_find( + const char *string, + u_long *out_vender_code) +{ + if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) { + return EINVAL; + } + return mbuf_tag_id_find_internal(string, out_vender_code, 1); +} + +extern void mbuf_tag_id_first_last(u_long *first, u_long *last); + +errno_t kev_msg_post(struct kev_msg *event_msg) +{ + u_long min_vendor, max_vendor; + + mbuf_tag_id_first_last(&min_vendor, &max_vendor); + + if (event_msg == NULL) + return EINVAL; + + /* Limit third parties to posting events for registered vendor codes only */ + if (event_msg->vendor_code < min_vendor || + event_msg->vendor_code > max_vendor) + { + return EINVAL; + } + + return kev_post_msg(event_msg); +} + int kev_post_msg(struct kev_msg *event_msg) { @@ -1193,9 +2143,21 @@ int kev_post_msg(struct kev_msg *event_msg) struct kern_event_pcb *ev_pcb; struct kern_event_msg *ev; char *tmp; - int total_size; + unsigned long total_size; int i; + /* Verify the message is small enough to fit in one mbuf w/o cluster */ + total_size = KEV_MSG_HEADER_SIZE; + + for (i = 0; i < 5; i++) { + if (event_msg->dv[i].data_length == 0) + break; + total_size += event_msg->dv[i].data_length; + } + + if (total_size > MLEN) { + return EMSGSIZE; + } m = m_get(M_DONTWAIT, MT_DATA); if (m == 0) @@ -1215,7 +2177,6 @@ int kev_post_msg(struct kev_msg *event_msg) tmp += event_msg->dv[i].data_length; } - ev->id = ++static_event_id; ev->total_size = total_size; ev->vendor_code = event_msg->vendor_code; @@ -1224,6 +2185,7 @@ int kev_post_msg(struct kev_msg *event_msg) ev->event_code = event_msg->event_code; m->m_len = total_size; + lck_mtx_lock(evt_mutex); for (ev_pcb = LIST_FIRST(&kern_event_head); ev_pcb; ev_pcb = LIST_NEXT(ev_pcb, ev_link)) { @@ -1245,67 +2207,69 @@ int kev_post_msg(struct kev_msg *event_msg) m2 = m_copym(m, 0, m->m_len, M_NOWAIT); if (m2 == 0) { m_free(m); + lck_mtx_unlock(evt_mutex); return ENOBUFS; } - - sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2); - sorwakeup(ev_pcb->ev_socket); + socket_lock(ev_pcb->ev_socket, 1); + if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2)) + sorwakeup(ev_pcb->ev_socket); + socket_unlock(ev_pcb->ev_socket, 1); } - m_free(m); + lck_mtx_unlock(evt_mutex); return 0; } - -int kev_control(so, cmd, data, ifp, p) - struct socket *so; - u_long cmd; - caddr_t data; - register struct ifnet *ifp; - struct proc *p; +static int +kev_control(struct socket *so, + u_long cmd, + caddr_t data, + __unused struct ifnet *ifp, + __unused struct proc *p) { - struct kev_request *kev_req = (struct kev_request *) data; - int stat = 0; - struct kern_event_pcb *ev_pcb; - u_long *id_value = (u_long *) data; - - - switch (cmd) { - - case SIOCGKEVID: - *id_value = static_event_id; - break; - - case SIOCSKEVFILT: - ev_pcb = (struct kern_event_pcb *) so->so_pcb; - ev_pcb->vendor_code_filter = kev_req->vendor_code; - ev_pcb->class_filter = kev_req->kev_class; - ev_pcb->subclass_filter = kev_req->kev_subclass; - break; - - case SIOCGKEVFILT: - ev_pcb = (struct kern_event_pcb *) so->so_pcb; - kev_req->vendor_code = ev_pcb->vendor_code_filter; - kev_req->kev_class = ev_pcb->class_filter; - kev_req->kev_subclass = ev_pcb->subclass_filter; - break; - - default: - return EOPNOTSUPP; - } - - return 0; + struct kev_request *kev_req = (struct kev_request *) data; + struct kern_event_pcb *ev_pcb; + struct kev_vendor_code *kev_vendor; + u_long *id_value = (u_long *) data; + + + switch (cmd) { + + case SIOCGKEVID: + 
*id_value = static_event_id; + break; + + case SIOCSKEVFILT: + ev_pcb = (struct kern_event_pcb *) so->so_pcb; + ev_pcb->vendor_code_filter = kev_req->vendor_code; + ev_pcb->class_filter = kev_req->kev_class; + ev_pcb->subclass_filter = kev_req->kev_subclass; + break; + + case SIOCGKEVFILT: + ev_pcb = (struct kern_event_pcb *) so->so_pcb; + kev_req->vendor_code = ev_pcb->vendor_code_filter; + kev_req->kev_class = ev_pcb->class_filter; + kev_req->kev_subclass = ev_pcb->subclass_filter; + break; + + case SIOCGKEVVENDOR: + kev_vendor = (struct kev_vendor_code*)data; + + /* Make sure string is NULL terminated */ + kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0; + + return mbuf_tag_id_find_internal(kev_vendor->vendor_string, + &kev_vendor->vendor_code, 0); + + default: + return ENOTSUP; + } + + return 0; } -struct pr_usrreqs event_usrreqs = { - pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp, - pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp, - pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp, - pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp, - pru_sosend_notsupp, soreceive, sopoll -}; - diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 191e3f396..e3ed77f2c 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -74,37 +74,53 @@ #include #include #include -#include +#include +#include #include -#include #include #include #include -#include -#include -#include +#include +#include +#include #include -#include +#include #include #include #include #include #include +#include +#include /* shmexec() */ +#include /* ubc_map() */ #include +#include +#include +#include +#include +#include #include #include - -extern vm_map_t vm_map_switch(vm_map_t map); /* XXX */ - #include +#include +#include +#include #include +/* + * Mach things for which prototypes are unavailable from Mach headers + */ +void ipc_task_reset( + task_t task); + +extern struct savearea *get_user_regs(thread_t); + + #include #include - #include #include #include @@ -112,152 +128,723 @@ extern vm_map_t vm_map_switch(vm_map_t map); /* XXX */ #include #if KTRACE #include -#include #endif +#include + + +/* + * SIZE_MAXPTR The maximum size of a user space pointer, in bytes + * SIZE_IMG_STRSPACE The available string space, minus two pointers; we + * define it interms of the maximum, since we don't + * know the pointer size going in, until after we've + * parsed the executable image. 
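+ *
+ *			For example (illustrative only; the actual value
+ *			of NCARGS is configuration dependent): with an
+ *			NCARGS of 256KB this yields 262144 - 2*8 = 262128
+ *			bytes of string space.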
+ */ +#define SIZE_MAXPTR 8 /* 64 bits */ +#define SIZE_IMG_STRSPACE (NCARGS - 2 * SIZE_MAXPTR) int app_profile = 0; extern vm_map_t bsd_pageable_map; +extern struct fileops vnops; #define ROUND_PTR(type, addr) \ (type *)( ( (unsigned)(addr) + 16 - 1) \ & ~(16 - 1) ) +struct image_params; /* Forward */ +static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp); static int load_return_to_errno(load_return_t lrtn); -int execve(struct proc *p, struct execve_args *uap, register_t *retval); -static int execargs_alloc(vm_offset_t *addrp); -static int execargs_free(vm_offset_t addr); +static int execargs_alloc(struct image_params *imgp); +static int execargs_free(struct image_params *imgp); +static int exec_check_permissions(struct image_params *imgp); +static int exec_extract_strings(struct image_params *imgp); +static int exec_handle_sugid(struct image_params *imgp); static int sugid_scripts = 0; SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, ""); +static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack, + int customstack, struct proc *p); +static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); + +/* XXX forward; should be in headers, but can't be for one reason or another */ +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); +extern void vfork_return(thread_t th_act, + struct proc * p, + struct proc *p2, + register_t *retval); -int -execv(p, args, retval) - struct proc *p; - void *args; - int *retval; -{ - ((struct execve_args *)args)->envp = NULL; - return (execve(p, args, retval)); -} extern char classichandler[32]; -extern long classichandler_fsid; +extern uint32_t classichandler_fsid; extern long classichandler_fileid; + /* - * Helper routine to get rid of a loop in execve. Given a pointer to - * something for the arg list (which might be in kernel space or in user - * space), copy it into the kernel buffer at the currentWritePt. This code - * does the proper thing to get the data transferred. - * bytesWritten, currentWritePt, and bytesLeft are kept up-to-date. + * exec_add_string + * + * Add the requested string to the string space area. + * + * Parameters; struct image_params * image parameter block + * user_addr_t string to add to strings area + * uio_seg segment where string is located + * + * Returns: 0 Success + * !0 Failure errno from copyinstr() + * + * Implicit returns: + * (imgp->ip_strendp) updated location of next add, if any + * (imgp->ip_strspace) updated byte count of space remaining */ - -static int copyArgument(char *argument, int pointerInKernel, - int *bytesWritten,char **currentWritePt, - int *bytesLeft){ +static int +exec_add_string(struct image_params *imgp, user_addr_t str, /*uio_seg*/int seg) +{ int error = 0; + do { size_t len = 0; - if (*bytesLeft <= 0) { + if (imgp->ip_strspace <= 0) { error = E2BIG; break; } - if (pointerInKernel == UIO_SYSSPACE) { - error = copystr(argument, *currentWritePt, (unsigned)*bytesLeft, &len); + if (IS_UIO_SYS_SPACE(seg)) { + char *kstr = CAST_DOWN(char *,str); /* SAFE */ + error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len); } else { - /* - * pointer in kernel == UIO_USERSPACE - * Copy in from user space. 
- */ - error = copyinstr((caddr_t)argument, *currentWritePt, (unsigned)*bytesLeft, + error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace, &len); } - *currentWritePt += len; - *bytesWritten += len; - *bytesLeft -= len; + imgp->ip_strendp += len; + imgp->ip_strspace -= len; } while (error == ENAMETOOLONG); + return error; } -/* ARGSUSED */ -int -execve(p, uap, retval) - register struct proc *p; - register struct execve_args *uap; - register_t *retval; +/* + * exec_save_path + * + * To support new app package launching for Mac OS X, the dyld needs the + * first argument to execve() stored on the user stack. + * + * Save the executable path name at the top of the strings area and set + * the argument vector pointer to the location following that to indicate + * the start of the argument and environment tuples, setting the remaining + * string space count to the size of the string area minus the path length + * and a reserve for two pointers. + * + * Parameters; struct image_params * image parameter block + * char * path used to invoke program + * uio_seg segment where path is located + * + * Returns: int 0 Success + * !0 Failure: error number + * Implicit returns: + * (imgp->ip_strings) saved path + * (imgp->ip_strspace) space remaining in ip_strings + * (imgp->ip_argv) beginning of argument list + * (imgp->ip_strendp) start of remaining copy area + * + * Note: We have to do this before the initial namei() since in the + * path contains symbolic links, namei() will overwrite the + * original path buffer contents. If the last symbolic link + * resolved was a relative pathname, we would lose the original + * "path", which could be an absolute pathname. This might be + * unacceptable for dyld. + */ +static int +exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg) { - register struct ucred *cred = p->p_ucred; - register struct filedesc *fdp = p->p_fd; - int nc; - char *cp; - int na, ne, ucp, ap, cc; - unsigned len; - int executingInterpreter=0; - - int executingClassic=0; - char binaryWithClassicName[sizeof(p->p_comm)] = {0}; - char *execnamep; - struct vnode *vp; - struct vattr vattr; - struct vattr origvattr; - vm_offset_t execargs; - struct nameidata nd; - struct ps_strings ps; -#define SHSIZE 512 - /* Argument(s) to an interpreter. If we're executing a shell - * script, the name (#!/bin/csh) is allowed to be followed by - * arguments. cfarg holds these arguments. + int error; + size_t len; + char *kpath = CAST_DOWN(char *,path); /* SAFE */ + + imgp->ip_strendp = imgp->ip_strings; + imgp->ip_strspace = SIZE_IMG_STRSPACE; + + len = MIN(MAXPATHLEN, imgp->ip_strspace); + + switch( seg) { + case UIO_USERSPACE32: + case UIO_USERSPACE64: /* Same for copyin()... */ + error = copyinstr(path, imgp->ip_strings, len, &len); + break; + case UIO_SYSSPACE32: + error = copystr(kpath, imgp->ip_strings, len, &len); + break; + default: + error = EFAULT; + break; + } + + if (!error) { + imgp->ip_strendp += len; + imgp->ip_strspace -= len; + imgp->ip_argv = imgp->ip_strendp; + } + + return(error); +} + + + +/* + * exec_shell_imgact + * + * Image activator for interpreter scripts. If the image begins with the + * characters "#!", then it is an interpreter script. Verify that we are + * not already executing in Classic mode, and that the length of the script + * line indicating the interpreter is not in excess of the maximum allowed + * size. 
If this is the case, then break out the arguments, if any, which + * are separated by white space, and copy them into the argument save area + * as if they were provided on the command line before all other arguments. + * The line ends when we encounter a comment character ('#') or newline. + * + * Parameters; struct image_params * image parameter block + * + * Returns: -1 not an interpreter (keep looking) + * -3 Success: interpreter: relookup + * >0 Failure: interpreter: error number + * + * A return value other than -1 indicates subsequent image activators should + * not be given the opportunity to attempt to activate the image. + */ +static int +exec_shell_imgact(struct image_params *imgp) +{ + char *vdata = imgp->ip_vdata; + char *ihp; + char *line_endp; + char *interp; + + /* + * Make sure it's a shell script. If we've already redirected + * from an interpreted file once, don't do it again. + * + * Note: We disallow Classic, since the expectation is that we + * may run a Classic interpreter, but not an interpret a Classic + * image. This is consistent with historical behaviour. */ - char cfarg[SHSIZE]; - boolean_t is_fat; - kern_return_t ret; - struct mach_header *mach_header; - struct fat_header *fat_header; - struct fat_arch fat_arch; - load_return_t lret; - load_result_t load_result; + if (vdata[0] != '#' || + vdata[1] != '!' || + (imgp->ip_flags & IMGPF_INTERPRET) != 0) { + return (-1); + } + + + imgp->ip_flags |= IMGPF_INTERPRET; + + /* Check to see if SUGID scripts are permitted. If they aren't then + * clear the SUGID bits. + * imgp->ip_vattr is known to be valid. + */ + if (sugid_scripts == 0) { + imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID); + } + + /* Find the nominal end of the interpreter line */ + for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) { + if (ihp >= &vdata[IMG_SHSIZE]) + return (ENOEXEC); + } + + line_endp = ihp; + ihp = &vdata[2]; + /* Skip over leading spaces - until the interpreter name */ + while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t'))) + ihp++; + + /* + * Find the last non-whitespace character before the end of line or + * the beginning of a comment; this is our new end of line. + */ + for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--) + continue; + + /* Empty? */ + if (line_endp == ihp) + return (ENOEXEC); + + /* copy the interpreter name */ + interp = imgp->ip_interp_name; + while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) + *interp++ = *ihp++; + *interp = '\0'; + + exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name), + UIO_SYSSPACE32); + + ihp = &vdata[2]; + while (ihp < line_endp) { + /* Skip leading whitespace before each argument */ + while ((*ihp == ' ') || (*ihp == '\t')) + ihp++; + + if (ihp >= line_endp) + break; + + /* We have an argument; copy it */ + while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) { + *imgp->ip_strendp++ = *ihp++; + imgp->ip_strspace--; + } + *imgp->ip_strendp++ = 0; + imgp->ip_strspace--; + imgp->ip_argc++; + } + + return (-3); +} + + + +/* + * exec_fat_imgact + * + * Image activator for fat 1.0 binaries. If the binary is fat, then we + * need to select an image from it internally, and make that the image + * we are going to attempt to execute. At present, this consists of + * reloading the first page for the image with a first page from the + * offset location indicated by the fat header. + * + * Important: This image activator is byte order neutral. 
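+ *		(Neutrality holds because the magic check below accepts
+ *		both FAT_MAGIC and its byte-swapped form, FAT_CIGAM.)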
+ * + * Note: If we find an encapsulated binary, we make no assertions + * about its validity; instead, we leave that up to a rescan + * for an activator to claim it, and, if it is claimed by one, + * that activator is responsible for determining validity. + */ +static int +exec_fat_imgact(struct image_params *imgp) +{ + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + kauth_cred_t cred = p->p_ucred; + struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata; + struct fat_arch fat_arch; + int resid, error; + load_return_t lret; + + /* Make sure it's a fat binary */ + if ((fat_header->magic != FAT_MAGIC) && + (fat_header->magic != FAT_CIGAM)) { + error = -1; + goto bad; + } + + /* Look up our preferred architecture in the fat file. */ + lret = fatfile_getarch_affinity(imgp->ip_vp, + (vm_offset_t)fat_header, + &fat_arch, + (p->p_flag & P_AFFINITY)); + if (lret != LOAD_SUCCESS) { + error = load_return_to_errno(lret); + goto bad; + } + + /* Read the Mach-O header out of it */ + error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, + PAGE_SIZE, fat_arch.offset, + UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED), + cred, &resid, p); + if (error) { + goto bad; + } + + /* Did we read a complete header? */ + if (resid) { + error = EBADEXEC; + goto bad; + } + + /* Success. Indicate we have identified an encapsulated binary */ + error = -2; + imgp->ip_arch_offset = (user_size_t)fat_arch.offset; + imgp->ip_arch_size = (user_size_t)fat_arch.size; + +bad: + return (error); +} + +/* + * exec_mach_imgact + * + * Image activator for mach-o 1.0 binaries. + * + * Important: This image activator is NOT byte order neutral. + */ +static int +exec_mach_imgact(struct image_params *imgp) +{ + struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata; + kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context); + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + int error = 0; + int vfexec = 0; + task_t task; + task_t new_task; + thread_t thread; struct uthread *uthread; - vm_map_t old_map; + vm_map_t old_map = VM_MAP_NULL; vm_map_t map; - int i; boolean_t clean_regions = FALSE; - shared_region_mapping_t shared_region = NULL; shared_region_mapping_t initial_region = NULL; + load_return_t lret; + load_result_t load_result; + + /* + * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference + * is a reserved field on the end, so for the most part, we can + * treat them as if they were identical. + */ + if ((mach_header->magic != MH_MAGIC) && + (mach_header->magic != MH_MAGIC_64)) { + error = -1; + goto bad; + } + + task = current_task(); + thread = current_thread(); + uthread = get_bsdthread_info(thread); + + if (uthread->uu_flag & UT_VFORK) + vfexec = 1; /* Mark in exec */ + + if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64) + imgp->ip_flags |= IMGPF_IS_64BIT; + + if (!grade_binary(mach_header->cputype, mach_header->cpusubtype)) { + error = EBADARCH; + goto bad; + } + + /* + * Copy in arguments/environment from the old process, if the + * vector is non-NULL (i.e. exec is not being called from + * load_init_program(), as a special case, at system startup). + */ + if (imgp->ip_user_argv != 0LL) { + error = exec_extract_strings(imgp); + if (error) + goto bad; + } + + /* + * Hack for binary compatability; put three NULs on the end of the + * string area, and round it up to the next word boundary. This + * ensures padding with NULs to the boundary. 
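+	 *
+	 * For example (illustrative): with NBPW of 4 and 13 bytes of
+	 * strings, (13 + NBPW-1) & ~(NBPW-1) rounds the end of the
+	 * area up to 16 bytes.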
+ */ + imgp->ip_strendp[0] = 0; + imgp->ip_strendp[1] = 0; + imgp->ip_strendp[2] = 0; + imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1)); + + + if (vfexec) { + kern_return_t result; + + result = task_create_internal(task, FALSE, &new_task); + if (result != KERN_SUCCESS) + printf("execve: task_create failed. Code: 0x%x\n", result); + p->task = new_task; + set_bsdtask_info(new_task, p); + if (p->p_nice != 0) + resetpriority(p); + map = get_task_map(new_task); + result = thread_create(new_task, &imgp->ip_vfork_thread); + if (result != KERN_SUCCESS) + printf("execve: thread_create failed. Code: 0x%x\n", result); + /* reset local idea of task, thread, uthread */ + task = new_task; + thread = imgp->ip_vfork_thread; + uthread = get_bsdthread_info(thread); + } else { + map = VM_MAP_NULL; + } + + /* + * We set these flags here; this is OK, since if we fail after + * this point, we have already destroyed the parent process anyway. + */ + if (imgp->ip_flags & IMGPF_IS_64BIT) { + task_set_64bit(task, TRUE); + p->p_flag |= P_LP64; + } else { + task_set_64bit(task, FALSE); + p->p_flag &= ~P_LP64; + } + + /* + * Load the Mach-O file. + */ +/* LP64 - remove following "if" statement after osfmk/vm/task_working_set.c */ +if((imgp->ip_flags & IMGPF_IS_64BIT) == 0) + if(imgp->ip_tws_cache_name) { + tws_handle_startup_file(task, kauth_cred_getuid(cred), + imgp->ip_tws_cache_name, imgp->ip_vp, &clean_regions); + } + + vm_get_shared_region(task, &initial_region); + + + /* + * NOTE: An error after this point indicates we have potentially + * destroyed or overwrote some process state while attempting an + * execve() following a vfork(), which is an unrecoverable condition. + */ + + /* + * We reset the task to 64-bit (or not) here. It may have picked up + * a new map, and we need that to reflect its true 64-bit nature. + */ + task_set_64bit(task, + ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)); + + /* + * Actually load the image file we previously decided to load. + */ + lret = load_machfile(imgp, mach_header, thread, map, clean_regions, &load_result); + + if (lret != LOAD_SUCCESS) { + error = load_return_to_errno(lret); + goto badtoolate; + } + + /* load_machfile() maps the vnode */ + (void)ubc_map(imgp->ip_vp, PROT_EXEC); + + /* + * deal with set[ug]id. + */ + error = exec_handle_sugid(imgp); + + KNOTE(&p->p_klist, NOTE_EXEC); + + if (!vfexec && (p->p_flag & P_TRACED)) + psignal(p, SIGTRAP); + + if (error) { + goto badtoolate; + } + vnode_put(imgp->ip_vp); + imgp->ip_vp = NULL; + + if (load_result.unixproc && + create_unix_stack(get_task_map(task), + load_result.user_stack, load_result.customstack, p)) { + error = load_return_to_errno(LOAD_NOSPACE); + goto badtoolate; + } + + if (vfexec) { + uthread->uu_ar0 = (void *)get_user_regs(thread); + old_map = vm_map_switch(get_task_map(task)); + } + + if (load_result.unixproc) { + user_addr_t ap; + + /* + * Copy the strings area out into the new process address + * space. 
+ */ + ap = p->user_stack; + error = exec_copyout_strings(imgp, &ap); + if (error) { + if (vfexec) + vm_map_switch(old_map); + goto badtoolate; + } + /* Set the stack */ + thread_setuserstack(thread, ap); + } + + if (load_result.dynlinker) { + uint64_t ap; + + /* Adjust the stack */ + if (imgp->ip_flags & IMGPF_IS_64BIT) { + ap = thread_adjuserstack(thread, -8); + (void)copyoutptr(load_result.mach_header, ap, 8); + } else { + ap = thread_adjuserstack(thread, -4); + (void)suword(ap, load_result.mach_header); + } + } + + if (vfexec) { + vm_map_switch(old_map); + } + /* Set the entry point */ + thread_setentrypoint(thread, load_result.entry_point); + + /* Stop profiling */ + stopprofclock(p); + + /* + * Reset signal state. + */ + execsigs(p, thread); + + /* + * Close file descriptors + * which specify close-on-exec. + */ + fdexec(p); + + /* + * need to cancel async IO requests that can be cancelled and wait for those + * already active. MAY BLOCK! + */ + _aio_exec( p ); + + /* FIXME: Till vmspace inherit is fixed: */ + if (!vfexec && p->vm_shm) + shmexec(p); + /* Clean up the semaphores */ + semexit(p); + + /* + * Remember file name for accounting. + */ + p->p_acflag &= ~AFORK; + /* If the translated name isn't NULL, then we want to use + * that translated name as the name we show as the "real" name. + * Otherwise, use the name passed into exec. + */ + if (0 != imgp->ip_p_comm[0]) { + bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm, + sizeof(p->p_comm)); + } else { + if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN) + imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN; + bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm, + (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen); + p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; + } - union { - /* #! and name of interpreter */ - char ex_shell[SHSIZE]; - /* Mach-O executable */ - struct mach_header mach_header; - /* Fat executable */ - struct fat_header fat_header; - char pad[512]; - } exdata; + { + /* This is for kdebug */ + long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4; + + /* Collect the pathname for tracing */ + kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); + + + + if (vfexec) + { + KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, + p->p_pid ,0,0,0, (unsigned int)thread); + KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread); + } + else + { + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, + p->p_pid ,0,0,0,0); + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); + } + } + + p->p_flag &= ~P_CLASSIC; + + /* + * mark as execed, wakeup the process that vforked (if any) and tell + * it that it now has it's own resources back + */ + p->p_flag |= P_EXEC; + if (p->p_pptr && (p->p_flag & P_PPWAIT)) { + p->p_flag &= ~P_PPWAIT; + wakeup((caddr_t)p->p_pptr); + } + + if (vfexec && (p->p_flag & P_TRACED)) { + psignal_vfork(p, new_task, thread, SIGTRAP); + } + +badtoolate: + if (vfexec) { + task_deallocate(new_task); + thread_deallocate(thread); + if (error) + error = 0; + } + +bad: + return(error); +} + + + + +/* + * Our image activator table; this is the table of the image types we are + * capable of loading. We list them in order of preference to ensure the + * fastest image load speed. 
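+ *
+ * The activators share a return protocol (see above): -1 means the
+ * image was not claimed and the next activator is tried; -2 means an
+ * encapsulated binary was found and the new first page is rescanned;
+ * -3 means an interpreter was found and the lookup restarts on it;
+ * 0 is success, and any other positive value is an errno.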
+ * + * XXX hardcoded, for now; should use linker sets + */ +struct execsw { + int (*ex_imgact)(struct image_params *); + const char *ex_name; +} execsw[] = { + { exec_mach_imgact, "Mach-o Binary" }, + { exec_fat_imgact, "Fat Binary" }, + { exec_shell_imgact, "Interpreter Script" }, + { NULL, NULL} +}; + + +/* + * TODO: Dynamic linker header address on stack is copied via suword() + */ +/* ARGSUSED */ +int +execve(struct proc *p, struct execve_args *uap, register_t *retval) +{ + kauth_cred_t cred = p->p_ucred; + struct image_params image_params, *imgp; + struct vnode_attr va; + struct vnode_attr origva; + struct nameidata nd; + struct uthread *uthread; + int i; int resid, error; - char *savedpath; - int savedpathlen = 0; - vm_offset_t *execargsp; - char *cpnospace; task_t task; - task_t new_task; - thread_act_t thr_act; int numthreads; int vfexec=0; - unsigned long arch_offset =0; - unsigned long arch_size = 0; - char *ws_cache_name = NULL; /* used for pre-heat */ + int once = 1; /* save SGUID-ness for interpreted files */ + char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for Classic */ + int is_64 = IS_64BIT_PROCESS(p); + int seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */ + + + imgp = &image_params; + + /* Initialize the common data in the image_params structure */ + bzero(imgp, sizeof(*imgp)); + imgp->ip_user_fname = uap->fname; + imgp->ip_user_argv = uap->argp; + imgp->ip_user_envv = uap->envp; + imgp->ip_vattr = &va; + imgp->ip_origvattr = &origva; + imgp->ip_vfs_context = &context; + imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); + imgp->ip_tws_cache_name = NULL; + imgp->ip_p_comm = alt_p_comm; /* for Classic */ - /* + /* * XXXAUDIT: Currently, we only audit the pathname of the binary. * There may also be poor interaction with dyld. */ - cfarg[0] = '\0'; /* initialize to null value. */ task = current_task(); - thr_act = current_act(); - uthread = get_bsdthread_info(thr_act); + uthread = get_bsdthread_info(current_thread()); - if (uthread->uu_flag & P_VFORK) { + if (uthread->uu_flag & UT_VFORK) { vfexec = 1; /* Mark in exec */ } else { if (task != kernel_task) { @@ -265,543 +852,556 @@ execve(p, uap, retval) if (numthreads <= 0 ) return(EINVAL); if (numthreads > 1) { - return(EOPNOTSUPP); + return(ENOTSUP); } } } - error = execargs_alloc(&execargs); + error = execargs_alloc(imgp); if (error) return(error); - - savedpath = (char *)execargs; - - /* - * To support new app package launching for Mac OS X, the dyld - * needs the first argument to execve() stored on the user stack. - * Copyin the "path" at the begining of the "execargs" buffer - * allocated above. - * - * We have to do this before namei() because in case of - * symbolic links, namei() would overwrite the original "path". - * In case the last symbolic link resolved was a relative pathname - * we would lose the original "path", which could be an - * absolute pathname. This might be unacceptable for dyld. - */ - /* XXX We could optimize to avoid copyinstr in the namei() */ - + /* * XXXAUDIT: Note: the double copyin introduces an audit * race. To correct this race, we must use a single - * copyin(). + * copyin(), e.g. by passing a flag to namei to indicate an + * external path buffer is being used. 
*/ - - error = copyinstr(uap->fname, savedpath, - MAXPATHLEN, (size_t *)&savedpathlen); + error = exec_save_path(imgp, uap->fname, seg); if (error) { - execargs_free(execargs); + execargs_free(imgp); return(error); } + /* - * copyinstr will put in savedpathlen, the count of - * characters (including NULL) in the path. * No app profiles under chroot */ - - if((fdp->fd_rdir == NULLVP) && (app_profile != 0)) { + if((p->p_fd->fd_rdir == NULLVP) && (app_profile != 0)) { /* grab the name of the file out of its path */ /* we will need this for lookup within the */ /* name file */ - ws_cache_name = savedpath + savedpathlen; - while (ws_cache_name[0] != '/') { - if(ws_cache_name == savedpath) { - ws_cache_name--; + /* Scan backwards for the first '/' or start of string */ + imgp->ip_tws_cache_name = imgp->ip_strendp; + while (imgp->ip_tws_cache_name[0] != '/') { + if(imgp->ip_tws_cache_name == imgp->ip_strings) { + imgp->ip_tws_cache_name--; break; } - ws_cache_name--; + imgp->ip_tws_cache_name--; } - ws_cache_name++; + imgp->ip_tws_cache_name++; } + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + seg, uap->fname, imgp->ip_vfs_context); - /* Save the name aside for future use */ - execargsp = (vm_offset_t *)((char *)(execargs) + savedpathlen); - - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SAVENAME | AUDITVNPATH1, - UIO_USERSPACE, uap->fname, p); +again: error = namei(&nd); if (error) - goto bad1; - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_READ); - - if ((error = VOP_GETATTR(vp, &origvattr, p->p_ucred, p))) goto bad; + imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */ + imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */ - /* Check mount point */ - if (vp->v_mount->mnt_flag & MNT_NOEXEC) { - error = EACCES; + error = exec_check_permissions(imgp); + if (error) goto bad; - } - if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED)) - origvattr.va_mode &= ~(VSUID | VSGID); - - *(&vattr) = *(&origvattr); + /* Copy; avoid invocation of an interpreter overwriting the original */ + if (once) { + once = 0; + origva = va; + } -again: - error = check_exec_access(p, vp, &vattr); + error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0, + UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p); if (error) goto bad; + +encapsulated_binary: + error = -1; + for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) { - /* - * Read in first few bytes of file for segment sizes, magic number: - * 407 = plain executable - * 410 = RO text - * 413 = demand paged RO text - * Also an ASCII line beginning with #! is - * the file name of a ``shell'' and arguments may be prepended - * to the argument list if given here. - * - * SHELL NAMES ARE LIMITED IN LENGTH. - * - * ONLY ONE ARGUMENT MAY BE PASSED TO THE SHELL FROM - * THE ASCII LINE. 
- */ + error = (*execsw[i].ex_imgact)(imgp); - exdata.ex_shell[0] = '\0'; /* for zero length files */ + switch (error) { + /* case -1: not claimed: continue */ + case -2: /* Encapsulated binary */ + goto encapsulated_binary; - error = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata, sizeof (exdata), 0, - UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); + case -3: /* Interpreter */ + vnode_put(imgp->ip_vp); + imgp->ip_vp = NULL; /* already put */ + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) | + (FOLLOW | LOCKLEAF); - if (error) - goto bad; -#ifndef lint - if (resid > sizeof(exdata) - min(sizeof(exdata.mach_header), - sizeof(exdata.fat_header)) - && exdata.ex_shell[0] != '#') { + nd.ni_segflg = UIO_SYSSPACE32; + nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name); + goto again; + + default: + break; + } + } + + /* call out to allow 3rd party notification of exec. + * Ignore result of kauth_authorize_fileop call. + */ + if (error == 0 && kauth_authorize_fileop_has_listeners()) { + kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_EXEC, + (uintptr_t)nd.ni_vp, 0); + } + + /* Image not claimed by any activator? */ + if (error == -1) error = ENOEXEC; - goto bad; + +bad: + if (imgp->ip_ndp) + nameidone(imgp->ip_ndp); + if (imgp->ip_vp) + vnode_put(imgp->ip_vp); + if (imgp->ip_strings) + execargs_free(imgp); + if (!error && vfexec) { + vfork_return(current_thread(), p->p_pptr, p, retval); + (void)thread_resume(imgp->ip_vfork_thread); + return(0); } -#endif /* lint */ - mach_header = &exdata.mach_header; - fat_header = &exdata.fat_header; - if ((mach_header->magic == MH_CIGAM) && - (classichandler[0] == 0)) { - error = EBADARCH; - goto bad; - } else if ((mach_header->magic == MH_MAGIC) || - (mach_header->magic == MH_CIGAM)) { - is_fat = FALSE; - } else if ((fat_header->magic == FAT_MAGIC) || - (fat_header->magic == FAT_CIGAM)) { - is_fat = TRUE; + return(error); +} + + +static int +copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size) +{ + int error; + + if (ptr_size == 4) { + /* 64 bit value containing 32 bit address */ + unsigned int i; + + error = copyin(froma, &i, 4); + *toptr = CAST_USER_ADDR_T(i); /* SAFE */ } else { - /* If we've already redirected once from an interpreted file - * to an interpreter, don't permit the second time. - */ - if (exdata.ex_shell[0] != '#' || - exdata.ex_shell[1] != '!' || - executingInterpreter) { - error = ENOEXEC; - goto bad; - } - if (executingClassic == 1) { - error = EBADARCH; - goto bad; - } + error = copyin(froma, toptr, 8); + } + return (error); +} - /* Check to see if SUGID scripts are permitted. If they aren't then - * clear the SUGID bits. - */ - if (sugid_scripts == 0) { - origvattr.va_mode &= ~(VSUID | VSGID); - } - - cp = &exdata.ex_shell[2]; /* skip "#!" */ - while (cp < &exdata.ex_shell[SHSIZE]) { - if (*cp == '\t') /* convert all tabs to spaces */ - *cp = ' '; - else if (*cp == '\n' || *cp == '#') { - *cp = '\0'; /* trunc the line at nl or comment */ - - /* go back and remove the spaces before the /n or # */ - /* todo: do we have to do this if we fix the passing of args to shells ? 
*/ - if ( cp != &exdata.ex_shell[2] ) { - do { - if ( *(cp-1) != ' ') - break; - *(--cp) = '\0'; - } while ( cp != &exdata.ex_shell[2] ); - } - break; - } - cp++; - } - if (*cp != '\0') { - error = ENOEXEC; - goto bad; - } - cp = &exdata.ex_shell[2]; - while (*cp == ' ') - cp++; - execnamep = cp; - while (*cp && *cp != ' ') - cp++; - cfarg[0] = '\0'; - cpnospace = cp; - if (*cp) { - *cp++ = '\0'; - while (*cp == ' ') - cp++; - if (*cp) - bcopy((caddr_t)cp, (caddr_t)cfarg, SHSIZE); - } - /* - * Support for new app package launching for Mac OS X. - * We are about to retry the execve() by changing the path to the - * interpreter name. Need to re-initialize the savedpath and - * savedpathlen. +1 for NULL. - */ - savedpathlen = (cpnospace - execnamep + 1); - error = copystr(execnamep, savedpath, - savedpathlen, (size_t *)&savedpathlen); - if (error) - goto bad; +static int +copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size) +{ + int error; - /* Save the name aside for future use */ - execargsp = (vm_offset_t *)((char *)(execargs) + savedpathlen); - - executingInterpreter= 1; - vput(vp); - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) | - (FOLLOW | LOCKLEAF | SAVENAME); - nd.ni_segflg = UIO_SYSSPACE; - nd.ni_dirp = execnamep; - if ((error = namei(&nd))) - goto bad1; - vp = nd.ni_vp; - VOP_LEASE(vp, p, cred, LEASE_READ); - if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))) - goto bad; - goto again; + if (ptr_size == 4) { + /* 64 bit value containing 32 bit address */ + unsigned int i = CAST_DOWN(unsigned int,ua); /* SAFE */ + + error = copyout(&i, ptr, 4); + } else { + error = copyout(&ua, ptr, 8); } + return (error); +} + + +/* + * exec_copyout_strings + * + * Copy out the strings segment to user space. The strings segment is put + * on a preinitialized stack frame. + * + * Parameters: struct image_params * the image parameter block + * int * a pointer to the stack offset variable + * + * Returns: 0 Success + * !0 Faiure: errno + * + * Implicit returns: + * (*stackp) The stack offset, modified + * + * Note: The strings segment layout is backward, from the beginning + * of the top of the stack to consume the minimal amount of + * space possible; the returned stack pointer points to the + * end of the area consumed (stacks grow upward). + * + * argc is an int; arg[i] are pointers; env[i] are pointers; + * exec_path is a pointer; the 0's are (void *)NULL's + * + * The stack frame layout is: + * + * +-------------+ + * sp-> | argc | + * +-------------+ + * | arg[0] | + * +-------------+ + * : + * : + * +-------------+ + * | arg[argc-1] | + * +-------------+ + * | 0 | + * +-------------+ + * | env[0] | + * +-------------+ + * : + * : + * +-------------+ + * | env[n] | + * +-------------+ + * | 0 | + * +-------------+ + * | exec_path | In MacOS X PR2 Beaker2E the path passed to exec() is + * +-------------+ passed on the stack just after the trailing 0 of the + * | 0 | the envp[] array as a pointer to a string. + * +-------------+ + * | PATH AREA | + * +-------------+ + * | STRING AREA | + * : + * : + * | | <- p->user_stack + * +-------------+ + * + * Although technically a part of the STRING AREA, we treat the PATH AREA as + * a separate entity. This allows us to align the beginning of the PATH AREA + * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers + * which preceed it on the stack are properly aligned. 
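The copyinptr()/copyoutptr() pair above exists because a 32-bit process stores pointers in 4 bytes that must be zero-extended into the kernel's 64-bit user_addr_t, while a 64-bit process stores them full width. A userspace analogue of the fetch side; memcpy() stands in for copyin() and is an assumption of the sketch, not the kernel API:

    #include <stdint.h>
    #include <string.h>

    typedef uint64_t user_addr_t;        /* matches the kernel's width */

    /*
     * Analogue of copyinptr(): read a pointer-sized value laid out for
     * a 32- or 64-bit process; ptr_size is 4 or 8.  A 4-byte pointer is
     * zero-extended, never sign-extended.
     */
    static int fetch_user_pointer(const void *from, user_addr_t *to, int ptr_size)
    {
        if (ptr_size == 4) {
            uint32_t p32;
            memcpy(&p32, from, sizeof(p32));     /* stands in for copyin() */
            *to = (user_addr_t)p32;              /* zero-extend 32 -> 64 */
        } else {
            memcpy(to, from, sizeof(*to));
        }
        return 0;
    }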
+ * + * TODO: argc copied with suword(), which takes a 64 bit address + */ +static int +exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) +{ + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4; + char *argv = imgp->ip_argv; /* modifiable copy of argv */ + user_addr_t string_area; /* *argv[], *env[] */ + user_addr_t path_area; /* package launch path */ + user_addr_t ptr_area; /* argv[], env[], exec_path */ + user_addr_t stack; + int stringc = imgp->ip_argc + imgp->ip_envc; + int len; + int error; + int strspace; + + stack = *stackp; + + /* + * Set up pointers to the beginning of the string area, the beginning + * of the path area, and the beginning of the pointer area (actually, + * the location of argc, an int, which may be smaller than a pointer, + * but we use ptr_size worth of space for it, for alignment). + */ + string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size; + path_area = string_area - (((imgp->ip_argv - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)); + ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4) * ptr_size) - ptr_size /*argc*/; + + /* Return the initial stack address: the location of argc */ + *stackp = ptr_area; /* - * Collect arguments on "file" in swap space. + * Record the size of the arguments area so that sysctl_procargs() + * can return the argument area without having to parse the arguments. */ - na = 0; - ne = 0; - nc = 0; - cc = 0; + p->p_argc = imgp->ip_argc; + p->p_argslen = (int)(stack - path_area); + + /* * Support for new app package launching for Mac OS X allocates - * the "path" at the begining. - * execargs get allocated after that + * the "path" at the begining of the imgp->ip_strings buffer. + * copy it just before the string area. */ - cp = (char *) execargsp; /* running pointer for copy */ + len = 0; + error = copyoutstr(imgp->ip_strings, path_area, + (unsigned)(imgp->ip_argv - imgp->ip_strings), + (size_t *)&len); + if (error) + goto bad; + + + /* Save a NULL pointer below it */ + (void)copyoutptr(0LL, path_area - ptr_size, ptr_size); + + /* Save the pointer to "path" just below it */ + (void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size); + /* - * size of execargs less sizeof "path", - * a pointer to "path" and a NULL poiter + * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n] + * ptr_size for argc + * skip over saved path, ptr_size for pointer to path, + * and ptr_size for the NULL after pointer to path. */ - cc = NCARGS - savedpathlen - 2*NBPW; + + /* argc (int32, stored in a ptr_size area) */ + (void)suword(ptr_area, imgp->ip_argc); + ptr_area += sizeof(int); + /* pad to ptr_size, if 64 bit image, to ensure user stack alignment */ + if (imgp->ip_flags & IMGPF_IS_64BIT) { + (void)suword(ptr_area, 0); /* int, not long: ignored */ + ptr_area += sizeof(int); + } + + /* - * Copy arguments into file in argdev area. + * We use (string_area - path_area) here rather than the more + * intuitive (imgp->ip_argv - imgp->ip_strings) because we are + * interested in the length of the PATH_AREA in user space, + * rather than the actual length of the execution path, since + * it includes alignment padding of the PATH_AREA + STRING_AREA + * to a ptr_size boundary. 
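The three area pointers computed above all rely on rounding a byte count up to a pointer boundary, which is what keeps the eventual stack pointer aligned for both 32- and 64-bit images. A worked sketch of the same arithmetic with made-up lengths; the masking idiom assumes ptr_size is a power of two:

    #include <assert.h>
    #include <stdint.h>

    /* Round len up to a ptr_size boundary (ptr_size is 4 or 8). */
    static uint64_t round_to_ptr(uint64_t len, int ptr_size)
    {
        return (len + ptr_size - 1) & ~(uint64_t)(ptr_size - 1);
    }

    static void layout_example(void)
    {
        uint64_t stack = 0xbffff000;     /* hypothetical initial sp */
        uint64_t strings_len = 57;       /* ip_strendp - ip_strings */
        uint64_t path_len = 13;          /* ip_argv - ip_strings */
        int argc = 2, envc = 3, ptr_size = 4;

        uint64_t string_area = stack - round_to_ptr(strings_len, ptr_size) - ptr_size;
        uint64_t path_area = string_area - round_to_ptr(path_len, ptr_size);
        /* +4 covers the two NULLs, the exec_path pointer, and its NULL;
         * the final ptr_size holds argc */
        uint64_t ptr_area = path_area
            - (uint64_t)(argc + envc + 4) * ptr_size - ptr_size;

        assert((ptr_area & (uint64_t)(ptr_size - 1)) == 0); /* sp stays aligned */
    }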
*/ + strspace = SIZE_IMG_STRSPACE - (string_area - path_area); + for (;;) { + if (stringc == imgp->ip_envc) { + /* argv[n] = NULL */ + (void)copyoutptr(0LL, ptr_area, ptr_size); + ptr_area += ptr_size; + } + if (--stringc < 0) + break; + + /* pointer: argv[n]/env[n] */ + (void)copyoutptr(string_area, ptr_area, ptr_size); + /* string : argv[n][]/env[n][] */ + do { + if (strspace <= 0) { + error = E2BIG; + break; + } + error = copyoutstr(argv, string_area, + (unsigned)strspace, + (size_t *)&len); + string_area += len; + argv += len; + strspace -= len; + } while (error == ENAMETOOLONG); + if (error == EFAULT || error == E2BIG) + break; /* bad stack - user's problem */ + ptr_area += ptr_size; + } + /* env[n] = NULL */ + (void)copyoutptr(0LL, ptr_area, ptr_size); + +bad: + return(error); +} + + +/* + * exec_extract_strings + * + * Copy arguments and environment from user space into work area; we may + * have already copied some early arguments into the work area, and if + * so, any arguments opied in are appended to those already there. + * + * Parameters: struct image_params * the image parameter block + * + * Returns: 0 Success + * !0 Failure: errno + * + * Implicit returns; + * (imgp->ip_argc) Count of arguments, updated + * (imgp->ip_envc) Count of environment strings, updated + * + * + * Notes: The argument and environment vectors are user space pointers + * to arrays of user space pointers. + */ +static int +exec_extract_strings(struct image_params *imgp) +{ + int error = 0; + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + int seg = (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32); + int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4; + user_addr_t argv = imgp->ip_user_argv; + user_addr_t envv = imgp->ip_user_envv; + + /* Now, get rest of arguments */ /* - * If we have a fat file, find "our" executable. + * If we are running an interpreter, replace the av[0] that was + * passed to execve() with the fully qualified path name that was + * passed to execve() for interpreters which do not use the PATH + * to locate their script arguments. */ - if (is_fat) { - /* - * Look up our architecture in the fat file. - */ - lret = fatfile_getarch_affinity(vp,(vm_offset_t)fat_header, &fat_arch, - (p->p_flag & P_AFFINITY)); - if (lret != LOAD_SUCCESS) { - error = load_return_to_errno(lret); + if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) { + user_addr_t arg; + + error = copyinptr(argv, &arg, ptr_size); + if (error) goto bad; + if (arg != 0LL && arg != (user_addr_t)-1) { + argv += ptr_size; + error = exec_add_string(imgp, imgp->ip_user_fname, seg); + if (error) + goto bad; + imgp->ip_argc++; } - /* Read the Mach-O header out of it */ - error = vn_rdwr(UIO_READ, vp, (caddr_t)&exdata.mach_header, - sizeof (exdata.mach_header), - fat_arch.offset, - UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &resid, p); + } - if (error) { + while (argv != 0LL) { + user_addr_t arg; + + error = copyinptr(argv, &arg, ptr_size); + if (error) goto bad; - } - /* Did we read a complete header? */ - if (resid) { - error = EBADEXEC; + argv += ptr_size; + if (arg == 0LL) { + break; + } else if (arg == (user_addr_t)-1) { + /* Um... why would it be -1? */ + error = EFAULT; goto bad; } + /* + * av[n...] 
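The inner do/while above pushes one logical string out in chunks: copyoutstr() reports ENAMETOOLONG when the terminating NUL did not fit, the partial length advances both cursors, and the budget check turns exhaustion into E2BIG. A userspace analogue of that loop shape, assuming memcpy() in place of copyoutstr():

    #include <errno.h>
    #include <string.h>

    /*
     * Chunked string copy mirroring the loop above.  srcp/dstp/budget
     * are cursors that survive partial copies, as in the kernel loop.
     */
    static int copy_string_chunked(const char **srcp, char **dstp, int *budget)
    {
        int error;

        do {
            if (*budget <= 0)
                return E2BIG;                       /* area exhausted */
            size_t max = (size_t)*budget;
            size_t n = strnlen(*srcp, max);         /* bytes before NUL, capped */
            size_t done = (n < max) ? n + 1 : max;  /* count the NUL if seen */
            memcpy(*dstp, *srcp, done);             /* stands in for copyoutstr() */
            error = (n < max) ? 0 : ENAMETOOLONG;
            *srcp += done;
            *dstp += done;
            *budget -= (int)done;
        } while (error == ENAMETOOLONG);
        return 0;
    }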
= arg[n] + */ + error = exec_add_string(imgp, arg, seg); + if (error) + goto bad; + imgp->ip_argc++; + } + + /* Now, get the environment */ + while (envv != 0LL) { + user_addr_t env; + + error = copyinptr(envv, &env, ptr_size); + if (error) + goto bad; - /* Is what we found a Mach-O executable */ - if ((mach_header->magic != MH_MAGIC) && - (mach_header->magic != MH_CIGAM)) { - error = ENOEXEC; + envv += ptr_size; + if (env == 0LL) { + break; + } else if (env == (user_addr_t)-1) { + error = EFAULT; goto bad; } - - arch_offset = fat_arch.offset; - arch_size = fat_arch.size; - } else { /* - * Load the Mach-O file. - */ - arch_offset = 0; - arch_size = (u_long)vattr.va_size; - } - - if ( ! check_cpu_subtype(mach_header->cpusubtype) ) { - error = EBADARCH; - goto bad; - } - - if (mach_header->magic == MH_CIGAM) { - - int classicBinaryLen = nd.ni_cnd.cn_namelen; - if (classicBinaryLen > MAXCOMLEN) - classicBinaryLen = MAXCOMLEN; - bcopy((caddr_t)nd.ni_cnd.cn_nameptr, - (caddr_t)binaryWithClassicName, - (unsigned)classicBinaryLen); - binaryWithClassicName[classicBinaryLen] = '\0'; - executingClassic = 1; - - vput(vp); /* cleanup? */ - nd.ni_cnd.cn_nameiop = LOOKUP; - - nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) | - /* (FOLLOW | LOCKLEAF | SAVENAME) */ - (LOCKLEAF | SAVENAME); - nd.ni_segflg = UIO_SYSSPACE; - - nd.ni_dirp = classichandler; - if ((error = namei(&nd)) != 0) { - error = EBADARCH; - goto bad1; - } - vp = nd.ni_vp; - - VOP_LEASE(vp,p,cred,LEASE_READ); - if ((error = VOP_GETATTR(vp,&vattr,p->p_ucred,p))) { + * av[n...] = env[n] + */ + error = exec_add_string(imgp, env, seg); + if (error) goto bad; - } - goto again; + imgp->ip_envc++; } +bad: + return error; +} - if (uap->argp != NULL) { - /* geez -- why would argp ever be NULL, and why would we proceed? */ - - /* First, handle any argument massaging */ - if (executingInterpreter && executingClassic) { - error = copyArgument(classichandler,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - if (error) goto bad; - - /* Now name the interpreter. */ - error = copyArgument(savedpath,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - if (error) goto bad; - /* - * if we're running an interpreter, as we'd be passing the - * command line executable as an argument to the interpreter already. - * Doing "execve("myShellScript","bogusName",arg1,arg2,...) - * probably shouldn't ever let bogusName be seen by the shell - * script. - */ - - if (cfarg[0]) { - error = copyArgument(cfarg,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - if (error) goto bad; - } - - char* originalExecutable = uap->fname; - error = copyArgument(originalExecutable,UIO_USERSPACE,&nc,&cp,&cc); - na++; - /* remove argv[0] b/c we've already placed it at */ - /* this point */ - uap->argp++; - if (error) goto bad; - - /* and continue with rest of the arguments. */ - } else if (executingClassic) { - error = copyArgument(classichandler,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - if (error) goto bad; - - char* originalExecutable = uap->fname; - error = copyArgument(originalExecutable,UIO_USERSPACE,&nc,&cp,&cc); - if (error) goto bad; - uap->argp++; - na++; - - /* and rest of arguments continue as before. */ - } else if (executingInterpreter) { - char *actualExecutable = nd.ni_cnd.cn_nameptr; - error = copyArgument(actualExecutable,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - /* remove argv[0] b/c we just placed it in the arg list. */ - uap->argp++; - if (error) goto bad; - /* Copy the argument in the interpreter first line if there - * was one. 
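exec_extract_strings() walks argv and envp the same way: fetch one user pointer, stop on NULL, treat an all-ones value as a fault, and append the string it names. A userspace analogue of that walk over an ordinary pointer array:

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    /*
     * Walk a NULL-terminated string vector the way the loops above walk
     * argv/env: count entries, and treat (char *)-1 as a fetch fault.
     */
    static int walk_string_vector(char *const *vec, int *countp)
    {
        int n = 0;

        if (vec != NULL) {
            for (; *vec != NULL; vec++, n++) {
                if (*vec == (char *)-1)
                    return EFAULT;               /* bad user pointer */
                printf("string[%d] = %s\n", n, *vec);
            }
        }
        *countp = n;
        return 0;
    }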
- */ - if (cfarg[0]) { - error = copyArgument(cfarg,UIO_SYSSPACE,&nc,&cp,&cc); - na++; - if (error) goto bad; - } - - /* copy the name of the file being interpreted, gotten from - * the structures passed in to execve. - */ - error = copyArgument(uap->fname,UIO_USERSPACE,&nc,&cp,&cc); - na++; - } - /* Now, get rest of arguments */ - while (uap->argp != NULL) { - char* userArgument = (char*)fuword((caddr_t) uap->argp); - uap->argp++; - if (userArgument == NULL) { - break; - } else if ((int)userArgument == -1) { - /* Um... why would it be -1? */ - error = EFAULT; - goto bad; - } - error = copyArgument(userArgument, UIO_USERSPACE,&nc,&cp,&cc); - if (error) goto bad; - na++; - } - /* Now, get the environment */ - while (uap->envp != NULL) { - char *userEnv = (char*) fuword((caddr_t) uap->envp); - uap->envp++; - if (userEnv == NULL) { - break; - } else if ((int)userEnv == -1) { - error = EFAULT; - goto bad; - } - error = copyArgument(userEnv,UIO_USERSPACE,&nc,&cp,&cc); - if (error) goto bad; - na++; - ne++; - } - } +#define unix_stack_size(p) (p->p_rlimit[RLIMIT_STACK].rlim_cur) - /* make sure there are nulls are the end!! */ - { - int cnt = 3; - char *mp = cp; +static int +exec_check_permissions(struct image_params *imgp) +{ + struct vnode *vp = imgp->ip_vp; + struct vnode_attr *vap = imgp->ip_vattr; + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + int error; + kauth_action_t action; - while ( cnt-- ) - *mp++ = '\0'; - } + /* Only allow execution of regular files */ + if (!vnode_isreg(vp)) + return (EACCES); + + /* Get the file attributes that we will be using here and elsewhere */ + VATTR_INIT(vap); + VATTR_WANTED(vap, va_uid); + VATTR_WANTED(vap, va_gid); + VATTR_WANTED(vap, va_mode); + VATTR_WANTED(vap, va_fsid); + VATTR_WANTED(vap, va_fileid); + VATTR_WANTED(vap, va_data_size); + if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0) + return (error); - /* and round up count of bytes written to next word. */ - nc = (nc + NBPW-1) & ~(NBPW-1); + /* + * Ensure that at least one execute bit is on - otherwise root + * will always succeed, and we don't want to happen unless the + * file really is executable. + */ + if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) + return (EACCES); - if (vattr.va_fsid == classichandler_fsid && - vattr.va_fileid == classichandler_fileid) { - executingClassic = 1; - } + /* Disallow zero length files */ + if (vap->va_data_size == 0) + return (ENOEXEC); - if (vfexec) { - kern_return_t result; + imgp->ip_arch_offset = (user_size_t)0; + imgp->ip_arch_size = vap->va_data_size; - result = task_create_internal(task, FALSE, &new_task); - if (result != KERN_SUCCESS) - printf("execve: task_create failed. Code: 0x%x\n", result); - p->task = new_task; - set_bsdtask_info(new_task, p); - if (p->p_nice != 0) - resetpriority(p); - task = new_task; - map = get_task_map(new_task); - result = thread_create(new_task, &thr_act); - if (result != KERN_SUCCESS) - printf("execve: thread_create failed. 
Code: 0x%x\n", result); - uthread = get_bsdthread_info(thr_act); - } else { - map = VM_MAP_NULL; - } + /* Disable setuid-ness for traced programs or if MNT_NOSUID */ + if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED)) + vap->va_mode &= ~(VSUID | VSGID); + + /* Check for execute permission */ + action = KAUTH_VNODE_EXECUTE; + /* Traced images must also be readable */ + if (p->p_flag & P_TRACED) + action |= KAUTH_VNODE_READ_DATA; + if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0) + return (error); - /* - * Load the Mach-O file. - */ - VOP_UNLOCK(vp, 0, p); /* XXX */ - if(ws_cache_name) { - tws_handle_startup_file(task, cred->cr_uid, - ws_cache_name, vp, &clean_regions); - } + /* Don't let it run if anyone had it open for writing */ + if (vp->v_writecount) + return (ETXTBSY); - vm_get_shared_region(task, &initial_region); - int parentIsClassic = (p->p_flag & P_CLASSIC); - struct vnode *rootDir = p->p_fd->fd_rdir; - - if ((parentIsClassic && !executingClassic) || - (!parentIsClassic && executingClassic)) { - shared_region = lookup_default_shared_region( - (int)rootDir, - (executingClassic ? - CPU_TYPE_POWERPC : - machine_slot[cpu_number()].cpu_type)); - if (shared_region == NULL) { - shared_region_mapping_t old_region; - shared_region_mapping_t new_region; - vm_get_shared_region(current_task(), &old_region); - /* grrrr... this sets current_task(), not task - * -- they're different (usually) - */ - shared_file_boot_time_init( - (int)rootDir, - (executingClassic ? - CPU_TYPE_POWERPC : - machine_slot[cpu_number()].cpu_type)); - if ( current_task() != task ) { - vm_get_shared_region(current_task(),&new_region); - vm_set_shared_region(task,new_region); - vm_set_shared_region(current_task(),old_region); - } - } else { - vm_set_shared_region(task, shared_region); - } - shared_region_mapping_dealloc(initial_region); - } - - lret = load_machfile(vp, mach_header, arch_offset, - arch_size, &load_result, thr_act, map, clean_regions); - if (lret != LOAD_SUCCESS) { - error = load_return_to_errno(lret); - vrele(vp); - vp = NULL; - goto badtoolate; - } + /* XXX May want to indicate to underlying FS that vnode is open */ - /* load_machfile() maps the vnode */ - ubc_map(vp); + return (error); +} + +/* + * exec_handle_sugid + * + * Initially clear the P_SUGID in the process flags; if an SUGID process is + * exec'ing a non-SUGID image, then this is the point of no return. + * + * If the image being activated is SUGI, then replace the credential with a + * copy, disable tracing (unless the tracing process is root), reset the + * mach task port to revoke it, set the P_SUGID bit, + * + * If the saved user and group ID will be changing, then make sure it happens + * to a new credential, rather than a shared one. + * + * Set the security token (this is probably obsolete, given that the token + * should not technically be separate from the credential itself). + * + * Parameters: struct image_params * the image parameter block + * + * Returns: void No failure indication + * + * Implicit returns: + * Potentially modified/replaced + * Potentially revoked + * P_SUGID bit potentially modified + * Potentially modified + */ +static int +exec_handle_sugid(struct image_params *imgp) +{ + kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context); + struct proc *p = vfs_context_proc(imgp->ip_vfs_context); + int i; + int error = 0; + static struct vnode *dev_null = NULLVP; - /* - * deal with set[ug]id. 
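Stripped of the vnode machinery, exec_check_permissions() reduces to a short list of tests: regular file, at least one execute bit (so root does not pass by default), nonzero length, and no writers. A userspace sketch of the stat(2)-visible subset; vnode_authorize() and the v_writecount ETXTBSY test have no direct analogue here:

    #include <errno.h>
    #include <sys/stat.h>

    static int check_executable(const char *path)
    {
        struct stat st;

        if (stat(path, &st) != 0)
            return errno;
        if (!S_ISREG(st.st_mode))
            return EACCES;          /* only regular files may exec */
        if ((st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
            return EACCES;          /* require at least one x bit */
        if (st.st_size == 0)
            return ENOEXEC;         /* zero-length image */
        return 0;
    }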
- */ p->p_flag &= ~P_SUGID; - if (((origvattr.va_mode & VSUID) != 0 && - p->p_ucred->cr_uid != origvattr.va_uid) - || (origvattr.va_mode & VSGID) != 0 && - p->p_ucred->cr_gid != origvattr.va_gid) { - p->p_ucred = crcopy(cred); + + if (((imgp->ip_origvattr->va_mode & VSUID) != 0 && + kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) || + ((imgp->ip_origvattr->va_mode & VSGID) != 0 && + cred->cr_gid != imgp->ip_origvattr->va_gid)) { #if KTRACE /* * If process is being ktraced, turn off - unless @@ -811,16 +1411,18 @@ again: struct vnode *tvp = p->p_tracep; p->p_tracep = NULL; p->p_traceflag = 0; - - if (UBCINFOEXISTS(tvp)) - ubc_rele(tvp); - vrele(tvp); + vnode_rele(tvp); } #endif - if (origvattr.va_mode & VSUID) - p->p_ucred->cr_uid = origvattr.va_uid; - if (origvattr.va_mode & VSGID) - p->p_ucred->cr_gid = origvattr.va_gid; + /* + * Replace the credential with a copy of itself if euid or egid change. + */ + if (imgp->ip_origvattr->va_mode & VSUID) { + p->p_ucred = kauth_cred_seteuid(p->p_ucred, imgp->ip_origvattr->va_uid); + } + if (imgp->ip_origvattr->va_mode & VSGID) { + p->p_ucred = kauth_cred_setegid(p->p_ucred, imgp->ip_origvattr->va_gid); + } /* * Have mach reset the task port. We don't want @@ -828,10 +1430,30 @@ again: * exec to be able to access/control the task * after. */ - ipc_task_reset(task); + if (current_task() == p->task) + ipc_task_reset(p->task); p->p_flag |= P_SUGID; + /* Cache the vnode for /dev/null the first time around */ + if (dev_null == NULLVP) { + struct nameidata nd1; + + NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32, + CAST_USER_ADDR_T("/dev/null"), + imgp->ip_vfs_context); + + if ((error = vn_open(&nd1, FREAD, 0)) == 0) { + dev_null = nd1.ni_vp; + /* + * vn_open returns with both a use_count + * and an io_count on the found vnode + * drop the io_count, but keep the use_count + */ + vnode_put(nd1.ni_vp); + } + } + /* Radar 2261856; setuid security hole fix */ /* Patch from OpenBSD: A. Ramesh */ /* @@ -841,307 +1463,77 @@ again: * descriptors in this range which has implied meaning * to libc. */ - for (i = 0; i < 3; i++) { - extern struct fileops vnops; - struct nameidata nd1; - struct file *fp; - int indx; + if (dev_null != NULLVP) { + for (i = 0; i < 3; i++) { + struct fileproc *fp; + int indx; + + if (p->p_fd->fd_ofiles[i] != NULL) + continue; - if (p->p_fd->fd_ofiles[i] == NULL) { if ((error = falloc(p, &fp, &indx)) != 0) continue; - NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE, - "/dev/null", p); - if ((error = vn_open(&nd1, FREAD, 0)) != 0) { - ffree(fp); - p->p_fd->fd_ofiles[indx] = NULL; + + if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) { + fp_free(p, indx, fp); break; } - fp->f_flag = FREAD; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)nd1.ni_vp; - VOP_UNLOCK(nd1.ni_vp, 0, p); - } - } - } - p->p_cred->p_svuid = p->p_ucred->cr_uid; - p->p_cred->p_svgid = p->p_ucred->cr_gid; - set_security_token(p); - - KNOTE(&p->p_klist, NOTE_EXEC); - - if (!vfexec && (p->p_flag & P_TRACED)) - psignal(p, SIGTRAP); - - if (error) { - vrele(vp); - vp = NULL; - goto badtoolate; - } - VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - vput(vp); - vp = NULL; - - if (load_result.unixproc && - create_unix_stack(get_task_map(task), - load_result.user_stack, load_result.customstack, p)) { - error = load_return_to_errno(LOAD_NOSPACE); - goto badtoolate; - } - - if (vfexec) { - uthread->uu_ar0 = (void *)get_user_regs(thr_act); - } - - /* - * Copy back arglist if necessary. 
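The credential handling above applies each set-id bit independently, and only after MNT_NOSUID and P_TRACED have already cleared the mode bits in exec_check_permissions(). A sketch of just that decision in plain C, using the userspace S_ISUID/S_ISGID names for the kernel's VSUID/VSGID:

    #include <sys/stat.h>
    #include <sys/types.h>

    /*
     * Each bit substitutes the file's owner or group for the effective
     * ID.  The kernel does this by swapping in a modified copy of the
     * refcounted credential (kauth_cred_seteuid/kauth_cred_setegid),
     * never by editing the shared credential in place.
     */
    static void apply_sugid(mode_t mode, uid_t file_uid, gid_t file_gid,
                            uid_t *euid, gid_t *egid)
    {
        if (mode & S_ISUID)
            *euid = file_uid;
        if (mode & S_ISGID)
            *egid = file_gid;
    }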
- */ - - - ucp = (int)p->user_stack; - if (vfexec) { - old_map = vm_map_switch(get_task_map(task)); - } - if (load_result.unixproc) { - int pathptr; - - ucp = ucp - nc - NBPW; /* begining of the STRING AREA */ - - /* - * Support for new app package launching for Mac OS X allocates - * the "path" at the begining of the execargs buffer. - * copy it just before the string area. - */ - len = 0; - pathptr = ucp - ((savedpathlen + NBPW-1) & ~(NBPW-1)); - error = copyoutstr(savedpath, (caddr_t)pathptr, - (unsigned)savedpathlen, (size_t *)&len); - savedpathlen = (savedpathlen + NBPW-1) & ~(NBPW-1); - - if (error) { - if (vfexec) - vm_map_switch(old_map); - goto badtoolate; - } - - /* - * Record the size of the arguments area so that - * sysctl_procargs() can return the argument area without having - * to parse the arguments. - */ - p->p_argslen = (int)p->user_stack - pathptr; - p->p_argc = na - ne; /* save argc for sysctl_procargs() */ - - /* Save a NULL pointer below it */ - (void) suword((caddr_t)(pathptr - NBPW), 0); - /* Save the pointer to "path" just below it */ - (void) suword((caddr_t)(pathptr - 2*NBPW), pathptr); - - /* - * na includes arg[] and env[]. - * NBPW for 2 NULL one each ofter arg[argc -1] and env[n] - * NBPW for argc - * skip over saved path, NBPW for pointer to path, - * and NBPW for the NULL after pointer to path. - */ - ap = ucp - na*NBPW - 3*NBPW - savedpathlen - 2*NBPW; -#if defined(ppc) - thread_setuserstack(thr_act, ap); /* Set the stack */ -#else - uthread->uu_ar0[SP] = ap; -#endif - (void) suword((caddr_t)ap, na-ne); /* argc */ - nc = 0; - cc = 0; - - cp = (char *) execargsp; - cc = NCARGS - savedpathlen - 2*NBPW; - ps.ps_argvstr = (char *)ucp; /* first argv string */ - ps.ps_nargvstr = na - ne; /* argc */ - for (;;) { - ap += NBPW; - if (na == ne) { - (void) suword((caddr_t)ap, 0); - ap += NBPW; - ps.ps_envstr = (char *)ucp; - ps.ps_nenvstr = ne; + fp->f_fglob->fg_flag = FREAD; + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)dev_null; + + proc_fdlock(p); + *fdflags(p, indx) &= ~UF_RESERVED; + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); } - if (--na < 0) - break; - (void) suword((caddr_t)ap, ucp); - do { - error = copyoutstr(cp, (caddr_t)ucp, - (unsigned)cc, (size_t *)&len); - ucp += len; - cp += len; - nc += len; - cc -= len; - } while (error == ENAMETOOLONG); - if (error == EFAULT) - break; /* bad stack - user's problem */ + /* + * for now we need to drop the reference immediately + * since we don't have any mechanism in place to + * release it before starting to unmount "/dev" + * during a reboot/shutdown + */ + vnode_rele(dev_null); + dev_null = NULLVP; } - (void) suword((caddr_t)ap, 0); - } - - if (load_result.dynlinker) { -#if defined(ppc) - ap = thread_adjuserstack(thr_act, -4); /* Adjust the stack */ -#else - ap = uthread->uu_ar0[SP] -= 4; -#endif - (void) suword((caddr_t)ap, load_result.mach_header); - } - - if (vfexec) { - vm_map_switch(old_map); - } -#if defined(ppc) - thread_setentrypoint(thr_act, load_result.entry_point); /* Set the entry point */ -#elif defined(i386) - uthread->uu_ar0[PC] = load_result.entry_point; -#else -#error architecture not implemented! -#endif - - /* Stop profiling */ - stopprofclock(p); - - /* - * Reset signal state. - */ - execsigs(p, thr_act); - - /* - * Close file descriptors - * which specify close-on-exec. - */ - fdexec(p); - - /* - * need to cancel async IO requests that can be cancelled and wait for those - * already active. MAY BLOCK! 
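The Radar 2261856 block above guarantees a set-id image never starts with descriptors 0-2 unallocated, since libc attaches meaning to them and a later open() could land there. The same hardening is common in userspace; a sketch using open(2) on /dev/null (the kernel variant opens it read-only and shares one cached vnode):

    #include <fcntl.h>
    #include <unistd.h>

    /* Point any closed standard descriptor at /dev/null. */
    static void sanitize_std_fds(void)
    {
        int fd;

        for (fd = 0; fd <= 2; fd++) {
            if (fcntl(fd, F_GETFD) == -1) {      /* fd is not open */
                int nullfd = open("/dev/null", O_RDWR);
                if (nullfd >= 0 && nullfd != fd) {
                    dup2(nullfd, fd);            /* move it into place */
                    close(nullfd);
                }
            }
        }
    }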
- */ - _aio_exec( p ); - - /* FIXME: Till vmspace inherit is fixed: */ - if (!vfexec && p->vm_shm) - shmexec(p); - /* Clean up the semaphores */ - semexit(p); - - /* - * Remember file name for accounting. - */ - p->p_acflag &= ~AFORK; - /* If the translated name isn't NULL, then we want to use - * that translated name as the name we show as the "real" name. - * Otherwise, use the name passed into exec. - */ - if (0 != binaryWithClassicName[0]) { - bcopy((caddr_t)binaryWithClassicName, (caddr_t)p->p_comm, - sizeof(binaryWithClassicName)); - } else { - if (nd.ni_cnd.cn_namelen > MAXCOMLEN) - nd.ni_cnd.cn_namelen = MAXCOMLEN; - bcopy((caddr_t)nd.ni_cnd.cn_nameptr, (caddr_t)p->p_comm, - (unsigned)nd.ni_cnd.cn_namelen); - p->p_comm[nd.ni_cnd.cn_namelen] = '\0'; - } - - { - /* This is for kdebug */ - long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4; - - /* Collect the pathname for tracing */ - kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); - - - - if (vfexec) - { - KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, - p->p_pid ,0,0,0, (unsigned int)thr_act); - KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thr_act); - } - else - { - KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, - p->p_pid ,0,0,0,0); - KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0); - } } - if (executingClassic) - p->p_flag |= P_CLASSIC | P_AFFINITY; - else - p->p_flag &= ~P_CLASSIC; - /* - * mark as execed, wakeup the process that vforked (if any) and tell - * it that it now has it's own resources back + * Implement the semantic where the effective user and group become + * the saved user and group in exec'ed programs. 
*/ - p->p_flag |= P_EXEC; - if (p->p_pptr && (p->p_flag & P_PPWAIT)) { - p->p_flag &= ~P_PPWAIT; - wakeup((caddr_t)p->p_pptr); - } - - if (vfexec && (p->p_flag & P_TRACED)) { - psignal_vfork(p, new_task, thr_act, SIGTRAP); - } + p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid); + + /* XXX Obsolete; security token should not be separate from cred */ + set_security_token(p); -badtoolate: - if (vfexec) { - task_deallocate(new_task); - act_deallocate(thr_act); - if (error) - error = 0; - } -bad: - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - if (vp) - vput(vp); -bad1: - if (execargs) - execargs_free(execargs); - if (!error && vfexec) { - vfork_return(current_act(), p->p_pptr, p, retval); - (void) thread_resume(thr_act); - return(0); - } return(error); } - -#define unix_stack_size(p) (p->p_rlimit[RLIMIT_STACK].rlim_cur) - -kern_return_t -create_unix_stack(map, user_stack, customstack, p) - vm_map_t map; - vm_offset_t user_stack; - int customstack; - struct proc *p; +static kern_return_t +create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack, + struct proc *p) { - vm_size_t size; - vm_offset_t addr; + mach_vm_size_t size; + mach_vm_offset_t addr; - p->user_stack = (caddr_t)user_stack; + p->user_stack = user_stack; if (!customstack) { - size = round_page_64(unix_stack_size(p)); - addr = trunc_page_32(user_stack - size); - return (vm_allocate(map, &addr, size, - VM_MAKE_TAG(VM_MEMORY_STACK) | FALSE)); + size = mach_vm_round_page(unix_stack_size(p)); + addr = mach_vm_trunc_page(user_stack - size); + return (mach_vm_allocate(map, &addr, size, + VM_MAKE_TAG(VM_MEMORY_STACK) | + VM_FLAGS_FIXED)); } else return(KERN_SUCCESS); } #include -char init_program_name[128] = "/sbin/mach_init\0"; +static char init_program_name[128] = "/sbin/launchd"; +static const char * other_init = "/sbin/mach_init"; char init_args[128] = ""; @@ -1150,15 +1542,12 @@ int init_attempts = 0; void -load_init_program(p) - struct proc *p; +load_init_program(struct proc *p) { vm_offset_t init_addr; - int *old_ap; char *argv[3]; - int error; - register_t retval[2]; - struct uthread * ut; + int error; + register_t retval[2]; error = 0; @@ -1174,7 +1563,6 @@ load_init_program(p) if (error && ((boothowto & RB_INITNAME) == 0) && (init_attempts == 1)) { - static char other_init[] = "/etc/mach_init"; printf("Load of %s, errno %d, trying %s\n", init_program_name, error, other_init); error = 0; @@ -1198,11 +1586,12 @@ load_init_program(p) init_addr = VM_MIN_ADDRESS; (void) vm_allocate(current_map(), &init_addr, - PAGE_SIZE, TRUE); + PAGE_SIZE, VM_FLAGS_ANYWHERE); if (init_addr == 0) init_addr++; + (void) copyout((caddr_t) init_program_name, - (caddr_t) (init_addr), + CAST_USER_ADDR_T(init_addr), (unsigned) sizeof(init_program_name)+1); argv[0] = (char *) init_addr; @@ -1216,7 +1605,7 @@ load_init_program(p) */ (void) copyout((caddr_t) init_args, - (caddr_t) (init_addr), + CAST_USER_ADDR_T(init_addr), (unsigned) sizeof(init_args)); argv[1] = (char *) init_addr; @@ -1234,16 +1623,16 @@ load_init_program(p) */ (void) copyout((caddr_t) argv, - (caddr_t) (init_addr), + CAST_USER_ADDR_T(init_addr), (unsigned) sizeof(argv)); /* * Set up argument block for fake call to execve. 
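create_unix_stack() places the default stack by rounding the RLIMIT_STACK size up to a page and truncating the base down to one, so the allocation covers the initial user stack pointer. A sketch of the placement math, assuming a 4 KB page for illustration:

    #include <stdint.h>

    #define SKETCH_PAGE 4096ULL      /* assumed page size for the example */

    /*
     * size = rlimit rounded up to a page (mach_vm_round_page);
     * base = user_stack - size, truncated to a page (mach_vm_trunc_page).
     * The kernel then mach_vm_allocate()s [base, base + size).
     */
    static void place_stack(uint64_t user_stack, uint64_t rlim_cur,
                            uint64_t *base, uint64_t *size)
    {
        *size = (rlim_cur + SKETCH_PAGE - 1) & ~(SKETCH_PAGE - 1);
        *base = (user_stack - *size) & ~(SKETCH_PAGE - 1);
    }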
*/ - init_exec_args.fname = argv[0]; - init_exec_args.argp = (char **) init_addr; - init_exec_args.envp = 0; + init_exec_args.fname = CAST_USER_ADDR_T(argv[0]); + init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr); + init_exec_args.envp = CAST_USER_ADDR_T(0); /* So that mach_init task * is set with uid,gid 0 token @@ -1284,31 +1673,6 @@ load_return_to_errno(load_return_t lrtn) } } -/* - * exec_check_access() - */ -int -check_exec_access(p, vp, vap) - struct proc *p; - struct vnode *vp; - struct vattr *vap; -{ - int flag; - int error; - - if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) - return (error); - flag = p->p_flag; - if (flag & P_TRACED) { - if (error = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) - return (error); - } - if (vp->v_type != VREG || - (vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) - return (EACCES); - return (0); -} - #include #include #include @@ -1318,9 +1682,12 @@ check_exec_access(p, vp, vap) extern semaphore_t execve_semaphore; +/* + * The block of memory used by the execve arguments. At the same time, + * we allocate a page so that we can read in the first page of the image. + */ static int -execargs_alloc(addrp) - vm_offset_t *addrp; +execargs_alloc(struct image_params *imgp) { kern_return_t kret; @@ -1337,7 +1704,8 @@ execargs_alloc(addrp) return (EINTR); } - kret = kmem_alloc_pageable(bsd_pageable_map, addrp, NCARGS); + kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE); + imgp->ip_vdata = imgp->ip_strings + NCARGS; if (kret != KERN_SUCCESS) { semaphore_signal(execve_semaphore); return (ENOMEM); @@ -1346,12 +1714,12 @@ execargs_alloc(addrp) } static int -execargs_free(addr) - vm_offset_t addr; +execargs_free(struct image_params *imgp) { kern_return_t kret; - kmem_free(bsd_pageable_map, addr, NCARGS); + kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE); + imgp->ip_strings = NULL; kret = semaphore_signal(execve_semaphore); switch (kret) { diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index 157ec5f05..cb515a0dd 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -68,53 +68,93 @@ #include #include #include -#include +#include +#include #include #include #include #include -#include #include -#include -#include +#include +#include #include #include #include #include #include #include +#include +#include +#include /* fdfree */ +#include /* shmexit */ +#include /* acct_process */ +#include #include #include #include + +#include +#include +#include #include -#include #include #include #if KTRACE #include -#include #endif +#include +#include +#include +#include /* init_process */ + extern char init_task_failure_data[]; -int exit1 __P((struct proc *, int, int *)); +int exit1(struct proc *, int, int *); void proc_prepareexit(struct proc *p); -int vfork_exit(struct proc *p, int rv); +void vfork_exit(struct proc *p, int rv); void vproc_exit(struct proc *p); +__private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p); + +/* + * Things which should have prototypes in headers, but don't + */ +void unix_syscall_return(int); +void *get_bsduthreadarg(thread_t); +void proc_exit(struct proc *p); +int wait1continue(int result); +int waitidcontinue(int result); +int *get_bsduthreadrval(thread_t); +kern_return_t sys_perf_notify(struct task *task, exception_data_t code, + mach_msg_type_number_t codeCnt); + +/* + * NOTE: Source and target may *NOT* overlap! 
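The reworked execargs_alloc() grabs NCARGS bytes for the strings plus one extra page, and ip_vdata is simply a pointer NCARGS bytes in, so the first page of the image can be read without a second allocation. A userspace sketch of that layout; the NCARGS and page-size values are assumptions of the sketch, and malloc() stands in for kmem_alloc_pageable():

    #include <stdlib.h>

    #define SKETCH_NCARGS   (256 * 1024)  /* assumed value of NCARGS */
    #define SKETCH_PAGESZ   4096          /* assumed page size */

    struct exec_buf {
        char *strings;                    /* plays imgp->ip_strings */
        char *vdata;                      /* plays imgp->ip_vdata */
    };

    static int exec_buf_alloc(struct exec_buf *eb)
    {
        eb->strings = malloc(SKETCH_NCARGS + SKETCH_PAGESZ);
        if (eb->strings == NULL)
            return -1;
        eb->vdata = eb->strings + SKETCH_NCARGS;  /* trailing image page */
        return 0;
    }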
+ * XXX Should share code with bsd/dev/ppc/unix_signal.c + */ +static void +siginfo_64to32(user_siginfo_t *in, siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = CAST_DOWN(void *,in->si_addr); + /* following cast works for sival_int because of padding */ + out->si_value.sival_ptr = CAST_DOWN(void *,in->si_value.sival_ptr); + out->si_band = in->si_band; /* range reduction */ + out->pad[0] = in->pad[0]; /* mcontext.ss.r1 */ +} /* * exit -- * Death of process. */ -struct exit_args { - int rval; -}; void -exit(p, uap, retval) - struct proc *p; - struct exit_args *uap; - int *retval; +exit(struct proc *p, struct exit_args *uap, int *retval) { exit1(p, W_EXITCODE(uap->rval, 0), retval); @@ -133,15 +173,11 @@ exit(p, uap, retval) * status and rusage for wait(). Check for child processes and orphan them. */ int -exit1(p, rv, retval) - register struct proc *p; - int rv; - int * retval; +exit1(struct proc *p, int rv, int *retval) { - register struct proc *q, *nq; - thread_act_t self = current_act(); + thread_t self = current_thread(); struct task *task = p->task; - register int i,s; + register int s; struct uthread *ut; /* @@ -151,13 +187,11 @@ exit1(p, rv, retval) */ ut = get_bsdthread_info(self); - if (ut->uu_flag & P_VFORK) { - if (!vfork_exit(p, rv)) { + if (ut->uu_flag & UT_VFORK) { + vfork_exit(p, rv); vfork_return(self, p->p_pptr, p , retval); unix_syscall_return(0); /* NOT REACHED */ - } - return(EINVAL); } AUDIT_SYSCALL_EXIT(0, p, ut); /* Exit is always successfull */ signal_lock(p); @@ -198,14 +232,14 @@ exit1(p, rv, retval) void proc_prepareexit(struct proc *p) { - int s; struct uthread *ut; exception_data_t code[EXCEPTION_CODE_MAX]; - thread_act_t self = current_act(); + thread_t self = current_thread(); - code[0] = 0xFF000001; /* Set terminate code */ - code[1] = p->p_pid; /* Pass out the pid */ - (void)sys_perf_notify(p->task, &code, 2); /* Notify the perf server */ + code[0] = (exception_data_t)0xFF000001; /* Set terminate code */ + code[1] = (exception_data_t)p->p_pid; /* Pass out the pid */ + /* Notify the perf server */ + (void)sys_perf_notify(p->task, (exception_data_t)&code, 2); /* * Remove proc from allproc queue and from pidhash chain. @@ -225,7 +259,7 @@ proc_prepareexit(struct proc *p) * P_PPWAIT is set; we will wakeup the parent below. */ p->p_flag &= ~(P_TRACED | P_PPWAIT); - p->p_sigignore = ~0; + p->p_sigignore = ~(sigcantmask); p->p_siglist = 0; ut = get_bsdthread_info(self); ut->uu_siglist = 0; @@ -237,7 +271,7 @@ proc_exit(struct proc *p) { register struct proc *q, *nq, *pp; struct task *task = p->task; - register int i,s; + register int s; boolean_t funnel_state; /* This can happen if thread_terminate of the single thread @@ -252,6 +286,8 @@ proc_exit(struct proc *p) proc_prepareexit(p); } + p->p_lflag |= P_LPEXIT; + /* XXX Zombie allocation may fail, in which case stats get lost */ MALLOC_ZONE(p->p_ru, struct rusage *, sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK); @@ -278,6 +314,7 @@ proc_exit(struct proc *p) if (sp->s_ttyvp) { struct vnode *ttyvp; + struct vfs_context context; /* * Controlling process. @@ -293,13 +330,16 @@ proc_exit(struct proc *p) * The tty could have been revoked * if we blocked. 
*/ + context.vc_proc = p; + context.vc_ucred = p->p_ucred; if (sp->s_ttyvp) - VOP_REVOKE(sp->s_ttyvp, REVOKEALL); + VNOP_REVOKE(sp->s_ttyvp, REVOKEALL, &context); } ttyvp = sp->s_ttyvp; sp->s_ttyvp = NULL; - if (ttyvp) - vrele(ttyvp); + if (ttyvp) { + vnode_rele(ttyvp); + } /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. @@ -317,22 +357,15 @@ proc_exit(struct proc *p) /* * release trace file */ - p->p_traceflag = 0; /* don't trace the vrele() */ + p->p_traceflag = 0; /* don't trace the vnode_put() */ if (p->p_tracep) { struct vnode *tvp = p->p_tracep; p->p_tracep = NULL; - - if (UBCINFOEXISTS(tvp)) - ubc_rele(tvp); - vrele(tvp); + vnode_rele(tvp); } #endif - q = p->p_children.lh_first; - if (q) /* only need this if any child is S_ZOMB */ - wakeup((caddr_t) initproc); - for (; q != 0; q = nq) { - nq = q->p_sibling.le_next; + while (q = p->p_children.lh_first) { proc_reparent(q, initproc); /* * Traced processes are killed @@ -348,9 +381,9 @@ proc_exit(struct proc *p) * the first thread in the task. So any attempts to kill * the process would result into a deadlock on q->sigwait. */ - thread_resume((thread_act_t)q->sigwait_thread); + thread_resume((thread_t)q->sigwait_thread); clear_wait(q->sigwait_thread, THREAD_INTERRUPTED); - threadsignal((thread_act_t)q->sigwait_thread, SIGKILL, 0); + threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0); } psignal(q, SIGKILL); } @@ -358,14 +391,16 @@ proc_exit(struct proc *p) /* * Save exit status and final rusage info, adding in child rusage - * info and self times. + * info and self times. If we were unable to allocate a zombie + * structure, this information is lost. */ - *p->p_ru = p->p_stats->p_ru; + if (p->p_ru != NULL) { + *p->p_ru = p->p_stats->p_ru; - timerclear(&p->p_ru->ru_utime); - timerclear(&p->p_ru->ru_stime); + timerclear(&p->p_ru->ru_utime); + timerclear(&p->p_ru->ru_stime); - if (task) { + if (task) { task_basic_info_data_t tinfo; task_thread_times_info_data_t ttimesinfo; int task_info_stuff, task_ttimes_stuff; @@ -373,7 +408,7 @@ proc_exit(struct proc *p) task_info_stuff = TASK_BASIC_INFO_COUNT; task_info(task, TASK_BASIC_INFO, - &tinfo, &task_info_stuff); + (task_info_t)&tinfo, &task_info_stuff); p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds; p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds; p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds; @@ -381,7 +416,7 @@ proc_exit(struct proc *p) task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT; task_info(task, TASK_THREAD_TIMES_INFO, - &ttimesinfo, &task_ttimes_stuff); + (task_info_t)&ttimesinfo, &task_ttimes_stuff); ut.tv_sec = ttimesinfo.user_time.seconds; ut.tv_usec = ttimesinfo.user_time.microseconds; @@ -389,9 +424,10 @@ proc_exit(struct proc *p) st.tv_usec = ttimesinfo.system_time.microseconds; timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime); timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime); - } + } - ruadd(p->p_ru, &p->p_stats->p_cru); + ruadd(p->p_ru, &p->p_stats->p_cru); + } /* * Free up profiling buffers. @@ -405,7 +441,7 @@ proc_exit(struct proc *p) for (; p1 != NULL; p1 = pn) { pn = p1->pr_next; - kfree((vm_offset_t)p1, sizeof *p1); + kfree(p1, sizeof *p1); } } @@ -422,9 +458,6 @@ proc_exit(struct proc *p) FREE_ZONE(p->p_limit, sizeof *p->p_limit, M_SUBPROC); p->p_limit = NULL; - /* Free the auditing info */ - audit_proc_free(p); - /* * Finish up by terminating the task * and halt this thread (only if a @@ -440,18 +473,20 @@ proc_exit(struct proc *p) * Notify parent that we're gone. 
*/ if (p->p_pptr->p_flag & P_NOCLDWAIT) { - struct proc * pp = p->p_pptr; + struct proc *opp = p->p_pptr; /* * Add child resource usage to parent before giving - * zombie to init + * zombie to init. If we were unable to allocate a + * zombie structure, this information is lost. */ - ruadd(&p->p_pptr->p_stats->p_cru, p->p_ru); + if (p->p_ru != NULL) + ruadd(&p->p_pptr->p_stats->p_cru, p->p_ru); proc_reparent(p, initproc); /* If there are no more children wakeup parent */ - if (LIST_EMPTY(&pp->p_children)) - wakeup((caddr_t)pp); + if (LIST_EMPTY(&opp->p_children)) + wakeup((caddr_t)opp); } /* should be fine as parent proc would be initproc */ pp = p->p_pptr; @@ -459,14 +494,13 @@ proc_exit(struct proc *p) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_EXITED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; } - psignal(pp, SIGCHLD); - - /* mark as a zombie */ p->p_stat = SZOMB; + psignal(pp, SIGCHLD); + /* and now wakeup the parent */ wakeup((caddr_t)p->p_pptr); @@ -474,73 +508,98 @@ proc_exit(struct proc *p) } -struct wait4_args { - int pid; - int *status; - int options; - struct rusage *rusage; -}; - -#if COMPAT_43 -int -owait(p, uap, retval) - struct proc *p; - void *uap; - int *retval; -{ - struct wait4_args *a; - - a = (struct wait4_args *)get_bsduthreadarg(current_act()); - - a->options = 0; - a->rusage = NULL; - a->pid = WAIT_ANY; - a->status = NULL; - return (wait1(p, a, retval, 1)); -} - -int -wait4(p, uap, retval) - struct proc *p; - struct wait4_args *uap; - int *retval; +/* + * reap_child_process + * + * Description: Given a process from which all status information needed + * has already been extracted, if the process is a ptrace + * attach process, detach it and give it back to its real + * parent, else recover all resources remaining associated + * with it. + * + * Parameters: struct proc *parent Parent of process being reaped + * struct proc *child Process to reap + * + * Returns: 0 Process was not reaped because it + * came from an attach + * 1 Process was reaped + */ +static int +reap_child_process(struct proc *parent, struct proc *child) { - return (wait1(p, uap, retval, 0)); -} + struct proc *trace_parent; /* Traced parent process, if tracing */ + struct vnode *tvp; /* Traced vnode pointer, if used */ -struct owait3_args { - int *status; - int options; - struct rusage *rusage; -}; + /* + * If we got the child via a ptrace 'attach', + * we need to give it back to the old parent. + */ + if (child->p_oppid && (trace_parent = pfind(child->p_oppid))) { + child->p_oppid = 0; + proc_reparent(child, trace_parent); + if (trace_parent != initproc) { + trace_parent->si_pid = child->p_pid; + trace_parent->si_status = child->p_xstat; + trace_parent->si_code = CLD_CONTINUED; + trace_parent->si_uid = child->p_ucred->cr_ruid; + } + psignal(trace_parent, SIGCHLD); + wakeup((caddr_t)trace_parent); + return (0); + } + child->p_xstat = 0; + if (child->p_ru) { + ruadd(&parent->p_stats->p_cru, child->p_ru); + FREE_ZONE(child->p_ru, sizeof *child->p_ru, M_ZOMBIE); + child->p_ru = NULL; + } else { + printf("Warning : lost p_ru for %s\n", child->p_comm); + } -int -owait3(p, uap, retval) - struct proc *p; - struct owait3_args *uap; - int *retval; -{ - struct wait4_args *a; + /* + * Decrement the count of procs running with this uid. + */ + (void)chgproccnt(child->p_ucred->cr_ruid, -1); - a = (struct wait4_args *)get_bsduthreadarg(current_act()); + /* + * Free up credentials. 
+ */ + if (child->p_ucred != NOCRED) { + kauth_cred_t ucr = child->p_ucred; + child->p_ucred = NOCRED; + kauth_cred_rele(ucr); + } - a->rusage = uap->rusage; - a->options = uap->options; - a->status = uap->status; - a->pid = WAIT_ANY; + /* + * Release reference to text vnode + */ + tvp = child->p_textvp; + child->p_textvp = NULL; + if (tvp) { + vnode_rele(tvp); + } + /* + * Finally finished with old proc entry. + * Unlink it from its process group and free it. + */ + leavepgrp(child); + LIST_REMOVE(child, p_list); /* off zombproc */ + LIST_REMOVE(child, p_sibling); + child->p_flag &= ~P_WAITING; - return (wait1(p, a, retval, 1)); + lck_mtx_destroy(&child->p_mlock, proc_lck_grp); + lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp); + FREE_ZONE(child, sizeof *child, M_PROC); + nprocs--; + return (1); } -#else -#define wait1 wait4 -#endif int -wait1continue(result) +wait1continue(int result) { void *vt; - thread_act_t thread; + thread_t thread; int *retval; struct proc *p; @@ -548,27 +607,19 @@ wait1continue(result) return(result); p = current_proc(); - thread = current_act(); - vt = (void *)get_bsduthreadarg(thread); - retval = (int *)get_bsduthreadrval(thread); - return(wait1((struct proc *)p, (struct wait4_args *)vt, retval, 0)); + thread = current_thread(); + vt = get_bsduthreadarg(thread); + retval = get_bsduthreadrval(thread); + return(wait4((struct proc *)p, (struct wait4_args *)vt, retval)); } int -wait1(q, uap, retval, compat) - register struct proc *q; - register struct wait4_args *uap; - register_t *retval; -#if COMPAT_43 - int compat; -#endif +wait4(struct proc *q, struct wait4_args *uap, register_t *retval) { register int nfound; - register struct proc *p, *t; + register struct proc *p; int status, error; - struct vnode *tvp; -retry: if (uap->pid == 0) uap->pid = -q->p_pgid; @@ -580,6 +631,9 @@ loop: p->p_pgid != -(uap->pid)) continue; nfound++; + + /* XXX This is racy because we don't get the lock!!!! */ + if (p->p_flag & P_WAITING) { (void)tsleep(&p->p_stat, PWAIT, "waitcoll", 0); goto loop; @@ -588,113 +642,59 @@ loop: if (p->p_stat == SZOMB) { retval[0] = p->p_pid; -#if COMPAT_43 - if (compat) - retval[1] = p->p_xstat; - else -#endif if (uap->status) { status = p->p_xstat; /* convert to int */ - if (error = copyout((caddr_t)&status, - (caddr_t)uap->status, - sizeof(status))) { + error = copyout((caddr_t)&status, + uap->status, + sizeof(status)); + if (error) { p->p_flag &= ~P_WAITING; wakeup(&p->p_stat); return (error); } } - if (uap->rusage && - (error = copyout((caddr_t)p->p_ru, - (caddr_t)uap->rusage, - sizeof (struct rusage)))) { - p->p_flag &= ~P_WAITING; - wakeup(&p->p_stat); - return (error); - } - /* - * If we got the child via a ptrace 'attach', - * we need to give it back to the old parent. 
- */ - if (p->p_oppid && (t = pfind(p->p_oppid))) { - p->p_oppid = 0; - proc_reparent(p, t); - if (t != initproc) { - t->si_pid = p->p_pid; - t->si_status = p->p_xstat; - t->si_code = CLD_CONTINUED; - t->si_uid = p->p_cred->p_ruid; + if (uap->rusage) { + if (p->p_ru == NULL) { + error = ENOMEM; + } else { + if (IS_64BIT_PROCESS(q)) { + struct user_rusage my_rusage; + munge_rusage(p->p_ru, &my_rusage); + error = copyout((caddr_t)&my_rusage, + uap->rusage, + sizeof (my_rusage)); + } + else { + error = copyout((caddr_t)p->p_ru, + uap->rusage, + sizeof (struct rusage)); + } } - psignal(t, SIGCHLD); - wakeup((caddr_t)t); - p->p_flag &= ~P_WAITING; - wakeup(&p->p_stat); - return (0); - } - p->p_xstat = 0; - if (p->p_ru) { - ruadd(&q->p_stats->p_cru, p->p_ru); - FREE_ZONE(p->p_ru, sizeof *p->p_ru, M_ZOMBIE); - p->p_ru = NULL; - } else { - printf("Warning : lost p_ru for %s\n", p->p_comm); - } - - /* - * Decrement the count of procs running with this uid. - */ - (void)chgproccnt(p->p_cred->p_ruid, -1); - - /* - * Free up credentials. - */ - if (--p->p_cred->p_refcnt == 0) { - struct ucred *ucr = p->p_ucred; - struct pcred *pcr; - - if (ucr != NOCRED) { - p->p_ucred = NOCRED; - crfree(ucr); + /* information unavailable? */ + if (error) { + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (error); } - pcr = p->p_cred; - p->p_cred = NULL; - FREE_ZONE(pcr, sizeof *pcr, M_SUBPROC); } - /* - * Release reference to text vnode - */ - tvp = p->p_textvp; - p->p_textvp = NULL; - if (tvp) - vrele(tvp); + /* Clean up */ + if (!reap_child_process(q, p)) + p->p_flag &= ~P_WAITING; - /* - * Finally finished with old proc entry. - * Unlink it from its process group and free it. - */ - leavepgrp(p); - LIST_REMOVE(p, p_list); /* off zombproc */ - LIST_REMOVE(p, p_sibling); - p->p_flag &= ~P_WAITING; - FREE_ZONE(p, sizeof *p, M_PROC); - nprocs--; + /* Wake other wait'ers, if any */ wakeup(&p->p_stat); + return (0); } if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & WUNTRACED)) { p->p_flag |= P_WAITED; retval[0] = p->p_pid; -#if COMPAT_43 - if (compat) { - retval[1] = W_STOPCODE(p->p_xstat); - error = 0; - } else -#endif if (uap->status) { status = W_STOPCODE(p->p_xstat); error = copyout((caddr_t)&status, - (caddr_t)uap->status, + uap->status, sizeof(status)); } else error = 0; @@ -713,7 +713,262 @@ loop: return (0); } - if (error = tsleep0((caddr_t)q, PWAIT | PCATCH, "wait", 0, wait1continue)) + if ((error = tsleep0((caddr_t)q, PWAIT | PCATCH, "wait", 0, wait1continue))) + return (error); + + goto loop; +} + + +int +waitidcontinue(int result) +{ + void *vt; + thread_t thread; + int *retval; + struct proc *p; + + if (result) + return(result); + + p = current_proc(); + thread = current_thread(); + vt = get_bsduthreadarg(thread); + retval = get_bsduthreadrval(thread); + return(waitid((struct proc *)p, (struct waitid_args *)vt, retval)); +} + +/* + * Description: Suspend the calling thread until one child of the process + * containing the calling thread changes state. 
+ * + * Parameters: uap->idtype one of P_PID, P_PGID, P_ALL + * uap->id pid_t or gid_t or ignored + * uap->infop Address of signinfo_t struct in + * user space into which to return status + * uap->options flag values + * + * Returns: 0 Success + * !0 Error returning status to user space + */ +int +waitid(struct proc *q, struct waitid_args *uap, register_t *retval) +{ + user_siginfo_t collect64; /* siginfo data to return to caller */ + + register int nfound; + register struct proc *p; + int error; + +loop: + nfound = 0; + for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) { + switch(uap->idtype) { + case P_PID: /* child with process ID equal to... */ + if (p->p_pid != (pid_t)uap->id) + continue; + break; + case P_PGID: /* child with process group ID equal to... */ + if (p->p_pgid != (pid_t)uap->id) + continue; + break; + case P_ALL: /* any child */ + break; + } + + /* XXX This is racy because we don't get the lock!!!! */ + + /* + * Wait collision; go to sleep and restart; used to maintain + * the single return for waited process guarantee. + */ + if (p->p_flag & P_WAITING) { + (void)tsleep(&p->p_stat, PWAIT, "waitidcoll", 0); + goto loop; + } + p->p_flag |= P_WAITING; /* mark busy */ + + nfound++; + + /* + * Types of processes we are interested in + * + * XXX Don't know what to do for WCONTINUED?!? + */ + switch(p->p_stat) { + case SZOMB: /* Exited */ + if (!(uap->options & WEXITED)) + break; + + /* Collect "siginfo" information for caller */ + collect64.si_signo = 0; + collect64.si_code = 0; + collect64.si_errno = 0; + collect64.si_pid = 0; + collect64.si_uid = 0; + collect64.si_addr = 0; + collect64.si_status = p->p_xstat; + collect64.si_band = 0; + + if (IS_64BIT_PROCESS(p)) { + error = copyout((caddr_t)&collect64, + uap->infop, + sizeof(collect64)); + } else { + siginfo_t collect; + siginfo_64to32(&collect64,&collect); + error = copyout((caddr_t)&collect, + uap->infop, + sizeof(collect)); + } + /* information unavailable? */ + if (error) { + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (error); + } + + /* Prevent other process for waiting for this event? */ + if (!(uap->options & WNOWAIT)) { + /* Clean up */ + if (!reap_child_process(q, p)) + p->p_flag &= ~P_WAITING; + + /* Wake other wait'ers, if any */ + wakeup(&p->p_stat); + } + + return (0); + + case SSTOP: /* Stopped */ + /* + * If we are not interested in stopped processes, then + * ignore this one. + */ + if (!(uap->options & WSTOPPED)) + break; + + /* + * If someone has already waited it, we lost a race + * to be the one to return status. + */ + if ((p->p_flag & P_WAITED) != 0) + break; + + /* + * If this is not a traced process, and they haven't + * indicated an interest in untraced processes, then + * ignore this one. + */ + if (!(p->p_flag & P_TRACED) && !(uap->options & WUNTRACED)) + break; + + /* Collect "siginfo" information for caller */ + collect64.si_signo = 0; + collect64.si_code = 0; + collect64.si_errno = 0; + collect64.si_pid = 0; + collect64.si_uid = 0; + collect64.si_addr = 0; + collect64.si_status = p->p_xstat; + collect64.si_band = 0; + + if (IS_64BIT_PROCESS(p)) { + error = copyout((caddr_t)&collect64, + uap->infop, + sizeof(collect64)); + } else { + siginfo_t collect; + siginfo_64to32(&collect64,&collect); + error = copyout((caddr_t)&collect, + uap->infop, + sizeof(collect)); + } + /* information unavailable? */ + if (error) { + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (error); + } + + /* Prevent other process for waiting for this event? 
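All three waitid() cases build the collect64 record the same way: zero every field and store only p_xstat. A hypothetical helper, not part of the patch, that would keep the three copies in sync:

    /*
     * Hypothetical refactoring aid: the SZOMB, SSTOP, and continued
     * cases each perform exactly these stores before the copyout.
     */
    static void collect_status(user_siginfo_t *si, int xstat)
    {
        si->si_signo  = 0;
        si->si_code   = 0;
        si->si_errno  = 0;
        si->si_pid    = 0;
        si->si_uid    = 0;
        si->si_addr   = 0;
        si->si_status = xstat;
        si->si_band   = 0;
    }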
*/ + if (!(uap->options & WNOWAIT)) { + p->p_flag |= P_WAITED; + } + + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (0); + + default: /* All others */ + /* ...meaning Continued */ + if (!(uap->options & WCONTINUED)) + break; + + /* + * If the flag isn't set, then this process has not + * been stopped and continued, or the status has + * already been reaped by another caller of waitid(). + */ + if ((p->p_flag & P_CONTINUED) == 0) + break; + + /* Collect "siginfo" information for caller */ + collect64.si_signo = 0; + collect64.si_code = 0; + collect64.si_errno = 0; + collect64.si_pid = 0; + collect64.si_uid = 0; + collect64.si_addr = 0; + collect64.si_status = p->p_xstat; + collect64.si_band = 0; + + if (IS_64BIT_PROCESS(p)) { + error = copyout((caddr_t)&collect64, + uap->infop, + sizeof(collect64)); + } else { + siginfo_t collect; + siginfo_64to32(&collect64,&collect); + error = copyout((caddr_t)&collect, + uap->infop, + sizeof(collect)); + } + /* information unavailable? */ + if (error) { + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (error); + } + + /* Prevent other process for waiting for this event? */ + if (!(uap->options & WNOWAIT)) { + p->p_flag &= ~P_CONTINUED; + } + + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + return (0); + + break; + } + + + /* Not a process we are interested in; go on to next child */ + p->p_flag &= ~P_WAITING; + wakeup(&p->p_stat); + } + + /* No child processes that could possibly satisfy the request? */ + if (nfound == 0) + return (ECHILD); + + if (uap->options & WNOHANG) { + retval[0] = 0; + return (0); + } + + if ((error = tsleep0((caddr_t)q, PWAIT | PCATCH, "waitid", 0, waitidcontinue))) return (error); goto loop; @@ -723,9 +978,7 @@ loop: * make process 'parent' the new parent of process 'child'. */ void -proc_reparent(child, parent) - register struct proc *child; - register struct proc *parent; +proc_reparent(struct proc *child, struct proc *parent) { if (child->p_pptr == parent) @@ -734,6 +987,9 @@ proc_reparent(child, parent) LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; + + if (initproc == parent && child->p_stat == SZOMB) + psignal(initproc, SIGCHLD); } /* @@ -742,12 +998,12 @@ proc_reparent(child, parent) * gunned down by kill(-1, 0). */ kern_return_t -init_process(void) +init_process(__unused struct init_process_args *args) { register struct proc *p = current_proc(); AUDIT_MACH_SYSCALL_ENTER(AUE_INITPROCESS); - if (suser(p->p_ucred, &p->p_acflag)) { + if (suser(kauth_cred_get(), &p->p_acflag)) { AUDIT_MACH_SYSCALL_EXIT(KERN_NO_ACCESS); return(KERN_NO_ACCESS); } @@ -769,16 +1025,6 @@ init_process(void) return(KERN_SUCCESS); } -void -process_terminate_self(void) -{ - struct proc *p = current_proc(); - - if (p != NULL) { - exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL); - /*NOTREACHED*/ - } -} /* * Exit: deallocate address space and other resources, change proc state @@ -786,31 +1032,57 @@ process_terminate_self(void) * status and rusage for wait(). Check for child processes and orphan them. 
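From userspace the new code path is reached through the POSIX waitid(2) wrapper; a caller's-eye example, assuming a libc that exposes the wrapper (note that this revision zeroes si_pid and si_uid, so only si_status carries information):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        siginfo_t info;
        pid_t pid = fork();

        if (pid == 0)
            _exit(7);                            /* child exits with 7 */

        if (waitid(P_PID, (id_t)pid, &info, WEXITED) == 0)
            printf("child %d exited, si_status=%d\n",
                   (int)pid, info.si_status);
        return 0;
    }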
*/ -int -vfork_exit(p, rv) - struct proc *p; - int rv; +void +vfork_exit(struct proc *p, int rv) { - register struct proc *q, *nq; - thread_act_t self = current_act(); + thread_t self = current_thread(); +#ifdef FIXME struct task *task = p->task; - register int i,s; +#endif + register int s; struct uthread *ut; exception_data_t code[EXCEPTION_CODE_MAX]; - ut = get_bsdthread_info(self); - if (p->exit_thread) { - return(1); - } - p->exit_thread = self; - + /* + * If a thread in this task has already + * called exit(), then halt any others + * right here. + */ + + ut = get_bsdthread_info(self); +#ifdef FIXME + signal_lock(p); + while (p->exit_thread != self) { + if (sig_try_locked(p) <= 0) { + if (get_threadtask(self) != task) { + signal_unlock(p); + return; + } + signal_unlock(p); + thread_terminate(self); + thread_funnel_set(kernel_flock, FALSE); + thread_exception_return(); + /* NOTREACHED */ + } + sig_lock_to_exit(p); + } + signal_unlock(p); + if (p->p_pid == 1) { + printf("pid 1 exited (signal %d, exit %d)", + WTERMSIG(rv), WEXITSTATUS(rv)); +panic("init died\nState at Last Exception:\n\n%s", init_task_failure_data); + } +#endif /* FIXME */ + s = splsched(); p->p_flag |= P_WEXIT; + p->p_lflag |= P_LPEXIT; splx(s); - code[0] = 0xFF000001; /* Set terminate code */ - code[1] = p->p_pid; /* Pass out the pid */ - (void)sys_perf_notify(p->task, &code, 2); /* Notify the perf server */ + code[0] = (exception_data_t)0xFF000001; /* Set terminate code */ + code[1] = (exception_data_t)p->p_pid; /* Pass out the pid */ + /* Notify the perf server */ + (void)sys_perf_notify(p->task, (exception_data_t)&code, 2); /* * Remove proc from allproc queue and from pidhash chain. @@ -835,17 +1107,17 @@ vfork_exit(p, rv) p->p_xstat = rv; vproc_exit(p); - return(0); } void vproc_exit(struct proc *p) { register struct proc *q, *nq, *pp; +#ifdef FIXME struct task *task = p->task; - register int i,s; - boolean_t funnel_state; +#endif + /* XXX Zombie allocation may fail, in which case stats get lost */ MALLOC_ZONE(p->p_ru, struct rusage *, sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK); @@ -860,6 +1132,7 @@ vproc_exit(struct proc *p) if (sp->s_ttyvp) { struct vnode *ttyvp; + struct vfs_context context; /* * Controlling process. @@ -875,13 +1148,16 @@ vproc_exit(struct proc *p) * The tty could have been revoked * if we blocked. */ + context.vc_proc = p; + context.vc_ucred = p->p_ucred; if (sp->s_ttyvp) - VOP_REVOKE(sp->s_ttyvp, REVOKEALL); + VNOP_REVOKE(sp->s_ttyvp, REVOKEALL, &context); } ttyvp = sp->s_ttyvp; sp->s_ttyvp = NULL; - if (ttyvp) - vrele(ttyvp); + if (ttyvp) { + vnode_rele(ttyvp); + } /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. @@ -898,22 +1174,15 @@ vproc_exit(struct proc *p) /* * release trace file */ - p->p_traceflag = 0; /* don't trace the vrele() */ + p->p_traceflag = 0; /* don't trace the vnode_rele() */ if (p->p_tracep) { struct vnode *tvp = p->p_tracep; p->p_tracep = NULL; - - if (UBCINFOEXISTS(tvp)) - ubc_rele(tvp); - vrele(tvp); + vnode_rele(tvp); } #endif - q = p->p_children.lh_first; - if (q) /* only need this if any child is S_ZOMB */ - wakeup((caddr_t) initproc); - for (; q != 0; q = nq) { - nq = q->p_sibling.le_next; + while (q = p->p_children.lh_first) { proc_reparent(q, initproc); /* * Traced processes are killed @@ -929,9 +1198,9 @@ vproc_exit(struct proc *p) * the first thread in the task. So any attempts to kill * the process would result into a deadlock on q->sigwait. 
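The controlling-terminal teardown above illustrates the KPI shift running through this patch: the old funnel-era VOP_REVOKE()/vrele() calls become VNOP_REVOKE()/vnode_rele(), with the caller now passing an explicit vfs_context carrying the acting process and credential. A condensed sketch of just that pattern; the helper name session_revoke_tty is hypothetical and the identifiers mirror the hunk rather than forming a standalone translation unit:

static void
session_revoke_tty(struct proc *p, struct session *sp)
{
	struct vfs_context context;
	struct vnode *ttyvp;

	context.vc_proc = p;		/* who is asking */
	context.vc_ucred = p->p_ucred;	/* credential used for the revoke */

	if (sp->s_ttyvp)
		VNOP_REVOKE(sp->s_ttyvp, REVOKEALL, &context);

	ttyvp = sp->s_ttyvp;
	sp->s_ttyvp = NULL;		/* clear before dropping the reference */
	if (ttyvp)
		vnode_rele(ttyvp);	/* vnode_rele() replaces vrele() */
}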
*/ - thread_resume((thread_act_t)q->sigwait_thread); + thread_resume((thread_t)q->sigwait_thread); clear_wait(q->sigwait_thread, THREAD_INTERRUPTED); - threadsignal((thread_act_t)q->sigwait_thread, SIGKILL, 0); + threadsignal((thread_t)q->sigwait_thread, SIGKILL, 0); } psignal(q, SIGKILL); } @@ -939,15 +1208,16 @@ vproc_exit(struct proc *p) /* * Save exit status and final rusage info, adding in child rusage - * info and self times. + * info and self times. If we were unable to allocate a zombie + * structure, this information is lost. */ - *p->p_ru = p->p_stats->p_ru; - - timerclear(&p->p_ru->ru_utime); - timerclear(&p->p_ru->ru_stime); + if (p->p_ru != NULL) { + *p->p_ru = p->p_stats->p_ru; + timerclear(&p->p_ru->ru_utime); + timerclear(&p->p_ru->ru_stime); #ifdef FIXME - if (task) { + if (task) { task_basic_info_data_t tinfo; task_thread_times_info_data_t ttimesinfo; int task_info_stuff, task_ttimes_stuff; @@ -970,11 +1240,12 @@ vproc_exit(struct proc *p) st.tv_sec = ttimesinfo.system_time.seconds; st.tv_usec = ttimesinfo.system_time.microseconds; timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime); - timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime); - } + timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime); + } #endif /* FIXME */ - ruadd(p->p_ru, &p->p_stats->p_cru); + ruadd(p->p_ru, &p->p_stats->p_cru); + } /* * Free up profiling buffers. @@ -988,7 +1259,7 @@ vproc_exit(struct proc *p) for (; p1 != NULL; p1 = pn) { pn = p1->pr_next; - kfree((vm_offset_t)p1, sizeof *p1); + kfree(p1, sizeof *p1); } } @@ -1020,13 +1291,48 @@ vproc_exit(struct proc *p) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_EXITED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; } - psignal(p->p_pptr, SIGCHLD); - /* mark as a zombie */ p->p_stat = SZOMB; + psignal(p->p_pptr, SIGCHLD); + /* and now wakeup the parent */ wakeup((caddr_t)p->p_pptr); } + + +/* + * munge_rusage + * LP64 support - long is 64 bits if we are dealing with a 64 bit user + * process. We munge the kernel (32 bit) version of rusage into the + * 64 bit version. + */ +__private_extern__ void +munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p) +{ + /* timeval changes size, so utime and stime need special handling */ + a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec; + a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec; + a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec; + a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec; + /* + * everything else can be a direct assign, since there is no loss + * of precision implied boing 32->64. 
+ */ + a_user_rusage_p->ru_maxrss = a_rusage_p->ru_maxrss; + a_user_rusage_p->ru_ixrss = a_rusage_p->ru_ixrss; + a_user_rusage_p->ru_idrss = a_rusage_p->ru_idrss; + a_user_rusage_p->ru_isrss = a_rusage_p->ru_isrss; + a_user_rusage_p->ru_minflt = a_rusage_p->ru_minflt; + a_user_rusage_p->ru_majflt = a_rusage_p->ru_majflt; + a_user_rusage_p->ru_nswap = a_rusage_p->ru_nswap; + a_user_rusage_p->ru_inblock = a_rusage_p->ru_inblock; + a_user_rusage_p->ru_oublock = a_rusage_p->ru_oublock; + a_user_rusage_p->ru_msgsnd = a_rusage_p->ru_msgsnd; + a_user_rusage_p->ru_msgrcv = a_rusage_p->ru_msgrcv; + a_user_rusage_p->ru_nsignals = a_rusage_p->ru_nsignals; + a_user_rusage_p->ru_nvcsw = a_rusage_p->ru_nvcsw; + a_user_rusage_p->ru_nivcsw = a_rusage_p->ru_nivcsw; +} diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 40a2275c1..a993e3356 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -66,29 +66,33 @@ #include #include #include -#include +#include +#include #include #include -#include -#include +#include +#include #include -#include - -#include - #if KTRACE #include -#include #endif +#include + #include +#include +#include #include +#include +#include #include -thread_act_t cloneproc(struct proc *, int); +#include // for vm_map_commpage64 + +thread_t cloneproc(struct proc *, int); struct proc * forkproc(struct proc *, int); -thread_act_t procdup(); +thread_t procdup(struct proc *child, struct proc *parent); #define DOFORK 0x1 /* fork() system call */ #define DOVFORK 0x2 /* vfork() system call */ @@ -98,10 +102,7 @@ static int fork1(struct proc *, long, register_t *); * fork system call. */ int -fork(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +fork(struct proc *p, __unused void *uap, register_t *retval) { return (fork1(p, (long)DOFORK, retval)); } @@ -110,18 +111,15 @@ fork(p, uap, retval) * vfork system call */ int -vfork(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +vfork(struct proc *p, void *uap, register_t *retval) { register struct proc * newproc; register uid_t uid; - thread_act_t cur_act = (thread_act_t)current_act(); + thread_t cur_act = (thread_t)current_thread(); int count; task_t t; uthread_t ut; - + /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow @@ -129,7 +127,7 @@ vfork(p, uap, retval) * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. 
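munge_rusage() has to copy field by field because struct timeval doubles in width under LP64, so the two layouts are not bytewise compatible even though every field keeps its name. A runnable toy showing the same widening; the struct names are stand-ins, not the kernel's:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the 32-bit kernel timeval and LP64 user layout */
struct timeval32    { int32_t tv_sec; int32_t tv_usec; };
struct user_timeval { int64_t tv_sec; int64_t tv_usec; };

int
main(void)
{
	struct timeval32 k = { 1, 500000 };
	struct user_timeval u;

	/* A memcpy would misplace every field since each member doubles in
	 * size; per-field assignment widens each value correctly instead. */
	u.tv_sec  = k.tv_sec;		/* sign-extends 32 -> 64 bits */
	u.tv_usec = k.tv_usec;

	printf("32-bit struct: %zu bytes, LP64 struct: %zu bytes\n",
	    sizeof k, sizeof u);
	printf("%lld.%06lld\n", (long long)u.tv_sec, (long long)u.tv_usec);
	return 0;
}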
*/ - uid = p->p_cred->p_ruid; + uid = kauth_cred_get()->cr_ruid; if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { tablefull("proc"); retval[1] = 0; @@ -147,7 +145,7 @@ vfork(p, uap, retval) } ut = (struct uthread *)get_bsdthread_info(cur_act); - if (ut->uu_flag & P_VFORK) { + if (ut->uu_flag & UT_VFORK) { printf("vfork called recursively by %s\n", p->p_comm); (void)chgproccnt(uid, -1); return (EINVAL); @@ -172,14 +170,20 @@ vfork(p, uap, retval) newproc->p_flag |= P_INVFORK; newproc->p_vforkact = cur_act; - ut->uu_flag |= P_VFORK; + ut->uu_flag |= UT_VFORK; ut->uu_proc = newproc; ut->uu_userstate = (void *)act_thread_csave(); ut->uu_vforkmask = ut->uu_sigmask; + /* temporarily drop thread-set-id state */ + if (ut->uu_flag & UT_SETUID) { + ut->uu_flag |= UT_WASSETUID; + ut->uu_flag &= ~UT_SETUID; + } + thread_set_child(cur_act, newproc->p_pid); - newproc->p_stats->p_start = time; + microtime(&newproc->p_stats->p_start); newproc->p_acflag = AFORK; /* @@ -202,38 +206,35 @@ vfork(p, uap, retval) * Return to parent vfork ehread() */ void -vfork_return(th_act, p, p2, retval) - thread_act_t th_act; - struct proc * p; - struct proc *p2; - register_t *retval; +vfork_return(__unused thread_t th_act, struct proc *p, struct proc *p2, + register_t *retval) { - long flags; - register uid_t uid; - int s, count; - task_t t; + thread_t cur_act = (thread_t)current_thread(); uthread_t ut; - ut = (struct uthread *)get_bsdthread_info(th_act); + ut = (struct uthread *)get_bsdthread_info(cur_act); act_thread_catt(ut->uu_userstate); /* Make sure only one at this time */ - if (p) { - p->p_vforkcnt--; - if (p->p_vforkcnt <0) - panic("vfork cnt is -ve"); - if (p->p_vforkcnt <=0) - p->p_flag &= ~P_VFORK; - } + p->p_vforkcnt--; + if (p->p_vforkcnt <0) + panic("vfork cnt is -ve"); + if (p->p_vforkcnt <=0) + p->p_flag &= ~P_VFORK; ut->uu_userstate = 0; - ut->uu_flag &= ~P_VFORK; + ut->uu_flag &= ~UT_VFORK; + /* restore thread-set-id state */ + if (ut->uu_flag & UT_WASSETUID) { + ut->uu_flag |= UT_SETUID; + ut->uu_flag &= UT_WASSETUID; + } ut->uu_proc = 0; ut->uu_sigmask = ut->uu_vforkmask; p2->p_flag &= ~P_INVFORK; p2->p_vforkact = (void *)0; - thread_set_parent(th_act, p2->p_pid); + thread_set_parent(cur_act, p2->p_pid); if (retval) { retval[0] = p2->p_pid; @@ -243,16 +244,12 @@ vfork_return(th_act, p, p2, retval) return; } -thread_act_t -procdup( - struct proc *child, - struct proc *parent) +thread_t +procdup(struct proc *child, struct proc *parent) { - thread_act_t thread; + thread_t thread; task_t task; kern_return_t result; - pmap_t pmap; - extern task_t kernel_task; if (parent->task == kernel_task) result = task_create_internal(TASK_NULL, FALSE, &task); @@ -263,6 +260,18 @@ procdup( child->task = task; /* task->proc = child; */ set_bsdtask_info(task, child); + if (parent->p_flag & P_LP64) { + task_set_64bit(task, TRUE); + child->p_flag |= P_LP64; +#ifdef __PPC__ + /* LP64todo - clean up this hacked mapping of commpage */ + pmap_map_sharedpage(task, get_map_pmap(get_task_map(task))); + vm_map_commpage64(get_task_map(task)); +#endif /* __PPC__ */ + } else { + task_set_64bit(task, FALSE); + child->p_flag &= ~P_LP64; + } if (child->p_nice != 0) resetpriority(child); @@ -282,9 +291,9 @@ fork1(p1, flags, retval) { register struct proc *p2; register uid_t uid; - thread_act_t newth; - int s, count; - task_t t; + thread_t newth; + int count; + task_t t; /* * Although process entries are dynamically created, we still keep @@ -293,7 +302,7 @@ fork1(p1, flags, retval) * exceed the limit. 
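The UT_SETUID/UT_WASSETUID pair implements a save-and-restore of the thread's assumed-identity state across vfork(). A runnable toy with made-up flag values; note that the restore path in the hunk above reads "ut->uu_flag &= UT_WASSETUID;", which would wipe every *other* flag bit, so the complement (&= ~UT_WASSETUID) is presumably what was intended:

#include <assert.h>
#include <stdio.h>

#define UT_SETUID     0x0010	/* hypothetical bit values, for illustration */
#define UT_WASSETUID  0x0020
#define UT_OTHER      0x0100

int
main(void)
{
	unsigned flag = UT_SETUID | UT_OTHER;

	/* drop: remember that the thread had an assumed identity */
	if (flag & UT_SETUID) {
		flag |= UT_WASSETUID;
		flag &= ~UT_SETUID;
	}

	/* restore: clear only the marker bit -- hence the '~' */
	if (flag & UT_WASSETUID) {
		flag |= UT_SETUID;
		flag &= ~UT_WASSETUID;
	}

	assert(flag == (UT_SETUID | UT_OTHER));	/* unrelated bits survive */
	printf("flags restored: 0x%x\n", flag);
	return 0;
}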
The variable nprocs is the current number of * processes, maxproc is the limit. */ - uid = p1->p_cred->p_ruid; + uid = kauth_cred_get()->cr_ruid; if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { tablefull("proc"); retval[1] = 0; @@ -321,9 +330,7 @@ fork1(p1, flags, retval) thread_set_child(newth, p2->p_pid); - s = splhigh(); - p2->p_stats->p_start = time; - splx(s); + microtime(&p2->p_stats->p_start); p2->p_acflag = AFORK; /* @@ -339,10 +346,10 @@ fork1(p1, flags, retval) (void) thread_resume(newth); /* drop the extra references we got during the creation */ - if (t = (task_t)get_threadtask(newth)) { + if ((t = (task_t)get_threadtask(newth)) != NULL) { task_deallocate(t); } - act_deallocate(newth); + thread_deallocate(newth); KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid); @@ -364,13 +371,13 @@ fork1(p1, flags, retval) * lock set. fork() code needs to explicity remove this lock * before signals can be delivered */ -thread_act_t +thread_t cloneproc(p1, lock) register struct proc *p1; register int lock; { register struct proc *p2; - thread_act_t th; + thread_t th; p2 = (struct proc *)forkproc(p1,lock); @@ -399,17 +406,20 @@ forkproc(p1, lock) { register struct proc *p2, *newproc; static int nextpid = 0, pidchecked = 0; - thread_t th; /* Allocate new proc. */ MALLOC_ZONE(newproc, struct proc *, sizeof *newproc, M_PROC, M_WAITOK); - MALLOC_ZONE(newproc->p_cred, struct pcred *, - sizeof *newproc->p_cred, M_SUBPROC, M_WAITOK); + if (newproc == NULL) + panic("forkproc: M_PROC zone exhausted"); MALLOC_ZONE(newproc->p_stats, struct pstats *, sizeof *newproc->p_stats, M_SUBPROC, M_WAITOK); + if (newproc->p_stats == NULL) + panic("forkproc: M_SUBPROC zone exhausted (p_stats)"); MALLOC_ZONE(newproc->p_sigacts, struct sigacts *, sizeof *newproc->p_sigacts, M_SUBPROC, M_WAITOK); + if (newproc->p_sigacts == NULL) + panic("forkproc: M_SUBPROC zone exhausted (p_sigacts)"); /* * Find an unused process ID. We remember a range of unused IDs @@ -464,9 +474,9 @@ again: nprocs++; p2 = newproc; p2->p_stat = SIDL; + p2->p_shutdownstate = 0; p2->p_pid = nextpid; - p2->p_shutdownstate = 0; /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, @@ -479,34 +489,35 @@ again: p2->vm_shm = (void *)NULL; /* Make sure it is zero */ /* - * Copy the audit info. - */ - audit_proc_fork(p1, p2); - - /* + * Some flags are inherited from the parent. * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ - p2->p_flag = P_INMEM; - p2->p_flag |= (p1->p_flag & P_CLASSIC); // copy from parent - p2->p_flag |= (p1->p_flag & P_AFFINITY); // copy from parent + p2->p_flag = (p1->p_flag & (P_LP64 | P_CLASSIC | P_AFFINITY)); if (p1->p_flag & P_PROFIL) startprofclock(p2); - bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred)); - p2->p_cred->p_refcnt = 1; - crhold(p1->p_ucred); - lockinit(&p2->p_cred->pc_lock, PLOCK, "proc cred", 0, 0); + /* + * Note that if the current thread has an assumed identity, this + * credential will be granted to the new process. 
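Both vfork() and fork1() gate process creation on the same admission check: the very last proc-table slot is reserved so that root can still fork when ordinary users have exhausted the limit. A runnable restatement of that predicate:

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

/* Mirrors the check in vfork()/fork1(): the final slot is root-only. */
static bool
proc_table_has_room(int nprocs, int maxproc, uid_t uid)
{
	if (nprocs >= maxproc)
		return false;		/* completely full, even for root */
	if (nprocs >= maxproc - 1 && uid != 0)
		return false;		/* last slot is reserved for uid 0 */
	return true;
}

int
main(void)
{
	printf("%d\n", proc_table_has_room(98, 100, 501));	/* 1: room */
	printf("%d\n", proc_table_has_room(99, 100, 501));	/* 0: root-only slot */
	printf("%d\n", proc_table_has_room(99, 100, 0));	/* 1: root may take it */
	return 0;
}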
+ */ + p2->p_ucred = kauth_cred_get_with_ref(); + + lck_mtx_init(&p2->p_mlock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&p2->p_fdmlock, proc_lck_grp, proc_lck_attr); klist_init(&p2->p_klist); /* bump references to the text vnode */ p2->p_textvp = p1->p_textvp; - if (p2->p_textvp) - VREF(p2->p_textvp); - + if (p2->p_textvp) { + vnode_rele(p2->p_textvp); + } + /* XXX may fail to copy descriptors to child */ p2->p_fd = fdcopy(p1); + if (p1->vm_shm) { - shmfork(p1,p2); + /* XXX may fail to attach shm to child */ + (void)shmfork(p1,p2); } /* * If p_limit is still copy-on-write, bump refcnt, @@ -528,6 +539,8 @@ again: ((caddr_t)&p2->p_stats->pstat_endcopy - (caddr_t)&p2->p_stats->pstat_startcopy)); + bzero(&p2->p_stats->user_p_prof, sizeof(struct user_uprof)); + if (p1->p_sigacts != NULL) (void)memcpy(p2->p_sigacts, p1->p_sigacts, sizeof *p2->p_sigacts); @@ -553,6 +566,7 @@ again: p2->user_stack = p1->user_stack; p2->p_vforkcnt = 0; p2->p_vforkact = 0; + p2->p_lflag = 0; TAILQ_INIT(&p2->p_uthlist); TAILQ_INIT(&p2->aio_activeq); TAILQ_INIT(&p2->aio_doneq); @@ -567,9 +581,7 @@ again: if (p1->p_traceflag&KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracep = p1->p_tracep) != NULL) { - if (UBCINFOEXISTS(p2->p_tracep)) - ubc_hold(p2->p_tracep); - VREF(p2->p_tracep); + vnode_ref(p2->p_tracep); } } #endif @@ -577,30 +589,41 @@ again: } +void +proc_lock(proc_t p) +{ + lck_mtx_lock(&p->p_mlock); +} + +void +proc_unlock(proc_t p) +{ + lck_mtx_unlock(&p->p_mlock); +} + #include struct zone *uthread_zone; int uthread_zone_inited = 0; void -uthread_zone_init() +uthread_zone_init(void) { if (!uthread_zone_inited) { uthread_zone = zinit(sizeof(struct uthread), - THREAD_MAX * sizeof(struct uthread), - THREAD_CHUNK * sizeof(struct uthread), - "uthreads"); + THREAD_MAX * sizeof(struct uthread), + THREAD_CHUNK * sizeof(struct uthread), + "uthreads"); uthread_zone_inited = 1; } } void * -uthread_alloc(task_t task, thread_act_t thr_act ) +uthread_alloc(task_t task, thread_t thr_act ) { struct proc *p; struct uthread *uth, *uth_parent; void *ut; - extern task_t kernel_task; boolean_t funnel_state; if (!uthread_zone_inited) @@ -609,22 +632,44 @@ uthread_alloc(task_t task, thread_act_t thr_act ) ut = (void *)zalloc(uthread_zone); bzero(ut, sizeof(struct uthread)); - if (task != kernel_task) { - uth = (struct uthread *)ut; - p = (struct proc *) get_bsdtask_info(task); + p = (struct proc *) get_bsdtask_info(task); + uth = (struct uthread *)ut; + /* + * Thread inherits credential from the creating thread, if both + * are in the same task. + * + * If the creating thread has no credential or is from another + * task we can leave the new thread credential NULL. If it needs + * one later, it will be lazily assigned from the task's process. 
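One hunk above deserves a second look: the comment says "bump references to the text vnode", but the new code calls vnode_rele(), which *drops* a reference; under the new vnode KPI the reference-taking counterpart of the removed VREF() is vnode_ref(). Treat this as a suspected defect in the shown patch, not documented behavior. A hedged sketch of what the comment appears to intend:

	/* bump references to the text vnode */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vnode_ref(p2->p_textvp);	/* child now shares the parent's text vnode */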
+ */ + uth_parent = (struct uthread *)get_bsdthread_info(current_thread()); + if ((task == current_task()) && + (uth_parent != NULL) && + (uth_parent->uu_ucred != NOCRED)) { + uth->uu_ucred = uth_parent->uu_ucred; + kauth_cred_ref(uth->uu_ucred); + /* the credential we just inherited is an assumed credential */ + if (uth_parent->uu_flag & UT_SETUID) + uth->uu_flag |= UT_SETUID; + } else { + uth->uu_ucred = NOCRED; + } + + if (task != kernel_task) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); - uth_parent = (struct uthread *)get_bsdthread_info(current_act()); if (uth_parent) { - if (uth_parent->uu_flag & USAS_OLDMASK) + if (uth_parent->uu_flag & UT_SAS_OLDMASK) uth->uu_sigmask = uth_parent->uu_oldmask; else uth->uu_sigmask = uth_parent->uu_sigmask; } uth->uu_act = thr_act; //signal_lock(p); - if (p) + if (p) { TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list); + } //signal_unlock(p); (void)thread_funnel_set(kernel_flock, funnel_state); } @@ -634,16 +679,12 @@ uthread_alloc(task_t task, thread_act_t thr_act ) void -uthread_free(task_t task, thread_t act, void *uthread, void * bsd_info) +uthread_free(task_t task, void *uthread, void * bsd_info) { struct _select *sel; struct uthread *uth = (struct uthread *)uthread; struct proc * p = (struct proc *)bsd_info; - extern task_t kernel_task; - int size; boolean_t funnel_state; - struct nlminfo *nlmp; - struct proc * vproc; /* * Per-thread audit state should never last beyond system @@ -653,40 +694,31 @@ uthread_free(task_t task, thread_t act, void *uthread, void * bsd_info) */ assert(uth->uu_ar == NULL); - sel = &uth->uu_state.ss_select; + sel = &uth->uu_select; /* cleanup the select bit space */ if (sel->nbytes) { FREE(sel->ibits, M_TEMP); FREE(sel->obits, M_TEMP); } - if (sel->allocsize && uth->uu_wqsub){ - kfree(uth->uu_wqsub, sel->allocsize); - sel->count = sel->nfcount = 0; + if (sel->allocsize && sel->wqset){ + kfree(sel->wqset, sel->allocsize); + sel->count = 0; sel->allocsize = 0; - uth->uu_wqsub = 0; + sel->wqset = 0; sel->wql = 0; } - if ((nlmp = uth->uu_nlminfo)) { - uth->uu_nlminfo = 0; - FREE(nlmp, M_LOCKF); - } - - if ((task != kernel_task) ) { - int vfork_exit(struct proc *, int); + if (uth->uu_ucred != NOCRED) + kauth_cred_rele(uth->uu_ucred); + if ((task != kernel_task) && p) { funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (p) - TAILQ_REMOVE(&p->p_uthlist, uth, uu_list); - if ((uth->uu_flag & P_VFORK) && (vproc = uth->uu_proc) - && (vproc->p_flag & P_INVFORK)) { - if (!vfork_exit(vproc, W_EXITCODE(0, SIGKILL))) - vfork_return(act, p, vproc, NULL); - - } + //signal_lock(p); + TAILQ_REMOVE(&p->p_uthlist, uth, uu_list); + //signal_unlock(p); (void)thread_funnel_set(kernel_flock, funnel_state); } /* and free the uthread itself */ - zfree(uthread_zone, (vm_offset_t)uthread); + zfree(uthread_zone, uthread); } diff --git a/bsd/kern/kern_ktrace.c b/bsd/kern/kern_ktrace.c index 4234c2fbc..c77a03c90 100644 --- a/bsd/kern/kern_ktrace.c +++ b/bsd/kern/kern_ktrace.c @@ -60,27 +60,28 @@ #include #include #include -#include -#include +#include +#include +#include #include -#include +#include #if KTRACE #include #endif #include #include -#include +#include +#include #include #if KTRACE -static struct ktr_header *ktrgetheader __P((int type)); -static void ktrwrite __P((struct vnode *, struct ktr_header *, - struct uio *, int)); -static int ktrcanset __P((struct proc *,struct proc *)); -static int ktrsetchildren __P((struct proc *,struct proc *, - int, int, struct vnode *)); -static int ktrops __P((struct proc *,struct proc 
*,int,int,struct vnode *)); +static struct ktr_header *ktrgetheader(int type); +static void ktrwrite(struct vnode *, struct ktr_header *, struct uio *); +static int ktrcanset(struct proc *,struct proc *); +static int ktrsetchildren(struct proc *,struct proc *, + int, int, struct vnode *); +static int ktrops(struct proc *,struct proc *,int,int,struct vnode *); static struct ktr_header * @@ -92,27 +93,28 @@ ktrgetheader(type) MALLOC(kth, struct ktr_header *, sizeof (struct ktr_header), M_KTRACE, M_WAITOK); - kth->ktr_type = type; - microtime(&kth->ktr_time); - kth->ktr_pid = p->p_pid; - bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN); + if (kth != NULL) { + kth->ktr_type = type; + microtime(&kth->ktr_time); + kth->ktr_pid = p->p_pid; + bcopy(p->p_comm, kth->ktr_comm, MAXCOMLEN); + } return (kth); } #endif void -ktrsyscall(p, code, narg, args, funnel_type) +ktrsyscall(p, code, narg, args) struct proc *p; int code, narg; - register_t args[]; - int funnel_type; + u_int64_t args[]; { #if KTRACE struct vnode *vp; struct ktr_header *kth; struct ktr_syscall *ktp; register int len; - register_t *argp; + u_int64_t *argp; int i; if (!KTRPOINT(p, KTR_SYSCALL)) @@ -120,10 +122,18 @@ ktrsyscall(p, code, narg, args, funnel_type) vp = p->p_tracep; len = __offsetof(struct ktr_syscall, ktr_args) + - (narg * sizeof(register_t)); + (narg * sizeof(u_int64_t)); p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_SYSCALL); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } MALLOC(ktp, struct ktr_syscall *, len, M_KTRACE, M_WAITOK); + if (ktp == NULL) { + FREE(kth, M_KTRACE); + return; + } ktp->ktr_code = code; ktp->ktr_narg = narg; argp = &ktp->ktr_args[0]; @@ -131,7 +141,7 @@ ktrsyscall(p, code, narg, args, funnel_type) *argp++ = args[i]; kth->ktr_buf = (caddr_t)ktp; kth->ktr_len = len; - ktrwrite(vp, kth, NULL, funnel_type); + ktrwrite(vp, kth, NULL); FREE(ktp, M_KTRACE); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; @@ -141,11 +151,10 @@ ktrsyscall(p, code, narg, args, funnel_type) } void -ktrsysret(p, code, error, retval, funnel_type) +ktrsysret(p, code, error, retval) struct proc *p; int code, error; register_t retval; - int funnel_type; { #if KTRACE struct vnode *vp; @@ -158,6 +167,10 @@ ktrsysret(p, code, error, retval, funnel_type) vp = p->p_tracep; p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_SYSRET); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } ktp.ktr_code = code; ktp.ktr_error = error; ktp.ktr_retval = retval; /* what about val2 ? 
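The ktrace changes above all add the same thing: NULL checks after MALLOC with unwinding of whatever was already allocated. Note that the visible "ktp == NULL" path in ktrsyscall() frees kth and returns but never clears KTRFAC_ACTIVE, unlike its siblings; that looks like an oversight rather than intent. A hedged sketch of the symmetric unwind discipline these hunks are converging on, with identifiers mirroring the code above (not a standalone unit):

	p->p_traceflag |= KTRFAC_ACTIVE;
	kth = ktrgetheader(KTR_SYSCALL);
	if (kth == NULL)
		goto out;			/* nothing allocated yet */
	MALLOC(ktp, struct ktr_syscall *, len, M_KTRACE, M_WAITOK);
	if (ktp == NULL)
		goto out_free_kth;		/* undo the header allocation */

	/* ... fill ktp and write the record ... */

	FREE(ktp, M_KTRACE);
out_free_kth:
	FREE(kth, M_KTRACE);
out:
	p->p_traceflag &= ~KTRFAC_ACTIVE;	/* cleared on every exit path */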
*/ @@ -165,7 +178,7 @@ ktrsysret(p, code, error, retval, funnel_type) kth->ktr_buf = (caddr_t)&ktp; kth->ktr_len = sizeof(struct ktr_sysret); - ktrwrite(vp, kth, NULL, funnel_type); + ktrwrite(vp, kth, NULL); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; #else @@ -184,22 +197,25 @@ ktrnamei(vp, path) p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_NAMEI); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } kth->ktr_len = strlen(path); kth->ktr_buf = path; - ktrwrite(vp, kth, NULL, KERNEL_FUNNEL); + ktrwrite(vp, kth, NULL); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; } void -ktrgenio(vp, fd, rw, uio, error, funnel_type) +ktrgenio(vp, fd, rw, uio, error) struct vnode *vp; int fd; enum uio_rw rw; struct uio *uio; int error; - int funnel_type; { struct ktr_header *kth; struct ktr_genio ktg; @@ -210,6 +226,10 @@ ktrgenio(vp, fd, rw, uio, error, funnel_type) p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_GENIO); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } ktg.ktr_fd = fd; ktg.ktr_rw = rw; kth->ktr_buf = (caddr_t)&ktg; @@ -217,19 +237,18 @@ ktrgenio(vp, fd, rw, uio, error, funnel_type) uio->uio_offset = 0; uio->uio_rw = UIO_WRITE; - ktrwrite(vp, kth, uio, funnel_type); + ktrwrite(vp, kth, uio); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; } void -ktrpsig(vp, sig, action, mask, code, funnel_type) +ktrpsig(vp, sig, action, mask, code) struct vnode *vp; int sig; sig_t action; sigset_t *mask; int code; - int funnel_type; { struct ktr_header *kth; struct ktr_psig kp; @@ -237,6 +256,10 @@ ktrpsig(vp, sig, action, mask, code, funnel_type) p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_PSIG); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } kp.signo = (char)sig; kp.action = action; kp.mask = *mask; @@ -244,16 +267,15 @@ ktrpsig(vp, sig, action, mask, code, funnel_type) kth->ktr_buf = (caddr_t)&kp; kth->ktr_len = sizeof (struct ktr_psig); - ktrwrite(vp, kth, NULL, funnel_type); + ktrwrite(vp, kth, NULL); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; } void -ktrcsw(vp, out, user, funnel_type) +ktrcsw(vp, out, user) struct vnode *vp; int out, user; - int funnel_type; { struct ktr_header *kth; struct ktr_csw kc; @@ -261,12 +283,16 @@ ktrcsw(vp, out, user, funnel_type) p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_CSW); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return; + } kc.out = out; kc.user = user; kth->ktr_buf = (caddr_t)&kc; kth->ktr_len = sizeof (struct ktr_csw); - ktrwrite(vp, kth, NULL, funnel_type); + ktrwrite(vp, kth, NULL); FREE(kth, M_KTRACE); p->p_traceflag &= ~KTRFAC_ACTIVE; } @@ -277,18 +303,9 @@ ktrcsw(vp, out, user, funnel_type) /* * ktrace system call */ -struct ktrace_args { - char *fname; - int ops; - int facs; - int pid; -}; /* ARGSUSED */ int -ktrace(curp, uap, retval) - struct proc *curp; - register struct ktrace_args *uap; - register_t *retval; +ktrace(struct proc *curp, register struct ktrace_args *uap, __unused register_t *retval) { #if KTRACE register struct vnode *vp = NULL; @@ -300,25 +317,33 @@ ktrace(curp, uap, retval) int ret = 0; int error = 0; struct nameidata nd; + struct vfs_context context; AUDIT_ARG(cmd, uap->ops); AUDIT_ARG(pid, uap->pid); AUDIT_ARG(value, uap->facs); + + context.vc_proc = curp; + context.vc_ucred = kauth_cred_get(); + curp->p_traceflag |= KTRFAC_ACTIVE; if (ops != KTROP_CLEAR) { /* * an operation which requires a file argument. 
*/ - NDINIT(&nd, LOOKUP, (NOFOLLOW|LOCKLEAF), UIO_USERSPACE, uap->fname, curp); + NDINIT(&nd, LOOKUP, (NOFOLLOW|LOCKLEAF), UIO_USERSPACE, + uap->fname, &context); error = vn_open(&nd, FREAD|FWRITE|O_NOFOLLOW, 0); if (error) { curp->p_traceflag &= ~KTRFAC_ACTIVE; return (error); } vp = nd.ni_vp; - VOP_UNLOCK(vp, 0, curp); + if (vp->v_type != VREG) { - (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); + (void) vn_close(vp, FREAD|FWRITE, kauth_cred_get(), curp); + (void) vnode_put(vp); + curp->p_traceflag &= ~KTRFAC_ACTIVE; return (EACCES); } @@ -335,10 +360,7 @@ ktrace(curp, uap, retval) p->p_traceflag = 0; if (tvp != NULL) { p->p_tracep = NULL; - - VOP_CLOSE(vp, FREAD|FWRITE, curp->p_ucred, curp); - ubc_rele(tvp); - vrele(tvp); + vnode_rele(tvp); } } else error = EPERM; @@ -390,8 +412,10 @@ ktrace(curp, uap, retval) if (!ret) error = EPERM; done: - if (vp != NULL) - (void) vn_close(vp, FWRITE, curp->p_ucred, curp); + if (vp != NULL) { + (void) vn_close(vp, FWRITE, kauth_cred_get(), curp); + (void) vnode_put(vp); + } curp->p_traceflag &= ~KTRFAC_ACTIVE; return (error); #else @@ -402,17 +426,10 @@ done: /* * utrace system call */ -struct utrace_args { - const void * addr; - size_t len; -}; /* ARGSUSED */ int -utrace(curp, uap, retval) - struct proc *curp; - register struct utrace_args *uap; - register_t *retval; +utrace(__unused struct proc *curp, register struct utrace_args *uap, __unused register_t *retval) { #if KTRACE struct ktr_header *kth; @@ -425,11 +442,19 @@ utrace(curp, uap, retval) return (EINVAL); p->p_traceflag |= KTRFAC_ACTIVE; kth = ktrgetheader(KTR_USER); + if (kth == NULL) { + p->p_traceflag &= ~KTRFAC_ACTIVE; + return(ENOMEM); + } MALLOC(cp, caddr_t, uap->len, M_KTRACE, M_WAITOK); - if (!copyin((caddr_t)uap->addr, cp, uap->len)) { + if (cp == NULL) { + FREE(kth, M_KTRACE); + return(ENOMEM); + } + if (copyin(uap->addr, cp, uap->len) == 0) { kth->ktr_buf = cp; kth->ktr_len = uap->len; - ktrwrite(p->p_tracep, kth, NULL, KERNEL_FUNNEL); + ktrwrite(p->p_tracep, kth, NULL); } FREE(kth, M_KTRACE); FREE(cp, M_KTRACE); @@ -454,24 +479,19 @@ ktrops(curp, p, ops, facs, vp) return (0); if (ops == KTROP_SET) { if (p->p_tracep != vp) { - /* - * if trace file already in use, relinquish - */ tvp = p->p_tracep; - - if (UBCINFOEXISTS(vp)) - ubc_hold(vp); - VREF(vp); - + vnode_ref(vp); p->p_tracep = vp; + if (tvp != NULL) { - VOP_CLOSE(tvp, FREAD|FWRITE, p->p_ucred, p); - ubc_rele(tvp); - vrele(tvp); + /* + * if trace file already in use, relinquish + */ + vnode_rele(tvp); } } p->p_traceflag |= facs; - if (curp->p_ucred->cr_uid == 0) + if (!suser(kauth_cred_get(), NULL)) p->p_traceflag |= KTRFAC_ROOT; } else { /* KTROP_CLEAR */ @@ -481,10 +501,7 @@ ktrops(curp, p, ops, facs, vp) p->p_traceflag = 0; if (tvp != NULL) { p->p_tracep = NULL; - - VOP_CLOSE(tvp, FREAD|FWRITE, p->p_ucred, p); - ubc_rele(tvp); - vrele(tvp); + vnode_rele(tvp); } } } @@ -525,118 +542,49 @@ ktrsetchildren(curp, top, ops, facs, vp) } static void -ktrwrite(vp, kth, uio, funnel_type) - struct vnode *vp; - register struct ktr_header *kth; - struct uio *uio; +ktrwrite(struct vnode *vp, struct ktr_header *kth, struct uio *uio) { - struct uio auio; - struct iovec aiov[2]; + uio_t auio; register struct proc *p = current_proc(); /* XXX */ + struct vfs_context context; int error; + char uio_buf[ UIO_SIZEOF(2) ]; if (vp == NULL) return; - if (funnel_type == -1) { - funnel_t *f = thread_funnel_get(); - if(f == THR_FUNNEL_NULL) - funnel_type = NO_FUNNEL; - else if (f == (funnel_t *)network_flock) - funnel_type = NETWORK_FUNNEL; 
- else if (f == (funnel_t *)kernel_flock) - funnel_type = KERNEL_FUNNEL; - } - - switch (funnel_type) { - case KERNEL_FUNNEL: - /* Nothing more to do */ - break; - case NETWORK_FUNNEL: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - break; - case NO_FUNNEL: - (void) thread_funnel_set(kernel_flock, TRUE); - break; - default: - panic("Invalid funnel (%)", funnel_type); - } - auio.uio_iov = &aiov[0]; - auio.uio_offset = 0; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - aiov[0].iov_base = (caddr_t)kth; - aiov[0].iov_len = sizeof(struct ktr_header); - auio.uio_resid = sizeof(struct ktr_header); - auio.uio_iovcnt = 1; - auio.uio_procp = current_proc(); + auio = uio_createwithbuffer(2, 0, UIO_SYSSPACE, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(kth), sizeof(struct ktr_header)); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + if (kth->ktr_len > 0) { - auio.uio_iovcnt++; - aiov[1].iov_base = kth->ktr_buf; - aiov[1].iov_len = kth->ktr_len; - auio.uio_resid += kth->ktr_len; + uio_addiov(auio, CAST_USER_ADDR_T(kth->ktr_buf), kth->ktr_len); if (uio != NULL) - kth->ktr_len += uio->uio_resid; + kth->ktr_len += uio_resid(uio); } - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) - goto bad; - (void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, p->p_ucred); - if (error == 0 && uio != NULL) { - (void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, p->p_ucred); - } - VOP_UNLOCK(vp, 0, p); - if (!error) { - switch (funnel_type) { - case KERNEL_FUNNEL: - /* Nothing more to do */ - break; - case NETWORK_FUNNEL: - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - /* switch funnel to NETWORK_FUNNEL */ - break; - case NO_FUNNEL: - (void) thread_funnel_set(kernel_flock, FALSE); - break; - default: - panic("Invalid funnel (%)", funnel_type); + if ((error = vnode_getwithref(vp)) == 0) { + error = VNOP_WRITE(vp, auio, IO_UNIT | IO_APPEND, &context); + if (error == 0 && uio != NULL) { + error = VNOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, &context); } - return; + vnode_put(vp); } - -bad: - /* - * If error encountered, give up tracing on this vnode. - */ - log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", - error); - LIST_FOREACH(p, &allproc, p_list) { - if (p->p_tracep == vp) { - p->p_tracep = NULL; - p->p_traceflag = 0; - - VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - ubc_rele(vp); - vrele(vp); + if (error) { + /* + * If error encountered, give up tracing on this vnode. + */ + log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", + error); + LIST_FOREACH(p, &allproc, p_list) { + if (p->p_tracep == vp) { + p->p_tracep = NULL; + p->p_traceflag = 0; + vnode_rele(vp); + } } } - - switch (funnel_type) { - case KERNEL_FUNNEL: - /* Nothing more to do */ - break; - case NETWORK_FUNNEL: - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - /* switch funnel to NETWORK_FUNNEL */ - break; - case NO_FUNNEL: - (void) thread_funnel_set(kernel_flock, FALSE); - break; - default: - panic("Invalid funnel (%)", funnel_type); - } } /* @@ -649,21 +597,23 @@ bad: * TODO: check groups. use caller effective gid. 
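The rewritten ktrwrite() is a good capsule of the new I/O KPI: the hand-built struct uio/iovec pair and the funnel juggling are gone, replaced by an opaque uio built with uio_createwithbuffer(), an explicit vfs_context, and a vnode iocount taken around the write. A distilled sketch mirroring the calls in the hunk (kernel-only identifiers, not standalone):

	char uio_buf[UIO_SIZEOF(2)];	/* stack storage sized for 2 iovecs */
	uio_t auio;
	struct vfs_context context;
	int error;

	auio = uio_createwithbuffer(2, 0, UIO_SYSSPACE, UIO_WRITE,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(kth), sizeof(struct ktr_header));
	if (kth->ktr_len > 0)
		uio_addiov(auio, CAST_USER_ADDR_T(kth->ktr_buf), kth->ktr_len);

	context.vc_proc = current_proc();
	context.vc_ucred = kauth_cred_get();

	if ((error = vnode_getwithref(vp)) == 0) {
		error = VNOP_WRITE(vp, auio, IO_UNIT | IO_APPEND, &context);
		vnode_put(vp);		/* drop the iocount taken above */
	}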
*/ static int -ktrcanset(callp, targetp) - struct proc *callp, *targetp; +ktrcanset(__unused struct proc *callp, struct proc *targetp) { - register struct pcred *caller = callp->p_cred; - register struct pcred *target = targetp->p_cred; + kauth_cred_t caller = kauth_cred_get(); + kauth_cred_t target = targetp->p_ucred; /* XXX */ +#if 0 + /* PRISON_CHECK was defined to 1 always .... */ if (!PRISON_CHECK(callp, targetp)) return (0); - if ((caller->pc_ucred->cr_uid == target->p_ruid && - target->p_ruid == target->p_svuid && - caller->p_rgid == target->p_rgid && /* XXX */ - target->p_rgid == target->p_svgid && +#endif + if ((kauth_cred_getuid(caller) == target->cr_ruid && + target->cr_ruid == target->cr_svuid && + caller->cr_rgid == target->cr_rgid && /* XXX */ + target->cr_rgid == target->cr_svgid && (targetp->p_traceflag & KTRFAC_ROOT) == 0 && (targetp->p_flag & P_SUGID) == 0) || - caller->pc_ucred->cr_uid == 0) + !suser(caller, NULL)) return (1); return (0); diff --git a/bsd/kern/kern_lock.c b/bsd/kern/kern_lock.c index 898924500..c69140fda 100644 --- a/bsd/kern/kern_lock.c +++ b/bsd/kern/kern_lock.c @@ -60,7 +60,7 @@ */ #include -#include +#include #include #include #include @@ -91,11 +91,9 @@ int lock_wait_time = 100; if (lock_wait_time > 0) { \ int i; \ \ - simple_unlock(&lkp->lk_interlock); \ for (i = lock_wait_time; i > 0; i--) \ if (!(wanted)) \ break; \ - simple_lock(&lkp->lk_interlock); \ } \ if (!(wanted)) \ break; @@ -117,10 +115,8 @@ int lock_wait_time = 100; PAUSE(lkp, wanted); \ for (error = 0; wanted; ) { \ (lkp)->lk_waitcount++; \ - simple_unlock(&(lkp)->lk_interlock); \ error = tsleep((void *)lkp, (lkp)->lk_prio, \ (lkp)->lk_wmesg, (lkp)->lk_timo); \ - simple_lock(&(lkp)->lk_interlock); \ (lkp)->lk_waitcount--; \ if (error) \ break; \ @@ -137,13 +133,12 @@ void lockinit(lkp, prio, wmesg, timo, flags) struct lock__bsd__ *lkp; int prio; - char *wmesg; + const char *wmesg; int timo; int flags; { bzero(lkp, sizeof(struct lock__bsd__)); - simple_lock_init(&lkp->lk_interlock); lkp->lk_flags = flags & LK_EXTFLG_MASK; lkp->lk_prio = prio; lkp->lk_timo = timo; @@ -161,12 +156,10 @@ lockstatus(lkp) { int lock_type = 0; - simple_lock(&lkp->lk_interlock); if (lkp->lk_exclusivecount != 0) lock_type = LK_EXCLUSIVE; else if (lkp->lk_sharecount != 0) lock_type = LK_SHARED; - simple_unlock(&lkp->lk_interlock); return (lock_type); } @@ -181,7 +174,7 @@ int lockmgr(lkp, flags, interlkp, p) struct lock__bsd__ *lkp; u_int flags; - simple_lock_t interlkp; + void * interlkp; struct proc *p; { int error; @@ -189,14 +182,11 @@ lockmgr(lkp, flags, interlkp, p) int extflags; void *self; - error = 0; self = current_act(); + error = 0; self = current_thread(); if (p) pid = p->p_pid; else pid = LK_KERNPROC; - simple_lock(&lkp->lk_interlock); - if (flags & LK_INTERLOCK) - simple_unlock(interlkp); extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; #if 0 /* @@ -429,13 +419,11 @@ lockmgr(lkp, flags, interlkp, p) (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE)) || lkp->lk_sharecount != 0 || lkp->lk_waitcount != 0); ) { lkp->lk_flags |= LK_WAITDRAIN; - simple_unlock(&lkp->lk_interlock); if (error = tsleep((void *)&lkp->lk_flags, lkp->lk_prio, lkp->lk_wmesg, lkp->lk_timo)) return (error); if ((extflags) & LK_SLEEPFAIL) return (ENOLCK); - simple_lock(&lkp->lk_interlock); } lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL; lkp->lk_lockholder = pid; @@ -445,7 +433,6 @@ lockmgr(lkp, flags, interlkp, p) break; default: - simple_unlock(&lkp->lk_interlock); panic("lockmgr: unknown locktype request %d", flags & 
LK_TYPE_MASK); /* NOTREACHED */ @@ -456,7 +443,6 @@ lockmgr(lkp, flags, interlkp, p) lkp->lk_flags &= ~LK_WAITDRAIN; wakeup((void *)&lkp->lk_flags); } - simple_unlock(&lkp->lk_interlock); return (error); } @@ -464,6 +450,7 @@ lockmgr(lkp, flags, interlkp, p) * Print out information about state of a lock. Used by VOP_PRINT * routines to display ststus about contained locks. */ +void lockmgr_printinfo(lkp) struct lock__bsd__ *lkp; { diff --git a/bsd/ufs/ufs/ufs_lockf.c b/bsd/kern/kern_lockf.c similarity index 61% rename from bsd/ufs/ufs/ufs_lockf.c rename to bsd/kern/kern_lockf.c index 4f4a71933..1ef3470ce 100644 --- a/bsd/ufs/ufs/ufs_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -1,25 +1,3 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. @@ -35,10 +13,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -55,53 +29,229 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94 + * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94 */ +#include #include #include #include -#include +#include +#include #include +#include #include +#include +#include #include #include -#include - -#include -#include -#include -#include +#include +#if DEAD_CODE /* * This variable controls the maximum number of processes that will * be checked in doing deadlock detection. 
*/ -int maxlockdepth = MAXDEPTH; +static int maxlockdepth = MAXDEPTH; +#endif /* DEAD_CODE */ #ifdef LOCKF_DEBUG -#include #include -int lockf_debug = 0; -struct ctldebug debug4 = { "lockf_debug", &lockf_debug }; + +#include +#include + + +static int lockf_debug = 2; +SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); #endif +MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); + #define NOLOCKF (struct lockf *)0 #define SELF 0x1 #define OTHERS 0x2 +#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */ +static int lf_clearlock(struct lockf *); +static int lf_findoverlap(struct lockf *, + struct lockf *, int, struct lockf ***, struct lockf **); +static struct lockf * + lf_getblock(struct lockf *); +static int lf_getlock(struct lockf *, struct flock *); +static int lf_setlock(struct lockf *); +static void lf_split(struct lockf *, struct lockf *); +static void lf_wakelock(struct lockf *); /* - * Set a byte-range lock. + * Advisory record locking support */ int +lf_advlock(ap) + struct vnop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + vfs_context_t a_context; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct flock *fl = ap->a_fl; + vfs_context_t context = ap->a_context; + struct lockf *lock; + off_t start, end, oadd; + u_quad_t size; + int error; + struct lockf **head = &vp->v_lockf; + + /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */ + + /* + * Avoid the common case of unlocking when inode has no locks. + */ + if (*head == (struct lockf *)0) { + if (ap->a_op != F_SETLK) { + fl->l_type = F_UNLCK; +#ifdef LOCKF_DEBUG + printf("lf_advlock: unlock without lock\n"); +#endif /* LOCKF_DEBUG */ + return (0); + } + } + + /* + * Convert the flock structure into a start and end. + */ + switch (fl->l_whence) { + + case SEEK_SET: + case SEEK_CUR: + /* + * Caller is responsible for adding any necessary offset + * when SEEK_CUR is used. 
+ */ + start = fl->l_start; + break; + + case SEEK_END: + + if ((error = vnode_size(vp, &size, context))) +{ +#ifdef LOCKF_DEBUG + printf("lf_advlock: vnode_getattr failed: %d\n", error); +#endif /* LOCKF_DEBUG */ + return (error); +} + + if (size > OFF_MAX || + (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) + return (EOVERFLOW); + start = size + fl->l_start; + break; + + default: +#ifdef LOCKF_DEBUG + printf("lf_advlock: unknown whence %d\n", fl->l_whence); +#endif /* LOCKF_DEBUG */ + return (EINVAL); + } + if (start < 0) +{ +#ifdef LOCKF_DEBUG + printf("lf_advlock: start < 0 (%qd)\n", start); +#endif /* LOCKF_DEBUG */ + return (EINVAL); +} + if (fl->l_len < 0) { + if (start == 0) +{ +#ifdef LOCKF_DEBUG + printf("lf_advlock: len < 0 & start == 0\n"); +#endif /* LOCKF_DEBUG */ + return (EINVAL); +} + end = start - 1; + start += fl->l_len; + if (start < 0) +{ +#ifdef LOCKF_DEBUG + printf("lf_advlock: start < 0 (%qd)\n", start); +#endif /* LOCKF_DEBUG */ + return (EINVAL); +} + } else if (fl->l_len == 0) + end = -1; + else { + oadd = fl->l_len - 1; + if (oadd > (off_t)(OFF_MAX - start)) +{ +#ifdef LOCKF_DEBUG + printf("lf_advlock: overflow\n"); +#endif /* LOCKF_DEBUG */ + return (EOVERFLOW); +} + end = start + oadd; + } + /* + * Create the lockf structure + */ + MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); + lock->lf_start = start; + lock->lf_end = end; + lock->lf_id = ap->a_id; + lock->lf_vnode = vp; + lock->lf_type = fl->l_type; + lock->lf_head = head; + lock->lf_next = (struct lockf *)0; + TAILQ_INIT(&lock->lf_blkhd); + lock->lf_flags = ap->a_flags; + + lck_mtx_lock(&vp->v_lock); /* protect the lockf list */ + /* + * Do the requested operation. + */ + switch(ap->a_op) { + case F_SETLK: + error = lf_setlock(lock); + break; + + case F_UNLCK: + error = lf_clearlock(lock); + FREE(lock, M_LOCKF); + break; + + case F_GETLK: + error = lf_getlock(lock, fl); + FREE(lock, M_LOCKF); + break; + + default: + FREE(lock, M_LOCKF); + error = EINVAL; + break; + } + lck_mtx_unlock(&vp->v_lock); /* done maniplulating the list */ + +#ifdef LOCKF_DEBUG + printf("lf_advlock: normal exit: %d\n", error); +#endif /* LOCKF_DEBUG */ + return (error); +} + +/* + * Set a byte-range lock. + */ +static int lf_setlock(lock) - register struct lockf *lock; + struct lockf *lock; { - register struct lockf *block; - struct inode *ip = lock->lf_inode; + struct lockf *block; + struct lockf **head = lock->lf_head; struct lockf **prev, *overlap, *ltmp; static char lockstr[] = "lockf"; int ovcase, priority, needtolink, error; + struct vnode *vp = lock->lf_vnode; #ifdef LOCKF_DEBUG if (lockf_debug & 1) @@ -118,7 +268,7 @@ lf_setlock(lock) /* * Scan lock list for this file looking for locks that would block us. */ - while (block = lf_getblock(lock)) { + while ((block = lf_getblock(lock))) { /* * Free the structure and return if nonblocking. */ @@ -126,6 +276,10 @@ lf_setlock(lock) FREE(lock, M_LOCKF); return (EAGAIN); } +#if DEAD_CODE +/* + * XXX This is dead code on MacOS X; it shouldn't be. + */ /* * We are blocked. Since flock style locks cover * the whole file, there is no chance for deadlock. 
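The flock-to-[start,end] conversion above is the subtle part of lf_advlock(): negative l_len locks the bytes *before* l_start, l_len == 0 means "through end of file", and every addition must be overflow-checked. A runnable user-space restatement; RANGE_OFF_MAX and the eof parameter (standing in for vnode_size()) are illustrative stand-ins:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

#define RANGE_OFF_MAX INT64_MAX	/* stand-in for the kernel's OFF_MAX */

/* Fills [*start, *end]; *end == -1 encodes "through EOF".
 * Returns 0, or -1 for the EINVAL/EOVERFLOW cases. */
static int
flock_to_range(const struct flock *fl, int64_t eof,
    int64_t *start, int64_t *end)
{
	int64_t s, oadd;

	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:		/* caller already folded in the file offset */
		s = fl->l_start;
		break;
	case SEEK_END:
		if (fl->l_start > 0 && eof > RANGE_OFF_MAX - fl->l_start)
			return -1;		/* EOVERFLOW */
		s = eof + fl->l_start;
		break;
	default:
		return -1;			/* EINVAL: bad whence */
	}
	if (s < 0)
		return -1;			/* EINVAL */
	if (fl->l_len < 0) {
		/* negative length: lock the |l_len| bytes before s */
		if (s == 0)
			return -1;		/* EINVAL */
		*end = s - 1;
		s += fl->l_len;
		if (s < 0)
			return -1;		/* EINVAL */
	} else if (fl->l_len == 0) {
		*end = -1;			/* through end of file */
	} else {
		oadd = fl->l_len - 1;
		if (oadd > RANGE_OFF_MAX - s)
			return -1;		/* EOVERFLOW */
		*end = s + oadd;
	}
	*start = s;
	return 0;
}

int
main(void)
{
	struct flock fl = { .l_whence = SEEK_SET, .l_start = 100, .l_len = -10 };
	int64_t start, end;

	if (flock_to_range(&fl, 0, &start, &end) == 0)
		printf("locks [%lld, %lld]\n", (long long)start, (long long)end);
	/* prints: locks [90, 99] */
	return 0;
}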
@@ -138,27 +292,35 @@ lf_setlock(lock) */ if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - register struct proc *wproc; - register struct lockf *waitblock; + struct proc *wproc; + struct thread *td; + struct lockf *waitblock; int i = 0; /* The block is waiting on something */ + /* XXXKSE this is not complete under threads */ wproc = (struct proc *)block->lf_id; - while (wproc->p_wchan && - (wproc->p_wmesg == lockstr) && - (i++ < maxlockdepth)) { - waitblock = (struct lockf *)wproc->p_wchan; - /* Get the owner of the blocking lock */ - waitblock = waitblock->lf_next; - if ((waitblock->lf_flags & F_POSIX) == 0) - break; - wproc = (struct proc *)waitblock->lf_id; - if (wproc == (struct proc *)lock->lf_id) { - _FREE(lock, M_LOCKF); - return (EDEADLK); + mtx_lock_spin(&sched_lock); + FOREACH_THREAD_IN_PROC(wproc, td) { + while (td->td_wchan && + (td->td_wmesg == lockstr) && + (i++ < maxlockdepth)) { + waitblock = (struct lockf *)td->td_wchan; + /* Get the owner of the blocking lock */ + waitblock = waitblock->lf_next; + if ((waitblock->lf_flags & F_POSIX) == 0) + break; + wproc = (struct proc *)waitblock->lf_id; + if (wproc == (struct proc *)lock->lf_id) { + mtx_unlock_spin(&sched_lock); + FREE(lock, M_LOCKF); + return (EDEADLK); + } } } + mtx_unlock_spin(&sched_lock); } +#endif /* DEAD_CODE */ /* * For flock type locks, we must first remove * any shared locks that we hold before we sleep @@ -182,21 +344,23 @@ lf_setlock(lock) lf_printlist("lf_setlock", block); } #endif /* LOCKF_DEBUG */ - if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) { + error = msleep(lock, &vp->v_lock, priority, lockstr, 0); + if (error) { /* XXX */ /* - * We may have been awakened by a signal (in - * which case we must remove ourselves from the - * blocked list) and/or by another process - * releasing a lock (in which case we have already - * been removed from the blocked list and our + * We may have been awakened by a signal and/or by a + * debugger continuing us (in which cases we must remove + * ourselves from the blocked list) and/or by another + * process releasing a lock (in which case we have + * already been removed from the blocked list and our * lf_next field set to NOLOCKF). */ - if (lock->lf_next) - TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, - lf_block); - _FREE(lock, M_LOCKF); + if (lock->lf_next) { + TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); + lock->lf_next = NOLOCKF; + } + FREE(lock, M_LOCKF); return (error); - } + } /* XXX */ } /* * No blocks!! Add the lock. Note that we will @@ -206,11 +370,12 @@ lf_setlock(lock) * Skip over locks owned by other processes. * Handle any locks that overlap and are owned by ourselves. */ - prev = &ip->i_lockf; - block = ip->i_lockf; + prev = head; + block = *head; needtolink = 1; for (;;) { - if (ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap)) + ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap); + if (ovcase) block = overlap->lf_next; /* * Six cases: @@ -247,7 +412,7 @@ lf_setlock(lock) * Check for common starting point and different types. 
*/ if (overlap->lf_type == lock->lf_type) { - _FREE(lock, M_LOCKF); + FREE(lock, M_LOCKF); lock = overlap; /* for debug output below */ break; } @@ -269,11 +434,13 @@ lf_setlock(lock) overlap->lf_type == F_WRLCK) { lf_wakelock(overlap); } else { - while (ltmp = overlap->lf_blkhd.tqh_first) { + while (!TAILQ_EMPTY(&overlap->lf_blkhd)) { + ltmp = TAILQ_FIRST(&overlap->lf_blkhd); TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, lf_block); TAILQ_INSERT_TAIL(&lock->lf_blkhd, ltmp, lf_block); + ltmp->lf_next = lock; } } /* @@ -286,7 +453,7 @@ lf_setlock(lock) needtolink = 0; } else *prev = overlap->lf_next; - _FREE(overlap, M_LOCKF); + FREE(overlap, M_LOCKF); continue; case 4: /* overlap starts before lock */ @@ -330,12 +497,12 @@ lf_setlock(lock) * Generally, find the lock (or an overlap to that lock) * and remove it (or shrink it), then wakeup anyone we can. */ -int +static int lf_clearlock(unlock) - register struct lockf *unlock; + struct lockf *unlock; { - struct inode *ip = unlock->lf_inode; - register struct lockf *lf = ip->i_lockf; + struct lockf **head = unlock->lf_head; + struct lockf *lf = *head; struct lockf *overlap, **prev; int ovcase; @@ -347,8 +514,8 @@ lf_clearlock(unlock) if (lockf_debug & 1) lf_print("lf_clearlock", unlock); #endif /* LOCKF_DEBUG */ - prev = &ip->i_lockf; - while (ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) { + prev = head; + while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) { /* * Wakeup the list of locks to be retried. */ @@ -373,7 +540,7 @@ lf_clearlock(unlock) case 3: /* lock contains overlap */ *prev = overlap->lf_next; lf = overlap->lf_next; - _FREE(overlap, M_LOCKF); + FREE(overlap, M_LOCKF); continue; case 4: /* overlap starts before lock */ @@ -399,19 +566,19 @@ lf_clearlock(unlock) * Check whether there is a blocking lock, * and if so return its process identifier. */ -int +static int lf_getlock(lock, fl) - register struct lockf *lock; - register struct flock *fl; + struct lockf *lock; + struct flock *fl; { - register struct lockf *block; + struct lockf *block; #ifdef LOCKF_DEBUG if (lockf_debug & 1) lf_print("lf_getlock", lock); #endif /* LOCKF_DEBUG */ - if (block = lf_getblock(lock)) { + if ((block = lf_getblock(lock))) { fl->l_type = block->lf_type; fl->l_whence = SEEK_SET; fl->l_start = block->lf_start; @@ -420,7 +587,7 @@ lf_getlock(lock, fl) else fl->l_len = block->lf_end - block->lf_start + 1; if (block->lf_flags & F_POSIX) - fl->l_pid = ((struct proc *)(block->lf_id))->p_pid; + fl->l_pid = proc_pid((struct proc *)(block->lf_id)); else fl->l_pid = -1; } else { @@ -433,15 +600,15 @@ lf_getlock(lock, fl) * Walk the list of locks for an inode and * return the first blocking lock. */ -struct lockf * +static struct lockf * lf_getblock(lock) - register struct lockf *lock; + struct lockf *lock; { - struct lockf **prev, *overlap, *lf = lock->lf_inode->i_lockf; + struct lockf **prev, *overlap, *lf = *(lock->lf_head); int ovcase; - prev = &lock->lf_inode->i_lockf; - while (ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) { + prev = lock->lf_head; + while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) { /* * We've found an overlap, see if it blocks us */ @@ -457,15 +624,15 @@ lf_getblock(lock) } /* - * Walk the list of locks for an inode to + * Walk the list of locks to * find an overlapping lock (if any). * * NOTE: this returns only the FIRST overlapping lock. There * may be more than one. 
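lf_getlock() is the kernel half of fcntl(F_GETLK): it reports the first lock that would block the query (via lf_getblock), or sets l_type to F_UNLCK when nothing conflicts. A small runnable probe from user space:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl = {
		.l_type = F_WRLCK,	/* "who would block this write lock?" */
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,		/* 0 means "through end of file" */
	};
	int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0600);

	if (fd == -1 || fcntl(fd, F_GETLK, &fl) == -1) {
		perror("lockdemo");
		return 1;
	}
	if (fl.l_type == F_UNLCK)
		printf("no conflicting lock\n");
	else
		printf("blocked by pid %d, range [%lld, +%lld)\n",
		    (int)fl.l_pid, (long long)fl.l_start, (long long)fl.l_len);
	close(fd);
	return 0;
}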
*/ -int +static int lf_findoverlap(lf, lock, type, prev, overlap) - register struct lockf *lf; + struct lockf *lf; struct lockf *lock; int type; struct lockf ***prev; @@ -573,12 +740,12 @@ lf_findoverlap(lf, lock, type, prev, overlap) * Split a lock and a contained region into * two or three locks as necessary. */ -void +static void lf_split(lock1, lock2) - register struct lockf *lock1; - register struct lockf *lock2; + struct lockf *lock1; + struct lockf *lock2; { - register struct lockf *splitlock; + struct lockf *splitlock; #ifdef LOCKF_DEBUG if (lockf_debug & 2) { @@ -605,7 +772,7 @@ lf_split(lock1, lock2) * the encompassing lock */ MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); - bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock); + bcopy(lock1, splitlock, sizeof *splitlock); splitlock->lf_start = lock2->lf_end + 1; TAILQ_INIT(&splitlock->lf_blkhd); lock1->lf_end = lock2->lf_start - 1; @@ -620,20 +787,21 @@ lf_split(lock1, lock2) /* * Wakeup a blocklist */ -void +static void lf_wakelock(listhead) struct lockf *listhead; { - register struct lockf *wakelock; + struct lockf *wakelock; - while (wakelock = listhead->lf_blkhd.tqh_first) { + while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { + wakelock = TAILQ_FIRST(&listhead->lf_blkhd); TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); wakelock->lf_next = NOLOCKF; #ifdef LOCKF_DEBUG if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); #endif /* LOCKF_DEBUG */ - wakeup((caddr_t)wakelock); + wakeup(wakelock); } } @@ -641,65 +809,74 @@ lf_wakelock(listhead) /* * Print out a lock. */ +void lf_print(tag, lock) char *tag; - register struct lockf *lock; + struct lockf *lock; { - - printf("%s: lock 0x%lx for ", tag, lock); + + printf("%s: lock %p for ", tag, (void *)lock); if (lock->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid); + printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid); + else + printf("id %p", (void *)lock->lf_id); + if (lock->lf_vnode != 0) + printf(" in vno 0x%08x, %s, start %jd, end %jd", + lock->lf_vnode, + lock->lf_type == F_RDLCK ? "shared" : + lock->lf_type == F_WRLCK ? "exclusive" : + lock->lf_type == F_UNLCK ? "unlock" : "unknown", + (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); else - printf("id 0x%x", lock->lf_id); - printf(" in ino %d on dev <%d, %d>, %s, start %d, end %d", - lock->lf_inode->i_number, - major(lock->lf_inode->i_dev), - minor(lock->lf_inode->i_dev), - lock->lf_type == F_RDLCK ? "shared" : - lock->lf_type == F_WRLCK ? "exclusive" : - lock->lf_type == F_UNLCK ? "unlock" : - "unknown", lock->lf_start, lock->lf_end); - if (lock->lf_blkhd.tqh_first) - printf(" block 0x%x\n", lock->lf_blkhd.tqh_first); + printf(" %s, start %jd, end %jd", + lock->lf_type == F_RDLCK ? "shared" : + lock->lf_type == F_WRLCK ? "exclusive" : + lock->lf_type == F_UNLCK ? 
"unlock" : "unknown", + (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); + if (!TAILQ_EMPTY(&lock->lf_blkhd)) + printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd)); else printf("\n"); } +void lf_printlist(tag, lock) char *tag; struct lockf *lock; { - register struct lockf *lf, *blk; - - printf("%s: Lock list for ino %d on dev <%d, %d>:\n", - tag, lock->lf_inode->i_number, - major(lock->lf_inode->i_dev), - minor(lock->lf_inode->i_dev)); - for (lf = lock->lf_inode->i_lockf; lf; lf = lf->lf_next) { - printf("\tlock 0x%lx for ", lf); + struct lockf *lf, *blk; + + if (lock->lf_vnode == 0) + return; + + printf("%s: Lock list for vno 0x%08x:\n", + tag, lock->lf_vnode); + for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) { + printf("\tlock %p for ",(void *)lf); if (lf->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid); + printf("proc %ld", + (long)((struct proc *)lf->lf_id)->p_pid); else - printf("id 0x%x", lf->lf_id); - printf(", %s, start %d, end %d", - lf->lf_type == F_RDLCK ? "shared" : - lf->lf_type == F_WRLCK ? "exclusive" : - lf->lf_type == F_UNLCK ? "unlock" : - "unknown", lf->lf_start, lf->lf_end); - for (blk = lf->lf_blkhd.tqh_first; blk; - blk = blk->lf_block.tqe_next) { - printf("\n\t\tlock request 0x%lx for ", blk); + printf("id %p", (void *)lf->lf_id); + printf(", %s, start %jd, end %jd", + lf->lf_type == F_RDLCK ? "shared" : + lf->lf_type == F_WRLCK ? "exclusive" : + lf->lf_type == F_UNLCK ? "unlock" : + "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); + TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { + printf("\n\t\tlock request %p for ", (void *)blk); if (blk->lf_flags & F_POSIX) - printf("proc %d", - ((struct proc *)(blk->lf_id))->p_pid); + printf("proc %ld", + (long)((struct proc *)blk->lf_id)->p_pid); else - printf("id 0x%x", blk->lf_id); - printf(", %s, start %d, end %d", - blk->lf_type == F_RDLCK ? "shared" : - blk->lf_type == F_WRLCK ? "exclusive" : - blk->lf_type == F_UNLCK ? "unlock" : - "unknown", blk->lf_start, blk->lf_end); - if (blk->lf_blkhd.tqh_first) + printf("id %p", (void *)blk->lf_id); + printf(", %s, start %jd, end %jd", + blk->lf_type == F_RDLCK ? "shared" : + blk->lf_type == F_WRLCK ? "exclusive" : + blk->lf_type == F_UNLCK ? "unlock" : + "unknown", (intmax_t)blk->lf_start, + (intmax_t)blk->lf_end); + if (!TAILQ_EMPTY(&blk->lf_blkhd)) panic("lf_printlist: bad list"); } printf("\n"); diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index 3f1a92d27..5ae60405a 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -71,14 +71,16 @@ #include #include -#include -#include -#include +#include +#include +#include +#include #include -#include +#include #include #include #include +#include #include @@ -92,7 +94,6 @@ #include #include #include -#include #include @@ -133,7 +134,7 @@ struct kmzones { SOS(ucred), KMZ_CREATEZONE, /* 16 M_CRED */ SOS(pgrp), KMZ_CREATEZONE, /* 17 M_PGRP */ SOS(session), KMZ_CREATEZONE, /* 18 M_SESSION */ - SOS(iovec), KMZ_LOOKUPZONE, /* 19 M_IOV */ + SOS(iovec_32), KMZ_LOOKUPZONE, /* 19 M_IOV32 */ SOS(mount), KMZ_CREATEZONE, /* 20 M_MOUNT */ 0, KMZ_MALLOC, /* 21 M_FHANDLE */ SOS(nfsreq), KMZ_CREATEZONE, /* 22 M_NFSREQ */ @@ -152,17 +153,17 @@ struct kmzones { 0, KMZ_MALLOC, /* 35 M_VMPVENT */ 0, KMZ_MALLOC, /* 36 M_VMPAGER */ 0, KMZ_MALLOC, /* 37 M_VMPGDATA */ - SOS(file), KMZ_CREATEZONE, /* 38 M_FILE */ + SOS(fileproc), KMZ_CREATEZONE, /* 38 M_FILEPROC */ SOS(filedesc), KMZ_CREATEZONE, /* 39 M_FILEDESC */ SOX(lockf), KMZ_CREATEZONE, /* 40 M_LOCKF */ SOS(proc), KMZ_CREATEZONE, /* 41 M_PROC */ - SOS(pcred), KMZ_CREATEZONE, /* 42 M_SUBPROC */ + SOS(pstats), KMZ_CREATEZONE, /* 42 M_SUBPROC */ 0, KMZ_MALLOC, /* 43 M_SEGMENT */ M_FFSNODE, KMZ_SHAREZONE, /* 44 M_LFSNODE */ SOS(inode), KMZ_CREATEZONE, /* 45 M_FFSNODE */ M_FFSNODE, KMZ_SHAREZONE, /* 46 M_MFSNODE */ - SOS(nqlease), KMZ_CREATEZONE, /* 47 M_NQLEASE */ - SOS(nqm), KMZ_CREATEZONE, /* 48 M_NQMHOST */ + 0, KMZ_MALLOC, /* 47 M_NQLEASE */ + 0, KMZ_MALLOC, /* 48 M_NQMHOST */ 0, KMZ_MALLOC, /* 49 M_NETADDR */ SOX(nfssvc_sock), KMZ_CREATEZONE, /* 50 M_NFSSVC */ @@ -215,6 +216,14 @@ struct kmzones { SOS(transaction), KMZ_CREATEZONE, /* 92 M_JNL_TR */ SOS(specinfo), KMZ_CREATEZONE, /* 93 M_SPECINFO */ SOS(kqueue), KMZ_CREATEZONE, /* 94 M_KQUEUE */ + SOS(directoryhint), KMZ_CREATEZONE, /* 95 M_HFSDIRHINT */ + SOS(cl_readahead), KMZ_CREATEZONE, /* 96 M_CLRDAHEAD */ + SOS(cl_writebehind),KMZ_CREATEZONE, /* 97 M_CLWRBEHIND */ + SOS(iovec_64), KMZ_LOOKUPZONE, /* 98 M_IOV64 */ + SOS(fileglob), KMZ_CREATEZONE, /* 99 M_FILEGLOB */ + 0, KMZ_MALLOC, /* 100 M_KAUTH */ + 0, KMZ_MALLOC, /* 101 M_DUMMYNET */ + SOS(unsafe_fsnode),KMZ_CREATEZONE, /* 102 M_UNSAFEFS */ #undef SOS #undef SOX }; @@ -283,7 +292,8 @@ struct _mhead { #define ZEROSIZETOKEN (void *)0xFADEDFAD -void *_MALLOC( +void * +_MALLOC( size_t size, int type, int flags) @@ -317,7 +327,8 @@ void *_MALLOC( return (mem->hdr.dat); } -void _FREE( +void +_FREE( void *addr, int type) { @@ -332,10 +343,11 @@ void _FREE( return; /* correct (convenient bsd kernel legacy) */ hdr = addr; hdr--; - kfree((vm_offset_t)hdr, hdr->mlen); + kfree(hdr, hdr->mlen); } -void *_MALLOC_ZONE( +void * +_MALLOC_ZONE( size_t size, int type, int flags) @@ -348,7 +360,7 @@ void *_MALLOC_ZONE( kmz = &kmzones[type]; if (kmz->kz_zalloczone == KMZ_MALLOC) - panic("_malloc_zone ZONE"); + panic("_malloc_zone ZONE: type = %d", type); /* XXX */ if (kmz->kz_elemsize == -1) @@ -370,7 +382,8 @@ void *_MALLOC_ZONE( return (elem); } -void _FREE_ZONE( +void +_FREE_ZONE( void *elem, size_t size, int type) @@ -389,7 +402,7 @@ void _FREE_ZONE( panic("FREE_SIZE XXX"); /* XXX */ if (size == kmz->kz_elemsize) - zfree(kmz->kz_zalloczone, (vm_offset_t)elem); + zfree(kmz->kz_zalloczone, elem); else - kfree((vm_offset_t)elem, size); + kfree(elem, size); } diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index 35b5ee7bf..11d967a42 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #if defined(SMP) @@ -76,10 +76,9 @@ 
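
[Note on the _MALLOC/_FREE hunks above: the allocator prefixes every allocation with a struct _mhead recording its length, hands the payload (mem->hdr.dat) back to the caller, and _FREE() steps the pointer back one header to recover the length that kfree() now takes directly. A minimal userland sketch of that prefix-header pattern; malloc/free stand in for the kernel allocator and the field names are illustrative:]

    #include <stdio.h>
    #include <stdlib.h>

    struct mhead {
            size_t mlen;    /* total length, header included */
            char   dat[];   /* payload handed back to the caller */
    };

    static void *
    x_malloc(size_t size)
    {
            struct mhead *mem = malloc(sizeof(*mem) + size);

            if (mem == NULL)
                    return NULL;
            mem->mlen = sizeof(*mem) + size;
            return mem->dat;
    }

    static void
    x_free(void *addr)
    {
            struct mhead *hdr = addr;

            hdr--;                  /* back up to the header, as _FREE() does */
            printf("freeing %zu bytes\n", hdr->mlen);
            free(hdr);              /* kernel: kfree(hdr, hdr->mlen) */
    }

    int
    main(void)
    {
            void *p = x_malloc(64);
            x_free(p);
            return 0;
    }
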
#include #include #include -#include +#include #include #include -#include #include #include #include @@ -96,7 +95,7 @@ extern vm_map_t bsd_pageable_map; -#include +#include #include #include @@ -105,6 +104,7 @@ extern vm_map_t bsd_pageable_map; #include #include +static int cputype, cpusubtype, cputhreadtype; SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); @@ -132,6 +132,14 @@ SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, */ #define CTLHW_RETQUAD (1 << 31) +#define CTLHW_LOCAL (1 << 30) + +#define HW_LOCAL_CPUTHREADTYPE (1 | CTLHW_LOCAL) +#define HW_LOCAL_PHYSICALCPU (2 | CTLHW_LOCAL) +#define HW_LOCAL_PHYSICALCPUMAX (3 | CTLHW_LOCAL) +#define HW_LOCAL_LOGICALCPU (4 | CTLHW_LOCAL) +#define HW_LOCAL_LOGICALCPUMAX (5 | CTLHW_LOCAL) + /* * Supporting some variables requires us to do "real" work. We @@ -146,6 +154,9 @@ sysctl_hw_generic SYSCTL_HANDLER_ARGS ml_cpu_info_t cpu_info; int val, doquad; long long qval; + host_basic_info_data_t hinfo; + kern_return_t kret; + int count = HOST_BASIC_INFO_COUNT; /* * Test and mask off the 'return quad' flag. @@ -156,6 +167,9 @@ sysctl_hw_generic SYSCTL_HANDLER_ARGS ml_cpu_get_info(&cpu_info); +#define BSD_HOST 1 + kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); + /* * Handle various OIDs. * @@ -164,32 +178,40 @@ sysctl_hw_generic SYSCTL_HANDLER_ARGS */ switch (arg2) { case HW_NCPU: - { - host_basic_info_data_t hinfo; - kern_return_t kret; - int count = HOST_BASIC_INFO_COUNT; -#define BSD_HOST 1 - - kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); - if (kret == KERN_SUCCESS) { - return(SYSCTL_RETURN(req, hinfo.max_cpus)); - } else { - return(EINVAL); - } + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.max_cpus)); + } else { + return(EINVAL); } case HW_AVAILCPU: - { - host_basic_info_data_t hinfo; - kern_return_t kret; - int count = HOST_BASIC_INFO_COUNT; -#define BSD_HOST 1 - - kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); - if (kret == KERN_SUCCESS) { - return(SYSCTL_RETURN(req, hinfo.avail_cpus)); - } else { - return(EINVAL); - } + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.avail_cpus)); + } else { + return(EINVAL); + } + case HW_LOCAL_PHYSICALCPU: + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.physical_cpu)); + } else { + return(EINVAL); + } + case HW_LOCAL_PHYSICALCPUMAX: + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.physical_cpu_max)); + } else { + return(EINVAL); + } + case HW_LOCAL_LOGICALCPU: + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.logical_cpu)); + } else { + return(EINVAL); + } + case HW_LOCAL_LOGICALCPUMAX: + if (kret == KERN_SUCCESS) { + return(SYSCTL_RETURN(req, hinfo.logical_cpu_max)); + } else { + return(EINVAL); } case HW_CACHELINE: val = cpu_info.cache_line_size; @@ -268,11 +290,15 @@ sysctl_hw_generic SYSCTL_HANDLER_ARGS /* * hw.* MIB variables. 
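
[Note on the sysctl_hw_generic() hunks above: the rewrite issues a single host_info() query up front and lets every case consume the shared hinfo, instead of duplicating the query (and the local BSD_HOST definition) inside each arm. Condensed shape of the dispatch; kernel context, a sketch rather than the verbatim code:]

    /* One host_info() call feeds every CPU-count OID (sketch; arg2,
     * req are the handler's parameters, headers omitted). */
    host_basic_info_data_t hinfo;
    int count = HOST_BASIC_INFO_COUNT;
    kern_return_t kret;

    kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count);

    switch (arg2) {
    case HW_NCPU:
            return (kret == KERN_SUCCESS) ?
                SYSCTL_RETURN(req, hinfo.max_cpus) : EINVAL;
    case HW_LOCAL_PHYSICALCPU:
            return (kret == KERN_SUCCESS) ?
                SYSCTL_RETURN(req, hinfo.physical_cpu) : EINVAL;
    /* HW_AVAILCPU and the other HW_LOCAL_* arms follow the same shape. */
    }
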
*/ -SYSCTL_PROC (_hw, HW_NCPU, ncpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_NCPU, sysctl_hw_generic, "I", ""); +SYSCTL_PROC (_hw, HW_NCPU, ncpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_NCPU, sysctl_hw_generic, "I", ""); SYSCTL_PROC (_hw, HW_AVAILCPU, activecpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_AVAILCPU, sysctl_hw_generic, "I", ""); +SYSCTL_PROC (_hw, OID_AUTO, physicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_PHYSICALCPU, sysctl_hw_generic, "I", ""); +SYSCTL_PROC (_hw, OID_AUTO, physicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_PHYSICALCPUMAX, sysctl_hw_generic, "I", ""); +SYSCTL_PROC (_hw, OID_AUTO, logicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPU, sysctl_hw_generic, "I", ""); +SYSCTL_PROC (_hw, OID_AUTO, logicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPUMAX, sysctl_hw_generic, "I", ""); SYSCTL_INT (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN, NULL, BYTE_ORDER, ""); -SYSCTL_INT (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN, &machine_slot[0].cpu_type, 0, ""); -SYSCTL_INT (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN, &machine_slot[0].cpu_subtype, 0, ""); +SYSCTL_INT (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN, &cputype, 0, ""); +SYSCTL_INT (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN, &cpusubtype, 0, ""); SYSCTL_INT2QUAD(_hw, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_KERN, &page_size, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_hz, ""); SYSCTL_QUAD (_hw, OID_AUTO, busfrequency_min, CTLFLAG_RD | CTLFLAG_KERN, &gPEClockFrequencyInfo.bus_frequency_min_hz, ""); @@ -339,7 +365,10 @@ SYSCTL_PROC(_hw, HW_L3SETTINGS, l3settings, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG void sysctl_mib_init(void) { - + cputype = cpu_type(); + cpusubtype = cpu_subtype(); + cputhreadtype = cpu_threadtype(); + /* * Populate the optional portion of the hw.* MIB. * @@ -347,6 +376,12 @@ sysctl_mib_init(void) * that actually directly relate to the functions in * question. 
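
[The four OID_AUTO registrations above expose the new counts as hw.physicalcpu, hw.physicalcpu_max, hw.logicalcpu and hw.logicalcpu_max. A small userland check, runnable only against a kernel that carries this patch:]

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            const char *names[] = { "hw.physicalcpu", "hw.physicalcpu_max",
                                    "hw.logicalcpu", "hw.logicalcpu_max" };
            int i, val;
            size_t len;

            for (i = 0; i < 4; i++) {
                    len = sizeof(val);
                    if (sysctlbyname(names[i], &val, &len, NULL, 0) == 0)
                            printf("%s = %d\n", names[i], val);
                    else
                            perror(names[i]);   /* older kernels lack these */
            }
            return 0;
    }
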
*/ + + if (cputhreadtype != CPU_THREADTYPE_NONE) { + static SYSCTL_INT(_hw, OID_AUTO, cputhreadtype, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &cputhreadtype, 0, ""); + sysctl_register_oid(&sysctl__hw_cputhreadtype); + } + #ifdef __ppc__ { static int altivec_flag = -1; diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index e234d8955..f91cbf079 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -68,79 +68,55 @@ #include #include #include -#include +#include +#include #include -#include -#include +#include #include #include -#include +#include #include #include #include #include #include #include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include #include -#include -#include -#include -#include - -struct sbrk_args { - int incr; -}; - -/* ARGSUSED */ int -sbrk(p, uap, retval) - struct proc *p; - struct sbrk_args *uap; - register_t *retval; +sbrk(__unused struct proc *p, __unused struct sbrk_args *uap, __unused register_t *retval) { /* Not yet implemented */ - return (EOPNOTSUPP); + return (ENOTSUP); } -struct sstk_args { - int incr; -} *uap; - -/* ARGSUSED */ int -sstk(p, uap, retval) - struct proc *p; - struct sstk_args *uap; - register_t *retval; +sstk(__unused struct proc *p, __unused struct sstk_args *uap, __unused register_t *retval) { /* Not yet implemented */ - return (EOPNOTSUPP); + return (ENOTSUP); } -#if COMPAT_43 -/* ARGSUSED */ -int -ogetpagesize(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; -{ - - *retval = PAGE_SIZE; - return (0); -} -#endif /* COMPAT_43 */ struct osmmap_args { caddr_t addr; @@ -152,80 +128,68 @@ struct osmmap_args { }; int -osmmap(curp, uap, retval) - struct proc *curp; - register struct osmmap_args *uap; - register_t *retval; +osmmap( + struct proc *curp, + register struct osmmap_args *uap, + register_t *retval) { -struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - long pad; -#endif - off_t pos; -} newargs; + struct mmap_args newargs; + user_addr_t addr; + int ret; if ((uap->share == MAP_SHARED )|| (uap->share == MAP_PRIVATE )) { - newargs.addr = uap->addr; - newargs.len = (size_t)uap->len; + newargs.addr = CAST_USER_ADDR_T(uap->addr); + newargs.len = CAST_USER_ADDR_T(uap->len); newargs.prot = uap->prot; newargs.flags = uap->share; newargs.fd = uap->fd; newargs.pos = (off_t)uap->pos; - return(mmap(curp,&newargs, retval)); + ret = mmap(curp, &newargs, &addr); + if (ret == 0) + *retval = CAST_DOWN(register_t, addr); } else - return(EINVAL); + ret = EINVAL; + return ret; } -struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - long pad; -#endif - off_t pos; -}; + int -mmap(p, uap, retval) - struct proc *p; - struct mmap_args *uap; - register_t *retval; +mmap(struct proc *p, struct mmap_args *uap, user_addr_t *retval) { /* * Map in special device (must be SHARED) or file */ - struct file *fp; + struct fileproc *fp; register struct vnode *vp; int flags; int prot; int err=0; vm_map_t user_map; kern_return_t result; - vm_offset_t user_addr; - vm_size_t user_size; - vm_offset_t pageoff; + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; + vm_object_offset_t pageoff; vm_object_offset_t file_pos; - boolean_t find_space, docow; + int alloc_flags; + boolean_t docow; vm_prot_t maxprot; void *handle; vm_pager_t pager; int mapanon=0; + int fpref=0; + int error =0; + int fd = uap->fd; - user_addr = 
(vm_offset_t)uap->addr; - user_size = (vm_size_t) uap->len; - AUDIT_ARG(addr, (void *)user_addr); - AUDIT_ARG(len, (int) user_size); + user_addr = (mach_vm_offset_t)uap->addr; + user_size = (mach_vm_size_t) uap->len; + + AUDIT_ARG(addr, user_addr); + AUDIT_ARG(len, user_size); AUDIT_ARG(fd, uap->fd); prot = (uap->prot & VM_PROT_ALL); flags = uap->flags; + vp = NULLVP; /* * The vm code does not have prototypes & compiler doesn't do the' @@ -237,21 +201,20 @@ mmap(p, uap, retval) /* make sure mapping fits into numeric range etc */ if ((file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) || - ((ssize_t) uap->len < 0 )|| - ((flags & MAP_ANON) && uap->fd != -1)) + ((flags & MAP_ANON) && fd != -1)) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ - pageoff = ((vm_offset_t)file_pos & PAGE_MASK); + pageoff = (file_pos & PAGE_MASK); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ user_size += pageoff; /* low end... */ - user_size = (vm_size_t) round_page_32(user_size); /* hi end */ + user_size = mach_vm_round_page(user_size); /* hi end */ /* @@ -267,13 +230,6 @@ mmap(p, uap, retval) user_addr -= pageoff; if (user_addr & PAGE_MASK) return (EINVAL); - /* Address range must be all in user VM space. */ - if (VM_MAX_ADDRESS > 0 && (user_addr + user_size > VM_MAX_ADDRESS)) - return (EINVAL); - if (VM_MIN_ADDRESS > 0 && user_addr < VM_MIN_ADDRESS) - return (EINVAL); - if (user_addr + user_size < user_addr) - return (EINVAL); } #ifdef notyet /* DO not have apis to get this info, need to wait till then*/ @@ -285,8 +241,8 @@ mmap(p, uap, retval) * There should really be a pmap call to determine a reasonable * location. */ - else if (addr < round_page_32(p->p_vmspace->vm_daddr + MAXDSIZ)) - addr = round_page_32(p->p_vmspace->vm_daddr + MAXDSIZ); + else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) + addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ); #endif @@ -300,37 +256,61 @@ mmap(p, uap, retval) file_pos = 0; mapanon = 1; } else { + struct vnode_attr va; + struct vfs_context context; /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. 
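
[In the hunk that follows, fdgetf() gives way to fp_lookup(), which takes a fileproc reference that must be balanced with fp_drop(), and every use of the vnode is bracketed by vnode_getwithref()/vnode_put(); the new fpref flag and the goto bad label exist to unwind those references on every error path. The shape of the discipline; kernel context, a sketch with abbreviated error handling, not the verbatim code:]

    static int
    with_file_vnode(struct proc *p, int fd)
    {
            struct fileproc *fp;
            struct vnode *vp;
            int error;

            error = fp_lookup(p, fd, &fp, 0);       /* takes an fp reference */
            if (error)
                    return (error);

            if (fp->f_fglob->fg_type != DTYPE_VNODE) {
                    error = EINVAL;
                    goto bad;                       /* fp held: must drop */
            }
            vp = (struct vnode *)fp->f_fglob->fg_data;

            error = vnode_getwithref(vp);           /* takes an iocount */
            if (error)
                    goto bad;

            /* ... use vp ... */

            (void)vnode_put(vp);                    /* release the iocount */
    bad:
            fp_drop(p, fd, fp, 0);                  /* balance fp_lookup() */
            return (error);
    }
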
*/ - err = fdgetf(p, uap->fd, &fp); + err = fp_lookup(p, fd, &fp, 0); if (err) return(err); - if(fp->f_type == DTYPE_PSXSHM) { - uap->addr = (caddr_t)user_addr; - uap->len = user_size; + fpref = 1; + if(fp->f_fglob->fg_type == DTYPE_PSXSHM) { + uap->addr = (user_addr_t)user_addr; + uap->len = (user_size_t)user_size; uap->prot = prot; uap->flags = flags; uap->pos = file_pos; - return(pshm_mmap(p, uap, retval, fp , pageoff)); + error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); + goto bad; } - if (fp->f_type != DTYPE_VNODE) - return(EINVAL); - vp = (struct vnode *)fp->f_data; - - if (vp->v_type != VREG && vp->v_type != VCHR) - return (EINVAL); + if (fp->f_fglob->fg_type != DTYPE_VNODE) { + error = EINVAL; + goto bad; + } + vp = (struct vnode *)fp->f_fglob->fg_data; + error = vnode_getwithref(vp); + if(error != 0) + goto bad; + + if (vp->v_type != VREG && vp->v_type != VCHR) { + (void)vnode_put(vp); + error = EINVAL; + goto bad; + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + /* conformance change - mmap needs to update access time for mapped + * files + */ + VATTR_INIT(&va); + nanotime(&va.va_access_time); + VATTR_SET_ACTIVE(&va, va_access_time); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + vnode_setattr(vp, &va, &context); /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR || vp->v_type == VSTR) { - return(ENODEV); + (void)vnode_put(vp); + error = ENODEV; + goto bad; } else { /* * Ensure that file and memory protections are @@ -342,10 +322,13 @@ mmap(p, uap, retval) * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ - if (fp->f_flag & FREAD) + if (fp->f_fglob->fg_flag & FREAD) maxprot |= VM_PROT_READ; - else if (prot & PROT_READ) - return (EACCES); + else if (prot & PROT_READ) { + (void)vnode_put(vp); + error = EACCES; + goto bad; + } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character @@ -355,19 +338,30 @@ mmap(p, uap, retval) */ if ((flags & MAP_SHARED) != 0) { - if ((fp->f_flag & FWRITE) != 0) { - struct vattr va; - if ((err = - VOP_GETATTR(vp, &va, - p->p_ucred, p))) - return (err); - if ((va.va_flags & - (IMMUTABLE|APPEND)) == 0) - maxprot |= VM_PROT_WRITE; - else if (prot & PROT_WRITE) - return (EPERM); - } else if ((prot & PROT_WRITE) != 0) - return (EACCES); + if ((fp->f_fglob->fg_flag & FWRITE) != 0) { + /* + * check for write access + * + * Note that we already made this check when granting FWRITE + * against the file, so it seems redundant here. + */ + error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, &context); + + /* if not granted for any reason, but we wanted it, bad */ + if ((prot & PROT_WRITE) && (error != 0)) { + vnode_put(vp); + goto bad; + } + + /* if writable, remember */ + if (error == 0) + maxprot |= VM_PROT_WRITE; + + } else if ((prot & PROT_WRITE) != 0) { + (void)vnode_put(vp); + error = EACCES; + goto bad; + } } else maxprot |= VM_PROT_WRITE; @@ -375,42 +369,56 @@ mmap(p, uap, retval) } } - if (user_size == 0) - return(0); + if (user_size == 0) { + if (!mapanon) + (void)vnode_put(vp); + error = 0; + goto bad; + } /* * We bend a little - round the start and end addresses * to the nearest page boundary. 
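
[The hunk that follows replaces the old deallocate-then-map sequence for MAP_FIXED with a single mach_vm_map() carrying VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, so the existing mappings are torn down and the new one established atomically; the comment in the diff spells out the race the two-step version allowed. A userland illustration of the flag, assuming the mach_vm_map()/VM_FLAGS_OVERWRITE interface as exposed by later SDKs; on a system of this vintage the flag may be kernel-only:]

    #include <stdio.h>
    #include <mach/mach.h>
    #include <mach/mach_vm.h>
    #include <mach/vm_statistics.h>

    int
    main(void)
    {
            mach_vm_address_t addr = 0;
            mach_vm_size_t size = 16 * 4096;
            kern_return_t kr;

            /* First mapping: kernel picks the address. */
            kr = mach_vm_map(mach_task_self(), &addr, size, 0,
                VM_FLAGS_ANYWHERE, MACH_PORT_NULL, 0, FALSE,
                VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
            printf("map:       kr=%d addr=0x%llx\n", kr,
                (unsigned long long)addr);

            /* Replace it in place, atomically, as the new MAP_FIXED path
             * does; there is no window where the range is unmapped. */
            kr = mach_vm_map(mach_task_self(), &addr, size, 0,
                VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, MACH_PORT_NULL, 0,
                FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
            printf("overwrite: kr=%d\n", kr);
            return 0;
    }
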
*/ - user_size = round_page_32(user_size); + user_size = mach_vm_round_page(user_size); - if (file_pos & PAGE_MASK_64) - return (EINVAL); + if (file_pos & PAGE_MASK_64) { + if (!mapanon) + (void)vnode_put(vp); + error = EINVAL; + goto bad; + } user_map = current_map(); if ((flags & MAP_FIXED) == 0) { - find_space = TRUE; - user_addr = round_page_32(user_addr); + alloc_flags = VM_FLAGS_ANYWHERE; + user_addr = mach_vm_round_page(user_addr); } else { - if (user_addr != trunc_page_32(user_addr)) - return (EINVAL); - find_space = FALSE; - (void) vm_deallocate(user_map, user_addr, user_size); + if (user_addr != mach_vm_trunc_page(user_addr)) { + if (!mapanon) + (void)vnode_put(vp); + error = EINVAL; + goto bad; + } + /* + * mmap(MAP_FIXED) will replace any existing mappings in the + * specified range, if the new mapping is successful. + * If we just deallocate the specified address range here, + * another thread might jump in and allocate memory in that + * range before we get a chance to establish the new mapping, + * and we won't have a chance to restore the old mappings. + * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it + * has to deallocate the existing mappings and establish the + * new ones atomically. + */ + alloc_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } /* * Lookup/allocate object. */ - if (flags & MAP_ANON) { - /* - * Unnamed anonymous regions always start at 0. - */ - if (handle == 0) - file_pos = 0; - } - if (handle == NULL) { pager = NULL; #ifdef notyet @@ -423,23 +431,22 @@ mmap(p, uap, retval) maxprot |= VM_PROT_EXECUTE; #endif #endif - result = vm_allocate(user_map, &user_addr, user_size, find_space); + result = mach_vm_map(user_map, &user_addr, user_size, 0, + alloc_flags, IPC_PORT_NULL, 0, + FALSE, prot, maxprot, + (flags & MAP_SHARED) ? VM_INHERIT_SHARE : + VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) goto out; - - result = vm_protect(user_map, user_addr, user_size, TRUE, maxprot); - if (result != KERN_SUCCESS) - goto out; - result = vm_protect(user_map, user_addr, user_size, FALSE, prot); - if (result != KERN_SUCCESS) - goto out; - } else { UBCINFOCHECK("mmap", vp); pager = (vm_pager_t)ubc_getpager(vp); - if (pager == NULL) - return (ENOMEM); + if (pager == NULL) { + (void)vnode_put(vp); + error = ENOMEM; + goto bad; + } /* * Set credentials: @@ -465,80 +472,63 @@ mmap(p, uap, retval) #endif #endif /* notyet */ - result = vm_map_64(user_map, &user_addr, user_size, - 0, find_space, pager, file_pos, docow, - prot, maxprot, - VM_INHERIT_DEFAULT); + result = mach_vm_map(user_map, &user_addr, user_size, + 0, alloc_flags, (ipc_port_t)pager, file_pos, + docow, prot, maxprot, + (flags & MAP_SHARED) ? 
VM_INHERIT_SHARE : + VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) + if (result != KERN_SUCCESS) { + (void)vnode_put(vp); goto out; + } - ubc_map(vp); + (void)ubc_map(vp,(prot & ( PROT_EXEC | PROT_READ | PROT_WRITE | PROT_EXEC))); } - if (flags & MAP_SHARED) { - result = vm_inherit(user_map, user_addr, user_size, - VM_INHERIT_SHARE); - if (result != KERN_SUCCESS) { - (void) vm_deallocate(user_map, user_addr, user_size); - goto out; - } - } + if (!mapanon) + (void)vnode_put(vp); out: switch (result) { case KERN_SUCCESS: - if (!mapanon) - *fdflags(p, uap->fd) |= UF_MAPPED; - *retval = (register_t)(user_addr + pageoff); - return (0); + *retval = user_addr + pageoff; + error = 0; + break; case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: - return (ENOMEM); + error = ENOMEM; + break; case KERN_PROTECTION_FAILURE: - return (EACCES); + error = EACCES; + break; default: - return (EINVAL); + error = EINVAL; + break; } - /*NOTREACHED*/ +bad: + if (fpref) + fp_drop(p, fd, fp, 0); + return(error); } -struct msync_args { - caddr_t addr; - int len; - int flags; -}; int -msync(p, uap, retval) - struct proc *p; - struct msync_args *uap; - register_t *retval; +msync(__unused struct proc *p, struct msync_args *uap, __unused register_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; + mach_vm_offset_t addr; + mach_vm_size_t size; int flags; vm_map_t user_map; int rv; vm_sync_t sync_flags=0; - addr = (vm_offset_t) uap->addr; - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size = uap->len; - size = (vm_size_t) round_page_32(size); - flags = uap->flags; - - if (addr + size < addr) - return(EINVAL); - - user_map = current_map(); - - if ((flags & (MS_ASYNC|MS_SYNC)) == (MS_ASYNC|MS_SYNC)) - return (EINVAL); - - if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) - return (EINVAL); + addr = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t)uap->len; + if (addr & PAGE_MASK_64) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } if (size == 0) { /* * We cannot support this properly without maintaining @@ -550,6 +540,12 @@ msync(p, uap, retval) return (EINVAL); /* XXX breaks posix apps */ } + flags = uap->flags; + /* disallow contradictory flags */ + if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC) || + (flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) + return (EINVAL); + if (flags & MS_KILLPAGES) sync_flags |= VM_SYNC_KILLPAGES; if (flags & MS_DEACTIVATE) @@ -563,111 +559,88 @@ msync(p, uap, retval) else sync_flags |= VM_SYNC_SYNCHRONOUS; } - rv = vm_msync(user_map, addr, size, sync_flags); + + sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */ + + user_map = current_map(); + rv = mach_vm_msync(user_map, addr, size, sync_flags); switch (rv) { case KERN_SUCCESS: break; - case KERN_INVALID_ADDRESS: - return (EINVAL); /* Sun returns ENOMEM? 
*/ + case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */ + return (ENOMEM); case KERN_FAILURE: return (EIO); default: return (EINVAL); } - return (0); } int -mremap() +mremap(void) { /* Not yet implemented */ - return (EOPNOTSUPP); + return (ENOTSUP); } -struct munmap_args { - caddr_t addr; - int len; -}; int -munmap(p, uap, retval) - struct proc *p; - struct munmap_args *uap; - register_t *retval; - +munmap(__unused struct proc *p, struct munmap_args *uap, __unused register_t *retval) { - vm_offset_t user_addr; - vm_size_t user_size, pageoff; + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; kern_return_t result; - user_addr = (vm_offset_t) uap->addr; - user_size = (vm_size_t) uap->len; + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; - AUDIT_ARG(addr, (void *)user_addr); - AUDIT_ARG(len, (int) user_size); + AUDIT_ARG(addr, user_addr); + AUDIT_ARG(len, user_size); - pageoff = (user_addr & PAGE_MASK); + if (user_addr & PAGE_MASK_64) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } - user_addr -= pageoff; - user_size += pageoff; - user_size = round_page_32(user_size); if (user_addr + user_size < user_addr) return(EINVAL); - if (user_size == 0) - return (0); - - /* Address range must be all in user VM space. */ - if (VM_MAX_ADDRESS > 0 && (user_addr + user_size > VM_MAX_ADDRESS)) - return (EINVAL); - if (VM_MIN_ADDRESS > 0 && user_addr < VM_MIN_ADDRESS) - return (EINVAL); - + if (user_size == 0) { + /* UNIX SPEC: size is 0, return EINVAL */ + return EINVAL; + } - result = vm_deallocate(current_map(), user_addr, user_size); + result = mach_vm_deallocate(current_map(), user_addr, user_size); if (result != KERN_SUCCESS) { return(EINVAL); } return(0); } -void -munmapfd(p, fd) - struct proc *p; - int fd; -{ - /* - * XXX should vm_deallocate any regions mapped to this file - */ - *fdflags(p, fd) &= ~UF_MAPPED; -} - -struct mprotect_args { - caddr_t addr; - int len; - int prot; -}; int -mprotect(p, uap, retval) - struct proc *p; - struct mprotect_args *uap; - register_t *retval; +mprotect(__unused struct proc *p, struct mprotect_args *uap, __unused register_t *retval) { register vm_prot_t prot; - vm_offset_t user_addr; - vm_size_t user_size, pageoff; + mach_vm_offset_t user_addr; + mach_vm_size_t user_size; kern_return_t result; vm_map_t user_map; AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); AUDIT_ARG(value, uap->prot); - user_addr = (vm_offset_t) uap->addr; - user_size = (vm_size_t) uap->len; + + user_addr = (mach_vm_offset_t) uap->addr; + user_size = (mach_vm_size_t) uap->len; prot = (vm_prot_t)(uap->prot & VM_PROT_ALL); + if (user_addr & PAGE_MASK_64) { + /* UNIX SPEC: user address is not page-aligned, return EINVAL */ + return EINVAL; + } + #ifdef notyet /* Hmm .. 
*/ #if defined(VM_PROT_READ_IS_EXEC) @@ -676,41 +649,28 @@ mprotect(p, uap, retval) #endif #endif /* notyet */ - pageoff = (user_addr & PAGE_MASK); - user_addr -= pageoff; - user_size += pageoff; - user_size = round_page_32(user_size); - if (user_addr + user_size < user_addr) - return(EINVAL); - user_map = current_map(); - result = vm_map_protect(user_map, user_addr, user_addr+user_size, prot, - FALSE); + result = mach_vm_protect(user_map, user_addr, user_size, + FALSE, prot); switch (result) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); + case KERN_INVALID_ADDRESS: + /* UNIX SPEC: for an invalid address range, return ENOMEM */ + return ENOMEM; } return (EINVAL); } -struct minherit_args { - void *addr; - size_t len; - int inherit; -}; - int -minherit(p, uap, retval) - struct proc *p; - struct minherit_args *uap; - register_t *retval; +minherit(__unused struct proc *p, struct minherit_args *uap, __unused register_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; + mach_vm_offset_t addr; + mach_vm_size_t size; register vm_inherit_t inherit; vm_map_t user_map; kern_return_t result; @@ -718,19 +678,13 @@ minherit(p, uap, retval) AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); AUDIT_ARG(value, uap->inherit); - addr = (vm_offset_t)uap->addr; - size = uap->len; - inherit = uap->inherit; - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); - if (addr + size < addr) - return(EINVAL); + addr = (mach_vm_offset_t)uap->addr; + size = (mach_vm_size_t)uap->len; + inherit = uap->inherit; user_map = current_map(); - result = vm_inherit(user_map, addr, size, + result = mach_vm_inherit(user_map, addr, size, inherit); switch (result) { case KERN_SUCCESS: @@ -741,45 +695,19 @@ minherit(p, uap, retval) return (EINVAL); } -struct madvise_args { - caddr_t addr; - int len; - int behav; -}; -/* ARGSUSED */ int -madvise(p, uap, retval) - struct proc *p; - struct madvise_args *uap; - register_t *retval; +madvise(__unused struct proc *p, struct madvise_args *uap, __unused register_t *retval) { vm_map_t user_map; - vm_offset_t start, end; + mach_vm_offset_t start; + mach_vm_size_t size; vm_behavior_t new_behavior; kern_return_t result; - /* - * Check for illegal addresses. Watch out for address wrap... Note - * that VM_*_ADDRESS are not constants due to casts (argh). - */ - if (VM_MAX_ADDRESS > 0 && - ((vm_offset_t) uap->addr + uap->len) > VM_MAX_ADDRESS) - return (ENOMEM); - if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) - return (ENOMEM); - - if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) - return (ENOMEM); - /* * Since this routine is only advisory, we default to conservative * behavior. 
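
[The msync, munmap and mprotect hunks above (and the madvise hunk just below) tighten UNIX-conformance of the errno mapping: non-page-aligned addresses now fail with EINVAL instead of being silently rounded, a zero-length munmap is EINVAL, and KERN_INVALID_ADDRESS surfaces as ENOMEM rather than EINVAL. A userland probe; the expected results assume a kernel with this patch applied:]

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int
    main(void)
    {
            size_t pg = (size_t)getpagesize();
            char *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
                MAP_ANON | MAP_PRIVATE, -1, 0);

            if (p == MAP_FAILED)
                    return 1;

            /* Unaligned address: EINVAL now, instead of silent rounding. */
            if (msync(p + 1, pg - 1, MS_SYNC) == -1)
                    printf("msync unaligned:   %s\n", strerror(errno));

            /* Zero-length munmap: EINVAL per the spec. */
            if (munmap(p, 0) == -1)
                    printf("munmap zero-len:   %s\n", strerror(errno));

            /* mprotect across an unmapped range: ENOMEM per the spec. */
            munmap(p, pg);
            if (mprotect(p, pg, PROT_READ) == -1)
                    printf("mprotect unmapped: %s\n", strerror(errno));
            return 0;
    }
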
*/ - start = trunc_page_32((vm_offset_t) uap->addr); - end = round_page_32((vm_offset_t) uap->addr + uap->len); - - user_map = current_map(); - switch (uap->behav) { case MADV_RANDOM: new_behavior = VM_BEHAVIOR_RANDOM; @@ -800,33 +728,28 @@ madvise(p, uap, retval) return(EINVAL); } - result = vm_behavior_set(user_map, start, end, new_behavior); + start = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t) uap->len; + + user_map = current_map(); + + result = mach_vm_behavior_set(user_map, start, size, new_behavior); switch (result) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: - return (EINVAL); + return (ENOMEM); } return (EINVAL); } -struct mincore_args { - const void *addr; - size_t len; - char *vec; -}; -/* ARGSUSED */ int -mincore(p, uap, retval) - struct proc *p; - struct mincore_args *uap; - register_t *retval; +mincore(__unused struct proc *p, struct mincore_args *uap, __unused register_t *retval) { - vm_offset_t addr, first_addr; - vm_offset_t end; + mach_vm_offset_t addr, first_addr, end; vm_map_t map; - char *vec; + user_addr_t vec; int error; int vecindex, lastvecindex; int mincoreinfo=0; @@ -834,17 +757,17 @@ mincore(p, uap, retval) kern_return_t ret; int numref; + char c; + map = current_map(); /* * Make sure that the addresses presented are valid for user * mode. */ - first_addr = addr = trunc_page_32((vm_offset_t) uap->addr); - end = addr + (vm_size_t)round_page_32(uap->len); + first_addr = addr = mach_vm_trunc_page(uap->addr); + end = addr + mach_vm_round_page(uap->len); - if (VM_MAX_ADDRESS > 0 && end > VM_MAX_ADDRESS) - return (EINVAL); if (end < addr) return (EINVAL); @@ -861,7 +784,7 @@ mincore(p, uap, retval) * up the pages elsewhere. */ lastvecindex = -1; - for(addr; addr < end; addr += PAGE_SIZE) { + for( ; addr < end; addr += PAGE_SIZE ) { pqueryinfo = 0; ret = vm_map_page_query(map, addr, &pqueryinfo, &numref); if (ret != KERN_SUCCESS) @@ -885,7 +808,8 @@ mincore(p, uap, retval) * the byte vector is zeroed for those skipped entries. 
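
[The mincore() rework here (continued just below) replaces the subyte() stores into the user vector with one-byte copyout()s against a user_addr_t vec; the interface is unchanged: one status byte per page. A quick userland use of it:]

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int
    main(void)
    {
            size_t pg = (size_t)getpagesize(), npages = 4, i;
            char *buf = mmap(NULL, pg * npages, PROT_READ | PROT_WRITE,
                MAP_ANON | MAP_PRIVATE, -1, 0);
            char vec[4];

            if (buf == MAP_FAILED)
                    return 1;
            buf[0] = 1;                     /* touch only the first page */
            if (mincore(buf, pg * npages, vec) == -1) {
                    perror("mincore");
                    return 1;
            }
            for (i = 0; i < npages; i++)
                    printf("page %zu: %sresident\n", i,
                        (vec[i] & MINCORE_INCORE) ? "" : "not ");
            return 0;
    }
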
*/ while((lastvecindex + 1) < vecindex) { - error = subyte( vec + lastvecindex, 0); + c = 0; + error = copyout(&c, vec + lastvecindex, 1); if (error) { return (EFAULT); } @@ -895,7 +819,8 @@ mincore(p, uap, retval) /* * Pass the page information to the user */ - error = subyte( vec + vecindex, mincoreinfo); + c = (char)mincoreinfo; + error = copyout(&c, vec + vecindex, 1); if (error) { return (EFAULT); } @@ -908,7 +833,8 @@ mincore(p, uap, retval) */ vecindex = (end - first_addr) >> PAGE_SHIFT; while((lastvecindex + 1) < vecindex) { - error = subyte( vec + lastvecindex, 0); + c = 0; + error = copyout(&c, vec + lastvecindex, 1); if (error) { return (EFAULT); } @@ -918,36 +844,31 @@ mincore(p, uap, retval) return (0); } -struct mlock_args { - caddr_t addr; - size_t len; -}; - int -mlock(p, uap, retval) - struct proc *p; - struct mlock_args *uap; - register_t *retval; +mlock(__unused struct proc *p, struct mlock_args *uap, __unused register_t *retvalval) { vm_map_t user_map; - vm_offset_t addr; - vm_size_t size, pageoff; - int error; + vm_map_offset_t addr; + vm_map_size_t size, pageoff; kern_return_t result; AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - addr = (vm_offset_t) uap->addr; - size = uap->len; - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); + addr = (vm_map_offset_t) uap->addr; + size = (vm_map_size_t)uap->len; /* disable wrap around */ if (addr + size < addr) return (EINVAL); + + if (size == 0) + return (0); + + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size = vm_map_round_page(size+pageoff); + #ifdef notyet /* Hmm.. What am I going to do with this? */ if (atop(size) + cnt.v_wire_count > vm_page_max_wired) @@ -957,7 +878,7 @@ mlock(p, uap, retval) p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) return (ENOMEM); #else - error = suser(p->p_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); #endif @@ -965,45 +886,30 @@ mlock(p, uap, retval) user_map = current_map(); - /* vm_wire */ - result = vm_map_wire(user_map, addr, (vm_offset_t)(addr+size), VM_PROT_NONE, TRUE); + /* have to call vm_map_wire directly to pass "I don't know" protections */ + result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE); return (result == KERN_SUCCESS ? 0 : ENOMEM); } -struct munlock_args { - caddr_t addr; - size_t len; -}; int -munlock(p, uap, retval) - struct proc *p; - struct munlock_args *uap; - register_t *retval; +munlock(__unused struct proc *p, struct munlock_args *uap, __unused register_t *retval) { - vm_offset_t addr; - vm_size_t size, pageoff; - int error; + mach_vm_offset_t addr; + mach_vm_size_t size; vm_map_t user_map; kern_return_t result; AUDIT_ARG(addr, uap->addr); - AUDIT_ARG(len, uap->len); - addr = (vm_offset_t) uap->addr; - size = uap->len; + AUDIT_ARG(addr, uap->len); - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size += pageoff; - size = (vm_size_t) round_page_32(size); + addr = (mach_vm_offset_t) uap->addr; + size = (mach_vm_size_t)uap->len; - /* disable wrap around */ - if (addr + size < addr) - return (EINVAL); #ifdef notyet /* Hmm.. What am I going to do with this? 
*/ #ifndef pmap_wired_count - error = suser(p->p_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); #endif @@ -1011,46 +917,28 @@ munlock(p, uap, retval) user_map = current_map(); - /* vm_wire */ - result = vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE); + /* JMM - need to remove all wirings by spec - this just removes one */ + result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE); return (result == KERN_SUCCESS ? 0 : ENOMEM); } -struct mlockall_args { - int how; -}; - int -mlockall(p, uap) - struct proc *p; - struct mlockall_args *uap; +mlockall(__unused struct proc *p, __unused struct mlockall_args *uap, __unused register_t *retval) { return (ENOSYS); } -struct munlockall_args { - int how; -}; - int -munlockall(p, uap) - struct proc *p; - struct munlockall_args *uap; +munlockall(__unused struct proc *p, __unused struct munlockall_args *uap, __unused register_t *retval) { return(ENOSYS); } /* BEGIN DEFUNCT */ -struct obreak_args { - char *nsiz; -}; int -obreak(p, uap, retval) - struct proc *p; - struct obreak_args *uap; - register_t *retval; +obreak(__unused struct proc *p, __unused struct obreak_args *uap, __unused register_t *retval) { /* Not implemented, obsolete */ return (ENOMEM); @@ -1059,38 +947,32 @@ obreak(p, uap, retval) int both; int -ovadvise() +ovadvise(__unused struct proc *p, __unused struct ovadvise_args *uap, __unused register_t *retval) { #ifdef lint both = 0; #endif + return( 0 ); } /* END DEFUNCT */ -/* CDY need to fix interface to allow user to map above 32 bits */ /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ kern_return_t -map_fd( - int fd, - vm_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size) +map_fd(struct map_fd_args *args) { + int fd = args->fd; + vm_offset_t offset = args->offset; + vm_offset_t *va = args->va; + boolean_t findspace = args->findspace; + vm_size_t size = args->size; kern_return_t ret; - boolean_t funnel_state; AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD); - AUDIT_ARG(addr, va); + AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va)); AUDIT_ARG(fd, fd); - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - ret = map_fd_funneled( fd, (vm_object_offset_t)offset, - va, findspace, size); - - (void) thread_funnel_set(kernel_flock, FALSE); + ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size); AUDIT_MACH_SYSCALL_EXIT(ret); return ret; @@ -1105,114 +987,153 @@ map_fd_funneled( vm_size_t size) { kern_return_t result; - struct file *fp; + struct fileproc *fp; struct vnode *vp; void * pager; vm_offset_t map_addr=0; vm_size_t map_size; - vm_map_copy_t tmp; int err=0; vm_map_t my_map; struct proc *p =(struct proc *)current_proc(); + struct vnode_attr vattr; + struct vfs_context context; /* * Find the inode; verify that it's a regular file. 
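
[map_fd_funneled() below keeps the two-step placement for a caller-supplied address: the file is first mapped wherever the kernel likes, then relocated onto the requested address with vm_map_copyin()/vm_map_copy_overwrite(), and each failure path now also drops the vnode iocount before bailing out. Condensed shape of the relocation; kernel context, a sketch rather than the verbatim code:]

    /* Condensed from the !findspace path below (sketch). */
    vm_map_copy_t tmp;

    result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
        (vm_map_size_t)map_size, TRUE, &tmp);   /* TRUE: consume source */
    if (result != KERN_SUCCESS) {
            /* copyin failed: tear down the temporary mapping */
            (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
                vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS);
    } else {
            result = vm_map_copy_overwrite(my_map,
                (vm_map_address_t)dst_addr, tmp, FALSE);
            if (result != KERN_SUCCESS)
                    vm_map_copy_discard(tmp);   /* copy owns the pages now */
    }
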
*/ - err = fdgetf(p, fd, &fp); + err = fp_lookup(p, fd, &fp, 0); if (err) return(err); - if (fp->f_type != DTYPE_VNODE) - return(KERN_INVALID_ARGUMENT); + if (fp->f_fglob->fg_type != DTYPE_VNODE){ + err = KERN_INVALID_ARGUMENT; + goto bad; + } - if (!(fp->f_flag & FREAD)) - return (KERN_PROTECTION_FAILURE); + if (!(fp->f_fglob->fg_flag & FREAD)) { + err = KERN_PROTECTION_FAILURE; + goto bad; + } - vp = (struct vnode *)fp->f_data; + vp = (struct vnode *)fp->f_fglob->fg_data; + err = vnode_getwithref(vp); + if(err != 0) + goto bad; - if (vp->v_type != VREG) - return (KERN_INVALID_ARGUMENT); + if (vp->v_type != VREG) { + (void)vnode_put(vp); + err = KERN_INVALID_ARGUMENT; + goto bad; + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); + /* conformance change - mmap needs to update access time for mapped + * files + */ + VATTR_INIT(&vattr); + nanotime(&vattr.va_access_time); + VATTR_SET_ACTIVE(&vattr, va_access_time); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + vnode_setattr(vp, &vattr, &context); + if (offset & PAGE_MASK_64) { printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); - return (KERN_INVALID_ARGUMENT); + (void)vnode_put(vp); + err = KERN_INVALID_ARGUMENT; + goto bad; } - map_size = round_page_32(size); + map_size = round_page(size); /* * Allow user to map in a zero length file. */ - if (size == 0) - return (KERN_SUCCESS); + if (size == 0) { + (void)vnode_put(vp); + err = KERN_SUCCESS; + goto bad; + } /* * Map in the file. */ UBCINFOCHECK("map_fd_funneled", vp); pager = (void *) ubc_getpager(vp); - if (pager == NULL) - return (KERN_FAILURE); + if (pager == NULL) { + (void)vnode_put(vp); + err = KERN_FAILURE; + goto bad; + } my_map = current_map(); result = vm_map_64( my_map, - &map_addr, map_size, (vm_offset_t)0, TRUE, - pager, offset, TRUE, + &map_addr, map_size, (vm_offset_t)0, + VM_FLAGS_ANYWHERE, pager, offset, TRUE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) - return (result); + if (result != KERN_SUCCESS) { + (void)vnode_put(vp); + err = result; + goto bad; + } if (!findspace) { vm_offset_t dst_addr; vm_map_copy_t tmp; - if (copyin(va, &dst_addr, sizeof (dst_addr)) || + if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || trunc_page_32(dst_addr) != dst_addr) { (void) vm_map_remove( my_map, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS); - return (KERN_INVALID_ADDRESS); + (void)vnode_put(vp); + err = KERN_INVALID_ADDRESS; + goto bad; } - result = vm_map_copyin( - my_map, - map_addr, map_size, TRUE, - &tmp); + result = vm_map_copyin(my_map, (vm_map_address_t)map_addr, + (vm_map_size_t)map_size, TRUE, &tmp); if (result != KERN_SUCCESS) { - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, + (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); - return (result); + (void)vnode_put(vp); + err = result; + goto bad; } - result = vm_map_copy_overwrite( - my_map, - dst_addr, tmp, FALSE); + result = vm_map_copy_overwrite(my_map, + (vm_map_address_t)dst_addr, tmp, FALSE); if (result != KERN_SUCCESS) { vm_map_copy_discard(tmp); - return (result); + (void)vnode_put(vp); + err = result; + goto bad; } } else { - if (copyout(&map_addr, va, sizeof (map_addr))) { - (void) vm_map_remove( - my_map, - map_addr, map_addr + map_size, + if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) { + (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); - return 
(KERN_INVALID_ADDRESS); + (void)vnode_put(vp); + err = KERN_INVALID_ADDRESS; + goto bad; } } ubc_setcred(vp, current_proc()); - ubc_map(vp); - - return (KERN_SUCCESS); + (void)ubc_map(vp, (PROT_READ | PROT_WRITE | PROT_EXEC)); + (void)vnode_put(vp); + err = 0; +bad: + fp_drop(p, fd, fp, 0); + return (err); } + diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c index 064678311..63524f2c4 100644 --- a/bsd/kern/kern_newsysctl.c +++ b/bsd/kern/kern_newsysctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include @@ -307,15 +307,15 @@ sysctl_sysctl_name SYSCTL_HANDLER_ARGS int error = 0; struct sysctl_oid *oid; struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; - char buf[10]; + char tempbuf[10]; while (namelen) { if (!lsp) { - snprintf(buf,sizeof(buf),"%d",*name); + snprintf(tempbuf,sizeof(tempbuf),"%d",*name); if (req->oldidx) error = SYSCTL_OUT(req, ".", 1); if (!error) - error = SYSCTL_OUT(req, buf, strlen(buf)); + error = SYSCTL_OUT(req, tempbuf, strlen(tempbuf)); if (error) return (error); namelen--; @@ -497,8 +497,7 @@ sysctl_sysctl_name2oid SYSCTL_HANDLER_ARGS if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ return (ENAMETOOLONG); - p = _MALLOC(req->newlen+1, M_TEMP, M_WAITOK); - + MALLOC(p, char *,req->newlen+1, M_TEMP, M_WAITOK); if (!p) return ENOMEM; @@ -737,14 +736,13 @@ static int sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) { size_t i = 0; - int error = 0; if (req->oldptr) { i = l; if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) - bcopy((void*)p, (char *)req->oldptr + req->oldidx, i); + bcopy((void*)p, CAST_DOWN(char *, (req->oldptr + req->oldidx)), i); } req->oldidx += l; if (req->oldptr && i != l) @@ -759,7 +757,7 @@ sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) return 0; if (req->newlen - req->newidx < l) return (EINVAL); - bcopy((char *)req->newptr + req->newidx, p, l); + bcopy(CAST_DOWN(char *, (req->newptr + req->newidx)), p, l); req->newidx += l; return (0); } @@ -779,10 +777,10 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle if (oldlenp) req.oldlen = *oldlenp; if (old) - req.oldptr= old; + req.oldptr = CAST_USER_ADDR_T(old); if (newlen) { req.newlen = newlen; - req.newptr = new; + req.newptr = CAST_USER_ADDR_T(new); } req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; @@ -806,7 +804,7 @@ kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldle /* unlock memory if required */ if (req.lock == 2) - vsunlock(req.oldptr, req.oldlen, B_WRITE); + vsunlock(req.oldptr, (user_size_t)req.oldlen, B_WRITE); memlock.sl_lock = 0; @@ -845,8 +843,7 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) if (i > req->oldlen - req->oldidx) i = req->oldlen - req->oldidx; if (i > 0) - error = copyout((void*)p, (char *)req->oldptr + req->oldidx, - i); + error = copyout((void*)p, (req->oldptr + req->oldidx), i); } req->oldidx += l; if (error) @@ -865,7 +862,7 @@ sysctl_new_user(struct sysctl_req *req, void *p, size_t l) return 0; if (req->newlen - req->newidx < l) return (EINVAL); - error = copyin((char *)req->newptr + req->newidx, p, l); + error = copyin((req->newptr + req->newidx), p, l); req->newidx += l; return (error); } @@ -934,13 +931,6 @@ found: return EINVAL; } - /* - * Switch to the 
NETWORK funnel for CTL_NET and KERN_IPC sysctls - */ - - if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) && - (name[1] == KERN_IPC)))) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { i = (oid->oid_handler) (oid, @@ -952,14 +942,6 @@ found: req); } - /* - * Switch back to the KERNEL funnel, if necessary - */ - - if (((name[0] == CTL_NET) || ((name[0] == CTL_KERN) && - (name[1] == KERN_IPC)))) - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (i); } @@ -984,17 +966,17 @@ new_sysctl(struct proc *p, struct sysctl_args *uap) if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); - error = copyin(uap->name, &name, uap->namelen * sizeof(int)); + error = copyin(CAST_USER_ADDR_T(uap->name), &name, uap->namelen * sizeof(int)); if (error) return (error); error = userland_sysctl(p, name, uap->namelen, - uap->old, uap->oldlenp, 0, - uap->new, uap->newlen, &j); + CAST_USER_ADDR_T(uap->old), uap->oldlenp, 0, + CAST_USER_ADDR_T(uap->new), uap->newlen, &j); if (error && error != ENOMEM) return (error); if (uap->oldlenp) { - i = copyout(&j, uap->oldlenp, sizeof(j)); + i = copyout(&j, CAST_USER_ADDR_T(uap->oldlenp), sizeof(j)); if (i) return (i); } @@ -1006,7 +988,9 @@ new_sysctl(struct proc *p, struct sysctl_args *uap) * must be in kernel space. */ int -userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval) +userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp, + size_t *oldlenp, int inkernel, user_addr_t newp, size_t newlen, + size_t *retval) { int error = 0; struct sysctl_req req, req2; @@ -1019,19 +1003,19 @@ userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *old if (inkernel) { req.oldlen = *oldlenp; } else { - error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); + error = copyin(CAST_USER_ADDR_T(oldlenp), &req.oldlen, sizeof(*oldlenp)); if (error) return (error); } } - if (old) { - req.oldptr= old; + if (oldp) { + req.oldptr = oldp; } if (newlen) { req.newlen = newlen; - req.newptr = new; + req.newptr = newp; } req.oldfunc = sysctl_old_user; diff --git a/bsd/kern/kern_panicinfo.c b/bsd/kern/kern_panicinfo.c index 83f753872..9ad8549f1 100644 --- a/bsd/kern/kern_panicinfo.c +++ b/bsd/kern/kern_panicinfo.c @@ -23,210 +23,159 @@ #include #include #include -#include #include -#include #include #include #include +#include +#include #include +#include +#include -/* prototypes not exported by osfmk. */ -extern void kmem_free(vm_map_t, vm_offset_t, vm_size_t); -extern kern_return_t kmem_alloc_wired(vm_map_t, vm_offset_t *, vm_size_t); +/* prototypes not exported by osfmk/console. 
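
[Note on the kern_newsysctl.c hunks above: sysctl_old_user() keeps advancing req->oldidx even once the user buffer is exhausted, returning ENOMEM for the short copy, so a caller that passes a NULL old buffer still learns the full required size. That accounting is what makes the standard two-call pattern work from user space:]

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/sysctl.h>

    int
    main(void)
    {
            int mib[2] = { CTL_KERN, KERN_OSTYPE };
            size_t len = 0;
            char *buf;

            /* Call 1: NULL old buffer; the kernel only reports the size. */
            if (sysctl(mib, 2, NULL, &len, NULL, 0) == -1)
                    return 1;
            buf = malloc(len);

            /* Call 2: fetch for real, now that len is known. */
            if (buf == NULL || sysctl(mib, 2, buf, &len, NULL, 0) == -1)
                    return 1;
            printf("kern.ostype = %s\n", buf);
            free(buf);
            return 0;
    }
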
*/ +extern void panic_dialog_test( void ); +extern int panic_dialog_set_image( const unsigned char * ptr, unsigned int size ); +extern void panic_dialog_get_image( unsigned char ** ptr, unsigned int * size ); -/* Globals */ -static off_t imagesizelimit = (4 * 4096); +/* make the compiler happy */ +extern int sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, struct proc *); -/* Information about the current panic image */ -static int image_bits = 32; /* Bitdepth */ -static char *image_pathname = NULL; /* path to it */ -static size_t image_pathlen = 0; /* and the length of the pathname */ +#define PANIC_IMAGE_SIZE_LIMIT (32 * 4096) /* 128K - Maximum amount of memory consumed for the panic UI */ +#define KERN_PANICINFO_TEST (KERN_PANICINFO_IMAGE+2) /* Allow the panic UI to be tested by root without causing a panic */ -static vm_offset_t image_ptr = NULL; /* the image itself */ -static off_t image_size = 0; /* and the imagesize */ - - -__private_extern__ void -get_panicimage(vm_offset_t *imageptr, vm_size_t *imagesize, int *imagebits) -{ - *imageptr = image_ptr; - *imagesize = image_size; - *imagebits = image_bits; -} - -static int -panicimage_from_file( - char *imname, - off_t sizelimit, - vm_offset_t *image, - off_t *filesize, - struct proc *p) -{ - int error = 0; - int error1 = 0; - int aresid; - struct nameidata nd; - struct vattr vattr; - struct vnode * vp; - kern_return_t kret; - struct pcred *pcred = p->p_cred; - struct ucred *cred = pcred->pc_ucred; - vm_offset_t iobuf; - - /* Open the file */ - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, imname, p); - error = vn_open(&nd, FREAD, S_IRUSR); - if (error) - return (error); - vp = nd.ni_vp; - - if (vp->v_type != VREG) { - error = EFAULT; - goto out; - } - - /* get the file size */ - error = VOP_GETATTR(vp, &vattr, cred, p); - if (error) - goto out; - - /* validate the file size */ - if (vattr.va_size > sizelimit) { - error = EFBIG; - goto out; - } - - /* allocate kernel wired memory */ - kret = kmem_alloc_wired(kernel_map, &iobuf, - (vm_size_t)vattr.va_size); - if (kret != KERN_SUCCESS) { - switch (kret) { - default: - error = EINVAL; - break; - case KERN_NO_SPACE: - case KERN_RESOURCE_SHORTAGE: - error = ENOMEM; - break; - case KERN_PROTECTION_FAILURE: - error = EPERM; - break; - } - goto out; - } - - /* read the file in the kernel buffer */ - error = vn_rdwr(UIO_READ, vp, (caddr_t)iobuf, (int)vattr.va_size, - (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, - cred, &aresid, p); - if (error) { - (void)kmem_free(kernel_map, iobuf, (vm_size_t)vattr.va_size); - goto out; - } - - /* - * return the image to the caller - * freeing this memory is callers responsibility - */ - *image = iobuf; - *filesize = (off_t)vattr.va_size; - -out: - VOP_UNLOCK(vp, 0, p); - error1 = vn_close(vp, FREAD, cred, p); - if (error == 0) - error = error1; - return (error); -} +/* Local data */ +static int image_size_limit = PANIC_IMAGE_SIZE_LIMIT; __private_extern__ int sysctl_dopanicinfo(name, namelen, oldp, oldlenp, newp, newlen, p) int *name; u_int namelen; - void *oldp; + user_addr_t oldp; size_t *oldlenp; - void *newp; + user_addr_t newp; size_t newlen; struct proc *p; { int error = 0; - int bitdepth = 32; /* default is 32 bits */ - char *imname; + vm_offset_t newimage = (vm_offset_t )NULL; + kern_return_t kret; + unsigned char * prev_image_ptr; + unsigned int prev_image_size; + /* all sysctl names at this level are terminal */ if (namelen != 1) return (ENOTDIR); /* overloaded */ + if ( (error = proc_suser(p)) ) /* must be super user to 
muck with image */ + return (error); + switch (name[0]) { default: - return (EOPNOTSUPP); + return (ENOTSUP); + + case KERN_PANICINFO_TEST: + + panic_dialog_test(); + return (0); + case KERN_PANICINFO_MAXSIZE: - if (newp != NULL && (error = suser(p->p_ucred, &p->p_acflag))) - return (error); - error = sysctl_quad(oldp, oldlenp, newp, newlen, &imagesizelimit); + + /* return the image size limits */ + + newlen = 0; + newp = USER_ADDR_NULL; + + error = sysctl_int(oldp, oldlenp, newp, newlen, &image_size_limit); + return (error); - case KERN_PANICINFO_IMAGE16: - bitdepth = 16; - /* and fall through */ - case KERN_PANICINFO_IMAGE32: - /* allocate a buffer for the image pathname */ - MALLOC_ZONE(imname, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - - if (!newp) { - bcopy(image_pathname, imname, image_pathlen); - imname[image_pathlen] = '\0'; - } else - imname[0] = '\0'; - error = sysctl_string(oldp, oldlenp, newp, newlen, - imname, MAXPATHLEN); - if (newp && !error) { - char *tmpstr, *oldstr; - off_t filesize = 0; - size_t len; - vm_offset_t image; - vm_offset_t oimage = NULL; - vm_size_t osize = 0; /* covariable: quiet compiler */ - - len = strlen(imname); - oldstr = image_pathname; - - error = panicimage_from_file(imname, imagesizelimit, - &image, &filesize, p); - if (error) + case KERN_PANICINFO_IMAGE: + + /* If we have a new image, allocate wired kernel memory and copy it in from user space */ + if ( newp != USER_ADDR_NULL ) { + + /* check the length of the incoming image before allocating space for it. */ + if ( newlen > (size_t)image_size_limit ) + return (ENOMEM); + + /* allocate some kernel wired memory for the new image */ + kret = kmem_alloc(kernel_map, &newimage, (vm_size_t)round_page_32(newlen)); + + if (kret != KERN_SUCCESS) { + switch (kret) { + default: + error = EINVAL; + break; + case KERN_NO_SPACE: + case KERN_RESOURCE_SHORTAGE: + error = ENOMEM; + break; + case KERN_PROTECTION_FAILURE: + error = EPERM; + break; + } + + return (error); + } + + /* copy the image in from user space */ + if ( (error = copyin(newp, (char *) newimage, newlen)) ) goto errout; - /* release the old image */ - if (image_ptr) { - oimage = image_ptr; - osize = image_size; + } else { /* setup to make the default image active */ + + newimage = (vm_offset_t )NULL; + newlen = 0; + } + + /* get the current image location and size */ + panic_dialog_get_image( &prev_image_ptr, &prev_image_size ); + + /* did the caller request a copy of the previous image ? */ + if ( oldp != USER_ADDR_NULL ) { + if ( *oldlenp < prev_image_size ) { + error = ERANGE; + goto errout; } - /* remember the new one */ - image_ptr = image; - image_bits = bitdepth; /* new bith depth */ - image_size = filesize; /* new imagesize */ + /* copy the image to user space or zero the size if the default image is active */ + if ( prev_image_ptr != NULL ) { + if ( (error = copyout( prev_image_ptr, oldp, prev_image_size )) ) + goto errout; - if (oimage) - kmem_free(kernel_map, oimage, osize); + *oldlenp = prev_image_size; + } + else /* tell the user that the default image is active */ + *oldlenp = 0; + } - /* save the new name */ - MALLOC(tmpstr, char *, len+1, M_TEMP, M_WAITOK); - bcopy(imname, tmpstr, len); - tmpstr[len] = '\0'; + /* Make the new image active, or reactivate the default image. + But, handle the special case of asking for the current image + without changing the current image. 
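
[The KERN_PANICINFO_IMAGE case here follows a copy-then-swap discipline: the incoming image is copied into freshly wired kernel memory first, only then handed to panic_dialog_set_image(), and the previous image's wired pages are freed only after the swap succeeds; every error path frees the half-installed buffer instead. A condensed sketch of that shape, using the names from the hunk; kernel context, not the verbatim code:]

    static int
    install_panic_image(user_addr_t newp, size_t newlen)
    {
            vm_offset_t newimage;
            unsigned char *prev_ptr;
            unsigned int prev_size;
            int error;

            if (kmem_alloc(kernel_map, &newimage,
                (vm_size_t)round_page_32(newlen)) != KERN_SUCCESS)
                    return (ENOMEM);

            if ((error = copyin(newp, (char *)newimage, newlen)) != 0)
                    goto errout;            /* never publish a partial copy */

            panic_dialog_get_image(&prev_ptr, &prev_size);
            if ((error = panic_dialog_set_image((unsigned char *)newimage,
                newlen)) != 0)
                    goto errout;

            /* Old image is unreachable now; safe to free its wired pages. */
            if (prev_ptr != NULL)
                    kmem_free(kernel_map, (vm_offset_t)prev_ptr,
                        (vm_size_t)round_page_32(prev_size));
            return (0);
    errout:
            kmem_free(kernel_map, newimage, (vm_size_t)round_page_32(newlen));
            return (error);
    }
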
+ */ - image_pathname = tmpstr; /* new pathname */ - image_pathlen = len; /* new pathname length */ + if ( !(oldp && newp == USER_ADDR_NULL) ) { + if ( (error = panic_dialog_set_image( (unsigned char *) newimage, newlen )) ) + goto errout; - /* free the old name */ - FREE(oldstr, M_TEMP); + /* free the wired memory used by the previous image */ + if ( prev_image_ptr != NULL ) { + (void)kmem_free(kernel_map, (vm_offset_t) prev_image_ptr, (vm_size_t)round_page_32(prev_image_size)); + printf("Panic UI memory freed (%d)\n", round_page_32(prev_image_size)); + } } + + return (0); + errout: - FREE_ZONE(imname, MAXPATHLEN, M_NAMEI); + if ( newimage != (vm_offset_t )NULL ) + (void)kmem_free(kernel_map, newimage, (vm_size_t)round_page_32(newlen)); + return (error); } } diff --git a/bsd/kern/kern_pcsamples.c b/bsd/kern/kern_pcsamples.c index 7d7ab169f..f231dd6cb 100644 --- a/bsd/kern/kern_pcsamples.c +++ b/bsd/kern/kern_pcsamples.c @@ -23,15 +23,17 @@ #include #include #include -#include +#include #include #include +#include #include +#include -unsigned int pc_buftomem = 0; -u_long * pc_buffer = 0; /* buffer that holds each pc */ -u_long * pc_bufptr = 0; -u_long * pc_buflast = 0; +vm_offset_t pc_buftomem = 0; +unsigned int * pc_buffer = 0; /* buffer that holds each pc */ +unsigned int * pc_bufptr = 0; +unsigned int * pc_buflast = 0; unsigned int npcbufs = 8192; /* number of pc entries in buffer */ unsigned int pc_bufsize = 0; unsigned int pcsample_flags = 0; @@ -43,16 +45,26 @@ boolean_t pc_trace_frameworks = FALSE; char pcsample_comm[MAXCOMLEN + 1]; /* Set the default framework boundaries */ -u_long pcsample_beg = 0; -u_long pcsample_end = 0; +unsigned int pcsample_beg = 0; +unsigned int pcsample_end = 0; static pid_t global_state_pid = -1; /* Used to control exclusive use of pc_buffer */ extern int pc_trace_buf[]; extern int pc_trace_cnt; +void add_pcbuffer(void); +int branch_tracing_enabled(void); +int disable_branch_tracing(void); +int enable_branch_tracing(void); +int pcsamples_bootstrap(void); +void pcsamples_clear(void); +int pcsamples_control(int *name, u_int namelen, user_addr_t where, size_t *sizep); +int pcsamples_read(user_addr_t buffer, size_t *number); +int pcsamples_reinit(void); + int -enable_branch_tracing() +enable_branch_tracing(void) { #ifndef i386 struct proc *p; @@ -74,24 +86,24 @@ enable_branch_tracing() } int -disable_branch_tracing() +disable_branch_tracing(void) { - struct proc *p; - switch (pc_sample_pid) { + struct proc *p; + switch (pc_sample_pid) { case -1: - pc_trace_frameworks = FALSE; - break; - case 0: - break; - default: - p = pfind(pc_sample_pid); - if (p) { - p->p_flag &= ~P_BTRACE; - } - break; -} - clr_be_bit(); - return 1; + pc_trace_frameworks = FALSE; + break; + case 0: + break; + default: + p = pfind(pc_sample_pid); + if (p) { + p->p_flag &= ~P_BTRACE; + } + break; + } + clr_be_bit(); + return 1; } /* @@ -99,7 +111,7 @@ disable_branch_tracing() * is called from context_switch in the scheduler */ int -branch_tracing_enabled() +branch_tracing_enabled(void) { struct proc *p = current_proc(); if (TRUE == pc_trace_frameworks) return TRUE; @@ -111,12 +123,10 @@ branch_tracing_enabled() void -add_pcbuffer() +add_pcbuffer(void) { int i; - u_long pc; - struct proc *curproc; - extern unsigned int kdebug_flags; + unsigned int pc; if (!pcsample_enable) return; @@ -134,7 +144,7 @@ add_pcbuffer() } /* Then the sample is in our range */ - *pc_bufptr = (u_long)pc; + *pc_bufptr = pc; pc_bufptr++; } } @@ -149,7 +159,8 @@ add_pcbuffer() return; } -pcsamples_bootstrap() +int 
+pcsamples_bootstrap(void) { if (!disable_branch_tracing()) return(ENOTSUP); @@ -157,9 +168,9 @@ pcsamples_bootstrap() pc_bufsize = npcbufs * sizeof(* pc_buffer); if (kmem_alloc(kernel_map, &pc_buftomem, (vm_size_t)pc_bufsize) == KERN_SUCCESS) - pc_buffer = (u_long *) pc_buftomem; + pc_buffer = (unsigned int *) pc_buftomem; else - pc_buffer= (u_long *) 0; + pc_buffer = NULL; if (pc_buffer) { pc_bufptr = pc_buffer; @@ -173,12 +184,12 @@ pcsamples_bootstrap() } -pcsamples_reinit() +int +pcsamples_reinit(void) { -int x; -int ret=0; + int ret=0; - pcsample_enable = 0; + pcsample_enable = 0; if (pc_bufsize && pc_buffer) kmem_free(kernel_map, (vm_offset_t)pc_buffer, pc_bufsize); @@ -187,16 +198,17 @@ int ret=0; return(ret); } -pcsamples_clear() +void +pcsamples_clear(void) { - /* Clean up the sample buffer, set defaults */ - global_state_pid = -1; + /* Clean up the sample buffer, set defaults */ + global_state_pid = -1; pcsample_enable = 0; if(pc_bufsize && pc_buffer) kmem_free(kernel_map, (vm_offset_t)pc_buffer, pc_bufsize); - pc_buffer = (u_long *)0; - pc_bufptr = (u_long *)0; - pc_buflast = (u_long *)0; + pc_buffer = NULL; + pc_bufptr = NULL; + pc_buflast = NULL; pc_bufsize = 0; pcsample_beg= 0; pcsample_end= 0; @@ -204,27 +216,24 @@ pcsamples_clear() (void)disable_branch_tracing(); pc_sample_pid = 0; pc_trace_frameworks = FALSE; - } -pcsamples_control(name, namelen, where, sizep) -int *name; -u_int namelen; -char *where; -size_t *sizep; +int +pcsamples_control(int *name, __unused u_int namelen, user_addr_t where, size_t *sizep) { -int ret=0; -int size=*sizep; -unsigned int value = name[1]; -pcinfo_t pc_bufinfo; -pid_t *pidcheck; - -pid_t curpid; -struct proc *p, *curproc; - - if (name[0] != PCSAMPLE_GETNUMBUF) - { - if(curproc = current_proc()) + int ret=0; + size_t size=*sizep; + int value = name[1]; + pcinfo_t pc_bufinfo; + pid_t *pidcheck; + + pid_t curpid; + struct proc *p, *curproc; + + if (name[0] != PCSAMPLE_GETNUMBUF) + { + curproc = current_proc(); + if (curproc) curpid = curproc->p_pid; else return (ESRCH); @@ -243,29 +252,29 @@ struct proc *p, *curproc; /* The global pid exists, deny this request */ return(EBUSY); } - } - } + } + } switch(name[0]) { - case PCSAMPLE_DISABLE: /* used to disable */ + case PCSAMPLE_DISABLE: /* used to disable */ pcsample_enable=0; break; - case PCSAMPLE_SETNUMBUF: - /* The buffer size is bounded by a min and max number of samples */ - if (value < pc_trace_cnt) { - ret=EINVAL; + case PCSAMPLE_SETNUMBUF: + /* The buffer size is bounded by a min and max number of samples */ + if (value < pc_trace_cnt) { + ret=EINVAL; break; } if (value <= MAX_PCSAMPLES) - /* npcbufs = value & ~(PC_TRACE_CNT-1); */ - npcbufs = value; + /* npcbufs = value & ~(PC_TRACE_CNT-1); */ + npcbufs = value; else - npcbufs = MAX_PCSAMPLES; + npcbufs = MAX_PCSAMPLES; break; - case PCSAMPLE_GETNUMBUF: - if(size < sizeof(pcinfo_t)) { - ret=EINVAL; + case PCSAMPLE_GETNUMBUF: + if (size < sizeof(pc_bufinfo)) { + ret=EINVAL; break; } pc_bufinfo.npcbufs = npcbufs; @@ -278,13 +287,13 @@ struct proc *p, *curproc; ret=EINVAL; } break; - case PCSAMPLE_SETUP: + case PCSAMPLE_SETUP: ret=pcsamples_reinit(); break; - case PCSAMPLE_REMOVE: + case PCSAMPLE_REMOVE: pcsamples_clear(); break; - case PCSAMPLE_READBUF: + case PCSAMPLE_READBUF: /* A nonzero value says enable and wait on the buffer */ /* A zero value says read up the buffer immediately */ if (value == 0) @@ -333,13 +342,13 @@ struct proc *p, *curproc; } break; - case PCSAMPLE_SETREG: - if (size < sizeof(pcinfo_t)) + case PCSAMPLE_SETREG: + 
if (size < sizeof(pc_bufinfo)) { ret = EINVAL; break; } - if (copyin(where, &pc_bufinfo, sizeof(pcinfo_t))) + if (copyin(where, &pc_bufinfo, sizeof(pc_bufinfo))) { ret = EINVAL; break; @@ -348,25 +357,25 @@ struct proc *p, *curproc; pcsample_beg = pc_bufinfo.pcsample_beg; pcsample_end = pc_bufinfo.pcsample_end; break; - case PCSAMPLE_COMM: - if (!(sizeof(pcsample_comm) > size)) - { - ret = EINVAL; - break; - } - bzero((void *)pcsample_comm, sizeof(pcsample_comm)); - if (copyin(where, pcsample_comm, size)) - { - ret = EINVAL; + case PCSAMPLE_COMM: + if (!(sizeof(pcsample_comm) > size)) + { + ret = EINVAL; + break; + } + bzero((void *)pcsample_comm, sizeof(pcsample_comm)); + if (copyin(where, pcsample_comm, size)) + { + ret = EINVAL; break; - } + } /* Check for command name or pid */ - if (pcsample_comm[0] != '\0') - { - ret= EOPNOTSUPP; + if (pcsample_comm[0] != '\0') + { + ret= ENOTSUP; break; - } + } else { if (size != (2 * sizeof(pid_t))) @@ -381,8 +390,8 @@ struct proc *p, *curproc; } } break; - default: - ret= EOPNOTSUPP; + default: + ret= ENOTSUP; break; } return(ret); @@ -396,13 +405,13 @@ struct proc *p, *curproc; to fill the buffer and throw the rest away. This buffer never wraps. */ -pcsamples_read(u_long *buffer, size_t *number) +int +pcsamples_read(user_addr_t buffer, size_t *number) { -int count=0; -int ret=0; -int copycount; + size_t count=0; + size_t copycount; - count = (*number)/sizeof(u_long); + count = (*number)/sizeof(* pc_buffer); if (count && pc_bufsize && pc_buffer) { @@ -418,7 +427,7 @@ int copycount; copycount = count; /* We actually have data to send up */ - if(copyout(pc_buffer, buffer, copycount * sizeof(u_long))) + if(copyout(pc_buffer, buffer, copycount * sizeof(* pc_buffer))) { *number = 0; return(EINVAL); diff --git a/bsd/kern/kern_physio.c b/bsd/kern/kern_physio.c index c4f2415f5..2b6ba9062 100644 --- a/bsd/kern/kern_physio.c +++ b/bsd/kern/kern_physio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,23 +66,28 @@ #include #include -#include +#include #include -#include +#include +#include int physio(strategy, bp, dev, flags, minphys, uio, blocksize) void (*strategy)(); - struct buf *bp; + buf_t bp; dev_t dev; int flags; u_int (*minphys)(); struct uio *uio; int blocksize; { - struct iovec *iovp; struct proc *p = current_proc(); - int error, done, i, nobuf, s, todo; + int error, i, nobuf, todo, iosize; +#if LP64KERN + int64_t done; +#else + int done; +#endif error = 0; flags &= B_READ | B_WRITE; @@ -95,64 +100,56 @@ physio(strategy, bp, dev, flags, minphys, uio, blocksize) * we're doing a read, that's a *write* to user-space. */ for (i = 0; i < uio->uio_iovcnt; i++) { - if(uio->uio_segflg != UIO_SYSSPACE) { - if (!useracc(uio->uio_iov[i].iov_base, - uio->uio_iov[i].iov_len, + if(UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { + if (!useracc(uio_iov_base_at(uio, i), + uio_iov_len_at(uio, i), (flags == B_READ) ? B_WRITE : B_READ)) return (EFAULT); } } /* Make sure we have a buffer, creating one if necessary. 
*/ if (nobuf = (bp == NULL)) { -// bp = getphysbuf(); - panic("physio: null buf pointer\n"); + bp = buf_alloc((vnode_t)0); } - /* [raise the processor priority level to splbio;] */ - s = splbio(); - /* [while the buffer is marked busy] */ - while (bp->b_flags & B_BUSY) { - /* [mark the buffer wanted] */ - bp->b_flags |= B_WANTED; - /* [wait until the buffer is available] */ - tsleep((caddr_t)bp, PRIBIO+1, "physbuf", 0); + while (((error = (int)buf_acquire(bp, 0, 0, 0)) == EAGAIN)); + + if (error) { + if (nobuf) + buf_free(bp); + return (error); } - /* Mark it busy, so nobody else will use it. */ - bp->b_flags |= B_BUSY; - - /* [lower the priority level] */ - splx(s); - /* [set up the fixed part of the buffer for a transfer] */ bp->b_dev = dev; - bp->b_error = 0; bp->b_proc = p; + buf_seterror(bp, 0); /* - * [while there are data to transfer and no I/O error] + * [while there is data to transfer and no I/O error] * Note that I/O errors are handled with a 'goto' at the bottom * of the 'while' loop. */ for (i = 0; i < uio->uio_iovcnt; i++) { - iovp = &uio->uio_iov[i]; - while (iovp->iov_len > 0) { + while (uio_iov_len_at(uio, i) > 0) { /* * [mark the buffer busy for physical I/O] * (i.e. set B_PHYS (because it's an I/O to user * memory, and B_RAW, because B_RAW is to be * "Set by physio for raw transfers.", in addition - * to the "busy" and read/write flag.) + * to the read/write flag.) */ - s = splbio(); - bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags; - splx(s); + buf_setflags(bp, B_PHYS | B_RAW | flags); + + if ( (iosize = uio_iov_len_at(uio, i)) > MAXPHYSIO_WIRED) + iosize = MAXPHYSIO_WIRED; /* [set up the buffer for a maximum-sized transfer] */ - bp->b_blkno = uio->uio_offset / blocksize; - bp->b_bcount = iovp->iov_len; - bp->b_data = iovp->iov_base; + buf_setblkno(bp, uio->uio_offset / blocksize); + buf_setcount(bp, iosize); + // LP64todo - fix this! + buf_setdataptr(bp, CAST_DOWN(caddr_t, uio_iov_base_at(uio, i))); /* * [call minphys to bound the tranfer size] @@ -160,65 +157,42 @@ physio(strategy, bp, dev, flags, minphys, uio, blocksize) * for later comparison. */ (*minphys)(bp); - todo = bp->b_bcount; + todo = buf_count(bp); /* * [lock the part of the user address space involved * in the transfer] - * Beware vmapbuf(); it clobbers b_data and - * saves it in b_saveaddr. However, vunmapbuf() - * restores it. */ - if(uio->uio_segflg != UIO_SYSSPACE) - vslock(bp->b_data, todo); + if(UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) + vslock(CAST_USER_ADDR_T(buf_dataptr(bp)), + (user_size_t)todo); -#if 0 - vmapbuf(bp, todo); -#endif /* 0 */ /* [call strategy to start the transfer] */ (*strategy)(bp); - /* - * Note that the raise/wait/lower/get error - * steps below would be done by biowait(), but - * we want to unlock the address space before - * we lower the priority. - * - * [raise the priority level to splbio] - */ - s = splbio(); /* [wait for the transfer to complete] */ - while ((bp->b_flags & B_DONE) == 0) - tsleep((caddr_t) bp, PRIBIO + 1, "physio", 0); + error = (int)buf_biowait(bp); /* * [unlock the part of the address space previously * locked] */ -#if 0 - vunmapbuf(bp, todo); -#endif /* 0 */ - if(uio->uio_segflg != UIO_SYSSPACE) - vsunlock(bp->b_data, todo); - - /* remember error value (save a splbio/splx pair) */ - if (bp->b_flags & B_ERROR) - error = (bp->b_error ? 
bp->b_error : EIO); - - /* [lower the priority level] */ - splx(s); + if(UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) + vsunlock(CAST_USER_ADDR_T(buf_dataptr(bp)), + (user_size_t)todo, + (flags & B_READ)); /* * [deduct the transfer size from the total number * of data to transfer] */ - done = bp->b_bcount - bp->b_resid; - iovp->iov_len -= done; - iovp->iov_base += done; - uio->uio_offset += done; - uio->uio_resid -= done; + done = buf_count(bp) - buf_resid(bp); + uio_iov_len_add_at(uio, -done, i); + uio_iov_base_add_at(uio, done, i); + uio->uio_offset += done; + uio_setresid(uio, (uio_resid(uio) - done)); /* * Now, check for an error. @@ -235,25 +209,14 @@ done: * Remember if somebody wants it, so we can wake them up below. * Also, if we had to steal it, give it back. */ - s = splbio(); - bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW); -#if 0 - if (nobuf) - putphysbuf(bp); + buf_clearflags(bp, B_PHYS | B_RAW); + if (nobuf) + buf_free(bp); else -#endif /* 0 */ - { - /* - * [if another process is waiting for the raw I/O buffer, - * wake up processes waiting to do physical I/O; - */ - if (bp->b_flags & B_WANTED) { - bp->b_flags &= ~B_WANTED; - wakeup(bp); + { + buf_drop(bp); } - } - splx(s); return (error); } @@ -272,8 +235,8 @@ minphys(bp) struct buf *bp; { - bp->b_bcount = min(MAXPHYS, bp->b_bcount); - return bp->b_bcount; + buf_setcount(bp, min(MAXPHYS, buf_count(bp))); + return buf_count(bp); } /* diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 2a4636217..89a60a915 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -67,11 +67,10 @@ #include #include #include -#include -#include +#include #include #include -#include +#include #include #include #include @@ -80,6 +79,7 @@ #include #include #include +#include /* * Structure associated with user cacheing. @@ -102,6 +102,7 @@ struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; +extern struct tty cons; /* Name to give to core files */ __private_extern__ char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; @@ -154,6 +155,8 @@ chgproccnt(uid, diff) panic("chgproccnt: lost user"); } MALLOC_ZONE(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); + if (uip == NULL) + panic("chgproccnt: M_PROC zone depleted"); LIST_INSERT_HEAD(uipp, uip, ui_hash); uip->ui_uid = uid; uip->ui_proccnt = diff; @@ -177,7 +180,7 @@ inferior(p) * Is p an inferior of t ? 
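(Returning to the kern_physio.c hunk for a moment.) Since physio() now allocates its own buf_t via buf_alloc() when bp is NULL, instead of panicking, a character driver can use it for raw I/O in the classic style. A hedged sketch, where mydev_strategy and the 512-byte block size are hypothetical placeholders, and the physio()/minphys() declarations are assumed to come from the usual kernel headers:

    #include <sys/types.h>
    #include <sys/buf.h>
    #include <sys/uio.h>

    extern void mydev_strategy(buf_t bp);    /* hypothetical driver routine */

    int
    mydev_read(dev_t dev, struct uio *uio, int ioflag)
    {
        (void)ioflag;
        /* bp == NULL: physio() now allocates and frees the buf itself */
        return physio(mydev_strategy, (buf_t)NULL, dev, B_READ,
            minphys, uio, 512);
    }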
*/ int -isinferior(struct proc *p, register struct proc *t) +isinferior(struct proc *p, struct proc *t) { /* if p==t they are not inferior */ @@ -189,6 +192,186 @@ isinferior(struct proc *p, register struct proc *t) return (1); } +int +proc_isinferior(int pid1, int pid2) +{ + proc_t p; + proc_t t; + + if (((p = pfind(pid1)) != (struct proc *)0 ) && ((t = pfind(pid2)) != (struct proc *)0)) + return (isinferior(p, t)); + return(0); +} + +proc_t +proc_find(int pid) +{ + return(pfind(pid)); +} + +int +proc_rele(__unused proc_t p) +{ + return(0); +} + +proc_t +proc_self() +{ + return(current_proc()); +} + + +int +proc_pid(proc_t p) +{ + return(p->p_pid); +} + +int +proc_ppid(proc_t p) +{ + if (p->p_pptr != (struct proc *)0) + return(p->p_pptr->p_pid); + return(0); +} + +int +proc_selfpid(void) +{ + struct proc *p = current_proc(); + return(p->p_pid); +} + + +int +proc_selfppid(void) +{ + struct proc *p = current_proc(); + if (p->p_pptr) + return(p->p_pptr->p_pid); + else + return(0); +} + +void +proc_name(int pid, char * buf, int size) +{ + struct proc *p; + + if ((p = pfind(pid))!= (struct proc *)0) { + strncpy(buf, &p->p_comm[0], size); + buf[size-1] = 0; + } +} + +void +proc_selfname(char * buf, int size) +{ + struct proc *p; + + if ((p = current_proc())!= (struct proc *)0) { + strncpy(buf, &p->p_comm[0], size); + buf[size-1] = 0; + } +} + +void +proc_signal(int pid, int signum) +{ + proc_t p; + + if ((p = pfind(pid))!= (struct proc *)0) { + psignal(p, signum); + } +} + +int +proc_issignal(int pid, sigset_t mask) +{ + proc_t p; + + if ((p = pfind(pid))!= (struct proc *)0) { + return(proc_pendingsignals(p, mask)); + } + return(0); +} + +int +proc_noremotehang(proc_t p) +{ + int retval = 0; + + if (p) + retval = p->p_flag & P_NOREMOTEHANG; + return(retval? 1: 0); + +} + +int +proc_exiting(proc_t p) +{ + int retval = 0; + + if (p) + retval = p->p_flag & P_WEXIT; + return(retval? 1: 0); +} + + +int +proc_forcequota(proc_t p) +{ + int retval = 0; + + if (p) + retval = p->p_flag & P_FORCEQUOTA; + return(retval? 1: 0); + +} + +int +proc_tbe(proc_t p) +{ + int retval = 0; + + if (p) + retval = p->p_flag & P_TBE; + return(retval? 
1: 0); + +} + +int +proc_suser(proc_t p) +{ + return(suser(p->p_ucred, NULL)); + +} + +kauth_cred_t +proc_ucred(proc_t p) +{ + return(p->p_ucred); +} + + +int +proc_is64bit(proc_t p) +{ + return(IS_64BIT_PROCESS(p)); +} + +/* LP64todo - figure out how to identify 64-bit processes if NULL procp */ +int +IS_64BIT_PROCESS(proc_t p) +{ + if (p && (p->p_flag & P_LP64)) + return(1); + else + return(0); +} + + /* * Locate a process by number */ @@ -267,6 +450,8 @@ enterpgrp(p, pgid, mksess) #endif MALLOC_ZONE(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK); + if (pgrp == NULL) + panic("enterpgrp: M_PGRP zone depleted"); if ((np = pfind(savepid)) == NULL || np != p) { FREE_ZONE(pgrp, sizeof(struct pgrp), M_PGRP); return (ESRCH); @@ -279,6 +464,8 @@ enterpgrp(p, pgid, mksess) */ MALLOC_ZONE(sess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK); + if (sess == NULL) + panic("enterpgrp: M_SESSION zone depleted"); sess->s_leader = p; sess->s_sid = p->p_pid; sess->s_count = 1; @@ -341,13 +528,21 @@ void pgdelete(pgrp) register struct pgrp *pgrp; { + struct tty * ttyp; + int removettypgrp = 0; + ttyp = pgrp->pg_session->s_ttyp; if (pgrp->pg_session->s_ttyp != NULL && - pgrp->pg_session->s_ttyp->t_pgrp == pgrp) + pgrp->pg_session->s_ttyp->t_pgrp == pgrp) { pgrp->pg_session->s_ttyp->t_pgrp = NULL; + removettypgrp = 1; + } LIST_REMOVE(pgrp, pg_hash); - if (--pgrp->pg_session->s_count == 0) + if (--pgrp->pg_session->s_count == 0) { + if (removettypgrp && (ttyp == &cons) && (ttyp->t_session == pgrp->pg_session)) + ttyp->t_session = 0; FREE_ZONE(pgrp->pg_session, sizeof(struct session), M_SESSION); + } FREE_ZONE(pgrp, sizeof *pgrp, M_PGRP); } @@ -400,7 +595,7 @@ fixjobc(struct proc *p, struct pgrp *pgrp, int entering) hispgrp->pg_jobc++; else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); -} + } } /* @@ -427,15 +622,17 @@ orphanpg(struct pgrp *pg) } #ifdef DEBUG +void pgrpdump(void); /* forward declare here (called from debugger) */ + void -pgrpdump() +pgrpdump(void) { - register struct pgrp *pgrp; - register struct proc *p; - register i; + struct pgrp *pgrp; + struct proc *p; + u_long i; for (i = 0; i <= pgrphash; i++) { - if (pgrp = pgrphashtbl[i].lh_first) { + if ((pgrp = pgrphashtbl[i].lh_first) != NULL) { printf("\tindx %d\n", i); for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) { printf("\tpgrp 0x%08x, pgid %d, sess %p, sesscnt %d, mem %p\n", diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index db25475d6..1a963663b 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,31 +68,34 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include -#include +#include +#include #include #include #include +int groupmember(gid_t gid, kauth_cred_t cred); +int is_suser(void); +int is_suser1(void); + +extern int prepare_profile_database(int user); + /* * setprivexec: (dis)allow this process to hold * task, thread, or execption ports of processes about to exec. 
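(Back to the kern_proc.c hunk above for a moment.) The new proc_find()/proc_pid()/proc_ppid()/proc_name()/proc_rele() calls give outside code a stable facade over struct proc. A hedged sketch of a consumer — log_process is hypothetical, and even though proc_find()/proc_rele() are only a pfind() wrapper and a no-op today, pairing them keeps callers correct if real reference counting arrives later:

    #include <sys/param.h>
    #include <sys/proc.h>
    #include <sys/systm.h>

    static void
    log_process(int pid)
    {
        proc_t p = proc_find(pid);
        char name[MAXCOMLEN + 1];

        if (p == NULL)
            return;
        proc_name(pid, name, sizeof(name));
        printf("pid %d (%s) ppid %d %s\n", proc_pid(p), name,
            proc_ppid(p), proc_is64bit(p) ? "64-bit" : "32-bit");
        proc_rele(p);
    }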
*/ -struct setprivexec_args { - int flag; -}; int -setprivexec(p, uap, retval) - struct proc *p; - register struct setprivexec_args *uap; - register_t *retval; +setprivexec(struct proc *p, struct setprivexec_args *uap, register_t *retval) { AUDIT_ARG(value, uap->flag); *retval = p->p_debugger; @@ -101,24 +104,17 @@ setprivexec(p, uap, retval) } /* ARGSUSED */ -getpid(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +getpid(struct proc *p, __unused struct getpid_args *uap, register_t *retval) { *retval = p->p_pid; -#if COMPAT_43 - retval[1] = p->p_pptr->p_pid; -#endif return (0); } /* ARGSUSED */ -getppid(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +getppid(struct proc *p, __unused struct getppid_args *uap, register_t *retval) { *retval = p->p_pptr->p_pid; @@ -126,10 +122,8 @@ getppid(p, uap, retval) } /* Get process group ID; note that POSIX getpgrp takes no parameter */ -getpgrp(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +getpgrp(struct proc *p, __unused struct getpgrp_args *uap, register_t *retval) { *retval = p->p_pgrp->pg_id; @@ -137,15 +131,8 @@ getpgrp(p, uap, retval) } /* Get an arbitary pid's process group id */ -struct getpgid_args { - pid_t pid; -}; - int -getpgid(p, uap, retval) - struct proc *p; - struct getpgid_args *uap; - register_t *retval; +getpgid(struct proc *p, struct getpgid_args *uap, register_t *retval) { struct proc *pt; @@ -163,15 +150,9 @@ found: /* * Get an arbitary pid's session id. */ -struct getsid_args { - pid_t pid; -}; int -getsid(p, uap, retval) - struct proc *p; - struct getsid_args *uap; - register_t *retval; +getsid(struct proc *p, struct getsid_args *uap, register_t *retval) { struct proc *pt; @@ -187,41 +168,54 @@ found: } /* ARGSUSED */ -getuid(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +getuid(__unused struct proc *p, __unused struct getuid_args *uap, register_t *retval) { - *retval = p->p_cred->p_ruid; -#if COMPAT_43 - retval[1] = p->p_ucred->cr_uid; -#endif + *retval = kauth_getruid(); return (0); } /* ARGSUSED */ -geteuid(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +geteuid(__unused struct proc *p, __unused struct geteuid_args *uap, register_t *retval) +{ + + *retval = kauth_getuid(); + return (0); +} + +/* + * Return the per-thread override identity. + */ +int +gettid(__unused struct proc *p, struct gettid_args *uap, register_t *retval) { + struct uthread *uthread = get_bsdthread_info(current_thread()); + int error; - *retval = p->p_ucred->cr_uid; + /* + * If this thread is not running with an override identity, we can't + * return one to the caller, so return an error instead. + */ + if (!(uthread->uu_flag & UT_SETUID)) + return (ESRCH); + + if ((error = suword(uap->uidp, uthread->uu_ucred->cr_ruid))) + return (error); + if ((error = suword(uap->gidp, uthread->uu_ucred->cr_rgid))) + return (error); + + *retval = 0; return (0); } /* ARGSUSED */ -getgid(p, uap, retval) - struct proc *p; - void *uap; - register_t *retval; +int +getgid(__unused struct proc *p, __unused struct getgid_args *uap, register_t *retval) { - *retval = p->p_cred->p_rgid; -#if COMPAT_43 - retval[1] = p->p_ucred->cr_groups[0]; -#endif + *retval = kauth_getrgid(); return (0); } @@ -231,52 +225,70 @@ getgid(p, uap, retval) * correctly in a library function. 
 */
 /* ARGSUSED */
-getegid(p, uap, retval)
- struct proc *p;
- void *uap;
- register_t *retval;
+int
+getegid(struct proc *p, __unused struct getegid_args *uap, register_t *retval)
 {
- *retval = p->p_ucred->cr_groups[0];
+ *retval = kauth_getgid();
 return (0);
 }

-struct getgroups_args {
- u_int gidsetsize;
- gid_t *gidset;
-};
-getgroups(p, uap, retval)
- struct proc *p;
- register struct getgroups_args *uap;
- register_t *retval;
+int
+getgroups(__unused struct proc *p, struct getgroups_args *uap, register_t *retval)
 {
- register struct pcred *pc = p->p_cred;
- register u_int ngrp;
+ register int ngrp;
 int error;
+ kauth_cred_t cred;
+
+ /* grab reference while we muck around with the credential */
+ cred = kauth_cred_get_with_ref();

 if ((ngrp = uap->gidsetsize) == 0) {
- *retval = pc->pc_ucred->cr_ngroups;
+ *retval = cred->cr_ngroups;
+ kauth_cred_rele(cred);
 return (0);
 }
- if (ngrp < pc->pc_ucred->cr_ngroups)
+ if (ngrp < cred->cr_ngroups) {
+ kauth_cred_rele(cred);
 return (EINVAL);
- pcred_readlock(p);
- ngrp = pc->pc_ucred->cr_ngroups;
- if (error = copyout((caddr_t)pc->pc_ucred->cr_groups,
- (caddr_t)uap->gidset, ngrp * sizeof(gid_t))) {
- pcred_unlock(p);
+ }
+ ngrp = cred->cr_ngroups;
+ if ((error = copyout((caddr_t)cred->cr_groups,
+ uap->gidset,
+ ngrp * sizeof(gid_t)))) {
+ kauth_cred_rele(cred);
 return (error);
 }
- pcred_unlock(p);
+ kauth_cred_rele(cred);
 *retval = ngrp;
 return (0);
 }

+/*
+ * Return the per-thread/per-process supplementary groups list.
+ */
+#warning XXX implement
+int
+getsgroups(__unused struct proc *p, __unused struct getsgroups_args *uap, __unused register_t *retval)
+{
+ /* XXX implement */
+ return(ENOTSUP);
+}
+
+/*
+ * Return the per-thread/per-process whiteout groups list.
+ */
+#warning XXX implement
+int
+getwgroups(__unused struct proc *p, __unused struct getwgroups_args *uap, __unused register_t *retval)
+{
+ /* XXX implement */
+ return(ENOTSUP);
+}
+
 /* ARGSUSED */
-setsid(p, uap, retval)
- register struct proc *p;
- void *uap;
- register_t *retval;
+int
+setsid(struct proc *p, __unused struct setsid_args *uap, register_t *retval)
 {
 if (p->p_pgid == p->p_pid || pgfind(p->p_pid) || p->p_flag & P_INVFORK) {
@@ -297,19 +309,14 @@ setsid(p, uap, retval)
 * if a child
 * pid must be in same session (EPERM)
 * pid can't have done an exec (EACCES)
+ * if pgid is negative, return EINVAL (as per the SUS spec)
 * if pgid != pid
 * there must exist some pid in same session having pgid (EPERM)
 * pid must not be session leader (EPERM)
 */
-struct setpgid_args {
- int pid;
- int pgid;
-};
 /* ARGSUSED */
-setpgid(curp, uap, retval)
- struct proc *curp;
- register struct setpgid_args *uap;
- register_t *retval;
+int
+setpgid(struct proc *curp, register struct setpgid_args *uap, __unused register_t *retval)
 {
 register struct proc *targp; /* target process */
 register struct pgrp *pgrp; /* target pgrp */
@@ -325,6 +332,8 @@ setpgid(curp, uap, retval)
 targp = curp;
 if (SESS_LEADER(targp))
 return (EPERM);
+ if (uap->pgid < 0)
+ return(EINVAL);
 if (uap->pgid == 0)
 uap->pgid = targp->p_pid;
 else if (uap->pgid != targp->p_pid)
@@ -334,13 +343,8 @@ setpgid(curp, uap, retval)
 return (enterpgrp(targp, uap->pgid, 0));
 }

-struct issetugid_args {
- int dummy;
-};
-issetugid(p, uap, retval)
- struct proc *p;
- struct issetugid_args *uap;
- register_t *retval;
+int
+issetugid(struct proc *p, __unused struct issetugid_args *uap, register_t *retval)
 {
 /*
 * Note: OpenBSD sets a P_SUGIDEXEC flag set at execve() time,
@@ -355,23 +359,18 @@ issetugid(p, uap, retval)
 return (0);
 }

-struct
setuid_args { - uid_t uid; -}; /* ARGSUSED */ -setuid(p, uap, retval) - struct proc *p; - struct setuid_args *uap; - register_t *retval; +int +setuid(struct proc *p, struct setuid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; register uid_t uid; int error; + kauth_cred_t my_cred, my_new_cred; uid = uap->uid; AUDIT_ARG(uid, uid, 0, 0, 0); - if (uid != pc->p_ruid && - (error = suser(pc->pc_ucred, &p->p_acflag))) + if (uid != p->p_ucred->cr_ruid && + (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Everything's okay, do it. @@ -381,239 +380,447 @@ setuid(p, uap, retval) /* prepare app access profile files */ prepare_profile_database(uap->uid); - pcred_writelock(p); - (void)chgproccnt(pc->p_ruid, -1); + (void)chgproccnt(kauth_getruid(), -1); (void)chgproccnt(uid, 1); - pc->pc_ucred = crcopy(pc->pc_ucred); - pc->pc_ucred->cr_uid = uid; - pc->p_ruid = uid; - pc->p_svuid = uid; - pcred_unlock(p); + + /* get current credential and take a reference while we muck with it */ + for (;;) { + my_cred = kauth_cred_proc_ref(p); + + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_setuid(my_cred, uid); + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. + */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + p->p_flag |= P_SUGID; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + set_security_token(p); - p->p_flag |= P_SUGID; return (0); } -struct seteuid_args { - uid_t euid; -}; /* ARGSUSED */ -seteuid(p, uap, retval) - struct proc *p; - struct seteuid_args *uap; - register_t *retval; +int +seteuid(struct proc *p, struct seteuid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; register uid_t euid; int error; + kauth_cred_t my_cred, my_new_cred; euid = uap->euid; AUDIT_ARG(uid, 0, euid, 0, 0); - if (euid != pc->p_ruid && euid != pc->p_svuid && - (error = suser(pc->pc_ucred, &p->p_acflag))) + if (euid != p->p_ucred->cr_ruid && euid != p->p_ucred->cr_svuid && + (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Everything's okay, do it. Copy credentials so other references do - * not see our changes. + * not see our changes. get current credential and take a reference + * while we muck with it */ - pcred_writelock(p); - pc->pc_ucred = crcopy(pc->pc_ucred); - pc->pc_ucred->cr_uid = euid; - pcred_unlock(p); + for (;;) { + my_cred = kauth_cred_proc_ref(p); + + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_seteuid(p->p_ucred, euid); + + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. 
+ */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + p->p_flag |= P_SUGID; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + set_security_token(p); - p->p_flag |= P_SUGID; return (0); } -struct setgid_args { - gid_t gid; -}; /* ARGSUSED */ -setgid(p, uap, retval) - struct proc *p; - struct setgid_args *uap; - register_t *retval; +int +setgid(struct proc *p, struct setgid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; register gid_t gid; int error; + kauth_cred_t my_cred, my_new_cred; gid = uap->gid; AUDIT_ARG(gid, gid, 0, 0, 0); - if (gid != pc->p_rgid && (error = suser(pc->pc_ucred, &p->p_acflag))) + if (gid != p->p_ucred->cr_rgid && (error = suser(p->p_ucred, &p->p_acflag))) return (error); - pcred_writelock(p); - pc->pc_ucred = crcopy(pc->pc_ucred); - pc->pc_ucred->cr_groups[0] = gid; - pc->p_rgid = gid; - pc->p_svgid = gid; /* ??? */ - pcred_unlock(p); + + /* get current credential and take a reference while we muck with it */ + for (;;) { + my_cred = kauth_cred_proc_ref(p); + + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_setgid(p->p_ucred, gid); + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. + */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + p->p_flag |= P_SUGID; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + set_security_token(p); - p->p_flag |= P_SUGID; return (0); } -struct setegid_args { - gid_t egid; -}; /* ARGSUSED */ -setegid(p, uap, retval) - struct proc *p; - struct setegid_args *uap; - register_t *retval; +int +setegid(struct proc *p, struct setegid_args *uap, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; register gid_t egid; int error; + kauth_cred_t my_cred, my_new_cred; egid = uap->egid; AUDIT_ARG(gid, 0, egid, 0, 0); - if (egid != pc->p_rgid && egid != pc->p_svgid && - (error = suser(pc->pc_ucred, &p->p_acflag))) + if (egid != p->p_ucred->cr_rgid && egid != p->p_ucred->cr_svgid && + (error = suser(p->p_ucred, &p->p_acflag))) return (error); - pcred_writelock(p); - pc->pc_ucred = crcopy(pc->pc_ucred); - pc->pc_ucred->cr_groups[0] = egid; - pcred_unlock(p); + + /* get current credential and take a reference while we muck with it */ + for (;;) { + my_cred = kauth_cred_proc_ref(p); + + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_setegid(p->p_ucred, egid); + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. 
+ */
+ if (p->p_ucred != my_cred) {
+ proc_unlock(p);
+ kauth_cred_rele(my_cred);
+ kauth_cred_rele(my_new_cred);
+ /* try again */
+ continue;
+ }
+ p->p_ucred = my_new_cred;
+ p->p_flag |= P_SUGID;
+ proc_unlock(p);
+ }
+ /* drop our extra reference */
+ kauth_cred_rele(my_cred);
+ break;
+ }
+
 set_security_token(p);
- p->p_flag |= P_SUGID;
 return (0);
 }

-struct setgroups_args{
- u_int gidsetsize;
- gid_t *gidset;
-};
+/*
+ * Set the per-thread override identity. The first parameter can be the
+ * current real UID, KAUTH_UID_NONE, or, if the caller is privileged, it
+ * can be any UID. If it is KAUTH_UID_NONE, then as a special case, this
+ * means "revert to the per process credential"; otherwise, if permitted,
+ * it changes the effective, real, and saved UIDs and GIDs for the current
+ * thread to the requested UID and single GID, and clears all other GIDs.
+ */
+int
+settid(struct proc *p, struct settid_args *uap, __unused register_t *retval)
+{
+ kauth_cred_t uc;
+ struct uthread *uthread = get_bsdthread_info(current_thread());
+ register uid_t uid;
+ register gid_t gid;
+
+ uid = uap->uid;
+ gid = uap->gid;
+ AUDIT_ARG(uid, uid, gid, gid, 0);
+
+ if (suser(p->p_ucred, &p->p_acflag) != 0) {
+ return (EPERM);
+ }
+
+ if (uid == KAUTH_UID_NONE) {
+
+ /* must already be assuming another identity in order to revert back */
+ if ((uthread->uu_flag & UT_SETUID) == 0)
+ return (EPERM);
+
+ /* revert to delayed binding of process credential */
+ uc = kauth_cred_proc_ref(p);
+ kauth_cred_rele(uthread->uu_ucred);
+ uthread->uu_ucred = uc;
+ uthread->uu_flag &= ~UT_SETUID;
+ } else {
+ kauth_cred_t my_cred, my_new_cred;
+
+ /* cannot already be assuming another identity */
+ if ((uthread->uu_flag & UT_SETUID) != 0) {
+ return (EPERM);
+ }
+
+ /*
+ * get a new credential instance from the old if this one changes, else
+ * kauth_cred_setuidgid returns the same credential. we take an extra
+ * reference on the current credential while we muck with it here.
+ */
+ kauth_cred_ref(uthread->uu_ucred);
+ my_cred = uthread->uu_ucred;
+ my_new_cred = kauth_cred_setuidgid(my_cred, uid, gid);
+ if (my_cred != my_new_cred)
+ uthread->uu_ucred = my_new_cred;
+ uthread->uu_flag |= UT_SETUID;
+
+ /* drop our extra reference */
+ kauth_cred_rele(my_cred);
+ }
+ /*
+ * XXX should potentially set per thread security token (there is
+ * XXX none).
+ * XXX it is unclear whether P_SUGID should be set at this point;
+ * XXX in theory, it is being deprecated.
+ */
+ return (0);
+}
+
+/*
+ * Set the per-thread override identity. Use this system call for a thread to
+ * assume the identity of another process or to revert back to the normal
+ * identity of the current process.
+ * When the "assume" argument is nonzero, the current thread will assume the
+ * identity of the process represented by the pid argument.
+ * When the assume argument is zero, we revert back to our normal identity.
+ */
+int
+settid_with_pid(struct proc *p, struct settid_with_pid_args *uap, __unused register_t *retval)
+{
+ proc_t target_proc;
+ struct uthread *uthread = get_bsdthread_info(current_thread());
+ kauth_cred_t my_cred, my_target_cred, my_new_cred;
+
+ AUDIT_ARG(pid, uap->pid);
+ AUDIT_ARG(value, uap->assume);
+
+ if (suser(p->p_ucred, &p->p_acflag) != 0) {
+ return (EPERM);
+ }
+
+ /*
+ * XXX should potentially set per thread security token (there is
+ * XXX none).
+ * XXX it is unclear whether P_SUGID should be set at this point;
+ * XXX in theory, it is being deprecated.
+ */ + + /* + * assume argument tells us to assume the identity of the process with the + * id passed in the pid argument. + */ + if (uap->assume != 0) { + /* can't do this if we have already assumed an identity */ + if ((uthread->uu_flag & UT_SETUID) != 0) + return (EPERM); + + target_proc = pfind(uap->pid); + /* can't assume the identity of the kernel process */ + if (target_proc == NULL || target_proc == kernproc) { + return (ESRCH); + } + + /* + * take a reference on the credential used in our target process then use + * it as the identity for our current thread. + */ + kauth_cred_ref(uthread->uu_ucred); + my_cred = uthread->uu_ucred; + my_target_cred = kauth_cred_proc_ref(target_proc); + my_new_cred = kauth_cred_setuidgid(my_cred, my_target_cred->cr_uid, my_target_cred->cr_gid); + if (my_cred != my_new_cred) + uthread->uu_ucred = my_new_cred; + + uthread->uu_flag |= UT_SETUID; + + /* drop our extra references */ + kauth_cred_rele(my_cred); + kauth_cred_rele(my_target_cred); + + return (0); + } + + /* we are reverting back to normal mode of operation where delayed binding + * of the process credential sets the credential in the thread (uu_ucred) + */ + if ((uthread->uu_flag & UT_SETUID) == 0) + return (EPERM); + + /* revert to delayed binding of process credential */ + my_new_cred = kauth_cred_proc_ref(p); + kauth_cred_rele(uthread->uu_ucred); + uthread->uu_ucred = my_new_cred; + uthread->uu_flag &= ~UT_SETUID; + + return (0); +} /* ARGSUSED */ -setgroups(p, uap, retval) - struct proc *p; - struct setgroups_args *uap; - register_t *retval; +static int +setgroups1(struct proc *p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused register_t *retval) { - register struct pcred *pc = p->p_cred; - struct ucred *new, *old; register u_int ngrp; - int error; + gid_t newgroups[NGROUPS] = { 0 }; + int error; + kauth_cred_t my_cred, my_new_cred; - if (error = suser(pc->pc_ucred, &p->p_acflag)) + if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); - ngrp = uap->gidsetsize; + ngrp = gidsetsize; if (ngrp > NGROUPS) return (EINVAL); - new = crget(); - + if ( ngrp < 1 ) { ngrp = 1; } else { - error = copyin((caddr_t)uap->gidset, - (caddr_t)new->cr_groups, ngrp * sizeof(gid_t)); + error = copyin(gidset, + (caddr_t)newgroups, ngrp * sizeof(gid_t)); if (error) { - crfree(new); return (error); } } - new->cr_ngroups = ngrp; - AUDIT_ARG(groupset, new->cr_groups, ngrp); - pcred_writelock(p); - old = pc->pc_ucred; - new->cr_uid = old->cr_uid; - pc->pc_ucred = new; - pcred_unlock(p); + + /* get current credential and take a reference while we muck with it */ + for (;;) { + my_cred = kauth_cred_proc_ref(p); + + /* + * set the credential with new info. If there is no change we get back + * the same credential we passed in. + */ + my_new_cred = kauth_cred_setgroups(p->p_ucred, &newgroups[0], ngrp, gmuid); + if (my_cred != my_new_cred) { + proc_lock(p); + /* need to protect for a race where another thread also changed + * the credential after we took our reference. If p_ucred has + * changed then we should restart this again with the new cred. 
+ */ + if (p->p_ucred != my_cred) { + proc_unlock(p); + kauth_cred_rele(my_cred); + kauth_cred_rele(my_new_cred); + /* try again */ + continue; + } + p->p_ucred = my_new_cred; + p->p_flag |= P_SUGID; + proc_unlock(p); + } + /* drop our extra reference */ + kauth_cred_rele(my_cred); + break; + } + + AUDIT_ARG(groupset, p->p_ucred->cr_groups, ngrp); set_security_token(p); - p->p_flag |= P_SUGID; - if (old != NOCRED) - crfree(old); + return (0); } -#if COMPAT_43 -struct osetreuid_args{ - int ruid; - int euid; -}; -/* ARGSUSED */ -osetreuid(p, uap, retval) - register struct proc *p; - struct osetreuid_args *uap; - register_t *retval; +int +initgroups(struct proc *p, struct initgroups_args *uap, __unused register_t *retval) { - struct seteuid_args seuidargs; - struct setuid_args suidargs; + return(setgroups1(p, uap->gidsetsize, uap->gidset, uap->gmuid, retval)); +} - /* - * There are five cases, and we attempt to emulate them in - * the following fashion: - * -1, -1: return 0. This is correct emulation. - * -1, N: call seteuid(N). This is correct emulation. - * N, -1: if we called setuid(N), our euid would be changed - * to N as well. the theory is that we don't want to - * revoke root access yet, so we call seteuid(N) - * instead. This is incorrect emulation, but often - * suffices enough for binary compatibility. - * N, N: call setuid(N). This is correct emulation. - * N, M: call setuid(N). This is close to correct emulation. - */ - if (uap->ruid == (uid_t)-1) { - if (uap->euid == (uid_t)-1) - return (0); /* -1, -1 */ - seuidargs.euid = uap->euid; /* -1, N */ - return (seteuid(p, &seuidargs, retval)); - } - if (uap->euid == (uid_t)-1) { - seuidargs.euid = uap->ruid; /* N, -1 */ - return (seteuid(p, &seuidargs, retval)); - } - suidargs.uid = uap->ruid; /* N, N and N, M */ - return (setuid(p, &suidargs, retval)); +int +setgroups(struct proc *p, struct setgroups_args *uap, __unused register_t *retval) +{ + return(setgroups1(p, uap->gidsetsize, uap->gidset, KAUTH_UID_NONE, retval)); } -struct osetregid_args { - int rgid; - int egid; -}; -/* ARGSUSED */ -osetregid(p, uap, retval) - register struct proc *p; - struct osetregid_args *uap; - register_t *retval; +/* + * Set the per-thread/per-process supplementary groups list. + */ +#warning XXX implement +int +setsgroups(__unused struct proc *p, __unused struct setsgroups_args *uap, __unused register_t *retval) { - struct setegid_args segidargs; - struct setgid_args sgidargs; + return(ENOTSUP); +} - /* - * There are five cases, described above in osetreuid() - */ - if (uap->rgid == (gid_t)-1) { - if (uap->egid == (gid_t)-1) - return (0); /* -1, -1 */ - segidargs.egid = uap->egid; /* -1, N */ - return (setegid(p, &segidargs, retval)); - } - if (uap->egid == (gid_t)-1) { - segidargs.egid = uap->rgid; /* N, -1 */ - return (setegid(p, &segidargs, retval)); - } - sgidargs.gid = uap->rgid; /* N, N and N, M */ - return (setgid(p, &sgidargs, retval)); +/* + * Set the per-thread/per-process whiteout groups list. + */ +#warning XXX implement +int +setwgroups(__unused struct proc *p, __unused struct setwgroups_args *uap, __unused register_t *retval) +{ + return(ENOTSUP); } -#endif /* COMPAT_43 */ /* * Check if gid is a member of the group set. 
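Before leaving the set*id family: setuid(), seteuid(), setgid(), setegid(), and setgroups1() above all repeat the same compare-and-swap-with-retry update of p->p_ucred. Distilled into one illustrative helper (not something the patch itself adds), with kauth_cred_setuid() standing in for whichever kauth_cred_set*() constructor a given path uses:

    static void
    proc_swap_cred_uid(struct proc *p, uid_t uid)
    {
        kauth_cred_t my_cred, my_new_cred;

        for (;;) {
            /* take a reference while we muck with the credential */
            my_cred = kauth_cred_proc_ref(p);

            /* constructor returns the same cred when nothing changes */
            my_new_cred = kauth_cred_setuid(my_cred, uid);
            if (my_cred != my_new_cred) {
                proc_lock(p);
                /* another thread installed a new cred: retry from the top */
                if (p->p_ucred != my_cred) {
                    proc_unlock(p);
                    kauth_cred_rele(my_cred);
                    kauth_cred_rele(my_new_cred);
                    continue;
                }
                p->p_ucred = my_new_cred;
                p->p_flag |= P_SUGID;
                proc_unlock(p);
            }
            kauth_cred_rele(my_cred);    /* drop our extra reference */
            break;
        }
    }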
+ * + * XXX This interface is going away */ -groupmember(gid, cred) - gid_t gid; - register struct ucred *cred; +int +groupmember(gid_t gid, kauth_cred_t cred) { - register gid_t *gp; - gid_t *egp; + int is_member; - egp = &(cred->cr_groups[cred->cr_ngroups]); - for (gp = cred->cr_groups; gp < egp; gp++) - if (*gp == gid) - return (1); + if (kauth_cred_ismember_gid(cred, gid, &is_member) == 0 && is_member) + return (1); return (0); } @@ -622,16 +829,17 @@ groupmember(gid, cred) * privilege; if so, and we have accounting info, set the flag * indicating use of super-powers. * Returns 0 or error. + * + * XXX This interface is going away */ -suser(cred, acflag) - struct ucred *cred; - u_short *acflag; +int +suser(kauth_cred_t cred, u_short *acflag) { #if DIAGNOSTIC if (cred == NOCRED || cred == FSCRED) panic("suser"); #endif - if (cred->cr_uid == 0) { + if (kauth_cred_getuid(cred) == 0) { if (acflag) *acflag |= ASU; return (0); @@ -659,148 +867,40 @@ is_suser1(void) return (0); return (suser(p->p_ucred, &p->p_acflag) == 0 || - p->p_cred->p_ruid == 0 || p->p_cred->p_svuid == 0); -} - -/* - * Allocate a zeroed cred structure. - */ -struct ucred * -crget() -{ - register struct ucred *cr; - - MALLOC_ZONE(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK); - bzero((caddr_t)cr, sizeof(*cr)); - cr->cr_ref = 1; - return (cr); -} - -/* - * Free a cred structure. - * Throws away space when ref count gets to 0. - */ -void -crfree(cr) - struct ucred *cr; -{ -#if DIAGNOSTIC - if (cr == NOCRED || cr == FSCRED) - panic("crfree"); -#endif - if (--cr->cr_ref == 0) - FREE_ZONE((caddr_t)cr, sizeof *cr, M_CRED); -} - -/* - * Copy cred structure to a new one and free the old one. - */ -struct ucred * -crcopy(cr) - struct ucred *cr; -{ - struct ucred *newcr; - -#if DIAGNOSTIC - if (cr == NOCRED || cr == FSCRED) - panic("crcopy"); -#endif - if (cr->cr_ref == 1) - return (cr); - newcr = crget(); - *newcr = *cr; - crfree(cr); - newcr->cr_ref = 1; - return (newcr); -} - -/* - * Dup cred struct to a new held one. - */ -struct ucred * -crdup(cr) - struct ucred *cr; -{ - struct ucred *newcr; - -#if DIAGNOSTIC - if (cr == NOCRED || cr == FSCRED) - panic("crdup"); -#endif - newcr = crget(); - *newcr = *cr; - newcr->cr_ref = 1; - return (newcr); -} - -/* - * compare two cred structs - */ -int -crcmp(cr1, cr2) - struct ucred *cr1; - struct ucred *cr2; -{ - int i; - - if (cr1 == cr2) - return 0; - if (cr1 == NOCRED || cr1 == FSCRED || - cr2 == NOCRED || cr2 == FSCRED) - return 1; - if (cr1->cr_uid != cr2->cr_uid) - return 1; - if (cr1->cr_ngroups != cr2->cr_ngroups) - return 1; - // XXX assumes groups will always be listed in some order - for (i=0; i < cr1->cr_ngroups; i++) - if (cr1->cr_groups[i] != cr2->cr_groups[i]) - return 1; - return (0); + p->p_ucred->cr_ruid == 0 || p->p_ucred->cr_svuid == 0); } /* * Get login name, if available. */ -struct getlogin_args { - char *namebuf; - u_int namelen; -}; /* ARGSUSED */ -getlogin(p, uap, retval) - struct proc *p; - struct getlogin_args *uap; - register_t *retval; +int +getlogin(struct proc *p, struct getlogin_args *uap, __unused register_t *retval) { if (uap->namelen > sizeof (p->p_pgrp->pg_session->s_login)) uap->namelen = sizeof (p->p_pgrp->pg_session->s_login); return (copyout((caddr_t) p->p_pgrp->pg_session->s_login, - (caddr_t)uap->namebuf, uap->namelen)); + uap->namebuf, uap->namelen)); } /* * Set login name. 
*/ -struct setlogin_args { - char *namebuf; -}; /* ARGSUSED */ -setlogin(p, uap, retval) - struct proc *p; - struct setlogin_args *uap; - register_t *retval; +int +setlogin(struct proc *p, struct setlogin_args *uap, __unused register_t *retval) { int error; int dummy=0; - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); - error = copyinstr((caddr_t) uap->namebuf, + error = copyinstr(uap->namebuf, (caddr_t) p->p_pgrp->pg_session->s_login, sizeof (p->p_pgrp->pg_session->s_login) - 1, (size_t *)&dummy); - if(!error) + if (!error) AUDIT_ARG(text, p->p_pgrp->pg_session->s_login); else if (error == ENAMETOOLONG) error = EINVAL; @@ -809,14 +909,37 @@ setlogin(p, uap, retval) /* Set the secrity token of the task with current euid and eguid */ -kern_return_t +/* + * XXX This needs to change to give the task a reference and/or an opaque + * XXX identifier. + */ +int set_security_token(struct proc * p) { security_token_t sec_token; audit_token_t audit_token; - sec_token.val[0] = p->p_ucred->cr_uid; - sec_token.val[1] = p->p_ucred->cr_gid; + /* + * Don't allow a vfork child to override the parent's token settings + * (since they share a task). Instead, the child will just have to + * suffer along using the parent's token until the exec(). It's all + * undefined behavior anyway, right? + */ + if (p->task == current_task()) { + uthread_t uthread; + uthread = (uthread_t)get_bsdthread_info(current_thread()); + if (uthread->uu_flag & UT_VFORK) + return (1); + } + + /* XXX mach_init doesn't have a p_ucred when it calls this function */ + if (p->p_ucred != NOCRED && p->p_ucred != FSCRED) { + sec_token.val[0] = kauth_cred_getuid(p->p_ucred); + sec_token.val[1] = p->p_ucred->cr_gid; + } else { + sec_token.val[0] = 0; + sec_token.val[1] = 0; + } /* * The current layout of the Mach audit token explicitly @@ -827,36 +950,36 @@ set_security_token(struct proc * p) * the user of the trailer from future representation * changes. */ - audit_token.val[0] = p->p_au->ai_auid; + audit_token.val[0] = p->p_ucred->cr_au.ai_auid; audit_token.val[1] = p->p_ucred->cr_uid; - audit_token.val[2] = p->p_ucred->cr_gid; - audit_token.val[3] = p->p_cred->p_ruid; - audit_token.val[4] = p->p_cred->p_rgid; + audit_token.val[2] = p->p_ucred->cr_gid; + audit_token.val[3] = p->p_ucred->cr_ruid; + audit_token.val[4] = p->p_ucred->cr_rgid; audit_token.val[5] = p->p_pid; - audit_token.val[6] = p->p_au->ai_asid; - audit_token.val[7] = p->p_au->ai_termid.port; + audit_token.val[6] = p->p_ucred->cr_au.ai_asid; + audit_token.val[7] = p->p_ucred->cr_au.ai_termid.port; - return host_security_set_task_token(host_security_self(), + return (host_security_set_task_token(host_security_self(), p->task, sec_token, audit_token, (sec_token.val[0]) ? HOST_PRIV_NULL : - host_priv_self()); + host_priv_self()) != KERN_SUCCESS); } /* - * Fill in a struct xucred based on a struct ucred. + * Fill in a struct xucred based on a kauth_cred_t. */ __private_extern__ void -cru2x(struct ucred *cr, struct xucred *xcr) +cru2x(kauth_cred_t cr, struct xucred *xcr) { bzero(xcr, sizeof(*xcr)); xcr->cr_version = XUCRED_VERSION; - xcr->cr_uid = cr->cr_uid; + xcr->cr_uid = kauth_cred_getuid(cr); xcr->cr_ngroups = cr->cr_ngroups; bcopy(cr->cr_groups, xcr->cr_groups, sizeof(xcr->cr_groups)); } diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index 99b821e5e..6ce5a3874 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,25 +64,29 @@ #include #include #include -#include +#include #include #include -#include -#include +#include +#include +#include + +#include +#include #include -#include #include #include #include #include +#include #include -int donice __P((struct proc *curp, struct proc *chgp, int n)); -int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp)); +int donice(struct proc *curp, struct proc *chgp, int n); +int dosetrlimit(struct proc *p, u_int which, struct rlimit *limp); rlim_t maxdmap = MAXDSIZ; /* XXX */ rlim_t maxsmap = MAXSSIZ; /* XXX */ @@ -106,19 +110,15 @@ SYSCTL_INT( _kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, /* * Resource controls and accounting. */ -struct getpriority_args { - int which; - int who; -}; int -getpriority(curp, uap, retval) - struct proc *curp; - register struct getpriority_args *uap; - register_t *retval; +getpriority(struct proc *curp, struct getpriority_args *uap, register_t *retval) { register struct proc *p; register int low = PRIO_MAX + 1; + if (uap->who < 0) + return (EINVAL); + switch (uap->which) { case PRIO_PROCESS: @@ -147,9 +147,9 @@ getpriority(curp, uap, retval) case PRIO_USER: if (uap->who == 0) - uap->who = curp->p_ucred->cr_uid; + uap->who = kauth_cred_getuid(kauth_cred_get()); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) - if (p->p_ucred->cr_uid == uap->who && + if (kauth_cred_getuid(p->p_ucred) == uap->who && p->p_nice < low) low = p->p_nice; break; @@ -163,17 +163,9 @@ getpriority(curp, uap, retval) return (0); } -struct setpriority_args { - int which; - int who; - int prio; -}; /* ARGSUSED */ int -setpriority(curp, uap, retval) - struct proc *curp; - register struct setpriority_args *uap; - register_t *retval; +setpriority(struct proc *curp, struct setpriority_args *uap, __unused register_t *retval) { register struct proc *p; int found = 0, error = 0; @@ -182,6 +174,9 @@ setpriority(curp, uap, retval) AUDIT_ARG(owner, uap->who, 0); AUDIT_ARG(value, uap->prio); + if (uap->who < 0) + return (EINVAL); + switch (uap->which) { case PRIO_PROCESS: @@ -212,9 +207,9 @@ setpriority(curp, uap, retval) case PRIO_USER: if (uap->who == 0) - uap->who = curp->p_ucred->cr_uid; + uap->who = kauth_cred_getuid(kauth_cred_get()); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) - if (p->p_ucred->cr_uid == uap->who) { + if (kauth_cred_getuid(p->p_ucred) == uap->who) { error = donice(curp, p, uap->prio); found++; } @@ -233,89 +228,33 @@ donice(curp, chgp, n) register struct proc *curp, *chgp; register int n; { - register struct pcred *pcred = curp->p_cred; + kauth_cred_t ucred = curp->p_ucred; - if (pcred->pc_ucred->cr_uid && pcred->p_ruid && - pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && - pcred->p_ruid != chgp->p_ucred->cr_uid) + if (suser(ucred, NULL) && ucred->cr_ruid && + kauth_cred_getuid(ucred) != kauth_cred_getuid(chgp->p_ucred) && + ucred->cr_ruid != kauth_cred_getuid(chgp->p_ucred)) return (EPERM); if (n > PRIO_MAX) n = PRIO_MAX; if (n < PRIO_MIN) n = PRIO_MIN; - if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) + if (n < chgp->p_nice && suser(ucred, &curp->p_acflag)) return (EACCES); chgp->p_nice = n; (void)resetpriority(chgp); return (0); } -#if COMPAT_43 -struct osetrlimit_args { - u_int which; - struct ogetrlimit * rlp; -}; -/* ARGSUSED */ -int -osetrlimit(p, uap, retval) - struct proc *p; - struct osetrlimit_args *uap; - register_t *retval; -{ - struct 
orlimit olim; - struct rlimit lim; - int error; - - if (error = copyin((caddr_t)uap->rlp, (caddr_t)&olim, - sizeof (struct orlimit))) - return (error); - lim.rlim_cur = olim.rlim_cur; - lim.rlim_max = olim.rlim_max; - return (dosetrlimit(p, uap->which, &lim)); -} -struct ogetrlimit_args { - u_int which; - struct ogetrlimit * rlp; -}; /* ARGSUSED */ int -ogetrlimit(p, uap, retval) - struct proc *p; - struct ogetrlimit_args *uap; - register_t *retval; -{ - struct orlimit olim; - - if (uap->which >= RLIM_NLIMITS) - return (EINVAL); - olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur; - if (olim.rlim_cur == -1) - olim.rlim_cur = 0x7fffffff; - olim.rlim_max = p->p_rlimit[uap->which].rlim_max; - if (olim.rlim_max == -1) - olim.rlim_max = 0x7fffffff; - return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, - sizeof(olim))); -} -#endif /* COMPAT_43 */ - -struct setrlimit_args { - u_int which; - struct rlimit * rlp; -}; -/* ARGSUSED */ -int -setrlimit(p, uap, retval) - struct proc *p; - register struct setrlimit_args *uap; - register_t *retval; +setrlimit(struct proc *p, register struct setrlimit_args *uap, __unused register_t *retval) { struct rlimit alim; int error; - if (error = copyin((caddr_t)uap->rlp, (caddr_t)&alim, - sizeof (struct rlimit))) + if ((error = copyin(uap->rlp, (caddr_t)&alim, + sizeof (struct rlimit)))) return (error); return (dosetrlimit(p, uap->which, &alim)); } @@ -327,7 +266,6 @@ dosetrlimit(p, which, limp) struct rlimit *limp; { register struct rlimit *alimp; - extern rlim_t maxdmap, maxsmap; int error; if (which >= RLIM_NLIMITS) @@ -335,7 +273,7 @@ dosetrlimit(p, which, limp) alimp = &p->p_rlimit[which]; if (limp->rlim_cur > alimp->rlim_max || limp->rlim_max > alimp->rlim_max) - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); if (limp->rlim_cur > limp->rlim_max) limp->rlim_cur = limp->rlim_max; @@ -366,9 +304,8 @@ dosetrlimit(p, which, limp) * up make more accessible, if going down make inaccessible. 
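For context, the RLIMIT_STACK grow path here is driven by an ordinary setrlimit(2): raising the soft limit up to the hard limit needs no privilege, while anything past the hard limit trips the suser() check above. A hedged user-space sketch (the 8 MB target is an arbitrary example value):

    #include <sys/resource.h>
    #include <stdio.h>

    int
    main(void)
    {
        struct rlimit rl;
        rlim_t want = 8 * 1024 * 1024;    /* arbitrary example target */

        if (getrlimit(RLIMIT_STACK, &rl) == -1)
            return 1;
        /* clamp to the hard limit so no privilege is needed */
        rl.rlim_cur = (want < rl.rlim_max) ? want : rl.rlim_max;
        if (setrlimit(RLIMIT_STACK, &rl) == -1)
            perror("setrlimit");
        return 0;
    }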
*/ if (limp->rlim_cur != alimp->rlim_cur) { - vm_offset_t addr; - vm_size_t size; - vm_prot_t prot; + user_addr_t addr; + user_size_t size; if (limp->rlim_cur > alimp->rlim_cur) { /* grow stack */ @@ -377,13 +314,14 @@ dosetrlimit(p, which, limp) #if STACK_GROWTH_UP /* go to top of current stack */ - addr = trunc_page((unsigned int)(p->user_stack + alimp->rlim_cur)); + addr = p->user_stack + alimp->rlim_cur; #else STACK_GROWTH_UP - addr = trunc_page_32((unsigned int)(p->user_stack - alimp->rlim_cur)); + addr = p->user_stack - alimp->rlim_cur; addr -= size; #endif /* STACK_GROWTH_UP */ - if (vm_allocate(current_map(), - &addr, size, FALSE) != KERN_SUCCESS) + if (mach_vm_allocate(current_map(), + &addr, size, + VM_FLAGS_FIXED) != KERN_SUCCESS) return(EINVAL); } else { /* shrink stack */ @@ -434,22 +372,14 @@ dosetrlimit(p, which, limp) return (0); } -struct getrlimit_args { - u_int which; - struct rlimit * rlp; -}; /* ARGSUSED */ int -getrlimit(p, uap, retval) - struct proc *p; - register struct getrlimit_args *uap; - register_t *retval; +getrlimit(struct proc *p, register struct getrlimit_args *uap, __unused register_t *retval) { - if (uap->which >= RLIM_NLIMITS) return (EINVAL); return (copyout((caddr_t)&p->p_rlimit[uap->which], - (caddr_t)uap->rlp, sizeof (struct rlimit))); + uap->rlp, sizeof (struct rlimit))); } /* @@ -500,24 +430,22 @@ calcru(p, up, sp, ip) } } -struct getrusage_args { - int who; - struct rusage * rusage; -}; +__private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p); + /* ARGSUSED */ int -getrusage(p, uap, retval) - register struct proc *p; - register struct getrusage_args *uap; - register_t *retval; +getrusage(register struct proc *p, register struct getrusage_args *uap, __unused register_t *retval) { struct rusage *rup, rubuf; + struct user_rusage rubuf64; + size_t retsize = sizeof(rubuf); /* default: 32 bits */ + caddr_t retbuf = (caddr_t)&rubuf; /* default: 32 bits */ switch (uap->who) { - case RUSAGE_SELF: rup = &p->p_stats->p_ru; calcru(p, &rup->ru_utime, &rup->ru_stime, NULL); + // LP64todo: proc struct should have 64 bit version of struct rubuf = *rup; break; @@ -529,8 +457,12 @@ getrusage(p, uap, retval) default: return (EINVAL); } - return (copyout((caddr_t)&rubuf, (caddr_t)uap->rusage, - sizeof (struct rusage))); + if (IS_64BIT_PROCESS(p)) { + retsize = sizeof(rubuf64); + retbuf = (caddr_t)&rubuf64; + munge_rusage(&rubuf, &rubuf64); + } + return (copyout(retbuf, uap->rusage, retsize)); } void @@ -562,6 +494,8 @@ limcopy(lim) MALLOC_ZONE(copy, struct plimit *, sizeof(struct plimit), M_SUBPROC, M_WAITOK); + if (copy == NULL) + panic("limcopy"); bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct rlimit) * RLIM_NLIMITS); copy->p_lflags = 0; diff --git a/bsd/kern/kern_shutdown.c b/bsd/kern/kern_shutdown.c index f8568f9be..7c8eb53b7 100644 --- a/bsd/kern/kern_shutdown.c +++ b/bsd/kern/kern_shutdown.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -30,13 +30,12 @@
 #include
 #include
 #include
-#include
+#include
 #include
-#include
 #include
 #include
-#include
-#include
+#include
+#include
 #include
 #include
 #include
@@ -55,7 +54,9 @@
 #include
 #include
 #include
+#include
 #include
+
 #include

 int waittime = -1;

@@ -70,6 +71,7 @@ boot(paniced, howto, command)
 struct proc *p = current_proc(); /* XXX */
 int hostboot_option=0;
 int funnel_state;
+ struct proc *launchd_proc;

 static void proc_shutdown();
 extern void md_prepare_for_shutdown(int paniced, int howto, char * command);
@@ -96,24 +98,28 @@ boot(paniced, howto, command)

 sync(p, (void *)NULL, (int *)NULL);

- /* Release vnodes from the VM object cache */
- ubc_unmountall();
+ /*
+ * Now that all processes have been terminated and the system is sync'ed up,
+ * suspend launchd
+ */

- IOSleep( 1 * 1000 );
+ launchd_proc = pfind(1);
+ if (launchd_proc && p != launchd_proc) {
+ task_suspend(launchd_proc->task);
+ }

 /*
 * Unmount filesystems
 */
- if (panicstr == 0)
- vfs_unmountall();
+ vfs_unmountall();

 /* Wait for the buffer cache to clean remaining dirty buffers */
- for (iter = 0; iter < 20; iter++) {
+ for (iter = 0; iter < 100; iter++) {
 nbusy = count_busy_buffers();
 if (nbusy == 0)
 break;
 printf("%d ", nbusy);
- IOSleep( 4 * nbusy );
+ IOSleep( 1 * nbusy );
 }
 if (nbusy)
 printf("giving up\n");
@@ -135,6 +141,16 @@ boot(paniced, howto, command)
 if (paniced == RB_PANIC)
 hostboot_option = HOST_REBOOT_HALT;

+ /*
+ * if we're going to power down due to a halt,
+ * give the disks a chance to finish getting
+ * the track cache flushed to the media...
+ * unfortunately, some of our earlier drives
+ * don't properly hold off on returning
+ * from the track flush command (issued by
+ * the unmounts) until it's actually fully
+ * committed.
+ */
 if (hostboot_option == HOST_REBOOT_HALT)
 IOSleep( 1 * 1000 );

@@ -161,6 +177,7 @@ proc_shutdown()
 struct proc *p, *self;
 struct vnode **cdirp, **rdirp, *vp;
 int restart, i, TERM_catch;
+ int delayterm = 0;

 /*
 * Kill as many procs as we can. (Except ourself...)
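The proc_shutdown() changes that follow implement a staged teardown: SIGTERM to processes that catch it, a bounded wait, SIGKILL to the stragglers, then a forced exit1() — with P_LDELAYTERM processes deferred to a second pass. The same escalation pattern as a hedged user-space analogue (terminate_pid is illustrative only, not part of the patch):

    #include <sys/types.h>
    #include <errno.h>
    #include <signal.h>
    #include <unistd.h>

    static void
    terminate_pid(pid_t pid, int grace_ms)
    {
        int waited;

        if (kill(pid, SIGTERM) == -1 && errno == ESRCH)
            return;                        /* already gone */

        /* poll in 100 ms steps, like the kernel's IOSleep(100) loop */
        for (waited = 0; waited < grace_ms; waited += 100) {
            usleep(100 * 1000);
            if (kill(pid, 0) == -1 && errno == ESRCH)
                return;                    /* acted on SIGTERM */
        }
        (void)kill(pid, SIGKILL);          /* didn't act on SIGTERM */
    }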
@@ -168,11 +185,13 @@ proc_shutdown() self = (struct proc *)current_proc(); /* - * Suspend /etc/init + * Signal init (pid 1) with SIGTERM so that it does not launch + * new processes */ p = pfind(1); - if (p && p != self) - task_suspend(p->task); /* stop init */ + if (p && p != self) { + psignal(p, SIGTERM); + } printf("Killing all processes "); @@ -181,15 +200,19 @@ proc_shutdown() */ sigterm_loop: for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_shutdownstate == 0)) { + if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_stat != SZOMB) && (p->p_shutdownstate == 0)) { + + if ((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM)) { + continue; + } if (p->p_sigcatch & sigmask(SIGTERM)) { - p->p_shutdownstate = 1; + p->p_shutdownstate = 1; psignal(p, SIGTERM); goto sigterm_loop; - } } } + } /* * now wait for up to 30 seconds to allow those procs catching SIGTERM * to digest it @@ -201,23 +224,26 @@ sigterm_loop: * and then check to see if the tasks that were sent a * SIGTERM have exited */ - IOSleep(100); + IOSleep(100); TERM_catch = 0; - for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (p->p_shutdownstate == 1) - TERM_catch++; + for (p = allproc.lh_first; p; p = p->p_list.le_next) { + if (p->p_shutdownstate == 1) { + TERM_catch++; + } } if (TERM_catch == 0) break; } if (TERM_catch) { - /* * log the names of the unresponsive tasks */ + for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (p->p_shutdownstate == 1) + if (p->p_shutdownstate == 1) { printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); + } } IOSleep(1000 * 5); } @@ -227,10 +253,13 @@ sigterm_loop: */ sigkill_loop: for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_shutdownstate != 2)) { - psignal(p, SIGKILL); + if (((p->p_flag&P_SYSTEM) == 0) && (p->p_pptr->p_pid != 0) && (p != self) && (p->p_stat != SZOMB) && (p->p_shutdownstate != 2)) { + + if ((delayterm == 0) && ((p->p_lflag& P_LDELAYTERM) == P_LDELAYTERM)) { + continue; + } + psignal(p, SIGKILL); p->p_shutdownstate = 2; - goto sigkill_loop; } } @@ -241,7 +270,7 @@ sigkill_loop: IOSleep(200); /* double the time from 100 to 200 for NFS requests in particular */ for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (p->p_shutdownstate == 2) + if (p->p_shutdownstate == 2) break; } if (!p) @@ -253,7 +282,8 @@ sigkill_loop: */ p = allproc.lh_first; while (p) { - if ((p->p_flag&P_SYSTEM) || (p->p_pptr->p_pid == 0) || (p == self)) { + if ((p->p_flag&P_SYSTEM) || (!delayterm && ((p->p_lflag& P_LDELAYTERM))) + || (p->p_pptr->p_pid == 0) || (p == self)) { p = p->p_list.le_next; } else { @@ -264,12 +294,11 @@ sigkill_loop: * understand the sig_lock. This needs to be fixed. * XXX */ - if (p->exit_thread) { /* someone already doing it */ - /* give him a chance */ - thread_block(THREAD_CONTINUE_NULL); - } - else { - p->exit_thread = current_act(); + if (p->exit_thread) { /* someone already doing it */ + /* give him a chance */ + thread_block(THREAD_CONTINUE_NULL); + } else { + p->exit_thread = current_thread(); printf("."); exit1(p, 1, (int *)NULL); } @@ -277,28 +306,13 @@ sigkill_loop: } } printf("\n"); - /* - * Forcibly free resources of what's left. - */ -#ifdef notyet - p = allproc.lh_first; - while (p) { - /* - * Close open files and release open-file table. - * This may block!
- */ - /* panics on reboot due to "zfree: non-allocated memory in collectable zone" message */ - fdfree(p); - p = p->p_list.le_next; + + /* Now start the termination of processes that are marked for delayed termination */ + if (delayterm == 0) { + delayterm = 1; + goto sigterm_loop; } -#endif /* notyet */ - /* Wait for the reaper thread to run, and clean up what we have done - * before we proceed with the hardcore shutdown. This reduces the race - * between kill_tasks and the reaper thread. - */ - /* thread_wakeup(&reaper_queue); */ - /* IOSleep( 1 * 1000); */ printf("continuing\n"); } diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index 6c593326e..6313188a9 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,15 +63,13 @@ #define SIGPROP /* include signal properties table */ #include #include -#include -#include -#include +#include +#include #include #include #include -#include #include -#include +#include #include #include #include @@ -82,34 +80,62 @@ #include #include #include + #include +#include #include +#include + #include #include #include /* for coredump */ #include /* for APC support */ +#include +#include /* extern void *get_bsdtask_info(task_t); */ #include #include #include #include +#include +#include + +/* + * Missing prototypes that Mach should export + * + * +++ + */ +extern int thread_enable_fpe(thread_t act, int onoff); +extern void unix_syscall_return(int error); +extern thread_t port_name_to_thread(mach_port_name_t port_name); +extern kern_return_t check_actforsig(task_t task, thread_t thread, int setast); +extern kern_return_t get_signalact(task_t , thread_t *, int); +extern boolean_t thread_should_abort(thread_t); +extern unsigned int get_useraddr(void); + +/* + * --- + */ extern void doexception(int exc, int code, int sub); -void stop __P((struct proc *p)); -int cansignal __P((struct proc *, struct pcred *, struct proc *, int)); -int killpg1 __P((struct proc *, int, int, int)); -void sigexit_locked __P((struct proc *, int)); -int setsigvec __P((struct proc *, int, struct __sigaction *)); -void exit1 __P((struct proc *, int, int *)); -int signal_lock __P((struct proc *)); -int signal_unlock __P((struct proc *)); -void signal_setast __P((thread_act_t)); -void psignal_lock __P((struct proc *, int, int)); -void psignal_uthread __P((thread_act_t, int)); +void stop(struct proc *p); +int cansignal(struct proc *, kauth_cred_t, struct proc *, int); +int killpg1(struct proc *, int, int, int); +void sigexit_locked(struct proc *, int); +int setsigvec(struct proc *, int, struct __user_sigaction *); +void exit1(struct proc *, int, int *); +void psignal_uthread(thread_t, int); kern_return_t do_bsdexception(int, int, int); +void __posix_sem_syscall_return(kern_return_t); + +/* implementations in osfmk/kern/sync_sema.c.
We do not want port.h in this scope, so void * them */ +kern_return_t semaphore_timedwait_signal_trap_internal(void *, void *,time_t, int32_t, void (*)(int)); +kern_return_t semaphore_timedwait_trap_internal(void *, time_t, int32_t, void (*)(int)); +kern_return_t semaphore_wait_signal_trap_internal(void *, void *, void (*)(int)); +kern_return_t semaphore_wait_trap_internal(void *, void (*)(int)); static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); @@ -118,8 +144,52 @@ static int filt_signal(struct knote *kn, long hint); struct filterops sig_filtops = { 0, filt_sigattach, filt_sigdetach, filt_signal }; + +/* + * NOTE: Source and target may *NOT* overlap! (target is smaller) + */ +static void +sigaltstack_64to32(struct user_sigaltstack *in, struct sigaltstack *out) +{ + out->ss_sp = CAST_DOWN(void *,in->ss_sp); + out->ss_size = in->ss_size; + out->ss_flags = in->ss_flags; +} + +/* + * NOTE: Source and target are permitted to overlap! (source is smaller); + * this works because we copy fields in order from the end of the struct to + * the beginning. + */ +static void +sigaltstack_32to64(struct sigaltstack *in, struct user_sigaltstack *out) +{ + out->ss_flags = in->ss_flags; + out->ss_size = in->ss_size; + out->ss_sp = CAST_USER_ADDR_T(in->ss_sp); +} + +static void +sigaction_64to32(struct user_sigaction *in, struct sigaction *out) +{ + /* This assumes 32 bit __sa_handler is of type sig_t */ + out->__sigaction_u.__sa_handler = CAST_DOWN(sig_t,in->__sigaction_u.__sa_handler); + out->sa_mask = in->sa_mask; + out->sa_flags = in->sa_flags; +} + +static void +__sigaction_32to64(struct __sigaction *in, struct __user_sigaction *out) +{ + out->__sigaction_u.__sa_handler = CAST_USER_ADDR_T(in->__sigaction_u.__sa_handler); + out->sa_tramp = CAST_USER_ADDR_T(in->sa_tramp); + out->sa_mask = in->sa_mask; + out->sa_flags = in->sa_flags; +} + + #if SIGNAL_DEBUG -void ram_printf __P((int)); +void ram_printf(int); int ram_debug=0; unsigned int rdebug_proc=0; void @@ -155,8 +225,7 @@ int error = 0; #endif /* DIAGNOSTIC */ siglock_retry: - /* TBD: check p last arg */ - error = lockmgr(&p->signal_lock, LK_EXCLUSIVE, 0, (struct proc *)p); + error = lockmgr((struct lock__bsd__ *)&p->signal_lock[0], LK_EXCLUSIVE, 0, (struct proc *)0); if (error == EINTR) goto siglock_retry; return(error); @@ -186,23 +255,23 @@ signal_unlock(struct proc *p) #endif /* DIAGNOSTIC */ /* TBD: check p last arg */ - return(lockmgr(&p->signal_lock, LK_RELEASE, (simple_lock_t)0, (struct proc *)p)); + return(lockmgr((struct lock__bsd__ *)&p->signal_lock[0], LK_RELEASE, (simple_lock_t)0, (struct proc *)0)); } void signal_setast(sig_actthread) -thread_act_t sig_actthread; +thread_t sig_actthread; { act_set_astbsd(sig_actthread); } /* - * Can process p, with pcred pc, send the signal signum to process q? + * Can process p, with ucred uc, send the signal signum to process q?
*/ int -cansignal(p, pc, q, signum) +cansignal(p, uc, q, signum) struct proc *p; - struct pcred *pc; + kauth_cred_t uc; struct proc *q; int signum; { @@ -210,7 +279,7 @@ cansignal(p, pc, q, signum) if (p == q) return(1); - if (pc->pc_ucred->cr_uid == 0) + if (!suser(uc, NULL)) return (1); /* root can always signal */ if (signum == SIGCONT && q->p_session == p->p_session) @@ -233,10 +302,10 @@ cansignal(p, pc, q, signum) case SIGHUP: case SIGUSR1: case SIGUSR2: - if (pc->p_ruid == q->p_cred->p_ruid || - pc->pc_ucred->cr_uid == q->p_cred->p_ruid || - pc->p_ruid == q->p_ucred->cr_uid || - pc->pc_ucred->cr_uid == q->p_ucred->cr_uid) + if (uc->cr_ruid == q->p_ucred->cr_ruid || + kauth_cred_getuid(uc) == q->p_ucred->cr_ruid || + uc->cr_ruid == kauth_cred_getuid(q->p_ucred) || + kauth_cred_getuid(uc) == kauth_cred_getuid(q->p_ucred)) return (1); } return (0); @@ -246,34 +315,27 @@ cansignal(p, pc, q, signum) * because the P_SUGID test exists, this has extra tests which * could be removed. */ - if (pc->p_ruid == q->p_cred->p_ruid || - pc->p_ruid == q->p_cred->p_svuid || - pc->pc_ucred->cr_uid == q->p_cred->p_ruid || - pc->pc_ucred->cr_uid == q->p_cred->p_svuid || - pc->p_ruid == q->p_ucred->cr_uid || - pc->pc_ucred->cr_uid == q->p_ucred->cr_uid) + if (uc->cr_ruid == q->p_ucred->cr_ruid || + uc->cr_ruid == q->p_ucred->cr_svuid || + kauth_cred_getuid(uc) == q->p_ucred->cr_ruid || + kauth_cred_getuid(uc) == q->p_ucred->cr_svuid || + uc->cr_ruid == kauth_cred_getuid(q->p_ucred) || + kauth_cred_getuid(uc) == kauth_cred_getuid(q->p_ucred)) return (1); return (0); } -struct sigaction_args { - int signum; - struct __sigaction *nsa; - struct sigaction *osa; -}; /* ARGSUSED */ int -sigaction(p, uap, retval) - struct proc *p; - register struct sigaction_args *uap; - register_t *retval; +sigaction(struct proc *p, register struct sigaction_args *uap, __unused register_t *retval) { - struct sigaction vec; - struct __sigaction __vec; + struct user_sigaction vec; + struct __user_sigaction __vec; - register struct sigaction *sa; + struct user_sigaction *sa = &vec; register struct sigacts *ps = p->p_sigacts; + register int signum; int bit, error=0; @@ -281,7 +343,7 @@ sigaction(p, uap, retval) if (signum <= 0 || signum >= NSIG || signum == SIGKILL || signum == SIGSTOP) return (EINVAL); - sa = &vec; + if (uap->osa) { sa->sa_handler = ps->ps_sigact[signum]; sa->sa_mask = ps->ps_catchmask[signum]; @@ -301,13 +363,26 @@ sigaction(p, uap, retval) sa->sa_flags |= SA_NOCLDSTOP; if ((signum == SIGCHLD) && (p->p_flag & P_NOCLDWAIT)) sa->sa_flags |= SA_NOCLDWAIT; - if (error = copyout((caddr_t)sa, (caddr_t)uap->osa, - sizeof (vec))) + + if (IS_64BIT_PROCESS(p)) { + error = copyout(sa, uap->osa, sizeof(struct user_sigaction)); + } else { + struct sigaction vec32; + sigaction_64to32(sa, &vec32); + error = copyout(&vec32, uap->osa, sizeof(struct sigaction)); + } + if (error) return (error); } if (uap->nsa) { - if (error = copyin((caddr_t)uap->nsa, (caddr_t)&__vec, - sizeof (__vec))) + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->nsa, &__vec, sizeof(struct __user_sigaction)); + } else { + struct __sigaction __vec32; + error = copyin(uap->nsa, &__vec32, sizeof(struct __sigaction)); + __sigaction_32to64(&__vec32, &__vec); + } + if (error) return (error); error = setsigvec(p, signum, &__vec); } @@ -319,7 +394,7 @@ int clear_procsiglist(struct proc *p, int bit) { struct uthread * uth; - thread_act_t thact; + thread_t thact; signal_lock(p); @@ -342,11 +417,12 @@ clear_procsiglist(struct proc *p, int bit) return(0); } -int + 
+static int unblock_procsigmask(struct proc *p, int bit) { struct uthread * uth; - thread_act_t thact; + thread_t thact; signal_lock(p); if ((p->p_flag & P_INVFORK) && p->p_vforkact) { @@ -368,11 +444,11 @@ unblock_procsigmask(struct proc *p, int bit) } -int +static int block_procsigmask(struct proc *p, int bit) { struct uthread * uth; - thread_act_t thact; + thread_t thact; signal_lock(p); if ((p->p_flag & P_INVFORK) && p->p_vforkact) { @@ -392,11 +468,12 @@ block_procsigmask(struct proc *p, int bit) signal_unlock(p); return(0); } + int set_procsigmask(struct proc *p, int bit) { struct uthread * uth; - thread_act_t thact; + thread_t thact; signal_lock(p); if ((p->p_flag & P_INVFORK) && p->p_vforkact) { @@ -417,11 +494,9 @@ set_procsigmask(struct proc *p, int bit) return(0); } +/* XXX should be static? */ int -setsigvec(p, signum, sa) - register struct proc *p; - int signum; - register struct __sigaction *sa; +setsigvec(struct proc *p, int signum, struct __user_sigaction *sa) { register struct sigacts *ps = p->p_sigacts; register int bit; @@ -434,7 +509,7 @@ setsigvec(p, signum, sa) * Change setting atomically. */ ps->ps_sigact[signum] = sa->sa_handler; - ps->ps_trampact[signum] = (sig_t) sa->sa_tramp; + ps->ps_trampact[signum] = sa->sa_tramp; ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask; if (sa->sa_flags & SA_SIGINFO) ps->ps_siginfo |= bit; @@ -478,9 +553,9 @@ setsigvec(p, signum, sa) #ifdef __ppc__ if (signum == SIGFPE) { if (sa->sa_handler == SIG_DFL || sa->sa_handler == SIG_IGN) - thread_enable_fpe(current_act(), 0); + thread_enable_fpe(current_thread(), 0); else - thread_enable_fpe(current_act(), 1); + thread_enable_fpe(current_thread(), 1); } #endif /* __ppc__ */ /* @@ -527,7 +602,7 @@ siginit(p) void execsigs(p, thr_act) register struct proc *p; - register thread_act_t thr_act; + register thread_t thr_act; { register struct sigacts *ps = p->p_sigacts; register int nc, mask; @@ -560,7 +635,7 @@ execsigs(p, thr_act) */ ps->ps_sigstk.ss_flags = SA_DISABLE; ps->ps_sigstk.ss_size = 0; - ps->ps_sigstk.ss_sp = 0; + ps->ps_sigstk.ss_sp = USER_ADDR_NULL; ps->ps_flags = 0; } @@ -570,47 +645,39 @@ execsigs(p, thr_act) * and return old mask as return value; * the library stub does the rest. 
*/ -struct sigprocmask_args { - int how; - sigset_t *mask; - sigset_t * omask; -}; int -sigprocmask(p, uap, retval) - register struct proc *p; - struct sigprocmask_args *uap; - register_t *retval; +sigprocmask(register struct proc *p, struct sigprocmask_args *uap, __unused register_t *retval) { int error = 0; sigset_t oldmask, nmask; - sigset_t * omask = uap->omask; + user_addr_t omask = uap->omask; struct uthread *ut; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); oldmask = ut->uu_sigmask; - if (uap->mask == (sigset_t *)0) { + if (uap->mask == USER_ADDR_NULL) { /* just want old mask */ goto out; } - error = copyin((caddr_t)uap->mask, &nmask, sizeof(sigset_t)); + error = copyin(uap->mask, &nmask, sizeof(sigset_t)); if (error) goto out; switch (uap->how) { case SIG_BLOCK: block_procsigmask(p, (nmask & ~sigcantmask)); - signal_setast(current_act()); + signal_setast(current_thread()); break; case SIG_UNBLOCK: unblock_procsigmask(p, (nmask & ~sigcantmask)); - signal_setast(current_act()); + signal_setast(current_thread()); break; case SIG_SETMASK: set_procsigmask(p, (nmask & ~sigcantmask)); - signal_setast(current_act()); + signal_setast(current_thread()); break; default: @@ -618,24 +685,18 @@ sigprocmask(p, uap, retval) break; } out: - if (!error && omask) + if (!error && omask != USER_ADDR_NULL) copyout(&oldmask, omask, sizeof(sigset_t)); return (error); } -struct sigpending_args { - struct sigvec *osv; -}; int -sigpending(p, uap, retval) - struct proc *p; - register struct sigpending_args *uap; - register_t *retval; +sigpending(__unused struct proc *p, register struct sigpending_args *uap, __unused register_t *retval) { struct uthread *ut; sigset_t pendlist; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); pendlist = ut->uu_siglist; if (uap->osv) @@ -643,95 +704,6 @@ sigpending(p, uap, retval) return(0); } -#if COMPAT_43 -/* - * Generalized interface signal handler, 4.3-compatible. 
- */ -struct osigvec_args { - int signum; - struct sigvec *nsv; - struct sigvec *osv; -}; -/* ARGSUSED */ -int -osigvec(p, uap, retval) - struct proc *p; - register struct osigvec_args *uap; - register_t *retval; -{ - struct sigvec __vec; - struct sigvec vec; - register struct sigacts *ps = p->p_sigacts; - register struct sigvec *sv; - register int signum; - int bit, error=0; - -#if 0 - signum = uap->signum; - if (signum <= 0 || signum >= NSIG || - signum == SIGKILL || signum == SIGSTOP) - return (EINVAL); - sv = &vec; - if (uap->osv) { - *(sig_t *)&sv->sv_handler = ps->ps_sigact[signum]; - sv->sv_mask = ps->ps_catchmask[signum]; - bit = sigmask(signum); - sv->sv_flags = 0; - if ((ps->ps_sigonstack & bit) != 0) - sv->sv_flags |= SV_ONSTACK; - if ((ps->ps_sigintr & bit) != 0) - sv->sv_flags |= SV_INTERRUPT; - if (p->p_flag & P_NOCLDSTOP) - sv->sv_flags |= SA_NOCLDSTOP; - if (error = copyout((caddr_t)sv, (caddr_t)uap->osv, - sizeof (vec))) - return (error); - } - if (uap->nsv) { - if (error = copyin((caddr_t)uap->nsv, (caddr_t)sv, - sizeof (vec))) - return (error); - sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ - error = setsigvec(p, signum, (struct sigaction *)sv); - } -#else -error = ENOSYS; -#endif - return (error); -} - -struct osigblock_args { - int mask; -}; -int -osigblock(p, uap, retval) - register struct proc *p; - struct osigblock_args *uap; - register_t *retval; -{ - struct uthread * uth = get_bsdthread_info(current_act()); - - *retval = uth->uu_sigmask; - uth->uu_sigmask |= (uap->mask & ~sigcantmask); - return (0); -} - -struct osigsetmask_args { - int mask; -}; -int -osigsetmask(p, uap, retval) - struct proc *p; - struct osigsetmask_args *uap; - register_t *retval; -{ - struct uthread * uth = get_bsdthread_info(current_act()); - - *retval = uth->uu_sigmask; - uth->uu_sigmask = (uap->mask & ~sigcantmask); - return (0); -} -#endif /* COMPAT_43 */ /* * Suspend process until signal, providing mask to be set @@ -739,28 +711,19 @@ osigsetmask(p, uap, retval) * libc stub passes mask, not pointer, to save a copyin. */ -int -sigcontinue(error) +static int +sigcontinue(__unused int error) { - struct uthread *ut = get_bsdthread_info(current_act()); +// struct uthread *ut = get_bsdthread_info(current_thread()); unix_syscall_return(EINTR); } -struct sigsuspend_args { - sigset_t mask; -}; - -/* ARGSUSED */ int -sigsuspend(p, uap, retval) - register struct proc *p; - struct sigsuspend_args *uap; - register_t *retval; +sigsuspend(register struct proc *p, struct sigsuspend_args *uap, __unused register_t *retval) { - register struct sigacts *ps = p->p_sigacts; struct uthread *ut; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); /* * When returning from sigpause, we want @@ -770,56 +733,184 @@ sigsuspend(p, uap, retval) * to indicate this. */ ut->uu_oldmask = ut->uu_sigmask; - ut->uu_flag |= USAS_OLDMASK; + ut->uu_flag |= UT_SAS_OLDMASK; ut->uu_sigmask = (uap->mask & ~sigcantmask); (void) tsleep0((caddr_t) p, PPAUSE|PCATCH, "pause", 0, sigcontinue); /* always return EINTR rather than ERESTART... 
*/ return (EINTR); } -struct __disable_thsignal_args { - int value; -}; int -__disable_threadsignal(p, uap, retval) - struct proc *p; - register struct __disable_thsignal_args *uap; - register_t *retval; +__disable_threadsignal(struct proc *p, + __unused register struct __disable_threadsignal_args *uap, + __unused register_t *retval) { struct uthread *uth; - uth = (struct uthread *)get_bsdthread_info(current_act()); + uth = (struct uthread *)get_bsdthread_info(current_thread()); /* No longer valid to have any signal delivered */ signal_lock(p); - uth->uu_flag |= UNO_SIGMASK; + uth->uu_flag |= UT_NO_SIGMASK; signal_unlock(p); return(0); } -struct pthread_kill_args { - void * thread_port; - int sig; -}; -int -__pthread_kill(p, uap, retval) +int +__pthread_markcancel(p, uap, retval) struct proc *p; - register struct pthread_kill_args *uap; + register struct __pthread_markcancel_args *uap; register_t *retval; { thread_act_t target_act; int error = 0; - int signum = uap->sig; struct uthread *uth; - target_act = (thread_act_t)port_name_to_act(uap->thread_port); + target_act = (thread_act_t)port_name_to_thread(uap->thread_port); if (target_act == THR_ACT_NULL) return (ESRCH); + + uth = (struct uthread *)get_bsdthread_info(target_act); + + /* if the thread is in vfork do not cancel */ + if ((uth->uu_flag & (P_VFORK | UT_CANCEL | UT_CANCELED )) == 0) { + uth->uu_flag |= (UT_CANCEL | UT_NO_SIGMASK); + if (((uth->uu_flag & UT_NOTCANCELPT) == 0) + && ((uth->uu_flag & UT_CANCELDISABLE) == 0)) + thread_abort_safely(target_act); + } + + thread_deallocate(target_act); + return (error); +} + +/* if action = 0: return the cancellation state; + * if marked for cancellation, make the thread canceled. + * if action = 1: enable cancel handling. + * if action = 2: disable cancel handling. + */ +int +__pthread_canceled(p, uap, retval) + struct proc *p; + register struct __pthread_canceled_args *uap; + register_t *retval; +{ + thread_act_t thr_act; + struct uthread *uth; + int action = uap->action; + + thr_act = current_act(); + uth = (struct uthread *)get_bsdthread_info(thr_act); + + switch (action) { + case 1: + uth->uu_flag &= ~UT_CANCELDISABLE; + return(0); + case 2: + uth->uu_flag |= UT_CANCELDISABLE; + return(0); + case 0: + default: + /* if the thread is in vfork do not cancel */ + if((uth->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + uth->uu_flag &= ~UT_CANCEL; + uth->uu_flag |= (UT_CANCELED | UT_NO_SIGMASK); + return(0); + } + return(EINVAL); + } + return(EINVAL); +} + +void +__posix_sem_syscall_return(kern_return_t kern_result) +{ + int error = 0; + + if (kern_result == KERN_SUCCESS) + error = 0; + else if (kern_result == KERN_ABORTED) + error = EINTR; + else if (kern_result == KERN_OPERATION_TIMED_OUT) + error = ETIMEDOUT; + else + error = EINVAL; + unix_syscall_return(error); + /* does not return */ +} + + +int +__semwait_signal(p, uap, retval) + struct proc *p; + register struct __semwait_signal_args *uap; + register_t *retval; +{ + + kern_return_t kern_result; + mach_timespec_t then; + struct timespec now; + + if(uap->timeout) { + + if (uap->relative) { + then.tv_sec = uap->tv_sec; + then.tv_nsec = uap->tv_nsec; + } else { + nanotime(&now); + then.tv_sec = uap->tv_sec - now.tv_sec; + then.tv_nsec = uap->tv_nsec - now.tv_nsec; + if (then.tv_nsec < 0) { + then.tv_nsec += NSEC_PER_SEC; + then.tv_sec--; + } + } + + if (uap->mutex_sem == (void *)NULL) + kern_result = semaphore_timedwait_trap_internal(uap->cond_sem, then.tv_sec, then.tv_nsec,
__posix_sem_syscall_return); + else + kern_result = semaphore_timedwait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, then.tv_sec, then.tv_nsec, __posix_sem_syscall_return); + + } else { + + if (uap->mutex_sem == (void *)NULL) + kern_result = semaphore_wait_trap_internal(uap->cond_sem, __posix_sem_syscall_return); + else + + kern_result = semaphore_wait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, __posix_sem_syscall_return); + } + +out: + if (kern_result == KERN_SUCCESS) + return(0); + else if (kern_result == KERN_ABORTED) + return(EINTR); + else if (kern_result == KERN_OPERATION_TIMED_OUT) + return(ETIMEDOUT); + else + return(EINVAL); +} + + +int +__pthread_kill(__unused struct proc *p, + register struct __pthread_kill_args *uap, + __unused register_t *retval) +{ + thread_t target_act; + int error = 0; + int signum = uap->sig; + struct uthread *uth; + + target_act = (thread_t)port_name_to_thread(uap->thread_port); + + if (target_act == THREAD_NULL) + return (ESRCH); if ((u_int)signum >= NSIG) { error = EINVAL; goto out; @@ -827,7 +918,7 @@ __pthread_kill(p, uap, retval) uth = (struct uthread *)get_bsdthread_info(target_act); - if (uth->uu_flag & UNO_SIGMASK) { + if (uth->uu_flag & UT_NO_SIGMASK) { error = ESRCH; goto out; } @@ -835,39 +926,32 @@ __pthread_kill(p, uap, retval) if (signum) psignal_uthread(target_act, signum); out: - act_deallocate(target_act); + thread_deallocate(target_act); return (error); } -struct pthread_sigmask_args { - int how; - const sigset_t *set; - sigset_t * oset; -}; int -pthread_sigmask(p, uap, retval) - register struct proc *p; - register struct pthread_sigmask_args *uap; - register_t *retval; +pthread_sigmask(__unused register struct proc *p, + register struct pthread_sigmask_args *uap, + __unused register_t *retval) { - int how = uap->how; - const sigset_t *set = uap->set; - sigset_t * oset = uap->oset; - const sigset_t nset; + user_addr_t set = uap->set; + user_addr_t oset = uap->oset; + sigset_t nset; int error = 0; struct uthread *ut; sigset_t oldset; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); oldset = ut->uu_sigmask; - if (set == (sigset_t *) 0) { + if (set == USER_ADDR_NULL) { /* need only old mask */ goto out; } - error = copyin((caddr_t)set, (caddr_t)&nset, sizeof(sigset_t)); + error = copyin(set, &nset, sizeof(sigset_t)); if (error) goto out; @@ -878,12 +962,12 @@ pthread_sigmask(p, uap, retval) case SIG_UNBLOCK: ut->uu_sigmask &= ~(nset); - signal_setast(current_act()); + signal_setast(current_thread()); break; case SIG_SETMASK: ut->uu_sigmask = (nset & ~sigcantmask); - signal_setast(current_act()); + signal_setast(current_thread()); break; default: @@ -891,40 +975,30 @@ pthread_sigmask(p, uap, retval) } out: - if (!error && oset) - copyout((caddr_t)&oldset, (caddr_t)oset, sizeof(sigset_t)); + if (!error && oset != USER_ADDR_NULL) + copyout(&oldset, oset, sizeof(sigset_t)); return(error); } -struct sigwait_args { - const sigset_t *set; - int *sig; -}; - int -sigwait(p, uap, retval) - register struct proc *p; - register struct sigwait_args *uap; - register_t *retval; +sigwait(register struct proc *p, register struct sigwait_args *uap, __unused register_t *retval) { - register struct sigacts *ps = p->p_sigacts; struct uthread *ut; struct uthread *uth; - thread_act_t thact; int error = 0; sigset_t mask; sigset_t siglist; sigset_t sigw=0; int signum; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread 
*)get_bsdthread_info(current_thread()); - if (uap->set == (const sigset_t *)0) + if (uap->set == USER_ADDR_NULL) return(EINVAL); - error = copyin((caddr_t)uap->set, (caddr_t)&mask, sizeof(sigset_t)); + error = copyin(uap->set, &mask, sizeof(sigset_t)); if (error) return(error); @@ -939,7 +1013,7 @@ sigwait(p, uap, retval) return(EINVAL); } else { TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { - if (sigw = uth->uu_siglist & siglist) { + if ( (sigw = uth->uu_siglist & siglist) ) { break; } } @@ -957,7 +1031,7 @@ sigwait(p, uap, retval) * to indicate this. */ ut->uu_oldmask = ut->uu_sigmask; - ut->uu_flag |= USAS_OLDMASK; + ut->uu_flag |= UT_SAS_OLDMASK; if (siglist == (sigset_t)0) return(EINVAL); /* SIGKILL and SIGSTOP are not maskable as well */ @@ -972,7 +1046,7 @@ sigwait(p, uap, retval) sigw = (ut->uu_sigwait & siglist); ut->uu_sigmask = ut->uu_oldmask; ut->uu_oldmask = 0; - ut->uu_flag &= ~USAS_OLDMASK; + ut->uu_flag &= ~UT_SAS_OLDMASK; sigwait1: ut->uu_sigwait = 0; if (!error) { @@ -980,7 +1054,7 @@ sigwait1: if (!signum) panic("sigwait with no signal wakeup"); ut->uu_siglist &= ~(sigmask(signum)); - if (uap->sig) + if (uap->sig != USER_ADDR_NULL) error = copyout(&signum, uap->sig, sizeof(int)); } @@ -988,65 +1062,38 @@ sigwait1: } -#if COMPAT_43 -struct osigstack_args { - struct sigstack *nss; - struct sigstack *oss; -}; - -/* ARGSUSED */ -int -osigstack(p, uap, retval) - struct proc *p; - register struct osigstack_args *uap; - register_t *retval; -{ - struct sigstack ss; - struct sigacts *psp; - int error = 0; - - psp = p->p_sigacts; - ss.ss_sp = psp->ps_sigstk.ss_sp; - ss.ss_onstack = psp->ps_sigstk.ss_flags & SA_ONSTACK; - if (uap->oss && (error = copyout((caddr_t)&ss, - (caddr_t)uap->oss, sizeof (struct sigstack)))) - return (error); - if (uap->nss && (error = copyin((caddr_t)uap->nss, - (caddr_t)&ss, sizeof (ss))) == 0) { - psp->ps_sigstk.ss_sp = ss.ss_sp; - psp->ps_sigstk.ss_size = 0; - psp->ps_sigstk.ss_flags |= ss.ss_onstack & SA_ONSTACK; - psp->ps_flags |= SAS_ALTSTACK; - } - return (error); -} -#endif /* COMPAT_43 */ -struct sigaltstack_args { - struct sigaltstack *nss; - struct sigaltstack *oss; -}; -/* ARGSUSED */ int -sigaltstack(p, uap, retval) - struct proc *p; - register struct sigaltstack_args *uap; - register_t *retval; +sigaltstack(struct proc *p, register struct sigaltstack_args *uap, __unused register_t *retval) { struct sigacts *psp; - struct sigaltstack ss; + struct user_sigaltstack ss; int error; psp = p->p_sigacts; if ((psp->ps_flags & SAS_ALTSTACK) == 0) psp->ps_sigstk.ss_flags |= SA_DISABLE; - if (uap->oss && (error = copyout((caddr_t)&psp->ps_sigstk, - (caddr_t)uap->oss, sizeof (struct sigaltstack)))) - return (error); - if (uap->nss == 0) + if (uap->oss) { + if (IS_64BIT_PROCESS(p)) { + error = copyout(&psp->ps_sigstk, uap->oss, sizeof(struct user_sigaltstack)); + } else { + struct sigaltstack ss32; + sigaltstack_64to32(&psp->ps_sigstk, &ss32); + error = copyout(&ss32, uap->oss, sizeof(struct sigaltstack)); + } + if (error) + return (error); + } + if (uap->nss == USER_ADDR_NULL) return (0); - if (error = copyin((caddr_t)uap->nss, (caddr_t)&ss, - sizeof (ss))) + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->nss, &ss, sizeof(struct user_sigaltstack)); + } else { + struct sigaltstack ss32; + error = copyin(uap->nss, &ss32, sizeof(struct sigaltstack)); + sigaltstack_32to64(&ss32,&ss); + } + if (error) return (error); if ((ss.ss_flags & ~SA_DISABLE) != 0) { return(EINVAL); @@ -1068,22 +1115,15 @@ sigaltstack(p, uap, retval) return (0); } -struct kill_args { - 
int pid; - int signum; -}; -/* ARGSUSED */ int -kill(cp, uap, retval) - register struct proc *cp; - register struct kill_args *uap; - register_t *retval; +kill(struct proc *cp, struct kill_args *uap, __unused register_t *retval) { register struct proc *p; - register struct pcred *pc = cp->p_cred; + kauth_cred_t uc = kauth_cred_get(); + + AUDIT_ARG(pid, uap->pid); + AUDIT_ARG(signum, uap->signum); - AUDIT_ARG(pid, uap->pid); - AUDIT_ARG(signum, uap->signum); if ((u_int)uap->signum >= NSIG) return (EINVAL); if (uap->pid > 0) { @@ -1099,7 +1139,7 @@ kill(cp, uap, retval) return (ESRCH); } AUDIT_ARG(process, p); - if (!cansignal(cp, pc, p, uap->signum)) + if (!cansignal(cp, uc, p, uap->signum)) return (EPERM); if (uap->signum) psignal(p, uap->signum); @@ -1116,26 +1156,6 @@ kill(cp, uap, retval) /* NOTREACHED */ } -#if COMPAT_43 -struct okillpg_args { - int pgid; - int signum; -}; -/* ARGSUSED */ -int -okillpg(p, uap, retval) - struct proc *p; - register struct okillpg_args *uap; - register_t *retval; -{ - - AUDIT_ARG(pid, uap->pgid); - AUDIT_ARG(signum, uap->signum); - if ((u_int)uap->signum >= NSIG) - return (EINVAL); - return (killpg1(p, uap->signum, uap->pgid, 0)); -} -#endif /* COMPAT_43 */ /* * Common code for kill process group/broadcast kill. @@ -1147,7 +1167,7 @@ killpg1(cp, signum, pgid, all) int signum, pgid, all; { register struct proc *p; - register struct pcred *pc = cp->p_cred; + kauth_cred_t uc = cp->p_ucred; struct pgrp *pgrp; int nfound = 0; @@ -1157,7 +1177,7 @@ killpg1(cp, signum, pgid, all) */ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || - p == cp || !cansignal(cp, pc, p, signum)) + p == cp || !cansignal(cp, uc, p, signum)) continue; nfound++; if (signum) @@ -1178,7 +1198,7 @@ killpg1(cp, signum, pgid, all) p = p->p_pglist.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p->p_stat == SZOMB || - !cansignal(cp, pc, p, signum)) + !cansignal(cp, uc, p, signum)) continue; nfound++; if (signum) @@ -1244,10 +1264,7 @@ tty_pgsignal(pgrp, signum) * Send a signal caused by a trap to a specific thread. 
*/ void -threadsignal(sig_actthread, signum, code) - register thread_act_t sig_actthread; - register int signum; - u_long code; +threadsignal(thread_t sig_actthread, int signum, u_long code) { register struct uthread *uth; register struct task * sig_task; @@ -1264,7 +1281,7 @@ threadsignal(sig_actthread, signum, code) p = (struct proc *)(get_bsdtask_info(sig_task)); uth = get_bsdthread_info(sig_actthread); - if (uth && (uth->uu_flag & P_VFORK)) + if (uth && (uth->uu_flag & UT_VFORK)) p = uth->uu_proc; if (!(p->p_flag & P_TRACED) && (p->p_sigignore & mask)) @@ -1287,18 +1304,11 @@ psignal(p, signum) } void -psignal_vfork(p, new_task, thr_act, signum) - register struct proc *p; - task_t new_task; - thread_act_t thr_act; - register int signum; +psignal_vfork(struct proc *p, task_t new_task, thread_t thr_act, int signum) { - int withlock = 1; - int pend = 0; - register int s, prop; + register int prop; register sig_t action; int mask; - kern_return_t kret; struct uthread *uth; if ((u_int)signum >= NSIG || signum == 0) @@ -1312,7 +1322,7 @@ psignal_vfork(p, new_task, thr_act, signum) } #endif /* SIGNAL_DEBUG */ - if ((new_task == TASK_NULL) || (thr_act == (thread_act_t)NULL) || is_kerneltask(new_task)) + if ((new_task == TASK_NULL) || (thr_act == (thread_t)NULL) || is_kerneltask(new_task)) return; @@ -1394,13 +1404,13 @@ psigout: signal_unlock(p); } -thread_act_t +static thread_t get_signalthread(struct proc *p, int signum) { struct uthread *uth; - thread_act_t thr_act; + thread_t thr_act; sigset_t mask = sigmask(signum); - thread_act_t sig_thread_act; + thread_t sig_thread_act; struct task * sig_task = p->task; kern_return_t kret; @@ -1410,11 +1420,11 @@ get_signalthread(struct proc *p, int signum) if (kret == KERN_SUCCESS) return(sig_thread_act); else - return(THR_ACT_NULL); + return(THREAD_NULL); } TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { - if(((uth->uu_flag & UNO_SIGMASK)== 0) && + if(((uth->uu_flag & UT_NO_SIGMASK)== 0) && (((uth->uu_sigmask & mask) == 0) || (uth->uu_sigwait & mask))) { if (check_actforsig(p->task, uth->uu_act, 1) == KERN_SUCCESS) return(uth->uu_act); @@ -1424,7 +1434,7 @@ get_signalthread(struct proc *p, int signum) return(thr_act); } - return(THR_ACT_NULL); + return(THREAD_NULL); } /* @@ -1448,11 +1458,11 @@ psignal_lock(p, signum, withlock) { register int s, prop; register sig_t action; - thread_act_t sig_thread_act; + thread_t sig_thread_act; register task_t sig_task; int mask; struct uthread *uth; - kern_return_t kret; + boolean_t funnel_state = FALSE; int sw_funnel = 0; if ((u_int)signum >= NSIG || signum == 0) @@ -1466,9 +1476,9 @@ psignal_lock(p, signum, withlock) } #endif /* SIGNAL_DEBUG */ - if (thread_funnel_get() == (funnel_t *)network_flock) { + if (thread_funnel_get() == (funnel_t *)0) { sw_funnel = 1; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + funnel_state = thread_funnel_set(kernel_flock, TRUE); } /* * We will need the task pointer later. 
Grab it now to @@ -1477,7 +1487,7 @@ psignal_lock(p, signum, withlock) */ if (((sig_task = p->task) == TASK_NULL) || is_kerneltask(sig_task)) { if (sw_funnel) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + thread_funnel_set(kernel_flock, funnel_state); return; } @@ -1492,7 +1502,7 @@ psignal_lock(p, signum, withlock) */ if (ISSET(p->p_flag, P_REBOOT)) { if (sw_funnel) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + thread_funnel_set(kernel_flock, funnel_state); return; } @@ -1515,11 +1525,11 @@ psignal_lock(p, signum, withlock) /* If successful return with ast set */ sig_thread_act = get_signalthread(p, signum); - if (sig_thread_act == THR_ACT_NULL) { + if (sig_thread_act == THREAD_NULL) { /* XXXX FIXME - /* if it is sigkill, may be we should - * inject a thread to terminate - */ + * if it is sigkill, may be we should + * inject a thread to terminate + */ #if SIGNAL_DEBUG ram_printf(1); #endif /* SIGNAL_DEBUG */ @@ -1543,12 +1553,13 @@ psignal_lock(p, signum, withlock) */ if (p->p_sigignore & mask) goto psigout; + /* sigwait takes precedence */ if (uth->uu_sigwait & mask) - action = SIG_WAIT; - if (uth->uu_sigmask & mask) - action = SIG_HOLD; + action = KERN_SIG_WAIT; + else if (uth->uu_sigmask & mask) + action = KERN_SIG_HOLD; else if (p->p_sigcatch & mask) - action = SIG_CATCH; + action = KERN_SIG_CATCH; else action = SIG_DFL; } @@ -1583,7 +1594,7 @@ psignal_lock(p, signum, withlock) * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ - if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) { + if (action == KERN_SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) { goto psigout; } /* @@ -1608,14 +1619,16 @@ psignal_lock(p, signum, withlock) goto psigout; } - if (action == SIG_WAIT) { + if (action == KERN_SIG_WAIT) { uth->uu_sigwait = mask; uth->uu_siglist &= ~mask; p->p_siglist &= ~mask; wakeup(&uth->uu_sigwait); /* if it is SIGCONT resume whole process */ - if (prop & SA_CONT) + if (prop & SA_CONT) { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); + } goto psigout; } @@ -1630,6 +1643,7 @@ psignal_lock(p, signum, withlock) p->p_flag &= ~P_TTYSLEEP; wakeup(&p->p_siglist); } else { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); } p->p_stat = SRUN; @@ -1669,7 +1683,7 @@ psignal_lock(p, signum, withlock) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_STOPPED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; psignal(pp, SIGCHLD); } } @@ -1706,6 +1720,7 @@ psignal_lock(p, signum, withlock) p->p_flag &= ~P_TTYSLEEP; wakeup(&p->p_siglist); } else { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); } uth->uu_siglist &= ~mask; @@ -1745,20 +1760,20 @@ psigout: if (withlock) signal_unlock(p); if (sw_funnel) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + thread_funnel_set(kernel_flock, funnel_state); } /* psignal_lock(p, signum, withlock ) */ void psignal_uthread(thr_act, signum) - thread_act_t thr_act; + thread_t thr_act; int signum; { struct proc *p; - register int s, prop; + register int prop; register sig_t action; - thread_act_t sig_thread_act; + thread_t sig_thread_act; register task_t sig_task; int mask; struct uthread *uth; @@ -1836,12 +1851,13 @@ psignal_uthread(thr_act, signum) */ if (p->p_sigignore & mask) goto puthout; + /* sigwait takes precedence */ if (uth->uu_sigwait & mask) - action = SIG_WAIT; - if (uth->uu_sigmask & mask) - action = SIG_HOLD; + action = KERN_SIG_WAIT; + else if 
(uth->uu_sigmask & mask) + action = KERN_SIG_HOLD; else if (p->p_sigcatch & mask) - action = SIG_CATCH; + action = KERN_SIG_CATCH; else action = SIG_DFL; } @@ -1875,7 +1891,7 @@ psignal_uthread(thr_act, signum) * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ - if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) + if (action == KERN_SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) goto puthout; /* @@ -1900,14 +1916,16 @@ psignal_uthread(thr_act, signum) goto puthout; } - if (action == SIG_WAIT) { + if (action == KERN_SIG_WAIT) { uth->uu_sigwait = mask; uth->uu_siglist &= ~mask; p->p_siglist &= ~mask; wakeup(&uth->uu_sigwait); /* if it is SIGCONT resume whole process */ - if (prop & SA_CONT) + if (prop & SA_CONT) { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); + } goto puthout; } @@ -1917,8 +1935,10 @@ psignal_uthread(thr_act, signum) * Wake up the thread, but don't un-suspend it * (except for SIGCONT). */ - if (prop & SA_CONT) + if (prop & SA_CONT) { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); + } goto psurun; } else { /* Default action - varies */ @@ -1952,7 +1972,7 @@ psignal_uthread(thr_act, signum) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_STOPPED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; psignal(pp, SIGCHLD); } stop(p); @@ -1990,6 +2010,7 @@ psignal_uthread(thr_act, signum) p->p_flag &= ~P_TTYSLEEP; wakeup(&p->p_siglist); } else { + p->p_flag |= P_CONTINUED; (void) task_resume(sig_task); } uth->uu_siglist &= ~mask; @@ -2031,20 +2052,18 @@ puthout: __inline__ void -sig_lock_to_exit( - struct proc *p) +sig_lock_to_exit(struct proc *p) { - thread_t self = current_act(); + thread_t self = current_thread(); p->exit_thread = self; (void) task_suspend(p->task); } __inline__ int -sig_try_locked( - struct proc *p) +sig_try_locked(struct proc *p) { - thread_t self = current_act(); + thread_t self = current_thread(); while (p->sigwait || p->exit_thread) { if (p->exit_thread) { @@ -2090,14 +2109,11 @@ issignal(p) register struct proc *p; { register int signum, mask, prop, sigbits; - task_t task = p->task; - thread_act_t cur_act; - int s; + thread_t cur_act; struct uthread * ut; - kern_return_t kret; struct proc *pp; - cur_act = current_act(); + cur_act = current_thread(); #if SIGNAL_DEBUG if(rdebug_proc && (p == rdebug_proc)) { @@ -2138,7 +2154,6 @@ issignal(p) continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) { - register int hold; register task_t task; /* * If traced, always stop, and stay @@ -2151,7 +2166,7 @@ issignal(p) p->sigwait = TRUE; p->sigwait_thread = cur_act; p->p_stat = SSTOP; - p->p_flag &= ~P_WAITED; + p->p_flag &= ~(P_WAITED|P_CONTINUED); ut->uu_siglist &= ~mask; /* clear the old signal */ p->p_siglist &= ~mask; /* clear the old signal */ signal_unlock(p); @@ -2162,7 +2177,7 @@ issignal(p) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_TRAPPED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; psignal(pp, SIGCHLD); /* * XXX Have to really stop for debuggers; @@ -2176,7 +2191,7 @@ issignal(p) p->sigwait = TRUE; p->sigwait_thread = cur_act; p->p_stat = SSTOP; - p->p_flag &= ~P_WAITED; + p->p_flag &= ~(P_WAITED|P_CONTINUED); ut->uu_siglist &= ~mask; /* clear the old signal */ p->p_siglist &= ~mask; /* clear the old signal */ @@ -2203,7 +2218,7 @@ issignal(p) * clear it, since sig_lock_to_exit will * wait. 
*/ - clear_wait(current_act(), THREAD_INTERRUPTED); + clear_wait(current_thread(), THREAD_INTERRUPTED); sig_lock_to_exit(p); /* * Since this thread will be resumed @@ -2220,7 +2235,7 @@ issignal(p) /* * We may have to quit */ - if (thread_should_abort(current_act())) { + if (thread_should_abort(current_thread())) { signal_unlock(p); return(0); } @@ -2287,7 +2302,7 @@ issignal(p) pp->si_pid = p->p_pid; pp->si_status = p->p_xstat; pp->si_code = CLD_STOPPED; - pp->si_uid = p->p_cred->p_ruid; + pp->si_uid = p->p_ucred->cr_ruid; psignal(pp, SIGCHLD); } } @@ -2339,14 +2354,12 @@ CURSIG(p) register struct proc *p; { register int signum, mask, prop, sigbits; - task_t task = p->task; - thread_act_t cur_act; - int s; + thread_t cur_act; struct uthread * ut; int retnum = 0; - cur_act = current_act(); + cur_act = current_thread(); ut = get_bsdthread_info(cur_act); @@ -2469,9 +2482,9 @@ stop(p) register struct proc *p; { p->p_stat = SSTOP; - p->p_flag &= ~P_WAITED; + p->p_flag &= ~(P_WAITED|P_CONTINUED); if (p->p_pptr->p_stat != SSTOP) - wakeup((caddr_t)p->p_pptr); + wakeup((caddr_t)p->p_pptr); (void) task_suspend(p->task); /*XXX*/ } @@ -2480,12 +2493,11 @@ stop(p) * from the current set of pending signals. */ void -postsig(signum) - register int signum; +postsig(int signum) { - register struct proc *p = current_proc(); - register struct sigacts *ps = p->p_sigacts; - register sig_t action; + struct proc *p = current_proc(); + struct sigacts *ps = p->p_sigacts; + user_addr_t catcher; u_long code; int mask, returnmask; struct uthread * ut; @@ -2509,20 +2521,21 @@ postsig(signum) return; } - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); mask = sigmask(signum); ut->uu_siglist &= ~mask; p->p_siglist &= ~mask; - action = ps->ps_sigact[signum]; + catcher = ps->ps_sigact[signum]; #if KTRACE + //LP64: catcher argument is a 64 bit user space handler address if (KTRPOINT(p, KTR_PSIG)) ktrpsig(p->p_tracep, - signum, action, ut->uu_flag & USAS_OLDMASK ? - &ut->uu_oldmask : &ut->uu_sigmask, 0, -1); + signum, CAST_DOWN(void *,catcher), ut->uu_flag & UT_SAS_OLDMASK ? + &ut->uu_oldmask : &ut->uu_sigmask, 0); #endif - if (action == SIG_DFL) { + if (catcher == SIG_DFL) { /* - * Default action, where the default is to kill + * Default catcher, where the default is to kill * the process. (Other cases were ignored above.) */ /* called with signal_lock() held */ @@ -2534,7 +2547,7 @@ postsig(signum) * If we get here, the signal must be caught. */ #if DIAGNOSTIC - if (action == SIG_IGN || (ut->uu_sigmask & mask)) + if (catcher == SIG_IGN || (ut->uu_sigmask & mask)) log(LOG_WARNING, "postsig: processing masked or ignored signal\n"); #endif @@ -2547,9 +2560,9 @@ postsig(signum) * mask from before the sigpause is what we want * restored after the signal processing is completed. 
*/ - if (ut->uu_flag & USAS_OLDMASK) { + if (ut->uu_flag & UT_SAS_OLDMASK) { returnmask = ut->uu_oldmask; - ut->uu_flag &= ~USAS_OLDMASK; + ut->uu_flag &= ~UT_SAS_OLDMASK; ut->uu_oldmask = 0; } else returnmask = ut->uu_sigmask; @@ -2566,7 +2579,7 @@ postsig(signum) #ifdef __ppc__ /* Needs to disable to run in user mode */ if (signum == SIGFPE) { - thread_enable_fpe(current_act(), 0); + thread_enable_fpe(current_thread(), 0); } #endif /* __ppc__ */ @@ -2577,7 +2590,7 @@ postsig(signum) ps->ps_code = 0; } p->p_stats->p_ru.ru_nsignals++; - sendsig(p, action, signum, returnmask, code); + sendsig(p, catcher, signum, returnmask, code); } signal_unlock(p); } @@ -2601,10 +2614,12 @@ sigexit_locked(p, signum) p->p_acflag |= AXSIG; if (sigprop[signum] & SA_CORE) { p->p_sigacts->ps_sig = signum; + signal_unlock(p); if (coredump(p) == 0) signum |= WCOREFLAG; - } - signal_unlock(p); + } else + signal_unlock(p); + exit1(p, W_EXITCODE(0, signum), (int *)NULL); /* NOTREACHED */ } @@ -2645,21 +2660,22 @@ filt_signal(struct knote *kn, long hint) if (hint & NOTE_SIGNAL) { hint &= ~NOTE_SIGNAL; - if (kn->kn_id == hint) + if (kn->kn_id == (unsigned int)hint) kn->kn_data++; } return (kn->kn_data != 0); } + void -bsd_ast(thread_act_t thr_act) +bsd_ast(thread_t thr_act) { struct proc *p = current_proc(); struct uthread *ut = get_bsdthread_info(thr_act); int signum; - unsigned int pc; + user_addr_t pc; boolean_t funnel_state; - static bsd_init_done = 0; + static int bsd_init_done = 0; if (p == NULL) return; @@ -2672,13 +2688,11 @@ bsd_ast(thread_act_t thr_act) p->p_flag &= ~P_OWEUPC; } - if (CHECK_SIGNALS(p, current_act(), ut)) { - while (signum = issignal(p)) + if (CHECK_SIGNALS(p, current_thread(), ut)) { + while ( (signum = issignal(p)) ) postsig(signum); } if (!bsd_init_done) { - extern void bsdinit_task(void); - bsd_init_done = 1; bsdinit_task(); } @@ -2743,6 +2757,7 @@ task_t task; } } + kern_return_t do_bsdexception( int exc, @@ -2750,10 +2765,63 @@ do_bsdexception( int sub) { exception_data_type_t codes[EXCEPTION_CODE_MAX]; - extern kern_return_t bsd_exception(int, exception_data_type_t codes[], int); codes[0] = code; codes[1] = sub; return(bsd_exception(exc, codes, 2)); } +int +proc_pendingsignals(struct proc *p, sigset_t mask) +{ + struct uthread * uth; + thread_t th; + sigset_t bits = 0; + int error; + + /* If the process is in proc exit, return no signal info */ + if (p->p_lflag & P_LPEXIT) + return(0); + + /* duplicate the signal lock code to enable recursion, since exit + * holds the lock too long. All this code is being reworked; + * this is just a workaround for regressions till new code + * arrives.
+ */ +ppend_retry: + error = lockmgr((struct lock__bsd__ *)&p->signal_lock[0], (LK_EXCLUSIVE | LK_CANRECURSE), 0, (struct proc *)0); + if (error == EINTR) + goto ppend_retry; + + if ((p->p_flag & P_INVFORK) && p->p_vforkact) { + th = p->p_vforkact; + uth = (struct uthread *)get_bsdthread_info(th); + if (uth) { + bits = (((uth->uu_siglist & ~uth->uu_sigmask) & ~p->p_sigignore) & mask); + } + goto out; + } + + bits = 0; + TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { + bits |= (((uth->uu_siglist & ~uth->uu_sigmask) & ~p->p_sigignore) & mask); + } +out: + signal_unlock(p); + return(bits); +} + +int +thread_issignal(proc_t p, thread_t th, sigset_t mask) +{ + struct uthread * uth; + sigset_t bits=0; + + + uth = (struct uthread *)get_bsdthread_info(th); + if (uth) { + bits = (((uth->uu_siglist & ~uth->uu_sigmask) & ~p->p_sigignore) & mask); + } + return(bits); +} + diff --git a/bsd/kern/kern_subr.c b/bsd/kern/kern_subr.c index cc5b4382e..40e9f4c35 100644 --- a/bsd/kern/kern_subr.c +++ b/bsd/kern/kern_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,10 +62,12 @@ #include #include -#include +#include #include #include #include +#include +#include #include @@ -73,137 +75,224 @@ #define DBG_UIO_COPYOUT 16 #define DBG_UIO_COPYIN 17 +#if DEBUG +#include + +static int uio_t_count = 0; +#endif /* DEBUG */ + int uiomove(cp, n, uio) register caddr_t cp; register int n; - register struct uio *uio; + register uio_t uio; { return uiomove64((addr64_t)((unsigned int)cp), n, uio); } + // LP64todo - fix this! 'n' should be int64_t? int -uiomove64(addr64_t cp, int n, struct uio *uio) +uiomove64(addr64_t cp, int n, register struct uio *uio) { - register struct iovec *iov; - u_int cnt; +#if LP64KERN + register uint64_t acnt; +#else + register u_int acnt; +#endif int error = 0; #if DIAGNOSTIC if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) panic("uiomove: mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) - panic("uiomove proc"); #endif - while (n > 0 && uio->uio_resid) { - iov = uio->uio_iov; - cnt = iov->iov_len; - if (cnt == 0) { - uio->uio_iov++; +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + while (n > 0 && uio_resid(uio)) { + acnt = uio_iov_len(uio); + if (acnt == 0) { + uio_next_iov(uio); uio->uio_iovcnt--; continue; } - if (cnt > n) - cnt = n; + if (n > 0 && acnt > (uint64_t)n) + acnt = n; + switch (uio->uio_segflg) { + case UIO_USERSPACE64: + case UIO_USERISPACE64: + // LP64 - 3rd argument in debug code is 64 bit, expected to be 32 bit + if (uio->uio_rw == UIO_READ) + { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, + (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0); + + error = copyout( CAST_DOWN(caddr_t, cp), uio->uio_iovs.iov64p->iov_base, acnt ); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, + (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0); + } + else + { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, + (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0); + + error = copyin(uio->uio_iovs.iov64p->iov_base, CAST_DOWN(caddr_t, cp), acnt); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, + (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0); + } + if (error) + return (error); 
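+		/*
+		 * Note: the cases below repeat this shape for the other
+		 * segment types: 32-bit user iovecs go through copyin()
+		 * and copyout() with a CAST_USER_ADDR_T widening, kernel
+		 * space uses copywithin(), and the physical variants use
+		 * copypv(), folding any failure into EFAULT.
+		 */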
+ break; + + case UIO_USERSPACE32: + case UIO_USERISPACE32: case UIO_USERSPACE: case UIO_USERISPACE: if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)iov->iov_base, cnt, 0,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0); - error = copyout( CAST_DOWN(caddr_t, cp), iov->iov_base, cnt ); + error = copyout( CAST_DOWN(caddr_t, cp), CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), acnt ); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)iov->iov_base, cnt, 0,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)iov->iov_base, (int)cp, cnt, 0,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0); - error = copyin(iov->iov_base, CAST_DOWN(caddr_t, cp), cnt); + error = copyin(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), CAST_DOWN(caddr_t, cp), acnt); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)iov->iov_base, (int)cp, cnt, 0,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0); } if (error) return (error); break; + case UIO_SYSSPACE32: case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) - error = copywithin(CAST_DOWN(caddr_t, cp), iov->iov_base, - cnt); + error = copywithin(CAST_DOWN(caddr_t, cp), (caddr_t)uio->uio_iovs.iov32p->iov_base, + acnt); else - error = copywithin(iov->iov_base, CAST_DOWN(caddr_t, cp), - cnt); + error = copywithin((caddr_t)uio->uio_iovs.iov32p->iov_base, CAST_DOWN(caddr_t, cp), + acnt); break; + case UIO_PHYS_USERSPACE64: + if (uio->uio_rw == UIO_READ) + { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, + (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0); + + error = copypv((addr64_t)cp, uio->uio_iovs.iov64p->iov_base, acnt, cppvPsrc | cppvNoRefSrc); + if (error) /* Copy physical to virtual */ + error = EFAULT; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, + (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0); + } + else + { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, + (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0); + + error = copypv(uio->uio_iovs.iov64p->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk); + if (error) /* Copy virtual to physical */ + error = EFAULT; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, + (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0); + } + if (error) + return (error); + break; + + case UIO_PHYS_USERSPACE32: case UIO_PHYS_USERSPACE: if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)iov->iov_base, cnt, 1,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0); - if (error = copypv((addr64_t)cp, (addr64_t)((unsigned int)iov->iov_base), cnt, cppvPsrc | cppvNoRefSrc)) /* Copy physical to virtual */ + error = copypv((addr64_t)cp, (addr64_t)uio->uio_iovs.iov32p->iov_base, acnt, cppvPsrc | cppvNoRefSrc); + if (error) /* Copy physical to virtual */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)iov->iov_base, cnt, 1,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)iov->iov_base, (int)cp, cnt, 1,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0); - if (error = copypv((addr64_t)((unsigned 
int)iov->iov_base), (addr64_t)cp, cnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk)) /* Copy virtual to physical */ + error = copypv((addr64_t)uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk); + if (error) /* Copy virtual to physical */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)iov->iov_base, (int)cp, cnt, 1,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0); } if (error) return (error); break; + case UIO_PHYS_SYSSPACE32: case UIO_PHYS_SYSSPACE: if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)iov->iov_base, cnt, 2,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0); - if (error = copypv((addr64_t)cp, (addr64_t)((unsigned int)iov->iov_base), cnt, cppvKmap | cppvPsrc | cppvNoRefSrc)) /* Copy physical to virtual */ + error = copypv((addr64_t)cp, uio->uio_iovs.iov32p->iov_base, acnt, cppvKmap | cppvPsrc | cppvNoRefSrc); + if (error) /* Copy physical to virtual */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)iov->iov_base, cnt, 2,0); + (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)iov->iov_base, (int)cp, cnt, 2,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0); - if (error = copypv((addr64_t)((unsigned int)iov->iov_base), (addr64_t)cp, cnt, cppvKmap | cppvPsnk | cppvNoRefSrc | cppvNoModSnk)) /* Copy virtual to physical */ + error = copypv(uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvKmap | cppvPsnk | cppvNoRefSrc | cppvNoModSnk); + if (error) /* Copy virtual to physical */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)iov->iov_base, (int)cp, cnt, 2,0); + (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0); } if (error) return (error); break; + + default: + break; } - iov->iov_base += cnt; - iov->iov_len -= cnt; - uio->uio_resid -= cnt; - uio->uio_offset += cnt; - cp += cnt; - n -= cnt; + uio_iov_base_add(uio, acnt); +#if LP64KERN + uio_iov_len_add(uio, -((int64_t)acnt)); + uio_setresid(uio, (uio_resid(uio) - ((int64_t)acnt))); +#else + uio_iov_len_add(uio, -((int)acnt)); + uio_setresid(uio, (uio_resid(uio) - ((int)acnt))); +#endif + uio->uio_offset += acnt; + cp += acnt; + n -= acnt; } return (error); } @@ -216,38 +305,46 @@ ureadc(c, uio) register int c; register struct uio *uio; { - register struct iovec *iov; - - if (uio->uio_resid <= 0) + if (uio_resid(uio) <= 0) panic("ureadc: non-positive resid"); again: if (uio->uio_iovcnt == 0) panic("ureadc: non-positive iovcnt"); - iov = uio->uio_iov; - if (iov->iov_len <= 0) { + if (uio_iov_len(uio) <= 0) { uio->uio_iovcnt--; - uio->uio_iov++; + uio_next_iov(uio); goto again; } switch (uio->uio_segflg) { + case UIO_USERSPACE32: case UIO_USERSPACE: - if (subyte(iov->iov_base, c) < 0) + if (subyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), c) < 0) return (EFAULT); break; + case UIO_USERSPACE64: + if (subyte((user_addr_t)uio->uio_iovs.iov64p->iov_base, c) < 0) + return (EFAULT); + break; + + case UIO_SYSSPACE32: case UIO_SYSSPACE: - *iov->iov_base = c; + *((caddr_t)uio->uio_iovs.iov32p->iov_base) = c; break; + case UIO_USERISPACE32: case UIO_USERISPACE: - if (suibyte(iov->iov_base, c) < 0) + if (suibyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), c) < 0) return (EFAULT); break; + + default: + break; } - iov->iov_base++; - iov->iov_len--; - 
uio->uio_resid--; + uio_iov_base_add(uio, 1); + uio_iov_len_add(uio, -1); + uio_setresid(uio, (uio_resid(uio) - 1)); uio->uio_offset++; return (0); } @@ -257,36 +354,43 @@ again: /* * Get next character written in by user from uio. */ +int uwritec(uio) - struct uio *uio; + uio_t uio; { - register struct iovec *iov; - register int c; + register int c = 0; - if (uio->uio_resid <= 0) + if (uio_resid(uio) <= 0) return (-1); again: if (uio->uio_iovcnt <= 0) panic("uwritec: non-positive iovcnt"); - iov = uio->uio_iov; - if (iov->iov_len == 0) { - uio->uio_iov++; + + if (uio_iov_len(uio) == 0) { + uio_next_iov(uio); if (--uio->uio_iovcnt == 0) return (-1); goto again; } switch (uio->uio_segflg) { + case UIO_USERSPACE32: case UIO_USERSPACE: - c = fubyte(iov->iov_base); + c = fubyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base)); break; + case UIO_USERSPACE64: + c = fubyte((user_addr_t)uio->uio_iovs.iov64p->iov_base); + break; + + case UIO_SYSSPACE32: case UIO_SYSSPACE: - c = *iov->iov_base & 0377; + c = *((caddr_t)uio->uio_iovs.iov32p->iov_base) & 0377; break; + case UIO_USERISPACE32: case UIO_USERISPACE: - c = fuibyte(iov->iov_base); + c = fuibyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base)); break; default: @@ -296,9 +400,9 @@ again: } if (c < 0) return (-1); - iov->iov_base++; - iov->iov_len--; - uio->uio_resid--; + uio_iov_base_add(uio, 1); + uio_iov_len_add(uio, -1); + uio_setresid(uio, (uio_resid(uio) - 1)); uio->uio_offset++; return (c); } @@ -322,10 +426,806 @@ hashinit(elements, type, hashmask) continue; hashsize >>= 1; MALLOC(hashtbl, struct generic *, - (u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); - bzero(hashtbl, (u_long)hashsize * sizeof(*hashtbl)); - for (i = 0; i < hashsize; i++) - LIST_INIT(&hashtbl[i]); - *hashmask = hashsize - 1; + (u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK|M_ZERO); + if (hashtbl != NULL) { + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + } return (hashtbl); } + +/* + * uio_resid - return the residual IO value for the given uio_t + */ +user_ssize_t uio_resid( uio_t a_uio ) +{ +#if DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +/* if (IS_VALID_UIO_SEGFLG(a_uio->uio_segflg) == 0) { */ +/* panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); */ +/* } */ +#endif /* DEBUG */ + + /* return 0 if there are no active iovecs */ + if (a_uio == NULL) { + return( 0 ); + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + return( (user_ssize_t)a_uio->uio_resid ); +#else + return( a_uio->uio_resid_64 ); +#endif + } + return( (user_ssize_t)a_uio->uio_resid ); +} + +/* + * uio_setresid - set the residual IO value for the given uio_t + */ +void uio_setresid( uio_t a_uio, user_ssize_t a_value ) +{ +#if DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +/* if (IS_VALID_UIO_SEGFLG(a_uio->uio_segflg) == 0) { */ +/* panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); */ +/* } */ +#endif /* DEBUG */ + + if (a_uio == NULL) { + return; + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + a_uio->uio_resid = (int)a_value; +#else + a_uio->uio_resid_64 = a_value; +#endif + } + else { + a_uio->uio_resid = (int)a_value; + } + return; +} + +#if 0 // obsolete +/* + * uio_proc_t - return the proc_t for the given uio_t + * WARNING - This call is going away. Find another way to get the proc_t!! 
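+ * (Callers can usually recover the proc some other way; a minimal
+ * alternative sketch, for the common case of the current thread:
+ *
+ *	struct proc *p = current_proc();
+ *
+ * which avoids any dependency on this obsolete accessor.)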
+ */
+__private_extern__ proc_t uio_proc_t( uio_t a_uio )
+{
+#if LP64_DEBUG
+	if (a_uio == NULL) {
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+	}
+#endif /* LP64_DEBUG */
+
+	/* return NULL if we were not given a valid uio_t */
+	if (a_uio == NULL) {
+		return( NULL );
+	}
+	return( a_uio->uio_procp );
+}
+
+/*
+ * uio_setproc_t - set the proc_t for the given uio_t
+ * WARNING - This call is going away.
+ */
+__private_extern__ void uio_setproc_t( uio_t a_uio, proc_t a_proc_t )
+{
+	if (a_uio == NULL) {
+#if LP64_DEBUG
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+#endif /* LP64_DEBUG */
+		return;
+	}
+
+	a_uio->uio_procp = a_proc_t;
+	return;
+}
+#endif // obsolete
+
+/*
+ * uio_curriovbase - return the base address of the current iovec associated
+ * with the given uio_t.  May return 0.
+ */
+user_addr_t uio_curriovbase( uio_t a_uio )
+{
+#if LP64_DEBUG
+	if (a_uio == NULL) {
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+	}
+#endif /* LP64_DEBUG */
+
+	if (a_uio == NULL || a_uio->uio_iovcnt < 1) {
+		return(0);
+	}
+
+	if (UIO_IS_64_BIT_SPACE(a_uio)) {
+		return(a_uio->uio_iovs.uiovp->iov_base);
+	}
+	return((user_addr_t)((uintptr_t)a_uio->uio_iovs.kiovp->iov_base));
+}
+
+/*
+ * uio_curriovlen - return the length value of the current iovec associated
+ * with the given uio_t.
+ */
+user_size_t uio_curriovlen( uio_t a_uio )
+{
+#if LP64_DEBUG
+	if (a_uio == NULL) {
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+	}
+#endif /* LP64_DEBUG */
+
+	if (a_uio == NULL || a_uio->uio_iovcnt < 1) {
+		return(0);
+	}
+
+	if (UIO_IS_64_BIT_SPACE(a_uio)) {
+		return(a_uio->uio_iovs.uiovp->iov_len);
+	}
+	return((user_size_t)a_uio->uio_iovs.kiovp->iov_len);
+}
+
+/*
+ * uio_setcurriovlen - set the length value of the current iovec associated
+ * with the given uio_t.
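+ * For a uio_t addressing a 32 bit space the new length must fit in
+ * 32 bits; LP64_DEBUG builds panic on a larger value (see the check
+ * in the body below).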
+ */ +__private_extern__ void uio_setcurriovlen( uio_t a_uio, user_size_t a_value ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL) { + return; + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { + a_uio->uio_iovs.uiovp->iov_len = a_value; + } + else { +#if LP64_DEBUG + if (a_value > 0xFFFFFFFFull) { + panic("%s :%d - invalid a_value\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + a_uio->uio_iovs.kiovp->iov_len = (size_t)a_value; + } + return; +} + +/* + * uio_iovcnt - return count of active iovecs for the given uio_t + */ +int uio_iovcnt( uio_t a_uio ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL) { + return(0); + } + + return( a_uio->uio_iovcnt ); +} + +/* + * uio_offset - return the current offset value for the given uio_t + */ +off_t uio_offset( uio_t a_uio ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL) { + return(0); + } + return( a_uio->uio_offset ); +} + +/* + * uio_setoffset - set the current offset value for the given uio_t + */ +void uio_setoffset( uio_t a_uio, off_t a_offset ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL) { + return; + } + a_uio->uio_offset = a_offset; + return; +} + +/* + * uio_rw - return the read / write flag for the given uio_t + */ +int uio_rw( uio_t a_uio ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL) { + return(-1); + } + return( a_uio->uio_rw ); +} + +/* + * uio_setrw - set the read / write flag for the given uio_t + */ +void uio_setrw( uio_t a_uio, int a_value ) +{ + if (a_uio == NULL) { +#if LP64_DEBUG + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); +#endif /* LP64_DEBUG */ + return; + } + +#if LP64_DEBUG + if (!(a_value == UIO_READ || a_value == UIO_WRITE)) { + panic("%s :%d - invalid a_value\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_value == UIO_READ || a_value == UIO_WRITE) { + a_uio->uio_rw = a_value; + } + return; +} + +/* + * uio_isuserspace - return non zero value if the address space + * flag is for a user address space (could be 32 or 64 bit). + */ +int uio_isuserspace( uio_t a_uio ) +{ + if (a_uio == NULL) { +#if LP64_DEBUG + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); +#endif /* LP64_DEBUG */ + return(0); + } + + if (UIO_SEG_IS_USER_SPACE(a_uio->uio_segflg)) { + return( 1 ); + } + return( 0 ); +} + + +/* + * uio_create - create an uio_t. + * Space is allocated to hold up to a_iovcount number of iovecs. The uio_t + * is not fully initialized until all iovecs are added using uio_addiov calls. + * a_iovcount is the maximum number of iovecs you may add. 
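+ *
+ * A minimal usage sketch (kbuf and klen are placeholder names, not
+ * part of this KPI):
+ *
+ *	uio_t auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
+ *	if (auio != NULL) {
+ *		uio_addiov(auio, CAST_USER_ADDR_T(kbuf), klen);
+ *		... hand auio to the I/O routine, then ...
+ *		uio_free(auio);
+ *	}
+ *
+ * When the caller supplies the backing storage itself, see
+ * uio_createwithbuffer below.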
+ */
+uio_t uio_create( int a_iovcount,		/* number of iovecs */
+		  off_t a_offset,		/* current offset */
+		  int a_spacetype,		/* type of address space */
+		  int a_iodirection )		/* read or write flag */
+{
+	void *		my_buf_p;
+	int		my_size;
+	uio_t		my_uio;
+
+	my_size = sizeof(struct uio) + (sizeof(struct user_iovec) * a_iovcount);
+	my_buf_p = kalloc(my_size);
+	if (my_buf_p == NULL) {
+		/* kalloc can fail; never hand a NULL buffer to uio_createwithbuffer */
+		return( NULL );
+	}
+	my_uio = uio_createwithbuffer( a_iovcount,
+				       a_offset,
+				       a_spacetype,
+				       a_iodirection,
+				       my_buf_p,
+				       my_size );
+	if (my_uio != 0) {
+		/* leave a note that we allocated this uio_t */
+		my_uio->uio_flags |= UIO_FLAGS_WE_ALLOCED;
+#if DEBUG
+		hw_atomic_add(&uio_t_count, 1);
+#endif
+	}
+
+	return( my_uio );
+}
+
+
+/*
+ * uio_createwithbuffer - create an uio_t.
+ * Create a uio_t using the given buffer.  The uio_t
+ * is not fully initialized until all iovecs are added using uio_addiov calls.
+ * a_iovcount is the maximum number of iovecs you may add.
+ * This call may fail if the given buffer is not large enough.
+ */
+__private_extern__ uio_t
+uio_createwithbuffer( int a_iovcount,		/* number of iovecs */
+		  off_t a_offset,		/* current offset */
+		  int a_spacetype,		/* type of address space */
+		  int a_iodirection,		/* read or write flag */
+		  void *a_buf_p,		/* pointer to a uio_t buffer */
+		  int a_buffer_size )		/* size of uio_t buffer */
+{
+	uio_t		my_uio = (uio_t) a_buf_p;
+	int		my_size;
+
+	my_size = sizeof(struct uio) + (sizeof(struct user_iovec) * a_iovcount);
+	if (a_buffer_size < my_size) {
+#if DEBUG
+		panic("%s :%d - a_buffer_size is too small\n", __FILE__, __LINE__);
+#endif /* DEBUG */
+		return( NULL );
+	}
+	my_size = a_buffer_size;
+
+#if DEBUG
+	if (my_uio == 0) {
+		panic("%s :%d - could not allocate uio_t\n", __FILE__, __LINE__);
+	}
+	if (!IS_VALID_UIO_SEGFLG(a_spacetype)) {
+		panic("%s :%d - invalid address space type\n", __FILE__, __LINE__);
+	}
+	if (!(a_iodirection == UIO_READ || a_iodirection == UIO_WRITE)) {
+		panic("%s :%d - invalid IO direction flag\n", __FILE__, __LINE__);
+	}
+	if (a_iovcount > UIO_MAXIOV) {
+		panic("%s :%d - invalid a_iovcount\n", __FILE__, __LINE__);
+	}
+#endif /* DEBUG */
+
+	bzero(my_uio, my_size);
+	my_uio->uio_size = my_size;
+
+	/* we use uio_segflg to indicate if the uio_t is the new format or */
+	/* old (pre LP64 support) legacy format */
+	switch (a_spacetype) {
+	case UIO_USERSPACE:
+		my_uio->uio_segflg = UIO_USERSPACE32;
+		break;
+	case UIO_SYSSPACE:
+		my_uio->uio_segflg = UIO_SYSSPACE32;
+		break;
+	case UIO_PHYS_USERSPACE:
+		my_uio->uio_segflg = UIO_PHYS_USERSPACE32;
+		break;
+	case UIO_PHYS_SYSSPACE:
+		my_uio->uio_segflg = UIO_PHYS_SYSSPACE32;
+		break;
+	default:
+		my_uio->uio_segflg = a_spacetype;
+		break;
+	}
+
+	if (a_iovcount > 0) {
+		my_uio->uio_iovs.uiovp = (struct user_iovec *)
+			(((uint8_t *)my_uio) + sizeof(struct uio));
+	}
+	else {
+		my_uio->uio_iovs.uiovp = NULL;
+	}
+
+	my_uio->uio_max_iovs = a_iovcount;
+	my_uio->uio_offset = a_offset;
+	my_uio->uio_rw = a_iodirection;
+	my_uio->uio_flags = UIO_FLAGS_INITED;
+
+	return( my_uio );
+}
+
+/*
+ * uio_spacetype - return the address space type for the given uio_t
+ */
+int uio_spacetype( uio_t a_uio )
+{
+	if (a_uio == NULL) {
+#if LP64_DEBUG
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+#endif /* LP64_DEBUG */
+		return(-1);
+	}
+
+	return( a_uio->uio_segflg );
+}
+
+/*
+ * uio_iovsaddr - get the address of the iovec array for the given uio_t.
+ * This returns the location of the iovecs within the uio.
+ * NOTE - for compatibility mode we just return the current value in uio_iovs
+ * which will increase as the IO is completed and is NOT embedded within the
+ * uio, it is a separate array of one or more iovecs.
+ */
+struct user_iovec * uio_iovsaddr( uio_t a_uio )
+{
+	struct user_iovec *	my_addr;
+
+	if (a_uio == NULL) {
+		return(NULL);
+	}
+
+	if (a_uio->uio_segflg == UIO_USERSPACE || a_uio->uio_segflg == UIO_SYSSPACE) {
+		/* we need this for compatibility mode. */
+		my_addr = (struct user_iovec *) a_uio->uio_iovs.iovp;
+	}
+	else {
+		my_addr = (struct user_iovec *) (((uint8_t *)a_uio) + sizeof(struct uio));
+	}
+	return(my_addr);
+}
+
+/*
+ * uio_reset - reset an uio_t.
+ * Reset the given uio_t to initial values.  The uio_t is not fully initialized
+ * until all iovecs are added using uio_addiov calls.
+ * The a_iovcount value passed in the uio_create is the maximum number of
+ * iovecs you may add.
+ */
+void uio_reset( uio_t a_uio,
+		off_t a_offset,			/* current offset */
+		int a_spacetype,		/* type of address space */
+		int a_iodirection )		/* read or write flag */
+{
+	vm_size_t	my_size;
+	int		my_max_iovs;
+	u_int32_t	my_old_flags;
+
+#if LP64_DEBUG
+	if (a_uio == NULL) {
+		panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__);
+	}
+	if (!IS_VALID_UIO_SEGFLG(a_spacetype)) {
+		panic("%s :%d - invalid address space type\n", __FILE__, __LINE__);
+	}
+	if (!(a_iodirection == UIO_READ || a_iodirection == UIO_WRITE)) {
+		panic("%s :%d - invalid IO direction flag\n", __FILE__, __LINE__);
+	}
+#endif /* LP64_DEBUG */
+
+	if (a_uio == NULL) {
+		return;
+	}
+
+	my_size = a_uio->uio_size;
+	my_old_flags = a_uio->uio_flags;
+	my_max_iovs = a_uio->uio_max_iovs;
+	bzero(a_uio, my_size);
+	a_uio->uio_size = my_size;
+	a_uio->uio_segflg = a_spacetype;
+	if (my_max_iovs > 0) {
+		a_uio->uio_iovs.uiovp = (struct user_iovec *)
+			(((uint8_t *)a_uio) + sizeof(struct uio));
+	}
+	else {
+		a_uio->uio_iovs.uiovp = NULL;
+	}
+	a_uio->uio_max_iovs = my_max_iovs;
+	a_uio->uio_offset = a_offset;
+	a_uio->uio_rw = a_iodirection;
+	a_uio->uio_flags = my_old_flags;
+
+	return;
+}
+
+/*
+ * uio_free - free a uio_t allocated via uio_create.  this also frees all
+ * associated iovecs.
+ */
+void uio_free( uio_t a_uio )
+{
+#if DEBUG
+	if (a_uio == NULL) {
+		panic("%s :%d - passing NULL uio_t\n", __FILE__, __LINE__);
+	}
+#endif /* DEBUG */
+
+	if (a_uio != NULL && (a_uio->uio_flags & UIO_FLAGS_WE_ALLOCED) != 0) {
+#if DEBUG
+		if ((int)(hw_atomic_sub(&uio_t_count, 1)) < 0) {
+			panic("%s :%d - uio_t_count has gone negative\n", __FILE__, __LINE__);
+		}
+#endif
+		kfree(a_uio, a_uio->uio_size);
+	}
+}
+
+/*
+ * uio_addiov - add an iovec to the given uio_t.  You may call this up to
+ * the a_iovcount number that was passed to uio_create.  This call will
+ * increment the residual IO count as iovecs are added to the uio_t.
+ * returns 0 if add was successful else non zero.
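+ *
+ * For example (a sketch; uaddr and ulen are placeholder names):
+ *
+ *	if (uio_addiov(auio, uaddr, ulen) != 0)
+ *		return (EINVAL);
+ *
+ * The add fails once all a_iovcount slots are in use.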
+ */ +int uio_addiov( uio_t a_uio, user_addr_t a_baseaddr, user_size_t a_length ) +{ + int i; + + if (a_uio == NULL) { +#if DEBUG + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); +#endif /* LP64_DEBUG */ + return(-1); + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { + for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { + if (a_uio->uio_iovs.uiovp[i].iov_len == 0 && a_uio->uio_iovs.uiovp[i].iov_base == 0) { + a_uio->uio_iovs.uiovp[i].iov_len = a_length; + a_uio->uio_iovs.uiovp[i].iov_base = a_baseaddr; + a_uio->uio_iovcnt++; +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + a_uio->uio_resid += a_length; +#else + a_uio->uio_resid_64 += a_length; +#endif + return( 0 ); + } + } + } + else { + for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { + if (a_uio->uio_iovs.kiovp[i].iov_len == 0 && a_uio->uio_iovs.kiovp[i].iov_base == 0) { + a_uio->uio_iovs.kiovp[i].iov_len = (u_int32_t)a_length; + a_uio->uio_iovs.kiovp[i].iov_base = (u_int32_t)((uintptr_t)a_baseaddr); + a_uio->uio_iovcnt++; + a_uio->uio_resid += a_length; + return( 0 ); + } + } + } + + return( -1 ); +} + +/* + * uio_getiov - get iovec data associated with the given uio_t. Use + * a_index to iterate over each iovec (0 to (uio_iovcnt(uio_t) - 1)). + * a_baseaddr_p and a_length_p may be NULL. + * returns -1 when a_index is >= uio_t.uio_iovcnt or invalid uio_t. + * returns 0 when data is returned. + */ +int uio_getiov( uio_t a_uio, + int a_index, + user_addr_t * a_baseaddr_p, + user_size_t * a_length_p ) +{ + if (a_uio == NULL) { +#if DEBUG + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); +#endif /* DEBUG */ + return(-1); + } + if ( a_index < 0 || a_index >= a_uio->uio_iovcnt) { + return(-1); + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (a_baseaddr_p != NULL) { + *a_baseaddr_p = a_uio->uio_iovs.uiovp[a_index].iov_base; + } + if (a_length_p != NULL) { + *a_length_p = a_uio->uio_iovs.uiovp[a_index].iov_len; + } + } + else { + if (a_baseaddr_p != NULL) { + *a_baseaddr_p = a_uio->uio_iovs.kiovp[a_index].iov_base; + } + if (a_length_p != NULL) { + *a_length_p = a_uio->uio_iovs.kiovp[a_index].iov_len; + } + } + + return( 0 ); +} + +/* + * uio_calculateresid - runs through all iovecs associated with this + * uio_t and calculates (and sets) the residual IO count. + */ +__private_extern__ void uio_calculateresid( uio_t a_uio ) +{ + int i; + + if (a_uio == NULL) { +#if LP64_DEBUG + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); +#endif /* LP64_DEBUG */ + return; + } + + a_uio->uio_iovcnt = 0; + if (UIO_IS_64_BIT_SPACE(a_uio)) { +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + a_uio->uio_resid = 0; +#else + a_uio->uio_resid_64 = 0; +#endif + for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { + if (a_uio->uio_iovs.uiovp[i].iov_len != 0 && a_uio->uio_iovs.uiovp[i].iov_base != 0) { + a_uio->uio_iovcnt++; +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + a_uio->uio_resid += a_uio->uio_iovs.uiovp[i].iov_len; +#else + a_uio->uio_resid_64 += a_uio->uio_iovs.uiovp[i].iov_len; +#endif + } + } + } + else { + a_uio->uio_resid = 0; + for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { + if (a_uio->uio_iovs.kiovp[i].iov_len != 0 && a_uio->uio_iovs.kiovp[i].iov_base != 0) { + a_uio->uio_iovcnt++; + a_uio->uio_resid += a_uio->uio_iovs.kiovp[i].iov_len; + } + } + } + return; +} + +/* + * uio_update - update the given uio_t for a_count of completed IO. 
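+ * (a_count is the number of bytes the caller has just transferred,
+ * e.g. via a copyin or copyout against the current iovec.)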
+ * This call decrements the current iovec length and residual IO value + * and increments the current iovec base address and offset value. + * If the current iovec length is 0 then advance to the next + * iovec (if any). + */ +void uio_update( uio_t a_uio, user_size_t a_count ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } + if (UIO_IS_32_BIT_SPACE(a_uio) && a_count > 0xFFFFFFFFull) { + panic("%s :%d - invalid count value \n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL || a_uio->uio_iovcnt < 1) { + return; + } + + if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (a_count > a_uio->uio_iovs.uiovp->iov_len) { + a_uio->uio_iovs.uiovp->iov_base += a_uio->uio_iovs.uiovp->iov_len; + a_uio->uio_iovs.uiovp->iov_len = 0; + } + else { + a_uio->uio_iovs.uiovp->iov_base += a_count; + a_uio->uio_iovs.uiovp->iov_len -= a_count; + } +#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI + if (a_uio->uio_resid < 0) { + a_uio->uio_resid = 0; + } + if (a_count > (user_size_t)a_uio->uio_resid) { + a_uio->uio_offset += a_uio->uio_resid; + a_uio->uio_resid = 0; + } + else { + a_uio->uio_offset += a_count; + a_uio->uio_resid -= a_count; + } +#else + if (a_uio->uio_resid_64 < 0) { + a_uio->uio_resid_64 = 0; + } + if (a_count > (user_size_t)a_uio->uio_resid_64) { + a_uio->uio_offset += a_uio->uio_resid_64; + a_uio->uio_resid_64 = 0; + } + else { + a_uio->uio_offset += a_count; + a_uio->uio_resid_64 -= a_count; + } +#endif // LP64todo + + /* advance to next iovec if current one is totally consumed */ + while (a_uio->uio_iovcnt > 0 && a_uio->uio_iovs.uiovp->iov_len == 0) { + a_uio->uio_iovcnt--; + if (a_uio->uio_iovcnt > 0) { + a_uio->uio_iovs.uiovp++; + } + } + } + else { + if (a_count > a_uio->uio_iovs.kiovp->iov_len) { + a_uio->uio_iovs.kiovp->iov_base += a_uio->uio_iovs.kiovp->iov_len; + a_uio->uio_iovs.kiovp->iov_len = 0; + } + else { + a_uio->uio_iovs.kiovp->iov_base += a_count; + a_uio->uio_iovs.kiovp->iov_len -= a_count; + } + if (a_uio->uio_resid < 0) { + a_uio->uio_resid = 0; + } + if (a_count > (user_size_t)a_uio->uio_resid) { + a_uio->uio_offset += a_uio->uio_resid; + a_uio->uio_resid = 0; + } + else { + a_uio->uio_offset += a_count; + a_uio->uio_resid -= a_count; + } + + /* advance to next iovec if current one is totally consumed */ + while (a_uio->uio_iovcnt > 0 && a_uio->uio_iovs.kiovp->iov_len == 0) { + a_uio->uio_iovcnt--; + if (a_uio->uio_iovcnt > 0) { + a_uio->uio_iovs.kiovp++; + } + } + } + return; +} + + +/* + * uio_duplicate - allocate a new uio and make a copy of the given uio_t. + * may return NULL. 
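+ * Callers should check the result, e.g.:
+ *
+ *	uio_t my_copy = uio_duplicate(auio);
+ *	if (my_copy == NULL)
+ *		return (ENOMEM);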
+ */
+uio_t uio_duplicate( uio_t a_uio )
+{
+	uio_t		my_uio;
+	int		i;
+
+	if (a_uio == NULL) {
+		return(NULL);
+	}
+
+	my_uio = (uio_t) kalloc(a_uio->uio_size);
+	if (my_uio == 0) {
+		panic("%s :%d - allocation failed\n", __FILE__, __LINE__);
+	}
+
+	bcopy((void *)a_uio, (void *)my_uio, a_uio->uio_size);
+	/* need to set our iovec pointer to point to first active iovec */
+	if (my_uio->uio_max_iovs > 0) {
+		my_uio->uio_iovs.uiovp = (struct user_iovec *)
+			(((uint8_t *)my_uio) + sizeof(struct uio));
+
+		/* advance to first nonzero iovec */
+		if (my_uio->uio_iovcnt > 0) {
+			for ( i = 0; i < my_uio->uio_max_iovs; i++ ) {
+				if (UIO_IS_64_BIT_SPACE(a_uio)) {
+					if (my_uio->uio_iovs.uiovp->iov_len != 0) {
+						break;
+					}
+					my_uio->uio_iovs.uiovp++;
+				}
+				else {
+					if (my_uio->uio_iovs.kiovp->iov_len != 0) {
+						break;
+					}
+					my_uio->uio_iovs.kiovp++;
+				}
+			}
+		}
+	}
+
+	return(my_uio);
+}
+
diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c
index 9091fca23..ed56bd1cd 100644
--- a/bsd/kern/kern_symfile.c
+++ b/bsd/kern/kern_symfile.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -23,17 +23,22 @@
 *
 *	File:	bsd/kern/kern_symfile.c
 *
- *	This file contains creates a dummy symbol file for mach_kernel based on
- *	the symbol table information passed by the SecondaryLoader/PlatformExpert.
- *	This allows us to correctly link other executables (drivers, etc) against the
- *	the kernel in cases where the kernel image on the root device does not match
- *	the live kernel. This can occur during net-booting where the actual kernel
- *	image is obtained from the network via tftp rather than the root
- *	device.
+ *	This file creates a dummy symbol file for mach_kernel
+ *	based on the symbol table information passed by the
+ *	SecondaryLoader/PlatformExpert.  This allows us to correctly
+ *	link other executables (drivers, etc) against the kernel in
+ *	cases where the kernel image on the root device does not match
+ *	the live kernel.  This can occur during net-booting where the
+ *	actual kernel image is obtained from the network via tftp rather
+ *	than the root device.
 *
- *	If a symbol table is available, then the file /mach.sym will be created
- *	containing a Mach Header and a LC_SYMTAB load command followed by the
- *	the symbol table data for mach_kernel.
+ *	If a symbol table is available, then the file /mach.sym will be
+ *	created containing a Mach Header and a LC_SYMTAB load command
+ *	followed by the symbol table data for mach_kernel.
+ *
+ *	NOTE:	This file supports only 32 bit kernels at the present time;
+ *		adding support for 64 bit kernels is possible, but is not
+ *		necessary at the present time.
* * HISTORY * @@ -47,21 +52,25 @@ #include #include #include -#include -#include +#include +#include +#include #include #include -#include #include -#include +#include #include #include #include +#include +#include #include #include +#include #include +#include extern unsigned char rootdevice[]; extern struct mach_header _mh_execute_header; @@ -73,15 +82,15 @@ extern int IODTGetLoaderInfo(char *key, void **infoAddr, int *infoSize); extern void IODTFreeLoaderInfo(char *key, void *infoAddr, int infoSize); /* - * + * Can only operate against currently running 32 bit mach_kernel */ -static int output_kernel_symbols(struct proc *p) +static int +output_kernel_symbols(struct proc *p) { struct vnode *vp; - struct pcred *pcred = p->p_cred; - struct ucred *cred = pcred->pc_ucred; - struct nameidata nd; - struct vattr vattr; + kauth_cred_t cred = p->p_ucred; /* XXX */ + struct vnode_attr va; + struct vfs_context context; struct load_command *cmd; struct mach_header *orig_mh, *mh; struct segment_command *orig_ds, *orig_ts, *orig_le, *sg; @@ -90,9 +99,9 @@ static int output_kernel_symbols(struct proc *p) struct nlist *sym; vm_size_t orig_mhsize, orig_st_size; vm_offset_t header; - vm_size_t header_size; + vm_size_t header_size = 0; /* out: protected by header */ int error, error1; - int i, j; + unsigned int i, j; caddr_t addr; vm_offset_t offset; int rc_mh, rc_sc; @@ -117,28 +126,29 @@ static int output_kernel_symbols(struct proc *p) IODTFreeLoaderInfo("Kernel-__SYMTAB", (void *)orig_st, round_page_32(orig_st_size)); - if (pcred->p_svuid != pcred->p_ruid || pcred->p_svgid != pcred->p_rgid) + if (cred->cr_svuid != cred->cr_ruid || cred->cr_svgid != cred->cr_rgid) goto out; // Check to see if the root is 'e' or 'n', is this a test for network? if (rootdevice[0] == 'e' && rootdevice[1] == 'n') goto out; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "mach.sym", p); - if((error = vn_open(&nd, O_CREAT | FWRITE, S_IRUSR | S_IRGRP | S_IROTH))) goto out; + context.vc_proc = p; + context.vc_ucred = cred; + + if ((error = vnode_open("mach.sym", (O_CREAT | FWRITE), (S_IRUSR | S_IRGRP | S_IROTH), 0, &vp, &context))) + goto out; - vp = nd.ni_vp; - /* Don't dump to non-regular files or files with links. 
*/ error = EFAULT; - if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred, p) - || vattr.va_nlink != 1) + VATTR_INIT(&va); + VATTR_WANTED(&va, va_nlink); + if ((vp->v_type != VREG) || vnode_getattr(vp, &va, &context) || (va.va_nlink != 1)) goto out; - VATTR_NULL(&vattr); - vattr.va_size = 0; - VOP_LEASE(vp, p, cred, LEASE_WRITE); - VOP_SETATTR(vp, &vattr, cred, p); + VATTR_INIT(&va); /* better to do it here than waste more stack in vnode_getsize */ + VATTR_SET(&va, va_data_size, 0); + vnode_setattr(vp, &va, &context); p->p_acflag |= ACORE; // If the file type is MH_EXECUTE then this must be a kernel @@ -149,14 +159,14 @@ static int output_kernel_symbols(struct proc *p) cmd = (struct load_command *) &orig_mh[1]; for (i = 0; i < orig_mh->ncmds; i++) { if (cmd->cmd == LC_SEGMENT) { - struct segment_command *sg = (struct segment_command *) cmd; + struct segment_command *orig_sg = (struct segment_command *) cmd; - if (!strcmp(SEG_TEXT, sg->segname)) - orig_ts = sg; - else if (!strcmp(SEG_DATA, sg->segname)) - orig_ds = sg; - else if (!strcmp(SEG_LINKEDIT, sg->segname)) - orig_le = sg; + if (!strcmp(SEG_TEXT, orig_sg->segname)) + orig_ts = orig_sg; + else if (!strcmp(SEG_DATA, orig_sg->segname)) + orig_ds = orig_sg; + else if (!strcmp(SEG_LINKEDIT, orig_sg->segname)) + orig_le = orig_sg; } else if (cmd->cmd == LC_SYMTAB) orig_st = (struct symtab_command *) cmd; @@ -183,7 +193,7 @@ static int output_kernel_symbols(struct proc *p) + orig_ds->cmdsize + sizeof(struct symtab_command); - (void) kmem_alloc_wired(kernel_map, + (void) kmem_alloc(kernel_map, (vm_offset_t *) &header, (vm_size_t) header_size); if (header) @@ -204,7 +214,7 @@ static int output_kernel_symbols(struct proc *p) mh->flags = orig_mh->flags; // Initialise the current file offset and addr - offset = round_page_32(header_size); + offset = round_page(header_size); addr = (caddr_t) const_text->addr; // Load address of __TEXT,__const /* @@ -217,7 +227,7 @@ static int output_kernel_symbols(struct proc *p) sg->vmaddr = (unsigned long) addr; sg->vmsize = const_text->size; sg->fileoff = 0; - sg->filesize = const_text->size + round_page_32(header_size); + sg->filesize = const_text->size + round_page(header_size); sg->maxprot = 0; sg->initprot = 0; sg->flags = 0; @@ -234,7 +244,7 @@ static int output_kernel_symbols(struct proc *p) const_text = se; } } - offset = round_page_32((vm_address_t) offset); + offset = round_page(offset); // Now copy of the __DATA segment load command, the image need // not be stored to disk nobody needs it, yet! @@ -255,7 +265,7 @@ static int output_kernel_symbols(struct proc *p) se->offset = offset; se->nreloc = 0; } - offset = round_page_32(offset); + offset = round_page(offset); /* @@ -285,7 +295,7 @@ static int output_kernel_symbols(struct proc *p) * Write out the load commands at the beginning of the file. 
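	 * (header_size bytes at file offset 0; the __TEXT,__const data and
	 * the symbol table are written out by the calls below.)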
*/ error = vn_rdwr(UIO_WRITE, vp, (caddr_t) mh, header_size, (off_t) 0, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); + UIO_SYSSPACE32, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); if (error) goto out; @@ -294,7 +304,7 @@ static int output_kernel_symbols(struct proc *p) */ error = vn_rdwr(UIO_WRITE, vp, (caddr_t) const_text->addr, const_text->size, const_text->offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); + UIO_SYSSPACE32, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); if (error) goto out; @@ -304,17 +314,13 @@ static int output_kernel_symbols(struct proc *p) offset = st->nsyms * sizeof(struct nlist) + st->strsize; // symtab size error = vn_rdwr(UIO_WRITE, vp, (caddr_t) orig_le->vmaddr, offset, st->symoff, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); - if (error) - goto out; - + UIO_SYSSPACE32, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, p); out: if (header) kmem_free(kernel_map, header, header_size); if (vp) { - VOP_UNLOCK(vp, 0, p); - error1 = vn_close(vp, FWRITE, cred, p); + error1 = vnode_close(vp, FWRITE, &context); if (!error) error = error1; } @@ -334,3 +340,4 @@ int get_kernel_symfile(struct proc *p, char **symfile) return error_code; } + diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c index 97b35818c..9f33c4ce1 100644 --- a/bsd/kern/kern_synch.c +++ b/bsd/kern/kern_synch.c @@ -28,12 +28,11 @@ #include #include -#include +#include #include -#include +#include #include #include -#include #include @@ -48,6 +47,8 @@ #include #include +#include + #if KTRACE #include @@ -55,19 +56,22 @@ #endif static void -_sleep_continue(void) +_sleep_continue( + void *parameter, + wait_result_t wresult) { - register struct proc *p; - register thread_t self = current_act(); + register struct proc *p = current_proc(); + register thread_t self = current_thread(); struct uthread * ut; int sig, catch; int error = 0; + int dropmutex; ut = get_bsdthread_info(self); - catch = ut->uu_pri & PCATCH; - p = current_proc(); + catch = ut->uu_pri & PCATCH; + dropmutex = ut->uu_pri & PDROP; - switch (get_thread_waitresult(self)) { + switch (wresult) { case THREAD_TIMED_OUT: error = EWOULDBLOCK; break; @@ -94,7 +98,10 @@ _sleep_continue(void) if (thread_should_abort(self)) { error = EINTR; } - } + } else if( (ut->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + /* due to thread cancel */ + error = EINTR; + } } else error = EINTR; break; @@ -103,13 +110,12 @@ _sleep_continue(void) if (error == EINTR || error == ERESTART) act_set_astbsd(self); - if (ut->uu_timo) - thread_cancel_timer(); - #if KTRACE if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p->p_tracep, 0, 0, -1); + ktrcsw(p->p_tracep, 0, 0); #endif + if (ut->uu_mtx && !dropmutex) + lck_mtx_lock(ut->uu_mtx); unix_syscall_return((*ut->uu_continuation)(error)); } @@ -126,104 +132,116 @@ _sleep_continue(void) * Callers of this routine must be prepared for * premature return, and check that the reason for * sleeping has gone away. + * + * if msleep was the entry point, than we have a mutex to deal with + * + * The mutex is unlocked before the caller is blocked, and + * relocked before msleep returns unless the priority includes the PDROP + * flag... if PDROP is specified, _sleep returns with the mutex unlocked + * regardless of whether it actually blocked or not. 
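+ *
+ * A typical msleep() pattern under these rules (a sketch only; the
+ * names are placeholders and error handling is omitted):
+ *
+ *	lck_mtx_lock(my_mtx);
+ *	while (!my_condition)
+ *		(void) msleep(&my_condition, my_mtx, PCATCH, "mywait", NULL);
+ *	lck_mtx_unlock(my_mtx);
+ *
+ * Since PDROP is not passed, msleep reacquires my_mtx before returning,
+ * so the condition is always retested with the mutex held.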
*/ static int _sleep( caddr_t chan, - int pri, - char *wmsg, + int pri, + const char *wmsg, u_int64_t abstime, - int (*continuation)(int)) + int (*continuation)(int), + lck_mtx_t *mtx) { register struct proc *p; - register thread_t self = current_act(); + register thread_t self = current_thread(); struct uthread * ut; int sig, catch = pri & PCATCH; - int sigttblock = pri & PTTYBLOCK; + int dropmutex = pri & PDROP; int wait_result; int error = 0; - spl_t s; - - s = splhigh(); ut = get_bsdthread_info(self); - + p = current_proc(); #if KTRACE if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p->p_tracep, 1, 0, -1); + ktrcsw(p->p_tracep, 1, 0); #endif p->p_priority = pri & PRIMASK; - - if (chan != NULL) - assert_wait_prim(chan, NULL, abstime, - (catch) ? THREAD_ABORTSAFE : THREAD_UNINT); - else - if (abstime != 0) - thread_set_timer_deadline(abstime); - - /* - * We start our timeout - * before calling CURSIG, as we could stop there, and a wakeup - * or a SIGCONT (or both) could occur while we were stopped. - * A SIGCONT would cause us to be marked as SSLEEP - * without resuming us, thus we must be ready for sleep - * when CURSIG is called. If the wakeup happens while we're - * stopped, p->p_wchan will be 0 upon return from CURSIG. - */ - if (catch) { - if (SHOULDissignal(p,ut)) { - if (sig = CURSIG(p)) { - if (clear_wait(self, THREAD_INTERRUPTED) == KERN_FAILURE) - goto block; - /* if SIGTTOU or SIGTTIN then block till SIGCONT */ - if (sigttblock && ((sig == SIGTTOU) || (sig == SIGTTIN))) { - p->p_flag |= P_TTYSLEEP; - /* reset signal bits */ - clear_procsiglist(p, sig); - assert_wait(&p->p_siglist, THREAD_ABORTSAFE); - /* assert wait can block and SIGCONT should be checked */ - if (p->p_flag & P_TTYSLEEP) - thread_block(THREAD_CONTINUE_NULL); - /* return with success */ - error = 0; + p->p_stats->p_ru.ru_nvcsw++; + + if (mtx != NULL && chan != NULL && (thread_continue_t)continuation == THREAD_CONTINUE_NULL) { + + if (abstime) + wait_result = lck_mtx_sleep_deadline(mtx, (dropmutex) ? LCK_SLEEP_UNLOCK : 0, + chan, (catch) ? THREAD_ABORTSAFE : THREAD_UNINT, abstime); + else + wait_result = lck_mtx_sleep(mtx, (dropmutex) ? LCK_SLEEP_UNLOCK : 0, + chan, (catch) ? THREAD_ABORTSAFE : THREAD_UNINT); + } + else { + if (chan != NULL) + assert_wait_deadline(chan, (catch) ? 
THREAD_ABORTSAFE : THREAD_UNINT, abstime); + if (mtx) + lck_mtx_unlock(mtx); + if (catch) { + if (SHOULDissignal(p,ut)) { + if (sig = CURSIG(p)) { + if (clear_wait(self, THREAD_INTERRUPTED) == KERN_FAILURE) + goto block; + /* if SIGTTOU or SIGTTIN then block till SIGCONT */ + if ((pri & PTTYBLOCK) && ((sig == SIGTTOU) || (sig == SIGTTIN))) { + p->p_flag |= P_TTYSLEEP; + /* reset signal bits */ + clear_procsiglist(p, sig); + assert_wait(&p->p_siglist, THREAD_ABORTSAFE); + /* assert wait can block and SIGCONT should be checked */ + if (p->p_flag & P_TTYSLEEP) { + thread_block(THREAD_CONTINUE_NULL); + + if (mtx && !dropmutex) + lck_mtx_lock(mtx); + } + + /* return with success */ + error = 0; + goto out; + } + if (p->p_sigacts->ps_sigintr & sigmask(sig)) + error = EINTR; + else + error = ERESTART; + if (mtx && !dropmutex) + lck_mtx_lock(mtx); goto out; } - if (p->p_sigacts->ps_sigintr & sigmask(sig)) - error = EINTR; - else - error = ERESTART; + } + if (thread_should_abort(self)) { + if (clear_wait(self, THREAD_INTERRUPTED) == KERN_FAILURE) + goto block; + error = EINTR; + + if (mtx && !dropmutex) + lck_mtx_lock(mtx); goto out; } - } - if (thread_should_abort(self)) { - if (clear_wait(self, THREAD_INTERRUPTED) == KERN_FAILURE) - goto block; - error = EINTR; - goto out; - } - if (get_thread_waitresult(self) != THREAD_WAITING) { - /*already happened */ - goto out; - } - } + } -block: - splx(s); - p->p_stats->p_ru.ru_nvcsw++; +block: + if ((thread_continue_t)continuation != THREAD_CONTINUE_NULL) { + ut->uu_continuation = continuation; + ut->uu_pri = pri; + ut->uu_timo = abstime? 1: 0; + ut->uu_mtx = mtx; + (void) thread_block(_sleep_continue); + /* NOTREACHED */ + } + + wait_result = thread_block(THREAD_CONTINUE_NULL); - if ((thread_continue_t)continuation != THREAD_CONTINUE_NULL ) { - ut->uu_continuation = continuation; - ut->uu_pri = pri; - ut->uu_timo = abstime? 
1: 0; - (void) thread_block(_sleep_continue); - /* NOTREACHED */ + if (mtx && !dropmutex) + lck_mtx_lock(mtx); } - wait_result = thread_block(THREAD_CONTINUE_NULL); - switch (wait_result) { case THREAD_TIMED_OUT: error = EWOULDBLOCK; @@ -241,7 +259,7 @@ block: if (catch) { if (thread_should_abort(self)) { error = EINTR; - } else if (SHOULDissignal(p,ut)) { + } else if (SHOULDissignal(p, ut)) { if (sig = CURSIG(p)) { if (p->p_sigacts->ps_sigintr & sigmask(sig)) error = EINTR; @@ -259,12 +277,10 @@ block: out: if (error == EINTR || error == ERESTART) act_set_astbsd(self); - if (abstime) - thread_cancel_timer(); - (void) splx(s); + #if KTRACE if (KTRPOINT(p, KTR_CSW)) - ktrcsw(p->p_tracep, 0, 0, -1); + ktrcsw(p->p_tracep, 0, 0); #endif return (error); } @@ -274,28 +290,74 @@ sleep( void *chan, int pri) { - return _sleep((caddr_t)chan, pri, (char *)NULL, 0, (int (*)(int))0); + return _sleep((caddr_t)chan, pri, (char *)NULL, 0, (int (*)(int))0, (lck_mtx_t *)0); +} + +int +msleep0( + void *chan, + lck_mtx_t *mtx, + int pri, + const char *wmsg, + int timo, + int (*continuation)(int)) +{ + u_int64_t abstime = 0; + + if (timo) + clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime); + + return _sleep((caddr_t)chan, pri, wmsg, abstime, continuation, mtx); +} + +int +msleep( + void *chan, + lck_mtx_t *mtx, + int pri, + const char *wmsg, + struct timespec *ts) +{ + u_int64_t abstime = 0; + + if (ts && (ts->tv_sec || ts->tv_nsec)) { + nanoseconds_to_absolutetime((uint64_t)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec, &abstime ); + clock_absolutetime_interval_to_deadline( abstime, &abstime ); + } + + return _sleep((caddr_t)chan, pri, wmsg, abstime, (int (*)(int))0, mtx); +} + +int +msleep1( + void *chan, + lck_mtx_t *mtx, + int pri, + const char *wmsg, + u_int64_t abstime) +{ + return _sleep((caddr_t)chan, pri, wmsg, abstime, (int (*)(int))0, mtx); } int tsleep( - void *chan, + void *chan, int pri, - char *wmsg, + const char *wmsg, int timo) { u_int64_t abstime = 0; if (timo) clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime); - return _sleep((caddr_t)chan, pri, wmsg, abstime, (int (*)(int))0); + return _sleep((caddr_t)chan, pri, wmsg, abstime, (int (*)(int))0, (lck_mtx_t *)0); } int tsleep0( - void *chan, + void *chan, int pri, - char *wmsg, + const char *wmsg, int timo, int (*continuation)(int)) { @@ -303,18 +365,18 @@ tsleep0( if (timo) clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime); - return _sleep((caddr_t)chan, pri, wmsg, abstime, continuation); + return _sleep((caddr_t)chan, pri, wmsg, abstime, continuation, (lck_mtx_t *)0); } int tsleep1( void *chan, - int pri, - char *wmsg, + int pri, + const char *wmsg, u_int64_t abstime, - int (*continuation)(int)) + int (*continuation)(int)) { - return _sleep((caddr_t)chan, pri, wmsg, abstime, continuation); + return _sleep((caddr_t)chan, pri, wmsg, abstime, continuation, (lck_mtx_t *)0); } /* @@ -366,10 +428,11 @@ static fixpt_t cexp[3] = { void compute_averunnable( - register int nrun) + void *arg) { - register int i; + unsigned int nrun = *(unsigned int *)arg; struct loadavg *avg = &averunnable; + register int i; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index afff95618..dd0736fc9 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -66,9 +66,10 @@ #include #include #include -#include -#include -#include +#include +#include +#include +#include #include #include #include @@ -86,19 +87,24 @@ #include #include #include +#include 
#include +#include #include extern vm_map_t bsd_pageable_map; -#include +#include #include +#include #include #include #include +#include + sysctlfn kern_sysctl; #ifdef DEBUG sysctlfn debug_sysctl; @@ -112,68 +118,154 @@ extern int aio_max_requests_per_process; extern int aio_worker_threads; extern int maxprocperuid; extern int maxfilesperproc; +extern int lowpri_IO_window_msecs; +extern int lowpri_IO_delay_msecs; - +static void +fill_eproc(struct proc *p, struct eproc *ep); +static void +fill_externproc(struct proc *p, struct extern_proc *exp); +static void +fill_user_eproc(struct proc *p, struct user_eproc *ep); +static void +fill_user_proc(struct proc *p, struct user_kinfo_proc *kp); +static void +fill_user_externproc(struct proc *p, struct user_extern_proc *exp); +extern int +kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep); int -userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t - *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval); - +kdebug_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep, struct proc *p); +#if NFSCLIENT +extern int +netboot_root(void); +#endif +int +pcsamples_ops(int *name, u_int namelen, user_addr_t where, size_t *sizep, + struct proc *p); +__private_extern__ kern_return_t +reset_vmobjectcache(unsigned int val1, unsigned int val2); +extern int +resize_namecache(u_int newsize); static int -sysctl_aiomax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_aiomax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); static int -sysctl_aioprocmax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_aioprocmax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); static int -sysctl_aiothreads( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_aiothreads(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); +extern int +sysctl_clockrate(user_addr_t where, size_t *sizep); +int +sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep); +int +sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen); +int +sysctl_file(user_addr_t where, size_t *sizep); static void fill_proc(struct proc *p, struct kinfo_proc *kp); static int -sysctl_maxfilesperproc( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_maxfilesperproc(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen); static int -sysctl_maxprocperuid( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_maxprocperuid(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen); static int -sysctl_maxproc( void *oldp, size_t *oldlenp, void *newp, size_t newlen ); +sysctl_maxproc(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen); +int +sysctl_procargs(int *name, u_int namelen, user_addr_t where, + size_t *sizep, struct proc *cur_proc); static int -sysctl_procargs2( int *name, u_int namelen, char *where, size_t *sizep, struct proc *cur_proc); +sysctl_procargs2(int *name, u_int namelen, user_addr_t where, size_t *sizep, + struct proc *cur_proc); static int -sysctl_procargsx( int *name, u_int namelen, char *where, size_t *sizep, struct proc *cur_proc, int argc_yes); +sysctl_procargsx(int *name, u_int namelen, user_addr_t where, size_t *sizep, + struct proc *cur_proc, int argc_yes); +int +sysctl_struct(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, + size_t newlen, void *sp, int len); +extern int 
+sysctl_vnode(user_addr_t where, size_t *sizep); /* * temporary location for vm_sysctl. This should be machine independant */ + +extern uint32_t mach_factor[3]; + +static void +loadavg32to64(struct loadavg *la32, struct user_loadavg *la64) +{ + la64->ldavg[0] = la32->ldavg[0]; + la64->ldavg[1] = la32->ldavg[1]; + la64->ldavg[2] = la32->ldavg[2]; + la64->fscale = (user_long_t)la32->fscale; +} + int -vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +vm_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, __unused struct proc *p) { - extern uint32_t mach_factor[3]; struct loadavg loadinfo; switch (name[0]) { case VM_LOADAVG: - return (sysctl_struct(oldp, oldlenp, newp, newlen, + if (proc_is64bit(p)) { + struct user_loadavg loadinfo64; + loadavg32to64(&averunnable, &loadinfo64); + return (sysctl_struct(oldp, oldlenp, newp, newlen, + &loadinfo64, sizeof(loadinfo64))); + } else { + return (sysctl_struct(oldp, oldlenp, newp, newlen, &averunnable, sizeof(struct loadavg))); + } case VM_MACHFACTOR: loadinfo.ldavg[0] = mach_factor[0]; loadinfo.ldavg[1] = mach_factor[1]; loadinfo.ldavg[2] = mach_factor[2]; loadinfo.fscale = LSCALE; - return (sysctl_struct(oldp, oldlenp, newp, newlen, + if (proc_is64bit(p)) { + struct user_loadavg loadinfo64; + loadavg32to64(&loadinfo, &loadinfo64); + return (sysctl_struct(oldp, oldlenp, newp, newlen, + &loadinfo64, sizeof(loadinfo64))); + } else { + return (sysctl_struct(oldp, oldlenp, newp, newlen, &loadinfo, sizeof(struct loadavg))); + } + case VM_SWAPUSAGE: { + int error; + uint64_t swap_total; + uint64_t swap_avail; + uint32_t swap_pagesize; + boolean_t swap_encrypted; + struct xsw_usage xsu; + + error = macx_swapinfo(&swap_total, + &swap_avail, + &swap_pagesize, + &swap_encrypted); + if (error) + return error; + + xsu.xsu_total = swap_total; + xsu.xsu_avail = swap_avail; + xsu.xsu_used = swap_total - swap_avail; + xsu.xsu_pagesize = swap_pagesize; + xsu.xsu_encrypted = swap_encrypted; + return sysctl_struct(oldp, oldlenp, newp, newlen, + &xsu, sizeof (struct xsw_usage)); + } case VM_METER: - return (EOPNOTSUPP); + return (ENOTSUP); case VM_MAXID: - return (EOPNOTSUPP); + return (ENOTSUP); default: - return (EOPNOTSUPP); + return (ENOTSUP); } /* NOTREACHED */ - return (EOPNOTSUPP); + return (ENOTSUP); } /* @@ -185,23 +277,12 @@ static struct sysctl_lock { int sl_locked; } memlock; -struct __sysctl_args { - int *name; - u_int namelen; - void *old; - size_t *oldlenp; - void *new; - size_t newlen; -}; int -__sysctl(p, uap, retval) - struct proc *p; - register struct __sysctl_args *uap; - register_t *retval; +__sysctl(struct proc *p, struct __sysctl_args *uap, __unused register_t *retval) { int error, dolock = 1; - size_t savelen, oldlen = 0; - sysctlfn *fn; + size_t savelen = 0, oldlen = 0, newlen; + sysctlfn *fnp = NULL; int name[CTL_MAXNAME]; int i; int error1; @@ -211,51 +292,71 @@ __sysctl(p, uap, retval) */ if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); - if (error = - copyin(uap->name, &name, uap->namelen * sizeof(int))) + error = copyin(uap->name, &name[0], uap->namelen * sizeof(int)); + if (error) return (error); - + AUDIT_ARG(ctlname, name, uap->namelen); + if (proc_is64bit(p)) { + /* uap->newlen is a size_t value which grows to 64 bits + * when coming from a 64-bit process. 
since it's doubtful we'll + * have a sysctl newp buffer greater than 4GB we shrink it to size_t + */ + newlen = CAST_DOWN(size_t, uap->newlen); + } + else { + newlen = uap->newlen; + } + /* CTL_UNSPEC is used to get oid to AUTO_OID */ - if (uap->new != NULL - && ((name[0] == CTL_KERN - && !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO)) - || (name[0] == CTL_HW) - || (name[0] == CTL_VM) - || (name[0] == CTL_VFS)) - && (error = suser(p->p_ucred, &p->p_acflag))) + if (uap->new != USER_ADDR_NULL + && ((name[0] == CTL_KERN + && !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO || name[1] == KERN_PROCDELAYTERM || + name[1] == KERN_PROC_LOW_PRI_IO)) + || (name[0] == CTL_HW) + || (name[0] == CTL_VM) + || (name[0] == CTL_VFS)) + && (error = suser(kauth_cred_get(), &p->p_acflag))) return (error); switch (name[0]) { case CTL_KERN: - fn = kern_sysctl; + fnp = kern_sysctl; if ((name[1] != KERN_VNODE) && (name[1] != KERN_FILE) && (name[1] != KERN_PROC)) dolock = 0; break; case CTL_VM: - fn = vm_sysctl; + fnp = vm_sysctl; break; case CTL_VFS: - fn = vfs_sysctl; + fnp = vfs_sysctl; break; #ifdef DEBUG case CTL_DEBUG: - fn = debug_sysctl; + fnp = debug_sysctl; break; #endif default: - fn = 0; + fnp = NULL; } - if (uap->oldlenp && - (error = copyin(uap->oldlenp, &oldlen, sizeof(oldlen)))) - return (error); + if (uap->oldlenp != USER_ADDR_NULL) { + uint64_t oldlen64 = fuulong(uap->oldlenp); + + oldlen = CAST_DOWN(size_t, oldlen64); + /* + * If more than 4G, clamp to 4G - useracc() below will catch + * with an EFAULT, if it's actually necessary. + */ + if (oldlen64 > 0x00000000ffffffffULL) + oldlen = 0xffffffffUL; + } - if (uap->old != NULL) { - if (!useracc(uap->old, oldlen, B_WRITE)) + if (uap->old != USER_ADDR_NULL) { + if (!useracc(uap->old, (user_size_t)oldlen, B_WRITE)) return (EFAULT); /* The pc sampling mechanism does not need to take this lock */ @@ -269,7 +370,8 @@ __sysctl(p, uap, retval) memlock.sl_lock = 1; } - if (dolock && oldlen && (error = vslock(uap->old, oldlen))) { + if (dolock && oldlen && + (error = vslock(uap->old, (user_size_t)oldlen))) { if ((name[1] != KERN_PCSAMPLES) && (! ((name[1] == KERN_KDEBUG) && (name[2] == KERN_KDGETENTROPY)))) { memlock.sl_lock = 0; @@ -283,20 +385,22 @@ __sysctl(p, uap, retval) savelen = oldlen; } - if (fn) - error = (*fn)(name + 1, uap->namelen - 1, uap->old, - &oldlen, uap->new, uap->newlen, p); + if (fnp) { + error = (*fnp)(name + 1, uap->namelen - 1, uap->old, + &oldlen, uap->new, newlen, p); + } else - error = EOPNOTSUPP; + error = ENOTSUP; - if ( (name[0] != CTL_VFS) && (error == EOPNOTSUPP)) - error = userland_sysctl(p, name, uap->namelen, - uap->old, uap->oldlenp, 0, - uap->new, uap->newlen, &oldlen); + if ( (name[0] != CTL_VFS) && (error == ENOTSUP)) { + size_t tmp = oldlen; + error = userland_sysctl(p, name, uap->namelen, uap->old, &tmp, + 1, uap->new, newlen, &oldlen); + } - if (uap->old != NULL) { + if (uap->old != USER_ADDR_NULL) { if (dolock && savelen) { - error1 = vsunlock(uap->old, savelen, B_WRITE); + error1 = vsunlock(uap->old, (user_size_t)savelen, B_WRITE); if (!error && error1) error = error1; } @@ -311,8 +415,8 @@ __sysctl(p, uap, retval) if ((error) && (error != ENOMEM)) return (error); - if (uap->oldlenp) { - i = copyout(&oldlen, uap->oldlenp, sizeof(oldlen)); + if (uap->oldlenp != USER_ADDR_NULL) { + i = suulong(uap->oldlenp, oldlen); if (i) return i; } @@ -323,19 +427,14 @@ __sysctl(p, uap, retval) /* * Attributes stored in the kernel. 
*/ -extern char hostname[MAXHOSTNAMELEN]; /* defined in bsd/kern/init_main.c */ -extern int hostnamelen; -extern char domainname[MAXHOSTNAMELEN]; -extern int domainnamelen; extern char classichandler[32]; -extern long classichandler_fsid; +extern uint32_t classichandler_fsid; extern long classichandler_fileid; __private_extern__ char corefilename[MAXPATHLEN+1]; -__private_extern__ do_coredump; -__private_extern__ sugid_coredump; +__private_extern__ int do_coredump; +__private_extern__ int sugid_coredump; -extern long hostid; #ifdef INSECURE int securelevel = -1; #else @@ -343,21 +442,21 @@ int securelevel; #endif static int -sysctl_affinity(name, namelen, oldBuf, oldSize, newBuf, newSize, cur_proc) - int *name; - u_int namelen; - char *oldBuf; - size_t *oldSize; - char *newBuf; - size_t newSize; - struct proc *cur_proc; +sysctl_affinity( + int *name, + u_int namelen, + user_addr_t oldBuf, + size_t *oldSize, + user_addr_t newBuf, + __unused size_t newSize, + struct proc *cur_proc) { if (namelen < 1) - return (EOPNOTSUPP); + return (ENOTSUP); if (name[0] == 0 && 1 == namelen) { return sysctl_rdint(oldBuf, oldSize, newBuf, - (cur_proc->p_flag & P_AFFINITY) ? 1 : 0); + (cur_proc->p_flag & P_AFFINITY) ? 1 : 0); } else if (name[0] == 1 && 2 == namelen) { if (name[1] == 0) { cur_proc->p_flag &= ~P_AFFINITY; @@ -366,123 +465,125 @@ sysctl_affinity(name, namelen, oldBuf, oldSize, newBuf, newSize, cur_proc) } return 0; } - return (EOPNOTSUPP); + return (ENOTSUP); } static int -sysctl_classic(name, namelen, oldBuf, oldSize, newBuf, newSize, cur_proc) - int *name; - u_int namelen; - char *oldBuf; - size_t *oldSize; - char *newBuf; - size_t newSize; - struct proc *cur_proc; +sysctl_classic( + int *name, + u_int namelen, + user_addr_t oldBuf, + size_t *oldSize, + user_addr_t newBuf, + __unused size_t newSize, + struct proc *cur_proc) { - int newVal; - int err; struct proc *p; if (namelen != 1) - return (EOPNOTSUPP); + return (ENOTSUP); p = pfind(name[0]); if (p == NULL) return (EINVAL); - if ((p->p_ucred->cr_uid != cur_proc->p_ucred->cr_uid) - && suser(cur_proc->p_ucred, &cur_proc->p_acflag)) + if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) + && suser(kauth_cred_get(), &cur_proc->p_acflag)) return (EPERM); return sysctl_rdint(oldBuf, oldSize, newBuf, - (p->p_flag & P_CLASSIC) ? 1 : 0); + (p->p_flag & P_CLASSIC) ? 
1 : 0); } static int -sysctl_classichandler(name, namelen, oldBuf, oldSize, newBuf, newSize, p) - int *name; - u_int namelen; - char *oldBuf; - size_t *oldSize; - char *newBuf; - size_t newSize; - struct proc *p; +sysctl_classichandler( + __unused int *name, + __unused u_int namelen, + user_addr_t oldBuf, + size_t *oldSize, + user_addr_t newBuf, + size_t newSize, + struct proc *p) { int error; - int len; + size_t len; struct nameidata nd; - struct vattr vattr; + struct vnode_attr va; char handler[sizeof(classichandler)]; - - if ((error = suser(p->p_ucred, &p->p_acflag))) - return (error); - len = strlen(classichandler) + 1; - if (oldBuf && *oldSize < len) - return (ENOMEM); - if (newBuf && newSize >= sizeof(classichandler)) - return (ENAMETOOLONG); - *oldSize = len - 1; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + if (oldSize) { + len = strlen(classichandler) + 1; + if (oldBuf) { + if (*oldSize < len) + return (ENOMEM); + error = copyout(classichandler, oldBuf, len); + if (error) + return (error); + } + *oldSize = len - 1; + } if (newBuf) { + error = suser(context.vc_ucred, &p->p_acflag); + if (error) + return (error); + if (newSize >= sizeof(classichandler)) + return (ENAMETOOLONG); error = copyin(newBuf, handler, newSize); if (error) return (error); handler[newSize] = 0; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, - handler, p); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + CAST_USER_ADDR_T(handler), &context); error = namei(&nd); if (error) return (error); + nameidone(&nd); + /* Check mount point */ if ((nd.ni_vp->v_mount->mnt_flag & MNT_NOEXEC) || (nd.ni_vp->v_type != VREG)) { - vput(nd.ni_vp); + vnode_put(nd.ni_vp); return (EACCES); } - error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + error = vnode_getattr(nd.ni_vp, &va, &context); if (error) { - vput(nd.ni_vp); + vnode_put(nd.ni_vp); return (error); } - classichandler_fsid = vattr.va_fsid; - classichandler_fileid = vattr.va_fileid; - vput(nd.ni_vp); - } - if (oldBuf) { - error = copyout(classichandler, oldBuf, len); - if (error) - return (error); - } - if (newBuf) { + vnode_put(nd.ni_vp); + + classichandler_fsid = va.va_fsid; + classichandler_fileid = (u_long)va.va_fileid; strcpy(classichandler, handler); } - return (error); + return 0; } extern int get_kernel_symfile( struct proc *, char **); -extern int sysctl_dopanicinfo(int *, u_int, void *, size_t *, - void *, size_t, struct proc *); +__private_extern__ int +sysctl_dopanicinfo(int *, u_int, user_addr_t, size_t *, user_addr_t, + size_t, struct proc *); /* * kernel related system variables. 
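 *
 * These are reached from user space via sysctl(3); a minimal sketch:
 *
 *	int mib[2] = { CTL_KERN, KERN_OSTYPE };
 *	char buf[64];
 *	size_t len = sizeof(buf);
 *	(void) sysctl(mib, 2, buf, &len, NULL, 0);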
*/ int -kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, struct proc *p) { int error, level, inthostid, tmp; unsigned int oldval=0; char *str; - extern char ostype[], osrelease[], version[]; - extern int netboot_root(); - /* all sysctl names not listed below are terminal at this level */ if (namelen != 1 && !(name[0] == KERN_PROC @@ -495,7 +596,8 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) || name[0] == KERN_SYSV || name[0] == KERN_AFFINITY || name[0] == KERN_CLASSIC - || name[0] == KERN_PANICINFO) + || name[0] == KERN_PANICINFO + || name[0] == KERN_POSIX) ) return (ENOTDIR); /* overloaded */ @@ -528,14 +630,14 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case KERN_SECURELVL: level = securelevel; if ((error = sysctl_int(oldp, oldlenp, newp, newlen, &level)) || - newp == NULL) + newp == USER_ADDR_NULL) return (error); if (level < securelevel && p->p_pid != 1) return (EPERM); securelevel = level; return (0); case KERN_HOSTNAME: - error = sysctl_string(oldp, oldlenp, newp, newlen, + error = sysctl_trstring(oldp, oldlenp, newp, newlen, hostname, sizeof(hostname)); if (newp && !error) hostnamelen = newlen; @@ -554,8 +656,15 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case KERN_CLOCKRATE: return (sysctl_clockrate(oldp, oldlenp)); case KERN_BOOTTIME: - return (sysctl_rdstruct(oldp, oldlenp, newp, &boottime, + { + struct timeval t; + + t.tv_sec = boottime_sec(); + t.tv_usec = 0; + + return (sysctl_rdstruct(oldp, oldlenp, newp, &t, sizeof(struct timeval))); + } case KERN_VNODE: return (sysctl_vnode(oldp, oldlenp)); case KERN_PROC: @@ -594,8 +703,10 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) if ( error ) return error; return (sysctl_rdstring(oldp, oldlenp, newp, str)); +#if NFSCLIENT case KERN_NETBOOT: return (sysctl_rdint(oldp, oldlenp, newp, netboot_root())); +#endif case KERN_PANICINFO: return(sysctl_dopanicinfo(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p)); @@ -614,6 +725,10 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return( sysctl_aioprocmax( oldp, oldlenp, newp, newlen ) ); case KERN_AIOTHREADS: return( sysctl_aiothreads( oldp, oldlenp, newp, newlen ) ); + case KERN_USRSTACK: + return (sysctl_rdint(oldp, oldlenp, newp, (uintptr_t)p->user_stack)); + case KERN_USRSTACK64: + return (sysctl_rdquad(oldp, oldlenp, newp, p->user_stack)); case KERN_COREFILE: error = sysctl_string(oldp, oldlenp, newp, newlen, corefilename, sizeof(corefilename)); @@ -621,7 +736,7 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case KERN_COREDUMP: tmp = do_coredump; error = sysctl_int(oldp, oldlenp, newp, newlen, &do_coredump); - if (!error && (do_coredump < 0) || (do_coredump > 1)) { + if (!error && ((do_coredump < 0) || (do_coredump > 1))) { do_coredump = tmp; error = EINVAL; } @@ -629,13 +744,112 @@ kern_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case KERN_SUGID_COREDUMP: tmp = sugid_coredump; error = sysctl_int(oldp, oldlenp, newp, newlen, &sugid_coredump); - if (!error && (sugid_coredump < 0) || (sugid_coredump > 1)) { + if (!error && ((sugid_coredump < 0) || (sugid_coredump > 1))) { sugid_coredump = tmp; error = EINVAL; } return (error); + case KERN_PROCDELAYTERM: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(int)) + return 
(ENOMEM); + if ( newp && newlen != sizeof(int) ) + return(EINVAL); + *oldlenp = sizeof(int); + old_value = (p->p_lflag & P_LDELAYTERM)? 1: 0; + if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) + return(error); + if (error == 0 && newp ) + error = copyin( newp, &new_value, sizeof(int) ); + if (error == 0 && newp) { + if (new_value) + p->p_lflag |= P_LDELAYTERM; + else + p->p_lflag &= ~P_LDELAYTERM; + } + return(error); + } + case KERN_PROC_LOW_PRI_IO: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(int)) + return (ENOMEM); + if ( newp && newlen != sizeof(int) ) + return(EINVAL); + *oldlenp = sizeof(int); + + old_value = (p->p_lflag & P_LLOW_PRI_IO)? 0x01: 0; + if (p->p_lflag & P_LBACKGROUND_IO) + old_value |= 0x02; + + if (oldp && (error = copyout( &old_value, oldp, sizeof(int)))) + return(error); + if (error == 0 && newp ) + error = copyin( newp, &new_value, sizeof(int) ); + if (error == 0 && newp) { + if (new_value & 0x01) + p->p_lflag |= P_LLOW_PRI_IO; + else if (new_value & 0x02) + p->p_lflag |= P_LBACKGROUND_IO; + else if (new_value == 0) + p->p_lflag &= ~(P_LLOW_PRI_IO | P_LBACKGROUND_IO); + } + return(error); + } + case KERN_LOW_PRI_WINDOW: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(old_value) ) + return (ENOMEM); + if ( newp && newlen != sizeof(new_value) ) + return(EINVAL); + *oldlenp = sizeof(old_value); + + old_value = lowpri_IO_window_msecs; + + if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) + return(error); + if (error == 0 && newp ) + error = copyin( newp, &new_value, sizeof(new_value) ); + if (error == 0 && newp) { + lowpri_IO_window_msecs = new_value; + } + return(error); + } + case KERN_LOW_PRI_DELAY: + { + int old_value, new_value; + + error = 0; + if (oldp && *oldlenp < sizeof(old_value) ) + return (ENOMEM); + if ( newp && newlen != sizeof(new_value) ) + return(EINVAL); + *oldlenp = sizeof(old_value); + + old_value = lowpri_IO_delay_msecs; + + if (oldp && (error = copyout( &old_value, oldp, *oldlenp))) + return(error); + if (error == 0 && newp ) + error = copyin( newp, &new_value, sizeof(new_value) ); + if (error == 0 && newp) { + lowpri_IO_delay_msecs = new_value; + } + return(error); + } + case KERN_SHREG_PRIVATIZABLE: + /* this kernel does implement shared_region_make_private_np() */ + return (sysctl_rdint(oldp, oldlenp, newp, 1)); default: - return (EOPNOTSUPP); + return (ENOTSUP); } /* NOTREACHED */ } @@ -659,14 +873,8 @@ static struct ctldebug *debugvars[CTL_DEBUG_MAXID] = { &debug15, &debug16, &debug17, &debug18, &debug19, }; int -debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, struct proc *p) { struct ctldebug *cdp; @@ -675,14 +883,14 @@ debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) return (ENOTDIR); /* overloaded */ cdp = debugvars[name[0]]; if (cdp->debugname == 0) - return (EOPNOTSUPP); + return (ENOTSUP); switch (name[1]) { case CTL_DEBUG_NAME: return (sysctl_rdstring(oldp, oldlenp, newp, cdp->debugname)); case CTL_DEBUG_VALUE: return (sysctl_int(oldp, oldlenp, newp, newlen, cdp->debugvar)); default: - return (EOPNOTSUPP); + return (ENOTSUP); } /* NOTREACHED */ } @@ -693,15 +901,13 @@ debug_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) * * Validate parameters and get old / set new parameters * for an integer-valued sysctl function.
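Each of the new KERN_* cases above repeats one validate/copyout/copyin shape. A condensed sketch of that shape, using the same kernel types this file uses (sysctl_getset_int is a hypothetical helper, not part of the patch):

static int
sysctl_getset_int(user_addr_t oldp, size_t *oldlenp,
    user_addr_t newp, size_t newlen, int *valp)
{
	int old_value, new_value, error = 0;

	if (oldp && *oldlenp < sizeof(int))	/* old buffer big enough? */
		return (ENOMEM);
	if (newp && newlen != sizeof(int))	/* new value exactly an int? */
		return (EINVAL);
	*oldlenp = sizeof(int);
	old_value = *valp;
	if (oldp && (error = copyout(&old_value, oldp, sizeof(int))))
		return (error);
	if (newp && (error = copyin(newp, &new_value, sizeof(new_value))) == 0)
		*valp = new_value;		/* install the new setting */
	return (error);
}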
*/ int -sysctl_int(oldp, oldlenp, newp, newlen, valp) - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - int *valp; +sysctl_int(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, int *valp) { int error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); if (oldp && *oldlenp < sizeof(int)) return (ENOMEM); if (newp && newlen != sizeof(int)) @@ -720,14 +926,12 @@ sysctl_int(oldp, oldlenp, newp, newlen, valp) * As above, but read-only. */ int -sysctl_rdint(oldp, oldlenp, newp, val) - void *oldp; - size_t *oldlenp; - void *newp; - int val; +sysctl_rdint(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, int val) { int error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); if (oldp && *oldlenp < sizeof(int)) return (ENOMEM); if (newp) @@ -743,15 +947,13 @@ sysctl_rdint(oldp, oldlenp, newp, val) * for an quad(64bit)-valued sysctl function. */ int -sysctl_quad(oldp, oldlenp, newp, newlen, valp) - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - quad_t *valp; +sysctl_quad(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, quad_t *valp) { int error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); if (oldp && *oldlenp < sizeof(quad_t)) return (ENOMEM); if (newp && newlen != sizeof(quad_t)) @@ -776,13 +978,58 @@ sysctl_rdquad(oldp, oldlenp, newp, val) { int error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); if (oldp && *oldlenp < sizeof(quad_t)) return (ENOMEM); if (newp) return (EPERM); *oldlenp = sizeof(quad_t); if (oldp) - error = copyout((caddr_t)&val, oldp, sizeof(quad_t)); + error = copyout((caddr_t)&val, CAST_USER_ADDR_T(oldp), sizeof(quad_t)); + return (error); +} + +/* + * Validate parameters and get old / set new parameters + * for a string-valued sysctl function. Unlike sysctl_string, if you + * give it a too small (but larger than 0 bytes) buffer, instead of + * returning ENOMEM, it truncates the returned string to the buffer + * size. This preserves the semantics of some library routines + * implemented via sysctl, which truncate their returned data, rather + * than simply returning an error. The returned string is always NUL + * terminated. + */ +int +sysctl_trstring(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, char *str, int maxlen) +{ + int len, copylen, error = 0; + + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); + copylen = len = strlen(str) + 1; + if (oldp && (len < 0 || *oldlenp < 1)) + return (ENOMEM); + if (oldp && (*oldlenp < (size_t)len)) + copylen = *oldlenp + 1; + if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) + return (EINVAL); + *oldlenp = copylen - 1; /* deal with NULL strings correctly */ + if (oldp) { + error = copyout(str, oldp, copylen); + if (!error) { + unsigned char c = 0; + /* NUL terminate */ + oldp += *oldlenp; + error = copyout((void *)&c, oldp, sizeof(char)); + } + } + if (error == 0 && newp) { + error = copyin(newp, str, newlen); + str[newlen] = 0; + AUDIT_ARG(text, (char *)str); + } return (error); } @@ -791,20 +1038,17 @@ sysctl_rdquad(oldp, oldlenp, newp, val) * for a string-valued sysctl function. 
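The block comment above spells out how sysctl_trstring departs from sysctl_string: an undersized (but non-empty) old buffer yields a truncated, NUL-terminated result instead of ENOMEM. A standalone user-space illustration of the two semantics (helper names invented for this sketch):

#include <errno.h>
#include <string.h>

static int
copy_string_strict(char *dst, size_t dstlen, const char *src)
{
	size_t len = strlen(src) + 1;

	if (dstlen < len)
		return (ENOMEM);	/* sysctl_string: fail outright */
	memcpy(dst, src, len);
	return (0);
}

static int
copy_string_truncating(char *dst, size_t dstlen, const char *src)
{
	size_t len = strlen(src) + 1;

	if (dstlen == 0)
		return (ENOMEM);
	if (len > dstlen)
		len = dstlen;		/* sysctl_trstring: keep what fits */
	memcpy(dst, src, len);
	dst[len - 1] = '\0';		/* result is always NUL terminated */
	return (0);
}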
*/ int -sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen) - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - char *str; - int maxlen; +sysctl_string(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, char *str, int maxlen) { int len, error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); len = strlen(str) + 1; - if (oldp && *oldlenp < len) + if (oldp && (len < 0 || *oldlenp < (size_t)len)) return (ENOMEM); - if (newp && newlen >= maxlen) + if (newp && (maxlen < 0 || newlen >= (size_t)maxlen)) return (EINVAL); *oldlenp = len -1; /* deal with NULL strings correctly */ if (oldp) { @@ -822,16 +1066,15 @@ sysctl_string(oldp, oldlenp, newp, newlen, str, maxlen) * As above, but read-only. */ int -sysctl_rdstring(oldp, oldlenp, newp, str) - void *oldp; - size_t *oldlenp; - void *newp; - char *str; +sysctl_rdstring(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, char *str) { int len, error = 0; + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); len = strlen(str) + 1; - if (oldp && *oldlenp < len) + if (oldp && *oldlenp < (size_t)len) return (ENOMEM); if (newp) return (EPERM); @@ -846,19 +1089,16 @@ sysctl_rdstring(oldp, oldlenp, newp, str) * for a structure oriented sysctl function. */ int -sysctl_struct(oldp, oldlenp, newp, newlen, sp, len) - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - void *sp; - int len; +sysctl_struct(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, void *sp, int len) { int error = 0; - if (oldp && *oldlenp < len) + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); + if (oldp && (len < 0 || *oldlenp < (size_t)len)) return (ENOMEM); - if (newp && newlen > len) + if (newp && (len < 0 || newlen > (size_t)len)) return (EINVAL); if (oldp) { *oldlenp = len; @@ -874,15 +1114,14 @@ sysctl_struct(oldp, oldlenp, newp, newlen, sp, len) * for a structure oriented sysctl function. */ int -sysctl_rdstruct(oldp, oldlenp, newp, sp, len) - void *oldp; - size_t *oldlenp; - void *newp, *sp; - int len; +sysctl_rdstruct(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, void *sp, int len) { int error = 0; - if (oldp && *oldlenp < len) + if (oldp != USER_ADDR_NULL && oldlenp == NULL) + return (EFAULT); + if (oldp && (len < 0 || *oldlenp < (size_t)len)) return (ENOMEM); if (newp) return (EPERM); @@ -896,31 +1135,31 @@ sysctl_rdstruct(oldp, oldlenp, newp, sp, len) * Get file structures. 
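The sysctl_file rewrite that follows stops copying kernel struct file objects to user space and instead marshals each fileglob into a fixed struct extern_file, so the exported ABI no longer tracks kernel internals. The caller's side is unchanged; a sketch of a KERN_FILE consumer using the usual size-probe idiom (snapshot_files is an invented name):

#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysctl.h>

static void *
snapshot_files(size_t *nbytes)
{
	int mib[2] = { CTL_KERN, KERN_FILE };
	void *buf;

	if (sysctl(mib, 2, NULL, nbytes, NULL, 0) == -1)	/* size probe */
		return (NULL);
	if ((buf = malloc(*nbytes)) == NULL)
		return (NULL);
	if (sysctl(mib, 2, buf, nbytes, NULL, 0) == -1) {	/* may still race */
		free(buf);
		return (NULL);
	}
	return (buf);	/* filehead pointer, then extern_file records */
}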
*/ int -sysctl_file(where, sizep) - char *where; - size_t *sizep; +sysctl_file(user_addr_t where, size_t *sizep) { int buflen, error; - struct file *fp; - char *start = where; + struct fileglob *fg; + user_addr_t start = where; + struct extern_file nef; buflen = *sizep; - if (where == NULL) { + if (where == USER_ADDR_NULL) { /* * overestimate by 10 files */ - *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct file); + *sizep = sizeof(filehead) + (nfiles + 10) * sizeof(struct extern_file); return (0); } /* * first copyout filehead */ - if (buflen < sizeof(filehead)) { + if (buflen < 0 || (size_t)buflen < sizeof(filehead)) { *sizep = 0; return (0); } - if (error = copyout((caddr_t)&filehead, where, sizeof(filehead))) + error = copyout((caddr_t)&filehead, where, sizeof(filehead)); + if (error) return (error); buflen -= sizeof(filehead); where += sizeof(filehead); @@ -928,17 +1167,28 @@ sysctl_file(where, sizep) /* * followed by an array of file structures */ - for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { - if (buflen < sizeof(struct file)) { + for (fg = filehead.lh_first; fg != 0; fg = fg->f_list.le_next) { + if (buflen < 0 || (size_t)buflen < sizeof(struct extern_file)) { *sizep = where - start; return (ENOMEM); } - if (error = copyout((caddr_t)fp, where, sizeof (struct file))) + nef.f_list.le_next = (struct extern_file *)fg->f_list.le_next; + nef.f_list.le_prev = (struct extern_file **)fg->f_list.le_prev; + nef.f_flag = (fg->fg_flag & FMASK); + nef.f_type = fg->fg_type; + nef.f_count = fg->fg_count; + nef.f_msgcount = fg->fg_msgcount; + nef.f_cred = fg->fg_cred; + nef.f_ops = fg->fg_ops; + nef.f_offset = fg->fg_offset; + nef.f_data = fg->fg_data; + error = copyout((caddr_t)&nef, where, sizeof (struct extern_file)); + if (error) return (error); - buflen -= sizeof(struct file); - where += sizeof(struct file); + buflen -= sizeof(struct extern_file); + where += sizeof(struct extern_file); } - *sizep = where - start; + *sizep = where - start; return (0); } @@ -948,24 +1198,33 @@ sysctl_file(where, sizep) #define KERN_PROCSLOP (5 * sizeof (struct kinfo_proc)) int -sysctl_doproc(name, namelen, where, sizep) - int *name; - u_int namelen; - char *where; - size_t *sizep; +sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) { - register struct proc *p; - register struct kinfo_proc *dp = (struct kinfo_proc *)where; - register int needed = 0; - int buflen = where != NULL ? *sizep : 0; + struct proc *p; + user_addr_t dp = where; + size_t needed = 0; + int buflen = where != USER_ADDR_NULL ? 
*sizep : 0; int doingzomb; - struct kinfo_proc kproc; int error = 0; + boolean_t is_64_bit = FALSE; + struct kinfo_proc kproc; + struct user_kinfo_proc user_kproc; + int sizeof_kproc; + caddr_t kprocp; if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL)) return (EINVAL); p = allproc.lh_first; doingzomb = 0; + is_64_bit = proc_is64bit(current_proc()); + if (is_64_bit) { + sizeof_kproc = sizeof(user_kproc); + kprocp = (caddr_t) &user_kproc; + } + else { + sizeof_kproc = sizeof(kproc); + kprocp = (caddr_t) &kproc; + } again: for (; p != 0; p = p->p_list.le_next) { /* @@ -1001,34 +1260,39 @@ again: case KERN_PROC_UID: if ((p->p_ucred == NULL) || - (p->p_ucred->cr_uid != (uid_t)name[1])) + (kauth_cred_getuid(p->p_ucred) != (uid_t)name[1])) continue; break; case KERN_PROC_RUID: if ((p->p_ucred == NULL) || - (p->p_cred->p_ruid != (uid_t)name[1])) + (p->p_ucred->cr_ruid != (uid_t)name[1])) continue; break; } - if (buflen >= sizeof(struct kinfo_proc)) { - bzero(&kproc, sizeof(struct kinfo_proc)); - fill_proc(p, &kproc); - if (error = copyout((caddr_t)&kproc, &dp->kp_proc, - sizeof(struct kinfo_proc))) + if (buflen >= sizeof_kproc) { + bzero(kprocp, sizeof_kproc); + if (is_64_bit) { + fill_user_proc(p, (struct user_kinfo_proc *) kprocp); + } + else { + fill_proc(p, (struct kinfo_proc *) kprocp); + } + error = copyout(kprocp, dp, sizeof_kproc); + if (error) return (error); - dp++; - buflen -= sizeof(struct kinfo_proc); + dp += sizeof_kproc; + buflen -= sizeof_kproc; } - needed += sizeof(struct kinfo_proc); + needed += sizeof_kproc; } if (doingzomb == 0) { p = zombproc.lh_first; doingzomb++; goto again; } - if (where != NULL) { - *sizep = (caddr_t)dp - where; + if (where != USER_ADDR_NULL) { + *sizep = dp - where; if (needed > *sizep) return (ENOMEM); } else { @@ -1061,10 +1325,23 @@ fill_eproc(p, ep) ep->e_jobc = 0; } ep->e_ppid = (p->p_pptr) ? p->p_pptr->p_pid : 0; - if (p->p_cred) { - ep->e_pcred = *p->p_cred; - if (p->p_ucred) - ep->e_ucred = *p->p_ucred; + /* Pre-zero the fake historical pcred */ + bzero(&ep->e_pcred, sizeof(struct _pcred)); + if (p->p_ucred) { + /* XXX not ref-counted */ + + /* A fake historical pcred */ + ep->e_pcred.p_ruid = p->p_ucred->cr_ruid; + ep->e_pcred.p_svuid = p->p_ucred->cr_svuid; + ep->e_pcred.p_rgid = p->p_ucred->cr_rgid; + ep->e_pcred.p_svgid = p->p_ucred->cr_svgid; + + /* A fake historical *kauth_cred_t */ + ep->e_ucred.cr_ref = p->p_ucred->cr_ref; + ep->e_ucred.cr_uid = kauth_cred_getuid(p->p_ucred); + ep->e_ucred.cr_ngroups = p->p_ucred->cr_ngroups; + bcopy(p->p_ucred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t)); + } if (p->p_stat == SIDL || p->p_stat == SZOMB) { ep->e_vm.vm_tsize = 0; @@ -1089,6 +1366,72 @@ fill_eproc(p, ep) ep->e_xccount = ep->e_xswrss = 0; } +/* + * Fill in an LP64 version of eproc structure for the specified process. + */ +static void +fill_user_eproc(register struct proc *p, register struct user_eproc *ep) +{ + register struct tty *tp; + struct session *sessionp = NULL; + + ep->e_paddr = CAST_USER_ADDR_T(p); + if (p->p_pgrp) { + sessionp = p->p_pgrp->pg_session; + ep->e_sess = CAST_USER_ADDR_T(sessionp); + ep->e_pgid = p->p_pgrp->pg_id; + ep->e_jobc = p->p_pgrp->pg_jobc; + if (sessionp) { + if (sessionp->s_ttyvp) + ep->e_flag = EPROC_CTTY; + } + } else { + ep->e_sess = USER_ADDR_NULL; + ep->e_pgid = 0; + ep->e_jobc = 0; + } + ep->e_ppid = (p->p_pptr) ? 
p->p_pptr->p_pid : 0; + /* Pre-zero the fake historical pcred */ + bzero(&ep->e_pcred, sizeof(ep->e_pcred)); + if (p->p_ucred) { + /* XXX not ref-counted */ + + /* A fake historical pcred */ + ep->e_pcred.p_ruid = p->p_ucred->cr_ruid; + ep->e_pcred.p_svuid = p->p_ucred->cr_svuid; + ep->e_pcred.p_rgid = p->p_ucred->cr_rgid; + ep->e_pcred.p_svgid = p->p_ucred->cr_svgid; + + /* A fake historical *kauth_cred_t */ + ep->e_ucred.cr_ref = p->p_ucred->cr_ref; + ep->e_ucred.cr_uid = kauth_cred_getuid(p->p_ucred); + ep->e_ucred.cr_ngroups = p->p_ucred->cr_ngroups; + bcopy(p->p_ucred->cr_groups, ep->e_ucred.cr_groups, NGROUPS*sizeof(gid_t)); + + } + if (p->p_stat == SIDL || p->p_stat == SZOMB) { + ep->e_vm.vm_tsize = 0; + ep->e_vm.vm_dsize = 0; + ep->e_vm.vm_ssize = 0; + } + ep->e_vm.vm_rssize = 0; + + if ((p->p_flag & P_CONTROLT) && (sessionp) && + (tp = sessionp->s_ttyp)) { + ep->e_tdev = tp->t_dev; + ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; + ep->e_tsess = CAST_USER_ADDR_T(tp->t_session); + } else + ep->e_tdev = NODEV; + + if (SESS_LEADER(p)) + ep->e_flag |= EPROC_SLEADER; + if (p->p_wmesg) + strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); + ep->e_xsize = ep->e_xrssize = 0; + ep->e_xccount = ep->e_xswrss = 0; +} + /* * Fill in an eproc structure for the specified process. */ @@ -1108,7 +1451,7 @@ fill_externproc(p, exp) exp->p_oppid = p->p_oppid ; exp->p_dupfd = p->p_dupfd ; /* Mach related */ - exp->user_stack = p->user_stack ; + exp->user_stack = CAST_DOWN(caddr_t, p->user_stack); exp->exit_thread = p->exit_thread ; exp->p_debugger = p->p_debugger ; exp->sigwait = p->sigwait ; @@ -1142,7 +1485,67 @@ fill_externproc(p, exp) exp->p_addr = NULL; exp->p_xstat = p->p_xstat ; exp->p_acflag = p->p_acflag ; - exp->p_ru = p->p_ru ; + exp->p_ru = p->p_ru ; /* XXX may be NULL */ +} + +/* + * Fill in an LP64 version of extern_proc structure for the specified process. 
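sysctl_doproc above now selects the record layout from the caller's bitness, filling either kinfo_proc or the wider user_kinfo_proc via the fill_user_* routines (fill_user_eproc above, fill_user_externproc just below). User programs are unaffected, since each sees records matching its own headers; the classic consumer still works as is (list_procs is an invented name):

#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysctl.h>

static struct kinfo_proc *
list_procs(size_t *count)
{
	int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_ALL };
	size_t len = 0;
	struct kinfo_proc *kp;

	if (sysctl(mib, 3, NULL, &len, NULL, 0) == -1)	/* size probe */
		return (NULL);
	if ((kp = malloc(len)) == NULL)
		return (NULL);
	if (sysctl(mib, 3, kp, &len, NULL, 0) == -1) {
		free(kp);
		return (NULL);
	}
	*count = len / sizeof(struct kinfo_proc);	/* caller's own layout */
	return (kp);
}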
+ */ +static void +fill_user_externproc(register struct proc *p, register struct user_extern_proc *exp) +{ + exp->p_forw = exp->p_back = USER_ADDR_NULL; + if (p->p_stats) { + exp->p_starttime.tv_sec = p->p_stats->p_start.tv_sec; + exp->p_starttime.tv_usec = p->p_stats->p_start.tv_usec; + } + exp->p_vmspace = USER_ADDR_NULL; + exp->p_sigacts = CAST_USER_ADDR_T(p->p_sigacts); + exp->p_flag = p->p_flag; + exp->p_stat = p->p_stat ; + exp->p_pid = p->p_pid ; + exp->p_oppid = p->p_oppid ; + exp->p_dupfd = p->p_dupfd ; + /* Mach related */ + exp->user_stack = p->user_stack; + exp->exit_thread = CAST_USER_ADDR_T(p->exit_thread); + exp->p_debugger = p->p_debugger ; + exp->sigwait = p->sigwait ; + /* scheduling */ + exp->p_estcpu = p->p_estcpu ; + exp->p_cpticks = p->p_cpticks ; + exp->p_pctcpu = p->p_pctcpu ; + exp->p_wchan = CAST_USER_ADDR_T(p->p_wchan); + exp->p_wmesg = CAST_USER_ADDR_T(p->p_wmesg); + exp->p_swtime = p->p_swtime ; + exp->p_slptime = p->p_slptime ; + exp->p_realtimer.it_interval.tv_sec = p->p_realtimer.it_interval.tv_sec; + exp->p_realtimer.it_interval.tv_usec = p->p_realtimer.it_interval.tv_usec; + exp->p_realtimer.it_value.tv_sec = p->p_realtimer.it_value.tv_sec; + exp->p_realtimer.it_value.tv_usec = p->p_realtimer.it_value.tv_usec; + exp->p_rtime.tv_sec = p->p_rtime.tv_sec; + exp->p_rtime.tv_usec = p->p_rtime.tv_usec; + exp->p_uticks = p->p_uticks ; + exp->p_sticks = p->p_sticks ; + exp->p_iticks = p->p_iticks ; + exp->p_traceflag = p->p_traceflag ; + exp->p_tracep = CAST_USER_ADDR_T(p->p_tracep); + exp->p_siglist = 0 ; /* No longer relevant */ + exp->p_textvp = CAST_USER_ADDR_T(p->p_textvp); + exp->p_holdcnt = 0 ; + exp->p_sigmask = 0 ; /* no longer available */ + exp->p_sigignore = p->p_sigignore ; + exp->p_sigcatch = p->p_sigcatch ; + exp->p_priority = p->p_priority ; + exp->p_usrpri = p->p_usrpri ; + exp->p_nice = p->p_nice ; + bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN); + exp->p_comm[MAXCOMLEN] = '\0'; + exp->p_pgrp = CAST_USER_ADDR_T(p->p_pgrp); + exp->p_addr = USER_ADDR_NULL; + exp->p_xstat = p->p_xstat ; + exp->p_acflag = p->p_acflag ; + exp->p_ru = CAST_USER_ADDR_T(p->p_ru); /* XXX may be NULL */ } static void @@ -1154,20 +1557,21 @@ fill_proc(p, kp) fill_eproc(p, &kp->kp_eproc); } +static void +fill_user_proc(register struct proc *p, register struct user_kinfo_proc *kp) +{ + fill_user_externproc(p, &kp->kp_proc); + fill_user_eproc(p, &kp->kp_eproc); +} + int -kdebug_ops(name, namelen, where, sizep, p) -int *name; -u_int namelen; -char *where; -size_t *sizep; -struct proc *p; +kdebug_ops(int *name, u_int namelen, user_addr_t where, + size_t *sizep, struct proc *p) { - int size=*sizep; int ret=0; - extern int kdbg_control(int *name, u_int namelen, - char * where,size_t * sizep); - if (ret = suser(p->p_ucred, &p->p_acflag)) + ret = suser(kauth_cred_get(), &p->p_acflag);
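The LP64 fill routines above lean on two width-conversion macros for every pointer-valued field. A simplified restatement of their intent, with MY_-prefixed stand-ins so as not to collide with the real Mach definitions:

#include <stdint.h>
#include <sys/types.h>

typedef u_int64_t my_user_addr_t;	/* stand-in for user_addr_t */

/* Widen a kernel pointer for export to an LP64-aware user structure;
 * the intermediate uintptr_t cast avoids sign-extending pointer bits. */
#define MY_CAST_USER_ADDR_T(a_ptr)	((my_user_addr_t)(uintptr_t)(a_ptr))

/* Narrow a wide value back to a kernel-pointer-sized type; only safe
 * when the value is known to fit, which is why the diff uses it
 * sparingly (e.g. exp->user_stack in fill_externproc). */
#define MY_CAST_DOWN(type, addr)	((type)(uintptr_t)(addr))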
+ if (ret) return(ret); switch(name[0]) { @@ -1222,7 +1624,7 @@ struct proc *p; ret = pcsamples_control(name, namelen, where, sizep); break; default: - ret= EOPNOTSUPP; + ret= ENOTSUP; break; } return(ret); @@ -1233,56 +1635,45 @@ struct proc *p; * user stack down through the saved exec_path, whichever is smaller. */ int -sysctl_procargs(name, namelen, where, sizep, cur_proc) - int *name; - u_int namelen; - char *where; - size_t *sizep; - struct proc *cur_proc; +sysctl_procargs(int *name, u_int namelen, user_addr_t where, + size_t *sizep, struct proc *cur_proc) { return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 0); } static int -sysctl_procargs2(name, namelen, where, sizep, cur_proc) - int *name; - u_int namelen; - char *where; - size_t *sizep; - struct proc *cur_proc; +sysctl_procargs2(int *name, u_int namelen, user_addr_t where, + size_t *sizep, struct proc *cur_proc) { return sysctl_procargsx( name, namelen, where, sizep, cur_proc, 1); } static int -sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) - int *name; - u_int namelen; - char *where; - size_t *sizep; - struct proc *cur_proc; - int argc_yes; +sysctl_procargsx(int *name, __unused u_int namelen, user_addr_t where, + size_t *sizep, struct proc *cur_proc, int argc_yes) { - register struct proc *p; - register int needed = 0; - int buflen = where != NULL ? *sizep : 0; + struct proc *p; + int buflen = where != USER_ADDR_NULL ? *sizep : 0; int error = 0; struct vm_map *proc_map; struct task * task; vm_map_copy_t tmp; - vm_offset_t arg_addr; - vm_size_t arg_size; + user_addr_t arg_addr; + size_t arg_size; caddr_t data; - unsigned size; + int size; vm_offset_t copy_start, copy_end; - int *ip; kern_return_t ret; int pid; if (argc_yes) - buflen -= NBPW; /* reserve first word to return argc */ + buflen -= sizeof(int); /* reserve first word to return argc */ - if ((buflen <= 0) || (buflen > ARG_MAX)) { + /* we only care about buflen when where (oldp from sysctl) is not NULL. */ + /* when where (oldp from sysctl) is NULL and sizep (oldlenp from sysctl */ + /* is not NULL then the caller wants us to return the length needed to */ + /* hold the data we would return */ + if (where != USER_ADDR_NULL && (buflen <= 0 || buflen > ARG_MAX)) { return(EINVAL); } arg_size = buflen; @@ -1291,8 +1682,6 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) * Lookup process by pid */ pid = name[0]; - - restart: p = pfind(pid); if (p == NULL) { return(EINVAL); @@ -1311,10 +1700,35 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) if (!p->user_stack) return(EINVAL); - if ((p->p_ucred->cr_uid != cur_proc->p_ucred->cr_uid) - && suser(cur_proc->p_ucred, &cur_proc->p_acflag)) + if (where == USER_ADDR_NULL) { + /* caller only wants to know length of proc args data */ + if (sizep == NULL) + return(EFAULT); + + size = p->p_argslen; + if (argc_yes) { + size += sizeof(int); + } + else { + /* + * old PROCARGS will return the executable's path and plus some + * extra space for work alignment and data tags + */ + size += PATH_MAX + (6 * sizeof(int)); + } + size += (size & (sizeof(int) - 1)) ? 
(sizeof(int) - (size & (sizeof(int) - 1))) : 0; + *sizep = size; + return (0); + } + + if ((kauth_cred_getuid(p->p_ucred) != kauth_cred_getuid(kauth_cred_get())) + && suser(kauth_cred_get(), &cur_proc->p_acflag)) return (EINVAL); - arg_addr = (vm_offset_t)(p->user_stack - arg_size); + + if ((u_int)arg_size > p->p_argslen) + arg_size = round_page(p->p_argslen); + + arg_addr = p->user_stack - arg_size; /* @@ -1327,30 +1741,32 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) return(EINVAL); /* - * A regular task_reference call can block, causing the funnel - * to be dropped and allowing the proc/task to get freed. - * Instead, we issue a non-blocking attempt at the task reference, - * and look up the proc/task all over again if that fails. + * Once we have a task reference we can convert that into a + * map reference, which we will use in the calls below. The + * task/process may change its map after we take this reference + * (see execve), but the worst that will happen then is a return + * of stale info (which is always a possibility). */ - if (!task_reference_try(task)) { - mutex_pause(); - goto restart; - } + task_reference(task); + proc_map = get_task_map_reference(task); + task_deallocate(task); + if (proc_map == NULL) + return(EINVAL); - ret = kmem_alloc(kernel_map, &copy_start, round_page_32(arg_size)); + + ret = kmem_alloc(kernel_map, &copy_start, round_page(arg_size)); if (ret != KERN_SUCCESS) { - task_deallocate(task); + vm_map_deallocate(proc_map); return(ENOMEM); } - proc_map = get_task_map(task); - copy_end = round_page_32(copy_start + arg_size); + copy_end = round_page(copy_start + arg_size); - if( vm_map_copyin(proc_map, trunc_page(arg_addr), round_page_32(arg_size), - FALSE, &tmp) != KERN_SUCCESS) { - task_deallocate(task); + if( vm_map_copyin(proc_map, (vm_map_address_t)arg_addr, + (vm_map_size_t)arg_size, FALSE, &tmp) != KERN_SUCCESS) { + vm_map_deallocate(proc_map); kmem_free(kernel_map, copy_start, - round_page_32(arg_size)); + round_page(arg_size)); return (EIO); } @@ -1358,28 +1774,29 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) * Now that we've done the copyin from the process' * map, we can release the reference to it. */ - task_deallocate(task); + vm_map_deallocate(proc_map); - if( vm_map_copy_overwrite(kernel_map, copy_start, - tmp, FALSE) != KERN_SUCCESS) { + if( vm_map_copy_overwrite(kernel_map, + (vm_map_address_t)copy_start, + tmp, FALSE) != KERN_SUCCESS) { kmem_free(kernel_map, copy_start, - round_page_32(arg_size)); + round_page(arg_size)); return (EIO); } - data = (caddr_t) (copy_end - arg_size); - - if (buflen > p->p_argslen) { - data = &data[buflen - p->p_argslen]; + if (arg_size > p->p_argslen) { + data = (caddr_t) (copy_end - p->p_argslen); size = p->p_argslen; } else { - size = buflen; + data = (caddr_t) (copy_end - arg_size); + size = arg_size; } if (argc_yes) { /* Put the process's argc as the first word in the copyout buffer */ suword(where, p->p_argc); - error = copyout(data, where + NBPW, size); + error = copyout(data, (where + sizeof(int)), size); + size += sizeof(int); } else { error = copyout(data, where, size); @@ -1391,14 +1808,13 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) * * Note: we keep all pointers&sizes aligned to word boundaries */ - - if ( (!
error) && (buflen > 0 && (u_int)buflen > p->p_argslen) ) { - int binPath_sz; + int binPath_sz, alignedBinPath_sz = 0; int extraSpaceNeeded, addThis; - char * placeHere; + user_addr_t placeHere; char * str = (char *) data; - unsigned int max_len = size; + int max_len = size; /* Some apps are really bad about messing up their stacks. So, we have to be extra careful about getting the length @@ -1413,31 +1829,32 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) while ( (binPath_sz < max_len-1) && (*str++ != 0) ) binPath_sz++; + /* If we have a NUL terminator, copy it, too */ if (binPath_sz < max_len-1) binPath_sz += 1; /* Pre-Flight the space requirements */ /* Account for the padding that fills out binPath to the next word */ - binPath_sz += (binPath_sz & (NBPW-1)) ? (NBPW-(binPath_sz & (NBPW-1))) : 0; + alignedBinPath_sz += (binPath_sz & (sizeof(int)-1)) ? (sizeof(int)-(binPath_sz & (sizeof(int)-1))) : 0; placeHere = where + size; /* Account for the bytes needed to keep placeHere word aligned */ - addThis = ((unsigned long)placeHere & (NBPW-1)) ? (NBPW-((unsigned long)placeHere & (NBPW-1))) : 0; + addThis = (placeHere & (sizeof(int)-1)) ? (sizeof(int)-(placeHere & (sizeof(int)-1))) : 0; /* Add up all the space that is needed */ - extraSpaceNeeded = binPath_sz + addThis + (4 * NBPW); + extraSpaceNeeded = alignedBinPath_sz + addThis + binPath_sz + (4 * sizeof(int)); /* is there room to tack on argv[0]? */ - if ( (buflen & ~(NBPW-1)) >= ( p->p_argslen + extraSpaceNeeded )) + if ( (buflen & ~(sizeof(int)-1)) >= ( p->p_argslen + extraSpaceNeeded )) { placeHere += addThis; suword(placeHere, 0); - placeHere += NBPW; + placeHere += sizeof(int); suword(placeHere, 0xBFFF0000); - placeHere += NBPW; + placeHere += sizeof(int); suword(placeHere, 0); - placeHere += NBPW; + placeHere += sizeof(int); error = copyout(data, placeHere, binPath_sz); if ( ! error ) { @@ -1456,7 +1873,7 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) return(error); } - if (where != NULL) + if (where != USER_ADDR_NULL) *sizep = size; return (0); } @@ -1469,7 +1886,7 @@ sysctl_procargsx(name, namelen, where, sizep, cur_proc, argc_yes) * * limit. */ static int -sysctl_aiomax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_aiomax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen) { int error = 0; int new_value; @@ -1502,7 +1919,7 @@ sysctl_aiomax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) * limit. */ static int -sysctl_aioprocmax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_aioprocmax(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen ) { int error = 0; int new_value = 0; @@ -1534,7 +1951,7 @@ sysctl_aioprocmax( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) * We only allow an increase in the number of worker threads. */ static int -sysctl_aiothreads( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_aiothreads(user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen) { int error = 0; int new_value; @@ -1568,20 +1985,21 @@ sysctl_aiothreads( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) * Makes sure per UID limit is less than the system wide limit.
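The argv[0] append path above writes the same round-up-to-word computation inline several times. Factored out for clarity (pad_to_int and round_up_int are invented helpers, not part of the patch):

#include <stddef.h>

/* Pad bytes needed to bring x up to the next sizeof(int) boundary;
 * the diff writes this inline as
 * (x & (sizeof(int)-1)) ? sizeof(int) - (x & (sizeof(int)-1)) : 0. */
static size_t
pad_to_int(size_t x)
{
	size_t rem = x & (sizeof(int) - 1);

	return (rem ? sizeof(int) - rem : 0);
}

static size_t
round_up_int(size_t x)
{
	return (x + pad_to_int(x));	/* x rounded to a word boundary */
}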
*/ static int -sysctl_maxprocperuid( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_maxprocperuid(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen) { int error = 0; int new_value; - if ( oldp != NULL && *oldlenp < sizeof(int) ) + if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) return (ENOMEM); - if ( newp != NULL && newlen != sizeof(int) ) + if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) return (EINVAL); *oldlenp = sizeof(int); - if ( oldp != NULL ) + if ( oldp != USER_ADDR_NULL ) error = copyout( &maxprocperuid, oldp, sizeof(int) ); - if ( error == 0 && newp != NULL ) { + if ( error == 0 && newp != USER_ADDR_NULL ) { error = copyin( newp, &new_value, sizeof(int) ); if ( error == 0 ) { AUDIT_ARG(value, new_value); @@ -1590,7 +2008,7 @@ sysctl_maxprocperuid( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) else error = EINVAL; } - else + else error = EINVAL; } return( error ); @@ -1604,20 +2022,21 @@ sysctl_maxprocperuid( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) * Makes sure per process limit is less than the system-wide limit. */ static int -sysctl_maxfilesperproc( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_maxfilesperproc(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen) { int error = 0; int new_value; - if ( oldp != NULL && *oldlenp < sizeof(int) ) + if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) return (ENOMEM); - if ( newp != NULL && newlen != sizeof(int) ) + if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) return (EINVAL); *oldlenp = sizeof(int); - if ( oldp != NULL ) + if ( oldp != USER_ADDR_NULL ) error = copyout( &maxfilesperproc, oldp, sizeof(int) ); - if ( error == 0 && newp != NULL ) { + if ( error == 0 && newp != USER_ADDR_NULL ) { error = copyin( newp, &new_value, sizeof(int) ); if ( error == 0 ) { AUDIT_ARG(value, new_value); @@ -1641,25 +2060,26 @@ sysctl_maxfilesperproc( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) * limit set at kernel compilation. 
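sysctl_maxprocperuid and sysctl_maxfilesperproc above, and sysctl_maxproc below, all apply one acceptance rule to the copied-in value: positive and no greater than the compile-time hard limit. Reduced to its core (set_bounded_limit is an invented helper):

#include <errno.h>

static int
set_bounded_limit(int *limitp, int new_value, int hard_limit)
{
	if (new_value <= 0 || new_value > hard_limit)
		return (EINVAL);	/* reject zero, negative, too large */
	*limitp = new_value;
	return (0);
}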
*/ static int -sysctl_maxproc( void *oldp, size_t *oldlenp, void *newp, size_t newlen ) +sysctl_maxproc(user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen ) { int error = 0; int new_value; - if ( oldp != NULL && *oldlenp < sizeof(int) ) + if ( oldp != USER_ADDR_NULL && *oldlenp < sizeof(int) ) return (ENOMEM); - if ( newp != NULL && newlen != sizeof(int) ) + if ( newp != USER_ADDR_NULL && newlen != sizeof(int) ) return (EINVAL); *oldlenp = sizeof(int); - if ( oldp != NULL ) + if ( oldp != USER_ADDR_NULL ) error = copyout( &maxproc, oldp, sizeof(int) ); - if ( error == 0 && newp != NULL ) { + if ( error == 0 && newp != USER_ADDR_NULL ) { error = copyin( newp, &new_value, sizeof(int) ); if ( error == 0 ) { AUDIT_ARG(value, new_value); if ( new_value <= hard_maxproc && new_value > 0 ) - maxproc = new_value; + maxproc = new_value; else error = EINVAL; } diff --git a/bsd/kern/kern_time.c b/bsd/kern/kern_time.c index 0a4d9e52e..07354b8b7 100644 --- a/bsd/kern/kern_time.c +++ b/bsd/kern/kern_time.c @@ -59,18 +59,38 @@ #include #include #include -#include +#include +#include #include -#include +#include +#include +#include #include +#include #define HZ 100 /* XXX */ -volatile struct timeval time; /* simple lock used to access timezone, tz structure */ -decl_simple_lock_data(, tz_slock); +lck_spin_t * tz_slock; +lck_grp_t * tz_slock_grp; +lck_attr_t * tz_slock_attr; +lck_grp_attr_t *tz_slock_grp_attr; + +static void setthetime( + struct timeval *tv); + +void time_zone_slock_init(void); + +int gettimeofday(struct proc *p, +#ifdef __ppc__ + struct ppc_gettimeofday_args *uap, +#else + struct gettimeofday_args *uap, +#endif + register_t *retval); + /* * Time of day and interval timer support. * @@ -79,177 +99,183 @@ decl_simple_lock_data(, tz_slock); * here provide support for adding and subtracting timeval structures * and decrementing interval timers, optionally reloading the interval * timers when they expire. + * + * XXX Y2038 bug because of clock_get_calendar_microtime() first argument */ -struct gettimeofday_args{ - struct timeval *tp; - struct timezone *tzp; -}; /* ARGSUSED */ int -gettimeofday(p, uap, retval) - struct proc *p; - register struct gettimeofday_args *uap; - register_t *retval; +gettimeofday(__unused struct proc *p, +#ifdef __ppc__ + register struct ppc_gettimeofday_args *uap, +#else + register struct gettimeofday_args *uap, +#endif + __unused register_t *retval) { struct timeval atv; int error = 0; - extern simple_lock_data_t tz_slock; struct timezone ltz; /* local copy */ /* NOTE THIS implementation is for non ppc architectures only */ if (uap->tp) { - clock_get_calendar_microtime(&atv.tv_sec, &atv.tv_usec); - if (error = copyout((caddr_t)&atv, (caddr_t)uap->tp, - sizeof (atv))) + clock_get_calendar_microtime((uint32_t *)&atv.tv_sec, &atv.tv_usec); + if (IS_64BIT_PROCESS(p)) { + struct user_timeval user_atv; + user_atv.tv_sec = atv.tv_sec; + user_atv.tv_usec = atv.tv_usec; + /* + * This cast is not necessary for PPC, but is + * mostly harmless. 
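The declarations above replace decl_simple_lock_data with a heap-allocated spin lock, and time_zone_slock_init() later in this diff performs the allocation. The bring-up order is the standard lck_* sequence; a self-contained sketch against those APIs (the tz_-prefixed names here are placeholders):

#include <kern/locks.h>

static lck_grp_attr_t	*tz_grp_attr;
static lck_grp_t	*tz_grp;
static lck_attr_t	*tz_attr;
static lck_spin_t	*tz_lock;

static void
tz_lock_init(void)
{
	tz_grp_attr = lck_grp_attr_alloc_init();	     /* group attributes */
	tz_grp = lck_grp_alloc_init("tzlock", tz_grp_attr);  /* named group */
	tz_attr = lck_attr_alloc_init();		     /* per-lock attributes */
	tz_lock = lck_spin_alloc_init(tz_grp, tz_attr);	     /* the lock itself */
}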
+ */ + error = copyout(&user_atv, CAST_USER_ADDR_T(uap->tp), sizeof(struct user_timeval)); + } else { + error = copyout(&atv, CAST_USER_ADDR_T(uap->tp), sizeof(struct timeval)); + } + if (error) return(error); } if (uap->tzp) { - usimple_lock(&tz_slock); + lck_spin_lock(tz_slock); ltz = tz; - usimple_unlock(&tz_slock); - error = copyout((caddr_t)&ltz, (caddr_t)uap->tzp, + lck_spin_unlock(tz_slock); + error = copyout((caddr_t)&ltz, CAST_USER_ADDR_T(uap->tzp), sizeof (tz)); } return(error); } -struct settimeofday_args { - struct timeval *tv; - struct timezone *tzp; -}; +/* + * XXX Y2038 bug because of setthetime() argument + */ /* ARGSUSED */ int -settimeofday(p, uap, retval) - struct proc *p; - struct settimeofday_args *uap; - register_t *retval; +settimeofday(struct proc *p, struct settimeofday_args *uap, __unused register_t *retval) { struct timeval atv; struct timezone atz; - int error, s; - extern simple_lock_data_t tz_slock; + int error; - if (error = suser(p->p_ucred, &p->p_acflag)) - return (error); - /* Verify all parameters before changing time. */ - if (uap->tv && (error = copyin((caddr_t)uap->tv, - (caddr_t)&atv, sizeof(atv)))) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); - if (uap->tzp && (error = copyin((caddr_t)uap->tzp, - (caddr_t)&atz, sizeof(atz)))) + /* Verify all parameters before changing time */ + if (uap->tv) { + if (IS_64BIT_PROCESS(p)) { + struct user_timeval user_atv; + error = copyin(uap->tv, &user_atv, sizeof(struct user_timeval)); + atv.tv_sec = user_atv.tv_sec; + atv.tv_usec = user_atv.tv_usec; + } else { + error = copyin(uap->tv, &atv, sizeof(struct timeval)); + } + if (error) + return (error); + } + if (uap->tzp && (error = copyin(uap->tzp, (caddr_t)&atz, sizeof(atz)))) return (error); - if (uap->tv) + if (uap->tv) { + timevalfix(&atv); + if (atv.tv_sec < 0 || (atv.tv_sec == 0 && atv.tv_usec < 0)) + return (EPERM); setthetime(&atv); + } if (uap->tzp) { - usimple_lock(&tz_slock); + lck_spin_lock(tz_slock); tz = atz; - usimple_unlock(&tz_slock); + lck_spin_unlock(tz_slock); } return (0); } -setthetime(tv) - struct timeval *tv; +static void +setthetime( + struct timeval *tv) { - long delta = tv->tv_sec - time.tv_sec; - clock_set_calendar_microtime(tv->tv_sec, tv->tv_usec); - boottime.tv_sec += delta; -#if NFSCLIENT || NFSSERVER - lease_updatetime(delta); -#endif } -struct adjtime_args { - struct timeval *delta; - struct timeval *olddelta; -}; +/* + * XXX Y2038 bug because of clock_adjtime() first argument + */ /* ARGSUSED */ int -adjtime(p, uap, retval) - struct proc *p; - register struct adjtime_args *uap; - register_t *retval; +adjtime(struct proc *p, register struct adjtime_args *uap, __unused register_t *retval) { struct timeval atv; int error; - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); - if (error = copyin((caddr_t)uap->delta, - (caddr_t)&atv, sizeof (struct timeval))) + if (IS_64BIT_PROCESS(p)) { + struct user_timeval user_atv; + error = copyin(uap->delta, &user_atv, sizeof(struct user_timeval)); + atv.tv_sec = user_atv.tv_sec; + atv.tv_usec = user_atv.tv_usec; + } else { + error = copyin(uap->delta, &atv, sizeof(struct timeval)); + } + if (error) return (error); - /* - * Compute the total correction and the rate at which to apply it. - */ - clock_adjtime(&atv.tv_sec, &atv.tv_usec); + /* + * Compute the total correction and the rate at which to apply it.
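settimeofday() and adjtime() above, like setitimer() below, repeat the same narrow-on-copyin shim between the LP64 user_timeval layout and the kernel's 32-bit timeval; the XXX comments flag the Y2038 limit this narrowing implies. A hypothetical helper capturing the shim (kernel types as used in this file):

static int
copyin_timeval(struct proc *p, user_addr_t uaddr, struct timeval *tvp)
{
	int error;

	if (IS_64BIT_PROCESS(p)) {
		struct user_timeval utv;	/* LP64 field widths */

		error = copyin(uaddr, &utv, sizeof(utv));
		if (error == 0) {
			tvp->tv_sec = utv.tv_sec;    /* narrowing: Y2038 caveat */
			tvp->tv_usec = utv.tv_usec;
		}
		return (error);
	}
	return (copyin(uaddr, tvp, sizeof(*tvp)));
}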
+ */ + clock_adjtime((int32_t *)&atv.tv_sec, &atv.tv_usec); if (uap->olddelta) { - (void) copyout((caddr_t)&atv, - (caddr_t)uap->olddelta, sizeof (struct timeval)); + if (IS_64BIT_PROCESS(p)) { + struct user_timeval user_atv; + user_atv.tv_sec = atv.tv_sec; + user_atv.tv_usec = atv.tv_usec; + error = copyout(&user_atv, uap->olddelta, sizeof(struct user_timeval)); + } else { + error = copyout(&atv, uap->olddelta, sizeof(struct timeval)); + } } return (0); } /* - * Initialze the time of day register. - * Trust the RTC except for the case where it is set before - * the UNIX epoch. In that case use the the UNIX epoch. - * The argument passed in is ignored. + * Verify the calendar value. If negative, + * reset to zero (the epoch). */ void -inittodr(base) - time_t base; +inittodr( + __unused time_t base) { struct timeval tv; /* * Assertion: * The calendar has already been - * set up from the battery clock. + * set up from the platform clock. * * The value returned by microtime() * is gotten from the calendar. */ microtime(&tv); - time = tv; - boottime.tv_sec = tv.tv_sec; - boottime.tv_usec = 0; - - /* - * If the RTC does not have acceptable value, i.e. time before - * the UNIX epoch, set it to the UNIX epoch - */ - if (tv.tv_sec < 0) { + if (tv.tv_sec < 0 || tv.tv_usec < 0) { printf ("WARNING: preposterous time in Real Time Clock"); - time.tv_sec = 0; /* the UNIX epoch */ - time.tv_usec = 0; - setthetime(&time); - boottime = time; + tv.tv_sec = 0; /* the UNIX epoch */ + tv.tv_usec = 0; + setthetime(&tv); printf(" -- CHECK AND RESET THE DATE!\n"); } - - return; } -void timevaladd( - struct timeval *t1, - struct timeval *t2); -void timevalsub( - struct timeval *t1, - struct timeval *t2); -void timevalfix( - struct timeval *t1); +time_t +boottime_sec(void) +{ + uint32_t sec, nanosec; + clock_get_boottime_nanotime(&sec, &nanosec); + return (sec); +} -uint64_t - tvtoabstime( - struct timeval *tvp); +uint64_t tvtoabstime(struct timeval *tvp); /* * Get value of an interval timer. The process virtual and @@ -271,16 +297,9 @@ uint64_t * absolute time when the timer should go off. 
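boottime_sec() above reads the boot time from clock_get_boottime_nanotime(), and the KERN_BOOTTIME case earlier in this patch now synthesizes its timeval from it, with tv_usec forced to zero. A user-space reader is unchanged by this; a minimal check:

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/time.h>

int
main(void)
{
	int mib[2] = { CTL_KERN, KERN_BOOTTIME };
	struct timeval bt;
	size_t len = sizeof(bt);

	if (sysctl(mib, 2, &bt, &len, NULL, 0) == -1)
		return (1);
	printf("booted at %ld, usec %d\n", (long)bt.tv_sec, (int)bt.tv_usec);
	return (0);
}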
*/ -struct getitimer_args { - u_int which; - struct itimerval *itv; -}; /* ARGSUSED */ int -getitimer(p, uap, retval) - struct proc *p; - register struct getitimer_args *uap; - register_t *retval; +getitimer(struct proc *p, register struct getitimer_args *uap, __unused register_t *retval) { struct itimerval aitv; @@ -310,15 +329,18 @@ getitimer(p, uap, retval) else aitv = p->p_stats->p_timer[uap->which]; - return (copyout((caddr_t)&aitv, - (caddr_t)uap->itv, sizeof (struct itimerval))); + if (IS_64BIT_PROCESS(p)) { + struct user_itimerval user_itv; + user_itv.it_interval.tv_sec = aitv.it_interval.tv_sec; + user_itv.it_interval.tv_usec = aitv.it_interval.tv_usec; + user_itv.it_value.tv_sec = aitv.it_value.tv_sec; + user_itv.it_value.tv_usec = aitv.it_value.tv_usec; + return (copyout((caddr_t)&user_itv, uap->itv, sizeof (struct user_itimerval))); + } else { + return (copyout((caddr_t)&aitv, uap->itv, sizeof (struct itimerval))); + } } -struct setitimer_args { - u_int which; - struct itimerval *itv; - struct itimerval *oitv; -}; /* ARGSUSED */ int setitimer(p, uap, retval) @@ -327,28 +349,38 @@ setitimer(p, uap, retval) register_t *retval; { struct itimerval aitv; - register struct itimerval *itvp; + user_addr_t itvp; int error; if (uap->which > ITIMER_PROF) return (EINVAL); - if ((itvp = uap->itv) && - (error = copyin((caddr_t)itvp, - (caddr_t)&aitv, sizeof (struct itimerval)))) - return (error); - if ((uap->itv = uap->oitv) && (error = getitimer(p, uap, retval))) + if ((itvp = uap->itv)) { + if (IS_64BIT_PROCESS(p)) { + struct user_itimerval user_itv; + if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof (struct user_itimerval)))) + return (error); + aitv.it_interval.tv_sec = user_itv.it_interval.tv_sec; + aitv.it_interval.tv_usec = user_itv.it_interval.tv_usec; + aitv.it_value.tv_sec = user_itv.it_value.tv_sec; + aitv.it_value.tv_usec = user_itv.it_value.tv_usec; + } else { + if ((error = copyin(itvp, (caddr_t)&aitv, sizeof (struct itimerval)))) + return (error); + } + } + if ((uap->itv = uap->oitv) && (error = getitimer(p, (struct getitimer_args *)uap, retval))) return (error); if (itvp == 0) return (0); if (itimerfix(&aitv.it_value) || itimerfix(&aitv.it_interval)) return (EINVAL); if (uap->which == ITIMER_REAL) { - thread_call_func_cancel(realitexpire, (void *)p->p_pid, FALSE); + thread_call_func_cancel((thread_call_func_t)realitexpire, (void *)p->p_pid, FALSE); if (timerisset(&aitv.it_value)) { microuptime(&p->p_rtime); timevaladd(&p->p_rtime, &aitv.it_value); thread_call_func_delayed( - realitexpire, (void *)p->p_pid, + (thread_call_func_t)realitexpire, (void *)p->p_pid, tvtoabstime(&p->p_rtime)); } else @@ -376,8 +408,9 @@ realitexpire( { register struct proc *p; struct timeval now; - boolean_t funnel_state = thread_funnel_set(kernel_flock, TRUE); + boolean_t funnel_state; + funnel_state = thread_funnel_set(kernel_flock, TRUE); p = pfind((pid_t)pid); if (p == NULL) { (void) thread_funnel_set(kernel_flock, FALSE); @@ -410,7 +443,7 @@ realitexpire( psignal(p, SIGALRM); - thread_call_func_delayed(realitexpire, pid, tvtoabstime(&p->p_rtime)); + thread_call_func_delayed((thread_call_func_t)realitexpire, pid, tvtoabstime(&p->p_rtime)); (void) thread_funnel_set(kernel_flock, FALSE); } @@ -527,14 +560,14 @@ void microtime( struct timeval *tvp) { - clock_get_calendar_microtime(&tvp->tv_sec, &tvp->tv_usec); + clock_get_calendar_microtime((uint32_t *)&tvp->tv_sec, &tvp->tv_usec); } void microuptime( struct timeval *tvp) { - clock_get_system_microtime(&tvp->tv_sec, &tvp->tv_usec); + 
clock_get_system_microtime((uint32_t *)&tvp->tv_sec, &tvp->tv_usec); } /* @@ -544,14 +577,14 @@ void nanotime( struct timespec *tsp) { - clock_get_calendar_nanotime((uint32_t *)&tsp->tv_sec, &tsp->tv_nsec); + clock_get_calendar_nanotime((uint32_t *)&tsp->tv_sec, (uint32_t *)&tsp->tv_nsec); } void nanouptime( struct timespec *tsp) { - clock_get_system_nanotime((uint32_t *)&tsp->tv_sec, &tsp->tv_nsec); + clock_get_system_nanotime((uint32_t *)&tsp->tv_sec, (uint32_t *)&tsp->tv_nsec); } uint64_t @@ -570,9 +603,17 @@ tvtoabstime( void time_zone_slock_init(void) { - extern simple_lock_data_t tz_slock; + /* allocate lock group attribute and group */ + tz_slock_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(tz_slock_grp_attr); - simple_lock_init(&tz_slock); + tz_slock_grp = lck_grp_alloc_init("tzlock", tz_slock_grp_attr); + /* Allocate lock attribute */ + tz_slock_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(tz_slock_attr); + /* Allocate the spin lock */ + tz_slock = lck_spin_alloc_init(tz_slock_grp, tz_slock_attr); } + diff --git a/bsd/kern/kern_xxx.c b/bsd/kern/kern_xxx.c index ebc0af446..470a220e8 100644 --- a/bsd/kern/kern_xxx.c +++ b/bsd/kern/kern_xxx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,7 +60,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -68,142 +69,25 @@ #include -#include +#include +#include -#if COMPAT_43 -/* ARGSUSED */ int -ogethostid(p, uap, retval) -struct proc *p; -void *uap; -register_t *retval; -{ - - *retval = hostid; - return 0; -} - -struct osethostid_args { - long hostid; -}; -/* ARGSUSED */ -int -osethostid(p, uap, retval) -struct proc *p; -register struct osethostid_args *uap; -register_t *retval; -{ - int error; - - if (error = suser(p->p_ucred, &p->p_acflag)) - return (error); - hostid = uap->hostid; - return (0); - -} - -struct ogethostname_args { - char *hostname; - u_int len; -}; -/* ARGSUSED */ -int -ogethostname(p, uap, retval) -struct proc *p; -register struct ogethostname_args *uap; -register_t *retval; -{ - int name; - - name = KERN_HOSTNAME; - - return (kern_sysctl(&name, 1, uap->hostname, &uap->len, 0, 0)); -} - -struct osethostname_args { - char *hostname; - u_int len; -}; -/* ARGSUSED */ -int -osethostname(p, uap, retval) -struct proc *p; -register struct osethostname_args *uap; -register_t *retval; -{ - int name; - int error; - - if (error = suser(p->p_ucred, &p->p_acflag)) - return (error); - - name = KERN_HOSTNAME; - return (kern_sysctl(&name, 1, 0, 0, uap->hostname, - uap->len)); -} - -struct ogetdomainname_args { - char *domainname; - int len; -}; -/* ARGSUSED */ -int -ogetdomainname(p, uap, retval) -struct proc *p; -register struct ogetdomainname_args *uap; -register_t *retval; -{ - int name; - - name = KERN_DOMAINNAME; - return (kern_sysctl(&name, 1, uap->domainname, - &uap->len, 0, 0)); -} - -struct osetdomainname_args { - char *domainname; - u_int len; -}; -/* ARGSUSED */ -int -osetdomainname(p, uap, retval) -struct proc *p; -register struct osetdomainname_args *uap; -register_t *retval; -{ - int name; - int error; - - if (error = suser(p->p_ucred, &p->p_acflag)) - return (error); - name = KERN_DOMAINNAME; - return (kern_sysctl(&name, 1, 0, 0, uap->domainname, - uap->len)); -} -#endif /* COMPAT_43 */ - -struct reboot_args { - int opt; - char *command; -}; - -reboot(p, uap, retval) -struct proc *p; -register struct 
reboot_args *uap; -register_t *retval; +reboot(struct proc *p, register struct reboot_args *uap, __unused register_t *retval) { char command[64]; int error; int dummy=0; AUDIT_ARG(cmd, uap->opt); + command[0] = '\0'; - if (error = suser(p->p_cred->pc_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); if (uap->opt & RB_COMMAND) - error = copyinstr((void *)uap->command, + error = copyinstr(uap->command, (void *)command, sizeof(command), (size_t *)&dummy); if (!error) { SET(p->p_flag, P_REBOOT); /* No more signals for this proc */ diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c new file mode 100644 index 000000000..15c290aab --- /dev/null +++ b/bsd/kern/kpi_mbuf.c @@ -0,0 +1,939 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define __KPI__ +//#include + +#include +#include +#include +#include +#include +#include +#include + +void mbuf_tag_id_first_last(u_long *first, u_long *last); +errno_t mbuf_tag_id_find_internal(const char *string, u_long *out_id, int create); + +static const mbuf_flags_t mbuf_flags_mask = MBUF_EXT | MBUF_PKTHDR | MBUF_EOR | + MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG | + MBUF_LASTFRAG | MBUF_PROMISC; + +void* mbuf_data(mbuf_t mbuf) +{ + return m_mtod(mbuf); +} + +void* mbuf_datastart(mbuf_t mbuf) +{ + if (mbuf->m_flags & M_EXT) + return mbuf->m_ext.ext_buf; + if (mbuf->m_flags & M_PKTHDR) + return mbuf->m_pktdat; + return mbuf->m_dat; +} + +errno_t mbuf_setdata(mbuf_t mbuf, void* data, size_t len) +{ + size_t start = (size_t)((char*)mbuf_datastart(mbuf)); + size_t maxlen = mbuf_maxlen(mbuf); + + if ((size_t)data < start || ((size_t)data) + len > start + maxlen) + return EINVAL; + mbuf->m_data = data; + mbuf->m_len = len; + + return 0; +} + +errno_t mbuf_align_32(mbuf_t mbuf, size_t len) +{ + if ((mbuf->m_flags & M_EXT) != 0 && m_mclhasreference(mbuf)) + return ENOTSUP; + mbuf->m_data = mbuf_datastart(mbuf); + mbuf->m_data += ((mbuf_trailingspace(mbuf) - len) &~ (sizeof(u_int32_t) - 1)); + + return 0; +} + +addr64_t mbuf_data_to_physical(void* ptr) +{ + return (addr64_t)mcl_to_paddr(ptr); +} + +errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + *mbuf = m_get(how, type); + + return (*mbuf == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + *mbuf = m_gethdr(how, type); + + return (*mbuf == NULL) ? 
ENOMEM : 0; +} + +extern struct mbuf * m_mbigget(struct mbuf *m, int nowait); + +errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, mbuf_t* mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + errno_t error = 0; + int created = 0; + + if (mbuf == NULL) + return EINVAL; + if (*mbuf == NULL) { + *mbuf = m_get(how, type); + if (*mbuf == NULL) + return ENOMEM; + created = 1; + } + /* + * At the time this code was written, m_mclget and m_mbigget would always + * return the same value that was passed in to it. + */ + if (size == MCLBYTES) { + *mbuf = m_mclget(*mbuf, how); + } else if (size == NBPG) { + *mbuf = m_mbigget(*mbuf, how); + } else { + error = EINVAL; + goto out; + } + if (*mbuf == NULL || ((*mbuf)->m_flags & M_EXT) == 0) + error = ENOMEM; +out: + if (created && error != 0) { + error = ENOMEM; + mbuf_free(*mbuf); + *mbuf = NULL; + } + return error; +} + +errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + errno_t error = 0; + int created = 0; + if (mbuf == NULL) return EINVAL; + if (*mbuf == NULL) { + error = mbuf_get(how, type, mbuf); + if (error) + return error; + created = 1; + } + + /* + * At the time this code was written, m_mclget would always + * return the same value that was passed in to it. + */ + *mbuf = m_mclget(*mbuf, how); + + if (created && ((*mbuf)->m_flags & M_EXT) == 0) { + mbuf_free(*mbuf); + *mbuf = NULL; + } + if (*mbuf == NULL || ((*mbuf)->m_flags & M_EXT) == 0) + error = ENOMEM; + return error; +} + + +errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + errno_t error = 0; + + *mbuf = m_getpacket_how(how); + + if (*mbuf == NULL) { + if (how == MBUF_WAITOK) + error = ENOMEM; + else + error = EWOULDBLOCK; + } + + return error; +} + +mbuf_t mbuf_free(mbuf_t mbuf) +{ + return m_free(mbuf); +} + +void mbuf_freem(mbuf_t mbuf) +{ + m_freem(mbuf); +} + +int mbuf_freem_list(mbuf_t mbuf) +{ + return m_freem_list(mbuf); +} + +size_t mbuf_leadingspace(mbuf_t mbuf) +{ + return m_leadingspace(mbuf); +} + +size_t mbuf_trailingspace(mbuf_t mbuf) +{ + return m_trailingspace(mbuf); +} + +/* Manipulation */ +errno_t mbuf_copym(mbuf_t src, size_t offset, size_t len, + mbuf_how_t how, mbuf_t *new_mbuf) +{ + /* Must set *mbuf to NULL in failure case */ + *new_mbuf = m_copym(src, offset, len, how); + + return (*new_mbuf == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_dup(mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf) +{ + /* Must set *new_mbuf to NULL in failure case */ + *new_mbuf = m_dup(src, how); + + return (*new_mbuf == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_prepend(mbuf_t *orig, size_t len, mbuf_how_t how) +{ + /* Must set *orig to NULL in failure case */ + *orig = m_prepend_2(*orig, len, how); + + return (*orig == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_split(mbuf_t src, size_t offset, + mbuf_how_t how, mbuf_t *new_mbuf) +{ + /* Must set *new_mbuf to NULL in failure case */ + *new_mbuf = m_split(src, offset, how); + + return (*new_mbuf == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_pullup(mbuf_t *mbuf, size_t len) +{ + /* Must set *mbuf to NULL in failure case */ + *mbuf = m_pullup(*mbuf, len); + + return (*mbuf == NULL) ? ENOMEM : 0; +} + +errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t len, mbuf_t *location) +{ + /* Must set *location to NULL in failure case */ + int new_offset; + *location = m_pulldown(src, *offset, len, &new_offset); + *offset = new_offset; + + return (*location == NULL) ? 
ENOMEM : 0; +} + +void mbuf_adj(mbuf_t mbuf, int len) +{ + m_adj(mbuf, len); +} + +errno_t mbuf_copydata(mbuf_t m, size_t off, size_t len, void* out_data) +{ + /* Copied m_copydata, added error handling (don't just panic) */ + int count; + + while (off > 0) { + if (m == 0) + return EINVAL; + if (off < (size_t)m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) + return EINVAL; + count = m->m_len - off > len ? len : m->m_len - off; + bcopy(mtod(m, caddr_t) + off, out_data, count); + len -= count; + out_data = ((char*)out_data) + count; + off = 0; + m = m->m_next; + } + + return 0; +} + +int mbuf_mclref(mbuf_t mbuf) +{ + return m_mclref(mbuf); +} + +int mbuf_mclunref(mbuf_t mbuf) +{ + return m_mclunref(mbuf); +} + +int mbuf_mclhasreference(mbuf_t mbuf) +{ + if ((mbuf->m_flags & M_EXT)) + return m_mclhasreference(mbuf); + else + return 0; +} + + +/* mbuf header */ +mbuf_t mbuf_next(mbuf_t mbuf) +{ + return mbuf->m_next; +} + +errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next) +{ + if (next && ((next)->m_nextpkt != NULL || + (next)->m_type == MT_FREE)) return EINVAL; + mbuf->m_next = next; + + return 0; +} + +mbuf_t mbuf_nextpkt(mbuf_t mbuf) +{ + return mbuf->m_nextpkt; +} + +void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt) +{ + mbuf->m_nextpkt = nextpkt; +} + +size_t mbuf_len(mbuf_t mbuf) +{ + return mbuf->m_len; +} + +void mbuf_setlen(mbuf_t mbuf, size_t len) +{ + mbuf->m_len = len; +} + +size_t mbuf_maxlen(mbuf_t mbuf) +{ + if (mbuf->m_flags & M_EXT) + return mbuf->m_ext.ext_size; + return &mbuf->m_dat[MLEN] - ((char*)mbuf_datastart(mbuf)); +} + +mbuf_type_t mbuf_type(mbuf_t mbuf) +{ + return mbuf->m_type; +} + +errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type) +{ + if (new_type == MBUF_TYPE_FREE) return EINVAL; + + m_mchtype(mbuf, new_type); + + return 0; +} + +mbuf_flags_t mbuf_flags(mbuf_t mbuf) +{ + return mbuf->m_flags & mbuf_flags_mask; +} + +errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags) +{ + if ((flags & ~mbuf_flags_mask) != 0) return EINVAL; + mbuf->m_flags = flags | + (mbuf->m_flags & ~mbuf_flags_mask); + + return 0; +} + +errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, mbuf_flags_t mask) +{ + if (((flags | mask) & ~mbuf_flags_mask) != 0) return EINVAL; + + mbuf->m_flags = (flags & mask) | (mbuf->m_flags & ~mask); + + return 0; +} + +errno_t mbuf_copy_pkthdr(mbuf_t dest, mbuf_t src) +{ + if (((src)->m_flags & M_PKTHDR) == 0) + return EINVAL; + + m_copy_pkthdr(dest, src); + + return 0; +} + +size_t mbuf_pkthdr_len(mbuf_t mbuf) +{ + return mbuf->m_pkthdr.len; +} + +void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len) +{ + mbuf->m_pkthdr.len = len; +} + +ifnet_t mbuf_pkthdr_rcvif(mbuf_t mbuf) +{ + // If we reference count ifnets, we should take a reference here before returning + return mbuf->m_pkthdr.rcvif; +} + +errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifnet) +{ + /* May want to walk ifnet list to determine if interface is valid */ + mbuf->m_pkthdr.rcvif = (struct ifnet*)ifnet; + return 0; +} + +void* mbuf_pkthdr_header(mbuf_t mbuf) +{ + return mbuf->m_pkthdr.header; +} + +void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header) +{ + mbuf->m_pkthdr.header = (void*)header; +} + +/* mbuf aux data */ +errno_t mbuf_aux_add(mbuf_t mbuf, int family, mbuf_type_t type, mbuf_t *aux_mbuf) +{ + *aux_mbuf = m_aux_add(mbuf, family, type); + return (*aux_mbuf == NULL) ? 
ENOMEM : 0; +} + +mbuf_t mbuf_aux_find(mbuf_t mbuf, int family, mbuf_type_t type) +{ + return m_aux_find(mbuf, family, type); +} + +void mbuf_aux_delete(mbuf_t mbuf, mbuf_t aux) +{ + m_aux_delete(mbuf, aux); +} + +void +mbuf_inbound_modified(mbuf_t mbuf) +{ + /* Invalidate hardware generated checksum flags */ + mbuf->m_pkthdr.csum_flags = 0; +} + +extern void in_cksum_offset(struct mbuf* m, size_t ip_offset); +extern void in_delayed_cksum_offset(struct mbuf *m, int ip_offset); + +void +mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, size_t protocol_offset) +{ + if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) == 0) + return; + + /* Generate the packet in software, client needs it */ + switch (protocol_family) { + case PF_INET: + if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum_offset(mbuf, protocol_offset); + } + + if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IP) { + in_cksum_offset(mbuf, protocol_offset); + } + + mbuf->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DELAY_IP); + break; + + default: + /* + * Not sure what to do here if anything. + * Hardware checksum code looked pretty IPv4 specific. + */ + if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) != 0) + panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4 packet (%d)!\n", protocol_family); + } +} + +errno_t +mbuf_set_vlan_tag( + mbuf_t mbuf, + u_int16_t vlan) +{ + mbuf->m_pkthdr.csum_flags |= CSUM_VLAN_TAG_VALID; + mbuf->m_pkthdr.vlan_tag = vlan; + + return 0; +} + +errno_t +mbuf_get_vlan_tag( + mbuf_t mbuf, + u_int16_t *vlan) +{ + if ((mbuf->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) + return ENXIO; // No vlan tag set + + *vlan = mbuf->m_pkthdr.vlan_tag; + + return 0; +} + +errno_t +mbuf_clear_vlan_tag( + mbuf_t mbuf) +{ + mbuf->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID; + mbuf->m_pkthdr.vlan_tag = 0; + + return 0; +} + +static const mbuf_csum_request_flags_t mbuf_valid_csum_request_flags = + MBUF_CSUM_REQ_IP | MBUF_CSUM_REQ_TCP | MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_SUM16; + +errno_t +mbuf_set_csum_requested( + mbuf_t mbuf, + mbuf_csum_request_flags_t request, + u_int32_t value) +{ + request &= mbuf_valid_csum_request_flags; + mbuf->m_pkthdr.csum_flags = (mbuf->m_pkthdr.csum_flags & 0xffff0000) | request; + mbuf->m_pkthdr.csum_data = value; + + return 0; +} + +errno_t +mbuf_get_csum_requested( + mbuf_t mbuf, + mbuf_csum_request_flags_t *request, + u_int32_t *value) +{ + *request = mbuf->m_pkthdr.csum_flags; + *request &= mbuf_valid_csum_request_flags; + if (value != NULL) { + *value = mbuf->m_pkthdr.csum_data; + } + + return 0; +} + +errno_t +mbuf_clear_csum_requested( + mbuf_t mbuf) +{ + mbuf->m_pkthdr.csum_flags &= 0xffff0000; + mbuf->m_pkthdr.csum_data = 0; + + return 0; +} + +static const mbuf_csum_performed_flags_t mbuf_valid_csum_performed_flags = + MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD | MBUF_CSUM_DID_DATA | + MBUF_CSUM_PSEUDO_HDR | MBUF_CSUM_TCP_SUM16; + +errno_t +mbuf_set_csum_performed( + mbuf_t mbuf, + mbuf_csum_performed_flags_t performed, + u_int32_t value) +{ + performed &= mbuf_valid_csum_performed_flags; + mbuf->m_pkthdr.csum_flags = (mbuf->m_pkthdr.csum_flags & 0xffff0000) | performed; + mbuf->m_pkthdr.csum_data = value; + + return 0; +} + +errno_t +mbuf_get_csum_performed( + mbuf_t mbuf, + mbuf_csum_performed_flags_t *performed, + u_int32_t *value) +{ + *performed = mbuf->m_pkthdr.csum_flags & mbuf_valid_csum_performed_flags; + *value = mbuf->m_pkthdr.csum_data; + + return 0; +} + +errno_t +mbuf_clear_csum_performed( + mbuf_t mbuf) 
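/*
 * Sketch of a driver transmit path built on the checksum and VLAN KPIs
 * above, assuming an IPv4 frame whose IP header starts at ip_off; the
 * names are illustrative.
 *
 *	u_int16_t tag;
 *
 *	// No hardware offload: compute any delayed checksums in software now
 *	mbuf_outbound_finalize(m, PF_INET, ip_off);
 *
 *	// ENXIO from mbuf_get_vlan_tag means no 802.1Q tag was requested
 *	if (mbuf_get_vlan_tag(m, &tag) == 0) {
 *		// ... encapsulate the frame using tag ...
 *	}
 */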
+{ + mbuf->m_pkthdr.csum_flags &= 0xffff0000; + mbuf->m_pkthdr.csum_data = 0; + + return 0; +} + +/* + * Mbuf tag KPIs + */ + +struct mbuf_tag_id_entry { + SLIST_ENTRY(mbuf_tag_id_entry) next; + mbuf_tag_id_t id; + char string[]; +}; + +#define MBUF_TAG_ID_ENTRY_SIZE(__str) \ + ((size_t)&(((struct mbuf_tag_id_entry*)0)->string[0]) + \ + strlen(__str) + 1) + +#define MTAG_FIRST_ID 1000 +static u_long mtag_id_next = MTAG_FIRST_ID; +static SLIST_HEAD(,mbuf_tag_id_entry) mtag_id_list = {NULL}; +static lck_mtx_t *mtag_id_lock = NULL; + +__private_extern__ void +mbuf_tag_id_first_last( + u_long *first, + u_long *last) +{ + *first = MTAG_FIRST_ID; + *last = mtag_id_next - 1; +} + +__private_extern__ errno_t +mbuf_tag_id_find_internal( + const char *string, + u_long *out_id, + int create) +{ + struct mbuf_tag_id_entry *entry = NULL; + + + *out_id = 0; + + if (string == NULL || out_id == NULL) { + return EINVAL; + } + + /* Don't bother allocating the lock if we're only doing a lookup */ + if (create == 0 && mtag_id_lock == NULL) + return ENOENT; + + /* Allocate lock if necessary */ + if (mtag_id_lock == NULL) { + lck_grp_attr_t *grp_attrib = NULL; + lck_attr_t *lck_attrb = NULL; + lck_grp_t *lck_group = NULL; + lck_mtx_t *new_lock = NULL; + + grp_attrib = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attrib); + lck_group = lck_grp_alloc_init("mbuf_tag_allocate_id", grp_attrib); + lck_grp_attr_free(grp_attrib); + lck_attrb = lck_attr_alloc_init(); + lck_attr_setdefault(lck_attrb); + lck_attr_setdebug(lck_attrb); + new_lock = lck_mtx_alloc_init(lck_group, lck_attrb); + if (!OSCompareAndSwap((UInt32)0, (UInt32)new_lock, (UInt32*)&mtag_id_lock)) { + /* + * If the atomic swap fails, someone else has already + * done this work. We can free the stuff we allocated. + */ + lck_mtx_free(new_lock, lck_group); + lck_grp_free(lck_group); + } + lck_attr_free(lck_attrb); + } + + /* Look for an existing entry */ + lck_mtx_lock(mtag_id_lock); + SLIST_FOREACH(entry, &mtag_id_list, next) { + if (strcmp(string, entry->string) == 0) { + break; + } + } + + if (entry == NULL) { + if (create == 0) { + lck_mtx_unlock(mtag_id_lock); + return ENOENT; + } + + entry = kalloc(MBUF_TAG_ID_ENTRY_SIZE(string)); + if (entry == NULL) { + lck_mtx_unlock(mtag_id_lock); + return ENOMEM; + } + + strcpy(entry->string, string); + entry->id = mtag_id_next; + mtag_id_next++; + SLIST_INSERT_HEAD(&mtag_id_list, entry, next); + } + lck_mtx_unlock(mtag_id_lock); + + *out_id = entry->id; + + return 0; +} + +errno_t +mbuf_tag_id_find( + const char *string, + mbuf_tag_id_t *out_id) +{ + return mbuf_tag_id_find_internal(string, (u_long*)out_id, 1); +} + +errno_t +mbuf_tag_allocate( + mbuf_t mbuf, + mbuf_tag_id_t id, + mbuf_tag_type_t type, + size_t length, + mbuf_how_t how, + void** data_p) +{ + struct m_tag *tag; + + if (data_p != NULL) + *data_p = NULL; + + /* Sanity check parameters */ + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || + id >= mtag_id_next || length < 1 || (length & 0xffff0000) != 0 || + data_p == NULL) { + return EINVAL; + } + + /* Make sure this mtag hasn't already been allocated */ + tag = m_tag_locate(mbuf, id, type, NULL); + if (tag != NULL) { + return EEXIST; + } + + /* Allocate an mtag */ + tag = m_tag_alloc(id, type, length, how); + if (tag == NULL) { + return how == M_WAITOK ? 
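/*
 * The tag-ID registry lock above is created lazily: each racing thread
 * builds a mutex, but only the one whose compare-and-swap succeeds
 * installs it. A condensed sketch of the idiom (group/attr setup omitted):
 *
 *	lck_mtx_t *new_lock = lck_mtx_alloc_init(lck_group, lck_attrb);
 *
 *	if (!OSCompareAndSwap((UInt32)0, (UInt32)new_lock,
 *	                      (UInt32*)&mtag_id_lock)) {
 *		// Lost the race; another thread's lock is already installed
 *		lck_mtx_free(new_lock, lck_group);
 *	}
 */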
ENOMEM : EWOULDBLOCK; + } + + /* Attach the mtag and set *data_p */ + m_tag_prepend(mbuf, tag); + *data_p = tag + 1; + + return 0; +} + +errno_t +mbuf_tag_find( + mbuf_t mbuf, + mbuf_tag_id_t id, + mbuf_tag_type_t type, + size_t* length, + void** data_p) +{ + struct m_tag *tag; + + if (length != NULL) + *length = 0; + if (data_p != NULL) + *data_p = NULL; + + /* Sanity check parameters */ + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || + id >= mtag_id_next || length == NULL || data_p == NULL) { + return EINVAL; + } + + /* Locate an mtag */ + tag = m_tag_locate(mbuf, id, type, NULL); + if (tag == NULL) { + return ENOENT; + } + + /* Copy out the pointer to the data and the length value */ + *length = tag->m_tag_len; + *data_p = tag + 1; + + return 0; +} + +void +mbuf_tag_free( + mbuf_t mbuf, + mbuf_tag_id_t id, + mbuf_tag_type_t type) +{ + struct m_tag *tag; + + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || + id >= mtag_id_next) + return; + + tag = m_tag_locate(mbuf, id, type, NULL); + if (tag == NULL) { + return; + } + + m_tag_delete(mbuf, tag); + return; +} + +/* mbuf stats */ +void mbuf_stats(struct mbuf_stat *stats) +{ + stats->mbufs = mbstat.m_mbufs; + stats->clusters = mbstat.m_clusters; + stats->clfree = mbstat.m_clfree; + stats->drops = mbstat.m_drops; + stats->wait = mbstat.m_wait; + stats->drain = mbstat.m_drain; + __builtin_memcpy(stats->mtypes, mbstat.m_mtypes, sizeof(stats->mtypes)); + stats->mcfail = mbstat.m_mcfail; + stats->mpfail = mbstat.m_mpfail; + stats->msize = mbstat.m_msize; + stats->mclbytes = mbstat.m_mclbytes; + stats->minclsize = mbstat.m_minclsize; + stats->mlen = mbstat.m_mlen; + stats->mhlen = mbstat.m_mhlen; + stats->bigclusters = mbstat.m_bigclusters; + stats->bigclfree = mbstat.m_bigclfree; + stats->bigmclbytes = mbstat.m_bigmclbytes; +} + +errno_t +mbuf_allocpacket(mbuf_how_t how, size_t packetlen, unsigned int *maxchunks, mbuf_t *mbuf) +{ + errno_t error; + struct mbuf *m; + unsigned int numpkts = 1; + unsigned int numchunks = maxchunks ? *maxchunks : 0; + + if (packetlen == 0) { + error = EINVAL; + goto out; + } + m = m_allocpacket_internal(&numpkts, packetlen, maxchunks ?
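/*
 * Sketch of the intended workflow for the tag KPIs above; the reverse-DNS
 * string, tag type value, and per-packet struct are illustrative.
 *
 *	mbuf_tag_id_t id;
 *	size_t len;
 *	struct my_state *state;		// hypothetical per-packet data
 *
 *	if (mbuf_tag_id_find("com.example.filter", &id) == 0 &&
 *	    mbuf_tag_allocate(m, id, 1, sizeof(*state), MBUF_WAITOK,
 *	                      (void**)&state) == 0) {
 *		// ... fill *state; any later holder can look it up again:
 *		(void)mbuf_tag_find(m, id, 1, &len, (void**)&state);
 *	}
 */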
&numchunks : NULL, how, 1, 0); + if (m == 0) { + if (maxchunks && *maxchunks && numchunks > *maxchunks) + error = ENOBUFS; + else + error = ENOMEM; + } else { + error = 0; + *mbuf = m; + } +out: + return error; +} + + +/* + * mbuf_copyback differs from m_copyback in a few ways: + * 1) mbuf_copyback will allocate clusters for new mbufs we append + * 2) mbuf_copyback will grow the last mbuf in the chain if possible + * 3) mbuf_copyback reports whether or not the operation succeeded + * 4) mbuf_copyback allows the caller to specify M_WAITOK or M_NOWAIT + */ +errno_t +mbuf_copyback( + mbuf_t m, + size_t off, + size_t len, + const void *data, + mbuf_how_t how) +{ + size_t mlen; + mbuf_t m_start = m; + mbuf_t n; + int totlen = 0; + errno_t result = 0; + const char *cp = data; + + if (m == NULL || len == 0 || data == NULL) + return EINVAL; + + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == 0) { + n = m_getclr(how, m->m_type); + if (n == 0) { + result = ENOBUFS; + goto out; + } + n->m_len = MIN(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + + while (len > 0) { + mlen = MIN(m->m_len - off, len); + if (mlen < len && m->m_next == NULL && mbuf_trailingspace(m) > 0) { + size_t grow = MIN(mbuf_trailingspace(m), len - mlen); + mlen += grow; + m->m_len += grow; + } + bcopy(cp, off + (char*)mbuf_data(m), (unsigned)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) + break; + if (m->m_next == 0) { + n = m_get(how, m->m_type); + if (n == NULL) { + result = ENOBUFS; + goto out; + } + if (len > MINCLSIZE) { + /* cluster allocation failure is okay, we can grow chain */ + mbuf_mclget(how, m->m_type, &n); + } + n->m_len = MIN(mbuf_maxlen(n), len); + m->m_next = n; + } + m = m->m_next; + } + +out: + if ((m_start->m_flags & M_PKTHDR) && (m_start->m_pkthdr.len < totlen)) + m_start->m_pkthdr.len = totlen; + + return result; +} diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c new file mode 100644 index 000000000..c2d295c27 --- /dev/null +++ b/bsd/kern/kpi_socket.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define __KPI__ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void *memcpy(void *, const void *, size_t); +extern int soclose_locked(struct socket *so); + +errno_t sock_send_internal( + socket_t sock, + const struct msghdr *msg, + mbuf_t data, + int flags, + size_t *sentlen); + + + +errno_t +sock_accept( + socket_t sock, + struct sockaddr *from, + int fromlen, + int flags, + sock_upcall callback, + void* cookie, + socket_t *new_sock) +{ + struct sockaddr *sa; + struct socket *new_so; + lck_mtx_t *mutex_held; + int dosocklock; + errno_t error = 0; + + if (sock == NULL || new_sock == NULL) return EINVAL; + socket_lock(sock, 1); + if ((sock->so_options & SO_ACCEPTCONN) == 0) { + socket_unlock(sock, 1); + return EINVAL; + } + if ((flags & ~(MSG_DONTWAIT)) != 0) { + socket_unlock(sock, 1); + return ENOTSUP; + } + if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) && + sock->so_comp.tqh_first == NULL) { + socket_unlock(sock, 1); + return EWOULDBLOCK; + } + + if (sock->so_proto->pr_getlock != NULL) { + mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); + dosocklock = 1; + } + else { + mutex_held = sock->so_proto->pr_domain->dom_mtx; + dosocklock = 0; + } + + while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) { + if (sock->so_state & SS_CANTRCVMORE) { + sock->so_error = ECONNABORTED; + break; + } + error = msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH, "sock_accept", 0); + if (error) { + socket_unlock(sock, 1); + return (error); + } + } + if (sock->so_error) { + error = sock->so_error; + sock->so_error = 0; + socket_unlock(sock, 1); + return (error); + } + + new_so = TAILQ_FIRST(&sock->so_comp); + TAILQ_REMOVE(&sock->so_comp, new_so, so_list); + sock->so_qlen--; + socket_unlock(sock, 1); /* release the head */ + + if (dosocklock) { + lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0), + LCK_MTX_ASSERT_NOTOWNED); + socket_lock(new_so, 1); + } + + new_so->so_state &= ~SS_COMP; + new_so->so_head = NULL; + soacceptlock(new_so, &sa, 0); + + if (callback) { + new_so->so_upcall = callback; + new_so->so_upcallarg = cookie; + new_so->so_rcv.sb_flags |= SB_UPCALL; + } + + if (sa && from) + { + if (fromlen > sa->sa_len) fromlen = sa->sa_len; + memcpy(from, sa, fromlen); + } + if (sa) FREE(sa, M_SONAME); + *new_sock = new_so; + if (dosocklock) + socket_unlock(new_so, 1); + return error; +} + +errno_t +sock_bind( + socket_t sock, + const struct sockaddr *to) +{ + if (sock == NULL || to == NULL) return EINVAL; + + return sobind(sock, (struct sockaddr*)to); +} + +errno_t +sock_connect( + socket_t sock, + const struct sockaddr *to, + int flags) +{ + int error = 0; + lck_mtx_t *mutex_held; + + if (sock == NULL || to == NULL) return EINVAL; + + socket_lock(sock, 1); + + if ((sock->so_state & SS_ISCONNECTING) && + ((sock->so_state & SS_NBIO) != 0 || + (flags & MSG_DONTWAIT) != 0)) { + socket_unlock(sock, 1); + return EALREADY; + } + error = soconnectlock(sock, (struct sockaddr*)to, 0); + if (!error) { + if ((sock->so_state & SS_ISCONNECTING) && + ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { + socket_unlock(sock, 1); + return EINPROGRESS; + } + + if (sock->so_proto->pr_getlock != NULL) + mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); + else + mutex_held = sock->so_proto->pr_domain->dom_mtx; + + while ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) { + error = 
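/*
 * Sketch: draining a listening socket with sock_accept above. With
 * MSG_DONTWAIT the call returns EWOULDBLOCK instead of sleeping when the
 * completed-connection queue is empty; the callback and cookie names are
 * illustrative.
 *
 *	struct sockaddr_in from;
 *	socket_t child;
 *	errno_t err;
 *
 *	err = sock_accept(listener, (struct sockaddr*)&from, sizeof(from),
 *	                  MSG_DONTWAIT, my_upcall, my_cookie, &child);
 *	if (err == 0)
 *		sock_close(child);	// illustration only
 */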
msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH, + "sock_connect", 0); + if (error) + break; + } + + if (error == 0) { + error = sock->so_error; + sock->so_error = 0; + } + } + else { + sock->so_state &= ~SS_ISCONNECTING; + } + socket_unlock(sock, 1); + return error; +} + +errno_t +sock_connectwait( + socket_t sock, + const struct timeval *tv) +{ + lck_mtx_t * mutex_held; + errno_t retval = 0; + struct timespec ts; + + socket_lock(sock, 1); + + // Check if we're already connected or if we've already errored out + if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error) { + if (sock->so_error) { + retval = sock->so_error; + sock->so_error = 0; + } + else { + if ((sock->so_state & SS_ISCONNECTED) != 0) + retval = 0; + else + retval = EINVAL; + } + goto done; + } + + // copied translation from timeval to hertz from SO_RCVTIMEO handling + if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz || + tv->tv_usec < 0 || tv->tv_usec >= 1000000) { + retval = EDOM; + goto done; + } + + ts.tv_sec = tv->tv_sec; + ts.tv_nsec = (tv->tv_usec * NSEC_PER_USEC); + if ( (ts.tv_sec + (ts.tv_nsec/NSEC_PER_SEC))/100 > SHRT_MAX) { + retval = EDOM; + goto done; + } + + if (sock->so_proto->pr_getlock != NULL) + mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); + else + mutex_held = sock->so_proto->pr_domain->dom_mtx; + + msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK, "sock_connectwait", &ts); + + // Check if we're still waiting to connect + if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) { + retval = EINPROGRESS; + goto done; + } + + if (sock->so_error) { + retval = sock->so_error; + sock->so_error = 0; + } + +done: + socket_unlock(sock, 1); + return retval; +} + +errno_t +sock_nointerrupt( + socket_t sock, + int on) +{ + socket_lock(sock, 1); + + if (on) { + sock->so_rcv.sb_flags |= SB_NOINTR; // This isn't safe + sock->so_snd.sb_flags |= SB_NOINTR; // This isn't safe + } + else { + sock->so_rcv.sb_flags &= ~SB_NOINTR; // This isn't safe + sock->so_snd.sb_flags &= ~SB_NOINTR; // This isn't safe + } + + socket_unlock(sock, 1); + + return 0; +} + +errno_t +sock_getpeername( + socket_t sock, + struct sockaddr *peername, + int peernamelen) +{ + int error = 0; + struct sockaddr *sa = NULL; + + if (sock == NULL || peername == NULL || peernamelen < 0) return EINVAL; + socket_lock(sock, 1); + if ((sock->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { + socket_unlock(sock, 1); + return ENOTCONN; + } + error = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa); + if (!error) + { + if (peernamelen > sa->sa_len) peernamelen = sa->sa_len; + memcpy(peername, sa, peernamelen); + } + if (sa) FREE(sa, M_SONAME); + socket_unlock(sock, 1); + return error; +} + +errno_t +sock_getsockname( + socket_t sock, + struct sockaddr *sockname, + int socknamelen) +{ + int error = 0; + struct sockaddr *sa = NULL; + + if (sock == NULL || sockname == NULL || socknamelen < 0) return EINVAL; + socket_lock(sock, 1); + error = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa); + if (!error) + { + if (socknamelen > sa->sa_len) socknamelen = sa->sa_len; + memcpy(sockname, sa, socknamelen); + } + if (sa) FREE(sa, M_SONAME); + socket_unlock(sock, 1); + return error; +} + +errno_t +sock_getsockopt( + socket_t sock, + int level, + int optname, + void *optval, + int *optlen) +{ + int error = 0; + struct sockopt sopt; + + if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL; + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = level; + sopt.sopt_name = optname; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + 
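/*
 * Sketch: pairing sock_connect and sock_connectwait above for a bounded,
 * non-blocking connect; sin is an illustrative, already-filled
 * sockaddr_in. The timeout must satisfy the EDOM bounds checked in
 * sock_connectwait.
 *
 *	struct timeval tv = { 5, 0 };
 *	errno_t err;
 *
 *	err = sock_connect(so, (struct sockaddr*)&sin, MSG_DONTWAIT);
 *	if (err == EINPROGRESS)
 *		err = sock_connectwait(so, &tv);	// EINPROGRESS again on timeout
 */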
sopt.sopt_valsize = *optlen; + sopt.sopt_p = NULL; + error = sogetopt(sock, &sopt); /* will lock socket */ + if (error == 0) *optlen = sopt.sopt_valsize; + return error; +} + +errno_t +sock_ioctl( + socket_t sock, + unsigned long request, + void *argp) +{ + return soioctl(sock, request, argp, NULL); /* will lock socket */ +} + +errno_t +sock_setsockopt( + socket_t sock, + int level, + int optname, + const void *optval, + int optlen) +{ + struct sockopt sopt; + + if (sock == NULL || optval == NULL) return EINVAL; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = level; + sopt.sopt_name = optname; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_valsize = optlen; + sopt.sopt_p = NULL; + return sosetopt(sock, &sopt); /* will lock socket */ +} + +errno_t +sock_listen( + socket_t sock, + int backlog) +{ + if (sock == NULL) return EINVAL; + return solisten(sock, backlog); /* will lock socket */ +} + +static errno_t +sock_receive_internal( + socket_t sock, + struct msghdr *msg, + mbuf_t *data, + int flags, + size_t *recvdlen) +{ + uio_t auio; + struct mbuf *control = NULL; + int error = 0; + int length = 0; + struct sockaddr *fromsa; + char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ]; + + if (sock == NULL) return EINVAL; + + auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0), + 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (msg && data == NULL) { + int i; + struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + + for (i = 0; i < msg->msg_iovlen; i++) { + uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); + } + if (uio_resid(auio) < 0) return EINVAL; + } + else { + uio_setresid(auio, (uio_resid(auio) + *recvdlen)); + } + length = uio_resid(auio); + + if (recvdlen) + *recvdlen = 0; + + if (msg && msg->msg_control) { + if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) return EINVAL; + if ((size_t)msg->msg_controllen > MLEN) return EINVAL; + control = m_get(M_NOWAIT, MT_CONTROL); + if (control == NULL) return ENOMEM; + memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen); + control->m_len = msg->msg_controllen; + } + + /* let pru_soreceive handle the socket locking */ + error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio, + data, control ? &control : NULL, &flags); + if (error) goto cleanup; + + if (recvdlen) + *recvdlen = length - uio_resid(auio); + if (msg) { + msg->msg_flags = flags; + + if (msg->msg_name) + { + int salen; + salen = msg->msg_namelen; + if (msg->msg_namelen > 0 && fromsa != 0) + { + salen = MIN(salen, fromsa->sa_len); + memcpy(msg->msg_name, fromsa, + msg->msg_namelen > fromsa->sa_len ? 
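/*
 * Sketch: the option KPIs above mirror setsockopt(2)/getsockopt(2) but
 * take kernel pointers; the option chosen is illustrative.
 *
 *	int on = 1;
 *	int len = sizeof(on);
 *
 *	(void)sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 *	(void)sock_getsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, &len);
 */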
fromsa->sa_len : msg->msg_namelen); + } + } + + if (msg->msg_control) + { + struct mbuf* m = control; + u_char* ctlbuf = msg->msg_control; + int clen = msg->msg_controllen; + msg->msg_controllen = 0; + + while (m && clen > 0) + { + unsigned int tocopy; + if (clen >= m->m_len) + { + tocopy = m->m_len; + } + else + { + msg->msg_flags |= MSG_CTRUNC; + tocopy = clen; + } + memcpy(ctlbuf, mtod(m, caddr_t), tocopy); + ctlbuf += tocopy; + clen -= tocopy; + m = m->m_next; + } + msg->msg_controllen = (u_int32_t)ctlbuf - (u_int32_t)msg->msg_control; + } + } + +cleanup: + if (control) m_freem(control); + if (fromsa) FREE(fromsa, M_SONAME); + return error; +} + +errno_t +sock_receive( + socket_t sock, + struct msghdr *msg, + int flags, + size_t *recvdlen) +{ + if ((msg == NULL) || + (msg->msg_iovlen < 1) || + (msg->msg_iov[0].iov_len == 0) || + (msg->msg_iov[0].iov_base == NULL)) + return EINVAL; + return sock_receive_internal(sock, msg, NULL, flags, recvdlen); +} + +errno_t +sock_receivembuf( + socket_t sock, + struct msghdr *msg, + mbuf_t *data, + int flags, + size_t *recvlen) +{ + if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg && + (msg->msg_iov != NULL || msg->msg_iovlen != 0))) + return EINVAL; + return sock_receive_internal(sock, msg, data, flags, recvlen); +} + +errno_t +sock_send_internal( + socket_t sock, + const struct msghdr *msg, + mbuf_t data, + int flags, + size_t *sentlen) +{ + uio_t auio = NULL; + struct mbuf *control = NULL; + int error = 0; + int datalen = 0; + char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ]; + + if (sock == NULL) { + error = EINVAL; + goto errorout; + } + + if (data == 0 && msg != NULL) { + struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + + auio = uio_createwithbuffer(msg->msg_iovlen, 0, UIO_SYSSPACE, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + if (tempp != NULL) + { + int i; + + for (i = 0; i < msg->msg_iovlen; i++) { + uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); + } + + if (uio_resid(auio) < 0) { + error = EINVAL; + goto errorout; + } + } + } + + if (sentlen) + *sentlen = 0; + + if (auio) + datalen = uio_resid(auio); + else + datalen = data->m_pkthdr.len; + + if (msg && msg->msg_control) + { + if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) return EINVAL; + if ((size_t)msg->msg_controllen > MLEN) return EINVAL; + control = m_get(M_NOWAIT, MT_CONTROL); + if (control == NULL) { + error = ENOMEM; + goto errorout; + } + memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen); + control->m_len = msg->msg_controllen; + } + + error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg ? (struct sockaddr*)msg->msg_name : 0, + auio, data, control, flags); + if (error == 0 && sentlen) { + if (auio) + *sentlen = datalen - uio_resid(auio); + else + *sentlen = datalen; + } + + return error; + +/* + * In cases where we detect an error before returning, we need to + * free the mbuf chain if there is one. sosend (and pru_sosend) will + * free the mbuf chain if they encounter an error. 
+ */ +errorout: + if (control) + m_freem(control); + if (data) + m_freem(data); + if (sentlen) + *sentlen = 0; + return error; +} + +errno_t +sock_send( + socket_t sock, + const struct msghdr *msg, + int flags, + size_t *sentlen) +{ + if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1) + return EINVAL; + return sock_send_internal(sock, msg, NULL, flags, sentlen); +} + +errno_t +sock_sendmbuf( + socket_t sock, + const struct msghdr *msg, + mbuf_t data, + int flags, + size_t *sentlen) +{ + if (data == NULL || (msg && + (msg->msg_iov != NULL || msg->msg_iovlen != 0))) { + if (data) + m_freem(data); + return EINVAL; + } + return sock_send_internal(sock, msg, data, flags, sentlen); +} + +errno_t +sock_shutdown( + socket_t sock, + int how) +{ + if (sock == NULL) return EINVAL; + return soshutdown(sock, how); +} + +typedef void (*so_upcall)(struct socket *sock, void* arg, int waitf); + +errno_t +sock_socket( + int domain, + int type, + int protocol, + sock_upcall callback, + void* context, + socket_t *new_so) +{ + int error = 0; + if (new_so == NULL) return EINVAL; + /* socreate will create an initial so_count */ + error = socreate(domain, new_so, type, protocol); + if (error == 0 && callback) + { + (*new_so)->so_rcv.sb_flags |= SB_UPCALL; + (*new_so)->so_upcall = (so_upcall)callback; + (*new_so)->so_upcallarg = context; + } + return error; +} + +void +sock_close( + socket_t sock) +{ + if (sock == NULL) return; + soclose(sock); +} + +/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04)*/ +void +sock_retain( + socket_t sock) +{ + if (sock == NULL) return; + socket_lock(sock, 1); + sock->so_retaincnt++; + sock->so_usecount++; /* add extra reference for holding the socket */ + socket_unlock(sock, 1); +} + +/* Do we want this to be APPLE_PRIVATE API? */ +void +sock_release( + socket_t sock) +{ + if (sock == NULL) return; + socket_lock(sock, 1); + sock->so_retaincnt--; + if (sock->so_retaincnt < 0) + panic("sock_release: negative retain count for sock=%x cnt=%x\n", + sock, sock->so_retaincnt); + if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2)) + soclose_locked(sock); /* close socket only if the FD is not holding it */ + else + sock->so_usecount--; /* remove extra reference holding the socket */ + socket_unlock(sock, 1); +} + +errno_t +sock_setpriv( + socket_t sock, + int on) +{ + if (sock == NULL) return EINVAL; + socket_lock(sock, 1); + if (on) + { + sock->so_state |= SS_PRIV; + } + else + { + sock->so_state &= ~SS_PRIV; + } + socket_unlock(sock, 1); + return 0; +} + +int +sock_isconnected( + socket_t sock) +{ + int retval; + socket_lock(sock, 1); + retval = (sock->so_state & SS_ISCONNECTED) != 0; + socket_unlock(sock, 1); + return (retval); +} + +int +sock_isnonblocking( + socket_t sock) +{ + int retval; + socket_lock(sock, 1); + retval = (sock->so_state & SS_NBIO) != 0; + socket_unlock(sock, 1); + return (retval); +} + +errno_t +sock_gettype( + socket_t sock, + int *outDomain, + int *outType, + int *outProtocol) +{ + socket_lock(sock, 1); + if (outDomain) + *outDomain = sock->so_proto->pr_domain->dom_family; + if (outType) + *outType = sock->so_type; + if (outProtocol) + *outProtocol = sock->so_proto->pr_protocol; + socket_unlock(sock, 1); + return 0; +} diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c new file mode 100644 index 000000000..729f5fac1 --- /dev/null +++ b/bsd/kern/kpi_socketfilter.c @@ -0,0 +1,595 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct socket_filter_list sock_filter_head; +static lck_mtx_t *sock_filter_lock = 0; + +__private_extern__ void +sflt_init(void) +{ + lck_grp_attr_t *grp_attrib = 0; + lck_attr_t *lck_attrib = 0; + lck_grp_t *lck_group = 0; + + TAILQ_INIT(&sock_filter_head); + + /* Allocate a spin lock */ + grp_attrib = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attrib); + lck_group = lck_grp_alloc_init("socket filter lock", grp_attrib); + lck_grp_attr_free(grp_attrib); + lck_attrib = lck_attr_alloc_init(); + lck_attr_setdefault(lck_attrib); + lck_attr_setdebug(lck_attrib); + sock_filter_lock = lck_mtx_alloc_init(lck_group, lck_attrib); + lck_grp_free(lck_group); + lck_attr_free(lck_attrib); +} + +__private_extern__ void +sflt_initsock( + struct socket *so) +{ + struct protosw *proto = so->so_proto; + struct socket_filter *filter; + + if (TAILQ_FIRST(&proto->pr_filter_head) != NULL) { + lck_mtx_lock(sock_filter_lock); + TAILQ_FOREACH(filter, &proto->pr_filter_head, sf_protosw_next) { + sflt_attach_private(so, filter, 0, 0); + } + lck_mtx_unlock(sock_filter_lock); + } +} + +__private_extern__ void +sflt_termsock( + struct socket *so) +{ + struct socket_filter_entry *filter; + struct socket_filter_entry *filter_next; + + for (filter = so->so_filt; filter; filter = filter_next) { + filter_next = filter->sfe_next_onsocket; + sflt_detach_private(filter, 0); + } +} + +__private_extern__ void +sflt_use( + struct socket *so) +{ + so->so_filteruse++; +} + +__private_extern__ void +sflt_unuse( + struct socket *so) +{ + so->so_filteruse--; + if (so->so_filteruse == 0) { + struct socket_filter_entry *filter; + struct socket_filter_entry *next_filter; + // search for detaching filters + for (filter = so->so_filt; filter; filter = next_filter) { + next_filter = filter->sfe_next_onsocket; + + if (filter->sfe_flags & SFEF_DETACHING) { + sflt_detach_private(filter, 0); + } + } + } +} + +__private_extern__ void +sflt_notify( + struct socket *so, + sflt_event_t event, + void *param) +{ + struct socket_filter_entry *filter; + int filtered = 0; + + for (filter = so->so_filt; filter; + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_notify) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + filter->sfe_filter->sf_filter.sf_notify( + filter->sfe_cookie, so, event, param); + } + } + + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + } +} + +__private_extern__ int +sflt_data_in( + struct socket *so, + const struct sockaddr *from, + mbuf_t *data, + mbuf_t *control, + sflt_data_flag_t 
flags) +{ + struct socket_filter_entry *filter; + int filtered = 0; + int error = 0; + + for (filter = so->so_filt; filter; + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_data_in) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_data_in( + filter->sfe_cookie, so, from, data, control, flags); + } + } + + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + } + + return error; +} + +/* sflt_attach_private + * + * Assumptions: If filter is not NULL, socket_filter_lock is held. + */ + +__private_extern__ int +sflt_attach_private( + struct socket *so, + struct socket_filter *filter, + sflt_handle handle, + int sock_locked) +{ + struct socket_filter_entry *entry = NULL; + int didlock = 0; + int error = 0; + + if (filter == NULL) { + /* Find the filter by the handle */ + lck_mtx_lock(sock_filter_lock); + didlock = 1; + + TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) { + if (filter->sf_filter.sf_handle == handle) + break; + } + } + + if (filter == NULL) + error = ENOENT; + + if (error == 0) { + /* allocate the socket filter entry */ + MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR, M_WAITOK); + if (entry == NULL) { + error = ENOMEM; + } + } + + if (error == 0) { + /* Initialize the socket filter entry and call the attach function */ + entry->sfe_filter = filter; + entry->sfe_socket = so; + entry->sfe_cookie = NULL; + if (entry->sfe_filter->sf_filter.sf_attach) { + filter->sf_usecount++; + + if (sock_locked) + socket_unlock(so, 0); + error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so); + if (sock_locked) + socket_lock(so, 0); + + filter->sf_usecount--; + + /* If the attach function returns an error, this filter is not attached */ + if (error) { + FREE(entry, M_IFADDR); + entry = NULL; + } + } + } + + if (error == 0) { + /* Put the entry in the socket list */ + entry->sfe_next_onsocket = so->so_filt; + so->so_filt = entry; + + /* Put the entry in the filter list */ + entry->sfe_next_onfilter = filter->sf_entry_head; + filter->sf_entry_head = entry; + + /* Increment the socket's usecount */ + so->so_usecount++; + + /* Increment the parent filter's usecount */ + filter->sf_usecount++; + } + + if (didlock) { + lck_mtx_unlock(sock_filter_lock); + } + + return error; +} + + +/* sflt_detach_private + * + * Assumptions: if you pass 0 in for the second parameter, you are holding the + * socket lock for the socket the entry is attached to. If you pass 1 in for + * the second parameter, it is assumed that the entry is not on the filter's + * list and the socket lock is not held. + */ + +__private_extern__ void +sflt_detach_private( + struct socket_filter_entry *entry, + int filter_detached) +{ + struct socket *so = entry->sfe_socket; + struct socket_filter_entry **next_ptr; + int detached = 0; + int found = 0; + + if (filter_detached) { + socket_lock(entry->sfe_socket, 0); + } + + /* + * Attempt to find the entry on the filter's list and + * remove it. This prevents a filter detaching at the + * same time from attempting to remove the same entry.
+ */ + lck_mtx_lock(sock_filter_lock); + if (!filter_detached) { + for (next_ptr = &entry->sfe_filter->sf_entry_head; *next_ptr; + next_ptr = &((*next_ptr)->sfe_next_onfilter)) { + if (*next_ptr == entry) { + found = 1; + *next_ptr = entry->sfe_next_onfilter; + break; + } + } + } + + if (!filter_detached && !found && (entry->sfe_flags & SFEF_DETACHING) == 0) { + lck_mtx_unlock(sock_filter_lock); + return; + } + + if (entry->sfe_socket->so_filteruse != 0) { + lck_mtx_unlock(sock_filter_lock); + entry->sfe_flags |= SFEF_DETACHING; + return; + } + + /* + * Check if we are removing the last attached filter and + * the parent filter is being unregistered. + */ + if (entry->sfe_socket->so_filteruse == 0) { + entry->sfe_filter->sf_usecount--; + if ((entry->sfe_filter->sf_usecount == 0) && + (entry->sfe_filter->sf_flags & SFF_DETACHING) != 0) + detached = 1; + } + lck_mtx_unlock(sock_filter_lock); + + /* Remove from the socket list */ + for (next_ptr = &entry->sfe_socket->so_filt; *next_ptr; + next_ptr = &((*next_ptr)->sfe_next_onsocket)) { + if (*next_ptr == entry) { + *next_ptr = entry->sfe_next_onsocket; + break; + } + } + + if (entry->sfe_filter->sf_filter.sf_detach) + entry->sfe_filter->sf_filter.sf_detach(entry->sfe_cookie, entry->sfe_socket); + + if (detached && entry->sfe_filter->sf_filter.sf_unregistered) { + entry->sfe_filter->sf_filter.sf_unregistered(entry->sfe_filter->sf_filter.sf_handle); + FREE(entry->sfe_filter, M_IFADDR); + } + + if (filter_detached) { + socket_unlock(entry->sfe_socket, 1); + } + else { + // We need some better way to decrement the usecount + so->so_usecount--; + } + FREE(entry, M_IFADDR); +} + +errno_t +sflt_attach( + socket_t socket, + sflt_handle handle) +{ + if (socket == NULL || handle == 0) + return EINVAL; + + return sflt_attach_private(socket, NULL, handle, 0); +} + +errno_t +sflt_detach( + socket_t socket, + sflt_handle handle) +{ + struct socket_filter_entry *filter; + errno_t result = 0; + + if (socket == NULL || handle == 0) + return EINVAL; + + socket_lock(socket, 1); + + for (filter = socket->so_filt; filter; + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_handle == handle) + break; + } + + if (filter != NULL) { + sflt_detach_private(filter, 0); + } + else { + result = ENOENT; + } + + socket_unlock(socket, 1); + + return result; +} + + +errno_t +sflt_register( + const struct sflt_filter *filter, + int domain, + int type, + int protocol) +{ + struct socket_filter *sock_filt = NULL; + struct socket_filter *match = NULL; + int error = 0; + struct protosw *pr = pffindproto(domain, protocol, type); + + if (pr == NULL) return ENOENT; + + if (filter->sf_attach == NULL || filter->sf_detach == NULL) return EINVAL; + if (filter->sf_handle == 0) return EINVAL; + if (filter->sf_name == NULL) return EINVAL; + + /* Allocate the socket filter */ + MALLOC(sock_filt, struct socket_filter*, sizeof(*sock_filt), M_IFADDR, M_WAITOK); + if (sock_filt == NULL) { + return ENOBUFS; + } + + bzero(sock_filt, sizeof(*sock_filt)); + sock_filt->sf_filter = *filter; + + lck_mtx_lock(sock_filter_lock); + /* Look for an existing entry */ + TAILQ_FOREACH(match, &sock_filter_head, sf_global_next) { + if (match->sf_filter.sf_handle == sock_filt->sf_filter.sf_handle) { + break; + } + } + + /* Add the entry only if there was no existing entry */ + if (match == NULL) { + TAILQ_INSERT_TAIL(&sock_filter_head, sock_filt, sf_global_next); + if ((sock_filt->sf_filter.sf_flags & SFLT_GLOBAL) != 0) { + TAILQ_INSERT_TAIL(&pr->pr_filter_head, sock_filt, 
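/*
 * Sketch: programmatic attach/detach through the public wrappers above,
 * keyed by the handle the filter registered with; the handle value is
 * illustrative.
 *
 *	#define MY_HANDLE 0x454d4d59	// hypothetical sf_handle
 *
 *	if (sflt_attach(so, MY_HANDLE) == 0) {
 *		// the filter's sf_attach callback has run for this socket
 *		(void)sflt_detach(so, MY_HANDLE);
 *	}
 */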
sf_protosw_next); + sock_filt->sf_proto = pr; + } + } + lck_mtx_unlock(sock_filter_lock); + + if (match != NULL) { + FREE(sock_filt, M_IFADDR); + return EEXIST; + } + + return error; +} + +errno_t +sflt_unregister( + sflt_handle handle) +{ + struct socket_filter *filter; + struct socket_filter_entry *entry_head = NULL; + + /* Find the entry and remove it from the global and protosw lists */ + lck_mtx_lock(sock_filter_lock); + TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) { + if (filter->sf_filter.sf_handle == handle) + break; + } + + if (filter) { + TAILQ_REMOVE(&sock_filter_head, filter, sf_global_next); + if ((filter->sf_filter.sf_flags & SFLT_GLOBAL) != 0) { + TAILQ_REMOVE(&filter->sf_proto->pr_filter_head, filter, sf_protosw_next); + } + entry_head = filter->sf_entry_head; + filter->sf_entry_head = NULL; + filter->sf_flags |= SFF_DETACHING; + } + + lck_mtx_unlock(sock_filter_lock); + + if (filter == NULL) + return ENOENT; + + /* We need to detach the filter from any sockets it's attached to */ + if (entry_head == 0) { + if (filter->sf_filter.sf_unregistered) + filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle); + } else { + while (entry_head) { + struct socket_filter_entry *next_entry; + next_entry = entry_head->sfe_next_onfilter; + sflt_detach_private(entry_head, 1); + entry_head = next_entry; + } + } + + return 0; +} + +errno_t +sock_inject_data_in( + socket_t so, + const struct sockaddr* from, + mbuf_t data, + mbuf_t control, + sflt_data_flag_t flags) +{ + int error = 0; + if (so == NULL || data == NULL) return EINVAL; + + if (flags & sock_data_filt_flag_oob) { + return ENOTSUP; + } + + socket_lock(so, 1); + + if (from) { + if (sbappendaddr(&so->so_rcv, (struct sockaddr*)from, data, + control, NULL)) + sorwakeup(so); + goto done; + } + + if (control) { + if (sbappendcontrol(&so->so_rcv, data, control, NULL)) + sorwakeup(so); + goto done; + } + + if (flags & sock_data_filt_flag_record) { + if (control || from) { + error = EINVAL; + goto done; + } + if (sbappendrecord(&so->so_rcv, (struct mbuf*)data)) + sorwakeup(so); + goto done; + } + + if (sbappend(&so->so_rcv, data)) + sorwakeup(so); +done: + socket_unlock(so, 1); + return error; +} + +errno_t +sock_inject_data_out( + socket_t so, + const struct sockaddr* to, + mbuf_t data, + mbuf_t control, + sflt_data_flag_t flags) +{ + int sosendflags = 0; + if (flags & sock_data_filt_flag_oob) sosendflags = MSG_OOB; + return sosend(so, (const struct sockaddr*)to, NULL, + data, control, sosendflags); +} + +sockopt_dir +sockopt_direction( + sockopt_t sopt) +{ + return (sopt->sopt_dir == SOPT_GET) ? 
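/*
 * Sketch: registering a global TCP filter with sflt_register above.
 * sf_attach and sf_detach are mandatory (see the EINVAL checks); the
 * other callbacks are optional. Names and handle are illustrative.
 *
 *	static struct sflt_filter my_filter = {
 *		.sf_handle  = 0x454d4d59,
 *		.sf_flags   = SFLT_GLOBAL,
 *		.sf_name    = "com.example.filter",
 *		.sf_attach  = my_attach,
 *		.sf_detach  = my_detach,
 *		.sf_data_in = my_data_in,
 *	};
 *
 *	errno_t err = sflt_register(&my_filter, PF_INET, SOCK_STREAM,
 *	                            IPPROTO_TCP);
 */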
sockopt_get : sockopt_set; +} + +int +sockopt_level( + sockopt_t sopt) +{ + return sopt->sopt_level; +} + +int +sockopt_name( + sockopt_t sopt) +{ + return sopt->sopt_name; +} + +size_t +sockopt_valsize( + sockopt_t sopt) +{ + return sopt->sopt_valsize; +} + +errno_t +sockopt_copyin( + sockopt_t sopt, + void *data, + size_t len) +{ + return sooptcopyin(sopt, data, len, len); +} + +errno_t +sockopt_copyout( + sockopt_t sopt, + void *data, + size_t len) +{ + return sooptcopyout(sopt, data, len); +} diff --git a/bsd/kern/mach_fat.c b/bsd/kern/mach_fat.c index 4d3c8e07b..408d2ecb2 100644 --- a/bsd/kern/mach_fat.c +++ b/bsd/kern/mach_fat.c @@ -40,7 +40,10 @@ #include #include -#define CPU_TYPE_NATIVE (machine_slot[cpu_number()].cpu_type) +/* XXX should be in common header */ +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); + +#define CPU_TYPE_NATIVE (cpu_type()) #define CPU_TYPE_CLASSIC CPU_TYPE_POWERPC /********************************************************************** @@ -51,7 +54,9 @@ * * Args: vp: The vnode for the fat file. * header: A pointer to the fat file header. - * cpu_type: The required cpu type. + * req_cpu_type: The required cpu type. + * mask_bits: Bits to mask from the sub-image type when + * grading it vs. the req_cpu_type * archret (out): Pointer to fat_arch structure to hold * the results. * @@ -60,15 +65,19 @@ **********************************************************************/ static load_return_t fatfile_getarch2( +#if 0 struct vnode *vp, +#else + __unused struct vnode *vp, +#endif vm_offset_t data_ptr, - cpu_type_t cpu_type, + cpu_type_t req_cpu_type, + cpu_type_t mask_bits, struct fat_arch *archret) { /* vm_pager_t pager; */ vm_offset_t addr; vm_size_t size; - kern_return_t kret; load_return_t lret; struct fat_arch *arch; struct fat_arch *best_arch; @@ -77,7 +86,9 @@ fatfile_getarch2( int nfat_arch; int end_of_archs; struct fat_header *header; +#if 0 off_t filesize; +#endif /* * Get the pager for the file. @@ -108,7 +119,7 @@ fatfile_getarch2( * Round size of fat_arch structures up to page boundry. */ size = round_page_32(end_of_archs); - if (size <= 0) + if (size == 0) return(LOAD_BADMACHO); /* @@ -123,13 +134,14 @@ fatfile_getarch2( /* * Check to see if right cpu type. */ - if(NXSwapBigIntToHost(arch->cputype) != cpu_type) + if(((cpu_type_t)NXSwapBigIntToHost(arch->cputype) & ~mask_bits) != req_cpu_type) continue; /* * Get the grade of the cpu subtype. */ - grade = grade_cpu_subtype( + grade = grade_binary( + NXSwapBigIntToHost(arch->cputype), NXSwapBigIntToHost(arch->cpusubtype)); /* @@ -187,10 +199,14 @@ fatfile_getarch_affinity( primary_type = CPU_TYPE_NATIVE; fallback_type = CPU_TYPE_CLASSIC; } - lret = fatfile_getarch2(vp, data_ptr, primary_type, archret); + /* + * Ignore the architectural bits when determining if an image + * in a fat file should be skipped or graded. + */ + lret = fatfile_getarch2(vp, data_ptr, primary_type, CPU_ARCH_MASK, archret); if ((lret != 0) && handler) { lret = fatfile_getarch2(vp, data_ptr, fallback_type, - archret); + 0, archret); } return lret; } @@ -215,6 +231,31 @@ fatfile_getarch( vm_offset_t data_ptr, struct fat_arch *archret) { - return fatfile_getarch2(vp, data_ptr, CPU_TYPE_NATIVE, archret); + return fatfile_getarch2(vp, data_ptr, CPU_TYPE_NATIVE, 0, archret); +} + +/********************************************************************** + * Routine: fatfile_getarch_with_bits() + * + * Function: Locate the architecture-dependent contents of a fat + * file that match this CPU.
+ * + * Args: vp: The vnode for the fat file. + * archbits: Architecture specific feature bits + * header: A pointer to the fat file header. + * archret (out): Pointer to fat_arch structure to hold + * the results. + * + * Returns: KERN_SUCCESS: Valid architecture found. + * KERN_FAILURE: No valid architecture found. **********************************************************************/ +load_return_t +fatfile_getarch_with_bits( + struct vnode *vp, + integer_t archbits, + vm_offset_t data_ptr, + struct fat_arch *archret) +{ + return fatfile_getarch2(vp, data_ptr, archbits | CPU_TYPE_NATIVE, 0, archret); } diff --git a/bsd/kern/mach_header.c b/bsd/kern/mach_header.c index 34ffecda5..9071eaa48 100644 --- a/bsd/kern/mach_header.c +++ b/bsd/kern/mach_header.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,6 +24,14 @@ * * Functions for accessing mach-o headers. * + * NOTE: This file supports only 32 bit mach headers at the present + * time; its primary use is by kld, and all externally + * referenced routines at the present time operate against + * the 32 bit mach header _mh_execute_header, which is the + * header for the currently executing kernel. Adding support + * for 64 bit kernels is possible, but is not necessary at the + * present time. + * * HISTORY * 27-MAR-97 Umesh Vaishampayan (umeshv@NeXT.com) * Added getsegdatafromheader(); @@ -35,26 +43,22 @@ #if !defined(KERNEL_PRELOAD) #include +#include // from libsa extern struct mach_header _mh_execute_header; -struct section *getsectbynamefromheader( - struct mach_header *header, - char *seg_name, - char *sect_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - char *seg_name); - /* * return the last address (first avail) + * + * This routine operates against the currently executing kernel only */ -vm_offset_t getlastaddr(void) +vm_offset_t +getlastaddr(void) { struct segment_command *sgp; vm_offset_t last_addr = 0; struct mach_header *header = &_mh_execute_header; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -69,10 +73,12 @@ vm_offset_t getlastaddr(void) } #if FIXME /* [ */ +/* + * This routine operates against the currently executing kernel only + */ struct mach_header ** getmachheaders(void) { - extern struct mach_header _mh_execute_header; struct mach_header **tl; tl = (struct mach_header **)malloc(2*sizeof(struct mach_header *)); tl[0] = &_mh_execute_header; @@ -86,12 +92,14 @@ getmachheaders(void) * named segment if it exist in the mach header passed to it. Also it returns * the size of the section data indirectly through the pointer size. Otherwise * it returns zero for the pointer and the size. + * + * This routine can operate against any 32 bit mach header.
*/ void * getsegdatafromheader( - struct mach_header *mhp, - char *segname, + struct mach_header *mhp, + const char *segname, int *size) { const struct segment_command *sc; @@ -136,16 +146,18 @@ getsegdatafromheader( * This routine returns the section structure for the named section in the * named segment for the mach_header pointer passed to it if it exist. * Otherwise it returns zero. + * + * This routine can operate against any 32 bit mach header. */ struct section * getsectbynamefromheader( struct mach_header *mhp, - char *segname, - char *sectname) + const char *segname, + const char *sectname) { struct segment_command *sgp; struct section *sp; - long i, j; + unsigned long i, j; sgp = (struct segment_command *) ((char *)mhp + sizeof(struct mach_header)); @@ -170,12 +182,16 @@ getsectbynamefromheader( return((struct section *)0); } -struct segment_command *getsegbynamefromheader( +/* + * This routine can operate against any 32 bit mach header. + */ +struct segment_command * +getsegbynamefromheader( struct mach_header *header, - char *seg_name) + const char *seg_name) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -221,7 +237,9 @@ static struct { 4, // align 0, // reloff 0, // nreloc - 0 // flags + 0, // flags + 0, // reserved1 + 0 // reserved2 } }; @@ -232,16 +250,25 @@ static vm_offset_t getsizeofmacho(struct mach_header *header); /* * Return the first segment_command in the header. + * + * This routine operates against the currently executing kernel only */ -struct segment_command *firstseg(void) +struct segment_command * +firstseg(void) { return firstsegfromheader(&_mh_execute_header); } -struct segment_command *firstsegfromheader(struct mach_header *header) +/* + * This routine can operate against any 32 bit mach header, and returns a + * pointer to a 32 bit segment_command structure from the file prefixed by + * the header it is passed as its argument. + */ +struct segment_command * +firstsegfromheader(struct mach_header *header) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -253,7 +280,14 @@ struct segment_command *firstsegfromheader(struct mach_header *header) return (struct segment_command *)0; } -struct segment_command *nextseg(struct segment_command *sgp) +/* + * This routine operates against a 32 bit mach segment_command structure + * pointer from the currently executing kernel only, to obtain the + * sequentially next segment_command structure in the currently executing + * kernel + */ +struct segment_command * +nextseg(struct segment_command *sgp) { struct segment_command *this; @@ -269,12 +303,18 @@ struct segment_command *nextseg(struct segment_command *sgp) return this; } -struct segment_command *nextsegfromheader( +/* + * This routine operates against any 32 bit mach segment_command structure + * pointer and the provided 32 bit header, to obtain the sequentially next + * segment_command structure in that header. + */ +struct segment_command * +nextsegfromheader( struct mach_header *header, struct segment_command *seg) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -299,9 +339,11 @@ struct segment_command *nextsegfromheader( /* - * Return the address of the named Mach-O segment, or NULL. 
+ * Return the address of the named Mach-O segment from the currently + * executing 32 bit kernel, or NULL. */ -struct segment_command *getsegbyname(char *seg_name) +struct segment_command * +getsegbyname(const char *seg_name) { struct segment_command *this; @@ -319,42 +361,60 @@ struct segment_command *getsegbyname(char *seg_name) /* * This routine returns the a pointer the section structure of the named - * section in the named segment if it exist in the mach executable it is - * linked into. Otherwise it returns zero. + * section in the named segment if it exists in the currently executing + * kernel, which it is presumed to be linked into. Otherwise it returns NULL. */ struct section * getsectbyname( - char *segname, - char *sectname) + const char *segname, + const char *sectname) { return(getsectbynamefromheader( (struct mach_header *)&_mh_execute_header, segname, sectname)); } -struct section *firstsect(struct segment_command *sgp) +/* + * This routine can operate against any 32 bit segment_command structure to + * return the first 32 bit section immediately following that structure. If + * there are no sections associated with the segment_command structure, it + * returns NULL. + */ +struct section * +firstsect(struct segment_command *sgp) { - struct section *sp; - if (!sgp || sgp->nsects == 0) return (struct section *)0; return (struct section *)(sgp+1); } -struct section *nextsect(struct segment_command *sgp, struct section *sp) +/* + * This routine can operate against any 32 bit segment_command structure and + * 32 bit section to return the next consecutive 32 bit section immediately + * following the 32 bit section provided. If there are no sections following + * the provided section, it returns NULL. + */ +struct section * +nextsect(struct segment_command *sgp, struct section *sp) { struct section *fsp = firstsect(sgp); - if (sp - fsp >= sgp->nsects-1) + if (((unsigned long)(sp - fsp) + 1) >= sgp->nsects) return (struct section *)0; return sp+1; } -static struct fvmfile_command *fvmfilefromheader(struct mach_header *header) +/* + * This routine can operate against any 32 bit mach header to return the + * first occurring 32 bit fvmfile_command section. If one is not present, + * it returns NULL. + */ +static struct fvmfile_command * +fvmfilefromheader(struct mach_header *header) { struct fvmfile_command *fvp; - int i; + unsigned long i; fvp = (struct fvmfile_command *) ((char *)header + sizeof(struct mach_header)); @@ -368,8 +428,11 @@ static struct fvmfile_command *fvmfilefromheader(struct mach_header *header) /* * Create a fake USER seg if a fvmfile_command is present. + * + * This routine operates against the currently executing kernel only */ -struct segment_command *getfakefvmseg(void) +struct segment_command * +getfakefvmseg(void) { struct segment_command *sgp = getsegbyname("__USER"); struct fvmfile_command *fvp = fvmfilefromheader(&_mh_execute_header); @@ -396,16 +459,20 @@ struct segment_command *getfakefvmseg(void) printf("fake fvm seg __USER/\"%s\" at 0x%x, size 0x%x\n", sp->sectname, sp->addr, sp->size); #endif /* DEBUG */ + + return sgp; } /* * Figure out the size the size of the data associated with a * loaded mach_header. + * + * This routine can operate against any 32 bit mach header. 
*/ -static vm_offset_t getsizeofmacho(struct mach_header *header) +static vm_offset_t +getsizeofmacho(struct mach_header *header) { struct segment_command *sgp; - struct section *sp; vm_offset_t last_addr; last_addr = 0; diff --git a/bsd/kern/mach_header.h b/bsd/kern/mach_header.h index 1e4cbeaba..ff667a6f4 100644 --- a/bsd/kern/mach_header.h +++ b/bsd/kern/mach_header.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,6 +24,11 @@ * * Definitions for accessing mach-o headers. * + * NOTE: The functions prototyped by this header only operate against + * 32 bit mach headers. Many of these functions imply the + * currently running kernel, and cannot be used against mach + * headers other than that of the currently running kernel. + * * HISTORY * 29-Jan-92 Mike DeMoney (mike@next.com) * Made into machine independent form from machdep/m68k/mach_header.h. @@ -46,17 +51,17 @@ struct segment_command *nextseg(struct segment_command *sgp); struct segment_command *nextsegfromheader( struct mach_header *header, struct segment_command *seg); -struct segment_command *getsegbyname(char *seg_name); +struct segment_command *getsegbyname(const char *seg_name); struct segment_command *getsegbynamefromheader( struct mach_header *header, - char *seg_name); -void *getsegdatafromheader(struct mach_header *, char *, int *); -struct section *getsectbyname(char *seg_name, char *sect_name); + const char *seg_name); +void *getsegdatafromheader(struct mach_header *, const char *, int *); +struct section *getsectbyname(const char *seg_name, const char *sect_name); struct section *getsectbynamefromheader( struct mach_header *header, - char *seg_name, - char *sect_name); -void *getsectdatafromheader(struct mach_header *, char *, char *, int *); + const char *seg_name, + const char *sect_name); +void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); struct section *firstsect(struct segment_command *sgp); struct section *nextsect(struct segment_command *sgp, struct section *sp); struct fvmlib_command *fvmlib(void); diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index 8a988f82d..a12aa7682 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,129 +30,205 @@ * 21-Jul-88 Avadis Tevanian, Jr. (avie) at NeXT * Started. */ + #include -#include +#include #include #include -#include +#include +#include #include #include -#include +#include #include -#include +#include +#include #include +#include /* vm_allocate() */ +#include /* mach_vm_allocate() */ +#include +#include +#include +#include + +#include +#include +#include #include +#include #include +#include #include #include -#include - +#include #include #include #include #include -#include - -#include #include +#include -#include +/* + * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE + * when KERNEL is defined. + */ +extern pmap_t pmap_create(vm_map_size_t size); +extern void pmap_switch(pmap_t); +extern void pmap_map_sharedpage(task_t task, pmap_t pmap); + +/* + * XXX kern/thread.h should not treat these prototypes as MACH_KERNEL_PRIVATE + * when KERNEL is defined.
+ */ +extern kern_return_t thread_setstatus(thread_t thread, int flavor, + thread_state_t tstate, + mach_msg_type_number_t count); + +extern kern_return_t thread_state_initialize(thread_t thread); + + +/* XXX should have prototypes in a shared header file */ +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); +extern int get_map_nentries(vm_map_t); +extern kern_return_t thread_userstack(thread_t, int, thread_state_t, + unsigned int, mach_vm_offset_t *, int *); +extern kern_return_t thread_entrypoint(thread_t, int, thread_state_t, + unsigned int, mach_vm_offset_t *); + + +/* An empty load_result_t */ +static load_result_t load_result_null = { + MACH_VM_MIN_ADDRESS, + MACH_VM_MIN_ADDRESS, + MACH_VM_MIN_ADDRESS, + 0, + 0, + 0, + 0 +}; /* * Prototypes of static functions. */ -static -load_return_t +static load_return_t parse_machfile( struct vnode *vp, - vm_map_t map, - thread_act_t thr_act, + vm_map_t map, + thread_t thr_act, struct mach_header *header, - unsigned long file_offset, - unsigned long macho_size, - int depth, - load_result_t *result, - boolean_t clean_regions -), + off_t file_offset, + off_t macho_size, + boolean_t shared_regions, + boolean_t clean_regions, + int depth, + load_result_t *result +); + +static load_return_t load_segment( struct segment_command *scp, void * pager, - unsigned long pager_offset, - unsigned long macho_size, - unsigned long end_of_file, + off_t pager_offset, + off_t macho_size, + off_t end_of_file, vm_map_t map, load_result_t *result -), +); + +static load_return_t +load_segment_64( + struct segment_command_64 *scp64, + void *pager, + off_t pager_offset, + off_t macho_size, + off_t end_of_file, + vm_map_t map, + load_result_t *result +); + +static load_return_t load_unixthread( struct thread_command *tcp, - thread_act_t thr_act, + thread_t thr_act, load_result_t *result -), +); + +static load_return_t load_thread( struct thread_command *tcp, - thread_act_t thr_act, + thread_t thr_act, load_result_t *result -), +); + +static load_return_t load_threadstate( thread_t thread, unsigned long *ts, unsigned long total_size -), +); + +static load_return_t load_threadstack( thread_t thread, unsigned long *ts, unsigned long total_size, - vm_offset_t *user_stack, + mach_vm_offset_t *user_stack, int *customstack -), +); + +static load_return_t load_threadentry( thread_t thread, unsigned long *ts, unsigned long total_size, - vm_offset_t *entry_point -), + mach_vm_offset_t *entry_point +); + +static load_return_t load_dylinker( struct dylinker_command *lcp, + integer_t archbits, vm_map_t map, - thread_act_t thr_act, + thread_t thr_act, int depth, load_result_t *result, boolean_t clean_regions -), +); + +static load_return_t get_macho_vnode( char *path, + integer_t archbits, struct mach_header *mach_header, - unsigned long *file_offset, - unsigned long *macho_size, + off_t *file_offset, + off_t *macho_size, struct vnode **vpp ); load_return_t load_machfile( - struct vnode *vp, + struct image_params *imgp, struct mach_header *header, - unsigned long file_offset, - unsigned long macho_size, - load_result_t *result, - thread_act_t thr_act, + thread_t thr_act, vm_map_t new_map, - boolean_t clean_regions + boolean_t clean_regions, + load_result_t *result ) { - pmap_t pmap; + struct vnode *vp = imgp->ip_vp; + off_t file_offset = imgp->ip_arch_offset; + off_t macho_size = imgp->ip_arch_size; + + pmap_t pmap = 0; /* protected by create_map */ vm_map_t map; vm_map_t old_map; load_result_t myresult; - kern_return_t kret; load_return_t lret; boolean_t 
create_map = TRUE; -#ifndef i386 - extern pmap_t pmap_create(vm_size_t size); /* XXX */ -#endif if (new_map != VM_MAP_NULL) { create_map = FALSE; @@ -164,7 +240,7 @@ load_machfile( pmap = get_task_pmap(current_task()); pmap_reference(pmap); #else - pmap = pmap_create((vm_size_t) 0); + pmap = pmap_create((vm_map_size_t) 0); #endif map = vm_map_create(pmap, get_map_min(old_map), @@ -176,10 +252,11 @@ load_machfile( if (!result) result = &myresult; - *result = (load_result_t) { 0 }; + *result = load_result_null; lret = parse_machfile(vp, map, thr_act, header, file_offset, macho_size, - 0, result, clean_regions); + ((imgp->ip_flags & IMGPF_IS_64BIT) == 0), /* shared regions? */ + clean_regions, 0, result); if (lret != LOAD_SUCCESS) { if (create_map) { @@ -213,27 +290,38 @@ load_machfile( int dylink_test = 1; +/* + * The file size of a mach-o file is limited to 32 bits, because this is + * the limit on the kalloc() of enough bytes for a mach_header and + * the contents of its sizeofcmds, which is currently constrained to 32 + * bits in the file format itself. We read the commands section into the + * kernel buffer, and then parse it in order to process the mach-o file + * format load_command segment(s). We are only interested in a subset of + * the total set of possible commands. + */ static load_return_t parse_machfile( - struct vnode *vp, + struct vnode *vp, vm_map_t map, - thread_act_t thr_act, + thread_t thr_act, struct mach_header *header, - unsigned long file_offset, - unsigned long macho_size, + off_t file_offset, + off_t macho_size, + boolean_t shared_regions, + boolean_t clean_regions, int depth, - load_result_t *result, - boolean_t clean_regions + load_result_t *result ) { - struct machine_slot *ms; uint32_t ncmds; - struct load_command *lcp, *next; + struct load_command *lcp; struct dylinker_command *dlp = 0; + integer_t dlarchbits = 0; void * pager; load_return_t ret = LOAD_SUCCESS; - vm_offset_t addr, kl_addr; + caddr_t addr; + void * kl_addr; vm_size_t size,kl_size; size_t offset; size_t oldoffset; /* for overflow check */ @@ -242,6 +330,13 @@ parse_machfile( int error; int resid=0; task_t task; + size_t mach_header_sz = sizeof(struct mach_header); + boolean_t abi64; + + if (header->magic == MH_MAGIC_64 || + header->magic == MH_CIGAM_64) { + mach_header_sz = sizeof(struct mach_header_64); + } /* * Break infinite recursion @@ -256,11 +351,12 @@ /* * Check to see if right machine type. */ - ms = &machine_slot[cpu_number()]; - if ((header->cputype != ms->cpu_type) || - !check_cpu_subtype(header->cpusubtype)) + if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != cpu_type()) || + !grade_binary(header->cputype, header->cpusubtype)) return(LOAD_BADARCH); + abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64); + switch (header->filetype) { case MH_OBJECT: @@ -295,13 +391,13 @@ parse_machfile( * Map portion that must be accessible directly into * kernel's map. */ - if ((sizeof (struct mach_header) + header->sizeofcmds) > macho_size) + if ((mach_header_sz + header->sizeofcmds) > macho_size) return(LOAD_BADMACHO); /* * Round size of Mach-O commands up to page boundary.
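 * (For example, with a 4096 byte page size: a 28 byte 32 bit mach_header
 * plus sizeofcmds == 0x1100 gives round_page(0x111c) == 0x2000. The
 * numbers are illustrative only.)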
*/ - size = round_page_32(sizeof (struct mach_header) + header->sizeofcmds); + size = round_page(mach_header_sz + header->sizeofcmds); if (size <= 0) return(LOAD_BADMACHO); @@ -311,17 +407,18 @@ parse_machfile( addr = 0; kl_size = size; kl_addr = kalloc(size); - addr = kl_addr; + addr = (caddr_t)kl_addr; if (addr == NULL) return(LOAD_NOSPACE); - if(error = vn_rdwr(UIO_READ, vp, (caddr_t)addr, size, file_offset, - UIO_SYSSPACE, 0, p->p_ucred, &resid, p)) { + error = vn_rdwr(UIO_READ, vp, addr, size, file_offset, + UIO_SYSSPACE32, 0, kauth_cred_get(), &resid, p); + if (error) { if (kl_addr ) kfree(kl_addr, kl_size); return(LOAD_IOERROR); } - /* ubc_map(vp); */ /* NOT HERE */ + /* (void)ubc_map(vp, PROT_EXEC); */ /* NOT HERE */ /* * Scan through the commands, processing each one as necessary. @@ -333,7 +430,7 @@ parse_machfile( * run off the end of the reserved section by incrementing * the offset too far, so we are implicitly fail-safe. */ - offset = sizeof(struct mach_header); + offset = mach_header_sz; ncmds = header->ncmds; while (ncmds--) { /* @@ -353,8 +450,8 @@ parse_machfile( */ if (oldoffset > offset || lcp->cmdsize < sizeof(struct load_command) || - offset > header->sizeofcmds + sizeof(struct mach_header)) { - ret = LOAD_BADMACHO; + offset > header->sizeofcmds + mach_header_sz) { + ret = LOAD_BADMACHO; break; } @@ -363,41 +460,59 @@ parse_machfile( * intervention is required. */ switch(lcp->cmd) { + case LC_SEGMENT_64: + if (pass != 1) + break; + ret = load_segment_64( + (struct segment_command_64 *)lcp, + pager, + file_offset, + macho_size, + ubc_getsize(vp), + map, + result); + break; case LC_SEGMENT: if (pass != 1) break; ret = load_segment( (struct segment_command *) lcp, - pager, file_offset, + pager, + file_offset, macho_size, - (unsigned long)ubc_getsize(vp), + ubc_getsize(vp), map, result); break; case LC_THREAD: if (pass != 2) break; - ret = load_thread((struct thread_command *)lcp, thr_act, + ret = load_thread((struct thread_command *)lcp, + thr_act, result); break; case LC_UNIXTHREAD: if (pass != 2) break; ret = load_unixthread( - (struct thread_command *) lcp, thr_act, + (struct thread_command *) lcp, + thr_act, result); break; case LC_LOAD_DYLINKER: if (pass != 2) break; - if ((depth == 1) && (dlp == 0)) + if ((depth == 1) && (dlp == 0)) { dlp = (struct dylinker_command *)lcp; - else + dlarchbits = (header->cputype & CPU_ARCH_MASK); + } else { ret = LOAD_FAILURE; + } break; default: /* Other commands are ignored by the kernel */ ret = LOAD_SUCCESS; + break; } if (ret != LOAD_SUCCESS) break; @@ -405,8 +520,10 @@ parse_machfile( if (ret != LOAD_SUCCESS) break; } - if ((ret == LOAD_SUCCESS) && (depth == 1)) { - vm_offset_t addr; + if (ret == LOAD_SUCCESS) { + + if (shared_regions) { + vm_offset_t vmaddr; shared_region_mapping_t shared_region; struct shared_region_task_mappings map_info; shared_region_mapping_t next; @@ -454,25 +571,24 @@ RedoLookup: } } - if (dylink_test) { p->p_flag |= P_NOSHLIB; /* no shlibs in use */ - addr = map_info.client_base; + vmaddr = map_info.client_base; if(clean_regions) { - vm_map(map, &addr, map_info.text_size, - 0, SHARED_LIB_ALIAS, + vm_map(map, &vmaddr, map_info.text_size, + 0, SHARED_LIB_ALIAS|VM_FLAGS_FIXED, map_info.text_region, 0, FALSE, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_SHARE); } else { - vm_map(map, &addr, map_info.text_size, 0, + vm_map(map, &vmaddr, map_info.text_size, 0, (VM_MEMORY_SHARED_PMAP << 24) - | SHARED_LIB_ALIAS, + | SHARED_LIB_ALIAS | VM_FLAGS_FIXED, map_info.text_region, 0, FALSE, VM_PROT_READ, VM_PROT_READ, 
VM_INHERIT_SHARE); } - addr = map_info.client_base + map_info.text_size; - vm_map(map, &addr, map_info.data_size, - 0, SHARED_LIB_ALIAS, + vmaddr = map_info.client_base + map_info.text_size; + vm_map(map, &vmaddr, map_info.data_size, + 0, SHARED_LIB_ALIAS | VM_FLAGS_FIXED, map_info.data_region, 0, TRUE, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_SHARE); @@ -497,27 +613,36 @@ RedoLookup: &(map_info.system), &(map_info.flags), &next); - addr = map_info.client_base; - vm_map(map, &addr, map_info.text_size, - 0, SHARED_LIB_ALIAS, + vmaddr = map_info.client_base; + vm_map(map, &vmaddr, map_info.text_size, + 0, SHARED_LIB_ALIAS | VM_FLAGS_FIXED, map_info.text_region, 0, FALSE, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_SHARE); } } - if (dlp != 0) { - ret = load_dylinker(dlp, map, thr_act, - depth, result, clean_regions); - } + } + if (dlp != 0) + ret = load_dylinker(dlp, dlarchbits, map, thr_act, depth, result, clean_regions); + + if(depth == 1) { + if (result->thread_count == 0) + ret = LOAD_FAILURE; +#ifdef __ppc__ + else if ( abi64 ) { + /* Map in 64-bit commpage */ + /* LP64todo - make this clean */ + pmap_map_sharedpage(current_task(), get_map_pmap(map)); + vm_map_commpage64(map); + } +#endif + } } if (kl_addr ) kfree(kl_addr, kl_size); - if ((ret == LOAD_SUCCESS) && (depth == 1) && - (result->thread_count == 0)) - ret = LOAD_FAILURE; if (ret == LOAD_SUCCESS) - ubc_map(vp); + (void)ubc_map(vp, PROT_EXEC); return(ret); } @@ -527,9 +652,9 @@ load_return_t load_segment( struct segment_command *scp, void * pager, - unsigned long pager_offset, - unsigned long macho_size, - unsigned long end_of_file, + off_t pager_offset, + off_t macho_size, + __unused off_t end_of_file, vm_map_t map, load_result_t *result ) @@ -537,7 +662,6 @@ load_segment( kern_return_t ret; vm_offset_t map_addr, map_offset; vm_size_t map_size, seg_size, delta_size; - caddr_t tmp; vm_prot_t initprot; vm_prot_t maxprot; @@ -548,15 +672,15 @@ load_segment( if (scp->fileoff + scp->filesize > macho_size) return (LOAD_BADMACHO); - seg_size = round_page_32(scp->vmsize); + seg_size = round_page(scp->vmsize); if (seg_size == 0) return(KERN_SUCCESS); /* * Round sizes to page size. */ - map_size = round_page_32(scp->filesize); - map_addr = trunc_page_32(scp->vmaddr); + map_size = round_page(scp->filesize); + map_addr = trunc_page(scp->vmaddr); map_offset = pager_offset + scp->fileoff; @@ -567,8 +691,8 @@ load_segment( * Map a copy of the file into the address space. 
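 * (Worked example, with illustrative numbers only: a segment with
 * vmaddr 0x3000, vmsize 0x3000 and filesize 0x1000 maps one page of
 * file data at 0x3000; the zero-fill pass below then allocates the
 * remaining delta_size = seg_size - map_size = 0x2000 bytes at
 * map_addr + map_size, i.e. 0x4000.)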
*/ ret = vm_map(map, - &map_addr, map_size, (vm_offset_t)0, FALSE, - pager, map_offset, TRUE, + &map_addr, map_size, (vm_offset_t)0, + VM_FLAGS_FIXED, pager, map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) @@ -583,7 +707,7 @@ load_segment( if (delta_size > 0) { vm_offset_t tmp; - ret = vm_allocate(kernel_map, &tmp, delta_size, TRUE); + ret = vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); if (ret != KERN_SUCCESS) return(LOAD_RESOURCE); @@ -608,7 +732,7 @@ load_segment( if (delta_size > 0) { vm_offset_t tmp = map_addr + map_size; - ret = vm_allocate(map, &tmp, delta_size, FALSE); + ret = vm_allocate(map, &tmp, delta_size, VM_FLAGS_FIXED); if (ret != KERN_SUCCESS) return(LOAD_NOSPACE); } @@ -634,49 +758,110 @@ load_segment( static load_return_t -load_unixthread( - struct thread_command *tcp, - thread_act_t thread, +load_segment_64( + struct segment_command_64 *scp64, + void * pager, + off_t pager_offset, + off_t macho_size, + __unused off_t end_of_file, + vm_map_t map, load_result_t *result ) { - load_return_t ret; - int customstack =0; + kern_return_t ret; + mach_vm_offset_t map_addr, map_offset; + mach_vm_size_t map_size, seg_size, delta_size; + vm_prot_t initprot; + vm_prot_t maxprot; - if (result->thread_count != 0) + /* + * Make sure what we get from the file is really ours (as specified + * by macho_size). + */ + if (scp64->fileoff + scp64->filesize > (uint64_t)macho_size) + return (LOAD_BADMACHO); + + seg_size = round_page_64(scp64->vmsize); + if (seg_size == 0) + return(KERN_SUCCESS); + + /* + * Round sizes to page size. + */ + map_size = round_page_64(scp64->filesize); /* limited to 32 bits */ + map_addr = round_page_64(scp64->vmaddr); + + map_offset = pager_offset + scp64->fileoff; /* limited to 32 bits */ + + if (map_size > 0) { + initprot = (scp64->initprot) & VM_PROT_ALL; + maxprot = (scp64->maxprot) & VM_PROT_ALL; + /* + * Map a copy of the file into the address space. + */ + ret = mach_vm_map(map, + &map_addr, map_size, (mach_vm_offset_t)0, + VM_FLAGS_FIXED, pager, map_offset, TRUE, + initprot, maxprot, + VM_INHERIT_DEFAULT); + if (ret != KERN_SUCCESS) + return(LOAD_NOSPACE); + + /* + * If the file didn't end on a page boundary, + * we need to zero the leftover. + */ + delta_size = map_size - scp64->filesize; +#if FIXME + if (delta_size > 0) { + mach_vm_offset_t tmp; + + ret = vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); + if (ret != KERN_SUCCESS) + return(LOAD_RESOURCE); + + if (copyout(tmp, map_addr + scp64->filesize, + delta_size)) { + (void) vm_deallocate( + kernel_map, tmp, delta_size); return (LOAD_FAILURE); + } - ret = load_threadstack(thread, - (unsigned long *)(((vm_offset_t)tcp) + - sizeof(struct thread_command)), - tcp->cmdsize - sizeof(struct thread_command), - &result->user_stack, - &customstack); - if (ret != LOAD_SUCCESS) - return(ret); + (void) vm_deallocate(kernel_map, tmp, delta_size); + } +#endif /* FIXME */ + } - if (customstack) - result->customstack = 1; - else - result->customstack = 0; - ret = load_threadentry(thread, - (unsigned long *)(((vm_offset_t)tcp) + - sizeof(struct thread_command)), - tcp->cmdsize - sizeof(struct thread_command), - &result->entry_point); - if (ret != LOAD_SUCCESS) - return(ret); + /* + * If the virtual size of the segment is greater + * than the size from the file, we need to allocate + * zero fill memory for the rest. 
+ */ + delta_size = seg_size - map_size; + if (delta_size > 0) { + mach_vm_offset_t tmp = map_addr + map_size; - ret = load_threadstate(thread, - (unsigned long *)(((vm_offset_t)tcp) + - sizeof(struct thread_command)), - tcp->cmdsize - sizeof(struct thread_command)); - if (ret != LOAD_SUCCESS) - return (ret); + ret = mach_vm_allocate(map, &tmp, delta_size, VM_FLAGS_FIXED); + if (ret != KERN_SUCCESS) + return(LOAD_NOSPACE); + } - result->unixproc = TRUE; - result->thread_count++; + /* + * Set protection values. (Note: ignore errors!) + */ + if (scp64->maxprot != VM_PROT_DEFAULT) { + (void) mach_vm_protect(map, + map_addr, seg_size, + TRUE, scp64->maxprot); + } + if (scp64->initprot != VM_PROT_DEFAULT) { + (void) mach_vm_protect(map, + map_addr, seg_size, + FALSE, scp64->initprot); + } + if ( (scp64->fileoff == 0) && (scp64->filesize != 0) ) + result->mach_header = map_addr; return(LOAD_SUCCESS); } @@ -684,7 +869,7 @@ static load_return_t load_thread( struct thread_command *tcp, - thread_act_t thread, + thread_t thread, load_result_t *result ) { @@ -700,7 +885,7 @@ load_thread( kret = thread_create(task, &thread); if (kret != KERN_SUCCESS) return(LOAD_RESOURCE); - act_deallocate(thread); + thread_deallocate(thread); } lret = load_threadstate(thread, @@ -746,6 +931,54 @@ load_thread( return(LOAD_SUCCESS); } +static +load_return_t +load_unixthread( + struct thread_command *tcp, + thread_t thread, + load_result_t *result +) +{ + load_return_t ret; + int customstack =0; + + if (result->thread_count != 0) + return (LOAD_FAILURE); + + ret = load_threadstack(thread, + (unsigned long *)(((vm_offset_t)tcp) + + sizeof(struct thread_command)), + tcp->cmdsize - sizeof(struct thread_command), + &result->user_stack, + &customstack); + if (ret != LOAD_SUCCESS) + return(ret); + + if (customstack) + result->customstack = 1; + else + result->customstack = 0; + ret = load_threadentry(thread, + (unsigned long *)(((vm_offset_t)tcp) + + sizeof(struct thread_command)), + tcp->cmdsize - sizeof(struct thread_command), + &result->entry_point); + if (ret != LOAD_SUCCESS) + return(ret); + + ret = load_threadstate(thread, + (unsigned long *)(((vm_offset_t)tcp) + + sizeof(struct thread_command)), + tcp->cmdsize - sizeof(struct thread_command)); + if (ret != LOAD_SUCCESS) + return (ret); + + result->unixproc = TRUE; + result->thread_count++; + + return(LOAD_SUCCESS); +} + static load_return_t load_threadstate( @@ -757,18 +990,29 @@ load_threadstate( kern_return_t ret; unsigned long size; int flavor; + unsigned long thread_size; + ret = thread_state_initialize( thread ); + if (ret != KERN_SUCCESS) + return(LOAD_FAILURE); + /* - * Set the thread state. + * Set the new thread state; iterate through the state flavors in + * the mach-o file. */ - while (total_size > 0) { flavor = *ts++; size = *ts++; - total_size -= (size+2)*sizeof(unsigned long); - if (total_size < 0) + thread_size = (size+2)*sizeof(unsigned long); + if (thread_size > total_size) return(LOAD_BADMACHO); - ret = thread_setstatus(thread, flavor, ts, size); + total_size -= thread_size; + /* + * Third argument is a kernel space pointer; it gets cast + * to the appropriate type in machine_thread_set_state() + * based on the value of flavor. 
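+	 *
+	 * The words pointed to by ts come straight from the
+	 * LC_THREAD/LC_UNIXTHREAD payload: a sequence of tuples, each a
+	 * flavor word (e.g. PPC_THREAD_STATE64), a size word giving the
+	 * flavor-specific word count, and then size words of register
+	 * state, which is why the loop advances ts by (size + 2) words
+	 * per flavor.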
+ */ + ret = thread_setstatus(thread, flavor, (thread_state_t)ts, size); if (ret != KERN_SUCCESS) return(LOAD_FAILURE); ts += size; /* ts is a (unsigned long *) */ @@ -782,23 +1026,29 @@ load_threadstack( thread_t thread, unsigned long *ts, unsigned long total_size, - vm_offset_t *user_stack, + user_addr_t *user_stack, int *customstack ) { kern_return_t ret; unsigned long size; int flavor; + unsigned long stack_size; while (total_size > 0) { flavor = *ts++; size = *ts++; - total_size -= (size+2)*sizeof(unsigned long); - if (total_size < 0) + stack_size = (size+2)*sizeof(unsigned long); + if (stack_size > total_size) return(LOAD_BADMACHO); - *user_stack = USRSTACK; - ret = thread_userstack(thread, flavor, ts, size, - user_stack, customstack); + total_size -= stack_size; + + /* + * Third argument is a kernel space pointer; it gets cast + * to the appropriate type in thread_userstack() based on + * the value of flavor. + */ + ret = thread_userstack(thread, flavor, (thread_state_t)ts, size, user_stack, customstack); if (ret != KERN_SUCCESS) return(LOAD_FAILURE); ts += size; /* ts is a (unsigned long *) */ @@ -812,24 +1062,31 @@ load_threadentry( thread_t thread, unsigned long *ts, unsigned long total_size, - vm_offset_t *entry_point + mach_vm_offset_t *entry_point ) { kern_return_t ret; unsigned long size; int flavor; + unsigned long entry_size; /* * Set the thread state. */ - *entry_point = 0; + *entry_point = MACH_VM_MIN_ADDRESS; while (total_size > 0) { flavor = *ts++; size = *ts++; - total_size -= (size+2)*sizeof(unsigned long); - if (total_size < 0) + entry_size = (size+2)*sizeof(unsigned long); + if (entry_size > total_size) return(LOAD_BADMACHO); - ret = thread_entrypoint(thread, flavor, ts, size, entry_point); + total_size -= entry_size; + /* + * Third argument is a kernel space pointer; it gets cast + * to the appropriate type in thread_entrypoint() based on + * the value of flavor. + */ + ret = thread_entrypoint(thread, flavor, (thread_state_t)ts, size, entry_point); if (ret != KERN_SUCCESS) return(LOAD_FAILURE); ts += size; /* ts is a (unsigned long *) */ @@ -842,8 +1099,9 @@ static load_return_t load_dylinker( struct dylinker_command *lcp, + integer_t archbits, vm_map_t map, - thread_act_t thr_act, + thread_t thr_act, int depth, load_result_t *result, boolean_t clean_regions @@ -853,15 +1111,14 @@ load_dylinker( char *p; struct vnode *vp; struct mach_header header; - unsigned long file_offset; - unsigned long macho_size; + off_t file_offset; + off_t macho_size; vm_map_t copy_map; load_result_t myresult; kern_return_t ret; vm_map_copy_t tmp; - vm_offset_t dyl_start, map_addr; - vm_size_t dyl_length; - extern pmap_t pmap_create(vm_size_t size); /* XXX */ + mach_vm_offset_t dyl_start, map_addr; + mach_vm_size_t dyl_length; name = (char *)lcp + lcp->name.offset; /* @@ -873,35 +1130,39 @@ load_dylinker( return(LOAD_BADMACHO); } while (*p++); - ret = get_macho_vnode(name, &header, &file_offset, &macho_size, &vp); + ret = get_macho_vnode(name, archbits, &header, &file_offset, &macho_size, &vp); if (ret) return (ret); - myresult = (load_result_t) { 0 }; - /* * Load the Mach-O. + * Use a temporary map to do the work. 
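+ *
+ * Overall flow, summarizing the code below (no new behavior implied):
+ * parse the dynamic linker into copy_map, reserve a matching range in
+ * the target map (falling back to VM_FLAGS_ANYWHERE if the preferred
+ * range is taken), copy the contents across with vm_map_copyin() and
+ * vm_map_copy_overwrite(), then bias the recorded entry point by
+ * (map_addr - dyl_start).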
*/ - - copy_map = vm_map_create(pmap_create(macho_size), - get_map_min(map), get_map_max( map), TRUE); + copy_map = vm_map_create(pmap_create(vm_map_round_page(macho_size)), + get_map_min(map), get_map_max(map), TRUE); + if (VM_MAP_NULL == copy_map) { + ret = LOAD_RESOURCE; + goto out; + } + + myresult = load_result_null; ret = parse_machfile(vp, copy_map, thr_act, &header, file_offset, macho_size, - depth, &myresult, clean_regions); + FALSE, clean_regions, depth, &myresult); if (ret) goto out; if (get_map_nentries(copy_map) > 0) { - dyl_start = get_map_start(copy_map); - dyl_length = get_map_end(copy_map) - dyl_start; + dyl_start = mach_get_vm_start(copy_map); + dyl_length = mach_get_vm_end(copy_map) - dyl_start; map_addr = dyl_start; - ret = vm_allocate(map, &map_addr, dyl_length, FALSE); + ret = mach_vm_allocate(map, &map_addr, dyl_length, VM_FLAGS_FIXED); if (ret != KERN_SUCCESS) { - ret = vm_allocate(map, &map_addr, dyl_length, TRUE); + ret = mach_vm_allocate(map, &map_addr, dyl_length, VM_FLAGS_ANYWHERE); } if (ret != KERN_SUCCESS) { @@ -909,24 +1170,29 @@ load_dylinker( goto out; } - ret = vm_map_copyin(copy_map, dyl_start, dyl_length, TRUE, - &tmp); + ret = vm_map_copyin(copy_map, + (vm_map_address_t)dyl_start, + (vm_map_size_t)dyl_length, + TRUE, &tmp); if (ret != KERN_SUCCESS) { (void) vm_map_remove(map, - map_addr, - map_addr + dyl_length, - VM_MAP_NO_FLAGS); + vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + dyl_length), + VM_MAP_NO_FLAGS); goto out; } - ret = vm_map_copy_overwrite(map, map_addr, tmp, FALSE); + ret = vm_map_copy_overwrite(map, + (vm_map_address_t)map_addr, + tmp, FALSE); if (ret != KERN_SUCCESS) { - vm_map_copy_discard(tmp); - (void) vm_map_remove(map, - map_addr, - map_addr + dyl_length, - VM_MAP_NO_FLAGS); - goto out; } + vm_map_copy_discard(tmp); + (void) vm_map_remove(map, + vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + dyl_length), + VM_MAP_NO_FLAGS); + goto out; + } if (map_addr != dyl_start) myresult.entry_point += (map_addr - dyl_start); @@ -936,28 +1202,35 @@ load_dylinker( if (ret == LOAD_SUCCESS) { result->dynlinker = TRUE; result->entry_point = myresult.entry_point; - ubc_map(vp); + (void)ubc_map(vp, PROT_EXEC); } out: vm_map_deallocate(copy_map); - vrele(vp); + vnode_put(vp); return (ret); } +/* + * This routine exists to support load_dylinker(). + * + * This routine has its own, separate, understanding of the FAT file format, + * which is terrifically unfortunate.
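+ *
+ * In outline (a summary of the code below): the first 512 bytes of the
+ * file are read into a union; if they hold a fat_header, the fat_arch
+ * matching the requested architecture bits is located and the
+ * mach_header is re-read from fat_arch.offset, after which *file_offset
+ * and *macho_size describe that slice rather than the whole file.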
+ */ static load_return_t get_macho_vnode( char *path, + integer_t archbits, struct mach_header *mach_header, - unsigned long *file_offset, - unsigned long *macho_size, + off_t *file_offset, + off_t *macho_size, struct vnode **vpp ) { struct vnode *vp; - struct vattr attr, *atp; + struct vfs_context context; struct nameidata nid, *ndp; struct proc *p = current_proc(); /* XXXX */ boolean_t is_fat; @@ -970,23 +1243,25 @@ get_macho_vnode( char pad[512]; } header; off_t fsize = (off_t)0; - struct ucred *cred = p->p_ucred; + struct ucred *cred = kauth_cred_get(); int err2; + context.vc_proc = p; + context.vc_ucred = cred; + ndp = &nid; - atp = &attr; /* init the namei data to point at the user program's name */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), &context); - if (error = namei(ndp)) { + if ((error = namei(ndp)) != 0) { if (error == ENOENT) error = LOAD_ENOENT; else error = LOAD_FAILURE; return(error); } - + nameidone(ndp); vp = ndp->ni_vp; /* check for regular file */ @@ -995,8 +1270,8 @@ get_macho_vnode( goto bad1; } - /* get attributes */ - if (error = VOP_GETATTR(vp, &attr, cred, p)) { + /* get size */ + if ((error = vnode_size(vp, &fsize, &context)) != 0) { error = LOAD_FAILURE; goto bad1; } @@ -1007,39 +1282,26 @@ get_macho_vnode( goto bad1; } - if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED)) - atp->va_mode &= ~(VSUID | VSGID); - - /* check access. for root we have to see if any exec bit on */ - if (error = VOP_ACCESS(vp, VEXEC, cred, p)) { + /* check access */ + if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, &context)) != 0) { error = LOAD_PROTECT; goto bad1; } - if ((atp->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) { - error = LOAD_PROTECT; - goto bad1; - } - - /* hold the vnode for the IO */ - if (UBCINFOEXISTS(vp) && !ubc_hold(vp)) { - error = LOAD_ENOENT; - goto bad1; - } /* try to open it */ - if (error = VOP_OPEN(vp, FREAD, cred, p)) { + if ((error = VNOP_OPEN(vp, FREAD, &context)) != 0) { error = LOAD_PROTECT; - ubc_rele(vp); goto bad1; } - if(error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0, - UIO_SYSSPACE, IO_NODELOCKED, cred, &resid, p)) { + if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0, + UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p)) != 0) { error = LOAD_IOERROR; goto bad2; } - if (header.mach_header.magic == MH_MAGIC) + if (header.mach_header.magic == MH_MAGIC || + header.mach_header.magic == MH_MAGIC_64) is_fat = FALSE; else if (header.fat_header.magic == FAT_MAGIC || header.fat_header.magic == FAT_CIGAM) @@ -1051,21 +1313,22 @@ get_macho_vnode( if (is_fat) { /* Look up our architecture in the fat file. */ - error = fatfile_getarch(vp, (vm_offset_t)(&header.fat_header), &fat_arch); + error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch); if (error != LOAD_SUCCESS) goto bad2; /* Read the Mach-O header out of it */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header, sizeof(header.mach_header), fat_arch.offset, - UIO_SYSSPACE, IO_NODELOCKED, cred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p); if (error) { error = LOAD_IOERROR; goto bad2; } /* Is this really a Mach-O?
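 * (The magic values involved are the standard Mach-O constants:
 * MH_MAGIC is 0xfeedface, MH_MAGIC_64 is 0xfeedfacf, and FAT_MAGIC,
 * tested earlier in this routine, is 0xcafebabe.)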
*/ - if (header.mach_header.magic != MH_MAGIC) { + if (header.mach_header.magic != MH_MAGIC && + header.mach_header.magic != MH_MAGIC_64) { error = LOAD_BADMACHO; goto bad2; } @@ -1073,28 +1336,36 @@ get_macho_vnode( *file_offset = fat_arch.offset; *macho_size = fsize = fat_arch.size; } else { + /* + * Force get_macho_vnode() to fail if the architecture bits + * do not match the expected architecture bits. This in + * turn causes load_dylinker() to fail for the same reason, + * so it ensures the dynamic linker and the binary are in + * lock-step. This is potentially bad, if we ever add to + * the CPU_ARCH_* bits any bits that are desirable but not + * required, since the dynamic linker might work, but we will + * refuse to load it because of this check. + */ + if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) + return(LOAD_BADARCH); *file_offset = 0; - *macho_size = fsize = attr.va_size; + *macho_size = fsize; } *mach_header = header.mach_header; *vpp = vp; - if (UBCISVALID(vp)) - ubc_setsize(vp, fsize); /* XXX why? */ + + ubc_setsize(vp, fsize); - VOP_UNLOCK(vp, 0, p); - ubc_rele(vp); return (error); bad2: - VOP_UNLOCK(vp, 0, p); - err2 = VOP_CLOSE(vp, FREAD, cred, p); - ubc_rele(vp); - vrele(vp); + err2 = VNOP_CLOSE(vp, FREAD, &context); + vnode_put(vp); return (error); bad1: - vput(vp); + vnode_put(vp); return(error); } diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h index 939445e02..75713b125 100644 --- a/bsd/kern/mach_loader.h +++ b/bsd/kern/mach_loader.h @@ -39,11 +39,15 @@ typedef int load_return_t; +/* + * Structure describing the result from calling load_machfile(), if that + * function returns LOAD_SUCCESS. + */ typedef struct _load_result { - vm_offset_t mach_header; - vm_offset_t entry_point; - vm_offset_t user_stack; - int thread_count; + user_addr_t mach_header; + user_addr_t entry_point; + user_addr_t user_stack; + int thread_count; unsigned int /* boolean_t */ unixproc :1, dynlinker :1, @@ -51,15 +55,14 @@ typedef struct _load_result { :0; } load_result_t; +struct image_params; load_return_t load_machfile( - struct vnode *vp, + struct image_params *imgp, struct mach_header *header, - unsigned long file_offset, - unsigned long macho_size, - load_result_t *result, - thread_act_t thr_act, + thread_t thr_act, vm_map_t map, - boolean_t clean_regions); + boolean_t clean_regions, + load_result_t *result); #define LOAD_SUCCESS 0 #define LOAD_BADARCH 1 /* CPU type/subtype not found */ diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index 8c0567ea1..caa043027 100644 --- a/bsd/kern/mach_process.c +++ b/bsd/kern/mach_process.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -65,7 +65,8 @@ #include #include -#include +#include +#include #include #include #include @@ -73,7 +74,8 @@ #include #include -#include +#include +#include #include @@ -81,22 +83,23 @@ #include #include + /* Macros to clear/set/test flags. 
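 * For example, SET(t->p_flag, P_TRACED) marks the target process as
 * traced and ISSET(t->p_flag, P_TRACED) tests for that state, as used
 * in the attach logic below.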
*/ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) #define ISSET(t, f) ((t) & (f)) -void psignal_lock __P((struct proc *, int, int)); +extern thread_t port_name_to_thread(mach_port_name_t port_name); +extern kern_return_t thread_getstatus(thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); +extern thread_t get_firstthread(task_t); + +#if defined (ppc) +extern kern_return_t thread_setstatus(thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t count); +#endif /* * sys-trace system call. */ -struct ptrace_args { - int req; - pid_t pid; - caddr_t addr; - int data; -}; int ptrace(p, uap, retval) @@ -105,15 +108,10 @@ ptrace(p, uap, retval) register_t *retval; { struct proc *t = current_proc(); /* target process */ - vm_offset_t start_addr, end_addr, - kern_addr, offset; - vm_size_t size; task_t task; - thread_t thread; - thread_act_t th_act; + thread_t th_act; struct uthread *ut; int *locr0; - int error = 0; #if defined(ppc) struct ppc_thread_state64 statep; #elif defined(i386) @@ -129,14 +127,14 @@ ptrace(p, uap, retval) AUDIT_ARG(addr, uap->addr); AUDIT_ARG(value, uap->data); - if (uap->req == PT_DENY_ATTACH) { - if (ISSET(p->p_flag, P_TRACED)) { - exit1(p, W_EXITCODE(ENOTSUP, 0), retval); - /* drop funnel before we return */ - thread_funnel_set(kernel_flock, FALSE); - thread_exception_return(); - /* NOTREACHED */ - } + if (uap->req == PT_DENY_ATTACH) { + if (ISSET(p->p_flag, P_TRACED)) { + exit1(p, W_EXITCODE(ENOTSUP, 0), retval); + /* drop funnel before we return */ + thread_funnel_set(kernel_flock, FALSE); + thread_exception_return(); + /* NOTREACHED */ + } SET(p->p_flag, P_NOATTACH); return(0); @@ -173,7 +171,6 @@ ptrace(p, uap, retval) if ((t = pfind(uap->pid)) == NULL) return (ESRCH); - AUDIT_ARG(process, t); /* We do not want ptrace to do anything with kernel, init @@ -188,52 +185,35 @@ ptrace(p, uap, retval) tr_sigexc = 1; } if (uap->req == PT_ATTACH) { - - /* - * You can't attach to a process if: - * (1) it's the process that's doing the attaching, - */ - if (t->p_pid == p->p_pid) - return (EINVAL); - - /* - * (2) it's already being traced, or - */ - if (ISSET(t->p_flag, P_TRACED)) - return (EBUSY); - - /* - * (3) it's not owned by you, or is set-id on exec - * (unless you're root). 
- */ - if ((t->p_cred->p_ruid != p->p_cred->p_ruid || - ISSET(t->p_flag, P_SUGID)) && - (error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); - - if ((p->p_flag & P_TRACED) && isinferior(p, t)) - return(EPERM); - - if (ISSET(t->p_flag, P_NOATTACH)) { - psignal(p, SIGSEGV); - return (EBUSY); + int err; + + if ( kauth_authorize_process(proc_ucred(p), KAUTH_PROCESS_CANTRACE, + t, (uintptr_t)&err, 0, 0) == 0 ) { + /* it's OK to attach */ + SET(t->p_flag, P_TRACED); + if (tr_sigexc) + SET(t->p_flag, P_SIGEXC); + + t->p_oppid = t->p_pptr->p_pid; + if (t->p_pptr != p) + proc_reparent(t, p); + + if (get_task_userstop(task) == 0 ) { + t->p_xstat = 0; + psignal(t, SIGSTOP); + } else { + t->p_xstat = SIGSTOP; + task_resume(task); + } + return(0); } - SET(t->p_flag, P_TRACED); - if (tr_sigexc) - SET(t->p_flag, P_SIGEXC); - - t->p_oppid = t->p_pptr->p_pid; - if (t->p_pptr != p) - proc_reparent(t, p); - - if (get_task_userstop(task) == 0 ) { - t->p_xstat = 0; - psignal(t, SIGSTOP); - } else { - t->p_xstat = SIGSTOP; - task_resume(task); + else { + /* not allowed to attach, proper error code returned by kauth_authorize_process */ + if (ISSET(t->p_flag, P_NOATTACH)) { + psignal(p, SIGSEGV); + } + return (err); } - return(0); } /* @@ -284,8 +264,8 @@ ptrace(p, uap, retval) case PT_STEP: /* single step the child */ case PT_CONTINUE: /* continue the child */ - th_act = (thread_act_t)get_firstthread(task); - if (th_act == THR_ACT_NULL) + th_act = (thread_t)get_firstthread(task); + if (th_act == THREAD_NULL) goto errorLabel; ut = (uthread_t)get_bsdthread_info(th_act); locr0 = ut->uu_ar0; @@ -296,13 +276,13 @@ ptrace(p, uap, retval) } #elif defined(ppc) state_count = PPC_THREAD_STATE64_COUNT; - if (thread_getstatus(th_act, PPC_THREAD_STATE64, &statep, &state_count) != KERN_SUCCESS) { + if (thread_getstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, (mach_msg_type_number_t *)&state_count) != KERN_SUCCESS) { goto errorLabel; } #else #error architecture not supported #endif - if ((int)uap->addr != 1) { + if (uap->addr != (user_addr_t)1) { #if defined(i386) locr0[PC] = (int)uap->addr; #elif defined(ppc) @@ -310,18 +290,18 @@ ptrace(p, uap, retval) if (!ALIGNED((int)uap->addr, sizeof(int))) return (ERESTART); - statep.srr0 = (uint64_t)((uint32_t)uap->addr); + statep.srr0 = uap->addr; state_count = PPC_THREAD_STATE64_COUNT; - if (thread_setstatus(th_act, PPC_THREAD_STATE64, &statep, &state_count) != KERN_SUCCESS) { + if (thread_setstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, state_count) != KERN_SUCCESS) { goto errorLabel; } #undef ALIGNED #else #error architecture not implemented! 
#endif - } /* (int)uap->addr != 1 */ + } /* uap->addr != (user_addr_t)1 */ - if ((unsigned)uap->data < 0 || (unsigned)uap->data >= NSIG) + if ((unsigned)uap->data >= NSIG) goto errorLabel; if (uap->data != 0) { @@ -329,7 +309,7 @@ ptrace(p, uap, retval) } #if defined(ppc) state_count = PPC_THREAD_STATE64_COUNT; - if (thread_getstatus(th_act, PPC_THREAD_STATE64, &statep, &state_count) != KERN_SUCCESS) { + if (thread_getstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, (mach_msg_type_number_t *)&state_count) != KERN_SUCCESS) { goto errorLabel; } #endif @@ -354,7 +334,7 @@ ptrace(p, uap, retval) } #if defined (ppc) state_count = PPC_THREAD_STATE64_COUNT; - if (thread_setstatus(th_act, PPC_THREAD_STATE64, &statep, &state_count) != KERN_SUCCESS) { + if (thread_setstatus(th_act, PPC_THREAD_STATE64, (thread_state_t)&statep, state_count) != KERN_SUCCESS) { goto errorLabel; } #endif @@ -369,19 +349,17 @@ ptrace(p, uap, retval) break; case PT_THUPDATE: { - thread_act_t target_act; - if ((unsigned)uap->data >= NSIG) goto errorLabel; - th_act = (thread_act_t)port_name_to_act((void *)uap->addr); - if (th_act == THR_ACT_NULL) + th_act = port_name_to_thread(CAST_DOWN(mach_port_name_t, uap->addr)); + if (th_act == THREAD_NULL) return (ESRCH); ut = (uthread_t)get_bsdthread_info(th_act); if (uap->data) ut->uu_siglist |= sigmask(uap->data); t->p_xstat = uap->data; t->p_stat = SRUN; - act_deallocate(th_act); + thread_deallocate(th_act); return(0); } break; @@ -393,3 +371,51 @@ errorLabel: return(0); } + +/* + * determine if one process (cur_procp) can trace another process (traced_procp). + */ + +int +cantrace(proc_t cur_procp, kauth_cred_t creds, proc_t traced_procp, int *errp) +{ + int my_err; + /* + * You can't trace a process if: + * (1) it's the process that's doing the tracing, + */ + if (traced_procp->p_pid == cur_procp->p_pid) { + *errp = EINVAL; + return (0); + } + + /* + * (2) it's already being traced, or + */ + if (ISSET(traced_procp->p_flag, P_TRACED)) { + *errp = EBUSY; + return (0); + } + + /* + * (3) it's not owned by you, or is set-id on exec + * (unless you're root). + */ + if ((creds->cr_ruid != proc_ucred(traced_procp)->cr_ruid || + ISSET(traced_procp->p_flag, P_SUGID)) && + (my_err = suser(creds, &cur_procp->p_acflag)) != 0) { + *errp = my_err; + return (0); + } + + if ((cur_procp->p_flag & P_TRACED) && isinferior(cur_procp, traced_procp)) { + *errp = EPERM; + return (0); + } + + if (ISSET(traced_procp->p_flag, P_NOATTACH)) { + *errp = EBUSY; + return (0); + } + return(1); +} diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh new file mode 100755 index 000000000..d186f1690 --- /dev/null +++ b/bsd/kern/makesyscalls.sh @@ -0,0 +1,694 @@ +#! /bin/sh - +# @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93 +# $FreeBSD: src/sys/kern/makesyscalls.sh,v 1.60 2003/04/01 01:12:24 jeff Exp $ +# +# Copyright (c) 2004 Apple Computer, Inc. All rights reserved. +# +# @APPLE_LICENSE_HEADER_START@ +# +# The contents of this file constitute Original Code as defined in and +# are subject to the Apple Public Source License Version 1.1 (the +# "License"). You may not use this file except in compliance with the +# License. Please obtain a copy of the License at +# http://www.apple.com/publicsource and read it before using this file. 
+# +# This Original Code and all software distributed under the License are +# distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +# License for the specific language governing rights and limitations +# under the License. +# +# @APPLE_LICENSE_HEADER_END@ +# + +set -e + +# output files: +syscallnamesfile="syscalls.c" +sysprotofile="../sys/sysproto.h" +sysproto_h=_SYS_SYSPROTO_H_ +syshdrfile="../sys/syscall.h" +syscall_h=_SYS_SYSCALL_H_ +syscalltablefile="init_sysent.c" +syscallprefix="SYS_" +switchname="sysent" +namesname="syscallnames" + +# tmp files: +syslegal="sysent.syslegal.$$" +sysent="sysent.switch.$$" +sysinc="sysinc.switch.$$" +sysarg="sysarg.switch.$$" +sysprotoend="sysprotoend.$$" +syscallnamestempfile="syscallnamesfile.$$" +syshdrtempfile="syshdrtempfile.$$" + +trap "rm $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile" 0 + +touch $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile + +case $# in + 0) echo "usage: $0 input-file " 1>&2 + exit 1 + ;; +esac + +if [ -n "$2" -a -f "$2" ]; then + . $2 +fi + +sed -e ' +s/\$//g +:join + /\\$/{a\ + + N + s/\\\n// + b join + } +2,${ + /^#/!s/\([{}()*,]\)/ \1 /g +} +' < $1 | awk " + BEGIN { + syslegal = \"$syslegal\" + sysprotofile = \"$sysprotofile\" + sysprotoend = \"$sysprotoend\" + sysproto_h = \"$sysproto_h\" + syscall_h = \"$syscall_h\" + sysent = \"$sysent\" + syscalltablefile = \"$syscalltablefile\" + sysinc = \"$sysinc\" + sysarg = \"$sysarg\" + syscallnamesfile = \"$syscallnamesfile\" + syscallnamestempfile = \"$syscallnamestempfile\" + syshdrfile = \"$syshdrfile\" + syshdrtempfile = \"$syshdrtempfile\" + syscallprefix = \"$syscallprefix\" + switchname = \"$switchname\" + namesname = \"$namesname\" + infile = \"$1\" + "' + + printf "/*\n" > syslegal + printf " * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.\n" > syslegal + printf " * \n" > syslegal + printf " * @APPLE_LICENSE_HEADER_START@ \n" > syslegal + printf " * \n" > syslegal + printf " * The contents of this file constitute Original Code as defined in and \n" > syslegal + printf " * are subject to the Apple Public Source License Version 1.1 (the \n" > syslegal + printf " * \"License\"). You may not use this file except in compliance with the \n" > syslegal + printf " * License. Please obtain a copy of the License at \n" > syslegal + printf " * http://www.apple.com/publicsource and read it before using this file. \n" > syslegal + printf " * \n" > syslegal + printf " * This Original Code and all software distributed under the License are \n" > syslegal + printf " * distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY KIND, EITHER \n" > syslegal + printf " * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, \n" > syslegal + printf " * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, \n" > syslegal + printf " * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the \n" > syslegal + printf " * License for the specific language governing rights and limitations \n" > syslegal + printf " * under the License. 
\n" > syslegal + printf " * \n" > syslegal + printf " * @APPLE_LICENSE_HEADER_END@ \n" > syslegal + printf " * \n" > syslegal + printf " * \n" > syslegal + printf " * System call switch table.\n *\n" > syslegal + printf " * DO NOT EDIT-- this file is automatically generated.\n" > syslegal + printf " * created from %s\n */\n\n", infile > syslegal + } + NR == 1 { + printf "\n/* The casts are bogus but will do for now. */\n" > sysent + printf "__private_extern__ struct sysent %s[] = {\n",switchname > sysent + + printf "#ifndef %s\n", sysproto_h > sysarg + printf "#define\t%s\n\n", sysproto_h > sysarg + printf "#ifndef %s\n", syscall_h > syshdrtempfile + printf "#define\t%s\n\n", syscall_h > syshdrtempfile + printf "#include \n" > syshdrtempfile + printf "#ifdef __APPLE_API_PRIVATE\n" > syshdrtempfile + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "#include \n" > sysarg + printf "\n#ifdef KERNEL\n" > sysarg + printf "#ifdef __APPLE_API_PRIVATE\n" > sysarg + printf "#ifdef __ppc__\n" > sysarg + printf "#define\tPAD_(t)\t(sizeof(uint64_t) <= sizeof(t) \\\n " > sysarg + printf "\t\t? 0 : sizeof(uint64_t) - sizeof(t))\n" > sysarg + printf "#else\n" > sysarg + printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) \\\n " > sysarg + printf "\t\t? 0 : sizeof(register_t) - sizeof(t))\n" > sysarg + printf "#endif\n" > sysarg + printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg + printf "#define\tPADL_(t)\t0\n" > sysarg + printf "#define\tPADR_(t)\tPAD_(t)\n" > sysarg + printf "#else\n" > sysarg + printf "#define\tPADL_(t)\tPAD_(t)\n" > sysarg + printf "#define\tPADR_(t)\t0\n" > sysarg + printf "#endif\n" > sysarg + printf "\n__BEGIN_DECLS\n" > sysarg + printf "#ifndef __MUNGE_ONCE\n" > sysarg + printf "#define __MUNGE_ONCE\n" > sysarg + printf "#ifdef __ppc__\n" > sysarg + printf "void munge_w(const void *, void *); \n" > sysarg + printf "void munge_ww(const void *, void *); \n" > sysarg + printf "void munge_www(const void *, void *); \n" > sysarg + printf "void munge_wwww(const void *, void *); \n" > sysarg + printf "void munge_wwwww(const void *, void *); \n" > sysarg + printf "void munge_wwwwww(const void *, void *); \n" > sysarg + printf "void munge_wwwwwww(const void *, void *); \n" > sysarg + printf "void munge_wwwwwwww(const void *, void *); \n" > sysarg + printf "void munge_d(const void *, void *); \n" > sysarg + printf "void munge_dd(const void *, void *); \n" > sysarg + printf "void munge_ddd(const void *, void *); \n" > sysarg + printf "void munge_dddd(const void *, void *); \n" > sysarg + printf "void munge_ddddd(const void *, void *); \n" > sysarg + printf "void munge_dddddd(const void *, void *); \n" > sysarg + printf "void munge_ddddddd(const void *, void *); \n" > sysarg + printf "void munge_dddddddd(const void *, void *); \n" > sysarg + printf "void munge_wl(const void *, void *); \n" > sysarg + printf "void munge_wlw(const void *, void *); \n" > sysarg + printf "void munge_wwwl(const void *, void *); \n" > sysarg + printf "void munge_wwwwl(const void *, void *); \n" > sysarg + printf "void munge_wwwwwl(const void *, void *); \n" > sysarg + printf "void munge_wsw(const void *, void *); \n" > sysarg + printf "void munge_wws(const void *, void *); \n" > sysarg + printf "void munge_wwwsw(const void *, void *); \n" > sysarg + printf "#else \n" > sysarg + printf "#define munge_w NULL \n" > sysarg + 
printf "#define munge_ww NULL \n" > sysarg + printf "#define munge_www NULL \n" > sysarg + printf "#define munge_wwww NULL \n" > sysarg + printf "#define munge_wwwww NULL \n" > sysarg + printf "#define munge_wwwwww NULL \n" > sysarg + printf "#define munge_wwwwwww NULL \n" > sysarg + printf "#define munge_wwwwwwww NULL \n" > sysarg + printf "#define munge_d NULL \n" > sysarg + printf "#define munge_dd NULL \n" > sysarg + printf "#define munge_ddd NULL \n" > sysarg + printf "#define munge_dddd NULL \n" > sysarg + printf "#define munge_ddddd NULL \n" > sysarg + printf "#define munge_dddddd NULL \n" > sysarg + printf "#define munge_ddddddd NULL \n" > sysarg + printf "#define munge_dddddddd NULL \n" > sysarg + printf "#define munge_wl NULL \n" > sysarg + printf "#define munge_wlw NULL \n" > sysarg + printf "#define munge_wwwl NULL \n" > sysarg + printf "#define munge_wwwwl NULL \n" > sysarg + printf "#define munge_wwwwwl NULL \n" > sysarg + printf "#define munge_wsw NULL \n" > sysarg + printf "#define munge_wws NULL \n" > sysarg + printf "#define munge_wwwsw NULL \n" > sysarg + printf "#endif // __ppc__\n" > sysarg + printf "#endif /* !__MUNGE_ONCE */\n" > sysarg + + printf "\n" > sysarg + + printf "const char *%s[] = {\n", namesname > syscallnamestempfile + next + } + NF == 0 || $1 ~ /^;/ { + next + } + $1 ~ /^#[ ]*include/ { + print > sysinc + next + } + $1 ~ /^#[ ]*if/ { + print > sysent + print > sysarg + print > syscallnamestempfile + print > syshdrtempfile + print > sysprotoend + savesyscall = syscall + next + } + $1 ~ /^#[ ]*else/ { + print > sysent + print > sysarg + print > syscallnamestempfile + print > syshdrtempfile + print > sysprotoend + syscall = savesyscall + next + } + $1 ~ /^#/ { + print > sysent + print > sysarg + print > syscallnamestempfile + print > syshdrtempfile + print > sysprotoend + next + } + syscall != $1 { + printf "%s: line %d: syscall number out of sync at %d\n", + infile, NR, syscall + printf "line is:\n" + print + exit 1 + } + function align_comment(linesize, location, thefile) { + printf(" ") > thefile + while (linesize < location) { + printf(" ") > thefile + linesize++ + } + } + function parserr(was, wanted) { + printf "%s: line %d: unexpected %s (expected %s)\n", + infile, NR, was, wanted + exit 1 + } + + function parseline() { + funcname = "" + current_field = 5 + args_start = 0 + args_end = 0 + comments_start = 0 + comments_end = 0 + argc = 0 + argssize = "0" + additional_comments = " " + + # find start and end of call name and arguments + if ($current_field != "{") + parserr($current_field, "{") + args_start = current_field + current_field++ + while (current_field <= NF) { + if ($current_field == "}") { + args_end = current_field + break + } + current_field++ + } + if (args_end == 0) { + printf "%s: line %d: invalid call name and arguments\n", + infile, NR + exit 1 + } + + # find start and end of optional comments + current_field++ + if (current_field < NF && $current_field == "{") { + comments_start = current_field + while (current_field <= NF) { + if ($current_field == "}") { + comments_end = current_field + break + } + current_field++ + } + if (comments_end == 0) { + printf "%s: line %d: invalid comments \n", + infile, NR + exit 1 + } + } + + if ($args_end != "}") + parserr($args_end, "}") + args_end-- + if ($args_end != ";") + parserr($args_end, ";") + args_end-- + if ($args_end != ")") + parserr($args_end, ")") + args_end-- + + # extract additional comments + if (comments_start != 0) { + current_field = comments_start + 1 + while (current_field < 
comments_end) { + additional_comments = additional_comments $current_field " " + current_field++ + } + } + + # get function return type + current_field = args_start + 1 + returntype = $current_field + + # get function name and set up to get arguments + current_field++ + funcname = $current_field + argalias = funcname "_args" + current_field++ # bump past function name + + if ($current_field != "(") + parserr($current_field, "(") + current_field++ + + if (current_field == args_end) { + if ($current_field != "void") + parserr($current_field, "argument definition") + return + } + + # extract argument types and names + while (current_field <= args_end) { + argc++ + argtype[argc]="" + ext_argtype[argc]="" + oldf="" + while (current_field < args_end && $(current_field + 1) != ",") { + if (argtype[argc] != "" && oldf != "*") { + argtype[argc] = argtype[argc] " "; + } + argtype[argc] = argtype[argc] $current_field; + ext_argtype[argc] = argtype[argc]; + oldf = $current_field; + current_field++ + } + if (argtype[argc] == "") + parserr($current_field, "argument definition") + argname[argc] = $current_field; + current_field += 2; # skip name, and any comma + } + if (argc > 8) { + printf "%s: line %d: too many arguments!\n", infile, NR + exit 1 + } + if (argc != 0) + argssize = "AC(" argalias ")" + } + + { + add_sysent_entry = 1 + add_sysnames_entry = 1 + add_sysheader_entry = 1 + add_sysproto_entry = 1 + add_64bit_unsafe = 0 + add_64bit_fakesafe = 0 + add_cancel_enable = "0" + + if ($2 == "NONE") { + add_cancel_enable = "_SYSCALL_CANCEL_NONE" + } + else if ($2 == "PRE") { + add_cancel_enable = "_SYSCALL_CANCEL_PRE" + } + else if ($2 == "POST") { + add_cancel_enable = "_SYSCALL_CANCEL_POST" + } + else { + printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2 + exit 1 + + } + + if ($3 == "KERN") { + my_funnel = "KERNEL_FUNNEL" + } + else if ($3 == "NONE") { + my_funnel = "NO_FUNNEL" + } + else { + printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $3 + exit 1 + } + + if ($4 != "ALL" && $4 != "UALL") { + files_keyword_OK = 0 + add_sysent_entry = 0 + add_sysnames_entry = 0 + add_sysheader_entry = 0 + add_sysproto_entry = 0 + + if (match($4, "[T]") != 0) { + add_sysent_entry = 1 + files_keyword_OK = 1 + } + if (match($4, "[N]") != 0) { + add_sysnames_entry = 1 + files_keyword_OK = 1 + } + if (match($4, "[H]") != 0) { + add_sysheader_entry = 1 + files_keyword_OK = 1 + } + if (match($4, "[P]") != 0) { + add_sysproto_entry = 1 + files_keyword_OK = 1 + } + if (match($4, "[U]") != 0) { + add_64bit_unsafe = 1 + } + if (match($4, "[F]") != 0) { + add_64bit_fakesafe = 1 + } + + if (files_keyword_OK == 0) { + printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $4 + exit 1 + } + } + else if ($4 == "UALL") { + add_64bit_unsafe = 1; + } + + + parseline() + + # output function argument structures to sysproto.h and build the + # name of the appropriate argument mungers + munge32 = "NULL" + munge64 = "NULL" + if (funcname != "nosys" || (syscall == 0 && funcname == "nosys")) { + if (argc != 0) { + if (add_sysproto_entry == 1) { + printf("struct %s {\n", argalias) > sysarg + } + munge32 = "munge_" + munge64 = "munge_" + for (i = 1; i <= argc; i++) { + # Build name of argument munger. + # We account for all sys call argument types here. + # This is where you add any new types. With LP64 support + # each argument consumes 64-bits. + # see .../xnu/bsd/dev/ppc/munge.s for munge argument types. 
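+				# As an illustrative example (not generated output):
+				# lseek(int fd, off_t offset, int whence) yields
+				# munge32 == "munge_wlw" (word, long, word) and
+				# munge64 == "munge_ddd", since every argument is
+				# widened to a 64 bit slot for a 64 bit process.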
+ if (argtype[i] == "long") { + if (add_64bit_unsafe == 0) + ext_argtype[i] = "user_long_t"; + munge32 = munge32 "s" + munge64 = munge64 "d" + } + else if (argtype[i] == "u_long") { + if (add_64bit_unsafe == 0) + ext_argtype[i] = "user_ulong_t"; + munge32 = munge32 "w" + munge64 = munge64 "d" + } + else if (argtype[i] == "size_t") { + if (add_64bit_unsafe == 0) + ext_argtype[i] = "user_size_t"; + munge32 = munge32 "w" + munge64 = munge64 "d" + } + else if (argtype[i] == "ssize_t") { + if (add_64bit_unsafe == 0) + ext_argtype[i] = "user_ssize_t"; + munge32 = munge32 "s" + munge64 = munge64 "d" + } + else if (argtype[i] == "user_ssize_t" || argtype[i] == "user_long_t") { + munge32 = munge32 "s" + munge64 = munge64 "d" + } + else if (argtype[i] == "user_addr_t" || argtype[i] == "user_size_t" || + argtype[i] == "user_ulong_t") { + munge32 = munge32 "w" + munge64 = munge64 "d" + } + else if (argtype[i] == "caddr_t" || argtype[i] == "semun_t" || + match(argtype[i], "[\*]") != 0) { + if (add_64bit_unsafe == 0) + ext_argtype[i] = "user_addr_t"; + munge32 = munge32 "w" + munge64 = munge64 "d" + } + else if (argtype[i] == "int" || argtype[i] == "u_int" || + argtype[i] == "uid_t" || argtype[i] == "pid_t" || + argtype[i] == "id_t" || argtype[i] == "idtype_t" || + argtype[i] == "socklen_t" || argtype[i] == "uint32_t" || argtype[i] == "int32_t" || + argtype[i] == "sigset_t" || argtype[i] == "gid_t" || + argtype[i] == "semconfig_ctl_t" || argtype[i] == "mode_t" || argtype[i] == "key_t" || argtype[i] == "time_t") { + munge32 = munge32 "w" + munge64 = munge64 "d" + } + else if (argtype[i] == "off_t" || argtype[i] == "int64_t" || argtype[i] == "uint64_t") { + munge32 = munge32 "l" + munge64 = munge64 "d" + } + else { + printf "%s: line %d: invalid type \"%s\" \n", + infile, NR, argtype[i] + printf "You need to add \"%s\" into the type checking code. \n", + argtype[i] + exit 1 + } + if (add_sysproto_entry == 1) { + printf("\tchar %s_l_[PADL_(%s)]; " \ + "%s %s; char %s_r_[PADR_(%s)];\n", + argname[i], ext_argtype[i], + ext_argtype[i], argname[i], + argname[i], ext_argtype[i]) > sysarg + } + } + if (add_sysproto_entry == 1) { + printf("};\n") > sysarg + } + } + else if (add_sysproto_entry == 1) { + printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg + } + } + + # output to init_sysent.c + tempname = funcname + if (add_sysent_entry == 0) { + argssize = "0" + munge32 = "NULL" + munge64 = "NULL" + munge_ret = "_SYSCALL_RET_NONE" + tempname = "nosys" + } + else { + # figure out which return value type to munge + if (returntype == "user_addr_t") { + munge_ret = "_SYSCALL_RET_ADDR_T" + } + else if (returntype == "user_ssize_t") { + munge_ret = "_SYSCALL_RET_SSIZE_T" + } + else if (returntype == "user_size_t") { + munge_ret = "_SYSCALL_RET_SIZE_T" + } + else if (returntype == "int") { + munge_ret = "_SYSCALL_RET_INT_T" + } + else if (returntype == "u_int") { + munge_ret = "_SYSCALL_RET_UINT_T" + } + else if (returntype == "off_t") { + munge_ret = "_SYSCALL_RET_OFF_T" + } + else if (returntype == "void") { + munge_ret = "_SYSCALL_RET_NONE" + } + else { + printf "%s: line %d: invalid return type \"%s\" \n", + infile, NR, returntype + printf "You need to add \"%s\" into the return type checking code. 
\n", + returntype + exit 1 + } + } + + if (add_64bit_unsafe == 1 && add_64bit_fakesafe == 0) + my_funnel = my_funnel "|UNSAFE_64BIT"; + + printf("\t{%s, %s, %s, \(sy_call_t *\)%s, %s, %s, %s},", + argssize, add_cancel_enable, my_funnel, tempname, munge32, munge64, munge_ret) > sysent + linesize = length(argssize) + length(add_cancel_enable) + length(my_funnel) + length(tempname) + \ + length(munge32) + length(munge64) + length(munge_ret) + 28 + align_comment(linesize, 88, sysent) + printf("/* %d = %s%s*/\n", syscall, funcname, additional_comments) > sysent + + # output to syscalls.c + if (add_sysnames_entry == 1) { + tempname = funcname + if (funcname == "nosys") { + if (syscall == 0) + tempname = "syscall" + else + tempname = "#" syscall + } + printf("\t\"%s\", ", tempname) > syscallnamestempfile + linesize = length(tempname) + 8 + align_comment(linesize, 25, syscallnamestempfile) + if (substr(tempname,1,1) == "#") { + printf("/* %d =%s*/\n", syscall, additional_comments) > syscallnamestempfile + } + else { + printf("/* %d = %s%s*/\n", syscall, tempname, additional_comments) > syscallnamestempfile + } + } + + # output to syscalls.h + if (add_sysheader_entry == 1) { + tempname = funcname + if (syscall == 0) { + tempname = "syscall" + } + if (tempname != "nosys") { + printf("#define\t%s%s", syscallprefix, tempname) > syshdrtempfile + linesize = length(syscallprefix) + length(tempname) + 12 + align_comment(linesize, 30, syshdrtempfile) + printf("%d\n", syscall) > syshdrtempfile + # special case for gettimeofday on ppc - cctools project uses old name + if (tempname == "ppc_gettimeofday") { + printf("#define\t%s%s", syscallprefix, "gettimeofday") > syshdrtempfile + linesize = length(syscallprefix) + length(tempname) + 12 + align_comment(linesize, 30, syshdrtempfile) + printf("%d\n", syscall) > syshdrtempfile + } + } + else { + printf("\t\t\t/* %d %s*/\n", syscall, additional_comments) > syshdrtempfile + } + } + + # output function prototypes to sysproto.h + if (add_sysproto_entry == 1) { + if (funcname =="exit") { + printf("void %s(struct proc *, struct %s *, int *);\n", + funcname, argalias) > sysprotoend + } + else if (funcname != "nosys" || (syscall == 0 && funcname == "nosys")) { + printf("int %s(struct proc *, struct %s *, %s *);\n", + funcname, argalias, returntype) > sysprotoend + } + } + + syscall++ + next + } + + END { + printf "#ifdef __ppc__\n" > sysinc + printf "#define AC(name) (sizeof(struct name) / sizeof(uint64_t))\n" > sysinc + printf "#else\n" > sysinc + printf "#define AC(name) (sizeof(struct name) / sizeof(register_t))\n" > sysinc + printf "#endif\n" > sysinc + printf "\n" > sysinc + + printf("\n__END_DECLS\n") > sysprotoend + printf("#undef PAD_\n") > sysprotoend + printf("#undef PADL_\n") > sysprotoend + printf("#undef PADR_\n") > sysprotoend + printf "\n#endif /* __APPLE_API_PRIVATE */\n" > sysprotoend + printf "#endif /* KERNEL */\n" > sysprotoend + printf("\n#endif /* !%s */\n", sysproto_h) > sysprotoend + + printf("};\n") > sysent + printf("int nsysent = sizeof(sysent) / sizeof(sysent[0]);\n") > sysent + + printf("};\n") > syscallnamestempfile + printf("#define\t%sMAXSYSCALL\t%d\n", syscallprefix, syscall) \ + > syshdrtempfile + printf("\n#endif /* __APPLE_API_PRIVATE */\n") > syshdrtempfile + printf("#endif /* !%s */\n", syscall_h) > syshdrtempfile + } ' + +cat $syslegal $sysinc $sysent > $syscalltablefile +cat $syslegal $sysarg $sysprotoend > $sysprotofile +cat $syslegal $syscallnamestempfile > $syscallnamesfile +cat $syslegal $syshdrtempfile > $syshdrfile diff 
--git a/bsd/kern/netboot.c b/bsd/kern/netboot.c index c2bc3b3a3..2555875b7 100644 --- a/bsd/kern/netboot.c +++ b/bsd/kern/netboot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2001-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,11 +30,11 @@ #include #include #include -#include -#include +#include +#include #include #include -#include +#include #include #include #include @@ -46,9 +46,11 @@ #include #include #include -#include #include +#include +#include + //#include extern struct filedesc filedesc0; @@ -250,6 +252,7 @@ static __inline__ boolean_t parse_netboot_path(char * path, struct in_addr * iaddr_p, char * * host, char * * mount_dir, char * * image_path) { + static char tmp[MAX_IPv4_STR_LEN]; /* Danger - not thread safe */ char * start; char * colon; @@ -283,7 +286,7 @@ parse_netboot_path(char * path, struct in_addr * iaddr_p, char * * host, (void)find_colon(start); *image_path = start; } - *host = inet_ntoa(*iaddr_p); + *host = inet_ntop(AF_INET, iaddr_p, tmp, sizeof(tmp)); return (TRUE); } @@ -353,6 +356,8 @@ netboot_info_init(struct in_addr iaddr) char * vndevice = NULL; MALLOC_ZONE(vndevice, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (vndevice == NULL) + panic("netboot_info_init: M_NAMEI zone exhausted"); if (PE_parse_boot_arg("vndevice", vndevice) == TRUE) { use_hdix = FALSE; } @@ -366,6 +371,8 @@ netboot_info_init(struct in_addr iaddr) /* check for a booter-specified path then a NetBoot path */ MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (root_path == NULL) + panic("netboot_info_init: M_NAMEI zone exhausted"); if (PE_parse_boot_arg("rp", root_path) == TRUE || PE_parse_boot_arg("rootpath", root_path) == TRUE || get_root_path(root_path) == TRUE) { @@ -431,15 +438,15 @@ netboot_info_free(struct netboot_info * * info_p) if (info) { if (info->mount_point) { - kfree((vm_offset_t)info->mount_point, info->mount_point_length); + kfree(info->mount_point, info->mount_point_length); } if (info->server_name) { - kfree((vm_offset_t)info->server_name, info->server_name_length); + kfree(info->server_name, info->server_name_length); } if (info->image_path) { - kfree((vm_offset_t)info->image_path, info->image_path_length); + kfree(info->image_path, info->image_path_length); } - kfree((vm_offset_t)info, sizeof(*info)); + kfree(info, sizeof(*info)); } *info_p = NULL; return; @@ -617,13 +624,14 @@ find_interface(void) struct ifnet * ifp = NULL; if (rootdevice[0]) { - ifp = ifunit(rootdevice); + ifp = ifunit(rootdevice); } if (ifp == NULL) { - TAILQ_FOREACH(ifp, &ifnet, if_link) - if ((ifp->if_flags & - (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) - break; + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) + if ((ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) + break; + ifnet_head_done(); } return (ifp); } @@ -643,7 +651,6 @@ netboot_mountroot(void) bzero(&ifr, sizeof(ifr)); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); /* find the interface */ ifp = find_interface(); @@ -701,7 +708,6 @@ netboot_mountroot(void) } soclose(so); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); S_netboot_info_p = netboot_info_init(iaddr); switch (S_netboot_info_p->image_type) { @@ -760,7 +766,6 @@ failed: if (so != NULL) { soclose(so); } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); return (error); } @@ -799,20 +804,24 @@ netboot_setup(struct proc * p) if (error == 0 && rootvnode != NULL) { struct vnode *tvp; struct vnode *newdp; + struct 
vfs_context context; + + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); /* XXX kauth_cred_get() ??? proxy */ /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ - if (VFS_ROOT(mountlist.cqh_last, &newdp)) + if (VFS_ROOT(mountlist.tqh_last, &newdp, &context)) panic("netboot_setup: cannot find root vnode"); - VREF(newdp); + vnode_ref(newdp); + vnode_put(newdp); tvp = rootvnode; - vrele(tvp); + vnode_rele(tvp); filedesc0.fd_cdir = newdp; rootvnode = newdp; - simple_lock(&mountlist_slock); - CIRCLEQ_REMOVE(&mountlist, CIRCLEQ_FIRST(&mountlist), mnt_list); - simple_unlock(&mountlist_slock); - VOP_UNLOCK(rootvnode, 0, p); - mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; + mount_list_lock(); + TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list); + mount_list_unlock(); + mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; } done: netboot_info_free(&S_netboot_info_p); diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c index e6ab89ff6..cf17502e0 100644 --- a/bsd/kern/posix_sem.c +++ b/bsd/kern/posix_sem.c @@ -39,11 +39,11 @@ #include #include #include -#include +#include #include #include -#include -#include +#include +#include #include #include #include @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -58,10 +59,23 @@ #include #include #include +#include +#include #include #include #include +#if KTRACE +#include +#endif + +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data #define PSEMNAMLEN 31 /* maximum name segment length we bother with */ struct pseminfo { @@ -71,7 +85,7 @@ struct pseminfo { uid_t psem_uid; gid_t psem_gid; char psem_name[PSEMNAMLEN + 1]; /* segment name */ - void * psem_semobject; + semaphore_t psem_semobject; struct proc * sem_proc; }; #define PSEMINFO_NULL (struct pseminfo *)0 @@ -123,25 +137,58 @@ struct psemnode { LIST_HEAD(psemhashhead, psemcache) *psemhashtbl; /* Hash Table */ u_long psemhash; /* size of hash table - 1 */ long psemnument; /* number of cache entries allocated */ +long posix_sem_max = 10000; /* tunable for max POSIX semaphores */ + /* 10000 limits to ~1M of memory */ +SYSCTL_NODE(_kern, KERN_POSIX, posix, CTLFLAG_RW, 0, "Posix"); +SYSCTL_NODE(_kern_posix, OID_AUTO, sem, CTLFLAG_RW, 0, "Semaphores"); +SYSCTL_INT (_kern_posix_sem, OID_AUTO, max, CTLFLAG_RW, &posix_sem_max, 0, "max"); + struct psemstats psemstats; /* cache effectiveness statistics */ -static int psem_cache_search __P((struct pseminfo **, - struct psemname *, struct psemcache **)); +static int psem_access(struct pseminfo *pinfo, int mode, kauth_cred_t cred); +static int psem_cache_search(struct pseminfo **, + struct psemname *, struct psemcache **); +static int psem_delete(struct pseminfo * pinfo); -static int psem_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int psem_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int psem_ioctl __P((struct file *fp, u_long com, - caddr_t data, struct proc *p)); -static int psem_select __P((struct file *fp, int which, void *wql, - struct proc *p)); -static int psem_closefile __P((struct file *fp, struct proc *p)); +static int psem_read (struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int psem_write (struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, 
struct proc *p); +static int psem_ioctl (struct fileproc *fp, u_long com, + caddr_t data, struct proc *p); +static int psem_select (struct fileproc *fp, int which, void *wql, struct proc *p); +static int psem_closefile (struct fileglob *fp, struct proc *p); -static int psem_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p)); +static int psem_kqfilter (struct fileproc *fp, struct knote *kn, struct proc *p); struct fileops psemops = - { psem_read, psem_write, psem_ioctl, psem_select, psem_closefile, psem_kqfilter }; + { psem_read, psem_write, psem_ioctl, psem_select, psem_closefile, psem_kqfilter, 0 }; + + +static lck_grp_t *psx_sem_subsys_lck_grp; +static lck_grp_attr_t *psx_sem_subsys_lck_grp_attr; +static lck_attr_t *psx_sem_subsys_lck_attr; +static lck_mtx_t psx_sem_subsys_mutex; + +#define PSEM_SUBSYS_LOCK() lck_mtx_lock(& psx_sem_subsys_mutex) +#define PSEM_SUBSYS_UNLOCK() lck_mtx_unlock(& psx_sem_subsys_mutex) + + +static int psem_cache_add(struct pseminfo *psemp, struct psemname *pnp, struct psemcache *pcp); +/* Initialize the mutex governing access to the posix sem subsystem */ +__private_extern__ void +psem_lock_init( void ) +{ + + psx_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(psx_sem_subsys_lck_grp_attr); + + psx_sem_subsys_lck_grp = lck_grp_alloc_init("posix shared memory", psx_sem_subsys_lck_grp_attr); + + psx_sem_subsys_lck_attr = lck_attr_alloc_init(); + /* lck_attr_setdebug(psx_sem_subsys_lck_attr); */ + lck_mtx_init(& psx_sem_subsys_mutex, psx_sem_subsys_lck_grp, psx_sem_subsys_lck_attr); +} /* * Lookup an entry in the cache @@ -159,8 +206,8 @@ psem_cache_search(psemp, pnp, pcache) struct psemname *pnp; struct psemcache **pcache; { - register struct psemcache *pcp, *nnp; - register struct psemhashhead *pcpp; + struct psemcache *pcp, *nnp; + struct psemhashhead *pcpp; if (pnp->psem_namelen > PSEMNAMLEN) { psemstats.longnames++; @@ -201,12 +248,9 @@ psem_cache_search(psemp, pnp, pcache) * Add an entry to the cache. */ static int -psem_cache_add(psemp, pnp) - struct pseminfo *psemp; - struct psemname *pnp; +psem_cache_add(struct pseminfo *psemp, struct psemname *pnp, struct psemcache *pcp) { - register struct psemcache *pcp; - register struct psemhashhead *pcpp; + struct psemhashhead *pcpp; struct pseminfo *dpinfo; struct psemcache *dpcp; @@ -215,20 +259,14 @@ psem_cache_add(psemp, pnp) panic("cache_enter: name too long"); #endif - /* - * We allocate a new entry if we are less than the maximum - * allowed and the one at the front of the LRU list is in use. - * Otherwise we use the one at the front of the LRU list. - */ - pcp = (struct psemcache *)_MALLOC(sizeof(struct psemcache), M_SHM, M_WAITOK); + /* if the entry has already been added by some one else return */ if (psem_cache_search(&dpinfo, pnp, &dpcp) == -1) { - _FREE(pcp, M_SHM); return(EEXIST); } + if (psemnument >= posix_sem_max) + return(ENOSPC); psemnument++; - - bzero(pcp, sizeof(struct psemcache)); /* * Fill in cache info, if vp is NULL this is a "negative" cache entry. * For negative entries, we have to record whether it is a whiteout. 
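
psem_lock_init() above replaces the old funnel-based serialization with a Mach mutex built through the lck_* API, and the same four-step setup recurs later in this patch for the POSIX shm subsystem. A minimal self-contained sketch of that sequence, with generic names substituted for the psx_sem_subsys_* globals:

    #include <kern/locks.h>

    static lck_grp_attr_t *grp_attr;   /* group attributes (statistics)  */
    static lck_grp_t      *grp;        /* lock group, for accounting     */
    static lck_attr_t     *attr;       /* per-lock attributes            */
    static lck_mtx_t       mtx;        /* the mutex itself               */

    void
    subsys_lock_init(void)
    {
        grp_attr = lck_grp_attr_alloc_init();
        lck_grp_attr_setstat(grp_attr);          /* opt in to group stats */
        grp  = lck_grp_alloc_init("subsys", grp_attr);
        attr = lck_attr_alloc_init();
        lck_mtx_init(&mtx, grp, attr);           /* ready for lck_mtx_lock() */
    }

Note how sem_open() then drops this mutex around every blocking MALLOC and re-validates cached state after reacquiring it; the psem_cache_add() signature change above, which moves the psemcache allocation out to the caller, exists precisely so the entry can be allocated outside the lock and freed if the re-check loses the race.
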
@@ -241,7 +279,7 @@ psem_cache_add(psemp, pnp) pcpp = PSEMHASH(pnp); #if DIAGNOSTIC { - register struct psemcache *p; + struct psemcache *p; for (p = pcpp->lh_first; p != 0; p = p->psem_hash.le_next) if (p == pcp) @@ -256,14 +294,13 @@ psem_cache_add(psemp, pnp) * Name cache initialization, from vfs_init() when we are booting */ void -psem_cache_init() +psem_cache_init(void) { psemhashtbl = hashinit(desiredvnodes, M_SHM, &psemhash); } static void -psem_cache_delete(pcp) - struct psemcache *pcp; +psem_cache_delete(struct psemcache *pcp) { #if DIAGNOSTIC if (pcp->psem_hash.le_prev == 0) @@ -276,6 +313,7 @@ psem_cache_delete(pcp) psemnument--; } +#if NOT_USED /* * Invalidate a all entries to particular vnode. * @@ -284,40 +322,29 @@ psem_cache_delete(pcp) * need to ditch the entire cache, to avoid confusion. No valid vnode will * ever have (v_id == 0). */ -void +static void psem_cache_purge(void) { struct psemcache *pcp; struct psemhashhead *pcpp; for (pcpp = &psemhashtbl[psemhash]; pcpp >= psemhashtbl; pcpp--) { - while (pcp = pcpp->lh_first) + while ( (pcp = pcpp->lh_first) ) psem_cache_delete(pcp); } } - -struct sem_open_args { - const char *name; - int oflag; - int mode; - int value; -}; +#endif /* NOT_USED */ int -sem_open(p, uap, retval) - struct proc *p; - register struct sem_open_args *uap; - register_t *retval; +sem_open(struct proc *p, struct sem_open_args *uap, user_addr_t *retval) { - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - register struct vnode *vp; - int i; - struct file *nfp; - int type, indx, error; + struct fileproc *fp; + size_t i; + struct fileproc *nfp; + int indx, error; struct psemname nd; struct pseminfo *pinfo; - extern struct fileops psemops; + struct psemcache *pcp; char * pnbuf; char * nameptr; char * cp; @@ -334,18 +361,20 @@ sem_open(p, uap, retval) AUDIT_ARG(fflags, uap->oflag); AUDIT_ARG(mode, uap->mode); AUDIT_ARG(value, uap->value); + pinfo = PSEMINFO_NULL; - MALLOC_ZONE(pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (pnbuf == NULL) + return(ENOSPC); + pathlen = MAXPATHLEN; - error = copyinstr((void *)uap->name, pnbuf, - MAXPATHLEN, &pathlen); + error = copyinstr(uap->name, pnbuf, MAXPATHLEN, &pathlen); if (error) { goto bad; } AUDIT_ARG(text, pnbuf); - if (pathlen > PSEMNAMLEN) { + if ( (pathlen > PSEMNAMLEN) ) { error = ENAMETOOLONG; goto bad; } @@ -374,9 +403,16 @@ sem_open(p, uap, retval) nd.psem_hash += (unsigned char)*cp * i; } +#if KTRACE + if (KTRPOINT(p, KTR_NAMEI)) + ktrnamei(p->p_tracep, nameptr); +#endif + + PSEM_SUBSYS_LOCK(); error = psem_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { + PSEM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; @@ -387,10 +423,12 @@ sem_open(p, uap, retval) incache = 1; fmode = FFLAGS(uap->oflag); - if (error = falloc(p, &nfp, &indx)) { + PSEM_SUBSYS_UNLOCK(); + error = falloc(p, &nfp, &indx); + if (error) goto bad; - } + PSEM_SUBSYS_LOCK(); fp = nfp; cmode &= ALLPERMS; @@ -401,7 +439,8 @@ sem_open(p, uap, retval) } #endif AUDIT_ARG(posix_ipc_perm, pinfo->psem_uid, - pinfo->psem_gid, pinfo->psem_mode); + pinfo->psem_gid, pinfo->psem_mode); + PSEM_SUBSYS_UNLOCK(); error = EEXIST; goto bad1; } @@ -410,58 +449,96 @@ sem_open(p, uap, retval) fmode &= ~O_CREAT; } - if (fmode & O_CREAT) { + if ( (fmode & O_CREAT) ) { if((value < 0) && (value > SEM_VALUE_MAX)) { + PSEM_SUBSYS_UNLOCK(); error = EINVAL; goto bad1; } - pinfo = (struct pseminfo *)_MALLOC(sizeof(struct pseminfo), M_SHM, M_WAITOK); - bzero(pinfo, 
sizeof(struct pseminfo)); + PSEM_SUBSYS_UNLOCK(); + MALLOC(pinfo, struct pseminfo *, sizeof(struct pseminfo), M_SHM, M_WAITOK|M_ZERO); + if (pinfo == NULL) { + error = ENOSPC; + goto bad1; + } + PSEM_SUBSYS_LOCK(); + pinfo_alloc = 1; pinfo->psem_flags = PSEM_DEFINED | PSEM_INCREATE; pinfo->psem_usecount = 1; pinfo->psem_mode = cmode; - pinfo->psem_uid = p->p_ucred->cr_uid; - pinfo->psem_gid = p->p_ucred->cr_gid; + pinfo->psem_uid = kauth_cred_getuid(kauth_cred_get()); + pinfo->psem_gid = kauth_cred_get()->cr_gid; + PSEM_SUBSYS_UNLOCK(); kret = semaphore_create(kernel_task, &pinfo->psem_semobject, SYNC_POLICY_FIFO, value); if(kret != KERN_SUCCESS) goto bad3; + PSEM_SUBSYS_LOCK(); pinfo->psem_flags &= ~PSEM_DEFINED; pinfo->psem_flags |= PSEM_ALLOCATED; pinfo->sem_proc = p; } else { /* semaphore should exist as it is without O_CREAT */ if (!incache) { + PSEM_SUBSYS_UNLOCK(); error = ENOENT; goto bad1; } if( pinfo->psem_flags & PSEM_INDELETE) { + PSEM_SUBSYS_UNLOCK(); error = ENOENT; goto bad1; } AUDIT_ARG(posix_ipc_perm, pinfo->psem_uid, - pinfo->psem_gid, pinfo->psem_mode); - if (error = psem_access(pinfo, fmode, p->p_ucred, p)) + pinfo->psem_gid, pinfo->psem_mode); + if ( (error = psem_access(pinfo, fmode, kauth_cred_get())) ) { + PSEM_SUBSYS_UNLOCK(); goto bad1; + } } - pnode = (struct psemnode *)_MALLOC(sizeof(struct psemnode), M_SHM, M_WAITOK); - bzero(pnode, sizeof(struct psemnode)); + PSEM_SUBSYS_UNLOCK(); + MALLOC(pnode, struct psemnode *, sizeof(struct psemnode), M_SHM, M_WAITOK|M_ZERO); + if (pnode == NULL) { + error = ENOSPC; + goto bad1; + } + if (!incache) { + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. + */ + MALLOC(pcp, struct psemcache *, sizeof(struct psemcache), M_SHM, M_WAITOK|M_ZERO); + if (pcp == NULL) { + error = ENOMEM; + goto bad2; + } + } + PSEM_SUBSYS_LOCK(); if (!incache) { - if (error = psem_cache_add(pinfo, &nd)) { - goto bad2; + if ( (error = psem_cache_add(pinfo, &nd, pcp)) ) { + PSEM_SUBSYS_UNLOCK(); + FREE(pcp, M_SHM); + goto bad2; } } pinfo->psem_flags &= ~PSEM_INCREATE; pinfo->psem_usecount++; pnode->pinfo = pinfo; + PSEM_SUBSYS_UNLOCK(); + + proc_fdlock(p); fp->f_flag = fmode & FMASK; fp->f_type = DTYPE_PSXSEM; fp->f_ops = &psemops; fp->f_data = (caddr_t)pnode; *fdflags(p, indx) &= ~UF_RESERVED; - *retval = indx; + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + + *retval = CAST_USER_ADDR_T(indx); FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (0); @@ -476,36 +553,33 @@ bad3: } goto bad1; bad2: - _FREE(pnode, M_SHM); - if (pinfo_alloc) - _FREE(pinfo, M_SHM); + FREE(pnode, M_SHM); bad1: - fdrelse(p, indx); - ffree(nfp); + if (pinfo_alloc) + FREE(pinfo, M_SHM); + fp_free(p, indx, nfp); bad: FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); } -int -psem_access(pinfo, mode, cred, p) - struct pseminfo *pinfo; - int mode; - struct ucred *cred; - struct proc *p; +/* + * XXX This code is repeated in several places + */ +static int +psem_access(struct pseminfo *pinfo, int mode, kauth_cred_t cred) { mode_t mask; - register gid_t *gp; - int i, error; + int is_member; /* Otherwise, user id 0 always gets access. */ - if (cred->cr_uid == 0) + if (!suser(cred, NULL)) return (0); mask = 0; /* Otherwise, check the owner. 
*/ - if (cred->cr_uid == pinfo->psem_uid) { + if (kauth_cred_getuid(cred) == pinfo->psem_uid) { if (mode & FREAD) mask |= S_IRUSR; if (mode & FWRITE) @@ -514,14 +588,13 @@ psem_access(pinfo, mode, cred, p) } /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (pinfo->psem_gid == *gp) { - if (mode & FREAD) - mask |= S_IRGRP; - if (mode & FWRITE) - mask |= S_IWGRP; - return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES); - } + if (kauth_cred_ismember_gid(cred, pinfo->psem_gid, &is_member) == 0 && is_member) { + if (mode & FREAD) + mask |= S_IRGRP; + if (mode & FWRITE) + mask |= S_IWGRP; + return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES); + } /* Otherwise, check everyone else. */ if (mode & FREAD) @@ -531,40 +604,28 @@ psem_access(pinfo, mode, cred, p) return ((pinfo->psem_mode & mask) == mask ? 0 : EACCES); } -struct sem_unlink_args { - const char *name; -}; - int -sem_unlink(p, uap, retval) - struct proc *p; - register struct sem_unlink_args *uap; - register_t *retval; +sem_unlink(__unused struct proc *p, struct sem_unlink_args *uap, __unused register_t *retval) { - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - int flags, i; + size_t i; int error=0; struct psemname nd; struct pseminfo *pinfo; - extern struct fileops psemops; char * pnbuf; char * nameptr; char * cp; size_t pathlen, plen; - int fmode, cmode ; int incache = 0; - struct psemnode * pnode = PSEMNODE_NULL; struct psemcache *pcache = PSEMCACHE_NULL; - kern_return_t kret; pinfo = PSEMINFO_NULL; - MALLOC_ZONE(pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (pnbuf == NULL) { + return(ENOSPC); /* XXX non-standard */ + } pathlen = MAXPATHLEN; - error = copyinstr((void *)uap->name, pnbuf, - MAXPATHLEN, &pathlen); + error = copyinstr(uap->name, pnbuf, MAXPATHLEN, &pathlen); if (error) { goto bad; } @@ -599,29 +660,37 @@ sem_unlink(p, uap, retval) nd.psem_hash += (unsigned char)*cp * i; } + PSEM_SUBSYS_LOCK(); error = psem_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { + PSEM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; } if (!error) { + PSEM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; } else incache = 1; - if (error = psem_access(pinfo, pinfo->psem_mode, p->p_ucred, p)) + if ( (error = psem_access(pinfo, pinfo->psem_mode, kauth_cred_get())) ) { + PSEM_SUBSYS_UNLOCK(); goto bad; + } if ((pinfo->psem_flags & (PSEM_DEFINED | PSEM_ALLOCATED))==0) { + PSEM_SUBSYS_UNLOCK(); return (EINVAL); } - if (pinfo->psem_flags & PSEM_INDELETE) { + if ( (pinfo->psem_flags & PSEM_INDELETE) ) { + PSEM_SUBSYS_UNLOCK(); error = 0; goto bad; } + AUDIT_ARG(posix_ipc_perm, pinfo->psem_uid, pinfo->psem_gid, pinfo->psem_mode); @@ -630,122 +699,127 @@ sem_unlink(p, uap, retval) if (!pinfo->psem_usecount) { psem_delete(pinfo); - _FREE(pinfo,M_SHM); + FREE(pinfo,M_SHM); } else pinfo->psem_flags |= PSEM_REMOVED; psem_cache_delete(pcache); - _FREE(pcache, M_SHM); + PSEM_SUBSYS_UNLOCK(); + FREE(pcache, M_SHM); error = 0; bad: FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); } -struct sem_close_args { - sem_t *sem; -}; - int -sem_close(p, uap, retval) - struct proc *p; - struct sem_close_args *uap; - register_t *retval; +sem_close(struct proc *p, struct sem_close_args *uap, __unused register_t *retval) { - int fd = (int)uap->sem; - register struct filedesc *fdp = p->p_fd; - register struct file *fp; + int fd = CAST_DOWN(int,uap->sem); + struct fileproc *fp; int error = 0; AUDIT_ARG(fd, fd); /* XXX 
This seems wrong; uap->sem is a pointer */ - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) - return (EBADF); - fdrelse(p, fd); - if( error = closef(fp, p)) + + proc_fdlock(p); + error = fp_lookup(p,fd, &fp, 1); + if (error) { + proc_fdunlock(p); return(error); - return(0); + } + fdrelse(p, fd); + error = closef_locked(fp, fp->f_fglob, p); + FREE_ZONE(fp, sizeof *fp, M_FILEPROC); + proc_fdunlock(p); + return(error); } -struct sem_wait_args { - sem_t *sem; -}; - int -sem_wait(p, uap, retval) - struct proc *p; - struct sem_wait_args *uap; - register_t *retval; +sem_wait(struct proc *p, struct sem_wait_args *uap, __unused register_t *retval) { - int fd = (int)uap->sem; - register struct filedesc *fdp = p->p_fd; - struct file *fp; + int fd = CAST_DOWN(int,uap->sem); + struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; kern_return_t kret; int error; - if (error = fdgetf(p, (int)uap->sem, &fp)) + error = fp_getfpsem(p, fd, &fp, &pnode); + if (error) return (error); - if (fp->f_type != DTYPE_PSXSEM) - return(EBADF); - if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) - return(EINVAL); - if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) - return(EINVAL); + if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) { + error = EINVAL; + goto out; + } + PSEM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) { + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; + } if ((pinfo->psem_flags & (PSEM_DEFINED | PSEM_ALLOCATED)) != PSEM_ALLOCATED) { - return(EINVAL); + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; } + PSEM_SUBSYS_UNLOCK(); kret = semaphore_wait(pinfo->psem_semobject); switch (kret) { case KERN_INVALID_ADDRESS: case KERN_PROTECTION_FAILURE: - return (EACCES); + error = EACCES; + break; case KERN_ABORTED: case KERN_OPERATION_TIMED_OUT: - return (EINTR); + error = EINTR; + break; case KERN_SUCCESS: - return(0); + error = 0; + break; default: - return (EINVAL); + error = EINVAL; + break; } -} +out: + fp_drop(p, fd, fp, 0); + return(error); -struct sem_trywait_args { - sem_t *sem; -}; +} int -sem_trywait(p, uap, retval) - struct proc *p; - struct sem_trywait_args *uap; - register_t *retval; +sem_trywait(struct proc *p, struct sem_trywait_args *uap, __unused register_t *retval) { - int fd = (int)uap->sem; - register struct filedesc *fdp = p->p_fd; - struct file *fp; + int fd = CAST_DOWN(int,uap->sem); + struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; kern_return_t kret; mach_timespec_t wait_time; int error; - if (error = fdgetf(p, (int)uap->sem, &fp)) + error = fp_getfpsem(p, fd, &fp, &pnode); + if (error) return (error); - if (fp->f_type != DTYPE_PSXSEM) - return(EBADF); - if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) - return(EINVAL); - if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) - return(EINVAL); + if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) { + error = EINVAL; + goto out; + } + PSEM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) { + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; + } if ((pinfo->psem_flags & (PSEM_DEFINED | PSEM_ALLOCATED)) != PSEM_ALLOCATED) { - return(EINVAL); + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; } + PSEM_SUBSYS_UNLOCK(); wait_time.tv_sec = 0; wait_time.tv_nsec = 0; @@ -753,121 +827,112 @@ sem_trywait(p, uap, retval) switch (kret) { case KERN_INVALID_ADDRESS: case KERN_PROTECTION_FAILURE: - return (EINVAL); + error = EINVAL; + break; case 
KERN_ABORTED: - return (EINTR); + error = EINTR; + break; case KERN_OPERATION_TIMED_OUT: - return (EAGAIN); + error = EAGAIN; + break; case KERN_SUCCESS: - return(0); + error = 0; + break; default: - return (EINVAL); + error = EINVAL; + break; } +out: + fp_drop(p, fd, fp, 0); + return(error); } -struct sem_post_args { - sem_t *sem; -}; - int -sem_post(p, uap, retval) - struct proc *p; - struct sem_post_args *uap; - register_t *retval; +sem_post(struct proc *p, struct sem_post_args *uap, __unused register_t *retval) { - int fd = (int)uap->sem; - register struct filedesc *fdp = p->p_fd; - struct file *fp; + int fd = CAST_DOWN(int,uap->sem); + struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; kern_return_t kret; int error; - if (error = fdgetf(p, (int)uap->sem, &fp)) + error = fp_getfpsem(p, fd, &fp, &pnode); + if (error) return (error); - if (fp->f_type != DTYPE_PSXSEM) - return(EBADF); - if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) - return(EINVAL); - if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) - return(EINVAL); + if (((pnode = (struct psemnode *)fp->f_data)) == PSEMNODE_NULL ) { + error = EINVAL; + goto out; + } + PSEM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) { + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; + } if ((pinfo->psem_flags & (PSEM_DEFINED | PSEM_ALLOCATED)) != PSEM_ALLOCATED) { - return(EINVAL); + PSEM_SUBSYS_UNLOCK(); + error = EINVAL; + goto out; } + PSEM_SUBSYS_UNLOCK(); kret = semaphore_signal(pinfo->psem_semobject); switch (kret) { case KERN_INVALID_ADDRESS: case KERN_PROTECTION_FAILURE: - return (EINVAL); + error = EINVAL; + break; case KERN_ABORTED: case KERN_OPERATION_TIMED_OUT: - return (EINTR); + error = EINTR; + break; case KERN_SUCCESS: - return(0); + error = 0; + break; default: - return (EINVAL); + error = EINVAL; + break; } +out: + fp_drop(p, fd, fp, 0); + return(error); } -struct sem_init_args { - sem_t *sem; - int phsared; - unsigned int value; -}; - int -sem_init(p, uap, retval) - struct proc *p; - struct sem_init_args *uap; - register_t *retval; +sem_init(__unused struct proc *p, __unused struct sem_init_args *uap, __unused register_t *retval) { return(ENOSYS); } -struct sem_destroy_args { - sem_t *sem; -}; - int -sem_destroy(p, uap, retval) - struct proc *p; - struct sem_destroy_args *uap; - register_t *retval; +sem_destroy(__unused struct proc *p, __unused struct sem_destroy_args *uap, __unused register_t *retval) { return(ENOSYS); } -struct sem_getvalue_args { - sem_t *sem; - int * sval; -}; - int -sem_getvalue(p, uap, retval) - struct proc *p; - struct sem_getvalue_args *uap; - register_t *retval; +sem_getvalue(__unused struct proc *p, __unused struct sem_getvalue_args *uap, __unused register_t *retval) { return(ENOSYS); } static int -psem_close(pnode, flags, cred, p) - register struct psemnode *pnode; - int flags; - struct ucred *cred; - struct proc *p; +psem_close(struct psemnode *pnode, __unused int flags, + __unused kauth_cred_t cred, __unused struct proc *p) { int error=0; - kern_return_t kret; register struct pseminfo *pinfo; - if ((pinfo = pnode->pinfo) == PSEMINFO_NULL) + PSEM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSEMINFO_NULL){ + PSEM_SUBSYS_UNLOCK(); return(EINVAL); + } if ((pinfo->psem_flags & PSEM_ALLOCATED) != PSEM_ALLOCATED) { + PSEM_SUBSYS_UNLOCK(); return(EINVAL); } #if DIAGNOSTIC @@ -878,24 +943,33 @@ psem_close(pnode, flags, cred, p) pinfo->psem_usecount--; if ((pinfo->psem_flags & PSEM_REMOVED) && !pinfo->psem_usecount) { + PSEM_SUBSYS_UNLOCK(); + /* lock 
dropped as only semaphore is destroyed here */ error = psem_delete(pinfo); - _FREE(pinfo,M_SHM); + FREE(pinfo,M_SHM); + } else { + PSEM_SUBSYS_UNLOCK(); } - _FREE(pnode, M_SHM); + /* subsystem lock is dropped when we get here */ + FREE(pnode, M_SHM); return (error); } static int -psem_closefile(fp, p) - struct file *fp; +psem_closefile(fg, p) + struct fileglob *fg; struct proc *p; { + int error; - return (psem_close(((struct psemnode *)fp->f_data), fp->f_flag, - fp->f_cred, p)); + /* Not locked as psem_close is called only from here and is locked properly */ + error = psem_close(((struct psemnode *)fg->fg_data), fg->fg_flag, + fg->fg_cred, p); + + return(error); } -int +static int psem_delete(struct pseminfo * pinfo) { kern_return_t kret; @@ -917,53 +991,39 @@ psem_delete(struct pseminfo * pinfo) } static int -psem_read(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +psem_read(__unused struct fileproc *fp, __unused struct uio *uio, + __unused kauth_cred_t cred, __unused int flags, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -psem_write(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +psem_write(__unused struct fileproc *fp, __unused struct uio *uio, + __unused kauth_cred_t cred, __unused int flags, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -psem_ioctl(fp, com, data, p) - struct file *fp; - u_long com; - caddr_t data; - struct proc *p; +psem_ioctl(__unused struct fileproc *fp, __unused u_long com, + __unused caddr_t data, __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -psem_select(fp, which, wql, p) - struct file *fp; - int which; - void *wql; - struct proc *p; +psem_select(__unused struct fileproc *fp, __unused int which, + __unused void *wql, __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -psem_kqfilter(fp, kn, p) - struct file *fp; - struct knote *kn; - struct proc *p; +psem_kqfilter(__unused struct fileproc *fp, __unused struct knote *kn, + __unused struct proc *p) { - return (EOPNOTSUPP); + return (ENOTSUP); } diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index 49e035a40..f44264c37 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -39,11 +39,11 @@ #include #include #include -#include +#include #include #include -#include -#include +#include +#include #include #include #include @@ -51,16 +51,34 @@ #include #include #include +#include +#include #include #include +#include +#include #include #include #include #include +#include +#include +#include + +#if KTRACE +#include +#endif +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data #define PSHMNAMLEN 31 /* maximum name segment length we bother with */ struct pshminfo { @@ -113,8 +131,8 @@ struct pshmname { }; struct pshmnode { - off_t mapp_addr; - size_t map_size; + off_t mapp_addr; + user_size_t map_size; struct pshminfo *pinfo; unsigned int pshm_usecount; #if DIAGNOSTIC @@ -127,25 +145,59 @@ struct pshmnode { #define PSHMHASH(pnp) \ (&pshmhashtbl[(pnp)->pshm_hash & pshmhash]) + LIST_HEAD(pshmhashhead, pshmcache) *pshmhashtbl; /* Hash Table */ u_long pshmhash; /* size of hash table - 1 */ long pshmnument; /* number of cache entries allocated */ 
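
Before the posix_shm.c cache and locking changes that follow, it helps to keep the user-level life cycle in mind, since each libc call lands in one of the kernel paths patched here. A hypothetical user-space sketch (standard POSIX API; the object name and size are made up):

    #include <sys/mman.h>
    #include <fcntl.h>
    #include <unistd.h>

    int
    shm_example(void)
    {
        /* shm_open() -> kernel shm_open(): allocates the pshminfo      */
        int fd = shm_open("/example", O_CREAT | O_RDWR, 0600);
        if (fd < 0)
            return (-1);
        /* ftruncate() -> pshm_truncate(): backs the object with a Mach
         * memory entry; note shm_open(O_TRUNC) is rejected with EINVAL */
        if (ftruncate(fd, 4096) < 0)
            return (-1);
        /* mmap() -> pshm_mmap(): maps the memory entry with
         * VM_INHERIT_SHARE into the caller's address space             */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
            return (-1);
        /* shm_unlink() -> kernel shm_unlink(): drops the name's
         * existence reference; the object persists until last close    */
        shm_unlink("/example");
        close(fd);
        return (0);
    }
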
struct pshmstats pshmstats; /* cache effectiveness statistics */ -static int pshm_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int pshm_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int pshm_ioctl __P((struct file *fp, u_long com, - caddr_t data, struct proc *p)); -static int pshm_select __P((struct file *fp, int which, void *wql, - struct proc *p)); -static int pshm_closefile __P((struct file *fp, struct proc *p)); - -static int pshm_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p)); +static int pshm_read (struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int pshm_write (struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int pshm_ioctl (struct fileproc *fp, u_long com, + caddr_t data, struct proc *p); +static int pshm_select (struct fileproc *fp, int which, void *wql, struct proc *p); +static int pshm_close(struct pshmnode *pnode); +static int pshm_closefile (struct fileglob *fg, struct proc *p); + +static int pshm_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); + +int pshm_access(struct pshminfo *pinfo, int mode, kauth_cred_t cred, struct proc *p); +static int pshm_cache_add(struct pshminfo *pshmp, struct pshmname *pnp, struct pshmcache *pcp); +static void pshm_cache_delete(struct pshmcache *pcp); +#if NOT_USED +static void pshm_cache_purge(void); +#endif /* NOT_USED */ +static int pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp, + struct pshmcache **pcache); struct fileops pshmops = - { pshm_read, pshm_write, pshm_ioctl, pshm_select, pshm_closefile, pshm_kqfilter }; + { pshm_read, pshm_write, pshm_ioctl, pshm_select, pshm_closefile, pshm_kqfilter, 0 }; + +static lck_grp_t *psx_shm_subsys_lck_grp; +static lck_grp_attr_t *psx_shm_subsys_lck_grp_attr; +static lck_attr_t *psx_shm_subsys_lck_attr; +static lck_mtx_t psx_shm_subsys_mutex; + +#define PSHM_SUBSYS_LOCK() lck_mtx_lock(& psx_shm_subsys_mutex) +#define PSHM_SUBSYS_UNLOCK() lck_mtx_unlock(& psx_shm_subsys_mutex) + + +/* Initialize the mutex governing access to the posix shm subsystem */ +__private_extern__ void +pshm_lock_init( void ) +{ + + psx_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(psx_shm_subsys_lck_grp_attr); + + psx_shm_subsys_lck_grp = lck_grp_alloc_init("posix shared memory", psx_shm_subsys_lck_grp_attr); + + psx_shm_subsys_lck_attr = lck_attr_alloc_init(); + /* lck_attr_setdebug(psx_shm_subsys_lck_attr); */ + lck_mtx_init(& psx_shm_subsys_mutex, psx_shm_subsys_lck_grp, psx_shm_subsys_lck_attr); +} /* * Lookup an entry in the cache @@ -157,14 +209,12 @@ struct fileops pshmops = * fails, a status of zero is returned. */ -int -pshm_cache_search(pshmp, pnp, pcache) - struct pshminfo **pshmp; - struct pshmname *pnp; - struct pshmcache **pcache; +static int +pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp, + struct pshmcache **pcache) { - register struct pshmcache *pcp, *nnp; - register struct pshmhashhead *pcpp; + struct pshmcache *pcp, *nnp; + struct pshmhashhead *pcpp; if (pnp->pshm_namelen > PSHMNAMLEN) { pshmstats.longnames++; @@ -203,14 +253,12 @@ pshm_cache_search(pshmp, pnp, pcache) /* * Add an entry to the cache. + * XXX should be static? 
*/ -int -pshm_cache_add(pshmp, pnp) - struct pshminfo *pshmp; - struct pshmname *pnp; +static int +pshm_cache_add(struct pshminfo *pshmp, struct pshmname *pnp, struct pshmcache *pcp) { - register struct pshmcache *pcp; - register struct pshmhashhead *pcpp; + struct pshmhashhead *pcpp; struct pshminfo *dpinfo; struct pshmcache *dpcp; @@ -219,20 +267,13 @@ pshm_cache_add(pshmp, pnp) panic("cache_enter: name too long"); #endif - /* - * We allocate a new entry if we are less than the maximum - * allowed and the one at the front of the LRU list is in use. - * Otherwise we use the one at the front of the LRU list. - */ - pcp = (struct pshmcache *)_MALLOC(sizeof(struct pshmcache), M_SHM, M_WAITOK); + /* if the entry has already been added by some one else return */ if (pshm_cache_search(&dpinfo, pnp, &dpcp) == -1) { - _FREE(pcp, M_SHM); return(EEXIST); } pshmnument++; - bzero(pcp, sizeof(struct pshmcache)); /* * Fill in cache info, if vp is NULL this is a "negative" cache entry. * For negative entries, we have to record whether it is a whiteout. @@ -245,7 +286,7 @@ pshm_cache_add(pshmp, pnp) pcpp = PSHMHASH(pnp); #if DIAGNOSTIC { - register struct pshmcache *p; + struct pshmcache *p; for (p = pcpp->lh_first; p != 0; p = p->pshm_hash.le_next) if (p == pcp) @@ -260,11 +301,12 @@ pshm_cache_add(pshmp, pnp) * Name cache initialization, from vfs_init() when we are booting */ void -pshm_cache_init() +pshm_cache_init(void) { pshmhashtbl = hashinit(desiredvnodes, M_SHM, &pshmhash); } +#if NOT_USED /* * Invalidate a all entries to particular vnode. * @@ -273,20 +315,21 @@ pshm_cache_init() * need to ditch the entire cache, to avoid confusion. No valid vnode will * ever have (v_id == 0). */ -void +static void pshm_cache_purge(void) { struct pshmcache *pcp; struct pshmhashhead *pcpp; for (pcpp = &pshmhashtbl[pshmhash]; pcpp >= pshmhashtbl; pcpp--) { - while (pcp = pcpp->lh_first) + while ( (pcp = pcpp->lh_first) ) pshm_cache_delete(pcp); } } +#endif /* NOT_USED */ -pshm_cache_delete(pcp) - struct pshmcache *pcp; +static void +pshm_cache_delete(struct pshmcache *pcp) { #if DIAGNOSTIC if (pcp->pshm_hash.le_prev == 0) @@ -300,27 +343,15 @@ pshm_cache_delete(pcp) } -struct shm_open_args { - const char *name; - int oflag; - int mode; -}; - int -shm_open(p, uap, retval) - struct proc *p; - register struct shm_open_args *uap; - register_t *retval; +shm_open(struct proc *p, struct shm_open_args *uap, register_t *retval) { - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - register struct vnode *vp; - int i; - struct file *nfp; - int type, indx, error; + struct fileproc *fp; + size_t i; + struct fileproc *nfp; + int indx, error; struct pshmname nd; struct pshminfo *pinfo; - extern struct fileops pshmops; char * pnbuf; char * nameptr; char * cp; @@ -330,17 +361,21 @@ shm_open(p, uap, retval) int incache = 0; struct pshmnode * pnode = PSHMNODE_NULL; struct pshmcache * pcache = PSHMCACHE_NULL; + struct pshmcache *pcp; int pinfo_alloc=0; AUDIT_ARG(fflags, uap->oflag); AUDIT_ARG(mode, uap->mode); + pinfo = PSHMINFO_NULL; - MALLOC_ZONE(pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (pnbuf == NULL) { + return(ENOSPC); + } + pathlen = MAXPATHLEN; - error = copyinstr((void *)uap->name, (void *)pnbuf, - MAXPATHLEN, &pathlen); + error = copyinstr(uap->name, (void *)pnbuf, MAXPATHLEN, &pathlen); if (error) { goto bad; } @@ -375,9 +410,16 @@ shm_open(p, uap, retval) nd.pshm_hash += (unsigned char)*cp * i; } +#if KTRACE + if (KTRPOINT(p, 
KTR_NAMEI)) + ktrnamei(p->p_tracep, nameptr); +#endif + + PSHM_SUBSYS_LOCK(); error = pshm_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { + PSHM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; @@ -388,12 +430,22 @@ shm_open(p, uap, retval) incache = 1; fmode = FFLAGS(uap->oflag); if ((fmode & (FREAD | FWRITE))==0) { + PSHM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; } - if (error = falloc(p, &nfp, &indx)) + /* + * XXXXXXXXXX TBD XXXXXXXXXX + * There is a race that existed with the funnels as well. + * Need to be fixed later + */ + PSHM_SUBSYS_UNLOCK(); + error = falloc(p, &nfp, &indx); + if (error ) goto bad; + PSHM_SUBSYS_LOCK(); + fp = nfp; cmode &= ALLPERMS; @@ -409,43 +461,57 @@ shm_open(p, uap, retval) } #endif error = EEXIST; + PSHM_SUBSYS_UNLOCK(); goto bad1; } if (!incache) { + PSHM_SUBSYS_UNLOCK(); /* create a new one */ - pinfo = (struct pshminfo *)_MALLOC(sizeof(struct pshminfo), M_SHM, M_WAITOK); - bzero(pinfo, sizeof(struct pshminfo)); - pinfo_alloc = 1; + MALLOC(pinfo, struct pshminfo *, sizeof(struct pshminfo), M_SHM, M_WAITOK|M_ZERO); + if (pinfo == NULL) { + error = ENOSPC; + goto bad1; + } + PSHM_SUBSYS_LOCK(); + pinfo_alloc = 1; pinfo->pshm_flags = PSHM_DEFINED | PSHM_INCREATE; - pinfo->pshm_usecount = 1; + pinfo->pshm_usecount = 1; /* existence reference */ pinfo->pshm_mode = cmode; - pinfo->pshm_uid = p->p_ucred->cr_uid; - pinfo->pshm_gid = p->p_ucred->cr_gid; + pinfo->pshm_uid = kauth_cred_getuid(kauth_cred_get()); + pinfo->pshm_gid = kauth_cred_get()->cr_gid; } else { /* already exists */ if( pinfo->pshm_flags & PSHM_INDELETE) { + PSHM_SUBSYS_UNLOCK(); error = ENOENT; goto bad1; } - AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, - pinfo->pshm_gid, pinfo->pshm_mode); - if (error = pshm_access(pinfo, fmode, p->p_ucred, p)) + AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, + pinfo->pshm_gid, pinfo->pshm_mode); + if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) { + PSHM_SUBSYS_UNLOCK(); goto bad1; + } } } else { if (!incache) { /* O_CREAT is not set and the shm obecj does not exist */ + PSHM_SUBSYS_UNLOCK(); error = ENOENT; goto bad1; } if( pinfo->pshm_flags & PSHM_INDELETE) { + PSHM_SUBSYS_UNLOCK(); error = ENOENT; goto bad1; } - if (error = pshm_access(pinfo, fmode, p->p_ucred, p)) + if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) { + PSHM_SUBSYS_UNLOCK(); goto bad1; + } } if (fmode & O_TRUNC) { + PSHM_SUBSYS_UNLOCK(); error = EINVAL; goto bad2; } @@ -455,54 +521,74 @@ shm_open(p, uap, retval) if (fmode & FREAD) pinfo->pshm_readcount++; #endif - pnode = (struct pshmnode *)_MALLOC(sizeof(struct pshmnode), M_SHM, M_WAITOK); - bzero(pnode, sizeof(struct pshmnode)); + PSHM_SUBSYS_UNLOCK(); + MALLOC(pnode, struct pshmnode *, sizeof(struct pshmnode), M_SHM, M_WAITOK|M_ZERO); + if (pnode == NULL) { + error = ENOSPC; + goto bad2; + } + if (!incache) { + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. 
+ */ + MALLOC(pcp, struct pshmcache *, sizeof(struct pshmcache), M_SHM, M_WAITOK|M_ZERO); + if (pcp == NULL) { + error = ENOSPC; + goto bad2; + } + + } + PSHM_SUBSYS_LOCK(); if (!incache) { - if (error = pshm_cache_add(pinfo, &nd)) { - goto bad3; + if ( (error = pshm_cache_add(pinfo, &nd, pcp)) ) { + PSHM_SUBSYS_UNLOCK(); + FREE(pcp, M_SHM); + goto bad3; } } pinfo->pshm_flags &= ~PSHM_INCREATE; - pinfo->pshm_usecount++; + pinfo->pshm_usecount++; /* extra reference for the new fd */ pnode->pinfo = pinfo; + + PSHM_SUBSYS_UNLOCK(); + proc_fdlock(p); fp->f_flag = fmode & FMASK; fp->f_type = DTYPE_PSXSHM; fp->f_ops = &pshmops; fp->f_data = (caddr_t)pnode; *fdflags(p, indx) &= ~UF_RESERVED; + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + *retval = indx; FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (0); bad3: - _FREE(pnode, M_SHM); + FREE(pnode, M_SHM); bad2: if (pinfo_alloc) - _FREE(pinfo, M_SHM); + FREE(pinfo, M_SHM); bad1: - fdrelse(p, indx); - ffree(nfp); + fp_free(p, indx, fp); bad: FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); } -/* ARGSUSED */ int -pshm_truncate(p, fp, fd, length, retval) - struct proc *p; - struct file *fp; - int fd; - off_t length; - register_t *retval; +pshm_truncate(__unused struct proc *p, struct fileproc *fp, __unused int fd, + off_t length, __unused register_t *retval) { struct pshminfo * pinfo; struct pshmnode * pnode ; kern_return_t kret; vm_offset_t user_addr; - void * mem_object; + mem_entry_name_port_t mem_object; vm_size_t size; if (fp->f_type != DTYPE_PSXSHM) { @@ -513,15 +599,20 @@ pshm_truncate(p, fp, fd, length, retval) if (((pnode = (struct pshmnode *)fp->f_data)) == PSHMNODE_NULL ) return(EINVAL); - if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) + PSHM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); + } if ((pinfo->pshm_flags & (PSHM_DEFINED | PSHM_ALLOCATED)) != PSHM_DEFINED) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); } + PSHM_SUBSYS_UNLOCK(); size = round_page_64(length); - kret = vm_allocate(current_map(), &user_addr, size, TRUE); + kret = vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE); if (kret != KERN_SUCCESS) goto out; @@ -533,10 +624,12 @@ pshm_truncate(p, fp, fd, length, retval) vm_deallocate(current_map(), user_addr, size); + PSHM_SUBSYS_LOCK(); pinfo->pshm_flags &= ~PSHM_DEFINED; pinfo->pshm_flags = PSHM_ALLOCATED; - pinfo->pshm_memobject = mem_object; + pinfo->pshm_memobject = (void *)mem_object; pinfo->pshm_length = size; + PSHM_SUBSYS_UNLOCK(); return(0); out: @@ -553,39 +646,44 @@ out: } int -pshm_stat(pnode, sb) -struct pshmnode *pnode; -struct stat *sb; +pshm_stat(struct pshmnode *pnode, struct stat *sb) { struct pshminfo *pinfo; - if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) + PSHM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSHMINFO_NULL){ + PSHM_SUBSYS_UNLOCK(); return(EINVAL); + } bzero(sb, sizeof(struct stat)); sb->st_mode = pinfo->pshm_mode; sb->st_uid = pinfo->pshm_uid; sb->st_gid = pinfo->pshm_gid; sb->st_size = pinfo->pshm_length; + PSHM_SUBSYS_UNLOCK(); return(0); } +/* + * This is called only from shm_open which holds pshm_lock(); + * XXX This code is repeated many times + */ int -pshm_access(struct pshminfo *pinfo, int mode, struct ucred *cred, struct proc *p) +pshm_access(struct pshminfo *pinfo, int mode, kauth_cred_t cred, __unused struct proc *p) { mode_t mask; - register gid_t *gp; - int i, error; + int is_member; /* Otherwise, user id 0 always gets access. 
*/ - if (cred->cr_uid == 0) + if (!suser(cred, NULL)) return (0); mask = 0; /* Otherwise, check the owner. */ - if (cred->cr_uid == pinfo->pshm_uid) { + if (kauth_cred_getuid(cred) == pinfo->pshm_uid) { if (mode & FREAD) mask |= S_IRUSR; if (mode & FWRITE) @@ -594,14 +692,13 @@ pshm_access(struct pshminfo *pinfo, int mode, struct ucred *cred, struct proc *p } /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (pinfo->pshm_gid == *gp) { - if (mode & FREAD) - mask |= S_IRGRP; - if (mode & FWRITE) - mask |= S_IWGRP; - return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES); - } + if (kauth_cred_ismember_gid(cred, pinfo->pshm_gid, &is_member) == 0 && is_member) { + if (mode & FREAD) + mask |= S_IRGRP; + if (mode & FWRITE) + mask |= S_IWGRP; + return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES); + } /* Otherwise, check everyone else. */ if (mode & FREAD) @@ -611,29 +708,18 @@ pshm_access(struct pshminfo *pinfo, int mode, struct ucred *cred, struct proc *p return ((pinfo->pshm_mode & mask) == mask ? 0 : EACCES); } -struct mmap_args { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - long pad; -#endif - off_t pos; -}; - int -pshm_mmap(struct proc *p, struct mmap_args *uap, register_t *retval, struct file *fp, vm_size_t pageoff) +pshm_mmap(struct proc *p, struct mmap_args *uap, user_addr_t *retval, struct fileproc *fp, off_t pageoff) { - vm_offset_t user_addr = (vm_offset_t)uap->addr; - vm_size_t user_size = (vm_size_t)uap->len ; + mach_vm_offset_t user_addr = (mach_vm_offset_t)uap->addr; + mach_vm_size_t user_size = (mach_vm_size_t)uap->len ; int prot = uap->prot; int flags = uap->flags; vm_object_offset_t file_pos = (vm_object_offset_t)uap->pos; int fd = uap->fd; vm_map_t user_map; - boolean_t find_space,docow; + int alloc_flags; + boolean_t docow; kern_return_t kret; struct pshminfo * pinfo; struct pshmnode * pnode; @@ -653,57 +739,71 @@ pshm_mmap(struct proc *p, struct mmap_args *uap, register_t *retval, struct file if (((pnode = (struct pshmnode *)fp->f_data)) == PSHMNODE_NULL ) return(EINVAL); - if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) + PSHM_SUBSYS_LOCK(); + if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); + } if ((pinfo->pshm_flags & PSHM_ALLOCATED) != PSHM_ALLOCATED) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); } - if (user_size > pinfo->pshm_length) { + if ((off_t)user_size > pinfo->pshm_length) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); } - if ((off_t)user_size + file_pos > pinfo->pshm_length) { + if ((off_t)(user_size + file_pos) > pinfo->pshm_length) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); } if ((mem_object = pinfo->pshm_memobject) == NULL) { + PSHM_SUBSYS_UNLOCK(); return(EINVAL); } - + + PSHM_SUBSYS_UNLOCK(); user_map = current_map(); if ((flags & MAP_FIXED) == 0) { - find_space = TRUE; - user_addr = round_page_32(user_addr); + alloc_flags = VM_FLAGS_ANYWHERE; + user_addr = mach_vm_round_page(user_addr); } else { - if (user_addr != trunc_page_32(user_addr)) + if (user_addr != mach_vm_trunc_page(user_addr)) return (EINVAL); - find_space = FALSE; - (void) vm_deallocate(user_map, user_addr, user_size); + /* + * We do not get rid of the existing mappings here because + * it wouldn't be atomic (see comment in mmap()). We let + * Mach VM know that we want it to replace any existing + * mapping with the new one. 
+ */ + alloc_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } docow = FALSE; - kret = vm_map_64(user_map, &user_addr, user_size, - 0, find_space, pinfo->pshm_memobject, file_pos, docow, + kret = mach_vm_map(user_map, &user_addr, user_size, + 0, alloc_flags, pinfo->pshm_memobject, file_pos, docow, prot, VM_PROT_DEFAULT, - VM_INHERIT_DEFAULT); - + VM_INHERIT_SHARE); if (kret != KERN_SUCCESS) goto out; - kret = vm_inherit(user_map, user_addr, user_size, + /* LP64todo - this should be superfluous at this point */ + kret = mach_vm_inherit(user_map, user_addr, user_size, VM_INHERIT_SHARE); if (kret != KERN_SUCCESS) { - (void) vm_deallocate(user_map, user_addr, user_size); + (void) mach_vm_deallocate(user_map, user_addr, user_size); goto out; } + PSHM_SUBSYS_LOCK(); pnode->mapp_addr = user_addr; pnode->map_size = user_size; pinfo->pshm_flags |= (PSHM_MAPPED | PSHM_INUSE); + PSHM_SUBSYS_UNLOCK(); out: switch (kret) { case KERN_SUCCESS: - *fdflags(p, fd) |= UF_MAPPED; - *retval = (register_t)(user_addr + pageoff); + *retval = (user_addr + pageoff); return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: @@ -716,40 +816,29 @@ out: } -struct shm_unlink_args { - const char *name; -}; - int -shm_unlink(p, uap, retval) - struct proc *p; - register struct shm_unlink_args *uap; - register_t *retval; +shm_unlink(__unused struct proc *p, struct shm_unlink_args *uap, + __unused register_t *retval) { - register struct filedesc *fdp = p->p_fd; - register struct file *fp; - int flags, i; + size_t i; int error=0; struct pshmname nd; struct pshminfo *pinfo; - extern struct fileops pshmops; char * pnbuf; char * nameptr; char * cp; size_t pathlen, plen; - int fmode, cmode ; int incache = 0; - struct pshmnode * pnode = PSHMNODE_NULL; struct pshmcache *pcache = PSHMCACHE_NULL; - kern_return_t kret; pinfo = PSHMINFO_NULL; - MALLOC_ZONE(pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); + MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (pnbuf == NULL) { + return(ENOSPC); /* XXX non-standard */ + } pathlen = MAXPATHLEN; - error = copyinstr((void *)uap->name, (void *)pnbuf, - MAXPATHLEN, &pathlen); + error = copyinstr(uap->name, (void *)pnbuf, MAXPATHLEN, &pathlen); if (error) { goto bad; } @@ -784,65 +873,72 @@ shm_unlink(p, uap, retval) nd.pshm_hash += (unsigned char)*cp * i; } + PSHM_SUBSYS_LOCK(); error = pshm_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { + PSHM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; } if (!error) { + PSHM_SUBSYS_UNLOCK(); error = EINVAL; goto bad; } else incache = 1; if ((pinfo->pshm_flags & (PSHM_DEFINED | PSHM_ALLOCATED))==0) { + PSHM_SUBSYS_UNLOCK(); return (EINVAL); } if (pinfo->pshm_flags & PSHM_INDELETE) { + PSHM_SUBSYS_UNLOCK(); error = 0; goto bad; } - if (pinfo->pshm_memobject == NULL) { - error = EINVAL; - goto bad; - } - AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, pinfo->pshm_gid, pinfo->pshm_mode); + + /* + * JMM - How should permissions be checked? + */ + pinfo->pshm_flags |= PSHM_INDELETE; - pinfo->pshm_usecount--; - kret = mach_destroy_memory_entry(pinfo->pshm_memobject); pshm_cache_delete(pcache); - _FREE(pcache, M_SHM); pinfo->pshm_flags |= PSHM_REMOVED; + /* release the existence reference */ + if (!--pinfo->pshm_usecount) { + PSHM_SUBSYS_UNLOCK(); + /* + * If this is the last reference going away on the object, + * then we need to destroy the backing object. The name + * has an implied but uncounted reference on the object, + * once it's created, since it's used as a rendesvous, and + * therefore may be subsequently reopened. 
+ */ + if (pinfo->pshm_memobject != NULL) + mach_memory_entry_port_release(pinfo->pshm_memobject); + PSHM_SUBSYS_LOCK(); + FREE(pinfo,M_SHM); + } + PSHM_SUBSYS_UNLOCK(); + FREE(pcache, M_SHM); error = 0; bad: FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); -out: - switch (kret) { - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - return (EACCES); - default: - return (EINVAL); - } } -int -pshm_close(pnode, flags, cred, p) - register struct pshmnode *pnode; - int flags; - struct ucred *cred; - struct proc *p; +/* already called locked */ +static int +pshm_close(struct pshmnode *pnode) { int error=0; - kern_return_t kret; - register struct pshminfo *pinfo; + struct pshminfo *pinfo; if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) return(EINVAL); @@ -855,71 +951,68 @@ pshm_close(pnode, flags, cred, p) kprintf("negative usecount in pshm_close\n"); } #endif /* DIAGNOSTIC */ - pinfo->pshm_usecount--; + pinfo->pshm_usecount--; /* release this fd's reference */ if ((pinfo->pshm_flags & PSHM_REMOVED) && !pinfo->pshm_usecount) { - _FREE(pinfo,M_SHM); - } - _FREE(pnode, M_SHM); + PSHM_SUBSYS_UNLOCK(); + /* + * If this is the last reference going away on the object, + * then we need to destroy the backing object. + */ + if (pinfo->pshm_memobject != NULL) + mach_memory_entry_port_release(pinfo->pshm_memobject); + PSHM_SUBSYS_LOCK(); + FREE(pinfo,M_SHM); + } + FREE(pnode, M_SHM); return (error); } +/* struct proc passed to match prototype for struct fileops */ static int -pshm_closefile(fp, p) - struct file *fp; - struct proc *p; +pshm_closefile(struct fileglob *fg, __unused struct proc *p) { - return (pshm_close(((struct pshmnode *)fp->f_data), fp->f_flag, - fp->f_cred, p)); + int error; + + PSHM_SUBSYS_LOCK(); + error = pshm_close(((struct pshmnode *)fg->fg_data)); + PSHM_SUBSYS_UNLOCK(); + return(error); } static int -pshm_read(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +pshm_read(__unused struct fileproc *fp, __unused struct uio *uio, + __unused kauth_cred_t cred, __unused int flags, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -pshm_write(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +pshm_write(__unused struct fileproc *fp, __unused struct uio *uio, + __unused kauth_cred_t cred, __unused int flags, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -pshm_ioctl(fp, com, data, p) - struct file *fp; - u_long com; - caddr_t data; - struct proc *p; +pshm_ioctl(__unused struct fileproc *fp, __unused u_long com, + __unused caddr_t data, __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -pshm_select(fp, which, wql, p) - struct file *fp; - int which; - void *wql; - struct proc *p; +pshm_select(__unused struct fileproc *fp, __unused int which, __unused void *wql, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } static int -pshm_kqfilter(fp, kn, p) - struct file *fp; - struct knote *kn; - struct proc *p; +pshm_kqfilter(__unused struct fileproc *fp, __unused struct knote *kn, + __unused struct proc *p) { - return(EOPNOTSUPP); + return(ENOTSUP); } diff --git a/bsd/kern/qsort.c b/bsd/kern/qsort.c index 7eeb4e408..9ac15c01b 100644 --- a/bsd/kern/qsort.c +++ b/bsd/kern/qsort.c @@ -60,8 +60,8 @@ #include //#include -static inline char *med3 __P((char *, char *, char *, int (*)())); -static inline void swapfunc __P((char *, char *, int, 
int)); +static inline char *med3(char *, char *, char *, int (*)()); +static inline void swapfunc(char *, char *, int, int); #define min(a, b) (a) < (b) ? a : b diff --git a/bsd/kern/spl.c b/bsd/kern/spl.c index 4ab15677d..52768d76b 100644 --- a/bsd/kern/spl.c +++ b/bsd/kern/spl.c @@ -27,8 +27,6 @@ unsigned sploff( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "sploff()"); return(0); } @@ -36,8 +34,6 @@ unsigned splhigh( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splhigh()"); return(0); } @@ -45,8 +41,6 @@ unsigned splsched( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splsched()"); return(0); } @@ -54,8 +48,6 @@ unsigned splclock ( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splclock()"); return(0); } @@ -63,8 +55,6 @@ unsigned splpower ( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splpower()"); return(0); } @@ -72,8 +62,6 @@ unsigned splvm( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splvm()"); return(0); } @@ -81,8 +69,6 @@ unsigned splbio ( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splbio()"); return(0); } @@ -90,16 +76,12 @@ unsigned splimp( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splimp()"); return(0); } unsigned spltty(void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "spltty()"); return(0); } @@ -107,55 +89,42 @@ unsigned splnet( void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splnet()"); return(0); } unsigned splsoftclock(void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splsoftclock()"); return(0); } void spllo(void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "spllo()"); return; } void spl0(void) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "spl0()"); return; } void spln(unsigned t) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "spln()"); return; } void splx(unsigned l) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splx()"); return; } void splon(unsigned l) { - if(thread_funnel_get() == THR_FUNNEL_NULL) - panic("%s not under funnel", "splon()"); return; } + diff --git a/bsd/kern/subr_log.c b/bsd/kern/subr_log.c index 2bab82bac..73dc5cc51 100644 --- a/bsd/kern/subr_log.c +++ b/bsd/kern/subr_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,14 +61,16 @@ #include #include -#include +#include #include #include #include -#include +#include #include #include +#include #include +#include #define LOG_RDPRI (PZERO + 1) @@ -86,16 +88,21 @@ int log_open; /* also used in log() */ struct msgbuf temp_msgbuf; struct msgbuf *msgbufp; static int _logentrypend = 0; +static int log_inited = 0; +void bsd_log_lock(void); +/* the following two are implemented in osfmk/kern/printf.c */ +extern void bsd_log_unlock(void); +extern void bsd_log_init(void); /* * Serialize log access. Note that the log can be written at interrupt level, * so any log manipulations that can be done from, or affect, another processor * at interrupt level must be guarded with a spin lock. 
*/ -decl_simple_lock_data(,log_lock); /* stop races dead in their tracks */ -#define LOG_LOCK() simple_lock(&log_lock) -#define LOG_UNLOCK() simple_unlock(&log_lock) -#define LOG_LOCK_INIT() simple_lock_init(&log_lock) + +#define LOG_LOCK() bsd_log_lock() +#define LOG_UNLOCK() bsd_log_unlock() + /*ARGSUSED*/ logopen(dev, flags, mode, p) @@ -137,9 +144,7 @@ logclose(dev, flag) LOG_LOCK(); log_open = 0; selwakeup(&logsoftc.sc_selp); - oldpri = splhigh(); selthreadclear(&logsoftc.sc_selp); - splx(oldpri); LOG_UNLOCK(); return (0); } @@ -154,42 +159,57 @@ logread(dev, uio, flag) register long l; register int s; int error = 0; + char localbuff[MSG_BSIZE]; + int copybytes; - s = splhigh(); + LOG_LOCK(); while (msgbufp->msg_bufr == msgbufp->msg_bufx) { if (flag & IO_NDELAY) { - splx(s); - return (EWOULDBLOCK); + error = EWOULDBLOCK; + goto out; } if (logsoftc.sc_state & LOG_NBIO) { - splx(s); - return (EWOULDBLOCK); + error = EWOULDBLOCK; + goto out; } logsoftc.sc_state |= LOG_RDWAIT; + LOG_UNLOCK(); + /* + * If the wakeup is missed the lightning bolt will wake this up + * if there are any new characters. If that doesn't do it + * then wait for 5 sec and reevaluate + */ if (error = tsleep((caddr_t)msgbufp, LOG_RDPRI | PCATCH, - "klog", 0)) { - splx(s); - return (error); + "klog", 5 * hz)) { + /* if it times out, ignore */ + if (error != EWOULDBLOCK) + return (error); } + LOG_LOCK(); } - splx(s); logsoftc.sc_state &= ~LOG_RDWAIT; - while (uio->uio_resid > 0) { + + while (uio_resid(uio) > 0) { l = msgbufp->msg_bufx - msgbufp->msg_bufr; if (l < 0) l = MSG_BSIZE - msgbufp->msg_bufr; - l = min(l, uio->uio_resid); + l = min(l, uio_resid(uio)); if (l == 0) break; - error = uiomove((caddr_t)&msgbufp->msg_bufc[msgbufp->msg_bufr], + bcopy(&msgbufp->msg_bufc[msgbufp->msg_bufr], &localbuff[0], l); + LOG_UNLOCK(); + error = uiomove((caddr_t)&localbuff[0], (int)l, uio); + LOG_LOCK(); if (error) break; msgbufp->msg_bufr += l; if (msgbufp->msg_bufr < 0 || msgbufp->msg_bufr >= MSG_BSIZE) msgbufp->msg_bufr = 0; } +out: + LOG_UNLOCK(); return (error); } @@ -201,19 +221,19 @@ logselect(dev, rw, wql, p) void * wql; struct proc *p; { - int s = splhigh(); switch (rw) { case FREAD: + LOG_LOCK(); if (msgbufp->msg_bufr != msgbufp->msg_bufx) { - splx(s); + LOG_UNLOCK(); return (1); } selrecord(p, &logsoftc.sc_selp, wql); + LOG_UNLOCK(); break; } - splx(s); return (0); } @@ -224,24 +244,26 @@ logwakeup() int pgid; boolean_t funnel_state; - if (!log_open) + LOG_LOCK(); + if (!log_open) { + LOG_UNLOCK(); return; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + } selwakeup(&logsoftc.sc_selp); if (logsoftc.sc_state & LOG_ASYNC) { - LOG_LOCK(); pgid = logsoftc.sc_pgid; LOG_UNLOCK(); if (pgid < 0) gsignal(-pgid, SIGIO); else if (p = pfind(pgid)) psignal(p, SIGIO); + LOG_LOCK(); } if (logsoftc.sc_state & LOG_RDWAIT) { wakeup((caddr_t)msgbufp); logsoftc.sc_state &= ~LOG_RDWAIT; } - (void) thread_funnel_set(kernel_flock, funnel_state); + LOG_UNLOCK(); } void @@ -262,13 +284,12 @@ logioctl(dev, com, data, flag) long l; int s; + LOG_LOCK(); switch (com) { /* return number of characters immediately available */ case FIONREAD: - s = splhigh(); l = msgbufp->msg_bufx - msgbufp->msg_bufr; - splx(s); if (l < 0) l += MSG_BSIZE; *(off_t *)data = l; @@ -289,28 +310,28 @@ logioctl(dev, com, data, flag) break; case TIOCSPGRP: - LOG_LOCK(); logsoftc.sc_pgid = *(int *)data; - LOG_UNLOCK(); break; case TIOCGPGRP: - LOG_LOCK(); *(int *)data = logsoftc.sc_pgid; - LOG_UNLOCK(); break; default: + LOG_UNLOCK(); return (-1); } + LOG_UNLOCK();
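The reworked logread() never calls uiomove() with LOG_LOCK() held, since the copyout may fault and block: it stages bytes into a local buffer under the lock, drops the lock for the move, then retakes it to advance the read index. That pattern in isolation (function name is hypothetical; msgbufp and the lock macros are as in the patch):

/* Sketch: copy under the log lock, move to user space unlocked. */
static int
log_copy_chunk(struct uio *uio, long l)
{
	char localbuff[MSG_BSIZE];
	int error;

	LOG_LOCK();
	bcopy(&msgbufp->msg_bufc[msgbufp->msg_bufr], &localbuff[0], l);
	LOG_UNLOCK();
	error = uiomove((caddr_t)&localbuff[0], (int)l, uio);	/* may fault */
	if (error == 0) {
		LOG_LOCK();
		msgbufp->msg_bufr += l;
		if (msgbufp->msg_bufr < 0 || msgbufp->msg_bufr >= MSG_BSIZE)
			msgbufp->msg_bufr = 0;
		LOG_UNLOCK();
	}
	return (error);
}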
return (0); } void -log_init() +bsd_log_init() { - msgbufp = &temp_msgbuf; - LOG_LOCK_INIT(); + if (!log_inited) { + msgbufp = &temp_msgbuf; + log_inited = 1; + } } void @@ -318,8 +339,10 @@ log_putc(char c) { register struct msgbuf *mbp; - if (msgbufp == NULL) - msgbufp =&temp_msgbuf; + if (!log_inited) { + panic("bsd log is not inited"); + } + LOG_LOCK(); mbp = msgbufp; if (mbp-> msg_magic != MSG_MAGIC) { @@ -334,4 +357,6 @@ log_putc(char c) _logentrypend = 1; if (mbp->msg_bufx < 0 || mbp->msg_bufx >= MSG_BSIZE) mbp->msg_bufx = 0; + LOG_UNLOCK(); } + diff --git a/bsd/kern/subr_prf.c b/bsd/kern/subr_prf.c index 8bcf0e3ce..3b77fb394 100644 --- a/bsd/kern/subr_prf.c +++ b/bsd/kern/subr_prf.c @@ -77,14 +77,13 @@ #include #include -#include #include #include #include -#include +#include #include #include -#include +#include #include #include #include @@ -127,8 +126,6 @@ extern int __doprnt(const char *fmt, static void puts(const char *s, int flags, struct tty *ttyp); static void printn(u_long n, int b, int flags, struct tty *ttyp, int zf, int fld_size); -/* MP printf stuff */ -decl_simple_lock_data(,printf_lock) #if NCPUS > 1 boolean_t new_printf_cpu_number; /* do we need to output who we are */ #endif @@ -299,7 +296,6 @@ int prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) int cpun = cpu_number(); if(ttyp == 0) { - simple_lock(&printf_lock); } else TTY_LOCK(ttyp); @@ -317,7 +313,6 @@ int prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) #if NCPUS > 1 if(ttyp == 0) { - simple_unlock(&printf_lock); } else TTY_UNLOCK(ttyp); #endif diff --git a/bsd/kern/subr_prof.c b/bsd/kern/subr_prof.c index 4ffac789b..9b3791f0a 100644 --- a/bsd/kern/subr_prof.c +++ b/bsd/kern/subr_prof.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,13 +58,20 @@ #include #include #include -#include +#include #include #include +#include -#include +#include +#include +#include +#include #include +#include + +extern boolean_t ml_set_interrupts_enabled(boolean_t enable); #ifdef GPROF #include @@ -72,18 +79,25 @@ #include #include -decl_simple_lock_data(,mcount_lock); +lck_spin_t * mcount_lock; +lck_grp_t * mcount_lock_grp; +lck_attr_t * mcount_lock_attr; /* * Froms is actually a bunch of unsigned shorts indexing tos */ struct gmonparam _gmonparam = { GMON_PROF_OFF }; -kmstartup() +/* + * This code uses 32 bit mach object segment information from the currently + * running kernel. + */ +void +kmstartup(void) { char *cp; u_long fromssize, tossize; - struct segment_command *sgp; + struct segment_command *sgp; /* 32 bit mach object file segment */ struct gmonparam *p = &_gmonparam; sgp = getsegbyname("__TEXT"); @@ -120,20 +134,20 @@ kmstartup() p->kcount = (u_short *)cp; cp += p->kcountsize; p->froms = (u_short *)cp; - simple_lock_init(&mcount_lock); + + mcount_lock_grp = lck_grp_alloc_init("MCOUNT", LCK_GRP_ATTR_NULL); + mcount_lock_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(mcount_lock_attr); + mcount_lock = lck_spin_alloc_init(mcount_lock_grp, mcount_lock_attr); + } /* * Return kernel profiling information. 
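mcount_lock moves from decl_simple_lock_data to the lck_* family: a lock group and attribute are allocated once at startup, and the spin lock itself comes from lck_spin_alloc_init(). The same idiom in isolation (group name and function names here are arbitrary placeholders):

/* Sketch of the lck_* allocation idiom used for mcount_lock. */
static lck_grp_t  *prof_lock_grp;
static lck_attr_t *prof_lock_attr;
static lck_spin_t *prof_lock;

void
prof_lock_setup(void)	/* called once at startup */
{
	prof_lock_grp  = lck_grp_alloc_init("profiling", LCK_GRP_ATTR_NULL);
	prof_lock_attr = lck_attr_alloc_init();
	prof_lock      = lck_spin_alloc_init(prof_lock_grp, prof_lock_attr);
}

void
prof_counter_bump(u_short *counter)
{
	lck_spin_lock(prof_lock);	/* short, non-blocking critical section */
	(*counter)++;
	lck_spin_unlock(prof_lock);
}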
*/ int -sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; +sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen) { struct gmonparam *gp = &_gmonparam; int error; @@ -153,18 +167,18 @@ sysctl_doprof(name, namelen, oldp, oldlenp, newp, newlen) startprofclock(kernproc); return (0); case GPROF_COUNT: - return (sysctl_struct(oldp, oldlenp, newp, newlen, - gp->kcount, gp->kcountsize)); + return (sysctl_struct(oldp, oldlenp, newp, newlen, + gp->kcount, gp->kcountsize)); case GPROF_FROMS: return (sysctl_struct(oldp, oldlenp, newp, newlen, - gp->froms, gp->fromssize)); + gp->froms, gp->fromssize)); case GPROF_TOS: return (sysctl_struct(oldp, oldlenp, newp, newlen, - gp->tos, gp->tossize)); + gp->tos, gp->tossize)); case GPROF_GMONPARAM: return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp)); default: - return (EOPNOTSUPP); + return (ENOTSUP); } /* NOTREACHED */ } @@ -191,7 +205,7 @@ mcount( if (p->state != GMON_PROF_ON) return; - usimple_lock(&mcount_lock); + lck_spin_lock(mcount_lock); /* * check that frompcindex is a reasonable pc value. @@ -274,36 +288,26 @@ mcount( } done: - usimple_unlock(&mcount_lock); + lck_spin_unlock(mcount_lock); return; overflow: p->state = GMON_PROF_ERROR; - usimple_unlock(&mcount_lock); + lck_spin_unlock(mcount_lock); printf("mcount: tos overflow\n"); return; } #endif /* GPROF */ -#define PROFILE_LOCK(x) simple_lock(x) -#define PROFILE_UNLOCK(x) simple_unlock(x) +#define PROFILE_LOCK(x) +#define PROFILE_UNLOCK(x) -struct profil_args { - short *bufbase; - u_int bufsize; - u_int pcoffset; - u_int pcscale; -}; int -profil(p, uap, retval) - struct proc *p; - register struct profil_args *uap; - register_t *retval; +profil(struct proc *p, register struct profil_args *uap, __unused register_t *retval) { - register struct uprof *upp = &p->p_stats->p_prof; - struct uprof *upc, *nupc; - int s; + struct uprof *upp = &p->p_stats->p_prof; + int s; if (uap->pcscale > (1 << 16)) return (EINVAL); @@ -313,53 +317,95 @@ profil(p, uap, retval) } /* Block profile interrupts while changing state. 
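profil() now keeps two parallel profile records: struct user_uprof for 64-bit processes, holding a full user_addr_t buffer base, and the legacy struct uprof for 32-bit ones, selected with proc_is64bit() and narrowed with CAST_DOWN. The branching shape, reduced to its skeleton (field and macro names follow the patch):

/* Skeleton of the 64/32-bit split in profil(). */
if (proc_is64bit(p)) {
	struct user_uprof *upp64 = &p->p_stats->user_p_prof;

	upp64->pr_base  = uap->bufbase;		/* full user_addr_t */
	upp64->pr_size  = uap->bufsize;
	upp64->pr_scale = uap->pcscale;
} else {
	struct uprof *upp32 = &p->p_stats->p_prof;

	/* 32-bit process: the user address fits in a caddr_t */
	upp32->pr_base  = CAST_DOWN(caddr_t, uap->bufbase);
	upp32->pr_size  = uap->bufsize;
	upp32->pr_scale = uap->pcscale;
}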
*/ - s = ml_set_interrupts_enabled(FALSE); - PROFILE_LOCK(&upp->pr_lock); - upp->pr_base = (caddr_t)uap->bufbase; - upp->pr_size = uap->bufsize; - upp->pr_off = uap->pcoffset; - upp->pr_scale = uap->pcscale; - - /* remove buffers previously allocated with add_profil() */ - for (upc = upp->pr_next; upc; upc = nupc) { - nupc = upc->pr_next; - kfree(upc, sizeof (struct uprof)); + s = ml_set_interrupts_enabled(FALSE); + + if (proc_is64bit(p)) { + struct user_uprof *user_upp = &p->p_stats->user_p_prof; + struct user_uprof *upc, *nupc; + + PROFILE_LOCK(&user_upp->pr_lock); + user_upp->pr_base = uap->bufbase; + user_upp->pr_size = uap->bufsize; + user_upp->pr_off = uap->pcoffset; + user_upp->pr_scale = uap->pcscale; + upp->pr_base = NULL; + upp->pr_size = 0; + upp->pr_scale = 0; + + /* remove buffers previously allocated with add_profil() */ + for (upc = user_upp->pr_next; upc; upc = nupc) { + nupc = upc->pr_next; + kfree(upc, sizeof (*upc)); + } + user_upp->pr_next = 0; + PROFILE_UNLOCK(&user_upp->pr_lock); + } + else { + struct uprof *upc, *nupc; + + PROFILE_LOCK(&upp->pr_lock); + upp->pr_base = CAST_DOWN(caddr_t, uap->bufbase); + upp->pr_size = uap->bufsize; + upp->pr_off = uap->pcoffset; + upp->pr_scale = uap->pcscale; + + /* remove buffers previously allocated with add_profil() */ + for (upc = upp->pr_next; upc; upc = nupc) { + nupc = upc->pr_next; + kfree(upc, sizeof (struct uprof)); + } + upp->pr_next = 0; + PROFILE_UNLOCK(&upp->pr_lock); } - upp->pr_next = 0; - PROFILE_UNLOCK(&upp->pr_lock); startprofclock(p); ml_set_interrupts_enabled(s); return(0); } -struct add_profile_args { - short *bufbase; - u_int bufsize; - u_int pcoffset; - u_int pcscale; -}; int -add_profil(p, uap, retval) - struct proc *p; - register struct add_profile_args *uap; - register_t *retval; +add_profil(struct proc *p, register struct add_profil_args *uap, __unused register_t *retval) { struct uprof *upp = &p->p_stats->p_prof, *upc; + struct user_uprof *user_upp = NULL, *user_upc; int s; + boolean_t is64bit = proc_is64bit(p); - if (upp->pr_scale == 0) - return (0); - s = ml_set_interrupts_enabled(FALSE); - upc = (struct uprof *) kalloc(sizeof (struct uprof)); - upc->pr_base = (caddr_t)uap->bufbase; - upc->pr_size = uap->bufsize; - upc->pr_off = uap->pcoffset; - upc->pr_scale = uap->pcscale; - PROFILE_LOCK(&upp->pr_lock); - upc->pr_next = upp->pr_next; - upp->pr_next = upc; - PROFILE_UNLOCK(&upp->pr_lock); + if (is64bit) { + user_upp = &p->p_stats->user_p_prof; + if (user_upp->pr_scale == 0) + return (0); + } + else { + if (upp->pr_scale == 0) + return (0); + } + + s = ml_set_interrupts_enabled(FALSE); + + if (is64bit) { + user_upc = (struct user_uprof *) kalloc(sizeof (struct user_uprof)); + user_upc->pr_base = uap->bufbase; + user_upc->pr_size = uap->bufsize; + user_upc->pr_off = uap->pcoffset; + user_upc->pr_scale = uap->pcscale; + PROFILE_LOCK(&user_upp->pr_lock); + user_upc->pr_next = user_upp->pr_next; + user_upp->pr_next = user_upc; + PROFILE_UNLOCK(&user_upp->pr_lock); + } + else { + upc = (struct uprof *) kalloc(sizeof (struct uprof)); + upc->pr_base = CAST_DOWN(caddr_t, uap->bufbase); + upc->pr_size = uap->bufsize; + upc->pr_off = uap->pcoffset; + upc->pr_scale = uap->pcscale; + PROFILE_LOCK(&upp->pr_lock); + upc->pr_next = upp->pr_next; + upp->pr_next = upc; + PROFILE_UNLOCK(&upp->pr_lock); + } + ml_set_interrupts_enabled(s); return(0); } @@ -390,11 +436,9 @@ add_profil(p, uap, retval) void addupc_task(p, pc, ticks) register struct proc *p; - register u_long pc; + user_addr_t pc; u_int ticks; { - register 
struct uprof *prof; - register short *cell; register u_int off; u_short count; @@ -402,19 +446,44 @@ addupc_task(p, pc, ticks) if ((p->p_flag & P_PROFIL) == 0 || ticks == 0) return; - for (prof = &p->p_stats->p_prof; prof; prof = prof->pr_next) { - off = PC_TO_INDEX(pc,prof); - cell = (short *)(prof->pr_base + off); - if (cell >= (short *)prof->pr_base && - cell < (short*)(prof->pr_size + (int) prof->pr_base)) { - if (copyin((caddr_t)cell, (caddr_t) &count, sizeof(count)) == 0) { - count += ticks; - if(copyout((caddr_t) &count, (caddr_t)cell, sizeof(count)) == 0) - return; - } - p->p_stats->p_prof.pr_scale = 0; - stopprofclock(p); - break; - } + if (proc_is64bit(p)) { + struct user_uprof *prof; + user_addr_t cell; + + for (prof = &p->p_stats->user_p_prof; prof; prof = prof->pr_next) { + off = PC_TO_INDEX(pc, prof); + cell = (prof->pr_base + off); + if (cell >= prof->pr_base && + cell < (prof->pr_size + prof->pr_base)) { + if (copyin(cell, (caddr_t) &count, sizeof(count)) == 0) { + count += ticks; + if(copyout((caddr_t) &count, cell, sizeof(count)) == 0) + return; + } + p->p_stats->user_p_prof.pr_scale = 0; + stopprofclock(p); + break; + } + } + } + else { + struct uprof *prof; + short *cell; + + for (prof = &p->p_stats->p_prof; prof; prof = prof->pr_next) { + off = PC_TO_INDEX(CAST_DOWN(uint, pc),prof); + cell = (short *)(prof->pr_base + off); + if (cell >= (short *)prof->pr_base && + cell < (short*)(prof->pr_size + (int) prof->pr_base)) { + if (copyin(CAST_USER_ADDR_T(cell), (caddr_t) &count, sizeof(count)) == 0) { + count += ticks; + if(copyout((caddr_t) &count, CAST_USER_ADDR_T(cell), sizeof(count)) == 0) + return; + } + p->p_stats->p_prof.pr_scale = 0; + stopprofclock(p); + break; + } + } } } diff --git a/bsd/kern/subr_xxx.c b/bsd/kern/subr_xxx.c index 503401a6c..393c07142 100644 --- a/bsd/kern/subr_xxx.c +++ b/bsd/kern/subr_xxx.c @@ -58,8 +58,7 @@ #include #include #include -#include -#include +#include #include #include @@ -114,11 +113,13 @@ enosys() /* * Return error for operation not supported * on a specific object or file type. + * + * XXX Name of this routine is wrong. 
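addupc_task() charges clock ticks to a user-space histogram: it maps the interrupted PC to a bucket with PC_TO_INDEX, then does a copyin/increment/copyout on the 16-bit counter, turning profiling off for the process if the user buffer is unwritable. The 64-bit update, as a sketch (function name is hypothetical; the fields are as in the patch):

/* Sketch of one histogram update from addupc_task(). */
static void
addupc_one(struct proc *p, struct user_uprof *prof, user_addr_t pc, u_int ticks)
{
	user_addr_t cell;
	u_short count;
	u_int off;

	off  = PC_TO_INDEX(pc, prof);
	cell = prof->pr_base + off;
	if (cell >= prof->pr_base && cell < prof->pr_base + prof->pr_size) {
		if (copyin(cell, (caddr_t)&count, sizeof(count)) == 0) {
			count += ticks;
			if (copyout((caddr_t)&count, cell, sizeof(count)) == 0)
				return;	/* tick counted */
		}
		/* user buffer went bad: stop profiling this process */
		prof->pr_scale = 0;
		stopprofclock(p);
	}
}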
*/ int eopnotsupp() { - return (EOPNOTSUPP); + return (ENOTSUP); } /* diff --git a/bsd/kern/sys_domain.c b/bsd/kern/sys_domain.c index ab3f62847..244d2ed3c 100644 --- a/bsd/kern/sys_domain.c +++ b/bsd/kern/sys_domain.c @@ -33,11 +33,10 @@ /* domain init function */ -void systemdomain_init(); - +void systemdomain_init(void); struct domain systemdomain = - { PF_SYSTEM, "system", systemdomain_init, 0, 0, 0}; + { PF_SYSTEM, "system", systemdomain_init, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0} }; void systemdomain_init() diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index 15fdf111f..72169c03d 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -64,27 +64,41 @@ #include #include #include -#include -#include +#include +#include #include +#if KTRACE +#include +#else #include +#endif #include #include #include +#include -#include +#include #include #include #include #include +#include +#include +#include + +#include +#include #include -#include +#include +#include +#include #include #include #include #include #include +#include #include @@ -106,259 +120,325 @@ #include /* for wait queue based select */ #include +#include #if KTRACE #include #endif -#include - +#include + +int rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); +int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); +extern void *get_bsduthreadarg(thread_t); +extern int *get_bsduthreadrval(thread_t); + +__private_extern__ int dofileread(struct proc *p, struct fileproc *fp, int fd, + user_addr_t bufp, user_size_t nbyte, + off_t offset, int flags, user_ssize_t *retval); +__private_extern__ int dofilewrite(struct proc *p, struct fileproc *fp, int fd, + user_addr_t bufp, user_size_t nbyte, + off_t offset, int flags, user_ssize_t *retval); +__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode); +__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd); -__private_extern__ struct file* -holdfp(fdp, fd, flag) - struct filedesc* fdp; - int fd, flag; -{ - struct file* fp; - - if (((u_int)fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fp->f_flag & flag) == 0) { - return (NULL); - } - if (fref(fp) == -1) - return (NULL); - return (fp); -} +#if NETAT +extern int appletalk_inited; +#endif /* NETAT */ +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data /* * Read system call. 
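read() and pread() now bracket the transfer with preparefileread()/donefileread(), which take an fp_lookup() reference under proc_fdlock() and flag character-device reads with FP_INCHRREAD. The calling pattern (a sketch condensed from the new read(); the helper names are as in the patch):

/* Sketch: every read path takes a fileproc reference first and
 * releases it when the I/O is done. */
static int
read_sketch(struct proc *p, int fd, user_addr_t buf, user_size_t nbyte,
    user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;

	if ((error = preparefileread(p, &fp, fd, 0)))	/* 0: not pread */
		return (error);
	error = dofileread(p, fp, fd, buf, nbyte, (off_t)-1, 0, retval);
	donefileread(p, fp, fd);	/* clears FP_INCHRREAD, drops reference */
	return (error);
}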
*/ -#ifndef _SYS_SYSPROTO_H_ -struct read_args { - int fd; - char *cbuf; - u_int nbyte; -}; -#endif int read(p, uap, retval) struct proc *p; register struct read_args *uap; - register_t *retval; + user_ssize_t *retval; { - register struct file *fp; + struct fileproc *fp; int error; + int fd = uap->fd; + + if ( (error = preparefileread(p, &fp, fd, 0)) ) + return (error); - if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) - return (EBADF); error = dofileread(p, fp, uap->fd, uap->cbuf, uap->nbyte, - (off_t)-1, 0, retval); - frele(fp); - return(error); + (off_t)-1, 0, retval); + + donefileread(p, fp, fd); + + return (error); } /* * Pread system call */ -#ifndef _SYS_SYSPROTO_H_ -struct pread_args { - int fd; - void *buf; - size_t nbyte; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t offset; -}; -#endif int pread(p, uap, retval) struct proc *p; register struct pread_args *uap; - int *retval; + user_ssize_t *retval; { - register struct file *fp; + struct fileproc *fp; + int fd = uap->fd; int error; - if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) { - error = ESPIPE; - } else { - error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, - uap->offset, FOF_OFFSET, retval); - } - frele(fp); + if ( (error = preparefileread(p, &fp, fd, 1)) ) + return (error); + + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET, retval); + donefileread(p, fp, fd); + if (!error) KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE), uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0); - return(error); + return (error); } /* * Code common for read and pread */ + +void +donefileread(struct proc *p, struct fileproc *fp, int fd) +{ + proc_fdlock(p); + + fp->f_flags &= ~FP_INCHRREAD; + + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); +} + +int +preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread) +{ + vnode_t vp; + int error; + struct fileproc *fp; + + proc_fdlock(p); + + error = fp_lookup(p, fd, &fp, 1); + + if (error) { + proc_fdunlock(p); + return (error); + } + if ((fp->f_flag & FREAD) == 0) { + error = EBADF; + goto out; + } + if (check_for_pread && (fp->f_type != DTYPE_VNODE)) { + error = ESPIPE; + goto out; + } + if (fp->f_type == DTYPE_VNODE) { + vp = (struct vnode *)fp->f_fglob->fg_data; + + if (vp->v_type == VCHR) + fp->f_flags |= FP_INCHRREAD; + } + + *fp_ret = fp; + + proc_fdunlock(p); + return (0); + +out: + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return (error); +} + + __private_extern__ int -dofileread(p, fp, fd, buf, nbyte, offset, flags, retval) +dofileread(p, fp, fd, bufp, nbyte, offset, flags, retval) struct proc *p; - struct file *fp; + struct fileproc *fp; int fd, flags; - void *buf; - size_t nbyte; + user_addr_t bufp; + user_size_t nbyte; off_t offset; - int *retval; + user_ssize_t *retval; { - struct uio auio; - struct iovec aiov; - long cnt, error = 0; + uio_t auio; + user_ssize_t bytecnt; + long error = 0; + char uio_buf[ UIO_SIZEOF(1) ]; #if KTRACE - struct iovec ktriov; - struct uio ktruio; + uio_t ktruio = NULL; + char ktr_uio_buf[ UIO_SIZEOF(1) ]; int didktr = 0; #endif - aiov.iov_base = (caddr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = offset; + // LP64todo - do we want to raise this? 
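dofileread() no longer builds a struct uio by hand; it constructs an opaque uio_t on the stack with uio_createwithbuffer(), sizing the backing storage with UIO_SIZEOF(1) and picking the 64- or 32-bit user address space from the process. The construction idiom on its own (function name is hypothetical):

/* Sketch: one-iovec uio built on the stack; no heap allocation. */
static user_ssize_t
read_into(struct fileproc *fp, struct proc *p, user_addr_t bufp,
    user_size_t nbyte, off_t offset, int *errp)
{
	char uio_buf[UIO_SIZEOF(1)];	/* backing store for one iovec */
	uio_t auio;

	auio = uio_createwithbuffer(1, offset,
	    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
	    UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, nbyte);

	*errp = fo_read(fp, auio, fp->f_cred, 0, p);
	return (nbyte - uio_resid(auio));	/* bytes actually moved */
}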
if (nbyte > INT_MAX) return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; + + if (IS_64BIT_PROCESS(p)) { + auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + } else { + auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + } + uio_addiov(auio, bufp, nbyte); + #if KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { - ktriov = aiov; - ktruio = auio; didktr = 1; + + if (IS_64BIT_PROCESS(p)) { + ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ, + &ktr_uio_buf[0], sizeof(ktr_uio_buf)); + } else { + ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ, + &ktr_uio_buf[0], sizeof(ktr_uio_buf)); + } + uio_addiov(ktruio, bufp, nbyte); } #endif - cnt = nbyte; + bytecnt = nbyte; - if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { - if (auio.uio_resid != cnt && (error == ERESTART || + if ((error = fo_read(fp, auio, fp->f_cred, flags, p))) { + if (uio_resid(auio) != bytecnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } - cnt -= auio.uio_resid; + bytecnt -= uio_resid(auio); #if KTRACE if (didktr && error == 0) { - ktruio.uio_iov = &ktriov; - ktruio.uio_resid = cnt; - ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error, - KERNEL_FUNNEL); + uio_setresid(ktruio, bytecnt); + ktrgenio(p->p_tracep, fd, UIO_READ, ktruio, error); } #endif - *retval = cnt; + + *retval = bytecnt; + return (error); } /* * Scatter read system call. */ -#ifndef _SYS_SYSPROTO_H_ -struct readv_args { - int fd; - struct iovec *iovp; - u_int iovcnt; -}; -#endif int readv(p, uap, retval) struct proc *p; register struct readv_args *uap; - int *retval; + user_ssize_t *retval; { - struct uio auio; - register struct iovec *iov; + uio_t auio = NULL; int error; - struct iovec aiov[UIO_SMALLIOV]; - - if (uap->iovcnt > UIO_SMALLIOV) { - if (uap->iovcnt > UIO_MAXIOV) - return (EINVAL); - if ((iov = (struct iovec *) - kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0) - return (ENOMEM); - } else - iov = aiov; - auio.uio_iov = iov; - auio.uio_iovcnt = uap->iovcnt; - auio.uio_rw = UIO_READ; - error = copyin((caddr_t)uap->iovp, (caddr_t)iov, - uap->iovcnt * sizeof (struct iovec)); - if (!error) - error = rwuio(p, uap->fd, &auio, UIO_READ, retval); - if (uap->iovcnt > UIO_SMALLIOV) - kfree(iov, sizeof(struct iovec)*uap->iovcnt); + int size_of_iovec; + struct user_iovec *iovp; + + /* Verify range before calling uio_create() */ + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + + /* allocate a uio large enough to hold the number of iovecs passed */ + auio = uio_create(uap->iovcnt, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_READ); + + /* get location of iovecs within the uio. then copyin the iovecs from + * user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto ExitThisRoutine; + } + size_of_iovec = (IS_64BIT_PROCESS(p) ? 
sizeof(struct user_iovec) : sizeof(struct iovec)); + error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec)); + if (error) { + goto ExitThisRoutine; + } + + /* finalize uio_t for use and do the IO + */ + uio_calculateresid(auio); + error = rd_uio(p, uap->fd, auio, retval); + +ExitThisRoutine: + if (auio != NULL) { + uio_free(auio); + } return (error); } /* * Write system call */ -#ifndef _SYS_SYSPROTO_H_ -struct write_args { - int fd; - char *cbuf; - u_int nbyte; -}; -#endif int write(p, uap, retval) struct proc *p; register struct write_args *uap; - int *retval; + user_ssize_t *retval; { - register struct file *fp; + struct fileproc *fp; int error; + int fd = uap->fd; - if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) - return (EBADF); - error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte, + error = fp_lookup(p,fd,&fp,0); + if (error) + return(error); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else { + error = dofilewrite(p, fp, uap->fd, uap->cbuf, uap->nbyte, (off_t)-1, 0, retval); - frele(fp); + } + if (error == 0) + fp_drop_written(p, fd, fp); + else + fp_drop(p, fd, fp, 0); return(error); } /* - * Pwrite system call + * pwrite system call */ -#ifndef _SYS_SYSPROTO_H_ -struct pwrite_args { - int fd; - const void *buf; - size_t nbyte; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t offset; -}; -#endif int pwrite(p, uap, retval) struct proc *p; register struct pwrite_args *uap; - int *retval; + user_ssize_t *retval; { - register struct file *fp; + struct fileproc *fp; int error; + int fd = uap->fd; + + error = fp_lookup(p,fd,&fp,0); + if (error) + return(error); - if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) { - error = ESPIPE; - } else { - error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, - uap->offset, FOF_OFFSET, retval); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + } else { + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET, retval); + } } - frele(fp); + if (error == 0) + fp_drop_written(p, fd, fp); + else + fp_drop(p, fd, fp, 0); if (!error) KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE), @@ -368,231 +448,295 @@ pwrite(p, uap, retval) } __private_extern__ int -dofilewrite(p, fp, fd, buf, nbyte, offset, flags, retval) +dofilewrite(p, fp, fd, bufp, nbyte, offset, flags, retval) struct proc *p; - struct file *fp; + struct fileproc *fp; int fd, flags; - const void *buf; - size_t nbyte; + user_addr_t bufp; + user_size_t nbyte; off_t offset; - int *retval; + user_ssize_t *retval; { - struct uio auio; - struct iovec aiov; - long cnt, error = 0; + uio_t auio; + long error = 0; + user_ssize_t bytecnt; + char uio_buf[ UIO_SIZEOF(1) ]; #if KTRACE - struct iovec ktriov; - struct uio ktruio; + uio_t ktruio; int didktr = 0; + char ktr_uio_buf[ UIO_SIZEOF(1) ]; #endif - - aiov.iov_base = (void *)(uintptr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = offset; + + // LP64todo - do we want to raise this? 
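write() and pwrite() pair fp_lookup() with fp_drop_written() on success, so the fd layer can note that the file was modified, falling back to plain fp_drop() on error. The shape of that bracket (a sketch condensed from the new write(); the helper names are as in the patch):

/* Sketch of the write-side reference bracket. */
static int
write_sketch(struct proc *p, int fd, user_addr_t bufp, user_size_t nbyte,
    user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;

	if ((error = fp_lookup(p, fd, &fp, 0)))
		return (error);
	if ((fp->f_flag & FWRITE) == 0)
		error = EBADF;
	else
		error = dofilewrite(p, fp, fd, bufp, nbyte, (off_t)-1, 0, retval);
	if (error == 0)
		fp_drop_written(p, fd, fp);	/* notes the file was modified */
	else
		fp_drop(p, fd, fp, 0);
	return (error);
}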
if (nbyte > INT_MAX) return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; + + if (IS_64BIT_PROCESS(p)) { + auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + } else { + auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + } + uio_addiov(auio, bufp, nbyte); + #if KTRACE /* * if tracing, save a copy of iovec and uio */ if (KTRPOINT(p, KTR_GENIO)) { - ktriov = aiov; - ktruio = auio; didktr = 1; + + if (IS_64BIT_PROCESS(p)) { + ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE, + &ktr_uio_buf[0], sizeof(ktr_uio_buf)); + } else { + ktruio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE, + &ktr_uio_buf[0], sizeof(ktr_uio_buf)); + } + uio_addiov(ktruio, bufp, nbyte); } #endif - cnt = nbyte; - if (fp->f_type == DTYPE_VNODE) - bwillwrite(); - if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { - if (auio.uio_resid != cnt && (error == ERESTART || + bytecnt = nbyte; + if ((error = fo_write(fp, auio, fp->f_cred, flags, p))) { + if (uio_resid(auio) != bytecnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; /* The socket layer handles SIGPIPE */ if (error == EPIPE && fp->f_type != DTYPE_SOCKET) psignal(p, SIGPIPE); } - cnt -= auio.uio_resid; + bytecnt -= uio_resid(auio); #if KTRACE if (didktr && error == 0) { - ktruio.uio_iov = &ktriov; - ktruio.uio_resid = cnt; - ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error, - KERNEL_FUNNEL); + uio_setresid(ktruio, bytecnt); + ktrgenio(p->p_tracep, fd, UIO_WRITE, ktruio, error); } #endif - *retval = cnt; + *retval = bytecnt; + return (error); } /* * Gather write system call */ -#ifndef _SYS_SYSPROTO_H_ -struct writev_args { - int fd; - struct iovec *iovp; - u_int iovcnt; -}; -#endif int writev(p, uap, retval) struct proc *p; register struct writev_args *uap; - int *retval; + user_ssize_t *retval; { - struct uio auio; - register struct iovec *iov; + uio_t auio = NULL; int error; - struct iovec aiov[UIO_SMALLIOV]; - - if (uap->iovcnt > UIO_SMALLIOV) { - if (uap->iovcnt > UIO_MAXIOV) - return (EINVAL); - if ((iov = (struct iovec *) - kalloc(sizeof(struct iovec) * (uap->iovcnt))) == 0) - return (ENOMEM); - } else - iov = aiov; - auio.uio_iov = iov; - auio.uio_iovcnt = uap->iovcnt; - auio.uio_rw = UIO_WRITE; - error = copyin((caddr_t)uap->iovp, (caddr_t)iov, - uap->iovcnt * sizeof (struct iovec)); - if (!error) - error = rwuio(p, uap->fd, &auio, UIO_WRITE, retval); - if (uap->iovcnt > UIO_SMALLIOV) - kfree(iov, sizeof(struct iovec)*uap->iovcnt); + int size_of_iovec; + struct user_iovec *iovp; + + /* Verify range before calling uio_create() */ + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) + return (EINVAL); + + /* allocate a uio large enough to hold the number of iovecs passed */ + auio = uio_create(uap->iovcnt, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + + /* get location of iovecs within the uio. then copyin the iovecs from + * user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto ExitThisRoutine; + } + size_of_iovec = (IS_64BIT_PROCESS(p) ? 
sizeof(struct user_iovec) : sizeof(struct iovec)); + error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec)); + if (error) { + goto ExitThisRoutine; + } + + /* finalize uio_t for use and do the IO + */ + uio_calculateresid(auio); + error = wr_uio(p, uap->fd, auio, retval); + +ExitThisRoutine: + if (auio != NULL) { + uio_free(auio); + } return (error); } + int -rwuio(p, fdes, uio, rw, retval) +wr_uio(p, fdes, uio, retval) struct proc *p; int fdes; - register struct uio *uio; - enum uio_rw rw; - int *retval; + register uio_t uio; + user_ssize_t *retval; { - struct file *fp; - register struct iovec *iov; - int i, count, flag, error; + struct fileproc *fp; + int error; + user_ssize_t count; #if KTRACE - struct iovec *ktriov; + struct iovec_64 *ktriov = NULL; struct uio ktruio; int didktr = 0; u_int iovlen; #endif - if (error = fdgetf(p, fdes, &fp)) - return (error); + error = fp_lookup(p,fdes,&fp,0); + if (error) + return(error); - if ((fp->f_flag&(rw==UIO_READ ? FREAD : FWRITE)) == 0) { - return(EBADF); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; } - uio->uio_resid = 0; - uio->uio_segflg = UIO_USERSPACE; - uio->uio_procp = p; - iov = uio->uio_iov; - for (i = 0; i < uio->uio_iovcnt; i++) { - if (iov->iov_len < 0) { - return(EINVAL); + count = uio_resid(uio); +#if KTRACE + /* + * if tracing, save a copy of iovec + */ + if (KTRPOINT(p, KTR_GENIO)) { + iovlen = uio->uio_iovcnt * + (IS_64BIT_PROCESS(p) ? sizeof (struct iovec_64) : sizeof (struct iovec_32)); + MALLOC(ktriov, struct iovec_64 *, iovlen, M_TEMP, M_WAITOK); + if (ktriov != NULL) { + bcopy((caddr_t)uio->uio_iovs.iov64p, (caddr_t)ktriov, iovlen); + ktruio = *uio; + didktr = 1; } - uio->uio_resid += iov->iov_len; - if (uio->uio_resid < 0) { - return(EINVAL); + } +#endif + error = fo_write(fp, uio, fp->f_cred, 0, p); + if (error) { + if (uio_resid(uio) != count && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* The socket layer handles SIGPIPE */ + if (error == EPIPE && fp->f_type != DTYPE_SOCKET) + psignal(p, SIGPIPE); + } + *retval = count - uio_resid(uio); + +#if KTRACE + if (didktr) { + if (error == 0) { + ktruio.uio_iovs.iov64p = ktriov; + uio_setresid(&ktruio, *retval); + ktrgenio(p->p_tracep, fdes, UIO_WRITE, &ktruio, error); } - iov++; + FREE(ktriov, M_TEMP); } - count = uio->uio_resid; +#endif + +out: + if ( (error == 0) ) + fp_drop_written(p, fdes, fp); + else + fp_drop(p, fdes, fp, 0); + return(error); +} + + +int +rd_uio(p, fdes, uio, retval) + struct proc *p; + int fdes; + register uio_t uio; + user_ssize_t *retval; +{ + struct fileproc *fp; + int error; + user_ssize_t count; +#if KTRACE + struct iovec_64 *ktriov = NULL; + struct uio ktruio; + int didktr = 0; + u_int iovlen; +#endif + + if ( (error = preparefileread(p, &fp, fdes, 0)) ) + return (error); + + count = uio_resid(uio); #if KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { - iovlen = uio->uio_iovcnt * sizeof (struct iovec); - MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); - bcopy((caddr_t)uio->uio_iov, (caddr_t)ktriov, iovlen); - ktruio = *uio; - didktr = 1; + iovlen = uio->uio_iovcnt * + (IS_64BIT_PROCESS(p) ? 
sizeof (struct iovec_64) : sizeof (struct iovec_32)); + MALLOC(ktriov, struct iovec_64 *, iovlen, M_TEMP, M_WAITOK); + if (ktriov != NULL) { + bcopy((caddr_t)uio->uio_iovs.iov64p, (caddr_t)ktriov, iovlen); + ktruio = *uio; + didktr = 1; + } } #endif + error = fo_read(fp, uio, fp->f_cred, 0, p); - if (rw == UIO_READ) { - if (error = fo_read(fp, uio, fp->f_cred, 0, p)) - if (uio->uio_resid != count && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) - error = 0; - } else { - if (fp->f_type == DTYPE_VNODE) - bwillwrite(); - if (error = fo_write(fp, uio, fp->f_cred, 0, p)) { - if (uio->uio_resid != count && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) - error = 0; - /* The socket layer handles SIGPIPE */ - if (error == EPIPE && fp->f_type != DTYPE_SOCKET) - psignal(p, SIGPIPE); - } + if (error) { + if (uio_resid(uio) != count && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; } - - *retval = count - uio->uio_resid; + *retval = count - uio_resid(uio); #if KTRACE if (didktr) { if (error == 0) { - ktruio.uio_iov = ktriov; - ktruio.uio_resid = *retval; - ktrgenio(p->p_tracep, fdes, rw, &ktruio, error, - KERNEL_FUNNEL); + ktruio.uio_iovs.iov64p = ktriov; + uio_setresid(&ktruio, *retval); + ktrgenio(p->p_tracep, fdes, UIO_READ, &ktruio, error); } FREE(ktriov, M_TEMP); } #endif + donefileread(p, fp, fdes); - return(error); + return (error); } /* * Ioctl system call + * */ -#ifndef _SYS_SYSPROTO_H_ -struct ioctl_args { - int fd; - u_long com; - caddr_t data; -}; -#endif int -ioctl(p, uap, retval) - struct proc *p; - register struct ioctl_args *uap; - register_t *retval; +ioctl(struct proc *p, register struct ioctl_args *uap, __unused register_t *retval) { - struct file *fp; + struct fileproc *fp; register u_long com; - register int error; + int error = 0; register u_int size; - caddr_t data, memp; + caddr_t datap, memp; + boolean_t is64bit; int tmp; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; + int fd = uap->fd; AUDIT_ARG(fd, uap->fd); - AUDIT_ARG(cmd, uap->com); /* XXX cmd is int, uap->com is long */ + AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */ AUDIT_ARG(addr, uap->data); - if (error = fdgetf(p, uap->fd, &fp)) - return (error); + + is64bit = proc_is64bit(p); + + proc_fdlock(p); + error = fp_lookup(p,fd,&fp,1); + if (error) { + proc_fdunlock(p); + return(error); + } AUDIT_ARG(file, p, fp); - if ((fp->f_flag & (FREAD | FWRITE)) == 0) - return (EBADF); + + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + error = EBADF; + goto out; + } #if NETAT /* @@ -600,14 +744,16 @@ ioctl(p, uap, retval) * while implementing an ATioctl system call */ { - extern int appletalk_inited; - if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) { + u_long fixed_command; #ifdef APPLETALK_DEBUG kprintf("ioctl: special AppleTalk \n"); #endif - error = fo_ioctl(fp, uap->com, uap->data, p); - return(error); + datap = &stkbuf[0]; + *(user_addr_t *)datap = uap->data; + fixed_command = _IOW(0, 0xff99, uap->data); + error = fo_ioctl(fp, fixed_command, datap, p); + goto out; } } @@ -617,10 +763,12 @@ ioctl(p, uap, retval) switch (com = uap->com) { case FIONCLEX: *fdflags(p, uap->fd) &= ~UF_EXCLOSE; - return (0); + error =0; + goto out; case FIOCLEX: *fdflags(p, uap->fd) |= UF_EXCLOSE; - return (0); + error =0; + goto out; } /* @@ -628,38 +776,62 @@ ioctl(p, uap, retval) * copied to/from the user's address space. 
*/ size = IOCPARM_LEN(com); - if (size > IOCPARM_MAX) - return (ENOTTY); + if (size > IOCPARM_MAX) { + error = ENOTTY; + goto out; + } memp = NULL; if (size > sizeof (stkbuf)) { - if ((memp = (caddr_t)kalloc(size)) == 0) - return(ENOMEM); - data = memp; + proc_fdunlock(p); + if ((memp = (caddr_t)kalloc(size)) == 0) { + proc_fdlock(p); + error = ENOMEM; + goto out; + } + proc_fdlock(p); + datap = memp; } else - data = stkbuf; + datap = &stkbuf[0]; if (com&IOC_IN) { if (size) { - error = copyin(uap->data, data, (u_int)size); + proc_fdunlock(p); + error = copyin(uap->data, datap, size); if (error) { if (memp) kfree(memp, size); - return (error); + proc_fdlock(p); + goto out; } - } else - *(caddr_t *)data = uap->data; + proc_fdlock(p); + } else { + /* XXX - IOC_IN and no size? we should probably return an error here!! */ + if (is64bit) { + *(user_addr_t *)datap = uap->data; + } + else { + *(uint32_t *)datap = (uint32_t)uap->data; + } + } } else if ((com&IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. */ - bzero(data, size); - else if (com&IOC_VOID) - *(caddr_t *)data = uap->data; + bzero(datap, size); + else if (com&IOC_VOID) { + /* XXX - this is odd since IOC_VOID means no parameters */ + if (is64bit) { + *(user_addr_t *)datap = uap->data; + } + else { + *(uint32_t *)datap = (uint32_t)uap->data; + } + } switch (com) { case FIONBIO: - if (tmp = *(int *)data) + if ( (tmp = *(int *)datap) ) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; @@ -667,7 +839,7 @@ ioctl(p, uap, retval) break; case FIOASYNC: - if (tmp = *(int *)data) + if ( (tmp = *(int *)datap) ) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; @@ -675,12 +847,16 @@ ioctl(p, uap, retval) break; case FIOSETOWN: - tmp = *(int *)data; + tmp = *(int *)datap; if (fp->f_type == DTYPE_SOCKET) { ((struct socket *)fp->f_data)->so_pgid = tmp; error = 0; break; } + if (fp->f_type == DTYPE_PIPE) { + error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); + break; + } if (tmp <= 0) { tmp = -tmp; } else { @@ -697,26 +873,31 @@ ioctl(p, uap, retval) case FIOGETOWN: if (fp->f_type == DTYPE_SOCKET) { error = 0; - *(int *)data = ((struct socket *)fp->f_data)->so_pgid; + *(int *)datap = ((struct socket *)fp->f_data)->so_pgid; break; } - error = fo_ioctl(fp, TIOCGPGRP, data, p); - *(int *)data = -*(int *)data; + error = fo_ioctl(fp, TIOCGPGRP, datap, p); + *(int *)datap = -*(int *)datap; break; default: - error = fo_ioctl(fp, com, data, p); + error = fo_ioctl(fp, com, datap, p); /* * Copy any data to user, size was * already set and checked above. */ if (error == 0 && (com&IOC_OUT) && size) - error = copyout(data, uap->data, (u_int)size); + error = copyout(datap, uap->data, (u_int)size); break; } + proc_fdunlock(p); if (memp) kfree(memp, size); - return (error); + proc_fdlock(p); +out: + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return(error); } int selwait, nselcoll; @@ -725,42 +906,29 @@ int selwait, nselcoll; extern int selcontinue(int error); extern int selprocess(int error, int sel_pass); static int selscan(struct proc *p, struct _select * sel, - int nfd, register_t *retval, int sel_pass); + int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub); static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits, - int nfd, int * count, int * nfcount); + int nfd, int * count); +static int seldrop(struct proc *p, u_int32_t *ibits, int nfd); extern uint64_t tvtoabstime(struct timeval *tvp); /* * Select system call. 
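The ioctl() path sizes its kernel-side argument buffer from the command word itself: IOCPARM_LEN(com) gives the argument size, small arguments live in a 128-byte stack buffer, anything larger is kalloc()ed, and IOC_IN/IOC_OUT drive the copyin/copyout. Reduced to the essentials (a sketch, with the locking elided; function name is hypothetical):

/* Sketch of ioctl argument marshaling. */
static int
ioctl_marshal(struct fileproc *fp, u_long com, user_addr_t udata,
    struct proc *p)
{
	char stkbuf[STK_PARAMS];	/* STK_PARAMS == 128 */
	caddr_t datap, memp = NULL;
	u_int size = IOCPARM_LEN(com);
	int error = 0;

	if (size > IOCPARM_MAX)
		return (ENOTTY);
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0)
			return (ENOMEM);
		datap = memp;
	} else
		datap = &stkbuf[0];

	if ((com & IOC_IN) && size)
		error = copyin(udata, datap, size);	/* argument in */
	if (error == 0)
		error = fo_ioctl(fp, com, datap, p);
	if (error == 0 && (com & IOC_OUT) && size)
		error = copyout(datap, udata, size);	/* result out */
	if (memp)
		kfree(memp, size);
	return (error);
}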
*/ -#ifndef _SYS_SYSPROTO_H_ -struct select_args { - int nd; - u_int32_t *in; - u_int32_t *ou; - u_int32_t *ex; - struct timeval *tv; -}; -#endif int -select(p, uap, retval) - register struct proc *p; - register struct select_args *uap; - register_t *retval; +select(struct proc *p, struct select_args *uap, register_t *retval) { int error = 0; u_int ni, nw, size; - thread_act_t th_act; + thread_t th_act; struct uthread *uth; struct _select *sel; int needzerofill = 1; - int kfcount =0; - int nfcount = 0; int count = 0; - th_act = current_act(); + th_act = current_thread(); uth = get_bsdthread_info(th_act); - sel = &uth->uu_state.ss_select; + sel = &uth->uu_select; retval = (int *)get_bsduthreadrval(th_act); *retval = 0; @@ -780,10 +948,10 @@ select(p, uap, retval) */ if (sel->nbytes == 0) { sel->nbytes = 3 * ni; - MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK); - MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK); - bzero((caddr_t)sel->ibits, sel->nbytes); - bzero((caddr_t)sel->obits, sel->nbytes); + MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO); + MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO); + if ((sel->ibits == NULL) || (sel->obits == NULL)) + panic("select out of memory"); needzerofill = 0; } @@ -795,10 +963,10 @@ select(p, uap, retval) sel->nbytes = (3 * ni); FREE(sel->ibits, M_TEMP); FREE(sel->obits, M_TEMP); - MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK); - MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK); - bzero((caddr_t)sel->ibits, sel->nbytes); - bzero((caddr_t)sel->obits, sel->nbytes); + MALLOC(sel->ibits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO); + MALLOC(sel->obits, u_int32_t *, sel->nbytes, M_TEMP, M_WAITOK | M_ZERO); + if ((sel->ibits == NULL) || (sel->obits == NULL)) + panic("select out of memory"); needzerofill = 0; } @@ -812,7 +980,7 @@ select(p, uap, retval) */ #define getbits(name, x) \ do { \ - if (uap->name && (error = copyin((caddr_t)uap->name, \ + if (uap->name && (error = copyin(uap->name, \ (caddr_t)&sel->ibits[(x) * nw], ni))) \ goto continuation; \ } while (0) @@ -824,8 +992,15 @@ select(p, uap, retval) if (uap->tv) { struct timeval atv; - - error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); + if (IS_64BIT_PROCESS(p)) { + struct user_timeval atv64; + error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64)); + /* Loses resolution - assume timeout < 68 years */ + atv.tv_sec = atv64.tv_sec; + atv.tv_usec = atv64.tv_usec; + } else { + error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv)); + } if (error) goto continuation; if (itimerfix(&atv)) { @@ -839,36 +1014,33 @@ select(p, uap, retval) else sel->abstime = 0; - sel->nfcount = 0; - if (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &nfcount)) { + if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count)) ) { goto continuation; } - sel->nfcount = nfcount; sel->count = count; - size = SIZEOF_WAITQUEUE_SUB + (count * SIZEOF_WAITQUEUE_LINK); + size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK); if (sel->allocsize) { - if (uth->uu_wqsub == 0) + if (sel->wqset == 0) panic("select: wql memory smashed"); /* needed for the select now */ if (size > sel->allocsize) { - kfree(uth->uu_wqsub, sel->allocsize); + kfree(sel->wqset, sel->allocsize); sel->allocsize = size; - uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize); - if (uth->uu_wqsub == (wait_queue_sub_t)NULL) + sel->wqset = (wait_queue_set_t)kalloc(size); + if (sel->wqset == (wait_queue_set_t)NULL) 
panic("failed to allocate memory for waitqueue\n"); - sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB; } } else { sel->count = count; sel->allocsize = size; - uth->uu_wqsub = (wait_queue_sub_t)kalloc(sel->allocsize); - if (uth->uu_wqsub == (wait_queue_sub_t)NULL) + sel->wqset = (wait_queue_set_t)kalloc(sel->allocsize); + if (sel->wqset == (wait_queue_set_t)NULL) panic("failed to allocate memory for waitqueue\n"); - sel->wql = (char *)uth->uu_wqsub + SIZEOF_WAITQUEUE_SUB; } - bzero(uth->uu_wqsub, size); - wait_queue_sub_init(uth->uu_wqsub, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); + bzero(sel->wqset, size); + sel->wql = (char *)sel->wqset + SIZEOF_WAITQUEUE_SET; + wait_queue_set_init(sel->wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); continuation: return selprocess(error, SEL_FIRSTPASS); @@ -881,11 +1053,11 @@ selcontinue(int error) } int -selprocess(error, sel_pass) +selprocess(int error, int sel_pass) { int ncoll; u_int ni, nw; - thread_act_t th_act; + thread_t th_act; struct uthread *uth; struct proc *p; struct select_args *uap; @@ -898,11 +1070,11 @@ selprocess(error, sel_pass) wait_result_t wait_result; p = current_proc(); - th_act = current_act(); + th_act = current_thread(); uap = (struct select_args *)get_bsduthreadarg(th_act); retval = (int *)get_bsduthreadrval(th_act); uth = get_bsdthread_info(th_act); - sel = &uth->uu_state.ss_select; + sel = &uth->uu_select; /* if it is first pass wait queue is not setup yet */ if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) @@ -919,9 +1091,9 @@ retry: /* skip scans if the select is just for timeouts */ if (sel->count) { if (sel_pass == SEL_FIRSTPASS) - wait_queue_sub_clearrefs(uth->uu_wqsub); + wait_queue_sub_clearrefs(sel->wqset); - error = selscan(p, sel, uap->nd, retval, sel_pass); + error = selscan(p, sel, uap->nd, retval, sel_pass, sel->wqset); if (error || *retval) { goto done; } @@ -974,12 +1146,12 @@ retry: panic("selprocess: 2nd pass assertwaiting"); /* Wait Queue Subordinate has waitqueue as first element */ - wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqsub, - &selwait, THREAD_ABORTSAFE); + wait_result = wait_queue_assert_wait((wait_queue_t)sel->wqset, + &selwait, THREAD_ABORTSAFE, sel->abstime); if (wait_result != THREAD_AWAKENED) { /* there are no preposted events */ - error = tsleep1(NULL, PSOCK | PCATCH, - "select", sel->abstime, selcontinue); + error = tsleep1(NULL, PSOCK | PCATCH, + "select", 0, selcontinue); } else { prepost = 1; error = 0; @@ -992,8 +1164,10 @@ retry: goto retry; } done: - if (unwind) - wait_subqueue_unlink_all(uth->uu_wqsub); + if (unwind) { + wait_subqueue_unlink_all(sel->wqset); + seldrop(p, sel->ibits, uap->nd); + } p->p_flag &= ~P_SELECT; /* select is not restarted after signals... 
*/ if (error == ERESTART) @@ -1005,8 +1179,8 @@ done: #define putbits(name, x) \ do { \ - if (uap->name && (error2 = copyout((caddr_t)&sel->obits[(x) * nw], \ - (caddr_t)uap->name, ni))) \ + if (uap->name && (error2 = \ + copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \ error = error2; \ } while (0) @@ -1022,17 +1196,18 @@ done: } static int -selscan(p, sel, nfd, retval, sel_pass) +selscan(p, sel, nfd, retval, sel_pass, wqsub) struct proc *p; struct _select *sel; int nfd; register_t *retval; int sel_pass; + wait_queue_sub_t wqsub; { register struct filedesc *fdp = p->p_fd; register int msk, i, j, fd; register u_int32_t bits; - struct file *fp; + struct fileproc *fp; int n = 0; int nc = 0; static int flag[3] = { FREAD, FWRITE, 0 }; @@ -1040,10 +1215,7 @@ selscan(p, sel, nfd, retval, sel_pass) u_int nw; u_int32_t *ibits, *obits; char * wql; - int nfunnel = 0; - int count, nfcount; char * wql_ptr; - struct vnode *vp; /* * Problems when reboot; due to MacOSX signal probs @@ -1053,89 +1225,42 @@ selscan(p, sel, nfd, retval, sel_pass) *retval=0; return(EIO); } - ibits = sel->ibits; obits = sel->obits; wql = sel->wql; - count = sel->count; - nfcount = sel->nfcount; - - if (nfcount > count) - panic("selcount countfd_ofiles[fd]; - if (fp == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) { - return(EBADF); - } - if (sel_pass == SEL_SECONDPASS) - wql_ptr = (char *)0; - else - wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK); - /* - * Merlot: need to remove the bogus f_data check - * from the following "if" statement. It's there - * because of various problems stemming from - * races due to the split-funnels and lack of real - * referencing on sockets... - */ - if (fp->f_ops && (fp->f_type != DTYPE_SOCKET) - && (fp->f_data != (caddr_t)-1) - && !(fp->f_type == DTYPE_VNODE - && (vp = (struct vnode *)fp->f_data) - && vp->v_type == VFIFO) - && fo_select(fp, flag[msk], wql_ptr, p)) { - optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); - n++; - } - nc++; - } - } - } - } - - if (nfcount) { - /* socket file descriptors for scan */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + proc_fdlock(p); - nc = 0; + if (sel->count) { for (msk = 0; msk < 3; msk++) { iptr = (u_int32_t *)&ibits[msk * nw]; optr = (u_int32_t *)&obits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); fp = fdp->fd_ofiles[fd]; + if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + proc_fdunlock(p); return(EBADF); } - if (sel_pass == SEL_SECONDPASS) + if (sel_pass == SEL_SECONDPASS) { wql_ptr = (char *)0; - else - wql_ptr = (wql+ nc * SIZEOF_WAITQUEUE_LINK); - if (fp->f_ops - && (fp->f_type == DTYPE_SOCKET - || (fp->f_type == DTYPE_VNODE - && (vp = (struct vnode *)fp->f_data) - && vp != (struct vnode *)-1 - && vp->v_type == VFIFO)) - && fo_select(fp, flag[msk], wql_ptr, p)) { + fp->f_flags &= ~FP_INSELECT; + fp->f_waddr = (void *)0; + } else { + wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK); + fp->f_flags |= FP_INSELECT; + fp->f_waddr = (void *)wqsub; + } + if (fp->f_ops && fo_select(fp, flag[msk], wql_ptr, p)) { optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); n++; } @@ -1143,43 +1268,227 @@ selscan(p, sel, nfd, retval, sel_pass) } } } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); } - + proc_fdunlock(p); *retval = n; return (0); } -/*ARGSUSED*/ +static int poll_callback(struct kqueue *, struct kevent *, void *); + +struct poll_continue_args { + user_addr_t pca_fds; + u_int 
pca_nfds; + u_int pca_rfds; +}; + int -seltrue(dev, flag, p) - dev_t dev; - int flag; - struct proc *p; +poll(struct proc *p, struct poll_args *uap, register_t *retval) { + struct poll_continue_args *cont; + struct pollfd *fds; + struct kqueue *kq; + struct timeval atv; + int ncoll, error = 0; + u_int nfds = uap->nfds; + u_int rfds = 0; + u_int i; + size_t ni; - return (1); -} + /* + * This is kinda bogus. We have fd limits, but that is not + * really related to the size of the pollfd array. Make sure + * we let the process use at least FD_SETSIZE entries and at + * least enough for the current limits. We want to be reasonably + * safe, but not overly restrictive. + */ + if (nfds > OPEN_MAX || + (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)) + return (EINVAL); -static int -selcount(p, ibits, obits, nfd, count, nfcount) - struct proc *p; - u_int32_t *ibits, *obits; - int nfd; - int *count; - int *nfcount; -{ - register struct filedesc *fdp = p->p_fd; - register int msk, i, j, fd; - register u_int32_t bits; - struct file *fp; + kq = kqueue_alloc(p); + if (kq == NULL) + return (EAGAIN); + + ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args); + MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK); + if (NULL == cont) { + error = EAGAIN; + goto out; + } + + fds = (struct pollfd *)&cont[1]; + error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd)); + if (error) + goto out; + + if (uap->timeout != -1) { + struct timeval rtv; + + atv.tv_sec = uap->timeout / 1000; + atv.tv_usec = (uap->timeout % 1000) * 1000; + if (itimerfix(&atv)) { + error = EINVAL; + goto out; + } + getmicrouptime(&rtv); + timevaladd(&atv, &rtv); + } else { + atv.tv_sec = 0; + atv.tv_usec = 0; + } + + /* JMM - all this P_SELECT stuff is bogus */ + ncoll = nselcoll; + p->p_flag |= P_SELECT; + + for (i = 0; i < nfds; i++) { + short events = fds[i].events; + struct kevent kev; + int kerror = 0; + + /* per spec, ignore fd values below zero */ + if (fds[i].fd < 0) { + fds[i].revents = 0; + continue; + } + + /* convert the poll event into a kqueue kevent */ + kev.ident = fds[i].fd; + kev.flags = EV_ADD | EV_ONESHOT | EV_POLL; + kev.fflags = NOTE_LOWAT; + kev.data = 1; /* efficiency be damned: any data should trigger */ + kev.udata = CAST_USER_ADDR_T(&fds[i]); + + /* Handle input events */ + if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND )) { + kev.filter = EVFILT_READ; + if (!(events & ( POLLIN | POLLRDNORM ))) + kev.flags |= EV_OOBAND; + kerror = kevent_register(kq, &kev, p); + } + + /* Handle output events */ + if (kerror == 0 && + events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) { + kev.filter = EVFILT_WRITE; + kerror = kevent_register(kq, &kev, p); + } + + /* Handle BSD extension vnode events */ + if (kerror == 0 && + events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) { + kev.filter = EVFILT_VNODE; + kev.fflags = 0; + if (events & POLLEXTEND) + kev.fflags |= NOTE_EXTEND; + if (events & POLLATTRIB) + kev.fflags |= NOTE_ATTRIB; + if (events & POLLNLINK) + kev.fflags |= NOTE_LINK; + if (events & POLLWRITE) + kev.fflags |= NOTE_WRITE; + kerror = kevent_register(kq, &kev, p); + } + + if (kerror != 0) { + fds[i].revents = POLLNVAL; + rfds++; + } else + fds[i].revents = 0; + } + + /* Did we have any trouble registering? 
*/ + if (rfds > 0) + goto done; + + /* scan for, and possibly wait for, the kevents to trigger */ + cont->pca_fds = uap->fds; + cont->pca_nfds = nfds; + cont->pca_rfds = rfds; + error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p); + rfds = cont->pca_rfds; + + done: + p->p_flag &= ~P_SELECT; + /* poll is not restarted after signals... */ + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) + error = 0; + if (error == 0) { + error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd)); + *retval = rfds; + } + + out: + if (NULL != cont) + FREE(cont, M_TEMP); + + kqueue_dealloc(kq, p); + return (error); +} + +static int +poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) +{ + struct poll_continue_args *cont = (struct poll_continue_args *)data; + struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata); + + /* convert the results back into revents */ + if (kevp->flags & EV_EOF) + fds->revents |= POLLHUP; + if (kevp->flags & EV_ERROR) + fds->revents |= POLLERR; + cont->pca_rfds++; + + switch (kevp->filter) { + case EVFILT_READ: + if (kevp->data != 0) + fds->revents |= (fds->events & ( POLLIN | POLLRDNORM )); + if (kevp->flags & EV_OOBAND) + fds->revents |= (fds->events & ( POLLPRI | POLLRDBAND )); + break; + + case EVFILT_WRITE: + if (!(fds->revents & POLLHUP)) + fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND )); + break; + + case EVFILT_PROC: + if (kevp->fflags & NOTE_EXTEND) + fds->revents |= (fds->events & POLLEXTEND); + if (kevp->fflags & NOTE_ATTRIB) + fds->revents |= (fds->events & POLLATTRIB); + if (kevp->fflags & NOTE_LINK) + fds->revents |= (fds->events & POLLNLINK); + if (kevp->fflags & NOTE_WRITE) + fds->revents |= (fds->events & POLLWRITE); + break; + } + return 0; +} + +int +seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p) +{ + + return (1); +} + +static int +selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits, + int nfd, int *count) +{ + register struct filedesc *fdp = p->p_fd; + register int msk, i, j, fd; + register u_int32_t bits; + struct fileproc *fp; int n = 0; - int nc = 0; - int nfc = 0; - static int flag[3] = { FREAD, FWRITE, 0 }; - u_int32_t *iptr, *fptr, *fbits; + u_int32_t *iptr; u_int nw; - struct vnode *vp; + int error=0; + int dropcount; /* * Problems when reboot; due to MacOSX signal probs @@ -1187,13 +1496,11 @@ selcount(p, ibits, obits, nfd, count, nfcount) */ if (fdp == NULL) { *count=0; - *nfcount=0; return(EIO); } - nw = howmany(nfd, NFDBITS); - + proc_fdlock(p); for (msk = 0; msk < 3; msk++) { iptr = (u_int32_t *)&ibits[msk * nw]; for (i = 0; i < nfd; i += NFDBITS) { @@ -1204,20 +1511,102 @@ selcount(p, ibits, obits, nfd, count, nfcount) if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { *count=0; - *nfcount=0; - return(EBADF); + error = EBADF; + goto bad; } - if (fp->f_type == DTYPE_SOCKET || - (fp->f_type == DTYPE_VNODE - && (vp = (struct vnode *)fp->f_data) - && vp->v_type == VFIFO)) - nfc++; + fp->f_iocount++; n++; } } } + proc_fdunlock(p); + *count = n; - *nfcount = nfc; + return (0); +bad: + dropcount = 0; + + if (n== 0) + goto out; + /* undo the iocounts */ + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (dropcount >= n) + goto out; + fp->f_iocount--; + + if (p->p_fpdrainwait && fp->f_iocount == 0) { + p->p_fpdrainwait = 0; + 
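poll() is now layered on kqueue: each pollfd is registered as one or more one-shot kevents (EVFILT_READ/EVFILT_WRITE, plus EVFILT_VNODE for the BSD extension bits), and poll_callback() folds triggered kevents back into revents. The read-side registration, as a sketch (function name is hypothetical; the flag choices follow the patch):

/* Sketch: register one pollfd's read interest as a one-shot kevent. */
static int
poll_register_read(struct kqueue *kq, struct pollfd *pfd, struct proc *p)
{
	struct kevent kev;

	kev.ident  = pfd->fd;
	kev.flags  = EV_ADD | EV_ONESHOT | EV_POLL;
	kev.fflags = NOTE_LOWAT;
	kev.data   = 1;				/* any data should trigger */
	kev.udata  = CAST_USER_ADDR_T(pfd);	/* mapped back in poll_callback() */
	kev.filter = EVFILT_READ;
	if (!(pfd->events & (POLLIN | POLLRDNORM)))
		kev.flags |= EV_OOBAND;		/* only band events wanted */
	return (kevent_register(kq, &kev, p));
}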
wakeup(&p->p_fpdrainwait); + } + dropcount++; + } + } + } +out: + proc_fdunlock(p); + return(error); +} + +static int +seldrop(p, ibits, nfd) + struct proc *p; + u_int32_t *ibits; + int nfd; +{ + register struct filedesc *fdp = p->p_fd; + register int msk, i, j, fd; + register u_int32_t bits; + struct fileproc *fp; + int n = 0; + u_int32_t *iptr; + u_int nw; + + /* + * Problems when reboot; due to MacOSX signal probs + * in Beaker1C ; verify that the p->p_fd is valid + */ + if (fdp == NULL) { + return(EIO); + } + + nw = howmany(nfd, NFDBITS); + + + proc_fdlock(p); + for (msk = 0; msk < 3; msk++) { + iptr = (u_int32_t *)&ibits[msk * nw]; + for (i = 0; i < nfd; i += NFDBITS) { + bits = iptr[i/NFDBITS]; + while ((j = ffs(bits)) && (fd = i + --j) < nfd) { + bits &= ~(1 << j); + fp = fdp->fd_ofiles[fd]; + if (fp == NULL +#if 0 + /* if you are here then it is being closed */ + || (fdp->fd_ofileflags[fd] & UF_RESERVED) +#endif + ) { + proc_fdunlock(p); + return(EBADF); + } + n++; + fp->f_iocount--; + fp->f_flags &= ~FP_INSELECT; + + if (p->p_fpdrainwait && fp->f_iocount == 0) { + p->p_fpdrainwait = 0; + wakeup(&p->p_fpdrainwait); + } + } + } + } + proc_fdunlock(p); return (0); } @@ -1225,12 +1614,9 @@ selcount(p, ibits, obits, nfd, count, nfcount) * Record a select request. */ void -selrecord(selector, sip, p_wql) - struct proc *selector; - struct selinfo *sip; - void * p_wql; +selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql) { - thread_act_t cur_act = current_act(); + thread_t cur_act = current_thread(); struct uthread * ut = get_bsdthread_info(cur_act); /* need to look at collisions */ @@ -1256,8 +1642,9 @@ selrecord(selector, sip, p_wql) sip->si_flags &= ~SI_COLL; sip->si_flags |= SI_RECORDED; - if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqsub)) - wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqsub, (wait_queue_link_t)p_wql); + if (!wait_queue_member(&sip->si_wait_queue, ut->uu_select.wqset)) + wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_select.wqset, + (wait_queue_link_t)p_wql); return; } @@ -1304,243 +1691,367 @@ selthreadclear(sip) } -extern struct eventqelt *evprocdeque(struct proc *p, struct eventqelt *eqp); + + +#define DBG_EVENT 0x10 + +#define DBG_POST 0x10 +#define DBG_WATCH 0x11 +#define DBG_WAIT 0x12 +#define DBG_MOD 0x13 +#define DBG_EWAKEUP 0x14 +#define DBG_ENQUEUE 0x15 +#define DBG_DEQUEUE 0x16 + +#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST) +#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH) +#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT) +#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD) +#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP) +#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE) +#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE) + + +#define EVPROCDEQUE(p, evq) do { \ + proc_lock(p); \ + if (evq->ee_flags & EV_QUEUED) { \ + TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); \ + evq->ee_flags &= ~EV_QUEUED; \ + } \ + proc_unlock(p); \ +} while (0); + /* * called upon socket close. deque and free all events for - * the socket + * the socket... socket must be locked by caller. 
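
/*
 * The selcount()/seldrop() loops above walk the caller's fd_set-style
 * bitmap with ffs(3): take the lowest set bit, clear it, and recover the
 * fd from the word index plus the bit index. The same idiom standalone
 * (userspace sketch; 32-bit words, matching NFDBITS here):
 */
#include <stdio.h>
#include <strings.h>
#include <sys/types.h>

#define NFDBITS	32

int
main(void)
{
	u_int32_t ibits[2] = { 0x00000013, 0x80000000 };	/* fds 0,1,4,63 */
	int nfd = 64;
	int i, j, fd;
	u_int32_t bits;

	for (i = 0; i < nfd; i += NFDBITS) {
		bits = ibits[i / NFDBITS];
		while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
			bits &= ~(1U << j);
			printf("fd %d has its bit set\n", fd);
		}
	}
	return (0);
}
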
 */
 void
 evsofree(struct socket *sp)
 {
-	struct eventqelt *eqp, *next;
+	struct eventqelt *evq, *next;
+	proc_t p;
+
+	if (sp == NULL)
+		return;

-	if (sp == NULL) return;
+	for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) {
+		next = evq->ee_slist.tqe_next;
+		p = evq->ee_proc;

-	for (eqp = sp->so_evlist.tqh_first; eqp != NULL; eqp = next) {
-		next = eqp->ee_slist.tqe_next;
-		evprocdeque(eqp->ee_proc, eqp); // remove from proc q if there
-		TAILQ_REMOVE(&sp->so_evlist, eqp, ee_slist); // remove from socket q
-		FREE(eqp, M_TEMP);
-	}
+		if (evq->ee_flags & EV_QUEUED) {
+			EVPROCDEQUE(p, evq);
+		}
+		TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q
+		FREE(evq, M_TEMP);
+	}
 }

-#define DBG_EVENT 0x10
+/*
+ * called upon pipe close. dequeue and free all events for
+ * the pipe... the pipe must be locked by the caller
+ */
+void
+evpipefree(struct pipe *cpipe)
+{
+	struct eventqelt *evq, *next;
+	proc_t p;

-#define DBG_POST 0x10
-#define DBG_WATCH 0x11
-#define DBG_WAIT 0x12
-#define DBG_MOD 0x13
-#define DBG_EWAKEUP 0x14
-#define DBG_ENQUEUE 0x15
-#define DBG_DEQUEUE 0x16
+	for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) {
+		next = evq->ee_slist.tqe_next;
+		p = evq->ee_proc;

-#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST)
-#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH)
-#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
-#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD)
-#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP)
-#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE)
-#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE)
+		EVPROCDEQUE(p, evq);
+
+		TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q
+		FREE(evq, M_TEMP);
+	}
+}

 /*
- * enque this event if it's not already queued. wakeup
- the proc if we do queue this event to it.
+ * enqueue this event if it's not already queued. wakeup
+ * the proc if we do queue this event to it...
+ * entered with proc lock held...
we drop it before + * doing the wakeup and return in that state */ -void -evprocenque(struct eventqelt *eqp) +static void +evprocenque(struct eventqelt *evq) { - struct proc *p; - - assert(eqp); - KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, eqp, eqp->ee_flags, eqp->ee_eventmask,0,0); - if (eqp->ee_flags & EV_QUEUED) { - KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0); - return; - } - eqp->ee_flags |= EV_QUEUED; - eqp->ee_eventmask = 0; // disarm - p = eqp->ee_proc; - TAILQ_INSERT_TAIL(&p->p_evlist, eqp, ee_plist); - KERNEL_DEBUG(DBG_MISC_EWAKEUP,0,0,0,eqp,0); - wakeup(&p->p_evlist); - KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0); + proc_t p; + + assert(evq); + p = evq->ee_proc; + + KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, evq, evq->ee_flags, evq->ee_eventmask,0,0); + + proc_lock(p); + + if (evq->ee_flags & EV_QUEUED) { + proc_unlock(p); + + KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0); + return; + } + evq->ee_flags |= EV_QUEUED; + + TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist); + + proc_unlock(p); + + wakeup(&p->p_evlist); + + KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0); } + /* - * given either a sockbuf or a socket run down the - * event list and queue ready events found + * pipe lock must be taken by the caller */ void -postevent(struct socket *sp, struct sockbuf *sb, int event) +postpipeevent(struct pipe *pipep, int event) { - int mask; - struct eventqelt *evq; - register struct tcpcb *tp; - - if (sb) sp = sb->sb_so; - if (!sp || sp->so_evlist.tqh_first == NULL) return; - - KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,0,0); - - for (evq = sp->so_evlist.tqh_first; - evq != NULL; evq = evq->ee_slist.tqe_next) { - - mask = 0; - - /* ready for reading: - - byte cnt >= receive low water mark - - read-half of conn closed - - conn pending for listening sock - - socket error pending - - ready for writing - - byte cnt avail >= send low water mark - - write half of conn closed - - socket error pending - - non-blocking conn completed successfully - - exception pending - - out of band data - - sock at out of band mark - - */ - switch (event & EV_DMASK) { - - case EV_RWBYTES: - case EV_OOB: - case EV_RWBYTES|EV_OOB: - if (event & EV_OOB) { - if ((evq->ee_eventmask & EV_EX)) { - if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) { - mask |= EV_EX|EV_OOB; - } - } - } - if (event & EV_RWBYTES) { - if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) { - if ((sp->so_type == SOCK_STREAM) && (sp->so_error == ECONNREFUSED) || - (sp->so_error == ECONNRESET)) { - if ((sp->so_pcb == 0) || - !(tp = sototcpcb(sp)) || - (tp->t_state == TCPS_CLOSED)) { - mask |= EV_RE|EV_RESET; - break; - } - } - if (sp->so_state & SS_CANTRCVMORE) { - mask |= EV_RE|EV_FIN; - evq->ee_req.er_rcnt = sp->so_rcv.sb_cc; - break; - } - mask |= EV_RE; - evq->ee_req.er_rcnt = sp->so_rcv.sb_cc; - } - - if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) { - if ((sp->so_type == SOCK_STREAM) &&(sp->so_error == ECONNREFUSED) || - (sp->so_error == ECONNRESET)) { - if ((sp->so_pcb == 0) || - !(tp = sototcpcb(sp)) || - (tp->t_state == TCPS_CLOSED)) { - mask |= EV_WR|EV_RESET; - break; - } - } - mask |= EV_WR; - evq->ee_req.er_wcnt = sbspace(&sp->so_snd); - } - } - break; - - case EV_RCONN: - if ((evq->ee_eventmask & EV_RE)) { - evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one - mask |= EV_RE|EV_RCONN; - } - break; - - case EV_WCONN: - if ((evq->ee_eventmask & EV_WR)) { - mask |= EV_WR|EV_WCONN; - } - break; - - case EV_RCLOSED: - if ((evq->ee_eventmask & EV_RE)) { - mask |= 
EV_RE|EV_RCLOSED; - } - break; - - case EV_WCLOSED: - if ((evq->ee_eventmask & EV_WR)) { - mask |= EV_WR|EV_WCLOSED; - } - break; - - case EV_FIN: - if (evq->ee_eventmask & EV_RE) { - mask |= EV_RE|EV_FIN; - } - break; - - case EV_RESET: - case EV_TIMEOUT: - if (evq->ee_eventmask & EV_RE) { - mask |= EV_RE | event; - } - if (evq->ee_eventmask & EV_WR) { - mask |= EV_WR | event; - } - break; - - default: - return; - } /* switch */ - - if (mask) { - evq->ee_req.er_eventbits |= mask; - KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask,0,0); - evprocenque(evq); - } - } - KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,0,0); + int mask; + struct eventqelt *evq; + + if (pipep == NULL) + return; + KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0); + + for (evq = pipep->pipe_evlist.tqh_first; + evq != NULL; evq = evq->ee_slist.tqe_next) { + + if (evq->ee_eventmask == 0) + continue; + mask = 0; + + switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) { + + case EV_RWBYTES: + if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) { + mask |= EV_RE; + evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt; + } + if ((evq->ee_eventmask & EV_WR) && + (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) { + + if (pipep->pipe_state & PIPE_EOF) { + mask |= EV_WR|EV_RESET; + break; + } + mask |= EV_WR; + evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt; + } + break; + + case EV_WCLOSED: + case EV_RCLOSED: + if ((evq->ee_eventmask & EV_RE)) { + mask |= EV_RE|EV_RCLOSED; + } + if ((evq->ee_eventmask & EV_WR)) { + mask |= EV_WR|EV_WCLOSED; + } + break; + + default: + return; + } + if (mask) { + /* + * disarm... postevents are nops until this event is 'read' via + * waitevent and then re-armed via modwatch + */ + evq->ee_eventmask = 0; + + /* + * since events are disarmed until after the waitevent + * the ee_req.er_xxxx fields can't change once we've + * inserted this event into the proc queue... + * therefore, the waitevent will see a 'consistent' + * snapshot of the event, even though it won't hold + * the pipe lock, and we're updating the event outside + * of the proc lock, which it will hold + */ + evq->ee_req.er_eventbits |= mask; + + KERNEL_DEBUG(DBG_MISC_POST, evq, evq->ee_req.er_eventbits, mask, 1,0); + + evprocenque(evq); + } + } + KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0); } + /* - * remove and return the first event (eqp=NULL) or a specific - * event, or return NULL if no events found + * given either a sockbuf or a socket run down the + * event list and queue ready events found... 
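
/*
 * A compact model (plain C, not kernel code) of the arm/queue protocol
 * postpipeevent() above and postevent() below both follow: while armed, a
 * post accumulates er_eventbits, queues the event, and disarms it, so
 * further posts are nops; waitevent() dequeues it, and only a later
 * modwatch() re-arms it.
 */
#include <stdio.h>

struct model_ev {
	int	armed;		/* ee_eventmask != 0 */
	int	queued;		/* EV_QUEUED: on p_evlist */
	int	eventbits;	/* er_eventbits accumulated so far */
};

static void
model_post(struct model_ev *ev, int mask)
{
	if (!ev->armed)
		return;			/* disarmed: post is a nop */
	ev->armed = 0;			/* disarm until read and re-armed */
	ev->eventbits |= mask;
	ev->queued = 1;			/* evprocenque() */
}

static int
model_waitevent(struct model_ev *ev)
{
	if (!ev->queued)
		return (-1);		/* would sleep on p_evlist */
	ev->queued = 0;			/* dequeued from p_evlist */
	return (ev->eventbits);
}

static void
model_modwatch(struct model_ev *ev)
{
	ev->eventbits = 0;		/* er_eventbits cleared */
	ev->armed = 1;			/* re-armed with a new mask */
}

int
main(void)
{
	struct model_ev ev = { 1, 0, 0 };

	model_post(&ev, 0x1);				/* queues, disarms */
	model_post(&ev, 0x2);				/* nop */
	printf("delivered 0x%x\n", model_waitevent(&ev));	/* 0x1 */
	model_modwatch(&ev);
	model_post(&ev, 0x2);
	printf("delivered 0x%x\n", model_waitevent(&ev));	/* 0x2 */
	return (0);
}
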
+ * the socket must be locked by the caller */ -struct eventqelt * -evprocdeque(struct proc *p, struct eventqelt *eqp) +void +postevent(struct socket *sp, struct sockbuf *sb, int event) { - - KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_START,p,eqp,0,0,0); - - if (eqp && ((eqp->ee_flags & EV_QUEUED) == NULL)) { - KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0); - return(NULL); - } - if (p->p_evlist.tqh_first == NULL) { - KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,0,0,0,0,0); - return(NULL); - } - if (eqp == NULL) { // remove first - eqp = p->p_evlist.tqh_first; - } - TAILQ_REMOVE(&p->p_evlist, eqp, ee_plist); - eqp->ee_flags &= ~EV_QUEUED; - KERNEL_DEBUG(DBG_MISC_DEQUEUE|DBG_FUNC_END,eqp,0,0,0,0); - return(eqp); + int mask; + struct eventqelt *evq; + struct tcpcb *tp; + + if (sb) + sp = sb->sb_so; + if (sp == NULL) + return; + + KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0); + + for (evq = sp->so_evlist.tqh_first; + evq != NULL; evq = evq->ee_slist.tqe_next) { + + if (evq->ee_eventmask == 0) + continue; + mask = 0; + + /* ready for reading: + - byte cnt >= receive low water mark + - read-half of conn closed + - conn pending for listening sock + - socket error pending + + ready for writing + - byte cnt avail >= send low water mark + - write half of conn closed + - socket error pending + - non-blocking conn completed successfully + + exception pending + - out of band data + - sock at out of band mark + */ + + switch (event & EV_DMASK) { + + case EV_OOB: + if ((evq->ee_eventmask & EV_EX)) { + if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) + mask |= EV_EX|EV_OOB; + } + break; + + case EV_RWBYTES|EV_OOB: + if ((evq->ee_eventmask & EV_EX)) { + if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK))) + mask |= EV_EX|EV_OOB; + } + /* + * fall into the next case + */ + case EV_RWBYTES: + if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) { + if (sp->so_error) { + if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) { + if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) || + (tp->t_state == TCPS_CLOSED)) { + mask |= EV_RE|EV_RESET; + break; + } + } + } + mask |= EV_RE; + evq->ee_req.er_rcnt = sp->so_rcv.sb_cc; + + if (sp->so_state & SS_CANTRCVMORE) { + mask |= EV_FIN; + break; + } + } + if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) { + if (sp->so_error) { + if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) { + if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) || + (tp->t_state == TCPS_CLOSED)) { + mask |= EV_WR|EV_RESET; + break; + } + } + } + mask |= EV_WR; + evq->ee_req.er_wcnt = sbspace(&sp->so_snd); + } + break; + + case EV_RCONN: + if ((evq->ee_eventmask & EV_RE)) { + mask |= EV_RE|EV_RCONN; + evq->ee_req.er_rcnt = sp->so_qlen + 1; // incl this one + } + break; + + case EV_WCONN: + if ((evq->ee_eventmask & EV_WR)) { + mask |= EV_WR|EV_WCONN; + } + break; + + case EV_RCLOSED: + if ((evq->ee_eventmask & EV_RE)) { + mask |= EV_RE|EV_RCLOSED; + } + break; + + case EV_WCLOSED: + if ((evq->ee_eventmask & EV_WR)) { + mask |= EV_WR|EV_WCLOSED; + } + break; + + case EV_FIN: + if (evq->ee_eventmask & EV_RE) { + mask |= EV_RE|EV_FIN; + } + break; + + case EV_RESET: + case EV_TIMEOUT: + if (evq->ee_eventmask & EV_RE) { + mask |= EV_RE | event; + } + if (evq->ee_eventmask & EV_WR) { + mask |= EV_WR | event; + } + break; + + default: + 
KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0); + return; + } /* switch */ + + KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0); + + if (mask) { + /* + * disarm... postevents are nops until this event is 'read' via + * waitevent and then re-armed via modwatch + */ + evq->ee_eventmask = 0; + + /* + * since events are disarmed until after the waitevent + * the ee_req.er_xxxx fields can't change once we've + * inserted this event into the proc queue... + * since waitevent can't see this event until we + * enqueue it, waitevent will see a 'consistent' + * snapshot of the event, even though it won't hold + * the socket lock, and we're updating the event outside + * of the proc lock, which it will hold + */ + evq->ee_req.er_eventbits |= mask; + + evprocenque(evq); + } + } + KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0); } -struct evwatch_args { - struct eventreq *u_req; - int u_eventmask; -}; - /* * watchevent system call. user passes us an event to watch @@ -1552,78 +2063,99 @@ struct evwatch_args { * should this prevent duplicate events on same socket? */ int -watchevent(p, uap, retval) - struct proc *p; - struct evwatch_args *uap; - register_t *retval; +watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval) { - struct eventqelt *eqp = (struct eventqelt *)0; - struct eventqelt *np; - struct eventreq *erp; - struct file *fp; - struct socket *sp; - int error; - - KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0); - - // get a qelt and fill with users req - MALLOC(eqp, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK); - if (!eqp) panic("can't MALLOC eqp"); - erp = &eqp->ee_req; - // get users request pkt - if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp, - sizeof(struct eventreq))) { - FREE(eqp, M_TEMP); - KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0); - return(error); - } - KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,eqp,0,0); - // validate, freeing qelt if errors - error = 0; - if (erp->er_type != EV_FD) { - error = EINVAL; - } else if (erp->er_handle < 0) { - error = EBADF; - } else if (erp->er_handle > p->p_fd->fd_nfiles) { - error = EBADF; - } else if ((fp = *fdfile(p, erp->er_handle)) == NULL) { - error = EBADF; - } else if (fp->f_type != DTYPE_SOCKET) { - error = EINVAL; - } - if (error) { - FREE(eqp,M_TEMP); - KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0); - return(error); - } - - erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0; - eqp->ee_proc = p; - eqp->ee_eventmask = uap->u_eventmask & EV_MASK; - eqp->ee_flags = 0; - - sp = (struct socket *)fp->f_data; - assert(sp != NULL); - - // only allow one watch per file per proc - for (np = sp->so_evlist.tqh_first; np != NULL; np = np->ee_slist.tqe_next) { - if (np->ee_proc == p) { - FREE(eqp,M_TEMP); - KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0); - return(EINVAL); - } - } - - TAILQ_INSERT_TAIL(&sp->so_evlist, eqp, ee_slist); - postevent(sp, 0, EV_RWBYTES); // catch existing events - KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0); - return(0); + struct eventqelt *evq = (struct eventqelt *)0; + struct eventqelt *np = NULL; + struct eventreq *erp; + struct fileproc *fp = NULL; + int error; + + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0); + + // get a qelt and fill with users req + MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK); + + if (evq == NULL) + panic("can't MALLOC evq"); + erp = &evq->ee_req; + + // get users request pkt + if 
( (error = copyin(CAST_USER_ADDR_T(uap->u_req), (caddr_t)erp, + sizeof(struct eventreq))) ) { + FREE(evq, M_TEMP); + + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0); + return(error); + } + KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,evq,0,0); + + // validate, freeing qelt if errors + error = 0; + proc_fdlock(p); + + if (erp->er_type != EV_FD) { + error = EINVAL; + } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) { + error = EBADF; + } else if (fp->f_type == DTYPE_SOCKET) { + socket_lock((struct socket *)fp->f_data, 1); + np = ((struct socket *)fp->f_data)->so_evlist.tqh_first; + } else if (fp->f_type == DTYPE_PIPE) { + PIPE_LOCK((struct pipe *)fp->f_data); + np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first; + } else { + fp_drop(p, erp->er_handle, fp, 1); + error = EINVAL; + } + proc_fdunlock(p); + + if (error) { + FREE(evq, M_TEMP); + + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0); + return(error); + } + + /* + * only allow one watch per file per proc + */ + for ( ; np != NULL; np = np->ee_slist.tqe_next) { + if (np->ee_proc == p) { + if (fp->f_type == DTYPE_SOCKET) + socket_unlock((struct socket *)fp->f_data, 1); + else + PIPE_UNLOCK((struct pipe *)fp->f_data); + fp_drop(p, erp->er_handle, fp, 0); + FREE(evq, M_TEMP); + + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0); + return(EINVAL); + } + } + erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0; + evq->ee_proc = p; + evq->ee_eventmask = uap->u_eventmask & EV_MASK; + evq->ee_flags = 0; + + if (fp->f_type == DTYPE_SOCKET) { + TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist); + postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events + + socket_unlock((struct socket *)fp->f_data, 1); + } else { + TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist); + postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES); + + PIPE_UNLOCK((struct pipe *)fp->f_data); + } + fp_drop_event(p, erp->er_handle, fp); + + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0); + return(0); } -struct evwait_args { - struct eventreq *u_req; - struct timeval *tv; -}; + /* * waitevent system call. @@ -1632,57 +2164,71 @@ struct evwait_args { * or poll mode (tv=NULL); */ int -waitevent(p, uap, retval) - struct proc *p; - struct evwait_args *uap; - register_t *retval; +waitevent(proc_t p, struct waitevent_args *uap, int *retval) { - int error = 0; - struct eventqelt *eqp; + int error = 0; + struct eventqelt *evq; + struct eventreq er; uint64_t abstime, interval; if (uap->tv) { struct timeval atv; - error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); + error = copyin(CAST_USER_ADDR_T(uap->tv), (caddr_t)&atv, sizeof (atv)); if (error) return(error); if (itimerfix(&atv)) { error = EINVAL; return(error); } - interval = tvtoabstime(&atv); - } - else - abstime = interval = 0; + } else + interval = 0; KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0); + proc_lock(p); retry: - if ((eqp = evprocdeque(p,NULL)) != NULL) { - error = copyout((caddr_t)&eqp->ee_req, - (caddr_t)uap->u_req, sizeof(struct eventreq)); - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error, - eqp->ee_req.er_handle,eqp->ee_req.er_eventbits,eqp,0); + if ((evq = p->p_evlist.tqh_first) != NULL) { + /* + * found one... 
make a local copy while it's still on the queue + * to prevent it from changing while in the midst of copying + * don't want to hold the proc lock across a copyout because + * it might block on a page fault at the target in user space + */ + bcopy((caddr_t)&evq->ee_req, (caddr_t)&er, sizeof (struct eventreq)); + + TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); + + evq->ee_flags &= ~EV_QUEUED; + proc_unlock(p); + + error = copyout((caddr_t)&er, CAST_USER_ADDR_T(uap->u_req), sizeof(struct eventreq)); + + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error, + evq->ee_req.er_handle,evq->ee_req.er_eventbits,evq,0); return (error); } else { if (uap->tv && interval == 0) { + proc_unlock(p); *retval = 1; // poll failed - KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0); + KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0); return (error); } - if (interval != 0) clock_absolutetime_interval_to_deadline(interval, &abstime); + else + abstime = 0; KERNEL_DEBUG(DBG_MISC_WAIT, 1,&p->p_evlist,0,0,0); - error = tsleep1(&p->p_evlist, PSOCK | PCATCH, - "waitevent", abstime, (int (*)(int))0); + + error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime); + KERNEL_DEBUG(DBG_MISC_WAIT, 2,&p->p_evlist,0,0,0); + if (error == 0) goto retry; if (error == ERESTART) @@ -1692,16 +2238,12 @@ retry: error = 0; } } + proc_unlock(p); KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0); - return (error); } -struct modwatch_args { - struct eventreq *u_req; - int u_eventmask; -}; /* * modwatch system call. user passes in event to modify. @@ -1709,87 +2251,202 @@ struct modwatch_args { * it needed. */ int -modwatch(p, uap, retval) - struct proc *p; - struct modwatch_args *uap; - register_t *retval; +modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval) { - struct eventreq er; - struct eventreq *erp = &er; - struct eventqelt *evq; - int error; - struct file *fp; - struct socket *sp; - int flag; - - KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0); - - // get users request pkt - if (error = copyin((caddr_t)uap->u_req, (caddr_t)erp, - sizeof(struct eventreq))) return(error); - - if (erp->er_type != EV_FD) return(EINVAL); - if (erp->er_handle < 0) return(EBADF); - if (erp->er_handle > p->p_fd->fd_nfiles) return(EBADF); - if ((fp = *fdfile(p, erp->er_handle)) == NULL) - return(EBADF); - if (fp->f_type != DTYPE_SOCKET) return(EINVAL); // for now must be sock - sp = (struct socket *)fp->f_data; - - /* soo_close sets f_data to 0 before switching funnel */ - if (sp == (struct socket *)0) - return(EBADF); - - // locate event if possible - for (evq = sp->so_evlist.tqh_first; - evq != NULL; evq = evq->ee_slist.tqe_next) { - if (evq->ee_proc == p) break; - } - - if (evq == NULL) { - KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0); - return(EINVAL); - } - KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0); - - if (uap->u_eventmask == EV_RM) { - evprocdeque(p, evq); - TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); - FREE(evq, M_TEMP); - KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0); - return(0); - } - - switch (uap->u_eventmask & EV_MASK) { + struct eventreq er; + struct eventreq *erp = &er; + struct eventqelt *evq; + int error; + struct fileproc *fp; + int flag; + + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0); + + /* + * get user's request pkt + */ + if ((error = copyin(CAST_USER_ADDR_T(uap->u_req), (caddr_t)erp, + sizeof(struct eventreq)))) { + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0); + return(error); + } + proc_fdlock(p); + + if 
(erp->er_type != EV_FD) { + error = EINVAL; + } else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) { + error = EBADF; + } else if (fp->f_type == DTYPE_SOCKET) { + socket_lock((struct socket *)fp->f_data, 1); + evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first; + } else if (fp->f_type == DTYPE_PIPE) { + PIPE_LOCK((struct pipe *)fp->f_data); + evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first; + } else { + fp_drop(p, erp->er_handle, fp, 1); + error = EINVAL; + } + + if (error) { + proc_fdunlock(p); + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0); + return(error); + } + + if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) { + fp->f_flags &= ~FP_WAITEVENT; + } + proc_fdunlock(p); + + // locate event if possible + for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) { + if (evq->ee_proc == p) + break; + } + if (evq == NULL) { + if (fp->f_type == DTYPE_SOCKET) + socket_unlock((struct socket *)fp->f_data, 1); + else + PIPE_UNLOCK((struct pipe *)fp->f_data); + fp_drop(p, erp->er_handle, fp, 0); + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0); + return(EINVAL); + } + KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,evq,0,0); + + if (uap->u_eventmask == EV_RM) { + EVPROCDEQUE(p, evq); + + if (fp->f_type == DTYPE_SOCKET) { + TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist); + socket_unlock((struct socket *)fp->f_data, 1); + } else { + TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist); + PIPE_UNLOCK((struct pipe *)fp->f_data); + } + fp_drop(p, erp->er_handle, fp, 0); + FREE(evq, M_TEMP); + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0); + return(0); + } + switch (uap->u_eventmask & EV_MASK) { - case 0: - flag = 0; - break; - - case EV_RE: - case EV_WR: - case EV_RE|EV_WR: - flag = EV_RWBYTES; - break; - - case EV_EX: - flag = EV_OOB; - break; - - case EV_EX|EV_RE: - case EV_EX|EV_WR: - case EV_EX|EV_RE|EV_WR: - flag = EV_OOB|EV_RWBYTES; - break; - - default: - return(EINVAL); - } - - evq->ee_eventmask = uap->u_eventmask & EV_MASK; - evprocdeque(p, evq); - evq->ee_req.er_eventbits = 0; - postevent(sp, 0, flag); - KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,sp,flag,0); - return(0); + case 0: + flag = 0; + break; + + case EV_RE: + case EV_WR: + case EV_RE|EV_WR: + flag = EV_RWBYTES; + break; + + case EV_EX: + flag = EV_OOB; + break; + + case EV_EX|EV_RE: + case EV_EX|EV_WR: + case EV_EX|EV_RE|EV_WR: + flag = EV_OOB|EV_RWBYTES; + break; + + default: + if (fp->f_type == DTYPE_SOCKET) + socket_unlock((struct socket *)fp->f_data, 1); + else + PIPE_UNLOCK((struct pipe *)fp->f_data); + fp_drop(p, erp->er_handle, fp, 0); + KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0); + return(EINVAL); + } + /* + * since we're holding the socket/pipe lock, the event + * cannot go from the unqueued state to the queued state + * however, it can go from the queued state to the unqueued state + * since that direction is protected by the proc_lock... + * so do a quick check for EV_QUEUED w/o holding the proc lock + * since by far the common case will be NOT EV_QUEUED, this saves + * us taking the proc_lock the majority of the time + */ + if (evq->ee_flags & EV_QUEUED) { + /* + * EVPROCDEQUE will recheck the state after it grabs the proc_lock + */ + EVPROCDEQUE(p, evq); + } + /* + * while the event is off the proc queue and + * we're holding the socket/pipe lock + * it's safe to update these fields... 
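
/*
 * The switch above reduces the caller's EV_RE/EV_WR/EV_EX interest mask to
 * the event class modwatch() must repost to pick up state that was already
 * pending. The same mapping as a standalone function; the constants are
 * local stand-ins with the patch's meanings, not the real sys/ev.h values.
 */
#include <assert.h>

#define EV_RE		0x01
#define EV_WR		0x02
#define EV_EX		0x04
#define EV_RWBYTES	0x10
#define EV_OOB		0x20

static int
modwatch_repost_flag(int eventmask)
{
	int flag = 0;

	if (eventmask & (EV_RE | EV_WR))
		flag |= EV_RWBYTES;	/* read/write interest: byte counts */
	if (eventmask & EV_EX)
		flag |= EV_OOB;		/* exception interest: OOB state */
	return (flag);
}

int
main(void)
{
	assert(modwatch_repost_flag(0) == 0);
	assert(modwatch_repost_flag(EV_RE | EV_WR) == EV_RWBYTES);
	assert(modwatch_repost_flag(EV_EX) == EV_OOB);
	assert(modwatch_repost_flag(EV_EX | EV_RE) == (EV_OOB | EV_RWBYTES));
	return (0);
}
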
+ */ + evq->ee_req.er_eventbits = 0; + evq->ee_eventmask = uap->u_eventmask & EV_MASK; + + if (fp->f_type == DTYPE_SOCKET) { + postevent((struct socket *)fp->f_data, 0, flag); + socket_unlock((struct socket *)fp->f_data, 1); + } + else { + postpipeevent((struct pipe *)fp->f_data, flag); + PIPE_UNLOCK((struct pipe *)fp->f_data); + } + fp_drop(p, erp->er_handle, fp, 0); + KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,fp->f_data,flag,0); + return(0); } + +/* this routine is called from the close of fd with proc_fdlock held */ +int +waitevent_close(struct proc *p, struct fileproc *fp) +{ + struct eventqelt *evq; + + + fp->f_flags &= ~FP_WAITEVENT; + + if (fp->f_type == DTYPE_SOCKET) { + socket_lock((struct socket *)fp->f_data, 1); + evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first; + } + else if (fp->f_type == DTYPE_PIPE) { + PIPE_LOCK((struct pipe *)fp->f_data); + evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first; + } + else { + return(EINVAL); + } + proc_fdunlock(p); + + + // locate event if possible + for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) { + if (evq->ee_proc == p) + break; + } + if (evq == NULL) { + if (fp->f_type == DTYPE_SOCKET) + socket_unlock((struct socket *)fp->f_data, 1); + else + PIPE_UNLOCK((struct pipe *)fp->f_data); + + proc_fdlock(p); + + return(EINVAL); + } + EVPROCDEQUE(p, evq); + + if (fp->f_type == DTYPE_SOCKET) { + TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist); + socket_unlock((struct socket *)fp->f_data, 1); + } else { + TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist); + PIPE_UNLOCK((struct pipe *)fp->f_data); + } + FREE(evq, M_TEMP); + + proc_fdlock(p); + + return(0); +} + diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c new file mode 100644 index 000000000..2fb396aa0 --- /dev/null +++ b/bsd/kern/sys_pipe.c @@ -0,0 +1,1646 @@ +/* + * Copyright (c) 1996 John S. Dyson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice immediately at the beginning of the file, without modification, + * this list of conditions, and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Absolutely no warranty of function or purpose is made by the author + * John S. Dyson. + * 4. Modifications may be freely made to this file if the above conditions + * are met. + */ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * This file contains a high-performance replacement for the socket-based + * pipes scheme originally used in FreeBSD/4.4Lite. It does not support + * all features of sockets, but does do everything that pipes normally + * do. + */ + +/* + * This code has two modes of operation, a small write mode and a large + * write mode. The small write mode acts like conventional pipes with + * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the + * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT + * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and + * the receiving process can copy it directly from the pages in the sending + * process. + * + * If the sending process receives a signal, it is possible that it will + * go away, and certainly its address space can change, because control + * is returned back to the user-mode side. In that case, the pipe code + * arranges to copy the buffer supplied by the user process, to a pageable + * kernel buffer, and the receiving process will grab the data from the + * pageable kernel buffer. Since signals don't happen all that often, + * the copy operation is normally eliminated. + * + * The constant PIPE_MINDIRECT is chosen to make sure that buffering will + * happen for small transfers so that the system will not spend all of + * its time context switching. + * + * In order to limit the resource use of pipes, two sysctls exist: + * + * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable + * address space available to us in pipe_map. Whenever the amount in use + * exceeds half of this value, all new pipes will be created with size + * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited + * as well. This value is loader tunable only. + * + * kern.ipc.maxpipekvawired - This value limits the amount of memory that may + * be wired in order to facilitate direct copies using page flipping. + * Whenever this value is exceeded, pipes will fall back to using regular + * copies. This value is sysctl controllable at all times. + * + * These values are autotuned in subr_param.c. + * + * Memory usage may be monitored through the sysctls + * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data +/* + * Use this define if you want to disable *fancy* VM things. Expect an + * approx 30% decrease in transfer rate. This could be useful for + * NetBSD or OpenBSD. 
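
/*
 * A userspace sketch of probing the accounting described above with
 * sysctlbyname(3). The SYSCTL_INT block below is only compiled when
 * PIPE_SYSCTLS is defined, so on builds without it these OIDs won't exist
 * and the lookups simply fail.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	static const char *oids[] = {
		"kern.ipc.maxpipekva", "kern.ipc.pipes", "kern.ipc.pipekva",
	};
	u_int i;

	for (i = 0; i < sizeof (oids) / sizeof (oids[0]); i++) {
		int val;
		size_t len = sizeof (val);

		if (sysctlbyname(oids[i], &val, &len, NULL, 0) == -1)
			printf("%s: not present on this build\n", oids[i]);
		else
			printf("%s = %d\n", oids[i], val);
	}
	return (0);
}
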
+ * + * this needs to be ported to X and the performance measured + * before committing to supporting it + */ +#define PIPE_NODIRECT 1 + +#ifndef PIPE_NODIRECT + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif + + +/* + * interfaces to the outside world + */ +static int pipe_read(struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); + +static int pipe_write(struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); + +static int pipe_close(struct fileglob *fg, struct proc *p); + +static int pipe_select(struct fileproc *fp, int which, void * wql, struct proc *p); + +static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); + +static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p); + + +struct fileops pipeops = + { pipe_read, + pipe_write, + pipe_ioctl, + pipe_select, + pipe_close, + pipe_kqfilter, + 0 }; + + +static void filt_pipedetach(struct knote *kn); +static int filt_piperead(struct knote *kn, long hint); +static int filt_pipewrite(struct knote *kn, long hint); + +static struct filterops pipe_rfiltops = + { 1, NULL, filt_pipedetach, filt_piperead }; +static struct filterops pipe_wfiltops = + { 1, NULL, filt_pipedetach, filt_pipewrite }; + +/* + * Default pipe buffer size(s), this can be kind-of large now because pipe + * space is pageable. The pipe code will try to maintain locality of + * reference for performance reasons, so small amounts of outstanding I/O + * will not wipe the cache. + */ +#define MINPIPESIZE (PIPE_SIZE/3) + +/* + * Limit the number of "big" pipes + */ +#define LIMITBIGPIPES 32 +static int nbigpipe; + +static int amountpipes; +static int amountpipekva; + +#ifndef PIPE_NODIRECT +static int amountpipekvawired; +#endif +int maxpipekva = 1024 * 1024 * 16; + +#if PIPE_SYSCTLS +SYSCTL_DECL(_kern_ipc); + +SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD, + &maxpipekva, 0, "Pipe KVA limit"); +SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW, + &maxpipekvawired, 0, "Pipe KVA wired limit"); +SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD, + &amountpipes, 0, "Current # of pipes"); +SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD, + &nbigpipe, 0, "Current # of big pipes"); +SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, + &amountpipekva, 0, "Pipe KVA usage"); +SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD, + &amountpipekvawired, 0, "Pipe wired KVA usage"); +#endif + +void pipeinit(void *dummy __unused); +static void pipeclose(struct pipe *cpipe); +static void pipe_free_kmem(struct pipe *cpipe); +static int pipe_create(struct pipe **cpipep); +static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe); +static __inline int pipelock(struct pipe *cpipe, int catch); +static __inline void pipeunlock(struct pipe *cpipe); + +#ifndef PIPE_NODIRECT +static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); +static void pipe_destroy_write_buffer(struct pipe *wpipe); +static int pipe_direct_write(struct pipe *wpipe, struct uio *uio); +static void pipe_clone_write_buffer(struct pipe *wpipe); +#endif + +extern int postpipeevent(struct pipe *, int); +extern void evpipefree(struct pipe *cpipe); + + +static int pipespace(struct pipe *cpipe, int size); + +static lck_grp_t *pipe_mtx_grp; +static lck_attr_t *pipe_mtx_attr; +static lck_grp_attr_t *pipe_mtx_grp_attr; + +static zone_t pipe_zone; + +SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); + +void 
+pipeinit(void *dummy __unused) +{ + pipe_zone = (zone_t)zinit(sizeof(struct pipe), 8192 * sizeof(struct pipe), 4096, "pipe zone"); + + /* + * allocate lock group attribute and group for pipe mutexes + */ + pipe_mtx_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(pipe_mtx_grp_attr); + pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr); + + /* + * allocate the lock attribute for pipe mutexes + */ + pipe_mtx_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(pipe_mtx_attr); +} + + + +/* + * The pipe system call for the DTYPE_PIPE type of pipes + */ + +/* ARGSUSED */ +int +pipe(struct proc *p, __unused struct pipe_args *uap, register_t *retval) +{ + struct fileproc *rf, *wf; + struct pipe *rpipe, *wpipe; + lck_mtx_t *pmtx; + int fd, error; + + if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL) + return (ENOMEM); + + rpipe = wpipe = NULL; + if (pipe_create(&rpipe) || pipe_create(&wpipe)) { + error = ENFILE; + goto freepipes; + } + /* + * allocate the space for the normal I/O direction up + * front... we'll delay the allocation for the other + * direction until a write actually occurs (most + * likely it won't)... + * + * Reduce to 1/4th pipe size if we're over our global max. + */ + if (amountpipekva > maxpipekva / 2) + error = pipespace(rpipe, SMALL_PIPE_SIZE); + else + error = pipespace(rpipe, PIPE_SIZE); + if (error) + goto freepipes; + +#ifndef PIPE_NODIRECT + rpipe->pipe_state |= PIPE_DIRECTOK; + wpipe->pipe_state |= PIPE_DIRECTOK; +#endif + TAILQ_INIT(&rpipe->pipe_evlist); + TAILQ_INIT(&wpipe->pipe_evlist); + + error = falloc(p, &rf, &fd); + if (error) { + goto freepipes; + } + retval[0] = fd; + + /* + * for now we'll create half-duplex + * pipes... this is what we've always + * supported.. + */ + rf->f_flag = FREAD; + rf->f_type = DTYPE_PIPE; + rf->f_data = (caddr_t)rpipe; + rf->f_ops = &pipeops; + + error = falloc(p, &wf, &fd); + if (error) { + fp_free(p, retval[0], rf); + goto freepipes; + } + wf->f_flag = FWRITE; + wf->f_type = DTYPE_PIPE; + wf->f_data = (caddr_t)wpipe; + wf->f_ops = &pipeops; + + retval[1] = fd; +#ifdef MAC + /* + * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX + * + * struct pipe represents a pipe endpoint. The MAC label is shared + * between the connected endpoints. As a result mac_init_pipe() and + * mac_create_pipe() should only be called on one of the endpoints + * after they have been connected. 
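
/*
 * The conventional use the half-duplex allocation strategy above is tuned
 * for: reads from fd[0], writes to fd[1], so only the read side's buffer
 * is allocated up front (a userspace sketch, not part of the patch).
 */
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd[2];
	char buf[16];
	ssize_t n;

	if (pipe(fd) == -1)
		return (1);

	(void)write(fd[1], "hello", 5);		/* fd[1]: write end */
	n = read(fd[0], buf, sizeof (buf));	/* fd[0]: read end  */
	if (n > 0)
		printf("read %zd bytes: %.*s\n", n, (int)n, buf);

	close(fd[1]);				/* drops the last writer */
	if (read(fd[0], buf, sizeof (buf)) == 0)
		printf("write side closed: EOF\n");
	close(fd[0]);
	return (0);
}
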
+ */ + mac_init_pipe(rpipe); + mac_create_pipe(td->td_ucred, rpipe); +#endif + proc_fdlock(p); + *fdflags(p, retval[0]) &= ~UF_RESERVED; + *fdflags(p, retval[1]) &= ~UF_RESERVED; + fp_drop(p, retval[0], rf, 1); + fp_drop(p, retval[1], wf, 1); + proc_fdunlock(p); + + rpipe->pipe_peer = wpipe; + wpipe->pipe_peer = rpipe; + + rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; + + return (0); + +freepipes: + pipeclose(rpipe); + pipeclose(wpipe); + lck_mtx_free(pmtx, pipe_mtx_grp); + + return (error); +} + + +int +pipe_stat(struct pipe *cpipe, struct stat *ub) +{ +#ifdef MAC + int error; +#endif + struct timeval now; + + if (cpipe == NULL) + return (EBADF); +#ifdef MAC + PIPE_LOCK(cpipe); + error = mac_check_pipe_stat(active_cred, cpipe); + PIPE_UNLOCK(cpipe); + if (error) + return (error); +#endif + if (cpipe->pipe_buffer.buffer == 0) { + /* + * must be stat'ing the write fd + */ + cpipe = cpipe->pipe_peer; + + if (cpipe == NULL) + return (EBADF); + } + bzero(ub, sizeof(*ub)); + ub->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; + ub->st_blksize = cpipe->pipe_buffer.size; + ub->st_size = cpipe->pipe_buffer.cnt; + ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; + ub->st_nlink = 1; + + ub->st_uid = kauth_getuid(); + ub->st_gid = kauth_getgid(); + + microtime(&now); + ub->st_atimespec.tv_sec = now.tv_sec; + ub->st_atimespec.tv_nsec = now.tv_usec * 1000; + + ub->st_mtimespec.tv_sec = now.tv_sec; + ub->st_mtimespec.tv_nsec = now.tv_usec * 1000; + + ub->st_ctimespec.tv_sec = now.tv_sec; + ub->st_ctimespec.tv_nsec = now.tv_usec * 1000; + + /* + * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid. + * XXX (st_dev, st_ino) should be unique. + */ + return (0); +} + + +/* + * Allocate kva for pipe circular buffer, the space is pageable + * This routine will 'realloc' the size of a pipe safely, if it fails + * it will retain the old buffer. + * If it fails it will return ENOMEM. + */ +static int +pipespace(struct pipe *cpipe, int size) +{ + vm_offset_t buffer; + + size = round_page(size); + + if (kmem_alloc(kernel_map, &buffer, size) != KERN_SUCCESS) + return(ENOMEM); + + /* free old resources if we're resizing */ + pipe_free_kmem(cpipe); + cpipe->pipe_buffer.buffer = (caddr_t)buffer; + cpipe->pipe_buffer.size = size; + cpipe->pipe_buffer.in = 0; + cpipe->pipe_buffer.out = 0; + cpipe->pipe_buffer.cnt = 0; + + OSAddAtomic(1, (SInt32 *)&amountpipes); + OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva); + + return (0); +} + +/* + * initialize and allocate VM and memory for pipe + */ +static int +pipe_create(struct pipe **cpipep) +{ + struct pipe *cpipe; + + cpipe = (struct pipe *)zalloc(pipe_zone); + + if ((*cpipep = cpipe) == NULL) + return (ENOMEM); + + /* + * protect so pipespace or pipeclose don't follow a junk pointer + * if pipespace() fails. + */ + bzero(cpipe, sizeof *cpipe); + + return (0); +} + + +/* + * lock a pipe for I/O, blocking other access + */ +static __inline int +pipelock(cpipe, catch) + struct pipe *cpipe; + int catch; +{ + int error; + + while (cpipe->pipe_state & PIPE_LOCKFL) { + cpipe->pipe_state |= PIPE_LWANT; + + error = msleep(cpipe, PIPE_MTX(cpipe), catch ? 
(PRIBIO | PCATCH) : PRIBIO, + "pipelk", 0); + if (error != 0) + return (error); + } + cpipe->pipe_state |= PIPE_LOCKFL; + + return (0); +} + +/* + * unlock a pipe I/O lock + */ +static __inline void +pipeunlock(cpipe) + struct pipe *cpipe; +{ + + cpipe->pipe_state &= ~PIPE_LOCKFL; + + if (cpipe->pipe_state & PIPE_LWANT) { + cpipe->pipe_state &= ~PIPE_LWANT; + wakeup(cpipe); + } +} + +static void +pipeselwakeup(cpipe, spipe) + struct pipe *cpipe; + struct pipe *spipe; +{ + + if (cpipe->pipe_state & PIPE_SEL) { + cpipe->pipe_state &= ~PIPE_SEL; + selwakeup(&cpipe->pipe_sel); + } + if (cpipe->pipe_state & PIPE_KNOTE) + KNOTE(&cpipe->pipe_sel.si_note, 1); + + postpipeevent(cpipe, EV_RWBYTES); + + if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) { + struct proc *p; + + if (spipe->pipe_pgid < 0) + gsignal(-spipe->pipe_pgid, SIGIO); + else if ((p = pfind(spipe->pipe_pgid)) != (struct proc *)0) + psignal(p, SIGIO); + } +} + +/* ARGSUSED */ +static int +pipe_read(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p) +{ + struct pipe *rpipe = (struct pipe *)fp->f_data; + int error; + int nread = 0; + u_int size; + + PIPE_LOCK(rpipe); + ++rpipe->pipe_busy; + + error = pipelock(rpipe, 1); + if (error) + goto unlocked_error; + +#ifdef MAC + error = mac_check_pipe_read(active_cred, rpipe); + if (error) + goto locked_error; +#endif + + while (uio_resid(uio)) { + /* + * normal pipe buffer receive + */ + if (rpipe->pipe_buffer.cnt > 0) { + size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; + if (size > rpipe->pipe_buffer.cnt) + size = rpipe->pipe_buffer.cnt; + // LP64todo - fix this! + if (size > (u_int) uio_resid(uio)) + size = (u_int) uio_resid(uio); + + PIPE_UNLOCK(rpipe); + error = uiomove( + &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], + size, uio); + PIPE_LOCK(rpipe); + if (error) + break; + + rpipe->pipe_buffer.out += size; + if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) + rpipe->pipe_buffer.out = 0; + + rpipe->pipe_buffer.cnt -= size; + + /* + * If there is no more to read in the pipe, reset + * its pointers to the beginning. This improves + * cache hit stats. + */ + if (rpipe->pipe_buffer.cnt == 0) { + rpipe->pipe_buffer.in = 0; + rpipe->pipe_buffer.out = 0; + } + nread += size; +#ifndef PIPE_NODIRECT + /* + * Direct copy, bypassing a kernel buffer. + */ + } else if ((size = rpipe->pipe_map.cnt) && + (rpipe->pipe_state & PIPE_DIRECTW)) { + caddr_t va; + // LP64todo - fix this! + if (size > (u_int) uio_resid(uio)) + size = (u_int) uio_resid(uio); + + va = (caddr_t) rpipe->pipe_map.kva + + rpipe->pipe_map.pos; + PIPE_UNLOCK(rpipe); + error = uiomove(va, size, uio); + PIPE_LOCK(rpipe); + if (error) + break; + nread += size; + rpipe->pipe_map.pos += size; + rpipe->pipe_map.cnt -= size; + if (rpipe->pipe_map.cnt == 0) { + rpipe->pipe_state &= ~PIPE_DIRECTW; + wakeup(rpipe); + } +#endif + } else { + /* + * detect EOF condition + * read returns 0 on EOF, no need to set error + */ + if (rpipe->pipe_state & PIPE_EOF) + break; + + /* + * If the "write-side" has been blocked, wake it up now. + */ + if (rpipe->pipe_state & PIPE_WANTW) { + rpipe->pipe_state &= ~PIPE_WANTW; + wakeup(rpipe); + } + + /* + * Break if some data was read. + */ + if (nread > 0) + break; + + /* + * Unlock the pipe buffer for our remaining processing. + * We will either break out with an error or we will + * sleep and relock to loop. + */ + pipeunlock(rpipe); + + /* + * Handle non-blocking mode operation or + * wait for more data. 
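
/*
 * What the FNONBLOCK branch just below means to userspace: reading an
 * empty pipe with O_NONBLOCK set returns -1/EAGAIN instead of sleeping in
 * "piperd" (a minimal sketch, not part of the patch).
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd[2];
	char c;

	if (pipe(fd) == -1)
		return (1);
	(void)fcntl(fd[0], F_SETFL, O_NONBLOCK);

	if (read(fd[0], &c, 1) == -1 && errno == EAGAIN)
		printf("empty pipe: EAGAIN, no sleep\n");

	(void)write(fd[1], "x", 1);
	if (read(fd[0], &c, 1) == 1)
		printf("data buffered: read succeeds\n");
	return (0);
}
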
+ */ + if (fp->f_flag & FNONBLOCK) { + error = EAGAIN; + } else { + rpipe->pipe_state |= PIPE_WANTR; + + error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0); + + if (error == 0) + error = pipelock(rpipe, 1); + } + if (error) + goto unlocked_error; + } + } +#ifdef MAC +locked_error: +#endif + pipeunlock(rpipe); + +unlocked_error: + --rpipe->pipe_busy; + + /* + * PIPE_WANT processing only makes sense if pipe_busy is 0. + */ + if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { + rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); + wakeup(rpipe); + } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { + /* + * Handle write blocking hysteresis. + */ + if (rpipe->pipe_state & PIPE_WANTW) { + rpipe->pipe_state &= ~PIPE_WANTW; + wakeup(rpipe); + } + } + + if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) + pipeselwakeup(rpipe, rpipe->pipe_peer); + + PIPE_UNLOCK(rpipe); + + return (error); +} + + + +#ifndef PIPE_NODIRECT +/* + * Map the sending processes' buffer into kernel space and wire it. + * This is similar to a physical write operation. + */ +static int +pipe_build_write_buffer(wpipe, uio) + struct pipe *wpipe; + struct uio *uio; +{ + pmap_t pmap; + u_int size; + int i, j; + vm_offset_t addr, endaddr; + + + size = (u_int) uio->uio_iov->iov_len; + if (size > wpipe->pipe_buffer.size) + size = wpipe->pipe_buffer.size; + + pmap = vmspace_pmap(curproc->p_vmspace); + endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); + addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); + for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { + /* + * vm_fault_quick() can sleep. Consequently, + * vm_page_lock_queue() and vm_page_unlock_queue() + * should not be performed outside of this loop. + */ + race: + if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { + vm_page_lock_queues(); + for (j = 0; j < i; j++) + vm_page_unhold(wpipe->pipe_map.ms[j]); + vm_page_unlock_queues(); + return (EFAULT); + } + wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, + VM_PROT_READ); + if (wpipe->pipe_map.ms[i] == NULL) + goto race; + } + +/* + * set up the control block + */ + wpipe->pipe_map.npages = i; + wpipe->pipe_map.pos = + ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; + wpipe->pipe_map.cnt = size; + +/* + * and map the buffer + */ + if (wpipe->pipe_map.kva == 0) { + /* + * We need to allocate space for an extra page because the + * address range might (will) span pages at times. 
+ */ + wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map, + wpipe->pipe_buffer.size + PAGE_SIZE); + atomic_add_int(&amountpipekvawired, + wpipe->pipe_buffer.size + PAGE_SIZE); + } + pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, + wpipe->pipe_map.npages); + +/* + * and update the uio data + */ + + uio->uio_iov->iov_len -= size; + uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; + if (uio->uio_iov->iov_len == 0) + uio->uio_iov++; + uio_setresid(uio, (uio_resid(uio) - size)); + uio->uio_offset += size; + return (0); +} + +/* + * unmap and unwire the process buffer + */ +static void +pipe_destroy_write_buffer(wpipe) + struct pipe *wpipe; +{ + int i; + + if (wpipe->pipe_map.kva) { + pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); + + if (amountpipekvawired > maxpipekvawired / 2) { + /* Conserve address space */ + vm_offset_t kva = wpipe->pipe_map.kva; + wpipe->pipe_map.kva = 0; + kmem_free(kernel_map, kva, + wpipe->pipe_buffer.size + PAGE_SIZE); + atomic_subtract_int(&amountpipekvawired, + wpipe->pipe_buffer.size + PAGE_SIZE); + } + } + vm_page_lock_queues(); + for (i = 0; i < wpipe->pipe_map.npages; i++) { + vm_page_unhold(wpipe->pipe_map.ms[i]); + } + vm_page_unlock_queues(); + wpipe->pipe_map.npages = 0; +} + +/* + * In the case of a signal, the writing process might go away. This + * code copies the data into the circular buffer so that the source + * pages can be freed without loss of data. + */ +static void +pipe_clone_write_buffer(wpipe) + struct pipe *wpipe; +{ + int size; + int pos; + + size = wpipe->pipe_map.cnt; + pos = wpipe->pipe_map.pos; + + wpipe->pipe_buffer.in = size; + wpipe->pipe_buffer.out = 0; + wpipe->pipe_buffer.cnt = size; + wpipe->pipe_state &= ~PIPE_DIRECTW; + + PIPE_UNLOCK(wpipe); + bcopy((caddr_t) wpipe->pipe_map.kva + pos, + wpipe->pipe_buffer.buffer, size); + pipe_destroy_write_buffer(wpipe); + PIPE_LOCK(wpipe); +} + +/* + * This implements the pipe buffer write mechanism. Note that only + * a direct write OR a normal pipe write can be pending at any given time. + * If there are any characters in the pipe buffer, the direct write will + * be deferred until the receiving process grabs all of the bytes from + * the pipe buffer. Then the direct mapping write is set-up. 
+ */ +static int +pipe_direct_write(wpipe, uio) + struct pipe *wpipe; + struct uio *uio; +{ + int error; + +retry: + while (wpipe->pipe_state & PIPE_DIRECTW) { + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + wpipe->pipe_state |= PIPE_WANTW; + error = msleep(wpipe, PIPE_MTX(wpipe), + PRIBIO | PCATCH, "pipdww", 0); + if (error) + goto error1; + if (wpipe->pipe_state & PIPE_EOF) { + error = EPIPE; + goto error1; + } + } + wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ + if (wpipe->pipe_buffer.cnt > 0) { + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + + wpipe->pipe_state |= PIPE_WANTW; + error = msleep(wpipe, PIPE_MTX(wpipe), + PRIBIO | PCATCH, "pipdwc", 0); + if (error) + goto error1; + if (wpipe->pipe_state & PIPE_EOF) { + error = EPIPE; + goto error1; + } + goto retry; + } + + wpipe->pipe_state |= PIPE_DIRECTW; + + pipelock(wpipe, 0); + PIPE_UNLOCK(wpipe); + error = pipe_build_write_buffer(wpipe, uio); + PIPE_LOCK(wpipe); + pipeunlock(wpipe); + if (error) { + wpipe->pipe_state &= ~PIPE_DIRECTW; + goto error1; + } + + error = 0; + while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { + if (wpipe->pipe_state & PIPE_EOF) { + pipelock(wpipe, 0); + PIPE_UNLOCK(wpipe); + pipe_destroy_write_buffer(wpipe); + PIPE_LOCK(wpipe); + pipeselwakeup(wpipe, wpipe); + pipeunlock(wpipe); + error = EPIPE; + goto error1; + } + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + pipeselwakeup(wpipe, wpipe); + error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, + "pipdwt", 0); + } + + pipelock(wpipe,0); + if (wpipe->pipe_state & PIPE_DIRECTW) { + /* + * this bit of trickery substitutes a kernel buffer for + * the process that might be going away. + */ + pipe_clone_write_buffer(wpipe); + } else { + PIPE_UNLOCK(wpipe); + pipe_destroy_write_buffer(wpipe); + PIPE_LOCK(wpipe); + } + pipeunlock(wpipe); + return (error); + +error1: + wakeup(wpipe); + return (error); +} +#endif + + + +static int +pipe_write(struct fileproc *fp, struct uio *uio, __unused kauth_cred_t active_cred, __unused int flags, __unused struct proc *p) +{ + int error = 0; + int orig_resid; + int pipe_size; + struct pipe *wpipe, *rpipe; + + rpipe = (struct pipe *)fp->f_data; + + PIPE_LOCK(rpipe); + wpipe = rpipe->pipe_peer; + + /* + * detect loss of pipe read side, issue SIGPIPE if lost. + */ + if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) { + PIPE_UNLOCK(rpipe); + return (EPIPE); + } +#ifdef MAC + error = mac_check_pipe_write(active_cred, wpipe); + if (error) { + PIPE_UNLOCK(rpipe); + return (error); + } +#endif + ++wpipe->pipe_busy; + + pipe_size = 0; + + if (wpipe->pipe_buffer.buffer == 0) { + /* + * need to allocate some storage... we delay the allocation + * until the first write on fd[0] to avoid allocating storage for both + * 'pipe ends'... most pipes are half-duplex with the writes targeting + * fd[1], so allocating space for both ends is a waste... + * + * Reduce to 1/4th pipe size if we're over our global max. + */ + if (amountpipekva > maxpipekva / 2) + pipe_size = SMALL_PIPE_SIZE; + else + pipe_size = PIPE_SIZE; + } + + /* + * If it is advantageous to resize the pipe buffer, do + * so. 
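
/*
 * The sizing policy applied in pipe() above and in the pipe_write() path
 * just below, as one standalone decision function: quarter-size pipes once
 * half the global KVA budget is in use, and an upgrade to a big pipe for
 * large writes into an idle, normal-sized buffer. Sizes here are local
 * stand-ins for the sys/pipe.h constants.
 */
#include <assert.h>

#define PIPE_SIZE	16384
#define SMALL_PIPE_SIZE	4096
#define BIG_PIPE_SIZE	(64 * 1024)
#define LIMITBIGPIPES	32

static int
pipe_choose_size(int amountpipekva, int maxpipekva, int resid,
    int cursize, int curcnt, int nbigpipe)
{
	/* initial allocation: back off to a small pipe under KVA pressure */
	if (cursize == 0)
		return (amountpipekva > maxpipekva / 2 ?
		    SMALL_PIPE_SIZE : PIPE_SIZE);

	/* large write into an idle, normal-sized pipe: grow it */
	if (resid > PIPE_SIZE && cursize <= PIPE_SIZE &&
	    amountpipekva < maxpipekva / 2 && nbigpipe < LIMITBIGPIPES &&
	    curcnt == 0)
		return (BIG_PIPE_SIZE);

	return (cursize);	/* keep what we have */
}

int
main(void)
{
	int maxkva = 1024 * 1024 * 16;	/* matches maxpipekva above */

	assert(pipe_choose_size(0, maxkva, 0, 0, 0, 0) == PIPE_SIZE);
	assert(pipe_choose_size(maxkva, maxkva, 0, 0, 0, 0) ==
	    SMALL_PIPE_SIZE);
	assert(pipe_choose_size(0, maxkva, PIPE_SIZE + 1, PIPE_SIZE, 0, 0) ==
	    BIG_PIPE_SIZE);
	assert(pipe_choose_size(0, maxkva, PIPE_SIZE + 1, PIPE_SIZE, 1, 0) ==
	    PIPE_SIZE);
	return (0);
}
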
+ */ + if ((uio_resid(uio) > PIPE_SIZE) && + (wpipe->pipe_buffer.size <= PIPE_SIZE) && + (amountpipekva < maxpipekva / 2) && + (nbigpipe < LIMITBIGPIPES) && +#ifndef PIPE_NODIRECT + (wpipe->pipe_state & PIPE_DIRECTW) == 0 && +#endif + (wpipe->pipe_buffer.cnt == 0)) { + + pipe_size = BIG_PIPE_SIZE; + + } + if (pipe_size) { + /* + * need to do initial allocation or resizing of pipe + */ + if ((error = pipelock(wpipe, 1)) == 0) { + PIPE_UNLOCK(wpipe); + if (pipespace(wpipe, pipe_size) == 0) + OSAddAtomic(1, (SInt32 *)&nbigpipe); + PIPE_LOCK(wpipe); + pipeunlock(wpipe); + + if (wpipe->pipe_buffer.buffer == 0) { + /* + * initial allocation failed + */ + error = ENOMEM; + } + } + if (error) { + /* + * If an error occurred unbusy and return, waking up any pending + * readers. + */ + --wpipe->pipe_busy; + if ((wpipe->pipe_busy == 0) && + (wpipe->pipe_state & PIPE_WANT)) { + wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); + wakeup(wpipe); + } + PIPE_UNLOCK(rpipe); + return(error); + } + } + // LP64todo - fix this! + orig_resid = uio_resid(uio); + + while (uio_resid(uio)) { + int space; + +#ifndef PIPE_NODIRECT + /* + * If the transfer is large, we can gain performance if + * we do process-to-process copies directly. + * If the write is non-blocking, we don't use the + * direct write mechanism. + * + * The direct write mechanism will detect the reader going + * away on us. + */ + if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && + (fp->f_flag & FNONBLOCK) == 0 && + amountpipekvawired + uio->uio_resid < maxpipekvawired) { + error = pipe_direct_write(wpipe, uio); + if (error) + break; + continue; + } + + /* + * Pipe buffered writes cannot be coincidental with + * direct writes. We wait until the currently executing + * direct write is completed before we start filling the + * pipe buffer. We break out if a signal occurs or the + * reader goes away. + */ + retrywrite: + while (wpipe->pipe_state & PIPE_DIRECTW) { + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0); + + if (wpipe->pipe_state & PIPE_EOF) + break; + if (error) + break; + } +#else + retrywrite: +#endif + space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; + + /* + * Writes of size <= PIPE_BUF must be atomic. + */ + if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF)) + space = 0; + + if (space > 0) { + + if ((error = pipelock(wpipe,1)) == 0) { + int size; /* Transfer size */ + int segsize; /* first segment to transfer */ + + if (wpipe->pipe_state & PIPE_EOF) { + pipeunlock(wpipe); + error = EPIPE; + break; + } +#ifndef PIPE_NODIRECT + /* + * It is possible for a direct write to + * slip in on us... handle it here... + */ + if (wpipe->pipe_state & PIPE_DIRECTW) { + pipeunlock(wpipe); + goto retrywrite; + } +#endif + /* + * If a process blocked in pipelock, our + * value for space might be bad... the mutex + * is dropped while we're blocked + */ + if (space > (int)(wpipe->pipe_buffer.size - + wpipe->pipe_buffer.cnt)) { + pipeunlock(wpipe); + goto retrywrite; + } + + /* + * Transfer size is minimum of uio transfer + * and free space in pipe buffer. + */ + // LP64todo - fix this! + if (space > uio_resid(uio)) + size = uio_resid(uio); + else + size = space; + /* + * First segment to transfer is minimum of + * transfer size and contiguous space in + * pipe buffer. If first segment to transfer + * is less than the transfer size, we've got + * a wraparound in the buffer. 
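
/*
 * The two-segment copy just below, in isolation: when in + size runs past
 * the end of the ring, the first copy fills the contiguous tail (segsize)
 * and the second wraps to the front, which is what keeps writes of size
 * <= PIPE_BUF atomic. A userspace sketch with memcpy standing in for
 * uiomove:
 */
#include <assert.h>
#include <string.h>

#define RING_SIZE	8

struct ring {
	char	buffer[RING_SIZE];
	int	in;		/* next byte to fill */
	int	cnt;		/* bytes currently in the ring */
};

static void
ring_write(struct ring *r, const char *src, int size)
{
	int segsize;

	assert(size <= RING_SIZE - r->cnt);	/* caller checked space */

	segsize = RING_SIZE - r->in;		/* contiguous tail room */
	if (segsize > size)
		segsize = size;

	memcpy(&r->buffer[r->in], src, segsize);
	if (segsize < size)			/* wraparound happened */
		memcpy(&r->buffer[0], src + segsize, size - segsize);

	r->in += size;
	if (r->in >= RING_SIZE)			/* mirrors the panic checks */
		r->in = size - segsize;
	r->cnt += size;
}

int
main(void)
{
	struct ring r = { { 0 }, 6, 0 };	/* start two bytes from the end */

	ring_write(&r, "abcd", 4);
	assert(r.in == 2 && r.cnt == 4);
	assert(r.buffer[6] == 'a' && r.buffer[7] == 'b');	/* tail */
	assert(r.buffer[0] == 'c' && r.buffer[1] == 'd');	/* wrapped */
	return (0);
}
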
+ */ + segsize = wpipe->pipe_buffer.size - + wpipe->pipe_buffer.in; + if (segsize > size) + segsize = size; + + /* Transfer first segment */ + + PIPE_UNLOCK(rpipe); + error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], + segsize, uio); + PIPE_LOCK(rpipe); + + if (error == 0 && segsize < size) { + /* + * Transfer remaining part now, to + * support atomic writes. Wraparound + * happened. + */ + if (wpipe->pipe_buffer.in + segsize != + wpipe->pipe_buffer.size) + panic("Expected pipe buffer " + "wraparound disappeared"); + + PIPE_UNLOCK(rpipe); + error = uiomove( + &wpipe->pipe_buffer.buffer[0], + size - segsize, uio); + PIPE_LOCK(rpipe); + } + if (error == 0) { + wpipe->pipe_buffer.in += size; + if (wpipe->pipe_buffer.in >= + wpipe->pipe_buffer.size) { + if (wpipe->pipe_buffer.in != + size - segsize + + wpipe->pipe_buffer.size) + panic("Expected " + "wraparound bad"); + wpipe->pipe_buffer.in = size - + segsize; + } + + wpipe->pipe_buffer.cnt += size; + if (wpipe->pipe_buffer.cnt > + wpipe->pipe_buffer.size) + panic("Pipe buffer overflow"); + + } + pipeunlock(wpipe); + } + if (error) + break; + + } else { + /* + * If the "read-side" has been blocked, wake it up now. + */ + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + /* + * don't block on non-blocking I/O + * we'll do the pipeselwakeup on the way out + */ + if (fp->f_flag & FNONBLOCK) { + error = EAGAIN; + break; + } + /* + * We have no more space and have something to offer, + * wake up select/poll. + */ + pipeselwakeup(wpipe, wpipe); + + wpipe->pipe_state |= PIPE_WANTW; + + error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0); + + if (error != 0) + break; + /* + * If read side wants to go away, we just issue a signal + * to ourselves. + */ + if (wpipe->pipe_state & PIPE_EOF) { + error = EPIPE; + break; + } + } + } + --wpipe->pipe_busy; + + if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { + wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); + wakeup(wpipe); + } + if (wpipe->pipe_buffer.cnt > 0) { + /* + * If there are any characters in the buffer, we wake up + * the reader if it was blocked waiting for data. + */ + if (wpipe->pipe_state & PIPE_WANTR) { + wpipe->pipe_state &= ~PIPE_WANTR; + wakeup(wpipe); + } + /* + * wake up thread blocked in select/poll or post the notification + */ + pipeselwakeup(wpipe, wpipe); + } + PIPE_UNLOCK(rpipe); + + return (error); +} + +/* + * we implement a very minimal set of ioctls for compatibility with sockets. 
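+ *
+ * only FIONBIO, FIOASYNC, FIONREAD, TIOCSPGRP and TIOCGPGRP are
+ * handled; anything else returns ENOTTY.  from user space this
+ * looks like (a minimal sketch; pfd is a descriptor pair from
+ * pipe(2)):
+ *
+ *	int nread;
+ *
+ *	if (ioctl(pfd[0], FIONREAD, &nread) == 0)
+ *		printf("%d bytes buffered\n", nread);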
+ */ +/* ARGSUSED 3 */ +static int +pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, __unused struct proc *p) +{ + struct pipe *mpipe = (struct pipe *)fp->f_data; +#ifdef MAC + int error; +#endif + + PIPE_LOCK(mpipe); + +#ifdef MAC + error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); + if (error) { + PIPE_UNLOCK(mpipe); + + return (error); + } +#endif + + switch (cmd) { + + case FIONBIO: + PIPE_UNLOCK(mpipe); + return (0); + + case FIOASYNC: + if (*(int *)data) { + mpipe->pipe_state |= PIPE_ASYNC; + } else { + mpipe->pipe_state &= ~PIPE_ASYNC; + } + PIPE_UNLOCK(mpipe); + return (0); + + case FIONREAD: +#ifndef PIPE_NODIRECT + if (mpipe->pipe_state & PIPE_DIRECTW) + *(int *)data = mpipe->pipe_map.cnt; + else +#endif + *(int *)data = mpipe->pipe_buffer.cnt; + PIPE_UNLOCK(mpipe); + return (0); + + case TIOCSPGRP: + mpipe->pipe_pgid = *(int *)data; + + PIPE_UNLOCK(mpipe); + return (0); + + case TIOCGPGRP: + *(int *)data = mpipe->pipe_pgid; + + PIPE_UNLOCK(mpipe); + return (0); + + } + PIPE_UNLOCK(mpipe); + return (ENOTTY); +} + + +static int +pipe_select(struct fileproc *fp, int which, void *wql, struct proc *p) +{ + struct pipe *rpipe = (struct pipe *)fp->f_data; + struct pipe *wpipe; + int retnum = 0; + + if (rpipe == NULL || rpipe == (struct pipe *)-1) + return (retnum); + + PIPE_LOCK(rpipe); + + wpipe = rpipe->pipe_peer; + + switch (which) { + + case FREAD: + if ((rpipe->pipe_state & PIPE_DIRECTW) || + (rpipe->pipe_buffer.cnt > 0) || + (rpipe->pipe_state & PIPE_EOF)) { + + retnum = 1; + } else { + rpipe->pipe_state |= PIPE_SEL; + selrecord(p, &rpipe->pipe_sel, wql); + } + break; + + case FWRITE: + if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || + (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && + (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { + + retnum = 1; + } else { + wpipe->pipe_state |= PIPE_SEL; + selrecord(p, &wpipe->pipe_sel, wql); + } + break; + case 0: + rpipe->pipe_state |= PIPE_SEL; + selrecord(p, &rpipe->pipe_sel, wql); + break; + } + PIPE_UNLOCK(rpipe); + + return (retnum); +} + + +/* ARGSUSED 1 */ +static int +pipe_close(struct fileglob *fg, __unused struct proc *p) +{ + struct pipe *cpipe; + + proc_fdlock(p); + cpipe = (struct pipe *)fg->fg_data; + fg->fg_data = NULL; + proc_fdunlock(p); + + if (cpipe) + pipeclose(cpipe); + + return (0); +} + +static void +pipe_free_kmem(struct pipe *cpipe) +{ + + if (cpipe->pipe_buffer.buffer != NULL) { + if (cpipe->pipe_buffer.size > PIPE_SIZE) + OSAddAtomic(-1, (SInt32 *)&nbigpipe); + OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva); + OSAddAtomic(-1, (SInt32 *)&amountpipes); + + kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, + cpipe->pipe_buffer.size); + cpipe->pipe_buffer.buffer = NULL; + } +#ifndef PIPE_NODIRECT + if (cpipe->pipe_map.kva != 0) { + atomic_subtract_int(&amountpipekvawired, + cpipe->pipe_buffer.size + PAGE_SIZE); + kmem_free(kernel_map, + cpipe->pipe_map.kva, + cpipe->pipe_buffer.size + PAGE_SIZE); + cpipe->pipe_map.cnt = 0; + cpipe->pipe_map.kva = 0; + cpipe->pipe_map.pos = 0; + cpipe->pipe_map.npages = 0; + } +#endif +} + +/* + * shutdown the pipe + */ +static void +pipeclose(struct pipe *cpipe) +{ + struct pipe *ppipe; + + if (cpipe == NULL) + return; + + /* partially created pipes won't have a valid mutex. */ + if (PIPE_MTX(cpipe) != NULL) + PIPE_LOCK(cpipe); + + pipeselwakeup(cpipe, cpipe); + + /* + * If the other side is blocked, wake it up saying that + * we want to close it down. 
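+ *
+ * the handshake, as implemented below: set PIPE_WANT | PIPE_EOF,
+ * wakeup() any sleepers, then msleep() until pipe_busy drains to
+ * zero... a blocked writer (and, symmetrically, a reader)
+ * decrements pipe_busy on its way out and issues the final
+ * wakeup() when it sees PIPE_WANT set.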
+ */ + while (cpipe->pipe_busy) { + cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; + + wakeup(cpipe); + + msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); + } + +#ifdef MAC + if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) + mac_destroy_pipe(cpipe); +#endif + + /* + * Disconnect from peer + */ + if ((ppipe = cpipe->pipe_peer) != NULL) { + + ppipe->pipe_state |= PIPE_EOF; + + pipeselwakeup(ppipe, ppipe); + wakeup(ppipe); + + if (cpipe->pipe_state & PIPE_KNOTE) + KNOTE(&ppipe->pipe_sel.si_note, 1); + + postpipeevent(ppipe, EV_RCLOSED); + + ppipe->pipe_peer = NULL; + } + evpipefree(cpipe); + + /* + * free resources + */ + if (PIPE_MTX(cpipe) != NULL) { + if (ppipe != NULL) { + /* + * since the mutex is shared and the peer is still + * alive, we need to release the mutex, not free it + */ + PIPE_UNLOCK(cpipe); + } else { + /* + * peer is gone, so we're the sole party left with + * interest in this mutex... we can just free it + */ + lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp); + } + } + pipe_free_kmem(cpipe); + + zfree(pipe_zone, cpipe); +} + + +/*ARGSUSED*/ +static int +pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) +{ + struct pipe *cpipe; + + cpipe = (struct pipe *)kn->kn_fp->f_data; + + PIPE_LOCK(cpipe); + + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &pipe_rfiltops; + break; + case EVFILT_WRITE: + kn->kn_fop = &pipe_wfiltops; + + if (cpipe->pipe_peer == NULL) { + /* + * other end of pipe has been closed + */ + PIPE_UNLOCK(cpipe); + return (EPIPE); + } + cpipe = cpipe->pipe_peer; + break; + default: + PIPE_UNLOCK(cpipe); + return (1); + } + + if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn)) + cpipe->pipe_state |= PIPE_KNOTE; + + PIPE_UNLOCK(cpipe); + return (0); +} + +static void +filt_pipedetach(struct knote *kn) +{ + struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; + + PIPE_LOCK(cpipe); + + if (kn->kn_filter == EVFILT_WRITE) { + if (cpipe->pipe_peer == NULL) { + PIPE_UNLOCK(cpipe); + return; + } + cpipe = cpipe->pipe_peer; + } + if (cpipe->pipe_state & PIPE_KNOTE) { + if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn)) + cpipe->pipe_state &= ~PIPE_KNOTE; + } + PIPE_UNLOCK(cpipe); +} + +/*ARGSUSED*/ +static int +filt_piperead(struct knote *kn, long hint) +{ + struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; + struct pipe *wpipe; + int retval; + + /* + * if hint == 0, then we've been called from the kevent + * world directly and do not currently hold the pipe mutex... + * if hint == 1, we're being called back via the KNOTE post + * we made in pipeselwakeup, and we already hold the mutex... + */ + if (hint == 0) + PIPE_LOCK(rpipe); + + wpipe = rpipe->pipe_peer; + kn->kn_data = rpipe->pipe_buffer.cnt; + +#ifndef PIPE_NODIRECT + if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) + kn->kn_data = rpipe->pipe_map.cnt; +#endif + if ((rpipe->pipe_state & PIPE_EOF) || + (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + kn->kn_flags |= EV_EOF; + retval = 1; + } else + retval = (kn->kn_sfflags & NOTE_LOWAT) ? + (kn->kn_data >= kn->kn_sdata) : (kn->kn_data > 0); + + if (hint == 0) + PIPE_UNLOCK(rpipe); + + return (retval); +} + +/*ARGSUSED*/ +static int +filt_pipewrite(struct knote *kn, long hint) +{ + struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; + struct pipe *wpipe; + + /* + * if hint == 0, then we've been called from the kevent + * world directly and do not currently hold the pipe mutex... 
+ * if hint == 1, we're being called back via the KNOTE post + * we made in pipeselwakeup, and we already hold the mutex... + */ + if (hint == 0) + PIPE_LOCK(rpipe); + + wpipe = rpipe->pipe_peer; + + if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + kn->kn_data = 0; + kn->kn_flags |= EV_EOF; + + if (hint == 0) + PIPE_UNLOCK(rpipe); + return (1); + } + kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; + +#ifndef PIPE_NODIRECT + if (wpipe->pipe_state & PIPE_DIRECTW) + kn->kn_data = 0; +#endif + if (hint == 0) + PIPE_UNLOCK(rpipe); + + return (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? + kn->kn_sdata : PIPE_BUF)); +} diff --git a/bsd/kern/sys_socket.c b/bsd/kern/sys_socket.c index 973f4870f..1e7b7d3c0 100644 --- a/bsd/kern/sys_socket.c +++ b/bsd/kern/sys_socket.c @@ -56,7 +56,7 @@ #include #include -#include +#include #include #include #include @@ -66,141 +66,106 @@ #include #include #include +#include +#include #include #include -int soo_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -int soo_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -int soo_close __P((struct file *fp, struct proc *p)); - -int soo_select __P((struct file *fp, int which, void * wql, struct proc *p)); - -int soo_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p)); +/* + * File operations on sockets. + */ +int soo_read(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, + int flags, struct proc *p); +int soo_write(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, + int flags, struct proc *p); +int soo_close(struct fileglob *fp, struct proc *p); +int soo_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p); +int soo_stat(struct socket *so, struct stat *ub); +int soo_select(struct fileproc *fp, int which, void * wql, struct proc *p); +int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); +int soo_drain(struct fileproc *fp, struct proc *p); struct fileops socketops = - { soo_read, soo_write, soo_ioctl, soo_select, soo_close, soo_kqfilter }; + { soo_read, soo_write, soo_ioctl, soo_select, soo_close, soo_kqfilter, soo_drain }; /* ARGSUSED */ int -soo_read(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +soo_read( + struct fileproc *fp, + struct uio *uio, + __unused kauth_cred_t cred, + __unused int flags, + __unused struct proc *p) { struct socket *so; - struct kextcb *kp; int stat; - int (*fsoreceive) __P((struct socket *so, + int (*fsoreceive)(struct socket *so2, struct sockaddr **paddr, - struct uio *uio, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp)); + struct uio *uio2, struct mbuf **mp0, + struct mbuf **controlp, int *flagsp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if ((so = (struct socket *)fp->f_data) == NULL) { + if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) { /* This is not a valid open file descriptor */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EBADF); + return(EBADF); } - +//###LD will have to change fsoreceive = so->so_proto->pr_usrreqs->pru_soreceive; - if (fsoreceive != soreceive) - { kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_soreceive) - (*kp->e_soif->sf_soreceive)(so, 0, &uio, - 0, 0, 0, kp); - kp = kp->e_next; - } - - } stat = (*fsoreceive)(so, 0, uio, 0, 0, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); return stat; } /* ARGSUSED */ int -soo_write(fp, 
uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +soo_write( + struct fileproc *fp, + struct uio *uio, + __unused kauth_cred_t cred, + __unused int flags, + struct proc *procp) { struct socket *so; - int (*fsosend) __P((struct socket *so, struct sockaddr *addr, - struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags)); - struct kextcb *kp; + int (*fsosend)(struct socket *so2, struct sockaddr *addr, + struct uio *uio2, struct mbuf *top, + struct mbuf *control, int flags2); int stat; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - - if ((so = (struct socket *)fp->f_data) == NULL) { - /* This is not a valid open file descriptor */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EBADF); - } + if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) { + /* This is not a valid open file descriptor */ + return (EBADF); + } fsosend = so->so_proto->pr_usrreqs->pru_sosend; - if (fsosend != sosend) - { kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sosend) - (*kp->e_soif->sf_sosend)(so, 0, &uio, - 0, 0, 0, kp); - kp = kp->e_next; - } - } stat = (*fsosend)(so, 0, uio, 0, 0, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - /* Generation of SIGPIPE can be controlled per socket */ - if (stat == EPIPE && uio->uio_procp && !(so->so_flags & SOF_NOSIGPIPE)) - psignal(uio->uio_procp, SIGPIPE); + /* Generation of SIGPIPE can be controlled per socket */ + if (stat == EPIPE && procp && !(so->so_flags & SOF_NOSIGPIPE)) + psignal(procp, SIGPIPE); - return stat; + return stat; } -int -soo_ioctl(fp, cmd, data, p) - struct file *fp; - u_long cmd; - register caddr_t data; - struct proc *p; +__private_extern__ int +soioctl( + struct socket *so, + u_long cmd, + caddr_t data, + struct proc *p) { - register struct socket *so; struct sockopt sopt; - struct kextcb *kp; int error = 0; + int dropsockref = -1; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - - if ((so = (struct socket *)fp->f_data) == NULL) { - /* This is not a valid open file descriptor */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EBADF); - } - kp = sotokextcb(so); - sopt.sopt_level = cmd; - sopt.sopt_name = (int)data; - sopt.sopt_p = p; + socket_lock(so, 1); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_socontrol) - (*kp->e_soif->sf_socontrol)(so, &sopt, kp); - kp = kp->e_next; - } + sopt.sopt_level = cmd; + sopt.sopt_name = (int)data; + sopt.sopt_p = p; switch (cmd) { @@ -210,8 +175,7 @@ soo_ioctl(fp, cmd, data, p) else so->so_state &= ~SS_NBIO; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case FIOASYNC: if (*(int *)data) { @@ -223,28 +187,23 @@ soo_ioctl(fp, cmd, data, p) so->so_rcv.sb_flags &= ~SB_ASYNC; so->so_snd.sb_flags &= ~SB_ASYNC; } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case FIONREAD: *(int *)data = so->so_rcv.sb_cc; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case SIOCSPGRP: so->so_pgid = *(int *)data; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case SIOCGPGRP: *(int *)data = so->so_pgid; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case SIOCATMARK: *(int *)data = (so->so_state&SS_RCVATMARK) != 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (0); + goto out; case SIOCSETOT: { /* @@ -256,18 +215,14 @@ soo_ioctl(fp, cmd, data, p) /* let's make sure it's either -1 or a 
valid file descriptor */ if (cloned_fd != -1) { - struct file *cloned_fp; - error = getsock(p->p_fd, cloned_fd, &cloned_fp); + error = file_socket(cloned_fd, &cloned_so); if (error) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); + goto out; } - - cloned_so = (struct socket *)cloned_fp->f_data; + dropsockref = cloned_fd; } /* Always set socket non-blocking for OT */ - fp->f_flag |= FNONBLOCK; so->so_state |= SS_NBIO; so->so_options |= SO_DONTTRUNC | SO_WANTMORE; so->so_flags |= SOF_NOSIGPIPE; @@ -284,15 +239,13 @@ soo_ioctl(fp, cmd, data, p) if (cloned_so->so_snd.sb_hiwat > 0) { if (sbreserve(&so->so_snd, cloned_so->so_snd.sb_hiwat) == 0) { error = ENOBUFS; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); + goto out; } } if (cloned_so->so_rcv.sb_hiwat > 0) { if (sbreserve(&so->so_rcv, cloned_so->so_rcv.sb_hiwat) == 0) { error = ENOBUFS; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); + goto out; } } @@ -304,7 +257,7 @@ soo_ioctl(fp, cmd, data, p) (cloned_so->so_rcv.sb_lowat > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : cloned_so->so_rcv.sb_lowat; - /* SO_SNDTIMEO, SO_RCVTIMEO */ + /* SO_SNDTIMEO, SO_RCVTIMEO */ so->so_snd.sb_timeo = cloned_so->so_snd.sb_timeo; so->so_rcv.sb_timeo = cloned_so->so_rcv.sb_timeo; } @@ -314,8 +267,7 @@ soo_ioctl(fp, cmd, data, p) if (error == EOPNOTSUPP) error = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); + goto out; } } /* @@ -324,36 +276,64 @@ soo_ioctl(fp, cmd, data, p) * different entry since a socket's unnecessary */ if (IOCGROUP(cmd) == 'i') - error = ifioctl(so, cmd, data, p); + error = ifioctllocked(so, cmd, data, p); else if (IOCGROUP(cmd) == 'r') error = rtioctl(cmd, data, p); else error = (*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, 0, p); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); +out: + if (dropsockref != -1) + file_drop(dropsockref); + socket_unlock(so, 1); + + return error; +} + +int +soo_ioctl(fp, cmd, data, p) + struct fileproc *fp; + u_long cmd; + register caddr_t data; + struct proc *p; +{ + register struct socket *so; + int error; + + + if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) { + /* This is not a valid open file descriptor */ + return (EBADF); + } + + error = soioctl(so, cmd, data, p); + + if (error == 0 && cmd == SIOCSETOT) + fp->f_fglob->fg_flag |= FNONBLOCK; + return error; } int soo_select(fp, which, wql, p) - struct file *fp; + struct fileproc *fp; int which; void * wql; struct proc *p; { - register struct socket *so = (struct socket *)fp->f_data; - register int s = splnet(); + register struct socket *so = (struct socket *)fp->f_fglob->fg_data; int retnum=0; - if (so == NULL || so == (struct socket*)-1) goto done; + if (so == NULL || so == (struct socket*)-1) + return (0); + socket_lock(so, 1); switch (which) { case FREAD: so->so_rcv.sb_flags |= SB_SEL; if (soreadable(so)) { - splx(s); retnum = 1; so->so_rcv.sb_flags &= ~SB_SEL; goto done; @@ -364,7 +344,6 @@ soo_select(fp, which, wql, p) case FWRITE: so->so_snd.sb_flags |= SB_SEL; if (sowriteable(so)) { - splx(s); retnum = 1; so->so_snd.sb_flags &= ~SB_SEL; goto done; @@ -375,7 +354,6 @@ soo_select(fp, which, wql, p) case 0: so->so_rcv.sb_flags |= SB_SEL; if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) { - splx(s); retnum = 1; so->so_rcv.sb_flags &= ~SB_SEL; goto done; @@ -383,8 +361,9 @@ soo_select(fp, which, wql, p) selrecord(p, &so->so_rcv.sb_sel, wql); break; } - splx(s); + done: + socket_unlock(so, 1); return (retnum); } @@ 
-396,36 +375,49 @@ soo_stat(so, ub) { int stat; - /* - * DANGER: by the time we get the network funnel the socket - * may have been closed - */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); bzero((caddr_t)ub, sizeof (*ub)); + socket_lock(so, 1); ub->st_mode = S_IFSOCK; stat = (*so->so_proto->pr_usrreqs->pru_sense)(so, ub); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + socket_unlock(so, 1); return stat; } /* ARGSUSED */ int -soo_close(fp, p) - struct file *fp; - struct proc *p; +soo_close(struct fileglob *fg, __unused proc_t p) { int error = 0; struct socket *sp; - sp = (struct socket *)fp->f_data; - fp->f_data = NULL; + sp = (struct socket *)fg->fg_data; + fg->fg_data = NULL; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); if (sp) error = soclose(sp); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); return (error); } + +int +soo_drain(struct fileproc *fp, __unused struct proc *p) +{ + int error = 0; + struct socket *so = (struct socket *)fp->f_fglob->fg_data; + + if (so) { + socket_lock(so, 1); + so->so_state |= SS_DRAINING; + + wakeup((caddr_t)&so->so_timeo); + sorwakeup(so); + sowwakeup(so); + + socket_unlock(so, 1); + } + + return error; +} + diff --git a/bsd/kern/syscalls.c b/bsd/kern/syscalls.c index f2495c800..e75391978 100644 --- a/bsd/kern/syscalls.c +++ b/bsd/kern/syscalls.c @@ -1,403 +1,448 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * @APPLE_LICENSE_HEADER_END@ * - * @APPLE_LICENSE_HEADER_END@ + * + * System call switch table. + * + * DO NOT EDIT-- this file is automatically generated. + * created from syscalls.master */ -/* Copyright (c) 1992,1995-1999 Apple Computer, Inc. All rights resereved. 
*/ -char *syscallnames[] = { - "syscall", /* 0 = syscall */ - "exit", /* 1 = exit */ - "fork", /* 2 = fork */ - "read", /* 3 = read */ - "write", /* 4 = write */ - "open", /* 5 = open */ - "close", /* 6 = close */ - "wait4", /* 7 = wait4 */ - "obs_creat", /* 8 = old creat */ - "link", /* 9 = link */ - "unlink", /* 10 = unlink */ - "obs_execv", /* 11 = obsolete execv */ - "chdir", /* 12 = chdir */ - "fchdir", /* 13 = fchdir */ - "mknod", /* 14 = mknod */ - "chmod", /* 15 = chmod */ - "chown", /* 16 = chown */ - "obs_break", /* 17 = obsolete break */ - "obs_getfsstat", /* 18 = obsolete getfsstat */ - "old_lseek", /* 19 = old lseek */ - "getpid", /* 20 = getpid */ - "obs_mount", /* 21 = obsolete mount */ - "obs_unmount", /* 22 = obsolete unmount */ - "setuid", /* 23 = setuid */ - "getuid", /* 24 = getuid */ - "geteuid", /* 25 = geteuid */ - "ptrace", /* 26 = ptrace */ - "recvmsg", /* 27 = recvmsg */ - "sendmsg", /* 28 = sendmsg */ - "recvfrom", /* 29 = recvfrom */ - "accept", /* 30 = accept */ - "getpeername", /* 31 = getpeername */ - "getsockname", /* 32 = getsockname */ - "access", /* 33 = access */ - "chflags", /* 34 = chflags */ - "fchflags", /* 35 = fchflags */ - "sync", /* 36 = sync */ - "kill", /* 37 = kill */ - "obs_stat", /* 38 = old stat */ - "getppid", /* 39 = getppid */ - "obs_lstat", /* 40 = old lstat */ - "dup", /* 41 = dup */ - "pipe", /* 42 = pipe */ - "getegid", /* 43 = getegid */ - "profil", /* 44 = profil */ - "ktrace", /* 45 = ktrace */ - "sigaction", /* 46 = sigaction */ - "getgid", /* 47 = getgid */ - "sigprocmask", /* 48 = sigprocmask */ - "getlogin", /* 49 = getlogin */ - "setlogin", /* 50 = setlogin */ - "acct", /* 51 = acct */ - "sigpending", /* 52 = sigpending */ - "sigaltstack", /* 53 = sigaltstack */ - "ioctl", /* 54 = ioctl */ - "reboot", /* 55 = reboot */ - "revoke", /* 56 = revoke */ - "symlink", /* 57 = symlink */ - "readlink", /* 58 = readlink */ - "execve", /* 59 = execve */ - "umask", /* 60 = umask */ - "chroot", /* 61 = chroot */ - "obs_fstat", /* 62 = old fstat */ - "#63", /* 63 = reserved */ - "obs_getpagesize", /* 64 = old getpagesize */ - "msync", /* 65 = msync */ - "vfork", /* 66 = vfork */ - "obs_vread", /* 67 = obsolete vread */ - "obs_vwrite", /* 68 = obsolete vwrite */ - "sbrk", /* 69 = sbrk */ - "sstk", /* 70 = sstk */ - "obs_mmap", /* 71 = old mmap */ - "obs_vadvise", /* 72 = obsolete vadvise */ - "munmap", /* 73 = munmap */ - "mprotect", /* 74 = mprotect */ - "madvise", /* 75 = madvise */ - "#76", /* 76 = obsolete vhangup */ - "#77", /* 77 = obsolete vlimit */ - "mincore", /* 78 = mincore */ - "getgroups", /* 79 = getgroups */ - "setgroups", /* 80 = setgroups */ - "getpgrp", /* 81 = getpgrp */ - "setpgid", /* 82 = setpgid */ - "setitimer", /* 83 = setitimer */ - "old_wait", /* 84 = old wait */ - "obs_swapon", /* 85 = swapon */ - "getitimer", /* 86 = getitimer */ - "obs_gethostname", /* 87 = old gethostname */ - "obs_sethostname", /* 88 = old sethostname */ - "getdtablesize", /* 89 = getdtablesize */ - "dup2", /* 90 = dup2 */ - "#91", /* 91 = getdopt */ - "fcntl", /* 92 = fcntl */ - "select", /* 93 = select */ - "#94", /* 94 = setdopt */ - "fsync", /* 95 = fsync */ - "setpriority", /* 96 = setpriority */ - "socket", /* 97 = socket */ - "connect", /* 98 = connect */ - "obs_accept", /* 99 = old accept */ - "getpriority", /* 100 = getpriority */ - "old_send", /* 101 = old send */ - "old_recv", /* 102 = old recv */ +const char *syscallnames[] = { + "syscall", /* 0 = syscall indirect syscall */ + "exit", /* 1 = exit */ + "fork", /* 2 = fork */ + 
"read", /* 3 = read */ + "write", /* 4 = write */ + "open", /* 5 = open */ + "close", /* 6 = close */ + "wait4", /* 7 = wait4 */ + "#8", /* 8 = old creat */ + "link", /* 9 = link */ + "unlink", /* 10 = unlink */ + "#11", /* 11 = old execv */ + "chdir", /* 12 = chdir */ + "fchdir", /* 13 = fchdir */ + "mknod", /* 14 = mknod */ + "chmod", /* 15 = chmod */ + "chown", /* 16 = chown */ + "obreak", /* 17 = obreak old break */ +#if COMPAT_GETFSSTAT + "ogetfsstat", /* 18 = ogetfsstat */ +#else + "getfsstat", /* 18 = getfsstat */ +#endif + "#19", /* 19 = old lseek */ + "getpid", /* 20 = getpid */ + "#21", /* 21 = old mount */ + "#22", /* 22 = old umount */ + "setuid", /* 23 = setuid */ + "getuid", /* 24 = getuid */ + "geteuid", /* 25 = geteuid */ + "ptrace", /* 26 = ptrace */ + "recvmsg", /* 27 = recvmsg */ + "sendmsg", /* 28 = sendmsg */ + "recvfrom", /* 29 = recvfrom */ + "accept", /* 30 = accept */ + "getpeername", /* 31 = getpeername */ + "getsockname", /* 32 = getsockname */ + "access", /* 33 = access */ + "chflags", /* 34 = chflags */ + "fchflags", /* 35 = fchflags */ + "sync", /* 36 = sync */ + "kill", /* 37 = kill */ + "#38", /* 38 = old stat */ + "getppid", /* 39 = getppid */ + "#40", /* 40 = old lstat */ + "dup", /* 41 = dup */ + "pipe", /* 42 = pipe */ + "getegid", /* 43 = getegid */ + "profil", /* 44 = profil */ + "ktrace", /* 45 = ktrace */ + "sigaction", /* 46 = sigaction */ + "getgid", /* 47 = getgid */ + "sigprocmask", /* 48 = sigprocmask */ + "getlogin", /* 49 = getlogin */ + "setlogin", /* 50 = setlogin */ + "acct", /* 51 = acct */ + "sigpending", /* 52 = sigpending */ + "sigaltstack", /* 53 = sigaltstack */ + "ioctl", /* 54 = ioctl */ + "reboot", /* 55 = reboot */ + "revoke", /* 56 = revoke */ + "symlink", /* 57 = symlink */ + "readlink", /* 58 = readlink */ + "execve", /* 59 = execve */ + "umask", /* 60 = umask */ + "chroot", /* 61 = chroot */ + "#62", /* 62 = old fstat */ + "#63", /* 63 = used internally , reserved */ + "#64", /* 64 = old getpagesize */ + "msync", /* 65 = msync */ + "vfork", /* 66 = vfork */ + "#67", /* 67 = old vread */ + "#68", /* 68 = old vwrite */ + "sbrk", /* 69 = sbrk */ + "sstk", /* 70 = sstk */ + "#71", /* 71 = old mmap */ + "ovadvise", /* 72 = ovadvise old vadvise */ + "munmap", /* 73 = munmap */ + "mprotect", /* 74 = mprotect */ + "madvise", /* 75 = madvise */ + "#76", /* 76 = old vhangup */ + "#77", /* 77 = old vlimit */ + "mincore", /* 78 = mincore */ + "getgroups", /* 79 = getgroups */ + "setgroups", /* 80 = setgroups */ + "getpgrp", /* 81 = getpgrp */ + "setpgid", /* 82 = setpgid */ + "setitimer", /* 83 = setitimer */ + "#84", /* 84 = old wait */ + "swapon", /* 85 = swapon */ + "getitimer", /* 86 = getitimer */ + "#87", /* 87 = old gethostname */ + "#88", /* 88 = old sethostname */ + "getdtablesize", /* 89 = getdtablesize */ + "dup2", /* 90 = dup2 */ + "#91", /* 91 = old getdopt */ + "fcntl", /* 92 = fcntl */ + "select", /* 93 = select */ + "#94", /* 94 = old setdopt */ + "fsync", /* 95 = fsync */ + "setpriority", /* 96 = setpriority */ + "socket", /* 97 = socket */ + "connect", /* 98 = connect */ + "#99", /* 99 = old accept */ + "getpriority", /* 100 = getpriority */ + "#101", /* 101 = old send */ + "#102", /* 102 = old recv */ #ifdef __ppc__ - "osigreturn", /* 103 = sigreturn */ + "#103", /* 103 = old sigreturn */ #else - "sigreturn", /* 103 = sigreturn */ + "sigreturn", /* 103 = sigreturn */ #endif - "bind", /* 104 = bind */ - "setsockopt", /* 105 = setsockopt */ - "listen", /* 106 = listen */ - "#107", /* 107 = obsolete vtimes */ - 
"obs_sigvec", /* 108 = old sigvec */ - "obs_sigblock", /* 109 = old sigblock */ - "obs_sigsetmask", /* 110 = old sigsetmask */ - "sigsuspend", /* 111 = sigsuspend */ - "obs_sigstack", /* 112 = old sigstack */ - "obs_recvmsg", /* 113 = old recvmsg */ - "obs_sendmsg", /* 114 = old sendmsg */ - "#115", /* 115 = obsolete vtrace */ - "gettimeofday", /* 116 = gettimeofday */ - "getrusage", /* 117 = getrusage */ - "getsockopt", /* 118 = getsockopt */ - "#119", /* 119 = nosys */ - "readv", /* 120 = readv */ - "writev", /* 121 = writev */ - "settimeofday", /* 122 = settimeofday */ - "fchown", /* 123 = fchown */ - "fchmod", /* 124 = fchmod */ - "obs_recvfrom", /* 125 = old recvfrom */ - "obs_setreuid", /* 126 = old setreuid */ - "obs_setregid", /* 127 = old setregid */ - "rename", /* 128 = rename */ - "obs_truncate", /* 129 = old truncate */ - "obs_ftruncate", /* 130 = old ftruncate */ - "flock", /* 131 = flock */ - "mkfifo", /* 132 = mkfifo */ - "sendto", /* 133 = sendto */ - "shutdown", /* 134 = shutdown */ - "socketpair", /* 135 = socketpair */ - "mkdir", /* 136 = mkdir */ - "rmdir", /* 137 = rmdir */ - "utimes", /* 138 = utimes */ - "futimes", /* 139 = futimes */ - "adjtime", /* 140 = adjtime */ - "obs_getpeername", /* 141 = old getpeername */ - "obs_gethostid", /* 142 = old gethostid */ - "#143", /* 143 = old sethostid */ - "obs_getrlimit", /* 144 = old getrlimit */ - "obs_setrlimit", /* 145 = old setrlimit */ - "obs_killpg", /* 146 = old killpg */ - "setsid", /* 147 = setsid */ - "#148", /* 148 = obsolete setquota */ - "#149", /* 149 = obsolete qquota */ - "obs_getsockname", /* 150 = old getsockname */ - "getpgid", /* 151 = getpgid */ - "setprivexec", /* 152 = setprivexec */ - "pread", /* 153 = pread */ - "pwrite", /* 154 = pwrite */ - "nfssvc", /* 155 = nfssvc */ - "getdirentries", /* 156 =getdirentries */ - "statfs", /* 157 = statfs */ - "fstatfs", /* 158 = fstatfs */ - "unmount", /* 159 = unmount */ - "#160", /* 160 = obsolete async_daemon */ - "getfh", /* 161 = getfh */ - "obs_getdomainname",/* 162 = old getdomainname */ - "obs_setdomainname",/* 163 = old setdomainname */ - "#164", /* 164 */ - "quotactl", /* 165 = quotactl */ - "#166", /* 166 = obsolete exportfs */ - "mount", /* 167 = mount */ - "#168", /* 168 = obsolete ustat */ - "#169", /* 169 = nosys */ - "#170", /* 170 = obsolete table */ - "obs_wait3", /* 171 = old wait3 */ - "#172", /* 172 = obsolete rpause */ - "#173", /* 173 = nosys */ - "#174", /* 174 = obsolete getdents */ - "#175", /* 175 = nosys */ - "add_profil", /* 176 = add_profil */ /* NeXT */ - "#177", /* 177 = nosys */ - "#178", /* 178 = nosys */ - "#179", /* 179 = nosys */ - "kdebug_trace", /* 180 = kdebug_trace */ - "setgid", /* 181 = setgid */ - "setegid", /* 182 = setegid */ - "seteuid", /* 183 = seteuid */ + "bind", /* 104 = bind */ + "setsockopt", /* 105 = setsockopt */ + "listen", /* 106 = listen */ + "#107", /* 107 = old vtimes */ + "#108", /* 108 = old sigvec */ + "#109", /* 109 = old sigblock */ + "#110", /* 110 = old sigsetmask */ + "sigsuspend", /* 111 = sigsuspend */ + "#112", /* 112 = old sigstack */ + "#113", /* 113 = old recvmsg */ + "#114", /* 114 = old sendmsg */ + "#115", /* 115 = old vtrace */ #ifdef __ppc__ - "sigreturn", /* 184 = sigreturn */ + "ppc_gettimeofday", /* 116 = ppc_gettimeofday */ +#else + "gettimeofday", /* 116 = gettimeofday */ +#endif + "getrusage", /* 117 = getrusage */ + "getsockopt", /* 118 = getsockopt */ + "#119", /* 119 = old resuba */ + "readv", /* 120 = readv */ + "writev", /* 121 = writev */ + "settimeofday", /* 122 = 
settimeofday */ + "fchown", /* 123 = fchown */ + "fchmod", /* 124 = fchmod */ + "#125", /* 125 = old recvfrom */ + "#126", /* 126 = old setreuid */ + "#127", /* 127 = old setregid */ + "rename", /* 128 = rename */ + "#129", /* 129 = old truncate */ + "#130", /* 130 = old ftruncate */ + "flock", /* 131 = flock */ + "mkfifo", /* 132 = mkfifo */ + "sendto", /* 133 = sendto */ + "shutdown", /* 134 = shutdown */ + "socketpair", /* 135 = socketpair */ + "mkdir", /* 136 = mkdir */ + "rmdir", /* 137 = rmdir */ + "utimes", /* 138 = utimes */ + "futimes", /* 139 = futimes */ + "adjtime", /* 140 = adjtime */ + "#141", /* 141 = old getpeername */ + "#142", /* 142 = old gethostid */ + "#143", /* 143 = old sethostid */ + "#144", /* 144 = old getrlimit */ + "#145", /* 145 = old setrlimit */ + "#146", /* 146 = old killpg */ + "setsid", /* 147 = setsid */ + "#148", /* 148 = old setquota */ + "#149", /* 149 = old qquota */ + "#150", /* 150 = old getsockname */ + "getpgid", /* 151 = getpgid */ + "setprivexec", /* 152 = setprivexec */ + "pread", /* 153 = pread */ + "pwrite", /* 154 = pwrite */ +#if NFSSERVER + "nfssvc", /* 155 = nfssvc */ +#else + "#155", /* 155 = */ +#endif + "#156", /* 156 = old getdirentries */ + "statfs", /* 157 = statfs */ + "fstatfs", /* 158 = fstatfs */ + "unmount", /* 159 = unmount */ + "#160", /* 160 = old async_daemon */ +#if NFSCLIENT + "getfh", /* 161 = getfh */ +#else + "#161", /* 161 = */ +#endif + "#162", /* 162 = old getdomainname */ + "#163", /* 163 = old setdomainname */ + "#164", /* 164 = */ + "quotactl", /* 165 = quotactl */ + "#166", /* 166 = old exportfs */ + "mount", /* 167 = mount */ + "#168", /* 168 = old ustat */ + "#169", /* 169 = */ + "table", /* 170 = table old table */ + "#171", /* 171 = old wait3 */ + "#172", /* 172 = old rpause */ + "waitid", /* 173 = waitid */ + "#174", /* 174 = old getdents */ + "#175", /* 175 = old gc_control */ + "add_profil", /* 176 = add_profil */ + "#177", /* 177 = */ + "#178", /* 178 = */ + "#179", /* 179 = */ + "kdebug_trace", /* 180 = kdebug_trace */ + "setgid", /* 181 = setgid */ + "setegid", /* 182 = setegid */ + "seteuid", /* 183 = seteuid */ +#ifdef __ppc__ + "sigreturn", /* 184 = sigreturn */ +#else + "#184", /* 184 = */ +#endif + "#185", /* 185 = */ + "#186", /* 186 = */ + "#187", /* 187 = */ + "stat", /* 188 = stat */ + "fstat", /* 189 = fstat */ + "lstat", /* 190 = lstat */ + "pathconf", /* 191 = pathconf */ + "fpathconf", /* 192 = fpathconf */ +#if COMPAT_GETFSSTAT + "getfsstat", /* 193 = getfsstat */ +#else + "#193", /* 193 = */ +#endif + "getrlimit", /* 194 = getrlimit */ + "setrlimit", /* 195 = setrlimit */ + "getdirentries", /* 196 = getdirentries */ + "mmap", /* 197 = mmap */ + "#198", /* 198 = __syscall */ + "lseek", /* 199 = lseek */ + "truncate", /* 200 = truncate */ + "ftruncate", /* 201 = ftruncate */ + "__sysctl", /* 202 = __sysctl */ + "mlock", /* 203 = mlock */ + "munlock", /* 204 = munlock */ + "undelete", /* 205 = undelete */ +#ifdef __ppc__ + "ATsocket", /* 206 = ATsocket */ + "ATgetmsg", /* 207 = ATgetmsg */ + "ATputmsg", /* 208 = ATputmsg */ + "ATPsndreq", /* 209 = ATPsndreq */ + "ATPsndrsp", /* 210 = ATPsndrsp */ + "ATPgetreq", /* 211 = ATPgetreq */ + "ATPgetrsp", /* 212 = ATPgetrsp */ + "#213", /* 213 = Reserved for AppleTalk */ +#else + "ATsocket", /* 206 = ATsocket */ + "ATgetmsg", /* 207 = ATgetmsg */ + "ATputmsg", /* 208 = ATputmsg */ + "ATPsndreq", /* 209 = ATPsndreq */ + "ATPsndrsp", /* 210 = ATPsndrsp */ + "ATPgetreq", /* 211 = ATPgetreq */ + "ATPgetrsp", /* 212 = ATPgetrsp */ + "#213", /* 213 = 
Reserved for AppleTalk */ +#endif /* __ppc__ */ + "kqueue_from_portset_np", /* 214 = kqueue_from_portset_np */ + "kqueue_portset_np", /* 215 = kqueue_portset_np */ + "mkcomplex", /* 216 = mkcomplex soon to be obsolete */ + "statv", /* 217 = statv soon to be obsolete */ + "lstatv", /* 218 = lstatv soon to be obsolete */ + "fstatv", /* 219 = fstatv soon to be obsolete */ + "getattrlist", /* 220 = getattrlist */ + "setattrlist", /* 221 = setattrlist */ + "getdirentriesattr", /* 222 = getdirentriesattr */ + "exchangedata", /* 223 = exchangedata */ +#ifdef __APPLE_API_OBSOLETE + "checkuseraccess", /* 224 = checkuseraccess */ +#else + "#224", /* 224 = HFS checkuseraccess check access to a file */ +#endif /* __APPLE_API_OBSOLETE */ + "searchfs", /* 225 = searchfs */ + "delete", /* 226 = delete private delete ( Carbon semantics ) */ + "copyfile", /* 227 = copyfile */ + "#228", /* 228 = */ + "#229", /* 229 = */ + "poll", /* 230 = poll */ + "watchevent", /* 231 = watchevent */ + "waitevent", /* 232 = waitevent */ + "modwatch", /* 233 = modwatch */ + "getxattr", /* 234 = getxattr */ + "fgetxattr", /* 235 = fgetxattr */ + "setxattr", /* 236 = setxattr */ + "fsetxattr", /* 237 = fsetxattr */ + "removexattr", /* 238 = removexattr */ + "fremovexattr", /* 239 = fremovexattr */ + "listxattr", /* 240 = listxattr */ + "flistxattr", /* 241 = flistxattr */ + "fsctl", /* 242 = fsctl */ + "initgroups", /* 243 = initgroups */ + "#244", /* 244 = */ + "#245", /* 245 = */ + "#246", /* 246 = */ +#if NFSCLIENT + "nfsclnt", /* 247 = nfsclnt */ + "fhopen", /* 248 = fhopen */ #else - "#184", /* 184 = nosys */ + "#247", /* 247 = */ + "#248", /* 248 = */ #endif - "#185", /* 185 = nosys */ - "#186", /* 186 = nosys */ - "#187", /* 187 = nosys */ - "stat", /* 188 = stat */ - "fstat", /* 189 = fstat */ - "lstat", /* 190 = lstat */ - "pathconf", /* 191 = pathconf */ - "fpathconf", /* 192 = fpathconf */ - "obs_getfsstat", /* 193 = old getfsstat */ - "getrlimit", /* 194 = getrlimit */ - "setrlimit", /* 195 = setrlimit */ - "getdirentries", /* 196 = getdirentries */ - "mmap", /* 197 = mmap */ - "#198", /* 198 = __syscall */ - "lseek", /* 199 = lseek */ - "truncate", /* 200 = truncate */ - "ftruncate", /* 201 = ftruncate */ - "__sysctl", /* 202 = __sysctl */ - "mlock", /* 203 = mlock */ - "munlock", /* 204 = munlock */ - "undelete", /* 205 = undelete */ - "ATsocket", /* 206 = ATsocket */ - "ATgetmsg", /* 207 = ATgetmsg */ - "ATputmsg", /* 208 = ATputmsg */ - "ATPsndreq", /* 209 = ATPsndreq */ - "ATPsndrsp", /* 210 = ATPsndrsp */ - "ATPgetreq", /* 211 = ATPgetreq */ - "ATPgetrsp", /* 212 = ATPgetrsp */ - "#213", /* 213 = Reserved for AppleTalk */ - "kqueue_from_portset_np", /* 214 = kqueue_from_portset_np */ - "kqueue_portset_np", /* 215 = kqueue_portset_np */ - "#216", /* 216 = Reserved */ - "#217", /* 217 = Reserved */ - "#218", /* 218 = Reserved */ - "#219", /* 219 = Reserved */ - "getattrlist", /* 220 = getattrlist */ - "setattrlist", /* 221 = setattrlist */ - "getdirentriesattr", /* 222 = getdirentriesattr */ - "exchangedata", /* 223 = exchangedata */ - "checkuseraccess", /* 224 - checkuseraccess */ - "searchfs", /* 225 = searchfs */ - "delete", /* 226 = private delete call */ - "copyfile", /* 227 = copyfile */ - "#228", /* 228 = nosys */ - "#229", /* 229 = nosys */ - "#230", /* 230 = reserved for AFS */ - "watchevent", /* 231 = watchevent */ - "waitevent", /* 232 = waitevent */ - "modwatch", /* 233 = modwatch */ - "#234", /* 234 = nosys */ - "#235", /* 235 = nosys */ - "#236", /* 236 = nosys */ - "#237", /* 237 = nosys */ - 
"#238", /* 238 = nosys */ - "#239", /* 239 = nosys */ - "#240", /* 240 = nosys */ - "#241", /* 241 = nosys */ - "fsctl", /* 242 = fsctl */ - "#243", /* 243 = nosys */ - "#244", /* 244 = nosys */ - "#245", /* 245 = nosys */ - "#246", /* 246 = nosys */ - "nfsclnt", /* 247 = nfsclnt */ - "fhopen", /* 248 = fhopen */ - "#249", /* 249 = nosys */ - "minherit", /* 250 = minherit */ - "semsys", /* 251 = semsys */ - "msgsys", /* 252 = msgsys */ - "shmsys", /* 253 = shmsys */ - "semctl", /* 254 = semctl */ - "semget", /* 255 = semget */ - "semop", /* 256 = semop */ - "semconfig", /* 257 = semconfig */ - "msgctl", /* 258 = msgctl */ - "msgget", /* 259 = msgget */ - "msgsnd", /* 260 = msgsnd */ - "msgrcv", /* 261 = msgrcv */ - "shmat", /* 262 = shmat */ - "shmctl", /* 263 = shmctl */ - "shmdt", /* 264 = shmdt */ - "shmget", /* 265 = shmget */ - "shm_open", /* 266 = shm_open */ - "shm_unlink", /* 267 = shm_unlink */ - "sem_open", /* 268 = sem_open */ - "sem_close", /* 269 = sem_close */ - "sem_unlink", /* 270 = sem_unlink */ - "sem_wait", /* 271 = sem_wait */ - "sem_trywait", /* 272 = sem_trywait */ - "sem_post", /* 273 = sem_post */ - "sem_getvalue", /* 274 = sem_getvalue */ - "sem_init", /* 275 = sem_init */ - "sem_destroy", /* 276 = sem_destroy */ - "#277", /* 277 = nosys */ - "#278", /* 278 = nosys */ - "#279", /* 279 = nosys */ - "#280", /* 280 = nosys */ - "#281", /* 281 = nosys */ - "#282", /* 282 = nosys */ - "#283", /* 283 = nosys */ - "#284", /* 284 = nosys */ - "#285", /* 285 = nosys */ - "#286", /* 286 = nosys */ - "#287", /* 287 = nosys */ - "#288", /* 288 = nosys */ - "#289", /* 289 = nosys */ - "#290", /* 290 = nosys */ - "#291", /* 291 = nosys */ - "#292", /* 292 = nosys */ - "#293", /* 293 = nosys */ - "#294", /* 294 = nosys */ - "#295", /* 295 = nosys */ - "load_shared_file", /* 296 = load_shared_file */ - "reset_shared_file", /* 297 = reset_shared_file */ - "new_system_shared_regions", /* 298 = new_system_shared_regions */ - "#299", /* 299 = nosys */ - "#300", /* 300 = modnext */ - "#301", /* 301 = modstat */ - "#302", /* 302 = modfnext */ - "#303", /* 303 = modfind */ - "#304", /* 304 = kldload */ - "#305", /* 305 = kldunload */ - "#306", /* 306 = kldfind */ - "#307", /* 307 = kldnext */ - "#308", /* 308 = kldstat */ - "#309", /* 309 = kldfirstmod */ - "getsid", /* 310 = getsid */ - "#311", /* 311 = setresuid */ - "#312", /* 312 = setresgid */ - "aio_fsync", /* 313 = aio_fsync */ - "aio_return", /* 314 = aio_return */ - "aio_suspend", /* 315 = aio_suspend */ - "aio_cancel", /* 316 = aio_cancel */ - "aio_error", /* 317 = aio_error */ - "aio_read", /* 318 = aio_read */ - "aio_write", /* 319 = aio_write */ - "lio_listio", /* 320 = lio_listio */ - "#321", /* 321 = yield */ - "#322", /* 322 = thr_sleep */ - "#323", /* 323 = thr_wakeup */ - "mlockall", /* 324 = mlockall */ - "munlockall", /* 325 = munlockall */ - "#326", /* 326 */ - "issetugid", /* 327 = issetugid */ - "__pthread_kill", /* 328 = __pthread_kill */ - "pthread_sigmask", /* 329 = pthread_sigmask */ - "sigwait", /* 330 = sigwait */ - "#331", /* 331 */ - "#332", /* 332 */ - "#333", /* 333 */ - "#334", /* 334 */ - "utrace", /* 335 = utrace */ - "#336", /* 336 */ - "#337", /* 337 */ - "#338", /* 338 */ - "#339", /* 339 */ - "#340", /* 340 = TBD sigprocmask */ - "#341", /* 341 = TBD sigsuspend */ - "#342", /* 342 = TBD sigaction */ - "#343", /* 343 = TBD sigpending */ - "#344", /* 344 = TBD sigreturn */ - "#345", /* 345 = TBD sigtimedwait */ - "#346", /* 346 = TBD sigwaitinfo */ - "#347", /* 347 */ - "#348", /* 348 */ - "#349" /* 
349 */ - "audit", /* 350 */ - "auditon", /* 351 */ - "#352", /* 352 */ - "getauid", /* 353 */ - "setauid", /* 354 */ - "getaudit", /* 355 */ - "setaudit", /* 356 */ - "getaudit_addr", /* 357 */ - "setaudit_addr", /* 358 */ - "auditctl", /* 359 */ - "#360", /* 360 */ - "#361", /* 361 */ - "kqueue", /* 362 = kqueue */ - "kevent", /* 363 = kevent */ - "#364", /* 364 */ - "#365", /* 365 */ - "#366", /* 366 */ - "#367", /* 367 */ - "#368", /* 368 */ - "#369" /* 369 */ + "#249", /* 249 = */ + "minherit", /* 250 = minherit */ + "semsys", /* 251 = semsys */ + "msgsys", /* 252 = msgsys */ + "shmsys", /* 253 = shmsys */ + "semctl", /* 254 = semctl */ + "semget", /* 255 = semget */ + "semop", /* 256 = semop */ + "semconfig", /* 257 = semconfig */ + "msgctl", /* 258 = msgctl */ + "msgget", /* 259 = msgget */ + "msgsnd", /* 260 = msgsnd */ + "msgrcv", /* 261 = msgrcv */ + "shmat", /* 262 = shmat */ + "shmctl", /* 263 = shmctl */ + "shmdt", /* 264 = shmdt */ + "shmget", /* 265 = shmget */ + "shm_open", /* 266 = shm_open */ + "shm_unlink", /* 267 = shm_unlink */ + "sem_open", /* 268 = sem_open */ + "sem_close", /* 269 = sem_close */ + "sem_unlink", /* 270 = sem_unlink */ + "sem_wait", /* 271 = sem_wait */ + "sem_trywait", /* 272 = sem_trywait */ + "sem_post", /* 273 = sem_post */ + "sem_getvalue", /* 274 = sem_getvalue */ + "sem_init", /* 275 = sem_init */ + "sem_destroy", /* 276 = sem_destroy */ + "open_extended", /* 277 = open_extended */ + "umask_extended", /* 278 = umask_extended */ + "stat_extended", /* 279 = stat_extended */ + "lstat_extended", /* 280 = lstat_extended */ + "fstat_extended", /* 281 = fstat_extended */ + "chmod_extended", /* 282 = chmod_extended */ + "fchmod_extended", /* 283 = fchmod_extended */ + "access_extended", /* 284 = access_extended */ + "settid", /* 285 = settid */ + "gettid", /* 286 = gettid */ + "setsgroups", /* 287 = setsgroups */ + "getsgroups", /* 288 = getsgroups */ + "setwgroups", /* 289 = setwgroups */ + "getwgroups", /* 290 = getwgroups */ + "mkfifo_extended", /* 291 = mkfifo_extended */ + "mkdir_extended", /* 292 = mkdir_extended */ + "identitysvc", /* 293 = identitysvc */ + "#294", /* 294 = */ + "#295", /* 295 = */ + "load_shared_file", /* 296 = load_shared_file */ + "reset_shared_file", /* 297 = reset_shared_file */ + "new_system_shared_regions", /* 298 = new_system_shared_regions */ + "shared_region_map_file_np", /* 299 = shared_region_map_file_np */ + "shared_region_make_private_np", /* 300 = shared_region_make_private_np */ + "#301", /* 301 = */ + "#302", /* 302 = */ + "#303", /* 303 = */ + "#304", /* 304 = */ + "#305", /* 305 = */ + "#306", /* 306 = */ + "#307", /* 307 = */ + "#308", /* 308 = */ + "#309", /* 309 = */ + "getsid", /* 310 = getsid */ + "settid_with_pid", /* 311 = settid_with_pid */ + "#312", /* 312 = */ + "aio_fsync", /* 313 = aio_fsync */ + "aio_return", /* 314 = aio_return */ + "aio_suspend", /* 315 = aio_suspend */ + "aio_cancel", /* 316 = aio_cancel */ + "aio_error", /* 317 = aio_error */ + "aio_read", /* 318 = aio_read */ + "aio_write", /* 319 = aio_write */ + "lio_listio", /* 320 = lio_listio */ + "#321", /* 321 = */ + "#322", /* 322 = */ + "#323", /* 323 = */ + "mlockall", /* 324 = mlockall */ + "munlockall", /* 325 = munlockall */ + "#326", /* 326 = */ + "issetugid", /* 327 = issetugid */ + "__pthread_kill", /* 328 = __pthread_kill */ + "pthread_sigmask", /* 329 = pthread_sigmask */ + "sigwait", /* 330 = sigwait */ + "__disable_threadsignal", /* 331 = __disable_threadsignal */ + "__pthread_markcancel", /* 332 = __pthread_markcancel */ + 
"__pthread_canceled", /* 333 = __pthread_canceled */ + "__semwait_signal", /* 334 = __semwait_signal */ + "utrace", /* 335 = utrace */ + "#336", /* 336 = */ + "#337", /* 337 = */ + "#338", /* 338 = */ + "#339", /* 339 = */ + "#340", /* 340 = */ + "#341", /* 341 = */ + "#342", /* 342 = */ + "#343", /* 343 = */ + "#344", /* 344 = */ + "#345", /* 345 = */ + "#346", /* 346 = */ + "#347", /* 347 = */ + "#348", /* 348 = */ + "#349", /* 349 = */ + "audit", /* 350 = audit */ + "auditon", /* 351 = auditon */ + "#352", /* 352 = */ + "getauid", /* 353 = getauid */ + "setauid", /* 354 = setauid */ + "getaudit", /* 355 = getaudit */ + "setaudit", /* 356 = setaudit */ + "getaudit_addr", /* 357 = getaudit_addr */ + "setaudit_addr", /* 358 = setaudit_addr */ + "auditctl", /* 359 = auditctl */ + "#360", /* 360 = */ + "#361", /* 361 = */ + "kqueue", /* 362 = kqueue */ + "kevent", /* 363 = kevent */ + "lchown", /* 364 = lchown */ + "#365", /* 365 = */ + "#366", /* 366 = */ + "#367", /* 367 = */ + "#368", /* 368 = */ + "#369", /* 369 = */ }; diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master new file mode 100644 index 000000000..a3e6e7d59 --- /dev/null +++ b/bsd/kern/syscalls.master @@ -0,0 +1,474 @@ +; derived from: FreeBSD @(#)syscalls.master 8.2 (Berkeley) 1/13/94 +; +; System call name/number master file. +; This is file processed by .../xnu/bsd/kern/makesyscalls.sh and creates: +; .../xnu/bsd/kern/init_sysent.c +; .../xnu/bsd/kern/syscalls.c +; .../xnu/bsd/sys/syscall.h +; .../xnu/bsd/sys/sysproto.h + +; Columns -> | Number | Cancel | Funnel | Files | { Name and Args } | { Comments } +; Number: system call number, must be in order +; Cancel: type of thread cancel - "PRE", "POST" or "NONE" +; Funnel: type of funnel - "KERN" or "NONE" +; Files: with files to generate - "ALL" or any combo of: +; "T" for syscall table (in init_sysent.c) +; "N" for syscall names (in syscalls.c) +; "H" for syscall headers (in syscall.h) +; "P" for syscall prototypes (in sysproto.h) +; Comments: additional comments about the sys call copied to output files + +; #ifdef's, #include's, #if's etc. are copied to all output files. 
+ +#include +#include +#include +#include +#include +#include + +0 NONE NONE ALL { int nosys(void); } { indirect syscall } +1 NONE KERN ALL { void exit(int rval); } +2 NONE KERN ALL { int fork(void); } +3 PRE NONE ALL { user_ssize_t read(int fd, user_addr_t cbuf, user_size_t nbyte); } +4 PRE NONE ALL { user_ssize_t write(int fd, user_addr_t cbuf, user_size_t nbyte); } +5 PRE NONE ALL { int open(user_addr_t path, int flags, int mode); } +6 PRE NONE ALL { int close(int fd); } +7 PRE KERN ALL { int wait4(int pid, user_addr_t status, int options, user_addr_t rusage); } +8 NONE NONE ALL { int nosys(void); } { old creat } +9 NONE NONE ALL { int link(user_addr_t path, user_addr_t link); } +10 NONE NONE ALL { int unlink(user_addr_t path); } +11 NONE NONE ALL { int nosys(void); } { old execv } +12 NONE NONE ALL { int chdir(user_addr_t path); } +13 NONE NONE ALL { int fchdir(int fd); } +14 NONE NONE ALL { int mknod(user_addr_t path, int mode, int dev); } +15 NONE NONE ALL { int chmod(user_addr_t path, int mode); } +16 NONE NONE ALL { int chown(user_addr_t path, int uid, int gid); } +17 NONE NONE UALL { int obreak(char *nsize); } { old break } + +#if COMPAT_GETFSSTAT +18 NONE NONE ALL { int ogetfsstat(user_addr_t buf, int bufsize, int flags); } +#else +18 NONE NONE ALL { int getfsstat(user_addr_t buf, int bufsize, int flags); } +#endif + +19 NONE NONE ALL { int nosys(void); } { old lseek } +20 NONE NONE ALL { int getpid(void); } +21 NONE NONE ALL { int nosys(void); } { old mount } +22 NONE NONE ALL { int nosys(void); } { old umount } +23 NONE KERN ALL { int setuid(uid_t uid); } +24 NONE KERN ALL { int getuid(void); } +25 NONE KERN ALL { int geteuid(void); } +26 NONE KERN ALL { int ptrace(int req, pid_t pid, caddr_t addr, int data); } +27 PRE NONE ALL { int recvmsg(int s, struct msghdr *msg, int flags); } +28 PRE NONE ALL { int sendmsg(int s, caddr_t msg, int flags); } +29 PRE NONE ALL { int recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr); } +30 PRE NONE ALL { int accept(int s, caddr_t name, socklen_t *anamelen); } +31 NONE NONE ALL { int getpeername(int fdes, caddr_t asa, socklen_t *alen); } +32 NONE NONE ALL { int getsockname(int fdes, caddr_t asa, socklen_t *alen); } +33 NONE NONE ALL { int access(user_addr_t path, int flags); } +34 NONE NONE ALL { int chflags(char *path, int flags); } +35 NONE NONE ALL { int fchflags(int fd, int flags); } +36 NONE NONE ALL { int sync(void); } +37 NONE KERN ALL { int kill(int pid, int signum); } +38 NONE NONE ALL { int nosys(void); } { old stat } +39 NONE KERN ALL { int getppid(void); } +40 NONE NONE ALL { int nosys(void); } { old lstat } +41 NONE NONE ALL { int dup(u_int fd); } +42 NONE NONE ALL { int pipe(void); } +43 NONE KERN ALL { int getegid(void); } +44 NONE KERN ALL { int profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } +45 NONE KERN ALL { int ktrace(const char *fname, int ops, int facs, int pid); } +46 NONE KERN ALL { int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa); } +47 NONE KERN ALL { int getgid(void); } +48 NONE KERN ALL { int sigprocmask(int how, user_addr_t mask, user_addr_t omask); } +49 NONE KERN ALL { int getlogin(char *namebuf, u_int namelen); } +50 NONE KERN ALL { int setlogin(char *namebuf); } +51 NONE KERN ALL { int acct(char *path); } +52 NONE KERN ALL { int sigpending(struct sigvec *osv); } +53 NONE KERN ALL { int sigaltstack(struct sigaltstack *nss, struct sigaltstack *oss); } +54 NONE NONE ALL { int ioctl(int fd, u_long com, caddr_t data); } +55 
NONE KERN ALL { int reboot(int opt, char *command); } +56 NONE NONE ALL { int revoke(char *path); } +57 NONE NONE ALL { int symlink(char *path, char *link); } +58 NONE NONE ALL { int readlink(char *path, char *buf, int count); } +59 NONE KERN ALL { int execve(char *fname, char **argp, char **envp); } +60 NONE KERN ALL { int umask(int newmask); } +61 NONE KERN ALL { int chroot(user_addr_t path); } +62 NONE NONE ALL { int nosys(void); } { old fstat } +63 NONE NONE ALL { int nosys(void); } { used internally, reserved } +64 NONE NONE ALL { int nosys(void); } { old getpagesize } +65 PRE NONE ALL { int msync(caddr_t addr, size_t len, int flags); } +66 NONE KERN ALL { int vfork(void); } +67 NONE NONE ALL { int nosys(void); } { old vread } +68 NONE NONE ALL { int nosys(void); } { old vwrite } +69 NONE NONE ALL { int sbrk(int incr); } +70 NONE NONE ALL { int sstk(int incr); } +71 NONE NONE ALL { int nosys(void); } { old mmap } +72 NONE NONE ALL { int ovadvise(void); } { old vadvise } +73 NONE NONE ALL { int munmap(caddr_t addr, size_t len); } +74 NONE NONE ALL { int mprotect(caddr_t addr, size_t len, int prot); } +75 NONE NONE ALL { int madvise(caddr_t addr, size_t len, int behav); } +76 NONE NONE ALL { int nosys(void); } { old vhangup } +77 NONE NONE ALL { int nosys(void); } { old vlimit } +78 NONE NONE ALL { int mincore(user_addr_t addr, user_size_t len, user_addr_t vec); } +79 NONE KERN ALL { int getgroups(u_int gidsetsize, gid_t *gidset); } +80 NONE KERN ALL { int setgroups(u_int gidsetsize, gid_t *gidset); } +81 NONE KERN ALL { int getpgrp(void); } +82 NONE KERN ALL { int setpgid(int pid, int pgid); } +83 NONE KERN ALL { int setitimer(u_int which, struct itimerval *itv, struct itimerval *oitv); } +84 NONE NONE ALL { int nosys(void); } { old wait } +85 NONE NONE ALL { int swapon(void); } +86 NONE KERN ALL { int getitimer(u_int which, struct itimerval *itv); } +87 NONE NONE ALL { int nosys(void); } { old gethostname } +88 NONE NONE ALL { int nosys(void); } { old sethostname } +89 NONE NONE ALL { int getdtablesize(void); } +90 NONE NONE ALL { int dup2(u_int from, u_int to); } +91 NONE NONE ALL { int nosys(void); } { old getdopt } +92 PRE NONE ALL { int fcntl(int fd, int cmd, long arg); } +93 PRE KERN ALL { int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv); } +94 NONE NONE ALL { int nosys(void); } { old setdopt } +95 PRE NONE ALL { int fsync(int fd); } +96 NONE KERN ALL { int setpriority(int which, int who, int prio); } +97 NONE NONE ALL { int socket(int domain, int type, int protocol); } +98 PRE NONE ALL { int connect(int s, caddr_t name, socklen_t namelen); } +99 NONE NONE ALL { int nosys(void); } { old accept } +100 NONE KERN ALL { int getpriority(int which, int who); } +101 NONE NONE ALL { int nosys(void); } { old send } +102 NONE NONE ALL { int nosys(void); } { old recv } + +#ifdef __ppc__ +103 NONE NONE ALL { int nosys(void); } { old sigreturn } +#else +103 NONE KERN UALL { int sigreturn(struct sigcontext *sigcntxp); } +#endif + +104 NONE NONE ALL { int bind(int s, caddr_t name, socklen_t namelen); } +105 NONE NONE ALL { int setsockopt(int s, int level, int name, caddr_t val, socklen_t valsize); } +106 NONE NONE ALL { int listen(int s, int backlog); } +107 NONE NONE ALL { int nosys(void); } { old vtimes } +108 NONE NONE ALL { int nosys(void); } { old sigvec } +109 NONE NONE ALL { int nosys(void); } { old sigblock } +110 NONE NONE ALL { int nosys(void); } { old sigsetmask } +111 PRE KERN ALL { int sigsuspend(sigset_t mask); } +112 NONE NONE ALL { int 
nosys(void); } { old sigstack } +113 NONE NONE ALL { int nosys(void); } { old recvmsg } +114 NONE NONE ALL { int nosys(void); } { old sendmsg } +115 NONE NONE ALL { int nosys(void); } { old vtrace } + +#ifdef __ppc__ +116 NONE NONE ALL { int ppc_gettimeofday(struct timeval *tp, struct timezone *tzp); } +#else +116 NONE NONE ALL { int gettimeofday(struct timeval *tp, struct timezone *tzp); } +#endif + +117 NONE KERN ALL { int getrusage(int who, struct rusage *rusage); } +118 NONE NONE ALL { int getsockopt(int s, int level, int name, caddr_t val, socklen_t *avalsize); } +119 NONE NONE ALL { int nosys(void); } { old resuba } +120 PRE NONE ALL { user_ssize_t readv(int fd, struct iovec *iovp, u_int iovcnt); } +121 PRE NONE ALL { user_ssize_t writev(int fd, struct iovec *iovp, u_int iovcnt); } +122 NONE KERN ALL { int settimeofday(struct timeval *tv, struct timezone *tzp); } +123 NONE NONE ALL { int fchown(int fd, int uid, int gid); } +124 NONE NONE ALL { int fchmod(int fd, int mode); } +125 NONE NONE ALL { int nosys(void); } { old recvfrom } +126 NONE NONE ALL { int nosys(void); } { old setreuid } +127 NONE NONE ALL { int nosys(void); } { old setregid } +128 NONE NONE ALL { int rename(char *from, char *to); } +129 NONE NONE ALL { int nosys(void); } { old truncate } +130 NONE NONE ALL { int nosys(void); } { old ftruncate } +131 NONE NONE ALL { int flock(int fd, int how); } +132 NONE NONE ALL { int mkfifo(user_addr_t path, int mode); } +133 PRE NONE ALL { int sendto(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen); } +134 NONE NONE ALL { int shutdown(int s, int how); } +135 NONE NONE ALL { int socketpair(int domain, int type, int protocol, int *rsv); } +136 NONE NONE ALL { int mkdir(user_addr_t path, int mode); } +137 NONE NONE ALL { int rmdir(char *path); } +138 NONE NONE ALL { int utimes(char *path, struct timeval *tptr); } +139 NONE NONE ALL { int futimes(int fd, struct timeval *tptr); } +140 NONE KERN ALL { int adjtime(struct timeval *delta, struct timeval *olddelta); } +141 NONE NONE ALL { int nosys(void); } { old getpeername } +142 NONE NONE ALL { int nosys(void); } { old gethostid } +143 NONE NONE ALL { int nosys(void); } { old sethostid } +144 NONE NONE ALL { int nosys(void); } { old getrlimit } +145 NONE NONE ALL { int nosys(void); } { old setrlimit } +146 NONE NONE ALL { int nosys(void); } { old killpg } +147 NONE KERN ALL { int setsid(void); } +148 NONE NONE ALL { int nosys(void); } { old setquota } +149 NONE NONE ALL { int nosys(void); } { old qquota } +150 NONE NONE ALL { int nosys(void); } { old getsockname } +151 NONE KERN ALL { int getpgid(pid_t pid); } +152 NONE KERN ALL { int setprivexec(int flag); } +153 PRE NONE ALL { user_ssize_t pread(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } +154 PRE NONE ALL { user_ssize_t pwrite(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } + +#if NFSSERVER +155 NONE KERN ALL { int nfssvc(int flag, caddr_t argp); } +#else +155 NONE NONE ALL { int nosys(void); } +#endif + +156 NONE NONE ALL { int nosys(void); } { old getdirentries } +157 NONE NONE ALL { int statfs(char *path, struct statfs *buf); } +158 NONE NONE ALL { int fstatfs(int fd, struct statfs *buf); } +159 NONE NONE ALL { int unmount(user_addr_t path, int flags); } +160 NONE NONE ALL { int nosys(void); } { old async_daemon } + +#if NFSCLIENT +161 NONE KERN ALL { int getfh(char *fname, fhandle_t *fhp); } +#else +161 NONE NONE ALL { int nosys(void); } +#endif + +162 NONE NONE ALL { int nosys(void); } { old getdomainname } +163 NONE NONE ALL { 
int nosys(void); } { old setdomainname } +164 NONE NONE ALL { int nosys(void); } +165 NONE KERN ALL { int quotactl(char *path, int cmd, int uid, caddr_t arg); } +166 NONE NONE ALL { int nosys(void); } { old exportfs } +167 NONE NONE ALL { int mount(char *type, char *path, int flags, caddr_t data); } +168 NONE NONE ALL { int nosys(void); } { old ustat } +169 NONE NONE ALL { int nosys(void); } +170 NONE NONE HN { int table(void); } { old table } +171 NONE NONE ALL { int nosys(void); } { old wait3 } +172 NONE NONE ALL { int nosys(void); } { old rpause } +173 PRE KERN ALL { int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); } +174 NONE NONE ALL { int nosys(void); } { old getdents } +175 NONE NONE ALL { int nosys(void); } { old gc_control } +176 NONE KERN ALL { int add_profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } +177 NONE NONE ALL { int nosys(void); } +178 NONE NONE ALL { int nosys(void); } +179 NONE NONE ALL { int nosys(void); } +180 NONE NONE UALL { int kdebug_trace(int code, int arg1, int arg2, int arg3, int arg4, int arg5); } +181 NONE KERN ALL { int setgid(gid_t gid); } +182 NONE KERN ALL { int setegid(gid_t egid); } +183 NONE KERN ALL { int seteuid(uid_t euid); } + +#ifdef __ppc__ +184 NONE KERN ALL { int sigreturn(struct ucontext *uctx, int infostyle); } +#else +184 NONE NONE ALL { int nosys(void); } +#endif + +185 NONE NONE ALL { int nosys(void); } +186 NONE NONE ALL { int nosys(void); } +187 NONE NONE ALL { int nosys(void); } +188 NONE NONE ALL { int stat(user_addr_t path, user_addr_t ub); } +189 NONE NONE ALL { int fstat(int fd, user_addr_t ub); } +190 NONE NONE ALL { int lstat(user_addr_t path, user_addr_t ub); } +191 NONE NONE ALL { int pathconf(char *path, int name); } +192 NONE NONE ALL { int fpathconf(int fd, int name); } + +#if COMPAT_GETFSSTAT +193 NONE NONE ALL { int getfsstat(user_addr_t buf, user_long_t bufsize, int flags); } +#else +193 NONE NONE ALL { int nosys(void); } +#endif + +194 NONE KERN ALL { int getrlimit(u_int which, struct rlimit *rlp); } +195 NONE KERN ALL { int setrlimit(u_int which, struct rlimit *rlp); } +196 NONE NONE ALL { int getdirentries(int fd, char *buf, u_int count, long *basep); } +197 NONE NONE ALL { user_addr_t mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos); } +198 NONE NONE ALL { int nosys(void); } { __syscall } +199 NONE NONE ALL { off_t lseek(int fd, off_t offset, int whence); } +200 NONE NONE ALL { int truncate(char *path, off_t length); } +201 NONE NONE ALL { int ftruncate(int fd, off_t length); } +202 NONE KERN ALL { int __sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen); } +203 NONE NONE ALL { int mlock(caddr_t addr, size_t len); } +204 NONE NONE ALL { int munlock(caddr_t addr, size_t len); } +205 NONE NONE ALL { int undelete(user_addr_t path); } + +#ifdef __ppc__ +206 NONE NONE ALL { int ATsocket(int proto); } +207 NONE NONE UALL { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } +208 NONE NONE UALL { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } +209 NONE NONE UALL { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } +210 NONE NONE UALL { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } +211 NONE NONE UALL { int ATPgetreq(int fd, unsigned char *buf, int buflen); } +212 NONE NONE UALL { int ATPgetrsp(int fd, unsigned char *bdsp); } +213 NONE NONE ALL { int nosys(void); } { Reserved for AppleTalk } +#else +206 NONE NONE HN { int ATsocket(int proto); } +207 
NONE NONE UHN { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } +208 NONE NONE UHN { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } +209 NONE NONE UHN { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } +210 NONE NONE UHN { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } +211 NONE NONE UHN { int ATPgetreq(int fd, unsigned char *buf, int buflen); } +212 NONE NONE UHN { int ATPgetrsp(int fd, unsigned char *bdsp); } +213 NONE NONE ALL { int nosys(void); } { Reserved for AppleTalk } +#endif /* __ppc__ */ + +214 NONE KERN ALL { int kqueue_from_portset_np(int portset); } +215 NONE KERN ALL { int kqueue_portset_np(int fd); } + +; System Calls 216 - 230 are reserved for calls to support HFS/HFS Plus +; file system semantics. Currently, we only use 216-227. The rest is +; for future expansion in anticipation of new MacOS APIs for HFS Plus. +; These calls are not conditionalized because while they are specific +; to HFS semantics, they are not specific to the HFS filesystem. +; We expect all filesystems to recognize the call and report that it is +; not supported or to actually implement it. +216 NONE NONE UHN { int mkcomplex(const char *path, mode_t mode, u_long type); } { soon to be obsolete } +217 NONE NONE UHN { int statv(const char *path, struct vstat *vsb); } { soon to be obsolete } +218 NONE NONE UHN { int lstatv(const char *path, struct vstat *vsb); } { soon to be obsolete } +219 NONE NONE UHN { int fstatv(int fd, struct vstat *vsb); } { soon to be obsolete } +220 NONE NONE ALL { int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +221 NONE NONE ALL { int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +222 NONE NONE ALL { int getdirentriesattr(int fd, struct attrlist *alist, void *buffer, size_t buffersize, u_long *count, u_long *basep, u_long *newstate, u_long options); } +223 NONE NONE ALL { int exchangedata(const char *path1, const char *path2, u_long options); } + +#ifdef __APPLE_API_OBSOLETE +224 NONE NONE UALL { int checkuseraccess(const char *path, uid_t userid, gid_t *groups, int ngroups, int accessrequired, u_long options); } +#else +224 NONE NONE ALL { int nosys(void); } { HFS checkuseraccess check access to a file } +#endif /* __APPLE_API_OBSOLETE */ +225 NONE KERN ALL { int searchfs(const char *path, struct fssearchblock *searchblock, u_long *nummatches, u_long scriptcode, u_long options, struct searchstate *state); } +226 NONE NONE ALL { int delete(user_addr_t path); } { private delete (Carbon semantics) } +227 NONE NONE ALL { int copyfile(char *from, char *to, int mode, int flags); } +228 NONE NONE ALL { int nosys(void); } +229 NONE NONE ALL { int nosys(void); } +230 PRE NONE ALL { int poll(struct pollfd *fds, u_int nfds, int timeout); } +231 NONE NONE UALL { int watchevent(struct eventreq *u_req, int u_eventmask); } +232 NONE NONE UALL { int waitevent(struct eventreq *u_req, struct timeval *tv); } +233 NONE NONE UALL { int modwatch(struct eventreq *u_req, int u_eventmask); } +234 NONE NONE ALL { user_ssize_t getxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +235 NONE NONE ALL { user_ssize_t fgetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +236 NONE NONE ALL { int setxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size,
uint32_t position, int options); } +237 NONE NONE ALL { int fsetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +238 NONE NONE ALL { int removexattr(user_addr_t path, user_addr_t attrname, int options); } +239 NONE NONE ALL { int fremovexattr(int fd, user_addr_t attrname, int options); } +240 NONE NONE ALL { user_ssize_t listxattr(user_addr_t path, user_addr_t namebuf, size_t bufsize, int options); } +241 NONE NONE ALL { user_ssize_t flistxattr(int fd, user_addr_t namebuf, size_t bufsize, int options); } +242 NONE KERN ALL { int fsctl(const char *path, u_long cmd, caddr_t data, u_long options); } +243 NONE KERN ALL { int initgroups(u_int gidsetsize, gid_t *gidset, int gmuid); } +244 NONE NONE ALL { int nosys(void); } +245 NONE NONE ALL { int nosys(void); } +246 NONE NONE ALL { int nosys(void); } + +#if NFSCLIENT +247 NONE KERN ALL { int nfsclnt(int flag, caddr_t argp); } +248 NONE KERN ALL { int fhopen(const struct fhandle *u_fhp, int flags); } +#else +247 NONE NONE ALL { int nosys(void); } +248 NONE NONE ALL { int nosys(void); } +#endif + +249 NONE NONE ALL { int nosys(void); } +250 NONE NONE ALL { int minherit(void *addr, size_t len, int inherit); } +251 NONE NONE ALL { int semsys(u_int which, int a2, int a3, int a4, int a5); } +252 NONE NONE ALL { int msgsys(u_int which, int a2, int a3, int a4, int a5); } +253 NONE NONE ALL { int shmsys(u_int which, int a2, int a3, int a4); } +254 NONE NONE ALL { int semctl(int semid, int semnum, int cmd, semun_t arg); } +255 NONE NONE ALL { int semget(key_t key, int nsems, int semflg); } +256 NONE NONE ALL { int semop(int semid, struct sembuf *sops, int nsops); } +257 NONE NONE ALL { int semconfig(semconfig_ctl_t flag); } +258 NONE NONE ALL { int msgctl(int msqid, int cmd, struct msqid_ds *buf); } +259 NONE NONE ALL { int msgget(key_t key, int msgflg); } +260 PRE NONE ALL { int msgsnd(int msqid, void *msgp, size_t msgsz, int msgflg); } +261 PRE NONE ALL { user_ssize_t msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg); } +262 NONE NONE ALL { int shmat(int shmid, void *shmaddr, int shmflg); } +263 NONE NONE ALL { int shmctl(int shmid, int cmd, struct shmid_ds *buf); } +264 NONE NONE ALL { int shmdt(void *shmaddr); } +265 NONE NONE ALL { int shmget(key_t key, size_t size, int shmflg); } +266 NONE NONE ALL { int shm_open(const char *name, int oflag, int mode); } +267 NONE NONE ALL { int shm_unlink(const char *name); } +268 NONE NONE ALL { user_addr_t sem_open(const char *name, int oflag, int mode, int value); } +269 NONE NONE ALL { int sem_close(sem_t *sem); } +270 NONE NONE ALL { int sem_unlink(const char *name); } +271 PRE NONE ALL { int sem_wait(sem_t *sem); } +272 NONE NONE ALL { int sem_trywait(sem_t *sem); } +273 NONE NONE ALL { int sem_post(sem_t *sem); } +274 NONE NONE ALL { int sem_getvalue(sem_t *sem, int *sval); } +275 NONE NONE ALL { int sem_init(sem_t *sem, int pshared, u_int value); } +276 NONE NONE ALL { int sem_destroy(sem_t *sem); } +277 NONE NONE ALL { int open_extended(user_addr_t path, int flags, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity); } +278 NONE KERN ALL { int umask_extended(int newmask, user_addr_t xsecurity); } +279 NONE NONE ALL { int stat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size); } +280 NONE NONE ALL { int lstat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size); } +281 NONE NONE ALL { int fstat_extended(int fd, user_addr_t ub, user_addr_t
xsecurity, user_addr_t xsecurity_size); } +282 NONE NONE ALL { int chmod_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity); } +283 NONE NONE ALL { int fchmod_extended(int fd, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity); } +284 NONE NONE ALL { int access_extended(user_addr_t entries, size_t size, user_addr_t results, uid_t uid); } +285 NONE NONE ALL { int settid(uid_t uid, gid_t gid); } +286 NONE NONE ALL { int gettid(uid_t *uidp, gid_t *gidp); } +287 NONE NONE ALL { int setsgroups(int setlen, user_addr_t guidset); } +288 NONE NONE ALL { int getsgroups(user_addr_t setlen, user_addr_t guidset); } +289 NONE NONE ALL { int setwgroups(int setlen, user_addr_t guidset); } +290 NONE NONE ALL { int getwgroups(user_addr_t setlen, user_addr_t guidset); } +291 NONE NONE ALL { int mkfifo_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity); } +292 NONE NONE ALL { int mkdir_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity); } +293 NONE NONE ALL { int identitysvc(int opcode, user_addr_t message); } +294 NONE NONE ALL { int nosys(void); } +295 NONE NONE ALL { int nosys(void); } +296 NONE KERN UALL { int load_shared_file(char *filename, caddr_t mfa, u_long mfs, caddr_t *ba, int map_cnt, sf_mapping_t *mappings, int *flags); } +297 NONE KERN UALL { int reset_shared_file(caddr_t *ba, int map_cnt, sf_mapping_t *mappings); } +298 NONE KERN ALL { int new_system_shared_regions(void); } +299 NONE KERN UALL { int shared_region_map_file_np(int fd, uint32_t mappingCount, user_addr_t mappings, user_addr_t slide_p); } +300 NONE KERN UALL { int shared_region_make_private_np(uint32_t rangeCount, user_addr_t ranges); } +301 NONE NONE ALL { int nosys(void); } +302 NONE NONE ALL { int nosys(void); } +303 NONE NONE ALL { int nosys(void); } +304 NONE NONE ALL { int nosys(void); } +305 NONE NONE ALL { int nosys(void); } +306 NONE NONE ALL { int nosys(void); } +307 NONE NONE ALL { int nosys(void); } +308 NONE NONE ALL { int nosys(void); } +309 NONE NONE ALL { int nosys(void); } +310 NONE KERN ALL { int getsid(pid_t pid); } +311 NONE NONE ALL { int settid_with_pid(pid_t pid, int assume); } +312 NONE NONE ALL { int nosys(void); } +313 NONE NONE ALL { int aio_fsync(int op, user_addr_t aiocbp); } +314 NONE NONE ALL { user_ssize_t aio_return(user_addr_t aiocbp); } +315 PRE NONE ALL { int aio_suspend(user_addr_t aiocblist, int nent, user_addr_t timeoutp); } +316 NONE NONE ALL { int aio_cancel(int fd, user_addr_t aiocbp); } +317 NONE NONE ALL { int aio_error(user_addr_t aiocbp); } +318 NONE NONE ALL { int aio_read(user_addr_t aiocbp); } +319 NONE NONE ALL { int aio_write(user_addr_t aiocbp); } +320 NONE NONE ALL { int lio_listio(int mode, user_addr_t aiocblist, int nent, user_addr_t sigp); } +321 NONE NONE ALL { int nosys(void); } +322 NONE NONE ALL { int nosys(void); } +323 NONE NONE ALL { int nosys(void); } +324 NONE NONE ALL { int mlockall(int how); } +325 NONE NONE ALL { int munlockall(int how); } +326 NONE NONE ALL { int nosys(void); } +327 NONE KERN ALL { int issetugid(void); } +328 NONE KERN ALL { int __pthread_kill(int thread_port, int sig); } +329 NONE KERN ALL { int pthread_sigmask(int how, user_addr_t set, user_addr_t oset); } +330 PRE KERN ALL { int sigwait(user_addr_t set, user_addr_t sig); } +331 NONE KERN ALL { int __disable_threadsignal(int value); } +332 NONE NONE ALL { int __pthread_markcancel(int thread_port); } +333 NONE NONE ALL { int __pthread_canceled(int action); } +334 POST NONE ALL { int 
__semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, time_t tv_sec, int32_t tv_nsec); } +335 NONE KERN ALL { int utrace(const void *addr, size_t len); } +336 NONE NONE ALL { int nosys(void); } +337 NONE NONE ALL { int nosys(void); } +338 NONE NONE ALL { int nosys(void); } +339 NONE NONE ALL { int nosys(void); } +340 NONE NONE ALL { int nosys(void); } +341 NONE NONE ALL { int nosys(void); } +342 NONE NONE ALL { int nosys(void); } +343 NONE NONE ALL { int nosys(void); } +344 NONE NONE ALL { int nosys(void); } +345 NONE NONE ALL { int nosys(void); } +346 NONE NONE ALL { int nosys(void); } +347 NONE NONE ALL { int nosys(void); } +348 NONE NONE ALL { int nosys(void); } +349 NONE NONE ALL { int nosys(void); } +350 NONE KERN ALL { int audit(void *record, int length); } +351 NONE KERN ALL { int auditon(int cmd, void *data, int length); } +352 NONE KERN ALL { int nosys(void); } +353 NONE KERN ALL { int getauid(au_id_t *auid); } +354 NONE KERN ALL { int setauid(au_id_t *auid); } +355 NONE KERN ALL { int getaudit(struct auditinfo *auditinfo); } +356 NONE KERN ALL { int setaudit(struct auditinfo *auditinfo); } +357 NONE KERN ALL { int getaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } +358 NONE KERN ALL { int setaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } +359 NONE KERN ALL { int auditctl(char *path); } +360 NONE NONE ALL { int nosys(void); } +361 NONE NONE ALL { int nosys(void); } +362 NONE NONE ALL { int kqueue(void); } +363 NONE NONE ALL { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } +364 NONE NONE ALL { int lchown(user_addr_t path, uid_t owner, gid_t group); } +365 NONE NONE ALL { int nosys(void); } +366 NONE NONE ALL { int nosys(void); } +367 NONE NONE ALL { int nosys(void); } +368 NONE NONE ALL { int nosys(void); } +369 NONE NONE ALL { int nosys(void); } diff --git a/bsd/kern/sysctl_init.c b/bsd/kern/sysctl_init.c index 2fc67f072..e50013d38 100644 --- a/bsd/kern/sysctl_init.c +++ b/bsd/kern/sysctl_init.c @@ -21,18 +21,19 @@ */ #include -#include #include #include extern struct sysctl_oid sysctl__debug_bpf_bufsize; extern struct sysctl_oid sysctl__debug_bpf_maxbufsize; +extern struct sysctl_oid sysctl__debug_bpf_maxdevices; +extern struct sysctl_oid sysctl__debug_iokit; #if TUN extern struct sysctl_oid sysctl__debug_if_tun_debug; #endif -#if COMPAT_43 +#if COMPAT_43_TTY #ifndef NeXT extern struct sysctl_oid sysctl__debug_ttydebug; #endif @@ -42,6 +43,10 @@ extern struct sysctl_oid sysctl__hw_machine; extern struct sysctl_oid sysctl__hw_model; extern struct sysctl_oid sysctl__hw_ncpu; extern struct sysctl_oid sysctl__hw_activecpu; +extern struct sysctl_oid sysctl__hw_physicalcpu; +extern struct sysctl_oid sysctl__hw_physicalcpu_max; +extern struct sysctl_oid sysctl__hw_logicalcpu; +extern struct sysctl_oid sysctl__hw_logicalcpu_max; extern struct sysctl_oid sysctl__hw_byteorder; extern struct sysctl_oid sysctl__hw_cputype; extern struct sysctl_oid sysctl__hw_cpusubtype; @@ -84,6 +89,10 @@ extern struct sysctl_oid sysctl__kern_sysv_shmmin; extern struct sysctl_oid sysctl__kern_sysv_shmmni; extern struct sysctl_oid sysctl__kern_sysv_shmseg; extern struct sysctl_oid sysctl__kern_sysv_shmall; +extern struct sysctl_oid sysctl__kern_sysv_ipcs; +extern struct sysctl_oid sysctl__kern_sysv_ipcs_shm; +extern struct sysctl_oid sysctl__kern_sysv_ipcs_sem; +extern struct sysctl_oid sysctl__kern_sysv_ipcs_msg; extern struct sysctl_oid sysctl__kern_sysv_semmni; 
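/*
 * A minimal sketch of how the extern OID declarations in this file are
 * consumed, assuming the fixed-list registration loop in
 * bsd/kern/kern_newsysctl.c keeps its usual shape: at startup the kernel
 * walks the NULL-terminated newsysctl_list[] array (defined at the bottom
 * of this file from these externs) and hooks each OID into the sysctl tree.
 */
#include <sys/sysctl.h>		/* struct sysctl_oid, sysctl_register_oid() */

extern struct sysctl_oid *newsysctl_list[];

void
sysctl_register_fixed(void)
{
	int i = 0;

	/* Register every statically declared OID in the fixed list. */
	while (newsysctl_list[i] != NULL)
		sysctl_register_oid(newsysctl_list[i++]);
}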
extern struct sysctl_oid sysctl__kern_sysv_semmns; @@ -93,12 +102,16 @@ extern struct sysctl_oid sysctl__kern_sysv_semume; extern struct sysctl_oid sysctl__kern_dummy; extern struct sysctl_oid sysctl__kern_ipc_maxsockbuf; +extern struct sysctl_oid sysctl__kern_ipc_mbstat; extern struct sysctl_oid sysctl__kern_ipc_nmbclusters; extern struct sysctl_oid sysctl__kern_ipc_sockbuf_waste_factor; extern struct sysctl_oid sysctl__kern_ipc_somaxconn; extern struct sysctl_oid sysctl__kern_ipc_sosendminchain; extern struct sysctl_oid sysctl__kern_ipc_sorecvmincopy; extern struct sysctl_oid sysctl__kern_ipc_maxsockets; +extern struct sysctl_oid sysctl__kern_posix; +extern struct sysctl_oid sysctl__kern_posix_sem; +extern struct sysctl_oid sysctl__kern_posix_sem_max; extern struct sysctl_oid sysctl__kern_sugid_scripts; extern struct sysctl_oid sysctl__net_inet_icmp_icmplim; extern struct sysctl_oid sysctl__net_inet_icmp_maskrepl; @@ -126,6 +139,7 @@ extern struct sysctl_oid sysctl__net_inet_ip_subnets_are_local; extern struct sysctl_oid sysctl__net_inet_ip_keepfaith; extern struct sysctl_oid sysctl__net_inet_ip_maxfragpackets; extern struct sysctl_oid sysctl__net_inet_ip_maxfragsperpacket; +extern struct sysctl_oid sysctl__net_inet_ip_maxfrags; extern struct sysctl_oid sysctl__net_inet_ip_check_interface; extern struct sysctl_oid sysctl__net_inet_ip_check_route_selfref; extern struct sysctl_oid sysctl__net_inet_ip_use_route_genid; @@ -134,17 +148,39 @@ extern struct sysctl_oid sysctl__net_inet_ip_gifttl; #endif #if DUMMYNET -extern struct sysctl_oid sysctl__net_inet_ip_dummynet_calls; -extern struct sysctl_oid sysctl__net_inet_ip_dummynet_debug; -extern struct sysctl_oid sysctl__net_inet_ip_dummynet_idle; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_hash_size; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_curr_time; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_ready_heap; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_extract_heap; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_searches; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_search_steps; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_expire; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_max_chain_len; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_red_lookup_depth; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_red_avg_pkt_size; +extern struct sysctl_oid sysctl__net_inet_ip_dummynet_red_max_pkt_size; extern struct sysctl_oid sysctl__net_inet_ip_dummynet; #endif #if IPFIREWALL && !IPFIREWALL_KEXT +extern struct sysctl_oid sysctl__net_inet_ip_fw_enable; extern struct sysctl_oid sysctl__net_inet_ip_fw_debug; extern struct sysctl_oid sysctl__net_inet_ip_fw_verbose; extern struct sysctl_oid sysctl__net_inet_ip_fw_verbose_limit; extern struct sysctl_oid sysctl__net_inet_ip_fw_one_pass; +extern struct sysctl_oid sysctl__net_inet_ip_fw_autoinc_step; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_buckets; +extern struct sysctl_oid sysctl__net_inet_ip_fw_curr_dyn_buckets; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_count; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_max; +extern struct sysctl_oid sysctl__net_inet_ip_fw_static_count; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_ack_lifetime; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_syn_lifetime; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_fin_lifetime; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_rst_lifetime; +extern struct sysctl_oid 
sysctl__net_inet_ip_fw_dyn_udp_lifetime; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_short_lifetime; +extern struct sysctl_oid sysctl__net_inet_ip_fw_dyn_keepalive; extern struct sysctl_oid sysctl__net_inet_ip_fw; #endif @@ -152,6 +188,7 @@ extern struct sysctl_oid sysctl__net_inet_ip_linklocal; extern struct sysctl_oid sysctl__net_inet_ip_linklocal_stat; extern struct sysctl_oid sysctl__net_inet_ip_linklocal_in; extern struct sysctl_oid sysctl__net_inet_ip_linklocal_in_allowbadttl; +extern struct sysctl_oid sysctl__net_inet_ip_maxchainsent; extern struct sysctl_oid sysctl__net_inet_raw_maxdgram; extern struct sysctl_oid sysctl__net_inet_raw_recvspace; @@ -166,6 +203,7 @@ extern struct sysctl_oid sysctl__net_inet_tcp_keepinit; extern struct sysctl_oid sysctl__net_inet_tcp_keepintvl; extern struct sysctl_oid sysctl__net_inet_tcp_mssdflt; extern struct sysctl_oid sysctl__net_inet_tcp_minmss; +extern struct sysctl_oid sysctl__net_inet_tcp_minmssoverload; extern struct sysctl_oid sysctl__net_inet_tcp_recvspace; extern struct sysctl_oid sysctl__net_inet_tcp_sendspace; extern struct sysctl_oid sysctl__net_inet_tcp_slowlink_wsize; @@ -175,6 +213,7 @@ extern struct sysctl_oid sysctl__net_inet_tcp_path_mtu_discovery; extern struct sysctl_oid sysctl__net_inet_tcp_slowstart_flightsize; extern struct sysctl_oid sysctl__net_inet_tcp_local_slowstart_flightsize; extern struct sysctl_oid sysctl__net_inet_tcp_newreno; +extern struct sysctl_oid sysctl__net_inet_tcp_packetchain; extern struct sysctl_oid sysctl__net_inet_tcp_tcbhashsize; extern struct sysctl_oid sysctl__net_inet_tcp_do_tcpdrain; extern struct sysctl_oid sysctl__net_inet_tcp_icmp_may_rst; @@ -199,6 +238,7 @@ extern struct sysctl_oid sysctl__net_inet_udp_checksum; extern struct sysctl_oid sysctl__net_inet_udp_maxdgram; extern struct sysctl_oid sysctl__net_inet_udp_recvspace; extern struct sysctl_oid sysctl__net_inet_udp_blackhole; +extern struct sysctl_oid sysctl__net_inet_udp_pcbcount; #if NETAT extern struct sysctl_oid sysctl__net_appletalk_debug; @@ -221,7 +261,7 @@ extern struct sysctl_oid sysctl__net_link_ether_inet_maxtries; extern struct sysctl_oid sysctl__net_link_ether_inet_proxyall; extern struct sysctl_oid sysctl__net_link_ether_inet_prune_intvl; extern struct sysctl_oid sysctl__net_link_ether_inet_useloopback; -extern struct sysctl_oid sysctl__net_link_ether_inet_log_arp_wrong_iface; +extern struct sysctl_oid sysctl__net_link_ether_inet_log_arp_warnings; extern struct sysctl_oid sysctl__net_link_ether_inet_apple_hwcksum_tx; extern struct sysctl_oid sysctl__net_link_ether_inet_apple_hwcksum_rx; @@ -229,6 +269,7 @@ extern struct sysctl_oid sysctl__net_link_ether_inet_apple_hwcksum_rx; extern struct sysctl_oid sysctl__net_link_generic_system_ifcount; extern struct sysctl_oid sysctl__net_link_generic; extern struct sysctl_oid sysctl__net_link_generic_ifdata; +extern struct sysctl_oid sysctl__net_link_generic_ifalldata; extern struct sysctl_oid sysctl__net_link_generic_system; #endif @@ -259,10 +300,12 @@ extern struct sysctl_oid sysctl__vfs_nfs_diskless_rootpath; extern struct sysctl_oid sysctl__vfs_nfs_diskless_swappath; extern struct sysctl_oid sysctl__vfs_nfs_nfsstats; #endif +#if NFSCLIENT extern struct sysctl_oid sysctl__vfs_generic_nfs_client_initialdowndelay; extern struct sysctl_oid sysctl__vfs_generic_nfs_client_nextdowndelay; extern struct sysctl_oid sysctl__vfs_generic_nfs_client; extern struct sysctl_oid sysctl__vfs_generic_nfs; +#endif extern struct sysctl_oid sysctl__vfs_generic; extern struct sysctl_oid 
sysctl__vfs_generic_vfsidlist; @@ -339,6 +382,7 @@ extern struct sysctl_oid sysctl__net_inet6_ip6_forwarding; extern struct sysctl_oid sysctl__net_inet6_ip6_redirect; extern struct sysctl_oid sysctl__net_inet6_ip6_hlim; extern struct sysctl_oid sysctl__net_inet6_ip6_maxfragpackets; +extern struct sysctl_oid sysctl__net_inet6_ip6_maxfrags; extern struct sysctl_oid sysctl__net_inet6_ip6_accept_rtadv; extern struct sysctl_oid sysctl__net_inet6_ip6_keepfaith; extern struct sysctl_oid sysctl__net_inet6_ip6_log_interval; @@ -354,6 +398,7 @@ extern struct sysctl_oid sysctl__net_inet6_ip6_use_tempaddr; extern struct sysctl_oid sysctl__net_inet6_ip6_v6only; extern struct sysctl_oid sysctl__net_inet6_ip6_auto_linklocal; extern struct sysctl_oid sysctl__net_inet6_ip6_rip6stats; +extern struct sysctl_oid sysctl__net_inet6_ip6_mrt6stat; extern struct sysctl_oid sysctl__net_inet6_ip6_rtexpire; extern struct sysctl_oid sysctl__net_inet6_ip6_rtminexpire; extern struct sysctl_oid sysctl__net_inet6_ip6_rtmaxcache; @@ -421,6 +466,7 @@ extern struct sysctl_oid sysctl__net_key_esp_keymin; extern struct sysctl_oid sysctl__net_key_esp_auth; extern struct sysctl_oid sysctl__net_key_ah_keymin; extern struct sysctl_oid sysctl__net_key_natt_keepalive_interval; +extern struct sysctl_oid sysctl__net_key_pfkeystat; #endif @@ -434,41 +480,58 @@ struct sysctl_oid *newsysctl_list[] = &sysctl__vfs, &sysctl__sysctl, &sysctl__debug_bpf_bufsize, - &sysctl__debug_bpf_maxbufsize + &sysctl__debug_bpf_maxbufsize, + &sysctl__debug_bpf_maxdevices, + &sysctl__debug_iokit #if TUN ,&sysctl__debug_if_tun_debug #endif -#if COMPAT_43 +#if COMPAT_43_TTY #ifndef NeXT ,&sysctl__debug_ttydebug #endif #endif + ,&sysctl__kern_posix + ,&sysctl__kern_posix_sem + ,&sysctl__kern_posix_sem_max + ,&sysctl__kern_sysv_shmmax ,&sysctl__kern_sysv_shmmin ,&sysctl__kern_sysv_shmmni ,&sysctl__kern_sysv_shmseg ,&sysctl__kern_sysv_shmall + ,&sysctl__kern_sysv_ipcs + ,&sysctl__kern_sysv_ipcs_shm + ,&sysctl__kern_sysv_ipcs_sem + ,&sysctl__kern_sysv_ipcs_msg ,&sysctl__kern_sysv_semmni ,&sysctl__kern_sysv_semmns ,&sysctl__kern_sysv_semmnu ,&sysctl__kern_sysv_semmsl ,&sysctl__kern_sysv_semume ,&sysctl__kern_dummy + ,&sysctl__kern_ipc_maxsockbuf + ,&sysctl__kern_ipc_mbstat ,&sysctl__kern_ipc_nmbclusters ,&sysctl__kern_ipc_sockbuf_waste_factor ,&sysctl__kern_ipc_somaxconn ,&sysctl__kern_ipc_sosendminchain ,&sysctl__kern_ipc_sorecvmincopy ,&sysctl__kern_ipc_maxsockets + ,&sysctl__kern_sugid_scripts ,&sysctl__hw_machine ,&sysctl__hw_model ,&sysctl__hw_ncpu ,&sysctl__hw_activecpu + ,&sysctl__hw_physicalcpu + ,&sysctl__hw_physicalcpu_max + ,&sysctl__hw_logicalcpu + ,&sysctl__hw_logicalcpu_max ,&sysctl__hw_byteorder ,&sysctl__hw_cputype ,&sysctl__hw_cpusubtype @@ -529,6 +592,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_ip_keepfaith ,&sysctl__net_inet_ip_maxfragpackets ,&sysctl__net_inet_ip_maxfragsperpacket + ,&sysctl__net_inet_ip_maxfrags ,&sysctl__net_inet_ip_check_interface ,&sysctl__net_inet_ip_check_route_selfref ,&sysctl__net_inet_ip_use_route_genid @@ -536,23 +600,46 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_ip_gifttl #endif #if DUMMYNET - ,&sysctl__net_inet_ip_dummynet_calls - ,&sysctl__net_inet_ip_dummynet_debug - ,&sysctl__net_inet_ip_dummynet_idle + ,&sysctl__net_inet_ip_dummynet_hash_size + ,&sysctl__net_inet_ip_dummynet_curr_time + ,&sysctl__net_inet_ip_dummynet_ready_heap + ,&sysctl__net_inet_ip_dummynet_extract_heap + ,&sysctl__net_inet_ip_dummynet_searches + ,&sysctl__net_inet_ip_dummynet_search_steps + 
,&sysctl__net_inet_ip_dummynet_expire + ,&sysctl__net_inet_ip_dummynet_max_chain_len + ,&sysctl__net_inet_ip_dummynet_red_lookup_depth + ,&sysctl__net_inet_ip_dummynet_red_avg_pkt_size + ,&sysctl__net_inet_ip_dummynet_red_max_pkt_size ,&sysctl__net_inet_ip_dummynet #endif #if IPFIREWALL && !IPFIREWALL_KEXT + ,&sysctl__net_inet_ip_fw_enable ,&sysctl__net_inet_ip_fw_debug ,&sysctl__net_inet_ip_fw_verbose ,&sysctl__net_inet_ip_fw_verbose_limit ,&sysctl__net_inet_ip_fw_one_pass + ,&sysctl__net_inet_ip_fw_autoinc_step + ,&sysctl__net_inet_ip_fw_dyn_buckets + ,&sysctl__net_inet_ip_fw_curr_dyn_buckets + ,&sysctl__net_inet_ip_fw_dyn_count + ,&sysctl__net_inet_ip_fw_dyn_max + ,&sysctl__net_inet_ip_fw_static_count + ,&sysctl__net_inet_ip_fw_dyn_ack_lifetime + ,&sysctl__net_inet_ip_fw_dyn_syn_lifetime + ,&sysctl__net_inet_ip_fw_dyn_fin_lifetime + ,&sysctl__net_inet_ip_fw_dyn_rst_lifetime + ,&sysctl__net_inet_ip_fw_dyn_udp_lifetime + ,&sysctl__net_inet_ip_fw_dyn_short_lifetime + ,&sysctl__net_inet_ip_fw_dyn_keepalive ,&sysctl__net_inet_ip_fw #endif ,&sysctl__net_inet_ip_linklocal ,&sysctl__net_inet_ip_linklocal_stat ,&sysctl__net_inet_ip_linklocal_in ,&sysctl__net_inet_ip_linklocal_in_allowbadttl + ,&sysctl__net_inet_ip_maxchainsent ,&sysctl__net_inet_raw_maxdgram ,&sysctl__net_inet_raw_recvspace ,&sysctl__net_inet_tcp_always_keepalive @@ -566,6 +653,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_tcp_keepintvl ,&sysctl__net_inet_tcp_mssdflt ,&sysctl__net_inet_tcp_minmss + ,&sysctl__net_inet_tcp_minmssoverload ,&sysctl__net_inet_tcp_recvspace ,&sysctl__net_inet_tcp_sendspace ,&sysctl__net_inet_tcp_slowlink_wsize @@ -575,6 +663,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_tcp_slowstart_flightsize ,&sysctl__net_inet_tcp_local_slowstart_flightsize ,&sysctl__net_inet_tcp_newreno + ,&sysctl__net_inet_tcp_packetchain ,&sysctl__net_inet_tcp_tcbhashsize ,&sysctl__net_inet_tcp_do_tcpdrain ,&sysctl__net_inet_tcp_icmp_may_rst @@ -599,6 +688,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet_udp_maxdgram ,&sysctl__net_inet_udp_recvspace ,&sysctl__net_inet_udp_blackhole + ,&sysctl__net_inet_udp_pcbcount #if NETAT ,&sysctl__net_appletalk_debug @@ -622,13 +712,14 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_link_ether_inet_proxyall ,&sysctl__net_link_ether_inet_prune_intvl ,&sysctl__net_link_ether_inet_useloopback - ,&sysctl__net_link_ether_inet_log_arp_wrong_iface + ,&sysctl__net_link_ether_inet_log_arp_warnings ,&sysctl__net_link_ether_inet_apple_hwcksum_tx ,&sysctl__net_link_ether_inet_apple_hwcksum_rx #if NETMIBS ,&sysctl__net_link_generic_system_ifcount ,&sysctl__net_link_generic ,&sysctl__net_link_generic_ifdata + ,&sysctl__net_link_generic_ifalldata ,&sysctl__net_link_generic_system #endif @@ -664,10 +755,12 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__vfs_generic_vfsidlist ,&sysctl__vfs_generic_ctlbyfsid ,&sysctl__vfs_generic_noremotehang +#if NFSCLIENT ,&sysctl__vfs_generic_nfs ,&sysctl__vfs_generic_nfs_client ,&sysctl__vfs_generic_nfs_client_initialdowndelay ,&sysctl__vfs_generic_nfs_client_nextdowndelay +#endif ,&sysctl__kern_ipc ,&sysctl__kern_sysv ,&sysctl__net_inet @@ -721,6 +814,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_inet6_ip6_redirect ,&sysctl__net_inet6_ip6_hlim ,&sysctl__net_inet6_ip6_maxfragpackets + ,&sysctl__net_inet6_ip6_maxfrags ,&sysctl__net_inet6_ip6_accept_rtadv ,&sysctl__net_inet6_ip6_keepfaith ,&sysctl__net_inet6_ip6_log_interval @@ -736,6 +830,7 @@ struct sysctl_oid *newsysctl_list[] = 
,&sysctl__net_inet6_ip6_v6only ,&sysctl__net_inet6_ip6_auto_linklocal ,&sysctl__net_inet6_ip6_rip6stats + ,&sysctl__net_inet6_ip6_mrt6stat ,&sysctl__net_inet6_ip6_rtexpire ,&sysctl__net_inet6_ip6_rtminexpire ,&sysctl__net_inet6_ip6_rtmaxcache @@ -788,6 +883,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__net_key_esp_auth ,&sysctl__net_key_ah_keymin ,&sysctl__net_key_natt_keepalive_interval + ,&sysctl__net_key_pfkeystat ,&sysctl__net_inet_ipsec ,&sysctl__net_inet_ipsec_stats ,&sysctl__net_inet_ipsec_def_policy diff --git a/bsd/kern/sysv_ipc.c b/bsd/kern/sysv_ipc.c index e39505b70..0a13e17fd 100644 --- a/bsd/kern/sysv_ipc.c +++ b/bsd/kern/sysv_ipc.c @@ -55,6 +55,7 @@ #include #include #include +#include /* @@ -65,23 +66,22 @@ */ int -ipcperm(cred, perm, mode) - struct ucred *cred; - struct ipc_perm *perm; - int mode; +ipcperm(kauth_cred_t cred, struct ipc_perm *perm, int mode) { if (!suser(cred, (u_short *)NULL)) return (0); /* Check for user match. */ - if (cred->cr_uid != perm->cuid && cred->cr_uid != perm->uid) { + if (kauth_cred_getuid(cred) != perm->cuid && kauth_cred_getuid(cred) != perm->uid) { + int is_member; + if (mode & IPC_M) return (EPERM); /* Check for group match. */ mode >>= 3; - if (!groupmember(perm->gid, cred) && - !groupmember(perm->cgid, cred)) + if ((kauth_cred_ismember_gid(cred, perm->gid, &is_member) || !is_member) && + (kauth_cred_ismember_gid(cred, perm->cgid, &is_member) || !is_member)) /* Check for `other' match. */ mode >>= 3; } @@ -90,70 +90,3 @@ ipcperm(cred, perm, mode) return (0); return ((mode & perm->mode) == mode ? 0 : EACCES); } - - - -/* - * SYSVMSG stubs - */ - -int -msgsys(p, uap) - struct proc *p; - /* XXX actually varargs. */ -#if 0 - struct msgsys_args *uap; -#else - void *uap; -#endif -{ - return(EOPNOTSUPP); -}; - -int -msgctl(p, uap) - struct proc *p; -#if 0 - register struct msgctl_args *uap; -#else - void *uap; -#endif -{ - return(EOPNOTSUPP); -}; - -int -msgget(p, uap) - struct proc *p; -#if 0 - register struct msgget_args *uap; -#else - void *uap; -#endif -{ - return(EOPNOTSUPP); -}; - -int -msgsnd(p, uap) - struct proc *p; -#if 0 - register struct msgsnd_args *uap; -#else - void *uap; -#endif -{ - return(EOPNOTSUPP); -}; - -int -msgrcv(p, uap) - struct proc *p; -#if 0 - register struct msgrcv_args *uap; -#else - void *uap; -#endif -{ - return(EOPNOTSUPP); -}; diff --git a/bsd/kern/sysv_msg.c b/bsd/kern/sysv_msg.c index 4226476d2..757edc883 100644 --- a/bsd/kern/sysv_msg.c +++ b/bsd/kern/sysv_msg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -40,31 +40,29 @@ #include #include -#include #include -#include +#include +#include #include -#include +#include +#include #include -static void msginit __P((void *)); -SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL) +#include +#include +#include +#include +#include + +static void msginit(void *); #define MSG_DEBUG #undef MSG_DEBUG_OK -#ifndef _SYS_SYSPROTO_H_ -struct msgctl_args; -int msgctl __P((struct proc *p, struct msgctl_args *uap)); -struct msgget_args; -int msgget __P((struct proc *p, struct msgget_args *uap)); -struct msgsnd_args; -int msgsnd __P((struct proc *p, struct msgsnd_args *uap)); -struct msgrcv_args; -int msgrcv __P((struct proc *p, struct msgrcv_args *uap)); -#endif -static void msg_freehdr __P((struct msg *msghdr)); +static void msg_freehdr(struct msg *msghdr); + +typedef int sy_call_t(struct proc *, void *, int *); /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *msgcalls[] = { @@ -72,20 +70,117 @@ static sy_call_t *msgcalls[] = { (sy_call_t *)msgsnd, (sy_call_t *)msgrcv }; -static int nfree_msgmaps; /* # of free map entries */ -static short free_msgmaps; /* head of linked list of free map entries */ -static struct msg *free_msghdrs; /* list of free msg headers */ -char *msgpool; /* MSGMAX byte long msg buffer pool */ -struct msgmap *msgmaps; /* MSGSEG msgmap structures */ -struct msg *msghdrs; /* MSGTQL msg headers */ -struct msqid_ds *msqids; /* MSGMNI msqid_ds struct's */ - -void -msginit(dummy) - void *dummy; +static int nfree_msgmaps; /* # of free map entries */ +static short free_msgmaps; /* free map entries list head */ +static struct msg *free_msghdrs; /* list of free msg headers */ +char *msgpool; /* MSGMAX byte long msg buffer pool */ +struct msgmap *msgmaps; /* MSGSEG msgmap structures */ +struct msg *msghdrs; /* MSGTQL msg headers */ +struct user_msqid_ds *msqids; /* MSGMNI user_msqid_ds struct's */ + +static lck_grp_t *sysv_msg_subsys_lck_grp; +static lck_grp_attr_t *sysv_msg_subsys_lck_grp_attr; +static lck_attr_t *sysv_msg_subsys_lck_attr; +static lck_mtx_t sysv_msg_subsys_mutex; + +#define SYSV_MSG_SUBSYS_LOCK() lck_mtx_lock(&sysv_msg_subsys_mutex) +#define SYSV_MSG_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_msg_subsys_mutex) + +void sysv_msg_lock_init(void); + + +#ifdef __APPLE_API_PRIVATE +struct msginfo msginfo = { + MSGMAX, /* = (MSGSSZ*MSGSEG) : max chars in a message */ + MSGMNI, /* = 40 : max message queue identifiers */ + MSGMNB, /* = 2048 : max chars in a queue */ + MSGTQL, /* = 40 : max messages in system */ + MSGSSZ, /* = 8 : size of a message segment (2^N long) */ + MSGSEG /* = 2048 : number of message segments */ +}; +#endif /* __APPLE_API_PRIVATE */ + +/* Initialize the mutex governing access to the SysV msg subsystem */ +__private_extern__ void +sysv_msg_lock_init( void ) +{ + sysv_msg_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(sysv_msg_subsys_lck_grp_attr); + + sysv_msg_subsys_lck_grp = lck_grp_alloc_init("sysv_msg_subsys_lock", sysv_msg_subsys_lck_grp_attr); + + sysv_msg_subsys_lck_attr = lck_attr_alloc_init(); + /* lck_attr_setdebug(sysv_msg_subsys_lck_attr); */ + lck_mtx_init(&sysv_msg_subsys_mutex, sysv_msg_subsys_lck_grp, sysv_msg_subsys_lck_attr); +} + +static __inline__ user_time_t +sysv_msgtime(void) +{ + struct timeval tv; + microtime(&tv); + return (tv.tv_sec); +} + +/* + * NOTE: Source and target may *NOT* overlap! 
(target is smaller) + */ +static void +msqid_ds_64to32(struct user_msqid_ds *in, struct msqid_ds *out) +{ + out->msg_perm = in->msg_perm; + out->msg_qnum = in->msg_qnum; + out->msg_cbytes = in->msg_cbytes; /* for ipcs */ + out->msg_qbytes = in->msg_qbytes; + out->msg_lspid = in->msg_lspid; + out->msg_lrpid = in->msg_lrpid; + out->msg_stime = in->msg_stime; /* XXX loss of range */ + out->msg_rtime = in->msg_rtime; /* XXX loss of range */ + out->msg_ctime = in->msg_ctime; /* XXX loss of range */ +} + +/* + * NOTE: Source and target are permitted to overlap! (source is smaller); + * this works because we copy fields in order from the end of the struct to + * the beginning. + */ +static void +msqid_ds_32to64(struct msqid_ds *in, struct user_msqid_ds *out) +{ + out->msg_ctime = in->msg_ctime; + out->msg_rtime = in->msg_rtime; + out->msg_stime = in->msg_stime; + out->msg_lrpid = in->msg_lrpid; + out->msg_lspid = in->msg_lspid; + out->msg_qbytes = in->msg_qbytes; + out->msg_cbytes = in->msg_cbytes; /* for ipcs */ + out->msg_qnum = in->msg_qnum; + out->msg_perm = in->msg_perm; +} + +/* This routine assumes the system is locked prior to calling this routine */ +void +msginit(__unused void *dummy) { + static int initted = 0; register int i; + /* Lazy initialization on first system call; we don't have SYSINIT(). */ + if (initted) + return; + initted = 1; + + msgpool = (char *)_MALLOC(msginfo.msgmax, M_SHM, M_WAITOK); + MALLOC(msgmaps, struct msgmap *, + sizeof(struct msgmap) * msginfo.msgseg, + M_SHM, M_WAITOK); + MALLOC(msghdrs, struct msg *, + sizeof(struct msg) * msginfo.msgtql, + M_SHM, M_WAITOK); + MALLOC(msqids, struct user_msqid_ds *, + sizeof(struct user_msqid_ds) * msginfo.msgmni, + M_SHM, M_WAITOK); + /* * msginfo.msgssz should be a power of two for efficiency reasons. * It is also pretty silly if msginfo.msgssz is less than 8 @@ -140,28 +235,17 @@ msginit(dummy) /* * Entry point for all MSG calls */ -int -msgsys(p, uap) - struct proc *p; /* XXX actually varargs.
*/ - struct msgsys_args /* { - u_int which; - int a2; - int a3; - int a4; - int a5; - int a6; - } */ *uap; +int +msgsys(struct proc *p, struct msgsys_args *uap, register_t *retval) { - if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0])) return (EINVAL); - return ((*msgcalls[uap->which])(p, &uap->a2)); + return ((*msgcalls[uap->which])(p, &uap->a2, retval)); } static void -msg_freehdr(msghdr) - struct msg *msghdr; +msg_freehdr(struct msg *msghdr) { while (msghdr->msg_ts > 0) { short next; @@ -183,29 +267,23 @@ msg_freehdr(msghdr) free_msghdrs = msghdr; } -#ifndef _SYS_SYSPROTO_H_ -struct msgctl_args { - int msqid; - int cmd; - struct msqid_ds *buf; -}; -#endif - int -msgctl(p, uap) - struct proc *p; - register struct msgctl_args *uap; +msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) { int msqid = uap->msqid; int cmd = uap->cmd; - struct msqid_ds *user_msqptr = uap->buf; - struct ucred *cred = p->p_ucred; + kauth_cred_t cred = kauth_cred_get(); int rval, eval; - struct msqid_ds msqbuf; - register struct msqid_ds *msqptr; + struct user_msqid_ds msqbuf; + struct user_msqid_ds *msqptr; + struct user_msqid_ds umsds; + + SYSV_MSG_SUBSYS_LOCK(); + + msginit( 0); #ifdef MSG_DEBUG_OK - printf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr); + printf("call to msgctl(%d, %d, 0x%qx)\n", msqid, cmd, uap->buf); #endif AUDIT_ARG(svipc_cmd, cmd); @@ -217,7 +295,8 @@ msgctl(p, uap) printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni); #endif - return(EINVAL); + eval = EINVAL; + goto msgctlout; } msqptr = &msqids[msqid]; @@ -226,13 +305,15 @@ msgctl(p, uap) #ifdef MSG_DEBUG_OK printf("no such msqid\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgctlout; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { #ifdef MSG_DEBUG_OK printf("wrong sequence number\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgctlout; } eval = 0; @@ -244,7 +325,8 @@ msgctl(p, uap) { struct msg *msghdr; if ((eval = ipcperm(cred, &msqptr->msg_perm, IPC_M))) - return(eval); + goto msgctlout; + /* Free the message headers */ msghdr = msqptr->msg_first; while (msghdr != NULL) { @@ -272,15 +354,31 @@ msgctl(p, uap) case IPC_SET: if ((eval = ipcperm(cred, &msqptr->msg_perm, IPC_M))) + goto msgctlout; + + SYSV_MSG_SUBSYS_UNLOCK(); + + if (IS_64BIT_PROCESS(p)) { + eval = copyin(uap->buf, &msqbuf, sizeof(struct user_msqid_ds)); + } else { + eval = copyin(uap->buf, &msqbuf, sizeof(struct msqid_ds)); + /* convert in place; ugly, but safe */ + msqid_ds_32to64((struct msqid_ds *)&msqbuf, &msqbuf); + } + if (eval) return(eval); - if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0) - return(eval); + + SYSV_MSG_SUBSYS_LOCK(); + if (msqbuf.msg_qbytes > msqptr->msg_qbytes) { eval = suser(cred, &p->p_acflag); if (eval) - return(eval); + goto msgctlout; } - if (msqbuf.msg_qbytes > msginfo.msgmnb) { + + + /* compare (msglen_t) value against restrict (int) value */ + if (msqbuf.msg_qbytes > (msglen_t)msginfo.msgmnb) { #ifdef MSG_DEBUG_OK printf("can't increase msg_qbytes beyond %d (truncating)\n", msginfo.msgmnb); @@ -291,14 +389,15 @@ msgctl(p, uap) #ifdef MSG_DEBUG_OK printf("can't reduce msg_qbytes to 0\n"); #endif - return(EINVAL); /* non-standard errno! 
*/ + eval = EINVAL; + goto msgctlout; } msqptr->msg_perm.uid = msqbuf.msg_perm.uid; /* change the owner */ msqptr->msg_perm.gid = msqbuf.msg_perm.gid; /* change the owner */ msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) | (msqbuf.msg_perm.mode & 0777); msqptr->msg_qbytes = msqbuf.msg_qbytes; - msqptr->msg_ctime = time_second; + msqptr->msg_ctime = sysv_msgtime(); break; case IPC_STAT: @@ -306,41 +405,48 @@ msgctl(p, uap) #ifdef MSG_DEBUG_OK printf("requester doesn't have read access\n"); #endif - return(eval); + goto msgctlout; + } + + bcopy(msqptr, &umsds, sizeof(struct user_msqid_ds)); + + SYSV_MSG_SUBSYS_UNLOCK(); + if (IS_64BIT_PROCESS(p)) { + eval = copyout(&umsds, uap->buf, sizeof(struct user_msqid_ds)); + } else { + struct msqid_ds msqid_ds32; + msqid_ds_64to32(&umsds, &msqid_ds32); + eval = copyout(&msqid_ds32, uap->buf, sizeof(struct msqid_ds)); } - eval = copyout((caddr_t)msqptr, user_msqptr, - sizeof(struct msqid_ds)); + SYSV_MSG_SUBSYS_LOCK(); break; default: #ifdef MSG_DEBUG_OK printf("invalid command %d\n", cmd); #endif - return(EINVAL); + eval = EINVAL; + goto msgctlout; } if (eval == 0) - p->p_retval[0] = rval; + *retval = rval; +msgctlout: + SYSV_MSG_SUBSYS_UNLOCK(); return(eval); } -#ifndef _SYS_SYSPROTO_H_ -struct msgget_args { - key_t key; - int msgflg; -}; -#endif - int -msgget(p, uap) - struct proc *p; - register struct msgget_args *uap; +msgget(__unused struct proc *p, struct msgget_args *uap, register_t *retval) { int msqid, eval; int key = uap->key; int msgflg = uap->msgflg; - struct ucred *cred = p->p_ucred; - register struct msqid_ds *msqptr = NULL; + kauth_cred_t cred = kauth_cred_get(); + struct user_msqid_ds *msqptr = NULL; + + SYSV_MSG_SUBSYS_LOCK(); + msginit( 0); #ifdef MSG_DEBUG_OK printf("msgget(0x%x, 0%o)\n", key, msgflg); @@ -361,29 +467,30 @@ msgget(p, uap) #ifdef MSG_DEBUG_OK printf("not exclusive\n"); #endif - return(EEXIST); + eval = EEXIST; + goto msggetout; } if ((eval = ipcperm(cred, &msqptr->msg_perm, msgflg & 0700 ))) { #ifdef MSG_DEBUG_OK printf("requester doesn't have 0%o access\n", msgflg & 0700); #endif - return(eval); + goto msggetout; } goto found; } } #ifdef MSG_DEBUG_OK - printf("need to allocate the msqid_ds\n"); + printf("need to allocate the user_msqid_ds\n"); #endif if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { /* - * Look for an unallocated and unlocked msqid_ds. - * msqid_ds's can be locked by msgsnd or msgrcv while - * they are copying the message in/out. We can't - * re-use the entry until they release it. + * Look for an unallocated and unlocked user_msqid_ds. + * user_msqid_ds's can be locked by msgsnd or msgrcv + * while they are copying the message in/out. We + * can't re-use the entry until they release it. 
*/ msqptr = &msqids[msqid]; if (msqptr->msg_qbytes == 0 && @@ -392,16 +499,17 @@ msgget(p, uap) } if (msqid == msginfo.msgmni) { #ifdef MSG_DEBUG_OK - printf("no more msqid_ds's available\n"); + printf("no more user_msqid_ds's available\n"); #endif - return(ENOSPC); + eval = ENOSPC; + goto msggetout; } #ifdef MSG_DEBUG_OK printf("msqid %d is available\n", msqid); #endif msqptr->msg_perm.key = key; - msqptr->msg_perm.cuid = cred->cr_uid; - msqptr->msg_perm.uid = cred->cr_uid; + msqptr->msg_perm.cuid = kauth_cred_getuid(cred); + msqptr->msg_perm.uid = kauth_cred_getuid(cred); msqptr->msg_perm.cgid = cred->cr_gid; msqptr->msg_perm.gid = cred->cr_gid; msqptr->msg_perm.mode = (msgflg & 0777); @@ -416,47 +524,45 @@ msgget(p, uap) msqptr->msg_lrpid = 0; msqptr->msg_stime = 0; msqptr->msg_rtime = 0; - msqptr->msg_ctime = time_second; + msqptr->msg_ctime = sysv_msgtime(); } else { #ifdef MSG_DEBUG_OK printf("didn't find it and wasn't asked to create it\n"); #endif - return(ENOENT); + eval = ENOENT; + goto msggetout; } found: /* Construct the unique msqid */ - p->p_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm); - AUDIT_ARG(svipc_id, p->p_retval[0]); - return(0); + *retval = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm); + AUDIT_ARG(svipc_id, *retval); + eval = 0; +msggetout: + SYSV_MSG_SUBSYS_UNLOCK(); + return(eval); } -#ifndef _SYS_SYSPROTO_H_ -struct msgsnd_args { - int msqid; - void *msgp; - size_t msgsz; - int msgflg; -}; -#endif int -msgsnd(p, uap) - struct proc *p; - register struct msgsnd_args *uap; +msgsnd(struct proc *p, struct msgsnd_args *uap, register_t *retval) { int msqid = uap->msqid; - void *user_msgp = uap->msgp; - size_t msgsz = uap->msgsz; + user_addr_t user_msgp = uap->msgp; + size_t msgsz = (size_t)uap->msgsz; /* limit to 4G */ int msgflg = uap->msgflg; int segs_needed, eval; - struct ucred *cred = p->p_ucred; - register struct msqid_ds *msqptr; - register struct msg *msghdr; + struct user_msqid_ds *msqptr; + struct msg *msghdr; short next; + user_long_t msgtype; + + + SYSV_MSG_SUBSYS_LOCK(); + msginit( 0); #ifdef MSG_DEBUG_OK - printf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz, + printf("call to msgsnd(%d, 0x%qx, %d, %d)\n", msqid, user_msgp, msgsz, msgflg); #endif @@ -468,7 +574,8 @@ msgsnd(p, uap) printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni); #endif - return(EINVAL); + eval = EINVAL; + goto msgsndout; } msqptr = &msqids[msqid]; @@ -476,20 +583,22 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("no such message queue id\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgsndout; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { #ifdef MSG_DEBUG_OK printf("wrong sequence number\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgsndout; } - if ((eval = ipcperm(cred, &msqptr->msg_perm, IPC_W))) { + if ((eval = ipcperm(kauth_cred_get(), &msqptr->msg_perm, IPC_W))) { #ifdef MSG_DEBUG_OK printf("requester doesn't have write access\n"); #endif - return(eval); + goto msgsndout; } segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz; @@ -509,7 +618,8 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("msgsz > msqptr->msg_qbytes\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgsndout; } if (msqptr->msg_perm.mode & MSG_LOCKED) { @@ -544,19 +654,20 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("need more resources but caller doesn't want to wait\n"); #endif - return(EAGAIN); + eval = EAGAIN; + goto msgsndout; } if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) { #ifdef MSG_DEBUG_OK - printf("we don't own the 
msqid_ds\n"); + printf("we don't own the user_msqid_ds\n"); #endif we_own_it = 0; } else { /* Force later arrivals to wait for our request */ #ifdef MSG_DEBUG_OK - printf("we own the msqid_ds\n"); + printf("we own the user_msqid_ds\n"); #endif msqptr->msg_perm.mode |= MSG_LOCKED; we_own_it = 1; @@ -564,7 +675,7 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("goodnight\n"); #endif - eval = tsleep((caddr_t)msqptr, (PZERO - 4) | PCATCH, + eval = msleep((caddr_t)msqptr, &sysv_msg_subsys_mutex, (PZERO - 4) | PCATCH, "msgwait", 0); #ifdef MSG_DEBUG_OK printf("good morning, eval=%d\n", eval); @@ -575,7 +686,8 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("msgsnd: interrupted system call\n"); #endif - return(EINTR); + eval = EINTR; + goto msgsndout; } /* @@ -588,12 +700,14 @@ msgsnd(p, uap) #endif /* The SVID says to return EIDRM. */ #ifdef EIDRM - return(EIDRM); + eval = EIDRM; #else /* Unfortunately, BSD doesn't define that code yet! */ - return(EINVAL); + eval = EINVAL; #endif + goto msgsndout; + } } else { @@ -619,12 +733,12 @@ msgsnd(p, uap) panic("no more msghdrs"); /* - * Re-lock the msqid_ds in case we page-fault when copying in the - * message + * Re-lock the user_msqid_ds in case we page-fault when copying in + * the message */ if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) - panic("msqid_ds is already locked"); + panic("user_msqid_ds is already locked"); msqptr->msg_perm.mode |= MSG_LOCKED; /* @@ -661,25 +775,36 @@ msgsnd(p, uap) } /* - * Copy in the message type + * Copy in the message type. For a 64 bit process, this is 64 bits, + * but we only ever use the low 32 bits, so the cast is OK. */ + if (IS_64BIT_PROCESS(p)) { + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyin(user_msgp, &msgtype, sizeof(msgtype)); + SYSV_MSG_SUBSYS_LOCK(); + msghdr->msg_type = CAST_DOWN(long,msgtype); + user_msgp = user_msgp + sizeof(msgtype); /* ptr math */ + } else { + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyin(user_msgp, &msghdr->msg_type, sizeof(long)); + SYSV_MSG_SUBSYS_LOCK(); + user_msgp = user_msgp + sizeof(long); /* ptr math */ + } - if ((eval = copyin(user_msgp, &msghdr->msg_type, - sizeof(msghdr->msg_type))) != 0) { + if (eval != 0) { #ifdef MSG_DEBUG_OK printf("error %d copying the message type\n", eval); #endif msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; wakeup((caddr_t)msqptr); - return(eval); + goto msgsndout; } - user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type); + /* * Validate the message type */ - if (msghdr->msg_type < 1) { msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; @@ -687,17 +812,18 @@ msgsnd(p, uap) #ifdef MSG_DEBUG_OK printf("mtype (%d) < 1\n", msghdr->msg_type); #endif - return(EINVAL); + eval = EINVAL; + goto msgsndout; } /* * Copy in the message body */ - next = msghdr->msg_spot; while (msgsz > 0) { size_t tlen; - if (msgsz > msginfo.msgssz) + /* compare input (size_t) value against restrict (int) value */ + if (msgsz > (size_t)msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz; @@ -705,31 +831,36 @@ msgsnd(p, uap) panic("next too low #2"); if (next >= msginfo.msgseg) panic("next out of range #2"); - if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz], - tlen)) != 0) { + + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz], tlen); + SYSV_MSG_SUBSYS_LOCK(); + + if (eval != 0) { #ifdef MSG_DEBUG_OK printf("error %d copying in message segment\n", eval); #endif msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; wakeup((caddr_t)msqptr); - return(eval); + + goto msgsndout; } msgsz -= tlen; - user_msgp = 
(char *)user_msgp + tlen; + user_msgp = user_msgp + tlen; /* ptr math */ next = msgmaps[next].next; } if (next != -1) panic("didn't use all the msg segments"); /* - * We've got the message. Unlock the msqid_ds. + * We've got the message. Unlock the user_msqid_ds. */ msqptr->msg_perm.mode &= ~MSG_LOCKED; /* - * Make sure that the msqid_ds is still allocated. + * Make sure that the user_msqid_ds is still allocated. */ if (msqptr->msg_qbytes == 0) { @@ -737,11 +868,12 @@ msgsnd(p, uap) wakeup((caddr_t)msqptr); /* The SVID says to return EIDRM. */ #ifdef EIDRM - return(EIDRM); + eval = EIDRM; #else /* Unfortunately, BSD doesn't define that code yet! */ - return(EINVAL); + eval = EINVAL; #endif + goto msgsndout; } /* @@ -760,42 +892,39 @@ msgsnd(p, uap) msqptr->msg_cbytes += msghdr->msg_ts; msqptr->msg_qnum++; msqptr->msg_lspid = p->p_pid; - msqptr->msg_stime = time_second; + msqptr->msg_stime = sysv_msgtime(); wakeup((caddr_t)msqptr); - p->p_retval[0] = 0; - return(0); + *retval = 0; + eval = 0; + +msgsndout: + SYSV_MSG_SUBSYS_UNLOCK(); + return(eval); } -#ifndef _SYS_SYSPROTO_H_ -struct msgrcv_args { - int msqid; - void *msgp; - size_t msgsz; - long msgtyp; - int msgflg; -}; -#endif int -msgrcv(p, uap) - struct proc *p; - register struct msgrcv_args *uap; +msgrcv(struct proc *p, struct msgrcv_args *uap, user_ssize_t *retval) { int msqid = uap->msqid; - void *user_msgp = uap->msgp; - size_t msgsz = uap->msgsz; - long msgtyp = uap->msgtyp; + user_addr_t user_msgp = uap->msgp; + size_t msgsz = (size_t)uap->msgsz; /* limit to 4G */ + long msgtyp = (long)uap->msgtyp; /* limit to 32 bits */ int msgflg = uap->msgflg; size_t len; - struct ucred *cred = p->p_ucred; - register struct msqid_ds *msqptr; - register struct msg *msghdr; + struct user_msqid_ds *msqptr; + struct msg *msghdr; int eval; short next; + user_long_t msgtype; + long msg_type_long; + + SYSV_MSG_SUBSYS_LOCK(); + msginit( 0); #ifdef MSG_DEBUG_OK - printf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp, + printf("call to msgrcv(%d, 0x%qx, %d, %ld, %d)\n", msqid, user_msgp, msgsz, msgtyp, msgflg); #endif @@ -807,7 +936,8 @@ msgrcv(p, uap) printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni); #endif - return(EINVAL); + eval = EINVAL; + goto msgrcvout; } msqptr = &msqids[msqid]; @@ -815,20 +945,22 @@ msgrcv(p, uap) #ifdef MSG_DEBUG_OK printf("no such message queue id\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgrcvout; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { #ifdef MSG_DEBUG_OK printf("wrong sequence number\n"); #endif - return(EINVAL); + eval = EINVAL; + goto msgrcvout; } - if ((eval = ipcperm(cred, &msqptr->msg_perm, IPC_R))) { + if ((eval = ipcperm(kauth_cred_get(), &msqptr->msg_perm, IPC_R))) { #ifdef MSG_DEBUG_OK printf("requester doesn't have read access\n"); #endif - return(eval); + goto msgrcvout; } msghdr = NULL; @@ -842,7 +974,8 @@ msgrcv(p, uap) printf("first message on the queue is too big (want %d, got %d)\n", msgsz, msghdr->msg_ts); #endif - return(E2BIG); + eval = E2BIG; + goto msgrcvout; } if (msqptr->msg_first == msqptr->msg_last) { msqptr->msg_first = NULL; @@ -881,7 +1014,8 @@ msgrcv(p, uap) printf("requested message on the queue is too big (want %d, got %d)\n", msgsz, msghdr->msg_ts); #endif - return(E2BIG); + eval = E2BIG; + goto msgrcvout; } *prev = msghdr->msg_next; if (msghdr == msqptr->msg_last) { @@ -928,11 +1062,12 @@ msgrcv(p, uap) #endif /* The SVID says to return ENOMSG. 
*/ #ifdef ENOMSG - return(ENOMSG); + eval = ENOMSG; #else /* Unfortunately, BSD doesn't define that code yet! */ - return(EAGAIN); + eval = EAGAIN; #endif + goto msgrcvout; } /* @@ -942,7 +1077,7 @@ msgrcv(p, uap) #ifdef MSG_DEBUG_OK printf("msgrcv: goodnight\n"); #endif - eval = tsleep((caddr_t)msqptr, (PZERO - 4) | PCATCH, "msgwait", + eval = msleep((caddr_t)msqptr, &sysv_msg_subsys_mutex, (PZERO - 4) | PCATCH, "msgwait", 0); #ifdef MSG_DEBUG_OK printf("msgrcv: good morning (eval=%d)\n", eval); @@ -952,7 +1087,8 @@ msgrcv(p, uap) #ifdef MSG_DEBUG_OK printf("msgsnd: interrupted system call\n"); #endif - return(EINTR); + eval = EINTR; + goto msgrcvout; } /* @@ -966,11 +1102,12 @@ msgrcv(p, uap) #endif /* The SVID says to return EIDRM. */ #ifdef EIDRM - return(EIDRM); + eval = EIDRM; #else /* Unfortunately, BSD doesn't define that code yet! */ - return(EINVAL); + eval = EINVAL; #endif + goto msgrcvout; } } @@ -983,7 +1120,7 @@ msgrcv(p, uap) msqptr->msg_cbytes -= msghdr->msg_ts; msqptr->msg_qnum--; msqptr->msg_lrpid = p->p_pid; - msqptr->msg_rtime = time_second; + msqptr->msg_rtime = sysv_msgtime(); /* * Make msgsz the actual amount that we'll be returning. @@ -1002,17 +1139,34 @@ msgrcv(p, uap) * Return the type to the user. */ - eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp, - sizeof(msghdr->msg_type)); + /* + * Copy out the message type. For a 64 bit process, this is 64 bits, + * but we only ever use the low 32 bits, so the cast is OK. + */ + if (IS_64BIT_PROCESS(p)) { + msgtype = msghdr->msg_type; + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyout(&msgtype, user_msgp, sizeof(msgtype)); + SYSV_MSG_SUBSYS_LOCK(); + user_msgp = user_msgp + sizeof(msgtype); /* ptr math */ + } else { + msg_type_long = msghdr->msg_type; + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyout(&msg_type_long, user_msgp, sizeof(long)); + SYSV_MSG_SUBSYS_LOCK(); + user_msgp = user_msgp + sizeof(long); /* ptr math */ + } + if (eval != 0) { #ifdef MSG_DEBUG_OK printf("error (%d) copying out message type\n", eval); #endif msg_freehdr(msghdr); wakeup((caddr_t)msqptr); - return(eval); + + goto msgrcvout; } - user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type); + /* * Return the segments to the user @@ -1022,7 +1176,8 @@ msgrcv(p, uap) for (len = 0; len < msgsz; len += msginfo.msgssz) { size_t tlen; - if (msgsz > msginfo.msgssz) + /* compare input (size_t) value against restrict (int) value */ + if (msgsz > (size_t)msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz; @@ -1030,8 +1185,10 @@ msgrcv(p, uap) panic("next too low #3"); if (next >= msginfo.msgseg) panic("next out of range #3"); - eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz], + SYSV_MSG_SUBSYS_UNLOCK(); + eval = copyout(&msgpool[next * msginfo.msgssz], user_msgp, tlen); + SYSV_MSG_SUBSYS_LOCK(); if (eval != 0) { #ifdef MSG_DEBUG_OK printf("error (%d) copying out message segment\n", @@ -1039,9 +1196,9 @@ msgrcv(p, uap) #endif msg_freehdr(msghdr); wakeup((caddr_t)msqptr); - return(eval); + goto msgrcvout; } - user_msgp = (char *)user_msgp + tlen; + user_msgp = user_msgp + tlen; /* ptr math */ next = msgmaps[next].next; } @@ -1051,6 +1208,121 @@ msgrcv(p, uap) msg_freehdr(msghdr); wakeup((caddr_t)msqptr); - p->p_retval[0] = msgsz; - return(0); + *retval = msgsz; + eval = 0; +msgrcvout: + SYSV_MSG_SUBSYS_UNLOCK(); + return(eval); +} + +static int +IPCS_msg_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error; + int cursor; + union { + struct IPCS_command u32; + struct 
user_IPCS_command u64; + } ipcs; + struct msqid_ds msqid_ds32; /* post conversion, 32 bit version */ + void *msqid_dsp; + size_t ipcs_sz = sizeof(struct user_IPCS_command); + size_t msqid_ds_sz = sizeof(struct user_msqid_ds); + struct proc *p = current_proc(); + + if (!IS_64BIT_PROCESS(p)) { + ipcs_sz = sizeof(struct IPCS_command); + msqid_ds_sz = sizeof(struct msqid_ds); + } + + /* Copy in the command structure */ + if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) { + return(error); + } + + if (!IS_64BIT_PROCESS(p)) /* convert in place */ + ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data); + + /* Let us version this interface... */ + if (ipcs.u64.ipcs_magic != IPCS_MAGIC) { + return(EINVAL); + } + + SYSV_MSG_SUBSYS_LOCK(); + + switch(ipcs.u64.ipcs_op) { + case IPCS_MSG_CONF: /* Obtain global configuration data */ + if (ipcs.u64.ipcs_datalen != sizeof(struct msginfo)) { + error = ERANGE; + break; + } + if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */ + error = EINVAL; + break; + } + SYSV_MSG_SUBSYS_UNLOCK(); + error = copyout(&msginfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + SYSV_MSG_SUBSYS_LOCK(); + break; + + case IPCS_MSG_ITER: /* Iterate over existing segments */ + /* Not done up top so we can set limits via sysctl (later) */ + msginit( 0); + + cursor = ipcs.u64.ipcs_cursor; + if (cursor < 0 || cursor >= msginfo.msgmni) { + error = ERANGE; + break; + } + if (ipcs.u64.ipcs_datalen != (int)msqid_ds_sz) { + error = ENOMEM; + break; + } + for( ; cursor < msginfo.msgmni; cursor++) { + if (msqids[cursor].msg_qbytes != 0) /* allocated */ + break; + continue; + } + if (cursor == msginfo.msgmni) { + error = ENOENT; + break; + } + + msqid_dsp = &msqids[cursor]; /* default: 64 bit */ + + /* + * If necessary, convert the 64 bit kernel segment + * descriptor to a 32 bit user one. + */ + if (!IS_64BIT_PROCESS(p)) { + msqid_ds_64to32(msqid_dsp, &msqid_ds32); + msqid_dsp = &msqid_ds32; + } + SYSV_MSG_SUBSYS_UNLOCK(); + error = copyout(msqid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + if (!error) { + /* update cursor */ + ipcs.u64.ipcs_cursor = cursor + 1; + + if (!IS_64BIT_PROCESS(p)) /* convert in place */ + ipcs.u32.ipcs_data = CAST_DOWN(void *,ipcs.u64.ipcs_data); + error = SYSCTL_OUT(req, &ipcs, ipcs_sz); + } + SYSV_MSG_SUBSYS_LOCK(); + break; + + default: + error = EINVAL; + break; + } + + SYSV_MSG_SUBSYS_UNLOCK(); + return(error); } + +SYSCTL_DECL(_kern_sysv_ipcs); +SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, msg, CTLFLAG_RW|CTLFLAG_ANYBODY, + 0, 0, IPCS_msg_sysctl, + "S,IPCS_msg_command", + "ipcs msg command interface"); diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index 6764d816a..8f7b26537 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -33,49 +33,27 @@ #include #include #include -#include -#include +#include +#include +#include #include +#include + #include -#include +#include #include +#include +#include +#include #include -#include - -/*#include */ -/*#include */ /* Uncomment this line to see the debugging output */ /* #define SEM_DEBUG */ -/* Macros to deal with the semaphore subsystem lock. The lock currently uses - * the semlock_holder static variable as a mutex. NULL means no lock, any - * value other than NULL means locked. semlock_holder is used because it was - * present in the code before the Darwin port, and for no other reason. - * When the time comes to relax the funnel requirements of the kernel only - * these macros should need to be changed. A spin lock would work well. 
- */ -/* Aquire the lock */ -#define SUBSYSTEM_LOCK_AQUIRE(p) { sysv_sem_aquiring_threads++; \ - while (semlock_holder != NULL) \ - (void) tsleep((caddr_t)&semlock_holder, (PZERO - 4), "sysvsem", 0); \ - semlock_holder = p; \ - sysv_sem_aquiring_threads--; } - -/* Release the lock */ -#define SUBSYSTEM_LOCK_RELEASE { semlock_holder = NULL; wakeup((caddr_t)&semlock_holder); } - -/* Release the lock and return a value */ -#define UNLOCK_AND_RETURN(ret) { SUBSYSTEM_LOCK_RELEASE; return(ret); } +#define M_SYSVSEM M_TEMP -#define M_SYSVSEM M_SUBPROC - -#if 0 -static void seminit __P((void *)); -SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL) -#endif 0 /* Hard system limits to avoid resource starvation / DOS attacks. * These are not needed if we can make the semaphore pages swappable. @@ -110,27 +88,11 @@ struct seminfo seminfo = { SEMAEM /* adjust on exit max value */ }; -/* A counter so the module unload code knows when there are no more processes using - * the sysv_sem code */ -static long sysv_sem_sleeping_threads = 0; -static long sysv_sem_aquiring_threads = 0; - -struct semctl_args; -int semctl __P((struct proc *p, struct semctl_args *uap, int *)); -struct semget_args; -int semget __P((struct proc *p, struct semget_args *uap, int *)); -struct semop_args; -int semop __P((struct proc *p, struct semop_args *uap, int *)); -struct semconfig_args; -int semconfig __P((struct proc *p, struct semconfig_args *uap, int *)); - -static struct sem_undo *semu_alloc __P((struct proc *p)); -static int semundo_adjust __P((struct proc *p, struct sem_undo **supptr, - int semid, int semnum, int adjval)); -static void semundo_clear __P((int semid, int semnum)); - -typedef int sy_call_t __P((struct proc *, void *, int *)); +static struct sem_undo *semu_alloc(struct proc *p); +static int semundo_adjust(struct proc *p, struct sem_undo **supptr, + int semid, int semnum, int adjval); +static void semundo_clear(int semid, int semnum); /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *semcalls[] = { @@ -138,45 +100,97 @@ static sy_call_t *semcalls[] = { (sy_call_t *)semop, (sy_call_t *)semconfig }; -static int semtot = 0; /* # of used semaphores */ -struct semid_ds *sema = NULL; /* semaphore id pool */ -struct sem *sem = NULL; /* semaphore pool */ -static struct sem_undo *semu_list = NULL; /* list of active undo structures */ -struct sem_undo *semu = NULL; /* semaphore undo pool */ +static int semtot = 0; /* # of used semaphores */ +struct user_semid_ds *sema = NULL; /* semaphore id pool */ +struct sem *sem_pool = NULL; /* semaphore pool */ +static struct sem_undo *semu_list = NULL; /* active undo structures */ +struct sem_undo *semu = NULL; /* semaphore undo pool */ -static struct proc *semlock_holder = NULL; -/* seminit no longer needed. 
The data structures are grown dynamically */ -void -seminit() +void sysv_sem_lock_init(void); +static lck_grp_t *sysv_sem_subsys_lck_grp; +static lck_grp_attr_t *sysv_sem_subsys_lck_grp_attr; +static lck_attr_t *sysv_sem_subsys_lck_attr; +static lck_mtx_t sysv_sem_subsys_mutex; + +#define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex) +#define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex) + + +__private_extern__ void +sysv_sem_lock_init( void ) +{ + + sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(sysv_sem_subsys_lck_grp_attr); + + sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr); + + sysv_sem_subsys_lck_attr = lck_attr_alloc_init(); + lck_attr_setdebug(sysv_sem_subsys_lck_attr); + lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr); +} + +static __inline__ user_time_t +sysv_semtime(void) +{ + struct timeval tv; + microtime(&tv); + return (tv.tv_sec); +} + +/* + * XXX conversion of internal user_time_t to external time_t loses + * XXX precision; not an issue for us now, since we are only ever + * XXX setting 32 bits worth of time into it. + * + * pad field contents are not moved correspondingly; contents will be lost + * + * NOTE: Source and target may *NOT* overlap! (target is smaller) + */ +static void +semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out) { + out->sem_perm = in->sem_perm; + out->sem_base = (__int32_t)in->sem_base; + out->sem_nsems = in->sem_nsems; + out->sem_otime = in->sem_otime; /* XXX loses precision */ + out->sem_ctime = in->sem_ctime; /* XXX loses precision */ } +/* + * pad field contents are not moved correspondingly; contents will be lost + * + * NOTE: Source and target are permitted to overlap! (source is smaller); + * this works because we copy fields in order from the end of the struct to + * the beginning. + * + * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect + * XXX is the same. + */ +static void +semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out) +{ + out->sem_ctime = in->sem_ctime; + out->sem_otime = in->sem_otime; + out->sem_nsems = in->sem_nsems; + out->sem_base = (void *)in->sem_base; + out->sem_perm = in->sem_perm; +} + + /* * Entry point for all SEM calls * * In Darwin this is no longer the entry point. It will be removed after * the code has been tested better. */ -struct semsys_args { - u_int which; - int a2; - int a3; - int a4; - int a5; -}; +/* XXX actually varargs. */ int -semsys(p, uap, retval) - struct proc *p; - /* XXX actually varargs. */ - struct semsys_args *uap; - register_t *retval; +semsys(struct proc *p, struct semsys_args *uap, register_t *retval) { /* The individual calls handling the locking now */ - /*while (semlock_holder != NULL && semlock_holder != p) - (void) tsleep((caddr_t)&semlock_holder, (PZERO - 4), "semsys", 0); - */ if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) return (EINVAL); @@ -198,27 +212,18 @@ semsys(p, uap, retval) * in /dev/kmem.
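*/

/*
 * Editor's aside (not part of the original patch): why semid_ds_32to64()
 * above may convert in place. Every field's offset in the wide struct is
 * >= its offset in the narrow one, so copying from the last field back to
 * the first never overwrites a narrow field before it has been read. The
 * struct names below are illustrative stand-ins.
 */
#include <assert.h>

struct narrow { int a, b; };		/* stand-in for semid_ds */
struct wide   { long long a, b; };	/* stand-in for user_semid_ds */

static void
narrow_to_wide(struct narrow *in, struct wide *out)
{
	out->b = in->b;	/* reads in->b before anything lands on top of it */
	out->a = in->a;	/* widening a now clobbers in->b -- already copied */
}

int
main(void)
{
	union { struct narrow n; struct wide w; } u = { .n = { 1, 2 } };

	narrow_to_wide(&u.n, &u.w);	/* source and target overlap */
	assert(u.w.a == 1 && u.w.b == 2);
	return 0;
}
/* end of editor's aside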
*/ -#ifndef _SYS_SYSPROTO_H_ -struct semconfig_args { - semconfig_ctl_t flag; -}; -#endif - int -semconfig(p, uap, retval) - struct proc *p; - struct semconfig_args *uap; - register_t *retval; +semconfig(__unused struct proc *p, struct semconfig_args *uap, register_t *retval) { int eval = 0; switch (uap->flag) { case SEM_CONFIG_FREEZE: - SUBSYSTEM_LOCK_AQUIRE(p); + SYSV_SEM_SUBSYS_LOCK(); break; case SEM_CONFIG_THAW: - SUBSYSTEM_LOCK_RELEASE; + SYSV_SEM_SUBSYS_UNLOCK(); break; default: @@ -232,19 +237,26 @@ semconfig(p, uap, retval) return(eval); } -/* Expand the semu array to the given capacity. If the expansion fails +/* + * Expand the semu array to the given capacity. If the expansion fails * return 0, otherwise return 1. * * Assumes we already have the subsystem lock. */ static int -grow_semu_array(newSize) - int newSize; +grow_semu_array(int newSize) { - register int i, j; + register int i; register struct sem_undo *newSemu; + static boolean_t grow_semu_array_in_progress = FALSE; + + while (grow_semu_array_in_progress) { + msleep(&grow_semu_array_in_progress, &sysv_sem_subsys_mutex, + PPAUSE, "grow_semu_array", NULL); + } + if (newSize <= seminfo.semmnu) - return 0; + return 1; if (newSize > limitseminfo.semmnu) /* enforce hard limit */ { #ifdef SEM_DEBUG @@ -259,8 +271,13 @@ grow_semu_array(newSize) #ifdef SEM_DEBUG printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize); #endif - MALLOC(newSemu, struct sem_undo*, sizeof(struct sem_undo)*newSize, + grow_semu_array_in_progress = TRUE; + SYSV_SEM_SUBSYS_UNLOCK(); + MALLOC(newSemu, struct sem_undo*, sizeof(struct sem_undo) * newSize, M_SYSVSEM, M_WAITOK); + SYSV_SEM_SUBSYS_LOCK(); + grow_semu_array_in_progress = FALSE; + wakeup((caddr_t) &grow_semu_array_in_progress); if (NULL == newSemu) { #ifdef SEM_DEBUG @@ -273,8 +290,6 @@ grow_semu_array(newSize) for (i = 0; i < seminfo.semmnu; i++) { newSemu[i] = semu[i]; - for(j = 0; j < SEMUME; j++) /* Is this really needed? */ - newSemu[i].un_ent[j] = semu[i].un_ent[j]; } for (i = seminfo.semmnu; i < newSize; i++) { @@ -300,10 +315,9 @@ grow_semu_array(newSize) * Assumes we already have the subsystem lock. */ static int -grow_sema_array(newSize) - int newSize; +grow_sema_array(int newSize) { - register struct semid_ds *newSema; + register struct user_semid_ds *newSema; register int i; if (newSize <= seminfo.semmni) @@ -322,7 +336,7 @@ grow_sema_array(newSize) #ifdef SEM_DEBUG printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize); #endif - MALLOC(newSema, struct semid_ds*, sizeof(struct semid_ds)*newSize, + MALLOC(newSema, struct user_semid_ds *, sizeof(struct user_semid_ds) * newSize, M_SYSVSEM, M_WAITOK); if (NULL == newSema) { @@ -342,7 +356,7 @@ grow_sema_array(newSize) * this with the existing code, so we wake up the * process and let it do a lot of work to determine the * semaphore set is really not available yet, and then - * sleep on the correct, reallocated semid_ds pointer. + * sleep on the correct, reallocated user_semid_ds pointer. */ if (sema[i].sem_perm.mode & SEM_ALLOC) wakeup((caddr_t)&sema[i]); @@ -350,7 +364,7 @@ grow_sema_array(newSize) for (i = seminfo.semmni; i < newSize; i++) { - newSema[i].sem_base = 0; + newSema[i].sem_base = NULL; newSema[i].sem_perm.mode = 0; } @@ -367,38 +381,38 @@ grow_sema_array(newSize) } /* - * Expand the sem array to the given capacity. If the expansion fails + * Expand the sem_pool array to the given capacity. If the expansion fails * we return 0 (fail), otherwise we return 1 (success). 
* * Assumes we already hold the subsystem lock. */ static int -grow_sem_array(newSize) - int newSize; +grow_sem_pool(int new_pool_size) { - register struct sem *newSem = NULL; - register int i; + struct sem *new_sem_pool = NULL; + struct sem *sem_free; + int i; - if (newSize < semtot) + if (new_pool_size < semtot) return 0; - if (newSize > limitseminfo.semmns) /* enforce hard limit */ - { + /* enforce hard limit */ + if (new_pool_size > limitseminfo.semmns) { #ifdef SEM_DEBUG printf("semaphore hard limit of %d reached, requested %d\n", - limitseminfo.semmns, newSize); + limitseminfo.semmns, new_pool_size); #endif return 0; } - newSize = (newSize/SEMMNS_INC + 1) * SEMMNS_INC; - newSize = newSize > limitseminfo.semmns ? limitseminfo.semmns : newSize; + + new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC; + new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size; #ifdef SEM_DEBUG - printf("growing sem array from %d to %d\n", seminfo.semmns, newSize); + printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size); #endif - MALLOC(newSem, struct sem*, sizeof(struct sem)*newSize, + MALLOC(new_sem_pool, struct sem *, sizeof(struct sem) * new_pool_size, M_SYSVSEM, M_WAITOK); - if (NULL == newSem) - { + if (NULL == new_sem_pool) { #ifdef SEM_DEBUG printf("allocation failed. no changes made.\n"); #endif @@ -406,26 +420,24 @@ grow_sem_array(newSize) } /* We have our new memory, now copy the old contents over */ - if (sem) + if (sem_pool) for(i = 0; i < seminfo.semmns; i++) - newSem[i] = sem[i]; + new_sem_pool[i] = sem_pool[i]; /* Update our id structures to point to the new semaphores */ - for(i = 0; i < seminfo.semmni; i++) + for(i = 0; i < seminfo.semmni; i++) { if (sema[i].sem_perm.mode & SEM_ALLOC) /* ID in use */ - { - if (newSem > sem) - sema[i].sem_base += newSem - sem; - else - sema[i].sem_base -= sem - newSem; - } + sema[i].sem_base += (new_sem_pool - sem_pool); + } + + sem_free = sem_pool; + sem_pool = new_sem_pool; /* clean up the old array */ - if (sem) - FREE(sem, M_SYSVSEM); + if (sem_free != NULL) + FREE(sem_free, M_SYSVSEM); - sem = newSem; - seminfo.semmns = newSize; + seminfo.semmns = new_pool_size; #ifdef SEM_DEBUG printf("expansion complete\n"); #endif @@ -440,8 +452,7 @@ grow_sem_array(newSize) */ static struct sem_undo * -semu_alloc(p) - struct proc *p; +semu_alloc(struct proc *p) { register int i; register struct sem_undo *suptr; @@ -466,6 +477,7 @@ semu_alloc(p) suptr->un_next = semu_list; semu_list = suptr; suptr->un_cnt = 0; + suptr->un_ent = NULL; suptr->un_proc = p; return(suptr); } @@ -515,16 +527,12 @@ semu_alloc(p) * * Assumes we already hold the subsystem lock. */ - static int -semundo_adjust(p, supptr, semid, semnum, adjval) - register struct proc *p; - struct sem_undo **supptr; - int semid, semnum; - int adjval; +semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, + int semnum, int adjval) { register struct sem_undo *suptr; - register struct undo *sunptr; + register struct undo *sueptr, **suepptr, *new_sueptr; int i; /* Look for and remember the sem_undo if the caller doesn't provide @@ -553,31 +561,75 @@ semundo_adjust(p, supptr, semid, semnum, adjval) * Look for the requested entry and adjust it (delete if adjval becomes * 0). 
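*/

/*
 * Editor's aside (not part of the original patch): a generic sketch of the
 * allocate-then-recheck pattern used by semundo_adjust() below. A blocking
 * allocation cannot be made with the subsystem mutex held, so the code
 * drops the lock, allocates, retakes the lock, and rescans -- freeing the
 * fresh node if another thread created the entry in the meantime. The
 * names here (lookup_or_create, struct node) are illustrative.
 */
#include <stdlib.h>
#include <pthread.h>

struct node { int key; int val; struct node *next; };

static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

/* list_mutex is held on entry and on exit, but dropped around malloc() */
static struct node *
lookup_or_create(int key)
{
	struct node *n, *fresh = NULL;

again:
	for (n = list_head; n != NULL; n = n->next) {
		if (n->key == key) {
			free(fresh);	/* lost the race: discard our copy */
			return n;
		}
	}
	if (fresh == NULL) {
		pthread_mutex_unlock(&list_mutex);
		fresh = malloc(sizeof(*fresh));	/* may block or fail */
		pthread_mutex_lock(&list_mutex);
		if (fresh == NULL)
			return NULL;
		goto again;	/* the list may have changed while unlocked */
	}
	fresh->key = key;
	fresh->val = 0;
	fresh->next = list_head;
	list_head = fresh;
	return fresh;
}
/* end of editor's aside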
*/ - sunptr = &suptr->un_ent[0]; - for (i = 0; i < suptr->un_cnt; i++, sunptr++) { - if (sunptr->un_id != semid || sunptr->un_num != semnum) + new_sueptr = NULL; +lookup: + for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent; + i < suptr->un_cnt; + i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) { + if (sueptr->une_id != semid || sueptr->une_num != semnum) continue; if (adjval == 0) - sunptr->un_adjval = 0; + sueptr->une_adjval = 0; else - sunptr->un_adjval += adjval; - if (sunptr->un_adjval == 0) { + sueptr->une_adjval += adjval; + if (sueptr->une_adjval == 0) { suptr->un_cnt--; - if (i < suptr->un_cnt) - suptr->un_ent[i] = - suptr->un_ent[suptr->un_cnt]; + *suepptr = sueptr->une_next; + FREE(sueptr, M_SYSVSEM); + sueptr = NULL; + } + if (new_sueptr != NULL) { + /* + * We lost the race: free the "undo" entry we allocated + * and use the one that won. + */ + FREE(new_sueptr, M_SYSVSEM); + new_sueptr = NULL; } return(0); } /* Didn't find the right entry - create it */ - if (adjval == 0) + if (adjval == 0) { + if (new_sueptr != NULL) { + FREE(new_sueptr, M_SYSVSEM); + new_sueptr = NULL; + } return(0); - if (suptr->un_cnt != limitseminfo.semume) { - sunptr = &suptr->un_ent[suptr->un_cnt]; + } + + if (new_sueptr != NULL) { + /* + * Use the new "undo" entry we allocated in the previous pass + */ + new_sueptr->une_next = suptr->un_ent; + suptr->un_ent = new_sueptr; suptr->un_cnt++; - sunptr->un_adjval = adjval; - sunptr->un_id = semid; sunptr->un_num = semnum; + new_sueptr->une_adjval = adjval; + new_sueptr->une_id = semid; + new_sueptr->une_num = semnum; + return 0; + } + + if (suptr->un_cnt != limitseminfo.semume) { + SYSV_SEM_SUBSYS_UNLOCK(); + /* + * Unlocking opens the door to race conditions. Someone else + * could be trying to allocate the same thing at this point, + * so we'll have to check if we lost the race. + */ + MALLOC(new_sueptr, struct undo *, sizeof (struct undo), + M_SYSVSEM, M_WAITOK); + SYSV_SEM_SUBSYS_LOCK(); + if (new_sueptr == NULL) { + return ENOMEM; + } + /* + * There might be other threads doing the same thing for this + * process, so check again if an "undo" entry exists for that + * semaphore. + */ + goto lookup; } else return(EINVAL); return(0); @@ -586,94 +638,96 @@ semundo_adjust(p, supptr, semid, semnum, adjval) /* Assumes we already hold the subsystem lock. */ static void -semundo_clear(semid, semnum) - int semid, semnum; +semundo_clear(int semid, int semnum) { - register struct sem_undo *suptr; + struct sem_undo *suptr; for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) { - register struct undo *sunptr = &suptr->un_ent[0]; - register int i = 0; + struct undo *sueptr; + struct undo **suepptr; + int i = 0; + sueptr = suptr->un_ent; + suepptr = &suptr->un_ent; while (i < suptr->un_cnt) { - if (sunptr->un_id == semid) { - if (semnum == -1 || sunptr->un_num == semnum) { + if (sueptr->une_id == semid) { + if (semnum == -1 || sueptr->une_num == semnum) { suptr->un_cnt--; - if (i < suptr->un_cnt) { - suptr->un_ent[i] = - suptr->un_ent[suptr->un_cnt]; - continue; - } + *suepptr = sueptr->une_next; + FREE(sueptr, M_SYSVSEM); + sueptr = *suepptr; + continue; } if (semnum != -1) break; } - i++, sunptr++; + i++; + suepptr = &sueptr->une_next; + sueptr = sueptr->une_next; } } } /* - * Note that the user-mode half of this passes a union, not a pointer + * Note that the user-mode half of this passes a union coerced to a + * user_addr_t. 
The union contains either an int or a pointer, and + * so we have to coerce it back, variant on whether the calling + * process is 64 bit or not. The coercion works for the 'val' element + * because the alignment is the same in user and kernel space. */ -#ifndef _SYS_SYSPROTO_H_ -struct semctl_args { - int semid; - int semnum; - int cmd; - union semun arg; -}; -#endif - int -semctl(p, uap, retval) - struct proc *p; - register struct semctl_args *uap; - register_t *retval; +semctl(struct proc *p, struct semctl_args *uap, register_t *retval) { int semid = uap->semid; int semnum = uap->semnum; int cmd = uap->cmd; - union semun arg = uap->arg; - union semun real_arg; - struct ucred *cred = p->p_ucred; + user_semun_t user_arg = (user_semun_t)uap->arg; + kauth_cred_t cred = kauth_cred_get(); int i, rval, eval; - struct semid_ds sbuf; - register struct semid_ds *semaptr; + struct user_semid_ds sbuf; + struct user_semid_ds *semaptr; + struct user_semid_ds uds; + AUDIT_ARG(svipc_cmd, cmd); AUDIT_ARG(svipc_id, semid); - SUBSYSTEM_LOCK_AQUIRE(p); + + SYSV_SEM_SUBSYS_LOCK(); + #ifdef SEM_DEBUG - printf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); + printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg); #endif semid = IPCID_TO_IX(semid); - if (semid < 0 || semid >= seminfo.semmni) -{ + + if (semid < 0 || semid >= seminfo.semmni) { #ifdef SEM_DEBUG printf("Invalid semid\n"); #endif - UNLOCK_AND_RETURN(EINVAL); -} + eval = EINVAL; + goto semctlout; + } semaptr = &sema[semid]; if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || - semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) - UNLOCK_AND_RETURN(EINVAL); + semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { + eval = EINVAL; + goto semctlout; + } eval = 0; rval = 0; switch (cmd) { case IPC_RMID: - if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) - UNLOCK_AND_RETURN(eval); - semaptr->sem_perm.cuid = cred->cr_uid; - semaptr->sem_perm.uid = cred->cr_uid; + if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) + goto semctlout; + + semaptr->sem_perm.cuid = kauth_cred_getuid(cred); + semaptr->sem_perm.uid = kauth_cred_getuid(cred); semtot -= semaptr->sem_nsems; - for (i = semaptr->sem_base - sem; i < semtot; i++) - sem[i] = sem[i + semaptr->sem_nsems]; + for (i = semaptr->sem_base - sem_pool; i < semtot; i++) + sem_pool[i] = sem_pool[i + semaptr->sem_nsems]; for (i = 0; i < seminfo.semmni; i++) { if ((sema[i].sem_perm.mode & SEM_ALLOC) && sema[i].sem_base > semaptr->sem_base) @@ -686,60 +740,84 @@ semctl(p, uap, retval) case IPC_SET: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) - UNLOCK_AND_RETURN(eval); - /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - UNLOCK_AND_RETURN(eval);*/ - if ((eval = copyin(arg.buf, (caddr_t)&sbuf, - sizeof(sbuf))) != 0) - UNLOCK_AND_RETURN(eval); + goto semctlout; + + SYSV_SEM_SUBSYS_UNLOCK(); + + if (IS_64BIT_PROCESS(p)) { + eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds)); + } else { + eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds)); + /* convert in place; ugly, but safe */ + semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf); + } + + if (eval != 0) + return(eval); + + SYSV_SEM_SUBSYS_LOCK(); + semaptr->sem_perm.uid = sbuf.sem_perm.uid; semaptr->sem_perm.gid = sbuf.sem_perm.gid; semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) | (sbuf.sem_perm.mode & 0777); - semaptr->sem_ctime = time_second; + semaptr->sem_ctime = sysv_semtime(); break; case IPC_STAT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - 
UNLOCK_AND_RETURN(eval); - /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - UNLOCK_AND_RETURN(eval);*/ - eval = copyout((caddr_t)semaptr, arg.buf, - sizeof(struct semid_ds)); + goto semctlout; + bcopy(semaptr, &uds, sizeof(struct user_semid_ds)); + SYSV_SEM_SUBSYS_UNLOCK(); + if (IS_64BIT_PROCESS(p)) { + eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds)); + } else { + struct semid_ds semid_ds32; + semid_ds_64to32(&uds, &semid_ds32); + eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds)); + } + SYSV_SEM_SUBSYS_LOCK(); break; case GETNCNT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - UNLOCK_AND_RETURN(eval); - if (semnum < 0 || semnum >= semaptr->sem_nsems) - UNLOCK_AND_RETURN(EINVAL); + goto semctlout; + if (semnum < 0 || semnum >= semaptr->sem_nsems) { + eval = EINVAL; + goto semctlout; + } rval = semaptr->sem_base[semnum].semncnt; break; case GETPID: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - UNLOCK_AND_RETURN(eval); - if (semnum < 0 || semnum >= semaptr->sem_nsems) - UNLOCK_AND_RETURN(EINVAL); + goto semctlout; + if (semnum < 0 || semnum >= semaptr->sem_nsems) { + eval = EINVAL; + goto semctlout; + } rval = semaptr->sem_base[semnum].sempid; break; case GETVAL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - UNLOCK_AND_RETURN(eval); - if (semnum < 0 || semnum >= semaptr->sem_nsems) - UNLOCK_AND_RETURN(EINVAL); + goto semctlout; + if (semnum < 0 || semnum >= semaptr->sem_nsems) { + eval = EINVAL; + goto semctlout; + } rval = semaptr->sem_base[semnum].semval; break; case GETALL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - UNLOCK_AND_RETURN(eval); - /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - UNLOCK_AND_RETURN(eval);*/ + goto semctlout; +/* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */ for (i = 0; i < semaptr->sem_nsems; i++) { + /* XXX could be done in one go... */ eval = copyout((caddr_t)&semaptr->sem_base[i].semval, - &arg.array[i], sizeof(arg.array[0])); + user_arg.array + (i * sizeof(unsigned short)), + sizeof(unsigned short)); if (eval != 0) break; } @@ -747,9 +825,11 @@ semctl(p, uap, retval) case GETZCNT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - UNLOCK_AND_RETURN(eval); - if (semnum < 0 || semnum >= semaptr->sem_nsems) - UNLOCK_AND_RETURN(EINVAL); + goto semctlout; + if (semnum < 0 || semnum >= semaptr->sem_nsems) { + eval = EINVAL; + goto semctlout; + } rval = semaptr->sem_base[semnum].semzcnt; break; @@ -759,36 +839,35 @@ semctl(p, uap, retval) #ifdef SEM_DEBUG printf("Invalid credentials for write\n"); #endif - UNLOCK_AND_RETURN(eval); + goto semctlout; } if (semnum < 0 || semnum >= semaptr->sem_nsems) { #ifdef SEM_DEBUG printf("Invalid number out of range for set\n"); #endif - UNLOCK_AND_RETURN(EINVAL); + eval = EINVAL; + goto semctlout; } - /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - { -#ifdef SEM_DEBUG - printf("Error during value copyin\n"); -#endif - UNLOCK_AND_RETURN(eval); - }*/ - semaptr->sem_base[semnum].semval = arg.val; + /* + * Cast down a pointer instead of using 'val' member directly + * to avoid introducing endianness and a pad field into the + * header file. Ugly, but it works.
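*/

/*
 * Editor's aside (not part of the original patch): the user-level
 * counterpart of the SETVAL case above. The caller passes a union semun
 * by value; for SETVAL only the int 'val' member is meaningful, which is
 * why the kernel can CAST_DOWN the user_addr_t it received. Darwin's
 * <sys/sem.h> declares union semun; on some systems (e.g. Linux) the
 * caller must define it.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

int
set_sem_to_one(int semid, int semnum)
{
	union semun arg;

	arg.val = 1;	/* occupies the same low bits the kernel casts down */
	return semctl(semid, semnum, SETVAL, arg);
}
/* end of editor's aside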
+ */ + semaptr->sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf); semundo_clear(semid, semnum); wakeup((caddr_t)semaptr); break; case SETALL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W))) - UNLOCK_AND_RETURN(eval); - /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - UNLOCK_AND_RETURN(eval);*/ + goto semctlout; +/*** XXXXXXXXXXXX TBD ********/ for (i = 0; i < semaptr->sem_nsems; i++) { - eval = copyin(&arg.array[i], + /* XXX could be done in one go... */ + eval = copyin(user_arg.array + (i * sizeof(unsigned short)), (caddr_t)&semaptr->sem_base[i].semval, - sizeof(arg.array[0])); + sizeof(unsigned short)); if (eval != 0) break; } @@ -797,41 +876,36 @@ semctl(p, uap, retval) break; default: - UNLOCK_AND_RETURN(EINVAL); + eval = EINVAL; + goto semctlout; } if (eval == 0) *retval = rval; - UNLOCK_AND_RETURN(eval); +semctlout: + SYSV_SEM_SUBSYS_UNLOCK(); + return(eval); } -#ifndef _SYS_SYSPROTO_H_ -struct semget_args { - key_t key; - int nsems; - int semflg; -}; -#endif - int -semget(p, uap, retval) - struct proc *p; - register struct semget_args *uap; - register_t *retval; +semget(__unused struct proc *p, struct semget_args *uap, register_t *retval) { int semid, eval; int key = uap->key; int nsems = uap->nsems; int semflg = uap->semflg; - struct ucred *cred = p->p_ucred; + kauth_cred_t cred = kauth_cred_get(); - SUBSYSTEM_LOCK_AQUIRE(p); #ifdef SEM_DEBUG if (key != IPC_PRIVATE) printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); else printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg); #endif + + + SYSV_SEM_SUBSYS_LOCK(); + if (key != IPC_PRIVATE) { for (semid = 0; semid < seminfo.semmni; semid++) { @@ -845,18 +919,20 @@ semget(p, uap, retval) #endif if ((eval = ipcperm(cred, &sema[semid].sem_perm, semflg & 0700))) - UNLOCK_AND_RETURN(eval); - if (nsems > 0 && sema[semid].sem_nsems < nsems) { + goto semgetout; + if (nsems < 0 || sema[semid].sem_nsems < nsems) { #ifdef SEM_DEBUG printf("too small\n"); #endif - UNLOCK_AND_RETURN(EINVAL); + eval = EINVAL; + goto semgetout; } if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { #ifdef SEM_DEBUG printf("not exclusive\n"); #endif - UNLOCK_AND_RETURN(EEXIST); + eval = EEXIST; + goto semgetout; } goto found; } @@ -871,19 +947,20 @@ semget(p, uap, retval) printf("nsems out of range (0<%d<=%d)\n", nsems, seminfo.semmsl); #endif - UNLOCK_AND_RETURN(EINVAL); + eval = EINVAL; + goto semgetout; } if (nsems > seminfo.semmns - semtot) { #ifdef SEM_DEBUG printf("not enough semaphores left (need %d, got %d)\n", nsems, seminfo.semmns - semtot); #endif - if (!grow_sem_array(semtot + nsems)) - { + if (!grow_sem_pool(semtot + nsems)) { #ifdef SEM_DEBUG printf("failed to grow the sem array\n"); #endif - UNLOCK_AND_RETURN(ENOSPC); + eval = ENOSPC; + goto semgetout; } } for (semid = 0; semid < seminfo.semmni; semid++) { @@ -899,15 +976,16 @@ semget(p, uap, retval) #ifdef SEM_DEBUG printf("failed to grow sema array\n"); #endif - UNLOCK_AND_RETURN(ENOSPC); + eval = ENOSPC; + goto semgetout; } } #ifdef SEM_DEBUG printf("semid %d is available\n", semid); #endif sema[semid].sem_perm.key = key; - sema[semid].sem_perm.cuid = cred->cr_uid; - sema[semid].sem_perm.uid = cred->cr_uid; + sema[semid].sem_perm.cuid = kauth_cred_getuid(cred); + sema[semid].sem_perm.uid = kauth_cred_getuid(cred); sema[semid].sem_perm.cgid = cred->cr_gid; sema[semid].sem_perm.gid = cred->cr_gid; sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC; @@ -915,20 +993,21 @@ semget(p, uap, retval) (sema[semid].sem_perm.seq + 1) & 0x7fff; sema[semid].sem_nsems = nsems; 
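/*
 * Editor's note (not in the original patch): the initialization below
 * stamps the new set's creation time, points sem_base at the next free
 * slice of the global sem_pool, reserves nsems entries from it, and
 * zeroes them; sem_otime starts at 0 because no semop() has used the
 * set yet.
 */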
sema[semid].sem_otime = 0; - sema[semid].sem_ctime = time_second; - sema[semid].sem_base = &sem[semtot]; + sema[semid].sem_ctime = sysv_semtime(); + sema[semid].sem_base = &sem_pool[semtot]; semtot += nsems; bzero(sema[semid].sem_base, sizeof(sema[semid].sem_base[0])*nsems); #ifdef SEM_DEBUG printf("sembase = 0x%x, next = 0x%x\n", sema[semid].sem_base, - &sem[semtot]); + &sem_pool[semtot]); #endif } else { #ifdef SEM_DEBUG printf("didn't find it and wasn't asked to create it\n"); #endif - UNLOCK_AND_RETURN(ENOENT); + eval = ENOENT; + goto semgetout; } found: @@ -937,72 +1016,73 @@ found: #ifdef SEM_DEBUG printf("semget is done, returning %d\n", *retval); #endif - SUBSYSTEM_LOCK_RELEASE; - return(0); -} + eval = 0; -#ifndef _SYS_SYSPROTO_H_ -struct semop_args { - int semid; - struct sembuf *sops; - int nsops; -}; -#endif +semgetout: + SYSV_SEM_SUBSYS_UNLOCK(); + return(eval); +} int -semop(p, uap, retval) - struct proc *p; - register struct semop_args *uap; - register_t *retval; +semop(struct proc *p, struct semop_args *uap, register_t *retval) { int semid = uap->semid; int nsops = uap->nsops; struct sembuf sops[MAX_SOPS]; - register struct semid_ds *semaptr; - register struct sembuf *sopptr; - register struct sem *semptr; + register struct user_semid_ds *semaptr; + register struct sembuf *sopptr = NULL; /* protected by 'semptr' */ + register struct sem *semptr = NULL; /* protected by 'if' */ struct sem_undo *suptr = NULL; - struct ucred *cred = p->p_ucred; int i, j, eval; int do_wakeup, do_undos; AUDIT_ARG(svipc_id, uap->semid); - SUBSYSTEM_LOCK_AQUIRE(p); + + SYSV_SEM_SUBSYS_LOCK(); + #ifdef SEM_DEBUG printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops); #endif semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ - if (semid < 0 || semid >= seminfo.semmni) - UNLOCK_AND_RETURN(EINVAL); + if (semid < 0 || semid >= seminfo.semmni) { + eval = EINVAL; + goto semopout; + } semaptr = &sema[semid]; - if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) - UNLOCK_AND_RETURN(EINVAL); - if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) - UNLOCK_AND_RETURN(EINVAL); + if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) { + eval = EINVAL; + goto semopout; + } + if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { + eval = EINVAL; + goto semopout; + } - if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W))) { + if ((eval = ipcperm(kauth_cred_get(), &semaptr->sem_perm, IPC_W))) { #ifdef SEM_DEBUG printf("eval = %d from ipaccess\n", eval); #endif - UNLOCK_AND_RETURN(eval); + goto semopout; } if (nsops < 0 || nsops > MAX_SOPS) { #ifdef SEM_DEBUG printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops); #endif - UNLOCK_AND_RETURN(E2BIG); + eval = E2BIG; + goto semopout; } - if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { + /* OK for LP64, since sizeof(struct sembuf) is currently invariant */ + if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) { #ifdef SEM_DEBUG printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval, - uap->sops, &sops, nsops * sizeof(sops[0])); + uap->sops, &sops, nsops * sizeof(struct sembuf)); #endif - UNLOCK_AND_RETURN(eval); + goto semopout; } /* @@ -1022,8 +1102,10 @@ semop(p, uap, retval) for (i = 0; i < nsops; i++) { sopptr = &sops[i]; - if (sopptr->sem_num >= semaptr->sem_nsems) - UNLOCK_AND_RETURN(EFBIG); + if (sopptr->sem_num >= semaptr->sem_nsems) { + eval = EFBIG; + goto semopout; + } semptr = &semaptr->sem_base[sopptr->sem_num]; @@ -1084,8 +1166,10 @@ semop(p, uap, retval) * If the request that we couldn't 
satisfy has the * NOWAIT flag set then return with EAGAIN. */ - if (sopptr->sem_flg & IPC_NOWAIT) - UNLOCK_AND_RETURN(EAGAIN); + if (sopptr->sem_flg & IPC_NOWAIT) { + eval = EAGAIN; + goto semopout; + } if (sopptr->sem_op == 0) semptr->semzcnt++; @@ -1100,23 +1184,23 @@ semop(p, uap, retval) * waiting for. We will get the lock back after we * wake up. */ - SUBSYSTEM_LOCK_RELEASE; - sysv_sem_sleeping_threads++; - eval = tsleep((caddr_t)semaptr, (PZERO - 4) | PCATCH, + eval = msleep((caddr_t)semaptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH, "semwait", 0); - sysv_sem_sleeping_threads--; #ifdef SEM_DEBUG printf("semop: good morning (eval=%d)!\n", eval); #endif - /* There is no need to get the lock if we are just - * going to return without performing more semaphore - * operations. - */ - if (eval != 0) - return(EINTR); + /* we need the lock here due to mods on semptr */ + if (eval != 0) { + if (sopptr->sem_op == 0) + semptr->semzcnt--; + else + semptr->semncnt--; + + eval = EINTR; + goto semopout; + } - SUBSYSTEM_LOCK_AQUIRE(p); /* Get it back */ suptr = NULL; /* sem_undo may have been reallocated */ semaptr = &sema[semid]; /* sema may have been reallocated */ @@ -1132,11 +1216,16 @@ semop(p, uap, retval) semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { /* The man page says to return EIDRM. */ /* Unfortunately, BSD doesn't define that code! */ + if (sopptr->sem_op == 0) + semptr->semzcnt--; + else + semptr->semncnt--; #ifdef EIDRM - UNLOCK_AND_RETURN(EIDRM); + eval = EIDRM; #else - UNLOCK_AND_RETURN(EINVAL); + eval = EINVAL; #endif + goto semopout; } /* @@ -1201,7 +1290,7 @@ done: #ifdef SEM_DEBUG printf("eval = %d from semundo_adjust\n", eval); #endif - UNLOCK_AND_RETURN(eval); + goto semopout; } /* loop through the sops */ } /* if (do_undos) */ @@ -1212,16 +1301,6 @@ done: semptr->sempid = p->p_pid; } - /* Do a wakeup if any semaphore was up'd. - * we will release our lock on the semaphore subsystem before - * we wakeup other processes to prevent a little thrashing. - * Note that this is fine because we are done using the - * semaphore structures at this point in time. We only use - * a local variable pointer value, and the retval - * parameter. - * Note 2: Future use of sem_wakeup may reqiure the lock. - */ - SUBSYSTEM_LOCK_RELEASE; if (do_wakeup) { #ifdef SEM_DEBUG printf("semop: doing wakeup\n"); @@ -1239,7 +1318,10 @@ done: printf("semop: done\n"); #endif *retval = 0; - return(0); + eval = 0; +semopout: + SYSV_SEM_SUBSYS_UNLOCK(); + return(eval); } /* @@ -1247,8 +1329,7 @@ done: * semaphores. */ void -semexit(p) - struct proc *p; +semexit(struct proc *p) { register struct sem_undo *suptr; register struct sem_undo **supptr; @@ -1258,10 +1339,11 @@ semexit(p) * anything to undo, but we need the lock to prevent * dynamic memory race conditions. */ - SUBSYSTEM_LOCK_AQUIRE(p); - if (!sem) + SYSV_SEM_SUBSYS_LOCK(); + + if (!sem_pool) { - SUBSYSTEM_LOCK_RELEASE; + SYSV_SEM_SUBSYS_UNLOCK(); return; } did_something = 0; @@ -1289,13 +1371,17 @@ semexit(p) * If there are any active undo elements then process them. 
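*/

/*
 * Editor's aside (not part of the original patch): what creates the undo
 * entries that semexit() replays here -- a user-space semop(2) issued
 * with SEM_UNDO records a compensating adjustment, so a process that dies
 * holding the semaphore has it released on exit. Names are illustrative.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/* take semaphore 0 of the set; the kernel logs a +1 undo adjustment */
int
take_with_undo(int semid)
{
	struct sembuf op;

	op.sem_num = 0;
	op.sem_op  = -1;		/* P() */
	op.sem_flg = SEM_UNDO;		/* replayed by semexit() if we die */
	return semop(semid, &op, 1);
}
/* end of editor's aside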
*/ if (suptr->un_cnt > 0) { - int ix; + while (suptr->un_ent != NULL) { + struct undo *sueptr; + int semid; + int semnum; + int adjval; + struct user_semid_ds *semaptr; - for (ix = 0; ix < suptr->un_cnt; ix++) { - int semid = suptr->un_ent[ix].un_id; - int semnum = suptr->un_ent[ix].un_num; - int adjval = suptr->un_ent[ix].un_adjval; - struct semid_ds *semaptr; + sueptr = suptr->un_ent; + semid = sueptr->une_id; + semnum = sueptr->une_num; + adjval = sueptr->une_adjval; semaptr = &sema[semid]; if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) @@ -1305,10 +1391,11 @@ semexit(p) #ifdef SEM_DEBUG printf("semexit: %08x id=%d num=%d(adj=%d) ; sem=%d\n", - suptr->un_proc, suptr->un_ent[ix].un_id, - suptr->un_ent[ix].un_num, - suptr->un_ent[ix].un_adjval, - semaptr->sem_base[semnum].semval); + suptr->un_proc, + semid, + semnum, + adjval, + semaptr->sem_base[semnum].semval); #endif if (adjval < 0) { @@ -1336,6 +1423,10 @@ semexit(p) #ifdef SEM_DEBUG printf("semexit: back from wakeup\n"); #endif + suptr->un_cnt--; + suptr->un_ent = sueptr->une_next; + FREE(sueptr, M_SYSVSEM); + sueptr = NULL; } } @@ -1368,32 +1459,36 @@ unlock: * same leaky semaphore problem. */ - SUBSYSTEM_LOCK_RELEASE; + SYSV_SEM_SUBSYS_UNLOCK(); } + + /* (struct sysctl_oid *oidp, void *arg1, int arg2, \ struct sysctl_req *req) */ static int -sysctl_seminfo SYSCTL_HANDLER_ARGS +sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1, + __unused int arg2, struct sysctl_req *req) { int error = 0; error = SYSCTL_OUT(req, arg1, sizeof(int)); - if (error || !req->newptr) + if (error || req->newptr == USER_ADDR_NULL) return(error); - SUBSYSTEM_LOCK_AQUIRE(current_proc()); + SYSV_SEM_SUBSYS_LOCK(); + /* Set the values only if shared memory is not initialised */ - if ((sem == (struct sem *) 0) && - (sema == (struct semid_ds *) 0) && - (semu == (struct semid_ds *) 0) && - (semu_list == (struct sem_undo *) 0)) { - if (error = SYSCTL_IN(req, arg1, sizeof(int))) { + if ((sem_pool == NULL) && + (sema == NULL) && + (semu == NULL) && + (semu_list == NULL)) { + if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) { goto out; } } else error = EINVAL; out: - SUBSYSTEM_LOCK_RELEASE; + SYSV_SEM_SUBSYS_UNLOCK(); return(error); } @@ -1416,3 +1511,103 @@ SYSCTL_PROC(_kern_sysv, KSYSV_SEMUNE, semume, CTLTYPE_INT | CTLFLAG_RW, &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume"); +static int +IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error; + int cursor; + union { + struct IPCS_command u32; + struct user_IPCS_command u64; + } ipcs; + struct semid_ds semid_ds32; /* post conversion, 32 bit version */ + void *semid_dsp; + size_t ipcs_sz = sizeof(struct user_IPCS_command); + size_t semid_ds_sz = sizeof(struct user_semid_ds); + struct proc *p = current_proc(); + + /* Copy in the command structure */ + if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) { + return(error); + } + + if (!IS_64BIT_PROCESS(p)) { + ipcs_sz = sizeof(struct IPCS_command); + semid_ds_sz = sizeof(struct semid_ds); + } + + /* Let us version this interface... */ + if (ipcs.u64.ipcs_magic != IPCS_MAGIC) { + return(EINVAL); + } + + SYSV_SEM_SUBSYS_LOCK(); + switch(ipcs.u64.ipcs_op) { + case IPCS_SEM_CONF: /* Obtain global configuration data */ + if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) { + error = ERANGE; + break; + } + if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. 
*/ + error = EINVAL; + break; + } + SYSV_SEM_SUBSYS_UNLOCK(); + error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + SYSV_SEM_SUBSYS_LOCK(); + break; + + case IPCS_SEM_ITER: /* Iterate over existing segments */ + cursor = ipcs.u64.ipcs_cursor; + if (cursor < 0 || cursor >= seminfo.semmni) { + error = ERANGE; + break; + } + if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) { + error = EINVAL; + break; + } + for( ; cursor < seminfo.semmni; cursor++) { + if (sema[cursor].sem_perm.mode & SEM_ALLOC) + break; + continue; + } + if (cursor == seminfo.semmni) { + error = ENOENT; + break; + } + + semid_dsp = &sema[cursor]; /* default: 64 bit */ + + /* + * If necessary, convert the 64 bit kernel segment + * descriptor to a 32 bit user one. + */ + if (!IS_64BIT_PROCESS(p)) { + semid_ds_64to32(semid_dsp, &semid_ds32); + semid_dsp = &semid_ds32; + } + SYSV_SEM_SUBSYS_UNLOCK(); + error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + if (!error) { + /* update cursor */ + ipcs.u64.ipcs_cursor = cursor + 1; + error = SYSCTL_OUT(req, &ipcs, ipcs_sz); + } + SYSV_SEM_SUBSYS_LOCK(); + break; + + default: + error = EINVAL; + break; + } + SYSV_SEM_SUBSYS_UNLOCK(); + return(error); +} + +SYSCTL_DECL(_kern_sysv_ipcs); +SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY, + 0, 0, IPCS_sem_sysctl, + "S,IPCS_sem_command", + "ipcs sem command interface"); diff --git a/bsd/kern/sysv_shm.c b/bsd/kern/sysv_shm.c index 7b2eff349..c626909e0 100644 --- a/bsd/kern/sysv_shm.c +++ b/bsd/kern/sysv_shm.c @@ -56,39 +56,49 @@ #include #include #include -#include -#include +#include +#include +#include #include #include #include #include +#include +#include +#include #include #include #include +#include + +#include + #include +#include +#include -struct shmat_args; -extern int shmat __P((struct proc *p, struct shmat_args *uap, int *retval)); -struct shmctl_args; -extern int shmctl __P((struct proc *p, struct shmctl_args *uap, int *retval)); -struct shmdt_args; -extern int shmdt __P((struct proc *p, struct shmdt_args *uap, int *retval)); -struct shmget_args; -extern int shmget __P((struct proc *p, struct shmget_args *uap, int *retval)); +#include +static void shminit(void *); #if 0 -static void shminit __P((void *)); SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL) #endif 0 -struct oshmctl_args; -static int oshmctl __P((struct proc *p, struct oshmctl_args *uap, int * retval)); -static int shmget_allocate_segment __P((struct proc *p, struct shmget_args *uap, int mode, int * retval)); -static int shmget_existing __P((struct proc *p, struct shmget_args *uap, int mode, int segnum, int * retval)); +static lck_grp_t *sysv_shm_subsys_lck_grp; +static lck_grp_attr_t *sysv_shm_subsys_lck_grp_attr; +static lck_attr_t *sysv_shm_subsys_lck_attr; +static lck_mtx_t sysv_shm_subsys_mutex; -typedef int sy_call_t __P((struct proc *, void *, int *)); +#define SYSV_SHM_SUBSYS_LOCK() lck_mtx_lock(&sysv_shm_subsys_mutex) +#define SYSV_SHM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_shm_subsys_mutex) + +static int oshmctl(void *p, void *uap, void *retval); +static int shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, int * retval); +static int shmget_existing(struct shmget_args *uap, int mode, int segnum, int * retval); +static void shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out); +static void shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out); /* XXX casting to (sy_call_t *) is bogus, as usual. 
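*/

/*
 * Editor's aside (not part of the original patch): a hedged sketch of
 * driving the kern.sysv.ipcs.sem interface registered above from user
 * space. The header name <sys/ipcs.h>, the sysctl path, and the exact
 * IPCS_command layout are assumptions inferred from this patch.
 */
#include <sys/types.h>
#include <sys/sem.h>
#include <sys/sysctl.h>
#include <sys/ipcs.h>	/* struct IPCS_command, IPCS_MAGIC, IPCS_SEM_ITER */
#include <errno.h>
#include <string.h>

/* walk every allocated semaphore set; returns the count, or -1 on error */
int
count_sem_sets(void)
{
	struct IPCS_command ic;
	struct semid_ds ds;
	size_t len = sizeof(ic);
	int count = 0;

	memset(&ic, 0, sizeof(ic));
	ic.ipcs_magic   = IPCS_MAGIC;
	ic.ipcs_op      = IPCS_SEM_ITER;
	ic.ipcs_cursor  = 0;		/* start before the first set */
	ic.ipcs_datalen = sizeof(ds);
	ic.ipcs_data    = &ds;

	/* each success copies out one semid_ds and advances ipcs_cursor */
	while (sysctlbyname("kern.sysv.ipcs.sem", &ic, &len, &ic, sizeof(ic)) == 0)
		count++;

	return (errno == ENOENT) ? count : -1;
}
/* end of editor's aside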
*/ static sy_call_t *shmcalls[] = { @@ -103,23 +113,22 @@ static sy_call_t *shmcalls[] = { #define SHMSEG_WANTED 0x1000 static int shm_last_free, shm_nused, shm_committed; -struct shmid_ds *shmsegs; +struct user_shmid_ds *shmsegs; /* 64 bit version */ static int shm_inited = 0; struct shm_handle { - /* vm_offset_t kva; */ - void * shm_object; + void * shm_object; /* vm_offset_t kva; */ }; struct shmmap_state { - vm_offset_t va; - int shmid; + mach_vm_address_t va; /* user address */ + int shmid; /* segment id */ }; -static void shm_deallocate_segment __P((struct shmid_ds *)); -static int shm_find_segment_by_key __P((key_t)); -static struct shmid_ds *shm_find_segment_by_shmid __P((int)); -static int shm_delete_mapping __P((struct proc *, struct shmmap_state *, int)); +static void shm_deallocate_segment(struct user_shmid_ds *); +static int shm_find_segment_by_key(key_t); +static struct user_shmid_ds *shm_find_segment_by_shmid(int); +static int shm_delete_mapping(struct proc *, struct shmmap_state *, int); #ifdef __APPLE_API_PRIVATE struct shminfo shminfo = { @@ -131,9 +140,58 @@ struct shminfo shminfo = { }; #endif /* __APPLE_API_PRIVATE */ +void sysv_shm_lock_init(void); + +static __inline__ time_t +sysv_shmtime(void) +{ + struct timeval tv; + microtime(&tv); + return (tv.tv_sec); +} + +/* + * This conversion is safe, since if we are converting for a 32 bit process, + * then the value of (struct shmid_ds)->shm_segsz will never exceed 4G. + * + * NOTE: Source and target may *NOT* overlap! (target is smaller) + */ +static void +shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out) +{ + out->shm_perm = in->shm_perm; + out->shm_segsz = (size_t)in->shm_segsz; + out->shm_lpid = in->shm_lpid; + out->shm_cpid = in->shm_cpid; + out->shm_nattch = in->shm_nattch; + out->shm_atime = in->shm_atime; + out->shm_dtime = in->shm_dtime; + out->shm_ctime = in->shm_ctime; + out->shm_internal = CAST_DOWN(void *,in->shm_internal); +} + +/* + * NOTE: Source and target are permitted to overlap! (source is smaller); + * this works because we copy fields in order from the end of the struct to
+ */ +static void +shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out) +{ + out->shm_internal = CAST_USER_ADDR_T(in->shm_internal); + out->shm_ctime = in->shm_ctime; + out->shm_dtime = in->shm_dtime; + out->shm_atime = in->shm_atime; + out->shm_nattch = in->shm_nattch; + out->shm_cpid = in->shm_cpid; + out->shm_lpid = in->shm_lpid; + out->shm_segsz = (user_size_t)in->shm_segsz; + out->shm_perm = in->shm_perm; +} + + static int -shm_find_segment_by_key(key) - key_t key; +shm_find_segment_by_key(key_t key) { int i; @@ -144,12 +202,11 @@ shm_find_segment_by_key(key) return -1; } -static struct shmid_ds * -shm_find_segment_by_shmid(shmid) - int shmid; +static struct user_shmid_ds * +shm_find_segment_by_shmid(int shmid) { int segnum; - struct shmid_ds *shmseg; + struct user_shmid_ds *shmseg; segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shminfo.shmmni) @@ -163,44 +220,40 @@ shm_find_segment_by_shmid(shmid) } static void -shm_deallocate_segment(shmseg) - struct shmid_ds *shmseg; +shm_deallocate_segment(struct user_shmid_ds *shmseg) { struct shm_handle *shm_handle; - struct shmmap_state *shmmap_s=NULL; - size_t size; - char * ptr; + mach_vm_size_t size; - shm_handle = shmseg->shm_internal; - size = round_page_32(shmseg->shm_segsz); - mach_destroy_memory_entry(shm_handle->shm_object); + shm_handle = CAST_DOWN(void *,shmseg->shm_internal); /* tunnel */ + size = mach_vm_round_page(shmseg->shm_segsz); + mach_memory_entry_port_release(shm_handle->shm_object); + shm_handle->shm_object = NULL; FREE((caddr_t)shm_handle, M_SHM); - shmseg->shm_internal = NULL; + shmseg->shm_internal = USER_ADDR_NULL; /* tunnel */ shm_committed -= btoc(size); shm_nused--; shmseg->shm_perm.mode = SHMSEG_FREE; } static int -shm_delete_mapping(p, shmmap_s, deallocate) - struct proc *p; - struct shmmap_state *shmmap_s; - int deallocate; +shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s, + int deallocate) { - struct shmid_ds *shmseg; + struct user_shmid_ds *shmseg; int segnum, result; - size_t size; + mach_vm_size_t size; segnum = IPCID_TO_IX(shmmap_s->shmid); shmseg = &shmsegs[segnum]; - size = round_page_32(shmseg->shm_segsz); + size = mach_vm_round_page(shmseg->shm_segsz); /* XXX done for us? 
*/ if (deallocate) { - result = vm_deallocate(current_map(), shmmap_s->va, size); + result = mach_vm_deallocate(current_map(), shmmap_s->va, size); if (result != KERN_SUCCESS) return EINVAL; } shmmap_s->shmid = -1; - shmseg->shm_dtime = time_second; + shmseg->shm_dtime = sysv_shmtime(); if ((--shmseg->shm_nattch <= 0) && (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { shm_deallocate_segment(shmseg); @@ -209,224 +262,205 @@ shm_delete_mapping(p, shmmap_s, deallocate) return 0; } -struct shmdt_args { - void *shmaddr; -}; - int -shmdt(p, uap, retval) - struct proc *p; - struct shmdt_args *uap; - register_t *retval; +shmdt(struct proc *p, struct shmdt_args *uap, register_t *retval) { struct shmmap_state *shmmap_s; int i; + int shmdtret = 0; + + // LP64todo - fix this + AUDIT_ARG(svipc_addr, CAST_DOWN(void *,uap->shmaddr)); + + SYSV_SHM_SUBSYS_LOCK(); - AUDIT_ARG(svipc_addr, uap->shmaddr); - if (!shm_inited) - return(EINVAL); + if (!shm_inited) { + shmdtret = EINVAL; + goto shmdt_out; + } shmmap_s = (struct shmmap_state *)p->vm_shm; - if (shmmap_s == NULL) - return EINVAL; + if (shmmap_s == NULL) { + shmdtret = EINVAL; + goto shmdt_out; + } + for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1 && - shmmap_s->va == (vm_offset_t)uap->shmaddr) + shmmap_s->va == (mach_vm_offset_t)uap->shmaddr) break; - if (i == shminfo.shmseg) - return EINVAL; - return shm_delete_mapping(p, shmmap_s, 1); + if (i == shminfo.shmseg) { + shmdtret = EINVAL; + goto shmdt_out; + } + i = shm_delete_mapping(p, shmmap_s, 1); + + if (i == 0) + *retval = 0; + shmdtret = i; +shmdt_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return shmdtret; } -#ifndef _SYS_SYSPROTO_H_ -struct shmat_args { - int shmid; - void *shmaddr; - int shmflg; -}; -#endif - int -shmat(p, uap, retval) - struct proc *p; - struct shmat_args *uap; - register_t *retval; +shmat(struct proc *p, struct shmat_args *uap, register_t *retval) { int error, i, flags; - struct ucred *cred = p->p_ucred; - struct shmid_ds *shmseg; - struct shmmap_state *shmmap_s = NULL; - struct shm_handle *shm_handle; - vm_offset_t attach_va; - vm_prot_t prot; - vm_size_t size; - kern_return_t rv; + struct user_shmid_ds *shmseg; + struct shmmap_state *shmmap_s = NULL; + struct shm_handle *shm_handle; + mach_vm_address_t attach_va; /* attach address in/out */ + mach_vm_size_t map_size; /* size of map entry */ + vm_prot_t prot; + size_t size; + kern_return_t rv; + int shmat_ret = 0; AUDIT_ARG(svipc_id, uap->shmid); - AUDIT_ARG(svipc_addr, uap->shmaddr); - if (!shm_inited) - return(EINVAL); + // LP64todo - fix this + AUDIT_ARG(svipc_addr, CAST_DOWN(void *,uap->shmaddr)); + + SYSV_SHM_SUBSYS_LOCK(); + + if (!shm_inited) { + shmat_ret = EINVAL; + goto shmat_out; + } + shmmap_s = (struct shmmap_state *)p->vm_shm; + if (shmmap_s == NULL) { size = shminfo.shmseg * sizeof(struct shmmap_state); - shmmap_s = (struct shmmap_state *)_MALLOC(size, M_SHM, M_WAITOK); + MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK); + if (shmmap_s == NULL) { + shmat_ret = ENOMEM; + goto shmat_out; + } for (i = 0; i < shminfo.shmseg; i++) shmmap_s[i].shmid = -1; p->vm_shm = (caddr_t)shmmap_s; } shmseg = shm_find_segment_by_shmid(uap->shmid); - if (shmseg == NULL) - return EINVAL; + if (shmseg == NULL) { + shmat_ret = EINVAL; + goto shmat_out; + } AUDIT_ARG(svipc_perm, &shmseg->shm_perm); - error = ipcperm(cred, &shmseg->shm_perm, + error = ipcperm(kauth_cred_get(), &shmseg->shm_perm, (uap->shmflg & SHM_RDONLY) ? 
IPC_R : IPC_R|IPC_W); - if (error) - return error; + if (error) { + shmat_ret = error; + goto shmat_out; + } + for (i = 0; i < shminfo.shmseg; i++) { if (shmmap_s->shmid == -1) break; shmmap_s++; } - if (i >= shminfo.shmseg) - return EMFILE; - size = round_page_32(shmseg->shm_segsz); + if (i >= shminfo.shmseg) { + shmat_ret = EMFILE; + goto shmat_out; + } + + map_size = mach_vm_round_page(shmseg->shm_segsz); prot = VM_PROT_READ; if ((uap->shmflg & SHM_RDONLY) == 0) prot |= VM_PROT_WRITE; flags = MAP_ANON | MAP_SHARED; - if (uap->shmaddr) { + if (uap->shmaddr) flags |= MAP_FIXED; - if (uap->shmflg & SHM_RND) - attach_va = (vm_offset_t)uap->shmaddr & ~(SHMLBA-1); - else if (((vm_offset_t)uap->shmaddr & (SHMLBA-1)) == 0) - attach_va = (vm_offset_t)uap->shmaddr; - else - return EINVAL; - } else { - attach_va = round_page_32((unsigned int)uap->shmaddr); - } - - shm_handle = shmseg->shm_internal; - rv = vm_map(current_map(), &attach_va, size, 0, (flags & MAP_FIXED)? FALSE: TRUE, - shm_handle->shm_object, 0, FALSE, prot, prot, VM_INHERIT_DEFAULT); + + attach_va = (mach_vm_address_t)uap->shmaddr; + if (uap->shmflg & SHM_RND) + attach_va &= ~(SHMLBA-1); + else if ((attach_va & (SHMLBA-1)) != 0) { + shmat_ret = EINVAL; + goto shmat_out; + } + + shm_handle = CAST_DOWN(void *, shmseg->shm_internal); /* tunnel */ + + rv = mach_vm_map(current_map(), /* process map */ + &attach_va, /* attach address */ + map_size, /* segment size */ + (mach_vm_offset_t)0, /* alignment mask */ + (flags & MAP_FIXED)? VM_FLAGS_FIXED: VM_FLAGS_ANYWHERE, + shm_handle->shm_object, + (mach_vm_offset_t)0, + FALSE, + prot, + prot, + VM_INHERIT_DEFAULT); if (rv != KERN_SUCCESS) goto out; - rv = vm_inherit(current_map(), attach_va, size, - VM_INHERIT_SHARE); + + rv = mach_vm_inherit(current_map(), attach_va, map_size, VM_INHERIT_SHARE); if (rv != KERN_SUCCESS) { - (void) vm_deallocate(current_map(), attach_va, size); + (void)mach_vm_deallocate(current_map(), attach_va, map_size); goto out; } shmmap_s->va = attach_va; shmmap_s->shmid = uap->shmid; shmseg->shm_lpid = p->p_pid; - shmseg->shm_atime = time_second; + shmseg->shm_atime = sysv_shmtime(); shmseg->shm_nattch++; - *retval = attach_va; - return( 0); + *retval = attach_va; /* XXX return -1 on error */ + shmat_ret = 0; + goto shmat_out; out: switch (rv) { case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: - return (ENOMEM); + shmat_ret = ENOMEM; case KERN_PROTECTION_FAILURE: - return (EACCES); + shmat_ret = EACCES; default: - return (EINVAL); + shmat_ret = EINVAL; } - +shmat_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return shmat_ret; } -struct oshmid_ds { - struct ipc_perm shm_perm; /* operation perms */ - int shm_segsz; /* size of segment (bytes) */ - ushort shm_cpid; /* pid, creator */ - ushort shm_lpid; /* pid, last operation */ - short shm_nattch; /* no. 
of current attaches */ - time_t shm_atime; /* last attach time */ - time_t shm_dtime; /* last detach time */ - time_t shm_ctime; /* last change time */ - void *shm_handle; /* internal handle for shm segment */ -}; - -struct oshmctl_args { - int shmid; - int cmd; - struct oshmid_ds *ubuf; -}; - static int -oshmctl(p, uap, retval) - struct proc *p; - struct oshmctl_args *uap; - register_t *retval; +oshmctl(__unused void *p, __unused void *uap, __unused void *retval) { -#ifdef COMPAT_43 - int error; - struct ucred *cred = p->p_ucred; - struct shmid_ds *shmseg; - struct oshmid_ds outbuf; - - if (!shm_inited) - return(EINVAL); - shmseg = shm_find_segment_by_shmid(uap->shmid); - if (shmseg == NULL) - return EINVAL; - switch (uap->cmd) { - case IPC_STAT: - error = ipcperm(cred, &shmseg->shm_perm, IPC_R); - if (error) - return error; - outbuf.shm_perm = shmseg->shm_perm; - outbuf.shm_segsz = shmseg->shm_segsz; - outbuf.shm_cpid = shmseg->shm_cpid; - outbuf.shm_lpid = shmseg->shm_lpid; - outbuf.shm_nattch = shmseg->shm_nattch; - outbuf.shm_atime = shmseg->shm_atime; - outbuf.shm_dtime = shmseg->shm_dtime; - outbuf.shm_ctime = shmseg->shm_ctime; - outbuf.shm_handle = shmseg->shm_internal; - error = copyout((caddr_t)&outbuf, uap->ubuf, sizeof(outbuf)); - if (error) - return error; - break; - default: - /* XXX casting to (sy_call_t *) is bogus, as usual. */ - return ((sy_call_t *)shmctl)(p, uap, retval); - } - return 0; -#else return EINVAL; -#endif } -#ifndef _SYS_SYSPROTO_H_ -struct shmctl_args { - int shmid; - int cmd; - struct shmid_ds *buf; -}; -#endif - int -shmctl(p, uap, retval) - struct proc *p; - struct shmctl_args *uap; - register_t *retval; +shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval) { int error; - struct ucred *cred = p->p_ucred; - struct shmid_ds inbuf; - struct shmid_ds *shmseg; + kauth_cred_t cred = kauth_cred_get(); + struct user_shmid_ds inbuf; + struct user_shmid_ds *shmseg; + size_t shmid_ds_sz = sizeof(struct user_shmid_ds); + + int shmctl_ret = 0; AUDIT_ARG(svipc_cmd, uap->cmd); AUDIT_ARG(svipc_id, uap->shmid); - if (!shm_inited) - return(EINVAL); + + SYSV_SHM_SUBSYS_LOCK(); + + if (!shm_inited) { + shmctl_ret = EINVAL; + goto shmctl_out; + } + + if (!IS_64BIT_PROCESS(p)) + shmid_ds_sz = sizeof(struct shmid_ds); + shmseg = shm_find_segment_by_shmid(uap->shmid); - if (shmseg == NULL) - return EINVAL; + if (shmseg == NULL) { + shmctl_ret = EINVAL; + goto shmctl_out; + } + /* XXAUDIT: This is the perms BEFORE any change by this call. This * may not be what is desired. 
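The rework applied across shmdt(), shmat(), and (in progress just above) shmctl() has one uniform shape: take the subsystem mutex on entry, keep a local return-value variable instead of returning early, and funnel every exit through a single label that drops the lock. A minimal sketch of that shape, not the kernel's code; the lock macros presumably expand to lck_mtx_lock()/lck_mtx_unlock() on the sysv_shm_subsys_mutex set up in sysv_shm_lock_init() further down:

int
some_shm_entry_point(struct proc *p, void *uap, register_t *retval)
{
	int ret = 0;

	SYSV_SHM_SUBSYS_LOCK();
	if (!shm_inited) {
		ret = EINVAL;
		goto out;	/* never return with the mutex held */
	}
	/* ... body may fail at several points; all paths go through out: ... */
out:
	SYSV_SHM_SUBSYS_UNLOCK();
	return ret;
}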
*/ @@ -435,30 +469,53 @@ shmctl(p, uap, retval) switch (uap->cmd) { case IPC_STAT: error = ipcperm(cred, &shmseg->shm_perm, IPC_R); - if (error) - return error; - error = copyout((caddr_t)shmseg, uap->buf, sizeof(inbuf)); - if (error) - return error; + if (error) { + shmctl_ret = error; + goto shmctl_out; + } + + if (IS_64BIT_PROCESS(p)) { + error = copyout(shmseg, uap->buf, sizeof(struct user_shmid_ds)); + } else { + struct shmid_ds shmid_ds32; + shmid_ds_64to32(shmseg, &shmid_ds32); + error = copyout(&shmid_ds32, uap->buf, sizeof(struct shmid_ds)); + } + if (error) { + shmctl_ret = error; + goto shmctl_out; + } break; case IPC_SET: error = ipcperm(cred, &shmseg->shm_perm, IPC_M); - if (error) - return error; - error = copyin(uap->buf, (caddr_t)&inbuf, sizeof(inbuf)); - if (error) - return error; + if (error) { + shmctl_ret = error; + goto shmctl_out; + } + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->buf, &inbuf, sizeof(struct user_shmid_ds)); + } else { + error = copyin(uap->buf, &inbuf, sizeof(struct shmid_ds)); + /* convert in place; ugly, but safe */ + shmid_ds_32to64((struct shmid_ds *)&inbuf, &inbuf); + } + if (error) { + shmctl_ret = error; + goto shmctl_out; + } shmseg->shm_perm.uid = inbuf.shm_perm.uid; shmseg->shm_perm.gid = inbuf.shm_perm.gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & ~ACCESSPERMS) | (inbuf.shm_perm.mode & ACCESSPERMS); - shmseg->shm_ctime = time_second; + shmseg->shm_ctime = sysv_shmtime(); break; case IPC_RMID: error = ipcperm(cred, &shmseg->shm_perm, IPC_M); - if (error) - return error; + if (error) { + shmctl_ret = error; + goto shmctl_out; + } shmseg->shm_perm.key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { @@ -471,29 +528,20 @@ shmctl(p, uap, retval) case SHM_UNLOCK: #endif default: - return EINVAL; + shmctl_ret = EINVAL; + goto shmctl_out; } - return 0; + *retval = 0; + shmctl_ret = 0; +shmctl_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return shmctl_ret; } -#ifndef _SYS_SYSPROTO_H_ -struct shmget_args { - key_t key; - size_t size; - int shmflg; -}; -#endif - static int -shmget_existing(p, uap, mode, segnum, retval) - struct proc *p; - struct shmget_args *uap; - int mode; - int segnum; - int *retval; +shmget_existing(struct shmget_args *uap, int mode, int segnum, int *retval) { - struct shmid_ds *shmseg; - struct ucred *cred = p->p_ucred; + struct user_shmid_ds *shmseg; int error; shmseg = &shmsegs[segnum]; @@ -509,7 +557,7 @@ shmget_existing(p, uap, mode, segnum, retval) return error; return EAGAIN; } - error = ipcperm(cred, &shmseg->shm_perm, mode); + error = ipcperm(kauth_cred_get(), &shmseg->shm_perm, mode); if (error) return error; if (uap->size && uap->size > shmseg->shm_segsz) @@ -521,25 +569,23 @@ shmget_existing(p, uap, mode, segnum, retval) } static int -shmget_allocate_segment(p, uap, mode, retval) - struct proc *p; - struct shmget_args *uap; - int mode; - int * retval; +shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, + int *retval) { int i, segnum, shmid, size; - struct ucred *cred = p->p_ucred; - struct shmid_ds *shmseg; + kauth_cred_t cred = kauth_cred_get(); + struct user_shmid_ds *shmseg; struct shm_handle *shm_handle; kern_return_t kret; vm_offset_t user_addr; void * mem_object; - if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) + if (uap->size < (user_size_t)shminfo.shmmin || + uap->size > (user_size_t)shminfo.shmmax) return EINVAL; if (shm_nused >= shminfo.shmmni) /* any shmids left? 
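shmctl() above now traffics in two widths of segment descriptor; the shmid_ds_64to32()/shmid_ds_32to64() converters it relies on live outside this hunk. A plausible sketch of the narrowing direction, assuming struct user_shmid_ds mirrors struct shmid_ds with widened size and time fields (the field list is inferred from the accesses elsewhere in this file, not quoted from the patch):

static void
shmid_ds_64to32_sketch(struct user_shmid_ds *in, struct shmid_ds *out)
{
	out->shm_perm = in->shm_perm;           /* ipc_perm is size-invariant */
	out->shm_segsz = (size_t)in->shm_segsz; /* truncates segments > 4GB */
	out->shm_lpid = in->shm_lpid;
	out->shm_cpid = in->shm_cpid;
	out->shm_nattch = in->shm_nattch;
	out->shm_atime = (time_t)in->shm_atime;
	out->shm_dtime = (time_t)in->shm_dtime;
	out->shm_ctime = (time_t)in->shm_ctime;
	out->shm_internal = NULL;               /* kernel-only tunnel value */
}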
*/ return ENOSPC; - size = round_page_32(uap->size); + size = mach_vm_round_page(uap->size); if (shm_committed + btoc(size) > shminfo.shmall) return ENOMEM; if (shm_last_free < 0) { @@ -558,25 +604,33 @@ * In case we sleep in malloc(), mark the segment present but deleted * so that no one else tries to create the same key. */ - kret = vm_allocate(current_map(), &user_addr, size, TRUE); + kret = vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE); if (kret != KERN_SUCCESS) goto out; - kret = mach_make_memory_entry (current_map(), &size, - user_addr, VM_PROT_DEFAULT, &mem_object, 0); + kret = mach_make_memory_entry (current_map(), &size, user_addr, + VM_PROT_DEFAULT, (mem_entry_name_port_t *)&mem_object, 0); if (kret != KERN_SUCCESS) goto out; + + vm_deallocate(current_map(), user_addr, size); + shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; shmseg->shm_perm.key = uap->key; shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff; - shm_handle = (struct shm_handle *) - _MALLOC(sizeof(struct shm_handle), M_SHM, M_WAITOK); + MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK); + if (shm_handle == NULL) { + kret = KERN_NO_SPACE; + mach_memory_entry_port_release(mem_object); + mem_object = NULL; + goto out; + } shm_handle->shm_object = mem_object; shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); - shmseg->shm_internal = shm_handle; - shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid; + shmseg->shm_internal = CAST_USER_ADDR_T(shm_handle); /* tunnel */ + shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_getuid(cred); shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | (mode & ACCESSPERMS) | SHMSEG_ALLOCATED; @@ -584,7 +638,7 @@ shmseg->shm_cpid = p->p_pid; shmseg->shm_lpid = shmseg->shm_nattch = 0; shmseg->shm_atime = shmseg->shm_dtime = 0; - shmseg->shm_ctime = time_second; + shmseg->shm_ctime = sysv_shmtime(); shm_committed += btoc(size); shm_nused++; AUDIT_ARG(svipc_perm, &shmseg->shm_perm); @@ -613,89 +667,107 @@ out: } int -shmget(p, uap, retval) - struct proc *p; - struct shmget_args *uap; - register_t *retval; +shmget(struct proc *p, struct shmget_args *uap, register_t *retval) { int segnum, mode, error; - + int shmget_ret = 0; + /* Auditing is actually done in shmget_allocate_segment() */ - if (!shm_inited) - return(EINVAL); + + SYSV_SHM_SUBSYS_LOCK(); + + if (!shm_inited) { + shmget_ret = EINVAL; + goto shmget_out; + } mode = uap->shmflg & ACCESSPERMS; if (uap->key != IPC_PRIVATE) { again: segnum = shm_find_segment_by_key(uap->key); if (segnum >= 0) { - error = shmget_existing(p, uap, mode, segnum, retval); + error = shmget_existing(uap, mode, segnum, retval); if (error == EAGAIN) goto again; - return(error); + shmget_ret = error; + goto shmget_out; + } + if ((uap->shmflg & IPC_CREAT) == 0) { + shmget_ret = ENOENT; + goto shmget_out; } - if ((uap->shmflg & IPC_CREAT) == 0) - return ENOENT; } - return( shmget_allocate_segment(p, uap, mode, retval));; + shmget_ret = shmget_allocate_segment(p, uap, mode, retval); +shmget_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return shmget_ret; /*NOTREACHED*/ } -struct shmsys_args { - u_int which; - int a2; - int a3; - int a4; -}; +/* XXX actually varargs. */ int -shmsys(p, uap, retval) - struct proc *p; - /* XXX actually varargs.
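Taken together, shmget()/shmat()/shmdt()/shmctl() are the whole user-visible lifecycle these hunks rework. A minimal user-level exercise of the standard API (nothing here is patch-specific):

#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>

int
main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	if (id == -1) { perror("shmget"); return 1; }
	char *p = (char *)shmat(id, NULL, 0);   /* kernel picks attach_va */
	if (p == (char *)-1) { perror("shmat"); return 1; }
	p[0] = 'x';                             /* segment is mapped shared */
	shmdt(p);                               /* drops shm_nattch */
	shmctl(id, IPC_RMID, NULL);             /* SHMSEG_REMOVED; freed at nattch 0 */
	return 0;
}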
*/ - struct shmsys_args *uap; - register_t *retval; +shmsys(struct proc *p, struct shmsys_args *uap, register_t *retval) { - if (!shm_inited) - return(EINVAL); + /* The routine that we are dispatching already does this */ if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0])) return EINVAL; return ((*shmcalls[uap->which])(p, &uap->a2, retval)); } -void -shmfork(p1, p2) - struct proc *p1, *p2; +/* + * Return 0 on success, 1 on failure. + */ +int +shmfork(struct proc *p1, struct proc *p2) { struct shmmap_state *shmmap_s; size_t size; int i; + int shmfork_ret = 0; - if (!shm_inited) - return; + SYSV_SHM_SUBSYS_LOCK(); + + if (!shm_inited) { + shmfork_ret = 0; + goto shmfork_out; + } + size = shminfo.shmseg * sizeof(struct shmmap_state); - shmmap_s = (struct shmmap_state *)_MALLOC(size, M_SHM, M_WAITOK); - bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size); - p2->vm_shm = (caddr_t)shmmap_s; - for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) - if (shmmap_s->shmid != -1) - shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++; + MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK); + if (shmmap_s != NULL) { + bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size); + p2->vm_shm = (caddr_t)shmmap_s; + for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) + if (shmmap_s->shmid != -1) + shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++; + shmfork_ret = 0; + goto shmfork_out; + } + + shmfork_ret = 1; /* failed to copy to child - ENOMEM */ +shmfork_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return shmfork_ret; } void -shmexit(p) - struct proc *p; +shmexit(struct proc *p) { struct shmmap_state *shmmap_s; int i; shmmap_s = (struct shmmap_state *)p->vm_shm; + + SYSV_SHM_SUBSYS_LOCK(); for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1) shm_delete_mapping(p, shmmap_s, 1); FREE((caddr_t)p->vm_shm, M_SHM); p->vm_shm = NULL; + SYSV_SHM_SUBSYS_UNLOCK(); } /* @@ -705,32 +777,42 @@ shmexit(p) * need to do to keep the System V shared memory subsystem sane. */ __private_extern__ void -shmexec(p) - struct proc *p; +shmexec(struct proc *p) { struct shmmap_state *shmmap_s; int i; shmmap_s = (struct shmmap_state *)p->vm_shm; + SYSV_SHM_SUBSYS_LOCK(); for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1) shm_delete_mapping(p, shmmap_s, 0); FREE((caddr_t)p->vm_shm, M_SHM); p->vm_shm = NULL; + SYSV_SHM_SUBSYS_UNLOCK(); } void -shminit(dummy) - void *dummy; +shminit(__unused void *dummy) { int i; int s; if (!shm_inited) { - s = sizeof(struct shmid_ds) * shminfo.shmmni; + /* + * we store internally 64 bit, since if we didn't, we would + * be unable to represent a segment size in excess of 32 bits + * with the (struct shmid_ds)->shm_segsz field; also, POSIX + * dictates this field be a size_t, which is 64 bits when + * running 64 bit binaries.
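shmsys() above is the old single-entry multiplexor: uap->which indexes a bounds-checked function table and the remaining argument words pass through untouched. The table itself is defined earlier in sysv_shm.c, outside this hunk; its historical shape (order per 4.4BSD-derived sources, inferred rather than quoted from the patch) is:

static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat,  (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt,  (sy_call_t *)shmget,
	(sy_call_t *)shmctl,
};
/* e.g. shmsys(which=3, a2=key, a3=size, a4=shmflg) dispatches to shmget() */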
+ */ + s = sizeof(struct user_shmid_ds) * shminfo.shmmni; - MALLOC(shmsegs, struct shmid_ds *, s, - M_SHM, M_WAITOK); + MALLOC(shmsegs, struct user_shmid_ds *, s, M_SHM, M_WAITOK); + if (shmsegs == NULL) { + /* XXX fail safely: leave shared memory uninited */ + return; + } for (i = 0; i < shminfo.shmmni; i++) { shmsegs[i].shm_perm.mode = SHMSEG_FREE; shmsegs[i].shm_perm.seq = 0; @@ -741,56 +823,193 @@ shminit(dummy) shm_inited = 1; } } +/* Initialize the mutex governing access to the SysV shm subsystem */ +__private_extern__ void +sysv_shm_lock_init( void ) +{ + + sysv_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(sysv_shm_subsys_lck_grp_attr); + + sysv_shm_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_shm_subsys_lck_grp_attr); + + sysv_shm_subsys_lck_attr = lck_attr_alloc_init(); + /* lck_attr_setdebug(sysv_shm_subsys_lck_attr); */ + lck_mtx_init(&sysv_shm_subsys_mutex, sysv_shm_subsys_lck_grp, sysv_shm_subsys_lck_attr); +} /* (struct sysctl_oid *oidp, void *arg1, int arg2, \ struct sysctl_req *req) */ static int -sysctl_shminfo SYSCTL_HANDLER_ARGS +sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1, + __unused int arg2, struct sysctl_req *req) { int error = 0; + int sysctl_shminfo_ret = 0; - error = SYSCTL_OUT(req, arg1, sizeof(int)); - if (error || !req->newptr) + error = SYSCTL_OUT(req, arg1, sizeof(user_ssize_t)); + if (error || req->newptr == USER_ADDR_NULL) return(error); + SYSV_SHM_SUBSYS_LOCK(); /* Set the values only if shared memory is not initialised */ if (!shm_inited) { - if (error = SYSCTL_IN(req, arg1, sizeof(int))) - return(error); + if ((error = SYSCTL_IN(req, arg1, sizeof(user_ssize_t))) + != 0) { + sysctl_shminfo_ret = error; + goto sysctl_shminfo_out; + } + if (arg1 == &shminfo.shmmax) { - if (shminfo.shmmax & PAGE_MASK) { - shminfo.shmmax = -1; - return(EINVAL); + if (shminfo.shmmax & PAGE_MASK_64) { + shminfo.shmmax = (user_ssize_t)-1; + sysctl_shminfo_ret = EINVAL; + goto sysctl_shminfo_out; } } /* Initialize only when all values are set */ - if ((shminfo.shmmax != -1) && - (shminfo.shmmin != -1) && - (shminfo.shmmni != -1) && - (shminfo.shmseg != -1) && - (shminfo.shmall != -1)) { + if ((shminfo.shmmax != (user_ssize_t)-1) && + (shminfo.shmmin != (user_ssize_t)-1) && + (shminfo.shmmni != (user_ssize_t)-1) && + (shminfo.shmseg != (user_ssize_t)-1) && + (shminfo.shmall != (user_ssize_t)-1)) { shminit(NULL); } } - return(0); + sysctl_shminfo_ret = 0; +sysctl_shminfo_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return sysctl_shminfo_ret; +} + +static int +IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error; + int cursor; + union { + struct IPCS_command u32; + struct user_IPCS_command u64; + } ipcs; + struct shmid_ds shmid_ds32; /* post conversion, 32 bit version */ + void *shmid_dsp; + size_t ipcs_sz = sizeof(struct user_IPCS_command); + size_t shmid_ds_sz = sizeof(struct user_shmid_ds); + struct proc *p = current_proc(); + + int ipcs__shminfo_ret = 0; + + SYSV_SHM_SUBSYS_LOCK(); + + if (!shm_inited) { + error = EINVAL; + goto ipcs_shm_sysctl_out; + } + + if (!IS_64BIT_PROCESS(p)) { + ipcs_sz = sizeof(struct IPCS_command); + shmid_ds_sz = sizeof(struct shmid_ds); + } + + /* Copy in the command structure */ + if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) { + goto ipcs_shm_sysctl_out; + } + + if (!IS_64BIT_PROCESS(p)) /* convert in place */ + ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data); + + /* Let us version this interface... 
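The sysctl handler above makes the five shminfo tunables writable only until the subsystem initializes itself, and the switch to CTLTYPE_QUAD ("Q") below widens the values crossing the user boundary to 64 bits. A hypothetical user-space write (OID name per the SYSCTL_NODE/SYSCTL_PROC declarations below; sysctlbyname() is the stock libc call):

#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t shmmax = 16 * 1024 * 1024;	/* must stay page-aligned */

	if (sysctlbyname("kern.sysv.shmmax", NULL, NULL,
	    &shmmax, sizeof(shmmax)) == -1)
		perror("sysctlbyname");
	/* per the handler above, writes are silently ignored once shm_inited is set */
	return 0;
}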
*/ + if (ipcs.u64.ipcs_magic != IPCS_MAGIC) { + error = EINVAL; + goto ipcs_shm_sysctl_out; + } + + switch(ipcs.u64.ipcs_op) { + case IPCS_SHM_CONF: /* Obtain global configuration data */ + if (ipcs.u64.ipcs_datalen != sizeof(struct shminfo)) { + if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */ + error = ENOMEM; + break; + } + error = ERANGE; + break; + } + error = copyout(&shminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + break; + + case IPCS_SHM_ITER: /* Iterate over existing segments */ + cursor = ipcs.u64.ipcs_cursor; + if (cursor < 0 || cursor >= shminfo.shmmni) { + error = ERANGE; + break; + } + if (ipcs.u64.ipcs_datalen != (int)shmid_ds_sz) { + error = ENOMEM; + break; + } + for( ; cursor < shminfo.shmmni; cursor++) { + if (shmsegs[cursor].shm_perm.mode & SHMSEG_ALLOCATED) + break; + continue; + } + if (cursor == shminfo.shmmni) { + error = ENOENT; + break; + } + + shmid_dsp = &shmsegs[cursor]; /* default: 64 bit */ + + /* + * If necessary, convert the 64 bit kernel segment + * descriptor to a 32 bit user one. + */ + if (!IS_64BIT_PROCESS(p)) { + shmid_ds_64to32(shmid_dsp, &shmid_ds32); + shmid_dsp = &shmid_ds32; + } + error = copyout(shmid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); + if (!error) { + /* update cursor */ + ipcs.u64.ipcs_cursor = cursor + 1; + + if (!IS_64BIT_PROCESS(p)) /* convert in place */ + ipcs.u32.ipcs_data = CAST_DOWN(void *,ipcs.u64.ipcs_data); + error = SYSCTL_OUT(req, &ipcs, ipcs_sz); + } + break; + + default: + error = EINVAL; + break; + } +ipcs_shm_sysctl_out: + SYSV_SHM_SUBSYS_UNLOCK(); + return(error); } SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); -SYSCTL_PROC(_kern_sysv, KSYSV_SHMMAX, shmmax, CTLTYPE_INT | CTLFLAG_RW, - &shminfo.shmmax, 0, &sysctl_shminfo ,"I","shmmax"); +SYSCTL_PROC(_kern_sysv, KSYSV_SHMMAX, shmmax, CTLTYPE_QUAD | CTLFLAG_RW, + &shminfo.shmmax, 0, &sysctl_shminfo ,"Q","shmmax"); -SYSCTL_PROC(_kern_sysv, KSYSV_SHMMIN, shmmin, CTLTYPE_INT | CTLFLAG_RW, - &shminfo.shmmin, 0, &sysctl_shminfo ,"I","shmmin"); +SYSCTL_PROC(_kern_sysv, KSYSV_SHMMIN, shmmin, CTLTYPE_QUAD | CTLFLAG_RW, + &shminfo.shmmin, 0, &sysctl_shminfo ,"Q","shmmin"); -SYSCTL_PROC(_kern_sysv, KSYSV_SHMMNI, shmmni, CTLTYPE_INT | CTLFLAG_RW, - &shminfo.shmmni, 0, &sysctl_shminfo ,"I","shmmni"); +SYSCTL_PROC(_kern_sysv, KSYSV_SHMMNI, shmmni, CTLTYPE_QUAD | CTLFLAG_RW, + &shminfo.shmmni, 0, &sysctl_shminfo ,"Q","shmmni"); -SYSCTL_PROC(_kern_sysv, KSYSV_SHMSEG, shmseg, CTLTYPE_INT | CTLFLAG_RW, - &shminfo.shmseg, 0, &sysctl_shminfo ,"I","shmseg"); +SYSCTL_PROC(_kern_sysv, KSYSV_SHMSEG, shmseg, CTLTYPE_QUAD | CTLFLAG_RW, + &shminfo.shmseg, 0, &sysctl_shminfo ,"Q","shmseg"); -SYSCTL_PROC(_kern_sysv, KSYSV_SHMALL, shmall, CTLTYPE_INT | CTLFLAG_RW, - &shminfo.shmall, 0, &sysctl_shminfo ,"I","shmall"); +SYSCTL_PROC(_kern_sysv, KSYSV_SHMALL, shmall, CTLTYPE_QUAD | CTLFLAG_RW, + &shminfo.shmall, 0, &sysctl_shminfo ,"Q","shmall"); +SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW, 0, "SYSVIPCS"); +SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW|CTLFLAG_ANYBODY, + 0, 0, IPCS_shm_sysctl, + "S,IPCS_shm_command", + "ipcs shm command interface"); diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index dee8138ff..824160f87 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
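Back in the sysv_shm.c hunk that ends just above, the kern.sysv.ipcs.shm handler gives user space a cursor-driven iterator over shmsegs[]. A sketch of driving IPCS_SHM_ITER from a 64-bit process, assuming the user_IPCS_command declarations and IPCS_* constants are exported in a user-visible header (this patch only shows their kernel-side use):

#include <sys/sysctl.h>
#include <stdint.h>
#include <errno.h>
#include <stdio.h>

static void
list_shm_segments(void)
{
	struct user_IPCS_command ic;
	struct user_shmid_ds ds;
	size_t len = sizeof(ic);

	ic.ipcs_magic = IPCS_MAGIC;
	ic.ipcs_op = IPCS_SHM_ITER;
	ic.ipcs_cursor = 0;			/* advanced by the kernel */
	ic.ipcs_datalen = sizeof(ds);
	ic.ipcs_data = (user_addr_t)(uintptr_t)&ds;

	while (sysctlbyname("kern.sysv.ipcs.shm", &ic, &len, &ic, sizeof(ic)) == 0)
		printf("slot %d: %llu bytes\n", (int)(ic.ipcs_cursor - 1),
		    (unsigned long long)ds.shm_segsz);
	if (errno != ENOENT)			/* ENOENT == ran off the table */
		perror("IPCS_SHM_ITER");
}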
* * @APPLE_LICENSE_HEADER_START@ * @@ -98,8 +98,9 @@ #include #undef TTYDEFCHARS #include -#include -#include +#include +#include +#include #include #include #include @@ -132,20 +133,22 @@ #include #endif /* 0 ] */ #endif /* !NeXT */ +#include /* averunnable */ #ifndef NeXT -static int proc_compare __P((struct proc *p1, struct proc *p2)); +static int proc_compare(struct proc *p1, struct proc *p2); #endif /* NeXT */ -static int ttnread __P((struct tty *tp)); -static void ttyecho __P((int c, struct tty *tp)); -static int ttyoutput __P((int c, register struct tty *tp)); -static void ttypend __P((struct tty *tp)); -static void ttyretype __P((struct tty *tp)); -static void ttyrub __P((int c, struct tty *tp)); -static void ttyrubo __P((struct tty *tp, int cnt)); -static void ttystop __P((struct tty *tp, int rw)); -static void ttyunblock __P((struct tty *tp)); -static int ttywflush __P((struct tty *tp)); +static int ttnread(struct tty *tp); +static void ttyecho(int c, struct tty *tp); +static int ttyoutput(int c, register struct tty *tp); +static void ttypend(struct tty *tp); +static void ttyretype(struct tty *tp); +static void ttyrub(int c, struct tty *tp); +static void ttyrubo(struct tty *tp, int count); +static void ttystop(struct tty *tp, int rw); +static void ttyunblock(struct tty *tp); +static int ttywflush(struct tty *tp); +static int proc_compare(struct proc *p1, struct proc *p2); /* * Table with character classes and parity. The 8th bit indicates parity, @@ -236,6 +239,37 @@ static u_char const char_type[] = { #undef MAX_INPUT /* XXX wrong in */ #define MAX_INPUT TTYHOG +static void +termios32to64(struct termios *in, struct user_termios *out) +{ + out->c_iflag = (user_tcflag_t)in->c_iflag; + out->c_oflag = (user_tcflag_t)in->c_oflag; + out->c_cflag = (user_tcflag_t)in->c_cflag; + out->c_lflag = (user_tcflag_t)in->c_lflag; + + /* bcopy is OK, since this type is ILP32/LP64 size invariant */ + bcopy(in->c_cc, out->c_cc, sizeof(in->c_cc)); + + out->c_ispeed = (user_speed_t)in->c_ispeed; + out->c_ospeed = (user_speed_t)in->c_ospeed; +} + +static void +termios64to32(struct user_termios *in, struct termios *out) +{ + out->c_iflag = (tcflag_t)in->c_iflag; + out->c_oflag = (tcflag_t)in->c_oflag; + out->c_cflag = (tcflag_t)in->c_cflag; + out->c_lflag = (tcflag_t)in->c_lflag; + + /* bcopy is OK, since this type is ILP32/LP64 size invariant */ + bcopy(in->c_cc, out->c_cc, sizeof(in->c_cc)); + + out->c_ispeed = (speed_t)in->c_ispeed; + out->c_ospeed = (speed_t)in->c_ospeed; +} + + /* * Initial open of tty, or (re)entry to standard tty line discipline. */ @@ -778,41 +812,31 @@ ttyoutput(c, tp) */ /* ARGSUSED */ int -#ifndef NeXT -ttioctl(tp, cmd, data, flag) - register struct tty *tp; - int cmd, flag; - void *data; -#else -ttioctl(tp, cmd, data, flag, p) - register struct tty *tp; - u_long cmd; - caddr_t data; - int flag; - struct proc *p; -#endif +ttioctl(register struct tty *tp, + u_long cmd, caddr_t data, int flag, + struct proc *p) { -#ifndef NeXT - register struct proc *p = curproc; /* XXX */ -#endif int s, error; struct uthread *ut; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); /* If the ioctl involves modification, hang if in the background. 
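termios32to64()/termios64to32() above are the LP64 seam for the TIOCSETA_64-family ioctls handled further down: the flag and speed fields are widened to user_tcflag_t/user_speed_t, while c_cc is size-invariant and copied raw. A quick kernel-side sanity sketch (not in the patch) of the intended round-trip for values that fit the 32-bit struct:

static int
termios_roundtrip_ok(struct termios *t)
{
	struct user_termios wide;
	struct termios back;

	termios32to64(t, &wide);        /* widen */
	termios64to32(&wide, &back);    /* narrow again */
	return t->c_iflag == back.c_iflag && t->c_oflag == back.c_oflag &&
	    t->c_cflag == back.c_cflag && t->c_lflag == back.c_lflag &&
	    t->c_ispeed == back.c_ispeed && t->c_ospeed == back.c_ospeed &&
	    bcmp(t->c_cc, back.c_cc, sizeof(t->c_cc)) == 0;
}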
*/ switch (cmd) { case TIOCFLUSH: case TIOCSETA: + case TIOCSETA_64: case TIOCSETD: case TIOCSETAF: + case TIOCSETAF_64: case TIOCSETAW: + case TIOCSETAW_64: #ifdef notdef case TIOCSPGRP: #endif case TIOCSTAT: case TIOCSTI: case TIOCSWINSZ: -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +#if COMPAT_43_TTY || defined(COMPAT_SUNOS) case TIOCLBIC: case TIOCLBIS: case TIOCLSET: @@ -884,7 +908,7 @@ ttioctl(tp, cmd, data, flag, p) return (EBUSY); } #if defined(NeXT) || !defined(UCONSOLE) - if ( (error = suser(p->p_ucred, &p->p_acflag)) ) + if ( (error = suser(kauth_cred_get(), &p->p_acflag)) ) return (error); #endif constty = tp; @@ -907,10 +931,13 @@ ttioctl(tp, cmd, data, flag, p) if (error) return (error); break; - case TIOCGETA: { /* get termios struct */ - struct termios *t = (struct termios *)data; - - bcopy(&tp->t_termios, t, sizeof(struct termios)); + case TIOCGETA: /* get termios struct */ + case TIOCGETA_64: { /* get termios struct */ + if (IS_64BIT_PROCESS(p)) { + termios32to64(&tp->t_termios, (struct user_termios *)data); + } else { + bcopy(&tp->t_termios, data, sizeof(struct termios)); + } break; } case TIOCGETD: /* get line discipline */ @@ -940,20 +967,29 @@ ttioctl(tp, cmd, data, flag, p) *(int *)data = tp->t_outq.c_cc; break; case TIOCSETA: /* set termios struct */ + case TIOCSETA_64: case TIOCSETAW: /* drain output, set */ - case TIOCSETAF: { /* drn out, fls in, set */ + case TIOCSETAW_64: + case TIOCSETAF: /* drn out, fls in, set */ + case TIOCSETAF_64: { /* drn out, fls in, set */ register struct termios *t = (struct termios *)data; + struct termios lcl_termios; + if (IS_64BIT_PROCESS(p)) { + termios64to32((struct user_termios *)data, &lcl_termios); + t = &lcl_termios; + } if (t->c_ispeed < 0 || t->c_ospeed < 0) return (EINVAL); s = spltty(); - if (cmd == TIOCSETAW || cmd == TIOCSETAF) { + if (cmd == TIOCSETAW || cmd == TIOCSETAF || + cmd == TIOCSETAW_64 || cmd == TIOCSETAF_64) { error = ttywait(tp); if (error) { splx(s); return (error); } - if (cmd == TIOCSETAF) + if (cmd == TIOCSETAF || cmd == TIOCSETAF_64) ttyflush(tp, FREAD); } if (!ISSET(t->c_cflag, CIGNORE)) { @@ -990,7 +1026,7 @@ ttioctl(tp, cmd, data, flag, p) ttsetwater(tp); } if (ISSET(t->c_lflag, ICANON) != ISSET(tp->t_lflag, ICANON) && - cmd != TIOCSETAF) { + cmd != TIOCSETAF && cmd != TIOCSETAF_64) { if (ISSET(t->c_lflag, ICANON)) SET(tp->t_lflag, PENDIN); else { @@ -1045,9 +1081,8 @@ ttioctl(tp, cmd, data, flag, p) case TIOCSETD: { /* set line discipline */ register int t = *(int *)data; dev_t device = tp->t_dev; - extern int nlinesw; - if ((u_int)t >= nlinesw) + if (t >= nlinesw) return (ENXIO); if (t != tp->t_line) { s = spltty(); @@ -1074,9 +1109,9 @@ ttioctl(tp, cmd, data, flag, p) splx(s); break; case TIOCSTI: /* simulate terminal input */ - if (p->p_ucred->cr_uid && (flag & FREAD) == 0) + if (suser(kauth_cred_get(), NULL) && (flag & FREAD) == 0) return (EPERM); - if (p->p_ucred->cr_uid && !isctty(p, tp)) + if (suser(kauth_cred_get(), NULL) && !isctty(p, tp)) return (EACCES); s = spltty(); (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); @@ -1132,7 +1167,7 @@ ttioctl(tp, cmd, data, flag, p) } break; case TIOCSDRAINWAIT: - error = suser(p->p_ucred, &p->p_acflag); + error = suser(kauth_cred_get(), &p->p_acflag); if (error) return (error); tp->t_timeout = *(int *)data * hz; @@ -1143,14 +1178,14 @@ ttioctl(tp, cmd, data, flag, p) *(int *)data = tp->t_timeout / hz; break; default: -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +#if COMPAT_43_TTY || defined(COMPAT_SUNOS) #ifdef NeXT return (ttcompat(tp, 
cmd, data, flag, p)); #else return (ttcompat(tp, cmd, data, flag)); #endif /* NeXT */ #else - return (-1); + return (ENOTTY); #endif } @@ -1589,7 +1624,7 @@ ttread(tp, uio, flag) funnel_state = thread_funnel_set(kernel_flock, TRUE); - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); loop: s = spltty(); @@ -1654,7 +1689,6 @@ loop: int m = cc[VMIN]; long t = cc[VTIME]; struct timeval etime, timecopy; - int x; /* * Check each of the four combinations. @@ -1683,9 +1717,7 @@ loop: goto sleep; if (qp->c_cc >= m) goto read; - x = splclock(); - timecopy = time; - splx(x); + microuptime(&timecopy); if (!has_etime) { /* first character, start timer */ has_etime = 1; @@ -1714,9 +1746,7 @@ loop: } else { /* m == 0 */ if (qp->c_cc > 0) goto read; - x = splclock(); - timecopy = time; - splx(x); + microuptime(&timecopy); if (!has_etime) { has_etime = 1; @@ -1789,7 +1819,7 @@ read: char ibuf[IBUFSIZ]; int icc; - icc = min(uio->uio_resid, IBUFSIZ); + icc = min(uio_resid(uio), IBUFSIZ); icc = q_to_b(qp, ibuf, icc); if (icc <= 0) { if (first) @@ -1808,7 +1838,7 @@ read: #endif if (error) break; - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) break; first = 0; } @@ -1857,7 +1887,7 @@ slowcase: ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpinc((struct snoop *)tp->t_sc, (char)c); #endif - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) break; /* * In canonical mode check for a "break character" @@ -1895,10 +1925,11 @@ ttycheckoutq(tp, wait) register struct tty *tp; int wait; { - int hiwat, s, oldsig; + int hiwat, s; + sigset_t oldsig; struct uthread *ut; - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); hiwat = tp->t_hiwat; s = spltty(); @@ -1931,23 +1962,24 @@ ttwrite(tp, uio, flag) register char *cp = NULL; register int cc, ce; register struct proc *p; - int i, hiwat, cnt, error, s; + int i, hiwat, count, error, s; char obuf[OBUFSIZ]; boolean_t funnel_state; struct uthread *ut; funnel_state = thread_funnel_set(kernel_flock, TRUE); - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); hiwat = tp->t_hiwat; - cnt = uio->uio_resid; + // LP64todo - fix this! + count = uio_resid(uio); error = 0; cc = 0; loop: s = spltty(); if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); - if (uio->uio_resid == cnt) + if (uio_resid(uio) == count) error = EIO; goto out; } @@ -1988,9 +2020,9 @@ loop: * output translation. Keep track of high water mark, sleep on * overflow awaiting device aid in acquiring new space. */ - while (uio->uio_resid > 0 || cc > 0) { + while (uio_resid(uio) > 0 || cc > 0) { if (ISSET(tp->t_lflag, FLUSHO)) { - uio->uio_resid = 0; + uio_setresid(uio, 0); thread_funnel_set(kernel_flock, funnel_state); return (0); } @@ -2001,7 +2033,7 @@ loop: * leftover from last time. */ if (cc == 0) { - cc = min(uio->uio_resid, OBUFSIZ); + cc = min(uio_resid(uio), OBUFSIZ); cp = obuf; error = uiomove(cp, cc, uio); if (error) { @@ -2027,7 +2059,7 @@ loop: ce = cc; else { ce = cc - scanc((u_int)cc, (u_char *)cp, - (u_char *)char_type, CCLASSMASK); + char_type, CCLASSMASK); /* * If ce is zero, then we're processing * a special character through ttyoutput. @@ -2105,7 +2137,7 @@ out: * offset and iov pointers have moved forward, but it doesn't matter * (the call will either return short or restart with a new uio). 
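The VMIN/VTIME cases in ttread() above implement the standard non-canonical read timeouts, now timed against microuptime() instead of the old splclock()-protected global time. From user space the same machinery is driven like this:

#include <termios.h>
#include <unistd.h>

void
set_read_timeout(int fd)
{
	struct termios t;

	tcgetattr(fd, &t);
	t.c_lflag &= ~(ICANON | ECHO);  /* non-canonical: VMIN/VTIME apply */
	t.c_cc[VMIN] = 4;               /* m > 0: wake after 4 bytes ... */
	t.c_cc[VTIME] = 10;             /* t > 0: ... or 1s after the first byte */
	tcsetattr(fd, TCSANOW, &t);     /* reaches ttioctl() as a TIOCSETA-class cmd */
}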
*/ - uio->uio_resid += cc; + uio_setresid(uio, (uio_resid(uio) + cc)); thread_funnel_set(kernel_flock, funnel_state); return (error); @@ -2134,9 +2166,9 @@ ovhiwat: } if (flag & IO_NDELAY) { splx(s); - uio->uio_resid += cc; + uio_setresid(uio, (uio_resid(uio) + cc)); thread_funnel_set(kernel_flock, funnel_state); - return (uio->uio_resid == cnt ? EWOULDBLOCK : 0); + return (uio_resid(uio) == count ? EWOULDBLOCK : 0); } SET(tp->t_state, TS_SO_OLOWAT); error = ttysleep(tp, TSA_OLOWAT(tp), TTOPRI | PCATCH, "ttywri", @@ -2242,15 +2274,13 @@ ttyrub(c, tp) } /* - * Back over cnt characters, erasing them. + * Back over count characters, erasing them. */ static void -ttyrubo(tp, cnt) - register struct tty *tp; - int cnt; +ttyrubo(struct tty *tp, int count) { - while (cnt-- > 0) { + while (count-- > 0) { (void)ttyoutput('\b', tp); (void)ttyoutput(' ', tp); (void)ttyoutput('\b', tp); @@ -2395,10 +2425,10 @@ ttspeedtab(speed, table) * */ void -ttsetwater(tp) - struct tty *tp; +ttsetwater(struct tty *tp) { - register int cps, x; + int cps; + unsigned int x; #define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x)) @@ -2413,17 +2443,107 @@ ttsetwater(tp) /* NeXT ttyinfo has been converted to the MACH kernel */ #include +/* XXX Should be in Mach header , but doesn't work */ +extern kern_return_t thread_info_internal(thread_t thread, + thread_flavor_t flavor, + thread_info_t thread_info_out, + mach_msg_type_number_t *thread_info_count); + /* * Report on state of foreground process group. */ void -ttyinfo(tp) - register struct tty *tp; +ttyinfo(struct tty *tp) { - /* NOT IMPLEMENTED FOR MACH */ + int load; + thread_t thread; + uthread_t uthread; + struct proc *p; + struct proc *pick; + const char *state; + struct timeval utime; + struct timeval stime; + thread_basic_info_data_t basic_info; + mach_msg_type_number_t mmtn = THREAD_BASIC_INFO_COUNT; + + if (ttycheckoutq(tp,0) == 0) + return; + + /* Print load average. */ + load = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT; + ttyprintf(tp, "load: %d.%02d ", load / 100, load % 100); + + /* + * On return following a ttyprintf(), we set tp->t_rocount to 0 so + * that pending input will be retyped on BS. + */ + if (tp->t_session == NULL) { + ttyprintf(tp, "not a controlling terminal\n"); + tp->t_rocount = 0; + return; +} + if (tp->t_pgrp == NULL) { + ttyprintf(tp, "no foreground process group\n"); + tp->t_rocount = 0; + return; + } + /* first process in process group */ + if ((p = tp->t_pgrp->pg_members.lh_first) == NULL) { + ttyprintf(tp, "empty foreground process group\n"); + tp->t_rocount = 0; + return; + } + + /* + * Pick the most interesting process and copy some of its + * state for printing later. 
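The load line ttyinfo() prints above is plain fixed-point arithmetic: averunnable.ldavg[] carries FSCALE units per 1.00 of load (FSCALE = 1 << FSHIFT, and FSHIFT is 11, so 2048), and the expression rounds to hundredths. Worked through in isolation:

#include <stdio.h>

#define FSHIFT 11               /* as in bsd/sys/param.h */
#define FSCALE (1 << FSHIFT)

int
main(void)
{
	int raw = 3 * FSCALE / 2;                          /* a load of 1.50 */
	int load = (raw * 100 + FSCALE / 2) >> FSHIFT;     /* = 150 */
	printf("load: %d.%02d\n", load / 100, load % 100); /* "load: 1.50" */
	return 0;
}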
+ */ + for (pick = NULL; p != NULL; p = p->p_pglist.le_next) { + if (proc_compare(pick, p)) + pick = p; + } + + if (TAILQ_EMPTY(&pick->p_uthlist) || + (uthread = TAILQ_FIRST(&pick->p_uthlist)) == NULL || + (thread = uthread->uu_act) == NULL || + (thread_info_internal(thread, THREAD_BASIC_INFO, (thread_info_t)&basic_info, &mmtn) != KERN_SUCCESS)) { + ttyprintf(tp, "foreground process without thread\n"); + tp->t_rocount = 0; + return; + } + + switch(basic_info.run_state) { + case TH_STATE_RUNNING: + state = "running"; + break; + case TH_STATE_STOPPED: + state = "stopped"; + break; + case TH_STATE_WAITING: + state = "waiting"; + break; + case TH_STATE_UNINTERRUPTIBLE: + state = "uninterruptible"; + break; + case TH_STATE_HALTED: + state = "halted"; + break; + default: + state = "unknown"; + break; + } + calcru(pick, &utime, &stime, NULL); + + /* Print command, pid, state, utime, and stime */ + ttyprintf(tp, " cmd: %s %d %s %ld.%02ldu %ld.%02lds\n", + pick->p_comm, + pick->p_pid, + state, + (long)utime.tv_sec, utime.tv_usec / 10000, + (long)stime.tv_sec, stime.tv_usec / 10000); + tp->t_rocount = 0; } -#ifndef NeXT /* * Returns 1 if p2 is "better" than p1 * @@ -2433,8 +2553,7 @@ ttyinfo(tp) * 2) Runnable processes are favored over anything else. The runner * with the highest cpu utilization is picked (p_estcpu). Ties are * broken by picking the highest pid. - * 3) The sleeper with the shortest sleep time is next. With ties, - * we pick out just "short-term" sleepers (P_SINTR == 0). + * 3) The sleeper with the shortest sleep time is next. * 4) Further ties are broken by picking the highest pid. */ #define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL)) @@ -2486,16 +2605,8 @@ proc_compare(p1, p2) return (0); if (p1->p_slptime > p2->p_slptime) return (1); - /* - * favor one sleeping in a non-interruptible sleep - */ - if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0) - return (1); - if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0) - return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } -#endif /* NeXT */ /* * Output char to tty; console putchar style. @@ -2527,11 +2638,7 @@ tputchar(c, tp) * at the start of the call. */ int -ttysleep(tp, chan, pri, wmesg, timo) - struct tty *tp; - void *chan; - int pri, timo; - char *wmesg; +ttysleep(struct tty *tp, void *chan, int pri, const char *wmesg, int timo) { int error; int gen; @@ -2548,17 +2655,18 @@ ttysleep(tp, chan, pri, wmesg, timo) * Allocate a tty structure and its associated buffers. 
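Restated compactly, the proc_compare() ordering that the pick loop above applies to the foreground process group (a paraphrase of the rules listed in its comment, including the NULL seed from the first iteration; a sketch, not the kernel's code):

static int
prefer_p2(struct proc *p1, struct proc *p2)
{
	if (p1 == NULL)
		return 1;                        /* first candidate wins */
	if (ISRUN(p2) != ISRUN(p1))
		return ISRUN(p2);                /* runnable beats sleeping */
	if (ISRUN(p1)) {
		if (p1->p_estcpu != p2->p_estcpu)
			return p2->p_estcpu > p1->p_estcpu;
		return p2->p_pid > p1->p_pid;    /* tie: highest pid */
	}
	if (p1->p_slptime != p2->p_slptime)
		return p2->p_slptime < p1->p_slptime; /* shortest sleep wins */
	return p2->p_pid > p1->p_pid;
}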
*/ struct tty * -ttymalloc() +ttymalloc(void) { struct tty *tp; - MALLOC(tp, struct tty *, sizeof(struct tty), M_TTYS, M_WAITOK); - bzero(tp, sizeof *tp); - /* XXX: default to TTYCLSIZE(1024) chars for now */ - clalloc(&tp->t_rawq, TTYCLSIZE, 1); - clalloc(&tp->t_canq, TTYCLSIZE, 1); - /* output queue doesn't need quoting */ - clalloc(&tp->t_outq, TTYCLSIZE, 0); + MALLOC(tp, struct tty *, sizeof(struct tty), M_TTYS, M_WAITOK|M_ZERO); + if (tp != NULL) { + /* XXX: default to TTYCLSIZE(1024) chars for now */ + clalloc(&tp->t_rawq, TTYCLSIZE, 1); + clalloc(&tp->t_canq, TTYCLSIZE, 1); + /* output queue doesn't need quoting */ + clalloc(&tp->t_outq, TTYCLSIZE, 0); + } return(tp); } @@ -2591,8 +2699,7 @@ ttymalloc() { struct tty *tp; - tp = _MALLOC(sizeof *tp, M_TTYS, M_WAITOK); - bzero(tp, sizeof *tp); + MALLOC(tp, struct tty *, sizeof *tp, M_TTYS, M_WAITOK|M_ZERO); return (tp); } #endif diff --git a/bsd/kern/tty_compat.c b/bsd/kern/tty_compat.c index cf6e818b1..346ac762b 100644 --- a/bsd/kern/tty_compat.c +++ b/bsd/kern/tty_compat.c @@ -62,30 +62,30 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include #include #include -/* NeXT Move define down here cause COMPAT_43 not valid earlier */ -#if COMPAT_43 || defined(COMPAT_SUNOS) +/* NeXT Move define down here cause COMPAT_43_TTY not valid earlier */ +#if COMPAT_43_TTY || defined(COMPAT_SUNOS) -static int ttcompatgetflags __P((struct tty *tp)); -static void ttcompatsetflags __P((struct tty *tp, struct termios *t)); -static void ttcompatsetlflags __P((struct tty *tp, struct termios *t)); -static int ttcompatspeedtab __P((int speed, struct speedtab *table)); - - -static int ttydebug = 0; - -#ifndef NeXT -SYSCTL_INT(_debug, OID_AUTO, ttydebug, CTLFLAG_RW, &ttydebug, 0, ""); -#endif +static int ttcompatgetflags(struct tty *tp); +static void ttcompatsetflags(struct tty *tp, struct termios *t); +static void ttcompatsetlflags(struct tty *tp, struct termios *t); +static int ttcompatspeedtab(int speed, struct speedtab *table); +/* + * These two tables encode baud rate to speed code and speed code to + * baud rate information. They are a mapping between the <termios.h> + * baud rate constants and the <ttydev.h> + * baud rate constants. We + * cannot use those constants directly here because they occupy the same + * name space. + */ static struct speedtab compatspeeds[] = { #define MAX_SPEED 17 { 115200, 17 }, @@ -113,10 +113,30 @@ static int compatspcodes[] = { 1800, 2400, 4800, 9600, 19200, 38400, 57600, 115200, }; +/* + * ttcompatspeedtab + * + * Description: Given a baud rate value as an integer, and a speed table, + * convert the baud rate to a speed code, according to the + * contents of the table. This effectively changes termios.h + * baud rate values into ttydev.h baud rate codes. + * + * Parameters: int speed Baud rate, as an integer + * struct speedtab *table Baud rate to speed code table + * + * Returns: 1 B50 speed code; returned if we + * cannot find an answer in the table. + * 0 If a 0 was requested in order to + * trigger a hangup (250ms of line + * silence, per Bell 103C standard). + * * A speed code matching the requested + * baud rate (potentially rounded down, + * if there is no exact match). + * + * Notes: This function is used for TIOCGETP, TIOCSETP, and TIOCSETN.
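A worked pass through the two tables above: ttcompatspeedtab(9600, compatspeeds) walks down to the entry for 9600 and returns its code, 13 (the historical sgtty B9600; the head of compatspcodes[] falls outside this hunk, so that index is from the 4.3BSD table rather than quoted here), and compatspcodes[13] maps it back for the TIOCGETP direction:

	int code = ttcompatspeedtab(9600, compatspeeds); /* -> 13 (B9600) */
	int baud = compatspcodes[code];                  /* -> 9600 */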
+ */ static int -ttcompatspeedtab(speed, table) - int speed; - register struct speedtab *table; +ttcompatspeedtab(int speed, struct speedtab *table) { if (speed == 0) return (0); /* hangup */ @@ -126,25 +146,67 @@ ttcompatspeedtab(speed, table) return (1); /* 50, min and not hangup */ } -#ifndef NeXT -int -ttsetcompat(tp, com, data, term) - register struct tty *tp; - int *com; - caddr_t data; - struct termios *term; -#else +/* + * ttsetcompat + * + * Description: Convert backward compatibility set command arguments as + * follows: + * + * TIOCSETP -> TIOCSETAF + * TIOCSETN -> TIOCSETA + * TIOCSETC -> TIOCSETA + * TIOCSLTC -> TIOCSETA + * TIOCLBIS -> TIOCSETA + * TIOCLBIC -> TIOCSETA + * TIOCLSET -> TIOCSETA + * + * The converted command argument and potentially modified 'term' + * argument are returned to the caller, which will then call ttioctl(), + * if this function returns successfully. + * + * Parameters: struct tty *tp The tty on which the operation is + * being performed. + * u_long *com A pointer to the terminal input/output + * command being requested; its contents + * will be modified per the table above, + * on a non-error return. + * caddr_t data Command specific parameter data; this + * data is read but not modified. + * struct termios *term A local stack termios structure from + * ttcompat(), whose contents are to be + * modified based on *com and *data. + * + * Returns: EINVAL An input speed or output speed is + * outside the allowable range for a + * TIOCSETP or TIOCSETN command. + * 0 All other cases return 0. + * + * Notes: This function may modify the contents of the tp->t_flags + * field in a successful call to TIOCSETP, TIOCSETN, TIOCLBIS, + * TIOCLBIC, or TIOCLSET. + * + * All other tp fields will remain unmodified, since the struct + * termios is a local stack copy from ttcompat(), and not the + * real thing. A subsequent call to ttioctl() in ttcompat(), + * however, may result in subsequent changes. + */ __private_extern__ int -ttsetcompat(tp, com, data, term) - register struct tty *tp; - u_long *com; - caddr_t data; - struct termios *term; -#endif /* !NeXT */ +ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term) { switch (*com) { case TIOCSETP: - case TIOCSETN: { + /* + * Wait for all characters queued for output to drain, then + * discard all characters queued for input, and then set + * the input and output speeds and device flags, per the + * contents of the struct sgttyb that 'data' points to. + */ + case TIOCSETN: + /* + * Same as TIOCSETP, but the output is not drained, and any + * pending input is not discarded. + */ + { register struct sgttyb *sg = (struct sgttyb *)data; int speed; @@ -167,7 +229,12 @@ ttsetcompat(tp, com, data, term) *com = (*com == TIOCSETP) ? TIOCSETAF : TIOCSETA; break; } - case TIOCSETC: { + case TIOCSETC: + /* + * Set the terminal control characters per the contents of + * the struct tchars that 'data' points to. + */ + { struct tchars *tc = (struct tchars *)data; register cc_t *cc; @@ -183,7 +250,12 @@ ttsetcompat(tp, com, data, term) *com = TIOCSETA; break; } - case TIOCSLTC: { + case TIOCSLTC: + /* + * Set the terminal control characters per the contents of + * the struct ltchars that 'data' points to.
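For reference, the kind of caller ttsetcompat() is translating for looks like this: a pre-termios program driving the 4.3BSD sgtty ioctls, which the table above rewrites into TIOCSETA/TIOCSETAF on a local termios copy (assumes the compatibility definitions from <sgtty.h> or <sys/ioctl_compat.h> are available):

#include <sys/ioctl.h>
#include <sgtty.h>

void
old_style_raw(int fd)
{
	struct sgttyb sg;

	ioctl(fd, TIOCGETP, &sg);       /* old-style get; see TIOCGETP below */
	sg.sg_flags |= RAW;             /* historic 16-bit flags word */
	sg.sg_flags &= ~ECHO;
	ioctl(fd, TIOCSETP, &sg);       /* becomes TIOCSETAF: drain, flush, set */
}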
+ */ + { struct ltchars *ltc = (struct ltchars *)data; register cc_t *cc; @@ -198,8 +270,23 @@ ttsetcompat(tp, com, data, term) break; } case TIOCLBIS: + /* + * Set the bits in the terminal state local flags word + * (16 bits) for the terminal to the current bits OR + * those in the 16 bit value pointed to by 'data'. + */ case TIOCLBIC: + /* + * Clear the bits in the terminal state local flags word + * for the terminal to the current bits AND those bits NOT + * in the 16 bit value pointed to by 'data'. + */ case TIOCLSET: + /* + * Set the terminal state local flags word to exactly those + * bits that correspond to the 16 bit value pointed to by + * 'data'. + */ if (*com == TIOCLSET) tp->t_flags = (tp->t_flags&0xffff) | *(int *)data<<16; else { @@ -217,23 +304,44 @@ ttsetcompat(tp, com, data, term) return 0; } +/* + * ttcompat + * + * Description: For 'set' commands, convert the command and arguments as + * necessary, and call ttioctl(), returning the result as + * our result; for 'get' commands, obtain the requested data + * from the appropriate source, and return it in the expected + * format. If the command is not recognized, return EINVAL. + * + * Parameters struct tty *tp The tty on which the operation is + * being performed. + * u_long com The terminal input/output command + * being requested. + * caddr_t data The pointer to the user data argument + * provided with the command. + * int flag The file open flags (e.g. FREAD). + * struct proc *p The current process pointer for the + * operation. + * + * Returns: 0 Most 'get' operations can't fail, and + * therefore return this. + * ENOTTY TIOCGSID may return this when you + * attempt to get the session ID for a + * terminal with no associated session, + * or for which there is a session, but + * no session leader. + * EIOCTL If the command cannot be handled at + * this layer, this will be returned. + * * Any value returned by ttioctl(), if a + * set command is requested. + * + * NOTES: The process pointer may be a proxy on whose behalf we are + * operating, so it is not safe to simply use current_process() + * instead. + */ /*ARGSUSED*/ -#ifndef NeXT -int -ttcompat(tp, com, data, flag) - register struct tty *tp; - int com; - caddr_t data; - int flag; -#else __private_extern__ int -ttcompat(tp, com, data, flag, p) - register struct tty *tp; - u_long com; - caddr_t data; - int flag; - struct proc *p; -#endif /* !NeXT */ +ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p) { switch (com) { case TIOCSETP: @@ -242,20 +350,26 @@ ttcompat(tp, com, data, flag, p) case TIOCSLTC: case TIOCLBIS: case TIOCLBIC: - case TIOCLSET: { + case TIOCLSET: + /* + * See ttsetcompat() for a full description of these command + * values and their meanings. + */ + { struct termios term; int error; term = tp->t_termios; if ((error = ttsetcompat(tp, &com, data, &term)) != 0) return error; -#ifdef NeXT return ttioctl(tp, com, (caddr_t) &term, flag, p); -#else - return ttioctl(tp, com, &term, flag); -#endif } - case TIOCGETP: { + case TIOCGETP: + /* + * Get the current input and output speeds, and device + * flags, into the structure pointed to by 'data'. + */ + { register struct sgttyb *sg = (struct sgttyb *)data; register cc_t *cc = tp->t_cc; @@ -269,7 +383,12 @@ ttcompat(tp, com, data, flag, p) sg->sg_flags = tp->t_flags = ttcompatgetflags(tp); break; } - case TIOCGETC: { + case TIOCGETC: + /* + * Get the terminal control characters into the struct + * tchars that 'data' points to. 
+ */ + { struct tchars *tc = (struct tchars *)data; register cc_t *cc = tp->t_cc; @@ -281,7 +400,12 @@ ttcompat(tp, com, data, flag, p) tc->t_brkc = cc[VEOL]; break; } - case TIOCGLTC: { + case TIOCGLTC: + /* + * Get the terminal control characters into the struct + * ltchars that 'data' points to. + */ + { struct ltchars *ltc = (struct ltchars *)data; register cc_t *cc = tp->t_cc; @@ -294,33 +418,30 @@ ttcompat(tp, com, data, flag, p) break; } case TIOCLGET: + /* + * Get the terminal state local flags word into the 16 bit + * value pointed to by 'data'. + */ tp->t_flags = (ttcompatgetflags(tp) & 0xffff0000UL) | (tp->t_flags & 0xffff); *(int *)data = tp->t_flags>>16; -#ifndef NeXT - if (ttydebug) - printf("CLGET: returning %x\n", *(int *)data); -#endif break; case OTIOCGETD: + /* + * Get the current line discipline into the int pointed to + * by 'data'. + */ *(int *)data = tp->t_line ? tp->t_line : 2; break; -#ifndef NeXT - case OTIOCSETD: { - int ldisczero = 0; - - return (ttioctl(tp, TIOCSETD, - *(int *)data == 2 ? (caddr_t)&ldisczero : data, flag)); - } - - case OTIOCCONS: - *(int *)data = 1; - return (ttioctl(tp, TIOCCONS, data, flag)); -#else - case OTIOCSETD: { + case OTIOCSETD: + /* + * Set the current line discipline based on the value of the + * int pointed to by 'data'. + */ + { int ldisczero = 0; return (ttioctl(tp, TIOCSETD, @@ -328,10 +449,16 @@ ttcompat(tp, com, data, flag, p) } case OTIOCCONS: + /* + * Become the console device. + */ *(int *)data = 1; return (ttioctl(tp, TIOCCONS, data, flag, p)); case TIOCGSID: + /* + * Get the current session ID (controlling process' PID). + */ if (tp->t_session == NULL) return ENOTTY; @@ -340,23 +467,44 @@ ttcompat(tp, com, data, flag, p) *(int *) data = tp->t_session->s_leader->p_pid; break; -#endif /* NeXT */ default: - return (-1); + /* + * This ioctl is not handled at this layer. + */ + return (ENOTTY); } + + /* + * Successful 'get' operation. + */ return (0); } +/* + * ttcompatgetflags + * + * Description: Get the terminal state local flags, device flags, and current + * speed code for the device (all 32 bits are returned). + * + * Parameters: struct tty *tp The tty on which the operation is + * being performed. + * + * Returns: * Integer value corresponding to the + * current terminal state local flags + * word. + * + * Notes: Caller is responsible for breaking these bits back out into + * separate 16 bit fields, if that's what was actually desired.
+ */ static int -ttcompatgetflags(tp) - register struct tty *tp; +ttcompatgetflags(struct tty *tp) { register tcflag_t iflag = tp->t_iflag; register tcflag_t lflag = tp->t_lflag; register tcflag_t oflag = tp->t_oflag; register tcflag_t cflag = tp->t_cflag; - register flags = 0; + register int flags = 0; if (iflag&IXOFF) flags |= TANDEM; @@ -380,12 +528,12 @@ ttcompatgetflags(tp) if ((lflag&ICANON) == 0) { /* fudge */ if (iflag&(INPCK|ISTRIP|IXON) || lflag&(IEXTEN|ISIG) - || cflag&(CSIZE|PARENB) != CS8) + || (cflag&(CSIZE|PARENB)) != CS8) flags |= CBREAK; else flags |= RAW; } - if (!(flags&RAW) && !(oflag&OPOST) && cflag&(CSIZE|PARENB) == CS8) + if (!(flags&RAW) && !(oflag&OPOST) && (cflag&(CSIZE|PARENB)) == CS8) flags |= LITOUT; if (cflag&MDMBUF) flags |= MDMBUF; @@ -404,19 +552,27 @@ ttcompatgetflags(tp) if ((iflag&IXANY) == 0) flags |= DECCTQ; flags |= lflag&(ECHO|TOSTOP|FLUSHO|PENDIN|NOFLSH); -#ifndef NeXT - if (ttydebug) - printf("getflags: %x\n", flags); -#endif return (flags); } +/* + * ttcompatsetflags + * + * Description: Given a set of compatibility flags, convert the compatibility + * flags in the terminal flags fields into canonical flags in the + * provided termios struct. + * + * Parameters: struct tty *tp The tty on which the operation is + * being performed. + * struct termios *t The termios structure into which to + * return the converted flags. + * + * Returns: void (implicit: *t, modified) + */ static void -ttcompatsetflags(tp, t) - register struct tty *tp; - register struct termios *t; +ttcompatsetflags(struct tty *tp, struct termios *t) { - register flags = tp->t_flags; + register int flags = tp->t_flags; register tcflag_t iflag = t->c_iflag; register tcflag_t oflag = t->c_oflag; register tcflag_t lflag = t->c_lflag; @@ -490,12 +646,24 @@ ttcompatsetflags(tp, t) t->c_cflag = cflag; } +/* + * ttcompatsetlflags + * + * Description: Given a set of compatibility terminal state local flags, + * convert the compatibility flags in the terminal flags + * fields into canonical flags in the provided termios struct. + * + * Parameters: struct tty *tp The tty on which the operation is + * being performed. + * struct termios *t The termios structure into which to + * return the converted local flags.
+ * + * Returns: void (implicit: *t, modified) + */ static void -ttcompatsetlflags(tp, t) - register struct tty *tp; - register struct termios *t; +ttcompatsetlflags(struct tty *tp, struct termios *t) { - register flags = tp->t_flags; + register int flags = tp->t_flags; register tcflag_t iflag = t->c_iflag; register tcflag_t oflag = t->c_oflag; register tcflag_t lflag = t->c_lflag; @@ -567,4 +735,4 @@ ttcompatsetlflags(tp, t) t->c_lflag = lflag; t->c_cflag = cflag; } -#endif /* COMPAT_43 || COMPAT_SUNOS */ +#endif /* COMPAT_43_TTY || COMPAT_SUNOS */ diff --git a/bsd/kern/tty_conf.c b/bsd/kern/tty_conf.c index bb4a8c9c0..5bf3647b1 100644 --- a/bsd/kern/tty_conf.c +++ b/bsd/kern/tty_conf.c @@ -72,40 +72,38 @@ #ifndef NeXT static l_open_t l_noopen; static l_close_t l_noclose; -static l_ioctl_t l_nullioctl; static l_rint_t l_norint; -static l_start_t l_nostart; #else /* NeXT */ -#define l_noopen ((int (*) __P((dev_t, struct tty *)))enodev) -#define l_noclose ((int (*) __P((struct tty *, int flags)))enodev) -#define l_noread ((int (*) __P((struct tty *, struct uio *, int)))enodev) -#define l_nowrite l_noread -#define l_norint ((int (*) __P((int c, struct tty *)))enodev) -#define l_nostart ((int (*) __P((struct tty *)))enodev) -static int -l_nullioctl(struct tty *tp, u_long cmd, caddr_t data, int flag, struct proc *p); +#define l_noopen ((l_open_t *) &enodev) +#define l_noclose ((l_close_t *) &enodev) +#define l_noread ((l_read_t *) &enodev) +#define l_nowrite ((l_write_t *) &enodev) +#define l_norint ((l_rint_t *) &enodev) #endif /* !NeXT */ +static l_ioctl_t l_noioctl; +static l_start_t l_nostart; + /* * XXX it probably doesn't matter what the entries other than the l_open - * entry are here. The l_nullioctl and ttymodem entries still look fishy. + * entry are here. The l_noioctl and ttymodem entries still look fishy. * Reconsider the removal of nullmodem anyway. It was too much like * ttymodem, but a completely null version might be useful. */ #define NODISC(n) \ { l_noopen, l_noclose, l_noread, l_nowrite, \ - l_nullioctl, l_norint, l_nostart, ttymodem } + l_noioctl, l_norint, l_nostart, ttymodem } struct linesw linesw[MAXLDISC] = { /* 0- termios */ { ttyopen, ttylclose, ttread, ttwrite, - l_nullioctl, ttyinput, ttstart, ttymodem }, + l_noioctl, ttyinput, ttwwakeup, ttymodem }, NODISC(1), /* 1- defunct */ /* 2- NTTYDISC */ -#ifdef COMPAT_43 +#if COMPAT_43_TTY { ttyopen, ttylclose, ttread, ttwrite, - l_nullioctl, ttyinput, ttstart, ttymodem }, + l_noioctl, ttyinput, ttwwakeup, ttymodem }, #else NODISC(2), #endif @@ -215,14 +213,6 @@ l_norint(c, tp) return (ENODEV); } - -static int -l_nostart(tp) - struct tty *tp; -{ - - return (ENODEV); -} #endif /* !NeXT */ /* @@ -230,13 +220,13 @@ l_nostart(tp) * discipline specific ioctl command. */ static int -l_nullioctl(tp, cmd, data, flags, p) - struct tty *tp; - u_long cmd; - caddr_t data; - int flags; - struct proc *p; +l_noioctl(__unused struct tty *tp, __unused u_long cmd, __unused caddr_t data, + __unused int flags, __unused struct proc *p) { - return (-1); + return ENOTTY; } + +static void +l_nostart(__unused struct tty *tp) + { } diff --git a/bsd/kern/tty_pty.c b/bsd/kern/tty_pty.c index 7003a4202..5ec154c67 100644 --- a/bsd/kern/tty_pty.c +++ b/bsd/kern/tty_pty.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -64,10 +64,11 @@ #include #include #include -#include +#include +#include #include #include -#include +#include #include #include #include @@ -78,23 +79,24 @@ #define FREE_BSDSTATIC static #else -#include - #define FREE_BSDSTATIC __private_extern__ #define d_devtotty_t struct tty ** #ifdef d_stop_t #undef d_stop_t #endif -typedef void d_stop_t __P((struct tty *tp, int rw)); +typedef void d_stop_t(struct tty *tp, int rw); #endif /* NeXT */ +/* XXX function should be removed??? */ +int pty_init(int n_ptys); + #ifdef notyet -static void ptyattach __P((int n)); +static void ptyattach(int n); #endif -static void ptsstart __P((struct tty *tp)); -static void ptcwakeup __P((struct tty *tp, int flag)); +static void ptsstart(struct tty *tp); +static void ptcwakeup(struct tty *tp, int flag); FREE_BSDSTATIC d_open_t ptsopen; FREE_BSDSTATIC d_close_t ptsclose; @@ -204,7 +206,8 @@ ptyattach(n) #endif #ifndef DEVFS -int pty_init() +int +pty_init(__unused int n_ptys) { return 0; } @@ -212,7 +215,8 @@ int pty_init() #include #define START_CHAR 'p' #define HEX_BASE 16 -int pty_init(int n_ptys) +int +pty_init(int n_ptys) { int i; int j; @@ -238,23 +242,24 @@ int pty_init(int n_ptys) /*ARGSUSED*/ FREE_BSDSTATIC int -ptsopen(dev, flag, devtype, p) - dev_t dev; - int flag, devtype; - struct proc *p; +ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) { register struct tty *tp; int error; + boolean_t funnel_state; + funnel_state = thread_funnel_set(kernel_flock, TRUE); #ifndef NeXT tp = &pt_tty[minor(dev)]; #else /* - * You will see this sourt of code coming up in diffs later both + * You will see this sort of code coming up in diffs later both * the ttymalloc and the tp indirection. */ - if (minor(dev) >= npty) - return (ENXIO); + if (minor(dev) >= npty) { + error = ENXIO; + goto out; + } if (!pt_tty[minor(dev)]) { tp = pt_tty[minor(dev)] = ttymalloc(); } else @@ -268,8 +273,10 @@ ptsopen(dev, flag, devtype, p) tp->t_cflag = TTYDEF_CFLAG; tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; ttsetwater(tp); /* would be done in xxparam() */ - } else if (tp->t_state&TS_XCLUDE && p->p_ucred->cr_uid != 0) - return (EBUSY); + } else if (tp->t_state&TS_XCLUDE && suser(kauth_cred_get(), NULL)) { + error = EBUSY; + goto out; + } if (tp->t_oproc) /* Ctrlr still around. 
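ptsopen()/ptcopen() below are the slave and master halves of each pair created by pty_init() above. User space traditionally hunts for a free master with the classic loop; the 'p'..'w' letter range here is the conventional BSD set and is an assumption, since only START_CHAR 'p' and the hex second character are visible in this hunk:

#include <fcntl.h>
#include <stdio.h>

int
open_master(char slave_out[11])
{
	char master[] = "/dev/ptyXY";
	const char *xs = "pqrstuvw", *ys = "0123456789abcdef";
	const char *x, *y;
	int fd;

	for (x = xs; *x; x++)
		for (y = ys; *y; y++) {
			master[8] = *x; master[9] = *y;
			if ((fd = open(master, O_RDWR)) >= 0) {   /* ptcopen */
				snprintf(slave_out, 11, "/dev/tty%c%c", *x, *y);
				return fd;   /* slave side opens via ptsopen */
			}
		}
	return -1;
}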
*/ (void)(*linesw[tp->t_line].l_modem)(tp, 1); while ((tp->t_state & TS_CARR_ON) == 0) { @@ -278,27 +285,31 @@ ptsopen(dev, flag, devtype, p) error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, "ptsopn", 0); if (error) - return (error); + goto out; } error = (*linesw[tp->t_line].l_open)(dev, tp); if (error == 0) ptcwakeup(tp, FREAD|FWRITE); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } FREE_BSDSTATIC int -ptsclose(dev, flag, mode, p) - dev_t dev; - int flag, mode; - struct proc *p; +ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) { register struct tty *tp; int err; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); tp = pt_tty[minor(dev)]; err = (*linesw[tp->t_line].l_close)(tp, flag); ptsstop(tp, FREAD|FWRITE); (void) ttyclose(tp); + + (void) thread_funnel_set(kernel_flock, funnel_state); return (err); } @@ -317,21 +328,27 @@ ptsread(dev, uio, flag) register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; int error = 0; struct uthread *ut; + boolean_t funnel_state; - ut = (struct uthread *)get_bsdthread_info(current_act()); + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + + ut = (struct uthread *)get_bsdthread_info(current_thread()); again: if (pti->pt_flags & PF_REMOTE) { while (isbackground(p, tp)) { if ((p->p_sigignore & sigmask(SIGTTIN)) || (ut->uu_sigmask & sigmask(SIGTTIN)) || p->p_pgrp->pg_jobc == 0 || - p->p_flag & P_PPWAIT) - return (EIO); + p->p_flag & P_PPWAIT) { + error = EIO; + goto out; + } pgsignal(p->p_pgrp, SIGTTIN, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH | PTTYBLOCK, "ptsbg", 0); if (error) - return (error); + goto out; } if (tp->t_canq.c_cc == 0) { if (flag & IO_NDELAY) @@ -339,22 +356,31 @@ again: error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, "ptsin", 0); if (error) - return (error); + goto out; goto again; } - while (tp->t_canq.c_cc > 1 && uio->uio_resid > 0) - if (ureadc(getc(&tp->t_canq), uio) < 0) { - error = EFAULT; + while (tp->t_canq.c_cc > 1 && uio_resid(uio) > 0) { + int cc; + char buf[BUFSIZ]; + + cc = min(uio_resid(uio), BUFSIZ); + // Don't copy the very last byte + cc = min(cc, tp->t_canq.c_cc - 1); + cc = q_to_b(&tp->t_canq, buf, cc); + error = uiomove(buf, cc, uio); + if (error) break; - } + } if (tp->t_canq.c_cc == 1) (void) getc(&tp->t_canq); if (tp->t_canq.c_cc) - return (error); + goto out; } else if (tp->t_oproc) error = (*linesw[tp->t_line].l_read)(tp, uio, flag); ptcwakeup(tp, FWRITE); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } @@ -370,11 +396,19 @@ ptswrite(dev, uio, flag) int flag; { register struct tty *tp; + int error; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); tp = pt_tty[minor(dev)]; if (tp->t_oproc == 0) - return (EIO); - return ((*linesw[tp->t_line].l_write)(tp, uio, flag)); + error = EIO; + else + error = (*linesw[tp->t_line].l_write)(tp, uio, flag); + + (void) thread_funnel_set(kernel_flock, funnel_state); + return (error); } /* @@ -386,14 +420,20 @@ ptsstart(tp) struct tty *tp; { register struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (tp->t_state & TS_TTSTOP) - return; + goto out; if (pti->pt_flags & PF_STOPPED) { pti->pt_flags &= ~PF_STOPPED; pti->pt_send = TIOCPKT_START; } ptcwakeup(tp, FREAD); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); + return; } static void @@ -402,6 +442,9 @@ ptcwakeup(tp, flag) int flag; { struct pt_ioctl *pti = 
&pt_ioctl[minor(tp->t_dev)]; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); if (flag & FREAD) { selwakeup(&pti->pt_selr); @@ -411,25 +454,31 @@ ptcwakeup(tp, flag) selwakeup(&pti->pt_selw); wakeup(TSA_PTC_WRITE(tp)); } + (void) thread_funnel_set(kernel_flock, funnel_state); } FREE_BSDSTATIC int -ptcopen(dev, flag, devtype, p) - dev_t dev; - int flag, devtype; - struct proc *p; +ptcopen(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) { register struct tty *tp; struct pt_ioctl *pti; + int error = 0; + boolean_t funnel_state; - if (minor(dev) >= npty) - return (ENXIO); + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + if (minor(dev) >= npty) { + error = ENXIO; + goto out; + } if(!pt_tty[minor(dev)]) { tp = pt_tty[minor(dev)] = ttymalloc(); } else tp = pt_tty[minor(dev)]; - if (tp->t_oproc) - return (EIO); + if (tp->t_oproc) { + error = EIO; + goto out; + } tp->t_oproc = ptsstart; #ifdef sun4c tp->t_stop = ptsstop; @@ -440,17 +489,18 @@ ptcopen(dev, flag, devtype, p) pti->pt_flags = 0; pti->pt_send = 0; pti->pt_ucntl = 0; - return (0); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); + return (error); } FREE_BSDSTATIC int -ptcclose(dev, flags, fmt, p) - dev_t dev; - int flags; - int fmt; - struct proc *p; +ptcclose(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) { register struct tty *tp; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); tp = pt_tty[minor(dev)]; (void)(*linesw[tp->t_line].l_modem)(tp, 0); @@ -470,6 +520,8 @@ ptcclose(dev, flags, fmt, p) } tp->t_oproc = 0; /* mark closed */ + + (void) thread_funnel_set(kernel_flock, funnel_state); return (0); } @@ -483,6 +535,9 @@ ptcread(dev, uio, flag) struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; char buf[BUFSIZ]; int error = 0, cc; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); /* * We want to block until the slave @@ -495,43 +550,48 @@ ptcread(dev, uio, flag) if (pti->pt_flags&PF_PKT && pti->pt_send) { error = ureadc((int)pti->pt_send, uio); if (error) - return (error); + goto out; if (pti->pt_send & TIOCPKT_IOCTL) { - cc = min(uio->uio_resid, + cc = min(uio_resid(uio), sizeof(tp->t_termios)); uiomove((caddr_t)&tp->t_termios, cc, uio); } pti->pt_send = 0; - return (0); + goto out; } if (pti->pt_flags&PF_UCNTL && pti->pt_ucntl) { error = ureadc((int)pti->pt_ucntl, uio); if (error) - return (error); + goto out; pti->pt_ucntl = 0; - return (0); + goto out; } if (tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) break; } if ((tp->t_state & TS_CONNECTED) == 0) - return (0); /* EOF */ - if (flag & IO_NDELAY) - return (EWOULDBLOCK); + goto out; /* EOF */ + if (flag & IO_NDELAY) { + error = EWOULDBLOCK; + goto out; + } error = tsleep(TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptcin", 0); if (error) - return (error); + goto out; } if (pti->pt_flags & (PF_PKT|PF_UCNTL)) error = ureadc(0, uio); - while (uio->uio_resid > 0 && error == 0) { - cc = q_to_b(&tp->t_outq, buf, min(uio->uio_resid, BUFSIZ)); + while (uio_resid(uio) > 0 && error == 0) { + cc = q_to_b(&tp->t_outq, buf, min(uio_resid(uio), BUFSIZ)); if (cc <= 0) break; error = uiomove(buf, cc, uio); } - ttwwakeup(tp); + (*linesw[tp->t_line].l_start)(tp); + +out: + (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } @@ -542,6 +602,9 @@ ptsstop(tp, flush) { struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; int flag; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); /* note: 
FLUSHREAD and FLUSHWRITE already ok */ if (flush == 0) { @@ -557,6 +620,8 @@ ptsstop(tp, flush) if (flush & FWRITE) flag |= FREAD; ptcwakeup(tp, flag); + + (void) thread_funnel_set(kernel_flock, funnel_state); } FREE_BSDSTATIC int @@ -568,30 +633,35 @@ ptcselect(dev, rw, wql, p) { register struct tty *tp = pt_tty[minor(dev)]; struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; - int s; + int retval = 0; + boolean_t funnel_state; - if ((tp->t_state & TS_CONNECTED) == 0) - return (1); + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + if ((tp->t_state & TS_CONNECTED) == 0) { + retval = 1; + goto out; + } switch (rw) { case FREAD: /* * Need to block timeouts (ttrstart). */ - s = spltty(); if ((tp->t_state&TS_ISOPEN) && tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { - splx(s); - return (1); + retval = 1; + goto out; } - splx(s); /* FALLTHROUGH */ case 0: /* exceptional */ if ((tp->t_state&TS_ISOPEN) && ((pti->pt_flags&PF_PKT && pti->pt_send) || - (pti->pt_flags&PF_UCNTL && pti->pt_ucntl))) - return (1); + (pti->pt_flags&PF_UCNTL && pti->pt_ucntl))) { + retval = 1; + goto out; + } selrecord(p, &pti->pt_selr, wql); break; @@ -599,20 +669,28 @@ ptcselect(dev, rw, wql, p) case FWRITE: if (tp->t_state&TS_ISOPEN) { if (pti->pt_flags & PF_REMOTE) { - if (tp->t_canq.c_cc == 0) - return (1); + if (tp->t_canq.c_cc == 0) { + retval = 1; + goto out; + } } else { - if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) - return (1); - if (tp->t_canq.c_cc == 0 && (tp->t_iflag&ICANON)) - return (1); + if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) { + retval = 1; + goto out; + } + if (tp->t_canq.c_cc == 0 && (tp->t_iflag&ICANON)) { + retval = 1; + goto out; + } } } selrecord(p, &pti->pt_selw, wql); break; } - return (0); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); + return (retval); } FREE_BSDSTATIC int @@ -625,9 +703,12 @@ ptcwrite(dev, uio, flag) register u_char *cp = NULL; register int cc = 0; u_char locbuf[BUFSIZ]; - int cnt = 0; + int wcnt = 0; struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; int error = 0; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); again: if ((tp->t_state&TS_ISOPEN) == 0) @@ -635,20 +716,21 @@ again: if (pti->pt_flags & PF_REMOTE) { if (tp->t_canq.c_cc) goto block; - while ((uio->uio_resid > 0 || cc > 0) && + while ((uio_resid(uio) > 0 || cc > 0) && tp->t_canq.c_cc < TTYHOG - 1) { if (cc == 0) { - cc = min(uio->uio_resid, BUFSIZ); + cc = min(uio_resid(uio), BUFSIZ); cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc); cp = locbuf; error = uiomove((caddr_t)cp, cc, uio); if (error) - return (error); + goto out; /* check again for safety */ if ((tp->t_state & TS_ISOPEN) == 0) { /* adjust as usual */ - uio->uio_resid += cc; - return (EIO); + uio_setresid(uio, (uio_resid(uio) + cc)); + error = EIO; + goto out; } } if (cc > 0) { @@ -666,24 +748,25 @@ again: } } /* adjust for data copied in but not written */ - uio->uio_resid += cc; + uio_setresid(uio, (uio_resid(uio) + cc)); (void) putc(0, &tp->t_canq); ttwakeup(tp); wakeup(TSA_PTS_READ(tp)); - return (0); + goto out; } - while (uio->uio_resid > 0 || cc > 0) { + while (uio_resid(uio) > 0 || cc > 0) { if (cc == 0) { - cc = min(uio->uio_resid, BUFSIZ); + cc = min(uio_resid(uio), BUFSIZ); cp = locbuf; error = uiomove((caddr_t)cp, cc, uio); if (error) - return (error); + goto out; /* check again for safety */ if ((tp->t_state & TS_ISOPEN) == 0) { /* adjust for data copied in but not written */ - uio->uio_resid += cc; - return (EIO); + uio_setresid(uio, (uio_resid(uio) + cc)); + error = EIO; + 
goto out; } } while (cc > 0) { @@ -693,12 +776,14 @@ again: goto block; } (*linesw[tp->t_line].l_rint)(*cp++, tp); - cnt++; + wcnt++; cc--; } cc = 0; } - return (0); +out: + (void) thread_funnel_set(kernel_flock, funnel_state); + return (error); block: /* * Come here to wait for slave to open, for space @@ -706,21 +791,22 @@ block: */ if ((tp->t_state & TS_CONNECTED) == 0) { /* adjust for data copied in but not written */ - uio->uio_resid += cc; - return (EIO); + uio_setresid(uio, (uio_resid(uio) + cc)); + error = EIO; + goto out; } if (flag & IO_NDELAY) { /* adjust for data copied in but not written */ - uio->uio_resid += cc; - if (cnt == 0) - return (EWOULDBLOCK); - return (0); + uio_setresid(uio, (uio_resid(uio) + cc)); + if (wcnt == 0) + error = EWOULDBLOCK; + goto out; } error = tsleep(TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptcout", 0); if (error) { /* adjust for data copied in but not written */ - uio->uio_resid += cc; - return (error); + uio_setresid(uio, (uio_resid(uio) + cc)); + goto out; } goto again; } @@ -759,7 +845,10 @@ ptyioctl(dev, cmd, data, flag, p) register struct tty *tp = pt_tty[minor(dev)]; register struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; register u_char *cc = tp->t_cc; - int stop, error; + int stop, error = 0; + boolean_t funnel_state; + + funnel_state = thread_funnel_set(kernel_flock, TRUE); /* * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. @@ -785,7 +874,7 @@ ptyioctl(dev, cmd, data, flag, p) } tp->t_lflag &= ~EXTPROC; } - return(0); + goto out; } else #ifndef NeXT if (cdevsw[major(dev)]->d_open == ptcopen) @@ -800,25 +889,29 @@ ptyioctl(dev, cmd, data, flag, p) * in that case, tp must be the controlling terminal. */ *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : 0; - return (0); + goto out; case TIOCPKT: if (*(int *)data) { - if (pti->pt_flags & PF_UCNTL) - return (EINVAL); + if (pti->pt_flags & PF_UCNTL) { + error = EINVAL; + goto out; + } pti->pt_flags |= PF_PKT; } else pti->pt_flags &= ~PF_PKT; - return (0); + goto out; case TIOCUCNTL: if (*(int *)data) { - if (pti->pt_flags & PF_PKT) - return (EINVAL); + if (pti->pt_flags & PF_PKT) { + error = EINVAL; + goto out; + } pti->pt_flags |= PF_UCNTL; } else pti->pt_flags &= ~PF_UCNTL; - return (0); + goto out; case TIOCREMOTE: if (*(int *)data) @@ -826,9 +919,9 @@ ptyioctl(dev, cmd, data, flag, p) else pti->pt_flags &= ~PF_REMOTE; ttyflush(tp, FREAD|FWRITE); - return (0); + goto out; -#ifdef COMPAT_43 +#if COMPAT_43_TTY case TIOCSETP: case TIOCSETN: #endif @@ -841,30 +934,33 @@ ptyioctl(dev, cmd, data, flag, p) case TIOCSIG: if (*(unsigned int *)data >= NSIG || - *(unsigned int *)data == 0) - return(EINVAL); + *(unsigned int *)data == 0) { + error = EINVAL; + goto out; + } if ((tp->t_lflag&NOFLSH) == 0) ttyflush(tp, FREAD|FWRITE); pgsignal(tp->t_pgrp, *(unsigned int *)data, 1); if ((*(unsigned int *)data == SIGINFO) && ((tp->t_lflag&NOKERNINFO) == 0)) ttyinfo(tp); - return(0); + goto out; } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); - if (error < 0) - error = ttioctl(tp, cmd, data, flag, p); - if (error < 0) { - if (pti->pt_flags & PF_UCNTL && - (cmd & ~0xff) == UIOCCMD(0)) { + if (error == ENOTTY) { + error = ttioctl(tp, cmd, data, flag, p); + if (error == ENOTTY + && pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) { + /* Process the UIOCMD ioctl group */ if (cmd & 0xff) { pti->pt_ucntl = (u_char)cmd; ptcwakeup(tp, FREAD); } - return (0); + error = 0; + goto out; } - error = ENOTTY; } + /* * If external processing and packet mode send ioctl packet. 
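The conversions running through this pty diff all share one shape: take the kernel funnel on entry, turn every early return into a goto to a single exit label, release the funnel there, and read residual counts through the uio_resid()/uio_setresid() accessors instead of touching uio->uio_resid directly. A minimal sketch of that shape, assuming the headers already included by tty_pty.c; bad_minor() and do_work() are hypothetical placeholders:

/*
 * Sketch of the funnel bracket used by ptsopen/ptsread/ptcwrite above.
 * Only thread_funnel_set(), kernel_flock, and the goto-out discipline
 * are taken from the patch; the body is a placeholder.
 */
int
funneled_entry(dev_t dev, int flag)
{
	int error = 0;
	boolean_t funnel_state;

	/* take the funnel, remembering whether the caller already held it */
	funnel_state = thread_funnel_set(kernel_flock, TRUE);

	if (bad_minor(dev)) {		/* hypothetical validity check */
		error = ENXIO;
		goto out;		/* no bare returns past this point */
	}
	error = do_work(dev, flag);	/* hypothetical device work */
out:
	/* restore the caller's funnel state on every exit path */
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}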
*/ @@ -873,11 +969,11 @@ ptyioctl(dev, cmd, data, flag, p) case TIOCSETA: case TIOCSETAW: case TIOCSETAF: -#ifdef COMPAT_43 +#if COMPAT_43_TTY case TIOCSETP: case TIOCSETN: #endif -#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +#if COMPAT_43_TTY || defined(COMPAT_SUNOS) case TIOCSETC: case TIOCSLTC: case TIOCLBIS: @@ -907,6 +1003,8 @@ ptyioctl(dev, cmd, data, flag, p) ptcwakeup(tp, FREAD); } } +out: + (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } diff --git a/bsd/kern/tty_subr.c b/bsd/kern/tty_subr.c index 9bc116f30..1fcda738d 100644 --- a/bsd/kern/tty_subr.c +++ b/bsd/kern/tty_subr.c @@ -56,7 +56,6 @@ #include #include -#include #include #include #include @@ -89,7 +88,7 @@ * Initialize clists. */ void -cinit() +cinit(void) { } @@ -346,10 +345,7 @@ out: * clrbit(cp, off + len); */ void -clrbits(cp, off, len) - u_char *cp; - int off; - int len; +clrbits(u_char *cp, int off, int len) { int sby, sbi, eby, ebi; register int i; @@ -385,13 +381,10 @@ clrbits(cp, off, len) * Return number of bytes not transfered. */ int -b_to_q(cp, count, clp) - u_char *cp; - int count; - struct clist *clp; +b_to_q(const u_char *cp, int count, struct clist *clp) { - register int cc; - register u_char *p = cp; + int cc; + const u_char *p = cp; int s; if (count <= 0) diff --git a/bsd/kern/tty_tb.c b/bsd/kern/tty_tb.c index c04197e24..d4b8e4d62 100644 --- a/bsd/kern/tty_tb.c +++ b/bsd/kern/tty_tb.c @@ -38,7 +38,7 @@ #include #include #if NeXT -#include +#include #endif /* diff --git a/bsd/kern/tty_tty.c b/bsd/kern/tty_tty.c index 359d71096..8489c7766 100644 --- a/bsd/kern/tty_tty.c +++ b/bsd/kern/tty_tty.c @@ -62,10 +62,10 @@ #include #include #include -#include +#include #include -#include -#include +#include +#include #ifndef NeXT #include #ifdef DEVFS @@ -78,6 +78,18 @@ static d_write_t cttywrite; static d_ioctl_t cttyioctl; static d_select_t cttyselect; +#endif /* !NeXT */ + +/* Forward declarations for cdevsw[] entry */ +/* XXX we should consider making these static */ +int cttyopen(dev_t dev, int flag, int mode, struct proc *p); +int cttyread(dev_t dev, struct uio *uio, int flag); +int cttywrite(dev_t dev, struct uio *uio, int flag); +int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p); +int cttyselect(dev_t dev, int flag, void* wql, struct proc *p); + +#ifndef NeXT + #define CDEV_MAJOR 1 /* Don't make static, fdesc_vnops uses this. */ struct cdevsw ctty_cdevsw = @@ -91,80 +103,59 @@ struct cdevsw ctty_cdevsw = /*ARGSUSED*/ int -cttyopen(dev, flag, mode, p) - dev_t dev; - int flag, mode; - struct proc *p; +cttyopen(__unused dev_t dev, int flag, __unused int mode, struct proc *p) { struct vnode *ttyvp = cttyvp(p); + struct vfs_context context; int error; if (ttyvp == NULL) return (ENXIO); -#ifndef NeXT - VOP_LOCK(ttyvp); -#else - /* - * This is the only place that NeXT Guarding has been used for - * VOP_.*LOCK style calls. Note all of the other diffs should - * use the three paramater lock/unlock. - */ - vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); -#endif -#ifdef PARANOID - /* - * Since group is tty and mode is 620 on most terminal lines - * and since sessions protect terminals from processes outside - * your session, this check is probably no longer necessary. - * Since it inhibits setuid root programs that later switch - * to another user from accessing /dev/tty, we have decided - * to delete this test. (mckusick 5/93) - */ - error = VOP_ACCESS(ttyvp, - (flag&FREAD ? VREAD : 0) | (flag&FWRITE ? 
VWRITE : 0), p->p_ucred, p); - if (!error) -#endif /* PARANOID */ - error = VOP_OPEN(ttyvp, flag, NOCRED, p); - VOP_UNLOCK(ttyvp, 0, p); + context.vc_proc = p; + context.vc_ucred = p->p_ucred; + error = VNOP_OPEN(ttyvp, flag, &context); + return (error); } /*ARGSUSED*/ int -cttyread(dev, uio, flag) - dev_t dev; - struct uio *uio; - int flag; +cttyread(__unused dev_t dev, struct uio *uio, int flag) { - struct proc *p = uio->uio_procp; - register struct vnode *ttyvp = cttyvp(uio->uio_procp); + struct proc *p = current_proc(); + register struct vnode *ttyvp = cttyvp(p); + struct vfs_context context; int error; if (ttyvp == NULL) return (EIO); - vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_READ(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp, 0, p); + + context.vc_proc = p; + context.vc_ucred = NOCRED; + + error = VNOP_READ(ttyvp, uio, flag, &context); + return (error); } /*ARGSUSED*/ int -cttywrite(dev, uio, flag) - dev_t dev; - struct uio *uio; - int flag; +cttywrite(__unused dev_t dev, struct uio *uio, int flag) { - struct proc *p = uio->uio_procp; - register struct vnode *ttyvp = cttyvp(uio->uio_procp); + struct proc *p = current_proc(); + register struct vnode *ttyvp = cttyvp(p); + struct vfs_context context; int error; if (ttyvp == NULL) return (EIO); - vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_WRITE(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp, 0, p); + + context.vc_proc = p; + context.vc_ucred = NOCRED; + + error = VNOP_WRITE(ttyvp, uio, flag, &context); + return (error); } @@ -179,15 +170,12 @@ cttyioctl(dev, cmd, addr, flag, p) struct proc *p; #else int -cttyioctl(dev, cmd, addr, flag, p) - dev_t dev; - u_long cmd; - caddr_t addr; - int flag; - struct proc *p; +cttyioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, + struct proc *p) #endif /* !NeXT */ { struct vnode *ttyvp = cttyvp(p); + struct vfs_context context; if (ttyvp == NULL) return (EIO); @@ -200,22 +188,25 @@ cttyioctl(dev, cmd, addr, flag, p) } else return (EINVAL); } - return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p)); + context.vc_proc = p; + context.vc_ucred = NOCRED; + + return (VNOP_IOCTL(ttyvp, cmd, addr, flag, &context)); } /*ARGSUSED*/ int -cttyselect(dev, flag, wql, p) - dev_t dev; - int flag; - void * wql; - struct proc *p; +cttyselect(__unused dev_t dev, int flag, void* wql, struct proc *p) { struct vnode *ttyvp = cttyvp(p); + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = NOCRED; if (ttyvp == NULL) return (1); /* try operation to get EOF/failure */ - return (VOP_SELECT(ttyvp, flag, FREAD|FWRITE, NOCRED, wql, p)); + return (VNOP_SELECT(ttyvp, flag, FREAD|FWRITE, wql, &context)); } #ifndef NeXT diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 2f85056f6..1660c5b7a 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. 
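The ctty rewrites just above drop the vn_lock()/VOP_*()/VOP_UNLOCK() dance in favor of the context-carrying VNOP_* calls: the caller fills in a struct vfs_context with the acting proc and credential, and vnode locking moves inside the VNOP. A condensed sketch of the new calling convention, using only names that appear in the patch (VNOP_READ, cttyvp, current_proc, NOCRED); read_from_ctty() itself is hypothetical:

/*
 * Sketch: reading from the controlling tty via the new KPI,
 * following the shape of cttyread() above.
 */
int
read_from_ctty(struct uio *uio, int ioflag)
{
	struct proc *p = current_proc();
	struct vnode *ttyvp = cttyvp(p);	/* may be NULL: no ctty */
	struct vfs_context context;

	if (ttyvp == NULL)
		return (EIO);

	/* bundle the acting proc and credential for the filesystem */
	context.vc_proc = p;
	context.vc_ucred = NOCRED;

	return (VNOP_READ(ttyvp, uio, ioflag, &context));
}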
* * @APPLE_LICENSE_HEADER_START@ * @@ -37,18 +37,25 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include +#include +#include #include #include #include +#include +#include +#include +#include #include +#include +#include /* last */ #if DIAGNOSTIC #if defined(assert) @@ -60,79 +67,12 @@ #include #endif /* DIAGNOSTIC */ -struct zone *ubc_info_zone; - -/* lock for changes to struct UBC */ -static __inline__ void -ubc_lock(struct vnode *vp) -{ - /* For now, just use the v_interlock */ - simple_lock(&vp->v_interlock); -} - -/* unlock */ -static __inline__ void -ubc_unlock(struct vnode *vp) -{ - /* For now, just use the v_interlock */ - simple_unlock(&vp->v_interlock); -} - -/* - * Serialize the requests to the VM - * Returns: - * 0 - Failure - * 1 - Sucessful in acquiring the lock - * 2 - Sucessful in acquiring the lock recursively - * do not call ubc_unbusy() - * [This is strange, but saves 4 bytes in struct ubc_info] - */ -static int -ubc_busy(struct vnode *vp) -{ - register struct ubc_info *uip; - - if (!UBCINFOEXISTS(vp)) - return (0); - - uip = vp->v_ubcinfo; - - while (ISSET(uip->ui_flags, UI_BUSY)) { - - if (uip->ui_owner == (void *)current_act()) - return (2); - - SET(uip->ui_flags, UI_WANTED); - (void) tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "ubcbusy", 0); - - if (!UBCINFOEXISTS(vp)) - return (0); - } - uip->ui_owner = (void *)current_act(); - - SET(uip->ui_flags, UI_BUSY); - - return (1); -} +int ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize); +int ubc_umcallback(vnode_t, void *); +int ubc_isinuse_locked(vnode_t, int, int); +int ubc_msync_internal(vnode_t, off_t, off_t, off_t *, int, int *); -static void -ubc_unbusy(struct vnode *vp) -{ - register struct ubc_info *uip; - - if (!UBCINFOEXISTS(vp)) { - wakeup((caddr_t)&vp->v_ubcinfo); - return; - } - uip = vp->v_ubcinfo; - CLR(uip->ui_flags, UI_BUSY); - uip->ui_owner = (void *)NULL; - - if (ISSET(uip->ui_flags, UI_WANTED)) { - CLR(uip->ui_flags, UI_WANTED); - wakeup((caddr_t)&vp->v_ubcinfo); - } -} +struct zone *ubc_info_zone; /* * Initialization of the zone for Unified Buffer Cache. @@ -153,50 +93,35 @@ ubc_init() */ int ubc_info_init(struct vnode *vp) +{ + return(ubc_info_init_internal(vp, 0, 0)); +} +int +ubc_info_init_withsize(struct vnode *vp, off_t filesize) +{ + return(ubc_info_init_internal(vp, 1, filesize)); +} + +int +ubc_info_init_internal(struct vnode *vp, int withfsize, off_t filesize) { register struct ubc_info *uip; void * pager; - struct vattr vattr; struct proc *p = current_proc(); int error = 0; kern_return_t kret; memory_object_control_t control; - if (!UBCISVALID(vp)) - return (EINVAL); + uip = vp->v_ubcinfo; - ubc_lock(vp); - if (ISSET(vp->v_flag, VUINIT)) { - /* - * other thread is already doing this - * wait till done - */ - while (ISSET(vp->v_flag, VUINIT)) { - SET(vp->v_flag, VUWANT); /* XXX overloaded! 
*/ - ubc_unlock(vp); - (void) tsleep((caddr_t)vp, PINOD, "ubcinfo", 0); - ubc_lock(vp); - } - ubc_unlock(vp); - return (0); - } else { - SET(vp->v_flag, VUINIT); - } + if (uip == UBC_INFO_NULL) { - uip = vp->v_ubcinfo; - if ((uip == UBC_INFO_NULL) || (uip == UBC_NOINFO)) { - ubc_unlock(vp); uip = (struct ubc_info *) zalloc(ubc_info_zone); - uip->ui_pager = MEMORY_OBJECT_NULL; - uip->ui_control = MEMORY_OBJECT_CONTROL_NULL; - uip->ui_flags = UI_INITED; + bzero((char *)uip, sizeof(struct ubc_info)); + uip->ui_vnode = vp; + uip->ui_flags = UI_INITED; uip->ui_ucred = NOCRED; - uip->ui_refcount = 1; - uip->ui_size = 0; - uip->ui_mapped = 0; - uip->ui_owner = (void *)NULL; - ubc_lock(vp); } #if DIAGNOSTIC else @@ -206,21 +131,17 @@ ubc_info_init(struct vnode *vp) assert(uip->ui_flags != UI_NONE); assert(uip->ui_vnode == vp); -#if 0 - if(ISSET(uip->ui_flags, UI_HASPAGER)) - goto done; -#endif /* 0 */ - /* now set this ubc_info in the vnode */ vp->v_ubcinfo = uip; - SET(uip->ui_flags, UI_HASPAGER); - ubc_unlock(vp); + pager = (void *)vnode_pager_setup(vp, uip->ui_pager); assert(pager); - ubc_setpager(vp, pager); + + SET(uip->ui_flags, UI_HASPAGER); + uip->ui_pager = pager; /* - * Note: We can not use VOP_GETATTR() to get accurate + * Note: We can not use VNOP_GETATTR() to get accurate * value of ui_size. Thanks to NFS. * nfs_getattr() can call vinvalbuf() and in this case * ubc_info is not set up to deal with that. @@ -244,25 +165,24 @@ ubc_info_init(struct vnode *vp) assert(control); uip->ui_control = control; /* cache the value of the mo control */ SET(uip->ui_flags, UI_HASOBJREF); /* with a named reference */ +#if 0 /* create a pager reference on the vnode */ error = vnode_pager_vget(vp); if (error) panic("ubc_info_init: vnode_pager_vget error = %d", error); - - /* initialize the size */ - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); - - ubc_lock(vp); - uip->ui_size = (error ? 
0: vattr.va_size); - -done: - CLR(vp->v_flag, VUINIT); - if (ISSET(vp->v_flag, VUWANT)) { - CLR(vp->v_flag, VUWANT); - ubc_unlock(vp); - wakeup((caddr_t)vp); - } else - ubc_unlock(vp); +#endif + if (withfsize == 0) { + struct vfs_context context; + /* initialize the size */ + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + error = vnode_size(vp, &uip->ui_size, &context); + if (error) + uip->ui_size = 0; + } else { + uip->ui_size = filesize; + } + vp->v_lflag |= VNAMED_UBC; return (error); } @@ -271,16 +191,18 @@ done: static void ubc_info_free(struct ubc_info *uip) { - struct ucred *credp; + kauth_cred_t credp; credp = uip->ui_ucred; if (credp != NOCRED) { uip->ui_ucred = NOCRED; - crfree(credp); + kauth_cred_rele(credp); } if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL) memory_object_control_deallocate(uip->ui_control); + + cluster_release(uip); zfree(ubc_info_zone, (vm_offset_t)uip); return; @@ -289,20 +211,7 @@ ubc_info_free(struct ubc_info *uip) void ubc_info_deallocate(struct ubc_info *uip) { - - assert(uip->ui_refcount > 0); - - if (uip->ui_refcount-- == 1) { - struct vnode *vp; - - vp = uip->ui_vnode; - if (ISSET(uip->ui_flags, UI_WANTED)) { - CLR(uip->ui_flags, UI_WANTED); - wakeup((caddr_t)&vp->v_ubcinfo); - } - - ubc_info_free(uip); - } + ubc_info_free(uip); } /* @@ -321,9 +230,6 @@ ubc_setsize(struct vnode *vp, off_t nsize) if (nsize < (off_t)0) return (0); - if (UBCINVALID(vp)) - return (0); - if (!UBCINFOEXISTS(vp)) return (0); @@ -357,7 +263,7 @@ ubc_setsize(struct vnode *vp, off_t nsize) /* invalidate last page and old contents beyond nsize */ kret = memory_object_lock_request(control, (memory_object_offset_t)lastpg, - (memory_object_size_t)(olastpgend - lastpg), + (memory_object_size_t)(olastpgend - lastpg), NULL, NULL, MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH, VM_PROT_NO_CHANGE); if (kret != KERN_SUCCESS) @@ -369,7 +275,7 @@ ubc_setsize(struct vnode *vp, off_t nsize) /* flush the last page */ kret = memory_object_lock_request(control, (memory_object_offset_t)lastpg, - PAGE_SIZE_64, + PAGE_SIZE_64, NULL, NULL, MEMORY_OBJECT_RETURN_DIRTY, FALSE, VM_PROT_NO_CHANGE); @@ -377,7 +283,7 @@ ubc_setsize(struct vnode *vp, off_t nsize) /* invalidate last page and old contents beyond nsize */ kret = memory_object_lock_request(control, (memory_object_offset_t)lastpg, - (memory_object_size_t)(olastpgend - lastpg), + (memory_object_size_t)(olastpgend - lastpg), NULL, NULL, MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH, VM_PROT_NO_CHANGE); if (kret != KERN_SUCCESS) @@ -394,141 +300,50 @@ ubc_setsize(struct vnode *vp, off_t nsize) off_t ubc_getsize(struct vnode *vp) { - return (vp->v_ubcinfo->ui_size); -} - -/* - * Caller indicate that the object corresponding to the vnode - * can not be cached in object cache. Make it so. 
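ubc_info_init() and ubc_info_init_withsize() above are now thin wrappers over ubc_info_init_internal(), whose withfsize flag decides between trusting a caller-supplied size and asking the filesystem through vnode_size() under a freshly built context. The size-resolution step, condensed; resolve_size() is a hypothetical extraction of the branch shown above:

/*
 * Sketch: the size-resolution branch of ubc_info_init_internal().
 * vnode_size() can fail (NFS, for one), in which case the size is
 * left at zero rather than failing the init.
 */
static void
resolve_size(vnode_t vp, struct ubc_info *uip, int withfsize, off_t filesize)
{
	if (withfsize == 0) {
		struct vfs_context context;

		context.vc_proc = current_proc();
		context.vc_ucred = kauth_cred_get();
		if (vnode_size(vp, &uip->ui_size, &context))
			uip->ui_size = 0;
	} else {
		uip->ui_size = filesize;
	}
}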
- * returns 1 on success, 0 on failure - */ -int -ubc_uncache(struct vnode *vp) -{ - kern_return_t kret; - struct ubc_info *uip; - int recursed; - memory_object_control_t control; - memory_object_perf_info_data_t perf; - - if (!UBCINFOEXISTS(vp)) - return (0); - - if ((recursed = ubc_busy(vp)) == 0) - return (0); - - uip = vp->v_ubcinfo; - - assert(uip != UBC_INFO_NULL); - - /* - * AGE it so that vfree() can make sure that it - * would get recycled soon after the last reference is gone - * This will insure that .nfs turds would not linger + /* people depend on the side effect of this working this way + * as they call this for directory */ - vagevp(vp); - - /* set the "do not cache" bit */ - SET(uip->ui_flags, UI_DONTCACHE); - - control = uip->ui_control; - assert(control); - - perf.cluster_size = PAGE_SIZE; /* XXX use real cluster_size. */ - perf.may_cache = FALSE; - kret = memory_object_change_attributes(control, - MEMORY_OBJECT_PERFORMANCE_INFO, - (memory_object_info_t) &perf, - MEMORY_OBJECT_PERF_INFO_COUNT); - - if (kret != KERN_SUCCESS) { - printf("ubc_uncache: memory_object_change_attributes_named " - "kret = %d", kret); - if (recursed == 1) - ubc_unbusy(vp); - return (0); - } - - ubc_release_named(vp); - - if (recursed == 1) - ubc_unbusy(vp); - return (1); + if (!UBCINFOEXISTS(vp)) + return ((off_t)0); + return (vp->v_ubcinfo->ui_size); } /* - * call ubc_clean() and ubc_uncache() on all the vnodes + * call ubc_sync_range(vp, 0, EOF, UBC_PUSHALL) on all the vnodes * for this mount point. * returns 1 on success, 0 on failure */ + __private_extern__ int ubc_umount(struct mount *mp) { - struct proc *p = current_proc(); - struct vnode *vp, *nvp; - int ret = 1; - -loop: - simple_lock(&mntvnode_slock); - for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { - if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); - goto loop; - } - nvp = vp->v_mntvnodes.le_next; - simple_unlock(&mntvnode_slock); - if (UBCINFOEXISTS(vp)) { - - /* - * Must get a valid reference on the vnode - * before callig UBC functions - */ - if (vget(vp, 0, p)) { - ret = 0; - simple_lock(&mntvnode_slock); - continue; /* move on to the next vnode */ - } - ret &= ubc_clean(vp, 0); /* do not invalidate */ - ret &= ubc_uncache(vp); - vrele(vp); - } - simple_lock(&mntvnode_slock); - } - simple_unlock(&mntvnode_slock); - return (ret); + vnode_iterate(mp, 0, ubc_umcallback, 0); + return(0); } -/* - * Call ubc_unmount() for all filesystems. - * The list is traversed in reverse order - * of mounting to avoid dependencies. - */ -__private_extern__ void -ubc_unmountall() +static int +ubc_umcallback(vnode_t vp, __unused void * args) { - struct mount *mp, *nmp; - /* - * Since this only runs when rebooting, it is not interlocked. 
- */ - for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_prev; - (void) ubc_umount(mp); + if (UBCINFOEXISTS(vp)) { + + cluster_push(vp, 0); + + (void) ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL, UBC_PUSHALL); } + return (VNODE_RETURNED); } + + /* Get the credentials */ -struct ucred * +kauth_cred_t ubc_getcred(struct vnode *vp) { - struct ubc_info *uip; - - uip = vp->v_ubcinfo; + if (UBCINFOEXISTS(vp)) + return (vp->v_ubcinfo->ui_ucred); - if (UBCINVALID(vp)) - return (NOCRED); - - return (uip->ui_ucred); + return (NOCRED); } /* @@ -540,18 +355,20 @@ int ubc_setcred(struct vnode *vp, struct proc *p) { struct ubc_info *uip; - struct ucred *credp; - - uip = vp->v_ubcinfo; + kauth_cred_t credp; - if (UBCINVALID(vp)) + if ( !UBCINFOEXISTS(vp)) return (0); + vnode_lock(vp); + + uip = vp->v_ubcinfo; credp = uip->ui_ucred; + if (credp == NOCRED) { - crhold(p->p_ucred); - uip->ui_ucred = p->p_ucred; + uip->ui_ucred = kauth_cred_proc_ref(p); } + vnode_unlock(vp); return (1); } @@ -560,14 +377,10 @@ ubc_setcred(struct vnode *vp, struct proc *p) __private_extern__ memory_object_t ubc_getpager(struct vnode *vp) { - struct ubc_info *uip; - - uip = vp->v_ubcinfo; - - if (UBCINVALID(vp)) - return (0); + if (UBCINFOEXISTS(vp)) + return (vp->v_ubcinfo->ui_pager); - return (uip->ui_pager); + return (0); } /* @@ -579,458 +392,217 @@ ubc_getpager(struct vnode *vp) */ memory_object_control_t -ubc_getobject(struct vnode *vp, int flags) +ubc_getobject(struct vnode *vp, __unused int flags) { - struct ubc_info *uip; - int recursed; - memory_object_control_t control; + if (UBCINFOEXISTS(vp)) + return((vp->v_ubcinfo->ui_control)); - if (UBCINVALID(vp)) - return (0); - - if (flags & UBC_FOR_PAGEOUT) - return(vp->v_ubcinfo->ui_control); - - if ((recursed = ubc_busy(vp)) == 0) - return (0); - - uip = vp->v_ubcinfo; - control = uip->ui_control; - - if ((flags & UBC_HOLDOBJECT) && (!ISSET(uip->ui_flags, UI_HASOBJREF))) { - - /* - * Take a temporary reference on the ubc info so that it won't go - * away during our recovery attempt. 
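ubc_umount() above no longer walks mp->mnt_vnodelist by hand under mntvnode_slock; vnode_iterate() now owns the traversal and the per-vnode referencing, and the callback merely reports VNODE_RETURNED when it is done with each vnode. The callback contract, condensed from ubc_umcallback(); flush_one_vnode() and flush_mount() are hypothetical names:

/*
 * Sketch of the vnode_iterate() callback style used by ubc_umount.
 * The iterator takes and drops the vnode references; the callback's
 * return value tells it how the vnode was left (VNODE_RETURNED here).
 */
static int
flush_one_vnode(vnode_t vp, __unused void *arg)
{
	if (UBCINFOEXISTS(vp)) {
		cluster_push(vp, 0);
		(void) ubc_msync(vp, (off_t)0, ubc_getsize(vp),
		    NULL, UBC_PUSHALL);
	}
	return (VNODE_RETURNED);
}

int
flush_mount(struct mount *mp)
{
	vnode_iterate(mp, 0, flush_one_vnode, NULL);
	return (0);
}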
- */ - ubc_lock(vp); - uip->ui_refcount++; - ubc_unlock(vp); - if (memory_object_recover_named(control, TRUE) == KERN_SUCCESS) { - SET(uip->ui_flags, UI_HASOBJREF); - } else { - control = MEMORY_OBJECT_CONTROL_NULL; - } - if (recursed == 1) - ubc_unbusy(vp); - ubc_info_deallocate(uip); - - } else { - if (recursed == 1) - ubc_unbusy(vp); - } - - return (control); + return (0); } -/* Set the pager */ -int -ubc_setpager(struct vnode *vp, memory_object_t pager) -{ - struct ubc_info *uip; - - uip = vp->v_ubcinfo; - - if (UBCINVALID(vp)) - return (0); - - uip->ui_pager = pager; - return (1); -} - -int -ubc_setflags(struct vnode * vp, int flags) -{ - struct ubc_info *uip; - - if (UBCINVALID(vp)) - return (0); - - uip = vp->v_ubcinfo; - - SET(uip->ui_flags, flags); - - return (1); -} - -int -ubc_clearflags(struct vnode * vp, int flags) -{ - struct ubc_info *uip; - - if (UBCINVALID(vp)) - return (0); - - uip = vp->v_ubcinfo; - - CLR(uip->ui_flags, flags); - - return (1); -} - - -int -ubc_issetflags(struct vnode * vp, int flags) -{ - struct ubc_info *uip; - - if (UBCINVALID(vp)) - return (0); - - uip = vp->v_ubcinfo; - - return (ISSET(uip->ui_flags, flags)); -} off_t -ubc_blktooff(struct vnode *vp, daddr_t blkno) +ubc_blktooff(vnode_t vp, daddr64_t blkno) { off_t file_offset; int error; - if (UBCINVALID(vp)) - return ((off_t)-1); + if (UBCINVALID(vp)) + return ((off_t)-1); - error = VOP_BLKTOOFF(vp, blkno, &file_offset); + error = VNOP_BLKTOOFF(vp, blkno, &file_offset); if (error) file_offset = -1; return (file_offset); } -daddr_t -ubc_offtoblk(struct vnode *vp, off_t offset) +daddr64_t +ubc_offtoblk(vnode_t vp, off_t offset) { - daddr_t blkno; + daddr64_t blkno; int error = 0; - if (UBCINVALID(vp)) { - return ((daddr_t)-1); - } + if (UBCINVALID(vp)) + return ((daddr64_t)-1); - error = VOP_OFFTOBLK(vp, offset, &blkno); + error = VNOP_OFFTOBLK(vp, offset, &blkno); if (error) blkno = -1; return (blkno); } -/* - * Cause the file data in VM to be pushed out to the storage - * it also causes all currently valid pages to be released - * returns 1 on success, 0 on failure - */ int -ubc_clean(struct vnode *vp, int invalidate) +ubc_pages_resident(vnode_t vp) { - off_t size; - struct ubc_info *uip; - memory_object_control_t control; - kern_return_t kret; - int flags = 0; - - if (UBCINVALID(vp)) + kern_return_t kret; + boolean_t has_pages_resident; + + if ( !UBCINFOEXISTS(vp)) return (0); - - if (!UBCINFOEXISTS(vp)) + + kret = memory_object_pages_resident(vp->v_ubcinfo->ui_control, &has_pages_resident); + + if (kret != KERN_SUCCESS) return (0); + + if (has_pages_resident == TRUE) + return (1); + + return (0); +} - /* - * if invalidate was requested, write dirty data and then discard - * the resident pages - */ - if (invalidate) - flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE); - - uip = vp->v_ubcinfo; - size = uip->ui_size; /* call ubc_getsize() ??? */ - - control = uip->ui_control; - assert(control); - - cluster_release(vp); - vp->v_clen = 0; - - /* Write the dirty data in the file and discard cached pages */ - kret = memory_object_lock_request(control, - (memory_object_offset_t)0, - (memory_object_size_t)round_page_64(size), - MEMORY_OBJECT_RETURN_ALL, flags, - VM_PROT_NO_CHANGE); - - if (kret != KERN_SUCCESS) - printf("ubc_clean: clean failed (error = %d)\n", kret); - return ((kret == KERN_SUCCESS) ? 
1 : 0); -} /* - * Cause the file data in VM to be pushed out to the storage - * currently valid pages are NOT invalidated - * returns 1 on success, 0 on failure + * This interface will eventually be deprecated + * + * clean and/or invalidate a range in the memory object that backs this + * vnode. The start offset is truncated to the page boundary and the + * size is adjusted to include the last page in the range. + * + * returns 1 for success, 0 for failure */ int -ubc_pushdirty(struct vnode *vp) +ubc_sync_range(vnode_t vp, off_t beg_off, off_t end_off, int flags) { - off_t size; - struct ubc_info *uip; - memory_object_control_t control; - kern_return_t kret; - - if (UBCINVALID(vp)) - return (0); - - if (!UBCINFOEXISTS(vp)) - return (0); - - uip = vp->v_ubcinfo; - size = uip->ui_size; /* call ubc_getsize() ??? */ - - control = uip->ui_control; - assert(control); - - vp->v_flag &= ~VHASDIRTY; - vp->v_clen = 0; - - /* Write the dirty data in the file and discard cached pages */ - kret = memory_object_lock_request(control, - (memory_object_offset_t)0, - (memory_object_size_t)round_page_64(size), - MEMORY_OBJECT_RETURN_DIRTY, FALSE, - VM_PROT_NO_CHANGE); - - if (kret != KERN_SUCCESS) - printf("ubc_pushdirty: flush failed (error = %d)\n", kret); - - return ((kret == KERN_SUCCESS) ? 1 : 0); + return (ubc_msync_internal(vp, beg_off, end_off, NULL, flags, NULL)); } + /* - * Cause the file data in VM to be pushed out to the storage - * currently valid pages are NOT invalidated - * returns 1 on success, 0 on failure + * clean and/or invalidate a range in the memory object that backs this + * vnode. The start offset is truncated to the page boundary and the + * size is adjusted to include the last page in the range. + * if a */ -int -ubc_pushdirty_range(struct vnode *vp, off_t offset, off_t size) +errno_t +ubc_msync(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags) { - struct ubc_info *uip; - memory_object_control_t control; - kern_return_t kret; - - if (UBCINVALID(vp)) - return (0); - - if (!UBCINFOEXISTS(vp)) - return (0); - - uip = vp->v_ubcinfo; + int retval; + int io_errno = 0; + + if (resid_off) + *resid_off = beg_off; - control = uip->ui_control; - assert(control); + retval = ubc_msync_internal(vp, beg_off, end_off, resid_off, flags, &io_errno); - /* Write any dirty pages in the requested range of the file: */ - kret = memory_object_lock_request(control, - (memory_object_offset_t)offset, - (memory_object_size_t)round_page_64(size), - MEMORY_OBJECT_RETURN_DIRTY, FALSE, - VM_PROT_NO_CHANGE); + if (retval == 0 && io_errno == 0) + return (EINVAL); + return (io_errno); +} - if (kret != KERN_SUCCESS) - printf("ubc_pushdirty_range: flush failed (error = %d)\n", kret); - return ((kret == KERN_SUCCESS) ? 1 : 0); -} /* - * Make sure the vm object does not vanish - * returns 1 if the hold count was incremented - * returns 0 if the hold count was not incremented - * This return value should be used to balance - * ubc_hold() and ubc_rele(). + * clean and/or invalidate a range in the memory object that backs this + * vnode. The start offset is truncated to the page boundary and the + * size is adjusted to include the last page in the range. 
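Note the return convention of the new ubc_msync() above: ubc_msync_internal() reports 1/0 for success/failure, and the errno wrapper maps a failure that recorded no I/O error to EINVAL, otherwise returning io_errno (0 on success). A hypothetical caller pushing a dirty range synchronously:

/*
 * Sketch: write back the dirty pages of [off, off+len) and wait.
 * push_dirty_range() is a hypothetical caller of the KPI above.
 */
int
push_dirty_range(vnode_t vp, off_t off, off_t len)
{
	off_t resid_off = 0;	/* where the request stopped, on failure */

	/* UBC_PUSHDIRTY: return only the dirty pages to the pager;
	 * UBC_SYNC: wait for the I/O to complete before returning */
	return (ubc_msync(vp, off, off + len, &resid_off,
	    UBC_PUSHDIRTY | UBC_SYNC));
}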
*/ -int -ubc_hold(struct vnode *vp) +static int +ubc_msync_internal(vnode_t vp, off_t beg_off, off_t end_off, off_t *resid_off, int flags, int *io_errno) { - struct ubc_info *uip; - int recursed; - memory_object_control_t object; - -retry: - - if (UBCINVALID(vp)) - return (0); + memory_object_size_t tsize; + kern_return_t kret; + int request_flags = 0; + int flush_flags = MEMORY_OBJECT_RETURN_NONE; + + if ( !UBCINFOEXISTS(vp)) + return (0); + if (end_off <= beg_off) + return (0); + if ((flags & (UBC_INVALIDATE | UBC_PUSHDIRTY | UBC_PUSHALL)) == 0) + return (0); + + if (flags & UBC_INVALIDATE) + /* + * discard the resident pages + */ + request_flags = (MEMORY_OBJECT_DATA_FLUSH | MEMORY_OBJECT_DATA_NO_CHANGE); - ubc_lock(vp); - if (ISSET(vp->v_flag, VUINIT)) { - /* - * other thread is not done initializing this - * yet, wait till it's done and try again + if (flags & UBC_SYNC) + /* + * wait for all the I/O to complete before returning */ - while (ISSET(vp->v_flag, VUINIT)) { - SET(vp->v_flag, VUWANT); /* XXX overloaded! */ - ubc_unlock(vp); - (void) tsleep((caddr_t)vp, PINOD, "ubchold", 0); - ubc_lock(vp); - } - ubc_unlock(vp); - goto retry; - } - ubc_unlock(vp); + request_flags |= MEMORY_OBJECT_IO_SYNC; - if ((recursed = ubc_busy(vp)) == 0) { - /* must be invalid or dying vnode */ - assert(UBCINVALID(vp) || - ((vp->v_flag & VXLOCK) || (vp->v_flag & VTERMINATE))); - return (0); - } + if (flags & UBC_PUSHDIRTY) + /* + * we only return the dirty pages in the range + */ + flush_flags = MEMORY_OBJECT_RETURN_DIRTY; - uip = vp->v_ubcinfo; - assert(uip->ui_control != MEMORY_OBJECT_CONTROL_NULL); - - ubc_lock(vp); - uip->ui_refcount++; - ubc_unlock(vp); - - if (!ISSET(uip->ui_flags, UI_HASOBJREF)) { - if (memory_object_recover_named(uip->ui_control, TRUE) - != KERN_SUCCESS) { - if (recursed == 1) - ubc_unbusy(vp); - ubc_info_deallocate(uip); - return (0); - } - SET(uip->ui_flags, UI_HASOBJREF); - } - if (recursed == 1) - ubc_unbusy(vp); + if (flags & UBC_PUSHALL) + /* + * then return all the interesting pages in the range (both dirty and precious) + * to the pager + */ + flush_flags = MEMORY_OBJECT_RETURN_ALL; - assert(uip->ui_refcount > 0); + beg_off = trunc_page_64(beg_off); + end_off = round_page_64(end_off); + tsize = (memory_object_size_t)end_off - beg_off; - return (1); + /* flush and/or invalidate pages in the range requested */ + kret = memory_object_lock_request(vp->v_ubcinfo->ui_control, + beg_off, tsize, resid_off, io_errno, + flush_flags, request_flags, VM_PROT_NO_CHANGE); + + return ((kret == KERN_SUCCESS) ? 1 : 0); } -/* - * Drop the holdcount. - * release the reference on the vm object if the this is "uncached" - * ubc_info. - */ -void -ubc_rele(struct vnode *vp) -{ - struct ubc_info *uip; - - if (UBCINVALID(vp)) - return; - - if (!UBCINFOEXISTS(vp)) { - /* nothing more to do for a dying vnode */ - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VTERMINATE)) - return; - panic("ubc_rele: can not"); - } - - uip = vp->v_ubcinfo; - - if (uip->ui_refcount == 1) - panic("ubc_rele: ui_refcount"); - - --uip->ui_refcount; - - if ((uip->ui_refcount == 1) - && ISSET(uip->ui_flags, UI_DONTCACHE)) - (void) ubc_release_named(vp); - - return; -} /* * The vnode is mapped explicitly, mark it so. 
*/ -__private_extern__ void -ubc_map(struct vnode *vp) +__private_extern__ int +ubc_map(vnode_t vp, int flags) { struct ubc_info *uip; + int error = 0; + int need_ref = 0; + struct vfs_context context; - if (UBCINVALID(vp)) - return; - - if (!UBCINFOEXISTS(vp)) - return; - - ubc_lock(vp); - uip = vp->v_ubcinfo; - - SET(uip->ui_flags, UI_WASMAPPED); - uip->ui_mapped = 1; - ubc_unlock(vp); + if (vnode_getwithref(vp)) + return (0); - return; -} + if (UBCINFOEXISTS(vp)) { + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); -/* - * Release the memory object reference on the vnode - * only if it is not in use - * Return 1 if the reference was released, 0 otherwise. - */ -int -ubc_release_named(struct vnode *vp) -{ - struct ubc_info *uip; - int recursed; - memory_object_control_t control; - kern_return_t kret = KERN_FAILURE; + error = VNOP_MMAP(vp, flags, &context); - if (UBCINVALID(vp)) - return (0); + if (error != EPERM) + error = 0; - if ((recursed = ubc_busy(vp)) == 0) - return (0); - uip = vp->v_ubcinfo; + if (error == 0) { + vnode_lock(vp); + + uip = vp->v_ubcinfo; - /* can not release held or mapped vnodes */ - if (ISSET(uip->ui_flags, UI_HASOBJREF) && - (uip->ui_refcount == 1) && !uip->ui_mapped) { - control = uip->ui_control; - assert(control); + if ( !ISSET(uip->ui_flags, UI_ISMAPPED)) + need_ref = 1; + SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED)); - // XXXdbg - if (vp->v_flag & VDELETED) { - ubc_setsize(vp, (off_t)0); + vnode_unlock(vp); + + if (need_ref) + vnode_ref(vp); } - - CLR(uip->ui_flags, UI_HASOBJREF); - kret = memory_object_release_name(control, - MEMORY_OBJECT_RESPECT_CACHE); } + vnode_put(vp); - if (recursed == 1) - ubc_unbusy(vp); - return ((kret != KERN_SUCCESS) ? 0 : 1); -} - -/* - * This function used to called by extensions directly. Some may - * still exist with this behavior. In those cases, we will do the - * release as part of reclaiming or cleaning the vnode. We don't - * need anything explicit - so just stub this out until those callers - * get cleaned up. - */ -int -ubc_release( - struct vnode *vp) -{ - return 0; + return (error); } /* * destroy the named reference for a given vnode */ __private_extern__ int -ubc_destroy_named( - struct vnode *vp) +ubc_destroy_named(struct vnode *vp) { memory_object_control_t control; - struct proc *p; struct ubc_info *uip; kern_return_t kret; @@ -1046,10 +618,6 @@ ubc_destroy_named( uip = vp->v_ubcinfo; - /* can not destroy held vnodes */ - if (uip->ui_refcount > 1) - return (0); - /* * Terminate the memory object. * memory_object_destroy() will result in @@ -1060,6 +628,9 @@ ubc_destroy_named( control = ubc_getobject(vp, UBC_HOLDOBJECT); if (control != MEMORY_OBJECT_CONTROL_NULL) { + /* + * XXXXX - should we hold the vnode lock here? + */ if (ISSET(vp->v_flag, VTERMINATE)) panic("ubc_destroy_named: already teminating"); SET(vp->v_flag, VTERMINATE); @@ -1074,115 +645,83 @@ ubc_destroy_named( * wait for vnode_pager_no_senders() to clear * VTERMINATE */ - while (ISSET(vp->v_flag, VTERMINATE)) { - SET(vp->v_flag, VTERMWANT); - (void)tsleep((caddr_t)&vp->v_ubcinfo, + vnode_lock(vp); + while (ISSET(vp->v_lflag, VNAMED_UBC)) { + (void)msleep((caddr_t)&vp->v_lflag, &vp->v_lock, PINOD, "ubc_destroy_named", 0); } + vnode_unlock(vp); } return (1); } /* - * Invalidate a range in the memory object that backs this - * vnode. The offset is truncated to the page boundary and the - * size is adjusted to include the last page in the range. 
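The tail of ubc_destroy_named() above also shows the new wait idiom: the old tsleep()+VTERMWANT handshake becomes a vnode-lock-protected loop around msleep(), which atomically drops and retakes &vp->v_lock across the sleep while vnode_pager_no_senders() clears VNAMED_UBC. Isolated as a sketch; the function name is hypothetical:

/*
 * Sketch: msleep()-based wait for the named reference to drain,
 * per the loop in ubc_destroy_named() above.
 */
static void
wait_for_named_release(vnode_t vp)
{
	vnode_lock(vp);
	while (ISSET(vp->v_lflag, VNAMED_UBC)) {
		/* drops and reacquires vp->v_lock around the sleep */
		(void) msleep((caddr_t)&vp->v_lflag, &vp->v_lock,
		    PINOD, "named_release", 0);
	}
	vnode_unlock(vp);
}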
+ * Find out whether a vnode is in use by UBC + * Returns 1 if file is in use by UBC, 0 if not */ int -ubc_invalidate(struct vnode *vp, off_t offset, size_t size) +ubc_isinuse(struct vnode *vp, int busycount) { - struct ubc_info *uip; - memory_object_control_t control; - kern_return_t kret; - off_t toff; - size_t tsize; - - if (UBCINVALID(vp)) + if ( !UBCINFOEXISTS(vp)) return (0); - - if (!UBCINFOEXISTS(vp)) - return (0); - - toff = trunc_page_64(offset); - tsize = (size_t)(round_page_64(offset+size) - toff); - uip = vp->v_ubcinfo; - control = uip->ui_control; - assert(control); - - /* invalidate pages in the range requested */ - kret = memory_object_lock_request(control, - (memory_object_offset_t)toff, - (memory_object_size_t)tsize, - MEMORY_OBJECT_RETURN_NONE, - (MEMORY_OBJECT_DATA_NO_CHANGE| MEMORY_OBJECT_DATA_FLUSH), - VM_PROT_NO_CHANGE); - if (kret != KERN_SUCCESS) - printf("ubc_invalidate: invalidate failed (error = %d)\n", kret); - - return ((kret == KERN_SUCCESS) ? 1 : 0); + return(ubc_isinuse_locked(vp, busycount, 0)); } -/* - * Find out whether a vnode is in use by UBC - * Returns 1 if file is in use by UBC, 0 if not - */ + int -ubc_isinuse(struct vnode *vp, int busycount) +ubc_isinuse_locked(struct vnode *vp, int busycount, int locked) { - if (!UBCINFOEXISTS(vp)) - return (0); + int retval = 0; - if (busycount == 0) { - printf("ubc_isinuse: called without a valid reference" - ": v_tag = %d\v", vp->v_tag); - vprint("ubc_isinuse", vp); - return (0); - } - if (vp->v_usecount > busycount+1) - return (1); + if (!locked) + vnode_lock(vp); - if ((vp->v_usecount == busycount+1) - && (vp->v_ubcinfo->ui_mapped == 1)) - return (1); - else - return (0); + if ((vp->v_usecount - vp->v_kusecount) > busycount) + retval = 1; + + if (!locked) + vnode_unlock(vp); + return (retval); } + /* - * The backdoor routine to clear the ui_mapped. * MUST only be called by the VM - * - * Note that this routine is not called under funnel. There are numerous - * things about the calling sequence that make this work on SMP. - * Any code change in those paths can break this. 
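ubc_map() above and ubc_unmap() below pair up to hold exactly one vnode reference while a file is mapped: the UI_ISMAPPED transition is tested and flipped under the vnode lock, and the vnode_ref()/vnode_rele() calls are deferred until after the lock is dropped, so racing mappers and unmappers still net one reference. The protocol, schematically (function names hypothetical):

/*
 * Sketch of the map-reference protocol from ubc_map()/ubc_unmap():
 * flag transitions under the vnode lock, ref-count traffic outside it.
 */
static void
note_mapped(vnode_t vp, struct ubc_info *uip)
{
	int need_ref = 0;

	vnode_lock(vp);
	if (!ISSET(uip->ui_flags, UI_ISMAPPED))
		need_ref = 1;			/* first mapper takes the ref */
	SET(uip->ui_flags, (UI_WASMAPPED | UI_ISMAPPED));
	vnode_unlock(vp);

	if (need_ref)
		vnode_ref(vp);
}

static void
note_unmapped(vnode_t vp, struct ubc_info *uip)
{
	int need_rele = 0;

	vnode_lock(vp);
	if (ISSET(uip->ui_flags, UI_ISMAPPED)) {
		CLR(uip->ui_flags, UI_ISMAPPED);
		need_rele = 1;			/* last unmapper drops it */
	}
	vnode_unlock(vp);

	if (need_rele)
		vnode_rele(vp);
}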
- * */ __private_extern__ void ubc_unmap(struct vnode *vp) { + struct vfs_context context; struct ubc_info *uip; - boolean_t funnel_state; - - if (UBCINVALID(vp)) - return; + int need_rele = 0; - if (!UBCINFOEXISTS(vp)) - return; + if (vnode_getwithref(vp)) + return; - ubc_lock(vp); - uip = vp->v_ubcinfo; - uip->ui_mapped = 0; - if ((uip->ui_refcount > 1) || !ISSET(uip->ui_flags, UI_DONTCACHE)) { - ubc_unlock(vp); - return; - } - ubc_unlock(vp); + if (UBCINFOEXISTS(vp)) { + vnode_lock(vp); - funnel_state = thread_funnel_set(kernel_flock, TRUE); - (void) ubc_release_named(vp); - (void) thread_funnel_set(kernel_flock, funnel_state); + uip = vp->v_ubcinfo; + if (ISSET(uip->ui_flags, UI_ISMAPPED)) { + CLR(uip->ui_flags, UI_ISMAPPED); + need_rele = 1; + } + vnode_unlock(vp); + + if (need_rele) { + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + (void)VNOP_MNOMAP(vp, &context); + + vnode_rele(vp); + } + } + /* + * the drop of the vnode ref will cleanup + */ + vnode_put(vp); } kern_return_t @@ -1254,7 +793,6 @@ ubc_create_upl( memory_object_control_t control; int count; int ubcflags; - off_t file_offset; kern_return_t kr; if (bufsize & 0xfff) @@ -1378,3 +916,46 @@ ubc_upl_pageinfo( { return (UPL_GET_INTERNAL_PAGE_LIST(upl)); } + +/************* UBC APIS **************/ + +int +UBCINFOMISSING(struct vnode * vp) +{ + return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_INFO_NULL)); +} + +int +UBCINFORECLAIMED(struct vnode * vp) +{ + return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_INFO_NULL)); +} + + +int +UBCINFOEXISTS(struct vnode * vp) +{ + return((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo != UBC_INFO_NULL)); +} +int +UBCISVALID(struct vnode * vp) +{ + return((vp) && ((vp)->v_type == VREG) && !((vp)->v_flag & VSYSTEM)); +} +int +UBCINVALID(struct vnode * vp) +{ + return(((vp) == NULL) || ((vp) && ((vp)->v_type != VREG)) + || ((vp) && ((vp)->v_flag & VSYSTEM))); +} +int +UBCINFOCHECK(const char * fun, struct vnode * vp) +{ + if ((vp) && ((vp)->v_type == VREG) && + ((vp)->v_ubcinfo == UBC_INFO_NULL)) { + panic("%s: lost ubc_info", (fun)); + return(1); + } else + return(0); +} + diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index efb0a3cd6..9be151def 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -64,13 +64,13 @@ #include #include #include -#include +#include #include #include #include -void pffasttimo __P((void *)); -void pfslowtimo __P((void *)); +void pffasttimo(void *); +void pfslowtimo(void *); /* * Add/delete 'domain': Link structure into system list, @@ -78,11 +78,21 @@ void pfslowtimo __P((void *)); * To delete, just remove from the list (dom_refs must be zero) */ +lck_grp_t *domain_proto_mtx_grp; +lck_attr_t *domain_proto_mtx_attr; +static lck_grp_attr_t *domain_proto_mtx_grp_attr; +lck_mtx_t *domain_proto_mtx; +extern int do_reclaim; void init_domain(register struct domain *dp) { struct protosw *pr; + if ((dp->dom_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) { + printf("init_domain: can't init domain mtx for domain=%s\n", dp->dom_name); + return; /* we have a problem... 
*/ + } + if (dp->dom_init) (*dp->dom_init)(); @@ -109,6 +119,7 @@ void init_domain(register struct domain *dp) void concat_domain(struct domain *dp) { + lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_OWNED); dp->dom_next = domains; domains = dp; } @@ -116,33 +127,30 @@ void concat_domain(struct domain *dp) void net_add_domain(register struct domain *dp) { register struct protosw *pr; - register int s; - extern int splhigh(void); - extern int splx(int); kprintf("Adding domain %s (family %d)\n", dp->dom_name, dp->dom_family); /* First, link in the domain */ - s = splhigh(); + lck_mtx_lock(domain_proto_mtx); concat_domain(dp); init_domain(dp); + lck_mtx_unlock(domain_proto_mtx); - splx(s); } int net_del_domain(register struct domain *dp) { register struct domain *dp1, *dp2; - register int s, retval = 0; - extern int splhigh(void); - extern int splx(int); + register int retval = 0; + + lck_mtx_lock(domain_proto_mtx); - if (dp->dom_refs) + if (dp->dom_refs) { + lck_mtx_unlock(domain_proto_mtx); return(EBUSY); - - s = splhigh(); + } for (dp2 = NULL, dp1 = domains; dp1; dp2 = dp1, dp1 = dp1->dom_next) { if (dp == dp1) @@ -155,27 +163,24 @@ net_del_domain(register struct domain *dp) domains = dp1->dom_next; } else retval = EPFNOSUPPORT; - splx(s); + lck_mtx_unlock(domain_proto_mtx); return(retval); } /* * net_add_proto - link a protosw into a domain's protosw chain + * + * note: protocols must use their own domain lock before calling net_add_proto */ int net_add_proto(register struct protosw *pp, register struct domain *dp) { register struct protosw *pp1, *pp2; - register int s; - extern int splhigh(void); - extern int splx(int); - s = splhigh(); for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next) { if (pp1->pr_type == pp->pr_type && pp1->pr_protocol == pp->pr_protocol) { - splx(s); return(EEXIST); } pp2 = pp1; @@ -185,13 +190,12 @@ net_add_proto(register struct protosw *pp, else pp2->pr_next = pp; pp->pr_next = NULL; - TAILQ_INIT(&pp->pr_sfilter); + TAILQ_INIT(&pp->pr_filter_head); if (pp->pr_init) (*pp->pr_init)(); /* Make sure pr_init isn't called again!! */ pp->pr_init = 0; - splx(s); return(0); } @@ -199,17 +203,15 @@ net_add_proto(register struct protosw *pp, * net_del_proto - remove a protosw from a domain's protosw chain. * Search the protosw chain for the element with matching data. * Then unlink and return. 
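init_domain() above allocates each domain's dom_mtx with lck_mtx_alloc_init(), and domaininit() below runs the full four-step lck_* setup for the global domain_proto_mtx: group attributes, group, lock attributes, then the mutex. Condensed, with the names this file uses; setup_domain_lock() is a hypothetical wrapper:

/*
 * Sketch: the standard lck_* allocation sequence introduced here.
 */
static lck_grp_attr_t	*grp_attr;
static lck_grp_t	*grp;
static lck_attr_t	*attr;
static lck_mtx_t	*mtx;

static void
setup_domain_lock(void)
{
	grp_attr = lck_grp_attr_alloc_init();
	lck_grp_attr_setdefault(grp_attr);

	grp = lck_grp_alloc_init("domain", grp_attr);

	attr = lck_attr_alloc_init();
	lck_attr_setdefault(attr);

	/* the allocation can fail; the patch just reports and bails */
	if ((mtx = lck_mtx_alloc_init(grp, attr)) == NULL)
		printf("setup_domain_lock: can't allocate mutex\n");
}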
+ * + * note: protocols must use their own domain lock before calling net_del_proto */ int net_del_proto(register int type, register int protocol, register struct domain *dp) { register struct protosw *pp1, *pp2; - int s; - extern int splhigh(void); - extern int splx(int); - s = splhigh(); for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next) { if (pp1->pr_type == type && pp1->pr_protocol == protocol) @@ -217,14 +219,12 @@ net_del_proto(register int type, pp2 = pp1; } if (pp1 == NULL) { - splx(s); return(ENXIO); } if (pp2) pp2->pr_next = pp1->pr_next; else dp->dom_protosw = pp1->pr_next; - splx(s); return(0); } @@ -255,11 +255,30 @@ domaininit() extern struct domain keydomain; #endif + /* + * allocate lock group attribute and group for domain mutexes + */ + domain_proto_mtx_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(domain_proto_mtx_grp_attr); + + domain_proto_mtx_grp = lck_grp_alloc_init("domain", domain_proto_mtx_grp_attr); + + /* + * allocate the lock attribute for per domain mutexes + */ + domain_proto_mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(domain_proto_mtx_attr); + + if ((domain_proto_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) { + printf("domaininit: can't init domain mtx for domain list\n"); + return; /* we have a problem... */ + } /* * Add all the static domains to the domains list */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_mtx_lock(domain_proto_mtx); + concat_domain(&localdomain); concat_domain(&routedomain); concat_domain(&inetdomain); @@ -293,9 +312,9 @@ domaininit() for (dp = domains; dp; dp = dp->dom_next) init_domain(dp); + lck_mtx_unlock(domain_proto_mtx); timeout(pffasttimo, NULL, 1); timeout(pfslowtimo, NULL, 1); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); } struct protosw * @@ -305,14 +324,20 @@ pffindtype(family, type) register struct domain *dp; register struct protosw *pr; + lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(domain_proto_mtx); for (dp = domains; dp; dp = dp->dom_next) if (dp->dom_family == family) goto found; + lck_mtx_unlock(domain_proto_mtx); return (0); found: for (pr = dp->dom_protosw; pr; pr = pr->pr_next) - if (pr->pr_type && pr->pr_type == type) + if (pr->pr_type && pr->pr_type == type) { + lck_mtx_unlock(domain_proto_mtx); return (pr); + } + lck_mtx_unlock(domain_proto_mtx); return (0); } @@ -320,18 +345,35 @@ struct domain * pffinddomain(int pf) { struct domain *dp; + lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(domain_proto_mtx); dp = domains; while (dp) - { if (dp->dom_family == pf) + { if (dp->dom_family == pf) { + lck_mtx_unlock(domain_proto_mtx); return(dp); + } dp = dp->dom_next; } + lck_mtx_unlock(domain_proto_mtx); return(NULL); } struct protosw * pffindproto(family, protocol, type) int family, protocol, type; +{ + register struct protosw *pr; + lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(domain_proto_mtx); + pr = pffindproto_locked(family, protocol, type); + lck_mtx_unlock(domain_proto_mtx); + return (pr); +} + +struct protosw * +pffindproto_locked(family, protocol, type) + int family, protocol, type; { register struct domain *dp; register struct protosw *pr; @@ -356,18 +398,12 @@ found: } int -net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + 
user_addr_t newp, size_t newlen, struct proc *p) { register struct domain *dp; register struct protosw *pr; - int family, protocol; + int family, protocol, error; /* * All sysctl names at this level are nonterminal; @@ -381,15 +417,21 @@ net_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) if (family == 0) return (0); + lck_mtx_lock(domain_proto_mtx); for (dp = domains; dp; dp = dp->dom_next) if (dp->dom_family == family) goto found; + lck_mtx_unlock(domain_proto_mtx); return (ENOPROTOOPT); found: for (pr = dp->dom_protosw; pr; pr = pr->pr_next) - if (pr->pr_protocol == protocol && pr->pr_sysctl) - return ((*pr->pr_sysctl)(name + 2, namelen - 2, - oldp, oldlenp, newp, newlen)); + if (pr->pr_protocol == protocol && pr->pr_sysctl) { + error = (*pr->pr_sysctl)(name + 2, namelen - 2, + oldp, oldlenp, newp, newlen); + lck_mtx_unlock(domain_proto_mtx); + return (error); + } + lck_mtx_unlock(domain_proto_mtx); return (ENOPROTOOPT); } @@ -412,10 +454,13 @@ pfctlinput2(cmd, sa, ctlparam) if (!sa) return; + + lck_mtx_lock(domain_proto_mtx); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr; pr = pr->pr_next) if (pr->pr_ctlinput) (*pr->pr_ctlinput)(cmd, sa, ctlparam); + lck_mtx_unlock(domain_proto_mtx); } void @@ -424,17 +469,19 @@ pfslowtimo(arg) { register struct domain *dp; register struct protosw *pr; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) + lck_mtx_lock(domain_proto_mtx); + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr; pr = pr->pr_next) { if (pr->pr_slowtimo) (*pr->pr_slowtimo)(); + if (do_reclaim && pr->pr_drain) + (*pr->pr_drain)(); + } + do_reclaim = 0; + lck_mtx_unlock(domain_proto_mtx); timeout(pfslowtimo, NULL, hz/2); - (void) thread_funnel_set(network_flock, FALSE); } void @@ -443,15 +490,12 @@ pffasttimo(arg) { register struct domain *dp; register struct protosw *pr; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); + lck_mtx_lock(domain_proto_mtx); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr; pr = pr->pr_next) if (pr->pr_fasttimo) (*pr->pr_fasttimo)(); + lck_mtx_unlock(domain_proto_mtx); timeout(pffasttimo, NULL, hz/5); - - (void) thread_funnel_set(network_flock, FALSE); } diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 270534767..84100312a 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
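Back in uipc_domain.c, the lookup routines (pffindtype, pffinddomain, pffindproto) all take domain_proto_mtx around the list walk, and pffindproto gains a _locked variant for callers that already hold the mutex, since the public path asserts it NOTOWNED. Usage of the two flavors, sketched; both callers are hypothetical:

/*
 * Sketch: picking the right lookup variant for the locking context.
 */
struct protosw *
find_tcp(void)
{
	/* unlocked caller: pffindproto takes and drops the mutex itself */
	return (pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM));
}

static struct protosw *
find_tcp_under_lock(void)
{
	struct protosw *pr;

	lck_mtx_lock(domain_proto_mtx);
	/* already holding domain_proto_mtx: the _locked variant avoids
	 * re-taking the non-recursive mutex */
	pr = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	lck_mtx_unlock(domain_proto_mtx);
	return (pr);
}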
* * @APPLE_LICENSE_HEADER_START@ * @@ -70,10 +70,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -81,6 +81,9 @@ #include +extern vm_offset_t kmem_mb_alloc(vm_map_t , int ); +extern boolean_t PE_parse_boot_arg(const char *, void *); + #define _MCLREF(p) (++mclrefcnt[mtocl(p)]) #define _MCLUNREF(p) (--mclrefcnt[mtocl(p)] == 0) #define _M_CLEAR_PKTHDR(mbuf_ptr) (mbuf_ptr)->m_pkthdr.rcvif = NULL; \ @@ -89,24 +92,28 @@ (mbuf_ptr)->m_pkthdr.csum_flags = 0; \ (mbuf_ptr)->m_pkthdr.csum_data = 0; \ (mbuf_ptr)->m_pkthdr.aux = (struct mbuf*)NULL; \ - (mbuf_ptr)->m_pkthdr.reserved_1 = 0; \ (mbuf_ptr)->m_pkthdr.vlan_tag = 0; \ - (mbuf_ptr)->m_pkthdr.reserved2 = NULL; + (mbuf_ptr)->m_pkthdr.socket_id = 0; \ + SLIST_INIT(&(mbuf_ptr)->m_pkthdr.tags); -extern pmap_t kernel_pmap; /* The kernel's pmap */ /* kernel translater */ extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); -decl_simple_lock_data(, mbuf_slock); +lck_mtx_t * mbuf_mlock; +lck_grp_t * mbuf_mlock_grp; +lck_grp_attr_t * mbuf_mlock_grp_attr; +lck_attr_t * mbuf_mlock_attr; +extern lck_mtx_t *domain_proto_mtx; + struct mbuf *mfree; /* mbuf free list */ struct mbuf *mfreelater; /* mbuf deallocation list */ extern vm_map_t mb_map; /* special map */ int m_want; /* sleepers on mbufs */ -extern int nmbclusters; /* max number of mapped clusters */ short *mclrefcnt; /* mapped cluster reference counts */ int *mcl_paddr; static ppnum_t mcl_paddr_base; /* Handle returned by IOMapper::iovmAlloc() */ union mcluster *mclfree; /* mapped cluster free list */ +union mbigcluster *mbigfree; /* mapped cluster free list */ int max_linkhdr; /* largest link-level header */ int max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ @@ -116,18 +123,32 @@ union mcluster *mbutl; /* first mapped cluster address */ union mcluster *embutl; /* ending virtual address of mclusters */ static int nclpp; /* # clusters per physical page */ -static char mbfail[] = "mbuf not mapped"; -static int m_howmany(); +static int m_howmany(int, size_t ); +void m_reclaim(void); +static int m_clalloc(const int , const int, const size_t, int); +int do_reclaim = 0; + +#define MF_NOWAIT 0x1 +#define MF_BIG 0x2 /* The number of cluster mbufs that are allocated, to start. 
*/ #define MINCL max(16, 2) static int mbuf_expand_thread_wakeup = 0; static int mbuf_expand_mcl = 0; +static int mbuf_expand_big = 0; static int mbuf_expand_thread_initialized = 0; static void mbuf_expand_thread_init(void); +static void mbuf_expand_thread(void); +static int m_expand(int ); +static caddr_t m_bigalloc(int ); +static void m_bigfree(caddr_t , u_int , caddr_t ); +static struct mbuf * m_mbigget(struct mbuf *, int ); +void mbinit(void); +static void m_range_check(void *addr); + #if 0 static int mfree_munge = 0; @@ -176,25 +197,39 @@ munge_mbuf(struct mbuf *m) } -void -mbinit() +static void +m_range_check(void *addr) { - int s,m; + if (addr && (addr < (void *)mbutl || addr >= (void *)embutl)) + panic("mbuf address out of range 0x%x", addr); +} + +__private_extern__ void +mbinit(void) +{ + int m; int initmcl = 32; - int mcl_pages; + int mcl_pages; if (nclpp) return; nclpp = round_page_32(MCLBYTES) / MCLBYTES; /* see mbufgc() */ if (nclpp < 1) nclpp = 1; - MBUF_LOCKINIT(); -// NETISR_LOCKINIT(); + mbuf_mlock_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(mbuf_mlock_grp_attr); + + mbuf_mlock_grp = lck_grp_alloc_init("mbuf", mbuf_mlock_grp_attr); + mbuf_mlock_attr = lck_attr_alloc_init(); + lck_attr_setdefault(mbuf_mlock_attr); + + mbuf_mlock = lck_mtx_alloc_init(mbuf_mlock_grp, mbuf_mlock_attr); - mbstat.m_msize = MSIZE; - mbstat.m_mclbytes = MCLBYTES; - mbstat.m_minclsize = MINCLSIZE; - mbstat.m_mlen = MLEN; - mbstat.m_mhlen = MHLEN; + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + mbstat.m_bigmclbytes = NBPG; if (nmbclusters == 0) nmbclusters = NMBCLUSTERS; @@ -205,20 +240,20 @@ mbinit() for (m = 0; m < nmbclusters; m++) mclrefcnt[m] = -1; - /* Calculate the number of pages assigned to the cluster pool */ - mcl_pages = nmbclusters/(PAGE_SIZE/CLBYTES); + /* Calculate the number of pages assigned to the cluster pool */ + mcl_pages = nmbclusters/(NBPG/CLBYTES); MALLOC(mcl_paddr, int *, mcl_pages * sizeof(int), M_TEMP, M_WAITOK); if (mcl_paddr == 0) panic("mbinit1"); - /* Register with the I/O Bus mapper */ - mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages); + /* Register with the I/O Bus mapper */ + mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages); bzero((char *)mcl_paddr, mcl_pages * sizeof(int)); embutl = (union mcluster *)((unsigned char *)mbutl + (nmbclusters * MCLBYTES)); PE_parse_boot_arg("initmcl", &initmcl); - if (m_clalloc(max(PAGE_SIZE/CLBYTES, 1) * initmcl, M_WAIT) == 0) + if (m_clalloc(max(NBPG/CLBYTES, 1) * initmcl, M_WAIT, MCLBYTES, 0) == 0) goto bad; MBUF_UNLOCK(); @@ -232,108 +267,156 @@ bad: /* * Allocate some number of mbuf clusters * and place on cluster free list. + * Take the mbuf lock (if not already locked) and do not release it */ /* ARGSUSED */ -m_clalloc(ncl, nowait) - register int ncl; - int nowait; +static int +m_clalloc( + const int num, + const int nowait, + const size_t bufsize, + int locked) { - register union mcluster *mcl; - register int i; - vm_size_t size; - static char doing_alloc; + int i; + vm_size_t size = 0; + int numpages = 0; + vm_offset_t page = 0; + if (locked == 0) + MBUF_LOCK(); /* * Honor the caller's wish to block or not block. * We have a way to grow the pool asynchronously, * by kicking the dlil_input_thread. 
 */ - if ((i = m_howmany()) <= 0) + i = m_howmany(num, bufsize); + if (i == 0 || nowait == M_DONTWAIT) goto out; - if ((nowait == M_DONTWAIT)) - goto out; + MBUF_UNLOCK(); + size = round_page_32(i * bufsize); + page = kmem_mb_alloc(mb_map, size); - if (ncl < i) - ncl = i; - size = round_page_32(ncl * MCLBYTES); - mcl = (union mcluster *)kmem_mb_alloc(mb_map, size); - - if (mcl == 0 && ncl > 1) { - size = round_page_32(MCLBYTES); /* Try for 1 if failed */ - mcl = (union mcluster *)kmem_mb_alloc(mb_map, size); + if (page == 0) { + size = NBPG; /* Try for 1 if failed */ + page = kmem_mb_alloc(mb_map, size); } + MBUF_LOCK(); - if (mcl) { - MBUF_LOCK(); - ncl = size / MCLBYTES; - for (i = 0; i < ncl; i++) { - if (++mclrefcnt[mtocl(mcl)] != 0) - panic("m_clalloc already there"); - if (((int)mcl & PAGE_MASK) == 0) { - ppnum_t offset = ((char *)mcl - (char *)mbutl)/PAGE_SIZE; - ppnum_t new_page = pmap_find_phys(kernel_pmap, (vm_address_t) mcl); - - /* - * In the case of no mapper being available - * the following code nops and returns the - * input page, if there is a mapper the I/O - * page appropriate is returned. - */ - new_page = IOMapperInsertPage(mcl_paddr_base, offset, new_page); - mcl_paddr[offset] = new_page << 12; - } - - mcl->mcl_next = mclfree; - mclfree = mcl++; + if (page) { + numpages = size / NBPG; + for (i = 0; i < numpages; i++, page += NBPG) { + if (((int)page & PGOFSET) == 0) { + ppnum_t offset = ((char *)page - (char *)mbutl)/NBPG; + ppnum_t new_page = pmap_find_phys(kernel_pmap, (vm_address_t) page); + + /* + * In the case of no mapper being available + * the following code nops and returns the + * input page; if there is a mapper, the appropriate + * I/O page is returned. + */ + new_page = IOMapperInsertPage(mcl_paddr_base, offset, new_page); + mcl_paddr[offset] = new_page << 12; + } + if (bufsize == MCLBYTES) { + union mcluster *mcl = (union mcluster *)page; + + if (++mclrefcnt[mtocl(mcl)] != 0) + panic("m_clalloc already there"); + mcl->mcl_next = mclfree; + mclfree = mcl++; + if (++mclrefcnt[mtocl(mcl)] != 0) + panic("m_clalloc already there"); + mcl->mcl_next = mclfree; + mclfree = mcl++; + } else { + union mbigcluster *mbc = (union mbigcluster *)page; + + if (++mclrefcnt[mtocl(mbc)] != 0) + panic("m_clalloc already there"); + if (++mclrefcnt[mtocl(mbc) + 1] != 0) + panic("m_clalloc already there"); + + mbc->mbc_next = mbigfree; + mbigfree = mbc; + } + } + if (bufsize == MCLBYTES) { + int numcl = numpages << 1; + mbstat.m_clfree += numcl; + mbstat.m_clusters += numcl; + return (numcl); + } else { + mbstat.m_bigclfree += numpages; + mbstat.m_bigclusters += numpages; + return (numpages); } - mbstat.m_clfree += ncl; - mbstat.m_clusters += ncl; - return (ncl); } /* else ... */ out: - MBUF_LOCK(); - /* - * When non-blocking we kick the dlil thread if we havve to grow the + * When non-blocking we kick a thread if we have to grow the * pool or if the number of free clusters is less than requested.
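 * A minimal sketch, assuming a hypothetical in-file caller that wants
 * 32 regular clusters without sleeping (M_DONTWAIT never blocks, it
 * only wakes mbuf_expand_thread to grow the pool in the background):
 *
 *	if (m_clalloc(32, M_DONTWAIT, MCLBYTES, 0) == 0)
 *		error = ENOBUFS;	(hypothetical fallback path)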
*/ - if ((nowait == M_DONTWAIT) && (i > 0 || ncl >= mbstat.m_clfree)) { - mbuf_expand_mcl = 1; - if (mbuf_expand_thread_initialized) - wakeup((caddr_t)&mbuf_expand_thread_wakeup); + if (bufsize == MCLBYTES) { + if (i > 0) { + /* Remember total number of clusters needed at this time */ + i += mbstat.m_clusters; + if (i > mbuf_expand_mcl) { + mbuf_expand_mcl = i; + if (mbuf_expand_thread_initialized) + wakeup((caddr_t)&mbuf_expand_thread_wakeup); + } + } + + if (mbstat.m_clfree >= num) + return 1; + } else { + if (i > 0) { + /* Remember total number of 4KB clusters needed at this time */ + i += mbstat.m_bigclusters; + if (i > mbuf_expand_big) { + mbuf_expand_big = i; + if (mbuf_expand_thread_initialized) + wakeup((caddr_t)&mbuf_expand_thread_wakeup); + } + } + + if (mbstat.m_bigclfree >= num) + return 1; } - - if (mbstat.m_clfree >= ncl) - return 1; - return 0; } /* * Add more free mbufs by cutting up a cluster. */ -m_expand(canwait) - int canwait; +static int +m_expand(int canwait) { - register caddr_t mcl; + caddr_t mcl; - if (mbstat.m_clfree < (mbstat.m_clusters >> 4)) - /* 1/16th of the total number of cluster mbufs allocated is - reserved for large packets. The number reserved must - always be < 1/2, or future allocation will be prevented. - */ - return 0; + if (mbstat.m_clfree < (mbstat.m_clusters >> 4)) { + /* + * 1/16th of the total number of cluster mbufs allocated is + * reserved for large packets. The number reserved must + * always be < 1/2, or future allocation will be prevented. + */ + (void)m_clalloc(1, canwait, MCLBYTES, 0); + MBUF_UNLOCK(); + if (mbstat.m_clfree < (mbstat.m_clusters >> 4)) + return 0; + } MCLALLOC(mcl, canwait); if (mcl) { - register struct mbuf *m = (struct mbuf *)mcl; - register int i = NMBPCL; + struct mbuf *m = (struct mbuf *)mcl; + int i = NMBPCL; MBUF_LOCK(); mbstat.m_mtypes[MT_FREE] += i; mbstat.m_mbufs += i; while (i--) { - _MFREE_MUNGE(m); + _MFREE_MUNGE(m); m->m_type = MT_FREE; m->m_next = mfree; mfree = m++; @@ -352,14 +435,12 @@ m_expand(canwait) * then re-attempt to allocate an mbuf. 
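 * A sketch of the caller-visible contract (hypothetical caller): with
 * M_DONTWAIT the retry loop below gives up after one pass, so NULL is
 * possible; with M_WAIT it sleeps (about a second per pass) and loops
 * until an mbuf appears:
 *
 *	struct mbuf *m = m_get(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);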
*/ struct mbuf * -m_retry(canwait, type) - int canwait, type; +m_retry( + int canwait, + int type) { - register struct mbuf *m; - int wait, s; - funnel_t * fnl; - int fnl_switch = 0; - boolean_t funnel_state; + struct mbuf *m; + int wait; for (;;) { (void) m_expand(canwait); @@ -369,12 +450,13 @@ m_retry(canwait, type) (m)->m_type = (type); (m)->m_data = (m)->m_dat; (m)->m_flags = 0; + (m)->m_len = 0; } if (m || canwait == M_DONTWAIT) break; MBUF_LOCK(); wait = m_want++; - mbuf_expand_mcl = 1; + mbuf_expand_mcl++; if (wait == 0) mbstat.m_drain++; else @@ -384,25 +466,13 @@ m_retry(canwait, type) if (mbuf_expand_thread_initialized) wakeup((caddr_t)&mbuf_expand_thread_wakeup); - /* - * Need to be inside network funnel for m_reclaim because it calls into the - * socket domains and tsleep end-up calling splhigh - */ - fnl = thread_funnel_get(); - if (wait == 0 && fnl == network_flock) { + if (wait == 0) { m_reclaim(); - } else if (fnl != THR_FUNNEL_NULL) { - /* Sleep with a small timeout as insurance */ - (void) tsleep((caddr_t)&mfree, PZERO-1, "m_retry", hz); } else { - /* We are called from a non-BSD context: use mach primitives */ - u_int64_t abstime = 0; - - assert_wait((event_t)&mfree, THREAD_UNINT); - clock_interval_to_deadline(hz, NSEC_PER_SEC / hz, &abstime); - thread_set_timer_deadline(abstime); - if (thread_block(THREAD_CONTINUE_NULL) != THREAD_TIMED_OUT) - thread_cancel_timer(); + struct timespec ts; + ts.tv_sec = 1; + ts.tv_nsec = 0; + (void) msleep((caddr_t)&mfree, 0, (PZERO-1) | PDROP, "m_retry", &ts); } } if (m == 0) @@ -414,12 +484,14 @@ m_retry(canwait, type) * As above; retry an MGETHDR. */ struct mbuf * -m_retryhdr(canwait, type) - int canwait, type; +m_retryhdr( + int canwait, + int type) { - register struct mbuf *m; + struct mbuf *m; - if (m = m_retry(canwait, type)) { + if ((m = m_retry(canwait, type))) { + m->m_next = m->m_nextpkt = 0; m->m_flags |= M_PKTHDR; m->m_data = m->m_pktdat; _M_CLEAR_PKTHDR(m); @@ -427,15 +499,10 @@ m_retryhdr(canwait, type) return (m); } -m_reclaim() +void +m_reclaim(void) { - register struct domain *dp; - register struct protosw *pr; - - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) - if (pr->pr_drain) - (*pr->pr_drain)(); + do_reclaim = 1; /* drain is performed in pfslowtimo(), to avoid deadlocks */ mbstat.m_drain++; } @@ -445,10 +512,15 @@ m_reclaim() * for critical paths. 
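 * e.g. MGET(m, M_DONTWAIT, MT_DATA) is the macro form of the
 * m_get(M_DONTWAIT, MT_DATA) call below; both hand back NULL when
 * the pool is exhausted and the caller asked not to wait.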
 */ struct mbuf * -m_get(nowait, type) - int nowait, type; +m_get( + int nowait, + int type) { - register struct mbuf *m; + struct mbuf *m; + + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); _MINTGET(m, type); if (m) { @@ -456,17 +528,29 @@ m_get(nowait, type) m->m_type = type; m->m_data = m->m_dat; m->m_flags = 0; + m->m_len = 0; } else (m) = m_retry(nowait, type); + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + return (m); } struct mbuf * -m_gethdr(nowait, type) - int nowait, type; +m_gethdr( + int nowait, + int type) { - register struct mbuf *m; + struct mbuf *m; + + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + _MINTGET(m, type); if (m) { @@ -474,18 +558,25 @@ m_gethdr(nowait, type) m->m_type = type; m->m_data = m->m_pktdat; m->m_flags = M_PKTHDR; + m->m_len = 0; _M_CLEAR_PKTHDR(m) } else m = m_retryhdr(nowait, type); + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + return m; } struct mbuf * -m_getclr(nowait, type) - int nowait, type; +m_getclr( + int nowait, + int type) { - register struct mbuf *m; + struct mbuf *m; MGET(m, nowait, type); if (m == 0) @@ -495,11 +586,15 @@ m_getclr(nowait, type) } struct mbuf * -m_free(m) - struct mbuf *m; +m_free( + struct mbuf *m) { struct mbuf *n = m->m_next; - int i, s; + int i; + + m_range_check(m); + m_range_check(mfree); + m_range_check(mclfree); if (m->m_type == MT_FREE) panic("freeing free mbuf"); @@ -509,6 +604,8 @@ m_free(m) { m_freem(m->m_pkthdr.aux); } + if ((m->m_flags & M_PKTHDR) != 0) + m_tag_delete_chain(m, NULL); MBUF_LOCK(); if ((m->m_flags & M_EXT)) @@ -517,6 +614,9 @@ m_free(m) remque((queue_t)&m->m_ext.ext_refs); } else if (m->m_ext.ext_free == NULL) { union mcluster *mcl= (union mcluster *)m->m_ext.ext_buf; + + m_range_check(mcl); + if (_MCLUNREF(mcl)) { mcl->mcl_next = mclfree; mclfree = mcl; @@ -537,7 +637,7 @@ m_free(m) } mbstat.m_mtypes[m->m_type]--; (void) _MCLUNREF(m); - _MFREE_MUNGE(m); + _MFREE_MUNGE(m); m->m_type = MT_FREE; mbstat.m_mtypes[m->m_type]++; m->m_flags = 0; @@ -553,9 +653,9 @@ m_free(m) /* m_mclget() adds an mbuf cluster to a normal mbuf */ struct mbuf * -m_mclget(m, nowait) - struct mbuf *m; - int nowait; +m_mclget( + struct mbuf *m, + int nowait) { MCLALLOC(m->m_ext.ext_buf, nowait); if (m->m_ext.ext_buf) { @@ -572,12 +672,12 @@ m_mclget(m, nowait) /* m_mclalloc() allocates an mbuf cluster */ caddr_t -m_mclalloc( nowait) - int nowait; +m_mclalloc( + int nowait) { caddr_t p; - (void)m_clalloc(1, nowait); + (void)m_clalloc(1, nowait, MCLBYTES, 0); if ((p = (caddr_t)mclfree)) { ++mclrefcnt[mtocl(p)]; mbstat.m_clfree--; @@ -587,16 +687,19 @@ m_mclalloc( nowait) } MBUF_UNLOCK(); - return p; + return p; } /* m_mclfree() releases a reference to a cluster allocated by MCLALLOC, * freeing the cluster if the reference count has reached 0.
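 * A minimal pairing sketch (hypothetical caller):
 *
 *	caddr_t p;
 *	MCLALLOC(p, M_WAIT);		takes a reference on the cluster
 *	if (p != NULL) {
 *		(use the 2 KB buffer)
 *		MCLFREE(p);		drops it; freed at refcount 0
 *	}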
 */ void -m_mclfree(p) - caddr_t p; +m_mclfree( + caddr_t p) { MBUF_LOCK(); + + m_range_check(p); + if (--mclrefcnt[mtocl(p)] == 0) { ((union mcluster *)(p))->mcl_next = mclfree; mclfree = (union mcluster *)(p); @@ -607,153 +710,450 @@ /* mcl_hasreference() checks if a cluster of an mbuf is referenced by another mbuf */ int -m_mclhasreference(m) - struct mbuf *m; +m_mclhasreference( + struct mbuf *m) { return (m->m_ext.ext_refs.forward != &(m->m_ext.ext_refs)); } +__private_extern__ caddr_t +m_bigalloc(int nowait) +{ + caddr_t p; + + (void)m_clalloc(1, nowait, NBPG, 0); + if ((p = (caddr_t)mbigfree)) { + if (mclrefcnt[mtocl(p)] != mclrefcnt[mtocl(p) + 1]) + panic("m_bigalloc mclrefcnt %x mismatch %d != %d", + p, mclrefcnt[mtocl(p)], mclrefcnt[mtocl(p) + 1]); + if (mclrefcnt[mtocl(p)] || mclrefcnt[mtocl(p) + 1]) + panic("m_bigalloc mclrefcnt %x not null %d != %d", + p, mclrefcnt[mtocl(p)], mclrefcnt[mtocl(p) + 1]); + ++mclrefcnt[mtocl(p)]; + ++mclrefcnt[mtocl(p) + 1]; + mbstat.m_bigclfree--; + mbigfree = ((union mbigcluster *)p)->mbc_next; + } else { + mbstat.m_drops++; + } + MBUF_UNLOCK(); + return p; +} + +__private_extern__ void +m_bigfree(caddr_t p, __unused u_int size, __unused caddr_t arg) +{ + m_range_check(p); + + if (mclrefcnt[mtocl(p)] != mclrefcnt[mtocl(p) + 1]) + panic("m_bigfree mclrefcnt %x mismatch %d != %d", + p, mclrefcnt[mtocl(p)], mclrefcnt[mtocl(p) + 1]); + --mclrefcnt[mtocl(p)]; + --mclrefcnt[mtocl(p) + 1]; + if (mclrefcnt[mtocl(p)] == 0) { + ((union mbigcluster *)(p))->mbc_next = mbigfree; + mbigfree = (union mbigcluster *)(p); + mbstat.m_bigclfree++; + } +} + +/* m_mbigget() adds a 4KB mbuf cluster to a normal mbuf */ +__private_extern__ struct mbuf * +m_mbigget(struct mbuf *m, int nowait) +{ + m->m_ext.ext_buf = m_bigalloc(nowait); + if (m->m_ext.ext_buf) { + m->m_data = m->m_ext.ext_buf; + m->m_flags |= M_EXT; + m->m_ext.ext_size = NBPG; + m->m_ext.ext_free = m_bigfree; + m->m_ext.ext_arg = 0; + m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = + &m->m_ext.ext_refs; + } + + return m; +} + + /* */ void -m_copy_pkthdr(to, from) - struct mbuf *to, *from; +m_copy_pkthdr( + struct mbuf *to, + struct mbuf *from) { to->m_pkthdr = from->m_pkthdr; from->m_pkthdr.aux = (struct mbuf *)NULL; + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = (to)->m_pktdat; } -/* Best effort to get a mbuf cluster + pkthdr under one lock. - * If we don't have them avail, just bail out and use the regular - * path. - * Used by drivers to allocated packets on receive ring. +/* + * "Move" mbuf pkthdr from "from" to "to". + * "from" must have M_PKTHDR set, and "to" must be empty. */ -struct mbuf * -m_getpacket(void) +#ifndef __APPLE__ +void +m_move_pkthdr(struct mbuf *to, struct mbuf *from) { - struct mbuf *m; - m_clalloc(1, M_DONTWAIT); /* takes the MBUF_LOCK, but doesn't release it...
 */ - if ((mfree != 0) && (mclfree != 0)) { /* mbuf + cluster are available */ - m = mfree; - mfree = m->m_next; - MCHECK(m); - ++mclrefcnt[mtocl(m)]; - mbstat.m_mtypes[MT_FREE]--; - mbstat.m_mtypes[MT_DATA]++; - m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */ - ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; - mbstat.m_clfree--; - mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next; - - m->m_next = m->m_nextpkt = 0; - m->m_type = MT_DATA; - m->m_data = m->m_ext.ext_buf; - m->m_flags = M_PKTHDR | M_EXT; - _M_CLEAR_PKTHDR(m) - m->m_ext.ext_free = 0; - m->m_ext.ext_size = MCLBYTES; - m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = - &m->m_ext.ext_refs; - MBUF_UNLOCK(); - } - else { /* slow path: either mbuf or cluster need to be allocated anyway */ - MBUF_UNLOCK(); + KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster")); - MGETHDR(m, M_WAITOK, MT_DATA ); - - if ( m == 0 ) - return (NULL); - - MCLGET( m, M_WAITOK ); - if ( ( m->m_flags & M_EXT ) == 0 ) - { - m_free(m); m = 0; - } - } - return (m); + to->m_flags = from->m_flags & M_COPYFLAGS; + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; /* especially tags */ + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ + from->m_flags &= ~M_PKTHDR; } +#endif +/* + * Duplicate "from"'s mbuf pkthdr in "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + * In particular, this does a deep copy of the packet tags. + */ +#ifndef __APPLE__ +int +m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) +{ + to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + if ((to->m_flags & M_EXT) == 0) + to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; + SLIST_INIT(&to->m_pkthdr.tags); + return (m_tag_copy_chain(to, from, how)); +} +#endif /* * return a list of mbuf hdrs that point to clusters... - * try for num_needed, if this can't be met, return whatever + * try for num_needed, if wantall is not set, return whatever * number were available... set up the first num_with_pkthdrs * with mbuf hdrs configured as packet headers... these are * chained on the m_nextpkt field... any packets requested beyond * this are chained onto the last packet header's m_next field. + * The size of the cluster is controlled by the parameter bufsize. */ -struct mbuf * -m_getpackets(int num_needed, int num_with_pkthdrs, int how) +__private_extern__ struct mbuf * +m_getpackets_internal(unsigned int *num_needed, int num_with_pkthdrs, int how, int wantall, size_t bufsize) { struct mbuf *m; struct mbuf **np, *top; - + unsigned int num, needed = *num_needed; + + if (bufsize != MCLBYTES && bufsize != NBPG) + return 0; + top = NULL; np = &top; + + (void)m_clalloc(needed, how, bufsize, 0); /* takes the MBUF_LOCK, but doesn't release it...
 */ + + for (num = 0; num < needed; num++) { + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + if (mfree && ((bufsize == NBPG && mbigfree) || (bufsize == MCLBYTES && mclfree))) { + /* mbuf + cluster are available */ + m = mfree; + MCHECK(m); + mfree = m->m_next; + ++mclrefcnt[mtocl(m)]; + mbstat.m_mtypes[MT_FREE]--; + mbstat.m_mtypes[MT_DATA]++; + if (bufsize == NBPG) { + m->m_ext.ext_buf = (caddr_t)mbigfree; /* get the big cluster */ + ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; + ++mclrefcnt[mtocl(m->m_ext.ext_buf) + 1]; + mbstat.m_bigclfree--; + mbigfree = ((union mbigcluster *)(m->m_ext.ext_buf))->mbc_next; + m->m_ext.ext_free = m_bigfree; + m->m_ext.ext_size = NBPG; + } else { + m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */ + ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; + mbstat.m_clfree--; + mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next; + m->m_ext.ext_free = 0; + m->m_ext.ext_size = MCLBYTES; + } + m->m_ext.ext_arg = 0; + m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; + m->m_next = m->m_nextpkt = 0; + m->m_type = MT_DATA; + m->m_data = m->m_ext.ext_buf; + m->m_len = 0; - m_clalloc(num_needed, how); /* takes the MBUF_LOCK, but doesn't release it... */ + if (num_with_pkthdrs == 0) + m->m_flags = M_EXT; + else { + m->m_flags = M_PKTHDR | M_EXT; + _M_CLEAR_PKTHDR(m); + + num_with_pkthdrs--; + } + } else { + MBUF_UNLOCK(); + + if (num_with_pkthdrs == 0) { + MGET(m, how, MT_DATA ); + } else { + MGETHDR(m, how, MT_DATA); + + num_with_pkthdrs--; + } + if (m == 0) + goto fail; + + if (bufsize == NBPG) + m = m_mbigget(m, how); + else + m = m_mclget(m, how); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + goto fail; + } + MBUF_LOCK(); + } + *np = m; + + if (num_with_pkthdrs) + np = &m->m_nextpkt; + else + np = &m->m_next; + } + MBUF_UNLOCK(); + + *num_needed = num; + return (top); +fail: + if (wantall && top) { + m_freem(top); + return 0; + } + return top; +} - while (num_needed--) { - if (mfree && mclfree) { /* mbuf + cluster are available */ - m = mfree; - MCHECK(m); - mfree = m->m_next; - ++mclrefcnt[mtocl(m)]; - mbstat.m_mtypes[MT_FREE]--; - mbstat.m_mtypes[MT_DATA]++; - m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */ - ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; - mbstat.m_clfree--; - mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next; - m->m_next = m->m_nextpkt = 0; - m->m_type = MT_DATA; - m->m_data = m->m_ext.ext_buf; - m->m_ext.ext_free = 0; - m->m_ext.ext_size = MCLBYTES; - m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; +/* + * Return a list of mbufs linked by m_nextpkt + * Try for num_needed, and if wantall is not set, return whatever + * number were available + * The size of each mbuf in the list is controlled by the parameter packetlen. + * Each mbuf of the list may have a chain of mbufs linked by m_next. Each mbuf in + * the chain is called a segment. + * If maxsegments is not null and the value pointed to is not null, this specifies + * the maximum number of segments for a chain of mbufs. + * If maxsegments is zero or the value pointed to is zero the + * caller does not have any restriction on the number of segments. + * The actual number of segments of a mbuf chain is returned in the value pointed + * to by maxsegments. + * When possible the allocation is done under a single lock.
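+ * A usage sketch under these rules (hypothetical sizes): one 3 KB
+ * packet with no restriction on the segment count:
+ *
+ *	unsigned int npkts = 1, nsegs = 0;
+ *	struct mbuf *m = m_allocpacket_internal(&npkts, 3000, &nsegs,
+ *	    M_WAIT, 1, 0);
+ *
+ * On return npkts holds the number of packets actually built and
+ * nsegs the segments per chain -- here 1, since 3000 > MCLBYTES
+ * selects a single 4 KB cluster.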
 + */ -struct mbuf * +__private_extern__ struct mbuf * +m_allocpacket_internal(unsigned int *num_needed, size_t packetlen, unsigned int * maxsegments, + int how, int wantall, size_t wantsize) +{ + struct mbuf **np, *top; + size_t bufsize; + unsigned int num; + unsigned int numchunks = 0; - if (num_with_pkthdrs == 0) - m->m_flags = M_EXT; - else { - m->m_flags = M_PKTHDR | M_EXT; - _M_CLEAR_PKTHDR(m); + top = NULL; + np = &top; + + if (wantsize == 0) { + if (packetlen <= MINCLSIZE) + bufsize = packetlen; + else if (packetlen > MCLBYTES) + bufsize = NBPG; + else + bufsize = MCLBYTES; + } else if (wantsize == MCLBYTES || wantsize == NBPG) + bufsize = wantsize; + else + return 0; + + if (bufsize <= MHLEN) { + numchunks = 1; + } else if (bufsize <= MINCLSIZE) { + if (maxsegments != NULL && *maxsegments == 1) { + bufsize = MCLBYTES; + numchunks = 1; + } else { + numchunks = 2; } + } else if (bufsize == NBPG) { + numchunks = ((packetlen - 1) >> PGSHIFT) + 1; + } else { + numchunks = ((packetlen - 1) >> MCLSHIFT) + 1; + } + if (maxsegments != NULL) { + if (*maxsegments && numchunks > *maxsegments) { + *maxsegments = numchunks; + return 0; + } + *maxsegments = numchunks; + } + /* m_clalloc takes the MBUF_LOCK, but does not release it */ + (void)m_clalloc(numchunks, how, (bufsize == NBPG) ? NBPG : MCLBYTES, 0); + for (num = 0; num < *num_needed; num++) { + struct mbuf **nm, *pkt = 0; + size_t len; + + nm = &pkt; + + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + for (len = 0; len < packetlen; ) { + struct mbuf *m = NULL; + + if (wantsize == 0 && packetlen > MINCLSIZE) { + if (packetlen - len > MCLBYTES) + bufsize = NBPG; + else + bufsize = MCLBYTES; + } + len += bufsize; + + if (mfree && ((bufsize == NBPG && mbigfree) || (bufsize == MCLBYTES && mclfree))) { + /* mbuf + cluster are available */ + m = mfree; + MCHECK(m); + mfree = m->m_next; + ++mclrefcnt[mtocl(m)]; + mbstat.m_mtypes[MT_FREE]--; + mbstat.m_mtypes[MT_DATA]++; + if (bufsize == NBPG) { + m->m_ext.ext_buf = (caddr_t)mbigfree; /* get the big cluster */ + ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; + ++mclrefcnt[mtocl(m->m_ext.ext_buf) + 1]; + mbstat.m_bigclfree--; + mbigfree = ((union mbigcluster *)(m->m_ext.ext_buf))->mbc_next; + m->m_ext.ext_free = m_bigfree; + m->m_ext.ext_size = NBPG; + } else { + m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */ + ++mclrefcnt[mtocl(m->m_ext.ext_buf)]; + mbstat.m_clfree--; + mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next; + m->m_ext.ext_free = 0; + m->m_ext.ext_size = MCLBYTES; + } + m->m_ext.ext_arg = 0; + m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; + m->m_next = m->m_nextpkt = 0; + m->m_type = MT_DATA; + m->m_data = m->m_ext.ext_buf; + m->m_len = 0; + + if (pkt == 0) { + pkt = m; + m->m_flags = M_PKTHDR | M_EXT; + _M_CLEAR_PKTHDR(m); + } else { + m->m_flags = M_EXT; + } + } else { + MBUF_UNLOCK(); + + if (pkt == 0) { + MGETHDR(m, how, MT_DATA); + } else { + MGET(m, how, MT_DATA ); + } + if (m == 0) { + m_freem(pkt); + goto fail; + } + if (bufsize <= MINCLSIZE) { + if (bufsize > MHLEN) { + MGET(m->m_next, how, MT_DATA); + if (m->m_next == 0) { + m_free(m); + m_freem(pkt); + goto fail; + } + } + } else { + if (bufsize == NBPG) + m = m_mbigget(m, how); + else + m = m_mclget(m, how); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + m_freem(pkt); + goto fail; + } + } + MBUF_LOCK(); + } + *nm = m; + nm = &m->m_next; + } + *np = pkt; + np = &pkt->m_nextpkt; + } + MBUF_UNLOCK(); + *num_needed = num; + + return top; +fail: + if
 (wantall && top) { + m_freem(top); + return 0; + } + *num_needed = num; + + return top; +} - } else { - MBUF_UNLOCK(); +/* Best effort to get a mbuf cluster + pkthdr under one lock. + * If we don't have them avail, just bail out and use the regular + * path. + * Used by drivers to allocate packets on receive ring. + */ +__private_extern__ struct mbuf * +m_getpacket_how(int how) +{ + unsigned int num_needed = 1; + + return m_getpackets_internal(&num_needed, 1, how, 1, MCLBYTES); +} - if (num_with_pkthdrs == 0) { - MGET(m, how, MT_DATA ); - } else { - MGETHDR(m, how, MT_DATA); +/* Best effort to get a mbuf cluster + pkthdr under one lock. + * If we don't have them avail, just bail out and use the regular + * path. + * Used by drivers to allocate packets on receive ring. + */ struct mbuf * m_getpacket(void) { + unsigned int num_needed = 1; - num_with_pkthdrs--; - } - if (m == 0) - return(top); - - MCLGET(m, how); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return(top); - } - MBUF_LOCK(); - } - *np = m; + return m_getpackets_internal(&num_needed, 1, M_WAITOK, 1, MCLBYTES); +} - if (num_with_pkthdrs) - np = &m->m_nextpkt; - else - np = &m->m_next; - } - MBUF_UNLOCK(); - return (top); +/* + * return a list of mbuf hdrs that point to clusters... + * try for num_needed, if this can't be met, return whatever + * number were available... set up the first num_with_pkthdrs + * with mbuf hdrs configured as packet headers... these are + * chained on the m_nextpkt field... any packets requested beyond + * this are chained onto the last packet header's m_next field. + */ +struct mbuf * +m_getpackets(int num_needed, int num_with_pkthdrs, int how) +{ + unsigned int n = num_needed; + + return m_getpackets_internal(&n, num_with_pkthdrs, how, 0, MCLBYTES); } @@ -773,7 +1173,11 @@ m_getpackethdrs(int num_needed, int how) MBUF_LOCK(); while (num_needed--) { - if (m = mfree) { /* mbufs are available */ + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + if ((m = mfree)) { /* mbufs are available */ MCHECK(m); mfree = m->m_next; ++mclrefcnt[mtocl(m)]; @@ -782,20 +1186,18 @@ m_getpackethdrs(int num_needed, int how) m->m_next = m->m_nextpkt = 0; m->m_type = MT_DATA; - m->m_flags = M_PKTHDR; + m->m_flags = M_PKTHDR; + m->m_len = 0; m->m_data = m->m_pktdat; _M_CLEAR_PKTHDR(m); } else { - MBUF_UNLOCK(); - - m = m_retryhdr(how, MT_DATA); - - if (m == 0) - return(top); - - MBUF_LOCK(); + MBUF_UNLOCK(); + m = m_retryhdr(how, MT_DATA); + if (m == 0) + return(top); + MBUF_LOCK(); } *np = m; np = &m->m_nextpkt; @@ -810,8 +1212,8 @@ * returns the count of mbuf packets freed. Used by the drivers.
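 * A sketch (hypothetical driver cleanup path): free a whole batch in
 * one call instead of walking it with m_freem():
 *
 *	int freed = m_freem_list(rx_chain);	rx_chain linked via m_nextpkt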
*/ int -m_freem_list(m) - struct mbuf *m; +m_freem_list( + struct mbuf *m) { struct mbuf *nextpkt; int i, count=0; @@ -830,6 +1232,12 @@ m_freem_list(m) struct mbuf *n; + m_range_check(m); + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + + /* Free the aux data if there is any */ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.aux) { /* @@ -845,6 +1253,13 @@ m_freem_list(m) m = nextpkt->m_pkthdr.aux; nextpkt->m_pkthdr.aux = NULL; } + + if ((m->m_flags & M_PKTHDR) != 0 && !SLIST_EMPTY(&m->m_pkthdr.tags)) { + /* A quick (albeit inefficient) expedient */ + MBUF_UNLOCK(); + m_tag_delete_chain(m, NULL); + MBUF_LOCK(); + } n = m->m_next; @@ -858,6 +1273,9 @@ m_freem_list(m) remque((queue_t)&m->m_ext.ext_refs); } else if (m->m_ext.ext_free == NULL) { union mcluster *mcl= (union mcluster *)m->m_ext.ext_buf; + + m_range_check(mcl); + if (_MCLUNREF(mcl)) { mcl->mcl_next = mclfree; mclfree = mcl; @@ -881,20 +1299,20 @@ m_freem_list(m) } m = nextpkt; /* bump m with saved nextpkt if any */ } - if (i = m_want) - m_want = 0; + if ((i = m_want)) + m_want = 0; MBUF_UNLOCK(); if (i) - wakeup((caddr_t)&mfree); + wakeup((caddr_t)&mfree); return (count); } void -m_freem(m) - register struct mbuf *m; +m_freem( + struct mbuf *m) { while (m) m = m_free(m); @@ -907,8 +1325,9 @@ m_freem(m) * Compute the amount of space available * before the current start of data in an mbuf. */ -m_leadingspace(m) -register struct mbuf *m; +int +m_leadingspace( + struct mbuf *m) { if (m->m_flags & M_EXT) { if (MCLHASREFERENCE(m)) @@ -924,8 +1343,9 @@ register struct mbuf *m; * Compute the amount of space available * after the end of data in an mbuf. */ -m_trailingspace(m) -register struct mbuf *m; +int +m_trailingspace( + struct mbuf *m) { if (m->m_flags & M_EXT) { if (MCLHASREFERENCE(m)) @@ -943,9 +1363,10 @@ register struct mbuf *m; * Does not adjust packet header length. 
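 * A hedged sketch (hypothetical 16-byte header; M_PREPEND is the
 * usual macro front end):
 *
 *	m = m_prepend(m, 16, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);	the old chain was freed on failure
 *	if (m->m_flags & M_PKTHDR)
 *		m->m_pkthdr.len += 16;	caller adjusts the total length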
*/ struct mbuf * -m_prepend(m, len, how) - register struct mbuf *m; - int len, how; +m_prepend( + struct mbuf *m, + int len, + int how) { struct mbuf *mn; @@ -973,9 +1394,10 @@ m_prepend(m, len, how) * */ struct mbuf * -m_prepend_2(m, len, how) - register struct mbuf *m; - int len, how; +m_prepend_2( + struct mbuf *m, + int len, + int how) { if (M_LEADINGSPACE(m) >= len) { m->m_data -= len; @@ -996,13 +1418,14 @@ m_prepend_2(m, len, how) int MCFail; struct mbuf * -m_copym(m, off0, len, wait) - register struct mbuf *m; - int off0, wait; - register int len; +m_copym( + struct mbuf *m, + int off0, + int len, + int wait) { - register struct mbuf *n, **np; - register int off = off0; + struct mbuf *n, **np; + int off = off0; struct mbuf *top; int copyhdr = 0; @@ -1023,20 +1446,24 @@ m_copym(m, off0, len, wait) MBUF_LOCK(); while (len > 0) { + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + if (m == 0) { if (len != M_COPYALL) panic("m_copym"); break; } - if (n = mfree) { - MCHECK(n); - ++mclrefcnt[mtocl(n)]; + if ((n = mfree)) { + MCHECK(n); + ++mclrefcnt[mtocl(n)]; mbstat.m_mtypes[MT_FREE]--; mbstat.m_mtypes[m->m_type]++; mfree = n->m_next; n->m_next = n->m_nextpkt = 0; n->m_type = m->m_type; - n->m_data = n->m_dat; + n->m_data = n->m_dat; n->m_flags = 0; } else { MBUF_UNLOCK(); @@ -1105,15 +1532,16 @@ nospace: * rescan the entire mbuf list (normally hung off of the socket) */ struct mbuf * -m_copym_with_hdrs(m, off0, len, wait, m_last, m_off) - register struct mbuf *m; - int off0, wait; - register int len; - struct mbuf **m_last; - int *m_off; +m_copym_with_hdrs( + struct mbuf *m, + int off0, + int len, + int wait, + struct mbuf **m_last, + int *m_off) { - register struct mbuf *n, **np; - register int off = off0; + struct mbuf *n, **np = 0; + int off = off0; struct mbuf *top = 0; int copyhdr = 0; int type; @@ -1130,9 +1558,14 @@ m_copym_with_hdrs(m, off0, len, wait, m_last, m_off) m = m->m_next; } } + MBUF_LOCK(); while (len > 0) { + m_range_check(mfree); + m_range_check(mclfree); + m_range_check(mbigfree); + if (top == 0) type = MT_HEADER; else { @@ -1140,7 +1573,7 @@ m_copym_with_hdrs(m, off0, len, wait, m_last, m_off) panic("m_gethdr_and_copym"); type = m->m_type; } - if (n = mfree) { + if ((n = mfree)) { MCHECK(n); ++mclrefcnt[mtocl(n)]; mbstat.m_mtypes[MT_FREE]--; @@ -1223,13 +1656,13 @@ nospace: * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. */ -void m_copydata(m, off, len, cp) - register struct mbuf *m; - register int off; - register int len; - caddr_t cp; +void m_copydata( + struct mbuf *m, + int off, + int len, + caddr_t cp) { - register unsigned count; + unsigned count; if (off < 0 || len < 0) panic("m_copydata"); @@ -1258,8 +1691,8 @@ void m_copydata(m, off, len, cp) * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. 
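 * A sketch: n is absorbed (and possibly freed) by the call, so a
 * caller tracking packet-header totals repairs them itself:
 *
 *	int nlen = <payload length of n, saved beforehand>;
 *	m_cat(m, n);
 *	m->m_pkthdr.len += nlen;	(hypothetical bookkeeping)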
 */ -void m_cat(m, n) - register struct mbuf *m, *n; +void m_cat( + struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; @@ -1279,13 +1712,13 @@ } void -m_adj(mp, req_len) - struct mbuf *mp; - int req_len; +m_adj( + struct mbuf *mp, + int req_len) { - register int len = req_len; - register struct mbuf *m; - register count; + int len = req_len; + struct mbuf *m; + int count; if ((m = mp) == NULL) return; @@ -1348,7 +1781,7 @@ m_adj(mp, req_len) } count -= m->m_len; } - while (m = m->m_next) + while ((m = m->m_next)) m->m_len = 0; } } @@ -1364,12 +1797,12 @@ m_adj(mp, req_len) int MPFail; struct mbuf * -m_pullup(n, len) - register struct mbuf *n; - int len; +m_pullup( + struct mbuf *n, + int len) { - register struct mbuf *m; - register int count; + struct mbuf *m; + int count; int space; /* @@ -1428,11 +1861,12 @@ bad: * attempts to restore the chain to its original state. */ struct mbuf * -m_split(m0, len0, wait) - register struct mbuf *m0; - int len0, wait; +m_split( + struct mbuf *m0, + int len0, + int wait) { - register struct mbuf *m, *n; + struct mbuf *m, *n; unsigned len = len0, remain; for (m = m0; m && len > m->m_len; m = m->m_next) @@ -1491,16 +1925,17 @@ extpacket: * Routine to copy from device local memory into mbufs. */ struct mbuf * -m_devget(buf, totlen, off0, ifp, copy) - char *buf; - int totlen, off0; - struct ifnet *ifp; - void (*copy)(); +m_devget( + char *buf, + int totlen, + int off0, + struct ifnet *ifp, + void (*copy)(const void *, void *, size_t)) { - register struct mbuf *m; + struct mbuf *m; struct mbuf *top = 0, **mp = &top; - register int off = off0, len; - register char *cp; + int off = off0, len; + char *cp; char *epkt; cp = buf; @@ -1571,35 +2006,61 @@ m_devget(buf, totlen, off0, ifp, copy) * Ensure hysteresis between hi/lo. */ static int -m_howmany() +m_howmany(int num, size_t bufsize) { - register int i; - - /* Under minimum */ - if (mbstat.m_clusters < MINCL) - return (MINCL - mbstat.m_clusters); - /* Too few (free < 1/2 total) and not over maximum */ - if (mbstat.m_clusters < nmbclusters && - (i = ((mbstat.m_clusters >> 1) - mbstat.m_clfree)) > 0) - return i; - return 0; + int i = 0; + + /* Bail if we've maxed out the mbuf memory map */ + if (mbstat.m_clusters + (mbstat.m_bigclusters << 1) < nmbclusters) { + int j = 0; + + if (bufsize == MCLBYTES) { + /* Under minimum */ + if (mbstat.m_clusters < MINCL) + return (MINCL - mbstat.m_clusters); + /* Too few (free < 1/2 total) and not over maximum */ + if (mbstat.m_clusters < (nmbclusters >> 1)) { + if (num >= mbstat.m_clfree) + i = num - mbstat.m_clfree; + if (((mbstat.m_clusters + num) >> 1) > mbstat.m_clfree) + j = ((mbstat.m_clusters + num) >> 1) - mbstat.m_clfree; + i = max(i, j); + if (i + mbstat.m_clusters >= (nmbclusters >> 1)) + i = (nmbclusters >> 1) - mbstat.m_clusters; + } + } else { + /* Under minimum */ + if (mbstat.m_bigclusters < MINCL) + return (MINCL - mbstat.m_bigclusters); + /* Too few (free < 1/2 total) and not over maximum */ + if (mbstat.m_bigclusters < (nmbclusters >> 2)) { + if (num >= mbstat.m_bigclfree) + i = num - mbstat.m_bigclfree; + if (((mbstat.m_bigclusters + num) >> 1) > mbstat.m_bigclfree) + j = ((mbstat.m_bigclusters + num) >> 1) - mbstat.m_bigclfree; + i = max(i, j); + if (i + mbstat.m_bigclusters >= (nmbclusters >> 2)) + i = (nmbclusters >> 2) - mbstat.m_bigclusters; + } + } + } + return i; } - /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary.
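 * A sketch (hypothetical offset and value): overwrite 4 bytes at
 * offset 20, growing the chain if it is shorter than 24 bytes:
 *
 *	u_int32_t v = htonl(0x01020304);
 *	m_copyback(m0, 20, sizeof (v), (caddr_t)&v);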
 */ void -m_copyback(m0, off, len, cp) - struct mbuf *m0; - register int off; - register int len; - caddr_t cp; +m_copyback( + struct mbuf *m0, + int off, + int len, + caddr_t cp) { - register int mlen; - register struct mbuf *m = m0, *n; + int mlen; + struct mbuf *m = m0, *n; int totlen = 0; if (m0 == 0) @@ -1640,16 +2101,16 @@ out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) } -char *mcl_to_paddr(register char *addr) { - register int base_phys; +char *mcl_to_paddr(char *addr) { + int base_phys; if (addr < (char *)mbutl || addr >= (char *)embutl) return (0); - base_phys = mcl_paddr[(addr - (char *)mbutl) >> PAGE_SHIFT]; + base_phys = mcl_paddr[(addr - (char *)mbutl) >> PGSHIFT]; if (base_phys == 0) return (0); - return ((char *)((int)base_phys | ((int)addr & PAGE_MASK))); + return ((char *)((int)base_phys | ((int)addr & PGOFSET))); } /* @@ -1663,8 +2124,9 @@ char *mcl_to_paddr(register char *addr) { int MDFail; struct mbuf * -m_dup(register struct mbuf *m, int how) -{ register struct mbuf *n, **np; +m_dup(struct mbuf *m, int how) +{ + struct mbuf *n, **np; struct mbuf *top; int copyhdr = 0; @@ -1684,16 +2146,16 @@ m_dup(register struct mbuf *m, int how) { if ((n = m_gethdr(how, m->m_type)) == NULL) return(NULL); n->m_len = m->m_len; - n->m_flags |= (m->m_flags & M_COPYFLAGS); - n->m_pkthdr.len = m->m_pkthdr.len; - n->m_pkthdr.rcvif = m->m_pkthdr.rcvif; - n->m_pkthdr.header = NULL; - n->m_pkthdr.csum_flags = 0; - n->m_pkthdr.csum_data = 0; - n->m_pkthdr.aux = NULL; - n->m_pkthdr.vlan_tag = 0; - n->m_pkthdr.reserved_1 = 0; - n->m_pkthdr.reserved2 = 0; + n->m_flags |= (m->m_flags & M_COPYFLAGS); + n->m_pkthdr.len = m->m_pkthdr.len; + n->m_pkthdr.rcvif = m->m_pkthdr.rcvif; + n->m_pkthdr.header = NULL; + n->m_pkthdr.csum_flags = 0; + n->m_pkthdr.csum_data = 0; + n->m_pkthdr.aux = NULL; + n->m_pkthdr.vlan_tag = 0; + n->m_pkthdr.socket_id = 0; + SLIST_INIT(&n->m_pkthdr.tags); bcopy(m->m_data, n->m_data, m->m_pkthdr.len); return(n); } @@ -1805,29 +2267,54 @@ void m_mcheck(struct mbuf *m) panic("mget MCHECK: m_type=%x m=%x", m->m_type, m); } -void +static void mbuf_expand_thread(void) { - while (1) { - int expand_mcl; - MBUF_LOCK(); - expand_mcl = mbuf_expand_mcl; - mbuf_expand_mcl = 0; - MBUF_UNLOCK(); - if (expand_mcl) { - caddr_t p; - MCLALLOC(p, M_WAIT); - if (p) MCLFREE(p); + while (1) { + MBUF_LOCK(); + if (mbuf_expand_mcl) { + int n; + + /* Adjust to the current number of clusters in use */ + n = mbuf_expand_mcl - (mbstat.m_clusters - mbstat.m_clfree); + mbuf_expand_mcl = 0; + + if (n > 0) + (void)m_clalloc(n, M_WAIT, MCLBYTES, 1); + } + if (mbuf_expand_big) { + int n; + + /* Adjust to the current number of 4 KB clusters in use */ + n = mbuf_expand_big - (mbstat.m_bigclusters - mbstat.m_bigclfree); + mbuf_expand_big = 0; + + if (n > 0) + (void)m_clalloc(n, M_WAIT, NBPG, 1); } - assert_wait(&mbuf_expand_thread_wakeup, THREAD_UNINT); - (void) thread_block(mbuf_expand_thread); - } + MBUF_UNLOCK(); + /* + * Because we can run out of memory before filling the mbuf map, we + * should not allocate more clusters than there are mbufs -- otherwise + * we could have a large number of useless clusters allocated.
+ */ + while (mbstat.m_mbufs < mbstat.m_bigclusters + mbstat.m_clusters) { + if (m_expand(M_WAIT) == 0) + break; + } + + assert_wait(&mbuf_expand_thread_wakeup, THREAD_UNINT); + (void) thread_block((thread_continue_t)mbuf_expand_thread); + } } -void +static void mbuf_expand_thread_init(void) { mbuf_expand_thread_initialized++; mbuf_expand_thread(); } +SYSCTL_DECL(_kern_ipc); +SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, ""); + diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c index ef742b340..a8c8652b2 100644 --- a/bsd/kern/uipc_mbuf2.c +++ b/bsd/kern/uipc_mbuf2.c @@ -90,7 +90,7 @@ #include #include -#include +#include #include #include #if defined(PULLDOWN_STAT) && defined(INET6) @@ -279,17 +279,9 @@ m_pulldown(m, off, len, offp) if ((n->m_flags & M_EXT) == 0) sharedcluster = 0; else { -#ifdef __bsdi__ - if (n->m_ext.ext_func) -#else if (n->m_ext.ext_free) -#endif sharedcluster = 1; -#ifdef __NetBSD__ - else if (MCLISREFERENCED(n)) -#else - else if (mclrefcnt[mtocl(n->m_ext.ext_buf)] > 1) -#endif + else if (m_mclhasreference(n)) sharedcluster = 1; else sharedcluster = 0; @@ -440,3 +432,180 @@ m_aux_delete(m, victim) n = next; } } + +/* Get a packet tag structure along with specified data following. */ +struct m_tag * +m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait) +{ + struct m_tag *t; + + if (len < 0) + return NULL; +#ifndef __APPLE__ + t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); +#else + /*MALLOC(t, struct m_tag *, len + sizeof(struct m_tag), M_TEMP, M_WAITOK);*/ + if (len + sizeof(struct m_tag) <= MLEN) { + struct mbuf *m = m_get(wait, MT_TAG); + if (m == NULL) + return NULL; + t = (struct m_tag *) m->m_dat; + } else if (len + sizeof(struct m_tag) <= MCLBYTES) { + MCLALLOC((caddr_t)t, wait); + } else + t = NULL; +#endif + if (t == NULL) + return NULL; + t->m_tag_type = type; + t->m_tag_len = len; + t->m_tag_id = id; + return t; +} + + +/* Free a packet tag. */ +void +m_tag_free(struct m_tag *t) +{ +#ifndef __APPLE__ + free(t, M_PACKET_TAGS); +#else + /* FREE(t, M_TEMP); */ + if (t == NULL) + return; + if (t->m_tag_len <= MLEN) { + struct mbuf * m = m_dtom(t); + m_free(m); + } else { + MCLFREE((caddr_t)t); + } +#endif +} + +/* Prepend a packet tag. */ +void +m_tag_prepend(struct mbuf *m, struct m_tag *t) +{ + KASSERT(m && t, ("m_tag_prepend: null argument, m %p t %p", m, t)); + SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); +} + +/* Unlink a packet tag. */ +void +m_tag_unlink(struct mbuf *m, struct m_tag *t) +{ + KASSERT(m && t, ("m_tag_unlink: null argument, m %p t %p", m, t)); + SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); +} + +/* Unlink and free a packet tag. */ +void +m_tag_delete(struct mbuf *m, struct m_tag *t) +{ + KASSERT(m && t, ("m_tag_delete: null argument, m %p t %p", m, t)); + m_tag_unlink(m, t); + m_tag_free(t); +} + +/* Unlink and free a packet tag chain, starting from given tag. */ +void +m_tag_delete_chain(struct mbuf *m, struct m_tag *t) +{ + struct m_tag *p, *q; + + KASSERT(m, ("m_tag_delete_chain: null mbuf")); + if (t != NULL) + p = t; + else + p = SLIST_FIRST(&m->m_pkthdr.tags); + if (p == NULL) + return; + while ((q = SLIST_NEXT(p, m_tag_link)) != NULL) + m_tag_delete(m, q); + m_tag_delete(m, p); +} + +/* Find a tag, starting from a given position. 
 */ +struct m_tag * +m_tag_locate(struct mbuf *m, u_int32_t id, u_int16_t type, struct m_tag *t) +{ + struct m_tag *p; + + KASSERT(m, ("m_tag_find: null mbuf")); + if (t == NULL) + p = SLIST_FIRST(&m->m_pkthdr.tags); + else + p = SLIST_NEXT(t, m_tag_link); + while (p != NULL) { + if (p->m_tag_id == id && p->m_tag_type == type) + return p; + p = SLIST_NEXT(p, m_tag_link); + } + return NULL; +} + +/* Copy a single tag. */ +struct m_tag * +m_tag_copy(struct m_tag *t, int how) +{ + struct m_tag *p; + + KASSERT(t, ("m_tag_copy: null tag")); + p = m_tag_alloc(t->m_tag_id, t->m_tag_type, t->m_tag_len, how); + if (p == NULL) + return (NULL); + bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ + return p; +} + +/* + * Copy two tag chains. The destination mbuf (to) loses any attached + * tags even if the operation fails. This should not be a problem, as + * m_tag_copy_chain() is typically called with a newly-allocated + * destination mbuf. + */ +int +m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) +{ + struct m_tag *p, *t, *tprev = NULL; + + KASSERT(to && from, + ("m_tag_copy: null argument, to %p from %p", to, from)); + m_tag_delete_chain(to, NULL); + SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { + t = m_tag_copy(p, how); + if (t == NULL) { + m_tag_delete_chain(to, NULL); + return 0; + } + if (tprev == NULL) + SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); + else { + SLIST_INSERT_AFTER(tprev, t, m_tag_link); + tprev = t; + } + } + return 1; +} + +/* Initialize tags on an mbuf. */ +void +m_tag_init(struct mbuf *m) +{ + SLIST_INIT(&m->m_pkthdr.tags); +} + +/* Get first tag in chain. */ +struct m_tag * +m_tag_first(struct mbuf *m) +{ + return SLIST_FIRST(&m->m_pkthdr.tags); +} + +/* Get next tag in chain. */ +struct m_tag * +m_tag_next(__unused struct mbuf *m, struct m_tag *t) +{ + return SLIST_NEXT(t, m_tag_link); +} diff --git a/bsd/kern/uipc_proto.c b/bsd/kern/uipc_proto.c index 1d31b684a..6fd419ddd 100644 --- a/bsd/kern/uipc_proto.c +++ b/bsd/kern/uipc_proto.c @@ -75,22 +75,29 @@ extern struct domain localdomain; /* or at least forward */ static struct protosw localsw[] = { { SOCK_STREAM, &localdomain, 0, PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, - 0, 0, 0, 0, + 0, 0, 0, uipc_ctloutput, 0, 0, 0, 0, 0, - 0, &uipc_usrreqs + 0, + &uipc_usrreqs, + 0, 0, 0 + }, { SOCK_DGRAM, &localdomain, 0, PR_ATOMIC|PR_ADDR|PR_RIGHTS, - 0, 0, 0, 0, + 0, 0, 0, uipc_ctloutput, 0, 0, 0, 0, 0, - 0, &uipc_usrreqs + 0, + &uipc_usrreqs, + 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, raw_ctlinput, 0, 0, - raw_init, 0, 0, 0, - 0, &raw_usrreqs + 0, 0, 0, 0, + 0, + &raw_usrreqs, + 0, 0, 0 } }; diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index ebeeec818..2018446b4 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -60,8 +60,9 @@ #include #include #include -#include -#include +#include +#include +#include #include #include #include @@ -82,6 +83,7 @@ #include #include #include +#include #include int so_cache_hw = 0; @@ -96,6 +98,11 @@ struct zone *so_cache_zone; extern int get_inpcb_str_size(); extern int get_tcp_str_size(); +static lck_grp_t *so_cache_mtx_grp; +static lck_attr_t *so_cache_mtx_attr; +static lck_grp_attr_t *so_cache_mtx_grp_attr; +lck_mtx_t *so_cache_mtx; + #include static void filt_sordetach(struct knote *kn); @@ -111,6 +118,7 @@ static struct filterops soread_filtops = static struct filterops sowrite_filtops = { 1, NULL, filt_sowdetach, filt_sowrite }; +#define EVEN_MORE_LOCKING_DEBUG 0 int socket_debug = 0; int socket_zone = M_SOCKET; so_gen_t so_gencnt; /* generation count for sockets */ @@ -128,6 +136,7 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) + SYSCTL_DECL(_kern_ipc); static int somaxconn = SOMAXCONN; @@ -144,8 +153,6 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy, 0, ""); void so_cache_timer(); -struct mbuf *m_getpackets(int, int, int); - /* * Socket operation routines. @@ -156,20 +163,54 @@ struct mbuf *m_getpackets(int, int, int); */ #ifdef __APPLE__ + +vm_size_t so_cache_zone_element_size; + +static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid); + + void socketinit() { vm_size_t str_size; + if (so_cache_init_done) { + printf("socketinit: already called...\n"); + return; + } + + /* + * allocate lock group attribute and group for socket cache mutex + */ + so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(so_cache_mtx_grp_attr); + + so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); + + /* + * allocate the lock attribute for socket cache mutex + */ + so_cache_mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(so_cache_mtx_attr); + so_cache_init_done = 1; - timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); + so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */ + + if (so_cache_mtx == NULL) + return; /* we're hosed... 
*/ + str_size = (vm_size_t)( sizeof(struct socket) + 4 + get_inpcb_str_size() + 4 + get_tcp_str_size()); so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone"); #if TEMPDEBUG - kprintf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); + printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); #endif + timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); + + so_cache_zone_element_size = str_size; + + sflt_init(); } @@ -179,11 +220,11 @@ int waitok; { caddr_t temp; - int s; register u_long offset; - s = splnet(); + lck_mtx_lock(so_cache_mtx); + if (cached_sock_count) { cached_sock_count--; *so = socket_cache_head; @@ -195,7 +236,8 @@ int waitok; socket_cache_head->cache_prev = 0; else socket_cache_tail = 0; - splx(s); + + lck_mtx_unlock(so_cache_mtx); temp = (*so)->so_saved_pcb; bzero((caddr_t)*so, sizeof(struct socket)); @@ -204,13 +246,16 @@ int waitok; cached_sock_count); #endif (*so)->so_saved_pcb = temp; + (*so)->cached_in_sock_layer = 1; + } else { #if TEMPDEBUG kprintf("Allocating cached sock %x from memory\n", *so); #endif - splx(s); + lck_mtx_unlock(so_cache_mtx); + if (waitok) *so = (struct socket *) zalloc(so_cache_zone); else @@ -255,17 +300,16 @@ int waitok; void cached_sock_free(so) struct socket *so; { - int s; + lck_mtx_lock(so_cache_mtx); - s = splnet(); if (++cached_sock_count > MAX_CACHED_SOCKETS) { --cached_sock_count; - splx(s); + lck_mtx_unlock(so_cache_mtx); #if TEMPDEBUG kprintf("Freeing overflowed cached socket %x\n", so); #endif - zfree(so_cache_zone, (vm_offset_t) so); + zfree(so_cache_zone, so); } else { #if TEMPDEBUG @@ -283,7 +327,7 @@ struct socket *so; so->cache_timestamp = so_cache_time; socket_cache_head = so; - splx(s); + lck_mtx_unlock(so_cache_mtx); } #if TEMPDEBUG @@ -297,44 +341,38 @@ struct socket *so; void so_cache_timer() { register struct socket *p; - register int s; register int n_freed = 0; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - ++so_cache_time; + lck_mtx_lock(so_cache_mtx); - s = splnet(); + ++so_cache_time; - while (p = socket_cache_tail) + while ( (p = socket_cache_tail) ) { if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) break; so_cache_timeouts++; - if (socket_cache_tail = p->cache_prev) + if ( (socket_cache_tail = p->cache_prev) ) p->cache_prev->cache_next = 0; if (--cached_sock_count == 0) socket_cache_head = 0; - splx(s); - zfree(so_cache_zone, (vm_offset_t) p); + zfree(so_cache_zone, p); - splnet(); if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) { so_cache_max_freed++; break; } } - splx(s); + lck_mtx_unlock(so_cache_mtx); timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); - (void) thread_funnel_set(network_flock, FALSE); } #endif /* __APPLE__ */ @@ -358,12 +396,12 @@ soalloc(waitok, dom, type) cached_sock_alloc(&so, waitok); else { - so = _MALLOC_ZONE(sizeof(*so), socket_zone, M_WAITOK); + MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK); if (so) bzero(so, sizeof *so); } /* XXX race condition for reentrant kernel */ - +//###LD Atomic add for so_gencnt if (so) { so->so_gencnt = ++so_gencnt; so->so_zone = socket_zone; @@ -415,23 +453,22 @@ socreate(dom, aso, type, proto) #ifdef __APPLE__ if (p != 0) { - if (p->p_ucred->cr_uid == 0) + so->so_uid = kauth_cred_getuid(kauth_cred_get()); + if (!suser(kauth_cred_get(),NULL)) so->so_state = SS_PRIV; - - so->so_uid = p->p_ucred->cr_uid; } #else - so->so_cred = p->p_ucred; - crhold(so->so_cred); + so->so_cred = kauth_cred_get_with_ref(); #endif so->so_proto = 
 prp; #ifdef __APPLE__ so->so_rcv.sb_flags |= SB_RECV; /* XXX */ - if (prp->pr_sfilter.tqh_first) - error = sfilter_init(so); - if (error == 0) + so->so_rcv.sb_so = so->so_snd.sb_so = so; #endif - error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); + +//### Attachment will create the per pcb lock if necessary and increase refcount + + error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { /* * Warning: @@ -439,13 +476,16 @@ socreate(dom, aso, type, proto) * so protocol attachment handler must be coded carefully */ so->so_state |= SS_NOFDREF; - sofree(so); + sofreelastref(so, 1); return (error); } + so->so_usecount++; #ifdef __APPLE__ prp->pr_domain->dom_refs++; - so->so_rcv.sb_so = so->so_snd.sb_so = so; TAILQ_INIT(&so->so_evlist); + + /* Attach socket filters for this protocol */ + sflt_initsock(so); #if TCPDEBUG if (tcpconsdebug == 2) so->so_options |= SO_DEBUG; @@ -463,29 +503,40 @@ sobind(so, nam) { struct proc *p = current_proc(); - int error; - struct kextcb *kp; - int s = splnet(); + int error = 0; + struct socket_filter_entry *filter; + int filtered = 0; - error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); - if (error == 0) { - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sobind) { - error = (*kp->e_soif->sf_sobind)(so, nam, kp); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - break; - } - splx(s); - return(error); - } + socket_lock(so, 1); + + /* Socket filter */ + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_bind) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); } - kp = kp->e_next; + error = filter->sfe_filter->sf_filter.sf_bind( + filter->sfe_cookie, so, nam); } } - splx(s); + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + } + /* End socket filter */ + + if (error == 0) + error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); + + socket_unlock(so, 1); + + if (error == EJUSTRETURN) + error = 0; + return (error); } @@ -513,13 +564,17 @@ sodealloc(so) FREE(so->so_accf, M_ACCF); } #endif /* INET */ - crfree(so->so_cred); + kauth_cred_rele(so->so_cred); zfreei(so->so_zone, so); #else if (so->cached_in_sock_layer == 1) cached_sock_free(so); - else - _FREE_ZONE(so, sizeof(*so), so->so_zone); + else { + if (so->cached_in_sock_layer == -1) + panic("sodealloc: double dealloc: so=%x\n", so); + so->cached_in_sock_layer = -1; + FREE_ZONE(so, sizeof(*so), so->so_zone); + } #endif /* __APPLE__ */ } @@ -529,64 +584,65 @@ solisten(so, backlog) int backlog; { - struct kextcb *kp; struct proc *p = current_proc(); - int s, error; + int error; - s = splnet(); - error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); + socket_lock(so, 1); + + { + struct socket_filter_entry *filter; + int filtered = 0; + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_listen) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_listen( + filter->sfe_cookie, so); + } + } + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + } + } + + if (error == 0) { + error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); + } + if (error) { - splx(s); + socket_unlock(so, 1); + if (error == EJUSTRETURN) + error = 0; return (error); } - if (TAILQ_EMPTY(&so->so_comp)) + + if (TAILQ_EMPTY(&so->so_comp)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0
 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_solisten) { - error = (*kp->e_soif->sf_solisten)(so, kp); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - break; - } - splx(s); - return(error); - } - } - kp = kp->e_next; - } - splx(s); + socket_unlock(so, 1); return (0); } - void -sofree(so) +sofreelastref(so, dealloc) register struct socket *so; + int dealloc; { int error; - struct kextcb *kp; struct socket *head = so->so_head; - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sofree) { - error = (*kp->e_soif->sf_sofree)(so, kp); - if (error) { - selthreadclear(&so->so_snd.sb_sel); - selthreadclear(&so->so_rcv.sb_sel); - return; /* void fn */ - } - } - kp = kp->e_next; - } + /*### Assume socket is locked */ - if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { + if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) { #ifdef __APPLE__ selthreadclear(&so->so_snd.sb_sel); selthreadclear(&so->so_rcv.sb_sel); @@ -594,6 +650,7 @@ sofree(so) return; } if (head != NULL) { + socket_lock(head, 1); if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; @@ -608,6 +665,7 @@ sofree(so) selthreadclear(&so->so_snd.sb_sel); selthreadclear(&so->so_rcv.sb_sel); #endif + socket_unlock(head, 1); return; } else { panic("sofree: not queued"); @@ -615,14 +673,20 @@ sofree(so) head->so_qlen--; so->so_state &= ~SS_INCOMP; so->so_head = NULL; + socket_unlock(head, 1); } #ifdef __APPLE__ selthreadclear(&so->so_snd.sb_sel); sbrelease(&so->so_snd); #endif sorflush(so); - sfilter_term(so); - sodealloc(so); + + /* 3932268: disable upcall */ + so->so_rcv.sb_flags &= ~SB_UPCALL; + so->so_snd.sb_flags &= ~SB_UPCALL; + + if (dealloc) + sodealloc(so); } /* @@ -631,52 +695,69 @@ sofree(so) * Free socket when disconnect complete. */ int -soclose(so) +soclose_locked(so) register struct socket *so; { - int s = splnet(); /* conservative */ int error = 0; - struct kextcb *kp; + lck_mtx_t * mutex_held; + struct timespec ts; -#ifndef __APPLE__ - funsetown(so->so_sigio); -#endif - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soclose) { - error = (*kp->e_soif->sf_soclose)(so, kp); - if (error) { - splx(s); - return((error == EJUSTRETURN) ? 0 : error); - } - } - kp = kp->e_next; + if (so->so_usecount == 0) { + panic("soclose: so=%x refcount=0\n", so); } - if (so->so_options & SO_ACCEPTCONN) { - struct socket *sp, *sonext; - - sp = TAILQ_FIRST(&so->so_incomp); - for (; sp != NULL; sp = sonext) { - sonext = TAILQ_NEXT(sp, so_list); - (void) soabort(sp); - } - for (sp = TAILQ_FIRST(&so->so_comp); sp != NULL; sp = sonext) { - sonext = TAILQ_NEXT(sp, so_list); - /* Dequeue from so_comp since sofree() won't do it */ - TAILQ_REMOVE(&so->so_comp, sp, so_list); - so->so_qlen--; - sp->so_state &= ~SS_COMP; - sp->so_head = NULL; - (void) soabort(sp); - } - - } - if (so->so_pcb == 0) + sflt_notify(so, sock_evt_closing, NULL); + + if ((so->so_options & SO_ACCEPTCONN)) { + struct socket *sp; + + /* We do not want new connections to be added to the connection queues */ + so->so_options &= ~SO_ACCEPTCONN; + + while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { + /* A bit tricky here. We need to keep + * a lock if it's a protocol global lock + * but we want the head, not the socket locked + * in the case of per-socket lock...
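+ * Sketch of the two cases as the loop below plays out:
+ *	pr_getlock != NULL (per-socket locks): lock sp, drop so,
+ *	    abort sp, retake so, release sp;
+ *	pr_getlock == NULL: so and sp share the domain mutex, which
+ *	    is held throughout, so soabort(sp) runs under it.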
+ */ + if (so->so_proto->pr_getlock != NULL) + socket_lock(sp, 1); + if (so->so_proto->pr_getlock != NULL) + socket_unlock(so, 0); + (void) soabort(sp); + if (so->so_proto->pr_getlock != NULL) + socket_lock(so, 0); + if (so->so_proto->pr_getlock != NULL) + socket_unlock(sp, 1); + } + + while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { + if (so->so_proto->pr_getlock != NULL) + socket_lock(sp, 1); + + /* Dequeue from so_comp since sofree() won't do it */ + TAILQ_REMOVE(&so->so_comp, sp, so_list); + so->so_qlen--; + sp->so_state &= ~SS_COMP; + sp->so_head = NULL; + + if (so->so_proto->pr_getlock != NULL) + socket_unlock(so, 0); + (void) soabort(sp); + if (so->so_proto->pr_getlock != NULL) + socket_lock(so, 0); + if (so->so_proto->pr_getlock != NULL) + socket_unlock(sp, 1); + } + } + if (so->so_pcb == 0) { + /* 3915887: mark the socket as ready for dealloc */ + so->so_flags |= SOF_PCBCLEARING; goto discard; + } if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { - error = sodisconnect(so); + error = sodisconnectlocked(so); if (error) goto drop; } @@ -684,20 +765,34 @@ soclose(so) if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; while (so->so_state & SS_ISCONNECTED) { - error = tsleep((caddr_t)&so->so_timeo, - PSOCK | PCATCH, "soclos", so->so_linger); - if (error) + ts.tv_sec = (so->so_linger/100); + ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10; + error = msleep((caddr_t)&so->so_timeo, mutex_held, + PSOCK | PCATCH, "soclos", &ts); + if (error) { + /* It's OK when the time fires, don't report an error */ + if (error == EWOULDBLOCK) + error = 0; break; + } } } } drop: - if (so->so_pcb) { + if (so->so_usecount == 0) + panic("soclose: usecount is zero so=%x\n", so); + if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } + if (so->so_usecount <= 0) + panic("soclose: usecount is zero so=%x\n", so); discard: if (so->so_pcb && so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); @@ -706,20 +801,49 @@ discard: so->so_proto->pr_domain->dom_refs--; evsofree(so); #endif + so->so_usecount--; sofree(so); - splx(s); return (error); } +int +soclose(so) + register struct socket *so; +{ + int error = 0; + socket_lock(so, 1); + if (so->so_retaincnt == 0) + error = soclose_locked(so); + else { /* if the FD is going away, but socket is retained in kernel remove its reference */ + so->so_usecount--; + if (so->so_usecount < 2) + panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount); + } + socket_unlock(so, 1); + return (error); +} + + /* * Must be called at splnet... 
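/*
 * The linger wait above switches from tsleep() ticks to msleep() with a
 * struct timespec: so_linger is kept in scheduler ticks (hz == 100 here,
 * so one tick is 10 ms), tv_sec is so_linger/100, and the remainder
 * becomes whole 10 ms slices of nanoseconds.  A standalone sketch of just
 * that conversion; linger_to_timespec and XHZ are illustrative names.
 */
#include <assert.h>
#include <time.h>

#define XHZ           100L                      /* ticks per second assumed */
#define NSEC_PER_TICK (1000000000L / XHZ)       /* 10,000,000 ns per tick */

static struct timespec
linger_to_timespec(long linger_ticks)
{
        struct timespec ts;

        ts.tv_sec  = linger_ticks / XHZ;
        ts.tv_nsec = (linger_ticks % XHZ) * NSEC_PER_TICK;
        return ts;
}

int
main(void)
{
        struct timespec ts = linger_to_timespec(250);   /* 2.5 seconds */

        assert(ts.tv_sec == 2 && ts.tv_nsec == 500000000L);
        return 0;
}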
*/ +//#### Should already be locked int soabort(so) struct socket *so; { int error; +#ifdef MORE_LOCKING_DEBUG + lck_mtx_t * mutex_held; + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); +#endif + error = (*so->so_proto->pr_usrreqs->pru_abort)(so); if (error) { sofree(so); @@ -729,55 +853,48 @@ soabort(so) } int -soaccept(so, nam) +soacceptlock(so, nam, dolock) register struct socket *so; struct sockaddr **nam; + int dolock; { - int s = splnet(); int error; - struct kextcb *kp; + + if (dolock) socket_lock(so, 1); if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); - if (error == 0) { - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soaccept) { - error = (*kp->e_soif->sf_soaccept)(so, nam, kp); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - break; - } - splx(s); - return(error); - } - } - kp = kp->e_next; - } - } - - splx(s); + if (dolock) socket_unlock(so, 1); return (error); } +int +soaccept(so, nam) + register struct socket *so; + struct sockaddr **nam; +{ + return (soacceptlock(so, nam, 1)); +} int -soconnect(so, nam) +soconnectlock(so, nam, dolock) register struct socket *so; struct sockaddr *nam; + int dolock; { int s; int error; struct proc *p = current_proc(); - struct kextcb *kp; - if (so->so_options & SO_ACCEPTCONN) + if (dolock) socket_lock(so, 1); + + if (so->so_options & SO_ACCEPTCONN) { + if (dolock) socket_unlock(so, 1); return (EOPNOTSUPP); - s = splnet(); + } /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. @@ -786,72 +903,77 @@ soconnect(so, nam) */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || - (error = sodisconnect(so)))) + (error = sodisconnectlocked(so)))) error = EISCONN; else { - /* - * Run connect filter before calling protocol: - * - non-blocking connect returns before completion; - * - allows filters to modify address. 
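/*
 * soaccept() above shrinks to a wrapper around soacceptlock(so, nam, 1):
 * the dolock flag lets callers that already hold the per-socket lock run
 * the same body without relocking.  A compact user-space sketch of that
 * layering, assuming a pthread mutex for the socket lock; ysocket and
 * the do_accept* names are illustrative.
 */
#include <pthread.h>

struct ysocket {
        pthread_mutex_t mtx;
        int nofdref;                    /* stands in for SS_NOFDREF */
};

static int
do_accept_locked(struct ysocket *so, int dolock)
{
        int error;

        if (dolock)
                pthread_mutex_lock(&so->mtx);
        so->nofdref = 0;                /* the real work, done under the lock */
        error = 0;
        if (dolock)
                pthread_mutex_unlock(&so->mtx);
        return error;
}

static int
do_accept(struct ysocket *so)           /* the unlocked entry point */
{
        return do_accept_locked(so, 1);
}

int
main(void)
{
        struct ysocket so = { PTHREAD_MUTEX_INITIALIZER, 1 };

        return do_accept(&so);
}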
- */ - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soconnect) { - error = (*kp->e_soif->sf_soconnect)(so, nam, kp); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - } - splx(s); - return(error); - } - } - kp = kp->e_next; - } + /* + * Run connect filter before calling protocol: + * - non-blocking connect returns before completion; + */ + { + struct socket_filter_entry *filter; + int filtered = 0; + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_connect_out) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_connect_out( + filter->sfe_cookie, so, nam); + } + } + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + } + } + if (error) { + if (error == EJUSTRETURN) + error = 0; + if (dolock) socket_unlock(so, 1); + return error; + } + error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); } - splx(s); + if (dolock) socket_unlock(so, 1); return (error); } +int +soconnect(so, nam) + register struct socket *so; + struct sockaddr *nam; +{ + return (soconnectlock(so, nam, 1)); +} + int soconnect2(so1, so2) register struct socket *so1; struct socket *so2; { - int s = splnet(); int error; - struct kextcb *kp; +//####### Assumes so1 is already locked / + + socket_lock(so2, 1); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); - if (error == 0) { - kp = sotokextcb(so1); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soconnect2) { - error = (*kp->e_soif->sf_soconnect2)(so1, so2, kp); - if (error) { - if (error == EJUSTRETURN) { - return 0; - break; - } - splx(s); - return(error); - } - } - kp = kp->e_next; - } - } - splx(s); + + socket_unlock(so2, 1); return (error); } + int -sodisconnect(so) +sodisconnectlocked(so) register struct socket *so; { - int s = splnet(); int error; - struct kextcb *kp; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; @@ -861,31 +983,102 @@ sodisconnect(so) error = EALREADY; goto bad; } + error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); + if (error == 0) { - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sodisconnect) { - error = (*kp->e_soif->sf_sodisconnect)(so, kp); - if (error) { - if (error == EJUSTRETURN) { - error = 0; - break; - } - splx(s); - return(error); - } - } - kp = kp->e_next; - } + sflt_notify(so, sock_evt_disconnected, NULL); } bad: - splx(s); return (error); } +//### Locking version +int +sodisconnect(so) + register struct socket *so; +{ + int error; + + socket_lock(so, 1); + error = sodisconnectlocked(so); + socket_unlock(so, 1); + return(error); +} #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT) + +/* + * sosendcheck will lock the socket buffer if it isn't locked and + * verify that there is space for the data being inserted. + */ + +static int +sosendcheck( + struct socket *so, + struct sockaddr *addr, + long resid, + long clen, + long atomic, + int flags, + int *sblocked) +{ + int error = 0; + long space; + +restart: + if (*sblocked == 0) { + error = sblock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) + return error; + *sblocked = 1; + } + + if (so->so_state & SS_CANTSENDMORE) + return EPIPE; + + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + return error; + } + + if ((so->so_state & SS_ISCONNECTED) == 0) { + /* + * `sendto' and `sendmsg' is allowed on a connection- + * based socket if it supports implied connect. 
+ * Return ENOTCONN if not connected and no address is + * supplied. + */ + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && + (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) + return ENOTCONN; + } else if (addr == 0 && !(flags&MSG_HOLD)) + return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ; + } + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + if ((atomic && resid > so->so_snd.sb_hiwat) || + clen > so->so_snd.sb_hiwat) + return EMSGSIZE; + if (space < resid + clen && + (atomic || space < so->so_snd.sb_lowat || space < clen)) { + if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) + return EWOULDBLOCK; + sbunlock(&so->so_snd, 1); + error = sbwait(&so->so_snd); + if (error) { + return error; + } + goto restart; + } + + return 0; +} + /* * Send on a socket. * If send must go all at once and message is larger than @@ -920,13 +1113,14 @@ sosend(so, addr, uio, top, control, flags) struct mbuf **mp; register struct mbuf *m, *freelist = NULL; register long space, len, resid; - int clen = 0, error, s, dontroute, mlen, sendflags; + int clen = 0, error, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; + int sblocked = 0; struct proc *p = current_proc(); - struct kextcb *kp; if (uio) - resid = uio->uio_resid; + // LP64todo - fix this! + resid = uio_resid(uio); else resid = top->m_pkthdr.len; @@ -937,6 +1131,8 @@ sosend(so, addr, uio, top, control, flags) so->so_snd.sb_lowat, so->so_snd.sb_hiwat); + socket_lock(so, 1); + /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 @@ -947,8 +1143,9 @@ sosend(so, addr, uio, top, control, flags) * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ - if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { + if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; + socket_unlock(so, 1); goto out; } @@ -959,161 +1156,138 @@ sosend(so, addr, uio, top, control, flags) p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; -#define snderr(errno) { error = errno; splx(s); goto release; } -restart: - error = sblock(&so->so_snd, SBLOCKWAIT(flags)); - if (error) - goto out; do { - s = splnet(); - if (so->so_state & SS_CANTSENDMORE) - snderr(EPIPE); - if (so->so_error) { - error = so->so_error; - so->so_error = 0; - splx(s); - goto release; - } - if ((so->so_state & SS_ISCONNECTED) == 0) { - /* - * `sendto' and `sendmsg' is allowed on a connection- - * based socket if it supports implied connect. - * Return ENOTCONN if not connected and no address is - * supplied. - */ - if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && - (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { - if ((so->so_state & SS_ISCONFIRMING) == 0 && - !(resid == 0 && clen != 0)) - snderr(ENOTCONN); - } else if (addr == 0 && !(flags&MSG_HOLD)) - snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 
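/*
 * The admission test that sosendcheck() factors out above, reduced to its
 * arithmetic: an atomic send (or the control bytes alone) must fit under
 * the high-water mark at all, and a write that does not fit right now
 * either fails with EWOULDBLOCK (non-blocking) or sleeps on the buffer
 * and retries.  XWAIT stands in for the sbwait()-and-restart path; all
 * names here are illustrative.
 */
#include <errno.h>

#define XWAIT (-1)      /* caller should sleep on the send buffer and retry */

static int
send_space_check(long space, long hiwat, long lowat,
    long resid, long clen, int atomic, int nonblocking)
{
        if ((atomic && resid > hiwat) || clen > hiwat)
                return EMSGSIZE;        /* can never fit: fail now */
        if (space < resid + clen &&
            (atomic || space < lowat || space < clen)) {
                if (nonblocking)
                        return EWOULDBLOCK;
                return XWAIT;
        }
        return 0;                       /* enough room: go copy the data */
}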
- ENOTCONN : EDESTADDRREQ); - } - space = sbspace(&so->so_snd); - if (flags & MSG_OOB) - space += 1024; - if ((atomic && resid > so->so_snd.sb_hiwat) || - clen > so->so_snd.sb_hiwat) - snderr(EMSGSIZE); - if (space < resid + clen && - (atomic || space < so->so_snd.sb_lowat || space < clen)) { - if (so->so_state & SS_NBIO) - snderr(EWOULDBLOCK); - sbunlock(&so->so_snd); - error = sbwait(&so->so_snd); - splx(s); - if (error) + error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked); + if (error) { + if (sblocked) + goto release; + else { + socket_unlock(so, 1); goto out; - goto restart; + } } - splx(s); mp = ⊤ - space -= clen; + space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0); do { - if (uio == NULL) { - /* - * Data is prepackaged in "top". - */ - resid = 0; - if (flags & MSG_EOR) - top->m_flags |= M_EOR; - } else { - boolean_t dropped_funnel = FALSE; - int chainlength; - int bytes_to_copy; - - bytes_to_copy = min(resid, space); - - if (sosendminchain > 0) { - if (bytes_to_copy >= sosendminchain) { - dropped_funnel = TRUE; - (void)thread_funnel_set(network_flock, FALSE); - } - chainlength = 0; - } else - chainlength = sosendmaxchain; - - do { - - if (bytes_to_copy >= MINCLSIZE) { - /* - * try to maintain a local cache of mbuf clusters needed to complete this write - * the list is further limited to the number that are currently needed to fill the socket - * this mechanism allows a large number of mbufs/clusters to be grabbed under a single - * mbuf lock... if we can't get any clusters, than fall back to trying for mbufs - * if we fail early (or miscalcluate the number needed) make sure to release any clusters - * we haven't yet consumed. - */ - if ((m = freelist) == NULL) { - int num_needed; - int hdrs_needed = 0; - - if (top == 0) - hdrs_needed = 1; - num_needed = bytes_to_copy / MCLBYTES; - - if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE) - num_needed++; - - if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL) - goto getpackets_failed; - m = freelist; - } - freelist = m->m_next; - m->m_next = NULL; - - mlen = MCLBYTES; - len = min(mlen, bytes_to_copy); - } else { -getpackets_failed: - if (top == 0) { - MGETHDR(m, M_WAIT, MT_DATA); - mlen = MHLEN; - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { - MGET(m, M_WAIT, MT_DATA); - mlen = MLEN; - } - len = min(mlen, bytes_to_copy); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. - */ - if (atomic && top == 0 && len < mlen) - MH_ALIGN(m, len); - } - chainlength += len; - space -= len; - - error = uiomove(mtod(m, caddr_t), (int)len, uio); - - resid = uio->uio_resid; - - m->m_len = len; - *mp = m; - top->m_pkthdr.len += len; - if (error) - break; - mp = &m->m_next; - if (resid <= 0) { + if (uio == NULL) { + /* + * Data is prepackaged in "top". + */ + resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; - break; - } - bytes_to_copy = min(resid, space); - - } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE)); + } else { + int chainlength; + int bytes_to_copy; + + bytes_to_copy = min(resid, space); + + if (sosendminchain > 0) { + chainlength = 0; + } else + chainlength = sosendmaxchain; + + socket_unlock(so, 0); + + do { + int num_needed; + int hdrs_needed = (top == 0) ? 
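/*
 * The sizing rule used by the new m_getpackets_internal() call above: one
 * page-sized cluster per NBPG bytes of payload, plus one more cluster when
 * the tail remainder is still at least MINCLSIZE bytes.  A standalone
 * sketch of the arithmetic; the constant values below are assumptions for
 * illustration (a 4 KB page and an arbitrary MINCLSIZE stand-in).
 */
#include <assert.h>

#define XNBPG      4096L        /* page size assumed here */
#define XMINCLSIZE 213L         /* "worth a cluster" floor, illustrative */

static int
clusters_needed(long bytes_to_copy)
{
        int num_needed = (int)(bytes_to_copy / XNBPG);

        if ((bytes_to_copy - num_needed * XNBPG) >= XMINCLSIZE)
                num_needed++;
        return num_needed;
}

int
main(void)
{
        assert(clusters_needed(8192) == 2);     /* exact fit */
        assert(clusters_needed(8292) == 2);     /* 100-byte tail rides in an mbuf */
        assert(clusters_needed(8500) == 3);     /* 308-byte tail earns a cluster */
        return 0;
}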
1 : 0; + + /* + * try to maintain a local cache of mbuf clusters needed to complete this write + * the list is further limited to the number that are currently needed to fill the socket + * this mechanism allows a large number of mbufs/clusters to be grabbed under a single + * mbuf lock... if we can't get any clusters, than fall back to trying for mbufs + * if we fail early (or miscalcluate the number needed) make sure to release any clusters + * we haven't yet consumed. + */ + if (freelist == NULL && bytes_to_copy > MCLBYTES) { + num_needed = bytes_to_copy / NBPG; + + if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE) + num_needed++; + + freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG); + /* Fall back to cluster size if allocation failed */ + } + + if (freelist == NULL && bytes_to_copy > MINCLSIZE) { + num_needed = bytes_to_copy / MCLBYTES; + + if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE) + num_needed++; + + freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES); + /* Fall back to a single mbuf if allocation failed */ + } + + if (freelist == NULL) { + if (top == 0) + MGETHDR(freelist, M_WAIT, MT_DATA); + else + MGET(freelist, M_WAIT, MT_DATA); + + if (freelist == NULL) { + error = ENOBUFS; + socket_lock(so, 0); + if (sblocked) { + goto release; + } else { + socket_unlock(so, 1); + goto out; + } + } + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && top == 0 && bytes_to_copy < MHLEN) + MH_ALIGN(freelist, bytes_to_copy); + } + m = freelist; + freelist = m->m_next; + m->m_next = NULL; + + if ((m->m_flags & M_EXT)) + mlen = m->m_ext.ext_size; + else if ((m->m_flags & M_PKTHDR)) + mlen = MHLEN - m_leadingspace(m); + else + mlen = MLEN; + len = min(mlen, bytes_to_copy); + + chainlength += len; + + space -= len; - if (dropped_funnel == TRUE) - (void)thread_funnel_set(network_flock, TRUE); - if (error) - goto release; - } + error = uiomove(mtod(m, caddr_t), (int)len, uio); + + // LP64todo - fix this! + resid = uio_resid(uio); + + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + break; + mp = &m->m_next; + if (resid <= 0) { + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + break; + } + bytes_to_copy = min(resid, space); + + } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE)); + + socket_lock(so, 0); + + if (error) + goto release; + } if (flags & (MSG_HOLD|MSG_SEND)) { /* Enqueue for later, go away if HOLD */ @@ -1138,7 +1312,6 @@ getpackets_failed: } if (dontroute) so->so_options |= SO_DONTROUTE; - s = splnet(); /* XXX */ /* Compute flags here, for pru_send and NKEs */ sendflags = (flags & MSG_OOB) ? PRUS_OOB : /* @@ -1152,32 +1325,84 @@ getpackets_failed: PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; - kp = sotokextcb(so); - while (kp) - { if (kp->e_soif && kp->e_soif->sf_sosend) { - error = (*kp->e_soif->sf_sosend)(so, &addr, - &uio, &top, - &control, - &sendflags, - kp); - if (error) { - splx(s); + + /* + * Socket filter processing + */ + { + struct socket_filter_entry *filter; + int filtered; + + filtered = 0; + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_data_out) { + int so_flags = 0; + if (filtered == 0) { + filtered = 1; + /* + * We don't let sbunlock unlock the socket because + * we don't want it to decrement the usecount. 
+ */ + sbunlock(&so->so_snd, 1); + sblocked = 0; + socket_unlock(so, 0); + so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0; + } + error = filter->sfe_filter->sf_filter.sf_data_out( + filter->sfe_cookie, so, addr, &top, &control, so_flags); + } + } + + if (filtered) { + /* + * At this point, we've run at least one filter. + * The socket is unlocked as is the socket buffer. + */ + socket_lock(so, 0); if (error == EJUSTRETURN) { - sbunlock(&so->so_snd); + error = 0; + clen = 0; + control = 0; + top = 0; + socket_unlock(so, 1); + goto out; + } + else if (error) { + socket_unlock(so, 1); + goto out; + } - if (freelist) - m_freem_list(freelist); - return(0); + + /* Verify our state again, this will lock the socket buffer */ + error = sosendcheck(so, addr, top->m_pkthdr.len, + control ? control->m_pkthdr.len : 0, + atomic, flags, &sblocked); + if (error) { + if (sblocked) { + /* sbunlock at release will unlock the socket */ + goto release; + } + else { + socket_unlock(so, 1); + goto out; + } } - goto release; } } - kp = kp->e_next; - } - - error = (*so->so_proto->pr_usrreqs->pru_send)(so, - sendflags, top, addr, control, p); - splx(s); + /* + * End Socket filter processing + */ + + if (error == EJUSTRETURN) { + /* A socket filter handled this data */ + error = 0; + } + else { + error = (*so->so_proto->pr_usrreqs->pru_send)(so, + sendflags, top, addr, control, p); + } #ifdef __APPLE__ if (flags & MSG_SEND) so->so_temp = NULL; @@ -1194,7 +1419,7 @@ getpackets_failed: } while (resid); release: - sbunlock(&so->so_snd); + sbunlock(&so->so_snd, 0); /* will unlock socket */ out: if (top) m_freem(top); @@ -1238,13 +1463,13 @@ soreceive(so, psa, uio, mp0, controlp, flagsp) struct mbuf **controlp; int *flagsp; { - register struct mbuf *m, **mp, *ml; - register int flags, len, error, s, offset; + register struct mbuf *m, **mp, *ml = NULL; + register int flags, len, error, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; - int orig_resid = uio->uio_resid; - struct kextcb *kp; + // LP64todo - fix this! + int orig_resid = uio_resid(uio); volatile struct mbuf *free_list; volatile int delayed_copy_len; int can_delay; @@ -1252,27 +1477,20 @@ soreceive(so, psa, uio, mp0, controlp, flagsp) struct proc *p = current_proc(); + // LP64todo - fix this! KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, - uio->uio_resid, + uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soreceive) { - error = (*kp->e_soif->sf_soreceive)(so, psa, &uio, - mp0, controlp, - flagsp, kp); - if (error) { - KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); - return((error == EJUSTRETURN) ? 0 : error); - } - } - kp = kp->e_next; - } + socket_lock(so, 1); +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount == 1) + panic("soreceive: so=%x no other reference on socket\n", so); +#endif mp = mp0; if (psa) *psa = 0; @@ -1293,17 +1511,21 @@ soreceive(so, psa, uio, mp0, controlp, flagsp) (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) { m = m_get(M_WAIT, MT_DATA); if (m == NULL) { + socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0); return (ENOBUFS); } error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; + socket_unlock(so, 0); do { + // LP64todo - fix this! 
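/*
 * The sf_data_out processing above relies on the same convention as the
 * bind/connect hooks earlier in the file: a filter that fully consumed the
 * operation returns EJUSTRETURN, and the framework turns that into success
 * without calling the protocol's pru_send.  A minimal sketch of that
 * dispatch; XEJUSTRETURN and the stub functions are illustrative.
 */
#define XEJUSTRETURN (-2)       /* "handled, stop here" sentinel */

static int
filter_data_out(void)           /* stub: 0, an errno, or the sentinel */
{
        return XEJUSTRETURN;
}

static int
protocol_send(void)             /* pru_send analogue */
{
        return 0;
}

static int
dispatch_send(void)
{
        int error = filter_data_out();

        if (error == XEJUSTRETURN)
                return 0;       /* a filter took the data: report success */
        if (error != 0)
                return error;   /* a real failure */
        return protocol_send(); /* nobody intercepted it: hand to protocol */
}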
error = uiomove(mtod(m, caddr_t), - (int) min(uio->uio_resid, m->m_len), uio); + (int) min(uio_resid(uio), m->m_len), uio); m = m_free(m); - } while (uio->uio_resid && error == 0 && m); + } while (uio_resid(uio) && error == 0 && m); + socket_lock(so, 0); bad: if (m) m_freem(m); @@ -1319,7 +1541,8 @@ bad: goto nooob; } else if (error == 0 && flagsp) *flagsp |= MSG_OOB; - } + } + socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); #endif return (error); @@ -1327,19 +1550,23 @@ bad: nooob: if (mp) *mp = (struct mbuf *)0; - if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) + if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) (*pr->pr_usrreqs->pru_rcvd)(so, 0); free_list = (struct mbuf *)0; delayed_copy_len = 0; restart: +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount <= 1) + printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount); +#endif error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) { + socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); return (error); } - s = splnet(); m = so->so_rcv.sb_mb; /* @@ -1354,9 +1581,9 @@ restart: * a short count if a timeout or signal occurs after we start. */ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && - so->so_rcv.sb_cc < uio->uio_resid) && + so->so_rcv.sb_cc < uio_resid(uio)) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); @@ -1384,21 +1611,27 @@ restart: error = ENOTCONN; goto release; } - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) goto release; - if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { + if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; goto release; } - sbunlock(&so->so_rcv); + sbunlock(&so->so_rcv, 1); +#ifdef EVEN_MORE_LOCKING_DEBUG if (socket_debug) printf("Waiting for socket data\n"); +#endif error = sbwait(&so->so_rcv); +#ifdef EVEN_MORE_LOCKING_DEBUG if (socket_debug) printf("SORECEIVE - sbwait returned %d\n", error); - splx(s); +#endif + if (so->so_usecount < 1) + panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount); if (error) { + socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); return (error); } @@ -1434,6 +1667,8 @@ dontblock: m = m->m_next; } else { sbfree(&so->so_rcv, m); + if (m->m_next == 0 && so->so_rcv.sb_cc != 0) + panic("soreceive: about to create invalid socketbuf"); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } @@ -1448,9 +1683,14 @@ dontblock: if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == - SCM_RIGHTS) + SCM_RIGHTS) { + socket_unlock(so, 0); /* release socket lock: see 3903171 */ error = (*pr->pr_domain->dom_externalize)(m); + socket_lock(so, 0); + } *controlp = m; + if (m->m_next == 0 && so->so_rcv.sb_cc != 0) + panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0"); so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; @@ -1474,15 +1714,14 @@ dontblock: moff = 0; offset = 0; - if (!(flags & MSG_PEEK) && uio->uio_resid > sorecvmincopy) + if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) can_delay = 1; else can_delay = 0; need_event = 0; - - while (m && (uio->uio_resid - delayed_copy_len) > 0 && error == 0) { + while (m && (uio_resid(uio) - delayed_copy_len) > 
0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; @@ -1509,7 +1748,8 @@ dontblock: } #endif so->so_state &= ~SS_RCVATMARK; - len = uio->uio_resid - delayed_copy_len; + // LP64todo - fix this! + len = uio_resid(uio) - delayed_copy_len; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) @@ -1534,13 +1774,11 @@ dontblock: */ delayed_copy_len += len; } else { - splx(s); if (delayed_copy_len) { - error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); if (error) { - s = splnet(); goto release; } if (m != so->so_rcv.sb_mb) { @@ -1556,14 +1794,15 @@ dontblock: break; } } + socket_unlock(so, 0); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); + socket_lock(so, 0); - s = splnet(); if (error) goto release; } } else - uio->uio_resid -= len; + uio_setresid(uio, (uio_resid(uio) - len)); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) @@ -1574,6 +1813,7 @@ dontblock: } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); + m->m_nextpkt = NULL; if (mp) { *mp = m; @@ -1581,7 +1821,6 @@ dontblock: so->so_rcv.sb_mb = m = m->m_next; *mp = (struct mbuf *)0; } else { - m->m_nextpkt = 0; if (free_list == NULL) free_list = m; else @@ -1622,7 +1861,7 @@ dontblock: break; } } - if (flags & MSG_EOR) + if (flags & MSG_EOR) break; /* * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket), @@ -1631,12 +1870,12 @@ dontblock: * with a short count but without error. * Keep sockbuf locked against other readers. */ - while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio->uio_resid - delayed_copy_len) > 0 && + while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) goto release; - if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD)) (*pr->pr_usrreqs->pru_rcvd)(so, flags); if (sbwait(&so->so_rcv)) { error = 0; @@ -1657,7 +1896,7 @@ dontblock: */ if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) { - error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); if (error) goto release; @@ -1668,6 +1907,10 @@ dontblock: } } } +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount <= 1) + panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount); +#endif if (m && pr->pr_flags & PR_ATOMIC) { #ifdef __APPLE__ @@ -1693,7 +1936,7 @@ dontblock: flags |= MSG_HAVEMORE; if (delayed_copy_len) { - error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); if (error) goto release; @@ -1705,28 +1948,31 @@ dontblock: if (need_event) postevent(so, 0, EV_OOB); #endif - if (orig_resid == uio->uio_resid && orig_resid && + if (orig_resid == uio_resid(uio) && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { - sbunlock(&so->so_rcv); - splx(s); + sbunlock(&so->so_rcv, 1); goto restart; } if (flagsp) *flagsp |= flags; release: +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount <= 1) + panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount); +#endif if (delayed_copy_len) { - error = sodelayed_copy(uio, &free_list, &delayed_copy_len); + error = sodelayed_copy(so, uio, &free_list, 
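/*
 * soreceive() above now defers its copies: consumed mbufs are strung onto
 * free_list and sodelayed_copy() drains them in one pass with the socket
 * unlocked, instead of unlocking and relocking around every uiomove().
 * A user-space sketch of that batching shape, with memcpy standing in for
 * uiomove and illustrative names throughout.
 */
#include <stddef.h>
#include <string.h>

struct xbuf {
        struct xbuf *next;
        const char *data;
        size_t len;
};

/* Drain whole buffers from the deferred list into dst; returns bytes copied. */
static size_t
delayed_copy(struct xbuf **list, char *dst, size_t dstlen)
{
        size_t off = 0;
        struct xbuf *b = *list;

        while (b != NULL && off + b->len <= dstlen) {
                memcpy(dst + off, b->data, b->len);     /* uiomove analogue */
                off += b->len;
                b = b->next;
        }
        *list = b;              /* anything that did not fit stays queued */
        return off;
}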
&delayed_copy_len); } if (free_list) { m_freem_list((struct mbuf *)free_list); } - sbunlock(&so->so_rcv); - splx(s); + sbunlock(&so->so_rcv, 0); /* will unlock socket */ + // LP64todo - fix this! KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, - uio->uio_resid, + uio_resid(uio), so->so_rcv.sb_cc, 0, error); @@ -1735,19 +1981,15 @@ release: } -int sodelayed_copy(struct uio *uio, struct mbuf **free_list, int *resid) +static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid) { int error = 0; - boolean_t dropped_funnel = FALSE; struct mbuf *m; m = *free_list; - if (*resid >= sorecvmincopy) { - dropped_funnel = TRUE; + socket_unlock(so, 0); - (void)thread_funnel_set(network_flock, FALSE); - } while (m && error == 0) { error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio); @@ -1759,8 +2001,7 @@ int sodelayed_copy(struct uio *uio, struct mbuf **free_list, int *resid) *free_list = (struct mbuf *)NULL; *resid = 0; - if (dropped_funnel == TRUE) - (void)thread_funnel_set(network_flock, TRUE); + socket_lock(so, 0); return (error); } @@ -1772,22 +2013,11 @@ soshutdown(so, how) register int how; { register struct protosw *pr = so->so_proto; - struct kextcb *kp; int ret; - - KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_START, 0,0,0,0,0); - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soshutdown) { - ret = (*kp->e_soif->sf_soshutdown)(so, how, kp); - if (ret) { - KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); - return((ret == EJUSTRETURN) ? 0 : ret); - } - } - kp = kp->e_next; - } + socket_lock(so, 1); + + sflt_notify(so, sock_evt_shutdown, &how); if (how != SHUT_WR) { sorflush(so); @@ -1797,10 +2027,12 @@ soshutdown(so, how) ret = ((*pr->pr_usrreqs->pru_shutdown)(so)); postevent(so, 0, EV_WCLOSED); KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); + socket_unlock(so, 1); return(ret); } KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); + socket_unlock(so, 1); return (0); } @@ -1810,37 +2042,36 @@ sorflush(so) { register struct sockbuf *sb = &so->so_rcv; register struct protosw *pr = so->so_proto; - register int s, error; struct sockbuf asb; - struct kextcb *kp; - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sorflush) { - if ((*kp->e_soif->sf_sorflush)(so, kp)) - return; - } - kp = kp->e_next; - } +#ifdef MORE_LOCKING_DEBUG + lck_mtx_t * mutex_held; + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); +#endif + + sflt_notify(so, sock_evt_flush_read, NULL); sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAIT); - s = splimp(); socantrcvmore(so); - sbunlock(sb); + sbunlock(sb, 1); #ifdef __APPLE__ selthreadclear(&sb->sb_sel); #endif asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); + sb->sb_so = so; /* reestablish link to socket */ if (asb.sb_flags & SB_KNOTE) { sb->sb_sel.si_note = asb.sb_sel.si_note; sb->sb_flags = SB_KNOTE; } - splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease(&asb); } @@ -1874,7 +2105,7 @@ sooptcopyin(sopt, buf, len, minlen) if (sopt->sopt_p != 0) return (copyin(sopt->sopt_val, buf, valsize)); - bcopy(sopt->sopt_val, buf, valsize); + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize); return 0; } @@ -1887,36 +2118,60 @@ sosetopt(so, sopt) struct linger l; struct timeval tv; short val; - struct kextcb *kp; + + socket_lock(so, 1); if (sopt->sopt_dir 
!= SOPT_SET) { sopt->sopt_dir = SOPT_SET; } - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_socontrol) { - error = (*kp->e_soif->sf_socontrol)(so, sopt, kp); - if (error) - return((error == EJUSTRETURN) ? 0 : error); + { + struct socket_filter_entry *filter; + int filtered = 0; + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_setoption) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_setoption( + filter->sfe_cookie, so, sopt); + } + } + + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + + if (error) { + if (error == EJUSTRETURN) + error = 0; + goto bad; + } } - kp = kp->e_next; } error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); + if (so->so_proto && so->so_proto->pr_ctloutput) { + error = (*so->so_proto->pr_ctloutput) + (so, sopt); + socket_unlock(so, 1); + return (error); + } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_LINGER: + case SO_LINGER_SEC: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; - so->so_linger = l.l_linger; + so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz; if (l.l_onoff) so->so_options |= SO_LINGER; else @@ -2000,29 +2255,18 @@ sosetopt(so, sopt) if (error) goto bad; - /* assert(hz > 0); */ - if (tv.tv_sec < 0 || tv.tv_sec > SHRT_MAX / hz || + if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } - /* assert(tick > 0); */ - /* assert(ULONG_MAX - SHRT_MAX >= 1000000); */ - { - long tmp = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; - if (tmp > SHRT_MAX) { - error = EDOM; - goto bad; - } - val = tmp; - } - + switch (sopt->sopt_name) { case SO_SNDTIMEO: - so->so_snd.sb_timeo = val; + so->so_snd.sb_timeo = tv; break; case SO_RCVTIMEO: - so->so_rcv.sb_timeo = val; + so->so_rcv.sb_timeo = tv; break; } break; @@ -2030,14 +2274,13 @@ sosetopt(so, sopt) case SO_NKE: { struct so_nke nke; - struct NFDescriptor *nf1, *nf2 = NULL; error = sooptcopyin(sopt, &nke, sizeof nke, sizeof nke); if (error) goto bad; - error = nke_insert(so, &nke); + error = sflt_attach_private(so, NULL, nke.nke_handle, 1); break; } @@ -2075,6 +2318,7 @@ sosetopt(so, sopt) } } bad: + socket_unlock(so, 1); return (error); } @@ -2101,11 +2345,11 @@ sooptcopyout(sopt, buf, len) */ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; - if (sopt->sopt_val != 0) { + if (sopt->sopt_val != USER_ADDR_NULL) { if (sopt->sopt_p != 0) error = copyout(buf, sopt->sopt_val, valsize); else - bcopy(buf, sopt->sopt_val, valsize); + bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); } return error; } @@ -2118,35 +2362,60 @@ sogetopt(so, sopt) int error, optval; struct linger l; struct timeval tv; - struct mbuf *m; - struct kextcb *kp; if (sopt->sopt_dir != SOPT_GET) { sopt->sopt_dir = SOPT_GET; } - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_socontrol) { - error = (*kp->e_soif->sf_socontrol)(so, sopt, kp); - if (error) - return((error == EJUSTRETURN) ? 
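/*
 * The SO_LINGER_SEC case added above keeps so_linger in ticks internally:
 * legacy SO_LINGER stores l_linger unscaled, while SO_LINGER_SEC scales
 * seconds by hz on the way in and back down on the way out (the matching
 * sogetopt hunk follows).  A standalone sketch of that pair, assuming
 * hz == 100; the X-prefixed names are illustrative.
 */
#include <assert.h>

enum xopt { XSO_LINGER, XSO_LINGER_SEC };
#define XHZ 100L

static long
linger_in(enum xopt name, long l_linger)
{
        return (name == XSO_LINGER) ? l_linger : l_linger * XHZ;
}

static long
linger_out(enum xopt name, long so_linger)
{
        return (name == XSO_LINGER) ? so_linger : so_linger / XHZ;
}

int
main(void)
{
        long ticks = linger_in(XSO_LINGER_SEC, 5);      /* 5 seconds */

        assert(ticks == 500);
        assert(linger_out(XSO_LINGER_SEC, ticks) == 5);
        assert(linger_in(XSO_LINGER, 500) == 500);      /* legacy path: raw */
        return 0;
}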
0 : error); + socket_lock(so, 1); + + { + struct socket_filter_entry *filter; + int filtered = 0; + error = 0; + for (filter = so->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_getoption) { + if (filtered == 0) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_getoption( + filter->sfe_cookie, so, sopt); + } + } + if (filtered != 0) { + socket_lock(so, 0); + sflt_unuse(so); + + if (error) { + if (error == EJUSTRETURN) + error = 0; + socket_unlock(so, 1); + return error; + } } - kp = kp->e_next; } error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) { - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); - } else + error = (*so->so_proto->pr_ctloutput) + (so, sopt); + socket_unlock(so, 1); + return (error); + } else { + socket_unlock(so, 1); return (ENOPROTOOPT); + } } else { switch (sopt->sopt_name) { case SO_LINGER: + case SO_LINGER_SEC: l.l_onoff = so->so_options & SO_LINGER; - l.l_linger = so->so_linger; + l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger : + so->so_linger / hz; error = sooptcopyout(sopt, &l, sizeof l); break; @@ -2183,25 +2452,19 @@ integer: m1 = so->so_rcv.sb_mb; if (so->so_proto->pr_flags & PR_ATOMIC) { -#if 0 - kprintf("SKT CC: %d\n", so->so_rcv.sb_cc); -#endif while (m1) { if (m1->m_type == MT_DATA) pkt_total += m1->m_len; -#if 0 - kprintf("CNT: %d/%d\n", m1->m_len, pkt_total); -#endif m1 = m1->m_next; } optval = pkt_total; } else optval = so->so_rcv.sb_cc; -#if 0 - kprintf("RTN: %d\n", optval); -#endif goto integer; } + case SO_NWRITE: + optval = so->so_snd.sb_cc; + goto integer; #endif case SO_ERROR: optval = so->so_error; @@ -2226,90 +2489,29 @@ integer: case SO_SNDTIMEO: case SO_RCVTIMEO: - optval = (sopt->sopt_name == SO_SNDTIMEO ? + tv = (sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); - tv.tv_sec = optval / hz; - tv.tv_usec = (optval % hz) * tick; error = sooptcopyout(sopt, &tv, sizeof tv); break; - case SO_NOSIGPIPE: - optval = (so->so_flags & SOF_NOSIGPIPE); - goto integer; + case SO_NOSIGPIPE: + optval = (so->so_flags & SOF_NOSIGPIPE); + goto integer; case SO_NOADDRERR: - optval = (so->so_flags & SOF_NOADDRAVAIL); - goto integer; + optval = (so->so_flags & SOF_NOADDRAVAIL); + goto integer; default: error = ENOPROTOOPT; break; } + socket_unlock(so, 1); return (error); } } -#ifdef __APPLE__ -/* - * Network filter support - */ -/* Run the list of filters, creating extension control blocks */ -sfilter_init(register struct socket *so) -{ struct kextcb *kp, **kpp; - struct protosw *prp; - struct NFDescriptor *nfp; - - prp = so->so_proto; - nfp = prp->pr_sfilter.tqh_first; /* non-null */ - kpp = &so->so_ext; - kp = NULL; - while (nfp) - { MALLOC(kp, struct kextcb *, sizeof(*kp), - M_TEMP, M_WAITOK); - if (kp == NULL) - return(ENOBUFS); /* so_free will clean up */ - *kpp = kp; - kpp = &kp->e_next; - kp->e_next = NULL; - kp->e_fcb = NULL; - kp->e_nfd = nfp; - kp->e_soif = nfp->nf_soif; - kp->e_sout = nfp->nf_soutil; - /* - * Ignore return value for create - * Everyone gets a chance at startup - */ - if (kp->e_soif && kp->e_soif->sf_socreate) - (*kp->e_soif->sf_socreate)(so, prp, kp); - nfp = nfp->nf_next.tqe_next; - } - return(0); -} - -/* - * Run the list of filters, freeing extension control blocks - * Assumes the soif/soutil blocks have been handled. 
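/*
 * With sb_timeo widened to a struct timeval in the hunks above, the old
 * ticks conversion and its SHRT_MAX clamp are gone: sosetopt now only
 * range-checks the user's timeval before storing it, and sogetopt hands
 * the stored value straight back.  A sketch of that remaining check,
 * keeping the EDOM convention from the patch.
 */
#include <errno.h>
#include <sys/time.h>

static int
timeo_check(const struct timeval *tv)
{
        if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000)
                return EDOM;    /* same error the kernel path returns */
        return 0;               /* store tv as-is; no ticks conversion */
}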
- */ -sfilter_term(struct socket *so) -{ struct kextcb *kp, *kp1; - - kp = so->so_ext; - while (kp) - { kp1 = kp->e_next; - /* - * Ignore return code on termination; everyone must - * get terminated. - */ - if (kp->e_soif && kp->e_soif->sf_sofree) - kp->e_soif->sf_sofree(so, kp); - FREE(kp, M_TEMP); - kp = kp1; - } - return(0); -} -#endif __APPLE__ - /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ int soopt_getm(struct sockopt *sopt, struct mbuf **mp) @@ -2366,22 +2568,21 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; - if (sopt->sopt_val == NULL) + if (sopt->sopt_val == USER_ADDR_NULL) return 0; while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_p != NULL) { int error; - error = copyin(sopt->sopt_val, mtod(m, char *), - m->m_len); + error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return(error); } } else - bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; - (caddr_t)sopt->sopt_val += m->m_len; + sopt->sopt_val += m->m_len; m = m->m_next; } if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ @@ -2396,22 +2597,21 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) struct mbuf *m0 = m; size_t valsize = 0; - if (sopt->sopt_val == NULL) + if (sopt->sopt_val == USER_ADDR_NULL) return 0; while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_p != NULL) { int error; - error = copyout(mtod(m, char *), sopt->sopt_val, - m->m_len); + error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return(error); } } else - bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); + bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len); sopt->sopt_valsize -= m->m_len; - (caddr_t)sopt->sopt_val += m->m_len; + sopt->sopt_val += m->m_len; valsize += m->m_len; m = m->m_next; } @@ -2429,16 +2629,7 @@ sohasoutofband(so) register struct socket *so; { struct proc *p; - struct kextcb *kp; - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sohasoutofband) { - if ((*kp->e_soif->sf_sohasoutofband)(so, kp)) - return; - } - kp = kp->e_next; - } if (so->so_pgid < 0) gsignal(-so->so_pgid, SIGURG); else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) @@ -2447,11 +2638,12 @@ sohasoutofband(so) } int -sopoll(struct socket *so, int events, struct ucred *cred, void * wql) +sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) { struct proc *p = current_proc(); int revents = 0; - int s = splnet(); + + socket_lock(so, 1); if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) @@ -2479,17 +2671,18 @@ sopoll(struct socket *so, int events, struct ucred *cred, void * wql) } } - splx(s); + socket_unlock(so, 1); return (revents); } +int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); int -soo_kqfilter(struct file *fp, struct knote *kn, struct proc *p) +soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; struct sockbuf *sb; - int s; + socket_lock(so, 1); switch (kn->kn_filter) { case EVFILT_READ: @@ -2504,81 +2697,127 @@ soo_kqfilter(struct file *fp, struct knote *kn, struct proc *p) sb = &so->so_snd; break; default: + socket_unlock(so, 1); return (1); } - if (sb->sb_sel.si_flags & SI_INITED) - return (1); - - s = splnet(); if 
(KNOTE_ATTACH(&sb->sb_sel.si_note, kn)) sb->sb_flags |= SB_KNOTE; - splx(s); + socket_unlock(so, 1); return (0); } static void filt_sordetach(struct knote *kn) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; - int s = splnet(); + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; - if (so->so_rcv.sb_flags & SB_KNOTE && - !(so->so_rcv.sb_sel.si_flags & SI_INITED)) + socket_lock(so, 1); + if (so->so_rcv.sb_flags & SB_KNOTE) if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) so->so_rcv.sb_flags &= ~SB_KNOTE; - splx(s); + socket_unlock(so, 1); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; - kn->kn_data = so->so_rcv.sb_cc; - if (so->so_state & SS_CANTRCVMORE) { - kn->kn_flags |= EV_EOF; - kn->kn_fflags = so->so_error; - return (1); + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_lock(so, 1); + + if (so->so_oobmark) { + if (kn->kn_flags & EV_OOBAND) { + kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark; + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + return (1); + } + kn->kn_data = so->so_oobmark; + kn->kn_flags |= EV_OOBAND; + } else { + kn->kn_data = so->so_rcv.sb_cc; + if (so->so_state & SS_CANTRCVMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + return (1); + } } - if (so->so_error) /* temporary udp error */ + + if (so->so_state & SS_RCVATMARK) { + if (kn->kn_flags & EV_OOBAND) { + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + return (1); + } + kn->kn_flags |= EV_OOBAND; + } else if (kn->kn_flags & EV_OOBAND) { + kn->kn_data = 0; + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + return (0); + } + + if (so->so_error) { /* temporary udp error */ + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); return (1); - if (kn->kn_sfflags & NOTE_LOWAT) - return (kn->kn_data >= kn->kn_sdata); - return (kn->kn_data >= so->so_rcv.sb_lowat); + } + + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + + return( kn->kn_flags & EV_OOBAND || + kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? 
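/*
 * The reworked filt_soread() above folds out-of-band state into the
 * knote: with an OOB mark set, kn_data counts bytes relative to the mark
 * rather than the raw sb_cc, and EV_OOBAND marks the event ready.  A
 * simplified sketch of that accounting that ignores the SS_RCVATMARK and
 * so_error branches; the names and scalar shapes are illustrative.
 */
static int
soread_ready(long sb_cc, long oobmark, int want_oob,
    long lowat, long *kn_data)
{
        if (oobmark > 0) {
                if (want_oob)
                        *kn_data = sb_cc - oobmark;     /* bytes past the mark */
                else
                        *kn_data = oobmark;             /* bytes up to the mark */
                return 1;       /* the hunk raises EV_OOBAND, so it fires */
        }
        *kn_data = sb_cc;
        return *kn_data >= lowat;       /* the ordinary low-water test */
}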
+ kn->kn_sdata : so->so_rcv.sb_lowat)); } static void filt_sowdetach(struct knote *kn) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; - int s = splnet(); + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + socket_lock(so, 1); - if(so->so_snd.sb_flags & SB_KNOTE && - !(so->so_snd.sb_sel.si_flags & SI_INITED)) + if(so->so_snd.sb_flags & SB_KNOTE) if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) so->so_snd.sb_flags &= ~SB_KNOTE; - splx(s); + socket_unlock(so, 1); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_lock(so, 1); kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); return (1); } - if (so->so_error) /* temporary udp error */ + if (so->so_error) { /* temporary udp error */ + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); return (1); + } if (((so->so_state & SS_ISCONNECTED) == 0) && - (so->so_proto->pr_flags & PR_CONNREQUIRED)) + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); return (0); + } + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); return (kn->kn_data >= so->so_snd.sb_lowat); @@ -2588,9 +2827,123 @@ filt_sowrite(struct knote *kn, long hint) static int filt_solisten(struct knote *kn, long hint) { - struct socket *so = (struct socket *)kn->kn_fp->f_data; + struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + int isempty; + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_lock(so, 1); kn->kn_data = so->so_qlen; - return (! TAILQ_EMPTY(&so->so_comp)); + isempty = ! 
TAILQ_EMPTY(&so->so_comp); + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + return (isempty); } + +int +socket_lock(so, refcount) + struct socket *so; + int refcount; +{ + int error = 0, lr, lr_saved; +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + + if (so->so_proto->pr_lock) { + error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); + } + else { +#ifdef MORE_LOCKING_DEBUG + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); +#endif + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + if (refcount) + so->so_usecount++; + so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */ + } + + return(error); + +} + +int +socket_unlock(so, refcount) + struct socket *so; + int refcount; +{ + int error = 0, lr, lr_saved; + lck_mtx_t * mutex_held; + +#ifdef __ppc__ +__asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + + + + if (so->so_proto == NULL) + panic("socket_unlock null so_proto so=%x\n", so); + + if (so && so->so_proto->pr_unlock) + error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved); + else { + mutex_held = so->so_proto->pr_domain->dom_mtx; +#ifdef MORE_LOCKING_DEBUG + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); +#endif + if (refcount) { + if (so->so_usecount <= 0) + panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount); + so->so_usecount--; + if (so->so_usecount == 0) { + sofreelastref(so, 1); + } + else + so->reserved4 = (void*)lr_saved; /* save caller */ + } + lck_mtx_unlock(mutex_held); + } + + return(error); +} +//### Called with socket locked, will unlock socket +void +sofree(so) + struct socket *so; +{ + + int lr, lr_saved; + lck_mtx_t * mutex_held; +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + /* Remove the filters */ + sflt_termsock(so); + + sofreelastref(so, 0); +} + +void +soreference(so) + struct socket *so; +{ + socket_lock(so, 1); /* locks & take one reference on socket */ + socket_unlock(so, 0); /* unlock only */ +} + +void +sodereference(so) + struct socket *so; +{ + socket_lock(so, 0); + socket_unlock(so, 1); +} diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index 175824a8a..2fb59d20f 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -61,7 +61,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -71,7 +72,10 @@ #include #include #include - +#include +#include +#include +#include #include #define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) @@ -115,7 +119,6 @@ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ * the kernel, the wakeups done here will sometimes * cause software-interrupt process scheduling. 
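/*
 * socket_unlock() above ties the reference count to the lock discipline:
 * whoever drops so_usecount to zero frees the socket via sofreelastref()
 * before releasing the mutex, and the mutex itself (the domain mutex)
 * outlives the socket, so its address is saved first.  A user-space
 * analogue of that last-reference teardown; zsocket and the pthread mutex
 * are illustrative stand-ins.
 */
#include <pthread.h>
#include <stdlib.h>

struct zsocket {
        pthread_mutex_t *mtx;   /* shared domain mutex, outlives the socket */
        int usecount;
};

static void
zsocket_unlock(struct zsocket *so, int refcount)
{
        pthread_mutex_t *mtx = so->mtx;         /* grab before a possible free */

        if (refcount) {
                if (so->usecount <= 0)
                        abort();                /* the kernel panics here */
                so->usecount--;
                if (so->usecount == 0)
                        free(so);               /* sofreelastref() analogue */
        }
        pthread_mutex_unlock(mtx);              /* safe: mutex is not in *so */
}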
*/ - void soisconnecting(so) register struct socket *so; @@ -123,6 +126,8 @@ soisconnecting(so) so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; + + sflt_notify(so, sock_evt_connecting, NULL); } void @@ -130,30 +135,27 @@ soisconnected(so) struct socket *so; { struct socket *head = so->so_head; - struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soisconnected) { - if ((*kp->e_soif->sf_soisconnected)(so, kp)) - return; - } - kp = kp->e_next; - } so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; + + sflt_notify(so, sock_evt_connected, NULL); + if (head && (so->so_state & SS_INCOMP)) { - postevent(head,0,EV_RCONN); + if (head->so_proto->pr_getlock != NULL) + socket_lock(head, 1); + postevent(head, 0, EV_RCONN); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_state &= ~SS_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; sorwakeup(head); - wakeup_one(&head->so_timeo); + wakeup_one((caddr_t)&head->so_timeo); + if (head->so_proto->pr_getlock != NULL) + socket_unlock(head, 1); } else { - postevent(so,0,EV_WCONN); + postevent(so, 0, EV_WCONN); wakeup((caddr_t)&so->so_timeo); sorwakeup(so); sowwakeup(so); @@ -164,19 +166,9 @@ void soisdisconnecting(so) register struct socket *so; { - register struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soisdisconnecting) { - if ((*kp->e_soif->sf_soisdisconnecting)(so, kp)) - return; - } - kp = kp->e_next; - } - so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); + sflt_notify(so, sock_evt_disconnecting, NULL); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); @@ -186,19 +178,9 @@ void soisdisconnected(so) register struct socket *so; { - register struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soisdisconnected) { - if ((*kp->e_soif->sf_soisdisconnected)(so, kp)) - return; - } - kp = kp->e_next; - } - so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); + sflt_notify(so, sock_evt_disconnected, NULL); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); @@ -218,7 +200,7 @@ struct socket * sodropablereq(head) register struct socket *head; { - register struct socket *so; + struct socket *so, *sonext = NULL; unsigned int i, j, qlen; static int rnd; static struct timeval old_runtime; @@ -234,18 +216,27 @@ sodropablereq(head) so = TAILQ_FIRST(&head->so_incomp); if (!so) - return (so); + return (NULL); qlen = head->so_incqlen; if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; - - while (j-- && so) - so = TAILQ_NEXT(so, so_list); +//###LD To clean up + while (j-- && so) { +// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + socket_lock(so, 1); + sonext = TAILQ_NEXT(so, so_list); +// in_pcb_check_state(so->so_pcb, WNT_RELEASE, 0); + socket_unlock(so, 1); + so = sonext; + } } - return (so); +// if (in_pcb_checkstate(so->so_pcb, WNT_ACQUIRE, 0) == WNT_STOPUSING) +// return (NULL); +// else + return (so); } /* @@ -256,14 +247,20 @@ sodropablereq(head) * data structure of the original socket, and return this. * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 
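/*
 * sodropablereq() above keeps its cheap pseudo-random pick of which
 * half-open connection to sacrifice under queue pressure: rnd is a 16-bit
 * linear-congruential state, and ((qlen + 1) * rnd) >> 16 maps it to a
 * queue index without a division.  A standalone sketch of the generator
 * plus a bounds check.
 */
#include <assert.h>

static unsigned rnd_state = 1;          /* any nonzero seed works */

static unsigned
drop_index(unsigned qlen)
{
        rnd_state = (314159u * rnd_state + 66329u) & 0xffffu;
        return ((qlen + 1u) * rnd_state) >> 16;         /* in [0, qlen] */
}

int
main(void)
{
        unsigned i;

        for (i = 0; i < 1000; i++)
                assert(drop_index(15) <= 15);   /* never past the queue end */
        return 0;
}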
*/ -struct socket * -sonewconn(head, connstatus) +static struct socket * +sonewconn_internal(head, connstatus) register struct socket *head; int connstatus; { int error = 0; register struct socket *so; - register struct kextcb *kp; + lck_mtx_t *mutex_held; + + if (head->so_proto->pr_getlock != NULL) + mutex_held = (*head->so_proto->pr_getlock)(head, 0); + else + mutex_held = head->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); @@ -285,36 +282,25 @@ sonewconn(head, connstatus) so->so_timeo = head->so_timeo; so->so_pgid = head->so_pgid; so->so_uid = head->so_uid; + so->so_usecount = 1; - /* Attach socket filters for this protocol */ - if (so->so_proto->pr_sfilter.tqh_first) - error = sfilter_init(so); - if (error != 0) { + if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { + sflt_termsock(so); sodealloc(so); return ((struct socket *)0); } - /* Call socket filters' sonewconn1 function if set */ - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_sonewconn) { - error = (int)(*kp->e_soif->sf_sonewconn)(so, connstatus, kp); - if (error == EJUSTRETURN) { - return so; - } else if (error != 0) { - sodealloc(so); - return NULL; - } - } - kp = kp->e_next; - } - - if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || - (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { - sfilter_term(so); + /* + * Must be done with head unlocked to avoid deadlock with pcb list + */ + socket_unlock(head, 0); + if (((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL) != 0) || error) { + sflt_termsock(so); sodealloc(so); + socket_lock(head, 0); return ((struct socket *)0); } + socket_lock(head, 0); #ifdef __APPLE__ so->so_proto->pr_domain->dom_refs++; #endif @@ -328,18 +314,57 @@ sonewconn(head, connstatus) head->so_incqlen++; } head->so_qlen++; - if (connstatus) { - sorwakeup(head); - wakeup((caddr_t)&head->so_timeo); - so->so_state |= connstatus; - } #ifdef __APPLE__ so->so_rcv.sb_so = so->so_snd.sb_so = so; TAILQ_INIT(&so->so_evlist); + + /* Attach socket filters for this protocol */ + sflt_initsock(so); #endif + if (connstatus) { + so->so_state |= connstatus; + sorwakeup(head); + wakeup((caddr_t)&head->so_timeo); + } return (so); } + +struct socket * +sonewconn( + struct socket *head, + int connstatus, + const struct sockaddr *from) +{ + int error = 0; + struct socket_filter_entry *filter; + int filtered = 0; + + error = 0; + for (filter = head->so_filt; filter && (error == 0); + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_connect_in) { + if (filtered == 0) { + filtered = 1; + sflt_use(head); + socket_unlock(head, 0); + } + error = filter->sfe_filter->sf_filter.sf_connect_in( + filter->sfe_cookie, head, from); + } + } + if (filtered != 0) { + socket_lock(head, 0); + sflt_unuse(head); + } + + if (error) { + return NULL; + } + + return sonewconn_internal(head, connstatus); +} + /* * Socantsendmore indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user @@ -354,19 +379,8 @@ void socantsendmore(so) struct socket *so; { - register struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_socantsendmore) { - if ((*kp->e_soif->sf_socantsendmore)(so, kp)) - return; - } - kp = kp->e_next; - } - - so->so_state |= SS_CANTSENDMORE; + sflt_notify(so, sock_evt_cantsendmore, NULL); sowwakeup(so); } @@ -374,19 +388,8 @@ void socantrcvmore(so) 
struct socket *so; { - register struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_socantrcvmore) { - if ((*kp->e_soif->sf_socantrcvmore)(so, kp)) - return; - } - kp = kp->e_next; - } - - so->so_state |= SS_CANTRCVMORE; + sflt_notify(so, sock_evt_cantrecvmore, NULL); sorwakeup(so); } @@ -397,11 +400,42 @@ int sbwait(sb) struct sockbuf *sb; { + int error = 0, lr, lr_saved; + struct socket *so = sb->sb_so; + lck_mtx_t *mutex_held; + struct timespec ts; + +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; sb->sb_flags |= SB_WAIT; - return (tsleep((caddr_t)&sb->sb_cc, - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", - sb->sb_timeo)); + + if (so->so_usecount < 1) + panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount); + ts.tv_sec = sb->sb_timeo.tv_sec; + ts.tv_nsec = sb->sb_timeo.tv_usec * 1000; + error = msleep((caddr_t)&sb->sb_cc, mutex_held, + (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", + &ts); + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + if (so->so_usecount < 1) + panic("sbwait: so=%x refcount=%d\n", so, so->so_usecount); + + if ((so->so_state & SS_DRAINING)) { + error = EBADF; + } + + return (error); } /* @@ -412,14 +446,31 @@ int sb_lock(sb) register struct sockbuf *sb; { - int error; + struct socket *so = sb->sb_so; + lck_mtx_t * mutex_held; + int error = 0, lr, lr_saved; + +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + + if (so == NULL) + panic("sb_lock: null so back pointer sb=%x\n", sb); while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; - error = tsleep((caddr_t)&sb->sb_flags, - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, - "sblock", 0); - if (error) + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + if (so->so_usecount < 1) + panic("sb_lock: so=%x refcount=%d\n", so, so->so_usecount); + error = msleep((caddr_t)&sb->sb_flags, mutex_held, + (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sblock", 0); + if (so->so_usecount < 1) + panic("sb_lock: 2 so=%x refcount=%d\n", so, so->so_usecount); + if (error) return (error); } sb->sb_flags |= SB_LOCK; @@ -437,8 +488,6 @@ sowakeup(so, sb) register struct sockbuf *sb; { struct proc *p = current_proc(); - /* We clear the flag before calling selwakeup. 
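sbwait() above is the clearest example of the funnel-to-mutex conversion: the old tsleep() took a tick count, while msleep() takes the mutex to drop plus a timespec built from the sockbuf's timeval. Condensed, reusing the hypothetical so_mutex_sketch() helper from earlier:

/* Condensed from sbwait() above; sketch only. */
struct socket *so = sb->sb_so;
struct timespec ts;
int error;

sb->sb_flags |= SB_WAIT;
ts.tv_sec  = sb->sb_timeo.tv_sec;         /* sb_timeo is now a struct timeval */
ts.tv_nsec = sb->sb_timeo.tv_usec * 1000; /* microseconds -> nanoseconds */
error = msleep((caddr_t)&sb->sb_cc, so_mutex_sketch(so),
    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", &ts);
if (so->so_state & SS_DRAINING)           /* lock was dropped while asleep */
	error = EBADF;

The SS_DRAINING re-check matters precisely because msleep() releases the mutex: the socket may have begun tearing down before the sleeper reacquired it.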
*/ - /* BSD calls selwakeup then sets the flag */ sb->sb_flags &= ~SB_SEL; selwakeup(&sb->sb_sel); if (sb->sb_flags & SB_WAIT) { @@ -451,11 +500,14 @@ sowakeup(so, sb) else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) psignal(p, SIGIO); } - if (sb->sb_flags & SB_UPCALL) + if (sb->sb_flags & SB_KNOTE) { + KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED); + } + if (sb->sb_flags & SB_UPCALL) { + socket_unlock(so, 0); (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); - if (sb->sb_flags & SB_KNOTE && - !(sb->sb_sel.si_flags & SI_INITED)) - KNOTE(&sb->sb_sel.si_note, 0); + socket_lock(so, 0); + } } /* @@ -495,16 +547,6 @@ soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { - register struct kextcb *kp; - - kp = sotokextcb(so); - while (kp) { - if (kp->e_soif && kp->e_soif->sf_soreserve) { - if ((*kp->e_soif->sf_soreserve)(so, sndcc, rcvcc, kp)) - return; - } - kp = kp->e_next; - } if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; @@ -591,44 +633,55 @@ sbrelease(sb) * the mbuf chain is recorded in sb. Empty mbufs are * discarded and mbufs are compacted where possible. */ -void +int sbappend(sb, m) struct sockbuf *sb; struct mbuf *m; { - struct kextcb *kp; - register struct mbuf *n; + register struct mbuf *n, *sb_first; + int result = 0; + int error = 0; KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0); if (m == 0) - return; - kp = sotokextcb(sbtoso(sb)); - while (kp) { - if (kp->e_sout && kp->e_sout->su_sbappend) { - if ((*kp->e_sout->su_sbappend)(sb, m, kp)) { - KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, kp, 0, 0); - return; - } - } - kp = kp->e_next; - } - n = sb->sb_mb; + return 0; + sb_first = n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { - sbappendrecord(sb, m); /* XXXXXX!!!! */ + result = sbappendrecord(sb, m); /* XXXXXX!!!! 
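sowakeup() above now drops the socket lock around the SB_UPCALL callback, since upcall handlers may re-enter socket code that takes the same lock. This unlock, call, relock shape recurs for every externally supplied callback in the patch:

/* Pattern used for so_upcall (and, elsewhere, for socket filters). */
if (sb->sb_flags & SB_UPCALL) {
	socket_unlock(so, 0);    /* drop the mutex, keep the use count */
	(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
	socket_lock(so, 0);      /* re-take before touching so/sb state */
}

Note also that KNOTE() is now posted with SO_FILT_HINT_LOCKED while the lock is still held, rather than after the upcall as before.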
*/ KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0); - return; + return result; } } while (n->m_next && (n = n->m_next)); } - sbcompress(sb, m, n); + + if ((sb->sb_flags & SB_RECV) != 0) { + error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0); + if (error) { + /* no data was appended, caller should not call sowakeup */ + return 0; + } + } + + /* 3962537 - sflt_data_in may drop the lock, need to validate state again */ + if (sb_first != sb->sb_mb) { + n = sb->sb_mb; + if (n) { + while (n->m_nextpkt) + n = n->m_nextpkt; + } + } + + result = sbcompress(sb, m, n); KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0); + + return result; } #ifdef SOCKBUF_DEBUG @@ -639,6 +692,17 @@ sbcheck(sb) register struct mbuf *m; register struct mbuf *n = 0; register u_long len = 0, mbcnt = 0; + lck_mtx_t *mutex_held; + + if (sb->sb_so->so_proto->pr_getlock != NULL) + mutex_held = (*sb->sb_so->so_proto->pr_getlock)(sb->sb_so, 0); + else + mutex_held = sb->sb_so->so_proto->pr_domain->dom_mtx; + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + if (sbchecking == 0) + return; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; @@ -649,18 +713,10 @@ sbcheck(sb) mbcnt += m->m_ext.ext_size; } } -#ifndef __APPLE__ if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { - printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, + panic("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); - panic("sbcheck"); } -#else - if (len != sb->sb_cc) - printf("sbcheck len %ld != sb_cc %ld\n", len, sb->sb_cc); - if (mbcnt != sb->sb_mbcnt) - printf("sbcheck mbcnt %ld != sb_mbcnt %ld\n", mbcnt, sb->sb_mbcnt); -#endif } #endif @@ -668,24 +724,24 @@ sbcheck(sb) * As above, except the mbuf chain * begins a new record. */ -void +int sbappendrecord(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; - register struct kextcb *kp; + int result = 0; if (m0 == 0) - return; - - kp = sotokextcb(sbtoso(sb)); - while (kp) - { if (kp->e_sout && kp->e_sout->su_sbappendrecord) - { if ((*kp->e_sout->su_sbappendrecord)(sb, m0, kp)) - return; + return 0; + + if ((sb->sb_flags & SB_RECV) != 0) { + int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, sock_data_filt_flag_record); + if (error != 0) { + if (error != EJUSTRETURN) + m_freem(m0); + return 0; } - kp = kp->e_next; } m = sb->sb_mb; @@ -707,7 +763,7 @@ sbappendrecord(sb, m0) m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } - sbcompress(sb, m, m0); + return sbcompress(sb, m, m0); } /* @@ -715,25 +771,27 @@ sbappendrecord(sb, m0) * is inserted at the beginning of the sockbuf, * but after any other OOB data. 
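The rdar 3962537 comment above states the central invariant of the new scheme: sflt_data_in() may drop and re-take the socket lock, so any mbuf pointer cached across the call is suspect. sbappend() therefore snapshots the head of the chain before running filters and re-derives its append point if the chain moved:

/* Condensed from sbappend() above. */
struct mbuf *sb_first = sb->sb_mb;        /* snapshot before filters run */

error = sflt_data_in(sb->sb_so, NULL, &m, NULL, 0);
if (error)
	return 0;                         /* nothing appended; no sowakeup */

if (sb_first != sb->sb_mb) {              /* lock was dropped; chain changed */
	n = sb->sb_mb;
	if (n)
		while (n->m_nextpkt)      /* re-find the last record */
			n = n->m_nextpkt;
}
return sbcompress(sb, m, n);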
*/ -void +int sbinsertoob(sb, m0) - register struct sockbuf *sb; - register struct mbuf *m0; + struct sockbuf *sb; + struct mbuf *m0; { - register struct mbuf *m; - register struct mbuf **mp; - register struct kextcb *kp; + struct mbuf *m; + struct mbuf **mp; if (m0 == 0) - return; - - kp = sotokextcb(sbtoso(sb)); - while (kp) - { if (kp->e_sout && kp->e_sout->su_sbinsertoob) - { if ((*kp->e_sout->su_sbinsertoob)(sb, m0, kp)) - return; + return 0; + + if ((sb->sb_flags & SB_RECV) != 0) { + int error = sflt_data_in(sb->sb_so, NULL, &m0, NULL, + sock_data_filt_flag_oob); + + if (error) { + if (error != EJUSTRETURN) { + m_freem(m0); + } + return 0; } - kp = kp->e_next; } for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { @@ -764,7 +822,7 @@ sbinsertoob(sb, m0) m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } - sbcompress(sb, m, m0); + return sbcompress(sb, m, m0); } /* @@ -773,28 +831,18 @@ sbinsertoob(sb, m0) * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. */ -int -sbappendaddr(sb, asa, m0, control) +static int +sbappendaddr_internal(sb, asa, m0, control) register struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; { register struct mbuf *m, *n; int space = asa->sa_len; - register struct kextcb *kp; if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); - kp = sotokextcb(sbtoso(sb)); - while (kp) - { if (kp->e_sout && kp->e_sout->su_sbappendaddr) - { if ((*kp->e_sout->su_sbappendaddr)(sb, asa, m0, control, kp)) - return 0; - } - kp = kp->e_next; - } - if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { @@ -830,26 +878,55 @@ sbappendaddr(sb, asa, m0, control) } int -sbappendcontrol(sb, m0, control) +sbappendaddr( + struct sockbuf* sb, + struct sockaddr* asa, + struct mbuf *m0, + struct mbuf *control, + int *error_out) +{ + int result = 0; + + if (error_out) *error_out = 0; + + if (m0 && (m0->m_flags & M_PKTHDR) == 0) + panic("sbappendaddrorfree"); + + /* Call socket data in filters */ + if ((sb->sb_flags & SB_RECV) != 0) { + int error; + error = sflt_data_in(sb->sb_so, asa, &m0, &control, 0); + if (error) { + if (error != EJUSTRETURN) { + if (m0) m_freem(m0); + if (control) m_freem(control); + if (error_out) *error_out = error; + } + return 0; + } + } + + result = sbappendaddr_internal(sb, asa, m0, control); + if (result == 0) { + if (m0) m_freem(m0); + if (control) m_freem(control); + if (error_out) *error_out = ENOBUFS; + } + + return result; +} + +static int +sbappendcontrol_internal(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { register struct mbuf *m, *n; int space = 0; - register struct kextcb *kp; if (control == 0) panic("sbappendcontrol"); - kp = sotokextcb(sbtoso(sb)); - while (kp) - { if (kp->e_sout && kp->e_sout->su_sbappendcontrol) - { if ((*kp->e_sout->su_sbappendcontrol)(sb, m0, control, kp)) - return 0; - } - kp = kp->e_next; - } - for (m = control; ; m = m->m_next) { space += m->m_len; if (m->m_next == 0) @@ -874,12 +951,46 @@ sbappendcontrol(sb, m0, control) return (1); } +int +sbappendcontrol( + struct sockbuf *sb, + struct mbuf *m0, + struct mbuf *control, + int *error_out) +{ + int result = 0; + + if (error_out) *error_out = 0; + + if (sb->sb_flags & SB_RECV) { + int error; + error = sflt_data_in(sb->sb_so, NULL, &m0, &control, 0); + if (error) { + if (error != EJUSTRETURN) { + if (m0) m_freem(m0); + if (control) m_freem(control); + if (error_out) *error_out = error; + } + return 0; + } + } + + result = sbappendcontrol_internal(sb, m0, 
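sbappendaddr() and sbappendcontrol() above keep their old 0/1 return (nothing appended / appended) but add an error_out parameter so callers can tell why nothing was appended. The EJUSTRETURN case is the subtle one: the filter has taken ownership of the mbufs, so the wrapper neither frees them nor reports an error. A hypothetical datagram-input caller:

/* Hypothetical caller; the error handling shown is illustrative only. */
int error;

if (sbappendaddr(&so->so_rcv, from, m0, control, &error) != 0) {
	sorwakeup(so);            /* data queued; wake readers */
} else if (error != 0) {
	/* filter veto or ENOBUFS; the wrapper already freed m0/control */
	so->so_error = error;
}
/* else: a filter returned EJUSTRETURN and now owns the chain */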
control); + if (result == 0) { + if (m0) m_freem(m0); + if (control) m_freem(control); + if (error_out) *error_out = ENOBUFS; + } + + return result; +} + /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n * is null, the buffer is presumed empty. */ -void +static int sbcompress(sb, m, n) register struct sockbuf *sb; register struct mbuf *m, *n; @@ -927,6 +1038,7 @@ sbcompress(sb, m, n) printf("semi-panic: sbcompress\n"); } postevent(0,sb, EV_RWBYTES); + return 1; } /* @@ -937,17 +1049,8 @@ void sbflush(sb) register struct sockbuf *sb; { - register struct kextcb *kp; - - kp = sotokextcb(sbtoso(sb)); - while (kp) { - if (kp->e_sout && kp->e_sout->su_sbflush) { - if ((*kp->e_sout->su_sbflush)(sb, kp)) - return; - } - kp = kp->e_next; - } - + if (sb->sb_so == NULL) + panic ("sbflush sb->sb_so already null sb=%x\n", sb); (void)sblock(sb, M_WAIT); while (sb->sb_mbcnt) { /* @@ -958,12 +1061,12 @@ sbflush(sb) break; sbdrop(sb, (int)sb->sb_cc); } - if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) - panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); - - sbunlock(sb); + if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt || sb->sb_so == NULL) + panic("sbflush: cc %ld || mb %p || mbcnt %ld sb_so=%x", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt, sb->sb_so); postevent(0, sb, EV_RWBYTES); + sbunlock(sb, 1); /* keep socket locked */ + } /* @@ -984,20 +1087,9 @@ sbdrop(sb, len) { register struct mbuf *m, *free_list, *ml; struct mbuf *next, *last; - register struct kextcb *kp; KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0); - kp = sotokextcb(sbtoso(sb)); - while (kp) { - if (kp->e_sout && kp->e_sout->su_sbdrop) { - if ((*kp->e_sout->su_sbdrop)(sb, len, kp)) { - KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, len, kp, 0, 0); - return; - } - } - kp = kp->e_next; - } next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; free_list = last = m; ml = (struct mbuf *)0; @@ -1065,16 +1157,6 @@ sbdroprecord(sb) register struct sockbuf *sb; { register struct mbuf *m, *mn; - register struct kextcb *kp; - - kp = sotokextcb(sbtoso(sb)); - while (kp) { - if (kp->e_sout && kp->e_sout->su_sbdroprecord) { - if ((*kp->e_sout->su_sbdroprecord)(sb, kp)) - return; - } - kp = kp->e_next; - } m = sb->sb_mb; if (m) { @@ -1266,8 +1348,9 @@ int pru_soreceive(struct socket *so, } -int pru_sopoll_notsupp(struct socket *so, int events, - struct ucred *cred) +int +pru_sopoll_notsupp(__unused struct socket *so, __unused int events, + __unused kauth_cred_t cred, __unused void *wql) { return EOPNOTSUPP; } @@ -1365,13 +1448,40 @@ sblock(struct sockbuf *sb, int wf) /* release lock on sockbuf sb */ void -sbunlock(struct sockbuf *sb) +sbunlock(struct sockbuf *sb, int keeplocked) { + struct socket *so = sb->sb_so; + int lr, lr_saved; + lck_mtx_t *mutex_held; + +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif sb->sb_flags &= ~SB_LOCK; + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + + if (keeplocked == 0) + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + if (sb->sb_flags & SB_WANT) { sb->sb_flags &= ~SB_WANT; + if (so->so_usecount < 0) + panic("sbunlock: b4 wakeup so=%x ref=%d lr=%x sb_flags=%x\n", sb->sb_so, so->so_usecount, lr_saved, sb->sb_flags); + wakeup((caddr_t)&(sb)->sb_flags); } + if (keeplocked == 0) { /* unlock on exit */ + so->so_usecount--; + if (so->so_usecount < 0) + panic("sbunlock: unlock on exit so=%x lr=%x sb_flags=%x\n", so, so->so_usecount,lr_saved, sb->sb_flags); + so->reserved4= lr_saved; + lck_mtx_unlock(mutex_held); + } } void @@ -1424,8 +1534,12 @@ sotoxsocket(struct socket *so, struct xsocket *xso) xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; - xso->xso_protocol = so->so_proto->pr_protocol; - xso->xso_family = so->so_proto->pr_domain->dom_family; + if (so->so_proto) { + xso->xso_protocol = so->so_proto->pr_protocol; + xso->xso_family = so->so_proto->pr_domain->dom_family; + } + else + xso->xso_protocol = xso->xso_family = 0; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; @@ -1453,7 +1567,9 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; - xsb->sb_timeo = sb->sb_timeo; + xsb->sb_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; + if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) + xsb->sb_timeo = 1; } /* diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 5f35cd283..6fba04eae 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -62,23 +62,34 @@ #include #include #include -#include -#include -#include +#include +#include #include #include +#include +#include #include +#include #include #include #if KTRACE #include #endif #include +#include #include #include - +#include + +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data #if KDEBUG #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) @@ -94,35 +105,13 @@ #endif -struct getsockname_args { - int fdes; - caddr_t asa; - socklen_t *alen; -}; -struct getsockopt_args { - int s; - 
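sbunlock() above takes on a second job controlled by the new keeplocked flag: besides waking SB_WANT waiters, it can drop the caller's use count and release the socket mutex on the way out, which is the common end-of-syscall case. At call sites the flag reads:

sbunlock(&so->so_rcv, 1);    /* release sockbuf lock, KEEP the socket mutex
                              * (e.g. sbflush(), which runs under the lock) */
sbunlock(&so->so_rcv, 0);    /* release sockbuf lock, drop the use count,
                              * and unlock the socket mutex: the exit path */

Separately, sbtoxsockbuf() now converts the timeval-based sb_timeo back to the clock ticks the exported xsockbuf expects (with hz = 100, i.e. tick = 10000 us, a timeout of 1 s 250000 us reports as 1*100 + 250000/10000 = 125 ticks), rounding any nonzero sub-tick timeout up to 1 tick so it is never mistaken for "no timeout".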
int level; - int name; - caddr_t val; - socklen_t *avalsize; -} ; - -struct accept_args { - int s; - caddr_t name; - socklen_t *anamelen; -}; - -struct getpeername_args { - int fdes; - caddr_t asa; - socklen_t *alen; -}; +#define HACK_FOR_4056224 1 +#if HACK_FOR_4056224 +static pid_t last_pid_4056224 = 0; +#endif /* HACK_FOR_4056224 */ -/* ARGSUSED */ - #if SENDFILE static void sf_buf_init(void *arg); SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) @@ -136,187 +125,225 @@ static struct sf_buf *sf_bufs; static int sf_buf_alloc_want; #endif -static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags, register_t *retval)); -static int recvit __P((struct proc *p, int s, struct msghdr *mp, - caddr_t namelenp, register_t *retval)); +static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, + int flags, register_t *retval); +static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, + user_addr_t namelenp, register_t *retval); -static int accept1 __P((struct proc *p, struct accept_args *uap, register_t *retval, int compat)); -static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, - register_t *retval, int compat)); -static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, - register_t *retval, int compat)); +static int accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat); +static int getsockname1(struct proc *p, struct getsockname_args *uap, + register_t *retval, int compat); +static int getpeername1(struct proc *p, struct getpeername_args *uap, + register_t *retval, int compat); + + +#if COMPAT_43_SOCKET +struct orecvmsg_args { + int s; + struct omsghdr *msg; + int flags; +}; +struct osendmsg_args { + int s; + caddr_t msg; + int flags; +}; +struct osend_args { + int s; + caddr_t buf; + int len; + int flags; +}; +struct orecv_args { + int s; + caddr_t buf; + int len; + int flags; +}; + +int oaccept(struct proc *p, struct accept_args *uap, register_t *retval); +int ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval); +int ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval); +int orecv(struct proc *p, struct orecv_args *uap, register_t *retval); +int orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval); +int orecvmsg(struct proc *p, struct orecvmsg_args *uap, register_t *retval); +int osend(struct proc *p, struct osend_args *uap, register_t *retval); +int osendmsg(struct proc *p, struct osendmsg_args *uap, register_t *retval); +#endif // COMPAT_43_SOCKET /* * System call interface to the socket abstraction. 
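The f_flag/f_type/f_data macros introduced above are what let this file move from struct file to the split fileproc/fileglob model without rewriting every field access. Roughly (the member layout here is a sketch, not the real headers, which live in sys/file_internal.h):

/* Sketch of the split; names suffixed _sketch are hypothetical. */
struct fileglob_sketch {     /* shared by all descriptors dup'ed from one open */
	unsigned int  fg_flag;
	int           fg_type;
	caddr_t       fg_data;  /* the struct socket for DTYPE_SOCKET */
};
struct fileproc_sketch {     /* per-descriptor state */
	struct fileglob *f_fglob;
};
#define f_flag f_fglob->fg_flag   /* so fp->f_flag still compiles */

After preprocessing, fp->f_data = (caddr_t)so becomes fp->f_fglob->fg_data = (caddr_t)so.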
*/ -#if COMPAT_43 || defined(COMPAT_SUNOS) -#define COMPAT_OLDSOCK -#endif extern struct fileops socketops; -struct socket_args { - int domain; - int type; - int protocol; -}; int socket(p, uap, retval) struct proc *p; register struct socket_args *uap; register_t *retval; { - struct filedesc *fdp = p->p_fd; struct socket *so; - struct file *fp; + struct fileproc *fp; int fd, error; AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - error = falloc(p, &fp, &fd); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (error) + error = falloc(p, &fp, &fd); + if (error) { return (error); + } fp->f_flag = FREAD|FWRITE; fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - if (error = socreate(uap->domain, &so, uap->type, - uap->protocol)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - fdrelse(p, fd); - ffree(fp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + + error = socreate(uap->domain, &so, uap->type, uap->protocol); + if (error) { + fp_free(p, fd, fp); } else { fp->f_data = (caddr_t)so; + + proc_fdlock(p); *fdflags(p, fd) &= ~UF_RESERVED; + + fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + *retval = fd; } return (error); } -struct bind_args { - int s; - caddr_t name; - socklen_t namelen; -}; - /* ARGSUSED */ int -bind(p, uap, retval) - struct proc *p; - register struct bind_args *uap; - register_t *retval; +bind(struct proc *p, struct bind_args *uap, __unused register_t *retval) { - struct file *fp; struct sockaddr *sa; + struct socket *so; int error; AUDIT_ARG(fd, uap->s); - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) return (error); error = getsockaddr(&sa, uap->name, uap->namelen); - if (error) - return (error); + if (error) + goto out; AUDIT_ARG(sockaddr, p, sa); - if (fp->f_data != NULL) - error = sobind((struct socket *)fp->f_data, sa); + if (so != NULL) + error = sobind(so, sa); else error = EBADF; FREE(sa, M_SONAME); +out: + file_drop(uap->s); return (error); } -struct listen_args { - int s; - int backlog; -}; - - int -listen(p, uap, retval) - struct proc *p; - register struct listen_args *uap; - register_t *retval; +listen(__unused struct proc *p, register struct listen_args *uap, + __unused register_t *retval) { - struct file *fp; int error; + struct socket * so; AUDIT_ARG(fd, uap->s); - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) return (error); - if (fp->f_data != NULL) - return (solisten((struct socket *)fp->f_data, uap->backlog)); + if (so != NULL) + error = solisten(so, uap->backlog); else - return (EBADF); + error = EBADF; + file_drop(uap->s); + return (error); } -#ifndef COMPAT_OLDSOCK +#if !COMPAT_43_SOCKET #define accept1 accept #endif int -accept1(p, uap, retval, compat) - struct proc *p; - register struct accept_args *uap; - register_t *retval; - int compat; +accept1(struct proc *p, struct accept_args *uap, register_t *retval, int compat) { - struct file *fp; + struct fileproc *fp; struct sockaddr *sa; - u_int namelen; - int error, s; - struct socket *head, *so; - int fd; + socklen_t namelen; + int error; + struct socket *head, *so = NULL; + lck_mtx_t *mutex_held; + int fd = uap->s; + int newfd;; short fflag; /* type must match fp->f_flag */ - int tmpfd; + int dosocklock = 0; AUDIT_ARG(fd, uap->s); if (uap->name) { - error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, - sizeof (namelen)); + error = copyin(uap->anamelen, (caddr_t)&namelen, + sizeof(socklen_t)); if(error) return (error); } - error 
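bind() above sets the shape that nearly every syscall in this file now follows: file_socket() translates the descriptor into a socket pointer while taking a file reference that must be balanced by file_drop() on every exit path, replacing the old getsock(), which returned a borrowed struct file. A skeleton (function name hypothetical):

int
some_sock_syscall(struct proc *p, int s)
{
	struct socket *so;
	int error;

	error = file_socket(s, &so);   /* takes an fd reference */
	if (error)
		return (error);
	if (so == NULL) {
		error = EBADF;         /* descriptor closed underneath us */
		goto out;
	}
	/* ... do the work, taking socket_lock(so, 1) as needed ... */
out:
	file_drop(s);                  /* balance file_socket() */
	return (error);
}

The goto-out discipline matters: an early return after file_socket() would leak the reference, a leak the old borrowed-pointer getsock() could not produce.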
= getsock(p->p_fd, uap->s, &fp); - if (error) + error = fp_getfsock(p, fd, &fp, &head); + if (error) { + if (error == EOPNOTSUPP) + error = ENOTSOCK; return (error); - s = splnet(); - head = (struct socket *)fp->f_data; + } if (head == NULL) { - splx(s); - return (EBADF); + error = EBADF; + goto out; } + + socket_lock(head, 1); + + if (head->so_proto->pr_getlock != NULL) { + mutex_held = (*head->so_proto->pr_getlock)(head, 0); + dosocklock = 1; + } + else { + mutex_held = head->so_proto->pr_domain->dom_mtx; + dosocklock = 0; + } + + if ((head->so_options & SO_ACCEPTCONN) == 0) { - splx(s); - return (EINVAL); + socket_unlock(head, 1); + error = EINVAL; + goto out; } if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { - splx(s); - return (EWOULDBLOCK); + socket_unlock(head, 1); + error = EWOULDBLOCK; + goto out; } while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { head->so_error = ECONNABORTED; break; } - error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, + if (head->so_usecount < 1) + panic("accept1: head=%x refcount=%d\n", head, head->so_usecount); + error = msleep((caddr_t)&head->so_timeo, mutex_held, PSOCK | PCATCH, "accept", 0); + if (head->so_usecount < 1) + panic("accept1: 2 head=%x refcount=%d\n", head, head->so_usecount); + if ((head->so_state & SS_DRAINING)) { + error = ECONNABORTED; + } if (error) { - splx(s); - return (error); + socket_unlock(head, 1); + goto out; } } if (head->so_error) { error = head->so_error; head->so_error = 0; - splx(s); - return (error); + socket_unlock(head, 1); + goto out; } @@ -327,14 +354,14 @@ accept1(p, uap, retval, compat) * block allowing another process to accept the connection * instead. */ + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); so = TAILQ_FIRST(&head->so_comp); TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; - + socket_unlock(head, 0); /* unlock head to avoid deadlock with select, keep a ref on head */ fflag = fp->f_flag; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - error = falloc(p, &fp, &fd); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + proc_fdlock(p); + error = falloc_locked(p, &fp, &newfd, 1); if (error) { /* * Probably ran out of file descriptors. Put the @@ -342,114 +369,130 @@ accept1(p, uap, retval, compat) * do another wakeup so some other process might * have a chance at it. 
*/ + proc_fdunlock(p); + socket_lock(head, 0); TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); head->so_qlen++; - wakeup_one(&head->so_timeo); - splx(s); - return (error); - } else { - *fdflags(p, fd) &= ~UF_RESERVED; - *retval = fd; - } - - so->so_state &= ~SS_COMP; - so->so_head = NULL; + wakeup_one((caddr_t)&head->so_timeo); + socket_unlock(head, 1); + goto out; + } + *fdflags(p, newfd) &= ~UF_RESERVED; + *retval = newfd; fp->f_type = DTYPE_SOCKET; fp->f_flag = fflag; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; + fp_drop(p, newfd, fp, 1); + proc_fdunlock(p); + socket_lock(head, 0); + if (dosocklock) + socket_lock(so, 1); + so->so_state &= ~SS_COMP; + so->so_head = NULL; sa = 0; - (void) soaccept(so, &sa); + (void) soacceptlock(so, &sa, 0); + socket_unlock(head, 1); if (sa == 0) { namelen = 0; if (uap->name) goto gotnoname; - return 0; + if (dosocklock) + socket_unlock(so, 1); + error = 0; + goto out; } AUDIT_ARG(sockaddr, p, sa); if (uap->name) { /* check sa_len before it is destroyed */ if (namelen > sa->sa_len) namelen = sa->sa_len; -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif - error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); + error = copyout(sa, uap->name, namelen); if (!error) gotnoname: - error = copyout((caddr_t)&namelen, - (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); + error = copyout((caddr_t)&namelen, uap->anamelen, + sizeof(socklen_t)); } FREE(sa, M_SONAME); - splx(s); + if (dosocklock) + socket_unlock(so, 1); +out: + file_drop(fd); return (error); } int -accept(p, uap, retval) - struct proc *p; - struct accept_args *uap; - register_t *retval; +accept(struct proc *p, struct accept_args *uap, register_t *retval) { return (accept1(p, uap, retval, 0)); } -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET int -oaccept(p, uap, retval) - struct proc *p; - struct accept_args *uap; - register_t *retval; +oaccept(struct proc *p, struct accept_args *uap, register_t *retval) { return (accept1(p, uap, retval, 1)); } -#endif /* COMPAT_OLDSOCK */ +#endif /* COMPAT_43_SOCKET */ -struct connect_args { - int s; - caddr_t name; - socklen_t namelen; -}; /* ARGSUSED */ int -connect(p, uap, retval) - struct proc *p; - register struct connect_args *uap; - register_t *retval; +connect(struct proc *p, struct connect_args *uap, __unused register_t *retval) { - struct file *fp; - register struct socket *so; + struct socket *so; struct sockaddr *sa; - int error, s; + lck_mtx_t *mutex_held; + int error; + int fd = uap->s; AUDIT_ARG(fd, uap->s); - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket( fd, &so); if (error) return (error); - so = (struct socket *)fp->f_data; - if (so == NULL) - return (EBADF); - if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) - return (EALREADY); + if (so == NULL) { + error = EBADF; + goto out; + } + + socket_lock(so, 1); + + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + socket_unlock(so, 1); + error = EALREADY; + goto out; + } error = getsockaddr(&sa, uap->name, uap->namelen); - if (error) - return (error); + if (error) { + socket_unlock(so, 1); + goto out; + } AUDIT_ARG(sockaddr, p, sa); - error = soconnect(so, sa); + error = soconnectlock(so, sa, 0); if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FREE(sa, M_SONAME); - return (EINPROGRESS); + socket_unlock(so, 1); + error = EINPROGRESS; + goto out; } - s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - error = 
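The falloc_locked() failure path in accept1() above shows the care the finer-grained locking demands: the listener was unlocked to allocate the descriptor, so on failure the already-dequeued connection must be put back and another accepter woken, rather than being leaked. In outline:

/* Condensed from accept1() above. */
socket_unlock(head, 0);           /* keep a reference; avoid deadlock with select */
proc_fdlock(p);
error = falloc_locked(p, &fp, &newfd, 1);
if (error) {                      /* likely out of descriptors */
	proc_fdunlock(p);
	socket_lock(head, 0);
	TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);  /* re-queue it */
	head->so_qlen++;
	wakeup_one((caddr_t)&head->so_timeo);   /* give someone else a shot */
	socket_unlock(head, 1);
	goto out;
}

The use-count reference taken on head is what keeps the listener alive across the window where its lock is not held.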
tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + error = msleep((caddr_t)&so->so_timeo, mutex_held, PSOCK | PCATCH, "connec", 0); + if ((so->so_state & SS_DRAINING)) { + error = ECONNABORTED; + } if (error) break; } @@ -457,29 +500,21 @@ connect(p, uap, retval) error = so->so_error; so->so_error = 0; } - splx(s); bad: so->so_state &= ~SS_ISCONNECTING; + socket_unlock(so, 1); FREE(sa, M_SONAME); if (error == ERESTART) error = EINTR; +out: + file_drop(fd); return (error); } -struct socketpair_args { - int domain; - int type; - int protocol; - int *rsv; -}; int -socketpair(p, uap, retval) - struct proc *p; - register struct socketpair_args *uap; - register_t *retval; +socketpair(struct proc *p, struct socketpair_args *uap, __unused register_t *retval) { - register struct filedesc *fdp = p->p_fd; - struct file *fp1, *fp2; + struct fileproc *fp1, *fp2; struct socket *so1, *so2; int fd, error, sv[2]; @@ -490,57 +525,59 @@ socketpair(p, uap, retval) error = socreate(uap->domain, &so2, uap->type, uap->protocol); if (error) goto free1; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + error = falloc(p, &fp1, &fd); - if (error) + if (error) { goto free2; - sv[0] = fd; + } fp1->f_flag = FREAD|FWRITE; fp1->f_type = DTYPE_SOCKET; fp1->f_ops = &socketops; fp1->f_data = (caddr_t)so1; + sv[0] = fd; + error = falloc(p, &fp2, &fd); - if (error) + if (error) { goto free3; + } fp2->f_flag = FREAD|FWRITE; fp2->f_type = DTYPE_SOCKET; fp2->f_ops = &socketops; fp2->f_data = (caddr_t)so2; sv[1] = fd; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + error = soconnect2(so1, so2); if (error) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto free4; } - if (uap->type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto free4; } } + + proc_fdlock(p); *fdflags(p, sv[0]) &= ~UF_RESERVED; *fdflags(p, sv[1]) &= ~UF_RESERVED; - error = copyout((caddr_t)sv, (caddr_t)uap->rsv, - 2 * sizeof (int)); + fp_drop(p, sv[0], fp1, 1); + fp_drop(p, sv[1], fp2, 1); + proc_fdunlock(p); + + error = copyout((caddr_t)sv, uap->rsv, 2 * sizeof(int)); #if 0 /* old pipe(2) syscall compatability, unused these days */ retval[0] = sv[0]; /* XXX ??? */ retval[1] = sv[1]; /* XXX ??? 
*/ #endif /* 0 */ return (error); free4: - fdrelse(p, sv[1]); - ffree(fp2); + fp_free(p, sv[1], fp2); free3: - fdrelse(p, sv[0]); - ffree(fp1); + fp_free(p, sv[0], fp1); free2: - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); (void)soclose(so2); free1: (void)soclose(so1); @@ -548,68 +585,41 @@ free1: } static int -sendit(p, s, mp, flags, retsize) - register struct proc *p; - int s; - register struct msghdr *mp; - int flags; - register_t *retsize; +sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, + int flags, register_t *retval) { - struct file *fp; - struct uio auio; - register struct iovec *iov; - register int i; struct mbuf *control; struct sockaddr *to; - int len, error; + int error; struct socket *so; + user_ssize_t len; #if KTRACE - struct iovec *ktriov = NULL; - struct uio ktruio; + uio_t ktruio = NULL; #endif KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0,0,0,0,0); - if (error = getsock(p->p_fd, s, &fp)) + error = file_socket(s, &so); + if (error ) { KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0); return (error); } - - auio.uio_iov = mp->msg_iov; - auio.uio_iovcnt = mp->msg_iovlen; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_procp = p; - auio.uio_offset = 0; /* XXX */ - auio.uio_resid = 0; - iov = mp->msg_iov; - for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if (iov->iov_len < 0) - { - KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0); - return (EINVAL); - } - - if ((auio.uio_resid += iov->iov_len) < 0) - { - KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, EINVAL,0,0,0,0); - return (EINVAL); - } - } + if (mp->msg_name) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error) { KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0); - return (error); + goto out; } AUDIT_ARG(sockaddr, p, to); - } else + } else { to = 0; + } if (mp->msg_control) { - if (mp->msg_controllen < sizeof(struct cmsghdr) -#ifdef COMPAT_OLDSOCK - && mp->msg_flags != MSG_COMPAT + if (mp->msg_controllen < ((socklen_t)sizeof(struct cmsghdr)) +#if COMPAT_43_SOCKET + && !(mp->msg_flags & MSG_COMPAT) #endif ) { error = EINVAL; @@ -619,8 +629,8 @@ sendit(p, s, mp, flags, retsize) mp->msg_controllen, MT_CONTROL); if (error) goto bad; -#ifdef COMPAT_OLDSOCK - if (mp->msg_flags == MSG_COMPAT) { +#if COMPAT_43_SOCKET + if (mp->msg_flags & MSG_COMPAT) { register struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAIT); @@ -635,27 +645,24 @@ sendit(p, s, mp, flags, retsize) } } #endif - } else + } else { control = 0; + } #if KTRACE - if (KTRPOINT(p, KTR_GENIO)) { - int iovlen = auio.uio_iovcnt * sizeof (struct iovec); - - MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); - bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); - ktruio = auio; - } + if (KTRPOINT(p, KTR_GENIO)) { + ktruio = uio_duplicate(uiop); + } #endif - len = auio.uio_resid; - so = (struct socket *)fp->f_data; + + len = uio_resid(uiop); if (so == NULL) error = EBADF; else - error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, + error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control, flags); if (error) { - if (auio.uio_resid != len && (error == ERESTART || + if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; /* Generation of SIGPIPE can be controlled per socket */ @@ -663,278 +670,287 @@ sendit(p, s, mp, flags, retsize) psignal(p, SIGPIPE); } if (error == 0) - *retsize = len - auio.uio_resid; + *retval = (int)(len - uio_resid(uiop)); +bad: #if KTRACE - if (ktriov != NULL) 
{ + if (ktruio != NULL) { if (error == 0) { - ktruio.uio_iov = ktriov; - ktruio.uio_resid = retsize[0]; - ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1); + uio_setresid(ktruio, retval[0]); + ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error); } - FREE(ktriov, M_TEMP); + uio_free(ktruio); } #endif -bad: if (to) FREE(to, M_SONAME); KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error,0,0,0,0); +out: + file_drop(s); return (error); } -struct sendto_args { - int s; - caddr_t buf; - size_t len; - int flags; - caddr_t to; - int tolen; -}; - int -sendto(p, uap, retval) - struct proc *p; - register struct sendto_args /* { - int s; - caddr_t buf; - size_t len; - int flags; - caddr_t to; - int tolen; - } */ *uap; - register_t *retval; - +sendto(struct proc *p, struct sendto_args *uap, register_t *retval) { - struct msghdr msg; - struct iovec aiov; - int stat; + struct user_msghdr msg; + int error; + uio_t auio = NULL; KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0,0,0,0,0); AUDIT_ARG(fd, uap->s); + auio = uio_create(1, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + if (auio == NULL) { + return (ENOMEM); + } + uio_addiov(auio, uap->buf, uap->len); + msg.msg_name = uap->to; msg.msg_namelen = uap->tolen; - msg.msg_iov = &aiov; - msg.msg_iovlen = 1; + /* no need to set up msg_iov. sendit uses uio_t we send it */ + msg.msg_iov = 0; + msg.msg_iovlen = 0; msg.msg_control = 0; -#ifdef COMPAT_OLDSOCK msg.msg_flags = 0; -#endif - aiov.iov_base = uap->buf; - aiov.iov_len = uap->len; - stat = sendit(p, uap->s, &msg, uap->flags, retval); - KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, stat, *retval,0,0,0); - return(stat); -} -#ifdef COMPAT_OLDSOCK -struct osend_args { - int s; - caddr_t buf; - int len; - int flags; -}; + error = sendit(p, uap->s, &msg, auio, uap->flags, retval); + + if (auio != NULL) { + uio_free(auio); + } + +#if HACK_FOR_4056224 + /* + * Radar 4056224 + * Temporary workaround to let send() and recv() work over a pipe for binary compatibility + * This will be removed in the release following Tiger + */ + if (error == ENOTSOCK) { + struct fileproc *fp; + + if (fp_lookup(p, uap->s, &fp, 0) == 0) { + (void) fp_drop(p, uap->s, fp,0); + + if (fp->f_type == DTYPE_PIPE) { + struct write_args write_uap; + user_ssize_t write_retval; + + if (p->p_pid > last_pid_4056224) { + last_pid_4056224 = p->p_pid; + + printf("%s[%d] uses send/recv on a pipe\n", + p->p_comm, p->p_pid); + } + + bzero(&write_uap, sizeof(struct write_args)); + write_uap.fd = uap->s; + write_uap.cbuf = uap->buf; + write_uap.nbyte = uap->len; + + error = write(p, &write_uap, &write_retval); + *retval = (int)write_retval; + } + } + } +#endif /* HACK_FOR_4056224 */ -int -osend(p, uap, retval) - struct proc *p; - register struct osend_args /* { - int s; - caddr_t buf; - int len; - int flags; - } */ *uap; - register_t *retval; + KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval,0,0,0); + + return(error); +} +#if COMPAT_43_SOCKET +int +osend(__unused struct proc *p, + __unused struct osend_args *uap, + __unused register_t *retval) { - struct msghdr msg; - struct iovec aiov; - - msg.msg_name = 0; - msg.msg_namelen = 0; - msg.msg_iov = &aiov; - msg.msg_iovlen = 1; - aiov.iov_base = uap->buf; - aiov.iov_len = uap->len; - msg.msg_control = 0; - msg.msg_flags = 0; - return (sendit(p, uap->s, &msg, uap->flags, retval)); + /* these are no longer supported and in fact + * there is no way to call it directly. 
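sendto() above shows the new single-buffer I/O setup: instead of an on-stack msghdr/iovec pair, a uio_t is created with room for one iovec, sized to the address-space width of the calling process. recvfrom() below uses the same three calls with UIO_READ:

/* Condensed from sendto() above. */
uio_t auio;

auio = uio_create(1,                      /* one iovec */
    0,                                    /* starting offset */
    IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32,
    UIO_WRITE);                           /* UIO_READ on the receive side */
if (auio == NULL)
	return (ENOMEM);
uio_addiov(auio, uap->buf, uap->len);     /* user address + length */
/* ... hand auio to sendit(); uio_resid(auio) tracks bytes remaining ... */
uio_free(auio);

Because sendit()/recvit() now consume the uio directly, the user_msghdr passed alongside carries msg_iov = 0 and msg_iovlen = 0.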
+ * LP64todo - remove this once we're sure there are no clients + */ + return (ENOTSUP); } -struct osendmsg_args { - int s; - caddr_t msg; - int flags; -}; int -osendmsg(p, uap, retval) - struct proc *p; - register struct osendmsg_args /* { - int s; - caddr_t msg; - int flags; - } */ *uap; - register_t *retval; - +osendmsg(__unused struct proc *p, + __unused struct osendmsg_args *uap, + __unused register_t *retval) { - struct msghdr msg; - struct iovec aiov[UIO_SMALLIOV], *iov; - int error; - - error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); - if (error) - return (error); - if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { - if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) - return (EMSGSIZE); - MALLOC(iov, struct iovec *, - sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, - M_WAITOK); - } else - iov = aiov; - error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, - (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); - if (error) - goto done; - msg.msg_flags = MSG_COMPAT; - msg.msg_iov = iov; - error = sendit(p, uap->s, &msg, uap->flags, retval); -done: - if (iov != aiov) - FREE(iov, M_IOV); - return (error); + /* these are no longer supported and in fact + * there is no way to call it directly. + * LP64todo - remove this once we're sure there are no clients + */ + return (ENOTSUP); } #endif -struct sendmsg_args { - int s; - caddr_t msg; - int flags; -}; int -sendmsg(p, uap, retval) - struct proc *p; - register struct sendmsg_args *uap; - register_t *retval; +sendmsg(struct proc *p, register struct sendmsg_args *uap, register_t *retval) { struct msghdr msg; - struct iovec aiov[UIO_SMALLIOV], *iov; + struct user_msghdr user_msg; + caddr_t msghdrp; + int size_of_msghdr; int error; + int size_of_iovec; + uio_t auio = NULL; + struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0,0,0,0,0); AUDIT_ARG(fd, uap->s); - if (error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg))) + if (IS_64BIT_PROCESS(p)) { + msghdrp = (caddr_t) &user_msg; + size_of_msghdr = sizeof(user_msg); + size_of_iovec = sizeof(struct user_iovec); + } + else { + msghdrp = (caddr_t) &msg; + size_of_msghdr = sizeof(msg); + size_of_iovec = sizeof(struct iovec); + } + error = copyin(uap->msg, msghdrp, size_of_msghdr); + if (error) { KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0); return (error); } - - if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { - if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { - KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0); - return (EMSGSIZE); - } - MALLOC(iov, struct iovec *, - sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, - M_WAITOK); - } else - iov = aiov; - if (msg.msg_iovlen && - (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, - (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) - goto done; - msg.msg_iov = iov; -#ifdef COMPAT_OLDSOCK - msg.msg_flags = 0; + + /* only need to copy if user process is not 64-bit */ + if (!IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg.msg_flags; + user_msg.msg_controllen = msg.msg_controllen; + user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); + user_msg.msg_iovlen = msg.msg_iovlen; + user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); + user_msg.msg_namelen = msg.msg_namelen; + user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + } + + if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { + KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0); + return (EMSGSIZE); + } + + /* allocate a uio large enough to hold the number of iovecs passed */ + auio = 
uio_create(user_msg.msg_iovlen, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_WRITE); + if (auio == NULL) { + error = ENOBUFS; + goto done; + } + + if (user_msg.msg_iovlen) { + /* get location of iovecs within the uio. then copyin the iovecs from + * user space. + */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOBUFS; + goto done; + } + error = copyin(user_msg.msg_iov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec)); + if (error) + goto done; + user_msg.msg_iov = CAST_USER_ADDR_T(iovp); + + /* finish setup of uio_t */ + uio_calculateresid(auio); + } + else { + user_msg.msg_iov = 0; + } + +#if COMPAT_43_SOCKET + user_msg.msg_flags = 0; #endif - error = sendit(p, uap->s, &msg, uap->flags, retval); + error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval); done: - if (iov != aiov) - FREE(iov, M_IOV); + if (auio != NULL) { + uio_free(auio); + } KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error,0,0,0,0); + return (error); } static int -recvit(p, s, mp, namelenp, retval) +recvit(p, s, mp, uiop, namelenp, retval) register struct proc *p; int s; - register struct msghdr *mp; - caddr_t namelenp; + register struct user_msghdr *mp; + uio_t uiop; + user_addr_t namelenp; register_t *retval; { - struct file *fp; - struct uio auio; - register struct iovec *iov; - register int i; int len, error; struct mbuf *m, *control = 0; - caddr_t ctlbuf; + user_addr_t ctlbuf; struct socket *so; struct sockaddr *fromsa = 0; + struct fileproc *fp; #if KTRACE - struct iovec *ktriov = NULL; - struct uio ktruio; + uio_t ktruio = NULL; #endif KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0,0,0,0,0); - if (error = getsock(p->p_fd, s, &fp)) - { + proc_fdlock(p); + if ( (error = fp_lookup(p, s, &fp, 1)) ) { KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0); + proc_fdunlock(p); return (error); } + if (fp->f_type != DTYPE_SOCKET) { + fp_drop(p, s, fp,1); + proc_fdunlock(p); + return(ENOTSOCK); + } - auio.uio_iov = mp->msg_iov; - auio.uio_iovcnt = mp->msg_iovlen; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_rw = UIO_READ; - auio.uio_procp = p; - auio.uio_offset = 0; /* XXX */ - auio.uio_resid = 0; - iov = mp->msg_iov; - for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if ((auio.uio_resid += iov->iov_len) < 0) { - KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0); - return (EINVAL); - } + so = (struct socket *)fp->f_data; + + proc_fdunlock(p); + if (uio_resid(uiop) < 0) { + KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL,0,0,0,0); + error = EINVAL; + goto out1; } #if KTRACE if (KTRPOINT(p, KTR_GENIO)) { - int iovlen = auio.uio_iovcnt * sizeof (struct iovec); - - MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); - bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); - ktruio = auio; + ktruio = uio_duplicate(uiop); } #endif - len = auio.uio_resid; - so = (struct socket *)fp->f_data; + + len = uio_resid(uiop); if (so == NULL) error = EBADF; - else - error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, + else { + error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop, (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, &mp->msg_flags); + } AUDIT_ARG(sockaddr, p, fromsa); if (error) { - if (auio.uio_resid != len && (error == ERESTART || + if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } #if KTRACE - if (ktriov != NULL) { + if (ktruio != NULL) { if (error == 0) { - ktruio.uio_iov = ktriov; - ktruio.uio_resid = len - auio.uio_resid; - ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error, -1); + uio_setresid(ktruio, len - uio_resid(uiop)); + ktrgenio(p->p_tracep, s, UIO_WRITE, ktruio, error); } - FREE(ktriov, M_TEMP); + uio_free(ktruio); } #endif if (error) goto out; - *retval = len - auio.uio_resid; + *retval = len - uio_resid(uiop); if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == 0) @@ -945,20 +961,19 @@ recvit(p, s, mp, namelenp, retval) #endif /* save sa_len before it is destroyed by MSG_COMPAT */ len = MIN(len, fromsa->sa_len); -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (mp->msg_flags & MSG_COMPAT) ((struct osockaddr *)fromsa)->sa_family = fromsa->sa_family; #endif - error = copyout(fromsa, - (caddr_t)mp->msg_name, (unsigned)len); + error = copyout(fromsa, mp->msg_name, (unsigned)len); if (error) goto out; } mp->msg_namelen = len; if (namelenp && (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (mp->msg_flags & MSG_COMPAT) error = 0; /* old recvfrom didn't check */ else @@ -967,7 +982,7 @@ recvit(p, s, mp, namelenp, retval) } } if (mp->msg_control) { -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info @@ -990,7 +1005,7 @@ recvit(p, s, mp, namelenp, retval) len = mp->msg_controllen; m = control; mp->msg_controllen = 0; - ctlbuf = (caddr_t) mp->msg_control; + ctlbuf = mp->msg_control; while (m && len > 0) { unsigned int tocopy; @@ -1002,8 +1017,8 @@ recvit(p, s, mp, namelenp, retval) tocopy = len; } - if (error = copyout((caddr_t)mtod(m, caddr_t), - ctlbuf, tocopy)) + error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy); + if (error) goto out; ctlbuf += tocopy; @@ -1018,19 +1033,12 @@ out: if (control) m_freem(control); KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error,0,0,0,0); +out1: + fp_drop(p, s, fp, 0); return (error); } -struct recvfrom_args { - int s; - caddr_t buf; - size_t len; - int flags; - caddr_t from; - int *fromlenaddr; -}; - int recvfrom(p, uap, retval) struct proc *p; @@ -1044,37 +1052,83 @@ recvfrom(p, uap, retval) } */ *uap; register_t *retval; { - struct msghdr msg; - struct iovec aiov; + struct user_msghdr msg; int error; + uio_t auio = NULL; KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0,0,0,0,0); AUDIT_ARG(fd, uap->s); if (uap->fromlenaddr) { - error = copyin((caddr_t)uap->fromlenaddr, + error = copyin(uap->fromlenaddr, (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); if (error) return (error); } else msg.msg_namelen = 0; msg.msg_name = uap->from; - msg.msg_iov = &aiov; - msg.msg_iovlen = 1; - aiov.iov_base = uap->buf; - aiov.iov_len = uap->len; + auio = uio_create(1, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_READ); + if (auio == NULL) { + return (ENOMEM); + } + + uio_addiov(auio, uap->buf, uap->len); + /* no need to set up msg_iov. 
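The control-message copyout loop in recvit() above walks the mbuf chain, clipping the last copy to the space the caller advertised. The clipping branch is partly elided in the hunk; the sketch below reconstructs it from the 4.4BSD original, so treat the MSG_CTRUNC line as an assumption rather than quoted patch code:

/* Sketch; the truncation branch is reconstructed, not quoted. */
while (m && len > 0) {
	unsigned int tocopy;

	if (len >= m->m_len) {
		tocopy = m->m_len;             /* whole mbuf fits */
	} else {
		mp->msg_flags |= MSG_CTRUNC;   /* assumed: flag the short copy */
		tocopy = len;
	}
	error = copyout(mtod(m, caddr_t), ctlbuf, tocopy);
	if (error)
		goto out;
	ctlbuf += tocopy;                      /* ctlbuf is a user_addr_t now */
	len -= tocopy;
	m = m->m_next;
}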
recvit uses uio_t we send it */ + msg.msg_iov = 0; + msg.msg_iovlen = 0; msg.msg_control = 0; + msg.msg_controllen = 0; msg.msg_flags = uap->flags; + error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval); + if (auio != NULL) { + uio_free(auio); + } + +#if HACK_FOR_4056224 + /* + * Radar 4056224 + * Temporary workaround to let send() and recv() work over a pipe for binary compatibility + * This will be removed in the release following Tiger + */ + if (error == ENOTSOCK && proc_is64bit(p) == 0) { + struct fileproc *fp; + + if (fp_lookup(p, uap->s, &fp, 0) == 0) { + (void) fp_drop(p, uap->s, fp,0); + + if (fp->f_type == DTYPE_PIPE) { + struct read_args read_uap; + user_ssize_t read_retval; + + if (p->p_pid > last_pid_4056224) { + last_pid_4056224 = p->p_pid; + + printf("%s[%d] uses send/recv on a pipe\n", + p->p_comm, p->p_pid); + } + + bzero(&read_uap, sizeof(struct read_args)); + read_uap.fd = uap->s; + read_uap.cbuf = uap->buf; + read_uap.nbyte = uap->len; + + error = read(p, &read_uap, &read_retval); + *retval = (int)read_retval; + } + } + } +#endif /* HACK_FOR_4056224 */ + KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error,0,0,0,0); - return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr, retval)); + + return (error); } -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET int -orecvfrom(p, uap, retval) - struct proc *p; - struct recvfrom_args *uap; - register_t *retval; +orecvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval) { uap->flags |= MSG_COMPAT; @@ -1083,32 +1137,17 @@ orecvfrom(p, uap, retval) #endif -#ifdef COMPAT_OLDSOCK -struct orecv_args { - int s; - caddr_t buf; - int len; - int flags; -}; - +#if COMPAT_43_SOCKET int -orecv(p, uap, retval) - struct proc *p; - struct orecv_args *uap; - register_t *retval; +orecv(__unused struct proc *p, __unused struct orecv_args *uap, + __unused register_t *retval) { - struct msghdr msg; - struct iovec aiov; - - msg.msg_name = 0; - msg.msg_namelen = 0; - msg.msg_iov = &aiov; - msg.msg_iovlen = 1; - aiov.iov_base = uap->buf; - aiov.iov_len = uap->len; - msg.msg_control = 0; - msg.msg_flags = uap->flags; - return (recvit(p, uap->s, &msg, (caddr_t)0, retval)); + /* these are no longer supported and in fact + * there is no way to call it directly. + * LP64todo - remove this once we're sure there are no clients + */ + + return (ENOTSUP); } /* @@ -1116,58 +1155,20 @@ orecv(p, uap, retval) * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ -struct orecvmsg_args { - int s; - struct omsghdr *msg; - int flags; -}; - int -orecvmsg(p, uap, retval) - struct proc *p; - struct orecvmsg_args *uap; - register_t *retval; +orecvmsg(__unused struct proc *p, __unused struct orecvmsg_args *uap, + __unused register_t *retval) { - struct msghdr msg; - struct iovec aiov[UIO_SMALLIOV], *iov; - int error; + /* these are no longer supported and in fact + * there is no way to call it directly. 
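The HACK_FOR_4056224 blocks in sendto() and recvfrom() are a deliberate, temporary shim: binaries that (incorrectly) call send()/recv() on a pipe used to work when pipes were built on socket pairs, so ENOTSOCK on a DTYPE_PIPE descriptor is rerouted to plain write()/read(), rate-limiting the console message by pid. In user-space terms (illustrative; note that any send/recv flags are simply ignored by the fallback):

/* What the shim preserves, from a process's point of view. */
int fds[2];
char buf[2];

pipe(fds);
send(fds[1], "hi", 2, 0);   /* not a socket: kernel falls back to write() */
recv(fds[0], buf, 2, 0);    /* falls back to read() */

The recv-side shim additionally applies only to 32-bit processes (proc_is64bit(p) == 0), consistent with its stated purpose of Tiger-era binary compatibility.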
+ * LP64todo - remove this once we're sure there are no clients + */ - error = copyin((caddr_t)uap->msg, (caddr_t)&msg, - sizeof (struct omsghdr)); - if (error) - return (error); - if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { - if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) - return (EMSGSIZE); - MALLOC(iov, struct iovec *, - sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, - M_WAITOK); - } else - iov = aiov; - msg.msg_flags = uap->flags | MSG_COMPAT; - error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, - (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); - if (error) - goto done; - msg.msg_iov = iov; - error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen, retval); + return (ENOTSUP); - if (msg.msg_controllen && error == 0) - error = copyout((caddr_t)&msg.msg_controllen, - (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); -done: - if (iov != aiov) - FREE(iov, M_IOV); - return (error); } #endif -struct recvmsg_args { - int s; - struct msghdr *msg; - int flags; -}; - int recvmsg(p, uap, retval) struct proc *p; @@ -1175,73 +1176,125 @@ recvmsg(p, uap, retval) register_t *retval; { struct msghdr msg; - struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; + struct user_msghdr user_msg; + caddr_t msghdrp; + int size_of_msghdr; + user_addr_t uiov; register int error; + int size_of_iovec; + uio_t auio = NULL; + struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0,0,0,0,0); AUDIT_ARG(fd, uap->s); - if (error = copyin((caddr_t)uap->msg, (caddr_t)&msg, - sizeof (msg))) + if (IS_64BIT_PROCESS(p)) { + msghdrp = (caddr_t) &user_msg; + size_of_msghdr = sizeof(user_msg); + size_of_iovec = sizeof(struct user_iovec); + } + else { + msghdrp = (caddr_t) &msg; + size_of_msghdr = sizeof(msg); + size_of_iovec = sizeof(struct iovec); + } + error = copyin(uap->msg, msghdrp, size_of_msghdr); + if (error) { KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0); return (error); } - if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { - if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { - KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0); - return (EMSGSIZE); - } - MALLOC(iov, struct iovec *, - sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, - M_WAITOK); - } else - iov = aiov; -#ifdef COMPAT_OLDSOCK - msg.msg_flags = uap->flags &~ MSG_COMPAT; + /* only need to copy if user process is not 64-bit */ + if (!IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg.msg_flags; + user_msg.msg_controllen = msg.msg_controllen; + user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); + user_msg.msg_iovlen = msg.msg_iovlen; + user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); + user_msg.msg_namelen = msg.msg_namelen; + user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + } + + if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { + KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE,0,0,0,0); + return (EMSGSIZE); + } + +#if COMPAT_43_SOCKET + user_msg.msg_flags = uap->flags &~ MSG_COMPAT; #else - msg.msg_flags = uap->flags; + user_msg.msg_flags = uap->flags; #endif - uiov = msg.msg_iov; - msg.msg_iov = iov; - error = copyin((caddr_t)uiov, (caddr_t)iov, - (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); + + /* allocate a uio large enough to hold the number of iovecs passed */ + auio = uio_create(user_msg.msg_iovlen, 0, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + UIO_READ); + if (auio == NULL) { + error = ENOMEM; + goto done; + } + + /* get location of iovecs within the uio. then copyin the iovecs from + * user space. 
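recvmsg() here (like sendmsg() earlier) copies the user msghdr in at the caller's natural width and, for 32-bit processes, widens it field by field into a user_msghdr whose pointer members are user_addr_t. Reduced to the widening step (assuming CAST_USER_ADDR_T extends a 32-bit user pointer to the kernel's user_addr_t width):

/* Condensed from recvmsg() above. */
struct msghdr      msg;       /* 32-bit layout, as copied in */
struct user_msghdr user_msg;  /* 64-bit-clean kernel-internal layout */

if (!IS_64BIT_PROCESS(p)) {
	user_msg.msg_name       = CAST_USER_ADDR_T(msg.msg_name);
	user_msg.msg_namelen    = msg.msg_namelen;
	user_msg.msg_iov        = CAST_USER_ADDR_T(msg.msg_iov);
	user_msg.msg_iovlen     = msg.msg_iovlen;
	user_msg.msg_control    = CAST_USER_ADDR_T(msg.msg_control);
	user_msg.msg_controllen = msg.msg_controllen;
	user_msg.msg_flags      = msg.msg_flags;
}

On the way back out, recvmsg() narrows the same fields with CAST_DOWN before the copyout, which is why the LP64todo comment asks whether all of them really need copying.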
+ */ + iovp = uio_iovsaddr(auio); + if (iovp == NULL) { + error = ENOMEM; + goto done; + } + uiov = user_msg.msg_iov; + user_msg.msg_iov = CAST_USER_ADDR_T(iovp); + error = copyin(uiov, (caddr_t)iovp, (user_msg.msg_iovlen * size_of_iovec)); if (error) goto done; - error = recvit(p, uap->s, &msg, (caddr_t)0, retval); + + /* finish setup of uio_t */ + uio_calculateresid(auio); + + error = recvit(p, uap->s, &user_msg, auio, 0, retval); if (!error) { - msg.msg_iov = uiov; - error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); + user_msg.msg_iov = uiov; + /* only need to copy if user process is not 64-bit */ + if (!IS_64BIT_PROCESS(p)) { + // LP64todo - do all these change? if not, then no need to copy all of them! + msg.msg_flags = user_msg.msg_flags; + msg.msg_controllen = user_msg.msg_controllen; + msg.msg_control = CAST_DOWN(caddr_t, user_msg.msg_control); + msg.msg_iovlen = user_msg.msg_iovlen; + msg.msg_iov = (struct iovec *) CAST_DOWN(caddr_t, user_msg.msg_iov); + msg.msg_namelen = user_msg.msg_namelen; + msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name); + } + error = copyout(msghdrp, uap->msg, size_of_msghdr); } done: - if (iov != aiov) - FREE(iov, M_IOV); + if (auio != NULL) { + uio_free(auio); + } KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error,0,0,0,0); return (error); } /* ARGSUSED */ -struct shutdown_args { - int s; - int how; -}; - int -shutdown(p, uap, retval) - struct proc *p; - struct shutdown_args *uap; - register_t *retval; +shutdown(__unused struct proc *p, struct shutdown_args *uap, __unused register_t *retval) { - struct file *fp; + struct socket * so; int error; AUDIT_ARG(fd, uap->s); - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) return (error); - if (fp->f_data == NULL) - return (EBADF); - return (soshutdown((struct socket *)fp->f_data, uap->how)); + if (so == NULL) { + error = EBADF; + goto out; + } + error = soshutdown((struct socket *)so, uap->how); +out: + file_drop(uap->s); + return(error); } @@ -1249,21 +1302,10 @@ shutdown(p, uap, retval) /* ARGSUSED */ -struct setsockopt_args { - int s; - int level; - int name; - caddr_t val; - socklen_t valsize; -}; - int -setsockopt(p, uap, retval) - struct proc *p; - struct setsockopt_args *uap; - register_t *retval; +setsockopt(struct proc *p, struct setsockopt_args *uap, __unused register_t *retval) { - struct file *fp; + struct socket * so; struct sockopt sopt; int error; @@ -1273,7 +1315,7 @@ setsockopt(p, uap, retval) if (uap->valsize < 0) return (EINVAL); - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) return (error); @@ -1284,33 +1326,37 @@ setsockopt(p, uap, retval) sopt.sopt_valsize = uap->valsize; sopt.sopt_p = p; - if (fp->f_data == NULL) - return (EBADF); - return (sosetopt((struct socket *)fp->f_data, &sopt)); + if (so == NULL) { + error = EINVAL; + goto out; + } + error = sosetopt(so, &sopt); +out: + file_drop(uap->s); + return(error); } int -getsockopt(p, uap, retval) - struct proc *p; - struct getsockopt_args *uap; - register_t *retval; +getsockopt(struct proc *p, struct getsockopt_args *uap, __unused register_t *retval) { - int valsize, error; - struct file *fp; - struct sockopt sopt; + int error; + socklen_t valsize; + struct sockopt sopt; + struct socket * so; - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) return (error); if (uap->val) { - error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, - sizeof (valsize)); + error = copyin(uap->avalsize, (caddr_t)&valsize, 
sizeof (valsize)); if (error) - return (error); - if (valsize < 0) - return (EINVAL); + goto out; + if (valsize < 0) { + error = EINVAL; + goto out; + } } else valsize = 0; @@ -1321,79 +1367,17 @@ getsockopt(p, uap, retval) sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ sopt.sopt_p = p; - if (fp->f_data == NULL) - return (EBADF); - error = sogetopt((struct socket *)fp->f_data, &sopt); + if (so == NULL) { + error = EBADF; + goto out; + } + error = sogetopt((struct socket *)so, &sopt); if (error == 0) { valsize = sopt.sopt_valsize; - error = copyout((caddr_t)&valsize, - (caddr_t)uap->avalsize, sizeof (valsize)); - } - return (error); -} - - - -struct pipe_args { - int dummy; -}; -/* ARGSUSED */ -int -pipe(p, uap, retval) - struct proc *p; - struct pipe_args *uap; - register_t *retval; -{ - struct file *rf, *wf; - struct socket *rso, *wso; - int fd, error; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); - } - if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) { - goto free1; + error = copyout((caddr_t)&valsize, uap->avalsize, sizeof (valsize)); } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - error = falloc(p, &rf, &fd); - if (error) - goto free2; - retval[0] = fd; - rf->f_flag = FREAD; - rf->f_type = DTYPE_SOCKET; - rf->f_ops = &socketops; - rf->f_data = (caddr_t)rso; - if (error = falloc(p, &wf, &fd)) - goto free3; - wf->f_flag = FWRITE; - wf->f_type = DTYPE_SOCKET; - wf->f_ops = &socketops; - wf->f_data = (caddr_t)wso; - retval[1] = fd; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error = unp_connect2(wso, rso); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (error) - goto free4; - *fdflags(p, retval[0]) &= ~UF_RESERVED; - *fdflags(p, retval[1]) &= ~UF_RESERVED; - return (0); -free4: - fdrelse(p, retval[1]); - ffree(wf); -free3: - fdrelse(p, retval[0]); - ffree(rf); -free2: - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - (void)soclose(wso); -free1: - (void)soclose(rso); - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); +out: + file_drop(uap->s); return (error); } @@ -1403,29 +1387,53 @@ free1: */ /* ARGSUSED */ static int -getsockname1(p, uap, retval, compat) - struct proc *p; - register struct getsockname_args *uap; - register_t *retval; - int compat; +getsockname1(__unused struct proc *p, struct getsockname_args *uap, __unused register_t *retval, + int compat) { - struct file *fp; - register struct socket *so; + struct socket *so; struct sockaddr *sa; - u_int len; + socklen_t len; int error; - error = getsock(p->p_fd, uap->fdes, &fp); + error = file_socket(uap->fdes, &so); if (error) return (error); - error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); + error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t)); if (error) - return (error); - so = (struct socket *)fp->f_data; - if (so == NULL) - return (EBADF); + goto out; + if (so == NULL) { + error = EBADF; + goto out; + } sa = 0; + socket_lock(so, 1); error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); + if (error == 0) + { + struct socket_filter_entry *filter; + int filtered = 0; + for (filter = so->so_filt; filter && error == 0; + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_getsockname) { + if (!filtered) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_getsockname(filter->sfe_cookie, + so, &sa); + } + } + + 
if (error == EJUSTRETURN) + error = 0; + + if (filtered) { + socket_lock(so, 0); + sflt_unuse(so); + } + } + socket_unlock(so, 1); if (error) goto bad; if (sa == 0) { @@ -1434,73 +1442,97 @@ getsockname1(p, uap, retval, compat) } len = MIN(len, sa->sa_len); -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif - error = copyout(sa, (caddr_t)uap->asa, (u_int)len); + error = copyout((caddr_t)sa, uap->asa, len); if (error == 0) gotnothing: - error = copyout((caddr_t)&len, (caddr_t)uap->alen, - sizeof (len)); + error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t)); bad: if (sa) FREE(sa, M_SONAME); +out: + file_drop(uap->fdes); return (error); } int -getsockname(p, uap, retval) - struct proc *p; - struct getsockname_args *uap; - register_t *retval; +getsockname(struct proc *p, struct getsockname_args *uap, register_t *retval) { - return (getsockname1(p, uap, retval, 0)); } -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET int -ogetsockname(p, uap, retval) - struct proc *p; - struct getsockname_args *uap; - register_t *retval; +ogetsockname(struct proc *p, struct getsockname_args *uap, register_t *retval) { - return (getsockname1(p, uap, retval, 1)); } -#endif /* COMPAT_OLDSOCK */ +#endif /* COMPAT_43_SOCKET */ /* * Get name of peer for connected socket. */ /* ARGSUSED */ int -getpeername1(p, uap, retval, compat) - struct proc *p; - register struct getpeername_args *uap; - register_t *retval; - int compat; +getpeername1(__unused struct proc *p, struct getpeername_args *uap, __unused register_t *retval, + int compat) { - struct file *fp; - register struct socket *so; + struct socket *so; struct sockaddr *sa; - u_int len; + socklen_t len; int error; - error = getsock(p->p_fd, uap->fdes, &fp); - if (error) - return (error); - so = (struct socket *)fp->f_data; - if (so == NULL) - return (EBADF); - if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) - return (ENOTCONN); - error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); + error = file_socket(uap->fdes, &so); if (error) return (error); + if (so == NULL) { + error = EBADF; + goto out; + } + + socket_lock(so, 1); + + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { + socket_unlock(so, 1); + error = ENOTCONN; + goto out; + } + error = copyin(uap->alen, (caddr_t)&len, sizeof(socklen_t)); + if (error) { + socket_unlock(so, 1); + goto out; + } sa = 0; error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); + if (error == 0) + { + struct socket_filter_entry *filter; + int filtered = 0; + for (filter = so->so_filt; filter && error == 0; + filter = filter->sfe_next_onsocket) { + if (filter->sfe_filter->sf_filter.sf_getpeername) { + if (!filtered) { + filtered = 1; + sflt_use(so); + socket_unlock(so, 0); + } + error = filter->sfe_filter->sf_filter.sf_getpeername(filter->sfe_cookie, + so, &sa); + } + } + + if (error == EJUSTRETURN) + error = 0; + + if (filtered) { + socket_lock(so, 0); + sflt_unuse(so); + } + } + socket_unlock(so, 1); if (error) goto bad; if (sa == 0) { @@ -1508,48 +1540,43 @@ getpeername1(p, uap, retval, compat) goto gotnothing; } len = MIN(len, sa->sa_len); -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif - error = copyout(sa, (caddr_t)uap->asa, (u_int)len); + error = copyout(sa, uap->asa, len); if (error) goto bad; gotnothing: - error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); + error = copyout((caddr_t)&len, uap->alen, sizeof(socklen_t)); bad: if (sa) 
FREE(sa, M_SONAME); +out: + file_drop(uap->fdes); return (error); } int -getpeername(p, uap, retval) - struct proc *p; - struct getpeername_args *uap; - register_t *retval; +getpeername(struct proc *p, struct getpeername_args *uap, register_t *retval) { return (getpeername1(p, uap, retval, 0)); } -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET int -ogetpeername(p, uap, retval) - struct proc *p; - struct ogetpeername_args *uap; - register_t *retval; +ogetpeername(struct proc *p, struct getpeername_args *uap, register_t *retval) { - /* XXX uap should have type `getpeername_args *' to begin with. */ - return (getpeername1(p, (struct getpeername_args *)uap, retval, 1)); + return (getpeername1(p, uap, retval, 1)); } -#endif /* COMPAT_OLDSOCK */ +#endif /* COMPAT_43_SOCKET */ int -sockargs(mp, buf, buflen, type) +sockargs(mp, data, buflen, type) struct mbuf **mp; - caddr_t buf; + user_addr_t data; int buflen, type; { register struct sockaddr *sa; @@ -1557,18 +1584,26 @@ sockargs(mp, buf, buflen, type) int error; if ((u_int)buflen > MLEN) { -#ifdef COMPAT_OLDSOCK +#if COMPAT_43_SOCKET if (type == MT_SONAME && (u_int)buflen <= 112) buflen = MLEN; /* unix domain compat. hack */ else #endif - return (EINVAL); + if ((u_int)buflen > MCLBYTES) + return (EINVAL); } m = m_get(M_WAIT, type); if (m == NULL) return (ENOBUFS); + if ((u_int)buflen > MLEN) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return ENOBUFS; + } + } m->m_len = buflen; - error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); + error = copyin(data, mtod(m, caddr_t), (u_int)buflen); if (error) (void) m_free(m); else { @@ -1576,7 +1611,7 @@ sockargs(mp, buf, buflen, type) if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); -#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN +#if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif @@ -1586,11 +1621,11 @@ sockargs(mp, buf, buflen, type) return (error); } +/* + * Given a user_addr_t of length len, allocate and fill out a *sa. 
+ */ int -getsockaddr(namp, uaddr, len) - struct sockaddr **namp; - caddr_t uaddr; - size_t len; +getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len) { struct sockaddr *sa; int error; @@ -1602,11 +1637,14 @@ getsockaddr(namp, uaddr, len) return EINVAL; MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); - error = copyin(uaddr, sa, len); + if (sa == NULL) { + return ENOMEM; + } + error = copyin(uaddr, (caddr_t)sa, len); if (error) { FREE(sa, M_SONAME); } else { -#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN +#if COMPAT_43_SOCKET && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif @@ -1616,23 +1654,6 @@ getsockaddr(namp, uaddr, len) return error; } -int -getsock(fdp, fdes, fpp) - struct filedesc *fdp; - int fdes; - struct file **fpp; -{ - register struct file *fp; - - if ((unsigned)fdes >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fdes]) == NULL || - (fdp->fd_ofileflags[fdes] & UF_RESERVED)) - return (EBADF); - if (fp->f_type != DTYPE_SOCKET) - return (ENOTSOCK); - *fpp = fp; - return (0); -} #if SENDFILE /* @@ -1647,9 +1668,11 @@ sf_buf_init(void *arg) int i; SLIST_INIT(&sf_freelist); - sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); - sf_bufs = _MALLOC(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT); - bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf)); + kmem_alloc_pageable(kernel_map, &sf_base, nsfbufs * PAGE_SIZE); + MALLOC(sf_bufs, struct sf_buf *, nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT|M_ZERO); + if (sf_bufs == NULL) + return; /* XXX silently fail leaving sf_bufs NULL */ + for (i = 0; i < nsfbufs; i++) { sf_bufs[i].kva = sf_base + i * PAGE_SIZE; SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list); @@ -1663,15 +1686,12 @@ static struct sf_buf * sf_buf_alloc() { struct sf_buf *sf; - int s; - s = splimp(); while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) { sf_buf_alloc_want = 1; tsleep(&sf_freelist, PVM, "sfbufa", 0); } SLIST_REMOVE_HEAD(&sf_freelist, free_list); - splx(s); sf->refcnt = 1; return (sf); } @@ -1699,7 +1719,6 @@ sf_buf_free(caddr_t addr, u_int size) { struct sf_buf *sf; struct vm_page *m; - int s; sf = dtosf(addr); if (sf->refcnt == 0) @@ -1708,7 +1727,6 @@ sf_buf_free(caddr_t addr, u_int size) if (sf->refcnt == 0) { pmap_qremove((vm_offset_t)addr, 1); m = sf->m; - s = splvm(); vm_page_unwire(m, 0); /* * Check for the object going away on us. This can @@ -1719,7 +1737,6 @@ sf_buf_free(caddr_t addr, u_int size) vm_page_lock_queues(); vm_page_free(m); vm_page_unlock_queues(); - splx(s); sf->m = NULL; SLIST_INSERT_HEAD(&sf_freelist, sf, free_list); if (sf_buf_alloc_want) { @@ -1742,8 +1759,7 @@ sf_buf_free(caddr_t addr, u_int size) int sendfile(struct proc *p, struct sendfile_args *uap) { - struct file *fp; - struct filedesc *fdp = p->p_fd; + struct fileproc *fp; struct vnode *vp; struct vm_object *obj; struct socket *so; @@ -1755,45 +1771,47 @@ sendfile(struct proc *p, struct sendfile_args *uap) off_t off, xfsize, sbytes = 0; int error = 0, s; + if (sf_bufs == NULL) { + /* Fail if initialization failed */ + return ENOSYS; + } + /* * Do argument checking. Must be a regular file in, stream * type and connected socket out, positive offset. 
*/ - if (((u_int)uap->fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL || - (fp->f_flag & FREAD) == 0) { - error = EBADF; - goto done; - } - if (fp->f_type != DTYPE_VNODE) { - error = EINVAL; + if (error = fp_getfvp(p, uap->fd, &fp, &vp)) goto done; + if ((fp->f_flag & FREAD) == 0) { + error = EBADF; + goto done1; } - vp = (struct vnode *)fp->f_data; obj = vp->v_object; if (vp->v_type != VREG || obj == NULL) { error = EINVAL; - goto done; + goto done1; } - error = getsock(p->p_fd, uap->s, &fp); + error = file_socket(uap->s, &so); if (error) - goto done; - so = (struct socket *)fp->f_data; + goto done1; if (so == NULL) { error = EBADF; - goto done; + goto done2; } + + socket_lock(so, 1); + if (so->so_type != SOCK_STREAM) { error = EINVAL; - goto done; + goto done3; } if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; - goto done; + goto done3; } if (uap->offset < 0) { error = EINVAL; - goto done; + goto done3; } /* @@ -1801,9 +1819,9 @@ sendfile(struct proc *p, struct sendfile_args *uap) * any headers/trailers. */ if (uap->hdtr != NULL) { - error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); + error = copyin(CAST_USER_ADDR_T(uap->hdtr), &hdtr, sizeof(hdtr)); if (error) - goto done; + goto done3; /* * Send any headers. Wimp out and use writev(2). */ @@ -1813,7 +1831,7 @@ sendfile(struct proc *p, struct sendfile_args *uap) nuap.iovcnt = hdtr.hdr_cnt; error = writev(p, &nuap); if (error) - goto done; + goto done3; sbytes += p->p_retval[0]; } } @@ -1858,8 +1876,8 @@ retry_lookup: error = EPIPE; else error = EAGAIN; - sbunlock(&so->so_snd); - goto done; + sbunlock(&so->so_snd, 0); /* will release lock */ + goto done2; } /* * Attempt to look up the page. If the page doesn't exist or the @@ -1897,20 +1915,17 @@ retry_lookup: /* * Get the page from backing store. */ - bsize = vp->v_mount->mnt_stat.f_iosize; + bsize = vp->v_mount->mnt_vfsstat.f_iosize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = 0; aiov.iov_len = MAXBSIZE; - auio.uio_resid = MAXBSIZE; auio.uio_offset = trunc_page(off); auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_READ; - auio.uio_procp = p; - vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); + uio_setresid(&auio, MAXBSIZE); error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), p->p_ucred); - VOP_UNLOCK(vp, 0, p); vm_page_flag_clear(pg, PG_ZERO); vm_page_io_finish(pg); if (error) { @@ -1925,8 +1940,8 @@ retry_lookup: vm_page_lock_queues(); vm_page_free(pg); vm_page_unlock_queues(); - sbunlock(&so->so_snd); - goto done; + sbunlock(&so->so_snd, 0); /* will release socket lock */ + goto done2; } } else { if ((pg->flags & PG_BUSY) || pg->busy) { @@ -1937,10 +1952,8 @@ retry_lookup: */ vm_page_flag_set(pg, PG_WANTED); tsleep(pg, PVM, "sfpbsy", 0); - splx(s); goto retry_lookup; } - splx(s); } /* * Protect from having the page ripped out from beneath us. @@ -1958,6 +1971,11 @@ retry_lookup: * Get an mbuf header and set it up as having external storage. */ MGETHDR(m, M_WAIT, MT_DATA); + if (m == NULL) { + error = ENOBUFS; + sbunlock(&so->so_snd, 0); /* will release socket lock */ + goto done2; + } m->m_ext.ext_free = sf_buf_free; m->m_ext.ext_ref = sf_buf_ref; m->m_ext.ext_buf = (void *)sf->kva; @@ -1968,7 +1986,6 @@ retry_lookup: /* * Add the buffer to the socket buffer chain. */ - s = splnet(); retry_space: /* * Make sure that the socket is still able to take more data.
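The reworked sendfile() above replaces the old fd_ofiles[] walk and spl*() interlocks with fp_getfvp()/file_socket() lookups, a per-socket lock, and a staged unwind (done3 drops the socket lock, done2 and done1 drop the two file references). The whole path still sits behind #if SENDFILE, so it is normally compiled out; purely for orientation, a userland caller of the FreeBSD-style interface this handler parses out of uap might look like the sketch below. The prototype and the helper name are assumptions for illustration, not a shipped Darwin API.

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /*
     * Sketch only: assumes the FreeBSD 4.x-style prototype
     *     int sendfile(int fd, int s, off_t offset, size_t nbytes,
     *                  struct sf_hdtr *hdtr, off_t *sbytes, int flags);
     * matching the uap fields (fd, s, offset, hdtr, sbytes) used above.
     */
    static int
    push_whole_file(int filefd, int sock)       /* hypothetical helper */
    {
        struct sf_hdtr hdtr = { NULL, 0, NULL, 0 };  /* no headers/trailers */
        off_t sbytes = 0;

        /* nbytes == 0 requests "send to end of file"; sbytes reports how
         * much was queued even when the call fails part way through. */
        if (sendfile(filefd, sock, 0, 0, &hdtr, &sbytes, 0) == -1)
            return -1;
        return 0;
    }

Non-empty hdr_cnt/trl_cnt vectors would be pushed through the writev(2) "wimp out" path shown in the hunks above rather than the zero-copy sf_buf path.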
@@ -1989,9 +2006,8 @@ retry_space: so->so_error = 0; } m_freem(m); - sbunlock(&so->so_snd); - splx(s); - goto done; + sbunlock(&so->so_snd, 0); /* will release socket lock */ + goto done2; } /* * Wait for socket space to become available. We do this just @@ -2001,10 +2017,9 @@ retry_space: if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { if (so->so_state & SS_NBIO) { m_freem(m); - sbunlock(&so->so_snd); - splx(s); + sbunlock(&so->so_snd, 0); /* will release socket lock */ error = EAGAIN; - goto done; + goto done2; } error = sbwait(&so->so_snd); /* @@ -2014,20 +2029,19 @@ retry_space: */ if (error) { m_freem(m); - sbunlock(&so->so_snd); - splx(s); - goto done; + sbunlock(&so->so_snd, 0); + goto done2; } goto retry_space; } error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p); splx(s); if (error) { - sbunlock(&so->so_snd); - goto done; + sbunlock(&so->so_snd, 0); /* will release socket lock */ + goto done2; } } - sbunlock(&so->so_snd); + sbunlock(&so->so_snd, 0); /* will release socket lock */ /* * Send trailers. Wimp out and use writev(2). @@ -2038,15 +2052,22 @@ retry_space: nuap.iovcnt = hdtr.trl_cnt; error = writev(p, &nuap); if (error) - goto done; + goto done2; sbytes += p->p_retval[0]; } - +done2: + file_drop(uap->s); +done1: + file_drop(uap->fd); done: if (uap->sbytes != NULL) { - copyout(&sbytes, uap->sbytes, sizeof(off_t)); + /* XXX this appears bogus for some early failure conditions */ + copyout(&sbytes, CAST_USER_ADDR_T(uap->sbytes), sizeof(off_t)); } return (error); +done3: + socket_unlock(so, 1); + goto done2; } #endif diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index dda829231..13f275e7e 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -60,12 +60,13 @@ #include #include #include /* XXX must be before */ -#include +#include #include #include #include #include -#include +#include +#include #include #include #include @@ -73,14 +74,23 @@ #include #include #include -#include +#include +#include #include +#include +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data struct zone *unp_zone; static unp_gen_t unp_gencnt; static u_int unp_count; +static lck_mtx_t *unp_mutex; +extern lck_mtx_t * uipc_lock; static struct unp_head unp_shead, unp_dhead; /* @@ -92,22 +102,24 @@ static struct unp_head unp_shead, unp_dhead; * need a proper out-of-band * lock pushdown */ -static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; +static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL, { 0 } }; static ino_t unp_ino; /* prototype for fake inode numbers */ -static int unp_attach __P((struct socket *)); -static void unp_detach __P((struct unpcb *)); -static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); -static int unp_connect __P((struct socket *,struct sockaddr *, - struct proc *)); -static void unp_disconnect __P((struct unpcb *)); -static void unp_shutdown __P((struct unpcb *)); -static void unp_drop __P((struct unpcb *, int)); -static void unp_gc __P((void)); -static void unp_scan __P((struct mbuf *, void (*)(struct file *))); -static void unp_mark __P((struct file *)); -static void unp_discard __P((struct file *)); -static int unp_internalize __P((struct mbuf *, struct proc *)); +static int unp_attach(struct socket *); +static void unp_detach(struct unpcb *); +static int unp_bind(struct unpcb *,struct sockaddr *, struct proc *); +static int unp_connect(struct socket *,struct sockaddr *, struct proc *); +static void unp_disconnect(struct unpcb *); +static void unp_shutdown(struct unpcb *); +static void unp_drop(struct unpcb *, int); +static void unp_gc(void); +static void unp_scan(struct mbuf *, void (*)(struct fileglob *)); +static void unp_mark(struct fileglob *); +static void unp_discard(struct fileglob *); +static void unp_discard_fdlocked(struct fileglob *, struct proc *); +static int unp_internalize(struct mbuf *, struct proc *); +static int unp_listen(struct unpcb *, struct proc *); + static int uipc_abort(struct socket *so) @@ -117,6 +129,8 @@ uipc_abort(struct socket *so) if (unp == 0) return EINVAL; unp_drop(unp, ECONNABORTED); + unp_detach(unp); + sofree(so); return 0; } @@ -143,7 +157,7 @@ uipc_accept(struct socket *so, struct sockaddr **nam) } static int -uipc_attach(struct socket *so, int proto, struct proc *p) +uipc_attach(struct socket *so, __unused int proto, __unused struct proc *p) { struct unpcb *unp = sotounpcb(so); @@ -210,13 +224,13 @@ uipc_disconnect(struct socket *so) } static int -uipc_listen(struct socket *so, struct proc *p) +uipc_listen(struct socket *so, __unused struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0 || unp->unp_vnode == 0) return EINVAL; - return 0; + return unp_listen(unp, p); } static int @@ -233,7 +247,7 @@ uipc_peeraddr(struct socket *so, struct sockaddr **nam) } static int -uipc_rcvd(struct socket *so, int flags) +uipc_rcvd(struct socket *so, __unused int flags) { struct unpcb *unp = sotounpcb(so); struct socket *so2; @@ -316,18 +330,18 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, from = (struct sockaddr *)unp->unp_addr; else from = 
&sun_noname; - if (sbappendaddr(&so2->so_rcv, from, m, control)) { + if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) { sorwakeup(so2); - m = 0; - control = 0; - } else - error = ENOBUFS; + } + m = 0; + control = 0; if (nam) unp_disconnect(unp); break; } - case SOCK_STREAM: + case SOCK_STREAM: { + int didreceive = 0; #define rcv (&so2->so_rcv) #define snd (&so->so_snd) /* Connect if not connected yet. */ @@ -358,20 +372,22 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ - if (control) { - if (sbappendcontrol(rcv, m, control)) - control = 0; - } else - sbappend(rcv, m); + if ((control && sbappendcontrol(rcv, m, control, NULL)) || + sbappend(rcv, m)) { + didreceive = 1; + } snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; unp->unp_conn->unp_cc = rcv->sb_cc; - sorwakeup(so2); + if (didreceive) + sorwakeup(so2); m = 0; + control = 0; #undef snd #undef rcv + } break; default: @@ -387,6 +403,9 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, unp_shutdown(unp); } + if (control && error != 0) + unp_dispose(control); + release: if (control) m_freem(control); @@ -444,8 +463,43 @@ struct pr_usrreqs uipc_usrreqs = { uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, - sosend, soreceive, sopoll + sosend, soreceive, pru_sopoll_notsupp }; + +int +uipc_ctloutput( + struct socket *so, + struct sockopt *sopt) +{ + struct unpcb *unp = sotounpcb(so); + int error; + + switch (sopt->sopt_dir) { + case SOPT_GET: + switch (sopt->sopt_name) { + case LOCAL_PEERCRED: + if (unp->unp_flags & UNP_HAVEPC) + error = sooptcopyout(sopt, &unp->unp_peercred, + sizeof(unp->unp_peercred)); + else { + if (so->so_type == SOCK_STREAM) + error = ENOTCONN; + else + error = EINVAL; + } + break; + default: + error = EOPNOTSUPP; + break; + } + break; + case SOPT_SET: + default: + error = EOPNOTSUPP; + break; + } + return (error); +} /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering @@ -479,11 +533,10 @@ SYSCTL_DECL(_net_local); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); static int -unp_attach(so) - struct socket *so; +unp_attach(struct socket *so) { - register struct unpcb *unp; - int error; + struct unpcb *unp; + int error = 0; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { @@ -506,20 +559,22 @@ unp_attach(so) if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof *unp); - unp->unp_gencnt = ++unp_gencnt; - unp_count++; + lck_mtx_lock(unp_mutex); LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + unp->unp_gencnt = ++unp_gencnt; + unp_count++; LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead : &unp_shead, unp, unp_link); so->so_pcb = (caddr_t)unp; + lck_mtx_unlock(unp_mutex); return (0); } static void -unp_detach(unp) - register struct unpcb *unp; +unp_detach(struct unpcb *unp) { + lck_mtx_assert(unp_mutex, LCK_MTX_ASSERT_OWNED); LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; @@ -527,15 +582,14 @@ unp_detach(unp) struct vnode *tvp = unp->unp_vnode; unp->unp_vnode->v_socket = 0; unp->unp_vnode = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vrele(tvp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + vnode_rele(tvp); /* drop the usecount */ } if (unp->unp_conn) unp_disconnect(unp); while (unp->unp_refs.lh_first) unp_drop(unp->unp_refs.lh_first, ECONNRESET); soisdisconnected(unp->unp_socket); + unp->unp_socket->so_flags |= SOF_PCBCLEARING; /* makes sure we're getting dealloced */ unp->unp_socket->so_pcb = 0; if (unp_rights) { /* @@ -550,22 +604,26 @@ unp_detach(unp) } if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); - zfree(unp_zone, (vm_offset_t)unp); + zfree(unp_zone, unp); } static int -unp_bind(unp, nam, p) - struct unpcb *unp; - struct sockaddr *nam; - struct proc *p; +unp_bind( + struct unpcb *unp, + struct sockaddr *nam, + struct proc *p) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; - register struct vnode *vp; - struct vattr vattr; + struct vnode *vp, *dvp; + struct vnode_attr va; + struct vfs_context context; int error, namelen; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? proxy */ + if (unp->unp_vnode != NULL) return (EINVAL); namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); @@ -573,81 +631,93 @@ unp_bind(unp, nam, p) return EINVAL; strncpy(buf, soun->sun_path, namelen); buf[namelen] = 0; /* null-terminate the string */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, - buf, p); + NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE32, + CAST_USER_ADDR_T(buf), &context); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } + dvp = nd.ni_dvp; vp = nd.ni_vp; + if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + /* + * need to do this before the vnode_put of dvp + * since we may have to release an fs_nodelock + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); + return (EADDRINUSE); } - VATTR_NULL(&vattr); - vattr.va_type = VSOCK; - vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); -#if 0 - /* In FreeBSD create leave s parent held ; not here */ - vput(nd.ni_dvp); -#endif + + /* authorize before creating */ + error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context); + + if (!error) { + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VSOCK); + VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask)); + + /* create the socket */ + error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, &context); + } + + nameidone(&nd); + vnode_put(dvp); + if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } - vp = nd.ni_vp; + vnode_ref(vp); /* gain a longterm reference */ vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = 
(struct sockaddr_un *)dup_sockaddr(nam, 1); - VOP_UNLOCK(vp, 0, p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + vnode_put(vp); /* drop the iocount */ + return (0); } static int -unp_connect(so, nam, p) - struct socket *so; - struct sockaddr *nam; - struct proc *p; +unp_connect( + struct socket *so, + struct sockaddr *nam, + struct proc *p) { - register struct sockaddr_un *soun = (struct sockaddr_un *)nam; - register struct vnode *vp; - register struct socket *so2, *so3; - struct unpcb *unp2, *unp3; + struct sockaddr_un *soun = (struct sockaddr_un *)nam; + struct vnode *vp; + struct socket *so2, *so3; + struct unpcb *unp, *unp2, *unp3; + struct vfs_context context; int error, len; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? proxy */ + len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return EINVAL; strncpy(buf, soun->sun_path, len); buf[len] = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(buf), &context); error = namei(&nd); if (error) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (error); } + nameidone(&nd); vp = nd.ni_vp; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } - error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); + + error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, &context); if (error) goto bad; so2 = vp->v_socket; @@ -655,11 +725,14 @@ unp_connect(so, nam, p) error = ECONNREFUSED; goto bad; } + + /* make sure the socket can't go away while we're connecting */ + so2->so_usecount++; + if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad; } - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); /* * Check if socket was connected while we were trying to @@ -668,40 +741,64 @@ unp_connect(so, nam, p) */ if ((so->so_state & SS_ISCONNECTED) != 0) { error = EISCONN; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto bad; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if ((so2->so_options & SO_ACCEPTCONN) == 0 || - (so3 = sonewconn(so2, 0)) == 0) { + (so3 = sonewconn(so2, 0, nam)) == 0) { error = ECONNREFUSED; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto bad; } + unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) dup_sockaddr((struct sockaddr *) unp2->unp_addr, 1); + + /* + * unp_peercred management: + * + * The connecter's (client's) credentials are copied + * from its process structure at the time of connect() + * (which is now). + */ + cru2x(p->p_ucred, &unp3->unp_peercred); + unp3->unp_flags |= UNP_HAVEPC; + /* + * The receiver's (server's) credentials are copied + * from the unp_peercred member of socket on which the + * former called listen(); unp_listen() cached that + * process's credentials at that time so we can use + * them now. 
+ */ + KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, + ("unp_connect: listener without cached peercred")); + memcpy(&unp->unp_peercred, &unp2->unp_peercred, + sizeof(unp->unp_peercred)); + unp->unp_flags |= UNP_HAVEPC; + + so2->so_usecount--; /* drop reference taken on so2 */ so2 = so3; + so3->so_usecount++; /* make sure we keep it around */ } error = unp_connect2(so, so2); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); bad: - vput(vp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if (so2 != NULL) + so2->so_usecount--; /* release count on socket */ + vnode_put(vp); return (error); } int -unp_connect2(so, so2) - register struct socket *so; - register struct socket *so2; +unp_connect2( + struct socket *so, + struct socket *so2) { - register struct unpcb *unp = sotounpcb(so); - register struct unpcb *unp2; + struct unpcb *unp = sotounpcb(so); + struct unpcb *unp2; if (so2->so_type != so->so_type) return (EPROTOTYPE); @@ -720,6 +817,14 @@ unp_connect2(so, so2) break; case SOCK_STREAM: + /* This takes care of socketpair */ + if (!(unp->unp_flags & UNP_HAVEPC) && !(unp2->unp_flags & UNP_HAVEPC)) { + cru2x(kauth_cred_get(), &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPC; + + cru2x(kauth_cred_get(), &unp2->unp_peercred); + unp2->unp_flags |= UNP_HAVEPC; + } unp2->unp_conn = unp; soisconnected(so); soisconnected(so2); @@ -732,13 +837,13 @@ unp_connect2(so, so2) } static void -unp_disconnect(unp) - struct unpcb *unp; +unp_disconnect(struct unpcb *unp) { - register struct unpcb *unp2 = unp->unp_conn; + struct unpcb *unp2 = unp->unp_conn; if (unp2 == 0) return; + lck_mtx_assert(unp_mutex, LCK_MTX_ASSERT_OWNED); unp->unp_conn = 0; switch (unp->unp_socket->so_type) { @@ -757,8 +862,7 @@ unp_disconnect(unp) #ifdef notdef void -unp_abort(unp) - struct unpcb *unp; +unp_abort(struct unpcb *unp) { unp_detach(unp); @@ -774,21 +878,25 @@ unp_pcblist SYSCTL_HANDLER_ARGS struct xunpgen xug; struct unp_head *head; + lck_mtx_lock(unp_mutex); head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + if (req->oldptr == USER_ADDR_NULL) { n = unp_count; req->oldidx = 2 * (sizeof xug) + (n + n/8) * sizeof(struct xunpcb); + lck_mtx_unlock(unp_mutex); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_mtx_unlock(unp_mutex); return EPERM; + } /* * OK, now we're committed to doing something. @@ -801,18 +909,24 @@ unp_pcblist SYSCTL_HANDLER_ARGS xug.xug_gen = gencnt; xug.xug_sogen = so_gencnt; error = SYSCTL_OUT(req, &xug, sizeof xug); - if (error) + if (error) { + lck_mtx_unlock(unp_mutex); return error; + } /* * We are done if there is no pcb */ - if (n == 0) + if (n == 0) { + lck_mtx_unlock(unp_mutex); return 0; + } - unp_list = _MALLOC(n * sizeof *unp_list, M_TEMP, M_WAITOK); - if (unp_list == 0) + MALLOC(unp_list, struct unpcb **, n * sizeof *unp_list, M_TEMP, M_WAITOK); + if (unp_list == 0) { + lck_mtx_unlock(unp_mutex); return ENOMEM; + } for (unp = head->lh_first, i = 0; unp && i < n; unp = unp->unp_link.le_next) { @@ -827,7 +941,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS if (unp->unp_gencnt <= gencnt) { struct xunpcb xu; xu.xu_len = sizeof xu; - xu.xu_unpp = unp; + xu.xu_unpp = (struct unpcb_compat *)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. 
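The unp_peercred bookkeeping above, cached by unp_listen() when the server calls listen(), copied to the client at connect() time, and synthesized from kauth_cred_get() for socketpair() in unp_connect2(), is what backs the LOCAL_PEERCRED option that uipc_ctloutput() serves earlier in this file. A minimal userland sketch of reading it, assuming the xucred definitions exported through <sys/un.h> and <sys/ucred.h>:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/un.h>      /* SOL_LOCAL, LOCAL_PEERCRED */
    #include <sys/ucred.h>   /* struct xucred, XUCRED_VERSION */
    #include <stdio.h>

    /* Report the effective uid the kernel cached for the peer of a
     * connected AF_LOCAL stream socket. */
    static int
    print_peer_uid(int fd)
    {
        struct xucred xuc;
        socklen_t len = sizeof(xuc);

        if (getsockopt(fd, SOL_LOCAL, LOCAL_PEERCRED, &xuc, &len) == -1)
            return -1;
        if (xuc.cr_version != XUCRED_VERSION)   /* guard against layout drift */
            return -1;
        printf("peer euid %u (%d groups)\n",
            (unsigned)xuc.cr_uid, (int)xuc.cr_ngroups);
        return 0;
    }

Note that per the uipc_ctloutput() hunk, a stream socket that has not completed a connect() returns ENOTCONN for this query, so it is only meaningful on an accepted or connected endpoint.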
@@ -839,7 +953,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS bcopy(unp->unp_conn->unp_addr, &xu.xu_caddr, unp->unp_conn->unp_addr->sun_len); - bcopy(unp, &xu.xu_unp, sizeof *unp); + bcopy(unp, &xu.xu_unp, sizeof(xu.xu_unp)); sotoxsocket(unp->unp_socket, &xu.xu_socket); error = SYSCTL_OUT(req, &xu, sizeof xu); } @@ -858,6 +972,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xug, sizeof xug); } FREE(unp_list, M_TEMP); + lck_mtx_unlock(unp_mutex); return error; } @@ -869,8 +984,7 @@ SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, "List of active local stream sockets"); static void -unp_shutdown(unp) - struct unpcb *unp; +unp_shutdown(struct unpcb *unp) { struct socket *so; @@ -880,24 +994,14 @@ unp_shutdown(unp) } static void -unp_drop(unp, errno) - struct unpcb *unp; - int errno; +unp_drop( + struct unpcb *unp, + int errno) { struct socket *so = unp->unp_socket; so->so_error = errno; unp_disconnect(unp); - if (so->so_head) { - LIST_REMOVE(unp, unp_link); - unp->unp_gencnt = ++unp_gencnt; - unp_count--; - so->so_pcb = (caddr_t) 0; - if (unp->unp_addr) - FREE(unp->unp_addr, M_SONAME); - zfree(unp_zone, (vm_offset_t)unp); - sofree(so); - } } #ifdef notdef @@ -909,31 +1013,30 @@ unp_drain() #endif int -unp_externalize(rights) - struct mbuf *rights; +unp_externalize(struct mbuf *rights) { struct proc *p = current_proc(); /* XXX */ - register int i; - register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); - register struct file **rp = (struct file **)(cm + 1); - register struct file *fp; + int i; + struct cmsghdr *cm = mtod(rights, struct cmsghdr *); + struct fileglob **rp = (struct fileglob **)(cm + 1); + struct fileproc *fp; + struct fileglob *fg; int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); int f; - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + proc_fdlock(p); /* * if the new FD's will not fit, then we free them all */ if (!fdavail(p, newfds)) { for (i = 0; i < newfds; i++) { - fp = *rp; - unp_discard(fp); + fg = *rp; + unp_discard_fdlocked(fg, p); *rp++ = 0; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (EMSGSIZE); } /* @@ -945,15 +1048,19 @@ unp_externalize(rights) for (i = 0; i < newfds; i++) { if (fdalloc(p, 0, &f)) panic("unp_externalize"); - fp = *rp; + fg = *rp; + MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + bzero(fp, sizeof(struct fileproc)); + fp->f_iocount = 0; + fp->f_fglob = fg; p->p_fd->fd_ofiles[f] = fp; + fg_removeuipc(fg); *fdflags(p, f) &= ~UF_RESERVED; - fp->f_msgcount--; unp_rights--; *(int *)rp++ = f; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (0); } @@ -967,6 +1074,8 @@ unp_init(void) panic("unp_init"); LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); + + unp_mutex = localdomain.dom_mtx; } #ifndef MIN @@ -974,42 +1083,42 @@ unp_init(void) #endif static int -unp_internalize(control, p) - struct mbuf *control; - struct proc *p; +unp_internalize( + struct mbuf *control, + struct proc *p) { - register struct cmsghdr *cm = mtod(control, struct cmsghdr *); - register struct file **rp; - struct file *fp; + struct cmsghdr *cm = mtod(control, struct cmsghdr *); + struct fileglob **rp; + struct fileproc *fp; register int i, error; int oldfds; + int fdgetf_noref(proc_t, struct fileglob **, struct fileproc **); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); 
return (EINVAL); } - oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); - rp = (struct file **)(cm + 1); - for (i = 0; i < oldfds; i++) - if (error = fdgetf(p, *(int *)rp++, 0)) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return (error); + proc_fdlock(p); + rp = (struct fileglob **)(cm + 1); + + for (i = 0; i < oldfds; i++) { + if (error = fdgetf_noref(p, *(int *)rp++, (struct fileglob **)0)) { + proc_fdunlock(p); + return (error); } + } + rp = (struct fileglob **)(cm + 1); - rp = (struct file **)(cm + 1); for (i = 0; i < oldfds; i++) { - (void) fdgetf(p, *(int *)rp, &fp); - *rp++ = fp; - fref(fp); - fp->f_msgcount++; + (void) fdgetf_noref(p, *(int *)rp, &fp); + fg_insertuipc(fp->f_fglob); + *rp++ = fp->f_fglob; unp_rights++; } + proc_fdunlock(p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return (0); } @@ -1018,68 +1127,85 @@ static int unp_defer, unp_gcing; static void unp_gc() { - register struct file *fp, *nextfp; + register struct fileglob *fg, *nextfg; register struct socket *so; - struct file **extra_ref, **fpp; + struct fileglob **extra_ref, **fpp; int nunref, i; - if (unp_gcing) + lck_mtx_lock(uipc_lock); + if (unp_gcing) { + lck_mtx_unlock(uipc_lock); return; + } unp_gcing = 1; unp_defer = 0; + lck_mtx_unlock(uipc_lock); /* * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) - fp->f_flag &= ~(FMARK|FDEFER); + for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) { + lck_mtx_lock(&fg->fg_lock); + fg->fg_flag &= ~(FMARK|FDEFER); + lck_mtx_unlock(&fg->fg_lock); + } do { - for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { + for (fg = fmsghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) { + lck_mtx_lock(&fg->fg_lock); /* * If the file is not open, skip it */ - if (fcount(fp) == 0) + if (fg->fg_count == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ - if (fp->f_flag & FDEFER) { - fp->f_flag &= ~FDEFER; + if (fg->fg_flag & FDEFER) { + fg->fg_flag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ - if (fp->f_flag & FMARK) + if (fg->fg_flag & FMARK){ + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ - if (fcount(fp) == fp->f_msgcount) + if (fg->fg_count == fg->fg_msgcount) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If it got this far then it must be * externally accessible. */ - fp->f_flag |= FMARK; + fg->fg_flag |= FMARK; } /* * either it was defered, or it is externally * accessible and not already marked so. * Now check if it is possibly one of OUR sockets. */ - if (fp->f_type != DTYPE_SOCKET || - (so = (struct socket *)fp->f_data) == 0) + if (fg->fg_type != DTYPE_SOCKET || + (so = (struct socket *)fg->fg_data) == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } if (so->so_proto->pr_domain != &localdomain || - (so->so_proto->pr_flags&PR_RIGHTS) == 0) + (so->so_proto->pr_flags&PR_RIGHTS) == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } #ifdef notdef /* if this code is enabled need to run under network funnel */ if (so->so_rcv.sb_flags & SB_LOCK) { @@ -1105,6 +1231,7 @@ unp_gc() * as accessible too. 
*/ unp_scan(so->so_rcv.sb_mb, unp_mark); + lck_mtx_unlock(&fg->fg_lock); } } while (unp_defer); /* @@ -1146,83 +1273,94 @@ unp_gc() * * 91/09/19, bsy@cs.cmu.edu */ - extra_ref = _MALLOC(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); - for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; - fp = nextfp) { - nextfp = fp->f_list.le_next; + extra_ref = _MALLOC(nfiles * sizeof(struct fileglob *), M_FILEGLOB, M_WAITOK); + for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0; + fg = nextfg) { + lck_mtx_lock(&fg->fg_lock); + + nextfg = fg->f_msglist.le_next; /* * If it's not open, skip it */ - if (fcount(fp) == 0) + if (fg->fg_count == 0) { + lck_mtx_unlock(&fg->fg_lock); continue; + } /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ - if (fcount(fp) == fp->f_msgcount && !(fp->f_flag & FMARK)) { - *fpp++ = fp; + if (fg->fg_count == fg->fg_msgcount && !(fg->fg_flag & FMARK)) { + fg->fg_count++; + *fpp++ = fg; nunref++; - fref(fp); } + lck_mtx_unlock(&fg->fg_lock); } /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { - struct file *tfp = *fpp; - if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - sorflush((struct socket *)(tfp->f_data)); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - } - } + struct fileglob *tfg; + tfg = *fpp; + if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) { + sorflush((struct socket *)(tfg->fg_data)); + } + } for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) - closef(*fpp, (struct proc *) NULL); - FREE((caddr_t)extra_ref, M_FILE); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - + closef_locked((struct fileproc *)0, *fpp, (struct proc *) NULL); unp_gcing = 0; + FREE((caddr_t)extra_ref, M_FILEGLOB); + } void -unp_dispose(m) - struct mbuf *m; +unp_dispose(struct mbuf *m) { if (m) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); unp_scan(m, unp_discard); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); } } -/* should run under kernel funnel */ +static int +unp_listen( + struct unpcb *unp, + struct proc *p) +{ + + cru2x(p->p_ucred, &unp->unp_peercred); + unp->unp_flags |= UNP_HAVEPCCACHED; + return (0); +} + +/* should run under kernel funnel */ static void -unp_scan(m0, op) - register struct mbuf *m0; - void (*op) __P((struct file *)); +unp_scan( + struct mbuf *m0, + void (*op)(struct fileglob *)) { - register struct mbuf *m; - register struct file **rp; - register struct cmsghdr *cm; - register int i; + struct mbuf *m; + struct fileglob **rp; + struct cmsghdr *cm; + int i; int qfds; while (m0) { for (m = m0; m; m = m->m_next) if (m->m_type == MT_CONTROL && - m->m_len >= sizeof(*cm)) { + (size_t) m->m_len >= sizeof(*cm)) { cm = mtod(m, struct cmsghdr *); if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) continue; qfds = (cm->cmsg_len - sizeof *cm) - / sizeof (struct file *); - rp = (struct file **)(cm + 1); + / sizeof (struct fileglob *); + rp = (struct fileglob **)(cm + 1); for (i = 0; i < qfds; i++) (*op)(*rp++); break; /* XXX, but saves time */ @@ -1233,23 +1371,40 @@ unp_scan(m0, op) /* should run under kernel funnel */ static void -unp_mark(fp) - struct file *fp; +unp_mark(struct fileglob *fg) { + lck_mtx_lock(&fg->fg_lock); - if (fp->f_flag & FMARK) + if (fg->fg_flag & FMARK) { + lck_mtx_unlock(&fg->fg_lock); return; + } + 
fg->fg_flag |= (FMARK|FDEFER); + + lck_mtx_unlock(&fg->fg_lock); + unp_defer++; - fp->f_flag |= (FMARK|FDEFER); } /* should run under kernel funnel */ static void -unp_discard(fp) - struct file *fp; +unp_discard(fg) + struct fileglob *fg; { + struct proc *p = current_proc(); /* XXX */ + + proc_fdlock(p); + unp_discard_fdlocked(fg, p); + proc_fdunlock(p); +} +static void +unp_discard_fdlocked(fg, p) + struct fileglob *fg; + struct proc *p; +{ + + fg_removeuipc(fg); - fp->f_msgcount--; unp_rights--; - (void) closef(fp, (struct proc *)NULL); + (void) closef_locked((struct fileproc *)0, fg, p); } diff --git a/bsd/libkern/Makefile b/bsd/libkern/Makefile index 32aaccb2e..825806a41 100644 --- a/bsd/libkern/Makefile +++ b/bsd/libkern/Makefile @@ -11,9 +11,6 @@ include $(MakeInc_def) DATAFILES = \ libkern.h -INSTALL_MI_LIST = ${DATAFILES} - -INSTALL_MI_DIR = libkern EXPORT_MI_LIST = ${DATAFILES} diff --git a/bsd/libkern/crc32.c b/bsd/libkern/crc32.c new file mode 100644 index 000000000..d8f5e345d --- /dev/null +++ b/bsd/libkern/crc32.c @@ -0,0 +1,104 @@ +/*- + * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or + * code or tables extracted from it, as desired without restriction. + * + * First, the polynomial itself and its table of feedback terms. The + * polynomial is + * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 + * + * Note that we take it "backwards" and put the highest-order term in + * the lowest-order bit. The X^32 term is "implied"; the LSB is the + * X^31 term, etc. The X^0 term (usually shown as "+1") results in + * the MSB being 1 + * + * Note that the usual hardware shift register implementation, which + * is what we're using (we're merely optimizing it by doing eight-bit + * chunks at a time) shifts bits into the lowest-order term. In our + * implementation, that means shifting towards the right. Why do we + * do it this way? Because the calculated CRC must be transmitted in + * order from highest-order term to lowest-order term. UARTs transmit + * characters in order from LSB to MSB. By storing the CRC this way + * we hand it to the UART in the order low-byte to high-byte; the UART + * sends each low-bit to hight-bit; and the result is transmission bit + * by bit from highest- to lowest-order term without requiring any bit + * shuffling on our part. Reception works similarly + * + * The feedback terms table consists of 256, 32-bit entries. Notes + * + * The table can be generated at runtime if desired; code to do so + * is shown later. It might not be obvious, but the feedback + * terms simply represent the results of eight shift/xor opera + * tions for all combinations of data and CRC register values + * + * The values must be right-shifted by eight bits by the "updcrc + * logic; the shift must be unsigned (bring in zeroes). On some + * hardware you could probably optimize the shift in assembler by + * using byte-swap instructions + * polynomial $edb88320 + * + * + * CRC32 code derived from work by Gary S. Brown. 
+ */ + +#include +#include + +static uint32_t crc32_tab[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +uint32_t +crc32(uint32_t crc, const void *buf, size_t size) +{ + const uint8_t *p; + + p = buf; + crc = crc ^ ~0U; + + while (size--) + crc = crc32_tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); + + return crc ^ ~0U; +} diff --git a/bsd/libkern/inet_ntoa.c b/bsd/libkern/inet_ntoa.c deleted file mode 100644 index 0925e8a3c..000000000 
--- a/bsd/libkern/inet_ntoa.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -/* - * Copyright 1994, 1995 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include - -#include - -char * -inet_ntoa(struct in_addr ina) -{ - static char buf[4*sizeof "123"]; - unsigned char *ucp = (unsigned char *)&ina; - - sprintf(buf, "%d.%d.%d.%d", - ucp[0] & 0xff, - ucp[1] & 0xff, - ucp[2] & 0xff, - ucp[3] & 0xff); - return buf; -} - diff --git a/bsd/libkern/inet_ntop.c b/bsd/libkern/inet_ntop.c new file mode 100644 index 000000000..03d64504a --- /dev/null +++ b/bsd/libkern/inet_ntop.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Copyright 1994, 1995 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#include + +static const char *hexchars = "0123456789abcdef"; + +static const char * +inet_ntop4(const struct in_addr *addr, char *buf, size_t len) +{ + const u_int8_t *ap = (const u_int8_t *)&addr->s_addr; + char tmp[MAX_IPv4_STR_LEN]; /* max length of ipv4 addr string */ + int fulllen; + + /* + * snprintf returns number of bytes printed (not including NULL) or + * number of bytes that would have been printed if more than would + * fit + */ + fulllen = snprintf(tmp, sizeof(tmp), "%d.%d.%d.%d", + ap[0], ap[1], ap[2], ap[3]); + if (fulllen >= (int)len) { + return NULL; + } + + bcopy(tmp, buf, fulllen + 1); + + return buf; +} + +static const char * +inet_ntop6(const struct in6_addr *addr, char *dst, size_t size) +{ + char hexa[8][5], tmp[MAX_IPv6_STR_LEN]; + int zr[8]; + size_t len; + int32_t i, j, k, skip; + uint8_t x8, hx8; + uint16_t x16; + struct in_addr a4; + + if (addr == NULL) return NULL; + + bzero(tmp, sizeof(tmp)); + + /* check for mapped or compat addresses */ + i = IN6_IS_ADDR_V4MAPPED(addr); + j = IN6_IS_ADDR_V4COMPAT(addr); + if ((i != 0) || (j != 0)) + { + char tmp2[16]; /* max length of ipv4 addr string */ + a4.s_addr = addr->__u6_addr.__u6_addr32[3]; + len = snprintf(tmp, sizeof(tmp), "::%s%s", (i != 0) ? 
"ffff:" : "", + inet_ntop4(&a4, tmp2, sizeof(tmp2))); + if (len >= size) return NULL; + bcopy(tmp, dst, len + 1); + return dst; + } + + k = 0; + for (i = 0; i < 16; i += 2) + { + j = 0; + skip = 1; + + bzero(hexa[k], 5); + + x8 = addr->__u6_addr.__u6_addr8[i]; + + hx8 = x8 >> 4; + if (hx8 != 0) + { + skip = 0; + hexa[k][j++] = hexchars[hx8]; + } + + hx8 = x8 & 0x0f; + if ((skip == 0) || ((skip == 1) && (hx8 != 0))) + { + skip = 0; + hexa[k][j++] = hexchars[hx8]; + } + + x8 = addr->__u6_addr.__u6_addr8[i + 1]; + + hx8 = x8 >> 4; + if ((skip == 0) || ((skip == 1) && (hx8 != 0))) + { + hexa[k][j++] = hexchars[hx8]; + } + + hx8 = x8 & 0x0f; + hexa[k][j++] = hexchars[hx8]; + + k++; + } + + /* find runs of zeros for :: convention */ + j = 0; + for (i = 7; i >= 0; i--) + { + zr[i] = j; + x16 = addr->__u6_addr.__u6_addr16[i]; + if (x16 == 0) j++; + else j = 0; + zr[i] = j; + } + + /* find longest run of zeros */ + k = -1; + j = 0; + for(i = 0; i < 8; i++) + { + if (zr[i] > j) + { + k = i; + j = zr[i]; + } + } + + for(i = 0; i < 8; i++) + { + if (i != k) zr[i] = 0; + } + + len = 0; + for (i = 0; i < 8; i++) + { + if (zr[i] != 0) + { + /* check for leading zero */ + if (i == 0) tmp[len++] = ':'; + tmp[len++] = ':'; + i += (zr[i] - 1); + continue; + } + for (j = 0; hexa[i][j] != '\0'; j++) tmp[len++] = hexa[i][j]; + if (i != 7) tmp[len++] = ':'; + } + + /* trailing NULL */ + len++; + + if (len > size) return NULL; + bcopy(tmp, dst, len); + return dst; +} + +const char * +inet_ntop(int af, const void *addr, char *buf, size_t len) +{ + if(af==AF_INET6) + return inet_ntop6(addr, buf, len); + if(af==AF_INET) + return inet_ntop4(addr, buf, len); + return NULL; +} diff --git a/bsd/libkern/libkern.h b/bsd/libkern/libkern.h index 16c005525..6eee3e08c 100644 --- a/bsd/libkern/libkern.h +++ b/bsd/libkern/libkern.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,8 +58,11 @@ #define _LIBKERN_LIBKERN_H_ #include +#include +#include #include #include +#include #ifdef __APPLE_API_OBSOLETE /* BCD conversions. */ @@ -119,18 +122,39 @@ ulmin(u_long a, u_long b) } /* Prototypes for non-quad routines. 
*/ -int bcmp __P((const void *, const void *, size_t)); -int ffs __P((int)); -int locc __P((int, char *, u_int)); -u_long random __P((void)); -char *rindex __P((const char *, int)); -int scanc __P((u_int, u_char *, u_char *, int)); -int skpc __P((int, int, char *)); -char *strcat __P((char *, const char *)); -char *strcpy __P((char *, const char *)); -size_t strlen __P((const char *)); -char *strncpy __P((char *, const char *, size_t)); -long strtol __P((const char*, char **, int)); +extern int ffs(int); +extern int locc(int, char *, u_int); +extern u_long random(void); +extern char *rindex(const char *, int); +extern int scanc(u_int, u_char *, const u_char *, int); +extern int skpc(int, int, char *); +extern long strtol(const char*, char **, int); +extern u_long strtoul(const char *, char **, int); +extern quad_t strtoq(const char *, char **, int); +extern u_quad_t strtouq(const char *, char **, int); + +int snprintf(char *, size_t, const char *, ...); +int sprintf(char *bufp, const char *, ...); +int sscanf(const char *, char const *, ...); +void printf(const char *, ...); + +uint32_t crc32(uint32_t crc, const void *bufp, size_t len); + +int copystr(const void *kfaddr, void *kdaddr, size_t len, size_t *done); +int copyinstr(const user_addr_t uaddr, void *kaddr, size_t len, size_t *done); +int copyoutstr(const void *kaddr, user_addr_t udaddr, size_t len, size_t *done); +int copyin(const user_addr_t uaddr, void *kaddr, size_t len); +int copyout(const void *kaddr, user_addr_t udaddr, size_t len); + +int vsscanf(const char *, char const *, __darwin_va_list); +extern int vsnprintf(char *, size_t, const char *, __darwin_va_list); +extern int vsprintf(char *bufp, const char *, __darwin_va_list); + +extern void invalidate_icache(vm_offset_t, unsigned, int); +extern void flush_dcache(vm_offset_t, unsigned, int); +extern void invalidate_icache64(addr64_t, unsigned, int); +extern void flush_dcache64(addr64_t, unsigned, int); + __END_DECLS #endif /* _LIBKERN_LIBKERN_H_ */ diff --git a/bsd/libkern/scanc.c b/bsd/libkern/scanc.c index 5be4e6f2f..8eb93c384 100644 --- a/bsd/libkern/scanc.c +++ b/bsd/libkern/scanc.c @@ -57,10 +57,7 @@ #include int -scanc(size, cp, table, mask0) - u_int size; - register u_char *cp, table[]; - int mask0; +scanc(u_int size, u_char *cp, const u_char table[], int mask0) { register u_char *end; register u_char mask; diff --git a/bsd/machine/Makefile b/bsd/machine/Makefile index 86558ef33..0304d6d01 100644 --- a/bsd/machine/Makefile +++ b/bsd/machine/Makefile @@ -9,17 +9,23 @@ include $(MakeInc_def) DATAFILES = \ - ansi.h byte_order.h cons.h cpu.h disklabel.h endian.h exec.h \ - label_t.h param.h proc.h profile.h psl.h ptrace.h reboot.h \ - reg.h setjmp.h signal.h spl.h table.h trap.h types.h unix_traps.h \ - ucontext.h user.h vmparam.h + byte_order.h endian.h \ + param.h profile.h \ + setjmp.h signal.h types.h\ + ucontext.h vmparam.h _types.h _limits.h +KERNELFILES = \ + byte_order.h endian.h \ + param.h profile.h \ + signal.h spl.h types.h \ + vmparam.h _types.h _limits.h INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_LCL_LIST = ${DATAFILES} disklabel.h INSTALL_MI_DIR = machine -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = machine diff --git a/bsd/machine/unix_traps.h b/bsd/machine/_limits.h similarity index 78% rename from bsd/machine/unix_traps.h rename to bsd/machine/_limits.h index fc94186c8..a5be9109a 100644 --- a/bsd/machine/unix_traps.h +++ b/bsd/machine/_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
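
Among the new prototypes in the libkern.h hunk above, the copy routines now take a user_addr_t for the user-space side, which is what lets one set of declarations serve both 32-bit and 64-bit process contexts. A hedged sketch of the usual pattern (the handler and structure names are hypothetical, not from the patch; user_addr_t is assumed to come from the kernel's sys/types.h):

    /* Hypothetical handler: copy a small structure in from user
     * space, transform it, and copy the result back out.
     * copyin()/copyout() return 0 on success or an errno value. */
    struct echo_args {
        int request;
        int reply;
    };

    int
    handle_echo(user_addr_t uaddr)
    {
        struct echo_args ea;
        int error;

        error = copyin(uaddr, &ea, sizeof(ea));
        if (error != 0)
            return (error);
        ea.reply = ea.request;      /* trivial transform */
        return (copyout(&ea, uaddr, sizeof(ea)));
    }
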
All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,15 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _BSD_MACHINE_UNIX_TRAPS_H_ -#define _BSD_MACHINE_UNIX_TRAPS_H_ +#ifndef _BSD_MACHINE__LIMITS_H_ +#define _BSD_MACHINE__LIMITS_H_ - -#if defined (__ppc__) -#include "ppc/unix_traps.h" +#if defined (__ppc__) || defined (__ppc64__) +#include "ppc/_limits.h" #elif defined (__i386__) -#include "i386/unix_traps.h" +#include "i386/_limits.h" #else #error architecture not supported #endif - -#endif /* _BSD_MACHINE_UNIX_TRAPS_H_ */ +#endif /* _BSD_MACHINE__LIMITS_H_ */ diff --git a/bsd/machine/table.h b/bsd/machine/_types.h similarity index 78% rename from bsd/machine/table.h rename to bsd/machine/_types.h index e71d1101c..8e6333b6f 100644 --- a/bsd/machine/table.h +++ b/bsd/machine/_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,15 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _BSD_MACHINE_TABLE_H_ -#define _BSD_MACHINE_TABLE_H_ +#ifndef _BSD_MACHINE__TYPES_H_ +#define _BSD_MACHINE__TYPES_H_ - -#if defined (__ppc__) -#include "ppc/table.h" +#if defined (__ppc__) || defined (__ppc64__) +#include "ppc/_types.h" #elif defined (__i386__) -#include "i386/table.h" +#include "i386/_types.h" #else #error architecture not supported #endif - -#endif /* _BSD_MACHINE_TABLE_H_ */ +#endif /* _BSD_MACHINE__TYPES_H_ */ diff --git a/bsd/machine/cons.h b/bsd/machine/cons.h index c68a4af46..6d4b3d7cc 100644 --- a/bsd/machine/cons.h +++ b/bsd/machine/cons.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_CONS_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include #elif defined (__i386__) #include diff --git a/bsd/machine/disklabel.h b/bsd/machine/disklabel.h index fed73e500..8d1402213 100644 --- a/bsd/machine/disklabel.h +++ b/bsd/machine/disklabel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_CPU_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/disklabel.h" #elif defined (__i386__) #include "i386/disklabel.h" diff --git a/bsd/machine/endian.h b/bsd/machine/endian.h index 4aa1ad8f4..a6f870e5b 100644 --- a/bsd/machine/endian.h +++ b/bsd/machine/endian.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ #define _BSD_MACHINE_ENDIAN_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined(__ppc64__) #include "ppc/endian.h" #elif defined (__i386__) #include "i386/endian.h" diff --git a/bsd/machine/exec.h b/bsd/machine/exec.h index 979093289..cb3306c73 100644 --- a/bsd/machine/exec.h +++ b/bsd/machine/exec.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ #define _BSD_MACHINE_EXEC_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/exec.h" #elif defined (__i386__) #include "i386/exec.h" diff --git a/bsd/machine/param.h b/bsd/machine/param.h index 4ee6af0fd..ab305ba73 100644 --- a/bsd/machine/param.h +++ b/bsd/machine/param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ #define _BSD_MACHINE_PARAM_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/param.h" #elif defined (__i386__) #include "i386/param.h" diff --git a/bsd/machine/profile.h b/bsd/machine/profile.h index 94c316745..847570beb 100644 --- a/bsd/machine/profile.h +++ b/bsd/machine/profile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,7 +30,7 @@ #define _BSD_MACHINE_PROFILE_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/profile.h" #elif defined (__i386__) #include "i386/profile.h" diff --git a/bsd/machine/psl.h b/bsd/machine/psl.h index e9763e864..06c76e528 100644 --- a/bsd/machine/psl.h +++ b/bsd/machine/psl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_PSL_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/psl.h" #elif defined (__i386__) #include "i386/psl.h" diff --git a/bsd/machine/ptrace.h b/bsd/machine/ptrace.h index f2abe2b6a..8d14243a6 100644 --- a/bsd/machine/ptrace.h +++ b/bsd/machine/ptrace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ #define _BSD_MACHINE_PTRACE_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined(__ppc64__) #include "ppc/ptrace.h" #elif defined (__i386__) #include "i386/ptrace.h" diff --git a/bsd/machine/reboot.h b/bsd/machine/reboot.h index 368fa0aad..7d0af116c 100644 --- a/bsd/machine/reboot.h +++ b/bsd/machine/reboot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_REBOOT_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/reboot.h" #elif defined (__i386__) #include "i386/reboot.h" diff --git a/bsd/machine/reg.h b/bsd/machine/reg.h index add5145e2..7e18c5b53 100644 --- a/bsd/machine/reg.h +++ b/bsd/machine/reg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_REG_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/reg.h" #elif defined (__i386__) #include "i386/reg.h" diff --git a/bsd/machine/setjmp.h b/bsd/machine/setjmp.h index c39a8ea77..c4bbf5dec 100644 --- a/bsd/machine/setjmp.h +++ b/bsd/machine/setjmp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,17 +20,12 @@ * @APPLE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. - * - */ #ifndef _MACHINE_SETJMP_H_ #define _MACHINE_SETJMP_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/setjmp.h" #elif defined (__i386__) #include "i386/setjmp.h" diff --git a/bsd/machine/signal.h b/bsd/machine/signal.h index b7c7300f1..6c926665e 100644 --- a/bsd/machine/signal.h +++ b/bsd/machine/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_SIGNAL_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/signal.h" #elif defined (__i386__) #include "i386/signal.h" diff --git a/bsd/machine/spl.h b/bsd/machine/spl.h index 89d75fad5..36ab465e9 100644 --- a/bsd/machine/spl.h +++ b/bsd/machine/spl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,14 +22,35 @@ #ifndef _BSD_MACHINE_SPL_H_ #define _BSD_MACHINE_SPL_H_ +#ifdef KERNEL +#ifndef __ASSEMBLER__ +/* + * Machine-dependent SPL definitions. + * + */ +typedef unsigned spl_t; + +extern unsigned int sploff(void); +extern unsigned int splhigh(void); +extern unsigned int splsched(void); +extern unsigned int splclock(void); +extern unsigned int splpower(void); +extern unsigned int splvm(void); +extern unsigned int splbio(void); +extern unsigned int splimp(void); +extern unsigned int spltty(void); +extern unsigned int splnet(void); +extern unsigned int splsoftclock(void); + +extern void spllo(void); +extern void splon(unsigned int level); +extern void splx(unsigned int level); +extern void spln(unsigned int level); +#define splstatclock() splhigh() + +#endif /* __ASSEMBLER__ */ -#if defined (__ppc__) -#include "ppc/spl.h" -#elif defined (__i386__) -#include "i386/spl.h" -#else -#error architecture not supported -#endif +#endif /* KERNEL */ #endif /* _BSD_MACHINE_SPL_H_ */ diff --git a/bsd/machine/types.h b/bsd/machine/types.h index f5ade7c2f..12053c52f 100644 --- a/bsd/machine/types.h +++ b/bsd/machine/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ #define _BSD_MACHINE_TYPES_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/types.h" #elif defined (__i386__) #include "i386/types.h" diff --git a/bsd/machine/ucontext.h b/bsd/machine/ucontext.h index 56cf8137d..fa9635508 100644 --- a/bsd/machine/ucontext.h +++ b/bsd/machine/ucontext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. 
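
The rewritten bsd/machine/spl.h above stops deferring to per-architecture headers and declares the spl interface directly (kernel-only, hidden from assembly). The declarations support the classic BSD interrupt-priority bracket; a sketch, with an illustrative function name:

    /* Sketch: raise the interrupt priority level around a critical
     * section.  The level returned by splbio() must be restored
     * with splx() on every exit path. */
    void
    touch_buffer_queues(void)
    {
        spl_t s;

        s = splbio();           /* mask block-I/O interrupts */
        /* ... manipulate buffer queues here ... */
        splx(s);                /* restore the previous level */
    }
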
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,7 +22,7 @@ #ifndef _MACHINE_UCONTEXT_H_ #define _MACHINE_UCONTEXT_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/ucontext.h" #elif defined (__i386__) #include "i386/ucontext.h" diff --git a/bsd/machine/user.h b/bsd/machine/user.h deleted file mode 100644 index 4aaf1bbf0..000000000 --- a/bsd/machine/user.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _BSD_MACHINE_USER_H_ -#define _BSD_MACHINE_USER_H_ - - -#if defined (__ppc__) -#include "ppc/user.h" -#elif defined (__i386__) -#include "i386/user.h" -#else -#error architecture not supported -#endif - - -#endif /* _BSD_MACHINE_USER_H_ */ diff --git a/bsd/machine/vmparam.h b/bsd/machine/vmparam.h index d1375d280..ab232e8bb 100644 --- a/bsd/machine/vmparam.h +++ b/bsd/machine/vmparam.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,7 @@ #define _BSD_MACHINE_VMPARAM_H_ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "ppc/vmparam.h" #elif defined (__i386__) #include "i386/vmparam.h" diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile index 986739b08..3473b0190 100644 --- a/bsd/man/man2/Makefile +++ b/bsd/man/man2/Makefile @@ -9,12 +9,17 @@ include $(MakeInc_def) DATAFILES = \ __syscall.2 \ _exit.2 \ + aio_cancel.2 \ + aio_error.2 \ + aio_read.2 \ + aio_return.2 \ + aio_suspend.2 \ + aio_write.2 \ accept.2 \ access.2 \ acct.2 \ adjtime.2 \ bind.2 \ - brk.2 \ chdir.2 \ chflags.2 \ chmod.2 \ @@ -25,6 +30,7 @@ DATAFILES = \ dup.2 \ dup2.2 \ execve.2 \ + exchangedata.2 \ fchdir.2 \ fchflags.2 \ fchmod.2 \ @@ -35,11 +41,12 @@ DATAFILES = \ fpathconf.2 \ fstat.2 \ fstatfs.2 \ - fsctl.2 \ fsync.2 \ ftruncate.2 \ futimes.2 \ + getattrlist.2 \ getdirentries.2 \ + getdirentriesattr.2 \ getegid.2 \ geteuid.2 \ getfh.2 \ @@ -61,6 +68,7 @@ DATAFILES = \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ + getxattr.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -70,6 +78,7 @@ DATAFILES = \ lchown.2 \ link.2 \ listen.2 \ + listxattr.2 \ lseek.2 \ lstat.2 \ madvise.2 \ @@ -89,6 +98,7 @@ DATAFILES = \ open.2 \ pathconf.2 \ pipe.2 \ + poll.2 \ posix_madvise.2 \ pread.2 \ profil.2 \ @@ -103,9 +113,10 @@ DATAFILES = \ recvfrom.2 \ recvmsg.2 \ rename.2 \ + removexattr.2 \ revoke.2 \ rmdir.2 \ - sbrk.2 \ + searchfs.2 \ select.2 \ semctl.2 \ semget.2 \ @@ -113,6 +124,7 @@ DATAFILES = \ send.2 \ sendmsg.2 \ sendto.2 \ + setattrlist.2 \ setegid.2 \ seteuid.2 \ setgid.2 \ @@ -127,6 +139,7 @@ DATAFILES = \ setsockopt.2 \ settimeofday.2 \ setuid.2 \ + setxattr.2 \ shmat.2 \ shmctl.2 \ shmdt.2 \ @@ -159,7 +172,12 @@ DATAFILES = \ write.2 \ writev.2 \ +# List of source/target hard link pairs for installed manual pages; source +# names may be repeated +MLINKS= kqueue.2 kevent.2 + INSTALL_MAN_LIST = ${DATAFILES} +INSTALL_MAN_LINKS = ${MLINKS} INSTALL_MAN_DIR = man2 diff --git a/bsd/man/man2/accept.2 b/bsd/man/man2/accept.2 index f62dbc3ba..3b5ec370f 100644 --- a/bsd/man/man2/accept.2 +++ b/bsd/man/man2/accept.2 @@ -43,7 +43,7 @@ .Fd #include .Fd #include .Ft int -.Fn accept "int s" "struct sockaddr *addr" "int *addrlen" +.Fn accept "int s" "struct sockaddr *addr" "socklen_t *addrlen" .Sh DESCRIPTION The argument .Fa s diff --git a/bsd/man/man2/aio_cancel.2 b/bsd/man/man2/aio_cancel.2 new file mode 100644 index 000000000..a5f1392c6 --- /dev/null +++ b/bsd/man/man2/aio_cancel.2 @@ -0,0 +1,117 @@ +.\" Copyright (c) 1999 Softweyr LLC. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY Softweyr LLC AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL Softweyr LLC OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/lib/libc/sys/aio_cancel.2,v 1.22 2003/01/13 10:37:11 tjr Exp $ +.\" +.Dd January 19, 2000 +.Dt AIO_CANCEL 2 +.Os +.Sh NAME +.Nm aio_cancel +.Nd cancel an outstanding asynchronous I/O operation (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_cancel "int fildes" "struct aiocb * iocb" +.Sh DESCRIPTION +The +.Fn aio_cancel +system call cancels the outstanding asynchronous +I/O request for the file descriptor specified in +.Fa fildes . +If +.Fa iocb +is specified, only that specific asynchronous I/O request is cancelled. +.Pp +Normal asynchronous notification occurs for cancelled requests. +Requests complete with an error result of +.Er ECANCELED . +.Sh RESTRICTIONS +The +.Fn aio_cancel +system call does not cancel asynchronous I/O requests for raw disk devices. +The +.Fn aio_cancel +system call will always return +.Dv AIO_NOTCANCELED +for file descriptors associated with raw disk devices. +.Sh RETURN VALUES +The +.Fn aio_cancel +system call returns -1 to indicate an error, or one of the following: +.Bl -tag -width Dv +.It Bq Dv AIO_CANCELED +All outstanding requests meeting the criteria specified were cancelled. +.It Bq Dv AIO_NOTCANCELED +Some requests were not cancelled, status for the requests should be +checked with +.Xr aio_error 2 . +.It Bq Dv AIO_ALLDONE +All of the requests meeting the criteria have finished. +.El +.Sh ERRORS +An error return from +.Fn aio_cancel +indicates: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fildes +argument +is an invalid file descriptor. +.El +.Sh SEE ALSO +.Xr aio_error 2 , +.Xr aio_read 2 , +.Xr aio_return 2 , +.Xr aio_suspend 2 , +.Xr aio_write 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_cancel +system call is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_cancel +system call first appeared in +.Fx 3.0 . +The first functional implementation of +.Fn aio_cancel +appeared in +.Fx 4.0 . +.Sh AUTHORS +.An -nosplit +This +manual page was originally written by +.An Wes Peters Aq wes@softweyr.com . +.An Christopher M Sedore Aq cmsedore@maxwell.syr.edu +updated it when +.Fn aio_cancel +was implemented for +.Fx 4.0 . diff --git a/bsd/man/man2/aio_error.2 b/bsd/man/man2/aio_error.2 new file mode 100644 index 000000000..8c13ca3f5 --- /dev/null +++ b/bsd/man/man2/aio_error.2 @@ -0,0 +1,100 @@ +.\" Copyright (c) 1999 Softweyr LLC. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
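
The aio_cancel.2 page added above documents three distinct non-error returns in addition to -1. A sketch of acting on all of them (function and variable names are illustrative):

    #include <aio.h>
    #include <stdio.h>

    /* Sketch: cancel one outstanding request and handle the
     * tri-state result documented in aio_cancel(2). */
    void
    cancel_request(int fd, struct aiocb *iocbp)
    {
        switch (aio_cancel(fd, iocbp)) {
        case AIO_CANCELED:
            printf("request cancelled\n");
            break;
        case AIO_NOTCANCELED:
            /* still in flight; check aio_error(2) for final status */
            printf("not cancelled, current status %d\n", aio_error(iocbp));
            break;
        case AIO_ALLDONE:
            printf("request had already completed\n");
            break;
        default:                /* -1: e.g. EBADF */
            perror("aio_cancel");
        }
    }
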
+.\" +.\" THIS SOFTWARE IS PROVIDED BY Softweyr LLC AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL Softweyr LLC OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/lib/libc/sys/aio_error.2,v 1.18 2003/01/13 10:37:11 tjr Exp $ +.\" +.Dd June 2, 1999 +.Dt AIO_ERROR 2 +.Os +.Sh NAME +.Nm aio_error +.Nd retrieve error status of asynchronous I/O operation (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_error "const struct aiocb *iocb" +.Sh DESCRIPTION +The +.Fn aio_error +system call returns the error status of the asynchronous I/O request +associated with the structure pointed to by +.Fa iocb . +.Sh RETURN VALUES +If the asynchronous I/O request has completed successfully, +.Fn aio_error +returns 0. If the request has not yet completed, +.Er EINPROGRESS +is returned. If the request has completed unsuccessfully the error +status is returned as described in +.Xr read 2 , +.Xr write 2 , +or +.Xr fsync 2 +is returned. +On failure, +.Fn aio_error +returns +.Dv -1 +and sets +.Dv errno +to indicate the error condition. +.Sh ERRORS +The +.Fn aio_error +system call will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa iocb +argument +does not reference an outstanding asynchronous I/O request. +.El +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_read 2 , +.Xr aio_return 2 , +.Xr aio_suspend 2 , +.Xr aio_write 2 , +.Xr fsync 2 , +.Xr read 2 , +.Xr write 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_error +system call +is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_error +system call first appeared in +.Fx 3.0 . +.Sh AUTHORS +This +manual page was written by +.An Wes Peters Aq wes@softweyr.com . diff --git a/bsd/man/man2/aio_read.2 b/bsd/man/man2/aio_read.2 new file mode 100644 index 000000000..e0ef5a537 --- /dev/null +++ b/bsd/man/man2/aio_read.2 @@ -0,0 +1,211 @@ +.\" Copyright (c) 1998 Terry Lambert +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/lib/libc/sys/aio_read.2,v 1.19 2003/01/14 02:37:06 tjr Exp $ +.\" +.Dd November 17, 1998 +.Dt AIO_READ 2 +.Os +.Sh NAME +.Nm aio_read +.Nd asynchronous read from a file (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_read "struct aiocb *iocb" +.Sh DESCRIPTION +The +.Fn aio_read +system call allows the calling process to read +.Fa iocb->aio_nbytes +from the descriptor +.Fa iocb->aio_fildes +beginning at the offset +.Fa iocb->aio_offset +into the buffer pointed to by +.Fa iocb->aio_buf . +The call returns immediately after the read request has +been enqueued to the descriptor; the read may or may not have +completed at the time the call returns. +.Pp +If _POSIX_PRIORITIZED_IO is defined, and the descriptor supports it, +then the enqueued operation is submitted at a priority equal to that +of the calling process minus +.Fa iocb->aio_reqprio . +.Pp +The +.Fa iocb->aio_lio_opcode +argument +is ignored by the +.Fn aio_read +system call. +.Pp +The +.Fa iocb +pointer may be subsequently used as an argument to +.Fn aio_return +and +.Fn aio_error +in order to determine return or error status for the enqueued operation +while it is in progress. +.Pp +If the request could not be enqueued (generally due to invalid arguments), +then the call returns without having enqueued the request. +.Pp +If the request is successfully enqueued, the value of +.Fa iocb->aio_offset +can be modified during the request as context, so this value must +not be referenced after the request is enqueued. +.Sh RESTRICTIONS +The Asynchronous I/O Control Block structure pointed to by +.Fa iocb +and the buffer that the +.Fa iocb->aio_buf +member of that structure references must remain valid until the +operation has completed. For this reason, use of auto (stack) variables +for these objects is discouraged. +.Pp +The asynchronous I/O control buffer +.Fa iocb +should be zeroed before the +.Fn aio_read +call to avoid passing bogus context information to the kernel. +.Pp +Modifications of the Asynchronous I/O Control Block structure or the +buffer contents after the request has been enqueued, but before the +request has completed, are not allowed. +.Pp +If the file offset in +.Fa iocb->aio_offset +is past the offset maximum for +.Fa iocb->aio_fildes , +no I/O will occur. +.Sh RETURN VALUES +.Rv -std aio_read +.Sh DIAGNOSTICS +None. +.Sh ERRORS +The +.Fn aio_read +system call will fail if: +.Bl -tag -width Er +.It Bq Er EAGAIN +The request was not queued because of system resource limitations. +.It Bq Er ENOSYS +The +.Fn aio_read +system call is not supported. +.El +.Pp +The following conditions may be synchronously detected when the +.Fn aio_read +system call is made, or asynchronously, at any time thereafter. 
If they +are detected at call time, +.Fn aio_read +returns -1 and sets +.Va errno +appropriately; otherwise the +.Fn aio_return +system call must be called, and will return -1, and +.Fn aio_error +must be called to determine the actual value that would have been +returned in +.Va errno . +.Pp +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa iocb->aio_fildes +argument +is invalid. +.It Bq Er EINVAL +The offset +.Fa iocb->aio_offset +is not valid, the priority specified by +.Fa iocb->aio_reqprio +is not a valid priority, or the number of bytes specified by +.Fa iocb->aio_nbytes +is not valid. +.It Bq Er EOVERFLOW +The file is a regular file, +.Fa iocb->aio_nbytes +is greater than zero, the starting offset in +.Fa iocb->aio_offset +is before the end of the file, but is at or beyond the +.Fa iocb->aio_fildes +offset maximum. +.El +.Pp +If the request is successfully enqueued, but subsequently cancelled +or an error occurs, the value returned by the +.Fn aio_return +system call is per the +.Xr read 2 +system call, and the value returned by the +.Fn aio_error +system call is either one of the error returns from the +.Xr read 2 +system call, or one of: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa iocb->aio_fildes +argument +is invalid for reading. +.It Bq Er ECANCELED +The request was explicitly cancelled via a call to +.Fn aio_cancel . +.It Bq Er EINVAL +The offset +.Fa iocb->aio_offset +would be invalid. +.El +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_error 2 , +.Xr aio_return 2 , +.Xr aio_suspend 2 , +.Xr aio_write 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_read +system call is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_read +system call first appeared in +.Fx 3.0 . +.Sh AUTHORS +This +manual page was written by +.An Terry Lambert Aq terry@whistle.com . +.Sh BUGS +Invalid information in +.Fa iocb->_aiocb_private +may confuse the kernel. diff --git a/bsd/man/man2/aio_return.2 b/bsd/man/man2/aio_return.2 new file mode 100644 index 000000000..8c4e28ff2 --- /dev/null +++ b/bsd/man/man2/aio_return.2 @@ -0,0 +1,103 @@ +.\" Copyright (c) 1999 Softweyr LLC. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY Softweyr LLC AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL Softweyr LLC OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
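
Pulling together the rules from aio_read.2 above (zero the control block first, and keep both the aiocb and the buffer valid until completion), a submission sketch; the file-scope variables satisfy the lifetime restriction, and all names are illustrative:

    #include <aio.h>
    #include <string.h>

    /* Sketch: enqueue an asynchronous read of the first 512 bytes.
     * Static storage because the aiocb and the buffer must stay
     * valid until the operation completes (see RESTRICTIONS above). */
    static struct aiocb acb;
    static char         buffer[512];

    int
    start_read(int fd)
    {
        memset(&acb, 0, sizeof(acb));   /* avoid bogus context info */
        acb.aio_fildes = fd;
        acb.aio_buf    = buffer;
        acb.aio_nbytes = sizeof(buffer);
        acb.aio_offset = 0;

        return (aio_read(&acb));        /* 0 if enqueued, -1 on error */
    }

Completion can then be observed with the aio_error()/aio_return() idiom sketched earlier, or by blocking in aio_suspend(2).
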
+.\" +.\" $FreeBSD: src/lib/libc/sys/aio_return.2,v 1.17 2003/01/13 10:37:11 tjr Exp $ +.\" +.Dd June 2, 1999 +.Dt AIO_RETURN 2 +.Os +.Sh NAME +.Nm aio_return +.Nd retrieve return status of asynchronous I/O operation (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_return "struct aiocb *iocb" +.Sh DESCRIPTION +The +.Fn aio_return +system call returns the final status of the asynchronous I/O request +associated with the structure pointed to by +.Fa iocb . +.Pp +The +.Fn aio_return +system call +should only be called once, to obtain the final status of an asynchronous +I/O operation once +.Xr aio_error 2 +returns something other than +.Er EINPROGRESS . +.Sh RETURN VALUES +If the asynchronous I/O request has completed, the status is returned +as described in +.Xr read 2 , +.Xr write 2 , +or +.Xr fsync 2 . +On failure, +.Fn aio_return +returns +.Dv -1 +and sets +.Dv errno +to indicate the error condition. +.Sh ERRORS +The +.Fn aio_return +system call will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa iocb +argument +does not reference an outstanding asynchronous I/O request. +.El +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_error 2 , +.Xr aio_suspend 2 , +.Xr aio_write 2 , +.Xr fsync 2 , +.Xr read 2 , +.Xr write 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_return +system call +is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_return +system call first appeared in +.Fx 3.0 . +.Sh AUTHORS +This +manual page was written by +.An Wes Peters Aq wes@softweyr.com . diff --git a/bsd/man/man2/aio_suspend.2 b/bsd/man/man2/aio_suspend.2 new file mode 100644 index 000000000..c0b85ce10 --- /dev/null +++ b/bsd/man/man2/aio_suspend.2 @@ -0,0 +1,113 @@ +.\" Copyright (c) 1999 Softweyr LLC. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY Softweyr LLC AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL Softweyr LLC OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: src/lib/libc/sys/aio_suspend.2,v 1.19 2003/01/13 10:37:11 tjr Exp $ +.\" +.Dd June 2, 1999 +.Dt AIO_SUSPEND 2 +.Os +.Sh NAME +.Nm aio_suspend +.Nd suspend until asynchronous I/O operations or timeout complete (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_suspend "const struct aiocb * const iocbs[]" "int niocb" "const struct timespec * timeout" +.Sh DESCRIPTION +The +.Fn aio_suspend +system call suspends the calling process until at least one of the +specified asynchronous I/O requests have completed, a signal is +delivered, or the +.Fa timeout +has passed. +.Pp +The +.Fa iocbs +argument +is an array of +.Fa niocb +pointers to asynchronous I/O requests. Array members containing +NULL will be silently ignored. +.Pp +If +.Fa timeout +is a non-nil pointer, it specifies a maximum interval to suspend. +If +.Fa timeout +is a nil pointer, the suspend blocks indefinitely. To effect a +poll, the +.Fa timeout +should point to a zero-value timespec structure. +.Sh RETURN VALUES +If one or more of the specified asynchronous I/O requests have +completed, +.Fn aio_suspend +returns 0. Otherwise it returns -1 and sets +.Va errno +to indicate the error, as enumerated below. +.Sh ERRORS +The +.Fn aio_suspend +system call will fail if: +.Bl -tag -width Er +.It Bq Er EAGAIN +the +.Fa timeout +expired before any I/O requests completed. +.It Bq Er EINVAL +The +.Fa iocbs +argument +contains more than +.Dv AIO_LISTIO_MAX +asynchronous I/O requests, or at least one of the requests is not +valid. +.It Bq Er EINTR +the suspend was interrupted by a signal. +.El +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_error 2 , +.Xr aio_return 2 , +.Xr aio_write 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_suspend +system call +is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_suspend +system call first appeared in +.Fx 3.0 . +.Sh AUTHORS +This +manual page was written by +.An Wes Peters Aq wes@softweyr.com . diff --git a/bsd/man/man2/aio_write.2 b/bsd/man/man2/aio_write.2 new file mode 100644 index 000000000..097daaf4a --- /dev/null +++ b/bsd/man/man2/aio_write.2 @@ -0,0 +1,204 @@ +.\" Copyright (c) 1999 Softweyr LLC. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY Softweyr LLC AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL Softweyr LLC OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: src/lib/libc/sys/aio_write.2,v 1.16 2003/01/13 10:37:11 tjr Exp $ +.\" +.Dd June 2, 1999 +.Dt AIO_WRITE 2 +.Os +.Sh NAME +.Nm aio_write +.Nd asynchronous write to a file (REALTIME) +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_write "struct aiocb *iocb" +.Sh DESCRIPTION +The +.Fn aio_write +system call allows the calling process to write +.Fa iocb->aio_nbytes +from the buffer pointed to by +.Fa iocb->aio_buf +to the descriptor +.Fa iocb->aio_fildes . +The call returns immediately after the write request has been enqueued +to the descriptor; the write may or may not have completed at the time +the call returns. If the request could not be enqueued, generally due +to invalid arguments, the call returns without having enqueued the +request. +.Pp +If +.Dv O_APPEND +is set for +.Fa iocb->aio_fildes , +.Fn aio_write +operations append to the file in the same order as the calls were +made. If +.Dv O_APPEND +is not set for the file descriptor, the write operation will occur at +the absolute position from the beginning of the file plus +.Fa iocb->aio_offset . +.Pp +If +.Dv _POSIX_PRIORITIZED_IO +is defined, and the descriptor supports it, then the enqueued +operation is submitted at a priority equal to that of the calling +process minus +.Fa iocb->aio_reqprio . +.Pp +The +.Fa iocb +pointer may be subsequently used as an argument to +.Fn aio_return +and +.Fn aio_error +in order to determine return or error status for the enqueued operation +while it is in progress. +.Pp +If the request is successfully enqueued, the value of +.Fa iocb->aio_offset +can be modified during the request as context, so this value must not +be referenced after the request is enqueued. +.Sh RESTRICTIONS +The Asynchronous I/O Control Block structure pointed to by +.Fa iocb +and the buffer that the +.Fa iocb->aio_buf +member of that structure references must remain valid until the +operation has completed. For this reason, use of auto (stack) variables +for these objects is discouraged. +.Pp +The asynchronous I/O control buffer +.Fa iocb +should be zeroed before the +.Fn aio_write +system call to avoid passing bogus context information to the kernel. +.Pp +Modifications of the Asynchronous I/O Control Block structure or the +buffer contents after the request has been enqueued, but before the +request has completed, are not allowed. +.Pp +If the file offset in +.Fa iocb->aio_offset +is past the offset maximum for +.Fa iocb->aio_fildes , +no I/O will occur. +.Sh RETURN VALUES +.Rv -std aio_write +.Sh ERRORS +The +.Fn aio_write +system call will fail if: +.Bl -tag -width Er +.It Bq Er EAGAIN +The request was not queued because of system resource limitations. +.It Bq Er ENOSYS +The +.Fn aio_write +system call is not supported. +.El +.Pp +The following conditions may be synchronously detected when the +.Fn aio_write +system call is made, or asynchronously, at any time thereafter. If they +are detected at call time, +.Fn aio_write +returns -1 and sets +.Va errno +appropriately; otherwise the +.Fn aio_return +system call must be called, and will return -1, and +.Fn aio_error +must be called to determine the actual value that would have been +returned in +.Va errno . +.Pp +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa iocb->aio_fildes +argument +is invalid, or is not opened for writing. 
+.It Bq Er EINVAL +The offset +.Fa iocb->aio_offset +is not valid, the priority specified by +.Fa iocb->aio_reqprio +is not a valid priority, or the number of bytes specified by +.Fa iocb->aio_nbytes +is not valid. +.El +.Pp +If the request is successfully enqueued, but subsequently canceled +or an error occurs, the value returned by the +.Fn aio_return +system call is per the +.Xr write 2 +system call, and the value returned by the +.Fn aio_error +system call is either one of the error returns from the +.Xr write 2 +system call, or one of: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa iocb->aio_fildes +argument +is invalid for writing. +.It Bq Er ECANCELED +The request was explicitly canceled via a call to +.Fn aio_cancel . +.It Bq Er EINVAL +The offset +.Fa iocb->aio_offset +would be invalid. +.El +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_error 2 , +.Xr aio_return 2 , +.Xr aio_suspend 2 , +.Xr aio 4 +.Sh STANDARDS +The +.Fn aio_write +system call +is expected to conform to the +.St -p1003.1 +standard. +.Sh HISTORY +The +.Fn aio_write +system call first appeared in +.Fx 3.0 . +.Sh AUTHORS +This manual page was written by +.An Wes Peters Aq wes@softweyr.com . +.Sh BUGS +Invalid information in +.Fa iocb->_aiocb_private +may confuse the kernel. diff --git a/bsd/man/man2/bind.2 b/bsd/man/man2/bind.2 index 9c4404cdd..742a58858 100644 --- a/bsd/man/man2/bind.2 +++ b/bsd/man/man2/bind.2 @@ -43,7 +43,7 @@ .Fd #include .Fd #include .Ft int -.Fn bind "int s" "const struct sockaddr *name" "int namelen" +.Fn bind "int s" "const struct sockaddr *name" "socklen_t namelen" .Sh DESCRIPTION .Fn Bind assigns a name to an unnamed socket. diff --git a/bsd/man/man2/brk.2 b/bsd/man/man2/brk.2 deleted file mode 100644 index f580c15f6..000000000 --- a/bsd/man/man2/brk.2 +++ /dev/null @@ -1,150 +0,0 @@ -.\" $NetBSD: brk.2,v 1.7 1995/02/27 12:31:57 cgd Exp $ -.\" -.\" Copyright (c) 1980, 1991, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. -.\" 4. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. 
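
aio_write.2 above notes that when O_APPEND is set on the descriptor, aio_offset is ignored and asynchronous writes append in the order the aio_write() calls were made. A sketch of an asynchronous log append (names illustrative; the same lifetime rules as for aio_read apply):

    #include <aio.h>
    #include <string.h>

    static struct aiocb wcb;
    static char         line[] = "event: something happened\n";

    /* Sketch: logfd is assumed open with O_WRONLY|O_APPEND, so the
     * write lands at end-of-file regardless of wcb.aio_offset. */
    int
    log_async(int logfd)
    {
        memset(&wcb, 0, sizeof(wcb));
        wcb.aio_fildes = logfd;
        wcb.aio_buf    = line;
        wcb.aio_nbytes = sizeof(line) - 1;      /* drop the NUL */

        return (aio_write(&wcb));
    }
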
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" @(#)brk.2 8.2 (Berkeley) 12/11/93 -.\" -.Dd December 11, 1993 -.Dt BRK 2 -.Os BSD 4 -.Sh NAME -.Nm brk , -.Nm sbrk -.Nd change data segment size -.Sh SYNOPSIS -.Fd #include -.Ft char * -.Fn brk "const char *addr" -.Ft char * -.Fn sbrk "int incr" -.Sh DESCRIPTION -.Bf -symbolic -The brk and sbrk functions are historical curiosities -left over from earlier days before the advent of virtual memory management. -.Ef -The -.Fn brk -function -sets the break or lowest address -of a process's data segment (uninitialized data) to -.Fa addr -(immediately above bss). -Data addressing is restricted between -.Fa addr -and the lowest stack pointer to the stack segment. -Memory is allocated by -.Fa brk -in page size pieces; -if -.Fa addr -is not evenly divisible by the system page size, it is -increased to the next page boundary. -.Pp -.\" The -.\" .Nm sbrk -.\" function -.\" allocates chunks of -.\" .Fa incr -.\" bytes -.\" to the process's data space -.\" and returns an address pointer. -.\" The -.\" .Xr malloc 3 -.\" function utilizes -.\" .Nm sbrk . -.\" .Pp -The current value of the program break is reliably returned by -.Dq Li sbrk(0) -(see also -.Xr end 3 ) . -The -.Xr getrlimit 2 -system call may be used to determine -the maximum permissible size of the -.Em data -segment; -it will not be possible to set the break -beyond the -.Em rlim_max -value returned from a call to -.Xr getrlimit , -e.g. -.Dq qetext + rlp\(->rlim_max. -(see -.Xr end 3 -for the definition of -.Em etext ) . -.Sh RETURN VALUES -.Nm Brk -returns a pointer to the new end of memory if successful; -otherwise -1 with -.Va errno -set to indicate why the allocation failed. -The -.Nm sbrk -function returns a pointer to the base of the new storage if successful; -otherwise -1 with -.Va errno -set to indicate why the allocation failed. -.Sh ERRORS -.Xr Sbrk -will fail and no additional memory will be allocated if -one of the following are true: -.Bl -tag -width Er -.It Bq Er ENOMEM -The limit, as set by -.Xr setrlimit 2 , -was exceeded. -.It Bq Er ENOMEM -The maximum possible size of a data segment (compiled into the -system) was exceeded. -.It Bq Er ENOMEM -Insufficient space existed in the swap area -to support the expansion. -.El -.Sh SEE ALSO -.Xr execve 2 , -.Xr getrlimit 2 , -.Xr malloc 3 , -.Xr mmap 2 , -.Xr end 3 -.Sh BUGS -Setting the break may fail due to a temporary lack of -swap space. It is not possible to distinguish this -from a failure caused by exceeding the maximum size of -the data segment without consulting -.Xr getrlimit . -.Sh HISTORY -A -.Fn brk -function call appeared in -.At v7 . 
diff --git a/bsd/man/man2/chflags.2 b/bsd/man/man2/chflags.2 index 70cb5097b..66a036bbb 100644 --- a/bsd/man/man2/chflags.2 +++ b/bsd/man/man2/chflags.2 @@ -44,9 +44,9 @@ .Fd #include .Fd #include .Ft int -.Fn chflags "const char *path" "u_long flags" +.Fn chflags "const char *path" "u_int flags" .Ft int -.Fn fchflags "int fd" "u_long flags" +.Fn fchflags "int fd" "u_int flags" .Sh DESCRIPTION The file whose name is given by diff --git a/bsd/man/man2/chown.2 b/bsd/man/man2/chown.2 index 3ce057f3b..7ba416f38 100644 --- a/bsd/man/man2/chown.2 +++ b/bsd/man/man2/chown.2 @@ -1,6 +1,3 @@ -.\" $OpenBSD: chown.2,v 1.3 1997/01/26 05:10:33 downsj Exp $ -.\" $NetBSD: chown.2,v 1.10 1995/10/12 15:40:47 jtc Exp $ -.\" .\" Copyright (c) 1980, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" @@ -34,29 +31,31 @@ .\" .\" @(#)chown.2 8.4 (Berkeley) 4/19/94 .\" -.Dd January 25, 1997 +.Dd April 19, 1994 .Dt CHOWN 2 .Os .Sh NAME .Nm chown , -.Nm fchown -.Nd change owner and group of a file or link +.Nm fchown , +.Nm lchown +.Nd change owner and group of a file .Sh SYNOPSIS -.Fd #include -.Fd #include +.In unistd.h .Ft int .Fn chown "const char *path" "uid_t owner" "gid_t group" .Ft int .Fn fchown "int fd" "uid_t owner" "gid_t group" +.Ft int +.Fn lchown "const char *path" "uid_t owner" "gid_t group" .Sh DESCRIPTION -The owner ID and group ID of the file (or link) +The owner ID and group ID of the file named by .Fa path or referenced by .Fa fd is changed as specified by the arguments .Fa owner -and +and .Fa group . The owner of a file may change the .Fa group @@ -66,36 +65,49 @@ but the change .Fa owner capability is restricted to the super-user. .Pp -.Fn Chown +The +.Fn chown +system call clears the set-user-id and set-group-id bits on the file to prevent accidental or mischievous creation of -set-user-id and set-group-id programs. +set-user-id and set-group-id programs if not executed +by the super-user. +The +.Fn chown +system call +follows symbolic links to operate on the target of the link +rather than the link itself. .Pp -.Fn Fchown +The +.Fn fchown +system call is particularly useful when used in conjunction with the file locking primitives (see .Xr flock 2 ) . .Pp +The +.Fn lchown +system call is similar to +.Fn chown +but does not follow symbolic links. +.Pp One of the owner or group id's may be left unchanged by specifying it as -1. .Sh RETURN VALUES -Zero is returned if the operation was successful; --1 is returned if an error occurs, with a more specific -error code being placed in the global variable -.Va errno . +.Rv -std .Sh ERRORS -.Fn Chown -will fail and the file or link will be unchanged if: +The +.Fn chown +and +.Fn lchown +will fail and the file will be unchanged if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG -A component of a pathname exceeded -.Dv {NAME_MAX} -characters, or an entire path name exceeded -.Dv {PATH_MAX} -characters. +A component of a pathname exceeded 255 characters, +or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er EACCES @@ -107,20 +119,27 @@ The effective user ID is not the super-user. .It Bq Er EROFS The named file resides on a read-only file system. .It Bq Er EFAULT -.Fa Path +The +.Fa path +argument points outside the process's allocated address space. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. 
.El .Pp -.Fn Fchown -will fail if: +The +.Fn fchown +system call will fail if: .Bl -tag -width Er .It Bq Er EBADF +The .Fa fd +argument does not refer to a valid descriptor. .It Bq Er EINVAL +The .Fa fd +argument refers to a socket, not a file. .It Bq Er EPERM The effective user ID is not the super-user. @@ -130,24 +149,33 @@ The named file resides on a read-only file system. An I/O error occurred while reading from or writing to the file system. .El .Sh SEE ALSO -.Xr chown 8 , .Xr chgrp 1 , .Xr chmod 2 , -.Xr flock 2 +.Xr flock 2 , +.Xr chown 8 .Sh STANDARDS The .Fn chown -function is expected to conform to -.St -p1003.1-88 . +system call is expected to conform to +.St -p1003.1-90 . .Sh HISTORY The +.Fn chown +function appeared in +.At v7 . +The .Fn fchown -function call appeared in +system call appeared in .Bx 4.2 . .Pp The .Fn chown and .Fn fchown -functions were changed to follow symbolic links in +system calls were changed to follow symbolic links in .Bx 4.4 . +The +.Fn lchown +system call was added in +.Fx 3.0 +to compensate for the loss of functionality. diff --git a/bsd/man/man2/connect.2 b/bsd/man/man2/connect.2 index e06e59fc5..c778b8d3a 100644 --- a/bsd/man/man2/connect.2 +++ b/bsd/man/man2/connect.2 @@ -43,7 +43,7 @@ .Fd #include .Fd #include .Ft int -.Fn connect "int s" "const struct sockaddr *name" "int namelen" +.Fn connect "int s" "const struct sockaddr *name" "socklen_t namelen" .Sh DESCRIPTION The parameter .Fa s @@ -71,8 +71,11 @@ multiple times to change their association. Datagram sockets may dissolve the association by connecting to an invalid address, such as a null address or an address with -the address family set to AF_UNPSEC (the error -EAFNOSUPPORT will be harmlessly returned). +the address family set to +.Dv AF_UNSPEC +(the error +.Dv EAFNOSUPPORT +will be harmlessly returned). .Sh RETURN VALUES If the connection or binding succeeds, 0 is returned. Otherwise a -1 is returned, and a more specific error @@ -119,6 +122,11 @@ for completion by selecting the socket for writing. The socket is non-blocking and a previous connection attempt has not yet been completed. +.It Bq Er EACCES +The destination address is a broadcast address and the +socket option +.Dv SO_BROADCAST +is not set. .El .Pp The following errors are specific to connecting names in the UNIX domain. diff --git a/bsd/man/man2/exchangedata.2 b/bsd/man/man2/exchangedata.2 new file mode 100644 index 000000000..cc2111ea4 --- /dev/null +++ b/bsd/man/man2/exchangedata.2 @@ -0,0 +1,190 @@ +.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved. +.\" +.\" The contents of this file constitute Original Code as defined in and +.\" are subject to the Apple Public Source License Version 1.1 (the +.\" "License"). You may not use this file except in compliance with the +.\" License. Please obtain a copy of the License at +.\" http://www.apple.com/publicsource and read it before using this file. +.\" +.\" This Original Code and all software distributed under the License are +.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +.\" License for the specific language governing rights and limitations +.\" under the License. +.\" +.\" @(#)exchangedata.2 +. 
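
The chown.2 rewrite above adds lchown(), which differs from chown() only in that it does not follow a final symbolic link. A quick illustration (the path names are hypothetical):

    #include <sys/types.h>
    #include <unistd.h>

    /* Sketch: with "link" being a symlink to "target", chown()
     * affects "target" (it follows the link), while lchown()
     * affects the link itself. */
    int
    reassign(uid_t owner, gid_t group)
    {
        if (chown("link", owner, group) == -1)      /* changes "target" */
            return (-1);
        return (lchown("link", owner, group));      /* changes "link" */
    }
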
+.Dd December 15, 2003 +.Dt EXCHANGEDATA 2 +.Os Darwin +.Sh NAME +.Nm exchangedata +.Nd atomically exchange data between two files +.Sh SYNOPSIS +.Fd #include <unistd.h> +.Ft int +.Fn exchangedata "const char * path1" "const char * path2" "unsigned long options" +. +.Sh DESCRIPTION +The +.Fn exchangedata +function swaps the contents of the files referenced by +.Fa path1 +and +.Fa path2 +in an atomic fashion. +That is, all concurrent processes will either see the pre-exchanged state or the +post-exchanged state; they can never see the files in an inconsistent state. +The data in all forks is swapped in this way. +The +.Fa options +parameter lets you control specific aspects of the function's behaviour. +.Pp +. +Open file descriptors follow the swapped data. +Thus, a descriptor that previously referenced +.Fa path1 +will now reference the data that's accessible via +.Fa path2 , +and vice versa. +.Pp +. +In general, the file attributes (metadata) are not exchanged. +Specifically, the object identifier attributes (that is, the +.Dv ATTR_CMN_OBJID +and +.Dv ATTR_CMN_OBJPERMANENTID +attributes as defined by the +.Xr getattrlist 2 +function) are not swapped. +An exception to this general rule is that the modification time attribute ( +.Dv ATTR_CMN_MODTIME +) is swapped. +.Pp +. +When combined, these features allow you to implement a 'safe save' function that +does not break references to the file (for example, aliases). +You first save the new contents to a temporary file and then +exchange the data of the original file and the temporary file. +Programs that reference the file via an object identifier will continue to +reference the original file, but now it has the new data. +.Pp +. +.\" path1 and path2 parameters +. +The +.Fa path1 +and +.Fa path2 +parameters must both reference valid files. +All directories listed in the path names leading to these files must be +searchable. +You must have write access to the files. +.Pp +. +.\" options parameter +. +The +.Fa options +parameter is a bit set that controls the behaviour of +.Fn exchangedata . +The following option bits are defined. +. +.Bl -tag -width FSOPT_NOFOLLOW +. +.It FSOPT_NOFOLLOW +If this bit is set, +.Fn exchangedata +will not follow a symlink if it occurs as +the last component of +.Fa path1 +or +.Fa path2 . +. +.El +. +.Sh RETURN VALUES +Upon successful completion a value of 0 is returned. +Otherwise, a value of -1 is returned and +.Va errno +is set to indicate the error. +. +.Sh COMPATIBILITY +Not all volumes support +.Fn exchangedata . +You can test whether a volume supports +.Fn exchangedata +by using +.Xr getattrlist 2 +to get the volume capabilities attribute +.Dv ATTR_VOL_CAPABILITIES , +and then testing the +.Dv VOL_CAP_INT_EXCHANGEDATA +flag. +.Pp +. +.Sh ERRORS +.Fn exchangedata +will fail if: +.Bl -tag -width Er +. +.It Bq Er ENOTSUP +The volume does not support +.Fn exchangedata . +. +.It Bq Er ENOTDIR +A component of the path prefix is not a directory. +. +.It Bq Er ENAMETOOLONG +A component of a path name exceeded +.Dv NAME_MAX +characters, or an entire path name exceeded +.Dv PATH_MAX +characters. +. +.It Bq Er ENOENT +Either file does not exist. +. +.It Bq Er EACCES +Search permission is denied for a component of the path prefix. +. +.It Bq Er ELOOP +Too many symbolic links were encountered in translating the pathname. +. +.It Bq Er EFAULT +.Fa path1 +or +.Fa path2 +points to an invalid address. +. +.It Bq Er EXDEV +.Fa path1 +and +.Fa path2 +are on different volumes (mounted file systems). +.
+.It Bq Er EINVAL +.Fa path1 +or +.Fa path2 +references the same file. +. +.It Bq Er EINVAL +You try to exchange something other than a regular file (for example, a directory). +. +.It Bq Er EIO +An I/O error occurred while reading from or writing to the file system. +.El +.Pp +. +.Sh SEE ALSO +. +.Xr getattrlist 2 +. +.Sh HISTORY +A +.Fn exchangedata +function call appeared in Darwin 1.3.1 (Mac OS X version 10.0). +. diff --git a/bsd/man/man2/fcntl.2 b/bsd/man/man2/fcntl.2 index 1916e0fdc..12fd284f4 100644 --- a/bsd/man/man2/fcntl.2 +++ b/bsd/man/man2/fcntl.2 @@ -152,6 +152,19 @@ Get disk device information. Currently this only includes the disk device address that corresponds to the current file offset. +.It Dv F_FULLFSYNC +Does the same thing as +.Xr fsync 2 , +then asks the drive to +flush all buffered data to +the permanent storage device +.Fa ( arg +is ignored). +This is currently +only implemented on HFS file systems and +the operation may take quite a while to +complete. Certain FireWire drives have +also been known to ignore this request. .El .Pp The flags for the diff --git a/bsd/man/man2/flock.2 b/bsd/man/man2/flock.2 index e38879e58..c74ac5b69 100644 --- a/bsd/man/man2/flock.2 +++ b/bsd/man/man2/flock.2 @@ -133,7 +133,7 @@ is an invalid descriptor. The argument .Fa fd refers to an object other than a file. -.It Bq Er EOPNOTSUPP +.It Bq Er ENOTSUP The referenced descriptor is not of the correct type. .El .Sh SEE ALSO diff --git a/bsd/man/man2/fsync.2 b/bsd/man/man2/fsync.2 index 7d72c2599..b75a4229d 100644 --- a/bsd/man/man2/fsync.2 +++ b/bsd/man/man2/fsync.2 @@ -51,10 +51,41 @@ to be moved to a permanent storage device. This normally results in all in-core modified copies of buffers for the associated file to be written to a disk. .Pp -.Fn Fsync -should be used by programs that require a file to be -in a known state, for example, in building a simple transaction -facility. +Note that while +.Fn fsync +will flush all data from the host +to the drive (i.e. the "permanent storage +device"), the +drive itself may not physically +write the data to the +platters for quite some time +and it may be written in an +out-of-order sequence. +.Pp +Specifically, if the drive loses power +or the OS crashes, +the application +may find that only some or none of its data was +written. The disk drive may also re-order +the data so that later writes +may be present while earlier writes are not. +.Pp +This is not a theoretical +edge case. This scenario is easily reproduced +with real-world workloads and drive +power failures. +.Pp +For applications that require tighter guarantees about +the integrity of their data, Mac OS X provides the +F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks the +drive to flush all buffered data to permanent +storage. Applications such as databases that require +a strict ordering of writes should use F_FULLFSYNC to +ensure their data is written in the order they expect. +Please see +.Xr fcntl 2 +for more detail. +.Pp .Sh RETURN VALUES A 0 value is returned on success. A -1 value indicates an error. @@ -75,7 +106,8 @@ An I/O error occurred while reading from or writing to the file system. .Sh SEE ALSO .Xr sync 2 , .Xr sync 8 , -.Xr update 8 +.Xr update 8 , +.Xr fcntl 2 .Sh HISTORY The .Fn fsync diff --git a/bsd/man/man2/getattrlist.2 b/bsd/man/man2/getattrlist.2 new file mode 100644 index 000000000..e9bb5c33f --- /dev/null +++ b/bsd/man/man2/getattrlist.2 @@ -0,0 +1,1684 @@ +.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+.\" +.\" The contents of this file constitute Original Code as defined in and +.\" are subject to the Apple Public Source License Version 1.1 (the +.\" "License"). You may not use this file except in compliance with the +.\" License. Please obtain a copy of the License at +.\" http://www.apple.com/publicsource and read it before using this file. +.\" +.\" This Original Code and all software distributed under the License are +.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +.\" License for the specific language governing rights and limitations +.\" under the License. +.\" +.\" @(#)getattrlist.2 +. +.Dd October 14, 2004 +.Dt GETATTRLIST 2 +.Os Darwin +.Sh NAME +.Nm getattrlist +.Nd get file system attributes +.Sh SYNOPSIS +.Fd #include +.Fd #include +.Ft int +.Fn getattrlist "const char* path" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" +. +.Sh DESCRIPTION +The +.Fn getattrlist +function returns attributes (that is, metadata) of file system objects. +You can think of +.Fn getattrlist +as a seriously enhanced version of +.Xr stat 2 . +The function returns attributes about the file system object specified by +.Fa path +in the buffer specified by +.Fa attrBuf +and +.Fa attrBufSize . +The +.Fa attrList +parameter determines what attributes are returned. +The +.Fa options +parameter lets you control specific aspects of the function's behaviour. +.Pp +. +The +.Fn getattrlist +function is only supported by certain volume format implementations. +For maximum compatibility, client programs should use high-level APIs +(such as the Carbon File Manager) to access file system attributes. +These high-level APIs include logic to emulate file system attributes +on volumes that don't support +.Fn getattrlist . +.Pp +. +Not all volumes support all attributes. +See the discussion of +.Dv ATTR_VOL_ATTRIBUTES +for a discussion of how to determine whether a particular volume supports a +particular attribute. +.Pp +Furthermore, you should only request the attributes that you need. +Some attributes are expensive to calculate on some volume formats. +For example, +.Dv ATTR_DIR_ENTRYCOUNT +is usually expensive to calculate on non-HFS [Plus] volumes. +If you don't need a particular attribute, you should not ask for it. +.Pp +. +.\" path parameter +. +The +.Fa path +parameter must reference a valid file system object. +Read, write or execute permission of the object itself is not required, but +all directories listed in the path name leading to the object must be +searchable. +.Pp +. +.\" attrList parameter +. +The +.Fa attrList +parameter is a pointer to an +.Vt attrlist +structure, as defined by +.Aq Pa sys/attr.h +(shown below). +It determines what attributes are returned by the function. +You are responsible for filling out all fields of this structure before calling the function. +.Bd -literal +typedef u_int32_t attrgroup_t; +.Pp +struct attrlist { + u_short bitmapcount; /* number of attr. 
bit sets in list */ + u_int16_t reserved; /* (to maintain 4-byte alignment) */ + attrgroup_t commonattr; /* common attribute group */ + attrgroup_t volattr; /* volume attribute group */ + attrgroup_t dirattr; /* directory attribute group */ + attrgroup_t fileattr; /* file attribute group */ + attrgroup_t forkattr; /* fork attribute group */ +}; +#define ATTR_BIT_MAP_COUNT 5 +.Ed +.Pp +. +.\" attrlist elements +. +The fields of the +.Vt attrlist +structure are defined as follows. +.Bl -tag -width XXXbitmapcount +. +.It bitmapcount +Number of attribute bit sets in the structure. +In current systems you must set this to +.Dv ATTR_BIT_MAP_COUNT . +. +.It reserved +Reserved. +You must set this to 0. +. +.It commonattr +A bit set that specifies the common attributes that you require. +Common attributes relate to all types of file system objects. +See below for a description of these attributes. +. +.It volattr +A bit set that specifies the volume attributes that you require. +Volume attributes relate to volumes (that is, mounted file systems). +See below for a description of these attributes. +If you request volume attributes, +.Fa path +must reference the root of a volume. +In addition, you can't request volume attributes if you also request +file or directory attributes. +. +.It dirattr +A bit set that specifies the directory attributes that you require. +See below for a description of these attributes. +. +.It fileattr +A bit set that specifies the file attributes that you require. +See below for a description of these attributes. +. +.It forkattr +A bit set that specifies the fork attributes that you require. +Fork attributes relate to the actual data in the file, +which can be held in multiple named contiguous ranges, or forks. +See below for a description of these attributes. +. +.El +.Pp +. +Unless otherwise noted in the lists below, attributes are read-only. +Attributes labelled as read/write can be set using +.Xr setattrlist 2 . +.Pp +. +.\" attrBuf and attrBufSize parameters +. +The +.Fa attrBuf +and +.Fa attrBufSize +parameters specify a buffer into which the function places attribute values. +The format of this buffer is sufficiently complex that its description +requires a separate section (see below). +The initial contents of this buffer are ignored. +.Pp +. +.\" option parameter +. +The +.Fa options +parameter is a bit set that controls the behaviour of +.Fn getattrlist . +The following option bits are defined. +. +.Bl -tag -width XXXbitmapcount +. +.It FSOPT_NOFOLLOW +If this bit is set, +.Fn getattrlist +will not follow a symlink if it occurs as +the last component of +.Fa path . +. +.El +. +.Sh ATTRIBUTE BUFFER +. +The data returned in the buffer described by +.Fa attrBuf +and +.Fa attrBufSize +is formatted as follows. +.Pp +. +.Bl -enum +. +.It +The first element of the buffer is a +.Vt unsigned long +that contains the overall length, in bytes, of the attributes returned. +This size includes the length field itself. +. +.It +Following the length field is a list of attributes. +Each attribute is represented by a field of its type, +where the type is given as part of the attribute description (below). +. +.It +The attributes are placed into the attribute buffer in the order +that they are described below. +. +.El +.Pp +. +If the attribute is of variable length, it is represented +in the list by an +.Vt attrreference +structure, as defined by +.Aq Pa sys/attr.h +(shown below). +. 
+.Bd -literal +typedef struct attrreference { + long attr_dataoffset; + size_t attr_length; +} attrreference_t; +.Ed +.Pp +. +This structure contains a 'pointer' to the variable length attribute data. +The +.Fa attr_length +field is the length of the attribute data (in bytes). +The +.Fa attr_dataoffset +field is the offset in bytes from the +.Vt attrreference +structure +to the attribute data. +This offset will always be a multiple of sizeof(unsigned long) bytes, +so you can safely access common data types without fear of alignment +exceptions. +.Pp +. +The +.Fn getattrlist +function will silently truncate attribute data if +.Fa attrBufSize +is too small. +The length field at the front of the attribute list always represents +the length of the data actually copied into the attribute buffer. +If the data is truncated, there is no easy way to determine the +buffer size that's required to get all of the requested attributes. +You should always pass an +.Fa attrBufSize +that is large enough to accommodate the known size of the attributes +in the attribute list (including the leading length field). +.Pp +. +Because the returned attributes are simply truncated if the buffer is +too small, it's possible for a variable length attribute to reference +data beyond the end of the attribute buffer. That is, it's possible +for the attribute data to start beyond the end of the attribute buffer +(that is, if +.Fa attrRef +is a pointer to the +.Vt attrreference_t , +( ( (char *) +.Fa attrRef +) + +.Fa attr_dataoffset +) > ( ( (char *) +.Fa attrBuf +) + +.Fa attrBufSize +) ) or, indeed, for the attribute data to extend beyond the end of the attribute buffer (that is, +( ( (char *) +.Fa attrRef +) + +.Fa attr_dataoffset ++ +.Fa attr_length +) > ( ( (char *) +.Fa attrBuf +) + +.Fa attrBufSize +) ). +If this happens you must increase the size of the buffer and call +.Fn getattrlist +again to get an accurate copy of the attribute. +. +.Sh COMMON ATTRIBUTES +. +Common attributes relate to all types of file system objects. +The following common attributes are defined. +. +.Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP +. +.It ATTR_CMN_NAME +An +.Vt attrreference +structure containing the name of the file system object as +a UTF-8 encoded, null terminated C string. +The attribute data length will not be greater than +.Dv NAME_MAX + +1. +.Pp +. +.It ATTR_CMN_DEVID +A +.Vt dev_t +containing the device number of the device on which this +file system object's volume is mounted. +Equivalent to the +.Fa st_dev +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_FSID +An +.Vt fsid_t +structure containing the file system identifier for the volume on which +the file system object resides. +Equivalent to the +.Fa f_fsid +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +. +.Pp +This value is not related to the file system ID from traditional Mac OS (for example, +the +.Fa filesystemID +field of the +.Vt FSVolumeInfo +structure returned by Carbon's FSGetVolumeInfo() function). +On current versions of Mac OS X that value is synthesised by the Carbon File Manager. +. +.It ATTR_CMN_OBJTYPE +An +.Vt fsobj_type_t +that identifies the type of file system object. +The values are taken from +.Vt enum vtype +in +.Aq Pa sys/vnode.h . +. +.It ATTR_CMN_OBJTAG +An +.Vt fsobj_tag_t +that identifies the type of file system containing the object. +The values are taken from +.Vt enum vtagtype +in +.Aq Pa sys/vnode.h . +.
+.It ATTR_CMN_OBJID +An +.Vt fsobj_id_t +structure that uniquely identifies the file system object +within its volume. +The fid_generation field of this structure will be zero for all non-root callers +(effective UID not 0). +This identifier need not be persistent across an unmount/mount sequence. +.Pp +. +Some volume formats use well known values for the +.Fa fid_objno +field for the root directory (2) and the parent of root directory (1). +This is not a required behaviour of this attribute. +. +.It ATTR_CMN_OBJPERMANENTID +An +.Vt fsobj_id_t +structure that uniquely identifies the file system object +within its volume. +The fid_generation field of this structure will be zero for all non-root callers +(effective UID not 0). +This identifier should be persistent across an unmount/mount sequence. +.Pp +Some file systems (for example, original HFS) may need to modify the on-disk +structure to return a persistent identifier. +If such a file system is mounted read-only, an attempt to get this attribute +will fail with the error +.Dv EROFS . +. +.It ATTR_CMN_PAROBJID +An +.Vt fsobj_id_t +structure that identifies the parent directory of the file system object. +The fid_generation field of this structure will be zero for all non-root callers +(effective UID not 0). +Equivalent to the ATTR_CMN_OBJID attribute of the parent directory. +This identifier need not be persistent across an unmount/mount sequence. +.Pp +. +On a volume that supports hard links, a multiply linked file has no unique parent. +This attribute will return an unspecified parent. +.Pp +. +For some volume formats this attribute is very expensive to calculate. +. +.It ATTR_CMN_SCRIPT +(read/write) A +.Vt text_encoding_t +containing a text encoding hint for +the file system object's name. +It is included to facilitate the lossless round trip conversion of names between +Unicode and traditional Mac OS script encodings. +The values are defined in +.Aq Pa CarbonCore/TextCommon.h . +File systems that do not have an appropriate text encoding value should return +kTextEncodingMacUnicode. +See DTS Q&A 1173 "File Manager Text Encoding Hints". +. +.It ATTR_CMN_CRTIME +(read/write) A +.Vt timespec +structure containing the time that the file system object +was created. +. +.It ATTR_CMN_MODTIME +(read/write) A +.Vt timespec +structure containing the time that the file system object +was last modified. +Equivalent to the +.Fa st_mtimespec +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_CHGTIME +(read/write) A +.Vt timespec +structure containing the time that the file system object's +attributes were last modified. +Equivalent to the +.Fa st_ctimespec +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_ACCTIME +(read/write) A +.Vt timespec +structure containing the time that the file system object +was last accessed. +Equivalent to the +.Fa st_atimespec +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_BKUPTIME +(read/write) A +.Vt timespec +structure containing the time that the file system object was +last backed up. +This value is for use by backup utilities. +The file system stores but does not interpret the value. +. +.It ATTR_CMN_FNDRINFO +(read/write) 32 bytes of data for use by the Finder. +Equivalent to the concatenation of a +.Vt FileInfo +structure and an +.Vt ExtendedFileInfo +structure +(or, for directories, a +.Vt FolderInfo +structure and an +.Vt ExtendedFolderInfo +structure). +These structures are defined in +.Aq Pa CarbonCore/Finder.h . 
+.Pp +This attribute is not byte swapped by the file system. +The value of multibyte fields on disk is always big endian. +When running on a little endian system (such as Darwin on x86), +you must byte swap any multibyte fields. +. +.It ATTR_CMN_OWNERID +(read/write) A +.Vt uid_t +containing the owner of the file system object. +Equivalent to the +.Fa st_uid +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_GRPID +(read/write) A +.Vt gid_t +containing the group of the file system object. +Equivalent to the +.Fa st_gid +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_ACCESSMASK +(read/write) A +.Vt mode_t +containing the access permissions of the file system object. +Equivalent to the +.Fa st_mode +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_CMN_NAMEDATTRCOUNT +A +.Vt unsigned long +containing the number of named attributes of the file system object. +No built-in file systems on Mac OS X currently support named attributes. +. +.It ATTR_CMN_NAMEDATTRLIST +An +.Vt attrreference +structure containing a list of named attributes of the file system object. +No built-in file systems on Mac OS X currently support named attributes. +Because of this, the structure of this attribute's value is not yet defined. +. +.It ATTR_CMN_FLAGS +(read/write) A +.Vt unsigned long +containing file flags. +Equivalent to the +.Fa st_flags +field of the +.Vt stat +structure returned by +.Xr stat 2 . +For more information about these flags, see +.Xr chflags 2 . +.Pp +. +The order that attributes are placed into the attribute buffer +almost invariably matches the order of the attribute mask bit values. +The exception is +.Dv ATTR_CMN_FLAGS . +If its order were based on its bit position, it would be before +the +.Dv ATTR_CMN_NAMEDATTRCOUNT +/ +.Dv ATTR_CMN_NAMEDATTRLIST +pair; however, +it is placed in the buffer after them. +. +.It ATTR_CMN_USERACCESS +A +.Vt unsigned long +containing the effective permissions of the current user +(the calling process's effective UID) for this file system object. +You can test for read, write, and execute permission using +.Dv R_OK , +.Dv W_OK , +and +.Dv X_OK , +respectively. See +.Xr access 2 +for more details. +. +.El +. +.Sh VOLUME ATTRIBUTES +. +Volume attributes relate to volumes (that is, mounted file systems). +The following volume attributes are defined. +. +.Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP +. +.It ATTR_VOL_INFO +For reasons that are not at all obvious, you must set +.Dv ATTR_VOL_INFO +in the +.Fa volattr +field if you request any other volume attributes. +This does not result in any attribute data being added to the attribute buffer. +. +.It ATTR_VOL_FSTYPE +A +.Vt unsigned long +containing the file system type. +Equivalent to the +.Fa f_type +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +Generally not a useful value. +. +.It ATTR_VOL_SIGNATURE +A +.Vt unsigned long +containing the volume signature word. +This value is unique within a given file system type and lets you +distinguish between different volume formats handled by the same file system. +See +.Aq Pa CarbonCore/Files.h +for more details. +. +.It ATTR_VOL_SIZE +An +.Vt off_t +containing the total size of the volume in bytes. +. +.It ATTR_VOL_SPACEFREE +An +.Vt off_t +containing the free space on the volume in bytes. +. +.It ATTR_VOL_SPACEAVAIL +An +.Vt off_t +containing the space, in bytes, on the volume available to non-privileged processes.
+This is the free space minus the amount of space reserved by the system to prevent critical +disk exhaustion errors. +Non-privileged programs, like a disk management tool, should use this value to display the +space available to the user. +.Pp +.Dv ATTR_VOL_SPACEAVAIL +is to +.Dv ATTR_VOL_SPACEFREE +as +.Fa f_bavail +is to +.Fa f_bfree +in +.Xr statfs 2 . +. +.It ATTR_VOL_MINALLOCATION +An +.Vt off_t +containing the minimum allocation size on the volume in bytes. +If you create a file containing one byte, it will consume this much space. +. +.It ATTR_VOL_ALLOCATIONCLUMP +An +.Vt off_t +containing the allocation clump size on the volume, in bytes. +As a file is extended, the file system will attempt to allocate +this much space each time in order to reduce fragmentation. +. +.It ATTR_VOL_IOBLOCKSIZE +A +.Vt unsigned long +containing the optimal block size when reading or writing data. +Equivalent to the +.Fa f_iosize +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +. +.It ATTR_VOL_OBJCOUNT +A +.Vt unsigned long +containing the number of file system objects on the volume. +. +.It ATTR_VOL_FILECOUNT +A +.Vt unsigned long +containing the number of files on the volume. +. +.It ATTR_VOL_DIRCOUNT +A +.Vt unsigned long +containing the number of directories on the volume. +. +.It ATTR_VOL_MAXOBJCOUNT +A +.Vt unsigned long +containing the maximum number of file system objects that can be stored on the volume. +. +.It ATTR_VOL_MOUNTPOINT +An +.Vt attrreference +structure containing the path to the volume's mount point as a +UTF-8 encoded, null terminated C string. +The attribute data length will not be greater than +.Dv MAXPATHLEN . +Equivalent to the +.Fa f_mntonname +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +. +.It ATTR_VOL_NAME +(read/write) An +.Vt attrreference +structure containing the name of the volume as a +UTF-8 encoded, null terminated C string. +The attribute data length will not be greater than +.Dv NAME_MAX + +1. +.Pp +. +This attribute is only read/write if the +.Dv VOL_CAP_INT_VOL_RENAME +bit is set in the volume capabilities (see below). +.Pp +. +.It ATTR_VOL_MOUNTFLAGS +A +.Vt unsigned long +containing the volume mount flags. +This is a copy of the value passed to the +.Fa flags +parameter of +.Xr mount 2 +when the volume was mounted. +Equivalent to the +.Fa f_flags +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +. +.It ATTR_VOL_MOUNTEDDEVICE +An +.Vt attrreference +structure that returns the same value as the +.Fa f_mntfromname +field of the +.Vt statfs +structure returned by +.Xr statfs 2 . +For local volumes this is the path to the device on which the volume is mounted as a +UTF-8 encoded, null terminated C string. +For network volumes, this is a unique string that identifies the mount. +The attribute data length will not be greater than +.Dv MAXPATHLEN . +.Pp +. +.It ATTR_VOL_ENCODINGSUSED +An +.Vt unsigned long long +containing a bitmap of the text encodings used on this volume. +For more information about this, see the discussion of +.Fa encodingsBitmap +in DTS Technote 1150 "HFS Plus Volume Format". +. +.It ATTR_VOL_CAPABILITIES +A +.Vt vol_capabilities_attr_t +structure describing the optional features supported by this volume. +See below for a discussion of volume capabilities. +. +.It ATTR_VOL_ATTRIBUTES +A +.Vt vol_attributes_attr_t +structure describing the attributes supported by this volume. +This structure is discussed below, along with volume capabilities. +. +.El +. +.Sh DIRECTORY ATTRIBUTES +. 
+The following directory attributes are defined. +. +.Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP +. +.It ATTR_DIR_LINKCOUNT +A +.Vt unsigned long +containing the number of file system objects in the directory, including +synthetic items such as "." and "..". +For historical reasons, you should not always rely on this value being accurate. +.Pp +If you're implementing a volume format on which this is hard to calculate, +you should not support this attribute. +While it's traditional to return a constant value of 1 in the +.Fa st_nlink +field of the +.Vt stat +structure as returned by +.Xr stat 2 , +it's not necessary to do this here because there is a +defined way to indicate that you do not support the attribute. +. +.It ATTR_DIR_ENTRYCOUNT +A +.Vt unsigned long +containing the number of file system objects in the directory, not including +any synthetic items. +. +.It ATTR_DIR_MOUNTSTATUS +A +.Vt unsigned long +containing flags describing what's mounted on the directory. +Currently the only flag defined is +.Dv DIR_MNTSTATUS_MNTPOINT , +which indicates that there is a file system mounted on this directory. +Due to a bug (r. 3502822), this flag is never set on current systems. +. +.El +. +.Sh FILE ATTRIBUTES +. +The following file attributes are defined. +. +.Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP +. +.It ATTR_FILE_LINKCOUNT +A +.Vt unsigned long +containing the number of hard links to this file. +Equivalent to the +.Fa st_nlink +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_FILE_TOTALSIZE +An +.Vt off_t +containing the total number of bytes in all forks of the file (the logical size). +. +.It ATTR_FILE_ALLOCSIZE +An +.Vt off_t +containing a count of the bytes on disk used by all of the file's forks (the physical size). +. +.It ATTR_FILE_IOBLOCKSIZE +A +.Vt unsigned long +containing the optimal block size when reading or writing this file's data. +. +.It ATTR_FILE_CLUMPSIZE +A +.Vt unsigned long +containing the allocation clump size for this file, in bytes. +As the file is extended, the file system will attempt to allocate +this much space each time in order to reduce fragmentation. +This value applies to the data fork. +. +.It ATTR_FILE_DEVTYPE +(read/write) A +.Vt unsigned long +containing the device type for a special device file. +Equivalent to the +.Fa st_rdev +field of the +.Vt stat +structure returned by +.Xr stat 2 . +. +.It ATTR_FILE_FILETYPE +A +.Vt unsigned long +whose value is reserved. +Clients should ignore its value. +New volume format implementations should not support this attribute. +. +.It ATTR_FILE_FORKCOUNT +A +.Vt unsigned long +containing the number of forks in the file. +No built-in file systems on Mac OS X currently support forks other +than the data and resource fork. +. +.It ATTR_FILE_FORKLIST +An +.Vt attrreference +structure containing a list of named forks of the file. +No built-in file systems on Mac OS X currently support forks +other than the data and resource fork. +Because of this, the structure of this attribute's value is not yet defined. +. +.It ATTR_FILE_DATALENGTH +An +.Vt off_t +containing the length of the data fork in bytes (the logical size). +. +.It ATTR_FILE_DATAALLOCSIZE +An +.Vt off_t +containing a count of the bytes on disk used by the data fork (the physical size). +. +.It ATTR_FILE_DATAEXTENTS +An +.Vt extentrecord +array for the data fork. +The array contains eight +.Vt diskextent +structures which represent the first +eight extents of the fork. +.Pp +This attribute exists for compatibility reasons.
+New clients should not use this attribute. +Rather, they should use the +.Dv F_LOG2PHYS +command in +.Xr fcntl 2 . +.Pp +. +In current implementations the value may not be entirely accurate for +a variety of reasons. +. +.It ATTR_FILE_RSRCLENGTH +An +.Vt off_t +containing the length of the resource fork in bytes (the logical size). +. +.It ATTR_FILE_RSRCALLOCSIZE +An +.Vt off_t +containing a count of the bytes on disk used by the resource fork (the physical size). +. +.It ATTR_FILE_RSRCEXTENTS +An +.Vt extentrecord +array for the resource fork. +The array contains eight +.Vt diskextent +structures which represent the first +eight extents of the fork. +.Pp +See also +.Dv ATTR_FILE_DATAEXTENTS . +. +.El +. +.Sh FORK ATTRIBUTES +. +Fork attributes relate to the actual data in the file, +which can be held in multiple named contiguous ranges, or forks. +The following fork attributes are defined. +. +.Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP +. +.It ATTR_FORK_TOTALSIZE +An +.Vt off_t +containing the length of the fork in bytes (the logical size). +. +.It ATTR_FORK_ALLOCSIZE +An +.Vt off_t +containing a count of the bytes on disk used by the fork (the physical size). +. +.El +.Pp +. +Fork attributes are not properly implemented by any current Mac OS X +volume format implementation. +We strongly recommend that client programs do not request fork attributes. +If you are implementing a volume format, you should not support these attributes. +. +.Sh VOLUME CAPABILITIES +. +.\" vol_capabilities_attr_t +. +Not all volumes support all features. The +.Dv ATTR_VOL_CAPABILITIES +attribute returns a +.Vt vol_capabilities_attr_t +structure (shown below) that indicates which features are supported by the volume. +. +.Bd -literal +typedef u_int32_t vol_capabilities_set_t[4]; +.Pp +. +#define VOL_CAPABILITIES_FORMAT 0 +#define VOL_CAPABILITIES_INTERFACES 1 +#define VOL_CAPABILITIES_RESERVED1 2 +#define VOL_CAPABILITIES_RESERVED2 3 +.Pp +. +typedef struct vol_capabilities_attr { + vol_capabilities_set_t capabilities; + vol_capabilities_set_t valid; +} vol_capabilities_attr_t; +.Ed +.Pp +. +The structure contains two fields, +.Fa capabilities +and +.Fa valid . +Each consists of an array of four elements. +The arrays are indexed by the following values. +. +.Bl -tag -width VOL_CAP_FMT_PERSISTENTOBJECTIDS +. +.It VOL_CAPABILITIES_FORMAT +This element contains information about the volume format. +See +.Dv VOL_CAP_FMT_PERSISTENTOBJECTIDS +and so on, below. +. +.It VOL_CAPABILITIES_INTERFACES +This element contains information about which optional functions are +supported by the volume format implementation. +See +.Dv VOL_CAP_INT_SEARCHFS +and so on, below. +. +.It VOL_CAPABILITIES_RESERVED1 +Reserved. +A file system implementation should set this element to zero. +A client program should ignore this element. +. +.It VOL_CAPABILITIES_RESERVED2 +Reserved. +A file system implementation should set this element to zero. +A client program should ignore this element. +. +.El +.Pp +. +The +.Fa valid +field contains bit sets that indicate which flags are known to the volume format +implementation. +Each bit indicates whether the contents of the corresponding bit in the +.Fa capabilities +field is valid. +.Pp +. +The +.Fa capabilities +field contains bit sets that indicate whether a particular feature is implemented +by this volume format. +.Pp +. 
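+As an illustration, here is a minimal sketch of how you might test
+these two fields together.
+It checks whether the volume containing
+.Fa volPath
+supports
+.Xr exchangedata 2
+by examining the
+.Dv VOL_CAP_INT_EXCHANGEDATA
+bit; the
+.Vt VolCapBuf
+structure and the function name are illustrative only, not system
+interfaces.
+.Bd -literal
+#include <string.h>
+#include <sys/attr.h>
+#include <unistd.h>
+.Pp
+/* Attribute buffer for ATTR_VOL_CAPABILITIES; the leading length
+   field follows the attribute buffer rules described above. */
+struct VolCapBuf {
+    unsigned long           length;
+    vol_capabilities_attr_t caps;
+};
+typedef struct VolCapBuf VolCapBuf;
+.Pp
+/* Returns 1 if the volume supports exchangedata(2), 0 if it does
+   not (or if the implementation does not report this bit), or -1
+   (with errno set) if the attributes cannot be fetched. */
+static int SupportsExchangeData(const char *volPath)
+{
+    struct attrlist attrList;
+    VolCapBuf       buf;
+.Pp
+    memset(&attrList, 0, sizeof(attrList));
+    attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+    attrList.volattr     = ATTR_VOL_INFO | ATTR_VOL_CAPABILITIES;
+.Pp
+    if (getattrlist(volPath, &attrList, &buf, sizeof(buf), 0) != 0)
+        return -1;
+.Pp
+    /* A capability bit is meaningful only if the matching bit is
+       set in the valid field. */
+    if (buf.caps.valid[VOL_CAPABILITIES_INTERFACES]
+            & VOL_CAP_INT_EXCHANGEDATA) {
+        return (buf.caps.capabilities[VOL_CAPABILITIES_INTERFACES]
+            & VOL_CAP_INT_EXCHANGEDATA) != 0;
+    }
+    return 0;
+}
+.Ed
+.Pp
+.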
+The following bits are defined in the first element (indexed by +.Dv VOL_CAPABILITIES_FORMAT ) +of the +.Fa capabilities +and +.Fa valid +fields of the +.Vt vol_capabilities_attr_t +structure. +. +.Bl -tag -width VOL_CAP_FMT_PERSISTENTOBJECTIDS +. +.It VOL_CAP_FMT_PERSISTENTOBJECTIDS +If this bit is set the volume format supports persistent object identifiers +and can look up file system objects by their IDs. +See +.Dv ATTR_CMN_OBJPERMANENTID +for details about how to obtain these identifiers. +. +.It VOL_CAP_FMT_SYMBOLICLINKS +If this bit is set the volume format supports symbolic links. +. +.It VOL_CAP_FMT_HARDLINKS +If this bit is set the volume format supports hard links. +. +.It VOL_CAP_FMT_JOURNAL +If this bit is set the volume format supports a journal used to +speed recovery in case of unplanned restart (such as a power outage +or crash). +This does not necessarily mean the volume is actively using a journal. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_JOURNAL_ACTIVE +If this bit is set the volume is currently using a journal for +speedy recovery after an unplanned restart. +This bit can be set only if +.Dv VOL_CAP_FMT_JOURNAL +is also set. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_NO_ROOT_TIMES +If this bit is set the volume format does not store reliable times for +the root directory, so you should not depend on them to detect changes, +identify volumes across unmount/mount, and so on. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_SPARSE_FILES +If this bit is set the volume format supports sparse files, +that is, files which can have 'holes' that have never been written +to, and thus do not consume space on disk. +A sparse file may have an allocated size on disk that is less than its logical length (that is, +.Dv ATTR_FILE_ALLOCSIZE +< +.Dv ATTR_FILE_TOTALSIZE ). +. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_ZERO_RUNS +For security reasons, parts of a file (runs) that have never been +written to must appear to contain zeroes. +When this bit is set, the volume keeps track of allocated but unwritten +runs of a file so that it can substitute zeroes without actually +writing zeroes to the media. +This provides performance similar to sparse files, but not the space savings. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_CASE_SENSITIVE +If this bit is set the volume format treats upper and lower case +characters in file and directory names as different. +Otherwise an upper case character is equivalent to a lower case character, +and you can't have two names that differ solely in the case of +the characters. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_CASE_PRESERVING +If this bit is set the volume format preserves the case of +file and directory names. +Otherwise the volume may change the case of some characters +(typically making them all upper or all lower case). +A volume that sets +.Dv VOL_CAP_FMT_CASE_SENSITIVE +must also set +.Dv VOL_CAP_FMT_CASE_PRESERVING . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_FAST_STATFS +This bit is used as a hint to upper layers (specifically the Carbon File Manager) to +indicate that +.Xr statfs 2 +is fast enough that its results need not be cached by the caller. +A volume format implementation that caches the +.Xr statfs 2 +information in memory should set this bit. 
+An implementation that must always read from disk or always perform a network +transaction to satisfy +.Xr statfs 2 +should not set this bit. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_FMT_2TB_FILESIZE +If this bit is set the volume format supports file +sizes up to 2TB. It does not necessarily mean that the file +system does not support file sizes larger than 2TB, nor does +it mean that 2TB of space is currently available on the volume. +.Pp +Introduced with Darwin 8.0 (Mac OS X version 10.4). +. +.El +.Pp +. +The following bits are defined in the second element (indexed by +.Dv VOL_CAPABILITIES_INTERFACES ) +of the +.Fa capabilities +and +.Fa valid +fields of the +.Vt vol_capabilities_attr_t +structure. +. +.Bl -tag -width VOL_CAP_FMT_PERSISTENTOBJECTIDS +. +.It VOL_CAP_INT_SEARCHFS +If this bit is set the volume format implementation supports +.Xr searchfs 2 . +. +.It VOL_CAP_INT_ATTRLIST +If this bit is set the volume format implementation supports +.Fn getattrlist +and +.Xr setattrlist 2 . +. +.It VOL_CAP_INT_NFSEXPORT +If this bit is set the volume format implementation allows this volume to be exported via NFS. +. +.It VOL_CAP_INT_READDIRATTR +If this bit is set the volume format implementation supports +.Xr getdirentriesattr 2 . +. +.It VOL_CAP_INT_EXCHANGEDATA +If this bit is set the volume format implementation supports +.Xr exchangedata 2 . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_INT_COPYFILE +If this bit is set the volume format implementation supports the (private and undocumented) +copyfile() function. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_INT_ALLOCATE +If this bit is set the volume format implementation supports the +.Dv F_PREALLOCATE +selector of +.Xr fcntl 2 . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_INT_VOL_RENAME +If this bit is set the volume format implementation allows you to +modify the volume name using +.Xr setattrlist 2 . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_INT_ADVLOCK +If this bit is set the volume format implementation supports +advisory locking, that is, the +.Dv F_GETLK , +.Dv F_SETLK , +and +.Dv F_SETLKW +selectors to +.Xr fcntl 2 . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It VOL_CAP_INT_FLOCK +If this bit is set the volume format implementation supports +whole file locks. +This includes +.Xr flock 2 +and the +.Dv O_EXLOCK +and +.Dv O_SHLOCK +flags to +.Xr open 2 . +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.El +.Pp +. +.\" vol_attributes_attr_t +. +A volume can also report which attributes it supports. +This information is returned by the +.Dv ATTR_VOL_ATTRIBUTES +attribute, which returns a +.Vt vol_attributes_attr_t +structure (shown below). +. +.Bd -literal +typedef struct attribute_set { + attrgroup_t commonattr; /* common attribute group */ + attrgroup_t volattr; /* volume attribute group */ + attrgroup_t dirattr; /* directory attribute group */ + attrgroup_t fileattr; /* file attribute group */ + attrgroup_t forkattr; /* fork attribute group */ +} attribute_set_t; +.Pp +. +typedef struct vol_attributes_attr { + attribute_set_t validattr; + attribute_set_t nativeattr; +} vol_attributes_attr_t; +.Ed +.Pp +. +The +.Fa validattr +field consists of a number of bit sets that indicate whether an attribute is +supported by the volume format implementation.
+The +.Fa nativeattr +field is similar except that the bit sets indicate whether an attribute is supported +natively by the volume format. +An attribute is supported natively if the volume format implementation does not have to do +any complex conversions to access the attribute. +For example, a volume format might support persistent object identifiers, but +doing so requires a complex table lookup that is not part of the core volume +format. +In that case, the +.Dv ATTR_VOL_ATTRIBUTES +attribute would return +.Dv ATTR_CMN_OBJPERMANENTID +set in the +.Fa validattr +field of the +.Vt vol_attributes_attr_t , +but not in the +.Fa nativeattr +field. +. +.Sh RETURN VALUES +Upon successful completion a value of 0 is returned. +Otherwise, a value of -1 is returned and +.Va errno +is set to indicate the error. +. +.Sh COMPATIBILITY +Not all volumes support +.Fn getattrlist . +The best way to test whether a volume supports this function is to +simply call it and check the error result. +.Fn getattrlist +will return +.Dv ENOTSUP +if it is not supported on a particular volume. +.Pp +. +The +.Fn getattrlist +function has been undocumented for more than two years. +In that time a number of volume format implementations have been created without +a proper specification for the behaviour of this routine. +You may encounter volume format implementations with slightly different +behaviour than what is described here. +Your program is expected to be tolerant of this variant behaviour. +.Pp +. +If you're implementing a volume format that supports +.Fn getattrlist , +you should be careful to support the behaviour specified by this document. +. +.Sh ERRORS +.Fn getattrlist +will fail if: +.Bl -tag -width Er +. +.It Bq Er ENOTSUP +The volume does not support +.Fn getattrlist . +. +.It Bq Er ENOTDIR +A component of the path prefix is not a directory. +. +.It Bq Er ENAMETOOLONG +A component of a path name exceeded +.Dv NAME_MAX +characters, or an entire path name exceeded +.Dv PATH_MAX +characters. +. +.It Bq Er ENOENT +The file system object does not exist. +. +.It Bq Er EACCES +Search permission is denied for a component of the path prefix. +. +.It Bq Er ELOOP +Too many symbolic links were encountered in translating the pathname. +. +.It Bq Er EFAULT +.Fa path , +.Fa attrList , +or +.Fa attrBuf +points to an invalid address. +. +.It Bq Er EINVAL +The +.Fa bitmapcount +field of +.Fa attrList +is not +.Dv ATTR_BIT_MAP_COUNT . +. +.It Bq Er EINVAL +You requested an invalid attribute. +. +.It Bq Er EINVAL +You requested an attribute that is not supported for this file system object. +. +.It Bq Er EINVAL +You requested volume attributes and directory or file attributes. +. +.It Bq Er EINVAL +You requested volume attributes but +.Fa path +does not reference the root of the volume. +. +.It Bq Er EROFS +The volume is read-only but must be modified in order to return this attribute. +. +.It Bq Er EIO +An I/O error occurred while reading from or writing to the file system. +.El +.Pp +. +.Sh CAVEATS +. +If you request any volume attributes, you must set +.Dv ATTR_VOL_INFO +in the +.Fa volattr +field, even though it generates no result in the attribute buffer. +.Pp +. +The order that attributes are stored in the attribute buffer almost +invariably matches the order of attribute mask bit values. +For example, +.Dv ATTR_CMN_NAME +(0x00000001) comes before +.Dv ATTR_CMN_DEVID +(0x00000002) because its value is smaller.
+However, you can not rely on this ordering because there is one key exception: +.Dv ATTR_CMN_FLAGS +is placed after the +.Dv ATTR_CMN_NAMEDATTRCOUNT +/ +.Dv ATTR_CMN_NAMEDATTRLIST +pair, even though its bit position indicates that it should come before. +This is due to a bug in an early version of Mac OS X that can't be fixed for +binary compatibility reasons. +When ordering attributes, you should always use the order in which they +are described above. +.Pp +. +For more caveats, see also the compatibility notes above. +. +.Sh EXAMPLES +. +The following code prints the file type and creator of a file, +assuming that the volume supports the required attributes. +. +.Bd -literal +#include +#include +#include +#include +#include +#include +#include +.Pp +. +typedef struct attrlist attrlist_t; +.Pp +. +struct FInfoAttrBuf { + unsigned long length; + fsobj_type_t objType; + char finderInfo[32]; +}; +typedef struct FInfoAttrBuf FInfoAttrBuf; +.Pp +. +static int FInfoDemo(const char *path) +{ + int err; + attrlist_t attrList; + FInfoAttrBuf attrBuf; +.Pp +. + memset(&attrList, 0, sizeof(attrList)); + attrList.bitmapcount = ATTR_BIT_MAP_COUNT; + attrList.commonattr = ATTR_CMN_OBJTYPE | ATTR_CMN_FNDRINFO; +.Pp + + err = getattrlist(path, &attrList, &attrBuf, sizeof(attrBuf), 0); + if (err != 0) { + err = errno; + } +.Pp + + if (err == 0) { + assert(attrBuf.length == sizeof(attrBuf)); +.Pp + + printf("Finder information for %s:\en", path); + switch (attrBuf.objType) { + case VREG: + printf("file type = '%.4s'\en", &attrBuf.finderInfo[0]); + printf("file creator = '%.4s'\en", &attrBuf.finderInfo[4]); + break; + case VDIR: + printf("directory\en"); + break; + default: + printf("other object type, %d\en", attrBuf.objType); + break; + } + } +.Pp +. + return err; +} +.Ed +.Pp +. +The following code is an alternative implementation that uses nested structures +to group the related attributes. +. +.Bd -literal +#include +#include +#include +#include +#include +#include +#include +#include +.Pp +. +typedef struct attrlist attrlist_t; +.Pp +. +struct FInfo2CommonAttrBuf { + fsobj_type_t objType; + char finderInfo[32]; +}; +typedef struct FInfo2CommonAttrBuf FInfo2CommonAttrBuf; +.Pp +. +struct FInfo2AttrBuf { + unsigned long length; + FInfo2CommonAttrBuf common; +}; +typedef struct FInfo2AttrBuf FInfo2AttrBuf; +.Pp +. +static int FInfo2Demo(const char *path) +{ + int err; + attrlist_t attrList; + FInfo2AttrBuf attrBuf; +.Pp +. + memset(&attrList, 0, sizeof(attrList)); + attrList.bitmapcount = ATTR_BIT_MAP_COUNT; + attrList.commonattr = ATTR_CMN_OBJTYPE | ATTR_CMN_FNDRINFO; +.Pp +. + err = getattrlist(path, &attrList, &attrBuf, sizeof(attrBuf), 0); + if (err != 0) { + err = errno; + } +.Pp +. + if (err == 0) { + assert(attrBuf.length == sizeof(attrBuf)); +.Pp +. + printf("Finder information for %s:\en", path); + switch (attrBuf.common.objType) { + case VREG: + printf( + "file type = '%.4s'\en", + &attrBuf.common.finderInfo[0] + ); + printf( + "file creator = '%.4s'\en", + &attrBuf.common.finderInfo[4] + ); + break; + case VDIR: + printf("directory\en"); + break; + default: + printf( + "other object type, %d\en", + attrBuf.common.objType + ); + break; + } + } +.Pp +. + return err; +} +.Ed +.Pp +. +The following example shows how to deal with variable length attributes. +It assumes that the volume specified by +.Fa path +supports the necessary attributes. +. +.Bd -literal +#include +#include +#include +#include +#include +#include +#include +#include +.Pp +. +typedef struct attrlist attrlist_t; +.Pp +. 
+struct VolAttrBuf { + unsigned long length; + unsigned long fileCount; + unsigned long dirCount; + attrreference_t mountPointRef; + attrreference_t volNameRef; + char mountPointSpace[MAXPATHLEN]; + char volNameSpace[MAXPATHLEN]; +}; +typedef struct VolAttrBuf VolAttrBuf; +.Pp +. +static int VolDemo(const char *path) +{ + int err; + attrlist_t attrList; + VolAttrBuf attrBuf; +.Pp +. + memset(&attrList, 0, sizeof(attrList)); + attrList.bitmapcount = ATTR_BIT_MAP_COUNT; + attrList.volattr = ATTR_VOL_INFO + | ATTR_VOL_FILECOUNT + | ATTR_VOL_DIRCOUNT + | ATTR_VOL_MOUNTPOINT + | ATTR_VOL_NAME; +.Pp + + err = getattrlist(path, &attrList, &attrBuf, sizeof(attrBuf), 0); + if (err != 0) { + err = errno; + } +.Pp + + if (err == 0) { + assert(attrBuf.length > offsetof(VolAttrBuf, mountPointSpace)); + assert(attrBuf.length <= sizeof(attrBuf)); +.Pp + + printf("Volume information for %s:\en", path); + printf("ATTR_VOL_FILECOUNT: %lu\en", attrBuf.fileCount); + printf("ATTR_VOL_DIRCOUNT: %lu\en", attrBuf.dirCount); + printf( + "ATTR_VOL_MOUNTPOINT: %.*s\en", + (int) attrBuf.mountPointRef.attr_length, + ( ((char *) &attrBuf.mountPointRef) + + attrBuf.mountPointRef.attr_dataoffset ) + ); + printf( + "ATTR_VOL_NAME: %.*s\en", + (int) attrBuf.volNameRef.attr_length, + ( ((char *) &attrBuf.volNameRef) + + attrBuf.volNameRef.attr_dataoffset ) + ); + } +.Pp +. + return err; +} +.Ed +.Pp +. +.Sh SEE ALSO +. +.Xr access 2 , +.Xr chflags 2 , +.Xr exchangedata 2 , +.Xr fcntl 2 , +.Xr getdirentriesattr 2 , +.Xr mount 2 , +.Xr searchfs 2 , +.Xr setattrlist 2 , +.Xr stat 2 , +.Xr statfs 2 +. +.Sh HISTORY +A +.Fn getattrlist +function call appeared in Darwin 1.3.1 (Mac OS X version 10.0). +. diff --git a/bsd/man/man2/getdirentriesattr.2 b/bsd/man/man2/getdirentriesattr.2 new file mode 100644 index 000000000..9c59e22ae --- /dev/null +++ b/bsd/man/man2/getdirentriesattr.2 @@ -0,0 +1,427 @@ +.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved. +.\" +.\" The contents of this file constitute Original Code as defined in and +.\" are subject to the Apple Public Source License Version 1.1 (the +.\" "License"). You may not use this file except in compliance with the +.\" License. Please obtain a copy of the License at +.\" http://www.apple.com/publicsource and read it before using this file. +.\" +.\" This Original Code and all software distributed under the License are +.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the +.\" License for the specific language governing rights and limitations +.\" under the License. +.\" +.\" @(#)getdirentriesattr.2 +. +.Dd December 15, 2003 +.Dt GETDIRENTRIESATTR 2 +.Os Darwin +.Sh NAME +.Nm getdirentriesattr +.Nd get file system attributes for multiple directory entries +.Sh SYNOPSIS +.Fd #include +.Fd #include +.Ft int +.Fn getdirentriesattr "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long * count" "unsigned long * basep" "unsigned long * newState" "unsigned long options" +. +. +.Sh DESCRIPTION +The +.Fn getdirentriesattr +function reads directory entries and returns their attributes (that is, metadata). +You can think of it as a combination of +.Xr getdirentries 2 +and +.Xr getattrlist 2 . +The function reads directory entries from the directory referenced by the +file descriptor +.Fa fd . 
+Attributes of those directory entries are placed into the buffer specified by +.Fa attrBuf +and +.Fa attrBufSize . +The +.Fa attrList +parameter determines what attributes are returned for each entry. +The +.Fa count +parameter contains the number of directory entries requested and returned. +The +.Fa basep +parameter returns the directory offset in a manner similar to +.Xr getdirentries 2 . +The +.Fa newState +parameter allows you to check whether the directory has been modified while +you were reading it. +The +.Fa options +parameter lets you control specific aspects of the function's behaviour. +.Pp +. +The +.Fn getdirentriesattr +function is only supported by certain volume format implementations. +For maximum compatibility, client programs should use high-level APIs +(such as the Carbon File Manager) to access file system attributes. +These high-level APIs include logic to emulate file system attributes +on volumes that don't support +.Fn getdirentriesattr . +.Pp +. +.\" fd parameter +. +The +.Fa fd +parameter must be a file descriptor that references a directory that you have opened for reading. +.Pp +. +.\" attrList parameter +. +The +.Fa attrList +parameter is a pointer to an +.Vt attrlist +structure. +You are responsible for filling out all fields of this structure before calling the function. +See the discussion of the +.Xr getattrlist 2 +function for a detailed description of this structure. +To get an attribute you must set the corresponding bit in the appropriate +.Vt attrgroup_t +field of the +.Vt attrlist +structure. +You must not request volume attributes. +.Pp +. +.\" attrBuf and attrBufSize parameters +. +The +.Fa attrBuf +and +.Fa attrBufSize +parameters specify a buffer into which the function places attribute values. +The attributes for any given directory entry are grouped together and +packed in exactly the same way as they are returned from +.Xr getattrlist 2 . +These groups are then placed into the buffer, one after another. +As each group starts with a leading +.Vt unsigned long +that contains the +overall length of the group, you can step from one group to the next +by simply adding this length to your pointer. +The sample code (below) shows how to do this. +The initial contents of this buffer are ignored. +.Pp +. +.\" count parameter +. +The +.Fa count +parameter points to a +.Vt unsigned long +variable. +You should initialise this variable to be the number of directory entries for which +you wish to get attributes. +On return, this variable contains the number of directory entries whose attributes +have been placed into the attribute buffer. +This may be smaller than the number that you requested. +.Pp +. +.\" basep parameter +The +.Fa basep +parameter returns the offset of the last directory entry read, in a +manner identical to +.Xr getdirentries 2 . +You can use this value to reset a directory iteration to a known position +using +.Xr lseek 2 . +The initial value of the variable is ignored. +.Pp +. +.\" newState parameter +. +The +.Fa newState +parameter returns a value that changes if the directory has been modified. +If you're iterating through the directory by making repeated calls to +.Fn getdirentriesattr , +you can compare subsequent values of +.Fa newState +to determine whether the directory has been modified (and thus restart +your iteration at the beginning). +The initial value of the variable is ignored. +.Pp +. +.\" options parameter +. +The +.Fa options +parameter is a bit set that controls the behaviour of +.Fn getdirentriesattr . 
+The following option bits are defined. +. +.Bl -tag -width FSOPT_NOINMEMUPDATE +. +.It FSOPT_NOINMEMUPDATE +This tells +.Fn getdirentriesattr +to return the directory entries from disk rather than taking the extra step of looking +at in-memory data structures, which may contain changes that haven't been flushed to disk. +.Pp +This option allowed certain performance optimizations for specific clients on older systems. +We currently recommend that clients not set this option and that file system +implementations ignore it. +. +.El +.Pp +It is typical to ask for a combination of common, file, and directory +attributes and then use the value of the +.Dv ATTR_CMN_OBJTYPE +attribute to parse the resulting attribute buffer. +. +.Sh RETURN VALUES +Upon successful completion a value of 0 or 1 is returned. +The value 0 indicates that the routine completed successfully. +The value 1 indicates that the routine completed successfully and has +returned the last entry in the directory. +On error, a value of -1 is returned and +.Va errno +is set to indicate the error. +. +.Sh COMPATIBILITY +Not all volumes support +.Fn getdirentriesattr . +You can test whether a volume supports +.Fn getdirentriesattr +by using +.Xr getattrlist 2 +to get the volume capabilities attribute +.Dv ATTR_VOL_CAPABILITIES , +and then testing the +.Dv VOL_CAP_INT_READDIRATTR +flag. +.Pp +. +The +.Fn getdirentriesattr +function has been undocumented for more than two years. +In that time a number of volume format implementations have been created without +a proper specification for the behaviour of this routine. +You may encounter volume format implementations with slightly different +behaviour than what is described here. +Your program is expected to be tolerant of this variant behaviour. +.Pp +. +If you're implementing a volume format that supports +.Fn getdirentriesattr , +you should be careful to support the behaviour specified by this document. +. +.Sh ERRORS +.Fn getdirentriesattr +will fail if: +.Bl -tag -width Er +. +.It Bq Er ENOTSUP +The volume does not support +.Fn getdirentriesattr . +. +.It Bq Er EBADF +.Fa fd +is not a valid file descriptor for a directory open for reading. +. +.It Bq Er EFAULT +.Fa attrList +or +.Fa attrBuf +points to an invalid address. +. +.It Bq Er EINVAL +The +.Fa bitmapcount +field of +.Fa attrList +is not +.Dv ATTR_BIT_MAP_COUNT . +. +.It Bq Er EINVAL +You requested an invalid attribute. +. +.It Bq Er EINVAL +You requested volume attributes. +. +.It Bq Er EINVAL +The +.Fa options +parameter contains an invalid flag. +. +.It Bq Er EIO +An I/O error occurred while reading from or writing to the file system. +.El +.Pp +. +.Sh EXAMPLES +. +The following code lists the contents of a directory using +.Fn getdirentriesattr . +The listing includes the file type and creator for files. +. +.Bd -literal +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +.Pp +. +typedef struct attrlist attrlist_t; +.Pp +. +struct FInfoAttrBuf { + unsigned long length; + attrreference_t name; + fsobj_type_t objType; + char finderInfo[32]; +}; +typedef struct FInfoAttrBuf FInfoAttrBuf; +.Pp +. +enum { + kEntriesPerCall = 10 +}; +.Pp +.
+static int FInfoDemo(const char *dirPath)
+{
+    int             err;
+    int             junk;
+    int             dirFD;
+    attrlist_t      attrList;
+    unsigned long   index;
+    unsigned long   count;
+    unsigned long   junkBaseP;
+    bool            oldStateValid;
+    unsigned long   oldState;
+    unsigned long   newState;
+    bool            done;
+    FInfoAttrBuf *  thisEntry;
+    char            attrBuf[kEntriesPerCall * (sizeof(FInfoAttrBuf) + 64)];
+.Pp
+.
+    // attrBuf is big enough for kEntriesPerCall entries, assuming that
+    // the average name length is less than 64.
+.Pp
+.
+    memset(&attrList, 0, sizeof(attrList));
+    attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+    attrList.commonattr  = ATTR_CMN_NAME
+                         | ATTR_CMN_OBJTYPE
+                         | ATTR_CMN_FNDRINFO;
+.Pp
+
+    err = 0;
+    dirFD = open(dirPath, O_RDONLY, 0);
+    if (dirFD < 0) {
+        err = errno;
+    }
+    if (err == 0) {
+        oldStateValid = false;
+        done = false;
+        do {
+            count = kEntriesPerCall;
+.Pp
+            err = getdirentriesattr(
+                dirFD,
+                &attrList,
+                &attrBuf,
+                sizeof(attrBuf),
+                &count,
+                &junkBaseP,
+                &newState,
+                0
+            );
+            if (err < 0) {
+                err = errno;
+            } else {
+                done = err;
+                err = 0;
+            }
+.Pp
+            if (err == 0) {
+                if (oldStateValid) {
+                    if (newState != oldState) {
+                        printf("*** Directory has changed\en");
+                        oldState = newState;
+                    }
+                } else {
+                    oldState = newState;
+                    oldStateValid = true;
+                }
+.Pp
+                thisEntry = (FInfoAttrBuf *) attrBuf;
+.Pp
+                for (index = 0; index < count; index++) {
+                    switch (thisEntry->objType) {
+                        case VREG:
+                            printf(
+                                "'%4.4s' '%4.4s' ",
+                                &thisEntry->finderInfo[0],
+                                &thisEntry->finderInfo[4]
+                            );
+                            break;
+                        case VDIR:
+                            printf("directory    ");
+                            break;
+                        default:
+                            printf(
+                                "objType = %-2d ",
+                                thisEntry->objType
+                            );
+                            break;
+                    }
+                    printf(
+                        "%s\en",
+                        ((char *) &thisEntry->name)
+                            + thisEntry->name.attr_dataoffset
+                    );
+.Pp
+                    // Advance to the next entry.
+.Pp
+                    thisEntry = (FInfoAttrBuf *)
+                        (((char *) thisEntry) + thisEntry->length);
+                }
+            }
+        } while ( err == 0 && ! done );
+    }
+.Pp
+    if (dirFD != -1) {
+        junk = close(dirFD);
+        assert(junk == 0);
+    }
+.Pp
+    return err;
+}
+.Ed
+.Pp
+.
+.Sh SEE ALSO
+.
+.Xr getattrlist 2 ,
+.Xr getdirentries 2 ,
+.Xr lseek 2
+.
+.Sh HISTORY
+A
+.Fn getdirentriesattr
+function call appeared in Darwin 1.3.1 (Mac OS X version 10.0).
+.
diff --git a/bsd/man/man2/getfsstat.2 b/bsd/man/man2/getfsstat.2
index 525f6a04f..cdc2e2586 100644
--- a/bsd/man/man2/getfsstat.2
+++ b/bsd/man/man2/getfsstat.2
@@ -44,7 +44,7 @@
 .Fd #include <sys/ucred.h>
 .Fd #include <sys/mount.h>
 .Ft int
-.Fn getfsstat "struct statfs *buf" "long bufsize" "int flags"
+.Fn getfsstat "struct statfs *buf" "int bufsize" "int flags"
 .Sh DESCRIPTION
 .Fn Getfsstat
 returns information about all mounted file systems.
@@ -95,21 +95,22 @@ is given as NULL,
 .Fn getfsstat
 returns just the number of mounted file systems.
 .Pp
-Normally
-.Fa flags
-should be specified as
-.Dv MNT_WAIT .
 If
 .Fa flags
 is set to
 .Dv MNT_NOWAIT ,
 .Fn getfsstat
-will return the information it has available without requesting
-an update from each file system.
-Thus, some of the information will be out of date, but
+will directly return the information retained in the kernel
+to avoid delays caused by waiting for updated information from
+a file system that is perhaps temporarily unable to respond.
+Some of the information returned may be out of date, however; if
+.Fa flags
+is set to
+.Dv MNT_WAIT
+instead,
 .Fn getfsstat
-will not block waiting for information from a file system that is
-unable to respond.
+will request updated information from each mounted file system before
+returning.
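+.Pp
+For example, a caller might size its buffer with an initial NULL call
+and then fetch the cached statistics with
+.Dv MNT_NOWAIT .
+The following sketch is illustrative only; the helper name is arbitrary
+and error handling is abbreviated.
+.Bd -literal
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/mount.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+ListMounts(void)
+{
+    int i, count;
+    struct statfs *buf;
+
+    /* With a NULL buffer, getfsstat returns the number of mounts. */
+    count = getfsstat(NULL, 0, MNT_NOWAIT);
+    if (count < 0)
+        return -1;
+
+    buf = malloc(count * sizeof(*buf));
+    if (buf == NULL)
+        return -1;
+
+    /* MNT_NOWAIT returns the statistics cached in the kernel.  A  */
+    /* volume mounted between the two calls simply won't fit in    */
+    /* the buffer and is dropped.                                  */
+    count = getfsstat(buf, count * sizeof(*buf), MNT_NOWAIT);
+    for (i = 0; i < count; i++)
+        printf("%s on %s\en", buf[i].f_mntfromname, buf[i].f_mntonname);
+    free(buf);
+    return (count < 0) ? -1 : 0;
+}
+.Ed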
 .Sh RETURN VALUES
 Upon successful completion, the number of
 .Fa statfs
diff --git a/bsd/man/man2/getpeername.2 b/bsd/man/man2/getpeername.2
index 6d5738014..3fe1c98d4 100644
--- a/bsd/man/man2/getpeername.2
+++ b/bsd/man/man2/getpeername.2
@@ -42,7 +42,7 @@
 .Sh SYNOPSIS
 .Fd #include <sys/socket.h>
 .Ft int
-.Fn getpeername "int s" "struct sockaddr *name" "int *namelen"
+.Fn getpeername "int s" "struct sockaddr *name" "socklen_t *namelen"
 .Sh DESCRIPTION
 .Fn Getpeername
 returns the name of the peer connected to
diff --git a/bsd/man/man2/getsockname.2 b/bsd/man/man2/getsockname.2
index 4582a3ea5..2d63acb38 100644
--- a/bsd/man/man2/getsockname.2
+++ b/bsd/man/man2/getsockname.2
@@ -42,7 +42,7 @@
 .Sh SYNOPSIS
 .Fd #include <sys/socket.h>
 .Ft int
-.Fn getsockname "int s" "struct sockaddr *name" "int *namelen"
+.Fn getsockname "int s" "struct sockaddr *name" "socklen_t *namelen"
 .Sh DESCRIPTION
 .Fn Getsockname
 returns the current
diff --git a/bsd/man/man2/getsockopt.2 b/bsd/man/man2/getsockopt.2
index b1ced804a..1f22a55da 100644
--- a/bsd/man/man2/getsockopt.2
+++ b/bsd/man/man2/getsockopt.2
@@ -44,9 +44,9 @@
 .Fd #include <sys/types.h>
 .Fd #include <sys/socket.h>
 .Ft int
-.Fn getsockopt "int s" "int level" "int optname" "void *optval" "int *optlen"
+.Fn getsockopt "int s" "int level" "int optname" "void *optval" "socklen_t *optlen"
 .Ft int
-.Fn setsockopt "int s" "int level" "int optname" "const void *optval" "int optlen"
+.Fn setsockopt "int s" "int level" "int optname" "const void *optval" "socklen_t optlen"
 .Sh DESCRIPTION
 .Fn Getsockopt
 and
@@ -295,8 +295,7 @@ receiving additional data, it returns with a short count
 or with the error
 .Er EWOULDBLOCK
 if no data were received.
 The struct timeval parameter must represent a
-positive time interval less than SHRT_MAX * 10 milliseconds (5 minutes
-and 28 seconds) otherwise
+positive time interval otherwise
 .Fn setsockopt
 returns with the error
 .Er EDOM .
diff --git a/bsd/man/man2/getxattr.2 b/bsd/man/man2/getxattr.2
new file mode 100644
index 000000000..8f04a9194
--- /dev/null
+++ b/bsd/man/man2/getxattr.2
@@ -0,0 +1,165 @@
+.\"
+.\" Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.Dd Oct 19, 2004 +.Dt GETXATTR 2 +.Os "Mac OS X" +.Sh NAME +.Nm getxattr, +.Nm fgetxattr +.Nd get an extended attribute value +.Sh SYNOPSIS +.Fd #include +.Ft ssize_t +.Fn getxattr "const char *path" "const char *name" "void *value" "size_t size" "u_int32_t position" "int options" +.Ft ssize_t +.Fn fgetxattr "int fd" "const char *name" "void *value" "size_t size" "u_int32_t position" "int options" +.Sh DESCRIPTION +Extended attributes extend the basic attributes of files and +directories in the file system. They are stored as name:data pairs +associated with file system objects (files, directories, symlinks, etc). +.Pp +The +.Fn getxattr +function retrieves up to +.Fa size +bytes of data from the extended attribute identified by +.Fa name +associated with +.Fa path +into the pre-allocated buffer pointed to by +.Fa value . +The function returns the number of bytes of data retrieved. +.Pp +An extended attribute's +.Fa name +is a simple NULL-terminated UTF-8 string. +.Fa position +specifies an offset within the extended attribute. In the current +implementation, this argument is only used with the resource fork attribute. +For all other extended attributes, this parameter is reserved and should +be zero. +.Pp +On success, +.Fa value +contains the data associated with +.Fa name . +When +.Fa value +is set to NULL, +.Fn getxattr +returns current size of the named attribute. This facility can be used +to determine the size of a buffer sufficiently large to hold the data +currently associated with the attribute. +.Pp +.Fa options +specify options for retrieving extended attributes: +.Pp +.Bl -tag -width XATTR_NOFOLLOW +.It Dv XATTR_NOFOLLOW +do not follow symbolic links. +.Fn getxattr +normally returns information from the target of +.Fa path +if it is a symbolic link. With this option, +.Fn getxattr +will return extended attribute data from the symbolic link instead. +.El +.Pp +.Fn fgetxattr +is identical to +.Fn getxattr , +except that it retrieves extended attribute data from the open file +referenced by the file descriptor +.Fa fd . +.Sh RETURN VALUES +On success, the size of the extended attribute data is returned. On +failure, -1 is returned and the global variable +.Va errno +is set as follows. +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er ENOATTR +The extended attribute does not exist. +.It Bq Er ENOTSUP +The file system does not support extended attributes or has the feature +disabled. +.It Bq Er ERANGE +.Fa value +(as indicated by +.Fa size ) +is too small to hold the extended attribute data. +.It Bq Er EPERM +The named attribute is not permitted for this type of object. +.It Bq Er EINVAL +.Fa name +is invalid or +.Fa options +has an unsupported bit set. +.It Bq Er EISDIR +.Fa path +or +.Fa fd +do not refer to a regular file and the attribute in question is only +applicable to files. Similar to EPERM. +.It Bq Er ENOTDIR +A component of +.Fa path 's +prefix is not a directory. +.It Bq Er ENAMETOOLONG +The length of +.Fa name +exceeds +.Dv XATTR_MAXNAMELEN +UTF-8 bytes, or a component of +.Fa path +exceeds +.Dv NAME_MAX +characters, or the entire +.Fa path +exceeds +.Dv PATH_MAX +characters. +.It Bq Er EACCES +Search permission is denied for a component of +.Fa path +or the attribute is not allowed to be read (e.g. an ACL prohibits reading +the attributes of this file). +.It Bq Er ELOOP +Too many symbolic links were encountered in translating the pathname. +.It Bq Er EFAULT +.Fa path +or +.Fa name +points to an invalid address. 
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to the file system.
+.El
+.Sh SEE ALSO
+.Xr setxattr 2 ,
+.Xr removexattr 2 ,
+.Xr listxattr 2
+.Sh HISTORY
+.Fn getxattr
+and
+.Fn fgetxattr
+first appeared in Mac OS X 10.4.
diff --git a/bsd/man/man2/intro.2 b/bsd/man/man2/intro.2
index e9a29acd5..addbe6aa3 100644
--- a/bsd/man/man2/intro.2
+++ b/bsd/man/man2/intro.2
@@ -270,13 +270,8 @@ system or no implementation for it exists.
 .It Er 44 ESOCKTNOSUPPORT Em "Socket type not supported" .
 The support for the socket type has not been configured into
 the system or no implementation for it exists.
-.It Er 45 EOPNOTSUPP Em "Operation not supported" .
+.It Er 45 ENOTSUP Em "Not supported" .
 The attempted operation is not supported for the type of object referenced.
-Usually this occurs when a file descriptor refers to a file or socket
-that cannot support this operation,
-for example, trying to
-.Em accept
-a connection on a datagram socket.
 .It Er 46 EPFNOSUPPORT Em "Protocol family not supported" .
 The protocol family has not been configured into
 the system or no implementation for it exists.
@@ -444,6 +439,28 @@ along an invalid or an incomplete sequence of bytes or the given wide
 character is invalid.
 .It Er 93 ENOATTR Em "Attribute not found" .
 The specified extended attribute does not exist.
+.It Er 94 EBADMSG Em "Bad message" .
+The message to be received is inappropriate for the operation being attempted.
+.It Er 95 EMULTIHOP Em "Reserved" .
+This error is reserved for future use.
+.It Er 96 ENODATA Em "No message available" .
+No message was available to be received by the requested operation.
+.It Er 97 ENOLINK Em "Reserved" .
+This error is reserved for future use.
+.It Er 98 ENOSR Em "No STREAM resources" .
+This error is reserved for future use.
+.It Er 99 ENOSTR Em "Not a STREAM" .
+This error is reserved for future use.
+.It Er 100 EPROTO Em "Protocol error" .
+Some protocol error occurred. This error is device-specific, but is
+generally not related to a hardware failure.
+.It Er 101 ETIME Em "STREAM ioctl() timeout" .
+This error is reserved for future use.
+.It Er 102 EOPNOTSUPP Em "Operation not supported on socket" .
+The attempted operation is not supported for the type of socket referenced;
+for example, trying to
+.Em accept
+a connection on a datagram socket.
 .El
 .Sh DEFINITIONS
 .Bl -tag -width Ds
diff --git a/bsd/man/man2/listxattr.2 b/bsd/man/man2/listxattr.2
new file mode 100644
index 000000000..b466439be
--- /dev/null
+++ b/bsd/man/man2/listxattr.2
@@ -0,0 +1,153 @@
+.\"
+.\" Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.Dd Oct 19, 2004 +.Dt LISTXATTR 2 +.Os "Mac OS X" +.Sh NAME +.Nm listxattr, +.Nm flistxattr +.Nd list an extended attribute value +.Sh SYNOPSIS +.Fd #include +.Ft ssize_t +.Fn listxattr "const char *path" "char *namebuf" "size_t size" "int options" +.Ft ssize_t +.Fn flistxattr "int fd" "char *namebuf" "size_t size" "int options" +.Sh DESCRIPTION +Extended attributes extend the basic attributes associated with files and +directories in the file system. They are stored as name:data pairs associated +with file system objects (files, directories, symlinks, etc). +.Pp +.Fn listxattr +retrieves a list of names of extended attributes associated with the given +.Fa path +in the file system. +.Pp +.Fa namebuf +is a data buffer of +.Pa size +bytes for the names of the extended attributes associated with +.Fa path . +The extended attribute names are simple NULL-terminated UTF-8 strings and +are returned in arbitrary order. No extra padding is provided between +names in the buffer. The list will only include names of extended +attributes to which the calling process has access. The function returns +the size of the list of names. +.Pp +.Fa options +controls how the attribute list is generated: +.Pp +.Bl -tag -width XATTR_NOFOLLOW +.It Dv XATTR_NOFOLLOW +do not follow symbolic links. +.Fn listxattr +normally lists attributes of the target of +.Fa path +if it is a symbolic link. With this option, +.Fn listxattr +will list attributes of the link itself. +.El +.Pp +If +.Fa namebuf +is set to NULL, +the function returns the size of the list of extended attribute names. +This facility can be used to determine the size of a buffer sufficiently +large to hold the names of the attributes currently associated with +.Fa path . +.Pp +.Fn flistxattr +is identical to +.Fn listxattr , +except that it returns the list of extended attribute names associated +with the open file referenced by file descriptor +.Fa fd . +.Sh RETURN VALUES +On success, the size of the extended attribute name list is returned. If +no accessible extended attributes are associated with the given +.Fa path +or +.Fa fd , +the function returns zero. On failure, -1 is returned and the global +variable +.Va errno +is set as follows. +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er ENOTSUP +The file system does not support extended attributes or has the feature +disabled. +.It Bq Er ERANGE +.Fa namebuf +(as indicated by +.Fa size ) +is too small to hold the list of names. +.It Bq Er EPERM +.Fa path +or +.Fa fd +refer to a file system object that does not support extended attributes. +For example, resource forks don't support extended attributes. +.\" If only EFTYPE was a POSIX error +.It Bq Er ENOTDIR +A component of +.Fa path 's +prefix is not a directory. +.It Bq Er ENAMETOOLONG +.Fa name +exceeds +.Dv XATTR_MAXNAMELEN +UTF-8 bytes, or a component of +.Fa path +exceeds +.Dv NAME_MAX +characters, or the entire +.Fa path +exceeds +.Dv PATH_MAX +characters. +.It Bq Er EACCES +Search permission is denied for a component of +.Fa path +or permission is denied to read the list of attributes from this file. +.It Bq Er ELOOP +Too many symbolic links were encountered resolving +.Fa path . +.It Bq Er EFAULT +.Fa path +points to an invalid address. +.It Bq Er EIO +An I/O error occurred. +.It Bq Er EINVAL +.Fa options +does not make sense. +.El +.Sh SEE ALSO +.Xr setxattr 2 , +.Xr getxattr 2 , +.Xr removexattr 2 +.Sh HISTORY +.Fn listxattr +and +.Fn flistxattr +first appeared in Mac OS X 10.4. 
diff --git a/bsd/man/man2/madvise.2 b/bsd/man/man2/madvise.2 index 9f5938267..1c85fce72 100644 --- a/bsd/man/man2/madvise.2 +++ b/bsd/man/man2/madvise.2 @@ -99,7 +99,7 @@ Indicates that the application will not need the information contained in this a .Fn madvise system call. .El - +.Pp The .Fn posix_madvise behaves same as diff --git a/bsd/man/man2/mkfifo.2 b/bsd/man/man2/mkfifo.2 index af5d7615f..7d843f2c5 100644 --- a/bsd/man/man2/mkfifo.2 +++ b/bsd/man/man2/mkfifo.2 @@ -66,7 +66,7 @@ indicates an error, and an error code is stored in .Fn Mkfifo will fail and no fifo will be created if: .Bl -tag -width Er -.It Bq Er EOPNOTSUPP +.It Bq Er ENOTSUP The kernel has not been configured to support fifo's. .It Bq Er ENOTDIR A component of the path prefix is not a directory. diff --git a/bsd/man/man2/poll.2 b/bsd/man/man2/poll.2 new file mode 100644 index 000000000..a91b73094 --- /dev/null +++ b/bsd/man/man2/poll.2 @@ -0,0 +1,198 @@ +.\" +.\" Copyright (c) 2005 Apple Computer, Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" +.\" Copyright (c) 1996 Charles M. Hannum. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by Charles M. Hannum. +.\" 4. The name of the author may not be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd February 27, 2005 +.Dt POLL 2 +.Os +.Sh NAME +.Nm poll +.Nd synchronous I/O multiplexing +.Sh SYNOPSIS +.In poll.h +.Ft int +.Fn poll "struct pollfd *fds" "nfds_t nfds" "int timeout" +.Sh DESCRIPTION +.Fn Poll +examines a set of file descriptors to see if some of them are ready for +I/O or if certain events have occurred on them. +The +.Fa fds +argument is a pointer to an array of pollfd structures as defined in +.Aq Pa poll.h +(shown below). The +.Fa nfds +argument determines the size of the +.Fa fds +array. +.Bd -literal +struct pollfd { + int fd; /* file descriptor */ + short events; /* events to look for */ + short revents; /* events returned */ +}; +.Ed +.Pp +The fields of +.Fa struct pollfd +are as follows: +.Bl -tag -width XXXrevents +.It fd +File descriptor to poll. +.It events +Events to poll for. (See below.) +.It revents +Events which may occur or have occurred. (See below.) +.El +.Pp +The event bitmasks in +.Fa events +and +.Fa revents +have the following bits: +.Bl -tag -width XXXPOLLWRNORM +.It POLLIN +Data other than high priority data may be read without blocking. +This is equivalent to ( POLLRDNORM | POLLRDBAND ). +.It POLLRDNORM +Normal data may be read without blocking. +.It POLLRDBAND +Priority data may be read without blocking. +.It POLLPRI +High priority data may be read without blocking. +.It POLLOUT +.It POLLWRNORM +Normal data may be written without blocking. +.It POLLWRBAND +Priority data may be written without blocking. +.It POLLERR +An exceptional condition has occurred on the device or socket. This +flag is output only, and ignored if present in the input +.Fa events +bitmask. +.It POLLHUP +The device or socket has been disconnected. This flag is output only, +and ignored if present in the input +.Fa events +bitmask. Note that +POLLHUP +and +POLLOUT +are mutually exclusive and should never be present in the +.Fa revents +bitmask at the same time. +.It POLLNVAL +The file descriptor is not open. This flag is output only, and ignored if present in the input +.Fa events +bitmask. +.El +.Pp +The distinction between normal, priority, and high-priority data is file type +or device specific. +.Pp +If +.Fa timeout +is greater than zero, it specifies a maximum interval to +wait for any file descriptor to become ready, in milliseconds. If +.Fa timeout +is zero, then +.Fn poll +will return without blocking. If the value of +.Fa timeout +is -1, the poll blocks indefinitely. +.Sh RETURN VALUES +.Fn Poll +returns the number of descriptors that are ready for I/O, or -1 if an +error occured. If the time limit expires, +.Fn poll +returns 0. +If +.Fn poll +returns with an error, +including one due to an interrupted call, +the +.Fa fds +array will be unmodified. +.Sh ERRORS +An error return from +.Fn poll +indicates: +.Bl -tag -width Er +.It Bq Er EFAULT +.Fa Fds +points outside the process's allocated address space. +.It Bq Er EINTR +A signal was delivered before the time limit expired and +before any of the selected events occurred. 
+.It Bq Er EINVAL +The +.Fa nfds +argument is greater than OPEN_MAX, or the +.Fa timeout +argument is less than -1. +.El +.Sh BUGS +The +.Fn poll +system call currently does not support devices. +.Sh SEE ALSO +.Xr accept 2 , +.Xr connect 2 , +.Xr kevent 2 , +.Xr read 2 , +.Xr recv 2 , +.Xr select 2 , +.Xr send 2 , +.Xr write 2 +.Sh HISTORY +The +.Fn poll +function call appeared in +.At V . + diff --git a/bsd/man/man2/posix_madvise.2 b/bsd/man/man2/posix_madvise.2 index d0f9ea997..c83f56ab9 100644 --- a/bsd/man/man2/posix_madvise.2 +++ b/bsd/man/man2/posix_madvise.2 @@ -1 +1 @@ -.so man2/madvise.2 +.so man2/madvise.2 \ No newline at end of file diff --git a/bsd/man/man2/ptrace.2 b/bsd/man/man2/ptrace.2 index 649458fec..5df4371b2 100644 --- a/bsd/man/man2/ptrace.2 +++ b/bsd/man/man2/ptrace.2 @@ -47,7 +47,7 @@ argument specifies the process ID of the traced process. can be: .Bl -tag -width 12n .It Dv PT_TRACE_ME -This request is the only one used by the traced process; it declares +This request is one of two used by the traced process; it declares that the process expects to be traced by its parent. All the other arguments are ignored. (If the parent process does not expect to trace the child, it will probably be rather confused by the results; once the @@ -65,70 +65,14 @@ such as it will stop before executing the first instruction of the new image. Also, any setuid or setgid bits on the executable being executed will be ignored. -.It Dv PT_READ_I , Dv PT_READ_D -These requests read a single -.Li int -of data from the traced process' address space. Traditionally, -.Fn ptrace -has allowed for machines with distinct address spaces for instruction -and data, which is why there are two requests: conceptually, -.Dv PT_READ_I -reads from the instruction space and -.Dv PT_READ_D -reads from the data space. In the current OpenBSD implementation, these -two requests are completely identical. The -.Fa addr -argument specifies the address (in the traced process' virtual address -space) at which the read is to be done. This address does not have to -meet any alignment constraints. The value read is returned as the -return value from -.Eo \& -.Fn ptrace -.Ec . -.It Dv PT_WRITE_I , Dv PT_WRITE_D -These requests parallel -.Dv PT_READ_I -and -.Dv PT_READ_D , -except that they write rather than read. The -.Fa data -argument supplies the value to be written. -.\" .It Dv PT_READ_U -.\" This request reads an -.\" .Li int -.\" from the traced process' user structure. The -.\" .Fa addr -.\" argument specifies the location of the int relative to the base of the -.\" user structure; it will usually be an integer value cast to -.\" .Li caddr_t -.\" either explicitly or via the presence of a prototype for -.\" .Eo \& -.\" .Fn ptrace -.\" .Ec . -.\" Unlike -.\" .Dv PT_READ_I -.\" and -.\" .Dv PT_READ_D , -.\" .Fa addr -.\" must be aligned on an -.\" .Li int -.\" boundary. The value read is returned as the return value from -.\" .Eo \& -.\" .Fn ptrace -.\" .Ec . -.\" .It Dv PT_WRITE_U -.\" This request writes an -.\" .Li int -.\" into the traced process' user structure. -.\" .Fa addr -.\" specifies the offset, just as for -.\" .Dv PT_READ_U , -.\" and -.\" .Fa data -.\" specifies the value to be written, just as for -.\" .Dv PT_WRITE_I -.\" and -.\" .Dv PT_WRITE_D . +.It Dv PT_DENY_ATTACH +This request is the other operation used by the traced process; it allows +a process that is not currently being traced to deny future traces by its +parent. All other arguments are ignored. 
If the process is currently +being traced, it will exit with the exit status of ENOTSUP; otherwise, +it sets a flag that denies future traces. An attempt by the parent to +trace a process which has set this flag will result in a segmentation violation +in the parent. .It Dv PT_CONTINUE The traced process continues execution. .Fa addr @@ -139,6 +83,10 @@ to indicate that execution is to pick up where it left off. .Fa data provides a signal number to be delivered to the traced process as it resumes execution, or 0 if no signal is to be sent. +.It Dv PT_STEP +The traced process continues execution for a single step. The +parameters are identical to those passed to +.Dv PT_CONTINUE. .It Dv PT_KILL The traced process terminates, as if .Dv PT_CONTINUE @@ -164,138 +112,6 @@ succeeds, the traced process is no longer traced and continues execution normally. .El .Pp -Additionally, machine-specific requests can exist. On the SPARC, these -are: -.Bl -tag -width 12n -.It Dv PT_GETREGS -This request reads the traced process' machine registers into the -.Dq Li "struct reg" -(defined in -.Aq Pa machine/reg.h ) -pointed to by -.Fa addr . -.It Dv PT_SETREGS -This request is the converse of -.Dv PT_GETREGS ; -it loads the traced process' machine registers from the -.Dq Li "struct reg" -(defined in -.Aq Pa machine/reg.h ) -pointed to by -.Fa addr . -.It Dv PT_GETFPREGS -This request reads the traced process' floating-point registers into -the -.Dq Li "struct fpreg" -(defined in -.Aq Pa machine/reg.h ) -pointed to by -.Fa addr . -.It Dv PT_SETFPREGS -This request is the converse of -.Dv PT_GETFPREGS ; -it loads the traced process' floating-point registers from the -.Dq Li "struct fpreg" -(defined in -.Aq Pa machine/reg.h ) -pointed to by -.Fa addr . -.\" .It Dv PT_SYSCALL -.\" This request is like -.\" .Dv PT_CONTINUE -.\" except that the process will stop next time it executes any system -.\" call. Information about the system call can be examined with -.\" .Dv PT_READ_U -.\" and potentially modified with -.\" .Dv PT_WRITE_U -.\" through the -.\" .Li u_kproc.kp_proc.p_md -.\" element of the user structure (see below). If the process is continued -.\" with another -.\" .Dv PT_SYSCALL -.\" request, it will stop again on exit from the syscall, at which point -.\" the return values can be examined and potentially changed. The -.\" .Li u_kproc.kp_proc.p_md -.\" element is of type -.\" .Dq Li "struct mdproc" , -.\" which should be declared by including -.\" .Aq Pa sys/param.h , -.\" .Aq Pa sys/user.h , -.\" and -.\" .Aq Pa machine/proc.h , -.\" and contains the following fields (among others): -.\" .Bl -item -compact -offset indent -.\" .It -.\" .Li syscall_num -.\" .It -.\" .Li syscall_nargs -.\" .It -.\" .Li syscall_args[8] -.\" .It -.\" .Li syscall_err -.\" .It -.\" .Li syscall_rv[2] -.\" .El -.\" When a process stops on entry to a syscall, -.\" .Li syscall_num -.\" holds the number of the syscall, -.\" .Li syscall_nargs -.\" holds the number of arguments it expects, and -.\" .Li syscall_args -.\" holds the arguments themselves. (Only the first -.\" .Li syscall_nargs -.\" elements of -.\" .Li syscall_args -.\" are guaranteed to be useful.) When a process stops on exit from a -.\" syscall, -.\" .Li syscall_num -.\" is -.\" .Eo \& -.\" .Li -1 -.\" .Ec , -.\" .Li syscall_err -.\" holds the error number -.\" .Po -.\" see -.\" .Xr errno 2 -.\" .Pc , -.\" or 0 if no error occurred, and -.\" .Li syscall_rv -.\" holds the return values. (If the syscall returns only one value, only -.\" .Li syscall_rv[0] -.\" is useful.) 
The tracing process can modify any of these with -.\" .Dv PT_WRITE_U ; -.\" only some modifications are useful. -.\" .Pp -.\" On entry to a syscall, -.\" .Li syscall_num -.\" can be changed, and the syscall actually performed will correspond to -.\" the new number (it is the responsibility of the tracing process to fill -.\" in -.\" .Li syscall_args -.\" appropriately for the new call, but there is no need to modify -.\" .Eo \& -.\" .Li syscall_nargs -.\" .Ec ). -.\" If the new syscall number is 0, no syscall is actually performed; -.\" instead, -.\" .Li syscall_err -.\" and -.\" .Li syscall_rv -.\" are passed back to the traced process directly (and therefore should be -.\" filled in). If the syscall number is otherwise out of range, a dummy -.\" syscall which simply produces an -.\" .Er ENOSYS -.\" error is effectively performed. -.\" .Pp -.\" On exit from a syscall, only -.\" .Li syscall_err -.\" and -.\" .Li syscall_rv -.\" can usefully be changed; they are set to the values returned by the -.\" syscall and will be passed back to the traced process by the normal -.\" syscall return mechanism. -.El .Sh ERRORS Some requests can cause .Fn ptrace @@ -318,22 +134,11 @@ on itself. The .Fa request was not one of the legal requests. -.\" .It -.\" The -.\" .Fa addr -.\" to -.\" .Dv PT_READ_U -.\" or -.\" .Dv PT_WRITE_U -.\" was not -.\" .Li int Ns \&-aligned. .It The signal number (in .Fa data ) to .Dv PT_CONTINUE -.\" or -.\" .Dv PT_SYSCALL was neither 0 nor a legal signal number. .It .Dv PT_GETREGS , @@ -371,27 +176,3 @@ on a process in violation of the requirements listed under above. .El .El -.Sh BUGS -On the SPARC, the PC is set to the provided PC value for -.Dv PT_CONTINUE -and similar calls, but the NPC is set willy-nilly to 4 greater than the -PC value. Using -.Dv PT_GETREGS -and -.Dv PT_SETREGS -to modify the PC, passing -.Li (caddr_t)1 -to -.Eo \& -.Fn ptrace -.Ec , -should be able to sidestep this. -.Pp -Single-stepping is not available. -.\" .Pp -.\" When using -.\" .Dv PT_SYSCALL , -.\" there is no easy way to tell whether the traced process stopped because -.\" it made a syscall or because a signal was sent at a moment that it just -.\" happened to have valid-looking garbage in its -.\" .Dq Li "struct mdproc" . diff --git a/bsd/man/man2/quotactl.2 b/bsd/man/man2/quotactl.2 index ea35c50b7..68b2e3c24 100644 --- a/bsd/man/man2/quotactl.2 +++ b/bsd/man/man2/quotactl.2 @@ -158,7 +158,7 @@ A .Fn quotactl call will fail if: .Bl -tag -width Er -.It Bq Er EOPNOTSUPP +.It Bq Er ENOTSUP The kernel has not been compiled with the .Dv QUOTA option. 
diff --git a/bsd/man/man2/recv.2 b/bsd/man/man2/recv.2
index 75ef209bf..5ceee5989 100644
--- a/bsd/man/man2/recv.2
+++ b/bsd/man/man2/recv.2
@@ -47,7 +47,7 @@
 .Ft ssize_t
 .Fn recv "int s" "void *buf" "size_t len" "int flags"
 .Ft ssize_t
-.Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "int *fromlen"
+.Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr *from" "socklen_t *fromlen"
 .Ft ssize_t
 .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
 .Sh DESCRIPTION
@@ -147,13 +147,13 @@ This structure has the following form, as defined in
 .Pp
 .Bd -literal
 struct msghdr {
-    caddr_t   msg_name;       /* optional address */
-    u_int     msg_namelen;    /* size of address */
-    struct    iovec *msg_iov; /* scatter/gather array */
-    u_int     msg_iovlen;     /* # elements in msg_iov */
-    caddr_t   msg_control;    /* ancillary data, see below */
-    u_int     msg_controllen; /* ancillary data buffer len */
-    int       msg_flags;      /* flags on received message */
+    caddr_t    msg_name;       /* optional address */
+    socklen_t  msg_namelen;    /* size of address */
+    struct     iovec *msg_iov; /* scatter/gather array */
+    u_int      msg_iovlen;     /* # elements in msg_iov */
+    caddr_t    msg_control;    /* ancillary data, see below */
+    socklen_t  msg_controllen; /* ancillary data buffer len */
+    int        msg_flags;      /* flags on received message */
 };
 .Ed
 .Pp
diff --git a/bsd/man/man2/removexattr.2 b/bsd/man/man2/removexattr.2
new file mode 100644
index 000000000..acfa319d5
--- /dev/null
+++ b/bsd/man/man2/removexattr.2
@@ -0,0 +1,135 @@
+.\"
+.\" Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\"
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd Oct 19, 2004
+.Dt REMOVEXATTR 2
+.Os "Mac OS X"
+.Sh NAME
+.Nm removexattr,
+.Nm fremovexattr
+.Nd remove an extended attribute value
+.Sh SYNOPSIS
+.Fd #include <sys/xattr.h>
+.Ft int
+.Fn removexattr "const char *path" "const char *name" "int options"
+.Ft int
+.Fn fremovexattr "int fd" "const char *name" "int options"
+.Sh DESCRIPTION
+Extended attributes extend the basic attributes associated with files and
+directories in the file system. They are stored as name:data pairs
+associated with file system objects (files, directories, symlinks, etc).
+.Pp
+.Fn Removexattr
+deletes the extended attribute
+.Fa name
+associated with
+.Fa path .
+.Pp
+An extended attribute's
+.Fa name
+is a simple NULL-terminated UTF-8 string.
+.Fa Options
+is a bit mask specifying various options:
+.Pp
+.Bl -tag -width XATTR_NOFOLLOW
+.It Dv XATTR_NOFOLLOW
+do not follow symbolic links. Normally,
+.Fn removexattr
+acts on the target of
+.Fa path
+if it is a symbolic link.
With this option,
+.Fn removexattr
+will act on the link itself.
+.El
+.Pp
+.Fn fremovexattr
+is identical to
+.Fn removexattr ,
+except that it removes an extended attribute from an open file referenced
+by file descriptor
+.Fa fd .
+.Sh RETURN VALUES
+On success, 0 is returned. On failure, -1 is returned and the global
+variable
+.Va errno
+is set as follows.
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er ENOATTR
+The specified extended attribute does not exist.
+.It Bq Er ENOTSUP
+The file system does not support extended attributes or has the feature
+disabled.
+.It Bq Er EROFS
+The file system is mounted read-only.
+.It Bq Er EPERM
+This type of object does not support extended attributes.
+.It Bq Er EINVAL
+.Fa name
+or
+.Fa options
+is invalid.
+.Fa name
+must be valid UTF-8 and
+.Fa options
+must make sense.
+.It Bq Er ENOTDIR
+A component of the
+.Fa path 's
+prefix is not a directory.
+.It Bq Er ENAMETOOLONG
+.Fa Name
+exceeded
+.Dv XATTR_MAXNAMELEN
+UTF-8 bytes, or a component of
+.Fa path
+exceeded
+.Dv NAME_MAX
+characters, or the entire
+.Fa path
+exceeded
+.Dv PATH_MAX
+characters.
+.It Bq Er EACCES
+Search permission is denied for a component of
+.Fa path
+or permission to remove the attribute is denied.
+.It Bq Er ELOOP
+Too many symbolic links were encountered in
+.Fa path .
+.It Bq Er EFAULT
+.Fa path
+or
+.Fa name
+points to an invalid address.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to the file system.
+.El
+.Sh SEE ALSO
+.Xr getxattr 2 ,
+.Xr setxattr 2 ,
+.Xr listxattr 2
+.Sh HISTORY
+.Fn removexattr
+and
+.Fn fremovexattr
+first appeared in Mac OS X 10.4.
diff --git a/bsd/man/man2/sbrk.2 b/bsd/man/man2/sbrk.2
deleted file mode 100644
index a3711a537..000000000
--- a/bsd/man/man2/sbrk.2
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/brk.2
diff --git a/bsd/man/man2/searchfs.2 b/bsd/man/man2/searchfs.2
new file mode 100644
index 000000000..c3b602b4d
--- /dev/null
+++ b/bsd/man/man2/searchfs.2
@@ -0,0 +1,804 @@
+.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+.\"
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License"). You may not use this file except in compliance with the
+.\" License. Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\"
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\"
+.\" @(#)searchfs.2
+.
+.Dd December 15, 2003
+.Dt SEARCHFS 2
+.Os Darwin
+.Sh NAME
+.Nm searchfs
+.Nd search a volume quickly
+.Sh SYNOPSIS
+.Fd #include <sys/attr.h>
+.Fd #include <unistd.h>
+.Ft int
+.Fn searchfs "const char * path" "struct fssearchblock * searchBlock" "unsigned long * numMatches" "unsigned long scriptCode" "unsigned long options" "struct searchstate * state"
+.
+.Sh DESCRIPTION
+The
+.Fn searchfs
+function searches the volume (that is, mounted file system) specified by
+.Fa path
+for file system objects matching the criteria specified by
+.Fa searchBlock ,
+.Fa scriptCode ,
+and
+.Fa options .
+The
+.Fa numMatches
+parameter returns the number of matching file system objects found.
+The function also returns attributes of those file system objects in a buffer
+specified by
+.Fa searchBlock .
+The
+.Fa state
+parameter allows you to search the volume using multiple calls to
+.Fn searchfs ,
+resuming the search where it left off.
+The routine will only return objects to which you have access (that is, you
+have execute permissions on the directories leading to this object from the root).
+.Pp
+.
+.\" path parameter
+.
+The
+.Fa path
+parameter must reference a valid file system object on the volume to be searched.
+Typically the path is to the volume's root directory.
+The entire volume is always searched.
+All directories listed in the path name leading to this object must be
+searchable.
+.Pp
+.
+.\" searchBlock parameter
+.
+The
+.Fa searchBlock
+parameter is a pointer to an
+.Vt fssearchblock
+structure, as defined by
+.Aq Pa sys/attr.h
+(shown below).
+You are responsible for filling out all fields of this structure before calling the function.
+.Bd -literal
+struct fssearchblock {
+    struct attrlist *  returnattrs;
+    void *             returnbuffer;
+    size_t             returnbuffersize;
+    unsigned long      maxmatches;
+    struct timeval     timelimit;
+    void *             searchparams1;
+    size_t             sizeofsearchparams1;
+    void *             searchparams2;
+    size_t             sizeofsearchparams2;
+    struct attrlist    searchattrs;
+};
+.Ed
+.Pp
+.
+For information about the
+.Vt attrlist
+structure, see the discussion of
+.Xr getattrlist 2 .
+.Pp
+.
+.\" searchBlock elements
+.
+The fields of the
+.Vt fssearchblock
+structure are defined as follows.
+.Bl -tag -width sizeofsearchparams1
+.
+.It returnattrs
+.Fn searchfs
+can return arbitrary attributes of the file system objects that it finds.
+This field must point to an
+.Vt attrlist
+structure that specifies the attributes that you want returned.
+To request an attribute you must set the corresponding bit in the appropriate
+.Vt attrgroup_t
+field of the
+.Vt attrlist
+structure.
+You are responsible for filling out all fields of this structure before calling the function.
+You must not request volume attributes.
+.
+.It returnbuffer
+.Fn searchfs
+places attributes of the matching file system objects into this returned attributes buffer.
+The attributes for any given object are grouped together and
+packed in exactly the same way as they would be returned from
+.Xr getdirentriesattr 2 .
+The initial contents of this buffer are ignored.
+.
+.It returnbuffersize
+Set this field to the size, in bytes, of the buffer pointed to by
+.Fa returnbuffer .
+.
+.It maxmatches
+Specifies the maximum number of matches that you want this call to
+.Fn searchfs
+to return.
+.
+.It timelimit
+Specifies the maximum time that you want this call to
+.Fn searchfs
+to run.
+.Pp
+.
+If you're implementing a volume format, you should impose your own internal
+limit on the duration of this call to prevent a malicious user program
+from monopolising kernel resources.
+.Pp
+.
+.It searchparams1
+Specifies the lower bound of the search criteria.
+This is discussed in detail below.
+You must place attribute values into the buffer in the same
+way as they would be returned by
+.Xr getattrlist 2 ,
+where the
+.Fa searchattrs
+field determines the exact layout of the attribute values.
+.
+.It sizeofsearchparams1
+Set this field to the size, in bytes, of the buffer pointed to by
+.Fa searchparams1 .
+.
+.It searchparams2
+Specifies the upper bound of the search criteria.
+This is discussed in detail below.
+You must place attribute values into the buffer in the same
+way as they would be returned by
+.Xr getattrlist 2 ,
+where the
+.Fa searchattrs
+field determines the exact layout of the attribute values.
+.
+.It sizeofsearchparams2
+Set this field to the size, in bytes, of the buffer pointed to by
+.Fa searchparams2 .
+.
+.It searchattrs
+Specifies the attributes that you want to use for your search criteria.
+You are responsible for filling out all fields of this structure before calling the function.
+To search for an attribute you must set the corresponding bit in the appropriate
+.Vt attrgroup_t
+field of the
+.Vt attrlist
+structure, and place the appropriate values into the
+.Fa searchparam1
+and
+.Fa searchparam2
+buffers.
+The attributes specified here determine the format of those buffers.
+This is discussed in detail below.
+.
+.El
+.Pp
+.
+.\" numMatches parameter
+.
+The
+.Fa numMatches
+parameter points to an
+.Vt unsigned long
+variable.
+The initial value of this variable is ignored.
+On return, this variable contains the number of matching file system objects found.
+This is always less than or equal to the
+.Fa maxmatches
+field of the
+.Fa searchBlock
+parameter.
+The attributes for the matching objects have been placed into the returned attributes buffer.
+.Pp
+.
+.\" scriptCode parameter
+.
+The
+.Fa scriptCode
+parameter is currently ignored.
+You should always pass in the value 0x08000103, which corresponds to the
+UTF-8 text encoding value defined by
+.Aq Pa CarbonCore/TextCommon.h .
+.Pp
+.
+.\" options parameter
+.
+The
+.Fa options
+parameter is a bit set that controls the behaviour of
+.Fn searchfs .
+The following option bits are defined.
+.
+.Bl -tag -width SRCHFS_MATCHPARTIALNAMES
+.
+.It SRCHFS_START
+If this bit is set,
+.Fn searchfs
+will ignore the
+.Fa state
+parameter and start a new search.
+Otherwise
+.Fn searchfs
+assumes that
+.Fa state
+is valid and attempts to resume a previous search based on that state.
+.
+.It SRCHFS_MATCHPARTIALNAMES
+If this bit is set,
+.Fn searchfs
+will consider substrings to be successful matches when evaluating the
+.Dv ATTR_CMN_NAME
+attribute.
+.
+.It SRCHFS_MATCHDIRS
+If this bit is set,
+.Fn searchfs
+will search for directories that match the search criteria.
+To get meaningful results you must specify either this bit or
+.Dv SRCHFS_MATCHFILES ,
+or both.
+.
+.It SRCHFS_MATCHFILES
+If this bit is set,
+.Fn searchfs
+will search for files that match the search criteria.
+To get meaningful results you must specify either this bit or
+.Dv SRCHFS_MATCHDIRS ,
+or both.
+.
+.It SRCHFS_SKIPLINKS
+If this bit is set,
+.Fn searchfs
+will only return one reference for a hard linked file, rather than a reference
+for each hard link to the file.
+.Pp
+This option is not recommended for general development.
+Its primary client is the
+.Xr quotacheck 8
+utility.
+.Pp
+.
+This option is privileged (the caller's effective UID must be 0) and cannot
+be used if you request the
+.Dv ATTR_CMN_NAME
+or
+.Dv ATTR_CMN_PAROBJID
+attributes.
+.Pp
+Introduced with Darwin 7.0 (Mac OS X version 10.3).
+.
+.It SRCHFS_SKIPINVISIBLE
+If this bit is set,
+.Fn searchfs
+will not match any invisible file system objects (that is, objects whose
+.Dv ATTR_CMN_FNDRINFO
+attribute has bit 6 set in the ninth byte) or any objects within
+invisible directories.
+.Pp
+Introduced with Darwin 7.0 (Mac OS X version 10.3).
+.
+.It SRCHFS_SKIPPACKAGES
+If this bit is set,
+.Fn searchfs
+will not match any file system objects that are inside a package.
+A package is defined as a directory whose extension matches one +of the extensions that are configured into the kernel by Launch Services. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It SRCHFS_SKIPINAPPROPRIATE +If this bit is set, +.Fn searchfs +will not match any file system objects that are within an inappropriate directory. +The current list of inappropriate directories contains one item: /System. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.It SRCHFS_NEGATEPARAMS +If this bit is set, +.Fn searchfs +will return all the file system objects that do not match the search criteria. +.Pp +Introduced with Darwin 7.0 (Mac OS X version 10.3). +. +.El +.Pp +. +.\" state parameter +. +The +.Fa state +parameter is a pointer to an opaque data structure that +.Fn searchfs +uses to maintain the state of a search between successive calls. +In your first call to +.Fn searchfs , +you specify the +.Dv SRCHFS_START +flag in the +.Fa options +parameter. +This tells +.Fn searchfs +that the search state is invalid and that it should start a new search. +When this call completes, it may have only returned partial results; +in that case, it will have updated the structure pointed to by +.Fa state . +If you call +.Fn searchfs +again, this time without specifying the +.Dv SRCHFS_START +flag in the +.Fa options +parameter, it will resume the search where it left off, using the search state +that it previously stored in the state structure. +You do not need to explicitly dispose of this state. +.Pp +. +The +.Fn searchfs +function returns significant errors in the followings cases. +. +.Bl -bullet +. +.It +If it has found as many objects as you requested in the +.Fa maxmatches +field of the +.Fa searchBlock +parameter, it will return +.Dv EAGAIN . +. +.It +If there is not enough space in the returned attributes buffer for the first match, +it will return +.Dv ENOBUFS . +You should allocate a larger returned attributes buffer and try again. +.Fa numMatches +will be zero in this case. +. +.It +If the timeout expires it will return +.Dv EAGAIN . +. +.It +If you attempt to resume a search (that is, +.Dv SRCHFS_START +is not specified in the +.Fa options +parameter) and the catalog has changed since the last search, +the function will return +.Dv EBUSY . +You must start your search again from the beginning. +. +.El +.Pp +. +If +.Fn searchfs +returns +.Dv EAGAIN , +the value in +.Fa numMatches +may be greater than zero. +This is known as a partial result. +You should be sure to process these matches before calling +.Fn searchfs +again. +. +.Sh SEARCH CRITERIA +. +You specify the search criteria using a combination of the +.Fa searchattrs , +.Fa searchparams1 , +.Fa sizeofsearchparams1, +.Fa searchparams2 , +and +.Fa sizeofsearchparams2 +fields of the +.Fa searchBlock +parameter, and various flags in the +.Fa options +parameter. +The +.Fa searchattrs +field determines the attributes considered when comparing a file system object to +the search criteria. +You can specify that an attribute should be considered by setting the corresponding +bit in the appropriate +.Vt attrgroup_t +field of the +.Vt attrlist +structure. +See the discussion of +.Xr getattrlist 2 +for a detailed description of this structure. +.Pp +. +The +.Fa searchparams1 , +.Fa sizeofsearchparams1 , +.Fa searchparams2 , +and +.Fa sizeofsearchparams2 +fields specify the attribute values that must be matched. +The format of each of these buffers is determined by the attributes that you're searching for. 
+The values are packed in exactly the same way as they would be returned from +.Xr getattrlist 2 , +including the leading +.Vt unsigned long +length value. +.Pp +. +The attribute values in the first and second search buffers form a lower and upper bound for +the search, respectively. +These have different meanings depending on the type of attribute. +. +.Bl -bullet +. +.It +For string attributes (specifically +.Dv ATTR_CMN_NAME , +the object name), the value in the first search +buffer is significant and the value in the second search buffer is ignored. +The string comparison is either an exact match or a substring match depending on +the +.Dv SRCHFS_MATCHPARTIALNAMES +flag in the +.Fa options +parameter. +. +.It +For structured attributes (specifically +.Dv ATTR_CMN_FNDRINFO , +the Finder information), the value from the +file system object is masked (logical AND) with the value in the second search buffer and then +compared, byte for byte, against the value in the first search buffer. +If it is equal, the object is a match. +. +.It +For scalar attributes (all other attributes, for example, +.Dv ATTR_CMN_MODTIME , +the modification date), the values in the first and second search +buffers are literally a lower and upper bound. +An object matches the criteria if its value is greater than or equal to the value in +the first buffer and less than or equal to the value in the second. +. +.El +. +.Sh RETURN VALUES +Upon successful completion, a value of 0 is returned. +This means that the entire volume has been searched and all matches returned. +Otherwise, a value of -1 is returned and +.Va errno +is set to indicate the error. +.Pp +. +See the discussion of the +.Dv EAGAIN , +.Dv ENOBUFS , +and +.Dv EBUSY +error codes above. +. +.Sh COMPATIBILITY +Not all volumes support +.Fn searchfs . +You can test whether a volume supports +.Fn searchfs +by using +.Xr getattrlist 2 +to get the volume capabilities attribute +.Dv ATTR_VOL_CAPABILITIES , +and then testing the +.Dv VOL_CAP_INT_SEARCHFS +flag. +.Pp +. +The +.Fn searchfs +function has been undocumented for more than two years. +In that time a number of volume format implementations have been created without +a proper specification for the behaviour of this routine. +You may encounter volume format implementations with slightly different +behaviour than what is described here. +Your program is expected to be tolerant of this variant behaviour. +.Pp +. +If you're implementing a volume format that supports +.Fn searchfs , +you should be careful to support the behaviour specified by this document. +.Pp +. +A bug in systems prior to Darwin 7.0 (Mac OS X version 10.3) makes searching for the +.Dv ATTR_CMN_BKUPTIME +attribute tricky. +The bug causes the attribute to consume two items in the search attribute buffers, the +first in the proper place and the second between +.Dv ATTR_CMN_FNDRINFO +and +.Dv ATTR_CMN_OWNERID . +. +.Sh ERRORS +.Fn searchfs +will fail if: +.Bl -tag -width Er +. +.It Bq Er ENOTSUP +The volume does not support +.Fn searchfs . +. +.It Bq Er ENOTDIR +A component of the path prefix is not a directory. +. +.It Bq Er ENAMETOOLONG +A component of a path name exceeded +.Dv NAME_MAX +characters, or an entire path name exceeded +.Dv PATH_MAX +characters. +. +.It Bq Er ENOENT +The file system object does not exist. +. +.It Bq Er EACCES +Search permission is denied for a component of the path prefix. +. +.It Bq Er ELOOP +Too many symbolic links were encountered in translating the pathname. +. 
+.It Bq Er EFAULT
+One of the pointer parameters points to an invalid address.
+.
+.It Bq Er EINVAL
+The
+.Fa options
+parameter contains an invalid flag or sizeofsearchparams1/2 is greater than
+SEARCHFS_MAX_SEARCHPARMS (see attr.h).
+.
+.It Bq Er EAGAIN
+The search terminated with partial results, either because
+.Fa numMatches
+has hit the limit specified by
+.Fa maxmatches
+or because the timeout expired.
+Process the matches returned so far and then call
+.Fn searchfs
+again to look for more.
+.Pp
+.
+.It Bq Er ENOBUFS
+The returned attributes buffer is too small for the first match.
+You should allocate a larger returned attributes buffer and try again.
+.Fa numMatches
+will be zero in this case.
+.
+.It Bq Er EBUSY
+The search could not be resumed because the volume has changed.
+.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to the file system.
+.El
+.Pp
+.
+.Sh CAVEATS
+Not all attributes can be searched for using
+.Fn searchfs .
+The list currently includes:
+.Pp
+.
+.Bl -item -compact
+.It
+ATTR_CMN_NAME
+.It
+ATTR_CMN_OBJID
+.It
+ATTR_CMN_PAROBJID
+.It
+ATTR_CMN_CRTIME
+.It
+ATTR_CMN_MODTIME
+.It
+ATTR_CMN_CHGTIME
+.It
+ATTR_CMN_ACCTIME
+.It
+ATTR_CMN_BKUPTIME
+.It
+ATTR_CMN_FNDRINFO
+.It
+ATTR_CMN_OWNERID
+.It
+ATTR_CMN_GRPID
+.It
+ATTR_CMN_ACCESSMASK
+.Pp
+.
+.It
+ATTR_DIR_ENTRYCOUNT
+.Pp
+.
+.It
+ATTR_FILE_DATALENGTH
+.It
+ATTR_FILE_DATAALLOCSIZE
+.It
+ATTR_FILE_RSRCLENGTH
+.It
+ATTR_FILE_RSRCALLOCSIZE
+.El
+.
+.Sh EXAMPLES
+.
+The following code searches a volume for files of the specified type and creator.
+.
+.Bd -literal
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/attr.h>
+#include <sys/time.h>
+.Pp
+.
+typedef struct attrlist attrlist_t;
+typedef struct fssearchblock fssearchblock_t;
+typedef struct searchstate searchstate_t;
+.Pp
+.
+struct SearchAttrBuf {
+    unsigned long   length;
+    char            finderInfo[32];
+};
+typedef struct SearchAttrBuf SearchAttrBuf;
+.Pp
+.
+struct ResultAttrBuf {
+    unsigned long   length;
+    attrreference_t name;
+    fsobj_id_t      parObjID;
+};
+typedef struct ResultAttrBuf ResultAttrBuf;
+.Pp
+.
+enum {
+    kMatchesPerCall = 16
+};
+.Pp
+.
+static int SearchFSDemo(
+    const char *volPath,
+    const char *type,
+    const char *creator
+)
+{
+    int             err;
+    fssearchblock_t searchBlock;
+    SearchAttrBuf   lower;
+    SearchAttrBuf   upper;
+    static const unsigned char kAllOnes[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    unsigned long   matchCount;
+    unsigned long   matchIndex;
+    unsigned long   options;
+    searchstate_t   state;
+    ResultAttrBuf * thisEntry;
+    attrlist_t      returnAttrList;
+    char            resultAttrBuf[  kMatchesPerCall
+                                  * (sizeof(ResultAttrBuf) + 64)];
+.Pp
+.
+    // resultAttrBuf is big enough for kMatchesPerCall entries,
+    // assuming that the average name length is less than 64.
+.Pp
+.
+    assert(strlen(type) == 4);
+    assert(strlen(creator) == 4);
+.Pp
+
+    memset(&searchBlock, 0, sizeof(searchBlock));
+    searchBlock.searchattrs.bitmapcount = ATTR_BIT_MAP_COUNT;
+    searchBlock.searchattrs.commonattr = ATTR_CMN_FNDRINFO;
+.Pp
+
+    memset(&lower, 0, sizeof(lower));
+    memset(&upper, 0, sizeof(upper));
+    lower.length = sizeof(lower);
+    upper.length = sizeof(upper);
+    memcpy(&lower.finderInfo[0], type, 4);
+    memcpy(&lower.finderInfo[4], creator, 4);
+    memcpy(&upper.finderInfo[0], kAllOnes, 4);
+    memcpy(&upper.finderInfo[4], kAllOnes, 4);
+    searchBlock.searchparams1 = &lower;
+    searchBlock.sizeofsearchparams1 = sizeof(lower);
+    searchBlock.searchparams2 = &upper;
+    searchBlock.sizeofsearchparams2 = sizeof(upper);
+.Pp
+
+    searchBlock.timelimit.tv_sec = 0;
+    searchBlock.timelimit.tv_usec = 100 * 1000;
+.Pp
+
+    searchBlock.maxmatches = kMatchesPerCall;
+.Pp
+
+    memset(&returnAttrList, 0, sizeof(returnAttrList));
+    returnAttrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+    returnAttrList.commonattr = ATTR_CMN_NAME | ATTR_CMN_PAROBJID;
+.Pp
+.
+    searchBlock.returnattrs = &returnAttrList;
+    searchBlock.returnbuffer = resultAttrBuf;
+    searchBlock.returnbuffersize = sizeof(resultAttrBuf);
+.Pp
+
+    options = SRCHFS_START | SRCHFS_MATCHFILES;
+.Pp
+
+    do {
+        err = searchfs(
+            volPath,
+            &searchBlock,
+            &matchCount,
+            0x08000103,        // scriptcode
+            options,
+            &state
+        );
+        if (err != 0) {
+            err = errno;
+        }
+        if ( (err == 0) || (err == EAGAIN) ) {
+            thisEntry = (ResultAttrBuf *) resultAttrBuf;
+.Pp
+
+            for (matchIndex = 0; matchIndex < matchCount; matchIndex++) {
+                printf("%08x ", thisEntry->parObjID.fid_objno);
+                printf(
+                    "%s\en",
+                    ((char *) &thisEntry->name)
+                    + thisEntry->name.attr_dataoffset
+                );
+.
+                // Advance to the next entry.
+.
+                thisEntry = (ResultAttrBuf *)
+                    ( ((char *) thisEntry) + thisEntry->length );
+            }
+        }
+.Pp
+
+        options &= ~SRCHFS_START;
+    } while (err == EAGAIN);
+.Pp
+
+    return err;
+}
+.Ed
+.
+.Sh SEE ALSO
+.
+.Xr getattrlist 2
+.
+.Sh HISTORY
+A
+.Fn searchfs
+function call appeared in Darwin 1.3.1 (Mac OS X version 10.0).
+.
diff --git a/bsd/man/man2/select.2 b/bsd/man/man2/select.2
index 39fd5d84b..f4c446606 100644
--- a/bsd/man/man2/select.2
+++ b/bsd/man/man2/select.2
@@ -50,6 +50,7 @@
 .Fn FD_SET fd &fdset
 .Fn FD_CLR fd &fdset
 .Fn FD_ISSET fd &fdset
+.Fn FD_COPY &fdset_orig &fdset_copy
 .Fn FD_ZERO &fdset
 .Sh DESCRIPTION
 .Fn Select
@@ -66,7 +67,9 @@ The first
 descriptors are checked in each set;
 i.e., the descriptors from 0 through
 .Fa nfds Ns No -1
-in the descriptor sets are examined.
+in the descriptor sets are examined.
+(Example: if you have set two file descriptors 4 and 17,
+.Fa nfds
+should not be 2, but rather 17 + 1, that is, 18.)
 On return,
 .Fn select
 replaces the given descriptor sets
@@ -97,6 +100,11 @@ is non-zero if
 is a member of
 .Fa fdset ,
 zero otherwise.
+.Fn FD_COPY &fdset_orig &fdset_copy
+replaces an already allocated
+.Fa &fdset_copy
+file descriptor set with a copy of
+.Fa &fdset_orig .
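+.Pp
+As an illustration (a minimal sketch; it assumes an already open
+descriptor
+.Fa sock ) ,
+the following fragment waits up to five seconds for
+.Fa sock
+to become readable.
+Note that the first argument is the highest descriptor plus one, not the
+number of descriptors being watched:
+.Bd -literal -offset indent
+fd_set readfds;
+struct timeval tv;
+
+FD_ZERO(&readfds);
+FD_SET(sock, &readfds);
+tv.tv_sec = 5;
+tv.tv_usec = 0;
+if (select(sock + 1, &readfds, (fd_set *)0, (fd_set *)0, &tv) > 0 &&
+    FD_ISSET(sock, &readfds)) {
+	/* sock is ready for reading */
+}
+.Ed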
 The behavior of these macros is undefined if a descriptor value is less
 than zero or greater than or equal to
 .Dv FD_SETSIZE ,
diff --git a/bsd/man/man2/semctl.2 b/bsd/man/man2/semctl.2
index 2a7e8eb3b..f5d6e8c32 100644
--- a/bsd/man/man2/semctl.2
+++ b/bsd/man/man2/semctl.2
@@ -31,8 +31,6 @@
 .Sh NAME
 .Nm semctl
 .Nd control operations on a semaphore set
-.Sh LIBRARY
-.Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/ipc.h
diff --git a/bsd/man/man2/semget.2 b/bsd/man/man2/semget.2
index 47ef04913..8705b29e1 100644
--- a/bsd/man/man2/semget.2
+++ b/bsd/man/man2/semget.2
@@ -31,8 +31,6 @@
 .Sh NAME
 .Nm semget
 .Nd obtain a semaphore id
-.Sh LIBRARY
-.Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/ipc.h
diff --git a/bsd/man/man2/semop.2 b/bsd/man/man2/semop.2
index 94896e750..82701cd74 100644
--- a/bsd/man/man2/semop.2
+++ b/bsd/man/man2/semop.2
@@ -31,8 +31,6 @@
 .Sh NAME
 .Nm semop
 .Nd atomic array of operations on a semaphore set
-.Sh LIBRARY
-.Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/ipc.h
diff --git a/bsd/man/man2/send.2 b/bsd/man/man2/send.2
index 2d9daca68..36bf6c6c8 100644
--- a/bsd/man/man2/send.2
+++ b/bsd/man/man2/send.2
@@ -47,7 +47,7 @@
 .Ft ssize_t
 .Fn send "int s" "const void *msg" "size_t len" "int flags"
 .Ft ssize_t
-.Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "int tolen"
+.Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "socklen_t tolen"
 .Ft ssize_t
 .Fn sendmsg "int s" "const struct msghdr *msg" "int flags"
 .Sh DESCRIPTION
diff --git a/bsd/man/man2/setattrlist.2 b/bsd/man/man2/setattrlist.2
new file mode 100644
index 000000000..d2fbb6b53
--- /dev/null
+++ b/bsd/man/man2/setattrlist.2
@@ -0,0 +1,363 @@
+.\" Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+.\"
+.\" The contents of this file constitute Original Code as defined in and
+.\" are subject to the Apple Public Source License Version 1.1 (the
+.\" "License").  You may not use this file except in compliance with the
+.\" License.  Please obtain a copy of the License at
+.\" http://www.apple.com/publicsource and read it before using this file.
+.\"
+.\" This Original Code and all software distributed under the License are
+.\" distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+.\" License for the specific language governing rights and limitations
+.\" under the License.
+.\"
+.\" @(#)setattrlist.2
+.
+.Dd December 15, 2003
+.Dt SETATTRLIST 2
+.Os Darwin
+.Sh NAME
+.Nm setattrlist
+.Nd set file system attributes
+.Sh SYNOPSIS
+.Fd #include <sys/attr.h>
+.Fd #include <unistd.h>
+.Ft int
+.Fn setattrlist "const char *path" "struct attrlist *attrList" "void *attrBuf" "size_t attrBufSize" "unsigned long options"
+.
+.Sh DESCRIPTION
+The
+.Fn setattrlist
+function sets attributes (that is, metadata) of file system objects.
+It is the logical opposite of
+.Xr getattrlist 2 .
+The function sets attributes of the file system object specified by
+.Fa path
+from the values in the buffer specified by
+.Fa attrBuf
+and
+.Fa attrBufSize .
+The
+.Fa attrList
+parameter determines what attributes are set.
+The
+.Fa options
+parameter lets you control specific aspects of the function's behaviour.
+.Pp
+.
+The
+.Fn setattrlist
+function is only supported by certain volume format implementations.
+For maximum compatibility, client programs should use high-level APIs +(such as the Carbon File Manager) to access file system attributes. +These high-level APIs include logic to emulate file system attributes +on volumes that don't support +.Fn setattrlist . +.Pp +. +.\" path parameter +. +The +.Fa path +parameter must reference a valid file system object. +All directories listed in the path name leading to the object +must be searchable. +You must own the file system object in order to set any of the +following attributes: +.Pp +. +.Bl -item -compact +.It +ATTR_CMN_GRPID +.It +ATTR_CMN_ACCESSMASK +.It +ATTR_CMN_FLAGS +.It +ATTR_CMN_CRTIME +.It +ATTR_CMN_MODTIME +.It +ATTR_CMN_CHGTIME +.It +ATTR_CMN_ACCTIME +.El +.Pp +. +You must be root (that is, your process's effective UID must be 0) in order to change the +.Dv ATTR_CMN_OWNERID +attribute. +Setting other attributes requires that you have write access to the object. +.Pp +. +.\" attrList parameter +. +The +.Fa attrList +parameter is a pointer to an +.Vt attrlist +structure. +You are responsible for filling out all fields of this structure before calling the function. +See the discussion of the +.Xr getattrlist 2 +function for a detailed description of this structure. +To set an attribute you must set the corresponding bit in the appropriate +.Vt attrgroup_t +field of the +.Vt attrlist +structure. +.Pp +. +.\" attrBuf and attrBufSize parameters +. +The +.Fa attrBuf +and +.Fa attrBufSize +parameters specify a buffer that contains the attribute values to set. +Attributes are packed in exactly the same way as they are returned from +.Xr getattrlist 2 +except that, when setting attributes, the buffer does not include the leading +.Vt unsigned long +length value. +.Pp +. +.\" option parameter +. +The +.Fa options +parameter is a bit set that controls the behaviour of +.Fn setattrlist . +The following option bits are defined. +. +.Bl -tag -width XXXbitmapcount +. +.It FSOPT_NOFOLLOW +If this bit is set, +.Fn setattrlist +will not follow a symlink if it occurs as +the last component of +.Fa path . +. +.El +. +.Sh RETURN VALUES +Upon successful completion a value of 0 is returned. +Otherwise, a value of -1 is returned and +.Va errno +is set to indicate the error. +. +.Sh COMPATIBILITY +Not all volumes support +.Fn setattrlist . +However, if a volume supports +.Xr getattrlist 2 , +it must also support +.Fn setattrlist . +See the documentation for +.Xr getattrlist 2 +for details on how to tell whether a volume supports it. +.Pp +. +The +.Fn setattrlist +function has been undocumented for more than two years. +In that time a number of volume format implementations have been created without +a proper specification for the behaviour of this routine. +You may encounter volume format implementations with slightly different +behaviour than what is described here. +Your program is expected to be tolerant of this variant behaviour. +.Pp +. +If you're implementing a volume format that supports +.Fn setattrlist , +you should be careful to support the behaviour specified by this document. +. +.Sh ERRORS +.Fn setattrlist +will fail if: +.Bl -tag -width Er +. +.It Bq Er ENOTSUP +The volume does not support +.Fn setattrlist . +. +.It Bq Er ENOTDIR +A component of the path prefix is not a directory. +. +.It Bq Er ENAMETOOLONG +A component of a path name exceeded +.Dv NAME_MAX +characters, or an entire path name exceeded +.Dv PATH_MAX +characters. +. +.It Bq Er ENOENT +The file system object does not exist. +. +.It Bq Er EROFS +The volume is read-only. +. 
+.It Bq Er EACCES
+Search permission is denied for a component of the path prefix.
+.
+.It Bq Er ELOOP
+Too many symbolic links were encountered in translating the pathname.
+.
+.It Bq Er EFAULT
+.Fa path ,
+.Fa attrList ,
+or
+.Fa attrBuf
+points to an invalid address.
+.
+.It Bq Er EINVAL
+The
+.Fa bitmapcount
+field of
+.Fa attrList
+is not
+.Dv ATTR_BIT_MAP_COUNT .
+.
+.It Bq Er EINVAL
+You try to set an invalid attribute.
+.
+.It Bq Er EINVAL
+You try to set an attribute that is read-only.
+.
+.It Bq Er EINVAL
+You try to set volume attributes and directory or file attributes at the same time.
+.
+.It Bq Er EINVAL
+You try to set volume attributes but
+.Fa path
+does not reference the root of the volume.
+.
+.It Bq Er EPERM
+You try to set an attribute that can only be set by the owner.
+.
+.It Bq Er EACCES
+You try to set an attribute that's only settable if you have write permission,
+and you do not have write permission.
+.
+.It Bq Er EINVAL
+The buffer size you specified in
+.Fa attrBufSize
+is too small to hold all the attributes that you are trying to set.
+.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to the file system.
+.El
+.Pp
+.
+.Sh CAVEATS
+.
+If you try to set any volume attributes, you must set
+.Dv ATTR_VOL_INFO
+in the
+.Fa volattr
+field, even though it consumes no data from the attribute buffer.
+.Pp
+.
+For more caveats, see also the compatibility notes above.
+.
+.Sh EXAMPLES
+.
+The following code shows how to set the file type and creator of
+a file by getting the
+.Dv ATTR_CMN_FNDRINFO
+attribute using
+.Xr getattrlist 2 ,
+modifying the appropriate fields of the 32-byte Finder information structure,
+and then setting the attribute back using
+.Fn setattrlist .
+This assumes that the target volume supports the required attributes.
+.
+.Bd -literal
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/attr.h>
+#include <sys/errno.h>
+#include <unistd.h>
+#include <sys/vnode.h>
+.Pp
+.
+typedef struct attrlist attrlist_t;
+.Pp
+.
+struct FInfoAttrBuf {
+    unsigned long length;
+    fsobj_type_t  objType;
+    char          finderInfo[32];
+};
+typedef struct FInfoAttrBuf FInfoAttrBuf;
+.Pp
+.
+static int FInfoDemo(
+    const char *path,
+    const char *type,
+    const char *creator
+)
+{
+    int          err;
+    attrlist_t   attrList;
+    FInfoAttrBuf attrBuf;
+.Pp
+
+    assert( strlen(type) == 4 );
+    assert( strlen(creator) == 4 );
+.Pp
+.
+    memset(&attrList, 0, sizeof(attrList));
+    attrList.bitmapcount = ATTR_BIT_MAP_COUNT;
+    attrList.commonattr  = ATTR_CMN_OBJTYPE | ATTR_CMN_FNDRINFO;
+.Pp
+
+    err = getattrlist(path, &attrList, &attrBuf, sizeof(attrBuf), 0);
+    if (err != 0) {
+        err = errno;
+    }
+.Pp
+
+    if ( (err == 0) && (attrBuf.objType != VREG) ) {
+        fprintf(stderr, "Not a standard file.\en");
+        err = EINVAL;
+    } else if (err == 0) {
+        memcpy( &attrBuf.finderInfo[0], type, 4 );
+        memcpy( &attrBuf.finderInfo[4], creator, 4 );
+
+        attrList.commonattr = ATTR_CMN_FNDRINFO;
+        err = setattrlist(
+            path,
+            &attrList,
+            attrBuf.finderInfo,
+            sizeof(attrBuf.finderInfo),
+            0
+        );
+    }
+.Pp
+    return err;
+}
+.Ed
+.Pp
+.
+.Sh SEE ALSO
+.
+.Xr chflags 2 ,
+.Xr chmod 2 ,
+.Xr chown 2 ,
+.Xr getattrlist 2 ,
+.Xr getdirentriesattr 2 ,
+.Xr searchfs 2 ,
+.Xr utimes 2
+.
+.Sh HISTORY
+A
+.Fn setattrlist
+function call appeared in Darwin 1.3.1 (Mac OS X version 10.0).
+.
diff --git a/bsd/man/man2/setxattr.2 b/bsd/man/man2/setxattr.2
new file mode 100644
index 000000000..01b444355
--- /dev/null
+++ b/bsd/man/man2/setxattr.2
@@ -0,0 +1,175 @@
+.\"
+.\" Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+.\"
+.\" @APPLE_LICENSE_HEADER_START@
+.\"
+.\" This file contains Original Code and/or Modifications of Original Code
+.\" as defined in and that are subject to the Apple Public Source License
+.\" Version 2.0 (the 'License'). You may not use this file except in
+.\" compliance with the License. Please obtain a copy of the License at
+.\" http://www.opensource.apple.com/apsl/ and read it before using this
+.\" file.
+.\"
+.\" The Original Code and all software distributed under the License are
+.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+.\" Please see the License for the specific language governing rights and
+.\" limitations under the License.
+.\"
+.\" @APPLE_LICENSE_HEADER_END@
+.\"
+.Dd Oct 19, 2004
+.Dt SETXATTR 2
+.Os "Mac OS X"
+.Sh NAME
+.Nm setxattr ,
+.Nm fsetxattr
+.Nd set an extended attribute value
+.Sh SYNOPSIS
+.Fd #include <sys/xattr.h>
+.Ft int
+.Fn setxattr "const char *path" "const char *name" "void *value" "size_t size" "u_int32_t position" "int options"
+.Ft int
+.Fn fsetxattr "int fd" "const char *name" "void *value" "size_t size" "u_int32_t position" "int options"
+.Sh DESCRIPTION
+Extended attributes extend the basic attributes associated with files and
+directories in the file system.
+They are stored as name:data pairs associated with file system objects
+(files, directories, symlinks, etc).
+.Pp
+.Fn setxattr
+associates
+.Fa name
+and the data in
+.Fa value
+together as an attribute of
+.Fa path .
+.Pp
+An extended attribute's
+.Fa name
+is a simple NULL-terminated UTF-8 string.
+.Fa Value
+is a pointer to a data buffer of
+.Fa size
+bytes containing textual or binary data to be associated with the
+extended attribute.
+.Fa Position
+specifies the offset within the extended attribute.
+In the current implementation, only the resource fork extended attribute
+makes use of this argument.
+For all others,
+.Fa position
+is reserved and should be set to zero.
+.Pp
+.Fa options
+controls how the attribute is set:
+.Pp
+.Bl -tag -width XATTR_NOFOLLOW
+.It Dv XATTR_NOFOLLOW
+do not follow symbolic links.
+.Fn setxattr
+normally sets attributes on the target of
+.Fa path
+if it is a symbolic link.
+With this option,
+.Fn setxattr
+will act on the link itself.
+.It Dv XATTR_CREATE
+fail if the named attribute already exists.
+.It Dv XATTR_REPLACE
+fail if the named attribute does not exist.
+Failure to specify
+.Dv XATTR_REPLACE
+or
+.Dv XATTR_CREATE
+allows creation and replacement.
+.El
+.Pp
+.Fn fsetxattr
+is identical to
+.Fn setxattr ,
+except that it sets an extended attribute on an open file referenced by
+file descriptor
+.Fa fd .
+.Sh RETURN VALUES
+On success, 0 is returned.
+On failure, -1 is returned and the global variable
+.Va errno
+is set as follows.
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EEXIST
+.Fa options
+contains
+.Dv XATTR_CREATE
+and the named attribute already exists.
+.It Bq Er ENOATTR
+.Fa options
+is set to
+.Dv XATTR_REPLACE
+and the named attribute does not exist.
+.It Bq Er ENOTSUP
+The file system does not support extended attributes or has them disabled.
+.It Bq Er EROFS
+The file system is mounted read-only.
+.It Bq Er ERANGE
+The data size of the attribute is out of range (some attributes have size
+restrictions).
+.It Bq Er EPERM +.\" EFTYPE could be more specific but isn't POSIX +Attributes cannot be associated with this type of object. For example, +attributes are not allowed for resource forks. +.It Bq Er EINVAL +.Fa name +or +.Fa options +is invalid. +.Fa name +must be valid UTF-8 and +.Fa options +must make sense. +.It Bq Er ENOTDIR +A component of +.Fa path +is not a directory. +.It Bq Er ENAMETOOLONG +.Fa name +exceeded +.Dv XATTR_MAXNAMELEN +UTF-8 bytes, or a component of +.Fa path +exceeded +.Dv NAME_MAX +characters, or the entire +.Fa path +exceeded +.Dv PATH_MAX +characters. +.It Bq Er EACCES +Search permission is denied for a component of +.Fa path +or permission to set the attribute is denied. +.It Bq Er ELOOP +Too many symbolic links were encountered resolving +.Fa path . +.It Bq Er EFAULT +.Fa path +or +.Fa name +points to an invalid address. +.It Bq Er EIO +An I/O error occurred while reading from or writing to the file system. +.It Bq Er E2BIG +The data size of the extended attribute is too large. +.It Bq Er ENOSPC +Not enough space left on the file system. +.El +.Sh SEE ALSO +.Xr getxattr 2 , +.Xr removexattr 2 , +.Xr listxattr 2 +.Sh HISTORY +.Fn setxattr +and +.Fn fsetxattr +first appeared in Mac OS X 10.4. diff --git a/bsd/man/man2/shmget.2 b/bsd/man/man2/shmget.2 index 55ad6fbdd..1d13c4960 100644 --- a/bsd/man/man2/shmget.2 +++ b/bsd/man/man2/shmget.2 @@ -46,7 +46,7 @@ .Fn shmget returns the shared memory identifier associated with the key .Fa key . - +.Pp A shared memory segment is created if either .Fa key is equal to IPC_PRIVATE, or @@ -54,7 +54,7 @@ is equal to IPC_PRIVATE, or does not have a shared memory segment identifier associated with it, and the IPC_CREAT bit is set in .Fa shmflg. - +.Pp If a new shared memory segment is created, the data structure associated with it (the .Va shmid_ds structure, see diff --git a/bsd/man/man2/shutdown.2 b/bsd/man/man2/shutdown.2 index 927e3bdf3..e6799e727 100644 --- a/bsd/man/man2/shutdown.2 +++ b/bsd/man/man2/shutdown.2 @@ -52,13 +52,19 @@ the socket associated with to be shut down. If .Fa how -is 0, further receives will be disallowed. +is +.Dv SHUT_RD , +further receives will be disallowed. If .Fa how -is 1, further sends will be disallowed. +is +.Dv SHUT_WR , +further sends will be disallowed. If .Fa how -is 2, further sends and receives will be disallowed. +is +.Dv SHUT_RDWR , +further sends and receives will be disallowed. .Sh DIAGNOSTICS A 0 is returned if the call succeeds, -1 if it fails. .Sh ERRORS diff --git a/bsd/man/man2/vfork.2 b/bsd/man/man2/vfork.2 index 06e201dfc..c8b1d3bbd 100644 --- a/bsd/man/man2/vfork.2 +++ b/bsd/man/man2/vfork.2 @@ -94,7 +94,7 @@ since buffered data would then be flushed twice.) .Xr fork 2 , .Xr execve 2 , .Xr sigaction 2 , -.Xr wait 2 , +.Xr wait 2 .Sh DIAGNOSTICS Same as for .Xr fork . diff --git a/bsd/man/man4/Makefile b/bsd/man/man4/Makefile index 031eb7e86..3cc1cc953 100644 --- a/bsd/man/man4/Makefile +++ b/bsd/man/man4/Makefile @@ -10,11 +10,13 @@ DATAFILES = \ arp.4 \ bpf.4 \ divert.4 \ + dummynet.4 \ faith.4 \ fd.4 \ gif.4 \ icmp.4 \ icmp6.4 \ + ifmib.4 \ inet.4 \ inet6.4 \ ip.4 \ diff --git a/bsd/man/man4/arp.4 b/bsd/man/man4/arp.4 index acdad7029..8cfc8d3a6 100644 --- a/bsd/man/man4/arp.4 +++ b/bsd/man/man4/arp.4 @@ -122,5 +122,4 @@ same Internet address. .%A Karels, M.J. 
 .%B "Trailer Encapsulations
 .%T RFC893
-.Re
-
+.Re
\ No newline at end of file
diff --git a/bsd/man/man4/bpf.4 b/bsd/man/man4/bpf.4
index b79476efc..17b9876c3 100644
--- a/bsd/man/man4/bpf.4
+++ b/bsd/man/man4/bpf.4
@@ -93,6 +93,10 @@ packet can be processed per write.
 Currently, only writes to Ethernets and
 .Tn SLIP
 links are supported.
+.Pp
+When the last minor device is opened, an additional minor device is
+created on demand.
+The maximum number of devices that can be created is controlled by the
+sysctl debug.bpf_maxdevices.
 .Sh IOCTLS
 The
 .Xr ioctl 2
diff --git a/bsd/man/man4/dummynet.4 b/bsd/man/man4/dummynet.4
new file mode 100644
index 000000000..fbd317bf4
--- /dev/null
+++ b/bsd/man/man4/dummynet.4
@@ -0,0 +1,64 @@
+.\"
+.\" $FreeBSD: /repoman/r/ncvs/src/share/man/man4/dummynet.4,v 1.4.2.12 2002/11/18 21:51:16 luigi Exp $
+.\"
+.Dd October 28, 2002
+.Dt DUMMYNET 4
+.Os Darwin
+.Sh NAME
+.Nm dummynet
+.Nd traffic shaper, bandwidth manager and delay emulator
+.Sh DESCRIPTION
+.Em dummynet
+is a system facility that permits the control of traffic
+going through the various network interfaces, by applying bandwidth
+and queue size limitations, implementing different scheduling and queue
+management policies, and emulating delays and losses.
+.Pp
+The user interface for
+.Em dummynet
+is implemented by the
+.Nm ipfw
+program, so the reader is referred to the
+.Xr ipfw 8
+manpage for a complete description of the capabilities of
+.Nm
+and how to use it.
+.Sh KERNEL OPTIONS
+The following options in the kernel configuration file are related to
+.Nm
+operation:
+.Bd -literal
+    IPFIREWALL               - enable ipfirewall (required for dummynet)
+    IPFIREWALL_VERBOSE       - enable firewall output
+    IPFIREWALL_VERBOSE_LIMIT - limit firewall output
+    DUMMYNET                 - enable dummynet operation
+    NMBCLUSTERS              - set the number of network packet buffers
+    HZ                       - set the timer granularity
+.Ed
+.Pp
+Generally, the following options are required:
+.Bd -literal
+    options IPFIREWALL
+    options DUMMYNET
+    options HZ=1000    # strongly recommended
+.Ed
+.Pp
+Additionally, one may want to increase the number
+of mbuf clusters (used to store network packets) according to the
+sum of the bandwidth-delay products and queue sizes of all configured
+pipes.
+.Sh SEE ALSO
+.Xr setsockopt 2 ,
+.Xr bridge 4 ,
+.Xr ip 4 ,
+.Xr ipfw 8 ,
+.Xr sysctl 8
+.Sh HISTORY
+.Nm
+was initially implemented as a testing tool for TCP congestion control
+by
+.An Luigi Rizzo Aq luigi@iet.unipi.it ,
+as described in the January 1997 issue of ACM Computer Communication
+Review.
+It was later modified to work at the IP and bridging
+level, integrated with the IPFW packet filter, and extended to
+support multiple queueing and scheduling policies.
diff --git a/bsd/man/man4/icmp6.4 b/bsd/man/man4/icmp6.4
index fe6cc97e4..28b1325d1 100644
--- a/bsd/man/man4/icmp6.4
+++ b/bsd/man/man4/icmp6.4
@@ -1,32 +1,9 @@
-.\" Copyright (C) 1999 WIDE Project.
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3.
Neither the name of the project nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. +.\" $NetBSD: icmp6.4,v 1.13 2005/01/11 06:01:41 itojun Exp $ +.\" $KAME: icmp6.4,v 1.6 2004/12/27 05:30:56 itojun Exp $ +.\" $OpenBSD: icmp6.4,v 1.19 2004/12/23 20:33:03 jaredy Exp $ .\" .\" Copyright (c) 1986, 1991, 1993 -.\" The Regents of the University of California. All rights reserved. +.\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions @@ -36,11 +13,7 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. -.\" 4. Neither the name of the University nor the names of its contributors +.\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" @@ -55,52 +28,42 @@ .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.\" -.\" KAME $Id: icmp6.4,v 1.2 2002/04/17 00:18:23 lindak Exp $ -.\" $FreeBSD: src/share/man/man4/icmp6.4,v 1.1.2.7 2001/12/17 11:30:12 ru Exp $ -.\" -.Dd March 13, 2000 +.Dd December 20, 2004 .Dt ICMP6 4 .Os -.\" .Sh NAME .Nm icmp6 .Nd Internet Control Message Protocol for IPv6 -.\" .Sh SYNOPSIS -.In sys/types.h .In sys/socket.h .In netinet/in.h .In netinet/icmp6.h .Ft int -.Fn socket AF_INET6 SOCK_RAW proto -.\" +.Fn socket AF_INET6 SOCK_RAW IPPROTO_ICMPV6 .Sh DESCRIPTION -.Tn ICMPv6 -is the error and control message protocol used -by -.Tn IPv6 -and the Internet protocol family. +ICMPv6 is the error and control message protocol used by IPv6 and the +IPv6 protocol family (see +.Xr ip6 4 +and +.Xr inet6 4 ) . It may be accessed through a .Dq raw socket for network monitoring and diagnostic functions. +.Pp The .Fa proto -parameter to the socket call to create an -.Tn ICMPv6 -socket is obtained from -.Xr getprotobyname 3 , -or you can use -.Dv IPPROTO_ICMPV6 . 
-.Tn ICMPv6
-sockets are connectionless, and are normally used with the
+parameter to the
+.Xr socket 2
+call to create an ICMPv6 socket may be obtained from
+.Xr getprotobyname 3 .
+ICMPv6 sockets are connectionless, and are normally used with the
 .Xr sendto 2
 and
 .Xr recvfrom 2
 calls, though the
 .Xr connect 2
 call may also be used to fix the destination for future packets
-(in which case the
+(in which case
 .Xr read 2
 or
 .Xr recv 2
@@ -110,158 +73,183 @@ or
 .Xr send 2
 system calls may be used).
 .Pp
-Outgoing packets automatically have an
-.Tn IPv6
-header prepended to them
+Outgoing packets automatically have an IPv6 header prepended to them
 (based on the destination address).
-.Tn ICMPv6
-pseudo header checksum field
-.Pq Li icmp6_cksum
-will be filled automatically by the kernel.
-Incoming packets are received without the
-.Tn IPv6
-header nor IPv6 extension headers.
-Notice that this behavior is opposite from
-.Tn IPv4
-raw sockets and.
-.Tn ICMPv4
-sockets.
+Incoming packets on the socket are received with the IPv6 header and any
+extension headers removed.
+.Ss Types
+ICMPv6 messages are classified according to the type and code fields
+present in the ICMPv6 header.
+The abbreviations for the types and codes may be used in rules in
+.Xr pf.conf 5 .
+The following types are defined:
+.Bl -column x xxxxxxxxxxxx -offset indent
+.It Sy Num Ta Sy Abbrev. Ta Sy Description
+.It 1 Ta unreach Ta "Destination unreachable"
+.It 2 Ta toobig Ta "Packet too big"
+.It 3 Ta timex Ta "Time exceeded"
+.It 4 Ta paramprob Ta "Invalid IPv6 header"
+.It 128 Ta echoreq Ta "Echo service request"
+.It 129 Ta echorep Ta "Echo service reply"
+.It 130 Ta groupqry Ta "Group membership query"
+.It 130 Ta listqry Ta "Multicast listener query"
+.It 131 Ta grouprep Ta "Group membership report"
+.It 131 Ta listenrep Ta "Multicast listener report"
+.It 132 Ta groupterm Ta "Group membership termination"
+.It 132 Ta listendone Ta "Multicast listener done"
+.It 133 Ta routersol Ta "Router solicitation"
+.It 134 Ta routeradv Ta "Router advertisement"
+.It 135 Ta neighbrsol Ta "Neighbor solicitation"
+.It 136 Ta neighbradv Ta "Neighbor advertisement"
+.It 137 Ta redir Ta "Shorter route exists"
+.It 138 Ta routrrenum Ta "Route renumbering"
+.It 139 Ta fqdnreq Ta "FQDN query"
+.It 139 Ta niqry Ta "Node information query"
+.It 139 Ta wrureq Ta "Who-are-you request"
+.It 140 Ta fqdnrep Ta "FQDN reply"
+.It 140 Ta nirep Ta "Node information reply"
+.It 140 Ta wrurep Ta "Who-are-you reply"
+.It 200 Ta mtraceresp Ta "mtrace response"
+.It 201 Ta mtrace Ta "mtrace messages"
+.El
+.Pp
+The following codes are defined:
+.Bl -column x xxxxxxxxxxxx xxxxxxxx -offset indent
+.It Sy Num Ta Sy Abbrev. Ta Sy Type Ta
+.Sy Description
+.It 0 Ta noroute-unr Ta unreach Ta "No route to destination"
+.It 1 Ta admin-unr Ta unreach Ta "Administratively prohibited"
+.It 2 Ta beyond-unr Ta unreach Ta "Beyond scope of source address"
+.It 2 Ta notnbr-unr Ta unreach Ta "Not a neighbor (obsolete)"
+.It 3 Ta addr-unr Ta unreach Ta "Address unreachable"
+.It 4 Ta port-unr Ta unreach Ta "Port unreachable"
+.It 0 Ta transit Ta timex Ta "Time exceeded in transit"
+.It 1 Ta reassemb Ta timex Ta "Time exceeded in reassembly"
+.It 0 Ta badhead Ta paramprob Ta "Erroneous header field"
+.It 1 Ta nxthdr Ta paramprob Ta "Unrecognized next header"
+.It 2 Ta "" Ta paramprob Ta "Unrecognized option"
+.It 0 Ta redironlink Ta redir Ta "Redirection to on-link node"
+.It 1 Ta redirrouter Ta redir Ta "Redirection to better router"
+.El
+.Ss Headers
+All ICMPv6 messages are prefixed with an ICMPv6 header.
+This header corresponds to the
+.Vt icmp6_hdr
+structure and has the following definition:
+.Bd -literal -offset indent
+struct icmp6_hdr {
+	u_int8_t	icmp6_type;	/* type field */
+	u_int8_t	icmp6_code;	/* code field */
+	u_int16_t	icmp6_cksum;	/* checksum field */
+	union {
+		u_int32_t	icmp6_un_data32[1]; /* type-specific */
+		u_int16_t	icmp6_un_data16[2]; /* type-specific */
+		u_int8_t	icmp6_un_data8[4];  /* type-specific */
+	} icmp6_dataun;
+} __packed;
+
+#define icmp6_data32	icmp6_dataun.icmp6_un_data32
+#define icmp6_data16	icmp6_dataun.icmp6_un_data16
+#define icmp6_data8	icmp6_dataun.icmp6_un_data8
+#define icmp6_pptr	icmp6_data32[0]	/* parameter prob */
+#define icmp6_mtu	icmp6_data32[0]	/* packet too big */
+#define icmp6_id	icmp6_data16[0]	/* echo request/reply */
+#define icmp6_seq	icmp6_data16[1]	/* echo request/reply */
+#define icmp6_maxdelay	icmp6_data16[0]	/* mcast group membership */
+.Ed
+.Pp
+.Va icmp6_type
+describes the type of the message.
+Suitable values are defined in
+.Aq Pa netinet/icmp6.h .
+.Va icmp6_code
+describes the sub-type of the message and depends on
+.Va icmp6_type .
+.Va icmp6_cksum
+contains the checksum for the message and is filled in by the
+kernel on outgoing messages.
+The other fields are used for type-specific purposes.
+.Ss Filters
+Because of the extra functionality of ICMPv6 in comparison to ICMPv4,
+a larger number of messages may potentially be received on an ICMPv6
+socket.
+Input filters may therefore be used to restrict input to a subset of the
+incoming ICMPv6 messages so only interesting messages are returned by the
+.Xr recv 2
+family of calls to an application.
+.Pp
+The
+.Vt icmp6_filter
+structure may be used to refine the input message set according to the
+ICMPv6 type.
+By default, all message types are allowed on newly created raw ICMPv6
+sockets.
+The following macros may be used to refine the input set:
+.Bl -tag -width Ds
+.It Fn "void ICMP6_FILTER_SETPASSALL" "struct icmp6_filter *filterp"
+Allow all incoming messages.
+.Va filterp
+is modified to allow all message types.
+.It Fn "void ICMP6_FILTER_SETBLOCKALL" "struct icmp6_filter *filterp"
+Ignore all incoming messages.
+.Va filterp
+is modified to ignore all message types.
+.It Fn "void ICMP6_FILTER_SETPASS" "int type" \
+    "struct icmp6_filter *filterp"
+Allow ICMPv6 messages with the given
+.Fa type .
+.Va filterp
+is modified to allow such messages.
+.It Fn "void ICMP6_FILTER_SETBLOCK" "int type" \
+    "struct icmp6_filter *filterp"
+Ignore ICMPv6 messages with the given
+.Fa type .
+.Va filterp
+is modified to ignore such messages.
+.It Fn "int ICMP6_FILTER_WILLPASS" "int type" \
+    "const struct icmp6_filter *filterp"
+Determine if the given filter will allow an ICMPv6 message of the given
+type.
+.It Fn "int ICMP6_FILTER_WILLBLOCK" "int type" \
+    "const struct icmp6_filter *filterp"
+Determine if the given filter will ignore an ICMPv6 message of the given
+type.
+.El
 .Pp
-The current filter is fetched and stored using
+The
 .Xr getsockopt 2
 and
 .Xr setsockopt 2
-with a level of
+calls may be used to obtain and install the filter on ICMPv6 sockets at
+option level
 .Dv IPPROTO_ICMPV6
-and an option name of
-.Dv ICMP6_FILTER .
-.Pp
-Six macros operate on an icmp6_filter structure:
-.\" is "Fn" legal for macros?
-.Bl -item -offset indent
-.It
-.Ft void
-.Fn ICMP6_FILTER_SETPASSALL "struct icmp6_filter *filterp"
-.It
-.Ft void
-.Fn ICMP6_FILTER_SETBLOCKALL "struct icmp6_filter *filterp"
-.It
-.Ft void
-.Fn ICMP6_FILTER_SETPASS "int type" "struct icmp6_filter *filterp"
-.It
-.Ft void
-.Fn ICMP6_FILTER_SETBLOCK "int type" "struct icmp6_filter *filterp"
-.It
-.Ft int
-.Fn ICMP6_FILTER_WILLPASS "int type" "const struct icmp6_filter *filterp"
-.It
-.Ft int
-.Fn ICMP6_FILTER_WILLBLOCK "int type" "const struct icmp6_filter *filterp"
-.El
-.Pp
-The first argument to the last four macros
-(an integer)
-is an
-.Tn ICMPv6
-message type, between 0 and 255.
-The pointer argument to all six
-macros is a pointer to a filter that is modified by the first four
-macros examined by the last two macros.
-.Pp
-The first two macros,
-.Dv SETPASSALL
-and
-.Dv SETBLOCKALL ,
-let us specify that
-all
-.Tn ICMPv6
-messages are passed to the application or that all
-.Tn ICMPv6
-messages are blocked from being passed to the application.
-.Pp
-The next two macros,
-.Dv SETPASS
-and
-.Dv SETBLOCK ,
-let us specify that
-messages of a given
-.Tn ICMPv6
-type should be passed to the application
-or not passed to the application
-(blocked).
-.Pp
-The final two macros,
-.Dv WILLPASS
-and
-.Dv WILLBLOCK ,
-return true or false
-depending whether the specified message type is passed to the
-application or blocked from being passed to the application by the
-filter pointed to by the second argument.
-.Pp
-When an
-.Tn ICMPv6
-raw socket is created, it will by default pass all
-.Tn ICMPv6
-message types to the application.
-.Pp
-For further discussions see RFC2292.
-.\"
-.Sh ERRORS
-A socket operation may fail with one of the following errors returned:
-.Bl -tag -width Er
-.It Bq Er EISCONN
-when trying to establish a connection on a socket which
-already has one, or when trying to send a datagram with the destination
-address specified and the socket is already connected;
-.It Bq Er ENOTCONN
-when trying to send a datagram, but
-no destination address is specified, and the socket hasn't been
-connected;
-.It Bq Er ENOBUFS
-when the system runs out of memory for
-an internal data structure;
-.It Bq Er EADDRNOTAVAIL
-when an attempt is made to create a
-socket with a network address for which no network interface exists.
-.El
-.\"
+and name
+.Dv ICMP6_FILTER
+with a pointer to the
+.Vt icmp6_filter
+structure as the option value.
 .Sh SEE ALSO
+.Xr getsockopt 2 ,
 .Xr recv 2 ,
 .Xr send 2 ,
+.Xr setsockopt 2 ,
+.Xr socket 2 ,
+.Xr getprotobyname 3 ,
 .Xr inet6 4 ,
-.Xr intro 4 ,
-.Xr ip6 4
+.Xr ip6 4 ,
+.Xr netintro 4
 .Rs
 .%A W.
Stevens .%A M. Thomas -.%R RFC -.%N 2292 +.%T Advanced Sockets API for IPv6 +.%N RFC 2292 .%D February 1998 -.%T "Advanced Sockets API for IPv6" .Re .Rs .%A A. Conta .%A S. Deering -.%R RFC -.%N 2463 +.%T "Internet Control Message Protocol (ICMPv6) for the Internet" \ + "Protocol Version 6 (IPv6) Specification" +.%N RFC 2463 .%D December 1998 -.%T "Internet Control Message Protocol (ICMPv6) for the Internet Protocol Version 6 (IPv6) Specification" .Re -.\" -.Sh HISTORY -The implementation is based on KAME stack -(which is descendant of WIDE hydrangea IPv6 stack kit). -.Pp -Part of the document was shamelessly copied from RFC2292. diff --git a/bsd/man/man4/ifmib.4 b/bsd/man/man4/ifmib.4 new file mode 100644 index 000000000..db51a73d2 --- /dev/null +++ b/bsd/man/man4/ifmib.4 @@ -0,0 +1,196 @@ +.\" Copyright 1996 Massachusetts Institute of Technology +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that both the above copyright notice and this +.\" permission notice appear in all copies, that both the above +.\" copyright notice and this permission notice appear in all +.\" supporting documentation, and that the name of M.I.T. not be used +.\" in advertising or publicity pertaining to distribution of the +.\" software without specific, written prior permission. M.I.T. makes +.\" no representations about the suitability of this software for any +.\" purpose. It is provided "as is" without express or implied +.\" warranty. +.\" +.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS +.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT +.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: /repoman/r/ncvs/src/share/man/man4/ifmib.4,v 1.20 2004/07/03 18:29:20 ru Exp $ +.\" +.Dd November 15, 1996 +.Dt IFMIB 4 +.Os +.Sh NAME +.Nm ifmib +.Nd Management Information Base for network interfaces +.Sh SYNOPSIS +.In sys/types.h +.In sys/socket.h +.In sys/sysctl.h +.In sys/time.h +.In net/if.h +.In net/if_mib.h +.Sh DESCRIPTION +The +.Nm +facility is an application of the +.Xr sysctl 3 +interface to provide management information about network interfaces +to client applications such as +.Xr netstat 1 , +.Xr slstat 8 , +and +.Tn SNMP +management agents. +This information is structured as a table, where +each row in the table represents a logical network interface (either a +hardware device or a software pseudo-device like +.Xr lo 4 ) . +There are two columns in the table, each containing a single +structure: one column contains generic information relevant to all +interfaces, and the other contains information specific to the +particular class of interface. +(Generally the latter will implement +the +.Tn SNMP +.Tn MIB +defined for that particular interface class, if one exists and can be +implemented in the kernel.) 
+.Pp +The +.Nm +facility is accessed via the +.Dq Li net.link.generic +branch of the +.Xr sysctl 3 +MIB. +The manifest constants for each level in the +.Xr sysctl 3 +.Ar name +are defined in +.In net/if_mib.h . +The index of the last row in the table is given by +.Dq Li net.link.generic.system.ifcount +(or, using the manifest constants, +.Dv CTL_NET , +.Dv PF_LINK , +.Dv NETLINK_GENERIC , +.Dv IFMIB_SYSTEM , +.Dv IFMIB_IFCOUNT ) . +A management application searching for a particular interface should +start with row 1 and continue through the table row-by-row until the +desired interface is found, or the interface count is reached. +Note that the table may be sparse, i.e., a given row may not exist, +indicated by an +.Va errno +of +.Er ENOENT . +Such an error should be ignored, and the next row should be checked. +.Pp +The generic interface information, common to all interfaces, +can be accessed via the following procedure: +.Bd -literal -offset indent +int +get_ifmib_general(int row, struct ifmibdata *ifmd) +{ + int name[6]; + size_t len; + + name[0] = CTL_NET; + name[1] = PF_LINK; + name[2] = NETLINK_GENERIC; + name[3] = IFMIB_IFDATA; + name[4] = row; + name[5] = IFDATA_GENERAL; + + len = sizeof(*ifmd); + + return sysctl(name, 6, ifmd, &len, (void *)0, 0); +} +.Ed +.Pp +The fields in +.Li struct ifmibdata +are as follows: +.Bl -tag -width "ifmd_snd_drops" +.It Li ifmd_name +.Pq Li "char []" +the name of the interface, including the unit number +.It Li ifmd_pcount +.Pq Li int +the number of promiscuous listeners +.It Li ifmd_flags +.Pq Li int +the interface's flags (defined in +.In net/if.h ) +.It Li ifmd_snd_len +.Pq Li int +the current instantaneous length of the send queue +.It Li ifmd_snd_drops +.Pq Li int +the number of packets dropped at this interface because the send queue +was full +.It Li ifmd_data +.Pq Li struct if_data +more information from a structure defined in +.In net/if.h +(see +.Xr if_data 9 ) +.El +.Pp +Class-specific information can be retrieved by examining the +.Dv IFDATA_LINKSPECIFIC +column instead. +Note that the form and length of the structure will +depend on the class of interface. +For +.Dv IFT_ETHER , +.Dv IFT_ISO88023 , +and +.Dv IFT_STARLAN +interfaces, the structure is called +.Dq Li struct ifmib_iso_8802_3 +(defined in +.In net/if_mib.h ) , +and implements a superset of the +.Tn "RFC 1650" +MIB for Ethernet-like networks. +.\" This will eventually be defined in an ethermib(4) page. +For +.Dv IFT_SLIP , +the structure is a +.Dq Li struct sl_softc +.Pq In net/if_slvar.h . +.Sh SEE ALSO +.Xr sysctl 3 , +.Xr intro 4 , +.Xr ifnet 9 +.\" .Xr ethermib 4 , +.Rs +.%T "Definitions of Managed Objects for the Ethernet-like Interface Types Using SMIv2" +.%A F. Kastenholz +.%D August 1994 +.%O RFC 1650 +.Re +.Sh BUGS +Many Ethernet-like interfaces do not yet support the Ethernet MIB; +the interfaces known to support it include +.Xr ed 4 +and +.Xr de 4 . +Regardless, all interfaces automatically support the generic MIB. +.Sh HISTORY +The +.Nm +interface first appeared in +.Fx 2.2 . diff --git a/bsd/man/man4/ip6.4 b/bsd/man/man4/ip6.4 index 3c2b1eb98..25df62c8e 100644 --- a/bsd/man/man4/ip6.4 +++ b/bsd/man/man4/ip6.4 @@ -1,31 +1,6 @@ -.\" $KAME: ip6.4,v 1.14 2001/02/26 09:31:39 itojun Exp $ -.\" -.\" Copyright (C) 1999 WIDE Project. -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. 
Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the project nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. +.\" $NetBSD: ip6.4,v 1.20 2005/01/11 06:01:41 itojun Exp $ +.\" $KAME: ip6.4,v 1.23 2005/01/11 05:56:25 itojun Exp $ +.\" $OpenBSD: ip6.4,v 1.21 2005/01/06 03:50:46 itojun Exp $ .\" .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. @@ -38,11 +13,7 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. -.\" 4. Neither the name of the University nor the names of its contributors +.\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" @@ -57,651 +28,659 @@ .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. -.\" -.\" $FreeBSD: src/share/man/man4/ip6.4,v 1.1.2.8 2001/12/17 11:30:12 ru Exp $ -.\" -.Dd March 13, 2000 +.Dd December 29, 2004 .Dt IP6 4 .Os -.\" .Sh NAME .Nm ip6 -.Nd Internet Protocol version 6 (IPv6) -.\" +.Nd Internet Protocol version 6 (IPv6) network layer .Sh SYNOPSIS -.In sys/types.h .In sys/socket.h .In netinet/in.h .Ft int .Fn socket AF_INET6 SOCK_RAW proto -.\" .Sh DESCRIPTION -.Tn IPv6 -is the network layer protocol used by the Internet protocol version 6 family -.Pq Dv AF_INET6 . -Options may be set at the -.Tn IPv6 -level when using higher-level protocols that are based on -.Tn IPv6 -(such as -.Tn TCP +The IPv6 network layer is used by the IPv6 protocol family for +transporting data. +IPv6 packets contain an IPv6 header that is not provided as part of the +payload contents when passed to an application. 
+IPv6 header options affect the behavior of this protocol and may be used
+by high-level protocols (such as the
+.Xr tcp 4
 and
-.Tn UDP ) .
-It may also be accessed through a
-.Dq raw socket
-when developing new protocols, or special-purpose applications.
-.Pp
-There are several
-.Tn IPv6-level
-.Xr setsockopt 2 Ns / Ns Xr getsockopt 2
-options.
-They are separated into the basic IPv6 sockets API
-(defined in RFC2553),
-and the advanced API
-(defined in RFC2292).
-The basic API looks very similar to the API presented in
-.Xr ip 4 .
-Advanced API uses ancillary data and can handle more complex cases.
+.Xr udp 4
+protocols) as well as directly by
+.Dq raw sockets ,
+which process IPv6 messages at a lower level and may be useful for
+developing new protocols and special-purpose applications.
+.Ss Header
+All IPv6 packets begin with an IPv6 header.
+When data received by the kernel are passed to the application, this
+header is not included in the buffer, even when raw sockets are being
+used.
+Likewise, when data are sent to the kernel for transmit from the
+application, the buffer is not examined for an IPv6 header:
+the kernel always constructs the header.
+To directly access IPv6 headers from received packets and specify them
+as part of the buffer passed to the kernel, link-level access
+.Po
+.Xr bpf 4 ,
+for example
+.Pc
+must instead be utilized.
 .Pp
-To specify some of socket options, certain privilege
-(i.e. root privilege) is required.
-.\"
-.Ss Basic IPv6 sockets API
-.Dv IPV6_UNICAST_HOPS
-may be used to set the hoplimit field in the
-.Tn IPv6
-header.
-As symbol name suggests, the option controls hoplimit field on unicast packets.
-If -1 is specified, the kernel will use a default value.
-If a value of 0 to 255 is specified, the packet will have the specified
-value as hoplimit.
-Other values are considered invalid, and
-.Er EINVAL
-will be returned.
-For example:
+The header has the following definition:
 .Bd -literal -offset indent
-int hlim = 60;	/* max = 255 */
-setsockopt(s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hlim, sizeof(hlim));
+struct ip6_hdr {
+	union {
+		struct ip6_hdrctl {
+			u_int32_t ip6_un1_flow;	/* 20 bits of flow ID */
+			u_int16_t ip6_un1_plen;	/* payload length */
+			u_int8_t  ip6_un1_nxt;	/* next header */
+			u_int8_t  ip6_un1_hlim;	/* hop limit */
+		} ip6_un1;
+		u_int8_t ip6_un2_vfc;	/* version and class */
+	} ip6_ctlun;
+	struct in6_addr ip6_src;	/* source address */
+	struct in6_addr ip6_dst;	/* destination address */
+} __packed;
+
+#define ip6_vfc		ip6_ctlun.ip6_un2_vfc
+#define ip6_flow	ip6_ctlun.ip6_un1.ip6_un1_flow
+#define ip6_plen	ip6_ctlun.ip6_un1.ip6_un1_plen
+#define ip6_nxt		ip6_ctlun.ip6_un1.ip6_un1_nxt
+#define ip6_hlim	ip6_ctlun.ip6_un1.ip6_un1_hlim
+#define ip6_hops	ip6_ctlun.ip6_un1.ip6_un1_hlim
 .Ed
 .Pp
-.Tn IPv6
-multicasting is supported only on
-.Dv AF_INET6
-sockets of type
-.Dv SOCK_DGRAM
+All fields are in network-byte order.
+Any options specified (see
+.Sx Options
+below) must also be specified in network-byte order.
+.Pp
+.Va ip6_flow
+specifies the flow ID.
+.Va ip6_plen
+specifies the payload length.
+.Va ip6_nxt
+specifies the type of the next header.
+.Va ip6_hlim
+specifies the hop limit.
+.Pp
+The top 4 bits of
+.Va ip6_vfc
+specify the version and the bottom 4 bits specify the class.
+.Pp
+.Va ip6_src
 and
-.Dv SOCK_RAW,
-and only on networks where the interface driver supports multicasting.
+.Va ip6_dst
+specify the source and destination addresses.
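+.Pp
+As an illustration (a minimal sketch; it assumes that
+.Fa buf
+holds a captured packet, for example from
+.Xr bpf 4 ,
+with the link-level header already skipped), the header fields can be
+examined as follows; note the byte-order conversion for the multi-byte
+fields:
+.Bd -literal -offset indent
+struct ip6_hdr hdr;
+
+memcpy(&hdr, buf, sizeof(hdr));           /* copy to ensure alignment */
+int version = (hdr.ip6_vfc & 0xf0) >> 4;  /* top 4 bits; should be 6 */
+int payload = ntohs(hdr.ip6_plen);        /* payload length, host order */
+int nexthdr = hdr.ip6_nxt;                /* next header type */
+int hoplim  = hdr.ip6_hlim;               /* hop limit */
+.Ed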
.Pp -The -.Dv IPV6_MULTICAST_HOPS -option changes the hoplimit for outgoing multicast datagrams -in order to control the scope of the multicasts: +The IPv6 header may be followed by any number of extension headers that start +with the following generic definition: .Bd -literal -offset indent -unsigned int hlim; /* range: 0 to 255, default = 1 */ -setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, &hlim, sizeof(hlim)); +struct ip6_ext { + u_int8_t ip6e_nxt; + u_int8_t ip6e_len; +} __packed; .Ed -.Pp -Datagrams with a hoplimit of 1 are not forwarded beyond the local network. -Multicast datagrams with a hoplimit of 0 will not be transmitted on any network, -but may be delivered locally if the sending host belongs to the destination -group and if multicast loopback has not been disabled on the sending socket -(see below). -Multicast datagrams with hoplimit greater than 1 may be forwarded -to other networks if a multicast router is attached to the local network. -.Pp -For hosts with multiple interfaces, each multicast transmission is -sent from the primary network interface. -The -.Dv IPV6_MULTICAST_IF -option overrides the default for -subsequent transmissions from a given socket: -.Bd -literal -offset indent -unsigned int outif; -outif = if_nametoindex("ne0"); -setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, &outif, sizeof(outif)); -.Ed -.Pp -where "outif" is an interface index of the desired interface, -or 0 to specify the default interface. -.Pp -If a multicast datagram is sent to a group to which the sending host itself -belongs (on the outgoing interface), a copy of the datagram is, by default, -looped back by the IPv6 layer for local delivery. -The -.Dv IPV6_MULTICAST_LOOP -option gives the sender explicit control -over whether or not subsequent datagrams are looped back: -.Bd -literal -offset indent -u_char loop; /* 0 = disable, 1 = enable (default) */ -setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, &loop, sizeof(loop)); -.Ed -.Pp -This option -improves performance for applications that may have no more than one -instance on a single host (such as a router daemon), by eliminating -the overhead of receiving their own transmissions. -It should generally not be used by applications for which there -may be more than one instance on a single host (such as a conferencing -program) or for which the sender does not belong to the destination -group (such as a time querying program). -.Pp -A multicast datagram sent with an initial hoplimit greater than 1 may be delivered -to the sending host on a different interface from that on which it was sent, -if the host belongs to the destination group on that other interface. -The loopback control option has no effect on such delivery. -.Pp +.Ss Options +IPv6 allows header options on packets to manipulate the behavior of the +protocol. +These options and other control requests are accessed with the +.Xr getsockopt 2 +and +.Xr setsockopt 2 +system calls at level +.Dv IPPROTO_IPV6 +and by using ancillary data in +.Xr recvmsg 2 +and +.Xr sendmsg 2 . +They can be used to access most of the fields in the IPv6 header and +extension headers. +.Pp +The following socket options are supported: +.Bl -tag -width Ds +.\" .It Dv IPV6_OPTIONS +.It Dv IPV6_UNICAST_HOPS Fa "int *" +Get or set the default hop limit header field for outgoing unicast +datagrams sent on this socket. +A value of \-1 resets to the default value. 
+.\" .It Dv IPV6_RECVOPTS Fa "int *"
+.\" Get or set the status of whether all header options will be
+.\" delivered along with the datagram when it is received.
+.\" .It Dv IPV6_RECVRETOPTS Fa "int *"
+.\" Get or set the status of whether header options will be delivered
+.\" for reply.
+.\" .It Dv IPV6_RECVDSTADDR Fa "int *"
+.\" Get or set the status of whether datagrams are received with
+.\" destination addresses.
+.\" .It Dv IPV6_RETOPTS
+.\" Get or set IPv6 options.
+.It Dv IPV6_MULTICAST_IF Fa "u_int *"
+Get or set the interface from which multicast packets will be sent.
+For hosts with multiple interfaces, each multicast transmission is sent
+from the primary network interface.
+The interface is specified as its index as provided by
+.Xr if_nametoindex 3 .
+A value of zero specifies the default interface.
+.It Dv IPV6_MULTICAST_HOPS Fa "int *"
+Get or set the default hop limit header field for outgoing multicast
+datagrams sent on this socket.
+This option controls the scope of multicast datagram transmissions.
+.Pp
+Datagrams with a hop limit of 1 are not forwarded beyond the local
+network.
+Multicast datagrams with a hop limit of zero will not be transmitted on
+any network but may be delivered locally if the sending host belongs to
+the destination group and if multicast loopback (see below) has not been
+disabled on the sending socket.
+Multicast datagrams with a hop limit greater than 1 may be forwarded to
+other networks if a multicast router (such as
+.Xr mrouted 8 )
+is attached to the local network.
+.It Dv IPV6_MULTICAST_LOOP Fa "u_int *"
+Get or set the status of whether multicast datagrams will be looped back
+for local delivery when a multicast datagram is sent to a group to which
+the sending host belongs.
+.Pp
+This option improves performance for applications that may have no more
+than one instance on a single host (such as a router daemon) by
+eliminating the overhead of receiving their own transmissions.
+It should generally not be used by applications for which there may be
+more than one instance on a single host (such as a conferencing program)
+or for which the sender does not belong to the destination group
+(such as a time-querying program).
+.Pp
+A multicast datagram sent with an initial hop limit greater than 1 may
+be delivered to the sending host on a different interface from that on
+which it was sent if the host belongs to the destination group on that
+other interface.
+The multicast loopback control option has no effect on such delivery.
+.It Dv IPV6_JOIN_GROUP Fa "struct ipv6_mreq *"
+Join a multicast group.
+A host must become a member of a multicast group before it can receive
+datagrams sent to the group.
+.Bd -literal
+struct ipv6_mreq {
+	struct in6_addr	ipv6mr_multiaddr;
+	unsigned int	ipv6mr_interface;
+};
+.Ed
+.Pp
+.Pp
+If the multicast address is unspecified (i.e., all zeroes), messages
+from all multicast addresses will be accepted by this group.
+Note that setting the address to this value requires superuser privileges.
+.It Dv IPV6_LEAVE_GROUP Fa "struct ipv6_mreq *"
+Drop membership from the associated multicast group.
+Memberships are automatically dropped when the socket is closed or when
+the process exits.
+.It Dv IPV6_PORTRANGE Fa "int *"
+Get or set the allocation policy of ephemeral ports for when the kernel
+automatically binds a local address to this socket.
+The following values are available:
+.Pp
+.Bl -tag -width IPV6_PORTRANGE_DEFAULT -compact
+.It Dv IPV6_PORTRANGE_DEFAULT
+Use the regular range of non-reserved ports (varies, see
+.Xr sysctl 8 ) .
+.It Dv IPV6_PORTRANGE_HIGH
+Use a high range (varies, see
+.Xr sysctl 8 ) .
+.It Dv IPV6_PORTRANGE_LOW
+Use a low, reserved range (600\-1023).
+.El
+.It Dv IPV6_PKTINFO Fa "int *"
+Get or set whether additional information about subsequent packets will
+be provided as ancillary data along with the payload in subsequent
+.Xr recvmsg 2
+calls.
+The information is stored in the following structure in the ancillary
+data returned:
+.Bd -literal
+struct in6_pktinfo {
+	struct in6_addr ipi6_addr;	/* src/dst IPv6 address */
+	unsigned int ipi6_ifindex;	/* send/recv if index */
};
.Ed
+.It Dv IPV6_HOPLIMIT Fa "int *"
+Get or set whether the hop limit header field from subsequent packets
+will be provided as ancillary data along with the payload in subsequent
+.Xr recvmsg 2
+calls.
+The value is stored as an
+.Vt int
+in the ancillary data returned.
+.\" .It Dv IPV6_NEXTHOP Fa "int *"
+.\" Get or set whether the address of the next hop for subsequent
+.\" packets will be provided as ancillary data along with the payload in
+.\" subsequent
+.\" .Xr recvmsg 2
+.\" calls.
+.\" The option is stored as a
+.\" .Vt sockaddr
+.\" structure in the ancillary data returned.
+.\" .Pp
+.\" This option requires superuser privileges.
+.It Dv IPV6_HOPOPTS Fa "int *"
+Get or set whether the hop-by-hop options from subsequent packets will be
+provided as ancillary data along with the payload in subsequent
+.Xr recvmsg 2
+calls.
+The option is stored in the following structure in the ancillary data
+returned:
+.Bd -literal
+struct ip6_hbh {
+	u_int8_t ip6h_nxt;	/* next header */
+	u_int8_t ip6h_len;	/* length in units of 8 octets */
+/* followed by options */
+} __packed;
+.Ed
.Pp
-.Dv ipv6mr_interface
-should be 0 to choose the default multicast interface, or the
-interface index of a particular multicast-capable interface if
-the host is multihomed.
-Membership is associated with a single interface;
-programs running on multihomed hosts may need to
-join the same group on more than one interface.
+The
+.Fn inet6_option_space
+routine and family of routines may be used to manipulate this data.
.Pp
-To drop a membership, use:
-.Bd -literal -offset indent
-struct ipv6_mreq mreq6;
-setsockopt(s, IPPROTO_IPV6, IPV6_LEAVE_GROUP, &mreq6, sizeof(mreq6));
+This option requires superuser privileges.
+.It Dv IPV6_DSTOPTS Fa "int *"
+Get or set whether the destination options from subsequent packets will
+be provided as ancillary data along with the payload in subsequent
+.Xr recvmsg 2
+calls.
+The option is stored in the following structure in the ancillary data +returned: +.Bd -literal +struct ip6_dest { + u_int8_t ip6d_nxt; /* next header */ + u_int8_t ip6d_len; /* length in units of 8 octets */ +/* followed by options */ +} __packed; .Ed .Pp -where -.Fa mreq6 -contains the same values as used to add the membership. -Memberships are dropped when the socket is closed or the process exits. +The +.Fn inet6_option_space +routine and family of routines may be used to manipulate this data. .Pp -.Dv IPV6_PORTRANGE -controls how ephemeral ports are allocated for -.Dv SOCK_STREAM -and -.Dv SOCK_DGRAM -sockets. -For example, -.Bd -literal -offset indent -int range = IPV6_PORTRANGE_LOW; /* see */ -setsockopt(s, IPPROTO_IPV6, IPV6_PORTRANGE, &range, sizeof(range)); +This option requires superuser privileges. +.It Dv IPV6_RTHDR Fa "int *" +Get or set whether the routing header from subsequent packets will be +provided as ancillary data along with the payload in subsequent +.Xr recvmsg 2 +calls. +The header is stored in the following structure in the ancillary data +returned: +.Bd -literal +struct ip6_rthdr { + u_int8_t ip6r_nxt; /* next header */ + u_int8_t ip6r_len; /* length in units of 8 octets */ + u_int8_t ip6r_type; /* routing type */ + u_int8_t ip6r_segleft; /* segments left */ +/* followed by routing-type-specific data */ +} __packed; .Ed .Pp -.Dv IPV6_V6ONLY -controls behavior of -.Dv AF_INET6 -wildcard listening socket. -The following example sets the option to 1: -.Bd -literal -offset indent -int on = 1; -setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)); -.Ed +The +.Fn inet6_option_space +routine and family of routines may be used to manipulate this data. +.Pp +This option requires superuser privileges. +.It Dv IPV6_PKTOPTIONS Fa "struct cmsghdr *" +Get or set all header options and extension headers at one time on the +last packet sent or received on the socket. +All options must fit within the size of an mbuf (see +.Xr mbuf 9 ) . +Options are specified as a series of +.Vt cmsghdr +structures followed by corresponding values. +.Va cmsg_level +is set to +.Dv IPPROTO_IPV6 , +.Va cmsg_type +to one of the other values in this list, and trailing data to the option +value. +When setting options, if the length +.Va optlen +to +.Xr setsockopt 2 +is zero, all header options will be reset to their default values. +Otherwise, the length should specify the size the series of control +messages consumes. .Pp -If set to 1, -.Dv AF_INET6 -wildcard listening socket will accept IPv6 traffic only. -If set to 0, it will accept IPv4 traffic as well, -as if it was from IPv4 mapped address like -.Li ::ffff:10.1.1.1 . -.\" RFC2553 defines the behavior when the variable is set to 0. -Note that if you set it this to 0, -IPv4 access control gets much more complicated. -For example, even if you have no listening -.Dv AF_INET -listening socket on port -.Li X , -you will end up accepting IPv4 traffic by -.Dv AF_INET6 -listening socket on the same port. -The default value for this flag is copied at socket instantiation time, -from -.Li net.inet6.ip6.v6only -.Xr sysctl 3 -variable. -The option affects -.Tn TCP -and -.Tn UDP -sockets only. -.\" -.Ss Advanced IPv6 sockets API -The advanced IPv6 sockets API lets userland programs specify or obtain -details about the IPv6 header and the IPv6 extension headers on packets. -The advanced API uses ancillary data for passing data from/to the kernel. 
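+As an illustrative sketch (assuming a socket descriptor
+.Va s ) ,
+a sticky hop limit of 64 might be installed this way:
+.Bd -literal
+u_char optbuf[CMSG_SPACE(sizeof(int))];
+struct cmsghdr *cm = (struct cmsghdr *)optbuf;
+
+cm->cmsg_len = CMSG_LEN(sizeof(int));
+cm->cmsg_level = IPPROTO_IPV6;
+cm->cmsg_type = IPV6_HOPLIMIT;
+*(int *)CMSG_DATA(cm) = 64;
+
+if (setsockopt(s, IPPROTO_IPV6, IPV6_PKTOPTIONS, optbuf,
+    sizeof(optbuf)) == -1)
+	err(1, "setsockopt");
+.Ed
+.Pp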
+Instead of using +.Xr sendmsg 2 +to specify option values, the ancillary data used in these calls that +correspond to the desired header options may be directly specified as +the control message in the series of control messages provided as the +argument to +.Xr setsockopt 2 . +.It Dv IPV6_CHECKSUM Fa "int *" +Get or set the byte offset into a packet where the 16-bit checksum is +located. +When set, this byte offset is where incoming packets will be expected +to have checksums of their data stored and where outgoing packets will +have checksums of their data computed and stored by the kernel. +A value of \-1 specifies that no checksums will be checked on incoming +packets and that no checksums will be computed or stored on outgoing +packets. +The offset of the checksum for ICMPv6 sockets cannot be relocated or +turned off. +.It Dv IPV6_V6ONLY Fa "int *" +Get or set whether only IPv6 connections can be made to this socket. +For wildcard sockets, this can restrict connections to IPv6 only. +.\"With +.\".Ox +.\"IPv6 sockets are always IPv6-only, so the socket option is read-only +.\"(not modifiable). +.It Dv IPV6_FAITH Fa "int *" +Get or set the status of whether +.Xr faith 4 +connections can be made to this socket. +.It Dv IPV6_USE_MIN_MTU Fa "int *" +Get or set whether the minimal IPv6 maximum transmission unit (MTU) size +will be used to avoid fragmentation from occurring for subsequent +outgoing datagrams. +.It Dv IPV6_AUTH_LEVEL Fa "int *" +Get or set the +.Xr ipsec 4 +authentication level. +.It Dv IPV6_ESP_TRANS_LEVEL Fa "int *" +Get or set the ESP transport level. +.It Dv IPV6_ESP_NETWORK_LEVEL Fa "int *" +Get or set the ESP encapsulation level. +.It Dv IPV6_IPCOMP_LEVEL Fa "int *" +Get or set the +.Xr ipcomp 4 +level. +.El .Pp -There are -.Xr setsockopt 2 Ns / Ns Xr getsockopt 2 -options to get optional information on incoming packets. -They are +The .Dv IPV6_PKTINFO , +.\" .Dv IPV6_NEXTHOP , .Dv IPV6_HOPLIMIT , .Dv IPV6_HOPOPTS , .Dv IPV6_DSTOPTS , and -.Dv IPV6_RTHDR . -.Bd -literal -offset indent -int on = 1; - -setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &on, sizeof(on)); -setsockopt(fd, IPPROTO_IPV6, IPV6_HOPLIMIT, &on, sizeof(on)); -setsockopt(fd, IPPROTO_IPV6, IPV6_HOPOPTS, &on, sizeof(on)); -setsockopt(fd, IPPROTO_IPV6, IPV6_DSTOPTS, &on, sizeof(on)); -setsockopt(fd, IPPROTO_IPV6, IPV6_RTHDR, &on, sizeof(on)); -.Ed -.Pp -When any of these options are enabled, the corresponding data is -returned as control information by -.Xr recvmsg 2 , -as one or more ancillary data objects. -.Pp -If -.Dv IPV6_PKTINFO -is enabled, the destination IPv6 address and the arriving interface index -will be available via -.Li struct in6_pktinfo -on ancillary data stream. -You can pick the structure by checking for an ancillary data item with -.Li cmsg_level -equals to -.Dv IPPROTO_IPV6 , -and -.Li cmsg_type -equals to -.Dv IPV6_PKTINFO . -.Pp -If -.Dv IPV6_HOPLIMIT -is enabled, hoplimit value on the packet will be made available to the -userland program. -Ancillary data stream will contain an integer data item with -.Li cmsg_level -equals to -.Dv IPPROTO_IPV6 , -and -.Li cmsg_type -equals to -.Dv IPV6_HOPLIMIT . -.Pp -.Xr inet6_option_space 3 -and friends will help you parse ancillary data items for -.Dv IPV6_HOPOPTS -and -.Dv IPV6_DSTOPTS . -Similarly, -.Xr inet6_rthdr_space 3 -and friends will help you parse ancillary data items for -.Dv IPV6_RTHDR . 
-.Pp
-.Dv IPV6_HOPOPTS
+.Dv IPV6_RTHDR
+options will return ancillary data along with payload contents in subsequent
+.Xr recvmsg 2
+calls with
+.Va cmsg_level
+set to
+.Dv IPPROTO_IPV6
and
-.Dv IPV6_DSTOPTS
-may appear multiple times on an ancillary data stream
-(note that the behavior is slightly different than the specification).
-Other ancillary data item will appear no more than once.
-.Pp
-For outgoing direction,
-you can pass ancillary data items with normal payload data, using
-.Xr sendmsg 2 .
-Ancillary data items will be parsed by the kernel, and used to construct
-the IPv6 header and extension headers.
-For the 5
-.Li cmsg_level
-values listed above, ancillary data format is the same as inbound case.
-Additionally, you can specify
-.Dv IPV6_NEXTHOP
-data object.
-The
-.Dv IPV6_NEXTHOP
-ancillary data object specifies the next hop for the
-datagram as a socket address structure.
-In the
-.Li cmsghdr
-structure
-containing this ancillary data, the
-.Li cmsg_level
-member will be
-.Dv IPPROTO_IPV6 ,
-the
-.Li cmsg_type
-member will be
-.Dv IPV6_NEXTHOP ,
-and the first byte of
-.Li cmsg_data[]
-will be the first byte of the socket address structure.
-.Pp
-If the socket address structure contains an IPv6 address (e.g., the
-sin6_family member is
-.Dv AF_INET6 ) ,
-then the node identified by that
-address must be a neighbor of the sending host.
-If that address
-equals the destination IPv6 address of the datagram, then this is
-equivalent to the existing
-.Dv SO_DONTROUTE
-socket option.
-.Pp
-For applications that do not, or unable to use
-.Xr sendmsg 2
-or
-.Xr recvmsg 2 ,
-.Dv IPV6_PKTOPTIONS
-socket option is defined.
-Setting the socket option specifies any of the optional output fields:
-.Bd -literal -offset indent
-setsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, &buf, len);
-.Ed
-.Pp
-The fourth argument points to a buffer containing one or more
-ancillary data objects, and the fifth argument is the total length of
-all these objects.
-The application fills in this buffer exactly as
-if the buffer were being passed to
+.Va cmsg_type
+set to the respective option name value (e.g.,
+.Dv IPV6_HOPLIMIT ) .
+These options may also be used directly as ancillary
+.Va cmsg_type
+values in
.Xr sendmsg 2
-as control information.
-.Pp
-The options set by calling
-.Xr setsockopt 2
-for
-.Dv IPV6_PKTOPTIONS
-are
-called "sticky" options because once set they apply to all packets
-sent on that socket.
-The application can call
-.Xr setsockopt 2
-again to
-change all the sticky options, or it can call
-.Xr setsockopt 2
-with a
-length of 0 to remove all the sticky options for the socket.
-.Pp
-The corresponding receive option
-.Bd -literal -offset indent
-getsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, &buf, &len);
-.Ed
-.Pp
-returns a buffer with one or more ancillary data objects for all the
-optional receive information that the application has previously
-specified that it wants to receive.
-The fourth argument points to
-the buffer that is filled in by the call.
-The fifth argument is a
-pointer to a value-result integer: when the function is called the
-integer specifies the size of the buffer pointed to by the fourth
-argument, and on return this integer contains the actual number of
-bytes that were returned.
-The application processes this buffer
-exactly as if the buffer were returned by
-.Xr recvmsg 2
-as control information.
-.\" -.Ss Advanced API and TCP sockets -When using -.Xr getsockopt 2 -with the -.Dv IPV6_PKTOPTIONS -option and a -.Tn TCP -socket, only the options from the most recently received segment are -retained and returned to the caller, and only after the socket option -has been set. -.\" That is, -.\" .Tn TCP -.\" need not start saving a copy of the options until the application says -.\" to do so. -The application is not allowed to specify ancillary data in a call to +to set options on the packet being transmitted by the call. +The +.Va cmsg_level +value must be +.Dv IPPROTO_IPV6 . +For these options, the ancillary data object value format is the same +as the value returned as explained for each when received with +.Xr recvmsg 2 . +.Pp +Note that using .Xr sendmsg 2 -on a -.Tn TCP -socket, and none of the ancillary data that we -described above is ever returned as control information by -.Xr recvmsg 2 -on a -.Tn TCP -socket. -.\" -.Ss Conflict resolution -In some cases, there are multiple APIs defined for manipulating -a IPv6 header field. -A good example is the outgoing interface for multicast datagrams: -it can be manipulated by +to specify options on particular packets works only on UDP and raw sockets. +To manipulate header options for packets on TCP sockets, only the socket +options may be used. +.Pp +In some cases, there are multiple APIs defined for manipulating an IPv6 +header field. +A good example is the outgoing interface for multicast datagrams, which +can be set by the .Dv IPV6_MULTICAST_IF -in basic API, +socket option, through the .Dv IPV6_PKTINFO -in advanced API, and -.Li sin6_scope_id -field of the socket address passed to -.Xr sendto 2 . -.Pp -When conflicting options are given to the kernel, -the kernel will get the value in the following preference: -(1) options specified by using ancillary data, -(2) options specified by a sticky option of the advanced API, -(3) options specified by using the basic API, and lastly -(4) options specified by a socket address. -Note that the conflict resolution is undefined in the API specifcation -and implementation dependent. -.\" -.Ss "Raw IPv6 Sockets" -Raw -.Tn IPv6 -sockets are connectionless, and are normally used with the +option, and through the +.Va sin6_scope_id +field of the socket address passed to the +.Xr sendto 2 +system call. +.Pp +Resolving these conflicts is implementation dependent. +This implementation determines the value in the following way: +options specified by using ancillary data (i.e., +.Xr sendmsg 2 ) +are considered first, +options specified by using +.Dv IPV6_PKTOPTIONS +to set +.Dq sticky +options are considered second, +options specified by using the individual, basic, and direct socket +options (e.g., +.Dv IPV6_UNICAST_HOPS ) +are considered third, +and options specified in the socket address supplied to +.Xr sendto 2 +are the last choice. +.Ss Multicasting +IPv6 multicasting is supported only on +.Dv AF_INET6 +sockets of type +.Dv SOCK_DGRAM +and +.Dv SOCK_RAW , +and only on networks where the interface driver supports +multicasting. +Socket options (see above) that manipulate membership of +multicast groups and other multicast options include +.Dv IPV6_MULTICAST_IF , +.Dv IPV6_MULTICAST_HOPS , +.Dv IPV6_MULTICAST_LOOP , +.Dv IPV6_LEAVE_GROUP , +and +.Dv IPV6_JOIN_GROUP . 
+.Ss Raw Sockets +Raw IPv6 sockets are connectionless and are normally used with the .Xr sendto 2 and .Xr recvfrom 2 -calls, though the +calls, although the .Xr connect 2 -call may also be used to fix the destination for future -packets (in which case the -.Xr read 2 -or -.Xr recv 2 -and -.Xr write 2 -or +call may be used to fix the destination address for future outgoing +packets so that .Xr send 2 -system calls may be used). -.Pp -If -.Fa proto -is 0, the default protocol -.Dv IPPROTO_RAW -is used for outgoing packets, and only incoming packets destined -for that protocol are received. -If -.Fa proto -is non-zero, that protocol number will be used on outgoing packets -and to filter incoming packets. -.Pp -Outgoing packets automatically have an -.Tn IPv6 -header prepended to them (based on the destination address and the -protocol number the socket is created with). -Incoming packets are received without -.Tn IPv6 -header nor extension headers. -.Pp -All data sent via raw sockets MUST be in network byte order and all -data received via raw sockets will be in network byte order. -This differs from the IPv4 raw sockets, which did not specify a byte -ordering and typically used the host's byte order. -.Pp -Another difference from IPv4 raw sockets is that complete packets -(that is, IPv6 packets with extension headers) cannot be read or -written using the IPv6 raw sockets API. -Instead, ancillary data -objects are used to transfer the extension headers, as described above. -Should an application need access to the -complete IPv6 packet, some other technique, such as the datalink -interfaces, such as -.Xr bpf 4 , -must be used. -.Pp -All fields in the IPv6 header that an application might want to -change (i.e., everything other than the version number) can be -modified using ancillary data and/or socket options by the -application for output. -All fields in a received IPv6 header (other -than the version number and Next Header fields) and all extension -headers are also made available to the application as ancillary data -on input. -Hence there is no need for a socket option similar to the -IPv4 -.Dv IP_HDRINCL -socket option. -.Pp -When writing to a raw socket the kernel will automatically fragment -the packet if its size exceeds the path MTU, inserting the required -fragmentation headers. On input the kernel reassembles received -fragments, so the reader of a raw socket never sees any fragment -headers. +may instead be used and the +.Xr bind 2 +call may be used to fix the source address for future outgoing +packets instead of having the kernel choose a source address. .Pp -Most IPv4 implementations give special treatment to a raw socket -created with a third argument to +By using +.Xr connect 2 +or +.Xr bind 2 , +raw socket input is constrained to only packets with their +source address matching the socket destination address if +.Xr connect 2 +was used and to packets with their destination address +matching the socket source address if +.Xr bind 2 +was used. +.Pp +If the +.Ar proto +argument to .Xr socket 2 -of -.Dv IPPROTO_RAW , -whose value is normally 255. -We note that this value has no special meaning to -an IPv6 raw socket (and the IANA currently reserves the value of 255 -when used as a next-header field). -.\" Note: This feature was added to -.\" IPv4 in 1988 by Van Jacobson to support traceroute, allowing a -.\" complete IP header to be passed by the application, before the -.\" .Dv IP_HDRINCL -.\" socket option was added. 
-.Pp
-For ICMPv6 raw sockets,
-the kernel will calculate and insert the ICMPv6 checksum for
-since this checksum is mandatory.
+is zero, the default protocol
+.Pq Dv IPPROTO_RAW
+is used for outgoing packets.
+For incoming packets, protocols recognized by the kernel are
+.Sy not
+passed to the application socket (e.g.,
+.Xr tcp 4
+and
+.Xr udp 4 )
+except for some ICMPv6 messages.
+The ICMPv6 messages not passed to raw sockets include echo, timestamp,
+and address mask requests.
+If
+.Ar proto
+is non-zero, only packets with this protocol will be passed to the
+socket.
.Pp
-For other raw IPv6 sockets (that is, for raw IPv6 sockets created
-with a third argument other than IPPROTO_ICMPV6), the application
-must set the new IPV6_CHECKSUM socket option to have the kernel (1)
-compute and store a psuedo header checksum for output,
-and (2) verify the received
-pseudo header checksum on input,
-discarding the packet if the checksum is in error.
-This option prevents applications from having to perform source
-address selection on the packets they send.
-The checksum will
-incorporate the IPv6 pseudo-header, defined in Section 8.1 of RFC2460.
-This new socket option also specifies an integer offset into
-the user data of where the checksum is located.
-.Bd -literal -offset indent
-int offset = 2;
-setsockopt(fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset));
+IPv6 fragments are also not passed to application sockets until
+they have been reassembled.
+If reception of all packets is desired, link-level access (such as
+.Xr bpf 4 )
+must be used instead.
+.Pp
+Outgoing packets automatically have an IPv6 header prepended to them
+(based on the destination address and the protocol number the socket
+was created with).
+Incoming packets are received by an application without the IPv6 header
+or any extension headers.
+.Pp
+Outgoing packets will be fragmented automatically by the kernel if they
+are too large.
+Incoming packets will be reassembled before being sent to the raw socket,
+so packet fragments or fragment headers will never be seen on a raw socket.
+.Sh EXAMPLES
+The following determines the hop limit on the next packet received:
+.Bd -literal
+struct iovec iov[2];
+u_char buf[BUFSIZ];
+struct cmsghdr *cm;
+struct msghdr m;
+int found, optval;
+u_char data[2048];
+struct sockaddr_in6 from;	/* filled in by recvmsg() */
+int s;				/* socket descriptor */
+
+/* Create socket. */
+
+(void)memset(&m, 0, sizeof(m));
+(void)memset(&iov, 0, sizeof(iov));
+
+iov[0].iov_base = data;		/* buffer for packet payload */
+iov[0].iov_len = sizeof(data);	/* expected packet length */
+
+m.msg_name = &from;		/* sockaddr_in6 of peer */
+m.msg_namelen = sizeof(from);
+m.msg_iov = iov;
+m.msg_iovlen = 1;
+m.msg_control = (caddr_t)buf;	/* buffer for control messages */
+m.msg_controllen = sizeof(buf);
+
+/*
+ * Enable the hop limit value from received packets to be
+ * returned along with the payload.
+ */
+optval = 1;
+if (setsockopt(s, IPPROTO_IPV6, IPV6_HOPLIMIT, &optval,
+    sizeof(optval)) == -1)
+	err(1, "setsockopt");
+
+found = 0;
+while (!found) {
+	if (recvmsg(s, &m, 0) == -1)
+		err(1, "recvmsg");
+	for (cm = CMSG_FIRSTHDR(&m); cm != NULL;
+	    cm = CMSG_NXTHDR(&m, cm)) {
+		if (cm->cmsg_level == IPPROTO_IPV6 &&
+		    cm->cmsg_type == IPV6_HOPLIMIT &&
+		    cm->cmsg_len == CMSG_LEN(sizeof(int))) {
+			found = 1;
+			(void)printf("hop limit: %d\en",
+			    *(int *)CMSG_DATA(cm));
+			break;
+		}
+	}
+}
.Ed
-.Pp
-By default, this socket option is disabled. Setting the offset to -1
By disabled we mean (1) the kernel will -not calculate and store a checksum for outgoing packets, and (2) the -kernel will not verify a checksum for received packets. -.Pp -Note: Since the checksum is always calculated by the kernel for an -ICMPv6 socket, applications are not able to generate ICMPv6 packets -with incorrect checksums (presumably for testing purposes) using this -API. -.\" -.Sh ERRORS +.Sh DIAGNOSTICS A socket operation may fail with one of the following errors returned: -.Bl -tag -width Er +.Bl -tag -width EADDRNOTAVAILxx .It Bq Er EISCONN -when trying to establish a connection on a socket which already -has one, or when trying to send a datagram with the destination -address specified and the socket is already connected; +when trying to establish a connection on a socket which +already has one or when trying to send a datagram with the destination +address specified and the socket is already connected. .It Bq Er ENOTCONN -when trying to send a datagram, but no destination address is -specified, and the socket hasn't been connected; +when trying to send a datagram, but +no destination address is specified, and the socket hasn't been +connected. .It Bq Er ENOBUFS -when the system runs out of memory for an internal data structure; +when the system runs out of memory for +an internal data structure. .It Bq Er EADDRNOTAVAIL -when an attempt is made to create a socket with a network address -for which no network interface exists. +when an attempt is made to create a +socket with a network address for which no network interface +exists. .It Bq Er EACCES -when an attempt is made to create a raw IPv6 socket by a non-privileged process. +when an attempt is made to create +a raw IPv6 socket by a non-privileged process. .El .Pp -The following errors specific to -.Tn IPv6 -may occur: +The following errors specific to IPv6 may occur when setting or getting +header options: .Bl -tag -width EADDRNOTAVAILxx .It Bq Er EINVAL An unknown socket option name was given. .It Bq Er EINVAL -The ancillary data items were improperly formed, or option name was unknown. +An ancillary data object was improperly formed. .El -.\" .Sh SEE ALSO .Xr getsockopt 2 , .Xr recv 2 , .Xr send 2 , .Xr setsockopt 2 , -.Xr inet6_option_space 3 , -.Xr inet6_rthdr_space 3 , +.Xr socket 2 , +.\" .Xr inet6_option_space 3 , +.\" .Xr inet6_rthdr_space 3 , +.Xr if_nametoindex 3 , +.Xr bpf 4 , .Xr icmp6 4 , .Xr inet6 4 , -.Xr intro 4 +.Xr netintro 4 , +.Xr tcp 4 , +.Xr udp 4 .Rs .%A W. Stevens .%A M. Thomas -.%R RFC -.%N 2292 +.%T Advanced Sockets API for IPv6 +.%R RFC 2292 .%D February 1998 -.%T "Advanced Sockets API for IPv6" .Re .Rs .%A S. Deering .%A R. Hinden -.%R RFC -.%N 2460 +.%T Internet Protocol, Version 6 (IPv6) Specification +.%R RFC 2460 .%D December 1998 -.%T "Internet Protocol, Version 6 (IPv6) Specification" .Re .Rs .%A R. Gilligan .%A S. Thomson .%A J. Bound .%A W. Stevens -.%R RFC -.%N 2553 +.%T Basic Socket Interface Extensions for IPv6 +.%R RFC 2553 .%D March 1999 -.%T "Basic Socket Interface Extensions for IPv6" .Re -.\" +.Rs +.%A W. Stevens +.%A B. Fenner +.%A A. Rudoff +.%T UNIX Network Programming, third edition +.Re .Sh STANDARDS -Most of the socket options are defined in -RFC2292 and/or RFC2553. -.Pp +Most of the socket options are defined in RFC 2292 or RFC 2553. +The .Dv IPV6_V6ONLY -socket option is defined in draft-ietf-ipngwg-rfc2553bis-03. -.Dv IPV6_PORTRANGE -socket option -and -conflict resolution rule -are not defined in the RFCs and should be considered implementation dependent. 
-.\" -.Sh HISTORY -The implementation is based on KAME stack -(which is descendant of WIDE hydrangea IPv6 stack kit). -.Pp -Part of the document was shamelessly copied from RFC2553 and RFC2292. -.\" -.Sh BUGS +socket option is defined in RFC 3542. The -.Dv IPV6_NEXTHOP -object/option is not fully implemented as of writing this. +.Dv IPV6_PORTRANGE +socket option and the conflict resolution rule are not defined in the +RFCs and should be considered implementation dependent. diff --git a/bsd/man/man4/termios.4 b/bsd/man/man4/termios.4 index c3c5ee741..e7fcc5ba0 100644 --- a/bsd/man/man4/termios.4 +++ b/bsd/man/man4/termios.4 @@ -173,7 +173,7 @@ process is orphaned, the .Xr read 2 returns -1 with .Va errno set to -.Er Dv EIO +.Er EIO and no signal is sent. The default action of the .Dv SIGTTIN @@ -204,7 +204,7 @@ the .Xr write returns -1 with errno set to -.Er Dv EIO +.Er EIO and no signal is sent. .Pp Certain calls that set terminal parameters are treated in the same diff --git a/bsd/man/man4/unix.4 b/bsd/man/man4/unix.4 index db8816b6e..6597873ff 100644 --- a/bsd/man/man4/unix.4 +++ b/bsd/man/man4/unix.4 @@ -148,6 +148,35 @@ passed to a receiver. Descriptors that are awaiting delivery, or that are purposely not received, are automatically closed by the system when the destination socket is closed. +.Pp +The effective credentials (i.e., the user ID and group list) the of a +peer on a +.Dv SOCK_STREAM +socket may be obtained using the +.Dv LOCAL_PEERCRED +socket option. +This may be used by a server to obtain and verify the credentials of +its client, and vice versa by the client to verify the credentials +of the server. +These will arrive in the form of a filled in +.Ar struct xucred +(defined in +.Pa sys/ucred.h ) . +The credentials presented to the server (the +.Xr listen 2 +caller) are those of the client when it called +.Xr connect 2 ; +the credentials presented to the client (the +.Xr connect 2 +caller) are those of the server when it called +.Xr listen 2 . +This mechanism is reliable; there is no way for either party to influence +the credentials presented to its peer except by calling the appropriate +system call (e.g., +.Xr connect 2 +or +.Xr listen 2 ) +under different effective credentials. 
.Sh SEE ALSO
.Xr socket 2 ,
.Xr intro 4
diff --git a/bsd/man/man5/types.5 b/bsd/man/man5/types.5
index 533fa7328..48b2372ef 100644
--- a/bsd/man/man5/types.5
+++ b/bsd/man/man5/types.5
@@ -66,8 +66,7 @@ typedef unsigned int u_int;
typedef unsigned long u_long;
typedef unsigned short ushort; /* Sys V compatibility */
-#include
-#if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE)
+#if !defined(_ANSI_SOURCE) && !defined(_POSIX_C_SOURCE)
#include
#endif
@@ -132,6 +131,7 @@ typedef struct fd_set {
#define	FD_SET(n, p)	((p)->fds_bits[(n)/NFDBITS] |= (1 << ((n) % NFDBITS)))
#define	FD_CLR(n, p)	((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS)))
#define	FD_ISSET(n, p)	((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS)))
+#define	FD_COPY(f, t)	bcopy(f, t, sizeof(*(f)))
#define	FD_ZERO(p)	bzero((char *)(p), sizeof(*(p)))
#endif /* !_POSIX_SOURCE */
diff --git a/bsd/man/man9/fetch.9 b/bsd/man/man9/fetch.9
index 5669d0078..24d3182e0 100644
--- a/bsd/man/man9/fetch.9
+++ b/bsd/man/man9/fetch.9
@@ -34,15 +34,17 @@
.\"
.\" $FreeBSD: src/share/man/man9/fetch.9,v 1.6.2.4 2001/12/17 11:30:18 ru Exp $
.\"
-.Dd January 7, 1996
+.Dd December 16, 2004
.Dt FETCH 9
.Os
.Sh NAME
.Nm fetch ,
.Nm fubyte ,
-.Nm fusword ,
-.Nm fuswintr ,
-.Nm fuword
+.Nm fuibyte ,
+.Nm fuword ,
+.Nm fuiword ,
+.Nm fulong ,
+.Nm fuulong
.Nd fetch data from user-space
.Sh SYNOPSIS
.In sys/types.h
@@ -50,13 +52,17 @@
.In sys/systm.h
.In sys/resourcevar.h
.Ft int
-.Fn fubyte "const void *base"
+.Fn fubyte "const user_addr_t addr"
.Ft int
-.Fn fusword "void *base"
+.Fn fuibyte "const user_addr_t addr"
.Ft int
-.Fn fuswintr "void *base"
-.Ft long
-.Fn fuword "const void *base"
+.Fn fuword "user_addr_t addr"
+.Ft int
+.Fn fuiword "user_addr_t addr"
+.Ft int64_t
+.Fn fulong "user_addr_t addr"
+.Ft uint64_t
+.Fn fuulong "user_addr_t addr"
.Sh DESCRIPTION
The
.Nm
functions are designed to copy small amounts of data from user-space.
@@ -65,20 +71,27 @@ functions are designed to copy small amounts of data
from user-space.
The
.Nm
routines provide the following functionality:
-.Bl -tag -width "fuswintr()"
+.Bl -tag -width "fuiword()"
.It Fn fubyte
Fetches a byte of data from the user-space address
-.Pa base .
-.It Fn fusword
-Fetches a short word of data from the user-space address
-.Pa base .
-.It Fn fuswintr
-Fetches a short word of data from the user-space address
-.Pa base .
+.Pa addr .
+.It Fn fuibyte
+Fetches a byte of data from the user-space address
+.Pa addr .
This function is safe to call during an interrupt context.
.It Fn fuword
Fetches a word of data from the user-space address
-.Pa base .
+.Pa addr .
+.It Fn fuiword
+Fetches a word of data from the user-space address
+.Pa addr .
+This function is safe to call during an interrupt context.
+.It Fn fulong
+Fetches a long word of data from the user-space address
+.Pa addr .
+.It Fn fuulong
+Fetches an unsigned long word of data from the user-space address
+.Pa addr .
.El
.Sh RETURN VALUES
The
diff --git a/bsd/man/man9/store.9 b/bsd/man/man9/store.9
index 5ef2d664e..1092c2fce 100644
--- a/bsd/man/man9/store.9
+++ b/bsd/man/man9/store.9
@@ -34,15 +34,17 @@
.\"
.\" $FreeBSD: src/share/man/man9/store.9,v 1.7.2.4 2001/12/17 11:30:19 ru Exp $
.\"
-.Dd January 7, 1996
+.Dd December 16, 2004
.Dt STORE 9
.Os
.Sh NAME
.Nm store ,
.Nm subyte ,
-.Nm susword ,
-.Nm suswintr ,
-.Nm suword
+.Nm suibyte ,
+.Nm suword ,
+.Nm suiword ,
+.Nm sulong ,
+.Nm suulong
.Nd store data to user-space
.Sh SYNOPSIS
.In sys/types.h
@@ -50,13 +52,17 @@
.In sys/systm.h
.In sys/resourcevar.h
.Ft int
-.Fn subyte "void *base" "int byte"
+.Fn subyte "user_addr_t addr" "int byte"
.Ft int
-.Fn susword "void *base" "int word"
+.Fn suibyte "user_addr_t addr" "int byte"
.Ft int
-.Fn suswintr "void *base" "int word"
+.Fn suword "user_addr_t addr" "int word"
.Ft int
-.Fn suword "void *base" "long word"
+.Fn suiword "user_addr_t addr" "int word"
+.Ft int
+.Fn sulong "user_addr_t addr" "int64_t longword"
+.Ft int
+.Fn suulong "user_addr_t addr" "uint64_t longword"
.Sh DESCRIPTION
The
.Nm
functions are designed to copy small amounts of data to user-space.
@@ -65,20 +71,27 @@ functions are designed to copy small amounts of data
to user-space.
The
.Nm
routines provide the following functionality:
-.Bl -tag -width "suswintr()"
+.Bl -tag -width "suibyte()"
.It Fn subyte
Stores a byte of data to the user-space address
-.Pa base .
-.It Fn susword
-Stores a short word of data to the user-space address
-.Pa base .
-.It Fn suswintr
-Stores a short word of data to the user-space address
-.Pa base .
+.Pa addr .
+.It Fn suibyte
+Stores a byte of data to the user-space address
+.Pa addr .
This function is safe to call during an interrupt context.
.It Fn suword
Stores a word of data to the user-space address
-.Pa base .
+.Pa addr .
+.It Fn suiword
+Stores a word of data to the user-space address
+.Pa addr .
+This function is safe to call during an interrupt context.
+.It Fn sulong
+Stores a long word of data to the user-space address
+.Pa addr .
+.It Fn suulong
+Stores an unsigned long word of data to the user-space address
+.Pa addr .
.El
.Sh RETURN VALUES
The
diff --git a/bsd/miscfs/deadfs/dead_vnops.c b/bsd/miscfs/deadfs/dead_vnops.c
index aaca316c9..2e58fddac 100644
--- a/bsd/miscfs/deadfs/dead_vnops.c
+++ b/bsd/miscfs/deadfs/dead_vnops.c
@@ -58,7 +58,7 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -67,104 +67,80 @@
/*
 * Prototypes for dead operations on vnodes.
*/ -int dead_badop(), - dead_ebadf(); -int dead_lookup __P((struct vop_lookup_args *)); -#define dead_create ((int (*) __P((struct vop_create_args *)))dead_badop) -#define dead_mknod ((int (*) __P((struct vop_mknod_args *)))dead_badop) -int dead_open __P((struct vop_open_args *)); -#define dead_close ((int (*) __P((struct vop_close_args *)))nullop) -#define dead_access ((int (*) __P((struct vop_access_args *)))dead_ebadf) -#define dead_getattr ((int (*) __P((struct vop_getattr_args *)))dead_ebadf) -#define dead_setattr ((int (*) __P((struct vop_setattr_args *)))dead_ebadf) -int dead_read __P((struct vop_read_args *)); -int dead_write __P((struct vop_write_args *)); -int dead_ioctl __P((struct vop_ioctl_args *)); -int dead_select __P((struct vop_select_args *)); -#define dead_mmap ((int (*) __P((struct vop_mmap_args *)))dead_badop) -#define dead_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) -#define dead_seek ((int (*) __P((struct vop_seek_args *)))nullop) -#define dead_remove ((int (*) __P((struct vop_remove_args *)))dead_badop) -#define dead_link ((int (*) __P((struct vop_link_args *)))dead_badop) -#define dead_rename ((int (*) __P((struct vop_rename_args *)))dead_badop) -#define dead_mkdir ((int (*) __P((struct vop_mkdir_args *)))dead_badop) -#define dead_rmdir ((int (*) __P((struct vop_rmdir_args *)))dead_badop) -#define dead_symlink ((int (*) __P((struct vop_symlink_args *)))dead_badop) -#define dead_readdir ((int (*) __P((struct vop_readdir_args *)))dead_ebadf) -#define dead_readlink ((int (*) __P((struct vop_readlink_args *)))dead_ebadf) -#define dead_abortop ((int (*) __P((struct vop_abortop_args *)))dead_badop) -#define dead_inactive ((int (*) __P((struct vop_inactive_args *)))nullop) -#define dead_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop) -int dead_lock __P((struct vop_lock_args *)); -#define dead_unlock ((int (*) __P((struct vop_unlock_args *)))nullop) -int dead_bmap __P((struct vop_bmap_args *)); -int dead_strategy __P((struct vop_strategy_args *)); -int dead_print __P((struct vop_print_args *)); -#define dead_islocked ((int (*) __P((struct vop_islocked_args *)))nullop) -#define dead_pathconf ((int (*) __P((struct vop_pathconf_args *)))dead_ebadf) -#define dead_advlock ((int (*) __P((struct vop_advlock_args *)))dead_ebadf) -#define dead_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))dead_badop) -#define dead_valloc ((int (*) __P((struct vop_valloc_args *)))dead_badop) -#define dead_vfree ((int (*) __P((struct vop_vfree_args *)))dead_badop) -#define dead_truncate ((int (*) __P((struct vop_truncate_args *)))nullop) -#define dead_update ((int (*) __P((struct vop_update_args *)))nullop) -#define dead_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop) -int dead_pagein __P((struct vop_pagein_args *)); -int dead_pageout __P((struct vop_pageout_args *)); -int dead_blktooff __P((struct vop_blktooff_args *)); -int dead_offtoblk __P((struct vop_offtoblk_args *)); -int dead_cmap __P((struct vop_cmap_args *)); +int dead_badop(void *); +int dead_ebadf(void *); +int dead_lookup(struct vnop_lookup_args *); +#define dead_create (int (*)(struct vnop_create_args *))dead_badop +#define dead_mknod (int (*)(struct vnop_mknod_args *))dead_badop +int dead_open(struct vnop_open_args *); +#define dead_close (int (*)(struct vnop_close_args *))nullop +#define dead_access (int (*)(struct vnop_access_args *))dead_ebadf +#define dead_getattr (int (*)(struct vnop_getattr_args *))dead_ebadf +#define dead_setattr (int (*)(struct vnop_setattr_args *))dead_ebadf +int 
dead_read(struct vnop_read_args *);
+int	dead_write(struct vnop_write_args *);
+int	dead_ioctl(struct vnop_ioctl_args *);
+int	dead_select(struct vnop_select_args *);
+#define dead_mmap (int (*)(struct vnop_mmap_args *))dead_badop
+#define dead_fsync (int (*)(struct vnop_fsync_args *))nullop
+#define dead_remove (int (*)(struct vnop_remove_args *))dead_badop
+#define dead_link (int (*)(struct vnop_link_args *))dead_badop
+#define dead_rename (int (*)(struct vnop_rename_args *))dead_badop
+#define dead_mkdir (int (*)(struct vnop_mkdir_args *))dead_badop
+#define dead_rmdir (int (*)(struct vnop_rmdir_args *))dead_badop
+#define dead_symlink (int (*)(struct vnop_symlink_args *))dead_badop
+#define dead_readdir (int (*)(struct vnop_readdir_args *))dead_ebadf
+#define dead_readlink (int (*)(struct vnop_readlink_args *))dead_ebadf
+#define dead_inactive (int (*)(struct vnop_inactive_args *))nullop
+#define dead_reclaim (int (*)(struct vnop_reclaim_args *))nullop
+int	dead_strategy(struct vnop_strategy_args *);
+#define dead_pathconf (int (*)(struct vnop_pathconf_args *))dead_ebadf
+#define dead_advlock (int (*)(struct vnop_advlock_args *))dead_ebadf
+#define dead_bwrite (int (*)(struct vnop_bwrite_args *))nullop
+int	dead_pagein(struct vnop_pagein_args *);
+int	dead_pageout(struct vnop_pageout_args *);
+int	dead_blktooff(struct vnop_blktooff_args *);
+int	dead_offtoblk(struct vnop_offtoblk_args *);
+int	dead_blockmap(struct vnop_blockmap_args *);
#define VOPFUNC int (*)(void *)
int (**dead_vnodeop_p)(void *);
struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
-	{ &vop_default_desc, (VOPFUNC)vn_default_error },
-	{ &vop_lookup_desc, (VOPFUNC)dead_lookup },	/* lookup */
-	{ &vop_create_desc, (VOPFUNC)dead_create },	/* create */
-	{ &vop_mknod_desc, (VOPFUNC)dead_mknod },	/* mknod */
-	{ &vop_open_desc, (VOPFUNC)dead_open },	/* open */
-	{ &vop_close_desc, (VOPFUNC)dead_close },	/* close */
-	{ &vop_access_desc, (VOPFUNC)dead_access },	/* access */
-	{ &vop_getattr_desc, (VOPFUNC)dead_getattr },	/* getattr */
-	{ &vop_setattr_desc, (VOPFUNC)dead_setattr },	/* setattr */
-	{ &vop_read_desc, (VOPFUNC)dead_read },	/* read */
-	{ &vop_write_desc, (VOPFUNC)dead_write },	/* write */
-	{ &vop_ioctl_desc, (VOPFUNC)dead_ioctl },	/* ioctl */
-	{ &vop_select_desc, (VOPFUNC)dead_select },	/* select */
-	{ &vop_mmap_desc, (VOPFUNC)dead_mmap },	/* mmap */
-	{ &vop_fsync_desc, (VOPFUNC)dead_fsync },	/* fsync */
-	{ &vop_seek_desc, (VOPFUNC)dead_seek },	/* seek */
-	{ &vop_remove_desc, (VOPFUNC)dead_remove },	/* remove */
-	{ &vop_link_desc, (VOPFUNC)dead_link },	/* link */
-	{ &vop_rename_desc, (VOPFUNC)dead_rename },	/* rename */
-	{ &vop_mkdir_desc, (VOPFUNC)dead_mkdir },	/* mkdir */
-	{ &vop_rmdir_desc, (VOPFUNC)dead_rmdir },	/* rmdir */
-	{ &vop_symlink_desc, (VOPFUNC)dead_symlink },	/* symlink */
-	{ &vop_readdir_desc, (VOPFUNC)dead_readdir },	/* readdir */
-	{ &vop_readlink_desc, (VOPFUNC)dead_readlink },	/* readlink */
-	{ &vop_abortop_desc, (VOPFUNC)dead_abortop },	/* abortop */
-	{ &vop_inactive_desc, (VOPFUNC)dead_inactive },	/* inactive */
-	{ &vop_reclaim_desc, (VOPFUNC)dead_reclaim },	/* reclaim */
-	{ &vop_lock_desc, (VOPFUNC)dead_lock },	/* lock */
-	{ &vop_unlock_desc, (VOPFUNC)dead_unlock },	/* unlock */
-	{ &vop_bmap_desc, (VOPFUNC)dead_bmap },	/* bmap */
-	{ &vop_strategy_desc, (VOPFUNC)dead_strategy },	/* strategy */
-	{ &vop_print_desc, (VOPFUNC)dead_print },	/* print */
-	{ &vop_islocked_desc, (VOPFUNC)dead_islocked },	/* islocked */
-	{ &vop_pathconf_desc, (VOPFUNC)dead_pathconf },	/*
pathconf */ - { &vop_advlock_desc, (VOPFUNC)dead_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)dead_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)dead_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)dead_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)dead_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)dead_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)dead_bwrite }, /* bwrite */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)dead_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)dead_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)dead_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)dead_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)dead_create }, /* create */ + { &vnop_open_desc, (VOPFUNC)dead_open }, /* open */ + { &vnop_mknod_desc, (VOPFUNC)dead_mknod }, /* mknod */ + { &vnop_close_desc, (VOPFUNC)dead_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)dead_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)dead_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)dead_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)dead_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)dead_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)dead_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)dead_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC)dead_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)dead_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)dead_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)dead_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)dead_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)dead_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)dead_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)dead_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)dead_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)dead_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)dead_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)dead_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)dead_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)dead_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)dead_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)dead_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)dead_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)dead_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)dead_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc dead_vnodeop_opv_desc = @@ -176,10 +152,11 @@ struct vnodeopv_desc dead_vnodeop_opv_desc = /* ARGSUSED */ int dead_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; + vfs_context_t a_context; } */ *ap; { @@ -193,11 +170,10 @@ dead_lookup(ap) /* ARGSUSED */ int dead_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { @@ -210,11 +186,11 @@ 
dead_open(ap) /* ARGSUSED */ int dead_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { @@ -234,11 +210,11 @@ dead_read(ap) /* ARGSUSED */ int dead_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { @@ -253,29 +229,28 @@ dead_write(ap) /* ARGSUSED */ int dead_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { if (!chkvnlock(ap->a_vp)) return (EBADF); - return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap)); + return (VCALL(ap->a_vp, VOFFSET(vnop_ioctl), ap)); } /* ARGSUSED */ int dead_select(ap) - struct vop_select_args /* { + struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; + kauth_cred_t a_cred; void *a_wql; struct proc *a_p; } */ *ap; @@ -292,102 +267,48 @@ dead_select(ap) */ int dead_strategy(ap) - struct vop_strategy_args /* { + struct vnop_strategy_args /* { struct buf *a_bp; } */ *ap; { - if (ap->a_bp->b_vp == NULL || !chkvnlock(ap->a_bp->b_vp)) { - ap->a_bp->b_flags |= B_ERROR; - biodone(ap->a_bp); + if (buf_vnode(ap->a_bp) == NULL || !chkvnlock(buf_vnode(ap->a_bp))) { + buf_seterror(ap->a_bp, EIO); + buf_biodone(ap->a_bp); return (EIO); } - return (VOP_STRATEGY(ap->a_bp)); + return (VNOP_STRATEGY(ap->a_bp)); } /* * Wait until the vnode has finished changing state. */ int -dead_lock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - struct vnode *vp = ap->a_vp; - - /* - * Since we are not using the lock manager, we must clear - * the interlock here. - */ - if (ap->a_flags & LK_INTERLOCK) { - simple_unlock(&vp->v_interlock); - ap->a_flags &= ~LK_INTERLOCK; - } - if (!chkvnlock(ap->a_vp)) - return (0); - return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap)); -} - -/* - * Wait until the vnode has finished changing state. - */ -int -dead_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - - if (!chkvnlock(ap->a_vp)) - return (EIO); - return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp)); -} - -/* - * Wait until the vnode has finished changing state. - */ -int -dead_cmap(ap) - struct vop_cmap_args /* { +dead_blockmap(ap) + struct vnop_blockmap_args /* { struct vnode *a_vp; off_t a_foffset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; + int flags; + vfs_context_t a_context; } */ *ap; { if (!chkvnlock(ap->a_vp)) return (EIO); - return (VOP_CMAP(ap->a_vp, ap->a_foffset, ap->a_size, ap->a_bpn, ap->a_run, ap->a_poff)); -} - -/* - * Print out the contents of a dead vnode. 
- */ -/* ARGSUSED */ -int -dead_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - printf("tag VT_NON, dead vnode\n"); + return (VNOP_BLOCKMAP(ap->a_vp, ap->a_foffset, ap->a_size, ap->a_bpn, + ap->a_run, ap->a_poff, ap->a_flags, ap->a_context)); } /* * Empty vnode failed operation */ +/* ARGSUSED */ int -dead_ebadf() +dead_ebadf(void *dummy) { return (EBADF); @@ -396,8 +317,9 @@ dead_ebadf() /* * Empty vnode bad operation */ +/* ARGSUSED */ int -dead_badop() +dead_badop(void *dummy) { panic("dead_badop called"); @@ -407,8 +329,9 @@ dead_badop() /* * Empty vnode null operation */ +/* ARGSUSED */ int -dead_nullop() +dead_nullop(void *dummy) { return (0); @@ -419,26 +342,18 @@ dead_nullop() * in a state of change. */ int -chkvnlock(vp) - register struct vnode *vp; +chkvnlock(__unused vnode_t vp) { - int locked = 0; - - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - sleep((caddr_t)vp, PINOD); - locked = 1; - } - return (locked); + return (0); } /* Blktooff */ int dead_blktooff(ap) - struct vop_blktooff_args /* { + struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */ *ap; { @@ -451,15 +366,15 @@ dead_blktooff(ap) /* Blktooff */ int dead_offtoblk(ap) -struct vop_offtoblk_args /* { +struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */ *ap; { if (!chkvnlock(ap->a_vp)) return (EIO); - *ap->a_lblkno = (daddr_t)-1; /* failure */ + *ap->a_lblkno = (daddr64_t)-1; /* failure */ return (0); } diff --git a/bsd/miscfs/devfs/devfs.h b/bsd/miscfs/devfs/devfs.h index c76d8544a..647e94785 100644 --- a/bsd/miscfs/devfs/devfs.h +++ b/bsd/miscfs/devfs/devfs.h @@ -52,7 +52,6 @@ #include -#ifdef __APPLE_API_UNSTABLE #define DEVFS_CHAR 0 #define DEVFS_BLOCK 1 @@ -73,9 +72,10 @@ __BEGIN_DECLS * Returns: * A handle to a device node if successful, NULL otherwise. */ -void * devfs_make_node __P((dev_t dev, int chrblk, uid_t uid, gid_t gid, - int perms, char *fmt, ...)); +void * devfs_make_node(dev_t dev, int chrblk, uid_t uid, gid_t gid, + int perms, const char *fmt, ...); +#ifdef BSD_KERNEL_PRIVATE /* * Function: devfs_make_link * @@ -85,7 +85,8 @@ void * devfs_make_node __P((dev_t dev, int chrblk, uid_t uid, gid_t gid, * Returns: * 0 if successful, -1 if failed */ -int devfs_link __P((void * handle, char *fmt, ...)); +int devfs_link(void * handle, char *fmt, ...); +#endif /* BSD_KERNEL_PRIVATE */ /* * Function: devfs_remove @@ -94,10 +95,9 @@ int devfs_link __P((void * handle, char *fmt, ...)); * Remove the device node returned by devfs_make_node() along with * any links created with devfs_make_link(). 
*/ -void devfs_remove __P((void * handle)); +void devfs_remove(void * handle); __END_DECLS -#endif /* __APPLE_API_UNSTABLE */ #ifdef __APPLE_API_PRIVATE /* XXX */ diff --git a/bsd/miscfs/devfs/devfs_proto.h b/bsd/miscfs/devfs/devfs_proto.h index 3683d6b57..77e9b1c2f 100644 --- a/bsd/miscfs/devfs/devfs_proto.h +++ b/bsd/miscfs/devfs/devfs_proto.h @@ -33,19 +33,14 @@ int dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, int dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, devnode_t * *dn_pp, struct devfsmount *dvm); void devnode_free(devnode_t * dnp); -void devfs_dn_free(devnode_t * dnp); -int devfs_propogate(devdirent_t * parent,devdirent_t * child); int dev_dup_plane(struct devfsmount *devfs_mp_p); void devfs_free_plane(struct devfsmount *devfs_mp_p); -int dev_dup_entry(devnode_t * parent, devdirent_t * back, devdirent_t * *dnm_pp, - struct devfsmount *dvm); int dev_free_name(devdirent_t * dirent_p); -void dev_free_hier(devdirent_t * dirent_p); int devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, struct proc * p); int dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp); -int devfs_mount(struct mount *mp, char *path, caddr_t data, - struct nameidata *ndp, struct proc *p); +int devfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, + vfs_context_t context); #endif /* __APPLE_API_PRIVATE */ #endif /* __DEVFS_DEVFS_PROTO_H__ */ diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index 11464c40a..ff61c3d5a 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -82,7 +82,7 @@ #include #include #include -#include +#include #include #include #include @@ -90,7 +90,18 @@ #include "devfs.h" #include "devfsdefs.h" -struct lock__bsd__ devfs_lock; /* the "big switch" */ +static void devfs_release_busy(devnode_t *); +static void dev_free_hier(devdirent_t *); +static int devfs_propogate(devdirent_t *, devdirent_t *); +static int dev_finddir(char *, devnode_t *, int, devnode_t **); +static int dev_dup_entry(devnode_t *, devdirent_t *, devdirent_t **, struct devfsmount *); + + +lck_grp_t * devfs_lck_grp; +lck_grp_attr_t * devfs_lck_grp_attr; +lck_attr_t * devfs_lck_attr; +lck_mtx_t devfs_mutex; + devdirent_t * dev_root = NULL; /* root of backing tree */ struct devfs_stats devfs_stats; /* hold stats */ @@ -116,20 +127,37 @@ static int devfs_ready = 0; int devfs_sinit(void) { - lockinit(&devfs_lock, PINOD, "devfs", 0, 0); - if (dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, - &dev_root)) { + int error; + + devfs_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(devfs_lck_grp_attr); + devfs_lck_grp = lck_grp_alloc_init("devfs_lock", devfs_lck_grp_attr); + + devfs_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(devfs_lck_attr); + + lck_mtx_init(&devfs_mutex, devfs_lck_grp, devfs_lck_attr); + + DEVFS_LOCK(); + error = dev_add_entry("root", NULL, DEV_DIR, NULL, NULL, NULL, &dev_root); + DEVFS_UNLOCK(); + + if (error) { printf("devfs_sinit: dev_add_entry failed "); - return (EOPNOTSUPP); + return (ENOTSUP); } #ifdef HIDDEN_MOUNTPOINT MALLOC(devfs_hidden_mount, struct mount *, sizeof(struct mount), M_MOUNT, M_WAITOK); bzero(devfs_hidden_mount,sizeof(struct mount)); + mount_lock_init(devfs_hidden_mount); + TAILQ_INIT(&devfs_hidden_mount->mnt_vnodelist); + TAILQ_INIT(&devfs_hidden_mount->mnt_workerqueue); + TAILQ_INIT(&devfs_hidden_mount->mnt_newvnodes); - /* Initialize the default IO 
constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; devfs_mount(devfs_hidden_mount,"dummy",NULL,NULL,NULL); dev_root->de_dnp->dn_dvm @@ -146,13 +174,15 @@ devfs_sinit(void) \***********************************************************************/ -/***************************************************************\ -* Search down the linked list off a dir to find "name" * -* return the devnode_t * for that node. -\***************************************************************/ -/*proto*/ + +/*************************************************************** + * Search down the linked list off a dir to find "name" + * return the devnode_t * for that node. + * + * called with DEVFS_LOCK held + ***************************************************************/ devdirent_t * -dev_findname(devnode_t * dir,char *name) +dev_findname(devnode_t * dir, char *name) { devdirent_t * newfp; if (dir->dn_type != DEV_DIR) return 0;/*XXX*/ /* printf?*/ @@ -170,6 +200,7 @@ dev_findname(devnode_t * dir,char *name) } } newfp = dir->dn_typeinfo.Dir.dirlist; + while(newfp) { if(!(strcmp(name,newfp->de_name))) @@ -179,121 +210,16 @@ dev_findname(devnode_t * dir,char *name) return NULL; } -#if 0 -/***********************************************************************\ -* Given a starting node (0 for root) and a pathname, return the node * -* for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' * -* option is true, then create any missing nodes in the path and create * -* and return the final node as well. * -* This is used to set up a directory, before making nodes in it.. * -* * -* Warning: This function is RECURSIVE. * -\***********************************************************************/ -int -dev_finddir(char * orig_path, /* find this dir (err if not dir) */ - devnode_t * dirnode, /* starting point */ - int create, /* create path? */ - devnode_t * * dn_pp) /* returned */ -{ - devdirent_t * dirent_p; - devnode_t * dnp = NULL; - char pathbuf[DEVMAXPATHSIZE]; - char *path; - char *name; - register char *cp; - int retval; - - - /***************************************\ - * If no parent directory is given * - * then start at the root of the tree * - \***************************************/ - if(!dirnode) dirnode = dev_root->de_dnp; - - /***************************************\ - * Sanity Checks * - \***************************************/ - if (dirnode->dn_type != DEV_DIR) return ENOTDIR; - if(strlen(orig_path) > (DEVMAXPATHSIZE - 1)) return ENAMETOOLONG; - - - path = pathbuf; - strcpy(path,orig_path); - - /***************************************\ - * always absolute, skip leading / * - * get rid of / or // or /// etc. * - \***************************************/ - while(*path == '/') path++; - - /***************************************\ - * If nothing left, then parent was it.. 
* - \***************************************/ - if ( *path == '\0' ) { - *dn_pp = dirnode; - return 0; - } - - /***************************************\ - * find the next segment of the name * - \***************************************/ - cp = name = path; - while((*cp != '/') && (*cp != 0)) { - cp++; - } - - /***********************************************\ - * Check to see if it's the last component * - \***********************************************/ - if(*cp) { - path = cp + 1; /* path refers to the rest */ - *cp = 0; /* name is now a separate string */ - if(!(*path)) { - path = (char *)0; /* was trailing slash */ - } - } else { - path = NULL; /* no more to do */ - } - - /***************************************\ - * Start scanning along the linked list * - \***************************************/ - dirent_p = dev_findname(dirnode,name); - if(dirent_p) { /* check it's a directory */ - dnp = dirent_p->de_dnp; - if(dnp->dn_type != DEV_DIR) return ENOTDIR; - } else { - /***************************************\ - * The required element does not exist * - * So we will add it if asked to. * - \***************************************/ - if(!create) return ENOENT; - - if((retval = dev_add_entry(name, dirnode, - DEV_DIR, NULL, NULL, NULL, - &dirent_p)) != 0) { - return retval; - } - dnp = dirent_p->de_dnp; - devfs_propogate(dirnode->dn_typeinfo.Dir.myname,dirent_p); - } - if(path != NULL) { /* decide whether to recurse more or return */ - return (dev_finddir(path,dnp,create,dn_pp)); - } else { - *dn_pp = dnp; - return 0; - } -} -#endif -/***********************************************************************\ -* Given a starting node (0 for root) and a pathname, return the node * -* for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' * -* option is true, then create any missing nodes in the path and create * -* and return the final node as well. * -* This is used to set up a directory, before making nodes in it.. * -\***********************************************************************/ -/* proto */ -int +/*********************************************************************** + * Given a starting node (0 for root) and a pathname, return the node + * for the end item on the path. It MUST BE A DIRECTORY. If the 'CREATE' + * option is true, then create any missing nodes in the path and create + * and return the final node as well. + * This is used to set up a directory, before making nodes in it.. 
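The path walk that dev_finddir performs (formerly by recursion, now iteratively under a single lock hold) reduces to the classic component-splitting idiom: skip leading slashes, isolate one name, look it up or create it, then continue with the remainder. A minimal sketch under assumed names; struct node and lookup_or_create are hypothetical stand-ins, not devfs types:

    #include <sys/errno.h>
    #include <stddef.h>

    struct node;                                   /* opaque placeholder */
    extern struct node *lookup_or_create(struct node *dir,
                                         const char *name, int create);

    static int
    walk_path(char *path, struct node *dir, int create, struct node **result)
    {
        char *cp, *name;

        while (*path == '/')              /* absolute: eat /, //, /// ... */
            path++;

        while (*path != '\0') {
            name = path;
            for (cp = path; *cp != '\0' && *cp != '/'; cp++)
                ;                         /* scan to the end of this component */
            if (*cp != '\0') {
                *cp = '\0';               /* terminate this component */
                path = cp + 1;            /* remainder still to walk */
            } else {
                path = cp;                /* that was the last component */
            }
            if ((dir = lookup_or_create(dir, name, create)) == NULL)
                return (ENOENT);          /* or the create error, if asked */
            while (*path == '/')
                path++;
        }
        *result = dir;
        return (0);
    }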
+ * + * called with DEVFS_LOCK held + ***********************************************************************/ +static int dev_finddir(char * path, devnode_t * dirnode, int create, @@ -365,16 +291,17 @@ dev_finddir(char * path, } -/***********************************************************************\ -* Add a new NAME element to the devfs * -* If we're creating a root node, then dirname is NULL * -* Basically this creates a new namespace entry for the device node * -* * -* Creates a name node, and links it to the supplied node * -\***********************************************************************/ -/*proto*/ +/*********************************************************************** + * Add a new NAME element to the devfs + * If we're creating a root node, then dirname is NULL + * Basically this creates a new namespace entry for the device node + * + * Creates a name node, and links it to the supplied node + * + * called with DEVFS_LOCK held + ***********************************************************************/ int -dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, +dev_add_name(char * name, devnode_t * dirnode, __unused devdirent_t * back, devnode_t * dnp, devdirent_t * *dirent_pp) { devdirent_t * dirent_p = NULL; @@ -470,8 +397,6 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, /* * Put it on the END of the linked list of directory entries */ - int len; - dirent_p->de_parent = dirnode; /* null for root */ dirent_p->de_prevp = dirnode->dn_typeinfo.Dir.dirlast; dirent_p->de_next = *(dirent_p->de_prevp); /* should be NULL */ @@ -488,21 +413,22 @@ dev_add_name(char * name, devnode_t * dirnode, devdirent_t * back, } -/***********************************************************************\ -* Add a new element to the devfs plane. * -* * -* Creates a new dev_node to go with it if the prototype should not be * -* reused. (Is a DIR, or we select SPLIT_DEVS at compile time) * -* typeinfo gives us info to make our node if we don't have a prototype. * -* If typeinfo is null and proto exists, then the typeinfo field of * -* the proto is used intead in the CREATE case. * -* note the 'links' count is 0 (except if a dir) * -* but it is only cleared on a transition * -* so this is ok till we link it to something * -* Even in SPLIT_DEVS mode, * -* if the node already exists on the wanted plane, just return it * -\***********************************************************************/ -/*proto*/ +/*********************************************************************** + * Add a new element to the devfs plane. + * + * Creates a new dev_node to go with it if the prototype should not be + * reused. (Is a DIR, or we select SPLIT_DEVS at compile time) + * typeinfo gives us info to make our node if we don't have a prototype. + * If typeinfo is null and proto exists, then the typeinfo field of + * the proto is used intead in the CREATE case. 
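The list splice in dev_add_name above uses the pointer-to-pointer idiom: de_prevp stores the address of whichever pointer currently points at the entry, so appending at the tail and unlinking are both O(1) with no back pointer to the previous node. The same shape with illustrative names (a fresh directory starts with list = NULL and last = &list):

    #include <stddef.h>

    struct ent {
        struct ent  *next;
        struct ent **prevp;   /* address of the pointer that points at us */
    };

    struct dir {
        struct ent  *list;    /* head of the entry list */
        struct ent **last;    /* address of the final next pointer */
    };

    static void
    dir_append(struct dir *d, struct ent *e)
    {
        e->prevp = d->last;
        e->next  = *e->prevp;   /* NULL, since prevp was the tail pointer */
        *e->prevp = e;
        d->last = &e->next;
    }

    static void
    ent_unlink(struct dir *d, struct ent *e)
    {
        *e->prevp = e->next;
        if (e->next != NULL)
            e->next->prevp = e->prevp;
        else
            d->last = e->prevp;   /* removed the tail; back the tail up */
    }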
+ * note the 'links' count is 0 (except if a dir) + * but it is only cleared on a transition + * so this is ok till we link it to something + * Even in SPLIT_DEVS mode, + * if the node already exists on the wanted plane, just return it + * + * called with DEVFS_LOCK held +***********************************************************************/ int dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, devnode_t * *dn_pp, struct devfsmount *dvm) @@ -545,7 +471,7 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, * If we have a proto, that means that we are duplicating some * other device, which can only happen if we are not at the back plane */ - if(proto) { + if (proto) { bcopy(proto, dnp, sizeof(devnode_t)); dnp->dn_links = 0; dnp->dn_linklist = NULL; @@ -562,8 +488,8 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, /* * We have no prototype, so start off with a clean slate */ - tv = time; - bzero(dnp,sizeof(devnode_t)); + microtime(&tv); + bzero(dnp, sizeof(devnode_t)); dnp->dn_type = entrytype; dnp->dn_nextsibling = dnp; dnp->dn_prevsiblingp = &(dnp->dn_nextsibling); @@ -639,21 +565,29 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, } -/*proto*/ +/*********************************************************************** + * called with DEVFS_LOCK held + **********************************************************************/ void devnode_free(devnode_t * dnp) { + if (dnp->dn_lflags & DN_BUSY) { + dnp->dn_lflags |= DN_DELETE; + return; + } if (dnp->dn_type == DEV_SLNK) { DEVFS_DECR_STRINGSPACE(dnp->dn_typeinfo.Slnk.namelen + 1); - FREE(dnp->dn_typeinfo.Slnk.name,M_DEVFSNODE); + FREE(dnp->dn_typeinfo.Slnk.name, M_DEVFSNODE); } - FREE(dnp, M_DEVFSNODE); DEVFS_DECR_NODES(); - return; + FREE(dnp, M_DEVFSNODE); } -/*proto*/ -void + +/*********************************************************************** + * called with DEVFS_LOCK held + **********************************************************************/ +static void devfs_dn_free(devnode_t * dnp) { if(--dnp->dn_links <= 0 ) /* can be -1 for initial free, on error */ @@ -666,16 +600,9 @@ devfs_dn_free(devnode_t * dnp) } if (dnp->dn_vn == NULL) { -#if 0 - printf("devfs_dn_free: free'ing %x\n", (unsigned int)dnp); -#endif devnode_free(dnp); /* no accesses/references */ } else { -#if 0 - printf("devfs_dn_free: marking %x for deletion\n", - (unsigned int)dnp); -#endif dnp->dn_delete = TRUE; } } @@ -686,20 +613,21 @@ devfs_dn_free(devnode_t * dnp) * Add or delete a chain of front nodes * \***********************************************************************/ -/***********************************************************************\ -* Given a directory backing node, and a child backing node, add the * -* appropriate front nodes to the front nodes of the directory to * -* represent the child node to the user * -* * -* on failure, front nodes will either be correct or not exist for each * -* front dir, however dirs completed will not be stripped of completed * -* frontnodes on failure of a later frontnode * -* * -* This allows a new node to be propogated through all mounted planes * -* * -\***********************************************************************/ -/*proto*/ -int + +/*********************************************************************** + * Given a directory backing node, and a child backing node, add the + * appropriate front nodes to the front nodes of the directory to + * represent the child node to the user + * + * on failure, 
front nodes will either be correct or not exist for each + * front dir, however dirs completed will not be stripped of completed + * frontnodes on failure of a later frontnode + * + * This allows a new node to be propogated through all mounted planes + * + * called with DEVFS_LOCK held + ***********************************************************************/ +static int devfs_propogate(devdirent_t * parent,devdirent_t * child) { int error; @@ -709,9 +637,9 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) devnode_t * adnp = parent->de_dnp; int type = child->de_dnp->dn_type; - /***********************************************\ - * Find the other instances of the parent node * - \***********************************************/ + /*********************************************** + * Find the other instances of the parent node + ***********************************************/ for (adnp = pdnp->dn_nextsibling; adnp != pdnp; adnp = adnp->dn_nextsibling) @@ -730,6 +658,7 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) return 0; /* for now always succeed */ } + /*********************************************************************** * remove all instances of this devicename [for backing nodes..] * note.. if there is another link to the node (non dir nodes only) @@ -745,20 +674,17 @@ devfs_remove(void *dirent_p) { devnode_t * dnp = ((devdirent_t *)dirent_p)->de_dnp; devnode_t * dnp2; - boolean_t funnel_state; boolean_t lastlink; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + DEVFS_LOCK(); if (!devfs_ready) { printf("devfs_remove: not ready for devices!\n"); goto out; } - DEVFS_LOCK(0); - /* keep removing the next sibling till only we exist. */ - while((dnp2 = dnp->dn_nextsibling) != dnp) { + while ((dnp2 = dnp->dn_nextsibling) != dnp) { /* * Keep removing the next front node till no more exist @@ -767,7 +693,7 @@ devfs_remove(void *dirent_p) dnp->dn_nextsibling->dn_prevsiblingp = &(dnp->dn_nextsibling); dnp2->dn_nextsibling = dnp2; dnp2->dn_prevsiblingp = &(dnp2->dn_nextsibling); - if(dnp2->dn_linklist) { + if (dnp2->dn_linklist) { do { lastlink = (1 == dnp2->dn_links); dev_free_name(dnp2->dn_linklist); @@ -780,26 +706,28 @@ devfs_remove(void *dirent_p) * If we are not running in SPLIT_DEVS mode, then * THIS is what gets rid of the propogated nodes. */ - if(dnp->dn_linklist) { + if (dnp->dn_linklist) { do { lastlink = (1 == dnp->dn_links); dev_free_name(dnp->dn_linklist); } while (!lastlink); } - DEVFS_UNLOCK(0); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + DEVFS_UNLOCK(); + return ; } + /*************************************************************** * duplicate the backing tree into a tree of nodes hung off the * mount point given as the argument. Do this by * calling dev_dup_entry which recurses all the way * up the tree.. 
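The thread_funnel_set(kernel_flock, ...) brackets being deleted throughout this file were the Mach-funnel way of serializing devfs against the rest of BSD; they give way to the private mutex set up in devfs_sinit. The DEVFS_LOCK()/DEVFS_UNLOCK() macros are presumably thin wrappers over that lck_mtx (their definitions live in devfsdefs.h, which this excerpt does not show); the initialization sequence is the one visible in devfs_sinit:

    #include <kern/locks.h>

    extern lck_mtx_t devfs_mutex;

    /* presumed shape of the new lock macros (see devfsdefs.h) */
    #define DEVFS_LOCK()    lck_mtx_lock(&devfs_mutex)
    #define DEVFS_UNLOCK()  lck_mtx_unlock(&devfs_mutex)

    /* one-time setup, mirroring devfs_sinit */
    static void
    devfs_lock_setup(void)
    {
        lck_grp_attr_t *grp_attr = lck_grp_attr_alloc_init();
        lck_grp_t      *grp      = lck_grp_alloc_init("devfs_lock", grp_attr);
        lck_attr_t     *attr     = lck_attr_alloc_init();

        lck_mtx_init(&devfs_mutex, grp, attr);
    }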
+ * + * called with DEVFS_LOCK held **************************************************************/ -/*proto*/ int dev_dup_plane(struct devfsmount *devfs_mp_p) { @@ -807,40 +735,43 @@ dev_dup_plane(struct devfsmount *devfs_mp_p) int error = 0; if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p))) - return error; + return error; devfs_mp_p->plane_root = new; return error; } -/***************************************************************\ -* Free a whole plane -\***************************************************************/ -/*proto*/ +/*************************************************************** + * Free a whole plane + * + * called with DEVFS_LOCK held + ***************************************************************/ void devfs_free_plane(struct devfsmount *devfs_mp_p) { devdirent_t * dirent_p; dirent_p = devfs_mp_p->plane_root; - if(dirent_p) { + if (dirent_p) { dev_free_hier(dirent_p); dev_free_name(dirent_p); } devfs_mp_p->plane_root = NULL; } -/***************************************************************\ -* Create and link in a new front element.. * -* Parent can be 0 for a root node * -* Not presently usable to make a symlink XXX * -* (Ok, symlinks don't propogate) -* recursively will create subnodes corresponding to equivalent * -* child nodes in the base level * -\***************************************************************/ -/*proto*/ -int + +/*************************************************************** + * Create and link in a new front element.. + * Parent can be 0 for a root node + * Not presently usable to make a symlink XXX + * (Ok, symlinks don't propogate) + * recursively will create subnodes corresponding to equivalent + * child nodes in the base level + * + * called with DEVFS_LOCK held + ***************************************************************/ +static int dev_dup_entry(devnode_t * parent, devdirent_t * back, devdirent_t * *dnm_pp, struct devfsmount *dvm) { @@ -890,13 +821,16 @@ dev_dup_entry(devnode_t * parent, devdirent_t * back, devdirent_t * *dnm_pp, return error; } -/***************************************************************\ -* Free a name node * -* remember that if there are other names pointing to the * -* dev_node then it may not get freed yet * -* can handle if there is no dnp * -\***************************************************************/ -/*proto*/ + +/*************************************************************** + * Free a name node + * remember that if there are other names pointing to the + * dev_node then it may not get freed yet + * can handle if there is no dnp + * + * called with DEVFS_LOCK held + ***************************************************************/ + int dev_free_name(devdirent_t * dirent_p) { @@ -952,19 +886,22 @@ dev_free_name(devdirent_t * dirent_p) } DEVFS_DECR_ENTRIES(); - FREE(dirent_p,M_DEVFSNAME); + FREE(dirent_p, M_DEVFSNAME); return 0; } -/***************************************************************\ -* Free a hierarchy starting at a directory node name * -* remember that if there are other names pointing to the * -* dev_node then it may not get freed yet * -* can handle if there is no dnp * -* leave the node itself allocated. 
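devnode_free above, devfs_release_busy, and the DN_BUSY marking in devfs_dntovn (both appear later in this file) form a small deferred-free protocol: a node whose lock was dropped mid-operation is only marked DN_DELETE, and whoever clears the busy bit performs the real free. Its essence, with a hypothetical struct dnode standing in for devnode_t and made-up flag values; the caller is assumed to hold DEVFS_LOCK throughout:

    #define DN_BUSY    0x01   /* illustrative values, not devfsdefs.h's */
    #define DN_DELETE  0x02

    struct dnode { int lflags; /* ... */ };

    extern void dnode_reclaim(struct dnode *dn);   /* the actual free */

    static void
    dnode_free(struct dnode *dn)
    {
        if (dn->lflags & DN_BUSY) {
            dn->lflags |= DN_DELETE;   /* defer: someone is still using it */
            return;
        }
        dnode_reclaim(dn);
    }

    static void
    dnode_release_busy(struct dnode *dn)
    {
        dn->lflags &= ~DN_BUSY;
        if (dn->lflags & DN_DELETE)    /* a delete arrived while busy */
            dnode_free(dn);            /* now actually frees */
    }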
* -\***************************************************************/ -/*proto*/ -void + +/*************************************************************** + * Free a hierarchy starting at a directory node name + * remember that if there are other names pointing to the + * dev_node then it may not get freed yet + * can handle if there is no dnp + * leave the node itself allocated. + * + * called with DEVFS_LOCK held + ***************************************************************/ + +static void dev_free_hier(devdirent_t * dirent_p) { devnode_t * dnp = dirent_p->de_dnp; @@ -981,60 +918,155 @@ dev_free_hier(devdirent_t * dirent_p) } } -/***************************************************************\ -* given a dev_node, find the appropriate vnode if one is already -* associated, or get a new one and associate it with the dev_node -\***************************************************************/ -/*proto*/ + +/*************************************************************** + * given a dev_node, find the appropriate vnode if one is already + * associated, or get a new one and associate it with the dev_node + * + * called with DEVFS_LOCK held + ***************************************************************/ int -devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, struct proc * p) +devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) { - struct vnode *vn_p, *nvp; + struct vnode *vn_p; int error = 0; + struct vnode_fsparam vfsp; + enum vtype vtype = 0; + int markroot = 0; +retry: *vn_pp = NULL; vn_p = dnp->dn_vn; + + dnp->dn_lflags |= DN_BUSY; + if (vn_p) { /* already has a vnode */ - *vn_pp = vn_p; - return(vget(vn_p, LK_EXCLUSIVE, p)); + uint32_t vid; + + vid = vnode_vid(vn_p); + + DEVFS_UNLOCK(); + + error = vnode_getwithvid(vn_p, vid); + + DEVFS_LOCK(); + + if (dnp->dn_lflags & DN_DELETE) { + /* + * our BUSY node got marked for + * deletion while the DEVFS lock + * was dropped... + */ + if (error == 0) { + /* + * vnode_getwithvid returned a valid ref + * which we need to drop + */ + vnode_put(vn_p); + } + /* + * set the error to EAGAIN + * which will cause devfs_lookup + * to retry this node + */ + error = EAGAIN; + } + if ( !error) + *vn_pp = vn_p; + + devfs_release_busy(dnp); + + return error; + } + + if (dnp->dn_lflags & DN_CREATE) { + dnp->dn_lflags |= DN_CREATEWAIT; + msleep(&dnp->dn_lflags, &devfs_mutex, PRIBIO, 0 , 0); + goto retry; } - if (!(error = getnewvnode(VT_DEVFS, dnp->dn_dvm->mount, - *(dnp->dn_ops), &vn_p))) { - switch(dnp->dn_type) { + + dnp->dn_lflags |= DN_CREATE; + + switch (dnp->dn_type) { case DEV_SLNK: - vn_p->v_type = VLNK; + vtype = VLNK; break; case DEV_DIR: if (dnp->dn_typeinfo.Dir.parent == dnp) { - vn_p->v_flag |= VROOT; + markroot = 1; } - vn_p->v_type = VDIR; + vtype = VDIR; break; case DEV_BDEV: case DEV_CDEV: - vn_p->v_type - = (dnp->dn_type == DEV_BDEV) ? VBLK : VCHR; - if ((nvp = checkalias(vn_p, dnp->dn_typeinfo.dev, - dnp->dn_dvm->mount)) != NULL) { - vput(vn_p); - vn_p = nvp; - } + vtype = (dnp->dn_type == DEV_BDEV) ? 
VBLK : VCHR; break; + } + vfsp.vnfs_mp = dnp->dn_dvm->mount; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "devfs"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = dnp; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = *(dnp->dn_ops); + + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_rdev = dnp->dn_typeinfo.dev; + else + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + /* Tag system files */ + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = markroot; + + DEVFS_UNLOCK(); + + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vn_p); + + DEVFS_LOCK(); + + if (error == 0) { + if ((dnp->dn_vn)) { + panic("devnode already has a vnode?"); + } else { + dnp->dn_vn = vn_p; + *vn_pp = vn_p; + vnode_settag(vn_p, VT_DEVFS); } - vn_p->v_mount = dnp->dn_dvm->mount;/* XXX Duplicated */ - *vn_pp = vn_p; - vn_p->v_data = (void *)dnp; - dnp->dn_vn = vn_p; - error = vn_lock(vn_p, LK_EXCLUSIVE | LK_RETRY, p); } + + dnp->dn_lflags &= ~DN_CREATE; + + if (dnp->dn_lflags & DN_CREATEWAIT) { + dnp->dn_lflags &= ~DN_CREATEWAIT; + wakeup(&dnp->dn_lflags); + } + + devfs_release_busy(dnp); + return error; } -/***********************************************************************\ -* add a whole device, with no prototype.. make name element and node * -* Used for adding the original device entries * -\***********************************************************************/ -/*proto*/ + +/*********************************************************************** + * called with DEVFS_LOCK held + ***********************************************************************/ +static void +devfs_release_busy(devnode_t *dnp) { + + dnp->dn_lflags &= ~DN_BUSY; + + if (dnp->dn_lflags & DN_DELETE) + devnode_free(dnp); +} + +/*********************************************************************** + * add a whole device, with no prototype.. make name element and node + * Used for adding the original device entries + * + * called with DEVFS_LOCK held + ***********************************************************************/ int dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinfo, devnode_t * proto, struct devfsmount *dvm, devdirent_t * *nm_pp) @@ -1059,6 +1091,7 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf return error; } + /* * Function: devfs_make_node * @@ -1076,27 +1109,28 @@ dev_add_entry(char *name, devnode_t * parent, int type, devnode_type_t * typeinf */ void * devfs_make_node(dev_t dev, int chrblk, uid_t uid, - gid_t gid, int perms, char *fmt, ...) + gid_t gid, int perms, const char *fmt, ...) { devdirent_t * new_dev = NULL; devnode_t * dnp; /* devnode for parent directory */ devnode_type_t typeinfo; char *name, *path, buf[256]; /* XXX */ - boolean_t funnel_state; int i; va_list ap; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + + DEVFS_LOCK(); if (!devfs_ready) { printf("devfs_make_node: not ready for devices!\n"); goto out; } - if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) goto out; + DEVFS_UNLOCK(); + va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); @@ -1117,8 +1151,8 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, name = buf; path = "/"; } + DEVFS_LOCK(); - DEVFS_LOCK(0); /* find/create directory path ie. 
mkdir -p */ if (dev_finddir(path, NULL, CREATE, &dnp) == 0) { typeinfo.dev = dev; @@ -1131,10 +1165,9 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev); } } - DEVFS_UNLOCK(0); - out: - (void) thread_funnel_set(kernel_flock, funnel_state); + DEVFS_UNLOCK(); + return new_dev; } @@ -1157,14 +1190,14 @@ devfs_make_link(void *original, char *fmt, ...) va_list ap; char *p, buf[256]; /* XXX */ int i; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + DEVFS_LOCK(); if (!devfs_ready) { printf("devfs_make_link: not ready for devices!\n"); goto out; } + DEVFS_UNLOCK(); va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); @@ -1172,28 +1205,31 @@ devfs_make_link(void *original, char *fmt, ...) p = NULL; - for(i=strlen(buf); i>0; i--) + for(i=strlen(buf); i>0; i--) { if(buf[i] == '/') { p=&buf[i]; buf[i]=0; break; } - DEVFS_LOCK(0); + } + DEVFS_LOCK(); + if (p) { - *p++ = '\0'; - if (dev_finddir(buf, NULL, CREATE, &dirnode) - || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) - goto fail; + *p++ = '\0'; + + if (dev_finddir(buf, NULL, CREATE, &dirnode) + || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) + goto fail; } else { - if (dev_finddir("", NULL, CREATE, &dirnode) - || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) - goto fail; + if (dev_finddir("", NULL, CREATE, &dirnode) + || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) + goto fail; } devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev); fail: - DEVFS_UNLOCK(0); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + DEVFS_UNLOCK(); + return ((new_dev != NULL) ? 0 : -1); } diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index 4c6b4729b..c2148de5d 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,18 +58,19 @@ #include #include #include -#include +#include #include -#include +#include +#include #include #include "devfs.h" #include "devfsdefs.h" -static int devfs_statfs( struct mount *mp, struct statfs *sbp, struct proc *p); +static int devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, vfs_context_t context); +static int devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context); -static struct vfsconf * devfs_vfsp = 0; -static int kernel_mount = 0; +static struct vfstable * devfs_vfsp = 0; /*- @@ -83,10 +84,10 @@ static int kernel_mount = 0; static int devfs_init(struct vfsconf *vfsp) { - devfs_vfsp = vfsp; /* remember this for devfs_kernel_mount below */ + devfs_vfsp = (struct vfstable *)vfsp; /* remember this for devfs_kernel_mount below */ if (devfs_sinit()) - return (EOPNOTSUPP); + return (ENOTSUP); devfs_make_node(makedev(0, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0622, "console"); devfs_make_node(makedev(2, 0), DEVFS_CHAR, @@ -119,12 +120,10 @@ devfs_init(struct vfsconf *vfsp) */ /*proto*/ int -devfs_mount(struct mount *mp, char *path, caddr_t data, - struct nameidata *ndp, struct proc *p) +devfs_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data, vfs_context_t context) { struct devfsmount *devfs_mp_p; /* devfs specific mount info */ int error; - size_t size; /*- * If they just want to update, we don't need to do anything. 
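devfs_make_node stays the driver-facing entry point for publishing /dev entries (devfs_init uses it above for the console and mem nodes); only the serialization underneath it changed. A usage sketch, in which my_major, the unit number, and the "mydev%d" name are all made up for illustration:

    #include <sys/types.h>
    #include <miscfs/devfs/devfs.h>

    /*
     * Publish /dev/mydev<unit> as a character device.  The returned
     * opaque handle is what devfs_remove() takes at teardown time;
     * my_major is assumed to come from a prior cdevsw_add().
     */
    static void *
    mydrv_publish(int my_major, int unit)
    {
        return devfs_make_node(makedev(my_major, unit), DEVFS_CHAR,
                               UID_ROOT, GID_WHEEL, 0644,
                               "mydev%d", unit);
    }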
@@ -134,6 +133,9 @@ devfs_mount(struct mount *mp, char *path, caddr_t data, return 0; } + /* Advisory locking should be handled at the VFS layer */ + vfs_setlocklocal(mp); + /*- * Well, it's not an update, it's a real mount request. * Time to get dirty. @@ -151,19 +153,20 @@ devfs_mount(struct mount *mp, char *path, caddr_t data, * Fill out some fields */ mp->mnt_data = (qaddr_t)devfs_mp_p; - mp->mnt_stat.f_type = mp->mnt_vfc->vfc_typenum; - mp->mnt_stat.f_fsid.val[0] = (int32_t)(void *)devfs_mp_p; - mp->mnt_stat.f_fsid.val[1] = mp->mnt_stat.f_type; + mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(void *)devfs_mp_p; + mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); mp->mnt_flag |= MNT_LOCAL; - DEVFS_LOCK(p); + DEVFS_LOCK(); error = dev_dup_plane(devfs_mp_p); - DEVFS_UNLOCK(p); + DEVFS_UNLOCK(); + if (error) { mp->mnt_data = (qaddr_t)0; FREE((caddr_t)devfs_mp_p, M_DEVFSMNT); return (error); - } + } else + DEVFS_INCR_MOUNTS(); /*- * Copy in the name of the directory the filesystem @@ -172,22 +175,16 @@ devfs_mount(struct mount *mp, char *path, caddr_t data, * to be tidy. */ - if (!kernel_mount) { - copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, - sizeof(mp->mnt_stat.f_mntonname)-1, &size); - bzero(mp->mnt_stat.f_mntonname + size, - sizeof(mp->mnt_stat.f_mntonname) - size); - } - bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); - bcopy("devfs",mp->mnt_stat.f_mntfromname, 5); - DEVFS_INCR_MOUNTS(); - (void)devfs_statfs(mp, &mp->mnt_stat, p); + bzero(mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN); + bcopy("devfs",mp->mnt_vfsstat.f_mntfromname, 5); + (void)devfs_statfs(mp, &mp->mnt_vfsstat, context); + return 0; } static int -devfs_start(struct mount *mp, int flags, struct proc *p) +devfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) { return 0; } @@ -196,7 +193,7 @@ devfs_start(struct mount *mp, int flags, struct proc *p) * Unmount the filesystem described by mp. */ static int -devfs_unmount( struct mount *mp, int mntflags, struct proc *p) +devfs_unmount( struct mount *mp, int mntflags, __unused vfs_context_t context) { struct devfsmount *devfs_mp_p = (struct devfsmount *)mp->mnt_data; int flags = 0; @@ -211,11 +208,13 @@ devfs_unmount( struct mount *mp, int mntflags, struct proc *p) if (error && !force) return error; - DEVFS_LOCK(p); + DEVFS_LOCK(); devfs_free_plane(devfs_mp_p); - DEVFS_UNLOCK(p); - FREE((caddr_t)devfs_mp_p, M_DEVFSMNT); + DEVFS_UNLOCK(); + DEVFS_DECR_MOUNTS(); + + FREE((caddr_t)devfs_mp_p, M_DEVFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; @@ -224,32 +223,27 @@ devfs_unmount( struct mount *mp, int mntflags, struct proc *p) /* return the address of the root vnode in *vpp */ static int -devfs_root(struct mount *mp, struct vnode **vpp) +devfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context) { struct devfsmount *devfs_mp_p = (struct devfsmount *)(mp->mnt_data); int error; - error = devfs_dntovn(devfs_mp_p->plane_root->de_dnp,vpp, - current_proc()); - return error; -} + DEVFS_LOCK(); + error = devfs_dntovn(devfs_mp_p->plane_root->de_dnp, vpp, context->vc_proc); + DEVFS_UNLOCK(); -static int -devfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t arg, - struct proc *p) -{ - return EOPNOTSUPP; + return error; } static int -devfs_statfs( struct mount *mp, struct statfs *sbp, struct proc *p) +devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, __unused vfs_context_t context) { struct devfsmount *devfs_mp_p = (struct devfsmount *)mp->mnt_data; /*- * Fill in the stat block. 
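devfs_statfs now only fills the legacy vfsstatfs, while the new devfs_vfs_getattr (added below) answers attribute by attribute: VFSATTR_RETURN stores a value and marks it supported in one step, and VFSATTR_IS_ACTIVE lets the filesystem skip fields the caller never asked for. The reduced shape of such a handler:

    #include <sys/mount.h>

    static int
    example_vfs_getattr(__unused mount_t mp, struct vfs_attr *fsap,
                        __unused vfs_context_t context)
    {
        /* cheap, always-known values: set and mark supported at once */
        VFSATTR_RETURN(fsap, f_bsize, 512);
        VFSATTR_RETURN(fsap, f_iosize, 512);

        /* compute the expensive ones only when requested */
        if (VFSATTR_IS_ACTIVE(fsap, f_blocks)) {
            fsap->f_blocks = 0;               /* fill from fs metadata */
            VFSATTR_SET_SUPPORTED(fsap, f_blocks);
        }
        return (0);
    }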
*/ - sbp->f_type = mp->mnt_stat.f_type; + //sbp->f_type = mp->mnt_vfsstat.f_type; sbp->f_flags = 0; /* XXX */ sbp->f_bsize = 512; sbp->f_iosize = 512; @@ -263,33 +257,48 @@ devfs_statfs( struct mount *mp, struct statfs *sbp, struct proc *p) sbp->f_files = devfs_stats.nodes; sbp->f_ffree = 0; sbp->f_fsid.val[0] = (int32_t)(void *)devfs_mp_p; - sbp->f_fsid.val[1] = mp->mnt_stat.f_type; + sbp->f_fsid.val[1] = vfs_typenum(mp); - /*- - * Copy the mounted on and mounted from names into - * the passed in stat block, if it is not the one - * in the mount structure. - */ - if (sbp != &mp->mnt_stat) { - bcopy((caddr_t)mp->mnt_stat.f_mntonname, - (caddr_t)&sbp->f_mntonname[0], MNAMELEN); - bcopy((caddr_t)mp->mnt_stat.f_mntfromname, - (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + return 0; +} + +static int +devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) +{ + VFSATTR_RETURN(fsap, f_objcount, devfs_stats.nodes); + VFSATTR_RETURN(fsap, f_maxobjcount, devfs_stats.nodes); + VFSATTR_RETURN(fsap, f_bsize, 512); + VFSATTR_RETURN(fsap, f_iosize, 512); + if (VFSATTR_IS_ACTIVE(fsap, f_blocks) || VFSATTR_IS_ACTIVE(fsap, f_bused)) { + fsap->f_blocks = (devfs_stats.mounts * sizeof(struct devfsmount) + + devfs_stats.nodes * sizeof(devnode_t) + + devfs_stats.entries * sizeof(devdirent_t) + + devfs_stats.stringspace + ) / fsap->f_bsize; + fsap->f_bused = fsap->f_blocks; + VFSATTR_SET_SUPPORTED(fsap, f_blocks); + VFSATTR_SET_SUPPORTED(fsap, f_bused); } + VFSATTR_RETURN(fsap, f_bfree, 0); + VFSATTR_RETURN(fsap, f_bavail, 0); + VFSATTR_RETURN(fsap, f_files, devfs_stats.nodes); + VFSATTR_RETURN(fsap, f_ffree, 0); + VFSATTR_RETURN(fsap, f_fssubtype, 0); + return 0; } static int -devfs_sync(struct mount *mp, int waitfor,struct ucred *cred,struct proc *p) +devfs_sync(__unused struct mount *mp, __unused int waitfor, __unused vfs_context_t context) { return (0); } static int -devfs_vget(struct mount *mp, void * ino,struct vnode **vpp) +devfs_vget(__unused struct mount *mp, __unused ino64_t ino, __unused struct vnode **vpp, __unused vfs_context_t context) { - return EOPNOTSUPP; + return ENOTSUP; } /************************************************************* @@ -298,30 +307,24 @@ devfs_vget(struct mount *mp, void * ino,struct vnode **vpp) */ static int -devfs_fhtovp (struct mount *mp, struct fid *fhp, struct mbuf *nam, - struct vnode **vpp, int *exflagsp, struct ucred **credanonp) +devfs_fhtovp (__unused struct mount *mp, __unused int fhlen, __unused unsigned char *fhp, __unused struct vnode **vpp, __unused vfs_context_t context) { return (EINVAL); } static int -devfs_vptofh (struct vnode *vp, struct fid *fhp) +devfs_vptofh (__unused struct vnode *vp, __unused int *fhlenp, __unused unsigned char *fhp, __unused vfs_context_t context) { return (EINVAL); } static int -devfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +devfs_sysctl(__unused int *name, __unused u_int namelen, __unused user_addr_t oldp, + __unused size_t *oldlenp, __unused user_addr_t newp, + __unused size_t newlen, __unused vfs_context_t context) { - return (EOPNOTSUPP); + return (ENOTSUP); } #include @@ -336,39 +339,47 @@ devfs_kernel_mount(char * mntname) { struct mount *mp; int error; - struct proc *procp; struct nameidata nd; struct vnode * vp; + struct vfs_context context; if (devfs_vfsp == NULL) { printf("devfs_kernel_mount: devfs_vfsp is NULL\n"); return (EINVAL); } - procp = current_proc(); + context.vc_proc 
= current_proc(); + context.vc_ucred = kauth_cred_get(); /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, - mntname, procp); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + CAST_USER_ADDR_T(mntname), &context); if ((error = namei(&nd))) { printf("devfs_kernel_mount: failed to find directory '%s', %d", mntname, error); return (error); } + nameidone(&nd); vp = nd.ni_vp; - if ((error = vinvalbuf(vp, V_SAVE, procp->p_ucred, procp, 0, 0))) { - printf("devfs_kernel_mount: vinval failed: %d\n", error); - vput(vp); + + if ((error = VNOP_FSYNC(vp, MNT_WAIT, &context))) { + printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error); + vnode_put(vp); return (error); } - if (vp->v_type != VDIR) { + if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) { + printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error); + vnode_put(vp); + return (error); + } + if (vnode_isdir(vp) == 0) { printf("devfs_kernel_mount: '%s' is not a directory\n", mntname); - vput(vp); + vnode_put(vp); return (ENOTDIR); } - if (vp->v_mountedhere != NULL) { - vput(vp); + if ((vnode_mountedhere(vp))) { + vnode_put(vp); return (EBUSY); } @@ -379,44 +390,46 @@ devfs_kernel_mount(char * mntname) M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); - /* Initialize the default IO constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + + mount_lock_init(mp); + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); - lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - (void)vfs_busy(mp, LK_NOWAIT, 0, procp); - LIST_INIT(&mp->mnt_vnodelist); + (void)vfs_busy(mp, LK_NOWAIT); mp->mnt_op = devfs_vfsp->vfc_vfsops; - mp->mnt_vfc = devfs_vfsp; + mp->mnt_vtable = devfs_vfsp; devfs_vfsp->vfc_refcount++; + devfs_vfsp->vfc_threadsafe = TRUE; + devfs_vfsp->vfc_64bitready = TRUE; mp->mnt_flag = 0; mp->mnt_flag |= devfs_vfsp->vfc_flags & MNT_VISFLAGMASK; - strncpy(mp->mnt_stat.f_fstypename, devfs_vfsp->vfc_name, MFSNAMELEN); + strncpy(mp->mnt_vfsstat.f_fstypename, devfs_vfsp->vfc_name, MFSTYPENAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; - mp->mnt_stat.f_owner = procp->p_ucred->cr_uid; - (void) copystr(mntname, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0); + mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); + (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0); + + error = devfs_mount(mp, NULL, NULL, &context); - kernel_mount = 1; - error = devfs_mount(mp, mntname, NULL, NULL, procp); - kernel_mount = 0; if (error) { printf("devfs_kernel_mount: mount %s failed: %d", mntname, error); - mp->mnt_vfc->vfc_refcount--; + mp->mnt_vtable->vfc_refcount--; - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, procp); + vfs_unbusy(mp); + mount_lock_destroy(mp); FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); - vput(vp); + vnode_put(vp); return (error); } - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - VOP_UNLOCK(vp, 0, procp); - vfs_unbusy(mp, procp); + vnode_ref(vp); + vnode_put(vp); + vfs_unbusy(mp); + mount_list_add(mp); return (0); } @@ -425,12 +438,12 @@ struct vfsops devfs_vfsops = { devfs_start, devfs_unmount, devfs_root, - devfs_quotactl, - devfs_statfs, + NULL, /* 
quotactl */ + devfs_vfs_getattr, devfs_sync, devfs_vget, devfs_fhtovp, devfs_vptofh, devfs_init, - devfs_sysctl, + devfs_sysctl }; diff --git a/bsd/miscfs/devfs/devfs_vnops.c b/bsd/miscfs/devfs/devfs_vnops.c index 9e8c291fd..c74d145f2 100644 --- a/bsd/miscfs/devfs/devfs_vnops.c +++ b/bsd/miscfs/devfs/devfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -69,7 +69,6 @@ #include #include -#include #include #include #include @@ -77,17 +76,23 @@ #include #include #include -#include +#include #include +#include #include -#include +#include #include #include #include #include +#include #include "devfsdefs.h" +static int devfs_update(struct vnode *vp, struct timeval *access, + struct timeval *modify); + + /* * Convert a component of a pathname into a pointer to a locked node. * This is a very central and rather complicated routine. @@ -126,14 +131,17 @@ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent node unlocked. */ static int -devfs_lookup(struct vop_lookup_args *ap) - /*struct vop_lookup_args { +devfs_lookup(struct vnop_lookup_args *ap) + /*struct vnop_lookup_args { struct vnode * a_dvp; directory vnode ptr struct vnode ** a_vpp; where to put the result struct componentname * a_cnp; the name we want + vfs_context_t a_context; };*/ { struct componentname *cnp = ap->a_cnp; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); struct vnode *dir_vnode = ap->a_dvp; struct vnode **result_vnode = ap->a_vpp; devnode_t * dir_node; /* the directory we are searching */ @@ -141,69 +149,69 @@ devfs_lookup(struct vop_lookup_args *ap) devdirent_t * nodename; int flags = cnp->cn_flags; int op = cnp->cn_nameiop; /* LOOKUP, CREATE, RENAME, or DELETE */ - int lockparent = flags & LOCKPARENT; int wantparent = flags & (LOCKPARENT|WANTPARENT); int error = 0; - struct proc *p = cnp->cn_proc; char heldchar; /* the char at the end of the name componet */ +retry: + *result_vnode = NULL; /* safe not sorry */ /*XXX*/ - if (dir_vnode->v_usecount == 0) - printf("devfs_lookup: dir had no refs "); + //if (dir_vnode->v_usecount == 0) + //printf("devfs_lookup: dir had no refs "); dir_node = VTODN(dir_vnode); /* - * Check accessiblity of directory. + * Make sure that our node is a directory as well. */ if (dir_node->dn_type != DEV_DIR) { return (ENOTDIR); } - if ((error = VOP_ACCESS(dir_vnode, VEXEC, cnp->cn_cred, p)) != 0) { - return (error); - } - - /* temporarily terminate string component */ + DEVFS_LOCK(); + /* + * temporarily terminate string component + */ heldchar = cnp->cn_nameptr[cnp->cn_namelen]; cnp->cn_nameptr[cnp->cn_namelen] = '\0'; - DEVFS_LOCK(p); - nodename = dev_findname(dir_node,cnp->cn_nameptr); - if (nodename) { - /* entry exists */ - node = nodename->de_dnp; - node->dn_last_lookup = nodename; /* for unlink */ - /* Do potential vnode allocation here inside the lock - * to make sure that our device node has a non-NULL dn_vn - * associated with it. The device node might otherwise - * get deleted out from under us (see devfs_dn_free()). 
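The retry handling added here pairs with devfs_dntovn's new vnode-id dance in devfs_tree.c (earlier in this patch): record the vid while the devfs lock is held, drop the lock to take an iocount with vnode_getwithvid, which fails if the vnode was recycled in the meantime, then retake the lock and recheck the node. A skeleton of that pattern, reusing the DEVFS_LOCK wrappers sketched earlier:

    #include <kern/locks.h>
    #include <sys/vnode.h>

    extern lck_mtx_t devfs_mutex;
    #define DEVFS_LOCK()    lck_mtx_lock(&devfs_mutex)
    #define DEVFS_UNLOCK()  lck_mtx_unlock(&devfs_mutex)

    /* revalidate a vnode pointer that was read under our private lock */
    static int
    get_cached_vnode(vnode_t vp, vnode_t *out)
    {
        uint32_t vid = vnode_vid(vp);       /* capture id while locked */
        int error;

        DEVFS_UNLOCK();                     /* may block: drop the mutex */
        error = vnode_getwithvid(vp, vid);  /* fails if vp was recycled */
        DEVFS_LOCK();

        if (error == 0)
            *out = vp;                      /* holds an iocount reference */
        return (error);                     /* caller may map this to EAGAIN */
    }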
- */ - error = devfs_dntovn(node, result_vnode, p); - } - DEVFS_UNLOCK(p); - /* restore saved character */ + + nodename = dev_findname(dir_node, cnp->cn_nameptr); + /* + * restore saved character + */ cnp->cn_nameptr[cnp->cn_namelen] = heldchar; - if (error) - return (error); + if (nodename) { + /* entry exists */ + node = nodename->de_dnp; - if (!nodename) { /* no entry */ - /* If it doesn't exist and we're not the last component, + /* Do potential vnode allocation here inside the lock + * to make sure that our device node has a non-NULL dn_vn + * associated with it. The device node might otherwise + * get deleted out from under us (see devfs_dn_free()). + */ + error = devfs_dntovn(node, result_vnode, p); + } + DEVFS_UNLOCK(); + + if (error) { + if (error == EAGAIN) + goto retry; + return error; + } + if (!nodename) { + /* + * we haven't called devfs_dntovn if we get here + * we have not taken a reference on the node.. no + * vnode_put is necessary on these error returns + * + * If it doesn't exist and we're not the last component, * or we're at the last component, but we're not creating * or renaming, return ENOENT. */ if (!(flags & ISLASTCN) || !(op == CREATE || op == RENAME)) { return ENOENT; } - /* - * Access for write is interpreted as allowing - * creation of files in the directory. - */ - if ((error = VOP_ACCESS(dir_vnode, VWRITE, - cnp->cn_cred, p)) != 0) - { - return (error); - } /* * We return with the directory locked, so that * the parameters we set up above will still be @@ -211,17 +219,16 @@ devfs_lookup(struct vop_lookup_args *ap) * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory vnode in namei_data->ni_dvp. - * The pathname buffer is saved so that the name - * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ - cnp->cn_flags |= SAVENAME; - if (!lockparent) - VOP_UNLOCK(dir_vnode, 0, p); return (EJUSTRETURN); } + /* + * from this point forward, we need to vnode_put the reference + * picked up in devfs_dntovn if we decide to return an error + */ /* * If deleting, and at end of pathname, return @@ -231,37 +238,20 @@ devfs_lookup(struct vop_lookup_args *ap) * on and lock the node, being careful with ".". */ if (op == DELETE && (flags & ISLASTCN)) { - /* - * Write access to directory required to delete files. - */ - if ((error = VOP_ACCESS(dir_vnode, VWRITE, - cnp->cn_cred, p)) != 0) - return (error); + /* * we are trying to delete '.'. What does this mean? XXX */ if (dir_node == node) { - VREF(dir_vnode); - *result_vnode = dir_vnode; - return (0); - } -#ifdef NOTYET - /* - * If directory is "sticky", then user must own - * the directory, or the file in it, else she - * may not delete it (unless she's root). This - * implements append-only directories. - */ - if ((dir_node->mode & ISVTX) && - cnp->cn_cred->cr_uid != 0 && - cnp->cn_cred->cr_uid != dir_node->uid && - cnp->cn_cred->cr_uid != node->uid) { - VOP_UNLOCK(*result_vnode, 0, p); - return (EPERM); + if (*result_vnode) { + vnode_put(*result_vnode); + *result_vnode = NULL; + } + if ( ((error = vnode_get(dir_vnode)) == 0) ) { + *result_vnode = dir_vnode; + } + return (error); } -#endif - if (!lockparent) - VOP_UNLOCK(dir_vnode, 0, p); return (0); } @@ -272,22 +262,15 @@ devfs_lookup(struct vop_lookup_args *ap) * regular file, or empty directory. */ if (op == RENAME && wantparent && (flags & ISLASTCN)) { - /* - * Are we allowed to change the holding directory? 
- */ - if ((error = VOP_ACCESS(dir_vnode, VWRITE, - cnp->cn_cred, p)) != 0) - return (error); + /* * Careful about locking second node. * This can only occur if the target is ".". */ - if (dir_node == node) - return (EISDIR); - /* hmm save the 'from' name (we need to delete it) */ - cnp->cn_flags |= SAVENAME; - if (!lockparent) - VOP_UNLOCK(dir_vnode, 0, p); + if (dir_node == node) { + error = EISDIR; + goto drop_ref; + } return (0); } @@ -311,294 +294,193 @@ devfs_lookup(struct vop_lookup_args *ap) * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ - if (flags & ISDOTDOT) { - VOP_UNLOCK(dir_vnode, 0, p); /* race to get the node */ - if (lockparent && (flags & ISLASTCN)) - vn_lock(dir_vnode, LK_EXCLUSIVE | LK_RETRY, p); - } else if (dir_node == node) { -#if 0 - /* - * this next statement is wrong: we already did a vget in - * devfs_dntovn(); DWS 4/16/1999 - */ - VREF(dir_vnode); /* we want ourself, ie "." */ -#endif + if ((flags & ISDOTDOT) == 0 && dir_node == node) { + if (*result_vnode) { + vnode_put(*result_vnode); + *result_vnode = NULL; + } + if ( (error = vnode_get(dir_vnode)) ) { + return (error); + } *result_vnode = dir_vnode; - } else { - if (!lockparent || (flags & ISLASTCN)) - VOP_UNLOCK(dir_vnode, 0, p); } - return (0); + +drop_ref: + if (*result_vnode) { + vnode_put(*result_vnode); + *result_vnode = NULL; + } + return (error); } static int -devfs_access(struct vop_access_args *ap) - /*struct vop_access_args { +devfs_getattr(struct vnop_getattr_args *ap) + /*struct vnop_getattr_args { struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; + struct vnode_attr *a_vap; + kauth_cred_t a_cred; struct proc *a_p; } */ { - /* - * mode is filled with a combination of VREAD, VWRITE, - * and/or VEXEC bits turned on. In an octal number these - * are the Y in 0Y00. - */ struct vnode *vp = ap->a_vp; - int mode = ap->a_mode; - struct ucred *cred = ap->a_cred; + struct vnode_attr *vap = ap->a_vap; devnode_t * file_node; - gid_t *gp; - int i; - struct proc *p = ap->a_p; + struct timeval now; file_node = VTODN(vp); - /* - * if we are not running as a process, we are in the - * kernel and we DO have permission - */ - if (p == NULL) - return 0; - /* - * Access check is based on only one of owner, group, public. - * If not owner, then check group. If not a member of the - * group, then check public access. - */ - if (cred->cr_uid != file_node->dn_uid) - { - /* failing that.. try groups */ - mode >>= 3; - gp = cred->cr_groups; - for (i = 0; i < cred->cr_ngroups; i++, gp++) - { - if (file_node->dn_gid == *gp) - { - goto found; - } - } - /* failing that.. try general access */ - mode >>= 3; -found: - ; - } - if ((file_node->dn_mode & mode) == mode) - return (0); - /* - * Root gets to do anything. - * but only use suser prives as a last resort - * (Use of super powers is recorded in ap->a_p->p_acflag) - */ - if( suser(cred, &ap->a_p->p_acflag) == 0) /* XXX what if no proc? 
*/ - return 0; - return (EACCES); -} + DEVFS_LOCK(); -static int -devfs_getattr(struct vop_getattr_args *ap) - /*struct vop_getattr_args { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ -{ - struct vnode *vp = ap->a_vp; - struct vattr *vap = ap->a_vap; - devnode_t * file_node; - struct timeval tv; + microtime(&now); + dn_times(file_node, &now, &now, &now); + + VATTR_RETURN(vap, va_mode, file_node->dn_mode); - file_node = VTODN(vp); - tv = time; - dn_times(file_node, tv, tv); - vap->va_rdev = 0;/* default value only */ - vap->va_mode = file_node->dn_mode; switch (file_node->dn_type) { case DEV_DIR: - vap->va_rdev = (dev_t)file_node->dn_dvm; + VATTR_RETURN(vap, va_rdev, (dev_t)file_node->dn_dvm); vap->va_mode |= (S_IFDIR); break; case DEV_CDEV: - vap->va_rdev = file_node->dn_typeinfo.dev; + VATTR_RETURN(vap, va_rdev, file_node->dn_typeinfo.dev); vap->va_mode |= (S_IFCHR); break; case DEV_BDEV: - vap->va_rdev = file_node->dn_typeinfo.dev; + VATTR_RETURN(vap, va_rdev, file_node->dn_typeinfo.dev); vap->va_mode |= (S_IFBLK); break; case DEV_SLNK: + VATTR_RETURN(vap, va_rdev, 0); vap->va_mode |= (S_IFLNK); break; + default: + VATTR_RETURN(vap, va_rdev, 0); /* default value only */ } - vap->va_type = vp->v_type; - vap->va_nlink = file_node->dn_links; - vap->va_uid = file_node->dn_uid; - vap->va_gid = file_node->dn_gid; - vap->va_fsid = (int32_t)(void *)file_node->dn_dvm; - vap->va_fileid = (int32_t)(void *)file_node; - vap->va_size = file_node->dn_len; /* now a u_quad_t */ - /* this doesn't belong here */ + VATTR_RETURN(vap, va_type, vp->v_type); + VATTR_RETURN(vap, va_nlink, file_node->dn_links); + VATTR_RETURN(vap, va_uid, file_node->dn_uid); + VATTR_RETURN(vap, va_gid, file_node->dn_gid); + VATTR_RETURN(vap, va_fsid, (uintptr_t)file_node->dn_dvm); + VATTR_RETURN(vap, va_fileid, (uintptr_t)file_node); + VATTR_RETURN(vap, va_data_size, file_node->dn_len); + + /* return an override block size (advisory) */ if (vp->v_type == VBLK) - vap->va_blocksize = BLKDEV_IOSIZE; + VATTR_RETURN(vap, va_iosize, BLKDEV_IOSIZE); else if (vp->v_type == VCHR) - vap->va_blocksize = MAXPHYSIO; + VATTR_RETURN(vap, va_iosize, MAXPHYSIO); else - vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + VATTR_RETURN(vap, va_iosize, vp->v_mount->mnt_vfsstat.f_iosize); /* if the time is bogus, set it to the boot time */ - if (file_node->dn_ctime.tv_sec == 0) - file_node->dn_ctime.tv_sec = boottime.tv_sec; + if (file_node->dn_ctime.tv_sec == 0) { + file_node->dn_ctime.tv_sec = boottime_sec(); + file_node->dn_ctime.tv_nsec = 0; + } if (file_node->dn_mtime.tv_sec == 0) - file_node->dn_mtime.tv_sec = boottime.tv_sec; + file_node->dn_mtime = file_node->dn_ctime; if (file_node->dn_atime.tv_sec == 0) - file_node->dn_atime.tv_sec = boottime.tv_sec; - vap->va_ctime = file_node->dn_ctime; - vap->va_mtime = file_node->dn_mtime; - vap->va_atime = file_node->dn_atime; - vap->va_gen = 0; - vap->va_flags = 0; - vap->va_bytes = file_node->dn_len; /* u_quad_t */ - vap->va_filerev = 0; /* XXX */ /* u_quad_t */ - vap->va_vaflags = 0; /* XXX */ + file_node->dn_atime = file_node->dn_ctime; + VATTR_RETURN(vap, va_change_time, file_node->dn_ctime); + VATTR_RETURN(vap, va_modify_time, file_node->dn_mtime); + VATTR_RETURN(vap, va_access_time, file_node->dn_atime); + VATTR_RETURN(vap, va_gen, 0); + VATTR_RETURN(vap, va_flags, 0); + VATTR_RETURN(vap, va_filerev, 0); + VATTR_RETURN(vap, va_acl, NULL); + + DEVFS_UNLOCK(); + return 0; } static int -devfs_setattr(struct vop_setattr_args *ap) - /*struct 
vop_setattr_args { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ +devfs_setattr(struct vnop_setattr_args *ap) + /*struct vnop_setattr_args { + struct vnode *a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ { - struct vnode *vp = ap->a_vp; - struct vattr *vap = ap->a_vap; - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; - int error = 0; - gid_t *gp; - int i; - devnode_t * file_node; - struct timeval atimeval, mtimeval; - - if (vap->va_flags != VNOVAL) /* XXX needs to be implemented */ - return (EOPNOTSUPP); - - file_node = VTODN(vp); - - if ((vap->va_type != VNON) || - (vap->va_nlink != VNOVAL) || - (vap->va_fsid != VNOVAL) || - (vap->va_fileid != VNOVAL) || - (vap->va_blocksize != VNOVAL) || - (vap->va_rdev != VNOVAL) || - (vap->va_bytes != VNOVAL) || - (vap->va_gen != VNOVAL )) - { - return EINVAL; - } - - /* - * Go through the fields and update iff not VNOVAL. - */ - if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { - if (cred->cr_uid != file_node->dn_uid && - (error = suser(cred, &p->p_acflag)) && - ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || - (error = VOP_ACCESS(vp, VWRITE, cred, p)))) - return (error); - if (vap->va_atime.tv_sec != VNOVAL) - file_node->dn_flags |= DN_ACCESS; - if (vap->va_mtime.tv_sec != VNOVAL) - file_node->dn_flags |= DN_CHANGE | DN_UPDATE; - atimeval.tv_sec = vap->va_atime.tv_sec; - atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; - mtimeval.tv_sec = vap->va_mtime.tv_sec; - mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - if (error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1)) - return (error); - } - - /* - * Change the permissions.. must be root or owner to do this. - */ - if (vap->va_mode != (u_short)VNOVAL) { - if ((cred->cr_uid != file_node->dn_uid) - && (error = suser(cred, &p->p_acflag))) - return (error); - file_node->dn_mode &= ~07777; - file_node->dn_mode |= vap->va_mode & 07777; - } - - /* - * Change the owner.. must be root to do this. - */ - if (vap->va_uid != (uid_t)VNOVAL) { - if (error = suser(cred, &p->p_acflag)) - return (error); - file_node->dn_uid = vap->va_uid; - } - - /* - * Change the group.. must be root or owner to do this. - * If we are the owner, we must be in the target group too. - * don't use suser() unless you have to as it reports - * whether you needed suser powers or not. - */ - if (vap->va_gid != (gid_t)VNOVAL) { - if (cred->cr_uid == file_node->dn_uid){ - gp = cred->cr_groups; - for (i = 0; i < cred->cr_ngroups; i++, gp++) { - if (vap->va_gid == *gp) - goto cando; - } - } - /* - * we can't do it with normal privs, - * do we have an ace up our sleeve? - */ - if (error = suser(cred, &p->p_acflag)) - return (error); -cando: - file_node->dn_gid = vap->va_gid; - } -#if 0 - /* - * Copied from somewhere else - * but only kept as a marker and reminder of the fact that - * flags should be handled some day - */ - if (vap->va_flags != VNOVAL) { - if (error = suser(cred, &p->p_acflag)) - return error; - if (cred->cr_uid == 0) - ; - else { + struct vnode *vp = ap->a_vp; + struct vnode_attr *vap = ap->a_vap; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); + int error = 0; + devnode_t * file_node; + struct timeval atimeval, mtimeval; + + file_node = VTODN(vp); + + DEVFS_LOCK(); + /* + * Go through the fields and update if set. 
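On the vnop side the same protocol applies: devfs_setattr no longer performs its own ownership and permission arithmetic (kauth does that above the filesystem now); it stores whichever fields are active and reports each one back through VATTR_SET_SUPPORTED. A minimal handler of that shape:

    #include <sys/vnode.h>

    static int
    example_vnop_setattr(struct vnop_setattr_args *ap)
    {
        struct vnode_attr *vap = ap->a_vap;

        if (VATTR_IS_ACTIVE(vap, va_mode)) {
            /* store vap->va_mode into the fs-private node here */
        }
        VATTR_SET_SUPPORTED(vap, va_mode);   /* tell VFS we handled it */

        if (VATTR_IS_ACTIVE(vap, va_uid)) {
            /* store vap->va_uid */
        }
        VATTR_SET_SUPPORTED(vap, va_uid);

        /* fields left unsupported are dealt with (or rejected) above us */
        return (0);
    }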
+ */ + if (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time)) { + + + if (VATTR_IS_ACTIVE(vap, va_access_time)) + file_node->dn_access = 1; + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + file_node->dn_change = 1; + file_node->dn_update = 1; } - } -#endif + atimeval.tv_sec = vap->va_access_time.tv_sec; + atimeval.tv_usec = vap->va_access_time.tv_nsec / 1000; + mtimeval.tv_sec = vap->va_modify_time.tv_sec; + mtimeval.tv_usec = vap->va_modify_time.tv_nsec / 1000; + + if ( (error = devfs_update(vp, &atimeval, &mtimeval)) ) + goto exit; + } + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_change_time); + + /* + * Change the permissions. + */ + if (VATTR_IS_ACTIVE(vap, va_mode)) { + file_node->dn_mode &= ~07777; + file_node->dn_mode |= vap->va_mode & 07777; + } + VATTR_SET_SUPPORTED(vap, va_mode); + + /* + * Change the owner. + */ + if (VATTR_IS_ACTIVE(vap, va_uid)) + file_node->dn_uid = vap->va_uid; + VATTR_SET_SUPPORTED(vap, va_uid); + + /* + * Change the group. + */ + if (VATTR_IS_ACTIVE(vap, va_gid)) + file_node->dn_gid = vap->va_gid; + VATTR_SET_SUPPORTED(vap, va_gid); + exit: + DEVFS_UNLOCK(); + return error; } static int -devfs_read(struct vop_read_args *ap) - /*struct vop_read_args { +devfs_read(struct vnop_read_args *ap) + /* struct vnop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ { devnode_t * dn_p = VTODN(ap->a_vp); switch (ap->a_vp->v_type) { case VDIR: { - dn_p->dn_flags |= DN_ACCESS; - return VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, - NULL, NULL, NULL); + dn_p->dn_access = 1; + + return VNOP_READDIR(ap->a_vp, ap->a_uio, 0, NULL, NULL, ap->a_context); } default: { printf("devfs_read(): bad file type %d", ap->a_vp->v_type); @@ -610,79 +492,90 @@ devfs_read(struct vop_read_args *ap) } static int -devfs_close(ap) - struct vop_close_args /* { +devfs_close(struct vnop_close_args *ap) + /* struct vnop_close_args { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ { struct vnode * vp = ap->a_vp; register devnode_t * dnp = VTODN(vp); + struct timeval now; - simple_lock(&vp->v_interlock); - if (vp->v_usecount > 1) - dn_times(dnp, time, time); - simple_unlock(&vp->v_interlock); + if (vnode_isinuse(vp, 1)) { + DEVFS_LOCK(); + microtime(&now); + dn_times(dnp, &now, &now, &now); + DEVFS_UNLOCK(); + } return (0); } static int -devfsspec_close(ap) - struct vop_close_args /* { +devfsspec_close(struct vnop_close_args *ap) + /* struct vnop_close_args { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ { struct vnode * vp = ap->a_vp; register devnode_t * dnp = VTODN(vp); + struct timeval now; - simple_lock(&vp->v_interlock); - if (vp->v_usecount > 1) - dn_times(dnp, time, time); - simple_unlock(&vp->v_interlock); - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); + if (vnode_isinuse(vp, 1)) { + DEVFS_LOCK(); + microtime(&now); + dn_times(dnp, &now, &now, &now); + DEVFS_UNLOCK(); + } + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap)); } static int -devfsspec_read(struct vop_read_args *ap) - /*struct vop_read_args { +devfsspec_read(struct vnop_read_args *ap) + /* struct vnop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + kauth_cred_t a_cred; } */ { - VTODN(ap->a_vp)->dn_flags |= DN_ACCESS; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); + register devnode_t 
* dnp = VTODN(ap->a_vp); + + dnp->dn_access = 1; + + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap)); } static int -devfsspec_write(struct vop_write_args *ap) - /*struct vop_write_args { +devfsspec_write(struct vnop_write_args *ap) + /* struct vnop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ { - VTODN(ap->a_vp)->dn_flags |= DN_CHANGE | DN_UPDATE; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); + register devnode_t * dnp = VTODN(ap->a_vp); + + dnp->dn_change = 1; + dnp->dn_update = 1; + + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap)); } /* * Write data to a file or directory. */ static int -devfs_write(struct vop_write_args *ap) - /*struct vop_write_args { +devfs_write(struct vnop_write_args *ap) + /* struct vnop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + kauth_cred_t a_cred; } */ { switch (ap->a_vp->v_type) { @@ -696,8 +589,8 @@ devfs_write(struct vop_write_args *ap) } static int -devfs_remove(struct vop_remove_args *ap) - /*struct vop_remove_args { +devfs_remove(struct vnop_remove_args *ap) + /* struct vnop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; @@ -706,34 +599,29 @@ devfs_remove(struct vop_remove_args *ap) struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; + vfs_context_t ctx = cnp->cn_context; devnode_t * tp; devnode_t * tdp; devdirent_t * tnp; int doingdirectory = 0; int error = 0; - uid_t ouruid = cnp->cn_cred->cr_uid; - struct proc *p = cnp->cn_proc; + uid_t ouruid = kauth_cred_getuid(vfs_context_ucred(ctx)); /* - * Lock our directories and get our name pointers - * assume that the names are null terminated as they + * assume that the name is null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ tp = VTODN(vp); tdp = VTODN(dvp); - /* - * Assuming we are atomic, dev_lookup left this for us - */ - tnp = tp->dn_last_lookup; - /* - * Check we are doing legal things WRT the new flags - */ - if ((tp->dn_flags & (IMMUTABLE | APPEND)) - || (tdp->dn_flags & APPEND) /*XXX eh?*/ ) { - error = EPERM; - goto abort; + DEVFS_LOCK(); + + tnp = dev_findname(tdp, cnp->cn_nameptr); + + if (tnp == NULL) { + error = ENOENT; + goto abort; } /* @@ -754,21 +642,9 @@ devfs_remove(struct vop_remove_args *ap) /*********************************** * Start actually doing things.... * ***********************************/ - tdp->dn_flags |= DN_CHANGE | DN_UPDATE; + tdp->dn_change = 1; + tdp->dn_update = 1; - /* - * own the parent directory, or the destination of the rename, - * otherwise the destination may not be changed (except by - * root). This implements append-only directories. - * XXX shoudn't this be in generic code? - */ - if ((tdp->dn_mode & S_ISTXT) - && ouruid != 0 - && ouruid != tdp->dn_uid - && ouruid != tp->dn_uid ) { - error = EPERM; - goto abort; - } /* * Target must be empty if a directory and have no links * to it. 
Also, ensure source and target are compatible @@ -778,37 +654,32 @@ devfs_remove(struct vop_remove_args *ap) error = ENOTEMPTY; goto abort; } - DEVFS_LOCK(p); dev_free_name(tnp); - DEVFS_UNLOCK(p); - abort: - if (dvp == vp) - vrele(vp); - else - vput(vp); - vput(dvp); +abort: + DEVFS_UNLOCK(); + return (error); } /* */ static int -devfs_link(struct vop_link_args *ap) - /*struct vop_link_args { +devfs_link(struct vnop_link_args *ap) + /*struct vnop_link_args { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */ { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; devnode_t * fp; devnode_t * tdp; devdirent_t * tnp; int error = 0; - struct timeval tv; + struct timeval now; /* * First catch an arbitrary restriction for this FS @@ -828,71 +699,26 @@ devfs_link(struct vop_link_args *ap) fp = VTODN(vp); if (tdvp->v_mount != vp->v_mount) { - error = EXDEV; - VOP_ABORTOP(tdvp, cnp); - goto out2; - } - if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { - VOP_ABORTOP(tdvp, cnp); - goto out2; + return (EXDEV); } + DEVFS_LOCK(); - /* - * Check we are doing legal things WRT the new flags - */ - if (fp->dn_flags & (IMMUTABLE | APPEND)) { - VOP_ABORTOP(tdvp, cnp); - error = EPERM; - goto out1; - } /*********************************** * Start actually doing things.... * ***********************************/ - fp->dn_flags |= DN_CHANGE; - tv = time; - error = VOP_UPDATE(vp, &tv, &tv, 1); + fp->dn_change = 1; + + microtime(&now); + error = devfs_update(vp, &now, &now); + if (!error) { - DEVFS_LOCK(p); error = dev_add_name(cnp->cn_nameptr, tdp, NULL, fp, &tnp); - DEVFS_UNLOCK(p); } out1: - if (tdvp != vp) - VOP_UNLOCK(vp, 0, p); -out2: - vput(tdvp); - return (error); -} - -/* - * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - * The target is always vput before returning. - */ -int -devfs_checkpath(source, target) - devnode_t *source, *target; -{ - int error = 0; - devnode_t * ntmp; - devnode_t * tmp; - struct vnode *vp; - - vp = target->dn_vn; - tmp = target; - - do { - if (tmp == source) { - error = EINVAL; - break; - } - ntmp = tmp; - } while ((tmp = tmp->dn_typeinfo.Dir.parent) != ntmp); + DEVFS_UNLOCK(); - if (vp != NULL) - vput(vp); - return (error); + return (error); } /* @@ -923,14 +749,15 @@ devfs_checkpath(source, target) * directory. */ static int -devfs_rename(struct vop_rename_args *ap) - /*struct vop_rename_args { +devfs_rename(struct vnop_rename_args *ap) + /*struct vnop_rename_args { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ { struct vnode *tvp = ap->a_tvp; @@ -939,23 +766,22 @@ devfs_rename(struct vop_rename_args *ap) struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; - struct proc *p = fcnp->cn_proc; devnode_t *fp, *fdp, *tp, *tdp; devdirent_t *fnp,*tnp; int doingdirectory = 0; int error = 0; - struct timeval tv; + struct timeval now; + DEVFS_LOCK(); /* * First catch an arbitrary restriction for this FS */ - if(tcnp->cn_namelen > DEVMAXNAMESIZE) { + if (tcnp->cn_namelen > DEVMAXNAMESIZE) { error = ENAMETOOLONG; - goto abortit; + goto out; } /* - * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. 
Get pointers to all our * devfs structures. @@ -963,47 +789,26 @@ devfs_rename(struct vop_rename_args *ap) tdp = VTODN(tdvp); fdp = VTODN(fdvp); fp = VTODN(fvp); - fnp = fp->dn_last_lookup; + + fnp = dev_findname(fdp, fcnp->cn_nameptr); + + if (fnp == NULL) { + error = ENOENT; + goto out; + } tp = NULL; tnp = NULL; + if (tvp) { - tp = VTODN(tvp); - tnp = tp->dn_last_lookup; - } - - /* - * trying to move it out of devfs? - * if we move a dir across mnt points. we need to fix all - * the mountpoint pointers! XXX - * so for now keep dirs within the same mount - */ - if ((fvp->v_mount != tdvp->v_mount) || - (tvp && (fvp->v_mount != tvp->v_mount))) { - error = EXDEV; -abortit: - VOP_ABORTOP(tdvp, tcnp); - if (tdvp == tvp) /* eh? */ - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ - vrele(fdvp); - vrele(fvp); - return (error); - } + tnp = dev_findname(tdp, tcnp->cn_nameptr); - /* - * Check we are doing legal things WRT the new flags - */ - if ((tp && (tp->dn_flags & (IMMUTABLE | APPEND))) - || (fp->dn_flags & (IMMUTABLE | APPEND)) - || (fdp->dn_flags & APPEND)) { - error = EPERM; - goto abortit; + if (tnp == NULL) { + error = ENOENT; + goto out; + } + tp = VTODN(tvp); } - + /* * Make sure that we don't try do something stupid */ @@ -1017,7 +822,7 @@ abortit: || (tcnp->cn_flags&ISDOTDOT) || (tdp == fp )) { error = EINVAL; - goto abortit; + goto out; } doingdirectory++; } @@ -1032,7 +837,6 @@ abortit: */ if (doingdirectory && (tdp != fdp)) { devnode_t * tmp, *ntmp; - error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); tmp = tdp; do { if(tmp == fp) { @@ -1047,11 +851,11 @@ abortit: /*********************************** * Start actually doing things.... * ***********************************/ - fp->dn_flags |= DN_CHANGE; - tv = time; - if (error = VOP_UPDATE(fvp, &tv, &tv, 1)) { - VOP_UNLOCK(fvp, 0, p); - goto bad; + fp->dn_change = 1; + microtime(&now); + + if ( (error = devfs_update(fvp, &now, &now)) ) { + goto out; } /* * Check if just deleting a link name. @@ -1059,24 +863,14 @@ abortit: if (fvp == tvp) { if (fvp->v_type == VDIR) { error = EINVAL; - goto abortit; + goto out; } - /* Release destination completely. */ - VOP_ABORTOP(tdvp, tcnp); - vput(tdvp); - vput(tvp); - - /* Delete source. */ - VOP_ABORTOP(fdvp, fcnp); /*XXX*/ - vrele(fdvp); - vrele(fvp); dev_free_name(fnp); + + DEVFS_UNLOCK(); return 0; } - - vrele(fdvp); - /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before @@ -1088,29 +882,15 @@ abortit: * We could do that as well but won't */ if (tp) { - int ouruid = tcnp->cn_cred->cr_uid; - /* - * If the parent directory is "sticky", then the user must - * own the parent directory, or the destination of the rename, - * otherwise the destination may not be changed (except by - * root). This implements append-only directories. - * XXX shoudn't this be in generic code? - */ - if ((tdp->dn_mode & S_ISTXT) - && ouruid != 0 - && ouruid != tdp->dn_uid - && ouruid != tp->dn_uid ) { - error = EPERM; - goto bad; - } + int ouruid = kauth_cred_getuid(vfs_context_ucred(tcnp->cn_context)); /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). 
*/ if (( doingdirectory) && (tp->dn_links > 2)) { - error = ENOTEMPTY; - goto bad; + error = ENOTEMPTY; + goto bad; } dev_free_name(tnp); tp = NULL; @@ -1118,140 +898,112 @@ abortit: dev_add_name(tcnp->cn_nameptr,tdp,NULL,fp,&tnp); fnp->de_dnp = NULL; fp->dn_links--; /* one less link to it.. */ - dev_free_name(fnp); - fp->dn_links--; /* we added one earlier*/ - if (tdp) - vput(tdvp); - if (tp) - vput(fvp); - vrele(fvp); - return (error); + dev_free_name(fnp); bad: - if (tp) - vput(tvp); - vput(tdvp); + fp->dn_links--; /* we added one earlier*/ out: - if (vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p) == 0) { - fp->dn_links--; /* we added one earlier*/ - vput(fvp); - } else - vrele(fvp); + DEVFS_UNLOCK(); return (error); } static int -devfs_symlink(struct vop_symlink_args *ap) - /*struct vop_symlink_args { +devfs_symlink(struct vnop_symlink_args *ap) + /*struct vnop_symlink_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; char *a_target; + vfs_context_t a_context; } */ { struct componentname * cnp = ap->a_cnp; - struct vnode *vp = NULL; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); int error = 0; devnode_t * dir_p; devnode_type_t typeinfo; devdirent_t * nm_p; devnode_t * dev_p; - struct vattr * vap = ap->a_vap; + struct vnode_attr * vap = ap->a_vap; struct vnode * * vpp = ap->a_vpp; - struct proc *p = cnp->cn_proc; - struct timeval tv; dir_p = VTODN(ap->a_dvp); typeinfo.Slnk.name = ap->a_target; typeinfo.Slnk.namelen = strlen(ap->a_target); - DEVFS_LOCK(p); + + DEVFS_LOCK(); error = dev_add_entry(cnp->cn_nameptr, dir_p, DEV_SLNK, &typeinfo, NULL, NULL, &nm_p); - DEVFS_UNLOCK(p); if (error) { goto failure; } - dev_p = nm_p->de_dnp; dev_p->dn_uid = dir_p->dn_uid; dev_p->dn_gid = dir_p->dn_gid; dev_p->dn_mode = vap->va_mode; dn_copy_times(dev_p, dir_p); + error = devfs_dntovn(dev_p, vpp, p); - if (error) - goto failure; - vp = *vpp; - vput(vp); failure: - if ((cnp->cn_flags & SAVESTART) == 0) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } - vput(ap->a_dvp); + DEVFS_UNLOCK(); + return error; } /* * Mknod vnode call */ -/* ARGSUSED */ -int -devfs_mknod(ap) - struct vop_mknod_args /* { +static int +devfs_mknod(struct vnop_mknod_args *ap) + /* struct vnop_mknod_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ { struct componentname * cnp = ap->a_cnp; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); devnode_t * dev_p; devdirent_t * devent; devnode_t * dir_p; /* devnode for parent directory */ struct vnode * dvp = ap->a_dvp; int error = 0; devnode_type_t typeinfo; - struct vattr * vap = ap->a_vap; + struct vnode_attr * vap = ap->a_vap; struct vnode ** vpp = ap->a_vpp; - struct proc * p = cnp->cn_proc; *vpp = NULL; - if (!vap->va_type == VBLK && !vap->va_type == VCHR) { - error = EINVAL; /* only support mknod of special files */ - goto failure; + if (!(vap->va_type == VBLK) && !(vap->va_type == VCHR)) { + return (EINVAL); /* only support mknod of special files */ } dir_p = VTODN(dvp); typeinfo.dev = vap->va_rdev; - DEVFS_LOCK(p); + + DEVFS_LOCK(); error = dev_add_entry(cnp->cn_nameptr, dir_p, (vap->va_type == VBLK) ? 
DEV_BDEV : DEV_CDEV, &typeinfo, NULL, NULL, &devent); - DEVFS_UNLOCK(p); if (error) { - goto failure; + goto failure; } dev_p = devent->de_dnp; error = devfs_dntovn(dev_p, vpp, p); if (error) - goto failure; - dev_p->dn_uid = cnp->cn_cred->cr_uid; - dev_p->dn_gid = dir_p->dn_gid; + goto failure; + dev_p->dn_uid = vap->va_uid; + dev_p->dn_gid = vap->va_gid; dev_p->dn_mode = vap->va_mode; + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_mode); failure: - if (*vpp) { - vput(*vpp); - *vpp = 0; - } - if ((cnp->cn_flags & SAVESTART) == 0) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } - vput(dvp); + DEVFS_UNLOCK(); + return (error); } @@ -1259,14 +1011,14 @@ failure: * Vnode op for readdir */ static int -devfs_readdir(struct vop_readdir_args *ap) - /*struct vop_readdir_args { +devfs_readdir(struct vnop_readdir_args *ap) + /*struct vnop_readdir_args { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; - int *eofflag; - int *ncookies; - u_int **cookies; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; } */ { struct vnode *vp = ap->a_vp; @@ -1279,21 +1031,25 @@ devfs_readdir(struct vop_readdir_args *ap) int reclen; int nodenumber; int startpos,pos; - struct proc * p = uio->uio_procp; + + if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) + return (EINVAL); /* set up refs to dir */ dir_node = VTODN(vp); - if(dir_node->dn_type != DEV_DIR) + if (dir_node->dn_type != DEV_DIR) return(ENOTDIR); - pos = 0; startpos = uio->uio_offset; - DEVFS_LOCK(p); + + DEVFS_LOCK(); + name_node = dir_node->dn_typeinfo.Dir.dirlist; nodenumber = 0; - dir_node->dn_flags |= DN_ACCESS; - while ((name_node || (nodenumber < 2)) && (uio->uio_resid > 0)) + dir_node->dn_access = 1; + + while ((name_node || (nodenumber < 2)) && (uio_resid(uio) > 0)) { switch(nodenumber) { @@ -1341,7 +1097,7 @@ devfs_readdir(struct vop_readdir_args *ap) if(pos >= startpos) /* made it to the offset yet? */ { - if (uio->uio_resid < reclen) /* will it fit? */ + if (uio_resid(uio) < reclen) /* will it fit? 
*/ break; strcpy( dirent.d_name,name); if ((error = uiomove ((caddr_t)&dirent, @@ -1353,7 +1109,7 @@ devfs_readdir(struct vop_readdir_args *ap) name_node = name_node->de_next; nodenumber++; } - DEVFS_UNLOCK(p); + DEVFS_UNLOCK(); uio->uio_offset = pos; return (error); @@ -1363,11 +1119,11 @@ devfs_readdir(struct vop_readdir_args *ap) /* */ static int -devfs_readlink(struct vop_readlink_args *ap) - /*struct vop_readlink_args { +devfs_readlink(struct vnop_readlink_args *ap) + /*struct vnop_readlink_args { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ { struct vnode *vp = ap->a_vp; @@ -1377,25 +1133,28 @@ devfs_readlink(struct vop_readlink_args *ap) /* set up refs to dir */ lnk_node = VTODN(vp); - if(lnk_node->dn_type != DEV_SLNK) - return(EINVAL); - if ((error = VOP_ACCESS(vp, VREAD, ap->a_cred, NULL)) != 0) { /* XXX */ - return error; + + if (lnk_node->dn_type != DEV_SLNK) { + error = EINVAL; + goto out; } error = uiomove(lnk_node->dn_typeinfo.Slnk.name, lnk_node->dn_typeinfo.Slnk.namelen, uio); +out: return error; } static int -devfs_reclaim(struct vop_reclaim_args *ap) - /*struct vop_reclaim_args { +devfs_reclaim(struct vnop_reclaim_args *ap) + /*struct vnop_reclaim_args { struct vnode *a_vp; } */ { struct vnode * vp = ap->a_vp; devnode_t * dnp = VTODN(vp); + DEVFS_LOCK(); + if (dnp) { /* * do the same as devfs_inactive in case it is not called @@ -1403,78 +1162,99 @@ devfs_reclaim(struct vop_reclaim_args *ap) */ dnp->dn_vn = NULL; vp->v_data = NULL; + if (dnp->dn_delete) { devnode_free(dnp); } } + DEVFS_UNLOCK(); + return(0); } + /* - * Print out the contents of a /devfs vnode. + * Get configurable pathname variables. */ static int -devfs_print(struct vop_print_args *ap) - /*struct vop_print_args { +devs_vnop_pathconf( + struct vnop_pathconf_args /* { struct vnode *a_vp; - } */ + int a_name; + int *a_retval; + vfs_context_t a_context; + } */ *ap) { + switch (ap->a_name) { + case _PC_LINK_MAX: + /* arbitrary limit matching HFS; devfs has no hard limit */ + *ap->a_retval = 32767; + break; + case _PC_NAME_MAX: + *ap->a_retval = DEVMAXNAMESIZE - 1; /* includes NUL */ + break; + case _PC_PATH_MAX: + *ap->a_retval = DEVMAXPATHSIZE - 1; /* XXX nonconformant */ + break; + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + break; + case _PC_NO_TRUNC: + *ap->a_retval = 0; + break; + case _PC_CASE_SENSITIVE: + *ap->a_retval = 1; + break; + case _PC_CASE_PRESERVING: + *ap->a_retval = 1; + break; + default: + return (EINVAL); + } return (0); } + + /**************************************************************************\ * pseudo ops * \**************************************************************************/ /* * - * struct vop_inactive_args { + * struct vnop_inactive_args { * struct vnode *a_vp; - * struct proc *a_p; + * vfs_context_t a_context; * } */ static int -devfs_inactive(struct vop_inactive_args *ap) +devfs_inactive(__unused struct vnop_inactive_args *ap) { - struct vnode * vp = ap->a_vp; - devnode_t * dnp = VTODN(vp); - - if (dnp) { - dnp->dn_vn = NULL; - vp->v_data = NULL; - if (dnp->dn_delete) { - devnode_free(dnp); - } - } - VOP_UNLOCK(vp, 0, ap->a_p); return (0); } -int -devfs_update(ap) - struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; - } */ *ap; +/* + * called with DEVFS_LOCK held + */ +static int +devfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify) { - register struct fs *fs; - int error; devnode_t * ip; + struct timeval now; + + 
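+ /* + * Fold any pending access/change/update requests into the node's timestamps; + * on a read-only mount the request bits are simply cleared. + */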
ip = VTODN(vp); + if (vp->v_mount->mnt_flag & MNT_RDONLY) { + ip->dn_access = 0; + ip->dn_change = 0; + ip->dn_update = 0; - ip = VTODN(ap->a_vp); - if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { - ip->dn_flags &= - ~(DN_ACCESS | DN_CHANGE | DN_MODIFIED | DN_UPDATE); return (0); } - if ((ip->dn_flags & - (DN_ACCESS | DN_CHANGE | DN_MODIFIED | DN_UPDATE)) == 0) - return (0); - dn_times(ip, time, time); + microtime(&now); + dn_times(ip, access, modify, &now); + return (0); } @@ -1483,57 +1263,42 @@ devfs_update(ap) /* The following ops are used by directories and symlinks */ int (**devfs_vnodeop_p)(void *); static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)devfs_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)err_create }, /* create */ - { &vop_whiteout_desc, (VOPFUNC)err_whiteout }, /* whiteout */ - { &vop_mknod_desc, (VOPFUNC)devfs_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)nop_open }, /* open */ - { &vop_close_desc, (VOPFUNC)devfs_close }, /* close */ - { &vop_access_desc, (VOPFUNC)devfs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)devfs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)devfs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)devfs_read }, /* read */ - { &vop_write_desc, (VOPFUNC)devfs_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)nop_lease }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)err_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)err_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)err_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)nop_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)err_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)devfs_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)devfs_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)devfs_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)devfs_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)devfs_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)nop_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)devfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)devfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)nop_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)nop_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)err_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)err_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)err_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)nop_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)err_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)err_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)err_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)err_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)err_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)devfs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)err_bwrite }, - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)err_blktooff }, /* 
blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)err_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)devfs_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ + { &vnop_whiteout_desc, (VOPFUNC)err_whiteout }, /* whiteout */ + { &vnop_mknod_desc, (VOPFUNC)devfs_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)nop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)devfs_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)devfs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)devfs_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)devfs_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)devfs_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)err_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)err_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)err_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)nop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)devfs_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)devfs_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)devfs_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)devfs_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)devfs_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)devfs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)devfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)err_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)devs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)err_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)err_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc devfs_vnodeop_opv_desc = @@ -1542,57 +1307,42 @@ struct vnodeopv_desc devfs_vnodeop_opv_desc = /* The following ops are used by the device nodes */ int (**devfs_spec_vnodeop_p)(void *); static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)devfsspec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)devfs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)devfs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)devfs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)devfsspec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)devfsspec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)spec_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap 
*/ - { &vop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)spec_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)devfs_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)spec_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)devfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)devfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)nop_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)nop_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)devfs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)nop_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)spec_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)spec_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)spec_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)nop_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)spec_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)devfs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ - { &vop_blktooff_desc, (VOPFUNC)spec_offtoblk }, /* blkofftoblk */ - { &vop_cmap_desc, (VOPFUNC)spec_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)devfsspec_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)devfs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)devfs_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)devfsspec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)devfsspec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)devfs_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)devfs_inactive }, /* inactive */ + 
{ &vnop_reclaim_desc, (VOPFUNC)devfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc devfs_spec_vnodeop_opv_desc = diff --git a/bsd/miscfs/devfs/devfsdefs.h b/bsd/miscfs/devfs/devfsdefs.h index 6b00a76c5..b104d6927 100644 --- a/bsd/miscfs/devfs/devfsdefs.h +++ b/bsd/miscfs/devfs/devfsdefs.h @@ -101,12 +101,6 @@ union devnode_type { }Slnk; }; -#define DN_ACCESS 0x0001 /* Access time update request. */ -#define DN_CHANGE 0x0002 /* Inode change time update request. */ -#define DN_UPDATE 0x0004 /* Modification time update request. */ -#define DN_MODIFIED 0x0008 /* Inode has been modified. */ -#define DN_RENAME 0x0010 /* Inode is being renamed. */ - struct devnode { devfstype_t dn_type; @@ -123,13 +117,22 @@ struct devnode struct vnode * dn_vn; /* address of last vnode that represented us */ int dn_len; /* of any associated info (e.g. dir data) */ devdirent_t * dn_linklist;/* circular list of hardlinks to this node */ - devdirent_t * dn_last_lookup; /* name I was last looked up from */ devnode_t * dn_nextsibling; /* the list of equivalent nodes */ devnode_t * * dn_prevsiblingp;/* backpointer for the above */ devnode_type_t dn_typeinfo; int dn_delete; /* mark for deletion */ + int dn_change; + int dn_update; + int dn_access; + int dn_lflags; }; +#define DN_BUSY 0x01 +#define DN_DELETE 0x02 +#define DN_CREATE 0x04 +#define DN_CREATEWAIT 0x08 + + struct devdirent { /*-----------------------directory entry fields-------------*/ @@ -143,8 +146,8 @@ struct devdirent }; extern devdirent_t * dev_root; -extern struct lock__bsd__ devfs_lock; extern struct devfs_stats devfs_stats; +extern lck_mtx_t devfs_mutex; /* * Rules for front nodes: @@ -179,90 +182,80 @@ struct devfsmount #define VTODN(vp) ((devnode_t *)(vp)->v_data) -static __inline__ int -DEVFS_LOCK(struct proc * p) -{ - return (lockmgr(&devfs_lock, LK_EXCLUSIVE, NULL, p)); -} +#define DEVFS_LOCK() lck_mtx_lock(&devfs_mutex) + +#define DEVFS_UNLOCK() lck_mtx_unlock(&devfs_mutex) + -static __inline__ int -DEVFS_UNLOCK(struct proc * p) -{ - return (lockmgr(&devfs_lock, LK_RELEASE, NULL, p)); -} static __inline__ void DEVFS_INCR_ENTRIES() { - devfs_stats.entries++; + OSAddAtomic(1, &devfs_stats.entries); } static __inline__ void DEVFS_DECR_ENTRIES() { - devfs_stats.entries--; + OSAddAtomic(-1, &devfs_stats.entries); } static __inline__ void DEVFS_INCR_NODES() { - devfs_stats.nodes++; + OSAddAtomic(1, &devfs_stats.nodes); } static __inline__ void DEVFS_DECR_NODES() { - devfs_stats.nodes--; + OSAddAtomic(-1, &devfs_stats.nodes); } static __inline__ void DEVFS_INCR_MOUNTS() { - devfs_stats.mounts++; + OSAddAtomic(1, &devfs_stats.mounts); } static __inline__ void DEVFS_DECR_MOUNTS() { - devfs_stats.mounts--; + OSAddAtomic(-1, &devfs_stats.mounts); } static __inline__ void DEVFS_INCR_STRINGSPACE(int space) { - 
devfs_stats.stringspace += space; + OSAddAtomic(space, &devfs_stats.stringspace); } static __inline__ void DEVFS_DECR_STRINGSPACE(int space) { - devfs_stats.stringspace -= space; - if (devfs_stats.stringspace < 0) { - printf("DEVFS_DECR_STRINGSPACE: (%d - %d < 0)\n", - devfs_stats.stringspace + space, space); - devfs_stats.stringspace = 0; - } + OSAddAtomic(-space, &devfs_stats.stringspace); } static __inline__ void -dn_times(devnode_t * dnp, struct timeval t1, struct timeval t2) +dn_times(devnode_t * dnp, struct timeval *t1, struct timeval *t2, struct timeval *t3) { - if (dnp->dn_flags & (DN_ACCESS | DN_CHANGE | DN_UPDATE)) { - if (dnp->dn_flags & DN_ACCESS) { - dnp->dn_atime.tv_sec = t1.tv_sec; - dnp->dn_atime.tv_nsec = t1.tv_usec * 1000; + if (dnp->dn_access) { + dnp->dn_atime.tv_sec = t1->tv_sec; + dnp->dn_atime.tv_nsec = t1->tv_usec * 1000; + dnp->dn_access = 0; } - if (dnp->dn_flags & DN_UPDATE) { - dnp->dn_mtime.tv_sec = t2.tv_sec; - dnp->dn_mtime.tv_nsec = t2.tv_usec * 1000; + if (dnp->dn_update) { + dnp->dn_mtime.tv_sec = t2->tv_sec; + dnp->dn_mtime.tv_nsec = t2->tv_usec * 1000; + dnp->dn_update = 0; } - if (dnp->dn_flags & DN_CHANGE) { - dnp->dn_ctime.tv_sec = time.tv_sec; - dnp->dn_ctime.tv_nsec = time.tv_usec * 1000; + if (dnp->dn_change) { + dnp->dn_ctime.tv_sec = t3->tv_sec; + dnp->dn_ctime.tv_nsec = t3->tv_usec * 1000; + dnp->dn_change = 0; } - dnp->dn_flags &= ~(DN_ACCESS | DN_CHANGE | DN_UPDATE); - } - return; + + return; } static __inline__ void diff --git a/bsd/miscfs/fdesc/fdesc.h b/bsd/miscfs/fdesc/fdesc.h index 63a330513..b4141e2c1 100644 --- a/bsd/miscfs/fdesc/fdesc.h +++ b/bsd/miscfs/fdesc/fdesc.h @@ -88,18 +88,33 @@ typedef enum { struct fdescnode { LIST_ENTRY(fdescnode) fd_hash; /* Hash list */ struct vnode *fd_vnode; /* Back ptr to vnode */ - fdntype fd_type; /* Type of this node */ - unsigned fd_fd; /* Fd to be dup'ed */ - char *fd_link; /* Link to fd/n */ - int fd_ix; /* filesystem index */ + fdntype fd_type; /* Type of this node */ + unsigned fd_fd; /* Fd to be dup'ed */ + char *fd_link; /* Link to fd/n */ + int fd_ix; /* filesystem index */ }; #define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data)) #define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) -extern int fdesc_init __P((struct vfsconf *)); -extern int fdesc_root __P((struct mount *, struct vnode **)); -extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **)); +extern int fdesc_allocvp(fdntype, int, struct mount *, struct vnode **, enum vtype); +extern int fdesc_badop(void); +extern int fdesc_getattr(struct vnop_getattr_args *ap); +extern int fdesc_inactive(struct vnop_inactive_args *ap); +extern int fdesc_init(struct vfsconf *); +extern int fdesc_ioctl(struct vnop_ioctl_args *ap); +extern int fdesc_lookup(struct vnop_lookup_args *ap); +extern int fdesc_open(struct vnop_open_args *ap); +extern int fdesc_pathconf(struct vnop_pathconf_args *ap); +extern int fdesc_read(struct vnop_read_args *ap); +extern int fdesc_readdir(struct vnop_readdir_args *ap); +extern int fdesc_readlink(struct vnop_readlink_args *ap); +extern int fdesc_reclaim(struct vnop_reclaim_args *ap); +extern int fdesc_root(struct mount *, struct vnode **, vfs_context_t); +extern int fdesc_select(struct vnop_select_args *ap); +extern int fdesc_setattr(struct vnop_setattr_args *ap); +extern int fdesc_write(struct vnop_write_args *ap); + extern int (**fdesc_vnodeop_p)(void *); extern struct vfsops fdesc_vfsops; #endif /* KERNEL */ diff --git a/bsd/miscfs/fdesc/fdesc_vfsops.c b/bsd/miscfs/fdesc/fdesc_vfsops.c 
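The devfsdefs.h hunk above replaces the DN_* request-flag bits and the lockmgr-based DEVFS_LOCK/DEVFS_UNLOCK inlines with per-field request ints guarded by a single lck_mtx, and dn_times() now takes timeval pointers plus an explicit third value for the ctime. A minimal sketch of the resulting caller pattern, mirroring what devfs_close() does earlier in this patch; the helper name devfs_touch_example is hypothetical and the snippet assumes the devfs internal headers are in scope:

static void
devfs_touch_example(struct vnode *vp)
{
	devnode_t *dnp = VTODN(vp);
	struct timeval now;

	dnp->dn_access = 1;			/* request an atime update */
	dnp->dn_change = 1;			/* request a ctime update */

	DEVFS_LOCK();				/* lck_mtx_lock(&devfs_mutex) */
	microtime(&now);
	dn_times(dnp, &now, &now, &now);	/* applies, then clears, the request bits */
	DEVFS_UNLOCK();
}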
index 53c3d75a9..b0173ec44 100644 --- a/bsd/miscfs/fdesc/fdesc_vfsops.c +++ b/bsd/miscfs/fdesc/fdesc_vfsops.c @@ -67,11 +67,11 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #include #include @@ -80,15 +80,9 @@ * Mount the per-process file descriptors (/dev/fd) */ int -fdesc_mount(mp, path, data, ndp, p) - struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +fdesc_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context) { int error = 0; - size_t size; struct fdescmount *fmp; struct vnode *rvp; @@ -96,67 +90,68 @@ fdesc_mount(mp, path, data, ndp, p) * Update is a no-op */ if (mp->mnt_flag & MNT_UPDATE) - return (EOPNOTSUPP); + return (ENOTSUP); - error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp); + error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp, VDIR); if (error) return (error); MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount), M_UFSMNT, M_WAITOK); /* XXX */ - rvp->v_type = VDIR; - rvp->v_flag |= VROOT; + + vnode_setnoflush(rvp); + vnode_ref(rvp); + vnode_put(rvp); + fmp->f_root = rvp; /* XXX -- don't mark as local to work around fts() problems */ /*mp->mnt_flag |= MNT_LOCAL;*/ mp->mnt_data = (qaddr_t) fmp; vfs_getnewfsid(mp); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); - bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc")); + bzero(mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN); + bcopy("fdesc", mp->mnt_vfsstat.f_mntfromname, sizeof("fdesc")); return (0); } int -fdesc_start(mp, flags, p) +fdesc_start(mp, flags, context) struct mount *mp; int flags; - struct proc *p; + vfs_context_t context; { return (0); } int -fdesc_unmount(mp, mntflags, p) +fdesc_unmount(mp, mntflags, context) struct mount *mp; int mntflags; - struct proc *p; + vfs_context_t context; { int error; int flags = 0; int force = 0; - struct vnode *rootvp = VFSTOFDESC(mp)->f_root; + struct vnode *rvp = VFSTOFDESC(mp)->f_root; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; force = 1; } - if ( (rootvp->v_usecount > 1) && !force ) + if ( vnode_isinuse(rvp, 1) && !force ) return (EBUSY); - if ( (error = vflush(mp, rootvp, flags)) && !force ) + if ( (error = vflush(mp, rvp, flags|SKIPSYSTEM)) && !force ) return (error); /* - * Release reference on underlying root vnode + * And mark for recycle after we drop its reference; it is blown away for future re-use */ - vrele(rootvp); + vnode_recycle(rvp); /* - * And blow it away for future re-use + * Release reference on underlying root vnode */ - vgone(rootvp); + vnode_rele(rvp); /* * Finally, throw away the fdescmount structure */ @@ -167,29 +162,29 @@ fdesc_unmount(mp, mntflags, p) } int -fdesc_root(mp, vpp) +fdesc_root(mp, vpp, context) struct mount *mp; struct vnode **vpp; + vfs_context_t context; { - struct proc *p = current_proc(); /* XXX */ struct vnode *vp; /* * Return locked reference to root. 
*/ vp = VFSTOFDESC(mp)->f_root; - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + vnode_get(vp); *vpp = vp; return (0); } int -fdesc_statfs(mp, sbp, p) +fdesc_statfs(mp, sbp, context) struct mount *mp; - struct statfs *sbp; - struct proc *p; + struct vfsstatfs *sbp; + vfs_context_t context; { + struct proc *p = vfs_context_proc(context); struct filedesc *fdp; int lim; int i; @@ -221,50 +216,94 @@ fdesc_statfs(mp, sbp, p) sbp->f_flags = 0; sbp->f_bsize = DEV_BSIZE; sbp->f_iosize = DEV_BSIZE; - sbp->f_blocks = 2; /* 1K to keep df happy */ + sbp->f_blocks = (uint64_t)2; /* 1K to keep df happy */ sbp->f_bfree = 0; sbp->f_bavail = 0; - sbp->f_files = lim + 1; /* Allow for "." */ - sbp->f_ffree = freefd; /* See comments above */ - if (sbp != &mp->mnt_stat) { - sbp->f_type = mp->mnt_vfc->vfc_typenum; - bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); - bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); - bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); - } + sbp->f_files = (uint64_t)((unsigned long)(lim + 1)); /* Allow for "." */ + sbp->f_ffree = (uint64_t)((unsigned long)freefd); /* See comments above */ + return (0); } +static int +fdesc_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) +{ + VFSATTR_RETURN(fsap, f_bsize, DEV_BSIZE); + VFSATTR_RETURN(fsap, f_iosize, DEV_BSIZE); + VFSATTR_RETURN(fsap, f_blocks, 2); + VFSATTR_RETURN(fsap, f_bfree, 0); + VFSATTR_RETURN(fsap, f_bavail, 0); + VFSATTR_RETURN(fsap, f_fssubtype, 0); + + if (VFSATTR_IS_ACTIVE(fsap, f_objcount) || + VFSATTR_IS_ACTIVE(fsap, f_maxobjcount) || + VFSATTR_IS_ACTIVE(fsap, f_files) || + VFSATTR_IS_ACTIVE(fsap, f_ffree)) + { + struct proc *p = vfs_context_proc(context); + struct filedesc *fdp; + int lim; + int i; + int last; + int freefd; + + /* + * Compute number of free file descriptors. + * [ Strange results will ensue if the open file + * limit is ever reduced below the current number + * of open files... ] + */ + lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; + fdp = p->p_fd; + last = min(fdp->fd_nfiles, lim); + freefd = 0; + for (i = fdp->fd_freefile; i < last; i++) + if (fdp->fd_ofiles[i] == NULL && + !(fdp->fd_ofileflags[i] & UF_RESERVED)) + freefd++; + + /* + * Adjust for the fact that the fdesc array may not + * have been fully allocated yet. 
+ */ + if (fdp->fd_nfiles < lim) + freefd += (lim - fdp->fd_nfiles); + + VFSATTR_RETURN(fsap, f_objcount, lim+1); + VFSATTR_RETURN(fsap, f_maxobjcount, lim+1); + VFSATTR_RETURN(fsap, f_files, lim+1); + VFSATTR_RETURN(fsap, f_ffree, freefd); + } + + return 0; +} + int -fdesc_sync(mp, waitfor) +fdesc_sync(mp, waitfor, context) struct mount *mp; int waitfor; + vfs_context_t context; { return (0); } -#define fdesc_fhtovp ((int (*) __P((struct mount *, struct fid *, \ - struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) -#define fdesc_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ - struct proc *)))eopnotsupp) -#define fdesc_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ - size_t, struct proc *)))eopnotsupp) -#define fdesc_vget ((int (*) __P((struct mount *, void *, struct vnode **))) \ - eopnotsupp) -#define fdesc_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) +#define fdesc_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp +#define fdesc_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp +#define fdesc_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp +#define fdesc_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp struct vfsops fdesc_vfsops = { fdesc_mount, fdesc_start, fdesc_unmount, fdesc_root, - fdesc_quotactl, - fdesc_statfs, + NULL, /* quotactl */ + fdesc_vfs_getattr, fdesc_sync, fdesc_vget, fdesc_fhtovp, fdesc_vptofh, fdesc_init, - fdesc_sysctl, + fdesc_sysctl }; diff --git a/bsd/miscfs/fdesc/fdesc_vnops.c b/bsd/miscfs/fdesc/fdesc_vnops.c index 3f11d10c6..185a74c61 100644 --- a/bsd/miscfs/fdesc/fdesc_vnops.c +++ b/bsd/miscfs/fdesc/fdesc_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -67,21 +67,25 @@ #include #include #include -#include +#include #include /* boottime */ #include #include -#include +#include +#include #include -#include +#include #include -#include +#include #include -#include #include #include +#include +#include +#include #include #include +#include #define FDL_WANT 0x01 #define FDL_LOCKED 0x02 @@ -99,35 +103,54 @@ FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2 LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; u_long fdhash; +static int fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context); + + /* * Initialise cache headers */ -fdesc_init(vfsp) - struct vfsconf *vfsp; +int +fdesc_init(__unused struct vfsconf *vfsp) { fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); + + return( 0 ); } int -fdesc_allocvp(ftype, ix, mp, vpp) +fdesc_allocvp(ftype, ix, mp, vpp, vtype) fdntype ftype; int ix; struct mount *mp; struct vnode **vpp; + enum vtype vtype; { - struct proc *p = current_proc(); /* XXX */ struct fdhashhead *fc; struct fdescnode *fd; int error = 0; + int vid = 0; + struct vnode_fsparam vfsp; fc = FD_NHASH(ix); loop: for (fd = fc->lh_first; fd != 0; fd = fd->fd_hash.le_next) { - if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { - if (vget(fd->fd_vnode, 0, p)) + if (fd->fd_ix == ix && vnode_mount(fd->fd_vnode) == mp) { + /* + * doing a vnode_getwithvid isn't technically + * necessary since fdesc is an unsafe filesystem + * and we're running behind a funnel at this point + * however, vnode_get always succeeds, which isn't + * what we want if this vnode is in the process of + * being terminated + */ + vid = vnode_vid(fd->fd_vnode); + + if (vnode_getwithvid(fd->fd_vnode, vid)) goto loop; *vpp = fd->fd_vnode; + (*vpp)->v_type = vtype; + return (error); } } @@ -144,12 +167,29 @@ loop: fdcache_lock |= FDL_LOCKED; MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK); - error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp); + + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "fdesc"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = fd; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = fdesc_vnodeop_p; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + vfsp.vnfs_marksystem = 0; + if (ftype == Froot) + vfsp.vnfs_markroot = 1; + else + vfsp.vnfs_markroot = 0; + + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp); if (error) { FREE(fd, M_TEMP); goto out; } - (*vpp)->v_data = fd; + (*vpp)->v_tag = VT_FDESC; fd->fd_vnode = *vpp; fd->fd_type = ftype; fd->fd_fd = -1; @@ -174,28 +214,30 @@ out: */ int fdesc_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; + vfs_context_t a_context; } */ *ap; { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; char *pname = cnp->cn_nameptr; - struct proc *p = cnp->cn_proc; - int nfiles = p->p_fd->fd_nfiles; - unsigned fd; + struct proc *p = vfs_context_proc(ap->a_context); + int numfiles = p->p_fd->fd_nfiles; + int fd; int error; struct vnode *fvp; char *ln; - VOP_UNLOCK(dvp, 0, p); if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; - VREF(dvp); - vn_lock(dvp, LK_SHARED | LK_RETRY, p); + + if ( (error = vnode_get(dvp)) ) { + return(error); + } return (0); } @@ -203,21 +245,20 @@ fdesc_lookup(ap) default: case Flink: case Fdesc: + /* should never happen */ error = ENOTDIR; goto bad; case Froot: if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) 
{ - error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp); + error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp, VDIR); if (error) goto bad; *vpp = fvp; - fvp->v_type = VDIR; - vn_lock(fvp, LK_SHARED | LK_RETRY, p); return (0); } - ln = 0; + ln = NULL; switch (cnp->cn_namelen) { case 5: if (bcmp(pname, "stdin", 5) == 0) { @@ -238,13 +279,11 @@ fdesc_lookup(ap) } if (ln) { - error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp); + error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp, VLNK); if (error) goto bad; VTOFDESC(fvp)->fd_link = ln; *vpp = fvp; - fvp->v_type = VLNK; - vn_lock(fvp, LK_SHARED | LK_RETRY, p); return (0); } else { error = ENOENT; @@ -255,7 +294,7 @@ fdesc_lookup(ap) case Fdevfd: if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) { - if (error = fdesc_root(dvp->v_mount, vpp)) + if ((error = fdesc_root(dvp->v_mount, vpp, ap->a_context))) goto bad; return (0); } @@ -263,7 +302,7 @@ fdesc_lookup(ap) fd = 0; while (*pname >= '0' && *pname <= '9') { fd = 10 * fd + *pname++ - '0'; - if (fd >= nfiles) + if (fd >= numfiles) break; } @@ -272,38 +311,36 @@ fdesc_lookup(ap) goto bad; } - if (fd >= nfiles || + if (fd < 0 || fd >= numfiles || *fdfile(p, fd) == NULL || (*fdflags(p, fd) & UF_RESERVED)) { error = EBADF; goto bad; } - error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp); + error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, VNON); if (error) goto bad; VTOFDESC(fvp)->fd_fd = fd; - vn_lock(fvp, LK_SHARED | LK_RETRY, p); *vpp = fvp; return (0); } bad:; - vn_lock(dvp, LK_SHARED | LK_RETRY, p); *vpp = NULL; return (error); } int fdesc_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; + struct proc *p = vfs_context_proc(ap->a_context); int error = 0; switch (VTOFDESC(vp)->fd_type) { @@ -314,9 +351,9 @@ fdesc_open(ap) * return ensures that the vnode for this device will be * released by vn_open. Open will detect this special error and * take the actions in dupfdopen. Other callers of vn_open or - * VOP_OPEN will simply report the error. + * vnop_open will simply report the error. */ - ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ + p->p_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ error = ENODEV; break; @@ -326,123 +363,136 @@ fdesc_open(ap) } static int -fdesc_attr(fd, vap, cred, p) - int fd; - struct vattr *vap; - struct ucred *cred; - struct proc *p; +fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context) { - struct file *fp; + struct fileproc *fp; + struct proc *p = vfs_context_proc(a_context); struct stat stb; int error; - if (error = fdgetf(p, fd, &fp)) + if ((error = fp_lookup(p, fd, &fp, 0))) return (error); - switch (fp->f_type) { + switch (fp->f_fglob->fg_type) { case DTYPE_VNODE: - error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p); + if(error = vnode_getwithref((struct vnode *) fp->f_fglob->fg_data)) { + break; + } + if ((error = vnode_authorize((struct vnode *)fp->f_fglob->fg_data, + NULL, + KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, + a_context)) == 0) + error = vnode_getattr((struct vnode *)fp->f_fglob->fg_data, vap, a_context); if (error == 0 && vap->va_type == VDIR) { /* * directories can cause loops in the namespace, * so turn off the 'x' bits to avoid trouble. 
+ * + * XXX ACLs break this, of course */ vap->va_mode &= ~((VEXEC)|(VEXEC>>3)|(VEXEC>>6)); } + (void)vnode_put((struct vnode *) fp->f_fglob->fg_data); break; case DTYPE_SOCKET: - error = soo_stat((struct socket *)fp->f_data, &stb); + case DTYPE_PIPE: + if (fp->f_fglob->fg_type == DTYPE_SOCKET) + error = soo_stat((struct socket *)fp->f_fglob->fg_data, &stb); + else + error = pipe_stat((struct socket *)fp->f_fglob->fg_data, &stb); + if (error == 0) { - vattr_null(vap); - vap->va_type = VSOCK; - vap->va_mode = stb.st_mode; - vap->va_nlink = stb.st_nlink; - vap->va_uid = stb.st_uid; - vap->va_gid = stb.st_gid; - vap->va_fsid = stb.st_dev; - vap->va_fileid = stb.st_ino; - vap->va_size = stb.st_size; - vap->va_blocksize = stb.st_blksize; - vap->va_atime = stb.st_atimespec; - vap->va_mtime = stb.st_mtimespec; - vap->va_ctime = stb.st_ctimespec; - vap->va_gen = stb.st_gen; - vap->va_flags = stb.st_flags; - vap->va_rdev = stb.st_rdev; - vap->va_bytes = stb.st_blocks * stb.st_blksize; + if (fp->f_fglob->fg_type == DTYPE_SOCKET) + VATTR_RETURN(vap, va_type, VSOCK); + else + VATTR_RETURN(vap, va_type, VFIFO); + + VATTR_RETURN(vap, va_mode, stb.st_mode); + VATTR_RETURN(vap, va_nlink, stb.st_nlink); + VATTR_RETURN(vap, va_uid, stb.st_uid); + VATTR_RETURN(vap, va_gid, stb.st_gid); + VATTR_RETURN(vap, va_fsid, stb.st_dev); + VATTR_RETURN(vap, va_fileid, stb.st_ino); + VATTR_RETURN(vap, va_data_size, stb.st_size); + VATTR_RETURN(vap, va_access_time, stb.st_atimespec); + VATTR_RETURN(vap, va_modify_time, stb.st_mtimespec); + VATTR_RETURN(vap, va_change_time, stb.st_ctimespec); + VATTR_RETURN(vap, va_gen, stb.st_gen); + VATTR_RETURN(vap, va_flags, stb.st_flags); + VATTR_RETURN(vap, va_rdev, stb.st_rdev); + VATTR_RETURN(vap, va_total_alloc, stb.st_blocks * stb.st_blksize); + VATTR_RETURN(vap, va_acl, NULL); } break; default: - return (EBADF); - break; + error = EBADF; } + fp_drop(p, fd, fp, 0); return (error); } int fdesc_getattr(ap) - struct vop_getattr_args /* { + struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; - struct vattr *vap = ap->a_vap; + struct vnode_attr *vap = ap->a_vap; unsigned fd; int error = 0; + struct timespec ts; switch (VTOFDESC(vp)->fd_type) { case Froot: case Fdevfd: case Flink: - bzero((caddr_t) vap, sizeof(*vap)); - vattr_null(vap); - vap->va_fileid = VTOFDESC(vp)->fd_ix; - - vap->va_uid = 0; - vap->va_gid = 0; - vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; - vap->va_blocksize = DEV_BSIZE; - vap->va_atime.tv_sec = boottime.tv_sec; - vap->va_atime.tv_nsec = 0; - vap->va_mtime = vap->va_atime; - vap->va_ctime = vap->va_mtime; - vap->va_gen = 0; - vap->va_flags = 0; - vap->va_rdev = 0; - vap->va_bytes = 0; + VATTR_RETURN(vap, va_fileid, VTOFDESC(vp)->fd_ix); + VATTR_RETURN(vap, va_uid, 0); + VATTR_RETURN(vap, va_gid, 0); + VATTR_RETURN(vap, va_fsid, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + VATTR_RETURN(vap, va_iosize, DEV_BSIZE); + ts.tv_sec = boottime_sec(); + ts.tv_nsec = 0; + VATTR_RETURN(vap, va_access_time, ts); + VATTR_RETURN(vap, va_modify_time, ts); + VATTR_RETURN(vap, va_change_time, ts); + VATTR_RETURN(vap, va_gen, 0); + VATTR_RETURN(vap, va_flags, 0); + VATTR_RETURN(vap, va_rdev, 0); + VATTR_RETURN(vap, va_acl, NULL); switch (VTOFDESC(vp)->fd_type) { case Flink: - vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; - vap->va_type = VLNK; - vap->va_nlink = 1; - vap->va_size = 
strlen(VTOFDESC(vp)->fd_link); + VATTR_RETURN(vap, va_mode, S_IRUSR|S_IRGRP|S_IROTH); + VATTR_RETURN(vap, va_type, VLNK); /* not strictly required */ + VATTR_RETURN(vap, va_nlink, 1); + VATTR_RETURN(vap, va_data_size, strlen(VTOFDESC(vp)->fd_link)); break; default: - vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; - vap->va_type = VDIR; - vap->va_nlink = 2; - vap->va_size = DEV_BSIZE; + VATTR_RETURN(vap, va_mode, S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); + VATTR_RETURN(vap, va_type, VDIR); + VATTR_RETURN(vap, va_nlink, 2); + VATTR_RETURN(vap, va_data_size, DEV_BSIZE); break; } break; case Fdesc: fd = VTOFDESC(vp)->fd_fd; - error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p); + error = fdesc_attr(fd, vap, ap->a_context); break; default: return (EBADF); break; } - + if (error == 0) { vp->v_type = vap->va_type; } @@ -452,16 +502,16 @@ fdesc_getattr(ap) int fdesc_setattr(ap) - struct vop_setattr_args /* { + struct vnop_setattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - struct file *fp; + struct fileproc *fp; unsigned fd; int error; + struct proc * p = vfs_context_proc(ap->a_context); /* * Can't mess with the root vnode @@ -475,27 +525,34 @@ fdesc_setattr(ap) } fd = VTOFDESC(ap->a_vp)->fd_fd; - if (error = fdgetf(ap->a_p, fd, &fp)) + if ((error = fp_lookup(vfs_context_proc(ap->a_context), fd, &fp, 0))) return (error); /* * Can setattr the underlying vnode, but not sockets! */ - switch (fp->f_type) { + switch (fp->f_fglob->fg_type) { case DTYPE_VNODE: - error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p); + { + if ((error = vnode_getwithref((struct vnode *) fp->f_fglob->fg_data)) != 0) + break; + error = vnode_setattr((struct vnode *) fp->f_fglob->fg_data, ap->a_vap, ap->a_context); + (void)vnode_put((struct vnode *) fp->f_fglob->fg_data); break; + } case DTYPE_SOCKET: + case DTYPE_PIPE: error = 0; break; default: - kprintf("fp->f_type = %d\n", fp->f_type); + kprintf("fp->f_fglob->fg_type = %d\n", fp->f_fglob->fg_type); error = EBADF; break; } + fp_drop(p, fd, fp, 0); return (error); } @@ -511,29 +568,29 @@ static struct dirtmp { { FD_STDIN, UIO_MX, 5, "stdin" }, { FD_STDOUT, UIO_MX, 6, "stdout" }, { FD_STDERR, UIO_MX, 6, "stderr" }, - { 0 } + { 0, 0, 0, "" } }; int fdesc_readdir(ap) - struct vop_readdir_args /* { + struct vnop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + int a_flags; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_numdirent; + vfs_context_t a_context; } */ *ap; { struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; + struct proc *p = current_proc(); int i, error; /* * We don't allow exporting fdesc mounts, and currently local * requests do not need cookies. 
*/ - if (ap->a_ncookies) + if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) return (EINVAL); switch (VTOFDESC(ap->a_vp)->fd_type) { @@ -553,7 +610,7 @@ fdesc_readdir(ap) i = uio->uio_offset / UIO_MX; error = 0; - while (uio->uio_resid > 0) { + while (uio_resid(uio) > 0) { dt = &rootent[i]; if (dt->d_fileno == 0) { /**eofflagp = 1;*/ @@ -590,7 +647,7 @@ fdesc_readdir(ap) i = uio->uio_offset / UIO_MX; error = 0; - while (uio->uio_resid > 0) { + while (uio_resid(uio) > 0) { if (i >= p->p_fd->fd_nfiles) break; @@ -620,10 +677,10 @@ fdesc_readdir(ap) int fdesc_readlink(ap) - struct vop_readlink_args /* { + struct vnop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -636,70 +693,42 @@ fdesc_readlink(ap) char *ln = VTOFDESC(vp)->fd_link; error = uiomove(ln, strlen(ln), ap->a_uio); } else { - error = EOPNOTSUPP; + error = ENOTSUP; } return (error); } int -fdesc_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +fdesc_read(__unused struct vnop_read_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } int -fdesc_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +fdesc_write(__unused struct vnop_write_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } int -fdesc_ioctl(ap) - struct vop_ioctl_args /* { - struct vnode *a_vp; - int a_command; - caddr_t a_data; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +fdesc_ioctl(__unused struct vnop_ioctl_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } int -fdesc_select(ap) - struct vop_select_args /* { - struct vnode *a_vp; - int a_which; - int a_fflags; - struct ucred *a_cred; - void *a_wql; - struct proc *a_p; - } */ *ap; +fdesc_select(__unused struct vnop_select_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } int fdesc_inactive(ap) - struct vop_inactive_args /* { + struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -708,15 +737,15 @@ fdesc_inactive(ap) * Clear out the v_type field to avoid * nasty things happening in vgone(). */ - VOP_UNLOCK(vp, 0, ap->a_p); vp->v_type = VNON; return (0); } int fdesc_reclaim(ap) - struct vop_reclaim_args /* { + struct vnop_reclaim_args /* { struct vnode *a_vp; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -732,11 +761,13 @@ fdesc_reclaim(ap) /* * Return POSIX pathconf information applicable to special devices. */ +int fdesc_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { @@ -765,29 +796,10 @@ fdesc_pathconf(ap) /* NOTREACHED */ } -/* - * Print out the contents of a /dev/fd vnode. 
- */ -/* ARGSUSED */ -int -fdesc_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - printf("tag VT_NON, fdesc vnode\n"); - return (0); -} /*void*/ int -fdesc_vfree(ap) - struct vop_vfree_args /* { - struct vnode *a_pvp; - ino_t a_ino; - int a_mode; - } */ *ap; +fdesc_vfree(__unused struct vnop_vfree_args *ap) { return (0); @@ -797,7 +809,7 @@ fdesc_vfree(ap) * /dev/fd "should never get here" operation */ int -fdesc_badop() +fdesc_badop(void) { return (ENOTSUP); @@ -806,93 +818,64 @@ fdesc_badop() #define VOPFUNC int (*)(void *) -#define fdesc_create ((int (*) __P((struct vop_create_args *)))eopnotsupp) -#define fdesc_mknod ((int (*) __P((struct vop_mknod_args *)))eopnotsupp) -#define fdesc_close ((int (*) __P((struct vop_close_args *)))nullop) -#define fdesc_access ((int (*) __P((struct vop_access_args *)))nullop) -#define fdesc_mmap ((int (*) __P((struct vop_mmap_args *)))eopnotsupp) -#define fdesc_revoke vop_revoke -#define fdesc_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) -#define fdesc_seek ((int (*) __P((struct vop_seek_args *)))nullop) -#define fdesc_remove ((int (*) __P((struct vop_remove_args *)))eopnotsupp) -#define fdesc_link ((int (*) __P((struct vop_link_args *)))eopnotsupp) -#define fdesc_rename ((int (*) __P((struct vop_rename_args *)))eopnotsupp) -#define fdesc_mkdir ((int (*) __P((struct vop_mkdir_args *)))eopnotsupp) -#define fdesc_rmdir ((int (*) __P((struct vop_rmdir_args *)))eopnotsupp) -#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))eopnotsupp) -#define fdesc_abortop ((int (*) __P((struct vop_abortop_args *)))nop_abortop) -#define fdesc_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) -#define fdesc_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) -#define fdesc_bmap ((int (*) __P((struct vop_bmap_args *)))fdesc_badop) -#define fdesc_strategy ((int (*) __P((struct vop_strategy_args *)))fdesc_badop) -#define fdesc_islocked \ - ((int (*) __P((struct vop_islocked_args *)))vop_noislocked) -#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))eopnotsupp) -#define fdesc_blkatoff \ - ((int (*) __P((struct vop_blkatoff_args *)))eopnotsupp) -#define fdesc_valloc ((int(*) __P(( \ - struct vnode *pvp, \ - int mode, \ - struct ucred *cred, \ - struct vnode **vpp))) eopnotsupp) -#define fdesc_truncate \ - ((int (*) __P((struct vop_truncate_args *)))eopnotsupp) -#define fdesc_update ((int (*) __P((struct vop_update_args *)))eopnotsupp) -#define fdesc_bwrite ((int (*) __P((struct vop_bwrite_args *)))eopnotsupp) -#define fdesc_blktooff ((int (*) __P((struct vop_blktooff_args *)))eopnotsupp) -#define fdesc_offtoblk ((int (*) __P((struct vop_offtoblk_args *)))eopnotsupp) -#define fdesc_cmap ((int (*) __P((struct vop_cmap_args *)))eopnotsupp) +#define fdesc_create (int (*) (struct vnop_create_args *))eopnotsupp +#define fdesc_mknod (int (*) (struct vnop_mknod_args *))eopnotsupp +#define fdesc_close (int (*) (struct vnop_close_args *))nullop +#define fdesc_access (int (*) (struct vnop_access_args *))nullop +#define fdesc_mmap (int (*) (struct vnop_mmap_args *))eopnotsupp +#define fdesc_revoke nop_revoke +#define fdesc_fsync (int (*) (struct vnop_fsync_args *))nullop +#define fdesc_remove (int (*) (struct vnop_remove_args *))eopnotsupp +#define fdesc_link (int (*) (struct vnop_link_args *))eopnotsupp +#define fdesc_rename (int (*) (struct vnop_rename_args *))eopnotsupp +#define fdesc_mkdir (int (*) (struct vnop_mkdir_args *))eopnotsupp +#define fdesc_rmdir (int (*) (struct vnop_rmdir_args 
*))eopnotsupp +#define fdesc_symlink (int (*) (struct vnop_symlink_args *))eopnotsupp +#define fdesc_strategy (int (*) (struct vnop_strategy_args *))fdesc_badop +#define fdesc_advlock (int (*) (struct vnop_advlock_args *))eopnotsupp +#define fdesc_bwrite (int (*) (struct vnop_bwrite_args *))eopnotsupp +#define fdesc_blktooff (int (*) (struct vnop_blktooff_args *))eopnotsupp +#define fdesc_offtoblk (int (*) (struct vnop_offtoblk_args *))eopnotsupp +#define fdesc_blockmap (int (*) (struct vnop_blockmap_args *))eopnotsupp int (**fdesc_vnodeop_p)(void *); struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)fdesc_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)fdesc_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)fdesc_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)fdesc_open }, /* open */ - { &vop_close_desc, (VOPFUNC)fdesc_close }, /* close */ - { &vop_access_desc, (VOPFUNC)fdesc_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)fdesc_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)fdesc_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)fdesc_read }, /* read */ - { &vop_write_desc, (VOPFUNC)fdesc_write }, /* write */ - { &vop_ioctl_desc, (VOPFUNC)fdesc_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)fdesc_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)fdesc_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)fdesc_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)fdesc_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)fdesc_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)fdesc_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)fdesc_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)fdesc_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)fdesc_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)fdesc_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)fdesc_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)fdesc_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)fdesc_readlink },/* readlink */ - { &vop_abortop_desc, (VOPFUNC)fdesc_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)fdesc_inactive },/* inactive */ - { &vop_reclaim_desc, (VOPFUNC)fdesc_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)fdesc_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)fdesc_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)fdesc_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)fdesc_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)fdesc_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)fdesc_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)fdesc_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)fdesc_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)fdesc_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)fdesc_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)fdesc_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)fdesc_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)fdesc_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)fdesc_bwrite }, /* bwrite */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)fdesc_blktooff }, /* blktooff */ - { &vop_blktooff_desc, (VOPFUNC)fdesc_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)fdesc_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { 
&vnop_lookup_desc, (VOPFUNC)fdesc_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)fdesc_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)fdesc_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)fdesc_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)fdesc_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)fdesc_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)fdesc_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)fdesc_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)fdesc_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)fdesc_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fdesc_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fdesc_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)fdesc_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)fdesc_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)fdesc_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)fdesc_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)fdesc_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)fdesc_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)fdesc_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)fdesc_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)fdesc_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)fdesc_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)fdesc_readlink },/* readlink */ + { &vnop_inactive_desc, (VOPFUNC)fdesc_inactive },/* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)fdesc_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)fdesc_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fdesc_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)fdesc_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)fdesc_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)fdesc_blktooff }, /* blktooff */ + { &vnop_blktooff_desc, (VOPFUNC)fdesc_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)fdesc_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc fdesc_vnodeop_opv_desc = diff --git a/bsd/miscfs/fifofs/fifo.h b/bsd/miscfs/fifofs/fifo.h index 1d212b694..a083b5948 100644 --- a/bsd/miscfs/fifofs/fifo.h +++ b/bsd/miscfs/fifofs/fifo.h @@ -57,59 +57,66 @@ #ifndef __FIFOFS_FOFO_H__ #define __FIFOFS_FOFO_H__ -#include +#ifdef BSD_KERNEL_PRIVATE + + +/* + * This structure is associated with the FIFO vnode and stores + * the state associated with the FIFO. + */ +struct fifoinfo { + unsigned int fi_flags; + struct socket *fi_readsock; + struct socket *fi_writesock; + long fi_readers; + long fi_writers; + unsigned int fi_count; +}; + +#define FIFO_INCREATE 1 +#define FIFO_CREATEWAIT 2 +#define FIFO_CREATED 4 + -#ifdef __APPLE_API_PRIVATE /* * Prototypes for fifo operations on vnodes. 
*/ -int fifo_ebadf(); +int fifo_ebadf(void *); + +int fifo_lookup (struct vnop_lookup_args *); +#define fifo_create (int (*) (struct vnop_create_args *))err_create +#define fifo_mknod (int (*) (struct vnop_mknod_args *))err_mknod +int fifo_open (struct vnop_open_args *); +int fifo_close (struct vnop_close_args *); +int fifo_close_internal (vnode_t, int, vfs_context_t, int); +#define fifo_access (int (*) (struct vnop_access_args *))fifo_ebadf +#define fifo_getattr (int (*) (struct vnop_getattr_args *))fifo_ebadf +#define fifo_setattr (int (*) (struct vnop_setattr_args *))fifo_ebadf +int fifo_read (struct vnop_read_args *); +int fifo_write (struct vnop_write_args *); +int fifo_ioctl (struct vnop_ioctl_args *); +int fifo_select (struct vnop_select_args *); +#define fifo_revoke nop_revoke +#define fifo_mmap (int (*) (struct vnop_mmap_args *))err_mmap +#define fifo_fsync (int (*) (struct vnop_fsync_args *))nullop +#define fifo_remove (int (*) (struct vnop_remove_args *))err_remove +#define fifo_link (int (*) (struct vnop_link_args *))err_link +#define fifo_rename (int (*) (struct vnop_rename_args *))err_rename +#define fifo_mkdir (int (*) (struct vnop_mkdir_args *))err_mkdir +#define fifo_rmdir (int (*) (struct vnop_rmdir_args *))err_rmdir +#define fifo_symlink (int (*) (struct vnop_symlink_args *))err_symlink +#define fifo_readdir (int (*) (struct vnop_readdir_args *))err_readdir +#define fifo_readlink (int (*) (struct vnop_readlink_args *))err_readlink +int fifo_inactive (struct vnop_inactive_args *); +#define fifo_reclaim (int (*) (struct vnop_reclaim_args *))nullop +#define fifo_strategy (int (*) (struct vnop_strategy_args *))err_strategy +int fifo_pathconf (struct vnop_pathconf_args *); +int fifo_advlock (struct vnop_advlock_args *); +#define fifo_valloc (int (*) (struct vnop_valloc_args *))err_valloc +#define fifo_vfree (int (*) (struct vnop_vfree_args *))err_vfree +#define fifo_bwrite (int (*) (struct vnop_bwrite_args *))nullop +#define fifo_blktooff (int (*) (struct vnop_blktooff_args *))err_blktooff -int fifo_lookup __P((struct vop_lookup_args *)); -#define fifo_create ((int (*) __P((struct vop_create_args *)))err_create) -#define fifo_mknod ((int (*) __P((struct vop_mknod_args *)))err_mknod) -int fifo_open __P((struct vop_open_args *)); -int fifo_close __P((struct vop_close_args *)); -#define fifo_access ((int (*) __P((struct vop_access_args *)))fifo_ebadf) -#define fifo_getattr ((int (*) __P((struct vop_getattr_args *)))fifo_ebadf) -#define fifo_setattr ((int (*) __P((struct vop_setattr_args *)))fifo_ebadf) -int fifo_read __P((struct vop_read_args *)); -int fifo_write __P((struct vop_write_args *)); -#define fifo_lease_check ((int (*) __P((struct vop_lease_args *)))nullop) -int fifo_ioctl __P((struct vop_ioctl_args *)); -int fifo_select __P((struct vop_select_args *)); -#define fifo_revoke vop_revoke -#define fifo_mmap ((int (*) __P((struct vop_mmap_args *)))err_mmap) -#define fifo_fsync ((int (*) __P((struct vop_fsync_args *)))nullop) -#define fifo_seek ((int (*) __P((struct vop_seek_args *)))err_seek) -#define fifo_remove ((int (*) __P((struct vop_remove_args *)))err_remove) -#define fifo_link ((int (*) __P((struct vop_link_args *)))err_link) -#define fifo_rename ((int (*) __P((struct vop_rename_args *)))err_rename) -#define fifo_mkdir ((int (*) __P((struct vop_mkdir_args *)))err_mkdir) -#define fifo_rmdir ((int (*) __P((struct vop_rmdir_args *)))err_rmdir) -#define fifo_symlink ((int (*) __P((struct vop_symlink_args *)))err_symlink) -#define fifo_readdir ((int (*) __P((struct 
vop_readdir_args *)))err_readdir) -#define fifo_readlink ((int (*) __P((struct vop_readlink_args *)))err_readlink) -#define fifo_abortop ((int (*) __P((struct vop_abortop_args *)))err_abortop) -int fifo_inactive __P((struct vop_inactive_args *)); -#define fifo_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop) -#define fifo_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) -#define fifo_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) -int fifo_bmap __P((struct vop_bmap_args *)); -#define fifo_strategy ((int (*) __P((struct vop_strategy_args *)))err_strategy) -int fifo_print __P((struct vop_print_args *)); -#define fifo_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) -int fifo_pathconf __P((struct vop_pathconf_args *)); -int fifo_advlock __P((struct vop_advlock_args *)); -#define fifo_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))err_blkatoff) -#define fifo_valloc ((int (*) __P((struct vop_valloc_args *)))err_valloc) -#define fifo_reallocblks \ - ((int (*) __P((struct vop_reallocblks_args *)))err_reallocblks) -#define fifo_vfree ((int (*) __P((struct vop_vfree_args *)))err_vfree) -#define fifo_truncate ((int (*) __P((struct vop_truncate_args *)))nullop) -#define fifo_update ((int (*) __P((struct vop_update_args *)))nullop) -#define fifo_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop) -#define fifo_blktooff ((int (*) __P((struct vop_blktooff_args *)))err_blktooff) +#endif /* BSD_KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ #endif /* __FIFOFS_FOFO_H__ */ diff --git a/bsd/miscfs/fifofs/fifo_vnops.c b/bsd/miscfs/fifofs/fifo_vnops.c index c35fe724b..72358ae91 100644 --- a/bsd/miscfs/fifofs/fifo_vnops.c +++ b/bsd/miscfs/fifofs/fifo_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,82 +59,61 @@ #include #include #include -#include +#include #include #include #include #include #include -#include +#include #include #include #include #include -/* - * This structure is associated with the FIFO vnode and stores - * the state associated with the FIFO. 
- */ -struct fifoinfo { - struct socket *fi_readsock; - struct socket *fi_writesock; - long fi_readers; - long fi_writers; -}; - #define VOPFUNC int (*)(void *) +extern int soo_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p); +extern int soo_select(struct fileproc *fp, int which, void * wql, struct proc *p); + int (**fifo_vnodeop_p)(void *); struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)err_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vop_close_desc, (VOPFUNC)fifo_close }, /* close */ - { &vop_access_desc, (VOPFUNC)fifo_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)fifo_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)fifo_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)fifo_read }, /* read */ - { &vop_write_desc, (VOPFUNC)fifo_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)fifo_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)fifo_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)err_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)err_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)err_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)err_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)err_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)fifo_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)fifo_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)fifo_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)fifo_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)fifo_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)err_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)fifo_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)fifo_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)err_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)err_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)fifo_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)fifo_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)fifo_bwrite }, /* bwrite */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)err_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ + { &vnop_open_desc, 
(VOPFUNC)fifo_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)fifo_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)fifo_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)fifo_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)fifo_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)fifo_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)fifo_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)fifo_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)err_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)err_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)err_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)fifo_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)fifo_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)err_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)fifo_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)err_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc fifo_vnodeop_opv_desc = @@ -144,11 +123,13 @@ struct vnodeopv_desc fifo_vnodeop_opv_desc = * Trivial lookup routine that always fails. */ /* ARGSUSED */ +int fifo_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; + vfs_context_t a_context; } */ *ap; { @@ -161,58 +142,94 @@ fifo_lookup(ap) * to find an active instance of a fifo. 
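 * (A sketch of the create-once handshake used by the rewritten
 * fifo_open follows.)
 */

/* fifo_open has to drop the vnode lock around socket creation, so it
 * serializes first-open setup with the FIFO_INCREATE / FIFO_CREATEWAIT
 * / FIFO_CREATED flags: exactly one opener creates the socket pair,
 * and latecomers sleep until it is done. This is a minimal user-space
 * model of that handshake; pthreads stand in for vnode_lock(),
 * msleep() on &fip->fi_flags, and wakeup(), and nothing here is a
 * kernel API. */
#include <pthread.h>

#define F_INCREATE	0x1	/* a creator runs with the lock dropped */
#define F_CREATEWAIT	0x2	/* an opener sleeps until creation ends */
#define F_CREATED	0x4	/* resource exists; fast path from now on */

struct once {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	unsigned	flags;
};

static int
once_open(struct once *o, int (*create)(void))
{
	int error = 0;

	pthread_mutex_lock(&o->lock);
	while ((o->flags & F_CREATED) == 0) {
		if (o->flags & F_INCREATE) {
			/* someone else is creating: sleep, then retry */
			o->flags |= F_CREATEWAIT;
			pthread_cond_wait(&o->cv, &o->lock);
			continue;
		}
		o->flags |= F_INCREATE;
		pthread_mutex_unlock(&o->lock);	/* create() may block */
		error = create();
		pthread_mutex_lock(&o->lock);
		o->flags &= ~F_INCREATE;
		if (error == 0)
			o->flags |= F_CREATED;
		if (o->flags & F_CREATEWAIT) {
			o->flags &= ~F_CREATEWAIT;
			pthread_cond_broadcast(&o->cv);	/* wake all waiters */
		}
		if (error)
			break;	/* mirrors the bad1: path in fifo_open */
	}
	pthread_mutex_unlock(&o->lock);
	return (error);
}

/*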
*/ /* ARGSUSED */ +int fifo_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; struct fifoinfo *fip; - struct proc *p = ap->a_p; struct socket *rso, *wso; int error; - if ((fip = vp->v_fifoinfo) == NULL) { - MALLOC(fip, struct fifoinfo *, - sizeof(*fip), M_TEMP, M_WAITOK); - vp->v_fifoinfo = fip; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vp->v_fifoinfo = NULL; - FREE(fip, M_TEMP); - return (error); - } - fip->fi_readsock = rso; - if (error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0)) { - (void)soclose(rso); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vp->v_fifoinfo = NULL; - FREE(fip, M_TEMP); - return (error); - } - fip->fi_writesock = wso; - if (error = unp_connect2(wso, rso)) { - (void)soclose(wso); - (void)soclose(rso); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vp->v_fifoinfo = NULL; - FREE(fip, M_TEMP); - return (error); + vnode_lock(vp); + +retry: + + fip = vp->v_fifoinfo; + + if (fip == (struct fifoinfo *)0) + panic("fifo_open with no fifoinfo"); + + if ((fip->fi_flags & FIFO_CREATED) == 0) { + if (fip->fi_flags & FIFO_INCREATE) { + fip->fi_flags |= FIFO_CREATEWAIT; + error = msleep(&fip->fi_flags, &vp->v_lock, PRIBIO | PCATCH, "fifocreatewait", 0); + if (error) { + vnode_unlock(vp); + return(error); + } + goto retry; + } else { + fip->fi_flags |= FIFO_INCREATE; + vnode_unlock(vp); + if ( (error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0)) ) { + goto bad1; + } + fip->fi_readsock = rso; + + if ( (error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0)) ) { + (void)soclose(rso); + goto bad1; + } + fip->fi_writesock = wso; + + if ( (error = soconnect2(wso, rso)) ) { + (void)soclose(wso); + (void)soclose(rso); + goto bad1; + } + fip->fi_readers = fip->fi_writers = 0; + + socket_lock(wso, 1); + wso->so_state |= SS_CANTRCVMORE; + wso->so_snd.sb_lowat = PIPE_BUF; +#if 0 + /* Because all the unp is protected by single mutex + * doing it in two step may actually cause problems + * as it opens up window between the drop and acquire + */ + socket_unlock(wso, 1); + + socket_lock(rso, 1); +#endif + rso->so_state |= SS_CANTSENDMORE; + socket_unlock(wso, 1); + + vnode_lock(vp); + fip->fi_flags |= FIFO_CREATED; + fip->fi_flags &= ~FIFO_INCREATE; + + if ((fip->fi_flags & FIFO_CREATEWAIT)) { + fip->fi_flags &= ~FIFO_CREATEWAIT; + wakeup(&fip->fi_flags); + } + /* vnode lock is held to process further */ } - wso->so_state |= SS_CANTRCVMORE; - wso->so_snd.sb_lowat = PIPE_BUF; - rso->so_state |= SS_CANTSENDMORE; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - fip->fi_readers = fip->fi_writers = 0; } + + /* vnode is locked at this point */ + /* fifo in created already */ if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { + socket_lock(fip->fi_writesock, 1); fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; + socket_unlock(fip->fi_writesock, 1); + if (fip->fi_writers > 0) wakeup((caddr_t)&fip->fi_writers); } @@ -220,17 +237,18 @@ fifo_open(ap) if (ap->a_mode & FWRITE) { fip->fi_writers++; if (fip->fi_writers == 1) { + socket_lock(fip->fi_readsock, 1); fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; + socket_unlock(fip->fi_readsock, 1); + if (fip->fi_readers > 0) wakeup((caddr_t)&fip->fi_readers); } } if ((ap->a_mode & FREAD) && (ap->a_mode & O_NONBLOCK) == 0) { if 
(fip->fi_writers == 0) { - VOP_UNLOCK(vp, 0, p); - error = tsleep((caddr_t)&fip->fi_readers, - PCATCH | PSOCK, "fifoor", 0); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = msleep((caddr_t)&fip->fi_readers, &vp->v_lock, + PCATCH | PSOCK, "fifoor", 0); if (error) goto bad; if (fip->fi_readers == 1) { @@ -242,15 +260,13 @@ fifo_open(ap) if (ap->a_mode & FWRITE) { if (ap->a_mode & O_NONBLOCK) { if (fip->fi_readers == 0) { - error = ENXIO; - goto bad; + error = ENXIO; + goto bad; } } else { if (fip->fi_readers == 0) { - VOP_UNLOCK(vp, 0, p); - error = tsleep((caddr_t)&fip->fi_writers, - PCATCH | PSOCK, "fifoow", 0); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = msleep((caddr_t)&fip->fi_writers,&vp->v_lock, + PCATCH | PSOCK, "fifoow", 0); if (error) goto bad; if (fip->fi_writers == 1) { @@ -260,39 +276,57 @@ fifo_open(ap) } } } + + vnode_unlock(vp); return (0); bad: - if (error) - VOP_CLOSE(vp, ap->a_mode, ap->a_cred, p); + fifo_close_internal(vp, ap->a_mode, ap->a_context, 1); + + vnode_unlock(vp); + return (error); +bad1: + vnode_lock(vp); + + fip->fi_flags &= ~FIFO_INCREATE; + + if ((fip->fi_flags & FIFO_CREATEWAIT)) { + fip->fi_flags &= ~FIFO_CREATEWAIT; + wakeup(&fip->fi_flags); + } + vnode_unlock(vp); + return (error); } /* * Vnode op for read */ -/* ARGSUSED */ +int fifo_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { struct uio *uio = ap->a_uio; struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock; - struct proc *p = uio->uio_procp; int error, startresid; + int rflags; #if DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("fifo_read mode"); #endif - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); - if (ap->a_ioflag & IO_NDELAY) - rso->so_state |= SS_NBIO; - startresid = uio->uio_resid; + + rflags = (ap->a_ioflag & IO_NDELAY) ? MSG_NBIO : 0; + + // LP64todo - fix this! + startresid = uio_resid(uio); + /* fifo conformance - if we have a reader open on the fifo but no * writers then we need to make sure we do not block. We do that by * checking the receive buffer and if empty set error to EWOULDBLOCK. @@ -300,19 +334,15 @@ fifo_read(ap) */ error = 0; if (ap->a_vp->v_fifoinfo->fi_writers < 1) { - error = (rso->so_rcv.sb_cc == 0) ? EWOULDBLOCK : 0; + socket_lock(rso, 1); + error = (rso->so_rcv.sb_cc == 0) ? EWOULDBLOCK : 0; + socket_unlock(rso, 1); } /* skip soreceive to avoid blocking when we have no writers */ if (error != EWOULDBLOCK) { - VOP_UNLOCK(ap->a_vp, 0, p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0, - (struct mbuf **)0, (int *)0); - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); + (struct mbuf **)0, &rflags); } else { /* clear EWOULDBLOCK and return EOF (zero) */ @@ -321,102 +351,102 @@ fifo_read(ap) /* * Clear EOF indication after first such return. 
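 * (This no-writers EOF conversion is visible from user space; a small
 * demonstration follows.)
 */

/* Stand-alone demonstration of the behavior implemented above: with
 * no writer attached, a non-blocking read on a FIFO reports EOF (0)
 * rather than blocking or returning EWOULDBLOCK. The path name is
 * illustrative. */
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/tmp/fifo_eof_demo";
	char buf[64];
	ssize_t n;
	int fd;

	(void)mkfifo(path, 0600);
	fd = open(path, O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		perror("open");
		return (1);
	}
	n = read(fd, buf, sizeof(buf));	/* no writer: expect 0, not -1 */
	printf("read returned %zd\n", n);
	close(fd);
	unlink(path);
	return (0);
}

/*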
*/ - if (uio->uio_resid == startresid) + if (uio_resid(uio) == startresid) { + socket_lock(rso, 1); rso->so_state &= ~SS_CANTRCVMORE; - if (ap->a_ioflag & IO_NDELAY) - rso->so_state &= ~SS_NBIO; + socket_unlock(rso, 1); + } return (error); } /* * Vnode op for write */ -/* ARGSUSED */ +int fifo_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock; - struct proc *p = ap->a_uio->uio_procp; int error; #if DIAGNOSTIC if (ap->a_uio->uio_rw != UIO_WRITE) panic("fifo_write mode"); #endif - if (ap->a_ioflag & IO_NDELAY) - wso->so_state |= SS_NBIO; - VOP_UNLOCK(ap->a_vp, 0, p); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error = sosend(wso, (struct sockaddr *)0, ap->a_uio, 0, (struct mbuf *)0, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (ap->a_ioflag & IO_NDELAY) - wso->so_state &= ~SS_NBIO; + error = sosend(wso, (struct sockaddr *)0, ap->a_uio, 0, + (struct mbuf *)0, (ap->a_ioflag & IO_NDELAY) ? MSG_NBIO : 0); + return (error); } /* * Device ioctl operation. */ -/* ARGSUSED */ +int fifo_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - struct file filetmp; + struct proc *p = vfs_context_proc(ap->a_context); + struct fileproc filetmp; + struct fileglob filefg; int error; if (ap->a_command == FIONBIO) return (0); + bzero(&filetmp, sizeof(struct fileproc)); + filetmp.f_fglob = &filefg; if (ap->a_fflag & FREAD) { - filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; - error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p); + filetmp.f_fglob->fg_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; + error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, p); if (error) return (error); } if (ap->a_fflag & FWRITE) { - filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; - error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p); + filetmp.f_fglob->fg_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; + error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, p); if (error) return (error); } return (0); } -/* ARGSUSED */ +int fifo_select(ap) - struct vop_select_args /* { + struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void * a_wql; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - struct file filetmp; + struct proc *p = vfs_context_proc(ap->a_context); + struct fileproc filetmp; + struct fileglob filefg; int ready; + bzero(&filetmp, sizeof(struct fileproc)); + filetmp.f_fglob = &filefg; if (ap->a_which & FREAD) { - filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; - ready = soo_select(&filetmp, ap->a_which, ap->a_wql, ap->a_p); + filetmp.f_fglob->fg_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; + ready = soo_select(&filetmp, ap->a_which, ap->a_wql, p); if (ready) return (ready); } if (ap->a_which & FWRITE) { - filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; - ready = soo_select(&filetmp, ap->a_which, ap->a_wql, ap->a_p); + filetmp.f_fglob->fg_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; + ready = soo_select(&filetmp, ap->a_which, ap->a_wql, p); if (ready) return (ready); } @@ -424,101 +454,95 @@ fifo_select(ap) } int -fifo_inactive(ap) - struct 
vop_inactive_args /* { - struct vnode *a_vp; - struct proc *a_p; - } */ *ap; +fifo_inactive(__unused struct vnop_inactive_args *ap) { - - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } -/* - * This is a noop, simply returning what one has been given. - */ -fifo_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - - if (ap->a_vpp != NULL) - *ap->a_vpp = ap->a_vp; - if (ap->a_bnp != NULL) - *ap->a_bnp = ap->a_bn; - if (ap->a_runp != NULL) - *ap->a_runp = 0; - return (0); -} /* * Device close routine */ -/* ARGSUSED */ +int fifo_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; + return fifo_close_internal(ap->a_vp, ap->a_fflag, ap->a_context, 0); +} + +int +fifo_close_internal(vnode_t vp, int fflag, __unused vfs_context_t context, int locked) +{ register struct fifoinfo *fip = vp->v_fifoinfo; int error1, error2; + struct socket *rso; + struct socket *wso; - if (ap->a_fflag & FREAD) { + if (!locked) + vnode_lock(vp); + + if ((fip->fi_flags & FIFO_CREATED) == 0) { + if (!locked) + vnode_unlock(vp); + return(0); + + } + + if (fflag & FREAD) { fip->fi_readers--; if (fip->fi_readers == 0){ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + socket_lock(fip->fi_writesock, 1); socantsendmore(fip->fi_writesock); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + socket_unlock(fip->fi_writesock, 1); } } - if (ap->a_fflag & FWRITE) { + + if (fflag & FWRITE) { fip->fi_writers--; if (fip->fi_writers == 0) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + socket_lock(fip->fi_readsock, 1); socantrcvmore(fip->fi_readsock); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + socket_unlock(fip->fi_readsock, 1); } } - if (vp->v_usecount > 1) +#if 0 + if (vnode_isinuse_locked(vp, 0, 1)) { + if (!locked) + vnode_unlock(vp); + return (0); + } +#endif + + if (fip->fi_writers || fip->fi_readers) { + if (!locked) + vnode_unlock(vp); return (0); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error1 = soclose(fip->fi_readsock); - error2 = soclose(fip->fi_writesock); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vp->v_fifoinfo = NULL; - FREE(fip, M_TEMP); + } + + wso = fip->fi_writesock; + rso = fip->fi_readsock; + fip->fi_readsock = 0; + fip->fi_writesock = 0; + fip->fi_flags &= ~FIFO_CREATED; + if (!locked) + vnode_unlock(vp); + error1 = soclose(rso); + error2 = soclose(wso); + if (error1) return (error1); return (error2); } -/* - * Print out the contents of a fifo vnode. - */ -fifo_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - printf("tag VT_NON"); - fifo_printinfo(ap->a_vp); - printf("\n"); -} /* * Print out internal contents of a fifo vnode. */ +void fifo_printinfo(vp) struct vnode *vp; { @@ -531,11 +555,13 @@ fifo_printinfo(vp) /* * Return POSIX pathconf information applicable to fifo's. */ +int fifo_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { @@ -558,7 +584,8 @@ fifo_pathconf(ap) /* * Fifo failed operation */ -fifo_ebadf() +int +fifo_ebadf(__unused void *dummy) { return (EBADF); @@ -567,17 +594,10 @@ fifo_ebadf() /* * Fifo advisory byte-level locks. 
*/ -/* ARGSUSED */ -fifo_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; +int +fifo_advlock(__unused struct vnop_advlock_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } diff --git a/bsd/miscfs/nullfs/null.h b/bsd/miscfs/nullfs/null.h index a4ccc0e36..734b1d772 100644 --- a/bsd/miscfs/nullfs/null.h +++ b/bsd/miscfs/nullfs/null.h @@ -75,6 +75,15 @@ struct null_mount { }; #ifdef KERNEL +/* LP64 version of null_args. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with null_args + */ +/* LP64todo - should this move? */ +struct user_null_args { + user_addr_t target; /* Target of loopback */ +}; + /* * A cache of vnode references */ @@ -84,13 +93,13 @@ struct null_node { struct vnode *null_vnode; /* Back pointer */ }; -extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp)); +extern int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp); #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) #define VTONULL(vp) ((struct null_node *)(vp)->v_data) #define NULLTOV(xp) ((xp)->null_vnode) #ifdef NULLFS_DIAGNOSTIC -extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno)); +extern struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno); #define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__) #else #define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp) diff --git a/bsd/miscfs/nullfs/null_subr.c b/bsd/miscfs/nullfs/null_subr.c index 133cdb932..cd7f6618e 100644 --- a/bsd/miscfs/nullfs/null_subr.c +++ b/bsd/miscfs/nullfs/null_subr.c @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include #include @@ -79,8 +79,8 @@ * Null layer cache: * Each cache entry holds a reference to the lower vnode * along with a pointer to the alias vnode. When an - * entry is added the lower vnode is VREF'd. When the - * alias is removed the lower vnode is vrele'd. + * entry is added the lower vnode is vnode_get'd. When the + * alias is removed the lower vnode is vnode_put'd. */ #define NULL_NHASH(vp) \ @@ -101,7 +101,7 @@ nullfs_init() } /* - * Return a VREF'ed alias for lower vnode if already exists, else 0. + * Return a vnode_get'ed alias for lower vnode if already exists, else 0. */ static struct vnode * null_node_find(mp, lowervp) @@ -117,19 +117,15 @@ null_node_find(mp, lowervp) * Find hash base, and then search the (two-way) linked * list looking for a null_node structure which is referencing * the lower vnode. If found, the increment the null_node - * reference count (but NOT the lower vnode's VREF counter). + * reference count (but NOT the lower vnode's vnode_get counter). */ hd = NULL_NHASH(lowervp); loop: for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) { if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) { vp = NULLTOV(a); - /* - * We need vget for the VXLOCK - * stuff, but we don't want to lock - * the lower node. 
- */ - if (vget(vp, 0, p)) { + + if (vnode_get(vp)) { printf ("null_node_find: vget failed.\n"); goto loop; }; @@ -182,7 +178,7 @@ null_node_alloc(mp, lowervp, vpp) }; if (vp->v_type == VREG) ubc_info_init(vp); - VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */ + vnode_get(lowervp); /* Extra vnode_get will be vnode_put'd in null_node_create */ hd = NULL_NHASH(lowervp); LIST_INSERT_HEAD(hd, xp, null_hash); return 0; @@ -210,7 +206,7 @@ null_node_create(mp, lowervp, newvpp) #ifdef NULLFS_DIAGNOSTIC vprint("null_node_create: exists", NULLTOV(ap)); #endif - /* VREF(aliasvp); --- done in null_node_find */ + /* vnode_get(aliasvp); --- done in null_node_find */ } else { int error; @@ -228,11 +224,11 @@ null_node_create(mp, lowervp, newvpp) return error; /* - * aliasvp is already VREF'd by getnewvnode() + * aliasvp is already vnode_get'd by getnewvnode() */ } - vrele(lowervp); + vnode_put(lowervp); #if DIAGNOSTIC if (lowervp->v_usecount < 1) { @@ -261,7 +257,7 @@ null_checkvp(vp, fil, lno) struct null_node *a = VTONULL(vp); #ifdef notyet /* - * Can't do this check because vop_reclaim runs + * Can't do this check because vnop_reclaim runs * with a funny vop vector. */ if (vp->v_op != null_vnodeop_p) { diff --git a/bsd/miscfs/nullfs/null_vfsops.c b/bsd/miscfs/nullfs/null_vfsops.c index 66f61af3d..d916c8cd8 100644 --- a/bsd/miscfs/nullfs/null_vfsops.c +++ b/bsd/miscfs/nullfs/null_vfsops.c @@ -68,10 +68,11 @@ #include #include #include +#include #include #include #include -#include +#include #include #include #include @@ -79,16 +80,15 @@ /* * Mount null layer */ -int -nullfs_mount(mp, path, data, ndp, p) +static int +nullfs_mount(mp, devvp, data, context) struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; + vnode_t devvp; + user_addr_t data; + vfs_context_t context; { int error = 0; - struct null_args args; + struct user_null_args args; struct vnode *lowerrootvp, *vp; struct vnode *nullm_rootvp; struct null_mount *xmp; @@ -102,30 +102,38 @@ nullfs_mount(mp, path, data, ndp, p) * Update is a no-op */ if (mp->mnt_flag & MNT_UPDATE) { - return (EOPNOTSUPP); - /* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, p);*/ + return (ENOTSUP); + /* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, devvp, data, p);*/ } /* * Get argument */ - if (error = copyin(data, (caddr_t)&args, sizeof(struct null_args))) + if (vfs_context_is64bit(context)) { + error = copyin(data, (caddr_t)&args, sizeof (args)); + } + else { + struct null_args temp; + error = copyin(data, (caddr_t)&temp, sizeof (temp)); + args.target = CAST_USER_ADDR_T(temp.target); + } + if (error) return (error); /* * Find lower node */ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF, - UIO_USERSPACE, args.target, p); + UIO_USERSPACE, args.target, context); if (error = namei(ndp)) return (error); - + nameidone(ndp); /* * Sanity check on lower vnode */ lowerrootvp = ndp->ni_vp; - vrele(ndp->ni_dvp); + vnode_put(ndp->ni_dvp); ndp->ni_dvp = NULL; xmp = (struct null_mount *) _MALLOC(sizeof(struct null_mount), @@ -141,22 +149,18 @@ nullfs_mount(mp, path, data, ndp, p) * a reference on the root vnode. */ error = null_node_create(mp, lowerrootvp, &vp); - /* - * Unlock the node (either the lower or the alias) - */ - VOP_UNLOCK(vp, 0, p); /* * Make sure the node alias worked */ if (error) { - vrele(lowerrootvp); + vnode_put(lowerrootvp); FREE(xmp, M_UFSMNT); /* XXX */ return (error); } /* * Keep a held reference to the root vnode. - * It is vrele'd in nullfs_unmount. 
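 */

/* The copyin split in nullfs_mount above is the standard LP64
 * widen-on-copyin pattern: a 64-bit caller's args already match the
 * kernel-side user_null_args layout, while a 32-bit caller's struct
 * is read into a temporary and its pointer field widened. A minimal
 * stand-alone sketch, with copyin(9) replaced by a memcpy stub and
 * all *_demo names illustrative: */
#include <string.h>

typedef unsigned long long user_addr_demo_t;	/* ~user_addr_t */

struct null_args_demo32 { unsigned int target; };	/* 32-bit layout */
struct user_null_args_demo { user_addr_demo_t target; };/* LP64 layout */

static int
copyin_demo(const void *uaddr, void *kaddr, size_t len)
{
	memcpy(kaddr, uaddr, len);	/* stands in for copyin(9) */
	return (0);
}

static int
get_mount_args_demo(int is64bit, const void *udata,
    struct user_null_args_demo *out)
{
	struct null_args_demo32 temp;
	int error;

	if (is64bit)	/* layouts already match: copy verbatim */
		return (copyin_demo(udata, out, sizeof(*out)));

	error = copyin_demo(udata, &temp, sizeof(temp));
	if (error == 0)	/* widen the 32-bit pointer field */
		out->target = (user_addr_demo_t)temp.target;
	return (error);
}

/*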
+ * It is vnode_put'd in nullfs_unmount. */ nullm_rootvp = vp; nullm_rootvp->v_flag |= VROOT; @@ -166,14 +170,12 @@ nullfs_mount(mp, path, data, ndp, p) mp->mnt_data = (qaddr_t) xmp; vfs_getnewfsid(mp); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + (void) copyinstr(args.target, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + bzero(mp->mnt_vfsstat.f_mntfromname + size, MNAMELEN - size); #ifdef NULLFS_DIAGNOSTIC printf("nullfs_mount: lower %s, alias at %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); + mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); #endif return (0); } @@ -183,24 +185,24 @@ nullfs_mount(mp, path, data, ndp, p) * on the underlying filesystem will have been called * when that filesystem was mounted. */ -int -nullfs_start(mp, flags, p) +static int +nullfs_start(mp, flags, context) struct mount *mp; int flags; - struct proc *p; + vfs_context_t context; { return (0); - /* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p); */ + /* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, context); */ } /* * Free reference to null layer */ -int -nullfs_unmount(mp, mntflags, p) +static int +nullfs_unmount(mp, mntflags, context) struct mount *mp; int mntflags; - struct proc *p; + vfs_context_t context; { struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; int error; @@ -227,11 +229,11 @@ nullfs_unmount(mp, mntflags, p) /* * Release reference on underlying root vnode */ - vrele(nullm_rootvp); + vnode_put(nullm_rootvp); /* * And blow it away for future re-use */ - vgone(nullm_rootvp); + vnode_reclaim(nullm_rootvp); /* * Finally, throw away the null_mount structure */ @@ -240,10 +242,11 @@ nullfs_unmount(mp, mntflags, p) return 0; } -int -nullfs_root(mp, vpp) +static int +nullfs_root(mp, vpp, context) struct mount *mp; struct vnode **vpp; + vfs_context_t context; { struct proc *p = curproc; /* XXX */ struct vnode *vp; @@ -259,31 +262,30 @@ nullfs_root(mp, vpp) * Return locked reference to root. 
*/ vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + vnode_get(vp); *vpp = vp; return 0; } -int -nullfs_quotactl(mp, cmd, uid, arg, p) +static int +nullfs_quotactl(mp, cmd, uid, datap, context) struct mount *mp; int cmd; uid_t uid; - caddr_t arg; - struct proc *p; + caddr_t datap; + vfs_context_t context; { - return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p); + return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, datap, context); } -int -nullfs_statfs(mp, sbp, p) +static int +nullfs_statfs(mp, sbp, context) struct mount *mp; - struct statfs *sbp; - struct proc *p; + struct vfsstatfs *sbp; + vfs_context_t context; { int error; - struct statfs mstat; + struct vfsstatfs mstat; #ifdef NULLFS_DIAGNOSTIC printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp, @@ -294,12 +296,12 @@ nullfs_statfs(mp, sbp, p) bzero(&mstat, sizeof(mstat)); - error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, p); + error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, context); if (error) return (error); /* now copy across the "interesting" information and fake the rest */ - sbp->f_type = mstat.f_type; + //sbp->f_type = mstat.f_type; sbp->f_flags = mstat.f_flags; sbp->f_bsize = mstat.f_bsize; sbp->f_iosize = mstat.f_iosize; @@ -308,20 +310,12 @@ nullfs_statfs(mp, sbp, p) sbp->f_bavail = mstat.f_bavail; sbp->f_files = mstat.f_files; sbp->f_ffree = mstat.f_ffree; - if (sbp != &mp->mnt_stat) { - bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); - bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); - bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); - } return (0); } -int -nullfs_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; +static int +nullfs_sync(__unused struct mount *mp, __unused int waitfor, + __unused kauth_cred_t cred, __unused vfs_context_t context) { /* * XXX - Assumes no data cached at null layer. 
@@ -329,41 +323,42 @@ nullfs_sync(mp, waitfor, cred, p) return (0); } -int -nullfs_vget(mp, ino, vpp) +static int +nullfs_vget(mp, ino, vpp, context) struct mount *mp; - ino_t ino; + ino64_t ino; struct vnode **vpp; + vfs_context_t context; { - return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp, context); } -int -nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp) +static int +nullfs_fhtovp(mp, fhlen, fhp, vpp, context) struct mount *mp; - struct fid *fidp; - struct mbuf *nam; + int fhlen; + unsigned char *fhp; struct vnode **vpp; - int *exflagsp; - struct ucred**credanonp; + vfs_context_t context; { - return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam, vpp, exflagsp,credanonp); + return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fhlen, fhp, vpp, context); } -int -nullfs_vptofh(vp, fhp) +static int +nullfs_vptofh(vp, fhlenp, fhp, context) struct vnode *vp; - struct fid *fhp; + int *fhlenp; + unsigned char *fhp; + vfs_context_t context; { - return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp); + return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhlenp, fhp, context); } -int nullfs_init __P((struct vfsconf *)); +int nullfs_init (struct vfsconf *); -#define nullfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ - size_t, struct proc *)))eopnotsupp) +#define nullfs_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, proc_t))eopnotsupp struct vfsops null_vfsops = { nullfs_mount, @@ -377,5 +372,5 @@ struct vfsops null_vfsops = { nullfs_fhtovp, nullfs_vptofh, nullfs_init, - nullfs_sysctl, + nullfs_sysctl }; diff --git a/bsd/miscfs/nullfs/null_vnops.c b/bsd/miscfs/nullfs/null_vnops.c index fecb1278a..ae4e8db1b 100644 --- a/bsd/miscfs/nullfs/null_vnops.c +++ b/bsd/miscfs/nullfs/null_vnops.c @@ -113,12 +113,12 @@ * in the arguments and, if a vnode is return by the operation, * stacks a null-node on top of the returned vnode. * - * Although bypass handles most operations, vop_getattr, vop_lock, - * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not + * Although bypass handles most operations, vnop_getattr, vnop_lock, + * vnop_unlock, vnop_inactive, vnop_reclaim, and vnop_print are not * bypassed. Vop_getattr must change the fsid being returned. - * Vop_lock and vop_unlock must handle any locking for the + * Vop_lock and vnop_unlock must handle any locking for the * current vnode as well as pass the lock request down. - * Vop_inactive and vop_reclaim are not bypassed so that + * Vop_inactive and vnop_reclaim are not bypassed so that * they can handle freeing null-layer specific data. Vop_print * is not bypassed to avoid excessive debugging information. * Also, certain vnode operations change the locking state within @@ -150,7 +150,7 @@ * "mount_null /usr/include /dev/layer/null". * Changing directory to /dev/layer/null will assign * the root null-node (which was created when the null layer was mounted). - * Now consider opening "sys". A vop_lookup would be + * Now consider opening "sys". A vnop_lookup would be * done on the root null-node. This operation would bypass through * to the lower layer which would return a vnode representing * the UFS "sys". 
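 * (A tiny sketch of this stacking idea appears here before the
 * description continues.)
 */

/* The essence of the null layer in a few lines: an upper node records
 * its lower vnode, and the default operation forwards everything to
 * it. All types and names below are illustrative, not the xnu code;
 * in the code the lower node is NULLVPTOLOWERVP(vp) and the forwarder
 * is null_bypass(). */
struct lower_node;			/* the covered filesystem's vnode */

struct upper_node {
	struct lower_node *lower;	/* back pointer to the real vnode */
};

typedef int (*vop_demo_t)(struct lower_node *np, void *args);

static int
bypass_demo(struct upper_node *up, vop_demo_t op, void *args)
{
	/* swap in the lower node and invoke the same operation there */
	return (op(up->lower, args));
}

/*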
Null_bypass then builds a null-node @@ -196,10 +196,11 @@ #include #include #include +#include #include #include #include -#include +#include #include #include #include @@ -219,7 +220,7 @@ int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ * As an exception to this, vnodes can be marked "unmapped" by setting * the Nth bit in operation's vdesc_flags. * - * Also, some BSD vnode operations have the side effect of vrele'ing + * Also, some BSD vnode operations have the side effect of node_put'ing * their arguments. With stacking, the reference counts are held * by the upper node, not the lower one, so we must handle these * side-effects here. This is not of concern in Sun-derived systems @@ -227,7 +228,7 @@ int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ * * This makes the following assumptions: * - only one returned vpp - * - no INOUT vpp's (Sun's vop_open has one of these) + * - no INOUT vpp's (Sun's vnop_open has one of these) * - the vnode operation vector of the first vnode should be used * to determine what implementation of the op should be invoked * - all mapped vnodes are of our vnode-type (NEEDSWORK: @@ -235,7 +236,7 @@ int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ */ int null_bypass(ap) - struct vop_generic_args /* { + struct vnop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; @@ -285,11 +286,11 @@ null_bypass(ap) *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); /* * XXX - Several operations have the side effect - * of vrele'ing their vp's. We must account for + * of vnode_put'ing their vp's. We must account for * that. (This should go away in the future.) */ if (reles & 1) - VREF(*this_vp_p); + vnode_get(*this_vp_p); } } @@ -312,21 +313,21 @@ null_bypass(ap) if (old_vps[i]) { *(vps_p[i]) = old_vps[i]; if (reles & 1) - vrele(*(vps_p[i])); + vnode_put(*(vps_p[i])); } } /* * Map the possible out-going vpp * (Assumes that the lower layer always returns - * a VREF'ed vpp unless it gets an error.) + * a vnode_get'ed vpp unless it gets an error.) */ if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && !(descp->vdesc_flags & VDESC_NOMAP_VPP) && !error) { /* * XXX - even though some ops have vpp returned vp's, - * several ops actually vrele this before returning. + * several ops actually vnode_put this before returning. * We must avoid these ops. * (This should go away when these ops are regularized.) */ @@ -347,28 +348,21 @@ null_bypass(ap) * if this layer is mounted read-only. */ null_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; + vfs_context_t a_context; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int flags = cnp->cn_flags; - struct vop_lock_args lockargs; - struct vop_unlock_args unlockargs; struct vnode *dvp, *vp; int error; - if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) - return (EROFS); error = null_bypass(ap); - if (error == EJUSTRETURN && (flags & ISLASTCN) && - (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) - error = EROFS; + /* * We must do the same locking and unlocking at this layer as * is done in the layers below us. 
We could figure this out @@ -381,43 +375,26 @@ null_lookup(ap) vp = *ap->a_vpp; if (dvp == vp) return (error); - if (!VOP_ISLOCKED(dvp)) { - unlockargs.a_vp = dvp; - unlockargs.a_flags = 0; - unlockargs.a_p = p; - vop_nounlock(&unlockargs); - } - if (vp != NULL && VOP_ISLOCKED(vp)) { - lockargs.a_vp = vp; - lockargs.a_flags = LK_SHARED; - lockargs.a_p = p; - vop_nolock(&lockargs); - } return (error); } /* - * Setattr call. Disallow write attempts if the layer is mounted read-only. + * Setattr call. */ int -null_setattr(ap) - struct vop_setattr_args /* { +null_setattr( + struct vnop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; + struct vnode_attr *a_vap; + kauth_cred_t a_cred; struct proc *a_p; - } */ *ap; + } */ *ap) { struct vnode *vp = ap->a_vp; - struct vattr *vap = ap->a_vap; - - if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && - (vp->v_mount->mnt_flag & MNT_RDONLY)) - return (EROFS); - if (vap->va_size != VNOVAL) { + struct vnode_attr *vap = ap->a_vap; + + if (VATTR_IS_ACTIVE(vap, va_data_size)) { switch (vp->v_type) { case VDIR: return (EISDIR); @@ -429,12 +406,6 @@ null_setattr(ap) case VREG: case VLNK: default: - /* - * Disallow write attempts if the filesystem is - * mounted read-only. - */ - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); } } return (null_bypass(ap)); @@ -445,11 +416,10 @@ null_setattr(ap) */ int null_getattr(ap) - struct vop_getattr_args /* { + struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { int error; @@ -457,91 +427,31 @@ null_getattr(ap) if (error = null_bypass(ap)) return (error); /* Requires that arguments be restored. */ - ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]); return (0); } int null_access(ap) - struct vop_access_args /* { + struct vnop_access_args /* { struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; + int a_action; + vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - mode_t mode = ap->a_mode; - - /* - * Disallow write attempts on read-only layers; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - break; - } - } - return (null_bypass(ap)); -} - -/* - * We need to process our own vnode lock and then clear the - * interlock flag as it applies only to our vnode, not the - * vnodes below us on the stack. - */ -int -null_lock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - - vop_nolock(ap); - if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - ap->a_flags &= ~LK_INTERLOCK; - return (null_bypass(ap)); -} - -/* - * We need to process our own vnode unlock and then clear the - * interlock flag as it applies only to our vnode, not the - * vnodes below us on the stack. 
- */ -int -null_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - vop_nounlock(ap); - ap->a_flags &= ~LK_INTERLOCK; return (null_bypass(ap)); } int null_inactive(ap) - struct vop_inactive_args /* { + struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { /* * Do nothing (and _don't_ bypass). - * Wait to vrele lowervp until reclaim, + * Wait to vnode_put lowervp until reclaim, * so that until then our null_node is in the * cache and reusable. * @@ -551,15 +461,14 @@ null_inactive(ap) * like they do in the name lookup cache code. * That's too much work for now. */ - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } int null_reclaim(ap) - struct vop_reclaim_args /* { + struct vnop_reclaim_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -567,7 +476,7 @@ null_reclaim(ap) struct vnode *lowervp = xp->null_lowervp; /* - * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, + * Note: in vnop_reclaim, vp->v_op == dead_vnodeop_p, * so we can't call VOPs on ourself. */ /* After this assignment, this node will not be re-used. */ @@ -575,29 +484,18 @@ null_reclaim(ap) LIST_REMOVE(xp, null_hash); FREE(vp->v_data, M_TEMP); vp->v_data = NULL; - vrele (lowervp); - return (0); -} - -int -null_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, NULLVPTOLOWERVP(vp)); + vnode_put (lowervp); return (0); } /* - * XXX - vop_strategy must be hand coded because it has no + * XXX - vnop_strategy must be hand coded because it has no * vnode in its arguments. * This goes away with a merged VM/buffer cache. */ int null_strategy(ap) - struct vop_strategy_args /* { + struct vnop_strategy_args /* { struct buf *a_bp; } */ *ap; { @@ -605,24 +503,24 @@ null_strategy(ap) int error; struct vnode *savedvp; - savedvp = bp->b_vp; - bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); + savedvp = buf_vnode(bp); + buf_setvnode(bp, NULLVPTOLOWERVP(savedvp)); - error = VOP_STRATEGY(bp); + error = VNOP_STRATEGY(bp); - bp->b_vp = savedvp; + buf_setvnode(bp, savedvp); return (error); } /* - * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no + * XXX - like vnop_strategy, vnop_bwrite must be hand coded because it has no * vnode in its arguments. * This goes away with a merged VM/buffer cache.
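 * (null_strategy above and null_bwrite below share a save/swap/restore
 * idiom on the buffer's vnode; a sketch follows.)
 */

/* The idiom in isolation. buf_vnode()/buf_setvnode() and
 * VNOP_STRATEGY() are the KPI names used in the patch; the *_stub
 * externs are illustrative stand-ins so the sketch is self-contained. */
struct buf;
struct vnode;

extern struct vnode *buf_vnode_stub(struct buf *bp);
extern void buf_setvnode_stub(struct buf *bp, struct vnode *vp);
extern struct vnode *lower_vnode_stub(struct vnode *vp); /* ~NULLVPTOLOWERVP */
extern int strategy_stub(struct buf *bp);	/* ~VNOP_STRATEGY */

static int
bypass_buf_op(struct buf *bp)
{
	struct vnode *saved = buf_vnode_stub(bp);	/* remember upper vnode */
	int error;

	buf_setvnode_stub(bp, lower_vnode_stub(saved));	/* aim at lower layer */
	error = strategy_stub(bp);			/* forward the real I/O */
	buf_setvnode_stub(bp, saved);			/* always restore */
	return (error);
}

/*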
*/ int null_bwrite(ap) - struct vop_bwrite_args /* { + struct vnop_bwrite_args /* { struct buf *a_bp; } */ *ap; { @@ -630,12 +528,12 @@ null_bwrite(ap) int error; struct vnode *savedvp; - savedvp = bp->b_vp; - bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); + savedvp = buf_vnode(bp); + buf_setvnode(bp, NULLVPTOLOWERVP(savedvp)); - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); - bp->b_vp = savedvp; + buf_setvnode(bp, savedvp); return (error); } @@ -648,20 +546,17 @@ null_bwrite(ap) int (**null_vnodeop_p)(void *); struct vnodeopv_entry_desc null_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)null_bypass }, - - { &vop_lookup_desc, (VOPFUNC)null_lookup }, - { &vop_setattr_desc, (VOPFUNC)null_setattr }, - { &vop_getattr_desc, (VOPFUNC)null_getattr }, - { &vop_access_desc, (VOPFUNC)null_access }, - { &vop_lock_desc, (VOPFUNC)null_lock }, - { &vop_unlock_desc, (VOPFUNC)null_unlock }, - { &vop_inactive_desc, (VOPFUNC)null_inactive }, - { &vop_reclaim_desc, (VOPFUNC)null_reclaim }, - { &vop_print_desc, (VOPFUNC)null_print }, - - { &vop_strategy_desc, (VOPFUNC)null_strategy }, - { &vop_bwrite_desc, (VOPFUNC)null_bwrite }, + { &vnop_default_desc, (VOPFUNC)null_bypass }, + + { &vnop_lookup_desc, (VOPFUNC)null_lookup }, + { &vnop_setattr_desc, (VOPFUNC)null_setattr }, + { &vnop_getattr_desc, (VOPFUNC)null_getattr }, + { &vnop_access_desc, (VOPFUNC)null_access }, + { &vnop_inactive_desc, (VOPFUNC)null_inactive }, + { &vnop_reclaim_desc, (VOPFUNC)null_reclaim }, + + { &vnop_strategy_desc, (VOPFUNC)null_strategy }, + { &vnop_bwrite_desc, (VOPFUNC)null_bwrite }, { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; diff --git a/bsd/miscfs/specfs/spec_lockf.c b/bsd/miscfs/specfs/spec_lockf.c deleted file mode 100644 index 105656291..000000000 --- a/bsd/miscfs/specfs/spec_lockf.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Scooter Morris at Genentech Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)spec_lockf.c 8.4 (Berkeley) 10/26/94 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * This variable controls the maximum number of processes that will - * be checked in doing deadlock detection. - */ -int spec_maxlockdepth = MAXDEPTH; - -#ifdef LOCKF_DEBUG -#include -#include -int lockf_debug = 0; -struct ctldebug debug4 = { "lockf_debug", &lockf_debug }; -#endif - -#define NOLOCKF (struct lockf *)0 -#define SELF 0x1 -#define OTHERS 0x2 - -/* - * Set a byte-range lock. - */ -int -spec_lf_setlock(lock) - register struct lockf *lock; -{ - register struct lockf *block; - struct specinfo *sip = lock->lf_specinfo; - struct lockf **prev, *overlap, *ltmp; - static char lockstr[] = "lockf"; - int ovcase, priority, needtolink, error; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - spec_lf_print("lf_setlock", lock); -#endif /* LOCKF_DEBUG */ - - /* - * Set the priority - */ - priority = PLOCK; - if (lock->lf_type == F_WRLCK) - priority += 4; - priority |= PCATCH; - /* - * Scan lock list for this file looking for locks that would block us. - */ - while (block = spec_lf_getblock(lock)) { - /* - * Free the structure and return if nonblocking. - */ - if ((lock->lf_flags & F_WAIT) == 0) { - FREE(lock, M_LOCKF); - return (EAGAIN); - } - /* - * We are blocked. Since flock style locks cover - * the whole file, there is no chance for deadlock. - * For byte-range locks we must check for deadlock. - * - * Deadlock detection is done by looking through the - * wait channels to see if there are any cycles that - * involve us. MAXDEPTH is set just to make sure we - * do not go off into neverland. 
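/*
 * [Editor's note] The comment above describes the byte-range deadlock check:
 * follow the chain of "whom is my blocker waiting on" and fail with EDEADLK
 * if it loops back to the requester, giving up after MAXDEPTH hops. A
 * self-contained model of that bounded walk follows; the owner/lock structs
 * are simplified stand-ins (the kernel reads the chain out of each blocked
 * process's wait channel instead).
 */
#include <stdio.h>

#define MAXDEPTH 50
#define MY_EDEADLK 11   /* illustrative errno value */

struct owner;
struct lock {
	struct owner *lk_owner;     /* who holds or requests this lock */
	struct lock  *lk_blocker;   /* lock this request is queued behind */
};
struct owner {
	struct lock *o_waiting_on;  /* pending request this owner sleeps on */
};

/* Walk blocker -> its owner -> that owner's pending request -> its blocker...
 * If the chain reaches the requester within MAXDEPTH hops, it is a cycle. */
static int
would_deadlock(struct owner *requester, struct lock *block)
{
	struct owner *wowner = block->lk_owner;
	int i = 0;

	while (wowner != NULL && wowner->o_waiting_on != NULL && i++ < MAXDEPTH) {
		struct lock *next = wowner->o_waiting_on->lk_blocker;
		if (next == NULL)
			break;
		wowner = next->lk_owner;
		if (wowner == requester)
			return MY_EDEADLK;
	}
	return 0;
}

int
main(void)
{
	struct owner a = { 0 }, b = { 0 };
	struct lock la = { &a, 0 }, lb = { &b, 0 };
	struct lock reqa = { &a, &lb };   /* A waits behind B's lock... */
	struct lock reqb = { &b, &la };   /* ...while B waits behind A's. */

	a.o_waiting_on = &reqa;
	b.o_waiting_on = &reqb;

	printf("A vs B: %s\n", would_deadlock(&a, &lb) ? "EDEADLK" : "ok");
	return 0;
}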
- */ - if ((lock->lf_flags & F_POSIX) && - (block->lf_flags & F_POSIX)) { - register struct proc *wproc; - register struct lockf *waitblock; - int i = 0; - - /* The block is waiting on something */ - wproc = (struct proc *)block->lf_id; - while (wproc->p_wchan && - (wproc->p_wmesg == lockstr) && - (i++ < spec_maxlockdepth)) { - waitblock = (struct lockf *)wproc->p_wchan; - /* Get the owner of the blocking lock */ - waitblock = waitblock->lf_next; - if ((waitblock->lf_flags & F_POSIX) == 0) - break; - wproc = (struct proc *)waitblock->lf_id; - if (wproc == (struct proc *)lock->lf_id) { - _FREE(lock, M_LOCKF); - return (EDEADLK); - } - } - } - /* - * For flock type locks, we must first remove - * any shared locks that we hold before we sleep - * waiting for an exclusive lock. - */ - if ((lock->lf_flags & F_FLOCK) && - lock->lf_type == F_WRLCK) { - lock->lf_type = F_UNLCK; - (void) spec_lf_clearlock(lock); - lock->lf_type = F_WRLCK; - } - /* - * Add our lock to the blocked list and sleep until we're free. - * Remember who blocked us (for deadlock detection). - */ - lock->lf_next = block; - TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) { - spec_lf_print("lf_setlock: blocking on", block); - spec_lf_printlist("lf_setlock", block); - } -#endif /* LOCKF_DEBUG */ - if (error = tsleep((caddr_t)lock, priority, lockstr, 0)) { - /* - * We may have been awakened by a signal (in - * which case we must remove ourselves from the - * blocked list) and/or by another process - * releasing a lock (in which case we have already - * been removed from the blocked list and our - * lf_next field set to NOLOCKF). - */ - if (lock->lf_next) - TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, - lf_block); - _FREE(lock, M_LOCKF); - return (error); - } - } - /* - * No blocks!! Add the lock. Note that we will - * downgrade or upgrade any overlapping locks this - * process already owns. - * - * Skip over locks owned by other processes. - * Handle any locks that overlap and are owned by ourselves. - */ - prev = &sip->si_lockf; - block = sip->si_lockf; - needtolink = 1; - for (;;) { - if (ovcase = spec_lf_findoverlap(block, lock, SELF, &prev, &overlap)) - block = overlap->lf_next; - /* - * Six cases: - * 0) no overlap - * 1) overlap == lock - * 2) overlap contains lock - * 3) lock contains overlap - * 4) overlap starts before lock - * 5) overlap ends after lock - */ - switch (ovcase) { - case 0: /* no overlap */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap; - } - break; - - case 1: /* overlap == lock */ - /* - * If downgrading lock, others may be - * able to acquire it. - */ - if (lock->lf_type == F_RDLCK && - overlap->lf_type == F_WRLCK) - spec_lf_wakelock(overlap); - overlap->lf_type = lock->lf_type; - FREE(lock, M_LOCKF); - lock = overlap; /* for debug output below */ - break; - - case 2: /* overlap contains lock */ - /* - * Check for common starting point and different types. - */ - if (overlap->lf_type == lock->lf_type) { - _FREE(lock, M_LOCKF); - lock = overlap; /* for debug output below */ - break; - } - if (overlap->lf_start == lock->lf_start) { - *prev = lock; - lock->lf_next = overlap; - overlap->lf_start = lock->lf_end + 1; - } else - spec_lf_split(overlap, lock); - spec_lf_wakelock(overlap); - break; - - case 3: /* lock contains overlap */ - /* - * If downgrading lock, others may be able to - * acquire it, otherwise take the list. 
- */ - if (lock->lf_type == F_RDLCK && - overlap->lf_type == F_WRLCK) { - spec_lf_wakelock(overlap); - } else { - while (ltmp = overlap->lf_blkhd.tqh_first) { - TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, - lf_block); - TAILQ_INSERT_TAIL(&lock->lf_blkhd, - ltmp, lf_block); - } - } - /* - * Add the new lock if necessary and delete the overlap. - */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap->lf_next; - prev = &lock->lf_next; - needtolink = 0; - } else - *prev = overlap->lf_next; - _FREE(overlap, M_LOCKF); - continue; - - case 4: /* overlap starts before lock */ - /* - * Add lock after overlap on the list. - */ - lock->lf_next = overlap->lf_next; - overlap->lf_next = lock; - overlap->lf_end = lock->lf_start - 1; - prev = &lock->lf_next; - spec_lf_wakelock(overlap); - needtolink = 0; - continue; - - case 5: /* overlap ends after lock */ - /* - * Add the new lock before overlap. - */ - if (needtolink) { - *prev = lock; - lock->lf_next = overlap; - } - overlap->lf_start = lock->lf_end + 1; - spec_lf_wakelock(overlap); - break; - } - break; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) { - spec_lf_print("lf_setlock: got the lock", lock); - spec_lf_printlist("lf_setlock", lock); - } -#endif /* LOCKF_DEBUG */ - return (0); -} - -/* - * Remove a byte-range lock on an specinfo. - * - * Generally, find the lock (or an overlap to that lock) - * and remove it (or shrink it), then wakeup anyone we can. - */ -int -spec_lf_clearlock(unlock) - register struct lockf *unlock; -{ - struct specinfo *sip = unlock->lf_specinfo; - register struct lockf *lf = sip->si_lockf; - struct lockf *overlap, **prev; - int ovcase; - - if (lf == NOLOCKF) - return (0); -#ifdef LOCKF_DEBUG - if (unlock->lf_type != F_UNLCK) - panic("lf_clearlock: bad type"); - if (lockf_debug & 1) - spec_lf_print("lf_clearlock", unlock); -#endif /* LOCKF_DEBUG */ - prev = &sip->si_lockf; - while (ovcase = spec_lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) { - /* - * Wakeup the list of locks to be retried. - */ - spec_lf_wakelock(overlap); - - switch (ovcase) { - - case 1: /* overlap == lock */ - *prev = overlap->lf_next; - FREE(overlap, M_LOCKF); - break; - - case 2: /* overlap contains lock: split it */ - if (overlap->lf_start == unlock->lf_start) { - overlap->lf_start = unlock->lf_end + 1; - break; - } - spec_lf_split(overlap, unlock); - overlap->lf_next = unlock->lf_next; - break; - - case 3: /* lock contains overlap */ - *prev = overlap->lf_next; - lf = overlap->lf_next; - _FREE(overlap, M_LOCKF); - continue; - - case 4: /* overlap starts before lock */ - overlap->lf_end = unlock->lf_start - 1; - prev = &overlap->lf_next; - lf = overlap->lf_next; - continue; - - case 5: /* overlap ends after lock */ - overlap->lf_start = unlock->lf_end + 1; - break; - } - break; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - spec_lf_printlist("lf_clearlock", unlock); -#endif /* LOCKF_DEBUG */ - return (0); -} - -/* - * Check whether there is a blocking lock, - * and if so return its process identifier. 
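/*
 * [Editor's note] The six overlap cases used by spec_lf_setlock and
 * spec_lf_clearlock above, and by spec_lf_findoverlap below, are easier to
 * see in isolation. Here is a standalone classifier over [start,end] byte
 * ranges, keeping the source's convention that end == -1 means "to end of
 * file" (illustrative only; the list walking and SELF/OTHERS ownership
 * filtering are omitted). Case 2 is the one that can force a split of the
 * existing lock into two or three pieces.
 */
#include <stdio.h>

typedef long long off64;

/* Mirror of the six findoverlap outcomes; end == -1 means "to EOF". */
static int
classify(off64 lf_start, off64 lf_end, off64 start, off64 end)
{
	if ((lf_end != -1 && start > lf_end) || (end != -1 && lf_start > end))
		return 0;                               /* no overlap */
	if (lf_start == start && lf_end == end)
		return 1;                               /* overlap == lock */
	if (lf_start <= start && end != -1 && (lf_end >= end || lf_end == -1))
		return 2;                               /* overlap contains lock */
	if (start <= lf_start && (end == -1 || (lf_end != -1 && end >= lf_end)))
		return 3;                               /* lock contains overlap */
	if (lf_start < start && (lf_end >= start || lf_end == -1))
		return 4;                               /* overlap starts before lock */
	if (lf_start > start && end != -1 && (lf_end > end || lf_end == -1))
		return 5;                               /* overlap ends after lock */
	return -1;                                      /* "can't happen" (panic) */
}

int
main(void)
{
	/* existing lock [100,199] vs. a few incoming requests */
	printf("[0,49]    -> %d\n", classify(100, 199, 0, 49));    /* 0 */
	printf("[100,199] -> %d\n", classify(100, 199, 100, 199)); /* 1 */
	printf("[120,150] -> %d\n", classify(100, 199, 120, 150)); /* 2: may split */
	printf("[50,300]  -> %d\n", classify(100, 199, 50, 300));  /* 3 */
	printf("[150,300] -> %d\n", classify(100, 199, 150, 300)); /* 4 */
	printf("[50,150]  -> %d\n", classify(100, 199, 50, 150));  /* 5 */
	return 0;
}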
- */ -int -spec_lf_getlock(lock, fl) - register struct lockf *lock; - register struct flock *fl; -{ - register struct lockf *block; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 1) - spec_lf_print("lf_getlock", lock); -#endif /* LOCKF_DEBUG */ - - if (block = spec_lf_getblock(lock)) { - fl->l_type = block->lf_type; - fl->l_whence = SEEK_SET; - fl->l_start = block->lf_start; - if (block->lf_end == -1) - fl->l_len = 0; - else - fl->l_len = block->lf_end - block->lf_start + 1; - if (block->lf_flags & F_POSIX) - fl->l_pid = ((struct proc *)(block->lf_id))->p_pid; - else - fl->l_pid = -1; - } else { - fl->l_type = F_UNLCK; - } - return (0); -} - -/* - * Walk the list of locks for an specinfo and - * return the first blocking lock. - */ -struct lockf * -spec_lf_getblock(lock) - register struct lockf *lock; -{ - struct lockf **prev, *overlap, *lf = lock->lf_specinfo->si_lockf; - int ovcase; - - prev = &lock->lf_specinfo->si_lockf; - while (ovcase = spec_lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) { - /* - * We've found an overlap, see if it blocks us - */ - if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) - return (overlap); - /* - * Nope, point to the next one on the list and - * see if it blocks us - */ - lf = overlap->lf_next; - } - return (NOLOCKF); -} - -/* - * Walk the list of locks for an specinfo to - * find an overlapping lock (if any). - * - * NOTE: this returns only the FIRST overlapping lock. There - * may be more than one. - */ -int -spec_lf_findoverlap(lf, lock, type, prev, overlap) - register struct lockf *lf; - struct lockf *lock; - int type; - struct lockf ***prev; - struct lockf **overlap; -{ - off_t start, end; - - *overlap = lf; - if (lf == NOLOCKF) - return (0); -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - spec_lf_print("lf_findoverlap: looking for overlap in", lock); -#endif /* LOCKF_DEBUG */ - start = lock->lf_start; - end = lock->lf_end; - while (lf != NOLOCKF) { - if (((type & SELF) && lf->lf_id != lock->lf_id) || - ((type & OTHERS) && lf->lf_id == lock->lf_id)) { - *prev = &lf->lf_next; - *overlap = lf = lf->lf_next; - continue; - } -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - spec_lf_print("\tchecking", lf); -#endif /* LOCKF_DEBUG */ - /* - * OK, check for overlap - * - * Six cases: - * 0) no overlap - * 1) overlap == lock - * 2) overlap contains lock - * 3) lock contains overlap - * 4) overlap starts before lock - * 5) overlap ends after lock - */ - if ((lf->lf_end != -1 && start > lf->lf_end) || - (end != -1 && lf->lf_start > end)) { - /* Case 0 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("no overlap\n"); -#endif /* LOCKF_DEBUG */ - if ((type & SELF) && end != -1 && lf->lf_start > end) - return (0); - *prev = &lf->lf_next; - *overlap = lf = lf->lf_next; - continue; - } - if ((lf->lf_start == start) && (lf->lf_end == end)) { - /* Case 1 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap == lock\n"); -#endif /* LOCKF_DEBUG */ - return (1); - } - if ((lf->lf_start <= start) && - (end != -1) && - ((lf->lf_end >= end) || (lf->lf_end == -1))) { - /* Case 2 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap contains lock\n"); -#endif /* LOCKF_DEBUG */ - return (2); - } - if (start <= lf->lf_start && - (end == -1 || - (lf->lf_end != -1 && end >= lf->lf_end))) { - /* Case 3 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("lock contains overlap\n"); -#endif /* LOCKF_DEBUG */ - return (3); - } - if ((lf->lf_start < start) && - ((lf->lf_end >= start) || (lf->lf_end == -1))) { - /* Case 4 */ -#ifdef LOCKF_DEBUG - if 
(lockf_debug & 2) - printf("overlap starts before lock\n"); -#endif /* LOCKF_DEBUG */ - return (4); - } - if ((lf->lf_start > start) && - (end != -1) && - ((lf->lf_end > end) || (lf->lf_end == -1))) { - /* Case 5 */ -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - printf("overlap ends after lock\n"); -#endif /* LOCKF_DEBUG */ - return (5); - } - panic("lf_findoverlap: default"); - } - return (0); -} - -/* - * Split a lock and a contained region into - * two or three locks as necessary. - */ -void -spec_lf_split(lock1, lock2) - register struct lockf *lock1; - register struct lockf *lock2; -{ - register struct lockf *splitlock; - -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) { - spec_lf_print("lf_split", lock1); - spec_lf_print("splitting from", lock2); - } -#endif /* LOCKF_DEBUG */ - /* - * Check to see if splitting into only two pieces. - */ - if (lock1->lf_start == lock2->lf_start) { - lock1->lf_start = lock2->lf_end + 1; - lock2->lf_next = lock1; - return; - } - if (lock1->lf_end == lock2->lf_end) { - lock1->lf_end = lock2->lf_start - 1; - lock2->lf_next = lock1->lf_next; - lock1->lf_next = lock2; - return; - } - /* - * Make a new lock consisting of the last part of - * the encompassing lock - */ - MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); - bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock); - splitlock->lf_start = lock2->lf_end + 1; - TAILQ_INIT(&splitlock->lf_blkhd); - lock1->lf_end = lock2->lf_start - 1; - /* - * OK, now link it in - */ - splitlock->lf_next = lock1->lf_next; - lock2->lf_next = splitlock; - lock1->lf_next = lock2; -} - -/* - * Wakeup a blocklist - */ -void -spec_lf_wakelock(listhead) - struct lockf *listhead; -{ - register struct lockf *wakelock; - - while (wakelock = listhead->lf_blkhd.tqh_first) { - TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); - wakelock->lf_next = NOLOCKF; -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) - spec_lf_print("lf_wakelock: awakening", wakelock); -#endif /* LOCKF_DEBUG */ - wakeup((caddr_t)wakelock); - } -} - -#ifdef LOCKF_DEBUG -/* - * Print out a lock. - */ -spec_lf_print(tag, lock) - char *tag; - register struct lockf *lock; -{ - - printf("%s: lock 0x%lx for ", tag, lock); - if (lock->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lock->lf_id))->p_pid); - else - printf("id 0x%x", lock->lf_id); - printf(" on sip %d rdev <%d, %d>, %s, start %d, end %d", - lock->lf_specinfo, - major(lock->lf_specinfo->si_rdev), - minor(lock->lf_specinfo->si_rdev), - lock->lf_type == F_RDLCK ? "shared" : - lock->lf_type == F_WRLCK ? "exclusive" : - lock->lf_type == F_UNLCK ? "unlock" : - "unknown", lock->lf_start, lock->lf_end); - if (lock->lf_blkhd.tqh_first) - printf(" block 0x%x\n", lock->lf_blkhd.tqh_first); - else - printf("\n"); -} - -spec_lf_printlist(tag, lock) - char *tag; - struct lockf *lock; -{ - register struct lockf *lf, *blk; - - printf("%s: Lock list for sip %d on dev <%d, %d>:\n", - tag, lock->lf_specinfo, - major(lock->lf_specinfo->si_dev), - minor(lock->lf_specinfo->si_dev)); - for (lf = lock->lf_specinfo->si_lockf; lf; lf = lf->lf_next) { - printf("\tlock 0x%lx for ", lf); - if (lf->lf_flags & F_POSIX) - printf("proc %d", ((struct proc *)(lf->lf_id))->p_pid); - else - printf("id 0x%x", lf->lf_id); - printf(", %s, start %d, end %d", - lf->lf_type == F_RDLCK ? "shared" : - lf->lf_type == F_WRLCK ? "exclusive" : - lf->lf_type == F_UNLCK ? 
"unlock" : - "unknown", lf->lf_start, lf->lf_end); - for (blk = lf->lf_blkhd.tqh_first; blk; - blk = blk->lf_block.tqe_next) { - printf("\n\t\tlock request 0x%lx for ", blk); - if (blk->lf_flags & F_POSIX) - printf("proc %d", - ((struct proc *)(blk->lf_id))->p_pid); - else - printf("id 0x%x", blk->lf_id); - printf(", %s, start %d, end %d", - blk->lf_type == F_RDLCK ? "shared" : - blk->lf_type == F_WRLCK ? "exclusive" : - blk->lf_type == F_UNLCK ? "unlock" : - "unknown", blk->lf_start, blk->lf_end); - if (blk->lf_blkhd.tqh_first) - panic("lf_printlist: bad list"); - } - printf("\n"); - } -} -#endif /* LOCKF_DEBUG */ diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 314464b19..7cb75e4cd 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,22 +56,24 @@ */ #include -#include +#include +#include #include #include #include -#include -#include +#include +#include #include -#include +#include #include #include #include #include +#include #include #include +#include #include -#include #include #include @@ -91,70 +93,62 @@ char devcls[] = "devcls"; int (**spec_vnodeop_p)(void *); struct vnodeopv_entry_desc spec_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)err_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)spec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)spec_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)spec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)spec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)nop_lease }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)err_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)err_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)err_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)err_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)err_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)nop_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)nop_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)spec_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)nop_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { 
&vop_blkatoff_desc, (VOPFUNC)err_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)err_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)err_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)nop_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)nop_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)spec_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)err_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)err_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)spec_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)spec_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)spec_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)err_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)err_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)err_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)err_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)err_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)err_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)err_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)err_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)nop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)nop_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)spec_bwrite }, /* bwrite */ + { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ + { &vnop_pagein_desc, (VOPFUNC)err_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)err_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)spec_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)spec_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; + +static void set_blocksize(vnode_t, dev_t); + + /* * Trivial lookup routine that always fails. 
*/ int spec_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { @@ -162,10 +156,10 @@ spec_lookup(ap) return (ENOTDIR); } -void +static void set_blocksize(struct vnode *vp, dev_t dev) { - int (*size)(); + int (*size)(dev_t); int rsize; if ((major(dev) < nblkdev) && (size = bdevsw[major(dev)].d_psize)) { @@ -187,10 +181,12 @@ set_fsblocksize(struct vnode *vp) dev_t dev = (dev_t)vp->v_rdev; int maj = major(dev); - if ((u_int)maj >= nblkdev) + if ((u_int)maj >= (u_int)nblkdev) return; + vnode_lock(vp); set_blocksize(vp, dev); + vnode_unlock(vp); } } @@ -199,17 +195,17 @@ set_fsblocksize(struct vnode *vp) /* * Open a special file. */ -/* ARGSUSED */ +int spec_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - struct proc *p = ap->a_p; - struct vnode *bvp, *vp = ap->a_vp; + struct proc *p = vfs_context_proc(ap->a_context); + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct vnode *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; @@ -223,9 +219,9 @@ spec_open(ap) switch (vp->v_type) { case VCHR: - if ((u_int)maj >= nchrdev) + if ((u_int)maj >= (u_int)nchrdev) return (ENXIO); - if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { + if (cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. @@ -239,66 +235,77 @@ spec_open(ap) * currently mounted. */ if (securelevel >= 1) { - if ((bdev = chrtoblk(dev)) != NODEV && - vfinddev(bdev, VBLK, &bvp) && - bvp->v_usecount > 0 && - (error = vfs_mountedon(bvp))) + if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error)) return (error); if (iskmemdev(dev)) return (EPERM); } } - if (cdevsw[maj].d_type == D_TTY) + if (cdevsw[maj].d_type == D_TTY) { + vnode_lock(vp); vp->v_flag |= VISTTY; - VOP_UNLOCK(vp, 0, p); + vnode_unlock(vp); + } error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: - if ((u_int)maj >= nblkdev) + if ((u_int)maj >= (u_int)nblkdev) return (ENXIO); /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ - if (securelevel >= 2 && ap->a_cred != FSCRED && + if (securelevel >= 2 && cred != FSCRED && (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. */ - if (error = vfs_mountedon(vp)) + if ( (error = vfs_mountedon(vp)) ) return (error); error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p); if (!error) { u_int64_t blkcnt; u_int32_t blksize; + int setsize = 0; + u_int32_t size512 = 512; + + + if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) { + /* Switch to 512 byte sectors (temporarily) */ + if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) { + /* Get the number of 512 byte physical blocks. 
*/ + if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) { + setsize = 1; + } + } + /* If it doesn't set back, we can't recover */ + if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context)) + error = ENXIO; + } + + + vnode_lock(vp); set_blocksize(vp, dev); /* * Cache the size in bytes of the block device for later * use by spec_write(). */ - vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */ - if (!VOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, NOCRED, p)) { - /* Switch to 512 byte sectors (temporarily) */ - u_int32_t size512 = 512; - - if (!VOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, NOCRED, p)) { - /* Get the number of 512 byte physical blocks. */ - if (!VOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, NOCRED, p)) { + if (setsize) vp->v_specdevsize = blkcnt * (u_int64_t)size512; - } - } - /* If it doesn't set back, we can't recover */ - if (VOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, NOCRED, p)) - error = ENXIO; - } + else + vp->v_specdevsize = (u_int64_t)0; /* Default: Can't get */ + + vnode_unlock(vp); + } return(error); + default: + panic("spec_open type"); } return (0); } @@ -306,42 +313,39 @@ spec_open(ap) /* * Vnode op for read */ -/* ARGSUSED */ +int spec_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; struct buf *bp; - daddr_t bn, nextbn; + daddr64_t bn, nextbn; long bsize, bscale; int devBlockSize=0; - int n, on, majordev, (*ioctl)(); + int n, on; int error = 0; dev_t dev; #if DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_read proc"); #endif - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: @@ -361,55 +365,60 @@ spec_read(ap) do { on = uio->uio_offset % bsize; - bn = (uio->uio_offset / devBlockSize) &~ (bscale - 1); + bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1)); - if (vp->v_lastr + bscale == bn) { + if (vp->v_speclastr + bscale == bn) { nextbn = bn + bscale; - error = breadn(vp, bn, (int)bsize, &nextbn, + error = buf_breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else - error = bread(vp, bn, (int)bsize, NOCRED, &bp); + error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp); + + vnode_lock(vp); + vp->v_speclastr = bn; + vnode_unlock(vp); - vp->v_lastr = bn; - n = bsize - bp->b_resid; + n = bsize - buf_resid(bp); if ((on > n) || error) { if (!error) error = EINVAL; - brelse(bp); + buf_brelse(bp); return (error); } - n = min((unsigned)(n - on), uio->uio_resid); + // LP64todo - fix this! 
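/*
 * [Editor's note] The VBLK branch of spec_open above sizes the device by
 * briefly forcing the logical block size to 512 bytes so the block count
 * comes back in a known unit, then restoring the original size; if the
 * restore fails, the open fails with ENXIO because the device is left in an
 * unknown state. A userspace sketch of that probe sequence follows;
 * fake_ioctl and its command constants are stand-ins for VNOP_IOCTL and the
 * Darwin DKIOC* disk ioctls, not the real interface.
 */
#include <stdio.h>
#include <stdint.h>

enum { GETBLOCKSIZE, SETBLOCKSIZE, GETBLOCKCOUNT };

/* Stand-in for ioctls against a block device: returns 0 on success. */
static uint32_t cur_blksize = 4096;
static const uint64_t media_bytes = 1ULL << 30;   /* pretend 1 GiB disk */

static int
fake_ioctl(int cmd, void *arg)
{
	switch (cmd) {
	case GETBLOCKSIZE:  *(uint32_t *)arg = cur_blksize;               return 0;
	case SETBLOCKSIZE:  cur_blksize = *(uint32_t *)arg;               return 0;
	case GETBLOCKCOUNT: *(uint64_t *)arg = media_bytes / cur_blksize; return 0;
	}
	return -1;
}

int
main(void)
{
	uint32_t blksize, size512 = 512;
	uint64_t blkcnt, devsize = 0;
	int setsize = 0;

	if (!fake_ioctl(GETBLOCKSIZE, &blksize)) {
		/* Temporarily switch to 512-byte sectors for a stable unit... */
		if (!fake_ioctl(SETBLOCKSIZE, &size512) &&
		    !fake_ioctl(GETBLOCKCOUNT, &blkcnt))
			setsize = 1;
		/* ...and restore; if this fails the device state is unknown. */
		if (fake_ioctl(SETBLOCKSIZE, &blksize))
			return 1;   /* spec_open returns ENXIO here */
	}
	if (setsize)
		devsize = blkcnt * (uint64_t)512;   /* cached as v_specdevsize */

	printf("device size: %llu bytes\n", (unsigned long long)devsize);
	return 0;
}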
+ n = min((unsigned)(n - on), uio_resid(uio)); - error = uiomove((char *)bp->b_data + on, n, uio); + error = uiomove((char *)buf_dataptr(bp) + on, n, uio); if (n + on == bsize) - bp->b_flags |= B_AGE; - brelse(bp); - } while (error == 0 && uio->uio_resid > 0 && n != 0); + buf_markaged(bp); + buf_brelse(bp); + } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ + + return (0); } /* * Vnode op for write */ -/* ARGSUSED */ +int spec_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; struct buf *bp; - daddr_t bn; + daddr64_t bn; int bsize, blkmask, bscale; register int io_sync; register int io_size; @@ -421,27 +430,26 @@ spec_write(ap) #if DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: - VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); io_sync = (ap->a_ioflag & IO_SYNC); - io_size = uio->uio_resid; + // LP64todo - fix this! + io_size = uio_resid(uio); dev = (vp->v_rdev); @@ -455,20 +463,21 @@ spec_write(ap) do { - bn = (uio->uio_offset / devBlockSize) &~ blkmask; + bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); on = uio->uio_offset % bsize; - n = min((unsigned)(bsize - on), uio->uio_resid); + // LP64todo - fix this! + n = min((unsigned)(bsize - on), uio_resid(uio)); /* - * Use getblk() as an optimization IFF: + * Use buf_getblk() as an optimization IFF: * * 1) We are reading exactly a block on a block * aligned boundary * 2) We know the size of the device from spec_open * 3) The read doesn't span the end of the device * - * Otherwise, we fall back on bread(). + * Otherwise, we fall back on buf_bread(). 
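/*
 * [Editor's note] The comment above is about avoiding a read-modify-write:
 * when the write covers exactly one block, the device size is known from
 * spec_open, and the block lies entirely on the media, spec_write can
 * buf_getblk() an empty buffer instead of buf_bread()ing the old contents
 * first. The predicate, pulled out as a standalone check (names are
 * illustrative, not taken from the source):
 */
#include <stdio.h>
#include <stdint.h>

/* Return 1 if a write of n bytes at device offset off can take the
 * getblk fast path: whole block, known device size, fully on-media. */
static int
can_skip_preread(uint32_t n, uint32_t bsize, uint64_t off, uint64_t devsize)
{
	return n == bsize &&            /* covers the block exactly...  */
	       off % bsize == 0 &&      /* ...on an aligned boundary    */
	       devsize != 0 &&          /* size was learned in spec_open */
	       off + n <= devsize;      /* doesn't run past the media   */
}

int
main(void)
{
	uint64_t devsize = 1ULL << 20;   /* 1 MiB device, 4 KiB blocks */

	printf("%d\n", can_skip_preread(4096, 4096, 8192, devsize));     /* 1 */
	printf("%d\n", can_skip_preread(2048, 4096, 8192, devsize));     /* 0: partial block */
	printf("%d\n", can_skip_preread(4096, 4096, devsize, devsize));  /* 0: past end */
	return 0;
}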
*/ if (n == bsize && vp->v_specdevsize != (u_int64_t)0 && @@ -478,92 +487,95 @@ spec_write(ap) } if (n == bsize) - bp = getblk(vp, bn, bsize, 0, 0, BLK_WRITE); + bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE); else - error = bread(vp, bn, bsize, NOCRED, &bp); + error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp); /* Translate downstream error for upstream, if needed */ - if (!error) { - error = bp->b_error; - if (!error && (bp->b_flags & B_ERROR) != 0) { - error = EIO; - } - } + if (!error) + error = (int)buf_error(bp); if (error) { - brelse(bp); + buf_brelse(bp); return (error); } - n = min(n, bsize - bp->b_resid); - - error = uiomove((char *)bp->b_data + on, n, uio); + n = min(n, bsize - buf_resid(bp)); - bp->b_flags |= B_AGE; + error = uiomove((char *)buf_dataptr(bp) + on, n, uio); + if (error) { + buf_brelse(bp); + return (error); + } + buf_markaged(bp); if (io_sync) - bwrite(bp); + error = buf_bwrite(bp); else { if ((n + on) == bsize) - bawrite(bp); + error = buf_bawrite(bp); else - bdwrite(bp); + error = buf_bdwrite(bp); } - } while (error == 0 && uio->uio_resid > 0 && n != 0); + } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ + + return (0); } /* * Device ioctl operation. */ -/* ARGSUSED */ +int spec_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { + proc_t p = vfs_context_proc(ap->a_context); dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, ap->a_p)); + ap->a_fflag, p)); case VBLK: - if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) + if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) { if (bdevsw[major(dev)].d_type == D_TAPE) return (0); else return (1); + } return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, ap->a_p)); + ap->a_fflag, p)); default: panic("spec_ioctl"); /* NOTREACHED */ } + return (0); } -/* ARGSUSED */ +int spec_select(ap) - struct vop_select_args /* { + struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void * a_wql; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { + proc_t p = vfs_context_proc(ap->a_context); register dev_t dev; switch (ap->a_vp->v_type) { @@ -573,249 +585,154 @@ spec_select(ap) case VCHR: dev = ap->a_vp->v_rdev; - return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, ap->a_p); + return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_wql, p); } } + /* * Synch buffers associated with a block device */ -/* ARGSUSED */ int -spec_fsync(ap) - struct vop_fsync_args /* { - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct proc *a_p; - } */ *ap; +spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context) { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; - struct buf *nbp; - int s; - if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ -loop: - s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - // XXXdbg - don't flush locked blocks. they may be journaled. 
- if ((bp->b_flags & B_BUSY) || (bp->b_flags & B_LOCKED)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("spec_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - bawrite(bp); - goto loop; - } - if (ap->a_waitfor == MNT_WAIT) { - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spec_fsync", 0); - } -#if DIAGNOSTIC - if (vp->v_dirtyblkhd.lh_first) { - vprint("spec_fsync: dirty", vp); - splx(s); - goto loop; - } -#endif - } - splx(s); + buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, (char *)"spec_fsync"); + return (0); } +int +spec_fsync(ap) + struct vnop_fsync_args /* { + struct vnode *a_vp; + int a_waitfor; + vfs_context_t a_context; + } */ *ap; +{ + return spec_fsync_internal(ap->a_vp, ap->a_waitfor, ap->a_context); +} + /* * Just call the device strategy routine */ +extern int hard_throttle_on_root; + + +#define LOWPRI_DELAY_MSECS 200 +#define LOWPRI_WINDOW_MSECS 200 + +int lowpri_IO_window_msecs = LOWPRI_WINDOW_MSECS; +int lowpri_IO_delay_msecs = LOWPRI_DELAY_MSECS; + +struct timeval last_normal_IO_timestamp; +struct timeval last_lowpri_IO_timestamp; +struct timeval lowpri_IO_window = { 0, LOWPRI_WINDOW_MSECS * 1000 }; + +int spec_strategy(ap) - struct vop_strategy_args /* { + struct vnop_strategy_args /* { struct buf *a_bp; } */ *ap; { - struct buf *bp; - extern int hard_throttle_on_root; + buf_t bp; + int bflags; + dev_t bdev; + proc_t p; + struct timeval elapsed; bp = ap->a_bp; + bdev = buf_device(bp); + bflags = buf_flags(bp); if (kdebug_enable) { - int code = 0; + int code = 0; - if (bp->b_flags & B_READ) - code |= DKIO_READ; - if (bp->b_flags & B_ASYNC) - code |= DKIO_ASYNC; + if (bflags & B_READ) + code |= DKIO_READ; + if (bflags & B_ASYNC) + code |= DKIO_ASYNC; - if (bp->b_flags & B_META) - code |= DKIO_META; - else if (bp->b_flags & (B_PGIN | B_PAGEOUT)) - code |= DKIO_PAGING; + if (bflags & B_META) + code |= DKIO_META; + else if (bflags & B_PAGEIO) + code |= DKIO_PAGING; - KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - (unsigned int)bp, bp->b_dev, bp->b_blkno, bp->b_bcount, 0); + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, + (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); } - if ((bp->b_flags & B_PGIN) && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) - hard_throttle_on_root = 1; - - (*bdevsw[major(bp->b_dev)].d_strategy)(bp); - return (0); -} - -/* - * Advisory record locking support - */ -int -spec_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; -{ - register struct flock *fl = ap->a_fl; - register struct lockf *lock; - off_t start, end; - int error; - - /* - * Avoid the common case of unlocking when inode has no locks. 
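/*
 * [Editor's note] The throttling hunk added to spec_strategy above
 * rate-limits low-priority I/O purely by timestamps: every normal I/O
 * refreshes last_normal_IO_timestamp, and a low-priority I/O arriving
 * within lowpri_IO_window_msecs of it gets lowpri_IO_delay_msecs charged to
 * its uthread, to be slept off at system-call return rather than while
 * filesystem locks are held and pages are busy. A userspace model of the
 * window check; gettimeofday() stands in for microuptime(), and
 * timersub()/timercmp() are the BSD sys/time.h macros.
 */
#include <stdio.h>
#include <sys/time.h>

static int lowpri_window_msecs = 200;
static int lowpri_delay_msecs  = 200;
static struct timeval last_normal_io;    /* refreshed by normal-priority I/O */

static void
normal_io(void)
{
	gettimeofday(&last_normal_io, NULL);  /* kernel uses microuptime() */
}

/* Returns the delay (ms) a low-priority I/O should absorb at syscall
 * return: nonzero iff it lands inside the window after a normal I/O. */
static int
lowpri_io(void)
{
	struct timeval now, elapsed, window;

	gettimeofday(&now, NULL);
	timersub(&now, &last_normal_io, &elapsed);

	window.tv_sec  = lowpri_window_msecs / 1000;
	window.tv_usec = (lowpri_window_msecs % 1000) * 1000;

	return timercmp(&elapsed, &window, <) ? lowpri_delay_msecs : 0;
}

int
main(void)
{
	normal_io();                          /* a foreground task just did I/O */
	printf("lowpri delay: %d ms\n", lowpri_io());   /* inside window: 200 */

	last_normal_io.tv_sec -= 1;           /* pretend the window has passed */
	printf("lowpri delay: %d ms\n", lowpri_io());   /* outside window: 0  */
	return 0;
}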
- */ - if (ap->a_vp->v_specinfo->si_lockf == (struct lockf *)0) { - if (ap->a_op != F_SETLK) { - fl->l_type = F_UNLCK; - return (0); + if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && + (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) + hard_throttle_on_root = 1; + + if ( lowpri_IO_delay_msecs && lowpri_IO_window_msecs ) { + p = current_proc(); + + if ( (p == NULL) || !(p->p_lflag & P_LLOW_PRI_IO)) { + if (!(p->p_lflag & P_LBACKGROUND_IO)) + microuptime(&last_normal_IO_timestamp); + } else { + microuptime(&last_lowpri_IO_timestamp); + + elapsed = last_lowpri_IO_timestamp; + timevalsub(&elapsed, &last_normal_IO_timestamp); + + lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000; + lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000; + + if (timevalcmp(&elapsed, &lowpri_IO_window, <)) { + struct uthread *ut; + + /* + * I'd really like to do the IOSleep here, but + * we may be holding all kinds of filesystem related locks + * and the pages for this I/O marked 'busy'... + * we don't want to cause a normal task to block on + * one of these locks while we're throttling a task marked + * for low priority I/O... we'll mark the uthread and + * do the delay just before we return from the system + * call that triggered this I/O or from vnode_pagein + */ + ut = get_bsdthread_info(current_thread()); + ut->uu_lowpri_delay = lowpri_IO_delay_msecs; + } } } - /* - * Convert the flock structure into a start and end. - */ - switch (fl->l_whence) { + (*bdevsw[major(bdev)].d_strategy)(bp); - case SEEK_SET: - case SEEK_CUR: - /* - * Caller is responsible for adding any necessary offset - * when SEEK_CUR is used. - */ - start = fl->l_start; - break; - - case SEEK_END: - start = ap->a_vp->v_specinfo->si_devsize + fl->l_start; - break; - - default: - return (EINVAL); - } - if (fl->l_len == 0) - end = -1; - else if (fl->l_len > 0) - end = start + fl->l_len - 1; - else { /* l_len is negative */ - end = start - 1; - start += fl->l_len; - } - if (start < 0) - return (EINVAL); - /* - * Create the lockf structure - */ - MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); - lock->lf_start = start; - lock->lf_end = end; - lock->lf_id = ap->a_id; - lock->lf_specinfo = ap->a_vp->v_specinfo; - lock->lf_type = fl->l_type; - lock->lf_next = (struct lockf *)0; - TAILQ_INIT(&lock->lf_blkhd); - lock->lf_flags = ap->a_flags; - /* - * Do the requested operation. - */ - switch(ap->a_op) { - case F_SETLK: - return (spec_lf_setlock(lock)); - - case F_UNLCK: - error = spec_lf_clearlock(lock); - FREE(lock, M_LOCKF); - return (error); - - case F_GETLK: - error = spec_lf_getlock(lock, fl); - FREE(lock, M_LOCKF); - return (error); - - default: - _FREE(lock, M_LOCKF); - return (EINVAL); - } - /* NOTREACHED */ + return (0); } -/* - * This is a noop, simply returning what one has been given. - */ -spec_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - - if (ap->a_vpp != NULL) - *ap->a_vpp = ap->a_vp; - if (ap->a_bnp != NULL) - *ap->a_bnp = ap->a_bn * (PAGE_SIZE / ap->a_vp->v_specsize); - if (ap->a_runp != NULL) - *ap->a_runp = (MAXPHYSIO / PAGE_SIZE) - 1; - return (0); -} /* * This is a noop, simply returning what one has been given. 
 */ -spec_cmap(ap) - struct vop_cmap_args /* { - struct vnode *a_vp; - off_t a_offset; - size_t a_size; - daddr_t *a_bpn; - size_t *a_run; - void *a_poff; - } */ *ap; +int +spec_blockmap(__unused struct vnop_blockmap_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } /* * Device close routine */ -/* ARGSUSED */ +int spec_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; - int (*devclose) __P((dev_t, int, int, struct proc *)); + int (*devclose)(dev_t, int, int, struct proc *); int mode, error; + struct proc *p = vfs_context_proc(ap->a_context); switch (vp->v_type) { @@ -829,17 +746,15 @@ spec_close(ap) * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ - if (vcount(vp) == 2 && ap->a_p && - vp == ap->a_p->p_session->s_ttyvp) { - ap->a_p->p_session->s_ttyvp = NULL; - vrele(vp); + if (vcount(vp) == 2 && p && + vp == p->p_session->s_ttyvp) { + p->p_session->s_ttyvp = NULL; + vnode_rele(vp); } /* - * If the vnode is locked, then we are in the midst - * of forcibly closing the device, otherwise we only * close on last reference. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); devclose = cdevsw[major(dev)].d_close; mode = S_IFCHR; @@ -852,33 +767,30 @@ spec_close(ap) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); - VOP_UNLOCK(vp, 0, ap->a_p); + if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) + return (error); + + error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); if (error) return (error); /* - * We do not want to really close the device if it - * is still in use unless we are trying to close it - * forcibly. Since every use (buffer, vnode, swap, cmap) + * Since every use (buffer, vnode, swap, blockmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); #else /* DEVFS_IMPLEMENTS_LOCKING */ /* - * We do not want to really close the device if it - * is still in use unless we are trying to close it - * forcibly. Since every use (buffer, vnode, swap, blockmap) + * Since every use (buffer, vnode, swap, blockmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + if (vcount(vp) > 1) return (0); /* @@ -886,7 +798,10 @@ * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ - error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) + return (error); + + error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); if (error) return (error); #endif /* DEVFS_IMPLEMENTS_LOCKING */ @@ -898,30 +813,19 @@ panic("spec_close: not special"); } - return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); -} - -/* - * Print out the contents of a special device vnode. 
- */ -spec_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), - minor(ap->a_vp->v_rdev)); + return ((*devclose)(dev, ap->a_fflag, mode, p)); } /* * Return POSIX pathconf information applicable to special devices. */ +int spec_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { @@ -952,7 +856,7 @@ spec_pathconf(ap) int spec_devblocksize(ap) - struct vop_devblocksize_args /* { + struct vnop_devblocksize_args /* { struct vnode *a_vp; int *a_retval; } */ *ap; @@ -964,7 +868,8 @@ spec_devblocksize(ap) /* * Special device failed operation */ -spec_ebadf() +int +spec_ebadf(__unused void *dummy) { return (EBADF); @@ -973,6 +878,7 @@ spec_ebadf() /* * Special device bad operation */ +int spec_badop() { @@ -983,9 +889,9 @@ spec_badop() /* Blktooff derives file offset from logical block number */ int spec_blktooff(ap) - struct vop_blktooff_args /* { + struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */ *ap; { @@ -994,42 +900,46 @@ spec_blktooff(ap) switch (vp->v_type) { case VCHR: *ap->a_offset = (off_t)-1; /* failure */ - return (EOPNOTSUPP); + return (ENOTSUP); case VBLK: printf("spec_blktooff: not implemented for VBLK\n"); *ap->a_offset = (off_t)-1; /* failure */ - return (EOPNOTSUPP); + return (ENOTSUP); default: panic("spec_blktooff type"); } /* NOTREACHED */ + + return (0); } /* Offtoblk derives logical block number from file offset */ int spec_offtoblk(ap) - struct vop_offtoblk_args /* { + struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */ *ap; { register struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VCHR: - *ap->a_lblkno = (daddr_t)-1; /* failure */ - return (EOPNOTSUPP); + *ap->a_lblkno = (daddr64_t)-1; /* failure */ + return (ENOTSUP); case VBLK: printf("spec_offtoblk: not implemented for VBLK\n"); - *ap->a_lblkno = (daddr_t)-1; /* failure */ - return (EOPNOTSUPP); + *ap->a_lblkno = (daddr64_t)-1; /* failure */ + return (ENOTSUP); default: panic("spec_offtoblk type"); } /* NOTREACHED */ + + return (0); } diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h index b90c72a6e..61c340ac1 100644 --- a/bsd/miscfs/specfs/specdev.h +++ b/bsd/miscfs/specfs/specdev.h @@ -68,15 +68,14 @@ * special devices. It is allocated in checkalias and freed * in vgone. 
*/ -struct lockf; struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; long si_flags; dev_t si_rdev; - daddr_t si_size; /* device block size in bytes */ - u_int64_t si_devsize; /* actual device size in bytes */ - struct lockf *si_lockf; /* head of advisory lock list */ + daddr_t si_size; /* device block size in bytes */ + daddr64_t si_lastr; /* last read blkno (read-ahead) */ + u_int64_t si_devsize; /* actual device size in bytes */ }; /* * Exported shorthand @@ -87,6 +86,7 @@ struct specinfo { #define v_specflags v_specinfo->si_flags #define v_specsize v_specinfo->si_size #define v_specdevsize v_specinfo->si_devsize +#define v_speclastr v_specinfo->si_lastr /* * Flags for specinfo @@ -116,56 +116,48 @@ struct flock; struct buf; struct uio; -int spec_ebadf(); +int spec_ebadf(void *); -int spec_lookup __P((struct vop_lookup_args *)); -#define spec_create ((int (*) __P((struct vop_access_args *)))err_create) -#define spec_mknod ((int (*) __P((struct vop_access_args *)))err_mknod) -int spec_open __P((struct vop_open_args *)); -int spec_close __P((struct vop_close_args *)); -#define spec_access ((int (*) __P((struct vop_access_args *)))spec_ebadf) -#define spec_getattr ((int (*) __P((struct vop_getattr_args *)))spec_ebadf) -#define spec_setattr ((int (*) __P((struct vop_setattr_args *)))spec_ebadf) -int spec_read __P((struct vop_read_args *)); -int spec_write __P((struct vop_write_args *)); -#define spec_lease_check ((int (*) __P((struct vop_access_args *)))nop_lease) -int spec_ioctl __P((struct vop_ioctl_args *)); -int spec_select __P((struct vop_select_args *)); -#define spec_revoke ((int (*) __P((struct vop_access_args *)))nop_revoke) -#define spec_mmap ((int (*) __P((struct vop_access_args *)))err_mmap) -int spec_fsync __P((struct vop_fsync_args *)); -#define spec_seek ((int (*) __P((struct vop_access_args *)))err_seek) -#define spec_remove ((int (*) __P((struct vop_access_args *)))err_remove) -#define spec_link ((int (*) __P((struct vop_access_args *)))err_link) -#define spec_rename ((int (*) __P((struct vop_access_args *)))err_rename) -#define spec_mkdir ((int (*) __P((struct vop_access_args *)))err_mkdir) -#define spec_rmdir ((int (*) __P((struct vop_access_args *)))err_rmdir) -#define spec_symlink ((int (*) __P((struct vop_access_args *)))err_symlink) -#define spec_readdir ((int (*) __P((struct vop_access_args *)))err_readdir) -#define spec_readlink ((int (*) __P((struct vop_access_args *)))err_readlink) -#define spec_abortop ((int (*) __P((struct vop_access_args *)))err_abortop) -#define spec_inactive ((int (*) __P((struct vop_access_args *)))nop_inactive) -#define spec_reclaim ((int (*) __P((struct vop_access_args *)))nop_reclaim) -#define spec_lock ((int (*) __P((struct vop_access_args *)))nop_lock) -#define spec_unlock ((int (*) __P((struct vop_access_args *)))nop_unlock) -int spec_bmap __P((struct vop_bmap_args *)); -int spec_strategy __P((struct vop_strategy_args *)); -int spec_print __P((struct vop_print_args *)); -#define spec_islocked ((int (*) __P((struct vop_access_args *)))nop_islocked) -int spec_pathconf __P((struct vop_pathconf_args *)); -int spec_advlock __P((struct vop_advlock_args *)); -#define spec_blkatoff ((int (*) __P((struct vop_access_args *)))err_blkatoff) -#define spec_valloc ((int (*) __P((struct vop_access_args *)))err_valloc) -#define spec_vfree ((int (*) __P((struct vop_access_args *)))err_vfree) -#define spec_truncate ((int (*) __P((struct vop_access_args *)))nop_truncate) -#define spec_update ((int (*) __P((struct vop_access_args 
*)))nop_update) -#define spec_reallocblks \ - ((int (*) __P((struct vop_reallocblks_args *)))err_reallocblks) -#define spec_bwrite ((int (*) __P((struct vop_bwrite_args *)))nop_bwrite) -int spec_devblocksize __P((struct vop_devblocksize_args *)); -int spec_blktooff __P((struct vop_blktooff_args *)); -int spec_offtoblk __P((struct vop_offtoblk_args *)); -int spec_cmap __P((struct vop_cmap_args *)); +int spec_lookup (struct vnop_lookup_args *); +#define spec_create (int (*) (struct vnop_access_args *))err_create +#define spec_mknod (int (*) (struct vnop_access_args *))err_mknod +int spec_open (struct vnop_open_args *); +int spec_close (struct vnop_close_args *); +#define spec_access (int (*) (struct vnop_access_args *))spec_ebadf +#define spec_getattr (int (*) (struct vnop_getattr_args *))spec_ebadf +#define spec_setattr (int (*) (struct vnop_setattr_args *))spec_ebadf +int spec_read (struct vnop_read_args *); +int spec_write (struct vnop_write_args *); +int spec_ioctl (struct vnop_ioctl_args *); +int spec_select (struct vnop_select_args *); +#define spec_revoke (int (*) (struct vnop_access_args *))nop_revoke +#define spec_mmap (int (*) (struct vnop_access_args *))err_mmap +int spec_fsync (struct vnop_fsync_args *); +int spec_fsync_internal (vnode_t, int, vfs_context_t); +#define spec_remove (int (*) (struct vnop_access_args *))err_remove +#define spec_link (int (*) (struct vnop_access_args *))err_link +#define spec_rename (int (*) (struct vnop_access_args *))err_rename +#define spec_mkdir (int (*) (struct vnop_access_args *))err_mkdir +#define spec_rmdir (int (*) (struct vnop_access_args *))err_rmdir +#define spec_symlink (int (*) (struct vnop_access_args *))err_symlink +#define spec_readdir (int (*) (struct vnop_access_args *))err_readdir +#define spec_readlink (int (*) (struct vnop_access_args *))err_readlink +#define spec_inactive (int (*) (struct vnop_access_args *))nop_inactive +#define spec_reclaim (int (*) (struct vnop_access_args *))nop_reclaim +#define spec_lock (int (*) (struct vnop_access_args *))nop_lock +#define spec_unlock (int (*)(struct vnop_access_args *))nop_unlock +int spec_strategy (struct vnop_strategy_args *); +#define spec_islocked (int (*) (struct vnop_access_args *))nop_islocked +int spec_pathconf (struct vnop_pathconf_args *); +#define spec_advlock (int (*) (struct vnop_access_args *))err_advlock +#define spec_blkatoff (int (*) (struct vnop_access_args *))err_blkatoff +#define spec_valloc (int (*) (struct vnop_access_args *))err_valloc +#define spec_vfree (int (*) (struct vnop_access_args *))err_vfree +#define spec_bwrite (int (*) (struct vnop_bwrite_args *))nop_bwrite +int spec_devblocksize (struct vnop_devblocksize_args *); +int spec_blktooff (struct vnop_blktooff_args *); +int spec_offtoblk (struct vnop_offtoblk_args *); +int spec_blockmap (struct vnop_blockmap_args *); #endif /* __APPLE_API_PRIVATE */ #endif /* _MISCFS_SPECFS_SPECDEV_H_ */ diff --git a/bsd/miscfs/synthfs/synthfs.h b/bsd/miscfs/synthfs/synthfs.h index 28a85f8b2..cb152a45e 100644 --- a/bsd/miscfs/synthfs/synthfs.h +++ b/bsd/miscfs/synthfs/synthfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -97,7 +97,6 @@ struct synthfsnode struct synthfsnode *s_parent; struct vnode *s_vp; char *s_name; - struct lock__bsd__ s_lock; unsigned long s_nodeflags; /* Internal synthfs flags: IN_CHANGED, IN_MODIFIED, etc. 
*/ unsigned long s_pflags; /* File system flags: IMMUTABLE, etc. */ unsigned long s_nodeid; @@ -146,7 +145,10 @@ struct synthfsnode (sp)->s_modificationtime = *(t2); \ } \ if ((sp)->s_nodeflags & IN_CHANGE) { \ - (sp)->s_changetime = time; \ + struct timeval _tv; \ + \ + microtime(&_tv); \ + (sp)->s_changetime = _tv; \ }; \ (sp)->s_nodeflags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ @@ -182,54 +184,49 @@ struct synthfsnode extern int (**synthfs_vnodeop_p)(void *); __BEGIN_DECLS -int synthfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); -int synthfs_start __P((struct mount *, int, struct proc *)); -int synthfs_unmount __P((struct mount *, int, struct proc *)); -int synthfs_root __P((struct mount *, struct vnode **)); -int synthfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); -int synthfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -int synthfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int synthfs_vget __P((struct mount *, void *ino, struct vnode **)); -int synthfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); -int synthfs_vptofh __P((struct vnode *, struct fid *)); -int synthfs_init __P((struct vfsconf *)); -int synthfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); - -int synthfs_create __P((struct vop_create_args *)); -int synthfs_open __P((struct vop_open_args *)); -int synthfs_mmap __P((struct vop_mmap_args *)); -int synthfs_access __P((struct vop_access_args *)); -int synthfs_getattr __P((struct vop_getattr_args *)); -int synthfs_setattr __P((struct vop_setattr_args *)); -int synthfs_rename __P((struct vop_rename_args *)); -int synthfs_select __P((struct vop_select_args *)); -int synthfs_remove __P((struct vop_remove_args *)); -int synthfs_mkdir __P((struct vop_mkdir_args *)); -int synthfs_rmdir __P((struct vop_rmdir_args *)); -int synthfs_symlink __P((struct vop_symlink_args *)); -int synthfs_readlink __P((struct vop_readlink_args *)); -int synthfs_readdir __P((struct vop_readdir_args *)); -int synthfs_cached_lookup __P((struct vop_cachedlookup_args *)); -int synthfs_lookup __P((struct vop_cachedlookup_args *)); -int synthfs_pathconf __P((struct vop_pathconf_args *)); -int synthfs_update __P((struct vop_update_args *)); +int synthfs_mount (struct mount *, vnode_t, user_addr_t, vfs_context_t context); +int synthfs_start (struct mount *, int, vfs_context_t context); +int synthfs_unmount (struct mount *, int, vfs_context_t context); +int synthfs_root (struct mount *, struct vnode **, vfs_context_t context); +int synthfs_vfs_getattr (mount_t mp, struct vfs_attr *fsap, vfs_context_t context); +int synthfs_sync (struct mount *, int, vfs_context_t context); +int synthfs_vget (struct mount *, ino64_t ino, struct vnode **, vfs_context_t context); +int synthfs_fhtovp (struct mount *, int, unsigned char *, struct vnode **, vfs_context_t context); +int synthfs_vptofh (struct vnode *, int *, unsigned char *, vfs_context_t context); +int synthfs_init (struct vfsconf *); +int synthfs_sysctl (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t context); + +int synthfs_create (struct vnop_create_args *); +int synthfs_open (struct vnop_open_args *); +int synthfs_mmap (struct vnop_mmap_args *); +int synthfs_getattr (struct vnop_getattr_args *); +int synthfs_setattr (struct vnop_setattr_args *); +int synthfs_rename (struct vnop_rename_args *); +int synthfs_select (struct vnop_select_args *); +int 
synthfs_remove (struct vnop_remove_args *); +int synthfs_mkdir (struct vnop_mkdir_args *); +int synthfs_rmdir (struct vnop_rmdir_args *); +int synthfs_symlink (struct vnop_symlink_args *); +int synthfs_readlink (struct vnop_readlink_args *); +int synthfs_readdir (struct vnop_readdir_args *); +int synthfs_cached_lookup (struct vnop_lookup_args *); +int synthfs_lookup (struct vnop_lookup_args *); +int synthfs_pathconf (struct vnop_pathconf_args *); -int synthfs_lock __P((struct vop_lock_args *)); -int synthfs_unlock __P((struct vop_unlock_args *)); -int synthfs_islocked __P((struct vop_islocked_args *)); - -int synthfs_inactive __P((struct vop_inactive_args*)); -int synthfs_reclaim __P((struct vop_reclaim_args*)); - -void synthfs_setupuio __P((struct iovec *iov, struct uio *uio, void *buffer, size_t bufsize, enum uio_seg space, enum uio_rw direction, struct proc *p)); -int synthfs_new_directory __P((struct mount *mp, struct vnode *dp, const char *name, unsigned long nodeid, mode_t mode, struct proc *p, struct vnode **vpp)); -int synthfs_new_symlink __P((struct mount *mp, struct vnode *dp, const char *name, unsigned long nodeid, char *targetstring, struct proc *p, struct vnode **vpp)); -long synthfs_adddirentry __P((u_int32_t fileno, u_int8_t type, const char *name, struct uio *uio)); -int synthfs_remove_entry __P((struct vnode *vp)); -int synthfs_remove_directory __P((struct vnode *vp)); -int synthfs_remove_symlink __P((struct vnode *vp)); -int synthfs_move_rename_entry __P((struct vnode *source_vp, struct vnode *newparent_vp, char *newname)); -int synthfs_derive_vnode_path __P((struct vnode *vp, char *vnpath, size_t pathbuffersize)); + +int synthfs_inactive (struct vnop_inactive_args*); +int synthfs_reclaim (struct vnop_reclaim_args*); + +void synthfs_setupuio (struct iovec *iov, struct uio *uio, void *buffer, size_t bufsize, enum uio_seg space, enum uio_rw direction, proc_t p); +int synthfs_new_directory (mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, mode_t mode, proc_t p, vnode_t *vpp); +int synthfs_new_symlink (mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, char *targetstring, proc_t p, vnode_t *vpp); +long synthfs_adddirentry (u_int32_t fileno, u_int8_t type, const char *name, struct uio *uio); +int synthfs_remove_entry (struct vnode *vp); +int synthfs_remove_directory (struct vnode *vp); +int synthfs_remove_symlink (struct vnode *vp); +int synthfs_move_rename_entry (struct vnode *source_vp, struct vnode *newparent_vp, char *newname); +int synthfs_derive_vnode_path (struct vnode *vp, char *vnpath, size_t pathbuffersize); +int synthfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor); #endif /* __APPLE_API_PRIVATE */ #endif /* __SYNTHFS_H__ */ diff --git a/bsd/miscfs/synthfs/synthfs_util.c b/bsd/miscfs/synthfs/synthfs_util.c index 37ec7cde4..d28e6ec5d 100644 --- a/bsd/miscfs/synthfs/synthfs_util.c +++ b/bsd/miscfs/synthfs/synthfs_util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -34,16 +34,16 @@ #include #include #include -#include #include #include -#include -#include +#include +#include #include #include #include #include #include +#include #include #include @@ -62,25 +62,6 @@ struct synthfs_direntry_head { #define PATHSEPARATOR '/' #define ROOTDIRID 2 -void synthfs_setupuio(struct iovec *iov, - struct uio *uio, - void *buffer, - size_t bufsize, - enum uio_seg space, - enum uio_rw direction, - struct proc *p) { - iov->iov_base = (char *)buffer; - iov->iov_len = bufsize; - - uio->uio_iov = iov; - uio->uio_iovcnt = 1; - uio->uio_offset = 0; - uio->uio_resid = bufsize; - uio->uio_segflg = space; - uio->uio_rw = direction; - uio->uio_procp = p; -} - static int synthfs_insertnode(struct synthfsnode *newnode_sp, struct synthfsnode *parent_sp) { struct timeval now; @@ -91,25 +72,25 @@ static int synthfs_insertnode(struct synthfsnode *newnode_sp, struct synthfsnode ++parent_sp->s_u.d.d_entrycount; newnode_sp->s_parent = parent_sp; - parent_sp->s_nodeflags |= IN_CHANGE | IN_MODIFIED; - now = time; - VOP_UPDATE(STOV(parent_sp), &now, &now, 0); + parent_sp->s_nodeflags |= IN_CHANGE | IN_MODIFIED; + microtime(&now); + synthfs_update(STOV(parent_sp), &now, &now, 0); return 0; } -static int synthfs_newnode(struct mount *mp, struct vnode *dp, const char *name, unsigned long nodeid, mode_t mode, struct proc *p, struct vnode **vpp) { +static int synthfs_newnode(mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, + mode_t mode, __unused proc_t p, enum vtype vtype, vnode_t *vpp) { int result; struct synthfsnode *sp; struct vnode *vp; struct timeval now; char *nodename; + struct vnode_fsparam vfsp; - /* Allocate the synthfsnode now to avoid blocking between the call - to getnewvnode(), below, and the initialization of v_data: */ - MALLOC(sp, struct synthfsnode *, sizeof(struct synthfsnode), M_SYNTHFS, M_WAITOK); + MALLOC(sp, struct synthfsnode *, sizeof(struct synthfsnode), M_SYNTHFS, M_WAITOK); if (name == NULL) { MALLOC(nodename, char *, 1, M_TEMP, M_WAITOK); @@ -119,31 +100,12 @@ static int synthfs_newnode(struct mount *mp, struct vnode *dp, const char *name, strcpy(nodename, name); }; - /* - Note that getnewvnode() returns the vnode with a refcount of +1; - this routine returns the newly created vnode with this positive refcount. 
- */ - result = getnewvnode(VT_SYNTHFS, mp, synthfs_vnodeop_p, &vp); - if (result != 0) { - DBG_VOP(("getnewvnode failed with error code %d\n", result)); - FREE(nodename, M_TEMP); - FREE(sp, M_TEMP); - return result; - } - if (vp == NULL) { - DBG_VOP(("getnewvnode returned NULL without an error!\n")); - FREE(nodename, M_TEMP); - FREE(sp, M_TEMP); - return EINVAL; - } - /* Initialize the relevant synthfsnode fields: */ bzero(sp, sizeof(*sp)); - lockinit(&sp->s_lock, PINOD, "synthfsnode", 0, 0); sp->s_nodeid = nodeid; /* Initialize all times from a consistent snapshot of the clock: */ - now = time; + microtime(&now); sp->s_createtime = now; sp->s_accesstime = now; sp->s_modificationtime = now; @@ -151,11 +113,32 @@ static int synthfs_newnode(struct mount *mp, struct vnode *dp, const char *name, sp->s_name = nodename; sp->s_mode = mode; + + //bzero(&vfsp, sizeof(struct vnode_fsparam)); + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "synthfs"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = sp; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = synthfs_vnodeop_p; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = 0; + + result = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); + if (result != 0) { + DBG_VOP(("vnode_create failed with error code %d\n", result)); + FREE(nodename, M_TEMP); + FREE(sp, M_TEMP); + return result; + } + vnode_ref(vp); + sp->s_vp = vp; - vp->v_data = sp; - vget(vp, LK_EXCLUSIVE, p); - /* If there's a parent directory, update its subnode structures to insert this new node: */ if (dp) { result = synthfs_insertnode(sp, VTOS(dp)); @@ -178,8 +161,8 @@ int synthfs_remove_entry(struct vnode *vp) { --psp->s_u.d.d_entrycount; psp->s_nodeflags |= IN_CHANGE | IN_MODIFIED; - now = time; - VOP_UPDATE(STOV(psp), &now, &now, 0); + microtime(&now); + synthfs_update(STOV(psp), &now, &now, 0); }; return 0; @@ -219,15 +202,13 @@ int synthfs_new_directory(struct mount *mp, struct vnode *dp, const char *name, struct vnode *vp; struct synthfsnode *sp; - result = synthfs_newnode(mp, dp, name, nodeid, mode, p, &vp); + result = synthfs_newnode(mp, dp, name, nodeid, mode, p, VDIR, &vp); if (result) { return result; }; sp = VTOS(vp); sp->s_linkcount = 2; - /* Initialize the relevant vnode fields: */ - vp->v_type = VDIR; if (dp) { ++VTOS(dp)->s_linkcount; /* Account for the [fictitious] ".."
link now removed */ }; + vnode_rele(vp); /* Do the standard cleanup involved in pruning an entry from the filesystem: */ return synthfs_remove_entry(vp); /* Do whatever standard cleanup is required */ @@ -271,16 +253,13 @@ int synthfs_new_symlink( struct vnode *vp; struct synthfsnode *sp; - result = synthfs_newnode(mp, dp, name, nodeid, 0, p, &vp); + result = synthfs_newnode(mp, dp, name, nodeid, 0, p, VLNK, &vp); if (result) { return result; }; sp = VTOS(vp); sp->s_linkcount = 1; - /* Initialize the relevant vnode fields: */ - vp->v_type = VLNK; - /* Set up the symlink-specific fields: */ sp->s_type = SYNTHFS_SYMLINK; sp->s_u.s.s_length = strlen(targetstring); @@ -298,6 +277,7 @@ int synthfs_remove_symlink(struct vnode *vp) { struct synthfsnode *sp = VTOS(vp); FREE(sp->s_u.s.s_symlinktarget, M_TEMP); + vnode_rele(vp); /* Do the standard cleanup involved in pruning an entry from the filesystem: */ return synthfs_remove_entry(vp); /* Do whatever standard cleanup is required */ @@ -324,7 +304,7 @@ long synthfs_adddirentry(u_int32_t fileno, u_int8_t type, const char *name, stru direntry.d_type = type; direntry.d_namlen = namelength; - if (uio->uio_resid < direntry.d_reclen) { + if (uio_resid(uio) < direntry.d_reclen) { direntrylength = 0; } else { uiomove((caddr_t)(&direntry), sizeof(direntry), uio); diff --git a/bsd/miscfs/synthfs/synthfs_vfsops.c b/bsd/miscfs/synthfs/synthfs_vfsops.c index 530239f61..39e8d6a6c 100644 --- a/bsd/miscfs/synthfs/synthfs_vfsops.c +++ b/bsd/miscfs/synthfs/synthfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -31,13 +31,12 @@ #include #include #include -#include +#include #include #include -#include +#include #include -#include -#include +#include #include #include #include @@ -45,6 +44,7 @@ #include #include #include +#include #include @@ -59,8 +59,8 @@ struct vfsops synthfs_vfsops = { synthfs_start, synthfs_unmount, synthfs_root, - synthfs_quotactl, - synthfs_statfs, + NULL, /* quotactl */ + synthfs_vfs_getattr, synthfs_sync, synthfs_vget, synthfs_fhtovp, @@ -71,7 +71,7 @@ struct vfsops synthfs_vfsops = { #define ROOTMPMODE 0755 #define ROOTPLACEHOLDERMODE 0700 -static char synthfs_fs_name[MFSNAMELEN] = "synthfs"; +static char synthfs_fs_name[MFSTYPENAMELEN] = "synthfs"; static char synthfs_fake_mntfromname[] = ""; @@ -91,130 +91,14 @@ int vn_symlink(struct proc *p, char *path, char *link); #if LOADABLE_FS void synthfs_load(int loadArgument) { - struct vfsconf *newvfsconf = NULL; - int j; - int (***opv_desc_vector_p)() = NULL; - int (**opv_desc_vector)(); - struct vnodeopv_entry_desc *opve_descp; - int error = 0; - -#pragma unused(loadArgument) - - /* - * This routine is responsible for all the initialization that would - * ordinarily be done as part of the system startup; it calls synthfs_init - * to do the initialization that is strictly synthfs-specific. 
- */ - - DBG_VOP(("load_synthfs: starting ...\n")); - - MALLOC(newvfsconf, void *, sizeof(struct vfsconf), M_SYNTHFS, M_WAITOK); - DBG_VOP(("load_synthfs: Allocated new vfsconf list entry, newvfsconf = 0x%08lx.\n", (unsigned long)newvfsconf)); - bzero(newvfsconf, sizeof(struct vfsconf)); - - if (newvfsconf) { - DBG_VOP(("load_synthfs: filling in newly allocated vfsconf entry at 0x%08lX.\n", (long)newvfsconf)); - newvfsconf->vfc_vfsops = &synthfs_vfsops; - strncpy(&newvfsconf->vfc_name[0], synthfs_fs_name, MFSNAMELEN); - newvfsconf->vfc_typenum = maxvfsconf++; - newvfsconf->vfc_refcount = 0; - newvfsconf->vfc_flags = 0; - newvfsconf->vfc_mountroot = NULL; /* Can't mount root of file system [yet] */ - - newvfsconf->vfc_next = NULL; - - /* Based on vfs_op_init and ... */ - opv_desc_vector_p = synthfs_vnodeop_opv_desc.opv_desc_vector_p; - - DBG_VOP(("load_synthfs: Allocating and initializing VNode ops vector...\n")); - - /* - * Allocate and init the vector. - * Also handle backwards compatibility. - */ - - MALLOC(*opv_desc_vector_p, PFI *, vfs_opv_numops*sizeof(PFI), M_SYNTHFS, M_WAITOK); - bzero (*opv_desc_vector_p, vfs_opv_numops*sizeof(PFI)); - opv_desc_vector = *opv_desc_vector_p; - for (j=0; synthfs_vnodeop_opv_desc.opv_desc_ops[j].opve_op; j++) { - opve_descp = &(synthfs_vnodeop_opv_desc.opv_desc_ops[j]); - - /* - * Sanity check: is this operation listed - * in the list of operations? We check this - * by seeing if its offset is zero. Since - * the default routine should always be listed - * first, it should be the only one with a zero - * offset. Any other operation with a zero - * offset is probably not listed in - * vfs_op_descs, and so is probably an error. - * - * A panic here means the layer programmer - * has committed the all-too common bug - * of adding a new operation to the layer's - * list of vnode operations but - * not adding the operation to the system-wide - * list of supported operations. - */ - if (opve_descp->opve_op->vdesc_offset == 0 && - opve_descp->opve_op->vdesc_offset != VOFFSET(vop_default)) { - DBG_VOP(("load_synthfs: operation %s not listed in %s.\n", - opve_descp->opve_op->vdesc_name, - "vfs_op_descs")); - panic ("load_synthfs: bad operation"); - } - /* - * Fill in this entry. - */ - opv_desc_vector[opve_descp->opve_op->vdesc_offset] = - opve_descp->opve_impl; - } - - /* - * Finally, go back and replace unfilled routines - * with their default. (Sigh, an O(n^3) algorithm. I - * could make it better, but that'd be work, and n is small.) - */ - opv_desc_vector_p = synthfs_vnodeop_opv_desc.opv_desc_vector_p; - - /* - * Force every operations vector to have a default routine.
- */ - opv_desc_vector = *opv_desc_vector_p; - if (opv_desc_vector[VOFFSET(vop_default)]==NULL) { - panic("load_synthfs: operation vector without default routine."); - } - for (j = 0;jmnt_stat.f_fstypename, synthfs_fs_name, sizeof(mp->mnt_stat.f_fstypename)); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname) - 1, &size); - strncpy(mp->mnt_stat.f_mntfromname, synthfs_fake_mntfromname, sizeof(mp->mnt_stat.f_mntfromname)); + strncpy(mp->mnt_vfsstat.f_fstypename, synthfs_fs_name, sizeof(mp->mnt_vfsstat.f_fstypename)); + strncpy(mp->mnt_vfsstat.f_mntfromname, synthfs_fake_mntfromname, sizeof(mp->mnt_vfsstat.f_mntfromname)); priv_mnt_data->synthfs_mounteddev = (dev_t)0; priv_mnt_data->synthfs_nextid = FIRST_SYNTHFS_ID; priv_mnt_data->synthfs_filecount = 0; @@ -263,7 +146,7 @@ synthfs_mount_fs(struct mount *mp, char *path, caddr_t data, struct nameidata *n /* Drop the freshly acquired reference on the root, leaving v_usecount=1 to prevent the vnode from being freed: */ - vput(priv_mnt_data->synthfs_rootvp); + vnode_put(priv_mnt_data->synthfs_rootvp); return (0); } @@ -271,17 +154,15 @@ synthfs_mount_fs(struct mount *mp, char *path, caddr_t data, struct nameidata *n int -synthfs_mount(mp, path, data, ndp, p) +synthfs_mount(mp, devvp, data, context) register struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; + vnode_t devvp; + user_addr_t data; + vfs_context_t context; { size_t size; - (void) copyinstr(path, mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname) - 1, &size); - return (synthfs_mount_fs(mp, path, data, ndp, p)); + return (synthfs_mount_fs(mp, devvp, data, vfs_context_proc(context))); } @@ -301,10 +182,10 @@ synthfs_init(vfsp) } int -synthfs_start(mp, flags, p) +synthfs_start(mp, flags, context) struct mount * mp; int flags; -struct proc * p; +vfs_context_t context; { DBG_VOP(("synthfs_start called.\n")); return 0; @@ -314,38 +195,27 @@ struct proc * p; * Return the root of a filesystem. */ int -synthfs_root(mp, vpp) +synthfs_root(mp, vpp, context) struct mount *mp; struct vnode **vpp; + vfs_context_t context; { unsigned long root_nodeid = ROOT_DIRID; DBG_VOP(("synthfs_root called.\n")); *vpp = VFSTOSFS(mp)->synthfs_rootvp; - return vget(VFSTOSFS(mp)->synthfs_rootvp, LK_EXCLUSIVE | LK_RETRY, current_proc()); -} - -int -synthfs_quotactl(mp, cmds, uid, arg, p) -struct mount *mp; -int cmds; -uid_t uid; -caddr_t arg; -struct proc * p; -{ - DBG_VOP(("synthfs_quotactl called.\n")); - return (0); + return vnode_get(VFSTOSFS(mp)->synthfs_rootvp); } /* * unmount system call */ int -synthfs_unmount(mp, mntflags, p) +synthfs_unmount(mp, mntflags, context) struct mount *mp; int mntflags; - struct proc *p; + vfs_context_t context; { struct synthfs_mntdata *synth; struct vnode *root_vp; @@ -359,16 +229,13 @@ synthfs_unmount(mp, mntflags, p) if (retval && ((mntflags & MNT_FORCE) == 0)) goto Err_Exit; /* Free the root vnode. - Note that there's no need to vget() or vref() it before locking it here: the ref. count has been maintained at +1 ever since mount time.
*/ if (root_vp) { - retval = vn_lock(root_vp, LK_EXCLUSIVE | LK_RETRY, p); if ((mntflags & MNT_FORCE) == 0) { if (retval) goto Err_Exit; if (root_vp->v_usecount > 1) { DBG_VOP(("synthfs ERROR: root vnode = %x, usecount = %d\n", (int)root_vp, synth->synthfs_rootvp->v_usecount)); - VOP_UNLOCK(root_vp, 0, p); retval = EBUSY; goto Err_Exit; }; @@ -377,8 +244,10 @@ synthfs_unmount(mp, mntflags, p) synth->synthfs_rootvp = NULL; if (retval == 0) { - vput(root_vp); /* This drops synthfs's own refcount */ - vgone(root_vp); + vnode_get(root_vp); + vnode_rele(root_vp); + vnode_recycle(root_vp); + vnode_put(root_vp); /* This drops synthfs's own refcount */ }; }; @@ -398,24 +267,22 @@ Err_Exit: * Get file system statistics. */ int -synthfs_statfs(mp, sbp, p) - struct mount *mp; - register struct statfs *sbp; - struct proc *p; +synthfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) { - DBG_VOP(("synthfs_statfs called.\n")); - - sbp->f_bsize = 512; - sbp->f_iosize = 512; - sbp->f_blocks = 1024; // lies, darn lies and virtual file systems - sbp->f_bfree = 0; // Nope, can't write here! - sbp->f_bavail = 0; - sbp->f_files = VFSTOSFS(mp)->synthfs_filecount + VFSTOSFS(mp)->synthfs_dircount; - sbp->f_ffree = 0; - strncpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, sizeof(sbp->f_mntonname)); - strncpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, sizeof(sbp->f_mntfromname)); - - return (0); + struct synthfs_mntdata *synthfs_mp = VFSTOSFS(mp); + DBG_VOP(("synthfs_vfs_getattr called.\n")); + + VFSATTR_RETURN(fsap, f_bsize, 512); + VFSATTR_RETURN(fsap, f_iosize, 512); + VFSATTR_RETURN(fsap, f_blocks, 1024); + VFSATTR_RETURN(fsap, f_bfree, 0); + VFSATTR_RETURN(fsap, f_bavail, 0); + VFSATTR_RETURN(fsap, f_bused, 1024); + VFSATTR_RETURN(fsap, f_files, synthfs_mp->synthfs_filecount + synthfs_mp->synthfs_dircount); + VFSATTR_RETURN(fsap, f_ffree, 0); + VFSATTR_RETURN(fsap, f_fssubtype, 0); + + return 0; } /* @@ -423,11 +290,10 @@ synthfs_statfs(mp, sbp, p) * structures, so don't do anything */ int -synthfs_sync(mp, waitfor, cred, p) +synthfs_sync(mp, waitfor, context) struct mount *mp; int waitfor; - struct ucred *cred; - struct proc *p; + vfs_context_t context; { // DBG_VOP(("synthfs_sync called\n")); return 0; @@ -436,12 +302,14 @@ synthfs_sync(mp, waitfor, cred, p) * Look up a synthfs node by node number. */ int -synthfs_vget(mp, ino, vpp) +synthfs_vget(mp, ino, vpp, context) struct mount *mp; - void *ino; + ino64_t ino; struct vnode **vpp; + vfs_context_t context; { struct vnode *vp; + int vid = 0; // DBG_VOP(("synthfs_vget called\n")); @@ -452,19 +320,25 @@ synthfs_vget(mp, ino, vpp) } loop: - simple_lock(&mntvnode_slock); - LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - if (VTOS(vp)->s_nodeid == *((unsigned long *)ino)) { - if (vget(vp, LK_EXCLUSIVE, current_proc()) != 0) { - simple_unlock(&mntvnode_slock); - goto loop; - }; - simple_unlock(&mntvnode_slock); + TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { + if (VTOS(vp)->s_nodeid == (unsigned long)ino) { + /* + * doing a vnode_getwithvid isn't technically + * necessary since synthfs is an unsafe filesystem + * and we're running behind a funnel at this point + * however, vnode_get always succeeds, which isn't + * what we want if this vnode is in the process of + * being terminated + */ + vid = vnode_vid(vp); + + if (vnode_getwithvid(vp, vid) != 0) { + goto loop; + }; *vpp = vp; return 0; }; }; - simple_unlock(&mntvnode_slock); *vpp = NULL; return -1; } @@ -473,17 +347,11 @@ loop: * fast filesystem related variables. 
*/ int -synthfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +synthfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context) { DBG_VOP(("synthfs_sysctl called.\n")); - return (EOPNOTSUPP); + return (ENOTSUP); } /* @@ -491,16 +359,15 @@ synthfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) * */ int -synthfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) +synthfs_fhtovp(mp, fhlen, fhp, vpp, context) register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; + int fhlen; + unsigned char *fhp; struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; + vfs_context_t context; { DBG_VOP(("synthfs_fhtovp called.\n")); - return EOPNOTSUPP; + return ENOTSUP; } /* @@ -508,12 +375,14 @@ synthfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) */ /* ARGSUSED */ int -synthfs_vptofh(vp, fhp) +synthfs_vptofh(vp, fhlenp, fhp, context) struct vnode *vp; - struct fid *fhp; + int *fhlenp; + unsigned char *fhp; + vfs_context_t context; { DBG_VOP(("synthfs_vptofh called.\n")); - return EOPNOTSUPP; + return ENOTSUP; } @@ -522,38 +391,42 @@ synthfs_vptofh(vp, fhp) int -vn_mkdir(struct proc *p, char *path, int mode) { +vn_mkdir(struct proc *p, char *path, int mode) +{ struct nameidata nd; struct vnode *vp; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; int error; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, path, p); - if (error = namei(&nd)) { + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); /* XXX kauth_cred_get() ??? proxy */ + + NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), &context); + error = namei(&nd); + if (error) { DBG_VOP(("vn_mkdir: error from namei, error = %d.\n", error)); return (error); }; vp = nd.ni_vp; - if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); + + if (vp == NULL) { + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VDIR); + VATTR_SET(&va, va_mode, (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask); + + error = vn_create(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, 0, &context); + if (error) + DBG_VOP(("vn_mkdir: error from vnop_mkdir (%d).\n", error)); + } else { DBG_VOP(("vn_mkdir: target already exists; returning EEXIST.\n")); - return (EEXIST); + error = EEXIST; } - VATTR_NULL(&vattr); - vattr.va_type = VDIR; - vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (error) { - DBG_VOP(("vn_mkdir: error from VOP_MKDIR (%d).\n", error)); - } else { - vput(nd.ni_vp); - }; + vnode_put(nd.ni_dvp); + if (nd.ni_vp) + vnode_put(nd.ni_vp); + nameidone(&nd); + return (error); } @@ -562,25 +435,31 @@ vn_mkdir(struct proc *p, char *path, int mode) { int vn_symlink(struct proc *p, char *path, char *link) { struct nameidata nd; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; int error; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, link, p); - if (error = namei(&nd)) return error; - - if (nd.ni_vp) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - return EEXIST; - } - VATTR_NULL(&vattr); - vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - return VOP_SYMLINK(nd.ni_dvp, 
&nd.ni_vp, &nd.ni_cnd, &vattr, path); + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); /* XXX kauth_cred_get() ??? proxy */ + + NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE32, CAST_USER_ADDR_T(link), &context); + if ((error = namei(&nd))) return error; + + if (nd.ni_vp == NULL) { + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VLNK); + VATTR_SET(&va, va_mode, ACCESSPERMS &~ p->p_fd->fd_cmask); + + error = VNOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, path, &context); + } else + error = EEXIST; + + vnode_put(nd.ni_dvp); + if (nd.ni_vp) + vnode_put(nd.ni_vp); + nameidone(&nd); + + return (error); } diff --git a/bsd/miscfs/synthfs/synthfs_vnops.c b/bsd/miscfs/synthfs/synthfs_vnops.c index eb723cb22..4f1110e77 100644 --- a/bsd/miscfs/synthfs/synthfs_vnops.c +++ b/bsd/miscfs/synthfs/synthfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,15 +35,16 @@ #include #include #include -#include #include +#include #include -#include -#include +#include +#include #include #include #include #include +#include #include #include @@ -55,81 +56,61 @@ #if RWSUPPORT #error NOT PORTED FOR UBC -/* when porting to UBC, do not just replace - * vnode_uncache by ubc_uncache - there's more - * to it than that! - */ #include #endif -extern int groupmember(gid_t gid, struct ucred* cred); +static int synthfs_remove_internal(struct vnode *dvp, struct vnode *vp, + struct componentname *cnp, vfs_context_t context); + #define VOPFUNC int (*)(void *) /* Global vfs data structures for synthfs. */ int (**synthfs_vnodeop_p) (void *); struct vnodeopv_entry_desc synthfs_vnodeop_entries[] = { - {&vop_default_desc, (VOPFUNC)vn_default_error}, - {&vop_strategy_desc, (VOPFUNC)err_strategy}, /* strategy - not supported */ - {&vop_bwrite_desc, (VOPFUNC)err_bwrite}, /* bwrite - not supported */ - {&vop_lookup_desc, (VOPFUNC)synthfs_cached_lookup}, /* cached lookup */ - {&vop_create_desc, (VOPFUNC)synthfs_create}, /* create - DEBUGGER */ - {&vop_whiteout_desc, (VOPFUNC)err_whiteout}, /* whiteout - not supported */ - {&vop_mknod_desc, (VOPFUNC)err_mknod}, /* mknod - not supported */ - {&vop_mkcomplex_desc, (VOPFUNC)err_mkcomplex}, /* mkcomplex - not supported */ - {&vop_open_desc, (VOPFUNC)synthfs_open}, /* open - DEBUGGER */ - {&vop_close_desc, (VOPFUNC)nop_close}, /* close - NOP */ - {&vop_access_desc, (VOPFUNC)synthfs_access}, /* access */ - {&vop_getattr_desc, (VOPFUNC)synthfs_getattr}, /* getattr */ - {&vop_setattr_desc, (VOPFUNC)synthfs_setattr}, /* setattr */ - {&vop_getattrlist_desc, (VOPFUNC)err_getattrlist}, /* getattrlist - not supported */ - {&vop_setattrlist_desc, (VOPFUNC)err_setattrlist}, /* setattrlist - not supported */ - {&vop_read_desc, (VOPFUNC)err_read}, /* read - not supported */ - {&vop_write_desc, (VOPFUNC)err_write}, /* write - not supported */ - {&vop_lease_desc, (VOPFUNC)err_lease}, /* lease - not supported */ - {&vop_ioctl_desc, (VOPFUNC)err_ioctl}, /* ioctl - not supported */ - {&vop_select_desc, (VOPFUNC)synthfs_select}, /* select */ - {&vop_exchange_desc, (VOPFUNC)err_exchange}, /* exchange - not supported */ - {&vop_revoke_desc, (VOPFUNC)nop_revoke}, /* revoke - NOP */ - {&vop_mmap_desc, (VOPFUNC)synthfs_mmap}, /* mmap - DEBUGGER */ - {&vop_fsync_desc, (VOPFUNC)nop_fsync}, /* fsync - NOP */ - {&vop_seek_desc, (VOPFUNC)nop_seek}, /* seek - NOP */ - {&vop_remove_desc, (VOPFUNC)synthfs_remove}, /* remove */ - {&vop_link_desc, 
(VOPFUNC)err_link}, /* link - not supported */ - {&vop_rename_desc, (VOPFUNC)synthfs_rename}, /* rename */ - {&vop_mkdir_desc, (VOPFUNC)synthfs_mkdir}, /* mkdir */ - {&vop_rmdir_desc, (VOPFUNC)synthfs_rmdir}, /* rmdir */ - {&vop_symlink_desc, (VOPFUNC)synthfs_symlink}, /* symlink */ - {&vop_readdir_desc, (VOPFUNC)synthfs_readdir}, /* readdir */ - {&vop_readdirattr_desc, (VOPFUNC)err_readdirattr}, /* readdirattr - not supported */ - {&vop_readlink_desc, (VOPFUNC)synthfs_readlink}, /* readlink */ - {&vop_abortop_desc, (VOPFUNC)nop_abortop}, /* abortop - NOP */ - {&vop_inactive_desc, (VOPFUNC)synthfs_inactive}, /* inactive */ - {&vop_reclaim_desc, (VOPFUNC)synthfs_reclaim}, /* reclaim */ - {&vop_lock_desc, (VOPFUNC)synthfs_lock}, /* lock */ - {&vop_unlock_desc, (VOPFUNC)synthfs_unlock}, /* unlock */ - {&vop_bmap_desc, (VOPFUNC)err_bmap}, /* bmap - not supported */ - {&vop_print_desc, (VOPFUNC)err_print}, /* print - not supported */ - {&vop_islocked_desc, (VOPFUNC)synthfs_islocked}, /* islocked */ - {&vop_pathconf_desc, (VOPFUNC)synthfs_pathconf}, /* pathconf */ - {&vop_advlock_desc, (VOPFUNC)err_advlock}, /* advlock - not supported */ - {&vop_blkatoff_desc, (VOPFUNC)err_blkatoff}, /* blkatoff - not supported */ - {&vop_valloc_desc, (VOPFUNC)err_valloc}, /* valloc - not supported */ - {&vop_reallocblks_desc, (VOPFUNC)err_reallocblks}, /* reallocblks - not supported */ - {&vop_vfree_desc, (VOPFUNC)err_vfree}, /* vfree - not supported */ - {&vop_truncate_desc, (VOPFUNC)err_truncate}, /* truncate - not supported */ - {&vop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate - not supported */ - {&vop_update_desc, (VOPFUNC)synthfs_update}, /* update */ - {&vop_pgrd_desc, (VOPFUNC)err_pgrd}, /* pgrd - not supported */ - {&vop_pgwr_desc, (VOPFUNC)err_pgwr}, /* pgwr - not supported */ - {&vop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein - not supported */ - {&vop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout - not supported */ - {&vop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize - not supported */ - {&vop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs - not supported */ - {&vop_copyfile_desc, (VOPFUNC)err_copyfile}, /* copyfile - not supported */ - { &vop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff not supported */ - { &vop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk not supported */ - { &vop_cmap_desc, (VOPFUNC)err_cmap }, /* cmap not supported */ + {&vnop_default_desc, (VOPFUNC)vn_default_error}, + {&vnop_strategy_desc, (VOPFUNC)err_strategy}, /* strategy - not supported */ + {&vnop_bwrite_desc, (VOPFUNC)err_bwrite}, /* bwrite - not supported */ + {&vnop_lookup_desc, (VOPFUNC)synthfs_cached_lookup}, /* cached lookup */ + {&vnop_create_desc, (VOPFUNC)synthfs_create}, /* create - DEBUGGER */ + {&vnop_whiteout_desc, (VOPFUNC)err_whiteout}, /* whiteout - not supported */ + {&vnop_mknod_desc, (VOPFUNC)err_mknod}, /* mknod - not supported */ + {&vnop_open_desc, (VOPFUNC)synthfs_open}, /* open - DEBUGGER */ + {&vnop_close_desc, (VOPFUNC)nop_close}, /* close - NOP */ + {&vnop_getattr_desc, (VOPFUNC)synthfs_getattr}, /* getattr */ + {&vnop_setattr_desc, (VOPFUNC)synthfs_setattr}, /* setattr */ + {&vnop_getattrlist_desc, (VOPFUNC)err_getattrlist}, /* getattrlist - not supported */ + {&vnop_setattrlist_desc, (VOPFUNC)err_setattrlist}, /* setattrlist - not supported */ + {&vnop_read_desc, (VOPFUNC)err_read}, /* read - not supported */ + {&vnop_write_desc, (VOPFUNC)err_write}, /* write - not supported */ + {&vnop_ioctl_desc, (VOPFUNC)err_ioctl}, /* ioctl - not supported */ 
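/*
 * Each entry above binds one vnop descriptor to its handler, cast to the
 * common VOPFUNC type: operations synthfs cannot support point at the
 * generic err_* stubs, harmless ones at the nop_* stubs, and the
 * NULL/NULL pair below terminates the table.  Under the Tiger KPI such a
 * table is no longer hand-woven into an operations vector at load time
 * (compare the deleted synthfs_load() earlier in this patch) but is
 * handed to the VFS wholesale.  A minimal registration sketch, with
 * hypothetical "myfs" names and assuming the vfs_fsadd() KPI:
 *
 *	static struct vnodeopv_desc *myfs_opv_descs[] =
 *		{ &myfs_vnodeop_opv_desc };
 *	struct vfs_fsentry vfe;
 *	vfstable_t handle;
 *	int error;
 *
 *	bzero(&vfe, sizeof(vfe));
 *	vfe.vfe_vfsops = &myfs_vfsops;		// VFS-level operations
 *	vfe.vfe_vopcnt = 1;			// one vnop table
 *	vfe.vfe_opvdescs = myfs_opv_descs;
 *	strcpy(vfe.vfe_fsname, "myfs");
 *	error = vfs_fsadd(&vfe, &handle);	// undone via vfs_fsremove()
 */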
+ {&vnop_select_desc, (VOPFUNC)synthfs_select}, /* select */ + {&vnop_exchange_desc, (VOPFUNC)err_exchange}, /* exchange - not supported */ + {&vnop_revoke_desc, (VOPFUNC)nop_revoke}, /* revoke - NOP */ + {&vnop_mmap_desc, (VOPFUNC)synthfs_mmap}, /* mmap - DEBUGGER */ + {&vnop_fsync_desc, (VOPFUNC)nop_fsync}, /* fsync - NOP */ + {&vnop_remove_desc, (VOPFUNC)synthfs_remove}, /* remove */ + {&vnop_link_desc, (VOPFUNC)err_link}, /* link - not supported */ + {&vnop_rename_desc, (VOPFUNC)synthfs_rename}, /* rename */ + {&vnop_mkdir_desc, (VOPFUNC)synthfs_mkdir}, /* mkdir */ + {&vnop_rmdir_desc, (VOPFUNC)synthfs_rmdir}, /* rmdir */ + {&vnop_symlink_desc, (VOPFUNC)synthfs_symlink}, /* symlink */ + {&vnop_readdir_desc, (VOPFUNC)synthfs_readdir}, /* readdir */ + {&vnop_readdirattr_desc, (VOPFUNC)err_readdirattr}, /* readdirattr - not supported */ + {&vnop_readlink_desc, (VOPFUNC)synthfs_readlink}, /* readlink */ + {&vnop_inactive_desc, (VOPFUNC)synthfs_inactive}, /* inactive */ + {&vnop_reclaim_desc, (VOPFUNC)synthfs_reclaim}, /* reclaim */ + {&vnop_pathconf_desc, (VOPFUNC)synthfs_pathconf}, /* pathconf */ + {&vnop_advlock_desc, (VOPFUNC)err_advlock}, /* advlock - not supported */ + {&vnop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate - not supported */ + {&vnop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein - not supported */ + {&vnop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout - not supported */ + {&vnop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize - not supported */ + {&vnop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs - not supported */ + {&vnop_copyfile_desc, (VOPFUNC)err_copyfile}, /* copyfile - not supported */ + { &vnop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff not supported */ + { &vnop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk not supported */ + { &vnop_blockmap_desc, (VOPFUNC)err_blockmap }, /* blockmap not supported */ {(struct vnodeop_desc *) NULL, (int (*) ()) NULL} }; @@ -147,11 +128,11 @@ struct vnodeopv_desc synthfs_vnodeop_opv_desc = #% create dvp L U U #% create vpp - L - # - vop_create { + vnop_create { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; - IN struct vattr *vap; + IN struct vnode_attr *vap; We are responsible for freeing the namei buffer, it is done in hfs_makenode(), unless there is a previous error. @@ -160,11 +141,12 @@ struct vnodeopv_desc synthfs_vnodeop_opv_desc = int synthfs_create(ap) -struct vop_create_args /* { +struct vnop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { #if DEBUG @@ -184,20 +166,18 @@ struct vop_create_args /* { * Open called. #% open vp L L L # - vop_open { + vnop_open { IN struct vnode *vp; IN int mode; - IN struct ucred *cred; - IN struct proc *p; + IN vfs_context_t a_context; */ int synthfs_open(ap) -struct vop_open_args /* { +struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -225,10 +205,10 @@ struct vop_open_args /* { * NB Currently unsupported. 
# XXX - not used # - vop_mmap { + vnop_mmap { IN struct vnode *vp; IN int fflags; - IN struct ucred *cred; + IN kauth_cred_t cred; IN struct proc *p; */ @@ -236,186 +216,74 @@ struct vop_open_args /* { /* ARGSUSED */ int -synthfs_mmap(ap) -struct vop_mmap_args /* { - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; -} */ *ap; +synthfs_mmap(__unused struct vnop_mmap_args *ap) { -#if DEBUG - struct vnode *vp = ap->a_vp; - char debugmsg[255]; - - sprintf(debugmsg, "synthfs_mmap: attempt to map '/%s' ?!", VTOS(vp)->s_name); - Debugger(debugmsg); -#endif - return EINVAL; } -/* -#% access vp L L L -# - vop_access { - IN struct vnode *vp; - IN int mode; - IN struct ucred *cred; - IN struct proc *p; - -*/ - -int -synthfs_access(ap) -struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - mode_t mode = ap->a_mode; - struct ucred *cred = ap->a_cred; - struct synthfsnode *sp = VTOS(vp); - register gid_t *gp; - mode_t mask; - int retval = 0; - int i; - - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) - return (EROFS); - break; - default: - break; - } - } - - /* If immutable bit set, nobody gets to write it. */ - if ((mode & VWRITE) && (sp->s_flags & IMMUTABLE)) - return (EPERM); - - /* Otherwise, user id 0 always gets access. */ - if (ap->a_cred->cr_uid == 0) { - retval = 0; - goto Exit; - }; - - mask = 0; - - /* Otherwise, check the owner. */ - if (cred->cr_uid == sp->s_uid) { - if (mode & VEXEC) - mask |= S_IXUSR; - if (mode & VREAD) - mask |= S_IRUSR; - if (mode & VWRITE) - mask |= S_IWUSR; - retval = ((sp->s_mode & mask) == mask ? 0 : EACCES); - goto Exit; - } - - /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (sp->s_gid == *gp) { - if (mode & VEXEC) - mask |= S_IXGRP; - if (mode & VREAD) - mask |= S_IRGRP; - if (mode & VWRITE) - mask |= S_IWGRP; - retval = ((sp->s_mode & mask) == mask ? 0 : EACCES); - goto Exit; - } - - /* Otherwise, check everyone else. */ - if (mode & VEXEC) - mask |= S_IXOTH; - if (mode & VREAD) - mask |= S_IROTH; - if (mode & VWRITE) - mask |= S_IWOTH; - retval = ((sp->s_mode & mask) == mask ? 
0 : EACCES); - -Exit: - return (retval); -} - /* #% getattr vp = = = # - vop_getattr { + vnop_getattr { IN struct vnode *vp; - IN struct vattr *vap; - IN struct ucred *cred; - IN struct proc *p; + IN struct vnode_attr *vap; + IN vfs_context_t context; */ int synthfs_getattr(ap) -struct vop_getattr_args /* { +struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - struct vattr *vap = ap->a_vap; - struct synthfsnode *sp = VTOS(vp); - struct synthfs_mntdata *smp = VTOSFS(vp); - - vap->va_type = vp->v_type; - vap->va_mode = sp->s_mode; - vap->va_nlink = sp->s_linkcount; - vap->va_uid = sp->s_uid; - vap->va_gid = sp->s_gid; - vap->va_fsid = VTOVFS(vp)->mnt_stat.f_fsid.val[0]; - vap->va_fileid = sp->s_nodeid; + struct vnode *vp = ap->a_vp; + struct vnode_attr *vap = ap->a_vap; + struct synthfsnode *sp = VTOS(vp); + + VATTR_RETURN(vap, va_type, vp->v_type); + VATTR_RETURN(vap, va_mode, sp->s_mode); + VATTR_RETURN(vap, va_nlink, sp->s_linkcount); + VATTR_RETURN(vap, va_uid, sp->s_uid); + VATTR_RETURN(vap, va_gid, sp->s_gid); + VATTR_RETURN(vap, va_fsid, VTOVFS(vp)->mnt_vfsstat.f_fsid.val[0]); + VATTR_RETURN(vap, va_fileid, sp->s_nodeid); switch (vp->v_type) { - case VDIR: - vap->va_size = (sp->s_u.d.d_entrycount + 2) * sizeof(struct dirent); + case VDIR: + VATTR_RETURN(vap, va_data_size, (sp->s_u.d.d_entrycount + 2) * sizeof(struct dirent)); break; - case VREG: - vap->va_size = sp->s_u.f.f_size; + case VREG: + VATTR_RETURN(vap, va_data_size, sp->s_u.f.f_size); break; - case VLNK: - vap->va_size = sp->s_u.s.s_length; + case VLNK: + VATTR_RETURN(vap, va_data_size, sp->s_u.s.s_length); break; - default: - vap->va_size = 0; + default: + VATTR_RETURN(vap, va_data_size, 0); }; - vap->va_blocksize = 512; - vap->va_atime.tv_sec = sp->s_accesstime.tv_sec; - vap->va_atime.tv_nsec = sp->s_accesstime.tv_usec * 1000; - vap->va_mtime.tv_sec = sp->s_modificationtime.tv_sec; - vap->va_mtime.tv_nsec = sp->s_modificationtime.tv_usec * 1000; - vap->va_ctime.tv_sec = sp->s_changetime.tv_sec; - vap->va_ctime.tv_nsec = sp->s_changetime.tv_usec * 1000; - vap->va_gen = sp->s_generation; - vap->va_flags = sp->s_flags; - vap->va_rdev = sp->s_rdev; - vap->va_bytes = vap->va_blocksize * ((vap->va_size + vap->va_blocksize - 1) / vap->va_blocksize); - vap->va_filerev = 0; - vap->va_vaflags = 0; - - return (0); + VATTR_RETURN(vap, va_iosize, 512); + vap->va_access_time.tv_sec = sp->s_accesstime.tv_sec; + vap->va_access_time.tv_nsec = sp->s_accesstime.tv_usec * 1000; + VATTR_SET_SUPPORTED(vap, va_access_time); + vap->va_modify_time.tv_sec = sp->s_modificationtime.tv_sec; + vap->va_modify_time.tv_nsec = sp->s_modificationtime.tv_usec * 1000; + VATTR_SET_SUPPORTED(vap, va_modify_time); + vap->va_change_time.tv_sec = sp->s_changetime.tv_sec; + vap->va_change_time.tv_nsec = sp->s_changetime.tv_usec * 1000; + VATTR_SET_SUPPORTED(vap, va_change_time); + VATTR_RETURN(vap, va_gen, sp->s_generation); + VATTR_RETURN(vap, va_flags, sp->s_flags); + VATTR_RETURN(vap, va_rdev, sp->s_rdev); + VATTR_RETURN(vap, va_filerev, 0); + VATTR_RETURN(vap, va_acl, NULL); + + return (0); } @@ -424,20 +292,11 @@ struct vop_getattr_args /* { * Change the mode on a file or directory. * vnode vp must be locked on entry. 
*/ -int synthfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct proc *p) +int synthfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) { struct synthfsnode *sp = VTOS(vp); int result; - if ((cred->cr_uid != sp->s_uid) && - (result = suser(cred, &p->p_acflag))) - return result; - if (cred->cr_uid) { - if (vp->v_type != VDIR && (mode & S_ISTXT)) - return EFTYPE; - if (!groupmember(sp->s_gid, cred) && (mode & S_ISGID)) - return (EPERM); - } sp->s_mode &= ~ALLPERMS; sp->s_mode |= (mode & ALLPERMS); sp->s_nodeflags |= IN_CHANGE; @@ -454,29 +313,11 @@ int synthfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct proc *p * Change the flags on a file or directory. * vnode vp must be locked on entry. */ -int synthfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, struct proc *p) +int synthfs_chflags(struct vnode *vp, u_long flags, kauth_cred_t cred, struct proc *p) { struct synthfsnode *sp = VTOS(vp); - int result; - - if (cred->cr_uid != sp->s_uid && - (result = suser(cred, &p->p_acflag))) - return result; - if (cred->cr_uid == 0) { - if ((sp->s_flags & (SF_IMMUTABLE | SF_APPEND)) && - securelevel > 0) { - return EPERM; - }; - sp->s_flags = flags; - } else { - if (sp->s_flags & (SF_IMMUTABLE | SF_APPEND) || - (flags & UF_SETTABLE) != flags) { - return EPERM; - }; - sp->s_flags &= SF_SETTABLE; - sp->s_flags |= (flags & UF_SETTABLE); - } + sp->s_flags = flags; sp->s_nodeflags |= IN_CHANGE; return 0; @@ -488,26 +329,17 @@ int synthfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, struct p * Perform chown operation on vnode vp; * vnode vp must be locked on entry. */ -int synthfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct proc *p) +int synthfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, struct proc *p) { struct synthfsnode *sp = VTOS(vp); uid_t ouid; gid_t ogid; int result = 0; + int is_member; if (uid == (uid_t)VNOVAL) uid = sp->s_uid; if (gid == (gid_t)VNOVAL) gid = sp->s_gid; - /* - * If we don't own the file, are trying to change the owner - * of the file, or are not a member of the target group, - * the caller must be superuser or the call fails. - */ - if ((cred->cr_uid != sp->s_uid || uid != sp->s_uid || - (gid != sp->s_gid && !groupmember((gid_t)gid, cred))) && - (result = suser(cred, &p->p_acflag))) - return result; - ogid = sp->s_gid; ouid = sp->s_uid; @@ -515,8 +347,8 @@ int synthfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, st sp->s_uid = uid; if (ouid != uid || ogid != gid) sp->s_nodeflags |= IN_CHANGE; - if (ouid != uid && cred->cr_uid != 0) sp->s_mode &= ~S_ISUID; - if (ogid != gid && cred->cr_uid != 0) sp->s_mode &= ~S_ISGID; + if (ouid != uid && suser(cred, NULL)) sp->s_mode &= ~S_ISUID; + if (ogid != gid && suser(cred, NULL)) sp->s_mode &= ~S_ISGID; return 0; } @@ -527,143 +359,92 @@ int synthfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, st * Set attribute vnode op. 
called from several syscalls #% setattr vp L L L # - vop_setattr { + vnop_setattr { IN struct vnode *vp; - IN struct vattr *vap; - IN struct ucred *cred; - IN struct proc *p; - + IN struct vnode_attr *vap; + IN vfs_context_t context; */ int synthfs_setattr(ap) -struct vop_setattr_args /* { +struct vnop_setattr_args /* { struct vnode *a_vp; -struct vattr *a_vap; -struct ucred *a_cred; -struct proc *a_p; +struct vnode_attr *a_vap; +vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - struct synthfsnode *sp = VTOS(vp); - struct vattr *vap = ap->a_vap; - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; - struct timeval atimeval, mtimeval; - int result; - - /* - * Check for unsettable attributes. - */ - if (((vap->va_type != VNON) && (vap->va_type != vp->v_type)) || - (vap->va_nlink != VNOVAL) || - (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || - (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || - ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { - result = EINVAL; - goto Err_Exit; - } - - if (vap->va_flags != VNOVAL) { - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) { - result = EROFS; - goto Err_Exit; - }; - if ((result = synthfs_chflags(vp, vap->va_flags, cred, p))) { - goto Err_Exit; - }; - if (vap->va_flags & (IMMUTABLE | APPEND)) { - result = 0; - goto Err_Exit; - }; - } - - if (sp->s_flags & (IMMUTABLE | APPEND)) { - result = EPERM; - goto Err_Exit; - }; + struct vnode *vp = ap->a_vp; + struct synthfsnode *sp = VTOS(vp); + struct vnode_attr *vap = ap->a_vap; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); + struct timeval atimeval, mtimeval; + uid_t nuid; + gid_t ngid; + int result; + + result = 0; + + if (VATTR_IS_ACTIVE(vap, va_flags)) { + if ((result = synthfs_chflags(vp, vap->va_flags, cred, p))) { + goto Err_Exit; + } + } + VATTR_SET_SUPPORTED(vap, va_flags); + + nuid = (uid_t)ngid = (gid_t)VNOVAL; + if (VATTR_IS_ACTIVE(vap, va_uid)) + nuid = vap->va_uid; + if (VATTR_IS_ACTIVE(vap, va_gid)) + ngid = vap->va_gid; + if (nuid != (uid_t)VNOVAL || ngid != (gid_t)VNOVAL) { + if ((result = synthfs_chown(vp, nuid, ngid, cred, p))) { + goto Err_Exit; + } + } + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); - /* - * Go through the fields and update iff not VNOVAL. - */ - if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) { - result = EROFS; - goto Err_Exit; - }; - if ((result = synthfs_chown(vp, vap->va_uid, vap->va_gid, cred, p))) { - goto Err_Exit; - }; - } - if (vap->va_size != VNOVAL) { - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. 
- */ - switch (vp->v_type) { - case VDIR: - result = EISDIR; - goto Err_Exit; - case VLNK: - case VREG: - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) { - result = EROFS; - goto Err_Exit; - }; - break; - default: - break; - } + if (VATTR_IS_ACTIVE(vap, va_data_size)) { #if RWSUPPORT - if ((result = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p))) { - goto Err_Exit; - }; + if ((result = vnode_setsize(vp, vap->va_data_size, 0, ap->a_context))) { + goto Err_Exit; + }; + VATTR_SET_SUPPORTED(vap, va_data_size); #else - result = EINVAL; - goto Err_Exit; + result = EINVAL; + goto Err_Exit; #endif - } + } - sp = VTOS(vp); - if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) { - result = EROFS; - goto Err_Exit; - }; - if (cred->cr_uid != sp->s_uid && - (result = suser(cred, &p->p_acflag)) && - ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || - (result = VOP_ACCESS(vp, VWRITE, cred, p)))) { - goto Err_Exit; - }; - if (vap->va_atime.tv_sec != VNOVAL) - sp->s_nodeflags |= IN_ACCESS; - if (vap->va_mtime.tv_sec != VNOVAL) - sp->s_nodeflags |= IN_CHANGE | IN_UPDATE; - atimeval.tv_sec = vap->va_atime.tv_sec; - atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; - mtimeval.tv_sec = vap->va_mtime.tv_sec; - mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - if ((result = VOP_UPDATE(vp, &atimeval, &mtimeval, 1))) { - goto Err_Exit; - }; - } + sp = VTOS(vp); + if (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time)) { + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + sp->s_nodeflags |= IN_ACCESS; + atimeval.tv_sec = vap->va_access_time.tv_sec; + atimeval.tv_usec = vap->va_access_time.tv_nsec / 1000; + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + sp->s_nodeflags |= IN_CHANGE | IN_UPDATE; + mtimeval.tv_sec = vap->va_modify_time.tv_sec; + mtimeval.tv_usec = vap->va_modify_time.tv_nsec / 1000; + } + if ((result = synthfs_update(vp, &atimeval, &mtimeval, 1))) { + goto Err_Exit; + } + } + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); - result = 0; - if (vap->va_mode != (mode_t)VNOVAL) { - if (VTOVFS(vp)->mnt_flag & MNT_RDONLY) { - result = EROFS; - goto Err_Exit; - }; - result = synthfs_chmod(vp, (int)vap->va_mode, cred, p); - }; + if (VATTR_IS_ACTIVE(vap, va_mode)) + result = synthfs_chmod(vp, (int)vap->va_mode, cred, p); + VATTR_SET_SUPPORTED(vap, va_mode); -Err_Exit: ; + Err_Exit: - DBG_VOP(("synthfs_setattr: returning %d...\n", result)); + DBG_VOP(("synthfs_setattr: returning %d...\n", result)); - return (result); + return (result); } @@ -675,7 +456,7 @@ Err_Exit: ; #% rename targetPar_vp L U U #% rename target_vp X U U # - vop_rename { + vnop_rename { IN WILLRELE struct vnode *sourcePar_vp; IN WILLRELE struct vnode *source_vp; IN struct componentname *source_cnp; @@ -700,13 +481,14 @@ Err_Exit: ; int synthfs_rename(ap) -struct vop_rename_args /* { +struct vnop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ *ap; { struct vnode *target_vp = ap->a_tvp; @@ -715,7 +497,6 @@ struct vop_rename_args /* { struct vnode *sourcePar_vp = ap->a_fdvp; struct componentname *target_cnp = ap->a_tcnp; struct componentname *source_cnp = ap->a_fcnp; - struct proc *p = source_cnp->cn_proc; struct synthfsnode *target_sp, *targetPar_sp, *source_sp, *sourcePar_sp; u_short doingdirectory = 0, oldparent = 0, newparent = 0; int retval = 0; @@ -730,35 +511,10 @@ struct vop_rename_args 
/* { DBG_ASSERT((ap->a_fdvp->v_type == VDIR) && (ap->a_tdvp->v_type == VDIR)); target_sp = targetPar_sp = source_sp = sourcePar_sp = NULL; - /* - * Check for cross-device rename. - */ - if ((source_vp->v_mount != targetPar_vp->v_mount) || - (target_vp && (source_vp->v_mount != target_vp->v_mount))) { - retval = EXDEV; - goto abortit; - } - - /* - * Check for access permissions - */ - if (target_vp && ((VTOS(target_vp)->s_pflags & (IMMUTABLE | APPEND)) || - (VTOS(targetPar_vp)->s_pflags & APPEND))) { - retval = EPERM; - goto abortit; - } - - if ((retval = vn_lock(source_vp, LK_EXCLUSIVE, p))) - goto abortit; sourcePar_sp = VTOS(sourcePar_vp); source_sp = VTOS(source_vp); oldparent = sourcePar_sp->s_nodeid; - if ((source_sp->s_pflags & (IMMUTABLE | APPEND)) || (sourcePar_sp->s_pflags & APPEND)) { - VOP_UNLOCK(source_vp, 0, p); - retval = EPERM; - goto abortit; - } /* * Be sure we are not renaming ".", "..", or an alias of ".". This @@ -771,7 +527,6 @@ struct vop_rename_args /* { || sourcePar_sp == source_sp || (source_cnp->cn_flags & ISDOTDOT) || (source_sp->s_nodeflags & IN_RENAME)) { - VOP_UNLOCK(source_vp, 0, p); retval = EINVAL; goto abortit; } @@ -785,11 +540,6 @@ struct vop_rename_args /* { target_sp = target_vp ? VTOS(target_vp) : NULL; newparent = targetPar_sp->s_nodeid; - retval = VOP_ACCESS(source_vp, VWRITE, target_cnp->cn_cred, target_cnp->cn_proc); - if (doingdirectory && (newparent != oldparent)) { - if (retval) /* write access check above */ - goto bad; - } /* * If the destination exists, then be sure its type (file or dir) @@ -797,35 +547,15 @@ struct vop_rename_args /* { * it is empty. Then delete the destination. */ if (target_vp) { - /* - * If the parent directory is "sticky", then the user must - * own the parent directory, or the destination of the rename, - * otherwise the destination may not be changed (except by - * root). This implements append-only directories. - */ - if ((targetPar_sp->s_mode & S_ISTXT) && target_cnp->cn_cred->cr_uid != 0 && - target_cnp->cn_cred->cr_uid != targetPar_sp->s_uid && - target_sp->s_uid != target_cnp->cn_cred->cr_uid) { - retval = EPERM; - goto bad; - } - /* - * VOP_REMOVE will vput targetPar_vp so we better bump - * its ref count and relockit, always set target_vp to - * NULL afterwards to indicate that were done with it. 
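The hunks above and below replace the old per-call (cred, proc) argument pairs with a single vfs_context_t, following the Tiger VFS KPI; a vnop that still needs the caller's process or credential recovers them through accessors, exactly as synthfs_setattr and vn_mkdir now do. A minimal sketch of the idiom, with a hypothetical "myfs" vnop (illustrative, not part of this patch):

#include <sys/vnode.h>
#include <sys/kauth.h>

static int
myfs_vnop_setattr(struct vnop_setattr_args *ap)
{
	/* One context argument carries both identities. */
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	proc_t p = vfs_context_proc(ap->a_context);

	/*
	 * Under the new KPI only attributes marked active were requested,
	 * and the filesystem reports back which ones it honored.
	 */
	if (VATTR_IS_ACTIVE(ap->a_vap, va_mode)) {
		/* ...apply ap->a_vap->va_mode, consulting cred/p as needed... */
		VATTR_SET_SUPPORTED(ap->a_vap, va_mode);
	}
	(void)cred; (void)p;	/* used by the real permission checks */
	return (0);
}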
- */ - VREF(targetPar_vp); #if RWSUPPORT - if (target_vp->v_type == VREG) { - (void) vnode_uncache(target_vp); - }; + if (target_vp->v_type == VREG) { + (void) vnode_uncache(target_vp); + }; #endif - cache_purge(target_vp); + cache_purge(target_vp); - target_cnp->cn_flags &= ~SAVENAME; - retval = VOP_REMOVE(targetPar_vp, target_vp, target_cnp); - (void) vn_lock(targetPar_vp, LK_EXCLUSIVE | LK_RETRY, p); + retval = synthfs_remove_internal(targetPar_vp, target_vp, target_cnp, ap->a_context); target_vp = NULL; target_sp = NULL; @@ -834,17 +564,11 @@ struct vop_rename_args /* { }; - if (newparent != oldparent) - vn_lock(sourcePar_vp, LK_EXCLUSIVE | LK_RETRY, p); - /* remove the existing entry from the namei cache: */ if (source_vp->v_type == VREG) cache_purge(source_vp); retval = synthfs_move_rename_entry( source_vp, targetPar_vp, target_cnp->cn_nameptr); - if (newparent != oldparent) - VOP_UNLOCK(sourcePar_vp, 0, p); - if (retval) goto bad; source_sp->s_nodeflags &= ~IN_RENAME; @@ -857,55 +581,21 @@ struct vop_rename_args /* { */ targetPar_sp->s_nodeflags |= IN_UPDATE; sourcePar_sp->s_nodeflags |= IN_UPDATE; - tv = time; + + microtime(&tv); SYNTHFSTIMES(targetPar_sp, &tv, &tv); SYNTHFSTIMES(sourcePar_sp, &tv, &tv); - vput(targetPar_vp); - vrele(sourcePar_vp); - vput(source_vp); - return (retval); bad:; if (retval && doingdirectory) source_sp->s_nodeflags &= ~IN_RENAME; - if (targetPar_vp == target_vp) - vrele(targetPar_vp); - else - vput(targetPar_vp); - - if (target_vp) - vput(target_vp); - - vrele(sourcePar_vp); - - if (VOP_ISLOCKED(source_vp)) - vput(source_vp); - else - vrele(source_vp); - - return (retval); + return (retval); abortit:; - - VOP_ABORTOP(targetPar_vp, target_cnp); /* XXX, why not in NFS? */ - - if (targetPar_vp == target_vp) - vrele(targetPar_vp); - else - vput(targetPar_vp); - - if (target_vp) - vput(target_vp); - - VOP_ABORTOP(sourcePar_vp, source_cnp); /* XXX, why not in NFS? */ - - vrele(sourcePar_vp); - vrele(source_vp); - - return (retval); + return (retval); } @@ -916,11 +606,12 @@ abortit:; #% mkdir dvp L U U #% mkdir vpp - L - # - vop_mkdir { + vnop_mkdir { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; - IN struct vattr *vap; + IN struct vnode_attr *vap; + IN vfs_context_t context; We are responsible for freeing the namei buffer, it is done in synthfs_makenode(), unless there is a previous error. 
@@ -929,11 +620,12 @@ abortit:; int synthfs_mkdir(ap) -struct vop_mkdir_args /* { +struct vnop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { int retval; @@ -942,22 +634,20 @@ struct vop_mkdir_args /* { int mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); struct vnode *vp = NULL; - *ap->a_vpp = NULL; + *ap->a_vpp = NULL; - retval = synthfs_new_directory(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, mode, ap->a_cnp->cn_proc, &vp); - if (retval) goto Error_Exit; + retval = synthfs_new_directory(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, mode, vfs_context_proc(cnp->cn_context), &vp); + if (retval) goto Error_Exit; - retval = VOP_SETATTR(vp, ap->a_vap, cnp->cn_cred, cnp->cn_proc); - if (retval != 0) goto Error_Exit; + *ap->a_vpp = vp; - *ap->a_vpp = vp; + retval = vnode_setattr(vp, ap->a_vap, ap->a_context); + if (retval != 0) goto Error_Exit; -Error_Exit:; - if (retval != 0) { - if (vp) synthfs_remove_directory(vp); - VOP_ABORTOP(dvp, cnp); - } - vput(dvp); + Error_Exit:; + if (retval != 0) { + if (vp) synthfs_remove_directory(vp); + } return retval; } @@ -969,37 +659,39 @@ Error_Exit:; #% remove dvp L U U #% remove vp L U U # - vop_remove { + vnop_remove { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; - + IN vfs_context_t context; + */ int synthfs_remove(ap) -struct vop_remove_args /* { +struct vnop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - struct vnode *dvp = ap->a_dvp; + return synthfs_remove_internal(ap->a_dvp, ap->a_vp, ap->a_cnp, ap->a_context); +} + +static int +synthfs_remove_internal(struct vnode *dvp, struct vnode *vp, + __unused struct componentname *cnp, + __unused vfs_context_t context) +{ struct synthfsnode *sp = VTOS(vp); - struct timeval tv; + struct timeval tv; int retval = 0; - if ((sp->s_flags & (IMMUTABLE | APPEND)) || - (VTOS(dvp)->s_flags & APPEND)) { - retval = EPERM; - goto out; - }; - /* This is sort of silly right now but someday it may make sense... */ if (sp->s_nodeflags & IN_MODIFIED) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 0); + microtime(&tv); + synthfs_update(vp, &tv, &tv, 0); }; /* remove the entry from the namei cache: */ @@ -1028,13 +720,6 @@ out: if (! 
retval) VTOS(dvp)->s_nodeflags |= IN_CHANGE | IN_UPDATE; - if (dvp == vp) { - vrele(vp); - } else { - vput(vp); - }; - - vput(dvp); return (retval); } @@ -1044,23 +729,24 @@ out: #% rmdir dvp L U U #% rmdir vp L U U # - vop_rmdir { + vnop_rmdir { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; + IN vfs_context_t context; */ int synthfs_rmdir(ap) - struct vop_rmdir_args /* { + struct vnop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - DBG_VOP(("synthfs_rmdir called\n")); - return synthfs_remove((struct vop_remove_args *)ap); + return synthfs_remove((struct vnop_remove_args *)ap); } @@ -1071,15 +757,15 @@ synthfs_rmdir(ap) * Locking policy: ignore */ int -synthfs_select(ap) -struct vop_select_args /* { +synthfs_select(__unused +struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; + kauth_cred_t a_cred; void *a_wql; struct proc *a_p; -} */ *ap; +} */ *ap) { DBG_VOP(("synthfs_select called\n")); @@ -1091,15 +777,15 @@ struct vop_select_args /* { #% symlink dvp L U U #% symlink vpp - U - # -# XXX - note that the return vnode has already been vrele'ed -# by the filesystem layer. To use it you must use vget, +# XXX - note that the return vnode has already been vnode_put'ed +# by the filesystem layer. To use it you must use vnode_get, # possibly with a further namei. # - vop_symlink { + vnop_symlink { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; - IN struct vattr *vap; + IN struct vnode_attr *vap; IN char *target; We are responsible for freeing the namei buffer, it is done in synthfs_makenode(), unless there is @@ -1110,12 +796,13 @@ struct vop_select_args /* { int synthfs_symlink(ap) - struct vop_symlink_args /* { + struct vnop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; char *a_target; + vfs_context_t a_context; } */ *ap; { struct vnode *dvp = ap->a_dvp; @@ -1125,17 +812,7 @@ synthfs_symlink(ap) *vpp = NULL; - retval = synthfs_new_symlink(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, ap->a_target, ap->a_cnp->cn_proc, vpp); - if (retval) goto Error_Exit; - - VOP_UNLOCK(*vpp, 0, cnp->cn_proc); - -Error_Exit:; - - if (retval != 0) { - VOP_ABORTOP(dvp, cnp); - } - vput(dvp); + retval = synthfs_new_symlink(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, ap->a_target, vfs_context_proc(cnp->cn_context), vpp); return (retval); } @@ -1146,18 +823,18 @@ Error_Exit:; # #% readlink vp L L L # - vop_readlink { + vnop_readlink { IN struct vnode *vp; INOUT struct uio *uio; - IN struct ucred *cred; + IN kauth_cred_t cred; */ int synthfs_readlink(ap) -struct vop_readlink_args /* { +struct vnop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -1170,8 +847,9 @@ struct vop_readlink_args /* { return 0; }; - if (uio->uio_offset + uio->uio_resid <= sp->s_u.s.s_length) { - count = uio->uio_resid; + // LP64todo - fix this! 
+ if (uio->uio_offset + uio_resid(uio) <= sp->s_u.s.s_length) { + count = uio_resid(uio); } else { count = sp->s_u.s.s_length - uio->uio_offset; }; @@ -1186,27 +864,17 @@ struct vop_readlink_args /* { /* -#% readdir vp L L L -# -vop_readdir { - IN struct vnode *vp; - INOUT struct uio *uio; - IN struct ucred *cred; - INOUT int *eofflag; - OUT int *ncookies; - INOUT u_long **cookies; -*/ - - + * Read directory entries. + */ int synthfs_readdir(ap) -struct vop_readdir_args /* { - struct vnode *vp; - struct uio *uio; - struct ucred *cred; - int *eofflag; - int *ncookies; - u_long **cookies; +struct vnop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; } */ *ap; { struct synthfsnode *sp = VTOS(ap->a_vp); @@ -1214,34 +882,30 @@ struct vop_readdir_args /* { off_t diroffset; /* Offset into simulated directory file */ struct synthfsnode *entry; - DBG_VOP(("\tuio_offset = %d, uio_resid = %d\n", (int) uio->uio_offset, uio->uio_resid)); + DBG_VOP(("\tuio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); + + if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) + return (EINVAL); /* We assume it's all one big buffer... */ if (uio->uio_iovcnt > 1) { DBG_VOP(("\tuio->uio_iovcnt = %d?\n", uio->uio_iovcnt)); return EINVAL; }; - - /* - NFS cookies are not supported: - */ - if ((ap->a_cookies != NULL) || (ap->a_ncookies != NULL)) { - return EINVAL; - }; diroffset = 0; /* * We must synthesize . and .. */ - DBG_VOP(("\tstarting ... uio_offset = %d, uio_resid = %d\n", (int) uio->uio_offset, uio->uio_resid)); + DBG_VOP(("\tstarting ... uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); if (uio->uio_offset == diroffset) { DBG_VOP(("\tAdding .\n")); diroffset += synthfs_adddirentry(sp->s_nodeid, DT_DIR, ".", uio); - DBG_VOP(("\t after adding ., uio_offset = %d, uio_resid = %d\n", (int) uio->uio_offset, uio->uio_resid)); + DBG_VOP(("\t after adding ., uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); } - if ((uio->uio_resid > 0) && (diroffset > uio->uio_offset)) { + if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ return EINVAL; }; @@ -1254,9 +918,9 @@ struct vop_readdir_args /* { } else { diroffset += synthfs_adddirentry(sp->s_nodeid, DT_DIR, "..", uio); } - DBG_VOP(("\t after adding .., uio_offset = %d, uio_resid = %d\n", (int) uio->uio_offset, uio->uio_resid)); + DBG_VOP(("\t after adding .., uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); } - if ((uio->uio_resid > 0) && (diroffset > uio->uio_offset)) { + if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ return EINVAL; }; @@ -1267,7 +931,7 @@ struct vop_readdir_args /* { /* Return this entry */ diroffset += synthfs_adddirentry(entry->s_nodeid, VTTOIF(STOV(entry)->v_type), entry->s_name, uio); }; - if ((uio->uio_resid > 0) && (diroffset > uio->uio_offset)) { + if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ return EINVAL; }; @@ -1290,7 +954,7 @@ struct vop_readdir_args /* { int synthfs_cached_lookup(ap) - struct vop_cachedlookup_args /* { + struct vnop_lookup_args /* { struct vnode *a_dvp; struct vnode 
**a_vpp; struct componentname *a_cnp; @@ -1300,35 +964,16 @@ synthfs_cached_lookup(ap) struct componentname *cnp = ap->a_cnp; u_long nameiop = cnp->cn_nameiop; u_long flags = cnp->cn_flags; - boolean_t lockparent = (flags & LOCKPARENT); - struct proc *p = cnp->cn_proc; - struct ucred *cred = cnp->cn_cred; - struct vnode *target_vp = NULL; - u_int32_t target_vnode_id; /* Capability ID of the target vnode for .. unlock/relock handling check */ struct vnode **vpp = ap->a_vpp; int result = 0; DBG_VOP(("synthfs_cached_lookup called, name = %s, namelen = %ld\n", ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen)); - if (flags & LOCKPARENT) DBG_VOP(("\tLOCKPARENT is set\n")); +#if DEBUG if (flags & ISLASTCN) DBG_VOP(("\tISLASTCN is set\n")); +#endif *vpp = NULL; - if (dp->v_type != VDIR) { - result = ENOTDIR; - goto Err_Exit; - }; - - if ((flags & ISLASTCN) && - (VTOVFS(dp)->mnt_flag & MNT_RDONLY) && - ((nameiop == DELETE) || (nameiop == RENAME))) { - result = EROFS; - goto Err_Exit; - }; - - result = VOP_ACCESS(dp, VEXEC, cred, cnp->cn_proc); - if (result != 0) goto Err_Exit; - /* * Look up an entry in the namei cache */ @@ -1344,66 +989,21 @@ synthfs_cached_lookup(ap) /* An entry matching the parent vnode/name was found in the cache: */ - - target_vp = *vpp; - target_vnode_id = target_vp->v_id; - if (target_vp == dp) { - /* lookup on "." */ - VREF(target_vp); - result = 0; - } else if (flags & ISDOTDOT) { - /* - * Carefully now: trying to step from child to parent; - * must release lock on child before trying to lock parent - * vnode. - */ - VOP_UNLOCK(dp, 0, p); - result = vget(target_vp, LK_EXCLUSIVE, p); - if ((result == 0) && lockparent && (flags & ISLASTCN)) { - result = vn_lock(dp, LK_EXCLUSIVE, p); - } - } else { - result = vget(target_vp, LK_EXCLUSIVE, p); - if (!lockparent || (result != 0) || !(flags & ISLASTCN)) { - VOP_UNLOCK(dp, 0, p); - }; - }; - - /* - Check to make sure the target vnode ID didn't change while we - tried to lock it: - */ - if (result == 0) { - if (target_vnode_id == target_vp->v_id) { - return 0; /* THIS IS THE NORMAL EXIT PATH */ - }; - - /* The vnode ID didn't match anymore: we've got another vnode! 
*/ - vput(target_vp); - /* Unlock the parent vnode in the cases where it should've been left locked: */ - if (lockparent && (dp != target_vp) && (flags & ISLASTCN)) { - VOP_UNLOCK(dp, 0, p); - }; - }; - - /* One last try for a successful lookup through the complete lookup path: */ - result = vn_lock(dp, LK_EXCLUSIVE, p); - if (result == 0) { - return synthfs_lookup(ap); - }; + return (0); Err_Exit:; - return result; + return result; } int synthfs_lookup(ap) - struct vop_cachedlookup_args /* { + struct vnop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { struct vnode *dp = ap->a_dvp; @@ -1413,8 +1013,9 @@ synthfs_lookup(ap) // char *nameptr = cnp->cn_nameptr; u_long flags = cnp->cn_flags; long namelen = cnp->cn_namelen; - struct proc *p = cnp->cn_proc; - struct ucred *cred = cnp->cn_cred; +// struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + kauth_cred_t cred = vfs_context_ucred(ctx); struct synthfsnode *entry; struct vnode *target_vp = NULL; int result = 0; @@ -1424,26 +1025,13 @@ synthfs_lookup(ap) struct vnode *starting_parent = dp; DBG_VOP(("synthfs_lookup called, name = %s, namelen = %ld\n", ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen)); +#if DEBUG if (flags & LOCKPARENT) DBG_VOP(("\tLOCKPARENT is set\n")); if (flags & ISLASTCN) DBG_VOP(("\tISLASTCN is set\n")); +#endif *ap->a_vpp = NULL; - if (dp->v_type != VDIR) { - result = ENOTDIR; - goto Err_Exit; - }; - - if ((flags & ISLASTCN) && - (VTOVFS(dp)->mnt_flag & MNT_RDONLY) && - ((nameiop == DELETE) || (nameiop == RENAME))) { - result = EROFS; - goto Err_Exit; - }; - - result = VOP_ACCESS(dp, VEXEC, cred, cnp->cn_proc); - if (result != 0) goto Err_Exit; - /* first check for "." and ".." */ if (cnp->cn_nameptr[0] == '.') { if (namelen == 1) { @@ -1454,7 +1042,7 @@ synthfs_lookup(ap) found = TRUE; target_vp = dp; - VREF(target_vp); + vnode_get(target_vp); result = 0; @@ -1472,18 +1060,10 @@ synthfs_lookup(ap) * Special case for ".." to prevent deadlock: * always release the parent vnode BEFORE trying to acquire * ITS parent. This avoids deadlocking with another lookup - * starting from the target_vp trying to vget() this directory. + * starting from the target_vp trying to vnode_get() this directory. */ - VOP_UNLOCK(dp, 0, p); - result = vget(target_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (result != 0) { - vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); - goto Err_Exit; - } - if ((flags & LOCKPARENT) && (flags & ISLASTCN)) { - result = vn_lock(dp, LK_EXCLUSIVE, p); - // vput(target_vp); /* XXX WHY WAS THIS HERE? */ - } + result = vnode_get(target_vp); + } else { target_vp = dp; /* dp is already locked and ref'ed */ @@ -1501,9 +1081,8 @@ synthfs_lookup(ap) (*(entry->s_name + namelen) == (char)0)) { found = TRUE; target_vp = STOV(entry); - result = vget(target_vp, LK_EXCLUSIVE | LK_RETRY, p); /* vget is not really needed because refcount is always > 0... */ + result = vnode_getwithref(target_vp); /* refcount is always > 0 for any vnode in this list... */
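The switch from vget() to vnode_getwithref() above reflects the new reference discipline: the synthfs node list itself guarantees a live reference, so the lookup only needs an iocount on top of it. A sketch of that usage pattern, under our reading (not stated by the patch) that vnode_getwithref() takes an iocount on a vnode the caller already knows to be referenced, and that vnode_put() drops it; use_entry() is a hypothetical helper:

/*
 * Sketch only: take an iocount on a vnode that a longer-lived
 * reference (here, membership in the synthfs node list) keeps valid,
 * do the work, then drop the iocount.
 */
static int
use_entry(vnode_t vp)
{
	int error;

	/* fails if the vnode is already being reclaimed */
	error = vnode_getwithref(vp);
	if (error)
		return (error);

	/* ... operate on vp while the iocount pins it ... */

	vnode_put(vp);		/* drop the iocount */
	return (0);
}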
if (result != 0) { - vrele(target_vp); goto Err_Exit; }; @@ -1517,11 +1096,6 @@ synthfs_lookup(ap) Std_Exit:; if (found) { if ((nameiop == DELETE) && (flags & ISLASTCN)) { - /* - * Deleting entries requires write access: - */ - result = VOP_ACCESS(dp, VWRITE, cred, p); - if (result != 0) goto Err_Exit; /* * If the parent directory is "sticky" then the user must own * the directory, or the file in it, else she may not delete it * (unless she's root). This implements * append-only directories */ if ((dsp->s_mode & S_ISVTX) && - (cred->cr_uid != 0) && - (cred->cr_uid != dsp->s_uid) && + suser(cred, NULL) && + (kauth_cred_getuid(cred) != dsp->s_uid) && (target_vp != NULL) && (target_vp->v_type != VLNK) && - (VTOS(target_vp)->s_uid != cred->cr_uid)) { - vput(target_vp); + (VTOS(target_vp)->s_uid != kauth_cred_getuid(cred))) { + vnode_put(target_vp); result = EPERM; goto Err_Exit; }; }; if ((nameiop == RENAME) && (flags & WANTPARENT) && (flags & ISLASTCN)) { - result = VOP_ACCESS(dp, VWRITE, cred, p); - if (result != 0) goto Err_Exit; if (isDot) { - vrele(target_vp); + vnode_put(target_vp); result = EISDIR; goto Err_Exit; }; @@ -1559,43 +1131,25 @@ Std_Exit:; ((nameiop == CREATE) || (nameiop == RENAME) || ((nameiop == DELETE) && (flags & DOWHITEOUT) && (flags & ISWHITEOUT)))) { - /* Write access is required to create entries in the directory: */ - result = VOP_ACCESS(dp, VWRITE, cred, p); - if (result != 0) goto Err_Exit; - - cnp->cn_flags |= SAVENAME; - + /* create a new entry */ result = EJUSTRETURN; } }; - /* XXX PPD Should we do something special in case LOCKLEAF isn't set? */ - if (found && !isDot && !isDotDot && (!(flags & LOCKPARENT) || !(flags & ISLASTCN))) { - VOP_UNLOCK(dp, 0, p); - }; - *ap->a_vpp = target_vp; Err_Exit:; DBG_VOP(("synthfs_lookup: result = %d.\n", result)); if (found) { if (target_vp) { - if (VOP_ISLOCKED(target_vp)) { - DBG_VOP(("synthfs_lookup: target_vp = 0x%08X (locked).\n", (u_long)target_vp)); - } else { - DBG_VOP(("synthfs_lookup: target_vp = 0x%08X (NOT locked?).\n", (u_long)target_vp)); - }; + DBG_VOP(("synthfs_lookup: target_vp = 0x%08X \n", (u_long)target_vp)); } else { DBG_VOP(("synthfs_lookup: found = true but target_vp = NULL?\n")); }; } else { DBG_VOP(("synthfs_lookup: target not found.\n")); }; - if (VOP_ISLOCKED(starting_parent)) { - DBG_VOP(("synthfs_lookup: dp = %08X; starting_parent = 0x%08X (LOCKED).\n", (u_long)dp, (u_long)starting_parent)); - } else { - DBG_VOP(("synthfs_lookup: dp = %08X; starting_parent = 0x%08X (UNLOCKED).\n", (u_long)dp, (u_long)starting_parent)); - }; + DBG_VOP(("synthfs_lookup: dp = %08X; starting_parent = 0x%08X .\n", (u_long)dp, (u_long)starting_parent)); return result; } @@ -1606,17 +1160,18 @@ Err_Exit:; #% pathconf vp L L L # - vop_pathconf { + vnop_pathconf { IN struct vnode *vp; IN int name; OUT register_t *retval; */ int synthfs_pathconf(ap) -struct vop_pathconf_args /* { +struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { DBG_VOP(("synthfs_pathconf called\n")); @@ -1657,40 +1212,29 @@ struct vop_pathconf_args /* { * time. If waitfor is set, then wait for the disk write of the node to * complete.
*/ -/* -#% update vp L L L - IN struct vnode *vp; - IN struct timeval *access; - IN struct timeval *modify; - IN int waitfor; -*/ int -synthfs_update(ap) - struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; - } */ *ap; +synthfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, __unused int waitfor) { - struct vnode *vp = ap->a_vp; struct synthfsnode *sp = VTOS(vp); + struct timeval tv; DBG_ASSERT(sp != NULL); - DBG_ASSERT(*((int*)&vp->v_interlock) == 0); if (((sp->s_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0) && - !(VTOVFS(ap->a_vp)->mnt_flag & MNT_RDONLY)) { - if (sp->s_nodeflags & IN_ACCESS) sp->s_accesstime = *ap->a_access; - if (sp->s_nodeflags & IN_UPDATE) sp->s_modificationtime = *ap->a_modify; - if (sp->s_nodeflags & IN_CHANGE) sp->s_changetime = time; + !(VTOVFS(vp)->mnt_flag & MNT_RDONLY)) { + if (sp->s_nodeflags & IN_ACCESS) sp->s_accesstime = *access; + if (sp->s_nodeflags & IN_UPDATE) sp->s_modificationtime = *modify; + if (sp->s_nodeflags & IN_CHANGE) { + + microtime(&tv); + sp->s_changetime = tv; + } }; /* After the updates are finished, clear the flags */ sp->s_nodeflags &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); -// DBG_ASSERT(*((int*)&ap->a_vp->v_interlock) == 0); return 0; } @@ -1703,71 +1247,11 @@ synthfs_update(ap) ******************************************************************************************/ -/* -#% lock vp U L U -# - vop_lock { - IN struct vnode *vp; - IN int flags; - IN struct proc *p; -*/ - -int -synthfs_lock(ap) -struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -} */ *ap; -{ - return lockmgr(&VTOS(ap->a_vp)->s_lock, ap->a_flags, &ap->a_vp->v_interlock, ap->a_p); -} - -/* - * Unlock an synthfsnode. -#% unlock vp L U L -# - vop_unlock { - IN struct vnode *vp; - IN int flags; - IN struct proc *p; - - */ -int -synthfs_unlock(ap) -struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -} */ *ap; -{ - return lockmgr(&VTOS(ap->a_vp)->s_lock, ap->a_flags | LK_RELEASE, &ap->a_vp->v_interlock, ap->a_p); -} - -/* - * Check for a locked synthfsnode. -#% islocked vp = = = -# - vop_islocked { - IN struct vnode *vp; - - */ -int -synthfs_islocked(ap) -struct vop_islocked_args /* { - struct vnode *a_vp; -} */ *ap; -{ - return lockstatus(&VTOS(ap->a_vp)->s_lock); -} - - - /* # #% inactive vp L U U # - vop_inactive { + vnop_inactive { IN struct vnode *vp; IN struct proc *p; @@ -1775,18 +1259,19 @@ struct vop_islocked_args /* { int synthfs_inactive(ap) -struct vop_inactive_args /* { +struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; - struct proc *p = ap->a_p; struct synthfsnode *sp = VTOS(vp); struct timeval tv; +#if DEBUG if (vp->v_usecount != 0) DBG_VOP(("synthfs_inactive: bad usecount = %d\n", vp->v_usecount )); +#endif /* * Ignore nodes related to stale file handles. @@ -1796,18 +1281,17 @@ struct vop_inactive_args /* { /* This is sort of silly but might make sense in the future: */ if (sp->s_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 0); + microtime(&tv); + synthfs_update(vp, &tv, &tv, 0); } out: - VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. 
*/ if (vp->v_type == VNON) { - vrecycle(vp, (struct slock *)0, p); + vnode_recycle(vp); }; return 0; @@ -1822,7 +1306,7 @@ out: */ int synthfs_reclaim(ap) - struct vop_reclaim_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; + struct vnop_reclaim_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct synthfsnode *sp = VTOS(vp); diff --git a/bsd/miscfs/union/union.h b/bsd/miscfs/union/union.h index 147df4c7f..475a6f7dd 100644 --- a/bsd/miscfs/union/union.h +++ b/bsd/miscfs/union/union.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,6 +62,7 @@ #define __UNION_UNION_H__ #include +#include #ifdef __APPLE_API_PRIVATE struct union_args { @@ -83,6 +84,24 @@ struct union_mount { }; #ifdef KERNEL +/* LP64 version of union_args. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with union_args + */ +/* LP64todo - should this move? */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_union_args { + user_addr_t target; /* Target of loopback */ + int mntflags; /* Options on the mount */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif /* * DEFDIRMODE is the mode bits used to create a shadow directory. @@ -120,29 +139,26 @@ struct union_node { #define UN_KLOCK 0x08 /* Keep upper node locked on vput */ #define UN_CACHED 0x10 /* In union cache */ -extern int union_allocvp __P((struct vnode **, struct mount *, +extern int union_allocvp(struct vnode **, struct mount *, struct vnode *, struct vnode *, struct componentname *, struct vnode *, - struct vnode *, int)); -extern int union_copyfile __P((struct vnode *, struct vnode *, - struct ucred *, struct proc *)); -extern int union_copyup __P((struct union_node *, int, struct ucred *, - struct proc *)); -extern int union_dowhiteout __P((struct union_node *, struct ucred *, - struct proc *)); -extern int union_mkshadow __P((struct union_mount *, struct vnode *, - struct componentname *, struct vnode **)); -extern int union_mkwhiteout __P((struct union_mount *, struct vnode *, - struct componentname *, char *)); -extern int union_vn_create __P((struct vnode **, struct union_node *, - struct proc *)); -extern int union_cn_close __P((struct vnode *, int, struct ucred *, - struct proc *)); -extern void union_removed_upper __P((struct union_node *un)); -extern struct vnode *union_lowervp __P((struct vnode *)); -extern void union_newlower __P((struct union_node *, struct vnode *)); -extern void union_newupper __P((struct union_node *, struct vnode *)); -extern void union_newsize __P((struct vnode *, off_t, off_t)); + struct vnode *, int); +extern int union_copyfile(struct vnode *, struct vnode *, + struct ucred *, struct proc *); +extern int union_copyup(struct union_node *, int, struct ucred *, + struct proc *); +extern int union_dowhiteout(struct union_node *, vfs_context_t); +extern int union_mkshadow(struct union_mount *, struct vnode *, + struct componentname *, struct vnode **); +extern int union_mkwhiteout(struct union_mount *, struct vnode *, + struct componentname *, char *); +extern int union_vn_create(struct vnode **, struct union_node *, struct proc *); +extern int union_cn_close(struct vnode *, int, struct ucred *, struct proc *); +extern void union_removed_upper(struct union_node *un); +extern struct vnode *union_lowervp(struct vnode *); +extern void 
union_newlower(struct union_node *, struct vnode *); +extern void union_newupper(struct union_node *, struct vnode *); +extern void union_newsize(struct vnode *, off_t, off_t); #define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) #define VTOUNION(vp) ((struct union_node *)(vp)->v_data) diff --git a/bsd/miscfs/union/union_subr.c b/bsd/miscfs/union/union_subr.c index 8f5ce12ac..6aa953ee6 100644 --- a/bsd/miscfs/union/union_subr.c +++ b/bsd/miscfs/union/union_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,18 +61,20 @@ #include #include -#include +#include +#include #include #include -#include +#include #include #include #include #include #include -#include +#include #include #include +#include #include #if DIAGNOSTIC @@ -137,7 +139,7 @@ union_updatevp(un, uppervp, lowervp) int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); int nhash = UNION_HASH(uppervp, lowervp); int docache = (lowervp != NULLVP || uppervp != NULLVP); - int lhash, hhash, uhash; + int lhash, uhash; /* * Ensure locking is ordered from lower to higher @@ -170,13 +172,13 @@ union_updatevp(un, uppervp, lowervp) if (un->un_lowervp != lowervp) { if (un->un_lowervp) { - vrele(un->un_lowervp); + vnode_put(un->un_lowervp); if (un->un_path) { _FREE(un->un_path, M_TEMP); un->un_path = 0; } if (un->un_dirvp) { - vrele(un->un_dirvp); + vnode_put(un->un_dirvp); un->un_dirvp = NULLVP; } } @@ -186,7 +188,7 @@ union_updatevp(un, uppervp, lowervp) if (un->un_uppervp != uppervp) { if (un->un_uppervp) - vrele(un->un_uppervp); + vnode_put(un->un_uppervp); un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; @@ -255,8 +257,7 @@ union_newsize(vp, uppersz, lowersz) printf("union: %s size now %ld\n", uppersz != VNOVAL ? "upper" : "lower", (long) sz); #endif - if (UBCISVALID(vp)) - ubc_setsize(vp, sz); /* XXX check error */ + ubc_setsize(vp, sz); } } @@ -272,7 +273,7 @@ union_newsize(vp, uppersz, lowersz) * being mapped. either, but not both, can be nil. * if supplied, (uppervp) is locked. * the reference is either maintained in the new union_node - * object which is allocated, or they are vrele'd. + * object which is allocated, or they are vnode_put'd. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot @@ -286,7 +287,7 @@ union_newsize(vp, uppersz, lowersz) * vnode. this lock is only taken if we are going to * call getnewvnode, since the kernel itself is single-threaded. * - * if an entry is found on the list, then call vget() to + * if an entry is found on the list, then call vnode_get() to * take a reference. this is done because there may be * zero references to it and so it needs to removed from * the vnode free list. 
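The user_union_args structure above exists for the copyin path in union_mount() further down: a 64-bit caller's argument layout matches it directly, while a 32-bit caller's union_args is copied in narrow and widened field by field. A distilled sketch of that pattern (copyin_union_args is a hypothetical helper; union_mount() inlines the equivalent code):

/*
 * Sketch: factor out the 32/64-bit mount-argument copyin that
 * union_mount() performs inline.
 */
static int
copyin_union_args(user_addr_t data, vfs_context_t ctx,
    struct user_union_args *args)
{
	int error;

	if (vfs_context_is64bit(ctx)) {
		/* 64-bit caller: user layout matches user_union_args */
		error = copyin(data, (caddr_t)args, sizeof (*args));
	} else {
		/* 32-bit caller: widen the narrow structure field by field */
		struct union_args temp;

		error = copyin(data, (caddr_t)&temp, sizeof (temp));
		if (error == 0) {
			args->target = CAST_USER_ADDR_T(temp.target);
			args->mntflags = temp.mntflags;
		}
	}
	return (error);
}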
@@ -308,9 +309,11 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int hash; - int vflag; + int markroot; int try; struct union_node *unp; + struct vnode_fsparam vfsp; + enum vtype vtype; if (uppervp == NULLVP && lowervp == NULLVP) panic("union: unidentifiable allocation"); @@ -321,15 +324,15 @@ union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) } /* detect the root vnode (and aliases) */ - vflag = 0; + markroot = 0; if ((uppervp == um->um_uppervp) && ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { if (lowervp == NULLVP) { lowervp = um->um_lowervp; if (lowervp != NULLVP) - VREF(lowervp); + vnode_get(lowervp); } - vflag = VROOT; + markroot = VROOT; } loop: @@ -366,8 +369,7 @@ loop: (un->un_uppervp == uppervp || un->un_uppervp == NULLVP) && (UNIONTOV(un)->v_mount == mp)) { - if (vget(UNIONTOV(un), 0, - cnp ? cnp->cn_proc : NULL)) { + if (vnode_get(UNIONTOV(un))) { union_list_unlock(hash); goto loop; } @@ -387,7 +389,7 @@ loop: * uppervp is locked, though un->un_uppervp * may not be. this doesn't break the locking * hierarchy since in the case that un->un_uppervp - * is not yet locked it will be vrele'd and replaced + * is not yet locked it will be vnode_put'd and replaced * with uppervp. */ @@ -407,7 +409,7 @@ loop: #endif } else { if (un->un_flags & UN_LOCKED) { - vrele(UNIONTOV(un)); + vnode_put(UNIONTOV(un)); un->un_flags |= UN_WANT; sleep((caddr_t) &un->un_flags, PINOD); goto loop; @@ -434,7 +436,7 @@ loop: if (uppervp != un->un_uppervp) { union_newupper(un, uppervp); } else if (uppervp) { - vrele(uppervp); + vnode_put(uppervp); } if (un->un_uppervp) { @@ -457,11 +459,11 @@ loop: bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); + vnode_get(dvp); un->un_dirvp = dvp; } } else if (lowervp) { - vrele(lowervp); + vnode_put(lowervp); } *vpp = UNIONTOV(un); return (0); @@ -479,31 +481,38 @@ loop: } MALLOC(unp, void *, sizeof(struct union_node), M_TEMP, M_WAITOK); - error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); + + if (uppervp) + vtype = uppervp->v_type; + else + vtype = lowervp->v_type; + //bzero(&vfsp, sizeof(struct vnode_fsparam)); + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "unionfs"; + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_fsnode = unp; + vfsp.vnfs_cnp = cnp; + vfsp.vnfs_vops = union_vnodeop_p; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = markroot; + + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp); if (error) { FREE(unp, M_TEMP); if (uppervp) { - if (dvp == uppervp) - vrele(uppervp); - else - vput(uppervp); + vnode_put(uppervp); } if (lowervp) - vrele(lowervp); + vnode_put(lowervp); goto out; } - (*vpp)->v_data = unp; - (*vpp)->v_flag |= vflag; - if (uppervp) - (*vpp)->v_type = uppervp->v_type; - else - (*vpp)->v_type = lowervp->v_type; - - if ((*vpp)->v_type == VREG) - ubc_info_init(*vpp); - + (*vpp)->v_tag = VT_UNION; un = VTOUNION(*vpp); un->un_vnode = *vpp; un->un_uppervp = uppervp; @@ -512,7 +521,7 @@ loop: un->un_lowersz = VNOVAL; un->un_pvp = undvp; if (undvp != NULLVP) - VREF(undvp); + vnode_get(undvp); un->un_dircache = 0; un->un_openl = 0; un->un_flags = UN_LOCKED; @@ -529,7 +538,7 @@ loop: un->un_path = _MALLOC(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; - VREF(dvp); + 
vnode_get(dvp); un->un_dirvp = dvp; } else { un->un_hash = 0; @@ -543,7 +552,7 @@ loop: } if (xlowervp) - vrele(xlowervp); + vnode_put(xlowervp); out: if (docache) @@ -564,13 +573,13 @@ union_freevp(vp) } if (un->un_pvp != NULLVP) - vrele(un->un_pvp); + vnode_put(un->un_pvp); if (un->un_uppervp != NULLVP) - vrele(un->un_uppervp); + vnode_put(un->un_uppervp); if (un->un_lowervp != NULLVP) - vrele(un->un_lowervp); + vnode_put(un->un_lowervp); if (un->un_dirvp != NULLVP) - vrele(un->un_dirvp); + vnode_put(un->un_dirvp); if (un->un_path) _FREE(un->un_path, M_TEMP); @@ -586,15 +595,13 @@ union_freevp(vp) * and (tvp) are locked on entry and exit. */ int -union_copyfile(fvp, tvp, cred, p) - struct vnode *fvp; - struct vnode *tvp; - struct ucred *cred; - struct proc *p; +union_copyfile(struct vnode *fvp, struct vnode *tvp, kauth_cred_t cred, + struct proc *p) { - char *buf; + char *bufp; struct uio uio; - struct iovec iov; + struct iovec_32 iov; + struct vfs_context context; int error = 0; /* @@ -605,51 +612,50 @@ union_copyfile(fvp, tvp, cred, p) * give up at the first sign of trouble. */ - uio.uio_procp = p; + context.vc_proc = p; + context.vc_ucred = cred; + +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ uio.uio_segflg = UIO_SYSSPACE; +#else + uio.uio_segflg = UIO_SYSSPACE32; +#endif uio.uio_offset = 0; - VOP_UNLOCK(fvp, 0, p); /* XXX */ - VOP_LEASE(fvp, p, cred, LEASE_READ); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VOP_UNLOCK(tvp, 0, p); /* XXX */ - VOP_LEASE(tvp, p, cred, LEASE_WRITE); - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - - buf = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK); + bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; - uio.uio_iov = &iov; + uio.uio_iovs.iov32p = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; + iov.iov_base = (uintptr_t)bufp; iov.iov_len = MAXPHYSIO; - uio.uio_resid = iov.iov_len; + uio_setresid(&uio, iov.iov_len); uio.uio_rw = UIO_READ; - error = VOP_READ(fvp, &uio, 0, cred); + error = VNOP_READ(fvp, &uio, 0, &context); if (error == 0) { - uio.uio_iov = &iov; + uio.uio_iovs.iov32p = &iov; uio.uio_iovcnt = 1; - iov.iov_base = buf; - iov.iov_len = MAXPHYSIO - uio.uio_resid; + iov.iov_base = (uintptr_t)bufp; + iov.iov_len = MAXPHYSIO - uio_resid(&uio); uio.uio_offset = offset; uio.uio_rw = UIO_WRITE; - uio.uio_resid = iov.iov_len; + uio_setresid(&uio, iov.iov_len); - if (uio.uio_resid == 0) + if (uio_resid(&uio) == 0) break; do { - error = VOP_WRITE(tvp, &uio, 0, cred); - } while ((uio.uio_resid > 0) && (error == 0)); + error = VNOP_WRITE(tvp, &uio, 0, &context); + } while ((uio_resid(&uio) > 0) && (error == 0)); } } while (error == 0); - _FREE(buf, M_TEMP); + _FREE(bufp, M_TEMP); return (error); } @@ -658,19 +664,20 @@ union_copyfile(fvp, tvp, cred, p) * locked on exit. 
*/ int -union_copyup(un, docopy, cred, p) - struct union_node *un; - int docopy; - struct ucred *cred; - struct proc *p; +union_copyup(struct union_node *un, int docopy, kauth_cred_t cred, + struct proc *p) { int error; struct vnode *lvp, *uvp; + struct vfs_context context; error = union_vn_create(&uvp, un, p); if (error) return (error); + context.vc_proc = p; + context.vc_ucred = cred; + /* at this point, uppervp is locked */ union_newupper(un, uvp); un->un_flags |= UN_ULOCK; @@ -680,14 +687,12 @@ union_copyup(un, docopy, cred, p) if (docopy) { /* * XX - should not ignore errors - * from VOP_CLOSE + * from vnop_close */ - vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(lvp, FREAD, cred, p); + error = VNOP_OPEN(lvp, FREAD, &context); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); - VOP_UNLOCK(lvp, 0, p); - (void) VOP_CLOSE(lvp, FREAD, cred, p); + (void) VNOP_CLOSE(lvp, FREAD, &context); } #ifdef UNION_DIAGNOSTIC if (error == 0) @@ -696,9 +701,7 @@ union_copyup(un, docopy, cred, p) } un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(uvp, 0, p); union_vn_close(uvp, FWRITE, cred, p); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; /* @@ -713,8 +716,8 @@ union_copyup(un, docopy, cred, p) int i; for (i = 0; i < un->un_openl; i++) { - (void) VOP_CLOSE(lvp, FREAD, cred, p); - (void) VOP_OPEN(uvp, FREAD, cred, p); + (void) VNOP_CLOSE(lvp, FREAD, &context); + (void) VNOP_OPEN(uvp, FREAD, &context); } un->un_openl = 0; } @@ -741,10 +744,8 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) * from or what it is being used for. This must duplicate * some of the work done by NDINIT, some of the work done * by namei, some of the work done by lookup and some of - * the work done by VOP_LOOKUP when given a CREATE flag. + * the work done by vnop_lookup when given a CREATE flag. * Conclusion: Horrible. - * - * The pathname buffer will be FREEed by VOP_MKDIR. */ cn->cn_namelen = pathlen; cn->cn_pnbuf = _MALLOC_ZONE(cn->cn_namelen+1, M_NAMEI, M_WAITOK); @@ -754,19 +755,22 @@ union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) cn->cn_nameiop = CREATE; cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); +#ifdef XXX_HELP_ME cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; else cn->cn_cred = um->um_cred; +#endif + cn->cn_context = cnp->cn_context; /* XXX !UNMNT_ABOVE case ??? */ cn->cn_nameptr = cn->cn_pnbuf; cn->cn_hash = cnp->cn_hash; cn->cn_consume = cnp->cn_consume; - VREF(dvp); + vnode_get(dvp); error = relookup(dvp, vpp, cn); if (!error) - vrele(dvp); + vnode_put(dvp); return (error); } @@ -791,8 +795,7 @@ union_mkshadow(um, dvp, cnp, vpp) struct vnode **vpp; { int error; - struct vattr va; - struct proc *p = cnp->cn_proc; + struct vnode_attr va; struct componentname cn; error = union_relookup(um, dvp, vpp, cnp, &cn, @@ -801,9 +804,7 @@ union_mkshadow(um, dvp, cnp, vpp) return (error); if (*vpp) { - VOP_ABORTOP(dvp, &cn); - VOP_UNLOCK(dvp, 0, p); - vrele(*vpp); + vnode_put(*vpp); *vpp = NULLVP; return (EEXIST); } @@ -815,15 +816,11 @@ union_mkshadow(um, dvp, cnp, vpp) * 777 modified by umask (ie mostly identical to the * mkdir syscall). 
(jsp, kb) */ + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VDIR); + VATTR_SET(&va, va_mode, um->um_cmode); - VATTR_NULL(&va); - va.va_type = VDIR; - va.va_mode = um->um_cmode; - - /* VOP_LEASE: dvp is locked */ - VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); - - error = VOP_MKDIR(dvp, vpp, &cn, &va); + error = vn_create(dvp, vpp, &cn, &va, 0, cnp->cn_context); return (error); } @@ -844,33 +841,22 @@ union_mkwhiteout(um, dvp, cnp, path) char *path; { int error; - struct vattr va; - struct proc *p = cnp->cn_proc; struct vnode *wvp; struct componentname cn; - VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); if (error) { - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } - if (wvp) { - VOP_ABORTOP(dvp, &cn); - vrele(dvp); - vrele(wvp); + vnode_put(dvp); + vnode_put(wvp); return (EEXIST); } - /* VOP_LEASE: dvp is locked */ - VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); + error = VNOP_WHITEOUT(dvp, &cn, CREATE, cnp->cn_context); - error = VOP_WHITEOUT(dvp, &cn, CREATE); - if (error) - VOP_ABORTOP(dvp, &cn); - - vrele(dvp); + vnode_put(dvp); return (error); } @@ -890,9 +876,10 @@ union_vn_create(vpp, un, p) struct proc *p; { struct vnode *vp; - struct ucred *cred = p->p_ucred; - struct vattr vat; - struct vattr *vap = &vat; + kauth_cred_t cred = p->p_ucred; + struct vnode_attr vat; + struct vnode_attr *vap = &vat; + struct vfs_context context; int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); int error; int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; @@ -901,6 +888,9 @@ union_vn_create(vpp, un, p) *vpp = NULLVP; + context.vc_proc = p; + context.vc_ucred = p->p_ucred; + /* * Build a new componentname structure (for the same * reasons outlined in union_mkshadow). @@ -917,24 +907,19 @@ union_vn_create(vpp, un, p) bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); - cn.cn_proc = p; - cn.cn_cred = p->p_ucred; + cn.cn_context = &context; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; - VREF(un->un_dirvp); + vnode_get(un->un_dirvp); if (error = relookup(un->un_dirvp, &vp, &cn)) return (error); - vrele(un->un_dirvp); + vnode_put(un->un_dirvp); if (vp) { - VOP_ABORTOP(un->un_dirvp, &cn); - if (un->un_dirvp == vp) - vrele(un->un_dirvp); - else - vput(un->un_dirvp); - vrele(vp); + vnode_put(un->un_dirvp); + vnode_put(vp); return (EEXIST); } @@ -946,37 +931,46 @@ union_vn_create(vpp, un, p) * it is unioned, will require access to the top *and* * bottom files. Access when not unioned will simply * require access to the top-level file.
+ * decide on authorisation behaviour */ - VATTR_NULL(vap); - vap->va_type = VREG; - vap->va_mode = cmode; - VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); - if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) + + VATTR_INIT(vap); + VATTR_SET(vap, va_type, VREG); + VATTR_SET(vap, va_mode, cmode); + + if (error = vn_create(un->un_dirvp, &vp, &cn, vap, 0, &context)) return (error); - if (error = VOP_OPEN(vp, fmode, cred, p)) { - vput(vp); + if (error = VNOP_OPEN(vp, fmode, &context)) { + vnode_put(vp); return (error); } + vnode_lock(vp); if (++vp->v_writecount <= 0) panic("union: v_writecount"); + vnode_unlock(vp); *vpp = vp; return (0); } int -union_vn_close(vp, fmode, cred, p) - struct vnode *vp; - int fmode; - struct ucred *cred; - struct proc *p; +union_vn_close(struct vnode *vp, int fmode, kauth_cred_t cred, + struct proc *p) { + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = cred; - if (fmode & FWRITE) + if (fmode & FWRITE) { + vnode_lock(vp); --vp->v_writecount; - return (VOP_CLOSE(vp, fmode, cred, p)); + vnode_unlock(vp); + } + return (VNOP_CLOSE(vp, fmode, &context)); } void @@ -993,7 +987,6 @@ union_removed_upper(un) if (un->un_flags & UN_ULOCK) { un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); } } @@ -1006,7 +999,7 @@ union_lowervp(vp) if ((un->un_lowervp != NULLVP) && (vp->v_type == un->un_lowervp->v_type)) { - if (vget(un->un_lowervp, 0, current_proc()) == 0) + if (vnode_get(un->un_lowervp) == 0) return (un->un_lowervp); } @@ -1019,17 +1012,16 @@ union_lowervp(vp) * during a remove/rmdir operation. */ int -union_dowhiteout(un, cred, p) - struct union_node *un; - struct ucred *cred; - struct proc *p; +union_dowhiteout(struct union_node *un, vfs_context_t ctx) { - struct vattr va; + struct vnode_attr va; if (un->un_lowervp != NULLVP) return (1); - if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + if (vnode_getattr(un->un_uppervp, &va, ctx) == 0 && (va.va_flags & OPAQUE)) return (1); @@ -1046,7 +1038,7 @@ union_dircache_r(vp, vppp, cntp) if (vp->v_op != union_vnodeop_p) { if (vppp) { - VREF(vp); + vnode_get(vp); *(*vppp)++ = vp; if (--(*cntp) == 0) panic("union: dircache table too small"); @@ -1069,27 +1061,26 @@ union_dircache(vp, p) struct vnode *vp; struct proc *p; { - int cnt; + int count; struct vnode *nvp; struct vnode **vpp; struct vnode **dircache; struct union_node *un; int error; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); dircache = VTOUNION(vp)->un_dircache; nvp = NULLVP; if (dircache == 0) { - cnt = 0; - union_dircache_r(vp, 0, &cnt); - cnt++; + count = 0; + union_dircache_r(vp, 0, &count); + count++; dircache = (struct vnode **) - _MALLOC(cnt * sizeof(struct vnode *), + _MALLOC(count * sizeof(struct vnode *), M_TEMP, M_WAITOK); vpp = dircache; - union_dircache_r(vp, &vpp, &cnt); + union_dircache_r(vp, &vpp, &count); *vpp = NULLVP; vpp = dircache + 1; } else { @@ -1103,8 +1094,7 @@ union_dircache(vp, p) if (*vpp == NULLVP) goto out; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); - VREF(*vpp); + vnode_get(*vpp); error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); if (error) goto out; @@ -1114,6 +1104,5 @@ union_dircache(vp, p) un->un_dircache = dircache; out: - VOP_UNLOCK(vp, 0, p); return (nvp); } diff --git a/bsd/miscfs/union/union_vfsops.c b/bsd/miscfs/union/union_vfsops.c index 779831f2d..959c201d2 100644 --- a/bsd/miscfs/union/union_vfsops.c +++ b/bsd/miscfs/union/union_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -67,38 +67,36 @@ #include #include #include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include +static int union_itercallback(__unused vnode_t, void *); + /* * Mount union filesystem */ int -union_mount(mp, path, data, ndp, p) - struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +union_mount(mount_t mp, __unused vnode_t devvp, user_addr_t data, vfs_context_t context) { + proc_t p = vfs_context_proc(context); int error = 0; - struct union_args args; + struct user_union_args args; struct vnode *lowerrootvp = NULLVP; struct vnode *upperrootvp = NULLVP; struct union_mount *um = 0; - struct ucred *cred = 0; - struct ucred *scred; - struct vattr va; + kauth_cred_t cred = NOCRED; char *cp; int len; u_int size; - + struct nameidata nd; + #ifdef UNION_DIAGNOSTIC printf("union_mount(mp = %x)\n", mp); #endif @@ -112,31 +110,42 @@ union_mount(mp, path, data, ndp, p) * 1. a way to convert between rdonly and rdwr mounts. * 2. support for nfs exports. */ - error = EOPNOTSUPP; + error = ENOTSUP; goto bad; } /* * Get argument */ - if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args))) + if (vfs_context_is64bit(context)) { + error = copyin(data, (caddr_t)&args, sizeof(args)); + } + else { + struct union_args temp; + error = copyin(data, (caddr_t)&temp, sizeof (temp)); + args.target = CAST_USER_ADDR_T(temp.target); + args.mntflags = temp.mntflags; + } + if (error) goto bad; lowerrootvp = mp->mnt_vnodecovered; - VREF(lowerrootvp); + vnode_get(lowerrootvp); /* * Find upper node. */ - NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, - UIO_USERSPACE, args.target, p); + NDINIT(&nd, LOOKUP, FOLLOW|WANTPARENT, + (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), + args.target, context); - if (error = namei(ndp)) + if ((error = namei(&nd))) goto bad; - upperrootvp = ndp->ni_vp; - vrele(ndp->ni_dvp); - ndp->ni_dvp = NULL; + nameidone(&nd); + upperrootvp = nd.ni_vp; + vnode_put(nd.ni_dvp); + nd.ni_dvp = NULL; if (upperrootvp->v_type != VDIR) { error = EINVAL; @@ -150,7 +159,7 @@ union_mount(mp, path, data, ndp, p) /* * Keep a held reference to the target vnodes. - * They are vrele'd in union_unmount. + * They are vnode_put'd in union_unmount. * * Depending on the _BELOW flag, the filesystems are * viewed in a different order. 
In effect, this is the @@ -170,7 +179,7 @@ union_mount(mp, path, data, ndp, p) break; case UNMNT_REPLACE: - vrele(lowerrootvp); + vnode_put(lowerrootvp); lowerrootvp = NULLVP; um->um_uppervp = upperrootvp; um->um_lowervp = lowerrootvp; @@ -186,13 +195,13 @@ union_mount(mp, path, data, ndp, p) * supports whiteout operations */ if ((mp->mnt_flag & MNT_RDONLY) == 0) { - error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + error = VNOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, + LOOKUP, context); if (error) goto bad; } - um->um_cred = p->p_ucred; - crhold(um->um_cred); + um->um_cred = kauth_cred_get_with_ref(); um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask; /* @@ -223,9 +232,6 @@ union_mount(mp, path, data, ndp, p) mp->mnt_data = (qaddr_t) um; vfs_getnewfsid(mp); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, - MNAMELEN - 1, (size_t *)&size); - bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); switch (um->um_op) { case UNMNT_ABOVE: @@ -239,9 +245,9 @@ union_mount(mp, path, data, ndp, p) break; } len = strlen(cp); - bcopy(cp, mp->mnt_stat.f_mntfromname, len); + bcopy(cp, mp->mnt_vfsstat.f_mntfromname, len); - cp = mp->mnt_stat.f_mntfromname + len; + cp = mp->mnt_vfsstat.f_mntfromname + len; len = MNAMELEN - len; (void) copyinstr(args.target, cp, len - 1, (size_t *)&size); @@ -249,7 +255,7 @@ union_mount(mp, path, data, ndp, p) #ifdef UNION_DIAGNOSTIC printf("union_mount: from %s, on %s\n", - mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); + mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); #endif return (0); @@ -257,11 +263,11 @@ bad: if (um) _FREE(um, M_UFSMNT); if (cred != NOCRED) - crfree(cred); + kauth_cred_rele(cred); if (upperrootvp) - vrele(upperrootvp); + vnode_put(upperrootvp); if (lowerrootvp) - vrele(lowerrootvp); + vnode_put(lowerrootvp); return (error); } @@ -271,30 +277,35 @@ bad: * when that filesystem was mounted. */ int -union_start(mp, flags, p) - struct mount *mp; - int flags; - struct proc *p; +union_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) { return (0); } +static int +union_itercallback(__unused vnode_t vp, void *args) +{ + int num = *(int *)args; + + *(int *)args = num + 1; + return(VNODE_RETURNED); +} + + + /* * Free reference to union layer */ int -union_unmount(mp, mntflags, p) - struct mount *mp; - int mntflags; - struct proc *p; +union_unmount(mount_t mp, int mntflags, __unused vfs_context_t context) { struct union_mount *um = MOUNTTOUNIONMOUNT(mp); struct vnode *um_rootvp; int error; int freeing; int flags = 0; - struct ucred *cred; + kauth_cred_t cred; #ifdef UNION_DIAGNOSTIC printf("union_unmount(mp = %x)\n", mp); @@ -303,7 +314,7 @@ union_unmount(mp, mntflags, p) if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - if (error = union_root(mp, &um_rootvp)) + if ((error = union_root(mp, &um_rootvp))) return (error); /* @@ -316,14 +327,9 @@ union_unmount(mp, mntflags, p) * in the filesystem. 
*/ for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) { - struct vnode *vp; - int n; + int n = 0; - /* count #vnodes held on mount list */ - for (n = 0, vp = mp->mnt_vnodelist.lh_first; - vp != NULLVP; - vp = vp->v_mntvnodes.le_next) - n++; + vnode_iterate(mp, VNODE_NOLOCK_INTERNAL, union_itercallback, &n); /* if this is unchanged then stop */ if (n == freeing) @@ -334,8 +340,8 @@ union_unmount(mp, mntflags, p) } /* At this point the root vnode should have a single reference */ - if (um_rootvp->v_usecount > 1) { - vput(um_rootvp); + if (vnode_isinuse(um_rootvp, 0)) { + vnode_put(um_rootvp); return (EBUSY); } @@ -346,21 +352,21 @@ union_unmount(mp, mntflags, p) * Discard references to upper and lower target vnodes. */ if (um->um_lowervp) - vrele(um->um_lowervp); - vrele(um->um_uppervp); + vnode_put(um->um_lowervp); + vnode_put(um->um_uppervp); cred = um->um_cred; if (cred != NOCRED) { um->um_cred = NOCRED; - crfree(cred); + kauth_cred_rele(cred); } /* * Release reference on underlying root vnode */ - vput(um_rootvp); + vnode_put(um_rootvp); /* * And blow it away for future re-use */ - vgone(um_rootvp); + vnode_reclaim(um_rootvp); /* * Finally, throw away the union_mount structure */ @@ -370,28 +376,17 @@ union_unmount(mp, mntflags, p) } int -union_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; +union_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t context) { - struct proc *p = current_proc(); /* XXX */ struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int error; - int loselock; /* * Return locked reference to root. */ - VREF(um->um_uppervp); - if ((um->um_op == UNMNT_BELOW) && - VOP_ISLOCKED(um->um_uppervp)) { - loselock = 1; - } else { - vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - loselock = 0; - } + vnode_get(um->um_uppervp); if (um->um_lowervp) - VREF(um->um_lowervp); + vnode_get(um->um_lowervp); error = union_allocvp(vpp, mp, (struct vnode *) 0, (struct vnode *) 0, @@ -401,75 +396,85 @@ union_root(mp, vpp) 1); if (error) { - if (loselock) - vrele(um->um_uppervp); - else - vput(um->um_uppervp); + vnode_put(um->um_uppervp); if (um->um_lowervp) - vrele(um->um_lowervp); - } else { - if (loselock) - VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; - } + vnode_put(um->um_lowervp); + } return (error); } -int -union_statfs(mp, sbp, p) - struct mount *mp; - struct statfs *sbp; - struct proc *p; +static int +union_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) { int error; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); - struct statfs mstat; - int lbsize; + struct vfs_attr attr; + uint32_t lbsize = 0; #ifdef UNION_DIAGNOSTIC - printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp, + printf("union_vfs_getattr(mp = %x, lvp = %x, uvp = %x)\n", mp, um->um_lowervp, um->um_uppervp); #endif - bzero(&mstat, sizeof(mstat)); - + /* Get values from lower file system (if any) */ if (um->um_lowervp) { - error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p); + VFSATTR_INIT(&attr); + VFSATTR_WANTED(&attr, f_bsize); + VFSATTR_WANTED(&attr, f_blocks); + VFSATTR_WANTED(&attr, f_bused); + VFSATTR_WANTED(&attr, f_files); + error = vfs_getattr(um->um_lowervp->v_mount, &attr, context); if (error) return (error); + + /* now copy across the "interesting" information and fake the rest */ + if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) + lbsize = attr.f_bsize; + else + lbsize = um->um_lowervp->v_mount->mnt_devblocksize; + fsap->f_blocks = VFSATTR_IS_SUPPORTED(&attr, f_blocks) ? attr.f_blocks : 0; + fsap->f_bused = VFSATTR_IS_SUPPORTED(&attr, f_bused) ? 
attr.f_bused : 0; + fsap->f_files = VFSATTR_IS_SUPPORTED(&attr, f_files) ? attr.f_files : 0; + } else { + fsap->f_blocks = 0; + fsap->f_bused = 0; + fsap->f_files = 0; } - /* now copy across the "interesting" information and fake the rest */ -#if 0 - sbp->f_type = mstat.f_type; - sbp->f_flags = mstat.f_flags; - sbp->f_bsize = mstat.f_bsize; - sbp->f_iosize = mstat.f_iosize; -#endif - lbsize = mstat.f_bsize; - sbp->f_blocks = mstat.f_blocks; - sbp->f_bfree = mstat.f_bfree; - sbp->f_bavail = mstat.f_bavail; - sbp->f_files = mstat.f_files; - sbp->f_ffree = mstat.f_ffree; - - error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p); + VFSATTR_INIT(&attr); + VFSATTR_WANTED(&attr, f_bsize); + VFSATTR_WANTED(&attr, f_blocks); + VFSATTR_WANTED(&attr, f_bfree); + VFSATTR_WANTED(&attr, f_bavail); + VFSATTR_WANTED(&attr, f_files); + VFSATTR_WANTED(&attr, f_ffree); + error = vfs_getattr(um->um_uppervp->v_mount, &attr, context); if (error) return (error); - sbp->f_flags = mstat.f_flags; - sbp->f_bsize = mstat.f_bsize; - sbp->f_iosize = mstat.f_iosize; + if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) { + fsap->f_bsize = attr.f_bsize; + VFSATTR_SET_SUPPORTED(fsap, f_bsize); + } + if (VFSATTR_IS_SUPPORTED(&attr, f_iosize)) { + fsap->f_iosize = attr.f_iosize; + VFSATTR_SET_SUPPORTED(fsap, f_iosize); + } /* * if the lower and upper blocksizes differ, then frig the * block counts so that the sizes reported by df make some * kind of sense. none of this makes sense though. */ - - if (mstat.f_bsize != lbsize) - sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize; + if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) + fsap->f_bsize = attr.f_bsize; + else + fsap->f_bsize = um->um_uppervp->v_mount->mnt_devblocksize; + VFSATTR_RETURN(fsap, f_bsize, attr.f_bsize); + if (fsap->f_bsize != lbsize) + fsap->f_blocks = fsap->f_blocks * lbsize / attr.f_bsize; /* * The "total" fields count total resources in all layers, @@ -477,49 +482,52 @@ union_statfs(mp, sbp, p) * free in the upper layer (since only the upper layer * is writeable). */ - sbp->f_blocks += mstat.f_blocks; - sbp->f_bfree = mstat.f_bfree; - sbp->f_bavail = mstat.f_bavail; - sbp->f_files += mstat.f_files; - sbp->f_ffree = mstat.f_ffree; - - if (sbp != &mp->mnt_stat) { - sbp->f_type = mp->mnt_vfc->vfc_typenum; - bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); - bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); - bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); - } + if (VFSATTR_IS_SUPPORTED(&attr, f_blocks)) + fsap->f_blocks += attr.f_blocks; + if (VFSATTR_IS_SUPPORTED(&attr, f_bfree)) + fsap->f_bfree = attr.f_bfree; + if (VFSATTR_IS_SUPPORTED(&attr, f_bavail)) + fsap->f_bavail = attr.f_bavail; + if (VFSATTR_IS_SUPPORTED(&attr, f_bused)) + fsap->f_bused += attr.f_bused; + if (VFSATTR_IS_SUPPORTED(&attr, f_files)) + fsap->f_files += attr.f_files; + if (VFSATTR_IS_SUPPORTED(&attr, f_ffree)) + fsap->f_ffree = attr.f_ffree; + + VFSATTR_SET_SUPPORTED(fsap, f_bsize); + VFSATTR_SET_SUPPORTED(fsap, f_blocks); + VFSATTR_SET_SUPPORTED(fsap, f_bfree); + VFSATTR_SET_SUPPORTED(fsap, f_bavail); + VFSATTR_SET_SUPPORTED(fsap, f_bused); + VFSATTR_SET_SUPPORTED(fsap, f_files); + VFSATTR_SET_SUPPORTED(fsap, f_ffree); + return (0); } /* * XXX - Assumes no data cached at union layer. 
*/ -#define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \ - struct proc *)))nullop) - -#define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \ - struct mbuf *, struct vnode **, int *, struct ucred **)))eopnotsupp) -int union_init __P((struct vfsconf *)); -#define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ - struct proc *)))eopnotsupp) -#define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ - size_t, struct proc *)))eopnotsupp) -#define union_vget ((int (*) __P((struct mount *, void *, struct vnode **))) \ - eopnotsupp) -#define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) +#define union_sync (int (*) (mount_t, int, ucred_t, vfs_context_t))nullop + +#define union_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp +int union_init (struct vfsconf *); +#define union_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp +#define union_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp +#define union_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp struct vfsops union_vfsops = { union_mount, union_start, union_unmount, union_root, - union_quotactl, - union_statfs, + NULL, /* quotactl */ + union_vfs_getattr, union_sync, union_vget, union_fhtovp, union_vptofh, union_init, - union_sysctl, + union_sysctl }; diff --git a/bsd/miscfs/union/union_vnops.c b/bsd/miscfs/union/union_vnops.c index 5ff3134f9..4b1ca8ef1 100644 --- a/bsd/miscfs/union/union_vnops.c +++ b/bsd/miscfs/union/union_vnops.c @@ -62,20 +62,22 @@ #include #include #include +#include #include #include #include #include -#include -#include +#include +#include #include #include -#include +#include #include #include #include #include #include +#include #define FIXUP(un, p) { \ if (((un)->un_flags & UN_ULOCK) == 0) { \ @@ -89,19 +91,15 @@ union_fixup(un, p) struct proc *p; { - vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; } static int -union_lookup1(udvp, dvpp, vpp, cnp) - struct vnode *udvp; - struct vnode **dvpp; - struct vnode **vpp; - struct componentname *cnp; +union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp, + struct componentname *cnp) { int error; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; struct vnode *tdvp; struct vnode *dvp; struct mount *mp; @@ -124,26 +122,16 @@ union_lookup1(udvp, dvpp, vpp, cnp) */ tdvp = dvp; *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; - vput(tdvp); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + vnode_put(tdvp); + vnode_get(dvp); } } - error = VOP_LOOKUP(dvp, &tdvp, cnp); + error = VNOP_LOOKUP(dvp, &tdvp, cnp, ctx); if (error) return (error); - /* - * The parent directory will have been unlocked, unless lookup - * found the last component. In which case, re-lock the node - * here to allow it to be unlocked again (phew) in union_lookup. 
- */ - if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - dvp = tdvp; - /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to @@ -151,18 +139,18 @@ union_lookup1(udvp, dvpp, vpp, cnp) */ while (dvp != udvp && (dvp->v_type == VDIR) && (mp = dvp->v_mountedhere)) { - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(dvp); + if (vfs_busy(mp, LK_NOWAIT)) { + vnode_put(dvp); return(ENOENT); } - error = VFS_ROOT(mp, &tdvp); - vfs_unbusy(mp, p); + error = VFS_ROOT(mp, &tdvp, ctx); + vfs_unbusy(mp); if (error) { - vput(dvp); + vnode_put(dvp); return (error); } - vput(dvp); + vnode_put(dvp); dvp = tdvp; } @@ -171,13 +159,14 @@ union_lookup1(udvp, dvpp, vpp, cnp) } int -union_lookup(ap) - struct vop_lookup_args /* { +union_lookup( + struct vnop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { int error; int uerror, lerror; @@ -186,13 +175,13 @@ union_lookup(ap) struct vnode *dvp = ap->a_dvp; struct union_node *dun = VTOUNION(dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); int lockparent = cnp->cn_flags & LOCKPARENT; - int rdonly = cnp->cn_flags & RDONLY; struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); - struct ucred *saved_cred; + kauth_cred_t saved_cred; int iswhiteout; - struct vattr va; + struct vnode_attr va; #ifdef notyet if (cnp->cn_namelen == 3 && @@ -202,10 +191,8 @@ union_lookup(ap) dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); if (dvp == NULLVP) return (ENOENT); - VREF(dvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(ap->a_dvp, 0, p); + vnode_get(dvp); + return (0); } #endif @@ -241,8 +228,9 @@ union_lookup(ap) if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; } else if (lowerdvp != NULLVP) { - lerror = VOP_GETATTR(upperdvp, &va, - cnp->cn_cred, cnp->cn_proc); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + lerror = vnode_getattr(upperdvp, &va, ap->a_context); if (lerror == 0 && (va.va_flags & OPAQUE)) iswhiteout = 1; } @@ -254,15 +242,13 @@ union_lookup(ap) /* * in a similar way to the upper layer, do the lookup * in the lower layer. this time, if there is some - * component magic going on, then vput whatever we got + * component magic going on, then vnode_put whatever we got * back from the upper layer and return the lower vnode * instead. */ if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; - vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); - /* * Only do a LOOKUP on the bottom node, since * we won't be making changes to it anyway. 
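/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * the whiteout test above uses the new single-attribute query idiom --
 * initialize, mark the wanted attribute, call vnode_getattr().
 * Strictly, a caller should also verify VATTR_IS_SUPPORTED before
 * trusting the returned field:
 */
static int
example_is_opaque(vnode_t vp, vfs_context_t ctx, int *opaque)
{
	struct vnode_attr va;
	int error;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_flags);
	error = vnode_getattr(vp, &va, ctx);
	if (error == 0)
		*opaque = VATTR_IS_SUPPORTED(&va, va_flags) &&
		    (va.va_flags & OPAQUE) != 0;
	return (error);
}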
@@ -270,24 +256,21 @@ union_lookup(ap) nameiop = cnp->cn_nameiop; cnp->cn_nameiop = LOOKUP; if (um->um_op == UNMNT_BELOW) { - saved_cred = cnp->cn_cred; - cnp->cn_cred = um->um_cred; + /* XXX BOGUS */ + saved_cred = cnp->cn_context->vc_ucred; + cnp->cn_context->vc_ucred = um->um_cred; + lerror = union_lookup1(um->um_lowervp, &lowerdvp, + &lowervp, cnp); + cnp->cn_context->vc_ucred = saved_cred; + } else { + lerror = union_lookup1(um->um_lowervp, &lowerdvp, + &lowervp, cnp); } - lerror = union_lookup1(um->um_lowervp, &lowerdvp, - &lowervp, cnp); - if (um->um_op == UNMNT_BELOW) - cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; - if (lowervp != lowerdvp) - VOP_UNLOCK(lowerdvp, 0, p); - if (cnp->cn_consume != 0) { if (uppervp != NULLVP) { - if (uppervp == upperdvp) - vrele(uppervp); - else - vput(uppervp); + vnode_put(uppervp); uppervp = NULLVP; } *ap->a_vpp = lowervp; @@ -300,8 +283,7 @@ union_lookup(ap) if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { lowervp = LOWERVP(dun->un_pvp); if (lowervp != NULLVP) { - VREF(lowervp); - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); + vnode_get(lowervp); lerror = 0; } } @@ -345,54 +327,46 @@ union_lookup(ap) if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ dun->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(upperdvp, 0, p); uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); - vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; if (uerror) { if (lowervp != NULLVP) { - vput(lowervp); + vnode_put(lowervp); lowervp = NULLVP; } return (uerror); } } } - - if (lowervp != NULLVP) - VOP_UNLOCK(lowervp, 0, p); - error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); if (error) { if (uppervp != NULLVP) - vput(uppervp); + vnode_put(uppervp); if (lowervp != NULLVP) - vrele(lowervp); - } else { - if (*ap->a_vpp != dvp) - if (!lockparent || !(cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); + vnode_put(lowervp); } return (error); } int -union_create(ap) - struct vop_create_args /* { +union_create( + struct vnop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dvp != NULLVP) { int error; @@ -401,57 +375,59 @@ union_create(ap) FIXUP(un, p); - VREF(dvp); un->un_flags |= UN_KLOCK; mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); - error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); + + /* note that this is a direct passthrough to the filesystem */ + error = VNOP_CREATE(dvp, &vp, cnp, ap->a_vap, ap->a_context); if (error) return (error); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) - vput(vp); + vnode_put(vp); return (error); } - - vput(ap->a_dvp); return (EROFS); } int -union_whiteout(ap) - struct vop_whiteout_args /* { +union_whiteout( + struct vnop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (un->un_uppervp == NULLVP) - return (EOPNOTSUPP); + return (ENOTSUP); FIXUP(un, p); - 
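/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * the UNMNT_BELOW branch above substitutes the mount-time credential
 * while looking up in the lower layer.  Isolated, the swap-and-restore
 * reads as follows; the "XXX BOGUS" in the patch flags the hazard that
 * any concurrent user of the shared vfs_context briefly observes
 * um_cred in place of the caller's credential:
 */
static int
example_lookup_as_mount_owner(struct union_mount *um, struct vnode *ldvp,
    struct vnode **vpp, struct componentname *cnp)
{
	kauth_cred_t saved_cred;
	int error;

	saved_cred = cnp->cn_context->vc_ucred;
	cnp->cn_context->vc_ucred = um->um_cred;	/* act as mount owner */
	error = union_lookup1(um->um_lowervp, &ldvp, vpp, cnp);
	cnp->cn_context->vc_ucred = saved_cred;		/* always restore */
	return (error);
}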
return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); + return (VNOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags, ap->a_context)); } int -union_mknod(ap) - struct vop_mknod_args /* { +union_mknod( + struct vnop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dvp != NULLVP) { int error; @@ -460,11 +436,11 @@ union_mknod(ap) FIXUP(un, p); - VREF(dvp); un->un_flags |= UN_KLOCK; mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); - error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); + + /* note that this is a direct passthrough to the filesystem */ + error = VNOP_MKNOD(dvp, &vp, cnp, ap->a_vap, ap->a_context); if (error) return (error); @@ -472,30 +448,27 @@ union_mknod(ap) error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) - vput(vp); + vnode_put(vp); } return (error); } - - vput(ap->a_dvp); return (EROFS); } int -union_open(ap) - struct vop_open_args /* { +union_open( + struct vnop_open_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *tvp; int mode = ap->a_mode; - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); int error; /* @@ -512,7 +485,7 @@ union_open(ap) if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); if (error == 0) - error = VOP_OPEN(un->un_uppervp, mode, cred, p); + error = VNOP_OPEN(un->un_uppervp, mode, ap->a_context); return (error); } @@ -520,27 +493,25 @@ union_open(ap) * Just open the lower vnode */ un->un_openl++; - vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_OPEN(tvp, mode, cred, p); - VOP_UNLOCK(tvp, 0, p); + + error = VNOP_OPEN(tvp, mode, ap->a_context); return (error); } FIXUP(un, p); - error = VOP_OPEN(tvp, mode, cred, p); + error = VNOP_OPEN(tvp, mode, ap->a_context); return (error); } int union_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); @@ -556,7 +527,7 @@ union_close(ap) } ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_close), ap)); + return (VCALL(vp, VOFFSET(vnop_close), ap)); } /* @@ -568,39 +539,37 @@ union_close(ap) * the user caused an implicit file copy. 
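/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * union_open's copy-up rule above -- opening a lower-layer regular
 * file for writing first copies it into the writeable upper layer,
 * then opens the copy.  In outline, using union_copyup() as this
 * patch does:
 */
static int
example_open_for_write(struct union_node *un, int mode, vfs_context_t ctx)
{
	kauth_cred_t cred = vfs_context_ucred(ctx);
	struct proc *p = vfs_context_proc(ctx);
	int error;

	if (un->un_uppervp == NULLVP && (mode & FWRITE) &&
	    un->un_lowervp->v_type == VREG) {
		/* with O_TRUNC the old contents need not be copied */
		error = union_copyup(un, (mode & O_TRUNC) == 0, cred, p);
		if (error)
			return (error);
	}
	return (VNOP_OPEN(un->un_uppervp, mode, ctx));
}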
*/ int -union_access(ap) - struct vop_access_args /* { +union_access( + struct vnop_access_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + int a_action; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; + struct proc *p = vfs_context_proc(ap->a_context); int error = EACCES; struct vnode *vp; if ((vp = un->un_uppervp) != NULLVP) { FIXUP(un, p); ap->a_vp = vp; - return (VCALL(vp, VOFFSET(vop_access), ap)); + return (VCALL(vp, VOFFSET(vnop_access), ap)); } if ((vp = un->un_lowervp) != NULLVP) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_access), ap); + error = VCALL(vp, VOFFSET(vnop_access), ap); if (error == 0) { struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount); if (um->um_op == UNMNT_BELOW) { - ap->a_cred = um->um_cred; - error = VCALL(vp, VOFFSET(vop_access), ap); + /* XXX fix me */ + // ap->a_cred = um->um_cred; + error = VCALL(vp, VOFFSET(vnop_access), ap); } } - VOP_UNLOCK(vp, 0, p); if (error) return (error); } @@ -614,19 +583,18 @@ union_access(ap) */ int union_getattr(ap) - struct vop_getattr_args /* { + struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { int error; struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp = un->un_uppervp; - struct proc *p = ap->a_p; - struct vattr *vap; - struct vattr va; + struct proc *p = vfs_context_proc(ap->a_context); + struct vnode_attr *vap; + struct vnode_attr va; /* @@ -643,7 +611,7 @@ union_getattr(ap) vp = un->un_uppervp; if (vp != NULLVP) { /* - * It's not clear whether VOP_GETATTR is to be + * It's not clear whether vnop_getattr is to be * called with the vnode locked or not. stat() calls * it with (vp) locked, and fstat calls it with * (vp) unlocked. @@ -653,46 +621,49 @@ union_getattr(ap) if (un->un_flags & UN_LOCKED) FIXUP(un, p); - error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + error = vnode_getattr(vp, vap, ap->a_context); if (error) return (error); - union_newsize(ap->a_vp, vap->va_size, VNOVAL); + union_newsize(ap->a_vp, vap->va_data_size, VNOVAL); } if (vp == NULLVP) { vp = un->un_lowervp; } else if (vp->v_type == VDIR) { vp = un->un_lowervp; + VATTR_INIT(&va); + /* all we want from the lower node is the link count */ + VATTR_WANTED(&va, va_nlink); vap = &va; } else { vp = NULLVP; } if (vp != NULLVP) { - error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + error = vnode_getattr(vp, vap, ap->a_context); if (error) return (error); - union_newsize(ap->a_vp, VNOVAL, vap->va_size); + union_newsize(ap->a_vp, VNOVAL, vap->va_data_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; - ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]); return (0); } int union_setattr(ap) - struct vop_setattr_args /* { + struct vnop_setattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; + struct proc *p = vfs_context_proc(ap->a_context); + kauth_cred_t cred = vfs_context_ucred(ap->a_context); int error; /* @@ -700,11 +671,11 @@ union_setattr(ap) * by creating a zero length upper object. 
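/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * for directories, union_getattr above adds the lower layer's link
 * count to the upper's, since subdirectories may live in either
 * layer.  The lower query requests va_nlink and nothing else:
 */
static int
example_merged_nlink(vnode_t uvp, vnode_t lvp, vfs_context_t ctx,
    uint64_t *nlink)
{
	struct vnode_attr va;
	int error;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_nlink);
	if ((error = vnode_getattr(uvp, &va, ctx)))
		return (error);
	*nlink = va.va_nlink;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_nlink);	/* lower layer: link count only */
	if ((error = vnode_getattr(lvp, &va, ctx)))
		return (error);
	*nlink += va.va_nlink;		/* both layers contribute entries */
	return (0);
}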
This is to * handle the case of open with O_TRUNC and O_CREAT. */ - if ((un->un_uppervp == NULLVP) && + if (VATTR_IS_ACTIVE(ap->a_vap, va_data_size) && + (un->un_uppervp == NULLVP) && /* assert(un->un_lowervp != NULLVP) */ (un->un_lowervp->v_type == VREG)) { - error = union_copyup(un, (ap->a_vap->va_size != 0), - ap->a_cred, ap->a_p); + error = union_copyup(un, (ap->a_vap->va_data_size != 0), cred, p); if (error) return (error); } @@ -715,10 +686,9 @@ union_setattr(ap) */ if (un->un_uppervp != NULLVP) { FIXUP(un, p); - error = VOP_SETATTR(un->un_uppervp, ap->a_vap, - ap->a_cred, ap->a_p); - if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) - union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); + error = vnode_setattr(un->un_uppervp, ap->a_vap, ap->a_context); + if ((error == 0) && VATTR_IS_ACTIVE(ap->a_vap, va_data_size)) + union_newsize(ap->a_vp, ap->a_vap->va_data_size, VNOVAL); } else { error = EROFS; } @@ -728,25 +698,21 @@ union_setattr(ap) int union_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { int error; - struct proc *p = ap->a_uio->uio_procp; + struct proc *p = vfs_context_proc(ap->a_context); struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else + if (!dolock) FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); - if (dolock) - VOP_UNLOCK(vp, 0, p); + error = VNOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_context); /* * XXX @@ -772,24 +738,24 @@ union_read(ap) int union_write(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { int error; struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_uio->uio_procp; + struct proc *p = vfs_context_proc(ap->a_context); vp = UPPERVP(ap->a_vp); if (vp == NULLVP) panic("union: missing upper layer in write"); FIXUP(un, p); - error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + error = VNOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_context); /* * the size of the underlying object may be changed by the @@ -805,142 +771,109 @@ union_write(ap) return (error); } -union_lease(ap) - struct vop_lease_args /* { - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; - } */ *ap; -{ - register struct vnode *ovp = OTHERVP(ap->a_vp); - - ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_lease), ap)); -} int union_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); + return (VCALL(ovp, VOFFSET(vnop_ioctl), ap)); } int union_select(ap) - struct vop_select_args /* { + struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void * a_wql; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_select), ap)); + return (VCALL(ovp, VOFFSET(vnop_select), ap)); } int union_revoke(ap) - struct vop_revoke_args /* { + struct vnop_revoke_args /* { struct vnode *a_vp; int a_flags; - struct proc *a_p; + vfs_context_t 
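/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * VATTR_IS_ACTIVE tests whether the caller asked to change a given
 * attribute, so the setattr path above copies a lower file up only
 * when a size change (truncation) is actually requested:
 */
static int
example_copyup_before_truncate(struct union_node *un, struct vnode_attr *vap,
    vfs_context_t ctx)
{
	if (!VATTR_IS_ACTIVE(vap, va_data_size))
		return (0);	/* no size change requested */
	if (un->un_uppervp != NULLVP || un->un_lowervp->v_type != VREG)
		return (0);	/* already writeable, or not a regular file */
	/* keep existing data only when truncating to a non-zero size */
	return (union_copyup(un, (vap->va_data_size != 0),
	    vfs_context_ucred(ctx), vfs_context_proc(ctx)));
}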
a_context; } */ *ap; { struct vnode *vp = ap->a_vp; if (UPPERVP(vp)) - VOP_REVOKE(UPPERVP(vp), ap->a_flags); + VNOP_REVOKE(UPPERVP(vp), ap->a_flags, ap->a_context); if (LOWERVP(vp)) - VOP_REVOKE(LOWERVP(vp), ap->a_flags); - vgone(vp); + VNOP_REVOKE(LOWERVP(vp), ap->a_flags, ap->a_context); + vnode_reclaim(vp); } int union_mmap(ap) - struct vop_mmap_args /* { + struct vnop_mmap_args /* { struct vnode *a_vp; int a_fflags; - struct ucred *a_cred; + kauth_cred_t a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_mmap), ap)); + return (VCALL(ovp, VOFFSET(vnop_mmap), ap)); } int -union_fsync(ap) - struct vop_fsync_args /* { +union_fsync( + struct vnop_fsync_args /* { struct vnode *a_vp; - struct ucred *a_cred; int a_waitfor; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { int error = 0; - struct proc *p = ap->a_p; + struct proc *p = vfs_context_proc(ap->a_context); struct vnode *targetvp = OTHERVP(ap->a_vp); if (targetvp != NULLVP) { int dolock = (targetvp == LOWERVP(ap->a_vp)); - if (dolock) - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - else + if (!dolock) FIXUP(VTOUNION(ap->a_vp), p); - error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); - if (dolock) - VOP_UNLOCK(targetvp, 0, p); + error = VNOP_FSYNC(targetvp, ap->a_waitfor, ap->a_context); } return (error); } int -union_seek(ap) - struct vop_seek_args /* { - struct vnode *a_vp; - off_t a_oldoff; - off_t a_newoff; - struct ucred *a_cred; - } */ *ap; -{ - register struct vnode *ovp = OTHERVP(ap->a_vp); - - ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_seek), ap)); -} - -int -union_remove(ap) - struct vop_remove_args /* { +union_remove( + struct vnop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dun->un_uppervp == NULLVP) panic("union remove: null upper vnode"); @@ -950,17 +883,13 @@ union_remove(ap) struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); FIXUP(un, p); - VREF(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_context)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_REMOVE(dvp, vp, cnp); + error = VNOP_REMOVE(dvp, vp, cnp, 0, ap->a_context); if (!error) union_removed_upper(un); } else { @@ -968,24 +897,24 @@ union_remove(ap) error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } return (error); } int -union_link(ap) - struct vop_link_args /* { +union_link( + struct vnop_link_args /* { struct vnode *a_vp; struct vnode *a_tdvp; struct componentname *a_cnp; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { int error = 0; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); struct union_node *un; struct vnode *vp; struct vnode *tdvp; @@ -997,48 +926,41 @@ union_link(ap) } else { struct union_node *tun = VTOUNION(ap->a_vp); if (tun->un_uppervp == NULLVP) { - vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); if (un->un_uppervp == 
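/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * many of the entry points above (ioctl, select, mmap) just retarget
 * the argument block at the "other" vnode -- the upper layer if
 * present, else the lower -- and re-dispatch through that vnode's
 * operations vector:
 */
static int
example_passthrough_ioctl(struct vnop_ioctl_args *ap)
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;		/* retarget the argument block */
	return (VCALL(ovp, VOFFSET(vnop_ioctl), ap));
}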
tun->un_dirvp) { un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); } - error = union_copyup(tun, 1, cnp->cn_cred, p); + error = union_copyup(tun, 1, vfs_context_ucred(ctx), p); if (un->un_uppervp == tun->un_dirvp) { - vn_lock(un->un_uppervp, - LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; } - VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = un->un_uppervp; if (tdvp == NULLVP) error = EROFS; if (error) { - vput(ap->a_tdvp); return (error); } FIXUP(un, p); - VREF(tdvp); + vnode_get(tdvp); un->un_flags |= UN_KLOCK; - vput(ap->a_tdvp); - return (VOP_LINK(vp, tdvp, cnp)); + return (VNOP_LINK(vp, tdvp, cnp, ap->a_context)); } int union_rename(ap) - struct vop_rename_args /* { + struct vnop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ *ap; { int error; @@ -1062,8 +984,7 @@ union_rename(ap) } fdvp = un->un_uppervp; - VREF(fdvp); - vrele(ap->a_fdvp); + vnode_get(fdvp); } if (fvp->v_op == union_vnodeop_p) { /* always true */ @@ -1078,8 +999,7 @@ union_rename(ap) ap->a_fcnp->cn_flags |= DOWHITEOUT; fvp = un->un_uppervp; - VREF(fvp); - vrele(ap->a_fvp); + vnode_get(fvp); } if (tdvp->v_op == union_vnodeop_p) { @@ -1096,9 +1016,8 @@ union_rename(ap) } tdvp = un->un_uppervp; - VREF(tdvp); + vnode_get(tdvp); un->un_flags |= UN_KLOCK; - vput(ap->a_tdvp); } if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { @@ -1106,77 +1025,69 @@ union_rename(ap) tvp = un->un_uppervp; if (tvp != NULLVP) { - VREF(tvp); + vnode_get(tvp); un->un_flags |= UN_KLOCK; } - vput(ap->a_tvp); } - return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + return (VNOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp, ap->a_context)); bad: - vrele(fdvp); - vrele(fvp); - vput(tdvp); - if (tvp != NULLVP) - vput(tvp); - return (error); } int -union_mkdir(ap) - struct vop_mkdir_args /* { +union_mkdir( + struct vnop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dvp != NULLVP) { int error; struct vnode *vp; FIXUP(un, p); - VREF(dvp); un->un_flags |= UN_KLOCK; - VOP_UNLOCK(ap->a_dvp, 0, p); - error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); - if (error) { - vrele(ap->a_dvp); + + /* note that this is a direct fallthrough to the filesystem */ + error = VNOP_MKDIR(dvp, &vp, cnp, ap->a_vap, ap->a_context); + if (error) return (error); - } error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); - vrele(ap->a_dvp); if (error) - vput(vp); + vnode_put(vp); return (error); } - - vput(ap->a_dvp); return (EROFS); } int -union_rmdir(ap) - struct vop_rmdir_args /* { +union_rmdir( + struct vnop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dun->un_uppervp == NULLVP) panic("union rmdir: null upper vnode"); 
@@ -1186,17 +1097,15 @@ union_rmdir(ap) struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); + vnode_get(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); FIXUP(un, p); - VREF(vp); + vnode_get(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_context)) cnp->cn_flags |= DOWHITEOUT; - error = VOP_RMDIR(dvp, vp, ap->a_cnp); + error = VNOP_RMDIR(dvp, vp, ap->a_cnp, ap->a_context); if (!error) union_removed_upper(un); } else { @@ -1204,43 +1113,38 @@ union_rmdir(ap) error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } - return (error); } int -union_symlink(ap) - struct vop_symlink_args /* { +union_symlink( + struct vnop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; char *a_target; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct union_node *un = VTOUNION(ap->a_dvp); struct vnode *dvp = un->un_uppervp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); if (dvp != NULLVP) { int error; struct vnode *vp; - struct mount *mp = ap->a_dvp->v_mount; FIXUP(un, p); - VREF(dvp); un->un_flags |= UN_KLOCK; - vput(ap->a_dvp); - error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); + + error = VNOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target, ap->a_context); *ap->a_vpp = NULLVP; return (error); } - - vput(ap->a_dvp); return (EROFS); } @@ -1253,98 +1157,67 @@ union_symlink(ap) */ int union_readdir(ap) - struct vop_readdir_args /* { + struct vnop_readdir_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + int a_flags; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_numdirent; + vfs_context_t a_context; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *uvp = un->un_uppervp; - struct proc *p = ap->a_uio->uio_procp; + struct proc *p = vfs_context_proc(ap->a_context); + + if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) + return (EINVAL); if (uvp == NULLVP) return (0); FIXUP(un, p); ap->a_vp = uvp; - return (VCALL(uvp, VOFFSET(vop_readdir), ap)); + return (VCALL(uvp, VOFFSET(vnop_readdir), ap)); } int union_readlink(ap) - struct vop_readlink_args /* { + struct vnop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { int error; struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; + struct proc *p = vfs_context_proc(ap->a_context); struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); + if (!dolock) + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_readlink), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); - - return (error); -} - -int -union_abortop(ap) - struct vop_abortop_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; - } */ *ap; -{ - int error; - struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; - struct vnode *vp = OTHERVP(ap->a_dvp); - struct union_node *un = VTOUNION(ap->a_dvp); - int islocked = un->un_flags & UN_LOCKED; - int dolock = (vp == LOWERVP(ap->a_dvp)); - - if (islocked) { - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, 
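/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * union_readdir above refuses the extended readdir variants outright,
 * so a caller working through the union layer must use the classic
 * form -- flags of 0, yielding plain struct dirent records with no
 * seek-offset cookies:
 */
static int
example_plain_readdir(vnode_t dvp, struct uio *uio, vfs_context_t ctx)
{
	int eofflag = 0, numdirent = 0;

	/* VNODE_READDIR_EXTENDED or _REQSEEKOFF here would earn EINVAL */
	return (VNOP_READDIR(dvp, uio, 0, &eofflag, &numdirent, ctx));
}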
p); - else - FIXUP(VTOUNION(ap->a_dvp), p); - } - ap->a_dvp = vp; - error = VCALL(vp, VOFFSET(vop_abortop), ap); - if (islocked && dolock) - VOP_UNLOCK(vp, 0, p); + error = VCALL(vp, VOFFSET(vnop_readlink), ap); return (error); } int -union_inactive(ap) - struct vop_inactive_args /* { +union_inactive( + struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; - } */ *ap; + vfs_context_t a_context; + } */ *ap) { struct vnode *vp = ap->a_vp; - struct proc *p = ap->a_p; struct union_node *un = VTOUNION(vp); struct vnode **vpp; /* * Do nothing (and _don't_ bypass). - * Wait to vrele lowervp until reclaim, + * Wait to vnode_put lowervp until reclaim, * so that until then our union_node is in the * cache and reusable. * @@ -1357,23 +1230,22 @@ union_inactive(ap) if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) - vrele(*vpp); + vnode_put(*vpp); _FREE(un->un_dircache, M_TEMP); un->un_dircache = 0; } - VOP_UNLOCK(vp, 0, p); - if ((un->un_flags & UN_CACHED) == 0) - vgone(vp); + vnode_recycle(vp); return (0); } int union_reclaim(ap) - struct vop_reclaim_args /* { + struct vnop_reclaim_args /* { struct vnode *a_vp; + vfs_context_t a_context; } */ *ap; { @@ -1383,155 +1255,15 @@ union_reclaim(ap) } int -union_lock(ap) - struct vop_lock_args *ap; -{ - struct vnode *vp = ap->a_vp; - struct proc *p = ap->a_p; - int flags = ap->a_flags; - struct union_node *un; - int error; - - - vop_nolock(ap); - /* - * Need to do real lockmgr-style locking here. - * in the mean time, draining won't work quite right, - * which could lead to a few race conditions. - * the following test was here, but is not quite right, we - * still need to take the lock: - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - */ - flags &= ~LK_INTERLOCK; - -start: - un = VTOUNION(vp); - - if (un->un_uppervp != NULLVP) { - if (((un->un_flags & UN_ULOCK) == 0) && - (vp->v_usecount != 0)) { - error = vn_lock(un->un_uppervp, flags, p); - if (error) - return (error); - un->un_flags |= UN_ULOCK; - } -#if DIAGNOSTIC - if (un->un_flags & UN_KLOCK) { - vprint("union: dangling klock", vp); - panic("union: dangling upper lock (%lx)", vp); - } -#endif - } - - if (un->un_flags & UN_LOCKED) { -#if DIAGNOSTIC - if (current_proc() && un->un_pid == current_proc()->p_pid && - un->un_pid > -1 && current_proc()->p_pid > -1) - panic("union: locking against myself"); -#endif - un->un_flags |= UN_WANT; - tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); - goto start; - } - -#if DIAGNOSTIC - if (current_proc()) - un->un_pid = current_proc()->p_pid; - else - un->un_pid = -1; -#endif - - un->un_flags |= UN_LOCKED; - return (0); -} - -/* - * When operations want to vput() a union node yet retain a lock on - * the upper vnode (say, to do some further operations like link(), - * mkdir(), ...), they set UN_KLOCK on the union node, then call - * vput() which calls VOP_UNLOCK() and comes here. union_unlock() - * unlocks the union node (leaving the upper vnode alone), clears the - * KLOCK flag, and then returns to vput(). The caller then does whatever - * is left to do with the upper vnode, and ensures that it gets unlocked. - * - * If UN_KLOCK isn't set, then the upper vnode is unlocked here. 
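/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * union_inactive above deliberately defers releasing lowervp until
 * reclaim so the union_node stays reusable in the cache, but the
 * readdir dircache -- a NULLVP-terminated array of held vnodes -- is
 * torn down eagerly:
 */
static void
example_release_dircache(struct union_node *un)
{
	struct vnode **vpp;

	if (un->un_dircache == 0)
		return;
	for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
		vnode_put(*vpp);	/* one reference per cached vnode */
	_FREE(un->un_dircache, M_TEMP);
	un->un_dircache = 0;
}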
- */ -int -union_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct union_node *un = VTOUNION(ap->a_vp); - struct proc *p = ap->a_p; - -#if DIAGNOSTIC - if ((un->un_flags & UN_LOCKED) == 0) - panic("union: unlock unlocked node"); - if (current_proc() && un->un_pid != current_proc()->p_pid && - current_proc()->p_pid > -1 && un->un_pid > -1) - panic("union: unlocking other process's union node"); -#endif - - un->un_flags &= ~UN_LOCKED; - - if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) - VOP_UNLOCK(un->un_uppervp, 0, p); - - un->un_flags &= ~(UN_ULOCK|UN_KLOCK); - - if (un->un_flags & UN_WANT) { - un->un_flags &= ~UN_WANT; - wakeup((caddr_t) &un->un_flags); - } - -#if DIAGNOSTIC - un->un_pid = 0; -#endif - vop_nounlock(ap); - - return (0); -} - -int -union_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - int error; - struct proc *p = current_proc(); /* XXX */ - struct vnode *vp = OTHERVP(ap->a_vp); - int dolock = (vp == LOWERVP(ap->a_vp)); - - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); - ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_bmap), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); - - return (error); -} - -int -union_cmap(ap) - struct vop_cmap_args /* { +union_blockmap(ap) + struct vnop_blockmap_args /* { struct vnode *a_vp; off_t a_offset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; + int a_flags; } */ *ap; { int error; @@ -1539,52 +1271,21 @@ union_cmap(ap) struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); + if (!dolock) + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_cmap), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + error = VCALL(vp, VOFFSET(vnop_blockmap), ap); return (error); } -int -union_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n", - vp, UPPERVP(vp), LOWERVP(vp)); - if (UPPERVP(vp) != NULLVP) - vprint("union: upper", UPPERVP(vp)); - if (LOWERVP(vp) != NULLVP) - vprint("union: lower", LOWERVP(vp)); - - return (0); -} - -int -union_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 
1 : 0); -} - int union_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { int error; @@ -1592,43 +1293,40 @@ union_pathconf(ap) struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); - if (dolock) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - else - FIXUP(VTOUNION(ap->a_vp), p); + if (!dolock) + FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; - error = VCALL(vp, VOFFSET(vop_pathconf), ap); - if (dolock) - VOP_UNLOCK(vp, 0, p); + error = VCALL(vp, VOFFSET(vnop_pathconf), ap); return (error); } int union_advlock(ap) - struct vop_advlock_args /* { + struct vnop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; + vfs_context_t a_context; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; - return (VCALL(ovp, VOFFSET(vop_advlock), ap)); + return (VCALL(ovp, VOFFSET(vnop_advlock), ap)); } /* - * XXX - vop_strategy must be hand coded because it has no + * XXX - vnop_strategy must be hand coded because it has no * vnode in its arguments. * This goes away with a merged VM/buffer cache. */ int union_strategy(ap) - struct vop_strategy_args /* { + struct vnop_strategy_args /* { struct buf *a_bp; } */ *ap; { @@ -1636,41 +1334,41 @@ union_strategy(ap) int error; struct vnode *savedvp; - savedvp = bp->b_vp; - bp->b_vp = OTHERVP(bp->b_vp); + savedvp = buf_vnode(bp); + buf_setvnode(bp, OTHERVP(savedvp)); #if DIAGNOSTIC - if (bp->b_vp == NULLVP) + if (buf_vnode(bp) == NULLVP) panic("union_strategy: nil vp"); - if (((bp->b_flags & B_READ) == 0) && - (bp->b_vp == LOWERVP(savedvp))) + if (((buf_flags(bp) & B_READ) == 0) && + (buf_vnode(bp) == LOWERVP(savedvp))) panic("union_strategy: writing to lowervp"); #endif - error = VOP_STRATEGY(bp); - bp->b_vp = savedvp; + error = VNOP_STRATEGY(bp); + buf_setvnode(bp, savedvp); return (error); } /* Pagein */ +int union_pagein(ap) - struct vop_pagein_args /* { + struct vnop_pagein_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */ *ap; { int error; - struct proc *p = current_proc(); struct vnode *vp = OTHERVP(ap->a_vp); - error = VOP_PAGEIN(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, - ap->a_size, ap->a_cred,ap->a_flags); + error = VNOP_PAGEIN(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, + ap->a_size, ap->a_flags, ap->a_context); /* * XXX @@ -1695,15 +1393,16 @@ union_pagein(ap) } /* Pageout */ +int union_pageout(ap) - struct vop_pageout_args /* { + struct vnop_pageout_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */ *ap; { int error; @@ -1714,8 +1413,8 @@ union_pageout(ap) if (vp == NULLVP) panic("union: missing upper layer in pageout"); - error = VOP_PAGEOUT(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, - ap->a_size, ap->a_cred,ap->a_flags); + error = VNOP_PAGEOUT(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, + ap->a_size, ap->a_flags, ap->a_context); /* * the size of the underlying object may be changed by the @@ -1734,16 +1433,16 @@ union_pageout(ap) /* Blktooff derives file offset for the given logical block number */ int union_blktooff(ap) - struct vop_blktooff_args /* { + struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */ *ap; { int error; struct vnode *vp = 
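/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * struct buf is opaque in this KPI revision, so union_strategy above
 * trades direct b_vp access for the buf_vnode()/buf_setvnode()
 * accessors.  The save/redirect/restore pattern in isolation:
 */
static int
example_redirect_strategy(struct buf *bp, struct vnode *target)
{
	struct vnode *savedvp = buf_vnode(bp);	/* remember the union vnode */
	int error;

	buf_setvnode(bp, target);	/* aim the I/O at the real layer */
	error = VNOP_STRATEGY(bp);
	buf_setvnode(bp, savedvp);	/* restore before returning */
	return (error);
}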
OTHERVP(ap->a_vp); - error = VOP_BLKTOOFF(vp, ap->a_lblkno, ap->a_offset); + error = VNOP_BLKTOOFF(vp, ap->a_lblkno, ap->a_offset); return(error); } @@ -1751,16 +1450,16 @@ union_blktooff(ap) /* offtoblk derives file offset for the given logical block number */ int union_offtoblk(ap) - struct vop_offtoblk_args /* { + struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */ *ap; { int error; struct vnode *vp = OTHERVP(ap->a_vp); - error = VOP_OFFTOBLK(vp, ap->a_offset, ap->a_lblkno); + error = VNOP_OFFTOBLK(vp, ap->a_offset, ap->a_lblkno); return(error); } @@ -1772,58 +1471,45 @@ union_offtoblk(ap) */ int (**union_vnodeop_p)(void *); struct vnodeopv_entry_desc union_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)union_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)union_create }, /* create */ - { &vop_whiteout_desc, (VOPFUNC)union_whiteout }, /* whiteout */ - { &vop_mknod_desc, (VOPFUNC)union_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)union_open }, /* open */ - { &vop_close_desc, (VOPFUNC)union_close }, /* close */ - { &vop_access_desc, (VOPFUNC)union_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)union_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)union_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)union_read }, /* read */ - { &vop_write_desc, (VOPFUNC)union_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)union_lease }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)union_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)union_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)union_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)union_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)union_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)union_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)union_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)union_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)union_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)union_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)union_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)union_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)union_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)union_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)union_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)union_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)union_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)union_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)union_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)union_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)union_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)union_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)union_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)union_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)union_advlock }, /* advlock */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)union_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)union_create }, /* create */ + { &vnop_whiteout_desc, (VOPFUNC)union_whiteout }, /* whiteout */ + { &vnop_mknod_desc, (VOPFUNC)union_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)union_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)union_close }, /* close */ + { &vnop_access_desc, (VOPFUNC)union_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC)union_getattr }, /* getattr */ + 
{ &vnop_setattr_desc, (VOPFUNC)union_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)union_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)union_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)union_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)union_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)union_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)union_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)union_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)union_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)union_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)union_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)union_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)union_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)union_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)union_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)union_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)union_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)union_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)union_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)union_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)union_advlock }, /* advlock */ #ifdef notdef - { &vop_blkatoff_desc, (VOPFUNC)union_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)union_valloc }, /* valloc */ - { &vop_vfree_desc, (VOPFUNC)union_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)union_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)union_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)union_bwrite }, /* bwrite */ + { &vnop_bwrite_desc, (VOPFUNC)union_bwrite }, /* bwrite */ #endif - { &vop_pagein_desc, (VOPFUNC)union_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)union_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (VOPFUNC)union_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)union_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)union_cmap }, /* cmap */ + { &vnop_pagein_desc, (VOPFUNC)union_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)union_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (VOPFUNC)union_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)union_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)union_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc union_vnodeop_opv_desc = diff --git a/bsd/miscfs/volfs/volfs.h b/bsd/miscfs/volfs/volfs.h index 939f555ec..0b083ee7d 100644 --- a/bsd/miscfs/volfs/volfs.h +++ b/bsd/miscfs/volfs/volfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,10 +19,7 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1998, Apple Computer, Inc. All rights reserved. 
*/ -/* - * Header file for volfs - */ + #ifndef __VOLFS_VOLFS_H__ #define __VOLFS_VOLFS_H__ @@ -32,7 +29,6 @@ struct volfs_mntdata { struct vnode *volfs_rootvp; - LIST_HEAD(volfs_fsvnodelist, vnode) volfs_fsvnodes; }; /* @@ -46,9 +42,9 @@ struct volfs_mntdata struct volfs_vndata { int vnode_type; - struct lock__bsd__ lock; unsigned int nodeID; /* the dev entry of a file system */ struct mount * fs_mount; + fsid_t fs_fsid; }; #define MAXVLFSNAMLEN 24 /* max length is really 10, pad to 24 since @@ -61,140 +57,11 @@ struct volfs_vndata #define MAXPLCENTRIES 250 #define PLCHASHSIZE 128 -extern int (**volfs_vnodeop_p)(void *); -__BEGIN_DECLS - -int volfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, - struct proc *)); -int volfs_start __P((struct mount *, int, struct proc *)); -int volfs_unmount __P((struct mount *, int, struct proc *)); -int volfs_root __P((struct mount *, struct vnode **)); -int volfs_quotactl __P((struct mount *, int, uid_t, caddr_t, - struct proc *)); -int volfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -int volfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int volfs_vget __P((struct mount *, void *ino_t, struct vnode **)); -int volfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -int volfs_vptofh __P((struct vnode *, struct fid *)); -int volfs_init __P((struct vfsconf *)); -int volfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, - struct proc *)); - -int volfs_reclaim __P((struct vop_reclaim_args*)); -int volfs_access __P((struct vop_access_args *)); -int volfs_getattr __P((struct vop_getattr_args *)); -int volfs_select __P((struct vop_select_args *)); -int volfs_rmdir __P((struct vop_rmdir_args *)); -int volfs_readdir __P((struct vop_readdir_args *)); -int volfs_lock __P((struct vop_lock_args *)); -int volfs_unlock __P((struct vop_unlock_args *)); -int volfs_islocked __P((struct vop_islocked_args *)); -int volfs_pathconf __P((struct vop_pathconf_args *)); -int volfs_lookup __P((struct vop_lookup_args *)); -__END_DECLS #define VTOVL(VP) ((struct volfs_vndata *)((VP)->v_data)) #define PRINTIT kprintf -#if VOLFS_DEBUG - #define DBG_VOP_TEST_LOCKS 1 - #define DBG_FUNC_NAME(FSTR) static char *funcname = FSTR - #define DBG_PRINT_FUNC_NAME() PRINTIT("%s\n", funcname); - #define DBG_VOP_PRINT_FUNCNAME() PRINTIT("%s: ", funcname); - #define DBG_VOP_PRINT_CPN_INFO(CN) PRINTIT("name: %s",(CN)->cn_nameptr); - #define DBG_VOP(STR) PRINTIT STR; - #define DBG_VOP_PRINT_VNODE_INFO(VP) { if ((VP)) \ - { if ((VP)->v_tag == VT_NON) \ - PRINTIT("\tfs:%s id: %d v: 0x%x ", VTOVL(VP)->fs_mount->mnt_stat.f_fstypename, VTOVL(VP)->nodeID, (u_int)(VP)); \ - else PRINTIT("\t%s v: 0x%x ", (VP)->v_mount->mnt_stat.f_fstypename, (u_int)(VP)); \ - } else { PRINTIT("*** NULL NODE ***"); } } - -#else /* VOLFS_DEBUG */ - #define DBG_VOP_TEST_LOCKS 0 - #define DBG_FUNC_NAME(FSTR) - #define DBG_PRINT_FUNC_NAME() - #define DBG_VOP_PRINT_FUNCNAME() - #define DBG_VOP_PRINT_CPN_INFO(CN) - #define DBG_VOP(A) - #define DBG_VOP_PRINT_VNODE_INFO(VP) -#endif /* VOLFS_DEBUG */ - - -#if DBG_VOP_TEST_LOCKS - -#define VOPDBG_IGNORE 0 -#define VOPDBG_LOCKED 1 -#define VOPDBG_UNLOCKED -1 -#define VOPDBG_LOCKNOTNIL 2 -#define VOPDBG_SAME 3 - -#define VOPDBG_ZERO 0 -#define VOPDBG_POS 1 - - -#define MAXDBGLOCKS 15 - -typedef struct VopDbgStoreRec { - short id; - struct vnode *vp; - short inState; - short outState; - short errState; - int inValue; - int outValue; - } VopDbgStoreRec; - - -/* This sets 
up the test for the lock state of vnodes. The entry paramaters are: - * I = index of paramater - * VP = pointer to a vnode - * ENTRYSTATE = the inState of the lock - * EXITSTATE = the outState of the lock - * ERRORSTATE = the error state of the lock - * It initializes the structure, does some preliminary validity checks, but does nothing - * if the instate is set to be ignored. - */ - -#define DBG_VOP_LOCKS_DECL(I) VopDbgStoreRec VopDbgStore[I];short numOfLockSlots=I -#define DBG_VOP_LOCKS_INIT(I,VP,ENTRYSTATE,EXITSTATE,ERRORSTATE,CHECKFLAG) \ - if (I >= numOfLockSlots) { \ - PRINTIT("%s: DBG_VOP_LOCKS_INIT: Entry #%d greater than allocated slots!\n", funcname, I); \ - }; \ - VopDbgStore[I].id = I; \ - VopDbgStore[I].vp = (VP); \ - VopDbgStore[I].inState = ENTRYSTATE; \ - VopDbgStore[I].outState = EXITSTATE; \ - VopDbgStore[I].errState = ERRORSTATE; \ - VopDbgStore[I].inValue = 0; \ - VopDbgStore[I].outValue = 0; \ - if ((VopDbgStore[I].inState != VOPDBG_IGNORE)) { \ - if ((VP) == NULL) \ - PRINTIT ("%s: DBG_VOP_LOCK on start: Null vnode ptr\n", funcname); \ - else \ - VopDbgStore[I].inValue = lockstatus (&((struct volfs_vndata *)((VP)->v_data))->lock); \ - } \ - if ((VP) != NULL) \ - { \ - if (CHECKFLAG==VOPDBG_POS && (VP)->v_usecount <= 0) \ - PRINTIT("%s: BAD USECOUNT OF %d !!!!\n", funcname, (VP)->v_usecount); \ - else if ((VP)->v_usecount < 0) \ - PRINTIT("%s: BAD USECOUNT OF %d !!!!\n", funcname, (VP)->v_usecount); \ - } -#define DBG_VOP_UPDATE_VP(I, VP) \ - VopDbgStore[I].vp = (VP); - - -#define DBG_VOP_LOCKS_TEST(status) DbgVopTest (numOfLockSlots, status, VopDbgStore, funcname); - -#else /*DBG_VOP_TEST_LOCKS */ -#define DBG_VOP_LOCKS_DECL(A) -#define DBG_VOP_LOCKS_INIT(A,B,C,D,E,F) -#define DBG_VOP_LOCKS_TEST(a) -#define DBG_VOP_UPDATE_VP(I, VP) - -#endif /* DBG_VOP_TEST_LOCKS */ #endif /* __APPLE_API_PRIVATE */ #endif /* __VOLFS_VOLFS_H__ */ diff --git a/bsd/miscfs/volfs/volfs_vfsops.c b/bsd/miscfs/volfs/volfs_vfsops.c index d92ee4c62..6cdd7f2ed 100644 --- a/bsd/miscfs/volfs/volfs_vfsops.c +++ b/bsd/miscfs/volfs/volfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,13 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1998 Apple Computer, Inc. All Rights Reserved */ -/* - * Change History: - * - * 29-May-1998 Pat Dirks Changed to cache pointer to root vnode until unmount. 
- * - */ #include #include @@ -35,8 +28,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -48,13 +40,27 @@ #include #include "volfs.h" +static int volfs_mount(struct mount *, vnode_t , user_addr_t, vfs_context_t); +static int volfs_start(struct mount *, int, vfs_context_t); +static int volfs_unmount(struct mount *, int, vfs_context_t); +static int volfs_root(struct mount *, struct vnode **, vfs_context_t); +static int volfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context); +static int volfs_sync(struct mount *, int, vfs_context_t); +static int volfs_vget(struct mount *, ino64_t, struct vnode **, vfs_context_t); +static int volfs_fhtovp(struct mount *, int, unsigned char *, struct vnode **, vfs_context_t); +static int volfs_vptofh(struct vnode *, int *, unsigned char *, vfs_context_t); +static int volfs_init(struct vfsconf *); +static int volfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); +void volfs_load(int loadArgument); + + struct vfsops volfs_vfsops = { volfs_mount, volfs_start, volfs_unmount, volfs_root, - volfs_quotactl, - volfs_statfs, + NULL, /* quotactl */ + volfs_vfs_getattr, volfs_sync, volfs_vget, volfs_fhtovp, @@ -63,17 +69,19 @@ struct vfsops volfs_vfsops = { volfs_sysctl }; -static char volfs_fs_name[MFSNAMELEN] = "volfs"; +// static char volfs_fs_name[MFSNAMELEN] = "volfs"; extern struct vnodeopv_desc volfs_vnodeop_opv_desc; +extern int (**volfs_vnodeop_p)(void *); + /* The following refer to kernel global variables used in the loading/initialization: */ -extern int maxvfsslots; /* Total number of slots in the system's vfsconf table */ -extern int maxvfsconf; /* The highest fs type number [old-style ID] in use [dispite its name] */ extern int vfs_opv_numops; /* The total number of defined vnode operations */ extern int kdp_flag; void -volfs_load(int loadArgument) { +volfs_load(__unused int loadArgument) +{ +#if 0 struct vfsconf *vfsconflistentry; int entriesRemaining; struct vfsconf *newvfsconf = NULL; @@ -82,9 +90,7 @@ volfs_load(int loadArgument) { int (***opv_desc_vector_p)(); int (**opv_desc_vector)(); struct vnodeopv_entry_desc *opve_descp; - -#pragma unused(loadArgument) - + /* * This routine is responsible for all the initialization that would * ordinarily be done as part of the system startup; it calls volfs_init @@ -99,7 +105,6 @@ volfs_load(int loadArgument) { This becomes irrelevant when volfs is compiled into the list. */ - DBG_VOP(("load_volfs: Scanning vfsconf list...\n")); vfsconflistentry = vfsconf; for (entriesRemaining = maxvfsslots; entriesRemaining > 0; --entriesRemaining) { if (vfsconflistentry->vfc_vfsops != NULL) { @@ -123,8 +128,7 @@ volfs_load(int loadArgument) { }; if (newvfsconf) { - DBG_VOP(("load_volfs: filling in vfsconf entry at 0x%08lX; lastentry = 0x%08lX.\n", (long)newvfsconf, (long)lastentry)); - newvfsconf->vfc_vfsops = &volfs_vfsops; + newvfsconf->vfc_vfsops = &volfs_vfsops; strncpy(&newvfsconf->vfc_name[0], "volfs", MFSNAMELEN); newvfsconf->vfc_typenum = maxvfsconf++; newvfsconf->vfc_refcount = 0; @@ -141,8 +145,6 @@ volfs_load(int loadArgument) { /* Based on vfs_op_init and ... */ opv_desc_vector_p = volfs_vnodeop_opv_desc.opv_desc_vector_p; - DBG_VOP(("load_volfs: Allocating and initializing VNode ops vector...\n")); - /* * Allocate and init the vector. * Also handle backwards compatibility. @@ -173,10 +175,7 @@ volfs_load(int loadArgument) { * list of supported operations. 
*/ if (opve_descp->opve_op->vdesc_offset == 0 && - opve_descp->opve_op->vdesc_offset != VOFFSET(vop_default)) { - DBG_VOP(("load_volfs: operation %s not listed in %s.\n", - opve_descp->opve_op->vdesc_name, - "vfs_op_descs")); + opve_descp->opve_op->vdesc_offset != VOFFSET(vnop_default)) { panic ("load_volfs: bad operation"); } /* @@ -197,17 +196,19 @@ volfs_load(int loadArgument) { * Force every operations vector to have a default routine. */ opv_desc_vector = *opv_desc_vector_p; - if (opv_desc_vector[VOFFSET(vop_default)]==NULL) { + if (opv_desc_vector[VOFFSET(vnop_default)]==NULL) { panic("load_vp;fs: operation vector without default routine."); } for (j = 0;jvolfs_fsvnodes); - DBG_VOP(("LIST_INIT succeeded\n")); mp->mnt_data = (void *)priv_mnt_data; - strcpy(mp->mnt_stat.f_fstypename, "volfs"); - (void) copyinstr(path, mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname) - 1, &size); - strcpy(mp->mnt_stat.f_mntfromname, ""); + strcpy(mp->mnt_vfsstat.f_fstypename, "volfs"); + strcpy(mp->mnt_vfsstat.f_mntfromname, ""); /* Set up the root vnode for fast reference in the future. Note that the root is maintained unlocked but with a pos. ref count until unmount. */ - MALLOC(priv_vn_data, struct volfs_vndata *, sizeof(struct volfs_vndata), M_VOLFSNODE, M_WAITOK); - error = getnewvnode(VT_VOLFS, mp, volfs_vnodeop_p, &root_vp); - if (error != 0) - { + MALLOC(priv_vn_data, struct volfs_vndata *, sizeof(struct volfs_vndata), M_VOLFSNODE, M_WAITOK); + + priv_vn_data->vnode_type = VOLFS_ROOT; + priv_vn_data->nodeID = ROOT_DIRID; + priv_vn_data->fs_mount = mp; + priv_vn_data->fs_fsid = mp->mnt_vfsstat.f_fsid; + + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = VDIR; + vfsp.vnfs_str = "volfs"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = priv_vn_data; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = volfs_vnodeop_p; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = 1; + + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &root_vp); + if (error != 0) { FREE(priv_mnt_data, M_VOLFSMNT); FREE(priv_vn_data, M_VOLFSNODE); - DBG_VOP(("getnewvnode failed with error code %d\n", error)); return(error); } - root_vp->v_type = VDIR; - root_vp->v_flag |= VROOT; - lockinit(&priv_vn_data->lock, PINOD, "volfsnode", 0, 0); - priv_vn_data->vnode_type = VOLFS_ROOT; - priv_vn_data->nodeID = 0; - priv_vn_data->fs_mount = mp; - root_vp->v_data = priv_vn_data; - - priv_mnt_data->volfs_rootvp = root_vp; + vnode_ref(root_vp); + vnode_put(root_vp); + + /* obtain a new fsid for the mount point */ + vfs_getnewfsid(mp); + + vnode_settag(root_vp, VT_VOLFS); - mp->mnt_flag &= ~MNT_RDONLY; + priv_mnt_data->volfs_rootvp = root_vp; + mp->mnt_flag &= ~MNT_RDONLY; + + mp->mnt_vtable->vfc_threadsafe = TRUE; - return (0); + return (0); } -int -volfs_start(mp, flags, p) -struct mount * mp; -int flags; -struct proc * p; +static int +volfs_start(__unused struct mount * mp, __unused int flags, __unused vfs_context_t context) { - DBG_VOP(("volfs_start called\n")); return (0); } @@ -282,58 +287,33 @@ struct proc * p; * Return the root of a filesystem. For volfs the root vnode is a directory * containing the list of all filesystems volfs can work with. 
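/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * getnewvnode() gives way above to vnode_create(), which consumes a
 * vnode_fsparam block describing the vnode up front -- type, private
 * data, ops vector, root/system markings -- instead of fixing up
 * fields after the fact.  Reduced to the fields volfs_mount uses:
 */
static int
example_create_root(mount_t mp, void *fsnode, int (**vops)(void *),
    struct vnode **vpp)
{
	struct vnode_fsparam vfsp;
	int error;

	bzero(&vfsp, sizeof(vfsp));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = VDIR;		/* the root is a directory */
	vfsp.vnfs_str = "examplefs";
	vfsp.vnfs_fsnode = fsnode;	/* per-vnode private data */
	vfsp.vnfs_vops = vops;		/* the filesystem's ops vector */
	vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
	vfsp.vnfs_markroot = 1;		/* vnode_create sets VROOT */

	error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp);
	if (error)
		return (error);
	vnode_ref(*vpp);	/* long-term usecount, held until unmount */
	vnode_put(*vpp);	/* drop the iocount vnode_create returned */
	return (0);
}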
*/ -int -volfs_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; +static int +volfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) { struct volfs_mntdata *priv_data; - // struct volfs_vndata *priv_vn_data; - // int error; - DBG_VOP(("volfs_root called\n")); priv_data = (struct volfs_mntdata *)mp->mnt_data; - if (priv_data->volfs_rootvp) { - vref(priv_data->volfs_rootvp); - VOP_LOCK(priv_data->volfs_rootvp, LK_EXCLUSIVE, current_proc()); + if (priv_data->volfs_rootvp) { + vnode_get(priv_data->volfs_rootvp); *vpp = priv_data->volfs_rootvp; } else { panic("volfs: root vnode missing!"); - }; - - DBG_VOP(("volfs_root returned with ")); - DBG_VOP_PRINT_VNODE_INFO(*vpp);DBG_VOP(("\n")); - - return(0); -} + }; -int -volfs_quotactl(mp, cmds, uid, arg, p) -struct mount *mp; -int cmds; -uid_t uid; -caddr_t arg; -struct proc * p; -{ - DBG_VOP(("volfs_quotactl called\n")); - return (0); + return(0); } /* * unmount system call */ -int -volfs_unmount(mp, mntflags, p) - struct mount *mp; - int mntflags; - struct proc *p; +static int +volfs_unmount(struct mount *mp, __unused int mntflags, __unused vfs_context_t context) { struct volfs_mntdata *priv_data; struct vnode *root_vp; int retval; - DBG_VOP(("volfs_unmount called\n")); priv_data = (struct volfs_mntdata *)mp->mnt_data; root_vp = priv_data->volfs_rootvp; @@ -344,23 +324,17 @@ volfs_unmount(mp, mntflags, p) Note that there's no need to vget() or vref() it before locking it here: the ref. count has been maintained at +1 ever since mount time. */ if (root_vp) { - retval = vn_lock(root_vp, LK_EXCLUSIVE, p); - if (retval) goto Err_Exit; - if (root_vp->v_usecount > 1) { - DBG_VOP(("VOLFS ERROR: root vnode = %x, usecount = %d\n", (int)root_vp, priv_data->volfs_rootvp->v_usecount)); - VOP_UNLOCK(root_vp, 0, p); - retval = EBUSY; + if (vnode_isinuse(root_vp, 1)) { + retval = EBUSY; goto Err_Exit; }; priv_data->volfs_rootvp = NULL; - vput(root_vp); /* This drops volfs's own refcount */ - vgone(root_vp); + vnode_rele(root_vp); /* This drops volfs's own refcount */ + vnode_reclaim(root_vp); }; /* All vnodes should be gone, and no errors, clean up the last */ - /* XXX DBG_ASSERT(mp->mnt_vnodelist.lh_first == NULL); */ - /* XXX DBG_ASSERT(retval == 0); */ mp->mnt_data = NULL; FREE(priv_data, M_VOLFSMNT); @@ -373,122 +347,78 @@ Err_Exit: /* * Get file system statistics. */ -int -volfs_statfs(mp, sbp, p) - struct mount *mp; - register struct statfs *sbp; - struct proc *p; +static int +volfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) { - DBG_VOP(("volfs_statfs called\n")); - sbp->f_bsize = 512; - sbp->f_iosize = 512; - sbp->f_blocks = 1024; // lies, darn lies and virtual file systems - sbp->f_bfree = 0; // Nope, can't write here! 
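/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * the unmount path above is the mirror image of the mount-time
 * reference scheme.  vnode_isinuse(vp, 1) asks whether anyone beyond
 * our own retained reference is using the vnode; if not, the
 * mount-time usecount is dropped with vnode_rele() and the vnode is
 * forcibly reclaimed:
 */
static int
example_drop_root(struct vnode **rootvpp)
{
	struct vnode *root_vp = *rootvpp;

	if (root_vp == NULL)
		return (0);
	if (vnode_isinuse(root_vp, 1))	/* tolerate exactly our reference */
		return (EBUSY);
	*rootvpp = NULL;
	vnode_rele(root_vp);	/* release the mount-time usecount */
	vnode_reclaim(root_vp);	/* reclaim immediately */
	return (0);
}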
- sbp->f_bavail = 0; - sbp->f_files = 0; // Hmmm...maybe later - sbp->f_ffree = 0; - return (0); + VFSATTR_RETURN(fsap, f_bsize, 512); + VFSATTR_RETURN(fsap, f_iosize, 512); + VFSATTR_RETURN(fsap, f_blocks, 1024); + VFSATTR_RETURN(fsap, f_bfree, 0); + VFSATTR_RETURN(fsap, f_bavail, 0); + VFSATTR_RETURN(fsap, f_bused, 1024); + VFSATTR_RETURN(fsap, f_files, 0); + VFSATTR_RETURN(fsap, f_ffree, 0); + VFSATTR_RETURN(fsap, f_fssubtype, 0); + return 0; } /* * volfs doesn't have any data and you can't write into any of the volfs * structures, so don't do anything */ -int -volfs_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; +static int +volfs_sync(__unused struct mount *mp, __unused int waitfor, __unused vfs_context_t context) { -// DBG_VOP(("volfs_sync called\n")); - - /* Release a few entries from the permissions cache to keep them from getting stale. - * Since sync is called at least every 30 seconds or so, releasing 1/20 of the cache - * every time through should free all entries in no less than 10 minutes, which should - * be adequate to prevent pid-wrapping from mis-associating PLC entries: - */ - volfs_PLC_reclaim_entries(MAXPLCENTRIES / 20); - return 0; } + /* - * Look up a FFS dinode number to find its incore vnode, otherwise read it - * in from disk. If it is in core, wait for the lock bit to clear, then - * return the inode locked. Detection and handling of mount points must be - * done by the calling routine. + * */ -int -volfs_vget(mp, ino, vpp) - struct mount *mp; - void *ino; - struct vnode **vpp; +static int +volfs_vget(__unused struct mount *mp, __unused ino64_t ino, + __unused struct vnode **vpp, __unused vfs_context_t context) { -// DBG_VOP(("volfs_vget called\n")); - return(0); + return(ENOTSUP); } + /* * File handle to vnode - * - * Have to be really careful about stale file handles: - * - check that the inode number is valid - * - call ffs_vget() to get the locked inode - * - check for an unallocated inode (i_mode == 0) - * - check that the given client host has export rights and return - * those rights via. exflagsp and credanonp */ -int -volfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) - register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; +static int +volfs_fhtovp(__unused struct mount *mp, __unused int fhlen, + __unused unsigned char *fhp, __unused struct vnode **vpp, + __unused vfs_context_t context) { - DBG_VOP(("volfs_fhtovp called\n")); - return(0); + return(ENOTSUP); } + /* * Vnode pointer to File handle */ -/* ARGSUSED */ -int -volfs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; +static int +volfs_vptofh(__unused struct vnode *vp, __unused int *fhlenp, __unused unsigned char *fhp, __unused vfs_context_t context) { - DBG_VOP(("volfs_vptofh called\n")); - return(0); + return(ENOTSUP); } + /* * Initialize the filesystem */ -int -volfs_init(vfsp) - struct vfsconf *vfsp; -{ - DBG_VOP(("volfs_init called\n")); - - volfs_PLChashinit(); - +static int +volfs_init(__unused struct vfsconf *vfsp) +{ return (0); } /* * fast filesystem related variables. 
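The statfs-to-getattr conversion above leans on VFSATTR_RETURN(): the macro stores the value and marks the attribute as supported in one step, which is how the VFS layer later knows which fields this filesystem actually answered; anything left unmarked can be synthesized or rejected upstream. Conceptually (a paraphrase of the idea, not the verbatim header definition):

/* VFSATTR_RETURN(s, f, v) behaves roughly like: */
#define MY_VFSATTR_RETURN(s, f, v) do {		\
	(s)->f = (v);				\
	VFSATTR_SET_SUPPORTED((s), f);		\
} while (0)

/* a consumer can then test, e.g.:
 *	if (VFSATTR_IS_SUPPORTED(&va, f_bfree))
 *		... trust va.f_bfree ...
 */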
*/ -int -volfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +static int +volfs_sysctl(__unused int *name, __unused u_int namelen, __unused user_addr_t oldp, + __unused size_t *oldlenp, __unused user_addr_t newp, __unused size_t newlen, + __unused vfs_context_t context) { - DBG_VOP(("volfs_sysctl called\n")); - return (EOPNOTSUPP); + return (ENOTSUP); } diff --git a/bsd/miscfs/volfs/volfs_vnops.c b/bsd/miscfs/volfs/volfs_vnops.c index 9c0980f75..d875957b1 100644 --- a/bsd/miscfs/volfs/volfs_vnops.c +++ b/bsd/miscfs/volfs/volfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,22 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998-1999 Apple Computer, Inc. All Rights Reserved. - * - * Modification History: - * - * 2/10/2000 Clark Warner Added copyfile - * 5/24/1999 Don Brady Fixed security hole in get_fsvnode. - * 11/18/1998 Don Brady Special case 2 to mean the root of a file system. - * 9/28/1998 Umesh Vaishampayan Use the default vnode ops. Cleanup - * header includes. - * 11/12/1998 Scott Roberts validfsnode only checks to see if the volfs mount flag is set - * 8/5/1998 Don Brady fix validfsnode logic to handle a "bad" VFS_GET - * 7/5/1998 Don Brady In volfs_reclaim set vp->v_data to NULL after private data is free (VFS expects a NULL). - * 4/5/1998 Don Brady Changed lockstatus calls to VOP_ISLOCKED (radar #2231108); - * 3/25/1998 Pat Dirks Added include for sys/attr.h, which is no longer included indirectly. - */ #include @@ -45,22 +29,25 @@ #include #include #include -#include -#include +#include /* for p_fd */ +#include #include -#include -#include +#include +#include #include #include #include #include #include #include +#include #include #include #include +#include + #include "volfs.h" /* @@ -92,72 +79,74 @@ * a similar mechanism. */ +static int volfs_reclaim (struct vnop_reclaim_args*); +static int volfs_getattr (struct vnop_getattr_args *); +static int volfs_select (struct vnop_select_args *); +static int volfs_rmdir (struct vnop_rmdir_args *); +static int volfs_readdir (struct vnop_readdir_args *); +static int volfs_pathconf (struct vnop_pathconf_args *); +static int volfs_lookup (struct vnop_lookup_args *); + +static int volfs_readdir_callback(mount_t, void *); +static int get_filevnode(struct mount *parent_fs, u_int id, vnode_t *ret_vnode, vfs_context_t context); +static int get_fsvnode(struct mount *our_mount, int id, vnode_t *ret_vnode); + +/* for the call back function in volfs_readdir */ +struct volfs_rdstruct { + int validindex; + vnode_t vp; + int rec_offset; + struct uio * uio; +}; + #define VOPFUNC int (*)(void *) /* Global vfs data structures for volfs. 
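The table that follows wires each vnop descriptor to a handler; anything volfs cannot support is routed to an err_* stub, and vnop_default falls back to vn_default_error. A skeletal sketch of how such a vector is declared (the myfs names are hypothetical, the extern declaration assumes the kernel-internal stub volfs itself uses, and the types come from the kernel-private vnode headers):

#define MYFS_VOPFUNC int (*)(void *)

extern int vn_default_error(void);			/* kernel default stub, as used above */
static int myfs_lookup(struct vnop_lookup_args *);	/* fs-specific ops (hypothetical) */
static int myfs_reclaim(struct vnop_reclaim_args *);

int (**myfs_vnodeop_p)(void *);				/* filled in at registration time */

static struct vnodeopv_entry_desc myfs_vnodeop_entries[] = {
	{ &vnop_default_desc, (MYFS_VOPFUNC)vn_default_error },	/* fallback for unlisted ops */
	{ &vnop_lookup_desc, (MYFS_VOPFUNC)myfs_lookup },
	{ &vnop_reclaim_desc, (MYFS_VOPFUNC)myfs_reclaim },
	{ (struct vnodeop_desc *)NULL, (int (*)())NULL }	/* NULL entry terminates the table */
};

struct vnodeopv_desc myfs_vnodeop_opv_desc =
	{ &myfs_vnodeop_p, myfs_vnodeop_entries };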
*/ int (**volfs_vnodeop_p) (void *); struct vnodeopv_entry_desc volfs_vnodeop_entries[] = { - {&vop_default_desc, (VOPFUNC)vn_default_error}, - {&vop_strategy_desc, (VOPFUNC)err_strategy}, /* strategy */ - {&vop_bwrite_desc, (VOPFUNC)err_bwrite}, /* bwrite */ - {&vop_lookup_desc, (VOPFUNC)volfs_lookup}, /* lookup */ - {&vop_create_desc, (VOPFUNC)err_create}, /* create */ - {&vop_whiteout_desc, (VOPFUNC)err_whiteout}, /* whiteout */ - {&vop_mknod_desc, (VOPFUNC)err_mknod}, /* mknod */ - {&vop_mkcomplex_desc, (VOPFUNC)err_mkcomplex}, /* mkcomplex */ - {&vop_open_desc, (VOPFUNC)nop_open}, /* open */ - {&vop_close_desc, (VOPFUNC)nop_close}, /* close */ - {&vop_access_desc, (VOPFUNC)volfs_access}, /* access */ - {&vop_getattr_desc, (VOPFUNC)volfs_getattr}, /* getattr */ - {&vop_setattr_desc, (VOPFUNC)err_setattr}, /* setattr */ - {&vop_getattrlist_desc, (VOPFUNC)err_getattrlist}, /* getattrlist */ - {&vop_setattrlist_desc, (VOPFUNC)err_setattrlist}, /* setattrlist */ - {&vop_read_desc, (VOPFUNC)err_read}, /* read */ - {&vop_write_desc, (VOPFUNC)err_write}, /* write */ - {&vop_lease_desc, (VOPFUNC)err_lease}, /* lease */ - {&vop_ioctl_desc, (VOPFUNC)err_ioctl}, /* ioctl */ - {&vop_select_desc, (VOPFUNC)volfs_select}, /* select */ - {&vop_exchange_desc, (VOPFUNC)err_exchange}, /* exchange */ - {&vop_revoke_desc, (VOPFUNC)nop_revoke}, /* revoke */ - {&vop_mmap_desc, (VOPFUNC)err_mmap}, /* mmap */ - {&vop_fsync_desc, (VOPFUNC)err_fsync}, /* fsync */ - {&vop_seek_desc, (VOPFUNC)nop_seek}, /* seek */ - {&vop_remove_desc, (VOPFUNC)err_remove}, /* remove */ - {&vop_link_desc, (VOPFUNC)err_link}, /* link */ - {&vop_rename_desc, (VOPFUNC)err_rename}, /* rename */ - {&vop_mkdir_desc, (VOPFUNC)err_mkdir}, /* mkdir */ - {&vop_rmdir_desc, (VOPFUNC)volfs_rmdir}, /* rmdir */ - {&vop_symlink_desc, (VOPFUNC)err_symlink}, /* symlink */ - {&vop_readdir_desc, (VOPFUNC)volfs_readdir}, /* readdir */ - {&vop_readdirattr_desc, (VOPFUNC)err_readdirattr}, /* readdirattr */ - {&vop_readlink_desc, (VOPFUNC)err_readlink}, /* readlink */ - {&vop_abortop_desc, (VOPFUNC)err_abortop}, /* abortop */ - {&vop_inactive_desc, (VOPFUNC)err_inactive}, /* inactive */ - {&vop_reclaim_desc, (VOPFUNC)volfs_reclaim}, /* reclaim */ - {&vop_lock_desc, (VOPFUNC)volfs_lock}, /* lock */ - {&vop_unlock_desc, (VOPFUNC)volfs_unlock}, /* unlock */ - {&vop_bmap_desc, (VOPFUNC)err_bmap}, /* bmap */ - {&vop_print_desc, (VOPFUNC)err_print}, /* print */ - {&vop_islocked_desc, (VOPFUNC)volfs_islocked}, /* islocked */ - {&vop_pathconf_desc, (VOPFUNC)volfs_pathconf}, /* pathconf */ - {&vop_advlock_desc, (VOPFUNC)err_advlock}, /* advlock */ - {&vop_blkatoff_desc, (VOPFUNC)err_blkatoff}, /* blkatoff */ - {&vop_valloc_desc, (VOPFUNC)err_valloc}, /* valloc */ - {&vop_reallocblks_desc, (VOPFUNC)err_reallocblks}, /* reallocblks */ - {&vop_vfree_desc, (VOPFUNC)err_vfree}, /* vfree */ - {&vop_truncate_desc, (VOPFUNC)err_truncate}, /* truncate */ - {&vop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate */ - {&vop_update_desc, (VOPFUNC)err_update}, /* update */ - {&vop_pgrd_desc, (VOPFUNC)err_pgrd}, /* pgrd */ - {&vop_pgwr_desc, (VOPFUNC)err_pgwr}, /* pgwr */ - {&vop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein */ - {&vop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout */ - {&vop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize */ - {&vop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs */ - {&vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ - {&vop_blktooff_desc, (VOPFUNC)err_blktooff}, /* blktooff */ - {&vop_offtoblk_desc, 
(VOPFUNC)err_offtoblk }, /* offtoblk */ - {&vop_cmap_desc, (VOPFUNC)err_cmap }, /* cmap */ + {&vnop_default_desc, (VOPFUNC)vn_default_error}, + {&vnop_strategy_desc, (VOPFUNC)err_strategy}, /* strategy */ + {&vnop_bwrite_desc, (VOPFUNC)err_bwrite}, /* bwrite */ + {&vnop_lookup_desc, (VOPFUNC)volfs_lookup}, /* lookup */ + {&vnop_create_desc, (VOPFUNC)err_create}, /* create */ + {&vnop_whiteout_desc, (VOPFUNC)err_whiteout}, /* whiteout */ + {&vnop_mknod_desc, (VOPFUNC)err_mknod}, /* mknod */ + {&vnop_open_desc, (VOPFUNC)nop_open}, /* open */ + {&vnop_close_desc, (VOPFUNC)nop_close}, /* close */ + {&vnop_getattr_desc, (VOPFUNC)volfs_getattr}, /* getattr */ + {&vnop_setattr_desc, (VOPFUNC)err_setattr}, /* setattr */ + {&vnop_getattrlist_desc, (VOPFUNC)err_getattrlist}, /* getattrlist */ + {&vnop_setattrlist_desc, (VOPFUNC)err_setattrlist}, /* setattrlist */ + {&vnop_read_desc, (VOPFUNC)err_read}, /* read */ + {&vnop_write_desc, (VOPFUNC)err_write}, /* write */ + {&vnop_ioctl_desc, (VOPFUNC)err_ioctl}, /* ioctl */ + {&vnop_select_desc, (VOPFUNC)volfs_select}, /* select */ + {&vnop_exchange_desc, (VOPFUNC)err_exchange}, /* exchange */ + {&vnop_revoke_desc, (VOPFUNC)nop_revoke}, /* revoke */ + {&vnop_mmap_desc, (VOPFUNC)err_mmap}, /* mmap */ + {&vnop_fsync_desc, (VOPFUNC)err_fsync}, /* fsync */ + {&vnop_remove_desc, (VOPFUNC)err_remove}, /* remove */ + {&vnop_link_desc, (VOPFUNC)err_link}, /* link */ + {&vnop_rename_desc, (VOPFUNC)err_rename}, /* rename */ + {&vnop_mkdir_desc, (VOPFUNC)err_mkdir}, /* mkdir */ + {&vnop_rmdir_desc, (VOPFUNC)volfs_rmdir}, /* rmdir */ + {&vnop_symlink_desc, (VOPFUNC)err_symlink}, /* symlink */ + {&vnop_readdir_desc, (VOPFUNC)volfs_readdir}, /* readdir */ + {&vnop_readdirattr_desc, (VOPFUNC)err_readdirattr}, /* readdirattr */ + {&vnop_readlink_desc, (VOPFUNC)err_readlink}, /* readlink */ + {&vnop_inactive_desc, (VOPFUNC)err_inactive}, /* inactive */ + {&vnop_reclaim_desc, (VOPFUNC)volfs_reclaim}, /* reclaim */ + {&vnop_pathconf_desc, (VOPFUNC)volfs_pathconf}, /* pathconf */ + {&vnop_advlock_desc, (VOPFUNC)err_advlock}, /* advlock */ + {&vnop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate */ + {&vnop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein */ + {&vnop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout */ + {&vnop_devblocksize_desc, (VOPFUNC)err_devblocksize}, /* devblocksize */ + {&vnop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs */ + {&vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copyfile */ + {&vnop_blktooff_desc, (VOPFUNC)err_blktooff}, /* blktooff */ + {&vnop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk */ + {&vnop_blockmap_desc, (VOPFUNC)err_blockmap }, /* blockmap */ {(struct vnodeop_desc *) NULL, (int (*) ()) NULL} }; @@ -168,7 +157,6 @@ struct vnodeopv_entry_desc volfs_vnodeop_entries[] = { struct vnodeopv_desc volfs_vnodeop_opv_desc = {&volfs_vnodeop_p, volfs_vnodeop_entries}; -static char gDot[] = "."; static char gDotDot[] = ".."; struct finfo { @@ -180,321 +168,119 @@ struct finfoattrbuf { struct finfo fi; }; -static int validfsnode(struct mount *fsnode); - -struct volfs_PLCEntry -{ - LIST_ENTRY(volfs_PLCEntry) vplc_hash_link; /* entry's hash chain */ - TAILQ_ENTRY(volfs_PLCEntry) vplc_lru_link; /* entry's LRU chain link */ - int32_t vplc_fsid; - u_int vplc_item_id; - uid_t vplc_uid; - pid_t vplc_pid; -}; - -#define VOLFSPLCHASH(fsid, inum) ((((unsigned long)fsid) + (unsigned long)(inum)) & volfs_PLCHashMask) - -static struct slock volfs_PLChashtable_slock; -static TAILQ_HEAD(volfs_PLCLRUListHead, volfs_PLCEntry) volfs_PLCLRUList; 
-static TAILQ_HEAD(volfs_PLCFreeListHead, volfs_PLCEntry) volfs_PLCFreeList; -static LIST_HEAD(, volfs_PLCEntry) *volfs_PLCHashTable; -static u_long volfs_PLCHashMask; /* size of hash table - 1 */ -static u_long volfs_PLCEntryCount; -#if DBG_VOP_TEST_LOCKS -static void DbgVopTest (int max, int error, VopDbgStoreRec *VopDbgStore, char *funcname); -#endif /* DBG_VOP_TEST_LOCKS */ - - -/* - * volfs_PLChashinit - */ -__private_extern__ void -volfs_PLChashinit(void) -{ - int i; - - TAILQ_INIT(&volfs_PLCLRUList); - TAILQ_INIT(&volfs_PLCFreeList); - simple_lock_init(&volfs_PLChashtable_slock); -#if MAXPLCENTRIES - volfs_PLCHashTable = hashinit(PLCHASHSIZE, M_TEMP, &volfs_PLCHashMask); - - for (i = 0; i < PLCHASHSIZE; ++i) { - LIST_INIT(&volfs_PLCHashTable[i]); - }; -#endif - volfs_PLCEntryCount = 0; -} - - - -__private_extern__ void -volfs_PLC_reclaim_entries(int entrycount) -{ -#if MAXPLCENTRIES - int i; - struct volfs_PLCEntry *reclaim_target; - - simple_lock(&volfs_PLChashtable_slock); - - for (i = entrycount; i > 0; --i) { - if (TAILQ_EMPTY(&volfs_PLCLRUList)) break; - - /* Pick the next entry to be recycled and free it: */ - reclaim_target = TAILQ_FIRST(&volfs_PLCLRUList); - TAILQ_REMOVE(&volfs_PLCLRUList, reclaim_target, vplc_lru_link); - LIST_REMOVE(reclaim_target, vplc_hash_link); - TAILQ_INSERT_TAIL(&volfs_PLCFreeList, reclaim_target, vplc_lru_link); - }; - - simple_unlock(&volfs_PLChashtable_slock); -#endif -} - - - -#if MAXPLCENTRIES -/* - * volfs_PLCLookup - * - * Look up a PLC entry in the hash - */ -static int -volfs_PLCLookup(int32_t fsid, u_int target_id, uid_t uid, pid_t pid) -{ - struct volfs_PLCEntry *hash_entry; - int result = 0; - - simple_lock(&volfs_PLChashtable_slock); - LIST_FOREACH(hash_entry, &volfs_PLCHashTable[VOLFSPLCHASH(fsid, target_id)], vplc_hash_link) { - if ((hash_entry->vplc_item_id == target_id) && - (hash_entry->vplc_pid == pid) && - (hash_entry->vplc_uid == uid) && - (hash_entry->vplc_fsid == fsid)) { - result = 1; -#if 0 - if (hash_entry != TAILQ_LAST(&volfs_PLCLRUList, volfs_PLCLRUListHead)) { - TAILQ_REMOVE(&volfs_PLCLRUList, hash_entry, vplc_lru_link); - TAILQ_INSERT_TAIL(&volfs_PLCLRUList, hash_entry, vplc_lru_link); - }; -#endif - break; - }; - }; - simple_unlock(&volfs_PLChashtable_slock); - return result; -} - - -static void -volfs_PLCEnter(int32_t fsid, u_int target_id, uid_t uid, pid_t pid) -{ - struct volfs_PLCEntry *new_entry; - - simple_lock(&volfs_PLChashtable_slock); - if (!TAILQ_EMPTY(&volfs_PLCFreeList)) { - new_entry = TAILQ_FIRST(&volfs_PLCFreeList); - TAILQ_REMOVE(&volfs_PLCFreeList, new_entry, vplc_lru_link); - } else { - /* - * Allocate up to the predetermined maximum number of new entries: - * [must be done now to avoid blocking in MALLOC() with volfs_PLChashtable_slock held locked] - */ - if (volfs_PLCEntryCount < MAXPLCENTRIES) { - simple_unlock(&volfs_PLChashtable_slock); - new_entry = MALLOC(new_entry, struct volfs_PLCEntry *, sizeof(struct volfs_PLCEntry), M_TEMP, M_WAITOK); - simple_lock(&volfs_PLChashtable_slock); - ++volfs_PLCEntryCount; - } else { - new_entry = TAILQ_FIRST(&volfs_PLCLRUList); - TAILQ_REMOVE(&volfs_PLCLRUList, new_entry, vplc_lru_link); - LIST_REMOVE(new_entry, vplc_hash_link); - }; - }; - - new_entry->vplc_fsid = fsid; - new_entry->vplc_item_id = target_id; - new_entry->vplc_uid = uid; - new_entry->vplc_pid = pid; - - /* Link the new entry on the hash list for the fsid/target_id as well as the tail of the LRU list: */ - LIST_INSERT_HEAD(&volfs_PLCHashTable[VOLFSPLCHASH(fsid, target_id)], new_entry, 
vplc_hash_link); - TAILQ_INSERT_TAIL(&volfs_PLCLRUList, new_entry, vplc_lru_link); - simple_unlock(&volfs_PLChashtable_slock); -} -#endif +static int volfs_getattr_callback(mount_t, void *); /* * volfs_reclaim - Reclaim a vnode so that it can be used for other purposes. - * - * Locking policy: ignored */ -int +static int volfs_reclaim(ap) - struct vop_reclaim_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; + struct vnop_reclaim_args /* { struct vnode *a_vp; vfs_context_t a_context; } */ *ap; { - struct vnode *vp = ap->a_vp; - void *data = vp->v_data; - - DBG_FUNC_NAME("volfs_reclaim"); - DBG_VOP_LOCKS_DECL(1); - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0, vp, VOPDBG_UNLOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_ZERO); + struct vnode *vp = ap->a_vp; + void *data = vp->v_data; vp->v_data = NULL; - FREE(data, M_VOLFSNODE); + FREE(data, M_VOLFSNODE); - DBG_VOP_LOCKS_TEST(0); - return (0); + return (0); } -/* - * volfs_access - same access policy for all vnodes and all users (file/directory vnodes - * for the actual file systems are handled by actual file system) - * - * Locking policy: a_vp locked on input and output - */ -int -volfs_access(ap) - struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct - ucred *a_cred; struct proc *a_p; } */ *ap; -{ - int ret_err; - DBG_FUNC_NAME("volfs_access"); - DBG_VOP_LOCKS_DECL(1); - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS); +struct volfsgetattr_struct{ + int numMounts; + vnode_t a_vp; +}; - /* - * We don't need to check credentials! FS is read-only for everyone - */ - if ((ap->a_mode & ~(VREAD | VEXEC)) == 0) - ret_err = 0; - else - ret_err = EACCES; +static int +volfs_getattr_callback(mount_t mp, void * arg) +{ + struct volfsgetattr_struct *vstrp = (struct volfsgetattr_struct *)arg; - DBG_VOP_LOCKS_TEST(ret_err); - return (ret_err); + if (mp != vnode_mount(vstrp->a_vp) && validfsnode(mp)) + vstrp->numMounts++; + return(VFS_RETURNED); } /* * volfs_getattr - fill in the attributes for this vnode - * - * Locking policy: don't change anything */ -int +static int volfs_getattr(ap) - struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; - struct ucred *a_cred; struct proc *a_p; } */ *ap; + struct vnop_getattr_args /* { struct vnode *a_vp; struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { struct volfs_vndata *priv_data; - struct vnode *a_vp; - struct vattr *a_vap; + struct vnode *a_vp; + struct vnode_attr *a_vap; int numMounts = 0; - DBG_FUNC_NAME("volfs_getattr"); - DBG_VOP_LOCKS_DECL(1); - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_SAME, VOPDBG_SAME, VOPDBG_SAME, VOPDBG_POS); + struct volfsgetattr_struct vstr; + struct timespec ts; a_vp = ap->a_vp; a_vap = ap->a_vap; priv_data = a_vp->v_data; - a_vap->va_type = VDIR; - a_vap->va_mode = 0444; /* Yup, hard - coded to read - only */ - a_vap->va_nlink = 2; - a_vap->va_uid = 0; /* Always owned by root */ - a_vap->va_gid = 0; /* Always part of group 0 */ - a_vap->va_fsid = (int) a_vp->v_mount->mnt_stat.f_fsid.val[0]; - a_vap->va_fileid = priv_data->nodeID; + VATTR_RETURN(a_vap, va_type, VDIR); + VATTR_RETURN(a_vap, va_mode, 0555); + VATTR_RETURN(a_vap, va_nlink, 2); + VATTR_RETURN(a_vap, va_uid, 0); + VATTR_RETURN(a_vap, va_gid, 0); + VATTR_RETURN(a_vap, va_fsid, (int) 
a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + VATTR_RETURN(a_vap, va_fileid, (uint64_t)((u_long)priv_data->nodeID)); + VATTR_RETURN(a_vap, va_acl, NULL); /* * If it's the root vnode calculate its size based on the number of eligible * file systems */ - if (priv_data->vnode_type == VOLFS_ROOT) - { - register struct mount *mp, *nmp; + if (priv_data->vnode_type == VOLFS_ROOT) { + vstr.numMounts = 0; + vstr.a_vp = a_vp; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, ap->a_p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - - if (mp != a_vp->v_mount && validfsnode(mp)) - numMounts++; + vfs_iterate(LK_NOWAIT, volfs_getattr_callback, (void *)&vstr); - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, ap->a_p); - } - simple_unlock(&mountlist_slock); + numMounts = vstr.numMounts; - DBG_VOP(("found %d file systems that volfs can support\n", numMounts)); - a_vap->va_size = (numMounts + 2) * VLFSDIRENTLEN; - } - else - { - a_vap->va_size = 2 * VLFSDIRENTLEN; - } - DBG_VOP(("va_size = %d, VLFSDIRENTLEN = %ld\n", (int) a_vap->va_size, VLFSDIRENTLEN)); - a_vap->va_blocksize = 512; - - a_vap->va_atime.tv_sec = boottime.tv_sec; - a_vap->va_atime.tv_nsec = 0; - - a_vap->va_mtime.tv_sec = boottime.tv_sec; - a_vap->va_mtime.tv_nsec = 0; + VATTR_RETURN(a_vap, va_data_size, (numMounts + 2) * VLFSDIRENTLEN); + } else { + VATTR_RETURN(a_vap, va_data_size, 2 * VLFSDIRENTLEN); + } - a_vap->va_ctime.tv_sec = boottime.tv_sec; - a_vap->va_ctime.tv_nsec = 0; + VATTR_RETURN(a_vap, va_iosize, 512); + ts.tv_sec = boottime_sec(); + ts.tv_nsec = 0; + VATTR_RETURN(a_vap, va_access_time, ts); + VATTR_RETURN(a_vap, va_modify_time, ts); + VATTR_RETURN(a_vap, va_change_time, ts); - a_vap->va_gen = 0; - a_vap->va_flags = 0; - a_vap->va_rdev = 0; - a_vap->va_bytes = a_vap->va_size; - a_vap->va_filerev = 0; - a_vap->va_vaflags = 0; + VATTR_RETURN(a_vap, va_gen, 0); + VATTR_RETURN(a_vap, va_flags, 0); + VATTR_RETURN(a_vap, va_rdev, 0); + VATTR_RETURN(a_vap, va_filerev, 0); - DBG_VOP_LOCKS_TEST(0); return (0); } /* * volfs_select - just say OK. 
Only possible op is readdir - * - * Locking policy: ignore */ -int -volfs_select(ap) - struct vop_select_args /* { struct vnode *a_vp; int a_which; int - * a_fflags; struct ucred *a_cred; void * a_wql; struct - proc *a_p; } */ *ap; +static int +volfs_select(__unused struct vnop_select_args *ap) { - DBG_VOP(("volfs_select called\n")); - - return (1); + return (1); } /* * volfs_rmdir - not possible to remove directories in volfs - * - * Locking policy: a_dvp & a_vp - locked on entry, unlocked on exit */ -int +static int volfs_rmdir(ap) - struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; - struct componentname *a_cnp; } */ *ap; + struct vnop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; + struct componentname *a_cnp; vfs_context_t a_context; } */ *ap; { - DBG_VOP(("volfs_rmdir called\n")); if (ap->a_dvp == ap->a_vp) { (void) nop_rmdir(ap); return (EINVAL); @@ -502,6 +288,31 @@ volfs_rmdir(ap) return (err_rmdir(ap)); } + + +static int +volfs_readdir_callback(mount_t mp, void * v) +{ + struct volfs_rdstruct * vcsp = (struct volfs_rdstruct *)v; + struct dirent local_dir; + int error; + + if ((mp != vnode_mount(vcsp->vp)) && validfsnode(mp)) + vcsp->validindex++; + + if (vcsp->rec_offset == vcsp->validindex) + { + local_dir.d_fileno = mp->mnt_vfsstat.f_fsid.val[0]; + local_dir.d_type = DT_DIR; + local_dir.d_reclen = VLFSDIRENTLEN; + local_dir.d_namlen = sprintf(&local_dir.d_name[0], "%d", mp->mnt_vfsstat.f_fsid.val[0]); + error = uiomove((char *) &local_dir, VLFSDIRENTLEN, vcsp->uio); + vcsp->rec_offset++; + } + + return(VFS_RETURNED); +} + /* * volfs_readdir - Get directory entries * @@ -511,14 +322,12 @@ volfs_rmdir(ap) * equivalent of the f_fsid.val[0] from their mount structure (typically * the device id of the volume). The maximum length for a name, then, is * 10 characters. - * - * Locking policy: a_vp locked on entry and exit */ -int +static int volfs_readdir(ap) - struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; - * struct ucred *a_cred; int *a_eofflag; int - *ncookies; u_long **a_cookies; } */ *ap; + struct vnop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; - * int *a_eofflag; int - *ncookies; u_long **a_cookies; vfs_context_t a_context; } */ *ap; { struct volfs_vndata *priv_data; register struct uio *uio = ap->a_uio; @@ -529,52 +338,42 @@ volfs_readdir(ap) int i; int starting_resid; off_t off; - DBG_FUNC_NAME("volfs_readdir"); - DBG_VOP_LOCKS_DECL(1); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS); - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP(("\tuio_offset = %d, uio_resid = %d\n", (int) uio->uio_offset, uio->uio_resid)); - /* We assume it's all one big buffer... */ - if (uio->uio_iovcnt > 1) - DBG_VOP(("\tuio->uio_iovcnt = %d?\n", uio->uio_iovcnt)); - + struct volfs_rdstruct vcs; + off = uio->uio_offset; priv_data = ap->a_vp->v_data; - starting_resid = uio->uio_resid; - count = uio->uio_resid; + // LP64todo - fix this! + starting_resid = count = uio_resid(uio); /* Make sure we don't return partial entries.
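volfs_readdir_callback() above (like volfs_getattr_callback() before it) follows the vfs_iterate() contract: the VFS walks the mount list itself, taking over the locking and busy-mount skipping that the removed hand-rolled loops did, and invokes the callback once per mount; the callback returns VFS_RETURNED to continue or VFS_RETURNED_DONE to stop early. A small hedged sketch of the pattern:

#include <sys/mount.h>

struct count_state {
	mount_t	skip;	/* mount to exclude, e.g. volfs itself */
	int	count;
};

/* Callback: count every mount except "skip". */
static int
count_mounts_callback(mount_t mp, void *arg)
{
	struct count_state *cs = (struct count_state *)arg;

	if (mp != cs->skip)
		cs->count++;
	return (VFS_RETURNED);		/* keep iterating */
}

/* usage:
 *	struct count_state cs = { my_mp, 0 };
 *	vfs_iterate(0, count_mounts_callback, &cs);
 *	... cs.count now holds the tally ...
 */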
*/ count -= (uio->uio_offset + count) & (VLFSDIRENTLEN - 1); - if (count <= 0) - { - DBG_VOP(("volfs_readdir: Not enough buffer to read in entries\n")); - DBG_VOP_LOCKS_TEST(EINVAL); - return (EINVAL); - } + if (count <= 0) { + return (EINVAL); + } /* * Make sure we're starting on a directory boundary */ - if (off & (VLFSDIRENTLEN - 1)) - { - DBG_VOP_LOCKS_TEST(EINVAL); - return (EINVAL); - } + if (off & (VLFSDIRENTLEN - 1)) { + return (EINVAL); + } rec_offset = off / VLFSDIRENTLEN; - lost = uio->uio_resid - count; - uio->uio_resid = count; - uio->uio_iov->iov_len = count; + // LP64todo - fix this! + lost = uio_resid(uio) - count; + uio_setresid(uio, count); + uio_iov_len_set(uio, count); +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ local_dir.d_reclen = VLFSDIRENTLEN; /* * We must synthesize . and .. */ - DBG_VOP(("\tstarting ... uio_offset = %d, uio_resid = %d\n", - (int) uio->uio_offset, uio->uio_resid)); + if (rec_offset == 0) { - DBG_VOP(("\tAdding .\n")); /* * Synthesize . */ @@ -585,13 +384,10 @@ volfs_readdir(ap) for (i = 1; i < MAXVLFSNAMLEN; i++) local_dir.d_name[i] = 0; error = uiomove((char *) &local_dir, VLFSDIRENTLEN, uio); - DBG_VOP(("\t after adding ., uio_offset = %d, uio_resid = %d\n", - (int) uio->uio_offset, uio->uio_resid)); rec_offset++; } if (rec_offset == 1) { - DBG_VOP(("\tAdding ..\n")); /* * Synthesize .. * We only have two levels in the volfs hierarchy. Root's @@ -607,8 +403,6 @@ volfs_readdir(ap) local_dir.d_name[i] = 0; error = uiomove((char *) &local_dir, VLFSDIRENTLEN, uio); rec_offset++; - DBG_VOP(("\t after adding .., uio_offset = %d, uio_resid = %d\n", - (int) uio->uio_offset, uio->uio_resid)); } /* @@ -619,58 +413,26 @@ volfs_readdir(ap) if (priv_data->vnode_type == VOLFS_FSNODE) { *ap->a_eofflag = 1; /* we got all the way to the end */ - DBG_VOP_LOCKS_TEST(error); return (error); } if (rec_offset > 1) { - register struct mount *mp, *nmp; - int validnodeindex; - struct proc *p = uio->uio_procp; - - validnodeindex = 1; /* we always have "." and ".." */ - - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - - if (mp != ap->a_vp->v_mount && validfsnode(mp)) - validnodeindex++; - - if (rec_offset == validnodeindex) - { - local_dir.d_fileno = mp->mnt_stat.f_fsid.val[0]; - local_dir.d_type = DT_DIR; - local_dir.d_reclen = VLFSDIRENTLEN; - DBG_VOP(("\tAdding dir entry %d for offset %d\n", mp->mnt_stat.f_fsid.val[0], rec_offset)); - local_dir.d_namlen = sprintf(&local_dir.d_name[0], "%d", mp->mnt_stat.f_fsid.val[0]); - error = uiomove((char *) &local_dir, VLFSDIRENTLEN, uio); - DBG_VOP(("\t after adding entry '%s', uio_offset = %d, uio_resid = %d\n", - &local_dir.d_name[0], (int) uio->uio_offset, uio->uio_resid)); - rec_offset++; - } - - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); + vcs.validindex = 1; /* we always have "." and ".." 
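The readdir body above also shows the uio accessor migration: code no longer dereferences uio->uio_resid or the iovec fields directly but goes through uio_resid(), uio_setresid(), and the iovec accessors, so one code path serves 32-bit and 64-bit callers (the // LP64todo markers flag the remaining int-sized assumptions). A hedged sketch of consuming a uio purely through the accessors, with RECLEN standing in for VLFSDIRENTLEN:

#include <sys/uio.h>

#define RECLEN	32	/* illustrative fixed record size */

/* Sketch: copy whole fixed-size records out through a uio. */
static int
copy_records(const char *records, int nrecs, uio_t uio)
{
	int i, error = 0;

	for (i = 0; i < nrecs; i++) {
		/* never emit a partial record, mirroring the check above */
		if (uio_resid(uio) < RECLEN)
			break;
		error = uiomove(records + i * RECLEN, RECLEN, uio);
		if (error)
			break;
	}
	return (error);
}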
*/ + vcs.rec_offset = rec_offset; + vcs.vp = ap->a_vp; + vcs.uio = uio; + + + vfs_iterate(0, volfs_readdir_callback, &vcs); - if (mp == (void *) &mountlist) + //if (mp == (void *) &mountlist) *ap->a_eofflag = 1; /* we got all the way to the end */ } + uio_setresid(uio, (uio_resid(uio) + lost)); - uio->uio_resid += lost; - if (starting_resid == uio->uio_resid) + if (starting_resid == uio_resid(uio)) uio->uio_offset = 0; - DBG_VOP(("\tExiting, uio_offset = %d, uio_resid = %d, ap->a_eofflag = %d\n", - (int) uio->uio_offset, uio->uio_resid, *ap->a_eofflag)); - - DBG_VOP_LOCKS_TEST(error); return (error); } @@ -680,7 +442,7 @@ volfs_readdir(ap) * * This can cause context switching, so caller should be lock safe */ -static int +int validfsnode(struct mount *fsnode) { @@ -695,109 +457,14 @@ validfsnode(struct mount *fsnode) return 0; } -/* - * volfs_lock - Lock an inode. - * If its already locked, set the WANT bit and sleep. - * - * Locking policy: handled by lockmgr - */ -int -volfs_lock(ap) - struct vop_lock_args /* { struct vnode *a_vp; int a_flags; struct - proc *a_p; } */ *ap; -{ - int retval; - struct volfs_vndata *priv_data; - DBG_FUNC_NAME("volfs_lock"); - DBG_VOP_LOCKS_DECL(1); -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 0)) | DBG_FUNC_START, - (unsigned int)ap->a_vp, (unsigned int)ap->a_flags, (unsigned int)ap->a_p, 0, 0); -#endif - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_UNLOCKED, VOPDBG_LOCKED, VOPDBG_UNLOCKED, VOPDBG_ZERO); - - priv_data = (struct volfs_vndata *) ap->a_vp->v_data; - retval = lockmgr(&priv_data->lock, ap->a_flags, &ap->a_vp->v_interlock, ap->a_p); - DBG_VOP_LOCKS_TEST(retval); -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 0)) | DBG_FUNC_END, - (unsigned int)ap->a_vp, (unsigned int)ap->a_flags, (unsigned int)ap->a_p, retval, 0); -#endif - return (retval); -} - -/* - * volfs_unlock - Unlock an inode. - * - * Locking policy: handled by lockmgr - */ -int -volfs_unlock(ap) - struct vop_unlock_args /* { struct vnode *a_vp; int a_flags; struct - proc *a_p; } */ *ap; -{ - int retval; - struct volfs_vndata *priv_data; - DBG_FUNC_NAME("volfs_unlock"); - DBG_VOP_LOCKS_DECL(1); -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 4)) | DBG_FUNC_START, - (unsigned int)ap->a_vp, (unsigned int)ap->a_flags, (unsigned int)ap->a_p, 0, 0); -#endif - DBG_VOP_PRINT_FUNCNAME();DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_UNLOCKED, VOPDBG_LOCKED, VOPDBG_ZERO); - - priv_data = (struct volfs_vndata *) ap->a_vp->v_data; - retval = lockmgr(&priv_data->lock, ap->a_flags | LK_RELEASE, - &ap->a_vp->v_interlock, ap->a_p); - - DBG_VOP_LOCKS_TEST(retval); -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 4)) | DBG_FUNC_END, - (unsigned int)ap->a_vp, (unsigned int)ap->a_flags, (unsigned int)ap->a_p, retval, 0); -#endif - return (retval); -} - -/* - * volfs_islocked - Check for a locked inode. 
- * - * Locking policy: ignore - */ -int -volfs_islocked(ap) - struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; -{ - int retval; - struct volfs_vndata *priv_data; - - DBG_FUNC_NAME("volfs_islocked"); - DBG_VOP_LOCKS_DECL(1); - //DBG_VOP_PRINT_FUNCNAME();DBG_VOP(("\n")); - - DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_ZERO); - priv_data = (struct volfs_vndata *) ap->a_vp->v_data; - retval = lockstatus(&priv_data->lock); - - DBG_VOP_LOCKS_TEST(retval); - return (retval); -} - /* * volfs_pathconf - Return POSIX pathconf information applicable to ufs filesystems. - * - * Locking policy: a_vp locked on input and output */ -int +static int volfs_pathconf(ap) - struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int - *a_retval; } */ *ap; + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int + *a_retval; vfs_context_t a_context; } */ *ap; { - DBG_VOP(("volfs_pathconf called\n")); - switch (ap->a_name) { case _PC_LINK_MAX: @@ -824,65 +491,31 @@ volfs_pathconf(ap) /* NOTREACHED */ } - -/* - * Call VOP_GETATTRLIST on a given vnode - */ -static int -vp_getattrlist(struct vnode *vp, struct attrlist alist, void *attrbufptr, size_t bufsize, unsigned long options, struct proc *p) { - struct iovec iov; - struct uio bufuio; - - iov.iov_base = (char *)attrbufptr; - iov.iov_len = bufsize; - - bufuio.uio_iov = &iov; - bufuio.uio_iovcnt = 1; - bufuio.uio_offset = 0; - bufuio.uio_resid = iov.iov_len; - bufuio.uio_segflg = UIO_SYSSPACE; - bufuio.uio_rw = UIO_READ; - bufuio.uio_procp = p; - - return VOP_GETATTRLIST(vp, &alist, &bufuio, p->p_ucred, p); -} - /* * get_parentvp() - internal routine that tries to lookup the parent of vpp. - * On success, *vpp is the parent vp and is returned locked and the original child - * is left unlocked. On failure, the original child will be locked upon return. + * On success, *vpp is the parent vp and is returned with a reference. */ static int -get_parentvp(struct vnode **vpp, struct mount *mp, struct proc *p) +get_parentvp(struct vnode **vpp, struct mount *mp, vfs_context_t context) { int result; - struct attrlist alist; - struct finfoattrbuf finfobuf; + struct vnode_attr va; struct vnode *child_vp = *vpp; - - alist.bitmapcount = 5; - alist.reserved = 0; - alist.commonattr = ATTR_CMN_PAROBJID; - alist.volattr = 0; - alist.dirattr = 0; - alist.fileattr = 0; - alist.forkattr = 0; - result = vp_getattrlist(child_vp, alist, &finfobuf, sizeof(finfobuf), 0, p); - if (result) - return result; - - /* Release the child vnode before trying to acquire its parent - to avoid vnode deadlock problems with parsing code - coming top-down through the directory hierarchy: */ - VOP_UNLOCK(child_vp, 0, p); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_parentid); + result = vnode_getattr(child_vp, &va, context); + if (result) { + return result; + } + /* Shift attention to the parent directory vnode: */ - result = VFS_VGET(mp, &finfobuf.fi.parID.fid_objno, vpp); - if (result) { - /* Make sure child_vp is still locked on exit: */ - vn_lock(child_vp, LK_EXCLUSIVE | LK_RETRY, p); + result = VFS_VGET(mp, (ino64_t)va.va_parentid, vpp, context); + + if (result == 0 && child_vp->v_parent != *vpp) { + vnode_update_identity(child_vp, *vpp, NULL, 0, 0, VNODE_UPDATE_PARENT); } - + return result; } @@ -891,58 +524,50 @@ get_parentvp(struct vnode **vpp, struct mount *mp, struct proc *p) * Look up the parent directory of a given vnode. 
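get_parentvp() above demonstrates the single-attribute query pattern that replaces the removed getattrlist plumbing: initialize a struct vnode_attr, mark just the field you want, and call vnode_getattr(). A sketch of that pattern, with one extra check volfs omits (it can assume HFS-style volumes always supply va_parentid):

#include <sys/vnode.h>

/* Sketch: fetch the parent object id of vp, if the filesystem provides it. */
static int
fetch_parent_id(vnode_t vp, vfs_context_t ctx, uint64_t *parentid)
{
	struct vnode_attr va;
	int error;

	VATTR_INIT(&va);			/* clear wanted/supported bitmaps */
	VATTR_WANTED(&va, va_parentid);		/* ask for exactly one attribute */
	error = vnode_getattr(vp, &va, ctx);
	if (error)
		return (error);
	if (!VATTR_IS_SUPPORTED(&va, va_parentid))
		return (ENOTSUP);		/* fs didn't fill it in */
	*parentid = va.va_parentid;
	return (0);
}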
*/ static int -lookup_parent(u_int id, struct vnode *child_vp, struct vnode **parent_vp, struct proc *p) +lookup_parent(vnode_t child_vp, vnode_t *parent_vpp, int is_authorized, vfs_context_t context) { - struct nameidata nd; - struct componentname *cnp = &nd.ni_cnd; - struct filedesc *fdp = p->p_fd; + struct componentname cn; + vnode_t new_vp; int error; - *parent_vp = NULL; - - /* - * Special case lookups for root's parent directory, - * recognized by its special id of "1": - */ - if (id != 1) { - VREF(child_vp); - nd.ni_startdir = child_vp; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, (caddr_t)&gDotDot, p); - } else { - struct vnode *root_vp; - - error = VFS_ROOT(child_vp->v_mount, &root_vp); - if (error) return error; - VOP_UNLOCK(root_vp, 0, p); /* Hold on to the reference */ - nd.ni_startdir = root_vp; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, (caddr_t)&gDot, p); - }; - nd.ni_cnd.cn_cred = nd.ni_cnd.cn_proc->p_ucred; - - /* Since we can't hit any symlinks, use the source path string directly: */ - cnp->cn_pnbuf = nd.ni_dirp; - nd.ni_pathlen = strlen(cnp->cn_pnbuf); - cnp->cn_pnlen = nd.ni_pathlen + 1; - cnp->cn_flags |= (HASBUF | SAVENAME); - - nd.ni_loopcnt = 0; - - if ((nd.ni_rootdir = fdp->fd_rdir) == NULL) nd.ni_rootdir = rootvnode; - cnp->cn_nameptr = cnp->cn_pnbuf; - if (error = lookup(&nd)) { - cnp->cn_pnbuf = NULL; - return (error); - } - /* - * Check for symbolic link - */ - if (cnp->cn_flags & ISSYMLINK) return ENOENT; - if (nd.ni_vp == child_vp) return ELOOP; + *parent_vpp = NULLVP; - *parent_vp = nd.ni_vp; - return 0; -} + if (is_authorized == 0) { + error = vnode_authorize(child_vp, NULL, KAUTH_VNODE_SEARCH, context); + if (error != 0) { + return (error); + } + } + new_vp = child_vp->v_parent; + if (new_vp != NULLVP) { + if ( (error = vnode_getwithref(new_vp)) == 0 ) + *parent_vpp = new_vp; + return (error); + } + bzero(&cn, sizeof(cn)); + cn.cn_nameiop = LOOKUP; + cn.cn_context = context; + cn.cn_pnbuf = CAST_DOWN(caddr_t, &gDotDot); + cn.cn_pnlen = strlen(cn.cn_pnbuf); + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_namelen = cn.cn_pnlen; + cn.cn_flags = (FOLLOW | LOCKLEAF | ISLASTCN | ISDOTDOT); + + error = VNOP_LOOKUP(child_vp, &new_vp, &cn, context); + if (error != 0) { + return(error); + } + if (new_vp == child_vp) { + vnode_put(new_vp); + return ELOOP; + } + if (child_vp->v_parent == NULLVP) { + vnode_update_identity(child_vp, new_vp, NULL, 0, 0, VNODE_UPDATE_PARENT); + } + *parent_vpp = new_vp; + return 0; +} /* @@ -950,107 +575,120 @@ lookup_parent(u_int id, struct vnode *child_vp, struct vnode **parent_vp, struct */ static int -verify_fullpathaccess(u_int id, struct vnode *targetvp, struct proc *p) { +verify_fullpathaccess(struct vnode *targetvp, vfs_context_t context) +{ struct vnode *vp, *parent_vp; struct mount *mp = targetvp->v_mount; - struct attrlist alist; - struct finfoattrbuf finfobuf; + struct proc *p = vfs_context_proc(context); int result; + int dp_authorized; struct filedesc *fdp = p->p_fd; /* pointer to file descriptor state */ - u_int target_id; - u_long vp_id; -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 12)) | DBG_FUNC_START, - (unsigned int)targetvp, (unsigned int)mp, (unsigned int)p, 0, 0); -#endif - vp = targetvp; - vp_id = vp->v_id; - if (vp->v_type != VDIR) { - - /* The target is a file: get the parent directory. 
*/ - result = get_parentvp(&vp, mp, p); - if (result) goto err_exit; - - /* At this point, targetvp is unlocked (but still referenced), and - vp is the parent directory vnode, held locked */ - }; - + dp_authorized = 0; -#if MAXPLCENTRIES - if (volfs_PLCLookup(mp->mnt_stat.f_fsid.val[0], id, p->p_ucred->cr_uid, p->p_pid)) goto lookup_success; -#endif - /* Keep going up until either the process's root or the process's working directory is hit, - either one of which are potential valid starting points for a full pathname: */ - target_id = id; - while (vp != NULL && (!((vp->v_flag & VROOT) || /* Hit "/" */ - (vp == fdp->fd_cdir) || /* Hit process's working directory */ - (vp == fdp->fd_rdir)))) { /* Hit process chroot()-ed root */ - - /* At this point, vp is some directory node and it's always locked */ - /* Unlock the starting directory for namei(), retaining a reference... */ - VOP_UNLOCK(vp, 0, p); - - if (result = lookup_parent(target_id, vp, &parent_vp, p)) { - /* - * If the lookup fails with EACCES and the targetvp is a directory, - * we should try again using get_parentvp(). Without this check, - * directories that you can navigate to but not traverse will - * disappear when clicked in the Finder. - */ - if (result == EACCES && vp == targetvp && vp->v_type == VDIR && (vp->v_flag & VROOT) == 0) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - parent_vp = vp; - if (get_parentvp(&parent_vp, mp, p)) { - /* on error, vp is still locked... unlock for lookup_err_exit path */ - VOP_UNLOCK(vp, 0, p); - } else { - /* on success, vp is returned unlocked, parent_vp is returned locked */ - result = 0; + /* get the parent directory. */ + if ((vp->v_flag & VROOT) == 0 && vp != fdp->fd_cdir && vp != fdp->fd_rdir) { + if (vp->v_parent == NULLVP || (vp->v_flag & VISHARDLINK) || (vnode_getwithref(vp->v_parent) != 0)) { + if (vp->v_type == VDIR) { + result = lookup_parent(vp, &parent_vp, dp_authorized, context); + + /* + * If the lookup fails with EACCES and the vp is a directory, + * we should try again but bypass authorization check. Without this + * workaround directories that you can navigate to but not traverse will + * disappear when clicked in the Finder. + */ + if (result == EACCES && (vp->v_flag & VROOT) == 0) { + dp_authorized = 1; /* bypass auth check */ + if (lookup_parent(vp, &parent_vp, dp_authorized, context) == 0) { + result = 0; + } + dp_authorized = 0; /* force us to authorize */ } - }; - if (result) goto lookup_err_exit; - }; - - if (vp != targetvp) { - vrele(vp); /* Completely done with that vp now... 
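The access checks in the rewritten verify_fullpathaccess() (continued below) go through the kauth layer: each directory on the walk up is tested with vnode_authorize() for KAUTH_VNODE_SEARCH rights under the caller's vfs_context_t, in place of the old VOP_ACCESS(VEXEC)/VOP_ACCESS(VREAD) pair. The basic call, sketched:

#include <sys/vnode.h>
#include <sys/kauth.h>

/* Sketch: may the caller in ctx traverse (search) directory dvp? */
static int
can_search_dir(vnode_t dvp, vfs_context_t ctx)
{
	/* 0 means allowed; the second argument is an optional child vnode */
	return (vnode_authorize(dvp, NULL, KAUTH_VNODE_SEARCH, ctx));
}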
*/ - }; - - vp = parent_vp; - target_id = 0; /* It's unknown at this point */ - - if (((result = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) != 0) && - ((result = VOP_ACCESS(vp, VREAD, p->p_ucred, p)) != 0)) { - VOP_UNLOCK(vp, 0, p); - goto lookup_err_exit; - }; - }; + vp = parent_vp; + } + else { + /* + * this is not a directory so we must get the parent object ID + */ + result = get_parentvp(&vp, mp, context); + parent_vp = vp; + } + if (result != 0) + goto err_exit; + } + else { + /* + * we were able to get a reference on v_parent + */ + parent_vp = vp = vp->v_parent; + } + } -#if MAXPLCENTRIES - volfs_PLCEnter(mp->mnt_stat.f_fsid.val[0], id, p->p_ucred->cr_uid, p->p_pid); -#endif + /* + * Keep going up until either the process's root or the process's working + * directory is hit, either one of which is a potentially valid starting point + * for a full pathname + */ + while (vp != NULLVP) { -lookup_success: - /* Success: the caller has complete access to the initial vnode: */ - result = 0; - - if (vp && vp != targetvp) VOP_UNLOCK(vp, 0, p); - -lookup_err_exit: - if (vp && vp != targetvp) { - vrele(vp); - vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp_id != targetvp->v_id || targetvp->v_type == VBAD) { - result = EAGAIN; /* vnode was recycled */ + result = reverse_lookup(vp, &parent_vp, fdp, context, &dp_authorized); + if (result == 0) { + /* + * we're done and we have access + */ + break; } - }; + if (vp != parent_vp) { + /* + * we were able to walk up the parent chain so now we don't need + * vp any longer + */ + vnode_put(vp); + vp = parent_vp; + } + /* + * we have a referenced vp at this point... if dp_authorized == 1, then + * it's been authorized for search, but v_parent was NULL... + * if dp_authorized == 0, then we need to do the authorization check + * before looking up the parent + */ + if ((vp->v_flag & VROOT) != 0 || + vp == fdp->fd_cdir || vp == fdp->fd_rdir) { + /* + * we're already at the termination point, which implies that + * the authorization check in the cache failed (otherwise we + * would have returned 'done' from "reverse_lookup"...
so, + * do the authorization and bail + */ + result = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, context); + goto lookup_exit; + } + result = lookup_parent(vp, &parent_vp, dp_authorized, context); + if (result != 0) { + goto lookup_exit; + } + if (vp != parent_vp) { + /* + * got the parent so now we don't need vp any longer + */ + vnode_put(vp); + vp = parent_vp; + } + } /* while loop */ + + /* + * Success: the caller has complete access to the initial vnode + */ + result = 0; + +lookup_exit: + if (vp != NULLVP && vp != targetvp) { + vnode_put(vp); + } err_exit: -#if 0 - KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 12)) | DBG_FUNC_END, - (unsigned int)targetvp, (unsigned int)mp, (unsigned int)p, result, 0); -#endif return result; }; @@ -1060,33 +698,21 @@ err_exit: * id of filesystem to lookup and pointer to vnode pointer to fill in */ static int -get_fsvnode(our_mount, id, ret_vnode) - struct mount *our_mount; - int id; - struct vnode **ret_vnode; +get_fsvnode(struct mount *our_mount, int id, vnode_t *ret_vnode) { - register struct mount *mp; struct mount *cur_mount; + fsid_t cur_fsid; struct vnode *cur_vnode; struct volfs_vndata *cur_privdata; int retval; - - //DBG_VOP(("volfs: get_fsvnode called\n")); + struct vnode_fsparam vfsp; + int vid = 0; /* * OK, first look up the matching mount on the list of mounted file systems */ - cur_mount = NULL; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) - { - if (validfsnode(mp) && mp->mnt_stat.f_fsid.val[0] == id) - { - cur_mount = mp; - break; - } - } - simple_unlock(&mountlist_slock); + /* the following will return the mount point with vfs_busy held */ + cur_mount = mount_lookupby_volfsid(id, 1); if (cur_mount == NULL) { /* @@ -1100,34 +726,36 @@ get_fsvnode(our_mount, id, ret_vnode) return ENOENT; }; + cur_fsid = cur_mount->mnt_vfsstat.f_fsid; + /* * Now search the list attached to the mount structure to * see if this vnode is already floating around */ search_vnodelist: - cur_vnode = our_mount->mnt_vnodelist.lh_first; - while (cur_vnode != NULL) - { + mount_lock(our_mount); + TAILQ_FOREACH(cur_vnode, &our_mount->mnt_vnodelist, v_mntvnodes) { cur_privdata = (struct volfs_vndata *) cur_vnode->v_data; - if (cur_privdata->nodeID == id) + if (cur_privdata->nodeID == (unsigned int)id) { if (cur_privdata->fs_mount != cur_mount) { - DBG_VOP(("volfs get_fsvnode: Updating fs_mount for vnode 0x%08lX (id = %d) from 0x%08lX to 0x%08lX...\n", - (unsigned long)cur_vnode, - cur_privdata->nodeID, - (unsigned long)cur_privdata->fs_mount, - (unsigned long)cur_mount)); cur_privdata->fs_mount = cur_mount; + cur_privdata->fs_fsid = cur_fsid; }; break; } - cur_vnode = cur_vnode->v_mntvnodes.le_next; - } + } + mount_unlock(our_mount); - //DBG_VOP(("\tfinal cur_mount: 0x%x\n",cur_mount)); if (cur_vnode) { - /* If vget returns an error, cur_vnode will not be what we think it is, try again */ - if (vget(cur_vnode, LK_EXCLUSIVE, current_proc()) != 0) { + vid = vnode_vid(cur_vnode); + + /* + * use vnode_getwithvid since it will wait for a vnode currently being + * terminated... 
if it returns an error, cur_vnode will not be what we + * think it is, try again + */ + if (vnode_getwithvid(cur_vnode, vid) != 0) { goto search_vnodelist; }; } @@ -1135,27 +763,40 @@ search_vnodelist: { MALLOC(cur_privdata, struct volfs_vndata *, sizeof(struct volfs_vndata), M_VOLFSNODE, M_WAITOK); - retval = getnewvnode(VT_VOLFS, our_mount, volfs_vnodeop_p, &cur_vnode); - if (retval != 0) { - FREE(cur_privdata, M_VOLFSNODE); - return retval; - }; - + cur_privdata->vnode_type = VOLFS_FSNODE; cur_privdata->nodeID = id; cur_privdata->fs_mount = cur_mount; - lockinit(&cur_privdata->lock, PINOD, "volfsnode", 0, 0); - lockmgr(&cur_privdata->lock, LK_EXCLUSIVE, (struct slock *)0, current_proc()); - cur_vnode->v_data = cur_privdata; - cur_vnode->v_type = VDIR; - DBG_VOP(("get_fsvnode returned with new node of ")); - DBG_VOP_PRINT_VNODE_INFO(cur_vnode);DBG_VOP(("\n")); + cur_privdata->fs_fsid = cur_fsid; + + vfsp.vnfs_mp = our_mount; + vfsp.vnfs_vtype = VDIR; + vfsp.vnfs_str = "volfs"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = cur_privdata; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = volfs_vnodeop_p; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = 0; + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = 0; + + retval = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &cur_vnode); + if (retval != 0) { + FREE(cur_privdata, M_VOLFSNODE); + goto out; + }; + cur_vnode->v_tag = VT_VOLFS; + } *ret_vnode = cur_vnode; - - return (0); + retval = 0; +out: + vfs_unbusy(cur_mount); + return (retval); } @@ -1166,11 +807,7 @@ search_vnodelist: * to a vnode pointer */ static int -get_filevnode(parent_fs, id, ret_vnode, p) - struct mount *parent_fs; - u_int id; - struct vnode **ret_vnode; - struct proc *p; +get_filevnode(struct mount *parent_fs, u_int id, vnode_t *ret_vnode, vfs_context_t context) { int retval; @@ -1179,18 +816,18 @@ again: * Special case 2 to mean the root of a file system */ if (id == 2) - retval = VFS_ROOT(parent_fs, ret_vnode); + retval = VFS_ROOT(parent_fs, ret_vnode, context); else - retval = VFS_VGET(parent_fs, &id, ret_vnode); + retval = VFS_VGET(parent_fs, (ino64_t)id, ret_vnode, context); if (retval) goto error; - retval = verify_fullpathaccess(id, *ret_vnode, p); + retval = verify_fullpathaccess(*ret_vnode, context); if (retval) { /* An error was encountered verifying that the caller has, in fact, got access all the way from "/" or their working directory to the specified item... */ - vput(*ret_vnode); + vnode_put(*ret_vnode); *ret_vnode = NULL; /* vnode was recycled during access verification. 
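get_fsvnode() above uses the capture/revalidate idiom that replaces the old vget() retry: record the vnode's identity generation with vnode_vid() while it is still on the mount's vnode list, then take an iocount with vnode_getwithvid(), which waits out a concurrent termination and fails if the vnode was recycled in the window. The same idiom, sketched with find_candidate() as a placeholder for whatever unlocked lookup produced the vnode:

#include <sys/vnode.h>

extern vnode_t find_candidate(void);	/* hypothetical: returns a vnode with no iocount */

static int
get_candidate(vnode_t *vpp)
{
	vnode_t vp;
	int vid;

retry:
	vp = find_candidate();
	if (vp == NULLVP)
		return (ENOENT);
	vid = vnode_vid(vp);			/* capture identity generation */

	/* fails (and we redo the lookup) if vp was recycled underneath us */
	if (vnode_getwithvid(vp, vid) != 0)
		goto retry;

	*vpp = vp;				/* returned with an iocount held */
	return (0);
}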
*/ if (retval == EAGAIN) { @@ -1203,382 +840,140 @@ error: } -int -volfs_lookup(ap) - struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode - **a_vpp; struct componentname *a_cnp; } */ *ap; +static int +volfs_lookup(struct vnop_lookup_args *ap) { - struct volfs_vndata *priv_data; - char *cnp; - long namelen; - struct mount *parent_fs; - int unlocked_parent = 0, isdot_or_dotdot = 0; - int ret_err = ENOENT; - DBG_FUNC_NAME("volfs_lookup"); - DBG_VOP_LOCKS_DECL(2); + struct volfs_vndata *priv_data; + char *nameptr; + long namelen; + struct mount *parent_fs; + vnode_t vp; + int isdot_or_dotdot = 0; + int ret_err = ENOENT; + char firstchar; + int ret_val; #if 0 KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 8)) | DBG_FUNC_START, (unsigned int)ap->a_dvp, (unsigned int)ap->a_cnp, (unsigned int)p, 0, 0); #endif - - DBG_VOP(("volfs_lookup called, name = %s, namelen = %ld\n", ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen)); - - DBG_VOP_LOCKS_INIT(0,ap->a_dvp, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS); - DBG_VOP_LOCKS_INIT(1,*ap->a_vpp, VOPDBG_IGNORE, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_POS); - DBG_VOP_PRINT_FUNCNAME();DBG_VOP(("\n")); - DBG_VOP(("\t"));DBG_VOP_PRINT_CPN_INFO(ap->a_cnp);DBG_VOP(("\n")); - if (ap->a_cnp->cn_flags & LOCKPARENT) - DBG_VOP(("\tLOCKPARENT is set\n")); - if (ap->a_cnp->cn_flags & ISLASTCN) - { - DBG_VOP(("\tISLASTCN is set\n")); - if (ap->a_cnp->cn_nameiop == DELETE || ap->a_cnp->cn_nameiop == RENAME) /* XXX PPD Shouldn't we check for CREATE, too? */ - { - ret_err = EROFS; - goto Err_Exit; - } - } priv_data = ap->a_dvp->v_data; - cnp = ap->a_cnp->cn_nameptr; + nameptr = ap->a_cnp->cn_nameptr; namelen = ap->a_cnp->cn_namelen; - -#if VOLFS_DEBUG - switch (priv_data->vnode_type) { - case VOLFS_ROOT: - DBG_VOP(("\tparent directory (vnode 0x%08lX) vnode_type is VOLFS_ROOT.\n", (unsigned long)ap->a_dvp)); - break; - - case VOLFS_FSNODE: - DBG_VOP(("\tparent directory (vnode 0x%08lX) vnode_type is VOLFS_FSNODE, nodeID = %d, fs_mount = 0x%08lX.\n", - (unsigned long)ap->a_dvp, - priv_data->nodeID, - (unsigned long)priv_data->fs_mount)); - - default: - DBG_VOP(("\tparent directory (vnode 0x%08lX) has unknown vnode_type (%d), nodeID = %d.\n", - (unsigned long)ap->a_dvp, - priv_data->vnode_type, - priv_data->nodeID)); - }; -#endif /* VOLFS_DEBUG */ + firstchar = nameptr[0]; - /* first check for "." and ".." */ - if (cnp[0] == '.') - { - if (namelen == 1) - { + /* First check for "." and ".." */ + if (firstchar == '.') { + if (namelen == 1) { /* "." requested */ isdot_or_dotdot = 1; - *ap->a_vpp = ap->a_dvp; - VREF(*ap->a_vpp); - DBG_VOP_LOCKS_TEST(0); - ret_err = 0; - } - else if (cnp[1] == '.' && namelen == 2) - { + *ap->a_vpp = ap->a_dvp; + vnode_get(*ap->a_vpp); + ret_err = 0; + } else if (nameptr[1] == '.' && namelen == 2) { /* ".." 
requested */ isdot_or_dotdot = 1; - ret_err = volfs_root(ap->a_dvp->v_mount, ap->a_vpp); + ret_err = VFS_ROOT(ap->a_dvp->v_mount, ap->a_vpp, ap->a_context); } - } - - /* then look for special file system root symbol ('@') */ - else if (cnp[0] == '@') - { + } else if (firstchar == '@') { /* '@' is alias for system root */ if ((namelen == 1) && (priv_data->vnode_type != VOLFS_ROOT)) { - parent_fs = priv_data->fs_mount; - if (!(ap->a_cnp->cn_flags & LOCKPARENT) || !(ap->a_cnp->cn_flags & ISLASTCN)) { - VOP_UNLOCK(ap->a_dvp, 0, ap->a_cnp->cn_proc); - unlocked_parent = 1; - }; - ret_err = VFS_ROOT(parent_fs, ap->a_vpp); - } else { - DBG_VOP(("volfs_lookup: pathname = '@' but namelen = %ld and parent vnode_type = %d.\n", namelen, priv_data->vnode_type)); - *ap->a_vpp = NULL; - ret_err = ENOENT; - }; - } - - /* finally, just look for numeric ids... */ - else if (namelen <= 10 && cnp[0] > '0' && cnp[0] <= '9') /* 10 digits max lead digit must be 1 - 9 */ - { + /* the following returns with iteration count on mount point */ + parent_fs = mount_list_lookupby_fsid(&priv_data->fs_fsid, 0, 1); + if (parent_fs) { + ret_val = vfs_busy(parent_fs, LK_NOWAIT); + mount_iterdrop(parent_fs); + if (ret_val !=0) { + *ap->a_vpp = NULL; + ret_err = ENOENT; + } else { + ret_err = VFS_ROOT(parent_fs, ap->a_vpp, ap->a_context); + vfs_unbusy(parent_fs); + } + } else { + *ap->a_vpp = NULL; + ret_err = ENOENT; + } + } else { + *ap->a_vpp = NULL; + ret_err = ENOENT; + } + } else if (namelen <= 10 && firstchar > '0' && firstchar <= '9') { char *check_ptr; u_long id; - id = strtoul(cnp, &check_ptr, 10); + id = strtoul(nameptr, &check_ptr, 10); - /* + /* * strtol will leave us at the first non-numeric character. * we've checked to make sure the component name does * begin with a numeric so check_ptr must wind up on * the terminating null or there was other junk following the * number */ - if ((check_ptr - cnp) == namelen) - { - if (priv_data->vnode_type == VOLFS_ROOT) + if ((check_ptr - nameptr) == namelen) { + if (priv_data->vnode_type == VOLFS_ROOT) { + /* + * OPTIMIZATION + * + * Obtain the mountpoint and call VFS_VGET in + * one step (ie without creating a vnode for + * the mountpoint). + */ + if (check_ptr[0] == '/' && + check_ptr[1] > '0' && check_ptr[1] <= '9') { + struct mount *mp; + struct vnode *vp; + u_long id2; + char *endptr; + + /* this call will return mount point with vfs_busy held */ + mp = mount_lookupby_volfsid(id, 1); + if (mp == NULL) { + *ap->a_vpp = NULL; + return ENOENT; + } + id2 = strtoul(&check_ptr[1], &endptr, 10); + if ((endptr[0] == '/' || endptr[0] == '\0') && + get_filevnode(mp, id2, &vp, ap->a_context) == 0) { + ap->a_cnp->cn_consume = endptr - check_ptr; + *ap->a_vpp = vp; + vfs_unbusy(mp); + return (0); + } + vfs_unbusy(mp); + } + /* Fall through to default behavior... 
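volfs component names are just decimal fsids (optionally followed, in the fast path continued below, by /<file id> consumed via cn_consume), and the parse above accepts a name only when strtoul() stops exactly at namelen characters, which rejects trailing junk. The validation rule on its own, sketched under the same assumption the lookup above makes, namely that strtoul() is reachable from kernel code here:

#include <sys/types.h>
#include <sys/errno.h>

/* Sketch: volfs-style numeric name validation. */
static int
parse_volfs_id(const char *nameptr, long namelen, u_long *idp)
{
	char *check_ptr;

	/* at most 10 digits, lead digit 1-9, mirroring the checks above */
	if (namelen > 10 || nameptr[0] <= '0' || nameptr[0] > '9')
		return (EINVAL);

	*idp = strtoul(nameptr, &check_ptr, 10);

	/* strtoul stops at the first non-digit: it must have consumed it all */
	if ((check_ptr - nameptr) != namelen)
		return (EINVAL);
	return (0);
}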
*/ + ret_err = get_fsvnode(ap->a_dvp->v_mount, id, ap->a_vpp); - else { - parent_fs = priv_data->fs_mount; - if (!(ap->a_cnp->cn_flags & LOCKPARENT) || !(ap->a_cnp->cn_flags & ISLASTCN)) { - VOP_UNLOCK(ap->a_dvp, 0, ap->a_cnp->cn_proc); - unlocked_parent = 1; - }; - ret_err = get_filevnode(parent_fs, id, ap->a_vpp, ap->a_cnp->cn_proc); - } - } - } - if (!isdot_or_dotdot && *ap->a_vpp && VPARENT(*ap->a_vpp) == NULL && ap->a_dvp != *ap->a_vpp) { - if (VPARENT(ap->a_dvp) == *ap->a_vpp) { - panic("volfs: ap->a_dvp 0x%x has parent == a_vpp 0x%x\n", - ap->a_dvp, *ap->a_vpp); + } else { + parent_fs = mount_list_lookupby_fsid(&priv_data->fs_fsid, 0, 1); + if (parent_fs) { + ret_val = vfs_busy(parent_fs, LK_NOWAIT); + mount_iterdrop(parent_fs); + if (ret_val !=0) { + *ap->a_vpp = NULL; + ret_err = ENOENT; + } else { + ret_err = get_filevnode(parent_fs, id, ap->a_vpp, ap->a_context); + vfs_unbusy(parent_fs); + } + } else { + *ap->a_vpp = NULL; + ret_err = ENOENT; + } + } } - vget(ap->a_dvp, 0, ap->a_cnp->cn_proc); - VPARENT(*ap->a_vpp) = ap->a_dvp; } + vp = *ap->a_vpp; - if (!unlocked_parent && (!(ap->a_cnp->cn_flags & LOCKPARENT) || !(ap->a_cnp->cn_flags & ISLASTCN))) { - VOP_UNLOCK(ap->a_dvp, 0, ap->a_cnp->cn_proc); - }; - - /* XXX PPD Should we do something special in case LOCKLEAF isn't set? */ + if ( ret_err == 0 && !isdot_or_dotdot && (vp != NULLVP) && (vp->v_parent == NULLVP)) + vnode_update_identity(vp, ap->a_dvp, NULL, 0, 0, VNODE_UPDATE_PARENT); -Err_Exit: - - DBG_VOP_UPDATE_VP(1, *ap->a_vpp); - DBG_VOP_LOCKS_TEST(ret_err); - #if 0 KERNEL_DEBUG((FSDBG_CODE(DBG_FSVN, 8)) | DBG_FUNC_START, (unsigned int)ap->a_dvp, (unsigned int)ap->a_cnp, (unsigned int)p, ret_err, 0); #endif - return (ret_err); -} - -#if DBG_VOP_TEST_LOCKS - -#if 0 -static void DbgLookupTest( char *funcname, struct componentname *cnp, struct vnode *dvp, struct vnode *vp) -{ - int flags = cnp->cn_flags; - int nameiop = cnp->cn_nameiop; - - DBG_VOP (("%s: Action:", funcname)); - switch (nameiop) - { - case LOOKUP: - PRINTIT ("LOOKUP"); - break; - case CREATE: - PRINTIT ("CREATE"); - break; - case DELETE: - PRINTIT ("DELETE"); - break; - case RENAME: - PRINTIT ("RENAME"); - break; - default: - PRINTIT ("!!!UNKNOWN!!!!"); - break; - } - PRINTIT(" flags: 0x%x ",flags ); - if (flags & LOCKPARENT) - PRINTIT (" Lock Parent"); - if (flags & ISLASTCN) - PRINTIT (" Last Action"); - PRINTIT("\n"); - - if (dvp) - { - PRINTIT ("%s: Parent vnode exited ", funcname); - if (VOP_ISLOCKED(dvp)) - PRINTIT("LOCKED\n"); - else - PRINTIT("UNLOCKED\n"); - } - if (vp && vp==dvp) - { - PRINTIT ("%s: Found and Parent are the same\n", funcname); - } - else if (vp) - { - PRINTIT ("%s: Found vnode exited ", funcname); - if (VOP_ISLOCKED(vp)) - PRINTIT("LOCKED\n"); - else - PRINTIT("UNLOCKED\n"); - } - else - PRINTIT ("%s: Found vnode exited NULL\n", funcname); - - -} -#endif - -static void DbgVopTest( int maxSlots, - int retval, - VopDbgStoreRec *VopDbgStore, - char *funcname) -{ - int index; - - for (index = 0; index < maxSlots; index++) - { - if (VopDbgStore[index].id != index) { - PRINTIT("%s: DBG_VOP_LOCK: invalid id field (%d) in target entry (#%d).\n", funcname, VopDbgStore[index].id, index); - return; - }; - - if ((VopDbgStore[index].vp != NULL) && - ((VopDbgStore[index].vp->v_data==NULL))) - continue; - - switch (VopDbgStore[index].inState) - { - case VOPDBG_IGNORE: - case VOPDBG_SAME: - /* Do Nothing !!! 
*/ - break; - case VOPDBG_LOCKED: - case VOPDBG_UNLOCKED: - case VOPDBG_LOCKNOTNIL: - { - if (VopDbgStore[index].vp == NULL && (VopDbgStore[index].inState != VOPDBG_LOCKNOTNIL)) { - PRINTIT ("%s: InState check: Null vnode ptr in entry #%d\n", funcname, index); - } else if (VopDbgStore[index].vp != NULL) { - switch (VopDbgStore[index].inState) - { - case VOPDBG_LOCKED: - case VOPDBG_LOCKNOTNIL: - if (VopDbgStore[index].inValue == 0) - { - PRINTIT ("%s: %d Entry: not LOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - case VOPDBG_UNLOCKED: - if (VopDbgStore[index].inValue != 0) - { - PRINTIT ("%s: %d Entry: not UNLOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - } - } - break; - } - default: - PRINTIT ("%s: DBG_VOP_LOCK on entry: bad lock test value: %d\n", funcname, VopDbgStore[index].errState); - } - - - if (retval != 0) - { - switch (VopDbgStore[index].errState) - { - case VOPDBG_IGNORE: - /* Do Nothing !!! */ - break; - case VOPDBG_LOCKED: - case VOPDBG_UNLOCKED: - case VOPDBG_SAME: - { - if (VopDbgStore[index].vp == NULL) { - PRINTIT ("%s: ErrState check: Null vnode ptr in entry #%d\n", funcname, index); - } else { - VopDbgStore[index].outValue = VOP_ISLOCKED(VopDbgStore[index].vp); - switch (VopDbgStore[index].errState) - { - case VOPDBG_LOCKED: - if (VopDbgStore[index].outValue == 0) - { - PRINTIT ("%s: %d Error: not LOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - case VOPDBG_UNLOCKED: - if (VopDbgStore[index].outValue != 0) - { - PRINTIT ("%s: %d Error: not UNLOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - case VOPDBG_SAME: - if (VopDbgStore[index].outValue != VopDbgStore[index].inValue) - PRINTIT ("%s: Error: In/Out locks are DIFFERENT: 0x%x, inis %d and out is %d\n", funcname, (u_int)VopDbgStore[index].vp, VopDbgStore[index].inValue, VopDbgStore[index].outValue); - break; - } - } - break; - } - case VOPDBG_LOCKNOTNIL: - if (VopDbgStore[index].vp != NULL) { - VopDbgStore[index].outValue = VOP_ISLOCKED(VopDbgStore[index].vp); - if (VopDbgStore[index].outValue == 0) - PRINTIT ("%s: Error: %d Not LOCKED: 0x%x\n", funcname, index, (u_int)VopDbgStore[index].vp); - } - break; - default: - PRINTIT ("%s: Error: bad lock test value: %d\n", funcname, VopDbgStore[index].errState); - } - } - else - { - switch (VopDbgStore[index].outState) - { - case VOPDBG_IGNORE: - /* Do Nothing !!! 
*/ - break; - case VOPDBG_LOCKED: - case VOPDBG_UNLOCKED: - case VOPDBG_SAME: - if (VopDbgStore[index].vp == NULL) { - PRINTIT ("%s: OutState: Null vnode ptr in entry #%d\n", funcname, index); - }; - if (VopDbgStore[index].vp != NULL) - { - VopDbgStore[index].outValue = VOP_ISLOCKED(VopDbgStore[index].vp); - switch (VopDbgStore[index].outState) - { - case VOPDBG_LOCKED: - if (VopDbgStore[index].outValue == 0) - { - PRINTIT ("%s: %d Out: not LOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - case VOPDBG_UNLOCKED: - if (VopDbgStore[index].outValue != 0) - { - PRINTIT ("%s: %d Out: not UNLOCKED:", funcname, index); DBG_VOP(("\n")); - } - break; - case VOPDBG_SAME: - if (VopDbgStore[index].outValue != VopDbgStore[index].inValue) - PRINTIT ("%s: Out: In/Out locks are DIFFERENT: 0x%x, inis %d and out is %d\n", funcname, (u_int)VopDbgStore[index].vp, VopDbgStore[index].inValue, VopDbgStore[index].outValue); - break; - } - } - break; - case VOPDBG_LOCKNOTNIL: - if (VopDbgStore[index].vp != NULL) { - if (&((struct volfs_vndata *)(VopDbgStore[index].vp->v_data))->lock == NULL) - PRINTIT ("%s: DBG_VOP_LOCK on out: Null lock on vnode 0x%x\n", funcname, (u_int)VopDbgStore[index].vp); - else { - VopDbgStore[index].outValue = VOP_ISLOCKED(VopDbgStore[index].vp); - if (VopDbgStore[index].outValue == 0) - { - PRINTIT ("%s: DBG_VOP_LOCK on out: Should be LOCKED:", funcname); DBG_VOP(("\n")); - } - } - } - break; - default: - PRINTIT ("%s: DBG_VOP_LOCK on out: bad lock test value: %d\n", funcname, VopDbgStore[index].outState); - } - } - - VopDbgStore[index].id = -1; /* Invalidate the entry to allow panic-free re-use */ - } + return (ret_err); } -#endif /* DBG_VOP_TEST_LOCKS */ - diff --git a/bsd/net/Makefile b/bsd/net/Makefile index 304c2be7d..e29e3a849 100644 --- a/bsd/net/Makefile +++ b/bsd/net/Makefile @@ -20,29 +20,37 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES= \ - bpf.h bpf_compat.h bpfdesc.h dlil.h dlil_pvt.h \ - etherdefs.h ethernet.h if.h if_arp.h \ + bpf.h dlil.h \ + ethernet.h if.h if_arp.h \ if_dl.h if_llc.h if_media.h if_mib.h \ - if_slvar.h \ - if_types.h if_var.h iso88025.h \ - kext_net.h ndrv.h net_osdep.h netisr.h pfkeyv2.h \ - radix.h raw_cb.h route.h slcompress.h slip.h + if_types.h if_var.h \ + kext_net.h ndrv.h pfkeyv2.h \ + route.h + +KERNELFILES= \ + kpi_interface.h kpi_interfacefilter.h kpi_protocol.h \ + if_ether.h init.h PRIVATE_DATAFILES = \ - ndrv_var.h zlib.h if_pppvar.h if_sppp.h ppp_comp.h if_atm.h \ - if_tun.h if_vlan_var.h if_ppp.h firewire.h ppp_defs.h + if_atm.h if_vlan_var.h if_ppp.h firewire.h \ + ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \ + raw_cb.h etherdefs.h iso88025.h + +PRIVATE_KERNELFILES = ${KERNELFILES} \ + bpfdesc.h dlil_pvt.h if_faith.h ppp_comp.h \ + zlib.h bpf_compat.h net_osdep.h INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = net -EXPORT_MI_LIST = ${INSTALL_MI_LIST} +EXPORT_MI_LIST = ${INSTALL_MI_LIST} ${KERNELFILES} EXPORT_MI_DIR = ${INSTALL_MI_DIR} INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index 1da778677..3d025ccbc 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * * @@ -81,6 +81,7 @@ #include #include #include +#include #if defined(sparc) && BSD < 199103 #include @@ -100,10 +101,14 @@ #include #include -#include +#include #include #include +#include + +extern int tvtohz(struct timeval *); + #if NBPFILTER > 0 /* @@ -126,12 +131,15 @@ static caddr_t bpf_alloc(); /* * The default read buffer size is patchable. */ -static int bpf_bufsize = BPF_BUFSIZE; +static unsigned int bpf_bufsize = BPF_BUFSIZE; SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW, &bpf_bufsize, 0, ""); -static int bpf_maxbufsize = BPF_MAXBUFSIZE; +static unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE; SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW, &bpf_maxbufsize, 0, ""); +static unsigned int bpf_maxdevices = 256; +SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW, + &bpf_maxdevices, 0, ""); /* * bpf_iflist is the list of interfaces; each corresponds to an ifnet @@ -143,42 +151,48 @@ static struct bpf_if *bpf_iflist; * BSD now stores the bpf_d in the dev_t which is a struct * on their system. Our dev_t is an int, so we still store * the bpf_d in a separate table indexed by minor device #. + * + * The value stored in bpf_dtab[n] represents one of three states: + * 0: device not opened + * 1: device opening or closing + * other: device opened with pointer to storage */ static struct bpf_d **bpf_dtab = NULL; -static int bpf_dtab_size = 0; -static int nbpfilter = 0; +static unsigned int bpf_dtab_size = 0; +static unsigned int nbpfilter = 0; + +static lck_mtx_t *bpf_mlock; +static lck_grp_t *bpf_mlock_grp; +static lck_grp_attr_t *bpf_mlock_grp_attr; +static lck_attr_t *bpf_mlock_attr; /* * Mark a descriptor free by making it point to itself. * This is probably cheaper than marking with a constant since * the address should be in a register anyway.
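The three-state encoding of bpf_dtab[] is the heart of the new open/close protocol, so it is worth seeing in isolation. Below is a user-space analogue, not kernel code (the table size and names are illustrative), showing why a single pointer-sized slot is enough to distinguish "not opened", "transitioning", and "opened":

    #include <errno.h>
    #include <stdlib.h>

    struct bpf_d { int bd_bufsize; };

    #define BPF_DEV_BUSY ((struct bpf_d *)1)   /* state 1: opening or closing */

    static struct bpf_d *dtab[16];             /* NULL: state 0, not opened */

    /* Mirrors the check-and-mark done at the top of bpfopen(). */
    static int
    slot_open(int minor_no, struct bpf_d **out)
    {
        if (dtab[minor_no] != NULL)            /* busy sentinel or live pointer */
            return (EBUSY);
        dtab[minor_no] = BPF_DEV_BUSY;         /* mark "opening" */
        *out = calloc(1, sizeof (**out));
        if (*out == NULL) {
            dtab[minor_no] = NULL;             /* roll back to "not opened" */
            return (ENOMEM);
        }
        dtab[minor_no] = *out;                 /* opened: pointer to storage */
        return (0);
    }

In the kernel the check and the mark are made safe by the surrounding serialization; the sketch captures only the state machine itself.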
*/ -#define D_ISFREE(d) ((d) == (d)->bd_next) -#define D_MARKFREE(d) ((d)->bd_next = (d)) -#define D_MARKUSED(d) ((d)->bd_next = 0) #endif /* __APPLE__ */ -static int bpf_allocbufs __P((struct bpf_d *)); -static void bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp)); -static void bpf_detachd __P((struct bpf_d *d)); -static void bpf_freed __P((struct bpf_d *)); -static void bpf_mcopy __P((const void *, void *, size_t)); -static int bpf_movein __P((struct uio *, int, - struct mbuf **, struct sockaddr *, int *)); -static int bpf_setif __P((struct bpf_d *, struct ifreq *)); -static inline void - bpf_wakeup __P((struct bpf_d *)); -static void catchpacket __P((struct bpf_d *, u_char *, u_int, - u_int, void (*)(const void *, void *, size_t))); -static void reset_d __P((struct bpf_d *)); -static int bpf_setf __P((struct bpf_d *, struct bpf_program *)); +static int bpf_allocbufs(struct bpf_d *); +static void bpf_attachd(struct bpf_d *d, struct bpf_if *bp); +static void bpf_detachd(struct bpf_d *d); +static void bpf_freed(struct bpf_d *); +static void bpf_mcopy(const void *, void *, size_t); +static int bpf_movein(struct uio *, int, + struct mbuf **, struct sockaddr *, int *); +static int bpf_setif(struct bpf_d *, struct ifreq *); +static void bpf_wakeup(struct bpf_d *); +static void catchpacket(struct bpf_d *, u_char *, u_int, + u_int, void (*)(const void *, void *, size_t)); +static void reset_d(struct bpf_d *); +static int bpf_setf(struct bpf_d *, struct user_bpf_program *); /*static void *bpf_devfs_token[MAXBPFILTER];*/ static int bpf_devsw_installed; -void bpf_init __P((void *unused)); - +void bpf_init(void *unused); +int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m); /* * Darwin differs from BSD here, the following are static @@ -188,15 +202,9 @@ void bpf_init __P((void *unused)); d_close_t bpfclose; d_read_t bpfread; d_write_t bpfwrite; - d_ioctl_t bpfioctl; + ioctl_fcn_t bpfioctl; select_fcn_t bpfpoll; -#ifdef __APPLE__ -void bpf_mtap(struct ifnet *, struct mbuf *); - -int bpfopen(), bpfclose(), bpfread(), bpfwrite(), bpfioctl(), - bpfpoll(); -#endif /* Darwin's cdevsw struct differs slightly from BSDs */ #define CDEV_MAJOR 23 @@ -206,98 +214,101 @@ static struct cdevsw bpf_cdevsw = { /* read */ bpfread, /* write */ bpfwrite, /* ioctl */ bpfioctl, - /* stop */ nulldev, - /* reset */ nulldev, - /* tty */ NULL, + /* stop */ eno_stop, + /* reset */ eno_reset, + /* tty */ NULL, /* select */ bpfpoll, - /* mmap */ eno_mmap, + /* mmap */ eno_mmap, /* strategy*/ eno_strat, - /* getc */ eno_getc, - /* putc */ eno_putc, - /* type */ 0 + /* getc */ eno_getc, + /* putc */ eno_putc, + /* type */ 0 }; #define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data) static int -bpf_movein(uio, linktype, mp, sockp, datlen) - register struct uio *uio; - int linktype, *datlen; - register struct mbuf **mp; - register struct sockaddr *sockp; +bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *sockp, int *datlen) { struct mbuf *m; int error; int len; int hlen; - /* - * Build a sockaddr based on the data link layer type. - * We do this at this level because the ethernet header - * is copied directly into the data field of the sockaddr. - * In the case of SLIP, there is no header and the packet - * is forwarded as is. - * Also, we are careful to leave room at the front of the mbuf - * for the link level header. 
- */ - switch (linktype) { - - case DLT_SLIP: - sockp->sa_family = AF_INET; - hlen = 0; - break; - - case DLT_EN10MB: - sockp->sa_family = AF_UNSPEC; - /* XXX Would MAXLINKHDR be better? */ - hlen = sizeof(struct ether_header); - break; - - case DLT_FDDI: -#if defined(__FreeBSD__) || defined(__bsdi__) - sockp->sa_family = AF_IMPLINK; - hlen = 0; -#else - sockp->sa_family = AF_UNSPEC; - /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */ - hlen = 24; -#endif - break; - - case DLT_RAW: - case DLT_NULL: - sockp->sa_family = AF_UNSPEC; - hlen = 0; - break; - -#ifdef __FreeBSD__ - case DLT_ATM_RFC1483: + if (sockp) { /* - * en atm driver requires 4-byte atm pseudo header. - * though it isn't standard, vpi:vci needs to be - * specified anyway. + * Build a sockaddr based on the data link layer type. + * We do this at this level because the ethernet header + * is copied directly into the data field of the sockaddr. + * In the case of SLIP, there is no header and the packet + * is forwarded as is. + * Also, we are careful to leave room at the front of the mbuf + * for the link level header. */ - sockp->sa_family = AF_UNSPEC; - hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ - break; -#endif - case DLT_PPP: - sockp->sa_family = AF_UNSPEC; - hlen = 4; /* This should match PPP_HDRLEN */ - break; - - case DLT_APPLE_IP_OVER_IEEE1394: - sockp->sa_family = AF_UNSPEC; - hlen = sizeof(struct firewire_header); - break; - - default: - return (EIO); + switch (linktype) { + + case DLT_SLIP: + sockp->sa_family = AF_INET; + hlen = 0; + break; + + case DLT_EN10MB: + sockp->sa_family = AF_UNSPEC; + /* XXX Would MAXLINKHDR be better? */ + hlen = sizeof(struct ether_header); + break; + + case DLT_FDDI: + #if defined(__FreeBSD__) || defined(__bsdi__) + sockp->sa_family = AF_IMPLINK; + hlen = 0; + #else + sockp->sa_family = AF_UNSPEC; + /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */ + hlen = 24; + #endif + break; + + case DLT_RAW: + case DLT_NULL: + sockp->sa_family = AF_UNSPEC; + hlen = 0; + break; + + #ifdef __FreeBSD__ + case DLT_ATM_RFC1483: + /* + * en atm driver requires 4-byte atm pseudo header. + * though it isn't standard, vpi:vci needs to be + * specified anyway. + */ + sockp->sa_family = AF_UNSPEC; + hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ + break; + #endif + case DLT_PPP: + sockp->sa_family = AF_UNSPEC; + hlen = 4; /* This should match PPP_HDRLEN */ + break; + + case DLT_APPLE_IP_OVER_IEEE1394: + sockp->sa_family = AF_UNSPEC; + hlen = sizeof(struct firewire_header); + break; + + default: + return (EIO); + } + if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) { + return (EIO); + } } - if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) { - return (EIO); + else { + hlen = 0; } - len = uio->uio_resid; + + // LP64todo - fix this! + len = uio_resid(uio); *datlen = len - hlen; if ((unsigned)len > MCLBYTES) return (EIO); @@ -305,7 +316,7 @@ bpf_movein(uio, linktype, mp, sockp, datlen) MGETHDR(m, M_WAIT, MT_DATA); if (m == 0) return (ENOBUFS); - if (len > MHLEN) { + if ((unsigned)len > MHLEN) { #if BSD >= 199103 MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { @@ -347,76 +358,71 @@ bpf_movein(uio, linktype, mp, sockp, datlen) /* Callback registered with Ethernet driver. */ int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m) { - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - /* * Do nothing if the BPF tap has been turned off. * This is to protect from a potential race where this - * call blocks on the funnel lock. And in the meantime + * call blocks on the lock. 
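bpf_make_dev_t() serializes concurrent growers of the device table with nothing more than a flag plus tsleep()/wakeup() on the flag's address. The same pattern, rendered as a runnable user-space analogue with a pthread condition variable standing in for tsleep/wakeup (identifiers are illustrative):

    #include <pthread.h>

    static pthread_mutex_t grow_mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  grow_cv  = PTHREAD_COND_INITIALIZER;
    static int growing;                 /* plays the role of bpf_growing */
    static unsigned int ndevices;       /* plays the role of nbpfilter */

    static void
    make_dev(void)
    {
        unsigned int cur;

        pthread_mutex_lock(&grow_mtx);
        cur = ndevices;                 /* snapshot before sleeping */
        while (growing)                 /* wait for the in-flight grower */
            pthread_cond_wait(&grow_cv, &grow_mtx);
        if (ndevices > cur) {           /* another thread already grew it */
            pthread_mutex_unlock(&grow_mtx);
            return;
        }
        growing = 1;
        pthread_mutex_unlock(&grow_mtx);

        /* ... grow the table and create the new device node ... */

        pthread_mutex_lock(&grow_mtx);
        ndevices++;
        growing = 0;
        pthread_cond_broadcast(&grow_cv);  /* the wakeup(&bpf_growing) analogue */
        pthread_mutex_unlock(&grow_mtx);
    }

The snapshot taken before sleeping is what lets a woken thread notice that another grower already did the work and return without growing the table twice.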
And in the meantime * BPF is turned off, which will clear if_bpf. */ if (ifp->if_bpf) bpf_mtap(ifp, m); - - thread_funnel_set(network_flock, funnel_state); return 0; } /* - * Returns 1 on sucess, 0 on failure + * The dynamic addition of a new device node must block all processes that are opening + * the last device so that no process will get an unexpected ENOENT */ -static int -bpf_dtab_grow(int increment) +static void +bpf_make_dev_t(int maj) { - struct bpf_d **new_dtab = NULL; - - new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * (bpf_dtab_size + increment), M_DEVBUF, M_WAIT); - if (new_dtab == NULL) - return 0; - - if (bpf_dtab) { - struct bpf_d **old_dtab; + static int bpf_growing = 0; + unsigned int cur_size = nbpfilter, i; - bcopy(bpf_dtab, new_dtab, sizeof(struct bpf_d *) * bpf_dtab_size); - /* - * replace must be atomic with respect to free do bpf_dtab - * is always valid. - */ - old_dtab = bpf_dtab; - bpf_dtab = new_dtab; - _FREE(old_dtab, M_DEVBUF); - } - else bpf_dtab = new_dtab; - - bzero(bpf_dtab + bpf_dtab_size, sizeof(struct bpf_d *) * increment); - - bpf_dtab_size += increment; - - return 1; -} + if (nbpfilter >= bpf_maxdevices) + return; -static struct bpf_d * -bpf_make_dev_t(int maj) -{ - struct bpf_d *d; + while (bpf_growing) { + /* Wait until new device has been created */ + (void)tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0); + } + if (nbpfilter > cur_size) { + /* other thread grew it already */ + return; + } + bpf_growing = 1; - if (nbpfilter >= bpf_dtab_size && bpf_dtab_grow(NBPFILTER) == 0) - return NULL; - - d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT); - if (d != NULL) { - int i = nbpfilter++; - - bzero(d, sizeof(struct bpf_d)); - bpf_dtab[i] = d; - D_MARKFREE(bpf_dtab[i]); - /*bpf_devfs_token[i] = */devfs_make_node(makedev(maj, i), - DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600, - "bpf%d", i); + /* need to grow bpf_dtab first */ + if (nbpfilter == bpf_dtab_size) { + int new_dtab_size; + struct bpf_d **new_dtab = NULL; + struct bpf_d **old_dtab = NULL; + + new_dtab_size = bpf_dtab_size + NBPFILTER; + new_dtab = (struct bpf_d **)_MALLOC(sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT); + if (new_dtab == 0) { + printf("bpf_make_dev_t: malloc bpf_dtab failed\n"); + goto done; + } + if (bpf_dtab) { + bcopy(bpf_dtab, new_dtab, + sizeof(struct bpf_d *) * bpf_dtab_size); + } + bzero(new_dtab + bpf_dtab_size, + sizeof(struct bpf_d *) * NBPFILTER); + old_dtab = bpf_dtab; + bpf_dtab = new_dtab; + bpf_dtab_size = new_dtab_size; + if (old_dtab != NULL) + _FREE(old_dtab, M_DEVBUF); } - return d; + i = nbpfilter++; + (void) devfs_make_node(makedev(maj, i), + DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600, + "bpf%d", i); +done: + bpf_growing = 0; + wakeup((caddr_t)&bpf_growing); } #endif @@ -426,9 +432,7 @@ bpf_make_dev_t(int maj) * Must be called at splimp. */ static void -bpf_attachd(d, bp) - struct bpf_d *d; - struct bpf_if *bp; +bpf_attachd(struct bpf_d *d, struct bpf_if *bp) { /* * Point d at bp, and add d to the interface's list of listeners. @@ -442,8 +446,7 @@ bpf_attachd(d, bp) bp->bif_ifp->if_bpf = bp; #ifdef __APPLE__ - if (bp->bif_ifp->if_set_bpf_tap) - (*bp->bif_ifp->if_set_bpf_tap)(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback); + dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback); #endif } @@ -451,8 +454,7 @@ bpf_attachd(d, bp) * Detach a file from its interface. 
*/ static void -bpf_detachd(d) - struct bpf_d *d; +bpf_detachd(struct bpf_d *d) { struct bpf_d **p; struct bpf_if *bp; @@ -470,14 +472,14 @@ bpf_detachd(d) */ if (d->bd_promisc) { d->bd_promisc = 0; - if (ifpromisc(bp->bif_ifp, 0)) + if (ifnet_set_promiscuous(bp->bif_ifp, 0)) /* * Something is really wrong if we were able to put * the driver into promiscuous mode, but can't * take it out. * Most likely the network interface is gone. */ - printf("bpf: ifpromisc failed"); + printf("bpf: ifnet_set_promiscuous failed"); } /* Remove d from the interface's descriptor list. */ p = &bp->bif_dlist; @@ -505,58 +507,57 @@ bpf_detachd(d) */ /* ARGSUSED */ int -bpfopen(dev, flags, fmt, p) - dev_t dev; - int flags; - int fmt; - struct proc *p; +bpfopen(dev_t dev, __unused int flags, __unused int fmt, __unused struct proc *p) { register struct bpf_d *d; -#ifdef __APPLE__ - /* new device nodes on demand when opening the last one */ - if (minor(dev) == nbpfilter - 1) - bpf_make_dev_t(major(dev)); - - if (minor(dev) >= nbpfilter) + if ((unsigned int) minor(dev) >= nbpfilter) return (ENXIO); - - d = bpf_dtab[minor(dev)]; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); -#else - if (p->p_prison) - return (EPERM); + + /* + * New device nodes are created on demand when opening the last one. + * The programming model is for processes to loop on the minor starting at 0 + * as long as EBUSY is returned. The loop stops when either the open succeeds or + * an error other than EBUSY is returned. That means that bpf_make_dev_t() must + * block all processes that are opening the last node. If not all + * processes are blocked, they could unexpectedly get ENOENT and abort their + * opening loop. + */ + if ((unsigned int) minor(dev) == (nbpfilter - 1)) + bpf_make_dev_t(major(dev)); - d = dev->si_drv1; -#endif /* * Each minor can be opened by only one process. If the requested * minor is in use, return EBUSY. + * + * Important: bpfopen() and bpfclose() have to check and set the status of a device + * in the same locking context, otherwise the device may be leaked because the vnode use count + * will be unexpectedly greater than 1 when close() is called. */ -#ifdef __APPLE__ - if (!D_ISFREE(d)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EBUSY); + if (bpf_dtab[minor(dev)] == 0) + bpf_dtab[minor(dev)] = (void *)1; /* Mark opening */ + else + return (EBUSY); + + d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF, M_WAIT); + if (d == NULL) { + /* this really is a catastrophic failure */ + printf("bpfopen: malloc bpf_d failed\n"); + bpf_dtab[minor(dev)] = 0; + return ENOMEM; } + bzero(d, sizeof(struct bpf_d)); - /* Mark "free" and do most initialization. */ - bzero((char *)d, sizeof(*d)); -#else - if (d) - return (EBUSY); - make_dev(&bpf_cdevsw, minor(dev), 0, 0, 0600, "bpf%d", lminor(dev)); - MALLOC(d, struct bpf_d *, sizeof(*d), M_BPF, M_WAITOK); - bzero(d, sizeof(*d)); - dev->si_drv1 = d; -#endif + /* + * It is not necessary to take the BPF lock here because no other + * thread can access the device until it is marked opened... + */ + + /* Mark "in use" and do most initialization. 
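The programming model the bpfopen() comment describes, walking the minors from 0 and continuing only on EBUSY, looks like this from user space (a conventional sketch; /dev/bpfN is the Darwin device naming, and the upper bound mirrors the bpf_maxdevices default):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>

    /* Open the first free BPF device, per the loop-on-EBUSY model. */
    static int
    open_bpf(void)
    {
        char dev[32];
        int fd, i;

        for (i = 0; i < 256; i++) {
            snprintf(dev, sizeof (dev), "/dev/bpf%d", i);
            fd = open(dev, O_RDWR);
            if (fd >= 0)
                return (fd);            /* found a free minor */
            if (errno != EBUSY)
                return (-1);            /* ENOENT etc.: give up */
        }
        return (-1);
    }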
*/ d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; d->bd_seesent = 1; - -#ifdef __APPLE__ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); -#endif + bpf_dtab[minor(dev)] = d; /* Mark opened */ return (0); } @@ -567,93 +568,49 @@ bpfopen(dev, flags, fmt, p) */ /* ARGSUSED */ int -bpfclose(dev, flags, fmt, p) - dev_t dev; - int flags; - int fmt; - struct proc *p; +bpfclose(dev_t dev, __unused int flags, __unused int fmt, __unused struct proc *p) { register struct bpf_d *d; - register int s; -#ifdef __APPLE__ - struct bpf_d **bpf_dtab_schk; -#endif -#ifndef __APPLE__ - funsetown(d->bd_sigio); -#endif - s = splimp(); -#ifdef __APPLE__ -again: d = bpf_dtab[minor(dev)]; - bpf_dtab_schk = bpf_dtab; -#endif - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if (d == 0 || d == (void *)1) + return (ENXIO); + + bpf_dtab[minor(dev)] = (void *)1; /* Mark closing */ -#ifdef __APPLE__ - /* - * If someone grows bpf_dtab[] while we were waiting for the - * funnel, then we will be pointing off into freed memory; - * check to see if this is the case. - */ - if (bpf_dtab_schk != bpf_dtab) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - goto again; - } -#endif + /* Take BPF lock to ensure no other thread is using the device */ + lck_mtx_lock(bpf_mlock); if (d->bd_bif) bpf_detachd(d); - splx(s); -#ifdef __APPLE__ selthreadclear(&d->bd_sel); -#endif bpf_freed(d); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + + lck_mtx_unlock(bpf_mlock); + + /* Mark free in same context as bpfopen comes to check */ + bpf_dtab[minor(dev)] = 0; /* Mark closed */ + _FREE(d, M_DEVBUF); + return (0); } -/* - * Support for SunOS, which does not have tsleep. - */ -#if BSD < 199103 -static -bpf_timeout(arg) - caddr_t arg; -{ - boolean_t funnel_state; - struct bpf_d *d = (struct bpf_d *)arg; - funnel_state = thread_funnel_set(network_flock, TRUE); - d->bd_timedout = 1; - wakeup(arg); - (void) thread_funnel_set(network_flock, FALSE); -} -#define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan) +#define BPF_SLEEP bpf_sleep -int -bpf_sleep(d) - register struct bpf_d *d; +static int +bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo) { - register int rto = d->bd_rtout; register int st; - if (rto != 0) { - d->bd_timedout = 0; - timeout(bpf_timeout, (caddr_t)d, rto); - } - st = sleep((caddr_t)d, PRINET|PCATCH); - if (rto != 0) { - if (d->bd_timedout == 0) - untimeout(bpf_timeout, (caddr_t)d); - else if (st == 0) - return EWOULDBLOCK; - } - return (st != 0) ? EINTR : 0; + lck_mtx_unlock(bpf_mlock); + + st = tsleep((caddr_t)d, pri, wmesg, timo); + + lck_mtx_lock(bpf_mlock); + + return st; } -#else -#define BPF_SLEEP tsleep -#endif /* * Rotate the packet buffers in descriptor d. Move the store buffer @@ -670,25 +627,26 @@ bpf_sleep(d) * bpfread - read next chunk of packets from buffers */ int -bpfread(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; +bpfread(dev_t dev, struct uio *uio, int ioflag) { register struct bpf_d *d; int error; int s; d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + + lck_mtx_lock(bpf_mlock); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); /* * Restrict application to use a buffer the same size as * as kernel buffers. 
*/ + // LP64todo - fix this if (uio->uio_resid != d->bd_bufsize) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (EINVAL); } @@ -717,18 +675,18 @@ bpfread(dev, uio, ioflag) */ if (d->bd_bif == NULL) { splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (ENXIO); } if (ioflag & IO_NDELAY) error = EWOULDBLOCK; else - error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf", + error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (error); } if (error == EWOULDBLOCK) { @@ -747,7 +705,7 @@ bpfread(dev, uio, ioflag) if (d->bd_slen == 0) { splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (0); } ROTATE_BUFFERS(d); @@ -771,7 +729,7 @@ bpfread(dev, uio, ioflag) d->bd_hbuf = 0; d->bd_hlen = 0; splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (error); } @@ -779,9 +737,8 @@ bpfread(dev, uio, ioflag) /* * If there are processes sleeping on this descriptor, wake them up. */ -static inline void -bpf_wakeup(d) - register struct bpf_d *d; +static void +bpf_wakeup(struct bpf_d *d) { wakeup((caddr_t)d); if (d->bd_async && d->bd_sig && d->bd_sigio) @@ -806,57 +763,54 @@ bpf_wakeup(d) #define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header)) int -bpfwrite(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; +bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) { register struct bpf_d *d; struct ifnet *ifp; struct mbuf *m; - int error, s; + int error; char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN]; int datlen; d = bpf_dtab[minor(dev)]; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if (d == 0 || d == (void *)1) + return (ENXIO); + + lck_mtx_lock(bpf_mlock); if (d->bd_bif == 0) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (ENXIO); } ifp = d->bd_bif->bif_ifp; if (uio->uio_resid == 0) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (0); } ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf); error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, - (struct sockaddr *)dst_buf, &datlen); + d->bd_hdrcmplt ? 0 : (struct sockaddr *)dst_buf, &datlen); if (error) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (error); } - if (datlen > ifp->if_mtu) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if ((unsigned)datlen > ifp->if_mtu) { + lck_mtx_unlock(bpf_mlock); return (EMSGSIZE); } + lck_mtx_unlock(bpf_mlock); + if (d->bd_hdrcmplt) { - ((struct sockaddr *)dst_buf)->sa_family = pseudo_AF_HDRCMPLT; + error = dlil_output(ifp, 0, m, NULL, NULL, 1); } - - s = splnet(); - - error = dlil_output(ifptodlt(ifp, PF_INET), m, - (caddr_t) 0, (struct sockaddr *)dst_buf, 0); - - splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + else { + error = dlil_output(ifp, PF_INET, m, NULL, (struct sockaddr *)dst_buf, 0); + } + /* * The driver frees the mbuf. */ @@ -868,8 +822,7 @@ bpfwrite(dev, uio, ioflag) * receive and drop counts. Should be called at splimp. */ static void -reset_d(d) - struct bpf_d *d; +reset_d(struct bpf_d *d) { if (d->bd_hbuf) { /* Free the hold buffer. 
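Because bpfread() rejects any read whose length is not exactly the descriptor's buffer size, a consumer must query that size first. A minimal user-space sketch (BIOCGBLEN is the standard BPF ioctl for this; error handling trimmed to the essentials):

    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <net/bpf.h>

    /* Read one buffer's worth of packets; the length must match bd_bufsize. */
    static ssize_t
    read_packets(int bpf_fd, char **bufp)
    {
        u_int buflen;

        if (ioctl(bpf_fd, BIOCGBLEN, &buflen) < 0)
            return (-1);
        *bufp = malloc(buflen);
        if (*bufp == NULL)
            return (-1);
        /* Any other length makes bpfread() fail with EINVAL. */
        return (read(bpf_fd, *bufp, buflen));
    }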
*/ @@ -904,19 +857,16 @@ reset_d(d) */ /* ARGSUSED */ int -bpfioctl(dev, cmd, addr, flags, p) - dev_t dev; - u_long cmd; - caddr_t addr; - int flags; - struct proc *p; +bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, struct proc *p) { register struct bpf_d *d; int s, error = 0; d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_mtx_lock(bpf_mlock); switch (cmd) { @@ -949,7 +899,7 @@ bpfioctl(dev, cmd, addr, flags, p) error = EINVAL; else { ifp = d->bd_bif->bif_ifp; - error = (*ifp->if_ioctl)(ifp, cmd, addr); + error = dlil_ioctl(0, ifp, cmd, addr); } break; } @@ -986,7 +936,18 @@ bpfioctl(dev, cmd, addr, flags, p) * Set link layer read filter. */ case BIOCSETF: - error = bpf_setf(d, (struct bpf_program *)addr); + if (proc_is64bit(p)) { + error = bpf_setf(d, (struct user_bpf_program *)addr); + } + else { + struct bpf_program * tmpp; + struct user_bpf_program tmp; + + tmpp = (struct bpf_program *)addr; + tmp.bf_len = tmpp->bf_len; + tmp.bf_insns = CAST_USER_ADDR_T(tmpp->bf_insns); + error = bpf_setf(d, &tmp); + } break; /* @@ -1011,7 +972,7 @@ bpfioctl(dev, cmd, addr, flags, p) } s = splimp(); if (d->bd_promisc == 0) { - error = ifpromisc(d->bd_bif->bif_ifp, 1); + error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1); if (error == 0) d->bd_promisc = 1; } @@ -1175,7 +1136,9 @@ bpfioctl(dev, cmd, addr, flags, p) *(u_int *)addr = d->bd_sig; break; } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + + lck_mtx_unlock(bpf_mlock); + return (error); } @@ -1184,16 +1147,14 @@ bpfioctl(dev, cmd, addr, flags, p) * free it and replace it. Returns EINVAL for bogus requests. */ static int -bpf_setf(d, fp) - struct bpf_d *d; - struct bpf_program *fp; +bpf_setf(struct bpf_d *d, struct user_bpf_program *fp) { struct bpf_insn *fcode, *old; u_int flen, size; int s; old = d->bd_filter; - if (fp->bf_insns == 0) { + if (fp->bf_insns == USER_ADDR_NULL) { if (fp->bf_len != 0) return (EINVAL); s = splimp(); @@ -1208,13 +1169,13 @@ bpf_setf(d, fp) if (flen > BPF_MAXINSNS) return (EINVAL); - size = flen * sizeof(*fp->bf_insns); + size = flen * sizeof(struct bpf_insn); fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT); #ifdef __APPLE__ if (fcode == NULL) return (ENOBUFS); #endif - if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && + if (copyin(fp->bf_insns, (caddr_t)fcode, size) == 0 && bpf_validate(fcode, (int)flen)) { s = splimp(); d->bd_filter = fcode; @@ -1235,9 +1196,7 @@ bpf_setf(d, fp) * Return an errno or 0. */ static int -bpf_setif(d, ifr) - struct bpf_d *d; - struct ifreq *ifr; +bpf_setif(struct bpf_d *d, struct ifreq *ifr) { struct bpf_if *bp; int s, error; @@ -1295,24 +1254,23 @@ bpf_setif(d, ifr) * Otherwise, return false but make a note that a selwakeup() must be done. */ int -bpfpoll(dev, events, wql, p) - register dev_t dev; - int events; - void * wql; - struct proc *p; +bpfpoll(dev_t dev, int events, void * wql, struct proc *p) { register struct bpf_d *d; register int s; int revents = 0; d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + + lck_mtx_lock(bpf_mlock); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); /* * An imitation of the FIONREAD ioctl code. 
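The BIOCSETF branch above is an instance of the LP64 thunk pattern used throughout this patch: a 32-bit caller's structure is copied field by field into its user_... counterpart, with the pointer member widened to a 64-bit user address (CAST_USER_ADDR_T amounts to a zero-extending cast). Reduced to its core, with illustrative stand-ins for the kernel types:

    #include <stdint.h>

    typedef uint64_t user_addr_t;

    struct bpf_program32 {             /* layout as seen from a 32-bit caller */
        unsigned int bf_len;
        uint32_t     bf_insns;         /* 32-bit pointer value */
    };

    struct user_bpf_program_sketch {   /* what bpf_setf() consumes */
        unsigned int bf_len;
        user_addr_t  bf_insns;         /* pointer widened to 64 bits */
    };

    static void
    thunk32(const struct bpf_program32 *in, struct user_bpf_program_sketch *out)
    {
        out->bf_len   = in->bf_len;
        out->bf_insns = (user_addr_t)in->bf_insns;  /* zero-extend the address */
    }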
*/ if (d->bd_bif == NULL) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_mtx_unlock(bpf_mlock); return (ENXIO); } @@ -1324,7 +1282,8 @@ bpfpoll(dev, events, wql, p) selrecord(p, &d->bd_sel, wql); } splx(s); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + + lck_mtx_unlock(bpf_mlock); return (revents); } @@ -1335,10 +1294,7 @@ bpfpoll(dev, events, wql, p) * buffer. */ void -bpf_tap(ifp, pkt, pktlen) - struct ifnet *ifp; - register u_char *pkt; - register u_int pktlen; +bpf_tap(struct ifnet *ifp, u_char *pkt, u_int pktlen) { struct bpf_if *bp; register struct bpf_d *d; @@ -1348,20 +1304,21 @@ bpf_tap(ifp, pkt, pktlen) * The only problem that could arise here is that if two different * interfaces shared any data. This is not the case. */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_mtx_lock(bpf_mlock); + bp = ifp->if_bpf; #ifdef __APPLE__ if (bp) { #endif - for (d = bp->bif_dlist; d != 0; d = d->bd_next) { - ++d->bd_rcount; - slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); - if (slen != 0) - catchpacket(d, pkt, pktlen, slen, bcopy); - } + for (d = bp->bif_dlist; d != 0; d = d->bd_next) { + ++d->bd_rcount; + slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); + if (slen != 0) + catchpacket(d, pkt, pktlen, slen, bcopy); + } #ifdef __APPLE__ } - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_mtx_unlock(bpf_mlock); #endif } @@ -1370,13 +1327,10 @@ bpf_tap(ifp, pkt, pktlen) * from m_copydata in sys/uipc_mbuf.c. */ static void -bpf_mcopy(src_arg, dst_arg, len) - const void *src_arg; - void *dst_arg; - register size_t len; +bpf_mcopy(const void *src_arg, void *dst_arg, size_t len) { - register const struct mbuf *m; - register u_int count; + const struct mbuf *m; + u_int count; u_char *dst; m = src_arg; @@ -1385,7 +1339,7 @@ bpf_mcopy(src_arg, dst_arg, len) if (m == 0) panic("bpf_mcopy"); count = min(m->m_len, len); - bcopy(mtod((struct mbuf *)m, void *), dst, count); + bcopy(mtod(m, const void *), dst, count); m = m->m_next; dst += count; len -= count; @@ -1396,27 +1350,32 @@ bpf_mcopy(src_arg, dst_arg, len) * Incoming linkage from device drivers, when packet is in an mbuf chain. */ void -bpf_mtap(ifp, m) - struct ifnet *ifp; - struct mbuf *m; +bpf_mtap(struct ifnet *ifp, struct mbuf *m) { - struct bpf_if *bp = ifp->if_bpf; + struct bpf_if *bp; struct bpf_d *d; u_int pktlen, slen; struct mbuf *m0; + lck_mtx_lock(bpf_mlock); + + bp = ifp->if_bpf; + if (bp) { pktlen = 0; for (m0 = m; m0 != 0; m0 = m0->m_next) pktlen += m0->m_len; - - for (d = bp->bif_dlist; d != 0; d = d->bd_next) { - if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL)) - continue; - ++d->bd_rcount; - slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); - if (slen != 0) - catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy); + + for (d = bp->bif_dlist; d != 0; d = d->bd_next) { + if (!d->bd_seesent && (m->m_pkthdr.rcvif == NULL)) + continue; + ++d->bd_rcount; + slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); + if (slen != 0) + catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy); + } } + + lck_mtx_unlock(bpf_mlock); } /* @@ -1428,11 +1387,8 @@ bpf_mtap(ifp, m) * pkt is really an mbuf. 
*/ static void -catchpacket(d, pkt, pktlen, snaplen, cpfn) - register struct bpf_d *d; - register u_char *pkt; - register u_int pktlen, snaplen; - register void (*cpfn) __P((const void *, void *, size_t)); +catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, + void (*cpfn)(const void *, void *, size_t)) { register struct bpf_hdr *hp; register int totlen, curlen; @@ -1500,8 +1456,7 @@ catchpacket(d, pkt, pktlen, snaplen, cpfn) * Initialize all nonzero fields of a descriptor. */ static int -bpf_allocbufs(d) - register struct bpf_d *d; +bpf_allocbufs(struct bpf_d *d) { d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT); if (d->bd_fbuf == 0) @@ -1522,8 +1477,7 @@ bpf_allocbufs(d) * Called on close. */ static void -bpf_freed(d) - register struct bpf_d *d; +bpf_freed(struct bpf_d *d) { /* * We don't need to lock out interrupts since this descriptor has @@ -1539,8 +1493,6 @@ bpf_freed(d) } if (d->bd_filter) FREE((caddr_t)d->bd_filter, M_DEVBUF); - - D_MARKFREE(d); } /* @@ -1549,16 +1501,15 @@ bpf_freed(d) * size of the link header (variable length headers not yet supported). */ void -bpfattach(ifp, dlt, hdrlen) - struct ifnet *ifp; - u_int dlt, hdrlen; +bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen) { struct bpf_if *bp; - int i; bp = (struct bpf_if *) _MALLOC(sizeof(*bp), M_DEVBUF, M_WAIT); if (bp == 0) panic("bpfattach"); + lck_mtx_lock(bpf_mlock); + bp->bif_dlist = 0; bp->bif_ifp = ifp; bp->bif_dlt = dlt; @@ -1575,6 +1526,11 @@ bpfattach(ifp, dlt, hdrlen) * performance reasons and to alleviate alignment restrictions). */ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; + + /* Take a reference on the interface */ + ifp_reference(ifp); + + lck_mtx_unlock(bpf_mlock); #ifndef __APPLE__ if (bootverbose) @@ -1589,14 +1545,15 @@ bpfattach(ifp, dlt, hdrlen) * ENXIO. 
*/ void -bpfdetach(ifp) - struct ifnet *ifp; +bpfdetach(struct ifnet *ifp) { struct bpf_if *bp, *bp_prev; struct bpf_d *d; int s; s = splimp(); + + lck_mtx_lock(bpf_mlock); /* Locate BPF interface information */ bp_prev = NULL; @@ -1633,6 +1590,10 @@ bpfdetach(ifp) } else { bpf_iflist = bp->bif_next; } + + ifp_release(ifp); + + lck_mtx_unlock(bpf_mlock); FREE(bp, M_DEVBUF); @@ -1640,25 +1601,51 @@ bpfdetach(ifp) } void -bpf_init(unused) - void *unused; +bpf_init(__unused void *unused) { #ifdef __APPLE__ int i; int maj; - if (!bpf_devsw_installed ) { + if (bpf_devsw_installed == 0) { bpf_devsw_installed = 1; + + bpf_mlock_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(bpf_mlock_grp_attr); + + bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr); + + bpf_mlock_attr = lck_attr_alloc_init(); + lck_attr_setdefault(bpf_mlock_attr); + + bpf_mlock = lck_mtx_alloc_init(bpf_mlock_grp, bpf_mlock_attr); + + if (bpf_mlock == 0) { + printf("bpf_init: failed to allocate bpf_mlock\n"); + bpf_devsw_installed = 0; + return; + } + maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw); if (maj == -1) { + if (bpf_mlock) + lck_mtx_free(bpf_mlock, bpf_mlock_grp); + if (bpf_mlock_attr) + lck_attr_free(bpf_mlock_attr); + if (bpf_mlock_grp) + lck_grp_free(bpf_mlock_grp); + if (bpf_mlock_grp_attr) + lck_grp_attr_free(bpf_mlock_grp_attr); + + bpf_mlock = 0; + bpf_mlock_attr = 0; + bpf_mlock_grp = 0; + bpf_mlock_grp_attr = 0; + bpf_devsw_installed = 0; printf("bpf_init: failed to allocate a major number!\n"); - nbpfilter = 0; - return; - } - if (bpf_dtab_grow(NBPFILTER) == 0) { - printf("bpf_init: failed to allocate bpf_dtab\n"); return; } + for (i = 0 ; i < NBPFILTER; i++) bpf_make_dev_t(maj); } diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h index 89e9163c6..b6b0a3995 100644 --- a/bsd/net/bpf.h +++ b/bsd/net/bpf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -67,6 +67,11 @@ #include #include #include +#include + +#ifdef KERNEL +#include +#endif /* BSD style release date */ #define BPF_RELEASE 199606 @@ -82,7 +87,7 @@ typedef u_int32_t bpf_u_int32; #define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1)) #define BPF_MAXINSNS 512 -#define BPF_MAXBUFSIZE 0x8000 +#define BPF_MAXBUFSIZE 0x80000 #define BPF_MINBUFSIZE 32 /* @@ -93,6 +98,26 @@ struct bpf_program { struct bpf_insn *bf_insns; }; +#ifdef KERNEL +/* LP64 version of bpf_program. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with bpf_program + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_bpf_program { + u_int bf_len; + user_addr_t bf_insns; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + /* * Struct returned by BIOCGSTATS. 
*/ @@ -332,26 +357,40 @@ struct bpf_insn { #define BPF_STMT(code, k) { (u_short)(code), 0, 0, k } #define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k } +#ifdef KERNEL_PRIVATE /* Forward declarations */ struct ifnet; struct mbuf; -#ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE -int bpf_validate __P((const struct bpf_insn *, int)); -void bpf_tap __P((struct ifnet *, u_char *, u_int)); -void bpf_mtap __P((struct ifnet *, struct mbuf *)); -void bpfattach __P((struct ifnet *, u_int, u_int)); -void bpfdetach __P((struct ifnet *)); +int bpf_validate(const struct bpf_insn *, int); +void bpf_tap(struct ifnet *, u_char *, u_int); +void bpf_mtap(struct ifnet *, struct mbuf *); + +void bpfdetach(struct ifnet *); -void bpfilterattach __P((int)); -u_int bpf_filter __P((const struct bpf_insn *, u_char *, u_int, u_int)); +void bpfilterattach(int); +u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); #ifdef __APPLE__ #define BPF_TAP(x, y, z) bpf_tap(x,y,z) #define BPF_MTAP(x, y) bpf_mtap(x, y) #endif /* __APPLE__ */ -#endif /* __APPLE_API_UNSTABLE */ + +#endif /* KERNEL_PRIVATE */ + +#ifdef KERNEL +/*! + @function bpfattach + @discussion Registers an interface with BPF. This allows bpf devices + to attach to your interface to capture packets. Your interface + will be unregistered automatically when your interface is + detached. + @param interface The interface to register with BPF. + @param data_link_type The data link type of the interface. See the + DLT_* defines in bpf.h. + @param header_length The length, in bytes, of the data link header. + */ +void bpfattach(ifnet_t interface, u_int data_link_type, u_int header_length); #endif /* KERNEL */ /* diff --git a/bsd/net/bpf_filter.c b/bsd/net/bpf_filter.c index 102e3c492..d697c1363 100644 --- a/bsd/net/bpf_filter.c +++ b/bsd/net/bpf_filter.c @@ -93,7 +93,7 @@ #ifdef KERNEL #define MINDEX(m, k) \ { \ - register int len = m->m_len; \ + register unsigned int len = m->m_len; \ \ while (k >= len) { \ k -= len; \ @@ -104,14 +104,11 @@ } \ } -static u_int16_t m_xhalf __P((struct mbuf *m, bpf_u_int32 k, int *err)); -static u_int32_t m_xword __P((struct mbuf *m, bpf_u_int32 k, int *err)); +static u_int16_t m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err); +static u_int32_t m_xword(struct mbuf *m, bpf_u_int32 k, int *err); static u_int32_t -m_xword(m, k, err) - register struct mbuf *m; - register bpf_u_int32 k; - register int *err; +m_xword(struct mbuf *m, bpf_u_int32 k, int *err) { register size_t len; register u_char *cp, *np; @@ -164,10 +161,7 @@ m_xword(m, k, err) } static u_int16_t -m_xhalf(m, k, err) - register struct mbuf *m; - register bpf_u_int32 k; - register int *err; +m_xhalf(struct mbuf *m, bpf_u_int32 k, int *err) { register size_t len; register u_char *cp; @@ -203,11 +197,7 @@ * buflen is the amount of data present */ u_int -bpf_filter(pc, p, wirelen, buflen) - register const struct bpf_insn *pc; - register u_char *p; - u_int wirelen; - register u_int buflen; +bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) { register u_int32_t A = 0, X = 0; register bpf_u_int32 k; @@ -540,9 +530,7 @@ bpf_filter(pc, p, wirelen, buflen) * Otherwise, a bogus program could easily crash the system. 
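The BPF_STMT/BPF_JUMP macros above are how filter programs are written by hand, and bpf_validate() is what bounds-checks every jump before bpfioctl() will install the program. A conventional user-space example that accepts only IPv4 over Ethernet (EtherType 0x0800 at offset 12) and drops everything else, attached with BIOCSETF:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <net/bpf.h>

    /* Accept IPv4 frames (up to 0x7fffffff bytes each); reject the rest. */
    static struct bpf_insn ipv4_filter[] = {
        BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),            /* load EtherType */
        BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0800, 0, 1), /* IPv4? */
        BPF_STMT(BPF_RET + BPF_K, 0x7fffffff),             /* accept */
        BPF_STMT(BPF_RET + BPF_K, 0),                      /* reject */
    };

    static int
    set_ipv4_filter(int bpf_fd)
    {
        struct bpf_program prog = {
            sizeof (ipv4_filter) / sizeof (ipv4_filter[0]),
            ipv4_filter,
        };
        /* The kernel copies the program in and runs bpf_validate() on it. */
        return (ioctl(bpf_fd, BIOCSETF, &prog));
    }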
*/ int -bpf_validate(f, len) - const struct bpf_insn *f; - int len; +bpf_validate(const struct bpf_insn *f, int len) { register int i; const struct bpf_insn *p; @@ -557,7 +545,7 @@ bpf_validate(f, len) register int from = i + 1; if (BPF_OP(p->code) == BPF_JA) { - if (from >= len || p->k >= len - from) + if (from >= len || p->k >= (bpf_u_int32)(len - from)) return 0; } else if (from >= len || p->jt >= len - from || diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h index b8f80bb63..1958e635b 100644 --- a/bsd/net/bpfdesc.h +++ b/bsd/net/bpfdesc.h @@ -64,7 +64,7 @@ #ifndef _NET_BPFDESC_H_ #define _NET_BPFDESC_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * The items in this header file should be wrapped in #ifdef KERNEL. */ @@ -134,5 +134,5 @@ struct bpf_if { struct ifnet *bif_ifp; /* corresponding interface */ }; -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/net/bridge.c b/bsd/net/bridge.c index 2f364dee5..2770e4a76 100644 --- a/bsd/net/bridge.c +++ b/bsd/net/bridge.c @@ -166,22 +166,24 @@ static void bdg_promisc_off(int clear_used) { struct ifnet *ifp ; - TAILQ_FOREACH(ifp, &ifnet, if_link) { - if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { - int s, ret ; - s = splimp(); - ret = ifpromisc(ifp, 0); - splx(s); - ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; - DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", - ifp->if_name, ifp->if_unit, - ifp->if_flags, ifp2sc[ifp->if_index].flags);) - } - if (clear_used) { - ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; - bdg_stats.s[ifp->if_index].name[0] = '\0'; - } + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { + int s, ret ; + s = splimp(); + ret = ifnet_set_promiscuous(ifp, 0); + splx(s); + ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; + DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", + ifp->if_name, ifp->if_unit, + ifp->if_flags, ifp2sc[ifp->if_index].flags);) + } + if (clear_used) { + ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; + bdg_stats.s[ifp->if_index].name[0] = '\0'; + } } + ifnet_head_done(); } /* @@ -193,29 +195,31 @@ bdg_promisc_on() struct ifnet *ifp ; int s ; - TAILQ_FOREACH(ifp, &ifnet, if_link) { - if ( !BDG_USED(ifp) ) - continue ; - if ( 0 == ( ifp->if_flags & IFF_UP) ) { - s = splimp(); - if_up(ifp); - splx(s); - } - if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { - int ret ; - s = splimp(); - ret = ifpromisc(ifp, 1); - splx(s); - ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; - printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", - ifp->if_name, ifp->if_unit, - ifp->if_flags, ifp2sc[ifp->if_index].flags); - } - if (BDG_MUTED(ifp)) { - printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); - BDG_UNMUTE(ifp) ; - } + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if ( !BDG_USED(ifp) ) + continue ; + if ( 0 == ( ifp->if_flags & IFF_UP) ) { + s = splimp(); + if_up(ifp); + splx(s); + } + if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { + int ret ; + s = splimp(); + ret = ifnet_set_promiscuous(ifp, 1); + splx(s); + ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; + printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", + ifp->if_name, ifp->if_unit, + ifp->if_flags, ifp2sc[ifp->if_index].flags); + } + if (BDG_MUTED(ifp)) { + printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); + BDG_UNMUTE(ifp) ; + } } + ifnet_head_done(); } static int @@ -394,17 
+398,6 @@ flush_table() splx(s); } -/* wrapper for funnel */ -void -bdg_timeout_funneled(void * dummy) -{ - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - bdg_timeout(dummy); - funnel_state = thread_funnel_set(network_flock, FALSE); -} - /* * called periodically to flush entries etc. */ @@ -438,7 +431,7 @@ bdg_timeout(void *dummy) bdg_loops = 0 ; } } - timeout(bdg_timeout_funneled, (void *)0, 2*hz ); + timeout(bdg_timeout, (void *)0, 2*hz ); } /* @@ -477,7 +470,6 @@ bdgtakeifaces(void) { int i ; struct ifnet *ifp; - struct arpcom *ac ; bdg_addr *p = bdg_addresses ; struct bdg_softc *bp; @@ -485,32 +477,32 @@ bdgtakeifaces(void) *bridge_cfg = '\0'; printf("BRIDGE 010131, have %d interfaces\n", if_index); + ifnet_head_lock_shared(); for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; i++, ifp = TAILQ_NEXT(ifp, if_link) ) - if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ - bp = &ifp2sc[ifp->if_index] ; - ac = (struct arpcom *)ifp; - sprintf(bridge_cfg + strlen(bridge_cfg), - "%s%d:1,", ifp->if_name, ifp->if_unit); - printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", - ifp->if_index, - bdg_stats.s[ifp->if_index].name, - (int)ifp->if_type, (int) ifp->if_physical, - (int)ifp->if_addrlen, - ac->ac_enaddr, "." ); - bcopy(ac->ac_enaddr, p->etheraddr, 6); - p++ ; - bp->ifp = ifp ; - bp->flags = IFF_USED ; - bp->cluster_id = htons(1) ; - bp->magic = 0xDEADBEEF ; - - sprintf(bdg_stats.s[ifp->if_index].name, - "%s%d:%d", ifp->if_name, ifp->if_unit, - ntohs(bp->cluster_id)); - bdg_ports ++ ; - } - + if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ + ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN); + bp = &ifp2sc[ifp->if_index] ; + sprintf(bridge_cfg + strlen(bridge_cfg), + "%s%d:1,", ifp->if_name, ifp->if_unit); + printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", + ifp->if_index, + bdg_stats.s[ifp->if_index].name, + (int)ifp->if_type, (int) ifp->if_physical, + (int)ifp->if_addrlen, + p->etheraddr, "." 
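The bridge changes above replace bare walks of the global interface list with walks bracketed by ifnet_head_lock_shared()/ifnet_head_done(). The shape of that discipline, as a runnable user-space analogue in which a pthread read-write lock stands in for the ifnet head lock:

    #include <pthread.h>
    #include <sys/queue.h>

    struct iface {
        TAILQ_ENTRY(iface) if_link;
        int if_flags;
    };

    static TAILQ_HEAD(, iface) iface_head = TAILQ_HEAD_INITIALIZER(iface_head);
    static pthread_rwlock_t iface_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Every reader holds the shared lock for the full duration of the walk. */
    static int
    count_flagged(int flag)
    {
        struct iface *ifp;
        int n = 0;

        pthread_rwlock_rdlock(&iface_lock);   /* ifnet_head_lock_shared() */
        TAILQ_FOREACH(ifp, &iface_head, if_link) {
            if (ifp->if_flags & flag)
                n++;
        }
        pthread_rwlock_unlock(&iface_lock);   /* ifnet_head_done() */
        return (n);
    }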
); + p++ ; + bp->ifp = ifp ; + bp->flags = IFF_USED ; + bp->cluster_id = htons(1) ; + bp->magic = 0xDEADBEEF ; + + sprintf(bdg_stats.s[ifp->if_index].name, + "%s%d:%d", ifp->if_name, ifp->if_unit, + ntohs(bp->cluster_id)); + bdg_ports ++ ; + } + ifnet_head_done(); } /* @@ -666,27 +658,27 @@ bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) bdg_thru++; /* only count once */ if (src == NULL) /* packet from ether_output */ - dst = bridge_dst_lookup(eh); + dst = bridge_dst_lookup(eh); if (dst == BDG_DROP) { /* this should not happen */ - printf("xx bdg_forward for BDG_DROP\n"); - m_freem(m0); - return NULL; + printf("xx bdg_forward for BDG_DROP\n"); + m_freem(m0); + return NULL; } if (dst == BDG_LOCAL) { /* this should not happen as well */ - printf("xx ouch, bdg_forward for local pkt\n"); - return m0; + printf("xx ouch, bdg_forward for local pkt\n"); + return m0; } if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { - ifp = ifnet.tqh_first ; /* scan all ports */ - once = 0 ; - if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ - shared = 1 ; + ifp = ifnet_head.tqh_first ; /* scan all ports */ + once = 0 ; + if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ + shared = 1 ; } else { - ifp = dst ; - once = 1 ; + ifp = dst ; + once = 1 ; } if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) - panic("bdg_forward: bad dst"); + panic("bdg_forward: bad dst"); #ifdef IPFIREWALL /* diff --git a/bsd/net/bridge.h b/bsd/net/bridge.h index a9c6b277d..3f49914b4 100644 --- a/bsd/net/bridge.h +++ b/bsd/net/bridge.h @@ -87,7 +87,6 @@ extern struct bdg_softc *ifp2sc; #define BDG_MUTE(ifp) ifp2sc[ifp->if_index].flags |= IFF_MUTE #define BDG_UNMUTE(ifp) ifp2sc[ifp->if_index].flags &= ~IFF_MUTE #define BDG_CLUSTER(ifp) (ifp2sc[ifp->if_index].cluster_id) -#define BDG_EH(ifp) ((struct arpcom *)ifp)->ac_enaddr #define BDG_SAMECLUSTER(ifp,src) \ (src == NULL || BDG_CLUSTER(ifp) == BDG_CLUSTER(src) ) diff --git a/bsd/net/bsd_comp.c b/bsd/net/bsd_comp.c index d81e66f93..1bfc725ea 100644 --- a/bsd/net/bsd_comp.c +++ b/bsd/net/bsd_comp.c @@ -146,26 +146,27 @@ struct bsd_db { #define BSD_OVHD 2 /* BSD compress overhead/packet */ #define BSD_INIT_BITS BSD_MIN_BITS -static void bsd_clear __P((struct bsd_db *db)); -static int bsd_check __P((struct bsd_db *db)); -static void *bsd_alloc __P((u_char *options, int opt_len, int decomp)); -static int bsd_init_comp_db __P((struct bsd_db *db, u_char *options, int opt_len, +static void bsd_clear(struct bsd_db *db); +static int bsd_check(struct bsd_db *db); +static void *bsd_alloc(u_char *options, int opt_len, int decomp); +static int bsd_init_comp_db(struct bsd_db *db, u_char *options, + int opt_len, int unit, int hdrlen, int mru, int debug, - int decomp)); -static void *bsd_comp_alloc __P((u_char *options, int opt_len)); -static void *bsd_decomp_alloc __P((u_char *options, int opt_len)); -static void bsd_free __P((void *state)); -static int bsd_comp_init __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int debug)); -static int bsd_decomp_init __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int mru, int debug)); -static int bsd_compress __P((void *state, struct mbuf **mret, - struct mbuf *mp, int slen, int maxolen)); -static void bsd_incomp __P((void *state, struct mbuf *dmsg)); -static int bsd_decompress __P((void *state, struct mbuf *cmp, - struct mbuf **dmpp)); -static void bsd_reset __P((void *state)); -static void bsd_comp_stats __P((void *state, struct compstat *stats)); + int 
decomp); +static void *bsd_comp_alloc(u_char *options, int opt_len); +static void *bsd_decomp_alloc(u_char *options, int opt_len); +static void bsd_free(void *state); +static int bsd_comp_init(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int debug); +static int bsd_decomp_init(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int mru, int debug); +static int bsd_compress(void *state, struct mbuf **mret, + struct mbuf *mp, int slen, int maxolen); +static void bsd_incomp(void *state, struct mbuf *dmsg); +static int bsd_decompress(void *state, struct mbuf *cmp, + struct mbuf **dmpp); +static void bsd_reset(void *state); +static void bsd_comp_stats(void *state, struct compstat *stats); /* * Procedures exported to if_ppp.c. diff --git a/bsd/net/devtimer.c b/bsd/net/devtimer.c new file mode 100644 index 000000000..4344f3a9f --- /dev/null +++ b/bsd/net/devtimer.c @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * devtimer.c + * - timer source based on + */ + +/* + * Modification History: + * + * June 22, 2004 Dieter Siegmund (dieter@apple.com) + * - created + */ +#include +#include +#include +#include +#include +#include + +#ifdef DEVTIMER_DEBUG +#define _devtimer_printf printf +#else /* DEVTIMER_DEBUG */ +static __inline__ void +_devtimer_printf(__unused const char * fmt, ...) 
+{ +} +#endif /* DEVTIMER_DEBUG */ + +struct devtimer_s { + void * dt_callout; + devtimer_timeout_func dt_timeout_func; + devtimer_process_func dt_process_func; + void * dt_arg0; + void * dt_arg1; + void * dt_arg2; + int dt_generation; + UInt32 dt_retain_count; +}; + +#define M_DEVTIMER M_DEVBUF + +static __inline__ void +timeval_add(struct timeval tv1, struct timeval tv2, + struct timeval * result) +{ + result->tv_sec = tv1.tv_sec + tv2.tv_sec; + result->tv_usec = tv1.tv_usec + tv2.tv_usec; + if (result->tv_usec >= DEVTIMER_USECS_PER_SEC) { + result->tv_usec -= DEVTIMER_USECS_PER_SEC; + result->tv_sec++; + } + return; +} + +static __inline__ uint64_t +timeval_to_absolutetime(struct timeval tv) +{ + uint64_t secs; + uint64_t usecs; + + clock_interval_to_absolutetime_interval(tv.tv_sec, NSEC_PER_SEC, + &secs); + clock_interval_to_absolutetime_interval(tv.tv_usec, NSEC_PER_USEC, + &usecs); + return (secs + usecs); +} + + +__private_extern__ int +devtimer_valid(devtimer_ref timer) +{ + return (timer->dt_callout != NULL); +} + +__private_extern__ void +devtimer_retain(devtimer_ref timer) +{ + OSIncrementAtomic(&timer->dt_retain_count); + return; +} + +__private_extern__ void +devtimer_invalidate(devtimer_ref timer) +{ + devtimer_cancel(timer); + timer->dt_arg0 = NULL; + if (timer->dt_callout != NULL) { + thread_call_free(timer->dt_callout); + timer->dt_callout = NULL; + } + return; +} + +__private_extern__ void +devtimer_release(devtimer_ref timer) +{ + UInt32 old_retain_count; + + old_retain_count = OSDecrementAtomic(&timer->dt_retain_count); + switch (old_retain_count) { + case 0: + panic("devtimer_release: retain count is 0\n"); + break; + case 1: + devtimer_invalidate(timer); + FREE(timer, M_DEVTIMER); + _devtimer_printf("devtimer: timer released\n"); + break; + default: + break; + } + return; +} + +static void +devtimer_process(void * param0, void * param1) +{ + int generation = (int)param1; + devtimer_process_func process_func; + devtimer_timeout_func timeout_func; + devtimer_ref timer = (devtimer_ref)param0; + + process_func = timer->dt_process_func; + if (process_func != NULL) { + (*process_func)(timer, devtimer_process_func_event_lock); + } + timeout_func = timer->dt_timeout_func; + if (timeout_func != NULL) { + timer->dt_timeout_func = NULL; + if (timer->dt_generation == generation) { + (*timeout_func)(timer->dt_arg0, timer->dt_arg1, timer->dt_arg2); + } + } + devtimer_release(timer); + if (process_func != NULL) { + (*process_func)(timer, devtimer_process_func_event_unlock); + } + return; +} + +__private_extern__ void * +devtimer_arg0(devtimer_ref timer) +{ + return (timer->dt_arg0); +} + +__private_extern__ devtimer_ref +devtimer_create(devtimer_process_func process_func, void * arg0) +{ + devtimer_ref timer; + + timer = _MALLOC(sizeof(*timer), M_DEVTIMER, M_WAITOK); + if (timer == NULL) { + return (timer); + } + bzero(timer, sizeof(*timer)); + devtimer_retain(timer); + timer->dt_callout = thread_call_allocate(devtimer_process, timer); + if (timer->dt_callout == NULL) { + _devtimer_printf("devtimer: thread_call_allocate failed\n"); + devtimer_release(timer); + return (NULL); + } + timer->dt_process_func = process_func; + timer->dt_arg0 = arg0; + return (timer); +} + +__private_extern__ void +devtimer_set_absolute(devtimer_ref timer, + struct timeval abs_time, + devtimer_timeout_func timeout_func, + void * arg1, void * arg2) +{ + if (timer->dt_callout == NULL) { + printf("devtimer_set_absolute: uninitialized/freed timer\n"); + return; + } + devtimer_cancel(timer); + if (timeout_func == NULL) 
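devtimer_process() and devtimer_set_absolute() cooperate through dt_generation: each arm of the timer bumps the counter and hands the new value to the thread call as its parameter, and a timeout whose token no longer matches on delivery is discarded as stale. The pattern in isolation, as a simplified single-threaded sketch (the kernel version pairs this with retain/release on the timer):

    struct gen_timer {
        int generation;                /* bumped on every (re)arm */
        void (*fn)(void *);
        void *arg;
    };

    /* Arm the timer; the returned token travels with the pending callout. */
    static int
    gen_arm(struct gen_timer *t, void (*fn)(void *), void *arg)
    {
        t->fn = fn;
        t->arg = arg;
        return (++t->generation);
    }

    /* Delivery path: run the callback only if the token is still current. */
    static void
    gen_fire(struct gen_timer *t, int token)
    {
        if (t->fn == NULL || token != t->generation)
            return;                    /* stale: re-armed or cancelled since */
        t->fn(t->arg);
        t->fn = NULL;
    }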
+        return;
+    }
+    timer->dt_timeout_func = timeout_func;
+    timer->dt_arg1 = arg1;
+    timer->dt_arg2 = arg2;
+    _devtimer_printf("devtimer: wakeup time is (%d.%d)\n",
+                     abs_time.tv_sec, abs_time.tv_usec);
+    timer->dt_generation++;
+    devtimer_retain(timer);
+    thread_call_enter1_delayed(timer->dt_callout,
+                               (thread_call_param_t)timer->dt_generation,
+                               timeval_to_absolutetime(abs_time));
+    return;
+}
+
+__private_extern__ void
+devtimer_set_relative(devtimer_ref timer,
+                      struct timeval rel_time,
+                      devtimer_timeout_func timeout_func,
+                      void * arg1, void * arg2)
+{
+    struct timeval abs_time;
+    struct timeval current_time;
+
+    current_time = devtimer_current_time();
+    timeval_add(current_time, rel_time, &abs_time);
+    devtimer_set_absolute(timer, abs_time, timeout_func, arg1, arg2);
+    return;
+}
+
+__private_extern__ void
+devtimer_cancel(devtimer_ref timer)
+{
+    if (timer->dt_timeout_func != NULL) {
+        timer->dt_timeout_func = NULL;
+        if (timer->dt_callout != NULL) {
+            _devtimer_printf("devtimer: cancelling timer source\n");
+            if (thread_call_cancel(timer->dt_callout)) {
+                devtimer_release(timer);
+            }
+            else {
+                _devtimer_printf("devtimer: delayed release\n");
+            }
+        }
+    }
+    return;
+}
+
+__private_extern__ int
+devtimer_enabled(devtimer_ref timer)
+{
+    return (timer->dt_timeout_func != NULL);
+}
+
+__private_extern__ int32_t
+devtimer_current_secs(void)
+{
+    struct timeval tv;
+
+    tv = devtimer_current_time();
+    return (tv.tv_sec);
+}
+
+__private_extern__ struct timeval
+devtimer_current_time(void)
+{
+    struct timeval tv;
+    uint32_t sec;
+    uint32_t usec;
+
+    clock_get_system_microtime(&sec, &usec);
+    tv.tv_sec = sec;
+    tv.tv_usec = usec;
+    return (tv);
+}
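devtimer, as added above, is a small reference-counted timer built on the kernel's thread_call facility; the new header below declares its public interface. For orientation, a hypothetical client would drive it roughly as follows. This fragment is an editor-supplied sketch, not part of the patch: the my_* names are invented, and my_mutex is assumed to be allocated elsewhere.

/* Illustrative devtimer client (hypothetical; not patch code). */
static lck_mtx_t *  my_mutex;   /* assumed initialized elsewhere */
static devtimer_ref my_timer;

static void
my_process(devtimer_ref timer, devtimer_process_func_event event)
{
    /* devtimer brackets timeout delivery with lock/unlock events so
     * the client can wrap the callback in its own lock. */
    if (event == devtimer_process_func_event_lock)
        lck_mtx_lock(my_mutex);
    else
        lck_mtx_unlock(my_mutex);
}

static void
my_timeout(void * arg0, void * arg1, void * arg2)
{
    /* arg0 came from devtimer_create, arg1/arg2 from set_relative */
}

static void
my_start(void * softc)
{
    struct timeval delay = { 1, 0 };    /* fire one second from now */

    my_timer = devtimer_create(my_process, softc);
    if (my_timer == NULL)
        return;
    devtimer_set_relative(my_timer, delay, my_timeout, NULL, NULL);
}

static void
my_stop(void)
{
    devtimer_invalidate(my_timer);  /* cancels any pending shot, frees the thread_call */
    devtimer_release(my_timer);     /* drop the creation reference */
}

Note the generation counter: because devtimer_set_absolute passes dt_generation to thread_call_enter1_delayed and devtimer_process compares it before dispatching, a timeout that was cancelled and re-armed while its callout was already in flight is quietly dropped rather than delivered stale.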
diff --git a/bsd/net/devtimer.h b/bsd/net/devtimer.h
new file mode 100644
index 000000000..9e8aeca91
--- /dev/null
+++ b/bsd/net/devtimer.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * devtimer.h
+ * - timer source based on <kern/thread_call.h>
+ */
+
+
+#ifndef _NET_DEVTIMER_H
+#define _NET_DEVTIMER_H
+
+#include
+#include
+
+#define DEVTIMER_USECS_PER_SEC (1000 * 1000)
+
+enum {
+    devtimer_process_func_event_lock,
+    devtimer_process_func_event_unlock,
+};
+typedef int devtimer_process_func_event;
+
+typedef struct devtimer_s * devtimer_ref;
+typedef void (*devtimer_process_func)(devtimer_ref timer,
+                                      devtimer_process_func_event event);
+typedef void (*devtimer_timeout_func)(void * arg0, void * arg1, void * arg2);
+
+int
+devtimer_valid(devtimer_ref timer);
+
+void
+devtimer_retain(devtimer_ref timer);
+
+void *
+devtimer_arg0(devtimer_ref timer);
+
+devtimer_ref
+devtimer_create(devtimer_process_func process_func, void * arg0);
+
+void
+devtimer_invalidate(devtimer_ref timer);
+
+void
+devtimer_release(devtimer_ref timer);
+
+void
+devtimer_set_absolute(devtimer_ref t,
+                      struct timeval abs_time,
+                      devtimer_timeout_func func,
+                      void * arg1, void * arg2);
+
+void
+devtimer_set_relative(devtimer_ref t,
+                      struct timeval rel_time,
+                      devtimer_timeout_func func,
+                      void * arg1, void * arg2);
+void
+devtimer_cancel(devtimer_ref t);
+
+int
+devtimer_enabled(devtimer_ref t);
+
+struct timeval
+devtimer_current_time(void);
+
+int32_t
+devtimer_current_secs(void);
+
+#endif /* _NET_DEVTIMER_H */
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index 98973ea10..f69a1c9e0 100644
--- a/bsd/net/dlil.c
+++ b/bsd/net/dlil.c
@@ -26,28 +26,33 @@ * Author: Ted Walker */ - - #include #include #include #include #include #include +#include +#include #include #include +#include #include #include +#include #include #include -#include #include #include #include +#include -#include #include +#include + +#include #include
@@ -67,49 +72,79 @@ #define PFILT(x) ((struct dlil_filterq_entry *) (x))->variants.pr_filter #define IFILT(x) ((struct dlil_filterq_entry *) (x))->variants.if_filter -struct dl_tag_str { - struct ifnet *ifp; - struct if_proto *proto; - struct dlil_filterq_head *pr_flt_head; +#if 0 +#define DLIL_PRINTF printf +#else +#define DLIL_PRINTF kprintf +#endif + +//#define DLIL_ALWAYS_DELAY_DETACH 1 + +enum { + kProtoKPI_DLIL = 0, + kProtoKPI_v1 = 1 +}; + +struct if_proto { + SLIST_ENTRY(if_proto) next_hash; + int refcount; + int detaching; + struct ifnet *ifp; + struct domain *dl_domain; + protocol_family_t protocol_family; + int proto_kpi; + union { + struct { + dl_input_func dl_input; + dl_pre_output_func dl_pre_output; + dl_event_func dl_event; + dl_offer_func dl_offer; + dl_ioctl_func dl_ioctl; + dl_detached_func dl_detached; + } dlil; + struct { + proto_media_input input; + proto_media_preout pre_output; + proto_media_event event; + proto_media_ioctl ioctl; + proto_media_detached detached; + proto_media_resolve_multi resolve_multi; + proto_media_send_arp send_arp; + } v1; + } kpi; }; +SLIST_HEAD(proto_hash_entry, if_proto); + struct dlil_ifnet { /* ifnet and drvr_ext are used by the stack and drivers drvr_ext extends the public ifnet and must follow dl_if */ struct ifnet dl_if; /* public ifnet */ - void *drvr_ext[4]; /* driver reserved (e.g arpcom extension for enet) */ /* dlil private fields */ TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet are link together */ /* it is not the ifnet list */ void *if_uniqueid; /* unique id identifying the interface */ size_t if_uniqueid_len;/* length of the unique id */ - char if_namestorage[IFNAMSIZ]; /* interface name storage for detached interfaces */ -}; - -struct dlil_stats_str { - int inject_pr_in1; - int
inject_pr_in2; - int inject_pr_out1; - int inject_pr_out2; - int inject_if_in1; - int inject_if_in2; - int inject_if_out1; - int inject_if_out2; + char if_namestorage[IFNAMSIZ]; /* interface name storage */ }; - -struct dlil_filter_id_str { - int type; - struct dlil_filterq_head *head; - struct dlil_filterq_entry *filter_ptr; - struct ifnet *ifp; - struct if_proto *proto; +struct ifnet_filter { + TAILQ_ENTRY(ifnet_filter) filt_next; + ifnet_t filt_ifp; + int filt_detaching; + + const char *filt_name; + void *filt_cookie; + protocol_family_t filt_protocol; + iff_input_func filt_input; + iff_output_func filt_output; + iff_event_func filt_event; + iff_ioctl_func filt_ioctl; + iff_detached_func filt_detached; }; - - struct if_family_str { TAILQ_ENTRY(if_family_str) if_fam_next; u_long if_family; @@ -121,49 +156,47 @@ struct if_family_str { int (*add_if)(struct ifnet *ifp); int (*del_if)(struct ifnet *ifp); int (*init_if)(struct ifnet *ifp); - int (*add_proto)(struct ddesc_head_str *demux_desc_head, - struct if_proto *proto, u_long dl_tag); - int (*del_proto)(struct if_proto *proto, u_long dl_tag); - int (*ifmod_ioctl)(struct ifnet *ifp, u_long command, caddr_t data); - int (*shutdown)(); + int (*add_proto)(struct ifnet *ifp, u_long protocol_family, struct ddesc_head_str *demux_desc_head); + ifnet_del_proto_func del_proto; + ifnet_ioctl_func ifmod_ioctl; + int (*shutdown)(void); }; - struct proto_family_str { TAILQ_ENTRY(proto_family_str) proto_fam_next; u_long proto_family; u_long if_family; + int usecnt; - int (*attach_proto)(struct ifnet *ifp, u_long *dl_tag); - int (*detach_proto)(struct ifnet *ifp, u_long dl_tag); + int (*attach_proto)(struct ifnet *ifp, u_long protocol_family); + int (*detach_proto)(struct ifnet *ifp, u_long protocol_family); }; +enum { + kIfNetUseCount_MayBeZero = 0, + kIfNetUseCount_MustNotBeZero = 1 +}; - -struct dlil_stats_str dlil_stats; - -static -struct dlil_filter_id_str *dlil_filters; - -static -struct dl_tag_str *dl_tag_array; - -static -TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head; - -static -TAILQ_HEAD(, if_family_str) if_family_head; - -static -TAILQ_HEAD(, proto_family_str) proto_family_head; - -static ifnet_inited = 0; -static u_long dl_tag_nb = 0; -static u_long dlil_filters_nb = 0; +static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head; +static TAILQ_HEAD(, if_family_str) if_family_head; +static TAILQ_HEAD(, proto_family_str) proto_family_head; +static lck_grp_t *dlil_lock_group; +static lck_grp_t *ifnet_lock_group; +static lck_grp_t *ifnet_head_lock_group; +static lck_attr_t *ifnet_lock_attr; +static lck_mtx_t *proto_family_mutex; +static lck_rw_t *ifnet_head_mutex; +static lck_mtx_t *dlil_ifnet_mutex; +static lck_mtx_t *dlil_mutex; +static unsigned long dlil_read_count = 0; +static unsigned long dlil_detach_waiting = 0; +extern u_int32_t ipv4_ll_arp_aware; int dlil_initialized = 0; -decl_simple_lock_data(, dlil_input_lock) +lck_spin_t *dlil_input_lock; +__private_extern__ thread_t dlil_input_thread_ptr = 0; int dlil_input_thread_wakeup = 0; +__private_extern__ int dlil_output_thread_wakeup = 0; static struct mbuf *dlil_input_mbuf_head = NULL; static struct mbuf *dlil_input_mbuf_tail = NULL; #if NLOOP > 1 @@ -171,18 +204,140 @@ static struct mbuf *dlil_input_mbuf_tail = NULL; #endif static struct mbuf *dlil_input_loop_head = NULL; static struct mbuf *dlil_input_loop_tail = NULL; -extern struct ifmultihead ifma_lostlist; static void dlil_input_thread(void); -extern void run_netisr(void); +static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg); 
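Before these declarations are put to work, note the synchronization scheme they introduce. dlil_read_count packs an active-reader count into its low bits and a writer-pending flag (dlil_writer_waiting, 0x80000000) into the high bit: dlil_read_begin() spins on a compare-and-swap to bump the count, backing off while the flag is set (unless the thread already holds a read reference, tracked per-uthread), while dlil_write_begin() publishes the flag and sleeps until the count drains to exactly the flag value. A user-space C11 analogue of the counting scheme follows; it is an editor-supplied sketch, not kernel code, and it omits the per-thread recursion bookkeeping and the tsleep/wakeup machinery while assuming writers are already serialized by a mutex, as dlil_mutex does in the real routines.

#include <stdatomic.h>
#include <sched.h>

#define WRITER_WAITING 0x80000000u

static atomic_uint rw_word;     /* low bits: reader count; high bit: writer flag */

static void
read_begin(void)
{
    for (;;) {
        unsigned int old = atomic_load(&rw_word);
        if (old & WRITER_WAITING) {     /* writer pending: let it drain first */
            sched_yield();
            continue;
        }
        if (atomic_compare_exchange_weak(&rw_word, &old, old + 1))
            return;                     /* reader count bumped */
    }
}

static void
read_end(void)
{
    /* the kernel version additionally wakes the writer once the word
     * drops to exactly WRITER_WAITING (flag set, zero readers) */
    atomic_fetch_sub(&rw_word, 1);
}

static void
write_begin(void)   /* caller is assumed to hold the writer-serializing mutex */
{
    atomic_fetch_or(&rw_word, WRITER_WAITING);      /* block new readers */
    while (atomic_load(&rw_word) != WRITER_WAITING)
        sched_yield();                              /* wait for readers to drain */
}

static void
write_end(void)
{
    atomic_fetch_and(&rw_word, ~WRITER_WAITING);    /* readers may proceed */
}

One consequence is visible throughout the code below: a thread already inside a read section must not take the write path (dlil_write_begin() returns EDEADLK), which is why filter and protocol detaches fall back to the delayed-detach thread when invoked from such a context.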
+struct ifnet *ifbyfamily(u_long family, short unit); +static int dlil_detach_filter_internal(interface_filter_t filter, int detached); +static void dlil_call_delayed_detach_thread(void); + +static void dlil_read_begin(void); +static void dlil_read_end(void); +static int dlil_write_begin(void); +static void dlil_write_end(void); + +static int ifp_use(struct ifnet *ifp, int handle_zero); +static int ifp_unuse(struct ifnet *ifp); +static void ifp_use_reached_zero(struct ifnet *ifp); + extern void bpfdetach(struct ifnet*); +extern void proto_input_run(void); // new run_netisr + + +int dlil_input_packet(struct ifnet *ifp, struct mbuf *m, char *frame_header); + +__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); int dlil_expand_mcl; +static const u_int32_t dlil_writer_waiting = 0x80000000; + +static __inline__ void* +_cast_non_const(const void * ptr) { + union { + const void* cval; + void* val; + } ret; + + ret.cval = ptr; + return (ret.val); +} + +/* Should these be inline? */ +static void +dlil_read_begin(void) +{ + unsigned long new_value; + unsigned long old_value; + struct uthread *uth = get_bsdthread_info(current_thread()); + + if (uth->dlil_incremented_read == dlil_writer_waiting) + panic("dlil_read_begin - thread is already a writer"); + + do { +again: + old_value = dlil_read_count; + + if ((old_value & dlil_writer_waiting) != 0 && uth->dlil_incremented_read == 0) + { + tsleep(&dlil_read_count, PRIBIO, "dlil_read_count", 1); + goto again; + } + + new_value = old_value + 1; + } while (!OSCompareAndSwap((UInt32)old_value, (UInt32)new_value, (UInt32*)&dlil_read_count)); + + uth->dlil_incremented_read++; +} + +static void +dlil_read_end(void) +{ + struct uthread *uth = get_bsdthread_info(current_thread()); + + OSDecrementAtomic((UInt32*)&dlil_read_count); + uth->dlil_incremented_read--; + if (dlil_read_count == dlil_writer_waiting) + wakeup(_cast_non_const(&dlil_writer_waiting)); +} + +static int +dlil_write_begin(void) +{ + struct uthread *uth = get_bsdthread_info(current_thread()); + + if (uth->dlil_incremented_read != 0) { + return EDEADLK; + } + lck_mtx_lock(dlil_mutex); + OSBitOrAtomic((UInt32)dlil_writer_waiting, (UInt32*)&dlil_read_count); +again: + if (dlil_read_count == dlil_writer_waiting) { + uth->dlil_incremented_read = dlil_writer_waiting; + return 0; + } + else { + tsleep(_cast_non_const(&dlil_writer_waiting), PRIBIO, "dlil_writer_waiting", 1); + goto again; + } +} + +static void +dlil_write_end(void) +{ + struct uthread *uth = get_bsdthread_info(current_thread()); + + if (uth->dlil_incremented_read != dlil_writer_waiting) + panic("dlil_write_end - thread is not a writer"); + OSBitAndAtomic((UInt32)~dlil_writer_waiting, (UInt32*)&dlil_read_count); + lck_mtx_unlock(dlil_mutex); + uth->dlil_incremented_read = 0; + wakeup(&dlil_read_count); +} + +#define PROTO_HASH_SLOTS 0x5 + /* * Internal functions. 
*/ +static int +proto_hash_value(u_long protocol_family) +{ + switch(protocol_family) { + case PF_INET: + return 0; + case PF_INET6: + return 1; + case PF_APPLETALK: + return 2; + case PF_VLAN: + return 3; + default: + return 4; + } +} + static struct if_family_str *find_family_module(u_long if_family) { @@ -197,7 +352,8 @@ struct if_family_str *find_family_module(u_long if_family) } static -struct proto_family_str *find_proto_module(u_long proto_family, u_long if_family) +struct proto_family_str* +find_proto_module(u_long proto_family, u_long if_family) { struct proto_family_str *mod = NULL; @@ -210,1305 +366,2068 @@ struct proto_family_str *find_proto_module(u_long proto_family, u_long if_family return mod; } - -/* - * Public functions. - */ - -struct ifnet *ifbyfamily(u_long family, short unit) +static struct if_proto* +find_attached_proto(struct ifnet *ifp, u_long protocol_family) { - struct ifnet *ifp; - - TAILQ_FOREACH(ifp, &ifnet, if_link) - if ((family == ifp->if_family) && - (ifp->if_unit == unit)) - return ifp; - - return 0; + struct if_proto *proto = NULL; + u_long i = proto_hash_value(protocol_family); + if (ifp->if_proto_hash) { + proto = SLIST_FIRST(&ifp->if_proto_hash[i]); + } + + while(proto && proto->protocol_family != protocol_family) { + proto = SLIST_NEXT(proto, next_hash); + } + + return proto; } -struct if_proto *dlttoproto(u_long dl_tag) +static void +if_proto_ref(struct if_proto *proto) { - if (dl_tag < dl_tag_nb && dl_tag_array[dl_tag].ifp) - return dl_tag_array[dl_tag].proto; - return 0; + OSAddAtomic(1, (UInt32*)&proto->refcount); } - -static int dlil_ifp_proto_count(struct ifnet * ifp) +static void +if_proto_free(struct if_proto *proto) { - int count = 0; - struct if_proto * proto; - struct dlil_proto_head * tmp; - - tmp = (struct dlil_proto_head *) &ifp->proto_head; - - TAILQ_FOREACH(proto, tmp, next) - count++; - - return count; + int oldval = OSAddAtomic(-1, (UInt32*)&proto->refcount); + + if (oldval == 1) { /* This was the last reference */ + FREE(proto, M_IFADDR); + } } -u_long ifptodlt(struct ifnet *ifp, u_long proto_family) +__private_extern__ void +ifnet_lock_assert( + __unused struct ifnet *ifp, + __unused int what) { - struct if_proto *proto; - struct dlil_proto_head *tmp = (struct dlil_proto_head *) &ifp->proto_head; - - - TAILQ_FOREACH(proto, tmp, next) - if (proto->protocol_family == proto_family) - return proto->dl_tag; - - return 0; +#if IFNET_RW_LOCK + /* + * Not implemented for rw locks. + * + * Function exists so when/if we use mutex we can + * enable this check. 
+ */ +#else + lck_mtx_assert(ifp->if_lock, what); +#endif } - -int dlil_find_dltag(u_long if_family, short unit, u_long proto_family, u_long *dl_tag) +__private_extern__ void +ifnet_lock_shared( + struct ifnet *ifp) { - struct ifnet *ifp; - - ifp = ifbyfamily(if_family, unit); - if (!ifp) - return ENOENT; - - *dl_tag = ifptodlt(ifp, proto_family); - if (*dl_tag == 0) - return EPROTONOSUPPORT; - else - return 0; +#if IFNET_RW_LOCK + lck_rw_lock_shared(ifp->if_lock); +#else + lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ifp->if_lock); +#endif } - -void dlil_post_msg(struct ifnet *ifp, u_long event_subclass, u_long event_code, - struct net_event_data *event_data, u_long event_data_len) +__private_extern__ void +ifnet_lock_exclusive( + struct ifnet *ifp) { - struct net_event_data ev_data; - struct kev_msg ev_msg; - - /* - * a net event always start with a net_event_data structure - * but the caller can generate a simple net event or - * provide a longer event structure to post - */ - - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = event_subclass; - ev_msg.event_code = event_code; - - if (event_data == 0) { - event_data = &ev_data; - event_data_len = sizeof(struct net_event_data); - } - - strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); - event_data->if_family = ifp->if_family; - event_data->if_unit = (unsigned long) ifp->if_unit; - - ev_msg.dv[0].data_length = event_data_len; - ev_msg.dv[0].data_ptr = event_data; - ev_msg.dv[1].data_length = 0; - - kev_post_msg(&ev_msg); +#if IFNET_RW_LOCK + lck_rw_lock_exclusive(ifp->if_lock); +#else + lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ifp->if_lock); +#endif } - - -void -dlil_init() +__private_extern__ void +ifnet_lock_done( + struct ifnet *ifp) { - int i; - - TAILQ_INIT(&dlil_ifnet_head); - TAILQ_INIT(&if_family_head); - TAILQ_INIT(&proto_family_head); - - // create the dl tag array - MALLOC(dl_tag_array, void *, sizeof(struct dl_tag_str) * MAX_DL_TAGS, M_NKE, M_WAITOK); - if (dl_tag_array == 0) { - printf("dlil_init tags array allocation failed\n"); - return; //very bad - } - bzero(dl_tag_array, sizeof(struct dl_tag_str) * MAX_DL_TAGS); - dl_tag_nb = MAX_DL_TAGS; - - // create the dl filters array - MALLOC(dlil_filters, void *, sizeof(struct dlil_filter_id_str) * MAX_DLIL_FILTERS, M_NKE, M_WAITOK); - if (dlil_filters == 0) { - printf("dlil_init filters array allocation failed\n"); - return; //very bad - } - bzero(dlil_filters, sizeof(struct dlil_filter_id_str) * MAX_DLIL_FILTERS); - dlil_filters_nb = MAX_DLIL_FILTERS; - - bzero(&dlil_stats, sizeof(dlil_stats)); - - simple_lock_init(&dlil_input_lock); - - /* - * Start up the dlil input thread once everything is initialized - */ - (void) kernel_thread(kernel_task, dlil_input_thread); +#if IFNET_RW_LOCK + lck_rw_done(ifp->if_lock); +#else + lck_mtx_assert(ifp->if_lock, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(ifp->if_lock); +#endif } -u_long get_new_filter_id() +__private_extern__ void +ifnet_head_lock_shared() { - u_long i; - u_char *p; - - for (i=1; i < dlil_filters_nb; i++) - if (dlil_filters[i].type == 0) - break; - - if (i == dlil_filters_nb) { - // expand the filters array by MAX_DLIL_FILTERS - MALLOC(p, u_char *, sizeof(struct dlil_filter_id_str) * (dlil_filters_nb + MAX_DLIL_FILTERS), M_NKE, M_WAITOK); - if (p == 0) - return 0; - - bcopy(dlil_filters, p, sizeof(struct dlil_filter_id_str) * dlil_filters_nb); - bzero(p + sizeof(struct dlil_filter_id_str) * dlil_filters_nb, 
sizeof(struct dlil_filter_id_str) * MAX_DL_TAGS); - dlil_filters_nb += MAX_DLIL_FILTERS; - FREE(dlil_filters, M_NKE); - dlil_filters = (struct dlil_filter_id_str *)p; - } - - return i; + lck_rw_lock_shared(ifnet_head_mutex); } +__private_extern__ void +ifnet_head_lock_exclusive() +{ + lck_rw_lock_exclusive(ifnet_head_mutex); +} -int dlil_attach_interface_filter(struct ifnet *ifp, - struct dlil_if_flt_str *if_filter, - u_long *filter_id, - int insertion_point) +__private_extern__ void +ifnet_head_done() { - int s; - int retval = 0; - struct dlil_filterq_entry *tmp_ptr; - struct dlil_filterq_entry *if_filt; - struct dlil_filterq_head *fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - boolean_t funnel_state; + lck_rw_done(ifnet_head_mutex); +} - MALLOC(tmp_ptr, struct dlil_filterq_entry *, sizeof(*tmp_ptr), M_NKE, M_WAITOK); - if (tmp_ptr == NULL) - return (ENOBUFS); +/* + * Public functions. + */ +struct ifnet *ifbyfamily(u_long family, short unit) +{ + struct ifnet *ifp; - bcopy((caddr_t) if_filter, (caddr_t) &tmp_ptr->variants.if_filter, - sizeof(struct dlil_if_flt_str)); + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) + if ((family == ifp->if_family) && (ifp->if_unit == unit)) + break; + ifnet_head_done(); - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); + return ifp; +} - *filter_id = get_new_filter_id(); - if (*filter_id == 0) { - FREE(tmp_ptr, M_NKE); - retval = ENOMEM; - goto end; - } - - dlil_filters[*filter_id].filter_ptr = tmp_ptr; - dlil_filters[*filter_id].head = (struct dlil_filterq_head *) &ifp->if_flt_head; - dlil_filters[*filter_id].type = DLIL_IF_FILTER; - dlil_filters[*filter_id].ifp = ifp; - tmp_ptr->filter_id = *filter_id; - tmp_ptr->type = DLIL_IF_FILTER; - - if (insertion_point != DLIL_LAST_FILTER) { - TAILQ_FOREACH(if_filt, fhead, que) - if (insertion_point == if_filt->filter_id) { - TAILQ_INSERT_BEFORE(if_filt, tmp_ptr, que); - break; - } - } - else - TAILQ_INSERT_TAIL(fhead, tmp_ptr, que); +static int dlil_ifp_proto_count(struct ifnet * ifp) +{ + int count = 0; + int i; + + if (ifp->if_proto_hash != NULL) { + for (i = 0; i < PROTO_HASH_SLOTS; i++) { + struct if_proto *proto; + SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { + count++; + } + } + } + + return count; +} -end: - splx(s); - thread_funnel_set(network_flock, funnel_state); - return retval; +__private_extern__ void +dlil_post_msg(struct ifnet *ifp, u_long event_subclass, u_long event_code, + struct net_event_data *event_data, u_long event_data_len) +{ + struct net_event_data ev_data; + struct kev_msg ev_msg; + + /* + * a net event always start with a net_event_data structure + * but the caller can generate a simple net event or + * provide a longer event structure to post + */ + + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = event_subclass; + ev_msg.event_code = event_code; + + if (event_data == 0) { + event_data = &ev_data; + event_data_len = sizeof(struct net_event_data); + } + + strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); + event_data->if_family = ifp->if_family; + event_data->if_unit = (unsigned long) ifp->if_unit; + + ev_msg.dv[0].data_length = event_data_len; + ev_msg.dv[0].data_ptr = event_data; + ev_msg.dv[1].data_length = 0; + + dlil_event_internal(ifp, &ev_msg); } +void dlil_init(void); +void +dlil_init(void) +{ + lck_grp_attr_t *grp_attributes = 0; + lck_attr_t *lck_attributes = 0; + lck_grp_t *input_lock_grp = 0; + + TAILQ_INIT(&dlil_ifnet_head); + 
TAILQ_INIT(&if_family_head); + TAILQ_INIT(&proto_family_head); + TAILQ_INIT(&ifnet_head); + + /* Setup the lock groups we will use */ + grp_attributes = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attributes); + + dlil_lock_group = lck_grp_alloc_init("dlil internal locks", grp_attributes); +#if IFNET_RW_LOCK + ifnet_lock_group = lck_grp_alloc_init("ifnet locks", grp_attributes); +#else + ifnet_lock_group = lck_grp_alloc_init("ifnet locks", grp_attributes); +#endif + ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock", grp_attributes); + input_lock_grp = lck_grp_alloc_init("dlil input lock", grp_attributes); + lck_grp_attr_free(grp_attributes); + grp_attributes = 0; + + /* Setup the lock attributes we will use */ + lck_attributes = lck_attr_alloc_init(); + lck_attr_setdefault(lck_attributes); + + ifnet_lock_attr = lck_attr_alloc_init(); + lck_attr_setdefault(ifnet_lock_attr); + + dlil_input_lock = lck_spin_alloc_init(input_lock_grp, lck_attributes); + input_lock_grp = 0; + + ifnet_head_mutex = lck_rw_alloc_init(ifnet_head_lock_group, lck_attributes); + proto_family_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes); + dlil_ifnet_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes); + dlil_mutex = lck_mtx_alloc_init(dlil_lock_group, lck_attributes); + + lck_attr_free(lck_attributes); + lck_attributes = 0; + + /* + * Start up the dlil input thread once everything is initialized + */ + (void) kernel_thread(kernel_task, dlil_input_thread); + (void) kernel_thread(kernel_task, dlil_call_delayed_detach_thread); +} -int dlil_attach_protocol_filter(u_long dl_tag, - struct dlil_pr_flt_str *pr_filter, - u_long *filter_id, - int insertion_point) +int +dlil_attach_filter( + struct ifnet *ifp, + const struct iff_filter *if_filter, + interface_filter_t *filter_ref) { - struct dlil_filterq_entry *tmp_ptr, *pr_filt; - int s; int retval = 0; - boolean_t funnel_state; + struct ifnet_filter *filter; - if (dl_tag >= dl_tag_nb || dl_tag_array[dl_tag].ifp == 0) - return (ENOENT); - - MALLOC(tmp_ptr, struct dlil_filterq_entry *, sizeof(*tmp_ptr), M_NKE, M_WAITOK); - if (tmp_ptr == NULL) - return (ENOBUFS); - - bcopy((caddr_t) pr_filter, (caddr_t) &tmp_ptr->variants.pr_filter, - sizeof(struct dlil_pr_flt_str)); - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); + MALLOC(filter, struct ifnet_filter *, sizeof(*filter), M_NKE, M_WAITOK); + if (filter == NULL) + return ENOMEM; + bzero(filter, sizeof(*filter)); - *filter_id = get_new_filter_id(); - if (*filter_id == 0) { - FREE(tmp_ptr, M_NKE); - retval = ENOMEM; - goto end; - } - dlil_filters[*filter_id].filter_ptr = tmp_ptr; - dlil_filters[*filter_id].head = dl_tag_array[dl_tag].pr_flt_head; - dlil_filters[*filter_id].type = DLIL_PR_FILTER; - dlil_filters[*filter_id].proto = dl_tag_array[dl_tag].proto; - dlil_filters[*filter_id].ifp = dl_tag_array[dl_tag].ifp; - tmp_ptr->filter_id = *filter_id; - tmp_ptr->type = DLIL_PR_FILTER; - - if (insertion_point != DLIL_LAST_FILTER) { - TAILQ_FOREACH(pr_filt, dl_tag_array[dl_tag].pr_flt_head, que) - if (insertion_point == pr_filt->filter_id) { - TAILQ_INSERT_BEFORE(pr_filt, tmp_ptr, que); - break; - } - } - else - TAILQ_INSERT_TAIL(dl_tag_array[dl_tag].pr_flt_head, tmp_ptr, que); - -end: - splx(s); - thread_funnel_set(network_flock, funnel_state); - return retval; + filter->filt_ifp = ifp; + filter->filt_cookie = if_filter->iff_cookie; + filter->filt_name = if_filter->iff_name; + filter->filt_protocol = if_filter->iff_protocol; + filter->filt_input = 
if_filter->iff_input; + filter->filt_output = if_filter->iff_output; + filter->filt_event = if_filter->iff_event; + filter->filt_ioctl = if_filter->iff_ioctl; + filter->filt_detached = if_filter->iff_detached; + + if ((retval = dlil_write_begin()) != 0) { + /* Failed to acquire the write lock */ + FREE(filter, M_NKE); + return retval; + } + TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next); + dlil_write_end(); + *filter_ref = filter; + return retval; } - -int -dlil_detach_filter(u_long filter_id) +static int +dlil_detach_filter_internal(interface_filter_t filter, int detached) { - struct dlil_filter_id_str *flt; - int s, retval = 0; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); - - if (filter_id >= dlil_filters_nb || dlil_filters[filter_id].type == 0) { - retval = ENOENT; - goto end; - } - - flt = &dlil_filters[filter_id]; - - if (flt->type == DLIL_IF_FILTER) { - if (IFILT(flt->filter_ptr).filter_detach) - (*IFILT(flt->filter_ptr).filter_detach)(IFILT(flt->filter_ptr).cookie); - } - else { - if (flt->type == DLIL_PR_FILTER) { - if (PFILT(flt->filter_ptr).filter_detach) - (*PFILT(flt->filter_ptr).filter_detach)(PFILT(flt->filter_ptr).cookie); + int retval = 0; + + + /* Take the write lock */ +#if DLIL_ALWAYS_DELAY_DETACH + retval = EDEADLK; +#else + if (detached == 0 && (retval = dlil_write_begin()) != 0) +#endif + { + if (retval == EDEADLK) { + /* Perform a delayed detach */ + filter->filt_detaching = 1; + dlil_detach_waiting = 1; + wakeup(&dlil_detach_waiting); + retval = 0; + } + return retval; } - } - - TAILQ_REMOVE(flt->head, flt->filter_ptr, que); - FREE(flt->filter_ptr, M_NKE); - flt->type = 0; + + if (detached == 0) + TAILQ_REMOVE(&filter->filt_ifp->if_flt_head, filter, filt_next); + + /* release the write lock */ + if (detached == 0) + dlil_write_end(); + + if (filter->filt_detached) + filter->filt_detached(filter->filt_cookie, filter->filt_ifp); -end: - splx(s); - thread_funnel_set(network_flock, funnel_state); - return retval; + FREE(filter, M_NKE); + + return retval; } void -dlil_input_thread_continue(void) -{ - while (1) { - struct mbuf *m, *m_loop; - - usimple_lock(&dlil_input_lock); - m = dlil_input_mbuf_head; - dlil_input_mbuf_head = NULL; - dlil_input_mbuf_tail = NULL; - m_loop = dlil_input_loop_head; - dlil_input_loop_head = NULL; - dlil_input_loop_tail = NULL; - usimple_unlock(&dlil_input_lock); - - /* - * NOTE warning %%% attention !!!! - * We should think about putting some thread starvation safeguards if - * we deal with long chains of packets. 
- */ - while (m) { - struct mbuf *m0 = m->m_nextpkt; - void *header = m->m_pkthdr.header; - - m->m_nextpkt = NULL; - m->m_pkthdr.header = NULL; - (void) dlil_input_packet(m->m_pkthdr.rcvif, m, header); - m = m0; - } - m = m_loop; - while (m) { - struct mbuf *m0 = m->m_nextpkt; - void *header = m->m_pkthdr.header; - struct ifnet *ifp = &loif[0]; - - m->m_nextpkt = NULL; - m->m_pkthdr.header = NULL; - (void) dlil_input_packet(ifp, m, header); - m = m0; - } +dlil_detach_filter(interface_filter_t filter) +{ + dlil_detach_filter_internal(filter, 0); +} - if (netisr != 0) - run_netisr(); +static void +dlil_input_thread_continue( + __unused void* foo, + __unused wait_result_t wait) +{ + while (1) { + struct mbuf *m, *m_loop; + + lck_spin_lock(dlil_input_lock); + m = dlil_input_mbuf_head; + dlil_input_mbuf_head = NULL; + dlil_input_mbuf_tail = NULL; + m_loop = dlil_input_loop_head; + dlil_input_loop_head = NULL; + dlil_input_loop_tail = NULL; + lck_spin_unlock(dlil_input_lock); + + /* + * NOTE warning %%% attention !!!! + * We should think about putting some thread starvation safeguards if + * we deal with long chains of packets. + */ + while (m) { + struct mbuf *m0 = m->m_nextpkt; + void *header = m->m_pkthdr.header; + + m->m_nextpkt = NULL; + m->m_pkthdr.header = NULL; + (void) dlil_input_packet(m->m_pkthdr.rcvif, m, header); + m = m0; + } + m = m_loop; + while (m) { + struct mbuf *m0 = m->m_nextpkt; + void *header = m->m_pkthdr.header; + struct ifnet *ifp = &loif[0]; + + m->m_nextpkt = NULL; + m->m_pkthdr.header = NULL; + (void) dlil_input_packet(ifp, m, header); + m = m0; + } + + proto_input_run(); - if (dlil_input_mbuf_head == NULL && - dlil_input_loop_head == NULL && - netisr == 0) { - assert_wait(&dlil_input_thread_wakeup, THREAD_UNINT); - (void) thread_block(dlil_input_thread_continue); - /* NOTREACHED */ - } - } + if (dlil_input_mbuf_head == NULL && + dlil_input_loop_head == NULL) { + assert_wait(&dlil_input_thread_wakeup, THREAD_UNINT); + (void) thread_block(dlil_input_thread_continue); + /* NOTREACHED */ + } + } } void dlil_input_thread(void) { - register thread_t self = current_act(); - - ml_thread_policy(self, MACHINE_GROUP, - (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR)); + register thread_t self = current_thread(); + + ml_thread_policy(self, MACHINE_GROUP, + (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR)); + + dlil_initialized = 1; + dlil_input_thread_ptr = current_thread(); + dlil_input_thread_continue(NULL, THREAD_RESTART); +} - /* The dlil thread is always funneled */ - thread_funnel_set(network_flock, TRUE); - dlil_initialized = 1; - dlil_input_thread_continue(); +int +dlil_input_with_stats( + struct ifnet *ifp, + struct mbuf *m_head, + struct mbuf *m_tail, + const struct ifnet_stat_increment_param *stats) +{ + /* WARNING + * Because of loopbacked multicast we cannot stuff the ifp in + * the rcvif of the packet header: loopback has its own dlil + * input queue + */ + + lck_spin_lock(dlil_input_lock); + if (ifp->if_type != IFT_LOOP) { + if (dlil_input_mbuf_head == NULL) + dlil_input_mbuf_head = m_head; + else if (dlil_input_mbuf_tail != NULL) + dlil_input_mbuf_tail->m_nextpkt = m_head; + dlil_input_mbuf_tail = m_tail ? m_tail : m_head; + } else { + if (dlil_input_loop_head == NULL) + dlil_input_loop_head = m_head; + else if (dlil_input_loop_tail != NULL) + dlil_input_loop_tail->m_nextpkt = m_head; + dlil_input_loop_tail = m_tail ? 
m_tail : m_head; + } + if (stats) { + ifp->if_data.ifi_ipackets += stats->packets_in; + ifp->if_data.ifi_ibytes += stats->bytes_in; + ifp->if_data.ifi_ierrors += stats->errors_in; + + ifp->if_data.ifi_opackets += stats->packets_out; + ifp->if_data.ifi_obytes += stats->bytes_out; + ifp->if_data.ifi_oerrors += stats->errors_out; + + ifp->if_data.ifi_collisions += stats->collisions; + ifp->if_data.ifi_iqdrops += stats->dropped; + } + lck_spin_unlock(dlil_input_lock); + + wakeup((caddr_t)&dlil_input_thread_wakeup); + + return 0; } int dlil_input(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail) -{ - /* WARNING - * Because of loopbacked multicast we cannot stuff the ifp in - * the rcvif of the packet header: loopback has its own dlil - * input queue - */ - - usimple_lock(&dlil_input_lock); - if (ifp->if_type != IFT_LOOP) { - if (dlil_input_mbuf_head == NULL) - dlil_input_mbuf_head = m_head; - else if (dlil_input_mbuf_tail != NULL) - dlil_input_mbuf_tail->m_nextpkt = m_head; - dlil_input_mbuf_tail = m_tail ? m_tail : m_head; - } else { - if (dlil_input_loop_head == NULL) - dlil_input_loop_head = m_head; - else if (dlil_input_loop_tail != NULL) - dlil_input_loop_tail->m_nextpkt = m_head; - dlil_input_loop_tail = m_tail ? m_tail : m_head; - } - usimple_unlock(&dlil_input_lock); - - wakeup((caddr_t)&dlil_input_thread_wakeup); - - return 0; +{ + return dlil_input_with_stats(ifp, m_head, m_tail, NULL); } int -dlil_input_packet(struct ifnet *ifp, struct mbuf *m, +dlil_input_packet(struct ifnet *ifp, struct mbuf *m, char *frame_header) { - struct ifnet *orig_ifp = 0; - struct dlil_filterq_entry *tmp; int retval; struct if_proto *ifproto = 0; - struct if_proto *proto; - struct dlil_filterq_head *fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; + protocol_family_t protocol_family; + struct ifnet_filter *filter; KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0); - /* - * Run interface filters - */ - - while (orig_ifp != ifp) { - orig_ifp = ifp; - - TAILQ_FOREACH_REVERSE(tmp, fhead, que, dlil_filterq_head) { - if (IFILT(tmp).filter_if_input) { - retval = (*IFILT(tmp).filter_if_input)(IFILT(tmp).cookie, - &ifp, - &m, - &frame_header); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } + /* + * Lock the interface while we run through + * the filters and the demux. This lock + * protects the filter list and the demux list. + */ + dlil_read_begin(); - if (ifp != orig_ifp) - break; - } - } + /* + * Call family demux module. If the demux module finds a match + * for the frame it will fill-in the ifproto pointer. + */ - ifp->if_lastchange = time; - - /* - * Call family demux module. If the demux module finds a match - * for the frame it will fill-in the ifproto pointer. - */ + retval = ifp->if_demux(ifp, m, frame_header, &protocol_family); + if (retval != 0) + protocol_family = 0; + if (retval == EJUSTRETURN) { + dlil_read_end(); + return 0; + } - retval = (*ifp->if_demux)(ifp, m, frame_header, &ifproto ); + /* DANGER!!! */ + if (m->m_flags & (M_BCAST|M_MCAST)) + ifp->if_imcasts++; - if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; - - if ((retval) && (retval != EJUSTRETURN) && (ifp->offercnt)) { /* - * No match was found, look for any offers. 
+ * Run interface filters */ - struct dlil_proto_head *tmp = (struct dlil_proto_head *) &ifp->proto_head; - TAILQ_FOREACH(proto, tmp, next) { - if ((proto->dl_offer) && (proto->dl_offer(m, frame_header) == 0)) { - ifproto = proto; - retval = 0; - break; - } + + /* Do not pass VLAN tagged packets to filters PR-3586856 */ + if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { + TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { + int filter_result; + if (filter->filt_input && (filter->filt_protocol == 0 || + filter->filt_protocol == protocol_family)) { + filter_result = filter->filt_input(filter->filt_cookie, ifp, protocol_family, &m, &frame_header); + + if (filter_result) { + dlil_read_end(); + if (filter_result == EJUSTRETURN) { + filter_result = 0; + } + else { + m_freem(m); + } + + return filter_result; + } + } + } } - } - if (retval) { - if (retval != EJUSTRETURN) { - m_freem(m); - return retval; + /* Demux is done, interface filters have been processed, unlock the mutex */ + if (retval || ((m->m_flags & M_PROMISC) != 0) ) { + dlil_read_end(); + if (retval != EJUSTRETURN) { + m_freem(m); + return retval; + } + else + return 0; } - else - return 0; - } - else + + ifproto = find_attached_proto(ifp, protocol_family); + if (ifproto == 0) { - printf("ERROR - dlil_input - if_demux didn't return an if_proto pointer\n"); - m_freem(m); - return 0; + dlil_read_end(); + DLIL_PRINTF("ERROR - dlil_input - if_demux didn't return an if_proto pointer\n"); + m_freem(m); + return 0; } + + /* + * Hand the packet off to the protocol. + */ -/* - * Call any attached protocol filters. - */ - - TAILQ_FOREACH_REVERSE(tmp, &ifproto->pr_flt_head, que, dlil_filterq_head) { - if (PFILT(tmp).filter_dl_input) { - retval = (*PFILT(tmp).filter_dl_input)(PFILT(tmp).cookie, - &m, - &frame_header, - &ifp); + if (ifproto->dl_domain && (ifproto->dl_domain->dom_flags & DOM_REENTRANT) == 0) { + lck_mtx_lock(ifproto->dl_domain->dom_mtx); + } - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } - } + if (ifproto->proto_kpi == kProtoKPI_DLIL) + retval = (*ifproto->kpi.dlil.dl_input)(m, frame_header, + ifp, ifproto->protocol_family, + TRUE); + else + retval = ifproto->kpi.v1.input(ifp, ifproto->protocol_family, m, frame_header); + if (ifproto->dl_domain && (ifproto->dl_domain->dom_flags & DOM_REENTRANT) == 0) { + lck_mtx_unlock(ifproto->dl_domain->dom_mtx); + } + dlil_read_end(); - retval = (*ifproto->dl_input)(m, frame_header, - ifp, ifproto->dl_tag, - TRUE); - - if (retval == EJUSTRETURN) - retval = 0; - else - if (retval) - m_freem(m); + if (retval == EJUSTRETURN) + retval = 0; + else + if (retval) + m_freem(m); - KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0); - return retval; + KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0); + return retval; } - - -void ether_input(ifp, eh, m) - struct ifnet *ifp; - struct ether_header *eh; - struct mbuf *m; - +static int +dlil_event_internal(struct ifnet *ifp, struct kev_msg *event) { - kprintf("Someone is calling ether_input!!\n"); - - dlil_input(ifp, m, NULL); + struct ifnet_filter *filter; + + if (ifp_use(ifp, kIfNetUseCount_MustNotBeZero) == 0) { + dlil_read_begin(); + + /* Pass the event to the interface filters */ + TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { + if (filter->filt_event) + filter->filt_event(filter->filt_cookie, ifp, filter->filt_protocol, event); + } + + if (ifp->if_proto_hash) { + int i; + + for (i = 0; i < PROTO_HASH_SLOTS; i++) { + struct if_proto *proto; + + 
SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { + /* Pass the event to the protocol */ + if (proto->proto_kpi == kProtoKPI_DLIL) { + if (proto->kpi.dlil.dl_event) + proto->kpi.dlil.dl_event(ifp, event); + } + else { + if (proto->kpi.v1.event) + proto->kpi.v1.event(ifp, proto->protocol_family, event); + } + } + } + } + + dlil_read_end(); + + /* Pass the event to the interface */ + if (ifp->if_event) + ifp->if_event(ifp, event); + + if (ifp_unuse(ifp)) + ifp_use_reached_zero(ifp); + } + + return kev_post_msg(event); } - int dlil_event(struct ifnet *ifp, struct kern_event_msg *event) { - struct dlil_filterq_entry *filt; - int retval = 0; - struct ifnet *orig_ifp = 0; - struct if_proto *proto; - struct dlil_filterq_head *fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - struct kev_msg kev_msg; - struct dlil_proto_head *tmp = (struct dlil_proto_head *) &ifp->proto_head; - boolean_t funnel_state; - - - funnel_state = thread_funnel_set(network_flock, TRUE); - - while (orig_ifp != ifp) { - orig_ifp = ifp; - - TAILQ_FOREACH_REVERSE(filt, fhead, que, dlil_filterq_head) { - if (IFILT(filt).filter_if_event) { - retval = (*IFILT(filt).filter_if_event)(IFILT(filt).cookie, - &ifp, - &event); - - if (retval) { - (void) thread_funnel_set(network_flock, funnel_state); - if (retval == EJUSTRETURN) - return 0; - else - return retval; - } - } - - if (ifp != orig_ifp) - break; - } - } + int result = 0; + struct kev_msg kev_msg; - /* - * Call Interface Module event hook, if any. - */ + kev_msg.vendor_code = event->vendor_code; + kev_msg.kev_class = event->kev_class; + kev_msg.kev_subclass = event->kev_subclass; + kev_msg.event_code = event->event_code; + kev_msg.dv[0].data_ptr = &event->event_data[0]; + kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE; + kev_msg.dv[1].data_length = 0; + - if (ifp->if_event) { - retval = ifp->if_event(ifp, (caddr_t) event); + result = dlil_event_internal(ifp, &kev_msg); - if (retval) { - (void) thread_funnel_set(network_flock, funnel_state); - if (retval == EJUSTRETURN) - return 0; - else - return retval; - } - } + return result; +} - /* - * Call dl_event entry point for all protocols attached to this interface - */ +dlil_output_list( + struct ifnet* ifp, + u_long proto_family, + struct mbuf *packetlist, + caddr_t route, + const struct sockaddr *dest, + int raw) +{ + char *frame_type = 0; + char *dst_linkaddr = 0; + int error, retval = 0; + char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; + char dst_linkaddr_buffer[MAX_LINKADDR * 4]; + struct ifnet_filter *filter; + struct if_proto *proto = 0; + struct mbuf *m; + + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0); +#if BRIDGE + if ((raw != 0) || proto_family != PF_INET || do_brige) { +#else + if ((raw != 0) || proto_family != PF_INET) { +#endif + while (packetlist) { + m = packetlist; + packetlist = packetlist->m_nextpkt; + m->m_nextpkt = NULL; + error = dlil_output(ifp, proto_family, m, route, dest, raw); + if (error) { + if (packetlist) + m_freem_list(packetlist); + return (error); + } + } + return (0); + } + + dlil_read_begin(); + + frame_type = frame_type_buffer; + dst_linkaddr = dst_linkaddr_buffer; + m = packetlist; + packetlist = packetlist->m_nextpkt; + m->m_nextpkt = NULL; + + proto = find_attached_proto(ifp, proto_family); + if (proto == NULL) { + retval = ENXIO; + goto cleanup; + } - TAILQ_FOREACH(proto, tmp, next) { - /* - * Call any attached protocol filters. 
- */ + retval = 0; + if (proto->proto_kpi == kProtoKPI_DLIL) { + if (proto->kpi.dlil.dl_pre_output) + retval = proto->kpi.dlil.dl_pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr); + } + else { + if (proto->kpi.v1.pre_output) + retval = proto->kpi.v1.pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr); + } - TAILQ_FOREACH_REVERSE(filt, &proto->pr_flt_head, que, dlil_filterq_head) { - if (PFILT(filt).filter_dl_event) { - retval = (*PFILT(filt).filter_dl_event)(PFILT(filt).cookie, - event); - - if (retval) { - (void) thread_funnel_set(network_flock, funnel_state); - if (retval == EJUSTRETURN) - return 0; - else - return retval; - } - } - } - - - /* - * Finally, call the dl_event entry point (if any) - */ - - if (proto->dl_event) - retval = (*proto->dl_event)(event, proto->dl_tag); - - if (retval == EJUSTRETURN) { - (void) thread_funnel_set(network_flock, funnel_state); - return 0; - } - } - + if (retval) { + if (retval != EJUSTRETURN) { + m_freem(m); + } + goto cleanup; + } - /* - * Now, post this event to the Kernel Event message queue - */ + do { + + + if (ifp->if_framer) { + retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type); + if (retval) { + if (retval != EJUSTRETURN) { + m_freem(m); + } + goto cleanup; + } + } + + /* + * Let interface filters (if any) do their thing ... + */ + /* Do not pass VLAN tagged packets to filters PR-3586856 */ + if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { + TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { + if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) && + filter->filt_output) { + retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m); + if (retval) { + if (retval == EJUSTRETURN) + continue; + else { + m_freem(m); + } + goto cleanup; + } + } + } + } + /* + * Finally, call the driver. 
+ */ + + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); + retval = ifp->if_output(ifp, m); + if (retval) { + printf("dlil_output_list: output error retval = %x\n", retval); + goto cleanup; + } + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); - kev_msg.vendor_code = event->vendor_code; - kev_msg.kev_class = event->kev_class; - kev_msg.kev_subclass = event->kev_subclass; - kev_msg.event_code = event->event_code; - kev_msg.dv[0].data_ptr = &event->event_data[0]; - kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE; - kev_msg.dv[1].data_length = 0; + m = packetlist; + if (m) { + packetlist = packetlist->m_nextpkt; + m->m_nextpkt = NULL; + } + } while (m); - kev_post_msg(&kev_msg); + + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0); - (void) thread_funnel_set(network_flock, funnel_state); - return 0; +cleanup: + dlil_read_end(); + if (packetlist) /* if any packet left, clean up */ + m_freem_list(packetlist); + if (retval == EJUSTRETURN) + retval = 0; + return retval; } - - -int -dlil_output(u_long dl_tag, - struct mbuf *m, - caddr_t route, - struct sockaddr *dest, - int raw - ) -{ - char *frame_type; - char *dst_linkaddr; - struct ifnet *orig_ifp = 0; - struct ifnet *ifp; - struct if_proto *proto; - struct dlil_filterq_entry *tmp; - int retval = 0; - char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; - char dst_linkaddr_buffer[MAX_LINKADDR * 4]; - struct dlil_filterq_head *fhead; - - KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0); - - if (dl_tag >= dl_tag_nb || dl_tag_array[dl_tag].ifp == 0) { - m_freem(m); - return ENOENT; - } - - ifp = dl_tag_array[dl_tag].ifp; - proto = dl_tag_array[dl_tag].proto; - - frame_type = frame_type_buffer; - dst_linkaddr = dst_linkaddr_buffer; - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - - if ((raw == 0) && (proto->dl_pre_output)) { - retval = (*proto->dl_pre_output)(ifp, &m, dest, route, - frame_type, dst_linkaddr, dl_tag); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } - /* - * Run any attached protocol filters. + * dlil_output + * + * Caller should have a lock on the protocol domain if the protocol + * doesn't support finer grained locking. In most cases, the lock + * will be held from the socket layer and won't be released until + * we return back to the socket layer. + * + * This does mean that we must take a protocol lock before we take + * an interface lock if we're going to take both. This makes sense + * because a protocol is likely to interact with an ifp while it + * is under the protocol lock. 
*/ - - if (TAILQ_EMPTY(dl_tag_array[dl_tag].pr_flt_head) == 0) { - TAILQ_FOREACH(tmp, dl_tag_array[dl_tag].pr_flt_head, que) { - if (PFILT(tmp).filter_dl_output) { - retval = (*PFILT(tmp).filter_dl_output)(PFILT(tmp).cookie, - &m, &ifp, &dest, dst_linkaddr, frame_type); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { +int +dlil_output( + struct ifnet* ifp, + u_long proto_family, + struct mbuf *m, + caddr_t route, + const struct sockaddr *dest, + int raw) +{ + char *frame_type = 0; + char *dst_linkaddr = 0; + int retval = 0; + char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; + char dst_linkaddr_buffer[MAX_LINKADDR * 4]; + struct ifnet_filter *filter; + + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0); + + dlil_read_begin(); + + frame_type = frame_type_buffer; + dst_linkaddr = dst_linkaddr_buffer; + + if (raw == 0) { + struct if_proto *proto = 0; + + proto = find_attached_proto(ifp, proto_family); + if (proto == NULL) { m_freem(m); - return retval; - } + retval = ENXIO; + goto cleanup; + } + + retval = 0; + if (proto->proto_kpi == kProtoKPI_DLIL) { + if (proto->kpi.dlil.dl_pre_output) + retval = proto->kpi.dlil.dl_pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr); + } + else { + if (proto->kpi.v1.pre_output) + retval = proto->kpi.v1.pre_output(ifp, proto_family, &m, dest, route, frame_type, dst_linkaddr); + } + + if (retval) { + if (retval != EJUSTRETURN) { + m_freem(m); + } + goto cleanup; } - } } - } - - -/* - * Call framing module - */ - if ((raw == 0) && (ifp->if_framer)) { - retval = (*ifp->if_framer)(ifp, &m, dest, dst_linkaddr, frame_type); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else - { - m_freem(m); - return retval; - } + + /* + * Call framing module + */ + if ((raw == 0) && (ifp->if_framer)) { + retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, frame_type); + if (retval) { + if (retval != EJUSTRETURN) { + m_freem(m); + } + goto cleanup; + } } - } - -#if BRIDGE - if (do_bridge) { - struct mbuf *m0 = m ; - struct ether_header *eh = mtod(m, struct ether_header *); - if (m->m_pkthdr.rcvif) - m->m_pkthdr.rcvif = NULL ; - ifp = bridge_dst_lookup(eh); - bdg_forward(&m0, ifp); - if (m0) - m_freem(m0); - - return 0; - } +#if BRIDGE + /* !!!LOCKING!!! + * + * Need to consider how to handle this. + */ + broken-locking + if (do_bridge) { + struct mbuf *m0 = m; + struct ether_header *eh = mtod(m, struct ether_header *); + + if (m->m_pkthdr.rcvif) + m->m_pkthdr.rcvif = NULL; + ifp = bridge_dst_lookup(eh); + bdg_forward(&m0, ifp); + if (m0) + m_freem(m0); + + return 0; + } #endif - - -/* - * Let interface filters (if any) do their thing ... - */ - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - if (TAILQ_EMPTY(fhead) == 0) { - while (orig_ifp != ifp) { - orig_ifp = ifp; - TAILQ_FOREACH(tmp, fhead, que) { - if (IFILT(tmp).filter_if_output) { - retval = (*IFILT(tmp).filter_if_output)(IFILT(tmp).cookie, - &ifp, - &m); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; + + + /* + * Let interface filters (if any) do their thing ... 
+ */ + + /* Do not pass VLAN tagged packets to filters PR-3586856 */ + if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { + TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { + if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_family)) && + filter->filt_output) { + retval = filter->filt_output(filter->filt_cookie, ifp, proto_family, &m); + if (retval) { + if (retval != EJUSTRETURN) + m_freem(m); + goto cleanup; + } } - } - } - - if (ifp != orig_ifp) - break; - } } - } - -/* - * Finally, call the driver. - */ - - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); - retval = (*ifp->if_output)(ifp, m); - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); - - KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0); + + /* + * Finally, call the driver. + */ + + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); + retval = ifp->if_output(ifp, m); + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); + + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0); - if ((retval == 0) || (retval == EJUSTRETURN)) - return 0; - else +cleanup: + dlil_read_end(); + if (retval == EJUSTRETURN) + retval = 0; return retval; } - int dlil_ioctl(u_long proto_fam, struct ifnet *ifp, u_long ioctl_code, caddr_t ioctl_arg) { - struct dlil_filterq_entry *tmp; - struct dlil_filterq_head *fhead; - int retval = EOPNOTSUPP; - int retval2 = EOPNOTSUPP; - u_long dl_tag; - struct if_family_str *if_family; + struct ifnet_filter *filter; + int retval = EOPNOTSUPP; + int result = 0; + struct if_family_str *if_family; + int holding_read = 0; + + /* Attempt to increment the use count. If it's zero, bail out, the ifp is invalid */ + result = ifp_use(ifp, kIfNetUseCount_MustNotBeZero); + if (result != 0) + return EOPNOTSUPP; + + dlil_read_begin(); + holding_read = 1; + + /* Run the interface filters first. + * We want to run all filters before calling the protocol, + * interface family, or interface. + */ + TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { + if ((filter->filt_protocol == 0 || (filter->filt_protocol == proto_fam)) && + filter->filt_ioctl != NULL) { + result = filter->filt_ioctl(filter->filt_cookie, ifp, proto_fam, ioctl_code, ioctl_arg); + /* Only update retval if no one has handled the ioctl */ + if (retval == EOPNOTSUPP || result == EJUSTRETURN) { + if (result == ENOTSUP) + result = EOPNOTSUPP; + retval = result; + if (retval && retval != EOPNOTSUPP) { + goto cleanup; + } + } + } + } + + /* Allow the protocol to handle the ioctl */ + if (proto_fam) { + struct if_proto *proto = find_attached_proto(ifp, proto_fam); + + if (proto != 0) { + result = EOPNOTSUPP; + if (proto->proto_kpi == kProtoKPI_DLIL) { + if (proto->kpi.dlil.dl_ioctl) + result = proto->kpi.dlil.dl_ioctl(proto_fam, ifp, ioctl_code, ioctl_arg); + } + else { + if (proto->kpi.v1.ioctl) + result = proto->kpi.v1.ioctl(ifp, proto_fam, ioctl_code, ioctl_arg); + } + + /* Only update retval if no one has handled the ioctl */ + if (retval == EOPNOTSUPP || result == EJUSTRETURN) { + if (result == ENOTSUP) + result = EOPNOTSUPP; + retval = result; + if (retval && retval != EOPNOTSUPP) { + goto cleanup; + } + } + } + } + + /* + * Since we have incremented the use count on the ifp, we are guaranteed + * that the ifp will not go away (the function pointers may not be changed). + * We release the dlil read lock so the interface ioctl may trigger a + * protocol attach. This happens with vlan and may occur with other virtual + * interfaces. 
+ */ + dlil_read_end(); + holding_read = 0; + + /* retval is either 0 or EOPNOTSUPP */ + + /* + * Let the family handle this ioctl. + * If it returns something non-zero and not EOPNOTSUPP, we're done. + * If it returns zero, the ioctl was handled, so set retval to zero. + */ + if_family = find_family_module(ifp->if_family); + if ((if_family) && (if_family->ifmod_ioctl)) { + result = (*if_family->ifmod_ioctl)(ifp, ioctl_code, ioctl_arg); + + /* Only update retval if no one has handled the ioctl */ + if (retval == EOPNOTSUPP || result == EJUSTRETURN) { + if (result == ENOTSUP) + result = EOPNOTSUPP; + retval = result; + if (retval && retval != EOPNOTSUPP) { + goto cleanup; + } + } + } + + /* + * Let the interface handle this ioctl. + * If it returns EOPNOTSUPP, ignore that, we may have + * already handled this in the protocol or family. + */ + if (ifp->if_ioctl) + result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg); + + /* Only update retval if no one has handled the ioctl */ + if (retval == EOPNOTSUPP || result == EJUSTRETURN) { + if (result == ENOTSUP) + result = EOPNOTSUPP; + retval = result; + if (retval && retval != EOPNOTSUPP) { + goto cleanup; + } + } + +cleanup: + if (holding_read) + dlil_read_end(); + if (ifp_unuse(ifp)) + ifp_use_reached_zero(ifp); + if (retval == EJUSTRETURN) + retval = 0; + return retval; +} - if (proto_fam) { - if (dlil_find_dltag(ifp->if_family, ifp->if_unit, - proto_fam, &dl_tag) == 0) { - if (dl_tag_array[dl_tag].ifp != ifp) - return ENOENT; +__private_extern__ errno_t +dlil_set_bpf_tap( + ifnet_t ifp, + bpf_tap_mode mode, + bpf_packet_func callback) +{ + errno_t error = 0; -/* - * Run any attached protocol filters. - */ - TAILQ_FOREACH(tmp, dl_tag_array[dl_tag].pr_flt_head, que) { - if (PFILT(tmp).filter_dl_ioctl) { - retval = - (*PFILT(tmp).filter_dl_ioctl)(PFILT(tmp).cookie, - dl_tag_array[dl_tag].ifp, - ioctl_code, - ioctl_arg); - - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else - return retval; - } - } - } - - if (dl_tag_array[dl_tag].proto->dl_ioctl) - retval = - (*dl_tag_array[dl_tag].proto->dl_ioctl)(dl_tag, - dl_tag_array[dl_tag].ifp, - ioctl_code, - ioctl_arg); - else - retval = EOPNOTSUPP; - } - } - - if ((retval) && (retval != EOPNOTSUPP)) { - if (retval == EJUSTRETURN) - return 0; - else - return retval; - } - - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - TAILQ_FOREACH(tmp, fhead, que) { - if (IFILT(tmp).filter_if_ioctl) { - retval2 = (*IFILT(tmp).filter_if_ioctl)(IFILT(tmp).cookie, ifp, - ioctl_code, ioctl_arg); - if (retval2) { - if (retval2 == EJUSTRETURN) - return 0; - else - return retval2; - } - } - } - - - if_family = find_family_module(ifp->if_family); - if ((if_family) && (if_family->ifmod_ioctl)) { - retval2 = (*if_family->ifmod_ioctl)(ifp, ioctl_code, ioctl_arg); - - if ((retval2) && (retval2 != EOPNOTSUPP)) { - if (retval2 == EJUSTRETURN) - return 0; - else - return retval; - } - - if (retval == EOPNOTSUPP) - retval = retval2; - } - - if (ifp->if_ioctl) - retval2 = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg); - - if (retval == EOPNOTSUPP) - return retval2; - else { - if (retval2 == EOPNOTSUPP) - return 0; - else - return retval2; - } + dlil_read_begin(); + if (ifp->if_set_bpf_tap) + error = ifp->if_set_bpf_tap(ifp, mode, callback); + dlil_read_end(); + + return error; } - -int -dlil_attach_protocol(struct dlil_proto_reg_str *proto, - u_long *dl_tag) +__private_extern__ errno_t +dlil_resolve_multi( + struct ifnet *ifp, + const struct sockaddr *proto_addr, + struct sockaddr *ll_addr, + size_t ll_len) { 
- struct ifnet *ifp; - struct if_proto *ifproto; - u_long i; - struct if_family_str *if_family; - struct dlil_proto_head *tmp; - struct kev_dl_proto_data ev_pr_data; - int s, retval = 0; - boolean_t funnel_state; - u_char *p; - - if ((proto->protocol_family == 0) || (proto->interface_family == 0)) - return EINVAL; - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); - if_family = find_family_module(proto->interface_family); - if ((!if_family) || (if_family->flags & DLIL_SHUTDOWN)) { - kprintf("dlil_attach_protocol -- no interface family module %d", - proto->interface_family); - retval = ENOENT; - goto end; - } + errno_t result = EOPNOTSUPP; + struct if_proto *proto; + const struct sockaddr *verify; + + dlil_read_begin(); + + bzero(ll_addr, ll_len); + + /* Call the protocol first */ + proto = find_attached_proto(ifp, proto_addr->sa_family); + if (proto != NULL && proto->proto_kpi != kProtoKPI_DLIL && + proto->kpi.v1.resolve_multi != NULL) { + result = proto->kpi.v1.resolve_multi(ifp, proto_addr, + (struct sockaddr_dl*)ll_addr, ll_len); + } + + /* Let the interface verify the multicast address */ + if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) { + if (result == 0) + verify = ll_addr; + else + verify = proto_addr; + result = ifp->if_check_multi(ifp, verify); + } + + dlil_read_end(); + + return result; +} - ifp = ifbyfamily(proto->interface_family, proto->unit_number); - if (!ifp) { - kprintf("dlil_attach_protocol -- no such interface %d unit %d\n", - proto->interface_family, proto->unit_number); - retval = ENOENT; - goto end; - } +__private_extern__ errno_t +dlil_send_arp_internal( + ifnet_t ifp, + u_short arpop, + const struct sockaddr_dl* sender_hw, + const struct sockaddr* sender_proto, + const struct sockaddr_dl* target_hw, + const struct sockaddr* target_proto) +{ + struct if_proto *proto; + errno_t result = 0; + + dlil_read_begin(); + + proto = find_attached_proto(ifp, target_proto->sa_family); + if (proto == NULL || proto->proto_kpi == kProtoKPI_DLIL || + proto->kpi.v1.send_arp == NULL) { + result = ENOTSUP; + } + else { + result = proto->kpi.v1.send_arp(ifp, arpop, sender_hw, sender_proto, + target_hw, target_proto); + } + + dlil_read_end(); + + return result; +} - if (dlil_find_dltag(proto->interface_family, proto->unit_number, - proto->protocol_family, &i) == 0) { - retval = EEXIST; - goto end; - } +__private_extern__ errno_t +dlil_send_arp( + ifnet_t ifp, + u_short arpop, + const struct sockaddr_dl* sender_hw, + const struct sockaddr* sender_proto, + const struct sockaddr_dl* target_hw, + const struct sockaddr* target_proto) +{ + errno_t result = 0; + + if (target_proto == NULL || (sender_proto && + sender_proto->sa_family != target_proto->sa_family)) + return EINVAL; + + /* + * If this is an ARP request and the target IP is IPv4LL, + * send the request on all interfaces. + */ + if (IN_LINKLOCAL(((const struct sockaddr_in*)target_proto)->sin_addr.s_addr) + && ipv4_ll_arp_aware != 0 && target_proto->sa_family == AF_INET && + arpop == ARPOP_REQUEST) { + ifnet_t *ifp_list; + u_int32_t count; + u_int32_t ifp_on; + + result = ENOTSUP; + + if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) { + for (ifp_on = 0; ifp_on < count; ifp_on++) { + errno_t new_result; + ifaddr_t source_hw = NULL; + ifaddr_t source_ip = NULL; + struct sockaddr_in source_ip_copy; + + /* + * Only arp on interfaces marked for IPv4LL ARPing. This may + * mean that we don't ARP on the interface the subnet route + * points to. 
+				 */
+				if ((ifp_list[ifp_on]->if_eflags & IFEF_ARPLL) == 0) {
+					continue;
+				}
+
+				source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead);
+
+				/* Find the source IP address */
+				ifnet_lock_shared(ifp_list[ifp_on]);
+				TAILQ_FOREACH(source_ip, &ifp_list[ifp_on]->if_addrhead,
+							  ifa_link) {
+					if (source_ip->ifa_addr &&
+						source_ip->ifa_addr->sa_family == AF_INET) {
+						break;
+					}
+				}
+
+				/* No IP source, don't ARP */
+				if (source_ip == NULL) {
+					ifnet_lock_done(ifp_list[ifp_on]);
+					continue;
+				}
+
+				/* Copy the source IP address */
+				source_ip_copy = *(struct sockaddr_in*)source_ip->ifa_addr;
+
+				ifnet_lock_done(ifp_list[ifp_on]);
+
+				/* Send the ARP */
+				new_result = dlil_send_arp_internal(ifp_list[ifp_on], arpop,
+									(struct sockaddr_dl*)source_hw->ifa_addr,
+									(struct sockaddr*)&source_ip_copy, NULL,
+									target_proto);
+
+				if (result == ENOTSUP) {
+					result = new_result;
+				}
+			}
+		}
+
+		ifnet_list_free(ifp_list);
+	}
+	else {
+		result = dlil_send_arp_internal(ifp, arpop, sender_hw, sender_proto,
+						target_hw, target_proto);
+	}
+
+	return result;
+}

-	for (i=1; i < dl_tag_nb; i++)
-		if (dl_tag_array[i].ifp == 0)
-			break;
+static int
+ifp_use(
+	struct ifnet *ifp,
+	int	handle_zero)
+{
+	int old_value;
+	int retval = 0;
+
+	do {
+		old_value = ifp->if_usecnt;
+		if (old_value == 0 && handle_zero == kIfNetUseCount_MustNotBeZero) {
+			retval = ENXIO; // ifp is invalid
+			break;
+		}
+	} while (!OSCompareAndSwap((UInt32)old_value, (UInt32)old_value + 1, (UInt32*)&ifp->if_usecnt));
+
+	return retval;
+}

-	if (i == dl_tag_nb) {
-		// expand the tag array by MAX_DL_TAGS
-		MALLOC(p, u_char *, sizeof(struct dl_tag_str) * (dl_tag_nb + MAX_DL_TAGS), M_NKE, M_WAITOK);
-		if (p == 0) {
-			retval = ENOBUFS;
-			goto end;
-		}
-		bcopy(dl_tag_array, p, sizeof(struct dl_tag_str) * dl_tag_nb);
-		bzero(p + sizeof(struct dl_tag_str) * dl_tag_nb, sizeof(struct dl_tag_str) * MAX_DL_TAGS);
-		dl_tag_nb += MAX_DL_TAGS;
-		FREE(dl_tag_array, M_NKE);
-		dl_tag_array = (struct dl_tag_str *)p;
-	}
-
-	/*
-	 * Allocate and init a new if_proto structure
-	 */
+/* ifp_unuse is broken into two pieces.
+ *
+ * ifp_use and ifp_unuse must be called between the caller's
+ * dlil_write_begin and dlil_write_end calls. ifp_unuse needs to perform
+ * some operations after dlil_write_end has been called. For this reason,
+ * any caller of ifp_unuse that gets a non-zero return value must call
+ * ifp_use_reached_zero, and must only do so after calling dlil_write_end.
+ */
+static void
+ifp_use_reached_zero(
+	struct ifnet *ifp)
+{
+	struct if_family_str *if_family;
+	ifnet_detached_func free_func;
+
+	dlil_read_begin();
+
+	if (ifp->if_usecnt != 0)
+		panic("ifp_use_reached_zero: ifp->if_usecnt != 0");
+
+	/* Let BPF know we're detaching */
+	bpfdetach(ifp);
+
+	ifnet_head_lock_exclusive();
+	ifnet_lock_exclusive(ifp);
+
+	/* Remove ourselves from the list */
+	TAILQ_REMOVE(&ifnet_head, ifp, if_link);
+	ifnet_addrs[ifp->if_index - 1] = 0;
+
+	/* ifp has now been removed from the interface list */
+	while (ifp->if_multiaddrs.lh_first) {
+		struct ifmultiaddr *ifma = ifp->if_multiaddrs.lh_first;
+
+		/*
+		 * When the interface is gone, we will no longer
+		 * be listening on these multicasts. Various bits
+		 * of the stack may be referencing these multicasts,
+		 * so release only our reference.
+		 */
+		LIST_REMOVE(ifma, ifma_link);
+		ifma->ifma_ifp = NULL;
+		ifma_release(ifma);
+	}
+	ifnet_head_done();
+
+	ifp->if_eflags &= ~IFEF_DETACHING; // clear the detaching flag
+	ifnet_lock_done(ifp);

-	ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
-	if (!ifproto) {
-		printf("ERROR - DLIL failed if_proto allocation\n");
-		retval = ENOMEM;
-		goto end;
-	}
-
-	bzero(ifproto, sizeof(struct if_proto));
+	if_family = find_family_module(ifp->if_family);
+	if (if_family && if_family->del_if)
+		if_family->del_if(ifp);
+#if 0
+	if (--if_family->if_usecnt == 0) {
+		if (if_family->shutdown)
+			(*if_family->shutdown)();
+
+		TAILQ_REMOVE(&if_family_head, if_family, if_fam_next);
+		FREE(if_family, M_IFADDR);
+	}
+#endif

-	dl_tag_array[i].ifp = ifp;
-	dl_tag_array[i].proto = ifproto;
-	dl_tag_array[i].pr_flt_head = &ifproto->pr_flt_head;
-	ifproto->dl_tag = i;
-	*dl_tag = i;
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, 0, 0);
+	free_func = ifp->if_free;
+	dlil_read_end();
+
+	if (free_func)
+		free_func(ifp);
+}

-	if (proto->default_proto) {
-		if (ifp->if_data.default_proto == 0)
-			ifp->if_data.default_proto = i;
-		else
-			printf("ERROR - dlil_attach_protocol -- Attempt to attach more than one default protocol\n");
-	}
+static int
+ifp_unuse(
+	struct ifnet *ifp)
+{
+	int	oldval;
+	oldval = OSDecrementAtomic((UInt32*)&ifp->if_usecnt);
+	if (oldval == 0)
+		panic("ifp_unuse: ifp(%s%d)->if_usecnt was zero\n", ifp->if_name, ifp->if_unit);
+
+	if (oldval > 1)
+		return 0;
+
+	if ((ifp->if_eflags & IFEF_DETACHING) == 0)
+		panic("ifp_unuse: use count reached zero but detaching flag is not set!");
+
+	return 1; /* caller must call ifp_use_reached_zero */
+}

-	ifproto->protocol_family = proto->protocol_family;
-	ifproto->dl_input = proto->input;
-	ifproto->dl_pre_output = proto->pre_output;
-	ifproto->dl_event = proto->event;
-	ifproto->dl_offer = proto->offer;
-	ifproto->dl_ioctl = proto->ioctl;
-	ifproto->ifp = ifp;
-	TAILQ_INIT(&ifproto->pr_flt_head);
+void
+ifp_reference(
+	struct ifnet *ifp)
+{
+	int	oldval;
+	oldval = OSIncrementAtomic(&ifp->if_refcnt);
+}

-	/*
-	 * Call family module add_proto routine so it can refine the
-	 * demux descriptors as it wishes.
-	 */
-	retval = (*if_family->add_proto)(&proto->demux_desc_head, ifproto, *dl_tag);
-	if (retval) {
-		dl_tag_array[i].ifp = 0;
-		FREE(ifproto, M_IFADDR);
-		goto end;
-	}
+void
+ifp_release(
+	struct ifnet *ifp)
+{
+	int	oldval;
+	oldval = OSDecrementAtomic((UInt32*)&ifp->if_refcnt);
+	if (oldval == 0)
+		panic("ifp_release - refcount decremented past zero!");
+}

-	/*
-	 * Add to if_proto list for this interface
-	 */
+extern lck_mtx_t	*domain_proto_mtx;

-	tmp = (struct dlil_proto_head *) &ifp->proto_head;
-	TAILQ_INSERT_TAIL(tmp, ifproto, next);
-	ifp->refcnt++;
-	if (ifproto->dl_offer)
-		ifp->offercnt++;
+static int
+dlil_attach_protocol_internal(
+	struct if_proto	*proto,
+	const struct ddesc_head_str *demux,
+	const struct ifnet_demux_desc *demux_list,
+	u_int32_t	demux_count)
+{
+	struct ddesc_head_str temp_head;
+	struct kev_dl_proto_data	ev_pr_data;
+	struct ifnet *ifp = proto->ifp;
+	int retval = 0;
+	u_long hash_value = proto_hash_value(proto->protocol_family);
+	int	if_using_kpi = (ifp->if_eflags & IFEF_USEKPI) != 0;
+	void* free_me = NULL;
+
+	/* setup some of the common values */
+
+	{
+		lck_mtx_lock(domain_proto_mtx);
+		struct domain *dp = domains;
+		while (dp && (protocol_family_t)dp->dom_family != proto->protocol_family)
+			dp = dp->dom_next;
+		proto->dl_domain = dp;
+		lck_mtx_unlock(domain_proto_mtx);
+	}
+
+	/*
+	 * Convert the demux descriptors to a type the interface
+	 * will understand. Checking if_eflags should be safe; this
+	 * flag won't change.
+	 */
+	if (if_using_kpi && demux) {
+		/* Convert the demux linked list to a demux_list */
+		struct dlil_demux_desc	*demux_entry;
+		struct ifnet_demux_desc *temp_list = NULL;
+		u_int32_t i = 0;
+
+		TAILQ_FOREACH(demux_entry, demux, next) {
+			i++;
+		}
+
+		temp_list = _MALLOC(sizeof(struct ifnet_demux_desc) * i, M_TEMP, M_WAITOK);
+		free_me = temp_list;
+
+		if (temp_list == NULL)
+			return ENOMEM;
+
+		i = 0;
+		TAILQ_FOREACH(demux_entry, demux, next) {
+			/* dlil_demux_desc types 1, 2, and 3 are obsolete and cannot be translated */
+			if (demux_entry->type == 1 ||
+				demux_entry->type == 2 ||
+				demux_entry->type == 3) {
+				FREE(free_me, M_TEMP);
+				return ENOTSUP;
+			}
+
+			temp_list[i].type = demux_entry->type;
+			temp_list[i].data = demux_entry->native_type;
+			temp_list[i].datalen = demux_entry->variants.native_type_length;
+			i++;
+		}
+		demux_count = i;
+		demux_list = temp_list;
+	}
+	else if (!if_using_kpi && demux_list != NULL) {
+		struct dlil_demux_desc	*demux_entry;
+		u_int32_t i = 0;
+
+		demux_entry = _MALLOC(sizeof(struct dlil_demux_desc) * demux_count, M_TEMP, M_WAITOK);
+		free_me = demux_entry;
+		if (demux_entry == NULL)
+			return ENOMEM;
+
+		TAILQ_INIT(&temp_head);
+
+		for (i = 0; i < demux_count; i++) {
+			demux_entry[i].type = demux_list[i].type;
+			demux_entry[i].native_type = demux_list[i].data;
+			demux_entry[i].variants.native_type_length = demux_list[i].datalen;
+			TAILQ_INSERT_TAIL(&temp_head, &demux_entry[i], next);
+		}
+		demux = &temp_head;
+	}
+
+	/*
+	 * Take the write lock to protect readers and exclude other writers.
+	 */
+	dlil_write_begin();
+
+	/* Check that the interface isn't currently detaching */
+	ifnet_lock_shared(ifp);
+	if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+		ifnet_lock_done(ifp);
+		dlil_write_end();
+		if (free_me)
+			FREE(free_me, M_TEMP);
+		return ENXIO;
+	}
+	ifnet_lock_done(ifp);
+
+	if (find_attached_proto(ifp, proto->protocol_family) != NULL) {
+		dlil_write_end();
+		if (free_me)
+			FREE(free_me, M_TEMP);
+		return EEXIST;
+	}
+
+	/*
+	 * Call family module add_proto routine so it can refine the
+	 * demux descriptors as it wishes.
+	 */
+	if (if_using_kpi)
+		retval = ifp->if_add_proto_u.kpi(ifp, proto->protocol_family, demux_list, demux_count);
+	else {
+		retval = ifp->if_add_proto_u.original(ifp, proto->protocol_family,
+					_cast_non_const(demux));
+	}
+	if (retval) {
+		dlil_write_end();
+		if (free_me)
+			FREE(free_me, M_TEMP);
+		return retval;
+	}
+
+	/*
+	 * We can't fail from this point on.
+	 * Increment the number of uses (protocol attachments + interface attached).
+	 */
+	ifp_use(ifp, kIfNetUseCount_MustNotBeZero);
+
+	/*
+	 * Insert the protocol in the hash
+	 */
+	{
+		struct if_proto*	prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
+		while (prev_proto && SLIST_NEXT(prev_proto, next_hash) != NULL)
+			prev_proto = SLIST_NEXT(prev_proto, next_hash);
+		if (prev_proto)
+			SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
+		else
+			SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], proto, next_hash);
+	}

-	/* the reserved field carries the number of protocol still attached (subject to change) */
-	ev_pr_data.proto_family  = proto->protocol_family;
-	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
-		      (struct net_event_data *)&ev_pr_data,
-		      sizeof(struct kev_dl_proto_data));
+	/*
+	 * Add to if_proto list for this interface
+	 */
+	if_proto_ref(proto);
+	if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer)
+		ifp->offercnt++;
+	dlil_write_end();
+
+	/* the reserved field carries the number of protocols still attached (subject to change) */
+	ev_pr_data.proto_family = proto->protocol_family;
+	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
+		      (struct net_event_data *)&ev_pr_data,
+		      sizeof(struct kev_dl_proto_data));
+
+	DLIL_PRINTF("Attached protocol %d to %s%d - %d\n", proto->protocol_family,
+		 ifp->if_name, ifp->if_unit, retval);
+	if (free_me)
+		FREE(free_me, M_TEMP);
+	return retval;
+}

+__private_extern__ int
+dlil_attach_protocol_kpi(ifnet_t ifp, protocol_family_t protocol,
+	const struct ifnet_attach_proto_param *proto_details)
+{
+	int retval = 0;
+	struct if_proto  *ifproto = NULL;
+
+	ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK);
+	if (ifproto == 0) {
+		DLIL_PRINTF("ERROR - DLIL failed if_proto allocation\n");
+		retval = ENOMEM;
+		goto end;
+	}
+	bzero(ifproto, sizeof(*ifproto));
+
+	ifproto->ifp = ifp;
+	ifproto->protocol_family = protocol;
+	ifproto->proto_kpi = kProtoKPI_v1;
+	ifproto->kpi.v1.input = proto_details->input;
+	ifproto->kpi.v1.pre_output = proto_details->pre_output;
+	ifproto->kpi.v1.event = proto_details->event;
+	ifproto->kpi.v1.ioctl = proto_details->ioctl;
+	ifproto->kpi.v1.detached = proto_details->detached;
+	ifproto->kpi.v1.resolve_multi = proto_details->resolve;
+	ifproto->kpi.v1.send_arp = proto_details->send_arp;
+
+	retval = dlil_attach_protocol_internal(ifproto, NULL,
+				proto_details->demux_list, proto_details->demux_count);
+
+end:
-	splx(s);
-
thread_funnel_set(network_flock, funnel_state); - return retval; + if (retval && ifproto) + FREE(ifproto, M_IFADDR); + return retval; } +int +dlil_attach_protocol(struct dlil_proto_reg_str *proto) +{ + struct ifnet *ifp = NULL; + struct if_proto *ifproto = NULL; + int retval = 0; + /* + * Do everything we can before taking the write lock + */ + + if ((proto->protocol_family == 0) || (proto->interface_family == 0)) + return EINVAL; -int -dlil_detach_protocol(u_long dl_tag) -{ - struct ifnet *ifp; - struct ifnet *orig_ifp=0; - struct if_proto *proto; - struct dlil_proto_head *tmp; - struct if_family_str *if_family; - struct dlil_filterq_entry *filter; - int s, retval = 0; - struct dlil_filterq_head *fhead; - struct kev_dl_proto_data ev_pr_data; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); - - if (dl_tag >= dl_tag_nb || dl_tag_array[dl_tag].ifp == 0) { - retval = ENOENT; - goto end; - } + /* + * Allocate and init a new if_proto structure + */ + ifproto = _MALLOC(sizeof(struct if_proto), M_IFADDR, M_WAITOK); + if (!ifproto) { + DLIL_PRINTF("ERROR - DLIL failed if_proto allocation\n"); + retval = ENOMEM; + goto end; + } + - ifp = dl_tag_array[dl_tag].ifp; - proto = dl_tag_array[dl_tag].proto; + /* ifbyfamily returns us an ifp with an incremented if_usecnt */ + ifp = ifbyfamily(proto->interface_family, proto->unit_number); + if (!ifp) { + DLIL_PRINTF("dlil_attach_protocol -- no such interface %d unit %d\n", + proto->interface_family, proto->unit_number); + retval = ENXIO; + goto end; + } - if_family = find_family_module(ifp->if_family); - if (if_family == NULL) { - retval = ENOENT; - goto end; - } + bzero(ifproto, sizeof(struct if_proto)); + + ifproto->ifp = ifp; + ifproto->protocol_family = proto->protocol_family; + ifproto->proto_kpi = kProtoKPI_DLIL; + ifproto->kpi.dlil.dl_input = proto->input; + ifproto->kpi.dlil.dl_pre_output = proto->pre_output; + ifproto->kpi.dlil.dl_event = proto->event; + ifproto->kpi.dlil.dl_offer = proto->offer; + ifproto->kpi.dlil.dl_ioctl = proto->ioctl; + ifproto->kpi.dlil.dl_detached = proto->detached; + + retval = dlil_attach_protocol_internal(ifproto, &proto->demux_desc_head, NULL, 0); + +end: + if (retval && ifproto) + FREE(ifproto, M_IFADDR); + return retval; +} - tmp = (struct dlil_proto_head *) &ifp->proto_head; +extern void if_rtproto_del(struct ifnet *ifp, int protocol); - /* - * Call family module del_proto - */ +static int +dlil_detach_protocol_internal( + struct if_proto *proto) +{ + struct ifnet *ifp = proto->ifp; + u_long proto_family = proto->protocol_family; + struct kev_dl_proto_data ev_pr_data; + + if (proto->proto_kpi == kProtoKPI_DLIL) { + if (proto->kpi.dlil.dl_detached) + proto->kpi.dlil.dl_detached(proto->protocol_family, ifp); + } + else { + if (proto->kpi.v1.detached) + proto->kpi.v1.detached(ifp, proto->protocol_family); + } + if_proto_free(proto); + + /* + * Cleanup routes that may still be in the routing table for that interface/protocol pair. 
+	 */
+
+	if_rtproto_del(ifp, proto_family);
+
+	/* the reserved field carries the number of protocols still attached (subject to change) */
+	ev_pr_data.proto_family = proto_family;
+	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
+		      (struct net_event_data *)&ev_pr_data,
+		      sizeof(struct kev_dl_proto_data));
+	return 0;
+}

-	(*if_family->del_proto)(proto, dl_tag);
+int
+dlil_detach_protocol(struct ifnet *ifp, u_long proto_family)
+{
+	struct if_proto *proto = NULL;
+	int	retval = 0;
+	int use_reached_zero = 0;
+
+#if DLIL_ALWAYS_DELAY_DETACH
+	{
+		retval = EDEADLK;
+#else
+	if ((retval = dlil_write_begin()) != 0) {
+#endif
+		if (retval == EDEADLK) {
+			retval = 0;
+			dlil_read_begin();
+			proto = find_attached_proto(ifp, proto_family);
+			if (proto == 0) {
+				retval = ENXIO;
+			}
+			else {
+				proto->detaching = 1;
+				dlil_detach_waiting = 1;
+				wakeup(&dlil_detach_waiting);
+			}
+			dlil_read_end();
+		}
+		goto end;
+	}
+
+	proto = find_attached_proto(ifp, proto_family);
+
+	if (proto == NULL) {
+		retval = ENXIO;
+		dlil_write_end();
+		goto end;
+	}
+
+	/*
+	 * Call family module del_proto
+	 */
+
+	if (ifp->if_del_proto)
+		ifp->if_del_proto(ifp, proto->protocol_family);

-	/*
-	 * Remove and deallocate any attached protocol filters
-	 */

+	if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer)
+		ifp->offercnt--;

-	while (filter = TAILQ_FIRST(&proto->pr_flt_head))
-		dlil_detach_filter(filter->filter_id);
-
-	if (proto->dl_offer)
-		ifp->offercnt--;
+	SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], proto, if_proto, next_hash);
+
+	/*
+	 * We can do the rest of the work outside of the write lock.
+	 */
+	use_reached_zero = ifp_unuse(ifp);
+	dlil_write_end();
+
+	dlil_detach_protocol_internal(proto);
+
+	/*
+	 * Only handle the case where the interface will go away after
+	 * we've sent the message. This way post message can send the
+	 * message to the interface safely.
+	 */
+
+	if (use_reached_zero)
+		ifp_use_reached_zero(ifp);
+
+end:
+	return retval;
+}

-	if (ifp->if_data.default_proto == dl_tag)
-		ifp->if_data.default_proto = 0;
-	dl_tag_array[dl_tag].ifp = 0;

+/*
+ * dlil_delayed_detach_thread is responsible for detaching
+ * protocols, protocol filters, and interface filters after
+ * an attempt was made to detach one of those items while
+ * it was not safe to do so (i.e. the caller had called
+ * dlil_read_begin).
+ *
+ * This function will take the dlil write lock and walk
+ * through each of the interfaces looking for items with
+ * the detaching flag set. When an item is found, it is
+ * detached from the interface and placed on a local list.
+ * After all of the items have been collected, we drop the
+ * write lock and perform the post detach. This is done
+ * so we only have to take the write lock once.
+ *
+ * When detaching a protocol filter, if we find that we
+ * have detached the very last protocol and we need to call
+ * ifp_use_reached_zero, we have to break out of our work
+ * to drop the write lock so we can call ifp_use_reached_zero.
+ */ + +static void +dlil_delayed_detach_thread(__unused void* foo, __unused wait_result_t wait) +{ + thread_t self = current_thread(); + int asserted = 0; - /* the reserved field carries the number of protocol still attached (subject to change) */ - ev_pr_data.proto_family = proto->protocol_family; + ml_thread_policy(self, MACHINE_GROUP, + (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR)); - /* - * Cleanup routes that may still be in the routing table for that interface/protocol pair. - */ + + while (1) { + if (dlil_detach_waiting != 0 && dlil_write_begin() == 0) { + struct ifnet *ifp; + struct proto_hash_entry detached_protos; + struct ifnet_filter_head detached_filters; + struct if_proto *proto; + struct if_proto *next_proto; + struct ifnet_filter *filt; + struct ifnet_filter *next_filt; + int reached_zero; + + reached_zero = 0; + + /* Clear the detach waiting flag */ + dlil_detach_waiting = 0; + TAILQ_INIT(&detached_filters); + SLIST_INIT(&detached_protos); + + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + int i; + + // Look for protocols and protocol filters + for (i = 0; i < PROTO_HASH_SLOTS && !reached_zero; i++) { + struct if_proto **prev_nextptr = &SLIST_FIRST(&ifp->if_proto_hash[i]); + for (proto = *prev_nextptr; proto; proto = *prev_nextptr) { + + // Detach this protocol + if (proto->detaching) { + if (ifp->if_del_proto) + ifp->if_del_proto(ifp, proto->protocol_family); + if (proto->proto_kpi == kProtoKPI_DLIL && proto->kpi.dlil.dl_offer) + ifp->offercnt--; + *prev_nextptr = SLIST_NEXT(proto, next_hash); + SLIST_INSERT_HEAD(&detached_protos, proto, next_hash); + reached_zero = ifp_unuse(ifp); + if (reached_zero) { + break; + } + } + else { + // Update prev_nextptr to point to our next ptr + prev_nextptr = &SLIST_NEXT(proto, next_hash); + } + } + } + + // look for interface filters that need to be detached + for (filt = TAILQ_FIRST(&ifp->if_flt_head); filt; filt = next_filt) { + next_filt = TAILQ_NEXT(filt, filt_next); + if (filt->filt_detaching != 0) { + // take this interface filter off the interface filter list + TAILQ_REMOVE(&ifp->if_flt_head, filt, filt_next); + + // put this interface filter on the detached filters list + TAILQ_INSERT_TAIL(&detached_filters, filt, filt_next); + } + } + + if (ifp->if_delayed_detach) { + ifp->if_delayed_detach = 0; + reached_zero = ifp_unuse(ifp); + } + + if (reached_zero) + break; + } + ifnet_head_done(); + dlil_write_end(); + + for (filt = TAILQ_FIRST(&detached_filters); filt; filt = next_filt) { + next_filt = TAILQ_NEXT(filt, filt_next); + /* + * dlil_detach_filter_internal won't remove an item from + * the list if it is already detached (second parameter). + * The item will be freed though. 
+			 */
+			dlil_detach_filter_internal(filt, 1);
+		}
+
+		for (proto = SLIST_FIRST(&detached_protos); proto; proto = next_proto) {
+			next_proto = SLIST_NEXT(proto, next_hash);
+			dlil_detach_protocol_internal(proto);
+		}
+
+		if (reached_zero) {
+			ifp_use_reached_zero(ifp);
+			dlil_detach_waiting = 1; // we may have missed something
+		}
+	}
+
+	if (!asserted && dlil_detach_waiting == 0) {
+		asserted = 1;
+		assert_wait(&dlil_detach_waiting, THREAD_UNINT);
+	}
+
+	if (dlil_detach_waiting == 0) {
+		asserted = 0;
+		thread_block(dlil_delayed_detach_thread);
+	}
+	}
+}

-	if_rtproto_del(ifp, proto->protocol_family);
+static void
+dlil_call_delayed_detach_thread(void) {
+	dlil_delayed_detach_thread(NULL, THREAD_RESTART);
+}

-	TAILQ_REMOVE(tmp, proto, next);
-	FREE(proto, M_IFADDR);
+extern int if_next_index(void);

+__private_extern__ int
+dlil_if_attach_with_address(
+	struct ifnet	*ifp,
+	const struct sockaddr_dl	*ll_addr)
+{
+	u_long		    interface_family = ifp->if_family;
+	struct if_family_str    *if_family = NULL;
+	int			    stat;
+	struct ifnet *tmp_if;
+	struct proto_hash_entry *new_proto_list = NULL;
+	int locked = 0;
+
+
+	ifnet_head_lock_shared();

-	ifp->refcnt--;
-	ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp);
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
-		      (struct net_event_data *)&ev_pr_data,
-		      sizeof(struct kev_dl_proto_data));

+	/* Verify we aren't already on the list */
+	TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
+		if (tmp_if == ifp) {
+			ifnet_head_done();
+			return EEXIST;
+		}
+	}
+
+	ifnet_head_done();
+
+	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_lock == 0)
+#if IFNET_RW_LOCK
+		ifp->if_lock = lck_rw_alloc_init(ifnet_lock_group, ifnet_lock_attr);
+#else
+		ifp->if_lock = lck_mtx_alloc_init(ifnet_lock_group, ifnet_lock_attr);
+#endif

-	if (ifp->refcnt == 0) {
+	if (ifp->if_lock == 0) {
+		return ENOMEM;
+	}

-		TAILQ_REMOVE(&ifnet, ifp, if_link);
+	// Only use family if this is not a KPI interface
+	if ((ifp->if_eflags & IFEF_USEKPI) == 0) {
+		if_family = find_family_module(interface_family);
+	}

-		(*if_family->del_if)(ifp);
+	/*
+	 * Allow interfaces without protocol families to attach
+	 * only if they have the necessary fields filled out.
+	 */
+
+	if ((if_family == 0) &&
+		(ifp->if_add_proto == 0 || ifp->if_del_proto == 0)) {
+		DLIL_PRINTF("Attempt to attach interface without family module - %d\n",
+				interface_family);
+		return ENODEV;
+	}
+
+	if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_proto_hash == NULL) {
+		MALLOC(new_proto_list, struct proto_hash_entry*, sizeof(struct proto_hash_entry) * PROTO_HASH_SLOTS,
+			   M_NKE, M_WAITOK);

-		if (--if_family->refcnt == 0) {
-			if (if_family->shutdown)
-				(*if_family->shutdown)();
-
-			TAILQ_REMOVE(&if_family_head, if_family, if_fam_next);
-			FREE(if_family, M_IFADDR);
+		if (new_proto_list == 0) {
+			return ENOBUFS;
+		}
	}

-	fhead = (struct dlil_filterq_head *) &ifp->if_flt_head;
-	while (orig_ifp != ifp) {
-		orig_ifp = ifp;
+	dlil_write_begin();
+	locked = 1;
+
+	/*
+	 * Call the family module to fill in the appropriate fields in the
+	 * ifnet structure.
+ */ + + if (if_family) { + stat = if_family->add_if(ifp); + if (stat) { + DLIL_PRINTF("dlil_if_attach -- add_if failed with %d\n", stat); + dlil_write_end(); + return stat; + } + ifp->if_add_proto_u.original = if_family->add_proto; + ifp->if_del_proto = if_family->del_proto; + if_family->refcnt++; + } + + ifp->offercnt = 0; + TAILQ_INIT(&ifp->if_flt_head); + + + if (new_proto_list) { + bzero(new_proto_list, (PROTO_HASH_SLOTS * sizeof(struct proto_hash_entry))); + ifp->if_proto_hash = new_proto_list; + new_proto_list = 0; + } + + /* old_if_attach */ + { + struct ifaddr *ifa = 0; + + if (ifp->if_snd.ifq_maxlen == 0) + ifp->if_snd.ifq_maxlen = ifqmaxlen; + TAILQ_INIT(&ifp->if_prefixhead); + LIST_INIT(&ifp->if_multiaddrs); + ifnet_touch_lastchange(ifp); + + /* usecount to track attachment to the ifnet list */ + ifp_use(ifp, kIfNetUseCount_MayBeZero); + + /* Lock the list of interfaces */ + ifnet_head_lock_exclusive(); + ifnet_lock_exclusive(ifp); + + if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) { + char workbuf[64]; + int namelen, masklen, socksize, ifasize; + + ifp->if_index = if_next_index(); + + namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit); +#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m)) + masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; + socksize = masklen + ifp->if_addrlen; +#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1))) + if ((u_long)socksize < sizeof(struct sockaddr_dl)) + socksize = sizeof(struct sockaddr_dl); + socksize = ROUNDUP(socksize); + ifasize = sizeof(struct ifaddr) + 2 * socksize; + ifa = (struct ifaddr*)_MALLOC(ifasize, M_IFADDR, M_WAITOK); + if (ifa) { + struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifa + 1); + ifnet_addrs[ifp->if_index - 1] = ifa; + bzero(ifa, ifasize); + sdl->sdl_len = socksize; + sdl->sdl_family = AF_LINK; + bcopy(workbuf, sdl->sdl_data, namelen); + sdl->sdl_nlen = namelen; + sdl->sdl_index = ifp->if_index; + sdl->sdl_type = ifp->if_type; + if (ll_addr) { + sdl->sdl_alen = ll_addr->sdl_alen; + if (ll_addr->sdl_alen != ifp->if_addrlen) + panic("dlil_if_attach - ll_addr->sdl_alen != ifp->if_addrlen"); + bcopy(CONST_LLADDR(ll_addr), LLADDR(sdl), sdl->sdl_alen); + } + ifa->ifa_ifp = ifp; + ifa->ifa_rtrequest = link_rtrequest; + ifa->ifa_addr = (struct sockaddr*)sdl; + sdl = (struct sockaddr_dl*)(socksize + (caddr_t)sdl); + ifa->ifa_netmask = (struct sockaddr*)sdl; + sdl->sdl_len = masklen; + while (namelen != 0) + sdl->sdl_data[--namelen] = 0xff; + } + } + else { + /* preserve the first ifaddr */ + ifnet_addrs[ifp->if_index - 1] = TAILQ_FIRST(&ifp->if_addrhead); + } + - TAILQ_FOREACH(filter, fhead, que) { - if (IFILT(filter).filter_if_free) { - retval = (*IFILT(filter).filter_if_free)(IFILT(filter).cookie, ifp); - if (retval) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return 0; - } + TAILQ_INIT(&ifp->if_addrhead); + ifa = ifnet_addrs[ifp->if_index - 1]; + + if (ifa) { + /* + * We don't use if_attach_ifa because we want + * this address to be first on the list. 
+		 */
+		ifaref(ifa);
+		ifa->ifa_debug |= IFA_ATTACHED;
+		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
	}
-		if (ifp != orig_ifp)
-			break;
-	}
+
+	TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
+	ifindex2ifnet[ifp->if_index] = ifp;
+
+	ifnet_head_done();
	}
-
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, 0, 0);
-
-	(*ifp->if_free)(ifp);
-	}
+	dlil_write_end();
+
+	if (if_family && if_family->init_if) {
+		stat = if_family->init_if(ifp);
+		if (stat) {
+			DLIL_PRINTF("dlil_if_attach -- init_if failed with %d\n", stat);
+		}
+	}
+
+	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, 0, 0);
+	ifnet_lock_done(ifp);

-end:
-	splx(s);
-	thread_funnel_set(network_flock, funnel_state);
-	return retval;
+	return 0;
 }

-
-
-
-
 int
 dlil_if_attach(struct ifnet *ifp)
 {
-	u_long		    interface_family = ifp->if_family;
-	struct if_family_str    *if_family;
-	struct dlil_proto_head  *tmp;
-	int			    stat;
-	int s;
-	boolean_t funnel_state;
-
-	funnel_state = thread_funnel_set(network_flock, TRUE);
-	s = splnet();
-	if (ifnet_inited == 0) {
-		TAILQ_INIT(&ifnet);
-		ifnet_inited = 1;
-	}
-
-	if_family = find_family_module(interface_family);
-
-	if ((!if_family) || (if_family->flags & DLIL_SHUTDOWN)) {
-		splx(s);
-		kprintf("Attempt to attach interface without family module - %d\n",
-			interface_family);
-		thread_funnel_set(network_flock, funnel_state);
-		return ENODEV;
-	}
-
-	if (ifp->refcnt == 0) {
-		/*
-		 * Call the family module to fill in the appropriate fields in the
-		 * ifnet structure.
-		 */
-
-		stat = (*if_family->add_if)(ifp);
-		if (stat) {
-			splx(s);
-			kprintf("dlil_if_attach -- add_if failed with %d\n", stat);
-			thread_funnel_set(network_flock, funnel_state);
-			return stat;
-		}
-		if_family->refcnt++;
-
-		/*
-		 * Add the ifp to the interface list.
-		 */
-
-		tmp = (struct dlil_proto_head *) &ifp->proto_head;
-		TAILQ_INIT(tmp);
-
-		ifp->if_data.default_proto = 0;
-		ifp->offercnt = 0;
-		TAILQ_INIT(&ifp->if_flt_head);
-		old_if_attach(ifp);
-
-		if (if_family->init_if) {
-			stat = (*if_family->init_if)(ifp);
-			if (stat) {
-				kprintf("dlil_if_attach -- init_if failed with %d\n", stat);
-			}
-		}
-	}
-
-	ifp->refcnt++;
-
-	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, 0, 0);
-
-	splx(s);
-	thread_funnel_set(network_flock, funnel_state);
-	return 0;
+	return dlil_if_attach_with_address(ifp, NULL);
 }

 int
 dlil_if_detach(struct ifnet *ifp)
 {
-	struct if_proto  *proto;
-	struct dlil_filterq_entry *if_filter;
-	struct if_family_str    *if_family;
-	struct dlil_filterq_head *fhead = (struct dlil_filterq_head *) &ifp->if_flt_head;
-	struct kev_msg   ev_msg;
-	boolean_t funnel_state;
+	struct ifnet_filter *filter;
+	struct ifnet_filter	*filter_next;
+	int zeroed = 0;
+	int retval = 0;
+	struct ifnet_filter_head fhead;

-	funnel_state = thread_funnel_set(network_flock, TRUE);
-	if_family = find_family_module(ifp->if_family);
+	ifnet_lock_exclusive(ifp);

-	if (!if_family) {
-		kprintf("Attempt to detach interface without family module - %s\n",
-			ifp->if_name);
-		thread_funnel_set(network_flock, funnel_state);
-		return ENODEV;
+	if ((ifp->if_eflags & IFEF_DETACHING) != 0) {
+		/* Interface has already been detached */
+		ifnet_lock_done(ifp);
+		return ENXIO;
	}

-	while (if_filter = TAILQ_FIRST(fhead))
-		dlil_detach_filter(if_filter->filter_id);
+	/*
+	 * Indicate this interface is being detached.
+	 *
+	 * This should prevent protocols from attaching
+	 * from this point on. The interface will remain on
+	 * the list until all of the protocols are detached.
+ */ + ifp->if_eflags |= IFEF_DETACHING; + ifnet_lock_done(ifp); - ifp->refcnt--; + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, 0, 0); - if (ifp->refcnt > 0) { - dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, 0, 0); - thread_funnel_set(network_flock, funnel_state); - return DLIL_WAIT_FOR_FREE; + if ((retval = dlil_write_begin()) != 0) { + if (retval == EDEADLK) { + retval = DLIL_WAIT_FOR_FREE; + + /* We need to perform a delayed detach */ + ifp->if_delayed_detach = 1; + dlil_detach_waiting = 1; + wakeup(&dlil_detach_waiting); + } + return retval; } - while (ifp->if_multiaddrs.lh_first) { - struct ifmultiaddr *ifma = ifp->if_multiaddrs.lh_first; - - /* - * When the interface is gone, we will no - * longer be listening on these multicasts. - * Various bits of the stack may be referencing - * these multicasts, so we can't just free them. - * We place them on a list so they may be cleaned - * up later as the other bits of the stack release - * them. - */ - LIST_REMOVE(ifma, ifma_link); - ifma->ifma_ifp = NULL; - LIST_INSERT_HEAD(&ifma_lostlist, ifma, ifma_link); - } + /* Steal the list of interface filters */ + fhead = ifp->if_flt_head; + TAILQ_INIT(&ifp->if_flt_head); - /* Let BPF know the interface is detaching. */ - bpfdetach(ifp); - TAILQ_REMOVE(&ifnet, ifp, if_link); + /* unuse the interface */ + zeroed = ifp_unuse(ifp); - (*if_family->del_if)(ifp); + dlil_write_end(); - if (--if_family->refcnt == 0) { - if (if_family->shutdown) - (*if_family->shutdown)(); - - TAILQ_REMOVE(&if_family_head, if_family, if_fam_next); - FREE(if_family, M_IFADDR); + for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) { + filter_next = TAILQ_NEXT(filter, filt_next); + dlil_detach_filter_internal(filter, 1); } - dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, 0, 0); - thread_funnel_set(network_flock, funnel_state); - return 0; + if (zeroed == 0) { + retval = DLIL_WAIT_FOR_FREE; + } + else + { + ifp_use_reached_zero(ifp); + } + + return retval; } @@ -1517,25 +2436,17 @@ dlil_reg_if_modules(u_long interface_family, struct dlil_ifmod_reg_str *ifmod) { struct if_family_str *if_family; - int s; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); if (find_family_module(interface_family)) { - kprintf("Attempt to register dlil family module more than once - %d\n", + DLIL_PRINTF("Attempt to register dlil family module more than once - %d\n", interface_family); - splx(s); - thread_funnel_set(network_flock, funnel_state); return EEXIST; } if ((!ifmod->add_if) || (!ifmod->del_if) || (!ifmod->add_proto) || (!ifmod->del_proto)) { - kprintf("dlil_reg_if_modules passed at least one null pointer\n"); - splx(s); - thread_funnel_set(network_flock, funnel_state); + DLIL_PRINTF("dlil_reg_if_modules passed at least one null pointer\n"); return EINVAL; } @@ -1551,17 +2462,13 @@ dlil_reg_if_modules(u_long interface_family, if (interface_family == 123) { /* Vicom */ ifmod->init_if = 0; } else { - splx(s); - thread_funnel_set(network_flock, funnel_state); return EINVAL; } } if_family = (struct if_family_str *) _MALLOC(sizeof(struct if_family_str), M_IFADDR, M_WAITOK); if (!if_family) { - kprintf("dlil_reg_if_modules failed allocation\n"); - splx(s); - thread_funnel_set(network_flock, funnel_state); + DLIL_PRINTF("dlil_reg_if_modules failed allocation\n"); return ENOMEM; } @@ -1574,29 +2481,23 @@ dlil_reg_if_modules(u_long interface_family, if_family->init_if = ifmod->init_if; if_family->add_proto = ifmod->add_proto; if_family->del_proto = 
ifmod->del_proto; - if_family->ifmod_ioctl = ifmod->ifmod_ioctl; + if_family->ifmod_ioctl = ifmod->ifmod_ioctl; if_family->refcnt = 1; if_family->flags = 0; TAILQ_INSERT_TAIL(&if_family_head, if_family, if_fam_next); - splx(s); - thread_funnel_set(network_flock, funnel_state); return 0; } int dlil_dereg_if_modules(u_long interface_family) { struct if_family_str *if_family; - int s, ret = 0; - boolean_t funnel_state; + int ret = 0; + - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); if_family = find_family_module(interface_family); if (if_family == 0) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return ENOENT; + return ENXIO; } if (--if_family->refcnt == 0) { @@ -1611,104 +2512,84 @@ int dlil_dereg_if_modules(u_long interface_family) ret = DLIL_WAIT_FOR_FREE; } - splx(s); - thread_funnel_set(network_flock, funnel_state); return ret; } int -dlil_reg_proto_module(u_long protocol_family, u_long interface_family, - struct dlil_protomod_reg_str *protomod_reg) +dlil_reg_proto_module( + u_long protocol_family, + u_long interface_family, + int (*attach)(struct ifnet *ifp, u_long protocol_family), + int (*detach)(struct ifnet *ifp, u_long protocol_family)) { struct proto_family_str *proto_family; - int s; - boolean_t funnel_state; + if (attach == NULL) return EINVAL; - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); - if (find_proto_module(protocol_family, interface_family)) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return EEXIST; - } - - if (protomod_reg->reserved[0] != 0 || protomod_reg->reserved[1] != 0 - || protomod_reg->reserved[2] != 0 || protomod_reg->reserved[3] !=0) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return EINVAL; - } - - if (protomod_reg->attach_proto == NULL) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return EINVAL; + lck_mtx_lock(proto_family_mutex); + + TAILQ_FOREACH(proto_family, &proto_family_head, proto_fam_next) { + if (proto_family->proto_family == protocol_family && + proto_family->if_family == interface_family) { + lck_mtx_unlock(proto_family_mutex); + return EEXIST; + } } proto_family = (struct proto_family_str *) _MALLOC(sizeof(struct proto_family_str), M_IFADDR, M_WAITOK); if (!proto_family) { - splx(s); - thread_funnel_set(network_flock, funnel_state); + lck_mtx_unlock(proto_family_mutex); return ENOMEM; } bzero(proto_family, sizeof(struct proto_family_str)); proto_family->proto_family = protocol_family; proto_family->if_family = interface_family & 0xffff; - proto_family->attach_proto = protomod_reg->attach_proto; - proto_family->detach_proto = protomod_reg->detach_proto; + proto_family->attach_proto = attach; + proto_family->detach_proto = detach; TAILQ_INSERT_TAIL(&proto_family_head, proto_family, proto_fam_next); - splx(s); - thread_funnel_set(network_flock, funnel_state); + lck_mtx_unlock(proto_family_mutex); return 0; } int dlil_dereg_proto_module(u_long protocol_family, u_long interface_family) { struct proto_family_str *proto_family; - int s, ret = 0; - boolean_t funnel_state; + int ret = 0; + + lck_mtx_lock(proto_family_mutex); - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); proto_family = find_proto_module(protocol_family, interface_family); if (proto_family == 0) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return ENOENT; + lck_mtx_unlock(proto_family_mutex); + return ENXIO; } TAILQ_REMOVE(&proto_family_head, proto_family, proto_fam_next); FREE(proto_family, M_IFADDR); - - splx(s); 
- thread_funnel_set(network_flock, funnel_state); + + lck_mtx_unlock(proto_family_mutex); return ret; } -int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp, u_long *dl_tag) +int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp) { struct proto_family_str *proto_family; - int s, ret = 0; - boolean_t funnel_state; + int ret = 0; - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); + lck_mtx_lock(proto_family_mutex); proto_family = find_proto_module(protocol_family, ifp->if_family); if (proto_family == 0) { - splx(s); - thread_funnel_set(network_flock, funnel_state); - return ENOENT; + lck_mtx_unlock(proto_family_mutex); + return ENXIO; } - ret = (*proto_family->attach_proto)(ifp, dl_tag); + ret = proto_family->attach_proto(ifp, protocol_family); - splx(s); - thread_funnel_set(network_flock, funnel_state); + lck_mtx_unlock(proto_family_mutex); return ret; } @@ -1716,488 +2597,65 @@ int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp, u_long *dl_ta int dlil_unplumb_protocol(u_long protocol_family, struct ifnet *ifp) { struct proto_family_str *proto_family; - int s, ret = 0; - u_long tag; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); - - ret = dlil_find_dltag(ifp->if_family, ifp->if_unit, protocol_family, &tag); - - if (ret == 0) { - proto_family = find_proto_module(protocol_family, ifp->if_family); - if (proto_family && proto_family->detach_proto) - ret = (*proto_family->detach_proto)(ifp, tag); - else - ret = dlil_detach_protocol(tag); - } - - splx(s); - thread_funnel_set(network_flock, funnel_state); - return ret; -} - - - -/* - * Old if_attach no-op'ed function defined here for temporary backwards compatibility - */ - -void if_attach(ifp) - struct ifnet *ifp; -{ - dlil_if_attach(ifp); -} - - - -int -dlil_inject_if_input(struct mbuf *m, char *frame_header, u_long from_id) -{ - struct ifnet *orig_ifp = 0; - struct ifnet *ifp; - struct if_proto *ifproto; - struct if_proto *proto; - struct dlil_filterq_entry *tmp; - int retval = 0; - struct dlil_filterq_head *fhead; - int match_found; - - dlil_stats.inject_if_in1++; - - if (from_id >= dlil_filters_nb || dlil_filters[from_id].type != DLIL_IF_FILTER) - return ENOENT; - - ifp = dlil_filters[from_id].ifp; - -/* - * Let interface filters (if any) do their thing ... - */ - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - match_found = 0; - - if (TAILQ_EMPTY(fhead) == 0) { - while (orig_ifp != ifp) { - orig_ifp = ifp; - TAILQ_FOREACH_REVERSE(tmp, fhead, que, dlil_filterq_head) { - if ((match_found) && (IFILT(tmp).filter_if_input)) { - retval = (*IFILT(tmp).filter_if_input)(IFILT(tmp).cookie, - &ifp, - &m, - &frame_header); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - - } - - if (ifp != orig_ifp) - break; - - if (from_id == tmp->filter_id) - match_found = 1; - } - } - } - - ifp->if_lastchange = time; - - /* - * Call family demux module. If the demux module finds a match - * for the frame it will fill-in the ifproto pointer. - */ - - retval = (*ifp->if_demux)(ifp, m, frame_header, &ifproto ); - - if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; - - if ((retval) && (ifp->offercnt)) { - /* - * No match was found, look for any offers. 
- */ - struct dlil_proto_head *tmp = (struct dlil_proto_head *) &ifp->proto_head; - TAILQ_FOREACH(proto, tmp, next) { - if ((proto->dl_offer) && (proto->dl_offer(m, frame_header) == 0)) { - ifproto = proto; - retval = 0; - break; - } - } - } - - if (retval) { - if (retval != EJUSTRETURN) { - m_freem(m); - return retval; - } - else - return 0; - } - else - if (ifproto == 0) { - printf("ERROR - dlil_inject_if_input -- if_demux didn't return an if_proto pointer\n"); - m_freem(m); - return 0; - } - -/* - * Call any attached protocol filters. - */ - TAILQ_FOREACH_REVERSE(tmp, &ifproto->pr_flt_head, que, dlil_filterq_head) { - if (PFILT(tmp).filter_dl_input) { - retval = (*PFILT(tmp).filter_dl_input)(PFILT(tmp).cookie, - &m, - &frame_header, - &ifp); - - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } - } - - - - retval = (*ifproto->dl_input)(m, frame_header, - ifp, ifproto->dl_tag, - FALSE); - - dlil_stats.inject_if_in2++; - if (retval == EJUSTRETURN) - retval = 0; - else - if (retval) - m_freem(m); - - return retval; - -} - - - - - -int -dlil_inject_pr_input(struct mbuf *m, char *frame_header, u_long from_id) -{ - struct ifnet *orig_ifp = 0; - struct dlil_filterq_entry *tmp; - int retval; - struct if_proto *ifproto = 0; - int match_found; - struct ifnet *ifp; - - dlil_stats.inject_pr_in1++; - if (from_id >= dlil_filters_nb || dlil_filters[from_id].type != DLIL_PR_FILTER) - return ENOENT; - - ifproto = dlil_filters[from_id].proto; - ifp = dlil_filters[from_id].ifp; - -/* - * Call any attached protocol filters. - */ - - match_found = 0; - TAILQ_FOREACH_REVERSE(tmp, &ifproto->pr_flt_head, que, dlil_filterq_head) { - if ((match_found) && (PFILT(tmp).filter_dl_input)) { - retval = (*PFILT(tmp).filter_dl_input)(PFILT(tmp).cookie, - &m, - &frame_header, - &ifp); - - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } - - if (tmp->filter_id == from_id) - match_found = 1; - } - - - retval = (*ifproto->dl_input)(m, frame_header, - ifp, ifproto->dl_tag, - FALSE); - - if (retval == EJUSTRETURN) - retval = 0; - else - if (retval) - m_freem(m); - - dlil_stats.inject_pr_in2++; - return retval; -} - + int ret = 0; + lck_mtx_lock(proto_family_mutex); -int -dlil_inject_pr_output(struct mbuf *m, - struct sockaddr *dest, - int raw, - char *frame_type, - char *dst_linkaddr, - u_long from_id) -{ - struct ifnet *orig_ifp = 0; - struct ifnet *ifp; - struct dlil_filterq_entry *tmp; - int retval = 0; - char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; - char dst_linkaddr_buffer[MAX_LINKADDR * 4]; - struct dlil_filterq_head *fhead; - int match_found; - u_long dl_tag; - - dlil_stats.inject_pr_out1++; - if (raw == 0) { - if (frame_type) - bcopy(frame_type, &frame_type_buffer[0], MAX_FRAME_TYPE_SIZE * 4); - else - return EINVAL; - - if (dst_linkaddr) - bcopy(dst_linkaddr, &dst_linkaddr_buffer, MAX_LINKADDR * 4); + proto_family = find_proto_module(protocol_family, ifp->if_family); + if (proto_family && proto_family->detach_proto) + ret = proto_family->detach_proto(ifp, protocol_family); else - return EINVAL; - } - - if (from_id >= dlil_filters_nb || dlil_filters[from_id].type != DLIL_PR_FILTER) - return ENOENT; - - ifp = dlil_filters[from_id].ifp; - dl_tag = dlil_filters[from_id].proto->dl_tag; - - frame_type = frame_type_buffer; - dst_linkaddr = dst_linkaddr_buffer; - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - -/* - * Run any attached protocol filters. 
- */ - match_found = 0; - - if (TAILQ_EMPTY(dl_tag_array[dl_tag].pr_flt_head) == 0) { - TAILQ_FOREACH(tmp, dl_tag_array[dl_tag].pr_flt_head, que) { - if ((match_found) && (PFILT(tmp).filter_dl_output)) { - retval = (*PFILT(tmp).filter_dl_output)(PFILT(tmp).cookie, - &m, &ifp, &dest, dst_linkaddr, frame_type); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - } - - if (tmp->filter_id == from_id) - match_found = 1; - } - } - - -/* - * Call framing module - */ - if ((raw == 0) && (ifp->if_framer)) { - retval = (*ifp->if_framer)(ifp, &m, dest, dst_linkaddr, frame_type); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else - { - m_freem(m); - return retval; - } - } - } - - -#if BRIDGE - if (do_bridge) { - struct mbuf *m0 = m ; - struct ether_header *eh = mtod(m, struct ether_header *); - - if (m->m_pkthdr.rcvif) - m->m_pkthdr.rcvif = NULL ; - ifp = bridge_dst_lookup(eh); - bdg_forward(&m0, ifp); - if (m0) - m_freem(m0); - - return 0; - } -#endif - - -/* - * Let interface filters (if any) do their thing ... - */ - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - if (TAILQ_EMPTY(fhead) == 0) { - while (orig_ifp != ifp) { - orig_ifp = ifp; - TAILQ_FOREACH(tmp, fhead, que) { - if (IFILT(tmp).filter_if_output) { - retval = (*IFILT(tmp).filter_if_output)(IFILT(tmp).cookie, - &ifp, - &m); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - - } - - if (ifp != orig_ifp) - break; - } - } - } - -/* - * Finally, call the driver. - */ - - retval = (*ifp->if_output)(ifp, m); - dlil_stats.inject_pr_out2++; - if ((retval == 0) || (retval == EJUSTRETURN)) - return 0; - else - return retval; -} - - -int -dlil_inject_if_output(struct mbuf *m, u_long from_id) -{ - struct ifnet *orig_ifp = 0; - struct ifnet *ifp; - struct dlil_filterq_entry *tmp; - int retval = 0; - struct dlil_filterq_head *fhead; - int match_found; - - dlil_stats.inject_if_out1++; - if (from_id > dlil_filters_nb || dlil_filters[from_id].type != DLIL_IF_FILTER) - return ENOENT; - - ifp = dlil_filters[from_id].ifp; - -/* - * Let interface filters (if any) do their thing ... - */ - - fhead = (struct dlil_filterq_head *) &ifp->if_flt_head; - match_found = 0; - - if (TAILQ_EMPTY(fhead) == 0) { - while (orig_ifp != ifp) { - orig_ifp = ifp; - TAILQ_FOREACH(tmp, fhead, que) { - if ((match_found) && (IFILT(tmp).filter_if_output)) { - retval = (*IFILT(tmp).filter_if_output)(IFILT(tmp).cookie, - &ifp, - &m); - if (retval) { - if (retval == EJUSTRETURN) - return 0; - else { - m_freem(m); - return retval; - } - } - - } - - if (ifp != orig_ifp) - break; - - if (from_id == tmp->filter_id) - match_found = 1; - } - } - } - -/* - * Finally, call the driver. 
- */ + ret = dlil_detach_protocol(ifp, protocol_family); - retval = (*ifp->if_output)(ifp, m); - dlil_stats.inject_if_out2++; - if ((retval == 0) || (retval == EJUSTRETURN)) - return 0; - else - return retval; + lck_mtx_unlock(proto_family_mutex); + return ret; } -static -int dlil_recycle_ioctl(struct ifnet *ifnet_ptr, u_long ioctl_code, void *ioctl_arg) +static errno_t +dlil_recycle_ioctl( + __unused ifnet_t ifnet_ptr, + __unused u_int32_t ioctl_code, + __unused void *ioctl_arg) { - return EOPNOTSUPP; } -static -int dlil_recycle_output(struct ifnet *ifnet_ptr, struct mbuf *m) +static int +dlil_recycle_output( + __unused struct ifnet *ifnet_ptr, + struct mbuf *m) { - m_freem(m); return 0; } -static -int dlil_recycle_free(struct ifnet *ifnet_ptr) +static void +dlil_recycle_free( + __unused ifnet_t ifnet_ptr) { - return 0; } -static -int dlil_recycle_set_bpf_tap(struct ifnet *ifp, int mode, - int (*bpf_callback)(struct ifnet *, struct mbuf *)) +static errno_t +dlil_recycle_set_bpf_tap( + __unused ifnet_t ifp, + __unused bpf_tap_mode mode, + __unused bpf_packet_func callback) { /* XXX not sure what to do here */ return 0; } -int dlil_if_acquire(u_long family, void *uniqueid, size_t uniqueid_len, - struct ifnet **ifp) +int dlil_if_acquire( + u_long family, + const void *uniqueid, + size_t uniqueid_len, + struct ifnet **ifp) { struct ifnet *ifp1 = NULL; struct dlil_ifnet *dlifp1 = NULL; - int s, ret = 0; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); + int ret = 0; + lck_mtx_lock(dlil_ifnet_mutex); TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) { ifp1 = (struct ifnet *)dlifp1; @@ -2216,8 +2674,11 @@ int dlil_if_acquire(u_long family, void *uniqueid, size_t uniqueid_len, } } else { - - ifp1->if_eflags |= (IFEF_INUSE + IFEF_REUSE); + if (!ifp1->if_lock) + panic("ifp's lock is gone\n"); + ifnet_lock_exclusive(ifp1); + ifp1->if_eflags |= (IFEF_INUSE | IFEF_REUSE); + ifnet_lock_done(ifp1); *ifp = ifp1; goto end; } @@ -2247,27 +2708,26 @@ int dlil_if_acquire(u_long family, void *uniqueid, size_t uniqueid_len, ifp1 = (struct ifnet *)dlifp1; ifp1->if_eflags |= IFEF_INUSE; + ifp1->if_name = dlifp1->if_namestorage; TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link); *ifp = ifp1; end: + lck_mtx_unlock(dlil_ifnet_mutex); - splx(s); - thread_funnel_set(network_flock, funnel_state); return ret; } void dlil_if_release(struct ifnet *ifp) { struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp; - int s; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - s = splnet(); + /* Interface does not have a lock until it is attached - radar 3713951 */ + if (ifp->if_lock) + ifnet_lock_exclusive(ifp); ifp->if_eflags &= ~IFEF_INUSE; ifp->if_ioctl = dlil_recycle_ioctl; ifp->if_output = dlil_recycle_output; @@ -2276,8 +2736,7 @@ void dlil_if_release(struct ifnet *ifp) strncpy(dlifp->if_namestorage, ifp->if_name, IFNAMSIZ); ifp->if_name = dlifp->if_namestorage; + if (ifp->if_lock) + ifnet_lock_done(ifp); - splx(s); - thread_funnel_set(network_flock, funnel_state); } - diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h index 21f6d01fe..6824572cb 100644 --- a/bsd/net/dlil.h +++ b/bsd/net/dlil.h @@ -25,22 +25,23 @@ * Data Link Inteface Layer * Author: Ted Walker */ - - #ifndef DLIL_H #define DLIL_H -#include +#ifdef KERNEL +#include +#include #if __STDC__ struct ifnet; struct mbuf; struct ether_header; +struct sockaddr_dl; #endif -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL_PRIVATE #define DLIL_LAST_FILTER -1 #define DLIL_NULL_FILTER -2 @@ 
-54,6 +55,8 @@ struct ether_header; #include #include +#endif KERNEL_PRIVATE + enum { BPF_TAP_DISABLE, BPF_TAP_INPUT, @@ -61,143 +64,62 @@ enum { BPF_TAP_INPUT_OUTPUT }; - -struct dl_tag_attr_str { - u_long dl_tag; - short if_flags; - short if_unit; - u_long if_family; - u_long protocol_family; -}; - - -struct dlil_pr_flt_str { - caddr_t cookie; - - int (*filter_dl_input)(caddr_t cookie, - struct mbuf **m, - char **frame_header, - struct ifnet **ifp); - - - int (*filter_dl_output)(caddr_t cookie, - struct mbuf **m, - struct ifnet **ifp, - struct sockaddr **dest, - char *dest_linkaddr, - char *frame_type); - - int (*filter_dl_event)(caddr_t cookie, - struct kern_event_msg *event_msg); - - int (*filter_dl_ioctl)(caddr_t cookie, - struct ifnet *ifp, - u_long ioctl_cmd, - caddr_t ioctl_arg); - - int (*filter_detach)(caddr_t cookie); - u_long reserved[2]; -}; +#ifdef KERNEL_PRIVATE +struct kev_msg; +struct iff_filter; struct dlil_if_flt_str { caddr_t cookie; int (*filter_if_input)(caddr_t cookie, - struct ifnet **ifnet_ptr, + struct ifnet **ifp, struct mbuf **mbuf_ptr, char **frame_ptr); int (*filter_if_event)(caddr_t cookie, - struct ifnet **ifnet_ptr, - struct kern_event_msg **event_msg_ptr); + struct ifnet *ifp, + struct kev_msg *event_msg_ptr); int (*filter_if_output)(caddr_t cookie, - struct ifnet **ifnet_ptr, + struct ifnet **ifp, struct mbuf **mbuf_ptr); int (*filter_if_ioctl)(caddr_t cookie, - struct ifnet *ifnet_ptr, + struct ifnet *ifp, u_long ioctl_code_ptr, caddr_t ioctl_arg_ptr); int (*filter_if_free)(caddr_t cookie, - struct ifnet *ifnet_ptr); + struct ifnet *ifp); - int (*filter_detach)(caddr_t cookie); + int (*filter_detach)(caddr_t cookie); u_long reserved[2]; }; - #define DLIL_PR_FILTER 1 #define DLIL_IF_FILTER 2 typedef int (*dl_input_func)(struct mbuf *m, char *frame_header, - struct ifnet *ifp, u_long dl_tag, int sync_ok); + struct ifnet *ifp, u_long protocol_family, int sync_ok); typedef int (*dl_pre_output_func)(struct ifnet *ifp, - struct mbuf **m, - struct sockaddr *dest, - caddr_t route_entry, - char *frame_type, - char *dst_addr, - u_long dl_tag); + u_long protocol_family, + struct mbuf **m, + const struct sockaddr *dest, + caddr_t route_entry, + char *frame_type, + char *dst_addr); -typedef int (*dl_event_func)(struct kern_event_msg *event, - u_long dl_tag); +typedef void (*dl_event_func)(struct ifnet *ifp, struct kev_msg *event); typedef int (*dl_offer_func)(struct mbuf *m, char *frame_header); -typedef int (*dl_ioctl_func)(u_long dl_tag, +typedef int (*dl_ioctl_func)(u_long protocol_family, struct ifnet *ifp, u_long ioctl_cmd, caddr_t ioctl_arg); +typedef int (*dl_detached_func)(u_long protocol_family, struct ifnet *ifp); - - -#ifdef __APPLE_API_PRIVATE -struct dlil_filterq_entry { - TAILQ_ENTRY(dlil_filterq_entry) que; - u_long filter_id; - int type; - union { - struct dlil_if_flt_str if_filter; - struct dlil_pr_flt_str pr_filter; - } variants; -}; -#else -struct dlil_filterq_entry; -#endif /* __APPLE_API_PRIVATE */ - -TAILQ_HEAD(dlil_filterq_head, dlil_filterq_entry); - - -struct if_proto { - TAILQ_ENTRY(if_proto) next; - u_long dl_tag; - struct dlil_filterq_head pr_flt_head; - struct ifnet *ifp; - dl_input_func dl_input; - dl_pre_output_func dl_pre_output; - dl_event_func dl_event; - dl_offer_func dl_offer; - dl_ioctl_func dl_ioctl; - u_long protocol_family; - u_long reserved[4]; - -}; - -#ifdef __APPLE_API_PRIVATE -TAILQ_HEAD(dlil_proto_head, if_proto); - -struct dlil_tag_list_entry { - TAILQ_ENTRY(dlil_tag_list_entry) next; - struct ifnet *ifp; - u_long 
dl_tag; -}; -#endif /* __APPLE_API_PRIVATE */ - - -#ifdef __APPLE_API_OBSOLETE /* Obsolete types */ #define DLIL_DESC_RAW 1 #define DLIL_DESC_802_2 2 @@ -210,9 +132,9 @@ struct dlil_tag_list_entry { * DLIL_DESC_802_2_SNAP - obsolete, data in variants.desc_802_2_SNAP * protocol field in host byte order */ -#endif /* __APPLE_API_OBSOLETE */ +#endif KERNEL_PRIVATE -/* Ehernet specific types */ +/* Ethernet specific types */ #define DLIL_DESC_ETYPE2 4 #define DLIL_DESC_SAP 5 #define DLIL_DESC_SNAP 6 @@ -232,6 +154,7 @@ struct dlil_tag_list_entry { * variants.native_type_length. */ +#ifdef KERNEL_PRIVATE struct dlil_demux_desc { TAILQ_ENTRY(dlil_demux_desc) next; @@ -269,7 +192,6 @@ struct dlil_demux_desc { TAILQ_HEAD(ddesc_head_str, dlil_demux_desc); - struct dlil_proto_reg_str { struct ddesc_head_str demux_desc_head; u_long interface_family; @@ -281,23 +203,39 @@ struct dlil_proto_reg_str { dl_event_func event; dl_offer_func offer; dl_ioctl_func ioctl; - u_long reserved[4]; + dl_detached_func detached; + u_long reserved[3]; }; -int dlil_attach_interface_filter(struct ifnet *ifnet_ptr, - struct dlil_if_flt_str *interface_filter, - u_long *filter_id, - int insertion_point); +int dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, + interface_filter_t *filter_ref); + +struct ifnet_stat_increment_param; + +int +dlil_input_with_stats(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, + const struct ifnet_stat_increment_param *stats); int dlil_input(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail); int -dlil_output(u_long dl_tag, +dlil_output_list( + struct ifnet *ifp, + u_long protocol_family, + struct mbuf *packetlist, + caddr_t route, + const struct sockaddr *dest, + int raw); + +int +dlil_output( + struct ifnet *ifp, + u_long protocol_family, struct mbuf *m, caddr_t route, - struct sockaddr *dest, + const struct sockaddr *dest, int raw); @@ -307,32 +245,82 @@ dlil_ioctl(u_long proto_family, u_long ioctl_code, caddr_t ioctl_arg); +errno_t +dlil_resolve_multi( + struct ifnet *ifp, + const struct sockaddr *proto_addr, + struct sockaddr *ll_addr, + size_t ll_len); + +/* + * Send arp internal bypasses the check for + * IPv4LL. 
+ */ +errno_t +dlil_send_arp_internal( + ifnet_t ifp, + u_int16_t arpop, + const struct sockaddr_dl* sender_hw, + const struct sockaddr* sender_proto, + const struct sockaddr_dl* target_hw, + const struct sockaddr* target_proto); + +errno_t +dlil_send_arp( + ifnet_t ifp, + u_int16_t arpop, + const struct sockaddr_dl* sender_hw, + const struct sockaddr* sender_proto, + const struct sockaddr_dl* target_hw, + const struct sockaddr* target_proto); + +int +dlil_ioctl_locked(u_long proto_family, + struct ifnet *ifp, + u_long ioctl_code, + caddr_t ioctl_arg); + int -dlil_attach_protocol(struct dlil_proto_reg_str *proto, - u_long *dl_tag); +dlil_attach_protocol(struct dlil_proto_reg_str *proto); int -dlil_detach_protocol(u_long dl_tag); +dlil_detach_protocol(struct ifnet *ifp, u_long protocol_family); int dlil_if_attach(struct ifnet *ifp); +#ifdef BSD_KERNEL_PRIVATE + int -dlil_attach_protocol_filter(u_long dl_tag, - struct dlil_pr_flt_str *proto_filter, - u_long *filter_id, - int insertion_point); +dlil_if_attach_with_address( + struct ifnet *ifp, + const struct sockaddr_dl *ll_addr); + int -dlil_detach_filter(u_long filter_id); +dlil_attach_protocol_kpi(ifnet_t ifp, protocol_family_t protocol, + const struct ifnet_attach_proto_param *proto_details); + +errno_t dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, + bpf_packet_func callback); + +#endif + +void +dlil_detach_filter(interface_filter_t filter); struct dlil_ifmod_reg_str { int (*add_if)(struct ifnet *ifp); int (*del_if)(struct ifnet *ifp); - int (*add_proto)(struct ddesc_head_str *demux_desc_head, - struct if_proto *proto, u_long dl_tag); - int (*del_proto)(struct if_proto *proto, u_long dl_tag); - int (*ifmod_ioctl)(struct ifnet *ifp, u_long ioctl_cmd, caddr_t data); - int (*shutdown)(); + int (*add_proto)(struct ifnet *ifp, u_long protocol_family, + struct ddesc_head_str *demux_desc_head); +#ifdef __KPI_INTERFACE__ + ifnet_del_proto_func del_proto; + ifnet_ioctl_func ifmod_ioctl; +#else + void* del_proto; + void* ifmod_ioctl; +#endif + int (*shutdown)(void); int (*init_if)(struct ifnet *ifp); u_long reserved[3]; }; @@ -341,24 +329,6 @@ struct dlil_ifmod_reg_str { int dlil_reg_if_modules(u_long interface_family, struct dlil_ifmod_reg_str *ifmod_reg); -struct dlil_protomod_reg_str { - /* - * attach the protocol to the interface and return the dl_tag - */ - int (*attach_proto)(struct ifnet *ifp, u_long *dl_tag); - - /* - * detach the protocol from the interface. - * this is optionnal. If it is NULL, DLIL will use 0 default detach function. - */ - int (*detach_proto)(struct ifnet *ifp, u_long dl_tag); - - /* - * reserved for future use. MUST be NULL. - */ - u_long reserved[4]; -}; - /* Function : dlil_reg_proto_module @@ -402,7 +372,8 @@ EINVAL: */ int dlil_reg_proto_module(u_long protocol_family, u_long interface_family, - struct dlil_protomod_reg_str *protomod_reg); + int (*attach)(struct ifnet *ifp, u_long protocol_family), + int (*detach)(struct ifnet *ifp, u_long protocol_family)); /* @@ -438,16 +409,11 @@ Function : dlil_plumb_protocol dlil_plumb_protocol() will plumb a protocol to an actual interface. This will find a registered protocol module and call its attach function. - The module will typically call dlil_attach_protocol with the appropriate parameters, - and will return the dl_tag of the attachement. - It is up to the caller to handle the dl_tag. - Some protocol (IPv4) will stick it in their internal structure for future use. - Some other protocol (IPv6) can ignore the dl_tag. 
- + The module will typically call dlil_attach_protocol with the appropriate parameters. + Parameters : 'protocol_family' is PF_INET, PF_INET6, ... 'ifp' is the interface to plumb the protocol to. - 'dl_tag' is the tag returned from the succesful attachement. Return code : @@ -464,7 +430,7 @@ other: Error returned by the attach_proto function */ -int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp, u_long *dl_tag); +int dlil_plumb_protocol(u_long protocol_family, struct ifnet *ifp); /* @@ -513,9 +479,11 @@ dlil_inject_pr_output(struct mbuf *m, int dlil_inject_if_output(struct mbuf *m, u_long from_id); -int -dlil_find_dltag(u_long if_family, short unit, u_long proto_family, u_long *dl_tag); - +#ifdef KERNEL_PRIVATE +void +dlil_post_msg(struct ifnet *ifp,u_long event_subclass, u_long event_code, + struct net_event_data *event_data, u_long event_data_len); +#endif int dlil_event(struct ifnet *ifp, struct kern_event_msg *event); @@ -525,6 +493,12 @@ int dlil_dereg_if_modules(u_long interface_family); int dlil_if_detach(struct ifnet *ifp); +void +ifp_reference(struct ifnet *ifp); + +void +ifp_release(struct ifnet *ifp); + /* @@ -604,7 +578,7 @@ EBUSY: */ -int dlil_if_acquire(u_long family, void *uniqueid, size_t uniqueid_len, +int dlil_if_acquire(u_long family, const void *uniqueid, size_t uniqueid_len, struct ifnet **ifp); @@ -619,10 +593,10 @@ Function : dlil_if_release The if_eflags IF_INUSE will be cleared. The fields if_output, if_ioctl, if_free and if_set_bpf_tap will be changed to point to DLIL private functions. - After calling dlil_if_acquire, the driver can safely terminate and + After calling dlil_if_release, the driver can safely terminate and unload if necessary. - Note : if the call to dlil_if_detach returns DLIL_WAIT_FOR_FREE, the - driver can safely ignore it and call dlil_if_release. + Note: your driver should only call dlil_if_release once your if_free + function has been called. Parameters : ifp is the pointer to the ifnet to release. 
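
 Example (a sketch only, with error handling elided; enaddr is assumed
 to be the device MAC address, used as uniqueid so that a later
 re-acquire returns the same ifnet):

	struct ifnet *ifp;

	dlil_if_acquire(APPLE_IF_FAM_ETHERNET, enaddr, ETHER_ADDR_LEN, &ifp);
	... initialize the ifnet and call dlil_if_attach(ifp) ...
	dlil_if_detach(ifp);
	... wait for the driver's if_free to be called ...
	dlil_if_release(ifp);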
@@ -631,5 +605,6 @@ Parameters : void dlil_if_release(struct ifnet *ifp); -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL */ #endif /* DLIL_H */ diff --git a/bsd/net/dlil_pvt.h b/bsd/net/dlil_pvt.h index 91da52b03..af688c107 100644 --- a/bsd/net/dlil_pvt.h +++ b/bsd/net/dlil_pvt.h @@ -22,26 +22,20 @@ #ifndef DLIL_PVT_H #define DLIL_PVT_H #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #include #include -struct dlil_if_filterq_entry { - TAILQ_ENTRY(dlil_if_filterq_entry) que; - struct dlil_interface_filter_str if_filter; -}; - - struct dlil_family_mod_str { TAILQ_ENTRY(dlil_family_mod_str) dl_fam_next; char *interface_family; int (*add_if)(struct ifnet_ptr *ifp); int (*del_if)(struct ifnet *ifp); - int (*add_proto)(TAILQ_HEAD(ddesc_head_name, dlil_demux_desc) demux_desc_head, - struct if_proto *proto); - int (*del_proto)(struct if_proto *proto); + int (*add_proto)(struct ifnet *ifp, u_long protocol_family, + struct ddesc_head_str *demux_desc_head); + int (*del_proto)(struct ifnet *ifp, u_long proto_family); } -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/net/ether_at_pr_module.c b/bsd/net/ether_at_pr_module.c index d811b9c33..7f031bdd8 100644 --- a/bsd/net/ether_at_pr_module.c +++ b/bsd/net/ether_at_pr_module.c @@ -65,7 +65,6 @@ #include #include -#include #include #include #include @@ -90,19 +89,22 @@ extern struct ifqueue atalkintrq; #include #endif /* NVLAN > 0 */ -static -u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -#define IFP2AC(IFP) ((struct arpcom *)IFP) - - struct dl_es_at_entry { - struct ifnet *ifp; - u_long dl_tag; - int ref_count; + struct ifnet *ifp; + int ref_count; }; +/* Local function declarations */ +int at_ether_input(struct mbuf *m, char *frame_header, struct ifnet *ifp, + u_long protocol_family, int sync_ok); +int ether_pre_output(struct ifnet *ifp, u_long protocol_family, struct mbuf **m0, + const struct sockaddr *dst_netaddr, caddr_t route, char *type, char *edst); +int ether_prmod_ioctl(u_long protocol_family, struct ifnet *ifp, u_long command, + caddr_t data); +int ether_attach_at(struct ifnet *ifp); +void ether_detach_at(struct ifnet *ifp); + /* * Temp static for protocol registration XXX @@ -118,155 +120,44 @@ static struct dl_es_at_entry en_at_array[MAX_EN_COUNT]; * the ether header, which is provided separately. */ int -at_ether_input(m, frame_header, ifp, dl_tag, sync_ok) - struct mbuf *m; - char *frame_header; - struct ifnet *ifp; - u_long dl_tag; - int sync_ok; +at_ether_input( + struct mbuf *m, + __unused char *frame_header, + __unused struct ifnet *ifp, + __unused u_long protocol_family, + __unused int sync_ok) { - register struct ether_header *eh = (struct ether_header *) frame_header; - register struct ifqueue *inq=0; - u_short ether_type; - int s; - u_int16_t ptype = -1; - unsigned char buf[18]; - -#if NETAT - register struct llc *l; -#endif - - if ((ifp->if_flags & IFF_UP) == 0) { - m_freem(m); - return EJUSTRETURN; - } - - ifp->if_lastchange = time; - - if (eh->ether_dhost[0] & 1) { - if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost, - sizeof(etherbroadcastaddr)) == 0) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - } - if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; + /* + * note: for AppleTalk we need to pass the enet header of the + * packet up stack. 
To do so, we made sure in that the FULL packet + * is copied in the mbuf by the mace driver, and only the m_data and + * length have been shifted to make IP and the other guys happy. + */ + + m->m_data -= sizeof(struct ether_header); + m->m_len += sizeof(struct ether_header); + m->m_pkthdr.len += sizeof(struct ether_header); + proto_input(PF_APPLETALK, m); - ether_type = ntohs(eh->ether_type); - -#if NVLAN > 0 - if (ether_type == vlan_proto) { - if (vlan_input(eh, m) < 0) - ifp->if_data.ifi_noproto++; - return EJUSTRETURN; - } -#endif /* NVLAN > 0 */ - - if (ether_type > ETHERMTU) - return ENOENT; - -#if NETAT - l = mtod(m, struct llc *); - - switch (l->llc_dsap) { - case LLC_SNAP_LSAP: - - /* Temporary hack: check for AppleTalk and AARP packets */ - /* WARNING we're checking only on the "ether_type" (the 2 bytes - * of the SNAP header. This shouldn't be a big deal, - * AppleTalk pat_input is making sure we have the right packets - * because it needs to discrimante AARP from EtherTalk packets. - */ - - if (l->llc_ssap == LLC_SNAP_LSAP && - l->llc_un.type_snap.control == 0x03) { - -#ifdef APPLETALK_DEBUG - printf("new_ether_input: SNAP Cntrol type=0x%x Src=%s\n", - l->llc_un.type_snap.ether_type, - ether_sprintf(buf, &eh->ether_shost)); - printf(" Dst=%s\n", - ether_sprintf(buf, &eh->ether_dhost)); -#endif /* APPLETALK_DEBUG */ - - if ((l->llc_un.type_snap.ether_type == 0x809B) || - (l->llc_un.type_snap.ether_type == 0x80F3)) { - - - /* - * note: for AppleTalk we need to pass the enet header of the - * packet up stack. To do so, we made sure in that the FULL packet - * is copied in the mbuf by the mace driver, and only the m_data and - * length have been shifted to make IP and the other guys happy. - */ - - m->m_data -= sizeof(*eh); - m->m_len += sizeof(*eh); - m->m_pkthdr.len += sizeof(*eh); -#ifdef APPLETALK_DEBUG - l == (struct llc *)(eh+1); - if (l->llc_un.type_snap.ether_type == 0x80F3) { - kprintf("new_ether_input: RCV AppleTalk type=0x%x Src=%s\n", - l->llc_un.type_snap.ether_type, - ether_sprintf(buf, &eh->ether_shost)); - kprintf(" Dst=%s\n", - ether_sprintf(buf, &eh->ether_dhost)); - } -#endif /* APPLETALK_DEBUG */ - schednetisr(NETISR_APPLETALK); - inq = &atalkintrq ; - - break; - } - } - - break; - - - default: - return ENOENT; - } - - - if (inq == 0) - return ENOENT; - - s = splimp(); - if (IF_QFULL(inq)) { - IF_DROP(inq); - m_freem(m); - splx(s); - return EJUSTRETURN; - } else - IF_ENQUEUE(inq, m); - splx(s); return 0; -#else - return ENOENT; -#endif /* NETAT */ } int -ether_pre_output(ifp, m0, dst_netaddr, route, type, edst, dl_tag ) - struct ifnet *ifp; - struct mbuf **m0; - struct sockaddr *dst_netaddr; - caddr_t route; - char *type; - char *edst; - u_long dl_tag; +ether_pre_output( + struct ifnet *ifp, + __unused u_long protocol_family, + struct mbuf **m0, + const struct sockaddr *dst_netaddr, + __unused caddr_t route, + char *type, + char *edst) { - int s; register struct mbuf *m = *m0; - register struct rtentry *rt; register struct ether_header *eh; - int off, len = m->m_pkthdr.len; int hlen; /* link layer header lenght */ - struct arpcom *ac = IFP2AC(ifp); @@ -314,42 +205,27 @@ ether_pre_output(ifp, m0, dst_netaddr, route, type, edst, dl_tag ) int -ether_prmod_ioctl(dl_tag, ifp, command, data) - u_long dl_tag; - struct ifnet *ifp; - int command; - caddr_t data; +ether_prmod_ioctl( + __unused u_long protocol_family, + struct ifnet *ifp, + u_long command, + caddr_t data) { - struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error 
= 0; - boolean_t funnel_state; - struct arpcom *ac = (struct arpcom *) ifp; - struct sockaddr_dl *sdl; - struct sockaddr_in *sin; - u_char *e_addr; - - - funnel_state = thread_funnel_set(network_flock, TRUE); switch (command) { case SIOCSIFADDR: if ((ifp->if_flags & IFF_RUNNING) == 0) { - ifp->if_flags |= IFF_UP; + ifnet_set_flags(ifp, IFF_UP, IFF_UP); dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); } break; case SIOCGIFADDR: - { - struct sockaddr *sa; - - sa = (struct sockaddr *) & ifr->ifr_data; - bcopy(IFP2AC(ifp)->ac_enaddr, - (caddr_t) sa->sa_data, ETHER_ADDR_LEN); - } + ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); break; case SIOCSIFMTU: @@ -367,102 +243,92 @@ ether_prmod_ioctl(dl_tag, ifp, command, data) return EOPNOTSUPP; } - (void) thread_funnel_set(network_flock, funnel_state); return (error); } -void -ether_attach_at(struct ifnet *ifp, u_long *at_dl_tag, u_long *aarp_dl_tag) +int +ether_attach_at( + struct ifnet *ifp) { struct dlil_proto_reg_str reg; struct dlil_demux_desc desc; struct dlil_demux_desc desc2; - u_short native = 0; /* 802.2 frames use a length here */ int stat; int first_empty; int i; - + u_int8_t atalk_snap[5] = {0x08, 0x00, 0x07, 0x80, 0x9b}; + u_int8_t aarp_snap[5] = {0x00, 0x00, 0x00, 0x80, 0xf3}; first_empty = MAX_EN_COUNT; - for (i=0; i < MAX_EN_COUNT; i++) { - if (en_at_array[i].ifp == 0) - first_empty = i; - - if (en_at_array[i].ifp == ifp) { - en_at_array[i].ref_count++; - *at_dl_tag = *aarp_dl_tag = en_at_array[i].dl_tag; - return; - } + for (i=0; i < MAX_EN_COUNT; i++) { + if (en_at_array[i].ifp == 0) + first_empty = i; + + if (en_at_array[i].ifp == ifp) { + en_at_array[i].ref_count++; + return 0; + } } - if (first_empty == MAX_EN_COUNT) - return; - - TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_802_2_SNAP; - desc.variants.desc_802_2_SNAP.dsap = LLC_SNAP_LSAP; - desc.variants.desc_802_2_SNAP.ssap = LLC_SNAP_LSAP; - desc.variants.desc_802_2_SNAP.control_code = 0x03; - desc.variants.desc_802_2_SNAP.org[0] = 0x08; - desc.variants.desc_802_2_SNAP.org[1] = 0x00; - desc.variants.desc_802_2_SNAP.org[2] = 0x07; - desc.variants.desc_802_2_SNAP.protocol_type = 0x809B; - desc.native_type = NULL; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); - reg.interface_family = ifp->if_family; - reg.unit_number = ifp->if_unit; - reg.input = at_ether_input; - reg.pre_output = ether_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = ether_prmod_ioctl; - reg.default_proto = 0; - reg.protocol_family = PF_APPLETALK; - - desc2 = desc; - desc2.variants.desc_802_2_SNAP.protocol_type = 0x80F3; - desc2.variants.desc_802_2_SNAP.org[0] = 0; - desc2.variants.desc_802_2_SNAP.org[1] = 0; - desc2.variants.desc_802_2_SNAP.org[2] = 0; - - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc2, next); - - stat = dlil_attach_protocol(®, at_dl_tag); - if (stat) { - printf("WARNING: ether_attach_at can't attach at to interface\n"); - return; - } - - *aarp_dl_tag = *at_dl_tag; - - en_at_array[first_empty].ifp = ifp; - en_at_array[first_empty].dl_tag = *at_dl_tag; - en_at_array[first_empty].ref_count = 1; + if (first_empty == MAX_EN_COUNT) + return ENOMEM; + + bzero(®, sizeof(reg)); + bzero(&desc, sizeof(desc)); + bzero(&desc2, sizeof(desc2)); + + TAILQ_INIT(®.demux_desc_head); + reg.interface_family = ifp->if_family; + reg.unit_number = ifp->if_unit; + reg.input = at_ether_input; + reg.pre_output = ether_pre_output; + reg.ioctl = ether_prmod_ioctl; + reg.protocol_family = PF_APPLETALK; + + desc.type = DLIL_DESC_SNAP; + desc.native_type = atalk_snap; + 
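+	/*
+	 * For DLIL_DESC_SNAP, native_type points at the 5-byte SNAP
+	 * protocol id (3-byte OUI followed by the 2-byte type; 0x809b
+	 * is EtherTalk, 0x80f3 is AARP) and native_type_length gives
+	 * its size.
+	 */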
desc.variants.native_type_length = sizeof(atalk_snap); + TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); + + desc2.type = DLIL_DESC_SNAP; + desc2.native_type = aarp_snap; + desc2.variants.native_type_length = sizeof(aarp_snap); + TAILQ_INSERT_TAIL(®.demux_desc_head, &desc2, next); + + stat = dlil_attach_protocol(®); + if (stat) { + printf("WARNING: ether_attach_at can't attach at to interface\n"); + return stat; + } + en_at_array[first_empty].ifp = ifp; + en_at_array[first_empty].ref_count = 1; + + return 0; } /* ether_attach_at */ void ether_detach_at(struct ifnet *ifp) { - int i; - - for (i=0; i < MAX_EN_COUNT; i++) { - if (en_at_array[i].ifp == ifp) - break; - } - - if (i < MAX_EN_COUNT) { - if (en_at_array[i].ref_count > 1) - en_at_array[i].ref_count--; - else { - if (en_at_array[i].ref_count == 1) { - dlil_detach_protocol(en_at_array[i].dl_tag); - en_at_array[i].ifp = 0; - } - } - } + int i; + + for (i=0; i < MAX_EN_COUNT; i++) { + if (en_at_array[i].ifp == ifp) + break; + } + + if (i < MAX_EN_COUNT) { + if (en_at_array[i].ref_count > 1) + en_at_array[i].ref_count--; + else { + if (en_at_array[i].ref_count == 1) { + dlil_detach_protocol(ifp, PF_APPLETALK); + en_at_array[i].ifp = 0; + } + } + } } diff --git a/bsd/net/ether_if_module.c b/bsd/net/ether_if_module.c index 5d47f82b2..0e50b415b 100644 --- a/bsd/net/ether_if_module.c +++ b/bsd/net/ether_if_module.c @@ -65,11 +65,11 @@ #include #include -#include #include #include #include #include +#include #include #include /* For M_LOOP */ @@ -85,12 +85,10 @@ #include #include +#include #include -extern int vlan_demux(struct ifnet * ifp, struct mbuf *, - char * frame_header, struct if_proto * * proto); - #if LLC && CCITT extern struct ifqueue pkintrq; #endif @@ -107,340 +105,422 @@ extern struct ifqueue atalkintrq; #include #endif -static u_long lo_dlt = 0; +#define memcpy(x,y,z) bcopy(y, x, z) + -#define IFP2AC(IFP) ((struct arpcom *)IFP) +SYSCTL_DECL(_net_link); +SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); struct en_desc { - u_int16_t type; /* Type of protocol stored in data */ - struct if_proto *proto; /* Protocol structure */ - u_long data[2]; /* Protocol data */ + u_int16_t type; /* Type of protocol stored in data */ + u_long protocol_family; /* Protocol family */ + u_long data[2]; /* Protocol data */ }; - -#define ETHER_DESC_BLK_SIZE (10) -#define MAX_INTERFACES 50 +/* descriptors are allocated in blocks of ETHER_DESC_BLK_SIZE */ +#define ETHER_DESC_BLK_SIZE (10) /* - * Statics for demux module + * Header for the demux list, hangs off of IFP at family_cookie */ struct ether_desc_blk_str { - u_long n_max_used; - u_long n_count; - struct en_desc *block_ptr; + u_long n_max_used; + u_long n_count; + u_long n_used; + struct en_desc block_ptr[1]; }; +/* Size of the above struct before the array of struct en_desc */ +#define ETHER_DESC_HEADER_SIZE ((size_t)&(((struct ether_desc_blk_str*)0)->block_ptr[0])) +__private_extern__ u_char etherbroadcastaddr[ETHER_ADDR_LEN] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -static struct ether_desc_blk_str ether_desc_blk[MAX_INTERFACES]; - - -/* from if_ethersubr.c */ -int ether_resolvemulti __P((struct ifnet *, struct sockaddr **, - struct sockaddr *)); +int ether_add_proto_old(struct ifnet *ifp, u_long protocol_family, struct ddesc_head_str *desc_head); +int ether_add_if(struct ifnet *ifp); +int ether_del_if(struct ifnet *ifp); +int ether_init_if(struct ifnet *ifp); +int ether_family_init(void); /* - * Release all descriptor entries owned by this dl_tag (there may be 
several). + * Release all descriptor entries owned by this protocol (there may be several). * Setting the type to 0 releases the entry. Eventually we should compact-out * the unused entries. */ -__private_extern__ int -ether_del_proto(struct if_proto *proto, u_long dl_tag) +int +ether_del_proto( + ifnet_t ifp, + protocol_family_t protocol_family) { - struct en_desc* ed = ether_desc_blk[proto->ifp->family_cookie].block_ptr; - u_long current = 0; - int found = 0; - - for (current = ether_desc_blk[proto->ifp->family_cookie].n_max_used; - current > 0; current--) { - if (ed[current - 1].proto == proto) { - found = 1; - ed[current - 1].type = 0; - - if (current == ether_desc_blk[proto->ifp->family_cookie].n_max_used) { - ether_desc_blk[proto->ifp->family_cookie].n_max_used--; - } - } - } - - return found; + struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie; + u_long current = 0; + int found = 0; + + if (desc_blk == NULL) + return 0; + + for (current = desc_blk->n_max_used; current > 0; current--) { + if (desc_blk->block_ptr[current - 1].protocol_family == protocol_family) { + found = 1; + desc_blk->block_ptr[current - 1].type = 0; + desc_blk->n_used--; + } + } + + if (desc_blk->n_used == 0) { + FREE(ifp->family_cookie, M_IFADDR); + ifp->family_cookie = 0; + } + else { + /* Decrement n_max_used */ + for (; desc_blk->n_max_used > 0 && desc_blk->block_ptr[desc_blk->n_max_used - 1].type == 0; desc_blk->n_max_used--) + ; + } + + return 0; } +static int +ether_add_proto_internal( + struct ifnet *ifp, + protocol_family_t protocol, + const struct ifnet_demux_desc *demux) +{ + struct en_desc *ed; + struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie; + u_int32_t i; + + switch (demux->type) { + /* These types are supported */ + /* Top three are preferred */ + case DLIL_DESC_ETYPE2: + if (demux->datalen != 2) { + return EINVAL; + } + break; + + case DLIL_DESC_SAP: + if (demux->datalen != 3) { + return EINVAL; + } + break; + + case DLIL_DESC_SNAP: + if (demux->datalen != 5) { + return EINVAL; + } + break; + + default: + return ENOTSUP; + } + + // Verify a matching descriptor does not exist. + if (desc_blk != NULL) { + switch (demux->type) { + case DLIL_DESC_ETYPE2: + for (i = 0; i < desc_blk->n_max_used; i++) { + if (desc_blk->block_ptr[i].type == DLIL_DESC_ETYPE2 && + desc_blk->block_ptr[i].data[0] == + *(u_int16_t*)demux->data) { + return EADDRINUSE; + } + } + break; + case DLIL_DESC_SAP: + case DLIL_DESC_SNAP: + for (i = 0; i < desc_blk->n_max_used; i++) { + if (desc_blk->block_ptr[i].type == demux->type && + bcmp(desc_blk->block_ptr[i].data, demux->data, + demux->datalen) == 0) { + return EADDRINUSE; + } + } + break; + } + } + + // Check for case where all of the descriptor blocks are in use + if (desc_blk == NULL || desc_blk->n_used == desc_blk->n_count) { + struct ether_desc_blk_str *tmp; + u_long new_count = ETHER_DESC_BLK_SIZE; + u_long new_size; + u_long old_size = 0; + + i = 0; + + if (desc_blk) { + new_count += desc_blk->n_count; + old_size = desc_blk->n_count * sizeof(struct en_desc) + ETHER_DESC_HEADER_SIZE; + i = desc_blk->n_used; + } + + new_size = new_count * sizeof(struct en_desc) + ETHER_DESC_HEADER_SIZE; + + tmp = _MALLOC(new_size, M_IFADDR, M_WAITOK); + if (tmp == 0) { + /* + * Remove any previous descriptors set in the call. 
+ */ + return ENOMEM; + } + + bzero((char *)tmp + old_size, new_size - old_size); + if (desc_blk) { + bcopy(desc_blk, tmp, old_size); + FREE(desc_blk, M_IFADDR); + } + desc_blk = tmp; + ifp->family_cookie = (u_long)desc_blk; + desc_blk->n_count = new_count; + } + else { + /* Find a free entry */ + for (i = 0; i < desc_blk->n_count; i++) { + if (desc_blk->block_ptr[i].type == 0) { + break; + } + } + } + + /* Bump n_max_used if appropriate */ + if (i + 1 > desc_blk->n_max_used) { + desc_blk->n_max_used = i + 1; + } + + ed = &desc_blk->block_ptr[i]; + ed->protocol_family = protocol; + ed->data[0] = 0; + ed->data[1] = 0; + + switch (demux->type) { + case DLIL_DESC_ETYPE2: + /* 2 byte ethernet raw protocol type is at native_type */ + /* protocol must be in network byte order */ + ed->type = DLIL_DESC_ETYPE2; + ed->data[0] = *(u_int16_t*)demux->data; + break; + + case DLIL_DESC_SAP: + ed->type = DLIL_DESC_SAP; + bcopy(demux->data, &ed->data[0], 3); + break; + + case DLIL_DESC_SNAP: { + u_int8_t* pDest = ((u_int8_t*)&ed->data[0]) + 3; + ed->type = DLIL_DESC_SNAP; + bcopy(demux->data, pDest, 5); + } + break; + } + + desc_blk->n_used++; + + return 0; +} +int +ether_add_proto( + ifnet_t ifp, + protocol_family_t protocol, + const struct ifnet_demux_desc *demux_list, + u_int32_t demux_count) +{ + int error = 0; + u_int32_t i; + + for (i = 0; i < demux_count; i++) { + error = ether_add_proto_internal(ifp, protocol, &demux_list[i]); + if (error) { + ether_del_proto(ifp, protocol); + break; + } + } + + return error; +} __private_extern__ int -ether_add_proto(struct ddesc_head_str *desc_head, struct if_proto *proto, u_long dl_tag) +ether_add_proto_old( + struct ifnet *ifp, + u_long protocol_family, + struct ddesc_head_str *desc_head) { - char *current_ptr; - struct dlil_demux_desc *desc; - struct en_desc *ed; - struct en_desc *last; - u_long *bitmask; - u_long *proto_id; - u_long i; - short total_length; - u_long block_count; - u_long *tmp; - - - TAILQ_FOREACH(desc, desc_head, next) { - switch (desc->type) { - /* These types are supported */ - /* Top three are preferred */ - case DLIL_DESC_ETYPE2: - if (desc->variants.native_type_length != 2) - return EINVAL; - break; - - case DLIL_DESC_SAP: - if (desc->variants.native_type_length != 3) - return EINVAL; - break; - - case DLIL_DESC_SNAP: - if (desc->variants.native_type_length != 5) - return EINVAL; - break; - - case DLIL_DESC_802_2: - case DLIL_DESC_802_2_SNAP: - break; - - case DLIL_DESC_RAW: - if (desc->variants.bitmask.proto_id_length == 0) - break; - /* else fall through, bitmask variant not supported */ - - default: - ether_del_proto(proto, dl_tag); - return EINVAL; - } - - ed = ether_desc_blk[proto->ifp->family_cookie].block_ptr; - - /* Find a free entry */ - for (i = 0; i < ether_desc_blk[proto->ifp->family_cookie].n_count; i++) { - if (ed[i].type == 0) { - break; - } - } - - if (i >= ether_desc_blk[proto->ifp->family_cookie].n_count) { - u_long new_count = ETHER_DESC_BLK_SIZE + - ether_desc_blk[proto->ifp->family_cookie].n_count; - tmp = _MALLOC((new_count * (sizeof(*ed))), M_IFADDR, M_WAITOK); - if (tmp == 0) { - /* - * Remove any previous descriptors set in the call. 
- */ - ether_del_proto(proto, dl_tag); - return ENOMEM; - } - - bzero(tmp, new_count * sizeof(*ed)); - bcopy(ether_desc_blk[proto->ifp->family_cookie].block_ptr, - tmp, ether_desc_blk[proto->ifp->family_cookie].n_count * sizeof(*ed)); - FREE(ether_desc_blk[proto->ifp->family_cookie].block_ptr, M_IFADDR); - ether_desc_blk[proto->ifp->family_cookie].n_count = new_count; - ether_desc_blk[proto->ifp->family_cookie].block_ptr = (struct en_desc*)tmp; - ed = ether_desc_blk[proto->ifp->family_cookie].block_ptr; - } - - /* Bump n_max_used if appropriate */ - if (i + 1 > ether_desc_blk[proto->ifp->family_cookie].n_max_used) { - ether_desc_blk[proto->ifp->family_cookie].n_max_used = i + 1; - } - - ed[i].proto = proto; - ed[i].data[0] = 0; - ed[i].data[1] = 0; - - switch (desc->type) { - case DLIL_DESC_RAW: - /* 2 byte ethernet raw protocol type is at native_type */ - /* protocol is not in network byte order */ - ed[i].type = DLIL_DESC_ETYPE2; - ed[i].data[0] = htons(*(u_int16_t*)desc->native_type); - break; - - case DLIL_DESC_ETYPE2: - /* 2 byte ethernet raw protocol type is at native_type */ - /* prtocol must be in network byte order */ - ed[i].type = DLIL_DESC_ETYPE2; - ed[i].data[0] = *(u_int16_t*)desc->native_type; - break; - - case DLIL_DESC_802_2: - ed[i].type = DLIL_DESC_SAP; - ed[i].data[0] = *(u_int32_t*)&desc->variants.desc_802_2; - ed[i].data[0] &= htonl(0xFFFFFF00); - break; - - case DLIL_DESC_SAP: - ed[i].type = DLIL_DESC_SAP; - bcopy(desc->native_type, &ed[i].data[0], 3); - break; - - case DLIL_DESC_802_2_SNAP: - ed[i].type = DLIL_DESC_SNAP; - desc->variants.desc_802_2_SNAP.protocol_type = - htons(desc->variants.desc_802_2_SNAP.protocol_type); - bcopy(&desc->variants.desc_802_2_SNAP, &ed[i].data[0], 8); - ed[i].data[0] &= htonl(0x000000FF); - desc->variants.desc_802_2_SNAP.protocol_type = - ntohs(desc->variants.desc_802_2_SNAP.protocol_type); - break; - - case DLIL_DESC_SNAP: { - u_int8_t* pDest = ((u_int8_t*)&ed[i].data[0]) + 3; - ed[i].type = DLIL_DESC_SNAP; - bcopy(desc->native_type, pDest, 5); - } - break; - } - } - - return 0; + struct dlil_demux_desc *desc; + int error = 0; + + TAILQ_FOREACH(desc, desc_head, next) { + struct ifnet_demux_desc dmx; + int swapped = 0; + + // Convert dlil_demux_desc to ifnet_demux_desc + dmx.type = desc->type; + dmx.datalen = desc->variants.native_type_length; + dmx.data = desc->native_type; + +#ifdef DLIL_DESC_RAW + if (dmx.type == DLIL_DESC_RAW) { + swapped = 1; + dmx.type = DLIL_DESC_ETYPE2; + dmx.datalen = 2; + *(u_int16_t*)dmx.data = htons(*(u_int16_t*)dmx.data); + } +#endif + + error = ether_add_proto_internal(ifp, protocol_family, &dmx); + if (swapped) { + *(u_int16_t*)dmx.data = ntohs(*(u_int16_t*)dmx.data); + swapped = 0; + } + if (error) { + ether_del_proto(ifp, protocol_family); + break; + } + } + + return error; } -static -int ether_shutdown() +static int +ether_shutdown(void) { return 0; } -int ether_demux(ifp, m, frame_header, proto) - struct ifnet *ifp; - struct mbuf *m; - char *frame_header; - struct if_proto **proto; - +int +ether_demux( + ifnet_t ifp, + mbuf_t m, + char *frame_header, + protocol_family_t *protocol_family) { - register struct ether_header *eh = (struct ether_header *)frame_header; - u_short ether_type = eh->ether_type; - u_short ether_type_host; - u_int16_t type; - u_int8_t *data; - u_long i = 0; - u_long max = ether_desc_blk[ifp->family_cookie].n_max_used; - struct en_desc *ed = ether_desc_blk[ifp->family_cookie].block_ptr; - u_int32_t extProto1 = 0; - u_int32_t extProto2 = 0; - - if (eh->ether_dhost[0] & 1) { - /* 
Check for broadcast */ - if (*(u_int32_t*)eh->ether_dhost == 0xFFFFFFFF && - *(u_int16_t*)(eh->ether_dhost + sizeof(u_int32_t)) == 0xFFFF) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - } else { - /* - * When the driver is put into promiscuous mode we may receive unicast - * frames that are not intended for our interfaces. They are filtered - * here to keep them from traveling further up the stack to code that - * is not expecting them or prepared to deal with them. In the near - * future, the filtering done here will be moved even further down the - * stack into the IONetworkingFamily, preventing even interface - * filter NKE's from receiving promiscuous packets. Please use BPF. - */ - #define ETHER_CMP(x, y) ( ((u_int16_t *) x)[0] != ((u_int16_t *) y)[0] || \ - ((u_int16_t *) x)[1] != ((u_int16_t *) y)[1] || \ - ((u_int16_t *) x)[2] != ((u_int16_t *) y)[2] ) - - if (ETHER_CMP(eh->ether_dhost, ((struct arpcom *) ifp)->ac_enaddr)) { - m_freem(m); - return EJUSTRETURN; - } - } - ether_type_host = ntohs(ether_type); - if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) - || ether_type_host == ETHERTYPE_VLAN) { - return (vlan_demux(ifp, m, frame_header, proto)); - } - data = mtod(m, u_int8_t*); - - /* - * Determine the packet's protocol type and stuff the protocol into - * longs for quick compares. - */ - if (ether_type_host <= 1500) { - extProto1 = *(u_int32_t*)data; - - // SAP or SNAP - if ((extProto1 & htonl(0xFFFFFF00)) == htonl(0xAAAA0300)) { - // SNAP - type = DLIL_DESC_SNAP; - extProto2 = *(u_int32_t*)(data + sizeof(u_int32_t)); - extProto1 &= htonl(0x000000FF); - } else { - type = DLIL_DESC_SAP; - extProto1 &= htonl(0xFFFFFF00); - } - } else { - type = DLIL_DESC_ETYPE2; - } - - /* - * Search through the connected protocols for a match. - */ - - switch (type) { - case DLIL_DESC_ETYPE2: - for (i = 0; i < max; i++) { - if ((ed[i].type == type) && (ed[i].data[0] == ether_type)) { - *proto = ed[i].proto; - return 0; - } - } - break; - - case DLIL_DESC_SAP: - for (i = 0; i < max; i++) { - if ((ed[i].type == type) && (ed[i].data[0] == extProto1)) { - *proto = ed[i].proto; - return 0; - } - } - break; - - case DLIL_DESC_SNAP: - for (i = 0; i < max; i++) { - if ((ed[i].type == type) && (ed[i].data[0] == extProto1) && - (ed[i].data[1] == extProto2)) { - *proto = ed[i].proto; - return 0; - } - } - break; - } - - return ENOENT; -} + struct ether_header *eh = (struct ether_header *)frame_header; + u_short ether_type = eh->ether_type; + u_int16_t type; + u_int8_t *data; + u_long i = 0; + struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie; + u_long maxd = desc_blk ? desc_blk->n_max_used : 0; + struct en_desc *ed = desc_blk ? desc_blk->block_ptr : NULL; + u_int32_t extProto1 = 0; + u_int32_t extProto2 = 0; + + if (eh->ether_dhost[0] & 1) { + /* Check for broadcast */ + if (*(u_int32_t*)eh->ether_dhost == 0xFFFFFFFF && + *(u_int16_t*)(eh->ether_dhost + sizeof(u_int32_t)) == 0xFFFF) + m->m_flags |= M_BCAST; + else + m->m_flags |= M_MCAST; + } + if (ifp->if_eflags & IFEF_BOND) { + /* if we're bonded, bond "protocol" gets all the packets */ + *protocol_family = PF_BOND; + return (0); + } + if ((eh->ether_dhost[0] & 1) == 0) { + /* + * When the driver is put into promiscuous mode we may receive unicast + * frames that are not intended for our interfaces. They are marked here + * as being promiscuous so the caller may dispose of them after passing + * the packets to any interface filters. 
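+	 * (ETHER_CMP below compares the six destination bytes against
+	 * our lladdr as three 16-bit words; any mismatch marks the
+	 * packet M_PROMISC instead of dropping it here.)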
+ */ + #define ETHER_CMP(x, y) ( ((u_int16_t *) x)[0] != ((u_int16_t *) y)[0] || \ + ((u_int16_t *) x)[1] != ((u_int16_t *) y)[1] || \ + ((u_int16_t *) x)[2] != ((u_int16_t *) y)[2] ) + + if (ETHER_CMP(eh->ether_dhost, ifnet_lladdr(ifp))) { + m->m_flags |= M_PROMISC; + } + } + + /* Quick check for VLAN */ + if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0 || + ether_type == htons(ETHERTYPE_VLAN)) { + *protocol_family = PF_VLAN; + return 0; + } + + data = mtod(m, u_int8_t*); + + /* + * Determine the packet's protocol type and stuff the protocol into + * longs for quick compares. + */ + + if (ntohs(ether_type) <= 1500) { + extProto1 = *(u_int32_t*)data; + + // SAP or SNAP + if ((extProto1 & htonl(0xFFFFFF00)) == htonl(0xAAAA0300)) { + // SNAP + type = DLIL_DESC_SNAP; + extProto2 = *(u_int32_t*)(data + sizeof(u_int32_t)); + extProto1 &= htonl(0x000000FF); + } else { + type = DLIL_DESC_SAP; + extProto1 &= htonl(0xFFFFFF00); + } + } else { + type = DLIL_DESC_ETYPE2; + } + + /* + * Search through the connected protocols for a match. + */ + + switch (type) { + case DLIL_DESC_ETYPE2: + for (i = 0; i < maxd; i++) { + if ((ed[i].type == type) && (ed[i].data[0] == ether_type)) { + *protocol_family = ed[i].protocol_family; + return 0; + } + } + break; + + case DLIL_DESC_SAP: + for (i = 0; i < maxd; i++) { + if ((ed[i].type == type) && (ed[i].data[0] == extProto1)) { + *protocol_family = ed[i].protocol_family; + return 0; + } + } + break; + + case DLIL_DESC_SNAP: + for (i = 0; i < maxd; i++) { + if ((ed[i].type == type) && (ed[i].data[0] == extProto1) && + (ed[i].data[1] == extProto2)) { + *protocol_family = ed[i].protocol_family; + return 0; + } + } + break; + } + + return ENOENT; +} /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. - * Assumes that ifp is actually pointer to arpcom structure. */ int -ether_frameout(ifp, m, ndest, edst, ether_type) - register struct ifnet *ifp; - struct mbuf **m; - struct sockaddr *ndest; - char *edst; - char *ether_type; +ether_frameout( + struct ifnet *ifp, + struct mbuf **m, + const struct sockaddr *ndest, + const char *edst, + const char *ether_type) { - register struct ether_header *eh; + struct ether_header *eh; int hlen; /* link layer header length */ - struct arpcom *ac = IFP2AC(ifp); - hlen = ETHER_HDR_LEN; @@ -455,26 +535,21 @@ ether_frameout(ifp, m, ndest, edst, ether_type) */ if ((ifp->if_flags & IFF_SIMPLEX) && ((*m)->m_flags & M_LOOP)) { - if (lo_dlt == 0) - dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dlt); - - if (lo_dlt) { + if (lo_ifp) { if ((*m)->m_flags & M_BCAST) { struct mbuf *n = m_copy(*m, 0, (int)M_COPYALL); if (n != NULL) - dlil_output(lo_dlt, n, 0, ndest, 0); - } - else - { - if (bcmp(edst, ac->ac_enaddr, ETHER_ADDR_LEN) == 0) { - dlil_output(lo_dlt, *m, 0, ndest, 0); + dlil_output(lo_ifp, ndest->sa_family, n, 0, ndest, 0); + } + else { + if (bcmp(edst, ifnet_lladdr(ifp), ETHER_ADDR_LEN) == 0) { + dlil_output(lo_ifp, ndest->sa_family, *m, 0, ndest, 0); return EJUSTRETURN; } } } } - /* * Add local net header. If no space in first mbuf, * allocate another. 
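 * The standard mbuf idiom for this (a sketch of the pattern, assuming
 * the usual M_PREPEND semantics from sys/mbuf.h) is:
 *
 *	M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT);
 *	if (*m == NULL)
 *		return EJUSTRETURN;
 *	eh = mtod(*m, struct ether_header *);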
@@ -489,41 +564,17 @@ ether_frameout(ifp, m, ndest, edst, ether_type) (void)memcpy(&eh->ether_type, ether_type, sizeof(eh->ether_type)); (void)memcpy(eh->ether_dhost, edst, 6); - (void)memcpy(eh->ether_shost, ac->ac_enaddr, - sizeof(eh->ether_shost)); + ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, ETHER_ADDR_LEN); return 0; } - __private_extern__ int ether_add_if(struct ifnet *ifp) { - u_long i; - - ifp->if_framer = ether_frameout; - ifp->if_demux = ether_demux; - ifp->if_event = 0; - ifp->if_resolvemulti = ether_resolvemulti; - ifp->if_nvlans = 0; - - for (i=0; i < MAX_INTERFACES; i++) - if (ether_desc_blk[i].n_count == 0) - break; - - if (i == MAX_INTERFACES) - return ENOMEM; - - ether_desc_blk[i].block_ptr = _MALLOC(ETHER_DESC_BLK_SIZE * sizeof(struct en_desc), - M_IFADDR, M_WAITOK); - if (ether_desc_blk[i].block_ptr == 0) - return ENOMEM; - - ether_desc_blk[i].n_count = ETHER_DESC_BLK_SIZE; - bzero(ether_desc_blk[i].block_ptr, ETHER_DESC_BLK_SIZE * sizeof(struct en_desc)); - - ifp->family_cookie = i; + ifp->if_framer = ether_frameout; + ifp->if_demux = ether_demux; return 0; } @@ -531,108 +582,95 @@ ether_add_if(struct ifnet *ifp) __private_extern__ int ether_del_if(struct ifnet *ifp) { - if ((ifp->family_cookie < MAX_INTERFACES) && - (ether_desc_blk[ifp->family_cookie].n_count) - { - FREE(ether_desc_blk[ifp->family_cookie].block_ptr, M_IFADDR); - ether_desc_blk[ifp->family_cookie].block_ptr = NULL; - ether_desc_blk[ifp->family_cookie].n_count = 0; - ether_desc_blk[ifp->family_cookie].n_max_used = 0; - return 0; - } - else - return ENOENT; + if (ifp->family_cookie) { + FREE(ifp->family_cookie, M_IFADDR); + return 0; + } + else + return ENOENT; } __private_extern__ int ether_init_if(struct ifnet *ifp) { - register struct ifaddr *ifa; - register struct sockaddr_dl *sdl; - - ifa = ifnet_addrs[ifp->if_index - 1]; - if (ifa == 0) { - printf("ether_ifattach: no lladdr!\n"); - return (EINVAL); - } - sdl = (struct sockaddr_dl *)ifa->ifa_addr; - sdl->sdl_type = IFT_ETHER; - sdl->sdl_alen = ifp->if_addrlen; - bcopy((IFP2AC(ifp))->ac_enaddr, LLADDR(sdl), ifp->if_addrlen); - + /* + * Copy ethernet address out of old style arpcom. New + * interfaces created using the KPIs will not have an + * interface family. Those interfaces will have the + * lladdr passed in when the interface is created. + */ + u_char *enaddr = ((u_char*)ifp) + sizeof(struct ifnet); + ifnet_set_lladdr(ifp, enaddr, 6); + bzero(enaddr, 6); + return 0; } +errno_t +ether_check_multi( + ifnet_t ifp, + const struct sockaddr *proto_addr) +{ + errno_t result = EAFNOSUPPORT; + const u_char *e_addr; + + /* + * AF_UNSPEC and AF_LINK don't require translation. We do + * want to verify that they specify a valid multicast. 
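+	 * An Ethernet address is multicast exactly when the low-order
+	 * bit of its first octet (the I/G bit) is set, which is what
+	 * the (e_addr[0] & 0x01) tests below check.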
+ */ + switch(proto_addr->sa_family) { + case AF_UNSPEC: + e_addr = (const u_char*)&proto_addr->sa_data[0]; + if ((e_addr[0] & 0x01) != 0x01) + result = EADDRNOTAVAIL; + else + result = 0; + break; + + case AF_LINK: + e_addr = CONST_LLADDR((const struct sockaddr_dl*)proto_addr); + if ((e_addr[0] & 0x01) != 0x01) + result = EADDRNOTAVAIL; + else + result = 0; + break; + } + + return result; +} + int -ether_ifmod_ioctl(ifp, command, data) - struct ifnet *ifp; - u_long command; - caddr_t data; +ether_ioctl( + __unused ifnet_t ifp, + __unused u_int32_t command, + __unused void* data) { - struct rslvmulti_req *rsreq = (struct rslvmulti_req *) data; - int error = 0; - struct sockaddr_dl *sdl; - struct sockaddr_in *sin; - u_char *e_addr; - - - switch (command) { - case SIOCRSLVMULTI: - switch(rsreq->sa->sa_family) { - case AF_UNSPEC: - /* AppleTalk uses AF_UNSPEC for multicast registration. - * No mapping needed. Just check that it's a valid MC address. - */ - e_addr = &rsreq->sa->sa_data[0]; - if ((e_addr[0] & 1) != 1) - return EADDRNOTAVAIL; - *rsreq->llsa = 0; - return EJUSTRETURN; - - - case AF_LINK: - /* - * No mapping needed. Just check that it's a valid MC address. - */ - sdl = (struct sockaddr_dl *)rsreq->sa; - e_addr = LLADDR(sdl); - if ((e_addr[0] & 1) != 1) - return EADDRNOTAVAIL; - *rsreq->llsa = 0; - return EJUSTRETURN; - - default: - return EAFNOSUPPORT; - } - - default: - return EOPNOTSUPP; - } + return EOPNOTSUPP; } -extern int ether_attach_inet(struct ifnet *ifp, u_long *dl_tag); -extern int ether_detach_inet(struct ifnet *ifp, u_long dl_tag); -extern int ether_attach_inet6(struct ifnet *ifp, u_long *dl_tag); -extern int ether_detach_inet6(struct ifnet *ifp, u_long dl_tag); -int ether_family_init() +extern int ether_attach_inet(struct ifnet *ifp, u_long proto_family); +extern int ether_detach_inet(struct ifnet *ifp, u_long proto_family); +extern int ether_attach_inet6(struct ifnet *ifp, u_long proto_family); +extern int ether_detach_inet6(struct ifnet *ifp, u_long proto_family); + +extern void kprintf(const char *, ...); + +int ether_family_init(void) { - int i, error=0; + int error=0; struct dlil_ifmod_reg_str ifmod_reg; - struct dlil_protomod_reg_str enet_protoreg; - extern int vlan_family_init(void); /* ethernet family is built-in, called from bsd_init */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); bzero(&ifmod_reg, sizeof(ifmod_reg)); ifmod_reg.add_if = ether_add_if; ifmod_reg.del_if = ether_del_if; ifmod_reg.init_if = ether_init_if; - ifmod_reg.add_proto = ether_add_proto; + ifmod_reg.add_proto = ether_add_proto_old; ifmod_reg.del_proto = ether_del_proto; - ifmod_reg.ifmod_ioctl = ether_ifmod_ioctl; + ifmod_reg.ifmod_ioctl = ether_ioctl; ifmod_reg.shutdown = ether_shutdown; if (dlil_reg_if_modules(APPLE_IF_FAM_ETHERNET, &ifmod_reg)) { @@ -641,29 +679,24 @@ int ether_family_init() goto done; } + /* Register protocol registration functions */ - /* Register protocol registration functions */ - - bzero(&enet_protoreg, sizeof(enet_protoreg)); - enet_protoreg.attach_proto = ether_attach_inet; - enet_protoreg.detach_proto = ether_detach_inet; - - if (error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_ETHERNET, &enet_protoreg) != 0) { - printf("ether_family_init: dlil_reg_proto_module failed for AF_INET error=%d\n", error); - goto done; - } - - enet_protoreg.attach_proto = ether_attach_inet6; - enet_protoreg.detach_proto = ether_detach_inet6; - - if (error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_ETHERNET, &enet_protoreg) != 0) { - printf("ether_family_init: 
dlil_reg_proto_module failed for AF_INET6 error=%d\n", error); - goto done; - } - vlan_family_init(); + if ((error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_ETHERNET, + ether_attach_inet, ether_detach_inet)) != 0) { + kprintf("dlil_reg_proto_module failed for AF_INET error=%d\n", error); + goto done; + } + + + if ((error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_ETHERNET, + ether_attach_inet6, ether_detach_inet6)) != 0) { + kprintf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", error); + goto done; + } + vlan_family_init(); + bond_family_init(); done: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); return (error); } diff --git a/bsd/net/ether_inet6_pr_module.c b/bsd/net/ether_inet6_pr_module.c index e1ac8180f..1c2175efe 100644 --- a/bsd/net/ether_inet6_pr_module.c +++ b/bsd/net/ether_inet6_pr_module.c @@ -63,14 +63,15 @@ #include #include #include +#include #include -#include #include #include #include #include #include +#include #include #include @@ -104,252 +105,112 @@ extern struct ifqueue pkintrq; #include #endif /* NVLAN > 0 */ -static u_long lo_dlt = 0; -static ivedonethis = 0; -static u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -#define IFP2AC(IFP) ((struct arpcom *)IFP) - - - +/* Local function declarations */ +int ether_attach_inet6(struct ifnet *ifp, u_long protocol_family); +int ether_detach_inet6(struct ifnet *ifp, u_long protocol_family); /* * Process a received Ethernet packet; * the packet is in the mbuf chain m without * the ether header, which is provided separately. */ -int -inet6_ether_input(m, frame_header, ifp, dl_tag, sync_ok) - struct mbuf *m; - char *frame_header; - struct ifnet *ifp; - u_long dl_tag; - int sync_ok; - +static errno_t +inet6_ether_input( + __unused ifnet_t ifp, + protocol_family_t protocol, + mbuf_t packet, + __unused char *header) { - register struct ether_header *eh = (struct ether_header *) frame_header; - register struct ifqueue *inq=0; - u_short ether_type; - int s; - u_int16_t ptype = -1; - unsigned char buf[18]; - - - - if ((ifp->if_flags & IFF_UP) == 0) { - m_freem(m); - return EJUSTRETURN; - } - - ifp->if_lastchange = time; - - if (eh->ether_dhost[0] & 1) { - if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost, - sizeof(etherbroadcastaddr)) == 0) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - } - if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; - - ether_type = ntohs(eh->ether_type); - - - switch (ether_type) { + proto_input(protocol, packet); + return 0; +} - case ETHERTYPE_IPV6: - schednetisr(NETISR_IPV6); - inq = &ip6intrq; - break; +static errno_t +inet6_ether_pre_output( + ifnet_t ifp, + __unused protocol_family_t protocol_family, + mbuf_t *m0, + const struct sockaddr *dst_netaddr, + void *route, + char *type, + char *edst) +{ + errno_t result; + struct sockaddr_dl sdl; + register struct mbuf *m = *m0; - default: { - return ENOENT; + /* + * Tell ether_frameout it's ok to loop packet if necessary + */ + m->m_flags |= M_LOOP; + + result = nd6_lookup_ipv6(ifp, (const struct sockaddr_in6*)dst_netaddr, + &sdl, sizeof(sdl), route, *m0); + + if (result == 0) { + *(u_int16_t*)type = htons(ETHERTYPE_IPV6); + bcopy(LLADDR(&sdl), edst, sdl.sdl_alen); } - } - - if (inq == 0) - return ENOENT; - s = splimp(); - if (IF_QFULL(inq)) { - IF_DROP(inq); - m_freem(m); - splx(s); - return EJUSTRETURN; - } else - IF_ENQUEUE(inq, m); - splx(s); - return 0; + return result; } - - - -int -inet6_ether_pre_output(ifp, m0, dst_netaddr, route, type, edst, dl_tag ) - struct ifnet 
*ifp; - struct mbuf **m0; - struct sockaddr *dst_netaddr; - caddr_t route; - char *type; - char *edst; - u_long dl_tag; +static int +ether_inet6_resolve_multi( + ifnet_t ifp, + const struct sockaddr *proto_addr, + struct sockaddr_dl *out_ll, + size_t ll_len) { - struct rtentry *rt0 = (struct rtentry *) route; - int s; - register struct mbuf *m = *m0; - register struct rtentry *rt; - register struct ether_header *eh; - int hlen; /* link layer header lenght */ - struct arpcom *ac = IFP2AC(ifp); - - - - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return ENETDOWN; - - rt = rt0; - if (rt) { - if ((rt->rt_flags & RTF_UP) == 0) { - rt0 = rt = rtalloc1(dst_netaddr, 1, 0UL); - if (rt0) - rtunref(rt); - else - return EHOSTUNREACH; - } - - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, - 0UL); - if ((rt = rt->rt_gwroute) == 0) - return (EHOSTUNREACH); - } - } - + static const size_t minsize = offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN; + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6*)proto_addr; - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } - - hlen = ETHER_HDR_LEN; - - /* - * Tell ether_frameout it's ok to loop packet unless negated below. - */ - m->m_flags |= M_LOOP; - - switch (dst_netaddr->sa_family) { - - - case AF_INET6: - if (!nd6_storelladdr(&ac->ac_if, rt, m, dst_netaddr, (u_char *)edst)) { - /* this must be impossible, so we bark */ - printf("nd6_storelladdr failed\n"); - return(EADDRNOTAVAIL); /* dlil_output will free the mbuf */ - } - *(u_short *)type = htons(ETHERTYPE_IPV6); - break; - - default: - printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit, - dst_netaddr->sa_family); - - /* dlil_output will free the mbuf */ - return EAFNOSUPPORT; - } - - return (0); + if (proto_addr->sa_family != AF_INET6) + return EAFNOSUPPORT; + + if (proto_addr->sa_len < sizeof(struct sockaddr_in6)) + return EINVAL; + + if (ll_len < minsize) + return EMSGSIZE; + + bzero(out_ll, minsize); + out_ll->sdl_len = minsize; + out_ll->sdl_family = AF_LINK; + out_ll->sdl_index = ifp->if_index; + out_ll->sdl_type = IFT_ETHER; + out_ll->sdl_nlen = 0; + out_ll->sdl_alen = ETHER_ADDR_LEN; + out_ll->sdl_slen = 0; + ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, LLADDR(out_ll)); + + return 0; } -int -ether_inet6_prmod_ioctl(dl_tag, ifp, command, data) - u_long dl_tag; - struct ifnet *ifp; - int command; - caddr_t data; +static errno_t +ether_inet6_prmod_ioctl( + ifnet_t ifp, + __unused protocol_family_t protocol_family, + u_int32_t command, + void* data) { - struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; - struct rslvmulti_req *rsreq = (struct rslvmulti_req *) data; int error = 0; - boolean_t funnel_state; - struct arpcom *ac = (struct arpcom *) ifp; - struct sockaddr_dl *sdl; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; - - u_char *e_addr; - switch (command) { - case SIOCRSLVMULTI: { - switch(rsreq->sa->sa_family) { - - case AF_INET6: - sin6 = (struct sockaddr_in6 *)rsreq->sa; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - /* - * An IP6 address of 0 means listen to all - * of the Ethernet multicast address used for IP6. - * (This is used for multicast routers.) 
- */ - ifp->if_flags |= IFF_ALLMULTI; - *rsreq->llsa = 0; - return 0; - } - MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR, - M_WAITOK); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ETHER; - sdl->sdl_nlen = 0; - sdl->sdl_alen = ETHER_ADDR_LEN; - sdl->sdl_slen = 0; - e_addr = LLADDR(sdl); - ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); -#ifndef __APPLE__ - printf("ether_resolvemulti AF_INET6 Adding %x:%x:%x:%x:%x:%x\n", - e_addr[0], e_addr[1], e_addr[2], e_addr[3], e_addr[4], e_addr[5]); -#endif - *rsreq->llsa = (struct sockaddr *)sdl; - return 0; - - default: - /* - * Well, the text isn't quite right, but it's the name - * that counts... - */ - return EAFNOSUPPORT; - } - - } case SIOCSIFADDR: if ((ifp->if_flags & IFF_RUNNING) == 0) { - ifp->if_flags |= IFF_UP; + ifnet_set_flags(ifp, IFF_UP, IFF_UP); dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); } break; case SIOCGIFADDR: - { - struct sockaddr *sa; - - sa = (struct sockaddr *) & ifr->ifr_data; - bcopy(IFP2AC(ifp)->ac_enaddr, - (caddr_t) sa->sa_data, ETHER_ADDR_LEN); - } + ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); break; case SIOCSIFMTU: @@ -366,59 +227,48 @@ ether_inet6_prmod_ioctl(dl_tag, ifp, command, data) return (error); } - - - - -int ether_attach_inet6(struct ifnet *ifp, u_long *dl_tag) +int +ether_attach_inet6( + struct ifnet *ifp, + __unused u_long protocol_family) { - struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - u_short en_6native=ETHERTYPE_IPV6; - int stat; - int i; - - - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET6, dl_tag); - if (stat == 0) - return stat; - - TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &en_6native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); - reg.interface_family = ifp->if_family; - reg.unit_number = ifp->if_unit; - reg.input = inet6_ether_input; - reg.pre_output = inet6_ether_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = ether_inet6_prmod_ioctl; - reg.default_proto = 0; - reg.protocol_family = PF_INET6; - - stat = dlil_attach_protocol(®, dl_tag); - if (stat) { - printf("WARNING: ether_attach_inet6 can't attach ip to interface\n"); - } - - return stat; + struct ifnet_attach_proto_param proto; + struct ifnet_demux_desc demux[1]; + u_short en_6native=htons(ETHERTYPE_IPV6); + errno_t error; + + bzero(&proto, sizeof(proto)); + demux[0].type = DLIL_DESC_ETYPE2; + demux[0].data = &en_6native; + demux[0].datalen = sizeof(en_6native); + proto.demux_list = demux; + proto.demux_count = 1; + proto.input = inet6_ether_input; + proto.pre_output = inet6_ether_pre_output; + proto.ioctl = ether_inet6_prmod_ioctl; + proto.resolve = ether_inet6_resolve_multi; + error = ifnet_attach_protocol(ifp, protocol_family, &proto); + if (error && error != EEXIST) { + printf("WARNING: ether_attach_inet6 can't attach ipv6 to %s%d\n", + ifp->if_name, ifp->if_unit); + } + + return error; } -int ether_detach_inet6(struct ifnet *ifp, u_long dl_tag) +int +ether_detach_inet6( + struct ifnet *ifp, + u_long protocol_family) { - int stat; - - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET6, &dl_tag); - if (stat == 0) { - stat = dlil_detach_protocol(dl_tag); - if (stat) { - printf("WARNING: ether_detach_inet6 can't detach ip6 from interface\n"); - } - } - return stat; + errno_t error; + + error = 
ifnet_detach_protocol(ifp, protocol_family); + if (error && error != ENOENT) { + printf("WARNING: ether_detach_inet6 can't detach ipv6 from %s%d\n", + ifp->if_name, ifp->if_unit); + } + + return error; } diff --git a/bsd/net/ether_inet_pr_module.c b/bsd/net/ether_inet_pr_module.c index 73ae154ac..43d413070 100644 --- a/bsd/net/ether_inet_pr_module.c +++ b/bsd/net/ether_inet_pr_module.c @@ -63,19 +63,21 @@ #include #include #include +#include #include -#include #include #include #include #include +#include #include #include #include #include #include +#include #include @@ -85,7 +87,6 @@ extern struct ifqueue pkintrq; #endif - #if BRIDGE #include #endif @@ -95,283 +96,217 @@ extern struct ifqueue pkintrq; #include #endif /* NVLAN > 0 */ -static u_long lo_dlt = 0; -static ivedonethis = 0; -static u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - -#define IFP2AC(IFP) ((struct arpcom *)IFP) +/* Local function declarations */ +int ether_attach_inet(struct ifnet *ifp, u_long proto_family); +int ether_detach_inet(struct ifnet *ifp, u_long proto_family); +extern void * kdp_get_interface(void); +extern void ipintr(void); +extern void arp_input(struct mbuf* m); - -extern void * kdp_get_interface(); +static void +inet_ether_arp_input( + struct mbuf *m) +{ + struct ether_arp *ea; + struct sockaddr_dl sender_hw; + struct sockaddr_in sender_ip; + struct sockaddr_in target_ip; + + if (mbuf_len(m) < sizeof(*ea) && + mbuf_pullup(&m, sizeof(*ea)) != 0) + return; + + ea = mbuf_data(m); + + /* Verify this is an ethernet/ip arp and address lengths are correct */ + if (ntohs(ea->arp_hrd) != ARPHRD_ETHER || + ntohs(ea->arp_pro) != ETHERTYPE_IP || + ea->arp_pln != sizeof(struct in_addr) || + ea->arp_hln != ETHER_ADDR_LEN) { + mbuf_free(m); + return; + } + + /* Verify the sender is not broadcast or multicast */ + if ((ea->arp_sha[0] & 0x01) != 0) { + mbuf_free(m); + return; + } + + bzero(&sender_ip, sizeof(sender_ip)); + sender_ip.sin_len = sizeof(sender_ip); + sender_ip.sin_family = AF_INET; + sender_ip.sin_addr = *(struct in_addr*)ea->arp_spa; + target_ip = sender_ip; + target_ip.sin_addr = *(struct in_addr*)ea->arp_tpa; + + bzero(&sender_hw, sizeof(sender_hw)); + sender_hw.sdl_len = sizeof(sender_hw); + sender_hw.sdl_family = AF_LINK; + sender_hw.sdl_type = IFT_ETHER; + sender_hw.sdl_alen = ETHER_ADDR_LEN; + bcopy(ea->arp_sha, LLADDR(&sender_hw), ETHER_ADDR_LEN); + + arp_ip_handle_input(mbuf_pkthdr_rcvif(m), ntohs(ea->arp_op), &sender_hw, &sender_ip, &target_ip); + mbuf_free(m); +} /* * Process a received Ethernet packet; * the packet is in the mbuf chain m without * the ether header, which is provided separately. 
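 * Under the KPI model the demultiplexing below reduces to dispatch:
 * IP frames are handed straight to proto_input(PF_INET, m) and ARP
 * frames to inet_ether_arp_input(); the netisr/ifqueue machinery is
 * no longer involved.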
*/ -int -inet_ether_input(m, frame_header, ifp, dl_tag, sync_ok) - struct mbuf *m; - char *frame_header; - struct ifnet *ifp; - u_long dl_tag; - int sync_ok; - +static errno_t +inet_ether_input( + __unused ifnet_t ifp, + __unused protocol_family_t protocol_family, + mbuf_t m, + char *frame_header) { register struct ether_header *eh = (struct ether_header *) frame_header; - register struct ifqueue *inq=0; u_short ether_type; - int s; - u_int16_t ptype = -1; - unsigned char buf[18]; - -#if ISO || LLC || NETAT - register struct llc *l; -#endif - - if ((ifp->if_flags & IFF_UP) == 0) { - m_freem(m); - return EJUSTRETURN; - } - - ifp->if_lastchange = time; - - if (eh->ether_dhost[0] & 1) { - if (bcmp((caddr_t)etherbroadcastaddr, (caddr_t)eh->ether_dhost, - sizeof(etherbroadcastaddr)) == 0) - m->m_flags |= M_BCAST; - else - m->m_flags |= M_MCAST; - } - if (m->m_flags & (M_BCAST|M_MCAST)) - ifp->if_imcasts++; ether_type = ntohs(eh->ether_type); -#if NVLAN > 0 - if (ether_type == vlan_proto) { - if (vlan_input(eh, m) < 0) - ifp->if_data.ifi_noproto++; - return EJUSTRETURN; - } -#endif /* NVLAN > 0 */ - switch (ether_type) { - case ETHERTYPE_IP: - if (ipflow_fastforward(m)) - return EJUSTRETURN; - ptype = mtod(m, struct ip *)->ip_p; - if ((sync_ok == 0) || - (ptype != IPPROTO_TCP && ptype != IPPROTO_UDP)) { - schednetisr(NETISR_IP); - } - - inq = &ipintrq; - break; - - case ETHERTYPE_ARP: - schednetisr(NETISR_ARP); - inq = &arpintrq; - break; - - default: { - return ENOENT; - } + case ETHERTYPE_IP: + proto_input(PF_INET, m); + break; + + case ETHERTYPE_ARP: { + inet_ether_arp_input(m); + } + break; + + default: { + return ENOENT; + } } - - if (inq == 0) - return ENOENT; - - s = splimp(); - if (IF_QFULL(inq)) { - IF_DROP(inq); - m_freem(m); - splx(s); - return EJUSTRETURN; - } else - IF_ENQUEUE(inq, m); - splx(s); - - if ((sync_ok) && - (ptype == IPPROTO_TCP || ptype == IPPROTO_UDP)) { - extern void ipintr(void); - - s = splnet(); - ipintr(); - splx(s); - } - + return 0; } - - - -int -inet_ether_pre_output(ifp, m0, dst_netaddr, route, type, edst, dl_tag ) - struct ifnet *ifp; - struct mbuf **m0; - struct sockaddr *dst_netaddr; - caddr_t route; - char *type; - char *edst; - u_long dl_tag; +static errno_t +inet_ether_pre_output( + ifnet_t ifp, + __unused protocol_family_t protocol_family, + mbuf_t *m0, + const struct sockaddr *dst_netaddr, + void* route, + char *type, + char *edst) { - struct rtentry *rt0 = (struct rtentry *) route; - int s; register struct mbuf *m = *m0; - register struct rtentry *rt; register struct ether_header *eh; - int off, len = m->m_pkthdr.len; - int hlen; /* link layer header length */ - struct arpcom *ac = IFP2AC(ifp); - + errno_t result = 0; if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return ENETDOWN; - - rt = rt0; - if (rt) { - if ((rt->rt_flags & RTF_UP) == 0) { - rt0 = rt = rtalloc1(dst_netaddr, 1, 0UL); - if (rt0) - rtunref(rt); - else - return EHOSTUNREACH; - } - - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, - 0UL); - if ((rt = rt->rt_gwroute) == 0) - return (EHOSTUNREACH); - } - } - + return ENETDOWN; - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } - - hlen = ETHER_HDR_LEN; - /* * Tell ether_frameout it's ok to loop packet unless negated below. 
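	 * (ether_frameout loops a copy back over lo_ifp for broadcasts
	 * on IFF_SIMPLEX interfaces when M_LOOP is set; the AF_UNSPEC
	 * and pseudo_AF_HDRCMPLT cases below clear it again.)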
*/ m->m_flags |= M_LOOP; switch (dst_netaddr->sa_family) { + + case AF_INET: { + struct sockaddr_dl ll_dest; + result = arp_lookup_ip(ifp, (const struct sockaddr_in*)dst_netaddr, + &ll_dest, sizeof(ll_dest), (route_t)route, *m0); + if (result == 0) { + bcopy(LLADDR(&ll_dest), edst, ETHER_ADDR_LEN); + *(u_int16_t*)type = htons(ETHERTYPE_IP); + } + } + break; + + case pseudo_AF_HDRCMPLT: + case AF_UNSPEC: + m->m_flags &= ~M_LOOP; + eh = (struct ether_header *)dst_netaddr->sa_data; + (void)memcpy(edst, eh->ether_dhost, 6); + *(u_short *)type = eh->ether_type; + break; + + default: + printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit, + dst_netaddr->sa_family); + + result = EAFNOSUPPORT; + } - case AF_INET: - if (!arpresolve(ac, rt, m, dst_netaddr, edst, rt0)) - return (EJUSTRETURN); /* if not yet resolved */ - off = m->m_pkthdr.len - m->m_len; - *(u_short *)type = htons(ETHERTYPE_IP); - break; - - case AF_UNSPEC: - m->m_flags &= ~M_LOOP; - eh = (struct ether_header *)dst_netaddr->sa_data; - (void)memcpy(edst, eh->ether_dhost, 6); - *(u_short *)type = eh->ether_type; - break; - - default: - kprintf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit, - dst_netaddr->sa_family); + return result; +} - return EAFNOSUPPORT; - } +static errno_t +ether_inet_resolve_multi( + ifnet_t ifp, + const struct sockaddr *proto_addr, + struct sockaddr_dl *out_ll, + size_t ll_len) +{ + static const size_t minsize = offsetof(struct sockaddr_dl, sdl_data[0]) + ETHER_ADDR_LEN; + const struct sockaddr_in *sin = (const struct sockaddr_in*)proto_addr; + + if (proto_addr->sa_family != AF_INET) + return EAFNOSUPPORT; + + if (proto_addr->sa_len < sizeof(struct sockaddr_in)) + return EINVAL; - return (0); + if (ll_len < minsize) + return EMSGSIZE; + + bzero(out_ll, minsize); + out_ll->sdl_len = minsize; + out_ll->sdl_family = AF_LINK; + out_ll->sdl_index = ifp->if_index; + out_ll->sdl_type = IFT_ETHER; + out_ll->sdl_nlen = 0; + out_ll->sdl_alen = ETHER_ADDR_LEN; + out_ll->sdl_slen = 0; + ETHER_MAP_IP_MULTICAST(&sin->sin_addr, LLADDR(out_ll)); + + return 0; } -int -ether_inet_prmod_ioctl(dl_tag, ifp, command, data) - u_long dl_tag; - struct ifnet *ifp; - int command; - caddr_t data; +static errno_t +ether_inet_prmod_ioctl( + ifnet_t ifp, + __unused protocol_family_t protocol_family, + u_int32_t command, + void* data) { - struct ifaddr *ifa = (struct ifaddr *) data; - struct ifreq *ifr = (struct ifreq *) data; - struct rslvmulti_req *rsreq = (struct rslvmulti_req *) data; + ifaddr_t ifa = data; + struct ifreq *ifr = data; int error = 0; - boolean_t funnel_state; - struct arpcom *ac = (struct arpcom *) ifp; - struct sockaddr_dl *sdl; - struct sockaddr_in *sin; - u_char *e_addr; -#if 0 - /* No tneeded at soo_ioctlis already funnelled */ - funnel_state = thread_funnel_set(network_flock,TRUE); -#endif - switch (command) { - case SIOCRSLVMULTI: { - switch(rsreq->sa->sa_family) { - - case AF_INET: - sin = (struct sockaddr_in *)rsreq->sa; - if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) - return EADDRNOTAVAIL; - MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR, - M_WAITOK); - sdl->sdl_len = sizeof *sdl; - sdl->sdl_family = AF_LINK; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = IFT_ETHER; - sdl->sdl_nlen = 0; - sdl->sdl_alen = ETHER_ADDR_LEN; - sdl->sdl_slen = 0; - e_addr = LLADDR(sdl); - ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); - *rsreq->llsa = (struct sockaddr *)sdl; - return EJUSTRETURN; - - default: - /* - * Well, the text isn't quite right, but it's the name - * that counts... 
- */ - return EAFNOSUPPORT; - } - - } case SIOCSIFADDR: - if ((ifp->if_flags & IFF_RUNNING) == 0) { - ifp->if_flags |= IFF_UP; - dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); - } + if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) { + ifnet_set_flags(ifp, IFF_UP, IFF_UP); + ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL); + } - switch (ifa->ifa_addr->sa_family) { + switch (ifaddr_address_family(ifa)) { case AF_INET: - if (ifp->if_init) - ifp->if_init(ifp->if_softc); /* before arpwhohas */ - - arp_ifinit(IFP2AC(ifp), ifa); + inet_arp_init_ifaddr(ifp, ifa); /* * Register new IP and MAC addresses with the kernel debugger * if the interface is the same as was registered by IOKernelDebugger. If * no interface was registered, fall back and just match against en0 interface. */ - if ((kdp_get_interface() != 0 && kdp_get_interface() == ifp->if_private) + if ((kdp_get_interface() != 0 && kdp_get_interface() == ifp->if_softc) || (kdp_get_interface() == 0 && ifp->if_unit == 0)) - kdp_set_ip_and_mac_addresses(&(IA_SIN(ifa)->sin_addr), &(IFP2AC(ifp)->ac_enaddr)); + kdp_set_ip_and_mac_addresses(&(IA_SIN(ifa)->sin_addr), ifnet_lladdr(ifp)); break; @@ -382,13 +317,7 @@ ether_inet_prmod_ioctl(dl_tag, ifp, command, data) break; case SIOCGIFADDR: - { - struct sockaddr *sa; - - sa = (struct sockaddr *) & ifr->ifr_data; - bcopy(IFP2AC(ifp)->ac_enaddr, - (caddr_t) sa->sa_data, ETHER_ADDR_LEN); - } + ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); break; case SIOCSIFMTU: @@ -402,71 +331,183 @@ ether_inet_prmod_ioctl(dl_tag, ifp, command, data) return EOPNOTSUPP; } - //(void) thread_funnel_set(network_flock, FALSE); - return (error); } +static void +ether_inet_event( + ifnet_t ifp, + __unused protocol_family_t protocol, + const struct kev_msg *event) +{ + ifaddr_t *addresses; + + if (event->vendor_code != KEV_VENDOR_APPLE || + event->kev_class != KEV_NETWORK_CLASS || + event->kev_subclass != KEV_DL_SUBCLASS || + event->event_code != KEV_DL_LINK_ADDRESS_CHANGED) { + return; + } + + if (ifnet_get_address_list_family(ifp, &addresses, AF_INET) == 0) { + int i; + + for (i = 0; addresses[i] != NULL; i++) { + inet_arp_init_ifaddr(ifp, addresses[i]); + } + + ifnet_free_address_list(addresses); + } +} - - +static errno_t +ether_inet_arp( + ifnet_t ifp, + u_short arpop, + const struct sockaddr_dl* sender_hw, + const struct sockaddr* sender_proto, + const struct sockaddr_dl* target_hw, + const struct sockaddr* target_proto) +{ + mbuf_t m; + errno_t result; + struct ether_header *eh; + struct ether_arp *ea; + const struct sockaddr_in* sender_ip = (const struct sockaddr_in*)sender_proto; + const struct sockaddr_in* target_ip = (const struct sockaddr_in*)target_proto; + char *datap; + + if (target_ip == NULL) + return EINVAL; + + if ((sender_ip && sender_ip->sin_family != AF_INET) || + (target_ip && target_ip->sin_family != AF_INET)) + return EAFNOSUPPORT; + + result = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &m); + if (result != 0) + return result; + + mbuf_setlen(m, sizeof(*ea)); + mbuf_pkthdr_setlen(m, sizeof(*ea)); + + /* Move the data pointer in the mbuf to the end, aligned to 4 bytes */ + datap = mbuf_datastart(m); + datap += mbuf_trailingspace(m); + datap -= (((u_long)datap) & 0x3); + mbuf_setdata(m, datap, sizeof(*ea)); + ea = mbuf_data(m); + + /* Prepend the ethernet header, we will send the raw frame */ + mbuf_prepend(&m, sizeof(*eh), MBUF_WAITOK); + eh = mbuf_data(m); + eh->ether_type = htons(ETHERTYPE_ARP); + + /* Fill out the arp header */ + ea->arp_pro = htons(ETHERTYPE_IP); + ea->arp_hln = 
sizeof(ea->arp_sha); + ea->arp_pln = sizeof(ea->arp_spa); + ea->arp_hrd = htons(ARPHRD_ETHER); + ea->arp_op = htons(arpop); + + /* Sender Hardware */ + if (sender_hw != NULL) { + bcopy(CONST_LLADDR(sender_hw), ea->arp_sha, sizeof(ea->arp_sha)); + } + else { + ifnet_lladdr_copy_bytes(ifp, ea->arp_sha, ETHER_ADDR_LEN); + } + ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, sizeof(eh->ether_shost)); + + /* Sender IP */ + if (sender_ip != NULL) { + bcopy(&sender_ip->sin_addr, ea->arp_spa, sizeof(ea->arp_spa)); + } + else { + struct ifaddr *ifa; + + /* Look for an IP address to use as our source */ + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) + break; + } + if (ifa) { + bcopy(&((struct sockaddr_in*)ifa->ifa_addr)->sin_addr, ea->arp_spa, + sizeof(ea->arp_spa)); + } + ifnet_lock_done(ifp); + + if (ifa == NULL) { + mbuf_free(m); + return ENXIO; + } + } + + /* Target Hardware */ + if (target_hw == 0) { + bzero(ea->arp_tha, sizeof(ea->arp_tha)); + bcopy(etherbroadcastaddr, eh->ether_dhost, sizeof(eh->ether_dhost)); + } + else { + bcopy(CONST_LLADDR(target_hw), ea->arp_tha, sizeof(ea->arp_tha)); + bcopy(CONST_LLADDR(target_hw), eh->ether_dhost, sizeof(eh->ether_dhost)); + } + + /* Target IP */ + bcopy(&target_ip->sin_addr, ea->arp_tpa, sizeof(ea->arp_tpa)); + + ifnet_output_raw(ifp, PF_INET, m); + + return 0; +} int -ether_attach_inet(struct ifnet *ifp, u_long *dl_tag) +ether_attach_inet( + struct ifnet *ifp, + __unused u_long proto_family) { - struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - struct dlil_demux_desc desc2; - u_short en_native=ETHERTYPE_IP; - u_short arp_native=ETHERTYPE_ARP; - int stat; - int i; - - - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET, dl_tag); - if (stat == 0) - return (stat); - - TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &en_native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); - reg.interface_family = ifp->if_family; - reg.unit_number = ifp->if_unit; - reg.input = inet_ether_input; - reg.pre_output = inet_ether_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = ether_inet_prmod_ioctl; - reg.default_proto = 1; - reg.protocol_family = PF_INET; - - desc2 = desc; - desc2.native_type = (char *) &arp_native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc2, next); - - stat = dlil_attach_protocol(®, dl_tag); - if (stat) { - printf("WARNING: ether_attach_inet can't attach ip to interface\n"); - return stat; - } - return (0); + struct ifnet_attach_proto_param proto; + struct ifnet_demux_desc demux[2]; + u_short en_native=htons(ETHERTYPE_IP); + u_short arp_native=htons(ETHERTYPE_ARP); + errno_t error; + + bzero(&demux[0], sizeof(demux)); + demux[0].type = DLIL_DESC_ETYPE2; + demux[0].data = &en_native; + demux[0].datalen = sizeof(en_native); + demux[1].type = DLIL_DESC_ETYPE2; + demux[1].data = &arp_native; + demux[1].datalen = sizeof(arp_native); + + bzero(&proto, sizeof(proto)); + proto.demux_list = demux; + proto.demux_count = sizeof(demux) / sizeof(demux[0]); + proto.input = inet_ether_input; + proto.pre_output = inet_ether_pre_output; + proto.ioctl = ether_inet_prmod_ioctl; + proto.event = ether_inet_event; + proto.resolve = ether_inet_resolve_multi; + proto.send_arp = ether_inet_arp; + + error = ifnet_attach_protocol(ifp, proto_family, &proto); + if (error && error != 
EEXIST) { + printf("WARNING: ether_attach_inet can't attach ip to %s%d\n", + ifp->if_name, ifp->if_unit); + } + return error; } -int ether_detach_inet(struct ifnet *ifp, u_long dl_tag) +int +ether_detach_inet( + struct ifnet *ifp, + u_long proto_family) { int stat; - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET, &dl_tag); - if (stat == 0) { - stat = dlil_detach_protocol(dl_tag); - if (stat) { - printf("WARNING: ether_detach_inet can't detach ip from interface\n"); - } - } + stat = dlil_detach_protocol(ifp, proto_family); + return stat; } diff --git a/bsd/net/ethernet.h b/bsd/net/ethernet.h index 703660a07..b52bd815f 100644 --- a/bsd/net/ethernet.h +++ b/bsd/net/ethernet.h @@ -102,11 +102,17 @@ struct ether_addr { #define ETHERMTU (ETHER_MAX_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN) #define ETHERMIN (ETHER_MIN_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -struct ether_addr *ether_aton __P((char *)); -#endif /* __APPLE_API_PRIVATE */ +#ifdef KERNEL_PRIVATE +/* + * The following are used by ethernet interfaces. + */ + +struct ether_addr *ether_aton(const char *); + +#ifdef BSD_KERNEL_PRIVATE +extern u_char etherbroadcastaddr[ETHER_ADDR_LEN]; #endif +#endif /* KERNEL_PRIVATE */ #ifndef KERNEL #include @@ -116,10 +122,11 @@ struct ether_addr *ether_aton __P((char *)); */ __BEGIN_DECLS -int ether_hostton __P((char *, struct ether_addr *)); -int ether_line __P((char *, struct ether_addr *, char *)); -char *ether_ntoa __P((const struct ether_addr *)); -int ether_ntohost __P((char *, struct ether_addr *)); +int ether_hostton(const char *, struct ether_addr *); +int ether_line(const char *, struct ether_addr *, char *); +char *ether_ntoa(const struct ether_addr *); +struct ether_addr *ether_aton(const char *); +int ether_ntohost(char *, const struct ether_addr *); __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/ppc/label_t.h b/bsd/net/ieee8023ad.h similarity index 58% rename from bsd/ppc/label_t.h rename to bsd/net/ieee8023ad.h index d4a45ac77..6de8d3ea9 100644 --- a/bsd/ppc/label_t.h +++ b/bsd/net/ieee8023ad.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,27 +19,29 @@ * * @APPLE_LICENSE_HEADER_END@ */ + /* - * Copyright (c) 1993 NeXT Computer, Inc. - * - * PowerPC Family: For setjmp/longjmp (kernel version). 
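
The registration in ether_attach_inet() above replaces the old DLIL_DESC_RAW descriptors with typed ETYPE2 demux entries. A minimal sketch of registering a single demux entry for a hypothetical protocol (the 0x88B5 ethertype and example_ names are illustrative; the structures and ifnet_attach_protocol() are the ones used above, and example_input is the handler sketched earlier):

static errno_t
example_attach(ifnet_t ifp, protocol_family_t family)
{
	struct ifnet_attach_proto_param proto;
	struct ifnet_demux_desc demux;
	u_short etype = htons(0x88B5);		/* illustrative ethertype */

	bzero(&demux, sizeof(demux));
	demux.type = DLIL_DESC_ETYPE2;
	demux.data = &etype;
	demux.datalen = sizeof(etype);

	bzero(&proto, sizeof(proto));
	proto.demux_list = &demux;
	proto.demux_count = 1;
	proto.input = example_input;

	return ifnet_attach_protocol(ifp, family, &proto);
}
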
- * - * HISTORY - * + * ieee8023ad.h */ - -#ifndef _BSD_PPC_LABEL_T_H_ -#define _BSD_PPC_LABEL_T_H_ -#include +/* + * Modification History + * + * May 14, 2004 Dieter Siegmund (dieter@apple.com) + * - created + */ -#ifdef __APPLE_API_OBSOLETE -typedef struct label_t { - int val[59]; -} label_t; +#ifndef _NET_IEEE8023AD_H_ +#define _NET_IEEE8023AD_H_ -#endif /* __APPLE_API_OBSOLETE */ +#include -#endif /* _BSD_PPC_LABEL_T_H_ */ +#define IEEE8023AD_SLOW_PROTO_ETHERTYPE 0x8809 +#define IEEE8023AD_SLOW_PROTO_MULTICAST { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 } +#define IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP 1 +#define IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL 2 +#define IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_START 3 +#define IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END 10 +#endif _NET_IEEE8023AD_H_ diff --git a/bsd/net/if.c b/bsd/net/if.c index e5f361d70..3f6d1157d 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -55,6 +55,8 @@ * $FreeBSD: src/sys/net/if.c,v 1.85.2.9 2001/07/24 19:10:17 brooks Exp $ */ +#include + #include #include #include @@ -73,12 +75,15 @@ #include #include #include +#include + #include #include #ifdef __APPLE__ #include //#include #include +#include #endif #if defined(INET) || defined(INET6) @@ -95,19 +100,21 @@ * System initialization */ -static int ifconf __P((u_long, caddr_t)); -static void if_qflush __P((struct ifqueue *)); -static void link_rtrequest __P((int, struct rtentry *, struct sockaddr *)); +static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space); +static void if_qflush(struct ifqueue *); +__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); +void if_rtproto_del(struct ifnet *ifp, int protocol); static struct if_clone *if_clone_lookup(const char *, int *); -static int if_clone_list(struct if_clonereq *); +#ifdef IF_CLONE_LIST +static int if_clone_list(int count, int * total, user_addr_t dst); +#endif MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); int ifqmaxlen = IFQ_MAXLEN; -struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); -struct ifmultihead ifma_lostlist = LIST_HEAD_INITIALIZER(ifma_lostlist); +struct ifnethead ifnet_head = TAILQ_HEAD_INITIALIZER(ifnet_head); static int if_cloners_count; LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); @@ -117,7 +124,7 @@ LIST_HEAD(, if_clone) if_cloners = LIST_HEAD_INITIALIZER(if_cloners); * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ -extern void nd6_setmtu __P((struct ifnet *)); +extern void nd6_setmtu(struct ifnet *); #endif #define M_CLONE M_IFADDR @@ -133,6 +140,48 @@ int if_index; struct ifaddr **ifnet_addrs; struct ifnet **ifindex2ifnet; +__private_extern__ void +if_attach_ifa( + struct ifnet *ifp, + struct ifaddr *ifa) +{ + ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED); + if (ifa->ifa_debug & IFA_ATTACHED) { + panic("if_attach_ifa: Attempted to attach address that's already attached!\n"); + } + ifaref(ifa); + ifa->ifa_debug |= IFA_ATTACHED; + TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); +} + +__private_extern__ void +if_detach_ifa( + struct ifnet *ifp, + struct ifaddr *ifa) +{ + ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED); +#if 1 + /* Debugging code */ + if ((ifa->ifa_debug & IFA_ATTACHED) == 0) { + printf("if_detach_ifa: ifa is not attached to any interface! 
flags=%x\n", ifa->ifa_debug);
+		return;
+	}
+	else {
+		struct ifaddr *ifa2;
+		TAILQ_FOREACH(ifa2, &ifp->if_addrhead, ifa_link) {
+			if (ifa2 == ifa)
+				break;
+		}
+		if (ifa2 != ifa) {
+			printf("if_detach_ifa: Attempted to detach IFA that was not attached!\n");
+		}
+	}
+#endif
+	TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
+	ifa->ifa_debug &= ~IFA_ATTACHED;
+	ifafree(ifa);
+}
+
 #define INITIAL_IF_INDEXLIM	8
 
 /*
@@ -148,17 +197,14 @@ struct ifnet **ifindex2ifnet;
  * always allocate one extra element to hold ifindex2ifnet[0], which
  * is unused.
  */
-static int
+int if_next_index(void);
+
+__private_extern__ int
 if_next_index(void)
 {
 	static int	if_indexlim = 0;
-	static int	if_list_growing = 0;
 	int		new_index;
 
-	while (if_list_growing) {
-		/* wait until list is done growing */
-		(void)tsleep((caddr_t)&ifnet_addrs, PZERO, "if_next_index", 0);
-	}
 	new_index = ++if_index;
 	if (if_index > if_indexlim) {
 		unsigned	n;
@@ -167,9 +213,6 @@ if_next_index(void)
 		caddr_t		new_ifindex2ifnet;
 		caddr_t		old_ifnet_addrs;
 
-		/* mark list as growing */
-		if_list_growing = 1;
-
 		old_ifnet_addrs = (caddr_t)ifnet_addrs;
 		if (ifnet_addrs == NULL) {
 			new_if_indexlim = INITIAL_IF_INDEXLIM;
@@ -201,89 +244,8 @@ if_next_index(void)
 		if (old_ifnet_addrs != NULL) {
 			_FREE((caddr_t)old_ifnet_addrs, M_IFADDR);
 		}
-
-		/* wake up others that might be blocked */
-		if_list_growing = 0;
-		wakeup((caddr_t)&ifnet_addrs);
 	}
 	return (new_index);
-
-}
-
-/*
- * Attach an interface to the
- * list of "active" interfaces.
- */
-void
-old_if_attach(ifp)
-	struct ifnet *ifp;
-{
-	unsigned socksize, ifasize;
-	int namelen, masklen;
-	char workbuf[64];
-	register struct sockaddr_dl *sdl;
-	register struct ifaddr *ifa;
-
-	if (ifp->if_snd.ifq_maxlen == 0)
-		ifp->if_snd.ifq_maxlen = ifqmaxlen;
-
-	/*
-	 * XXX -
-	 * The old code would work if the interface passed a pre-existing
-	 * chain of ifaddrs to this code.  We don't trust our callers to
-	 * properly initialize the tailq, however, so we no longer allow
-	 * this unlikely case.
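
The two helpers above make the ownership rule explicit: the address list itself holds one reference (taken with ifaref() on attach, dropped with ifafree() on detach), and all list surgery happens under the exclusive ifnet lock. A sketch of a caller obeying that discipline (example_swap_ifaddr is hypothetical):

static void
example_swap_ifaddr(struct ifnet *ifp, struct ifaddr *oifa, struct ifaddr *nifa)
{
	ifnet_lock_exclusive(ifp);
	if_detach_ifa(ifp, oifa);	/* the list drops its reference */
	if_attach_ifa(ifp, nifa);	/* the list takes one via ifaref() */
	ifnet_lock_done(ifp);
}
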
- */ - TAILQ_INIT(&ifp->if_addrhead); - TAILQ_INIT(&ifp->if_prefixhead); - LIST_INIT(&ifp->if_multiaddrs); - getmicrotime(&ifp->if_lastchange); - - if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) { - /* allocate a new entry */ - ifp->if_index = if_next_index(); - ifindex2ifnet[ifp->if_index] = ifp; - - /* - * create a Link Level name for this device - */ - namelen = snprintf(workbuf, sizeof(workbuf), - "%s%d", ifp->if_name, ifp->if_unit); -#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m)) - masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; - socksize = masklen + ifp->if_addrlen; -#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1))) - if (socksize < sizeof(*sdl)) - socksize = sizeof(*sdl); - socksize = ROUNDUP(socksize); - ifasize = sizeof(*ifa) + 2 * socksize; - ifa = (struct ifaddr *) _MALLOC(ifasize, M_IFADDR, M_WAITOK); - if (ifa) { - bzero((caddr_t)ifa, ifasize); - sdl = (struct sockaddr_dl *)(ifa + 1); - sdl->sdl_len = socksize; - sdl->sdl_family = AF_LINK; - bcopy(workbuf, sdl->sdl_data, namelen); - sdl->sdl_nlen = namelen; - sdl->sdl_index = ifp->if_index; - sdl->sdl_type = ifp->if_type; - ifnet_addrs[ifp->if_index - 1] = ifa; - ifa->ifa_ifp = ifp; - ifa->ifa_rtrequest = link_rtrequest; - ifa->ifa_addr = (struct sockaddr *)sdl; - sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); - ifa->ifa_netmask = (struct sockaddr *)sdl; - sdl->sdl_len = masklen; - while (namelen != 0) - sdl->sdl_data[--namelen] = 0xff; - } - } else { - ifa = ifnet_addrs[ifp->if_index - 1]; - } - if (ifa != NULL) { - TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); - } - TAILQ_INSERT_TAIL(&ifnet, ifp, if_link); } /* @@ -361,7 +323,7 @@ if_clone_create(char *name, int len) /* * Destroy a clone network interface. */ -int +static int if_clone_destroy(const char *name) { struct if_clone *ifc; @@ -405,7 +367,7 @@ if_clone_lookup(const char *name, int *unitp) { struct if_clone *ifc; const char *cp; - int i; + size_t i; for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) { for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) { @@ -492,27 +454,27 @@ if_clone_detach(struct if_clone *ifc) if_cloners_count--; } +#ifdef IF_CLONE_LIST /* * Provide list of interface cloners to userspace. */ static int -if_clone_list(struct if_clonereq *ifcr) +if_clone_list(int count, int * total, user_addr_t dst) { - char outbuf[IFNAMSIZ], *dst; + char outbuf[IFNAMSIZ]; struct if_clone *ifc; - int count, error = 0; + int error = 0; - ifcr->ifcr_total = if_cloners_count; - if ((dst = ifcr->ifcr_buffer) == NULL) { + *total = if_cloners_count; + if (dst == USER_ADDR_NULL) { /* Just asking how many there are. */ return (0); } - if (ifcr->ifcr_count < 0) + if (count < 0) return (EINVAL); - count = (if_cloners_count < ifcr->ifcr_count) ? - if_cloners_count : ifcr->ifcr_count; + count = (if_cloners_count < count) ? 
if_cloners_count : count; for (ifc = LIST_FIRST(&if_cloners); ifc != NULL && count != 0; ifc = LIST_NEXT(ifc, ifc_list), count--, dst += IFNAMSIZ) { @@ -524,27 +486,37 @@ if_clone_list(struct if_clonereq *ifcr) return (error); } +#endif IF_CLONE_LIST +int ifa_foraddr(unsigned int addr); __private_extern__ int -ifa_foraddr(addr) - unsigned int addr; +ifa_foraddr( + unsigned int addr) { - register struct ifnet *ifp; - register struct ifaddr *ifa; - register unsigned int addr2; + struct ifnet *ifp; + struct ifaddr *ifa; + unsigned int addr2; + int result = 0; - - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) + ifnet_head_lock_shared(); + for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) { + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - addr2 = IA_SIN(ifa)->sin_addr.s_addr; - - if (addr == addr2) - return (1); + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + addr2 = IA_SIN(ifa)->sin_addr.s_addr; + + if (addr == addr2) { + result = 1; + break; + } + } + ifnet_lock_done(ifp); } - return (0); + ifnet_head_done(); + + return result; } /* @@ -552,50 +524,75 @@ ifa_foraddr(addr) */ /*ARGSUSED*/ struct ifaddr * -ifa_ifwithaddr(addr) - register struct sockaddr *addr; +ifa_ifwithaddr( + const struct sockaddr *addr) { - register struct ifnet *ifp; - register struct ifaddr *ifa; + struct ifnet *ifp; + struct ifaddr *ifa; + struct ifaddr *result = 0; #define equal(a1, a2) \ - (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0) - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { - if (ifa->ifa_addr->sa_family != addr->sa_family) - continue; - if (equal(addr, ifa->ifa_addr)) - return (ifa); - if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && - /* IP6 doesn't have broadcast */ - ifa->ifa_broadaddr->sa_len != 0 && - equal(ifa->ifa_broadaddr, addr)) - return (ifa); + (bcmp((const void*)(a1), (const void*)(a2), ((const struct sockaddr *)(a1))->sa_len) == 0) + + ifnet_head_lock_shared(); + for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) { + ifnet_lock_shared(ifp); + for (ifa = ifp->if_addrhead.tqh_first; ifa; + ifa = ifa->ifa_link.tqe_next) { + if (ifa->ifa_addr->sa_family != addr->sa_family) + continue; + if (equal(addr, ifa->ifa_addr)) { + result = ifa; + break; + } + if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && + /* IP6 doesn't have broadcast */ + ifa->ifa_broadaddr->sa_len != 0 && + equal(ifa->ifa_broadaddr, addr)) { + result = ifa; + break; + } + } + if (result) + ifaref(result); + ifnet_lock_done(ifp); } - return ((struct ifaddr *)0); + ifnet_head_done(); + + return result; } /* * Locate the point to point interface with a given destination address. 
*/ /*ARGSUSED*/ struct ifaddr * -ifa_ifwithdstaddr(addr) - register struct sockaddr *addr; +ifa_ifwithdstaddr( + const struct sockaddr *addr) { - register struct ifnet *ifp; - register struct ifaddr *ifa; - - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) - if (ifp->if_flags & IFF_POINTOPOINT) - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { - if (ifa->ifa_addr->sa_family != addr->sa_family) - continue; - if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) - return (ifa); + struct ifnet *ifp; + struct ifaddr *ifa; + struct ifaddr *result = 0; + + ifnet_head_lock_shared(); + for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) { + if (ifp->if_flags & IFF_POINTOPOINT) { + ifnet_lock_shared(ifp); + for (ifa = ifp->if_addrhead.tqh_first; ifa; + ifa = ifa->ifa_link.tqe_next) { + if (ifa->ifa_addr->sa_family != addr->sa_family) + continue; + if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) { + result = ifa; + break; + } + } + if (result) + ifaref(result); + ifnet_lock_done(ifp); + } } - return ((struct ifaddr *)0); + ifnet_head_done(); + return result; } /* @@ -603,33 +600,42 @@ ifa_ifwithdstaddr(addr) * is most specific found. */ struct ifaddr * -ifa_ifwithnet(addr) - struct sockaddr *addr; +ifa_ifwithnet( + const struct sockaddr *addr) { - register struct ifnet *ifp; - register struct ifaddr *ifa; + struct ifnet *ifp; + struct ifaddr *ifa = NULL; struct ifaddr *ifa_maybe = (struct ifaddr *) 0; u_int af = addr->sa_family; char *addr_data = addr->sa_data, *cplim; + ifnet_head_lock_shared(); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { - register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; - if (sdl->sdl_index && sdl->sdl_index <= if_index) - return (ifnet_addrs[sdl->sdl_index - 1]); + const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; + if (sdl->sdl_index && sdl->sdl_index <= if_index) { + ifa = ifnet_addrs[sdl->sdl_index - 1]; + + if (ifa) + ifaref(ifa); + + ifnet_head_done(); + return ifa; + } } /* * Scan though each interface, looking for ones that have * addresses in this address family. */ - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { + for (ifp = ifnet_head.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { - register char *cp, *cp2, *cp3; + char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; @@ -653,8 +659,9 @@ next: continue; * netmask for the remote end. */ if (ifa->ifa_dstaddr != 0 - && equal(addr, ifa->ifa_dstaddr)) - return (ifa); + && equal(addr, ifa->ifa_dstaddr)) { + break; + } } else #endif /* __APPLE__*/ { @@ -663,8 +670,8 @@ next: continue; * then use it instead of the generic one. */ if (ifa->ifa_claim_addr) { - if ((*ifa->ifa_claim_addr)(ifa, addr)) { - return (ifa); + if (ifa->ifa_claim_addr(ifa, addr)) { + break; } else { continue; } @@ -696,12 +703,38 @@ next: continue; */ if (ifa_maybe == 0 || rn_refines((caddr_t)ifa->ifa_netmask, - (caddr_t)ifa_maybe->ifa_netmask)) + (caddr_t)ifa_maybe->ifa_netmask)) { + ifaref(ifa); + if (ifa_maybe) + ifafree(ifa_maybe); ifa_maybe = ifa; + } } } + + if (ifa) { + ifaref(ifa); + } + + /* + * ifa is set if we found an exact match. 
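
With these conversions a successful lookup returns an ifaddr that has been ifaref()'d while the ifnet lock was held, so the pointer stays valid after the lock is dropped; callers now release the reference instead of borrowing the pointer. A sketch of the new caller contract (example_addr_is_local is hypothetical):

static int
example_addr_is_local(const struct sockaddr *sa)
{
	struct ifaddr *ifa = ifa_ifwithaddr(sa);

	if (ifa == NULL)
		return 0;
	/* ... inspect ifa as needed ... */
	ifafree(ifa);		/* drop the reference the lookup took */
	return 1;
}
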
+ * take a reference to the ifa before + * releasing the ifp lock + */ + ifnet_lock_done(ifp); + + if (ifa) { + break; + } + } + ifnet_head_done(); + if (!ifa) + ifa = ifa_maybe; + else if (ifa_maybe) { + ifafree(ifa_maybe); + ifa_maybe = NULL; } - return (ifa_maybe); + return ifa; } /* @@ -709,18 +742,20 @@ next: continue; * a given address. */ struct ifaddr * -ifaof_ifpforaddr(addr, ifp) - struct sockaddr *addr; - register struct ifnet *ifp; +ifaof_ifpforaddr( + const struct sockaddr *addr, + struct ifnet *ifp) { - register struct ifaddr *ifa; - register char *cp, *cp2, *cp3; - register char *cplim; + struct ifaddr *ifa = 0; + const char *cp, *cp2, *cp3; + char *cplim; struct ifaddr *ifa_maybe = 0; u_int af = addr->sa_family; if (af >= AF_MAX) return (0); + + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { if (ifa->ifa_addr->sa_family != af) @@ -730,12 +765,12 @@ ifaof_ifpforaddr(addr, ifp) if (ifa->ifa_netmask == 0) { if (equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr))) - return (ifa); + break; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (equal(addr, ifa->ifa_dstaddr)) - return (ifa); + break; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; @@ -745,10 +780,15 @@ ifaof_ifpforaddr(addr, ifp) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) - return (ifa); + break; } } - return (ifa_maybe); + + if (!ifa) ifa = ifa_maybe; + if (ifa) ifaref(ifa); + + ifnet_lock_done(ifp); + return ifa; } #include @@ -758,13 +798,13 @@ ifaof_ifpforaddr(addr, ifp) * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. */ -static void +void link_rtrequest(cmd, rt, sa) int cmd; - register struct rtentry *rt; + struct rtentry *rt; struct sockaddr *sa; { - register struct ifaddr *ifa; + struct ifaddr *ifa; struct sockaddr *dst; struct ifnet *ifp; @@ -776,75 +816,102 @@ link_rtrequest(cmd, rt, sa) rtsetifa(rt, ifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, sa); + ifafree(ifa); } } /* - * Mark an interface down and notify protocols of - * the transition. - * NOTE: must be called at splnet or eqivalent. - */ -void -if_unroute(ifp, flag, fam) - register struct ifnet *ifp; - int flag, fam; -{ - register struct ifaddr *ifa; - - ifp->if_flags &= ~flag; - getmicrotime(&ifp->if_lastchange); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) - if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) - pfctlinput(PRC_IFDOWN, ifa->ifa_addr); - if_qflush(&ifp->if_snd); - rt_ifmsg(ifp); -} - -/* - * Mark an interface up and notify protocols of - * the transition. - * NOTE: must be called at splnet or eqivalent. + * if_updown will set the interface up or down. It will + * prevent other up/down events from occurring until this + * up/down event has completed. + * + * Caller must lock ifnet. This function will drop the + * lock. This allows ifnet_set_flags to set the rest of + * the flags after we change the up/down state without + * dropping the interface lock between setting the + * up/down state and updating the rest of the flags. 
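
if_updown() below notifies protocols from a snapshot of the address list rather than walking if_addrhead with the lock dropped; the same ifnet_get_address_list()/ifnet_free_address_list() pairing appears in ether_inet_event() earlier. A sketch of that pattern (example_walk is hypothetical):

static void
example_walk(ifnet_t ifp)
{
	ifaddr_t *addrs;
	int i;

	if (ifnet_get_address_list(ifp, &addrs) != 0)
		return;
	for (i = 0; addrs[i] != NULL; i++) {
		/* ... each addrs[i] is held, so it is safe to use unlocked ... */
	}
	ifnet_free_address_list(addrs);	/* releases the held references */
}
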
*/ -void -if_route(ifp, flag, fam) - register struct ifnet *ifp; - int flag, fam; +__private_extern__ void +if_updown( + struct ifnet *ifp, + int up) { - register struct ifaddr *ifa; - - ifp->if_flags |= flag; - getmicrotime(&ifp->if_lastchange); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) - if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) - pfctlinput(PRC_IFUP, ifa->ifa_addr); + int i; + struct ifaddr **ifa; + struct timespec tv; + + /* Wait until no one else is changing the up/down state */ + while ((ifp->if_eflags & IFEF_UPDOWNCHANGE) != 0) { + tv.tv_sec = 0; + tv.tv_nsec = NSEC_PER_SEC / 10; + ifnet_lock_done(ifp); + msleep(&ifp->if_eflags, NULL, 0, "if_updown", &tv); + ifnet_lock_exclusive(ifp); + } + + /* Verify that the interface isn't already in the right state */ + if ((!up && (ifp->if_flags & IFF_UP) == 0) || + (up && (ifp->if_flags & IFF_UP) == IFF_UP)) { + return; + } + + /* Indicate that the up/down state is changing */ + ifp->if_eflags |= IFEF_UPDOWNCHANGE; + + /* Mark interface up or down */ + if (up) { + ifp->if_flags |= IFF_UP; + } + else { + ifp->if_flags &= ~IFF_UP; + } + + ifnet_touch_lastchange(ifp); + + /* Drop the lock to notify addresses and route */ + ifnet_lock_done(ifp); + if (ifnet_get_address_list(ifp, &ifa) == 0) { + for (i = 0; ifa[i] != 0; i++) { + pfctlinput(up ? PRC_IFUP : PRC_IFDOWN, ifa[i]->ifa_addr); + } + ifnet_free_address_list(ifa); + } rt_ifmsg(ifp); - + + /* Aquire the lock to clear the changing flag and flush the send queue */ + ifnet_lock_exclusive(ifp); + if (!up) + if_qflush(&ifp->if_snd); + ifp->if_eflags &= ~IFEF_UPDOWNCHANGE; + wakeup(&ifp->if_eflags); + + return; } /* * Mark an interface down and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. */ void -if_down(ifp) - register struct ifnet *ifp; +if_down( + struct ifnet *ifp) { - - if_unroute(ifp, IFF_UP, AF_UNSPEC); + ifnet_lock_exclusive(ifp); + if_updown(ifp, 0); + ifnet_lock_done(ifp); } /* * Mark an interface up and notify protocols of * the transition. - * NOTE: must be called at splnet or eqivalent. */ void -if_up(ifp) - register struct ifnet *ifp; +if_up( + struct ifnet *ifp) { - - if_route(ifp, IFF_UP, AF_UNSPEC); + ifnet_lock_exclusive(ifp); + if_updown(ifp, 1); + ifnet_lock_done(ifp); } /* @@ -852,9 +919,9 @@ if_up(ifp) */ static void if_qflush(ifq) - register struct ifqueue *ifq; + struct ifqueue *ifq; { - register struct mbuf *m, *n; + struct mbuf *m, *n; n = ifq->ifq_head; while ((m = n) != 0) { @@ -904,12 +971,14 @@ ifunit(const char *name) /* * Now search all the interfaces for this name/number */ - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { if (strcmp(ifp->if_name, namebuf)) continue; if (unit == ifp->if_unit) break; } + ifnet_head_done(); return (ifp); } @@ -952,8 +1021,8 @@ ifioctl(so, cmd, data, p) caddr_t data; struct proc *p; { - register struct ifnet *ifp; - register struct ifreq *ifr; + struct ifnet *ifp; + struct ifreq *ifr; struct ifstat *ifs; int error = 0; short oif_flags; @@ -961,26 +1030,41 @@ ifioctl(so, cmd, data, p) struct net_event_data ev_data; switch (cmd) { - case SIOCGIFCONF: case OSIOCGIFCONF: - return (ifconf(cmd, data)); + case SIOCGIFCONF64: + { + struct ifconf64 * ifc = (struct ifconf64 *)data; + user_addr_t user_addr; + + user_addr = proc_is64bit(p) + ? 
ifc->ifc_req64 : CAST_USER_ADDR_T(ifc->ifc_req);
+		return (ifconf(cmd, user_addr, &ifc->ifc_len));
+	    }
+	    break;
 	}
 	ifr = (struct ifreq *)data;
-
 	switch (cmd) {
 	case SIOCIFCREATE:
 	case SIOCIFDESTROY:
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return (error);
 		return ((cmd == SIOCIFCREATE) ?
 			if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) :
 			if_clone_destroy(ifr->ifr_name));
-#if 0
+#if IF_CLONE_LIST
 	case SIOCIFGCLONERS:
-		return (if_clone_list((struct if_clonereq *)data));
-#endif 0
+	case SIOCIFGCLONERS64:
+	    {
+		struct if_clonereq64 * ifcr = (struct if_clonereq64 *)data;
+		user_addr_t user_addr;
+		user_addr = proc_is64bit(p)
+		    ? ifcr->ifcr_ifcru.ifcru_buffer64
+		    : CAST_USER_ADDR_T(ifcr->ifcr_ifcru.ifcru_buffer32);
+		return (if_clone_list(ifcr->ifcr_count, &ifcr->ifcr_total,
+				      user_addr));
+	    }
+#endif IF_CLONE_LIST
 	}
 
 	ifp = ifunit(ifr->ifr_name);
@@ -989,43 +1073,35 @@ ifioctl(so, cmd, data, p)
 	switch (cmd) {
 
 	case SIOCGIFFLAGS:
+		ifnet_lock_shared(ifp);
 		ifr->ifr_flags = ifp->if_flags;
+		ifnet_lock_done(ifp);
 		break;
 
 	case SIOCGIFMETRIC:
+		ifnet_lock_shared(ifp);
 		ifr->ifr_metric = ifp->if_metric;
+		ifnet_lock_done(ifp);
 		break;
 
 	case SIOCGIFMTU:
+		ifnet_lock_shared(ifp);
 		ifr->ifr_mtu = ifp->if_mtu;
+		ifnet_lock_done(ifp);
 		break;
 
 	case SIOCGIFPHYS:
+		ifnet_lock_shared(ifp);
 		ifr->ifr_phys = ifp->if_physical;
+		ifnet_lock_done(ifp);
 		break;
 
 	case SIOCSIFFLAGS:
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return (error);
-#ifndef __APPLE__
-		if (ifp->if_flags & IFF_SMART) {
-			/* Smart drivers twiddle their own routes */
-		} else
-#endif
-		if (ifp->if_flags & IFF_UP &&
-		    (ifr->ifr_flags & IFF_UP) == 0) {
-			int s = splimp();
-			if_down(ifp);
-			splx(s);
-		} else if (ifr->ifr_flags & IFF_UP &&
-		    (ifp->if_flags & IFF_UP) == 0) {
-			int s = splimp();
-			if_up(ifp);
-			splx(s);
-		}
-		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
-			(ifr->ifr_flags &~ IFF_CANTCHANGE);
+
+		ifnet_set_flags(ifp, ifr->ifr_flags, ~IFF_CANTCHANGE);
 
 		error = dlil_ioctl(so->so_proto->pr_domain->dom_family,
 				   ifp, cmd, (caddr_t) data);
@@ -1044,11 +1120,11 @@ ifioctl(so, cmd, data, p)
 			ev_msg.dv[1].data_length = 0;
 			kev_post_msg(&ev_msg);
 		}
-		getmicrotime(&ifp->if_lastchange);
+		ifnet_touch_lastchange(ifp);
 		break;
 
 	case SIOCSIFMETRIC:
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return (error);
 		ifp->if_metric = ifr->ifr_metric;
@@ -1068,11 +1144,11 @@ ifioctl(so, cmd, data, p)
 		ev_msg.dv[1].data_length = 0;
 
 		kev_post_msg(&ev_msg);
-		getmicrotime(&ifp->if_lastchange);
+		ifnet_touch_lastchange(ifp);
 		break;
 
 	case SIOCSIFPHYS:
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return error;
@@ -1093,7 +1169,7 @@ ifioctl(so, cmd, data, p)
 			ev_msg.dv[1].data_length = 0;
 			kev_post_msg(&ev_msg);
 
-			getmicrotime(&ifp->if_lastchange);
+			ifnet_touch_lastchange(ifp);
 		}
 		return(error);
 
@@ -1101,7 +1177,7 @@ ifioctl(so, cmd, data, p)
 	{
 		u_long oldmtu = ifp->if_mtu;
 
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return (error);
 		if (ifp->if_ioctl == NULL)
@@ -1126,7 +1202,7 @@ ifioctl(so, cmd, data, p)
 			ev_msg.dv[1].data_length = 0;
 
 			kev_post_msg(&ev_msg);
 
-			getmicrotime(&ifp->if_lastchange);
+			ifnet_touch_lastchange(ifp);
 			rt_ifmsg(ifp);
 		}
 		/*
@@ -1142,7 +1218,7 @@ ifioctl(so, cmd, data, p)
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
-		error = suser(p->p_ucred, &p->p_acflag);
+		error = proc_suser(p);
 		if (error)
 			return (error);
@@ -1157,8 +1233,7 @@ ifioctl(so, cmd, data, p)
 #endif
 
 		if (cmd == SIOCADDMULTI) {
-			struct ifmultiaddr *ifma;
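
The SIOCGIFCONF path above is the template for the LP64 work in this patch: a user pointer is never carried as a kernel pointer but as a user_addr_t, widened from the 32-bit view with CAST_USER_ADDR_T() when the calling process is not 64-bit. A sketch of that pattern in isolation (example_get_buffer is hypothetical; struct ifconf64 and proc_is64bit() are the interfaces used above):

static user_addr_t
example_get_buffer(struct proc *p, struct ifconf64 *ifc)
{
	/* A 64-bit process passes a 64-bit pointer; a 32-bit one is widened */
	return proc_is64bit(p)
	    ? ifc->ifc_req64
	    : CAST_USER_ADDR_T(ifc->ifc_req);
}
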
-			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
+			error = if_addmulti(ifp, &ifr->ifr_addr, NULL);
 			ev_msg.event_code = KEV_DL_ADDMULTI;
 		} else {
 			error = if_delmulti(ifp, &ifr->ifr_addr);
@@ -1177,14 +1252,10 @@ ifioctl(so, cmd, data, p)
 			ev_msg.dv[1].data_length = 0;
 			kev_post_msg(&ev_msg);
 
-			getmicrotime(&ifp->if_lastchange);
+			ifnet_touch_lastchange(ifp);
 		}
 		return error;
 
-	case SIOCSETVLAN:
-		if (ifp->if_type != IFT_L2VLAN) {
-			return (EOPNOTSUPP);
-		}
 	case SIOCSIFPHYADDR:
 	case SIOCDIFPHYADDR:
 #ifdef INET6
@@ -1194,7 +1265,10 @@ ifioctl(so, cmd, data, p)
 	case SIOCSIFMEDIA:
 	case SIOCSIFGENERIC:
 	case SIOCSIFLLADDR:
-		error = suser(p->p_ucred, &p->p_acflag);
+	case SIOCSIFALTMTU:
+	case SIOCSIFVLAN:
+	case SIOCSIFBOND:
+		error = proc_suser(p);
 		if (error)
 			return (error);
@@ -1202,7 +1276,7 @@ ifioctl(so, cmd, data, p)
 				  ifp, cmd, (caddr_t) data);
 
 		if (error == 0)
-			getmicrotime(&ifp->if_lastchange);
+			ifnet_touch_lastchange(ifp);
 		return error;
 
 	case SIOCGIFSTATUS:
@@ -1214,13 +1288,11 @@ ifioctl(so, cmd, data, p)
 	case SIOCGLIFPHYADDR:
 	case SIOCGIFMEDIA:
 	case SIOCGIFGENERIC:
-
+	case SIOCGIFDEVMTU:
 		return dlil_ioctl(so->so_proto->pr_domain->dom_family,
 				  ifp, cmd, (caddr_t) data);
-	case SIOCGETVLAN:
-		if (ifp->if_type != IFT_L2VLAN) {
-			return (EOPNOTSUPP);
-		}
+	case SIOCGIFVLAN:
+	case SIOCGIFBOND:
 		return dlil_ioctl(so->so_proto->pr_domain->dom_family,
 				  ifp, cmd, (caddr_t) data);
@@ -1228,10 +1300,11 @@ ifioctl(so, cmd, data, p)
 		oif_flags = ifp->if_flags;
 		if (so->so_proto == 0)
 			return (EOPNOTSUPP);
-#if !COMPAT_43
-		return ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
-								 data,
-								 ifp, p));
+#if !COMPAT_43_SOCKET
+		socket_lock(so, 1);
+		error = (*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, p);
+		socket_unlock(so, 1);
+		return (error);
 #else
 	    {
 		int ocmd = cmd;
@@ -1269,10 +1342,10 @@ ifioctl(so, cmd, data, p)
 		case OSIOCGIFNETMASK:
 			cmd = SIOCGIFNETMASK;
 		}
-		error =  ((*so->so_proto->pr_usrreqs->pru_control)(so,
-								   cmd,
-								   data,
-								   ifp, p));
+		socket_lock(so, 1);
+		error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
+				data, ifp, p));
+		socket_unlock(so, 1);
 		switch (ocmd) {
@@ -1283,9 +1356,9 @@ ifioctl(so, cmd, data, p)
 		}
 	    }
-#endif /* COMPAT_43 */
+#endif /* COMPAT_43_SOCKET */
 
-	if (error == EOPNOTSUPP)
+	if (error == EOPNOTSUPP || error == ENOTSUP)
 		error = dlil_ioctl(so->so_proto->pr_domain->dom_family,
 				   ifp, cmd, (caddr_t) data);
 
@@ -1294,47 +1367,74 @@ ifioctl(so, cmd, data, p)
 	return (0);
 }
 
+int
+ifioctllocked(so, cmd, data, p)
+	struct socket *so;
+	u_long cmd;
+	caddr_t data;
+	struct proc *p;
+{
+	int error;
+
+	socket_unlock(so, 0);
+	error = ifioctl(so, cmd, data, p);
+	socket_lock(so, 0);
+	return(error);
+}
+
 /*
  * Set/clear promiscuous mode on interface ifp based on the truth value
  * of pswitch.  The calls are reference counted so that only the first
  * "on" request actually has an effect, as does the final "off" request.
  * Results are undefined if the "off" and "on" requests are not matched.
  */
-int
-ifpromisc(ifp, pswitch)
-	struct ifnet *ifp;
-	int pswitch;
+errno_t
+ifnet_set_promiscuous(
+	ifnet_t ifp,
+	int pswitch)
 {
 	struct ifreq ifr;
-	int error;
+	int error = 0;
 	int oldflags;
+	int locked = 0;
+	int changed = 0;
 
+	ifnet_lock_exclusive(ifp);
+	locked = 1;
 	oldflags = ifp->if_flags;
 	if (pswitch) {
 		/*
 		 * If the device is not configured up, we cannot put it in
 		 * promiscuous mode.
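
ifnet_set_promiscuous() illustrates the locking idiom used throughout this rewrite: software state is mutated under the exclusive ifnet lock, the lock is dropped before calling out to the driver (dlil_ioctl() can sleep), and a locked flag plus a single done: label keep the unlock balanced on every path. A condensed sketch of that shape (example_update is hypothetical; the KPIs are the ones used here):

static errno_t
example_update(ifnet_t ifp)
{
	errno_t error = 0;
	int locked = 1;

	ifnet_lock_exclusive(ifp);
	if ((ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto done;
	}
	/* ... mutate software state while the lock is held ... */
	locked = 0;
	ifnet_lock_done(ifp);
	error = dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t)0);	/* may sleep */
done:
	if (locked)
		ifnet_lock_done(ifp);
	return error;
}
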
		 */
-		if ((ifp->if_flags & IFF_UP) == 0)
-			return (ENETDOWN);
-		if (ifp->if_pcount++ != 0)
-			return (0);
+		if ((ifp->if_flags & IFF_UP) == 0) {
+			error = ENETDOWN;
+			goto done;
+		}
+		if (ifp->if_pcount++ != 0) {
+			goto done;
+		}
 		ifp->if_flags |= IFF_PROMISC;
+		changed = 1;
-		log(LOG_INFO, "%s%d: promiscuous mode enabled\n",
-		    ifp->if_name, ifp->if_unit);
 	} else {
 		if (--ifp->if_pcount > 0)
-			return (0);
+			goto done;
 		ifp->if_flags &= ~IFF_PROMISC;
+		changed = 1;
-		log(LOG_INFO, "%s%d: promiscuous mode disabled\n",
-		    ifp->if_name, ifp->if_unit);
 	}
 	ifr.ifr_flags = ifp->if_flags;
+	locked = 0;
+	ifnet_lock_done(ifp);
 	error = dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
 	if (error == 0)
 		rt_ifmsg(ifp);
 	else
 		ifp->if_flags = oldflags;
+done:
+	if (locked) ifnet_lock_done(ifp);
+	if (changed) {
+		log(LOG_INFO, "%s%d: promiscuous mode %s\n",
+		    ifp->if_name, ifp->if_unit,
+		    pswitch != 0 ? "enabled" : "disabled");
+	}
 	return error;
 }
@@ -1346,20 +1446,19 @@ ifpromisc(ifp, pswitch)
  */
 /*ARGSUSED*/
 static int
-ifconf(cmd, data)
-	u_long cmd;
-	caddr_t data;
+ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
 {
-	register struct ifconf *ifc = (struct ifconf *)data;
-	register struct ifnet *ifp = ifnet.tqh_first;
-	register struct ifaddr *ifa;
-	struct ifreq ifr, *ifrp;
-	int space = ifc->ifc_len, error = 0;
-
-	ifrp = ifc->ifc_req;
-	for (; space > sizeof (ifr) && ifp; ifp = ifp->if_link.tqe_next) {
+	struct ifnet *ifp = NULL;
+	struct ifaddr *ifa;
+	struct ifreq ifr;
+	int error = 0;
+	size_t space;
+
+	space = *ret_space;
+	ifnet_head_lock_shared();
+	for (ifp = ifnet_head.tqh_first; space > sizeof(ifr) && ifp; ifp = ifp->if_link.tqe_next) {
 		char workbuf[64];
-		int ifnlen, addrs;
+		size_t ifnlen, addrs;
 
 		ifnlen = snprintf(workbuf, sizeof(workbuf),
 		    "%s%d", ifp->if_name, ifp->if_unit);
@@ -1369,63 +1468,64 @@ ifconf(cmd, data)
 		} else {
 			strcpy(ifr.ifr_name, workbuf);
 		}
+
+		ifnet_lock_shared(ifp);
 		addrs = 0;
 		ifa = ifp->if_addrhead.tqh_first;
 		for ( ; space > sizeof (ifr) && ifa;
 		    ifa = ifa->ifa_link.tqe_next) {
-			register struct sockaddr *sa = ifa->ifa_addr;
+			struct sockaddr *sa = ifa->ifa_addr;
 #ifndef __APPLE__
 			if (curproc->p_prison && prison_if(curproc, sa))
 				continue;
 #endif
 			addrs++;
-#ifdef COMPAT_43
+#if COMPAT_43_SOCKET
 			if (cmd == OSIOCGIFCONF) {
 				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
 				ifr.ifr_addr = *sa;
 				osa->sa_family = sa->sa_family;
-				error = copyout((caddr_t)&ifr, (caddr_t)ifrp,
-
sizeof (ifr)); + error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr)); if (error) break; space -= sizeof (ifr); - ifrp++; + ifrp += sizeof(struct ifreq); } } - ifc->ifc_len -= space; + ifnet_head_done(); + *ret_space -= space; return (error); } @@ -1438,12 +1538,14 @@ if_allmulti(ifp, onswitch) int onswitch; { int error = 0; - int s = splimp(); + int modified = 0; + + ifnet_lock_exclusive(ifp); if (onswitch) { if (ifp->if_amcount++ == 0) { ifp->if_flags |= IFF_ALLMULTI; - error = dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); + modified = 1; } } else { if (ifp->if_amcount > 1) { @@ -1451,207 +1553,250 @@ if_allmulti(ifp, onswitch) } else { ifp->if_amcount = 0; ifp->if_flags &= ~IFF_ALLMULTI; - error = dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); + modified = 1; } } - splx(s); + ifnet_lock_done(ifp); + + if (modified) + error = dlil_ioctl(0, ifp, SIOCSIFFLAGS, (caddr_t) 0); if (error == 0) rt_ifmsg(ifp); return error; } -/* - * Add a multicast listenership to the interface in question. - * The link layer provides a routine which converts - */ -int -if_addmulti(ifp, sa, retifma) - struct ifnet *ifp; /* interface to manipulate */ - struct sockaddr *sa; /* address to add */ - struct ifmultiaddr **retifma; +void +ifma_reference( + struct ifmultiaddr *ifma) { - struct sockaddr *llsa = 0; - struct sockaddr *dupsa; - int error, s; - struct ifmultiaddr *ifma; - struct rslvmulti_req rsreq; + if (OSIncrementAtomic((SInt32 *)&ifma->ifma_refcount) <= 0) + panic("ifma_reference: ifma already released or invalid\n"); +} - /* - * If the matching multicast address already exists - * then don't add a new one, just add a reference - */ +void +ifma_release( + struct ifmultiaddr *ifma) +{ + while (ifma) { + struct ifmultiaddr *next; + int32_t prevValue = OSDecrementAtomic((SInt32 *)&ifma->ifma_refcount); + if (prevValue < 1) + panic("ifma_release: ifma already released or invalid\n"); + if (prevValue != 1) + break; + + /* Allow the allocator of the protospec to free it */ + if (ifma->ifma_protospec && ifma->ifma_free) { + ifma->ifma_free(ifma->ifma_protospec); + } + + next = ifma->ifma_ll; + FREE(ifma->ifma_addr, M_IFMADDR); + FREE(ifma, M_IFMADDR); + ifma = next; + } +} + + /* + * Find an ifmultiaddr that matches a socket address on an interface. + * + * Caller is responsible for holding the ifnet_lock while calling + * this function. + */ +static int +if_addmulti_doesexist( + struct ifnet *ifp, + const struct sockaddr *sa, + struct ifmultiaddr **retifma) +{ + struct ifmultiaddr *ifma; for (ifma = ifp->if_multiaddrs.lh_first; ifma; ifma = ifma->ifma_link.le_next) { if (equal(sa, ifma->ifma_addr)) { - ifma->ifma_refcount++; - if (retifma) + ifma->ifma_usecount++; + if (retifma) { *retifma = ifma; + ifma_reference(*retifma); + } return 0; } } + + return ENOENT; +} + +/* + * Add a multicast listenership to the interface in question. + * The link layer provides a routine which converts + */ +int +if_addmulti( + struct ifnet *ifp, /* interface to manipulate */ + const struct sockaddr *sa, /* address to add */ + struct ifmultiaddr **retifma) +{ + struct sockaddr_storage storage; + struct sockaddr *llsa = NULL; + struct sockaddr *dupsa; + int error; + struct ifmultiaddr *ifma; + struct ifmultiaddr *llifma = NULL; + + ifnet_lock_exclusive(ifp); + error = if_addmulti_doesexist(ifp, sa, retifma); + ifnet_lock_done(ifp); + + if (error == 0) + return 0; /* * Give the link layer a chance to accept/reject it, and also * find out which AF_LINK address this maps to, if it isn't one * already. 
*/ - rsreq.sa = sa; - rsreq.llsa = &llsa; - - error = dlil_ioctl(sa->sa_family, ifp, SIOCRSLVMULTI, (caddr_t) &rsreq); + error = dlil_resolve_multi(ifp, sa, (struct sockaddr*)&storage, sizeof(storage)); + if (error == 0 && storage.ss_len != 0) { + MALLOC(llsa, struct sockaddr*, storage.ss_len, M_IFMADDR, M_WAITOK); + MALLOC(llifma, struct ifmultiaddr *, sizeof *llifma, M_IFMADDR, M_WAITOK); + bcopy(&storage, llsa, storage.ss_len); + } /* to be similar to FreeBSD */ if (error == EOPNOTSUPP) error = 0; - if (error) - return error; + if (error) { + return error; + } + /* Allocate while we aren't holding any locks */ MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, M_IFMADDR, M_WAITOK); MALLOC(dupsa, struct sockaddr *, sa->sa_len, M_IFMADDR, M_WAITOK); bcopy(sa, dupsa, sa->sa_len); + + ifnet_lock_exclusive(ifp); + /* + * Check again for the matching multicast. + */ + if ((error = if_addmulti_doesexist(ifp, sa, retifma)) == 0) { + ifnet_lock_done(ifp); + FREE(ifma, M_IFMADDR); + FREE(dupsa, M_IFMADDR); + if (llsa) + FREE(llsa, M_IFMADDR); + return 0; + } + bzero(ifma, sizeof(*ifma)); ifma->ifma_addr = dupsa; - ifma->ifma_lladdr = llsa; ifma->ifma_ifp = ifp; + ifma->ifma_usecount = 1; ifma->ifma_refcount = 1; - ifma->ifma_protospec = 0; - rt_newmaddrmsg(RTM_NEWMADDR, ifma); - - /* - * Some network interfaces can scan the address list at - * interrupt time; lock them out. - */ - s = splimp(); - LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); - splx(s); - if (retifma) - *retifma = ifma; - - if (llsa != 0) { - for (ifma = ifp->if_multiaddrs.lh_first; ifma; - ifma = ifma->ifma_link.le_next) { - if (equal(ifma->ifma_addr, llsa)) - break; - } - if (ifma) { - ifma->ifma_refcount++; + + if (llifma != 0) { + if (if_addmulti_doesexist(ifp, llsa, &ifma->ifma_ll) == 0) { + FREE(llsa, M_IFMADDR); + FREE(llifma, M_IFMADDR); } else { - MALLOC(ifma, struct ifmultiaddr *, sizeof *ifma, - M_IFMADDR, M_WAITOK); - MALLOC(dupsa, struct sockaddr *, llsa->sa_len, - M_IFMADDR, M_WAITOK); - bcopy(llsa, dupsa, llsa->sa_len); - ifma->ifma_addr = dupsa; - ifma->ifma_lladdr = 0; - ifma->ifma_ifp = ifp; - ifma->ifma_refcount = 1; - s = splimp(); - LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); - splx(s); + bzero(llifma, sizeof(*llifma)); + llifma->ifma_addr = llsa; + llifma->ifma_ifp = ifp; + llifma->ifma_usecount = 1; + llifma->ifma_refcount = 1; + LIST_INSERT_HEAD(&ifp->if_multiaddrs, llifma, ifma_link); + + ifma->ifma_ll = llifma; + ifma_reference(ifma->ifma_ll); } } + + LIST_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); + + if (retifma) { + *retifma = ifma; + ifma_reference(*retifma); + } + + ifnet_lock_done(ifp); + + if (llsa != 0) + rt_newmaddrmsg(RTM_NEWMADDR, ifma); + /* * We are certain we have added something, so call down to the * interface to let them know about it. */ - s = splimp(); dlil_ioctl(0, ifp, SIOCADDMULTI, (caddr_t) 0); - splx(s); - + return 0; } int -if_delmultiaddr(struct ifmultiaddr *ifma) +if_delmultiaddr( + struct ifmultiaddr *ifma, + int locked) { - struct sockaddr *sa; struct ifnet *ifp; + int do_del_multi = 0; - /* Verify ifma is valid */ - { - struct ifmultiaddr *match = NULL; - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { - for (match = ifp->if_multiaddrs.lh_first; match; match = match->ifma_link.le_next) { - if (match->ifma_ifp != ifp) { - printf("if_delmultiaddr: ifma (%x) on ifp i(%s) is stale\n", - match, if_name(ifp)); - return (0) ; /* swallow error ? 
*/ - } - if (match == ifma) - break; - } - if (match == ifma) - break; + ifp = ifma->ifma_ifp; + + if (!locked && ifp) { + ifnet_lock_exclusive(ifp); + } + + while (ifma != NULL) { + struct ifmultiaddr *ll_ifma; + + if (ifma->ifma_usecount > 1) { + ifma->ifma_usecount--; + break; } - if (match != ifma) { - for (match = ifma_lostlist.lh_first; match; match = match->ifma_link.le_next) { - if (match->ifma_ifp != NULL) { - printf("if_delmultiaddr: item on lost list (%x) contains non-null ifp=%s\n", - match, if_name(match->ifma_ifp)); - return (0) ; /* swallow error ? */ - } - if (match == ifma) - break; - } + + if (ifp) + LIST_REMOVE(ifma, ifma_link); + + ll_ifma = ifma->ifma_ll; + + if (ll_ifma) { /* send a routing msg for network addresses only */ + if (ifp) + ifnet_lock_done(ifp); + rt_newmaddrmsg(RTM_DELMADDR, ifma); + if (ifp) + ifnet_lock_exclusive(ifp); } - if (match != ifma) { - printf("if_delmultiaddr: ifma 0x%X is invalid\n", ifma); - return 0; + /* + * Make sure the interface driver is notified + * in the case of a link layer mcast group being left. + */ + if (ll_ifma == 0) { + if (ifp && ifma->ifma_addr->sa_family == AF_LINK) + do_del_multi = 1; + break; } + + if (ifp) + ifma_release(ifma); + + ifma = ll_ifma; } - if (ifma->ifma_refcount > 1) { - ifma->ifma_refcount--; - return 0; + if (!locked && ifp) { + /* This wasn't initially locked, we should unlock it */ + ifnet_lock_done(ifp); } - - sa = ifma->ifma_lladdr; - - if (sa) /* send a routing msg for network addresses only */ - rt_newmaddrmsg(RTM_DELMADDR, ifma); - - ifp = ifma->ifma_ifp; - LIST_REMOVE(ifma, ifma_link); - /* - * Make sure the interface driver is notified - * in the case of a link layer mcast group being left. - */ - if (ifp && ifma->ifma_addr->sa_family == AF_LINK && sa == 0) + if (do_del_multi) { + if (locked) + ifnet_lock_done(ifp); dlil_ioctl(0, ifp, SIOCDELMULTI, 0); - FREE(ifma->ifma_addr, M_IFMADDR); - FREE(ifma, M_IFMADDR); - if (sa == 0) - return 0; - - /* - * Now look for the link-layer address which corresponds to - * this network address. It had been squirreled away in - * ifma->ifma_lladdr for this purpose (so we don't have - * to call SIOCRSLVMULTI again), and we saved that - * value in sa above. If some nasty deleted the - * link-layer address out from underneath us, we can deal because - * the address we stored was is not the same as the one which was - * in the record for the link-layer address. (So we don't complain - * in that case.) - */ - if (ifp) - ifma = ifp->if_multiaddrs.lh_first; - else - ifma = ifma_lostlist.lh_first; - for (; ifma; ifma = ifma->ifma_link.le_next) - if (equal(sa, ifma->ifma_addr)) - break; - - FREE(sa, M_IFMADDR); - if (ifma == 0) { - return 0; + if (locked) + ifnet_lock_exclusive(ifp); } - - return if_delmultiaddr(ifma); + + return 0; } /* @@ -1659,20 +1804,27 @@ if_delmultiaddr(struct ifmultiaddr *ifma) * if the request does not match an existing membership. 
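
The ifma refcounting earlier in this file is a plain atomic retain/release: OSIncrementAtomic() and OSDecrementAtomic() return the value the counter held before the operation, so a previous value of 1 on release means the object just hit zero and can be freed. A generic sketch of the same pattern (struct example_obj and the example_ functions are hypothetical):

struct example_obj {
	SInt32	refcount;	/* starts at 1 on allocation */
};

static void
example_retain(struct example_obj *o)
{
	if (OSIncrementAtomic(&o->refcount) <= 0)
		panic("example_retain: stale reference\n");
}

static void
example_release(struct example_obj *o)
{
	if (OSDecrementAtomic(&o->refcount) == 1)
		FREE(o, M_TEMP);	/* previous value 1 means now zero */
}
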
*/ int -if_delmulti(ifp, sa) - struct ifnet *ifp; - struct sockaddr *sa; +if_delmulti( + struct ifnet *ifp, + const struct sockaddr *sa) { struct ifmultiaddr *ifma; + int retval = 0; + ifnet_lock_exclusive(ifp); for (ifma = ifp->if_multiaddrs.lh_first; ifma; ifma = ifma->ifma_link.le_next) if (equal(sa, ifma->ifma_addr)) break; - if (ifma == 0) + if (ifma == 0) { + ifnet_lock_done(ifp); return ENOENT; + } + + retval = if_delmultiaddr(ifma, 1); + ifnet_lock_done(ifp); - return if_delmultiaddr(ifma); + return retval; } @@ -1690,15 +1842,17 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) struct ifmultiaddr * ifmaof_ifpforaddr(sa, ifp) - struct sockaddr *sa; + const struct sockaddr *sa; struct ifnet *ifp; { struct ifmultiaddr *ifma; + ifnet_lock_shared(ifp); for (ifma = ifp->if_multiaddrs.lh_first; ifma; ifma = ifma->ifma_link.le_next) if (equal(ifma->ifma_addr, sa)) break; + ifnet_lock_done(ifp); return ifma; } @@ -1711,17 +1865,21 @@ SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); * Shutdown all network activity. Used boot() when halting * system. */ +int if_down_all(void); int if_down_all(void) { - struct ifnet *ifp; - int s; - - s = splnet(); - TAILQ_FOREACH(ifp, &ifnet, if_link) - if_down(ifp); + struct ifnet **ifp; + u_int32_t count; + u_int32_t i; - splx(s); - return(0); /* Sheesh */ + if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp, &count) != 0) { + for (i = 0; i < count; i++) { + if_down(ifp[i]); + } + ifnet_list_free(ifp); + } + + return 0; } /* @@ -1740,9 +1898,9 @@ int if_down_all(void) * */ static int -if_rtdel(rn, arg) - struct radix_node *rn; - void *arg; +if_rtdel( + struct radix_node *rn, + void *arg) { struct rtentry *rt = (struct rtentry *)rn; struct ifnet *ifp = arg; @@ -1757,7 +1915,7 @@ if_rtdel(rn, arg) if ((rt->rt_flags & RTF_UP) == 0) return (0); - err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + err = rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **) NULL); if (err) { @@ -1772,12 +1930,111 @@ if_rtdel(rn, arg) * Removes routing table reference to a given interfacei * for a given protocol family */ - void if_rtproto_del(struct ifnet *ifp, int protocol) { struct radix_node_head *rnh; - if ((protocol <= AF_MAX) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) + if ((protocol <= AF_MAX) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) { + lck_mtx_lock(rt_mtx); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); + lck_mtx_unlock(rt_mtx); + } +} + +extern lck_spin_t *dlil_input_lock; + +__private_extern__ void +if_data_internal_to_if_data( + const struct if_data_internal *if_data_int, + struct if_data *if_data) +{ +#define COPYFIELD(fld) if_data->fld = if_data_int->fld +#define COPYFIELD32(fld) if_data->fld = (u_int32_t)(if_data_int->fld) + COPYFIELD(ifi_type); + COPYFIELD(ifi_typelen); + COPYFIELD(ifi_physical); + COPYFIELD(ifi_addrlen); + COPYFIELD(ifi_hdrlen); + COPYFIELD(ifi_recvquota); + COPYFIELD(ifi_xmitquota); + if_data->ifi_unused1 = 0; + COPYFIELD(ifi_mtu); + COPYFIELD(ifi_metric); + if (if_data_int->ifi_baudrate & 0xFFFFFFFF00000000LL) { + if_data->ifi_baudrate = 0xFFFFFFFF; + } + else { + COPYFIELD32(ifi_baudrate); + } + + lck_spin_lock(dlil_input_lock); + COPYFIELD32(ifi_ipackets); + COPYFIELD32(ifi_ierrors); + COPYFIELD32(ifi_opackets); + COPYFIELD32(ifi_oerrors); + COPYFIELD32(ifi_collisions); + COPYFIELD32(ifi_ibytes); + COPYFIELD32(ifi_obytes); + COPYFIELD32(ifi_imcasts); + COPYFIELD32(ifi_omcasts); + COPYFIELD32(ifi_iqdrops); + 
COPYFIELD32(ifi_noproto); + COPYFIELD32(ifi_recvtiming); + COPYFIELD32(ifi_xmittiming); + COPYFIELD(ifi_lastchange); + lck_spin_unlock(dlil_input_lock); + +#if IF_LASTCHANGEUPTIME + if_data->ifi_lastchange.tv_sec += boottime_sec(); +#endif + + if_data->ifi_unused2 = 0; + COPYFIELD(ifi_hwassist); + if_data->ifi_reserved1 = 0; + if_data->ifi_reserved2 = 0; +#undef COPYFIELD32 +#undef COPYFIELD +} + +__private_extern__ void +if_data_internal_to_if_data64( + const struct if_data_internal *if_data_int, + struct if_data64 *if_data64) +{ +#define COPYFIELD(fld) if_data64->fld = if_data_int->fld + COPYFIELD(ifi_type); + COPYFIELD(ifi_typelen); + COPYFIELD(ifi_physical); + COPYFIELD(ifi_addrlen); + COPYFIELD(ifi_hdrlen); + COPYFIELD(ifi_recvquota); + COPYFIELD(ifi_xmitquota); + if_data64->ifi_unused1 = 0; + COPYFIELD(ifi_mtu); + COPYFIELD(ifi_metric); + COPYFIELD(ifi_baudrate); + + lck_spin_lock(dlil_input_lock); + COPYFIELD(ifi_ipackets); + COPYFIELD(ifi_ierrors); + COPYFIELD(ifi_opackets); + COPYFIELD(ifi_oerrors); + COPYFIELD(ifi_collisions); + COPYFIELD(ifi_ibytes); + COPYFIELD(ifi_obytes); + COPYFIELD(ifi_imcasts); + COPYFIELD(ifi_omcasts); + COPYFIELD(ifi_iqdrops); + COPYFIELD(ifi_noproto); + COPYFIELD(ifi_recvtiming); + COPYFIELD(ifi_xmittiming); + COPYFIELD(ifi_lastchange); + lck_spin_unlock(dlil_input_lock); + +#if IF_LASTCHANGEUPTIME + if_data64->ifi_lastchange.tv_sec += boottime_sec(); +#endif + +#undef COPYFIELD } diff --git a/bsd/net/if.h b/bsd/net/if.h index 1aa754ce5..1763d9383 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,8 +57,11 @@ #ifndef _NET_IF_H_ #define _NET_IF_H_ -#include +#define IF_NAMESIZE 16 + +#ifndef _POSIX_C_SOURCE +#include #ifdef __APPLE__ /* * Define Data-Link event subclass, and associated @@ -82,17 +85,10 @@ #define KEV_DL_LINK_ON 13 #define KEV_DL_PROTO_ATTACHED 14 #define KEV_DL_PROTO_DETACHED 15 -#endif - -/* - * does not depend on on most other systems. This - * helps userland compatability. (struct timeval ifi_lastchange) - */ -#include - +#define KEV_DL_LINK_ADDRESS_CHANGED 16 -#ifdef __APPLE__ #include +#include #endif #ifdef KERNEL_PRIVATE @@ -101,6 +97,19 @@ struct if_clonereq { int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; + +/* in-kernel, LP64-aware version of if_clonereq. all pointers + * grow when we're dealing with a 64-bit process. 
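
Note that if_data_internal_to_if_data() above saturates the 64-bit internal ifi_baudrate to fit the legacy 32-bit field rather than silently truncating it. The same guard, factored out as a sketch (example_sat32 is hypothetical):

static u_int32_t
example_sat32(u_int64_t value)
{
	/* Pin anything that does not fit to the 32-bit maximum */
	if (value & 0xFFFFFFFF00000000ULL)
		return 0xFFFFFFFF;
	return (u_int32_t)value;
}
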
+ * WARNING - keep in sync with if_clonereq + */ +struct if_clonereq64 { + int ifcr_total; /* total cloners (out) */ + int ifcr_count; /* room for this many in user buffer */ + union { + u_int64_t ifcru_buffer64; + char * ifcru_buffer32; + } ifcr_ifcru; +}; #endif KERNEL_PRIVATE #define IFF_UP 0x1 /* interface is up */ @@ -126,45 +135,51 @@ struct if_clonereq { #define IFEF_AUTOCONFIGURING 0x1 #define IFEF_DVR_REENTRY_OK 0x20 /* When set, driver may be reentered from its own thread */ #define IFEF_ACCEPT_RTADVD 0x40 /* set to accept IPv6 router advertisement on the interface */ -#define IFEF_INUSE 0x40000000 /* DLIL ifnet recycler, ifnet in use */ +#define IFEF_DETACHING 0x80 /* Set when interface is detaching */ +#define IFEF_USEKPI 0x100 /* Set when interface is created through the KPIs */ +#define IFEF_VLAN 0x200 /* interface has one or more vlans */ +#define IFEF_BOND 0x400 /* interface is part of bond */ +#define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses on this port */ #define IFEF_REUSE 0x20000000 /* DLIL ifnet recycler, ifnet is not new */ -#endif /* KERNEL_PRIVATE */ - +#define IFEF_INUSE 0x40000000 /* DLIL ifnet recycler, ifnet in use */ +#define IFEF_UPDOWNCHANGE 0x80000000 /* Interface's up/down state is changing */ /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI) +#endif /* KERNEL_PRIVATE */ + #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces - * from getkerninfo and the routing socket + * from sysctl and the routing socket */ struct if_msghdr { - u_short ifm_msglen; /* to skip over non-understood messages */ - u_char ifm_version; /* future binary compatability */ - u_char ifm_type; /* message type */ - int ifm_addrs; /* like rtm_addrs */ - int ifm_flags; /* value of if_flags */ - u_short ifm_index; /* index for associated ifp */ - struct if_data ifm_data;/* statistics and other data about if */ + unsigned short ifm_msglen; /* to skip over non-understood messages */ + unsigned char ifm_version; /* future binary compatability */ + unsigned char ifm_type; /* message type */ + int ifm_addrs; /* like rtm_addrs */ + int ifm_flags; /* value of if_flags */ + unsigned short ifm_index; /* index for associated ifp */ + struct if_data ifm_data; /* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses - * from getkerninfo and the routing socket + * from sysctl and the routing socket */ struct ifa_msghdr { - u_short ifam_msglen; /* to skip over non-understood messages */ - u_char ifam_version; /* future binary compatability */ - u_char ifam_type; /* message type */ - int ifam_addrs; /* like rtm_addrs */ - int ifam_flags; /* value of ifa_flags */ - u_short ifam_index; /* index for associated ifp */ - int ifam_metric; /* value of ifa_metric */ + unsigned short ifam_msglen; /* to skip over non-understood messages */ + unsigned char ifam_version; /* future binary compatability */ + unsigned char ifam_type; /* message type */ + int ifam_addrs; /* like rtm_addrs */ + int ifam_flags; /* value of ifa_flags */ + unsigned short ifam_index; /* index for associated ifp */ + int ifam_metric; /* value of ifa_metric */ }; /* @@ -172,12 +187,55 @@ struct ifa_msghdr { * from the routing socket */ struct ifma_msghdr { + unsigned short ifmam_msglen; /* to skip over non-understood messages */ + unsigned char ifmam_version; /* future binary 
compatability */ + unsigned char ifmam_type; /* message type */ + int ifmam_addrs; /* like rtm_addrs */ + int ifmam_flags; /* value of ifa_flags */ + unsigned short ifmam_index; /* index for associated ifp */ +}; + +/* + * Message format for use in obtaining information about interfaces + * from sysctl + */ +struct if_msghdr2 { + u_short ifm_msglen; /* to skip over non-understood messages */ + u_char ifm_version; /* future binary compatability */ + u_char ifm_type; /* message type */ + int ifm_addrs; /* like rtm_addrs */ + int ifm_flags; /* value of if_flags */ + u_short ifm_index; /* index for associated ifp */ + int ifm_snd_len; /* instantaneous length of send queue */ + int ifm_snd_maxlen; /* maximum length of send queue */ + int ifm_snd_drops; /* number of drops in send queue */ + int ifm_timer; /* time until if_watchdog called */ + struct if_data64 ifm_data; /* statistics and other data about if */ +}; + +/* + * Message format for use in obtaining information about multicast addresses + * from sysctl + */ +struct ifma_msghdr2 { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatability */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ + int32_t ifmam_refcount; +}; + +/* + * ifdevmtu: interface device mtu + * Used with SIOCGIFDEVMTU to get the current mtu in use by the device, + * as well as the minimum and maximum mtu allowed by the device. + */ +struct ifdevmtu { + int ifdm_current; + int ifdm_min; + int ifdm_max; }; /* @@ -186,9 +244,10 @@ struct ifma_msghdr { * definitions which begin with ifr_name. The * remainder may be interface specific. */ -#define IF_NAMESIZE IFNAMSIZ struct ifreq { -#define IFNAMSIZ 16 +#ifndef IFNAMSIZ +#define IFNAMSIZ IF_NAMESIZE +#endif char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; @@ -199,7 +258,12 @@ struct ifreq { int ifru_mtu; int ifru_phys; int ifru_media; + int ifru_intval; caddr_t ifru_data; +#ifdef KERNEL_PRIVATE + u_int64_t ifru_data64; /* 64-bit ifru_data */ +#endif KERNEL_PRIVATE + struct ifdevmtu ifru_devmtu; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -215,6 +279,11 @@ struct ifreq { #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ #define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#define ifr_devmtu ifr_ifru.ifru_devmtu +#define ifr_intval ifr_ifru.ifru_intval /* integer value */ +#ifdef KERNEL_PRIVATE +#define ifr_data64 ifr_ifru.ifru_data64 /* 64-bit pointer */ +#endif KERNEL_PRIVATE }; #define _SIZEOF_ADDR_IFREQ(ifr) \ @@ -244,6 +313,25 @@ struct ifmediareq { int *ifm_ulist; /* media words */ }; +#ifdef KERNEL_PRIVATE +/* LP64 version of ifmediareq. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with ifmediareq + */ +struct ifmediareq64 { + char ifm_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ + int ifm_current; /* current media options */ + int ifm_mask; /* don't care mask */ + int ifm_status; /* media status */ + int ifm_active; /* active options */ + int ifm_count; /* # entries in ifm_ulist array */ + union { /* media words */ + int * ifmu_ulist32; /* 32-bit pointer */ + u_int64_t ifmu_ulist64; /* 64-bit pointer */ + } ifm_ifmu; +}; +#endif // KERNEL_PRIVATE + /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting @@ -273,30 +361,40 @@ struct ifconf { #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; -#ifdef __APPLE__ -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL_PRIVATE +/* LP64 version of ifconf. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with ifconf + */ +struct ifconf64 { + int ifc_len; /* size of associated buffer */ + union { + struct ifreq * ifcu_req; + u_int64_t ifcu_req64; + } ifc_ifcu; +}; +#define ifc_req64 ifc_ifcu.ifcu_req64 +#endif // KERNEL_PRIVATE + /* * DLIL KEV_DL_PROTO_ATTACHED/DETACHED structure */ struct kev_dl_proto_data { struct net_event_data link_data; - u_long proto_family; - u_long proto_remaining_count; + unsigned long proto_family; + unsigned long proto_remaining_count; }; -#endif /* __APPLE_API_UNSTABLE */ -#endif - /* * Structure for SIOC[AGD]LIFADDR */ struct if_laddrreq { - char iflr_name[IFNAMSIZ]; - u_int flags; + char iflr_name[IFNAMSIZ]; + unsigned int flags; #define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ - u_int prefixlen; /* in/out */ - struct sockaddr_storage addr; /* in/out */ - struct sockaddr_storage dstaddr; /* out */ + unsigned int prefixlen; /* in/out */ + struct sockaddr_storage addr; /* in/out */ + struct sockaddr_storage dstaddr; /* out */ }; #ifdef KERNEL @@ -305,28 +403,24 @@ MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif +#endif /* _POSIX_C_SOURCE */ #ifndef KERNEL struct if_nameindex { - u_int if_index; /* 1, 2, ... */ - char *if_name; /* null terminated name: "le0", ... */ + unsigned int if_index; /* 1, 2, ... */ + char *if_name; /* null terminated name: "le0", ... */ }; __BEGIN_DECLS -u_int if_nametoindex __P((const char *)); -char *if_indextoname __P((u_int, char *)); -struct if_nameindex *if_nameindex __P((void)); -void if_freenameindex __P((struct if_nameindex *)); +unsigned int if_nametoindex(const char *); +char *if_indextoname(unsigned int, char *); +struct if_nameindex *if_nameindex(void); +void if_freenameindex(struct if_nameindex *); __END_DECLS #endif #ifdef KERNEL -#ifndef __APPLE__ -struct proc; - -int prison_if __P((struct proc *p, struct sockaddr *sa)); -#endif - +#include #endif #endif /* !_NET_IF_H_ */ diff --git a/bsd/net/if_arp.h b/bsd/net/if_arp.h index 0a752d965..24caabe63 100644 --- a/bsd/net/if_arp.h +++ b/bsd/net/if_arp.h @@ -114,58 +114,4 @@ struct arpreq { #define ATF_PUBL 0x08 /* publish entry (respond for other host) */ #define ATF_USETRAILERS 0x10 /* has requested trailers */ -#ifdef __APPLE_API_UNSTABLE - -#ifdef __APPLE__ -/* - * Ethernet multicast address structure. There is one of these for each - * multicast address or range of multicast addresses that we are supposed - * to listen to on a particular interface. They are kept in a linked list, - * rooted in the interface's arpcom structure. (This really has nothing to - * do with ARP, or with the Internet address family, but this appears to be - * the minimally-disrupting place to put it.) 
- */ -struct ether_multi { - u_char enm_addrlo[6]; /* low or only address of range */ - u_char enm_addrhi[6]; /* high or only address of range */ - struct arpcom *enm_ac; /* back pointer to arpcom */ - u_int enm_refcount; /* no. claims to this addr/range */ - struct ether_multi *enm_next; /* ptr to next ether_multi */ -}; - -/* - * Structure used by macros below to remember position when stepping through - * all of the ether_multi records. - */ -struct ether_multistep { - struct ether_multi *e_enm; -}; -#endif /* __APPLE__ */ - -#ifdef KERNEL -/* - * Structure shared between the ethernet driver modules and - * the address resolution code. For example, each ec_softc or il_softc - * begins with this structure. - */ -struct arpcom { - /* - * The ifnet struct _must_ be at the head of this structure. - */ - struct ifnet ac_if; /* network-visible interface */ - u_char ac_enaddr[6]; /* ethernet hardware address */ -#ifdef __APPLE__ - struct in_addr ac_ipaddr; /* copy of ip address- XXX */ - struct ether_multi *ac_multiaddrs; /* list of ether multicast addrs */ -#endif - int ac_multicnt; /* length of ac_multiaddrs list */ -#ifndef __APPLE__ - void *ac_netgraph; /* ng_ether(4) netgraph node info */ -#endif -}; - - -#endif -#endif /* __APPLE_API_UNSTABLE */ - #endif /* !_NET_IF_ARP_H_ */ diff --git a/bsd/net/if_atm.h b/bsd/net/if_atm.h index 9a0518ac8..e8fe0eceb 100644 --- a/bsd/net/if_atm.h +++ b/bsd/net/if_atm.h @@ -58,12 +58,13 @@ * net/if_atm.h */ +#ifdef KERNEL_PRIVATE #if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) #define RTALLOC1(A,B) rtalloc1((A),(B)) #elif defined(__FreeBSD__) || defined(__APPLE__) #define RTALLOC1(A,B) rtalloc1((A),(B),0UL) #endif - +#endif /* KERNEL_PRIVATE */ /* * pseudo header for packet transmission @@ -119,13 +120,11 @@ struct atmllc { (X)->type[0] = ((V) & 0xff); \ } -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -void atm_ifattach __P((struct ifnet *)); -void atm_input __P((struct ifnet *, struct atm_pseudohdr *, - struct mbuf *, void *)); -int atm_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry *)); -#endif /* __APPLE_API_PRIVATE */ -#endif +#ifdef KERNEL_PRIVATE +void atm_ifattach(struct ifnet *); +void atm_input(struct ifnet *, struct atm_pseudohdr *, + struct mbuf *, void *); +int atm_output(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c new file mode 100644 index 000000000..5c7005dc4 --- /dev/null +++ b/bsd/net/if_bond.c @@ -0,0 +1,4485 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * if_bond.c
+ * - bond/failover interface
+ * - implements IEEE 802.3ad Link Aggregation
+ */
+
+/*
+ * Modification History:
+ *
+ * April 29, 2004 Dieter Siegmund (dieter@apple.com)
+ * - created
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+extern int dlil_input_packet(struct ifnet *, struct mbuf *, char *);
+
+static struct ether_addr slow_proto_multicast = {
+    IEEE8023AD_SLOW_PROTO_MULTICAST
+};
+
+#define BOND_MAXUNIT    128
+#define BONDNAME        "bond"
+#define M_BOND          M_DEVBUF
+
+#define EA_FORMAT   "%x:%x:%x:%x:%x:%x"
+#define EA_CH(e, i) ((u_char)((u_char *)(e))[(i)])
+#define EA_LIST(ea) EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)
+
+#define timestamp_printf printf
+
+/**
+ ** bond locks
+ **/
+static __inline__ lck_grp_t *
+my_lck_grp_alloc_init(const char * grp_name)
+{
+    lck_grp_t *      grp;
+    lck_grp_attr_t * grp_attrs;
+
+    grp_attrs = lck_grp_attr_alloc_init();
+    lck_grp_attr_setdefault(grp_attrs);
+    grp = lck_grp_alloc_init(grp_name, grp_attrs);
+    lck_grp_attr_free(grp_attrs);
+    return (grp);
+}
+
+static __inline__ lck_mtx_t *
+my_lck_mtx_alloc_init(lck_grp_t * lck_grp)
+{
+    lck_attr_t * lck_attrs;
+    lck_mtx_t *  lck_mtx;
+
+    lck_attrs = lck_attr_alloc_init();
+    lck_attr_setdefault(lck_attrs);
+    lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs);
+    lck_attr_free(lck_attrs);
+    return (lck_mtx);
+}
+
+static lck_mtx_t * bond_lck_mtx;
+
+static __inline__ void
+bond_lock_init(void)
+{
+    lck_grp_t * bond_lck_grp;
+
+    bond_lck_grp = my_lck_grp_alloc_init("if_bond");
+    bond_lck_mtx = my_lck_mtx_alloc_init(bond_lck_grp);
+}
+
+static __inline__ void
+bond_assert_lock_held(void)
+{
+    lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
+    return;
+}
+
+static __inline__ void
+bond_assert_lock_not_held(void)
+{
+    lck_mtx_assert(bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
+    return;
+}
+
+static __inline__ void
+bond_lock(void)
+{
+    lck_mtx_lock(bond_lck_mtx);
+    return;
+}
+
+static __inline__ void
+bond_unlock(void)
+{
+    lck_mtx_unlock(bond_lck_mtx);
+    return;
+}
+
+/**
+ ** bond structures, types
+ **/
+
+struct LAG_info_s {
+    lacp_system          li_system;
+    lacp_system_priority li_system_priority;
+    lacp_key             li_key;
+};
+typedef struct LAG_info_s LAG_info, * LAG_info_ref;
+
+struct bondport_s;
+TAILQ_HEAD(port_list, bondport_s);
+struct ifbond_s;
+TAILQ_HEAD(ifbond_list, ifbond_s);
+struct LAG_s;
+TAILQ_HEAD(lag_list, LAG_s);
+
+typedef struct ifbond_s ifbond, * ifbond_ref;
+typedef struct bondport_s bondport, * bondport_ref;
+
+struct LAG_s {
+    TAILQ_ENTRY(LAG_s) lag_list;
+    struct port_list   lag_port_list;
+    short              lag_port_count;
+    short              lag_selected_port_count;
+    int                lag_active_media;
+    LAG_info           lag_info;
+};
+typedef struct LAG_s LAG, * LAG_ref;
+
+typedef struct partner_state_s {
+    LAG_info            ps_lag_info;
+    lacp_port           ps_port;
+    lacp_port_priority  ps_port_priority;
+    lacp_actor_partner_state ps_state;
+} partner_state, * partner_state_ref;
+
+struct ifbond_s {
+    TAILQ_ENTRY(ifbond_s) ifb_bond_list;
+    int                 ifb_flags;
+    UInt32              ifb_retain_count;
+    char                ifb_name[IFNAMSIZ];
+    struct ifnet *      ifb_ifp;
+    bpf_packet_func     ifb_bpf_input;
+    bpf_packet_func     ifb_bpf_output;
+    int                 ifb_altmtu;
+    struct
port_list ifb_port_list; + short ifb_port_count; + struct lag_list ifb_lag_list; + lacp_key ifb_key; + short ifb_max_active; /* 0 == unlimited */ + LAG_ref ifb_active_lag; + struct ifmultiaddr * ifb_ifma_slow_proto; + bondport_ref * ifb_distributing_array; + int ifb_distributing_count; +}; + +struct media_info { + int mi_active; + int mi_status; +}; + +enum { + ReceiveState_none = 0, + ReceiveState_INITIALIZE = 1, + ReceiveState_PORT_DISABLED = 2, + ReceiveState_EXPIRED = 3, + ReceiveState_LACP_DISABLED = 4, + ReceiveState_DEFAULTED = 5, + ReceiveState_CURRENT = 6, +}; + +typedef u_char ReceiveState; + +enum { + SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED, + SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED, + SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY +}; +typedef u_char SelectedState; + +static __inline__ const char * +SelectedStateString(SelectedState s) +{ + static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" }; + + if (s <= SelectedState_STANDBY) { + return (names[s]); + } + return (""); +} + +enum { + MuxState_none = 0, + MuxState_DETACHED = 1, + MuxState_WAITING = 2, + MuxState_ATTACHED = 3, + MuxState_COLLECTING_DISTRIBUTING = 4, +}; + +typedef u_char MuxState; + +struct bondport_s { + TAILQ_ENTRY(bondport_s) po_port_list; + ifbond_ref po_bond; + struct multicast_list po_multicast; + struct ifnet * po_ifp; + struct ether_addr po_saved_addr; + int po_enabled; + char po_name[IFNAMSIZ]; + struct ifdevmtu po_devmtu; + + /* LACP */ + TAILQ_ENTRY(bondport_s) po_lag_port_list; + devtimer_ref po_current_while_timer; + devtimer_ref po_periodic_timer; + devtimer_ref po_wait_while_timer; + devtimer_ref po_transmit_timer; + partner_state po_partner_state; + lacp_port_priority po_priority; + lacp_actor_partner_state po_actor_state; + u_char po_flags; + u_char po_periodic_interval; + u_char po_n_transmit; + ReceiveState po_receive_state; + MuxState po_mux_state; + SelectedState po_selected; + int32_t po_last_transmit_secs; + struct media_info po_media_info; + LAG_ref po_lag; +}; + +#define IFBF_PROMISC 0x1 /* promiscuous mode */ +#define IFBF_IF_DETACHING 0x2 /* interface is detaching */ +#define IFBF_LLADDR 0x4 /* specific link address requested */ +#define IFBF_CHANGE_IN_PROGRESS 0x8 /* interface add/remove in progress */ + +static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, + user_addr_t datap); + +static __inline__ int +ifbond_flags_promisc(ifbond_ref ifb) +{ + return ((ifb->ifb_flags & IFBF_PROMISC) != 0); +} + +static __inline__ void +ifbond_flags_set_promisc(ifbond_ref ifb) +{ + ifb->ifb_flags |= IFBF_PROMISC; + return; +} + +static __inline__ void +ifbond_flags_clear_promisc(ifbond_ref ifb) +{ + ifb->ifb_flags &= ~IFBF_PROMISC; + return; +} + +static __inline__ int +ifbond_flags_if_detaching(ifbond_ref ifb) +{ + return ((ifb->ifb_flags & IFBF_IF_DETACHING) != 0); +} + +static __inline__ void +ifbond_flags_set_if_detaching(ifbond_ref ifb) +{ + ifb->ifb_flags |= IFBF_IF_DETACHING; + return; +} + +static __inline__ int +ifbond_flags_lladdr(ifbond_ref ifb) +{ + return ((ifb->ifb_flags & IFBF_LLADDR) != 0); +} + +static __inline__ void +ifbond_flags_set_lladdr(ifbond_ref ifb) +{ + ifb->ifb_flags |= IFBF_LLADDR; + return; +} + +static __inline__ void +ifbond_flags_clear_lladdr(ifbond_ref ifb) +{ + ifb->ifb_flags &= ~IFBF_LLADDR; + return; +} + +static __inline__ int +ifbond_flags_change_in_progress(ifbond_ref ifb) +{ + return ((ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0); +} + +static __inline__ 
void +ifbond_flags_set_change_in_progress(ifbond_ref ifb) +{ + ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS; + return; +} + +static __inline__ void +ifbond_flags_clear_change_in_progress(ifbond_ref ifb) +{ + ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS; + return; +} + +/* + * bondport_ref->po_flags bits + */ +#define BONDPORT_FLAGS_NTT 0x01 +#define BONDPORT_FLAGS_READY 0x02 +#define BONDPORT_FLAGS_SELECTED_CHANGED 0x04 +#define BONDPORT_FLAGS_MUX_ATTACHED 0x08 +#define BONDPORT_FLAGS_DISTRIBUTING 0x10 +#define BONDPORT_FLAGS_UNUSED2 0x20 +#define BONDPORT_FLAGS_UNUSED3 0x40 +#define BONDPORT_FLAGS_UNUSED4 0x80 + +static __inline__ void +bondport_flags_set_ntt(bondport_ref p) +{ + p->po_flags |= BONDPORT_FLAGS_NTT; + return; +} + +static __inline__ void +bondport_flags_clear_ntt(bondport_ref p) +{ + p->po_flags &= ~BONDPORT_FLAGS_NTT; + return; +} + +static __inline__ int +bondport_flags_ntt(bondport_ref p) +{ + return ((p->po_flags & BONDPORT_FLAGS_NTT) != 0); +} + +static __inline__ void +bondport_flags_set_ready(bondport_ref p) +{ + p->po_flags |= BONDPORT_FLAGS_READY; + return; +} + +static __inline__ void +bondport_flags_clear_ready(bondport_ref p) +{ + p->po_flags &= ~BONDPORT_FLAGS_READY; + return; +} + +static __inline__ int +bondport_flags_ready(bondport_ref p) +{ + return ((p->po_flags & BONDPORT_FLAGS_READY) != 0); +} + +static __inline__ void +bondport_flags_set_selected_changed(bondport_ref p) +{ + p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED; + return; +} + +static __inline__ void +bondport_flags_clear_selected_changed(bondport_ref p) +{ + p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED; + return; +} + +static __inline__ int +bondport_flags_selected_changed(bondport_ref p) +{ + return ((p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0); +} + +static __inline__ void +bondport_flags_set_mux_attached(bondport_ref p) +{ + p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED; + return; +} + +static __inline__ void +bondport_flags_clear_mux_attached(bondport_ref p) +{ + p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED; + return; +} + +static __inline__ int +bondport_flags_mux_attached(bondport_ref p) +{ + return ((p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0); +} + +static __inline__ void +bondport_flags_set_distributing(bondport_ref p) +{ + p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING; + return; +} + +static __inline__ void +bondport_flags_clear_distributing(bondport_ref p) +{ + p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING; + return; +} + +static __inline__ int +bondport_flags_distributing(bondport_ref p) +{ + return ((p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0); +} + +typedef struct bond_globals_s { + struct ifbond_list ifbond_list; + lacp_system system; + lacp_system_priority system_priority; + int verbose; +} * bond_globals_ref; + +static bond_globals_ref g_bond; + +/** + ** packet_buffer routines + ** - thin wrapper for mbuf + **/ + +typedef struct mbuf * packet_buffer_ref; + +static packet_buffer_ref +packet_buffer_allocate(int length) +{ + packet_buffer_ref m; + int size; + + /* leave room for ethernet header */ + size = length + sizeof(struct ether_header); + if (size > (int)MHLEN) { + /* XXX doesn't handle large payloads */ + printf("bond: packet_buffer_allocate size %d > max %d\n", size, MHLEN); + return (NULL); + } + m = m_gethdr(M_WAITOK, MT_DATA); + if (m == NULL) { + return (NULL); + } + m->m_len = size; + m->m_pkthdr.len = size; + return (m); +} + +static void * +packet_buffer_byteptr(packet_buffer_ref buf) +{ + return (buf->m_data + sizeof(struct ether_header)); +} + +typedef 
enum { + LAEventStart, + LAEventTimeout, + LAEventPacket, + LAEventMediaChange, + LAEventSelectedChange, + LAEventPortMoved, + LAEventReady +} LAEvent; + +/** + ** Receive machine + **/ +static void +bondport_receive_machine(bondport_ref p, LAEvent event, + void * event_data); +/** + ** Periodic Transmission machine + **/ +static void +bondport_periodic_transmit_machine(bondport_ref p, LAEvent event, + void * event_data); + +/** + ** Transmit machine + **/ +static void +bondport_transmit_machine(bondport_ref p, LAEvent event, + void * event_data); + +/** + ** Mux machine + **/ +static void +bondport_mux_machine(bondport_ref p, LAEvent event, + void * event_data); + +/** + ** bond, LAG + **/ +static void +ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media); + +static void +ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag); + +static int +ifbond_all_ports_ready(ifbond_ref bond); + +static LAG_ref +ifbond_find_best_LAG(ifbond_ref bond, int * active_media); + +static int +LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media); + +static int +ifbond_selection(ifbond_ref bond); + + +/** + ** bondport + **/ + +static void +bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p); + +static void +bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf); + +static bondport_ref +bondport_create(struct ifnet * port_ifp, lacp_port_priority priority, + int active, int short_timeout, int * error); +static void +bondport_start(bondport_ref p); + +static void +bondport_free(bondport_ref p); + +static int +bondport_aggregatable(bondport_ref p); + +static int +bondport_remove_from_LAG(bondport_ref p); + +static void +bondport_set_selected(bondport_ref p, SelectedState s); + +static int +bondport_matches_LAG(bondport_ref p, LAG_ref lag); + +static void +bondport_link_status_changed(bondport_ref p); + +static void +bondport_enable_distributing(bondport_ref p); + +static void +bondport_disable_distributing(bondport_ref p); + +static __inline__ int +bondport_collecting(bondport_ref p) +{ + return (lacp_actor_partner_state_collecting(p->po_actor_state)); +} + +/** + ** bond interface/dlil specific routines + **/ +static int bond_clone_create(struct if_clone *, int); +static void bond_clone_destroy(struct ifnet *); +static int bond_input(struct mbuf *m, char *frame_header, struct ifnet *ifp, + u_long protocol_family, int sync_ok); +static int bond_output(struct ifnet *ifp, struct mbuf *m); +static int bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * addr); +static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, + bpf_packet_func func); +static int bond_attach_protocol(struct ifnet *ifp); +static int bond_detach_protocol(struct ifnet *ifp); +static int bond_setmulti(struct ifnet *ifp); +static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp); +static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp); +static void bond_if_free(struct ifnet * ifp); + +static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME, + bond_clone_create, + bond_clone_destroy, + 0, + BOND_MAXUNIT); +static void interface_link_event(struct ifnet * ifp, u_long event_code); + +static int +siocsifmtu(struct ifnet * ifp, int mtu) +{ + struct ifreq ifr; + + bzero(&ifr, sizeof(ifr)); + ifr.ifr_mtu = mtu; + return (dlil_ioctl(0, ifp, SIOCSIFMTU, (caddr_t)&ifr)); +} + +static int +siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p) +{ + struct ifreq ifr; + int error; + + bzero(&ifr, sizeof(ifr)); + error = dlil_ioctl(0, ifp, 
SIOCGIFDEVMTU, (caddr_t)&ifr); + if (error == 0) { + *ifdm_p = ifr.ifr_devmtu; + } + return (error); +} + +static __inline__ void +ether_addr_copy(void * dest, const void * source) +{ + bcopy(source, dest, ETHER_ADDR_LEN); + return; +} + +static __inline__ void +ifbond_retain(ifbond_ref ifb) +{ + OSIncrementAtomic(&ifb->ifb_retain_count); +} + +static __inline__ void +ifbond_release(ifbond_ref ifb) +{ + UInt32 old_retain_count; + + old_retain_count = OSDecrementAtomic(&ifb->ifb_retain_count); + switch (old_retain_count) { + case 0: + panic("ifbond_release: retain count is 0\n"); + break; + case 1: + if (g_bond->verbose) { + printf("ifbond_release(%s)\n", ifb->ifb_name); + } + if (ifb->ifb_ifma_slow_proto != NULL) { + if (g_bond->verbose) { + printf("ifbond_release(%s) removing multicast\n", + ifb->ifb_name); + } + (void)if_delmultiaddr(ifb->ifb_ifma_slow_proto, 0); + ifma_release(ifb->ifb_ifma_slow_proto); + } + if (ifb->ifb_distributing_array != NULL) { + FREE(ifb->ifb_distributing_array, M_BOND); + } + FREE(ifb, M_BOND); + break; + default: + break; + } + return; +} + +/* + * Function: ifbond_wait + * Purpose: + * Allows a single thread to gain exclusive access to the ifbond + * data structure. Some operations take a long time to complete, + * and some have side-effects that we can't predict. Holding the + * bond_lock() across such operations is not possible. + * + * For example: + * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to + * complete. Simply holding the bond_lock() would freeze all other + * data structure accesses during that time. + * 2) When we attach our protocol to the interface, a dlil event is + * generated and invokes our bond_event() function. bond_event() + * needs to take the bond_lock(), but we're already holding it, so + * we're deadlocked against ourselves. + * Notes: + * Before calling, you must be holding the bond_lock and have taken + * a reference on the ifbond_ref. + */ +static void +ifbond_wait(ifbond_ref ifb, const char * msg) +{ + int waited = 0; + + /* other add/remove in progress */ + while (ifbond_flags_change_in_progress(ifb)) { + if (g_bond->verbose) { + printf("%s: %s msleep\n", ifb->ifb_name, msg); + } + waited = 1; + (void)msleep(ifb, bond_lck_mtx, PZERO, msg, 0); + } + /* prevent other bond list remove/add from taking place */ + ifbond_flags_set_change_in_progress(ifb); + if (g_bond->verbose && waited) { + printf("%s: %s woke up\n", ifb->ifb_name, msg); + } + return; +} + +/* + * Function: ifbond_signal + * Purpose: + * Allows the thread that previously invoked ifbond_wait() to + * give up exclusive access to the ifbond data structure, and wake up + * any other threads waiting to access + * Notes: + * Before calling, you must be holding the bond_lock and have taken + * a reference on the ifbond_ref. 
+ */ +static void +ifbond_signal(ifbond_ref ifb, const char * msg) +{ + ifbond_flags_clear_change_in_progress(ifb); + wakeup((caddr_t)ifb); + if (g_bond->verbose) { + printf("%s: %s wakeup\n", ifb->ifb_name, msg); + } + return; +} + +/** + ** Media information + **/ + +static int +link_speed(int active) +{ + switch (IFM_SUBTYPE(active)) { + case IFM_10_T: + case IFM_10_2: + case IFM_10_5: + case IFM_10_STP: + case IFM_10_FL: + return (10); + case IFM_100_TX: + case IFM_100_FX: + case IFM_100_T4: + case IFM_100_VG: + case IFM_100_T2: + return (100); + case IFM_1000_SX: + case IFM_1000_LX: + case IFM_1000_CX: + case IFM_1000_TX: + return (1000); + case IFM_HPNA_1: + return (0); + default: + /* assume that new defined types are going to be at least 10GigE */ + case IFM_10G_SR: + case IFM_10G_LR: + return (10000); + } +} + +static __inline__ int +media_active(const struct media_info * mi) +{ + if ((mi->mi_status & IFM_AVALID) == 0) { + return (1); + } + return ((mi->mi_status & IFM_ACTIVE) != 0); +} + +static __inline__ int +media_full_duplex(const struct media_info * mi) +{ + return ((mi->mi_active & IFM_FDX) != 0); +} + +static __inline__ int +media_speed(const struct media_info * mi) +{ + return (link_speed(mi->mi_active)); +} + +static struct media_info +interface_media_info(struct ifnet * ifp) +{ + struct ifmediareq ifmr; + struct media_info mi; + + bzero(&mi, sizeof(mi)); + bzero(&ifmr, sizeof(ifmr)); + if (dlil_ioctl(0, ifp, SIOCGIFMEDIA, (caddr_t)&ifmr) == 0) { + if (ifmr.ifm_count != 0) { + mi.mi_status = ifmr.ifm_status; + mi.mi_active = ifmr.ifm_active; + } + } + return (mi); +} + +/** + ** interface utility functions + **/ +static __inline__ struct ifaddr * +ifindex_get_ifaddr(int i) +{ + if (i > if_index || i == 0) { + return (NULL); + } + return (ifnet_addrs[i - 1]); +} + +static __inline__ struct ifaddr * +ifp_get_ifaddr(struct ifnet * ifp) +{ + return (ifindex_get_ifaddr(ifp->if_index)); +} + +static __inline__ struct sockaddr_dl * +ifp_get_sdl(struct ifnet * ifp) +{ + struct ifaddr * ifa; + + ifa = ifp_get_ifaddr(ifp); + return ((struct sockaddr_dl *)(ifa->ifa_addr)); +} + +static int +if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p) +{ + struct ifreq ifr; + + /* + * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver + * currently expects it that way + */ + ifr.ifr_addr.sa_family = AF_UNSPEC; + ifr.ifr_addr.sa_len = ETHER_ADDR_LEN; + ether_addr_copy(ifr.ifr_addr.sa_data, ea_p); +#if 0 + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", ifp->if_name, + ifp->if_unit); +#endif 0 + return (dlil_ioctl(0, ifp, SIOCSIFLLADDR, (caddr_t)&ifr)); +} + +/** + ** bond_globals + **/ +static bond_globals_ref +bond_globals_create(lacp_system_priority sys_pri, + lacp_system_ref sys) +{ + bond_globals_ref b; + + b = _MALLOC(sizeof(*b), M_BOND, M_WAITOK); + if (b == NULL) { + return (NULL); + } + bzero(b, sizeof(*b)); + TAILQ_INIT(&b->ifbond_list); + b->system = *sys; + b->system_priority = sys_pri; +#if 0 + b->verbose = 1; +#endif 0 + return (b); +} + +static int +bond_globals_init(void) +{ + bond_globals_ref b; + int i; + struct ifnet * ifp; + + bond_assert_lock_not_held(); + + if (g_bond != NULL) { + return (0); + } + + /* + * use en0's ethernet address as the system identifier, and if it's not + * there, use en1 .. 
en3 + */ + ifp = NULL; + for (i = 0; i < 4; i++) { + char ifname[IFNAMSIZ+1]; + snprintf(ifname, sizeof(ifname), "en%d", i); + /* XXX ifunit() needs to return a reference on the ifp */ + ifp = ifunit(ifname); + if (ifp != NULL) { + break; + } + } + b = NULL; + if (ifp != NULL) { + b = bond_globals_create(0x8000, + (lacp_system_ref)LLADDR(ifp_get_sdl(ifp))); + } + bond_lock(); + if (g_bond != NULL) { + bond_unlock(); + _FREE(b, M_BOND); + return (0); + } + g_bond = b; + bond_unlock(); + if (ifp == NULL) { + return (ENXIO); + } + if (b == NULL) { + return (ENOMEM); + } + return (0); +} + +static void +bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m, + const struct ether_header * eh_p, + u_int16_t vlan_tag, bpf_packet_func func) +{ + struct ether_vlan_header * vlh_p; + struct mbuf * vl_m; + + vl_m = m_get(M_DONTWAIT, MT_DATA); + if (vl_m == NULL) { + return; + } + /* populate a new mbuf containing the vlan ethernet header */ + vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + vlh_p = mtod(vl_m, struct ether_vlan_header *); + bcopy(eh_p, vlh_p, offsetof(struct ether_header, ether_type)); + vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN); + vlh_p->evl_tag = htons(vlan_tag); + vlh_p->evl_proto = eh_p->ether_type; + vl_m->m_next = m; + (*func)(ifp, vl_m); + vl_m->m_next = NULL; + m_free(vl_m); + return; +} + +static __inline__ void +bond_bpf_output(struct ifnet * ifp, struct mbuf * m, + bpf_packet_func func) +{ + if (func != NULL) { + if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { + const struct ether_header * eh_p; + eh_p = mtod(m, const struct ether_header *); + m->m_data += ETHER_HDR_LEN; + m->m_len -= ETHER_HDR_LEN; + bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func); + m->m_data -= ETHER_HDR_LEN; + m->m_len += ETHER_HDR_LEN; + } else { + (*func)(ifp, m); + } + } + return; +} + +static __inline__ void +bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p, + bpf_packet_func func) +{ + if (func != NULL) { + if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { + bond_bpf_vlan(ifp, m, eh_p, m->m_pkthdr.vlan_tag, func); + } else { + /* restore the header */ + m->m_data -= ETHER_HDR_LEN; + m->m_len += ETHER_HDR_LEN; + (*func)(ifp, m); + m->m_data += ETHER_HDR_LEN; + m->m_len -= ETHER_HDR_LEN; + } + } + return; +} + +/* + * Function: bond_setmulti + * Purpose: + * Enable multicast reception on "our" interface by enabling multicasts on + * each of the member ports. 
+ */ +static int +bond_setmulti(struct ifnet * ifp) +{ + ifbond_ref ifb; + int error; + int result = 0; + bondport_ref p; + + bond_lock(); + ifb = ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb) + || TAILQ_EMPTY(&ifb->ifb_port_list)) { + bond_unlock(); + return (0); + } + ifbond_retain(ifb); + ifbond_wait(ifb, "bond_setmulti"); + + if (ifbond_flags_if_detaching(ifb)) { + /* someone destroyed the bond while we were waiting */ + result = EBUSY; + goto signal_done; + } + bond_unlock(); + + /* ifbond_wait() let's us safely walk the list without holding the lock */ + TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { + struct ifnet * port_ifp = p->po_ifp; + + error = multicast_list_program(&p->po_multicast, + ifp, port_ifp); + if (error != 0) { + printf("bond_setmulti(%s): " + "multicast_list_program(%s%d) failed, %d\n", + ifb->ifb_name, port_ifp->if_name, + port_ifp->if_unit, error); + result = error; + } + } + bond_lock(); + signal_done: + ifbond_release(ifb); + ifbond_signal(ifb, "bond_setmulti"); + bond_unlock(); + return (result); +} + +static void +bond_clone_attach(void) +{ + if_clone_attach(&bond_cloner); + bond_lock_init(); + return; +} + +static int +ifbond_add_slow_proto_multicast(ifbond_ref ifb) +{ + int error; + struct ifmultiaddr * ifma = NULL; + struct sockaddr_dl sdl; + + bond_assert_lock_not_held(); + + bzero(&sdl, sizeof(sdl)); + sdl.sdl_len = sizeof(sdl); + sdl.sdl_family = AF_LINK; + sdl.sdl_type = IFT_ETHER; + sdl.sdl_nlen = 0; + sdl.sdl_alen = sizeof(slow_proto_multicast); + bcopy(&slow_proto_multicast, sdl.sdl_data, sizeof(slow_proto_multicast)); + error = if_addmulti(ifb->ifb_ifp, (struct sockaddr *)&sdl, + &ifma); + if (error == 0) { + ifb->ifb_ifma_slow_proto = ifma; + } + return (error); +} + +static int +bond_clone_create(struct if_clone * ifc, int unit) +{ + int error; + ifbond_ref ifb; + struct ifnet * ifp; + + error = bond_globals_init(); + if (error != 0) { + return (error); + } + + ifb = _MALLOC(sizeof(ifbond), M_BOND, M_WAITOK); + if (ifb == NULL) { + return (ENOMEM); + } + bzero(ifb, sizeof(*ifb)); + + ifbond_retain(ifb); + TAILQ_INIT(&ifb->ifb_port_list); + TAILQ_INIT(&ifb->ifb_lag_list); + ifb->ifb_key = unit + 1; + + /* use the interface name as the unique id for ifp recycle */ + if ((u_long)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d", + ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) { + ifbond_release(ifb); + return (EINVAL); + } + error = dlil_if_acquire(APPLE_IF_FAM_BOND, + ifb->ifb_name, + strlen(ifb->ifb_name), + &ifp); + if (error) { + ifbond_release(ifb); + return (error); + } + ifb->ifb_ifp = ifp; + ifp->if_name = ifc->ifc_name; + ifp->if_unit = unit; + ifp->if_family = APPLE_IF_FAM_BOND; + ifp->if_private = NULL; + ifp->if_ioctl = bond_ioctl; + ifp->if_set_bpf_tap = bond_set_bpf_tap; + ifp->if_free = bond_if_free; + ifp->if_output = bond_output; + ifp->if_hwassist = 0; + ifp->if_addrlen = ETHER_ADDR_LEN; + ifp->if_baudrate = 0; + ifp->if_type = IFT_IEEE8023ADLAG; + ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; + ifp->if_mtu = 0; + + /* XXX ethernet specific */ + ifp->if_broadcast.length = ETHER_ADDR_LEN; + bcopy(etherbroadcastaddr, ifp->if_broadcast.u.buffer, ETHER_ADDR_LEN); + + error = dlil_if_attach(ifp); + if (error != 0) { + dlil_if_release(ifp); + ifbond_release(ifb); + return (error); + } + error = ifbond_add_slow_proto_multicast(ifb); + if (error != 0) { + printf("bond_clone_create(%s): " + "failed to add slow_proto multicast, %d\n", + ifb->ifb_name, error); + } + + /* attach as ethernet */ + 
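
The bond_setmulti() routine above depends on the ifbond_wait()/ifbond_signal() discipline documented earlier: a change-in-progress flag plus msleep()/wakeup() grants one thread exclusive use of the ifbond while the bond mutex can still be dropped around long operations. Reduced to its essentials, as a sketch with illustrative names rather than code from the patch:

    #include <sys/param.h>      /* PZERO, basic types */
    #include <sys/systm.h>      /* msleep, wakeup */
    #include <kern/locks.h>     /* lck_mtx_t */

    struct guarded {
        int busy;               /* change in progress */
    };

    /* Caller holds mtx; msleep() drops it while the thread sleeps. */
    static void
    guarded_wait(struct guarded *g, lck_mtx_t *mtx)
    {
        while (g->busy)
            (void)msleep(g, mtx, PZERO, "guarded_wait", 0);
        g->busy = 1;            /* exclusive access acquired */
    }

    /* Caller holds mtx. */
    static void
    guarded_signal(struct guarded *g)
    {
        g->busy = 0;
        wakeup((caddr_t)g);     /* all waiters re-check the flag */
    }
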
bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header)); + + bond_lock(); + ifp->if_private = ifb; + TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list); + bond_unlock(); + + return (0); +} + +static void +bond_remove_all_interfaces(ifbond_ref ifb) +{ + bondport_ref p; + + bond_assert_lock_held(); + + /* + * do this in reverse order to avoid re-programming the mac address + * as each head interface is removed + */ + while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) { + bond_remove_interface(ifb, p->po_ifp); + } + return; +} + +static void +bond_remove(ifbond_ref ifb) +{ + bond_assert_lock_held(); + ifbond_flags_set_if_detaching(ifb); + TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list); + bond_remove_all_interfaces(ifb); + return; +} + +static void +bond_if_detach(struct ifnet * ifp) +{ + int error; + + error = dlil_if_detach(ifp); + if (error != DLIL_WAIT_FOR_FREE) { + if (error) { + printf("bond_if_detach %s%d: dlil_if_detach failed, %d\n", + ifp->if_name, ifp->if_unit, error); + } + bond_if_free(ifp); + } + return; +} + +static void +bond_clone_destroy(struct ifnet * ifp) +{ + ifbond_ref ifb; + + bond_lock(); + ifb = ifp->if_private; + if (ifb == NULL || ifp->if_type != IFT_IEEE8023ADLAG) { + bond_unlock(); + return; + } + if (ifbond_flags_if_detaching(ifb)) { + bond_unlock(); + return; + } + bond_remove(ifb); + bond_unlock(); + bond_if_detach(ifp); + return; +} + +static int +bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func) +{ + ifbond_ref ifb; + + bond_lock(); + ifb = ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { + bond_unlock(); + return (ENODEV); + } + switch (mode) { + case BPF_TAP_DISABLE: + ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL; + break; + + case BPF_TAP_INPUT: + ifb->ifb_bpf_input = func; + break; + + case BPF_TAP_OUTPUT: + ifb->ifb_bpf_output = func; + break; + + case BPF_TAP_INPUT_OUTPUT: + ifb->ifb_bpf_input = ifb->ifb_bpf_output = func; + break; + default: + break; + } + bond_unlock(); + return 0; +} + +static uint32_t +ether_header_hash(struct ether_header * eh_p) +{ + uint32_t h; + + /* get 32-bits from destination ether and ether type */ + h = (*((uint16_t *)&eh_p->ether_dhost[4]) << 16) + | eh_p->ether_type; + h ^= *((uint32_t *)&eh_p->ether_dhost[0]); + return (h); +} + +static struct mbuf * +S_mbuf_skip_to_offset(struct mbuf * m, long * offset) +{ + int len; + + len = m->m_len; + while (*offset >= len) { + *offset -= len; + m = m->m_next; + if (m == NULL) { + break; + } + len = m->m_len; + } + return (m); +} + +#if BYTE_ORDER == BIG_ENDIAN +static __inline__ uint32_t +make_uint32(u_char c0, u_char c1, u_char c2, u_char c3) +{ + return (((uint32_t)c0 << 24) | ((uint32_t)c1 << 16) + | ((uint32_t)c2 << 8) | (uint32_t)c3); +} +#else /* BYTE_ORDER == LITTLE_ENDIAN */ +static __inline__ uint32_t +make_uint32(u_char c0, u_char c1, u_char c2, u_char c3) +{ + return (((uint32_t)c3 << 24) | ((uint32_t)c2 << 16) + | ((uint32_t)c1 << 8) | (uint32_t)c0); +} +#endif /* BYTE_ORDER == LITTLE_ENDIAN */ + +static int +S_mbuf_copy_uint32(struct mbuf * m, long offset, uint32_t * val) +{ + struct mbuf * current; + u_char * current_data; + struct mbuf * next; + u_char * next_data; + int space_current; + + current = S_mbuf_skip_to_offset(m, &offset); + if (current == NULL) { + return (1); + } + current_data = mtod(current, u_char *) + offset; + space_current = current->m_len - offset; + if (space_current >= (int)sizeof(uint32_t)) { + *val = *((uint32_t *)current_data); + return (0); + } + next = 
current->m_next; + if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) { + return (1); + } + next_data = mtod(next, u_char *); + switch (space_current) { + case 1: + *val = make_uint32(current_data[0], next_data[0], + next_data[1], next_data[2]); + break; + case 2: + *val = make_uint32(current_data[0], current_data[1], + next_data[0], next_data[1]); + break; + default: + *val = make_uint32(current_data[0], current_data[1], + current_data[2], next_data[0]); + break; + } + return (0); +} + +#define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p)) +#define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p)) + +static uint32_t +ip_header_hash(struct mbuf * m) +{ + u_char * data; + struct in_addr ip_dst; + struct in_addr ip_src; + u_char ip_p; + long offset; + struct mbuf * orig_m = m; + + /* find the IP protocol field relative to the start of the packet */ + offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header); + m = S_mbuf_skip_to_offset(m, &offset); + if (m == NULL || m->m_len < 1) { + goto bad_ip_packet; + } + data = mtod(m, u_char *) + offset; + ip_p = *data; + + /* find the IP src relative to the IP protocol */ + if ((m->m_len - offset) + >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) { + /* this should be the normal case */ + ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET); + ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET); + } + else { + if (S_mbuf_copy_uint32(m, offset + IP_SRC_OFFSET, + (uint32_t *)&ip_src.s_addr)) { + goto bad_ip_packet; + } + if (S_mbuf_copy_uint32(m, offset + IP_DST_OFFSET, + (uint32_t *)&ip_dst.s_addr)) { + goto bad_ip_packet; + } + } + return (ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p)); + + bad_ip_packet: + return (ether_header_hash(mtod(orig_m, struct ether_header *))); +} + +#define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2) +static uint32_t +ipv6_header_hash(struct mbuf * m) +{ + u_char * data; + int i; + long offset; + struct mbuf * orig_m = m; + uint32_t * scan; + uint32_t val; + + /* find the IP protocol field relative to the start of the packet */ + offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header); + m = S_mbuf_skip_to_offset(m, &offset); + if (m == NULL) { + goto bad_ipv6_packet; + } + data = mtod(m, u_char *) + offset; + val = 0; + if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) { + /* this should be the normal case */ + for (i = 0, scan = (uint32_t *)data; + i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); + i++, scan++) { + val ^= *scan; + } + } + else { + for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) { + uint32_t tmp; + if (S_mbuf_copy_uint32(m, offset + i * sizeof(uint32_t), + (uint32_t *)&tmp)) { + goto bad_ipv6_packet; + } + val ^= tmp; + } + } + return (ntohl(val)); + + bad_ipv6_packet: + return (ether_header_hash(mtod(orig_m, struct ether_header *))); +} + +static int +bond_output(struct ifnet * ifp, struct mbuf * m) +{ + bpf_packet_func bpf_func; + uint32_t h; + ifbond_ref ifb; + struct ifnet * port_ifp = NULL; + + if (m == 0) { + return (0); + } + if ((m->m_flags & M_PKTHDR) == 0) { + m_freem(m); + return (0); + } + if (m->m_pkthdr.socket_id != 0) { + h = m->m_pkthdr.socket_id; + } + else { + struct ether_header * eh_p; + + eh_p = mtod(m, struct ether_header *); + switch (ntohs(eh_p->ether_type)) { + case ETHERTYPE_IP: + h = ip_header_hash(m); + break; + case ETHERTYPE_IPV6: + h = ipv6_header_hash(m); + break; + default: + h = ether_header_hash(eh_p); + break; + } + } + bond_lock(); + ifb = 
ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb) + || ifb->ifb_distributing_count == 0) { + goto done; + } + h %= ifb->ifb_distributing_count; + port_ifp = ifb->ifb_distributing_array[h]->po_ifp; + bpf_func = ifb->ifb_bpf_output; + bond_unlock(); + + if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { + (void)ifnet_stat_increment_out(ifp, 1, + m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN, + 0); + } else { + (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0); + } + bond_bpf_output(ifp, m, bpf_func); + + return (dlil_output(port_ifp, 0, m, NULL, NULL, 1)); + + done: + bond_unlock(); + m_freem(m); + return (0); +} + +static bondport_ref +ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp) +{ + bondport_ref p; + TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) { + if (p->po_ifp == port_ifp) { + return (p); + } + } + return (NULL); +} + +static bondport_ref +bond_lookup_port(struct ifnet * port_ifp) +{ + ifbond_ref ifb; + bondport_ref port; + + TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) { + port = ifbond_lookup_port(ifb, port_ifp); + if (port != NULL) { + return (port); + } + } + return (NULL); +} + +static void +bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp) +{ + struct ifnet * bond_ifp = NULL; + int event_code = 0; + bondport_ref p; + + bond_lock(); + if ((port_ifp->if_eflags & IFEF_BOND) == 0) { + goto done; + } + p = bond_lookup_port(port_ifp); + if (p == NULL) { + goto done; + } + if (p->po_enabled == 0) { + goto done; + } + bondport_receive_lacpdu(p, (lacpdu_ref)m->m_data); + if (ifbond_selection(p->po_bond)) { + event_code = (p->po_bond->ifb_active_lag == NULL) + ? KEV_DL_LINK_OFF + : KEV_DL_LINK_ON; + /* XXX need to take a reference on bond_ifp */ + bond_ifp = p->po_bond->ifb_ifp; + } + + done: + bond_unlock(); + if (bond_ifp != NULL) { + interface_link_event(bond_ifp, event_code); + } + m_freem(m); + return; +} + +static void +bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp) +{ + la_marker_pdu_ref marker_p; + bondport_ref p; + + marker_p = (la_marker_pdu_ref)(m->m_data + ETHER_HDR_LEN); + if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) { + goto failed; + } + bond_lock(); + if ((port_ifp->if_eflags & IFEF_BOND) == 0) { + bond_unlock(); + goto failed; + } + p = bond_lookup_port(port_ifp); + if (p == NULL || p->po_enabled == 0) { + bond_unlock(); + goto failed; + } + /* echo back the same packet as a marker response */ + marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE; + bondport_slow_proto_transmit(p, (packet_buffer_ref)m); + bond_unlock(); + return; + + failed: + m_freem(m); + return; +} + +static int +bond_input(struct mbuf * m, char * frame_header, struct ifnet * port_ifp, + __unused u_long protocol_family, __unused int sync_ok) +{ + bpf_packet_func bpf_func; + const struct ether_header * eh_p; + ifbond_ref ifb; + struct ifnet * ifp; + bondport_ref p; + + eh_p = (const struct ether_header *)frame_header; + if ((m->m_flags & M_MCAST) != 0 + && bcmp(eh_p->ether_dhost, &slow_proto_multicast, + sizeof(eh_p->ether_dhost)) == 0 + && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) { + u_char subtype = *mtod(m, u_char *); + + if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) { + if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) { + m_freem(m); + return (0); + } + /* send to lacp */ + if (m->m_len < (int)offsetof(lacpdu, la_reserved)) { + m = m_pullup(m, offsetof(lacpdu, la_reserved)); + if (m == NULL) { + return (0); + } + } + bond_receive_lacpdu(m, port_ifp); 
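
Recapping the transmit path in bond_output() above: a per-packet flow hash (the stashed socket id when present, otherwise the IPv4, IPv6, or Ethernet header hash) is reduced modulo the number of distributing ports to choose the egress interface. The selection step, isolated as a sketch using the structures defined earlier in this file:

    /* Pick the egress port for a flow hash. Caller holds bond_lock()
     * and has already checked ifb_distributing_count != 0, exactly as
     * bond_output() does. */
    static struct ifnet *
    bond_select_port(ifbond_ref ifb, uint32_t h)
    {
        bondport_ref p = ifb->ifb_distributing_array[h % ifb->ifb_distributing_count];
        return (p->po_ifp);
    }
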
+ return (0); + } + else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) { + int min_size; + + /* restore the ethernet header pointer in the mbuf */ + m->m_pkthdr.len += ETHER_HDR_LEN; + m->m_data -= ETHER_HDR_LEN; + m->m_len += ETHER_HDR_LEN; + min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved); + if (m->m_pkthdr.len < min_size) { + m_freem(m); + return (0); + } + /* send to lacp */ + if (m->m_len < min_size) { + m = m_pullup(m, min_size); + if (m == NULL) { + return (0); + } + } + /* send to marker responder */ + bond_receive_la_marker_pdu(m, port_ifp); + return (0); + } + else if (subtype == 0 + || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) { + /* invalid subtype, discard the frame */ + m_freem(m); + return (0); + } + } + bond_lock(); + if ((port_ifp->if_eflags & IFEF_BOND) == 0) { + goto done; + } + p = bond_lookup_port(port_ifp); + if (p == NULL || bondport_collecting(p) == 0) { + goto done; + } + + /* make the packet appear as if it arrived on the bonded interface */ + ifb = p->po_bond; + ifp = ifb->ifb_ifp; + bpf_func = ifb->ifb_bpf_input; + bond_unlock(); + + if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { + (void)ifnet_stat_increment_in(ifp, 1, + (m->m_pkthdr.len + ETHER_HDR_LEN + + ETHER_VLAN_ENCAP_LEN), 0); + } + else { + (void)ifnet_stat_increment_in(ifp, 1, + (m->m_pkthdr.len + ETHER_HDR_LEN), 0); + } + m->m_pkthdr.rcvif = ifp; + bond_bpf_input(ifp, m, eh_p, bpf_func); + dlil_input_packet(ifp, m, frame_header); + return 0; + + done: + bond_unlock(); + m_freem(m); + return (0); +} + +static __inline__ const char * +bondport_get_name(bondport_ref p) +{ + return (p->po_name); +} + +static __inline__ int +bondport_get_index(bondport_ref p) +{ + return (p->po_ifp->if_index); +} + +static void +bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf) +{ + struct ether_header * eh_p; + int error; + + /* packet_buffer_allocate leaves room for ethernet header */ + eh_p = mtod(buf, struct ether_header *); + bcopy(&slow_proto_multicast, &eh_p->ether_dhost, sizeof(eh_p->ether_dhost)); + bcopy(&p->po_saved_addr, eh_p->ether_shost, sizeof(eh_p->ether_shost)); + eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE); + error = dlil_output(p->po_ifp, 0, buf, NULL, NULL, 1); + if (error != 0) { + printf("bondport_slow_proto_transmit(%s) failed %d\n", + bondport_get_name(p), error); + } + return; +} + +static void +bondport_timer_process_func(devtimer_ref timer, + devtimer_process_func_event event) +{ + bondport_ref p; + + switch (event) { + case devtimer_process_func_event_lock: + bond_lock(); + devtimer_retain(timer); + break; + case devtimer_process_func_event_unlock: + if (devtimer_valid(timer)) { + /* as long as the devtimer is valid, we can look at arg0 */ + int event_code = 0; + struct ifnet * bond_ifp = NULL; + + p = (bondport_ref)devtimer_arg0(timer); + if (ifbond_selection(p->po_bond)) { + event_code = (p->po_bond->ifb_active_lag == NULL) + ? 
KEV_DL_LINK_OFF + : KEV_DL_LINK_ON; + /* XXX need to take a reference on bond_ifp */ + bond_ifp = p->po_bond->ifb_ifp; + } + devtimer_release(timer); + bond_unlock(); + if (bond_ifp != NULL) { + interface_link_event(bond_ifp, event_code); + } + } + else { + /* timer is going away */ + devtimer_release(timer); + bond_unlock(); + } + break; + default: + break; + } +} + +static bondport_ref +bondport_create(struct ifnet * port_ifp, lacp_port_priority priority, + int active, int short_timeout, int * ret_error) +{ + int error = 0; + bondport_ref p = NULL; + lacp_actor_partner_state s; + + *ret_error = 0; + p = _MALLOC(sizeof(*p), M_BOND, M_WAITOK); + if (p == NULL) { + *ret_error = ENOMEM; + return (NULL); + } + bzero(p, sizeof(*p)); + multicast_list_init(&p->po_multicast); + if ((u_long)snprintf(p->po_name, sizeof(p->po_name), "%s%d", + port_ifp->if_name, port_ifp->if_unit) + >= sizeof(p->po_name)) { + printf("if_bond: name too large\n"); + *ret_error = EINVAL; + goto failed; + } + error = siocgifdevmtu(port_ifp, &p->po_devmtu); + if (error != 0) { + printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n", + bondport_get_name(p), error); + goto failed; + } + /* remember the current interface MTU so it can be restored */ + p->po_devmtu.ifdm_current = port_ifp->if_mtu; + p->po_ifp = port_ifp; + p->po_media_info = interface_media_info(port_ifp); + p->po_current_while_timer = devtimer_create(bondport_timer_process_func, p); + if (p->po_current_while_timer == NULL) { + *ret_error = ENOMEM; + goto failed; + } + p->po_periodic_timer = devtimer_create(bondport_timer_process_func, p); + if (p->po_periodic_timer == NULL) { + *ret_error = ENOMEM; + goto failed; + } + p->po_wait_while_timer = devtimer_create(bondport_timer_process_func, p); + if (p->po_wait_while_timer == NULL) { + *ret_error = ENOMEM; + goto failed; + } + p->po_transmit_timer = devtimer_create(bondport_timer_process_func, p); + if (p->po_transmit_timer == NULL) { + *ret_error = ENOMEM; + goto failed; + } + p->po_receive_state = ReceiveState_none; + p->po_mux_state = MuxState_none; + p->po_priority = priority; + s = 0; + s = lacp_actor_partner_state_set_aggregatable(s); + if (short_timeout) { + s = lacp_actor_partner_state_set_short_timeout(s); + } + if (active) { + s = lacp_actor_partner_state_set_active_lacp(s); + } + p->po_actor_state = s; + return (p); + + failed: + bondport_free(p); + return (NULL); +} + +static void +bondport_start(bondport_ref p) +{ + bondport_receive_machine(p, LAEventStart, NULL); + bondport_mux_machine(p, LAEventStart, NULL); + bondport_periodic_transmit_machine(p, LAEventStart, NULL); + bondport_transmit_machine(p, LAEventStart, NULL); + return; +} + +/* + * Function: bondport_invalidate_timers + * Purpose: + * Invalidate all of the timers for the bondport. 
+ */ +static void +bondport_invalidate_timers(bondport_ref p) +{ + devtimer_invalidate(p->po_current_while_timer); + devtimer_invalidate(p->po_periodic_timer); + devtimer_invalidate(p->po_wait_while_timer); + devtimer_invalidate(p->po_transmit_timer); +} + +static void +bondport_free(bondport_ref p) +{ + multicast_list_remove(&p->po_multicast); + devtimer_release(p->po_current_while_timer); + devtimer_release(p->po_periodic_timer); + devtimer_release(p->po_wait_while_timer); + devtimer_release(p->po_transmit_timer); + FREE(p, M_BOND); + return; +} + +#define BOND_ADD_PROGRESS_IN_LIST 0x1 +#define BOND_ADD_PROGRESS_PROTO_ATTACHED 0x2 +#define BOND_ADD_PROGRESS_LLADDR_SET 0x4 +#define BOND_ADD_PROGRESS_MTU_SET 0x8 + +static __inline__ int +bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb) +{ + return (((int)ifp->if_mtu > ifb->ifb_altmtu) + ? (int)ifp->if_mtu : ifb->ifb_altmtu); +} + +static int +bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp) +{ + int devmtu; + int error = 0; + int event_code = 0; + ifbond_ref ifb; + struct sockaddr_dl * ifb_sdl; + bondport_ref * new_array = NULL; + bondport_ref * old_array = NULL; + bondport_ref p; + struct sockaddr_dl * port_sdl; + int progress = 0; + + /* pre-allocate space for new port */ + p = bondport_create(port_ifp, 0x8000, 1, 0, &error); + if (p == NULL) { + return (error); + } + bond_lock(); + ifb = (ifbond_ref)ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { + bond_unlock(); + bondport_free(p); + return ((ifb == NULL ? EOPNOTSUPP : EBUSY)); + } + + /* make sure this interface can handle our current MTU */ + devmtu = bond_device_mtu(ifp, ifb); + if (devmtu != 0 + && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) { + bond_unlock(); + printf("if_bond: interface %s doesn't support mtu %d", + bondport_get_name(p), devmtu); + bondport_free(p); + return (EINVAL); + } + + /* make sure ifb doesn't get de-allocated while we wait */ + ifbond_retain(ifb); + + /* wait for other add or remove to complete */ + ifbond_wait(ifb, "bond_add_interface"); + + if (ifbond_flags_if_detaching(ifb)) { + /* someone destroyed the bond while we were waiting */ + error = EBUSY; + goto signal_done; + } + if (bond_lookup_port(port_ifp) != NULL) { + /* port is already part of a bond */ + error = EBUSY; + goto signal_done; + } + ifnet_lock_exclusive(port_ifp); + if ((port_ifp->if_eflags & (IFEF_VLAN | IFEF_BOND)) != 0) { + /* interface already has VLAN's, or is part of bond */ + ifnet_lock_done(port_ifp); + error = EBUSY; + goto signal_done; + } + + /* mark the interface busy */ + port_ifp->if_eflags |= IFEF_BOND; + ifnet_lock_done(port_ifp); + + port_sdl = ifp_get_sdl(port_ifp); + ifb_sdl = ifp_get_sdl(ifp); + + if (TAILQ_EMPTY(&ifb->ifb_port_list)) { + ifp->if_hwassist = port_ifp->if_hwassist; + ifp->if_flags |= IFF_RUNNING; + if (ifbond_flags_lladdr(ifb) == FALSE) { + /* first port added to bond determines bond's ethernet address */ + ether_addr_copy(LLADDR(ifb_sdl), LLADDR(port_sdl)); + ifb_sdl->sdl_type = IFT_ETHER; + ifb_sdl->sdl_alen = ETHER_ADDR_LEN; + } + } else { + if (ifp->if_hwassist != port_ifp->if_hwassist) { + printf("bond_add_interface(%s, %s) " + "hwassist values don't match 0x%x != 0x%x\n", + ifb->ifb_name, bondport_get_name(p), + ifp->if_hwassist, port_ifp->if_hwassist); + /* + * XXX + * if the bond has VLAN's, we can't simply change the hwassist + * field behind its back: this needs work + */ + ifp->if_hwassist = 0; + } + } + p->po_bond = ifb; + + /* remember the port's ethernet address so it can be 
restored */ + ether_addr_copy(&p->po_saved_addr, LLADDR(port_sdl)); + + /* add it to the list of ports */ + TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list); + ifb->ifb_port_count++; + + /* set the default MTU */ + if (ifp->if_mtu == 0) { + ifp->if_mtu = ETHERMTU; + } + bond_unlock(); + progress |= BOND_ADD_PROGRESS_IN_LIST; + + /* allocate a larger distributing array */ + new_array = (bondport_ref *) + _MALLOC(sizeof(*new_array) * ifb->ifb_port_count, M_BOND, M_WAITOK); + if (new_array == NULL) { + error = ENOMEM; + goto failed; + } + + /* attach our BOND "protocol" to the interface */ + error = bond_attach_protocol(port_ifp); + if (error) { + goto failed; + } + progress |= BOND_ADD_PROGRESS_PROTO_ATTACHED; + + /* set the interface MTU */ + devmtu = bond_device_mtu(ifp, ifb); + error = siocsifmtu(port_ifp, devmtu); + if (error != 0) { + printf("bond_add_interface(%s, %s):" + " SIOCSIFMTU %d failed %d\n", + ifb->ifb_name, bondport_get_name(p), devmtu, error); + goto failed; + } + progress |= BOND_ADD_PROGRESS_MTU_SET; + + /* program the port with our multicast addresses */ + error = multicast_list_program(&p->po_multicast, ifp, port_ifp); + if (error) { + printf("bond_add_interface(%s, %s):" + " multicast_list_program failed %d\n", + ifb->ifb_name, bondport_get_name(p), error); + goto failed; + } + + /* mark the interface up */ + ifnet_set_flags(port_ifp, IFF_UP, IFF_UP); + + error = dlil_ioctl(0, port_ifp, SIOCSIFFLAGS, (caddr_t)NULL); + if (error != 0) { + printf("bond_add_interface(%s, %s): SIOCSIFFLAGS failed %d\n", + ifb->ifb_name, bondport_get_name(p), error); + goto failed; + } + + /* re-program the port's ethernet address */ + error = if_siflladdr(port_ifp, + (const struct ether_addr *)LLADDR(ifb_sdl)); + if (error != 0) { + /* port doesn't support setting the link address */ + printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n", + ifb->ifb_name, bondport_get_name(p), error); + goto failed; + } + progress |= BOND_ADD_PROGRESS_LLADDR_SET; + + bond_lock(); + + /* no failures past this point */ + p->po_enabled = 1; + + /* copy the contents of the existing distributing array */ + if (ifb->ifb_distributing_count) { + bcopy(ifb->ifb_distributing_array, new_array, + sizeof(*new_array) * ifb->ifb_distributing_count); + } + old_array = ifb->ifb_distributing_array; + ifb->ifb_distributing_array = new_array; + + /* clear the busy state, and wakeup anyone waiting */ + ifbond_signal(ifb, "bond_add_interface"); + bondport_start(p); + + /* check if we need to generate a link status event */ + if (ifbond_selection(ifb)) { + event_code = (ifb->ifb_active_lag == NULL) + ? 
KEV_DL_LINK_OFF + : KEV_DL_LINK_ON; + } + bond_unlock(); + if (event_code != 0) { + interface_link_event(ifp, event_code); + } + if (old_array != NULL) { + FREE(old_array, M_BOND); + } + return 0; + + failed: + bond_assert_lock_not_held(); + + if (new_array != NULL) { + FREE(new_array, M_BOND); + } + if ((progress & BOND_ADD_PROGRESS_LLADDR_SET) != 0) { + int error1; + + error1 = if_siflladdr(port_ifp, &p->po_saved_addr); + if (error1 != 0) { + printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n", + ifb->ifb_name, bondport_get_name(p), error1); + } + } + if ((progress & BOND_ADD_PROGRESS_PROTO_ATTACHED) != 0) { + (void)bond_detach_protocol(port_ifp); + } + if ((progress & BOND_ADD_PROGRESS_MTU_SET) != 0) { + int error1; + + error1 = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current); + if (error1 != 0) { + printf("bond_add_interface(%s, %s): SIOCSIFMTU %d failed %d\n", + ifb->ifb_name, bondport_get_name(p), p->po_devmtu.ifdm_current, + error1); + } + } + bond_lock(); + if ((progress & BOND_ADD_PROGRESS_IN_LIST) != 0) { + TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list); + ifb->ifb_port_count--; + } + ifnet_lock_exclusive(port_ifp); + port_ifp->if_eflags &= ~IFEF_BOND; + ifnet_lock_done(port_ifp); + if (TAILQ_EMPTY(&ifb->ifb_port_list)) { + ifb->ifb_altmtu = 0; + ifp->if_mtu = 0; + ifp->if_hwassist = 0; + if (ifbond_flags_lladdr(ifb) == FALSE) { + bzero(LLADDR(ifb_sdl), ETHER_ADDR_LEN); + ifb_sdl->sdl_type = IFT_IEEE8023ADLAG; + ifb_sdl->sdl_alen = 0; + } + } + + signal_done: + ifbond_release(ifb); + ifbond_signal(ifb, "bond_add_interface"); + bond_unlock(); + bondport_free(p); + return (error); +} + +static int +bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp) +{ + int active_lag = 0; + int error = 0; + int event_code = 0; + bondport_ref head_port; + struct sockaddr_dl * ifb_sdl; + struct ifnet * ifp; + int new_link_address = 0; + bondport_ref p; + lacp_actor_partner_state s; + + bond_assert_lock_held(); + + ifbond_retain(ifb); + ifbond_wait(ifb, "bond_remove_interface"); + + p = ifbond_lookup_port(ifb, port_ifp); + if (p == NULL) { + error = ENXIO; + /* it got removed by another thread */ + goto signal_done; + } + + /* de-select it and remove it from the lists */ + bondport_disable_distributing(p); + bondport_set_selected(p, SelectedState_UNSELECTED); + active_lag = bondport_remove_from_LAG(p); + TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list); + ifb->ifb_port_count--; + + /* invalidate timers here while holding the bond_lock */ + bondport_invalidate_timers(p); + + /* announce that we're Individual now */ + s = p->po_actor_state; + s = lacp_actor_partner_state_set_individual(s); + s = lacp_actor_partner_state_set_not_collecting(s); + s = lacp_actor_partner_state_set_not_distributing(s); + s = lacp_actor_partner_state_set_out_of_sync(s); + p->po_actor_state = s; + bondport_flags_set_ntt(p); + + ifp = ifb->ifb_ifp; + ifb_sdl = ifp_get_sdl(ifp); + head_port = TAILQ_FIRST(&ifb->ifb_port_list); + if (head_port == NULL) { + ifp->if_flags &= ~IFF_RUNNING; + if (ifbond_flags_lladdr(ifb) == FALSE) { + ifb_sdl->sdl_type = IFT_IEEE8023ADLAG; + ifb_sdl->sdl_alen = 0; + bzero(LLADDR(ifb_sdl), ETHER_ADDR_LEN); + } + ifp->if_hwassist = 0; + ifp->if_mtu = 0; + ifb->ifb_altmtu = 0; + } else if (ifbond_flags_lladdr(ifb) == FALSE + && bcmp(&p->po_saved_addr, LLADDR(ifb_sdl), + ETHER_ADDR_LEN) == 0) { + /* this port gave the bond its ethernet address, switch to new one */ + ether_addr_copy(LLADDR(ifb_sdl), &head_port->po_saved_addr); + ifb_sdl->sdl_type = IFT_ETHER; + 
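
The failed: path in bond_add_interface() above undoes exactly the steps recorded in progress: each side effect that succeeds sets one of the BOND_ADD_PROGRESS_* bits, and the unwind tests those bits, most recent step first, before falling into the common signal_done: exit. Reduced to a skeleton, the idiom looks like the following sketch (illustration only, not code from this patch; do_step_a()/undo_step_a() and friends are hypothetical helpers):

    static int  do_step_a(void) { return (0); }    /* hypothetical step */
    static int  do_step_b(void) { return (0); }    /* hypothetical step */
    static void undo_step_a(void) { }               /* hypothetical undo */
    static void undo_step_b(void) { }               /* hypothetical undo */

    #define PROGRESS_STEP_A 0x1
    #define PROGRESS_STEP_B 0x2

    static int
    setup_with_unwind(void)
    {
        int error;
        int progress = 0;

        error = do_step_a();
        if (error != 0) {
            goto failed;
        }
        progress |= PROGRESS_STEP_A;    /* step A now needs undoing on failure */

        error = do_step_b();
        if (error != 0) {
            goto failed;
        }
        progress |= PROGRESS_STEP_B;
        return (0);

     failed:
        /* undo only the recorded steps, in reverse order */
        if ((progress & PROGRESS_STEP_B) != 0) {
            undo_step_b();
        }
        if ((progress & PROGRESS_STEP_A) != 0) {
            undo_step_a();
        }
        return (error);
    }

The same record-then-unwind pattern keeps bond_add_interface() consistent no matter which of the protocol attach, MTU, multicast, or link-address steps fails.
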
ifb_sdl->sdl_alen = ETHER_ADDR_LEN; + new_link_address = 1; + } + /* check if we need to generate a link status event */ + if (ifbond_selection(ifb) || active_lag) { + event_code = (ifb->ifb_active_lag == NULL) + ? KEV_DL_LINK_OFF + : KEV_DL_LINK_ON; + } + bond_unlock(); + + bondport_transmit_machine(p, LAEventStart, (void *)1); + + if (new_link_address) { + struct ifnet * scan_ifp; + bondport_ref scan_port; + + /* ifbond_wait() allows port list traversal without holding the lock */ + + /* re-program each port with the new link address */ + TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) { + scan_ifp = scan_port->po_ifp; + + error = if_siflladdr(scan_ifp, + (const struct ether_addr *) LLADDR(ifb_sdl)); + if (error != 0) { + printf("bond_remove_interface(%s, %s): " + "if_siflladdr (%s) failed %d\n", + ifb->ifb_name, bondport_get_name(p), + bondport_get_name(scan_port), error); + } + } + } + + /* restore the port's ethernet address */ + error = if_siflladdr(port_ifp, &p->po_saved_addr); + if (error != 0) { + printf("bond_remove_interface(%s, %s): if_siflladdr failed %d\n", + ifb->ifb_name, bondport_get_name(p), error); + } + + /* restore the port's MTU */ + error = siocsifmtu(port_ifp, p->po_devmtu.ifdm_current); + if (error != 0) { + printf("bond_remove_interface(%s, %s): SIOCSIFMTU %d failed %d\n", + ifb->ifb_name, bondport_get_name(p), + p->po_devmtu.ifdm_current, error); + } + + /* remove the bond "protocol" */ + bond_detach_protocol(port_ifp); + + /* generate link event */ + if (event_code != 0) { + interface_link_event(ifp, event_code); + } + + bond_lock(); + ifbond_release(ifb); + bondport_free(p); + ifnet_lock_exclusive(port_ifp); + port_ifp->if_eflags &= ~IFEF_BOND; + ifnet_lock_done(port_ifp); + + signal_done: + ifbond_signal(ifb, "bond_remove_interface"); + ifbond_release(ifb); /* a second release for the second reference */ + return (error); +} + +static int +bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap) +{ + int count; + user_addr_t dst; + int error = 0; + struct if_bond_status_req * ibsr; + struct if_bond_status ibs; + bondport_ref port; + + ibsr = &(ibr_p->ibr_ibru.ibru_status); + if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) { + return (EINVAL); + } + ibsr->ibsr_key = ifb->ifb_key; + ibsr->ibsr_total = ifb->ifb_port_count; + dst = proc_is64bit(current_proc()) + ? ibsr->ibsr_ibsru.ibsru_buffer64 + : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer32); + if (dst == USER_ADDR_NULL) { + /* just want to know how many there are */ + goto done; + } + if (ibsr->ibsr_count < 0) { + return (EINVAL); + } + count = (ifb->ifb_port_count < ibsr->ibsr_count) + ? 
ifb->ifb_port_count : ibsr->ibsr_count;
+    /* copy out one if_bond_status record per port, up to the caller's count */
+    TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
+        struct if_bond_partner_state * ibps_p;
+        partner_state_ref ps;
+
+        if (count == 0) {
+            break;
+        }
+        bzero(&ibs, sizeof(ibs));
+        strncpy(ibs.ibs_if_name, port->po_name, sizeof(ibs.ibs_if_name));
+        ibs.ibs_port_priority = port->po_priority;
+        ibs.ibs_state = port->po_actor_state;
+        ibs.ibs_selected_state = port->po_selected;
+        ps = &port->po_partner_state;
+        ibps_p = &ibs.ibs_partner_state;
+        ibps_p->ibps_system = ps->ps_lag_info.li_system;
+        ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
+        ibps_p->ibps_key = ps->ps_lag_info.li_key;
+        ibps_p->ibps_port = ps->ps_port;
+        ibps_p->ibps_port_priority = ps->ps_port_priority;
+        ibps_p->ibps_state = ps->ps_state;
+        error = copyout(&ibs, dst, sizeof(ibs));
+        if (error != 0) {
+            break;
+        }
+        dst += sizeof(ibs);
+        count--;
+    }
+
+ done:
+    if (error == 0) {
+        error = copyout(ibr_p, datap, sizeof(*ibr_p));
+    }
+    else {
+        (void)copyout(ibr_p, datap, sizeof(*ibr_p));
+    }
+    return (error);
+}
+
+static int
+bond_set_promisc(__unused struct ifnet *ifp)
+{
+    int error = 0;
+#if 0
+    ifbond_ref ifb = ifp->if_private;
+
+    if ((ifp->if_flags & IFF_PROMISC) != 0) {
+        if ((ifb->ifb_flags & IFBF_PROMISC) == 0) {
+            error = ifnet_set_promiscuous(ifb->ifb_p, 1);
+            if (error == 0)
+                ifb->ifb_flags |= IFBF_PROMISC;
+        }
+    } else {
+        if ((ifb->ifb_flags & IFBF_PROMISC) != 0) {
+            error = ifnet_set_promiscuous(ifb->ifb_p, 0);
+            if (error == 0)
+                ifb->ifb_flags &= ~IFBF_PROMISC;
+        }
+    }
+#endif /* 0 */
+    return (error);
+}
+
+static void
+bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
+{
+    int mtu_min = 0;
+    int mtu_max = 0;
+    bondport_ref p;
+
+    if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
+        mtu_min = IF_MINMTU;
+    }
+    TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
+        struct ifdevmtu * devmtu_p = &p->po_devmtu;
+
+        if (devmtu_p->ifdm_min > mtu_min) {
+            mtu_min = devmtu_p->ifdm_min;
+        }
+        if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
+            mtu_max = devmtu_p->ifdm_max;
+        }
+    }
+    *ret_min = mtu_min;
+    *ret_max = mtu_max;
+    return;
+}
+
+static int
+bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
+{
+    int error = 0;
+    bondport_ref p;
+
+    TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
+        error = siocsifmtu(p->po_ifp, mtu);
+        if (error != 0) {
+            printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
+                   ifb->ifb_name, bondport_get_name(p), error);
+            break;
+        }
+    }
+    return (error);
+}
+
+static int
+bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
+{
+    int error = 0;
+    ifbond_ref ifb;
+    int mtu_min;
+    int mtu_max;
+    int new_max;
+    int old_max;
+
+    bond_lock();
+    ifb = (ifbond_ref)ifp->if_private;
+    if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
+        error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
+        goto done;
+    }
+    ifbond_retain(ifb);
+    ifbond_wait(ifb, "bond_set_mtu");
+
+    /* check again */
+    if (ifp->if_private == NULL || ifbond_flags_if_detaching(ifb)) {
+        error = EBUSY;
+        goto signal_done;
+    }
+    bond_get_mtu_values(ifb, &mtu_min, &mtu_max);
+    if (mtu > mtu_max) {
+        error = EINVAL;
+        goto signal_done;
+    }
+    if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
+        /* allow SIOCSIFALTMTU to set the mtu to 0 */
+        error = EINVAL;
+        goto signal_done;
+    }
+    /* the ports must carry the larger of the device MTU and the alt MTU */
+    if (isdevmtu) {
+        new_max = (mtu > (int)ifp->if_mtu) ? mtu : (int)ifp->if_mtu;
+    }
+    else {
+        new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
+    }
+    old_max = ((int)ifp->if_mtu > ifb->ifb_altmtu)
+        ? (int)ifp->if_mtu : ifb->ifb_altmtu;
+    if (new_max != old_max) {
+        /* we can safely walk the list of ports without the lock held */
+        bond_unlock();
+        error = bond_set_mtu_on_ports(ifb, new_max);
+        if (error != 0) {
+            /* try our best to back out of it */
+            (void)bond_set_mtu_on_ports(ifb, old_max);
+        }
+        bond_lock();
+    }
+    if (error == 0) {
+        if (isdevmtu) {
+            ifb->ifb_altmtu = mtu;
+        }
+        else {
+            ifp->if_mtu = mtu;
+        }
+    }
+
+ signal_done:
+    ifbond_signal(ifb, "bond_set_mtu");
+    ifbond_release(ifb);
+
+ done:
+    bond_unlock();
+    return (error);
+}
+
+static int
+bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * data)
+{
+    int error = 0;
+    struct if_bond_req ibr;
+    struct ifaddr * ifa;
+    ifbond_ref ifb;
+    struct ifreq * ifr;
+    struct ifmediareq64 * ifmr;
+    struct ifnet * port_ifp = NULL;
+    user_addr_t user_addr;
+
+    if (ifp->if_type != IFT_IEEE8023ADLAG) {
+        return (EOPNOTSUPP);
+    }
+    ifr = (struct ifreq *)data;
+    ifa = (struct ifaddr *)data;
+
+    switch (cmd) {
+    case SIOCSIFADDR:
+        ifnet_set_flags(ifp, IFF_UP, IFF_UP);
+        break;
+
+    case SIOCGIFMEDIA64:
+    case SIOCGIFMEDIA:
+        bond_lock();
+        ifb = (ifbond_ref)ifp->if_private;
+        if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
+            bond_unlock();
+            return (ifb == NULL ? EOPNOTSUPP : EBUSY);
+        }
+        ifmr = (struct ifmediareq64 *)data;
+        ifmr->ifm_current = IFM_ETHER;
+        ifmr->ifm_mask = 0;
+        ifmr->ifm_status = IFM_AVALID;
+        ifmr->ifm_active = IFM_ETHER;
+        ifmr->ifm_count = 1;
+        if (ifb->ifb_active_lag != NULL) {
+            ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
+            ifmr->ifm_status |= IFM_ACTIVE;
+        }
+        bond_unlock();
+        user_addr = (cmd == SIOCGIFMEDIA64)
+            ? ifmr->ifm_ifmu.ifmu_ulist64
+            : CAST_USER_ADDR_T(ifmr->ifm_ifmu.ifmu_ulist32);
+        if (user_addr != USER_ADDR_NULL) {
+            error = copyout(&ifmr->ifm_current,
+                            user_addr,
+                            sizeof(int));
+        }
+        break;
+
+    case SIOCSIFMEDIA:
+        /* XXX send the SIFMEDIA to all children?  Or force autoselect? */
+        error = EINVAL;
+        break;
+
+    case SIOCGIFDEVMTU:
+        bond_lock();
+        ifb = (ifbond_ref)ifp->if_private;
+        if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
+            bond_unlock();
+            error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
+            break;
+        }
+        ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
+        bond_get_mtu_values(ifb, &ifr->ifr_devmtu.ifdm_min,
+                            &ifr->ifr_devmtu.ifdm_max);
+        bond_unlock();
+        break;
+
+    case SIOCGIFALTMTU:
+        bond_lock();
+        ifb = (ifbond_ref)ifp->if_private;
+        if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
+            bond_unlock();
+            error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
+            break;
+        }
+        ifr->ifr_mtu = ifb->ifb_altmtu;
+        bond_unlock();
+        break;
+
+    case SIOCSIFALTMTU:
+        error = bond_set_mtu(ifp, ifr->ifr_mtu, 1);
+        break;
+
+    case SIOCSIFMTU:
+        error = bond_set_mtu(ifp, ifr->ifr_mtu, 0);
+        break;
+
+    case SIOCSIFBOND:
+        user_addr = proc_is64bit(current_proc())
+            ?
ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); + error = copyin(user_addr, &ibr, sizeof(ibr)); + if (error) { + break; + } + switch (ibr.ibr_op) { + case IF_BOND_OP_ADD_INTERFACE: + case IF_BOND_OP_REMOVE_INTERFACE: + /* XXX ifunit() needs to return a reference on the ifp */ + port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name); + if (port_ifp == NULL) { + error = ENXIO; + break; + } + if (port_ifp->if_type != IFT_ETHER) { + error = EPROTONOSUPPORT; + break; + } + break; + case IF_BOND_OP_SET_VERBOSE: + break; + default: + error = EOPNOTSUPP; + break; + } + if (error != 0) { + break; + } + switch (ibr.ibr_op) { + case IF_BOND_OP_ADD_INTERFACE: + error = bond_add_interface(ifp, port_ifp); + break; + case IF_BOND_OP_REMOVE_INTERFACE: + bond_lock(); + ifb = (ifbond_ref)ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { + bond_unlock(); + return (ifb == NULL ? EOPNOTSUPP : EBUSY); + } + error = bond_remove_interface(ifb, port_ifp); + bond_unlock(); + break; + case IF_BOND_OP_SET_VERBOSE: + bond_lock(); + if (g_bond == NULL) { + bond_unlock(); + error = ENXIO; + break; + } + g_bond->verbose = ibr.ibr_ibru.ibru_int_val; + bond_unlock(); + break; + } + break; + + case SIOCGIFBOND: + user_addr = proc_is64bit(current_proc()) + ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); + error = copyin(user_addr, &ibr, sizeof(ibr)); + if (error) { + break; + } + switch (ibr.ibr_op) { + case IF_BOND_OP_GET_STATUS: + break; + default: + error = EOPNOTSUPP; + break; + } + if (error != 0) { + break; + } + bond_lock(); + ifb = (ifbond_ref)ifp->if_private; + if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { + bond_unlock(); + return (ifb == NULL ? EOPNOTSUPP : EBUSY); + } + switch (ibr.ibr_op) { + case IF_BOND_OP_GET_STATUS: + error = bond_get_status(ifb, &ibr, user_addr); + break; + } + bond_unlock(); + break; + + case SIOCSIFLLADDR: + error = EOPNOTSUPP; + break; + + case SIOCSIFFLAGS: + /* enable/disable promiscuous mode */ + bond_lock(); + error = bond_set_promisc(ifp); + bond_unlock(); + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + error = bond_setmulti(ifp); + break; + default: + error = EOPNOTSUPP; + } + return error; +} + +static void +bond_if_free(struct ifnet * ifp) +{ + ifbond_ref ifb; + + if (ifp == NULL) { + return; + } + bond_lock(); + ifb = (ifbond_ref)ifp->if_private; + if (ifb == NULL) { + bond_unlock(); + return; + } + ifp->if_private = NULL; + ifbond_release(ifb); + bond_unlock(); + dlil_if_release(ifp); + return; +} + +static void +bond_event(struct ifnet * port_ifp, struct kev_msg * event) +{ + struct ifnet * bond_ifp = NULL; + int event_code = 0; + bondport_ref p; + struct media_info media_info; + + if (event->vendor_code != KEV_VENDOR_APPLE + || event->kev_class != KEV_NETWORK_CLASS + || event->kev_subclass != KEV_DL_SUBCLASS) { + return; + } + switch (event->event_code) { + case KEV_DL_IF_DETACHING: + break; + case KEV_DL_LINK_OFF: + case KEV_DL_LINK_ON: + media_info = interface_media_info(port_ifp); + break; + default: + return; + } + bond_lock(); + p = bond_lookup_port(port_ifp); + if (p == NULL) { + bond_unlock(); + return; + } + switch (event->event_code) { + case KEV_DL_IF_DETACHING: + bond_remove_interface(p->po_bond, p->po_ifp); + break; + case KEV_DL_LINK_OFF: + case KEV_DL_LINK_ON: + p->po_media_info = media_info; + if (p->po_enabled) { + bondport_link_status_changed(p); + } + break; + } + /* generate a link-event */ + if (ifbond_selection(p->po_bond)) { + event_code = (p->po_bond->ifb_active_lag == NULL) + ? 
KEV_DL_LINK_OFF
+            : KEV_DL_LINK_ON;
+        /* XXX need to take a reference on bond_ifp */
+        bond_ifp = p->po_bond->ifb_ifp;
+    }
+    bond_unlock();
+    if (bond_ifp != NULL) {
+        interface_link_event(bond_ifp, event_code);
+    }
+    return;
+}
+
+static void
+interface_link_event(struct ifnet * ifp, u_long event_code)
+{
+    struct {
+        struct kern_event_msg header;
+        u_long unit;
+        char if_name[IFNAMSIZ];
+    } event;
+
+    event.header.total_size = sizeof(event);
+    event.header.vendor_code = KEV_VENDOR_APPLE;
+    event.header.kev_class = KEV_NETWORK_CLASS;
+    event.header.kev_subclass = KEV_DL_SUBCLASS;
+    event.header.event_code = event_code;
+    event.header.event_data[0] = ifp->if_family;
+    event.unit = (u_long)ifp->if_unit;
+    strncpy(event.if_name, ifp->if_name, IFNAMSIZ);
+    dlil_event(ifp, &event.header);
+    return;
+}
+
+/*
+ * Function: bond_attach_protocol
+ * Purpose:
+ *   Attach a DLIL protocol to the interface.
+ *
+ *   The ethernet demux special-cases to always return PF_BOND if the
+ *   interface is bonded.  That means we receive all traffic from that
+ *   interface without passing any of the traffic to any other attached
+ *   protocol.
+ */
+static int
+bond_attach_protocol(struct ifnet *ifp)
+{
+    int error;
+    struct dlil_proto_reg_str reg;
+
+    bzero(&reg, sizeof(reg));
+    TAILQ_INIT(&reg.demux_desc_head);
+    reg.interface_family = ifp->if_family;
+    reg.unit_number = ifp->if_unit;
+    reg.input = bond_input;
+    reg.event = bond_event;
+    reg.protocol_family = PF_BOND;
+
+    error = dlil_attach_protocol(&reg);
+    if (error) {
+        printf("bond over %s%d: dlil_attach_protocol failed, %d\n",
+               ifp->if_name, ifp->if_unit, error);
+    }
+    return (error);
+}
+
+/*
+ * Function: bond_detach_protocol
+ * Purpose:
+ *   Detach our DLIL protocol from an interface
+ */
+static int
+bond_detach_protocol(struct ifnet *ifp)
+{
+    int error;
+
+    error = dlil_detach_protocol(ifp, PF_BOND);
+    if (error) {
+        printf("bond over %s%d: dlil_detach_protocol failed, %d\n",
+               ifp->if_name, ifp->if_unit, error);
+    }
+    return (error);
+}
+
+/*
+ * DLIL interface family functions
+ */
+extern int ether_add_if(struct ifnet *ifp);
+extern int ether_del_if(struct ifnet *ifp);
+extern int ether_init_if(struct ifnet *ifp);
+extern int ether_add_proto_old(struct ifnet *ifp, u_long protocol_family,
+                               struct ddesc_head_str *desc_head);
+
+extern int ether_attach_inet(struct ifnet *ifp, u_long protocol_family);
+extern int ether_detach_inet(struct ifnet *ifp, u_long protocol_family);
+extern int ether_attach_inet6(struct ifnet *ifp, u_long protocol_family);
+extern int ether_detach_inet6(struct ifnet *ifp, u_long protocol_family);
+
+__private_extern__ int
+bond_family_init(void)
+{
+    int error = 0;
+    struct dlil_ifmod_reg_str ifmod_reg;
+
+    bzero(&ifmod_reg, sizeof(ifmod_reg));
+    ifmod_reg.add_if = ether_add_if;
+    ifmod_reg.del_if = ether_del_if;
+    ifmod_reg.init_if = NULL;
+    ifmod_reg.add_proto = ether_add_proto_old;
+    ifmod_reg.del_proto = ether_del_proto;
+    ifmod_reg.ifmod_ioctl = ether_ioctl;
+    ifmod_reg.shutdown = NULL;
+
+    if (dlil_reg_if_modules(APPLE_IF_FAM_BOND, &ifmod_reg)) {
+        printf("WARNING: bond_family_init -- "
+               "Can't register if family modules\n");
+        error = EIO;
+        goto done;
+    }
+
+    error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_BOND,
+                                  ether_attach_inet,
+                                  ether_detach_inet);
+    if (error != 0) {
+        printf("bond: dlil_reg_proto_module failed for AF_INET error=%d\n",
+               error);
+        goto done;
+    }
+
+    error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_BOND,
+                                  ether_attach_inet6,
+                                  ether_detach_inet6);
+    if (error != 0) {
+        printf("bond: 
dlil_reg_proto_module failed for AF_INET6 error=%d\n", + error); + goto done; + } + bond_clone_attach(); + + done: + return (error); +} +/** + ** + ** LACP routines: + ** + **/ + +/** + ** LACP ifbond_list routines + **/ +static bondport_ref +ifbond_list_find_moved_port(bondport_ref rx_port, + const lacp_actor_partner_tlv_ref atlv) +{ + ifbond_ref bond; + bondport_ref p; + partner_state_ref ps; + LAG_info_ref ps_li; + + TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) { + TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) { + + if (rx_port == p) { + /* no point in comparing against ourselves */ + continue; + } + if (p->po_receive_state != ReceiveState_PORT_DISABLED) { + /* it's not clear that we should be checking this */ + continue; + } + ps = &p->po_partner_state; + if (lacp_actor_partner_state_defaulted(ps->ps_state)) { + continue; + } + ps_li = &ps->ps_lag_info; + if (ps->ps_port == lacp_actor_partner_tlv_get_port(atlv) + && bcmp(&ps_li->li_system, atlv->lap_system, + sizeof(ps_li->li_system)) == 0) { + if (g_bond->verbose) { + timestamp_printf("System " EA_FORMAT + " Port 0x%x moved from %s to %s\n", + EA_LIST(&ps_li->li_system), ps->ps_port, + bondport_get_name(p), + bondport_get_name(rx_port)); + } + return (p); + } + } + } + return (NULL); +} + +/** + ** LACP ifbond, LAG routines + **/ + +static int +ifbond_selection(ifbond_ref bond) +{ + int all_ports_ready = 0; + int active_media = 0; + LAG_ref lag = NULL; + int lag_changed = 0; + bondport_ref p; + int port_speed = 0; + + lag = ifbond_find_best_LAG(bond, &active_media); + if (lag != bond->ifb_active_lag) { + if (bond->ifb_active_lag != NULL) { + ifbond_deactivate_LAG(bond, bond->ifb_active_lag); + bond->ifb_active_lag = NULL; + } + bond->ifb_active_lag = lag; + if (lag != NULL) { + ifbond_activate_LAG(bond, lag, active_media); + } + lag_changed = 1; + } + else if (lag != NULL) { + if (lag->lag_active_media != active_media) { + if (g_bond->verbose) { + timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n", + link_speed(lag->lag_active_media), + link_speed(active_media)); + } + ifbond_deactivate_LAG(bond, lag); + ifbond_activate_LAG(bond, lag, active_media); + lag_changed = 1; + } + } + if (lag != NULL) { + port_speed = link_speed(active_media); + all_ports_ready = ifbond_all_ports_ready(bond); + } + TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) { + if (lag != NULL && p->po_lag == lag + && media_speed(&p->po_media_info) == port_speed + && (p->po_mux_state == MuxState_DETACHED + || p->po_selected == SelectedState_SELECTED + || p->po_selected == SelectedState_STANDBY) + && bondport_aggregatable(p)) { + if (bond->ifb_max_active > 0) { + if (lag->lag_selected_port_count < bond->ifb_max_active) { + if (p->po_selected == SelectedState_STANDBY + || p->po_selected == SelectedState_UNSELECTED) { + bondport_set_selected(p, SelectedState_SELECTED); + } + } + else if (p->po_selected == SelectedState_UNSELECTED) { + bondport_set_selected(p, SelectedState_STANDBY); + } + } + else { + bondport_set_selected(p, SelectedState_SELECTED); + } + } + if (bondport_flags_selected_changed(p)) { + bondport_flags_clear_selected_changed(p); + bondport_mux_machine(p, LAEventSelectedChange, NULL); + } + if (all_ports_ready + && bondport_flags_ready(p) + && p->po_mux_state == MuxState_WAITING) { + bondport_mux_machine(p, LAEventReady, NULL); + } + bondport_transmit_machine(p, LAEventStart, NULL); + } + return (lag_changed); +} + +static LAG_ref +ifbond_find_best_LAG(ifbond_ref bond, int * active_media) +{ + int best_active = 0; + LAG_ref 
best_lag = NULL;
+    int best_count = 0;
+    int best_speed = 0;
+    LAG_ref lag;
+
+    if (bond->ifb_active_lag != NULL) {
+        best_lag = bond->ifb_active_lag;
+        best_count = LAG_get_aggregatable_port_count(best_lag, &best_active);
+        if (bond->ifb_max_active > 0
+            && best_count > bond->ifb_max_active) {
+            best_count = bond->ifb_max_active;
+        }
+        best_speed = link_speed(best_active);
+    }
+    TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
+        int active;
+        int count;
+        int speed;
+
+        if (lag == bond->ifb_active_lag) {
+            /* we've already computed it */
+            continue;
+        }
+        count = LAG_get_aggregatable_port_count(lag, &active);
+        if (count == 0) {
+            continue;
+        }
+        if (bond->ifb_max_active > 0
+            && count > bond->ifb_max_active) {
+            /* if there's a limit, don't count extra links */
+            count = bond->ifb_max_active;
+        }
+        speed = link_speed(active);
+        if ((count * speed) > (best_count * best_speed)) {
+            best_count = count;
+            best_speed = speed;
+            best_active = active;
+            best_lag = lag;
+        }
+    }
+    if (best_count == 0) {
+        return (NULL);
+    }
+    *active_media = best_active;
+    return (best_lag);
+}
+
+static void
+ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
+{
+    bondport_ref p;
+
+    TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
+        bondport_set_selected(p, SelectedState_UNSELECTED);
+    }
+    return;
+}
+
+static void
+ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
+{
+    int need = 0;
+    bondport_ref p;
+
+    if (bond->ifb_max_active > 0) {
+        need = bond->ifb_max_active;
+    }
+    lag->lag_active_media = active_media;
+    TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
+        if (bondport_aggregatable(p) == 0) {
+            bondport_set_selected(p, SelectedState_UNSELECTED);
+        }
+        else if (media_speed(&p->po_media_info) != link_speed(active_media)) {
+            bondport_set_selected(p, SelectedState_UNSELECTED);
+        }
+        else if (p->po_mux_state == MuxState_DETACHED) {
+            if (bond->ifb_max_active > 0) {
+                if (need > 0) {
+                    bondport_set_selected(p, SelectedState_SELECTED);
+                    need--;
+                }
+                else {
+                    bondport_set_selected(p, SelectedState_STANDBY);
+                }
+            }
+            else {
+                bondport_set_selected(p, SelectedState_SELECTED);
+            }
+        }
+        else {
+            bondport_set_selected(p, SelectedState_UNSELECTED);
+        }
+    }
+    return;
+}
+
+#if 0
+static void
+ifbond_set_max_active(ifbond_ref bond, int max_active)
+{
+    LAG_ref lag = bond->ifb_active_lag;
+
+    bond->ifb_max_active = max_active;
+    if (bond->ifb_max_active <= 0 || lag == NULL) {
+        return;
+    }
+    if (lag->lag_selected_port_count > bond->ifb_max_active) {
+        bondport_ref p;
+        int remove_count;
+
+        remove_count = lag->lag_selected_port_count - bond->ifb_max_active;
+        TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
+            if (p->po_selected == SelectedState_SELECTED) {
+                bondport_set_selected(p, SelectedState_UNSELECTED);
+                remove_count--;
+                if (remove_count == 0) {
+                    break;
+                }
+            }
+        }
+    }
+    return;
+}
+#endif /* 0 */
+
+static int
+ifbond_all_ports_ready(ifbond_ref bond)
+{
+    int ready = 0;
+    bondport_ref p;
+
+    if (bond->ifb_active_lag == NULL) {
+        return (0);
+    }
+    TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
+        if (p->po_mux_state == MuxState_WAITING
+            && p->po_selected == SelectedState_SELECTED) {
+            if (bondport_flags_ready(p) == 0) {
+                return (0);
+            }
+        }
+        /* note that there was at least one ready port */
+        ready = 1;
+    }
+    return (ready);
+}
+
+static int
+ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
+{
+    bondport_ref p;
+
+    TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
+        if (this_port == p) {
+            continue;
+        }
+
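
ifbond_find_best_LAG() above picks the aggregator that maximizes usable bandwidth: each candidate LAG is scored as aggregatable port count times common link speed, with the port count clamped to ifb_max_active when a limit is configured, and the highest score wins. The scoring rule in isolation (a sketch; lag_score() is an illustrative name, not code from this patch):

    /* compare LAGs by usable bandwidth: port count times per-port speed */
    static int
    lag_score(int port_count, int max_active, int link_speed_mbps)
    {
        if (max_active > 0 && port_count > max_active) {
            /* links beyond the active limit contribute no bandwidth */
            port_count = max_active;
        }
        return (port_count * link_speed_mbps);
    }

This is why the loop compares count * speed rather than the raw port count: a two-port gigabit LAG (2 x 1000) outranks a three-port 100 Mbit/s LAG (3 x 100).
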
if (bondport_flags_mux_attached(p) == 0) { + return (0); + } + } + return (1); +} + +static LAG_ref +ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p) +{ + LAG_ref lag; + + TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) { + if (bcmp(&lag->lag_info, &p->po_partner_state.ps_lag_info, + sizeof(lag->lag_info)) == 0) { + return (lag); + } + } + return (NULL); +} + +static int +LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media) +{ + int active; + int count; + bondport_ref p; + int speed; + + active = 0; + count = 0; + speed = 0; + TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) { + if (bondport_aggregatable(p)) { + int this_speed; + + this_speed = media_speed(&p->po_media_info); + if (this_speed == 0) { + continue; + } + if (this_speed > speed) { + active = p->po_media_info.mi_active; + speed = this_speed; + count = 1; + } + else if (this_speed == speed) { + count++; + } + } + } + *active_media = active; + return (count); +} + + +/** + ** LACP bondport routines + **/ +static void +bondport_link_status_changed(bondport_ref p) +{ + ifbond_ref bond = p->po_bond; + + if (g_bond->verbose) { + if (media_active(&p->po_media_info)) { + timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n", + bondport_get_name(p), + media_speed(&p->po_media_info), + media_full_duplex(&p->po_media_info) + ? "full" : "half"); + } + else { + timestamp_printf("[%s] Link DOWN\n", bondport_get_name(p)); + } + } + if (media_active(&p->po_media_info) + && bond->ifb_active_lag != NULL + && p->po_lag == bond->ifb_active_lag + && p->po_selected != SelectedState_UNSELECTED) { + if (media_speed(&p->po_media_info) != p->po_lag->lag_active_media) { + if (g_bond->verbose) { + timestamp_printf("[%s] Port speed %d differs from LAG %d\n", + bondport_get_name(p), + media_speed(&p->po_media_info), + link_speed(p->po_lag->lag_active_media)); + } + bondport_set_selected(p, SelectedState_UNSELECTED); + } + } + bondport_receive_machine(p, LAEventMediaChange, NULL); + bondport_mux_machine(p, LAEventMediaChange, NULL); + bondport_periodic_transmit_machine(p, LAEventMediaChange, NULL); + + return; +} + +static int +bondport_aggregatable(bondport_ref p) +{ + partner_state_ref ps = &p->po_partner_state; + + if (lacp_actor_partner_state_aggregatable(p->po_actor_state) == 0 + || lacp_actor_partner_state_aggregatable(ps->ps_state) == 0) { + /* we and/or our partner are individual */ + return (0); + } + if (p->po_lag == NULL) { + return (0); + } + switch (p->po_receive_state) { + default: + if (g_bond->verbose) { + timestamp_printf("[%s] Port is not selectable\n", + bondport_get_name(p)); + } + return (0); + case ReceiveState_CURRENT: + case ReceiveState_EXPIRED: + break; + } + return (1); +} + +static int +bondport_matches_LAG(bondport_ref p, LAG_ref lag) +{ + LAG_info_ref lag_li; + partner_state_ref ps; + LAG_info_ref ps_li; + + ps = &p->po_partner_state; + ps_li = &ps->ps_lag_info; + lag_li = &lag->lag_info; + if (ps_li->li_system_priority == lag_li->li_system_priority + && ps_li->li_key == lag_li->li_key + && (bcmp(&ps_li->li_system, &lag_li->li_system, + sizeof(lag_li->li_system)) + == 0)) { + return (1); + } + return (0); +} + +static int +bondport_remove_from_LAG(bondport_ref p) +{ + int active_lag = 0; + ifbond_ref bond = p->po_bond; + LAG_ref lag = p->po_lag; + + if (lag == NULL) { + return (0); + } + TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list); + if (g_bond->verbose) { + timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT + ",0x%04x)\n", + bondport_get_name(p), + 
lag->lag_info.li_system_priority, + EA_LIST(&lag->lag_info.li_system), + lag->lag_info.li_key); + } + p->po_lag = NULL; + lag->lag_port_count--; + if (lag->lag_port_count > 0) { + return (bond->ifb_active_lag == lag); + } + if (g_bond->verbose) { + timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT + ",0x%04x)\n", + bond->ifb_key, + lag->lag_info.li_system_priority, + EA_LIST(&lag->lag_info.li_system), + lag->lag_info.li_key); + } + TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list); + if (bond->ifb_active_lag == lag) { + bond->ifb_active_lag = NULL; + active_lag = 1; + } + FREE(lag, M_BOND); + return (active_lag); +} + +static void +bondport_add_to_LAG(bondport_ref p, LAG_ref lag) +{ + TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list); + p->po_lag = lag; + lag->lag_port_count++; + if (g_bond->verbose) { + timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n", + bondport_get_name(p), + lag->lag_info.li_system_priority, + EA_LIST(&lag->lag_info.li_system), + lag->lag_info.li_key); + } + return; +} + +static void +bondport_assign_to_LAG(bondport_ref p) +{ + ifbond_ref bond = p->po_bond; + LAG_ref lag; + + if (lacp_actor_partner_state_defaulted(p->po_actor_state)) { + bondport_remove_from_LAG(p); + return; + } + lag = p->po_lag; + if (lag != NULL) { + if (bondport_matches_LAG(p, lag)) { + /* still OK */ + return; + } + bondport_remove_from_LAG(p); + } + lag = ifbond_get_LAG_matching_port(bond, p); + if (lag != NULL) { + bondport_add_to_LAG(p, lag); + return; + } + lag = (LAG_ref)_MALLOC(sizeof(*lag), M_BOND, M_WAITOK); + TAILQ_INIT(&lag->lag_port_list); + lag->lag_port_count = 0; + lag->lag_selected_port_count = 0; + lag->lag_info = p->po_partner_state.ps_lag_info; + TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list); + if (g_bond->verbose) { + timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT + ",0x%04x)\n", + bond->ifb_key, + lag->lag_info.li_system_priority, + EA_LIST(&lag->lag_info.li_system), + lag->lag_info.li_key); + } + bondport_add_to_LAG(p, lag); + return; +} + +static void +bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p) +{ + bondport_ref moved_port; + + moved_port + = ifbond_list_find_moved_port(p, (const lacp_actor_partner_tlv_ref) + &in_lacpdu_p->la_actor_tlv); + if (moved_port != NULL) { + bondport_receive_machine(moved_port, LAEventPortMoved, NULL); + } + bondport_receive_machine(p, LAEventPacket, in_lacpdu_p); + bondport_mux_machine(p, LAEventPacket, in_lacpdu_p); + bondport_periodic_transmit_machine(p, LAEventPacket, in_lacpdu_p); + return; +} + +static void +bondport_set_selected(bondport_ref p, SelectedState s) +{ + if (s != p->po_selected) { + ifbond_ref bond = p->po_bond; + LAG_ref lag = p->po_lag; + + bondport_flags_set_selected_changed(p); + if (lag != NULL && bond->ifb_active_lag == lag) { + if (p->po_selected == SelectedState_SELECTED) { + lag->lag_selected_port_count--; + } + else if (s == SelectedState_SELECTED) { + lag->lag_selected_port_count++; + } + if (g_bond->verbose) { + timestamp_printf("[%s] SetSelected: %s (was %s)\n", + bondport_get_name(p), + SelectedStateString(s), + SelectedStateString(p->po_selected)); + } + } + } + p->po_selected = s; + return; +} + +/** + ** Receive machine + **/ + +static void +bondport_UpdateDefaultSelected(bondport_ref p) +{ + bondport_set_selected(p, SelectedState_UNSELECTED); + return; +} + +static void +bondport_RecordDefault(bondport_ref p) +{ + bzero(&p->po_partner_state, sizeof(p->po_partner_state)); + p->po_actor_state + = 
lacp_actor_partner_state_set_defaulted(p->po_actor_state); + bondport_assign_to_LAG(p); + return; +} + +static void +bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p) +{ + lacp_actor_partner_tlv_ref actor; + partner_state_ref ps; + LAG_info_ref ps_li; + + /* compare the PDU's Actor information to our Partner state */ + actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv; + ps = &p->po_partner_state; + ps_li = &ps->ps_lag_info; + if (lacp_actor_partner_tlv_get_port(actor) != ps->ps_port + || (lacp_actor_partner_tlv_get_port_priority(actor) + != ps->ps_port_priority) + || bcmp(actor->lap_system, &ps_li->li_system, sizeof(ps_li->li_system)) + || (lacp_actor_partner_tlv_get_system_priority(actor) + != ps_li->li_system_priority) + || (lacp_actor_partner_tlv_get_key(actor) != ps_li->li_key) + || (lacp_actor_partner_state_aggregatable(actor->lap_state) + != lacp_actor_partner_state_aggregatable(ps->ps_state))) { + bondport_set_selected(p, SelectedState_UNSELECTED); + if (g_bond->verbose) { + timestamp_printf("[%s] updateSelected UNSELECTED\n", + bondport_get_name(p)); + } + } + return; +} + +static void +bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p) +{ + lacp_actor_partner_tlv_ref actor; + ifbond_ref bond = p->po_bond; + int lacp_maintain = 0; + partner_state_ref ps; + lacp_actor_partner_tlv_ref partner; + LAG_info_ref ps_li; + + /* copy the PDU's Actor information into our Partner state */ + actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv; + ps = &p->po_partner_state; + ps_li = &ps->ps_lag_info; + ps->ps_port = lacp_actor_partner_tlv_get_port(actor); + ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(actor); + ps_li->li_system = *((lacp_system_ref)actor->lap_system); + ps_li->li_system_priority + = lacp_actor_partner_tlv_get_system_priority(actor); + ps_li->li_key = lacp_actor_partner_tlv_get_key(actor); + ps->ps_state = lacp_actor_partner_state_set_out_of_sync(actor->lap_state); + p->po_actor_state + = lacp_actor_partner_state_set_not_defaulted(p->po_actor_state); + + /* compare the PDU's Partner information to our own information */ + partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv; + + if (lacp_actor_partner_state_active_lacp(ps->ps_state) + || (lacp_actor_partner_state_active_lacp(p->po_actor_state) + && lacp_actor_partner_state_active_lacp(partner->lap_state))) { + if (g_bond->verbose) { + timestamp_printf("[%s] recordPDU: LACP will maintain\n", + bondport_get_name(p)); + } + lacp_maintain = 1; + } + if ((lacp_actor_partner_tlv_get_port(partner) + == bondport_get_index(p)) + && lacp_actor_partner_tlv_get_port_priority(partner) == p->po_priority + && bcmp(partner->lap_system, &g_bond->system, + sizeof(g_bond->system)) == 0 + && (lacp_actor_partner_tlv_get_system_priority(partner) + == g_bond->system_priority) + && lacp_actor_partner_tlv_get_key(partner) == bond->ifb_key + && (lacp_actor_partner_state_aggregatable(partner->lap_state) + == lacp_actor_partner_state_aggregatable(p->po_actor_state)) + && lacp_actor_partner_state_in_sync(actor->lap_state) + && lacp_maintain) { + ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state); + if (g_bond->verbose) { + timestamp_printf("[%s] recordPDU: LACP partner in sync\n", + bondport_get_name(p)); + } + } + else if (lacp_actor_partner_state_aggregatable(actor->lap_state) == 0 + && lacp_actor_partner_state_in_sync(actor->lap_state) + && lacp_maintain) { + ps->ps_state = lacp_actor_partner_state_set_in_sync(ps->ps_state); + if (g_bond->verbose) { + timestamp_printf("[%s] 
recordPDU: LACP partner in sync (ind)\n", + bondport_get_name(p)); + } + } + bondport_assign_to_LAG(p); + return; +} + +static __inline__ lacp_actor_partner_state +updateNTTBits(lacp_actor_partner_state s) +{ + return (s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY + | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT + | LACP_ACTOR_PARTNER_STATE_AGGREGATION + | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION)); +} + +static void +bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p) +{ + ifbond_ref bond = p->po_bond; + lacp_actor_partner_tlv_ref partner; + + /* compare the PDU's Actor information to our Partner state */ + partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv; + if ((lacp_actor_partner_tlv_get_port(partner) != bondport_get_index(p)) + || lacp_actor_partner_tlv_get_port_priority(partner) != p->po_priority + || bcmp(partner->lap_system, &g_bond->system, sizeof(g_bond->system)) + || (lacp_actor_partner_tlv_get_system_priority(partner) + != g_bond->system_priority) + || lacp_actor_partner_tlv_get_key(partner) != bond->ifb_key + || (updateNTTBits(partner->lap_state) + != updateNTTBits(p->po_actor_state))) { + bondport_flags_set_ntt(p); + if (g_bond->verbose) { + timestamp_printf("[%s] updateNTT: Need To Transmit\n", + bondport_get_name(p)); + } + } + return; +} + +static void +bondport_AttachMuxToAggregator(bondport_ref p) +{ + if (bondport_flags_mux_attached(p) == 0) { + if (g_bond->verbose) { + timestamp_printf("[%s] Attached Mux To Aggregator\n", + bondport_get_name(p)); + } + bondport_flags_set_mux_attached(p); + } + return; +} + +static void +bondport_DetachMuxFromAggregator(bondport_ref p) +{ + if (bondport_flags_mux_attached(p)) { + if (g_bond->verbose) { + timestamp_printf("[%s] Detached Mux From Aggregator\n", + bondport_get_name(p)); + } + bondport_flags_clear_mux_attached(p); + } + return; +} + +static void +bondport_enable_distributing(bondport_ref p) +{ + if (bondport_flags_distributing(p) == 0) { + ifbond_ref bond = p->po_bond; + + bond->ifb_distributing_array[bond->ifb_distributing_count++] = p; + if (g_bond->verbose) { + timestamp_printf("[%s] Distribution Enabled\n", + bondport_get_name(p)); + } + bondport_flags_set_distributing(p); + } + return; +} + +static void +bondport_disable_distributing(bondport_ref p) +{ + if (bondport_flags_distributing(p)) { + bondport_ref * array; + ifbond_ref bond; + int count; + int i; + + bond = p->po_bond; + array = bond->ifb_distributing_array; + count = bond->ifb_distributing_count; + for (i = 0; i < count; i++) { + if (array[i] == p) { + int j; + + for (j = i; j < (count - 1); j++) { + array[j] = array[j + 1]; + } + break; + } + } + bond->ifb_distributing_count--; + if (g_bond->verbose) { + timestamp_printf("[%s] Distribution Disabled\n", + bondport_get_name(p)); + } + bondport_flags_clear_distributing(p); + } + return; +} + +/** + ** Receive machine functions + **/ +static void +bondport_receive_machine_initialize(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_receive_machine_expired(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_receive_machine_defaulted(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_receive_machine_current(bondport_ref p, LAEvent event, + void * event_data); + +static void +bondport_receive_machine_event(bondport_ref p, 
LAEvent event, + void * event_data) +{ + switch (p->po_receive_state) { + case ReceiveState_none: + bondport_receive_machine_initialize(p, LAEventStart, NULL); + break; + case ReceiveState_INITIALIZE: + bondport_receive_machine_initialize(p, event, event_data); + break; + case ReceiveState_PORT_DISABLED: + bondport_receive_machine_port_disabled(p, event, event_data); + break; + case ReceiveState_EXPIRED: + bondport_receive_machine_expired(p, event, event_data); + break; + case ReceiveState_LACP_DISABLED: + bondport_receive_machine_lacp_disabled(p, event, event_data); + break; + case ReceiveState_DEFAULTED: + bondport_receive_machine_defaulted(p, event, event_data); + break; + case ReceiveState_CURRENT: + bondport_receive_machine_current(p, event, event_data); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine(bondport_ref p, LAEvent event, + void * event_data) +{ + switch (event) { + case LAEventPacket: + if (p->po_receive_state != ReceiveState_LACP_DISABLED) { + bondport_receive_machine_current(p, event, event_data); + } + break; + case LAEventMediaChange: + if (media_active(&p->po_media_info)) { + switch (p->po_receive_state) { + case ReceiveState_PORT_DISABLED: + case ReceiveState_LACP_DISABLED: + bondport_receive_machine_port_disabled(p, LAEventMediaChange, NULL); + break; + default: + break; + } + } + else { + bondport_receive_machine_port_disabled(p, LAEventStart, NULL); + } + break; + default: + bondport_receive_machine_event(p, event, event_data); + break; + } + return; +} + +static void +bondport_receive_machine_initialize(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive INITIALIZE\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_INITIALIZE; + bondport_set_selected(p, SelectedState_UNSELECTED); + bondport_RecordDefault(p); + p->po_actor_state + = lacp_actor_partner_state_set_not_expired(p->po_actor_state); + bondport_receive_machine_port_disabled(p, LAEventStart, NULL); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + partner_state_ref ps; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive PORT_DISABLED\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_PORT_DISABLED; + ps = &p->po_partner_state; + ps->ps_state = lacp_actor_partner_state_set_out_of_sync(ps->ps_state); + /* FALL THROUGH */ + case LAEventMediaChange: + if (media_active(&p->po_media_info)) { + if (media_full_duplex(&p->po_media_info)) { + bondport_receive_machine_expired(p, LAEventStart, NULL); + } + else { + bondport_receive_machine_lacp_disabled(p, LAEventStart, NULL); + } + } + else if (p->po_selected == SelectedState_SELECTED) { + struct timeval tv; + + if (g_bond->verbose) { + timestamp_printf("[%s] Receive PORT_DISABLED: " + "link timer started\n", + bondport_get_name(p)); + } + tv.tv_sec = 1; + tv.tv_usec = 0; + devtimer_set_relative(p->po_current_while_timer, tv, + (devtimer_timeout_func) + bondport_receive_machine_port_disabled, + (void *)LAEventTimeout, NULL); + } + else if (p->po_selected == SelectedState_STANDBY) { + bondport_set_selected(p, SelectedState_UNSELECTED); + } + break; + case LAEventTimeout: + if (p->po_selected == SelectedState_SELECTED) { + if 
(g_bond->verbose) { + timestamp_printf("[%s] Receive PORT_DISABLED: " + "link timer completed, marking UNSELECTED\n", + bondport_get_name(p)); + } + bondport_set_selected(p, SelectedState_UNSELECTED); + } + break; + case LAEventPortMoved: + bondport_receive_machine_initialize(p, LAEventStart, NULL); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine_expired(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + lacp_actor_partner_state s; + struct timeval tv; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive EXPIRED\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_EXPIRED; + s = p->po_partner_state.ps_state; + s = lacp_actor_partner_state_set_out_of_sync(s); + s = lacp_actor_partner_state_set_short_timeout(s); + p->po_partner_state.ps_state = s; + p->po_actor_state + = lacp_actor_partner_state_set_expired(p->po_actor_state); + /* start current_while timer */ + tv.tv_sec = LACP_SHORT_TIMEOUT_TIME; + tv.tv_usec = 0; + devtimer_set_relative(p->po_current_while_timer, tv, + (devtimer_timeout_func) + bondport_receive_machine_expired, + (void *)LAEventTimeout, NULL); + + break; + case LAEventTimeout: + bondport_receive_machine_defaulted(p, LAEventStart, NULL); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + partner_state_ref ps; + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive LACP_DISABLED\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_LACP_DISABLED; + bondport_set_selected(p, SelectedState_UNSELECTED); + bondport_RecordDefault(p); + ps = &p->po_partner_state; + ps->ps_state = lacp_actor_partner_state_set_individual(ps->ps_state); + p->po_actor_state + = lacp_actor_partner_state_set_not_expired(p->po_actor_state); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine_defaulted(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive DEFAULTED\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_DEFAULTED; + bondport_UpdateDefaultSelected(p); + bondport_RecordDefault(p); + p->po_actor_state + = lacp_actor_partner_state_set_not_expired(p->po_actor_state); + break; + default: + break; + } + return; +} + +static void +bondport_receive_machine_current(bondport_ref p, LAEvent event, + void * event_data) +{ + partner_state_ref ps; + struct timeval tv; + + switch (event) { + case LAEventPacket: + devtimer_cancel(p->po_current_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Receive CURRENT\n", + bondport_get_name(p)); + } + p->po_receive_state = ReceiveState_CURRENT; + bondport_UpdateSelected(p, event_data); + bondport_UpdateNTT(p, event_data); + bondport_RecordPDU(p, event_data); + p->po_actor_state + = lacp_actor_partner_state_set_not_expired(p->po_actor_state); + bondport_assign_to_LAG(p); + /* start current_while timer */ + ps = &p->po_partner_state; + if (lacp_actor_partner_state_short_timeout(ps->ps_state)) { + tv.tv_sec = LACP_SHORT_TIMEOUT_TIME; + } + else { + tv.tv_sec = LACP_LONG_TIMEOUT_TIME; + } + tv.tv_usec = 0; + devtimer_set_relative(p->po_current_while_timer, tv, + 
(devtimer_timeout_func) + bondport_receive_machine_current, + (void *)LAEventTimeout, NULL); + break; + case LAEventTimeout: + bondport_receive_machine_expired(p, LAEventStart, NULL); + break; + default: + break; + } + return; +} + +/** + ** Periodic Transmission machine + **/ + +static void +bondport_periodic_transmit_machine(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + int interval; + partner_state_ref ps; + struct timeval tv; + + switch (event) { + case LAEventStart: + if (g_bond->verbose) { + timestamp_printf("[%s] periodic_transmit Start\n", + bondport_get_name(p)); + } + /* FALL THROUGH */ + case LAEventMediaChange: + devtimer_cancel(p->po_periodic_timer); + p->po_periodic_interval = 0; + if (media_active(&p->po_media_info) == 0 + || media_full_duplex(&p->po_media_info) == 0) { + break; + } + case LAEventPacket: + /* Neither Partner nor Actor are LACP Active, no periodic tx */ + ps = &p->po_partner_state; + if (lacp_actor_partner_state_active_lacp(p->po_actor_state) == 0 + && (lacp_actor_partner_state_active_lacp(ps->ps_state) + == 0)) { + devtimer_cancel(p->po_periodic_timer); + p->po_periodic_interval = 0; + break; + } + if (lacp_actor_partner_state_short_timeout(ps->ps_state)) { + interval = LACP_FAST_PERIODIC_TIME; + } + else { + interval = LACP_SLOW_PERIODIC_TIME; + } + if (p->po_periodic_interval != interval) { + if (interval == LACP_FAST_PERIODIC_TIME + && p->po_periodic_interval == LACP_SLOW_PERIODIC_TIME) { + if (g_bond->verbose) { + timestamp_printf("[%s] periodic_transmit:" + " Need To Transmit\n", + bondport_get_name(p)); + } + bondport_flags_set_ntt(p); + } + p->po_periodic_interval = interval; + tv.tv_usec = 0; + tv.tv_sec = interval; + devtimer_set_relative(p->po_periodic_timer, tv, + (devtimer_timeout_func) + bondport_periodic_transmit_machine, + (void *)LAEventTimeout, NULL); + if (g_bond->verbose) { + timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n", + bondport_get_name(p), + p->po_periodic_interval); + } + } + break; + case LAEventTimeout: + bondport_flags_set_ntt(p); + tv.tv_sec = p->po_periodic_interval; + tv.tv_usec = 0; + devtimer_set_relative(p->po_periodic_timer, tv, (devtimer_timeout_func) + bondport_periodic_transmit_machine, + (void *)LAEventTimeout, NULL); + if (g_bond->verbose > 1) { + timestamp_printf("[%s] Periodic Transmission Timer: %d secs\n", + bondport_get_name(p), p->po_periodic_interval); + } + break; + default: + break; + } + return; +} + +/** + ** Transmit machine + **/ +static int +bondport_can_transmit(bondport_ref p, int32_t current_secs, + long * next_secs) +{ + if (p->po_last_transmit_secs != current_secs) { + p->po_last_transmit_secs = current_secs; + p->po_n_transmit = 0; + } + if (p->po_n_transmit < LACP_PACKET_RATE) { + p->po_n_transmit++; + return (1); + } + if (next_secs != NULL) { + *next_secs = current_secs + 1; + } + return (0); +} + +static void +bondport_transmit_machine(bondport_ref p, LAEvent event, + void * event_data) +{ + lacp_actor_partner_tlv_ref aptlv; + lacp_collector_tlv_ref ctlv; + struct timeval next_tick_time = {0, 0}; + lacpdu_ref out_lacpdu_p; + packet_buffer_ref pkt; + partner_state_ref ps; + LAG_info_ref ps_li; + + switch (event) { + case LAEventTimeout: + case LAEventStart: + if (p->po_periodic_interval == 0 || bondport_flags_ntt(p) == 0) { + break; + } + if (event_data != NULL) { + /* we're going away, transmit the packet no matter what */ + } + else if (bondport_can_transmit(p, devtimer_current_secs(), + &next_tick_time.tv_sec) == 0) { + if 
(devtimer_enabled(p->po_transmit_timer)) { + if (g_bond->verbose > 0) { + timestamp_printf("[%s] Transmit Timer Already Set\n", + bondport_get_name(p)); + } + } + else { + devtimer_set_absolute(p->po_transmit_timer, next_tick_time, + (devtimer_timeout_func) + bondport_transmit_machine, + (void *)LAEventTimeout, NULL); + if (g_bond->verbose > 0) { + timestamp_printf("[%s] Transmit Timer Deadline %d secs\n", + bondport_get_name(p), + next_tick_time.tv_sec); + } + } + break; + } + if (g_bond->verbose > 0) { + if (event == LAEventTimeout) { + timestamp_printf("[%s] Transmit Timer Complete\n", + bondport_get_name(p)); + } + } + pkt = packet_buffer_allocate(sizeof(*out_lacpdu_p)); + if (pkt == NULL) { + printf("[%s] Transmit: failed to allocate packet buffer\n", + bondport_get_name(p)); + break; + } + out_lacpdu_p = (lacpdu_ref)packet_buffer_byteptr(pkt); + bzero(out_lacpdu_p, sizeof(*out_lacpdu_p)); + out_lacpdu_p->la_subtype = IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP; + out_lacpdu_p->la_version = LACPDU_VERSION_1; + + /* Actor */ + aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_actor_tlv; + aptlv->lap_tlv_type = LACPDU_TLV_TYPE_ACTOR; + aptlv->lap_length = LACPDU_ACTOR_TLV_LENGTH; + *((lacp_system_ref)aptlv->lap_system) = g_bond->system; + lacp_actor_partner_tlv_set_system_priority(aptlv, + g_bond->system_priority); + lacp_actor_partner_tlv_set_port_priority(aptlv, p->po_priority); + lacp_actor_partner_tlv_set_port(aptlv, bondport_get_index(p)); + lacp_actor_partner_tlv_set_key(aptlv, p->po_bond->ifb_key); + aptlv->lap_state = p->po_actor_state; + + /* Partner */ + aptlv = (lacp_actor_partner_tlv_ref)out_lacpdu_p->la_partner_tlv; + aptlv->lap_tlv_type = LACPDU_TLV_TYPE_PARTNER; + aptlv->lap_length = LACPDU_PARTNER_TLV_LENGTH; + ps = &p->po_partner_state; + ps_li = &ps->ps_lag_info; + lacp_actor_partner_tlv_set_port(aptlv, ps->ps_port); + lacp_actor_partner_tlv_set_port_priority(aptlv, ps->ps_port_priority); + *((lacp_system_ref)aptlv->lap_system) = ps_li->li_system; + lacp_actor_partner_tlv_set_system_priority(aptlv, + ps_li->li_system_priority); + lacp_actor_partner_tlv_set_key(aptlv, ps_li->li_key); + aptlv->lap_state = ps->ps_state; + + /* Collector */ + ctlv = (lacp_collector_tlv_ref)out_lacpdu_p->la_collector_tlv; + ctlv->lac_tlv_type = LACPDU_TLV_TYPE_COLLECTOR; + ctlv->lac_length = LACPDU_COLLECTOR_TLV_LENGTH; + + bondport_slow_proto_transmit(p, pkt); + bondport_flags_clear_ntt(p); + if (g_bond->verbose > 0) { + timestamp_printf("[%s] Transmit Packet %d\n", + bondport_get_name(p), p->po_n_transmit); + } + break; + default: + break; + } + return; +} + +/** + ** Mux machine functions + **/ + +static void +bondport_mux_machine_detached(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_mux_machine_waiting(bondport_ref p, LAEvent event, + void * event_data); +static void +bondport_mux_machine_attached(bondport_ref p, LAEvent event, + void * event_data); + +static void +bondport_mux_machine_collecting_distributing(bondport_ref p, LAEvent event, + void * event_data); + +static void +bondport_mux_machine(bondport_ref p, LAEvent event, void * event_data) +{ + switch (p->po_mux_state) { + case MuxState_none: + bondport_mux_machine_detached(p, LAEventStart, NULL); + break; + case MuxState_DETACHED: + bondport_mux_machine_detached(p, event, event_data); + break; + case MuxState_WAITING: + bondport_mux_machine_waiting(p, event, event_data); + break; + case MuxState_ATTACHED: + bondport_mux_machine_attached(p, event, event_data); + break; + case 
MuxState_COLLECTING_DISTRIBUTING: + bondport_mux_machine_collecting_distributing(p, event, event_data); + break; + default: + break; + } + return; +} + +static void +bondport_mux_machine_detached(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + lacp_actor_partner_state s; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_wait_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Mux DETACHED\n", + bondport_get_name(p)); + } + p->po_mux_state = MuxState_DETACHED; + bondport_flags_clear_ready(p); + bondport_DetachMuxFromAggregator(p); + bondport_disable_distributing(p); + s = p->po_actor_state; + s = lacp_actor_partner_state_set_out_of_sync(s); + s = lacp_actor_partner_state_set_not_collecting(s); + s = lacp_actor_partner_state_set_not_distributing(s); + p->po_actor_state = s; + bondport_flags_set_ntt(p); + break; + case LAEventSelectedChange: + case LAEventPacket: + case LAEventMediaChange: + if (p->po_selected == SelectedState_SELECTED + || p->po_selected == SelectedState_STANDBY) { + bondport_mux_machine_waiting(p, LAEventStart, NULL); + } + break; + default: + break; + } + return; +} + +static void +bondport_mux_machine_waiting(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + struct timeval tv; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_wait_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING\n", + bondport_get_name(p)); + } + p->po_mux_state = MuxState_WAITING; + /* FALL THROUGH */ + default: + case LAEventSelectedChange: + if (p->po_selected == SelectedState_UNSELECTED) { + bondport_mux_machine_detached(p, LAEventStart, NULL); + break; + } + if (p->po_selected == SelectedState_STANDBY) { + devtimer_cancel(p->po_wait_while_timer); + /* wait until state changes to SELECTED */ + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: Standby\n", + bondport_get_name(p)); + } + break; + } + if (bondport_flags_ready(p)) { + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: Port is already ready\n", + bondport_get_name(p)); + } + break; + } + if (devtimer_enabled(p->po_wait_while_timer)) { + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: Timer already set\n", + bondport_get_name(p)); + } + break; + } + if (ifbond_all_ports_attached(p->po_bond, p)) { + devtimer_cancel(p->po_wait_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: No waiting\n", + bondport_get_name(p)); + } + bondport_flags_set_ready(p); + goto no_waiting; + } + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: 2 seconds\n", + bondport_get_name(p)); + } + tv.tv_sec = LACP_AGGREGATE_WAIT_TIME; + tv.tv_usec = 0; + devtimer_set_relative(p->po_wait_while_timer, tv, + (devtimer_timeout_func) + bondport_mux_machine_waiting, + (void *)LAEventTimeout, NULL); + break; + case LAEventTimeout: + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: Ready\n", + bondport_get_name(p)); + } + bondport_flags_set_ready(p); + break; + case LAEventReady: + no_waiting: + if (bondport_flags_ready(p)){ + if (g_bond->verbose) { + timestamp_printf("[%s] Mux WAITING: All Ports Ready\n", + bondport_get_name(p)); + } + bondport_mux_machine_attached(p, LAEventStart, NULL); + break; + } + break; + } + return; +} + +static void +bondport_mux_machine_attached(bondport_ref p, LAEvent event, + __unused void * event_data) +{ + lacp_actor_partner_state s; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_wait_while_timer); + if (g_bond->verbose) { + 
timestamp_printf("[%s] Mux ATTACHED\n", + bondport_get_name(p)); + } + p->po_mux_state = MuxState_ATTACHED; + bondport_AttachMuxToAggregator(p); + s = p->po_actor_state; + s = lacp_actor_partner_state_set_in_sync(s); + s = lacp_actor_partner_state_set_not_collecting(s); + s = lacp_actor_partner_state_set_not_distributing(s); + bondport_disable_distributing(p); + p->po_actor_state = s; + bondport_flags_set_ntt(p); + /* FALL THROUGH */ + default: + switch (p->po_selected) { + case SelectedState_SELECTED: + s = p->po_partner_state.ps_state; + if (lacp_actor_partner_state_in_sync(s)) { + bondport_mux_machine_collecting_distributing(p, LAEventStart, + NULL); + } + break; + default: + bondport_mux_machine_detached(p, LAEventStart, NULL); + break; + } + break; + } + return; +} + +static void +bondport_mux_machine_collecting_distributing(bondport_ref p, + LAEvent event, + __unused void * event_data) +{ + lacp_actor_partner_state s; + + switch (event) { + case LAEventStart: + devtimer_cancel(p->po_wait_while_timer); + if (g_bond->verbose) { + timestamp_printf("[%s] Mux COLLECTING_DISTRIBUTING\n", + bondport_get_name(p)); + } + p->po_mux_state = MuxState_COLLECTING_DISTRIBUTING; + bondport_enable_distributing(p); + s = p->po_actor_state; + s = lacp_actor_partner_state_set_collecting(s); + s = lacp_actor_partner_state_set_distributing(s); + p->po_actor_state = s; + bondport_flags_set_ntt(p); + /* FALL THROUGH */ + default: + s = p->po_partner_state.ps_state; + if (lacp_actor_partner_state_in_sync(s) == 0) { + bondport_mux_machine_attached(p, LAEventStart, NULL); + break; + } + switch (p->po_selected) { + case SelectedState_UNSELECTED: + case SelectedState_STANDBY: + bondport_mux_machine_attached(p, LAEventStart, NULL); + break; + default: + break; + } + break; + } + return; +} diff --git a/bsd/net/if_bond_var.h b/bsd/net/if_bond_var.h new file mode 100644 index 000000000..f07728fbf --- /dev/null +++ b/bsd/net/if_bond_var.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
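
The receive, periodic-transmission, transmit, and mux machines above are the four per-port LACP state machines; the mux machine is the one that actually gates traffic. A condensed view of its transition rules follows as an illustrative sketch only: the state names mirror the driver, but the helper itself is hypothetical and folds the STANDBY handling into the selected flag.

typedef enum {
    MUX_DETACHED,
    MUX_WAITING,
    MUX_ATTACHED,
    MUX_COLLECTING_DISTRIBUTING
} mux_state_sketch_t;

static mux_state_sketch_t
mux_next_state(mux_state_sketch_t cur, int selected,
               int partner_in_sync, int all_ports_ready)
{
    switch (cur) {
    case MUX_DETACHED:
        /* leave DETACHED once the selection logic picks the port */
        return selected ? MUX_WAITING : MUX_DETACHED;
    case MUX_WAITING:
        /* hold for LACP_AGGREGATE_WAIT_TIME, or until every sibling
         * port is ready, so ports join the aggregator together */
        if (!selected)
            return MUX_DETACHED;
        return all_ports_ready ? MUX_ATTACHED : MUX_WAITING;
    case MUX_ATTACHED:
        /* collect/distribute only after the partner reports in-sync */
        if (!selected)
            return MUX_DETACHED;
        return partner_in_sync ? MUX_COLLECTING_DISTRIBUTING : MUX_ATTACHED;
    case MUX_COLLECTING_DISTRIBUTING:
        /* drop back the moment sync or selection is lost */
        if (!selected || !partner_in_sync)
            return MUX_ATTACHED;
        return MUX_COLLECTING_DISTRIBUTING;
    }
    return cur;
}

Each transition in the real machine also toggles the actor's sync/collecting/distributing bits and sets NTT (need-to-transmit), which is what keeps the partner informed of every state change.
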
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _NET_IF_BOND_VAR_H_ +#define _NET_IF_BOND_VAR_H_ + +#include <sys/types.h> + +#include <net/lacp.h> + +#define IF_BOND_OP_ADD_INTERFACE 1 +#define IF_BOND_OP_REMOVE_INTERFACE 2 +#define IF_BOND_OP_GET_STATUS 3 +#define IF_BOND_OP_SET_VERBOSE 4 + +struct if_bond_partner_state { + lacp_system ibps_system; + lacp_system_priority ibps_system_priority; + lacp_key ibps_key; + lacp_port ibps_port; + lacp_port_priority ibps_port_priority; + lacp_actor_partner_state ibps_state; + u_char ibps_reserved1; +}; + +#define IF_BOND_STATUS_SELECTED_STATE_UNSELECTED 0 +#define IF_BOND_STATUS_SELECTED_STATE_SELECTED 1 +#define IF_BOND_STATUS_SELECTED_STATE_STANDBY 2 + +struct if_bond_status { + char ibs_if_name[IFNAMSIZ]; /* interface name */ + lacp_port_priority ibs_port_priority; + lacp_actor_partner_state ibs_state; + u_char ibs_selected_state; + struct if_bond_partner_state ibs_partner_state; + u_int32_t ibs_reserved[8]; +}; + +#define IF_BOND_STATUS_REQ_VERSION 1 + +struct if_bond_status_req { + int ibsr_version; /* version */ + int ibsr_total; /* returned number of struct if_bond_status's */ + int ibsr_count; /* number that will fit in ibsr_buffer */ + union { /* buffer to hold if_bond_status's */ + char * ibsru_buffer32; + u_int64_t ibsru_buffer64; + } ibsr_ibsru; + lacp_key ibsr_key; /* returned */ + u_int16_t ibsr_reserved0; /* for future use */ + u_int32_t ibsr_reserved[3];/* for future use */ +}; + +#if defined(__LP64__) +#define ibsr_buffer ibsr_ibsru.ibsru_buffer64 +#else +#define ibsr_buffer ibsr_ibsru.ibsru_buffer32 +#endif + +struct if_bond_req { + u_int32_t ibr_op; /* operation */ + union { + char ibru_if_name[IFNAMSIZ]; /* interface name */ + struct if_bond_status_req ibru_status; /* status information */ + int ibru_int_val; + } ibr_ibru; +}; + +#ifdef KERNEL_PRIVATE +int bond_family_init(void); +#endif /* KERNEL_PRIVATE */ + +#endif /* _NET_IF_BOND_VAR_H_ */ diff --git a/bsd/net/if_disc.c b/bsd/net/if_disc.c index c8c266bc4..b5e751f41 100644 --- a/bsd/net/if_disc.c +++ b/bsd/net/if_disc.c @@ -78,7 +78,7 @@ #define DSMTU 65532 #endif -static void discattach __P((void)); +static void discattach(void); static struct ifnet discif; static int discoutput(struct ifnet *, struct mbuf *, struct sockaddr *, @@ -200,7 +200,7 @@ discioctl(ifp, cmd, data) switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; + ifnet_set_flags(ifp, IFF_UP, IFF_UP); ifa = (struct ifaddr *)data; if (ifa != 0) ifa->ifa_rtrequest = discrtrequest; diff --git a/bsd/net/if_dl.h b/bsd/net/if_dl.h index 16201a909..51e9262e0 100644 --- a/bsd/net/if_dl.h +++ b/bsd/net/if_dl.h @@ -98,14 +98,21 @@ struct sockaddr_dl { }; #define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen)) +#ifdef KERNEL_PRIVATE +#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen)) +#endif + +#ifdef BSD_KERNEL_PRIVATE +#define SDL(s) ((struct sockaddr_dl *)s) +#endif #ifndef KERNEL #include __BEGIN_DECLS -void link_addr __P((const char *, struct sockaddr_dl *)); -char *link_ntoa __P((const struct sockaddr_dl *)); +void link_addr(const char *, struct sockaddr_dl *); +char *link_ntoa(const struct sockaddr_dl *); __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/net/if_dummy.c b/bsd/net/if_dummy.c index 50ce06568..c9b330fa3 100644 --- a/bsd/net/if_dummy.c +++ b/bsd/net/if_dummy.c @@ -77,7 +77,6 @@ #include #include -#include #include #include @@ -109,12 +108,12 @@ #include "bpfilter.h" -static int dummyioctl __P((struct ifnet *, u_long, caddr_t)); -int dummyoutput __P((struct ifnet *, register struct mbuf *, struct
sockaddr *, - register struct rtentry *)); -static void dummyrtrequest __P((int, struct rtentry *, struct sockaddr *)); +static int dummyioctl(struct ifnet *, u_long, caddr_t); +int dummyoutput(struct ifnet *, register struct mbuf *, struct sockaddr *, + register struct rtentry *); +static void dummyrtrequest(int, struct rtentry *, struct sockaddr *); -static void dummyattach __P((void *)); +static void dummyattach(void *); PSEUDO_SET(dummyattach, if_dummy); #if TINY_DUMMYMTU @@ -171,8 +170,6 @@ dummyoutput(ifp, m, dst, rt) struct sockaddr *dst; register struct rtentry *rt; { - int s, isr; - register struct ifqueue *ifq = 0; if ((m->m_flags & M_PKTHDR) == 0) panic("dummyoutput no HDR"); @@ -216,62 +213,9 @@ dummyoutput(ifp, m, dst, rt) } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; - switch (dst->sa_family) { - -#if INET - case AF_INET: - ifq = &ipintrq; - isr = NETISR_IP; - break; -#endif -#if IPX - case AF_IPX: - ifq = &ipxintrq; - isr = NETISR_IPX; - break; -#endif -#if INET6 - case AF_INET6: - ifq = &ip6intrq; - isr = NETISR_IPV6; - break; -#endif -#if NS - case AF_NS: - ifq = &nsintrq; - isr = NETISR_NS; - break; -#endif -#if ISO - case AF_ISO: - ifq = &clnlintrq; - isr = NETISR_ISO; - break; -#endif -#if NETATALK - case AF_APPLETALK: - ifq = &atintrq2; - isr = NETISR_ATALK; - break; -#endif NETATALK - default: - printf("%s: can't handle af%d\n", - if_name(ifp), dst->sa_family); - m_freem(m); - return (EAFNOSUPPORT); - } - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - m_freem(m); - splx(s); - return (ENOBUFS); - } - IF_ENQUEUE(ifq, m); - schednetisr(isr); + proto_inject(dst->sa_family, m); ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - splx(s); return (0); } @@ -311,7 +255,7 @@ dummyioctl(ifp, cmd, data) switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP | IFF_RUNNING; + ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING); ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = dummyrtrequest; /* diff --git a/bsd/net/if_ether.h b/bsd/net/if_ether.h new file mode 100644 index 000000000..a8e1bce0d --- /dev/null +++ b/bsd/net/if_ether.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _NET_IF_ETHER_H +#define _NET_IF_ETHER_H +#ifdef KERNEL + +#include + +__BEGIN_DECLS + +/* + * These functions may be used for an interface emulating an ethernet + * interface and not using IOKit. If you use IOKit and the IOKit + * Ethernet Family, these functions will be set for you. Use these + * functions when filling out the ifnet_init_params structure. 
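
A minimal sketch of that usage, assuming the kpi_interface ifnet_init_params layout; the field names follow the ifnet KPI introduced alongside this patch, and everything marked foo_* is a hypothetical driver name:

#include <net/kpi_interface.h>

static errno_t foo_output(ifnet_t interface, mbuf_t packet); /* driver-supplied */

static errno_t
foo_attach_ethernet_like_if(ifnet_t *ifp_out)
{
    struct ifnet_init_params init;

    bzero(&init, sizeof(init));
    init.name        = "foo";                 /* hypothetical driver */
    init.unit        = 0;
    init.family      = IFNET_FAMILY_ETHERNET;
    init.type        = IFT_ETHER;
    init.output      = foo_output;
    init.demux       = ether_demux;           /* classify inbound frames */
    init.add_proto   = ether_add_proto;       /* track attached protocols */
    init.del_proto   = ether_del_proto;
    init.framer      = ether_frameout;        /* build the 14-byte header */
    init.ioctl       = ether_ioctl;
    init.check_multi = ether_check_multi;     /* vet multicast addresses */
    return ifnet_allocate(&init, ifp_out);
}
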
+ */ +errno_t ether_demux(ifnet_t interface, mbuf_t packet, char* header, + protocol_family_t *protocol); +errno_t ether_add_proto(ifnet_t interface, protocol_family_t protocol, + const struct ifnet_demux_desc *demux_list, + u_int32_t demux_count); +errno_t ether_del_proto(ifnet_t interface, protocol_family_t protocol); +errno_t ether_frameout(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *dest_lladdr, + const char *frame_type); +errno_t ether_ioctl(ifnet_t interface, u_int32_t command, void* data); +errno_t ether_check_multi(ifnet_t ifp, const struct sockaddr *multicast); + +__END_DECLS + +#endif /* KERNEL */ +#endif /* _NET_IF_ETHER_H */ diff --git a/bsd/net/if_ethersubr.c b/bsd/net/if_ethersubr.c index 0df1345ba..205ec4392 100644 --- a/bsd/net/if_ethersubr.c +++ b/bsd/net/if_ethersubr.c @@ -65,7 +65,6 @@ #include #include -#include #include #include #include @@ -101,47 +100,16 @@ extern struct ifqueue pkintrq; extern u_char etherbroadcastaddr[]; #define senderr(e) do { error = (e); goto bad;} while (0) -#define IFP2AC(IFP) ((struct arpcom *)IFP) /* * Perform common duties while attaching to interface list */ - -/* - IONetworkingFamily should call dlil_if_attach - ether_ifattach becomes obsolete, but remains for - temporary compatibility with third parties extensions -*/ -void -ether_ifattach(ifp) - register struct ifnet *ifp; -{ - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - - ifp->if_name = "en"; - ifp->if_family = APPLE_IF_FAM_ETHERNET; - ifp->if_type = IFT_ETHER; - ifp->if_addrlen = 6; - ifp->if_hdrlen = 14; - ifp->if_mtu = ETHERMTU; - if (ifp->if_baudrate == 0) - ifp->if_baudrate = 10000000; - - dlil_if_attach(ifp); - (void) thread_funnel_set(network_flock, funnel_state); -} - -SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); - int -ether_resolvemulti(ifp, llsa, sa) - struct ifnet *ifp; - struct sockaddr **llsa; - struct sockaddr *sa; +ether_resolvemulti( + struct ifnet *ifp, + struct sockaddr **llsa, + struct sockaddr *sa) { struct sockaddr_dl *sdl; struct sockaddr_in *sin; @@ -232,185 +200,6 @@ ether_resolvemulti(ifp, llsa, sa) } - - - -u_char ether_ipmulticast_min[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; -u_char ether_ipmulticast_max[6] = { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff }; -/* - * Add an Ethernet multicast address or range of addresses to the list for a - * given interface. - */ -int -ether_addmulti(ifr, ac) - struct ifreq *ifr; - register struct arpcom *ac; -{ - register struct ether_multi *enm; - struct sockaddr_in *sin; - u_char addrlo[6]; - u_char addrhi[6]; - int s = splimp(); - - switch (ifr->ifr_addr.sa_family) { - - case AF_UNSPEC: - bcopy(ifr->ifr_addr.sa_data, addrlo, 6); - bcopy(addrlo, addrhi, 6); - break; - -#if INET - case AF_INET: - sin = (struct sockaddr_in *)&(ifr->ifr_addr); - if (sin->sin_addr.s_addr == INADDR_ANY) { - /* - * An IP address of INADDR_ANY means listen to all - * of the Ethernet multicast addresses used for IP. - * (This is for the sake of IP multicast routers.) - */ - bcopy(ether_ipmulticast_min, addrlo, 6); - bcopy(ether_ipmulticast_max, addrhi, 6); - } - else { - ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo); - bcopy(addrlo, addrhi, 6); - } - break; -#endif - - default: - splx(s); - return (EAFNOSUPPORT); - } - - /* - * Verify that we have valid Ethernet multicast addresses. - */ - if ((addrlo[0] & 0x01) != 1 || (addrhi[0] & 0x01) != 1) { - splx(s); - return (EINVAL); - } - /* - * See if the address range is already in the list.
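
For reference, the ether_ipmulticast_min/max range above falls out of the standard IPv4-to-Ethernet multicast mapping that ETHER_MAP_IP_MULTICAST implements: the group address's low 23 bits are pasted onto the fixed 01:00:5e prefix. A free-standing sketch of the same computation:

#include <netinet/in.h>

static void
ip_group_to_ether_sketch(const struct in_addr *group, u_char mac[6])
{
    const u_char *p = (const u_char *)&group->s_addr;  /* network order */

    mac[0] = 0x01;
    mac[1] = 0x00;
    mac[2] = 0x5e;
    mac[3] = p[1] & 0x7f;   /* top bit masked: only 23 bits survive */
    mac[4] = p[2];
    mac[5] = p[3];
}

Because 28 significant group bits squeeze into 23 MAC bits, 32 IPv4 groups share each Ethernet address, which is why receivers must still filter at the IP layer.
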
- */ - ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm); - if (enm != NULL) { - /* - * Found it; just increment the reference count. - */ - ++enm->enm_refcount; - splx(s); - return (0); - } - /* - * New address or range; malloc a new multicast record - * and link it into the interface's multicast list. - */ - enm = (struct ether_multi *)_MALLOC(sizeof(*enm), M_IFMADDR, M_WAITOK); - if (enm == NULL) { - splx(s); - return (ENOBUFS); - } - bcopy(addrlo, enm->enm_addrlo, 6); - bcopy(addrhi, enm->enm_addrhi, 6); - enm->enm_ac = ac; - enm->enm_refcount = 1; - enm->enm_next = ac->ac_multiaddrs; - ac->ac_multiaddrs = enm; - splx(s); - /* - * Return ENETRESET to inform the driver that the list has changed - * and its reception filter should be adjusted accordingly. - */ - return (ENETRESET); -} - -/* - * Delete a multicast address record. - */ -int -ether_delmulti(ifr, ac, ret_mca) - struct ifreq *ifr; - register struct arpcom *ac; - struct ether_addr * ret_mca; -{ - register struct ether_multi *enm; - register struct ether_multi **p; - struct sockaddr_in *sin; - u_char addrlo[6]; - u_char addrhi[6]; - int s = splimp(); - - switch (ifr->ifr_addr.sa_family) { - - case AF_UNSPEC: - bcopy(ifr->ifr_addr.sa_data, addrlo, 6); - bcopy(addrlo, addrhi, 6); - break; - -#if INET - case AF_INET: - sin = (struct sockaddr_in *)&(ifr->ifr_addr); - if (sin->sin_addr.s_addr == INADDR_ANY) { - /* - * An IP address of INADDR_ANY means stop listening - * to the range of Ethernet multicast addresses used - * for IP. - */ - bcopy(ether_ipmulticast_min, addrlo, 6); - bcopy(ether_ipmulticast_max, addrhi, 6); - } - else { - ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo); - bcopy(addrlo, addrhi, 6); - } - break; -#endif - - default: - splx(s); - return (EAFNOSUPPORT); - } - - /* - * Look up the address in our list. - */ - ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm); - if (enm == NULL) { - splx(s); - return (ENXIO); - } - if (--enm->enm_refcount != 0) { - /* - * Still some claims to this record. - */ - splx(s); - return (0); - } - - /* save the low and high address of the range before deletion */ - if (ret_mca) { - *ret_mca = *((struct ether_addr *)addrlo); - *(ret_mca + 1) = *((struct ether_addr *)addrhi); - } - - /* - * No remaining claims to this record; unlink and free it. - */ - for (p = &enm->enm_ac->ac_multiaddrs; - *p != enm; - p = &(*p)->enm_next) - continue; - *p = (*p)->enm_next; - FREE(enm, M_IFMADDR); - splx(s); - /* - * Return ENETRESET to inform the driver that the list has changed - * and its reception filter should be adjusted accordingly. - */ - return (ENETRESET); -} - /* * Convert Ethernet address to printable (loggable) representation. 
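
The routine that follows produces the usual colon-separated hex form of a MAC address; a free-standing equivalent for illustration (the kernel's own version may manage its buffer differently):

#include <stdio.h>

static const char *
ether_ntoa_sketch(const u_char addr[6], char buf[18])
{
    snprintf(buf, 18, "%02x:%02x:%02x:%02x:%02x:%02x",
        addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
    return (buf);
}
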
*/ diff --git a/bsd/net/if_faith.c b/bsd/net/if_faith.c index 1a33f89d7..5c4c8e487 100644 --- a/bsd/net/if_faith.c +++ b/bsd/net/if_faith.c @@ -80,7 +80,6 @@ #include #include -#include #include #include #include @@ -106,12 +105,12 @@ #include -static int faithioctl __P((struct ifnet *, u_long, void*)); -int faith_pre_output __P((struct ifnet *, register struct mbuf **, struct sockaddr *, - caddr_t, char *, char *, u_long)); -static void faithrtrequest __P((int, struct rtentry *, struct sockaddr *)); +static int faithioctl(struct ifnet *, u_long, void*); +int faith_pre_output(struct ifnet *, register struct mbuf **, + const struct sockaddr *, caddr_t, char *, char *, u_long); +static void faithrtrequest(int, struct rtentry *, struct sockaddr *); -void faithattach __P((void)); +void faithattach(void); #ifndef __APPLE__ PSEUDO_SET(faithattach, if_faith); #endif @@ -196,21 +195,15 @@ int faith_attach_inet(struct ifnet *ifp, u_long *dl_tag) } } + bzero(®, sizeof(reg)); + bzero(&desc, sizeof(desc)); TAILQ_INIT(®.demux_desc_head); desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; desc.native_type = (char *) &native; TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); reg.interface_family = ifp->if_family; reg.unit_number = ifp->if_unit; - reg.input = 0; reg.pre_output = faith_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = 0; - reg.default_proto = 0; reg.protocol_family = PF_INET; stat = dlil_attach_protocol(®, dl_tag); @@ -288,14 +281,12 @@ int faith_pre_output(ifp, m0, dst, route_entry, frame_type, dst_addr, dl_tag) struct ifnet *ifp; register struct mbuf **m0; - struct sockaddr *dst; + const struct sockaddr *dst; caddr_t route_entry; char *frame_type; char *dst_addr; u_long dl_tag; { - int s, isr; - register struct ifqueue *ifq = 0; register struct mbuf *m = *m0; struct rtentry *rt = (struct rtentry*)route_entry; @@ -339,37 +330,10 @@ faith_pre_output(ifp, m0, dst, route_entry, frame_type, dst_addr, dl_tag) } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; - switch (dst->sa_family) { -#if INET - case AF_INET: - ifq = &ipintrq; - isr = NETISR_IP; - break; -#endif -#if INET6 - case AF_INET6: - ifq = &ip6intrq; - isr = NETISR_IPV6; - break; -#endif - default: - return EAFNOSUPPORT; - } - - /* XXX do we need more sanity checks? 
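
The faith changes below repeat a transformation made throughout this patch: rather than choosing a protocol input queue and scheduling a soft interrupt (IF_ENQUEUE plus schednetisr under splimp), the interface hands the packet straight to the protocol layer. A sketch of the new delivery step, assuming proto_inject(family, mbuf) returns an errno as its uses here suggest:

static errno_t
loopback_style_deliver(protocol_family_t family, mbuf_t m)
{
    errno_t err;

    /* proto_inject() does its own queueing and synchronization, so the
     * old splimp()/IF_QFULL()/schednetisr() dance disappears entirely */
    err = proto_inject(family, m);
    if (err != 0)
        mbuf_freem(m);  /* a failed inject leaves us owning the mbuf */
    return (err);
}

This is also why the per-family switch statements vanish: the address family itself selects the protocol, with no per-family queue or NETISR number needed.
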
*/ - m->m_pkthdr.rcvif = ifp; - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - splx(s); - return (ENOBUFS); - } - IF_ENQUEUE(ifq, m); - schednetisr(isr); + proto_inject(dst->sa_family, m); ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - splx(s); return (EJUSTRETURN); } @@ -409,7 +373,7 @@ faithioctl(ifp, cmd, data) switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP | IFF_RUNNING; + ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING); ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = faithrtrequest; /* diff --git a/bsd/net/if_faith.h b/bsd/net/if_faith.h index 3e97f5744..953c9b081 100644 --- a/bsd/net/if_faith.h +++ b/bsd/net/if_faith.h @@ -34,13 +34,11 @@ #define _NET_IF_FAITH_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #if INET6 struct in6_addr; -int faithprefix __P((struct in6_addr *)); +int faithprefix(struct in6_addr *); #endif /* INET6 */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_FAITH_H_ */ diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index f3e6be8c9..6db06ef62 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -66,7 +66,6 @@ #include #include -#include #include #include @@ -104,9 +103,9 @@ static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); TAILQ_HEAD(gifhead, gif_softc) gifs = TAILQ_HEAD_INITIALIZER(gifs); #ifdef __APPLE__ -void gifattach __P((void)); -int gif_pre_output __P((struct ifnet *, register struct mbuf **, struct sockaddr *, - caddr_t, char *, char *, u_long)); +void gifattach(void); +int gif_pre_output(struct ifnet *ifp, u_long protocol_family, struct mbuf **m0, + const struct sockaddr *dst, caddr_t rt, char *frame, char *address); static void gif_create_dev(void); static int gif_encapcheck(const struct mbuf*, int, int, void*); @@ -119,20 +118,22 @@ struct protosw in_gif_protosw = { SOCK_RAW, 0, 0/*IPPROTO_IPV[46]*/, PR_ATOMIC|PR_ADDR, in_gif_input, 0, 0, 0, 0, - 0, 0, 0, 0, + 0, 0, 0, 0, 0, - &rip_usrreqs + &rip_usrreqs, + 0, rip_unlock, 0 }; #endif #if INET6 struct ip6protosw in6_gif_protosw = { SOCK_RAW, 0, 0/*IPPROTO_IPV[46]*/, PR_ATOMIC|PR_ADDR, - in6_gif_input, - 0, 0, 0, + in6_gif_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, - &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0, + }; #endif @@ -163,185 +164,88 @@ static int max_gif_nesting = MAX_GIF_NEST; */ /* GIF interface module support */ -int gif_demux(ifp, m, frame_header, proto) - struct ifnet *ifp; - struct mbuf *m; - char *frame_header; - struct if_proto **proto; +int gif_demux( + struct ifnet *ifp, + struct mbuf *m, + char *frame_header, + u_long *protocol_family) { struct gif_softc* gif = (struct gif_softc*)ifp->if_softc; /* Only one protocol may be attached to a gif interface. 
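
The widened protosw initializers just above pick up the new locking slots (note rip_unlock appearing near the end of each table). Written with designated initializers, the same entry reads as below; the field names are assumed from the contemporary protosw layout, so treat this as a sketch rather than the file's literal form:

struct protosw in_gif_protosw_sketch = {
    .pr_type     = SOCK_RAW,
    .pr_protocol = 0,                    /* IPPROTO_IPV[46], set at attach */
    .pr_flags    = PR_ATOMIC | PR_ADDR,
    .pr_input    = in_gif_input,
    .pr_usrreqs  = &rip_usrreqs,
    .pr_unlock   = rip_unlock,           /* new: per-socket unlock hook */
};
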
*/ - *proto = gif->gif_proto; + *protocol_family = gif->gif_proto; return 0; } static -int gif_add_if(struct ifnet *ifp) -{ - ifp->if_demux = gif_demux; - ifp->if_framer = 0; - return 0; -} - -static -int gif_del_if(struct ifnet *ifp) -{ - return 0; -} - -static -int gif_add_proto(struct ddesc_head_str *desc_head, struct if_proto *proto, u_long dl_tag) +int gif_add_proto(struct ifnet *ifp, u_long protocol_family, struct ddesc_head_str *desc_head) { /* Only one protocol may be attached at a time */ - struct gif_softc* gif = (struct gif_softc*)proto->ifp; + struct gif_softc* gif = (struct gif_softc*)ifp->if_softc; - if (gif->gif_proto != NULL) + if (gif->gif_proto != 0) printf("gif_add_proto: request add_proto for gif%d\n", gif->gif_if.if_unit); - gif->gif_proto = proto; + gif->gif_proto = protocol_family; return 0; } static -int gif_del_proto(struct if_proto *proto, u_long dl_tag) +int gif_del_proto(struct ifnet *ifp, u_long protocol_family) { - if (((struct gif_softc*)proto->ifp)->gif_proto == proto) - ((struct gif_softc*)proto->ifp)->gif_proto = NULL; + if (((struct gif_softc*)ifp)->gif_proto == protocol_family) + ((struct gif_softc*)ifp)->gif_proto = 0; else return ENOENT; return 0; } -int gif_shutdown() -{ - return 0; -} - -void gif_reg_if_mods() -{ - struct dlil_ifmod_reg_str gif_ifmod; - - bzero(&gif_ifmod, sizeof(gif_ifmod)); - gif_ifmod.add_if = gif_add_if; - gif_ifmod.del_if = gif_del_if; - gif_ifmod.add_proto = gif_add_proto; - gif_ifmod.del_proto = gif_del_proto; - gif_ifmod.ifmod_ioctl = 0; - gif_ifmod.shutdown = gif_shutdown; - - if (dlil_reg_if_modules(APPLE_IF_FAM_GIF, &gif_ifmod)) - panic("Couldn't register gif modules\n"); - -} - /* Glue code to attach inet to a gif interface through DLIL */ - -u_long gif_attach_proto_family(struct ifnet *ifp, int af) +int +gif_attach_proto_family( + struct ifnet *ifp, + u_long protocol_family) { struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - u_long dl_tag=0; - short native=0; int stat; - /* Check if we're already attached */ - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, af, &dl_tag); - if (stat == 0) - return dl_tag; - + bzero(®, sizeof(reg)); TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); reg.interface_family = ifp->if_family; reg.unit_number = ifp->if_unit; reg.input = gif_input; reg.pre_output = gif_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = 0; - reg.default_proto = 0; - reg.protocol_family = af; - - stat = dlil_attach_protocol(®, &dl_tag); - if (stat) { - panic("gif_attach_proto_family can't attach interface fam=%d\n", af); - } - - return dl_tag; -} + reg.protocol_family = protocol_family; -u_long gif_detach_proto_family(struct ifnet *ifp, int af) -{ - u_long ip_dl_tag = 0; - int stat; - - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, af, &ip_dl_tag); - if (stat == 0) { - stat = dlil_detach_protocol(ip_dl_tag); - if (stat) { - printf("WARNING: gif_detach can't detach IP fam=%d from interface\n", af); - } + stat = dlil_attach_protocol(®); + if (stat && stat != EEXIST) { + panic("gif_attach_proto_family can't attach interface fam=%d\n", protocol_family); } - return (stat); -} - -int gif_attach_inet(struct ifnet *ifp, u_long *dl_tag) { - *dl_tag = gif_attach_proto_family(ifp, AF_INET); - return 0; -} - -int gif_detach_inet(struct ifnet *ifp, u_long dl_tag) { - 
gif_detach_proto_family(ifp, AF_INET); - return 0; -} -int gif_attach_inet6(struct ifnet *ifp, u_long *dl_tag) { - *dl_tag = gif_attach_proto_family(ifp, AF_INET6); - return 0; + return stat; } -int gif_detach_inet6(struct ifnet *ifp, u_long dl_tag) { - gif_detach_proto_family(ifp, AF_INET6); - return 0; -} #endif /* Function to setup the first gif interface */ void gifattach(void) { - struct dlil_protomod_reg_str gif_protoreg; int error; /* Init the list of interfaces */ TAILQ_INIT(&gifs); - gif_reg_if_mods(); /* DLIL modules */ - /* Register protocol registration functions */ - - bzero(&gif_protoreg, sizeof(gif_protoreg)); - gif_protoreg.attach_proto = gif_attach_inet; - gif_protoreg.detach_proto = gif_detach_inet; - - if ( error = dlil_reg_proto_module(AF_INET, APPLE_IF_FAM_GIF, &gif_protoreg) != 0) + if ( error = dlil_reg_proto_module(AF_INET, APPLE_IF_FAM_GIF, gif_attach_proto_family, NULL) != 0) printf("dlil_reg_proto_module failed for AF_INET error=%d\n", error); - - gif_protoreg.attach_proto = gif_attach_inet6; - gif_protoreg.detach_proto = gif_detach_inet6; - if ( error = dlil_reg_proto_module(AF_INET6, APPLE_IF_FAM_GIF, &gif_protoreg) != 0) + if ( error = dlil_reg_proto_module(AF_INET6, APPLE_IF_FAM_GIF, gif_attach_proto_family, NULL) != 0) printf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", error); - /* Create first device */ gif_create_dev(); } @@ -399,6 +303,7 @@ gif_create_dev(void) } #endif + sc->gif_called = 0; sc->gif_if.if_family= APPLE_IF_FAM_GIF; sc->gif_if.if_mtu = GIF_MTU; sc->gif_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; @@ -406,9 +311,12 @@ gif_create_dev(void) /* turn off ingress filter */ sc->gif_if.if_flags |= IFF_LINK2; #endif + sc->gif_if.if_demux = gif_demux; sc->gif_if.if_ioctl = gif_ioctl; sc->gif_if.if_output = NULL; /* pre_output returns error or EJUSTRETURN */ sc->gif_if.if_type = IFT_GIF; + sc->gif_if.if_add_proto = gif_add_proto; + sc->gif_if.if_del_proto = gif_del_proto; dlil_if_attach(&sc->gif_if); bpfattach(&sc->gif_if, DLT_NULL, sizeof(u_int)); TAILQ_INSERT_TAIL(&gifs, sc, gif_link); @@ -473,20 +381,19 @@ gif_encapcheck(m, off, proto, arg) } int -gif_pre_output(ifp, m0, dst, rt, frame, address, dl_tag) - struct ifnet *ifp; - struct mbuf **m0; - struct sockaddr *dst; - caddr_t rt; - char *frame; - char *address; - u_long dl_tag; +gif_pre_output( + struct ifnet *ifp, + u_long protocol_family, + struct mbuf **m0, + const struct sockaddr *dst, + caddr_t rt, + char *frame, + char *address) { struct gif_softc *sc = (struct gif_softc*)ifp; register struct mbuf * m = *m0; int error = 0; - static int called = 0; /* XXX: MUTEX */ - + /* * gif may cause infinite recursion calls when misconfigured. * We'll prevent this by introducing upper limit. @@ -494,16 +401,16 @@ gif_pre_output(ifp, m0, dst, rt, frame, address, dl_tag) * mutual exclusion of the variable CALLED, especially if we * use kernel thread. */ - if (++called > max_gif_nesting) { + if (++sc->gif_called > max_gif_nesting) { log(LOG_NOTICE, "gif_output: recursively called too many times(%d)\n", - called); + sc->gif_called); m_freem(m); /* free it here not in dlil_output*/ error = EIO; /* is there better errno? */ goto end; } - getmicrotime(&ifp->if_lastchange); + ifnet_touch_lastchange(ifp); m->m_flags &= ~(M_BCAST|M_MCAST); if (!(ifp->if_flags & IFF_UP) || sc->gif_psrc == NULL || sc->gif_pdst == NULL) { @@ -521,11 +428,11 @@ gif_pre_output(ifp, m0, dst, rt, frame, address, dl_tag) * try to free it or keep a pointer a to it). 
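
The comment above describes a standard trick worth isolating: DLT_NULL taps expect a four-byte address family ahead of the packet, so the code builds a throwaway mbuf header on the stack that chains to the real packet instead of physically prepending (and later trimming) four bytes. Sketch:

static void
bpf_tap_with_af_sketch(struct ifnet *ifp, struct mbuf *m, u_int32_t af)
{
    struct mbuf m0;

    m0.m_next = m;              /* chain to the untouched packet */
    m0.m_len  = 4;              /* just the pseudo-header */
    m0.m_data = (char *)&af;    /* lives on the stack; BPF only reads */
    bpf_mtap(ifp, &m0);
}
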
*/ struct mbuf m0; - u_int32_t af = dst->sa_family; + u_int32_t protocol_family = dst->sa_family; m0.m_next = m; m0.m_len = 4; - m0.m_data = (char *)⁡ + m0.m_data = (char *)&protocol_family; bpf_mtap(ifp, &m0); } @@ -554,7 +461,7 @@ gif_pre_output(ifp, m0, dst, rt, frame, address, dl_tag) } end: - called = 0; /* reset recursion counter */ + sc->gif_called = 0; /* reset recursion counter */ if (error) { /* the mbuf was freed either by in_gif_output or in here */ *m0 = NULL; /* avoid getting dlil_output freeing it */ @@ -566,16 +473,13 @@ gif_pre_output(ifp, m0, dst, rt, frame, address, dl_tag) } int -gif_input(m, frame_header, gifp, dl_tag, sync_ok) - struct mbuf *m; - char* frame_header; - struct ifnet* gifp; - u_long dl_tag; - int sync_ok; +gif_input( + struct mbuf *m, + char* frame_header, + struct ifnet* gifp, + u_long protocol_family, + int sync_ok) { - int s, isr; - struct ifqueue *ifq = 0; - int af; if (gifp == NULL) { /* just in case */ @@ -583,9 +487,6 @@ gif_input(m, frame_header, gifp, dl_tag, sync_ok) return; } - /* Assume packet is of type of protocol attached to this interface */ - af = ((struct gif_softc*)(gifp->if_softc))->gif_proto->protocol_family; - if (m->m_pkthdr.rcvif) m->m_pkthdr.rcvif = gifp; @@ -598,11 +499,11 @@ gif_input(m, frame_header, gifp, dl_tag, sync_ok) * try to free it or keep a pointer a to it). */ struct mbuf m0; - u_int32_t af1 = af; + u_int32_t protocol_family1 = protocol_family; m0.m_next = m; m0.m_len = 4; - m0.m_data = (char *)&af1; + m0.m_data = (char *)&protocol_family1; bpf_mtap(gifp, &m0); } @@ -619,37 +520,9 @@ gif_input(m, frame_header, gifp, dl_tag, sync_ok) * it occurs more times than we thought, we may change the policy * again. */ - switch (af) { -#if INET - case AF_INET: - ifq = &ipintrq; - isr = NETISR_IP; - break; -#endif -#if INET6 - case AF_INET6: - ifq = &ip6intrq; - isr = NETISR_IPV6; - break; -#endif - default: - m_freem(m); - return (EJUSTRETURN); - } - - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); /* update statistics */ - m_freem(m); - splx(s); - return (EJUSTRETURN); - } - IF_ENQUEUE(ifq, m); - /* we need schednetisr since the address family may change */ - schednetisr(isr); + proto_input(protocol_family, m); gifp->if_ipackets++; gifp->if_ibytes += m->m_pkthdr.len; - splx(s); return (0); } @@ -781,7 +654,8 @@ gif_ioctl(ifp, cmd, data) break; } - TAILQ_FOREACH(ifp2, &ifnet, if_link) { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp2, &ifnet_head, if_link) { if (strcmp(ifp2->if_name, GIFNAME) != 0) continue; sc2 = ifp2->if_softc; @@ -799,6 +673,7 @@ gif_ioctl(ifp, cmd, data) if (bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 && bcmp(sc2->gif_psrc, src, src->sa_len) == 0) { error = EADDRNOTAVAIL; + ifnet_head_done(); goto bad; } #endif @@ -813,16 +688,19 @@ gif_ioctl(ifp, cmd, data) if (dst->sa_family == AF_INET && multidest(dst) && multidest(sc2->gif_pdst)) { error = EADDRNOTAVAIL; + ifnet_head_done(); goto bad; } #if INET6 if (dst->sa_family == AF_INET6 && multidest6(dst) && multidest6(sc2->gif_pdst)) { error = EADDRNOTAVAIL; + ifnet_head_done(); goto bad; } #endif } + ifnet_head_done(); if (sc->gif_psrc) FREE((caddr_t)sc->gif_psrc, M_IFADDR); @@ -838,8 +716,6 @@ gif_ioctl(ifp, cmd, data) ifp->if_flags |= IFF_RUNNING; - gif_attach_proto_family(ifp, src->sa_family); - s = splimp(); if_up(ifp); /* mark interface UP and send up RTM_IFINFO */ #ifdef __APPLE__ @@ -966,6 +842,8 @@ gif_ioctl(ifp, cmd, data) return error; } +#ifndef __APPLE__ +/* This function is not used in our stack */ void gif_delete_tunnel(sc) struct gif_softc 
*sc; @@ -982,3 +860,4 @@ gif_delete_tunnel(sc) } /* change the IFF_UP flag as well? */ } +#endif diff --git a/bsd/net/if_gif.h b/bsd/net/if_gif.h index a74b84a4e..bfd647244 100644 --- a/bsd/net/if_gif.h +++ b/bsd/net/if_gif.h @@ -61,15 +61,15 @@ #include /* xxx sigh, why route have struct route instead of pointer? */ +#ifdef KERNEL_PRIVATE struct encaptab; -#ifdef __APPLE_API_PRIVATE struct gif_softc { struct ifnet gif_if; /* common area - must be at the top */ struct sockaddr *gif_psrc; /* Physical src addr */ struct sockaddr *gif_pdst; /* Physical dst addr */ #ifdef __APPLE__ - struct if_proto *gif_proto; /* dlil protocol attached */ + u_long gif_proto; /* dlil protocol attached */ #endif union { struct route gifscr_ro; /* xxx */ @@ -78,6 +78,7 @@ struct gif_softc { #endif } gifsc_gifscr; int gif_flags; + int gif_called; const struct encaptab *encap_cookie4; const struct encaptab *encap_cookie6; TAILQ_ENTRY(gif_softc) gif_link; /* all gif's are linked */ @@ -87,18 +88,20 @@ struct gif_softc { #if INET6 #define gif_ro6 gifsc_gifscr.gifscr_ro6 #endif -#endif /* __APPLE_API_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ #define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ #define GIF_MTU_MAX (8192) /* Maximum MTU */ -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE + /* Prototypes */ -int gif_input __P((struct mbuf *, char*, struct ifnet *, u_long, int)); -int gif_output __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *)); -int gif_ioctl __P((struct ifnet *, u_long, void*)); -#endif /* __APPLE_API_PRIVATE */ +int gif_input(struct mbuf *, char*, struct ifnet *, u_long, int); +int gif_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); +int gif_ioctl(struct ifnet *, u_long, void*); +#endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_GIF_H_ */ diff --git a/bsd/net/if_loop.c b/bsd/net/if_loop.c index 33255ecb1..57914cd96 100644 --- a/bsd/net/if_loop.c +++ b/bsd/net/if_loop.c @@ -70,7 +70,6 @@ #include #include -#include #include #include #include @@ -94,6 +93,7 @@ #endif #include +#include #if NETAT extern struct ifqueue atalkintrq; @@ -107,14 +107,14 @@ extern struct ifqueue atalkintrq; #define NLOOP_ATTACHMENTS (NLOOP * 12) struct lo_statics_str { - int bpf_mode; - int (*bpf_callback)(struct ifnet *, struct mbuf *); + int bpf_mode; + bpf_packet_func bpf_callback; }; -static struct if_proto *lo_array[NLOOP_ATTACHMENTS]; -static struct lo_statics_str lo_statics[NLOOP]; -static lo_count = 0; +void loopattach(void *dummy); +static struct lo_statics_str lo_statics[NLOOP]; +int loopattach_done = 0; /* used to sync ip6_init2 loopback configuration */ #ifdef TINY_LOMTU #define LOMTU (1024+512) @@ -123,102 +123,70 @@ static lo_count = 0; #endif struct ifnet loif[NLOOP]; +struct ifnet *lo_ifp = &loif[0]; -void lo_reg_if_mods(); - +struct loopback_header { + u_long protocol; +}; +void lo_reg_if_mods(void); +/* Local forward declerations */ -int lo_demux(ifp, m, frame_header, proto) - struct ifnet *ifp; - struct mbuf *m; - char *frame_header; - struct if_proto **proto; +static errno_t +lo_demux( + __unused ifnet_t ifp, + __unused mbuf_t m, + char *frame_header, + protocol_family_t *protocol_family) { - int i; - struct if_proto **proto_ptr; - - proto_ptr = mtod(m, struct if_proto **); - *proto = *proto_ptr; - m_adj(m, sizeof(u_long)); - return 0; + struct loopback_header *header = (struct loopback_header *)frame_header; + + *protocol_family = header->protocol; + + return 0; } -int lo_framer(ifp, m, dest, dest_linkaddr, frame_type) - 
struct ifnet *ifp; - struct mbuf **m; - struct sockaddr *dest; - char *dest_linkaddr; - char *frame_type; - +static errno_t +lo_framer( + __unused ifnet_t ifp, + mbuf_t *m, + __unused const struct sockaddr *dest, + __unused const char *dest_linkaddr, + const char *frame_type) { - char *to_ptr; + struct loopback_header *header; - M_PREPEND(*m, (4 * sizeof(u_long)), M_WAITOK); - to_ptr = mtod(*m, char *); - bcopy(dest_linkaddr, to_ptr, (4 * sizeof(u_long))); + M_PREPEND(*m, sizeof(struct loopback_header), M_WAITOK); + header = mtod(*m, struct loopback_header*); + header->protocol = *(const u_long*)frame_type; return 0; } -static -int lo_add_if(struct ifnet *ifp) -{ - ifp->if_demux = lo_demux; - ifp->if_framer = lo_framer; - ifp->if_event = 0; - return 0; -} - -static -int lo_del_if(struct ifnet *ifp) +static errno_t +lo_add_proto( + __unused struct ifnet *ifp, + __unused u_long protocol_family, + __unused struct ddesc_head_str *demux_desc_head) { return 0; } - - -static -int lo_add_proto(struct ddesc_head_str *desc_head, struct if_proto *proto, u_long dl_tag) +static errno_t +lo_del_proto( + __unused ifnet_t ifp, + __unused protocol_family_t protocol) { - int i; - - for (i=0; i < lo_count; i++) - if (lo_array[i] == 0) { - lo_array[lo_count] = proto; - return 0; - } - - if ((i == lo_count) && (lo_count == NLOOP_ATTACHMENTS)) - panic("lo_add_proto -- Too many attachments\n"); - - lo_array[lo_count++] = proto; - return 0; -} - - -static -int lo_del_proto(struct if_proto *proto, u_long dl_tag) -{ - int i; - - for (i=0; i < lo_count; i++) - if (lo_array[i] == proto) { - lo_array[i] = 0; - return 0; - } - - return ENOENT; + return 0; } static int -lo_output(ifp, m) - struct ifnet *ifp; - register struct mbuf *m; -{ u_int *prepend_ptr; - u_int af; - u_long saved_header[3]; +lo_output( + struct ifnet *ifp, + struct mbuf *m) +{ if ((m->m_flags & M_PKTHDR) == 0) panic("lo_output: no HDR"); @@ -230,20 +198,26 @@ lo_output(ifp, m) */ if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifp; - prepend_ptr = mtod(m, u_int *); - af = *prepend_ptr; - m_adj(m, sizeof(u_int)); + ifp->if_ibytes += m->m_pkthdr.len; + ifp->if_obytes += m->m_pkthdr.len; + + ifp->if_opackets++; + ifp->if_ipackets++; + + m->m_pkthdr.header = mtod(m, char *); + m->m_pkthdr.csum_data = 0xffff; /* loopback checksums are always OK */ + m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + m_adj(m, sizeof(struct loopback_header)); #if NBPFILTER > 0 if (lo_statics[ifp->if_unit].bpf_mode != BPF_TAP_DISABLE) { struct mbuf m0, *n; - bcopy(mtod(m, caddr_t), &saved_header[0], (3 * sizeof(u_long))); - m_adj(m, (3 * sizeof(u_long))); - n = m; if (ifp->if_bpf->bif_dlt == DLT_NULL) { + struct loopback_header *header; /* * We need to prepend the address family as * a four byte field. Cons up a dummy header @@ -251,156 +225,70 @@ lo_output(ifp, m) * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). 
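
lo_framer and lo_demux above form a matched pair: the framer prepends a struct loopback_header holding the protocol family, and the demux recovers the family from those same bytes, replacing the old scheme that smuggled several u_longs through the front of the mbuf. The round trip, condensed into one illustrative function:

static u_long
loopback_header_roundtrip_sketch(mbuf_t *m, u_long family)
{
    struct loopback_header *h;

    /* outbound (lo_framer): prepend and stamp the family */
    M_PREPEND(*m, sizeof(*h), M_WAITOK);
    h = mtod(*m, struct loopback_header *);
    h->protocol = family;

    /* inbound (lo_demux, then lo_output): read it back, strip it */
    h = mtod(*m, struct loopback_header *);
    family = h->protocol;
    m_adj(*m, sizeof(*h));
    return (family);
}
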
*/ + header = (struct loopback_header*)m->m_pkthdr.header; m0.m_next = m; m0.m_len = 4; - m0.m_data = (char *)⁡ + m0.m_data = (char *)&header->protocol; n = &m0; } - (*lo_statics[ifp->if_unit].bpf_callback)(ifp, n); - - M_PREPEND(m, (3 * sizeof(u_long)), M_WAITOK); - bcopy(&saved_header[0], mtod(m, caddr_t), (3 * sizeof(u_long))); - + lo_statics[ifp->if_unit].bpf_callback(ifp, n); } #endif - ifp->if_ibytes += m->m_pkthdr.len; - ifp->if_obytes += m->m_pkthdr.len; - - ifp->if_opackets++; - ifp->if_ipackets++; - - m->m_pkthdr.header = mtod(m, char *); - m->m_pkthdr.csum_data = 0xffff; /* loopback checksums are always OK */ - m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; return dlil_input(ifp, m, m); } /* - * This is a common pre-output route used by INET, AT, etc. This could + * This is a common pre-output route used by INET and INET6. This could * (should?) be split into separate pre-output routines for each protocol. */ static int -lo_pre_output(ifp, m, dst, route, frame_type, dst_addr, dl_tag) - struct ifnet *ifp; - register struct mbuf **m; - struct sockaddr *dst; - void *route; - char *frame_type; - char *dst_addr; - u_long dl_tag; +lo_pre_output( + __unused struct ifnet *ifp, + u_long protocol_family, + struct mbuf **m, + __unused const struct sockaddr *dst, + caddr_t route, + char *frame_type, + __unused char *dst_addr) { - int s, isr; - register struct ifqueue *ifq = 0; - u_long *prepend_ptr; register struct rtentry *rt = (struct rtentry *) route; - prepend_ptr = (u_long *) dst_addr; if (((*m)->m_flags & M_PKTHDR) == 0) panic("looutput no HDR"); if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - if (rt->rt_flags & RTF_BLACKHOLE) { - m_freem(*m); - return EJUSTRETURN; - } - else - return ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); - } - - switch (dst->sa_family) { -#if INET - case AF_INET: - ifq = &ipintrq; - isr = NETISR_IP; - break; -#endif -#if INET6 - case AF_INET6: - (*m)->m_flags |= M_LOOP; - ifq = &ip6intrq; - isr = NETISR_IPV6; - break; -#endif -#if IPX - case AF_IPX: - ifq = &ipxintrq; - isr = NETISR_IPX; - break; -#endif -#if NS - case AF_NS: - ifq = &nsintrq; - isr = NETISR_NS; - break; -#endif -#if ISO - case AF_ISO: - ifq = &clnlintrq; - isr = NETISR_ISO; - break; -#endif -#if NETAT - case AF_APPLETALK: - ifq = &atalkintrq; - isr = NETISR_APPLETALK; - break; -#endif /* NETAT */ - default: - return (EAFNOSUPPORT); + if (rt->rt_flags & RTF_BLACKHOLE) { + m_freem(*m); + return EJUSTRETURN; + } + else + return ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } - - *prepend_ptr++ = dst->sa_family; /* For lo_output(BPF) */ - *prepend_ptr++ = dlttoproto(dl_tag); /* For lo_demux */ - *prepend_ptr++ = (u_long) ifq; /* For lo_input */ - *prepend_ptr = isr; /* For lo_input */ + + *(u_long *)frame_type = protocol_family; return 0; } - - - /* * lo_input - This should work for all attached protocols that use the * ifq/schednetisr input mechanism. 
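
One detail of lo_output above deserves a callout: loopback stamps every packet as already checksummed so the receive path does no verification work, and the hardware-assist flags set at attach time keep the transmit path from computing sums either. The flag combination, isolated:

static void
mark_loopback_csum_ok(struct mbuf *m)
{
    m->m_pkthdr.csum_data  = 0xffff;    /* checksum covers everything */
    m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
                             CSUM_IP_CHECKED | CSUM_IP_VALID;
}
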
*/ - - -int -lo_input(m, fh, ifp, dl_tag, sync_ok) - register struct mbuf *m; - char *fh; - struct ifnet *ifp; - u_long dl_tag; - int sync_ok; - +static int +lo_input( + struct mbuf *m, + __unused char *fh, + __unused struct ifnet *ifp, + __unused u_long protocol_family, + __unused int sync_ok) { - u_long *prepend_ptr; - int s, isr; - register struct ifqueue *ifq = 0; - - prepend_ptr = mtod(m, u_long *); - ifq = (struct ifqueue *) *prepend_ptr++; - isr = *prepend_ptr; - m_adj(m, (2 * sizeof(u_long))); - - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); + if (proto_input(protocol_family, m) != 0) m_freem(m); - splx(s); - return (EJUSTRETURN); - } - - IF_ENQUEUE(ifq, m); - schednetisr(isr); - splx(s); return (0); } @@ -409,10 +297,10 @@ lo_input(m, fh, ifp, dl_tag, sync_ok) /* ARGSUSED */ static void -lortrequest(cmd, rt, sa) - int cmd; - struct rtentry *rt; - struct sockaddr *sa; +lortrequest( + __unused int cmd, + struct rtentry *rt, + __unused struct sockaddr *sa) { if (rt) { rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ @@ -429,8 +317,11 @@ lortrequest(cmd, rt, sa) /* * Process an ioctl request. */ -static int -lo_if_ioctl(struct ifnet *ifp, u_long cmd, void * data) +static errno_t +loioctl( + ifnet_t ifp, + u_int32_t cmd, + void* data) { register struct ifaddr *ifa; register struct ifreq *ifr = (struct ifreq *)data; @@ -439,7 +330,7 @@ lo_if_ioctl(struct ifnet *ifp, u_long cmd, void * data) switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP | IFF_RUNNING; + ifnet_set_flags(ifp, IFF_UP | IFF_RUNNING, IFF_UP | IFF_RUNNING); ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = lortrequest; /* @@ -483,140 +374,49 @@ lo_if_ioctl(struct ifnet *ifp, u_long cmd, void * data) } return (error); } - -static int -loioctl(u_long dl_tag, struct ifnet *ifp, u_long cmd, caddr_t data) -{ - return (lo_if_ioctl(ifp, cmd, data)); -} - #endif /* NLOOP > 0 */ -int lo_shutdown() -{ - return 0; -} - -int lo_attach_inet(struct ifnet *ifp, u_long *dl_tag) +static int lo_attach_proto(struct ifnet *ifp, u_long protocol_family) { - struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - short native=0; - int stat =0 ; - int i; - - for (i=0; i < lo_count; i++) { - if ((lo_array[i]) && (lo_array[i]->ifp == ifp)) { - if (lo_array[i]->protocol_family == PF_INET) { - *dl_tag = lo_array[i]->dl_tag; - return (0); - } - } - } - - TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); - reg.interface_family = ifp->if_family; - reg.unit_number = ifp->if_unit; - reg.input = lo_input; - reg.pre_output = lo_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = loioctl; - reg.default_proto = 0; - reg.protocol_family = PF_INET; - - stat = dlil_attach_protocol(®, dl_tag); - - if (stat) - printf("lo_attach_inet: dlil_attach_protocol returned=%d\n", stat); - - return stat; -} + struct dlil_proto_reg_str reg; + int stat =0 ; + + bzero(®, sizeof(reg)); + TAILQ_INIT(®.demux_desc_head); + reg.interface_family = ifp->if_family; + reg.unit_number = ifp->if_unit; + reg.input = lo_input; + reg.pre_output = lo_pre_output; + reg.protocol_family = protocol_family; + + stat = dlil_attach_protocol(®); -int lo_attach_inet6(struct ifnet *ifp, u_long *dl_tag) -{ - struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - short native=0; - int stat; - int i; - - for (i=0; i < lo_count; i++) { 
- if ((lo_array[i]) && (lo_array[i]->ifp == ifp)) { - if (lo_array[i]->protocol_family == PF_INET6) { - *dl_tag = lo_array[i]->dl_tag; - return (0); - } + if (stat && stat != EEXIST) { + printf("lo_attach_proto: dlil_attach_protocol for %d returned=%d\n", + protocol_family, stat); } - } - - TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); - reg.interface_family = ifp->if_family; - reg.unit_number = ifp->if_unit; - reg.input = lo_input; - reg.pre_output = lo_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = loioctl; - reg.default_proto = 0; - reg.protocol_family = PF_INET6; - - stat = dlil_attach_protocol(®, dl_tag); - - if (stat) - printf("lo_attach_inet6: dlil_attach_protocol returned=%d\n", stat); - - return stat; + + return stat; } void lo_reg_if_mods() { - struct dlil_ifmod_reg_str lo_ifmod; - struct dlil_protomod_reg_str lo_protoreg; int error; - bzero(&lo_ifmod, sizeof(lo_ifmod)); - lo_ifmod.add_if = lo_add_if; - lo_ifmod.del_if = lo_del_if; - lo_ifmod.add_proto = lo_add_proto; - lo_ifmod.del_proto = lo_del_proto; - lo_ifmod.ifmod_ioctl = 0; - lo_ifmod.shutdown = lo_shutdown; - - if (dlil_reg_if_modules(APPLE_IF_FAM_LOOPBACK, &lo_ifmod)) - panic("Couldn't register lo modules\n"); - /* Register protocol registration functions */ - - bzero(&lo_protoreg, sizeof(lo_protoreg)); - lo_protoreg.attach_proto = lo_attach_inet; - lo_protoreg.detach_proto = NULL; /* no detach function for loopback */ - - if ( error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_LOOPBACK, &lo_protoreg) != 0) + if ((error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0) printf("dlil_reg_proto_module failed for AF_INET error=%d\n", error); - lo_protoreg.attach_proto = lo_attach_inet6; - lo_protoreg.detach_proto = NULL; - - if ( error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_LOOPBACK, &lo_protoreg) != 0) + if ((error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_LOOPBACK, lo_attach_proto, NULL)) != 0) printf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", error); - } -int lo_set_bpf_tap(struct ifnet *ifp, int mode, int (*bpf_callback)(struct ifnet *, struct mbuf *)) +static errno_t +lo_set_bpf_tap( + ifnet_t ifp, + bpf_tap_mode mode, + bpf_packet_func bpf_callback) { /* @@ -637,32 +437,38 @@ int lo_set_bpf_tap(struct ifnet *ifp, int mode, int (*bpf_callback)(struct ifnet /* ARGSUSED */ void -loopattach(dummy) - void *dummy; +loopattach( + __unused void *dummy) { - register struct ifnet *ifp; - register int i = 0; + struct ifnet *ifp; + int i = 0; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); lo_reg_if_mods(); for (ifp = loif; i < NLOOP; ifp++) { lo_statics[i].bpf_callback = 0; lo_statics[i].bpf_mode = BPF_TAP_DISABLE; + bzero(ifp, sizeof(struct ifnet)); ifp->if_name = "lo"; ifp->if_family = APPLE_IF_FAM_LOOPBACK; ifp->if_unit = i++; ifp->if_mtu = LOMTU; ifp->if_flags = IFF_LOOPBACK | IFF_MULTICAST; - ifp->if_ioctl = lo_if_ioctl; + ifp->if_ioctl = loioctl; + ifp->if_demux = lo_demux; + ifp->if_framer = lo_framer; + ifp->if_add_proto = lo_add_proto; + ifp->if_del_proto = lo_del_proto; ifp->if_set_bpf_tap = lo_set_bpf_tap; ifp->if_output = lo_output; ifp->if_type = IFT_LOOP; - ifp->if_hwassist = 0; /* HW cksum on send side breaks Classic loopback */ + ifp->if_hwassist = IF_HWASSIST_CSUM_IP | IF_HWASSIST_CSUM_TCP | 
IF_HWASSIST_CSUM_UDP; + ifp->if_hdrlen = sizeof(struct loopback_header); + lo_ifp = ifp; dlil_if_attach(ifp); #if NBPFILTER > 0 bpfattach(ifp, DLT_NULL, sizeof(u_int)); #endif } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + loopattach_done = 1; } diff --git a/bsd/net/if_media.c b/bsd/net/if_media.c index ed5ec87ab..f7c4e96dc 100644 --- a/bsd/net/if_media.c +++ b/bsd/net/if_media.c @@ -83,12 +83,12 @@ * Useful for debugging newly-ported drivers. */ -static struct ifmedia_entry *ifmedia_match __P((struct ifmedia *ifm, - int flags, int mask)); +static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm, + int flags, int mask); #ifdef IFMEDIA_DEBUG int ifmedia_debug = 0; -static void ifmedia_printword __P((int)); +static void ifmedia_printword(int); #endif /* @@ -213,11 +213,11 @@ ifmedia_set(ifm, target) * Device-independent media ioctl support function. */ int -ifmedia_ioctl(ifp, ifr, ifm, cmd) - struct ifnet *ifp; - struct ifreq *ifr; - struct ifmedia *ifm; - u_long cmd; +ifmedia_ioctl( + struct ifnet *ifp, + struct ifreq *ifr, + struct ifmedia *ifm, + u_long cmd) { struct ifmedia_entry *match; struct ifmediareq *ifmr = (struct ifmediareq *) ifr; @@ -349,7 +349,7 @@ ifmedia_ioctl(ifp, ifr, ifm, cmd) sticky = error; if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) { error = copyout((caddr_t)kptr, - (caddr_t)ifmr->ifm_ulist, + CAST_USER_ADDR_T(ifmr->ifm_ulist), ifmr->ifm_count * sizeof(int)); } diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h index f17ed30fa..077fc3e69 100644 --- a/bsd/net/if_media.h +++ b/bsd/net/if_media.h @@ -72,16 +72,14 @@ * to implement this interface. */ -#ifdef KERNEL - +#ifdef KERNEL_PRIVATE #include -#ifdef __APPLE_API_UNSTABLE /* * Driver callbacks for media status and change requests. */ -typedef int (*ifm_change_cb_t) __P((struct ifnet *ifp)); -typedef void (*ifm_stat_cb_t) __P((struct ifnet *ifp, struct ifmediareq *req)); +typedef int (*ifm_change_cb_t)(struct ifnet *ifp); +typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req); /* * In-kernel representation of a single supported media type. @@ -107,25 +105,24 @@ struct ifmedia { }; /* Initialize an interface's struct if_media field. */ -void ifmedia_init __P((struct ifmedia *ifm, int dontcare_mask, - ifm_change_cb_t change_callback, ifm_stat_cb_t status_callback)); +void ifmedia_init(struct ifmedia *ifm, int dontcare_mask, + ifm_change_cb_t change_callback, ifm_stat_cb_t status_callback); /* Add one supported medium to a struct ifmedia. */ -void ifmedia_add __P((struct ifmedia *ifm, int mword, int data, void *aux)); +void ifmedia_add(struct ifmedia *ifm, int mword, int data, void *aux); /* Add an array (of ifmedia_entry) media to a struct ifmedia. */ void ifmedia_list_add(struct ifmedia *mp, struct ifmedia_entry *lp, int count); /* Set default media type on initialization. */ -void ifmedia_set __P((struct ifmedia *ifm, int mword)); +void ifmedia_set(struct ifmedia *ifm, int mword); /* Common ioctl function for getting/setting media, called by driver. 
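
Typical driver-side use of the interface declared here, built only from the functions in this header (the foo_* callbacks are hypothetical): register the supported media words once, then let ifmedia_ioctl service SIOCGIFMEDIA/SIOCSIFMEDIA.

static int  foo_media_change(struct ifnet *ifp);
static void foo_media_status(struct ifnet *ifp, struct ifmediareq *req);
static struct ifmedia foo_media;

static void
foo_media_init(void)
{
    ifmedia_init(&foo_media, 0, foo_media_change, foo_media_status);
    /* advertise gigabit full duplex plus autoselect */
    ifmedia_add(&foo_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
    ifmedia_add(&foo_media, IFM_ETHER | IFM_AUTO, 0, NULL);
    ifmedia_set(&foo_media, IFM_ETHER | IFM_AUTO);      /* default */
}

Note the rename below: IFM_1000_T supersedes IFM_1000_TX (the medium is 1000baseT over four pairs, not 1000baseTX), with the old spelling kept only as a PRIVATE compatibility alias.
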
*/ -int ifmedia_ioctl __P((struct ifnet *ifp, struct ifreq *ifr, - struct ifmedia *ifm, u_long cmd)); +int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, + struct ifmedia *ifm, u_long cmd); -#endif /* __APPLE_API_UNSTABLE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ /* * if_media Options word: @@ -156,8 +153,13 @@ int ifmedia_ioctl __P((struct ifnet *ifp, struct ifreq *ifr, #define IFM_10_FL 13 /* 10baseFL - Fiber */ #define IFM_1000_LX 14 /* 1000baseLX - single-mode fiber */ #define IFM_1000_CX 15 /* 1000baseCX - 150ohm STP */ -#define IFM_1000_TX 16 /* 1000baseTX - 4 pair cat 5 */ +#define IFM_1000_T 16 /* 1000baseT - 4 pair cat 5 */ +#ifdef PRIVATE +#define IFM_1000_TX IFM_1000_T /* For compatibility */ +#endif /* PRIVATE */ #define IFM_HPNA_1 17 /* HomePNA 1.0 (1Mb/s) */ +#define IFM_10G_SR 18 /* 10GbaseSR - multi-mode fiber */ +#define IFM_10G_LR 19 /* 10GbaseLR - single-mode fiber */ /* * Token ring @@ -283,8 +285,10 @@ struct ifmedia_description { { IFM_10_FL, "10baseFL" }, \ { IFM_1000_LX, "1000baseLX" }, \ { IFM_1000_CX, "1000baseCX" }, \ - { IFM_1000_TX, "1000baseTX" }, \ + { IFM_1000_T, "1000baseT" }, \ { IFM_HPNA_1, "HomePNA1" }, \ + { IFM_10G_SR, "10GbaseSR" }, \ + { IFM_10G_LR, "10GbaseLR" }, \ { 0, NULL }, \ } @@ -307,8 +311,10 @@ struct ifmedia_description { { IFM_10_FL, "10FL" }, \ { IFM_1000_LX, "1000LX" }, \ { IFM_1000_CX, "1000CX" }, \ - { IFM_1000_TX, "1000TX" }, \ + { IFM_1000_T, "1000T" }, \ { IFM_HPNA_1, "HPNA1" }, \ + { IFM_10G_SR, "10GSR" }, \ + { IFM_10G_LR, "10GLR" }, \ { 0, NULL }, \ } diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c index fc94438fe..faa462f44 100644 --- a/bsd/net/if_mib.c +++ b/bsd/net/if_mib.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -85,48 +86,44 @@ */ SYSCTL_DECL(_net_link_generic); -SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0, + +SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RD, 0, "Variables global to all interfaces"); + SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD, &if_index, 0, "Number of configured interfaces"); -static int -sysctl_ifdata SYSCTL_HANDLER_ARGS /* XXX bad syntax! 
diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c
index fc94438fe..faa462f44 100644
--- a/bsd/net/if_mib.c
+++ b/bsd/net/if_mib.c
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -85,48 +86,44 @@
  */

 SYSCTL_DECL(_net_link_generic);
-SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW, 0,
+
+SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RD, 0,
 	"Variables global to all interfaces");
+
 SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, CTLFLAG_RD,
 	   &if_index, 0, "Number of configured interfaces");

-static int
-sysctl_ifdata SYSCTL_HANDLER_ARGS /* XXX bad syntax! */
-{
-	int *name = (int *)arg1;
-	int error, ifnlen;
-	u_int namelen = arg2;
-	struct ifnet *ifp;
-	char workbuf[64];
-	struct ifmibdata ifmd;
+static int sysctl_ifdata SYSCTL_HANDLER_ARGS;
+SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RD,
+	    sysctl_ifdata, "Interface table");

-	if (namelen != 2)
-		return EINVAL;
+static int sysctl_ifalldata SYSCTL_HANDLER_ARGS;
+SYSCTL_NODE(_net_link_generic, IFMIB_IFALLDATA, ifalldata, CTLFLAG_RD,
+	    sysctl_ifalldata, "Interface table");

-	if (name[0] <= 0 || name[0] > if_index)
-		return ENOENT;
+static int make_ifmibdata(struct ifnet *, int *, struct sysctl_req *);

-	ifp = ifnet_addrs[name[0] - 1]->ifa_ifp;
+int
+make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
+{
+	struct ifmibdata ifmd;
+	int error = 0;
+
 	switch(name[1]) {
 	default:
-		return ENOENT;
+		error = ENOENT;
+		break;

 	case IFDATA_GENERAL:
-		/*
-		ifnlen = snprintf(workbuf, sizeof(workbuf),
-		    "%s%d", ifp->if_name, ifp->if_unit);
-		if(ifnlen + 1 > sizeof ifmd.ifmd_name) {
-			return ENAMETOOLONG;
-		} else {
-			strcpy(ifmd.ifmd_name, workbuf);
-		}
-		*/
+
+		snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d",
+			ifp->if_name, ifp->if_unit);

 #define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld
 		COPY(pcount);
 		COPY(flags);
-		COPY(data);
+		if_data_internal_to_if_data64(&ifp->if_data, &ifmd.ifmd_data);
 #undef COPY
 		ifmd.ifmd_snd_len = ifp->if_snd.ifq_len;
 		ifmd.ifmd_snd_maxlen = ifp->if_snd.ifq_maxlen;
@@ -134,11 +131,12 @@ sysctl_ifdata SYSCTL_HANDLER_ARGS /* XXX bad syntax!
*/ +{ + int *name = (int *)arg1; + int error = 0; + u_int namelen = arg2; + struct ifnet *ifp; + + if (namelen != 2) + return EINVAL; + + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + ifnet_lock_shared(ifp); + + error = make_ifmibdata(ifp, name, req); + + ifnet_lock_done(ifp); + if (error) + break; + } + ifnet_head_done(); + return error; +} #endif diff --git a/bsd/net/if_mib.h b/bsd/net/if_mib.h index dc7417e89..0175e68a9 100644 --- a/bsd/net/if_mib.h +++ b/bsd/net/if_mib.h @@ -54,16 +54,15 @@ #define _NET_IF_MIB_H 1 #include -#ifdef __APPLE_API_UNSTABLE struct ifmibdata { - char ifmd_name[IFNAMSIZ]; /* name of interface */ - int ifmd_pcount; /* number of promiscuous listeners */ - int ifmd_flags; /* interface flags */ - int ifmd_snd_len; /* instantaneous length of send queue */ - int ifmd_snd_maxlen; /* maximum length of send queue */ - int ifmd_snd_drops; /* number of drops in send queue */ - int ifmd_filler[4]; /* for future expansion */ - struct if_data ifmd_data; /* generic information and statistics */ + char ifmd_name[IFNAMSIZ]; /* name of interface */ + unsigned int ifmd_pcount; /* number of promiscuous listeners */ + unsigned int ifmd_flags; /* interface flags */ + unsigned int ifmd_snd_len; /* instantaneous length of send queue */ + unsigned int ifmd_snd_maxlen; /* maximum length of send queue */ + unsigned int ifmd_snd_drops; /* number of drops in send queue */ + unsigned int ifmd_filler[4]; /* for future expansion */ + struct if_data64 ifmd_data; /* generic information and statistics */ }; /* @@ -71,12 +70,15 @@ struct ifmibdata { */ #define IFMIB_SYSTEM 1 /* non-interface-specific */ #define IFMIB_IFDATA 2 /* per-interface data table */ +#define IFMIB_IFALLDATA 3 /* all interfaces data at once */ /* * MIB tags for the various net.link.generic.ifdata tables */ -#define IFDATA_GENERAL 1 /* generic stats for all kinds of ifaces */ -#define IFDATA_LINKSPECIFIC 2 /* specific to the type of interface */ +#define IFDATA_GENERAL 1 /* generic stats for all kinds of ifaces */ +#define IFDATA_LINKSPECIFIC 2 /* specific to the type of interface */ +#define IFDATA_ADDRS 3 /* addresses assigned to interface */ +#define IFDATA_MULTIADDRS 4 /* multicast addresses assigned to interface */ /* * MIB tags at the net.link.generic.system level @@ -105,7 +107,7 @@ struct ifmibdata { */ /* For IFT_ETHER, IFT_ISO88023, and IFT_STARLAN, as used by RFC 1650 */ -struct ifmib_iso_8802_3 { +struct ifs_iso_8802_3 { u_int32_t dot3StatsAlignmentErrors; u_int32_t dot3StatsFCSErrors; u_int32_t dot3StatsSingleCollisionFrames; @@ -190,5 +192,4 @@ enum { * Put other types of interface MIBs here, or in interface-specific * header files if convenient ones already exist. */ -#endif /* __APPLE_API_UNSTABLE */ #endif /* _NET_IF_MIB_H */ diff --git a/bsd/net/if_pppvar.h b/bsd/net/if_pppvar.h index ec688a95b..35628358c 100644 --- a/bsd/net/if_pppvar.h +++ b/bsd/net/if_pppvar.h @@ -68,7 +68,7 @@ #warning if_pppvar.h is not used by the darwin kernel #endif -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * Supported network protocols. 
These values are used for @@ -85,10 +85,10 @@ struct ppp_softc { /*hi*/ u_int sc_flags; /* control/status bits; see if_ppp.h */ struct callout_handle sc_ch; /* Used for scheduling timeouts */ void *sc_devp; /* pointer to device-dep structure */ - void (*sc_start) __P((struct ppp_softc *)); /* start output proc */ - void (*sc_ctlp) __P((struct ppp_softc *)); /* rcvd control pkt */ - void (*sc_relinq) __P((struct ppp_softc *)); /* relinquish ifunit */ - void (*sc_setmtu) __P((struct ppp_softc *)); /* set mtu */ + void (*sc_start)(struct ppp_softc *); /* start output proc */ + void (*sc_ctlp)(struct ppp_softc *); /* rcvd control pkt */ + void (*sc_relinq)(struct ppp_softc *); /* relinquish ifunit */ + void (*sc_setmtu)(struct ppp_softc *); /* set mtu */ short sc_mru; /* max receive unit */ pid_t sc_xfer; /* used in transferring unit */ /*hi*/ struct ifqueue sc_rawq; /* received packets */ @@ -128,13 +128,13 @@ struct ppp_softc { extern struct ppp_softc ppp_softc[]; -struct ppp_softc *pppalloc __P((pid_t pid)); -void pppdealloc __P((struct ppp_softc *sc)); -int pppioctl __P((struct ppp_softc *sc, u_long cmd, caddr_t data, - int flag, struct proc *p)); -int pppoutput __P((struct ifnet *ifp, struct mbuf *m0, - struct sockaddr *dst, struct rtentry *rtp)); -void ppp_restart __P((struct ppp_softc *sc)); -void ppppktin __P((struct ppp_softc *sc, struct mbuf *m, int lost)); -struct mbuf *ppp_dequeue __P((struct ppp_softc *sc)); +struct ppp_softc *pppalloc(pid_t pid); +void pppdealloc(struct ppp_softc *sc); +int pppioctl(struct ppp_softc *sc, u_long cmd, caddr_t data, + int flag, struct proc *p); +int pppoutput(struct ifnet *ifp, struct mbuf *m0, + struct sockaddr *dst, struct rtentry *rtp); +void ppp_restart(struct ppp_softc *sc); +void ppppktin(struct ppp_softc *sc, struct mbuf *m, int lost); +struct mbuf *ppp_dequeue(struct ppp_softc *sc); #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/net/if_sppp.h b/bsd/net/if_sppp.h deleted file mode 100644 index f0b6435bc..000000000 --- a/bsd/net/if_sppp.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Defines for synchronous PPP/Cisco link level subroutines. - * - * Copyright (C) 1994 Cronyx Ltd. - * Author: Serge Vakulenko, - * - * Heavily revamped to conform to RFC 1661. - * Copyright (C) 1997, Joerg Wunsch. - * - * This software is distributed with NO WARRANTIES, not even the implied - * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - * Authors grant any other persons or organizations permission to use - * or modify this software as long as this message is kept with the software, - * all derivative works or modified versions. - * - * From: Version 2.0, Fri Oct 6 20:39:21 MSK 1995 - * - */ - -#ifndef _NET_IF_SPPP_H_ -#define _NET_IF_SPPP_H_ 1 -#include - -#ifndef DONT_WARN_OBSOLETE -#warning if_sppp.h is not used by the darwin kernel -#endif - -#define IDX_LCP 0 /* idx into state table */ - -struct slcp { - u_long opts; /* LCP options to send (bitfield) */ - u_long magic; /* local magic number */ - u_long mru; /* our max receive unit */ - u_long their_mru; /* their max receive unit */ - u_long protos; /* bitmask of protos that are started */ - u_char echoid; /* id of last keepalive echo request */ - /* restart max values, see RFC 1661 */ - int timeout; - int max_terminate; - int max_configure; - int max_failure; -}; - -#define IDX_IPCP 1 /* idx into state table */ - -struct sipcp { - u_long opts; /* IPCP options to send (bitfield) */ - u_int flags; -#define IPCP_HISADDR_SEEN 1 /* have seen his address already */ -#define IPCP_MYADDR_DYN 2 /* my address is dynamically assigned */ -#define IPCP_MYADDR_SEEN 4 /* have seen his address already */ -}; - -#define AUTHNAMELEN 32 -#define AUTHKEYLEN 16 - -struct sauth { - u_short proto; /* authentication protocol to use */ - u_short flags; -#define AUTHFLAG_NOCALLOUT 1 /* do not require authentication on */ - /* callouts */ -#define AUTHFLAG_NORECHALLENGE 2 /* do not re-challenge CHAP */ - u_char name[AUTHNAMELEN]; /* system identification name */ - u_char secret[AUTHKEYLEN]; /* secret password */ - u_char challenge[AUTHKEYLEN]; /* random challenge */ -}; - -#define IDX_PAP 2 -#define IDX_CHAP 3 - -#define IDX_COUNT (IDX_CHAP + 1) /* bump this when adding cp's! */ - -/* - * Don't change the order of this. Ordering the phases this way allows - * for a comparision of ``pp_phase >= PHASE_AUTHENTICATE'' in order to - * know whether LCP is up. - */ -enum ppp_phase { - PHASE_DEAD, PHASE_ESTABLISH, PHASE_TERMINATE, - PHASE_AUTHENTICATE, PHASE_NETWORK -}; - -#ifdef __APPLE_API_PRIVATE -struct sppp { - /* NB: pp_if _must_ be first */ - struct ifnet pp_if; /* network interface data */ - struct ifqueue pp_fastq; /* fast output queue */ - struct ifqueue pp_cpq; /* PPP control protocol queue */ - struct sppp *pp_next; /* next interface in keepalive list */ - u_int pp_flags; /* use Cisco protocol instead of PPP */ - u_short pp_alivecnt; /* keepalive packets counter */ - u_short pp_loopcnt; /* loopback detection counter */ - u_long pp_seq; /* local sequence number */ - u_long pp_rseq; /* remote sequence number */ - enum ppp_phase pp_phase; /* phase we're currently in */ - int state[IDX_COUNT]; /* state machine */ - u_char confid[IDX_COUNT]; /* id of last configuration request */ - int rst_counter[IDX_COUNT]; /* restart counter */ - int fail_counter[IDX_COUNT]; /* negotiation failure counter */ - struct callout_handle ch[IDX_COUNT]; /* per-proto and if callouts */ - struct callout_handle pap_my_to_ch; /* PAP needs one more... */ - struct slcp lcp; /* LCP params */ - struct sipcp ipcp; /* IPCP params */ - struct sauth myauth; /* auth params, i'm peer */ - struct sauth hisauth; /* auth params, i'm authenticator */ - /* - * These functions are filled in by sppp_attach(), and are - * expected to be used by the lower layer (hardware) drivers - * in order to communicate the (un)availability of the - * communication link. 
Lower layer drivers that are always - * ready to communicate (like hardware HDLC) can shortcut - * pp_up from pp_tls, and pp_down from pp_tlf. - */ - void (*pp_up)(struct sppp *sp); - void (*pp_down)(struct sppp *sp); - /* - * These functions need to be filled in by the lower layer - * (hardware) drivers if they request notification from the - * PPP layer whether the link is actually required. They - * correspond to the tls and tlf actions. - */ - void (*pp_tls)(struct sppp *sp); - void (*pp_tlf)(struct sppp *sp); - /* - * These (optional) functions may be filled by the hardware - * driver if any notification of established connections - * (currently: IPCP up) is desired (pp_con) or any internal - * state change of the interface state machine should be - * signaled for monitoring purposes (pp_chg). - */ - void (*pp_con)(struct sppp *sp); - void (*pp_chg)(struct sppp *sp, int new_state); - /* These two fields are for use by the lower layer */ - void *pp_lowerp; - int pp_loweri; -}; - -#endif /* __APPLE_API_PRIVATE */ - -#define PP_KEEPALIVE 0x01 /* use keepalive protocol */ -#define PP_CISCO 0x02 /* use Cisco protocol instead of PPP */ - /* 0x04 was PP_TIMO */ -#define PP_CALLIN 0x08 /* we are being called */ -#define PP_NEEDAUTH 0x10 /* remote requested authentication */ - - -#define PP_MTU 1500 /* default/minimal MRU */ -#define PP_MAX_MRU 2048 /* maximal MRU we want to negotiate */ - -/* - * Definitions to pass struct sppp data down into the kernel using the - * SIOC[SG]IFGENERIC ioctl interface. - * - * In order to use this, create a struct spppreq, fill in the cmd - * field with SPPPIOGDEFS, and put the address of this structure into - * the ifr_data portion of a struct ifreq. Pass this struct to a - * SIOCGIFGENERIC ioctl. Then replace the cmd field by SPPPIOCDEFS, - * modify the defs field as desired, and pass the struct ifreq now - * to a SIOCSIFGENERIC ioctl. 
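One correction while this text is retired: the define is spelled SPPPIOSDEFS, not SPPPIOCDEFS as the sentence above has it. For the record, the handshake the comment describes looks like this from user space; the `ppp0' name and the helper are invented for illustration, and this patch deletes the whole interface:

#include <err.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_sppp.h>

static void
sppp_toggle_keepalive(int s, const char *ifname)
{
	struct ifreq ifr;
	struct spppreq spr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	ifr.ifr_data = (caddr_t)&spr;

	spr.cmd = (int)SPPPIOGDEFS;		/* fetch the current defs */
	if (ioctl(s, SIOCGIFGENERIC, &ifr) == -1)
		err(1, "SIOCGIFGENERIC");

	spr.defs.pp_flags |= PP_KEEPALIVE;	/* modify as desired */

	spr.cmd = (int)SPPPIOSDEFS;		/* and write them back */
	if (ioctl(s, SIOCSIFGENERIC, &ifr) == -1)
		err(1, "SIOCSIFGENERIC");
}

Here s is any datagram socket, e.g. socket(AF_INET, SOCK_DGRAM, 0), and ifname would be something like "ppp0".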
- */ - -#define SPPPIOGDEFS ((caddr_t)(('S' << 24) + (1 << 16) + sizeof(struct sppp))) -#define SPPPIOSDEFS ((caddr_t)(('S' << 24) + (2 << 16) + sizeof(struct sppp))) - -struct spppreq { - int cmd; - struct sppp defs; -}; - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -void sppp_attach (struct ifnet *ifp); -void sppp_detach (struct ifnet *ifp); -void sppp_input (struct ifnet *ifp, struct mbuf *m); -int sppp_ioctl (struct ifnet *ifp, u_long cmd, void *data); -struct mbuf *sppp_dequeue (struct ifnet *ifp); -struct mbuf *sppp_pick(struct ifnet *ifp); -int sppp_isempty (struct ifnet *ifp); -void sppp_flush (struct ifnet *ifp); -#endif /* __APPLE_API_PRIVATE */ -#endif - -#endif /* _NET_IF_SPPP_H_ */ diff --git a/bsd/net/if_stf.c b/bsd/net/if_stf.c index 379550287..6bf96e349 100644 --- a/bsd/net/if_stf.c +++ b/bsd/net/if_stf.c @@ -83,15 +83,12 @@ #include #include #include -#include #include #include #include -#include #include -#include #include #include @@ -118,7 +115,7 @@ struct stf_softc { struct ifnet sc_if; /* common area */ #ifdef __APPLE__ - struct if_proto *stf_proto; /* dlil protocol attached */ + u_long sc_protocol_family; /* dlil protocol attached */ #endif union { struct route __sc_ro4; @@ -131,10 +128,7 @@ struct stf_softc { static struct stf_softc *stf; #ifdef __APPLE__ -void stfattach __P((void)); -int stf_pre_output __P((struct ifnet *, register struct mbuf **, struct sockaddr *, - caddr_t, char *, char *, u_long)); -static u_long stf_dl_tag=0; +void stfattach (void); #endif #ifndef __APPLE__ @@ -142,147 +136,94 @@ static MALLOC_DEFINE(M_STF, "stf", "6to4 Tunnel Interface"); #endif static int ip_stf_ttl = 40; +static void in_stf_input(struct mbuf *, int); extern struct domain inetdomain; struct protosw in_stf_protosw = { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, in_stf_input, 0, 0, rip_ctloutput, 0, - 0, 0, 0, 0, 0, - &rip_usrreqs + &rip_usrreqs, + 0, rip_unlock, 0 }; -static int stf_encapcheck __P((const struct mbuf *, int, int, void *)); -static struct in6_ifaddr *stf_getsrcifa6 __P((struct ifnet *)); -int stf_pre_output __P((struct ifnet *, register struct mbuf **, struct sockaddr *, - caddr_t, char *, char *, u_long)); -static int stf_checkaddr4 __P((struct stf_softc *, struct in_addr *, - struct ifnet *)); -static int stf_checkaddr6 __P((struct stf_softc *, struct in6_addr *, - struct ifnet *)); -static void stf_rtrequest __P((int, struct rtentry *, struct sockaddr *)); -int stf_ioctl __P((struct ifnet *, u_long, void *)); - +static int stf_encapcheck(const struct mbuf *, int, int, void *); +static struct in6_ifaddr *stf_getsrcifa6(struct ifnet *); +int stf_pre_output(struct ifnet *, u_long, register struct mbuf **, + const struct sockaddr *, caddr_t, char *, char *); +static int stf_checkaddr4(struct stf_softc *, struct in_addr *, + struct ifnet *); +static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, + struct ifnet *); +static void stf_rtrequest(int, struct rtentry *, struct sockaddr *); +int stf_ioctl(struct ifnet *, u_long, void *); static -int stf_add_if(struct ifnet *ifp) -{ - ifp->if_demux = 0; - ifp->if_framer = 0; - return 0; -} - -static -int stf_del_if(struct ifnet *ifp) -{ - return 0; -} - -static -int stf_add_proto(struct ddesc_head_str *desc_head, struct if_proto *proto, u_long dl_tag) +int stf_add_proto( + struct ifnet *ifp, + u_long protocol_family, + struct ddesc_head_str *desc_head) { /* Only one protocol may be attached at a time */ - struct stf_softc* stf = (struct stf_softc*)proto->ifp; - if (stf->stf_proto == NULL) - 
stf->stf_proto = proto; + struct stf_softc* stf = (struct stf_softc*)ifp; + if (stf->sc_protocol_family == 0) + stf->sc_protocol_family = protocol_family; else { printf("stf_add_proto: stf already has a proto\n"); - return (EBUSY); + return EBUSY; } - - return (0); + + return 0; } static -int stf_del_proto(struct if_proto *proto, u_long dl_tag) +int stf_del_proto( + struct ifnet *ifp, + u_long protocol_family) { - if (((struct stf_softc*)proto->ifp)->stf_proto == proto) - ((struct stf_softc*)proto->ifp)->stf_proto = NULL; + if (((struct stf_softc*)ifp)->sc_protocol_family == protocol_family) + ((struct stf_softc*)ifp)->sc_protocol_family = 0; else return ENOENT; return 0; } -int stf_shutdown() -{ - return 0; -} - -int stf_attach_inet6(struct ifnet *ifp, u_long *dl_tag) +static int +stf_attach_inet6(struct ifnet *ifp, u_long protocol_family) { struct dlil_proto_reg_str reg; - struct dlil_demux_desc desc; - short native=0; int stat, i; - if (stf_dl_tag != 0) { - *dl_tag = stf_dl_tag; - return 0; - } - + bzero(®, sizeof(reg)); TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); reg.interface_family = ifp->if_family; reg.unit_number = ifp->if_unit; - reg.input = 0; reg.pre_output = stf_pre_output; - reg.event = 0; - reg.offer = 0; - reg.ioctl = 0; - reg.default_proto = 0; reg.protocol_family = PF_INET6; - stat = dlil_attach_protocol(®, &stf_dl_tag); - *dl_tag = stf_dl_tag; + stat = dlil_attach_protocol(®); return stat; } -int stf_detach_inet6(struct ifnet *ifp, u_long dl_tag) +static int +stf_demux( + struct ifnet *ifp, + struct mbuf *m, + char *frame_ptr, + u_long *protocol_family) { - int stat; - - stat = dlil_find_dltag(ifp->if_family, ifp->if_unit, AF_INET6, &dl_tag); - if (stat == 0) { - stat = dlil_detach_protocol(dl_tag); - if (stat) { - printf("WARNING: stf_detach can't detach IP AF_INET6 from interface\n"); - } - } - return (stat); + *protocol_family = PF_INET6; + return 0; } void stf_reg_if_mods() { - struct dlil_ifmod_reg_str stf_ifmod; - struct dlil_protomod_reg_str stf_protoreg; int error; - bzero(&stf_ifmod, sizeof(stf_ifmod)); - stf_ifmod.add_if = stf_add_if; - stf_ifmod.del_if = stf_del_if; - stf_ifmod.add_proto = stf_add_proto; - stf_ifmod.del_proto = stf_del_proto; - stf_ifmod.ifmod_ioctl = 0; - stf_ifmod.shutdown = stf_shutdown; - - - if (dlil_reg_if_modules(APPLE_IF_FAM_STF, &stf_ifmod)) - panic("Couldn't register stf modules\n"); - /* Register protocol registration functions */ - - bzero(&stf_protoreg, sizeof(stf_protoreg)); - stf_protoreg.attach_proto = stf_attach_inet6; - stf_protoreg.detach_proto = stf_detach_inet6; - - if ( error = dlil_reg_proto_module(AF_INET6, APPLE_IF_FAM_STF, &stf_protoreg) != 0) + if ( error = dlil_reg_proto_module(AF_INET6, APPLE_IF_FAM_STF, stf_attach_inet6, NULL) != 0) kprintf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", error); } @@ -292,8 +233,6 @@ stfattach(void) struct ifnet *ifp; struct stf_softc *sc; int i, error; - - int err; const struct encaptab *p; @@ -323,6 +262,9 @@ stfattach(void) sc->sc_if.if_output = NULL; /* processing done in pre_output */ sc->sc_if.if_type = IFT_STF; sc->sc_if.if_family= APPLE_IF_FAM_STF; + sc->sc_if.if_add_proto = stf_add_proto; + sc->sc_if.if_del_proto = stf_del_proto; + sc->sc_if.if_demux = stf_demux; #if 0 /* turn off ingress filter */ sc->sc_if.if_flags |= IFF_LINK2; @@ 
-409,6 +351,7 @@ stf_getsrcifa6(ifp) struct sockaddr_in6 *sin6; struct in_addr in; + ifnet_lock_shared(ifp); for (ia = ifp->if_addrlist.tqh_first; ia; ia = ia->ifa_list.tqe_next) @@ -422,6 +365,7 @@ stf_getsrcifa6(ifp) continue; bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); + lck_mtx_lock(rt_mtx); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) @@ -429,24 +373,27 @@ stf_getsrcifa6(ifp) if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; } + lck_mtx_unlock(rt_mtx); if (ia4 == NULL) continue; + ifnet_lock_done(ifp); return (struct in6_ifaddr *)ia; } + ifnet_lock_done(ifp); return NULL; } int -stf_pre_output(ifp, m0, dst, rt, frame_type, address, dl_tag) - struct ifnet *ifp; - register struct mbuf **m0; - struct sockaddr *dst; - caddr_t rt; - char *frame_type; - char *address; - u_long dl_tag; +stf_pre_output( + struct ifnet *ifp, + u_long protocol_family, + register struct mbuf **m0, + const struct sockaddr *dst, + caddr_t rt, + char *frame_type, + char *address) { register struct mbuf *m = *m0; struct stf_softc *sc; @@ -506,14 +453,14 @@ stf_pre_output(ifp, m0, dst, rt, frame_type, address, dl_tag) * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ - struct mbuf m0; + struct mbuf m1; u_int32_t af = AF_INET6; - m0.m_next = m; - m0.m_len = 4; - m0.m_data = (char *)⁡ + m1.m_next = m; + m1.m_len = 4; + m1.m_data = (char *)⁡ - bpf_mtap(ifp, &m0); + bpf_mtap(ifp, &m1); } M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); @@ -544,7 +491,7 @@ stf_pre_output(ifp, m0, dst, rt, frame_type, address, dl_tag) dst4->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); if (sc->sc_ro.ro_rt) { - RTFREE(sc->sc_ro.ro_rt); + rtfree(sc->sc_ro.ro_rt); sc->sc_ro.ro_rt = NULL; } } @@ -559,6 +506,8 @@ stf_pre_output(ifp, m0, dst, rt, frame_type, address, dl_tag) error = ip_output(m, NULL, &sc->sc_ro, 0, NULL); if (error == 0) return EJUSTRETURN; + + return error; } static int @@ -583,15 +532,19 @@ stf_checkaddr4(sc, in, inifp) /* * reject packets with broadcast */ + lck_mtx_lock(rt_mtx); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; - if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) + if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { + lck_mtx_unlock(rt_mtx); return -1; + } } + lck_mtx_unlock(rt_mtx); /* * perform ingress filter @@ -645,7 +598,7 @@ stf_checkaddr6(sc, in6, inifp) return 0; } -void +static void in_stf_input(m, off) struct mbuf *m; int off; @@ -654,8 +607,7 @@ in_stf_input(m, off) struct ip *ip; struct ip6_hdr *ip6; u_int8_t otos, itos; - int s, isr, proto; - struct ifqueue *ifq = NULL; + int proto; struct ifnet *ifp; ip = mtod(m, struct ip *); @@ -746,21 +698,9 @@ in_stf_input(m, off) * See net/if_gif.c for possible issues with packet processing * reorder due to extra queueing. 
*/ - ifq = &ip6intrq; - isr = NETISR_IPV6; - - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); /* update statistics */ - m_freem(m); - splx(s); - return; - } - IF_ENQUEUE(ifq, m); - schednetisr(isr); + proto_input(PF_INET6, m); ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; - splx(s); return; } @@ -798,8 +738,11 @@ stf_ioctl(ifp, cmd, data) } sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) { - ifa->ifa_rtrequest = stf_rtrequest; - ifp->if_flags |= IFF_UP; + if ( !(ifnet_flags( ifp ) & IFF_UP) ) { + /* do this only if the interface is not already up */ + ifa->ifa_rtrequest = stf_rtrequest; + ifnet_set_flags(ifp, IFF_UP, IFF_UP); + } } else error = EINVAL; break; diff --git a/bsd/net/if_stf.h b/bsd/net/if_stf.h deleted file mode 100644 index c374f426d..000000000 --- a/bsd/net/if_stf.h +++ /dev/null @@ -1,41 +0,0 @@ -/* $FreeBSD: src/sys/net/if_stf.h,v 1.1.2.1 2000/07/15 07:14:29 kris Exp $ */ -/* $KAME: if_stf.h,v 1.3 2000/03/25 07:23:33 sumikawa Exp $ */ - -/* - * Copyright (C) 2000 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _NET_IF_STF_H_ -#define _NET_IF_STF_H_ -#include - -#ifdef __APPLE_API_PRIVATE -void in_stf_input __P((struct mbuf *, int)); -#endif /* __APPLE_API_PRIVATE */ - -#endif /* _NET_IF_STF_H_ */ diff --git a/bsd/net/if_tun.c b/bsd/net/if_tun.c deleted file mode 100644 index 8ed3afd27..000000000 --- a/bsd/net/if_tun.c +++ /dev/null @@ -1,764 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ - -/* - * Copyright (c) 1988, Julian Onions - * Nottingham University 1987. - * - * This source may be freely distributed, however I would be interested - * in any changes that are made. - * - * This driver takes packets off the IP i/f and hands them up to a - * user process to have its wicked way with. This driver has it's - * roots in a similar driver written by Phil Cockcroft (formerly) at - * UCL. This driver is based much more on read/write/poll mode of - * operation though. - */ - -#include "tun.h" -#if NTUN > 0 - -#include "opt_devfs.h" -#include "opt_inet.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if DEVFS -#include -#endif /*DEVFS*/ -#include -#include -#include - -#include -#include -#include -#include - -#if INET -#include -#include -#endif - -#if INET6 -#include -#include -#include -#endif /* INET6 */ - -#include "bpfilter.h" -#if NBPFILTER > 0 -#include -#endif - -#include -#include - -static void tunattach __P((void *)); -PSEUDO_SET(tunattach, if_tun); - -#define TUNDEBUG if (tundebug) printf -static int tundebug = 0; -SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); - -static struct tun_softc tunctl[NTUN]; - -static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry *rt)); -static int tunifioctl __P((struct ifnet *, u_long, caddr_t)); -static int tuninit __P((int, int, u_char)); - -static d_open_t tunopen; -static d_close_t tunclose; -static d_read_t tunread; -static d_write_t tunwrite; -static d_ioctl_t tunioctl; -static d_poll_t tunpoll; - -#define CDEV_MAJOR 52 -static struct cdevsw tun_cdevsw = { - tunopen, tunclose, tunread, tunwrite, - tunioctl, nullstop, noreset, nodevtotty, - tunpoll, nommap, nostrategy, "tun", NULL, -1 -}; - - -static int tun_devsw_installed; -#if DEVFS -static void *tun_devfs_token[NTUN]; -#endif - -#define minor_val(n) ((((n) & ~0xff) << 8) | ((n) & 0xff)) -#define dev_val(n) (((n) >> 8) | ((n) & 0xff)) - -static void -tunattach(dummy) - void *dummy; -{ - register int i; - struct ifnet *ifp; - dev_t dev; - - if ( tun_devsw_installed ) - return; - dev = makedev(CDEV_MAJOR, 0); - cdevsw_add(&dev, &tun_cdevsw, NULL); - tun_devsw_installed = 1; - for ( i = 0; i < NTUN; i++ ) { -#if DEVFS - tun_devfs_token[i] = devfs_add_devswf(&tun_cdevsw, minor_val(i), - DV_CHR, UID_UUCP, - GID_DIALER, 0600, - "tun%d", i); -#endif - tunctl[i].tun_flags = TUN_INITED; - - ifp = &tunctl[i].tun_if; - ifp->if_unit = i; - ifp->if_name = "tun"; - ifp->if_family = APPLE_IF_FAM_TUN; - ifp->if_mtu = TUNMTU; - ifp->if_ioctl = tunifioctl; - ifp->if_output = tunoutput; - ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; - ifp->if_type = IFT_PPP; /* necessary init value for IPv6 lladdr auto conf */ - ifp->if_snd.ifq_maxlen = ifqmaxlen; - if_attach(ifp); -#if NBPFILTER > 0 - bpfattach(ifp, DLT_NULL, sizeof(u_int)); -#endif - } -} - -/* - * tunnel open - must be superuser & the 
device must be - * configured in - */ -static int -tunopen(dev, flag, mode, p) - dev_t dev; - int flag, mode; - struct proc *p; -{ - struct ifnet *ifp; - struct tun_softc *tp; - register int unit, error; - - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); - - if ((unit = dev_val(minor(dev))) >= NTUN) - return (ENXIO); - tp = &tunctl[unit]; - if (tp->tun_flags & TUN_OPEN) - return EBUSY; - ifp = &tp->tun_if; - tp->tun_flags |= TUN_OPEN; - TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit); - return (0); -} - -/* - * tunclose - close the device - mark i/f down & delete - * routing info - */ -static int -tunclose(dev, foo, bar, p) - dev_t dev; - int foo; - int bar; - struct proc *p; -{ - register int unit = dev_val(minor(dev)), s; - struct tun_softc *tp = &tunctl[unit]; - struct ifnet *ifp = &tp->tun_if; - struct mbuf *m; - - tp->tun_flags &= ~TUN_OPEN; - - /* - * junk all pending output - */ - do { - s = splimp(); - IF_DEQUEUE(&ifp->if_snd, m); - splx(s); - if (m) - m_freem(m); - } while (m); - - if (ifp->if_flags & IFF_UP) { - s = splimp(); - if_down(ifp); - if (ifp->if_flags & IFF_RUNNING) { - /* find internet addresses and delete routes */ - register struct ifaddr *ifa; - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { - switch (ifa->ifa_addr->sa_family) { -#if INET - case AF_INET: -#endif -#if INET6 - case AF_INET6: -#endif - rtinit(ifa, (int)RTM_DELETE, - tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); - break; - } - } - } - splx(s); - } - ifp->if_flags &= ~IFF_RUNNING; - funsetown(tp->tun_sigio); - selwakeup(&tp->tun_rsel); - selthreadclear(&tp->tun_rsel); - - TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit); - return (0); -} - -static int -tuninit(unit, cmd, af) - int unit; - int cmd; - u_char af; -{ - struct tun_softc *tp = &tunctl[unit]; - struct ifnet *ifp = &tp->tun_if; - register struct ifaddr *ifa; - - TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit); - - ifp->if_flags |= IFF_UP | IFF_RUNNING; - getmicrotime(&ifp->if_lastchange); - - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { -#if INET - if (ifa->ifa_addr->sa_family == AF_INET) { - struct sockaddr_in *si; - - si = (struct sockaddr_in *)ifa->ifa_addr; - if (si && si->sin_addr.s_addr) - tp->tun_flags |= TUN_IASET; - - si = (struct sockaddr_in *)ifa->ifa_dstaddr; - if (si && si->sin_addr.s_addr) - tp->tun_flags |= TUN_DSTADDR; - } -#endif - } - return 0; -} - -/* - * Process an ioctl request. 
- */ -int -tunifioctl(ifp, cmd, data) - struct ifnet *ifp; - u_long cmd; - caddr_t data; -{ - register struct ifreq *ifr = (struct ifreq *)data; - int error = 0, s; - - s = splimp(); - switch(cmd) { - case SIOCGIFSTATUS: - ifs = (struct ifstat *)data; - if (tp->tun_pid) - sprintf(ifs->ascii + strlen(ifs->ascii), - "\tOpened by PID %d\n", tp->tun_pid); - break; - case SIOCSIFADDR: - tuninit(ifp->if_unit); - TUNDEBUG("%s%d: address set\n", - ifp->if_name, ifp->if_unit); - break; - case SIOCSIFDSTADDR: -#if 0 -#if defined(INET6) && defined(__FreeBSD__) && __FreeBSD__ >= 3 - if (found_first_ifid == 0) - in6_ifattach_noifid(ifp); -#endif /* defined(INET6) && defined(__FreeBSD__) && __FreeBSD__ >= 3 */ -#endif - tuninit(ifp->if_unit, cmd, ifr->ifr_addr.sa_family); - break; - case SIOCSIFMTU: - ifp->if_mtu = ifr->ifr_mtu; - TUNDEBUG("%s%d: mtu set\n", - ifp->if_name, ifp->if_unit); - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - break; - - case SIOCSIFFLAGS: - if ((ifp->if_flags & IFF_UP) != 0) - ifp->if_flags |= IFF_RUNNING; - else if ((ifp->if_flags & IFF_UP) == 0) - ifp->if_flags &= ~IFF_RUNNING; - break; - - default: - error = EINVAL; - } - splx(s); - return (error); -} - -/* - * tunoutput - queue packets from higher level ready to put out. - */ -/* Packet data format between tun and ppp is changed to enable checking of - * Address Family of sending packet. When INET6 is defined, 4byte AF field - * is appended to packet data as following. - * - * 0 1 2 3 4 5 6 7 8 ..... - * ------------------------------ - * | af | packet data ..... - * ------------------------------ - * ^^^^^^^^^^^^^ - * Newly added part. The size is sizeof(u_long). - * - * However, this is not adopted for tun -> ppp AF_INET packet for - * backword compatibility, because the ppp process may be an existing - * ip only supporting one. - * Also in ppp->tun case, when af value is unknown, (af > 255) is checked and - * if it is true, AF_INET is assumed. (the 4byte may be the head of - * AF_INET packet. Despite the byte order, the value must always be - * greater than 255, because of ip_len field or (ip_v and ip_hl) - * field. (Idea from Mr. Noritoshi Demize) - */ -int -tunoutput(ifp, m0, dst, rt) - struct ifnet *ifp; - struct mbuf *m0; - struct sockaddr *dst; - struct rtentry *rt; -{ - struct tun_softc *tp = &tunctl[ifp->if_unit]; - int s; - - TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit); - - if ((tp->tun_flags & TUN_READY) != TUN_READY) { - TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, - ifp->if_unit, tp->tun_flags); - m_freem (m0); - return EHOSTDOWN; - } - -#if NBPFILTER > 0 - /* BPF write needs to be handled specially */ - if (dst->sa_family == AF_UNSPEC) { - dst->sa_family = *(mtod(m0, int *)); - m0->m_len -= sizeof(int); - m0->m_pkthdr.len -= sizeof(int); - m0->m_data += sizeof(int); - } - - if (ifp->if_bpf) { - /* - * We need to prepend the address family as - * a four byte field. Cons up a dummy header - * to pacify bpf. This is safe because bpf - * will only read from the mbuf (i.e., it won't - * try to free it or keep a pointer to it). 
- */ - struct mbuf m; - u_int af = dst->sa_family; - - m.m_next = m0; - m.m_len = 4; - m.m_data = (char *)⁡ - - bpf_mtap(ifp, &m); - } -#endif - - switch(dst->sa_family) { -#if defined(INET) || defined(INET6) -#if INET6 - case AF_INET6: - M_PREPEND(m0, sizeof(u_long) /* af field passed to upper */, - M_DONTWAIT); - if (m0 == 0) - return (ENOBUFS); - *mtod(m0, u_long *) = (u_long)dst->sa_family; - /* FALLTHROUGH */ -#endif /* INET6 */ -#if INET - case AF_INET: -#endif /* INET */ -#endif /* INET || INET6 */ - s = splimp(); - if (IF_QFULL(&ifp->if_snd)) { - IF_DROP(&ifp->if_snd); - m_freem(m0); - splx(s); - ifp->if_collisions++; - return (ENOBUFS); - } - ifp->if_obytes += m0->m_pkthdr.len; - IF_ENQUEUE(&ifp->if_snd, m0); - splx(s); - ifp->if_opackets++; - break; - default: - m_freem(m0); - return EAFNOSUPPORT; - } - - if (tp->tun_flags & TUN_RWAIT) { - tp->tun_flags &= ~TUN_RWAIT; - wakeup((caddr_t)tp); - } - if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) - pgsigio(tp->tun_sigio, SIGIO, 0); - selwakeup(&tp->tun_rsel); - return 0; -} - -/* - * the cdevsw interface is now pretty minimal. - */ -static int -tunioctl(dev, cmd, data, flag, p) - dev_t dev; - u_long cmd; - caddr_t data; - int flag; - struct proc *p; -{ - int unit = dev_val(minor(dev)), s; - struct tun_softc *tp = &tunctl[unit]; - struct tuninfo *tunp; - - switch (cmd) { - case TUNSIFINFO: - tunp = (struct tuninfo *)data; - tp->tun_if.if_mtu = tunp->mtu; - tp->tun_if.if_type = tunp->type; - tp->tun_if.if_baudrate = tunp->baudrate; - break; - case TUNGIFINFO: - tunp = (struct tuninfo *)data; - tunp->mtu = tp->tun_if.if_mtu; - tunp->type = tp->tun_if.if_type; - tunp->baudrate = tp->tun_if.if_baudrate; - break; - case TUNSDEBUG: - tundebug = *(int *)data; - break; - case TUNGDEBUG: - *(int *)data = tundebug; - break; - case FIONBIO: - break; - case FIOASYNC: - if (*(int *)data) - tp->tun_flags |= TUN_ASYNC; - else - tp->tun_flags &= ~TUN_ASYNC; - break; - case FIONREAD: - s = splimp(); - if (tp->tun_if.if_snd.ifq_head) { - struct mbuf *mb = tp->tun_if.if_snd.ifq_head; - for( *(int *)data = 0; mb != 0; mb = mb->m_next) - *(int *)data += mb->m_len; - } else - *(int *)data = 0; - splx(s); - break; - case FIOSETOWN: - return (fsetown(*(int *)data, &tp->tun_sigio)); - - case FIOGETOWN: - *(int *)data = fgetown(tp->tun_sigio); - return (0); - - /* This is deprecated, FIOSETOWN should be used instead. */ - case TIOCSPGRP: - return (fsetown(-(*(int *)data), &tp->tun_sigio)); - - /* This is deprecated, FIOGETOWN should be used instead. */ - case TIOCGPGRP: - *(int *)data = -fgetown(tp->tun_sigio); - return (0); - - default: - return (ENOTTY); - } - return (0); -} - -/* - * The cdevsw read interface - reads a packet at a time, or at - * least as much of a packet as can be read. 
- */ -static int -tunread(dev, uio, flag) - dev_t dev; - struct uio *uio; - int flag; -{ - int unit = dev_val(minor(dev)); - struct tun_softc *tp = &tunctl[unit]; - struct ifnet *ifp = &tp->tun_if; - struct mbuf *m, *m0; - int error=0, len, s; - - TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit); - if ((tp->tun_flags & TUN_READY) != TUN_READY) { - TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, - ifp->if_unit, tp->tun_flags); - return EHOSTDOWN; - } - - tp->tun_flags &= ~TUN_RWAIT; - - s = splimp(); - do { - IF_DEQUEUE(&ifp->if_snd, m0); - if (m0 == 0) { - if (flag & IO_NDELAY) { - splx(s); - return EWOULDBLOCK; - } - tp->tun_flags |= TUN_RWAIT; - if( error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1), - "tunread", 0)) { - splx(s); - return error; - } - } - } while (m0 == 0); - splx(s); - - while (m0 && uio->uio_resid > 0 && error == 0) { - len = min(uio->uio_resid, m0->m_len); - if (len == 0) - break; - error = uiomove(mtod(m0, caddr_t), len, uio); - MFREE(m0, m); - m0 = m; - } - - if (m0) { - TUNDEBUG("Dropping mbuf\n"); - m_freem(m0); - } - return error; -} - -/* - * the cdevsw write interface - an atomic write is a packet - or else! - */ -/* See top of tunoutput() about interface change between ppp process and - * tun. */ -static int -tunwrite(dev, uio, flag) - dev_t dev; - struct uio *uio; - int flag; -{ - int unit = dev_val(minor(dev)); - struct ifnet *ifp = &tunctl[unit].tun_if; - struct mbuf *top, **mp, *m; - int error=0, s, tlen, mlen; - u_long af; - u_int netisr_af; - struct ifqueue *afintrq = NULL; - - TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit); - - if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { - TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit, - uio->uio_resid); - return EIO; - } - tlen = uio->uio_resid; - - /* get a header mbuf */ - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) - return ENOBUFS; - if (tlen > MHLEN) { - MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return ENOBUFS; - } - mlen = m->m_ext.ext_size; - } else - mlen = MHLEN; - - top = 0; - mp = ⊤ - while (error == 0 && uio->uio_resid > 0) { - m->m_len = min(mlen, uio->uio_resid); - error = uiomove(mtod (m, caddr_t), m->m_len, uio); - *mp = m; - mp = &m->m_next; - if (uio->uio_resid > 0) { - MGET (m, M_DONTWAIT, MT_DATA); - if (m == 0) { - error = ENOBUFS; - break; - } - mlen = MLEN; - } - } - /* Change for checking Address Family of sending packet. */ - af = *mtod(top, u_long *); - switch (af) { -#if INET - case AF_INET: - netisr_af = NETISR_IP; - afintrq = &ipintrq; - break; -#endif /* INET */ -#if INET6 - case AF_INET6: - netisr_af = NETISR_IPV6; - afintrq = &ip6intrq; - break; -#endif /* INET6 */ - default: - if (af > 255) { /* see description at the top of tunoutput */ - af = AF_INET; - netisr_af = NETISR_IP; - afintrq = &ipintrq; - goto af_decided; - } - error = EAFNOSUPPORT; - break; - } - m_adj(top, sizeof(u_long)); /* remove af field passed from upper */ - tlen -= sizeof(u_long); - af_decided: - if (error) { - if (top) - m_freem (top); - return error; - } - - top->m_pkthdr.len = tlen; - top->m_pkthdr.rcvif = ifp; - -#if NBPFILTER > 0 - if (ifp->if_bpf) { - /* - * We need to prepend the address family as - * a four byte field. Cons up a dummy header - * to pacify bpf. This is safe because bpf - * will only read from the mbuf (i.e., it won't - * try to free it or keep a pointer to it). 
- */ - struct mbuf m; - - m.m_next = top; - m.m_len = 4; - m.m_data = (char *)⁡ - - bpf_mtap(ifp, &m); - } -#endif - - /* just for safety */ - if (!afintrq) - return EAFNOSUPPORT; - - s = splimp(); - if (IF_QFULL (afintrq)) { - IF_DROP(afintrq); - splx(s); - ifp->if_collisions++; - m_freem(top); - return ENOBUFS; - } - IF_ENQUEUE(afintrq, top); - splx(s); - ifp->if_ibytes += tlen; - ifp->if_ipackets++; - schednetisr(netisr_af); - return error; -} - -/* - * tunpoll - the poll interface, this is only useful on reads - * really. The write detect always returns true, write never blocks - * anyway, it either accepts the packet or drops it. - */ -static int -tunpoll(dev, events, wql, p) - dev_t dev; - int events; - void * wql; - struct proc *p; -{ - int unit = dev_val(minor(dev)), s; - struct tun_softc *tp = &tunctl[unit]; - struct ifnet *ifp = &tp->tun_if; - int revents = 0; - - s = splimp(); - TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit); - - if (events & (POLLIN | POLLRDNORM)) - if (ifp->if_snd.ifq_len > 0) { - TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name, - ifp->if_unit, ifp->if_snd.ifq_len); - revents |= events & (POLLIN | POLLRDNORM); - } else { - TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name, - ifp->if_unit); - selrecord(p, &tp->tun_rsel, wql); - } - - if (events & (POLLOUT | POLLWRNORM)) - revents |= events & (POLLOUT | POLLWRNORM); - - splx(s); - return (revents); -} - - -#endif /* NTUN */ diff --git a/bsd/net/if_tun.h b/bsd/net/if_tun.h deleted file mode 100644 index 23731386f..000000000 --- a/bsd/net/if_tun.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* $NetBSD: if_tun.h,v 1.5 1994/06/29 06:36:27 cgd Exp $ */ - -/* - * Copyright (c) 1988, Julian Onions - * Nottingham University 1987. - * - * This source may be freely distributed, however I would be interested - * in any changes that are made. - * - * This driver takes packets off the IP i/f and hands them up to a - * user process to have its wicked way with. This driver has it's - * roots in a similar driver written by Phil Cockcroft (formerly) at - * UCL. This driver is based much more on read/write/select mode of - * operation though. - * - */ - -#ifndef _NET_IF_TUN_H_ -#define _NET_IF_TUN_H_ -#include -#ifdef __APPLE_API_PRIVATE - -/* Refer to if_tunvar.h for the softc stuff */ - -/* Maximum transmit packet size (default) */ -#define TUNMTU 1500 - -/* Maximum receive packet size (hard limit) */ -#define TUNMRU 16384 - -struct tuninfo { - int baudrate; /* linespeed */ - short mtu; /* maximum transmission unit */ - u_char type; /* ethernet, tokenring, etc. 
*/ - u_char dummy; /* place holder */ -}; - -/* ioctl's for get/set debug */ -#define TUNSDEBUG _IOW('t', 90, int) -#define TUNGDEBUG _IOR('t', 89, int) -#define TUNSIFINFO _IOW('t', 91, struct tuninfo) -#define TUNGIFINFO _IOR('t', 92, struct tuninfo) - -#endif /* __APPLE_API_PRIVATE */ -#endif /* !_NET_IF_TUN_H_ */ diff --git a/bsd/net/if_tunvar.h b/bsd/net/if_tunvar.h deleted file mode 100644 index 9b93e0867..000000000 --- a/bsd/net/if_tunvar.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1998 Brian Somers - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - */ - -#ifndef _NET_IF_TUNVAR_H_ -#define _NET_IF_TUNVAR_H_ -#include -#ifdef __APPLE_API_PRIVATE - -struct tun_softc { - u_short tun_flags; /* misc flags */ -#define TUN_OPEN 0x0001 -#define TUN_INITED 0x0002 -#define TUN_RCOLL 0x0004 -#define TUN_IASET 0x0008 -#define TUN_DSTADDR 0x0010 -#define TUN_RWAIT 0x0040 -#define TUN_ASYNC 0x0080 - -#define TUN_READY (TUN_OPEN | TUN_INITED) - - struct ifnet tun_if; /* the interface */ - struct sigio *tun_sigio; /* information for async I/O */ - struct selinfo tun_rsel; /* read select */ - struct selinfo tun_wsel; /* write select (not used) */ -}; - -#endif /* __APPLE_API_PRIVATE */ -#endif /* !_NET_IF_TUNVAR_H_ */ diff --git a/bsd/net/if_types.h b/bsd/net/if_types.h index befa53907..282c0f811 100644 --- a/bsd/net/if_types.h +++ b/bsd/net/if_types.h @@ -63,6 +63,8 @@ * Interface types for benefit of parsing media address headers. * This list is derived from the SNMP list of ifTypes, currently * documented in RFC1573. + * The current list of assignments is maintained at: + * http://www.iana.org/assignments/smi-numbers */ #define IFT_OTHER 0x1 /* none of the following */ @@ -123,7 +125,14 @@ #define IFT_FAITH 0x38 /*0xf2*/ #define IFT_STF 0x39 /*0xf3*/ #define IFT_L2VLAN 0x87 /* Layer 2 Virtual LAN using 802.1Q */ - +#define IFT_IEEE8023ADLAG 0x88 /* IEEE802.3ad Link Aggregate */ #define IFT_IEEE1394 0x90 /* IEEE1394 High Performance SerialBus*/ +/* + * These are not based on IANA assignments: + * Note: IFT_STF has a defined ifType: 0xd7 (215), but we use 0x39. + */ +#define IFT_GIF 0x37 /*0xf0*/ +#define IFT_FAITH 0x38 /*0xf2*/ +#define IFT_STF 0x39 /*0xf3*/ #endif diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index 1f813e917..8ac2039fb 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -57,7 +57,18 @@ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ + #include +#include +#include +#include /* get TAILQ macros */ +#ifdef KERNEL_PRIVATE +#include +#endif /* KERNEL_PRIVATE */ + +#ifdef KERNEL +#include +#endif KERNEL #ifdef __APPLE__ #define APPLE_IF_FAM_LOOPBACK 1 @@ -73,7 +84,15 @@ #define APPLE_IF_FAM_FAITH 11 #define APPLE_IF_FAM_STF 12 #define APPLE_IF_FAM_FIREWIRE 13 -#endif +#define APPLE_IF_FAM_BOND 14 +#endif __APPLE__ + +/* + * 72 was chosen below because it is the size of a TCP/IP + * header (40) + the minimum mss (32). + */ +#define IF_MINMTU 72 +#define IF_MAXMTU 65535 /* * Structures defining a network interface, providing a packet @@ -100,122 +119,210 @@ * interfaces. These routines live in the files if.c and route.c */ -#ifdef __STDC__ -/* - * Forward structure declarations for function prototypes [sic]. - */ -struct mbuf; -struct proc; -struct rtentry; -struct socket; -struct ether_header; -struct sockaddr_dl; -#endif - #define IFNAMSIZ 16 -#include /* get TAILQ macros */ - -#ifdef __APPLE_API_UNSTABLE -#ifdef __APPLE__ -struct tqdummy; - -TAILQ_HEAD(tailq_head, tqdummy); - /* This belongs up in socket.h or socketvar.h, depending on how far the * event bubbles up. */ struct net_event_data { - u_long if_family; - u_long if_unit; + unsigned long if_family; + unsigned long if_unit; char if_name[IFNAMSIZ]; }; -#endif - - -TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ -TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ -TAILQ_HEAD(ifprefixhead, ifprefix); -LIST_HEAD(ifmultihead, ifmultiaddr); +/* + * Structure describing information about an interface + * which may be of interest to management entities. 
+ */
+struct if_data {
+	/* generic interface information */
+	unsigned char	ifi_type;	/* ethernet, tokenring, etc */
 #ifdef __APPLE__
-#ifdef KERNEL_PRIVATE
-/* bottom 16 bits reserved for hardware checksum */
-#define IF_HWASSIST_CSUM_IP		0x0001	/* will csum IP */
-#define IF_HWASSIST_CSUM_TCP		0x0002	/* will csum TCP */
-#define IF_HWASSIST_CSUM_UDP		0x0004	/* will csum UDP */
-#define IF_HWASSIST_CSUM_IP_FRAGS	0x0008	/* will csum IP fragments */
-#define IF_HWASSIST_CSUM_FRAGMENT	0x0010	/* will do IP fragmentation */
-#define IF_HWASSIST_CSUM_TCP_SUM16	0x1000	/* simple TCP Sum16 computation */
-#define IF_HWASSIST_CSUM_MASK		0xffff
-#define IF_HWASSIST_CSUM_FLAGS(hwassist)	((hwassist) & IF_HWASSIST_CSUM_MASK)
+	unsigned char	ifi_typelen;	/* Length of frame type id */
+#endif
+	unsigned char	ifi_physical;	/* e.g., AUI, Thinnet, 10base-T, etc */
+	unsigned char	ifi_addrlen;	/* media address length */
+	unsigned char	ifi_hdrlen;	/* media header length */
+	unsigned char	ifi_recvquota;	/* polling quota for receive intrs */
+	unsigned char	ifi_xmitquota;	/* polling quota for xmit intrs */
+	unsigned char	ifi_unused1;	/* for future use */
+	unsigned long	ifi_mtu;	/* maximum transmission unit */
+	unsigned long	ifi_metric;	/* routing metric (external only) */
+	unsigned long	ifi_baudrate;	/* linespeed */
+	/* volatile statistics */
+	unsigned long	ifi_ipackets;	/* packets received on interface */
+	unsigned long	ifi_ierrors;	/* input errors on interface */
+	unsigned long	ifi_opackets;	/* packets sent on interface */
+	unsigned long	ifi_oerrors;	/* output errors on interface */
+	unsigned long	ifi_collisions;	/* collisions on csma interfaces */
+	unsigned long	ifi_ibytes;	/* total number of octets received */
+	unsigned long	ifi_obytes;	/* total number of octets sent */
+	unsigned long	ifi_imcasts;	/* packets received via multicast */
+	unsigned long	ifi_omcasts;	/* packets sent via multicast */
+	unsigned long	ifi_iqdrops;	/* dropped on input, this interface */
+	unsigned long	ifi_noproto;	/* destined for unsupported protocol */
+	unsigned long	ifi_recvtiming;	/* usec spent receiving when timing */
+	unsigned long	ifi_xmittiming;	/* usec spent xmitting when timing */
+	struct timeval	ifi_lastchange;	/* time of last administrative change */
+	unsigned long	ifi_unused2;	/* used to be the default_proto */
+	unsigned long	ifi_hwassist;	/* HW offload capabilities */
+	unsigned long	ifi_reserved1;	/* for future use */
+	unsigned long	ifi_reserved2;	/* for future use */
+};

-/* VLAN support */
-#define IF_HWASSIST_VLAN_TAGGING	0x10000	/* supports VLAN tagging */
-#define IF_HWASSIST_VLAN_MTU		0x20000	/* supports VLAN MTU-sized packet (for software VLAN) */
-#endif KERNEL_PRIVATE

 /*
  * Structure describing information about an interface
  * which may be of interest to management entities.
  */
-struct if_data {
+struct if_data64 {
 	/* generic interface information */
 	u_char	ifi_type;		/* ethernet, tokenring, etc */
 #ifdef __APPLE__
 	u_char	ifi_typelen;		/* Length of frame type id */
 #endif
-	u_char	ifi_physical;		/* e.g., AUI, Thinnet, 10base-T, etc */
-	u_char	ifi_addrlen;		/* media address length */
+	u_char		ifi_physical;	/* e.g., AUI, Thinnet, 10base-T, etc */
+	u_char		ifi_addrlen;	/* media address length */
+	u_char		ifi_hdrlen;	/* media header length */
+	u_char		ifi_recvquota;	/* polling quota for receive intrs */
+	u_char		ifi_xmitquota;	/* polling quota for xmit intrs */
+	u_char		ifi_unused1;	/* for future use */
+	u_long		ifi_mtu;	/* maximum transmission unit */
+	u_long		ifi_metric;	/* routing metric (external only) */
+	u_int64_t	ifi_baudrate;	/* linespeed */
+	/* volatile statistics */
+	u_int64_t	ifi_ipackets;	/* packets received on interface */
+	u_int64_t	ifi_ierrors;	/* input errors on interface */
+	u_int64_t	ifi_opackets;	/* packets sent on interface */
+	u_int64_t	ifi_oerrors;	/* output errors on interface */
+	u_int64_t	ifi_collisions;	/* collisions on csma interfaces */
+	u_int64_t	ifi_ibytes;	/* total number of octets received */
+	u_int64_t	ifi_obytes;	/* total number of octets sent */
+	u_int64_t	ifi_imcasts;	/* packets received via multicast */
+	u_int64_t	ifi_omcasts;	/* packets sent via multicast */
+	u_int64_t	ifi_iqdrops;	/* dropped on input, this interface */
+	u_int64_t	ifi_noproto;	/* destined for unsupported protocol */
+	u_long		ifi_recvtiming;	/* usec spent receiving when timing */
+	u_long		ifi_xmittiming;	/* usec spent xmitting when timing */
+	struct timeval	ifi_lastchange;	/* time of last administrative change */
+};
+
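This widened struct is what the rebuilt net.link.generic MIB in the if_mib.c hunk earlier hands to user space (struct ifmibdata now carries a struct if_data64). A user-space sketch of reading one interface's counters; the NETLINK_GENERIC fallback definition is an assumption for illustration, not something this patch provides:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_mib.h>

#ifndef NETLINK_GENERIC
#define NETLINK_GENERIC	0	/* assumed sublevel id for net.link.generic */
#endif

int
print_ifmib(int ifindex)
{
	int name[6];
	struct ifmibdata ifmd;
	size_t len = sizeof(ifmd);

	name[0] = CTL_NET;
	name[1] = PF_LINK;
	name[2] = NETLINK_GENERIC;
	name[3] = IFMIB_IFDATA;
	name[4] = ifindex;		/* 1-based interface index */
	name[5] = IFDATA_GENERAL;

	if (sysctl(name, 6, &ifmd, &len, NULL, 0) == -1)
		return -1;

	printf("%s: %llu pkts in, %llu pkts out, %llu bytes in\n",
	    ifmd.ifmd_name,
	    (unsigned long long)ifmd.ifmd_data.ifi_ipackets,
	    (unsigned long long)ifmd.ifmd_data.ifi_opackets,
	    (unsigned long long)ifmd.ifmd_data.ifi_ibytes);
	return 0;
}

The ifindex argument can come from if_nametoindex(3). With 64-bit counters end to end, a monitor polling a busy gigabit link no longer has to detect and undo 32-bit wraparound.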
+ */ +struct if_data_internal { + /* generic interface information */ + u_char ifi_type; /* ethernet, tokenring, etc */ + u_char ifi_typelen; /* Length of frame type id */ + u_char ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ + u_char ifi_addrlen; /* media address length */ u_char ifi_hdrlen; /* media header length */ - u_char ifi_recvquota; /* polling quota for receive intrs */ - u_char ifi_xmitquota; /* polling quota for xmit intrs */ + u_char ifi_recvquota; /* polling quota for receive intrs */ + u_char ifi_xmitquota; /* polling quota for xmit intrs */ + u_char ifi_unused1; /* for future use */ u_long ifi_mtu; /* maximum transmission unit */ u_long ifi_metric; /* routing metric (external only) */ - u_long ifi_baudrate; /* linespeed */ + u_long ifi_baudrate; /* linespeed */ /* volatile statistics */ - u_long ifi_ipackets; /* packets received on interface */ - u_long ifi_ierrors; /* input errors on interface */ - u_long ifi_opackets; /* packets sent on interface */ - u_long ifi_oerrors; /* output errors on interface */ - u_long ifi_collisions; /* collisions on csma interfaces */ - u_long ifi_ibytes; /* total number of octets received */ - u_long ifi_obytes; /* total number of octets sent */ - u_long ifi_imcasts; /* packets received via multicast */ - u_long ifi_omcasts; /* packets sent via multicast */ - u_long ifi_iqdrops; /* dropped on input, this interface */ - u_long ifi_noproto; /* destined for unsupported protocol */ -#ifdef __APPLE__ + u_int64_t ifi_ipackets; /* packets received on interface */ + u_int64_t ifi_ierrors; /* input errors on interface */ + u_int64_t ifi_opackets; /* packets sent on interface */ + u_int64_t ifi_oerrors; /* output errors on interface */ + u_int64_t ifi_collisions; /* collisions on csma interfaces */ + u_int64_t ifi_ibytes; /* total number of octets received */ + u_int64_t ifi_obytes; /* total number of octets sent */ + u_int64_t ifi_imcasts; /* packets received via multicast */ + u_int64_t ifi_omcasts; /* packets sent via multicast */ + u_int64_t ifi_iqdrops; /* dropped on input, this interface */ + u_int64_t ifi_noproto; /* destined for unsupported protocol */ u_long ifi_recvtiming; /* usec spent receiving when timing */ u_long ifi_xmittiming; /* usec spent xmitting when timing */ -#endif +#define IF_LASTCHANGEUPTIME 1 /* lastchange: 1-uptime 0-calendar time */ struct timeval ifi_lastchange; /* time of last administrative change */ -#ifdef __APPLE__ - u_long default_proto; /* Default dl_tag when none is specified - * on dlil_output */ -#endif u_long ifi_hwassist; /* HW offload capabilities */ -#ifdef KERNEL_PRIVATE - u_short ifi_nvlans; /* number of attached vlans */ - u_short ifi_reserved_1; /* for future use */ -#else KERNEL_PRIVATE - u_long ifi_reserved1; /* for future use */ -#endif KERNEL_PRIVATE - u_long ifi_reserved2; /* for future use */ }; -#endif + +#define if_mtu if_data.ifi_mtu +#define if_type if_data.ifi_type +#define if_typelen if_data.ifi_typelen +#define if_physical if_data.ifi_physical +#define if_addrlen if_data.ifi_addrlen +#define if_hdrlen if_data.ifi_hdrlen +#define if_metric if_data.ifi_metric +#define if_baudrate if_data.ifi_baudrate +#define if_hwassist if_data.ifi_hwassist +#define if_ipackets if_data.ifi_ipackets +#define if_ierrors if_data.ifi_ierrors +#define if_opackets if_data.ifi_opackets +#define if_oerrors if_data.ifi_oerrors +#define if_collisions if_data.ifi_collisions +#define if_ibytes if_data.ifi_ibytes +#define if_obytes if_data.ifi_obytes +#define if_imcasts if_data.ifi_imcasts +#define if_omcasts 
if_data.ifi_omcasts +#define if_iqdrops if_data.ifi_iqdrops +#define if_noproto if_data.ifi_noproto +#define if_lastchange if_data.ifi_lastchange +#define if_recvquota if_data.ifi_recvquota +#define if_xmitquota if_data.ifi_xmitquota +#define if_iflags if_data.ifi_iflags + +struct mbuf; +struct ifaddr; +TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ +TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ +TAILQ_HEAD(ifprefixhead, ifprefix); +LIST_HEAD(ifmultihead, ifmultiaddr); +struct tqdummy; +TAILQ_HEAD(tailq_head, tqdummy); + +/* + * Forward structure declarations for function prototypes [sic]. + */ +struct proc; +struct rtentry; +struct socket; +struct ether_header; +struct sockaddr_dl; +struct ifnet_filter; + +TAILQ_HEAD(ifnet_filter_head, ifnet_filter); +TAILQ_HEAD(ddesc_head_name, dlil_demux_desc); + +/* bottom 16 bits reserved for hardware checksum */ +#define IF_HWASSIST_CSUM_IP 0x0001 /* will csum IP */ +#define IF_HWASSIST_CSUM_TCP 0x0002 /* will csum TCP */ +#define IF_HWASSIST_CSUM_UDP 0x0004 /* will csum UDP */ +#define IF_HWASSIST_CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ +#define IF_HWASSIST_CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ +#define IF_HWASSIST_CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation */ +#define IF_HWASSIST_CSUM_MASK 0xffff +#define IF_HWASSIST_CSUM_FLAGS(hwassist) ((hwassist) & IF_HWASSIST_CSUM_MASK) + +/* VLAN support */ +#define IF_HWASSIST_VLAN_TAGGING 0x10000 /* supports VLAN tagging */ +#define IF_HWASSIST_VLAN_MTU 0x20000 /* supports VLAN MTU-sized packet (for software VLAN) */ + +#define IFNET_RW_LOCK 1 /* * Structure defining a queue for a network interface. */ struct ifqueue { - struct mbuf *ifq_head; - struct mbuf *ifq_tail; + void *ifq_head; + void *ifq_tail; int ifq_len; int ifq_maxlen; int ifq_drops; }; +struct ddesc_head_str; +struct proto_hash_entry; +struct kev_msg; + /* * Structure defining a network interface. * @@ -223,13 +330,16 @@ struct ifqueue { */ struct ifnet { void *if_softc; /* pointer to driver state */ - char *if_name; /* name, e.g. ``en'' or ``lo'' */ + const char *if_name; /* name, e.g. 
``en'' or ``lo'' */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ -#ifdef __APPLE__ - struct tailq_head proto_head; /* Head for if_proto structures */ -#endif - int if_pcount; /* number of promiscuous listeners */ + u_long if_refcnt; +#ifdef __KPI_INTERFACE__ + ifnet_check_multi if_check_multi; +#else + void* if_check_multi; +#endif __KPI_INTERFACE__ + int if_pcount; /* number of promiscuous listeners */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ short if_unit; /* sub-unit for lower level driver */ @@ -238,118 +348,109 @@ struct ifnet { int if_ipending; /* interrupts pending */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ - struct if_data if_data; + struct if_data_internal if_data; -#ifdef __APPLE__ /* New with DLIL */ +#ifdef BSD_KERNEL_PRIVATE + int if_usecnt; +#else int refcnt; +#endif int offercnt; - int (*if_output)(struct ifnet *ifnet_ptr, struct mbuf *m); - int (*if_ioctl)(struct ifnet *ifnet_ptr, u_long ioctl_code, void *ioctl_arg); - int (*if_set_bpf_tap)(struct ifnet *ifp, int mode, - int (*bpf_callback)(struct ifnet *, struct mbuf *)); - int (*if_free)(struct ifnet *ifnet_ptr); - int (*if_demux)(struct ifnet *ifnet_ptr, struct mbuf *mbuf_ptr, - char *frame_ptr, void *if_proto_ptr); - - int (*if_event)(struct ifnet *ifnet_ptr, caddr_t event_ptr); - - int (*if_framer)(struct ifnet *ifp, - struct mbuf **m, - struct sockaddr *dest, - char *dest_linkaddr, - char *frame_type); +#ifdef __KPI_INTERFACE__ + ifnet_output_func if_output; + ifnet_ioctl_func if_ioctl; + ifnet_set_bpf_tap if_set_bpf_tap; + ifnet_detached_func if_free; + ifnet_demux_func if_demux; + ifnet_event_func if_event; + ifnet_framer_func if_framer; + ifnet_family_t if_family; /* ulong assigned by Apple */ +#else + void* if_output; + void* if_ioctl; + void* if_set_bpf_tap; + void* if_free; + void* if_demux; + void* if_event; + void* if_framer; + u_long if_family; /* ulong assigned by Apple */ +#endif - u_long if_family; /* ulong assigned by Apple */ - struct tailq_head if_flt_head; + struct ifnet_filter_head if_flt_head; /* End DLIL specific */ - void *reserved0; /* for future use */ + u_long if_delayed_detach; /* need to perform delayed detach */ void *if_private; /* private to interface */ long if_eflags; /* autoaddr, autoaddr done, etc.
*/ -#endif /* __APPLE__ */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ /* procedure handles */ -#ifndef __APPLE__ - int (*if_output) /* output routine (enqueue) */ - __P((struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry *)); - void (*if_start) /* initiate output routine */ - __P((struct ifnet *)); - int (*if_done) /* output complete routine */ - __P((struct ifnet *)); /* (XXX not used; fake prototype) */ - int (*if_ioctl) /* ioctl routine */ - __P((struct ifnet *, u_long, caddr_t)); - void (*if_watchdog) /* timer routine */ - __P((struct ifnet *)); -#endif - int (*if_poll_recv) /* polled receive routine */ - __P((struct ifnet *, int *)); - int (*if_poll_xmit) /* polled transmit routine */ - __P((struct ifnet *, int *)); - void (*if_poll_intren) /* polled interrupt reenable routine */ - __P((struct ifnet *)); - void (*if_poll_slowinput) /* input routine for slow devices */ - __P((struct ifnet *, struct mbuf *)); - void (*if_init) /* Init routine */ - __P((void *)); - int (*if_resolvemulti) /* validate/resolve multicast */ - __P((struct ifnet *, struct sockaddr **, struct sockaddr *)); - struct ifqueue if_snd; /* output queue */ - struct ifqueue *if_poll_slowq; /* input queue for slow devices */ +#ifdef __KPI_INTERFACE__ + union { + int (*original)(struct ifnet *ifp, u_long protocol_family, + struct ddesc_head_str *demux_desc_head); + ifnet_add_proto_func kpi; + } if_add_proto_u; + ifnet_del_proto_func if_del_proto; +#else __KPI_INTERFACE__ + void* if_add_proto; + void* if_del_proto; +#endif __KPI_INTERFACE__ + struct proto_hash_entry *if_proto_hash; + void *if_kpi_storage; + + void *unused_was_init; + void *unused_was_resolvemulti; + + struct ifqueue if_snd; + u_long unused_2[1]; #ifdef __APPLE__ - u_long family_cookie; + u_long family_cookie; struct ifprefixhead if_prefixhead; /* list of prefixes per if */ - void *reserved1; /* for future use */ + +#ifdef _KERN_LOCKS_H_ +#if IFNET_RW_LOCK + lck_rw_t *if_lock; /* Lock to protect this interface */ +#else + lck_mtx_t *if_lock; /* Lock to protect this interface */ +#endif +#else + void *if_lock; +#endif + #else struct ifprefixhead if_prefixhead; /* list of prefixes per if */ #endif /* __APPLE__ */ + struct { + u_long length; + union { + u_char buffer[8]; + u_char *ptr; + } u; + } if_broadcast; }; -typedef void if_init_f_t __P((void *)); -#define if_mtu if_data.ifi_mtu -#define if_type if_data.ifi_type -#define if_typelen if_data.ifi_typelen -#define if_physical if_data.ifi_physical -#define if_addrlen if_data.ifi_addrlen -#define if_hdrlen if_data.ifi_hdrlen -#define if_metric if_data.ifi_metric -#define if_baudrate if_data.ifi_baudrate -#define if_hwassist if_data.ifi_hwassist -#define if_ipackets if_data.ifi_ipackets -#define if_ierrors if_data.ifi_ierrors -#define if_opackets if_data.ifi_opackets -#define if_oerrors if_data.ifi_oerrors -#define if_collisions if_data.ifi_collisions -#define if_ibytes if_data.ifi_ibytes -#define if_obytes if_data.ifi_obytes -#define if_imcasts if_data.ifi_imcasts -#define if_omcasts if_data.ifi_omcasts -#define if_iqdrops if_data.ifi_iqdrops -#define if_noproto if_data.ifi_noproto -#define if_lastchange if_data.ifi_lastchange -#define if_recvquota if_data.ifi_recvquota -#define if_xmitquota if_data.ifi_xmitquota -#ifdef KERNEL_PRIVATE -#define if_nvlans if_data.ifi_nvlans -#endif KERNEL_PRIVATE -#define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)0) +#define if_add_proto 
if_add_proto_u.original #ifndef __APPLE__ /* for compatibility with other BSDs */ #define if_addrlist if_addrhead #define if_list if_link -#endif +#endif !__APPLE__ + + +#endif /* PRIVATE */ + #ifdef KERNEL_PRIVATE /* * Structure describing a `cloning' interface. */ struct if_clone { LIST_ENTRY(if_clone) ifc_list; /* on list of cloners */ - const char *ifc_name; /* name of device, e.g. `vlan' */ + const char *ifc_name; /* name of device, e.g. `vlan' */ size_t ifc_namelen; /* length of name */ int ifc_minifs; /* minimum number of interfaces */ int ifc_maxunit; /* maximum unit number */ @@ -361,8 +462,7 @@ struct if_clone { }; #define IF_CLONE_INITIALIZER(name, create, destroy, minifs, maxunit) \ - { { 0 }, name, sizeof(name) - 1, minifs, maxunit, NULL, 0, create, destroy } -#endif + { { 0, 0 }, name, sizeof(name) - 1, minifs, maxunit, NULL, 0, create, destroy } /* * Bit values in if_ipending @@ -383,7 +483,7 @@ struct if_clone { if ((ifq)->ifq_tail == 0) \ (ifq)->ifq_head = m; \ else \ - (ifq)->ifq_tail->m_nextpkt = m; \ + ((struct mbuf*)(ifq)->ifq_tail)->m_nextpkt = m; \ (ifq)->ifq_tail = m; \ (ifq)->ifq_len++; \ } @@ -404,12 +504,11 @@ struct if_clone { } \ } -#ifdef KERNEL #define IF_ENQ_DROP(ifq, m) if_enq_drop(ifq, m) #if defined(__GNUC__) && defined(MT_HEADER) static __inline int -if_queue_drop(struct ifqueue *ifq, struct mbuf *m) +if_queue_drop(struct ifqueue *ifq, __unused struct mbuf *m) { IF_DROP(ifq); return 0; @@ -427,22 +526,15 @@ if_enq_drop(struct ifqueue *ifq, struct mbuf *m) #else #ifdef MT_HEADER -int if_enq_drop __P((struct ifqueue *, struct mbuf *)); -#endif +int if_enq_drop(struct ifqueue *, struct mbuf *); +#endif MT_HEADER -#endif -#endif /* __APPLE_API_UNSTABLE */ +#endif defined(__GNUC__) && defined(MT_HEADER) -/* - * 72 was chosen below because it is the size of a TCP/IP - * header (40) + the minimum mss (32). - */ -#define IF_MINMTU 72 -#define IF_MAXMTU 65535 +#endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ -#ifdef __APPLE_API_UNSTABLE +#ifdef PRIVATE /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, @@ -454,26 +546,27 @@ struct ifaddr { struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ -#ifndef __APPLE__ - /* Use of if_data doesn't justify change of API */ - struct if_data if_data; /* not all members are meaningful */ -#endif struct ifnet *ifa_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ - __P((int, struct rtentry *, struct sockaddr *)); + (int, struct rtentry *, struct sockaddr *); u_short ifa_flags; /* mostly rt_flags for cloning */ - short ifa_refcnt;/* 16bit ref count, use ifaref, ifafree */ + int ifa_refcnt;/* 32bit ref count, use ifaref, ifafree */ int ifa_metric; /* cost of going out this interface */ #ifdef notdef struct rtentry *ifa_rt; /* XXXX for ROUTETOIF ????? 
*/ #endif - u_long ifa_dlt; int (*ifa_claim_addr) /* check if an addr goes to this if */ - __P((struct ifaddr *, struct sockaddr *)); + (struct ifaddr *, const struct sockaddr *); + u_long ifa_debug; /* debug flags */ }; -#define IFA_ROUTE RTF_UP /* route installed */ +#define IFA_ROUTE RTF_UP /* route installed (0x1) */ +#define IFA_CLONING RTF_CLONING /* (0x100) */ +#define IFA_ATTACHED 0x1 /* ifa_debug: IFA is attached to an interface */ + +#endif /* PRIVATE */ +#ifdef KERNEL_PRIVATE /* * The prefix structure contains information about one prefix * of an interface. They are maintained by the different address families, @@ -487,6 +580,10 @@ struct ifprefix { u_char ifpr_plen; /* prefix length in bits */ u_char ifpr_type; /* protocol dependent prefix type */ }; +#endif /* KERNEL_PRIVATE */ + +#ifdef PRIVATE +typedef void (*ifma_protospec_free_func)(void* ifma_protospec); /* * Multicast address structure. This is analogous to the ifaddr @@ -497,81 +594,81 @@ struct ifprefix { struct ifmultiaddr { LIST_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ - struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ - struct ifnet *ifma_ifp; /* back-pointer to interface */ - u_int ifma_refcount; /* reference count */ - void *ifma_protospec; /* protocol-specific state, if any */ + struct ifmultiaddr *ifma_ll; /* link-layer translation, if any */ + struct ifnet *ifma_ifp; /* back-pointer to interface */ + u_int ifma_usecount; /* use count, protected by ifp's lock */ + void *ifma_protospec; /* protocol-specific state, if any */ + int32_t ifma_refcount; /* reference count, atomically protected */ + ifma_protospec_free_func ifma_free; /* function called to free ifma_protospec */ }; +#endif /* PRIVATE */ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE #define IFAREF(ifa) ifaref(ifa) #define IFAFREE(ifa) ifafree(ifa) -#ifdef __APPLE_API_PRIVATE -extern struct ifnethead ifnet; +/* + * To preserve kmem compatibility, we define + * ifnet_head to ifnet. This should be temp. 
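A sketch of the reference discipline implied by the IFAREF/IFAFREE macros above and the widened 32-bit ifa_refcnt: any code that caches an ifaddr pointer takes a reference first and balances it with IFAFREE() when done. The example_* helper below is hypothetical, not code from this patch, and elides the interface locking (see the ifnet_lock_* routines declared below):

    /*
     * Sketch only: hold the first address of an interface across some
     * later use. The caller must balance with IFAFREE(ifa).
     */
    static struct ifaddr *
    example_hold_first_addr(struct ifnet *ifp)
    {
        struct ifaddr *ifa;

        ifa = TAILQ_FIRST(&ifp->if_addrhead);
        if (ifa != NULL)
            IFAREF(ifa);    /* caller now owns one reference */
        return (ifa);
    }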
+ */ +#define ifnet_head ifnet +extern struct ifnethead ifnet_head; extern struct ifnet **ifindex2ifnet; extern int ifqmaxlen; extern struct ifnet loif[]; extern int if_index; extern struct ifaddr **ifnet_addrs; -#endif /* __APPLE_API_PRIVATE */ +extern struct ifnet *lo_ifp; + +int if_addmulti(struct ifnet *, const struct sockaddr *, struct ifmultiaddr **); +int if_allmulti(struct ifnet *, int); +void if_attach(struct ifnet *); +int if_delmultiaddr(struct ifmultiaddr *ifma, int locked); +int if_delmulti(struct ifnet *, const struct sockaddr *); +void if_down(struct ifnet *); +void if_route(struct ifnet *, int flag, int fam); +void if_unroute(struct ifnet *, int flag, int fam); +void if_up(struct ifnet *); +void if_updown(struct ifnet *ifp, int up); +/*void ifinit(void);*/ /* declared in systm.h for main() */ +int ifioctl(struct socket *, u_long, caddr_t, struct proc *); +int ifioctllocked(struct socket *, u_long, caddr_t, struct proc *); +struct ifnet *ifunit(const char *); +struct ifnet *if_withname(struct sockaddr *); -#ifndef __APPLE__ -void ether_ifattach __P((struct ifnet *, int)); -void ether_ifdetach __P((struct ifnet *, int)); -void ether_input __P((struct ifnet *, struct ether_header *, struct mbuf *)); -void ether_demux __P((struct ifnet *, struct ether_header *, struct mbuf *)); -int ether_output __P((struct ifnet *, - struct mbuf *, struct sockaddr *, struct rtentry *)); -int ether_output_frame __P((struct ifnet *, struct mbuf *)); -int ether_ioctl __P((struct ifnet *, int, caddr_t)); -#endif - -int if_addmulti __P((struct ifnet *, struct sockaddr *, - struct ifmultiaddr **)); -int if_allmulti __P((struct ifnet *, int)); -void if_attach __P((struct ifnet *)); -int if_delmultiaddr __P((struct ifmultiaddr *ifma)); -int if_delmulti __P((struct ifnet *, struct sockaddr *)); -void if_down __P((struct ifnet *)); -void if_route __P((struct ifnet *, int flag, int fam)); -void if_unroute __P((struct ifnet *, int flag, int fam)); -void if_up __P((struct ifnet *)); -/*void ifinit __P((void));*/ /* declared in systm.h for main() */ -int ifioctl __P((struct socket *, u_long, caddr_t, struct proc *)); -int ifpromisc __P((struct ifnet *, int)); -struct ifnet *ifunit __P((const char *)); -struct ifnet *if_withname __P((struct sockaddr *)); - -int if_poll_recv_slow __P((struct ifnet *ifp, int *quotap)); -void if_poll_xmit_slow __P((struct ifnet *ifp, int *quotap)); -void if_poll_throttle __P((void)); -void if_poll_unthrottle __P((void *)); -void if_poll_init __P((void)); -void if_poll __P((void)); -#ifdef KERNEL_PRIVATE void if_clone_attach(struct if_clone *); void if_clone_detach(struct if_clone *); -#endif KERNEL_PRIVATE - -struct ifaddr *ifa_ifwithaddr __P((struct sockaddr *)); -struct ifaddr *ifa_ifwithdstaddr __P((struct sockaddr *)); -struct ifaddr *ifa_ifwithnet __P((struct sockaddr *)); -struct ifaddr *ifa_ifwithroute __P((int, struct sockaddr *, - struct sockaddr *)); -struct ifaddr *ifaof_ifpforaddr __P((struct sockaddr *, struct ifnet *)); -void ifafree __P((struct ifaddr *)); -void ifaref __P((struct ifaddr *)); - -struct ifmultiaddr *ifmaof_ifpforaddr __P((struct sockaddr *, - struct ifnet *)); -#ifndef __APPLE__ -int if_simloop __P((struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, int hlen)); -#endif - -#endif /* KERNEL */ - -#endif /* __APPLE_API_UNSTABLE */ +void ifnet_lock_assert(struct ifnet *ifp, int what); +void ifnet_lock_shared(struct ifnet *ifp); +void ifnet_lock_exclusive(struct ifnet *ifp); +void ifnet_lock_done(struct ifnet *ifp); + +void
ifnet_head_lock_shared(void); +void ifnet_head_lock_exclusive(void); +void ifnet_head_done(void); + +void if_attach_ifa(struct ifnet * ifp, struct ifaddr *ifa); +void if_detach_ifa(struct ifnet * ifp, struct ifaddr *ifa); + +void ifma_reference(struct ifmultiaddr *ifma); +void ifma_release(struct ifmultiaddr *ifma); + +struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); +struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *); +struct ifaddr *ifa_ifwithnet(const struct sockaddr *); +struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *); +struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); +void ifafree(struct ifaddr *); +void ifaref(struct ifaddr *); + +struct ifmultiaddr *ifmaof_ifpforaddr(const struct sockaddr *, struct ifnet *); + +#ifdef BSD_KERNEL_PRIVATE +void if_data_internal_to_if_data(const struct if_data_internal *if_data_int, + struct if_data *if_data); +void if_data_internal_to_if_data64(const struct if_data_internal *if_data_int, + struct if_data64 *if_data64); +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* !_NET_IF_VAR_H_ */ diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 74f9e99ba..21e88cddd 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -79,63 +79,264 @@ #include #include #include +#include #include #include +#include #include +#include + #ifdef INET #include #include #endif #include +#include -#define ETHER_VLAN_ENCAP_LEN 4 /* len of 802.1Q VLAN encapsulation */ -#define IF_MAXUNIT 0x7fff /* historical value */ - -#define IFP2AC(p) ((struct arpcom *)p) - -#define VLAN_PROTO_FAMILY 0x766c616e /* 'vlan' */ +#define IF_MAXUNIT 0x7fff /* historical value */ #define VLANNAME "vlan" typedef int (bpf_callback_func)(struct ifnet *, struct mbuf *); typedef int (if_set_bpf_tap_func)(struct ifnet *ifp, int mode, bpf_callback_func * func); -struct vlan_mc_entry { - struct ether_addr mc_addr; - SLIST_ENTRY(vlan_mc_entry) mc_entries; -}; +/** + ** vlan locks + **/ +static __inline__ lck_grp_t * +my_lck_grp_alloc_init(const char * grp_name) +{ + lck_grp_t * grp; + lck_grp_attr_t * grp_attrs; + + grp_attrs = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attrs); + grp = lck_grp_alloc_init(grp_name, grp_attrs); + lck_grp_attr_free(grp_attrs); + return (grp); +} + +static __inline__ lck_mtx_t * +my_lck_mtx_alloc_init(lck_grp_t * lck_grp) +{ + lck_attr_t * lck_attrs; + lck_mtx_t * lck_mtx; + + lck_attrs = lck_attr_alloc_init(); + lck_attr_setdefault(lck_attrs); + lck_mtx = lck_mtx_alloc_init(lck_grp, lck_attrs); + lck_attr_free(lck_attrs); + return (lck_mtx); +} + +static lck_mtx_t * vlan_lck_mtx; + +static __inline__ void +vlan_lock_init(void) +{ + lck_grp_t * vlan_lck_grp; + + vlan_lck_grp = my_lck_grp_alloc_init("if_vlan"); + vlan_lck_mtx = my_lck_mtx_alloc_init(vlan_lck_grp); +} + +static __inline__ void +vlan_assert_lock_held(void) +{ + lck_mtx_assert(vlan_lck_mtx, LCK_MTX_ASSERT_OWNED); + return; +} + +static __inline__ void +vlan_assert_lock_not_held(void) +{ + lck_mtx_assert(vlan_lck_mtx, LCK_MTX_ASSERT_NOTOWNED); + return; +} -struct ifvlan { - char ifv_name[IFNAMSIZ]; /* our unique id */ - struct ifnet *ifv_ifp; /* our interface */ - struct ifnet *ifv_p; /* parent interface of this vlan */ +static __inline__ void +vlan_lock(void) +{ + lck_mtx_lock(vlan_lck_mtx); + return; +} + +static __inline__ void +vlan_unlock(void) +{ + lck_mtx_unlock(vlan_lck_mtx); + return; +} + +/** + ** vlan structures, types + **/ +struct vlan_parent; +LIST_HEAD(vlan_parent_list, vlan_parent); 
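The ifnet_lock_* entry points declared in the header above replace direct manipulation of interface state; this file uses exactly that pattern later when it sets and clears IFEF_VLAN on the parent. A sketch of the intended pairing, with hypothetical example_* callers that are not part of this patch:

    /*
     * Sketch only: shared for read-side traversal, exclusive for
     * mutation, and every acquisition paired with ifnet_lock_done().
     */
    static int
    example_count_addresses(struct ifnet *ifp)
    {
        struct ifaddr *ifa;
        int count = 0;

        ifnet_lock_shared(ifp);
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
            count++;
        ifnet_lock_done(ifp);
        return (count);
    }

    static void
    example_set_eflag(struct ifnet *ifp, u_long flag)
    {
        ifnet_lock_exclusive(ifp);
        ifp->if_eflags |= flag;   /* e.g. IFEF_VLAN, as vlan_config() does */
        ifnet_lock_done(ifp);
    }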
+struct ifvlan; +LIST_HEAD(ifvlan_list, ifvlan); + +typedef struct vlan_parent { + LIST_ENTRY(vlan_parent) vlp_parent_list;/* list of parents */ + struct ifnet * vlp_ifp; /* interface */ + struct ifvlan_list vlp_vlan_list; /* list of VLAN's */ +#define VLPF_SUPPORTS_VLAN_MTU 0x1 +#define VLPF_CHANGE_IN_PROGRESS 0x2 +#define VLPF_DETACHING 0x4 + u_int32_t vlp_flags; + struct ifdevmtu vlp_devmtu; + UInt32 vlp_retain_count; +} vlan_parent, * vlan_parent_ref; + +struct ifvlan { + LIST_ENTRY(ifvlan) ifv_vlan_list; + char ifv_name[IFNAMSIZ]; /* our unique id */ + struct ifnet * ifv_ifp; /* our interface */ + vlan_parent_ref ifv_vlp; /* parent information */ struct ifv_linkmib { - int ifvm_parent; - int ifvm_encaplen; /* encapsulation length */ - int ifvm_mtufudge; /* MTU fudged by this much */ - int ifvm_mintu; /* min transmission unit */ - u_int16_t ifvm_proto; /* encapsulation ethertype */ - u_int16_t ifvm_tag; /* tag to apply on packets leaving if */ + u_int16_t ifvm_encaplen;/* encapsulation length */ + u_int16_t ifvm_mtufudge;/* MTU fudged by this much */ + u_int16_t ifvm_proto; /* encapsulation ethertype */ + u_int16_t ifvm_tag; /* tag to apply on packets leaving if */ } ifv_mib; - SLIST_HEAD(__vlan_mchead, vlan_mc_entry) vlan_mc_listhead; - LIST_ENTRY(ifvlan) ifv_list; - int ifv_flags; - int ifv_detaching; - u_long ifv_filter_id; - int ifv_filter_valid; - bpf_callback_func * ifv_bpf_input; - bpf_callback_func * ifv_bpf_output; + struct multicast_list ifv_multicast; +#define IFVF_PROMISC 0x1 /* promiscuous mode enabled */ +#define IFVF_DETACHING 0x2 /* interface is detaching */ +#define IFVF_READY 0x4 /* interface is ready */ + u_int32_t ifv_flags; + bpf_packet_func ifv_bpf_input; + bpf_packet_func ifv_bpf_output; }; -#define ifv_tag ifv_mib.ifvm_tag +typedef struct ifvlan * ifvlan_ref; + +typedef struct vlan_globals_s { + struct vlan_parent_list parent_list; + int verbose; +} * vlan_globals_ref; + +static vlan_globals_ref g_vlan; + +#define ifv_tag ifv_mib.ifvm_tag #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge -#define ifv_mintu ifv_mib.ifvm_mintu -#define IFVF_PROMISC 0x01 /* promiscuous mode enabled */ + +/** + ** vlan_parent_ref vlp_flags in-lines + **/ +static __inline__ int +vlan_parent_flags_supports_vlan_mtu(vlan_parent_ref vlp) +{ + return ((vlp->vlp_flags & VLPF_SUPPORTS_VLAN_MTU) != 0); +} + +static __inline__ void +vlan_parent_flags_set_supports_vlan_mtu(vlan_parent_ref vlp) +{ + vlp->vlp_flags |= VLPF_SUPPORTS_VLAN_MTU; + return; +} + +static __inline__ void +vlan_parent_flags_clear_supports_vlan_mtu(vlan_parent_ref vlp) +{ + vlp->vlp_flags &= ~VLPF_SUPPORTS_VLAN_MTU; + return; +} + +static __inline__ int +vlan_parent_flags_change_in_progress(vlan_parent_ref vlp) +{ + return ((vlp->vlp_flags & VLPF_CHANGE_IN_PROGRESS) != 0); +} + +static __inline__ void +vlan_parent_flags_set_change_in_progress(vlan_parent_ref vlp) +{ + vlp->vlp_flags |= VLPF_CHANGE_IN_PROGRESS; + return; +} + +static __inline__ void +vlan_parent_flags_clear_change_in_progress(vlan_parent_ref vlp) +{ + vlp->vlp_flags &= ~VLPF_CHANGE_IN_PROGRESS; + return; +} + +static __inline__ int +vlan_parent_flags_detaching(struct vlan_parent * vlp) +{ + return ((vlp->vlp_flags & VLPF_DETACHING) != 0); +} + +static __inline__ void +vlan_parent_flags_set_detaching(struct vlan_parent * vlp) +{ + vlp->vlp_flags |= VLPF_DETACHING; + return; +} + + +/** + ** ifvlan_flags in-lines routines + **/ +static __inline__ int +ifvlan_flags_promisc(ifvlan_ref ifv) +{ + return ((ifv->ifv_flags & 
IFVF_PROMISC) != 0); +} + +static __inline__ void +ifvlan_flags_set_promisc(ifvlan_ref ifv) +{ + ifv->ifv_flags |= IFVF_PROMISC; + return; +} + +static __inline__ void +ifvlan_flags_clear_promisc(ifvlan_ref ifv) +{ + ifv->ifv_flags &= ~IFVF_PROMISC; + return; +} + +static __inline__ int +ifvlan_flags_ready(ifvlan_ref ifv) +{ + return ((ifv->ifv_flags & IFVF_READY) != 0); +} + +static __inline__ void +ifvlan_flags_set_ready(ifvlan_ref ifv) +{ + ifv->ifv_flags |= IFVF_READY; + return; +} + +static __inline__ void +ifvlan_flags_clear_ready(ifvlan_ref ifv) +{ + ifv->ifv_flags &= ~IFVF_READY; + return; +} + +static __inline__ int +ifvlan_flags_detaching(ifvlan_ref ifv) +{ + return ((ifv->ifv_flags & IFVF_DETACHING) != 0); +} + +static __inline__ void +ifvlan_flags_set_detaching(ifvlan_ref ifv) +{ + ifv->ifv_flags |= IFVF_DETACHING; + return; +} #if 0 SYSCTL_DECL(_net_link); @@ -145,69 +346,102 @@ SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); #define M_VLAN M_DEVBUF -MALLOC_DEFINE(M_VLAN, VLANNAME, "802.1Q Virtual LAN Interface"); - -static LIST_HEAD(, ifvlan) ifv_list; - -#if 0 -/* - * Locking: one lock is used to guard both the ifv_list and modification - * to vlan data structures. We are rather conservative here; probably - * more than necessary. - */ -static struct mtx ifv_mtx; -#define VLAN_LOCK_INIT() mtx_init(&ifv_mtx, VLANNAME, NULL, MTX_DEF) -#define VLAN_LOCK_DESTROY() mtx_destroy(&ifv_mtx) -#define VLAN_LOCK_ASSERT() mtx_assert(&ifv_mtx, MA_OWNED) -#define VLAN_LOCK() mtx_lock(&ifv_mtx) -#define VLAN_UNLOCK() mtx_unlock(&ifv_mtx) -#else -#define VLAN_LOCK_INIT() -#define VLAN_LOCK_DESTROY() -#define VLAN_LOCK_ASSERT() -#define VLAN_LOCK() -#define VLAN_UNLOCK() -#endif 0 - static int vlan_clone_create(struct if_clone *, int); static void vlan_clone_destroy(struct ifnet *); +static int vlan_input(struct mbuf *m, char *frame_header, struct ifnet *ifp, + u_long protocol_family, int sync_ok); static int vlan_output(struct ifnet *ifp, struct mbuf *m); -static void vlan_ifinit(void *foo); -static int vlan_ioctl(struct ifnet *ifp, u_long cmd, void * addr); -static int vlan_set_bpf_tap(struct ifnet * ifp, int mode, - bpf_callback_func * func); +static int vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * addr); +static int vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, + bpf_packet_func func); static int vlan_attach_protocol(struct ifnet *ifp); static int vlan_detach_protocol(struct ifnet *ifp); -static int vlan_attach_filter(struct ifnet * ifp, u_long * filter_id); -static int vlan_detach_filter(u_long filter_id); static int vlan_setmulti(struct ifnet *ifp); static int vlan_unconfig(struct ifnet *ifp); -static int vlan_config(struct ifvlan *ifv, struct ifnet *p, int tag); -static int vlan_if_free(struct ifnet * ifp); +static int vlan_config(struct ifnet * ifp, struct ifnet * p, int tag); +static void vlan_if_free(struct ifnet * ifp); +static void vlan_remove(ifvlan_ref ifv); +static void vlan_if_detach(struct ifnet * ifp); +static int vlan_new_mtu(struct ifnet * ifp, int mtu); static struct if_clone vlan_cloner = IF_CLONE_INITIALIZER(VLANNAME, - vlan_clone_create, vlan_clone_destroy, 0, IF_MAXUNIT); + vlan_clone_create, + vlan_clone_destroy, + 0, + IF_MAXUNIT); +static void interface_link_event(struct ifnet * ifp, u_long event_code); +static void vlan_parent_link_event(vlan_parent_ref vlp, + u_long event_code); +extern int dlil_input_packet(struct ifnet *ifp, struct mbuf *m, char *frame_header); + +static int +vlan_globals_init(void) +{ + vlan_globals_ref v; + + 
vlan_assert_lock_not_held(); + + if (g_vlan != NULL) { + return (0); + } + v = _MALLOC(sizeof(*v), M_VLAN, M_WAITOK); + if (v != NULL) { + LIST_INIT(&v->parent_list); + v->verbose = 0; + } + vlan_lock(); + if (g_vlan != NULL) { + vlan_unlock(); + if (v != NULL) { + _FREE(v, M_VLAN); + } + return (0); + } + g_vlan = v; + vlan_unlock(); + if (v == NULL) { + return (ENOMEM); + } + return (0); +} + +static int +siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p) +{ + struct ifreq ifr; + int error; + + bzero(&ifr, sizeof(ifr)); + error = dlil_ioctl(0, ifp, SIOCGIFDEVMTU, (caddr_t)&ifr); + if (error == 0) { + *ifdm_p = ifr.ifr_devmtu; + } + return (error); +} -static if_set_bpf_tap_func nop_if_bpf; -static int nop_if_free(struct ifnet *); -static int nop_if_ioctl(struct ifnet *, u_long, void *); -static int nop_if_output(struct ifnet * ifp, struct mbuf * m); +static int +siocsifaltmtu(struct ifnet * ifp, int mtu) +{ + struct ifreq ifr; -static void interface_link_event(struct ifnet * ifp, u_long event_code); + bzero(&ifr, sizeof(ifr)); + ifr.ifr_mtu = mtu; + return (dlil_ioctl(0, ifp, SIOCSIFALTMTU, (caddr_t)&ifr)); +} static __inline__ void vlan_bpf_output(struct ifnet * ifp, struct mbuf * m, - bpf_callback_func func) + bpf_packet_func func) { if (func != NULL) { - func(ifp, m); + (*func)(ifp, m); } return; } static __inline__ void vlan_bpf_input(struct ifnet * ifp, struct mbuf * m, - bpf_callback_func func, char * frame_header, + bpf_packet_func func, char * frame_header, int frame_header_len, int encap_len) { if (func != NULL) { @@ -217,7 +451,7 @@ vlan_bpf_input(struct ifnet * ifp, struct mbuf * m, } m->m_data -= frame_header_len; m->m_len += frame_header_len; - func(ifp, m); + (*func)(ifp, m); m->m_data += frame_header_len; m->m_len -= frame_header_len; if (encap_len > 0) { @@ -229,7 +463,7 @@ vlan_bpf_input(struct ifnet * ifp, struct mbuf * m, } static struct ifaddr * -ifaddr_byindex(unsigned int i) +ifaddr_byindex(int i) { if (i > if_index || i == 0) { return (NULL); @@ -237,6 +471,100 @@ ifaddr_byindex(unsigned int i) return (ifnet_addrs[i - 1]); } +/** + ** vlan_parent synchronization routines + **/ +static __inline__ void +vlan_parent_retain(vlan_parent_ref vlp) +{ + OSIncrementAtomic(&vlp->vlp_retain_count); +} + +static __inline__ void +vlan_parent_release(vlan_parent_ref vlp) +{ + UInt32 old_retain_count; + + old_retain_count = OSDecrementAtomic(&vlp->vlp_retain_count); + switch (old_retain_count) { + case 0: + panic("vlan_parent_release: retain count is 0\n"); + break; + case 1: + if (g_vlan->verbose) { + struct ifnet * ifp = vlp->vlp_ifp; + printf("vlan_parent_release(%s%d)\n", ifp->if_name, + ifp->if_unit); + } + FREE(vlp, M_VLAN); + break; + default: + break; + } + return; +} + +/* + * Function: vlan_parent_wait + * Purpose: + * Allows a single thread to gain exclusive access to the vlan_parent + * data structure. Some operations take a long time to complete, + * and some have side-effects that we can't predict. Holding the + * vlan_lock() across such operations is not possible. + * + * Notes: + * Before calling, you must be holding the vlan_lock and have taken + * a reference on the vlan_parent_ref. 
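Taken together with vlan_parent_signal() below, this calling convention brackets every long-running parent operation the same way; vlan_setmulti(), vlan_config(), and vlan_unconfig() later in this file all follow it. As a sketch, with a hypothetical example_* caller that is not part of this patch:

    /*
     * Sketch only: pin the vlan_parent, gain exclusive access, do the
     * slow work (possibly dropping the vlan lock), then wake waiters
     * and drop the pin.
     */
    static void
    example_exclusive_parent_op(vlan_parent_ref vlp)
    {
        vlan_lock();
        vlan_parent_retain(vlp);          /* keep vlp alive across msleep */
        vlan_parent_wait(vlp, "example_op");

        /* ...long-running change; may vlan_unlock()/vlan_lock()... */

        vlan_parent_signal(vlp, "example_op");
        vlan_unlock();
        vlan_parent_release(vlp);         /* drop the pin */
    }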
+ */ +static void +vlan_parent_wait(vlan_parent_ref vlp, const char * msg) +{ + int waited = 0; + + /* other add/remove/multicast-change in progress */ + while (vlan_parent_flags_change_in_progress(vlp)) { + if (g_vlan->verbose) { + struct ifnet * ifp = vlp->vlp_ifp; + + printf("%s%d: %s msleep\n", ifp->if_name, ifp->if_unit, msg); + } + waited = 1; + (void)msleep(vlp, vlan_lck_mtx, PZERO, msg, 0); + } + /* prevent other vlan parent remove/add from taking place */ + vlan_parent_flags_set_change_in_progress(vlp); + if (g_vlan->verbose && waited) { + struct ifnet * ifp = vlp->vlp_ifp; + + printf("%s%d: %s woke up\n", ifp->if_name, ifp->if_unit, msg); + } + return; +} + +/* + * Function: vlan_parent_signal + * Purpose: + *   Allows the thread that previously invoked vlan_parent_wait() to + *   give up exclusive access to the vlan_parent data structure, and wake up + *   any other threads waiting to access it. + * Notes: + *   Before calling, you must be holding the vlan_lock and have taken + *   a reference on the vlan_parent_ref. + */ +static void +vlan_parent_signal(vlan_parent_ref vlp, const char * msg) +{ + vlan_parent_flags_clear_change_in_progress(vlp); + wakeup((caddr_t)vlp); + if (g_vlan->verbose) { + struct ifnet * ifp = vlp->vlp_ifp; + + printf("%s%d: %s wakeup\n", ifp->if_name, ifp->if_unit, msg); + } + return; +} + + /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the @@ -246,154 +574,232 @@ ifaddr_byindex(unsigned int i) * to avoid this: there really is only one physical interface. */ static int -vlan_setmulti(struct ifnet *ifp) +vlan_setmulti(struct ifnet * ifp) { - struct ifnet *p; - struct ifmultiaddr *ifma, *rifma = NULL; - struct ifvlan *sc; - struct vlan_mc_entry *mc = NULL; - struct sockaddr_dl sdl; - int error; - - /* Find the parent. */ - sc = ifp->if_private; - p = sc->ifv_p; - if (p == NULL) { - /* no parent, so no need to program the multicast filter */ - return (0); + int error = 0; + ifvlan_ref ifv; + struct ifnet * p; + vlan_parent_ref vlp; + + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + goto unlock_done; } - - bzero((char *)&sdl, sizeof sdl); - sdl.sdl_len = sizeof sdl; - sdl.sdl_family = AF_LINK; - sdl.sdl_index = p->if_index; - sdl.sdl_type = IFT_ETHER; - sdl.sdl_alen = ETHER_ADDR_LEN; - - /* First, remove any existing filter entries. */ - while (SLIST_FIRST(&sc->vlan_mc_listhead) != NULL) { - mc = SLIST_FIRST(&sc->vlan_mc_listhead); - bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN); - error = if_delmulti(p, (struct sockaddr *)&sdl); - if (error) - return(error); - SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); - FREE(mc, M_VLAN); + vlp = ifv->ifv_vlp; + if (vlp == NULL) { + /* no parent, no need to program the multicast filter */ + goto unlock_done; + } + if (vlan_parent_flags_detaching(vlp)) { + goto unlock_done; } + vlan_parent_retain(vlp); + vlan_parent_wait(vlp, "vlan_setmulti"); - /* Now program new ones.
*/ - LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_LINK) - continue; - mc = _MALLOC(sizeof(struct vlan_mc_entry), M_VLAN, M_WAITOK); - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - (char *)&mc->mc_addr, ETHER_ADDR_LEN); - SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); - bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), - LLADDR(&sdl), ETHER_ADDR_LEN); - error = if_addmulti(p, (struct sockaddr *)&sdl, &rifma); - if (error) - return(error); + /* check again, things could have changed */ + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + goto signal_done; } + if (ifv->ifv_vlp != vlp) { + /* vlan parent changed */ + goto signal_done; + } + if (vlp == NULL) { + /* no parent, no need to program the multicast filter */ + goto signal_done; + } + p = vlp->vlp_ifp; + vlan_unlock(); - return(0); -} + /* update parent interface with our multicast addresses */ + error = multicast_list_program(&ifv->ifv_multicast, ifp, p); -#if 0 -/* - * VLAN support can be loaded as a module. The only place in the - * system that's intimately aware of this is ether_input. We hook - * into this code through vlan_input_p which is defined there and - * set here. Noone else in the system should be aware of this so - * we use an explicit reference here. - * - * NB: Noone should ever need to check if vlan_input_p is null or - * not. This is because interfaces have a count of the number - * of active vlans (if_nvlans) and this should never be bumped - * except by vlan_config--which is in this module so therefore - * the module must be loaded and vlan_input_p must be non-NULL. - */ -extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); + vlan_lock(); -static int -vlan_modevent(module_t mod, int type, void *data) -{ - - switch (type) { - case MOD_LOAD: - LIST_INIT(&ifv_list); - VLAN_LOCK_INIT(); - vlan_input_p = vlan_input; - if_clone_attach(&vlan_cloner); - break; - case MOD_UNLOAD: - if_clone_detach(&vlan_cloner); - vlan_input_p = NULL; - while (!LIST_EMPTY(&ifv_list)) - vlan_clone_destroy(LIST_FIRST(&ifv_list)->ifv_ifp); - VLAN_LOCK_DESTROY(); - break; - } - return 0; -} + signal_done: + vlan_parent_signal(vlp, "vlan_setmulti"); -static moduledata_t vlan_mod = { - "if_vlan", - vlan_modevent, - 0 -}; + unlock_done: + vlan_unlock(); + return (error); +} -DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +/** + ** vlan_parent list manipulation/lookup routines + **/ +static vlan_parent_ref +parent_list_lookup(struct ifnet * p) +{ + vlan_parent_ref vlp; -#endif 0 + LIST_FOREACH(vlp, &g_vlan->parent_list, vlp_parent_list) { + if (vlp->vlp_ifp == p) { + return (vlp); + } + } + return (NULL); +} -static struct ifvlan * -vlan_lookup_ifp_and_tag(struct ifnet * ifp, int tag) +static ifvlan_ref +vlan_parent_lookup_tag(vlan_parent_ref vlp, int tag) { - struct ifvlan * ifv; + ifvlan_ref ifv; - LIST_FOREACH(ifv, &ifv_list, ifv_list) { - if (ifp == ifv->ifv_p && tag == ifv->ifv_tag) { + LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) { + if (tag == ifv->ifv_tag) { return (ifv); } } return (NULL); } -static struct ifvlan * -vlan_lookup_ifp(struct ifnet * ifp) +static ifvlan_ref +vlan_lookup_parent_and_tag(struct ifnet * p, int tag) { - struct ifvlan * ifv; + vlan_parent_ref vlp; - LIST_FOREACH(ifv, &ifv_list, ifv_list) { - if (ifp == ifv->ifv_p) { - return (ifv); - } + vlp = parent_list_lookup(p); + if (vlp != NULL) { + return (vlan_parent_lookup_tag(vlp, tag)); } return (NULL); } +static int 
+vlan_parent_find_max_mtu(vlan_parent_ref vlp, ifvlan_ref exclude_ifv) +{ + int max_mtu = 0; + ifvlan_ref ifv; + + LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) { + int req_mtu; + + if (exclude_ifv == ifv) { + continue; + } + req_mtu = ifv->ifv_ifp->if_mtu + ifv->ifv_mtufudge; + if (req_mtu > max_mtu) { + max_mtu = req_mtu; + } + } + return (max_mtu); +} + +/* + * Function: vlan_parent_create + * Purpose: + * Create a vlan_parent structure to hold the VLAN's for the given + * interface. Add it to the list of VLAN parents. + */ +static int +vlan_parent_create(struct ifnet * p, vlan_parent_ref * ret_vlp) +{ + int error; + vlan_parent_ref vlp; + + *ret_vlp = NULL; + vlp = _MALLOC(sizeof(*vlp), M_VLAN, M_WAITOK); + if (vlp == NULL) { + return (ENOMEM); + } + bzero(vlp, sizeof(*vlp)); + error = siocgifdevmtu(p, &vlp->vlp_devmtu); + if (error != 0) { + printf("vlan_parent_create (%s%d): siocgifdevmtu failed, %d\n", + p->if_name, p->if_unit, error); + FREE(vlp, M_VLAN); + return (error); + } + LIST_INIT(&vlp->vlp_vlan_list); + vlp->vlp_ifp = p; + vlan_parent_retain(vlp); + if (p->if_hwassist + & (IF_HWASSIST_VLAN_MTU | IF_HWASSIST_VLAN_TAGGING)) { + vlan_parent_flags_set_supports_vlan_mtu(vlp); + } + *ret_vlp = vlp; + return (0); +} + +static void +vlan_parent_remove_all_vlans(vlan_parent_ref vlp) +{ + ifvlan_ref ifv; + struct ifnet * p; + + vlan_assert_lock_held(); + + while ((ifv = LIST_FIRST(&vlp->vlp_vlan_list)) != NULL) { + vlan_remove(ifv); + vlan_unlock(); + vlan_if_detach(ifv->ifv_ifp); + vlan_lock(); + } + + /* the vlan parent has no more VLAN's */ + p = vlp->vlp_ifp; + ifnet_lock_exclusive(p); + p->if_eflags &= ~IFEF_VLAN; + ifnet_lock_done(p); + LIST_REMOVE(vlp, vlp_parent_list); + vlan_unlock(); + vlan_parent_release(vlp); + vlan_lock(); + + return; +} + +static __inline__ int +vlan_parent_no_vlans(vlan_parent_ref vlp) +{ + return (LIST_EMPTY(&vlp->vlp_vlan_list)); +} + +static void +vlan_parent_add_vlan(vlan_parent_ref vlp, ifvlan_ref ifv, int tag) +{ + LIST_INSERT_HEAD(&vlp->vlp_vlan_list, ifv, ifv_vlan_list); + ifv->ifv_vlp = vlp; + ifv->ifv_tag = tag; + return; +} + +static void +vlan_parent_remove_vlan(__unused vlan_parent_ref vlp, ifvlan_ref ifv) +{ + ifv->ifv_vlp = NULL; + LIST_REMOVE(ifv, ifv_vlan_list); + return; +} + static void vlan_clone_attach(void) { if_clone_attach(&vlan_cloner); + vlan_lock_init(); return; } static int vlan_clone_create(struct if_clone *ifc, int unit) { - int error; - struct ifvlan *ifv; - struct ifnet *ifp; + int error; + ifvlan_ref ifv; + struct ifnet * ifp; + error = vlan_globals_init(); + if (error != 0) { + return (error); + } ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK); bzero(ifv, sizeof(struct ifvlan)); - SLIST_INIT(&ifv->vlan_mc_listhead); + multicast_list_init(&ifv->ifv_multicast); /* use the interface name as the unique id for ifp recycle */ - if (snprintf(ifv->ifv_name, sizeof(ifv->ifv_name), "%s%d", - ifc->ifc_name, unit) >= sizeof(ifv->ifv_name)) { + if ((unsigned int)snprintf(ifv->ifv_name, sizeof(ifv->ifv_name), "%s%d", + ifc->ifc_name, unit) >= sizeof(ifv->ifv_name)) { FREE(ifv, M_VLAN); return (EINVAL); } @@ -405,9 +811,7 @@ vlan_clone_create(struct if_clone *ifc, int unit) FREE(ifv, M_VLAN); return (error); } - ifv->ifv_ifp = ifp; - ifp->if_private = ifv; - ifp->if_name = (char *)ifc->ifc_name; + ifp->if_name = ifc->ifc_name; ifp->if_unit = unit; ifp->if_family = APPLE_IF_FAM_VLAN; @@ -420,49 +824,45 @@ vlan_clone_create(struct if_clone *ifc, int unit) ifp->if_ioctl = vlan_ioctl; ifp->if_set_bpf_tap = 
vlan_set_bpf_tap; - ifp->if_free = nop_if_free; - ifp->if_output = nop_if_output; + ifp->if_free = vlan_if_free; + ifp->if_output = vlan_output; ifp->if_hwassist = 0; ifp->if_addrlen = ETHER_ADDR_LEN; /* XXX ethernet specific */ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; + + /* XXX ethernet specific */ + ifp->if_broadcast.length = ETHER_ADDR_LEN; + bcopy(etherbroadcastaddr, ifp->if_broadcast.u.buffer, ETHER_ADDR_LEN); + error = dlil_if_attach(ifp); if (error) { dlil_if_release(ifp); FREE(ifv, M_VLAN); return (error); } + ifp->if_private = ifv; + ifv->ifv_ifp = ifp; /* attach as ethernet */ bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header)); - - VLAN_LOCK(); - LIST_INSERT_HEAD(&ifv_list, ifv, ifv_list); - VLAN_UNLOCK(); - return (0); } static void -vlan_remove(struct ifvlan * ifv) +vlan_remove(ifvlan_ref ifv) { - VLAN_LOCK_ASSERT(); - ifv->ifv_detaching = 1; + vlan_assert_lock_held(); + ifvlan_flags_set_detaching(ifv); vlan_unconfig(ifv->ifv_ifp); - LIST_REMOVE(ifv, ifv_list); return; } static void vlan_if_detach(struct ifnet * ifp) { - ifp->if_output = nop_if_output; - ifp->if_ioctl = nop_if_ioctl; - ifp->if_set_bpf_tap = &nop_if_bpf; - if (dlil_if_detach(ifp) == DLIL_WAIT_FOR_FREE) { - ifp->if_free = vlan_if_free; - } else { + if (dlil_if_detach(ifp) != DLIL_WAIT_FOR_FREE) { vlan_if_free(ifp); } return; @@ -471,27 +871,35 @@ vlan_if_detach(struct ifnet * ifp) static void vlan_clone_destroy(struct ifnet *ifp) { - struct ifvlan *ifv = ifp->if_private; + ifvlan_ref ifv; + vlan_lock(); + ifv = ifp->if_private; if (ifv == NULL || ifp->if_type != IFT_L2VLAN) { + vlan_unlock(); return; } - VLAN_LOCK(); - if (ifv->ifv_detaching) { - VLAN_UNLOCK(); + if (ifvlan_flags_detaching(ifv)) { + vlan_unlock(); return; } vlan_remove(ifv); - VLAN_UNLOCK(); + vlan_unlock(); vlan_if_detach(ifp); return; } static int -vlan_set_bpf_tap(struct ifnet * ifp, int mode, bpf_callback_func * func) +vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func func) { - struct ifvlan *ifv = ifp->if_private; + ifvlan_ref ifv; + vlan_lock(); + ifv = ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + return (ENODEV); + } switch (mode) { case BPF_TAP_DISABLE: ifv->ifv_bpf_input = ifv->ifv_bpf_output = NULL; @@ -511,42 +919,52 @@ vlan_set_bpf_tap(struct ifnet * ifp, int mode, bpf_callback_func * func) default: break; } + vlan_unlock(); return 0; } -static void -vlan_ifinit(void *foo) -{ - return; -} - static int -vlan_output(struct ifnet *ifp, struct mbuf *m) +vlan_output(struct ifnet * ifp, struct mbuf * m) { - struct ifvlan *ifv; - struct ifnet *p; - struct ether_vlan_header *evl; - int soft_vlan; - - ifv = ifp->if_private; - p = ifv->ifv_p; - if (p == NULL) { - return (nop_if_output(ifp, m)); - } + bpf_packet_func bpf_func; + struct ether_vlan_header * evl; + int encaplen; + ifvlan_ref ifv; + struct ifnet * p; + int soft_vlan; + u_short tag; + vlan_parent_ref vlp; + if (m == 0) { - printf("%s: NULL output mbuf\n", ifv->ifv_name); - return (EINVAL); + return (0); } if ((m->m_flags & M_PKTHDR) == 0) { - printf("%s: M_PKTHDR bit not set\n", ifv->ifv_name); - m_freem(m); - return (EINVAL); + m_freem_list(m); + return (0); } - ifp->if_obytes += m->m_pkthdr.len; - ifp->if_opackets++; + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv) + || ifvlan_flags_ready(ifv) == 0) { + vlan_unlock(); + m_freem_list(m); + return (0); + } + vlp = ifv->ifv_vlp; + if (vlp == NULL) { + vlan_unlock(); + 
m_freem_list(m); + return (0); + } + p = vlp->vlp_ifp; + (void)ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0); soft_vlan = (p->if_hwassist & IF_HWASSIST_VLAN_TAGGING) == 0; - vlan_bpf_output(ifp, m, ifv->ifv_bpf_output); - + bpf_func = ifv->ifv_bpf_output; + tag = ifv->ifv_tag; + encaplen = ifv->ifv_encaplen; + vlan_unlock(); + vlan_bpf_output(ifp, m, bpf_func); + /* do not run parent's if_output() if the parent is not up */ if ((p->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING)) { m_freem(m); @@ -563,22 +981,22 @@ vlan_output(struct ifnet *ifp, struct mbuf *m) */ if (soft_vlan == 0) { m->m_pkthdr.csum_flags |= CSUM_VLAN_TAG_VALID; - m->m_pkthdr.vlan_tag = ifv->ifv_tag; + m->m_pkthdr.vlan_tag = tag; } else { - M_PREPEND(m, ifv->ifv_encaplen, M_DONTWAIT); + M_PREPEND(m, encaplen, M_DONTWAIT); if (m == NULL) { - printf("%s: unable to prepend VLAN header\n", - ifv->ifv_name); - ifp->if_ierrors++; + printf("%s%d: unable to prepend VLAN header\n", ifp->if_name, + ifp->if_unit); + ifp->if_oerrors++; return (0); } /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ - if (m->m_len < sizeof(*evl)) { + if (m->m_len < (int)sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) { - printf("%s: cannot pullup VLAN header\n", - ifv->ifv_name); - ifp->if_ierrors++; + printf("%s%d: unable to pullup VLAN header\n", ifp->if_name, + ifp->if_unit); + ifp->if_oerrors++; return (0); } } @@ -587,31 +1005,25 @@ vlan_output(struct ifnet *ifp, struct mbuf *m) * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ - bcopy(mtod(m, char *) + ifv->ifv_encaplen, + bcopy(mtod(m, char *) + encaplen, mtod(m, char *), ETHER_HDR_LEN); evl = mtod(m, struct ether_vlan_header *); evl->evl_proto = evl->evl_encap_proto; evl->evl_encap_proto = htons(ETHERTYPE_VLAN); - evl->evl_tag = htons(ifv->ifv_tag); - m->m_pkthdr.len += ifv->ifv_encaplen; + evl->evl_tag = htons(tag); } - - /* - * Send it, precisely as ether_output() would have. - * We are already running at splimp. - */ - return ((*p->if_output)(p, m)); + return dlil_output(p, 0, m, NULL, NULL, 1); } -extern int -vlan_demux(struct ifnet * ifp, struct mbuf * m, - char * frame_header, struct if_proto * * proto) +static int +vlan_input(struct mbuf * m, char * frame_header, struct ifnet * p, + __unused u_long protocol_family, __unused int sync_ok) { - register struct ether_header *eh = (struct ether_header *)frame_header; - struct ether_vlan_header *evl; - struct ifvlan *ifv = NULL; - int soft_vlan = 0; - u_int tag; + bpf_packet_func bpf_func = NULL; + struct ether_vlan_header * evl; + struct ifnet * ifp = NULL; + int soft_vlan = 0; + u_int tag = 0; if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) { /* @@ -623,21 +1035,20 @@ vlan_demux(struct ifnet * ifp, struct mbuf * m, m->m_pkthdr.vlan_tag = 0; } else { soft_vlan = 1; - - switch (ifp->if_type) { + switch (p->if_type) { case IFT_ETHER: if (m->m_len < ETHER_VLAN_ENCAP_LEN) { m_freem(m); - return (EJUSTRETURN); + return 0; } evl = (struct ether_vlan_header *)frame_header; if (ntohs(evl->evl_proto) == ETHERTYPE_VLAN) { /* don't allow VLAN within VLAN */ m_freem(m); - return (EJUSTRETURN); + return (0); } tag = EVL_VLANOFTAG(ntohs(evl->evl_tag)); - + /* * Restore the original ethertype. 
We'll remove * the encapsulation after we've found the vlan @@ -647,28 +1058,34 @@ vlan_demux(struct ifnet * ifp, struct mbuf * m, break; default: printf("vlan_demux: unsupported if type %u", - ifp->if_type); + p->if_type); m_freem(m); - return (EJUSTRETURN); + return 0; break; } } if (tag != 0) { - if (ifp->if_nvlans == 0) { + ifvlan_ref ifv; + + if ((p->if_eflags & IFEF_VLAN) == 0) { /* don't bother looking through the VLAN list */ m_freem(m); - ifp->if_noproto++; - return (EJUSTRETURN); + return 0; + } + vlan_lock(); + ifv = vlan_lookup_parent_and_tag(p, tag); + if (ifv != NULL) { + ifp = ifv->ifv_ifp; } - VLAN_LOCK(); - ifv = vlan_lookup_ifp_and_tag(ifp, tag); - if (ifv == NULL || (ifv->ifv_ifp->if_flags & IFF_UP) == 0) { - VLAN_UNLOCK(); + if (ifv == NULL + || ifvlan_flags_ready(ifv) == 0 + || (ifp->if_flags & IFF_UP) == 0) { + vlan_unlock(); m_freem(m); - ifp->if_noproto++; - return (EJUSTRETURN); + return 0; } - VLAN_UNLOCK(); /* XXX extend below? */ + bpf_func = ifv->ifv_bpf_input; + vlan_unlock(); } if (soft_vlan) { /* @@ -682,105 +1099,137 @@ vlan_demux(struct ifnet * ifp, struct mbuf * m, m->m_pkthdr.csum_flags = 0; /* can't trust hardware checksum */ } if (tag != 0) { - /* we found a vlan interface above, so send it up */ - m->m_pkthdr.rcvif = ifv->ifv_ifp; - ifv->ifv_ifp->if_ipackets++; - ifv->ifv_ifp->if_ibytes += m->m_pkthdr.len; - - vlan_bpf_input(ifv->ifv_ifp, m, ifv->ifv_bpf_input, frame_header, - ETHER_HDR_LEN, soft_vlan ? ETHER_VLAN_ENCAP_LEN : 0); - - /* Pass it back through the parent's demux routine. */ - return ((*ifp->if_demux)(ifv->ifv_ifp, m, frame_header, proto)); + m->m_pkthdr.rcvif = ifp; + (void)ifnet_stat_increment_in(ifp, 1, + m->m_pkthdr.len + ETHER_HDR_LEN, 0); + vlan_bpf_input(ifp, m, bpf_func, frame_header, ETHER_HDR_LEN, + soft_vlan ? ETHER_VLAN_ENCAP_LEN : 0); + /* We found a vlan interface, inject on that interface. */ + dlil_input_packet(ifp, m, frame_header); + } else { + /* Send priority-tagged packet up through the parent */ + dlil_input_packet(p, m, frame_header); } - /* Pass it back through calling demux routine. 
*/ - return ((*ifp->if_demux)(ifp, m, frame_header, proto)); + return 0; } +#define VLAN_CONFIG_PROGRESS_VLP_RETAINED 0x1 +#define VLAN_CONFIG_PROGRESS_IN_LIST 0x2 + static int -vlan_config(struct ifvlan *ifv, struct ifnet *p, int tag) +vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) { - struct ifnet * ifp; - struct ifaddr *ifa1, *ifa2; - struct sockaddr_dl *sdl1, *sdl2; - int supports_vlan_mtu = 0; + int error; + int first_vlan = 0; + ifvlan_ref ifv = NULL; + struct ifaddr * ifa1; + struct ifaddr * ifa2; + vlan_parent_ref new_vlp = NULL; + int need_vlp_release = 0; + u_int32_t progress = 0; + struct sockaddr_dl *sdl1; + struct sockaddr_dl *sdl2; + vlan_parent_ref vlp = NULL; + + /* pre-allocate space for vlan_parent, in case we're first */ + error = vlan_parent_create(p, &new_vlp); + if (error != 0) { + return (error); + } - VLAN_LOCK_ASSERT(); - if (p->if_data.ifi_type != IFT_ETHER) - return EPROTONOSUPPORT; - if (ifv->ifv_p != NULL || ifv->ifv_detaching) { - return EBUSY; + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv != NULL && ifv->ifv_vlp != NULL) { + vlan_unlock(); + vlan_parent_release(new_vlp); + return (EBUSY); } - if (vlan_lookup_ifp_and_tag(p, tag) != NULL) { - /* already a VLAN with that tag on this interface */ - return (EADDRINUSE); + vlp = parent_list_lookup(p); + if (vlp != NULL) { + if (vlan_parent_lookup_tag(vlp, tag) != NULL) { + /* already a VLAN with that tag on this interface */ + error = EADDRINUSE; + goto unlock_done; + } + } + else { + /* we're the first VLAN on this interface */ + LIST_INSERT_HEAD(&g_vlan->parent_list, new_vlp, vlp_parent_list); + vlp = new_vlp; } - ifp = ifv->ifv_ifp; - ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; - ifv->ifv_mintu = ETHERMIN; - ifv->ifv_flags = 0; - /* - * If the parent supports the VLAN_MTU capability, - * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, - * enable it. 
- */ - if (p->if_hwassist & (IF_HWASSIST_VLAN_MTU | IF_HWASSIST_VLAN_TAGGING)) { - supports_vlan_mtu = 1; + /* need to wait to ensure no one else is trying to add/remove */ + vlan_parent_retain(vlp); + progress |= VLAN_CONFIG_PROGRESS_VLP_RETAINED; + vlan_parent_wait(vlp, "vlan_config"); + + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL) { + error = EOPNOTSUPP; + goto signal_done; + } + if (vlan_parent_flags_detaching(vlp) + || ifvlan_flags_detaching(ifv) || ifv->ifv_vlp != NULL) { + error = EBUSY; + goto signal_done; } - if (p->if_nvlans == 0) { - u_long dltag; - u_long filter_id; - int error; - /* attach our VLAN "interface filter" to the interface */ - error = vlan_attach_filter(p, &filter_id); - if (error) { - return (error); - } + /* check again because someone might have gotten in */ + if (vlan_parent_lookup_tag(vlp, tag) != NULL) { + /* already a VLAN with that tag on this interface */ + error = EADDRINUSE; + goto signal_done; + } + if (vlan_parent_no_vlans(vlp)) { + first_vlan = 1; + } + vlan_parent_add_vlan(vlp, ifv, tag); + progress |= VLAN_CONFIG_PROGRESS_IN_LIST; + + /* check whether bond interface is using parent interface */ + ifnet_lock_exclusive(p); + if ((p->if_eflags & IFEF_BOND) != 0) { + ifnet_lock_done(p); + /* don't allow VLAN over interface that's already part of a bond */ + error = EBUSY; + goto signal_done; + } + /* prevent BOND interface from using it */ + p->if_eflags |= IFEF_VLAN; + ifnet_lock_done(p); + vlan_unlock(); + + if (first_vlan) { /* attach our VLAN "protocol" to the interface */ error = vlan_attach_protocol(p); if (error) { - (void)vlan_detach_filter(filter_id); - return (error); + vlan_lock(); + goto signal_done; } - ifv->ifv_filter_id = filter_id; - ifv->ifv_filter_valid = TRUE; -#if 0 - if (supports_vlan_mtu) { - /* - * Enable Tx/Rx of VLAN-sized frames. - */ - p->if_capenable |= IFCAP_VLAN_MTU; - if (p->if_flags & IFF_UP) { - struct ifreq ifr; - int error; - - ifr.ifr_flags = p->if_flags; - error = (*p->if_ioctl)(p, SIOCSIFFLAGS, - (caddr_t) &ifr); - if (error) { - if (p->if_nvlans == 0) - p->if_capenable &= ~IFCAP_VLAN_MTU; - return (error); - } - } - } -#endif 0 - } else { - struct ifvlan * other_ifv; + /* mark the parent interface up */ + ifnet_lock_exclusive(p); + p->if_flags |= IFF_UP; + ifnet_lock_done(p); + (void)dlil_ioctl(0, p, SIOCSIFFLAGS, (caddr_t)NULL); + } - other_ifv = vlan_lookup_ifp(p); - if (other_ifv == NULL) { - printf("vlan: other_ifv can't be NULL\n"); - return (EINVAL); + /* configure parent to receive our multicast addresses */ + error = multicast_list_program(&ifv->ifv_multicast, ifp, p); + if (error != 0) { + if (first_vlan) { + (void)vlan_detach_protocol(p); } - ifv->ifv_filter_id = other_ifv->ifv_filter_id; - ifv->ifv_filter_valid = TRUE; + vlan_lock(); + goto signal_done; } - p->if_nvlans++; - if (supports_vlan_mtu) { + + /* no failures past this point */ + vlan_lock(); + + ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; + ifv->ifv_flags = 0; + if (vlan_parent_flags_supports_vlan_mtu(vlp)) { ifv->ifv_mtufudge = 0; } else { /* @@ -792,9 +1241,8 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, int tag) */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } + ifp->if_mtu = ETHERMTU - ifv->ifv_mtufudge; - ifv->ifv_p = p; - ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. 
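Concretely, with the standard values ETHERMTU = 1500 and ETHER_VLAN_ENCAP_LEN = 4, the ifv_mtufudge logic above yields a VLAN MTU of 1500 when the parent advertises IF_HWASSIST_VLAN_MTU or IF_HWASSIST_VLAN_TAGGING, and 1496 otherwise. The same arithmetic in isolation, as a sketch (the example_* function is hypothetical):

    /*
     * Sketch only: the MTU vlan_config() assigns via
     * ifp->if_mtu = ETHERMTU - ifv->ifv_mtufudge.
     */
    static int
    example_vlan_mtu(int parent_supports_vlan_mtu)
    {
        int mtufudge;

        mtufudge = parent_supports_vlan_mtu ? 0 : ETHER_VLAN_ENCAP_LEN;
        return (ETHERMTU - mtufudge);   /* 1500 or 1496 */
    }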
@@ -809,10 +1257,8 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, int tag) if (p->if_hwassist & IF_HWASSIST_VLAN_TAGGING) { ifp->if_hwassist |= IF_HWASSIST_CSUM_FLAGS(p->if_hwassist); } - /* - * Set up our ``Ethernet address'' to reflect the underlying - * physical interface's. - */ + + /* set our ethernet address to that of the parent */ ifa1 = ifaddr_byindex(ifp->if_index); ifa2 = ifaddr_byindex(p->if_index); sdl1 = (struct sockaddr_dl *)ifa1->ifa_addr; @@ -820,17 +1266,46 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, int tag) sdl1->sdl_type = IFT_ETHER; sdl1->sdl_alen = ETHER_ADDR_LEN; bcopy(LLADDR(sdl2), LLADDR(sdl1), ETHER_ADDR_LEN); - bcopy(LLADDR(sdl2), IFP2AC(ifp)->ac_enaddr, ETHER_ADDR_LEN); - - /* - * Configure multicast addresses that may already be - * joined on the vlan device. - */ - (void)vlan_setmulti(ifp); - ifp->if_output = vlan_output; - ifv->ifv_tag = tag; + ifp->if_flags |= IFF_RUNNING; + ifvlan_flags_set_ready(ifv); + vlan_parent_signal(vlp, "vlan_config"); + vlan_unlock(); + if (new_vlp != vlp) { + /* throw it away, it wasn't needed */ + vlan_parent_release(new_vlp); + } return 0; + + signal_done: + vlan_assert_lock_held(); + vlan_parent_signal(vlp, "vlan_config"); + + unlock_done: + if ((progress & VLAN_CONFIG_PROGRESS_IN_LIST) != 0) { + vlan_parent_remove_vlan(vlp, ifv); + } + if (!vlan_parent_flags_detaching(vlp) && vlan_parent_no_vlans(vlp)) { + /* the vlan parent has no more VLAN's */ + ifnet_lock_exclusive(p); + p->if_eflags &= ~IFEF_VLAN; + ifnet_lock_done(p); + LIST_REMOVE(vlp, vlp_parent_list); + /* release outside of the lock below */ + need_vlp_release = 1; + } + vlan_unlock(); + + if ((progress & VLAN_CONFIG_PROGRESS_VLP_RETAINED) != 0) { + vlan_parent_release(vlp); + } + if (need_vlp_release) { + vlan_parent_release(vlp); + } + if (new_vlp != vlp) { + vlan_parent_release(new_vlp); + } + return (error); } static void @@ -854,252 +1329,410 @@ vlan_link_event(struct ifnet * ifp, struct ifnet * p) } static int -vlan_unconfig(struct ifnet *ifp) +vlan_unconfig(struct ifnet * ifp) { - struct ifaddr *ifa; + int error = 0; + struct ifaddr * ifa; + ifvlan_ref ifv; + int last_vlan = 0; + int need_vlp_release = 0; + struct ifnet * p; struct sockaddr_dl *sdl; - struct vlan_mc_entry *mc; - struct ifvlan *ifv; - struct ifnet *p; - int error; - - VLAN_LOCK_ASSERT(); - - ifv = ifp->if_private; - - /* Disconnect from parent. */ - p = ifv->ifv_p; - ifv->ifv_p = NULL; - - if (p != NULL) { - struct sockaddr_dl sdl; + vlan_parent_ref vlp; - /* - * Since the interface is being unconfigured, we need to - * empty the list of multicast groups that we may have joined - * while we were alive from the parent's list. - */ - bzero((char *)&sdl, sizeof sdl); - sdl.sdl_len = sizeof sdl; - sdl.sdl_family = AF_LINK; - sdl.sdl_index = p->if_index; - sdl.sdl_type = IFT_ETHER; - sdl.sdl_alen = ETHER_ADDR_LEN; - - while (SLIST_FIRST(&ifv->vlan_mc_listhead) != NULL) { - mc = SLIST_FIRST(&ifv->vlan_mc_listhead); - bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN); - error = if_delmulti(p, (struct sockaddr *)&sdl); - if (error) { - printf("vlan_unconfig: if_delmulti %s failed, %d\n", - ifv->ifv_name, error); - } - SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); - FREE(mc, M_VLAN); - } - p->if_nvlans--; - if (p->if_nvlans == 0) { - /* detach our VLAN "protocol" from the interface */ - if (ifv->ifv_filter_valid) { - (void)vlan_detach_filter(ifv->ifv_filter_id); - } - (void)vlan_detach_protocol(p); -#if 0 - /* - * Disable Tx/Rx of VLAN-sized frames. 
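The signal_done/unlock_done tail of vlan_config() unwinds only the steps whose VLAN_CONFIG_PROGRESS_* bit was recorded. The same idiom in a self-contained form; the libc resources below are purely illustrative stand-ins for the retain and list-insert steps:

#include <stdio.h>
#include <stdlib.h>

#define PROGRESS_BUF_ALLOCATED 0x1
#define PROGRESS_FILE_OPENED   0x2

/* Record a bit per completed step; one exit path undoes exactly the
 * steps whose bits are set, newest first. */
static int
setup(const char *path, char **buf_out, FILE **fp_out)
{
    unsigned int progress = 0;
    char *buf = NULL;
    FILE *fp = NULL;
    int error = 0;

    if ((buf = malloc(4096)) == NULL) {
        error = -1;
        goto done;
    }
    progress |= PROGRESS_BUF_ALLOCATED;

    if ((fp = fopen(path, "r")) == NULL) {
        error = -1;
        goto done;
    }
    progress |= PROGRESS_FILE_OPENED;

 done:
    if (error != 0) {
        if ((progress & PROGRESS_FILE_OPENED) != 0)
            fclose(fp);
        if ((progress & PROGRESS_BUF_ALLOCATED) != 0)
            free(buf);
        return (error);
    }
    *buf_out = buf;
    *fp_out = fp;
    return (0);
}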
- */ - p->if_capenable &= ~IFCAP_VLAN_MTU; - if (p->if_flags & IFF_UP) { - struct ifreq ifr; - - ifr.ifr_flags = p->if_flags; - (*p->if_ioctl)(p, SIOCSIFFLAGS, (caddr_t) &ifr); - } -#endif 0 + vlan_assert_lock_held(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL) { + return (0); + } + vlp = ifv->ifv_vlp; + if (vlp == NULL) { + return (0); + } + vlan_parent_retain(vlp); + vlan_parent_wait(vlp, "vlan_unconfig"); + + /* check again because another thread could be in vlan_unconfig */ + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL) { + goto signal_done; + } + if (ifv->ifv_vlp != vlp) { + /* vlan parent changed */ + goto signal_done; + } + need_vlp_release++; + p = vlp->vlp_ifp; + + /* remember whether we're the last VLAN on the parent */ + if (LIST_NEXT(LIST_FIRST(&vlp->vlp_vlan_list), ifv_vlan_list) == NULL) { + if (g_vlan->verbose) { + printf("vlan_unconfig: last vlan on %s%d\n", + p->if_name, p->if_unit); } + last_vlan = 1; + } + + /* back-out any effect our mtu might have had on the parent */ + (void)vlan_new_mtu(ifp, ETHERMTU - ifv->ifv_mtufudge); + + vlan_unlock(); + + /* detach VLAN "protocol" */ + if (last_vlan) { + (void)vlan_detach_protocol(p); } - /* return to the state we were in before SETVLAN */ + /* un-join multicast on parent interface */ + (void)multicast_list_remove(&ifv->ifv_multicast); + + vlan_lock(); + + /* Disconnect from parent. */ + vlan_parent_remove_vlan(vlp, ifv); + + /* return to the state we were in before SIFVLAN */ ifp->if_mtu = 0; ifp->if_flags &= ~(IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX | IFF_RUNNING); - ifv->ifv_ifp->if_hwassist = 0; + ifp->if_hwassist = 0; ifv->ifv_flags = 0; - ifv->ifv_ifp->if_output = nop_if_output; ifv->ifv_mtufudge = 0; - ifv->ifv_filter_valid = FALSE; /* Clear our MAC address. */ - ifa = ifaddr_byindex(ifv->ifv_ifp->if_index); + ifa = ifaddr_byindex(ifp->if_index); sdl = (struct sockaddr_dl *)(ifa->ifa_addr); sdl->sdl_type = IFT_L2VLAN; sdl->sdl_alen = 0; bzero(LLADDR(sdl), ETHER_ADDR_LEN); - bzero(IFP2AC(ifv->ifv_ifp)->ac_enaddr, ETHER_ADDR_LEN); - /* send a link down event */ - if (p != NULL) { - interface_link_event(ifv->ifv_ifp, KEV_DL_LINK_OFF); + if (!vlan_parent_flags_detaching(vlp) && vlan_parent_no_vlans(vlp)) { + /* the vlan parent has no more VLAN's */ + ifnet_lock_exclusive(p); + p->if_eflags &= ~IFEF_VLAN; + ifnet_lock_done(p); + LIST_REMOVE(vlp, vlp_parent_list); + /* release outside of the lock below */ + need_vlp_release++; } - return 0; + + signal_done: + vlan_parent_signal(vlp, "vlan_unconfig"); + vlan_unlock(); + vlan_parent_release(vlp); /* one because we waited */ + + while (need_vlp_release--) { + vlan_parent_release(vlp); + } + vlan_lock(); + return (error); } static int -vlan_set_promisc(struct ifnet *ifp) +vlan_set_promisc(struct ifnet * ifp) { - struct ifvlan *ifv = ifp->if_private; - int error = 0; + int error = 0; + ifvlan_ref ifv; + vlan_parent_ref vlp; + + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + error = (ifv == NULL) ? 
EOPNOTSUPP : EBUSY; + goto done; + } + vlp = ifv->ifv_vlp; + if (vlp == NULL) { + goto done; + } if ((ifp->if_flags & IFF_PROMISC) != 0) { - if ((ifv->ifv_flags & IFVF_PROMISC) == 0) { - error = ifpromisc(ifv->ifv_p, 1); - if (error == 0) - ifv->ifv_flags |= IFVF_PROMISC; + if (!ifvlan_flags_promisc(ifv)) { + error = ifnet_set_promiscuous(vlp->vlp_ifp, 1); + if (error == 0) { + ifvlan_flags_set_promisc(ifv); + } } } else { - if ((ifv->ifv_flags & IFVF_PROMISC) != 0) { - error = ifpromisc(ifv->ifv_p, 0); - if (error == 0) - ifv->ifv_flags &= ~IFVF_PROMISC; + if (ifvlan_flags_promisc(ifv)) { + error = ifnet_set_promiscuous(vlp->vlp_ifp, 0); + if (error == 0) { + ifvlan_flags_clear_promisc(ifv); + } } } + done: + vlan_unlock(); + return (error); +} +static int +vlan_new_mtu(struct ifnet * ifp, int mtu) +{ + struct ifdevmtu * devmtu_p; + int error = 0; + ifvlan_ref ifv; + int max_mtu; + int new_mtu = 0; + int req_mtu; + vlan_parent_ref vlp; + + vlan_assert_lock_held(); + ifv = (ifvlan_ref)ifp->if_private; + vlp = ifv->ifv_vlp; + devmtu_p = &vlp->vlp_devmtu; + req_mtu = mtu + ifv->ifv_mtufudge; + if (req_mtu > devmtu_p->ifdm_max || req_mtu < devmtu_p->ifdm_min) { + return (EINVAL); + } + max_mtu = vlan_parent_find_max_mtu(vlp, ifv); + if (req_mtu > max_mtu) { + new_mtu = req_mtu; + } + else if (max_mtu < devmtu_p->ifdm_current) { + new_mtu = max_mtu; + } + if (new_mtu != 0) { + struct ifnet * p = vlp->vlp_ifp; + vlan_unlock(); + error = siocsifaltmtu(p, new_mtu); + vlan_lock(); + } + if (error == 0) { + if (new_mtu != 0) { + devmtu_p->ifdm_current = new_mtu; + } + ifp->if_mtu = mtu; + } return (error); } static int -vlan_ioctl(struct ifnet *ifp, u_long cmd, void * data) +vlan_set_mtu(struct ifnet * ifp, int mtu) { - struct ifaddr *ifa; - struct ifnet *p; - struct ifreq *ifr; - struct ifvlan *ifv; - struct vlanreq vlr; - int error = 0; + int error = 0; + ifvlan_ref ifv; + vlan_parent_ref vlp; + + if (mtu < IF_MINMTU) { + return (EINVAL); + } + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + return ((ifv == NULL) ? EOPNOTSUPP : EBUSY); + } + vlp = ifv->ifv_vlp; + if (vlp == NULL || vlan_parent_flags_detaching(vlp)) { + vlan_unlock(); + if (mtu != 0) { + return (EINVAL); + } + return (0); + } + vlan_parent_retain(vlp); + vlan_parent_wait(vlp, "vlan_set_mtu"); + + /* check again, something might have changed */ + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + error = (ifv == NULL) ? 
EOPNOTSUPP : EBUSY; + goto signal_done; + } + if (ifv->ifv_vlp != vlp) { + /* vlan parent changed */ + goto signal_done; + } + if (vlp == NULL || vlan_parent_flags_detaching(vlp)) { + if (mtu != 0) { + error = EINVAL; + } + goto signal_done; + } + error = vlan_new_mtu(ifp, mtu); + + signal_done: + vlan_parent_signal(vlp, "vlan_set_mtu"); + vlan_unlock(); + vlan_parent_release(vlp); + + return (error); +} +static int +vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * data) +{ + struct ifdevmtu * devmtu_p; + int error = 0; + struct ifaddr * ifa; + struct ifmediareq64 * ifmr; + struct ifreq * ifr; + ifvlan_ref ifv; + struct ifnet * p; + u_short tag; + user_addr_t user_addr; + vlan_parent_ref vlp; + struct vlanreq vlr; + + if (ifp->if_type != IFT_L2VLAN) { + return (EOPNOTSUPP); + } ifr = (struct ifreq *)data; ifa = (struct ifaddr *)data; - ifv = (struct ifvlan *)ifp->if_private; switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; + ifnet_set_flags(ifp, IFF_UP, IFF_UP); break; + case SIOCGIFMEDIA64: case SIOCGIFMEDIA: - VLAN_LOCK(); - if (ifv->ifv_p != NULL) { - error = (*ifv->ifv_p->if_ioctl)(ifv->ifv_p, - SIOCGIFMEDIA, data); - VLAN_UNLOCK(); - /* Limit the result to the parent's current config. */ - if (error == 0) { - struct ifmediareq *ifmr; + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + return (ifv == NULL ? EOPNOTSUPP : EBUSY); + } + p = (ifv->ifv_vlp == NULL) ? NULL : ifv->ifv_vlp->vlp_ifp; + vlan_unlock(); + ifmr = (struct ifmediareq64 *)data; + user_addr = (cmd == SIOCGIFMEDIA64) + ? ifmr->ifm_ifmu.ifmu_ulist64 + : CAST_USER_ADDR_T(ifmr->ifm_ifmu.ifmu_ulist32); + if (p != NULL) { + struct ifmediareq64 p_ifmr; - ifmr = (struct ifmediareq *) data; - if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { + bzero(&p_ifmr, sizeof(p_ifmr)); + error = dlil_ioctl(0, p, SIOCGIFMEDIA, (caddr_t)&p_ifmr); + if (error == 0) { + ifmr->ifm_active = p_ifmr.ifm_active; + ifmr->ifm_current = p_ifmr.ifm_current; + ifmr->ifm_mask = p_ifmr.ifm_mask; + ifmr->ifm_status = p_ifmr.ifm_status; + ifmr->ifm_count = p_ifmr.ifm_count; + /* Limit the result to the parent's current config. */ + if (ifmr->ifm_count >= 1 && user_addr != USER_ADDR_NULL) { ifmr->ifm_count = 1; - error = copyout(&ifmr->ifm_current, - ifmr->ifm_ulist, + error = copyout(&ifmr->ifm_current, user_addr, sizeof(int)); } } } else { - struct ifmediareq *ifmr; - VLAN_UNLOCK(); - - ifmr = (struct ifmediareq *) data; - ifmr->ifm_current = 0; + ifmr->ifm_active = ifmr->ifm_current = IFM_NONE; ifmr->ifm_mask = 0; ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = 0; ifmr->ifm_count = 1; - if (ifmr->ifm_ulist) { - error = copyout(&ifmr->ifm_current, - ifmr->ifm_ulist, - sizeof(int)); + if (user_addr != USER_ADDR_NULL) { + error = copyout(&ifmr->ifm_current, user_addr, sizeof(int)); } - error = 0; } break; case SIOCSIFMEDIA: - error = EINVAL; - break; - - case SIOCSIFMTU: - /* - * Set the interface MTU. 
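The MTU bookkeeping here works in "parent units": a VLAN MTU of mtu costs mtu + ifv_mtufudge bytes on the parent, and vlan_new_mtu() raises or shrinks the parent's device MTU toward the largest requirement among the VLANs. Roughly, using hypothetical reduced types:

/* Hypothetical per-VLAN record: its own MTU plus what the tag costs
 * on the parent (ETHER_VLAN_ENCAP_LEN when the parent lacks hardware
 * VLAN_MTU support, 0 otherwise). */
struct child_mtu {
    int mtu;
    int fudge;
};

/* The parent device MTU must cover the largest (mtu + fudge) among
 * the VLANs, akin to what vlan_parent_find_max_mtu() computes over
 * the sibling VLANs. */
static int
parent_mtu_needed(const struct child_mtu *kids, int nkids)
{
    int need = 0;
    int i;

    for (i = 0; i < nkids; i++) {
        if (kids[i].mtu + kids[i].fudge > need)
            need = kids[i].mtu + kids[i].fudge;
    }
    return (need);
}

For example, one VLAN at MTU 1500 with a 4-byte fudge drives the parent to 1504; vlan_new_mtu() additionally rejects requests outside the parent's ifdm_min..ifdm_max window and lets the parent shrink back when the largest requirement drops.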
- */ - VLAN_LOCK(); - if (ifv->ifv_p != NULL) { - if (ifr->ifr_mtu > (ifv->ifv_p->if_mtu - ifv->ifv_mtufudge) - || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) { - error = EINVAL; - } else { - ifp->if_mtu = ifr->ifr_mtu; - } - } else { - error = EINVAL; - } - VLAN_UNLOCK(); + error = EOPNOTSUPP; break; - case SIOCSETVLAN: - error = copyin(ifr->ifr_data, &vlr, sizeof(vlr)); - if (error) - break; - if (vlr.vlr_parent[0] == '\0') { - VLAN_LOCK(); - vlan_unconfig(ifp); -#if 0 - if (ifp->if_flags & IFF_UP) - if_down(ifp); - ifp->if_flags &= ~IFF_RUNNING; -#endif 0 - VLAN_UNLOCK(); - break; + case SIOCGIFDEVMTU: + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + return (ifv == NULL ? EOPNOTSUPP : EBUSY); } - p = ifunit(vlr.vlr_parent); - if (p == 0) { - error = ENOENT; - break; + vlp = ifv->ifv_vlp; + if (vlp != NULL) { + int min_mtu = vlp->vlp_devmtu.ifdm_min - ifv->ifv_mtufudge; + devmtu_p = &ifr->ifr_devmtu; + devmtu_p->ifdm_current = ifp->if_mtu; + devmtu_p->ifdm_min = max(min_mtu, IF_MINMTU); + devmtu_p->ifdm_max = vlp->vlp_devmtu.ifdm_max - ifv->ifv_mtufudge; } - /* - * Don't let the caller set up a VLAN tag with - * anything except VLID bits. - */ - if (vlr.vlr_tag & ~EVL_VLID_MASK) { - error = EINVAL; - break; + else { + devmtu_p = &ifr->ifr_devmtu; + devmtu_p->ifdm_current = 0; + devmtu_p->ifdm_min = 0; + devmtu_p->ifdm_max = 0; } - VLAN_LOCK(); - error = vlan_config(ifv, p, vlr.vlr_tag); + vlan_unlock(); + break; + + case SIOCSIFMTU: + error = vlan_set_mtu(ifp, ifr->ifr_mtu); + break; + + case SIOCSIFVLAN: + user_addr = proc_is64bit(current_proc()) + ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); + error = copyin(user_addr, &vlr, sizeof(vlr)); if (error) { - VLAN_UNLOCK(); break; } - ifp->if_flags |= IFF_RUNNING; - VLAN_UNLOCK(); - - /* Update promiscuous mode, if necessary. */ - vlan_set_promisc(ifp); - - /* generate a link event */ - vlan_link_event(ifp, p); + p = NULL; + if (vlr.vlr_parent[0] != '\0') { + if (vlr.vlr_tag & ~EVL_VLID_MASK) { + /* + * Don't let the caller set up a VLAN tag with + * anything except VLID bits. + */ + error = EINVAL; + break; + } + p = ifunit(vlr.vlr_parent); + if (p == NULL) { + error = ENXIO; + break; + } + /* can't do VLAN over anything but ethernet or ethernet aggregate */ + if (p->if_type != IFT_ETHER && p->if_type != IFT_IEEE8023ADLAG) { + error = EPROTONOSUPPORT; + break; + } + error = vlan_config(ifp, p, vlr.vlr_tag); + if (error) { + break; + } + + /* Update promiscuous mode, if necessary. */ + (void)vlan_set_promisc(ifp); + + /* generate a link event based on the state of the parent */ + vlan_link_event(ifp, p); + } else { + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + error = (ifv == NULL ? EOPNOTSUPP : EBUSY); + break; + } + error = vlan_unconfig(ifp); + vlan_unlock(); + if (error == 0) { + interface_link_event(ifp, KEV_DL_LINK_OFF); + } + } break; - case SIOCGETVLAN: + case SIOCGIFVLAN: bzero(&vlr, sizeof vlr); - VLAN_LOCK(); - if (ifv->ifv_p != NULL) { + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; + if (ifv == NULL || ifvlan_flags_detaching(ifv)) { + vlan_unlock(); + return (ifv == NULL ? EOPNOTSUPP : EBUSY); + } + p = (ifv->ifv_vlp == NULL) ? 
NULL : ifv->ifv_vlp->vlp_ifp; + tag = ifv->ifv_tag; + vlan_unlock(); + if (p != NULL) { snprintf(vlr.vlr_parent, sizeof(vlr.vlr_parent), - "%s%d", ifv->ifv_p->if_name, - ifv->ifv_p->if_unit); - vlr.vlr_tag = ifv->ifv_tag; + "%s%d", p->if_name, p->if_unit); + vlr.vlr_tag = tag; } - VLAN_UNLOCK(); - error = copyout(&vlr, ifr->ifr_data, sizeof vlr); + user_addr = proc_is64bit(current_proc()) + ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data); + error = copyout(&vlr, user_addr, sizeof(vlr)); break; case SIOCSIFFLAGS: @@ -1107,8 +1740,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, void * data) * For promiscuous mode, we enable promiscuous mode on * the parent if we need promiscuous on the VLAN interface. */ - if (ifv->ifv_p != NULL) - error = vlan_set_promisc(ifp); + error = vlan_set_promisc(ifp); break; case SIOCADDMULTI: @@ -1121,156 +1753,72 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, void * data) return error; } -static int -nop_if_ioctl(struct ifnet * ifp, u_long cmd, void * data) -{ - return EOPNOTSUPP; -} - -static int -nop_if_bpf(struct ifnet *ifp, int mode, bpf_callback_func * func) -{ - return ENODEV; -} - -static int -nop_if_free(struct ifnet * ifp) -{ - return 0; -} - -static int -nop_if_output(struct ifnet * ifp, struct mbuf * m) -{ - if (m != NULL) { - m_freem_list(m); - } - return 0; -} - -static int +static void vlan_if_free(struct ifnet * ifp) { - struct ifvlan *ifv; + ifvlan_ref ifv; if (ifp == NULL) { - return 0; + return; } - ifv = (struct ifvlan *)ifp->if_private; + vlan_lock(); + ifv = (ifvlan_ref)ifp->if_private; if (ifv == NULL) { - return 0; + vlan_unlock(); + return; } ifp->if_private = NULL; + vlan_unlock(); dlil_if_release(ifp); FREE(ifv, M_VLAN); - return 0; } -/* - * Function: vlan_if_filter_detach - * Purpose: - * Destroy all vlan interfaces that refer to the interface - */ -static int -vlan_if_filter_detach(caddr_t cookie) +static void +vlan_event(struct ifnet * p, struct kev_msg * event) { - struct ifnet * ifp; - struct ifvlan * ifv; - struct ifnet * p = (struct ifnet *)cookie; + vlan_parent_ref vlp; - VLAN_LOCK(); - while (TRUE) { - ifv = vlan_lookup_ifp(p); - if (ifv == NULL) { - break; - } - if (ifv->ifv_detaching) { - continue; - } - /* make sure we don't invoke vlan_detach_filter */ - ifv->ifv_filter_valid = FALSE; - vlan_remove(ifv); - ifp = ifv->ifv_ifp; - VLAN_UNLOCK(); - vlan_if_detach(ifp); - VLAN_LOCK(); + /* Check if the interface we are attached to is being detached */ + if (event->vendor_code != KEV_VENDOR_APPLE + || event->kev_class != KEV_NETWORK_CLASS + || event->kev_subclass != KEV_DL_SUBCLASS) { + return; } - VLAN_UNLOCK(); - return (0); -} - -/* - * Function: vlan_attach_filter - * Purpose: - * We attach an interface filter to detect when the underlying interface - * goes away. We are forced to do that because dlil does not call our - * protocol's dl_event function for KEV_DL_IF_DETACHING. 
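vlan_event() above funnels kernel events through three cheap header checks (vendor, class, subclass) before it looks at the event code or takes the vlan lock, so foreign events are discarded early. A reduced sketch of that filter; the constants are made-up stand-ins for the real KEV_* values:

#include <stdbool.h>
#include <stdint.h>

/* Reduced event header, mirroring the fields vlan_event reads. */
struct sketch_event {
    uint32_t vendor_code;
    uint32_t kev_class;
    uint32_t kev_subclass;
    uint32_t event_code;
};

enum {
    EV_VENDOR_APPLE = 1, EV_CLASS_NETWORK = 1, EV_SUBCLASS_DL = 2,
    EV_IF_DETACHING = 10, EV_LINK_OFF = 12, EV_LINK_ON = 13
};

/* Cheapest checks first: discard foreign events before touching the
 * event code or any lock. */
static bool
event_is_interesting(const struct sketch_event *ev)
{
    if (ev->vendor_code != EV_VENDOR_APPLE ||
        ev->kev_class != EV_CLASS_NETWORK ||
        ev->kev_subclass != EV_SUBCLASS_DL)
        return (false);
    switch (ev->event_code) {
    case EV_IF_DETACHING:
    case EV_LINK_OFF:
    case EV_LINK_ON:
        return (true);
    default:
        return (false);
    }
}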
- */ - -static int -vlan_attach_filter(struct ifnet * ifp, u_long * filter_id) -{ - int error; - struct dlil_if_flt_str filt; - - bzero(&filt, sizeof(filt)); - filt.filter_detach = vlan_if_filter_detach; - filt.cookie = (caddr_t)ifp; - error = dlil_attach_interface_filter(ifp, &filt, filter_id, - DLIL_LAST_FILTER); - if (error) { - printf("vlan: dlil_attach_interface_filter(%s%d) failed, %d\n", - ifp->if_name, ifp->if_unit, error); + switch (event->event_code) { + case KEV_DL_IF_DETACHING: + case KEV_DL_LINK_OFF: + case KEV_DL_LINK_ON: + break; + default: + return; } - return (error); -} - -/* - * Function: vlan_detach_filter - * Purpose: - * Remove our interface filter. - */ -static int -vlan_detach_filter(u_long filter_id) -{ - int error; - - error = dlil_detach_filter(filter_id); - if (error) { - printf("vlan: dlil_detach_filter failed, %d\n", error); + vlan_lock(); + if ((p->if_eflags & IFEF_VLAN) == 0) { + vlan_unlock(); + /* no VLAN's */ + return; } - return (error); -} - -/* - * Function: vlan_proto_input - * Purpose: - * This function is never called. We aren't allowed to leave the - * function pointer NULL, so this function simply free's the mbuf. - */ -static int -vlan_proto_input(m, frame_header, ifp, dl_tag, sync_ok) - struct mbuf *m; - char *frame_header; - struct ifnet *ifp; - u_long dl_tag; - int sync_ok; -{ - m_freem(m); - return (EJUSTRETURN); -} - -static struct ifnet * -find_if_name_unit(const char * if_name, int unit) -{ - struct ifnet * ifp; - - TAILQ_FOREACH(ifp, &ifnet, if_link) { - if (strcmp(if_name, ifp->if_name) == 0 && unit == ifp->if_unit) { - return (ifp); - } + vlp = parent_list_lookup(p); + if (vlp == NULL) { + /* no VLAN's */ + vlan_unlock(); + return; + } + switch (event->event_code) { + case KEV_DL_IF_DETACHING: + vlan_parent_flags_set_detaching(vlp); + vlan_parent_remove_all_vlans(vlp); + break; + + case KEV_DL_LINK_OFF: + case KEV_DL_LINK_ON: + vlan_parent_link_event(vlp, event->event_code); + break; + default: + break; } - return (ifp); + vlan_unlock(); + return; } static void @@ -1295,103 +1843,41 @@ interface_link_event(struct ifnet * ifp, u_long event_code) } static void -parent_link_event(struct ifnet * p, u_long event_code) +vlan_parent_link_event(vlan_parent_ref vlp, u_long event_code) { - struct ifvlan * ifv; + ifvlan_ref ifv; - LIST_FOREACH(ifv, &ifv_list, ifv_list) { - if (p == ifv->ifv_p) { - interface_link_event(ifv->ifv_ifp, event_code); - } + LIST_FOREACH(ifv, &vlp->vlp_vlan_list, ifv_vlan_list) { + interface_link_event(ifv->ifv_ifp, event_code); } return; } -/* - * Function: vlan_dl_event - * Purpose: - * Process DLIL events that interest us. Currently, that is - * just the interface UP and DOWN. Ideally, this would also - * include the KEV_DL_IF_DETACH{ING} messages, which would eliminate - * the need for an interface filter. 
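vlan_parent_link_event() turns one link event on the parent into one event per child VLAN by walking vlp_vlan_list. A sketch of the same fan-out over a <sys/queue.h> LIST, with printf standing in for interface_link_event():

#include <stdio.h>
#include <sys/queue.h>

struct vlan {
    LIST_ENTRY(vlan) link;
    int unit;
};
LIST_HEAD(vlan_list, vlan);

/* One event on the parent becomes one event per child VLAN. */
static void
fan_out_link_event(struct vlan_list *vlans, unsigned long event_code)
{
    struct vlan *v;

    LIST_FOREACH(v, vlans, link)
        printf("vlan%d: link event %lu\n", v->unit, event_code);
}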
- */ -static int -vlan_dl_event(struct kern_event_msg * event, u_long dl_tag) -{ - struct ifnet * p; - struct net_event_data * net_event; - - if (event->vendor_code != KEV_VENDOR_APPLE - || event->kev_class != KEV_NETWORK_CLASS - || event->kev_subclass != KEV_DL_SUBCLASS) { - goto done; - } - net_event = (struct net_event_data *)(event->event_data); - switch (event->event_code) { - case KEV_DL_LINK_OFF: - case KEV_DL_LINK_ON: - p = find_if_name_unit(net_event->if_name, net_event->if_unit); - if (p != NULL) { - parent_link_event(p, event->event_code); - } - break; -#if 0 - case KEV_DL_IF_DETACHING: - case KEV_DL_IF_DETACHED: - /* we don't get these, unfortunately */ - break; -#endif 0 - default: - break; - } - - done: - return (0); -} - /* * Function: vlan_attach_protocol * Purpose: * Attach a DLIL protocol to the interface, using the ETHERTYPE_VLAN - * demux ether type. We're not a real protocol, we'll never receive - * any packets because they're intercepted by ether_demux before - * our input routine would be called. + * demux ether type. * - * The reasons for attaching a protocol to the interface are: - * 1) add a protocol reference to the interface so that the underlying - * interface automatically gets marked up while we're attached - * 2) receive link status events which we can propagate to our - * VLAN interfaces. + * The ethernet demux actually special cases VLAN to support hardware. + * The demux here isn't used. The demux will return PF_VLAN for the + * appropriate packets and our vlan_input function will be called. */ static int vlan_attach_protocol(struct ifnet *ifp) { - struct dlil_demux_desc desc; - u_long dl_tag; - u_short en_native = ETHERTYPE_VLAN; int error; - int i; struct dlil_proto_reg_str reg; - + + bzero(®, sizeof(reg)); TAILQ_INIT(®.demux_desc_head); - desc.type = DLIL_DESC_RAW; - desc.variants.bitmask.proto_id_length = 0; - desc.variants.bitmask.proto_id = 0; - desc.variants.bitmask.proto_id_mask = 0; - desc.native_type = (char *) &en_native; - TAILQ_INSERT_TAIL(®.demux_desc_head, &desc, next); reg.interface_family = ifp->if_family; reg.unit_number = ifp->if_unit; - reg.input = vlan_proto_input; - reg.pre_output = 0; - reg.event = vlan_dl_event; - reg.offer = 0; - reg.ioctl = 0; - reg.default_proto = 0; - reg.protocol_family = VLAN_PROTO_FAMILY; - - error = dlil_attach_protocol(®, &dl_tag); + reg.input = vlan_input; + reg.event = vlan_event; + reg.protocol_family = PF_VLAN; + error = dlil_attach_protocol(®); if (error) { printf("vlan_proto_attach(%s%d) dlil_attach_protocol failed, %d\n", ifp->if_name, ifp->if_unit, error); @@ -1407,21 +1893,14 @@ vlan_attach_protocol(struct ifnet *ifp) static int vlan_detach_protocol(struct ifnet *ifp) { - u_long dl_tag; int error; - error = dlil_find_dltag(ifp->if_family, ifp->if_unit, - VLAN_PROTO_FAMILY, &dl_tag); + error = dlil_detach_protocol(ifp, PF_VLAN); if (error) { - printf("vlan_proto_detach(%s%d) dlil_find_dltag failed, %d\n", + printf("vlan_proto_detach(%s%d) dlil_detach_protocol failed, %d\n", ifp->if_name, ifp->if_unit, error); - } else { - error = dlil_detach_protocol(dl_tag); - if (error) { - printf("vlan_proto_detach(%s%d) dlil_detach_protocol failed, %d\n", - ifp->if_name, ifp->if_unit, error); - } } + return (error); } @@ -1435,41 +1914,36 @@ vlan_detach_protocol(struct ifnet *ifp) extern int ether_add_if(struct ifnet *ifp); extern int ether_del_if(struct ifnet *ifp); extern int ether_init_if(struct ifnet *ifp); -extern int ether_add_proto(struct ddesc_head_str *desc_head, - struct if_proto *proto, u_long 
dl_tag); -extern int ether_del_proto(struct if_proto *proto, u_long dl_tag); -extern int ether_ifmod_ioctl(struct ifnet *ifp, u_long command, - caddr_t data); -extern int ether_del_proto(struct if_proto *proto, u_long dl_tag); -extern int ether_add_proto(struct ddesc_head_str *desc_head, struct if_proto *proto, u_long dl_tag); - -extern int ether_attach_inet(struct ifnet *ifp, u_long *dl_tag); -extern int ether_detach_inet(struct ifnet *ifp, u_long dl_tag); -extern int ether_attach_inet6(struct ifnet *ifp, u_long *dl_tag); -extern int ether_detach_inet6(struct ifnet *ifp, u_long dl_tag); +extern int ether_add_proto_old(struct ifnet *ifp, u_long protocol_family, + struct ddesc_head_str *desc_head); + +extern int ether_attach_inet(struct ifnet *ifp, u_long protocol_family); +extern int ether_detach_inet(struct ifnet *ifp, u_long protocol_family); +extern int ether_attach_inet6(struct ifnet *ifp, u_long protocol_family); +extern int ether_detach_inet6(struct ifnet *ifp, u_long protocol_family); static int -vlan_attach_inet(struct ifnet *ifp, u_long *dl_tag) +vlan_attach_inet(struct ifnet *ifp, u_long protocol_family) { - return (ether_attach_inet(ifp, dl_tag)); + return (ether_attach_inet(ifp, protocol_family)); } static int -vlan_detach_inet(struct ifnet *ifp, u_long dl_tag) +vlan_detach_inet(struct ifnet *ifp, u_long protocol_family) { - return (ether_detach_inet(ifp, dl_tag)); + return (ether_detach_inet(ifp, protocol_family)); } static int -vlan_attach_inet6(struct ifnet *ifp, u_long *dl_tag) +vlan_attach_inet6(struct ifnet *ifp, u_long protocol_family) { - return (ether_attach_inet6(ifp, dl_tag)); + return (ether_attach_inet6(ifp, protocol_family)); } static int -vlan_detach_inet6(struct ifnet *ifp, u_long dl_tag) +vlan_detach_inet6(struct ifnet *ifp, u_long protocol_family) { - return (ether_detach_inet6(ifp, dl_tag)); + return (ether_detach_inet6(ifp, protocol_family)); } static int @@ -1484,38 +1958,21 @@ vlan_del_if(struct ifnet *ifp) return (ether_del_if(ifp)); } -static int -vlan_init_if(struct ifnet *ifp) -{ - return (0); -} - -static int -vlan_shutdown() -{ - return 0; -} __private_extern__ int -vlan_family_init() +vlan_family_init(void) { - int i, error=0; + int error=0; struct dlil_ifmod_reg_str ifmod_reg; - struct dlil_protomod_reg_str vlan_protoreg; - -#if 0 - /* VLAN family is built-in, called from ether_family_init */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); -#endif 0 - + bzero(&ifmod_reg, sizeof(ifmod_reg)); ifmod_reg.add_if = vlan_add_if; ifmod_reg.del_if = vlan_del_if; - ifmod_reg.init_if = vlan_init_if; - ifmod_reg.add_proto = ether_add_proto; + ifmod_reg.init_if = NULL; + ifmod_reg.add_proto = ether_add_proto_old; ifmod_reg.del_proto = ether_del_proto; - ifmod_reg.ifmod_ioctl = ether_ifmod_ioctl; - ifmod_reg.shutdown = vlan_shutdown; + ifmod_reg.ifmod_ioctl = ether_ioctl; + ifmod_reg.shutdown = NULL; if (dlil_reg_if_modules(APPLE_IF_FAM_VLAN, &ifmod_reg)) { printf("WARNING: vlan_family_init -- " @@ -1524,31 +1981,22 @@ vlan_family_init() goto done; } - /* Register protocol registration functions */ - bzero(&vlan_protoreg, sizeof(vlan_protoreg)); - vlan_protoreg.attach_proto = vlan_attach_inet; - vlan_protoreg.detach_proto = vlan_detach_inet; - - if (error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_VLAN, - &vlan_protoreg) != 0) { - kprintf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", - error); + error = dlil_reg_proto_module(PF_INET, APPLE_IF_FAM_VLAN, + vlan_attach_inet, vlan_detach_inet); + if (error != 0) { + printf("dlil_reg_proto_module 
failed for AF_INET error=%d\n", + error); goto done; } - vlan_protoreg.attach_proto = vlan_attach_inet6; - vlan_protoreg.detach_proto = vlan_detach_inet6; - - if (error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_VLAN, - &vlan_protoreg) != 0) { - kprintf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", - error); + error = dlil_reg_proto_module(PF_INET6, APPLE_IF_FAM_VLAN, + vlan_attach_inet6, vlan_detach_inet6); + if (error != 0) { + printf("dlil_reg_proto_module failed for AF_INET6 error=%d\n", + error); goto done; } vlan_clone_attach(); done: -#if 0 - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); -#endif 0 return (error); } diff --git a/bsd/net/if_vlan_var.h b/bsd/net/if_vlan_var.h index 3588b9dd2..6772ddac8 100644 --- a/bsd/net/if_vlan_var.h +++ b/bsd/net/if_vlan_var.h @@ -53,6 +53,7 @@ #ifndef _NET_IF_VLAN_VAR_H_ #define _NET_IF_VLAN_VAR_H_ 1 +#define ETHER_VLAN_ENCAP_LEN 4 /* len of 802.1Q VLAN encapsulation */ struct ether_vlan_header { u_char evl_dhost[ETHER_ADDR_LEN]; u_char evl_shost[ETHER_ADDR_LEN]; @@ -79,4 +80,7 @@ struct vlanreq { u_short vlr_tag; }; +#ifdef KERNEL_PRIVATE +int vlan_family_init(void); +#endif KERNEL_PRIVATE #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/bsd/net/init.c b/bsd/net/init.c new file mode 100644 index 000000000..82c2882df --- /dev/null +++ b/bsd/net/init.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +struct init_list_entry { + struct init_list_entry *next; + net_init_func_ptr func; +}; + +#define LIST_RAN ((struct init_list_entry*)0xffffffff) +static struct init_list_entry *list_head = 0; + +errno_t +net_init_add( + net_init_func_ptr init_func) +{ + struct init_list_entry *entry; + + if (init_func == 0) { + return EINVAL; + } + + /* Check if we've already started */ + if (list_head == LIST_RAN) { + return EALREADY; + } + + entry = kalloc(sizeof(*entry)); + if (entry == 0) { + printf("net_init_add: no memory\n"); + return ENOMEM; + } + + bzero(entry, sizeof(*entry)); + entry->func = init_func; + + do { + entry->next = list_head; + + if (entry->next == LIST_RAN) { + /* List already ran, cleanup and call the function */ + kfree(entry, sizeof(*entry)); + return EALREADY; + } + } while(!OSCompareAndSwap((UInt32)entry->next, (UInt32)entry, + (UInt32*)&list_head)); + + return 0; +} + +__private_extern__ void +net_init_run(void) +{ + struct init_list_entry *backward_head = 0; + struct init_list_entry *forward_head = 0; + struct init_list_entry *current = 0; + + /* + * Grab the list, replacing the head with 0xffffffff to indicate + * that we've already run. 
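net_init_add()/net_init_run() implement a lock-free, run-once registration list: entries are pushed with compare-and-swap, a sentinel is swapped in so late registrations fail with EALREADY, and the list is reversed so functions run in registration order (pushes prepend). A compilable user-space sketch using C11 atomics, where atomic_exchange plays the role of the OSCompareAndSwap loop in net_init_run:

#include <stdatomic.h>
#include <stdlib.h>

typedef void (*init_fn)(void);

struct node {
    struct node *next;
    init_fn      fn;
};

#define RAN ((struct node *)-1)   /* sentinel: list already consumed */

static _Atomic(struct node *) head;

/* Lock-free push; refuses registrations once the list has run. */
static int
sketch_init_add(init_fn fn)
{
    struct node *n = malloc(sizeof(*n));

    if (n == NULL)
        return (-1);
    n->fn = fn;
    n->next = atomic_load(&head);
    do {
        if (n->next == RAN) {     /* too late, cf. EALREADY */
            free(n);
            return (-1);
        }
    } while (!atomic_compare_exchange_weak(&head, &n->next, n));
    return (0);
}

/* Claim the whole list by swapping in the sentinel, then reverse it
 * so the functions run in the order they were registered. */
static void
sketch_init_run(void)
{
    struct node *rev = atomic_exchange(&head, RAN);
    struct node *fwd = NULL;
    struct node *cur;

    while (rev != NULL) {
        cur = rev;
        rev = cur->next;
        cur->next = fwd;
        fwd = cur;
    }
    while (fwd != NULL) {
        cur = fwd;
        fwd = cur->next;
        cur->fn();
        free(cur);
    }
}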
+ */
+	do {
+		backward_head = list_head;
+	} while (!OSCompareAndSwap((UInt32)backward_head, (UInt32)LIST_RAN,
+							   (UInt32*)&list_head));
+
+	/* Reverse the order of the list */
+	while (backward_head != 0) {
+		current = backward_head;
+		backward_head = current->next;
+		current->next = forward_head;
+		forward_head = current;
+	}
+
+	/* Call each function pointer registered */
+	while (forward_head != 0) {
+		current = forward_head;
+		forward_head = current->next;
+		current->func();
+		kfree(current, sizeof(*current));
+	}
+}
diff --git a/bsd/net/init.h b/bsd/net/init.h
new file mode 100644
index 000000000..fc3ad5120
--- /dev/null
+++ b/bsd/net/init.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*!
+	@header init.h
+	This header defines an API to register a function that will be called when
+	the network stack is being initialized. This gives kernel extensions an
+	opportunity to install filters before sockets are created and network
+	operations occur.
+ */
+#ifndef _NET_INIT_H_
+#define _NET_INIT_H_
+#include
+
+/*!
+	@typedef net_init_func_ptr
+	@discussion net_init_func_ptr will be called once the networking stack
+		is initialized and before network operations occur.
+ */
+typedef void (*net_init_func_ptr)(void);
+
+/*!
+	@function net_init_add
+	@discussion Add a function to be called during network initialization. If
+		net_init_add returns success, your kext must not unload until the
+		function you register has been called.
+	@param init_func A pointer to a function to be called when the stack is
+		initialized.
+	@result EINVAL - the init_func value was NULL.
+ EALREADY - the network has already been initialized + ENOMEM - there was not enough memory to perform this operation + 0 - success + */ +errno_t net_init_add(net_init_func_ptr init_func); + +#ifdef BSD_KERNEL_PRIVATE +/* net_init_run is called from bsd_init */ +extern void net_init_run(void); +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* _NET_INIT_H_ */ diff --git a/bsd/net/iso88025.h b/bsd/net/iso88025.h index e1d1a68dc..243499354 100644 --- a/bsd/net/iso88025.h +++ b/bsd/net/iso88025.h @@ -42,8 +42,6 @@ #ifndef _NET_ISO88025_H_ #define _NET_ISO88025_H_ -#include -#ifdef __APPLE_API_PRIVATE /* * General ISO 802.5 definitions @@ -116,13 +114,4 @@ struct iso88025_addr { #define ISO88025_DEFAULT_MTU 1500 #define senderr(e) { error = (e); goto bad;} -#ifndef __APPLE__ -/* Not implemented in Darwin */ -void iso88025_ifattach __P((struct ifnet *)); -int iso88025_ioctl __P((struct ifnet *, int , caddr_t )); -int iso88025_output __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *)); -void iso88025_input __P((struct ifnet *, struct iso88025_header *, struct mbuf *)); -#endif - -#endif /* __APPLE_API_PRIVATE */ #endif diff --git a/bsd/net/kext_net.c b/bsd/net/kext_net.c index fe79fa1c5..3acfce5de 100644 --- a/bsd/net/kext_net.c +++ b/bsd/net/kext_net.c @@ -96,7 +96,8 @@ register_sockfilter(struct NFDescriptor *nfp, struct NFDescriptor *nfp1, return(0); } -unregister_sockfilter(struct NFDescriptor *nfp, struct protosw *pr, int flags) +int +unregister_sockfilter(struct NFDescriptor *nfp, struct protosw *pr, __unused int flags) { int s; s = splhigh(); @@ -129,7 +130,7 @@ find_nke(unsigned int handle) */ int nke_insert(struct socket *so, struct so_nke *np) -{ int s, error; +{ struct kextcb *kp, *kp1; struct NFDescriptor *nf1, *nf2 = NULL; diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h index ebe88e6c3..f81b5e93f 100644 --- a/bsd/net/kext_net.h +++ b/bsd/net/kext_net.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,77 +19,73 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (C) 1999 Apple Computer, Inc. */ + /* - * Support for network filter kernel extensions - * Justin C. Walker, 990319 + * Support for socket filter kernel extensions */ + #ifndef NET_KEXT_NET_H #define NET_KEXT_NET_H + #include #include -#include +#include -struct mbuf; -struct socket; -struct uio; -struct sockbuf; -struct sockaddr; -struct kextcb; -struct protosw; -struct sockif; -struct sockutil; -struct sockopt; +#ifdef BSD_KERNEL_PRIVATE -#ifdef __APPLE_API_UNSTABLE +#include /* - * This structure gives access to the functionality of the filter. - * The kextcb provides the link from the socket structure. 
+ * Internal implementation bits */ -struct NFDescriptor -{ TAILQ_ENTRY(NFDescriptor) nf_next; /* protosw chain */ - TAILQ_ENTRY(NFDescriptor) nf_list; /* descriptor list */ - unsigned int nf_handle; /* Identifier */ - int nf_flags; - /* Dispatch for PF_FILTER control */ - int (*nf_connect)(); /* Make contact */ - void (*nf_disconnect)(); /* Break contact */ - int (*nf_read)(); /* Get data from filter */ - int (*nf_write)(); /* Send data to filter */ - int (*nf_get)(); /* Get filter config */ - int (*nf_set)(); /* Set filter config */ - /* - * Socket function dispatch vectors - copied to kextcb - * during socreate() - */ - struct sockif *nf_soif; /* Socket functions */ - struct sockutil *nf_soutil; /* Sockbuf utility functions */ - u_long reserved[4]; /* for future use if needed */ + +struct socket_filter; + +#define SFEF_DETACHING 0x1 + +struct socket_filter_entry { + struct socket_filter_entry *sfe_next_onsocket; + struct socket_filter_entry *sfe_next_onfilter; + + struct socket_filter *sfe_filter; + struct socket *sfe_socket; + void *sfe_cookie; + + u_int32_t sfe_flags; }; -#define NFD_GLOBAL 0x01 -#define NFD_PROG 0x02 -#define NFD_VISIBLE 0x80000000 +#define SFF_DETACHING 0x1 -#define NFF_BEFORE 0x01 -#define NFF_AFTER 0x02 +struct socket_filter { + TAILQ_ENTRY(socket_filter) sf_protosw_next; + TAILQ_ENTRY(socket_filter) sf_global_next; + struct socket_filter_entry *sf_entry_head; + + struct protosw *sf_proto; + struct sflt_filter sf_filter; + u_int32_t sf_flags; + u_int32_t sf_usecount; +}; -#ifdef KERNEL -/* How to register: filter, insert location, target protosw, flags */ -extern int register_sockfilter(struct NFDescriptor *, - struct NFDescriptor *, - struct protosw *, int); -/* How to unregister: filter, original protosw, flags */ -extern int unregister_sockfilter(struct NFDescriptor *, struct protosw *, int); +TAILQ_HEAD(socket_filter_list, socket_filter); -#ifdef __APPLE_API_PRIVATE -TAILQ_HEAD(nf_list, NFDescriptor); +/* Private, internal implementation functions */ +void sflt_init(void); +void sflt_initsock(struct socket *so); +void sflt_termsock(struct socket *so); +void sflt_use(struct socket *so); +void sflt_unuse(struct socket *so); +void sflt_notify(struct socket *so, sflt_event_t event, void *param); +int sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data, + mbuf_t *control, sflt_data_flag_t flags); +int sflt_attach_private(struct socket *so, struct socket_filter *filter, sflt_handle handle, int locked); +void sflt_detach_private(struct socket_filter_entry *entry, int filter_detached); -extern struct nf_list nf_list; -#endif /* __APPLE_API_PRIVATE */ -#endif +#endif /* BSD_KERNEL_PRIVATE */ + +#define NFF_BEFORE 0x01 +#define NFF_AFTER 0x02 #define NKE_OK 0 #define NKE_REMOVE -1 @@ -102,6 +98,10 @@ extern struct nf_list nf_list; * the 'where' NKE. If the latter is NULL, the flags indicate "first" * or "last" */ +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + struct so_nke { unsigned int nke_handle; unsigned int nke_where; @@ -109,102 +109,9 @@ struct so_nke unsigned long reserved[4]; /* for future use */ }; -/* - * sockif: - * Contains socket interface: - * dispatch vector abstracting the interface between protocols and - * the socket layer. 
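Each socket_filter_entry above is threaded onto two singly-linked chains at once: sfe_next_onsocket (so a socket can run its filters) and sfe_next_onfilter (so a filter can find every attachment when it unregisters). A sketch of that double threading with hypothetical reduced types:

struct sock;
struct filt;

/* An attachment lives on two chains at once, one rooted in the
 * socket and one in the filter. */
struct entry {
    struct entry *next_on_socket;
    struct entry *next_on_filter;
    struct sock  *sock;
    struct filt  *filt;
};

struct sock { struct entry *entries; };
struct filt { struct entry *entries; };

/* Push the entry onto both chains; detaching must likewise unlink
 * it from both. */
static void
attach(struct entry *e, struct sock *so, struct filt *f)
{
    e->sock = so;
    e->filt = f;
    e->next_on_socket = so->entries;
    so->entries = e;
    e->next_on_filter = f->entries;
    f->entries = e;
}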
- * TODO: add sf_sosense() - */ -struct sockif -{ int (*sf_soabort)(struct socket *, struct kextcb *); - int (*sf_soaccept)(struct socket *, struct sockaddr **, - struct kextcb *); - int (*sf_sobind)(struct socket *, struct sockaddr *, struct kextcb *); - int (*sf_soclose)(struct socket *, struct kextcb *); - int (*sf_soconnect)(struct socket *, struct sockaddr *, - struct kextcb *); - int (*sf_soconnect2)(struct socket *, struct socket *, - struct kextcb *); - int (*sf_socontrol)(struct socket *, struct sockopt *, - struct kextcb *); - int (*sf_socreate)(struct socket *, struct protosw *, struct kextcb *); - int (*sf_sodisconnect)(struct socket *, struct kextcb *); - int (*sf_sofree)(struct socket *, struct kextcb *); - int (*sf_sogetopt)(struct socket *, int, int, struct mbuf **, - struct kextcb *); - int (*sf_sohasoutofband)(struct socket *, struct kextcb *); - int (*sf_solisten)(struct socket *, struct kextcb *); - int (*sf_soreceive)(struct socket *, struct sockaddr **, struct uio **, - struct mbuf **, struct mbuf **, int *, - struct kextcb *); - int (*sf_sorflush)(struct socket *, struct kextcb *); - int (*sf_sosend)(struct socket *, struct sockaddr **, struct uio **, - struct mbuf **, struct mbuf **, int *, - struct kextcb *); - int (*sf_sosetopt)(struct socket *, int, int, struct mbuf *, - struct kextcb *); - int (*sf_soshutdown)(struct socket *, int, struct kextcb *); - /* Calls sorwakeup() */ - int (*sf_socantrcvmore)(struct socket *, struct kextcb *); - /* Calls sowwakeup() */ - int (*sf_socantsendmore)(struct socket *, struct kextcb *); - /* Calls soqinsque(), sorwakeup(), sowwakeup() */ - int (*sf_soisconnected)(struct socket *, struct kextcb *); - int (*sf_soisconnecting)(struct socket *, struct kextcb *); - /* Calls sowwakeup(), sorwakeup() */ - int (*sf_soisdisconnected)(struct socket *, struct kextcb *); - /* Calls sowwakeup(), sorwakeup() */ - int (*sf_soisdisconnecting)(struct socket *, struct kextcb *); - /* Calls soreserve(), soqinsque(), soqremque(), sorwakeup() */ - int (*sf_sonewconn)(struct socket *, int, struct kextcb *); - int (*sf_soqinsque)(struct socket *, struct socket *, int, - struct kextcb *); - int (*sf_soqremque)(struct socket *, int, struct kextcb *); - int (*sf_soreserve)(struct socket *, u_long, u_long, struct kextcb *); - int (*sf_sowakeup)(struct socket *, struct sockbuf *, - struct kextcb *); - u_long reserved[4]; -}; - +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif -/* - * sockutil: - * Contains the utility functions for socket layer access - */ -struct sockutil -{ /* Sleeps if locked */ - int (*su_sb_lock)(struct sockbuf *, struct kextcb *); - /* Conditionally calls sbappendrecord, Calls sbcompress */ - int (*su_sbappend)(struct sockbuf *, struct mbuf *, struct kextcb *); - /* Calls sbspace(), sballoc() */ - int (*su_sbappendaddr)(struct sockbuf *, struct sockaddr *, - struct mbuf *, struct mbuf *, struct kextcb *); - /* Calls sbspace(), sballoc() */ - int (*su_sbappendcontrol)(struct sockbuf *, struct mbuf *, - struct mbuf *, struct kextcb *); - /* Calls sballoc(), sbcompress() */ - int (*su_sbappendrecord)(struct sockbuf *, struct mbuf *, - struct kextcb *); - /* Calls sballoc() */ - int (*su_sbcompress)(struct sockbuf *, struct mbuf *, struct mbuf *, - struct kextcb *); - /* Calls sbfree() */ - int (*su_sbdrop)(struct sockbuf *, int, struct kextcb *); - /* Calls sbfree() */ - int (*su_sbdroprecord)(struct sockbuf *, struct kextcb *); - /* Calls sbdrop() */ - int (*su_sbflush)(struct sockbuf *, struct kextcb *); - /* Calls 
sballoc(), sbcompress() */ - int (*su_sbinsertoob)(struct sockbuf *, struct mbuf *, - struct kextcb *); - /* Calls sbflush() */ - int (*su_sbrelease)(struct sockbuf *, struct kextcb *); - int (*su_sbreserve)(struct sockbuf *, u_long, struct kextcb *); - /* Calls tsleep() */ - int (*su_sbwait)(struct sockbuf *, struct kextcb *); - u_long reserved[4]; -}; -#endif /* __APPLE_API_UNSTABLE */ +#endif /* NET_KEXT_NET_H */ -#endif diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c new file mode 100644 index 000000000..a5d64adac --- /dev/null +++ b/bsd/net/kpi_interface.c @@ -0,0 +1,1355 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "kpi_interface.h" + +#include +#include /* for definition of NULL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if IF_LASTCHANGEUPTIME +#define TOUCHLASTCHANGE(__if_lastchange) microuptime(__if_lastchange) +#else +#define TOUCHLASTCHANGE(__if_lastchange) microtime(__if_lastchange) +#endif + +extern lck_spin_t *dlil_input_lock; + +/* + Temporary work around until we have real reference counting + + We keep the bits about calling dlil_if_release (which should be + called recycle) transparent by calling it from our if_free function + pointer. We have to keep the client's original detach function + somewhere so we can call it. 
+ */ +static void +ifnet_kpi_free( + ifnet_t ifp) +{ + ifnet_detached_func detach_func = ifp->if_kpi_storage; + + if (detach_func) + detach_func(ifp); + + if (ifp->if_broadcast.length > sizeof(ifp->if_broadcast.u.buffer)) { + FREE(ifp->if_broadcast.u.ptr, M_IFADDR); + ifp->if_broadcast.u.ptr = NULL; + } + + dlil_if_release(ifp); +} + +errno_t +ifnet_allocate( + const struct ifnet_init_params *init, + ifnet_t *interface) +{ + int error; + struct ifnet *ifp = NULL; + + if (init->family == 0) + return EINVAL; + if (init->name == NULL || + init->output == NULL) + return EINVAL; + if (strlen(init->name) >= IFNAMSIZ) + return EINVAL; + if ((init->type & 0xFFFFFF00) != 0 || init->type == 0) + return EINVAL; + + error = dlil_if_acquire(init->family, init->uniqueid, init->uniqueid_len, &ifp); + if (error == 0) + { + strncpy(ifp->if_name, init->name, IFNAMSIZ); + ifp->if_type = init->type; + ifp->if_family = init->family; + ifp->if_unit = init->unit; + ifp->if_output = init->output; + ifp->if_demux = init->demux; + ifp->if_add_proto_u.kpi = init->add_proto; + ifp->if_del_proto = init->del_proto; + ifp->if_check_multi = init->check_multi; + ifp->if_framer = init->framer; + ifp->if_softc = init->softc; + ifp->if_ioctl = init->ioctl; + ifp->if_set_bpf_tap = init->set_bpf_tap; + ifp->if_free = ifnet_kpi_free; + ifp->if_event = init->event; + ifp->if_kpi_storage = init->detach; + ifp->if_eflags |= IFEF_USEKPI; + + if (init->broadcast_len && init->broadcast_addr) { + if (init->broadcast_len > sizeof(ifp->if_broadcast.u.buffer)) { + MALLOC(ifp->if_broadcast.u.ptr, u_char*, init->broadcast_len, M_IFADDR, M_NOWAIT); + if (ifp->if_broadcast.u.ptr == NULL) { + error = ENOMEM; + } + else { + bcopy(init->broadcast_addr, ifp->if_broadcast.u.ptr, init->broadcast_len); + } + } + else { + bcopy(init->broadcast_addr, ifp->if_broadcast.u.buffer, init->broadcast_len); + } + ifp->if_broadcast.length = init->broadcast_len; + } + else { + bzero(&ifp->if_broadcast, sizeof(ifp->if_broadcast)); + } + + if (error == 0) { + *interface = ifp; + ifnet_reference(ifp); // temporary - this should be done in dlil_if_acquire + } + else { + dlil_if_release(ifp); + *interface = 0; + } + } + + /* + Note: We should do something here to indicate that we haven't been + attached yet. By doing so, we can catch the case in ifnet_release + where the reference count reaches zero and call the recycle + function. If the interface is attached, the interface will be + recycled when the interface's if_free function is called. If the + interface is never attached, the if_free function will never be + called and the interface will never be recycled. 
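ifnet_allocate() stores the broadcast address inline when it fits in if_broadcast.u.buffer and heap-allocates it otherwise, and ifnet_kpi_free() uses the recorded length to decide whether to FREE. The same small-buffer optimization in isolation, with an arbitrary 8-byte inline size for the sketch:

#include <stdlib.h>
#include <string.h>

struct addr_storage {
    size_t length;
    union {
        unsigned char *ptr;
        unsigned char  buffer[8];
    } u;
};

static int
addr_set(struct addr_storage *a, const void *bytes, size_t len)
{
    if (len > sizeof(a->u.buffer)) {
        a->u.ptr = malloc(len);       /* too big: spill to the heap */
        if (a->u.ptr == NULL)
            return (-1);
        memcpy(a->u.ptr, bytes, len);
    } else {
        memcpy(a->u.buffer, bytes, len);
    }
    a->length = len;
    return (0);
}

/* Free only when the recorded length says the heap path was taken,
 * the same test ifnet_kpi_free makes on if_broadcast.length. */
static void
addr_clear(struct addr_storage *a)
{
    if (a->length > sizeof(a->u.buffer))
        free(a->u.ptr);
    a->length = 0;
}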
+ */ + + return error; +} + +errno_t +ifnet_reference( + ifnet_t interface) +{ + if (interface == NULL) return EINVAL; + ifp_reference(interface); + return 0; +} + +errno_t +ifnet_release( + ifnet_t interface) +{ + if (interface == NULL) return EINVAL; + ifp_release(interface); + return 0; +} + +errno_t +ifnet_attach( + ifnet_t interface, + const struct sockaddr_dl *ll_addr) +{ + if (interface == NULL) return EINVAL; + if (ll_addr && interface->if_addrlen == 0) { + interface->if_addrlen = ll_addr->sdl_alen; + } + else if (ll_addr && ll_addr->sdl_alen != interface->if_addrlen) { + return EINVAL; + } + return dlil_if_attach_with_address(interface, ll_addr); +} + +errno_t +ifnet_detach( + ifnet_t interface) +{ + errno_t error; + + if (interface == NULL) return EINVAL; + + error = dlil_if_detach(interface); + if (error == DLIL_WAIT_FOR_FREE) error = 0; /* Client should always wait for detach */ + + return error; +} + +void* +ifnet_softc( + ifnet_t interface) +{ + return interface == NULL ? NULL : interface->if_softc; +} + +const char* +ifnet_name( + ifnet_t interface) +{ + return interface == NULL ? NULL : interface->if_name; +} + +ifnet_family_t +ifnet_family( + ifnet_t interface) +{ + return interface == NULL ? 0 : interface->if_family; +} + +u_int32_t +ifnet_unit( + ifnet_t interface) +{ + return interface == NULL ? (u_int32_t)0xffffffff : (u_int32_t)interface->if_unit; +} + +u_int32_t +ifnet_index( + ifnet_t interface) +{ + return interface == NULL ? (u_int32_t)0xffffffff : interface->if_index; +} + +errno_t +ifnet_set_flags( + ifnet_t interface, + u_int16_t new_flags, + u_int16_t mask) +{ + int lock; + + if (interface == NULL) return EINVAL; + lock = (interface->if_lock != 0); + + if (lock) ifnet_lock_exclusive(interface); + + /* If we are modifying the up/down state, call if_updown */ + if (lock && (mask & IFF_UP) != 0) { + if_updown(interface, (new_flags & IFF_UP) == IFF_UP); + } + + interface->if_flags = (new_flags & mask) | (interface->if_flags & ~mask); + if (lock) ifnet_lock_done(interface); + + return 0; +} + +u_int16_t +ifnet_flags( + ifnet_t interface) +{ + return interface == NULL ? 0 : interface->if_flags; +} + +errno_t +ifnet_set_eflags( + ifnet_t interface, + u_int32_t new_flags, + u_int32_t mask) +{ + int lock; + + if (interface == NULL) return EINVAL; + lock = (interface->if_lock != 0); + + if (lock) ifnet_lock_exclusive(interface); + interface->if_eflags = (new_flags & mask) | (interface->if_eflags & ~mask); + if (lock) ifnet_lock_done(interface); + + return 0; +} + +u_int32_t +ifnet_eflags( + ifnet_t interface) +{ + return interface == NULL ? 0 : interface->if_eflags; +} + +static const ifnet_offload_t offload_mask = IFNET_CSUM_IP | IFNET_CSUM_TCP | + IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | + IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING | IFNET_VLAN_MTU; + +errno_t +ifnet_set_offload( + ifnet_t interface, + ifnet_offload_t offload) +{ + int lock; + + if (interface == NULL) return EINVAL; + lock = (interface->if_lock != 0); + + if (lock) ifnet_lock_exclusive(interface); + interface->if_hwassist = (offload & offload_mask); + if (lock) ifnet_lock_done(interface); + + return 0; +} + +ifnet_offload_t +ifnet_offload( + ifnet_t interface) +{ + return interface == NULL ? 0 : (interface->if_hwassist & offload_mask); +} + +/* + * Should MIB data store a copy? 
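ifnet_set_flags() and ifnet_set_eflags() only let the caller change the bits named in mask; the update is a classic read-modify-write, shown here in isolation:

#include <stdint.h>

/* Only bits set in mask may change; everything else keeps its old
 * value. E.g. apply_masked(flags, IFF_UP, IFF_UP) raises IFF_UP and
 * leaves the other flags alone. */
static uint32_t
apply_masked(uint32_t old_flags, uint32_t new_flags, uint32_t mask)
{
    return ((new_flags & mask) | (old_flags & ~mask));
}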
+ */ +errno_t +ifnet_set_link_mib_data( + ifnet_t interface, + void* mibData, + u_int32_t mibLen) +{ + int lock; + + if (interface == NULL) return EINVAL; + lock = (interface->if_lock != 0); + + if (lock) ifnet_lock_exclusive(interface); + interface->if_linkmib = (void*)mibData; + interface->if_linkmiblen = mibLen; + if (lock) ifnet_lock_done(interface); + return 0; +} + +errno_t +ifnet_get_link_mib_data( + ifnet_t interface, + void *mibData, + u_int32_t *mibLen) +{ + errno_t result = 0; + int lock; + + if (interface == NULL) return EINVAL; + lock = (interface->if_lock != NULL); + + if (lock) ifnet_lock_shared(interface); + if (*mibLen < interface->if_linkmiblen) + result = EMSGSIZE; + if (result == 0 && interface->if_linkmib == NULL) + result = ENOTSUP; + + if (result == 0) { + *mibLen = interface->if_linkmiblen; + bcopy(interface->if_linkmib, mibData, *mibLen); + } + if (lock) ifnet_lock_done(interface); + + return result; +} + +u_int32_t +ifnet_get_link_mib_data_length( + ifnet_t interface) +{ + return interface == NULL ? 0 : interface->if_linkmiblen; +} + +errno_t +ifnet_attach_protocol( + ifnet_t interface, + protocol_family_t protocol, + const struct ifnet_attach_proto_param *proto_details) +{ + if (interface == NULL || protocol == 0 || proto_details == NULL) + return EINVAL; + return dlil_attach_protocol_kpi(interface, protocol, proto_details); +} + +errno_t +ifnet_detach_protocol( + ifnet_t interface, + protocol_family_t protocol) +{ + if (interface == NULL || protocol == 0) return EINVAL; + return dlil_detach_protocol(interface, protocol); +} + +errno_t +ifnet_output( + ifnet_t interface, + protocol_family_t protocol_family, + mbuf_t m, + void *route, + const struct sockaddr *dest) +{ + if (interface == NULL || protocol_family == 0 || m == NULL) { + if (m) + mbuf_freem_list(m); + return EINVAL; + } + return dlil_output(interface, protocol_family, m, route, dest, 0); +} + +errno_t +ifnet_output_raw( + ifnet_t interface, + protocol_family_t protocol_family, + mbuf_t m) +{ + if (interface == NULL || protocol_family == 0 || m == NULL) { + if (m) + mbuf_freem_list(m); + return EINVAL; + } + return dlil_output(interface, protocol_family, m, NULL, NULL, 1); +} + +errno_t +ifnet_input( + ifnet_t interface, + mbuf_t first_packet, + const struct ifnet_stat_increment_param *stats) +{ + mbuf_t last_packet = first_packet; + + if (interface == NULL || first_packet == NULL) { + if (first_packet) + mbuf_freem_list(first_packet); + return EINVAL; + } + + while (mbuf_nextpkt(last_packet) != NULL) + last_packet = mbuf_nextpkt(last_packet); + return dlil_input_with_stats(interface, first_packet, last_packet, stats); +} + +errno_t +ifnet_ioctl( + ifnet_t interface, + protocol_family_t protocol_family, + u_int32_t ioctl_code, + void *ioctl_arg) +{ + if (interface == NULL || protocol_family == 0 || ioctl_code == 0) + return EINVAL; + return dlil_ioctl(protocol_family, interface, + ioctl_code, ioctl_arg); +} + +errno_t +ifnet_event( + ifnet_t interface, + struct kern_event_msg* event_ptr) +{ + if (interface == NULL || event_ptr == NULL) return EINVAL; + return dlil_event(interface, event_ptr); +} + +errno_t +ifnet_set_mtu( + ifnet_t interface, + u_int32_t mtu) +{ + if (interface == NULL) return EINVAL; + interface->if_data.ifi_mtu = mtu; + return 0; +} + +u_int32_t +ifnet_mtu( + ifnet_t interface) +{ + u_int32_t retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_mtu; + return retval; +} + +u_char +ifnet_type( + ifnet_t interface) +{ + u_char retval; + + retval = interface == NULL ? 
0 : interface->if_data.ifi_type; + return retval; +} + +#if 0 +errno_t +ifnet_set_typelen( + ifnet_t interface, + u_char typelen) +{ + int lock = (interface->if_lock != 0); + if (lock) ifnet_lock_exclusive(interface); + interface->if_data.ifi_typelen = typelen; + if (lock) ifnet_lock_done(interface); + return 0; +} + +u_char +ifnet_typelen( + ifnet_t interface) +{ + u_char retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_typelen; + return retval; +} +#endif + +errno_t +ifnet_set_addrlen( + ifnet_t interface, + u_char addrlen) +{ + if (interface == NULL) return EINVAL; + interface->if_data.ifi_addrlen = addrlen; + return 0; +} + +u_char +ifnet_addrlen( + ifnet_t interface) +{ + u_char retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_addrlen; + return retval; +} + +errno_t +ifnet_set_hdrlen( + ifnet_t interface, + u_char hdrlen) +{ + if (interface == NULL) return EINVAL; + interface->if_data.ifi_hdrlen = hdrlen; + return 0; +} + +u_char +ifnet_hdrlen( + ifnet_t interface) +{ + u_char retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_hdrlen; + return retval; +} + +errno_t +ifnet_set_metric( + ifnet_t interface, + u_int32_t metric) +{ + if (interface == NULL) return EINVAL; + interface->if_data.ifi_metric = metric; + return 0; +} + +u_int32_t +ifnet_metric( + ifnet_t interface) +{ + u_int32_t retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_metric; + return retval; +} + +errno_t +ifnet_set_baudrate( + ifnet_t interface, + u_int64_t baudrate) +{ + if (interface == NULL) return EINVAL; + /* Pin baudrate to 32 bits until we can change the storage size */ + interface->if_data.ifi_baudrate = baudrate > 0xFFFFFFFF ? 0xFFFFFFFF : baudrate; + return 0; +} + +u_int64_t +ifnet_baudrate( + ifnet_t interface) +{ + u_int64_t retval; + retval = interface == NULL ? 0 : interface->if_data.ifi_baudrate; + return retval; +} + +errno_t +ifnet_stat_increment( + ifnet_t interface, + const struct ifnet_stat_increment_param *counts) +{ + if (interface == NULL) return EINVAL; + + lck_spin_lock(dlil_input_lock); + + interface->if_data.ifi_ipackets += counts->packets_in; + interface->if_data.ifi_ibytes += counts->bytes_in; + interface->if_data.ifi_ierrors += counts->errors_in; + + interface->if_data.ifi_opackets += counts->packets_out; + interface->if_data.ifi_obytes += counts->bytes_out; + interface->if_data.ifi_oerrors += counts->errors_out; + + interface->if_data.ifi_collisions += counts->collisions; + interface->if_data.ifi_iqdrops += counts->dropped; + + /* Touch the last change time. 
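ifnet_set_baudrate() above saturates ("pins") its 64-bit argument into the 32-bit ifi_baudrate field rather than truncating, since truncation would wrap large rates to meaningless small values. In isolation:

#include <stdint.h>

/* Saturate rather than truncate: a 10 Gbit/s rate stays pinned at
 * UINT32_MAX instead of wrapping to a small number. */
static uint32_t
pin_to_u32(uint64_t value)
{
    return (value > 0xFFFFFFFFULL ? 0xFFFFFFFFU : (uint32_t)value);
}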
*/ + TOUCHLASTCHANGE(&interface->if_lastchange); + + lck_spin_unlock(dlil_input_lock); + + return 0; +} + +errno_t +ifnet_stat_increment_in( + ifnet_t interface, + u_int32_t packets_in, + u_int32_t bytes_in, + u_int32_t errors_in) +{ + if (interface == NULL) return EINVAL; + + lck_spin_lock(dlil_input_lock); + + interface->if_data.ifi_ipackets += packets_in; + interface->if_data.ifi_ibytes += bytes_in; + interface->if_data.ifi_ierrors += errors_in; + + TOUCHLASTCHANGE(&interface->if_lastchange); + + lck_spin_unlock(dlil_input_lock); + + return 0; +} + +errno_t +ifnet_stat_increment_out( + ifnet_t interface, + u_int32_t packets_out, + u_int32_t bytes_out, + u_int32_t errors_out) +{ + if (interface == NULL) return EINVAL; + + lck_spin_lock(dlil_input_lock); + + interface->if_data.ifi_opackets += packets_out; + interface->if_data.ifi_obytes += bytes_out; + interface->if_data.ifi_oerrors += errors_out; + + TOUCHLASTCHANGE(&interface->if_lastchange); + + lck_spin_unlock(dlil_input_lock); + + return 0; +} + +errno_t +ifnet_set_stat( + ifnet_t interface, + const struct ifnet_stats_param *stats) +{ + if (interface == NULL) return EINVAL; + + lck_spin_lock(dlil_input_lock); + + interface->if_data.ifi_ipackets = stats->packets_in; + interface->if_data.ifi_ibytes = stats->bytes_in; + interface->if_data.ifi_imcasts = stats->multicasts_in; + interface->if_data.ifi_ierrors = stats->errors_in; + + interface->if_data.ifi_opackets = stats->packets_out; + interface->if_data.ifi_obytes = stats->bytes_out; + interface->if_data.ifi_omcasts = stats->multicasts_out; + interface->if_data.ifi_oerrors = stats->errors_out; + + interface->if_data.ifi_collisions = stats->collisions; + interface->if_data.ifi_iqdrops = stats->dropped; + interface->if_data.ifi_noproto = stats->no_protocol; + + /* Touch the last change time. 
*/
+	TOUCHLASTCHANGE(&interface->if_lastchange);
+
+	lck_spin_unlock(dlil_input_lock);
+
+	return 0;
+}
+
+errno_t
+ifnet_stat(
+	ifnet_t interface,
+	struct ifnet_stats_param *stats)
+{
+	if (interface == NULL) return EINVAL;
+
+	lck_spin_lock(dlil_input_lock);
+
+	stats->packets_in = interface->if_data.ifi_ipackets;
+	stats->bytes_in = interface->if_data.ifi_ibytes;
+	stats->multicasts_in = interface->if_data.ifi_imcasts;
+	stats->errors_in = interface->if_data.ifi_ierrors;
+
+	stats->packets_out = interface->if_data.ifi_opackets;
+	stats->bytes_out = interface->if_data.ifi_obytes;
+	stats->multicasts_out = interface->if_data.ifi_omcasts;
+	stats->errors_out = interface->if_data.ifi_oerrors;
+
+	stats->collisions = interface->if_data.ifi_collisions;
+	stats->dropped = interface->if_data.ifi_iqdrops;
+	stats->no_protocol = interface->if_data.ifi_noproto;
+
+	lck_spin_unlock(dlil_input_lock);
+
+	return 0;
+}
+
+errno_t
+ifnet_touch_lastchange(
+	ifnet_t interface)
+{
+	if (interface == NULL) return EINVAL;
+
+	lck_spin_lock(dlil_input_lock);
+	TOUCHLASTCHANGE(&interface->if_lastchange);
+	lck_spin_unlock(dlil_input_lock);
+
+	return 0;
+}
+
+errno_t
+ifnet_lastchange(
+	ifnet_t interface,
+	struct timeval *last_change)
+{
+	if (interface == NULL) return EINVAL;
+
+	lck_spin_lock(dlil_input_lock);
+	*last_change = interface->if_data.ifi_lastchange;
+	lck_spin_unlock(dlil_input_lock);
+
+#if IF_LASTCHANGEUPTIME
+	/* Crude conversion from uptime to calendar time */
+	last_change->tv_sec += boottime_sec();
+#endif
+
+	return 0;
+}
+
+errno_t
+ifnet_get_address_list(
+	ifnet_t interface,
+	ifaddr_t **addresses)
+{
+	/* A NULL interface returns the addresses of all interfaces. */
+	if (addresses == NULL) return EINVAL;
+	return ifnet_get_address_list_family(interface, addresses, 0);
+}
+
+errno_t
+ifnet_get_address_list_family(
+	ifnet_t interface,
+	ifaddr_t **addresses,
+	sa_family_t family)
+{
+	struct ifnet *ifp;
+	int count = 0;
+	int cmax = 0;
+
+	/* A NULL interface returns the addresses of all interfaces. */
+	if (addresses == NULL) return EINVAL;
+	*addresses = NULL;
+
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet, if_link)
+	{
+		if (interface && ifp != interface) continue;
+
+		ifnet_lock_shared(ifp);
+		if ((ifp->if_eflags & IFEF_DETACHING) == 0) {
+			if (interface == NULL || interface == ifp)
+			{
+				struct ifaddr *addr;
+				TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link)
+				{
+					if (family == 0 || addr->ifa_addr->sa_family == family)
+						cmax++;
+				}
+			}
+		}
+		else if (interface != NULL) {
+			ifnet_lock_done(ifp);
+			ifnet_head_done();
+			return ENXIO;
+		}
+		ifnet_lock_done(ifp);
+	}
+
+	MALLOC(*addresses, ifaddr_t*, sizeof(ifaddr_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+	if (*addresses == NULL) {
+		ifnet_head_done();
+		return ENOMEM;
+	}
+
+	TAILQ_FOREACH(ifp, &ifnet, if_link)
+	{
+		if (interface && ifp != interface) continue;
+
+		ifnet_lock_shared(ifp);
+		if ((ifp->if_eflags & IFEF_DETACHING) == 0) {
+			if (interface == NULL || (struct ifnet*)interface == ifp)
+			{
+				struct ifaddr *addr;
+				TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link)
+				{
+					if (count + 1 > cmax) break;
+					if (family == 0 || addr->ifa_addr->sa_family == family) {
+						(*addresses)[count] = (ifaddr_t)addr;
+						ifaddr_reference((*addresses)[count]);
+						count++;
+					}
+				}
+			}
+		}
+		ifnet_lock_done(ifp);
+		if (interface || count == cmax)
+			break;
+	}
+	ifnet_head_done();
+	/*
+	 * Terminate at the number actually collected; it may be less than
+	 * cmax if addresses went away between the two passes.
+	 */
+	(*addresses)[count] = 0;
+
+	return 0;
+}
+
+void
+ifnet_free_address_list(
+	ifaddr_t *addresses)
+{
+	int i;
+
+	if (addresses == NULL) return;
+
+	for (i = 0; addresses[i] != NULL; i++)
+	{
+		ifaddr_release(addresses[i]);
+	}
+
+	FREE(addresses,
M_TEMP); +} + +void* +ifnet_lladdr( + ifnet_t interface) +{ + if (interface == NULL) return NULL; + return LLADDR(SDL(interface->if_addrhead.tqh_first->ifa_addr)); +} + +errno_t +ifnet_llbroadcast_copy_bytes( + ifnet_t interface, + void *addr, + size_t buffer_len, + size_t *out_len) +{ + if (interface == NULL || addr == NULL || out_len == NULL) return EINVAL; + + *out_len = interface->if_broadcast.length; + + if (buffer_len < interface->if_broadcast.length) { + return EMSGSIZE; + } + + if (interface->if_broadcast.length == 0) + return ENXIO; + + if (interface->if_broadcast.length <= sizeof(interface->if_broadcast.u.buffer)) { + bcopy(interface->if_broadcast.u.buffer, addr, interface->if_broadcast.length); + } + else { + bcopy(interface->if_broadcast.u.ptr, addr, interface->if_broadcast.length); + } + + return 0; +} + +errno_t +ifnet_lladdr_copy_bytes( + ifnet_t interface, + void* lladdr, + size_t lladdr_len) +{ + struct sockaddr_dl *sdl; + if (interface == NULL || lladdr == NULL) return EINVAL; + + sdl = SDL(interface->if_addrhead.tqh_first->ifa_addr); + + while (1) { + if (lladdr_len != sdl->sdl_alen) { + bzero(lladdr, lladdr_len); + return EMSGSIZE; + } + bcopy(LLADDR(sdl), lladdr, lladdr_len); + if (bcmp(lladdr, LLADDR(sdl), lladdr_len) == 0 && + lladdr_len == sdl->sdl_alen) + break; + } + return 0; +} + +static errno_t +ifnet_set_lladdr_internal( + ifnet_t interface, + const void *lladdr, + size_t lladdr_len, + u_char new_type, + int apply_type) +{ + struct ifaddr *ifa; + struct sockaddr_dl *sdl; + errno_t error = 0; + + if (interface == NULL) return EINVAL; + + if (lladdr_len != 0 && (lladdr_len != interface->if_addrlen || lladdr == 0)) + return EINVAL; + + ifnet_head_lock_shared(); + ifa = ifnet_addrs[interface->if_index - 1]; + if (ifa != NULL) { + sdl = (struct sockaddr_dl*)ifa->ifa_addr; + if (lladdr_len != 0) { + bcopy(lladdr, LLADDR(sdl), lladdr_len); + } + else { + bzero(LLADDR(sdl), interface->if_addrlen); + } + sdl->sdl_alen = lladdr_len; + + if (apply_type) { + sdl->sdl_type = new_type; + } + } + else { + error = ENXIO; + } + ifnet_head_done(); + + /* Generate a kernel event */ + if (error == 0) { + dlil_post_msg(interface, KEV_DL_SUBCLASS, + KEV_DL_LINK_ADDRESS_CHANGED, NULL, 0); + } + + return error; +} + +errno_t +ifnet_set_lladdr( + ifnet_t interface, + const void* lladdr, + size_t lladdr_len) +{ + return ifnet_set_lladdr_internal(interface, lladdr, lladdr_len, 0, 0); +} + +errno_t +ifnet_set_lladdr_and_type( + ifnet_t interface, + const void* lladdr, + size_t lladdr_len, + u_char type) +{ + return ifnet_set_lladdr_internal(interface, lladdr, lladdr_len, type, 1); +} + +errno_t +ifnet_add_multicast( + ifnet_t interface, + const struct sockaddr *maddr, + ifmultiaddr_t *address) +{ + if (interface == NULL || maddr == NULL) return EINVAL; + return if_addmulti(interface, maddr, address); +} + +errno_t +ifnet_remove_multicast( + ifmultiaddr_t address) +{ + if (address == NULL) return EINVAL; + return if_delmultiaddr(address, 0); +} + +errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses) +{ + int count = 0; + int cmax = 0; + struct ifmultiaddr *addr; + int lock; + + if (interface == NULL || addresses == NULL) + return EINVAL; + + lock = (interface->if_lock != 0); + if (lock) ifnet_lock_shared(interface); + if ((interface->if_eflags & IFEF_DETACHING) == 0) { + LIST_FOREACH(addr, &interface->if_multiaddrs, ifma_link) + { + cmax++; + } + } + else { + if (lock) ifnet_lock_done(interface); + return ENXIO; + } + + MALLOC(*addresses, ifmultiaddr_t*, 
sizeof(ifmultiaddr_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+	if (*addresses == NULL) {
+		/* Don't leak the interface lock on the error path. */
+		if (lock) ifnet_lock_done(interface);
+		return ENOMEM;
+	}
+
+	LIST_FOREACH(addr, &interface->if_multiaddrs, ifma_link)
+	{
+		if (count + 1 > cmax)
+			break;
+		(*addresses)[count] = (ifmultiaddr_t)addr;
+		ifmaddr_reference((*addresses)[count]);
+		count++;
+	}
+	(*addresses)[cmax] = 0;
+	if (lock) ifnet_lock_done(interface);
+
+	return 0;
+}
+
+void
+ifnet_free_multicast_list(
+	ifmultiaddr_t *addresses)
+{
+	int i;
+
+	if (addresses == NULL) return;
+
+	for (i = 0; addresses[i] != NULL; i++)
+	{
+		ifmaddr_release(addresses[i]);
+	}
+
+	FREE(addresses, M_TEMP);
+}
+
+errno_t
+ifnet_find_by_name(
+	const char *ifname,
+	ifnet_t *interface)
+{
+	struct ifnet *ifp;
+	int namelen;
+
+	if (ifname == NULL || interface == NULL) return EINVAL;
+
+	namelen = strlen(ifname);
+
+	*interface = NULL;
+
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet, if_link)
+	{
+		struct sockaddr_dl *ll_addr =
+			(struct sockaddr_dl *)ifnet_addrs[ifp->if_index - 1]->ifa_addr;
+		if ((ifp->if_eflags & IFEF_DETACHING) == 0 &&
+			namelen == ll_addr->sdl_nlen &&
+			(strncmp(ll_addr->sdl_data, ifname, ll_addr->sdl_nlen) == 0))
+		{
+			break;
+		}
+	}
+	if (ifp) {
+		*interface = ifp;
+		ifnet_reference(*interface);
+	}
+	ifnet_head_done();
+
+	return (ifp == NULL) ? ENXIO : 0;
+}
+
+errno_t
+ifnet_list_get(
+	ifnet_family_t family,
+	ifnet_t **list,
+	u_int32_t *count)
+{
+	struct ifnet *ifp;
+	u_int32_t cmax = 0;
+	errno_t result = 0;
+
+	/* Validate the parameters before touching *count. */
+	if (list == NULL || count == NULL) return EINVAL;
+	*count = 0;
+
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet, if_link)
+	{
+		if (ifp->if_eflags & IFEF_DETACHING) continue;
+		if (family == 0 || ifp->if_family == family)
+			cmax++;
+	}
+
+	if (cmax == 0)
+		result = ENXIO;
+
+	if (result == 0) {
+		MALLOC(*list, ifnet_t*, sizeof(ifnet_t) * (cmax + 1), M_TEMP, M_NOWAIT);
+		if (*list == NULL)
+			result = ENOMEM;
+	}
+
+	if (result == 0) {
+		TAILQ_FOREACH(ifp, &ifnet, if_link)
+		{
+			if (ifp->if_eflags & IFEF_DETACHING) continue;
+			if (*count + 1 > cmax) break;
+			if (family == 0 || ((ifnet_family_t)ifp->if_family) == family)
+			{
+				(*list)[*count] = (ifnet_t)ifp;
+				ifnet_reference((*list)[*count]);
+				(*count)++;
+			}
+		}
+		(*list)[*count] = NULL;
+	}
+	ifnet_head_done();
+
+	/* Propagate ENXIO/ENOMEM instead of silently returning success. */
+	return result;
+}
+
+void
+ifnet_list_free(
+	ifnet_t *interfaces)
+{
+	int i;
+
+	if (interfaces == NULL) return;
+
+	for (i = 0; interfaces[i]; i++)
+	{
+		ifnet_release(interfaces[i]);
+	}
+
+	FREE(interfaces, M_TEMP);
+}
+
+/****************************************************************************/
+/* ifaddr_t accessors														*/
+/****************************************************************************/
+
+errno_t
+ifaddr_reference(
+	ifaddr_t ifa)
+{
+	if (ifa == NULL) return EINVAL;
+	ifaref(ifa);
+	return 0;
+}
+
+errno_t
+ifaddr_release(
+	ifaddr_t ifa)
+{
+	if (ifa == NULL) return EINVAL;
+	ifafree(ifa);
+	return 0;
+}
+
+sa_family_t
+ifaddr_address_family(
+	ifaddr_t ifa)
+{
+	if (ifa && ifa->ifa_addr)
+		return ifa->ifa_addr->sa_family;
+
+	return 0;
+}
+
+errno_t
+ifaddr_address(
+	ifaddr_t ifa,
+	struct sockaddr *out_addr,
+	u_int32_t addr_size)
+{
+	u_int32_t copylen;
+
+	if (ifa == NULL || out_addr == NULL) return EINVAL;
+	if (ifa->ifa_addr == NULL) return ENOTSUP;
+
+	copylen = (addr_size >= ifa->ifa_addr->sa_len) ?
ifa->ifa_addr->sa_len : addr_size; + bcopy(ifa->ifa_addr, out_addr, copylen); + + if (ifa->ifa_addr->sa_len > addr_size) return EMSGSIZE; + + return 0; +} + +errno_t +ifaddr_dstaddress( + ifaddr_t ifa, + struct sockaddr *out_addr, + u_int32_t addr_size) +{ + u_int32_t copylen; + if (ifa == NULL || out_addr == NULL) return EINVAL; + if (ifa->ifa_dstaddr == NULL) return ENOTSUP; + + copylen = (addr_size >= ifa->ifa_dstaddr->sa_len) ? ifa->ifa_dstaddr->sa_len : addr_size; + bcopy(ifa->ifa_dstaddr, out_addr, copylen); + + if (ifa->ifa_dstaddr->sa_len > addr_size) return EMSGSIZE; + + return 0; +} + +errno_t +ifaddr_netmask( + ifaddr_t ifa, + struct sockaddr *out_addr, + u_int32_t addr_size) +{ + u_int32_t copylen; + if (ifa == NULL || out_addr == NULL) return EINVAL; + if (ifa->ifa_netmask == NULL) return ENOTSUP; + + copylen = addr_size >= ifa->ifa_netmask->sa_len ? ifa->ifa_netmask->sa_len : addr_size; + bcopy(ifa->ifa_netmask, out_addr, copylen); + + if (ifa->ifa_netmask->sa_len > addr_size) return EMSGSIZE; + + return 0; +} + +ifnet_t +ifaddr_ifnet( + ifaddr_t ifa) +{ + struct ifnet *ifp; + if (ifa == NULL) return NULL; + ifp = ifa->ifa_ifp; + + return (ifnet_t)ifp; +} + +ifaddr_t +ifaddr_withaddr( + const struct sockaddr* address) +{ + if (address == NULL) return NULL; + return ifa_ifwithaddr(address); +} + +ifaddr_t +ifaddr_withdstaddr( + const struct sockaddr* address) +{ + if (address == NULL) return NULL; + return ifa_ifwithdstaddr(address); +} + +ifaddr_t +ifaddr_withnet( + const struct sockaddr* net) +{ + if (net == NULL) return NULL; + return ifa_ifwithnet(net); +} + +ifaddr_t +ifaddr_withroute( + int flags, + const struct sockaddr* destination, + const struct sockaddr* gateway) +{ + if (destination == NULL || gateway == NULL) return NULL; + return ifa_ifwithroute(flags, destination, gateway); +} + +ifaddr_t +ifaddr_findbestforaddr( + const struct sockaddr *addr, + ifnet_t interface) +{ + if (addr == NULL || interface == NULL) return NULL; + return ifaof_ifpforaddr(addr, interface); +} + +errno_t +ifmaddr_reference( + ifmultiaddr_t ifmaddr) +{ + if (ifmaddr == NULL) return EINVAL; + ifma_reference(ifmaddr); + return 0; +} + +errno_t +ifmaddr_release( + ifmultiaddr_t ifmaddr) +{ + if (ifmaddr == NULL) return EINVAL; + ifma_release(ifmaddr); + return 0; +} + +errno_t +ifmaddr_address( + ifmultiaddr_t ifmaddr, + struct sockaddr *out_addr, + u_int32_t addr_size) +{ + u_int32_t copylen; + + if (ifmaddr == NULL || out_addr == NULL) return EINVAL; + if (ifmaddr->ifma_addr == NULL) return ENOTSUP; + + copylen = addr_size >= ifmaddr->ifma_addr->sa_len ? ifmaddr->ifma_addr->sa_len : addr_size; + bcopy(ifmaddr->ifma_addr, out_addr, copylen); + + if (ifmaddr->ifma_addr->sa_len > addr_size) return EMSGSIZE; + + return 0; +} + +errno_t +ifmaddr_lladdress( + ifmultiaddr_t ifmaddr, + struct sockaddr *out_addr, + u_int32_t addr_size) +{ + if (ifmaddr == NULL || out_addr == NULL) return EINVAL; + if (ifmaddr->ifma_ll == NULL) return ENOTSUP; + + return ifmaddr_address(ifmaddr->ifma_ll, out_addr, addr_size); +} + +ifnet_t +ifmaddr_ifnet( + ifmultiaddr_t ifmaddr) +{ + if (ifmaddr == NULL || ifmaddr->ifma_ifp == NULL) return NULL; + return ifmaddr->ifma_ifp; +} diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h new file mode 100644 index 000000000..8f09d0985 --- /dev/null +++ b/bsd/net/kpi_interface.h @@ -0,0 +1,1617 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*!
+	@header kpi_interface.h
+	This header defines an API to interact with network interfaces in
+	the kernel. The network interface KPI may be used to implement
+	network interfaces or to attach protocols to existing interfaces.
+ */
+
+#ifndef __KPI_INTERFACE__
+#define __KPI_INTERFACE__
+#include <sys/kernel_types.h>
+
+#ifndef _SA_FAMILY_T
+#define _SA_FAMILY_T
+typedef __uint8_t sa_family_t;
+#endif
+
+struct timeval;
+struct sockaddr;
+struct sockaddr_dl;
+struct kern_event_msg;
+struct kev_msg;
+struct ifnet_demux_desc;
+
+/*!
+	@enum Interface Families
+	@abstract Constants defining interface families.
+	@constant IFNET_FAMILY_ANY Match interface of any family type.
+	@constant IFNET_FAMILY_LOOPBACK A software loopback interface.
+	@constant IFNET_FAMILY_ETHERNET An Ethernet interface.
+	@constant IFNET_FAMILY_SLIP A SLIP interface.
+	@constant IFNET_FAMILY_TUN A tunnel interface.
+	@constant IFNET_FAMILY_VLAN A virtual LAN interface.
+	@constant IFNET_FAMILY_PPP A PPP interface.
+	@constant IFNET_FAMILY_PVC A PVC interface.
+	@constant IFNET_FAMILY_DISC A DISC interface.
+	@constant IFNET_FAMILY_MDECAP An MDECAP interface.
+	@constant IFNET_FAMILY_GIF A generic tunnel interface.
+	@constant IFNET_FAMILY_FAITH A FAITH (IPv4/IPv6 translation) interface.
+	@constant IFNET_FAMILY_STF A 6to4 interface.
+	@constant IFNET_FAMILY_FIREWIRE An IEEE 1394 (firewire) interface.
+	@constant IFNET_FAMILY_BOND A virtual bonded interface.
+*/
+
+enum {
+	IFNET_FAMILY_ANY		= 0,
+	IFNET_FAMILY_LOOPBACK	= 1,
+	IFNET_FAMILY_ETHERNET	= 2,
+	IFNET_FAMILY_SLIP		= 3,
+	IFNET_FAMILY_TUN		= 4,
+	IFNET_FAMILY_VLAN		= 5,
+	IFNET_FAMILY_PPP		= 6,
+	IFNET_FAMILY_PVC		= 7,
+	IFNET_FAMILY_DISC		= 8,
+	IFNET_FAMILY_MDECAP		= 9,
+	IFNET_FAMILY_GIF		= 10,
+	IFNET_FAMILY_FAITH		= 11,
+	IFNET_FAMILY_STF		= 12,
+	IFNET_FAMILY_FIREWIRE	= 13,
+	IFNET_FAMILY_BOND		= 14
+};
+/*!
+	@typedef ifnet_family_t
+	@abstract Storage type for the interface family.
+*/
+typedef u_int32_t ifnet_family_t;
+
+/*!
+	@enum BPF tap mode
+	@abstract Constants defining the BPF tap modes.
+	@constant BPF_MODE_DISABLED Disable bpf.
+	@constant BPF_MODE_INPUT Enable input only.
+	@constant BPF_MODE_OUTPUT Enable output only.
+	@constant BPF_MODE_INPUT_OUTPUT Enable input and output.
+*/
+
+enum {
+	BPF_MODE_DISABLED		= 0,
+	BPF_MODE_INPUT			= 1,
+	BPF_MODE_OUTPUT			= 2,
+	BPF_MODE_INPUT_OUTPUT	= 3
+};
+/*!
+	@typedef bpf_tap_mode
+	@abstract Mode for tapping. BPF_MODE_DISABLED/BPF_MODE_INPUT_OUTPUT etc.
+*/
+typedef u_int32_t bpf_tap_mode;
+
+/*!
+	@typedef protocol_family_t
+	@abstract Storage type for the protocol family.
+*/
+typedef u_int32_t protocol_family_t;
+
+/*!
+	@enum Interface Abilities
+	@abstract Constants defining interface offload support.
+	@constant IFNET_CSUM_IP Hardware will calculate IPv4 checksums.
+	@constant IFNET_CSUM_TCP Hardware will calculate TCP checksums.
+	@constant IFNET_CSUM_UDP Hardware will calculate UDP checksums.
+	@constant IFNET_CSUM_FRAGMENT Hardware will checksum IP fragments.
+	@constant IFNET_IP_FRAGMENT Hardware will fragment IP packets.
+	@constant IFNET_VLAN_TAGGING Hardware will generate VLAN headers.
+	@constant IFNET_VLAN_MTU Hardware supports VLAN MTU.
+*/
+
+enum {
+	IFNET_CSUM_IP		= 0x00000001,
+	IFNET_CSUM_TCP		= 0x00000002,
+	IFNET_CSUM_UDP		= 0x00000004,
+	IFNET_CSUM_FRAGMENT	= 0x00000008,
+	IFNET_IP_FRAGMENT	= 0x00000010,
+#ifdef KERNEL_PRIVATE
+	IFNET_CSUM_SUM16	= 0x00001000,
+#endif
+	IFNET_VLAN_TAGGING	= 0x00010000,
+	IFNET_VLAN_MTU		= 0x00020000,
+};
+/*!
+	@typedef ifnet_offload_t
+	@abstract Flags indicating the offload support of the interface.
+*/
+typedef u_int32_t ifnet_offload_t;
+
+/*
+ * Callbacks
+ *
+ * These are function pointers you supply to the kernel in the interface.
+ */
+/*!
+	@typedef bpf_packet_func
+
+	@discussion The bpf_packet_func is used to intercept inbound and
+		outbound packets. The tap function will never free the mbuf.
+		The tap function will only copy the mbuf in to various bpf
+		file descriptors tapping this interface.
+	@param interface The interface being sent or received on.
+	@param data The packet to be transmitted or received.
+	@result An errno value or zero upon success.
+ */
+/* Fast path - do not block or spend excessive amounts of time */
+typedef errno_t (*bpf_packet_func)(ifnet_t interface, mbuf_t data);
+
+/*!
+	@typedef ifnet_output_func
+
+	@discussion ifnet_output_func is used to transmit packets. The stack
+		will pass fully formed packets, including frame header, to the
+		ifnet_output function for an interface. The driver is
+		responsible for freeing the mbuf.
+	@param interface The interface being sent on.
+	@param data The packet to be sent.
+ */
+/* Fast path - do not block or spend excessive amounts of time */
+typedef errno_t (*ifnet_output_func)(ifnet_t interface, mbuf_t data);
+
+/*!
+	@typedef ifnet_ioctl_func
+	@discussion ifnet_ioctl_func is used to communicate ioctls from the
+		stack to the driver.
+	@param interface The interface the ioctl is being sent to.
+	@param cmd The ioctl command.
+	@param data A pointer to any data related to the ioctl.
+ */
+typedef errno_t (*ifnet_ioctl_func)(ifnet_t interface, u_int32_t cmd, void *data);
+
+/*!
+	@typedef ifnet_set_bpf_tap
+	@discussion ifnet_set_bpf_tap is used to set the bpf tap function to
+		be called when packets are sent and/or received.
+	@param interface The interface the bpf tap function is being set on.
+	@param mode Sets the mode of the tap to either disabled, input,
+		output, or input/output.
+	@param callback A function pointer to be called when a packet is
+		sent or received.
+ */
+typedef errno_t (*ifnet_set_bpf_tap)(ifnet_t interface, bpf_tap_mode mode,
+									bpf_packet_func callback);
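+
+/*
+ * For illustration only: a driver's set_bpf_tap callback typically just
+ * records the mode and callback for use in its input and output paths.
+ * struct my_softc and its fields are hypothetical, and the locking a
+ * real driver would need is omitted.
+ *
+ *	static errno_t
+ *	my_set_bpf_tap(ifnet_t interface, bpf_tap_mode mode,
+ *		bpf_packet_func callback)
+ *	{
+ *		struct my_softc *sc = ifnet_softc(interface);
+ *
+ *		sc->bpf_mode = mode;
+ *		sc->bpf_callback = callback;
+ *		return 0;
+ *	}
+ */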
+/*!
+	@typedef ifnet_detached_func
+	@discussion ifnet_detached_func is called when an interface is
+		detached from the list of interfaces. When ifnet_detach is
+		called, it may not detach the interface immediately if protocols
+		are attached. ifnet_detached_func is used to notify the
+		interface that it has been detached from the networking stack.
+		This is the last function that will be called on an interface.
+		Until this function returns, you must not unload a kext
+		supplying function pointers to this interface, even if
+		ifnet_detach has been called. Your detach function may be
+		called during your call to ifnet_detach.
+	@param interface The interface that has been detached.
+ */
+typedef void (*ifnet_detached_func)(ifnet_t interface);
+
+/*!
+	@typedef ifnet_demux_func
+	@discussion ifnet_demux_func is called for each inbound packet to determine
+		which protocol family the packet belongs to. This information is then
+		used by the stack to determine which protocol to pass the packet to.
+		This function may return protocol families for protocols that are
+		not attached. If the protocol family has not been attached to the
+		interface, the packet will be discarded.
+	@param interface The interface the packet was received on.
+	@param packet The mbuf containing the packet.
+	@param frame_header A pointer to the frame header.
+	@param protocol_family Upon return, the protocol family matching the
+		packet should be stored here.
+	@result
+		If the result is zero, processing will continue normally.
+		If the result is EJUSTRETURN, processing will stop but the packet will not be freed.
+		If the result is anything else, the processing will stop and the packet will be freed.
+ */
+typedef errno_t (*ifnet_demux_func)(ifnet_t interface, mbuf_t packet,
+									char *frame_header,
+									protocol_family_t *protocol_family);
+
+/*!
+	@typedef ifnet_event_func
+	@discussion ifnet_event_func is called when an event occurs on a
+		specific interface.
+	@param interface The interface the event occurred on.
+	@param msg Pointer to a kev_msg structure describing the event.
+ */
+typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg);
+
+/*!
+	@typedef ifnet_framer_func
+	@discussion ifnet_framer_func is called for each outbound packet to
+		give the interface an opportunity to prepend interface specific
+		headers.
+	@param interface The interface the packet is being sent on.
+	@param packet Pointer to the mbuf containing the packet, caller may
+		set this to a different mbuf upon return. This can happen if the
+		frameout function needs to prepend another mbuf to the chain to
+		have enough space for the header.
+	@param dest The higher layer protocol destination (i.e. IP address).
+	@param dest_linkaddr The link layer address as determined by the
+		protocol's pre-output function.
+	@param frame_type The frame type as determined by the protocol's
+		pre-output function.
+	@result
+		If the result is zero, processing will continue normally.
+		If the result is EJUSTRETURN, processing will stop but the packet will not be freed.
+		If the result is anything else, the processing will stop and the packet will be freed.
+ */
+typedef errno_t (*ifnet_framer_func)(ifnet_t interface, mbuf_t *packet,
+									const struct sockaddr *dest,
+									const char *dest_linkaddr,
+									const char *frame_type);
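+
+/*
+ * For illustration only: a framer for a hypothetical link layer whose
+ * header is a 2-byte destination followed by a 2-byte frame type. The
+ * format is invented for the example. The sketch assumes the mbuf KPI's
+ * mbuf_prepend and mbuf_data; mbuf_prepend frees the chain when it
+ * fails, so EJUSTRETURN tells the caller not to free it again.
+ *
+ *	static errno_t
+ *	my_framer(ifnet_t interface, mbuf_t *packet,
+ *		const struct sockaddr *dest, const char *dest_linkaddr,
+ *		const char *frame_type)
+ *	{
+ *		char *header;
+ *
+ *		if (mbuf_prepend(packet, 4, MBUF_DONTWAIT) != 0)
+ *			return EJUSTRETURN;
+ *		header = mbuf_data(*packet);
+ *		bcopy(dest_linkaddr, header, 2);
+ *		bcopy(frame_type, header + 2, 2);
+ *		return 0;
+ *	}
+ */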
+/*!
+	@typedef ifnet_add_proto_func
+	@discussion ifnet_add_proto_func is called by the stack when a protocol
+		is attached to an interface. This gives the interface an
+		opportunity to get a list of protocol description structures
+		for demuxing packets to this protocol (demux descriptors).
+	@param interface The interface the protocol will be attached to.
+	@param protocol_family The family of the protocol being attached.
+	@param demux_array An array of demux descriptors that describe
+		the interface specific ways of identifying packets belonging
+		to this protocol family.
+	@param demux_count The number of demux descriptors in the array.
+	@result
+		If the result is zero, processing will continue normally.
+		If the result is anything else, the add protocol will be aborted.
+ */
+typedef errno_t (*ifnet_add_proto_func)(ifnet_t interface,
+									protocol_family_t protocol_family,
+									const struct ifnet_demux_desc *demux_array,
+									u_int32_t demux_count);
+
+/*!
+	@typedef ifnet_del_proto_func
+	@discussion ifnet_del_proto_func is called by the stack when a protocol
+		is being detached from an interface. This gives the interface an
+		opportunity to free any storage related to this specific
+		protocol being attached to this interface.
+	@param interface The interface the protocol will be detached from.
+	@param protocol_family The family of the protocol being detached.
+	@result
+		If the result is zero, processing will continue normally.
+		If the result is anything else, the detach will continue
+		and the error will be returned to the caller.
+ */
+typedef errno_t (*ifnet_del_proto_func)(ifnet_t interface,
+									protocol_family_t protocol_family);
+
+/*!
+	@typedef ifnet_check_multi
+	@discussion ifnet_check_multi is called for each multicast address
+		added to an interface. This gives the interface an opportunity
+		to reject invalid multicast addresses before they are attached
+		to the interface.
+
+		To prevent an address from being added to your multicast list,
+		return EADDRNOTAVAIL. If you don't know how to parse/translate
+		the address, return EOPNOTSUPP.
+	@param interface The interface.
+	@param mcast The multicast address.
+	@result
+		Zero upon success, EADDRNOTAVAIL on invalid multicast,
+		EOPNOTSUPP for addresses the interface does not understand.
+ */
+typedef errno_t (*ifnet_check_multi)(ifnet_t interface,
+									const struct sockaddr* mcast);
+
+/*!
+	@typedef proto_media_input
+	@discussion proto_media_input is called for all inbound packets for
+		a specific protocol on a specific interface. This function is
+		registered on an interface using ifnet_attach_protocol.
+	@param ifp The interface the packet was received on.
+	@param protocol The protocol of the packet received.
+	@param packet The packet being input.
+	@param header The frame header.
+	@result
+		If the result is zero, the caller will assume the packet was passed
+		to the protocol.
+		If the result is non-zero and not EJUSTRETURN, the caller will free
+		the packet.
+ */
+typedef errno_t (*proto_media_input)(ifnet_t ifp, protocol_family_t protocol,
+									mbuf_t packet, char* header);
+
+/*!
+	@typedef proto_media_preout
+	@discussion proto_media_preout is called just before the packet
+		is transmitted. This gives the proto_media_preout function an
+		opportunity to specify the media specific frame type and
+		destination.
+	@param ifp The interface the packet will be sent on.
+	@param protocol The protocol of the packet being sent
+		(PF_INET/etc...).
+	@param packet The packet being sent.
+	@param dest The protocol level destination address.
+	@param route A pointer to the routing structure for the packet.
+	@param frame_type The media specific frame type.
+	@param link_layer_dest The media specific destination.
+	@result
+		If the result is zero, processing will continue normally. If the
+		result is non-zero, processing will stop. If the result is
+		non-zero and not EJUSTRETURN, the packet will be freed by the
+		caller.
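+
+		For illustration only, a pre-output function for a hypothetical
+		protocol carried in an invented ethertype might look like this
+		(my_resolve is a hypothetical helper that fills in
+		link_layer_dest; 0x88B5 is just an example value):
+
+			static errno_t
+			my_proto_preout(ifnet_t ifp, protocol_family_t protocol,
+				mbuf_t *packet, const struct sockaddr *dest,
+				void *route, char *frame_type, char *link_layer_dest)
+			{
+				u_int16_t etype = htons(0x88B5);
+
+				bcopy(&etype, frame_type, sizeof(etype));
+				return my_resolve(ifp, dest, link_layer_dest);
+			}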
+ */
+typedef errno_t (*proto_media_preout)(ifnet_t ifp, protocol_family_t protocol,
+									mbuf_t *packet, const struct sockaddr *dest,
+									void *route, char *frame_type, char *link_layer_dest);
+
+/*!
+	@typedef proto_media_event
+	@discussion proto_media_event is called to notify this layer of
+		interface specific events.
+	@param ifp The interface.
+	@param protocol The protocol family.
+	@param event The event.
+ */
+typedef void (*proto_media_event)(ifnet_t ifp, protocol_family_t protocol,
+									const struct kev_msg *event);
+
+/*!
+	@typedef proto_media_ioctl
+	@discussion proto_media_ioctl allows this layer to handle ioctls.
+		When an ioctl is handled, it is passed to the interface filters,
+		protocol filters, protocol, and interface. If you do not support
+		this ioctl, return EOPNOTSUPP. If you successfully handle the
+		ioctl, return zero. If you return any error other than
+		EOPNOTSUPP, other parts of the stack may not get an opportunity
+		to process the ioctl. If you return EJUSTRETURN, processing will
+		stop and a result of zero will be returned to the caller.
+	@param ifp The interface.
+	@param protocol The protocol family.
+	@param command The ioctl command.
+	@param argument The argument to the ioctl.
+	@result
+		See the discussion.
+ */
+typedef errno_t (*proto_media_ioctl)(ifnet_t ifp, protocol_family_t protocol,
+									u_int32_t command, void* argument);
+
+/*!
+	@typedef proto_media_detached
+	@discussion proto_media_detached notifies you that your protocol
+		has been detached.
+	@param ifp The interface.
+	@param protocol The protocol family.
+	@result
+		See the discussion.
+ */
+typedef errno_t (*proto_media_detached)(ifnet_t ifp, protocol_family_t protocol);
+
+
+/*!
+	@typedef proto_media_resolve_multi
+	@discussion proto_media_resolve_multi is called to resolve a
+		protocol layer multicast address to a link layer multicast
+		address.
+	@param ifp The interface.
+	@param proto_addr The protocol address.
+	@param out_ll A sockaddr_dl to copy the link layer multicast in to.
+	@param ll_len The length of data allocated for out_ll.
+	@result Return zero on success or an errno error value on failure.
+ */
+typedef errno_t (*proto_media_resolve_multi)(ifnet_t ifp,
+									const struct sockaddr *proto_addr,
+									struct sockaddr_dl *out_ll, size_t ll_len);
+
+/*!
+	@typedef proto_media_send_arp
+	@discussion proto_media_send_arp is called by the stack to generate
+		an ARP packet. This field is currently only used with IP. This
+		function should inspect the parameters and transmit an arp
+		packet using the information passed in.
+	@param ifp The interface the arp packet should be sent on.
+	@param arpop The arp operation (usually ARPOP_REQUEST or
+		ARPOP_REPLY).
+	@param sender_hw The value to use for the sender hardware
+		address field. If this is NULL, use the hardware address
+		of the interface.
+	@param sender_proto The value to use for the sender protocol
+		address field. This will not be NULL.
+	@param target_hw The value to use for the target hardware address.
+		If this is NULL, the target hardware address in the ARP packet
+		should be NULL and the link-layer destination for the packet
+		should be a broadcast. If this is not NULL, this value should be
+		used for both the link-layer destination and the target hardware
+		address.
+	@param target_proto The target protocol address. This will not be
+		NULL.
+	@result Return zero on success or an errno error value on failure.
+ */
+typedef errno_t (*proto_media_send_arp)(ifnet_t ifp,
+									u_short arpop,
+									const struct sockaddr_dl* sender_hw,
+									const struct sockaddr* sender_proto,
+									const struct sockaddr_dl* target_hw,
+									const struct sockaddr* target_proto);
+
+/*!
+	@struct ifnet_stat_increment_param
+	@discussion This structure is used to increment the counters on a
+		network interface.
+	@field packets_in The number of packets received.
+	@field bytes_in The number of bytes received.
+	@field errors_in The number of receive errors.
+	@field packets_out The number of packets transmitted.
+	@field bytes_out The number of bytes transmitted.
+	@field errors_out The number of transmission errors.
+	@field collisions The number of collisions seen by this interface.
+	@field dropped The number of packets dropped.
+*/
+
+struct ifnet_stat_increment_param {
+	u_int32_t	packets_in;
+	u_int32_t	bytes_in;
+	u_int32_t	errors_in;
+
+	u_int32_t	packets_out;
+	u_int32_t	bytes_out;
+	u_int32_t	errors_out;
+
+	u_int32_t	collisions;
+	u_int32_t	dropped;
+};
+
+/*!
+	@struct ifnet_init_params
+	@discussion This structure is used to define various properties of
+		the interface when calling ifnet_allocate. A copy of these values
+		will be stored in the ifnet and can not be modified while the
+		interface is attached.
+	@field uniqueid An identifier unique to this instance of the
+		interface.
+	@field uniqueid_len The length, in bytes, of the uniqueid.
+	@field name The interface name (i.e. en).
+	@field unit The interface unit number (en0's unit number is 0).
+	@field family The interface family.
+	@field type The interface type (see sys/if_types.h). Must be less
+		than 256. For new types, use IFT_OTHER.
+	@field output The output function for the interface. Every packet the
+		stack attempts to send through this interface will go out through
+		this function.
+	@field demux The function used to determine the protocol family of an
+		incoming packet.
+	@field add_proto The function used to attach a protocol to this interface.
+	@field del_proto The function used to remove a protocol from this interface.
+	@field check_multi The function used to verify multicast addresses
+		before they are added, required for non point-to-point interfaces.
+	@field framer The function used to frame outbound packets, may be NULL.
+	@field softc Driver specific storage. This value can be retrieved from the
+		ifnet using the ifnet_softc function.
+	@field ioctl The function used to handle ioctls.
+	@field set_bpf_tap The function used to set the bpf_tap function.
+	@field detach The function called to let the driver know the interface has been detached.
+	@field event The function to notify the interface of various interface specific kernel events.
+	@field broadcast_addr The link-layer broadcast address for this interface.
+	@field broadcast_len The length of the link-layer broadcast address.
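+
+	A sketch of typical usage for an Ethernet-style driver; all my_*
+	names are hypothetical and error handling is omitted:
+
+		static const u_char my_broadcast[6] =
+			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+		struct ifnet_init_params init;
+
+		bzero(&init, sizeof(init));
+		init.uniqueid = my_mac_address;
+		init.uniqueid_len = 6;
+		init.name = "foo";
+		init.unit = 0;
+		init.family = IFNET_FAMILY_ETHERNET;
+		init.type = IFT_ETHER;
+		init.output = my_output;
+		init.demux = my_demux;
+		init.add_proto = my_add_proto;
+		init.del_proto = my_del_proto;
+		init.check_multi = my_check_multi;
+		init.softc = my_softc;
+		init.broadcast_addr = my_broadcast;
+		init.broadcast_len = 6;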
+*/
+
+struct ifnet_init_params {
+	/* used to match recycled interface */
+	const void*	uniqueid;			/* optional */
+	u_int32_t	uniqueid_len;		/* optional */
+
+	/* used to fill out initial values for interface */
+	const char*	name;				/* required */
+	u_int32_t	unit;				/* required */
+	ifnet_family_t	family;			/* required */
+	u_int32_t	type;				/* required */
+	ifnet_output_func	output;		/* required */
+	ifnet_demux_func	demux;		/* required */
+	ifnet_add_proto_func	add_proto;	/* required */
+	ifnet_del_proto_func	del_proto;	/* required */
+	ifnet_check_multi	check_multi;	/* required for non point-to-point interfaces */
+	ifnet_framer_func	framer;		/* optional */
+	void*		softc;				/* optional */
+	ifnet_ioctl_func	ioctl;		/* optional */
+	ifnet_set_bpf_tap	set_bpf_tap;	/* optional */
+	ifnet_detached_func	detach;		/* optional */
+	ifnet_event_func	event;		/* optional */
+	const void	*broadcast_addr;	/* required for non point-to-point interfaces */
+	u_int32_t	broadcast_len;		/* required for non point-to-point interfaces */
+};
+
+/*!
+	@struct ifnet_stats_param
+	@discussion This structure is used to get and set the interface
+		statistics.
+	@field packets_in The number of packets received.
+	@field bytes_in The number of bytes received.
+	@field multicasts_in The number of multicast packets received.
+	@field errors_in The number of receive errors.
+	@field packets_out The number of packets transmitted.
+	@field bytes_out The number of bytes transmitted.
+	@field multicasts_out The number of multicast packets transmitted.
+	@field errors_out The number of transmission errors.
+	@field collisions The number of collisions seen by this interface.
+	@field dropped The number of packets dropped.
+	@field no_protocol The number of packets discarded because they were
+		destined for a protocol that is not attached.
+*/
+
+struct ifnet_stats_param {
+	u_int64_t	packets_in;
+	u_int64_t	bytes_in;
+	u_int64_t	multicasts_in;
+	u_int64_t	errors_in;
+
+	u_int64_t	packets_out;
+	u_int64_t	bytes_out;
+	u_int64_t	multicasts_out;
+	u_int64_t	errors_out;
+
+	u_int64_t	collisions;
+	u_int64_t	dropped;
+	u_int64_t	no_protocol;
+};
+
+/*!
+	@struct ifnet_demux_desc
+	@discussion This structure is used to identify packets that belong to a
+		specific protocol. The types supported are interface specific.
+		Ethernet supports ETHER_DESC_ETYPE2, ETHER_DESC_SAP, and
+		ETHER_DESC_SNAP. The type defines the offset in the packet where
+		the data will be matched as well as context. For example, if
+		ETHER_DESC_SNAP is specified, the only valid datalen is 5 and
+		the 5 bytes will only be matched when the packet header
+		indicates that the packet is a SNAP packet.
+	@field type The type of identifier data (i.e. ETHER_DESC_ETYPE2)
+	@field data A pointer to an entry of type (i.e. pointer to 0x0800).
+	@field datalen The number of bytes of data used to describe the
+		packet.
+*/
+
+struct ifnet_demux_desc {
+	u_int32_t	type;
+	void*		data;
+	u_int32_t	datalen;
+};
+
+/*!
+	@struct ifnet_attach_proto_param
+	@discussion This structure is used to attach a protocol to an
+		interface. This structure provides the various functions for
+		handling operations related to the protocol on the interface as
+		well as information for how to demux packets for this protocol.
+	@field demux_array An array of ifnet_demux_desc structures
+		describing the protocol.
+	@field demux_count The number of entries in the demux_array array.
+	@field input The function to be called for inbound packets.
+	@field pre_output The function to be called for outbound packets.
+	@field event The function to be called for interface events.
+	@field ioctl The function to be called for ioctls.
+	@field detached The function to be called for handling the detach.
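+
+	A sketch of typical usage (my_* names are hypothetical; the demux
+	descriptor matches an invented ethertype, 0x88B5, on an
+	Ethernet-style interface):
+
+		struct ifnet_attach_proto_param proto;
+		struct ifnet_demux_desc desc;
+		u_int16_t etype = htons(0x88B5);
+
+		desc.type = ETHER_DESC_ETYPE2;
+		desc.data = &etype;
+		desc.datalen = sizeof(etype);
+
+		bzero(&proto, sizeof(proto));
+		proto.demux_array = &desc;
+		proto.demux_count = 1;
+		proto.input = my_proto_input;
+		proto.pre_output = my_proto_preout;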
+*/ +#ifdef KERNEL_PRIVATE +#define demux_list demux_array +#endif /* KERNEL_PRIVATE */ + +struct ifnet_attach_proto_param { + struct ifnet_demux_desc *demux_array; /* interface may/may not require */ + u_int32_t demux_count; /* interface may/may not require */ + + proto_media_input input; /* required */ + proto_media_preout pre_output; /* required */ + proto_media_event event; /* optional */ + proto_media_ioctl ioctl; /* optional */ + proto_media_detached detached; /* optional */ + proto_media_resolve_multi resolve; /* optional */ + proto_media_send_arp send_arp; /* optional */ +}; + +__BEGIN_DECLS + +/* + * Ifnet creation and reference counting + */ + +/*! + @function ifnet_allocate + @discussion Allocate an ifnet_t with an initial refcount of 1. Many + parts of the stack do not properly refcount the ifnet_t. In + order to avoid freeing the ifnet_t while some parts of the stack + may contain a reference to it, the ifnet_ts are only recycled, + never freed. A unique id is used to try and recycle the same + ifnet_t when allocating an interface. For example, for an + ethernet interface, the hardware address of the ethernet card is + usually used for the uniqueid. If a PC Card is removed and + inserted again, if the ethernet address of the PC card is used, + the same ifnet_t will be used for the card the second time it is + inserted. In the future, when the ifnet_t is correctly + refcounted by all of the stack, the interfaces may be freed and + the unique ids ignored. + @param init The initial values for the interface. These values can + not be changed after the interface has been allocated. + @param interface The interface allocated upon success. + @result May return ENOMEM if there is insufficient memory or EEXIST + if an interface with the same uniqueid and family has already + been allocated and is in use. + */ +errno_t ifnet_allocate(const struct ifnet_init_params *init, ifnet_t *interface); + +/*! + @function ifnet_reference + @discussion Increment the reference count of the ifnet to assure + that it will not go away. The interface must already have at + least one reference. + @param interface The interface to increment the reference count of. + @result May return EINVAL if the interface is not valid. + */ +errno_t ifnet_reference(ifnet_t interface); + +/*! + @function ifnet_release + @discussion Release a reference of the ifnet, this may trigger a + free if the reference count reaches 0. + @param interface The interface to decrement the reference count of + and possibly free. + @result May return EINVAL if the interface is not valid. + */ +errno_t ifnet_release(ifnet_t interface); + +/*! + @function ifnet_attach + @discussion Attaches an interface to the global interface list. The + interface must be setup properly before calling attach. The + stack will take a reference on the interface and hold it until + ifnet_detach is called. + + This function is intended to be called by the driver. A kext + must not call this function on an interface the kext does not + own. + @param interface The interface to attach. + @param ll_addr The link layer address of the interface. This is used + to fill out the first ifaddr in the list of addresses for the + interface. This parameter is not required for interfaces such as + PPP that have no link-layer address. + @result Will return an error if there is anything wrong with the + interface. + */ +errno_t ifnet_attach(ifnet_t interface, const struct sockaddr_dl *ll_addr); + +/*! + @function ifnet_detach + @discussion Detaches the interface. 
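+
+	For context, an illustrative driver lifecycle (error handling
+	omitted; init and my_lladdr are hypothetical): allocate and attach
+	once,
+
+		ifnet_t ifp;
+
+		ifnet_allocate(&init, &ifp);
+		ifnet_attach(ifp, my_lladdr);
+
+	and when the device goes away,
+
+		ifnet_detach(ifp);
+
+	then wait for the detached callback before calling
+	ifnet_release(ifp) and unloading.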
+
+	Call this to indicate this interface is no longer valid (i.e. PC
+	Card was removed). This function will begin the process of
+	removing knowledge of this interface from the stack.
+
+	The function will return before the interface is detached. The
+	functions you supplied to the interface may continue to be
+	called. When the detach has been completed, your detached
+	function will be called. Your kext must not unload until the
+	detached function has been called. The interface will be
+	properly freed when the reference count reaches zero.
+
+	An interface may not be attached again. You must call
+	ifnet_allocate to create a new interface to attach.
+
+	This function is intended to be called by the driver. A kext
+	must not call this function on an interface the kext does not
+	own.
+	@param interface The interface to detach.
+	@result 0 on success, otherwise errno error.
+ */
+errno_t ifnet_detach(ifnet_t interface);
+
+/*
+ * Interface manipulation.
+ */
+
+/*!
+	@function ifnet_softc
+	@discussion Returns the driver's private storage on the interface.
+	@param interface Interface to retrieve the storage from.
+	@result Driver's private storage.
+ */
+void* ifnet_softc(ifnet_t interface);
+
+/*!
+	@function ifnet_name
+	@discussion Returns a pointer to the name of the interface.
+	@param interface Interface to retrieve the name from.
+	@result Pointer to the name.
+ */
+const char* ifnet_name(ifnet_t interface);
+
+/*!
+	@function ifnet_family
+	@discussion Returns the family of the interface.
+	@param interface Interface to retrieve the family from.
+	@result Interface family.
+ */
+ifnet_family_t ifnet_family(ifnet_t interface);
+
+/*!
+	@function ifnet_unit
+	@discussion Returns the unit number of the interface.
+	@param interface Interface to retrieve the unit number from.
+	@result Unit number.
+ */
+u_int32_t ifnet_unit(ifnet_t interface);
+
+/*!
+	@function ifnet_index
+	@discussion Returns the index of the interface. This index value
+		will match the index you would find in a sockaddr_dl or using
+		if_nametoindex or if_indextoname in user space. The value of the
+		interface index is undefined for an interface that is not
+		currently attached.
+	@param interface Interface to retrieve the index of.
+	@result Index.
+ */
+u_int32_t ifnet_index(ifnet_t interface);
+
+/*!
+	@function ifnet_set_flags
+	@discussion Sets the interface flags to new_flags. This function
+		lets you specify which flags you want to change using the mask.
+		The kernel will effectively take the lock, then set the
+		interface's flags to (if_flags & ~mask) | (new_flags & mask).
+	@param interface Interface to set the flags on.
+	@param new_flags The new set of flags that should be set. These
+		flags are defined in net/if.h
+	@param mask The mask of flags to be modified.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask);
+
+/*!
+	@function ifnet_flags
+	@discussion Returns the interface flags that are set.
+	@param interface Interface to retrieve the flags from.
+	@result Flags. These flags are defined in net/if.h
+ */
+u_int16_t ifnet_flags(ifnet_t interface);
+
+
+#ifdef KERNEL_PRIVATE
+/*!
+	@function ifnet_set_eflags
+	@discussion Sets the extended interface flags to new_flags. This
+		function lets you specify which flags you want to change using
+		the mask. The kernel will effectively take the lock, then set
+		the interface's extended flags to (if_eflags & ~mask) |
+		(new_flags & mask).
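+
+		For example, to set a hypothetical extended flag bit IFEF_FOO
+		without disturbing the other bits (illustrative only):
+
+			errno_t err = ifnet_set_eflags(ifp, IFEF_FOO, IFEF_FOO);
+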
+	@param interface The interface.
+	@param new_flags The new set of flags that should be set. These
+		flags are defined in net/if.h
+	@param mask The mask of flags to be modified.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask);
+
+/*!
+	@function ifnet_eflags
+	@discussion Returns the extended interface flags that are set.
+	@param interface Interface to retrieve the flags from.
+	@result Extended flags. These flags are defined in net/if.h
+ */
+u_int32_t ifnet_eflags(ifnet_t interface);
+#endif
+
+/*!
+	@function ifnet_set_offload
+	@discussion Sets a bitfield to indicate special hardware offload
+		support provided by the interface such as hardware checksums and
+		VLAN. This replaces the if_hwassist flags field. Any flags
+		unrecognized by the stack will not be set.
+	@param interface The interface.
+	@param offload The new set of flags indicating which offload options
+		the device supports.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload);
+
+/*!
+	@function ifnet_offload
+	@discussion Returns flags indicating which operations can be
+		offloaded to the interface.
+	@param interface Interface to retrieve the offload from.
+	@result Abilities flags, see ifnet_offload_t.
+ */
+ifnet_offload_t ifnet_offload(ifnet_t interface);
+
+/*!
+	@function ifnet_set_link_mib_data
+	@discussion Sets the mib link data. The ifnet_t will store the
+		pointer you supply and copy mibLen bytes from the pointer
+		whenever the sysctl for getting interface specific MIB data is
+		used. Since the ifnet_t stores a pointer to your data instead of
+		a copy, you may update the data at the address at any time.
+
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface Interface to set the MIB data on.
+	@param mibData A pointer to the data.
+	@param mibLen Length of data pointed to.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData, u_int32_t mibLen);
+
+/*!
+	@function ifnet_get_link_mib_data
+	@discussion Copies the link MIB data in to mibData, up to mibLen
+		bytes. Returns error if the buffer is too small to hold all of
+		the MIB data.
+	@param interface The interface.
+	@param mibData A pointer to space for the mibData to be copied in
+		to.
+	@param mibLen When calling, this should be the size of the buffer
+		passed in mibData. Upon return, this will be the size of data
+		copied in to mibData.
+	@result Returns an error if the buffer size is too small or there is
+		no data.
+ */
+errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData, u_int32_t *mibLen);
+
+/*!
+	@function ifnet_get_link_mib_data_length
+	@discussion Retrieve the size of the mib data.
+	@param interface The interface.
+	@result Returns the number of bytes of mib data associated with the
+		interface.
+ */
+u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface);
+
+/*!
+	@function ifnet_attach_protocol
+	@discussion Attaches a protocol to an interface.
+	@param interface The interface.
+	@param protocol_family The protocol family being attached
+		(PF_INET/PF_APPLETALK/etc...).
+	@param proto_details Details of the protocol being attached.
+	@result 0 on success otherwise the errno error.
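+
+	For illustration, attaching a hypothetical protocol family PF_FOO
+	using a previously filled-out ifnet_attach_proto_param structure;
+	a non-zero result means the protocol was not attached:
+
+		errno_t err;
+
+		err = ifnet_attach_protocol(ifp, PF_FOO, &proto);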
+ */
+errno_t ifnet_attach_protocol(ifnet_t interface, protocol_family_t protocol_family,
+		const struct ifnet_attach_proto_param *proto_details);
+
+/*!
+	@function ifnet_detach_protocol
+	@discussion Detaches a protocol from an interface.
+	@param interface The interface.
+	@param protocol_family The protocol family of the protocol to
+		detach.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_detach_protocol(ifnet_t interface, protocol_family_t protocol_family);
+
+/*!
+	@function ifnet_output
+	@discussion Handles an outbound packet on the interface by calling
+		any filters, a protocol preoutput function, the interface framer
+		function, and finally the interface's output function. The
+		protocol_family will be used to apply protocol filters and
+		determine which preoutput function to call. The route and dest
+		parameters will be passed to the preoutput function defined for
+		the attachment of the specified protocol to the specified
+		interface. ifnet_output will free the mbuf chain in the event of
+		an error.
+	@param interface The interface.
+	@param protocol_family The family of the protocol generating this
+		packet (i.e. AF_INET).
+	@param packet The packet to be transmitted.
+	@param route A pointer to a routing structure for this packet. The
+		preoutput function determines whether this value may be NULL or
+		not.
+	@param dest The destination address of protocol_family type. This
+		will be passed to the preoutput function. If the preoutput
+		function does not require this value, you may pass NULL.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_output(ifnet_t interface, protocol_family_t protocol_family, mbuf_t packet,
+		void* route, const struct sockaddr *dest);
+
+/*!
+	@function ifnet_output_raw
+	@discussion Handles an outbound raw packet on the interface by
+		calling any filters followed by the interface's output function.
+		protocol_family may be zero. If the packet is from a specific
+		protocol the protocol_family will be used to apply protocol
+		filters. All interface filters will be applied to the outgoing
+		packet. Processing, such as calling the protocol preoutput and
+		interface framer functions, will be bypassed. The packet will
+		pass through the filters and be sent on the interface as is.
+		ifnet_output_raw will free the packet chain in the event of an
+		error.
+	@param interface The interface.
+	@param protocol_family The family of the protocol generating this
+		packet (i.e. AF_INET).
+	@param packet The fully formed packet to be transmitted.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_output_raw(ifnet_t interface, protocol_family_t protocol_family, mbuf_t packet);
+
+/*!
+	@function ifnet_input
+	@discussion Inputs packets from the interface. The interface's demux
+		will be called to determine the protocol. Once the protocol is
+		determined, the interface filters and protocol filters will be
+		called. From there, the packet will be passed to the registered
+		protocol. If there is an error, the mbuf chain will be freed.
+	@param interface The interface.
+	@param first_packet The first packet in a chain of packets.
+	@param stats Counts to be integrated in to the stats. The interface
+		statistics will be incremented by the amounts specified in
+		stats. This parameter may be NULL.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet,
+		const struct ifnet_stat_increment_param *stats);
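+
+/*
+ * For illustration only: a driver receive path handing a chain of
+ * packets to the stack with inline stats. my_ifp, my_chain and the
+ * counts are hypothetical values the driver would track.
+ *
+ *	struct ifnet_stat_increment_param stats;
+ *
+ *	bzero(&stats, sizeof(stats));
+ *	stats.packets_in = my_packet_count;
+ *	stats.bytes_in = my_byte_count;
+ *	(void)ifnet_input(my_ifp, my_chain, &stats);
+ */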
+
+/*!
+	@function ifnet_ioctl
+	@discussion Calls the interface's ioctl function with the parameters
+		passed.
+	@param interface The interface.
+	@param protocol The protocol family of the protocol to send the
+		ioctl to (may be zero). Some ioctls apply to a protocol while
+		other ioctls apply to just an interface.
+	@param ioctl_code The ioctl to perform.
+	@param ioctl_arg Any parameters to the ioctl.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol,
+		u_int32_t ioctl_code, void *ioctl_arg);
+
+/*!
+	@function ifnet_event
+	@discussion Calls the interface's event function.
+	@param interface The interface.
+	@param event_ptr Pointer to a kern_event structure describing the
+		event.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_event(ifnet_t interface, struct kern_event_msg* event_ptr);
+
+/*!
+	@function ifnet_set_mtu
+	@discussion Sets the value of the MTU in the interface structure.
+		Calling this function will not notify the driver that the MTU
+		should be changed. Use the appropriate ioctl.
+
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface The interface.
+	@param mtu The new MTU.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu);
+
+/*!
+	@function ifnet_mtu
+	@param interface The interface.
+	@result The MTU.
+ */
+u_int32_t ifnet_mtu(ifnet_t interface);
+
+/*!
+	@function ifnet_type
+	@param interface The interface.
+	@result The type. See net/if_types.h.
+ */
+u_int8_t ifnet_type(ifnet_t interface);
+
+/*!
+	@function ifnet_set_addrlen
+	@discussion
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface The interface.
+	@param addrlen The new address length.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen);
+
+/*!
+	@function ifnet_addrlen
+	@param interface The interface.
+	@result The address length.
+ */
+u_int8_t ifnet_addrlen(ifnet_t interface);
+
+/*!
+	@function ifnet_set_hdrlen
+	@discussion
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface The interface.
+	@param hdrlen The new header length.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen);
+
+/*!
+	@function ifnet_hdrlen
+	@param interface The interface.
+	@result The header length.
+ */
+u_int8_t ifnet_hdrlen(ifnet_t interface);
+
+/*!
+	@function ifnet_set_metric
+	@discussion
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface The interface.
+	@param metric The new metric.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric);
+
+/*!
+	@function ifnet_metric
+	@param interface The interface.
+	@result The metric.
+ */
+u_int32_t ifnet_metric(ifnet_t interface);
+
+/*!
+	@function ifnet_set_baudrate
+	@discussion
+		This function is intended to be called by the driver. A kext
+		must not call this function on an interface the kext does not
+		own.
+	@param interface The interface.
+	@param baudrate The new baudrate.
+	@result 0 on success otherwise the errno error.
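+
+	For example, a driver reporting a 100 Mbit/s link (illustrative;
+	ifp is a previously attached interface):
+
+		(void)ifnet_set_baudrate(ifp, 100000000ULL);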
+ */
+errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate);
+
+/*!
+ @function ifnet_baudrate
+ @param interface The interface.
+ @result The baudrate.
+ */
+u_int64_t ifnet_baudrate(ifnet_t interface);
+
+/*!
+ @function ifnet_stat_increment
+ @discussion
+ This function is intended to be called by the driver. A kext
+ must not call this function on an interface the kext does not
+ own.
+ @param interface The interface.
+ @param counts A pointer to a structure containing the amount to
+ increment each counter by. Any counts not appearing in the
+ ifnet_stat_increment_param structure are handled in the stack.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_stat_increment(ifnet_t interface,
+ const struct ifnet_stat_increment_param *counts);
+
+/*!
+ @function ifnet_stat_increment_in
+ @discussion
+ This function is intended to be called by the driver. This
+ function allows a driver to update the inbound interface counts.
+ The most efficient time to update these counts is when calling
+ ifnet_input.
+
+ A lock protects the counts, which makes the increment functions
+ expensive. The increment function will update the lastchanged
+ value.
+ @param interface The interface.
+ @param packets_in The number of additional packets received.
+ @param bytes_in The number of additional bytes received.
+ @param errors_in The number of additional receive errors.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_stat_increment_in(ifnet_t interface,
+ u_int32_t packets_in, u_int32_t bytes_in,
+ u_int32_t errors_in);
+
+/*!
+ @function ifnet_stat_increment_out
+ @discussion
+ This function is intended to be called by the driver. This
+ function allows a driver to update the outbound interface counts.
+
+ A lock protects the counts, which makes the increment functions
+ expensive. The increment function will update the lastchanged
+ value.
+ @param interface The interface.
+ @param packets_out The number of additional packets sent.
+ @param bytes_out The number of additional bytes sent.
+ @param errors_out The number of additional send errors.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_stat_increment_out(ifnet_t interface,
+ u_int32_t packets_out, u_int32_t bytes_out,
+ u_int32_t errors_out);
+
+/*!
+ @function ifnet_set_stat
+ @discussion
+ This function is intended to be called by the driver. A kext
+ must not call this function on an interface the kext does not
+ own.
+
+ The one exception would be the case where a kext wants to zero
+ all of the counters.
+ @param interface The interface.
+ @param stats The new stats values.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_stat(ifnet_t interface,
+ const struct ifnet_stats_param *stats);
+
+/*!
+ @function ifnet_stat
+ @param interface The interface.
+ @param out_stats Storage for the values.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_stat(ifnet_t interface,
+ struct ifnet_stats_param *out_stats);
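A short hypothetical sketch (not in the patch) of the common case the discussion above describes: a driver's transmit-complete handler updating the outbound counters in one call, so the lock that makes these functions expensive is taken only once. Names are invented.

static void
example_tx_complete(ifnet_t ifp, u_int32_t npkts, u_int32_t nbytes, u_int32_t nerrs)
{
    /* one call covers packets, bytes and errors, and also
       updates the interface's lastchanged value */
    (void)ifnet_stat_increment_out(ifp, npkts, nbytes, nerrs);
}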
+
+/*!
+ @function ifnet_set_promiscuous
+ @discussion Enable or disable promiscuous mode on the interface. The
+ interface keeps an internal count of the number of times
+ promiscuous mode has been enabled. Promiscuous mode is only
+ disabled when this count reaches zero. Be sure to disable
+ promiscuous mode only once for every time you enable it.
+ @param interface The interface to toggle promiscuous mode on.
+ @param on If set, the number of promiscuous on requests will be
+ incremented. If this is the first request, promiscuous mode
+ will be enabled. If this is not set, the number of promiscuous
+ clients will be decremented. If this causes the number to reach
+ zero, promiscuous mode will be disabled.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_set_promiscuous(ifnet_t interface, int on);
+
+/*!
+ @function ifnet_touch_lastchange
+ @discussion Updates the lastchange value to now.
+ @param interface The interface.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_touch_lastchange(ifnet_t interface);
+
+/*!
+ @function ifnet_lastchange
+ @param interface The interface.
+ @param last_change A timeval struct to copy the last time changed
+ into.
+ */
+errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change);
+
+/*!
+ @function ifnet_get_address_list
+ @discussion Get a list of addresses on the interface. Passing NULL
+ for the interface will return a list of all addresses. The
+ addresses will have their reference count bumped so they will
+ not go away. Calling ifnet_free_address_list will decrement the
+ refcount and free the array. If you wish to hold on to a
+ reference to an ifaddr_t, be sure to bump the reference count
+ before calling ifnet_free_address_list.
+ @param interface The interface.
+ @param addresses A pointer to a NULL terminated array of ifaddr_ts.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses);
+
+/*!
+ @function ifnet_get_address_list_family
+ @discussion Get a list of addresses on the interface. Passing NULL
+ for the interface will return a list of all addresses. The
+ addresses will have their reference count bumped so they will
+ not go away. Calling ifnet_free_address_list will decrement the
+ refcount and free the array. If you wish to hold on to a
+ reference to an ifaddr_t, be sure to bump the reference count
+ before calling ifnet_free_address_list. Unlike
+ ifnet_get_address_list, this function lets the caller specify
+ the address family to get a list of only a specific address type.
+ @param interface The interface.
+ @param addresses A pointer to a NULL terminated array of ifaddr_ts.
+ @param family The address family of the addresses to return.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_get_address_list_family(ifnet_t interface, ifaddr_t **addresses, sa_family_t family);
+
+/*!
+ @function ifnet_free_address_list
+ @discussion Free a list of addresses returned from
+ ifnet_get_address_list. Decrements the refcounts and frees the
+ memory used for the array of references.
+ @param addresses An array of ifaddr_ts.
+ */
+void ifnet_free_address_list(ifaddr_t *addresses);
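A hypothetical sketch (not in the patch) of the get/free pattern just described, counting the AF_INET addresses on an interface; the function name is invented:

static int
example_count_inet_addresses(ifnet_t ifp)
{
    ifaddr_t *addrs;
    int i, count = 0;

    if (ifnet_get_address_list_family(ifp, &addrs, AF_INET) != 0)
        return 0;
    for (i = 0; addrs[i] != NULL; i++)
        count++;
    /* releases the references taken on our behalf and frees the array */
    ifnet_free_address_list(addrs);
    return count;
}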
+
+/*!
+ @function ifnet_set_lladdr
+ @discussion Sets the link-layer address for this interface.
+ @param interface The interface the link layer address is being
+ changed on.
+ @param lladdr A pointer to the raw link layer address (pointer to
+ the 6 byte ethernet address for ethernet).
+ @param lladdr_len The length, in bytes, of the link layer address.
+ */
+errno_t ifnet_set_lladdr(ifnet_t interface, const void* lladdr, size_t lladdr_len);
+
+/*!
+ @function ifnet_lladdr_copy_bytes
+ @discussion Copies the bytes of the link-layer address into the
+ specified buffer.
+ @param interface The interface to copy the link-layer address from.
+ @param lladdr The buffer to copy the link-layer address into.
+ @param length The length of the buffer. This value must match the
+ length of the link-layer address.
+ */
+errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void* lladdr, size_t length);
+
+#ifdef KERNEL_PRIVATE
+/*!
+ @function ifnet_lladdr
+ @discussion Returns a pointer to the link-layer address.
+ @param interface The interface the link-layer address is on.
+ */
+void* ifnet_lladdr(ifnet_t interface);
+#endif /* KERNEL_PRIVATE */
+
+/*!
+ @function ifnet_llbroadcast_copy_bytes
+ @discussion Retrieves the link-layer broadcast address for this
+ interface.
+ @param interface The interface.
+ @param addr A buffer to copy the broadcast address into.
+ @param bufferlen The length of the buffer at addr.
+ @param addr_len On return, the length of the broadcast address.
+ */
+errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void* addr,
+ size_t bufferlen, size_t* addr_len);
+
+#ifdef KERNEL_PRIVATE
+/*!
+ @function ifnet_set_lladdr_and_type
+ @discussion Sets the link-layer address as well as the type field in
+ the sockaddr_dl. Support for setting the type was added for vlan
+ and bond interfaces.
+ @param interface The interface the link layer address is being
+ changed on.
+ @param lladdr A pointer to the raw link layer address (pointer to
+ the 6 byte ethernet address for ethernet).
+ @param length The length, in bytes, of the link layer address.
+ @param type The link-layer address type.
+ */
+errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void* lladdr, size_t length, u_char type);
+#endif /* KERNEL_PRIVATE */
+
+/*!
+ @function ifnet_add_multicast
+ @discussion Joins a multicast and returns an ifmultiaddr_t with the
+ reference count incremented for you. You are responsible for
+ decrementing the reference count after calling
+ ifnet_remove_multicast and making sure you no longer have any
+ references to the multicast.
+ @param interface The interface.
+ @param maddr The multicast address to join. Either a physical
+ address or logical address to be translated to a physical
+ address.
+ @param multicast The resulting ifmultiaddr_t multicast address.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_add_multicast(ifnet_t interface, const struct sockaddr *maddr,
+ ifmultiaddr_t *multicast);
+
+/*!
+ @function ifnet_remove_multicast
+ @discussion Causes the interface to leave the multicast group. The
+ stack keeps track of how many times ifnet_add_multicast has been
+ called for a given multicast address. The multicast will only be
+ removed when the number of times ifnet_remove_multicast has been
+ called matches the number of times ifnet_add_multicast has been
+ called.
+
+ The memory for the multicast address is not actually freed until
+ the separate reference count has reached zero. Some parts of the
+ stack may keep a pointer to the multicast even after that
+ multicast has been removed from the interface.
+
+ When an interface is detached, all of the multicasts are
+ removed. If the interface of the multicast passed in is no
+ longer attached, this function will gracefully return,
+ performing no work.
+
+ It is the caller's responsibility to release the multicast
+ address after calling this function.
+ @param multicast The multicast to be removed.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_remove_multicast(ifmultiaddr_t multicast);
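A hypothetical sketch (not in the patch) of the join/leave and reference-count discipline described above; "group" is assumed to be a fully initialized multicast sockaddr, and the function name is invented:

static errno_t
example_join_then_leave(ifnet_t ifp, const struct sockaddr *group)
{
    ifmultiaddr_t mc;
    errno_t err;

    err = ifnet_add_multicast(ifp, group, &mc);
    if (err != 0)
        return err;

    /* ... receive traffic ... */

    err = ifnet_remove_multicast(mc);
    /* drop the reference ifnet_add_multicast took on our behalf */
    ifmaddr_release(mc);
    return err;
}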
+
+/*!
+ @function ifnet_get_multicast_list
+ @discussion Retrieves a list of multicast addresses the interface is
+ set to receive. This function allocates and returns an array of
+ references to the various multicast addresses. The multicasts
+ have their reference counts bumped on your behalf. Calling
+ ifnet_free_multicast_list will decrement the reference counts
+ and free the array.
+ @param interface The interface.
+ @param addresses A pointer to a NULL terminated array of references
+ to the multicast addresses.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses);
+
+/*!
+ @function ifnet_free_multicast_list
+ @discussion Frees a list of multicasts returned by
+ ifnet_get_multicast_list. Decrements the refcount on each
+ multicast address and frees the array.
+ @param multicasts An array of references to the multicast addresses.
+ */
+void ifnet_free_multicast_list(ifmultiaddr_t *multicasts);
+
+/*!
+ @function ifnet_find_by_name
+ @discussion Find an interface by the name including the unit number.
+ Caller must call ifnet_release on any non-null interface return
+ value.
+ @param ifname The name of the interface, including any unit number
+ (e.g. "en0").
+ @param interface A pointer to an interface reference. This will be
+ filled in if a matching interface is found.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface);
+
+/*!
+ @function ifnet_list_get
+ @discussion Get a list of attached interfaces. List will be set to
+ point to an array allocated by ifnet_list_get. The interfaces
+ are refcounted and the counts will be incremented before the
+ function returns. The list of interfaces must be freed using
+ ifnet_list_free.
+ @param family The interface family (e.g. IFNET_FAMILY_ETHERNET). To
+ find interfaces of all families, use IFNET_FAMILY_ANY.
+ @param interfaces A pointer to an array of interface references.
+ @param count A pointer that will be filled in with the number of
+ matching interfaces in the array.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count);
+
+/*!
+ @function ifnet_list_free
+ @discussion Free a list of interfaces returned by ifnet_list_get.
+ Decrements the reference count on each interface and frees the
+ array of references. If you keep a reference to an interface, be
+ sure to increment the reference count before calling
+ ifnet_list_free.
+ @param interfaces An array of interface references from ifnet_list_get.
+ */
+void ifnet_list_free(ifnet_t *interfaces);
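A hypothetical sketch (not in the patch) enumerating attached interfaces with ifnet_list_get/ifnet_list_free. The ifnet_name() and ifnet_unit() accessors are assumed from earlier in this header; the function name is invented.

static void
example_list_ethernet(void)
{
    ifnet_t *ifs;
    u_int32_t count, i;

    if (ifnet_list_get(IFNET_FAMILY_ETHERNET, &ifs, &count) != 0)
        return;
    for (i = 0; i < count; i++)
        printf("%s%d\n", ifnet_name(ifs[i]), ifnet_unit(ifs[i]));
    /* drops the references and frees the array */
    ifnet_list_free(ifs);
}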
+
+/********************************************************************************************/
+/* ifaddr_t accessors */
+/********************************************************************************************/
+
+/*!
+ @function ifaddr_reference
+ @discussion Increment the reference count of an address tied to an
+ interface.
+ @param ifaddr The interface address.
+ @result 0 upon success
+ */
+errno_t ifaddr_reference(ifaddr_t ifaddr);
+
+/*!
+ @function ifaddr_release
+ @discussion Decrements the reference count of and possibly frees an
+ address tied to an interface.
+ @param ifaddr The interface address.
+ @result 0 upon success
+ */
+errno_t ifaddr_release(ifaddr_t ifaddr);
+
+/*!
+ @function ifaddr_address
+ @discussion Copies the address out of the ifaddr.
+ @param ifaddr The interface address.
+ @param out_addr The sockaddr storage for the address.
+ @param addr_size The size of the storage for the address.
+ @result 0 upon success
+ */
+errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr, u_int32_t addr_size);
+
+/*!
+ @function ifaddr_address_family
+ @discussion Returns the address family of the address.
+ @param ifaddr The interface address.
+ @result 0 on failure, address family on success.
+ */
+sa_family_t ifaddr_address_family(ifaddr_t ifaddr);
+
+/*!
+ @function ifaddr_dstaddress
+ @discussion Copies the destination address out of the ifaddr.
+ @param ifaddr The interface address.
+ @param out_dstaddr The sockaddr storage for the destination address.
+ @param dstaddr_size The size of the storage for the destination address.
+ @result 0 upon success
+ */
+errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr, u_int32_t dstaddr_size);
+
+/*!
+ @function ifaddr_netmask
+ @discussion Copies the netmask out of the ifaddr.
+ @param ifaddr The interface address.
+ @param out_netmask The sockaddr storage for the netmask.
+ @param netmask_size The size of the storage for the netmask.
+ @result 0 upon success
+ */
+errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask, u_int32_t netmask_size);
+
+/*!
+ @function ifaddr_ifnet
+ @discussion Returns the interface the address is attached to. The
+ reference is only valid until the ifaddr is released. If you
+ need to hold a reference to the ifnet for longer than you hold a
+ reference to the ifaddr, increment the reference using
+ ifnet_reference.
+ @param ifaddr The interface address.
+ @result A reference to the interface the address is attached to.
+ */
+ifnet_t ifaddr_ifnet(ifaddr_t ifaddr);
+
+/*!
+ @function ifaddr_withaddr
+ @discussion Returns an interface address with the address specified.
+ Increments the reference count on the ifaddr before returning to
+ the caller. Caller is responsible for calling ifaddr_release.
+ @param address The address to search for.
+ @result A reference to the interface address.
+ */
+ifaddr_t ifaddr_withaddr(const struct sockaddr* address);
+
+/*!
+ @function ifaddr_withdstaddr
+ @discussion Returns an interface address for the interface address
+ that matches the destination when the netmask is applied.
+ Increments the reference count on the ifaddr before returning to
+ the caller. Caller is responsible for calling ifaddr_release.
+ @param destination The destination to search for.
+ @result A reference to the interface address.
+ */
+ifaddr_t ifaddr_withdstaddr(const struct sockaddr* destination);
+
+/*!
+ @function ifaddr_withnet
+ @discussion Returns an interface address for the interface with the
+ network described by net. Increments the reference count on the
+ ifaddr before returning to the caller. Caller is responsible for
+ calling ifaddr_release.
+ @param net The network to search for.
+ @result A reference to the interface address.
+ */
+ifaddr_t ifaddr_withnet(const struct sockaddr* net);
+
+/*!
+ @function ifaddr_withroute
+ @discussion Returns an interface address given a destination and
+ gateway. Increments the reference count on the ifaddr before
+ returning to the caller. Caller is responsible for calling
+ ifaddr_release.
+ @param flags Routing flags. See net/route.h, RTF_GATEWAY etc.
+ @param destination The destination to search for.
+ @param gateway A gateway to search for.
+ @result A reference to the interface address.
+ */
+ifaddr_t ifaddr_withroute(int flags, const struct sockaddr* destination,
+ const struct sockaddr* gateway);
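A hypothetical sketch (not in the patch) combining the lookup and lifetime rules above: resolving a local address to the owning interface while keeping the ifnet valid past the ifaddr release. The function name is invented.

static ifnet_t
example_interface_for(const struct sockaddr *sa)
{
    ifaddr_t ifa;
    ifnet_t ifp = NULL;

    ifa = ifaddr_withaddr(sa);
    if (ifa != NULL) {
        ifp = ifaddr_ifnet(ifa);
        if (ifp != NULL)
            (void)ifnet_reference(ifp); /* keep ifp valid past the release below */
        (void)ifaddr_release(ifa);
    }
    return ifp; /* caller must ifnet_release() a non-NULL result */
}

+
+/*!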
+ @function ifaddr_findbestforaddr
+ @discussion Finds the best local address assigned to a specific
+ interface to use when communicating with another address.
+ Increments the reference count on the ifaddr before returning to
+ the caller. Caller is responsible for calling ifaddr_release.
+ @param addr The remote address.
+ @param interface The local interface.
+ @result A reference to the interface address.
+ */
+ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr, ifnet_t interface);
+
+/********************************************************************************************/
+/* ifmultiaddr_t accessors */
+/********************************************************************************************/
+
+/*!
+ @function ifmaddr_reference
+ @discussion Increment the reference count of an interface multicast
+ address.
+ @param ifmaddr The interface multicast address.
+ @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
+ */
+errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr);
+
+/*!
+ @function ifmaddr_release
+ @discussion Decrement the reference count of an interface multicast
+ address. If the reference count reaches zero, the ifmultiaddr
+ will be removed from the interface and the ifmultiaddr will be
+ freed.
+ @param ifmaddr The interface multicast address.
+ @result 0 on success. Only error will be EINVAL if ifmaddr is not valid.
+ */
+errno_t ifmaddr_release(ifmultiaddr_t ifmaddr);
+
+/*!
+ @function ifmaddr_address
+ @discussion Copies the multicast address to out_multicast.
+ @param ifmaddr The interface multicast address.
+ @param out_multicast Storage for a sockaddr.
+ @param addr_size Size of the storage.
+ @result 0 on success.
+ */
+errno_t ifmaddr_address(ifmultiaddr_t ifmaddr, struct sockaddr *out_multicast, u_int32_t addr_size);
+
+/*!
+ @function ifmaddr_lladdress
+ @discussion Copies the link layer multicast address to
+ out_link_layer_multicast.
+ @param ifmaddr The interface multicast address.
+ @param out_link_layer_multicast Storage for a sockaddr.
+ @param addr_size Size of the storage.
+ @result 0 on success.
+ */
+errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr, struct sockaddr *out_link_layer_multicast,
+ u_int32_t addr_size);
+
+/*!
+ @function ifmaddr_ifnet
+ @discussion Returns the interface this multicast address is attached
+ to. The interface reference count is not bumped by this
+ function. The interface is only valid as long as you don't
+ release the reference to the multicast address. If you need to
+ maintain your pointer to the ifnet, call ifnet_reference
+ followed by ifnet_release when you're finished.
+ @param ifmaddr The interface multicast address.
+ @result A reference to the interface.
+ */
+ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr);
+
+__END_DECLS
+
+#endif
diff --git a/bsd/net/kpi_interfacefilter.c b/bsd/net/kpi_interfacefilter.c
new file mode 100644
index 000000000..ee9b28174
--- /dev/null
+++ b/bsd/net/kpi_interfacefilter.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include "kpi_interfacefilter.h"
+
+#include
+#include
+#include
+#include
+#include
+
+errno_t
+iflt_attach(
+ ifnet_t interface,
+ const struct iff_filter *filter,
+ interface_filter_t *filter_ref)
+{
+ if (interface == NULL) return ENOENT;
+
+ return dlil_attach_filter(interface, filter, filter_ref);
+}
+
+void
+iflt_detach(
+ interface_filter_t filter_ref)
+{
+ dlil_detach_filter(filter_ref);
+}
diff --git a/bsd/net/kpi_interfacefilter.h b/bsd/net/kpi_interfacefilter.h
new file mode 100644
index 000000000..e4140b1a6
--- /dev/null
+++ b/bsd/net/kpi_interfacefilter.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*!
+ @header kpi_interfacefilter.h
+ This header defines an API to attach interface filters. Interface
+ filters may be attached to a specific interface. The filters can
+ intercept all packets into and out of the specific interface. In
+ addition, the filters may intercept interface specific events and
+ ioctls.
+ */
+
+#ifndef __KPI_INTERFACEFILTER__
+#define __KPI_INTERFACEFILTER__
+#include
+#include
+
+struct kev_msg;
+
+/*!
+ @typedef iff_input_func
+
+ @discussion iff_input_func is used to filter incoming packets. The
+ interface is only valid for the duration of the filter call. If
+ you need to keep a reference to the interface, be sure to call
+ ifnet_reference and ifnet_release. The packets passed to the
+ inbound filter are different from those passed to the outbound
+ filter. Packets to the inbound filter have the frame header
+ passed in separately from the rest of the packet. The outbound
+ filter is passed the whole packet, including the frame
+ header.
+
+ The frame header usually precedes the data in the mbuf. This
+ ensures that the frame header will be a valid pointer as long as
+ the mbuf is not freed. If you need to change the frame header to
+ point somewhere else, the recommended method is to prepend a new
+ frame header to the mbuf chain (mbuf_prepend), set the header to
+ point to that data, then call mbuf_adj to move the mbuf data
+ pointer back to the start of the packet payload.
+ @param cookie The cookie specified when this filter was attached.
+ @param interface The interface the packet was received on.
+ @param protocol The protocol of this packet. If you specified a
+ protocol when attaching your filter, the protocol will only ever
+ be the protocol you specified.
+ @param data The inbound packet, after the frame header as determined
+ by the interface.
+ @param frame_ptr A pointer to the pointer to the frame header. The
+ frame header length can be found by inspecting the interface's
+ frame header length (ifnet_hdrlen).
+ @result Return:
+ 0 - The caller will continue with normal processing of the packet.
+ EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed.
+ Anything Else - The caller will free the packet and stop processing.
+*/
+typedef errno_t (*iff_input_func)(void* cookie, ifnet_t interface, protocol_family_t protocol,
+ mbuf_t *data, char **frame_ptr);
+
+/*!
+ @typedef iff_output_func
+
+ @discussion iff_output_func is used to filter fully formed outbound
+ packets. The interface is only valid for the duration of the
+ filter call. If you need to keep a reference to the interface,
+ be sure to call ifnet_reference and ifnet_release.
+ @param cookie The cookie specified when this filter was attached.
+ @param interface The interface the packet is being transmitted on.
+ @param data The fully formed outbound packet in a chain of mbufs.
+ The frame header is already included. The filter function may
+ modify the packet or return a different mbuf chain.
+ @result Return:
+ 0 - The caller will continue with normal processing of the packet.
+ EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed.
+ Anything Else - The caller will free the packet and stop processing.
+*/
+typedef errno_t (*iff_output_func)(void* cookie, ifnet_t interface, protocol_family_t protocol,
+ mbuf_t *data);
+
+/*!
+ @typedef iff_event_func
+
+ @discussion iff_event_func is used to filter interface specific
+ events. The interface is only valid for the duration of the
+ filter call. If you need to keep a reference to the interface,
+ be sure to call ifnet_reference and ifnet_release.
+ @param cookie The cookie specified when this filter was attached.
+ @param interface The interface the event occurred on.
+ @param event_msg The kernel event, may not be changed.
+*/
+typedef void (*iff_event_func)(void* cookie, ifnet_t interface, protocol_family_t protocol,
+ const struct kev_msg *event_msg);
+
+/*!
+ @typedef iff_ioctl_func
+
+ @discussion iff_ioctl_func is used to filter ioctls sent to an
+ interface. The interface is only valid for the duration of the
+ filter call. If you need to keep a reference to the interface,
+ be sure to call ifnet_reference and ifnet_release.
+ @param cookie The cookie specified when this filter was attached.
+ @param interface The interface the ioctl is being sent to.
+ @param ioctl_cmd The ioctl command.
+ @param ioctl_arg A pointer to the ioctl argument.
+ @result Return:
+ 0 - The caller will continue with normal processing of the ioctl.
+ EJUSTRETURN - The caller will stop processing the ioctl.
+ Anything Else - Processing will stop and the error will be returned.
+*/
+typedef errno_t (*iff_ioctl_func)(void* cookie, ifnet_t interface, protocol_family_t protocol,
+ u_long ioctl_cmd, void* ioctl_arg);
+
+/*!
+ @typedef iff_detached_func
+
+ @discussion iff_detached_func is called to notify the filter that it
+ has been detached from an interface.
+ This is the last call to the filter that will be made. A filter
+ may be detached if the interface is detached or the detach
+ filter function is called. In the case that the interface is
+ being detached, your filter's event function will be called with
+ the interface detaching event before your detached function
+ will be called.
+ @param cookie The cookie specified when this filter was attached.
+ @param interface The interface this filter was detached from.
+*/
+typedef void (*iff_detached_func)(void* cookie, ifnet_t interface);
+
+/*!
+ @struct iff_filter
+ @discussion This structure is used to define an interface filter for
+ use with the iflt_attach function.
+ @field iff_cookie A kext defined cookie that will be passed to all
+ filter functions.
+ @field iff_name A filter name used for debugging purposes.
+ @field iff_protocol The protocol of the packets this filter is
+ interested in. If you specify zero, packets from all protocols
+ will be passed to the filter.
+ @field iff_input The filter function to handle inbound packets, may
+ be NULL.
+ @field iff_output The filter function to handle outbound packets,
+ may be NULL.
+ @field iff_event The filter function to handle interface events, may
+ be NULL.
+ @field iff_ioctl The filter function to handle interface ioctls, may
+ be NULL.
+ @field iff_detached The filter function used to notify the filter that
+ it has been detached.
+*/
+
+struct iff_filter {
+ void* iff_cookie;
+ const char* iff_name;
+ protocol_family_t iff_protocol;
+ iff_input_func iff_input;
+ iff_output_func iff_output;
+ iff_event_func iff_event;
+ iff_ioctl_func iff_ioctl;
+ iff_detached_func iff_detached;
+};
+
+/*!
+ @function iflt_attach
+ @discussion Attaches an interface filter to an interface.
+ @param interface The interface the filter should be attached to.
+ @param filter A structure defining the filter.
+ @param filter_ref A reference to the filter used to detach.
+ @result 0 on success otherwise the errno error.
+ */
+errno_t iflt_attach(ifnet_t interface, const struct iff_filter* filter,
+ interface_filter_t *filter_ref);
+
+/*!
+ @function iflt_detach
+ @discussion Detaches an interface filter from an interface.
+ @param filter_ref The reference to the filter from iflt_attach.
+ */
+void iflt_detach(interface_filter_t filter_ref);
+
+#endif
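A hypothetical sketch (not in the patch) tying the pieces above together: a trivial filter that counts inbound packets on one interface. All names are invented; error handling and the kext lifecycle are omitted.

static u_int32_t example_in_count;

static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
    mbuf_t *data, char **frame_ptr)
{
    example_in_count++;
    return 0; /* continue normal processing of the packet */
}

static const struct iff_filter example_filter = {
    .iff_cookie = NULL,
    .iff_name = "com.example.counter",
    .iff_protocol = 0, /* zero: see all protocols */
    .iff_input = example_iff_input,
    /* remaining callbacks left NULL */
};

static interface_filter_t example_filter_ref;

static errno_t
example_attach(ifnet_t ifp)
{
    return iflt_attach(ifp, &example_filter, &example_filter_ref);
}

diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c
new file mode 100644
index 000000000..ad16db5c1
--- /dev/null
+++ b/bsd/net/kpi_protocol.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.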
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "kpi_protocol.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void proto_kpi_init(void); +void proto_input_run(void); + +typedef int (*attach_t)(struct ifnet *ifp, u_long protocol_family); +typedef int (*detach_t)(struct ifnet *ifp, u_long protocol_family); + +/****************************************************************************/ +/* WARNING: Big assumption made here - there can be only one input thread */ +struct proto_input_entry { + struct proto_input_entry *next; + int detach; + struct domain *domain; + + protocol_family_t protocol; + proto_input_handler input; + proto_input_detached_handler detached; + + mbuf_t first_packet; + mbuf_t last_packet; +}; + +#define PROTO_HASH_SLOTS 5 + +static struct proto_input_entry *proto_hash[PROTO_HASH_SLOTS]; +static struct proto_input_entry *proto_input_add_list; +static lck_mtx_t *proto_input_lock = 0; +static u_int32_t inject_buckets = 0; + +extern thread_t dlil_input_thread_ptr; +extern int dlil_input_thread_wakeup; + +static int +proto_hash_value( + protocol_family_t protocol) +{ + switch(protocol) { + case PF_INET: + return 0; + case PF_INET6: + return 1; + case PF_APPLETALK: + return 2; + case PF_VLAN: + return 3; + } + return 4; +} + +__private_extern__ void +proto_kpi_init(void) +{ + lck_grp_attr_t *grp_attrib = 0; + lck_attr_t *lck_attrib = 0; + lck_grp_t *lck_group = 0; + + /* Allocate a mtx lock */ + grp_attrib = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attrib); + lck_group = lck_grp_alloc_init("protocol kpi", grp_attrib); + lck_grp_attr_free(grp_attrib); + lck_attrib = lck_attr_alloc_init(); + lck_attr_setdefault(lck_attrib); + proto_input_lock = lck_mtx_alloc_init(lck_group, lck_attrib); + lck_grp_free(lck_group); + lck_attr_free(lck_attrib); +} + +__private_extern__ errno_t +proto_register_input( + protocol_family_t protocol, + proto_input_handler input, + proto_input_detached_handler detached) +{ + + struct proto_input_entry *entry; + + entry = _MALLOC(sizeof(*entry), M_IFADDR, M_WAITOK); + + if (entry == NULL) + return ENOMEM; + + bzero(entry, sizeof(*entry)); + entry->protocol = protocol; + entry->input = input; + entry->detached = detached; + + { + struct domain *dp = domains; + extern lck_mtx_t *domain_proto_mtx; + + lck_mtx_assert(domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(domain_proto_mtx); + while (dp && dp->dom_family != protocol) + dp = dp->dom_next; + entry->domain = dp; + lck_mtx_unlock(domain_proto_mtx); + } + + + do { + entry->next = proto_input_add_list; + } while(!OSCompareAndSwap((UInt32)entry->next, (UInt32)entry, (UInt32*)&proto_input_add_list)); + + wakeup((caddr_t)&dlil_input_thread_wakeup); + + return 0; +} + + +__private_extern__ void +proto_unregister_input( + protocol_family_t protocol) +{ + struct proto_input_entry *entry = NULL; + + for (entry = proto_hash[proto_hash_value(protocol)]; entry; entry = entry->next) + if (entry->protocol == protocol) + break; + + if (entry) + entry->detach = 1; +} + + +static void +proto_delayed_attach( + struct proto_input_entry *entry) +{ + struct proto_input_entry *next_entry; + for (next_entry = entry->next; entry; entry = next_entry) { + struct proto_input_entry *exist; + int hash_slot; + + hash_slot = proto_hash_value(entry->protocol); + next_entry = entry->next; + + for (exist = proto_hash[hash_slot]; exist; exist = exist->next) + if (exist->protocol == entry->protocol) + break; + + /* If the entry already exists, call detached and 
dispose */ + if (exist) { + if (entry->detached) + entry->detached(entry->protocol); + FREE(entry, M_IFADDR); + } + else { + entry->next = proto_hash[hash_slot]; + proto_hash[hash_slot] = entry; + } + } +} + +static void +proto_delayed_inject( + struct proto_input_entry *entry) +{ + mbuf_t packet_list; + mbuf_t packet; + int locked = 0; + + lck_mtx_lock(proto_input_lock); + packet_list = entry->first_packet; + entry->first_packet = entry->last_packet = 0; + lck_mtx_unlock(proto_input_lock); + + if (packet_list == NULL) + return; + + if (entry->domain && (entry->domain->dom_flags & DOM_REENTRANT) == 0) { + lck_mtx_lock(entry->domain->dom_mtx); + locked = 1; + } + + for (packet = packet_list; packet; packet = packet_list) { + packet_list = mbuf_nextpkt(packet); + mbuf_setnextpkt(packet, NULL); + entry->input(entry->protocol, packet); + } + + if (locked) { + lck_mtx_unlock(entry->domain->dom_mtx); + } +} + +/* This function must be called from a single dlil input thread */ +__private_extern__ void +proto_input_run(void) +{ + struct proto_input_entry *entry; + u_int32_t inject; + int i; + + if (current_thread() != dlil_input_thread_ptr) + panic("proto_input_run called from a thread other than dlil_input_thread!\n"); + + do { + entry = proto_input_add_list; + } while (entry && !OSCompareAndSwap((UInt32)entry, 0, (UInt32*)&proto_input_add_list)); + + if (entry) + proto_delayed_attach(entry); + + do { + inject = inject_buckets; + } while (inject && !OSCompareAndSwap(inject, 0, (UInt32*)&inject_buckets)); + + if (inject) { + for (i = 0; i < PROTO_HASH_SLOTS; i++) { + if ((inject & (1L << i)) != 0) { + for (entry = proto_hash[i]; entry; entry = entry->next) { + if (entry->first_packet) { + proto_delayed_inject(entry); + } + } + } + } + } +} + +errno_t +proto_input( + protocol_family_t protocol, + mbuf_t packet_list) +{ + struct proto_input_entry *entry; + + if (current_thread() != dlil_input_thread_ptr) + panic("proto_input called from a thread other than dlil_input_thread!\n"); + + for (entry = proto_hash[proto_hash_value(protocol)]; entry; entry = entry->next) { + if (entry->protocol == protocol) + break; + } + + if (entry) { + mbuf_t packet; +#if DIRECT_PROTO_INPUT + // See for why this is disabled + // We need to release the dlil lock before taking the protocol lock + for (packet = packet_list; packet; packet = packet_list) { + packet_list = mbuf_nextpkt(packet); + mbuf_setnextpkt(packet, NULL); + entry->input(entry->protocol, packet); + } +#else + mbuf_t last_packet; + int hash_slot = proto_hash_value(protocol); + + for (last_packet = packet_list; mbuf_nextpkt(last_packet); + last_packet = mbuf_nextpkt(last_packet)) + /* find the last packet */; + + lck_mtx_lock(proto_input_lock); + if (entry->first_packet == NULL) { + entry->first_packet = packet_list; + } + else { + mbuf_setnextpkt(entry->last_packet, packet_list); + } + entry->last_packet = last_packet; + lck_mtx_unlock(proto_input_lock); + + OSBitOrAtomic((1L << hash_slot), (UInt32*)&inject_buckets); +#endif + } + else + { + return ENOENT; + } + + return 0; +} + +errno_t +proto_inject( + protocol_family_t protocol, + mbuf_t packet_list) +{ + struct proto_input_entry *entry; + mbuf_t last_packet; + int hash_slot = proto_hash_value(protocol); + + for (last_packet = packet_list; mbuf_nextpkt(last_packet); + last_packet = mbuf_nextpkt(last_packet)) + /* find the last packet */; + + for (entry = proto_hash[hash_slot]; entry; entry = entry->next) { + if (entry->protocol == protocol) + break; + } + + if (entry) { + lck_mtx_lock(proto_input_lock); 
+ if (entry->first_packet == NULL) {
+ entry->first_packet = packet_list;
+ }
+ else {
+ mbuf_setnextpkt(entry->last_packet, packet_list);
+ }
+ entry->last_packet = last_packet;
+ lck_mtx_unlock(proto_input_lock);
+
+ OSBitOrAtomic((1L << hash_slot), (UInt32*)&inject_buckets);
+
+ wakeup((caddr_t)&dlil_input_thread_wakeup);
+ }
+ else
+ {
+ return ENOENT;
+ }
+
+ return 0;
+}
+
+errno_t
+proto_register_plumber(
+ protocol_family_t proto_fam,
+ ifnet_family_t if_fam,
+ proto_plumb_handler plumb,
+ proto_unplumb_handler unplumb)
+{
+ return dlil_reg_proto_module(proto_fam, if_fam, (attach_t)plumb, (detach_t)unplumb);
+}
+
+void
+proto_unregister_plumber(
+ protocol_family_t proto_fam,
+ ifnet_family_t if_fam)
+{
+ (void)dlil_dereg_proto_module(proto_fam, if_fam);
+}
diff --git a/bsd/net/kpi_protocol.h b/bsd/net/kpi_protocol.h
new file mode 100644
index 000000000..5ea4188da
--- /dev/null
+++ b/bsd/net/kpi_protocol.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*!
+ @header kpi_protocol.h
+ This header defines an API to interact with protocols in the kernel.
+ The KPIs in this header file can be used to interact with protocols
+ that already exist in the stack. These KPIs can be used to support
+ existing protocols over media types that are not natively supported
+ in the kernel, such as ATM.
+ */
+
+#ifndef __KPI_PROTOCOL__
+#define __KPI_PROTOCOL__
+#include
+#include
+
+
+__BEGIN_DECLS
+
+/****************************************************************************/
+/* Protocol input/inject */
+/****************************************************************************/
+
+#ifdef KERNEL_PRIVATE
+/*!
+ @typedef proto_input_handler
+ @discussion proto_input_handler is called to input a packet. If
+ your protocol has specified a global lock, the lock will be held
+ when this function is called.
+ @param protocol The protocol this packet is intended for.
+ @param packet The packet that should be input.
+ */
+typedef void (*proto_input_handler)(protocol_family_t protocol, mbuf_t packet);
+
+/*!
+ @typedef proto_input_detached_handler
+ @discussion proto_input_detached_handler is called to notify the
+ protocol that it has been detached. When this function is
+ called, the proto_input_handler will not be called again, making
+ it safe to unload.
+ @param protocol The protocol detached.
+ */
+typedef void (*proto_input_detached_handler)(protocol_family_t protocol);
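A hypothetical sketch (not in the patch) of registering these KERNEL_PRIVATE input handlers for a stand-in protocol family. PF_CHAOS is used only as an illustration; the function names are invented.

static void
example_proto_input(protocol_family_t proto, mbuf_t packet)
{
    /* consume the packet; a real protocol would parse it here */
    mbuf_freem(packet);
}

static void
example_proto_detached(protocol_family_t proto)
{
    /* the input handler will not run again; safe to unload */
}

static errno_t
example_register(void)
{
    return proto_register_input(PF_CHAOS, example_proto_input,
        example_proto_detached);
}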
+
+/*!
+ @function proto_register_input
+ @discussion Allows the caller to specify the functions called when a
+ packet for a protocol is received.
+ @param protocol The protocol family these functions will receive
+ packets for.
+ @param input The function called when a packet is input.
+ @param detached The function called when the input handler has been
+ detached and will no longer be called.
+ @result An errno error on failure.
+ */
+errno_t proto_register_input(protocol_family_t protocol, proto_input_handler input,
+ proto_input_detached_handler detached);
+
+/*!
+ @function proto_unregister_input
+ @discussion Allows the caller to unregister the input and inject
+ functions for a protocol. The input/inject functions may not be
+ unregistered immediately if there is a chance they are in use.
+ To notify the owner when the functions are no longer in use, the
+ proto_input_detached_handler function will be called. It is not
+ safe to unload until the proto_input_detached_handler is called.
+ @param protocol The protocol family whose input functions should be
+ unregistered.
+ */
+void proto_unregister_input(protocol_family_t protocol);
+#endif
+
+/*!
+ @function proto_input
+ @discussion Inputs a packet on the specified protocol from the input
+ path.
+ @param protocol The protocol of the packet.
+ @param packet The first packet in a chain of packets to be input.
+ @result An errno error on failure. Unless proto_input returns zero,
+ the caller is responsible for freeing the mbuf.
+ */
+errno_t proto_input(protocol_family_t protocol, mbuf_t packet);
+
+/*!
+ @function proto_inject
+ @discussion Injects a packet on the specified protocol from
+ anywhere. To avoid recursion, the protocol may need to queue the
+ packet to be handled later.
+ @param protocol The protocol of the packet.
+ @param packet The first packet in a chain of packets to be injected.
+ @result An errno error on failure. Unless proto_inject returns zero,
+ the caller is responsible for freeing the mbuf.
+ */
+errno_t proto_inject(protocol_family_t protocol, mbuf_t packet);
+
+
+/****************************************************************************/
+/* Protocol plumbing */
+/****************************************************************************/
+
+/*!
+ @typedef proto_plumb_handler
+ @discussion proto_plumb_handler is called to attach a protocol to an
+ interface. A typical protocol plumb function would fill out an
+ ifnet_attach_proto_param and call ifnet_attach_protocol.
+ @param ifp The interface the protocol should be attached to.
+ @param protocol_family The protocol that should be attached to the
+ interface.
+ @result
+ A non-zero value if the attach failed.
+ */
+typedef errno_t (*proto_plumb_handler)(ifnet_t ifp, protocol_family_t protocol);
+
+/*!
+ @typedef proto_unplumb_handler
+ @discussion proto_unplumb_handler is called to detach a protocol
+ from an interface. A typical unplumb function would call
+ ifnet_detach_protocol and perform any necessary cleanup.
+ @param ifp The interface the protocol should be detached from.
+ @param protocol_family The protocol that should be detached from the
+ interface.
+ */
+typedef void (*proto_unplumb_handler)(ifnet_t ifp, protocol_family_t protocol);
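A hypothetical plumb function following the recipe in the typedef discussion above (not in the patch). It assumes the ifnet_attach_proto_param layout from net/kpi_interface.h, where the input member takes a media-input function of the form shown; all example_* names are invented and PF_CHAOS is a stand-in family.

static errno_t
example_media_input(ifnet_t ifp, protocol_family_t proto,
    mbuf_t packet, char *header)
{
    /* runs in the dlil input thread, so proto_input is legal here */
    return proto_input(proto, packet);
}

static errno_t
example_plumb(ifnet_t ifp, protocol_family_t proto)
{
    struct ifnet_attach_proto_param param;

    bzero(&param, sizeof(param));
    param.input = example_media_input; /* assumed field name/type */

    return ifnet_attach_protocol(ifp, proto, &param);
}

static errno_t
example_register_plumber(void)
{
    /* NULL unplumb: ifnet_detach_protocol will be used to detach */
    return proto_register_plumber(PF_CHAOS, IFNET_FAMILY_ETHERNET,
        example_plumb, NULL);
}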
+
+/*!
+ @function proto_register_plumber
+ @discussion Allows the caller to specify the functions called when a protocol
+ is attached to an interface belonging to the specified family and when
+ that protocol is detached.
+ @param proto_fam The protocol family these plumbing functions will
+ handle.
+ @param if_fam The interface family these plumbing functions will
+ handle.
+ @param plumb The function to call to attach the protocol to an
+ interface.
+ @param unplumb The function to call to detach the protocol from an
+ interface; may be NULL, in which case ifnet_detach_protocol will
+ be used to detach the protocol.
+ @result A non-zero value if the registration failed.
+ */
+errno_t proto_register_plumber(protocol_family_t proto_fam, ifnet_family_t if_fam,
+ proto_plumb_handler plumb, proto_unplumb_handler unplumb);
+
+/*!
+ @function proto_unregister_plumber
+ @discussion Unregisters a previously registered plumbing function.
+ @param proto_fam The protocol family these plumbing functions
+ handle.
+ @param if_fam The interface family these plumbing functions handle.
+ */
+void proto_unregister_plumber(protocol_family_t proto_fam, ifnet_family_t if_fam);
+
+__END_DECLS
+
+#endif
diff --git a/bsd/net/lacp.h b/bsd/net/lacp.h
new file mode 100644
index 000000000..0aad344c0
--- /dev/null
+++ b/bsd/net/lacp.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * lacp.h + * - definitions for the Link Aggregation Control Protocol (LACP) and + * the Link Aggregation Marker Protocol + */ + +/* + * Modification History + * + * May 14, 2004 Dieter Siegmund (dieter@apple.com) + * - created + */ + +#ifndef _NET_LACP_H_ +#define _NET_LACP_H_ + +#include + +/** + ** Link Aggregation Control Protocol (LACP) definitions + **/ +#define LACPDU_VERSION_1 1 + +#define LACPDU_TLV_TYPE_TERMINATOR 0x00 +#define LACPDU_TLV_TYPE_ACTOR 0x01 +#define LACPDU_TLV_TYPE_PARTNER 0x02 +#define LACPDU_TLV_TYPE_COLLECTOR 0x03 + +#define LACPDU_ACTOR_TLV_LENGTH 20 +#define LACPDU_PARTNER_TLV_LENGTH 20 +#define LACPDU_COLLECTOR_TLV_LENGTH 16 + +typedef u_char lacp_actor_partner_state; +typedef u_int16_t lacp_key; +typedef u_int16_t lacp_system_priority, lacp_port_priority, lacp_port; +typedef u_int16_t lacp_collector_max_delay; +typedef struct { + u_char system_id[6]; +} lacp_system, *lacp_system_ref; + +/* + * LACP Actor/Partner TLV + */ +typedef struct lacp_actor_partner_tlv_s { + u_char lap_tlv_type; /* 0x01 or 0x02 */ + u_char lap_length; /* 20 */ + u_char lap_system_priority[2]; + u_char lap_system[6]; + u_char lap_key[2]; + u_char lap_port_priority[2]; + u_char lap_port[2]; + u_char lap_state; + u_char lap_reserved[3]; +} lacp_actor_partner_tlv, *lacp_actor_partner_tlv_ref; + +/* + * LACP Collector TLV + */ +typedef struct lacp_collector_tlv_s { + u_char lac_tlv_type; /* 0x03 */ + u_char lac_length; /* 16 */ + u_char lac_max_delay[2]; + u_char lac_reserved[12]; +} lacp_collector_tlv, *lacp_collector_tlv_ref; + + +/* + * LACP Actor/Partner State bits + */ +#define LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY 0x01 +#define LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT 0x02 +#define LACP_ACTOR_PARTNER_STATE_AGGREGATION 0x04 +#define LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION 0x08 +#define LACP_ACTOR_PARTNER_STATE_COLLECTING 0x10 +#define LACP_ACTOR_PARTNER_STATE_DISTRIBUTING 0x20 +#define LACP_ACTOR_PARTNER_STATE_DEFAULTED 0x40 +#define LACP_ACTOR_PARTNER_STATE_EXPIRED 0x80 + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_active_lacp(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_passive_lacp(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY); +} + +static __inline__ int +lacp_actor_partner_state_active_lacp(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_short_timeout(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_long_timeout(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT); +} + +static __inline__ int +lacp_actor_partner_state_short_timeout(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_aggregatable(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_AGGREGATION); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_individual(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_AGGREGATION); +} + +static __inline__ lacp_actor_partner_state 
+lacp_actor_partner_state_aggregatable(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_AGGREGATION) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_in_sync(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_out_of_sync(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION); +} + +static __inline__ int +lacp_actor_partner_state_in_sync(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_collecting(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_COLLECTING); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_not_collecting(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_COLLECTING); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_collecting(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_COLLECTING) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_distributing(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_DISTRIBUTING); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_not_distributing(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_DISTRIBUTING); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_distributing(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_DISTRIBUTING) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_defaulted(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_DEFAULTED); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_not_defaulted(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_DEFAULTED); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_defaulted(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_DEFAULTED) != 0); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_expired(lacp_actor_partner_state state) +{ + return (state | LACP_ACTOR_PARTNER_STATE_EXPIRED); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_set_not_expired(lacp_actor_partner_state state) +{ + return (state &= ~LACP_ACTOR_PARTNER_STATE_EXPIRED); +} + +static __inline__ lacp_actor_partner_state +lacp_actor_partner_state_expired(lacp_actor_partner_state state) +{ + return ((state & LACP_ACTOR_PARTNER_STATE_EXPIRED) != 0); +} + + +/* + * LACP Actor/Partner TLV access functions + */ +static __inline__ void +lacp_actor_partner_tlv_set_system_priority(lacp_actor_partner_tlv_ref tlv, + lacp_system_priority system_priority) +{ + *((lacp_system_priority *)tlv->lap_system_priority) + = (lacp_system_priority)htons(system_priority); + return; +} + +static __inline__ lacp_system_priority +lacp_actor_partner_tlv_get_system_priority(const lacp_actor_partner_tlv_ref tlv) +{ + return ((lacp_system_priority) + ntohs(*((u_short *)tlv->lap_system_priority))); +} + +static __inline__ void +lacp_actor_partner_tlv_set_key(lacp_actor_partner_tlv_ref tlv, lacp_key key) +{ + *((lacp_key *)tlv->lap_key) = 
(lacp_key)htons(key); + return; +} + +static __inline__ lacp_key +lacp_actor_partner_tlv_get_key(const lacp_actor_partner_tlv_ref tlv) +{ + return ((lacp_key)ntohs(*((u_short *)tlv->lap_key))); +} + +static __inline__ void +lacp_actor_partner_tlv_set_port_priority(lacp_actor_partner_tlv_ref tlv, + lacp_port_priority port_priority) +{ + *((lacp_port_priority *)tlv->lap_port_priority) + = (lacp_port_priority)htons(port_priority); + return; +} + +static __inline__ lacp_port_priority +lacp_actor_partner_tlv_get_port_priority(const lacp_actor_partner_tlv_ref tlv) +{ + return ((lacp_port_priority)ntohs(*((u_short *)tlv->lap_port_priority))); +} + +static __inline__ void +lacp_actor_partner_tlv_set_port(lacp_actor_partner_tlv_ref tlv, lacp_port port) +{ + *((lacp_port *)tlv->lap_port) = (lacp_port)htons(port); + return; +} + +static __inline__ lacp_port +lacp_actor_partner_tlv_get_port(const lacp_actor_partner_tlv_ref tlv) +{ + return ((lacp_port)ntohs(*((u_short *)tlv->lap_port))); +} + +/* + * LACP Collector TLV access functions + */ +static __inline__ void +lacp_collector_tlv_set_max_delay(lacp_collector_tlv_ref tlv, + lacp_collector_max_delay delay) +{ + *((lacp_collector_max_delay *)tlv->lac_max_delay) + = (lacp_collector_max_delay)htons(delay); + return; +} + +static __inline__ lacp_collector_max_delay +lacp_collector_tlv_get_max_delay(const lacp_collector_tlv_ref tlv) +{ + return ((lacp_collector_max_delay)ntohs(*((u_short *)tlv->lac_max_delay))); +} + +typedef struct lacpdu_s { + u_char la_subtype; + u_char la_version; + u_char la_actor_tlv[LACPDU_ACTOR_TLV_LENGTH]; + u_char la_partner_tlv[LACPDU_PARTNER_TLV_LENGTH]; + u_char la_collector_tlv[LACPDU_COLLECTOR_TLV_LENGTH]; + u_char la_terminator_type; + u_char la_terminator_length; + u_char la_reserved[50]; +} lacpdu, *lacpdu_ref; + +/* timer values in seconds */ +#define LACP_FAST_PERIODIC_TIME 1 +#define LACP_SLOW_PERIODIC_TIME 30 +#define LACP_SHORT_TIMEOUT_TIME 3 +#define LACP_LONG_TIMEOUT_TIME 90 +#define LACP_CHURN_DETECTION_TIME 60 +#define LACP_AGGREGATE_WAIT_TIME 2 + +/* packet rate per second */ +#define LACP_PACKET_RATE 3 + +/** + ** Link Aggregation Marker Protocol definitions + **/ +#define LA_MARKER_PDU_VERSION_1 1 +#define LA_MARKER_TLV_TYPE_TERMINATOR 0x00 +#define LA_MARKER_TLV_TYPE_MARKER 0x01 +#define LA_MARKER_TLV_TYPE_MARKER_RESPONSE 0x02 + +#define LA_MARKER_TLV_LENGTH 16 +#define LA_MARKER_RESPONSE_TLV_LENGTH 16 + +typedef u_int32_t la_marker_transaction_id; + +typedef struct la_marker_pdu_s { + u_char lm_subtype; /* 0x02 */ + u_char lm_version; /* 0x01 */ + u_char lm_marker_tlv_type; /* 0x01 or 0x02 */ + u_char lm_marker_tlv_length; /* 16 */ + u_char lm_requestor_port[2]; + u_char lm_requestor_system[6]; + u_char lm_requestor_transaction_id[4]; + u_char lm_pad[2]; + u_char lm_terminator_type; /* 0x00 */ + u_char lm_terminator_length; /* 0 */ + u_char lm_reserved[90]; +} la_marker_pdu, *la_marker_pdu_ref, + la_marker_response_pdu, * la_marker_response_pdu_ref; + +static __inline__ void +la_marker_pdu_set_requestor_port(la_marker_pdu_ref lmpdu, lacp_port port) +{ + *((lacp_port *)lmpdu->lm_requestor_port) = (lacp_port)htons(port); + return; +} + +static __inline__ lacp_port +la_marker_pdu_get_requestor_port(la_marker_pdu_ref lmpdu) +{ + return ((lacp_port)ntohs(*((lacp_port *)lmpdu->lm_requestor_port))); +} + +static __inline__ void +la_marker_pdu_set_requestor_transaction_id(la_marker_pdu_ref lmpdu, + la_marker_transaction_id xid) +{ + *((la_marker_transaction_id *)lmpdu->lm_requestor_transaction_id) + = 
(la_marker_transaction_id)htonl(xid); + return; +} + +static __inline__ la_marker_transaction_id +la_marker_pdu_get_requestor_transaction_id(la_marker_pdu_ref lmpdu) +{ + la_marker_transaction_id * xid_p; + + xid_p = (la_marker_transaction_id *)lmpdu->lm_requestor_transaction_id; + return ((la_marker_transaction_id)ntohl(*xid_p)); +} + +static __inline__ void +la_marker_pdu_set_requestor_system(la_marker_pdu_ref lmpdu, lacp_system sys) +{ + *((lacp_system_ref)lmpdu->lm_requestor_system) = sys; + return; +} + +static __inline__ lacp_system +la_marker_pdu_get_requestor_system(la_marker_pdu_ref lmpdu) +{ + return (*(lacp_system_ref)(lmpdu->lm_requestor_system)); +} + +#endif /* _NET_LACP_H_ */ diff --git a/bsd/net/multicast_list.c b/bsd/net/multicast_list.c new file mode 100644 index 000000000..6fbc66f25 --- /dev/null +++ b/bsd/net/multicast_list.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * multicast_util.c: + * - keep track of multicast addresses added to one interface based on the + * actual multicast addresses in another + * - used by VLAN and BOND + */ + +/* + * Modification History: + * + * April 29, 2004 Dieter Siegmund (dieter@apple.com) + * - created + */ + +#include +#include +#include +#include +#include + +__private_extern__ void +multicast_list_init(struct multicast_list * mc_list) +{ + SLIST_INIT(mc_list); + return; +} + +/* + * Function: multicast_list_remove + * Purpose: + * Remove the given list of multicast addresses from the interface and from + * the multicast list structure. + */ +__private_extern__ int +multicast_list_remove(struct multicast_list * mc_list) +{ + int error; + struct multicast_entry * mc; + int result = 0; + + while ((mc = SLIST_FIRST(mc_list)) != NULL) { + error = ifnet_remove_multicast(mc->mc_ifma); + if (error != 0) { + result = error; + } + SLIST_REMOVE_HEAD(mc_list, mc_entries); + ifmaddr_release(mc->mc_ifma); + FREE(mc, M_DEVBUF); + } + return (result); +} + +/* + * Function: multicast_list_program + * Purpose: + * Program the multicast filter on "target_ifp" using the values from + * "source_ifp", and saving the result in "mc_list" + * + * We build a new list of multicast addresses while programming the new list. + * If that completes successfully, we remove the old list, and return the + * new list. + * + * If it fails, we remove what we've added to the new list, and + * return an error. 
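+ *
+ * A sketch of a typical call (hypothetical names, in the spirit of
+ * the VLAN/BOND users mentioned at the top of this file):
+ *
+ *	error = multicast_list_program(&ifv->ifv_multicast,
+ *	    vlan_ifp, parent_ifp);
+ *
+ * which mirrors every AF_LINK multicast enabled on vlan_ifp onto
+ * parent_ifp, and records in ifv->ifv_multicast what to undo later.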
+ */
+__private_extern__ int
+multicast_list_program(struct multicast_list * mc_list,
+ struct ifnet * source_ifp,
+ struct ifnet * target_ifp)
+{
+ int alen;
+ int error = 0;
+ int i;
+ struct multicast_entry * mc = NULL;
+ struct multicast_list new_mc_list;
+ struct sockaddr_dl source_sdl;
+ ifmultiaddr_t * source_multicast_list;
+ struct sockaddr_dl target_sdl;
+
+ alen = target_ifp->if_addrlen;
+ bzero((char *)&target_sdl, sizeof(target_sdl));
+ target_sdl.sdl_len = sizeof(target_sdl);
+ target_sdl.sdl_family = AF_LINK;
+ target_sdl.sdl_type = target_ifp->if_type;
+ target_sdl.sdl_alen = alen;
+ target_sdl.sdl_index = target_ifp->if_index;
+
+ /* build a new list */
+ multicast_list_init(&new_mc_list);
+ error = ifnet_get_multicast_list(source_ifp, &source_multicast_list);
+ if (error != 0) {
+ printf("multicast_list_program: "
+ "ifnet_get_multicast_list(%s%d) failed, %d\n",
+ source_ifp->if_name, source_ifp->if_unit, error);
+ return (error);
+ }
+ for (i = 0; source_multicast_list[i] != NULL; i++) {
+ if (ifmaddr_address(source_multicast_list[i],
+ (struct sockaddr *)&source_sdl,
+ sizeof(source_sdl)) != 0
+ || source_sdl.sdl_family != AF_LINK) {
+ continue;
+ }
+ mc = _MALLOC(sizeof(struct multicast_entry), M_DEVBUF, M_WAITOK);
+ bcopy(LLADDR(&source_sdl), LLADDR(&target_sdl), alen);
+ error = ifnet_add_multicast(target_ifp, (struct sockaddr *)&target_sdl,
+ &mc->mc_ifma);
+ if (error != 0) {
+ FREE(mc, M_DEVBUF);
+ break;
+ }
+ SLIST_INSERT_HEAD(&new_mc_list, mc, mc_entries);
+ }
+ /* release the array allocated by ifnet_get_multicast_list() */
+ ifnet_free_multicast_list(source_multicast_list);
+ if (error != 0) {
+ /* restore previous state */
+ (void)multicast_list_remove(&new_mc_list);
+ } else {
+ /* remove the old entries, and return the new list */
+ (void)multicast_list_remove(mc_list);
+ *mc_list = new_mc_list;
+ }
+ return (error);
+}
diff --git a/bsd/machine/ansi.h b/bsd/net/multicast_list.h
similarity index 52%
rename from bsd/machine/ansi.h
rename to bsd/net/multicast_list.h
index 8f34cdd79..6a6f64b65 100644
--- a/bsd/machine/ansi.h
+++ b/bsd/net/multicast_list.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -19,28 +19,35 @@
 *
 * @APPLE_LICENSE_HEADER_END@
 */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
+
+#ifndef _NET_MULTICAST_LIST_H
+#define _NET_MULTICAST_LIST_H
+
+#include
+#include
+#include
+#include
+#include
+
 /*
- * The NEXTSTEP Software License Agreement specifies the terms
- * and conditions for redistribution.
- *
+ * multicast_util.h:
+ * - keep track of multicast addresses on one device for programming on
+ * another (VLAN, BOND)
 */
+struct multicast_entry {
+ SLIST_ENTRY(multicast_entry) mc_entries;
+ ifmultiaddr_t mc_ifma;
+};
+SLIST_HEAD(multicast_list, multicast_entry);
-#ifndef _MACHINE_ANSI_H_
-#define _MACHINE_ANSI_H_
-
-#if defined (__ppc__)
-#include "ppc/ansi.h"
-#elif defined (__i386__)
-#include "i386/ansi.h"
-#else
-#error architecture not supported
-#endif
+void
+multicast_list_init(struct multicast_list * mc_list);
-#ifdef KERNEL
-#ifndef offsetof
-#define offsetof(type, member) ((size_t)(&((type *)0)->member))
-#endif /* offsetof */
-#endif /* KERNEL */
+int
+multicast_list_program(struct multicast_list * mc_list,
+ struct ifnet * source_ifp,
+ struct ifnet * target_ifp);
+int
+multicast_list_remove(struct multicast_list * mc_list);
-#endif /* _MACHINE_ANSI_H_ */
+#endif /* _NET_MULTICAST_LIST_H */
diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c
index 9c4595c7a..38429b2f1 100644
--- a/bsd/net/ndrv.c
+++ b/bsd/net/ndrv.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -60,7 +60,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
@@ -75,14 +74,13 @@
 #include
-int ndrv_do_detach(struct ndrv_cb *);
-int ndrv_do_disconnect(struct ndrv_cb *);
-struct ndrv_cb *ndrv_find_tag(unsigned int);
-void ndrv_read_event(struct socket* inSo, caddr_t ref, int waitf);
-int ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt);
-int ndrv_delspec(struct ndrv_cb *);
-int ndrv_to_dlil_demux(struct ndrv_demux_desc* ndrv, struct dlil_demux_desc* dlil);
-void ndrv_handle_ifp_detach(u_long family, short unit);
+static int ndrv_do_detach(struct ndrv_cb *);
+static int ndrv_do_disconnect(struct ndrv_cb *);
+static struct ndrv_cb *ndrv_find_inbound(struct ifnet *ifp, u_long protocol_family);
+static int ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt);
+static int ndrv_delspec(struct ndrv_cb *);
+static int ndrv_to_dlil_demux(struct ndrv_demux_desc* ndrv, struct dlil_demux_desc* dlil);
+static void ndrv_handle_ifp_detach(u_long family, short unit);
 static int ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt);
 static int ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt);
 static struct ndrv_multiaddr* ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* addr);
@@ -90,62 +88,39 @@ static void ndrv_remove_all_multicast(struct ndrv_cb *np);
 unsigned long ndrv_sendspace = NDRVSNDQ;
 unsigned long ndrv_recvspace = NDRVRCVQ;
-struct ndrv_cb ndrvl; /* Head of controlblock list */
+TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl);
-struct domain ndrvdomain;
-struct protosw ndrvsw;
-static struct socket* ndrv_so;
+extern struct domain ndrvdomain;
+extern struct protosw ndrvsw;
+extern lck_mtx_t *domain_proto_mtx;
+extern void kprintf(const char *, ...);
 /*
- * Protocol init function for NDRV protocol
- * Init the control block list.
+ * Verify these values match.
+ * To keep clients from including dlil.h, we define
+ * these values independently in ndrv.h. They must
+ * match or a conversion function must be written.
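+ * (The #if/#error checks that follow enforce the match at compile time.)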
*/ -void -ndrv_init() -{ - int retval; - struct kev_request kev_request; - - ndrvl.nd_next = ndrvl.nd_prev = &ndrvl; - - /* Create a PF_SYSTEM socket so we can listen for events */ - retval = socreate(PF_SYSTEM, &ndrv_so, SOCK_RAW, SYSPROTO_EVENT); - if (retval != 0 || ndrv_so == NULL) - retval = KERN_FAILURE; - - /* Install a callback function for the socket */ - ndrv_so->so_rcv.sb_flags |= SB_NOTIFY|SB_UPCALL; - ndrv_so->so_upcall = ndrv_read_event; - ndrv_so->so_upcallarg = NULL; - - /* Configure the socket to receive the events we're interested in */ - kev_request.vendor_code = KEV_VENDOR_APPLE; - kev_request.kev_class = KEV_NETWORK_CLASS; - kev_request.kev_subclass = KEV_DL_SUBCLASS; - retval = ndrv_so->so_proto->pr_usrreqs->pru_control(ndrv_so, SIOCSKEVFILT, (caddr_t)&kev_request, 0, 0); - if (retval != 0) - { - /* - * We will not get attaching or detaching events in this case. - * We should probably prevent any sockets from binding so we won't - * panic later if the interface goes away. - */ - log(LOG_WARNING, "PF_NDRV: ndrv_init - failed to set event filter (%d)", - retval); - } -} +#if NDRV_DEMUXTYPE_ETHERTYPE != DLIL_DESC_ETYPE2 +#error NDRV_DEMUXTYPE_ETHERTYPE must match DLIL_DESC_ETYPE2 +#endif +#if NDRV_DEMUXTYPE_SAP != DLIL_DESC_SAP +#error NDRV_DEMUXTYPE_SAP must match DLIL_DESC_SAP +#endif +#if NDRV_DEMUXTYPE_SNAP != DLIL_DESC_SNAP +#error NDRV_DEMUXTYPE_SNAP must match DLIL_DESC_SNAP +#endif /* * Protocol output - Called to output a raw network packet directly * to the driver. */ -int -ndrv_output(register struct mbuf *m, register struct socket *so) +static int +ndrv_output(struct mbuf *m, struct socket *so) { - register struct ndrv_cb *np = sotondrvcb(so); - register struct ifnet *ifp = np->nd_if; - extern void kprintf(const char *, ...); + struct ndrv_cb *np = sotondrvcb(so); + struct ifnet *ifp = np->nd_if; int result = 0; #if NDRV_DEBUG @@ -158,32 +133,33 @@ ndrv_output(register struct mbuf *m, register struct socket *so) if ((m->m_flags&M_PKTHDR) == 0) return(EINVAL); + /* Unlock before calling dlil_output */ + socket_unlock(so, 0); + /* * Call DLIL if we can. DLIL is much safer than calling the * ifp directly. 
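 * Note that the socket lock is not held across this call: it is
 * dropped by the socket_unlock() above and retaken by the
 * socket_lock() that follows.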
*/ - if (np->nd_tag != 0) - result = dlil_output(np->nd_tag, m, (caddr_t)NULL, - (struct sockaddr*)NULL, 1); - else if (np->nd_send_tag != 0) - result = dlil_output(np->nd_send_tag, m, (caddr_t)NULL, - (struct sockaddr*)NULL, 1); - else - result = ENXIO; + result = dlil_output(ifp, np->nd_proto_family, m, (caddr_t)NULL, + (struct sockaddr*)NULL, 1); + + socket_lock(so, 0); + return (result); } /* Our input routine called from DLIL */ -int +static int ndrv_input(struct mbuf *m, char *frame_header, struct ifnet *ifp, - u_long dl_tag, - int sync_ok) + u_long proto_family, + __unused int sync_ok) { struct socket *so; struct sockaddr_dl ndrvsrc = {sizeof (struct sockaddr_dl), AF_NDRV}; - register struct ndrv_cb *np; + struct ndrv_cb *np; + int error = 0; /* move packet from if queue to socket */ @@ -194,43 +170,36 @@ ndrv_input(struct mbuf *m, ndrvsrc.sdl_slen = 0; bcopy(frame_header, &ndrvsrc.sdl_data, 6); - np = ndrv_find_tag(dl_tag); + np = ndrv_find_inbound(ifp, proto_family); if (np == NULL) { return(ENOENT); } so = np->nd_socket; /* prepend the frame header */ - m = m_prepend(m, ifp->if_data.ifi_hdrlen, M_NOWAIT); + m = m_prepend(m, ifp->if_hdrlen, M_NOWAIT); if (m == NULL) return EJUSTRETURN; - bcopy(frame_header, m->m_data, ifp->if_data.ifi_hdrlen); + bcopy(frame_header, m->m_data, ifp->if_hdrlen); + + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); if (sbappendaddr(&(so->so_rcv), (struct sockaddr *)&ndrvsrc, - m, (struct mbuf *)0) == 0) - { - /* yes, sbappendaddr returns zero if the sockbuff is full... */ - /* caller will free m */ - return(ENOMEM); - } else + m, (struct mbuf *)0, &error) != 0) { sorwakeup(so); - return(0); -} - -int -ndrv_control(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p) -{ - return (0); + } + lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + return 0; /* radar 4030377 - always return 0 */ } /* * Allocate an ndrv control block and some buffer space for the socket */ -int -ndrv_attach(struct socket *so, int proto, struct proc *p) +static int +ndrv_attach(struct socket *so, int proto, __unused struct proc *p) { int error; - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); if ((so->so_state & SS_PRIV) == 0) return(EPERM); @@ -256,10 +225,10 @@ ndrv_attach(struct socket *so, int proto, struct proc *p) np->nd_proto.sp_family = so->so_proto->pr_domain->dom_family; np->nd_proto.sp_protocol = proto; np->nd_if = NULL; - np->nd_tag = 0; + np->nd_proto_family = 0; np->nd_family = 0; np->nd_unit = 0; - insque((queue_t)np, (queue_t)&ndrvl); + TAILQ_INSERT_TAIL(&ndrvl, np, nd_next); return(0); } @@ -268,10 +237,10 @@ ndrv_attach(struct socket *so, int proto, struct proc *p) * Flush data or not depending on the options. */ -int +static int ndrv_detach(struct socket *so) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); if (np == 0) return EINVAL; @@ -288,9 +257,10 @@ ndrv_detach(struct socket *so) * Don't expect this to be used. 
*/ -int ndrv_connect(struct socket *so, struct sockaddr *nam, struct proc *p) +static int +ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); int result = 0; if (np == 0) @@ -312,22 +282,34 @@ int ndrv_connect(struct socket *so, struct sockaddr *nam, struct proc *p) return 0; } +static void +ndrv_event(struct ifnet *ifp, struct kev_msg *event) +{ + if (event->vendor_code == KEV_VENDOR_APPLE && + event->kev_class == KEV_NETWORK_CLASS && + event->kev_subclass == KEV_DL_SUBCLASS && + event->event_code == KEV_DL_IF_DETACHING) { + ndrv_handle_ifp_detach(ifp->if_family, ifp->if_unit); + } +} + +static int name_cmp(struct ifnet *, char *); + /* * This is the "driver open" hook - we 'bind' to the * named driver. * Here's where we latch onto the driver. */ -int -ndrv_bind(struct socket *so, struct sockaddr *nam, struct proc *p) +static int +ndrv_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { - register struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam; - register char *dname; - register struct ndrv_cb *np; - register struct ifnet *ifp; - extern int name_cmp(struct ifnet *, char *); + struct sockaddr_ndrv *sa = (struct sockaddr_ndrv *) nam; + char *dname; + struct ndrv_cb *np; + struct ifnet *ifp; int result; - if TAILQ_EMPTY(&ifnet) + if TAILQ_EMPTY(&ifnet_head) return(EADDRNOTAVAIL); /* Quick sanity check */ np = sotondrvcb(so); if (np == 0) @@ -351,46 +333,40 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, struct proc *p) * There's no internal call for this so we have to dup the code * in if.c/ifconf() */ - TAILQ_FOREACH(ifp, &ifnet, if_link) { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { if (name_cmp(ifp, dname) == 0) break; } + ifnet_head_done(); if (ifp == NULL) return(EADDRNOTAVAIL); - - /* - * Loopback demuxing doesn't work with PF_NDRV. - * The first 4 bytes of the packet must be the - * protocol ptr. Can't get that from userland. - */ - if (ifp->if_family == APPLE_IF_FAM_LOOPBACK) - return (ENOTSUP); - - if ((dlil_find_dltag(ifp->if_family, ifp->if_unit, - PF_NDRV, &np->nd_send_tag) != 0) && - (ifp->if_family != APPLE_IF_FAM_PPP)) { - /* NDRV isn't registered on this interface, lets change that */ - struct dlil_proto_reg_str ndrv_proto; - int result = 0; - bzero(&ndrv_proto, sizeof(ndrv_proto)); - TAILQ_INIT(&ndrv_proto.demux_desc_head); - - ndrv_proto.interface_family = ifp->if_family; - ndrv_proto.protocol_family = PF_NDRV; - ndrv_proto.unit_number = ifp->if_unit; - - result = dlil_attach_protocol(&ndrv_proto, &np->nd_send_tag); - - /* - * If the interface does not allow PF_NDRV to attach, we will - * respect it's wishes. Sending will be disabled. No error is - * returned because the client may later attach a real protocol - * that the interface may accept. - */ - if (result != 0) - np->nd_send_tag = 0; - } + + // PPP doesn't support PF_NDRV. 
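+ // Attaching PF_NDRV here is what lets this socket transmit through
+ // dlil_output() before any demux spec is installed. An EEXIST
+ // result below just means another NDRV socket attached it first,
+ // which is harmless.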
+ if (ifp->if_family != APPLE_IF_FAM_PPP) + { + /* NDRV on this interface */ + struct dlil_proto_reg_str ndrv_proto; + result = 0; + bzero(&ndrv_proto, sizeof(ndrv_proto)); + TAILQ_INIT(&ndrv_proto.demux_desc_head); + + ndrv_proto.interface_family = ifp->if_family; + ndrv_proto.protocol_family = PF_NDRV; + ndrv_proto.unit_number = ifp->if_unit; + ndrv_proto.event = ndrv_event; + + /* We aren't worried about double attaching, that should just return an error */ + result = dlil_attach_protocol(&ndrv_proto); + if (result && result != EEXIST) { + return result; + } + np->nd_proto_family = PF_NDRV; + } + else { + np->nd_proto_family = 0; + } np->nd_if = ifp; np->nd_family = ifp->if_family; @@ -399,10 +375,10 @@ ndrv_bind(struct socket *so, struct sockaddr *nam, struct proc *p) return(0); } -int +static int ndrv_disconnect(struct socket *so) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); if (np == 0) return EINVAL; @@ -414,41 +390,13 @@ ndrv_disconnect(struct socket *so) return 0; } -/* - * Accessor function - */ -struct ifnet* -ndrv_get_ifp(caddr_t ndrv_pcb) -{ - struct ndrv_cb* np = (struct ndrv_cb*)ndrv_pcb; - -#if DEBUG - { - struct ndrv_cb* temp = ndrvl.nd_next; - /* Verify existence of pcb */ - for (temp = ndrvl.nd_next; temp != &ndrvl; temp = temp->nd_next) - { - if (temp == np) - break; - } - - if (temp != np) - { - log(LOG_WARNING, "PF_NDRV: ndrv_get_ifp called with invalid ndrv_cb!"); - return NULL; - } - } -#endif - - return np->nd_if; -} - /* * Mark the connection as being incapable of further input. */ -int +static int ndrv_shutdown(struct socket *so) { + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); socantsendmore(so); return 0; } @@ -458,10 +406,10 @@ ndrv_shutdown(struct socket *so) * to the appropriate driver. The really tricky part * is the destination address... */ -int -ndrv_send(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p) +static int +ndrv_send(struct socket *so, __unused int flags, struct mbuf *m, + __unused struct sockaddr *addr, struct mbuf *control, + __unused struct proc *p) { int error; @@ -474,10 +422,10 @@ ndrv_send(struct socket *so, int flags, struct mbuf *m, } -int +static int ndrv_abort(struct socket *so) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); if (np == 0) return EINVAL; @@ -486,19 +434,10 @@ ndrv_abort(struct socket *so) return 0; } -int -ndrv_sense(struct socket *so, struct stat *sb) -{ - /* - * stat: don't bother with a blocksize. 
- */ - return (0); -} - -int +static int ndrv_sockaddr(struct socket *so, struct sockaddr **nam) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); int len; if (np == 0) @@ -508,16 +447,19 @@ ndrv_sockaddr(struct socket *so, struct sockaddr **nam) return EINVAL; len = np->nd_laddr->snd_len; + MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK); + if (*nam == NULL) + return ENOMEM; bcopy((caddr_t)np->nd_laddr, *nam, (unsigned)len); return 0; } -int +static int ndrv_peeraddr(struct socket *so, struct sockaddr **nam) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); int len; if (np == 0) @@ -527,25 +469,21 @@ ndrv_peeraddr(struct socket *so, struct sockaddr **nam) return ENOTCONN; len = np->nd_faddr->snd_len; + MALLOC(*nam, struct sockaddr *, len, M_SONAME, M_WAITOK); + if (*nam == NULL) + return ENOMEM; bcopy((caddr_t)np->nd_faddr, *nam, (unsigned)len); return 0; } -/* Control input */ - -void -ndrv_ctlinput(int dummy1, struct sockaddr *dummy2, void *dummy3) -{ -} - /* Control output */ -int +static int ndrv_ctloutput(struct socket *so, struct sockopt *sopt) { - register struct ndrv_cb *np = sotondrvcb(so); + struct ndrv_cb *np = sotondrvcb(so); int error = 0; switch(sopt->sopt_name) @@ -580,25 +518,11 @@ ndrv_ctloutput(struct socket *so, struct sockopt *sopt) return(error); } -/* Drain the queues */ -void -ndrv_drain() -{ -} - -/* Sysctl hook for NDRV */ -int -ndrv_sysctl() -{ - return(0); -} - -int -ndrv_do_detach(register struct ndrv_cb *np) +static int +ndrv_do_detach(struct ndrv_cb *np) { struct ndrv_cb* cur_np = NULL; struct socket *so = np->nd_socket; - struct ndrv_multicast* next; int error = 0; #if NDRV_DEBUG @@ -606,47 +530,38 @@ ndrv_do_detach(register struct ndrv_cb *np) #endif ndrv_remove_all_multicast(np); - if (np->nd_tag != 0) - { - error = dlil_detach_protocol(np->nd_tag); - if (error) - { - log(LOG_WARNING, "NDRV ndrv_do_detach: error %d removing dl_tag %d", - error, np->nd_tag); - return error; - } - } - - /* Remove from the linked list of control blocks */ - remque((queue_t)np); - - if (np->nd_send_tag != 0) - { - /* Check if this is the last socket attached to this interface */ - for (cur_np = ndrvl.nd_next; cur_np != &ndrvl; cur_np = cur_np->nd_next) - { - if (cur_np->nd_family == np->nd_family && - cur_np->nd_unit == np->nd_unit) - { - break; - } - } - - /* If there are no other interfaces, detach PF_NDRV from the interface */ - if (cur_np == &ndrvl) - { - dlil_detach_protocol(np->nd_send_tag); - } - } + if (np->nd_if) { + if (np->nd_proto_family != PF_NDRV && + np->nd_proto_family != 0) { + dlil_detach_protocol(np->nd_if, np->nd_proto_family); + } + + /* Remove from the linked list of control blocks */ + TAILQ_REMOVE(&ndrvl, np, nd_next); + + /* Check if this is the last socket attached to this interface */ + TAILQ_FOREACH(cur_np, &ndrvl, nd_next) { + if (cur_np->nd_family == np->nd_family && + cur_np->nd_unit == np->nd_unit) { + break; + } + } + + /* If there are no other interfaces, detach PF_NDRV from the interface */ + if (cur_np == NULL) { + dlil_detach_protocol(np->nd_if, PF_NDRV); + } + } FREE((caddr_t)np, M_PCB); so->so_pcb = 0; + so->so_flags |= SOF_PCBCLEARING; sofree(so); return error; } -int -ndrv_do_disconnect(register struct ndrv_cb *np) +static int +ndrv_do_disconnect(struct ndrv_cb *np) { #if NDRV_DEBUG kprintf("NDRV disconnect: %x\n", np); @@ -662,15 +577,30 @@ ndrv_do_disconnect(register struct ndrv_cb *np) return(0); } +/* Hackery - return a string version of a decimal 
number */ +static char * +sprint_d(u_int n, char *buf, int buflen) +{ char dbuf[IFNAMSIZ]; + char *cp = dbuf+IFNAMSIZ-1; + + *cp = 0; + do { buflen--; + cp--; + *cp = "0123456789"[n % 10]; + n /= 10; + } while (n != 0 && buflen > 0); + strncpy(buf, cp, IFNAMSIZ-buflen); + return (cp); +} + /* * Try to compare a device name (q) with one of the funky ifnet * device names (ifp). */ -int name_cmp(register struct ifnet *ifp, register char *q) -{ register char *r; - register int len; +static int name_cmp(struct ifnet *ifp, char *q) +{ char *r; + int len; char buf[IFNAMSIZ]; - static char *sprint_d(); r = buf; len = strlen(ifp->if_name); @@ -683,32 +613,15 @@ int name_cmp(register struct ifnet *ifp, register char *q) return(strncmp(buf, q, IFNAMSIZ)); } -/* Hackery - return a string version of a decimal number */ -static char * -sprint_d(n, buf, buflen) - u_int n; - char *buf; - int buflen; -{ char dbuf[IFNAMSIZ]; - register char *cp = dbuf+IFNAMSIZ-1; - - *cp = 0; - do { buflen--; - cp--; - *cp = "0123456789"[n % 10]; - n /= 10; - } while (n != 0 && buflen > 0); - strncpy(buf, cp, IFNAMSIZ-buflen); - return (cp); -} - +#if 0 +//### Not used /* * When closing, dump any enqueued mbufs. */ void -ndrv_flushq(register struct ifqueue *q) +ndrv_flushq(struct ifqueue *q) { - register struct mbuf *m; + struct mbuf *m; for (;;) { IF_DEQUEUE(q, m); @@ -719,6 +632,7 @@ ndrv_flushq(register struct ifqueue *q) m_freem(m); } } +#endif int ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) @@ -730,7 +644,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) int error = 0; /* Sanity checking */ - if (np->nd_tag) + if (np->nd_proto_family != PF_NDRV) return EBUSY; if (np->nd_if == NULL) return EINVAL; @@ -764,7 +678,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) if (error == 0) { /* Copy the ndrv demux array from userland */ - error = copyin(ndrvSpec.demux_list, ndrvDemux, + error = copyin(CAST_USER_ADDR_T(ndrvSpec.demux_list), ndrvDemux, ndrvSpec.demux_count * sizeof(struct ndrv_demux_desc)); ndrvSpec.demux_list = ndrvDemux; } @@ -779,6 +693,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) dlilSpec.interface_family = np->nd_family; dlilSpec.unit_number = np->nd_unit; dlilSpec.input = ndrv_input; + dlilSpec.event = ndrv_event; dlilSpec.protocol_family = ndrvSpec.protocol_family; for (demuxOn = 0; demuxOn < ndrvSpec.demux_count; demuxOn++) @@ -796,7 +711,9 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) if (error == 0) { /* We've got all our ducks lined up...lets attach! 
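 * On success, np->nd_proto_family records the attached protocol
 * family so that ndrv_delspec()/ndrv_do_detach() can undo it.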
*/ - error = dlil_attach_protocol(&dlilSpec, &np->nd_tag); + error = dlil_attach_protocol(&dlilSpec); + if (error == 0) + np->nd_proto_family = dlilSpec.protocol_family; } /* Free any memory we've allocated */ @@ -837,32 +754,27 @@ ndrv_delspec(struct ndrv_cb *np) { int result = 0; - if (np->nd_tag == 0) + if (np->nd_proto_family == PF_NDRV || + np->nd_proto_family == 0) return EINVAL; /* Detach the protocol */ - result = dlil_detach_protocol(np->nd_tag); - if (result == 0) - { - np->nd_tag = 0; - } + result = dlil_detach_protocol(np->nd_if, np->nd_proto_family); + np->nd_proto_family = PF_NDRV; return result; } struct ndrv_cb * -ndrv_find_tag(unsigned int tag) +ndrv_find_inbound(struct ifnet *ifp, u_long protocol) { struct ndrv_cb* np; - int i; - - if (tag == 0) - return NULL; + + if (protocol == PF_NDRV) return NULL; - for (np = ndrvl.nd_next; np != NULL; np = np->nd_next) - { - if (np->nd_tag == tag) - { + TAILQ_FOREACH(np, &ndrvl, nd_next) { + if (np->nd_proto_family == protocol && + np->nd_if == ifp) { return np; } } @@ -870,7 +782,7 @@ ndrv_find_tag(unsigned int tag) return NULL; } -void ndrv_dominit() +static void ndrv_dominit(void) { static int ndrv_dominited = 0; @@ -879,55 +791,22 @@ void ndrv_dominit() ndrv_dominited = 1; } -void -ndrv_read_event(struct socket* so, caddr_t ref, int waitf) -{ - // Read an event - struct mbuf *m = NULL; - struct kern_event_msg *msg; - struct uio auio = {0}; - int result = 0; - int flags = 0; - - // Get the data - auio.uio_resid = 1000000; // large number to get all of the data - flags = MSG_DONTWAIT; - result = soreceive(so, (struct sockaddr**)NULL, &auio, &m, - (struct mbuf**)NULL, &flags); - if (result != 0 || m == NULL) - return; - - // cast the mbuf to a kern_event_msg - // this is dangerous, doesn't handle linked mbufs - msg = mtod(m, struct kern_event_msg*); - - // check for detaches, assume even filtering is working - if (msg->event_code == KEV_DL_IF_DETACHING || - msg->event_code == KEV_DL_IF_DETACHED) - { - struct net_event_data *ev_data; - ev_data = (struct net_event_data*)msg->event_data; - ndrv_handle_ifp_detach(ev_data->if_family, ev_data->if_unit); - } - - m_free(m); -} - -void +static void ndrv_handle_ifp_detach(u_long family, short unit) { struct ndrv_cb* np; - u_long dl_tag; + struct ifnet *ifp = NULL; + struct socket *so; /* Find all sockets using this interface. */ - for (np = ndrvl.nd_next; np != &ndrvl; np = np->nd_next) - { + TAILQ_FOREACH(np, &ndrvl, nd_next) { if (np->nd_family == family && np->nd_unit == unit) { /* This cb is using the detaching interface, but not for long. */ /* Let the protocol go */ - if (np->nd_tag != 0) + ifp = np->nd_if; + if (np->nd_proto_family != 0) ndrv_delspec(np); /* Delete the multicasts first */ @@ -937,18 +816,19 @@ ndrv_handle_ifp_detach(u_long family, short unit) np->nd_if = NULL; np->nd_unit = 0; np->nd_family = 0; - np->nd_send_tag = 0; - + + so = np->nd_socket; /* Make sure sending returns an error */ /* Is this safe? Will we drop the funnel? 
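 * (Under the new locking model it is: the domain mutex is held
 * here, as the lck_mtx_assert() below verifies.)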
*/ - socantsendmore(np->nd_socket); - socantrcvmore(np->nd_socket); + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); + socantsendmore(so); + socantrcvmore(so); } } /* Unregister our protocol */ - if (dlil_find_dltag(family, unit, PF_NDRV, &dl_tag) == 0) { - dlil_detach_protocol(dl_tag); + if (ifp) { + dlil_detach_protocol(ifp, PF_NDRV); } } @@ -983,7 +863,7 @@ ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt) if (result == 0) { // Try adding the multicast - result = if_addmulti(np->nd_if, &ndrv_multi->addr, NULL); + result = if_addmulti(np->nd_if, &ndrv_multi->addr, &ndrv_multi->ifma); } if (result == 0) @@ -1039,7 +919,7 @@ ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt) if (result == 0) { // Try deleting the multicast - result = if_delmulti(np->nd_if, &ndrv_entry->addr); + result = if_delmultiaddr(ndrv_entry->ifma, 0); } if (result == 0) @@ -1047,6 +927,8 @@ ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt) // Remove from our linked list struct ndrv_multiaddr* cur = np->nd_multiaddrs; + ifma_release(ndrv_entry->ifma); + if (cur == ndrv_entry) { np->nd_multiaddrs = cur->next; @@ -1101,7 +983,8 @@ ndrv_remove_all_multicast(struct ndrv_cb* np) cur = np->nd_multiaddrs; np->nd_multiaddrs = cur->next; - if_delmulti(np->nd_if, &cur->addr); + if_delmultiaddr(cur->ifma, 0); + ifma_release(cur->ifma); FREE(cur, M_IFADDR); } } @@ -1109,17 +992,19 @@ ndrv_remove_all_multicast(struct ndrv_cb* np) struct pr_usrreqs ndrv_usrreqs = { ndrv_abort, pru_accept_notsupp, ndrv_attach, ndrv_bind, - ndrv_connect, pru_connect2_notsupp, ndrv_control, ndrv_detach, + ndrv_connect, pru_connect2_notsupp, pru_control_notsupp, ndrv_detach, ndrv_disconnect, pru_listen_notsupp, ndrv_peeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, ndrv_send, ndrv_sense, ndrv_shutdown, - ndrv_sockaddr, sosend, soreceive, sopoll + pru_rcvoob_notsupp, ndrv_send, pru_sense_null, ndrv_shutdown, + ndrv_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; struct protosw ndrvsw = { SOCK_RAW, &ndrvdomain, NDRVPROTO_NDRV, PR_ATOMIC|PR_ADDR, - 0, ndrv_output, ndrv_ctlinput, ndrv_ctloutput, - 0, ndrv_init, 0, 0, - ndrv_drain, ndrv_sysctl, &ndrv_usrreqs + 0, ndrv_output, 0, ndrv_ctloutput, + 0, 0, 0, 0, + 0, 0, + &ndrv_usrreqs, + 0, 0, 0 }; struct domain ndrvdomain = diff --git a/bsd/net/ndrv.h b/bsd/net/ndrv.h index 0824dafca..6aa8e14bd 100644 --- a/bsd/net/ndrv.h +++ b/bsd/net/ndrv.h @@ -42,9 +42,9 @@ struct sockaddr_ndrv * Support for user-mode protocol handlers */ -#define NDRV_DEMUXTYPE_ETHERTYPE DLIL_DESC_ETYPE2 -#define NDRV_DEMUXTYPE_SAP DLIL_DESC_SAP -#define NDRV_DEMUXTYPE_SNAP DLIL_DESC_SNAP +#define NDRV_DEMUXTYPE_ETHERTYPE 4 +#define NDRV_DEMUXTYPE_SAP 5 +#define NDRV_DEMUXTYPE_SNAP 6 #define NDRVPROTO_NDRV 0 @@ -115,9 +115,7 @@ struct ndrv_protocol_desc }; #define SOL_NDRVPROTO NDRVPROTO_NDRV /* Use this socket level */ -/* NDRV_DMXSPEC 0x01 Obsolete */ #define NDRV_DELDMXSPEC 0x02 /* Delete the registered protocol */ -/* NDRV_DMXSPECCNT 0x03 Obsolete */ #define NDRV_SETDMXSPEC 0x04 /* Set the protocol spec */ #define NDRV_ADDMULTICAST 0x05 /* Add a physical multicast address */ #define NDRV_DELMULTICAST 0x06 /* Delete a phyiscal multicast */ @@ -145,11 +143,4 @@ struct ndrv_protocol_desc * you a second or two. 
 */
-#ifdef KERNEL
-#ifdef __APPLE_API_UNSTABLE
-/* Additional Kernel APIs */
-struct ifnet* ndrv_get_ifp(caddr_t ndrv_pcb);
-#endif /* __APPLE_API_UNSTABLE */
-#endif
-
 #endif /* _NET_NDRV_H */
diff --git a/bsd/net/ndrv_var.h b/bsd/net/ndrv_var.h
index 74f8aaf2b..d39c2a7ad 100644
--- a/bsd/net/ndrv_var.h
+++ b/bsd/net/ndrv_var.h
@@ -27,9 +27,7 @@
 #ifndef _NET_NDRV_VAR_H
 #define _NET_NDRV_VAR_H
-#include
-#ifdef KERNEL
-#ifdef __APPLE_API_PRIVATE
+#ifdef PRIVATE
 /*
 * structure for storing a linked list of multicast addresses
@@ -39,6 +37,7 @@
 struct ndrv_multiaddr
 {
 struct ndrv_multiaddr* next;
+ struct ifmultiaddr* ifma;
 struct sockaddr addr;
 };
@@ -49,8 +48,7 @@
 */
 struct ndrv_cb
 {
- struct ndrv_cb *nd_next; /* Doubly-linked list */
- struct ndrv_cb *nd_prev;
+ TAILQ_ENTRY(ndrv_cb) nd_next;
 struct socket *nd_socket; /* Back to the socket */
 u_int32_t nd_signature; /* Just double-checking */
 struct sockaddr_ndrv *nd_faddr;
@@ -59,8 +57,7 @@ struct ndrv_cb
 int nd_descrcnt; /* # elements in nd_dlist - Obsolete */
 TAILQ_HEAD(dlist, dlil_demux_desc) nd_dlist; /* Descr. list */
 struct ifnet *nd_if; /* obsolete, maintained for binary compatibility */
- u_long nd_send_tag;
- u_long nd_tag;
+ u_long nd_proto_family;
 u_long nd_family;
 struct ndrv_multiaddr* nd_multiaddrs;
 short nd_unit;
@@ -73,7 +70,5 @@ struct ndrv_cb
 #define NDRVSNDQ 8192
 #define NDRVRCVQ 8192
-extern struct ndrv_cb ndrvl; /* Head of controlblock list */
-#endif /* __APPLE_API_PRIVATE */
-#endif /* KERNEL */
+#endif /* PRIVATE */
 #endif /* _NET_NDRV_VAR_H */
diff --git a/bsd/net/net_osdep.c b/bsd/net/net_osdep.c
index 8d2f62168..51bea3c12 100644
--- a/bsd/net/net_osdep.c
+++ b/bsd/net/net_osdep.c
@@ -61,7 +61,6 @@
 #include
 #include
-#include
 #include
 #include
diff --git a/bsd/net/net_osdep.h b/bsd/net/net_osdep.h
index e350606ac..15775cb29 100644
--- a/bsd/net/net_osdep.h
+++ b/bsd/net/net_osdep.h
@@ -250,10 +250,10 @@
 #ifndef __NET_NET_OSDEP_H_DEFINED_
 #define __NET_NET_OSDEP_H_DEFINED_
 #include
-#ifdef KERNEL
+#ifdef KERNEL_PRIVATE
 struct ifnet;
-extern const char *if_name __P((struct ifnet *));
+extern const char *if_name(struct ifnet *);
 #define HAVE_OLD_BPF
@@ -272,5 +272,5 @@ extern const char *if_name __P((struct ifnet *));
 #define WITH_CONVERT_IP_OFF
 #endif
-#endif /*_KERNEL*/
+#endif /* KERNEL_PRIVATE */
 #endif /*__NET_NET_OSDEP_H_DEFINED_ */
diff --git a/bsd/net/netisr.c b/bsd/net/netisr.c
deleted file mode 100644
index 2ae5be8eb..000000000
--- a/bsd/net/netisr.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
-
- * @APPLE_LICENSE_HEADER_START@
-
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
-
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Mach Operating System - * Copyright (c) 1987 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -/* HISTORY - * 18-May-90 Avadis Tevanian (avie) at NeXT - * Changed to use sensible priorities (higher numbers -> higher pri). - * - * 1-Feb-88 David Golub (dbg) at Carnegie-Mellon University - * Goofed... netisr thread must run at splnet, because the routines - * it calls expect to be called from the softnet interrupt (at - * splnet). - * - * 19-Nov-87 David Golub (dbg) at Carnegie-Mellon University - * Created. - * - */ - -/* - * netisr.c - * - * Kernel thread for network code. - */ - - -#include -#include -#include - -#include -#include - -volatile int netisr; - - -void run_netisr(void) -{ - spl_t spl = splnet(); - - while (netisr != 0) { -#ifdef NIMP -#if NIMP > 0 - if (netisr & (1< 0 */ -#endif /* NIMP */ - -#if INET - if (netisr & (1< -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL_PRIVATE /* * The following symbols control whether we include code for * various compression methods. @@ -72,36 +71,35 @@ struct compressor { int compress_proto; /* CCP compression protocol number */ /* Allocate space for a compressor (transmit side) */ - void *(*comp_alloc) __P((u_char *options, int opt_len)); + void *(*comp_alloc)(u_char *options, int opt_len); /* Free space used by a compressor */ - void (*comp_free) __P((void *state)); + void (*comp_free)(void *state); /* Initialize a compressor */ - int (*comp_init) __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int debug)); + int (*comp_init)(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int debug); /* Reset a compressor */ - void (*comp_reset) __P((void *state)); + void (*comp_reset)(void *state); /* Compress a packet */ - int (*compress) __P((void *state, PACKETPTR *mret, - PACKETPTR mp, int orig_len, int max_len)); + int (*compress)(void *state, PACKETPTR *mret, + PACKETPTR mp, int orig_len, int max_len); /* Return compression statistics */ - void (*comp_stat) __P((void *state, struct compstat *stats)); + void (*comp_stat)(void *state, struct compstat *stats); /* Allocate space for a decompressor (receive side) */ - void *(*decomp_alloc) __P((u_char *options, int opt_len)); + void *(*decomp_alloc)(u_char *options, int opt_len); /* Free space used by a decompressor */ - void (*decomp_free) __P((void *state)); + void (*decomp_free)(void *state); /* Initialize a decompressor */ - int (*decomp_init) __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int mru, int debug)); + int (*decomp_init)(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int mru, int debug); /* Reset a decompressor */ - void (*decomp_reset) __P((void *state)); + void (*decomp_reset)(void *state); /* Decompress a packet. 
*/ - int (*decompress) __P((void *state, PACKETPTR mp, - PACKETPTR *dmpp)); + int (*decompress)(void *state, PACKETPTR mp, PACKETPTR *dmpp); /* Update state for an incompressible packet received */ - void (*incomp) __P((void *state, PACKETPTR mp)); + void (*incomp)(void *state, PACKETPTR mp); /* Return decompression statistics */ - void (*decomp_stat) __P((void *state, struct compstat *stats)); + void (*decomp_stat)(void *state, struct compstat *stats); }; #endif /* PACKETPTR */ @@ -183,5 +181,5 @@ struct compressor { #define CI_PREDICTOR_2 2 /* config option for Predictor-2 */ #define CILEN_PREDICTOR_2 2 /* length of its config option */ -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NET_PPP_COMP_H */ diff --git a/bsd/net/ppp_deflate.c b/bsd/net/ppp_deflate.c index 7d28a5b41..3d2b95df3 100644 --- a/bsd/net/ppp_deflate.c +++ b/bsd/net/ppp_deflate.c @@ -78,24 +78,23 @@ struct deflate_state { #define DEFLATE_OVHD 2 /* Deflate overhead/packet */ -static void *z_alloc __P((void *, u_int items, u_int size)); -static void z_free __P((void *, void *ptr)); -static void *z_comp_alloc __P((u_char *options, int opt_len)); -static void *z_decomp_alloc __P((u_char *options, int opt_len)); -static void z_comp_free __P((void *state)); -static void z_decomp_free __P((void *state)); -static int z_comp_init __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int debug)); -static int z_decomp_init __P((void *state, u_char *options, int opt_len, - int unit, int hdrlen, int mru, int debug)); -static int z_compress __P((void *state, struct mbuf **mret, - struct mbuf *mp, int slen, int maxolen)); -static void z_incomp __P((void *state, struct mbuf *dmsg)); -static int z_decompress __P((void *state, struct mbuf *cmp, - struct mbuf **dmpp)); -static void z_comp_reset __P((void *state)); -static void z_decomp_reset __P((void *state)); -static void z_comp_stats __P((void *state, struct compstat *stats)); +static void *z_alloc(void *, u_int items, u_int size); +static void z_free(void *, void *ptr); +static void *z_comp_alloc(u_char *options, int opt_len); +static void *z_decomp_alloc(u_char *options, int opt_len); +static void z_comp_free(void *state); +static void z_decomp_free(void *state); +static int z_comp_init(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int debug); +static int z_decomp_init(void *state, u_char *options, int opt_len, + int unit, int hdrlen, int mru, int debug); +static int z_compress(void *state, struct mbuf **mret, + struct mbuf *mp, int slen, int maxolen); +static void z_incomp(void *state, struct mbuf *dmsg); +static int z_decompress(void *state, struct mbuf *cmp, struct mbuf **dmpp); +static void z_comp_reset(void *state); +static void z_decomp_reset(void *state); +static void z_comp_stats(void *state, struct compstat *stats); /* * Procedures exported to if_ppp.c. 
diff --git a/bsd/net/ppp_defs.h b/bsd/net/ppp_defs.h index 416aa9ede..b902632a9 100644 --- a/bsd/net/ppp_defs.h +++ b/bsd/net/ppp_defs.h @@ -167,12 +167,4 @@ struct ppp_idle { time_t recv_idle; /* time since last NP packet received */ }; -#ifndef __P -#ifdef __STDC__ -#define __P(x) x -#else -#define __P(x) () -#endif -#endif - #endif /* _PPP_DEFS_H_ */ diff --git a/bsd/net/radix.c b/bsd/net/radix.c index cfe854974..bbc3572b1 100644 --- a/bsd/net/radix.c +++ b/bsd/net/radix.c @@ -70,17 +70,20 @@ #endif #include #include +#include +#include +#include #endif -static int rn_walktree_from __P((struct radix_node_head *h, void *a, - void *m, walktree_f_t *f, void *w)); -static int rn_walktree __P((struct radix_node_head *, walktree_f_t *, void *)); +static int rn_walktree_from(struct radix_node_head *h, void *a, + void *m, walktree_f_t *f, void *w); +static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); static struct radix_node - *rn_insert __P((void *, struct radix_node_head *, int *, - struct radix_node [2])), - *rn_newpair __P((void *, int, struct radix_node[2])), - *rn_search __P((void *, struct radix_node *)), - *rn_search_m __P((void *, struct radix_node *, void *)); + *rn_insert(void *, struct radix_node_head *, int *, + struct radix_node [2]), + *rn_newpair(void *, int, struct radix_node[2]), + *rn_search(void *, struct radix_node *), + *rn_search_m(void *, struct radix_node *, void *); static int max_keylen; static struct radix_mask *rn_mkfreelist; @@ -89,17 +92,22 @@ static char *addmask_key; static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1}; static char *rn_zeros, *rn_ones; + +extern lck_grp_t *domain_proto_mtx_grp; +extern lck_attr_t *domain_proto_mtx_attr; +lck_mtx_t *rn_mutex; + #define rn_masktop (mask_rnhead->rnh_treetop) #undef Bcmp #define Bcmp(a, b, l) \ (l == 0 ? 
0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l)) -static int rn_lexobetter __P((void *m_arg, void *n_arg)); +static int rn_lexobetter(void *m_arg, void *n_arg); static struct radix_mask * - rn_new_radix_mask __P((struct radix_node *tt, - struct radix_mask *next)); -static int rn_satsifies_leaf __P((char *trial, struct radix_node *leaf, - int skip)); + rn_new_radix_mask(struct radix_node *tt, + struct radix_mask *next); +static int rn_satsifies_leaf(char *trial, struct radix_node *leaf, + int skip); /* * The data structure for the keys is a radix tree with one way @@ -496,7 +504,7 @@ rn_addmask(n_arg, search, skip) x = rn_insert(cp, mask_rnhead, &maskduplicated, x); if (maskduplicated) { log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); - Free(saved_x); + R_Free(saved_x); return (x); } /* @@ -1084,6 +1092,7 @@ rn_init() #ifdef KERNEL struct domain *dom; + /* lock already held when rn_init is called */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_maxrtkey > max_keylen) max_keylen = dom->dom_maxrtkey; @@ -1103,4 +1112,41 @@ rn_init() *cp++ = -1; if (rn_inithead((void **)&mask_rnhead, 0) == 0) panic("rn_init 2"); + + rn_mutex = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr); +} +int +rn_lock(so, refcount, lr) + struct socket *so; + int refcount; + int lr; +{ +// printf("rn_lock: (global) so=%x ref=%d lr=%x\n", so, so->so_usecount, lr); + lck_mtx_assert(rn_mutex, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rn_mutex); + if (refcount) + so->so_usecount++; + return (0); +} + +int +rn_unlock(so, refcount, lr) + struct socket *so; + int refcount; + int lr; +{ +// printf("rn_unlock: (global) so=%x ref=%d lr=%x\n", so, so->so_usecount, lr); + if (refcount) + so->so_usecount--; + lck_mtx_assert(rn_mutex, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(rn_mutex); + return (0); +} +lck_mtx_t * +rn_getlock(so, locktype) + struct socket *so; + int locktype; +{ +// printf("rn_getlock: (global) so=%x\n", so); + return (rn_mutex); } diff --git a/bsd/net/radix.h b/bsd/net/radix.h index 833e9f714..73d0b6da7 100644 --- a/bsd/net/radix.h +++ b/bsd/net/radix.h @@ -59,7 +59,7 @@ #define _RADIX_H_ #include -#if !defined(KERNEL) || defined(__APPLE_API_PRIVATE) +#ifdef PRIVATE #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_RTABLE); @@ -104,14 +104,6 @@ struct radix_node { #define rn_left rn_u.rn_node.rn_L #define rn_right rn_u.rn_node.rn_R -#if 0 -/* for backward compatibility with previous definitions */ -#define rn_p rn_parent -#define rn_b rn_bit -#define rn_off rn_offset -#define rn_l rn_left -#define rn_r rn_right -#endif /* * Annotations to tree concerning potential routes applying to subtrees. 
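 * (They live in the struct radix_mask entries chained through the
 * rm_mklist field referenced below.)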
*/ @@ -141,35 +133,35 @@ struct radix_mask { #define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} -typedef int walktree_f_t __P((struct radix_node *, void *)); +typedef int walktree_f_t(struct radix_node *, void *); struct radix_node_head { struct radix_node *rnh_treetop; int rnh_addrsize; /* permit, but not require fixed keys */ int rnh_pktsize; /* permit, but not require fixed keys */ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ - __P((void *v, void *mask, - struct radix_node_head *head, struct radix_node nodes[])); + (void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[]); struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ - __P((void *v, void *mask, - struct radix_node_head *head, struct radix_node nodes[])); + (void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[]); struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ - __P((void *v, void *mask, struct radix_node_head *head)); + (void *v, void *mask, struct radix_node_head *head); struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ - __P((void *v, void *mask, struct radix_node_head *head)); + (void *v, void *mask, struct radix_node_head *head); struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ - __P((void *v, struct radix_node_head *head)); + (void *v, struct radix_node_head *head); struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ - __P((void *v, void *mask, struct radix_node_head *head)); + (void *v, void *mask, struct radix_node_head *head); struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ - __P((void *v, struct radix_node_head *head)); + (void *v, struct radix_node_head *head); int (*rnh_walktree) /* traverse tree */ - __P((struct radix_node_head *head, walktree_f_t *f, void *w)); + (struct radix_node_head *head, walktree_f_t *f, void *w); int (*rnh_walktree_from) /* traverse tree below a */ - __P((struct radix_node_head *head, void *a, void *m, - walktree_f_t *f, void *w)); + (struct radix_node_head *head, void *a, void *m, + walktree_f_t *f, void *w); void (*rnh_close) /* do something when the last ref drops */ - __P((struct radix_node *rn, struct radix_node_head *head)); + (struct radix_node *rn, struct radix_node_head *head); struct radix_node rnh_nodes[3]; /* empty tree for common case */ }; @@ -178,26 +170,25 @@ struct radix_node_head { #define Bcopy(a, b, n) bcopy(((char *)(a)), ((char *)(b)), (unsigned)(n)) #define Bzero(p, n) bzero((char *)(p), (int)(n)); #define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n))) -#define Free(p) free((char *)p); +#define R_Free(p) free((char *)p); #else #define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) #define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) #define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); #define R_Malloc(p, t, n) (p = (t) _MALLOC((unsigned long)(n), M_RTABLE, M_WAITOK)) -#define Free(p) FREE((caddr_t)p, M_RTABLE); +#define R_Free(p) FREE((caddr_t)p, M_RTABLE); #endif /*KERNEL*/ -void rn_init __P((void)); -int rn_inithead __P((void **, int)); -int rn_refines __P((void *, void *)); +void rn_init(void); +int rn_inithead(void **, int); +int rn_refines(void *, void *); struct radix_node - *rn_addmask __P((void *, int, int)), - *rn_addroute __P((void *, void *, struct radix_node_head *, - struct radix_node [2])), - *rn_delete __P((void *, void *, struct radix_node_head *)), - *rn_lookup __P((void *v_arg, void *m_arg, - struct radix_node_head *head)), 
- *rn_match __P((void *, struct radix_node_head *));
-
-#endif /* __APPLE_API_PRIVATE || !KERNEL */
+ *rn_addmask(void *, int, int),
+ *rn_addroute(void *, void *, struct radix_node_head *,
+ struct radix_node [2]),
+ *rn_delete(void *, void *, struct radix_node_head *),
+ *rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head),
+ *rn_match(void *, struct radix_node_head *);
+
+#endif /* PRIVATE */
 #endif /* _RADIX_H_ */
diff --git a/bsd/net/raw_cb.c b/bsd/net/raw_cb.c
index 105ef13c8..cba291fe0 100644
--- a/bsd/net/raw_cb.c
+++ b/bsd/net/raw_cb.c
@@ -60,6 +60,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -76,6 +77,7 @@
 struct rawcb_list_head rawcb_list;
 static u_long raw_sendspace = RAWSNDQ;
 static u_long raw_recvspace = RAWRCVQ;
+extern lck_mtx_t *raw_mtx; /*### global raw cb mutex for now */
 /*
 * Allocate a control block and a nominal amount
@@ -102,7 +104,9 @@ raw_attach(so, proto)
 rp->rcb_socket = so;
 rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
 rp->rcb_proto.sp_protocol = proto;
+ lck_mtx_lock(raw_mtx);
 LIST_INSERT_HEAD(&rawcb_list, rp, list);
+ lck_mtx_unlock(raw_mtx);
 return (0);
 }
@@ -117,13 +121,21 @@ raw_detach(rp)
 struct socket *so = rp->rcb_socket;
 so->so_pcb = 0;
+ so->so_flags |= SOF_PCBCLEARING;
 sofree(so);
+ if (!lck_mtx_try_lock(raw_mtx)) {
+ socket_unlock(so, 0);
+ lck_mtx_lock(raw_mtx);
+ socket_lock(so, 0);
+ }
 LIST_REMOVE(rp, list);
+ lck_mtx_unlock(raw_mtx);
 #ifdef notdef
 if (rp->rcb_laddr)
 m_freem(dtom(rp->rcb_laddr));
 rp->rcb_laddr = 0;
 #endif
+ rp->rcb_socket = NULL;
 FREE((caddr_t)(rp), M_PCB);
 }
diff --git a/bsd/net/raw_cb.h b/bsd/net/raw_cb.h
index d047ae38f..478cdd571 100644
--- a/bsd/net/raw_cb.h
+++ b/bsd/net/raw_cb.h
@@ -60,7 +60,7 @@
 #include
-#ifdef __APPLE_API_PRIVATE
+#ifdef PRIVATE
 /*
 * Raw protocol interface control block. Used
 * to tie a socket to the generic raw interface.
@@ -73,7 +73,6 @@ struct rawcb {
 struct sockproto rcb_proto; /* protocol family, protocol */
 u_long reserved[4]; /* for future use */
 };
-#endif /* __APPLE_API_PRIVATE */
 #define sotorawcb(so) ((struct rawcb *)(so)->so_pcb)
@@ -82,21 +81,20 @@
 */
 #define RAWSNDQ 8192
 #define RAWRCVQ 8192
+#endif /* PRIVATE */
-#ifdef KERNEL
-#ifdef __APPLE_API_PRIVATE
+#ifdef KERNEL_PRIVATE
 extern LIST_HEAD(rawcb_list_head, rawcb) rawcb_list;
-int raw_attach __P((struct socket *, int));
-void raw_ctlinput __P((int, struct sockaddr *, void *));
-void raw_detach __P((struct rawcb *));
-void raw_disconnect __P((struct rawcb *));
-void raw_init __P((void));
-void raw_input __P((struct mbuf *,
- struct sockproto *, struct sockaddr *, struct sockaddr *));
+int raw_attach(struct socket *, int);
+void raw_ctlinput(int, struct sockaddr *, void *);
+void raw_detach(struct rawcb *);
+void raw_disconnect(struct rawcb *);
+void raw_init(void);
+void raw_input(struct mbuf *,
+ struct sockproto *, struct sockaddr *, struct sockaddr *);
 extern struct pr_usrreqs raw_usrreqs;
-#endif /* __APPLE_API_PRIVATE */
-#endif
+#endif /* KERNEL_PRIVATE */
 #endif
diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c
index 5e8a246a9..d9bf97217 100644
--- a/bsd/net/raw_usrreq.c
+++ b/bsd/net/raw_usrreq.c
@@ -59,18 +59,38 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
+lck_mtx_t *raw_mtx; /*### global raw cb mutex for now */
+lck_attr_t *raw_mtx_attr;
+lck_grp_t *raw_mtx_grp;
+lck_grp_attr_t *raw_mtx_grp_attr;
 /*
 * Initialize raw connection block q.
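 * Now also allocates raw_mtx (and its lock group and attributes),
 * the mutex that guards rawcb_list in raw_attach()/raw_detach().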
*/ void raw_init() { + raw_mtx_grp_attr = lck_grp_attr_alloc_init(); + + lck_grp_attr_setdefault(raw_mtx_grp_attr); + + raw_mtx_grp = lck_grp_alloc_init("rawcb", raw_mtx_grp_attr); + + raw_mtx_attr = lck_attr_alloc_init(); + + lck_attr_setdefault(raw_mtx_attr); + + if ((raw_mtx = lck_mtx_alloc_init(raw_mtx_grp, raw_mtx_attr)) == NULL) { + printf("raw_init: can't alloc raw_mtx\n"); + return; + } LIST_INIT(&rawcb_list); } @@ -93,8 +113,14 @@ raw_input(m0, proto, src, dst) register struct mbuf *m = m0; register int sockets = 0; struct socket *last; + int error; +//####LD raw_input is called from many places, input & output path. We have to assume the +//####LD socket we'll find and need to append to is unlocked. +//####LD calls from the output (locked) path need to make sure the socket is not locked when +//####LD we call in raw_input last = 0; + lck_mtx_lock(raw_mtx); LIST_FOREACH(rp, &rawcb_list, list) { if (rp->rcb_proto.sp_family != proto->sp_family) continue; @@ -119,28 +145,28 @@ raw_input(m0, proto, src, dst) struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); if (n) { + socket_lock(last, 1); if (sbappendaddr(&last->so_rcv, src, - n, (struct mbuf *)0) == 0) - /* should notify about lost packet */ - m_freem(n); - else { + n, (struct mbuf *)0, &error) != 0) { sorwakeup(last); sockets++; } + socket_unlock(last, 1); } } last = rp->rcb_socket; } if (last) { + socket_lock(last, 1); if (sbappendaddr(&last->so_rcv, src, - m, (struct mbuf *)0) == 0) - m_freem(m); - else { + m, (struct mbuf *)0, &error) != 0) { sorwakeup(last); sockets++; } + socket_unlock(last, 1); } else m_freem(m); + lck_mtx_unlock(raw_mtx); } /*ARGSUSED*/ @@ -161,6 +187,13 @@ raw_uabort(struct socket *so) { struct rawcb *rp = sotorawcb(so); + lck_mtx_t * mutex_held; + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + if (rp == 0) return EINVAL; raw_disconnect(rp); @@ -175,7 +208,9 @@ static int raw_uattach(struct socket *so, int proto, struct proc *p) { struct rawcb *rp = sotorawcb(so); +#ifndef __APPLE__ int error; +#endif if (rp == 0) return EINVAL; @@ -209,6 +244,12 @@ raw_udetach(struct socket *so) { struct rawcb *rp = sotorawcb(so); + lck_mtx_t * mutex_held; + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); if (rp == 0) return EINVAL; @@ -257,6 +298,13 @@ raw_usend(struct socket *so, int flags, struct mbuf *m, int error; struct rawcb *rp = sotorawcb(so); + lck_mtx_t * mutex_held; + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + if (rp == 0) { error = EINVAL; goto release; @@ -297,6 +345,12 @@ static int raw_ushutdown(struct socket *so) { struct rawcb *rp = sotorawcb(so); + lck_mtx_t * mutex_held; + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); if (rp == 0) return EINVAL; @@ -322,5 +376,5 @@ struct pr_usrreqs raw_usrreqs = { pru_connect2_notsupp, pru_control_notsupp, raw_udetach, raw_udisconnect, pru_listen_notsupp, raw_upeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, raw_usend, pru_sense_null, raw_ushutdown, - raw_usockaddr, sosend, 
soreceive, sopoll + raw_usockaddr, sosend, soreceive, pru_sopoll_notsupp }; diff --git a/bsd/net/route.c b/bsd/net/route.c index 0f41d2360..4ab8d1d16 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -73,15 +74,22 @@ #define SA(p) ((struct sockaddr *)(p)) +extern struct domain routedomain; struct route_cb route_cb; -static struct rtstat rtstat; +__private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 }; struct radix_node_head *rt_tables[AF_MAX+1]; -static int rttrash; /* routes not in table but not freed */ +lck_mtx_t *rt_mtx; /*### global routing tables mutex for now */ +lck_attr_t *rt_mtx_attr; +lck_grp_t *rt_mtx_grp; +lck_grp_attr_t *rt_mtx_grp_attr; -static void rt_maskedcopy __P((struct sockaddr *, - struct sockaddr *, struct sockaddr *)); -static void rtable_init __P((void **)); +lck_mtx_t *route_domain_mtx; /*### global routing tables mutex for now */ +__private_extern__ int rttrash = 0; /* routes not in table but not freed */ + +static void rt_maskedcopy(struct sockaddr *, + struct sockaddr *, struct sockaddr *); +static void rtable_init(void **); __private_extern__ u_long route_generation = 0; extern int use_routegenid; @@ -101,8 +109,26 @@ rtable_init(table) void route_init() { + rt_mtx_grp_attr = lck_grp_attr_alloc_init(); + + lck_grp_attr_setdefault(rt_mtx_grp_attr); + + rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr); + + rt_mtx_attr = lck_attr_alloc_init(); + + lck_attr_setdefault(rt_mtx_attr); + + if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) { + printf("route_init: can't alloc rt_mtx\n"); + return; + } + + lck_mtx_lock(rt_mtx); rn_init(); /* initialize all zeroes, all ones, mask table */ + lck_mtx_unlock(rt_mtx); rtable_init((void **)rt_tables); + route_domain_mtx = routedomain.dom_mtx; } /* @@ -116,34 +142,41 @@ rtalloc(ro) } void -rtalloc_ign(ro, ignore) +rtalloc_ign_locked(ro, ignore) register struct route *ro; u_long ignore; { struct rtentry *rt; - int s; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; /* XXX - We are probably always at splnet here already. */ - s = splnet(); - rtfree(rt); + rtfree_locked(rt); ro->ro_rt = NULL; - splx(s); } - ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore); + ro->ro_rt = rtalloc1_locked(&ro->ro_dst, 1, ignore); if (ro->ro_rt) ro->ro_rt->generation_id = route_generation; } +void +rtalloc_ign(ro, ignore) + register struct route *ro; + u_long ignore; +{ + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); + rtalloc_ign_locked(ro, ignore); + lck_mtx_unlock(rt_mtx); +} /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. */ struct rtentry * -rtalloc1(dst, report, ignflags) - register struct sockaddr *dst; +rtalloc1_locked(dst, report, ignflags) + const struct sockaddr *dst; int report; u_long ignflags; { @@ -153,8 +186,7 @@ rtalloc1(dst, report, ignflags) struct rtentry *newrt = 0; struct rt_addrinfo info; u_long nflags; - int s = splnet(), err = 0, msgtype = RTM_MISS; - + int err = 0, msgtype = RTM_MISS; /* * Look up the address in the table for that Address Family */ @@ -172,7 +204,7 @@ rtalloc1(dst, report, ignflags) * If it requires that it be cloned, do so. * (This implies it wasn't a HOST route.) 
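 * The resolve goes through rtrequest_locked() below because this
 * path already runs with rt_mtx held.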
*/ - err = rtrequest(RTM_RESOLVE, dst, SA(0), + err = rtrequest_locked(RTM_RESOLVE, dst, SA(0), SA(0), 0, &newrt); if (err) { /* @@ -211,27 +243,46 @@ rtalloc1(dst, report, ignflags) rt_missmsg(msgtype, &info, 0, err); } } - splx(s); return (newrt); } +struct rtentry * +rtalloc1(dst, report, ignflags) + register struct sockaddr *dst; + int report; + u_long ignflags; +{ + struct rtentry * entry; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); + entry = rtalloc1_locked(dst, report, ignflags); + lck_mtx_unlock(rt_mtx); + return (entry); +} + /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void -rtfree(rt) +rtfree_locked(rt) register struct rtentry *rt; { /* * find the tree for that address family * Note: in the case of igmp packets, there might not be an rnh */ - register struct radix_node_head *rnh = - rt_tables[rt_key(rt)->sa_family]; + register struct radix_node_head *rnh; - if (rt == 0) - panic("rtfree"); + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + + /* See 3582620 - We hit this during the transition from funnels to locks */ + if (rt == 0) { + printf("rtfree - rt is NULL\n"); + return; + } + + rnh = rt_tables[rt_key(rt)->sa_family]; /* * decrement the reference count by one and if it reaches 0, @@ -258,7 +309,7 @@ rtfree(rt) #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { - printf("rtfree: %p not freed (neg refs)\n", rt); + printf("rtfree: %p not freed (neg refs) cnt=%d\n", rt, rt->rt_refcnt); return; } #endif @@ -268,20 +319,11 @@ rtfree(rt) * e.g other routes and ifaddrs. */ if (rt->rt_parent) - rtfree(rt->rt_parent); + rtfree_locked(rt->rt_parent); - if(rt->rt_ifa && !(rt->rt_parent && rt->rt_parent->rt_ifa == rt->rt_ifa)) { - /* - * Only release the ifa if our parent doesn't hold it for us. - * The parent route is responsible for holding a reference - * to the ifa for us. Ifa refcounts are 16bit, if every - * cloned route held a reference, the 16bit refcount may - * rollover, making a mess :( - * - * FreeBSD solved this by making the ifa_refcount 32bits, but - * we can't do that since it changes the size of the ifaddr struct. - */ + if(rt->rt_ifa) { ifafree(rt->rt_ifa); + rt->rt_ifa = NULL; } /* @@ -289,15 +331,25 @@ rtfree(rt) * This also frees the gateway, as they are always malloc'd * together. */ - Free(rt_key(rt)); + R_Free(rt_key(rt)); /* * and the rtentry itself of course */ - Free(rt); + R_Free(rt); } } +void +rtfree(rt) + register struct rtentry *rt; +{ + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); + rtfree_locked(rt); + lck_mtx_unlock(rt_mtx); +} + /* * Decrements the refcount but does not free the route when * the refcount reaches zero. 
Unless you have really good reason, @@ -306,6 +358,8 @@ rtfree(rt) void rtunref(struct rtentry* rt) { + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (rt == NULL) panic("rtunref"); rt->rt_refcnt--; @@ -321,6 +375,8 @@ rtunref(struct rtentry* rt) void rtref(struct rtentry* rt) { + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (rt == NULL) panic("rtref"); @@ -336,15 +392,15 @@ rtsetifa(struct rtentry *rt, struct ifaddr* ifa) if (rt->rt_ifa == ifa) return; - /* Release the old ifa if it isn't our parent route's ifa */ - if (rt->rt_ifa && !(rt->rt_parent && rt->rt_parent->rt_ifa == rt->rt_ifa)) + /* Release the old ifa */ + if (rt->rt_ifa) ifafree(rt->rt_ifa); /* Set rt_ifa */ rt->rt_ifa = ifa; - /* Take a reference to the ifa if it isn't our parent route's ifa */ - if (rt->rt_ifa && !(rt->rt_parent && rt->rt_parent->rt_ifa == ifa)) + /* Take a reference to the ifa */ + if (rt->rt_ifa) ifaref(rt->rt_ifa); } @@ -352,43 +408,31 @@ void ifafree(ifa) register struct ifaddr *ifa; { + int i, oldval; + u_char *ptr = (u_char*)ifa; + if (ifa == NULL) panic("ifafree"); - if (ifa->ifa_refcnt == 0) { -#ifdef __APPLE__ - /* Detect case where an ifa is being freed before it should */ - struct ifnet* ifp; - /* Verify this ifa isn't attached to an interface */ - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { - struct ifaddr *ifaInUse; - for (ifaInUse = ifp->if_addrhead.tqh_first; ifaInUse; ifaInUse = ifaInUse->ifa_link.tqe_next) { - if (ifa == ifaInUse) { - /* - * This is an ugly hack done because we can't move to a 32 bit - * refcnt like bsd has. We have to maintain binary compatibility - * in our kernel, unlike FreeBSD. - */ - log(LOG_ERR, "ifa attached to ifp is being freed, leaking insted\n"); - return; - } - } + + oldval = OSAddAtomic(-1, &ifa->ifa_refcnt); + + if (oldval == 0) { + if ((ifa->ifa_flags & IFA_ATTACHED) != 0) { + panic("ifa attached to ifp is being freed\n"); } -#endif FREE(ifa, M_IFADDR); } - else - ifa->ifa_refcnt--; } -#ifdef __APPLE__ void ifaref(struct ifaddr *ifa) { if (ifa == NULL) panic("ifaref"); - ifa->ifa_refcnt++; + + if (OSAddAtomic(1, &ifa->ifa_refcnt) == 0xffffffff) + panic("ifaref - reference count rolled over!"); } -#endif /* * Force a routing table entry to the specified @@ -409,14 +453,18 @@ rtredirect(dst, gateway, netmask, flags, src, rtp) int error = 0; short *stat = 0; struct rt_addrinfo info; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; + + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway)) == 0) { error = ENETUNREACH; goto out; } - rt = rtalloc1(dst, 0, 0UL); + + rt = rtalloc1_locked(dst, 0, 0UL); /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. 
If it redirects us to ourselves, @@ -427,8 +475,20 @@ rtredirect(dst, gateway, netmask, flags, src, rtp) if (!(flags & RTF_DONE) && rt && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; - else if (ifa_ifwithaddr(gateway)) - error = EHOSTUNREACH; + else { + ifafree(ifa); + if ((ifa = ifa_ifwithaddr(gateway))) { + ifafree(ifa); + ifa = NULL; + error = EHOSTUNREACH; + } + } + + if (ifa) { + ifafree(ifa); + ifa = NULL; + } + if (error) goto done; /* @@ -451,7 +511,7 @@ rtredirect(dst, gateway, netmask, flags, src, rtp) */ create: flags |= RTF_GATEWAY | RTF_DYNAMIC; - error = rtrequest((int)RTM_ADD, dst, gateway, + error = rtrequest_locked((int)RTM_ADD, dst, gateway, netmask, flags, (struct rtentry **)0); stat = &rtstat.rts_dynamic; @@ -475,7 +535,7 @@ done: if (rtp && !error) *rtp = rt; else - rtfree(rt); + rtfree_locked(rt); } out: if (error) @@ -488,6 +548,7 @@ out: info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg(RTM_REDIRECT, &info, flags, error); + lck_mtx_unlock(rt_mtx); } /* @@ -512,11 +573,15 @@ rtioctl(req, data, p) } struct ifaddr * -ifa_ifwithroute(flags, dst, gateway) - int flags; - struct sockaddr *dst, *gateway; +ifa_ifwithroute( + int flags, + const struct sockaddr *dst, + const struct sockaddr *gateway) { - register struct ifaddr *ifa; + + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + + struct ifaddr *ifa = 0; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, @@ -525,7 +590,6 @@ ifa_ifwithroute(flags, dst, gateway) * as our clue to the interface. Otherwise * we can use the local address. */ - ifa = 0; if (flags & RTF_HOST) { ifa = ifa_ifwithdstaddr(dst); } @@ -542,18 +606,23 @@ ifa_ifwithroute(flags, dst, gateway) if (ifa == 0) ifa = ifa_ifwithnet(gateway); if (ifa == 0) { - struct rtentry *rt = rtalloc1(dst, 0, 0UL); + struct rtentry *rt = rtalloc1_locked(dst, 0, 0UL); if (rt == 0) return (0); + ifa = rt->rt_ifa; + if (ifa) + ifaref(ifa); rtunref(rt); - if ((ifa = rt->rt_ifa) == 0) - return (0); + if (ifa == 0) + return 0; } if (ifa->ifa_addr->sa_family != dst->sa_family) { - struct ifaddr *oifa = ifa; - ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); - if (ifa == 0) - ifa = oifa; + struct ifaddr *newifa; + newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); + if (newifa != 0) { + ifafree(ifa); + ifa = newifa; + } } return (ifa); } @@ -572,20 +641,23 @@ struct rtfc_arg { * Do appropriate manipulations of a routing tree given * all the bits of info needed */ -int -rtrequest(req, dst, gateway, netmask, flags, ret_nrt) - int req, flags; - struct sockaddr *dst, *gateway, *netmask; - struct rtentry **ret_nrt; +rtrequest_locked( + int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt) { - int s = splnet(); int error = 0; + int error = 0; register struct rtentry *rt; register struct radix_node *rn; register struct radix_node_head *rnh; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; struct sockaddr *ndst; #define senderr(x) { error = x ; goto bad; } + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); /* * Find the correct routing tree to use for this Address Family */ @@ -626,7 +698,7 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) */ if (rt->rt_gwroute) { rt = rt->rt_gwroute; - rtfree(rt); + rtfree_locked(rt); (rt = (struct rtentry *)rn)->rt_gwroute = 0; } @@ -644,6 +716,7 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0)); + ifa 
= NULL; /* * one more rtentry floating around that is not @@ -660,7 +733,7 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) *ret_nrt = rt; else if (rt->rt_refcnt <= 0) { rt->rt_refcnt++; /* make a 1->0 transition */ - rtfree(rt); + rtfree_locked(rt); } break; @@ -668,6 +741,7 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) if (ret_nrt == 0 || (rt = *ret_nrt) == 0) senderr(EINVAL); ifa = rt->rt_ifa; + ifaref(ifa); flags = rt->rt_flags & ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); flags |= RTF_WASCLONED; @@ -694,7 +768,7 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) * also add the rt_gwroute if possible. */ if ((error = rt_setgate(rt, dst, gateway)) != 0) { - Free(rt); + R_Free(rt); senderr(error); } @@ -716,13 +790,8 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ - /* - * Note that we do not use rtsetifa here because - * rt_parent has not been setup yet. - */ - ifaref(ifa); - rt->rt_ifa = ifa; - rt->rt_ifp = ifa->ifa_ifp; + rtsetifa(rt, ifa); + rt->rt_ifp = rt->rt_ifa->ifa_ifp; /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ @@ -737,19 +806,19 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) * mechanism, then we just blow it away and retry * the insertion of the new one. */ - rt2 = rtalloc1(dst, 0, RTF_PRCLONING); + rt2 = rtalloc1_locked(dst, 0, RTF_PRCLONING); if (rt2 && rt2->rt_parent) { - rtrequest(RTM_DELETE, + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); - rtfree(rt2); + rtfree_locked(rt2); rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, rnh, rt->rt_nodes); } else if (rt2) { /* undo the extra ref we got */ - rtfree(rt2); + rtfree_locked(rt2); } } @@ -759,12 +828,12 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) */ if (rn == 0) { if (rt->rt_gwroute) - rtfree(rt->rt_gwroute); + rtfree_locked(rt->rt_gwroute); if (rt->rt_ifa) { ifafree(rt->rt_ifa); } - Free(rt_key(rt)); - Free(rt); + R_Free(rt_key(rt)); + R_Free(rt); senderr(EEXIST); } @@ -780,13 +849,6 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { rt->rt_parent = (*ret_nrt); rtref(*ret_nrt); - - /* - * If our parent is holding a reference to the same ifa, - * free our reference and rely on the parent holding it. - */ - if (rt->rt_parent && rt->rt_parent->rt_ifa == rt->rt_ifa) - ifafree(rt->rt_ifa); } } @@ -796,6 +858,8 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) */ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0)); + ifafree(ifa); + ifa = 0; /* * We repeat the same procedure from rt_setgate() here because @@ -821,10 +885,27 @@ rtrequest(req, dst, gateway, netmask, flags, ret_nrt) break; } bad: - splx(s); + if (ifa) + ifafree(ifa); return (error); } +int +rtrequest( + int req, + struct sockaddr *dst, + struct sockaddr *gateway, + struct sockaddr *netmask, + int flags, + struct rtentry **ret_nrt) +{ + int error; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); + error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt); + lck_mtx_unlock(rt_mtx); + return (error); +} /* * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family'' * (i.e., the routes related to it by the operation of cloning). 
This @@ -840,8 +921,10 @@ rt_fixdelete(rn, vp) struct rtentry *rt = (struct rtentry *)rn; struct rtentry *rt0 = vp; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (rt->rt_parent == rt0 && !(rt->rt_flags & RTF_PINNED)) { - return rtrequest(RTM_DELETE, rt_key(rt), + return rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } @@ -882,6 +965,8 @@ rt_fixchange(rn, vp) printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0); #endif + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (!rt->rt_parent || (rt->rt_flags & RTF_PINNED)) { #ifdef DEBUG if(rtfcdebug) printf("no parent or pinned\n"); @@ -893,7 +978,7 @@ rt_fixchange(rn, vp) #ifdef DEBUG if(rtfcdebug) printf("parent match\n"); #endif - return rtrequest(RTM_DELETE, rt_key(rt), + return rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } @@ -947,7 +1032,7 @@ rt_fixchange(rn, vp) #ifdef DEBUG if(rtfcdebug) printf("deleting\n"); #endif - return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, + return rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); } @@ -966,6 +1051,9 @@ rt_setgate(rt0, dst, gate) * will interfere with keeping LLINFO in the routing * table, so disallow it. */ + + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) == (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) && @@ -975,7 +1063,7 @@ rt_setgate(rt0, dst, gate) * or a routing redirect, so try to delete it. */ if (rt_key(rt0)) - rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt0), + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt0), rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0); return EADDRNOTAVAIL; } @@ -1011,7 +1099,7 @@ rt_setgate(rt0, dst, gate) */ if (old) { Bcopy(dst, new, dlen); - Free(old); + R_Free(old); } /* @@ -1019,7 +1107,7 @@ rt_setgate(rt0, dst, gate) * so drop it. */ if (rt->rt_gwroute) { - rt = rt->rt_gwroute; rtfree(rt); + rt = rt->rt_gwroute; rtfree_locked(rt); rt = rt0; rt->rt_gwroute = 0; } /* @@ -1033,9 +1121,9 @@ rt_setgate(rt0, dst, gate) * This is obviously mandatory when we get rt->rt_output(). */ if (rt->rt_flags & RTF_GATEWAY) { - rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING); + rt->rt_gwroute = rtalloc1_locked(gate, 1, RTF_PRCLONING); if (rt->rt_gwroute == rt) { - rtfree(rt->rt_gwroute); + rtfree_locked(rt->rt_gwroute); rt->rt_gwroute = 0; return EDQUOT; /* failure */ } @@ -1091,6 +1179,19 @@ int rtinit(ifa, cmd, flags) register struct ifaddr *ifa; int cmd, flags; +{ + int error; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rt_mtx); + error = rtinit_locked(ifa, cmd, flags); + lck_mtx_unlock(rt_mtx); + return (error); +} + +int +rtinit_locked(ifa, cmd, flags) + register struct ifaddr *ifa; + int cmd, flags; { register struct rtentry *rt; register struct sockaddr *dst; @@ -1113,8 +1214,9 @@ rtinit(ifa, cmd, flags) */ if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { m = m_get(M_DONTWAIT, MT_SONAME); - if (m == NULL) + if (m == NULL) { return(ENOBUFS); + } deldst = mtod(m, struct sockaddr *); rt_maskedcopy(dst, deldst, ifa->ifa_netmask); dst = deldst; @@ -1125,7 +1227,7 @@ rtinit(ifa, cmd, flags) * We set "report" to FALSE so that if it doesn't exist, * it doesn't report an error or clone a route, etc. etc. */ - rt = rtalloc1(dst, 0, 0UL); + rt = rtalloc1_locked(dst, 0, 0UL); if (rt) { /* * Ok so we found the rtentry. 
it has an extra reference @@ -1155,6 +1257,7 @@ rtinit(ifa, cmd, flags) * it doesn't exist, we could just return at this point * with an "ELSE" clause, but apparently not.. */ + lck_mtx_unlock(rt_mtx); return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } @@ -1163,7 +1266,7 @@ rtinit(ifa, cmd, flags) /* * Do the actual request */ - error = rtrequest(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask, + error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask, flags | ifa->ifa_flags, &nrt); if (m) (void) m_free(m); @@ -1180,7 +1283,7 @@ rtinit(ifa, cmd, flags) route_generation++; if (rt->rt_refcnt <= 0) { rt->rt_refcnt++; /* need a 1->0 transition to free */ - rtfree(rt); + rtfree_locked(rt); } } @@ -1232,6 +1335,6 @@ rtinit(ifa, cmd, flags) rt_newaddrmsg(cmd, ifa, error, nrt); if (use_routegenid) route_generation++; - } + } return (error); } diff --git a/bsd/net/route.h b/bsd/net/route.h index 85f010666..5d1ab8ab2 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -58,6 +58,8 @@ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ #include +#include +#include /* * Kernel resident routing tables. @@ -71,7 +73,8 @@ * to a routing entry. These are often held by protocols * in their control blocks, e.g. inpcb. */ -#if !defined(KERNEL) || defined(__APPLE_API_PRIVATE) +#ifdef PRIVATE +struct rtentry; struct route { struct rtentry *ro_rt; struct sockaddr ro_dst; @@ -79,7 +82,7 @@ struct route { }; #else struct route; -#endif +#endif /* PRIVATE */ /* * These numbers are used by reliable protocols for determining @@ -89,7 +92,7 @@ struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ - u_long rmx_expire; /* lifetime for route, e.g. redirect */ + int32_t rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ @@ -110,7 +113,6 @@ struct rt_metrics { /* * XXX kernel function pointer `rt_output' is visible to applications. */ -struct mbuf; /* * We distinguish between routes to hosts and routes to networks, @@ -120,10 +122,10 @@ struct mbuf; * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ +#ifdef PRIVATE #ifndef RNF_NORMAL #include #endif -#ifdef __APPLE_API_UNSTABLE struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ #define rt_key(r) ((struct sockaddr *)((r)->rt_nodes->rn_key)) @@ -138,14 +140,15 @@ struct rtentry { caddr_t rt_llinfo; /* pointer to link level info cache */ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ - int (*rt_output) __P((struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *)); + int (*rt_output)(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); /* output routine for this (rt,if) */ struct rtentry *rt_parent; /* cloning parent of this route */ u_long generation_id; /* route generation id */ }; -#endif /* __APPLE_API_UNSTABLE */ +#endif /* PRIVATE */ +#ifdef __APPLE_API_OBSOLETE /* * Following structure necessary for 4.3 compatibility; * We should eventually move it to a compat file. 
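Every route.c change above follows the same mechanical shape: the body of each entry point (rtalloc_ign, rtalloc1, rtfree, rtrequest, rtinit) moves into a *_locked variant that asserts the new global rt_mtx is held, and the old name becomes a thin wrapper that takes the mutex, replacing the former splnet()/splx() bracketing. A minimal sketch of the pattern, using only the lck_mtx calls that appear in the patch; example_op and do_route_work are invented names, not xnu functions:

	static int
	example_op_locked(struct rtentry *rt)
	{
		/* Worker: the caller must already hold the global rt_mtx. */
		lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
		return do_route_work(rt);	/* hypothetical body */
	}

	int
	example_op(struct rtentry *rt)
	{
		int error;

		/* Wrapper: catch callers that already hold the lock. */
		lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
		lck_mtx_lock(rt_mtx);
		error = example_op_locked(rt);
		lck_mtx_unlock(rt_mtx);
		return (error);
	}

Code that already runs under rt_mtx (rt_fixdelete, rt_fixchange, most of route_output) calls the _locked variants directly; the NOTOWNED assertion in the wrapper is what catches a caller that would otherwise deadlock on the non-recursive mutex.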
@@ -159,8 +162,11 @@ struct ortentry {
 	u_long	rt_use;			/* raw # packets forwarded */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 };
+#endif /* __APPLE_API_OBSOLETE */
 
+#ifdef PRIVATE
 #define rt_use rt_rmx.rmx_pksent
+#endif /* PRIVATE */
 
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
@@ -181,7 +187,7 @@ struct ortentry {
 #define RTF_PRCLONING	0x10000		/* protocol requires cloning */
 #define RTF_WASCLONED	0x20000		/* route generated through cloning */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
-/*			0x80000		unused */
+				/* 0x80000 unused */
 #define RTF_PINNED	0x100000	/* future use */
 #define	RTF_LOCAL	0x200000	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
@@ -198,6 +204,7 @@ struct rtstat {
 	short	rts_unreach;		/* lookups which failed */
 	short	rts_wildcard;		/* lookups satisfied by a wildcard */
 };
+
 /*
  * Structures for routing messages.
  */
@@ -208,14 +215,30 @@ struct rt_msghdr {
 	u_short	rtm_index;	/* index for associated ifp */
 	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
 	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
-	pid_t	rtm_pid;	/* identify sender */
-	int	rtm_seq;	/* for sender to identify action */
-	int	rtm_errno;	/* why failed */
+	pid_t	rtm_pid;		/* identify sender */
+	int	rtm_seq;		/* for sender to identify action */
+	int	rtm_errno;		/* why failed */
 	int	rtm_use;	/* from rtentry */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
+struct rt_msghdr2 {
+	u_short	rtm_msglen;	/* to skip over non-understood messages */
+	u_char	rtm_version;	/* future binary compatibility */
+	u_char	rtm_type;	/* message type */
+	u_short	rtm_index;	/* index for associated ifp */
+	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
+	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
+	int32_t	rtm_refcnt;	/* reference count */
+	int	rtm_parentflags; /* flags of the parent route */
+	int	rtm_reserved;	/* reserved field set to 0 */
+	int	rtm_use;	/* from rtentry */
+	u_long	rtm_inits;	/* which metrics we are initializing */
+	struct	rt_metrics rtm_rmx; /* metrics themselves */
+};
+
+
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
 /*
@@ -237,9 +260,12 @@ struct rt_msghdr {
 #define RTM_IFINFO	0xe	/* iface going up/down etc. */
 #define RTM_NEWMADDR	0xf	/* mcast group membership being added to if */
 #define RTM_DELMADDR	0x10	/* mcast group membership being deleted */
-#ifdef KERNEL_PRIVATE
+#ifdef PRIVATE
 #define RTM_GET_SILENT	0x11
-#endif
+#endif /* PRIVATE */
+#define RTM_IFINFO2	0x12	/* */
+#define RTM_NEWMADDR2	0x13	/* */
+#define RTM_GET2	0x14	/* */
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
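The rt_msghdr2 layout above is what the new RTM_GET2/NET_RT_DUMP2 path in rtsock.c (later in this patch) emits: compared with rt_msghdr it reports the route's refcount and its parent's flags in place of the pid/seq/errno fields, which only make sense for request/reply traffic. A minimal userland sketch of consuming such a dump, assuming only the standard CTL_NET/PF_ROUTE sysctl layout; dump_routes is an invented name:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <sys/sysctl.h>
	#include <net/route.h>
	#include <stdio.h>
	#include <stdlib.h>

	int
	dump_routes(void)
	{
		int mib[6] = { CTL_NET, PF_ROUTE, 0, 0 /* all AFs */,
		    NET_RT_DUMP2, 0 };
		size_t len;
		char *buf, *next;

		/* First call sizes the buffer, second fetches the dump. */
		if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)
			return (-1);
		if ((buf = malloc(len)) == NULL)
			return (-1);
		if (sysctl(mib, 6, buf, &len, NULL, 0) < 0) {
			free(buf);
			return (-1);
		}
		for (next = buf; next < buf + len; ) {
			struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)next;

			printf("flags 0x%x refcnt %d parent 0x%x\n",
			    rtm->rtm_flags, (int)rtm->rtm_refcnt,
			    rtm->rtm_parentflags);
			next += rtm->rtm_msglen; /* step to the next record */
		}
		free(buf);
		return (0);
	}

Each record is self-sizing through rtm_msglen, so a reader can skip message types it does not understand; that is why both header variants lead with that field.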
@@ -292,53 +318,45 @@ struct route_cb {
 	int	any_count;
 };
 
-#ifdef KERNEL
-#ifndef __APPLE__
-#define RTFREE(rt) \
-	do { \
-		if ((rt)->rt_refcnt <= 1) \
-			rtfree(rt); \
-		else \
-			(rt)->rt_refcnt--; \
-	} while (0)
-#else
+#ifdef KERNEL_PRIVATE
 #define RTFREE(rt) rtfree(rt)
-#endif
-
-#ifdef __APPLE_API_PRIVATE
 extern struct route_cb route_cb;
 extern struct radix_node_head *rt_tables[AF_MAX+1];
 
 struct ifmultiaddr;
 struct proc;
 
-void	 route_init __P((void));
-void	 rt_ifmsg __P((struct ifnet *));
-void	 rt_missmsg __P((int, struct rt_addrinfo *, int, int));
-void	 rt_newaddrmsg __P((int, struct ifaddr *, int, struct rtentry *));
-void	 rt_newmaddrmsg __P((int, struct ifmultiaddr *));
-int	 rt_setgate __P((struct rtentry *,
-	    struct sockaddr *, struct sockaddr *));
-void	 rtalloc __P((struct route *));
-void	 rtalloc_ign __P((struct route *, u_long));
+void	 route_init(void);
+void	 rt_ifmsg(struct ifnet *);
+void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
+void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
+void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
+int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
+void	 rtalloc(struct route *);
+void	 rtalloc_ign(struct route *, u_long);
+struct rtentry *
+	 rtalloc1(struct sockaddr *, int, u_long);
 struct rtentry *
-	 rtalloc1 __P((struct sockaddr *, int, u_long));
-void	 rtfree __P((struct rtentry *));
-void	 rtref __P((struct rtentry *));
+	 rtalloc1_locked(const struct sockaddr *, int, u_long);
+void	 rtfree(struct rtentry *);
+void	 rtfree_locked(struct rtentry *);
+void	 rtref(struct rtentry *);
 /*
  * rtunref will decrement the refcount, rtfree will decrement and free if
 * the refcount has reached zero and the route is not up.
 * Unless you have good reason to do otherwise, use rtfree.
*/ -void rtunref __P((struct rtentry *)); -void rtsetifa __P((struct rtentry *, struct ifaddr *)); -int rtinit __P((struct ifaddr *, int, int)); -int rtioctl __P((int, caddr_t, struct proc *)); -void rtredirect __P((struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *, struct rtentry **)); -int rtrequest __P((int, struct sockaddr *, - struct sockaddr *, struct sockaddr *, int, struct rtentry **)); -#endif /* __APPLE_API_PRIVATE */ -#endif +void rtunref(struct rtentry *); +void rtsetifa(struct rtentry *, struct ifaddr *); +int rtinit(struct ifaddr *, int, int); +int rtinit_locked(struct ifaddr *, int, int); +int rtioctl(int, caddr_t, struct proc *); +void rtredirect(struct sockaddr *, struct sockaddr *, + struct sockaddr *, int, struct sockaddr *, struct rtentry **); +int rtrequest(int, struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **); +int rtrequest_locked(int, struct sockaddr *, + struct sockaddr *, struct sockaddr *, int, struct rtentry **); +#endif KERNEL_PRIVATE #endif diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c index 8a9742722..54abd75f0 100644 --- a/bsd/net/rtsock.c +++ b/bsd/net/rtsock.c @@ -67,18 +67,28 @@ #include #include #include +#include #include #include #include #include +#include + +extern void m_copydata(struct mbuf *, int, int, caddr_t); +extern void m_copyback(struct mbuf *, int, int, caddr_t); + +extern struct rtstat rtstat; +extern int rttrash; + MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); -static struct sockaddr route_dst = { 2, PF_ROUTE, }; -static struct sockaddr route_src = { 2, PF_ROUTE, }; -static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; -static struct sockproto route_proto = { PF_ROUTE, }; +extern lck_mtx_t *rt_mtx; +static struct sockaddr route_dst = { 2, PF_ROUTE, { 0, } }; +static struct sockaddr route_src = { 2, PF_ROUTE, { 0, } }; +static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, { 0, } }; +static struct sockproto route_proto = { PF_ROUTE, 0 }; struct walkarg { int w_tmemsize; @@ -88,16 +98,16 @@ struct walkarg { }; static struct mbuf * - rt_msg1 __P((int, struct rt_addrinfo *)); -static int rt_msg2 __P((int, - struct rt_addrinfo *, caddr_t, struct walkarg *)); -static int rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *)); -static int sysctl_dumpentry __P((struct radix_node *rn, void *vw)); -static int sysctl_iflist __P((int af, struct walkarg *w)); -static int route_output __P((struct mbuf *, struct socket *)); -static void rt_setmetrics __P((u_long, struct rt_metrics *, struct rt_metrics *)); -static void rt_setif __P((struct rtentry *, struct sockaddr *, struct sockaddr *, - struct sockaddr *)); + rt_msg1(int, struct rt_addrinfo *); +static int rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *); +static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); +static int sysctl_dumpentry(struct radix_node *rn, void *vw); +static int sysctl_iflist(int af, struct walkarg *w); +static int sysctl_iflist2(int af, struct walkarg *w); +static int route_output(struct mbuf *, struct socket *); +static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *); +static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *, + struct sockaddr *); /* Sleazy use of local variables throughout file, warning!!!! 
*/ #define dst info.rti_info[RTAX_DST] @@ -115,20 +125,19 @@ static void rt_setif __P((struct rtentry *, struct sockaddr *, struct sockaddr * static int rts_abort(struct socket *so) { - int s, error; - s = splnet(); + int error; + error = raw_usrreqs.pru_abort(so); - splx(s); return error; } /* pru_accept is EOPNOTSUPP */ static int -rts_attach(struct socket *so, int proto, struct proc *p) +rts_attach(struct socket *so, int proto, __unused struct proc *p) { struct rawcb *rp; - int s, error; + int error; if (sotorawcb(so) != 0) return EISCONN; /* XXX panic? */ @@ -144,17 +153,18 @@ rts_attach(struct socket *so, int proto, struct proc *p) * Probably we should try to do more of this work beforehand and * eliminate the spl. */ - s = splnet(); so->so_pcb = (caddr_t)rp; error = raw_attach(so, proto); /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */ rp = sotorawcb(so); if (error) { - splx(s); FREE(rp, M_PCB); so->so_pcb = 0; + so->so_flags |= SOF_PCBCLEARING; return error; } + socket_lock(so, 1); switch(rp->rcb_proto.sp_protocol) { +//####LD route_cb needs looking case AF_INET: route_cb.ip_count++; break; @@ -172,7 +182,7 @@ rts_attach(struct socket *so, int proto, struct proc *p) route_cb.any_count++; soisconnected(so); so->so_options |= SO_USELOOPBACK; - splx(s); + socket_unlock(so, 1); return 0; } @@ -287,36 +297,41 @@ rts_sockaddr(struct socket *so, struct sockaddr **nam) } static struct pr_usrreqs route_usrreqs = { - rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect, - pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect, - pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, - rts_send, pru_sense_null, rts_shutdown, rts_sockaddr, - sosend, soreceive, sopoll + rts_abort, pru_accept_notsupp, rts_attach, rts_bind, + rts_connect, pru_connect2_notsupp, pru_control_notsupp, + rts_detach, rts_disconnect, pru_listen_notsupp, rts_peeraddr, + pru_rcvd_notsupp, pru_rcvoob_notsupp, rts_send, pru_sense_null, + rts_shutdown, rts_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; /*ARGSUSED*/ static int route_output(m, so) - register struct mbuf *m; + struct mbuf *m; struct socket *so; { - register struct rt_msghdr *rtm = 0; - register struct rtentry *rt = 0; + struct rt_msghdr *rtm = 0; + struct rtentry *rt = 0; struct rtentry *saved_nrt = 0; struct radix_node_head *rnh; struct rt_addrinfo info; int len, error = 0; struct ifnet *ifp = 0; - struct ifaddr *ifa = 0; +#ifndef __APPLE__ struct proc *curproc = current_proc(); +#endif int sendonlytoself = 0; #define senderr(e) { error = e; goto flush;} - if (m == 0 || ((m->m_len < sizeof(long)) && - (m = m_pullup(m, sizeof(long))) == 0)) + if (m == 0 || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); + + /* unlock the socket (but keep a reference) it won't be accessed until raw_input appends to it. 
*/ + socket_unlock(so, 0); + lck_mtx_lock(rt_mtx); + len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { @@ -353,15 +368,17 @@ route_output(m, so) dst = 0; senderr(EPERM); } - rtm->rtm_pid = curproc->p_pid; + + rtm->rtm_pid = proc_selfpid(); info.rti_addrs = rtm->rtm_addrs; if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { dst = 0; senderr(EINVAL); } if (dst == 0 || (dst->sa_family >= AF_MAX) - || (gate != 0 && (gate->sa_family >= AF_MAX))) + || (gate != 0 && (gate->sa_family >= AF_MAX))) { senderr(EINVAL); + } if (genmask) { struct radix_node *t; t = rn_addmask((caddr_t)genmask, 0, 1); @@ -371,10 +388,10 @@ route_output(m, so) senderr(ENOBUFS); } switch (rtm->rtm_type) { - - case RTM_ADD: - if (gate == 0) - senderr(EINVAL); + + case RTM_ADD: + if (gate == 0) + senderr(EINVAL); #ifdef __APPLE__ /* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954) @@ -389,169 +406,174 @@ route_output(m, so) * confusing the routing table with a wrong route to the previous default gateway */ { - extern int check_routeselfref; + extern int check_routeselfref; #define satosinaddr(sa) (((struct sockaddr_in *)sa)->sin_addr.s_addr) - - if (check_routeselfref && (dst && dst->sa_family == AF_INET) && - (netmask && satosinaddr(netmask) == INADDR_BROADCAST) && - (gate && satosinaddr(dst) == satosinaddr(gate))) { - log(LOG_WARNING, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n", - (ntohl(satosinaddr(gate)>>24))&0xff, - (ntohl(satosinaddr(gate)>>16))&0xff, - (ntohl(satosinaddr(gate)>>8))&0xff, - (ntohl(satosinaddr(gate)))&0xff); - - senderr(EINVAL); - } + + if (check_routeselfref && (dst && dst->sa_family == AF_INET) && + (netmask && satosinaddr(netmask) == INADDR_BROADCAST) && + (gate && satosinaddr(dst) == satosinaddr(gate))) { + log(LOG_WARNING, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n", + (ntohl(satosinaddr(gate)>>24))&0xff, + (ntohl(satosinaddr(gate)>>16))&0xff, + (ntohl(satosinaddr(gate)>>8))&0xff, + (ntohl(satosinaddr(gate)))&0xff); + + senderr(EINVAL); + } } #endif - error = rtrequest(RTM_ADD, dst, gate, netmask, - rtm->rtm_flags, &saved_nrt); - if (error == 0 && saved_nrt) { + error = rtrequest_locked(RTM_ADD, dst, gate, netmask, + rtm->rtm_flags, &saved_nrt); + if (error == 0 && saved_nrt) { #ifdef __APPLE__ - /* - * If the route request specified an interface with - * IFA and/or IFP, we set the requested interface on - * the route with rt_setif. It would be much better - * to do this inside rtrequest, but that would - * require passing the desired interface, in some - * form, to rtrequest. Since rtrequest is called in - * so many places (roughly 40 in our source), adding - * a parameter is to much for us to swallow; this is - * something for the FreeBSD developers to tackle. - * Instead, we let rtrequest compute whatever - * interface it wants, then come in behind it and - * stick in the interface that we really want. This - * works reasonably well except when rtrequest can't - * figure out what interface to use (with - * ifa_withroute) and returns ENETUNREACH. Ideally - * it shouldn't matter if rtrequest can't figure out - * the interface if we're going to explicitly set it - * ourselves anyway. But practically we can't - * recover here because rtrequest will not do any of - * the work necessary to add the route if it can't - * find an interface. 
As long as there is a default - * route that leads to some interface, rtrequest will - * find an interface, so this problem should be - * rarely encountered. - * dwiggins@bbn.com - */ - - rt_setif(saved_nrt, ifpaddr, ifaaddr, gate); + /* + * If the route request specified an interface with + * IFA and/or IFP, we set the requested interface on + * the route with rt_setif. It would be much better + * to do this inside rtrequest, but that would + * require passing the desired interface, in some + * form, to rtrequest. Since rtrequest is called in + * so many places (roughly 40 in our source), adding + * a parameter is to much for us to swallow; this is + * something for the FreeBSD developers to tackle. + * Instead, we let rtrequest compute whatever + * interface it wants, then come in behind it and + * stick in the interface that we really want. This + * works reasonably well except when rtrequest can't + * figure out what interface to use (with + * ifa_withroute) and returns ENETUNREACH. Ideally + * it shouldn't matter if rtrequest can't figure out + * the interface if we're going to explicitly set it + * ourselves anyway. But practically we can't + * recover here because rtrequest will not do any of + * the work necessary to add the route if it can't + * find an interface. As long as there is a default + * route that leads to some interface, rtrequest will + * find an interface, so this problem should be + * rarely encountered. + * dwiggins@bbn.com + */ + + rt_setif(saved_nrt, ifpaddr, ifaaddr, gate); #endif - rt_setmetrics(rtm->rtm_inits, - &rtm->rtm_rmx, &saved_nrt->rt_rmx); - saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); - saved_nrt->rt_rmx.rmx_locks |= - (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); - rtunref(saved_nrt); - saved_nrt->rt_genmask = genmask; - } - break; - - case RTM_DELETE: - error = rtrequest(RTM_DELETE, dst, gate, netmask, - rtm->rtm_flags, &saved_nrt); - if (error == 0) { - if ((rt = saved_nrt)) - rtref(rt); - goto report; - } - break; - - case RTM_GET: - case RTM_CHANGE: - case RTM_LOCK: - if ((rnh = rt_tables[dst->sa_family]) == 0) { - senderr(EAFNOSUPPORT); - } else if ((rt = (struct rtentry *) - rnh->rnh_lookup(dst, netmask, rnh)) != NULL) - rtref(rt); - else - senderr(ESRCH); - switch(rtm->rtm_type) { - - case RTM_GET: - report: - dst = rt_key(rt); - gate = rt->rt_gateway; - netmask = rt_mask(rt); - genmask = rt->rt_genmask; - if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { - ifp = rt->rt_ifp; - if (ifp) { - ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; - ifaaddr = rt->rt_ifa->ifa_addr; - rtm->rtm_index = ifp->if_index; - } else { - ifpaddr = 0; - ifaaddr = 0; - } + rt_setmetrics(rtm->rtm_inits, + &rtm->rtm_rmx, &saved_nrt->rt_rmx); + saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + saved_nrt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); + rtunref(saved_nrt); + saved_nrt->rt_genmask = genmask; } - len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, - (struct walkarg *)0); - if (len > rtm->rtm_msglen) { - struct rt_msghdr *new_rtm; - R_Malloc(new_rtm, struct rt_msghdr *, len); - if (new_rtm == 0) - senderr(ENOBUFS); - Bcopy(rtm, new_rtm, rtm->rtm_msglen); - Free(rtm); rtm = new_rtm; + break; + + case RTM_DELETE: + error = rtrequest_locked(RTM_DELETE, dst, gate, netmask, + rtm->rtm_flags, &saved_nrt); + if (error == 0) { + if ((rt = saved_nrt)) + rtref(rt); + goto report; } - (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, - (struct walkarg *)0); - rtm->rtm_flags = rt->rt_flags; - rtm->rtm_rmx = rt->rt_rmx; - rtm->rtm_addrs = info.rti_addrs; 
break; + case RTM_GET: case RTM_CHANGE: - if (gate && (error = rt_setgate(rt, rt_key(rt), gate))) - senderr(error); - - /* - * If they tried to change things but didn't specify - * the required gateway, then just use the old one. - * This can happen if the user tries to change the - * flags on the default route without changing the - * default gateway. Changing flags still doesn't work. - */ - if ((rt->rt_flags & RTF_GATEWAY) && !gate) - gate = rt->rt_gateway; - + case RTM_LOCK: + if ((rnh = rt_tables[dst->sa_family]) == 0) { + senderr(EAFNOSUPPORT); + } else if ((rt = (struct rtentry *) + rnh->rnh_lookup(dst, netmask, rnh)) != NULL) + rtref(rt); + else + senderr(ESRCH); + switch(rtm->rtm_type) { + + case RTM_GET: { + struct ifaddr *ifa2; + report: + dst = rt_key(rt); + gate = rt->rt_gateway; + netmask = rt_mask(rt); + genmask = rt->rt_genmask; + if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { + ifp = rt->rt_ifp; + if (ifp) { + ifnet_lock_shared(ifp); + ifa2 = ifp->if_addrhead.tqh_first; + ifpaddr = ifa2->ifa_addr; + ifnet_lock_done(ifp); + ifaaddr = rt->rt_ifa->ifa_addr; + rtm->rtm_index = ifp->if_index; + } else { + ifpaddr = 0; + ifaaddr = 0; + } + } + len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, + (struct walkarg *)0); + if (len > rtm->rtm_msglen) { + struct rt_msghdr *new_rtm; + R_Malloc(new_rtm, struct rt_msghdr *, len); + if (new_rtm == 0) { + senderr(ENOBUFS); + } + Bcopy(rtm, new_rtm, rtm->rtm_msglen); + R_Free(rtm); rtm = new_rtm; + } + (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, + (struct walkarg *)0); + rtm->rtm_flags = rt->rt_flags; + rtm->rtm_rmx = rt->rt_rmx; + rtm->rtm_addrs = info.rti_addrs; + } + break; + + case RTM_CHANGE: + if (gate && (error = rt_setgate(rt, rt_key(rt), gate))) + senderr(error); + + /* + * If they tried to change things but didn't specify + * the required gateway, then just use the old one. + * This can happen if the user tries to change the + * flags on the default route without changing the + * default gateway. Changing flags still doesn't work. + */ + if ((rt->rt_flags & RTF_GATEWAY) && !gate) + gate = rt->rt_gateway; + #ifdef __APPLE__ - /* - * On Darwin, we call rt_setif which contains the - * equivalent to the code found at this very spot - * in BSD. - */ - rt_setif(rt, ifpaddr, ifaaddr, gate); + /* + * On Darwin, we call rt_setif which contains the + * equivalent to the code found at this very spot + * in BSD. 
+ */ + rt_setif(rt, ifpaddr, ifaaddr, gate); #endif - - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, - &rt->rt_rmx); + + rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, + &rt->rt_rmx); #ifndef __APPLE__ - /* rt_setif, called above does this for us on darwin */ - if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) - rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate); + /* rt_setif, called above does this for us on darwin */ + if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) + rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate); #endif - if (genmask) - rt->rt_genmask = genmask; - /* - * Fall into - */ - case RTM_LOCK: - rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); - rt->rt_rmx.rmx_locks |= - (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); + if (genmask) + rt->rt_genmask = genmask; + /* + * Fall into + */ + case RTM_LOCK: + rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + rt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); + break; + } break; - } - break; - - default: - senderr(EOPNOTSUPP); + + default: + senderr(EOPNOTSUPP); } - flush: if (rtm) { if (error) @@ -560,16 +582,18 @@ flush: rtm->rtm_flags |= RTF_DONE; } if (rt) - rtfree(rt); + rtfree_locked(rt); + lck_mtx_unlock(rt_mtx); + socket_lock(so, 0); /* relock the socket now */ { - register struct rawcb *rp = 0; + struct rawcb *rp = 0; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (route_cb.any_count <= 1) { if (rtm) - Free(rtm); + R_Free(rtm); m_freem(m); return (error); } @@ -583,22 +607,25 @@ flush: m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); - Free(rtm); + R_Free(rtm); } if (sendonlytoself && m) { - if (sbappendaddr(&so->so_rcv, &route_src, m, (struct mbuf*)0) == 0) { - m_freem(m); - error = ENOBUFS; - } else { + error = 0; + if (sbappendaddr(&so->so_rcv, &route_src, m, (struct mbuf*)0, &error) != 0) { sorwakeup(so); } + if (error) + return error; } else { if (rp) rp->rcb_proto.sp_family = 0; /* Avoid us */ if (dst) route_proto.sp_protocol = dst->sa_family; - if (m) + if (m) { + socket_unlock(so, 0); raw_input(m, &route_proto, &route_src, &route_dst); + socket_lock(so, 0); + } if (rp) rp->rcb_proto.sp_family = PF_ROUTE; } @@ -609,7 +636,7 @@ flush: static void rt_setmetrics(which, in, out) u_long which; - register struct rt_metrics *in, *out; + struct rt_metrics *in, *out; { #define metric(f, e) if (which & (f)) out->e = in->e; metric(RTV_RPIPE, rmx_recvpipe); @@ -627,30 +654,53 @@ rt_setmetrics(which, in, out) * Set route's interface given ifpaddr, ifaaddr, and gateway. */ static void -rt_setif(rt, Ifpaddr, Ifaaddr, Gate) - struct rtentry *rt; - struct sockaddr *Ifpaddr, *Ifaaddr, *Gate; +rt_setif( + struct rtentry *rt, + struct sockaddr *Ifpaddr, + struct sockaddr *Ifaaddr, + struct sockaddr *Gate) { struct ifaddr *ifa = 0; struct ifnet *ifp = 0; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + /* new gateway could require new ifaddr, ifp; flags may also be different; ifp may be specified by ll sockaddr when protocol address is ambiguous */ if (Ifpaddr && (ifa = ifa_ifwithnet(Ifpaddr)) && - (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) + (ifp = ifa->ifa_ifp) && (Ifaaddr || Gate)) { + ifafree(ifa); ifa = ifaof_ifpforaddr(Ifaaddr ? Ifaaddr : Gate, ifp); - else if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) { - ifa = Gate ? 
ifaof_ifpforaddr(Gate, ifp) : - TAILQ_FIRST(&ifp->if_addrhead); } - else if ((Ifaaddr && (ifa = ifa_ifwithaddr(Ifaaddr))) || - (Gate && (ifa = ifa_ifwithroute(rt->rt_flags, - rt_key(rt), Gate)))) - ifp = ifa->ifa_ifp; + else + { + if (ifa) { + ifafree(ifa); + ifa = 0; + } + if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) { + if (Gate) { + ifa = ifaof_ifpforaddr(Gate, ifp); + } + else { + ifnet_lock_shared(ifp); + ifa = TAILQ_FIRST(&ifp->if_addrhead); + ifaref(ifa); + ifnet_lock_done(ifp); + } + } + else if (Ifaaddr && (ifa = ifa_ifwithaddr(Ifaaddr))) { + ifp = ifa->ifa_ifp; + } + else if (Gate && (ifa = ifa_ifwithroute(rt->rt_flags, + rt_key(rt), Gate))) { + ifp = ifa->ifa_ifp; + } + } if (ifa) { - register struct ifaddr *oifa = rt->rt_ifa; + struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa && oifa->ifa_rtrequest) oifa->ifa_rtrequest(RTM_DELETE, @@ -660,8 +710,11 @@ rt_setif(rt, Ifpaddr, Ifaaddr, Gate) rt->rt_rmx.rmx_mtu = ifp->if_mtu; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate); - } else + } else { + ifafree(ifa); goto call_ifareq; + } + ifafree(ifa); return; } call_ifareq: @@ -683,11 +736,11 @@ rt_setif(rt, Ifpaddr, Ifaaddr, Gate) */ static int rt_xaddrs(cp, cplim, rtinfo) - register caddr_t cp, cplim; - register struct rt_addrinfo *rtinfo; + caddr_t cp, cplim; + struct rt_addrinfo *rtinfo; { - register struct sockaddr *sa; - register int i; + struct sockaddr *sa; + int i; bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { @@ -721,14 +774,14 @@ rt_xaddrs(cp, cplim, rtinfo) } static struct mbuf * -rt_msg1(type, rtinfo) - int type; - register struct rt_addrinfo *rtinfo; +rt_msg1( + int type, + struct rt_addrinfo *rtinfo) { - register struct rt_msghdr *rtm; - register struct mbuf *m; - register int i; - register struct sockaddr *sa; + struct rt_msghdr *rtm; + struct mbuf *m; + int i; + struct sockaddr *sa; int len, dlen; switch (type) { @@ -787,11 +840,11 @@ rt_msg1(type, rtinfo) static int rt_msg2(type, rtinfo, cp, w) int type; - register struct rt_addrinfo *rtinfo; + struct rt_addrinfo *rtinfo; caddr_t cp; struct walkarg *w; { - register int i; + int i; int len, dlen, second_time = 0; caddr_t cp0; @@ -804,10 +857,27 @@ again: len = sizeof(struct ifa_msghdr); break; + case RTM_DELMADDR: + case RTM_NEWMADDR: + len = sizeof(struct ifma_msghdr); + break; + case RTM_IFINFO: len = sizeof(struct if_msghdr); break; + case RTM_IFINFO2: + len = sizeof(struct if_msghdr2); + break; + + case RTM_NEWMADDR2: + len = sizeof(struct ifma_msghdr2); + break; + + case RTM_GET2: + len = sizeof(struct rt_msghdr2); + break; + default: len = sizeof(struct rt_msghdr); } @@ -815,7 +885,7 @@ again: if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { - register struct sockaddr *sa; + struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == 0) continue; @@ -828,7 +898,7 @@ again: len += dlen; } if (cp == 0 && w != NULL && !second_time) { - register struct walkarg *rw = w; + struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { @@ -847,7 +917,7 @@ again: } } if (cp) { - register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; + struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; @@ -858,19 +928,21 @@ again: /* * This routine is called to generate a message from the routing - * socket indicating that a redirect has occured, a routing lookup + * socket indicating that a redirect has occurred, a routing lookup * has failed, or that a protocol has 
detected timeouts to a particular * destination. */ void rt_missmsg(type, rtinfo, flags, error) int type, flags, error; - register struct rt_addrinfo *rtinfo; + struct rt_addrinfo *rtinfo; { - register struct rt_msghdr *rtm; - register struct mbuf *m; + struct rt_msghdr *rtm; + struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); @@ -889,10 +961,10 @@ rt_missmsg(type, rtinfo, flags, error) * socket indicating that the status of a network interface has changed. */ void -rt_ifmsg(ifp) - register struct ifnet *ifp; +rt_ifmsg( + struct ifnet *ifp) { - register struct if_msghdr *ifm; + struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; @@ -905,7 +977,7 @@ rt_ifmsg(ifp) ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = (u_short)ifp->if_flags; - ifm->ifm_data = ifp->if_data; + if_data_internal_to_if_data(&ifp->if_data, &ifm->ifm_data); ifm->ifm_addrs = 0; route_proto.sp_protocol = 0; raw_input(m, &route_proto, &route_src, &route_dst); @@ -918,12 +990,15 @@ rt_ifmsg(ifp) * socket indicate a request to configure interfaces, then it will * be unnecessary as the routing socket will automatically generate * copies of it. + * + * Since this is coming from the interface, it is expected that the + * interface will be locked. */ void rt_newaddrmsg(cmd, ifa, error, rt) int cmd, error; - register struct ifaddr *ifa; - register struct rtentry *rt; + struct ifaddr *ifa; + struct rtentry *rt; { struct rt_addrinfo info; struct sockaddr *sa = 0; @@ -937,7 +1012,7 @@ rt_newaddrmsg(cmd, ifa, error, rt) bzero((caddr_t)&info, sizeof(info)); if ((cmd == RTM_ADD && pass == 1) || (cmd == RTM_DELETE && pass == 2)) { - register struct ifa_msghdr *ifam; + struct ifa_msghdr *ifam; int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; ifaaddr = sa = ifa->ifa_addr; @@ -954,7 +1029,7 @@ rt_newaddrmsg(cmd, ifa, error, rt) } if ((cmd == RTM_ADD && pass == 2) || (cmd == RTM_DELETE && pass == 1)) { - register struct rt_msghdr *rtm; + struct rt_msghdr *rtm; if (rt == 0) continue; @@ -1002,11 +1077,11 @@ rt_newmaddrmsg(cmd, ifma) * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ - gate = ifma->ifma_lladdr; + gate = ifma->ifma_ll->ifma_addr; if ((m = rt_msg1(cmd, &info)) == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); - ifmam->ifmam_index = ifp->if_index; + ifmam->ifmam_index = ifp ? 
ifp->if_index : 0; ifmam->ifmam_addrs = info.rti_addrs; route_proto.sp_protocol = ifma->ifma_addr->sa_family; raw_input(m, &route_proto, &route_src, &route_dst); @@ -1020,8 +1095,8 @@ sysctl_dumpentry(rn, vw) struct radix_node *rn; void *vw; { - register struct walkarg *w = vw; - register struct rtentry *rt = (struct rtentry *)rn; + struct walkarg *w = vw; + struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; @@ -1032,51 +1107,81 @@ sysctl_dumpentry(rn, vw) gate = rt->rt_gateway; netmask = rt_mask(rt); genmask = rt->rt_genmask; - size = rt_msg2(RTM_GET, &info, 0, w); - if (w->w_req && w->w_tmem) { - register struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; - - rtm->rtm_flags = rt->rt_flags; - rtm->rtm_use = rt->rt_use; - rtm->rtm_rmx = rt->rt_rmx; - rtm->rtm_index = rt->rt_ifp->if_index; - rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; - rtm->rtm_addrs = info.rti_addrs; - error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); - return (error); + if (w->w_op != NET_RT_DUMP2) { + size = rt_msg2(RTM_GET, &info, 0, w); + if (w->w_req && w->w_tmem) { + struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; + + rtm->rtm_flags = rt->rt_flags; + rtm->rtm_use = rt->rt_use; + rtm->rtm_rmx = rt->rt_rmx; + rtm->rtm_index = rt->rt_ifp->if_index; + rtm->rtm_pid = 0; + rtm->rtm_seq = 0; + rtm->rtm_errno = 0; + rtm->rtm_addrs = info.rti_addrs; + error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); + return (error); + } + } else { + size = rt_msg2(RTM_GET2, &info, 0, w); + if (w->w_req && w->w_tmem) { + struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)w->w_tmem; + + rtm->rtm_flags = rt->rt_flags; + rtm->rtm_use = rt->rt_use; + rtm->rtm_rmx = rt->rt_rmx; + rtm->rtm_index = rt->rt_ifp->if_index; + rtm->rtm_refcnt = rt->rt_refcnt; + if (rt->rt_parent) + rtm->rtm_parentflags = rt->rt_parent->rt_flags; + else + rtm->rtm_parentflags = 0; + rtm->rtm_reserved = 0; + rtm->rtm_addrs = info.rti_addrs; + error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); + return (error); + + } } return (error); } int -sysctl_iflist(af, w) - int af; - register struct walkarg *w; +sysctl_iflist( + int af, + struct walkarg *w) { - register struct ifnet *ifp; - register struct ifaddr *ifa; + struct ifnet *ifp; + struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if (error) + break; if (w->w_arg && w->w_arg != ifp->if_index) continue; + ifnet_lock_shared(ifp); ifa = ifp->if_addrhead.tqh_first; ifpaddr = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w); ifpaddr = 0; if (w->w_req && w->w_tmem) { - register struct if_msghdr *ifm; + struct if_msghdr *ifm; ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = (u_short)ifp->if_flags; - ifm->ifm_data = ifp->if_data; + if_data_internal_to_if_data(&ifp->if_data, &ifm->ifm_data); ifm->ifm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len); - if (error) - return (error); + if (error) { + ifnet_lock_done(ifp); + break; + } } while ((ifa = ifa->ifa_link.tqe_next) != 0) { if (af && af != ifa->ifa_addr->sa_family) @@ -1090,7 +1195,7 @@ sysctl_iflist(af, w) brdaddr = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, 0, w); if (w->w_req && w->w_tmem) { - register struct ifa_msghdr *ifam; + struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_index = ifa->ifa_ifp->if_index; @@ -1099,21 +1204,148 
@@ sysctl_iflist(af, w) ifam->ifam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) - return (error); + break; } } + ifnet_lock_done(ifp); ifaaddr = netmask = brdaddr = 0; } - return (0); + ifnet_head_done(); + return error; } +int +sysctl_iflist2( + int af, + struct walkarg *w) +{ + struct ifnet *ifp; + struct ifaddr *ifa; + struct rt_addrinfo info; + int len, error = 0; + + bzero((caddr_t)&info, sizeof(info)); + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if (error) + break; + if (w->w_arg && w->w_arg != ifp->if_index) + continue; + ifnet_lock_shared(ifp); + ifa = ifp->if_addrhead.tqh_first; + ifpaddr = ifa->ifa_addr; + len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)0, w); + ifpaddr = 0; + if (w->w_req && w->w_tmem) { + struct if_msghdr2 *ifm; + + ifm = (struct if_msghdr2 *)w->w_tmem; + ifm->ifm_addrs = info.rti_addrs; + ifm->ifm_flags = (u_short)ifp->if_flags; + ifm->ifm_index = ifp->if_index; + ifm->ifm_snd_len = ifp->if_snd.ifq_len; + ifm->ifm_snd_maxlen = ifp->if_snd.ifq_maxlen; + ifm->ifm_snd_drops = ifp->if_snd.ifq_drops; + ifm->ifm_timer = ifp->if_timer; + if_data_internal_to_if_data64(&ifp->if_data, &ifm->ifm_data); + error = SYSCTL_OUT(w->w_req, w->w_tmem, len); + if (error) { + ifnet_lock_done(ifp); + break; + } + } + while ((ifa = ifa->ifa_link.tqe_next) != 0) { + if (af && af != ifa->ifa_addr->sa_family) + continue; + ifaaddr = ifa->ifa_addr; + netmask = ifa->ifa_netmask; + brdaddr = ifa->ifa_dstaddr; + len = rt_msg2(RTM_NEWADDR, &info, 0, w); + if (w->w_req && w->w_tmem) { + struct ifa_msghdr *ifam; + + ifam = (struct ifa_msghdr *)w->w_tmem; + ifam->ifam_index = ifa->ifa_ifp->if_index; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_addrs = info.rti_addrs; + error = SYSCTL_OUT(w->w_req, w->w_tmem, len); + if (error) + break; + } + } + if (error) { + ifnet_lock_done(ifp); + break; + } + { + struct ifmultiaddr *ifma; + + for (ifma = ifp->if_multiaddrs.lh_first; ifma; + ifma = ifma->ifma_link.le_next) { + if (af && af != ifma->ifma_addr->sa_family) + continue; + bzero((caddr_t)&info, sizeof(info)); + ifaaddr = ifma->ifma_addr; + if (ifp->if_addrhead.tqh_first) + ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; + if (ifma->ifma_ll) + gate = ifma->ifma_ll->ifma_addr; + len = rt_msg2(RTM_NEWMADDR2, &info, 0, w); + if (w->w_req && w->w_tmem) { + struct ifma_msghdr2 *ifmam; + + ifmam = (struct ifma_msghdr2 *)w->w_tmem; + ifmam->ifmam_addrs = info.rti_addrs; + ifmam->ifmam_flags = 0; + ifmam->ifmam_index = ifma->ifma_ifp->if_index; + ifmam->ifmam_refcount = ifma->ifma_refcount; + error = SYSCTL_OUT(w->w_req, w->w_tmem, len); + if (error) + break; + } + } + } + ifnet_lock_done(ifp); + ifaaddr = netmask = brdaddr = 0; + } + ifnet_head_done(); + return error; +} + + +static int +sysctl_rtstat(struct sysctl_req *req) +{ + int error; + + error = SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat)); + if (error) + return (error); + + return 0; +} + +static int +sysctl_rttrash(struct sysctl_req *req) +{ + int error; + + error = SYSCTL_OUT(req, &rttrash, sizeof(rttrash)); + if (error) + return (error); + + return 0; +} + + static int sysctl_rtsock SYSCTL_HANDLER_ARGS { int *name = (int *)arg1; u_int namelen = arg2; - register struct radix_node_head *rnh; - int i, s, error = EINVAL; + struct radix_node_head *rnh; + int i, error = EINVAL; u_char af; struct walkarg w; @@ -1129,10 +1361,11 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS w.w_arg = name[2]; w.w_req = req; - s = splnet(); + lck_mtx_lock(rt_mtx); switch 
(w.w_op) { case NET_RT_DUMP: + case NET_RT_DUMP2: case NET_RT_FLAGS: for (i = 1; i <= AF_MAX; i++) if ((rnh = rt_tables[i]) && (af == 0 || af == i) && @@ -1140,11 +1373,20 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS sysctl_dumpentry, &w))) break; break; - case NET_RT_IFLIST: error = sysctl_iflist(af, &w); + break; + case NET_RT_IFLIST2: + error = sysctl_iflist2(af, &w); + break; + case NET_RT_STAT: + error = sysctl_rtstat(req); + break; + case NET_RT_TRASH: + error = sysctl_rttrash(req); + break; } - splx(s); + lck_mtx_unlock(rt_mtx); if (w.w_tmem) FREE(w.w_tmem, M_RTABLE); return (error); @@ -1163,13 +1405,18 @@ static struct protosw routesw[] = { 0, route_output, raw_ctlinput, 0, 0, raw_init, 0, 0, 0, - 0, &route_usrreqs, 0, 0 + 0, + &route_usrreqs, + 0, 0, 0, + { 0, 0 }, 0, { 0 } } }; struct domain routedomain = { PF_ROUTE, "route", route_init, 0, 0, - routesw}; + routesw, + 0, 0, 0, 0, 0, 0, 0, 0, + { 0, 0 } }; DOMAIN_SET(route); diff --git a/bsd/net/slcompress.c b/bsd/net/slcompress.c deleted file mode 100644 index 47108b410..000000000 --- a/bsd/net/slcompress.c +++ /dev/null @@ -1,635 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1989, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)slcompress.c 8.2 (Berkeley) 4/16/94 - * $FreeBSD: src/sys/net/slcompress.c,v 1.16 1999/12/29 04:38:37 peter Exp $ - */ - -/* - * Routines to compress and uncompress tcp packets (for transmission - * over low speed serial lines). - * - * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: - * - Initial distribution. - * - */ - -#include <sys/param.h> -#include <sys/mbuf.h> -#include <sys/systm.h> - -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/tcp.h> - -#include <net/slcompress.h> - -#ifndef SL_NO_STATS -#define INCR(counter) ++comp->counter; -#else -#define INCR(counter) -#endif - -#define BCMP(p1, p2, n) bcmp((char *)(p1), (char *)(p2), (int)(n)) -#define BCOPY(p1, p2, n) bcopy((char *)(p1), (char *)(p2), (int)(n)) -#ifndef KERNEL -#define ovbcopy bcopy -#endif - -void -sl_compress_init(comp, max_state) - struct slcompress *comp; - int max_state; -{ - register u_int i; - register struct cstate *tstate = comp->tstate; - - if (max_state == -1) { - max_state = MAX_STATES - 1; - bzero((char *)comp, sizeof(*comp)); - } else { - /* Don't reset statistics */ - bzero((char *)comp->tstate, sizeof(comp->tstate)); - bzero((char *)comp->rstate, sizeof(comp->rstate)); - } - for (i = max_state; i > 0; --i) { - tstate[i].cs_id = i; - tstate[i].cs_next = &tstate[i - 1]; - } - tstate[0].cs_next = &tstate[max_state]; - tstate[0].cs_id = 0; - comp->last_cs = &tstate[0]; - comp->last_recv = 255; - comp->last_xmit = 255; - comp->flags = SLF_TOSS; -} - - -/* ENCODE encodes a number that is known to be non-zero. ENCODEZ - * checks for zero (since zero has to be encoded in the long, 3 byte - * form). - */ -#define ENCODE(n) { \ - if ((u_int16_t)(n) >= 256) { \ - *cp++ = 0; \ - cp[1] = (n); \ - cp[0] = (n) >> 8; \ - cp += 2; \ - } else { \ - *cp++ = (n); \ - } \ -} -#define ENCODEZ(n) { \ - if ((u_int16_t)(n) >= 256 || (u_int16_t)(n) == 0) { \ - *cp++ = 0; \ - cp[1] = (n); \ - cp[0] = (n) >> 8; \ - cp += 2; \ - } else { \ - *cp++ = (n); \ - } \ -} - -#define DECODEL(f) { \ - if (*cp == 0) {\ - (f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \ - cp += 3; \ - } else { \ - (f) = htonl(ntohl(f) + (u_int32_t)*cp++); \ - } \ -} - -#define DECODES(f) { \ - if (*cp == 0) {\ - (f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \ - cp += 3; \ - } else { \ - (f) = htons(ntohs(f) + (u_int32_t)*cp++); \ - } \ -} - -#define DECODEU(f) { \ - if (*cp == 0) {\ - (f) = htons((cp[1] << 8) | cp[2]); \ - cp += 3; \ - } else { \ - (f) = htons((u_int32_t)*cp++); \ - } \ -} - -/* - * Attempt to compress an outgoing TCP packet and return the type of - * the result. The caller must have already verified that the protocol - * is TCP. The first mbuf must contain the complete IP and TCP headers, - * and "ip" must be == mtod(m, struct ip *). "comp" supplies the - * compression state, and "compress_cid" tells us whether it is OK - * to leave out the CID field when feasible. - * - * The caller is responsible for adjusting m->m_pkthdr.len upon return, - * if m is an M_PKTHDR mbuf.
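- *
- * A worked instance of the ENCODE forms above: a window delta of 300
- * (0x012C) does not fit the one-octet form, so it goes out as the
- * three octets 00 01 2C, while a delta of 5 goes out as the single
- * octet 05.  ENCODEZ uses the long form for zero as well, since a
- * leading 00 octet is what flags the three-octet encoding to the
- * receiver.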
- */ -u_int -sl_compress_tcp(m, ip, comp, compress_cid) - struct mbuf *m; - register struct ip *ip; - struct slcompress *comp; - int compress_cid; -{ - register struct cstate *cs = comp->last_cs->cs_next; - register u_int hlen = ip->ip_hl; - register struct tcphdr *oth; - register struct tcphdr *th; - register u_int deltaS, deltaA; - register u_int changes = 0; - u_char new_seq[16]; - register u_char *cp = new_seq; - - /* - * Bail if this is an IP fragment or if the TCP packet isn't - * `compressible' (i.e., ACK isn't set or some other control bit is - * set). (We assume that the caller has already made sure the - * packet is IP proto TCP). - */ - if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40) - return (TYPE_IP); - - th = (struct tcphdr *)&((int32_t *)ip)[hlen]; - if ((th->th_flags & (TH_SYN|TH_FIN|TH_RST|TH_ACK)) != TH_ACK) - return (TYPE_IP); - /* - * Packet is compressible -- we're going to send either a - * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way we need - * to locate (or create) the connection state. Special case the - * most recently used connection since it's most likely to be used - * again & we don't have to do any reordering if it's used. - */ - INCR(sls_packets) - if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr || - ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr || - *(int32_t *)th != ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl]) { - /* - * Wasn't the first -- search for it. - * - * States are kept in a circularly linked list with - * last_cs pointing to the end of the list. The - * list is kept in lru order by moving a state to the - * head of the list whenever it is referenced. Since - * the list is short and, empirically, the connection - * we want is almost always near the front, we locate - * states via linear search. If we don't find a state - * for the datagram, the oldest state is (re-)used. - */ - register struct cstate *lcs; - register struct cstate *lastcs = comp->last_cs; - - do { - lcs = cs; cs = cs->cs_next; - INCR(sls_searches) - if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr - && ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr - && *(int32_t *)th == - ((int32_t *)&cs->cs_ip)[cs->cs_ip.ip_hl]) - goto found; - } while (cs != lastcs); - - /* - * Didn't find it -- re-use oldest cstate. Send an - * uncompressed packet that tells the other side what - * connection number we're using for this conversation. - * Note that since the state list is circular, the oldest - * state points to the newest and we only need to set - * last_cs to update the lru linkage. - */ - INCR(sls_misses) - comp->last_cs = lcs; - hlen += th->th_off; - hlen <<= 2; - if (hlen > m->m_len) - return TYPE_IP; - goto uncompressed; - - found: - /* - * Found it -- move to the front on the connection list. - */ - if (cs == lastcs) - comp->last_cs = lcs; - else { - lcs->cs_next = cs->cs_next; - cs->cs_next = lastcs->cs_next; - lastcs->cs_next = cs; - } - } - - /* - * Make sure that only what we expect to change changed. The first - * line of the `if' checks the IP protocol version, header length & - * type of service. The 2nd line checks the "Don't fragment" bit. - * The 3rd line checks the time-to-live and protocol (the protocol - * check is unnecessary but costless). The 4th line checks the TCP - * header length. The 5th line checks IP options, if any. The 6th - * line checks TCP options, if any. If any of these things are - * different between the previous & current datagram, we send the - * current datagram `uncompressed'. 
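- *
- * (In u_int16_t words of the IP header, word 0 is version/ihl plus
- * type of service, word 3 is the flags/fragment-offset word carrying
- * "Don't fragment", and word 4 is time-to-live plus protocol; that is
- * how the [0], [3] and [4] comparisons below line up with the lines
- * of this checklist.)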
- */ - oth = (struct tcphdr *)&((int32_t *)&cs->cs_ip)[hlen]; - deltaS = hlen; - hlen += th->th_off; - hlen <<= 2; - if (hlen > m->m_len) - return TYPE_IP; - - if (((u_int16_t *)ip)[0] != ((u_int16_t *)&cs->cs_ip)[0] || - ((u_int16_t *)ip)[3] != ((u_int16_t *)&cs->cs_ip)[3] || - ((u_int16_t *)ip)[4] != ((u_int16_t *)&cs->cs_ip)[4] || - th->th_off != oth->th_off || - (deltaS > 5 && - BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) || - (th->th_off > 5 && - BCMP(th + 1, oth + 1, (th->th_off - 5) << 2))) - goto uncompressed; - - /* - * Figure out which of the changing fields changed. The - * receiver expects changes in the order: urgent, window, - * ack, seq (the order minimizes the number of temporaries - * needed in this section of code). - */ - if (th->th_flags & TH_URG) { - deltaS = ntohs(th->th_urp); - ENCODEZ(deltaS); - changes |= NEW_U; - } else if (th->th_urp != oth->th_urp) - /* argh! URG not set but urp changed -- a sensible - * implementation should never do this but RFC793 - * doesn't prohibit the change so we have to deal - * with it. */ - goto uncompressed; - - deltaS = (u_int16_t)(ntohs(th->th_win) - ntohs(oth->th_win)); - if (deltaS) { - ENCODE(deltaS); - changes |= NEW_W; - } - - deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack); - if (deltaA) { - if (deltaA > 0xffff) - goto uncompressed; - ENCODE(deltaA); - changes |= NEW_A; - } - - deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq); - if (deltaS) { - if (deltaS > 0xffff) - goto uncompressed; - ENCODE(deltaS); - changes |= NEW_S; - } - - switch(changes) { - - case 0: - /* - * Nothing changed. If this packet contains data and the - * last one didn't, this is probably a data packet following - * an ack (normal on an interactive connection) and we send - * it compressed. Otherwise it's probably a retransmit, - * retransmitted ack or window probe. Send it uncompressed - * in case the other side missed the compressed version. - */ - if (ip->ip_len != cs->cs_ip.ip_len && - ntohs(cs->cs_ip.ip_len) == hlen) - break; - - /* (fall through) */ - - case SPECIAL_I: - case SPECIAL_D: - /* - * actual changes match one of our special case encodings -- - * send packet uncompressed. - */ - goto uncompressed; - - case NEW_S|NEW_A: - if (deltaS == deltaA && - deltaS == ntohs(cs->cs_ip.ip_len) - hlen) { - /* special case for echoed terminal traffic */ - changes = SPECIAL_I; - cp = new_seq; - } - break; - - case NEW_S: - if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) { - /* special case for data xfer */ - changes = SPECIAL_D; - cp = new_seq; - } - break; - } - - deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id); - if (deltaS != 1) { - ENCODEZ(deltaS); - changes |= NEW_I; - } - if (th->th_flags & TH_PUSH) - changes |= TCP_PUSH_BIT; - /* - * Grab the cksum before we overwrite it below. Then update our - * state with this packet's header. - */ - deltaA = ntohs(th->th_sum); - BCOPY(ip, &cs->cs_ip, hlen); - - /* - * We want to use the original packet as our compressed packet. - * (cp - new_seq) is the number of bytes we need for compressed - * sequence numbers. In addition we need one byte for the change - * mask, one for the connection id and two for the tcp checksum. - * So, (cp - new_seq) + 4 bytes of header are needed. hlen is how - * many bytes of the original packet to toss so subtract the two to - * get the new packet size. 
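- *
- * Concretely: with a minimal 40-byte IP/TCP header, one single-octet
- * delta and an explicit connection id, hlen becomes 40 - (1 + 4) = 35
- * bytes to toss, and the compressed header that replaces it is just
- * 5 bytes: change mask, connection id, 2-byte checksum, 1 delta octet.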
- */ - deltaS = cp - new_seq; - cp = (u_char *)ip; - if (compress_cid == 0 || comp->last_xmit != cs->cs_id) { - comp->last_xmit = cs->cs_id; - hlen -= deltaS + 4; - cp += hlen; - *cp++ = changes | NEW_C; - *cp++ = cs->cs_id; - } else { - hlen -= deltaS + 3; - cp += hlen; - *cp++ = changes; - } - m->m_len -= hlen; - m->m_data += hlen; - *cp++ = deltaA >> 8; - *cp++ = deltaA; - BCOPY(new_seq, cp, deltaS); - INCR(sls_compressed) - return (TYPE_COMPRESSED_TCP); - - /* - * Update connection state cs & send uncompressed packet ('uncompressed' - * means a regular ip/tcp packet but with the 'conversation id' we hope - * to use on future compressed packets in the protocol field). - */ -uncompressed: - BCOPY(ip, &cs->cs_ip, hlen); - ip->ip_p = cs->cs_id; - comp->last_xmit = cs->cs_id; - return (TYPE_UNCOMPRESSED_TCP); -} - - -int -sl_uncompress_tcp(bufp, len, type, comp) - u_char **bufp; - int len; - u_int type; - struct slcompress *comp; -{ - u_char *hdr, *cp; - int hlen, vjlen; - - cp = bufp? *bufp: NULL; - vjlen = sl_uncompress_tcp_core(cp, len, len, type, comp, &hdr, &hlen); - if (vjlen < 0) - return (0); /* error */ - if (vjlen == 0) - return (len); /* was uncompressed already */ - - cp += vjlen; - len -= vjlen; - - /* - * At this point, cp points to the first byte of data in the - * packet. If we're not aligned on a 4-byte boundary, copy the - * data down so the ip & tcp headers will be aligned. Then back up - * cp by the tcp/ip header length to make room for the reconstructed - * header (we assume the packet we were handed has enough space to - * prepend 128 bytes of header). - */ - if ((intptr_t)cp & 3) { - if (len > 0) - (void) ovbcopy(cp, (caddr_t)((intptr_t)cp &~ 3), len); - cp = (u_char *)((intptr_t)cp &~ 3); - } - cp -= hlen; - len += hlen; - BCOPY(hdr, cp, hlen); - - *bufp = cp; - return (len); -} - -/* - * Uncompress a packet of total length total_len. The first buflen - * bytes are at buf; this must include the entire (compressed or - * uncompressed) TCP/IP header. This procedure returns the length - * of the VJ header, with a pointer to the uncompressed IP header - * in *hdrp and its length in *hlenp. - */ -int -sl_uncompress_tcp_core(buf, buflen, total_len, type, comp, hdrp, hlenp) - u_char *buf; - int buflen, total_len; - u_int type; - struct slcompress *comp; - u_char **hdrp; - u_int *hlenp; -{ - register u_char *cp; - register u_int hlen, changes; - register struct tcphdr *th; - register struct cstate *cs; - register struct ip *ip; - register u_int16_t *bp; - register u_int vjlen; - - switch (type) { - - case TYPE_UNCOMPRESSED_TCP: - ip = (struct ip *) buf; - if (ip->ip_p >= MAX_STATES) - goto bad; - cs = &comp->rstate[comp->last_recv = ip->ip_p]; - comp->flags &=~ SLF_TOSS; - ip->ip_p = IPPROTO_TCP; - /* - * Calculate the size of the TCP/IP header and make sure that - * we don't overflow the space we have available for it. - */ - hlen = ip->ip_hl << 2; - if (hlen + sizeof(struct tcphdr) > buflen) - goto bad; - hlen += ((struct tcphdr *)&((char *)ip)[hlen])->th_off << 2; - if (hlen > MAX_HDR || hlen > buflen) - goto bad; - BCOPY(ip, &cs->cs_ip, hlen); - cs->cs_hlen = hlen; - INCR(sls_uncompressedin) - *hdrp = (u_char *) &cs->cs_ip; - *hlenp = hlen; - return (0); - - default: - goto bad; - - case TYPE_COMPRESSED_TCP: - break; - } - /* We've got a compressed packet. */ - INCR(sls_compressedin) - cp = buf; - changes = *cp++; - if (changes & NEW_C) { - /* Make sure the state index is in range, then grab the state. - * If we have a good state index, clear the 'discard' flag. 
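- * (NEW_C is what brings us here.  For example, a first octet of 0x51,
- * i.e. NEW_C|TCP_PUSH_BIT|NEW_U, says an explicit connection id
- * follows, TH_PUSH must be restored below, and a single urgent-pointer
- * delta is the only sequence change present.)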
*/ - if (*cp >= MAX_STATES) - goto bad; - - comp->flags &=~ SLF_TOSS; - comp->last_recv = *cp++; - } else { - /* this packet has an implicit state index. If we've - * had a line error since the last time we got an - * explicit state index, we have to toss the packet. */ - if (comp->flags & SLF_TOSS) { - INCR(sls_tossed) - return (-1); - } - } - cs = &comp->rstate[comp->last_recv]; - hlen = cs->cs_ip.ip_hl << 2; - th = (struct tcphdr *)&((u_char *)&cs->cs_ip)[hlen]; - th->th_sum = htons((*cp << 8) | cp[1]); - cp += 2; - if (changes & TCP_PUSH_BIT) - th->th_flags |= TH_PUSH; - else - th->th_flags &=~ TH_PUSH; - - switch (changes & SPECIALS_MASK) { - case SPECIAL_I: - { - register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen; - th->th_ack = htonl(ntohl(th->th_ack) + i); - th->th_seq = htonl(ntohl(th->th_seq) + i); - } - break; - - case SPECIAL_D: - th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len) - - cs->cs_hlen); - break; - - default: - if (changes & NEW_U) { - th->th_flags |= TH_URG; - DECODEU(th->th_urp) - } else - th->th_flags &=~ TH_URG; - if (changes & NEW_W) - DECODES(th->th_win) - if (changes & NEW_A) - DECODEL(th->th_ack) - if (changes & NEW_S) - DECODEL(th->th_seq) - break; - } - if (changes & NEW_I) { - DECODES(cs->cs_ip.ip_id) - } else - cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1); - - /* - * At this point, cp points to the first byte of data in the - * packet. Fill in the IP total length and update the IP - * header checksum. - */ - vjlen = cp - buf; - buflen -= vjlen; - if (buflen < 0) - /* we must have dropped some characters (crc should detect - * this but the old slip framing won't) */ - goto bad; - - total_len += cs->cs_hlen - vjlen; - cs->cs_ip.ip_len = htons(total_len); - - /* recompute the ip header checksum */ - bp = (u_int16_t *) &cs->cs_ip; - cs->cs_ip.ip_sum = 0; - for (changes = 0; hlen > 0; hlen -= 2) - changes += *bp++; - changes = (changes & 0xffff) + (changes >> 16); - changes = (changes & 0xffff) + (changes >> 16); - cs->cs_ip.ip_sum = ~ changes; - - *hdrp = (u_char *) &cs->cs_ip; - *hlenp = cs->cs_hlen; - return vjlen; - -bad: - comp->flags |= SLF_TOSS; - INCR(sls_errorin) - return (-1); -} diff --git a/bsd/net/slcompress.h b/bsd/net/slcompress.h deleted file mode 100644 index b784ad8db..000000000 --- a/bsd/net/slcompress.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Definitions for tcp compression routines. - * - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: - * - Initial distribution. - * $FreeBSD: src/sys/net/slcompress.h,v 1.14.2.1 2000/05/05 13:37:06 jlemon Exp $ - */ - -#ifndef _NET_SLCOMPRESS_H_ -#define _NET_SLCOMPRESS_H_ -#include - -#include - -#define MAX_STATES 16 /* must be > 2 and < 256 */ -#define MAX_HDR 128 - -/* - * Compressed packet format: - * - * The first octet contains the packet type (top 3 bits), TCP - * 'push' bit, and flags that indicate which of the 4 TCP sequence - * numbers have changed (bottom 5 bits). The next octet is a - * conversation number that associates a saved IP/TCP header with - * the compressed packet. The next two octets are the TCP checksum - * from the original datagram. The next 0 to 15 octets are - * sequence number changes, one change per bit set in the header - * (there may be no changes and there are two special cases where - * the receiver implicitly knows what changed -- see below). - * - * There are 5 numbers which can change (they are always inserted - * in the following order): TCP urgent pointer, window, - * acknowledgement, sequence number and IP ID. (The urgent pointer - * is different from the others in that its value is sent, not the - * change in value.) Since typical use of SLIP links is biased - * toward small packets (see comments on MTU/MSS below), changes - * use a variable length coding with one octet for numbers in the - * range 1 - 255 and 3 octets (0, MSB, LSB) for numbers in the - * range 256 - 65535 or 0. (If the change in sequence number or - * ack is more than 65535, an uncompressed packet is sent.) - */ - -/* - * Packet types (must not conflict with IP protocol version) - * - * The top nibble of the first octet is the packet type. 
There are - * three possible types: IP (not proto TCP or tcp with one of the - * control flags set); uncompressed TCP (a normal IP/TCP packet but - * with the 8-bit protocol field replaced by an 8-bit connection id -- - * this type of packet syncs the sender & receiver); and compressed - * TCP (described above). - * - * LSB of 4-bit field is TCP "PUSH" bit (a worthless anachronism) and - * is logically part of the 4-bit "changes" field that follows. Top - * three bits are actual packet type. For backward compatibility - * and in the interest of conserving bits, numbers are chosen so the - * IP protocol version number (4) which normally appears in this nibble - * means "IP packet". - */ - -/* packet types */ -#define TYPE_IP 0x40 -#define TYPE_UNCOMPRESSED_TCP 0x70 -#define TYPE_COMPRESSED_TCP 0x80 -#define TYPE_ERROR 0x00 - -/* Bits in first octet of compressed packet */ -#define NEW_C 0x40 /* flag bits for what changed in a packet */ -#define NEW_I 0x20 -#define NEW_S 0x08 -#define NEW_A 0x04 -#define NEW_W 0x02 -#define NEW_U 0x01 - -/* reserved, special-case values of above */ -#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */ -#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */ -#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U) - -#define TCP_PUSH_BIT 0x10 - - -/* - * "state" data for each active tcp conversation on the wire. This is - * basically a copy of the entire IP/TCP header from the last packet - * we saw from the conversation together with a small identifier - * the transmit & receive ends of the line use to locate saved header. - */ -struct cstate { - struct cstate *cs_next; /* next most recently used cstate (xmit only) */ - u_int16_t cs_hlen; /* size of hdr (receive only) */ - u_char cs_id; /* connection # associated with this state */ - u_char cs_filler; - union { - char csu_hdr[MAX_HDR]; - struct ip csu_ip; /* ip/tcp hdr from most recent packet */ - } slcs_u; -}; -#define cs_ip slcs_u.csu_ip -#define cs_hdr slcs_u.csu_hdr - -/* - * all the state data for one serial line (we need one of these - * per line). - */ -struct slcompress { - struct cstate *last_cs; /* most recently used tstate */ - u_char last_recv; /* last rcvd conn. id */ - u_char last_xmit; /* last sent conn. id */ - u_int16_t flags; -#ifndef SL_NO_STATS - int sls_packets; /* outbound packets */ - int sls_compressed; /* outbound compressed packets */ - int sls_searches; /* searches for connection state */ - int sls_misses; /* times couldn't find conn. 
state */ - int sls_uncompressedin; /* inbound uncompressed packets */ - int sls_compressedin; /* inbound compressed packets */ - int sls_errorin; /* inbound unknown type packets */ - int sls_tossed; /* inbound packets tossed because of error */ -#endif - struct cstate tstate[MAX_STATES]; /* xmit connection states */ - struct cstate rstate[MAX_STATES]; /* receive connection states */ -}; -/* flag values */ -#define SLF_TOSS 1 /* tossing rcvd frames because of input err */ - -#if !defined(KERNEL) || defined(__APPLE_API_PRIVATE) -void sl_compress_init __P((struct slcompress *, int)); -u_int sl_compress_tcp __P((struct mbuf *, - struct ip *, struct slcompress *, int)); -int sl_uncompress_tcp __P((u_char **, int, u_int, struct slcompress *)); -int sl_uncompress_tcp_core __P((u_char *, int, int, u_int, - struct slcompress *, u_char **, u_int *)); - -#endif /* !KERNEL || __APPLE_API_PRIVATE */ -#endif /* !_NET_SLCOMPRESS_H_ */ diff --git a/bsd/net/zlib.c b/bsd/net/zlib.c index 78130d5b8..a3d4c72ba 100644 --- a/bsd/net/zlib.c +++ b/bsd/net/zlib.c @@ -49,7 +49,7 @@ subject to change. Applications should only use zlib.h. */ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ #ifndef _Z_UTIL_H #define _Z_UTIL_H @@ -295,7 +295,7 @@ void zcfree OF((voidpf opaque, voidpf ptr)); subject to change. Applications should only use zlib.h. */ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ #ifndef _DEFLATE_H #define _DEFLATE_H @@ -655,7 +655,7 @@ void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, * */ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ /* #include "deflate.h" */ @@ -1997,7 +1997,7 @@ local block_state deflate_slow(s, flush) * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
*/ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ /* #define GEN_TREES_H */ @@ -2359,7 +2359,7 @@ local int tr_static_init( ush bl_count[MAX_BITS+1]; /* number of codes at each bit length for an optimal tree */ - if (static_init_done) return; + if (static_init_done) return Z_OK; /* allocate storage for static structures */ if (static_storage == Z_NULL) { @@ -2439,6 +2439,7 @@ local int tr_static_init( gen_trees_header(); # endif #endif /* defined(GEN_TREES_H) || !defined(STDC) */ + return Z_OK; } /* =========================================================================== @@ -5541,7 +5542,7 @@ z_streamp z; * For conditions of distribution and use, see copyright notice in zlib.h */ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ /* #include "zutil.h" */ @@ -5771,7 +5772,7 @@ void zcfree (opaque, ptr) * For conditions of distribution and use, see copyright notice in zlib.h */ -/* @(#) $Id: zlib.c,v 1.9 2002/11/28 00:56:55 lindak Exp $ */ +/* @(#) $Id: zlib.c,v 1.10 2004/07/29 19:17:20 lindak Exp $ */ /* #include "zlib.h" */ diff --git a/bsd/net/zlib.h b/bsd/net/zlib.h index 688673ae5..5f90b2049 100644 --- a/bsd/net/zlib.h +++ b/bsd/net/zlib.h @@ -40,7 +40,7 @@ #define _ZLIB_H #include -#if !defined(KERNEL) || defined(__APPLE_API_PRIVATE) +#ifdef KERNEL_PRIVATE #if __cplusplus extern "C" { @@ -81,8 +81,10 @@ extern "C" { # define compress2 z_compress2 # define uncompress z_uncompress # define adler32 z_adler32 +#if 0 # define crc32 z_crc32 # define get_crc_table z_get_crc_table +#endif # define Byte z_Byte # define uInt z_uInt @@ -1126,6 +1128,7 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); if (adler != original_adler) error(); */ +#if 0 ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); /* Update a running crc with the bytes buf[0..len-1] and return the updated @@ -1141,6 +1144,7 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); } if (crc != original_crc) error(); */ +#endif /* various hacks, don't look :) */ @@ -1181,6 +1185,6 @@ ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); } #endif -#endif /* !KERNEL || __APPLE_API_PRIVATE */ +#endif KERNEL_PRIVATE #endif /* _ZLIB_H */ /* --- zlib.h */ diff --git a/bsd/netat/Makefile b/bsd/netat/Makefile index c31806cc6..9a37bee86 100644 --- a/bsd/netat/Makefile +++ b/bsd/netat/Makefile @@ -25,11 +25,17 @@ COMP_SUBDIRS = \ INST_SUBDIRS = \ -DATAFILES = appletalk.h atp.h asp.h at_pcb.h at_var.h aurp.h \ - debug.h ddp.h ep.h lap.h nbp.h pap.h sysglue.h zip.h \ - adsp.h adsp_internal.h \ - at_pat.h at_snmp.h at_aarp.h at_ddp_brt.h \ - routing_tables.h rtmp.h +DATAFILES = appletalk.h atp.h asp.h aurp.h \ + ddp.h ep.h lap.h nbp.h pap.h zip.h \ + adsp.h at_pat.h at_snmp.h at_aarp.h \ + rtmp.h + +PRIVATE_DATAFILES = \ + debug.h routing_tables.h sysglue.h at_var.h + +PRIVATE_KERNELFILES = \ + adsp_internal.h at_ddp_brt.h at_pcb.h + INSTALL_MI_LIST = ${DATAFILES} @@ -39,6 +45,10 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = netat +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + +INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netat/adsp.c b/bsd/netat/adsp.c index 02d01c963..b6c1cf6e6 100644 --- a/bsd/netat/adsp.c +++ b/bsd/netat/adsp.c @@ -133,150 +133,151 @@ adspWriteHandler(gref, mp) void *sp; 
switch(gbuf_type(mp)) { - case MSG_DATA: - if (gref->info == 0) { - gbuf_freem(mp); - return(STR_IGNORE); - } - /* - * Fill in the global stuff - */ - ap = (struct adspcmd *)gbuf_rptr(mp); - ap->gref = gref; - ap->ioc = 0; - ap->mp = mp; - sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); - switch(ap->csCode) { - case dspWrite: - if ((error = adspWrite(sp, ap))) - gbuf_freem(mp); - return(STR_IGNORE); - case dspAttention: - if ((error = adspAttention(sp, ap))) - gbuf_freem(mp); - return(STR_IGNORE); - } - case MSG_IOCTL: - if (gref->info == 0) { - adspioc_ack(EPROTO, mp, gref); - return(STR_IGNORE); - } - iocbp = (ioc_t *) gbuf_rptr(mp); - if (ADSP_IOCTL(iocbp->ioc_cmd)) { - iocbp->ioc_count = sizeof(*ap) - 1; - if (gbuf_cont(mp) == 0) { - adspioc_ack(EINVAL, mp, gref); - return(STR_IGNORE); - } - ap = (struct adspcmd *) gbuf_rptr(gbuf_cont(mp)); - ap->gref = gref; - ap->ioc = (caddr_t) mp; - ap->mp = gbuf_cont(mp); /* request head */ - ap->ioResult = 0; - - if ((gref->info == 0) && ((iocbp->ioc_cmd != ADSPOPEN) && - (iocbp->ioc_cmd != ADSPCLLISTEN))) { - ap->ioResult = errState; - - adspioc_ack(EINVAL, mp, gref); - return(STR_IGNORE); - } - } - sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); - switch(iocbp->ioc_cmd) { - case ADSPOPEN: - case ADSPCLLISTEN: - ap->socket = ((CCBPtr)sp)->localSocket; - flag = (adspMode(ap) == ocAccept) ? 1 : 0; - if (flag && ap->socket) { - if (adspDeassignSocket((CCBPtr)sp) >= 0) - ap->socket = 0; - } - if ((ap->socket == 0) && - ((ap->socket = - (at_socket)adspAssignSocket(gref, flag)) == 0)) { - adspioc_ack(EADDRNOTAVAIL, mp, gref); - return(STR_IGNORE); - } - ap->csCode = iocbp->ioc_cmd == ADSPOPEN ? dspInit : dspCLInit; - if ((error = adspInit(sp, ap)) == 0) { + case MSG_DATA: + if (gref->info == 0) { + gbuf_freem(mp); + return(STR_IGNORE); + } + /* + * Fill in the global stuff + */ + ap = (struct adspcmd *)gbuf_rptr(mp); + ap->gref = gref; + ap->ioc = 0; + ap->mp = mp; + sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); switch(ap->csCode) { - case dspInit: - /* and open the connection */ - ap->csCode = dspOpen; - error = adspOpen(sp, ap); - break; - case dspCLInit: - /* ADSPCLLISTEN */ - ap->csCode = dspCLListen; - error = adspCLListen(sp, ap); - break; + case dspWrite: + if ((error = adspWrite(sp, ap))) + gbuf_freem(mp); + return(STR_IGNORE); + case dspAttention: + if ((error = adspAttention(sp, ap))) + gbuf_freem(mp); + return(STR_IGNORE); } - } - if (error) - adspioc_ack(error, mp, gref); /* if this failed req complete */ - return(STR_IGNORE); - case ADSPCLOSE: - ap->csCode = dspClose; - if ((error = adspClose(sp, ap))) { - adspioc_ack(error, mp, gref); - break; - } - break; - case ADSPCLREMOVE: - ap->csCode = dspCLRemove; - error = adspClose(sp, ap); - adspioc_ack(error, mp, gref); - return(STR_IGNORE); - case ADSPCLDENY: - ap->csCode = dspCLDeny; - if ((error = adspCLDeny(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPSTATUS: - ap->csCode = dspStatus; - if ((error = adspStatus(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPREAD: - ap->csCode = dspRead; - if ((error = adspRead(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPATTENTION: - ap->csCode = dspAttention; - if ((error = adspReadAttention(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPOPTIONS: - ap->csCode = dspOptions; - if ((error = adspOptions(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPRESET: - ap->csCode = dspReset; - if 
((error = adspReset(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPNEWCID: - ap->csCode = dspNewCID; - if ((error = adspNewCID(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - default: - return(STR_PUTNEXT); /* pass it on down */ - } - return(STR_IGNORE); - case MSG_PROTO: - default: - gbuf_freem(mp); + case MSG_IOCTL: + if (gref->info == 0) { + adspioc_ack(EPROTOTYPE, mp, gref); + return(STR_IGNORE); + } + iocbp = (ioc_t *) gbuf_rptr(mp); + if (ADSP_IOCTL(iocbp->ioc_cmd)) { + iocbp->ioc_count = sizeof(*ap) - 1; + if (gbuf_cont(mp) == 0) { + adspioc_ack(EINVAL, mp, gref); + return(STR_IGNORE); + } + ap = (struct adspcmd *) gbuf_rptr(gbuf_cont(mp)); + ap->gref = gref; + ap->ioc = (caddr_t) mp; + ap->mp = gbuf_cont(mp); /* request head */ + ap->ioResult = 0; + + if ((gref->info == 0) && ((iocbp->ioc_cmd != ADSPOPEN) && + (iocbp->ioc_cmd != ADSPCLLISTEN))) { + ap->ioResult = errState; + + adspioc_ack(EINVAL, mp, gref); + return(STR_IGNORE); + } + } else + return(STR_PUTNEXT); /* pass it on down */ + sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); + switch(iocbp->ioc_cmd) { + case ADSPOPEN: + case ADSPCLLISTEN: + ap->socket = ((CCBPtr)sp)->localSocket; + flag = (adspMode(ap) == ocAccept) ? 1 : 0; + if (flag && ap->socket) { + if (adspDeassignSocket((CCBPtr)sp) >= 0) + ap->socket = 0; + } + if ((ap->socket == 0) && + ((ap->socket = + (at_socket)adspAssignSocket(gref, flag)) == 0)) { + adspioc_ack(EADDRNOTAVAIL, mp, gref); + return(STR_IGNORE); + } + ap->csCode = iocbp->ioc_cmd == ADSPOPEN ? dspInit : dspCLInit; + if ((error = adspInit(sp, ap)) == 0) { + switch(ap->csCode) { + case dspInit: + /* and open the connection */ + ap->csCode = dspOpen; + error = adspOpen(sp, ap); + break; + case dspCLInit: + /* ADSPCLLISTEN */ + ap->csCode = dspCLListen; + error = adspCLListen(sp, ap); + break; + } + } + if (error) + adspioc_ack(error, mp, gref); /* if this failed req complete */ + return(STR_IGNORE); + case ADSPCLOSE: + ap->csCode = dspClose; + if ((error = adspClose(sp, ap))) { + adspioc_ack(error, mp, gref); + break; + } + break; + case ADSPCLREMOVE: + ap->csCode = dspCLRemove; + error = adspClose(sp, ap); + adspioc_ack(error, mp, gref); + return(STR_IGNORE); + case ADSPCLDENY: + ap->csCode = dspCLDeny; + if ((error = adspCLDeny(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPSTATUS: + ap->csCode = dspStatus; + if ((error = adspStatus(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPREAD: + ap->csCode = dspRead; + if ((error = adspRead(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPATTENTION: + ap->csCode = dspAttention; + if ((error = adspReadAttention(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPOPTIONS: + ap->csCode = dspOptions; + if ((error = adspOptions(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPRESET: + ap->csCode = dspReset; + if ((error = adspReset(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + case ADSPNEWCID: + ap->csCode = dspNewCID; + if ((error = adspNewCID(sp, ap))) { + adspioc_ack(error, mp, gref); + } + return(STR_IGNORE); + default: + return(STR_PUTNEXT); /* pass it on down */ + } + return(STR_IGNORE); + case MSG_PROTO: + default: + gbuf_freem(mp); } } diff --git a/bsd/netat/adsp.h b/bsd/netat/adsp.h index bf40ffbd5..315a7decc 100644 --- a/bsd/netat/adsp.h +++ b/bsd/netat/adsp.h @@ -31,6 +31,10 @@ #ifndef _NETAT_ADSP_H_ #define 
_NETAT_ADSP_H_ #include +#include + +#ifdef __APPLE_API_OBSOLETE + /* ADSP flags for read, write, and close routines */ #define ADSP_EOM 0x01 /* Sent or received EOM with data */ @@ -269,7 +273,7 @@ struct tpb { #endif */ -typedef long (*ProcPtr)(); +typedef long (*ProcPtr)(); /* XXX */ typedef ProcPtr *ProcHandle; typedef char *Ptr; typedef Ptr *Handle; @@ -663,8 +667,7 @@ typedef struct { #define ADSPGETSOCK ((AT_MID_ADSP<<8) | 239) #define ADSPGETPEER ((AT_MID_ADSP<<8) | 238) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* from h/adsp_adsp.h */ @@ -674,19 +677,22 @@ typedef struct { #define STR_PUTBACK 2 #define STR_QTIME (HZ >> 3) -extern int adspInit(); -extern int adspOpen(); -extern int adspCLListen(); -extern int adspClose(); -extern int adspCLDeny(); -extern int adspStatus(); -extern int adspRead(); -extern int adspWrite(); -extern int adspAttention(); -extern int adspOptions(); -extern int adspReset(); -extern int adspNewCID(); -extern int adspPacket(); +struct ccb; +#define CCBPtr struct ccb * +extern int adspInit(CCBPtr sp, struct adspcmd *ap); +extern int adspOpen(register CCBPtr sp, register struct adspcmd *pb); +extern int adspCLListen( register CCBPtr sp, register struct adspcmd *pb); +extern int adspClose(register CCBPtr sp, register struct adspcmd *pb); +extern int adspCLDeny(struct adspcmd *pb, CCBPtr sp); +extern int adspStatus(CCBPtr sp, register struct adspcmd *pb); +extern int adspRead(register CCBPtr sp, register struct adspcmd *pb); +extern int adspWrite(CCBPtr sp, struct adspcmd *pb); +extern int adspAttention(register struct adspcmd *pb, register CCBPtr sp); +extern int adspOptions(CCBPtr sp, struct adspcmd *pb); +extern int adspReset(CCBPtr sp, struct adspcmd *pb); +extern int adspNewCID(CCBPtr sp, struct adspcmd *pb); +extern int adspPacket(gref_t *gref, gbuf_t *mp); +#undef CCBPtr struct adsp_debug { @@ -701,6 +707,6 @@ struct adsp_debug { int ad_sendWdwSeq; }; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_ADSP_H_ */ diff --git a/bsd/netat/adsp_CLDeny.c b/bsd/netat/adsp_CLDeny.c index 9d6a9b7e7..a7e14c4b0 100644 --- a/bsd/netat/adsp_CLDeny.c +++ b/bsd/netat/adsp_CLDeny.c @@ -62,9 +62,7 @@ * errState not a connection listener * errAborted request aborted by a Remove call */ -int adspCLDeny(sp, pb) /* (DSPPBPtr pb) */ - struct adspcmd *pb; - CCBPtr sp; +int adspCLDeny(struct adspcmd *pb, CCBPtr sp) { gbuf_t *mp; ADSP_FRAMEPtr adspp; diff --git a/bsd/netat/adsp_Close.c b/bsd/netat/adsp_Close.c index 684c47204..179d60e96 100644 --- a/bsd/netat/adsp_Close.c +++ b/bsd/netat/adsp_Close.c @@ -222,7 +222,7 @@ int AbortIO(sp, err) /* * Complete all outstanding transactions. */ - total += CompleteQueue(&sp->sapb, err); /* Abort outstanding send attentions */ + total = CompleteQueue(&sp->sapb, err); /* Abort outstanding send attentions */ CompleteQueue(&sp->frpb, err); /* Abort outstanding forward resets */ if (sp->sbuf_mb) { /* clear the send queue */ diff --git a/bsd/netat/adsp_Control.c b/bsd/netat/adsp_Control.c index 69221b4fb..fe9d6803a 100644 --- a/bsd/netat/adsp_Control.c +++ b/bsd/netat/adsp_Control.c @@ -165,7 +165,7 @@ top: gbuf_wset(mp,DDPL_FRAME_LEN); /* leave room for DDP header */ if (sp->sendCtl) { - short mask; + short mask = 0; i = sp->sendCtl; /* get local copy bitmap of */ /* which ctl packets to send. 
*/ diff --git a/bsd/netat/adsp_Timer.c b/bsd/netat/adsp_Timer.c index 94127cf21..d528e7e3a 100644 --- a/bsd/netat/adsp_Timer.c +++ b/bsd/netat/adsp_Timer.c @@ -179,9 +179,9 @@ send: void TimerTick_funnel(void *arg) { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); TimerTick(); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } static StopTimer; diff --git a/bsd/netat/adsp_attention.c b/bsd/netat/adsp_attention.c index 18ce61f65..8a0510ac4 100644 --- a/bsd/netat/adsp_attention.c +++ b/bsd/netat/adsp_attention.c @@ -70,9 +70,7 @@ * errAttention attention message too long * errAborted request aborted by Remove or Close call */ -int adspAttention(sp, pb) /* (DSPPBPtr pb) */ - register struct adspcmd *pb; - register CCBPtr sp; +int adspAttention(register struct adspcmd *pb, register CCBPtr sp) { int s; register gbuf_t *mp, *nmp; diff --git a/bsd/netat/adsp_internal.h b/bsd/netat/adsp_internal.h index 268cbe068..e5a48476d 100644 --- a/bsd/netat/adsp_internal.h +++ b/bsd/netat/adsp_internal.h @@ -21,12 +21,11 @@ */ #ifndef _NETAT_ADSP_INTERNAL_H_ #define _NETAT_ADSP_INTERNAL_H_ -#include #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef __APPLE_API_OBSOLETE +#ifdef KERNEL_PRIVATE /* from h/adsp_portab.h */ @@ -141,9 +140,8 @@ typedef struct TNetworkTransition { ProcPtr netValidProc; /* pointer to the network valid procedure */ } TNetworkTransition, *TPNetworkTransition; -typedef long (*NetworkTransitionProcPtr)(); - /* (TPNetworkTransition nettrans, - unsigned long thenet); */ +typedef long (*NetworkTransitionProcPtr)(TPNetworkTransition nettrans, + unsigned long thenet); /* * This is the connection control block */ @@ -309,34 +307,33 @@ typedef struct { /* fron h/adsp_supp.h */ -void CallUserRoutine(); /* (CCB FPTR sp); */ +void CallUserRoutine(CCBPtr sp); /* (CCB FPTR sp); */ /* * Add queue element to end of queue. Pass Address of ptr to * 1st element of queue +int qAddToEnd(struct qlink **qhead, struct qlink *qelem); */ -int qAddToEnd(); /* (void FPTR FPTR qhead, void FPTR qelem); */ + /* (void FPTR FPTR qhead, void FPTR qelem); */ /* * Hunt down a linked list of queue elements looking for an element with * 'data' at 'offset' bytes into the queue element. 
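 *
 * For example (a hypothetical caller): qfind_w(&head,
 * offsetof(struct elem, e_port), 42) would walk the list at head and
 * return the first element whose 16-bit e_port field equals 42, or a
 * null pointer when nothing matches.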
*/ -void *qfind_b(); /* (void *qhead, word offset, word data); */ -void *qfind_w(); /* (void *qhead, word offset, word data); */ -void *qfind_p(); /* (void *qhead, word offset, void *ptr); */ -void *qfind_o(); /* (void *qhead, word offset, void *ptr); */ -void *qfind_m(); /* (void *qhead, void *match, - ProcPtr compare_fnx); */ +void *qfind_b(void *qhead, word offset, word data); +void *qfind_w(void *qhead, word offset, word data); +void *qfind_p(void *qhead, word offset, void *ptr); +void *qfind_o(void *qhead, word offset, void *ptr); +void *qfind_m(CCBPtr qhead, void *match, ProcPtr compare_fnx); /* * Routines to handle sorted timer queues */ -void InsertTimerElem(); /* (TimerElemPtr *qhead, TimerElemPtr t, - word val); */ -void RemoveTimerElem(); /* (TimerElemPtr *qhead, TimerElemPtr t); */ -void TimerQueueTick(); /* (TimerElemPtr *qhead);*/ +void InsertTimerElem(TimerElemPtr *qhead, TimerElemPtr t, int val); +void RemoveTimerElem(TimerElemPtr *qhead, TimerElemPtr t); +void TimerQueueTick(TimerElemPtr *qhead); /* from h/adsp_global.h */ @@ -355,7 +352,7 @@ extern GLOBAL adspGlobal; /* Address of ptr to list of ccb's */ #define AT_ADSP_STREAMS ((CCB **)&(adspGlobal.ccbList)) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_ADSP_INTERNAL_H_ */ diff --git a/bsd/netat/appletalk.h b/bsd/netat/appletalk.h index 7fbc4e38e..615164c37 100644 --- a/bsd/netat/appletalk.h +++ b/bsd/netat/appletalk.h @@ -39,6 +39,8 @@ #include #include +#ifdef __APPLE_API_OBSOLETE + /* Non-aligned types are used in packet headers. */ @@ -281,19 +283,18 @@ typedef struct at_state { #define AT_ST_ZT_CHANGED 0x0800 /* zone table changed (for SNMP) */ #define AT_ST_NBP_CHANGED 0x1000 /* if nbp table changed (for SNMP)*/ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern at_state_t at_state; /* global state of AT network */ #define ROUTING_MODE (at_state.flags & AT_ST_ROUTER) #define MULTIHOME_MODE (at_state.flags & AT_ST_MULTIHOME) #define MULTIPORT_MODE (ROUTING_MODE || MULTIHOME_MODE) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ /* defines originally from h/at_elap.h */ #define AT_ADDR 0 #define ET_ADDR 1 #define AT_ADDR_NO_LOOP 2 /* disables packets from looping back */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_APPLETALK_H_ */ diff --git a/bsd/netat/asp.h b/bsd/netat/asp.h index a18a74d85..fd7d85c24 100644 --- a/bsd/netat/asp.h +++ b/bsd/netat/asp.h @@ -32,6 +32,8 @@ #define _NETAT_ASP_H_ #include +#ifdef __APPLE_API_OBSOLETE + #define ASP_Version 0x100 #define ASPFUNC_CloseSess 1 @@ -125,8 +127,7 @@ union asp_primitives { asp_command_ind_t CommandInd; }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define ASPSTATE_Close 0 #define ASPSTATE_Idle 1 @@ -214,6 +215,6 @@ typedef struct asp_scb { atevent_t delay_event; } asp_scb_t; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_ASP_H_ */ diff --git a/bsd/netat/asp_proto.c b/bsd/netat/asp_proto.c index bcaef1ac3..265de3369 100644 --- a/bsd/netat/asp_proto.c +++ b/bsd/netat/asp_proto.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -81,7 +82,7 @@ void asp_init(); void asp_ack_reply(); void asp_nak_reply(); void asp_clock(); -void asp_clock_funnel(void *); +void asp_clock_locked(void *); int asp_open(); int asp_close(); int asp_wput(); @@ -97,13 +98,14 @@ StaticProc void 
asp_timout(); StaticProc void asp_untimout(); StaticProc void asp_hangup(); StaticProc void asp_send_tickle(); -StaticProc void asp_send_tickle_funnel(void *); +StaticProc void asp_send_tickle_locked(void *); StaticProc void asp_accept(); StaticProc int asp_send_req(); extern at_ifaddr_t *ifID_home; extern int atp_pidM[]; extern gref_t *atp_inputQ[]; +extern lck_mtx_t *atalk_mutex; gbuf_t *scb_resource_m = 0; unsigned char asp_inpC[256]; asp_scb_t *asp_scbQ[256]; @@ -265,7 +267,7 @@ asp_close(gref) */ scb->tmo_cnt = 0; asp_untimout(asp_hangup, scb); - untimeout(asp_send_tickle_funnel, (void *)scb); /* added for 2225395 */ + untimeout(asp_send_tickle_locked, (void *)scb); /* added for 2225395 */ /* * free the asp session control block @@ -493,7 +495,7 @@ int asp_wput(gref, m) case ASPIOC_GetLocEntity: if ((gbuf_cont(mioc) == 0) || (scb->atp_state == 0)) { - asp_iocnak(gref, mioc, EPROTO); + asp_iocnak(gref, mioc, EPROTOTYPE); return 0; } *(at_inet_t *)gbuf_rptr(gbuf_cont(mioc)) = scb->loc_addr; @@ -501,7 +503,7 @@ int asp_wput(gref, m) case ASPIOC_GetRemEntity: if ((gbuf_cont(mioc) == 0) || (scb->atp_state == 0)) { - asp_iocnak(gref, mioc, EPROTO); + asp_iocnak(gref, mioc, EPROTOTYPE); return 0; } *(at_inet_t *)gbuf_rptr(gbuf_cont(mioc)) = scb->rem_addr; @@ -509,7 +511,7 @@ int asp_wput(gref, m) case ASPIOC_GetSession: if ((mdata = gbuf_cont(mioc)) == 0) { - asp_iocnak(gref, mioc, EPROTO); + asp_iocnak(gref, mioc, EPROTOTYPE); return 0; } addr = (at_inet_t *)gbuf_rptr(mdata); @@ -518,11 +520,11 @@ int asp_wput(gref, m) server_scb = asp_scbQ[addr->socket]; /*### LD 10/28/97: changed to make sure we're not accessing a null server_scb */ if (server_scb == 0) { - asp_iocnak(gref, mioc, EPROTO); + asp_iocnak(gref, mioc, EPROTOTYPE); return 0; } if (server_scb->sess_ioc == 0) { - asp_iocnak(gref, mioc, EPROTO); + asp_iocnak(gref, mioc, EPROTOTYPE); return 0; } @@ -774,15 +776,15 @@ asp_send_req(gref, mioc, dest, retry, awp, xo, state, bitmap) } /* - * send tickle routine - funnelled version + * send tickle routine - locked version */ StaticProc void -asp_send_tickle_funnel(scb) +asp_send_tickle_locked(scb) void *scb; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); asp_send_tickle((asp_scb_t *)scb); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } @@ -810,7 +812,7 @@ asp_send_tickle(scb) dPrintf(D_M_ASP, D_L_WARNING, ("asp_send_tickle: ENOBUFS 0, loc=%d, rem=%d\n", scb->loc_addr.socket,scb->rem_addr.socket)); - timeout(asp_send_tickle_funnel, (void *)scb, 10); + timeout(asp_send_tickle_locked, (void *)scb, 10); return; } gbuf_wset(mioc,sizeof(ioc_t)); @@ -832,7 +834,7 @@ asp_send_tickle(scb) ("asp_send_tickle: ENOBUFS 1, loc=%d, rem=%d\n", scb->loc_addr.socket,scb->rem_addr.socket)); - timeout(asp_send_tickle_funnel, (void *)scb, 10); + timeout(asp_send_tickle_locked, (void *)scb, 10); return; } } @@ -893,14 +895,14 @@ asp_accept(scb, sess_scb, m) } /* asp_accept */ /* - * timer routine - funneled version + * timer routine - locked version */ -void asp_clock_funnel(arg) +void asp_clock_locked(arg) void *arg; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); asp_clock(arg); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* @@ -929,7 +931,7 @@ void asp_clock(arg) ATENABLE(s, asptmo_lock); if (++scb_tmo_cnt == 0) scb_tmo_cnt++; - timeout(asp_clock_funnel, (void *)arg, (1<sess_ioc) == 0) { scb->get_wait = 1; - err = tsleep(&scb->event, PSOCK | PCATCH, "aspgetmsg", 0); + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + err = msleep(&scb->event, 
atalk_mutex, PSOCK | PCATCH, "aspgetmsg", 0); if (err != 0) { scb->get_wait = 0; ATENABLE(s, scb->lock); @@ -2278,10 +2281,10 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, bcopy (datptr, &datbuf, sizeof(datbuf)); } else { /* called from user space */ - if ((err = copyin((caddr_t)ctlptr, + if ((err = copyin(CAST_USER_ADDR_T(ctlptr), (caddr_t)&ctlbuf, sizeof(ctlbuf))) != 0) goto l_err; - if ((err = copyin((caddr_t)datptr, + if ((err = copyin(CAST_USER_ADDR_T(datptr), (caddr_t)&datbuf, sizeof(datbuf))) != 0) goto l_err; } @@ -2320,10 +2323,10 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, } else { /* called from user space */ if ((err = copyout((caddr_t)gbuf_rptr(mproto), - (caddr_t)ctlbuf.buf, ctlbuf.len)) != 0) + CAST_USER_ADDR_T(ctlbuf.buf), ctlbuf.len)) != 0) goto l_err; if ((err = copyout((caddr_t)&ctlbuf, - (caddr_t)ctlptr, sizeof(ctlbuf))) != 0) + CAST_USER_ADDR_T(ctlptr), sizeof(ctlbuf))) != 0) goto l_err; } @@ -2341,7 +2344,7 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, if (mreply == NULL) { /* called from user space */ - if ((err = copyout((caddr_t)gbuf_rptr(mdata), (caddr_t)&datbuf.buf[sum], len)) != 0) + if ((err = copyout((caddr_t)gbuf_rptr(mdata), CAST_USER_ADDR_T(&datbuf.buf[sum]), len)) != 0) goto l_err; } sum += len; @@ -2353,7 +2356,7 @@ int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, bcopy (&datbuf, datptr, sizeof(datbuf)); } else { /* called from user space */ - if ((err = copyout((caddr_t)&datbuf, (caddr_t)datptr, sizeof(datbuf))) != 0) + if ((err = copyout((caddr_t)&datbuf, CAST_USER_ADDR_T(datptr), sizeof(datbuf))) != 0) goto l_err; } diff --git a/bsd/netat/at.c b/bsd/netat/at.c index 089fd3889..3909aa1d9 100644 --- a/bsd/netat/at.c +++ b/bsd/netat/at.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -124,7 +125,8 @@ static int set_zones(ifz) * ifp is 0 if not an interface-specific ioctl. */ -int at_control(so, cmd, data, ifp) +int +at_control(so, cmd, data, ifp) struct socket *so; u_long cmd; caddr_t data; @@ -137,15 +139,18 @@ int at_control(so, cmd, data, ifp) struct ifaddr *ifa; struct sockaddr_dl *sdl; - if (cmd == 0x2000ff99) { + if ((cmd & 0xffff) == 0xff99) { + u_long fixed_command; + char ioctl_buffer[32]; /* *** this is a temporary hack to get at_send_to_dev() to work with BSD-style sockets instead of the special purpose system calls, ATsocket() and ATioctl(). *** */ - if ((error = at_ioctl((struct atpcb *)so->so_pcb, cmd, data, 0))) { + fixed_command = _IOW(0, 0xff99, user_addr_t); + if ((error = at_ioctl((struct atpcb *)so->so_pcb, fixed_command, data, 0))) { if (((struct atpcb *)so->so_pcb)->proto != ATPROTO_LAP) { ((struct atpcb *)so->so_pcb)->proto = ATPROTO_LAP; - error = at_ioctl((struct atpcb *)so->so_pcb, cmd, data, 0); + error = at_ioctl((struct atpcb *)so->so_pcb, fixed_command, data , 0); } } return(error); @@ -216,7 +221,7 @@ int at_control(so, cmd, data, ifp) at_def_zone_t *defzonep = (at_def_zone_t *)data; /* check for root access */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = suser(kauth_cred_get(), 0)) return(EACCES); ifID = 0; @@ -326,7 +331,7 @@ int at_control(so, cmd, data, ifp) { at_nbp_reg_t *nbpP = (at_nbp_reg_t *)data; nve_entry_t nve; - int error; + int error2; if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) return(ENOTREADY); @@ -382,14 +387,14 @@ int at_control(so, cmd, data, ifp) * this tuple in the registry and return ok response. 
*/ ATDISABLE(nve_lock, NVE_LOCK); - if ((error = nbp_new_nve_entry(&nve, ifID)) == 0) { + if ((error2 = nbp_new_nve_entry(&nve, ifID)) == 0) { nbpP->addr.net = ifID->ifThisNode.s_net; nbpP->addr.node = ifID->ifThisNode.s_node; nbpP->unique_nbp_id = nve.unique_nbp_id; } ATENABLE(nve_lock, NVE_LOCK); - return(error); + return(error2); break; } @@ -463,7 +468,7 @@ int at_control(so, cmd, data, ifp) at_router_params_t *rt = (at_router_params_t *)data; /* check for root access */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = suser(kauth_cred_get(), 0)) return(EACCES); /* when in routing/multihome mode the AIOCSETROUTER IOCTL @@ -503,7 +508,7 @@ int at_control(so, cmd, data, ifp) at_kern_err_t *keP = (at_kern_err_t *)data; /* check for root access */ - if (suser(p->p_ucred, &p->p_acflag)) + if (suser(kauth_cred_get(), 0)) return(EACCES); if (!(at_state.flags & AT_ST_STARTED)) @@ -534,7 +539,7 @@ int at_control(so, cmd, data, ifp) ret; /* check for root access */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = suser(kauth_cred_get(), 0)) return(EACCES); ret = ddp_shutdown(*count_only); @@ -561,7 +566,7 @@ int at_control(so, cmd, data, ifp) case SIOCSIFADDR: /* check for root access */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = suser(kauth_cred_get(), 0)) error = EACCES; else if (ifID) error = EEXIST; @@ -579,6 +584,7 @@ int at_control(so, cmd, data, ifp) ifID->aa_ifp = ifp; ifa = &ifID->aa_ifa; + ifnet_lock_exclusive(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if ((sdl = (struct sockaddr_dl *)ifa->ifa_addr) && (sdl->sdl_family == AF_LINK)) { @@ -599,14 +605,14 @@ int at_control(so, cmd, data, ifp) ifID->ifNodeAddress.sat_family = AF_APPLETALK; /* the address itself will be filled in when ifThisNode is set */ - s = splnet(); - TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); - splx(s); + if_attach_ifa(ifp, ifa); + ifnet_lock_done(ifp); switch (ifp->if_type) { case IFT_ETHER: - ether_attach_at(ifp, &ifID->at_dl_tag, - &ifID->aarp_dl_tag); + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: /* bonded ethernet */ + ether_attach_at(ifp); error = 0; ifID->cable_multicast_addr = etalk_multicast_addr; @@ -649,7 +655,7 @@ int at_control(so, cmd, data, ifp) deletion of interfaces *** */ case SIOCDIFADDR: /* check for root access */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = suser(kauth_cred_get(), 0)) error = EACCES; else if (!ifID) error = EINVAL; @@ -669,13 +675,11 @@ int at_control(so, cmd, data, ifp) /* let's make sure it's either -1 or a valid file descriptor */ if (cloned_fd != -1) { struct socket *cloned_so; - struct file *cloned_fp; - error = getsock(p->p_fd, cloned_fd, &cloned_fp); + error = file_socket(cloned_fd, &cloned_so); if (error){ splx(s); /* XXX */ break; } - cloned_so = (struct socket *)cloned_fp->f_data; clonedat_pcb = sotoatpcb(cloned_so); } else { clonedat_pcb = NULL; @@ -687,6 +691,7 @@ int at_control(so, cmd, data, ifp) at_pcb->ddp_flags = clonedat_pcb->ddp_flags; } splx(s); /* XXX */ + file_drop(cloned_fd); break; } diff --git a/bsd/netat/at_aarp.h b/bsd/netat/at_aarp.h index 80c8b88ec..61bd23db8 100644 --- a/bsd/netat/at_aarp.h +++ b/bsd/netat/at_aarp.h @@ -22,6 +22,12 @@ #ifndef _NETAT_AT_AARP_H_ #define _NETAT_AT_AARP_H_ #include +#ifdef KERNEL_PRIVATE +#include +#endif KERNEL_PRIVATE + +#ifdef __APPLE_API_OBSOLETE + /* * Copyright (c) 1988, 1989 Apple Computer, Inc. 
*/ @@ -90,6 +96,8 @@ typedef struct { /* Errors returned by AARP routines */ #define AARP_ERR_NOT_OURS 1 /* not our appletalk address */ +#ifdef KERNEL_PRIVATE + /*************************************************/ /* Declarations for AARP Address Map Table (AMT) */ /*************************************************/ @@ -174,13 +182,10 @@ typedef struct { ) ? 1 : 0 \ ) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE - int aarp_chk_addr(at_ddp_t *, at_ifaddr_t *); int aarp_rcv_pkt(aarp_pkt_t *, at_ifaddr_t *); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AT_AARP_H_ */ diff --git a/bsd/netat/at_config.h b/bsd/netat/at_config.h index b9f6abef3..0955e7457 100644 --- a/bsd/netat/at_config.h +++ b/bsd/netat/at_config.h @@ -27,6 +27,8 @@ #define _NETAT_AT_CONFIG_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* originally from if_cnt.h * * defines for if_stat struct. @@ -73,4 +75,5 @@ typedef struct if_zone { } if_zone_t; +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AT_CONFIG_H_ */ diff --git a/bsd/netat/at_ddp_brt.h b/bsd/netat/at_ddp_brt.h index 0b40c9893..aab59d573 100644 --- a/bsd/netat/at_ddp_brt.h +++ b/bsd/netat/at_ddp_brt.h @@ -27,7 +27,8 @@ #ifndef _NETAT_AT_DDP_BRT_H_ #define _NETAT_AT_DDP_BRT_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE +#ifdef __APPLE_API_OBSOLETE typedef struct { int age_flag; @@ -80,6 +81,7 @@ typedef struct { /* Best Router Cache */ extern ddp_brt_t at_ddp_brt[BRTSIZE]; -#endif /* __APPLE_API_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETAT_AT_DDP_BRT_H_ */ diff --git a/bsd/netat/at_pat.h b/bsd/netat/at_pat.h index 107b3857d..99dc7ee29 100644 --- a/bsd/netat/at_pat.h +++ b/bsd/netat/at_pat.h @@ -28,6 +28,8 @@ #define _NETAT_AT_PAT_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* This is header for the PAT module. This contains a table of pointers that * should get initialized with the BNET stuff and the ethernet driver. The * number of interfaces supported should be communicated. Should include @@ -62,4 +64,5 @@ typedef struct { ((*((unsigned long *)(a1)) == *((unsigned long *)(a2))) && \ (a1[4] == a2[4]) \ ) +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AT_PAT_H_ */ diff --git a/bsd/netat/at_pcb.c b/bsd/netat/at_pcb.c index 7c120650e..09b6801e2 100644 --- a/bsd/netat/at_pcb.c +++ b/bsd/netat/at_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -130,9 +130,11 @@ int at_pcballoc(so, head) pcb->atpcb_head = head; pcb->atpcb_socket = so; + atalk_lock(); /* makes sure the list is locked while inserting atpcb */ if (head) insque((queue_t)pcb, (queue_t)head); so->so_pcb = (caddr_t)pcb; + atalk_unlock(); return (0); } @@ -149,9 +151,10 @@ int at_pcbdetach(pcb) } so->so_pcb = 0; + so->so_flags |= SOF_PCBCLEARING; if ((pcb->atpcb_next) && (pcb->atpcb_prev)) remque((queue_t)pcb); - zfree(atpcb_zone, (vm_offset_t)pcb); + zfree(atpcb_zone, pcb); sofree(so); return(0); } diff --git a/bsd/netat/at_pcb.h b/bsd/netat/at_pcb.h index 9a4d4297a..6d9dfb28b 100644 --- a/bsd/netat/at_pcb.h +++ b/bsd/netat/at_pcb.h @@ -60,7 +60,8 @@ /* at_pcb.h */ #include -#ifdef __APPLE_API_PRIVATE +#ifdef __APPLE_API_OBSOLETE +#ifdef KERNEL_PRIVATE /* * Common structure pcb for internet protocol implementation. 
* Here are stored pointers to local and foreign host table @@ -68,6 +69,8 @@ * up (to a socket structure) and down (to a protocol-specific) * control block. */ +struct atpcb; +typedef struct atpcb gref_t; struct atpcb { struct atpcb *atpcb_next, /* pointers to other pcb's */ *atpcb_prev, @@ -96,14 +99,13 @@ struct atpcb { atlock_t lock; atevent_t event; atevent_t iocevent; - int (*writeable)(); - int (*readable)(); + int (*writeable)(gref_t *gref); + int (*readable)(gref_t *gref); struct selinfo si; /* BSD 4.4 selinfo structure for selrecord/selwakeup */ }; #define sotoatpcb(so)((struct atpcb *)(so)->so_pcb) -#endif /* __APPLE_API_PRIVATE */ /* ddp_flags */ #define DDPFLG_CHKSUM 0x01 /* DDP checksums to be used on this connection */ @@ -112,12 +114,13 @@ struct atpcb { #define DDPFLG_HDRINCL 0x08 /* user supplies entire DDP header */ #define DDPFLG_STRIPHDR 0x200 /* drop DDP header on receive (raw) */ -#ifdef __APPLE_API_PRIVATE -#ifdef KERNEL -typedef struct atpcb gref_t; +int at_pcballoc(struct socket *, struct atpcb *); +int at_pcbdetach(struct atpcb *); +int at_pcbbind(struct atpcb *, struct sockaddr *); + +int atalk_getref(struct fileproc *, int , gref_t ** , struct proc *, int); +int atalk_getref_locked(struct fileproc *, int , gref_t ** , struct proc *, int); + -int at_pcballoc __P((struct socket *, struct atpcb *)); -int at_pcbdetach __P((struct atpcb *)); -int at_pcbbind __P((struct atpcb *, struct sockaddr *)); -#endif /* KERNEL */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/netat/at_proto.c b/bsd/netat/at_proto.c index 3a8cb6055..d0e0934e9 100644 --- a/bsd/netat/at_proto.c +++ b/bsd/netat/at_proto.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -45,7 +46,6 @@ #include #include -struct domain atalkdomain; extern int ddp_pru_abort(struct socket *so); @@ -71,6 +71,7 @@ extern int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, extern int ddp_pru_shutdown(struct socket *so); extern int ddp_pru_sockaddr(struct socket *so, struct sockaddr **nam); +void atalk_dominit(); /* * Dummy usrreqs struct created by Ted for FreeBSD 3.x integration. 
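/*
 * With the funnel gone, the at_pcb.c hunks above serialize the pcb queue
 * with the AppleTalk domain mutex and mark the socket with SOF_PCBCLEARING
 * before it can be freed.  The insertion half of that pattern, reduced to
 * its essentials (at_pcb_link is an illustrative name only):
 */
static int
at_pcb_link(struct socket *so, struct atpcb *pcb, struct atpcb *head)
{
	atalk_lock();		/* hold the domain mutex across list surgery */
	if (head)
		insque((queue_t)pcb, (queue_t)head);
	so->so_pcb = (caddr_t)pcb;
	atalk_unlock();
	return (0);
}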
@@ -81,24 +82,64 @@ struct pr_usrreqs ddp_usrreqs = { ddp_pru_connect, pru_connect2_notsupp, ddp_pru_control, ddp_pru_detach, ddp_pru_disconnect, pru_listen_notsupp, ddp_pru_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, ddp_pru_send, pru_sense_null, ddp_pru_shutdown, - ddp_pru_sockaddr, sosend, soreceive, sopoll + ddp_pru_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; +struct domain atalkdomain; struct protosw atalksw[] = { { SOCK_RAW, &atalkdomain, /*protocol*/ 0, PR_ATOMIC|PR_ADDR, /*input*/ 0, /*output*/ 0, /*clinput*/ 0, ddp_ctloutput, /*ousrreq*/ 0, ddp_init, /*fastto*/ 0, /*slowto*/ 0, /*drain*/ 0, - /*sysctl*/ 0, &ddp_usrreqs + /*sysctl*/ 0, &ddp_usrreqs, + 0, 0, 0 } }; struct domain atalkdomain = -{ AF_APPLETALK, "appletalk", 0, 0, 0, +{ AF_APPLETALK, "appletalk", atalk_dominit, 0, 0, atalksw, 0, 0, 0, 0, DDP_X_HDR_SIZE, 0 }; +struct domain * atalkdom = &atalkdomain; +lck_mtx_t *atalk_mutex = NULL; + SYSCTL_NODE(_net, PF_APPLETALK, appletalk, CTLFLAG_RW, 0, "AppleTalk Family"); +void +atalk_dominit() +{ + atalk_mutex = atalkdom->dom_mtx; +} + +void +atalk_lock() +{ + int error = 0, lr, lr_saved; +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(atalkdom->dom_mtx); +} + +void +atalk_unlock() +{ + int error = 0, lr, lr_saved; +#ifdef __ppc__ + __asm__ volatile("mflr %0" : "=r" (lr)); + lr_saved = lr; +#endif + lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(atalkdom->dom_mtx); + +} + + + + + diff --git a/bsd/netat/at_snmp.h b/bsd/netat/at_snmp.h index 476e6d481..3ec477cac 100644 --- a/bsd/netat/at_snmp.h +++ b/bsd/netat/at_snmp.h @@ -23,6 +23,8 @@ #define _NETAT_AT_SNMP_H_ #include +#ifdef __APPLE_API_OBSOLETE + #define MAX_PHYS_ADDR_SIZE 6 /* maximum physical addr size */ #define MAX_IFS 25 /* max # interfaces */ #define IF_NAME_SIZE 6 /* max name length of I/F name */ @@ -212,4 +214,5 @@ typedef struct snmpStats { #define SNMP_TYPE(var,type) ((var & SNMP_OBJ_TYPE_MASK) == type) +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AT_SNMP_H_ */ diff --git a/bsd/netat/at_var.h b/bsd/netat/at_var.h index bd8a4f82e..9eb7c5795 100644 --- a/bsd/netat/at_var.h +++ b/bsd/netat/at_var.h @@ -23,8 +23,11 @@ * Copyright (c) 1998 Apple Computer, Inc. */ +#ifndef _NETAT_AT_VAR_H_ +#define _NETAT_AT_VAR_H_ + #include -#ifdef __APPLE_API_PRIVATE +#ifdef __APPLE_API_OBSOLETE #include /* at_var.h */ @@ -37,20 +40,17 @@ #define MCAST_TRACK_DELETE 2 #define MCAST_TRACK_CHECK 3 -#define ETHERNET_ADDR_LEN 6 -#define IFNAMESIZ 16 - /* maximum number of I/F's allowed */ #define IF_TOTAL_MAX 17 /* max count of any combination of I/F's */ /* 17 == (1+(4*4)); 9 and 13 would also be reasonable values */ #define FDDI_OR_TOKENRING(i) ((i == IFT_FDDI) || (i == IFT_ISO88025)) - +#define ETHERNET_ADDR_LEN 6 +#define IFNAMESIZ 16 typedef struct etalk_addr { u_char etalk_addr_octet[ETHERNET_ADDR_LEN]; } etalk_addr_t; - typedef char if_name_t[IFNAMESIZ]; typedef struct at_ifname_list { if_name_t at_if[IF_TOTAL_MAX]; @@ -118,6 +118,7 @@ typedef struct { short router_mix; } at_router_params_t; + typedef struct at_kern_err { int error; /* kernel error # (KE_xxx) */ int port1; @@ -143,6 +144,7 @@ typedef struct at_kern_err { #define KE_RTMP_OVERFLOW 10 #define KE_ZIP_OVERFLOW 11 +#ifdef KERNEL_PRIVATE /* * Interface address, AppleTalk version. One of these structures * is allocated for each AppleTalk address on an interface. 
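/*
 * atalk_lock()/atalk_unlock() above are thin wrappers around the domain
 * mutex that atalk_dominit() captures from atalkdom->dom_mtx.  Every
 * former *_funnel() timeout trampoline in the rest of this patch becomes
 * a *_locked() wrapper with exactly this shape (example_timer is a
 * stand-in, not a name from the patch):
 */
static void example_timer(void *arg);	/* real handler; expects atalk_mutex held */

static void
example_timer_locked(void *arg)
{
	atalk_lock();		/* was: thread_funnel_set(network_flock, TRUE) */
	example_timer(arg);
	atalk_unlock();		/* was: thread_funnel_set(network_flock, FALSE) */
}

/* armed and disarmed exactly like the funneled version:
 *	timeout(example_timer_locked, arg, ticks);
 *	untimeout(example_timer_locked, arg);
 */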
@@ -181,6 +183,7 @@ typedef struct at_ifaddr { /* for use by ZIP */ u_char ifNumRetries; + u_char ifGNIScheduled; /* to keep getnetinfo from being scheduled more than once */ at_nvestr_t ifZoneName; /* Added for routing support */ @@ -213,6 +216,7 @@ typedef struct at_ifaddr { middle of an elap_online operation */ } at_ifaddr_t; +#endif /* KERNEL_PRIVATE */ #define LAP_OFFLINE 0 /* LAP_OFFLINE MUST be 0 */ #define LAP_ONLINE 1 @@ -269,16 +273,16 @@ typedef struct at_ifaddr { #define ELAP_CFG_HOME 0x02 /* designate home port (one allowed) */ #define ELAP_CFG_SEED 0x08 /* set if it's a seed port */ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE extern TAILQ_HEAD(at_ifQueueHd, at_ifaddr) at_ifQueueHd; -int at_control __P((struct socket *, u_long, caddr_t, struct ifnet *)); -int ddp_usrreq __P((struct socket *, int, struct mbuf *, struct mbuf *, - struct mbuf *)); -int ddp_ctloutput __P((struct socket *, struct sockopt *)); -void ddp_init __P((void));; -void ddp_slowtimo __P((void)); -#endif +int at_control(struct socket *, u_long, caddr_t, struct ifnet *); +int ddp_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, + struct mbuf *); +int ddp_ctloutput(struct socket *, struct sockopt *); +void ddp_init(void);; +void ddp_slowtimo(void); +#endif /* KERNEL_PRIVATE */ /* * Define AppleTalk event subclass and specific AppleTalk events. @@ -302,7 +306,13 @@ struct kev_atalk_data { } node_data; }; +#ifdef KERNEL_PRIVATE + void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *address, at_nvestr_t *zone); void aarp_sched_probe(void *); +void atalk_lock(); +void atalk_unlock(); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ +#endif /* _NETAT_AT_VAR_H_ */ diff --git a/bsd/netat/atp.h b/bsd/netat/atp.h index cc590c659..421c265f0 100644 --- a/bsd/netat/atp.h +++ b/bsd/netat/atp.h @@ -48,6 +48,8 @@ #define _NETAT_ATP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* ATP function codes */ #define ATP_CMD_TREQ 0x01 /* TRequest packet */ @@ -184,8 +186,7 @@ typedef struct { #define ATP_SENDRESPONSE 2 #define ATP_GETREQUEST 3 -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* @@ -456,6 +457,6 @@ void atp_timout(void (*func)(), struct atp_trans *, int); void atp_untimout(void (*func)(), struct atp_trans *); int atp_tid(struct atp_state *); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_ATP_H_ */ diff --git a/bsd/netat/atp_misc.c b/bsd/netat/atp_misc.c index f8d34a439..91fecf154 100644 --- a/bsd/netat/atp_misc.c +++ b/bsd/netat/atp_misc.c @@ -277,13 +277,15 @@ void atp_rcb_timer() register struct atp_rcb *next_rcbp; extern struct atp_rcb_qhead atp_need_rel; extern struct atp_trans *trp_tmo_rcb; + struct timeval timenow; l_again: ATDISABLE(s, atpgen_lock); + getmicrouptime(&timenow); for (rcbp = atp_need_rel.head; rcbp; rcbp = next_rcbp) { next_rcbp = rcbp->rc_tlist.next; - if (abs(time.tv_sec - rcbp->rc_timestamp) > 30) { + if (abs(timenow.tv_sec - rcbp->rc_timestamp) > 30) { ATENABLE(s, atpgen_lock); atp_rcb_free(rcbp); goto l_again; diff --git a/bsd/netat/atp_open.c b/bsd/netat/atp_open.c index 5271592ab..8f07c0332 100644 --- a/bsd/netat/atp_open.c +++ b/bsd/netat/atp_open.c @@ -140,7 +140,7 @@ int atp_open(gref, flag) if (atp_rcb_data == NULL) { if (kmem_alloc(kernel_map, &temp, sizeof(struct atp_rcb) * NATP_RCB) != KERN_SUCCESS) return(ENOMEM); - if (atp_rcb_data == NULL) { /* in case we lost funnel while allocating */ + if (atp_rcb_data 
== NULL) { bzero((caddr_t)temp, sizeof(struct atp_rcb) * NATP_RCB); atp_rcb_data = (struct atp_rcb*)temp; for (i = 0; i < NATP_RCB; i++) { diff --git a/bsd/netat/atp_read.c b/bsd/netat/atp_read.c index 92f0ac6cd..25690a3c6 100644 --- a/bsd/netat/atp_read.c +++ b/bsd/netat/atp_read.c @@ -50,7 +50,7 @@ static void atp_trans_complete(); void atp_x_done(); -void atp_x_done_funnel(void *); +void atp_x_done_locked(void *); extern void atp_req_timeout(); /* @@ -63,9 +63,8 @@ void atp_treq_event(void *arg) register gref_t *gref = (gref_t *)arg; register gbuf_t *m; register struct atp_state *atp; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); atp = (struct atp_state *)gref->info; if (atp->dflag) atp = (struct atp_state *)atp->atp_msgq; @@ -86,7 +85,7 @@ void atp_treq_event(void *arg) if (m == 0) timeout(atp_treq_event, gref, 10); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } void atp_rput(gref, m) @@ -97,6 +96,7 @@ gbuf_t *m; register struct atp_state *atp; register int s, s_gen; gbuf_t *m_asp = NULL; + struct timeval timenow; atp = (struct atp_state *)gref->info; if (atp->dflag) @@ -399,9 +399,10 @@ gbuf_t *m; * update the bitmap and resend * the replies */ + getmicrouptime(&timenow); ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp) { - rcbp->rc_timestamp = time.tv_sec; + rcbp->rc_timestamp = timenow.tv_sec; if (rcbp->rc_timestamp == 0) rcbp->rc_timestamp = 1; } @@ -455,12 +456,12 @@ gbuf_t *m; } /* atp_rput */ void -atp_x_done_funnel(trp) +atp_x_done_locked(trp) void *trp; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); atp_x_done((struct atp_trans *)trp); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } @@ -491,7 +492,7 @@ register struct atp_trans *trp; atp = trp->tr_queue; trp->tr_state = TRANS_RELEASE; - timeout(atp_x_done_funnel, trp, 10); + timeout(atp_x_done_locked, trp, 10); } } } diff --git a/bsd/netat/atp_write.c b/bsd/netat/atp_write.c index 668fee473..865962ef6 100644 --- a/bsd/netat/atp_write.c +++ b/bsd/netat/atp_write.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -67,7 +68,7 @@ static int loop_cnt; /* for debugging loops */ static void atp_pack_bdsp(struct atp_trans *, struct atpBDS *); static int atp_unpack_bdsp(struct atp_state *, gbuf_t *, struct atp_rcb *, int, int); -void atp_trp_clock(), asp_clock(), asp_clock_funnel(), atp_trp_clock_funnel();; +void atp_trp_clock(), asp_clock(), asp_clock_locked(), atp_trp_clock_locked();; extern struct atp_rcb_qhead atp_need_rel; extern int atp_inited; @@ -82,6 +83,7 @@ extern gbuf_t *atp_resource_m; extern gref_t *atp_inputQ[]; extern int atp_pidM[]; extern at_ifaddr_t *ifID_home; +extern lck_mtx_t * atalk_mutex; static struct atp_trans *trp_tmo_list; struct atp_trans *trp_tmo_rcb; @@ -104,8 +106,8 @@ void atp_link() void atp_unlink() { - untimeout(asp_clock_funnel, (void *)&atp_inited); - untimeout(atp_trp_clock_funnel, (void *)&atp_inited); + untimeout(asp_clock_locked, (void *)&atp_inited); + untimeout(atp_trp_clock_locked, (void *)&atp_inited); atp_untimout(atp_rcb_timer, trp_tmo_rcb); trp_tmo_list = 0; @@ -464,6 +466,7 @@ void atp_send_replies(atp, rcbp) struct ddp_atp { char ddp_atp_hdr[TOTAL_ATP_HDR_SIZE]; }; + struct timeval timenow; ATDISABLE(s, atp->atp_lock); if (rcbp->rc_queue != atp) { @@ -501,6 +504,8 @@ void atp_send_replies(atp, rcbp) offset = 0; if (m0) space = gbuf_msgsize(m0); + else + space = 0; for (i = 0; i < cnt; i++) { if (rcbp->rc_snd[i] == 0) { if ((len = 
UAS_VALUE(bdsp->bdsBuffSz))) { @@ -583,9 +588,10 @@ nothing_to_send: * resources. */ if (rcbp->rc_xo && rcbp->rc_state != RCB_RELEASED) { + getmicrouptime(&timenow); ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp == 0) { - rcbp->rc_timestamp = time.tv_sec; + rcbp->rc_timestamp = timenow.tv_sec; if (rcbp->rc_timestamp == 0) rcbp->rc_timestamp = 1; ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); @@ -638,7 +644,7 @@ atp_pack_bdsp(trp, bdsp) if (len > bufsize) len = bufsize; copyout((caddr_t)gbuf_rptr(m), - (caddr_t)&buf[tmp], + CAST_USER_ADDR_T(&buf[tmp]), len); bufsize -= len; tmp += len; @@ -687,6 +693,7 @@ atp_unpack_bdsp(atp, m, rcbp, cnt, wait) gbuf_t *rc_xmt[ATP_TRESP_MAX]; unsigned char *m0_rptr, *m0_wptr; int err, offset, space; + struct timeval timenow; /* * get the user data structure pointer @@ -821,9 +828,10 @@ atp_unpack_bdsp(atp, m, rcbp, cnt, wait) */ l_send: if (rcbp->rc_xo) { + getmicrouptime(&timenow); ATDISABLE(s_gen, atpgen_lock); if (rcbp->rc_timestamp == 0) { - if ((rcbp->rc_timestamp = time.tv_sec) == 0) + if ((rcbp->rc_timestamp = timenow.tv_sec) == 0) rcbp->rc_timestamp = 1; ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); } @@ -1118,12 +1126,12 @@ atp_untimout(func, trp) } void -atp_trp_clock_funnel(arg) +atp_trp_clock_locked(arg) void *arg; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); atp_trp_clock(arg); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } void @@ -1149,7 +1157,7 @@ atp_trp_clock(arg) } ATENABLE(s, atptmo_lock); - timeout(atp_trp_clock_funnel, (void *)arg, (1<<5)); + timeout(atp_trp_clock_locked, (void *)arg, (1<<5)); } void @@ -1262,16 +1270,15 @@ void atp_retry_req(arg) { gbuf_t *m = (gbuf_t *)arg; gref_t *gref; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); gref = (gref_t *)((ioc_t *)gbuf_rptr(m))->ioc_private; if (gref->info) { ((asp_scb_t *)gref->info)->stat_msg = 0; atp_send_req(gref, m); } - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } void atp_send_rsp(gref, m, wait) @@ -1372,7 +1379,7 @@ int asp_pack_bdsp(trp, xm) gbuf_rinc(m, ATP_HDR_SIZE); if (UAL_VALUE(bdsp->bdsBuffAddr)) { - short tmp; + short tmp = 0; /* user expects data back */ m = gbuf_strip(m); @@ -1442,24 +1449,31 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) gbuf_t *m2, *m, *mioc; char bds[atpBDSsize]; - if ((*err = atalk_getref(0, fd, &gref, proc)) != 0) + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) return -1; if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) || (atp->atp_flags & ATP_CLOSING)) { dPrintf(D_M_ATP, D_L_ERROR, ("ATPsndreq: stale handle=0x%x, pid=%d\n", (u_int) gref, gref->pid)); - + file_drop(fd); *err = EINVAL; return -1; } + while ((mioc = gbuf_alloc(sizeof(ioc_t), PRI_MED)) == 0) { + struct timespec ts; + /* the value of 10 in terms of hz is 100ms */ + ts.tv_sec = 0; + ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; + ATDISABLE(s, atp->atp_delay_lock); - rc = tsleep(&atp->atp_delay_event, PSOCK | PCATCH, "atpmioc", 10); + rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpmioc", &ts); ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { *err = rc; + file_drop(fd); return -1; } @@ -1467,21 +1481,28 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) gbuf_wset(mioc,sizeof(ioc_t)); len -= atpBDSsize; while ((m2 = gbuf_alloc(len, PRI_MED)) == 0) { + struct timespec ts; + /* the value of 10 in terms of hz is 100ms */ + ts.tv_sec = 0; + ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; + ATDISABLE(s, atp->atp_delay_lock); - rc = tsleep(&atp->atp_delay_event, 
PSOCK | PCATCH, "atpm2", 10); + rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpm2", &ts); ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { gbuf_freeb(mioc); + file_drop(fd); *err = rc; return -1; } } gbuf_wset(m2, len); gbuf_cont(mioc) = m2; - if (((*err = copyin((caddr_t)buf, (caddr_t)bds, atpBDSsize)) != 0) - || ((*err = copyin((caddr_t)&buf[atpBDSsize], + if (((*err = copyin(CAST_USER_ADDR_T(buf), (caddr_t)bds, atpBDSsize)) != 0) + || ((*err = copyin(CAST_USER_ADDR_T(&buf[atpBDSsize]), (caddr_t)gbuf_rptr(m2), len)) != 0)) { gbuf_freem(mioc); + file_drop(fd); return -1; } gbuf_set_type(mioc, MSG_IOCTL); @@ -1503,11 +1524,17 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) * allocate and set up the transaction record */ while ((trp = atp_trans_alloc(atp)) == 0) { + struct timespec ts; + /* the value of 10 in terms of hz is 100ms */ + ts.tv_sec = 0; + ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; + ATDISABLE(s, atp->atp_delay_lock); - rc = tsleep(&atp->atp_delay_event, PSOCK | PCATCH, "atptrp", 10); + rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atptrp", &ts); ATENABLE(s, atp->atp_delay_lock); if (rc != 0) { gbuf_freem(mioc); + file_drop(fd); *err = rc; return -1; } @@ -1570,8 +1597,10 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) if (m) DDP_OUTPUT(m); - if (nowait) + if (nowait) { + file_drop(fd); return (int)tid; + } /* * wait for the transaction to complete @@ -1580,10 +1609,11 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) while ((trp->tr_state != TRANS_DONE) && (trp->tr_state != TRANS_FAILED) && (trp->tr_state != TRANS_ABORTING)) { trp->tr_rsp_wait = 1; - rc = tsleep(&trp->tr_event, PSOCK | PCATCH, "atpsndreq", 0); + rc = msleep(&trp->tr_event, atalk_mutex, PSOCK | PCATCH, "atpsndreq", 0); if (rc != 0) { trp->tr_rsp_wait = 0; ATENABLE(s, trp->tr_lock); + file_drop(fd); *err = rc; return -1; } @@ -1597,6 +1627,7 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) * transaction timed out, return error */ atp_free(trp); + file_drop(fd); *err = ETIMEDOUT; return -1; } @@ -1609,9 +1640,10 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) /* * copyout the result info */ - copyout((caddr_t)bds, (caddr_t)buf, atpBDSsize); + copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize); atp_free(trp); + file_drop(fd); return (int)tid; } /* _ATPsndreq */ @@ -1646,7 +1678,7 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) int bds_cnt, count, len; caddr_t dataptr; - if ((*err = atalk_getref(0, fd, &gref, proc)) != 0) + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) return -1; if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) @@ -1654,6 +1686,7 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) dPrintf(D_M_ATP, D_L_ERROR, ("ATPsndrsp: stale handle=0x%x, pid=%d\n", (u_int) gref, gref->pid)); + file_drop(fd); *err = EINVAL; return -1; } @@ -1663,10 +1696,12 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) */ if ((m = gbuf_alloc_wait(resplen, TRUE)) == 0) { *err = ENOMEM; + file_drop(fd); return -1; } - if ((*err = copyin((caddr_t)respbuff, (caddr_t)gbuf_rptr(m), resplen)) != 0) { + if ((*err = copyin(CAST_USER_ADDR_T(respbuff), (caddr_t)gbuf_rptr(m), resplen)) != 0) { gbuf_freeb(m); + file_drop(fd); return -1; } gbuf_wset(m,resplen); @@ -1683,6 +1718,7 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) if (bds_cnt > ATP_TRESP_MAX) { gbuf_freem(m); *err = EINVAL; + file_drop(fd); return -1; } @@ -1692,12 +1728,14 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) if (size > datalen) { gbuf_freem(m); *err = EINVAL; + 
file_drop(fd); return -1; } /* get the first mbuf */ if ((mdata = gbuf_alloc_wait((space = (size > MCLBYTES ? MCLBYTES : size)), TRUE)) == 0) { gbuf_freem(m); + file_drop(fd); *err = ENOMEM; return -1; } @@ -1711,6 +1749,7 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) /* allocate the next mbuf */ if ((gbuf_cont(mdata) = m_get((M_WAIT), MSG_DATA)) == 0) { gbuf_freem(m); + file_drop(fd); *err = ENOMEM; return -1; } @@ -1718,14 +1757,16 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) MCLGET(mdata, M_WAIT); if (!(mdata->m_flags & M_EXT)) { m_freem(m); + file_drop(fd); return(NULL); } dataptr = mtod(mdata, caddr_t); space = MCLBYTES; } /* do the copyin */ - if ((*err = copyin((caddr_t)bufaddr, dataptr, len)) != 0) { + if ((*err = copyin(CAST_USER_ADDR_T(bufaddr), dataptr, len)) != 0) { gbuf_freem(m); + file_drop(fd); return -1; } dataptr += len; @@ -1736,6 +1777,7 @@ _ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) gbuf_cont(m)->m_pkthdr.len = size; /* set packet hdr len */ atp_send_rsp(gref, m, TRUE); + file_drop(fd); return 0; } @@ -1753,13 +1795,14 @@ _ATPgetreq(fd, buf, buflen, err, proc) register gbuf_t *m, *m_head; int s, size, len; - if ((*err = atalk_getref(0, fd, &gref, proc)) != 0) + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) return -1; if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) || (atp->atp_flags & ATP_CLOSING)) { dPrintf(D_M_ATP, D_L_ERROR, ("ATPgetreq: stale handle=0x%x, pid=%d\n", (u_int) gref, gref->pid)); + file_drop(fd); *err = EINVAL; return -1; } @@ -1790,17 +1833,19 @@ _ATPgetreq(fd, buf, buflen, err, proc) for (size=0, m=m_head; m; m = gbuf_cont(m)) { if ((len = gbuf_len(m)) > buflen) len = buflen; - copyout((caddr_t)gbuf_rptr(m), (caddr_t)&buf[size], len); + copyout((caddr_t)gbuf_rptr(m), CAST_USER_ADDR_T(&buf[size]), len); size += len; if ((buflen -= len) == 0) break; } gbuf_freem(m_head); + file_drop(fd); return size; } ATENABLE(s, atp->atp_lock); + file_drop(fd); return -1; } @@ -1817,13 +1862,14 @@ _ATPgetrsp(fd, bdsp, err, proc) int s, tid; char bds[atpBDSsize]; - if ((*err = atalk_getref(0, fd, &gref, proc)) != 0) + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) return -1; if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) || (atp->atp_flags & ATP_CLOSING)) { dPrintf(D_M_ATP, D_L_ERROR, ("ATPgetrsp: stale handle=0x%x, pid=%d\n", (u_int) gref, gref->pid)); + file_drop(fd); *err = EINVAL; return -1; } @@ -1837,13 +1883,16 @@ _ATPgetrsp(fd, bdsp, err, proc) switch (trp->tr_state) { case TRANS_DONE: ATENABLE(s, atp->atp_lock); - if ((*err = copyin((caddr_t)bdsp, - (caddr_t)bds, sizeof(bds))) != 0) + if ((*err = copyin(CAST_USER_ADDR_T(bdsp), + (caddr_t)bds, sizeof(bds))) != 0) { + file_drop(fd); return -1; + } atp_pack_bdsp(trp, (struct atpBDS *)bds); tid = (int)trp->tr_tid; atp_free(trp); - copyout((caddr_t)bds, (caddr_t)bdsp, sizeof(bds)); + copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds)); + file_drop(fd); return tid; case TRANS_FAILED: @@ -1852,6 +1901,7 @@ _ATPgetrsp(fd, bdsp, err, proc) */ ATENABLE(s, atp->atp_lock); atp_free(trp); + file_drop(fd); *err = ETIMEDOUT; return -1; @@ -1861,6 +1911,7 @@ _ATPgetrsp(fd, bdsp, err, proc) } ATENABLE(s, atp->atp_lock); + file_drop(fd); *err = EINVAL; return -1; } diff --git a/bsd/netat/aurp.h b/bsd/netat/aurp.h index 9bc832279..c98f2321f 100644 --- a/bsd/netat/aurp.h +++ b/bsd/netat/aurp.h @@ -31,6 +31,8 @@ #define _NETAT_AURP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* * AURP device ioctl (I_STR) 'subcommands' */ @@ -49,8 +51,7 @@ 
#define AURP_SOCKNUM 387 #define AURP_MAXNETACCESS 64 -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define AURPCODE_REG 0 #define AURPCODE_RTMPPKT 1 @@ -187,6 +188,8 @@ struct myq #define LOCK_DECL(x) atlock_t x +#include + /* * Quandry: if we use a single socket, we have to rebind on each call. * If we use separate sockets per tunnel endpoint, we have to examine @@ -215,44 +218,44 @@ struct aurp_global_t #define AE_UDPIP 0x02 /* UDP/IP input event */ #define AE_SHUTDOWN 0x04 /* Shutdown AURP process */ -void aurp_wakeup __P((struct socket *, caddr_t, int)); -struct mbuf *at_gbuf_to_mbuf __P((gbuf_t *)); -gbuf_t *at_mbuf_to_gbuf __P((struct mbuf *, int)); -int at_insert __P((gbuf_t *m, unsigned int type, unsigned int node)); -int ddp_AURPfuncx __P((int code, void *param, unsigned char node)); -int AURPinit __P((void)); -int aurpd_start __P((void)); -void atalk_to_ip __P((gbuf_t *m)); -void AURPaccess __P((void)); -void AURPshutdown __P((void)); -void AURPiocack __P((gref_t *gref, gbuf_t *m)); -void AURPiocnak __P((gref_t *gref, gbuf_t *m, int error)); -void AURPsndZReq __P((aurp_state_t *state)); -void AURPsndZRsp __P((aurp_state_t *state, gbuf_t *dat_m, int flag)); -void AURPsndRIUpd __P((aurp_state_t *state)); -void AURPsndRIReq __P((aurp_state_t *state)); -void AURPsndRIAck __P((aurp_state_t *state, gbuf_t *m, unsigned short flags)); -void AURPsndOpenReq __P((aurp_state_t *state)); -void AURPsndRDReq __P((aurp_state_t *state)); -void AURPrcvZReq __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvZRsp __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvRIUpd __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvRIReq __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvRIAck __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvRIRsp __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvOpenReq __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvOpenRsp __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvTickle __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvTickleAck __P((aurp_state_t *state, gbuf_t *m)); -void AURPrcvRDReq __P((aurp_state_t *state, gbuf_t *m)); -void AURPfreemsg __P((gbuf_t *m)); -void AURPrtupdate __P((RT_entry *entry, unsigned char ev)); -void AURPsend __P((gbuf_t *mdata, int type, int node)); -void AURPcleanup __P((aurp_state_t *state)); -void AURPpurgeri __P((unsigned char node)); -int AURPgetri __P((short next_entry, unsigned char *buf, short *len)); -int AURPsetri __P((unsigned char node, gbuf_t *m)); -int AURPupdateri __P((unsigned char node, gbuf_t *m)); +void aurp_wakeup(struct socket *, caddr_t, int); +struct mbuf *at_gbuf_to_mbuf(gbuf_t *); +gbuf_t *at_mbuf_to_gbuf(struct mbuf *, int); +int at_insert(gbuf_t *m, unsigned int type, unsigned int node); +int ddp_AURPfuncx(int code, void *param, unsigned char node); +int AURPinit(void); +int aurpd_start(void); +void atalk_to_ip(gbuf_t *m); +void AURPaccess(void); +void AURPshutdown(void); +void AURPiocack(gref_t *gref, gbuf_t *m); +void AURPiocnak(gref_t *gref, gbuf_t *m, int error); +void AURPsndZReq(aurp_state_t *state); +void AURPsndZRsp(aurp_state_t *state, gbuf_t *dat_m, int flag); +void AURPsndRIUpd(aurp_state_t *state); +void AURPsndRIReq(aurp_state_t *state); +void AURPsndRIAck(aurp_state_t *state, gbuf_t *m, unsigned short flags); +void AURPsndOpenReq(aurp_state_t *state); +void AURPsndRDReq(aurp_state_t *state); +void AURPrcvZReq(aurp_state_t *state, gbuf_t *m); +void AURPrcvZRsp(aurp_state_t *state, gbuf_t *m); +void AURPrcvRIUpd(aurp_state_t *state, gbuf_t *m); +void 
AURPrcvRIReq(aurp_state_t *state, gbuf_t *m); +void AURPrcvRIAck(aurp_state_t *state, gbuf_t *m); +void AURPrcvRIRsp(aurp_state_t *state, gbuf_t *m); +void AURPrcvOpenReq(aurp_state_t *state, gbuf_t *m); +void AURPrcvOpenRsp(aurp_state_t *state, gbuf_t *m); +void AURPrcvTickle(aurp_state_t *state, gbuf_t *m); +void AURPrcvTickleAck(aurp_state_t *state, gbuf_t *m); +void AURPrcvRDReq(aurp_state_t *state, gbuf_t *m); +void AURPfreemsg(gbuf_t *m); +void AURPrtupdate(RT_entry *entry, unsigned char ev); +void AURPsend(gbuf_t *mdata, int type, int node); +void AURPcleanup(aurp_state_t *state); +void AURPpurgeri(unsigned char node); +int AURPgetri(short next_entry, unsigned char *buf, short *len); +int AURPsetri(unsigned char node, gbuf_t *m); +int AURPupdateri(unsigned char node, gbuf_t *m); /* AURP header for IP tunneling */ typedef struct aurp_domain @@ -283,6 +286,6 @@ typedef struct aurp_domain /****### LD 9/26/97*/ extern struct aurp_global_t aurp_global; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_AURP_H_ */ diff --git a/bsd/netat/aurp_aurpd.c b/bsd/netat/aurp_aurpd.c index a051e9ced..bcd8365ac 100644 --- a/bsd/netat/aurp_aurpd.c +++ b/bsd/netat/aurp_aurpd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,8 @@ #include #include #include +#include +#include #include #include @@ -62,6 +65,8 @@ #define M_RCVBUF (64 * 1024) #define M_SNDBUF (64 * 1024) +extern lck_mtx_t * atalk_mutex; + static int ip_to_atalk(struct sockaddr_in *fp, register gbuf_t *p_mbuf); static int aurp_bindrp(struct socket *so); @@ -89,7 +94,7 @@ aurpd_start() int maxbuf; struct sockopt sopt; - if (suser(current_proc()->p_ucred, ¤t_proc()->p_acflag) != 0 ) + if (suser(kauth_cred_get(), 0) != 0 ) return(EPERM); /* @@ -134,7 +139,7 @@ aurpd_start() goto out; } else { maxbuf = M_RCVBUF; - sopt.sopt_val = &maxbuf; + sopt.sopt_val = CAST_USER_ADDR_T(&maxbuf); sopt.sopt_valsize = sizeof(maxbuf); sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVBUF; @@ -154,7 +159,7 @@ aurpd_start() } else { maxbuf = M_SNDBUF; - sopt.sopt_val = &maxbuf; + sopt.sopt_val = CAST_USER_ADDR_T(&maxbuf); sopt.sopt_valsize = sizeof(maxbuf); sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_SNDBUF; @@ -171,8 +176,8 @@ aurpd_start() so->so_snd.sb_flags |=(SB_SEL|SB_NOINTR); out: - sbunlock(&so->so_snd); - sbunlock(&so->so_rcv); + sbunlock(&so->so_snd, 0); + sbunlock(&so->so_rcv, 0); return(error); } @@ -181,7 +186,7 @@ int AURPgetmsg(err) int *err; { register struct socket *so; - register int s, events; + register int events; so = aurp_global.tunnel; *err = 0; @@ -189,7 +194,8 @@ AURPgetmsg(err) for (;;) { gbuf_t *from, *p_mbuf; int flags = MSG_DONTWAIT; - struct uio auio; + uio_t auio; + char uio_buf[ UIO_SIZEOF(0) ]; /* * Wait for a package to arrive. 
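/*
 * The sleep conversions in atp_write.c above (and in AURPgetmsg() just
 * below) share one recipe: tsleep() with a tick count becomes msleep()
 * with atalk_mutex and a struct timespec, so the domain mutex is dropped
 * while sleeping and the interval no longer depends on SYS_HZ.  A minimal
 * illustration; example_chan and example_wait_100ms are stand-ins, not
 * symbols from this patch.
 */
static int example_chan;

static int
example_wait_100ms(void)
{
	struct timespec ts;

	ts.tv_sec = 0;
	ts.tv_nsec = 100 * 1000 * NSEC_PER_USEC;	/* 100ms, was "10" ticks */
	lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED);
	/* msleep() releases atalk_mutex while asleep, retakes it on wakeup */
	return (msleep(&example_chan, atalk_mutex, PSOCK | PCATCH, "example", &ts));
}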
This will be from the @@ -201,7 +207,8 @@ AURPgetmsg(err) events = aurp_global.event; if (((*err == 0) || (*err == EWOULDBLOCK)) && events == 0) { - *err = tsleep(&aurp_global.event_anchor, PSOCK | PCATCH, "AURPgetmsg", 0); + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + *err = msleep(&aurp_global.event_anchor, atalk_mutex, PSOCK | PCATCH, "AURPgetmsg", 0); events = aurp_global.event; aurp_global.event = 0; } @@ -237,11 +244,8 @@ AURPgetmsg(err) * give it no iov's, point off to non-existant user space, * but make sure the 'resid' count means somehting. */ - - auio.uio_iov = NULL; - auio.uio_iovcnt = 0; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_offset = 0; /* XXX */ + auio = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); /* Keep up an even flow... */ for (;;) @@ -253,8 +257,8 @@ AURPgetmsg(err) #define A_LARGE_SIZE 700 flags = MSG_DONTWAIT; - auio.uio_resid = A_LARGE_SIZE; - *err = soreceive(so, (struct sockaddr **)&from, &auio, &p_mbuf, 0, &flags); + uio_setresid(auio, A_LARGE_SIZE); + *err = soreceive(so, (struct sockaddr **)&from, auio, &p_mbuf, 0, &flags); dPrintf(D_M_AURP, D_L_VERBOSE, ("AURPgetmsg: soreceive returned %d, aurp_global.event==0x%x\n", *err, events)); /* soreceive() sets *mp to zero! at start */ @@ -278,7 +282,6 @@ AURPgetmsg(err) * which will wake us from the sleep at * the top of the outer loop. */ - int s; ATDISABLE(s, aurp_global.glock); aurp_global.event &= ~AE_UDPIP; ATENABLE(s, aurp_global.glock); @@ -296,9 +299,8 @@ AURPgetmsg(err) * * This conforms to the so_upcall function pointer member of struct sockbuf. */ -void aurp_wakeup(struct socket *so, register caddr_t p, int state) +void aurp_wakeup(__unused struct socket *so, register caddr_t p, __unused int state) { - register int s; register int bit; bit = (int) p; @@ -322,7 +324,6 @@ aurp_bindrp(struct socket *so) { struct sockaddr_in sin; struct proc *p = current_proc(); - gbuf_t *m; int error; @@ -336,8 +337,8 @@ aurp_bindrp(struct socket *so) sblock(&so->so_snd, M_WAIT); so->so_state |= SS_PRIV; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, (struct sockaddr *) &sin, p); - sbunlock(&so->so_snd); - sbunlock(&so->so_rcv); + sbunlock(&so->so_snd, 0); + sbunlock(&so->so_rcv, 0); return (error); } @@ -409,7 +410,6 @@ atalk_to_ip(register gbuf_t *m) int error; int flags = MSG_DONTWAIT; struct sockaddr_in rem_addr; - int s; m->m_type = MT_HEADER; m->m_pkthdr.len = gbuf_msgsize(m); diff --git a/bsd/netat/aurp_cfg.c b/bsd/netat/aurp_cfg.c index bc7de2587..97f8c6d33 100644 --- a/bsd/netat/aurp_cfg.c +++ b/bsd/netat/aurp_cfg.c @@ -77,7 +77,7 @@ int aurp_open(gref) if (ddp_AURPfuncx(AURPCODE_REG, AURPcmdx, 0)) { aurp_gref = 0; aurp_minor_no[i] = 0; - return EPROTO; + return EPROTOTYPE; } } diff --git a/bsd/netat/aurp_misc.c b/bsd/netat/aurp_misc.c index 662499730..f1cd9a728 100644 --- a/bsd/netat/aurp_misc.c +++ b/bsd/netat/aurp_misc.c @@ -84,15 +84,14 @@ void AURPupdate(arg) void *arg; { unsigned char node; - boolean_t funnel_state; aurp_state_t *state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); state = (aurp_state_t *)&aurp_state[1]; if (aurp_gref == 0) { - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } /* @@ -110,7 +109,7 @@ void AURPupdate(arg) timeout(AURPupdate, arg, AURP_UpdateRate*10*HZ); update_tmo = 1; - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ diff --git a/bsd/netat/aurp_open.c b/bsd/netat/aurp_open.c index 948ccda85..2157d68c7 100644 --- a/bsd/netat/aurp_open.c +++ 
b/bsd/netat/aurp_open.c @@ -50,13 +50,13 @@ #include -/* funnel version of AURPsndOpenReq */ -void AURPsndOpenReq_funnel(state) +/* locked version of AURPsndOpenReq */ +void AURPsndOpenReq_locked(state) aurp_state_t *state; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); AURPsndOpenReq(state); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ @@ -116,7 +116,7 @@ void AURPsndOpenReq(state) } /* start the retry timer */ - timeout(AURPsndOpenReq_funnel, state, AURP_RetryInterval*HZ); + timeout(AURPsndOpenReq_locked, state, AURP_RetryInterval*HZ); state->rcv_tmo = 1; } @@ -218,7 +218,7 @@ void AURPrcvOpenRsp(state, m) } /* cancel the retry timer */ - untimeout(AURPsndOpenReq_funnel, state); + untimeout(AURPsndOpenReq_locked, state); state->rcv_tmo = 0; state->rcv_retry = 0; diff --git a/bsd/netat/aurp_ri.c b/bsd/netat/aurp_ri.c index 6cab1f01c..44d8df254 100644 --- a/bsd/netat/aurp_ri.c +++ b/bsd/netat/aurp_ri.c @@ -86,13 +86,13 @@ void AURPsndRIAck(state, m, flags) AURPsend(m, AUD_AURP, state->rem_node); } -/* funneled version of AURPsndRIReq */ -void AURPsndRIReq_funnel(state) +/* locked version of AURPsndRIReq */ +void AURPsndRIReq_locked(state) aurp_state_t *state; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); AURPsndRIReq(state); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ @@ -132,17 +132,17 @@ void AURPsndRIReq(state) } /* start the retry timer */ - timeout(AURPsndRIReq_funnel, state, AURP_RetryInterval*HZ); + timeout(AURPsndRIReq_locked, state, AURP_RetryInterval*HZ); state->rcv_tmo = 1; } -/* funneled version of AURPsndRIRsp */ -void AURPsndRIRsp_funnel(state) +/* locked version of AURPsndRIRsp */ +void AURPsndRIRsp_locked(state) aurp_state_t *state; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); AURPsndRIRsp(state); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ @@ -170,7 +170,7 @@ void AURPsndRIRsp(state) ATENABLE(s, aurpgen_lock); msize = sizeof(aurp_hdr_t); if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - timeout(AURPsndRIRsp_funnel, state, AURP_RetryInterval*HZ); + timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); state->snd_tmo = 1; return; } @@ -198,7 +198,7 @@ void AURPsndRIRsp(state) m = (gbuf_t *)gbuf_dupb(state->rsp_m); /* start the retry timer */ - timeout(AURPsndRIRsp_funnel, state, AURP_RetryInterval*HZ); + timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); state->snd_tmo = 1; if (msize == 0) @@ -212,12 +212,12 @@ void AURPsndRIRsp(state) } -void AURPsndRIUpd_funnel(state) +void AURPsndRIUpd_locked(state) aurp_state_t *state; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); AURPsndRIUpd(state); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ @@ -261,7 +261,7 @@ void AURPsndRIUpd(state) m = (gbuf_t *)gbuf_dupb(state->upd_m); /* start the retry timer */ - timeout(AURPsndRIUpd_funnel, state, AURP_RetryInterval*HZ); + timeout(AURPsndRIUpd_locked, state, AURP_RetryInterval*HZ); state->snd_tmo = 1; if (msize == 0) @@ -369,7 +369,7 @@ void AURPrcvRIRsp(state, m) dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIRsp: len=%ld\n", gbuf_len(m))); /* cancel the retry timer */ - untimeout(AURPsndRIReq_funnel, state); + untimeout(AURPsndRIReq_locked, state); state->rcv_tmo = 0; /* send RI ack */ @@ -472,13 +472,13 @@ void AURPrcvRIAck(state, m) if (snd_state == AURPSTATE_WaitingForRIAck1) { /* ack from the tunnel peer to our RI response */ - untimeout(AURPsndRIRsp_funnel, state); + untimeout(AURPsndRIRsp_locked, state); 
dat_m = state->rsp_m; state->rsp_m = 0; flag = 1; } else { /* ack from the tunnel peer to our RI update */ - untimeout(AURPsndRIUpd_funnel, state); + untimeout(AURPsndRIUpd_locked, state); dat_m = state->upd_m; state->upd_m = 0; flag = 2; diff --git a/bsd/netat/aurp_rx.c b/bsd/netat/aurp_rx.c index 5d5c43a3a..2a3d85b30 100644 --- a/bsd/netat/aurp_rx.c +++ b/bsd/netat/aurp_rx.c @@ -106,6 +106,7 @@ aurp_wput(gref, m) break; case AUC_UDPPORT: + mdata = gbuf_cont(m); aurp_global.udp_port = *(char *)gbuf_rptr(mdata); break; diff --git a/bsd/netat/aurp_tickle.c b/bsd/netat/aurp_tickle.c index ce8772d0f..91994a9bc 100644 --- a/bsd/netat/aurp_tickle.c +++ b/bsd/netat/aurp_tickle.c @@ -56,12 +56,11 @@ void AURPsndTickle(state) int msize; gbuf_t *m; aurp_hdr_t *hdrp; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); if (state->rcv_state == AURPSTATE_Unconnected) { - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } /* stop trying if the retry count exceeds the maximum retry value */ @@ -78,7 +77,7 @@ void AURPsndTickle(state) /* purge all routes associated with the tunnel peer */ AURPpurgeri(state->rem_node); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } @@ -102,7 +101,7 @@ void AURPsndTickle(state) /* start the retry timer */ timeout(AURPsndTickle, state, AURP_TickleRetryInterval*HZ); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* */ diff --git a/bsd/netat/ddp.c b/bsd/netat/ddp.c index 93da77ebf..e471d55c1 100644 --- a/bsd/netat/ddp.c +++ b/bsd/netat/ddp.c @@ -381,10 +381,10 @@ void ddp_rem_if(ifID) /* un-do processing done in SIOCSIFADDR */ if (ifa->ifa_addr) { - int s = splnet(); - TAILQ_REMOVE(&ifID->aa_ifp->if_addrhead, ifa, ifa_link); + ifnet_lock_exclusive(ifID->aa_ifp); + if_detach_ifa(ifID->aa_ifp, ifa); ifa->ifa_addr = NULL; - splx(s); + ifnet_lock_done(ifID->aa_ifp); } if (ifID->at_dl_tag) { /* dlil_detach_protocol(ifID->at_dl_tag); */ @@ -1080,10 +1080,9 @@ void ddp_input(mp, ifID) if (sbappendaddr(&((gref->atpcb_socket)->so_rcv), (struct sockaddr *)&ddp_in, - mp, 0) == 0) - gbuf_freem(mp); - else + mp, 0, NULL) != 0) { sorwakeup(gref->atpcb_socket); + } } else { atalk_putnext(gref, mp); } diff --git a/bsd/netat/ddp.h b/bsd/netat/ddp.h index ad79d612b..919d73373 100644 --- a/bsd/netat/ddp.h +++ b/bsd/netat/ddp.h @@ -32,6 +32,8 @@ #define _NETAT_DDP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* Header and data sizes */ #define DDP_HDR_SIZE 5 /* DDP (short) header size */ @@ -74,11 +76,9 @@ typedef struct { char data[DDP_DATA_SIZE]; } at_ddp_t; - #define DDPLEN_ASSIGN(ddp, len) ddp->length = len #define DDPLEN_VALUE(ddp) ddp->length - /* DDP module statistics and configuration */ typedef struct at_ddp_stats { @@ -102,6 +102,7 @@ typedef struct at_ddp_stats { u_int xmit_dropped_nobuf; } at_ddp_stats_t; + /* DDP streams module ioctls */ #define AT_MID_DDP 203 @@ -124,8 +125,7 @@ typedef struct at_ddp_stats { #define DDP_IOC_SET_PROTO ((AT_MID_DDP<<8) | 13) #endif -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define DDP_MIN_NETWORK 0x0001 #define DDP_MAX_NETWORK 0xfffe @@ -184,6 +184,6 @@ void ddp_bit_reverse(unsigned char *); /* in ddp_lap.c */ int ddp_shutdown(int); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_DDP_H_ */ diff --git a/bsd/netat/ddp_aarp.c b/bsd/netat/ddp_aarp.c index 8789856a9..d3692d60c 100644 --- a/bsd/netat/ddp_aarp.c +++ 
b/bsd/netat/ddp_aarp.c @@ -434,6 +434,8 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) register at_ddp_t *ddp_hdrp; int error; int s; + struct timeval timenow; + getmicrouptime(&timenow); if (gbuf_len(m) <= 0) ddp_hdrp = (at_ddp_t *)gbuf_rptr(gbuf_cont(m)); @@ -514,7 +516,8 @@ int aarp_send_data(m, elapp, dest_at_addr, loop) amt_ptr->dest_at_addr = *dest_at_addr; amt_ptr->dest_at_addr.atalk_unused = 0; - amt_ptr->last_time = time.tv_sec; + getmicrouptime(&timenow); + amt_ptr->last_time = timenow.tv_sec; amt_ptr->m = m; amt_ptr->elapp = elapp; amt_ptr->no_of_retries = 0; @@ -765,9 +768,8 @@ register aarp_amt_t *amt_ptr; void aarp_sched_probe(void *arg) { - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); if (probe_cb.elapp->aa_ifp != 0 && probe_cb.no_of_retries != AARP_MAX_PROBE_RETRIES) { @@ -778,7 +780,7 @@ void aarp_sched_probe(void *arg) AARPwakeup(&probe_cb); } - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } @@ -810,10 +812,9 @@ StaticProc void aarp_sched_req(arg) void *arg; { int s, i; - boolean_t funnel_state; aarp_amt_t *amt_ptr = (aarp_amt_t *)arg; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); /* * make sure pointer still valid in case interface removed @@ -831,13 +832,13 @@ StaticProc void aarp_sched_req(arg) ATDISABLE(s, arpinp_lock); if (amt_ptr->tmo == 0) { ATENABLE(s, arpinp_lock); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } if (amt_ptr->no_of_retries < AARP_MAX_REQ_RETRIES) { ATENABLE(s, arpinp_lock); if (aarp_send_req(amt_ptr) == 0) { - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } ATDISABLE(s, arpinp_lock); @@ -846,7 +847,7 @@ StaticProc void aarp_sched_req(arg) aarp_delete_amt_info(amt_ptr); break; } - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } diff --git a/bsd/netat/ddp_brt.c b/bsd/netat/ddp_brt.c index 9d9c66253..6f125f76e 100644 --- a/bsd/netat/ddp_brt.c +++ b/bsd/netat/ddp_brt.c @@ -130,7 +130,7 @@ void ddp_brt_init() bzero(at_ddp_brt, sizeof(at_ddp_brt)); ddp_brt_sweep_timer = 1; #ifdef NOT_USED - timeout(ddp_brt_sweep_funnel, (long)0, BRT_SWEEP_INT * SYS_HZ); + timeout(ddp_brt_sweep_locked, (long)0, BRT_SWEEP_INT * SYS_HZ); #endif } @@ -139,17 +139,17 @@ void ddp_brt_shutdown() #ifdef NOT_USED bzero(at_ddp_brt, sizeof(at_ddp_brt)); if (ddp_brt_sweep_timer) - untimeout(ddp_brt_sweep_funnel, 0); + untimeout(ddp_brt_sweep_locked, 0); #endif ddp_brt_sweep_timer = 0; } -/* funneled version */ -void ddp_brt_sweep_funnel() +/* locked version */ +void ddp_brt_sweep_locked() { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); ddp_brt_sweep(); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } void ddp_brt_sweep() @@ -182,7 +182,7 @@ void ddp_brt_sweep() } #ifdef NOT_USED /* set up the next sweep... 
*/ - timeout(ddp_brt_sweep_funnel, (long)0, BRT_SWEEP_INT * SYS_HZ); + timeout(ddp_brt_sweep_locked, (long)0, BRT_SWEEP_INT * SYS_HZ); #endif } diff --git a/bsd/netat/ddp_lap.c b/bsd/netat/ddp_lap.c index 5c2414e8d..f2ed8d1b4 100644 --- a/bsd/netat/ddp_lap.c +++ b/bsd/netat/ddp_lap.c @@ -53,8 +53,10 @@ #include #include #include +#include #include #include +#include #include #include /* for kernel_map */ @@ -125,6 +127,7 @@ extern asp_scb_t *scb_used_list; extern CCB *adsp_inputQ[]; extern CCB *ccb_used_list; extern at_ddp_stats_t at_ddp_stats; +extern lck_mtx_t * atalk_mutex; /* protos */ extern snmpAarpEnt_t * getAarp(int *); @@ -305,7 +308,7 @@ int elap_wput(gref, m) register ioc_t *iocbp; register at_if_cfg_t *cfgp; at_elap_stats_t *statsp; - int error, i; + int i; int (*func)(); gbuf_t *tmpm; at_ifaddr_t *patp; @@ -774,7 +777,7 @@ elap_dataput(m, elapp, addr_flag, addr) char *addr; { register int size; - int error; + int error = 0; extern int zip_type_packet(); struct etalk_addr dest_addr; struct atalk_addr dest_at_addr; @@ -901,9 +904,11 @@ static int elap_online1(elapp) return ENOENT; elapp->startup_inprogress = TRUE; - if (! (elapp->startup_error = re_aarp(elapp))) - (void)tsleep(&elapp->startup_inprogress, PSOCK | PCATCH, + if (! (elapp->startup_error = re_aarp(elapp))) { + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + (void)msleep(&elapp->startup_inprogress, atalk_mutex, PSOCK | PCATCH, "elap_online1", 0); + } /* then later, after some timeouts AARPwakeup() is called */ @@ -1004,7 +1009,8 @@ int elap_online3(elapp) /* then later, after some timeouts AARPwakeup() is called */ - (void)tsleep(&elapp->startup_inprogress, PSOCK | PCATCH, + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + (void)msleep(&elapp->startup_inprogress, atalk_mutex, PSOCK | PCATCH, "elap_online3", 0); return(elapp->startup_error); } /* elap_online3 */ @@ -1041,6 +1047,7 @@ void elap_offline(elapp) ATENABLE(s, ddpinp_lock); /* make sure no zip timeouts are left running */ + elapp->ifGNIScheduled = 0; untimeout(zip_sched_getnetinfo, elapp); } ddp_rem_if(elapp); @@ -1251,6 +1258,7 @@ int routerStart(keP) { register at_ifaddr_t *ifID; int error; + struct timespec ts; if (! 
ifID_home) return(EINVAL); @@ -1274,12 +1282,18 @@ int routerStart(keP) dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("router_start: waiting 20 sec before starting up\n")); + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); /* sleep for 20 seconds */ + + /* 20 seconds, in timespec form */ + ts.tv_sec = 20; + ts.tv_nsec = 0; + if ((error = /* *** eventually this will be the ifID for the interface being brought up in router mode *** */ - tsleep(&ifID_home->startup_inprogress, - PSOCK | PCATCH, "routerStart", 20 * SYS_HZ)) + msleep(&ifID_home->startup_inprogress, atalk_mutex, + PSOCK | PCATCH, "routerStart", &ts)) != EWOULDBLOCK) { /* if (!error) @@ -1428,7 +1442,9 @@ static int elap_trackMcast(patp, func, addr) u_char c; switch(patp->aa_ifp->if_type) { case IFT_ETHER: - case IFT_FDDI: + case IFT_FDDI: + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: /* bonded ethernet */ /* set addr to point to unique part of addr */ c = addr[5]; @@ -1515,6 +1531,8 @@ static getSnmpCfg(snmp) ifc->ifc_addrSize = getPhysAddrSize(i); switch (elapp->aa_ifp->if_type) { case IFT_ETHER: + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: /* bonded ethernet */ ifc->ifc_type = SNMP_TYPE_ETHER2; break; case IFT_ISO88025: /* token ring */ @@ -1570,7 +1588,7 @@ int at_reg_mcast(ifID, data) caddr_t data; { struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr sa; + struct sockaddr_dl sdl; if (*(int *)data) { if (!nddp) { @@ -1582,16 +1600,22 @@ int at_reg_mcast(ifID, data) return(0); /* this is for ether_output */ - sa.sa_family = AF_UNSPEC; - sa.sa_len = 2 + sizeof(struct etalk_addr); - bcopy (data, &sa.sa_data[0], sizeof(struct etalk_addr)); + bzero(&sdl, sizeof(sdl)); + sdl.sdl_family = AF_LINK; + sdl.sdl_alen = sizeof(struct etalk_addr); + sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data) + + sizeof(struct etalk_addr); + bcopy(data, sdl.sdl_data, sizeof(struct etalk_addr)); + /* these next two lines should not really be needed XXX */ + sdl.sdl_index = nddp->if_index; + sdl.sdl_type = IFT_ETHER; dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: adding multicast %08x%04x ifID:0x%x\n", *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, (unsigned)ifID)); - if (if_addmulti(nddp, &sa, 0)) + if (if_addmulti(nddp, &sdl, 0)) return -1; } return 0; @@ -1603,7 +1627,7 @@ int at_unreg_mcast(ifID, data) caddr_t data; { struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr sa; + struct sockaddr_dl sdl; if (*(int *)data) { if (!nddp) { @@ -1614,9 +1638,15 @@ int at_unreg_mcast(ifID, data) elap_trackMcast(ifID, MCAST_TRACK_DELETE, data); /* this is for ether_output */ - sa.sa_family = AF_UNSPEC; - sa.sa_len = 2 + sizeof(struct etalk_addr); - bcopy (data, &sa.sa_data[0], sizeof(struct etalk_addr)); + bzero(&sdl, sizeof(sdl)); + sdl.sdl_family = AF_LINK; + sdl.sdl_alen = sizeof(struct etalk_addr); + sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data) + + sizeof(struct etalk_addr); + bcopy(data, sdl.sdl_data, sizeof(struct etalk_addr)); + /* these next two lines should not really be needed XXX */ + sdl.sdl_index = nddp->if_index; + sdl.sdl_type = IFT_ETHER; dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: deleting multicast %08x%04x ifID:0x%x\n", @@ -1624,7 +1654,7 @@ int at_unreg_mcast(ifID, data) (unsigned)ifID)); bzero(data, sizeof(struct etalk_addr)); - if (if_delmulti(nddp, &sa)) + if (if_delmulti(nddp, &sdl)) return -1; } return 0; } diff --git a/bsd/netat/ddp_nbp.c b/bsd/netat/ddp_nbp.c index 1bfec9e17..db4629db5 100644 --- a/bsd/netat/ddp_nbp.c +++ b/bsd/netat/ddp_nbp.c @@ -226,7 +226,7 @@ void nbp_input(m, ifID) /* true if home zone == 
destination zone */ register int zno, i; register gbuf_t *m2; - register error_found =0; + register int error_found =0; register at_ifaddr_t *ifIDorig; if (!ROUTING_MODE) /* for routers only! */ @@ -1423,7 +1423,7 @@ int nbp_new_nve_entry(nve_entry, ifID) new_entry->zone_hash = nbp_strhash(&new_entry->zone); } new_entry->tag = tag; - new_entry->pid = current_proc()->p_pid; + new_entry->pid = proc_selfpid(); ATDISABLE(nve_lock_pri,NVE_LOCK); TAILQ_INSERT_TAIL(&name_registry, new_entry, nve_link); diff --git a/bsd/netat/ddp_proto.c b/bsd/netat/ddp_proto.c index 7a6298a4d..347ba3e77 100644 --- a/bsd/netat/ddp_proto.c +++ b/bsd/netat/ddp_proto.c @@ -42,7 +42,6 @@ #include #include #include -#include #include diff --git a/bsd/netat/ddp_r_rtmp.c b/bsd/netat/ddp_r_rtmp.c index 46c378fd8..438d9220f 100644 --- a/bsd/netat/ddp_r_rtmp.c +++ b/bsd/netat/ddp_r_rtmp.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -82,7 +83,7 @@ gbuf_t *rtmp_prep_new_packet(); void rtmp_timeout(); void rtmp_send_port(); -void rtmp_send_port_funnel(); +void rtmp_send_port_locked(); void rtmp_dropper(void *); void rtmp_shutdown(); static void rtmp_update(); @@ -92,6 +93,7 @@ extern int elap_online3(); extern pktsIn, pktsOut, pktsDropped, pktsHome; extern short ErrorRTMPoverflow, ErrorZIPoverflow; extern atlock_t ddpinp_lock; +extern lck_mtx_t * atalk_mutex; /* * rtmp_router_input: function called by DDP (in router mode) to handle @@ -739,12 +741,11 @@ register at_ifaddr_t *ifID; register unsigned int s; short i; RT_entry *en = &RT_table[0]; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); if (ifID->ifRoutingState < PORT_ONLINE) { - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); return; } @@ -799,7 +800,7 @@ register at_ifaddr_t *ifID; ATENABLE(s, ddpinp_lock); timeout(rtmp_timeout, (caddr_t) ifID, 20*SYS_HZ); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* @@ -1168,13 +1169,13 @@ static void rtmp_request(ifID, ddp) } -/* funnel version of rtmp_send_port */ -void rtmp_send_port_funnel(ifID) +/* locked version of rtmp_send_port */ +void rtmp_send_port_locked(ifID) register at_ifaddr_t *ifID; { - thread_funnel_set(network_flock, TRUE); + atalk_lock(); rtmp_send_port(ifID); - thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } @@ -1212,7 +1213,7 @@ void rtmp_send_port(ifID) dPrintf(D_M_RTMP_LOW, D_L_TRACE, ("rtmp_send_port: func=0x%x, ifID=0x%x\n", (u_int) rtmp_send_port, (u_int) ifID)); - timeout (rtmp_send_port_funnel, (caddr_t)ifID, 10 * SYS_HZ); + timeout (rtmp_send_port_locked, (caddr_t)ifID, 10 * SYS_HZ); } @@ -1222,14 +1223,13 @@ void rtmp_send_port(ifID) void rtmp_dropper(void *arg) { - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); pktsIn = pktsOut = pktsHome = pktsDropped = 0; timeout(rtmp_dropper, NULL, 2*SYS_HZ); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* @@ -1248,9 +1248,8 @@ int rtmp_router_start(keP) register short Index, router_starting_timer = 0; register RT_entry *Entry; register at_net_al netStart, netStop; - boolean_t funnel_state; + struct timespec ts; - funnel_state = thread_funnel_set(network_flock, TRUE); /* clear the static structure used to record routing errors */ bzero(&ke, sizeof(ke)); @@ -1422,13 +1421,15 @@ int rtmp_router_start(keP) goto error; } - /* sleep for 10 seconds */ + /* sleep for 11 seconds */ + ts.tv_sec = 11; + ts.tv_nsec = 0; if ((err = /* *** eventually this will be the 
ifID for the interface being brought up in router mode *** */ /* *** router sends rtmp packets every 10 seconds *** */ - tsleep(&ifID_home->startup_inprogress, - PSOCK | PCATCH, "router_start1", (10+1) * SYS_HZ)) + msleep(&ifID_home->startup_inprogress, atalk_mutex, + PSOCK | PCATCH, "router_start1", &ts)) != EWOULDBLOCK) { goto error; } @@ -1470,11 +1471,13 @@ startZoneInfo: dPrintf(D_M_RTMP, D_L_STARTUP, ("rtmp_router_start: waiting for zone info to complete\n")); /* sleep for 10 seconds */ + ts.tv_sec = 10; + ts.tv_nsec = 0; if ((err = /* *** eventually this will be the ifID for the interface being brought up in router mode *** */ - tsleep(&ifID_home->startup_inprogress, - PSOCK | PCATCH, "router_start2", 10 * SYS_HZ)) + msleep(&ifID_home->startup_inprogress, atalk_mutex, + PSOCK | PCATCH, "router_start2", &ts)) != EWOULDBLOCK) { goto error; } @@ -1558,22 +1561,20 @@ startZoneInfo: /* prepare the packet dropper timer */ timeout (rtmp_dropper, NULL, 1*SYS_HZ); - (void) thread_funnel_set(network_flock, funnel_state); return(0); error: dPrintf(D_M_RTMP,D_L_ERROR, - ("rtmp_router_start: error type=%d occured on port %d\n", + ("rtmp_router_start: error type=%d occurred on port %d\n", ifID->ifRoutingState, ifID->ifPort)); /* if there's no keP->error, copy the local ke structure, - since the error occured asyncronously */ + since the error occurred asynchronously */ if ((!keP->error) && ke.error) bcopy(&ke, keP, sizeof(ke)); rtmp_shutdown(); /* to return the error in keP, the ioctl has to return 0 */ - (void) thread_funnel_set(network_flock, funnel_state); return((keP->error)? 0: err); } /* rtmp_router_start */ @@ -1597,7 +1598,7 @@ void rtmp_shutdown() TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { if (ifID->ifRoutingState > PORT_OFFLINE ) { if (ifID->ifRoutingState == PORT_ONLINE) { - untimeout(rtmp_send_port_funnel, (caddr_t)ifID); + untimeout(rtmp_send_port_locked, (caddr_t)ifID); untimeout(rtmp_timeout, (caddr_t) ifID); } /* diff --git a/bsd/netat/ddp_r_zip.c b/bsd/netat/ddp_r_zip.c index eaf337695..bde37beb8 100644 --- a/bsd/netat/ddp_r_zip.c +++ b/bsd/netat/ddp_r_zip.c @@ -89,7 +89,7 @@ extern short ErrorZIPoverflow; static int netinfo_reply_pending; static void zip_netinfo_reply(at_x_zip_t *, at_ifaddr_t *); static void zip_getnetinfo(at_ifaddr_t *); -static void zip_getnetinfo_funnel(void *); +static void zip_getnetinfo_locked(void *); static void send_phony_reply(void *); /* @@ -742,6 +742,7 @@ void zip_router_input (m, ifID) */ ifID->ifNumRetries = ZIP_NETINFO_RETRIES; netinfo_reply_pending = 1; + ifID->ifGNIScheduled = 1; timeout(zip_sched_getnetinfo, (caddr_t) ifID, 2*ZIP_TIMER_INT); @@ -881,6 +882,7 @@ static void zip_netinfo_reply (netinfo, ifID) ifID->ifThisCableStart, ifID->ifThisCableEnd)); /* The packet is in response to our request */ + ifID->ifGNIScheduled = 0; untimeout (zip_sched_getnetinfo, (caddr_t) ifID); netinfo_reply_pending = 0; zone_name_len = netinfo->data[0]; @@ -965,13 +967,15 @@ int zip_control (ifID, control) switch (control) { case ZIP_ONLINE : case ZIP_LATE_ROUTER : - ifID->ifNumRetries = 0; - /* Get the desired zone name from elap and put it in - * ifID for zip_getnetinfo() to use. 
+ */ + if (ifID->startup_zone.len) + ifID->ifZoneName = ifID->startup_zone; + zip_getnetinfo(ifID); + } break; case ZIP_NO_ROUTER : ifID->ifZoneName.len = 1; @@ -988,14 +992,19 @@ return (0); } -/* funnel version of zip_getnetinfo */ -static void zip_getnetinfo_funnel(arg) +/* locked version of zip_getnetinfo */ +static void zip_getnetinfo_locked(arg) void *arg; { - at_ifaddr_t *ifID = (at_ifaddr_t *)arg; - thread_funnel_set(network_flock, TRUE); - zip_getnetinfo(ifID); - thread_funnel_set(network_flock, FALSE); + at_ifaddr_t *ifID; + + ifID = (at_ifaddr_t *)arg; + atalk_lock(); + if (ifID != NULL) { // make sure it hasn't been closed + ifID->ifGNIScheduled = 0; + zip_getnetinfo(ifID); + } + atalk_unlock(); } @@ -1012,6 +1021,7 @@ static void zip_getnetinfo (ifID) void zip_sched_getnetinfo(); register struct atalk_addr *at_dest; register int size; + size = DDP_X_HDR_SIZE + ZIP_X_HDR_SIZE + ifID->ifZoneName.len + 1 + sizeof(struct atalk_addr) + 1; @@ -1022,7 +1032,8 @@ static void zip_getnetinfo (ifID) */ dPrintf(D_M_ZIP, D_L_WARNING, ("zip_getnetinfo: no buffer, call later port=%d\n", ifID->ifPort)); - timeout (zip_getnetinfo_funnel, (caddr_t) ifID, ZIP_TIMER_INT/10); + ifID->ifGNIScheduled = 1; + timeout (zip_getnetinfo_locked, (caddr_t) ifID, ZIP_TIMER_INT/10); return; } @@ -1075,7 +1086,7 @@ static void zip_getnetinfo (ifID) ifID->ifNumRetries++; netinfo_reply_pending = 1; - + ifID->ifGNIScheduled = 1; timeout (zip_sched_getnetinfo, (caddr_t) ifID, ZIP_TIMER_INT); } /* zip_getnetinfo */ @@ -1088,9 +1099,10 @@ static void zip_getnetinfo (ifID) void zip_sched_getnetinfo (ifID) register at_ifaddr_t *ifID; { - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); + + ifID->ifGNIScheduled = 0; if (ifID->ifNumRetries >= ZIP_NETINFO_RETRIES) { /* enough packets sent.... give up! 
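/*
 * The ifGNIScheduled hunks above implement a one-bit "timer armed" flag:
 * it is set immediately before every timeout() that will fire the
 * GetNetInfo machinery, and cleared by the callback itself or beside each
 * untimeout(), so zip_getnetinfo() can never be scheduled twice for one
 * interface.  The idiom, pulled out of context (the helper names here are
 * illustrative only, not symbols from this patch):
 */
static void
zip_gni_arm(at_ifaddr_t *ifID)
{
	if (!ifID->ifGNIScheduled) {	/* arm at most once */
		ifID->ifGNIScheduled = 1;
		timeout(zip_sched_getnetinfo, (caddr_t)ifID, ZIP_TIMER_INT);
	}
}

static void
zip_gni_disarm(at_ifaddr_t *ifID)
{
	if (ifID->ifGNIScheduled) {
		ifID->ifGNIScheduled = 0;
		untimeout(zip_sched_getnetinfo, (caddr_t)ifID);
	}
}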
*/ @@ -1119,7 +1131,7 @@ void zip_sched_getnetinfo (ifID) } else zip_getnetinfo(ifID); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } @@ -1263,13 +1275,11 @@ send_phony_reply(arg) void *arg; { gbuf_t *rm = (gbuf_t *)arg; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - + atalk_lock(); ddp_input(rm, ifID_home); + atalk_unlock(); - (void) thread_funnel_set(network_flock, FALSE); return; } diff --git a/bsd/netat/ddp_rtmp.c b/bsd/netat/ddp_rtmp.c index c1b40b63e..023418dfc 100644 --- a/bsd/netat/ddp_rtmp.c +++ b/bsd/netat/ddp_rtmp.c @@ -174,10 +174,15 @@ void trackrouter(ifID, net, node) void ddp_age_router(deadrouter) register struct routerinfo *deadrouter; { - register at_ifaddr_t *ourrouter = deadrouter->ifID; - boolean_t funnel_state; + register at_ifaddr_t *ourrouter; - funnel_state = thread_funnel_set(network_flock, TRUE); + atalk_lock(); + + ourrouter = deadrouter->ifID; + if (ourrouter == NULL) { + atalk_unlock(); + return; + } dPrintf(D_M_RTMP, D_L_INFO, ("ddp_age_router called deadrouter=%d:%d\n", NODE(deadrouter), NET(deadrouter))); @@ -238,7 +243,7 @@ void ddp_age_router(deadrouter) } else bzero((caddr_t) deadrouter, sizeof(struct routerinfo)); - (void) thread_funnel_set(network_flock, FALSE); + atalk_unlock(); } /* ddp_age_router */ diff --git a/bsd/netat/ddp_usrreq.c b/bsd/netat/ddp_usrreq.c index 30d2d217e..9331419cb 100644 --- a/bsd/netat/ddp_usrreq.c +++ b/bsd/netat/ddp_usrreq.c @@ -94,7 +94,7 @@ int ddp_pru_attach(struct socket *so, int proto, if (error) return error; pcb = (struct atpcb *)((so)->so_pcb); - pcb->pid = current_proc()->p_pid; + pcb->pid = proc_selfpid(); pcb->ddptype = (u_char) proto; /* set in socreate() */ pcb->proto = ATPROTO_DDP; diff --git a/bsd/netat/debug.h b/bsd/netat/debug.h index 34c8517f5..d59f52235 100644 --- a/bsd/netat/debug.h +++ b/bsd/netat/debug.h @@ -28,7 +28,8 @@ #ifndef _NETAT_DEBUG_H_ #define _NETAT_DEBUG_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef __APPLE_API_OBSOLETE +#ifdef PRIVATE #define D_L_FATAL 0x00000001 #define D_L_ERROR 0x00000002 @@ -260,6 +261,7 @@ static char *at_mid_strings[] = { #endif -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_DEBUG_H_ */ diff --git a/bsd/netat/drv_dep.c b/bsd/netat/drv_dep.c index 13f845952..8f286db63 100644 --- a/bsd/netat/drv_dep.c +++ b/bsd/netat/drv_dep.c @@ -68,6 +68,8 @@ static llc_header_t snap_hdr_aarp = SNAP_HDR_AARP; static unsigned char snap_proto_ddp[5] = SNAP_PROTO_AT; static unsigned char snap_proto_aarp[5] = SNAP_PROTO_AARP; +static void at_input_packet(protocol_family_t protocol, mbuf_t m); + int pktsIn, pktsOut; struct ifqueue atalkintrq; /* appletalk and aarp packet input queue */ @@ -86,6 +88,7 @@ void atalk_load() { extern int _ATsocket(), _ATgetmsg(), _ATputmsg(); extern int _ATPsndreq(), _ATPsndrsp(), _ATPgetreq(), _ATPgetrsp(); + extern lck_mtx_t *domain_proto_mtx; sys_ATsocket = _ATsocket; sys_ATgetmsg = _ATgetmsg; @@ -114,6 +117,9 @@ void atalk_load() for 2225395 this happens in adsp_open and is undone on ADSP_UNLINK */ + lck_mtx_unlock(domain_proto_mtx); + proto_register_input(PF_APPLETALK, at_input_packet, NULL); + lck_mtx_lock(domain_proto_mtx); } /* atalk_load */ /* Undo everything atalk_load() did. 
*/ @@ -208,7 +214,9 @@ int pat_output(patp, mlist, dst_addr, type) kprintf("po: mlen= %d, m2len= %d\n", m->m_len, (m->m_next)->m_len); #endif - dlil_output(patp->at_dl_tag, m, NULL, &dst, 0); + atalk_unlock(); + dlil_output(patp->aa_ifp, PF_APPLETALK, m, NULL, &dst, 0); + atalk_lock(); pktsOut++; } @@ -216,44 +224,30 @@ int pat_output(patp, mlist, dst_addr, type) return 0; } /* pat_output */ -void atalkintr() +static void +at_input_packet( + __unused protocol_family_t protocol, + mbuf_t m) { - struct mbuf *m, *m1, *mlist = NULL; + struct mbuf *m1; struct ifnet *ifp; - int s; llc_header_t *llc_header; at_ifaddr_t *ifID; char src[6]; enet_header_t *enet_header; - -next: - s = splimp(); - IF_DEQUEUE(&atalkintrq, m); - splx(s); - - if (m == 0) - return; - - for ( ; m ; m = mlist) { - mlist = m->m_nextpkt; -#ifdef APPLETALK_DEBUG - /* packet chains are not yet in use on input */ - if (mlist) kprintf("atalkintr: packet chain\n"); -#endif - m->m_nextpkt = 0; - if (!appletalk_inited) { + if (!appletalk_inited) { m_freem(m); - continue; - } + return; + } - if ((m->m_flags & M_PKTHDR) == 0) { + if ((m->m_flags & M_PKTHDR) == 0) { #ifdef APPLETALK_DEBUG - kprintf("atalkintr: no HDR on packet received"); + kprintf("atalkintr: no HDR on packet received"); #endif m_freem(m); - continue; - } + return; + } /* make sure the interface this packet was received on is configured for AppleTalk */ @@ -265,7 +259,7 @@ next: /* if we didn't find a matching interface */ if (!ifID) { m_freem(m); - continue; /* was EAFNOSUPPORT */ + return; /* was EAFNOSUPPORT */ } /* make sure the entire packet header is in the current mbuf */ @@ -275,13 +269,15 @@ next: kprintf("atalkintr: packet too small\n"); #endif m_freem(m); - continue; + return; } enet_header = mtod(m, enet_header_t *); /* Ignore multicast packets from local station */ /* *** Note: code for IFTYPE_TOKENTALK may be needed here. *** */ - if (ifID->aa_ifp->if_type == IFT_ETHER) { + if (ifID->aa_ifp->if_type == IFT_ETHER || + ifID->aa_ifp->if_type == IFT_L2VLAN || + ifID->aa_ifp->if_type == IFT_IEEE8023ADLAG) { bcopy((char *)enet_header->src, src, sizeof(src)); #ifdef COMMENT /* In order to receive packets from the Blue Box, we cannot @@ -291,7 +287,7 @@ next: (bcmp(src, ifID->xaddr, sizeof(src)) == 0)) { /* Packet rejected: think it's a local mcast. 
*/ m_freem(m); - continue; /* was EAFNOSUPPORT */ + return; /* was EAFNOSUPPORT */ } #endif /* COMMENT */ @@ -321,7 +317,7 @@ next: llc_header->protocol[4]); #endif m_freem(m); - continue; /* was EAFNOSUPPORT */ + return; /* was EAFNOSUPPORT */ } } MCHTYPE(m, MSG_DATA); /* set the mbuf type */ @@ -342,7 +338,5 @@ next: #endif m_freem(m); } - } } - goto next; -} /* atalkintr */ +} diff --git a/bsd/netat/ep.h b/bsd/netat/ep.h index 2f46f707b..fd917a57a 100644 --- a/bsd/netat/ep.h +++ b/bsd/netat/ep.h @@ -31,6 +31,8 @@ #define _NETAT_EP_H_ #include +#ifdef __APPLE_API_OBSOLETE + #define EP_REQUEST 1 /* Echo request packet */ #define EP_REPLY 2 /* Echo reply packet */ @@ -38,4 +40,5 @@ #define EP_DATA_SIZE 585 /* Maximum size of EP data */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_EP_H_ */ diff --git a/bsd/netat/lap.h b/bsd/netat/lap.h index aec11df28..85542c5d9 100644 --- a/bsd/netat/lap.h +++ b/bsd/netat/lap.h @@ -30,6 +30,8 @@ #define _NETAT_LAP_H_ #include +#ifdef __APPLE_API_OBSOLETE + #define AT_MID_ELAP 202 /* elap ioctl's */ @@ -88,5 +90,6 @@ #endif /* NOT_USED */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_LAP_H_ */ diff --git a/bsd/netat/nbp.h b/bsd/netat/nbp.h index 8c77bed8b..9bac6ef0f 100644 --- a/bsd/netat/nbp.h +++ b/bsd/netat/nbp.h @@ -54,6 +54,8 @@ #define _NETAT_NBP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* NBP packet types */ #define NBP_BRRQ 0x01 /* Broadcast request */ @@ -97,8 +99,7 @@ typedef struct at_nbp { #define DEFAULT_ZONE(zone) (!(zone)->len || ((zone)->len == 1 && (zone)->str[0] == '*')) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* Struct for name registry */ typedef struct _nve_ { @@ -123,8 +124,10 @@ typedef struct _nve_ { #define NBP_WILD_TYPE 0x02 #define NBP_WILD_MASK 0x03 -typedef struct nbp_req { - int (*func)(); +struct nbp_req; +typedef struct nbp_req nbp_req_t; +struct nbp_req { + int (*func)(nbp_req_t *, nve_entry_t *); gbuf_t *response; /* the response datagram */ int space_unused; /* Space available in the resp */ /* packet. 
*/ @@ -134,16 +137,16 @@ typedef struct nbp_req { u_char flags; /* Flags to indicate whether or */ /* not the request tuple has */ /* wildcards in it */ -} nbp_req_t; +}; extern int nbp_insert_entry(nve_entry_t *); extern u_int nbp_strhash (at_nvestr_t *); extern nve_entry_t *nbp_find_nve(nve_entry_t *); -extern int nbp_fillin_nve(); +extern int nbp_fillin_nve(at_entity_t *, nve_entry_t *); extern at_nvestr_t *getSPLocalZone(int); extern at_nvestr_t *getLocalZone(int); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_NBP_H_ */ diff --git a/bsd/netat/pap.h b/bsd/netat/pap.h index 6abed58d8..51388c274 100644 --- a/bsd/netat/pap.h +++ b/bsd/netat/pap.h @@ -37,6 +37,8 @@ #define _NETAT_PAP_H_ #include +#ifdef __APPLE_API_OBSOLETE + #define AT_PAP_DATA_SIZE 512 /* Maximum PAP data size */ #define AT_PAP_STATUS_SIZE 255 /* Maximum PAP status length */ #define PAP_TIMEOUT 120 @@ -125,4 +127,5 @@ struct pap_state { int pap_tickle_id; /* the transaction ID for tickles */ }; +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_PAP_H_ */ diff --git a/bsd/netat/routing_tables.h b/bsd/netat/routing_tables.h index 5376ab6a7..f3b46283d 100644 --- a/bsd/netat/routing_tables.h +++ b/bsd/netat/routing_tables.h @@ -32,7 +32,7 @@ #ifndef _NETAT_ROUTING_TABLES_H_ #define _NETAT_ROUTING_TABLES_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE /* RTMP table entry state bitmap (EntryState) values */ @@ -141,7 +141,7 @@ typedef struct { ZT_entry zt; /* the zone table entry */ } ZT_entryno; -#ifdef KERNEL +#ifdef KERNEL_PRIVATE /* Macros for Routing table B-tree easy access */ @@ -191,9 +191,10 @@ extern int zonename_equal(at_nvestr_t *, at_nvestr_t *); extern RT_entry *RT_table_freelist; extern RT_entry RT_table_start; extern RT_entry *RT_table; -extern RT_entry *rt_binsert(); -extern RT_entry *rt_insert(); -extern RT_entry *rt_bdelete(); +extern RT_entry *rt_binsert (RT_entry *); +extern RT_entry *rt_insert( at_net_al NStop, at_net_al NStart, at_net_al NxNet, + at_node NxNode, u_char NtDist, u_char NtPort, u_char EntS); +extern RT_entry *rt_bdelete (at_net_al NetStop, at_net_al NetStart); extern RT_entry *rt_blookup(int); extern RT_entry *rt_getNextRoute(int); @@ -209,9 +210,9 @@ extern int zt_ent_zindex(u_char *); extern ZT_entryno *zt_getNextZone(int); extern void zt_remove_zones(u_char *); extern void zt_set_zmap(u_short, char *); -extern void rtmp_router_input(); +extern void rtmp_router_input(gbuf_t *, at_ifaddr_t *); -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ #endif /* _NETAT_ROUTING_TABLES_H_ */ diff --git a/bsd/netat/rtmp.h b/bsd/netat/rtmp.h index 8f7365b21..31b1a1b68 100644 --- a/bsd/netat/rtmp.h +++ b/bsd/netat/rtmp.h @@ -28,6 +28,8 @@ #define _NETAT_RTMP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* Changed 03-22-94 for router support LD */ /* RTMP function codes */ @@ -62,4 +64,5 @@ typedef struct { unsigned char at_rtmp_data; } at_rtmp_tuple; +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_RTMP_H_ */ diff --git a/bsd/netat/sys_dep.c b/bsd/netat/sys_dep.c index 0c20c8f1c..272d890dc 100644 --- a/bsd/netat/sys_dep.c +++ b/bsd/netat/sys_dep.c @@ -33,14 +33,16 @@ #include #include #include -#include +#include /* for p_fd in fdflags */ #include #include #include #include -#include +#include #include #include +#include +#include #include #include @@ -59,17 +61,23 @@ int (*sys_ATPgetrsp)() = 0; extern at_state_t at_state; /* global state of AT network 
*/ extern at_ifaddr_t *ifID_home; /* default interface */ +extern lck_mtx_t * atalk_mutex; + +#define f_flag f_fglob->fg_flag +#define f_type f_fglob->fg_type +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data -struct ATsocket_args { - int proto; -}; int ATsocket(proc, uap, retval) - void *proc; + struct proc *proc; struct ATsocket_args *uap; int *retval; { int err; - + atalk_lock(); if (sys_ATsocket) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -82,22 +90,18 @@ int ATsocket(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -struct ATgetmsg_args { - int fd; - void *ctlptr; - void *datptr; - int *flags; -}; int ATgetmsg(proc, uap, retval) - void *proc; + struct proc *proc; struct ATgetmsg_args *uap; int *retval; { int err; + atalk_lock(); if (sys_ATgetmsg) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -112,22 +116,18 @@ int ATgetmsg(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -struct ATputmsg_args { - int fd; - void *ctlptr; - void *datptr; - int flags; -}; int ATputmsg(proc, uap, retval) - void *proc; + struct proc *proc; struct ATputmsg_args *uap; int *retval; { int err; + atalk_lock(); if (sys_ATputmsg) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -142,22 +142,18 @@ int ATputmsg(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -struct ATPsndreq_args { - int fd; - unsigned char *buf; - int len; - int nowait; -}; int ATPsndreq(proc, uap, retval) - void *proc; + struct proc *proc; struct ATPsndreq_args *uap; int *retval; { int err; + atalk_lock(); if (sys_ATPsndreq) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -172,22 +168,18 @@ int ATPsndreq(proc, uap, retval) *retval = -1; err= ENXIO; } + atalk_unlock(); return err; } -struct ATPsndrsp_args { - int fd; - unsigned char *respbuff; - int resplen; - int datalen; -}; int ATPsndrsp(proc, uap, retval) - void *proc; + struct proc *proc; struct ATPsndrsp_args *uap; int *retval; { int err; + atalk_lock(); if (sys_ATPsndrsp) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -202,21 +194,18 @@ int ATPsndrsp(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -struct ATPgetreq_args { - int fd; - unsigned char *buf; - int buflen; -}; int ATPgetreq(proc, uap, retval) - void *proc; + struct proc *proc; struct ATPgetreq_args *uap; int *retval; { int err; + atalk_lock(); if (sys_ATPgetreq) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -231,20 +220,18 @@ int ATPgetreq(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -struct ATPgetrsp_args { - int fd; - unsigned char *bdsp; -}; int ATPgetrsp(proc, uap, retval) - void *proc; + struct proc *proc; struct ATPgetrsp_args *uap; int *retval; { int err = 0; + atalk_lock(); if (sys_ATPgetrsp) { /* required check for all AppleTalk system calls */ if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { @@ -258,19 +245,16 @@ int ATPgetrsp(proc, uap, retval) *retval = -1; err = ENXIO; } + atalk_unlock(); return err; } -int atalk_closeref(fp, grefp) - struct 
file *fp; +int atalk_closeref(fg, grefp) + struct fileglob *fg; gref_t **grefp; { - if ((*grefp = (gref_t *)fp->f_data)) { - fp->f_data = 0; -/* - kprintf("atalk_closeref: fp = 0x%x, gref = 0x%x\n", (u_int)fp, - (u_int)*grefp); -*/ + if ((*grefp = (gref_t *)fg->fg_data)) { + fg->fg_data = 0; return(0); } return(EBADF); @@ -283,14 +267,15 @@ int atalk_openref(gref, retfd, proc) { extern int _ATread(), _ATwrite(),_ATioctl(), _ATselect(), _ATclose(), _ATkqfilter(); static struct fileops fileops = - {_ATread, _ATwrite, _ATioctl, _ATselect, _ATclose, _ATkqfilter}; + {_ATread, _ATwrite, _ATioctl, _ATselect, _ATclose, _ATkqfilter, 0}; int err, fd; - struct file *fp; - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - - if ((err = falloc(proc, &fp, &fd)) != 0) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + struct fileproc *fp; + + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + + proc_fdlock(proc); + if ((err = falloc_locked(proc, &fp, &fd, 1)) != 0) { + proc_fdunlock(proc); return err; } @@ -300,45 +285,69 @@ int atalk_openref(gref, retfd, proc) */ fp->f_type = DTYPE_ATALK+1; fp->f_ops = &fileops; + fp->f_data = (void *)gref; + *fdflags(proc, fd) &= ~UF_RESERVED; *retfd = fd; - fp->f_data = (void *)gref; + fp_drop(proc, fd, fp, 1); + proc_fdunlock(proc); /* kprintf("atalk_openref: fp = 0x%x, gref = 0x%x\n", (u_int)fp, (u_int)gref); */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); return 0; } -/* go from file descriptor to gref, which has been saved in fp->f_data */ -int atalk_getref(fp, fd, grefp, proc) -struct file *fp; +/* + * go from file descriptor to gref, which has been saved in fp->f_data + * + * This routine returns with an iocount on the fileproc when the fp is null + * as it converts fd to fileproc. Callers of this api who pass fp as null + * need to drop the iocount when they are done with the fp + */ +int atalk_getref(fp, fd, grefp, proc, droponerr) +struct fileproc *fp; int fd; gref_t **grefp; struct proc *proc; +int droponerr; { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (fp == 0) { - int error = fdgetf(proc, fd, &fp); - - if (error) { - - *grefp = (gref_t *) 0; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return EBADF; - } - } - *grefp = (gref_t *)fp->f_data; - if (*grefp == 0 || *grefp == (gref_t *)(-1)) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return EBADF; - } + int error; - if ((*grefp)->errno) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return (int)(*grefp)->errno; - } + proc_fdlock(proc); + error = atalk_getref_locked(fp, fd, grefp, proc, droponerr); + proc_fdunlock(proc); + return error; +} - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - return 0; +int atalk_getref_locked(fp, fd, grefp, proc, droponerr) +struct fileproc *fp; +int fd; +gref_t **grefp; +struct proc *proc; +int droponerr; +{ + lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); + if (fp == 0) { + int error = fp_lookup(proc, fd, &fp, 1); + + if (error) { + + *grefp = (gref_t *) 0; + return EBADF; + } + } + *grefp = (gref_t *)fp->f_data; + if (*grefp == 0 || *grefp == (gref_t *)(-1)) { + if (droponerr) + fp_drop(proc, fd, fp, 1); + printf("atalk_getref_locked EBADF f_data: %x\n", fp->f_data); + return EBADF; + } + + if ((*grefp)->errno) { + if (droponerr) + fp_drop(proc, fd, fp, 1); + return (int)(*grefp)->errno; + } + return 0; } diff --git a/bsd/netat/sys_glue.c b/bsd/netat/sys_glue.c index 7b6538136..a59859f2f 100644 --- a/bsd/netat/sys_glue.c +++ b/bsd/netat/sys_glue.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,12 +35,15 @@ #include #include #include +#include #include #include #include +#include #include #include #include +#include #include @@ -55,6 +58,7 @@ #include extern struct atpcb ddp_head; +extern lck_mtx_t * atalk_mutex; extern void ddp_putmsg(gref_t *gref, gbuf_t *m), @@ -84,6 +88,9 @@ at_ddp_stats_t at_ddp_stats; /* DDP statistics */ SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD, &at_ddp_stats, at_ddp_stats, "AppleTalk DDP Stats"); +static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p ); +static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p ); + atlock_t refall_lock; caddr_t atp_free_cluster_list = 0; @@ -112,7 +119,7 @@ void gref_wput(gref, m) gbuf_freem(gbuf_cont(m)); gbuf_cont(m) = 0; ((ioc_t *)gbuf_rptr(m))->ioc_rval = -1; - ((ioc_t *)gbuf_rptr(m))->ioc_error = EPROTO; + ((ioc_t *)gbuf_rptr(m))->ioc_error = EPROTOTYPE; gbuf_set_type(m, MSG_IOCNAK); atalk_putnext(gref, m); } else @@ -159,7 +166,7 @@ int _ATsocket(proto, err, proc) return -1; } gref->proto = proto; - gref->pid = ((struct proc *)proc)->p_pid; + gref->pid = proc_pid((struct proc *)proc); /* open the specified protocol */ switch (gref->proto) { @@ -211,7 +218,7 @@ int _ATgetmsg(fd, ctlptr, datptr, flags, err, proc) int rc = -1; gref_t *gref; - if ((*err = atalk_getref(0, fd, &gref, proc)) == 0) { + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) == 0) { switch (gref->proto) { case ATPROTO_ASP: rc = ASPgetmsg(gref, ctlptr, datptr, NULL, flags, err); @@ -225,6 +232,7 @@ int _ATgetmsg(fd, ctlptr, datptr, flags, err, proc) *err = EPROTONOSUPPORT; break; } + file_drop(fd); } /* kprintf("_ATgetmsg: return=%d\n", *err);*/ @@ -242,30 +250,31 @@ int _ATputmsg(fd, ctlptr, datptr, flags, err, proc) int rc = -1; gref_t *gref; - if ((*err = atalk_getref(0, fd, &gref, proc)) == 0) { + if ((*err = atalk_getref(0, fd, &gref, proc, 1)) == 0) { switch (gref->proto) { case ATPROTO_ASP: rc = ASPputmsg(gref, ctlptr, datptr, NULL, flags, err); break; default: *err = EPROTONOSUPPORT; break; } + file_drop(fd); } /* kprintf("_ATputmsg: return=%d\n", *err); */ return rc; } -int _ATclose(fp, proc) - struct file *fp; +int _ATclose(fg, proc) + struct fileglob *fg; struct proc *proc; { int err; gref_t *gref; - if ((err = atalk_closeref(fp, &gref)) == 0) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if ((err = atalk_closeref(fg, &gref)) == 0) { + atalk_lock(); (void)gref_close(gref); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + atalk_unlock(); } return err; @@ -281,10 +290,12 @@ int _ATrw(fp, rw, uio, ext) gref_t *gref; gbuf_t *m, *mhead, *mprev; - if ((err = atalk_getref(fp, 0, &gref, 0)) != 0) + /* no need to get/drop iocount as the fp already has one */ + if ((err = atalk_getref_locked(fp, 0, &gref, 0, 1)) != 0) return err; - if ((len = uio->uio_resid) == 0) + // LP64todo - fix this! 
+ if ((len = uio_resid(uio)) == 0) return 0; ATDISABLE(s, gref->lock); @@ -293,7 +304,7 @@ int _ATrw(fp, rw, uio, ext) KERNEL_DEBUG(DBG_ADSP_ATRW, 0, gref, len, gref->rdhead, 0); while ((gref->errno == 0) && ((mhead = gref->rdhead) == 0)) { gref->sevents |= POLLMSG; - err = tsleep(&gref->event, PSOCK | PCATCH, "AT read", 0); + err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT read", 0); gref->sevents &= ~POLLMSG; if (err != 0) { ATENABLE(s, gref->lock); @@ -359,7 +370,7 @@ int _ATrw(fp, rw, uio, ext) while (!(*gref->writeable)(gref)) { /* flow control on, wait to be enabled to write */ gref->sevents |= POLLSYNC; - err = tsleep(&gref->event, PSOCK | PCATCH, "AT write", 0); + err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT write", 0); gref->sevents &= ~POLLSYNC; if (err != 0) { ATENABLE(s, gref->lock); @@ -394,7 +405,7 @@ int _ATrw(fp, rw, uio, ext) } /* _ATrw */ int _ATread(fp, uio, cred, flags, p) - void *fp; + struct fileproc *fp; struct uio *uio; void *cred; int flags; @@ -402,14 +413,14 @@ int _ATread(fp, uio, cred, flags, p) { int stat; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + atalk_lock(); stat = _ATrw(fp, UIO_READ, uio, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + atalk_unlock(); return stat; } int _ATwrite(fp, uio, cred, flags, p) - void *fp; + struct fileproc *fp; struct uio *uio; void *cred; int flags; @@ -417,10 +428,9 @@ int _ATwrite(fp, uio, cred, flags, p) { int stat; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + atalk_lock(); stat = _ATrw(fp, UIO_WRITE, uio, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + atalk_unlock(); return stat; } @@ -431,27 +441,43 @@ int _ATwrite(fp, uio, cred, flags, p) int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) { int s, err = 0, len; + u_int size; gbuf_t *m, *mdata; ioc_t *ioc; - ioccmd_t ioccmd; + user_addr_t user_arg; + user_ioccmd_t user_ioccmd; + boolean_t is64bit; /* error if not for us */ if ((cmd & 0xffff) != 0xff99) return EOPNOTSUPP; + size = IOCPARM_LEN(cmd); + if (size != sizeof(user_addr_t)) + return EINVAL; + + user_arg = *((user_addr_t *)arg); + /* copy in ioc command info */ -/* - kprintf("at_ioctl: arg ioccmd.ic_cmd=%x ic_len=%x gref->lock=%x, gref->event=%x\n", - ((ioccmd_t *)arg)->ic_cmd, ((ioccmd_t *)arg)->ic_len, - gref->lock, gref->event); -*/ - if (fromKernel) - bcopy (arg, &ioccmd, sizeof (ioccmd_t)); + is64bit = proc_is64bit(current_proc()); + if (fromKernel) { + ioccmd_t tmp; + bcopy (CAST_DOWN(caddr_t, user_arg), &tmp, sizeof (tmp)); + ioccmd_t_32_to_64(&tmp, &user_ioccmd); + } else { - if ((err = copyin((caddr_t)arg, (caddr_t)&ioccmd, sizeof(ioccmd_t))) != 0) { + if (is64bit) { + err = copyin(user_arg, (caddr_t)&user_ioccmd, sizeof(user_ioccmd)); + } + else { + ioccmd_t tmp; + err = copyin(user_arg, (caddr_t)&tmp, sizeof(tmp)); + ioccmd_t_32_to_64(&tmp, &user_ioccmd); + } + if (err != 0) { #ifdef APPLETALK_DEBUG - kprintf("at_ioctl: err = %d, copyin(%x, %x, %d)\n", err, - (caddr_t)arg, (caddr_t)&ioccmd, sizeof(ioccmd_t)); + kprintf("at_ioctl: err = %d, copyin(%llx, %x, %d)\n", err, + user_arg, (caddr_t)&user_ioccmd, sizeof(user_ioccmd)); #endif return err; } @@ -466,27 +492,27 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) /* create the ioc command second mbuf contains the actual ASP command */ - if (ioccmd.ic_len) { - if ((gbuf_cont(m) = gbuf_alloc(ioccmd.ic_len, PRI_HI)) == 0) { + if (user_ioccmd.ic_len) { + if ((gbuf_cont(m) = gbuf_alloc(user_ioccmd.ic_len, PRI_HI)) == 0) { gbuf_freem(m); 
#ifdef APPLETALK_DEBUG kprintf("at_ioctl: gbuf_alloc err=%d\n",ENOBUFS); #endif return ENOBUFS; } - gbuf_wset(gbuf_cont(m), ioccmd.ic_len); /* mbuf->m_len */ + gbuf_wset(gbuf_cont(m), user_ioccmd.ic_len); /* mbuf->m_len */ if (fromKernel) - bcopy (ioccmd.ic_dp, gbuf_rptr(gbuf_cont(m)), ioccmd.ic_len); + bcopy (CAST_DOWN(caddr_t, user_ioccmd.ic_dp), gbuf_rptr(gbuf_cont(m)), user_ioccmd.ic_len); else { - if ((err = copyin((caddr_t)ioccmd.ic_dp, (caddr_t)gbuf_rptr(gbuf_cont(m)), ioccmd.ic_len)) != 0) { + if ((err = copyin(user_ioccmd.ic_dp, (caddr_t)gbuf_rptr(gbuf_cont(m)), user_ioccmd.ic_len)) != 0) { gbuf_freem(m); return err; } } } ioc = (ioc_t *) gbuf_rptr(m); - ioc->ioc_cmd = ioccmd.ic_cmd; - ioc->ioc_count = ioccmd.ic_len; + ioc->ioc_cmd = user_ioccmd.ic_cmd; + ioc->ioc_count = user_ioccmd.ic_len; ioc->ioc_error = 0; ioc->ioc_rval = 0; @@ -500,7 +526,7 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) #ifdef APPLETALK_DEBUG kprintf("sleep gref = 0x%x\n", (unsigned)gref); #endif - err = tsleep(&gref->iocevent, PSOCK | PCATCH, "AT ioctl", 0); + err = msleep(&gref->iocevent, atalk_mutex, PSOCK | PCATCH, "AT ioctl", 0); gref->sevents &= ~POLLPRI; if (err != 0) { ATENABLE(s, gref->lock); @@ -527,19 +553,19 @@ int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) /* process the ioc response */ ioc = (ioc_t *) gbuf_rptr(m); if ((err = ioc->ioc_error) == 0) { - ioccmd.ic_timout = ioc->ioc_rval; - ioccmd.ic_len = 0; + user_ioccmd.ic_timout = ioc->ioc_rval; + user_ioccmd.ic_len = 0; mdata = gbuf_cont(m); - if (mdata && ioccmd.ic_dp) { - ioccmd.ic_len = gbuf_msgsize(mdata); + if (mdata && user_ioccmd.ic_dp) { + user_ioccmd.ic_len = gbuf_msgsize(mdata); for (len = 0; mdata; mdata = gbuf_cont(mdata)) { if (fromKernel) - bcopy (gbuf_rptr(mdata), &ioccmd.ic_dp[len], gbuf_len(mdata)); + bcopy (gbuf_rptr(mdata), CAST_DOWN(caddr_t, (user_ioccmd.ic_dp + len)), gbuf_len(mdata)); else { - if ((err = copyout((caddr_t)gbuf_rptr(mdata), (caddr_t)&ioccmd.ic_dp[len], gbuf_len(mdata))) < 0) { + if ((err = copyout((caddr_t)gbuf_rptr(mdata), (user_ioccmd.ic_dp + len), gbuf_len(mdata))) != 0) { #ifdef APPLETALK_DEBUG kprintf("at_ioctl: len=%d error copyout=%d from=%x to=%x gbuf_len=%x\n", - len, err, (caddr_t)gbuf_rptr(mdata), (caddr_t)&ioccmd.ic_dp[len], gbuf_len(mdata)); + len, err, (caddr_t)gbuf_rptr(mdata), CAST_DOWN(caddr_t, user_ioccmd.ic_dp + len), gbuf_len(mdata)); #endif goto l_done; } @@ -548,14 +574,21 @@ } } - if (fromKernel) - bcopy (&ioccmd, arg, sizeof(ioccmd_t)); + if (fromKernel) { + ioccmd_t tmp; + ioccmd_t_64_to_32(&user_ioccmd, &tmp); + bcopy (&tmp, CAST_DOWN(caddr_t, user_arg), sizeof(tmp)); + } else { - if ((err = copyout((caddr_t)&ioccmd, (caddr_t)arg, sizeof(ioccmd_t))) != 0) { -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: error copyout2=%d from=%x to=%x len=%d\n", - err, &ioccmd, arg, sizeof(ioccmd_t)); -#endif + if (is64bit) { + err = copyout((caddr_t)&user_ioccmd, user_arg, sizeof(user_ioccmd)); + } + else { + ioccmd_t tmp; + ioccmd_t_64_to_32(&user_ioccmd, &tmp); + err = copyout((caddr_t)&tmp, user_arg, sizeof(tmp)); + } + if (err != 0) { goto l_done; } } @@ -576,8 +609,9 @@ int _ATioctl(fp, cmd, arg, proc) int err; gref_t *gref; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if ((err = atalk_getref(fp, 0, &gref, 0)) != 0) { + atalk_lock(); + /* No need to get a reference on fp as it already has one */ + if ((err = atalk_getref_locked(fp, 0, &gref, 0, 0)) != 0) { #ifdef APPLETALK_DEBUG
kprintf("_ATioctl: atalk_getref err = %d\n", err); #endif @@ -585,13 +619,13 @@ int _ATioctl(fp, cmd, arg, proc) else err = at_ioctl(gref, cmd, arg, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + atalk_unlock(); return err; } int _ATselect(fp, which, wql, proc) - struct file *fp; + struct fileproc *fp; int which; void * wql; struct proc *proc; @@ -599,9 +633,10 @@ int _ATselect(fp, which, wql, proc) int s, err, rc = 0; gref_t *gref; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - err = atalk_getref(fp, 0, &gref, 0); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + atalk_lock(); + /* no need to drop the iocount as select covers that */ + err = atalk_getref_locked(fp, 0, &gref, 0, 0); + atalk_unlock(); if (err != 0) rc = 1; @@ -633,7 +668,7 @@ int _ATselect(fp, which, wql, proc) } int _ATkqfilter(fp, kn, p) - struct file *fp; + struct fileproc *fp; struct knote *kn; struct proc *p; { @@ -1317,3 +1352,20 @@ void ioc_ack(errno, m, gref) atalk_putnext(gref, m); } + +static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p ) +{ + to_p->ic_cmd = from_p->ic_cmd; + to_p->ic_timout = from_p->ic_timout; + to_p->ic_len = from_p->ic_len; + to_p->ic_dp = CAST_USER_ADDR_T(from_p->ic_dp); +} + + +static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p ) +{ + to_p->ic_cmd = from_p->ic_cmd; + to_p->ic_timout = from_p->ic_timout; + to_p->ic_len = from_p->ic_len; + to_p->ic_dp = CAST_DOWN(caddr_t, from_p->ic_dp); +} diff --git a/bsd/netat/sysglue.h b/bsd/netat/sysglue.h index 4235ac253..6aa9e37c6 100644 --- a/bsd/netat/sysglue.h +++ b/bsd/netat/sysglue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,6 +35,9 @@ #ifndef _NETAT_SYSGLUE_H_ #define _NETAT_SYSGLUE_H_ #include +#include + +#ifdef __APPLE_API_OBSOLETE /* The following is originally from netat/h/localglue.h, which was @@ -48,6 +51,31 @@ typedef struct { char *ic_dp; } ioccmd_t; +#ifdef KERNEL +#ifdef KERNEL_PRIVATE + +/* LP64 version of ioccmd_t. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with ioccmd_t + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +typedef struct { + int ic_cmd; + int ic_timout; + int ic_len; + user_addr_t ic_dp; +} user_ioccmd_t; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL_PRIVATE +#endif // KERNEL + typedef struct { int ioc_cmd; void *ioc_cr; @@ -89,7 +117,7 @@ typedef struct { #endif #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define SYS_HZ HZ /* Number of clock (SYS_SETTIMER) ticks per second */ #define HZ hz /* HZ ticks definition used throughout AppleTalk */ @@ -100,7 +128,6 @@ typedef struct { * in MacOSX. Need to find a better Error code ###LD */ #define ENOTREADY ESHUTDOWN -#define EPROTO EPROTOTYPE /* T_MPSAFE is used only in atp_open. I suspect it's a * trick to accelerate local atp transactions. 
@@ -130,11 +157,11 @@ typedef struct { #endif typedef int atevent_t; -typedef simple_lock_t atlock_t; +typedef int atlock_t; typedef int *atomic_p; #define ATLOCKINIT(a) (a = (atlock_t) EVENT_NULL) -#define ATDISABLE(l, a) (l = splimp()) -#define ATENABLE(l, a) splx(l) +#define ATDISABLE(l, a) +#define ATENABLE(l, a) #define ATEVENTINIT(a) (a = (atevent_t) EVENT_NULL) #define DDP_OUTPUT(m) ddp_putmsg(0,m) #define StaticProc static @@ -187,6 +214,7 @@ int gbuf_msgsize(gbuf_t *m); #undef timeout #undef untimeout -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* KERNEL */ +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_SYSGLUE_H_ */ diff --git a/bsd/netat/zip.h b/bsd/netat/zip.h index 457c246cb..fd0b38a5f 100644 --- a/bsd/netat/zip.h +++ b/bsd/netat/zip.h @@ -32,6 +32,8 @@ #define _NETAT_ZIP_H_ #include +#ifdef __APPLE_API_OBSOLETE + /* Definitions for ZIP, per AppleTalk Zone Information Protocol * documentation from `Inside AppleTalk', July 14, 1986. */ @@ -90,4 +92,5 @@ typedef struct { #define ZIP_RE_AARP -1 +#endif /* __APPLE_API_OBSOLETE */ #endif /* _NETAT_ZIP_H_ */ diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile index a680385a0..de3d2890a 100644 --- a/bsd/netinet/Makefile +++ b/bsd/netinet/Makefile @@ -20,29 +20,36 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - bootp.h icmp6.h icmp_var.h if_ether.h \ - igmp.h igmp_var.h in.h in_gif.h in_pcb.h \ - in_systm.h in_var.h ip.h ip6.h ip_compat.h \ - ip_dummynet.h ip_ecn.h ip_encap.h \ - ip_fw.h ip_icmp.h ip_mroute.h \ - ip_var.h tcp.h \ - tcp_debug.h tcp_fsm.h tcp_seq.h tcp_timer.h tcp_var.h \ + bootp.h icmp6.h if_ether.h icmp_var.h \ + igmp.h igmp_var.h in.h in_pcb.h \ + in_systm.h in_var.h ip.h ip6.h \ + ip_fw.h ip_fw2.h \ + ip_icmp.h ip_mroute.h ip_var.h tcp.h \ + tcp_fsm.h tcp_seq.h tcp_timer.h tcp_var.h \ tcpip.h udp.h udp_var.h +KERNELFILES = \ + kpi_ipfilter.h in_arp.h + PRIVATE_DATAFILES = \ - if_fddi.h if_atm.h ip_flow.h + if_fddi.h if_atm.h ip_dummynet.h \ + tcp_debug.h \ + in_gif.h ip_compat.h + +PRIVATE_KERNELFILES = ${KERNELFILES} \ + ip_ecn.h ip_encap.h ip_flow.h INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = netinet -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${DATAFILES} ${KERNELFILES} -EXPORT_MI_DIR = netinet +EXPORT_MI_DIR = ${INSTALL_MI_DIR} INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netinet/bootp.h b/bsd/netinet/bootp.h index 4de5d87cd..8cbbfa7c9 100644 --- a/bsd/netinet/bootp.h +++ b/bsd/netinet/bootp.h @@ -33,6 +33,8 @@ * packet. 
*/ +#include + #define iaddr_t struct in_addr struct bootp { diff --git a/bsd/netinet/dhcp_options.h b/bsd/netinet/dhcp_options.h index cbef0e298..8c5daabde 100644 --- a/bsd/netinet/dhcp_options.h +++ b/bsd/netinet/dhcp_options.h @@ -174,7 +174,7 @@ typedef struct { typedef ptrlist_t dhcpol_t; -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE void dhcpol_init(dhcpol_t * list); void dhcpol_free(dhcpol_t * list); @@ -194,5 +194,5 @@ boolean_t dhcpol_parse_vendor(dhcpol_t * vendor, dhcpol_t * options, unsigned char * err); void dhcpol_print(dhcpol_t * list); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_DHCP_OPTIONS_H */ diff --git a/bsd/netinet/icmp6.h b/bsd/netinet/icmp6.h index aaa2ee50d..87380d493 100644 --- a/bsd/netinet/icmp6.h +++ b/bsd/netinet/icmp6.h @@ -506,7 +506,6 @@ struct icmp6_filter { }; #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE #define ICMP6_FILTER_SETPASSALL(filterp) \ do { \ int i; u_char *p; \ @@ -516,7 +515,6 @@ do { \ } while (0) #define ICMP6_FILTER_SETBLOCKALL(filterp) \ bzero(filterp, sizeof(struct icmp6_filter)) -#endif /* __APPLE_API_UNSTABLE */ #else /* KERNEL */ #define ICMP6_FILTER_SETPASSALL(filterp) \ memset(filterp, 0xff, sizeof(struct icmp6_filter)) @@ -533,7 +531,6 @@ do { \ #define ICMP6_FILTER_WILLBLOCK(type, filterp) \ ((((filterp)->icmp6_filt[(type) >> 5]) & (1 << ((type) & 31))) == 0) -#ifdef __APPLE_API_UNSTABLE /* * Variables related to this implementation * of the internet control message protocol version 6. @@ -618,6 +615,7 @@ struct icmp6stat { #define ICMPV6CTL_ND6_PRLIST 20 #define ICMPV6CTL_MAXID 21 +#ifdef KERNEL_PRIVATE #define ICMPV6CTL_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ @@ -641,29 +639,26 @@ struct icmp6stat { { 0, 0 }, \ { 0, 0 }, \ } -#endif /* __APPLE_API_UNSTABLE */ #define RTF_PROBEMTU RTF_PROTO1 -#ifdef KERNEL # ifdef __STDC__ struct rtentry; struct rttimer; struct in6_multi; # endif -#ifdef __APPLE_API_PRIVATE -void icmp6_init __P((void)); -void icmp6_paramerror __P((struct mbuf *, int)); -void icmp6_error __P((struct mbuf *, int, int, int)); -int icmp6_input __P((struct mbuf **, int *)); -void icmp6_fasttimo __P((void)); -void icmp6_reflect __P((struct mbuf *, size_t)); -void icmp6_prepare __P((struct mbuf *)); -void icmp6_redirect_input __P((struct mbuf *, int)); -void icmp6_redirect_output __P((struct mbuf *, struct rtentry *)); +void icmp6_init(void); +void icmp6_paramerror(struct mbuf *, int); +void icmp6_error(struct mbuf *, int, int, int); +int icmp6_input(struct mbuf **, int *); +void icmp6_fasttimo(void); +void icmp6_reflect(struct mbuf *, size_t); +void icmp6_prepare(struct mbuf *); +void icmp6_redirect_input(struct mbuf *, int); +void icmp6_redirect_output(struct mbuf *, struct rtentry *); struct ip6ctlparam; -void icmp6_mtudisc_update __P((struct ip6ctlparam *, int)); +void icmp6_mtudisc_update(struct ip6ctlparam *, int); /* XXX: is this the right place for these macros?
*/ #define icmp6_ifstat_inc(ifp, tag) \ @@ -730,7 +725,6 @@ do { \ extern int icmp6_rediraccept; /* accept/process redirects */ extern int icmp6_redirtimeout; /* cache time for redirect routes */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* !_NETINET_ICMP6_H_ */ diff --git a/bsd/netinet/icmp_var.h b/bsd/netinet/icmp_var.h index cc55d4fd1..ec3a8ef5c 100644 --- a/bsd/netinet/icmp_var.h +++ b/bsd/netinet/icmp_var.h @@ -58,7 +58,6 @@ #ifndef _NETINET_ICMP_VAR_H_ #define _NETINET_ICMP_VAR_H_ #include -#ifdef __APPLE_API_UNSTABLE /* * Variables related to this implementation @@ -90,6 +89,7 @@ struct icmpstat { #define ICMPCTL_TIMESTAMP 4 /* allow replies to time stamp requests */ #define ICMPCTL_MAXID 5 +#ifdef KERNEL_PRIVATE #define ICMPCTL_NAMES { \ { 0, 0 }, \ { "maskrepl", CTLTYPE_INT }, \ @@ -97,13 +97,10 @@ struct icmpstat { { "icmplim", CTLTYPE_INT }, \ { "icmptimestamp", CTLTYPE_INT }, \ } -#endif /* __APPLE_API_UNSTABLE */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE SYSCTL_DECL(_net_inet_icmp); #ifdef ICMP_BANDLIM -extern int badport_bandlim __P((int)); +extern int badport_bandlim(int); #endif #define BANDLIM_UNLIMITED -1 #define BANDLIM_ICMP_UNREACH 0 @@ -112,6 +109,5 @@ extern int badport_bandlim __P((int)); #define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ #define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ #define BANDLIM_MAX 4 -#endif /* __APPLE_API_PRIVATE */ -#endif -#endif +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET_ICMP_VAR_H_ */ diff --git a/bsd/netinet/if_atm.h b/bsd/netinet/if_atm.h index 9c31d421b..e1b147905 100644 --- a/bsd/netinet/if_atm.h +++ b/bsd/netinet/if_atm.h @@ -59,13 +59,13 @@ */ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct atm_pseudohdr; struct mbuf; struct rtentry; struct sockaddr; -void atm_rtrequest __P((int, struct rtentry *, struct sockaddr *)); -int atmresolve __P((struct rtentry *, struct mbuf *, struct sockaddr *, - struct atm_pseudohdr *)); -#endif /* __APPLE_API_PRIVATE */ +void atm_rtrequest(int, struct rtentry *, struct sockaddr *); +int atmresolve(struct rtentry *, struct mbuf *, struct sockaddr *, + struct atm_pseudohdr *); +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/if_ether.c b/bsd/netinet/if_ether.c deleted file mode 100644 index 499057940..000000000 --- a/bsd/netinet/if_ether.c +++ /dev/null @@ -1,923 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved.
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 - * $FreeBSD: src/sys/netinet/if_ether.c,v 1.64.2.11 2001/07/25 17:27:56 jlemon Exp $ - */ - -/* - * Ethernet address resolution protocol. - * TODO: - * add "inuse/lock" bit (or ref. 
count) along with valid bit - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#if BRIDGE -#include -#include -#endif - -#include -#include -#include - -#include - -#define SIN(s) ((struct sockaddr_in *)s) -#define SDL(s) ((struct sockaddr_dl *)s) - -SYSCTL_DECL(_net_link_ether); -SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); - -/* timer values */ -static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ -static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ -static int arpt_down = 20; /* once declared down, don't send for 20 sec */ - -/* Apple Hardware SUM16 checksuming */ -int apple_hwcksum_tx = 1; -int apple_hwcksum_rx = 1; - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW, - &arpt_prune, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, - &arpt_keep, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW, - &arpt_down, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx, CTLFLAG_RW, - &apple_hwcksum_tx, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, CTLFLAG_RW, - &apple_hwcksum_rx, 0, ""); - -#define rt_expire rt_rmx.rmx_expire - -struct llinfo_arp { - LIST_ENTRY(llinfo_arp) la_le; - struct rtentry *la_rt; - struct mbuf *la_hold; /* last packet until resolved/timeout */ - long la_asked; /* last time we QUERIED for this addr */ -#define la_timer la_rt->rt_rmx.rmx_expire /* deletion time in seconds */ -}; - -static LIST_HEAD(, llinfo_arp) llinfo_arp; - -struct ifqueue arpintrq = {0, 0, 0, 50}; -static int arp_inuse, arp_allocated; - -static int arp_maxtries = 5; -static int useloopback = 1; /* use loopback interface for local traffic */ -static int arp_proxyall = 0; -static int arp_init_called = 0; - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, - &arp_maxtries, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, - &useloopback, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, - &arp_proxyall, 0, ""); - -void arp_rtrequest __P((int, struct rtentry *, struct sockaddr *)); -static void arprequest __P((struct arpcom *, - struct in_addr *, struct in_addr *, u_char *)); -void arpintr __P((void)); -static void arptfree __P((struct llinfo_arp *)); -static void arptimer __P((void *)); -static u_char etherbroadcastaddr[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -static struct llinfo_arp - *arplookup __P((u_long, int, int)); -#if INET -static void in_arpinput __P((struct mbuf *)); -#endif - -/* - * Timeout routine. Age arp_tab entries periodically. - */ -/* ARGSUSED */ -static void -arptimer(ignored_arg) - void *ignored_arg; -{ -#ifdef __APPLE__ - boolean_t funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - int s = splnet(); - register struct llinfo_arp *la = llinfo_arp.lh_first; - struct llinfo_arp *ola; - - timeout(arptimer, (caddr_t)0, arpt_prune * hz); - while ((ola = la) != 0) { - register struct rtentry *rt = la->la_rt; - la = la->la_le.le_next; - if (rt->rt_expire && rt->rt_expire <= time_second) - arptfree(ola); /* timer has expired, clear */ - } - splx(s); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - -/* - * Parallel to llc_rtrequest. 
- */ -void -arp_rtrequest(req, rt, sa) - int req; - register struct rtentry *rt; - struct sockaddr *sa; -{ - register struct sockaddr *gate = rt->rt_gateway; - register struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; - static int arpinit_done; - - if (!arpinit_done) { - arpinit_done = 1; - LIST_INIT(&llinfo_arp); - timeout(arptimer, (caddr_t)0, hz); -#ifndef __APPLE__ - register_netisr(NETISR_ARP, arpintr); -#endif - } - if (rt->rt_flags & RTF_GATEWAY) - return; - switch (req) { - - case RTM_ADD: - /* - * XXX: If this is a manually added route to interface - * such as older version of routed or gated might provide, - * restore cloning bit. - */ - if ((rt->rt_flags & RTF_HOST) == 0 && - SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) - rt->rt_flags |= RTF_CLONING; - if (rt->rt_flags & RTF_CLONING) { - /* - * Case 1: This route should come from a route to iface. - */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = rt->rt_ifp->if_type; - SDL(gate)->sdl_index = rt->rt_ifp->if_index; - rt->rt_expire = time_second; - break; - } - /* Announce a new entry if requested. */ - if (rt->rt_flags & RTF_ANNOUNCE) - arprequest((struct arpcom *)rt->rt_ifp, - &SIN(rt_key(rt))->sin_addr, - &SIN(rt_key(rt))->sin_addr, - (u_char *)LLADDR(SDL(gate))); - /*FALLTHROUGH*/ - case RTM_RESOLVE: - if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { - log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n"); - break; - } - SDL(gate)->sdl_type = rt->rt_ifp->if_type; - SDL(gate)->sdl_index = rt->rt_ifp->if_index; - if (la != 0) - break; /* This happens on a route change */ - /* - * Case 2: This route may come from cloning, or a manual route - * add with a LL address. - */ - R_Malloc(la, struct llinfo_arp *, sizeof(*la)); - rt->rt_llinfo = (caddr_t)la; - if (la == 0) { - log(LOG_DEBUG, "arp_rtrequest: malloc failed\n"); - break; - } - arp_inuse++, arp_allocated++; - Bzero(la, sizeof(*la)); - la->la_rt = rt; - rt->rt_flags |= RTF_LLINFO; - LIST_INSERT_HEAD(&llinfo_arp, la, la_le); - -#if INET - /* - * This keeps the multicast addresses from showing up - * in `arp -a' listings as unresolved. It's not actually - * functional. Then the same for broadcast. - */ - if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { - ETHER_MAP_IP_MULTICAST(&SIN(rt_key(rt))->sin_addr, - LLADDR(SDL(gate))); - SDL(gate)->sdl_alen = 6; - rt->rt_expire = 0; - } - if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { - memcpy(LLADDR(SDL(gate)), etherbroadcastaddr, 6); - SDL(gate)->sdl_alen = 6; - rt->rt_expire = time_second; - } -#endif - - if (SIN(rt_key(rt))->sin_addr.s_addr == - (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) { - /* - * This test used to be - * if (loif.if_flags & IFF_UP) - * It allowed local traffic to be forced - * through the hardware by configuring the loopback down. - * However, it causes problems during network configuration - * for boards that can't receive packets they send. - * It is now necessary to clear "useloopback" and remove - * the route to force traffic out to the hardware. 
- */ - rt->rt_expire = 0; - Bcopy(((struct arpcom *)rt->rt_ifp)->ac_enaddr, - LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6); - if (useloopback) - rt->rt_ifp = loif; - - } - break; - - case RTM_DELETE: - if (la == 0) - break; - arp_inuse--; - LIST_REMOVE(la, la_le); - rt->rt_llinfo = 0; - rt->rt_flags &= ~RTF_LLINFO; - if (la->la_hold) - m_freem(la->la_hold); - Free((caddr_t)la); - } -} - -/* - * Broadcast an ARP packet, asking who has addr on interface ac. - */ -void -arpwhohas(ac, addr) - struct arpcom *ac; - struct in_addr *addr; -{ - struct ifnet *ifp = (struct ifnet *)ac; - struct ifaddr *ifa = TAILQ_FIRST(&ifp->if_addrhead); - - while (ifa) { - if (ifa->ifa_addr->sa_family == AF_INET) { - arprequest(ac, &SIN(ifa->ifa_addr)->sin_addr, addr, ac->ac_enaddr); - return; - } - ifa = TAILQ_NEXT(ifa, ifa_link); - } - return; /* XXX */ -} - -/* - * Broadcast an ARP request. Caller specifies: - * - arp header source ip address - * - arp header target ip address - * - arp header source ethernet address - */ -static void -arprequest(ac, sip, tip, enaddr) - register struct arpcom *ac; - register struct in_addr *sip, *tip; - register u_char *enaddr; -{ - register struct mbuf *m; - register struct ether_header *eh; - register struct ether_arp *ea; - struct sockaddr sa; - static u_char llcx[] = { 0x82, 0x40, LLC_SNAP_LSAP, LLC_SNAP_LSAP, - LLC_UI, 0x00, 0x00, 0x00, 0x08, 0x06 }; - - if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) - return; - m->m_pkthdr.rcvif = (struct ifnet *)0; - switch (ac->ac_if.if_type) { - case IFT_ISO88025: - m->m_len = sizeof(*ea) + sizeof(llcx); - m->m_pkthdr.len = sizeof(*ea) + sizeof(llcx); - MH_ALIGN(m, sizeof(*ea) + sizeof(llcx)); - (void)memcpy(mtod(m, caddr_t), llcx, sizeof(llcx)); - (void)memcpy(sa.sa_data, etherbroadcastaddr, 6); - (void)memcpy(sa.sa_data + 6, enaddr, 6); - sa.sa_data[6] |= TR_RII; - sa.sa_data[12] = TR_AC; - sa.sa_data[13] = TR_LLC_FRAME; - ea = (struct ether_arp *)(mtod(m, char *) + sizeof(llcx)); - bzero((caddr_t)ea, sizeof (*ea)); - ea->arp_hrd = htons(ARPHRD_IEEE802); - break; - case IFT_FDDI: - case IFT_ETHER: - /* - * This may not be correct for types not explicitly - * listed, but this is our best guess - */ - default: - m->m_len = sizeof(*ea); - m->m_pkthdr.len = sizeof(*ea); - MH_ALIGN(m, sizeof(*ea)); - ea = mtod(m, struct ether_arp *); - eh = (struct ether_header *)sa.sa_data; - bzero((caddr_t)ea, sizeof (*ea)); - /* if_output will not swap */ - eh->ether_type = htons(ETHERTYPE_ARP); - (void)memcpy(eh->ether_dhost, etherbroadcastaddr, - sizeof(eh->ether_dhost)); - ea->arp_hrd = htons(ARPHRD_ETHER); - break; - } - ea->arp_pro = htons(ETHERTYPE_IP); - ea->arp_hln = sizeof(ea->arp_sha); /* hardware address length */ - ea->arp_pln = sizeof(ea->arp_spa); /* protocol address length */ - ea->arp_op = htons(ARPOP_REQUEST); - (void)memcpy(ea->arp_sha, enaddr, sizeof(ea->arp_sha)); - (void)memcpy(ea->arp_spa, sip, sizeof(ea->arp_spa)); - (void)memcpy(ea->arp_tpa, tip, sizeof(ea->arp_tpa)); - sa.sa_family = AF_UNSPEC; - sa.sa_len = sizeof(sa); - dlil_output(ifptodlt(((struct ifnet *)ac), PF_INET), m, 0, &sa, 0); -} - -/* - * Resolve an IP address into an ethernet address. If success, - * desten is filled in. If there is no entry in arptab, - * set one up and broadcast a request for the IP address. - * Hold onto this mbuf and resend it once the address - * is finally resolved. 
A return value of 1 indicates - * that desten has been filled in and the packet should be sent - * normally; a 0 return indicates that the packet has been - * taken over here, either now or for later transmission. - */ -int -arpresolve(ac, rt, m, dst, desten, rt0) - register struct arpcom *ac; - register struct rtentry *rt; - struct mbuf *m; - register struct sockaddr *dst; - register u_char *desten; - struct rtentry *rt0; -{ - struct llinfo_arp *la = 0; - struct sockaddr_dl *sdl; - - if (m->m_flags & M_BCAST) { /* broadcast */ - (void)memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr)); - return (1); - } - if (m->m_flags & M_MCAST) { /* multicast */ - ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); - return(1); - } - if (rt) - la = (struct llinfo_arp *)rt->rt_llinfo; - if (la == 0) { - la = arplookup(SIN(dst)->sin_addr.s_addr, 1, 0); - if (la) - rt = la->la_rt; - } - if (la == 0 || rt == 0) { - log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s%s%s\n", - inet_ntoa(SIN(dst)->sin_addr), la ? "la" : "", - rt ? "rt" : ""); - m_freem(m); - return (0); - } - sdl = SDL(rt->rt_gateway); - /* - * Check the address family and length is valid, the address - * is resolved; otherwise, try to resolve. - */ - if ((rt->rt_expire == 0 || rt->rt_expire > time_second) && - sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) { - bcopy(LLADDR(sdl), desten, sdl->sdl_alen); - return 1; - } - /* - * If ARP is disabled on this interface, stop. - * XXX - * Probably should not allocate empty llinfo struct if we are - * not going to be sending out an arp request. - */ - if (ac->ac_if.if_flags & IFF_NOARP) - return (0); - /* - * There is an arptab entry, but no ethernet address - * response yet. Replace the held mbuf with this - * latest one. - */ - if (la->la_hold) - m_freem(la->la_hold); - la->la_hold = m; - if (rt->rt_expire) { - rt->rt_flags &= ~RTF_REJECT; - if (la->la_asked == 0 || rt->rt_expire != time_second) { - rt->rt_expire = time_second; - if (la->la_asked++ < arp_maxtries) - arprequest(ac, - &SIN(rt->rt_ifa->ifa_addr)->sin_addr, - &SIN(dst)->sin_addr, ac->ac_enaddr); - else { - rt->rt_flags |= RTF_REJECT; - rt->rt_expire += arpt_down; - la->la_asked = 0; - } - - } - } - return (0); -} - -/* - * Common length and type checks are done here, - * then the protocol-specific routine is called. - */ -void -arpintr() -{ - register struct mbuf *m; - register struct arphdr *ar; - int s; - - while (arpintrq.ifq_head) { - s = splimp(); - IF_DEQUEUE(&arpintrq, m); - splx(s); - if (m == 0 || (m->m_flags & M_PKTHDR) == 0) - panic("arpintr"); - - if (m->m_len < sizeof(struct arphdr) && - ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) { - log(LOG_ERR, "arp: runt packet -- m_pullup failed\n"); - continue; - } - ar = mtod(m, struct arphdr *); - - if (ntohs(ar->ar_hrd) != ARPHRD_ETHER - && ntohs(ar->ar_hrd) != ARPHRD_IEEE802) { - log(LOG_ERR, - "arp: unknown hardware address format (0x%2D)\n", - (unsigned char *)&ar->ar_hrd, ""); - m_freem(m); - continue; - } - - if (m->m_pkthdr.len < sizeof(struct arphdr) + 2 * ar->ar_hln - + 2 * ar->ar_pln) { - log(LOG_ERR, "arp: runt packet\n"); - m_freem(m); - continue; - } - - switch (ntohs(ar->ar_pro)) { -#ifdef INET - case ETHERTYPE_IP: - in_arpinput(m); - continue; -#endif - } - m_freem(m); - } -} - -#if INET -/* - * ARP for Internet protocols on 10 Mb/s Ethernet. - * Algorithm is that given in RFC 826. - * In addition, a sanity check is performed on the sender - * protocol address, to catch impersonators. 
- * We no longer handle negotiations for use of trailer protocol: - * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent - * along with IP replies if we wanted trailers sent to us, - * and also sent them in response to IP replies. - * This allowed either end to announce the desire to receive - * trailer packets. - * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, - * but formerly didn't normally send requests. - */ -static int log_arp_wrong_iface = 0; - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW, - &log_arp_wrong_iface, 0, - "log arp packets arriving on the wrong interface"); - -static void -in_arpinput(m) - struct mbuf *m; -{ - register struct ether_arp *ea; - register struct arpcom *ac = (struct arpcom *)m->m_pkthdr.rcvif; - struct ether_header *eh; - struct iso88025_header *th = (struct iso88025_header *)0; - register struct llinfo_arp *la = 0; - register struct rtentry *rt; - struct in_ifaddr *ia, *maybe_ia = 0; - struct sockaddr_dl *sdl; - struct sockaddr sa; - struct in_addr isaddr, itaddr, myaddr; - int op, rif_len; - unsigned char buf[18]; - unsigned char buf2[18]; - - if (m->m_len < sizeof(struct ether_arp) && - (m = m_pullup(m, sizeof(struct ether_arp))) == NULL) { - log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n"); - return; - } - - ea = mtod(m, struct ether_arp *); - op = ntohs(ea->arp_op); - (void)memcpy(&isaddr, ea->arp_spa, sizeof (isaddr)); - (void)memcpy(&itaddr, ea->arp_tpa, sizeof (itaddr)); - -#if __APPLE__ - /* Don't respond to requests for 0.0.0.0 */ - if (itaddr.s_addr == 0 && op == ARPOP_REQUEST) { - m_freem(m); - return; - } -#endif - - for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { - /* - * For a bridge, we want to check the address irrespective - * of the receive interface. (This will change slightly - * when we have clusters of interfaces). - */ -#if BRIDGE -#define BRIDGE_TEST (do_bridge) -#else -#define BRIDGE_TEST (0) /* cc will optimise the test away */ -#endif - if ((BRIDGE_TEST) || (ia->ia_ifp == &ac->ac_if)) { - maybe_ia = ia; - if ((itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) || - (isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)) { - break; - } - } - } - if (maybe_ia == 0) { - m_freem(m); - return; - } - myaddr = ia ? ia->ia_addr.sin_addr : maybe_ia->ia_addr.sin_addr; - if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)ac->ac_enaddr, - sizeof (ea->arp_sha))) { - m_freem(m); /* it's from me, ignore it. 
*/ - return; - } - if (!bcmp((caddr_t)ea->arp_sha, (caddr_t)etherbroadcastaddr, - sizeof (ea->arp_sha))) { - log(LOG_ERR, - "arp: ether address is broadcast for IP address %s!\n", - inet_ntoa(isaddr)); - m_freem(m); - return; - } - if (isaddr.s_addr == myaddr.s_addr) { - struct kev_msg ev_msg; - struct kev_in_collision *in_collision; - u_char storage[sizeof(struct kev_in_collision) + 6]; - in_collision = (struct kev_in_collision*)storage; - - log(LOG_ERR, - "duplicate IP address %s sent from ethernet address %s\n", - inet_ntoa(isaddr), ether_sprintf(buf, ea->arp_sha)); - - /* Send a kernel event so anyone can learn of the conflict */ - in_collision->link_data.if_family = ac->ac_if.if_family; - in_collision->link_data.if_unit = ac->ac_if.if_unit; - strncpy(&in_collision->link_data.if_name[0], ac->ac_if.if_name, IFNAMSIZ); - in_collision->ia_ipaddr = isaddr; - in_collision->hw_len = ETHER_ADDR_LEN; - bcopy((caddr_t)ea->arp_sha, (caddr_t)in_collision->hw_addr, sizeof(ea->arp_sha)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; - ev_msg.event_code = KEV_INET_ARPCOLLISION; - ev_msg.dv[0].data_ptr = in_collision; - ev_msg.dv[0].data_length = sizeof(struct kev_in_collision) + 6; - ev_msg.dv[1].data_length = 0; - kev_post_msg(&ev_msg); - - itaddr = myaddr; - goto reply; - } - la = arplookup(isaddr.s_addr, itaddr.s_addr == myaddr.s_addr, 0); - if (la && (rt = la->la_rt) && (sdl = SDL(rt->rt_gateway))) { - /* the following is not an error when doing bridging */ - if (!BRIDGE_TEST && rt->rt_ifp != &ac->ac_if) { - if (log_arp_wrong_iface) - log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n", - inet_ntoa(isaddr), - rt->rt_ifp->if_name, rt->rt_ifp->if_unit, - ether_sprintf(buf, ea->arp_sha), - ac->ac_if.if_name, ac->ac_if.if_unit); - goto reply; - } - if (sdl->sdl_alen && - bcmp((caddr_t)ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) { - if (rt->rt_expire) - log(LOG_INFO, "arp: %s moved from %s to %s on %s%d\n", - inet_ntoa(isaddr), - ether_sprintf(buf, (u_char *)LLADDR(sdl)), - ether_sprintf(buf2, ea->arp_sha), - ac->ac_if.if_name, ac->ac_if.if_unit); - else { - log(LOG_ERR, - "arp: %s attempts to modify permanent entry for %s on %s%d", - ether_sprintf(buf, ea->arp_sha), inet_ntoa(isaddr), - ac->ac_if.if_name, ac->ac_if.if_unit); - goto reply; - } - } - (void)memcpy(LLADDR(sdl), ea->arp_sha, sizeof(ea->arp_sha)); - sdl->sdl_alen = sizeof(ea->arp_sha); -#ifndef __APPLE__ - /* TokenRing */ - sdl->sdl_rcf = (u_short)0; - /* - * If we receive an arp from a token-ring station over - * a token-ring nic then try to save the source - * routing info. 
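The collision branch above publishes a kev_in_collision record via kev_post_msg(). A user-space process can watch for such events with a PF_SYSTEM kernel-event socket; the sketch below is an assumption-laden illustration (SIOCSKEVFILT and struct kern_event_msg come from <sys/kern_event.h>; per the code above, ARP collisions should arrive with the KEV_INET_SUBCLASS subclass and event_code KEV_INET_ARPCOLLISION from <netinet/in_var.h>):

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_event.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
    struct kev_request req = { KEV_VENDOR_APPLE, KEV_NETWORK_CLASS,
        KEV_ANY_SUBCLASS };
    char buf[1024];
    ssize_t n;

    if (fd < 0 || ioctl(fd, SIOCSKEVFILT, &req) < 0) {
        perror("kev filter");
        return 1;
    }
    while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
        struct kern_event_msg *ev = (struct kern_event_msg *)buf;
        /* compare ev->event_code against KEV_INET_ARPCOLLISION */
        printf("net event: subclass %u code %u\n",
            ev->kev_subclass, ev->event_code);
    }
    close(fd);
    return 0;
}
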
- */ - if (ac->ac_if.if_type == IFT_ISO88025) { - th = (struct iso88025_header *)m->m_pkthdr.header; - rif_len = TR_RCF_RIFLEN(th->rcf); - if ((th->iso88025_shost[0] & TR_RII) && - (rif_len > 2)) { - sdl->sdl_rcf = th->rcf; - sdl->sdl_rcf ^= htons(TR_RCF_DIR); - memcpy(sdl->sdl_route, th->rd, rif_len - 2); - sdl->sdl_rcf &= ~htons(TR_RCF_BCST_MASK); - /* - * Set up source routing information for - * reply packet (XXX) - */ - m->m_data -= rif_len; - m->m_len += rif_len; - m->m_pkthdr.len += rif_len; - } else { - th->iso88025_shost[0] &= ~TR_RII; - } - m->m_data -= 8; - m->m_len += 8; - m->m_pkthdr.len += 8; - th->rcf = sdl->sdl_rcf; - } -#endif - if (rt->rt_expire) - rt->rt_expire = time_second + arpt_keep; - rt->rt_flags &= ~RTF_REJECT; - la->la_asked = 0; - if (la->la_hold) { - dlil_output(((struct ifnet *)ac)->if_data.default_proto, la->la_hold, rt, - rt_key(rt), 0); - la->la_hold = 0; - } - } -reply: - if (op != ARPOP_REQUEST) { - m_freem(m); - return; - } - if (itaddr.s_addr == myaddr.s_addr) { - /* I am the target */ - (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha)); - (void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha)); - } else { - la = arplookup(itaddr.s_addr, 0, SIN_PROXY); - if (la == NULL) { - struct sockaddr_in sin; - - if (!arp_proxyall) { - m_freem(m); - return; - } - - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof sin; - sin.sin_addr = itaddr; - - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); - if (!rt) { - m_freem(m); - return; - } - /* - * Don't send proxies for nodes on the same interface - * as this one came out of, or we'll get into a fight - * over who claims what Ether address. - */ - if (rt->rt_ifp == &ac->ac_if) { - rtfree(rt); - m_freem(m); - return; - } - (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha)); - (void)memcpy(ea->arp_sha, ac->ac_enaddr, sizeof(ea->arp_sha)); - rtfree(rt); -#if DEBUG_PROXY - printf("arp: proxying for %s\n", - inet_ntoa(itaddr)); -#endif - } else { - rt = la->la_rt; - (void)memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_sha)); - sdl = SDL(rt->rt_gateway); - (void)memcpy(ea->arp_sha, LLADDR(sdl), sizeof(ea->arp_sha)); - } - } - - (void)memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_spa)); - (void)memcpy(ea->arp_spa, &itaddr, sizeof(ea->arp_spa)); - ea->arp_op = htons(ARPOP_REPLY); - ea->arp_pro = htons(ETHERTYPE_IP); /* let's be sure! */ - switch (ac->ac_if.if_type) { - case IFT_ISO88025: - /* Re-arrange the source/dest address */ - memcpy(th->iso88025_dhost, th->iso88025_shost, - sizeof(th->iso88025_dhost)); - memcpy(th->iso88025_shost, ac->ac_enaddr, - sizeof(th->iso88025_shost)); - /* Set the source routing bit if neccesary */ - if (th->iso88025_dhost[0] & TR_RII) { - th->iso88025_dhost[0] &= ~TR_RII; - if (TR_RCF_RIFLEN(th->rcf) > 2) - th->iso88025_shost[0] |= TR_RII; - } - /* Copy the addresses, ac and fc into sa_data */ - memcpy(sa.sa_data, th->iso88025_dhost, - sizeof(th->iso88025_dhost) * 2); - sa.sa_data[(sizeof(th->iso88025_dhost) * 2)] = TR_AC; - sa.sa_data[(sizeof(th->iso88025_dhost) * 2) + 1] = TR_LLC_FRAME; - break; - case IFT_ETHER: - case IFT_FDDI: - /* - * May not be correct for types not explictly - * listed, but it is our best guess. 
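Once the checks pass, the reply: path above rewrites the request in place: the sender fields move into the target slots, and our own addresses become the new sender. A minimal sketch of that swap using the standard struct ether_arp layout (our_mac and our_ip are hypothetical caller-supplied values):

#include <sys/types.h>
#include <string.h>
#include <arpa/inet.h>          /* htons */
#include <net/if_arp.h>
#include <netinet/if_ether.h>   /* struct ether_arp, ARPOP_REPLY */

static void
arp_make_reply(struct ether_arp *ea, const unsigned char our_mac[6],
    const unsigned char our_ip[4])
{
    memcpy(ea->arp_tha, ea->arp_sha, sizeof(ea->arp_tha)); /* target = old sender */
    memcpy(ea->arp_tpa, ea->arp_spa, sizeof(ea->arp_tpa));
    memcpy(ea->arp_sha, our_mac, sizeof(ea->arp_sha));     /* we are the sender */
    memcpy(ea->arp_spa, our_ip, sizeof(ea->arp_spa));
    ea->arp_op = htons(ARPOP_REPLY);
}
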
- */ - default: - eh = (struct ether_header *)sa.sa_data; -#ifdef __APPLE__ - if (IN_LINKLOCAL(ntohl(*((u_int32_t*)ea->arp_spa)))) - (void)memcpy(eh->ether_dhost, etherbroadcastaddr, sizeof(eh->ether_dhost)); - else -#endif - (void)memcpy(eh->ether_dhost, ea->arp_tha, sizeof(eh->ether_dhost)); - eh->ether_type = htons(ETHERTYPE_ARP); - break; - } - sa.sa_family = AF_UNSPEC; - sa.sa_len = sizeof(sa); - dlil_output(((struct ifnet *)ac)->if_data.default_proto, m, 0, &sa, 0); - return; -} -#endif - -/* - * Free an arp entry. - */ -static void -arptfree(la) - register struct llinfo_arp *la; -{ - register struct rtentry *rt = la->la_rt; - register struct sockaddr_dl *sdl; - if (rt == 0) - panic("arptfree"); - if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && - sdl->sdl_family == AF_LINK) { - sdl->sdl_alen = 0; - la->la_asked = 0; - rt->rt_flags &= ~RTF_REJECT; - return; - } - rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), - 0, (struct rtentry **)0); -} -/* - * Lookup or enter a new address in arptab. - */ -static struct llinfo_arp * -arplookup(addr, create, proxy) - u_long addr; - int create, proxy; -{ - register struct rtentry *rt; - static struct sockaddr_inarp sin = {sizeof(sin), AF_INET }; - const char *why = 0; - - sin.sin_addr.s_addr = addr; - sin.sin_other = proxy ? SIN_PROXY : 0; - rt = rtalloc1((struct sockaddr *)&sin, create, 0UL); - if (rt == 0) - return (0); - rtunref(rt); - - if (rt->rt_flags & RTF_GATEWAY) { - why = "host is not on local network"; - - /* If there are no references to this route, purge it */ - if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_WASCLONED) != 0) { - rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); - } - } - else if ((rt->rt_flags & RTF_LLINFO) == 0) - why = "could not allocate llinfo"; - else if (rt->rt_gateway->sa_family != AF_LINK) - why = "gateway route is not ours"; - - if (why && create) { - log(LOG_DEBUG, "arplookup %s failed: %s\n", - inet_ntoa(sin.sin_addr), why); - return 0; - } else if (why) { - return 0; - } - return ((struct llinfo_arp *)rt->rt_llinfo); -} - -void -arp_ifinit(ac, ifa) - struct arpcom *ac; - struct ifaddr *ifa; -{ - if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) - arprequest(ac, &IA_SIN(ifa)->sin_addr, - &IA_SIN(ifa)->sin_addr, ac->ac_enaddr); - ifa->ifa_rtrequest = arp_rtrequest; - ifa->ifa_flags |= RTF_CLONING; -} diff --git a/bsd/netinet/if_ether.h b/bsd/netinet/if_ether.h index 0d9799b6b..fa65cfe8b 100644 --- a/bsd/netinet/if_ether.h +++ b/bsd/netinet/if_ether.h @@ -65,52 +65,6 @@ #include #define ea_byte ether_addr_octet -#ifdef __APPLE__ -#ifdef __APPLE_API_UNSTABLE -/* - * Macro for looking up the ether_multi record for a given range of Ethernet - * multicast addresses connected to a given arpcom structure. If no matching - * record is found, "enm" returns NULL. - */ -#define ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm) \ - /* u_char addrlo[6]; */ \ - /* u_char addrhi[6]; */ \ - /* struct arpcom *ac; */ \ - /* struct ether_multi *enm; */ \ -{ \ - for ((enm) = (ac)->ac_multiaddrs; \ - (enm) != NULL && \ - (bcmp((enm)->enm_addrlo, (addrlo), 6) != 0 || \ - bcmp((enm)->enm_addrhi, (addrhi), 6) != 0); \ - (enm) = (enm)->enm_next); \ -} - -/* - * Macro to step through all of the ether_multi records, one at a time. - * The current position is remembered in "step", which the caller must - * provide. ETHER_FIRST_MULTI(), below, must be called to initialize "step" - * and get the first record. 
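The const-correctness fix in the hunk above leaves the mapping itself unchanged: the low-order 23 bits of the IPv4 group drop into the fixed 01:00:5e:00:00:00/25 prefix, so 32 distinct groups share each Ethernet address (the IPv6 macro likewise copies the low 32 bits behind 33:33). A small worked example of the same arithmetic:

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    struct in_addr group;
    unsigned char enaddr[6] = { 0x01, 0x00, 0x5e, 0, 0, 0 };
    const unsigned char *p = (const unsigned char *)&group.s_addr;

    inet_aton("224.1.2.3", &group);
    enaddr[3] = p[1] & 0x7f;    /* top bit of the low 24 is lost */
    enaddr[4] = p[2];
    enaddr[5] = p[3];
    printf("%02x:%02x:%02x:%02x:%02x:%02x\n",   /* 01:00:5e:01:02:03 */
        enaddr[0], enaddr[1], enaddr[2], enaddr[3], enaddr[4], enaddr[5]);
    return 0;
}
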
Both macros return a NULL "enm" when there - * are no remaining records. - */ -#define ETHER_NEXT_MULTI(step, enm) \ - /* struct ether_multistep step; */ \ - /* struct ether_multi *enm; */ \ -{ \ - if (((enm) = (step).e_enm) != NULL) \ - (step).e_enm = (enm)->enm_next; \ -} - -#define ETHER_FIRST_MULTI(step, ac, enm) \ - /* struct ether_multistep step; */ \ - /* struct arpcom *ac; */ \ - /* struct ether_multi *enm; */ \ -{ \ - (step).e_enm = (ac)->ac_multiaddrs; \ - ETHER_NEXT_MULTI((step), (enm)); \ -} -#endif /* __APPLE_API_UNSTABLE */ -#endif /* __APPLE__ */ - /* * Macro to map an IP multicast address to an Ethernet multicast address. * The high-order 25 bits of the Ethernet address are statically assigned, @@ -123,9 +77,9 @@ (enaddr)[0] = 0x01; \ (enaddr)[1] = 0x00; \ (enaddr)[2] = 0x5e; \ - (enaddr)[3] = ((u_char *)ipaddr)[1] & 0x7f; \ - (enaddr)[4] = ((u_char *)ipaddr)[2]; \ - (enaddr)[5] = ((u_char *)ipaddr)[3]; \ + (enaddr)[3] = ((const u_char *)ipaddr)[1] & 0x7f; \ + (enaddr)[4] = ((const u_char *)ipaddr)[2]; \ + (enaddr)[5] = ((const u_char *)ipaddr)[3]; \ } /* * Macro to map an IP6 multicast address to an Ethernet multicast address. @@ -138,10 +92,10 @@ { \ (enaddr)[0] = 0x33; \ (enaddr)[1] = 0x33; \ - (enaddr)[2] = ((u_char *)ip6addr)[12]; \ - (enaddr)[3] = ((u_char *)ip6addr)[13]; \ - (enaddr)[4] = ((u_char *)ip6addr)[14]; \ - (enaddr)[5] = ((u_char *)ip6addr)[15]; \ + (enaddr)[2] = ((const u_char *)ip6addr)[12]; \ + (enaddr)[3] = ((const u_char *)ip6addr)[13]; \ + (enaddr)[4] = ((const u_char *)ip6addr)[14]; \ + (enaddr)[5] = ((const u_char *)ip6addr)[15]; \ } /* @@ -180,18 +134,14 @@ struct sockaddr_inarp { #define RTF_USETRAILERS RTF_PROTO1 /* use trailers */ #define RTF_ANNOUNCE RTF_PROTO2 /* announce new arp entry */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN]; extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; extern struct ifqueue arpintrq; -int arpresolve __P((struct arpcom *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *, struct rtentry *)); -#endif /* __APPLE_API_PRIVATE */ -#ifdef __APPLE_API_UNSTABLE -void arp_ifinit __P((struct arpcom *, struct ifaddr *)); -#endif /* __APPLE_API_UNSTABLE */ -#endif +int arpresolve(struct ifnet *, struct rtentry *, struct mbuf *, + struct sockaddr *, u_char *, struct rtentry *); +void arp_ifinit(struct ifnet *, struct ifaddr *); +#endif KERNEL_PRIVATE -#endif +#endif _NETINET_IF_ETHER_H_ diff --git a/bsd/netinet/if_fddi.h b/bsd/netinet/if_fddi.h index dd33f7311..ed9071d4c 100644 --- a/bsd/netinet/if_fddi.h +++ b/bsd/netinet/if_fddi.h @@ -91,8 +91,7 @@ struct fddi_header { #define FDDIFC_LLC_SYNC 0xd0 #define FDDIFC_SMT 0x40 -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define fddibroadcastaddr etherbroadcastaddr #define fddi_ipmulticast_min ether_ipmulticast_min #define fddi_ipmulticast_max ether_ipmulticast_max @@ -100,11 +99,10 @@ struct fddi_header { #define fddi_delmulti ether_delmulti #define fddi_sprintf ether_sprintf -void fddi_ifattach __P((struct ifnet *)); -void fddi_input __P((struct ifnet *, struct fddi_header *, struct mbuf *)); -int fddi_output __P((struct ifnet *, - struct mbuf *, struct sockaddr *, struct rtentry *)); -#endif /* __APPLE_API_PRIVATE */ -#endif +void fddi_ifattach(struct ifnet *); +void fddi_input(struct ifnet *, struct fddi_header *, struct mbuf *); +int fddi_output(struct ifnet *, + struct mbuf *, struct sockaddr *, struct rtentry *); +#endif KERNEL_PRIVATE -#endif +#endif 
_NETINET_IF_FDDI_H_ diff --git a/bsd/netinet/if_tun.h b/bsd/netinet/if_tun.h index 6ffd6734b..9f748c7a3 100644 --- a/bsd/netinet/if_tun.h +++ b/bsd/netinet/if_tun.h @@ -39,7 +39,7 @@ #define _NET_IF_TUN_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct tun_softc { u_short tun_flags; /* misc flags */ #define TUN_OPEN 0x0001 @@ -61,13 +61,14 @@ struct tun_softc { caddr_t tun_bpf; #endif }; -#endif /* __APPLE_API_PRIVATE */ -/* Maximum packet size */ -#define TUNMTU 1500 +#endif KERNEL_PRIVATE /* ioctl's for get/set debug */ #define TUNSDEBUG _IOW('t', 90, int) #define TUNGDEBUG _IOR('t', 89, int) +/* Maximum packet size */ +#define TUNMTU 1500 + #endif /* !_NET_IF_TUN_H_ */ diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c index 1430acc47..0f427e7e3 100644 --- a/bsd/netinet/igmp.c +++ b/bsd/netinet/igmp.c @@ -94,7 +94,7 @@ static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); #endif static struct router_info * - find_rti __P((struct ifnet *ifp)); + find_rti(struct ifnet *ifp); static struct igmpstat igmpstat; @@ -107,7 +107,7 @@ static u_long igmp_all_rtrs_group; static struct mbuf *router_alert; static struct router_info *Head; -static void igmp_sendpkt __P((struct in_multi *, int, unsigned long)); +static void igmp_sendpkt(struct in_multi *, int, unsigned long); void igmp_init() @@ -138,10 +138,10 @@ igmp_init() } static struct router_info * -find_rti(ifp) - struct ifnet *ifp; +find_rti( + struct ifnet *ifp) { - register struct router_info *rti = Head; + struct router_info *rti = Head; #if IGMP_DEBUG @@ -173,17 +173,17 @@ find_rti(ifp) } void -igmp_input(m, iphlen) - register struct mbuf *m; - register int iphlen; +igmp_input( + struct mbuf *m, + int iphlen) { - register struct igmp *igmp; - register struct ip *ip; - register int igmplen; - register struct ifnet *ifp = m->m_pkthdr.rcvif; - register int minlen; - register struct in_multi *inm; - register struct in_ifaddr *ia; + struct igmp *igmp; + struct ip *ip; + int igmplen; + struct ifnet *ifp = m->m_pkthdr.rcvif; + int minlen; + struct in_multi *inm; + struct in_ifaddr *ia; struct in_multistep step; struct router_info *rti; @@ -293,6 +293,7 @@ igmp_input(m, iphlen) * - Use the value specified in the query message as * the maximum timeout. */ + lck_mtx_lock(rt_mtx); IN_FIRST_MULTI(step, inm); while (inm != NULL) { if (inm->inm_ifp == ifp && @@ -308,6 +309,7 @@ igmp_input(m, iphlen) } IN_NEXT_MULTI(step, inm); } + lck_mtx_unlock(rt_mtx); break; @@ -350,7 +352,9 @@ igmp_input(m, iphlen) * If we belong to the group being reported, stop * our timer for that group. 
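The query handling above follows RFC 2236 report suppression: each membership arms a random delay no larger than the advertised maximum, and a report heard from any other member cancels the pending one. A host-side sketch of that state machine, with hypothetical types and the random delay expanded per the IGMP_RANDOM_DELAY definition in igmp_var.h below:

#include <stdlib.h>

struct membership {                 /* hypothetical per-group state */
    int timer;                      /* ticks until we report; 0 = idle */
};

#define RANDOM_DELAY(x) (random() % (x) + 1)   /* as IGMP_RANDOM_DELAY */

static void
on_query(struct membership *g, int max_resp_ticks)
{
    /* rearm only if the new bound is sooner than a pending report */
    if (g->timer == 0 || g->timer > max_resp_ticks)
        g->timer = RANDOM_DELAY(max_resp_ticks);
}

static void
on_report_heard(struct membership *g)
{
    g->timer = 0;                   /* another member answered first */
}
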
*/ + ifnet_lock_shared(ifp); IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); + ifnet_lock_done(ifp); if (inm != NULL) { inm->inm_timer = 0; @@ -373,7 +377,6 @@ int igmp_joingroup(inm) struct in_multi *inm; { - int s = splnet(); if (inm->inm_addr.s_addr == igmp_all_hosts_group || inm->inm_ifp->if_flags & IFF_LOOPBACK) { @@ -389,7 +392,6 @@ igmp_joingroup(inm) igmp_timers_are_running = 1; } return 0; - splx(s); } void @@ -406,9 +408,8 @@ igmp_leavegroup(inm) void igmp_fasttimo() { - register struct in_multi *inm; + struct in_multi *inm; struct in_multistep step; - int s; /* * Quick check to see if any work needs to be done, in order @@ -418,7 +419,6 @@ igmp_fasttimo() if (!igmp_timers_are_running) return; - s = splnet(); igmp_timers_are_running = 0; IN_FIRST_MULTI(step, inm); while (inm != NULL) { @@ -432,14 +432,12 @@ igmp_fasttimo() } IN_NEXT_MULTI(step, inm); } - splx(s); } void igmp_slowtimo() { - int s = splnet(); - register struct router_info *rti = Head; + struct router_info *rti = Head; #if IGMP_DEBUG printf("[igmp.c,_slowtimo] -- > entering \n"); @@ -456,7 +454,6 @@ igmp_slowtimo() #if IGMP_DEBUG printf("[igmp.c,_slowtimo] -- > exiting \n"); #endif - splx(s); } static struct route igmprt; diff --git a/bsd/netinet/igmp_var.h b/bsd/netinet/igmp_var.h index b7e99ed14..435197b7c 100644 --- a/bsd/netinet/igmp_var.h +++ b/bsd/netinet/igmp_var.h @@ -62,7 +62,6 @@ #define _NETINET_IGMP_VAR_H_ #include -#ifdef __APPLE_API_UNSTABLE /* * Internet Group Management Protocol (IGMP), @@ -85,8 +84,8 @@ struct igmpstat { u_int igps_snd_reports; /* sent membership reports */ }; +#ifdef KERNEL_PRIVATE #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE #define IGMP_RANDOM_DELAY(X) (random() % (X) + 1) /* @@ -109,17 +108,17 @@ struct igmpstat { */ #define IGMP_AGE_THRESHOLD 540 -void igmp_init __P((void)); -void igmp_input __P((struct mbuf *, int)); -int igmp_joingroup __P((struct in_multi *)); -void igmp_leavegroup __P((struct in_multi *)); -void igmp_fasttimo __P((void)); -void igmp_slowtimo __P((void)); +void igmp_init(void); +void igmp_input(struct mbuf *, int); +int igmp_joingroup(struct in_multi *); +void igmp_leavegroup(struct in_multi *); +void igmp_fasttimo(void); +void igmp_slowtimo(void); SYSCTL_DECL(_net_inet_igmp); -#endif /* __APPLE_API_PRIVATE */ -#endif +#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ /* * Names for IGMP sysctl objects @@ -127,11 +126,11 @@ SYSCTL_DECL(_net_inet_igmp); #define IGMPCTL_STATS 1 /* statistics (read-only) */ #define IGMPCTL_MAXID 2 +#ifdef KERNEL_PRIVATE #define IGMPCTL_NAMES { \ { 0, 0 }, \ { "stats", CTLTYPE_STRUCT }, \ } -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ #endif - diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c index cbbf21679..6fa1e7ab8 100644 --- a/bsd/netinet/in.c +++ b/bsd/netinet/in.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -86,14 +87,14 @@ #include -static int in_mask2len __P((struct in_addr *)); -static void in_len2mask __P((struct in_addr *, int)); -static int in_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, - struct ifnet *, struct proc *)); +static int in_mask2len(struct in_addr *); +static void in_len2mask(struct in_addr *, int); +static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t, + struct ifnet *, struct proc *); -static void in_socktrim __P((struct sockaddr_in *)); -static int in_ifinit __P((struct ifnet *, - struct in_ifaddr *, struct sockaddr_in *, int)); +static void in_socktrim(struct sockaddr_in *); +static int in_ifinit(struct ifnet *, + struct in_ifaddr *, struct 
sockaddr_in *, int); static int subnetsarelocal = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW, @@ -101,7 +102,10 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW, struct in_multihead in_multihead; /* XXX BSS initialization */ -extern void arp_rtrequest(); +extern lck_mtx_t *rt_mtx; + +/* Track whether or not the SIOCARPIPLL ioctl has been called */ +__private_extern__ u_int32_t ipv4_ll_arp_aware = 0; /* * Return 1 if an internet address is for a ``local'' host @@ -113,19 +117,27 @@ int in_localaddr(in) struct in_addr in; { - register u_long i = ntohl(in.s_addr); - register struct in_ifaddr *ia; + u_long i = ntohl(in.s_addr); + struct in_ifaddr *ia; if (subnetsarelocal) { + lck_mtx_lock(rt_mtx); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) - if ((i & ia->ia_netmask) == ia->ia_net) + if ((i & ia->ia_netmask) == ia->ia_net) { + lck_mtx_unlock(rt_mtx); return (1); + } + lck_mtx_unlock(rt_mtx); } else { + lck_mtx_lock(rt_mtx); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) - if ((i & ia->ia_subnetmask) == ia->ia_subnet) + if ((i & ia->ia_subnetmask) == ia->ia_subnet) { + lck_mtx_unlock(rt_mtx); return (1); + } + lck_mtx_unlock(rt_mtx); } return (0); } @@ -139,8 +151,8 @@ int in_canforward(in) struct in_addr in; { - register u_long i = ntohl(in.s_addr); - register u_long net; + u_long i = ntohl(in.s_addr); + u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i)) return (0); @@ -159,8 +171,8 @@ static void in_socktrim(ap) struct sockaddr_in *ap; { - register char *cplim = (char *) &ap->sin_addr; - register char *cp = (char *) (&ap->sin_addr + 1); + char *cplim = (char *) &ap->sin_addr; + char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) @@ -216,21 +228,21 @@ static int in_interfaces; /* number of external internet interfaces */ */ /* ARGSUSED */ int -in_control(so, cmd, data, ifp, p) - struct socket *so; - u_long cmd; - caddr_t data; - register struct ifnet *ifp; - struct proc *p; +in_control( + struct socket *so, + u_long cmd, + caddr_t data, + struct ifnet *ifp, + struct proc *p) { - register struct ifreq *ifr = (struct ifreq *)data; - register struct in_ifaddr *ia = 0, *iap; - register struct ifaddr *ifa; + struct ifreq *ifr = (struct ifreq *)data; + struct in_ifaddr *ia = 0, *iap; + struct ifaddr *ifa; struct in_ifaddr *oia; struct in_aliasreq *ifra = (struct in_aliasreq *)data; struct sockaddr_in oldaddr; - int error, hostIsNew, maskIsNew, s; - u_long i, dl_tag; + int error, hostIsNew, maskIsNew; + u_long i; struct kev_msg ev_msg; struct kev_in_data in_event_data; @@ -238,7 +250,7 @@ in_control(so, cmd, data, ifp, p) switch (cmd) { case SIOCALIFADDR: case SIOCDLIFADDR: - if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) + if (p && (error = proc_suser(p)) != 0) return error; /*fall through*/ case SIOCGLIFADDR: @@ -253,7 +265,8 @@ in_control(so, cmd, data, ifp, p) * If an alias address was specified, find that one instead of * the first one on the interface. 
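Earlier in this hunk, in_localaddr() reduces "is this destination local" to one mask-and-compare per configured address, against either the classful network or the configured subnet depending on the subnets_are_local sysctl. The same test stands alone; with an interface at 10.1.2.3/24, 10.1.2.77 is local and 10.1.9.1 is not:

#include <stdio.h>
#include <arpa/inet.h>

static int
is_subnet_local(in_addr_t dst, in_addr_t ifaddr, in_addr_t mask)
{
    /* all three values stay in network byte order, so the
     * comparison is consistent without byte swapping */
    return (dst & mask) == (ifaddr & mask);
}

int main(void)
{
    in_addr_t ifa  = inet_addr("10.1.2.3");
    in_addr_t mask = inet_addr("255.255.255.0");

    printf("%d %d\n",
        is_subnet_local(inet_addr("10.1.2.77"), ifa, mask),  /* 1 */
        is_subnet_local(inet_addr("10.1.9.1"), ifa, mask));  /* 0 */
    return 0;
}
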
*/ - if (ifp) + if (ifp) { + lck_mtx_lock(rt_mtx); for (iap = in_ifaddrhead.tqh_first; iap; iap = iap->ia_link.tqe_next) if (iap->ia_ifp == ifp) { @@ -267,10 +280,12 @@ in_control(so, cmd, data, ifp, p) break; } } - + lck_mtx_unlock(rt_mtx); + } switch (cmd) { case SIOCAUTOADDR: - if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) + case SIOCARPIPLL: + if (p && (error = proc_suser(p)) != 0) return error; break; @@ -279,12 +294,14 @@ in_control(so, cmd, data, ifp, p) if (ifp == 0) return (EADDRNOTAVAIL); if (ifra->ifra_addr.sin_family == AF_INET) { + lck_mtx_lock(rt_mtx); for (oia = ia; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifp == ifp && ia->ia_addr.sin_addr.s_addr == ifra->ifra_addr.sin_addr.s_addr) break; } + lck_mtx_unlock(rt_mtx); if ((ifp->if_flags & IFF_POINTOPOINT) && (cmd == SIOCAIFADDR) && (ifra->ifra_dstaddr.sin_addr.s_addr @@ -300,13 +317,8 @@ in_control(so, cmd, data, ifp, p) case SIOCSIFADDR: case SIOCSIFNETMASK: case SIOCSIFDSTADDR: -#ifdef __APPLE__ if ((so->so_state & SS_PRIV) == 0) return (EPERM); -#else - if (p && (error = suser(p)) != 0) - return error; -#endif if (ifp == 0) return (EADDRNOTAVAIL); @@ -322,29 +334,14 @@ in_control(so, cmd, data, ifp, p) * Protect from ipintr() traversing address list * while we're modifying it. */ - s = splnet(); - TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link); ifa = &ia->ia_ifa; - TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); - -/* - * Temorary code for protocol attachment XXX - */ - - /* Generic protocol plumbing */ - - if (error = dlil_plumb_protocol(PF_INET, ifp, &dl_tag)) { - kprintf("in.c: warning can't plumb proto if=%s%n type %d error=%d\n", - ifp->if_name, ifp->if_unit, ifp->if_type, error); - error = 0; /*discard error, can be cold with unsupported interfaces */ - } -/* End of temp code */ ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; ia->ia_sockmask.sin_len = 8; + ifnet_lock_exclusive(ifp); if (ifp->if_flags & IFF_BROADCAST) { ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr); ia->ia_broadaddr.sin_family = AF_INET; @@ -352,13 +349,27 @@ in_control(so, cmd, data, ifp, p) ia->ia_ifp = ifp; if (!(ifp->if_flags & IFF_LOOPBACK)) in_interfaces++; - splx(s); + if_attach_ifa(ifp, ifa); + ifnet_lock_done(ifp); + + lck_mtx_lock(rt_mtx); + TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link); + lck_mtx_unlock(rt_mtx); + + /* Generic protocol plumbing */ + + if (error = dlil_plumb_protocol(PF_INET, ifp)) { + kprintf("in.c: warning can't plumb proto if=%s%n type %d error=%d\n", + ifp->if_name, ifp->if_unit, ifp->if_type, error); + error = 0; /*discard error, can be cold with unsupported interfaces */ + } + } break; case SIOCPROTOATTACH: case SIOCPROTODETACH: - if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) + if (p && (error = proc_suser(p)) != 0) return error; if (ifp == 0) return (EADDRNOTAVAIL); @@ -386,10 +397,24 @@ in_control(so, cmd, data, ifp, p) case SIOCAUTOADDR: if (ifp == 0) return (EADDRNOTAVAIL); - if (ifr->ifr_data) + ifnet_lock_exclusive(ifp); + if (ifr->ifr_intval) ifp->if_eflags |= IFEF_AUTOCONFIGURING; else ifp->if_eflags &= ~IFEF_AUTOCONFIGURING; + ifnet_lock_done(ifp); + break; + + case SIOCARPIPLL: + if (ifp == 0) + return (EADDRNOTAVAIL); + ipv4_ll_arp_aware = 1; + ifnet_lock_exclusive(ifp); + if (ifr->ifr_data) + ifp->if_eflags |= IFEF_ARPLL; + else + ifp->if_eflags &= ~IFEF_ARPLL; + ifnet_lock_done(ifp); break; case SIOCGIFADDR: @@ -504,16 +529,20 @@ in_control(so, cmd, data, ifp, p) 
(struct sockaddr_in *) &ifr->ifr_addr, 1)); case SIOCPROTOATTACH: - error = dlil_plumb_protocol(PF_INET, ifp, &dl_tag); + error = dlil_plumb_protocol(PF_INET, ifp); if (error) return(error); break; case SIOCPROTODETACH: // if an ip address is still present, refuse to detach - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) - if (ifa->ifa_addr->sa_family == AF_INET) - return EBUSY; + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) + if (ifa->ifa_addr->sa_family == AF_INET) + break; + ifnet_lock_done(ifp); + if (ifa != 0) + return EBUSY; error = dlil_unplumb_protocol(PF_INET, ifp); if (error) @@ -567,7 +596,7 @@ in_control(so, cmd, data, ifp, p) hostIsNew = 0; } if (ifra->ifra_mask.sin_len) { - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, 0); ia->ia_sockmask = ifra->ifra_mask; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); @@ -575,7 +604,7 @@ in_control(so, cmd, data, ifp, p) } if ((ifp->if_flags & IFF_POINTOPOINT) && (ifra->ifra_dstaddr.sin_family == AF_INET)) { - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, 0); ia->ia_dstaddr = ifra->ifra_dstaddr; maskIsNew = 1; /* We lie; but the effect's the same */ } @@ -627,12 +656,13 @@ in_control(so, cmd, data, ifp, p) return (error); case SIOCDIFADDR: - error = dlil_ioctl(PF_INET, ifp, SIOCDIFADDR, (caddr_t)ia); - if (error == EOPNOTSUPP) - error = 0; + error = dlil_ioctl(PF_INET, ifp, SIOCDIFADDR, (caddr_t)ia); + if (error == EOPNOTSUPP) + error = 0; if (error) return error; + /* Fill out the kernel event information */ ev_msg.vendor_code = KEV_VENDOR_APPLE; ev_msg.kev_class = KEV_NETWORK_CLASS; ev_msg.kev_subclass = KEV_INET_SUBCLASS; @@ -656,47 +686,23 @@ in_control(so, cmd, data, ifp, p) in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; - ev_msg.dv[0].data_length = sizeof(struct kev_in_data); + ev_msg.dv[0].data_length = sizeof(struct kev_in_data); ev_msg.dv[1].data_length = 0; - kev_post_msg(&ev_msg); - + lck_mtx_lock(rt_mtx); + TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); /* * in_ifscrub kills the interface route. */ - in_ifscrub(ifp, ia); -#ifndef __APPLE__ - /* - * in_ifadown gets rid of all the rest of - * the routes. This is not quite the right - * thing to do, but at least if we are running - * a routing process they will come back. - */ - in_ifadown(&ia->ia_ifa, 1); - /* - * XXX horrible hack to detect that we are being called - * from if_detach() - */ - if (!ifnet_addrs[ifp->if_index - 1]) { - in_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp); - in_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp); - } -#endif - - /* - * Protect from ipintr() traversing address list - * while we're modifying it. - */ - s = splnet(); - + in_ifscrub(ifp, ia, 1); ifa = &ia->ia_ifa; - TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); - oia = ia; - TAILQ_REMOVE(&in_ifaddrhead, oia, ia_link); - ifafree(&oia->ia_ifa); + lck_mtx_unlock(rt_mtx); + ifnet_lock_exclusive(ifp); + if_detach_ifa(ifp, ifa); + ifafree(&ia->ia_ifa); #ifdef __APPLE__ - /* + /* * If the interface supports multicast, and no address is left, * remove the "all hosts" multicast group from that interface. 
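The SIOCPROTODETACH rework above shows the locking idiom this patch applies throughout: never return while holding the ifnet lock; break out of the traversal, unlock, then act on what was found. A generic sketch of the same idiom with pthreads and a hypothetical node type:

#include <pthread.h>
#include <stddef.h>
#include <sys/socket.h>     /* AF_INET */

struct addr_node {              /* hypothetical stand-in for ifaddr */
    struct addr_node *next;
    int family;
};

static int
inet_addr_present(pthread_mutex_t *mtx, struct addr_node *head)
{
    struct addr_node *n;

    pthread_mutex_lock(mtx);
    for (n = head; n != NULL; n = n->next)
        if (n->family == AF_INET)
            break;              /* do not return with mtx held */
    pthread_mutex_unlock(mtx);

    return (n != NULL);         /* caller turns this into EBUSY */
}
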
*/ @@ -711,12 +717,16 @@ in_control(so, cmd, data, ifp, p) if (ifa == 0) { addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); IN_LOOKUP_MULTI(addr, ifp, inm); - if (inm) - in_delmulti(inm); } - } + ifnet_lock_done(ifp); + if (inm) + in_delmulti(&inm); + } else + ifnet_lock_done(ifp); #endif - splx(s); + + /* Post the kernel event */ + kev_post_msg(&ev_msg); break; #ifdef __APPLE__ @@ -726,27 +736,23 @@ in_control(so, cmd, data, ifp, p) * Special ioctl for OpenTransport sockets */ struct inpcb *inp, *cloned_inp; - int error = 0; + int error2 = 0; int cloned_fd = *(int *)data; - s = splnet(); /* XXX */ inp = sotoinpcb(so); if (inp == NULL) { - splx(s); break; } /* let's make sure it's either -1 or a valid file descriptor */ if (cloned_fd != -1) { struct socket *cloned_so; - struct file *cloned_fp; - error = getsock(p->p_fd, cloned_fd, &cloned_fp); - if (error){ - splx(s); + error2 = file_socket(cloned_fd, &cloned_so); + if (error2){ break; } - cloned_so = (struct socket *)cloned_fp->f_data; cloned_inp = sotoinpcb(cloned_so); + file_drop(cloned_fd); } else { cloned_inp = NULL; } @@ -768,7 +774,7 @@ in_control(so, cmd, data, ifp, p) /* Multicast options */ if (cloned_inp->inp_moptions != NULL) { - int i; + int i; struct ip_moptions *cloned_imo = cloned_inp->inp_moptions; struct ip_moptions *imo = inp->inp_moptions; @@ -777,14 +783,12 @@ in_control(so, cmd, data, ifp, p) * No multicast option buffer attached to the pcb; * allocate one. */ - splx(); imo = (struct ip_moptions*) _MALLOC(sizeof(*imo), M_IPMOPTS, M_WAITOK); if (imo == NULL) { - error = ENOBUFS; + error2 = ENOBUFS; break; } - s = splnet(); /* XXX */ inp->inp_moptions = imo; } imo->imo_multicast_ifp = cloned_imo->imo_multicast_ifp; @@ -797,29 +801,25 @@ in_control(so, cmd, data, ifp, p) in_addmulti(&cloned_imo->imo_membership[i]->inm_addr, cloned_imo->imo_membership[i]->inm_ifp); if (imo->imo_membership[i] == NULL) { - error = ENOBUFS; + error2 = ENOBUFS; break; } } if (i < cloned_imo->imo_num_memberships) { /* Failed, perform cleanup */ for (i--; i >= 0; i--) - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); + imo->imo_num_memberships = 0; break; } } } - splx(s); break; } #endif /* __APPLE__ */ default: return EOPNOTSUPP; - /* Darwin: dlil_ioctl called from ifioctl */ -#ifndef __APPLE__ - return ((*ifp->if_ioctl)(ifp, cmd, data)); -#endif } return (0); } @@ -841,12 +841,12 @@ in_control(so, cmd, data, ifp, p) * other values may be returned from in_ioctl() */ static int -in_lifaddr_ioctl(so, cmd, data, ifp, p) - struct socket *so; - u_long cmd; - caddr_t data; - struct ifnet *ifp; - struct proc *p; +in_lifaddr_ioctl( + struct socket *so, + u_long cmd, + caddr_t data, + struct ifnet *ifp, + struct proc *p) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; @@ -946,6 +946,7 @@ in_lifaddr_ioctl(so, cmd, data, ifp, p) } } + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; @@ -956,6 +957,7 @@ in_lifaddr_ioctl(so, cmd, data, ifp, p) if (candidate.s_addr == match.s_addr) break; } + ifnet_lock_done(ifp); if (!ifa) return EADDRNOTAVAIL; ia = (struct in_ifaddr *)ifa; @@ -1006,18 +1008,23 @@ in_lifaddr_ioctl(so, cmd, data, ifp, p) * Delete any existing route for an interface. 
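The in_ifscrub() change directly below adds a locked flag so callers that already hold rt_mtx (the SIOCDIFADDR path above passes 1 after taking the mutex) can share code with callers that do not (the SIOCAIFADDR paths pass 0), without recursing on the lock. A minimal sketch of that convention, with a pthread mutex standing in for rt_mtx:

#include <pthread.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;  /* rt_mtx stand-in */

static void
scrub_route(int locked)
{
    if (!locked)
        pthread_mutex_lock(&mtx);

    /* ... work that must run with the mutex held, i.e. the
     * rtinit_locked() calls in in_ifscrub() ... */

    if (!locked)
        pthread_mutex_unlock(&mtx);
}
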
*/ void -in_ifscrub(ifp, ia) - register struct ifnet *ifp; - register struct in_ifaddr *ia; +in_ifscrub( + struct ifnet *ifp, + struct in_ifaddr *ia, + int locked) { if ((ia->ia_flags & IFA_ROUTE) == 0) return; + if (!locked) + lck_mtx_lock(rt_mtx); if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) - rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); + rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); else - rtinit(&(ia->ia_ifa), (int)RTM_DELETE, 0); + rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, 0); ia->ia_flags &= ~IFA_ROUTE; + if (!locked) + lck_mtx_unlock(rt_mtx); } /* @@ -1025,15 +1032,15 @@ in_ifscrub(ifp, ia) * and routing table entry. */ static int -in_ifinit(ifp, ia, sin, scrub) - register struct ifnet *ifp; - register struct in_ifaddr *ia; - struct sockaddr_in *sin; - int scrub; +in_ifinit( + struct ifnet *ifp, + struct in_ifaddr *ia, + struct sockaddr_in *sin, + int scrub) { - register u_long i = ntohl(sin->sin_addr.s_addr); + u_long i = ntohl(sin->sin_addr.s_addr); struct sockaddr_in oldaddr; - int s = splimp(), flags = RTF_UP, error; + int flags = RTF_UP, error; oldaddr = ia->ia_addr; ia->ia_addr = *sin; @@ -1047,14 +1054,12 @@ in_ifinit(ifp, ia, sin, scrub) if (error == EOPNOTSUPP) error = 0; if (error) { - splx(s); ia->ia_addr = oldaddr; return (error); } - splx(s); if (scrub) { ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr; - in_ifscrub(ifp, ia); + in_ifscrub(ifp, ia, 0); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; } if (IN_CLASSA(i)) @@ -1108,7 +1113,9 @@ in_ifinit(ifp, ia, sin, scrub) struct in_addr addr; addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); + ifnet_lock_shared(ifp); IN_LOOKUP_MULTI(addr, ifp, inm); + ifnet_lock_done(ifp); if (inm == 0) in_addmulti(&addr, ifp); } @@ -1120,11 +1127,11 @@ in_ifinit(ifp, ia, sin, scrub) * Return 1 if the address might be a local broadcast address. */ int -in_broadcast(in, ifp) - struct in_addr in; - struct ifnet *ifp; +in_broadcast( + struct in_addr in, + struct ifnet *ifp) { - register struct ifaddr *ifa; + struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || @@ -1138,10 +1145,12 @@ in_broadcast(in, ifp) * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { - if (ifa->ifa_addr == NULL) + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr == NULL) { + ifnet_lock_done(ifp); return (0); + } if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || in.s_addr == ia->ia_netbroadcast.s_addr || @@ -1154,25 +1163,45 @@ in_broadcast(in, ifp) * only exist when an interface gets a secondary * address. */ - ia->ia_subnetmask != (u_long)0xffffffff) - return 1; + ia->ia_subnetmask != (u_long)0xffffffff) { + ifnet_lock_done(ifp); + return 1; + } } + ifnet_lock_done(ifp); return (0); #undef ia } + +static void +in_free_inm( + void* ifma_protospec) +{ + struct in_multi *inm = ifma_protospec; + + /* + * No remaining claims to this record; let IGMP know that + * we are leaving the multicast group. + */ + igmp_leavegroup(inm); + lck_mtx_lock(rt_mtx); + LIST_REMOVE(inm, inm_link); + lck_mtx_unlock(rt_mtx); + FREE(inm, M_IPMADDR); +} + /* * Add an address to the list of IP multicast addresses for a given interface. 
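in_broadcast() above matches the destination against each interface's directed broadcast addresses under the ifnet lock. For a single prefix the directed broadcast is just addr | ~mask; a worked example:

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    struct in_addr bcast;
    in_addr_t net  = inet_addr("10.1.2.0");
    in_addr_t mask = inet_addr("255.255.255.0");

    bcast.s_addr = net | ~mask;         /* directed broadcast */
    printf("%s\n", inet_ntoa(bcast));   /* prints 10.1.2.255 */
    return 0;
}
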
*/ struct in_multi * -in_addmulti(ap, ifp) - register struct in_addr *ap; - register struct ifnet *ifp; +in_addmulti( + struct in_addr *ap, + struct ifnet *ifp) { - register struct in_multi *inm; + struct in_multi *inm; int error; struct sockaddr_in sin; struct ifmultiaddr *ifma; - int s = splnet(); /* * Call generic routine to add membership or increment @@ -1185,7 +1214,6 @@ in_addmulti(ap, ifp) sin.sin_addr = *ap; error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma); if (error) { - splx(s); return 0; } @@ -1194,13 +1222,11 @@ in_addmulti(ap, ifp) * a new record. Otherwise, we are done. */ if (ifma->ifma_protospec != 0) { - splx(s); return ifma->ifma_protospec; } inm = (struct in_multi *) _MALLOC(sizeof(*inm), M_IPMADDR, M_WAITOK); if (inm == NULL) { - splx(s); return (NULL); } @@ -1208,20 +1234,37 @@ in_addmulti(ap, ifp) inm->inm_addr = *ap; inm->inm_ifp = ifp; inm->inm_ifma = ifma; - ifma->ifma_protospec = inm; - LIST_INSERT_HEAD(&in_multihead, inm, inm_link); + lck_mtx_lock(rt_mtx); + if (ifma->ifma_protospec == NULL) { + ifma->ifma_protospec = inm; + ifma->ifma_free = in_free_inm; + LIST_INSERT_HEAD(&in_multihead, inm, inm_link); + } + lck_mtx_unlock(rt_mtx); + + if (ifma->ifma_protospec != inm) { + _FREE(inm, M_IPMADDR); + return ifma->ifma_protospec; + } /* * Let IGMP know that we have joined a new IP multicast group. */ error = igmp_joingroup(inm); if (error) { - if_delmultiaddr(ifma); - LIST_REMOVE(inm, inm_link); - _FREE(inm, M_IPMADDR); - inm = NULL; + char addrbuf[16]; + + /* + * We can't free the inm because someone else may already be + * using it. Once we put it in to ifma->ifma_protospec, it + * must exist as long as the ifma does. Might be nice to flag + * the error so we can try igmp_joingroup the next time through. + */ + log(LOG_ERR, "igmp_joingroup error %d joining multicast %s on %s%d\n", + error, inet_ntop(AF_INET, &sin.sin_addr, addrbuf, sizeof(addrbuf)), + ifp->if_name, ifp->if_unit); } - splx(s); + return (inm); } @@ -1229,26 +1272,49 @@ in_addmulti(ap, ifp) * Delete a multicast address record. */ void -in_delmulti(inm) - register struct in_multi *inm; +in_delmulti( + struct in_multi **inm) { - struct ifmultiaddr *ifma = inm->inm_ifma; - int s = splnet(); + struct in_multi *inm2; + + lck_mtx_lock(rt_mtx); + LIST_FOREACH(inm2, &in_multihead, inm_link) { + if (inm2 == *inm) + break; + } + if (inm2 != *inm) { + lck_mtx_unlock(rt_mtx); + printf("in_delmulti - ignorning invalid inm (0x%x)\n", *inm); + return; + } + lck_mtx_unlock(rt_mtx); /* We intentionally do this a bit differently than BSD */ - - if (ifma && ifma->ifma_refcount == 1) { - /* - * No remaining claims to this record; let IGMP know that - * we are leaving the multicast group. - */ - igmp_leavegroup(inm); - ifma->ifma_protospec = 0; - LIST_REMOVE(inm, inm_link); - FREE(inm, M_IPMADDR); + if ((*inm)->inm_ifma) { + if_delmultiaddr((*inm)->inm_ifma, 0); + ifma_release((*inm)->inm_ifma); } - /* XXX - should be separate API for when we have an ifma? 
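The in_addmulti() body in this hunk allocates the in_multi record without holding rt_mtx, installs it into ifma_protospec only if that slot is still empty, and frees its own copy when another thread won the race; the unlocked recheck is safe because the slot is write-once. A generic sketch of that publish-or-discard pattern (hypothetical types, pthread mutex standing in for rt_mtx):

#include <pthread.h>
#include <stdlib.h>

struct slot { void *protospec; };   /* stand-in for ifma_protospec */

static void *
publish_or_discard(struct slot *s, pthread_mutex_t *mtx, size_t size)
{
    void *mine = calloc(1, size);   /* allocate while unlocked; may sleep */
    if (mine == NULL)
        return NULL;

    pthread_mutex_lock(mtx);
    if (s->protospec == NULL)
        s->protospec = mine;        /* empty slot: our record is published */
    pthread_mutex_unlock(mtx);

    if (s->protospec != mine) {     /* lost the race; keep the winner's */
        free(mine);
        return s->protospec;        /* write-once, so unlocked read is OK */
    }
    return mine;
}
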
*/ - if (ifma) - if_delmultiaddr(ifma); - splx(s); + *inm = NULL; } + +#if !NFSCLIENT +int +inet_aton(char * cp, struct in_addr * pin) +{ + u_char * b = (char *)pin; + int i; + char * p; + + for (p = cp, i = 0; i < 4; i++) { + u_long l = strtoul(p, 0, 0); + if (l > 255) + return (FALSE); + b[i] = l; + p = strchr(p, '.'); + if (i < 3 && p == NULL) + return (FALSE); + p++; + } + return (TRUE); +} +#endif diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index c8e73f38b..695e07bff 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -58,6 +58,30 @@ #ifndef _NETINET_IN_H_ #define _NETINET_IN_H_ #include +#include + +#ifndef _IN_ADDR_T +#define _IN_ADDR_T +typedef __uint32_t in_addr_t; /* base type for internet address */ +#endif + +#ifndef _IN_PORT_T +#define _IN_PORT_T +typedef __uint16_t in_port_t; +#endif + +/* + * POSIX 1003.1-2003 + * "Inclusion of the header may also make visible all + * symbols from and ". + */ +#include + +/* + * The following two #includes insure htonl and family are defined + */ +#include +#include /* * Constants and structures defined by the internet system, @@ -68,13 +92,18 @@ * Protocols (RFC 1700) */ #define IPPROTO_IP 0 /* dummy for IP */ +#ifndef _POSIX_C_SOURCE #define IPPROTO_HOPOPTS 0 /* IP6 hop-by-hop options */ +#endif /* !_POSIX_C_SOURCE */ #define IPPROTO_ICMP 1 /* control message protocol */ +#ifndef _POSIX_C_SOURCE #define IPPROTO_IGMP 2 /* group mgmt protocol */ #define IPPROTO_GGP 3 /* gateway^2 (deprecated) */ #define IPPROTO_IPV4 4 /* IPv4 encapsulation */ #define IPPROTO_IPIP IPPROTO_IPV4 /* for compatibility */ +#endif /* !_POSIX_C_SOURCE */ #define IPPROTO_TCP 6 /* tcp */ +#ifndef _POSIX_C_SOURCE #define IPPROTO_ST 7 /* Stream protocol II */ #define IPPROTO_EGP 8 /* exterior gateway protocol */ #define IPPROTO_PIGP 9 /* private interior gateway */ @@ -85,7 +114,9 @@ #define IPPROTO_EMCON 14 /* EMCON */ #define IPPROTO_XNET 15 /* Cross Net Debugger */ #define IPPROTO_CHAOS 16 /* Chaos*/ +#endif /* !_POSIX_C_SOURCE */ #define IPPROTO_UDP 17 /* user datagram protocol */ +#ifndef _POSIX_C_SOURCE #define IPPROTO_MUX 18 /* Multiplexing */ #define IPPROTO_MEAS 19 /* DCN Measurement Subsystems */ #define IPPROTO_HMP 20 /* Host Monitoring */ @@ -109,7 +140,9 @@ #define IPPROTO_CMTP 38 /* Control Message Transport */ #define IPPROTO_TPXX 39 /* TP++ Transport */ #define IPPROTO_IL 40 /* IL transport protocol */ +#endif /* !_POSIX_C_SOURCE */ #define IPPROTO_IPV6 41 /* IP6 header */ +#ifndef _POSIX_C_SOURCE #define IPPROTO_SDRP 42 /* Source Demand Routing */ #define IPPROTO_ROUTING 43 /* IP6 routing header */ #define IPPROTO_FRAGMENT 44 /* IP6 fragmentation header */ @@ -174,11 +207,15 @@ /* 255: Reserved */ /* BSD Private, local use, namespace incursion */ #define IPPROTO_DIVERT 254 /* divert pseudo-protocol */ +#endif /* !_POSIX_C_SOURCE */ #define IPPROTO_RAW 255 /* raw IP packet */ + +#ifndef _POSIX_C_SOURCE #define IPPROTO_MAX 256 /* last return value of *_input(), meaning "all job for this pkt is done". */ #define IPPROTO_DONE 257 +#endif /* _POSIX_C_SOURCE */ /* * Local port number conventions: @@ -225,13 +262,18 @@ * */ +#define __DARWIN_IPPORT_RESERVED 1024 + +#ifndef _POSIX_C_SOURCE /* * Ports < IPPORT_RESERVED are reserved for * privileged processes (e.g. root). (IP_PORTRANGE_LOW) * Ports > IPPORT_USERRESERVED are reserved * for servers, not necessarily privileged. 
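The !NFSCLIENT fallback inet_aton() above parses each octet with strtoul(..., 0), so hex and octal octets are accepted alongside decimal, and any value over 255 rejects the whole string. Exercising the same contract against the libc implementation:

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
    struct in_addr a;

    if (inet_aton("192.168.1.20", &a))
        printf("parsed:   %s\n", inet_ntoa(a));
    if (!inet_aton("300.1.1.1", &a))
        printf("rejected: octet out of range\n");
    return 0;
}
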
(IP_PORTRANGE_DEFAULT) */ -#define IPPORT_RESERVED 1024 +#ifndef IPPORT_RESERVED +#define IPPORT_RESERVED __DARWIN_IPPORT_RESERVED +#endif #define IPPORT_USERRESERVED 5000 /* @@ -247,6 +289,7 @@ * have a fit if we use. */ #define IPPORT_RESERVEDSTART 600 +#endif /* !_POSIX_C_SOURCE */ /* * Internet address (a structure for historical reasons) @@ -260,6 +303,10 @@ struct in_addr { * On subnets, the decomposition of addresses to host and net parts * is done according to subnet mask, not the masks here. */ +#define INADDR_ANY (u_int32_t)0x00000000 +#define INADDR_BROADCAST (u_int32_t)0xffffffff /* must be masked */ + +#ifndef _POSIX_C_SOURCE #define IN_CLASSA(i) (((u_int32_t)(i) & 0x80000000) == 0) #define IN_CLASSA_NET 0xff000000 #define IN_CLASSA_NSHIFT 24 @@ -286,9 +333,7 @@ struct in_addr { #define IN_EXPERIMENTAL(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000) #define IN_BADCLASS(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000) -#define INADDR_ANY (u_int32_t)0x00000000 #define INADDR_LOOPBACK (u_int32_t)0x7f000001 -#define INADDR_BROADCAST (u_int32_t)0xffffffff /* must be masked */ #ifndef KERNEL #define INADDR_NONE 0xffffffff /* -1 return */ #endif @@ -304,20 +349,22 @@ struct in_addr { #endif #define IN_LOOPBACKNET 127 /* official! */ +#endif /* !_POSIX_C_SOURCE */ /* * Socket address, internet style. */ struct sockaddr_in { - u_char sin_len; - u_char sin_family; - u_short sin_port; + __uint8_t sin_len; + sa_family_t sin_family; + in_port_t sin_port; struct in_addr sin_addr; - char sin_zero[8]; + char sin_zero[8]; /* XXX bwg2001-004 */ }; #define INET_ADDRSTRLEN 16 +#ifndef _POSIX_C_SOURCE /* * Structure used to describe IP options. * Used to store options internally, to pass them to a process, @@ -407,6 +454,7 @@ struct ip_mreq { #define IP_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IP_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ + /* * Definitions for inet sysctl operations. 
* @@ -415,6 +463,8 @@ struct ip_mreq { */ #define IPPROTO_MAXID (IPPROTO_AH + 1) /* don't list to IPPROTO_MAX */ +#ifdef KERNEL_PRIVATE + #define CTL_IPPROTO_NAMES { \ { "ip", CTLTYPE_NODE }, \ { "icmp", CTLTYPE_NODE }, \ @@ -470,6 +520,8 @@ struct ip_mreq { { "ipsec", CTLTYPE_NODE }, \ } +#endif /* KERNEL_PRIVATE */ + /* * Names for IP sysctl objects */ @@ -493,6 +545,8 @@ struct ip_mreq { #define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */ #define IPCTL_MAXID 17 +#ifdef KERNEL_PRIVATE + #define IPCTL_NAMES { \ { 0, 0 }, \ { "forwarding", CTLTYPE_INT }, \ @@ -512,6 +566,10 @@ struct ip_mreq { { "keepfaith", CTLTYPE_INT }, \ { "gifttl", CTLTYPE_INT }, \ } +#endif /* KERNEL_PRIVATE */ + +#endif /* !_POSIX_C_SOURCE */ + /* INET6 stuff */ #define __KAME_NETINET_IN_H_INCLUDED_ @@ -519,19 +577,22 @@ struct ip_mreq { #undef __KAME_NETINET_IN_H_INCLUDED_ #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct ifnet; struct mbuf; /* forward declarations for Standard C */ -int in_broadcast __P((struct in_addr, struct ifnet *)); -int in_canforward __P((struct in_addr)); -int in_cksum __P((struct mbuf *, int)); -int in_cksum_skip __P((struct mbuf *, u_short, u_short)); -u_short in_addword __P((u_short, u_short)); -u_short in_pseudo __P((u_int, u_int, u_int)); -int in_localaddr __P((struct in_addr)); -char *inet_ntoa __P((struct in_addr)); /* in libkern */ -u_long in_netof __P((struct in_addr)); -#endif /* __APPLE_API_PRIVATE */ +int in_broadcast(struct in_addr, struct ifnet *); +int in_canforward(struct in_addr); +int in_cksum(struct mbuf *, int); +int in_cksum_skip(struct mbuf *, u_short, u_short); +u_short in_addword(u_short, u_short); +u_short in_pseudo(u_int, u_int, u_int); +int in_localaddr(struct in_addr); +u_long in_netof(struct in_addr); +#endif /* KERNEL_PRIVATE */ +#define MAX_IPv4_STR_LEN 16 +#define MAX_IPv6_STR_LEN 64 + +const char *inet_ntop(int, const void *, char *, size_t); /* in libkern */ #endif /* KERNEL */ -#endif +#endif _NETINET_IN_H_ diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c new file mode 100644 index 000000000..44c526395 --- /dev/null +++ b/bsd/netinet/in_arp.c @@ -0,0 +1,876 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SIN(s) ((struct sockaddr_in *)s) +#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen)) +#define rt_expire rt_rmx.rmx_expire + +static const size_t MAX_HW_LEN = 10; + +SYSCTL_DECL(_net_link_ether); +SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); + +/* timer values */ +static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ +static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ +static int arpt_down = 20; /* once declared down, don't send for 20 sec */ + +/* Apple Hardware SUM16 checksuming */ +int apple_hwcksum_tx = 1; +int apple_hwcksum_rx = 1; + +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW, + &arpt_prune, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, + &arpt_keep, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, CTLFLAG_RW, + &arpt_down, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx, CTLFLAG_RW, + &apple_hwcksum_tx, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, CTLFLAG_RW, + &apple_hwcksum_rx, 0, ""); + +struct llinfo_arp { + LIST_ENTRY(llinfo_arp) la_le; + struct rtentry *la_rt; + struct mbuf *la_hold; /* last packet until resolved/timeout */ + long la_asked; /* last time we QUERIED for this addr */ +}; + +static LIST_HEAD(, llinfo_arp) llinfo_arp; + +static int arp_inuse, arp_allocated; + +static int arp_maxtries = 5; +static int useloopback = 1; /* use loopback interface for local traffic */ +static int arp_proxyall = 0; + +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, + &arp_maxtries, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, + &useloopback, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, + &arp_proxyall, 0, ""); + +static int log_arp_warnings = 0; + +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings, 
CTLFLAG_RW, + &log_arp_warnings, 0, + "log arp warning messages"); + +extern u_int32_t ipv4_ll_arp_aware; + +/* + * Free an arp entry. + */ +static void +arptfree( + struct llinfo_arp *la) +{ + struct rtentry *rt = la->la_rt; + struct sockaddr_dl *sdl; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (rt == 0) + panic("arptfree"); + if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && + sdl->sdl_family == AF_LINK) { + sdl->sdl_alen = 0; + la->la_asked = 0; + rt->rt_flags &= ~RTF_REJECT; + return; + } + rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), + 0, (struct rtentry **)0); +} + +/* + * Timeout routine. Age arp_tab entries periodically. + */ +/* ARGSUSED */ +static void +arptimer( + __unused void *ignored_arg) +{ + struct llinfo_arp *la = llinfo_arp.lh_first; + struct llinfo_arp *ola; + struct timeval timenow; + + lck_mtx_lock(rt_mtx); + getmicrotime(&timenow); + while ((ola = la) != 0) { + struct rtentry *rt = la->la_rt; + la = la->la_le.le_next; + if (rt->rt_expire && rt->rt_expire <= timenow.tv_sec) + arptfree(ola); /* timer has expired, clear */ + } + lck_mtx_unlock(rt_mtx); + timeout(arptimer, (caddr_t)0, arpt_prune * hz); +} + +/* + * Parallel to llc_rtrequest. + */ +static void +arp_rtrequest( + int req, + struct rtentry *rt, + __unused struct sockaddr *sa) +{ + struct sockaddr *gate = rt->rt_gateway; + struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; + static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}}; + static int arpinit_done; + struct timeval timenow; + + if (!arpinit_done) { + arpinit_done = 1; + LIST_INIT(&llinfo_arp); + timeout(arptimer, (caddr_t)0, hz); + } + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + + if (rt->rt_flags & RTF_GATEWAY) + return; + getmicrotime(&timenow); + switch (req) { + + case RTM_ADD: + /* + * XXX: If this is a manually added route to interface + * such as older version of routed or gated might provide, + * restore cloning bit. + */ + if ((rt->rt_flags & RTF_HOST) == 0 && + SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) + rt->rt_flags |= RTF_CLONING; + if (rt->rt_flags & RTF_CLONING) { + /* + * Case 1: This route should come from a route to iface. + */ + rt_setgate(rt, rt_key(rt), + (struct sockaddr *)&null_sdl); + gate = rt->rt_gateway; + SDL(gate)->sdl_type = rt->rt_ifp->if_type; + SDL(gate)->sdl_index = rt->rt_ifp->if_index; + rt->rt_expire = timenow.tv_sec; + break; + } + /* Announce a new entry if requested. */ + if (rt->rt_flags & RTF_ANNOUNCE) + dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST, SDL(gate), rt_key(rt), (struct sockaddr_dl *)rt_key(rt), NULL); + /*FALLTHROUGH*/ + case RTM_RESOLVE: + if (gate->sa_family != AF_LINK || + gate->sa_len < sizeof(null_sdl)) { + if (log_arp_warnings) + log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n"); + break; + } + SDL(gate)->sdl_type = rt->rt_ifp->if_type; + SDL(gate)->sdl_index = rt->rt_ifp->if_index; + if (la != 0) + break; /* This happens on a route change */ + /* + * Case 2: This route may come from cloning, or a manual route + * add with a LL address. + */ + R_Malloc(la, struct llinfo_arp *, sizeof(*la)); + rt->rt_llinfo = (caddr_t)la; + if (la == 0) { + if ( log_arp_warnings) + log(LOG_DEBUG, "arp_rtrequest: malloc failed\n"); + break; + } + arp_inuse++, arp_allocated++; + Bzero(la, sizeof(*la)); + la->la_rt = rt; + rt->rt_flags |= RTF_LLINFO; + LIST_INSERT_HEAD(&llinfo_arp, la, la_le); + +#if INET + /* + * This keeps the multicast addresses from showing up + * in `arp -a' listings as unresolved. 
It's not actually + * functional. Then the same for broadcast. + */ + if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { + dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate, sizeof(struct sockaddr_dl)); + rt->rt_expire = 0; + } + else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { + struct sockaddr_dl *gate_ll = SDL(gate); + size_t broadcast_len; + ifnet_llbroadcast_copy_bytes(rt->rt_ifp, LLADDR(gate_ll), + sizeof(gate_ll->sdl_data), + &broadcast_len); + gate_ll->sdl_alen = broadcast_len; + gate_ll->sdl_family = AF_LINK; + gate_ll->sdl_len = sizeof(struct sockaddr_dl); + rt->rt_expire = timenow.tv_sec; + } +#endif + + if (SIN(rt_key(rt))->sin_addr.s_addr == + (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) { + /* + * This test used to be + * if (loif.if_flags & IFF_UP) + * It allowed local traffic to be forced + * through the hardware by configuring the loopback down. + * However, it causes problems during network configuration + * for boards that can't receive packets they send. + * It is now necessary to clear "useloopback" and remove + * the route to force traffic out to the hardware. + */ + rt->rt_expire = 0; + ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6); + if (useloopback) + rt->rt_ifp = loif; + + } + break; + + case RTM_DELETE: + if (la == 0) + break; + arp_inuse--; + LIST_REMOVE(la, la_le); + rt->rt_llinfo = 0; + rt->rt_flags &= ~RTF_LLINFO; + if (la->la_hold) { + m_freem(la->la_hold); + } + la->la_hold = NULL; + R_Free((caddr_t)la); + } +} + +/* + * convert hardware address to hex string for logging errors. + */ +static const char * +sdl_addr_to_hex(const struct sockaddr_dl *sdl, char * orig_buf, int buflen) +{ + char * buf = orig_buf; + int i; + const u_char * lladdr = sdl->sdl_data; + int maxbytes = buflen / 3; + + if (maxbytes > sdl->sdl_alen) { + maxbytes = sdl->sdl_alen; + } + *buf = '\0'; + for (i = 0; i < maxbytes; i++) { + snprintf(buf, 3, "%02x", lladdr[i]); + buf += 2; + *buf = (i == maxbytes - 1) ? '\0' : ':'; + buf++; + } + return (orig_buf); +} + +/* + * arp_lookup_route will lookup the route for a given address. + * + * The routing lock must be held. The address must be for a + * host on a local network on this interface. + */ +static errno_t +arp_lookup_route( + const struct in_addr *addr, + int create, + int proxy, + route_t *route) +{ + struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0}; + const char *why = 0; + errno_t error = 0; + + // Caller is responsible for taking the routing lock + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + + sin.sin_addr.s_addr = addr->s_addr; + sin.sin_other = proxy ? 
SIN_PROXY : 0; + + *route = rtalloc1_locked((const struct sockaddr*)&sin, create, 0); + if (*route == NULL) + return ENETUNREACH; + + rtunref(*route); + + if ((*route)->rt_flags & RTF_GATEWAY) { + why = "host is not on local network"; + + /* If there are no references to this route, purge it */ + if ((*route)->rt_refcnt <= 0 && ((*route)->rt_flags & RTF_WASCLONED) != 0) { + rtrequest_locked(RTM_DELETE, + (struct sockaddr *)rt_key(*route), + (*route)->rt_gateway, rt_mask(*route), + (*route)->rt_flags, 0); + } + *route = NULL; + error = ENETUNREACH; + } + else if (((*route)->rt_flags & RTF_LLINFO) == 0) { + why = "could not allocate llinfo"; + *route = NULL; + error = ENOMEM; + } + else if ((*route)->rt_gateway->sa_family != AF_LINK) { + why = "gateway route is not ours"; + *route = NULL; + error = EPROTONOSUPPORT; + } + + if (why && create && log_arp_warnings) { + char tmp[MAX_IPv4_STR_LEN]; + log(LOG_DEBUG, "arplookup %s failed: %s\n", + inet_ntop(AF_INET, addr, tmp, sizeof(tmp)), why); + } + + return error; +} + + +__private_extern__ errno_t +arp_route_to_gateway_route( + const struct sockaddr *net_dest, + route_t hint, + route_t *out_route); +/* + * arp_route_to_gateway_route will find the gateway route for a given route. + * + * If the route is down, look the route up again. + * If the route goes through a gateway, get the route to the gateway. + * If the gateway route is down, look it up again. + * If the route is set to reject, verify it hasn't expired. + */ +__private_extern__ errno_t +arp_route_to_gateway_route( + const struct sockaddr *net_dest, + route_t hint, + route_t *out_route) +{ + route_t route = hint; + *out_route = NULL; + struct timeval timenow; + + /* If we got a hint from the higher layers, check it out */ + if (route) { + lck_mtx_lock(rt_mtx); + + if ((route->rt_flags & RTF_UP) == 0) { + /* route is down, find a new one */ + hint = route = rtalloc1_locked(net_dest, 1, 0); + if (hint) { + rtunref(hint); + } + else { + /* No route to host */ + lck_mtx_unlock(rt_mtx); + return EHOSTUNREACH; + } + } + + if (route->rt_flags & RTF_GATEWAY) { + /* + * We need the gateway route. If it is NULL or down, + * look it up. + */ + if (route->rt_gwroute == 0 || + (route->rt_gwroute->rt_flags & RTF_UP) == 0) { + if (route->rt_gwroute != 0) + rtfree_locked(route->rt_gwroute); + + route->rt_gwroute = rtalloc1_locked(route->rt_gateway, 1, 0); + if (route->rt_gwroute == 0) { + lck_mtx_unlock(rt_mtx); + return EHOSTUNREACH; + } + } + + route = route->rt_gwroute; + } + + if (route->rt_flags & RTF_REJECT) { + getmicrotime(&timenow); + if (route->rt_rmx.rmx_expire == 0 || + timenow.tv_sec < route->rt_rmx.rmx_expire) { + lck_mtx_unlock(rt_mtx); + return route == hint ? 
EHOSTDOWN : EHOSTUNREACH; + } + } + + lck_mtx_unlock(rt_mtx); + } + + *out_route = route; + return 0; +} + +errno_t +arp_lookup_ip( + ifnet_t ifp, + const struct sockaddr_in *net_dest, + struct sockaddr_dl *ll_dest, + size_t ll_dest_len, + route_t hint, + mbuf_t packet) +{ + route_t route = NULL; + errno_t result = 0; + struct sockaddr_dl *gateway; + struct llinfo_arp *llinfo; + struct timeval timenow; + + if (net_dest->sin_family != AF_INET) + return EAFNOSUPPORT; + + if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) + return ENETDOWN; + + /* + * If we were given a route, verify the route and grab the gateway + */ + if (hint) { + result = arp_route_to_gateway_route((const struct sockaddr*)net_dest, + hint, &route); + if (result != 0) + return result; + } + + if (packet->m_flags & M_BCAST) { + u_long broadcast_len; + bzero(ll_dest, ll_dest_len); + result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest), ll_dest_len + - offsetof(struct sockaddr_dl, + sdl_data), &broadcast_len); + if (result != 0) { + return result; + } + + ll_dest->sdl_alen = broadcast_len; + ll_dest->sdl_family = AF_LINK; + ll_dest->sdl_len = sizeof(struct sockaddr_dl); + + return 0; + } + if (packet->m_flags & M_MCAST) { + return dlil_resolve_multi(ifp, (const struct sockaddr*)net_dest, + (struct sockaddr*)ll_dest, ll_dest_len); + } + + lck_mtx_lock(rt_mtx); + + /* + * If we didn't find a route, or the route doesn't have + * link layer information, trigger the creation of the + * route and link layer information. + */ + if (route == NULL || route->rt_llinfo == NULL) + result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route); + + if (result || route == NULL || route->rt_llinfo == NULL) { + char tmp[MAX_IPv4_STR_LEN]; + lck_mtx_unlock(rt_mtx); + if (log_arp_warnings) + log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s\n", + inet_ntop(AF_INET, &net_dest->sin_addr, tmp, sizeof(tmp))); + return result; + } + + /* + * Now that we have the right route, is it filled in? + */ + gateway = SDL(route->rt_gateway); + getmicrotime(&timenow); + if ((route->rt_rmx.rmx_expire == 0 || route->rt_rmx.rmx_expire > timenow.tv_sec) && + gateway != NULL && gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0) { + bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len)); + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* + * Route wasn't complete/valid. We need to arp. 
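+ *
+ * For entries with an expiry, rt_rmx.rmx_expire doubles below as a
+ * one-second timestamp, so at most one request is sent per second
+ * per entry; after arp_maxtries unanswered requests the route is
+ * marked RTF_REJECT for arpt_down seconds and callers see
+ * EHOSTUNREACH instead of queueing more packets.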
+ */ + if (ifp->if_flags & IFF_NOARP) { + lck_mtx_unlock(rt_mtx); + return ENOTSUP; + } + + llinfo = (struct llinfo_arp*)route->rt_llinfo; + if (packet) { + if (llinfo->la_hold) { + m_freem(llinfo->la_hold); + } + llinfo->la_hold = packet; + } + + if (route->rt_rmx.rmx_expire) { + route->rt_flags &= ~RTF_REJECT; + if (llinfo->la_asked == 0 || route->rt_rmx.rmx_expire != timenow.tv_sec) { + route->rt_rmx.rmx_expire = timenow.tv_sec; + if (llinfo->la_asked++ < arp_maxtries) { + lck_mtx_unlock(rt_mtx); + dlil_send_arp(ifp, ARPOP_REQUEST, NULL, route->rt_ifa->ifa_addr, + NULL, (const struct sockaddr*)net_dest); + return EJUSTRETURN; + } + else { + route->rt_flags |= RTF_REJECT; + route->rt_rmx.rmx_expire += arpt_down; + llinfo->la_asked = 0; + llinfo->la_hold = 0; + lck_mtx_unlock(rt_mtx); + return EHOSTUNREACH; + } + } + } + lck_mtx_unlock(rt_mtx); + + return EJUSTRETURN; +} + +errno_t +arp_ip_handle_input( + ifnet_t ifp, + u_short arpop, + const struct sockaddr_dl *sender_hw, + const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip) +{ + char ipv4str[MAX_IPv4_STR_LEN]; + struct sockaddr_dl *gateway; + struct in_ifaddr *ia; + struct in_ifaddr *best_ia = NULL; + route_t route = NULL; + char buf[3 * MAX_HW_LEN]; // enough for MAX_HW_LEN byte hw address + struct llinfo_arp *llinfo; + struct timeval timenow; + errno_t error; + + /* Do not respond to requests for 0.0.0.0 */ + if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST) { + return 0; + } + + /* + * Determine if this ARP is for us + */ + lck_mtx_lock(rt_mtx); + for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { + /* do_bridge should be tested here for bridging */ + if (ia->ia_ifp == ifp) { + best_ia = ia; + if (target_ip->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr || + sender_ip->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) { + break; + } + } + } + + /* If we don't have an IP address on this interface, ignore the packet */ + if (best_ia == 0) { + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* If the packet is from this interface, ignore the packet */ + if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_len)) { + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* Check for a conflict */ + if (sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) { + struct kev_msg ev_msg; + struct kev_in_collision *in_collision; + u_char storage[sizeof(struct kev_in_collision) + MAX_HW_LEN]; + in_collision = (struct kev_in_collision*)storage; + log(LOG_ERR, "%s%d duplicate IP address %s sent from address %s\n", + ifp->if_name, ifp->if_unit, + inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, sizeof(ipv4str)), + sdl_addr_to_hex(sender_hw, buf, sizeof(buf))); + + /* Send a kernel event so anyone can learn of the conflict */ + in_collision->link_data.if_family = ifp->if_family; + in_collision->link_data.if_unit = ifp->if_unit; + strncpy(&in_collision->link_data.if_name[0], ifp->if_name, IFNAMSIZ); + in_collision->ia_ipaddr = sender_ip->sin_addr; + in_collision->hw_len = sender_hw->sdl_alen < MAX_HW_LEN ? 
sender_hw->sdl_alen : MAX_HW_LEN; + bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr, in_collision->hw_len); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + ev_msg.event_code = KEV_INET_ARPCOLLISION; + ev_msg.dv[0].data_ptr = in_collision; + ev_msg.dv[0].data_length = sizeof(struct kev_in_collision) + in_collision->hw_len; + ev_msg.dv[1].data_length = 0; + kev_post_msg(&ev_msg); + + goto respond; + } + + /* + * Look up the routing entry. If it doesn't exist and we are the + * target, go ahead and create one. + */ + error = arp_lookup_route(&sender_ip->sin_addr, (target_ip->sin_addr.s_addr == + best_ia->ia_addr.sin_addr.s_addr), 0, &route); + + if (error || route == 0 || route->rt_gateway == 0) { + if (ipv4_ll_arp_aware != 0 && IN_LINKLOCAL(target_ip->sin_addr.s_addr) + && arpop == ARPOP_REQUEST && sender_ip->sin_addr.s_addr == 0) { + /* + * Verify this ARP probe doesn't conflict with an IPv4LL we know of + * on another interface. + */ + error = arp_lookup_route(&target_ip->sin_addr, 0, 0, &route); + if (error == 0 && route && route->rt_gateway) { + gateway = SDL(route->rt_gateway); + if (route->rt_ifp != ifp && + (gateway->sdl_alen != sender_hw->sdl_alen || + bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw), + gateway->sdl_alen) != 0)) { + /* + * A node is probing for an IPv4LL we know exists on a + * different interface. We respond with a conflicting probe + * to force the new device to pick a different IPv4LL + * address. + */ + log(LOG_INFO, + "arp: %s on %s%d sent probe for %s, already on %s%d\n", + sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), + ifp->if_name, ifp->if_unit, + inet_ntop(AF_INET, &target_ip->sin_addr, ipv4str, + sizeof(ipv4str)), + route->rt_ifp->if_name, route->rt_ifp->if_unit); + log(LOG_INFO, + "arp: sending conflicting probe to %s on %s%d\n", + sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), + ifp->if_name, ifp->if_unit); + + /* + * Send a conservative unicast "ARP probe". + * This should force the other device to pick a new number. + * This will not force the device to pick a new number if the device + * has already assigned that number. + * This will not imply to the device that we own that address. + */ + dlil_send_arp_internal(ifp, ARPOP_REQUEST, + (struct sockaddr_dl*)TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr, + (const struct sockaddr*)sender_ip, sender_hw, + (const struct sockaddr*)target_ip); + } + } + } + + goto respond; + } + + gateway = SDL(route->rt_gateway); + if (route->rt_ifp != ifp) { + if (!IN_LINKLOCAL(sender_ip->sin_addr.s_addr) || (ifp->if_eflags & IFEF_ARPLL) == 0) { + if (log_arp_warnings) + log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n", + inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, + sizeof(ipv4str)), + route->rt_ifp->if_name, + route->rt_ifp->if_unit, + sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), + ifp->if_name, ifp->if_unit); + goto respond; + } + else { + /* Don't change a permanent address */ + if (route->rt_rmx.rmx_expire == 0) { + goto respond; + } + + /* + * Don't change the cloned route away from the parent's interface + * if the address did resolve. 
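+ * Once the link-layer address has been learned (sdl_alen != 0) on
+ * the parent route's own interface, a reply arriving on some other
+ * interface is ignored rather than allowed to re-home the cloned
+ * route.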
+ */ + if (gateway->sdl_alen != 0 && route->rt_parent && + route->rt_parent->rt_ifp == route->rt_ifp) { + goto respond; + } + + /* Move the route to the interface the reply arrived on */ + route->rt_ifp = ifp; + rtsetifa(route, &best_ia->ia_ifa); + gateway->sdl_index = ifp->if_index; + } + } + + if (gateway->sdl_alen && bcmp(LLADDR(gateway), CONST_LLADDR(sender_hw), gateway->sdl_alen)) { + if (route->rt_rmx.rmx_expire) { + char buf2[3 * MAX_HW_LEN]; + log(LOG_INFO, "arp: %s moved from %s to %s on %s%d\n", + inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, + sizeof(ipv4str)), + sdl_addr_to_hex(gateway, buf, sizeof(buf)), + sdl_addr_to_hex(sender_hw, buf2, sizeof(buf2)), ifp->if_name, + ifp->if_unit); + } + else { + log(LOG_ERR, + "arp: %s attempts to modify permanent entry for %s on %s%d\n", + sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), + inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, + sizeof(ipv4str)), + ifp->if_name, ifp->if_unit); + goto respond; + } + } + + /* Copy the sender hardware address into the route's gateway address */ + gateway->sdl_alen = sender_hw->sdl_alen; + bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen); + + /* Update the expire time for the route and clear the reject flag */ + getmicrotime(&timenow); + if (route->rt_rmx.rmx_expire) + route->rt_rmx.rmx_expire = timenow.tv_sec + arpt_keep; + route->rt_flags &= ~RTF_REJECT; + + /* update the llinfo, send a queued packet if there is one */ + llinfo = (struct llinfo_arp*)route->rt_llinfo; + llinfo->la_asked = 0; + if (llinfo->la_hold) { + struct mbuf *m0; + m0 = llinfo->la_hold; + llinfo->la_hold = 0; + + /* Should we take a reference on the route first? */ + lck_mtx_unlock(rt_mtx); + dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0); + lck_mtx_lock(rt_mtx); + } + +respond: + if (arpop != ARPOP_REQUEST) { + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* If we are not the target, check if we should proxy */ + if (target_ip->sin_addr.s_addr != best_ia->ia_addr.sin_addr.s_addr) { + + /* Find a proxy route */ + error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY, &route); + if (error || route == NULL) { + + /* We don't have a route entry indicating we should use proxy */ + /* If we aren't supposed to proxy all, we are done */ + if (!arp_proxyall) { + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* See if we have a route to the target ip before we proxy it */ + route = rtalloc1_locked((const struct sockaddr*)target_ip, 0, 0); + if (!route) { + lck_mtx_unlock(rt_mtx); + return 0; + } + + /* + * Don't proxy for hosts already on the same interface. + */ + if (route->rt_ifp == ifp) { + rtfree_locked(route); + lck_mtx_unlock(rt_mtx); + return 0; + } + } + } + lck_mtx_unlock(rt_mtx); + + dlil_send_arp(ifp, ARPOP_REPLY, NULL, (const struct sockaddr*)target_ip, + sender_hw, (const struct sockaddr*)sender_ip); + + return 0; +} + +void +arp_ifinit( + struct ifnet *ifp, + struct ifaddr *ifa) +{ + ifa->ifa_rtrequest = arp_rtrequest; + ifa->ifa_flags |= RTF_CLONING; + dlil_send_arp(ifp, ARPOP_REQUEST, NULL, ifa->ifa_addr, NULL, ifa->ifa_addr); +} diff --git a/bsd/netinet/in_arp.h b/bsd/netinet/in_arp.h new file mode 100644 index 000000000..e32f597a7 --- /dev/null +++ b/bsd/netinet/in_arp.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License").
You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _NETINET_IN_ARP_H_ +#define _NETINET_IN_ARP_H_ +#include + +struct sockaddr_dl; +struct sockaddr_in; + +/*! + @function inet_arp_lookup + @discussion This function will check the routing table for a cached + arp entry or trigger an arp query to resolve the ip address to a + link-layer address. + + Arp entries are stored in the routing table. This function will + look up the ip destination in the routing table. If the + destination requires forwarding to a gateway, the route of the + gateway will be looked up. The route entry is inspected to + determine if the link layer destination address is known. If + unknown, the arp generation function for IP attached to the + interface is called to create an arp request packet. + @param interface The interface the packet is being sent on. + @param ip_dest The ip destination of the packet. + @param ll_dest On output, the link-layer destination. + @param ll_dest_len The length of the buffer for ll_dest. + @param hint Any routing hint passed down from the protocol. + @param packet The packet being transmitted. + @result May return an error such as EHOSTDOWN or ENETUNREACH. If + this function returns EJUSTRETURN, the packet has been queued + and will be sent when an arp response is received. If any other + value is returned, the caller is responsible for disposing of + the packet. + */ +#ifdef BSD_KERNEL_PRIVATE +#define inet_arp_lookup arp_lookup_ip +#else +errno_t inet_arp_lookup(ifnet_t interface, const struct sockaddr_in *ip_dest, + struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, + mbuf_t packet); +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE +/* arp_lookup_ip is obsolete, use inet_arp_lookup */ +errno_t arp_lookup_ip(ifnet_t interface, const struct sockaddr_in *ip_dest, + struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, + mbuf_t packet); +#endif /* KERNEL_PRIVATE */ + +/*! + @function inet_arp_handle_input + @discussion This function should be called by code that handles + inbound arp packets. The caller should parse the ARP packet to + pull out the operation and the relevant addresses. If a response + is required, the proto_media_send_arp function will be called. + + This function will look up the sender in the routing table and + add an arp entry if necessary. Any queued packets waiting for + the arp resolution will also be transmitted. + @param interface The interface the packet was received on. + @param arp_op The arp operation, ARPOP_REQUEST or ARPOP_REPLY. + @param sender_hw The sender hardware address from the arp payload. + @param sender_ip The sender IP address from the arp payload. + @param target_ip The target IP address from the arp payload. + @result 0 on success or an errno error value on failure.
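+
+	A minimal call sketch (illustrative only: "ea" stands for the
+	caller's parsed ARP header, and the three sockaddr arguments
+	are assumed to have been filled in from it):
+
+	    errno_t err;
+	    err = inet_arp_handle_input(ifp, ntohs(ea->arp_op),
+	        &sender_hw, &sender_ip, &target_ip);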
+ */ +#ifdef BSD_KERNEL_PRIVATE +#define inet_arp_handle_input arp_ip_handle_input +#else +errno_t inet_arp_handle_input(ifnet_t ifp, u_int16_t arpop, + const struct sockaddr_dl *sender_hw, + const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip); +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE +/* arp_ip_handle_input is obsolete, use inet_arp_handle_input */ +errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, + const struct sockaddr_dl *sender_hw, + const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip); +#endif /* KERNEL_PRIVATE */ + +/*! + @function inet_arp_init_ifaddr + @discussion This function should be called in two places, when an IP + address is added and when the hardware address changes. This + function will set up the ifaddr_t for use with the IP ARP + functions. This function will also trigger the transmission of a + gratuitous ARP packet. + + When the SIOCSIFADDR ioctl is handled, the data parameter will + be an ifaddr_t. If this is an IP address, inet_arp_init_ifaddr + should be called. This is usually performed in the protocol + attachment's ioctl handler. + + When the event handler for the protocol attachment receives a + KEV_DL_LINK_ADDRESS_CHANGED event, the event handler should call + inet_arp_init_ifaddr for each interface ip address. + + For an example, see bsd/net/ether_inet_pr_module.c in xnu. + Search for inet_arp_init_ifaddr. + @param interface The interface the address is assigned to. + @param ipaddr The ip interface address. + */ +#ifdef BSD_KERNEL_PRIVATE +/* inet_arp_init_ifaddr is aliased to arp_ifinit */ +#define inet_arp_init_ifaddr arp_ifinit +#else +void inet_arp_init_ifaddr(ifnet_t interface, ifaddr_t ipaddr); +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* _NETINET_IN_ARP_H_ */ diff --git a/bsd/netinet/in_bootp.c b/bsd/netinet/in_bootp.c index 8a1055ebc..903262d4c 100644 --- a/bsd/netinet/in_bootp.c +++ b/bsd/netinet/in_bootp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -66,12 +67,22 @@ #include #include +#include +#include + #ifdef BOOTP_DEBUG #define dprintf(x) printf x; #else /* !BOOTP_DEBUG */ #define dprintf(x) #endif /* BOOTP_DEBUG */ +int bootp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try, + struct in_addr * netmask_p, struct in_addr * router_p, + struct proc * procp); +struct mbuf * ip_pkt_to_mbuf(caddr_t pkt, int pktsize); +int receive_packet(struct socket * so, caddr_t pp, int psize, int * actual_size); + + /* ip address formatting macros */ #define IP_FORMAT "%d.%d.%d.%d" #define IP_CH(ip) ((u_char *)ip) @@ -85,7 +96,7 @@ blank_sin() } static __inline__ void -print_reply(struct bootp *bp, int bp_len) +print_reply(struct bootp *bp, __unused int bp_len) { int i, j, len; @@ -130,7 +141,7 @@ print_reply(struct bootp *bp, int bp_len) } static __inline__ void -print_reply_short(struct bootp *bp, int bp_len) +print_reply_short(struct bootp *bp, __unused int bp_len) { printf("bp_yiaddr = " IP_FORMAT "\n", IP_LIST(&bp->bp_yiaddr)); printf("bp_sname = %s\n", bp->bp_sname); @@ -240,13 +251,16 @@ link_from_ifnet(struct ifnet * ifp) /* for (addr = ifp->if_addrlist; addr; addr = addr->ifa_next) */ + ifnet_lock_shared(ifp); TAILQ_FOREACH(addr, &ifp->if_addrhead, ifa_link) { if (addr->ifa_addr->sa_family == AF_LINK) { struct sockaddr_dl * dl_p = (struct sockaddr_dl *)(addr->ifa_addr); + ifnet_lock_done(ifp); return (dl_p); } } + ifnet_lock_done(ifp); return (NULL); } @@ -257,7 +271,7 @@ link_from_ifnet(struct ifnet * ifp) * bypassing routing code. */ static int -send_bootp_request(struct ifnet * ifp, struct socket * so, +send_bootp_request(struct ifnet * ifp, __unused struct socket * so, struct bootp_packet * pkt) { struct mbuf * m; @@ -269,7 +283,7 @@ send_bootp_request(struct ifnet * ifp, struct socket * so, sin.sin_addr.s_addr = INADDR_BROADCAST; m = ip_pkt_to_mbuf((caddr_t)pkt, sizeof(*pkt)); - return (dlil_output(ifptodlt(ifp, PF_INET), m, 0, (struct sockaddr *)&sin, 0)); + return dlil_output(ifp, PF_INET, m, 0, (struct sockaddr *)&sin, 0); } /* @@ -280,23 +294,18 @@ send_bootp_request(struct ifnet * ifp, struct socket * so, int receive_packet(struct socket * so, caddr_t pp, int psize, int * actual_size) { - struct iovec aiov; - struct uio auio; + uio_t auio; int rcvflg; int error; + char uio_buf[ UIO_SIZEOF(1) ]; - aiov.iov_base = pp; - aiov.iov_len = psize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_offset = 0; - auio.uio_resid = psize; - auio.uio_rw = UIO_READ; + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(pp), psize); rcvflg = MSG_WAITALL; - error = soreceive(so, (struct sockaddr **) 0, &auio, 0, 0, &rcvflg); - *actual_size = psize - auio.uio_resid; + error = soreceive(so, (struct sockaddr **) 0, auio, 0, 0, &rcvflg); + *actual_size = psize - uio_resid(auio); return (error); } @@ -310,14 +319,13 @@ bootp_timeout(void * arg) { struct socket * * socketflag = (struct socket * *)arg; struct socket * so = *socketflag; - boolean_t funnel_state; dprintf(("bootp: timeout\n")); - funnel_state = thread_funnel_set(network_flock,TRUE); *socketflag = NULL; + socket_lock(so, 1); sowakeup(so, &so->so_rcv); - (void) thread_funnel_set(network_flock, FALSE); + socket_unlock(so, 1); return; } @@ -331,7 +339,7 @@ bootp_timeout(void * arg) */ #define GOOD_RATING 3 static __inline__ int -rate_packet(struct bootp 
* pkt, int pkt_size, dhcpol_t * options_p) +rate_packet(__unused struct bootp * pkt, __unused int pkt_size, dhcpol_t * options_p) { int len; int rating = 1; @@ -501,8 +509,11 @@ bootp_loop(struct socket * so, struct ifnet * ifp, int max_try, } break; /* retry */ } - else - sbwait(&so->so_rcv); + else { + socket_lock(so, 1); + error = sbwait(&so->so_rcv); + socket_unlock(so, 1); + } } if (error && (error != EWOULDBLOCK)) { dprintf(("bootp: failed to receive packets: %d\n", error)); @@ -523,9 +534,9 @@ bootp_loop(struct socket * so, struct ifnet * ifp, int max_try, cleanup: if (request) - kfree((caddr_t)request, sizeof (*request)); + kfree(request, sizeof (*request)); if (reply) - kfree((caddr_t)reply, reply_size); + kfree(reply, reply_size); return (error); } @@ -583,7 +594,9 @@ int bootp(struct ifnet * ifp, struct in_addr * iaddr_p, int max_try, dprintf(("bootp: sobind failed, %d\n", error)); goto cleanup; } + socket_lock(so, 1); so->so_state |= SS_NBIO; + socket_unlock(so, 1); } /* do the protocol */ error = bootp_loop(so, ifp, max_try, iaddr_p, netmask_p, router_p); diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index 1971b49bc..6b5b9efff 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -91,11 +91,11 @@ SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW, &ip_gif_ttl, 0, ""); int -in_gif_output(ifp, family, m, rt) - struct ifnet *ifp; - int family; - struct mbuf *m; - struct rtentry *rt; +in_gif_output( + struct ifnet *ifp, + int family, + struct mbuf *m, + struct rtentry *rt) { struct gif_softc *sc = (struct gif_softc*)ifp; struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst; @@ -342,14 +342,18 @@ gif_encapcheck4(m, off, proto, arg) return 0; } /* reject packets with broadcast on source */ + lck_mtx_lock(rt_mtx); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) { if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) continue; - if (ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) + if (ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { + lck_mtx_unlock(rt_mtx); return 0; + } } + lck_mtx_unlock(rt_mtx); /* ingress filters on outer source */ if ((sc->gif_if.if_flags & IFF_LINK2) == 0 && diff --git a/bsd/netinet/in_gif.h b/bsd/netinet/in_gif.h index e6d09d293..4321eba46 100644 --- a/bsd/netinet/in_gif.h +++ b/bsd/netinet/in_gif.h @@ -53,15 +53,17 @@ #ifndef _NETINET_IN_GIF_H_ #define _NETINET_IN_GIF_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL +#ifdef KERNEL_PRIVATE #define GIF_TTL 30 extern int ip_gif_ttl; -void in_gif_input __P((struct mbuf *, int)); -int in_gif_output __P((struct ifnet *, int, struct mbuf *, struct rtentry *)); -int gif_encapcheck4 __P((const struct mbuf *, int, int, void *)); +void in_gif_input(struct mbuf *, int); +int in_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); +int gif_encapcheck4(const struct mbuf *, int, int, void *); -#endif /* __APPLE_API_PRIVATE */ -#endif /*_NETINET_IN_GIF_H_*/ +#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL */ +#endif /* _NETINET_IN_GIF_H_ */ diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 9a8696d2b..c3b56f784 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,6 +69,7 @@ #endif #include #include +#include #include @@ -100,6 +101,7 @@ #if IPSEC extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif extern u_long route_generation; @@ -182,7 +184,9 @@ in_pcballoc(so, pcbinfo, p) register struct inpcb *inp; caddr_t temp; #if IPSEC +#ifndef __APPLE__ int error; +#endif #endif if (so->cached_in_sock_layer == 0) { @@ -207,12 +211,24 @@ in_pcballoc(so, pcbinfo, p) inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; + so->so_pcb = (caddr_t)inp; + + if (so->so_proto->pr_flags & PR_PCBLOCK) { + inp->inpcb_mtx = lck_mtx_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr); + if (inp->inpcb_mtx == NULL) { + printf("in_pcballoc: can't alloc mutex! so=%x\n", so); + return(ENOMEM); + } + } + #if IPSEC #ifndef __APPLE__ if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); error = ipsec_init_policy(so, &inp->inp_sp); + lck_mtx_unlock(sadb_mutex); if (error != 0) { - zfree(pcbinfo->ipi_zone, (vm_offset_t)inp); + zfree(pcbinfo->ipi_zone, inp); return error; } } @@ -222,13 +238,16 @@ in_pcballoc(so, pcbinfo, p) if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on) inp->inp_flags |= IN6P_IPV6_V6ONLY; #endif - LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); - pcbinfo->ipi_count++; - so->so_pcb = (caddr_t)inp; + #if INET6 if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; #endif + lck_rw_lock_exclusive(pcbinfo->mtx); + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); + pcbinfo->ipi_count++; + lck_rw_done(pcbinfo->mtx); return (0); } @@ -252,17 +271,25 @@ in_pcbbind(inp, nam, p) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; + socket_unlock(so, 0); /* keep reference on socket */ + lck_rw_lock_exclusive(pcbinfo->mtx); if (nam) { sin = (struct sockaddr_in *)nam; - if (nam->sa_len != sizeof (*sin)) + if (nam->sa_len != sizeof (*sin)) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EINVAL); + } #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ - if (sin->sin_family != AF_INET) + if (sin->sin_family != AF_INET) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EAFNOSUPPORT); + } #endif lport = sin->sin_port; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { @@ -276,17 +303,27 @@ in_pcbbind(inp, nam, p) if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { + struct ifaddr *ifa; sin->sin_port = 0; /* yech... 
*/ - if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) + if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EADDRNOTAVAIL); + } + else { + ifafree(ifa); + } } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED && p && - suser(p->p_ucred, &p->p_acflag)) + proc_suser(p)) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EACCES); + } if (so->so_uid && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local(inp->inp_pcbinfo, @@ -303,9 +340,12 @@ in_pcbbind(inp, nam, p) ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket)) + INP_SOCKAF(t->inp_socket)) { #endif /* defined(INET6) */ - return (EADDRINUSE); + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); + return (EADDRINUSE); + } } } t = in_pcblookup_local(pcbinfo, sin->sin_addr, @@ -319,9 +359,12 @@ in_pcbbind(inp, nam, p) ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket)) + INP_SOCKAF(t->inp_socket)) { #endif /* defined(INET6) */ - return (EADDRINUSE); + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); + return (EADDRINUSE); + } } } inp->inp_laddr = sin->sin_addr; @@ -337,8 +380,11 @@ in_pcbbind(inp, nam, p) last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { - if (p && (error = suser(p->p_ucred, &p->p_acflag))) + if (p && (error = proc_suser(p))) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return error; + } first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; @@ -362,6 +408,8 @@ in_pcbbind(inp, nam, p) do { if (count-- < 0) { /* completely used? */ + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); inp->inp_laddr.s_addr = INADDR_ANY; return (EADDRNOTAVAIL); } @@ -379,6 +427,8 @@ in_pcbbind(inp, nam, p) do { if (count-- < 0) { /* completely used? */ + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); inp->inp_laddr.s_addr = INADDR_ANY; return (EADDRNOTAVAIL); } @@ -390,12 +440,15 @@ in_pcbbind(inp, nam, p) inp->inp_laddr, lport, wild)); } } + socket_lock(so, 0); inp->inp_lport = lport; - if (in_pcbinshash(inp) != 0) { + if (in_pcbinshash(inp, 1) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; + lck_rw_done(pcbinfo->mtx); return (EAGAIN); } + lck_rw_done(pcbinfo->mtx); return (0); } @@ -426,6 +479,7 @@ in_pcbladdr(inp, nam, plocal_sin) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); + lck_mtx_lock(rt_mtx); if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, @@ -460,19 +514,19 @@ in_pcbladdr(inp, nam, plocal_sin) sin->sin_addr.s_addr || inp->inp_socket->so_options & SO_DONTROUTE || ro->ro_rt->generation_id != route_generation)) { - rtfree(ro->ro_rt); + rtfree_locked(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == (struct ifnet *)0)) { + ro->ro_rt->rt_ifp == 0)) { /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sin->sin_addr; - rtalloc(ro); + rtalloc_ign_locked(ro, 0UL); } /* * If we found a route, use the address @@ -480,20 +534,29 @@ in_pcbladdr(inp, nam, plocal_sin) * unless it is the loopback (in case a route * to our address on another net goes to loopback). 
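 * When the route yields no usable address, the code below falls
 * back, in order, to an interface claiming the destination address
 * (ifa_ifwithdstaddr), one on the destination's network
 * (ifa_ifwithnet), and finally the first configured address,
 * holding an ifaddr reference on whichever is chosen.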
*/ - if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) + if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); + } if (ia == 0) { u_short fport = sin->sin_port; sin->sin_port = 0; ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); - if (ia == 0) + if (ia == 0) { ia = ifatoia(ifa_ifwithnet(sintosa(sin))); + } sin->sin_port = fport; - if (ia == 0) + if (ia == 0) { ia = TAILQ_FIRST(&in_ifaddrhead); - if (ia == 0) + if (ia) + ifaref(&ia->ia_ifa); + } + if (ia == 0) { + lck_mtx_unlock(rt_mtx); return (EADDRNOTAVAIL); + } } /* * If the destination address is multicast and an outgoing @@ -506,22 +569,29 @@ in_pcbladdr(inp, nam, plocal_sin) struct ifnet *ifp; imo = inp->inp_moptions; - if (imo->imo_multicast_ifp != NULL) { + if (imo->imo_multicast_ifp != NULL && (ia == NULL || + ia->ia_ifp != imo->imo_multicast_ifp)) { ifp = imo->imo_multicast_ifp; + if (ia) + ifafree(&ia->ia_ifa); TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) if (ia->ia_ifp == ifp) break; - if (ia == 0) + if (ia == 0) { + lck_mtx_unlock(rt_mtx); return (EADDRNOTAVAIL); + } + ifaref(ia); } } - /* - * Don't do pcblookup call here; return interface in plocal_sin - * and exit to caller, that will do the lookup. - */ + /* + * Don't do pcblookup call here; return interface in plocal_sin + * and exit to caller, that will do the lookup. + */ *plocal_sin = &ia->ia_addr; - + ifafree(&ia->ia_ifa); } + lck_mtx_unlock(rt_mtx); return(0); } @@ -540,7 +610,7 @@ in_pcbconnect(inp, nam, p) { struct sockaddr_in *ifaddr; struct sockaddr_in *sin = (struct sockaddr_in *)nam; - struct sockaddr_in sa; + struct inpcb *pcb; int error; /* @@ -549,9 +619,13 @@ in_pcbconnect(inp, nam, p) if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0) return(error); - if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, + socket_unlock(inp->inp_socket, 0); + pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? 
inp->inp_laddr : ifaddr->sin_addr, - inp->inp_lport, 0, NULL) != NULL) { + inp->inp_lport, 0, NULL); + socket_lock(inp->inp_socket, 0); + if (pcb != NULL) { + in_pcb_checkstate(pcb, WNT_RELEASE, 0); return (EADDRINUSE); } if (inp->inp_laddr.s_addr == INADDR_ANY) { @@ -560,12 +634,27 @@ in_pcbconnect(inp, nam, p) if (error) return (error); } + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } inp->inp_laddr = ifaddr->sin_addr; inp->inp_flags |= INP_INADDR_ANY; + } + else { + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); + lck_rw_done(inp->inp_pcbinfo->mtx); return (0); } @@ -576,8 +665,18 @@ in_pcbdisconnect(inp) inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; + + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } + in_pcbrehash(inp); - if (inp->inp_socket->so_state & SS_NOFDREF) + lck_rw_done(inp->inp_pcbinfo->mtx); + + if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } @@ -586,56 +685,115 @@ in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; - struct inpcbinfo *ipi = inp->inp_pcbinfo; struct rtentry *rt = inp->inp_route.ro_rt; - - if (so->so_pcb == 0) /* we've been called twice, ignore */ - return; + if (so->so_pcb == 0) { /* we've been called twice */ + panic("in_pcbdetach: inp=%x so=%x proto=%x so_pcb is null!\n", + inp, so, so->so_proto->pr_protocol); + } #if IPSEC - ipsec4_delete_pcbpolicy(inp); + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + ipsec4_delete_pcbpolicy(inp); + lck_mtx_unlock(sadb_mutex); + } #endif /*IPSEC*/ - inp->inp_gencnt = ++ipi->ipi_gencnt; - in_pcbremlists(inp); + + /* mark socket state as dead */ + if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) + panic("in_pcbdetach so=%x prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol); #if TEMPDEBUG if (so->cached_in_sock_layer) - printf("PCB_DETACH for cached socket %x\n", so); + printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags); else - printf("PCB_DETACH for allocated socket %x\n", so); + printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags); #endif + if ((so->so_flags & SOF_PCBCLEARING) == 0) { + inp->inp_vflag = 0; + if (inp->inp_options) + (void)m_free(inp->inp_options); + if (rt) { + /* + * route deletion requires reference count to be <= zero + */ + lck_mtx_lock(rt_mtx); + if ((rt->rt_flags & RTF_DELCLONE) && + (rt->rt_flags & RTF_WASCLONED) && + (rt->rt_refcnt <= 1)) { + rtunref(rt); + rt->rt_flags &= ~RTF_UP; + rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), + rt->rt_flags, (struct rtentry **)0); + } + else { + rtfree_locked(rt); + inp->inp_route.ro_rt = 0; + } + lck_mtx_unlock(rt_mtx); + } + ip_freemoptions(inp->inp_moptions); + inp->inp_moptions = NULL; + sofreelastref(so, 0); + inp->inp_state = INPCB_STATE_DEAD; + so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ + 
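+		/*
+		 * The pcb itself is not freed here: it remains on the
+		 * pcbinfo list in INPCB_STATE_DEAD until its last want
+		 * reference is dropped via in_pcb_checkstate(WNT_RELEASE),
+		 * after which in_pcbdispose() below reclaims the mutex,
+		 * the zone element and the socket.
+		 */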
} +} - so->so_pcb = 0; - if (inp->inp_options) - (void)m_free(inp->inp_options); - if (rt) { - /* - * route deletion requires reference count to be <= zero - */ - if ((rt->rt_flags & RTF_DELCLONE) && - (rt->rt_flags & RTF_WASCLONED) && - (rt->rt_refcnt <= 1)) { - rtunref(rt); - rt->rt_flags &= ~RTF_UP; - rtrequest(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags, (struct rtentry **)0); +void +in_pcbdispose(inp) + struct inpcb *inp; +{ + struct socket *so = inp->inp_socket; + struct inpcbinfo *ipi = inp->inp_pcbinfo; + +#if TEMPDEBUG + if (inp->inp_state != INPCB_STATE_DEAD) { + printf("in_pcbdispose: not dead yet? so=%x\n", so); + } +#endif + + if (so && so->so_usecount != 0) + panic("in_pcbdispose: use count=%x so=%x\n", so->so_usecount, so); + + + inp->inp_gencnt = ++ipi->ipi_gencnt; + /*### access ipi in in_pcbremlists */ + in_pcbremlists(inp); + + if (so) { + if (so->so_proto->pr_flags & PR_PCBLOCK) { + sofreelastref(so, 0); + if (so->so_rcv.sb_cc || so->so_snd.sb_cc) { +#if TEMPDEBUG + printf("in_pcbdispose sb not cleaned up so=%x rc_cci=%x snd_cc=%x\n", + so, so->so_rcv.sb_cc, so->so_snd.sb_cc); +#endif + sbrelease(&so->so_rcv); + sbrelease(&so->so_snd); + } + if (so->so_head != NULL) + panic("in_pcbdispose, so=%x head still exist\n", so); + lck_mtx_unlock(inp->inpcb_mtx); + lck_mtx_free(inp->inpcb_mtx, ipi->mtx_grp); } - else { - rtfree(rt); - inp->inp_route.ro_rt = 0; + so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ + so->so_saved_pcb = (caddr_t) inp; + so->so_pcb = 0; + inp->inp_socket = 0; + inp->reserved[0] = so; + if (so->cached_in_sock_layer == 0) { + zfree(ipi->ipi_zone, inp); } + sodealloc(so); } - ip_freemoptions(inp->inp_moptions); - inp->inp_vflag = 0; - if (so->cached_in_sock_layer) - so->so_saved_pcb = (caddr_t) inp; +#if TEMPDEBUG else - zfree(ipi->ipi_zone, (vm_offset_t) inp); - - sofree(so); + printf("in_pcbdispose: no socket for inp=%x\n", inp); +#endif } /* @@ -653,7 +811,6 @@ in_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { - int s; register struct inpcb *inp; register struct sockaddr_in *sin; @@ -667,16 +824,13 @@ in_setsockaddr(so, nam) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); - s = splnet(); inp = sotoinpcb(so); if (!inp) { - splx(s); FREE(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_lport; sin->sin_addr = inp->inp_laddr; - splx(s); *nam = (struct sockaddr *)sin; return 0; @@ -687,7 +841,6 @@ in_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { - int s; struct inpcb *inp; register struct sockaddr_in *sin; @@ -701,33 +854,29 @@ in_setpeeraddr(so, nam) sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); - s = splnet(); inp = sotoinpcb(so); if (!inp) { - splx(s); FREE(sin, M_SONAME); return ECONNRESET; } sin->sin_port = inp->inp_fport; sin->sin_addr = inp->inp_faddr; - splx(s); *nam = (struct sockaddr *)sin; return 0; } void -in_pcbnotifyall(head, faddr, errno, notify) - struct inpcbhead *head; +in_pcbnotifyall(pcbinfo, faddr, errno, notify) + struct inpcbinfo *pcbinfo; struct in_addr faddr; - void (*notify) __P((struct inpcb *, int)); + void (*notify) (struct inpcb *, int); { - struct inpcb *inp, *ninp; - int s; + struct inpcb *inp; + + lck_rw_lock_shared(pcbinfo->mtx); - s = splnet(); - for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { - ninp = LIST_NEXT(inp, inp_list); + LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { #if INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; @@ -735,15 +884,20 @@ in_pcbnotifyall(head, faddr, 
errno, notify) if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) continue; + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + socket_lock(inp->inp_socket, 1); (*notify)(inp, errno); + (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); + socket_unlock(inp->inp_socket, 1); } - splx(s); + lck_rw_done(pcbinfo->mtx); } void -in_pcbpurgeif0(head, ifp) - struct inpcb *head; - struct ifnet *ifp; +in_pcbpurgeif0( + struct inpcb *head, + struct ifnet *ifp) { struct inpcb *inp; struct ip_moptions *imo; @@ -767,7 +921,7 @@ in_pcbpurgeif0(head, ifp) for (i = 0, gap = 0; i < imo->imo_num_memberships; i++) { if (imo->imo_membership[i]->inm_ifp == ifp) { - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); gap++; } else if (gap != 0) imo->imo_membership[i - gap] = @@ -792,6 +946,7 @@ in_losing(inp) struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt)) { + lck_mtx_lock(rt_mtx); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&inp->inp_route.ro_dst; @@ -799,11 +954,12 @@ in_losing(inp) info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) - (void) rtrequest(RTM_DELETE, rt_key(rt), + (void) rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); inp->inp_route.ro_rt = 0; - rtfree(rt); + rtfree_locked(rt); + lck_mtx_unlock(rt_mtx); /* * A new route can be allocated * the next time output is attempted. @@ -821,7 +977,7 @@ in_rtchange(inp, errno) int errno; { if (inp->inp_route.ro_rt) { - if (ifa_foraddr(inp->inp_laddr.s_addr) == NULL) + if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0) return; /* we can't remove the route now. not sure if still ok to use src */ rtfree(inp->inp_route.ro_rt); inp->inp_route.ro_rt = 0; @@ -930,13 +1086,14 @@ in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) * Lookup PCB in hash list. */ struct inpcb * -in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, - ifp) - struct inpcbinfo *pcbinfo; - struct in_addr faddr, laddr; - u_int fport_arg, lport_arg; - int wildcard; - struct ifnet *ifp; +in_pcblookup_hash( + struct inpcbinfo *pcbinfo, + struct in_addr faddr, + u_int fport_arg, + struct in_addr laddr, + u_int lport_arg, + int wildcard, + struct ifnet *ifp) { struct inpcbhead *head; register struct inpcb *inp; @@ -946,19 +1103,7 @@ in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, * We may have found the pcb in the last lookup - check this first. */ - if ((!IN_MULTICAST(laddr.s_addr)) && (pcbinfo->last_pcb)) { - if (faddr.s_addr == pcbinfo->last_pcb->inp_faddr.s_addr && - laddr.s_addr == pcbinfo->last_pcb->inp_laddr.s_addr && - fport_arg == pcbinfo->last_pcb->inp_fport && - lport_arg == pcbinfo->last_pcb->inp_lport) { - /* - * Found. - */ - return (pcbinfo->last_pcb); - } - - pcbinfo->last_pcb = 0; - } + lck_rw_lock_shared(pcbinfo->mtx); /* * First look for an exact match. @@ -976,7 +1121,14 @@ in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, /* * Found. 
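 * The pcb is handed back with a want reference held (WNT_ACQUIRE);
 * the caller must drop it with in_pcb_checkstate(WNT_RELEASE) once
 * it is done with the pcb.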
*/ - return (inp); + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (inp); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } } } if (wildcard) { @@ -998,8 +1150,16 @@ in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, (inp->inp_flags & INP_FAITH) == 0) continue; #endif - if (inp->inp_laddr.s_addr == laddr.s_addr) - return (inp); + if (inp->inp_laddr.s_addr == laddr.s_addr) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (inp); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } + } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if defined(INET6) if (INP_CHECK_SOCKAF(inp->inp_socket, @@ -1012,15 +1172,35 @@ in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, } } #if defined(INET6) - if (local_wild == NULL) - return (local_wild_mapped); + if (local_wild == NULL) { + if (local_wild_mapped != NULL) { + if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (local_wild_mapped); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } + } + lck_rw_done(pcbinfo->mtx); + return (NULL); + } #endif /* defined(INET6) */ - return (local_wild); + if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (local_wild); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } } /* * Not found. */ + lck_rw_done(pcbinfo->mtx); return (NULL); } @@ -1028,8 +1208,9 @@ in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, * Insert PCB onto various hash lists. 
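 * The added "locked" argument states whether the caller already
 * holds pcbinfo->mtx exclusive; if not, the routine backs off the
 * socket lock before taking the list lock, avoiding the lock-order
 * inversion seen with inbound UDP multicast.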
*/ int -in_pcbinshash(inp) +in_pcbinshash(inp, locked) struct inpcb *inp; + int locked; /* list already locked exclusive */ { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; @@ -1044,8 +1225,18 @@ in_pcbinshash(inp) #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; - pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, - inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; + inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask); + + if (!locked) { + if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } + } + + pcbhash = &pcbinfo->hashbase[inp->hash_element]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; @@ -1063,6 +1254,8 @@ in_pcbinshash(inp) if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK); if (phd == NULL) { + if (!locked) + lck_rw_done(pcbinfo->mtx); return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; @@ -1072,10 +1265,8 @@ in_pcbinshash(inp) inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); -#ifdef __APPLE__ - inp->hash_element = INP_PCBHASH(inp->inp_faddr.s_addr, inp->inp_lport, - inp->inp_fport, pcbinfo->hashmask); -#endif + if (!locked) + lck_rw_done(pcbinfo->mtx); return (0); } @@ -1098,30 +1289,23 @@ in_pcbrehash(inp) else #endif /* INET6 */ hashkey_faddr = inp->inp_faddr.s_addr; - - head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, - inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; + inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, + inp->inp_fport, inp->inp_pcbinfo->hashmask); + head = &inp->inp_pcbinfo->hashbase[inp->hash_element]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); -#ifdef __APPLE__ - inp->hash_element = INP_PCBHASH(inp->inp_faddr.s_addr, inp->inp_lport, - inp->inp_fport, inp->inp_pcbinfo->hashmask); -#endif } /* * Remove PCB from various lists. */ +//###LOCK must be called with list lock held void in_pcbremlists(inp) struct inpcb *inp; { inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; -#ifdef __APPLE__ - if (inp == inp->inp_pcbinfo->last_pcb) - inp->inp_pcbinfo->last_pcb = 0; -#endif if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; @@ -1137,17 +1321,18 @@ in_pcbremlists(inp) inp->inp_pcbinfo->ipi_count--; } +static void in_pcb_detach_port( struct inpcb *inp); int -in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, +in_pcb_grab_port (struct inpcbinfo *pcbinfo, u_short options, struct in_addr laddr, u_short *lport, struct in_addr faddr, u_short fport, u_int cookie, - u_char owner_id)) + u_char owner_id) { - struct inpcb *pcb; + struct inpcb *inp, *pcb; struct sockaddr_in sin; struct proc *p = current_proc(); int stat; @@ -1164,19 +1349,25 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, * it's from the acceptance of an incoming connection. */ if (laddr.s_addr == 0) { + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; return EINVAL; } - if (in_pcblookup_hash(pcbinfo, faddr, fport, - laddr, *lport, 0, NULL) != NULL) { - if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) { - return (EADDRINUSE); - } + inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL); + if (inp) { + /* pcb was found, its count was upped. 
need to decrease it here */ + in_pcb_checkstate(inp, WNT_RELEASE, 0); + if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) { + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; + return (EADDRINUSE); + } } stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) + if (stat) { + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; return stat; + } pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); pcb->inp_vflag |= INP_IPV4; @@ -1185,7 +1376,10 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, pcb->inp_faddr = faddr; pcb->inp_fport = fport; - in_pcbinshash(pcb); + + lck_rw_lock_exclusive(pcbinfo->mtx); + in_pcbinshash(pcb, 1); + lck_rw_done(pcbinfo->mtx); } else { /* @@ -1198,8 +1392,10 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, */ stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) + if (stat) { + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; return stat; + } pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); pcb->inp_vflag |= INP_IPV4; @@ -1209,21 +1405,26 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, sin.sin_family = AF_INET; sin.sin_addr.s_addr = laddr.s_addr; sin.sin_port = *lport; - + + socket_lock(&pcbinfo->nat_dummy_socket, 1); stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, (struct sockaddr *) &sin, p); if (stat) { - in_pcbdetach(pcb); - return stat; + socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */ + in_pcb_detach_port(pcb); /* will restore dummy pcb */ + return stat; } + socket_unlock(&pcbinfo->nat_dummy_socket, 1); } } else { /* The grabber wants an ephemeral port */ stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) + if (stat) { + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; return stat; + } pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); pcb->inp_vflag |= INP_IPV4; @@ -1240,26 +1441,34 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, */ if (laddr.s_addr == 0) { - in_pcbdetach(pcb); + in_pcb_detach_port(pcb); /* restores dummy pcb */ return EINVAL; } + socket_lock(&pcbinfo->nat_dummy_socket, 1); stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, (struct sockaddr *) &sin, p); if (stat) { - in_pcbdetach(pcb); + socket_unlock(&pcbinfo->nat_dummy_socket, 1); + in_pcb_detach_port(pcb); /* restores dummy pcb */ return stat; } - if (in_pcblookup_hash(pcbinfo, faddr, fport, - pcb->inp_laddr, pcb->inp_lport, 0, NULL) != NULL) { - in_pcbdetach(pcb); - return (EADDRINUSE); + socket_unlock(&pcbinfo->nat_dummy_socket, 1); + inp = in_pcblookup_hash(pcbinfo, faddr, fport, + pcb->inp_laddr, pcb->inp_lport, 0, NULL); + if (inp) { + /* pcb was found, its count was upped. need to decrease it here */ + in_pcb_checkstate(inp, WNT_RELEASE, 0); + in_pcb_detach_port(pcb); + return (EADDRINUSE); } + lck_rw_lock_exclusive(pcbinfo->mtx); pcb->inp_faddr = faddr; pcb->inp_fport = fport; in_pcbrehash(pcb); + lck_rw_done(pcbinfo->mtx); } else { /* @@ -1267,12 +1476,15 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, * may or may not be defined. 
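 * On this path, too, the bind goes through the shared
 * nat_dummy_socket: its socket lock is held around in_pcbbind and
 * in_pcb_detach_port() restores the dummy pcb on failure.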
*/ + socket_lock(&pcbinfo->nat_dummy_socket, 1); stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, (struct sockaddr *) &sin, p); if (stat) { - in_pcbdetach(pcb); + socket_unlock(&pcbinfo->nat_dummy_socket, 1); + in_pcb_detach_port(pcb); return stat; } + socket_unlock(&pcbinfo->nat_dummy_socket, 1); } *lport = pcb->inp_lport; } @@ -1281,12 +1493,33 @@ in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, pcb->nat_owner = owner_id; pcb->nat_cookie = cookie; pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb; + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */ return 0; } +/* 3962035 - in_pcb_letgo_port needs a special case function for detaching */ +static void +in_pcb_detach_port( + struct inpcb *inp) +{ + struct socket *so = inp->inp_socket; + struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + + if (so != &pcbinfo->nat_dummy_socket) + panic("in_pcb_detach_port: not a dummy_sock: so=%x, inp=%x\n", so, inp); + inp->inp_gencnt = ++pcbinfo->ipi_gencnt; + /*### access ipi in in_pcbremlists */ + in_pcbremlists(inp); + + inp->inp_socket = 0; + inp->reserved[0] = so; + zfree(pcbinfo->ipi_zone, inp); + pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */ +} + int -in_pcb_letgo_port __P((struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, - struct in_addr faddr, u_short fport, u_char owner_id)) +in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, + struct in_addr faddr, u_short fport, u_char owner_id) { struct inpcbhead *head; register struct inpcb *inp; @@ -1295,6 +1528,8 @@ in_pcb_letgo_port __P((struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short /* * First look for an exact match. */ + + lck_rw_lock_exclusive(pcbinfo->mtx); head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1305,11 +1540,13 @@ in_pcb_letgo_port __P((struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short /* * Found. */ - in_pcbdetach(inp); + in_pcb_detach_port(inp); + lck_rw_done(pcbinfo->mtx); return 0; } } + lck_rw_done(pcbinfo->mtx); return ENOENT; } @@ -1332,6 +1569,7 @@ in_pcb_get_owner(struct inpcbinfo *pcbinfo, * owners. */ + lck_rw_lock_shared(pcbinfo->mtx); porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { @@ -1340,6 +1578,7 @@ in_pcb_get_owner(struct inpcbinfo *pcbinfo, } if (phd == 0) { + lck_rw_done(pcbinfo->mtx); return INPCB_NO_OWNER; } @@ -1355,18 +1594,23 @@ in_pcb_get_owner(struct inpcbinfo *pcbinfo, } } + lck_rw_done(pcbinfo->mtx); return owner_id; } else { inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, 1, NULL); if (inp) { + /* pcb was found, its count was upped. 
need to decrease it here */ + /* if we found it, that pcb is already locked by the caller */ + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) + return(INPCB_NO_OWNER); + if (inp->nat_owner) { owner_id = inp->nat_owner; *cookie = inp->nat_cookie; } else { - pcbinfo->last_pcb = inp; owner_id = INPCB_OWNED_BY_X; } } @@ -1401,6 +1645,7 @@ in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id) struct inpcb *inp; + lck_rw_lock_exclusive(pcbinfo->mtx); if (pcbinfo->all_owners & owner_id) { pcbinfo->all_owners &= ~owner_id; for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) { @@ -1409,16 +1654,18 @@ in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id) /* * Deallocate the pcb */ - in_pcbdetach(inp); + in_pcb_detach_port(inp); else inp->nat_owner &= ~owner_id; } } } else { + lck_rw_done(pcbinfo->mtx); return ENOENT; } + lck_rw_done(pcbinfo->mtx); return 0; } @@ -1427,11 +1674,147 @@ in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id) void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily, int pfamily, int protocol) { + int stat; + struct proc *p = current_proc(); + bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket)); - pcbinfo->nat_dummy_socket.so_proto = pffindproto(afamily, pfamily, protocol); + pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol); pcbinfo->all_owners = 0; + stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); + if (stat) + panic("in_pcb_nat_init: can't alloc fakepcb err=%d\n", stat); + pcbinfo->nat_dummy_pcb = pcbinfo->nat_dummy_socket.so_pcb; } +/* Mechanism used to defer the memory release of PCBs + * The pcb list will contain the pcb until the ripper can clean it up if + * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING + * 3) usecount is zero + * This function will be called to either mark the pcb as dead (WNT_STOPUSING), + * or to acquire (WNT_ACQUIRE) and release (WNT_RELEASE) a wanted reference on it. +*/ +int +in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) + +{ + + volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; + UInt32 origwant; + UInt32 newwant; + + switch (mode) { + + case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */ + + /* compare-and-swap with STOPUSING; if it succeeds we're good, if the pcb is in use it will be marked later */ + + if (locked == 0) + socket_lock(pcb->inp_socket, 1); + pcb->inp_state = INPCB_STATE_DEAD; +stopusing: + if (pcb->inp_socket->so_usecount < 0) + panic("in_pcb_checkstate STOP pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket); + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); + + origwant = *wantcnt; + if ((UInt16) origwant == 0xffff ) /* should stop using */ + return (WNT_STOPUSING); + newwant = 0xffff; + if ((UInt16) origwant == 0) {/* try to mark it as unusable now */ + OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt) ; + } + return (WNT_STOPUSING); + break; + + case WNT_ACQUIRE: /* try to increase reference to pcb */ + /* if WNT_STOPUSING should bail out */ + /* + * if the socket state is DEAD, try to set the count to STOPUSING and return failure; + * otherwise increase the count + */ + do { + origwant = *wantcnt; + if ((UInt16) origwant == 0xffff ) {/* should stop using */ +// printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%x\n", pcb); + return (WNT_STOPUSING); + } + newwant = origwant + 1; + } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt)); + return (WNT_ACQUIRE); + break; + + case WNT_RELEASE: /* release reference.
if result is null and pcb state is DEAD, + set wanted bit to STOPUSING + */ + + if (locked == 0) + socket_lock(pcb->inp_socket, 1); + + do { + origwant = *wantcnt; + if ((UInt16) origwant == 0x0 ) + panic("in_pcb_checkstate pcb=%x release with zero count", pcb); + if ((UInt16) origwant == 0xffff ) {/* should stop using */ +#if TEMPDEBUG + printf("in_pcb_checkstate: REL PCB was STOPUSING while release. odd pcb=%x\n", pcb); +#endif + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); + return (WNT_STOPUSING); + } + newwant = origwant - 1; + } while (!OSCompareAndSwap(origwant, newwant, (UInt32 *) wantcnt)); + + if (pcb->inp_state == INPCB_STATE_DEAD) + goto stopusing; + if (pcb->inp_socket->so_usecount < 0) + panic("in_pcb_checkstate RELEASE pcb=%x so=%x usecount is negative\n", pcb, pcb->inp_socket); + + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); + return (WNT_RELEASE); + break; + + default: + + panic("in_pcb_checkstate: so=%x not a valid state =%x\n", pcb->inp_socket, mode); + } + + /* NOTREACHED */ + return (mode); +} + +/* + * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. + * The inpcb_compat data structure is passed to user space and must + * not change. We intentionally avoid copying pointers. The socket is + * the one exception, though we probably shouldn't copy that either. + */ +void +inpcb_to_compat( + struct inpcb *inp, + struct inpcb_compat *inp_compat) +{ + bzero(inp_compat, sizeof(*inp_compat)); + inp_compat->inp_fport = inp->inp_fport; + inp_compat->inp_lport = inp->inp_lport; + inp_compat->inp_socket = inp->inp_socket; + inp_compat->nat_owner = inp->nat_owner; + inp_compat->nat_cookie = inp->nat_cookie; + inp_compat->inp_gencnt = inp->inp_gencnt; + inp_compat->inp_flags = inp->inp_flags; + inp_compat->inp_flow = inp->inp_flow; + inp_compat->inp_vflag = inp->inp_vflag; + inp_compat->inp_ip_ttl = inp->inp_ip_ttl; + inp_compat->inp_ip_p = inp->inp_ip_p; + inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; + inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; + inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; + inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; + inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; + inp_compat->inp6_ifindex = inp->inp6_ifindex; + inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; +} #ifndef __APPLE__ prison_xinpcb(struct proc *p, struct inpcb *inp) diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h index a32277efc..9f8b77058 100644 --- a/bsd/netinet/in_pcb.h +++ b/bsd/netinet/in_pcb.h @@ -59,12 +59,17 @@ #define _NETINET_IN_PCB_H_ #include +#include #include - +#ifdef KERNEL_PRIVATE +#ifdef KERNEL +#include +#endif +#endif /* KERNEL_PRIVATE */ #include /* for IPSEC */ -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ @@ -78,6 +83,7 @@ */ LIST_HEAD(inpcbhead, inpcb); LIST_HEAD(inpcbporthead, inpcbport); +#endif /* KERNEL_PRIVATE */ typedef u_quad_t inp_gen_t; /* @@ -90,6 +96,7 @@ struct in_addr_4in6 { struct in_addr ia46_addr4; }; +#ifdef KERNEL_PRIVATE /* * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS * of the structure. Therefore, it is important that the members in @@ -100,8 +107,8 @@ struct icmp6_filter; struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* hash list */ - struct in_addr reserved1; /* APPLE reserved: inp_faddr defined in protcol indep. 
part */ - struct in_addr reserved2; /* APPLE reserved */ + int inp_wantcnt; /* pcb wanted count. protected by pcb list lock */ + int inp_state; /* state of this pcb, in use, recycled, ready for recycling... */ u_short inp_fport; /* foreign port */ u_short inp_lport; /* local port */ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ @@ -116,9 +123,7 @@ struct inpcb { int inp_flags; /* generic IP/datagram flags */ u_int32_t inp_flow; - u_char inp_vflag; -#define INP_IPV4 0x1 -#define INP_IPV6 0x2 + u_char inp_vflag; /* INP_IPV4 or INP_IPV6 */ u_char inp_ip_ttl; /* time to live proto */ u_char inp_ip_p; /* protocol proto */ @@ -146,12 +151,6 @@ struct inpcb { /* IP multicast options */ struct ip_moptions *inp4_moptions; } inp_depend4; -#define inp_faddr inp_dependfaddr.inp46_foreign.ia46_addr4 -#define inp_laddr inp_dependladdr.inp46_local.ia46_addr4 -#define inp_route inp_dependroute.inp4_route -#define inp_ip_tos inp_depend4.inp4_ip_tos -#define inp_options inp_depend4.inp4_options -#define inp_moptions inp_depend4.inp4_moptions struct { /* IP options */ struct mbuf *inp6_options; @@ -169,32 +168,20 @@ struct inpcb { u_short inp6_ifindex; short inp6_hops; } inp_depend6; -#define in6p_faddr inp_dependfaddr.inp6_foreign -#define in6p_laddr inp_dependladdr.inp6_local -#define in6p_route inp_dependroute.inp6_route -#define in6p_ip6_hlim inp_depend6.inp6_hlim -#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ -#define in6p_ip6_nxt inp_ip_p -#define in6p_flowinfo inp_flow -#define in6p_vflag inp_vflag -#define in6p_options inp_depend6.inp6_options -#define in6p_outputopts inp_depend6.inp6_outputopts -#define in6p_moptions inp_depend6.inp6_moptions -#define in6p_icmp6filt inp_depend6.inp6_icmp6filt -#define in6p_cksum inp_depend6.inp6_cksum -#define inp6_ifindex inp_depend6.inp6_ifindex -#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ -#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ -#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ -#define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ -#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ int hash_element; /* Array index of pcb's hash list */ caddr_t inp_saved_ppcb; /* place to save pointer while cached */ struct inpcbpolicy *inp_sp; - u_long reserved[3]; /* For future use */ +#ifdef _KERN_LOCKS_H_ + lck_mtx_t *inpcb_mtx; /* inpcb per-socket mutex */ +#else + void *inpcb_mtx; +#endif + u_long reserved[2]; /* For future use */ }; -#endif /* __APPLE_API_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + /* * The range of the generation count, as used in this implementation, * is 9e19. We would have to create 300 billion connections per @@ -206,10 +193,105 @@ struct inpcb { * Interface exported to userland by various protocols which use * inpcbs. Hack alert -- only define if struct xsocket is in scope. */ -#ifdef _SYS_SOCKETVAR_H_ + +/* + * This is a copy of the inpcb as it shipped in Panther. This structure + * is filled out in a copy function. This allows the inpcb to change + * without breaking userland tools. + * + * CAUTION: Many fields may not be filled out. Fewer may be filled out + * in the future. Code defensively. 
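/*
 * [Editor's sketch -- not part of the patch.] The inp_wantcnt/inp_state
 * fields added above are driven by in_pcb_checkstate() in the earlier
 * hunks: WNT_ACQUIRE increments the wanted count unless it is pinned at
 * 0xffff (WNT_STOPUSING), and WNT_RELEASE decrements it and, once a DEAD
 * pcb has drained to zero, tries to pin it so the garbage collector may
 * free it. Below is a user-space rendering of the same compare-and-swap
 * protocol, assuming C11 <stdatomic.h>; struct ref and its functions are
 * illustrative stand-ins, not kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define STOPUSING 0xffffu

struct ref {
	_Atomic unsigned int want;	/* 0..N, or pinned at STOPUSING */
	_Atomic bool dead;		/* INPCB_STATE_DEAD analogue */
};

static bool
ref_acquire(struct ref *r)		/* WNT_ACQUIRE */
{
	unsigned int old = atomic_load(&r->want);
	do {
		if (old == STOPUSING)
			return false;	/* being recycled: treat as not found */
	} while (!atomic_compare_exchange_weak(&r->want, &old, old + 1));
	return true;
}

static void
ref_release(struct ref *r)		/* WNT_RELEASE */
{
	unsigned int old = atomic_load(&r->want);
	do {
		if (old == STOPUSING)
			return;		/* already pinned */
		/* old == 0 here would be a refcount bug; the kernel panics */
	} while (!atomic_compare_exchange_weak(&r->want, &old, old - 1));

	if (atomic_load(&r->dead)) {	/* WNT_STOPUSING: pin once drained */
		unsigned int zero = 0;
		atomic_compare_exchange_strong(&r->want, &zero, STOPUSING);
	}
}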
+ */ +#ifdef KERNEL_PRIVATE +struct inpcb_compat { +#else +struct inpcbinfo; +struct inpcbport; +struct mbuf; +struct ip6_pktopts; +struct ip6_moptions; +struct icmp6_filter; +struct inpcbpolicy; + +struct inpcb { +#endif /* KERNEL_PRIVATE */ + LIST_ENTRY(inpcb) inp_hash; /* hash list */ + struct in_addr reserved1; /* APPLE reserved: inp_faddr defined in protcol indep. part */ + struct in_addr reserved2; /* APPLE reserved */ + u_short inp_fport; /* foreign port */ + u_short inp_lport; /* local port */ + LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ + caddr_t inp_ppcb; /* pointer to per-protocol pcb */ + struct inpcbinfo *inp_pcbinfo; /* PCB list info */ + void* inp_socket; /* back pointer to socket */ + u_char nat_owner; /* Used to NAT TCP/UDP traffic */ + u_long nat_cookie; /* Cookie stored and returned to NAT */ + LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */ + struct inpcbport *inp_phd; /* head of this list */ + inp_gen_t inp_gencnt; /* generation count of this instance */ + int inp_flags; /* generic IP/datagram flags */ + u_int32_t inp_flow; + + u_char inp_vflag; + + u_char inp_ip_ttl; /* time to live proto */ + u_char inp_ip_p; /* protocol proto */ + /* protocol dependent part */ + union { + /* foreign host table entry */ + struct in_addr_4in6 inp46_foreign; + struct in6_addr inp6_foreign; + } inp_dependfaddr; + union { + /* local host table entry */ + struct in_addr_4in6 inp46_local; + struct in6_addr inp6_local; + } inp_dependladdr; + union { + /* placeholder for routing entry */ + u_char inp4_route[20]; + u_char inp6_route[32]; + } inp_dependroute; + struct { + /* type of service proto */ + u_char inp4_ip_tos; + /* IP options */ + struct mbuf *inp4_options; + /* IP multicast options */ + struct ip_moptions *inp4_moptions; + } inp_depend4; + + struct { + /* IP options */ + struct mbuf *inp6_options; + u_int8_t inp6_hlim; + u_int8_t unused_uint8_1; + ushort unused_uint16_1; + /* IP6 options for outgoing packets */ + struct ip6_pktopts *inp6_outputopts; + /* IP multicast options */ + struct ip6_moptions *inp6_moptions; + /* ICMPv6 code type filter */ + struct icmp6_filter *inp6_icmp6filt; + /* IPV6_CHECKSUM setsockopt */ + int inp6_cksum; + u_short inp6_ifindex; + short inp6_hops; + } inp_depend6; + + int hash_element; /* Array index of pcb's hash list */ + caddr_t inp_saved_ppcb; /* place to save pointer while cached */ + struct inpcbpolicy *inp_sp; + u_long reserved[3]; /* For future use */ +}; + struct xinpcb { size_t xi_len; /* length of this structure */ +#ifdef KERNEL_PRIVATE + struct inpcb_compat xi_inp; +#else struct inpcb xi_inp; +#endif struct xsocket xi_socket; u_quad_t xi_alignment_hack; }; @@ -220,9 +302,41 @@ struct xinpgen { inp_gen_t xig_gen; /* generation count at this time */ so_gen_t xig_sogen; /* socket generation count at this time */ }; -#endif /* _SYS_SOCKETVAR_H_ */ -#ifdef __APPLE_API_PRIVATE +/* + * These defines are for use with the inpcb. 
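/*
 * [Editor's sketch -- not part of the patch.] The #ifdef arrangement above
 * lets one header serve two audiences: the kernel names the frozen export
 * layout struct inpcb_compat while its live struct inpcb keeps evolving,
 * and userland sees the frozen layout under the old name struct inpcb, so
 * existing tools recompile unchanged. The pattern in miniature, with
 * hypothetical names (struct foo, foo_to_compat):
 */
#ifdef KERNEL_PRIVATE
struct foo_compat {		/* kernel: frozen layout shipped to userland */
#else
struct foo {			/* userland: same layout, legacy name */
#endif
	int	f_version;
	int	f_flags;	/* nothing in this layout may ever move */
};

#ifdef KERNEL_PRIVATE
struct foo {			/* kernel: the live structure, free to grow */
	int	f_version;
	int	f_flags;
	void	*f_private;	/* new fields never leak to userland */
};
void foo_to_compat(struct foo *f, struct foo_compat *fc);	/* copy function */
#endif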
+ */ +#define INP_IPV4 0x1 +#define INP_IPV6 0x2 +#define inp_faddr inp_dependfaddr.inp46_foreign.ia46_addr4 +#define inp_laddr inp_dependladdr.inp46_local.ia46_addr4 +#define inp_route inp_dependroute.inp4_route +#define inp_ip_tos inp_depend4.inp4_ip_tos +#define inp_options inp_depend4.inp4_options +#define inp_moptions inp_depend4.inp4_moptions +#define in6p_faddr inp_dependfaddr.inp6_foreign +#define in6p_laddr inp_dependladdr.inp6_local +#define in6p_route inp_dependroute.inp6_route +#define in6p_ip6_hlim inp_depend6.inp6_hlim +#define in6p_hops inp_depend6.inp6_hops /* default hop limit */ +#define in6p_ip6_nxt inp_ip_p +#define in6p_flowinfo inp_flow +#define in6p_vflag inp_vflag +#define in6p_options inp_depend6.inp6_options +#define in6p_outputopts inp_depend6.inp6_outputopts +#define in6p_moptions inp_depend6.inp6_moptions +#define in6p_icmp6filt inp_depend6.inp6_icmp6filt +#define in6p_cksum inp_depend6.inp6_cksum +#define inp6_ifindex inp_depend6.inp6_ifindex +#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ +#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ +#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ +#define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ +#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ +#define in6p_state inp_state +#define in6p_wantcnt inp_wantcnt + +#ifdef KERNEL_PRIVATE struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; @@ -245,10 +359,21 @@ struct inpcbinfo { /* XXX documentation, prefixes */ u_int ipi_count; /* number of pcbs in this list */ u_quad_t ipi_gencnt; /* current generation count */ #ifdef __APPLE__ - u_char all_owners; - struct socket nat_dummy_socket; - struct inpcb *last_pcb; - caddr_t dummy_cb; + u_char all_owners; + struct socket nat_dummy_socket; /* fake socket for NAT pcb backpointer */ + struct inpcb *nat_dummy_pcb; /* fake pcb for finding NAT mutex */ + caddr_t dummy_cb; +#ifdef _KERN_LOCKS_H_ + lck_attr_t *mtx_attr; /* mutex attributes */ + lck_grp_t *mtx_grp; /* mutex group definition */ + lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ + lck_rw_t *mtx; /* global mutex for the pcblist*/ +#else + void *mtx_attr; /* mutex attributes */ + void *mtx_grp; /* mutex group definition */ + void *mtx_grp_attr; /* mutex group attributes */ + void *mtx; /* global mutex for the pcblist*/ +#endif #endif }; @@ -257,6 +382,8 @@ struct inpcbinfo { /* XXX documentation, prefixes */ #define INP_PCBPORTHASH(lport, mask) \ (ntohs((lport)) & (mask)) +#endif /* KERNEL_PRIVATE */ + /* flags in inp_flags: */ #define INP_RECVOPTS 0x01 /* receive incoming IP options */ #define INP_RECVRETOPTS 0x02 /* receive IP options for reply */ @@ -268,12 +395,12 @@ struct inpcbinfo { /* XXX documentation, prefixes */ #define INP_RECVIF 0x80 /* receive incoming interface */ #define INP_MTUDISC 0x100 /* user can do MTU discovery */ #ifdef __APPLE__ -#define INP_STRIPHDR 0x200 /* Strip headers in raw_ip, for OT support */ +#define INP_STRIPHDR 0x200 /* Strip headers in raw_ip, for OT support */ #endif #define INP_FAITH 0x400 /* accept FAITH'ed connections */ #define INP_INADDR_ANY 0x800 /* local address wasn't specified */ -#define INP_RECVTTL 0x1000 +#define INP_RECVTTL 0x1000 #define IN6P_IPV6_V6ONLY 0x008000 /* restrict AF_INET6 socket for v6 */ @@ -286,6 +413,7 @@ struct inpcbinfo { /* XXX documentation, prefixes */ #define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ #define IN6P_BINDV6ONLY 0x10000000 /* do not grab IPv4 traffic 
*/ +#ifdef KERNEL_PRIVATE #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ INP_RECVIF|\ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ @@ -313,7 +441,7 @@ struct inpcbinfo { /* XXX documentation, prefixes */ #define INPCB_NO_OWNER 0x0 #define INPCB_OWNED_BY_X 0x80 #define INPCB_MAX_IDS 7 -#endif +#endif /* __APPLE__ */ #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) #define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ @@ -330,55 +458,63 @@ extern int ipport_lastauto; extern int ipport_hifirstauto; extern int ipport_hilastauto; -void in_pcbpurgeif0 __P((struct inpcb *, struct ifnet *)); -void in_losing __P((struct inpcb *)); -void in_rtchange __P((struct inpcb *, int)); -int in_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *)); -int in_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *)); -int in_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *)); -void in_pcbdetach __P((struct inpcb *)); -void in_pcbdisconnect __P((struct inpcb *)); -int in_pcbinshash __P((struct inpcb *)); -int in_pcbladdr __P((struct inpcb *, struct sockaddr *, - struct sockaddr_in **)); +#define INPCB_STATE_INUSE 0x1 /* freshly allocated PCB, it's in use */ +#define INPCB_STATE_CACHED 0x2 /* this pcb is sitting in a cache */ +#define INPCB_STATE_DEAD 0x3 /* should treat as gone, will be garbage collected and freed */ + +#define WNT_STOPUSING 0xffff /* marked as ready to be garbage collected, should be treated as not found */ +#define WNT_ACQUIRE 0x1 /* that pcb is being acquired, do not recycle this time */ +#define WNT_RELEASE 0x2 /* release acquired mode, can be garbage collected when wantcnt is zero */ + + +void in_pcbpurgeif0(struct inpcb *, struct ifnet *); +void in_losing(struct inpcb *); +void in_rtchange(struct inpcb *, int); +int in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *); +int in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); +int in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); +void in_pcbdetach(struct inpcb *); +void in_pcbdispose (struct inpcb *); +void in_pcbdisconnect(struct inpcb *); +int in_pcbinshash(struct inpcb *, int); +int in_pcbladdr(struct inpcb *, struct sockaddr *, struct sockaddr_in **); struct inpcb * - in_pcblookup_local __P((struct inpcbinfo *, - struct in_addr, u_int, int)); + in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_int, int); struct inpcb * - in_pcblookup_hash __P((struct inpcbinfo *, + in_pcblookup_hash(struct inpcbinfo *, struct in_addr, u_int, struct in_addr, u_int, - int, struct ifnet *)); -void in_pcbnotifyall __P((struct inpcbhead *, struct in_addr, - int, void (*)(struct inpcb *, int))); -void in_pcbrehash __P((struct inpcb *)); -int in_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); -int in_setsockaddr __P((struct socket *so, struct sockaddr **nam)); + int, struct ifnet *); +void in_pcbnotifyall(struct inpcbinfo *, struct in_addr, + int, void (*)(struct inpcb *, int)); +void in_pcbrehash(struct inpcb *); +int in_setpeeraddr(struct socket *so, struct sockaddr **nam); +int in_setsockaddr(struct socket *so, struct sockaddr **nam); +int in_pcb_checkstate(struct inpcb *pcb, int mode, int locked); -#ifdef __APPLE__ -int -in_pcb_grab_port __P((struct inpcbinfo *pcbinfo, +int +in_pcb_grab_port (struct inpcbinfo *pcbinfo, u_short options, struct in_addr laddr, u_short *lport, struct in_addr faddr, u_short fport, u_int cookie, - u_char owner_id)); + u_char owner_id); int -in_pcb_letgo_port __P((struct inpcbinfo *pcbinfo,
+in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, struct in_addr faddr, - u_short fport, u_char owner_id)); + u_short fport, u_char owner_id); u_char -in_pcb_get_owner __P((struct inpcbinfo *pcbinfo, +in_pcb_get_owner(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, struct in_addr faddr, u_short fport, - u_int *cookie)); + u_int *cookie); void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily, int pfamily, int protocol); @@ -388,13 +524,12 @@ in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id); int in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id); -#endif /* __APPLE__ */ -void in_pcbremlists __P((struct inpcb *inp)); -#ifndef __APPLE__ -int prison_xinpcb __P((struct proc *p, struct inpcb *inp)); -#endif -#endif /* _KERNEL */ -#endif /* __APPLE_API_PRIVATE */ +void in_pcbremlists(struct inpcb *inp); +int in_pcb_ckeckstate(struct inpcb *, int, int); +void inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat); + +#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* !_NETINET_IN_PCB_H_ */ diff --git a/bsd/netinet/in_proto.c b/bsd/netinet/in_proto.c index ad4409a07..d910aa342 100644 --- a/bsd/netinet/in_proto.c +++ b/bsd/netinet/in_proto.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -78,6 +79,7 @@ #include #include #include +#include /* @@ -102,98 +104,127 @@ static struct pr_usrreqs nousrreqs; extern struct pr_usrreqs icmp_dgram_usrreqs; extern int icmp_dgram_ctloutput(struct socket *, struct sockopt *); + struct protosw inetsw[] = { { 0, &inetdomain, 0, 0, 0, 0, 0, 0, 0, ip_init, 0, ip_slowtimo, ip_drain, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, -{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, +{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK, udp_input, 0, udp_ctlinput, ip_ctloutput, 0, - udp_init, 0, 0, 0, - 0, &udp_usrreqs + udp_init, 0, udp_slowtimo, 0, + 0, + &udp_usrreqs, + udp_lock, udp_unlock, udp_getlock, { 0, 0 }, 0, { 0 } }, -{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, - PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, +{ SOCK_STREAM, &inetdomain, IPPROTO_TCP, + PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD|PR_PCBLOCK|PR_PROTOLOCK|PR_DISPOSE, tcp_input, 0, tcp_ctlinput, tcp_ctloutput, 0, tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, - 0, &tcp_usrreqs + 0, + &tcp_usrreqs, + tcp_lock, tcp_unlock, tcp_getlock, { 0, 0 }, 0, { 0 } }, { SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, rip_input, 0, rip_ctlinput, rip_ctloutput, 0, 0, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, { SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, icmp_input, 0, 0, rip_ctloutput, 0, 0, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, -{ SOCK_DGRAM, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - icmp_input, 0, 0, icmp_dgram_ctloutput, +{ SOCK_DGRAM, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, + icmp_input, 0, 0, icmp_dgram_ctloutput, 0, - 0, 0, 0, 0, - 0, &icmp_dgram_usrreqs + 0, 0, 0, 0, + 0, + &icmp_dgram_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, { SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, igmp_input, 0, 0, rip_ctloutput, 0, igmp_init, igmp_fasttimo, igmp_slowtimo, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, { SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, rsvp_input, 0, 0, rip_ctloutput, 
0, 0, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, #if IPSEC -{ SOCK_RAW, &inetdomain, IPPROTO_AH, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inetdomain, IPPROTO_AH, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, ah4_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #if IPSEC_ESP -{ SOCK_RAW, &inetdomain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inetdomain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, esp4_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #endif -{ SOCK_RAW, &inetdomain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inetdomain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, ipcomp4_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #endif /* IPSEC */ { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR|PR_LASTHDR, encap4_input, 0, 0, rip_ctloutput, 0, encap_init, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, # if INET6 { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, encap4_input, 0, 0, rip_ctloutput, 0, encap_init, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #endif #if IPDIVERT -{ SOCK_RAW, &inetdomain, IPPROTO_DIVERT, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inetdomain, IPPROTO_DIVERT, PR_ATOMIC|PR_ADDR|PR_PCBLOCK, div_input, 0, 0, ip_ctloutput, 0, div_init, 0, 0, 0, - 0, &div_usrreqs, + 0, + &div_usrreqs, + div_lock, div_unlock, div_getlock, { 0, 0 }, 0, { 0 } }, #endif #if IPXIP @@ -201,7 +232,9 @@ struct protosw inetsw[] = { ipxip_input, 0, ipxip_ctlinput, 0, 0, 0, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #endif #if NSIP @@ -209,29 +242,44 @@ struct protosw inetsw[] = { idpip_input, 0, nsip_ctlinput, 0, 0, 0, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, 0, 0, { 0, 0 }, 0, { 0 } }, #endif /* raw wildcard */ -{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR|PR_LASTHDR, rip_input, 0, 0, rip_ctloutput, 0, rip_init, 0, 0, 0, - 0, &rip_usrreqs + 0, + &rip_usrreqs, + 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } }, }; -extern int in_inithead __P((void **, int)); +extern int in_inithead(void **, int); int in_proto_count = (sizeof (inetsw) / sizeof (struct protosw)); extern void in_dinit(void); /* A routing init function, and a header size */ struct domain inetdomain = - { AF_INET, "internet", in_dinit, 0, 0, - inetsw, 0, - in_inithead, 32, sizeof(struct sockaddr_in), - sizeof(struct tcpiphdr), 0 + { AF_INET, + "internet", + in_dinit, + 0, + 0, + inetsw, + 0, + in_inithead, + 32, + sizeof(struct sockaddr_in), + sizeof(struct tcpiphdr), + 0, + 0, + 0, + { 0, 0} }; DOMAIN_SET(inet); diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c index 62d6eb597..3a7afc3cd 100644 --- a/bsd/netinet/in_rmx.c +++ b/bsd/netinet/in_rmx.c @@ -70,19 +70,21 @@ #include #include #include +#include #include #include #include #include -extern int in_inithead __P((void **head, int off)); +extern int in_inithead(void **head, int off); #ifdef __APPLE__ static void in_rtqtimo(void *rock); #endif #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ +extern lck_mtx_t *rt_mtx; /* * Do what we need to do when inserting a route. @@ -145,21 +147,21 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * Find out if it is because of an * ARP entry and delete it if so. 
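/*
 * [Editor's sketch -- not part of the patch.] The inetsw[] rows above gain
 * per-protocol lock callbacks (tcp_lock/tcp_unlock/tcp_getlock, div_lock,
 * rip_unlock, ...), with zero entries meaning "fall back to the shared
 * domain mutex". A compile-only sketch of dispatching through such a
 * table; the names protosw_sketch, domain_lock, ... are illustrative.
 */
struct sock;					/* opaque here */
typedef int (*lockfn_t)(struct sock *, int refcount);

struct protosw_sketch {
	int		pr_protocol;
	lockfn_t	pr_lock;		/* NULL => use domain lock */
	lockfn_t	pr_unlock;
};

extern int domain_lock(struct sock *, int);	/* shared fallback */
extern int domain_unlock(struct sock *, int);

static int
proto_lock(const struct protosw_sketch *pr, struct sock *so)
{
	if (pr->pr_lock != NULL)		/* per-pcb lock (PR_PCBLOCK) */
		return pr->pr_lock(so, 1);
	return domain_lock(so, 1);
}

static int
proto_unlock(const struct protosw_sketch *pr, struct sock *so)
{
	if (pr->pr_unlock != NULL)
		return pr->pr_unlock(so, 1);
	return domain_unlock(so, 1);
}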
*/ - rt2 = rtalloc1((struct sockaddr *)sin, 0, + rt2 = rtalloc1_locked((struct sockaddr *)sin, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK) { - rtrequest(RTM_DELETE, + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); } - rtfree(rt2); + rtfree_locked(rt2); } } return ret; @@ -232,6 +234,7 @@ static void in_clsroute(struct radix_node *rn, struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + struct timeval timenow; if(!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ @@ -249,10 +252,11 @@ in_clsroute(struct radix_node *rn, struct radix_node_head *head) * waiting for a timeout cycle to kill it. */ if(rtq_reallyold != 0) { + getmicrotime(&timenow); rt->rt_flags |= RTPRF_OURS; - rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; + rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold; } else { - rtrequest(RTM_DELETE, + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); @@ -279,15 +283,18 @@ in_rtqkill(struct radix_node *rn, void *rock) struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; int err; + struct timeval timenow; + getmicrotime(&timenow); + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); if(rt->rt_flags & RTPRF_OURS) { ap->found++; - if(ap->draining || rt->rt_rmx.rmx_expire <= time_second) { + if(ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) { if(rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest(RTM_DELETE, + err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); @@ -298,9 +305,9 @@ in_rtqkill(struct radix_node *rn, void *rock) } } else { if(ap->updating - && (rt->rt_rmx.rmx_expire - time_second + && (rt->rt_rmx.rmx_expire - timenow.tv_sec > rtq_reallyold)) { - rt->rt_rmx.rmx_expire = time_second + rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold; } ap->nextstop = lmin(ap->nextstop, @@ -314,11 +321,7 @@ in_rtqkill(struct radix_node *rn, void *rock) static void in_rtqtimo_funnel(void *rock) { - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); in_rtqtimo(rock); - (void) thread_funnel_set(network_flock, FALSE); } #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ @@ -331,15 +334,15 @@ in_rtqtimo(void *rock) struct rtqk_arg arg; struct timeval atv; static time_t last_adjusted_timeout = 0; - int s; + struct timeval timenow; + getmicrotime(&timenow); arg.found = arg.killed = 0; arg.rnh = rnh; - arg.nextstop = time_second + rtq_timeout; + arg.nextstop = timenow.tv_sec + rtq_timeout; arg.draining = arg.updating = 0; - s = splnet(); + lck_mtx_lock(rt_mtx); rnh->rnh_walktree(rnh, in_rtqkill, &arg); - splx(s); /* * Attempt to be somewhat dynamic about this: @@ -350,27 +353,26 @@ in_rtqtimo(void *rock) * hard. 
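/*
 * [Editor's sketch -- not part of the patch.] The in_rtqtimo() logic kept
 * above adapts its expiry horizon: when a sweep leaves more than
 * rtq_toomany cached routes alive, rtq_reallyold is cut to two thirds
 * (never below rtq_minreallyold), at most once per rtq_timeout period.
 * The same control loop in isolation, runnable stand-alone; the constants
 * are placeholders, not the kernel's tunables.
 */
#include <stdio.h>
#include <time.h>

static long reallyold = 3600;			/* TTL for cached entries */
static const long min_reallyold = 10;
static const long adjust_interval = 600;	/* one cut per period, max */
static const long toomany = 128;
static time_t last_adjusted;

static void
maybe_crank_down(long surviving, time_t now)
{
	if (surviving > toomany &&
	    now - last_adjusted >= adjust_interval &&
	    reallyold > min_reallyold) {
		reallyold = 2 * reallyold / 3;	/* geometric back-off */
		if (reallyold < min_reallyold)
			reallyold = min_reallyold;
		last_adjusted = now;
		printf("adjusted TTL to %ld\n", reallyold);
	}
}

int
main(void)
{
	time_t now = time(NULL);
	for (int i = 0; i < 3; i++)		/* three crowded sweeps */
		maybe_crank_down(500, now + i * adjust_interval);
	return 0;
}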
*/ if((arg.found - arg.killed > rtq_toomany) - && (time_second - last_adjusted_timeout >= rtq_timeout) + && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout) && rtq_reallyold > rtq_minreallyold) { rtq_reallyold = 2*rtq_reallyold / 3; if(rtq_reallyold < rtq_minreallyold) { rtq_reallyold = rtq_minreallyold; } - last_adjusted_timeout = time_second; + last_adjusted_timeout = timenow.tv_sec; #if DIAGNOSTIC log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", rtq_reallyold); #endif arg.found = arg.killed = 0; arg.updating = 1; - s = splnet(); rnh->rnh_walktree(rnh, in_rtqkill, &arg); - splx(s); } atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_second; + atv.tv_sec = arg.nextstop - timenow.tv_sec; + lck_mtx_unlock(rt_mtx); timeout(in_rtqtimo_funnel, rock, tvtohz(&atv)); } @@ -379,15 +381,14 @@ in_rtqdrain(void) { struct radix_node_head *rnh = rt_tables[AF_INET]; struct rtqk_arg arg; - int s; arg.found = arg.killed = 0; arg.rnh = rnh; arg.nextstop = 0; arg.draining = 1; arg.updating = 0; - s = splnet(); + lck_mtx_lock(rt_mtx); rnh->rnh_walktree(rnh, in_rtqkill, &arg); - splx(s); + lck_mtx_unlock(rt_mtx); } /* @@ -451,7 +452,7 @@ in_ifadownkill(struct radix_node *rn, void *xap) * so that behavior is not needed there. */ rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); - err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), + err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in_ifadownkill: error %d\n", err); @@ -466,6 +467,8 @@ in_ifadown(struct ifaddr *ifa, int delete) struct in_ifadown_arg arg; struct radix_node_head *rnh; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (ifa->ifa_addr->sa_family != AF_INET) return 1; diff --git a/bsd/netinet/in_systm.h b/bsd/netinet/in_systm.h index 347a460e3..ecfcc405d 100644 --- a/bsd/netinet/in_systm.h +++ b/bsd/netinet/in_systm.h @@ -58,6 +58,7 @@ #ifndef _NETINET_IN_SYSTM_H_ #define _NETINET_IN_SYSTM_H_ #include +#include /* * Miscellaneous internetwork @@ -72,15 +73,13 @@ * the bytes before transmission at each protocol level. The n_ types * represent the types with the bytes in ``high-ender'' order. */ -typedef u_int16_t n_short; /* short as received from the net */ -typedef u_int32_t n_long; /* long as received from the net */ +typedef __uint16_t n_short; /* short as received from the net */ +typedef __uint32_t n_long; /* long as received from the net */ -typedef u_int32_t n_time; /* ms since 00:00 GMT, byte rev */ +typedef __uint32_t n_time; /* ms since 00:00 GMT, byte rev */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -n_time iptime __P((void)); -#endif /* __APPLE_API_PRIVATE */ -#endif +#ifdef KERNEL_PRIVATE +n_time iptime(void); +#endif KERNEL_PRIVATE #endif diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h index e33e1ea2e..1ec7420be 100644 --- a/bsd/netinet/in_var.h +++ b/bsd/netinet/in_var.h @@ -64,7 +64,7 @@ #include #endif -#ifdef __APPLE_API_UNSTABLE +#ifdef PRIVATE /* * Interface address, Internet version. One of these structures @@ -88,6 +88,7 @@ struct in_ifaddr { #define ia_broadaddr ia_dstaddr struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ }; +#endif /* PRIVATE */ struct in_aliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ @@ -100,7 +101,6 @@ struct in_aliasreq { #endif }; -#ifdef __APPLE__ /* * Event data, internet style. 
*/ @@ -136,8 +136,8 @@ struct kev_in_collision { #define KEV_INET_SIFBRDADDR 5 #define KEV_INET_SIFNETMASK 6 #define KEV_INET_ARPCOLLISION 7 /* use kev_in_collision */ -#endif /* __APPLE__ */ +#ifdef KERNEL_PRIVATE /* * Given a pointer to an in_ifaddr (ifaddr), * return a pointer to the addr as a sockaddr_in. @@ -148,14 +148,11 @@ struct kev_in_collision { #define IN_LNAOF(in, ifa) \ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask)) -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead; extern struct ifqueue ipintrq; /* ip packet input queue */ extern struct in_addr zeroin_addr; extern u_char inetctlerrmap[]; +extern lck_mtx_t *rt_mtx; /* * Macro for finding the interface (ifnet structure) corresponding to one @@ -167,10 +164,13 @@ extern u_char inetctlerrmap[]; { \ struct in_ifaddr *ia; \ \ + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \ + lck_mtx_lock(rt_mtx); \ TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) \ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \ break; \ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \ + lck_mtx_unlock(rt_mtx); \ } /* @@ -181,15 +181,15 @@ extern u_char inetctlerrmap[]; /* struct ifnet *ifp; */ \ /* struct in_ifaddr *ia; */ \ { \ + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \ + lck_mtx_lock(rt_mtx); \ for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \ (ia) != NULL && (ia)->ia_ifp != (ifp); \ (ia) = TAILQ_NEXT((ia), ia_link)) \ continue; \ + lck_mtx_unlock(rt_mtx); \ } -#endif /* __APPLE_API_PRIVATE */ -#endif -#ifdef __APPLE_API_UNSTABLE /* * This information should be part of the ifnet structure but we don't wish * to change that - as it might break a number of things @@ -219,10 +219,6 @@ struct in_multi { u_int inm_state; /* state of the membership */ struct router_info *inm_rti; /* router info*/ }; -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_ip); @@ -283,20 +279,19 @@ do { \ } while(0) struct route; -struct in_multi *in_addmulti __P((struct in_addr *, struct ifnet *)); -void in_delmulti __P((struct in_multi *)); -int in_control __P((struct socket *, u_long, caddr_t, struct ifnet *, - struct proc *)); -void in_rtqdrain __P((void)); -void ip_input __P((struct mbuf *)); -int in_ifadown __P((struct ifaddr *ifa, int)); -void in_ifscrub __P((struct ifnet *, struct in_ifaddr *)); -int ipflow_fastforward __P((struct mbuf *)); -void ipflow_create __P((const struct route *, struct mbuf *)); -void ipflow_slowtimo __P((void)); +struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); +void in_delmulti(struct in_multi **); +int in_control(struct socket *, u_long, caddr_t, struct ifnet *, + struct proc *); +void in_rtqdrain(void); +void ip_input(struct mbuf *); +int in_ifadown(struct ifaddr *ifa, int); +void in_ifscrub(struct ifnet *, struct in_ifaddr *, int); +int ipflow_fastforward(struct mbuf *); +void ipflow_create(const struct route *, struct mbuf *); +void ipflow_slowtimo(void); -#endif /* __APPLE_API_PRIVATE */ -#endif /* _KERNEL */ +#endif /* KERNEL_PRIVATE */ /* INET6 stuff */ #include diff --git a/bsd/netinet/ip.h b/bsd/netinet/ip.h index 8f19e94a6..7b75ffb2c 100644 --- a/bsd/netinet/ip.h +++ b/bsd/netinet/ip.h @@ -58,6 +58,10 @@ #ifndef _NETINET_IP_H_ #define _NETINET_IP_H_ #include +#include > /* XXX temporary hack to get u_ types */ +#include +#include + /* * Definitions for internet protocol version 4. 
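/*
 * [Editor's sketch -- not part of the patch.] The INADDR_TO_IFP/IFP_TO_IA
 * macros above now assert that rt_mtx is not already held, take it, walk
 * the address list, and release it before handing the match back. A
 * pthreads rendering of the same shape; as with the macros, the returned
 * pointer is only safe to use after unlock if entries outlive the lookup
 * or the caller revalidates. Names are illustrative.
 */
#include <pthread.h>
#include <stdint.h>
#include <stddef.h>

struct ia_sketch {
	uint32_t		addr;
	struct ia_sketch	*next;
	void			*ifp;
};

static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct ia_sketch *ia_head;

static void *
addr_to_ifp(uint32_t addr)
{
	struct ia_sketch *ia;
	void *ifp = NULL;

	pthread_mutex_lock(&list_mtx);	/* caller must not hold it already */
	for (ia = ia_head; ia != NULL; ia = ia->next)
		if (ia->addr == addr) {
			ifp = ia->ifp;
			break;
		}
	pthread_mutex_unlock(&list_mtx);
	return ifp;
}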
diff --git a/bsd/netinet/ip6.h b/bsd/netinet/ip6.h index 6a3b65785..c677f2b6a 100644 --- a/bsd/netinet/ip6.h +++ b/bsd/netinet/ip6.h @@ -237,8 +237,7 @@ struct ip6_frag { #define IPV6_MMTU 1280 /* minimal MTU and reassembly. 1024 + 256 */ #define IPV6_MAXPACKET 65535 /* ip6 max packet size without Jumbo payload*/ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * IP6_EXTHDR_CHECK ensures that region between the IP6 header and the * target header (including IPv6 itself, extension headers and @@ -248,25 +247,25 @@ struct ip6_frag { * supposed to never be matched but is prepared just in case. */ -#define IP6_EXTHDR_CHECK(m, off, hlen, ret) \ +#define IP6_EXTHDR_CHECK(m, off, hlen, action) \ do { \ if ((m)->m_next != NULL) { \ if (((m)->m_flags & M_LOOP) && \ ((m)->m_len < (off) + (hlen)) && \ (((m) = m_pullup((m), (off) + (hlen))) == NULL)) { \ ip6stat.ip6s_exthdrtoolong++; \ - return ret; \ + action; \ } else if ((m)->m_flags & M_EXT) { \ if ((m)->m_len < (off) + (hlen)) { \ ip6stat.ip6s_exthdrtoolong++; \ m_freem(m); \ - return ret; \ + action; \ } \ } else { \ if ((m)->m_len < (off) + (hlen)) { \ ip6stat.ip6s_exthdrtoolong++; \ m_freem(m); \ - return ret; \ + action; \ } \ } \ } else { \ @@ -274,7 +273,7 @@ do { \ ip6stat.ip6s_tooshort++; \ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); \ m_freem(m); \ - return ret; \ + action; \ } \ } \ } while (0) @@ -325,7 +324,6 @@ do { \ } \ } \ } while (0) -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ -#endif /* not _NETINET_IP6_H_ */ +#endif KERNEL_PRIVATE +#endif !_NETINET_IP6_H_ diff --git a/bsd/netinet/ip_compat.h b/bsd/netinet/ip_compat.h index a39802e7f..8fc0d1ea5 100644 --- a/bsd/netinet/ip_compat.h +++ b/bsd/netinet/ip_compat.h @@ -35,13 +35,8 @@ #ifndef __IP_COMPAT_H__ #define __IP_COMPAT_H__ -#ifndef __P -# ifdef __STDC__ -# define __P(x) x -# else -# define __P(x) () -# define const -# endif +#ifndef __STDC__ +# define const #endif #ifndef SOLARIS @@ -245,7 +240,7 @@ typedef struct qif { */ int qf_hl; /* header length */ } qif_t; -extern ill_t *get_unit __P((char *)); +extern ill_t *get_unit(char *); # define GETUNIT(n) get_unit((n)) # else /* SOLARIS */ # if defined(__sgi) @@ -289,8 +284,8 @@ typedef struct { # define KFREE(x) kmem_free((char *)(x), sizeof(*(x))) # define KFREES(x,s) kmem_free((char *)(x), (s)) # if !SOLARIS -extern void m_copydata __P((struct mbuf *, int, int, caddr_t)); -extern void m_copyback __P((struct mbuf *, int, int, caddr_t)); +extern void m_copydata(struct mbuf *, int, int, caddr_t); +extern void m_copyback(struct mbuf *, int, int, caddr_t); # endif # ifdef __sgi # include diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index f33097694..e92d7e6a3 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -51,10 +51,9 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.42.2.4 2001/07/29 19:32:40 ume Exp $ + * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.98 2004/08/17 22:05:54 andre Exp $ */ - #ifndef INET #error "IPDIVERT requires INET." #endif @@ -64,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +80,10 @@ #include #include #include +#include +#include + +#include /* * Divert sockets @@ -92,20 +96,28 @@ #define DIVRCVQ (65536 + 100) /* - * A 16 bit cookie is passed to and from the user process. - * The user process can send it back to help the caller know - * something about where the packet originally came from. 
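/*
 * [Editor's sketch -- not part of the patch.] The IP6_EXTHDR_CHECK rewrite
 * above swaps a return-value parameter ("return ret") for a statement
 * parameter ("action"), so a caller can return, fall through, or jump to a
 * cleanup label. The pattern in miniature, runnable stand-alone:
 */
#include <stdio.h>

static int too_short;

/* run 'action' when the buffer cannot hold 'need' bytes */
#define LEN_CHECK(len, need, action)		\
do {						\
	if ((len) < (need)) {			\
		too_short++;			\
		action;				\
	}					\
} while (0)

static int
parse_fixed(int len)
{
	LEN_CHECK(len, 8, return -1);		/* old style: early return */
	return 0;
}

static int
parse_cleanup(int len)
{
	LEN_CHECK(len, 8, goto drop);		/* new style: shared cleanup */
	return 0;
drop:
	return -1;
}

int
main(void)
{
	printf("%d %d %d\n", parse_fixed(4), parse_cleanup(16), too_short);
	return 0;
}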
+ * Divert sockets work in conjunction with ipfw, see the divert(4) + * manpage for features. + * Internally, packets selected by ipfw in ip_input() or ip_output(), + * and never diverted before, are passed to the input queue of the + * divert socket with a given 'divert_port' number (as specified in + * the matching ipfw rule), and they are tagged with a 16 bit cookie + * (representing the rule number of the matching ipfw rule), which + * is passed to process reading from the socket. * - * In the case of ipfw, then the cookie is the rule that sent - * us here. On reinjection is is the rule after which processing - * should continue. Leaving it the same will make processing start - * at the rule number after that which sent it here. Setting it to - * 0 will restart processing at the beginning. + * Packets written to the divert socket are again tagged with a cookie + * (usually the same as above) and a destination address. + * If the destination address is INADDR_ANY then the packet is + * treated as outgoing and sent to ip_output(), otherwise it is + * treated as incoming and sent to ip_input(). + * In both cases, the packet is tagged with the cookie. * - * For divert_packet(), ip_divert_cookie is an input value only. - * For div_output(), ip_divert_cookie is an output value only. + * On reinjection, processing in ip_input() and ip_output() + * will be exactly the same as for the original packet, except that + * ipfw processing will start at the rule number after the one + * written in the cookie (so, tagging a packet with a cookie of 0 + * will cause it to be effectively considered as a standard packet). */ -u_int16_t ip_divert_cookie; /* Internal variables */ static struct inpcbhead divcb; @@ -115,18 +127,20 @@ static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ /* Optimization: have this preinitialized */ -static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET }; +static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, }; /* Internal functions */ static int div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, struct mbuf *control); +extern int load_ipfw(void); /* * Initialize divert connection block queue. */ void div_init(void) { + struct inpcbinfo *pcbinfo; LIST_INIT(&divcb); divcbinfo.listhead = &divcb; /* @@ -138,13 +152,26 @@ div_init(void) divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); divcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)), 4096, "divzone"); + pcbinfo = &divcbinfo; + /* + * allocate lock group attribute and group for udp pcb mutexes + */ + pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); -/* - * ### LD 08/03: init IP forwarding at this point [ipfw is not a module yet] - */ -#if !IPFIREWALL_KEXT - ip_fw_init(); -#endif + pcbinfo->mtx_grp = lck_grp_alloc_init("divcb", pcbinfo->mtx_grp_attr); + + /* + * allocate the lock attribute for divert pcb mutexes + */ + pcbinfo->mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(pcbinfo->mtx_attr); + + if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) + return; /* pretty much dead if this fails... */ + + if (!IPFW_LOADED) { + load_ipfw(); + } } /* @@ -152,7 +179,7 @@ div_init(void) * with that protocol number to enter the system from the outside. 
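/*
 * [Editor's sketch -- not part of the patch.] div_init() above shows the
 * boilerplate that replaces spl levels in this release: allocate a lock
 * group attribute, a named lock group, a lock attribute, then the lock
 * itself. A condensed kernel-context sketch of the same sequence, using
 * the <kern/locks.h> calls exactly as they appear in this patch; it only
 * builds inside the kernel, and the variable names are illustrative.
 */
#include <kern/locks.h>

static lck_grp_attr_t	*grp_attr;
static lck_grp_t	*grp;
static lck_attr_t	*attr;
static lck_rw_t		*pcblist_lock;

static int
pcbinfo_lock_init(void)
{
	grp_attr = lck_grp_attr_alloc_init();
	grp = lck_grp_alloc_init("divcb", grp_attr);	/* name for lock stats */
	attr = lck_attr_alloc_init();
	lck_attr_setdefault(attr);

	pcblist_lock = lck_rw_alloc_init(grp, attr);
	return (pcblist_lock == NULL);	/* "pretty much dead if this fails" */
}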
*/ void -div_input(struct mbuf *m, int off) +div_input(struct mbuf *m, __unused int off) { ipstat.ips_noproto++; m_freem(m); @@ -163,9 +190,10 @@ div_input(struct mbuf *m, int off) * * Setup generic address and protocol structures for div_input routine, * then pass them along with mbuf chain. + * ###LOCK called in ip_mutex from ip_output/ip_input */ void -divert_packet(struct mbuf *m, int incoming, int port) +divert_packet(struct mbuf *m, int incoming, int port, int rule) { struct ip *ip; struct inpcb *inp; @@ -175,9 +203,7 @@ divert_packet(struct mbuf *m, int incoming, int port) /* Sanity check */ KASSERT(port != 0, ("%s: port=0", __FUNCTION__)); - /* Record and reset divert cookie */ - divsrc.sin_port = ip_divert_cookie; - ip_divert_cookie = 0; + divsrc.sin_port = rule; /* record matching rule */ /* Assure header */ if (m->m_len < sizeof(struct ip) && @@ -198,6 +224,7 @@ divert_packet(struct mbuf *m, int incoming, int port) KASSERT((m->m_flags & M_PKTHDR), ("%s: !PKTHDR", __FUNCTION__)); /* Find IP address for receive interface */ + ifnet_lock_shared(m->m_pkthdr.rcvif); TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL) continue; @@ -207,6 +234,7 @@ divert_packet(struct mbuf *m, int incoming, int port) ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } + ifnet_lock_done(m->m_pkthdr.rcvif); } /* * Record the incoming interface name whenever we have one. @@ -239,21 +267,25 @@ divert_packet(struct mbuf *m, int incoming, int port) /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)port); + lck_rw_lock_shared(divcbinfo.mtx); LIST_FOREACH(inp, &divcb, inp_list) { if (inp->inp_lport == nport) sa = inp->inp_socket; } if (sa) { + int error = 0; + + socket_lock(sa, 1); if (sbappendaddr(&sa->so_rcv, (struct sockaddr *)&divsrc, - m, (struct mbuf *)0) == 0) - m_freem(m); - else + m, (struct mbuf *)0, &error) != 0) sorwakeup(sa); + socket_unlock(sa, 1); } else { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } + lck_rw_done(divcbinfo.mtx); } /* @@ -262,6 +294,7 @@ divert_packet(struct mbuf *m, int incoming, int port) * If no address specified, or address is 0.0.0.0, send to ip_output(); * otherwise, send to ip_input() and mark as having been received on * the interface with that address. + * ###LOCK called in inet_proto mutex when from div_send. */ static int div_output(so, m, addr, control) @@ -280,10 +313,21 @@ div_output(so, m, addr, control) /* Loopback avoidance and state recovery */ if (sin) { + struct m_tag *mtag; + struct divert_tag *dt; int len = 0; char *c = sin->sin_zero; - ip_divert_cookie = sin->sin_port; + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, + sizeof(struct divert_tag), M_NOWAIT); + if (mtag == NULL) { + error = ENOBUFS; + goto cantsend; + } + dt = (struct divert_tag *)(mtag+1); + dt->info = 0; + dt->cookie = sin->sin_port; + m_tag_prepend(m, mtag); /* * Find receive interface with the given name or IP address. 
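/*
 * [Editor's sketch -- not part of the patch.] The div_output() hunk above
 * retires the global ip_divert_cookie in favor of a per-packet m_tag
 * carrying a struct divert_tag, which removes the shared-state hazard.
 * A kernel-context sketch of attaching and reading back such a tag,
 * mirroring the m_tag_alloc/m_tag_prepend/m_tag_locate calls used in this
 * patch; helper names are illustrative and error handling is trimmed.
 */
#include <sys/errno.h>
#include <sys/mbuf.h>
#include <netinet/ip_divert.h>

static int
tag_with_cookie(struct mbuf *m, u_int16_t cookie)
{
	struct m_tag *mtag;
	struct divert_tag *dt;

	mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT,
	    sizeof(struct divert_tag), M_NOWAIT);
	if (mtag == NULL)
		return ENOBUFS;
	dt = (struct divert_tag *)(mtag + 1);	/* payload follows header */
	dt->info = 0;
	dt->cookie = cookie;
	m_tag_prepend(m, mtag);
	return 0;
}

static u_int16_t
cookie_of(struct mbuf *m)
{
	struct m_tag *mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
	    KERNEL_TAG_TYPE_DIVERT, NULL);
	return mtag ? ((struct divert_tag *)(mtag + 1))->cookie : 0;
}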
@@ -297,8 +341,6 @@ div_output(so, m, addr, control) while (*c++ && (len++ < sizeof(sin->sin_zero))); if ((len > 0) && (len < sizeof(sin->sin_zero))) m->m_pkthdr.rcvif = ifunit(sin->sin_zero); - } else { - ip_divert_cookie = 0; } /* Reinject packet into the system as incoming or outgoing */ @@ -319,10 +361,13 @@ div_output(so, m, addr, control) /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ - error = ip_output(m, inp->inp_options, &inp->inp_route, + socket_unlock(so, 0); + error = ip_output(m, + inp->inp_options, &inp->inp_route, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions); + socket_lock(so, 0); } else { struct ifaddr *ifa; @@ -341,20 +386,40 @@ div_output(so, m, addr, control) goto cantsend; } m->m_pkthdr.rcvif = ifa->ifa_ifp; + ifafree(ifa); + } + + if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & + m->m_pkthdr.csum_flags) == 0) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + m->m_pkthdr.csum_data = 0xffff; + } + else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + int hlen; + +#ifdef _IP_VHL + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + ip->ip_sum = in_cksum(m, hlen); } /* Send packet to input processing */ - ip_input(m); + proto_inject(PF_INET, m); } - /* paranoid: Reset for next time (and other packets) */ - /* almost definitly already done in the ipfw filter but.. */ - ip_divert_cookie = 0; return error; cantsend: m_freem(m); - ip_divert_cookie = 0; return error; } @@ -362,20 +427,19 @@ static int div_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; - int error, s; + int error; + inp = sotoinpcb(so); if (inp) panic("div_attach"); - if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) + if (p && (error = proc_suser(p)) != 0) return error; error = soreserve(so, div_sendspace, div_recvspace); if (error) return error; - s = splnet(); error = in_pcballoc(so, &divcbinfo, p); - splx(s); if (error) return error; inp = (struct inpcb *)so->so_pcb; @@ -385,6 +449,11 @@ div_attach(struct socket *so, int proto, struct proc *p) /* The socket is always "connected" because we always know "where" to send the packet */ so->so_state |= SS_ISCONNECTED; + +#ifdef MORE_DICVLOCK_DEBUG + printf("div_attach: so=%x sopcb=%x lock=%x ref=%x\n", + so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount); +#endif return 0; } @@ -393,10 +462,15 @@ div_detach(struct socket *so) { struct inpcb *inp; +#ifdef MORE_DICVLOCK_DEBUG + printf("div_detach: so=%x sopcb=%x lock=%x ref=%x\n", + so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount); +#endif inp = sotoinpcb(so); if (inp == 0) - panic("div_detach"); + panic("div_detach: so=%x null inp\n", so); in_pcbdetach(inp); + inp->inp_state = INPCB_STATE_DEAD; return 0; } @@ -419,10 +493,8 @@ static int div_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; - int s; int error; - s = splnet(); inp = sotoinpcb(so); /* in_pcbbind assumes that the socket is a sockaddr_in * and in_pcbbind requires a valid address. 
Since divert @@ -437,7 +509,6 @@ div_bind(struct socket *so, struct sockaddr *nam, struct proc *p) ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; error = in_pcbbind(inp, nam, p); } - splx(s); return error; } @@ -449,8 +520,8 @@ div_shutdown(struct socket *so) } static int -div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, - struct mbuf *control, struct proc *p) +div_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam, + struct mbuf *control, __unused struct proc *p) { /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && @@ -467,7 +538,7 @@ div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, static int div_pcblist SYSCTL_HANDLER_ARGS { - int error, i, n, s; + int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; @@ -476,57 +547,61 @@ div_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + lck_rw_lock_exclusive(divcbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { n = divcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); + lck_rw_done(divcbinfo.mtx); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(divcbinfo.mtx); return EPERM; + } /* * OK, now we're committed to doing something. */ - s = splnet(); gencnt = divcbinfo.ipi_gencnt; n = divcbinfo.ipi_count; - splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); - if (error) + if (error) { + lck_rw_done(divcbinfo.mtx); return error; + } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == 0) + if (inp_list == 0) { + lck_rw_done(divcbinfo.mtx); return ENOMEM; + } - s = splnet(); for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { #ifdef __APPLE__ - if (inp->inp_gencnt <= gencnt) + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) #else if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) #endif inp_list[i++] = inp; } - splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); + inpcb_to_compat(inp, &xi.xi_inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); @@ -540,30 +615,119 @@ div_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. 
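/*
 * [Editor's sketch -- not part of the patch.] div_pcblist above follows
 * the classic sysctl export protocol: with no output buffer it only
 * reports an estimated size (padded for growth); otherwise it snapshots
 * the generation count, copies the list, then re-emits the possibly
 * changed generation count so userland can detect a concurrent update and
 * retry. A single-threaded user-space rendering of the protocol; names
 * and types are illustrative.
 */
#include <stddef.h>
#include <string.h>

struct item { int v; };
struct list { struct item items[64]; size_t count; unsigned gen; };
struct hdr  { size_t count; unsigned gen; };

/* returns bytes needed; when buf != NULL also reports the closing gen */
static size_t
export_list(const struct list *l, void *buf, size_t buflen,
    unsigned *gen_after)
{
	size_t need = sizeof(struct hdr) +
	    (l->count + l->count / 8) * sizeof(struct item);	/* slack */

	if (buf == NULL)
		return need;			/* pass 1: size estimate */
	if (buflen < need)
		return 0;

	struct hdr h = { l->count, l->gen };	/* opening generation */
	memcpy(buf, &h, sizeof(h));
	memcpy((char *)buf + sizeof(h), l->items,
	    l->count * sizeof(struct item));
	*gen_after = l->gen;	/* caller retries if this != h.gen */
	return need;
}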
*/ - s = splnet(); xig.xig_gen = divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = divcbinfo.ipi_count; - splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); + lck_rw_done(divcbinfo.mtx); return error; } -#ifndef __APPLE__ -#warning Fix SYSCTL net_inet_divert +__private_extern__ int +div_lock(struct socket *so, int refcount, int lr) + { + int lr_saved; +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; #endif -#if 0 -SYSCTL_DECL(_net_inet_divert); -SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0, - div_pcblist, "S,xinpcb", "List of active divert sockets"); + +#ifdef MORE_DICVLOCK_DEBUG + printf("div_lock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", + so, + so->so_pcb, + so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0, + so->so_usecount, + lr_saved); #endif + if (so->so_pcb) { + lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } else { + panic("div_lock: so=%x NO PCB! lr=%x\n", so, lr_saved); + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + } + + if (so->so_usecount < 0) + panic("div_lock: so=%x so_pcb=%x lr=%x ref=%x\n", + so, so->so_pcb, lr_saved, so->so_usecount); + + if (refcount) + so->so_usecount++; + so->reserved3 = (void *)lr_saved; + + return (0); +} + +__private_extern__ int +div_unlock(struct socket *so, int refcount, int lr) +{ + int lr_saved; + lck_mtx_t * mutex_held; + struct inpcb *inp = sotoinpcb(so); +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; +#endif + +#ifdef MORE_DICVLOCK_DEBUG + printf("div_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", + so, + so->so_pcb, + so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0, + so->so_usecount, + lr_saved); +#endif + if (refcount) + so->so_usecount--; + + if (so->so_usecount < 0) + panic("div_unlock: so=%x usecount=%x\n", so, so->so_usecount); + if (so->so_pcb == NULL) { + panic("div_unlock: so=%x NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); + mutex_held = so->so_proto->pr_domain->dom_mtx; + } else { + mutex_held = ((struct inpcb *)so->so_pcb)->inpcb_mtx; + } + + if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { + lck_rw_lock_exclusive(divcbinfo.mtx); + in_pcbdispose(inp); + lck_rw_done(divcbinfo.mtx); + return (0); + } + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(mutex_held); + so->reserved4 = (void *)lr_saved; + return (0); +} + +__private_extern__ lck_mtx_t * +div_getlock(struct socket *so, __unused int locktype) +{ + struct inpcb *inpcb = (struct inpcb *)so->so_pcb; + + if (so->so_pcb) { + if (so->so_usecount < 0) + panic("div_getlock: so=%x usecount=%x\n", so, so->so_usecount); + return(inpcb->inpcb_mtx); + } else { + panic("div_getlock: so=%x NULL so_pcb\n", so); + return (so->so_proto->pr_domain->dom_mtx); + } +} + struct pr_usrreqs div_usrreqs = { div_abort, pru_accept_notsupp, div_attach, div_bind, pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach, div_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; + diff --git a/bsd/netinet/ip_divert.h b/bsd/netinet/ip_divert.h new file mode 100644 index 000000000..b2f1a2939 --- /dev/null +++ b/bsd/netinet/ip_divert.h @@ -0,0 +1,92 @@ +/*- + * Copyright (c) 2003 Sam Leffler, Errno Consulting + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD: src/sys/netinet/ip_divert.h,v 1.3 2004/02/25 19:55:28 mlaier Exp $ + */ + +#ifndef _NETINET_IP_DIVERT_H_ +#define _NETINET_IP_DIVERT_H_ + +#if IPDIVERT +/* + * Divert socket definitions. + */ + +/* 32-bit unique unsigned value used to identify a module */ + +struct divert_tag { + u_int32_t info; /* port & flags */ + u_int16_t cookie; /* ipfw rule number */ +}; + +/* + * Return the divert cookie associated with the mbuf; if any. + */ +static __inline u_int16_t +divert_cookie(struct m_tag *mtag) +{ + return ((struct divert_tag *)(mtag+1))->cookie; +} +static __inline u_int16_t +divert_find_cookie(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DIVERT, NULL); + return mtag ? divert_cookie(mtag) : 0; +} + +/* + * Return the divert info associated with the mbuf; if any. + */ +static __inline u_int32_t +divert_info(struct m_tag *mtag) +{ + return ((struct divert_tag *)(mtag+1))->info; +} +static __inline u_int32_t +divert_find_info(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DIVERT, NULL); + return mtag ? 
divert_info(mtag) : 0; +} + +extern void div_init(void); +extern void div_input(struct mbuf *, int); +lck_mtx_t * + div_getlock(struct socket *, int ); +int div_unlock(struct socket *, int, int); +int div_lock(struct socket *, int , int ); +extern void divert_packet(struct mbuf *m, int incoming, int port, int rule); +extern struct pr_usrreqs div_usrreqs; + +#endif /* IPDIVERT */ +#endif /* _NETINET_IP_DIVERT_H_ */ diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index 0979e45d3..013ccbf43 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -19,7 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ - * Copyright (c) 1998-2001 Luigi Rizzo, Universita` di Pisa +/* + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa * Portions Copyright (c) 2000 Akamba Corp. * All rights reserved * @@ -44,11 +45,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.11 2001/02/09 23:18:08 luigi Exp $ + * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.84 2004/08/25 09:31:30 pjd Exp $ */ -#define DEB(x) -#define DDB(x) x +#define DUMMYNET_DEBUG /* * This module implements IP dummynet, a bandwidth limiter/delay emulator @@ -60,10 +60,7 @@ * + scheduler and dummynet functions; * + configuration and initialization. * - * NOTA BENE: critical sections are protected by splimp()/splx() - * pairs. One would think that splnet() is enough as for most of - * the netinet code, but it is not so because when used with - * bridging, dummynet is invoked at splimp(). + * NOTA BENE: critical sections are protected by the "dummynet lock". * * Most important Changes: * @@ -88,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -140,7 +138,7 @@ static void ready_event(struct dn_flow_queue *q); static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */ -#if SYSCTL_NODE +#ifdef SYSCTL_NODE SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, @@ -168,30 +166,35 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size"); #endif +#ifdef DUMMYNET_DEBUG +int dummynet_debug = 0; +#ifdef SYSCTL_NODE +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug, + 0, "control debugging printfs"); +#endif +#define DPRINTF(X) if (dummynet_debug) printf X +#else +#define DPRINTF(X) +#endif + +/* dummynet lock */ +lck_grp_t *dn_mutex_grp; +lck_grp_attr_t *dn_mutex_grp_attr; +lck_attr_t *dn_mutex_attr; +lck_mtx_t *dn_mutex; + static int config_pipe(struct dn_pipe *p); static int ip_dn_ctl(struct sockopt *sopt); -static void rt_unref(struct rtentry *); static void dummynet(void *); static void dummynet_flush(void); void dummynet_drain(void); -int if_tx_rdy(struct ifnet *ifp); +static ip_dn_io_t dummynet_io; +static void dn_rule_delete(void *); -/* - * ip_fw_chain is used when deleting a pipe, because ipfw rules can - * hold references to the pipe. - */ -extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain_head; +int if_tx_rdy(struct ifnet *ifp); -static void -rt_unref(struct rtentry *rt) -{ - if (rt == NULL) - return ; - if (rt->rt_refcnt <= 0) - printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt); - rtfree(rt); -} +extern lck_mtx_t *rt_mtx; /* route global lock */ /* * Heap management functions. 
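(Editor's note: the heap hunks that follow are largely mechanical, retagging allocations from M_IPFW to M_DUMMYNET and prefixing diagnostics with "dummynet:", so the data structure itself is easy to lose in the diff noise. Below is a minimal user-space sketch, not the kernel code, of the same min-heap discipline that heap_insert() and heap_extract() implement; all names here (sketch_insert, sketch_extract, struct mheap, struct hent, dnkey) are invented for illustration. The real dn_heap additionally stores each object's array index at a configurable byte offset, via SET_OFFSET/RESET_OFFSET, so heap_extract() can also remove an element from the middle of the heap.)

#include <stdint.h>
#include <stdlib.h>

typedef uint64_t dnkey;                        /* stand-in for dn_key */

struct hent { dnkey key; void *object; };
struct mheap { int size, elements; struct hent *p; };

#define HEAP_FATHER(x) (((x) - 1) / 2)
#define HEAP_LEFT(x)   (2 * (x) + 1)
#define HEAP_SWAP(a, b, t) { t = a; a = b; b = t; }

/* Append at the end, then bubble up while the child's key is smaller
 * than its father's, restoring the min-heap invariant. */
static int
sketch_insert(struct mheap *h, dnkey key, void *obj)
{
    int son;

    if (h->elements >= h->size) {              /* grow the array; the kernel's
                                                * heap_init() resizes in
                                                * HEAP_INCREMENT steps instead */
        int nsz = h->size ? h->size * 2 : 16;
        struct hent *np = realloc(h->p, nsz * sizeof(*np));
        if (np == NULL)
            return 1;                          /* error, as heap_init() reports */
        h->p = np;
        h->size = nsz;
    }
    son = h->elements++;
    h->p[son].key = key;
    h->p[son].object = obj;
    while (son > 0 && h->p[HEAP_FATHER(son)].key > h->p[son].key) {
        struct hent tmp;
        HEAP_SWAP(h->p[son], h->p[HEAP_FATHER(son)], tmp);
        son = HEAP_FATHER(son);
    }
    return 0;
}

/* Remove the root (smallest key): move the last entry to the top,
 * then sift it down toward the smaller child. */
static void *
sketch_extract(struct mheap *h)
{
    int father = 0, child;
    void *obj;

    if (h->elements <= 0)
        return NULL;
    obj = h->p[0].object;
    h->p[0] = h->p[--h->elements];
    while ((child = HEAP_LEFT(father)) < h->elements) {
        if (child + 1 < h->elements && h->p[child + 1].key < h->p[child].key)
            child++;                           /* take the smaller child */
        if (h->p[father].key <= h->p[child].key)
            break;                             /* invariant restored */
        struct hent tmp;
        HEAP_SWAP(h->p[father], h->p[child], tmp);
        father = child;
    }
    return obj;
}

One difference worth noting: the kernel compares keys through the DN_KEY_LT()/DN_KEY_LEQ() macros (defined in ip_dummynet.h) rather than bare operators, so the 64-bit tick counter can wrap without breaking the ordering; the bare comparisons above are a simplification.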
@@ -214,23 +217,23 @@ rt_unref(struct rtentry *rt) static int heap_init(struct dn_heap *h, int new_size) -{ +{ struct dn_heap_entry *p; if (h->size >= new_size ) { - printf("heap_init, Bogus call, have %d want %d\n", + printf("dummynet: heap_init, Bogus call, have %d want %d\n", h->size, new_size); return 0 ; - } + } new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; - p = _MALLOC(new_size * sizeof(*p), M_IPFW, M_DONTWAIT ); + p = _MALLOC(new_size * sizeof(*p), M_DUMMYNET, M_DONTWAIT ); if (p == NULL) { - printf(" heap_init, resize %d failed\n", new_size ); + printf("dummynet: heap_init, resize %d failed\n", new_size ); return 1 ; /* error */ } if (h->size > 0) { bcopy(h->p, p, h->size * sizeof(*p) ); - FREE(h->p, M_IPFW); + FREE(h->p, M_DUMMYNET); } h->p = p ; h->size = new_size ; @@ -258,7 +261,7 @@ heap_init(struct dn_heap *h, int new_size) *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; static int heap_insert(struct dn_heap *h, dn_key key1, void *p) -{ +{ int son = h->elements ; if (p == NULL) /* data already there, set starting point */ @@ -277,7 +280,7 @@ heap_insert(struct dn_heap *h, dn_key key1, void *p) struct dn_heap_entry tmp ; if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) - break ; /* found right position */ + break ; /* found right position */ /* son smaller than father, swap and repeat */ HEAP_SWAP(h->p[son], h->p[father], tmp) ; SET_OFFSET(h, son); @@ -292,22 +295,22 @@ heap_insert(struct dn_heap *h, dn_key key1, void *p) */ static void heap_extract(struct dn_heap *h, void *obj) -{ +{ int child, father, max = h->elements - 1 ; if (max < 0) { - printf("warning, extract from empty heap 0x%p\n", h); + printf("dummynet: warning, extract from empty heap 0x%p\n", h); return ; } father = 0 ; /* default: move up smallest child */ if (obj != NULL) { /* extract specific element, index is at offset */ if (h->offset <= 0) - panic("*** heap_extract from middle not supported on this heap!!!\n"); + panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); father = *((int *)((char *)obj + h->offset)) ; if (father < 0 || father >= h->elements) { printf("dummynet: heap_extract, father %d out of bound 0..%d\n", father, h->elements); - panic("heap_extract"); + panic("dummynet: heap_extract"); } } RESET_OFFSET(h, father); @@ -319,7 +322,7 @@ heap_extract(struct dn_heap *h, void *obj) SET_OFFSET(h, father); father = child ; child = HEAP_LEFT(child) ; /* left child for next loop */ - } + } h->elements-- ; if (father != max) { /* @@ -328,7 +331,7 @@ heap_extract(struct dn_heap *h, void *obj) h->p[father] = h->p[max] ; heap_insert(h, father, NULL); /* this one cannot fail */ } -} +} #if 0 /* @@ -391,7 +394,7 @@ static void heap_free(struct dn_heap *h) { if (h->size >0 ) - FREE(h->p, M_IPFW); + FREE(h->p, M_DUMMYNET); bzero(h, sizeof(*h) ); } @@ -399,6 +402,23 @@ heap_free(struct dn_heap *h) * --- end of heap management functions --- */ +/* + * Return the mbuf tag holding the dummynet state. As an optimization + * this is assumed to be the first tag on the list. If this turns out + * wrong we'll need to search the list. 
+ */ +static struct dn_pkt_tag * +dn_tag_get(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_first(m); +/* KASSERT(mtag != NULL && + mtag->m_tag_id == KERNEL_MODULE_TAG_ID && + mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET, + ("packet on dummynet queue w/o dummynet tag!")); +*/ + return (struct dn_pkt_tag *)(mtag+1); +} + /* * Scheduler functions: * @@ -420,76 +440,78 @@ heap_free(struct dn_heap *h) static void transmit_event(struct dn_pipe *pipe) { - struct dn_pkt *pkt ; - - while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { - /* - * first unlink, then call procedures, since ip_input() can invoke - * ip_output() and viceversa, thus causing nested calls - */ - pipe->head = DN_NEXT(pkt) ; - - /* - * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf - * (NOT A REAL one, just a small block of malloc'ed memory) with - * m_type = MT_DUMMYNET - * m_next = actual mbuf to be processed by ip_input/output - * m_data = the matching rule - * and some other fields. - * The block IS FREED HERE because it contains parameters passed - * to the called routine. - */ - switch (pkt->dn_dir) { - case DN_TO_IP_OUT: - (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL); - rt_unref (pkt->ro.ro_rt) ; - break ; - - case DN_TO_IP_IN : - ip_input((struct mbuf *)pkt) ; - break ; - + struct mbuf *m ; + struct dn_pkt_tag *pkt ; + struct ip *ip; + + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + + while ( (m = pipe->head) ) { + pkt = dn_tag_get(m); + if ( !DN_KEY_LEQ(pkt->output_time, curr_time) ) + break; + /* + * first unlink, then call procedures, since ip_input() can invoke + * ip_output() and viceversa, thus causing nested calls + */ + pipe->head = m->m_nextpkt ; + m->m_nextpkt = NULL; + + /* XXX: drop the lock for now to avoid LOR's */ + lck_mtx_unlock(dn_mutex); + switch (pkt->dn_dir) { + case DN_TO_IP_OUT: { + struct route tmp_rt = pkt->ro; + (void)ip_output(m, NULL, NULL, pkt->flags, NULL); + if (tmp_rt.ro_rt) { + rtfree(tmp_rt.ro_rt); + } + break ; + } + case DN_TO_IP_IN : + ip = mtod(m, struct ip *); + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + proto_inject(PF_INET, m); + break ; + #if BRIDGE - case DN_TO_BDG_FWD : { - struct mbuf *m = (struct mbuf *)pkt ; - struct ether_header *eh; - - if (pkt->dn_m->m_len < ETHER_HDR_LEN - && (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/bridge: pullup fail, dropping pkt\n"); - break; - } - /* - * same as ether_input, make eh be a pointer into the mbuf - */ - eh = mtod(pkt->dn_m, struct ether_header *); - m_adj(pkt->dn_m, ETHER_HDR_LEN); - /* - * bdg_forward() wants a pointer to the pseudo-mbuf-header, but - * on return it will supply the pointer to the actual packet - * (originally pkt->dn_m, but could be something else now) if - * it has not consumed it. - */ - m = bdg_forward(m, eh, pkt->ifp); - if (m) - m_freem(m); - } - break ; -#endif - - default: - printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(pkt->dn_m); - break ; - } - FREE(pkt, M_IPFW); + case DN_TO_BDG_FWD : + /* + * The bridge requires/assumes the Ethernet header is + * contiguous in the first mbuf header. Insure this is true. + */ + if (BDG_LOADED) { + if (m->m_len < ETHER_HDR_LEN && + (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { + printf("dummynet/bridge: pullup fail, dropping pkt\n"); + break; + } + m = bdg_forward_ptr(m, pkt->ifp); + } else { + /* somebody unloaded the bridge module. 
Drop pkt */ + /* XXX rate limit */ + printf("dummynet: dropping bridged packet trapped in pipe\n"); + } + if (m) + m_freem(m); + break; +#endif + default: + printf("dummynet: bad switch %d!\n", pkt->dn_dir); + m_freem(m); + break ; + } + lck_mtx_lock(dn_mutex); } /* if there are leftover packets, put into the heap for next event */ - if ( (pkt = pipe->head) ) - heap_insert(&extract_heap, pkt->output_time, pipe ) ; - /* XXX should check errors on heap_insert, by draining the - * whole pipe p and hoping in the future we are more successful - */ + if ( (m = pipe->head) ) { + pkt = dn_tag_get(m); + /* XXX should check errors on heap_insert, by draining the + * whole pipe p and hoping in the future we are more successful + */ + heap_insert(&extract_heap, pkt->output_time, pipe); + } } /* @@ -497,8 +519,8 @@ transmit_event(struct dn_pipe *pipe) * before being able to transmit a packet. The credit is taken from * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ -#define SET_TICKS(pkt, q, p) \ - (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ +#define SET_TICKS(_m, q, p) \ + ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ p->bandwidth ; /* @@ -506,21 +528,23 @@ transmit_event(struct dn_pipe *pipe) * and put into delay line (p_queue) */ static void -move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, +move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p, int len) { - q->head = DN_NEXT(pkt) ; + struct dn_pkt_tag *dt = dn_tag_get(pkt); + + q->head = pkt->m_nextpkt ; q->len-- ; q->len_bytes -= len ; - pkt->output_time = curr_time + p->delay ; + dt->output_time = curr_time + p->delay ; if (p->head == NULL) p->head = pkt; else - DN_NEXT(p->tail) = pkt; + p->tail->m_nextpkt = pkt; p->tail = pkt; - DN_NEXT(p->tail) = NULL; + p->tail->m_nextpkt = NULL; } /* @@ -533,12 +557,14 @@ move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, static void ready_event(struct dn_flow_queue *q) { - struct dn_pkt *pkt; + struct mbuf *pkt; struct dn_pipe *p = q->fs->pipe ; int p_was_empty ; + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + if (p == NULL) { - printf("ready_event- pipe is gone\n"); + printf("dummynet: ready_event- pipe is gone\n"); return ; } p_was_empty = (p->head == NULL) ; @@ -553,7 +579,7 @@ ready_event(struct dn_flow_queue *q) */ q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; while ( (pkt = q->head) != NULL ) { - int len = pkt->dn_m->m_pkthdr.len; + int len = pkt->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; if (len_scaled > q->numbytes ) break ; @@ -574,8 +600,10 @@ ready_event(struct dn_flow_queue *q) /* XXX should check errors on heap_insert, and drain the whole * queue on error hoping next time we are luckier. */ - } else /* RED needs to know when the queue becomes empty */ + } else { /* RED needs to know when the queue becomes empty */ q->q_time = curr_time; + q->numbytes = 0; + } /* * If the delay line was empty call transmit_event(p) now. * Otherwise, the scheduler will take care of it. 
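(Editor's note: the conversions above, from struct dn_pkt chains to plain mbuf m_nextpkt chains, leave the rate-limiting arithmetic intact, and it is worth spelling out since both the SET_TICKS macro and ready_event() depend on it: a queue accrues credit at bandwidth units per tick, as in q->numbytes += (curr_time - q->sched_time) * p->bandwidth, and a packet of len bytes becomes eligible once len*8*hz no longer exceeds that credit. The following is a hedged user-space restatement, with an invented function name, not kernel code.)

#include <stdint.h>

/*
 * Sketch of the eligibility test in ready_event() and SET_TICKS.
 * 'bandwidth' is in bits per second, the scheduler runs at 'hz'
 * ticks per second, and 'numbytes' is the credit a queue has
 * accrued, in the same scaled units (bits x hz).
 */
static int64_t
ticks_until_ready(int len, int64_t numbytes, int bandwidth, int hz)
{
    int64_t len_scaled = (int64_t)len * 8 * hz;  /* same scaling as SET_TICKS */

    if (bandwidth <= 0 || len_scaled <= numbytes)
        return 0;                                /* can go out this tick */
    /* ceiling division, exactly as SET_TICKS rounds up */
    return (len_scaled - numbytes + bandwidth - 1) / bandwidth;
}

For example, with hz = 100 and a 1 Mbit/s pipe, a 1500-byte packet scales to 1500*8*100 = 1,200,000; with zero accrued credit it waits ceil(1,200,000 / 1,000,000) = 2 ticks, i.e. 20 ms, which is the 12 ms wire time rounded up to tick granularity.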
@@ -599,14 +627,16 @@ ready_event_wfq(struct dn_pipe *p) struct dn_heap *sch = &(p->scheduler_heap); struct dn_heap *neh = &(p->not_eligible_heap) ; + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + if (p->if_name[0] == 0) /* tx clock is simulated */ p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; else { /* tx clock is for real, the ifq must be empty or this is a NOP */ if (p->ifp && p->ifp->if_snd.ifq_head != NULL) return ; else { - DEB(printf("pipe %d ready from %s --\n", - p->pipe_nr, p->if_name);) + DPRINTF(("dummynet: pipe %d ready from %s --\n", + p->pipe_nr, p->if_name)); } } @@ -617,9 +647,9 @@ ready_event_wfq(struct dn_pipe *p) while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { if (sch->elements > 0) { /* have some eligible pkts to send out */ struct dn_flow_queue *q = sch->p[0].object ; - struct dn_pkt *pkt = q->head; - struct dn_flow_set *fs = q->fs; - u_int64_t len = pkt->dn_m->m_pkthdr.len; + struct mbuf *pkt = q->head; + struct dn_flow_set *fs = q->fs; + u_int64_t len = pkt->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; heap_extract(sch, NULL); /* remove queue from heap */ @@ -636,7 +666,7 @@ ready_event_wfq(struct dn_pipe *p) * update F and position in backlogged queue, then * put flow in not_eligible_heap (we will fix this later). */ - len = (q->head)->dn_m->m_pkthdr.len; + len = (q->head)->m_pkthdr.len; q->F += (len<weight ; if (DN_KEY_LEQ(q->S, p->V)) heap_insert(neh, q->S, q); @@ -691,7 +721,7 @@ ready_event_wfq(struct dn_pipe *p) if (p->bandwidth > 0) t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; - p->tail->output_time += t ; + dn_tag_get(p->tail)->output_time += t ; p->sched_time = curr_time ; heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); /* XXX should check errors on heap_insert, and drain the whole @@ -715,7 +745,6 @@ dummynet(void * __unused unused) { void *p ; /* generic parameter to handler */ struct dn_heap *h ; - int s ; struct dn_heap *heaps[3]; int i; struct dn_pipe *pe ; @@ -723,14 +752,16 @@ dummynet(void * __unused unused) heaps[0] = &ready_heap ; /* fixed-rate queues */ heaps[1] = &wfq_ready_heap ; /* wfq queues */ heaps[2] = &extract_heap ; /* delay line */ - s = splimp(); /* see note on top, splnet() is not enough */ + + lck_mtx_lock(dn_mutex); + curr_time++ ; for (i=0; i < 3 ; i++) { h = heaps[i]; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { - DDB(if (h->p[0].key > curr_time) - printf("-- dummynet: warning, heap %d is %d ticks late\n", - i, (int)(curr_time - h->p[0].key));) + if (h->p[0].key > curr_time) + printf("dummynet: warning, heap %d is %d ticks late\n", + i, (int)(curr_time - h->p[0].key)); p = h->p[0].object ; /* store a copy before heap_extract */ heap_extract(h, NULL); /* need to extract before processing */ if (i == 0) @@ -738,7 +769,7 @@ dummynet(void * __unused unused) else if (i == 1) { struct dn_pipe *pipe = p; if (pipe->if_name[0] != '\0') - printf("*** bad ready_event_wfq for pipe %s\n", + printf("dummynet: bad ready_event_wfq for pipe %s\n", pipe->if_name); else ready_event_wfq(p) ; @@ -756,7 +787,9 @@ dummynet(void * __unused unused) q->S = q->F + 1 ; /* mark timestamp as invalid */ pe->sum -= q->fs->weight ; } - splx(s); + + lck_mtx_unlock(dn_mutex); + timeout(dummynet, NULL, 1); } @@ -768,6 +801,7 @@ if_tx_rdy(struct ifnet *ifp) { struct dn_pipe *p; + lck_mtx_lock(dn_mutex); for (p = all_pipes; p ; p = p->next ) if (p->ifp == ifp) break ; @@ -777,16 +811,18 @@ if_tx_rdy(struct ifnet *ifp) for (p = all_pipes; p ; p = p->next ) if 
(!strcmp(p->if_name, buf) ) { p->ifp = ifp ; - DEB(printf("++ tx rdy from %s (now found)\n", buf);) + DPRINTF(("dummynet: ++ tx rdy from %s (now found)\n", buf)); break ; } } if (p != NULL) { - DEB(printf("++ tx rdy from %s%d - qlen %d\n", ifp->if_name, - ifp->if_unit, ifp->if_snd.ifq_len);) + DPRINTF(("dummynet: ++ tx rdy from %s%d - qlen %d\n", ifp->if_name, + ifp->if_unit, ifp->if_snd.ifq_len)); p->numbytes = 0 ; /* mark ready for I/O */ ready_event_wfq(p); } + lck_mtx_unlock(dn_mutex); + return 0; } @@ -799,10 +835,13 @@ expire_queues(struct dn_flow_set *fs) { struct dn_flow_queue *q, *prev ; int i, initial_elements = fs->rq_elements ; + struct timeval timenow; - if (fs->last_expired == time_second) + getmicrotime(&timenow); + + if (fs->last_expired == timenow.tv_sec) return 0 ; - fs->last_expired = time_second ; + fs->last_expired = timenow.tv_sec ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) if (q->head != NULL || q->S != q->F+1) { @@ -816,7 +855,7 @@ expire_queues(struct dn_flow_set *fs) else fs->rq[i] = q = q->next ; fs->rq_elements-- ; - FREE(old_q, M_IPFW); + FREE(old_q, M_DUMMYNET); } return initial_elements - fs->rq_elements ; } @@ -839,12 +878,11 @@ create_queue(struct dn_flow_set *fs, int i) if ( fs->rq[i] != NULL ) return fs->rq[i] ; } - q = _MALLOC(sizeof(*q), M_IPFW, M_DONTWAIT) ; + q = _MALLOC(sizeof(*q), M_DUMMYNET, M_DONTWAIT | M_ZERO); if (q == NULL) { - printf("sorry, cannot allocate queue for new flow\n"); + printf("dummynet: sorry, cannot allocate queue for new flow\n"); return NULL ; } - bzero(q, sizeof(*q) ); /* needed */ q->fs = fs ; q->hash_slot = i ; q->next = fs->rq[i] ; @@ -860,7 +898,7 @@ create_queue(struct dn_flow_set *fs, int i) * so that further searches take less time.
*/ static struct dn_flow_queue * -find_queue(struct dn_flow_set *fs) +find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) { int i = 0 ; /* we need i and q for new allocations */ struct dn_flow_queue *q, *prev; @@ -869,25 +907,30 @@ find_queue(struct dn_flow_set *fs) q = fs->rq[0] ; else { /* first, do the masking */ - last_pkt.dst_ip &= fs->flow_mask.dst_ip ; - last_pkt.src_ip &= fs->flow_mask.src_ip ; - last_pkt.dst_port &= fs->flow_mask.dst_port ; - last_pkt.src_port &= fs->flow_mask.src_port ; - last_pkt.proto &= fs->flow_mask.proto ; - last_pkt.flags = 0 ; /* we don't care about this one */ + id->dst_ip &= fs->flow_mask.dst_ip ; + id->src_ip &= fs->flow_mask.src_ip ; + id->dst_port &= fs->flow_mask.dst_port ; + id->src_port &= fs->flow_mask.src_port ; + id->proto &= fs->flow_mask.proto ; + id->flags = 0 ; /* we don't care about this one */ /* then, hash function */ - i = ( (last_pkt.dst_ip) & 0xffff ) ^ - ( (last_pkt.dst_ip >> 15) & 0xffff ) ^ - ( (last_pkt.src_ip << 1) & 0xffff ) ^ - ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^ - (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^ - (last_pkt.proto ); + i = ( (id->dst_ip) & 0xffff ) ^ + ( (id->dst_ip >> 15) & 0xffff ) ^ + ( (id->src_ip << 1) & 0xffff ) ^ + ( (id->src_ip >> 16 ) & 0xffff ) ^ + (id->dst_port << 1) ^ (id->src_port) ^ + (id->proto ); i = i % fs->rq_size ; /* finally, scan the current list for a match */ searches++ ; for (prev=NULL, q = fs->rq[i] ; q ; ) { search_steps++; - if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0) + if (id->dst_ip == q->id.dst_ip && + id->src_ip == q->id.src_ip && + id->dst_port == q->id.dst_port && + id->src_port == q->id.src_port && + id->proto == q->id.proto && + id->flags == q->id.flags) break ; /* found */ else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { /* entry is idle and not in any heap, expire it */ @@ -898,7 +941,7 @@ find_queue(struct dn_flow_set *fs) else fs->rq[i] = q = q->next ; fs->rq_elements-- ; - FREE(old_q, M_IPFW); + FREE(old_q, M_DUMMYNET); continue ; } prev = q ; @@ -913,7 +956,7 @@ find_queue(struct dn_flow_set *fs) if (q == NULL) { /* no match, need to allocate a new entry */ q = create_queue(fs, i); if (q != NULL) - q->id = last_pkt ; + q->id = *id ; } return q ; } @@ -923,27 +966,27 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) { /* * RED algorithm - * + * * RED calculates the average queue size (avg) using a low-pass filter * with an exponential weighted (w_q) moving average: * avg <- (1-w_q) * avg + w_q * q_size * where q_size is the queue length (measured in bytes or * packets). - * + * * If q_size == 0, we compute the idle time for the link, and set * avg = (1 - w_q)^(idle/s) * where s is the time needed for transmitting a medium-sized packet. - * + * * Now, if avg < min_th the packet is enqueued. * If avg > max_th the packet is dropped. Otherwise, the packet is * dropped with probability P function of avg. - * + * */ int64_t p_b = 0; /* queue in bytes or packets ? */ u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; - DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);) + DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time, q_size)); /* average queue size estimation */ if (q_size != 0) { @@ -969,7 +1012,7 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; } } - DEB(printf("avg: %u ", SCALE_VAL(q->avg));) + DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg))); /* should i drop ? 
*/ @@ -988,7 +1031,7 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4; } else { q->count = -1; - printf("- drop"); + DPRINTF(("dummynet: - drop")); return 1 ; } } else if (q->avg > fs->min_th) { @@ -1010,7 +1053,7 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) */ if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) { q->count = 0; - DEB(printf("- red drop");) + DPRINTF(("dummynet: - red drop")); /* after a drop we calculate a new random value */ q->random = random() & 0xffff; return 1; /* drop */ @@ -1022,51 +1065,83 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) static __inline struct dn_flow_set * -locate_flowset(int pipe_nr, struct ip_fw_chain *rule) +locate_flowset(int pipe_nr, struct ip_fw *rule) { - struct dn_flow_set *fs = NULL ; + struct dn_flow_set *fs; + ipfw_insn *cmd = rule->cmd + rule->act_ofs; - if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE ) - for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) - ; + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + + bcopy(& ((ipfw_insn_pipe *)cmd)->pipe_ptr, &fs, sizeof(fs)); + + if (fs != NULL) + return fs; + + if (cmd->opcode == O_QUEUE) { + for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) + ; + } else { - struct dn_pipe *p1; - for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next) - ; - if (p1 != NULL) - fs = &(p1->fs) ; + struct dn_pipe *p1; + for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next) + ; + if (p1 != NULL) + fs = &(p1->fs) ; } - if (fs != NULL) - rule->rule->pipe_ptr = fs ; /* record for the future */ + /* record for the future */ + bcopy(&fs, & ((ipfw_insn_pipe *)cmd)->pipe_ptr, sizeof(fs)); + return fs ; } /* * dummynet hook for packets. Below 'pipe' is a pipe or a queue * depending on whether WF2Q or fixed bw is used. + * + * pipe_nr pipe or queue the packet is destined for. + * dir where shall we send the packet after dummynet. + * m the mbuf with the packet + * ifp the 'ifp' parameter from the caller. + * NULL in ip_input, destination interface in ip_output, + * real_dst in bdg_forward + * ro route parameter (only used in ip_output, NULL otherwise) + * dst destination address, only used by ip_output + * rule matching rule, in case of multiple passes + * flags flags from the caller, only used in ip_output + * */ -int -dummynet_io(int pipe_nr, int dir, /* pipe_nr can also be a fs_nr */ - struct mbuf *m, struct ifnet *ifp, struct route *ro, - struct sockaddr_in *dst, - struct ip_fw_chain *rule, int flags) +static int +dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) { - struct dn_pkt *pkt; + struct dn_pkt_tag *pkt; + struct m_tag *mtag; struct dn_flow_set *fs; struct dn_pipe *pipe ; u_int64_t len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; - int s ; - - s = splimp(); + int is_pipe; + +#if IPFW2 + ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; + + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + is_pipe = (cmd->opcode == O_PIPE); +#else + is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE; +#endif pipe_nr &= 0xffff ; - if ( (fs = rule->rule->pipe_ptr) == NULL ) { - fs = locate_flowset(pipe_nr, rule); - if (fs == NULL) - goto dropit ; /* this queue/pipe does not exist! */ - } + lck_mtx_lock(dn_mutex); + + /* + * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. 
+ */ + fs = locate_flowset(pipe_nr, fwa->rule); + if (fs == NULL) + goto dropit ; /* this queue/pipe does not exist! */ pipe = fs->pipe ; if (pipe == NULL) { /* must be a queue, try find a matching pipe */ for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr; @@ -1075,12 +1150,12 @@ dummynet_io(int pipe_nr, int dir, /* pipe_nr can also be a fs_nr */ if (pipe != NULL) fs->pipe = pipe ; else { - printf("No pipe %d for queue %d, drop pkt\n", + printf("dummynet: no pipe %d for queue %d, drop pkt\n", fs->parent_nr, fs->fs_nr); goto dropit ; } } - q = find_queue(fs); + q = find_queue(fs, &(fwa->f_id)); if ( q == NULL ) goto dropit ; /* cannot allocate queue */ /* @@ -1100,56 +1175,59 @@ dummynet_io(int pipe_nr, int dir, /* pipe_nr can also be a fs_nr */ if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) goto dropit ; - pkt = (struct dn_pkt *)_MALLOC(sizeof (*pkt), M_IPFW, M_NOWAIT) ; - if ( pkt == NULL ) - goto dropit ; /* cannot allocate packet header */ + /* XXX expensive to zero, see if we can remove it*/ + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, + sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO); + if ( mtag == NULL ) + goto dropit ; /* cannot allocate packet header */ + m_tag_prepend(m, mtag); /* attach to mbuf chain */ + + pkt = (struct dn_pkt_tag *)(mtag+1); /* ok, i can handle the pkt now... */ - bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it*/ /* build and enqueue packet + parameters */ - pkt->hdr.mh_type = MT_DUMMYNET ; - (struct ip_fw_chain *)pkt->hdr.mh_data = rule ; - DN_NEXT(pkt) = NULL; - pkt->dn_m = m; + pkt->rule = fwa->rule ; pkt->dn_dir = dir ; - pkt->ifp = ifp; + pkt->ifp = fwa->oif; if (dir == DN_TO_IP_OUT) { /* * We need to copy *ro because for ICMP pkts (and maybe others) * the caller passed a pointer into the stack; dst might also be * a pointer into *ro so it needs to be updated. */ - pkt->ro = *ro; - if (ro->ro_rt) - rtref(ro->ro_rt); - if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */ - dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; - - pkt->dn_dst = dst; - pkt->flags = flags ; - } + lck_mtx_lock(rt_mtx); + pkt->ro = *(fwa->ro); + if (fwa->ro->ro_rt) + fwa->ro->ro_rt->rt_refcnt++ ; + if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */ + fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; + lck_mtx_unlock(rt_mtx); + + pkt->dn_dst = fwa->dst; + pkt->flags = fwa->flags; + } if (q->head == NULL) - q->head = pkt; + q->head = m; else - DN_NEXT(q->tail) = pkt; - q->tail = pkt; + q->tail->m_nextpkt = m; + q->tail = m; q->len++; q->len_bytes += len ; - if ( q->head != pkt ) /* flow was not idle, we are done */ + if ( q->head != m ) /* flow was not idle, we are done */ goto done; /* * If we reach this point the flow was previously idle, so we need * to schedule it. This involves different actions for fixed-rate or * WF2Q queues. */ - if ( (rule->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE ) { + if (is_pipe) { /* * Fixed-rate queue: just insert into the ready_heap. */ dn_key t = 0 ; - if (pipe->bandwidth) - t = SET_TICKS(pkt, q, pipe); + if (pipe->bandwidth) + t = SET_TICKS(m, q, pipe); q->sched_time = curr_time ; if (t == 0) /* must process it now */ ready_event( q ); @@ -1193,42 +1271,46 @@ dummynet_io(int pipe_nr, int dir, /* pipe_nr can also be a fs_nr */ */ if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ if (pipe->scheduler_heap.elements == 0) - printf("++ ouch! not eligible but empty scheduler!\n"); + printf("dummynet: ++ ouch! 
not eligible but empty scheduler!\n"); heap_insert(&(pipe->not_eligible_heap), q->S, q); } else { heap_insert(&(pipe->scheduler_heap), q->F, q); if (pipe->numbytes >= 0) { /* pipe is idle */ if (pipe->scheduler_heap.elements != 1) - printf("*** OUCH! pipe should have been idle!\n"); - DEB(printf("Waking up pipe %d at %d\n", - pipe->pipe_nr, (int)(q->F >> MY_M)); ) + printf("dummynet: OUCH! pipe should have been idle!\n"); + DPRINTF(("dummynet: waking up pipe %d at %d\n", + pipe->pipe_nr, (int)(q->F >> MY_M))); pipe->sched_time = curr_time ; ready_event_wfq(pipe); } } } done: - splx(s); + lck_mtx_unlock(dn_mutex); return 0; dropit: - splx(s); if (q) q->drops++ ; + lck_mtx_unlock(dn_mutex); m_freem(m); - return ENOBUFS ; + return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } /* - * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) + * Below, the rtfree is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ -#define DN_FREE_PKT(pkt) { \ - struct dn_pkt *n = pkt ; \ - rt_unref ( n->ro.ro_rt ) ; \ - m_freem(n->dn_m); \ - pkt = DN_NEXT(n) ; \ - FREE(n, M_IPFW) ; } +#define DN_FREE_PKT(_m) do { \ + struct m_tag *tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); \ + if (tag) { \ + struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \ + if (n->ro.ro_rt) \ + rtfree(n->ro.ro_rt); \ + } \ + m_tag_delete(_m, tag); \ + m_freem(_m); \ +} while (0) /* * Dispose all packets and flow_queues on a flow_set. @@ -1239,16 +1321,22 @@ dropit: static void purge_flow_set(struct dn_flow_set *fs, int all) { - struct dn_pkt *pkt ; struct dn_flow_queue *q, *qn ; int i ; + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + for (i = 0 ; i <= fs->rq_size ; i++ ) { for (q = fs->rq[i] ; q ; q = qn ) { - for (pkt = q->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + struct mbuf *m, *mnext; + + mnext = q->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } qn = q->next ; - FREE(q, M_IPFW); + FREE(q, M_DUMMYNET); } fs->rq[i] = NULL ; } @@ -1256,12 +1344,12 @@ purge_flow_set(struct dn_flow_set *fs, int all) if (all) { /* RED - free lookup table */ if (fs->w_q_lookup) - FREE(fs->w_q_lookup, M_IPFW); + FREE(fs->w_q_lookup, M_DUMMYNET); if (fs->rq) - FREE(fs->rq, M_IPFW); + FREE(fs->rq, M_DUMMYNET); /* if this fs is not part of a pipe, free it */ if (fs->pipe && fs != &(fs->pipe->fs) ) - FREE(fs, M_IPFW); + FREE(fs, M_DUMMYNET); } } @@ -1273,12 +1361,15 @@ purge_flow_set(struct dn_flow_set *fs, int all) static void purge_pipe(struct dn_pipe *pipe) { - struct dn_pkt *pkt ; + struct mbuf *m, *mnext; purge_flow_set( &(pipe->fs), 1 ); - for (pkt = pipe->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + mnext = pipe->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); @@ -1293,25 +1384,22 @@ static void dummynet_flush() { struct dn_pipe *curr_p, *p ; - struct ip_fw_chain *chain ; struct dn_flow_set *fs, *curr_fs; - int s ; - s = splimp() ; + lck_mtx_lock(dn_mutex); /* remove all references to pipes ...*/ - LIST_FOREACH(chain, &ip_fw_chain_head, next) - chain->rule->pipe_ptr = NULL ; + flush_pipe_ptrs(NULL); /* prevent future matches... 
*/ p = all_pipes ; - all_pipes = NULL ; + all_pipes = NULL ; fs = all_flow_sets ; all_flow_sets = NULL ; /* and free heaps so we don't have unwanted events */ heap_free(&ready_heap); heap_free(&wfq_ready_heap); heap_free(&extract_heap); - splx(s) ; + /* * Now purge all queued pkts and delete all pipes */ @@ -1325,24 +1413,27 @@ dummynet_flush() purge_pipe(p); curr_p = p ; p = p->next ; - FREE(curr_p, M_IPFW); + FREE(curr_p, M_DUMMYNET); } + lck_mtx_unlock(dn_mutex); } -extern struct ip_fw_chain *ip_fw_default_rule ; +extern struct ip_fw *ip_fw_default_rule ; static void dn_rule_delete_fs(struct dn_flow_set *fs, void *r) { int i ; struct dn_flow_queue *q ; - struct dn_pkt *pkt ; + struct mbuf *m ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ for (q = fs->rq[i] ; q ; q = q->next ) - for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) ) - if (pkt->hdr.mh_data == r) - pkt->hdr.mh_data = (void *)ip_fw_default_rule ; + for (m = q->head ; m ; m = m->m_nextpkt ) { + struct dn_pkt_tag *pkt = dn_tag_get(m) ; + if (pkt->rule == r) + pkt->rule = ip_fw_default_rule ; + } } /* * when a firewall rule is deleted, scan all queues and remove the flow-id @@ -1352,8 +1443,11 @@ void dn_rule_delete(void *r) { struct dn_pipe *p ; - struct dn_pkt *pkt ; struct dn_flow_set *fs ; + struct dn_pkt_tag *pkt ; + struct mbuf *m ; + + lck_mtx_lock(dn_mutex); /* * If the rule references a queue (dn_flow_set), then scan @@ -1365,17 +1459,20 @@ dn_rule_delete(void *r) for ( p = all_pipes ; p ; p = p->next ) { fs = &(p->fs) ; dn_rule_delete_fs(fs, r); - for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) ) - if (pkt->hdr.mh_data == r) - pkt->hdr.mh_data = (void *)ip_fw_default_rule ; + for (m = p->head ; m ; m = m->m_nextpkt ) { + pkt = dn_tag_get(m) ; + if (pkt->rule == r) + pkt->rule = ip_fw_default_rule ; + } } + lck_mtx_unlock(dn_mutex); } /* * setup RED parameters */ static int -config_red(struct dn_flow_set *p, struct dn_flow_set * x) +config_red(struct dn_flow_set *p, struct dn_flow_set * x) { int i; @@ -1392,19 +1489,21 @@ config_red(struct dn_flow_set *p, struct dn_flow_set * x) } /* if the lookup table already exist, free and create it again */ - if (x->w_q_lookup) - FREE(x->w_q_lookup, M_IPFW); + if (x->w_q_lookup) { + FREE(x->w_q_lookup, M_DUMMYNET); + x->w_q_lookup = NULL ; + } if (red_lookup_depth == 0) { - printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0"); - FREE(x, M_IPFW); + printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n"); + FREE(x, M_DUMMYNET); return EINVAL; } x->lookup_depth = red_lookup_depth; x->w_q_lookup = (u_int *) _MALLOC(x->lookup_depth * sizeof(int), - M_IPFW, M_DONTWAIT); + M_DUMMYNET, M_DONTWAIT); if (x->w_q_lookup == NULL) { - printf("sorry, cannot allocate red lookup table\n"); - FREE(x, M_IPFW); + printf("dummynet: sorry, cannot allocate red lookup table\n"); + FREE(x, M_DUMMYNET); return ENOSPC; } @@ -1433,18 +1532,17 @@ alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) l = dn_hash_size; if (l < 4) l = 4; - else if (l > 1024) - l = 1024; + else if (l > DN_MAX_HASH_SIZE) + l = DN_MAX_HASH_SIZE; x->rq_size = l; } else /* one is enough for null mask */ x->rq_size = 1; x->rq = _MALLOC((1 + x->rq_size) * sizeof(struct dn_flow_queue *), - M_IPFW, M_DONTWAIT); + M_DUMMYNET, M_DONTWAIT | M_ZERO); if (x->rq == NULL) { - printf("dummynet: sorry, cannot allocate queue\n"); + printf("dummynet: sorry, cannot allocate queue\n"); return ENOSPC; } - bzero(x->rq, (1+x->rq_size) * sizeof(struct dn_flow_queue *)); x->rq_elements = 0; return 0 ; } @@ -1474,19 +1572,20 @@
set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) * setup pipe or queue parameters. */ -static int +static int config_pipe(struct dn_pipe *p) { - int s ; + int i, r; struct dn_flow_set *pfs = &(p->fs); + struct dn_flow_queue *q; - /* - * The config program passes parameters as follows: + /* + * The config program passes parameters as follows: * bw = bits/second (0 means no limits), * delay = ms, must be translated into ticks. * qsize = slots/bytes - */ - p->delay = ( p->delay * hz ) / 1000 ; + */ + p->delay = ( p->delay * hz ) / 1000 ; /* We need either a pipe number or a flow_set number */ if (p->pipe_nr == 0 && pfs->fs_nr == 0) return EINVAL ; @@ -1494,64 +1593,74 @@ config_pipe(struct dn_pipe *p) return EINVAL ; if (p->pipe_nr != 0) { /* this is a pipe */ struct dn_pipe *x, *a, *b; - /* locate pipe */ + + lck_mtx_lock(dn_mutex); +/* locate pipe */ for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; a = b , b = b->next) ; if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ - x = _MALLOC(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ; + x = _MALLOC(sizeof(struct dn_pipe), M_DUMMYNET, M_DONTWAIT | M_ZERO) ; if (x == NULL) { - printf("ip_dummynet.c: no memory for new pipe\n"); + lck_mtx_unlock(dn_mutex); + printf("dummynet: no memory for new pipe\n"); return ENOSPC; } - bzero(x, sizeof(struct dn_pipe)); x->pipe_nr = p->pipe_nr; x->fs.pipe = x ; /* idle_heap is the only one from which we extract from the middle. */ x->idle_heap.size = x->idle_heap.elements = 0 ; x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos); - } else + } else { x = b; + /* Flush accumulated credit for all queues */ + for (i = 0; i <= x->fs.rq_size; i++) + for (q = x->fs.rq[i]; q; q = q->next) + q->numbytes = 0; + } - x->bandwidth = p->bandwidth ; + x->bandwidth = p->bandwidth ; x->numbytes = 0; /* just in case... 
*/ bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); x->ifp = NULL ; /* reset interface ptr */ - x->delay = p->delay ; + x->delay = p->delay ; set_fs_parms(&(x->fs), pfs); if ( x->fs.rq == NULL ) { /* a new pipe */ - s = alloc_hash(&(x->fs), pfs) ; - if (s) { - FREE(x, M_IPFW); - return s ; + r = alloc_hash(&(x->fs), pfs) ; + if (r) { + lck_mtx_unlock(dn_mutex); + FREE(x, M_DUMMYNET); + return r ; } - s = splimp() ; x->next = b ; if (a == NULL) all_pipes = x ; else a->next = x ; - splx(s); } + lck_mtx_unlock(dn_mutex); } else { /* config queue */ struct dn_flow_set *x, *a, *b ; + lck_mtx_lock(dn_mutex); /* locate flow_set */ for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ; a = b , b = b->next) ; if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ - if (pfs->parent_nr == 0) /* need link to a pipe */ - return EINVAL ; - x = _MALLOC(sizeof(struct dn_flow_set), M_IPFW, M_DONTWAIT); + if (pfs->parent_nr == 0) { /* need link to a pipe */ + lck_mtx_unlock(dn_mutex); + return EINVAL ; + } + x = _MALLOC(sizeof(struct dn_flow_set), M_DUMMYNET, M_DONTWAIT | M_ZERO); if (x == NULL) { - printf("ip_dummynet.c: no memory for new flow_set\n"); - return ENOSPC; + lck_mtx_unlock(dn_mutex); + printf("dummynet: no memory for new flow_set\n"); + return ENOSPC; } - bzero(x, sizeof(struct dn_flow_set)); x->fs_nr = pfs->fs_nr; x->parent_nr = pfs->parent_nr; x->weight = pfs->weight ; @@ -1561,26 +1670,28 @@ config_pipe(struct dn_pipe *p) x->weight = 100 ; } else { /* Change parent pipe not allowed; must delete and recreate */ - if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) - return EINVAL ; + if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) { + lck_mtx_unlock(dn_mutex); + return EINVAL ; + } x = b; } set_fs_parms(x, pfs); if ( x->rq == NULL ) { /* a new flow_set */ - s = alloc_hash(x, pfs) ; - if (s) { - FREE(x, M_IPFW); - return s ; + r = alloc_hash(x, pfs) ; + if (r) { + lck_mtx_unlock(dn_mutex); + FREE(x, M_DUMMYNET); + return r ; } - s = splimp() ; x->next = b; if (a == NULL) all_flow_sets = x; else a->next = x; - splx(s); } + lck_mtx_unlock(dn_mutex); } return 0 ; } @@ -1631,7 +1742,9 @@ dummynet_drain() { struct dn_flow_set *fs; struct dn_pipe *p; - struct dn_pkt *pkt; + struct mbuf *m, *mnext; + + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); heap_free(&ready_heap); heap_free(&wfq_ready_heap); @@ -1642,8 +1755,12 @@ dummynet_drain() for (p = all_pipes; p; p= p->next ) { purge_flow_set(&(p->fs), 0); - for (pkt = p->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + + mnext = p->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } p->head = p->tail = NULL ; } } @@ -1651,12 +1768,9 @@ dummynet_drain() /* * Fully delete a pipe or a queue, cleaning up associated info. */ -static int +static int delete_pipe(struct dn_pipe *p) { - int s ; - struct ip_fw_chain *chain ; - if (p->pipe_nr == 0 && p->fs.fs_nr == 0) return EINVAL ; if (p->pipe_nr != 0 && p->fs.fs_nr != 0) @@ -1665,13 +1779,14 @@ delete_pipe(struct dn_pipe *p) struct dn_pipe *a, *b; struct dn_flow_set *fs; + lck_mtx_lock(dn_mutex); /* locate pipe */ for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; a = b , b = b->next) ; - if (b == NULL || (b->pipe_nr != p->pipe_nr) ) + if (b == NULL || (b->pipe_nr != p->pipe_nr) ) { + lck_mtx_unlock(dn_mutex); return EINVAL ; /* not found */ - - s = splimp() ; + } /* unlink from list of pipes */ if (a == NULL) @@ -1679,14 +1794,12 @@ delete_pipe(struct dn_pipe *p) else a->next = b->next ; /* remove references to this pipe from the ip_fw rules. 
*/ - LIST_FOREACH(chain, &ip_fw_chain_head, next) - if (chain->rule->pipe_ptr == &(b->fs)) - chain->rule->pipe_ptr = NULL ; + flush_pipe_ptrs(&(b->fs)); /* remove all references to this pipe from flow_sets */ for (fs = all_flow_sets; fs; fs= fs->next ) if (fs->pipe == b) { - printf("++ ref to pipe %d from fs %d\n", + printf("dummynet: ++ ref to pipe %d from fs %d\n", p->pipe_nr, fs->fs_nr); fs->pipe = NULL ; purge_flow_set(fs, 0); @@ -1696,26 +1809,27 @@ delete_pipe(struct dn_pipe *p) /* remove reference to here from extract_heap and wfq_ready_heap */ pipe_remove_from_heap(&extract_heap, b); pipe_remove_from_heap(&wfq_ready_heap, b); - splx(s); - FREE(b, M_IPFW); + lck_mtx_unlock(dn_mutex); + + FREE(b, M_DUMMYNET); } else { /* this is a WF2Q queue (dn_flow_set) */ struct dn_flow_set *a, *b; + lck_mtx_lock(dn_mutex); /* locate set */ for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ; a = b , b = b->next) ; - if (b == NULL || (b->fs_nr != p->fs.fs_nr) ) + if (b == NULL || (b->fs_nr != p->fs.fs_nr) ) { + lck_mtx_unlock(dn_mutex); return EINVAL ; /* not found */ + } - s = splimp() ; if (a == NULL) all_flow_sets = b->next ; else a->next = b->next ; /* remove references to this flow_set from the ip_fw rules. */ - LIST_FOREACH(chain, &ip_fw_chain_head, next) - if (chain->rule->pipe_ptr == b) - chain->rule->pipe_ptr = NULL ; + flush_pipe_ptrs(b); if (b->pipe != NULL) { /* Update total weight on parent pipe and cleanup parent heaps */ @@ -1727,7 +1841,7 @@ delete_pipe(struct dn_pipe *p) #endif } purge_flow_set(b, 1); - splx(s); + lck_mtx_unlock(dn_mutex); } return 0 ; } @@ -1741,13 +1855,15 @@ dn_copy_set(struct dn_flow_set *set, char *bp) int i, copied = 0 ; struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + for (i = 0 ; i <= set->rq_size ; i++) for (q = set->rq[i] ; q ; q = q->next, qp++ ) { if (q->hash_slot != i) - printf("++ at %d: wrong slot (have %d, " + printf("dummynet: ++ at %d: wrong slot (have %d, " "should be %d)\n", copied, q->hash_slot, i); if (q->fs != set) - printf("++ at %d: wrong fs ptr (have %p, should be %p)\n", + printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", i, q->fs, set); copied++ ; bcopy(q, qp, sizeof( *q ) ); @@ -1757,21 +1873,20 @@ dn_copy_set(struct dn_flow_set *set, char *bp) qp->fs = NULL ; } if (copied != set->rq_elements) - printf("++ wrong count, have %d should be %d\n", + printf("dummynet: ++ wrong count, have %d should be %d\n", copied, set->rq_elements); return (char *)qp ; } -static int -dummynet_get(struct sockopt *sopt) +static size_t +dn_calc_size(void) { - char *buf, *bp ; /* bp is the "copy-pointer" */ - size_t size ; struct dn_flow_set *set ; struct dn_pipe *p ; - int s, error=0 ; + size_t size ; + + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); - s = splimp(); /* * compute size of data structures: list of pipes and flow_sets. 
*/ @@ -1781,10 +1896,37 @@ dummynet_get(struct sockopt *sopt) for (set = all_flow_sets ; set ; set = set->next ) size += sizeof ( *set ) + set->rq_elements * sizeof(struct dn_flow_queue); - buf = _MALLOC(size, M_TEMP, M_DONTWAIT); - if (buf == 0) { - splx(s); - return ENOBUFS ; + return size ; +} + +static int +dummynet_get(struct sockopt *sopt) +{ + char *buf, *bp ; /* bp is the "copy-pointer" */ + size_t size ; + struct dn_flow_set *set ; + struct dn_pipe *p ; + int error=0, i ; + + /* XXX lock held too long */ + lck_mtx_lock(dn_mutex); + /* + * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we + * cannot use this flag while holding a mutex. + */ + for (i = 0; i < 10; i++) { + size = dn_calc_size(); + lck_mtx_unlock(dn_mutex); + buf = _MALLOC(size, M_TEMP, M_WAITOK); + lck_mtx_lock(dn_mutex); + if (size == dn_calc_size()) + break; + FREE(buf, M_TEMP); + buf = NULL; + } + if (buf == NULL) { + lck_mtx_unlock(dn_mutex); + return ENOBUFS ; } for (p = all_pipes, bp = buf ; p ; p = p->next ) { struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ; @@ -1822,7 +1964,8 @@ dummynet_get(struct sockopt *sopt) bp += sizeof( *set ) ; bp = dn_copy_set( set, bp ); } - splx(s); + lck_mtx_unlock(dn_mutex); + error = sooptcopyout(sopt, buf, size); FREE(buf, M_TEMP); return error ; @@ -1843,7 +1986,7 @@ ip_dn_ctl(struct sockopt *sopt) switch (sopt->sopt_name) { default : - printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name); + printf("dummynet: -- unknown option %d", sopt->sopt_name); return EINVAL ; case IP_DUMMYNET_GET : @@ -1853,6 +1996,7 @@ ip_dn_ctl(struct sockopt *sopt) case IP_DUMMYNET_FLUSH : dummynet_flush() ; break ; + case IP_DUMMYNET_CONFIGURE : p = &tmp_pipe ; error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); @@ -1873,10 +2017,20 @@ ip_dn_ctl(struct sockopt *sopt) return error ; } -static void +void ip_dn_init(void) { - printf("DUMMYNET initialized (010124)\n"); + /* setup locks */ + dn_mutex_grp_attr = lck_grp_attr_alloc_init(); + dn_mutex_grp = lck_grp_alloc_init("dn", dn_mutex_grp_attr); + dn_mutex_attr = lck_attr_alloc_init(); + lck_attr_setdefault(dn_mutex_attr); + + if ((dn_mutex = lck_mtx_alloc_init(dn_mutex_grp, dn_mutex_attr)) == NULL) { + printf("ip_dn_init: can't alloc dn_mutex\n"); + return; + } + all_pipes = NULL ; all_flow_sets = NULL ; ready_heap.size = ready_heap.elements = 0 ; @@ -1888,37 +2042,8 @@ ip_dn_init(void) extract_heap.size = extract_heap.elements = 0 ; extract_heap.offset = 0 ; ip_dn_ctl_ptr = ip_dn_ctl; + ip_dn_io_ptr = dummynet_io; + ip_dn_ruledel_ptr = dn_rule_delete; + timeout(dummynet, NULL, 1); } - -static ip_dn_ctl_t *old_dn_ctl_ptr ; - -static int -dummynet_modevent(module_t mod, int type, void *data) -{ - int s ; - switch (type) { - case MOD_LOAD: - s = splimp(); - old_dn_ctl_ptr = ip_dn_ctl_ptr; - ip_dn_init(); - splx(s); - break; - case MOD_UNLOAD: - s = splimp(); - ip_dn_ctl_ptr = old_dn_ctl_ptr; - splx(s); - dummynet_flush(); - break ; - default: - break ; - } - return 0 ; -} - -static moduledata_t dummynet_mod = { - "dummynet", - dummynet_modevent, - NULL -} ; -DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/bsd/netinet/ip_dummynet.h b/bsd/netinet/ip_dummynet.h index 9a13ae239..c334a1f2a 100644 --- a/bsd/netinet/ip_dummynet.h +++ b/bsd/netinet/ip_dummynet.h @@ -20,25 +20,39 @@ * @APPLE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998 Luigi Rizzo + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * Portions Copyright (c) 2000 Akamba Corp. 
+ * All rights reserved * - * Redistribution and use in source forms, with and without modification, - * are permitted provided that this entire comment appears intact. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Redistribution in binary form may occur without any restrictions. - * Obviously, it would be nice if you gave credit where credit is due - * but requiring it would be too onerous. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * - * This software is provided ``AS IS'' without any warranties of any kind. - * - * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.10.2.3 2001/02/01 20:25:09 luigi Exp $ + * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.32 2004/08/17 22:05:54 andre Exp $ */ #ifndef _IP_DUMMYNET_H #define _IP_DUMMYNET_H #include -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE /* * Definition of dummynet data structures. In the structures, I decided * not to use the macros in in the hope of making the code @@ -85,6 +99,12 @@ typedef u_int64_t dn_key ; /* sorting key */ */ #define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) ) +/* + * The maximum hash table size for queues. This value must be a power + * of 2. + */ +#define DN_MAX_HASH_SIZE 65536 + /* * A heap entry is made of a key and a pointer to the actual * object stored in the heap. @@ -113,39 +133,28 @@ struct dn_heap { } ; /* - * MT_DUMMYNET is a new (fake) mbuf type that is prepended to the - * packet when it comes out of a pipe. The definition - * ought to go in /sys/sys/mbuf.h but here it is less intrusive. + * Packets processed by dummynet have an mbuf tag associated with + * them that carries their dummynet state. This is used within + * the dummynet code as well as outside when checking for special + * processing requirements. */ - -#define MT_DUMMYNET MT_CONTROL - -/* - * struct dn_pkt identifies a packet in the dummynet queue. The - * first part is really an m_hdr for implementation purposes, and some - * fields are saved there. When passing the packet back to the ip_input/ - * ip_output()/bdg_forward, the struct is prepended to the mbuf chain with type - * MT_DUMMYNET, and contains the pointer to the matching rule. - * - * Note: there is no real need to make this structure contain an m_hdr, - * in the future this should be changed to a normal data structure. 
- */ -struct dn_pkt { - struct m_hdr hdr ; -#define dn_next hdr.mh_nextpkt /* next element in queue */ -#define DN_NEXT(x) (struct dn_pkt *)(x)->dn_next -#define dn_m hdr.mh_next /* packet to be forwarded */ -#define dn_dir hdr.mh_flags /* action when pkt extracted from a queue */ +#ifdef KERNEL +struct dn_pkt_tag { + struct ip_fw *rule; /* matching rule */ + int dn_dir; /* action when packet comes out. */ #define DN_TO_IP_OUT 1 #define DN_TO_IP_IN 2 #define DN_TO_BDG_FWD 3 - dn_key output_time; /* when the pkt is due for delivery */ - struct ifnet *ifp; /* interface, for ip_output */ - struct sockaddr_in *dn_dst ; - struct route ro; /* route, for ip_output. MUST COPY */ - int flags ; /* flags, for ip_output (IPv6 ?) */ + dn_key output_time; /* when the pkt is due for delivery */ + struct ifnet *ifp; /* interface, for ip_output */ + struct sockaddr_in *dn_dst ; + struct route ro; /* route, for ip_output. MUST COPY */ + int flags ; /* flags, for ip_output (IPv6 ?) */ }; +#else +struct dn_pkt; +#endif /* KERNEL */ /* * Overall structure of dummynet (with WF2Q+): @@ -211,19 +220,24 @@ flow using a number of heaps defined into the pipe itself. * per flow queue. This contains the flow identifier, the queue * of packets, counters, and parameters used to support both RED and * WF2Q+. + * + * A dn_flow_queue is created and initialized whenever a packet for + * a new flow arrives. */ struct dn_flow_queue { struct dn_flow_queue *next ; struct ipfw_flow_id id ; - struct dn_pkt *head, *tail ; /* queue of packets */ + + struct mbuf *head, *tail ; /* queue of packets */ u_int len ; u_int len_bytes ; - long numbytes ; /* credit for transmission (dynamic queues) */ + u_long numbytes ; /* credit for transmission (dynamic queues) */ u_int64_t tot_pkts ; /* statistics counters */ u_int64_t tot_bytes ; u_int32_t drops ; - int hash_slot ; /* debugging/diagnostic */ + + int hash_slot ; /* debugging/diagnostic */ /* RED parameters */ int avg ; /* average queue length est. (scaled) */ @@ -232,12 +246,13 @@ struct dn_flow_queue { u_int32_t q_time ; /* start of queue idle time */ /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - dn_key sched_time ; /* current time when queue enters ready_heap */ + struct dn_flow_set *fs ; /* parent flow set */ + int heap_pos ; /* position (index) of struct in heap */ + dn_key sched_time ; /* current time when queue enters ready_heap */ - dn_key S,F ; /* start-time, finishing time */ - /* setting F < S means the timestamp is invalid. We only need + dn_key S,F ; /* start time, finish time */ + /* + * Setting F < S means the timestamp is invalid. We only need * to test this when the queue is empty. */ } ; @@ -250,6 +265,9 @@ struct dn_flow_queue { * hashing the flow-id, then scan the list looking for a match. * The size of the hash table (buckets) is configurable on a per-queue * basis. + * + * A dn_flow_set is created whenever a new queue or pipe is created (in the + * latter case, the structure is located inside the struct dn_pipe). 
*/ struct dn_flow_set { struct dn_flow_set *next; /* next flow set in all_flow_sets list */ @@ -257,26 +275,28 @@ struct dn_flow_set { u_short fs_nr ; /* flow_set number */ u_short flags_fs; #define DN_HAVE_FLOW_MASK 0x0001 -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 #define DN_IS_RED 0x0002 #define DN_IS_GENTLE_RED 0x0004 -#define DN_QSIZE_IS_BYTES 0x0008 /* queue measured in bytes */ +#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ +#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ +#define DN_IS_PIPE 0x4000 +#define DN_IS_QUEUE 0x8000 - struct dn_pipe *pipe ; /* pointer to parent pipe */ + struct dn_pipe *pipe ; /* pointer to parent pipe */ u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr ; /* pkt loss rate (2^31-1 means 100%) */ + int weight ; /* WFQ queue weight */ + int qsize ; /* queue size in slots or bytes */ + int plr ; /* pkt loss rate (2^31-1 means 100%) */ struct ipfw_flow_id flow_mask ; + /* hash table of queues onto this flow_set */ int rq_size ; /* number of slots */ int rq_elements ; /* active elements */ struct dn_flow_queue **rq; /* array of rq_size entries */ + u_int32_t last_expired ; /* do not expire too frequently */ - /* XXX some RED parameters as well ? */ int backlogged ; /* #active queues for this flowset */ /* RED parameters */ @@ -284,64 +304,61 @@ struct dn_flow_set { #define SCALE(x) ( (x) << SCALE_RED ) #define SCALE_VAL(x) ( (x) >> SCALE_RED ) #define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ + u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ + u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ + u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ + u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ + u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ + u_int lookup_depth ; /* depth of lookup table */ + int lookup_step ; /* granularity inside the lookup table */ + int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ + int avg_pkt_size ; /* medium packet size */ + int max_pkt_size ; /* max packet size */ } ; /* * Pipe descriptor. Contains global parameters, delay-line queue, * and the flow_set used for fixed-rate queues. - * - * For WF2Q support it also has 4 heaps holding dn_flow_queue: + * + * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: * not_eligible_heap, for queues whose start time is higher * than the virtual time. Sorted by start time. * scheduler_heap, for queues eligible for scheduling. 
Sorted by * finish time. - * backlogged_heap, all flows in the two heaps above, sorted by - * start time. This is used to compute the virtual time. * idle_heap, all flows that are idle and can be removed. We * do that on each tick so we do not slow down too much * operations during forwarding. * */ -struct dn_pipe { /* a pipe */ - struct dn_pipe *next ; +struct dn_pipe { /* a pipe */ + struct dn_pipe *next ; int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ + int bandwidth; /* really, bytes/tick. */ + int delay ; /* really, ticks */ - struct dn_pkt *head, *tail ; /* packets in delay line */ + struct mbuf *head, *tail ; /* packets in delay line */ /* WF2Q+ */ struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ struct dn_heap not_eligible_heap; /* top extract- key Start time */ struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ - dn_key V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - int numbytes; /* bit i can transmit (more or less). */ + dn_key V ; /* virtual time */ + int sum; /* sum of weights of all active sessions */ + int numbytes; /* bits I can transmit (more or less). */ - dn_key sched_time ; /* first time pipe is scheduled in ready_heap */ + dn_key sched_time ; /* time pipe was scheduled in ready_heap */ - /* the tx clock can come from an interface. In this case, the - * name is below, and the pointer is filled when the rule is - * configured. We identify this by setting the if_name to a - * non-empty string. + /* + * When the tx clock come from an interface (if_name[0] != '\0'), its name + * is stored below, whereas the ifp is filled when the rule is configured. */ - char if_name[16]; + char if_name[IFNAMSIZ]; struct ifnet *ifp ; int ready ; /* set if ifp != NULL and we got a signal from it */ @@ -350,17 +367,33 @@ struct dn_pipe { /* a pipe */ #ifdef KERNEL -MALLOC_DECLARE(M_IPFW); +void ip_dn_init(void); /* called from raw_ip.c:load_ipfw() */ -typedef int ip_dn_ctl_t __P((struct sockopt *)) ; -extern ip_dn_ctl_t *ip_dn_ctl_ptr; +typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ +typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ +typedef int ip_dn_io_t(struct mbuf *m, int pipe_nr, int dir, + struct ip_fw_args *fwa); +extern ip_dn_ctl_t *ip_dn_ctl_ptr; +extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; +extern ip_dn_io_t *ip_dn_io_ptr; +#define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) -void dn_rule_delete(void *r); /* used in ip_fw.c */ -int dummynet_io(int pipe, int dir, - struct mbuf *m, struct ifnet *ifp, struct route *ro, - struct sockaddr_in * dst, - struct ip_fw_chain *rule, int flags); -#endif - -#endif /* __APPLE_API_PRIVATE */ +/* + * Return the IPFW rule associated with the dummynet tag; if any. + * Make sure that the dummynet tag is not reused by lower layers. 
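+ *
+ * A sketch of the intended call site (illustrative only; the actual
+ * consumers are the ip_input()/ip_output() reinjection paths):
+ *
+ *	args.rule = ip_dn_claim_rule(m) ;
+ *	if (args.rule != NULL)
+ *		(resume the scan from that rule instead of from rule 0)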
+ */ +static __inline struct ip_fw * +ip_dn_claim_rule(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DUMMYNET, NULL); + if (mtag != NULL) { + mtag->m_tag_type = KERNEL_TAG_TYPE_NONE; + return (((struct dn_pkt_tag *)(mtag+1))->rule); + } else + return (NULL); +} +#endif /* KERNEL */ + +#endif /* PRIVATE */ #endif /* _IP_DUMMYNET_H */ diff --git a/bsd/netinet/ip_ecn.h b/bsd/netinet/ip_ecn.h index 4aa2132c5..6e452f578 100644 --- a/bsd/netinet/ip_ecn.h +++ b/bsd/netinet/ip_ecn.h @@ -54,13 +54,11 @@ */ #include +#ifdef KERNEL_PRIVATE #define ECN_ALLOWED 1 /* ECN allowed */ #define ECN_FORBIDDEN 0 /* ECN forbidden */ #define ECN_NOCARE (-1) /* no consideration to ECN */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern void ip_ecn_ingress __P((int, u_int8_t *, const u_int8_t *)); -extern void ip_ecn_egress __P((int, const u_int8_t *, u_int8_t *)); -#endif /* __APPLE_API_PRIVATE */ -#endif +extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); +extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +#endif KERNEL_PRIVATE diff --git a/bsd/netinet/ip_encap.c b/bsd/netinet/ip_encap.c index e4c9c7c9b..9517bbb05 100644 --- a/bsd/netinet/ip_encap.c +++ b/bsd/netinet/ip_encap.c @@ -113,10 +113,10 @@ MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); #endif -static void encap_add __P((struct encaptab *)); -static int mask_match __P((const struct encaptab *, const struct sockaddr *, - const struct sockaddr *)); -static void encap_fillarg __P((struct mbuf *, const struct encaptab *)); +static void encap_add(struct encaptab *); +static int mask_match(const struct encaptab *, const struct sockaddr *, + const struct sockaddr *); +static void encap_fillarg(struct mbuf *, const struct encaptab *); #ifndef LIST_HEAD_INITIALIZER /* rely upon BSS initialization */ @@ -412,7 +412,7 @@ const struct encaptab * encap_attach_func(af, proto, func, psw, arg) int af; int proto; - int (*func) __P((const struct mbuf *, int, int, void *)); + int (*func)(const struct mbuf *, int, int, void *); const struct protosw *psw; void *arg; { diff --git a/bsd/netinet/ip_encap.h b/bsd/netinet/ip_encap.h index a1c472e22..0a3aba152 100644 --- a/bsd/netinet/ip_encap.h +++ b/bsd/netinet/ip_encap.h @@ -54,8 +54,7 @@ #define _NETINET_IP_ENCAP_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct encaptab { LIST_ENTRY(encaptab) chain; @@ -65,23 +64,22 @@ struct encaptab { struct sockaddr_storage srcmask; struct sockaddr_storage dst; /* remote addr */ struct sockaddr_storage dstmask; - int (*func) __P((const struct mbuf *, int, int, void *)); + int (*func)(const struct mbuf *, int, int, void *); const struct protosw *psw; /* only pr_input will be used */ void *arg; /* passed via m->m_pkthdr.aux */ }; -void encap_init __P((void)); -void encap4_input __P((struct mbuf *, int)); -int encap6_input __P((struct mbuf **, int *)); -const struct encaptab *encap_attach __P((int, int, const struct sockaddr *, +void encap_init(void); +void encap4_input(struct mbuf *, int); +int encap6_input(struct mbuf **, int *); +const struct encaptab *encap_attach(int, int, const struct sockaddr *, const struct sockaddr *, const struct sockaddr *, - const struct sockaddr *, const struct protosw *, void *)); -const struct encaptab *encap_attach_func __P((int, int, - int (*) __P((const struct mbuf *, int, int, void *)), - const struct protosw *, void *)); -int encap_detach __P((const struct encaptab *)); -void *encap_getarg __P((struct mbuf *)); 
-#endif /* __APPLE_API_PRIVATE */ -#endif + const struct sockaddr *, const struct protosw *, void *); +const struct encaptab *encap_attach_func(int, int, + int (*)(const struct mbuf *, int, int, void *), + const struct protosw *, void *); +int encap_detach(const struct encaptab *); +void *encap_getarg(struct mbuf *); +#endif KERNEL_PRIVATE #endif /*_NETINET_IP_ENCAP_H_*/ diff --git a/bsd/netinet/ip_flow.c b/bsd/netinet/ip_flow.c index 59db2291f..ba3ca9816 100644 --- a/bsd/netinet/ip_flow.c +++ b/bsd/netinet/ip_flow.c @@ -194,7 +194,7 @@ ipflow_fastforward( dst = &ipf->ipf_ro.ro_dst; #ifdef __APPLE__ /* Not sure the rt_dlt is valid here !! XXX */ - if ((error = dlil_output(ifptodlt(rt->rt_ifp, PF_INET), m, (caddr_t) rt, dst, 0)) != 0) { + if ((error = dlil_output(rt->rt_ifp, PF_INET, m, (caddr_t) rt, dst, 0)) != 0) { #else if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { @@ -352,7 +352,7 @@ ipflow_create( * Fill in the updated information. */ ipf->ipf_ro = *ro; - rtref(ro->ro_rt); + rtref(ro->ro_rt); //### LD 5/25/04 needs rt_mtx lock ipf->ipf_dst = ip->ip_dst; ipf->ipf_src = ip->ip_src; ipf->ipf_tos = ip->ip_tos; diff --git a/bsd/netinet/ip_flow.h b/bsd/netinet/ip_flow.h index 23c25d366..0fed616e0 100644 --- a/bsd/netinet/ip_flow.h +++ b/bsd/netinet/ip_flow.h @@ -61,7 +61,7 @@ #define _NETINET_IP_FLOW_H #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct ipflow { LIST_ENTRY(ipflow) ipf_next; /* next ipflow in bucket */ struct in_addr ipf_dst; /* destination address */ @@ -76,6 +76,6 @@ struct ipflow { u_long ipf_errors; /* other errors returned by if_output */ u_long ipf_last_uses; /* number of uses in last period */ }; -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet/ip_fw.h b/bsd/netinet/ip_fw.h index 41eae12e2..3f19ae79f 100644 --- a/bsd/netinet/ip_fw.h +++ b/bsd/netinet/ip_fw.h @@ -38,6 +38,10 @@ #define _IP_FW_H #include +#ifdef IPFW2 +#include +#else /* !IPFW2, good old ipfw */ + #include @@ -287,8 +291,7 @@ struct ipfw_dyn_rule { /* * Main firewall chains definitions and global var's definitions. */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define IP_FW_PORT_DYNT_FLAG 0x10000 #define IP_FW_PORT_TEE_FLAG 0x20000 @@ -297,20 +300,20 @@ struct ipfw_dyn_rule { /* * Function definitions. */ -void ip_fw_init __P((void)); +void ip_fw_init(void); /* Firewall hooks */ struct ip; struct sockopt; -typedef int ip_fw_chk_t __P((struct ip **, int, struct ifnet *, u_int16_t *, - struct mbuf **, struct ip_fw_chain **, struct sockaddr_in **)); -typedef int ip_fw_ctl_t __P((struct sockopt *)); +typedef int ip_fw_chk_t(struct ip **, int, struct ifnet *, u_int16_t *, + struct mbuf **, struct ip_fw_chain **, struct sockaddr_in **); +typedef int ip_fw_ctl_t(struct sockopt *); extern ip_fw_chk_t *ip_fw_chk_ptr; extern ip_fw_ctl_t *ip_fw_ctl_ptr; extern int fw_one_pass; extern int fw_enable; extern struct ipfw_flow_id last_pkt ; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif KERNEL_PRIVATE -#endif /* _IP_FW_H */ +#endif !IPFW2 +#endif _IP_FW_H diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c new file mode 100644 index 000000000..5f45949f1 --- /dev/null +++ b/bsd/netinet/ip_fw2.c @@ -0,0 +1,3324 @@ +/* + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.6.2.18 2003/10/17 11:01:03 scottl Exp $ + */ + +#define DEB(x) +#define DDB(x) x + +/* + * Implement IP packet firewall (new version) + */ + +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ + +#ifdef IPFW2 +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if DUMMYNET +#include +#endif /* DUMMYNET */ + +#include +#include +#include +#include +#include +#include + +#ifdef IPSEC +#include +#endif + +#include /* XXX for ETHERTYPE_IP */ + +#include "ip_fw2_compat.h" + +#include +#include + +/* +#include +*/ /* XXX for in_cksum */ + +/* + * XXX This one should go in sys/mbuf.h. It is used to avoid that + * a firewall-generated packet loops forever through the firewall. + */ +#ifndef M_SKIP_FIREWALL +#define M_SKIP_FIREWALL 0x4000 +#endif + +/* + * set_disable contains one bit per set value (0..31). + * If the bit is set, all rules with the corresponding set + * are disabled. Set RESVD_SET(31) is reserved for the default rule + * and rules that are not deleted by the flush command, + * and CANNOT be disabled. + * Rules in set RESVD_SET can only be deleted explicitly. 
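+ *
+ * The corresponding test while scanning the ruleset is simply (see
+ * ipfw_chk() below):
+ *
+ *	if (set_disable & (1 << f->set))
+ *		continue;		(rule belongs to a disabled set)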
+ */ +static u_int32_t set_disable; + +int fw_verbose; +static int verbose_limit; + +#define IPFW_DEFAULT_RULE 65535 + +#define IPFW_RULE_INACTIVE 1 + +/* + * list of rules for layer 3 + */ +static struct ip_fw *layer3_chain; + +MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); + +static int fw_debug = 1; +static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ + +#ifdef SYSCTL_NODE +SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable, + CTLFLAG_RW, + &fw_enable, 0, "Enable ipfw"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, + &autoinc_step, 0, "Rule number autincrement step"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, + CTLFLAG_RW, + &fw_one_pass, 0, + "Only do a single pass through ipfw when using dummynet(4)"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, debug, + CTLFLAG_RW, + &fw_debug, 0, "Enable printing of debug ip_fw statements"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, + CTLFLAG_RW, + &fw_verbose, 0, "Log matches to ipfw rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, + &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); + +/* + * Description of dynamic rules. + * + * Dynamic rules are stored in lists accessed through a hash table + * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can + * be modified through the sysctl variable dyn_buckets which is + * updated when the table becomes empty. + * + * XXX currently there is only one list, ipfw_dyn. + * + * When a packet is received, its address fields are first masked + * with the mask defined for the rule, then hashed, then matched + * against the entries in the corresponding list. + * Dynamic rules can be used for different purposes: + * + stateful rules; + * + enforcing limits on the number of sessions; + * + in-kernel NAT (not implemented yet) + * + * The lifetime of dynamic rules is regulated by dyn_*_lifetime, + * measured in seconds and depending on the flags. + * + * The total number of dynamic rules is stored in dyn_count. + * The max number of dynamic rules is dyn_max. When we reach + * the maximum number of rules we do not create anymore. This is + * done to avoid consuming too much memory, but also too much + * time when searching on each packet (ideally, we should try instead + * to put a limit on the length of the list on each bucket...). + * + * Each dynamic rule holds a pointer to the parent ipfw rule so + * we know what action to perform. Dynamic rules are removed when + * the parent rule is deleted. XXX we should make them survive. + * + * There are some limitations with dynamic rules -- we do not + * obey the 'randomized match', and we do not do multiple + * passes through the firewall. XXX check the latter!!! + */ +static ipfw_dyn_rule **ipfw_dyn_v = NULL; +static u_int32_t dyn_buckets = 256; /* must be power of 2 */ +static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ + +/* + * Timeouts for various events in handing dynamic rules. + */ +static u_int32_t dyn_ack_lifetime = 300; +static u_int32_t dyn_syn_lifetime = 20; +static u_int32_t dyn_fin_lifetime = 1; +static u_int32_t dyn_rst_lifetime = 1; +static u_int32_t dyn_udp_lifetime = 10; +static u_int32_t dyn_short_lifetime = 5; + +/* + * Keepalives are sent if dyn_keepalive is set. They are sent every + * dyn_keepalive_period seconds, in the last dyn_keepalive_interval + * seconds of lifetime of a rule. 
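+ *
+ * For example, with the defaults below (interval 20, period 5), an
+ * established entry due to expire at time T is probed at roughly
+ * T-20, T-15, T-10 and T-5; traffic on the flow refreshes the entry,
+ * otherwise it is removed once T passes.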
+ * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower + * than dyn_keepalive_period. + */ + +static u_int32_t dyn_keepalive_interval = 20; +static u_int32_t dyn_keepalive_period = 5; +static u_int32_t dyn_keepalive = 1; /* do send keepalives */ + +static u_int32_t static_count; /* # of static rules */ +static u_int32_t static_len; /* size in bytes of static rules */ +static u_int32_t dyn_count; /* # of dynamic rules */ +static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ + +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, + &dyn_buckets, 0, "Number of dyn. buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, + &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, + &dyn_count, 0, "Number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, + &dyn_max, 0, "Max number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, + &static_count, 0, "Number of static rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, + &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, + &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, + &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, + &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, + &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, + &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, + &dyn_keepalive, 0, "Enable keepalives for dyn. rules"); + +#endif /* SYSCTL_NODE */ + + +extern lck_mtx_t *ip_mutex; +static ip_fw_chk_t ipfw_chk; + +/* firewall lock */ +lck_grp_t *ipfw_mutex_grp; +lck_grp_attr_t *ipfw_mutex_grp_attr; +lck_attr_t *ipfw_mutex_attr; +lck_mtx_t *ipfw_mutex; + +extern void ipfwsyslog( int level, char *format,...); + +#if DUMMYNET +ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL; /* hook into dummynet */ +#endif /* DUMMYNET */ + +#define KEV_LOG_SUBCLASS 10 +#define IPFWLOGEVENT 0 + +#define ipfwstring "ipfw:" +static size_t ipfwstringlen; + +#define dolog( a ) { \ + if ( fw_verbose == 2 ) /* Apple logging, log to ipfw.log */ \ + ipfwsyslog a ; \ + else log a ; \ +} + +void ipfwsyslog( int level, char *format,...) 
+{
+#define msgsize 100
+
+	struct kev_msg ev_msg;
+	va_list ap;
+	char msgBuf[msgsize];
+	char *dptr = msgBuf;
+	unsigned char pri;
+	int loglen;
+
+	va_start( ap, format );
+	loglen = vsnprintf(msgBuf, msgsize, format, ap);
+	va_end( ap );
+
+	ev_msg.vendor_code = KEV_VENDOR_APPLE;
+	ev_msg.kev_class = KEV_NETWORK_CLASS;
+	ev_msg.kev_subclass = KEV_LOG_SUBCLASS;
+	ev_msg.event_code = IPFWLOGEVENT;
+
+	/* get rid of the trailing \n */
+	dptr[loglen-1] = 0;
+
+	pri = LOG_PRI(level);
+
+	/* remove "ipfw:" prefix if logging to ipfw log */
+	if ( !(strncmp( ipfwstring, msgBuf, ipfwstringlen))){
+		dptr = msgBuf+ipfwstringlen;
+	}
+
+	ev_msg.dv[0].data_ptr = &pri;
+	ev_msg.dv[0].data_length = 1;
+	ev_msg.dv[1].data_ptr = dptr;
+	ev_msg.dv[1].data_length = 100; /* bug in kern_post_msg, it can't handle size > 256-msghdr */
+	ev_msg.dv[2].data_length = 0;
+
+	kev_post_msg(&ev_msg);
+}
+
+/*
+ * This macro maps an ip pointer into a layer3 header pointer of type T
+ */
+#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
+
+static __inline int
+icmptype_match(struct ip *ip, ipfw_insn_u32 *cmd)
+{
+	int type = L3HDR(struct icmp,ip)->icmp_type;
+
+	return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) );
+}
+
+#define TT	( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \
+    (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) )
+
+static int
+is_icmp_query(struct ip *ip)
+{
+	int type = L3HDR(struct icmp, ip)->icmp_type;
+	return (type <= ICMP_MAXTYPE && (TT & (1<<type)) );
+}
+#undef TT
+
+/*
+ * The following checks use two arrays of 8 or 16 bits to store the
+ * bits that we want set or clear, respectively. They are in the
+ * low and high half of cmd->arg1 or cmd->d[0].
+ *
+ * We scan options and store the bits we find set. We succeed if
+ *
+ *	(want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear
+ *
+ * The code is sometimes optimized not to store additional variables.
+ */
+
+static int
+flags_match(ipfw_insn *cmd, u_int8_t bits)
+{
+	u_char want_clear;
+	bits = ~bits;
+
+	if ( ((cmd->arg1 & 0xff) & bits) != 0)
+		return 0; /* some bits we want set were clear */
+	want_clear = (cmd->arg1 >> 8) & 0xff;
+	if ( (want_clear & bits) != want_clear)
+		return 0; /* some bits we want clear were set */
+	return 1;
+}
+
+static int
+ipopts_match(struct ip *ip, ipfw_insn *cmd)
+{
+	int optlen, bits = 0;
+	u_char *cp = (u_char *)(ip + 1);
+	int x = (ip->ip_hl << 2) - sizeof (struct ip);
+
+	for (; x > 0; x -= optlen, cp += optlen) {
+		int opt = cp[IPOPT_OPTVAL];
+
+		if (opt == IPOPT_EOL)
+			break;
+		if (opt == IPOPT_NOP)
+			optlen = 1;
+		else {
+			optlen = cp[IPOPT_OLEN];
+			if (optlen <= 0 || optlen > x)
+				return 0; /* invalid or truncated */
+		}
+		switch (opt) {
+
+		default:
+			break;
+
+		case IPOPT_LSRR:
+			bits |= IP_FW_IPOPT_LSRR;
+			break;
+
+		case IPOPT_SSRR:
+			bits |= IP_FW_IPOPT_SSRR;
+			break;
+
+		case IPOPT_RR:
+			bits |= IP_FW_IPOPT_RR;
+			break;
+
+		case IPOPT_TS:
+			bits |= IP_FW_IPOPT_TS;
+			break;
+		}
+	}
+	return (flags_match(cmd, bits));
+}
+
+static int
+tcpopts_match(struct ip *ip, ipfw_insn *cmd)
+{
+	int optlen, bits = 0;
+	struct tcphdr *tcp = L3HDR(struct tcphdr,ip);
+	u_char *cp = (u_char *)(tcp + 1);
+	int x = (tcp->th_off << 2) - sizeof(struct tcphdr);
+
+	for (; x > 0; x -= optlen, cp += optlen) {
+		int opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			optlen = cp[1];
+			if (optlen <= 0)
+				break;
+		}
+
+		switch (opt) {
+
+		default:
+			break;
+
+		case TCPOPT_MAXSEG:
+			bits |= IP_FW_TCPOPT_MSS;
+			break;
+
+		case TCPOPT_WINDOW:
+			bits |= IP_FW_TCPOPT_WINDOW;
+			break;
+
+		case TCPOPT_SACK_PERMITTED:
+		case TCPOPT_SACK:
+			bits |= IP_FW_TCPOPT_SACK;
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			bits |= IP_FW_TCPOPT_TS;
+			break;
+
+		case TCPOPT_CC:
+		case TCPOPT_CCNEW:
+		case TCPOPT_CCECHO:
+			bits |= IP_FW_TCPOPT_CC;
+			break;
+		}
+	}
+	return (flags_match(cmd, bits));
+}
+
+static int
+iface_match(struct ifnet *ifp, ipfw_insn_if *cmd)
+{
+	if (ifp == NULL)	/* no iface with this packet, match fails */
+		return 0;
+	/* Check by name or by IP address */
+	if (cmd->name[0] != '\0') { /* match by name */
+		/* Check unit number (-1 is wildcard) */
+		if (cmd->p.unit != -1 && cmd->p.unit != ifp->if_unit)
+			return(0);
+		/* Check name */
+		if (!strncmp(ifp->if_name, cmd->name, IFNAMSIZ))
+			return(1);
+	} else {
+		struct ifaddr *ia;
+
+		ifnet_lock_shared(ifp);
+		TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
+			if (ia->ifa_addr == NULL)
+				continue;
+			if (ia->ifa_addr->sa_family != AF_INET)
+				continue;
+			if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
+			    (ia->ifa_addr))->sin_addr.s_addr) {
+				ifnet_lock_done(ifp);
+				return(1);	/* match */
+			}
+		}
+		ifnet_lock_done(ifp);
+	}
+	return(0);	/* no match, fail ... */
+}
+
+/*
+ * The 'verrevpath' option checks that the interface that an IP packet
+ * arrives on is the same interface that traffic destined for the
+ * packet's source address would be routed out of. This is a measure
+ * to block forged packets. This is also commonly known as "anti-spoofing"
+ * or Unicast Reverse Path Forwarding (Unicast RPF) in Cisco-ese. The
+ * name of the knob is purposely reminiscent of the Cisco IOS command,
+ *
+ *   ip verify unicast reverse-path
+ *
+ * which implements the same functionality. But note that the syntax
+ * is misleading. The check may be performed on all IP packets whether
+ * unicast, multicast, or broadcast.
+ */
+static int
+verify_rev_path(struct in_addr src, struct ifnet *ifp)
+{
+	static struct route ro;
+	struct sockaddr_in *dst;
+
+	dst = (struct sockaddr_in *)&(ro.ro_dst);
+
+	/* Check if we've cached the route from the previous call. */
+	if (src.s_addr != dst->sin_addr.s_addr) {
+		ro.ro_rt = NULL;
+
+		bzero(dst, sizeof(*dst));
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr = src;
+
+		rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING);
+	}
+
+	if ((ro.ro_rt == NULL) || (ifp == NULL) ||
+	    (ro.ro_rt->rt_ifp->if_index != ifp->if_index))
+		return 0;
+
+	return 1;
+}
+
+
+static u_int64_t norule_counter;	/* counter for ipfw_log(NULL...) */
+
+#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0
+#define SNP(buf) buf, sizeof(buf)
+
+/*
+ * We enter here when we have a rule with O_LOG.
+ * XXX this function alone takes about 2Kbytes of code!
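+ *
+ * The SNPARGS() macro above lets successive snprintf() calls append
+ * to one buffer without overflowing it, as in this sketch (src and
+ * sport stand in for the values formatted below):
+ *
+ *	len = snprintf(SNPARGS(proto, 0), "TCP %s", src) ;
+ *	snprintf(SNPARGS(proto, len), ":%d", sport) ;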
+ */ +static void +ipfw_log(struct ip_fw *f, u_int hlen, struct ether_header *eh, + struct mbuf *m, struct ifnet *oif) +{ + char *action; + int limit_reached = 0; + char ipv4str[MAX_IPv4_STR_LEN]; + char action2[40], proto[48], fragment[28]; + + fragment[0] = '\0'; + proto[0] = '\0'; + + if (f == NULL) { /* bogus pkt */ + if (verbose_limit != 0 && norule_counter >= verbose_limit) + return; + norule_counter++; + if (norule_counter == verbose_limit) + limit_reached = verbose_limit; + action = "Refuse"; + } else { /* O_LOG is the first action, find the real one */ + ipfw_insn *cmd = ACTION_PTR(f); + ipfw_insn_log *l = (ipfw_insn_log *)cmd; + + if (l->max_log != 0 && l->log_left == 0) + return; + l->log_left--; + if (l->log_left == 0) + limit_reached = l->max_log; + cmd += F_LEN(cmd); /* point to first action */ + if (cmd->opcode == O_PROB) + cmd += F_LEN(cmd); + + action = action2; + switch (cmd->opcode) { + case O_DENY: + action = "Deny"; + break; + + case O_REJECT: + if (cmd->arg1==ICMP_REJECT_RST) + action = "Reset"; + else if (cmd->arg1==ICMP_UNREACH_HOST) + action = "Reject"; + else + snprintf(SNPARGS(action2, 0), "Unreach %d", + cmd->arg1); + break; + + case O_ACCEPT: + action = "Accept"; + break; + case O_COUNT: + action = "Count"; + break; + case O_DIVERT: + snprintf(SNPARGS(action2, 0), "Divert %d", + cmd->arg1); + break; + case O_TEE: + snprintf(SNPARGS(action2, 0), "Tee %d", + cmd->arg1); + break; + case O_SKIPTO: + snprintf(SNPARGS(action2, 0), "SkipTo %d", + cmd->arg1); + break; + case O_PIPE: + snprintf(SNPARGS(action2, 0), "Pipe %d", + cmd->arg1); + break; + case O_QUEUE: + snprintf(SNPARGS(action2, 0), "Queue %d", + cmd->arg1); + break; + case O_FORWARD_IP: { + ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; + int len; + + if (f->reserved_1 == IPFW_RULE_INACTIVE) { + break; + } + len = snprintf(SNPARGS(action2, 0), "Forward to %s", + inet_ntop(AF_INET, &sa->sa.sin_addr, ipv4str, sizeof(ipv4str))); + if (sa->sa.sin_port) + snprintf(SNPARGS(action2, len), ":%d", + sa->sa.sin_port); + } + break; + default: + action = "UNKNOWN"; + break; + } + } + + if (hlen == 0) { /* non-ip */ + snprintf(SNPARGS(proto, 0), "MAC"); + } else { + struct ip *ip = mtod(m, struct ip *); + /* these three are all aliases to the same thing */ + struct icmp *const icmp = L3HDR(struct icmp, ip); + struct tcphdr *const tcp = (struct tcphdr *)icmp; + struct udphdr *const udp = (struct udphdr *)icmp; + + int ip_off, offset, ip_len; + + int len; + + if (eh != NULL) { /* layer 2 packets are as on the wire */ + ip_off = ntohs(ip->ip_off); + ip_len = ntohs(ip->ip_len); + } else { + ip_off = ip->ip_off; + ip_len = ip->ip_len; + } + offset = ip_off & IP_OFFMASK; + switch (ip->ip_p) { + case IPPROTO_TCP: + len = snprintf(SNPARGS(proto, 0), "TCP %s", + inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str))); + if (offset == 0) + snprintf(SNPARGS(proto, len), ":%d %s:%d", + ntohs(tcp->th_sport), + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)), + ntohs(tcp->th_dport)); + else + snprintf(SNPARGS(proto, len), " %s", + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str))); + break; + + case IPPROTO_UDP: + len = snprintf(SNPARGS(proto, 0), "UDP %s", + inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str))); + if (offset == 0) + snprintf(SNPARGS(proto, len), ":%d %s:%d", + ntohs(udp->uh_sport), + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)), + ntohs(udp->uh_dport)); + else + snprintf(SNPARGS(proto, len), " %s", + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str))); + break; + + case 
IPPROTO_ICMP: + if (offset == 0) + len = snprintf(SNPARGS(proto, 0), + "ICMP:%u.%u ", + icmp->icmp_type, icmp->icmp_code); + else + len = snprintf(SNPARGS(proto, 0), "ICMP "); + len += snprintf(SNPARGS(proto, len), "%s", + inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str))); + snprintf(SNPARGS(proto, len), " %s", + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str))); + break; + + default: + len = snprintf(SNPARGS(proto, 0), "P:%d %s", ip->ip_p, + inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str))); + snprintf(SNPARGS(proto, len), " %s", + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str))); + break; + } + + if (ip_off & (IP_MF | IP_OFFMASK)) + snprintf(SNPARGS(fragment, 0), " (frag %d:%d@%d%s)", + ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), + offset << 3, + (ip_off & IP_MF) ? "+" : ""); + } + if (oif || m->m_pkthdr.rcvif) + { + dolog((LOG_AUTHPRIV | LOG_INFO, + "ipfw: %d %s %s %s via %s%d%s\n", + f ? f->rulenum : -1, + action, proto, oif ? "out" : "in", + oif ? oif->if_name : m->m_pkthdr.rcvif->if_name, + oif ? oif->if_unit : m->m_pkthdr.rcvif->if_unit, + fragment)); + } + else{ + dolog((LOG_AUTHPRIV | LOG_INFO, + "ipfw: %d %s %s [no if info]%s\n", + f ? f->rulenum : -1, + action, proto, fragment)); + } + if (limit_reached){ + dolog((LOG_AUTHPRIV | LOG_NOTICE, + "ipfw: limit %d reached on entry %d\n", + limit_reached, f ? f->rulenum : -1)); + } +} + +/* + * IMPORTANT: the hash function for dynamic rules must be commutative + * in source and destination (ip,port), because rules are bidirectional + * and we want to find both in the same bucket. + */ +static __inline int +hash_packet(struct ipfw_flow_id *id) +{ + u_int32_t i; + + i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); + i &= (curr_dyn_buckets - 1); + return i; +} + +/** + * unlink a dynamic rule from a chain. prev is a pointer to + * the previous one, q is a pointer to the rule to delete, + * head is a pointer to the head of the queue. + * Modifies q and potentially also head. + */ +#define UNLINK_DYN_RULE(prev, head, q) { \ + ipfw_dyn_rule *old_q = q; \ + \ + /* remove a refcount to the parent */ \ + if (q->dyn_type == O_LIMIT) \ + q->parent->count--; \ + DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\ + (q->id.src_ip), (q->id.src_port), \ + (q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); ) \ + if (prev != NULL) \ + prev->next = q = q->next; \ + else \ + head = q = q->next; \ + dyn_count--; \ + _FREE(old_q, M_IPFW); } + +#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) + +/** + * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. + * + * If keep_me == NULL, rules are deleted even if not expired, + * otherwise only expired rules are removed. + * + * The value of the second parameter is also used to point to identify + * a rule we absolutely do not want to remove (e.g. because we are + * holding a reference to it -- this is the case with O_LIMIT_PARENT + * rules). The pointer is only used for comparison, so any non-null + * value will do. 
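+ *
+ * The two styles of call are, e.g.:
+ *
+ *	remove_dyn_rule(rule, NULL) ;			(forced removal)
+ *	remove_dyn_rule(NULL, (ipfw_dyn_rule *)1) ;	(expire-only pass)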
+ */ +static void +remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) +{ + static u_int32_t last_remove = 0; + +#define FORCE (keep_me == NULL) + + ipfw_dyn_rule *prev, *q; + int i, pass = 0, max_pass = 0; + struct timeval timenow; + + getmicrotime(&timenow); + + if (ipfw_dyn_v == NULL || dyn_count == 0) + return; + /* do not expire more than once per second, it is useless */ + if (!FORCE && last_remove == timenow.tv_sec) + return; + last_remove = timenow.tv_sec; + + /* + * because O_LIMIT refer to parent rules, during the first pass only + * remove child and mark any pending LIMIT_PARENT, and remove + * them in a second pass. + */ +next_pass: + for (i = 0 ; i < curr_dyn_buckets ; i++) { + for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { + /* + * Logic can become complex here, so we split tests. + */ + if (q == keep_me) + goto next; + if (rule != NULL && rule != q->rule) + goto next; /* not the one we are looking for */ + if (q->dyn_type == O_LIMIT_PARENT) { + /* + * handle parent in the second pass, + * record we need one. + */ + max_pass = 1; + if (pass == 0) + goto next; + if (FORCE && q->count != 0 ) { + /* XXX should not happen! */ + printf("ipfw: OUCH! cannot remove rule," + " count %d\n", q->count); + } + } else { + if (!FORCE && + !TIME_LEQ( q->expire, timenow.tv_sec )) + goto next; + } + if (q->dyn_type != O_LIMIT_PARENT || !q->count) { + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; + } +next: + prev=q; + q=q->next; + } + } + if (pass++ < max_pass) + goto next_pass; +} + + +/** + * lookup a dynamic rule. + */ +static ipfw_dyn_rule * +lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, + struct tcphdr *tcp) +{ + /* + * stateful ipfw extensions. + * Lookup into dynamic session queue + */ +#define MATCH_REVERSE 0 +#define MATCH_FORWARD 1 +#define MATCH_NONE 2 +#define MATCH_UNKNOWN 3 +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) + + int i, dir = MATCH_NONE; + ipfw_dyn_rule *prev, *q=NULL; + struct timeval timenow; + + getmicrotime(&timenow); + + if (ipfw_dyn_v == NULL) + goto done; /* not found */ + i = hash_packet( pkt ); + for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { + if (q->dyn_type == O_LIMIT_PARENT && q->count) + goto next; + if (TIME_LEQ( q->expire, timenow.tv_sec)) { /* expire entry */ + int dounlink = 1; + + /* check if entry is TCP */ + if ( q->id.proto == IPPROTO_TCP ) + { + /* do not delete an established TCP connection which hasn't been closed by both sides */ + if ( (q->state & (BOTH_SYN | BOTH_FIN)) != (BOTH_SYN | BOTH_FIN) ) + dounlink = 0; + } + if ( dounlink ){ + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; + } + } + if (pkt->proto == q->id.proto && + q->dyn_type != O_LIMIT_PARENT) { + if (pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port ) { + dir = MATCH_FORWARD; + break; + } + if (pkt->src_ip == q->id.dst_ip && + pkt->dst_ip == q->id.src_ip && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port ) { + dir = MATCH_REVERSE; + break; + } + } +next: + prev = q; + q = q->next; + } + if (q == NULL) + goto done; /* q = NULL, not found */ + + if ( prev != NULL) { /* found and not in front */ + prev->next = q->next; + q->next = ipfw_dyn_v[i]; + ipfw_dyn_v[i] = q; + } + if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ + u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); + + q->state |= (dir == MATCH_FORWARD ) ? 
flags : (flags << 8); + switch (q->state) { + case TH_SYN: /* opening */ + q->expire = timenow.tv_sec + dyn_syn_lifetime; + break; + + case BOTH_SYN: /* move to established */ + case BOTH_SYN | TH_FIN : /* one side tries to close */ + case BOTH_SYN | (TH_FIN << 8) : + if (tcp) { +#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) + u_int32_t ack = ntohl(tcp->th_ack); + if (dir == MATCH_FORWARD) { + if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) + q->ack_fwd = ack; + else { /* ignore out-of-sequence */ + break; + } + } else { + if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) + q->ack_rev = ack; + else { /* ignore out-of-sequence */ + break; + } + } + } + q->expire = timenow.tv_sec + dyn_ack_lifetime; + break; + + case BOTH_SYN | BOTH_FIN: /* both sides closed */ + if (dyn_fin_lifetime >= dyn_keepalive_period) + dyn_fin_lifetime = dyn_keepalive_period - 1; + q->expire = timenow.tv_sec + dyn_fin_lifetime; + break; + + default: +#if 0 + /* + * reset or some invalid combination, but can also + * occur if we use keep-state the wrong way. + */ + if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) + printf("invalid state: 0x%x\n", q->state); +#endif + if (dyn_rst_lifetime >= dyn_keepalive_period) + dyn_rst_lifetime = dyn_keepalive_period - 1; + q->expire = timenow.tv_sec + dyn_rst_lifetime; + break; + } + } else if (pkt->proto == IPPROTO_UDP) { + q->expire = timenow.tv_sec + dyn_udp_lifetime; + } else { + /* other protocols */ + q->expire = timenow.tv_sec + dyn_short_lifetime; + } +done: + if (match_direction) + *match_direction = dir; + return q; +} + +static void +realloc_dynamic_table(void) +{ + /* + * Try reallocation, make sure we have a power of 2 and do + * not allow more than 64k entries. In case of overflow, + * default to 1024. + */ + + if (dyn_buckets > 65536) + dyn_buckets = 1024; + if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ + dyn_buckets = curr_dyn_buckets; /* reset */ + return; + } + curr_dyn_buckets = dyn_buckets; + if (ipfw_dyn_v != NULL) + _FREE(ipfw_dyn_v, M_IPFW); + for (;;) { + ipfw_dyn_v = _MALLOC(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), + M_IPFW, M_NOWAIT | M_ZERO); + if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) + break; + curr_dyn_buckets /= 2; + } +} + +/** + * Install state of type 'type' for a dynamic session. + * The hash table contains two type of rules: + * - regular rules (O_KEEP_STATE) + * - rules for sessions with limited number of sess per user + * (O_LIMIT). When they are created, the parent is + * increased by 1, and decreased on delete. In this case, + * the third parameter is the parent rule and not the chain. + * - "parent" rules for the above (O_LIMIT_PARENT). + */ +static ipfw_dyn_rule * +add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) +{ + ipfw_dyn_rule *r; + int i; + struct timeval timenow; + + getmicrotime(&timenow); + + if (ipfw_dyn_v == NULL || + (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { + realloc_dynamic_table(); + if (ipfw_dyn_v == NULL) + return NULL; /* failed ! 
*/ + } + i = hash_packet(id); + + r = _MALLOC(sizeof *r, M_IPFW, M_NOWAIT | M_ZERO); + if (r == NULL) { +#if IPFW_DEBUG + printf ("ipfw: sorry cannot allocate state\n"); +#endif + return NULL; + } + + /* increase refcount on parent, and set pointer */ + if (dyn_type == O_LIMIT) { + ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; + if ( parent->dyn_type != O_LIMIT_PARENT) + panic("invalid parent"); + parent->count++; + r->parent = parent; + rule = parent->rule; + } + + r->id = *id; + r->expire = timenow.tv_sec + dyn_syn_lifetime; + r->rule = rule; + r->dyn_type = dyn_type; + r->pcnt = r->bcnt = 0; + r->count = 0; + + r->bucket = i; + r->next = ipfw_dyn_v[i]; + ipfw_dyn_v[i] = r; + dyn_count++; + DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", + dyn_type, + (r->id.src_ip), (r->id.src_port), + (r->id.dst_ip), (r->id.dst_port), + dyn_count ); ) + return r; +} + +/** + * lookup dynamic parent rule using pkt and rule as search keys. + * If the lookup fails, then install one. + */ +static ipfw_dyn_rule * +lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) +{ + ipfw_dyn_rule *q; + int i; + struct timeval timenow; + + getmicrotime(&timenow); + + if (ipfw_dyn_v) { + i = hash_packet( pkt ); + for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) + if (q->dyn_type == O_LIMIT_PARENT && + rule== q->rule && + pkt->proto == q->id.proto && + pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port) { + q->expire = timenow.tv_sec + dyn_short_lifetime; + DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) + return q; + } + } + return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); +} + +/** + * Install dynamic state for rule type cmd->o.opcode + * + * Returns 1 (failure) if state is not installed because of errors or because + * session limitations are enforced. + */ +static int +install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, + struct ip_fw_args *args) +{ + static int last_log; + struct timeval timenow; + + ipfw_dyn_rule *q; + getmicrotime(&timenow); + + DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n", + cmd->o.opcode, + (args->f_id.src_ip), (args->f_id.src_port), + (args->f_id.dst_ip), (args->f_id.dst_port) );) + + q = lookup_dyn_rule(&args->f_id, NULL, NULL); + + if (q != NULL) { /* should never occur */ + if (last_log != timenow.tv_sec) { + last_log = timenow.tv_sec; + printf("ipfw: install_state: entry already present, done\n"); + } + return 0; + } + + if (dyn_count >= dyn_max) + /* + * Run out of slots, try to remove any expired rule. 
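+		 * (The (ipfw_dyn_rule *)1 below is just an arbitrary
+		 * non-NULL sentinel: it can never compare equal to a
+		 * real entry, so the call is an expire-only pass that
+		 * keeps every live rule.)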
+ */ + remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); + + if (dyn_count >= dyn_max) { + if (last_log != timenow.tv_sec) { + last_log = timenow.tv_sec; + printf("ipfw: install_state: Too many dynamic rules\n"); + } + return 1; /* cannot install, notify caller */ + } + + switch (cmd->o.opcode) { + case O_KEEP_STATE: /* bidir rule */ + add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); + break; + + case O_LIMIT: /* limit number of sessions */ + { + u_int16_t limit_mask = cmd->limit_mask; + struct ipfw_flow_id id; + ipfw_dyn_rule *parent; + + DEB(printf("ipfw: installing dyn-limit rule %d\n", + cmd->conn_limit);) + + id.dst_ip = id.src_ip = 0; + id.dst_port = id.src_port = 0; + id.proto = args->f_id.proto; + + if (limit_mask & DYN_SRC_ADDR) + id.src_ip = args->f_id.src_ip; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip = args->f_id.dst_ip; + if (limit_mask & DYN_SRC_PORT) + id.src_port = args->f_id.src_port; + if (limit_mask & DYN_DST_PORT) + id.dst_port = args->f_id.dst_port; + parent = lookup_dyn_parent(&id, rule); + if (parent == NULL) { + printf("ipfw: add parent failed\n"); + return 1; + } + if (parent->count >= cmd->conn_limit) { + /* + * See if we can remove some expired rule. + */ + remove_dyn_rule(rule, parent); + if (parent->count >= cmd->conn_limit) { + if (fw_verbose && last_log != timenow.tv_sec) { + last_log = timenow.tv_sec; + dolog((LOG_AUTHPRIV | LOG_DEBUG, + "drop session, too many entries\n")); + } + return 1; + } + } + add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); + } + break; + default: + printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode); + return 1; + } + lookup_dyn_rule(&args->f_id, NULL, NULL); /* XXX just set lifetime */ + return 0; +} + +/* + * Transmit a TCP packet, containing either a RST or a keepalive. + * When flags & TH_RST, we are sending a RST packet, because of a + * "reset" action matched the packet. + * Otherwise we are sending a keepalive, and flags & TH_ + */ +static void +send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) +{ + struct mbuf *m; + struct ip *ip; + struct tcphdr *tcp; + struct route sro; /* fake route */ + + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == 0) + return; + m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); + m->m_data += max_linkhdr; + + ip = mtod(m, struct ip *); + bzero(ip, m->m_len); + tcp = (struct tcphdr *)(ip + 1); /* no IP options */ + ip->ip_p = IPPROTO_TCP; + tcp->th_off = 5; + /* + * Assume we are sending a RST (or a keepalive in the reverse + * direction), swap src and destination addresses and ports. + */ + ip->ip_src.s_addr = htonl(id->dst_ip); + ip->ip_dst.s_addr = htonl(id->src_ip); + tcp->th_sport = htons(id->dst_port); + tcp->th_dport = htons(id->src_port); + if (flags & TH_RST) { /* we are sending a RST */ + if (flags & TH_ACK) { + tcp->th_seq = htonl(ack); + tcp->th_ack = htonl(0); + tcp->th_flags = TH_RST; + } else { + if (flags & TH_SYN) + seq++; + tcp->th_seq = htonl(0); + tcp->th_ack = htonl(seq); + tcp->th_flags = TH_RST | TH_ACK; + } + } else { + /* + * We are sending a keepalive. flags & TH_SYN determines + * the direction, forward if set, reverse if clear. + * NOTE: seq and ack are always assumed to be correct + * as set by the caller. This may be confusing... + */ + if (flags & TH_SYN) { + /* + * we have to rewrite the correct addresses! 
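+			 * (They were swapped unconditionally above, on
+			 * the assumption of a reverse-direction packet,
+			 * so a forward keepalive must swap them back.)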
+ */ + ip->ip_dst.s_addr = htonl(id->dst_ip); + ip->ip_src.s_addr = htonl(id->src_ip); + tcp->th_dport = htons(id->dst_port); + tcp->th_sport = htons(id->src_port); + } + tcp->th_seq = htonl(seq); + tcp->th_ack = htonl(ack); + tcp->th_flags = TH_ACK; + } + /* + * set ip_len to the payload size so we can compute + * the tcp checksum on the pseudoheader + * XXX check this, could save a couple of words ? + */ + ip->ip_len = htons(sizeof(struct tcphdr)); + tcp->th_sum = in_cksum(m, m->m_pkthdr.len); + /* + * now fill fields left out earlier + */ + ip->ip_ttl = ip_defttl; + ip->ip_len = m->m_pkthdr.len; + bzero (&sro, sizeof (sro)); + ip_rtaddr(ip->ip_dst, &sro); + m->m_flags |= M_SKIP_FIREWALL; + ip_output_list(m, 0, NULL, &sro, 0, NULL); + if (sro.ro_rt) + RTFREE(sro.ro_rt); +} + +/* + * sends a reject message, consuming the mbuf passed as an argument. + */ +static void +send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) +{ + + if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ + /* We need the IP header in host order for icmp_error(). */ + if (args->eh != NULL) { + struct ip *ip = mtod(args->m, struct ip *); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + } + lck_mtx_unlock(ip_mutex); + icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); + lck_mtx_lock(ip_mutex); + } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { + struct tcphdr *const tcp = + L3HDR(struct tcphdr, mtod(args->m, struct ip *)); + if ( (tcp->th_flags & TH_RST) == 0) { + lck_mtx_unlock(ip_mutex); + send_pkt(&(args->f_id), ntohl(tcp->th_seq), + ntohl(tcp->th_ack), + tcp->th_flags | TH_RST); + lck_mtx_lock(ip_mutex); + } + m_freem(args->m); + } else + m_freem(args->m); + args->m = NULL; +} + +/** + * + * Given an ip_fw *, lookup_next_rule will return a pointer + * to the next rule, which can be either the jump + * target (for skipto instructions) or the next one in the list (in + * all other cases including a missing jump target). + * The result is also written in the "next_rule" field of the rule. + * Backward jumps are not allowed, so start looking from the next + * rule... + * + * This never returns NULL -- in case we do not have an exact match, + * the next rule is returned. When the ruleset is changed, + * pointers are flushed so we are always correct. + */ + +static struct ip_fw * +lookup_next_rule(struct ip_fw *me) +{ + struct ip_fw *rule = NULL; + ipfw_insn *cmd; + + /* look for action, in case it is a skipto */ + cmd = ACTION_PTR(me); + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + if ( cmd->opcode == O_SKIPTO ) + for (rule = me->next; rule ; rule = rule->next) + if (rule->rulenum >= cmd->arg1) + break; + if (rule == NULL) /* failure or not a skipto */ + rule = me->next; + me->next_rule = rule; + return rule; +} + +/* + * The main check routine for the firewall. + * + * All arguments are in args so we can modify them and return them + * back to the caller. + * + * Parameters: + * + * args->m (in/out) The packet; we set to NULL when/if we nuke it. + * Starts with the IP header. + * args->eh (in) Mac header if present, or NULL for layer3 packet. + * args->oif Outgoing interface, or NULL if packet is incoming. + * The incoming interface is in the mbuf. (in) + * args->divert_rule (in/out) + * Skip up to the first rule past this rule number; + * upon return, non-zero port number for divert or tee. + * + * args->rule Pointer to the last matching rule (in/out) + * args->next_hop Socket we are forwarding to (out). 
+ * args->f_id Addresses grabbed from the packet (out) + * + * Return value: + * + * IP_FW_PORT_DENY_FLAG the packet must be dropped. + * 0 The packet is to be accepted and routed normally OR + * the packet was denied/rejected and has been dropped; + * in the latter case, *m is equal to NULL upon return. + * port Divert the packet to port, with these caveats: + * + * - If IP_FW_PORT_TEE_FLAG is set, tee the packet instead + * of diverting it (ie, 'ipfw tee'). + * + * - If IP_FW_PORT_DYNT_FLAG is set, interpret the lower + * 16 bits as a dummynet pipe number instead of diverting + */ + +static int +ipfw_chk(struct ip_fw_args *args) +{ + /* + * Local variables hold state during the processing of a packet. + * + * IMPORTANT NOTE: to speed up the processing of rules, there + * are some assumption on the values of the variables, which + * are documented here. Should you change them, please check + * the implementation of the various instructions to make sure + * that they still work. + * + * args->eh The MAC header. It is non-null for a layer2 + * packet, it is NULL for a layer-3 packet. + * + * m | args->m Pointer to the mbuf, as received from the caller. + * It may change if ipfw_chk() does an m_pullup, or if it + * consumes the packet because it calls send_reject(). + * XXX This has to change, so that ipfw_chk() never modifies + * or consumes the buffer. + * ip is simply an alias of the value of m, and it is kept + * in sync with it (the packet is supposed to start with + * the ip header). + */ + struct mbuf *m = args->m; + struct ip *ip = mtod(m, struct ip *); + + /* + * oif | args->oif If NULL, ipfw_chk has been called on the + * inbound path (ether_input, bdg_forward, ip_input). + * If non-NULL, ipfw_chk has been called on the outbound path + * (ether_output, ip_output). + */ + struct ifnet *oif = args->oif; + + struct ip_fw *f = NULL; /* matching rule */ + int retval = 0; + + /* + * hlen The length of the IPv4 header. + * hlen >0 means we have an IPv4 packet. + */ + u_int hlen = 0; /* hlen >0 means we have an IP pkt */ + + /* + * offset The offset of a fragment. offset != 0 means that + * we have a fragment at this offset of an IPv4 packet. + * offset == 0 means that (if this is an IPv4 packet) + * this is the first or only fragment. + */ + u_short offset = 0; + + /* + * Local copies of addresses. They are only valid if we have + * an IP packet. + * + * proto The protocol. Set to 0 for non-ip packets, + * or to the protocol read from the packet otherwise. + * proto != 0 means that we have an IPv4 packet. + * + * src_port, dst_port port numbers, in HOST format. Only + * valid for TCP and UDP packets. + * + * src_ip, dst_ip ip addresses, in NETWORK format. + * Only valid for IPv4 packets. 
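+	 *
+	 * Keeping the addresses in network format lets the address
+	 * opcodes compare raw 32-bit words against the rule body;
+	 * e.g. the O_IP_SRC_MASK case below reduces to
+	 *
+	 *	match = (p[0] == (src_ip.s_addr & p[1])) ;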
+ */ + u_int8_t proto; + u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ + struct in_addr src_ip, dst_ip; /* NOTE: network format */ + u_int16_t ip_len=0; + int pktlen; + int dyn_dir = MATCH_UNKNOWN; + ipfw_dyn_rule *q = NULL; + struct timeval timenow; + + if (m->m_flags & M_SKIP_FIREWALL) { + return 0; /* accept */ + } + + lck_mtx_lock(ipfw_mutex); + + getmicrotime(&timenow); + /* + * dyn_dir = MATCH_UNKNOWN when rules unchecked, + * MATCH_NONE when checked and not matched (q = NULL), + * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) + */ + + pktlen = m->m_pkthdr.len; + if (args->eh == NULL || /* layer 3 packet */ + ( m->m_pkthdr.len >= sizeof(struct ip) && + ntohs(args->eh->ether_type) == ETHERTYPE_IP)) + hlen = ip->ip_hl << 2; + + /* + * Collect parameters into local variables for faster matching. + */ + if (hlen == 0) { /* do not grab addresses for non-ip pkts */ + proto = args->f_id.proto = 0; /* mark f_id invalid */ + goto after_ip_checks; + } + + proto = args->f_id.proto = ip->ip_p; + src_ip = ip->ip_src; + dst_ip = ip->ip_dst; + if (args->eh != NULL) { /* layer 2 packets are as on the wire */ + offset = ntohs(ip->ip_off) & IP_OFFMASK; + ip_len = ntohs(ip->ip_len); + } else { + offset = ip->ip_off & IP_OFFMASK; + ip_len = ip->ip_len; + } + pktlen = ip_len < pktlen ? ip_len : pktlen; + +#define PULLUP_TO(len) \ + do { \ + if ((m)->m_len < (len)) { \ + args->m = m = m_pullup(m, (len)); \ + if (m == 0) \ + goto pullup_failed; \ + ip = mtod(m, struct ip *); \ + } \ + } while (0) + + if (offset == 0) { + switch (proto) { + case IPPROTO_TCP: + { + struct tcphdr *tcp; + + PULLUP_TO(hlen + sizeof(struct tcphdr)); + tcp = L3HDR(struct tcphdr, ip); + dst_port = tcp->th_dport; + src_port = tcp->th_sport; + args->f_id.flags = tcp->th_flags; + } + break; + + case IPPROTO_UDP: + { + struct udphdr *udp; + + PULLUP_TO(hlen + sizeof(struct udphdr)); + udp = L3HDR(struct udphdr, ip); + dst_port = udp->uh_dport; + src_port = udp->uh_sport; + } + break; + + case IPPROTO_ICMP: + PULLUP_TO(hlen + 4); /* type, code and checksum. */ + args->f_id.flags = L3HDR(struct icmp, ip)->icmp_type; + break; + + default: + break; + } +#undef PULLUP_TO + } + + args->f_id.src_ip = ntohl(src_ip.s_addr); + args->f_id.dst_ip = ntohl(dst_ip.s_addr); + args->f_id.src_port = src_port = ntohs(src_port); + args->f_id.dst_port = dst_port = ntohs(dst_port); + +after_ip_checks: + if (args->rule) { + /* + * Packet has already been tagged. Look for the next rule + * to restart processing. + * + * If fw_one_pass != 0 then just accept it. + * XXX should not happen here, but optimized out in + * the caller. + */ + if (fw_one_pass) { + lck_mtx_unlock(ipfw_mutex); + return 0; + } + + f = args->rule->next_rule; + if (f == NULL) + f = lookup_next_rule(args->rule); + } else { + /* + * Find the starting rule. It can be either the first + * one, or the one after divert_rule if asked so. + */ + int skipto = args->divert_rule; + + f = layer3_chain; + if (args->eh == NULL && skipto != 0) { + if (skipto >= IPFW_DEFAULT_RULE) { + lck_mtx_unlock(ipfw_mutex); + return(IP_FW_PORT_DENY_FLAG); /* invalid */ + } + while (f && f->rulenum <= skipto) + f = f->next; + if (f == NULL) { /* drop packet */ + lck_mtx_unlock(ipfw_mutex); + return(IP_FW_PORT_DENY_FLAG); + } + } + } + args->divert_rule = 0; /* reset to avoid confusion later */ + + /* + * Now scan the rules, and parse microinstructions for each rule. 
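+	 *
+	 * Each rule body is an array of variable-length instructions,
+	 * so the scan below has the shape
+	 *
+	 *	for (l = f->cmd_len, cmd = f->cmd ; l > 0 ;
+	 *	     l -= cmdlen, cmd += cmdlen)
+	 *		... dispatch on cmd->opcode, cmdlen = F_LEN(cmd) ...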
+ */ + for (; f; f = f->next) { + int l, cmdlen; + ipfw_insn *cmd; + int skip_or; /* skip rest of OR block */ + +again: + if (f->reserved_1 == IPFW_RULE_INACTIVE) { + continue; + } + + if (set_disable & (1 << f->set) ) + continue; + + skip_or = 0; + for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; + l -= cmdlen, cmd += cmdlen) { + int match; + + /* + * check_body is a jump target used when we find a + * CHECK_STATE, and need to jump to the body of + * the target rule. + */ + +check_body: + cmdlen = F_LEN(cmd); + /* + * An OR block (insn_1 || .. || insn_n) has the + * F_OR bit set in all but the last instruction. + * The first match will set "skip_or", and cause + * the following instructions to be skipped until + * past the one with the F_OR bit clear. + */ + if (skip_or) { /* skip this instruction */ + if ((cmd->len & F_OR) == 0) + skip_or = 0; /* next one is good */ + continue; + } + match = 0; /* set to 1 if we succeed */ + + switch (cmd->opcode) { + /* + * The first set of opcodes compares the packet's + * fields with some pattern, setting 'match' if a + * match is found. At the end of the loop there is + * logic to deal with F_NOT and F_OR flags associated + * with the opcode. + */ + case O_NOP: + match = 1; + break; + + case O_FORWARD_MAC: + printf("ipfw: opcode %d unimplemented\n", + cmd->opcode); + break; + +#ifndef __APPLE__ + case O_GID: +#endif + case O_UID: + /* + * We only check offset == 0 && proto != 0, + * as this ensures that we have an IPv4 + * packet with the ports info. + */ + if (offset!=0) + break; + + { + struct inpcbinfo *pi; + int wildcard; + struct inpcb *pcb; + + if (proto == IPPROTO_TCP) { + wildcard = 0; + pi = &tcbinfo; + } else if (proto == IPPROTO_UDP) { + wildcard = 1; + pi = &udbinfo; + } else + break; + + pcb = (oif) ? + in_pcblookup_hash(pi, + dst_ip, htons(dst_port), + src_ip, htons(src_port), + wildcard, oif) : + in_pcblookup_hash(pi, + src_ip, htons(src_port), + dst_ip, htons(dst_port), + wildcard, NULL); + + if (pcb == NULL || pcb->inp_socket == NULL) + break; +#if __FreeBSD_version < 500034 +#define socheckuid(a,b) (kauth_cred_getuid((a)->so_cred) != (b)) +#endif + if (cmd->opcode == O_UID) { + match = +#ifdef __APPLE__ + (pcb->inp_socket->so_uid == (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); +#else + !socheckuid(pcb->inp_socket, + (uid_t)((ipfw_insn_u32 *)cmd)->d[0]); +#endif + } +#ifndef __APPLE__ + else { + match = 0; + kauth_cred_ismember_gid(pcb->inp_socket->so_cred, + (gid_t)((ipfw_insn_u32 *)cmd)->d[0], &match); + } +#endif + } + + break; + + case O_RECV: + match = iface_match(m->m_pkthdr.rcvif, + (ipfw_insn_if *)cmd); + break; + + case O_XMIT: + match = iface_match(oif, (ipfw_insn_if *)cmd); + break; + + case O_VIA: + match = iface_match(oif ? 
oif : + m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); + break; + + case O_MACADDR2: + if (args->eh != NULL) { /* have MAC header */ + u_int32_t *want = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->addr; + u_int32_t *mask = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->mask; + u_int32_t *hdr = (u_int32_t *)args->eh; + + match = + ( want[0] == (hdr[0] & mask[0]) && + want[1] == (hdr[1] & mask[1]) && + want[2] == (hdr[2] & mask[2]) ); + } + break; + + case O_MAC_TYPE: + if (args->eh != NULL) { + u_int16_t t = + ntohs(args->eh->ether_type); + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; !match && i>0; + i--, p += 2) + match = (t>=p[0] && t<=p[1]); + } + break; + + case O_FRAG: + match = (hlen > 0 && offset != 0); + break; + + case O_IN: /* "out" is "not in" */ + match = (oif == NULL); + break; + + case O_LAYER2: + match = (args->eh != NULL); + break; + + case O_PROTO: + /* + * We do not allow an arg of 0 so the + * check of "proto" only suffices. + */ + match = (proto == cmd->arg1); + break; + + case O_IP_SRC: + match = (hlen > 0 && + ((ipfw_insn_ip *)cmd)->addr.s_addr == + src_ip.s_addr); + break; + + case O_IP_SRC_MASK: + case O_IP_DST_MASK: + if (hlen > 0) { + uint32_t a = + (cmd->opcode == O_IP_DST_MASK) ? + dst_ip.s_addr : src_ip.s_addr; + uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; + int i = cmdlen-1; + + for (; !match && i>0; i-= 2, p+= 2) + match = (p[0] == (a & p[1])); + } + break; + + case O_IP_SRC_ME: + if (hlen > 0) { + struct ifnet *tif; + + INADDR_TO_IFP(src_ip, tif); + match = (tif != NULL); + } + break; + + case O_IP_DST_SET: + case O_IP_SRC_SET: + if (hlen > 0) { + u_int32_t *d = (u_int32_t *)(cmd+1); + u_int32_t addr = + cmd->opcode == O_IP_DST_SET ? + args->f_id.dst_ip : + args->f_id.src_ip; + + if (addr < d[0]) + break; + addr -= d[0]; /* subtract base */ + match = (addr < cmd->arg1) && + ( d[ 1 + (addr>>5)] & + (1<<(addr & 0x1f)) ); + } + break; + + case O_IP_DST: + match = (hlen > 0 && + ((ipfw_insn_ip *)cmd)->addr.s_addr == + dst_ip.s_addr); + break; + + case O_IP_DST_ME: + if (hlen > 0) { + struct ifnet *tif; + + INADDR_TO_IFP(dst_ip, tif); + match = (tif != NULL); + } + break; + + case O_IP_SRCPORT: + case O_IP_DSTPORT: + /* + * offset == 0 && proto != 0 is enough + * to guarantee that we have an IPv4 + * packet with port info. + */ + if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) + && offset == 0) { + u_int16_t x = + (cmd->opcode == O_IP_SRCPORT) ? 
+ src_port : dst_port ; + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; !match && i>0; + i--, p += 2) + match = (x>=p[0] && x<=p[1]); + } + break; + + case O_ICMPTYPE: + match = (offset == 0 && proto==IPPROTO_ICMP && + icmptype_match(ip, (ipfw_insn_u32 *)cmd) ); + break; + + case O_IPOPT: + match = (hlen > 0 && ipopts_match(ip, cmd) ); + break; + + case O_IPVER: + match = (hlen > 0 && cmd->arg1 == ip->ip_v); + break; + + case O_IPID: + case O_IPLEN: + case O_IPTTL: + if (hlen > 0) { /* only for IP packets */ + uint16_t x; + uint16_t *p; + int i; + + if (cmd->opcode == O_IPLEN) + x = ip_len; + else if (cmd->opcode == O_IPTTL) + x = ip->ip_ttl; + else /* must be IPID */ + x = ntohs(ip->ip_id); + if (cmdlen == 1) { + match = (cmd->arg1 == x); + break; + } + /* otherwise we have ranges */ + p = ((ipfw_insn_u16 *)cmd)->ports; + i = cmdlen - 1; + for (; !match && i>0; i--, p += 2) + match = (x >= p[0] && x <= p[1]); + } + break; + + case O_IPPRECEDENCE: + match = (hlen > 0 && + (cmd->arg1 == (ip->ip_tos & 0xe0)) ); + break; + + case O_IPTOS: + match = (hlen > 0 && + flags_match(cmd, ip->ip_tos)); + break; + + case O_TCPFLAGS: + match = (proto == IPPROTO_TCP && offset == 0 && + flags_match(cmd, + L3HDR(struct tcphdr,ip)->th_flags)); + break; + + case O_TCPOPTS: + match = (proto == IPPROTO_TCP && offset == 0 && + tcpopts_match(ip, cmd)); + break; + + case O_TCPSEQ: + match = (proto == IPPROTO_TCP && offset == 0 && + ((ipfw_insn_u32 *)cmd)->d[0] == + L3HDR(struct tcphdr,ip)->th_seq); + break; + + case O_TCPACK: + match = (proto == IPPROTO_TCP && offset == 0 && + ((ipfw_insn_u32 *)cmd)->d[0] == + L3HDR(struct tcphdr,ip)->th_ack); + break; + + case O_TCPWIN: + match = (proto == IPPROTO_TCP && offset == 0 && + cmd->arg1 == + L3HDR(struct tcphdr,ip)->th_win); + break; + + case O_ESTAB: + /* reject packets which have SYN only */ + /* XXX should i also check for TH_ACK ? */ + match = (proto == IPPROTO_TCP && offset == 0 && + (L3HDR(struct tcphdr,ip)->th_flags & + (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); + break; + + case O_LOG: + if (fw_verbose) + ipfw_log(f, hlen, args->eh, m, oif); + match = 1; + break; + + case O_PROB: + match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); + break; + + case O_VERREVPATH: + /* Outgoing packets automatically pass/match */ + match = ((oif != NULL) || + (m->m_pkthdr.rcvif == NULL) || + verify_rev_path(src_ip, m->m_pkthdr.rcvif)); + break; + + case O_IPSEC: +#ifdef FAST_IPSEC + match = (m_tag_find(m, + PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); +#endif +#ifdef IPSEC + match = (ipsec_gethist(m, NULL) != NULL); +#endif + /* otherwise no match */ + break; + + /* + * The second set of opcodes represents 'actions', + * i.e. the terminal part of a rule once the packet + * matches all previous patterns. + * Typically there is only one action for each rule, + * and the opcode is stored at the end of the rule + * (but there are exceptions -- see below). + * + * In general, here we set retval and terminate the + * outer loop (would be a 'break 3' in some language, + * but we need to do a 'goto done'). + * + * Exceptions: + * O_COUNT and O_SKIPTO actions: + * instead of terminating, we jump to the next rule + * ('goto next_rule', equivalent to a 'break 2'), + * or to the SKIPTO target ('goto again' after + * having set f, cmd and l), respectively. + * + * O_LIMIT and O_KEEP_STATE: these opcodes are + * not real 'actions', and are stored right + * before the 'action' part of the rule. 
+ *   These opcodes try to install an entry in the
+ *   state tables; if successful, we continue with
+ *   the next opcode (match=1; break;), otherwise
+ *   the packet must be dropped
+ *   ('goto done' after setting retval);
+ *
+ * O_PROBE_STATE and O_CHECK_STATE: these opcodes
+ *   cause a lookup of the state table, and a jump
+ *   to the 'action' part of the parent rule
+ *   ('goto check_body') if an entry is found, or
+ *   (CHECK_STATE only) a jump to the next rule if
+ *   the entry is not found ('goto next_rule').
+ *   The result of the lookup is cached so that
+ *   further instances of these opcodes are
+ *   effectively NOPs.
+ */
+			case O_LIMIT:
+			case O_KEEP_STATE:
+				if (install_state(f,
+				    (ipfw_insn_limit *)cmd, args)) {
+					retval = IP_FW_PORT_DENY_FLAG;
+					goto done; /* error/limit violation */
+				}
+				match = 1;
+				break;
+
+			case O_PROBE_STATE:
+			case O_CHECK_STATE:
+				/*
+				 * Dynamic rules are checked at the first
+				 * keep-state or check-state occurrence,
+				 * with the result being stored in dyn_dir.
+				 * The compiler introduces a PROBE_STATE
+				 * instruction for us when we have a
+				 * KEEP_STATE (because PROBE_STATE needs
+				 * to be run first).
+				 */
+				if (dyn_dir == MATCH_UNKNOWN &&
+				    (q = lookup_dyn_rule(&args->f_id,
+				     &dyn_dir, proto == IPPROTO_TCP ?
+					L3HDR(struct tcphdr, ip) : NULL))
+					!= NULL) {
+					/*
+					 * Found dynamic entry, update stats
+					 * and jump to the 'action' part of
+					 * the parent rule.
+					 */
+					q->pcnt++;
+					q->bcnt += pktlen;
+					f = q->rule;
+					cmd = ACTION_PTR(f);
+					l = f->cmd_len - f->act_ofs;
+					goto check_body;
+				}
+				/*
+				 * Dynamic entry not found. If CHECK_STATE,
+				 * skip to next rule, if PROBE_STATE just
+				 * ignore and continue with next opcode.
+				 */
+				if (cmd->opcode == O_CHECK_STATE)
+					goto next_rule;
+				match = 1;
+				break;
+
+			case O_ACCEPT:
+				retval = 0;	/* accept */
+				goto done;
+
+			case O_PIPE:
+			case O_QUEUE:
+				args->rule = f; /* report matching rule */
+				retval = cmd->arg1 | IP_FW_PORT_DYNT_FLAG;
+				goto done;
+
+			case O_DIVERT:
+			case O_TEE:
+				if (args->eh) /* not on layer 2 */
+					break;
+				args->divert_rule = f->rulenum;
+				retval = (cmd->opcode == O_DIVERT) ?
+				    cmd->arg1 :
+				    cmd->arg1 | IP_FW_PORT_TEE_FLAG;
+				goto done;
+
+			case O_COUNT:
+			case O_SKIPTO:
+				f->pcnt++;	/* update stats */
+				f->bcnt += pktlen;
+				f->timestamp = timenow.tv_sec;
+				if (cmd->opcode == O_COUNT)
+					goto next_rule;
+				/* handle skipto */
+				if (f->next_rule == NULL)
+					lookup_next_rule(f);
+				f = f->next_rule;
+				goto again;
+
+			case O_REJECT:
+				/*
+				 * Drop the packet and send a reject notice
+				 * if the packet is not ICMP (or is an ICMP
+				 * query), and it is not multicast/broadcast.
+ */ + if (hlen > 0 && + (proto != IPPROTO_ICMP || + is_icmp_query(ip)) && + !(m->m_flags & (M_BCAST|M_MCAST)) && + !IN_MULTICAST(dst_ip.s_addr)) { + send_reject(args, cmd->arg1, + offset,ip_len); + m = args->m; + } + /* FALLTHROUGH */ + case O_DENY: + retval = IP_FW_PORT_DENY_FLAG; + goto done; + + case O_FORWARD_IP: + if (args->eh) /* not valid on layer2 pkts */ + break; + if (!q || dyn_dir == MATCH_FORWARD) + args->next_hop = + &((ipfw_insn_sa *)cmd)->sa; + retval = 0; + goto done; + + default: + panic("-- unknown opcode %d\n", cmd->opcode); + } /* end of switch() on opcodes */ + + if (cmd->len & F_NOT) + match = !match; + + if (match) { + if (cmd->len & F_OR) + skip_or = 1; + } else { + if (!(cmd->len & F_OR)) /* not an OR block, */ + break; /* try next rule */ + } + + } /* end of inner for, scan opcodes */ + +next_rule:; /* try next rule */ + + } /* end of outer for, scan rules */ + printf("ipfw: ouch!, skip past end of rules, denying packet\n"); + lck_mtx_unlock(ipfw_mutex); + return(IP_FW_PORT_DENY_FLAG); + +done: + /* Update statistics */ + f->pcnt++; + f->bcnt += pktlen; + f->timestamp = timenow.tv_sec; + lck_mtx_unlock(ipfw_mutex); + return retval; + +pullup_failed: + if (fw_verbose) + printf("ipfw: pullup failed\n"); + lck_mtx_unlock(ipfw_mutex); + return(IP_FW_PORT_DENY_FLAG); +} + +/* + * When a rule is added/deleted, clear the next_rule pointers in all rules. + * These will be reconstructed on the fly as packets are matched. + * Must be called at splimp(). + */ +static void +flush_rule_ptrs(void) +{ + struct ip_fw *rule; + + for (rule = layer3_chain; rule; rule = rule->next) + rule->next_rule = NULL; +} + +/* + * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given + * pipe/queue, or to all of them (match == NULL). + * Must be called at splimp(). + */ +void +flush_pipe_ptrs(struct dn_flow_set *match) +{ + struct ip_fw *rule; + + for (rule = layer3_chain; rule; rule = rule->next) { + ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule); + + if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE) + continue; + /* + * XXX Use bcmp/bzero to handle pipe_ptr to overcome + * possible alignment problems on 64-bit architectures. + * This code is seldom used so we do not worry too + * much about efficiency. + */ + if (match == NULL || + !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) ) + bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr)); + } +} + +/* + * Add a new rule to the list. Copy the rule into a malloc'ed area, then + * possibly create a rule number and add the rule to the list. + * Update the rule_number in the input struct so the caller knows it as well. 
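+ *
+ * A minimal usage sketch (editor's illustration, not from the
+ * original sources; "my_rule" is an assumed, fully initialized
+ * struct ip_fw):
+ *
+ *	my_rule.rulenum = 0;	// 0 asks add_rule to auto-number
+ *	if (add_rule(&layer3_chain, &my_rule) == 0)
+ *		printf("installed as rule %d\n", my_rule.rulenum);
+ *
+ * This works because the assigned number is copied back into the
+ * caller's copy before add_rule returns.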
+ */ +static int +add_rule(struct ip_fw **head, struct ip_fw *input_rule) +{ + struct ip_fw *rule, *f, *prev; + int s; + int l = RULESIZE(input_rule); + + if (*head == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) + return (EINVAL); + + rule = _MALLOC(l, M_IPFW, M_WAIT); + if (rule == NULL) { + printf("ipfw2: add_rule MALLOC failed\n"); + return (ENOSPC); + } + + bzero(rule, l); + bcopy(input_rule, rule, l); + + rule->next = NULL; + rule->next_rule = NULL; + + rule->pcnt = 0; + rule->bcnt = 0; + rule->timestamp = 0; + + if (*head == NULL) { /* default rule */ + *head = rule; + goto done; + } + + /* + * If rulenum is 0, find highest numbered rule before the + * default rule, and add autoinc_step + */ + if (autoinc_step < 1) + autoinc_step = 1; + else if (autoinc_step > 1000) + autoinc_step = 1000; + if (rule->rulenum == 0) { + /* + * locate the highest numbered rule before default + */ + for (f = *head; f; f = f->next) { + if (f->rulenum == IPFW_DEFAULT_RULE) + break; + rule->rulenum = f->rulenum; + } + if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) + rule->rulenum += autoinc_step; + input_rule->rulenum = rule->rulenum; + } + + /* + * Now insert the new rule in the right place in the sorted list. + */ + for (prev = NULL, f = *head; f; prev = f, f = f->next) { + if (f->rulenum > rule->rulenum) { /* found the location */ + if (prev) { + rule->next = f; + prev->next = rule; + } else { /* head insert */ + rule->next = *head; + *head = rule; + } + break; + } + } + flush_rule_ptrs(); +done: + static_count++; + static_len += l; + DEB(printf("ipfw: installed rule %d, static count now %d\n", + rule->rulenum, static_count);) + return (0); +} + +/** + * Free storage associated with a static rule (including derived + * dynamic rules). + * The caller is in charge of clearing rule pointers to avoid + * dangling pointers. + * @return a pointer to the next entry. + * Arguments are not checked, so they better be correct. + * Must be called at splimp(). 
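+ *
+ * Sketch of the prev/rule walk that this return convention supports
+ * (editor's illustration; should_delete() is a hypothetical
+ * predicate, the real callers below test opcodes or rule numbers):
+ *
+ *	for (prev = NULL, rule = *chain; rule != NULL; )
+ *		if (should_delete(rule))
+ *			rule = delete_rule(chain, prev, rule);
+ *		else {
+ *			prev = rule;
+ *			rule = rule->next;
+ *		}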
+ */ +static struct ip_fw * +delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule) +{ + struct ip_fw *n; + int l = RULESIZE(rule); + + n = rule->next; + remove_dyn_rule(rule, NULL /* force removal */); + if (prev == NULL) + *head = n; + else + prev->next = n; + static_count--; + static_len -= l; + +#if DUMMYNET + if (DUMMYNET_LOADED) + ip_dn_ruledel_ptr(rule); +#endif /* DUMMYNET */ + _FREE(rule, M_IPFW); + return n; +} + +#if DEBUG_INACTIVE_RULES +static void +print_chain(struct ip_fw **chain) +{ + struct ip_fw *rule = *chain; + + for (; rule; rule = rule->next) { + ipfw_insn *cmd = ACTION_PTR(rule); + + printf("ipfw: rule->rulenum = %d\n", rule->rulenum); + + if (rule->reserved_1 == IPFW_RULE_INACTIVE) { + printf("ipfw: rule->reserved = IPFW_RULE_INACTIVE\n"); + } + + switch (cmd->opcode) { + case O_DENY: + printf("ipfw: ACTION: Deny\n"); + break; + + case O_REJECT: + if (cmd->arg1==ICMP_REJECT_RST) + printf("ipfw: ACTION: Reset\n"); + else if (cmd->arg1==ICMP_UNREACH_HOST) + printf("ipfw: ACTION: Reject\n"); + break; + + case O_ACCEPT: + printf("ipfw: ACTION: Accept\n"); + break; + case O_COUNT: + printf("ipfw: ACTION: Count\n"); + break; + case O_DIVERT: + printf("ipfw: ACTION: Divert\n"); + break; + case O_TEE: + printf("ipfw: ACTION: Tee\n"); + break; + case O_SKIPTO: + printf("ipfw: ACTION: SkipTo\n"); + break; + case O_PIPE: + printf("ipfw: ACTION: Pipe\n"); + break; + case O_QUEUE: + printf("ipfw: ACTION: Queue\n"); + break; + case O_FORWARD_IP: + printf("ipfw: ACTION: Forward\n"); + break; + default: + printf("ipfw: invalid action! %d\n", cmd->opcode); + } + } +} +#endif /* DEBUG_INACTIVE_RULES */ + +static void +flush_inactive(void *param) +{ + struct ip_fw *inactive_rule = (struct ip_fw *)param; + struct ip_fw *rule, *prev; + + lck_mtx_lock(ipfw_mutex); + + for (rule = layer3_chain, prev = NULL; rule; ) { + if (rule == inactive_rule && rule->reserved_1 == IPFW_RULE_INACTIVE) { + struct ip_fw *n = rule; + + if (prev == NULL) { + layer3_chain = rule->next; + } + else { + prev->next = rule->next; + } + rule = rule->next; + _FREE(n, M_IPFW); + } + else { + prev = rule; + rule = rule->next; + } + } + +#if DEBUG_INACTIVE_RULES + print_chain(&layer3_chain); +#endif + lck_mtx_unlock(ipfw_mutex); +} + +static void +mark_inactive(struct ip_fw **prev, struct ip_fw **rule) +{ + int l = RULESIZE(*rule); + + if ((*rule)->reserved_1 != IPFW_RULE_INACTIVE) { + (*rule)->reserved_1 = IPFW_RULE_INACTIVE; + static_count--; + static_len -= l; + + timeout(flush_inactive, *rule, 30*hz); /* 30 sec. */ + } + + *prev = *rule; + *rule = (*rule)->next; +} + +/* + * Deletes all rules from a chain (except rules in set RESVD_SET + * unless kill_default = 1). + * Must be called at splimp(). + */ +static void +free_chain(struct ip_fw **chain, int kill_default) +{ + struct ip_fw *prev, *rule; + + flush_rule_ptrs(); /* more efficient to do outside the loop */ + for (prev = NULL, rule = *chain; rule ; ) + if (kill_default || rule->set != RESVD_SET) { + ipfw_insn *cmd = ACTION_PTR(rule); + + /* skip over forwarding rules so struct isn't + * deleted while pointer is still in use elsewhere + */ + if (cmd->opcode == O_FORWARD_IP) { + mark_inactive(&prev, &rule); + } + else { + rule = delete_rule(chain, prev, rule); + } + } + else { + prev = rule; + rule = rule->next; + } +} + +/** + * Remove all rules with given number, and also do set manipulation. + * Assumes chain != NULL && *chain != NULL. + * + * The argument is an u_int32_t. 
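+ * As a worked example of the encoding laid out below (editor's
+ * illustration; the shifts mirror the parsing at the top of
+ * del_entry()), moving rule 100 to set 5 is command 2:
+ *
+ *	u_int32_t arg = (2 << 24) | (5 << 16) | 100;
+ *	del_entry(&layer3_chain, arg);
+ *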
The low 16 bit are the rule or set number, + * the next 8 bits are the new set, the top 8 bits are the command: + * + * 0 delete rules with given number + * 1 delete rules with given set number + * 2 move rules with given number to new set + * 3 move rules with given set number to new set + * 4 swap sets with given numbers + */ +static int +del_entry(struct ip_fw **chain, u_int32_t arg) +{ + struct ip_fw *prev = NULL, *rule = *chain; + int s; + u_int16_t rulenum; /* rule or old_set */ + u_int8_t cmd, new_set; + + rulenum = arg & 0xffff; + cmd = (arg >> 24) & 0xff; + new_set = (arg >> 16) & 0xff; + + if (cmd > 4) + return EINVAL; + if (new_set > RESVD_SET) + return EINVAL; + if (cmd == 0 || cmd == 2) { + if (rulenum >= IPFW_DEFAULT_RULE) + return EINVAL; + } else { + if (rulenum > RESVD_SET) /* old_set */ + return EINVAL; + } + + switch (cmd) { + case 0: /* delete rules with given number */ + /* + * locate first rule to delete + */ + for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) + ; + if (rule->rulenum != rulenum) + return EINVAL; + + /* + * flush pointers outside the loop, then delete all matching + * rules. prev remains the same throughout the cycle. + */ + flush_rule_ptrs(); + while (rule->rulenum == rulenum) { + ipfw_insn *cmd = ACTION_PTR(rule); + + /* keep forwarding rules around so struct isn't + * deleted while pointer is still in use elsewhere + */ + if (cmd->opcode == O_FORWARD_IP) { + mark_inactive(&prev, &rule); + } + else { + rule = delete_rule(chain, prev, rule); + } + } + break; + + case 1: /* delete all rules with given set number */ + flush_rule_ptrs(); + while (rule->rulenum < IPFW_DEFAULT_RULE) { + if (rule->set == rulenum) { + ipfw_insn *cmd = ACTION_PTR(rule); + + /* keep forwarding rules around so struct isn't + * deleted while pointer is still in use elsewhere + */ + if (cmd->opcode == O_FORWARD_IP) { + mark_inactive(&prev, &rule); + } + else { + rule = delete_rule(chain, prev, rule); + } + } + else { + prev = rule; + rule = rule->next; + } + } + break; + + case 2: /* move rules with given number to new set */ + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->rulenum == rulenum) + rule->set = new_set; + break; + + case 3: /* move rules with given set number to new set */ + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->set == rulenum) + rule->set = new_set; + break; + + case 4: /* swap two sets */ + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->set == rulenum) + rule->set = new_set; + else if (rule->set == new_set) + rule->set = rulenum; + break; + } + return 0; +} + +/* + * Clear counters for a specific rule. + */ +static void +clear_counters(struct ip_fw *rule, int log_only) +{ + ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + + if (log_only == 0) { + rule->bcnt = rule->pcnt = 0; + rule->timestamp = 0; + } + if (l->o.opcode == O_LOG) + l->log_left = l->max_log; +} + +/** + * Reset some or all counters on firewall rules. + * @arg frwl is null to clear all entries, or contains a specific + * rule number. + * @arg log_only is 1 if we only want to reset logs, zero otherwise. + */ +static int +zero_entry(int rulenum, int log_only) +{ + struct ip_fw *rule; + int s; + char *msg; + + if (rulenum == 0) { + norule_counter = 0; + for (rule = layer3_chain; rule; rule = rule->next) + clear_counters(rule, log_only); + msg = log_only ? 
"ipfw: All logging counts reset.\n" : + "ipfw: Accounting cleared.\n"; + } else { + int cleared = 0; + /* + * We can have multiple rules with the same number, so we + * need to clear them all. + */ + for (rule = layer3_chain; rule; rule = rule->next) + if (rule->rulenum == rulenum) { + while (rule && rule->rulenum == rulenum) { + clear_counters(rule, log_only); + rule = rule->next; + } + cleared = 1; + break; + } + if (!cleared) /* we did not find any matching rules */ + return (EINVAL); + msg = log_only ? "ipfw: Entry %d logging count reset.\n" : + "ipfw: Entry %d cleared.\n"; + } + if (fw_verbose) + { + dolog((LOG_AUTHPRIV | LOG_NOTICE, msg, rulenum)); + } + return (0); +} + +/* + * Check validity of the structure before insert. + * Fortunately rules are simple, so this mostly need to check rule sizes. + */ +static int +check_ipfw_struct(struct ip_fw *rule, int size) +{ + int l, cmdlen = 0; + int have_action=0; + ipfw_insn *cmd; + + if (size < sizeof(*rule)) { + printf("ipfw: rule too short\n"); + return (EINVAL); + } + /* first, check for valid size */ + l = RULESIZE(rule); + if (l != size) { + printf("ipfw: size mismatch (have %d want %d)\n", size, l); + return (EINVAL); + } + /* + * Now go for the individual checks. Very simple ones, basically only + * instruction sizes. + */ + for (l = rule->cmd_len, cmd = rule->cmd ; + l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + if (cmdlen > l) { + printf("ipfw: opcode %d size truncated\n", + cmd->opcode); + return EINVAL; + } + DEB(printf("ipfw: opcode %d\n", cmd->opcode);) + switch (cmd->opcode) { + case O_PROBE_STATE: + case O_KEEP_STATE: + case O_PROTO: + case O_IP_SRC_ME: + case O_IP_DST_ME: + case O_LAYER2: + case O_IN: + case O_FRAG: + case O_IPOPT: + case O_IPTOS: + case O_IPPRECEDENCE: + case O_IPVER: + case O_TCPWIN: + case O_TCPFLAGS: + case O_TCPOPTS: + case O_ESTAB: + case O_VERREVPATH: + case O_IPSEC: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + break; + case O_UID: +#ifndef __APPLE__ + case O_GID: +#endif /* __APPLE__ */ + case O_IP_SRC: + case O_IP_DST: + case O_TCPSEQ: + case O_TCPACK: + case O_PROB: + case O_ICMPTYPE: + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + break; + + case O_LIMIT: + if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) + goto bad_size; + break; + + case O_LOG: + if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) + goto bad_size; + + /* enforce logging limit */ + if (fw_verbose && + ((ipfw_insn_log *)cmd)->max_log == 0 && verbose_limit != 0) { + ((ipfw_insn_log *)cmd)->max_log = verbose_limit; + } + + ((ipfw_insn_log *)cmd)->log_left = + ((ipfw_insn_log *)cmd)->max_log; + + break; + + case O_IP_SRC_MASK: + case O_IP_DST_MASK: + /* only odd command lengths */ + if ( !(cmdlen & 1) || cmdlen > 31) + goto bad_size; + break; + + case O_IP_SRC_SET: + case O_IP_DST_SET: + if (cmd->arg1 == 0 || cmd->arg1 > 256) { + printf("ipfw: invalid set size %d\n", + cmd->arg1); + return EINVAL; + } + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + + (cmd->arg1+31)/32 ) + goto bad_size; + break; + + case O_MACADDR2: + if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) + goto bad_size; + break; + + case O_NOP: + case O_IPID: + case O_IPTTL: + case O_IPLEN: + if (cmdlen < 1 || cmdlen > 31) + goto bad_size; + break; + + case O_MAC_TYPE: + case O_IP_SRCPORT: + case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ + if (cmdlen < 2 || cmdlen > 31) + goto bad_size; + break; + + case O_RECV: + case O_XMIT: + case O_VIA: + if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) + goto bad_size; + break; + + case O_PIPE: + case 
O_QUEUE: + if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe)) + goto bad_size; + goto check_action; + + case O_FORWARD_IP: + if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) + goto bad_size; + goto check_action; + + case O_FORWARD_MAC: /* XXX not implemented yet */ + case O_CHECK_STATE: + case O_COUNT: + case O_ACCEPT: + case O_DENY: + case O_REJECT: + case O_SKIPTO: + case O_DIVERT: + case O_TEE: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; +check_action: + if (have_action) { + printf("ipfw: opcode %d, multiple actions" + " not allowed\n", + cmd->opcode); + return EINVAL; + } + have_action = 1; + if (l != cmdlen) { + printf("ipfw: opcode %d, action must be" + " last opcode\n", + cmd->opcode); + return EINVAL; + } + break; + default: + printf("ipfw: opcode %d, unknown opcode\n", + cmd->opcode); + return EINVAL; + } + } + if (have_action == 0) { + printf("ipfw: missing action\n"); + return EINVAL; + } + return 0; + +bad_size: + printf("ipfw: opcode %d size %d wrong\n", + cmd->opcode, cmdlen); + return EINVAL; +} + + +/** + * {set|get}sockopt parser. + */ +static int +ipfw_ctl(struct sockopt *sopt) +{ +#define RULE_MAXSIZE (256*sizeof(u_int32_t)) + u_int32_t api_version; + int command; + int error, s; + size_t size; + struct ip_fw *bp , *buf, *rule; + + /* copy of orig sopt to send to ipfw_get_command_and_version() */ + struct sockopt tmp_sopt = *sopt; + struct timeval timenow; + + getmicrotime(&timenow); + + /* + * Disallow modifications in really-really secure mode, but still allow + * the logging counters to be reset. + */ + if (sopt->sopt_name == IP_FW_ADD || + (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { +#if __FreeBSD_version >= 500034 + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error) + return (error); +#else /* FreeBSD 4.x */ + if (securelevel >= 3) + return (EPERM); +#endif + } + + /* first get the command and version, then do conversion as necessary */ + error = ipfw_get_command_and_version(&tmp_sopt, &command, &api_version); + + if (error) { + /* error getting the version */ + return error; + } + + switch (command) { + case IP_FW_GET: + /* + * pass up a copy of the current rules. Static rules + * come first (the last of which has number IPFW_DEFAULT_RULE), + * followed by a possibly empty list of dynamic rule. + * The last dynamic rule has NULL in the "next" field. + */ + lck_mtx_lock(ipfw_mutex); + size = static_len; /* size of static rules */ + if (ipfw_dyn_v) /* add size of dyn.rules */ + size += (dyn_count * sizeof(ipfw_dyn_rule)); + + /* + * XXX todo: if the user passes a short length just to know + * how much room is needed, do not bother filling up the + * buffer, just jump to the sooptcopyout. + */ + buf = _MALLOC(size, M_TEMP, M_WAITOK); + if (buf == 0) { + lck_mtx_unlock(ipfw_mutex); + error = ENOBUFS; + break; + } + + bzero(buf, size); + + bp = buf; + for (rule = layer3_chain; rule ; rule = rule->next) { + int i = RULESIZE(rule); + + if (rule->reserved_1 == IPFW_RULE_INACTIVE) { + continue; + } + bcopy(rule, bp, i); + bcopy(&set_disable, &(bp->next_rule), + sizeof(set_disable)); + bp = (struct ip_fw *)((char *)bp + i); + } + if (ipfw_dyn_v) { + int i; + ipfw_dyn_rule *p, *dst, *last = NULL; + + dst = (ipfw_dyn_rule *)bp; + for (i = 0 ; i < curr_dyn_buckets ; i++ ) + for ( p = ipfw_dyn_v[i] ; p != NULL ; + p = p->next, dst++ ) { + bcopy(p, dst, sizeof *p); + bcopy(&(p->rule->rulenum), &(dst->rule), + sizeof(p->rule->rulenum)); + /* + * store a non-null value in "next". 
+ * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + bcopy(&dst, &dst->next, sizeof(dst)); + last = dst ; + dst->expire = + TIME_LEQ(dst->expire, timenow.tv_sec) ? + 0 : dst->expire - timenow.tv_sec ; + } + if (last != NULL) /* mark last dynamic rule */ + bzero(&last->next, sizeof(last)); + } + lck_mtx_unlock(ipfw_mutex); + + /* convert back if necessary and copyout */ + if (api_version == IP_FW_VERSION_0) { + int i, len = 0; + struct ip_old_fw *buf2, *rule_vers0; + + buf2 = _MALLOC(static_count * sizeof(struct ip_old_fw), M_TEMP, M_WAITOK); + if (buf2 == 0) { + error = ENOBUFS; + } + + if (!error) { + bp = buf; + rule_vers0 = buf2; + + for (i = 0; i < static_count; i++) { + /* static rules have different sizes */ + int j = RULESIZE(bp); + ipfw_convert_from_latest(bp, rule_vers0, api_version); + bp = (struct ip_fw *)((char *)bp + j); + len += sizeof(*rule_vers0); + rule_vers0++; + } + error = sooptcopyout(sopt, buf2, len); + _FREE(buf2, M_TEMP); + } + } else if (api_version == IP_FW_VERSION_1) { + int i, len = 0, buf_size; + struct ip_fw_compat *buf2, *rule_vers1; + struct ipfw_dyn_rule_compat *dyn_rule_vers1, *dyn_last = NULL; + ipfw_dyn_rule *p; + + buf_size = static_count * sizeof(struct ip_fw_compat) + + dyn_count * sizeof(struct ipfw_dyn_rule_compat); + + buf2 = _MALLOC(buf_size, M_TEMP, M_WAITOK); + if (buf2 == 0) { + error = ENOBUFS; + } + + if (!error) { + bp = buf; + rule_vers1 = buf2; + + /* first do static rules */ + for (i = 0; i < static_count; i++) { + /* static rules have different sizes */ + int j = RULESIZE(bp); + ipfw_convert_from_latest(bp, rule_vers1, api_version); + bp = (struct ip_fw *)((char *)bp + j); + len += sizeof(*rule_vers1); + rule_vers1++; + } + + /* now do dynamic rules */ + dyn_rule_vers1 = (struct ipfw_dyn_rule_compat *)rule_vers1; + if (ipfw_dyn_v) { + for (i = 0; i < curr_dyn_buckets; i++) { + for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) { + (int) dyn_rule_vers1->chain = p->rule->rulenum; + dyn_rule_vers1->id = p->id; + dyn_rule_vers1->mask = p->id; + dyn_rule_vers1->type = p->dyn_type; + dyn_rule_vers1->expire = p->expire; + dyn_rule_vers1->pcnt = p->pcnt; + dyn_rule_vers1->bcnt = p->bcnt; + dyn_rule_vers1->bucket = p->bucket; + dyn_rule_vers1->state = p->state; + + dyn_rule_vers1->next = dyn_rule_vers1; + dyn_last = dyn_rule_vers1; + + len += sizeof(*dyn_rule_vers1); + dyn_rule_vers1++; + } + } + + if (dyn_last != NULL) { + dyn_last->next = NULL; + } + } + + error = sooptcopyout(sopt, buf2, len); + _FREE(buf2, M_TEMP); + } + } else { + error = sooptcopyout(sopt, buf, size); + } + + _FREE(buf, M_TEMP); + break; + + case IP_FW_FLUSH: + /* + * Normally we cannot release the lock on each iteration. + * We could do it here only because we start from the head all + * the times so there is no risk of missing some entries. + * On the other hand, the risk is that we end up with + * a very inconsistent ruleset, so better keep the lock + * around the whole cycle. + * + * XXX this code can be improved by resetting the head of + * the list to point to the default rule, and then freeing + * the old list without the need for a lock. 
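+ *
+ * A sketch of that improvement (editor's illustration, not
+ * implemented here; "default_rule_only" is an assumed pre-built
+ * one-rule chain and "free_detached" a hypothetical helper):
+ *
+ *	lck_mtx_lock(ipfw_mutex);
+ *	old_chain = layer3_chain;
+ *	layer3_chain = default_rule_only;
+ *	lck_mtx_unlock(ipfw_mutex);
+ *	free_detached(old_chain);	// free outside the lock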
+ */ + + lck_mtx_lock(ipfw_mutex); + free_chain(&layer3_chain, 0 /* keep default rule */); +#if DEBUG_INACTIVE_RULES + print_chain(&layer3_chain); +#endif + lck_mtx_unlock(ipfw_mutex); + break; + + case IP_FW_ADD: + rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK); + if (rule == 0) { + error = ENOBUFS; + break; + } + + bzero(rule, RULE_MAXSIZE); + + if (api_version != IP_FW_CURRENT_API_VERSION) { + error = ipfw_convert_to_latest(sopt, rule, api_version); + } + else { + error = sooptcopyin(sopt, rule, RULE_MAXSIZE, + sizeof(struct ip_fw) ); + } + + if (!error) { + if ((api_version == IP_FW_VERSION_0) || (api_version == IP_FW_VERSION_1)) { + /* the rule has already been checked so just + * adjust sopt_valsize to match what would be expected. + */ + sopt->sopt_valsize = RULESIZE(rule); + } + error = check_ipfw_struct(rule, sopt->sopt_valsize); + if (!error) { + lck_mtx_lock(ipfw_mutex); + error = add_rule(&layer3_chain, rule); + lck_mtx_unlock(ipfw_mutex); + + size = RULESIZE(rule); + if (!error && sopt->sopt_dir == SOPT_GET) { + /* convert back if necessary and copyout */ + if (api_version == IP_FW_VERSION_0) { + struct ip_old_fw rule_vers0; + + ipfw_convert_from_latest(rule, &rule_vers0, api_version); + sopt->sopt_valsize = sizeof(struct ip_old_fw); + + error = sooptcopyout(sopt, &rule_vers0, sizeof(struct ip_old_fw)); + } else if (api_version == IP_FW_VERSION_1) { + struct ip_fw_compat rule_vers1; + + ipfw_convert_from_latest(rule, &rule_vers1, api_version); + sopt->sopt_valsize = sizeof(struct ip_fw_compat); + + error = sooptcopyout(sopt, &rule_vers1, sizeof(struct ip_fw_compat)); + } else { + error = sooptcopyout(sopt, rule, size); + } + } + } + } + + _FREE(rule, M_TEMP); + break; + + case IP_FW_DEL: + { + /* + * IP_FW_DEL is used for deleting single rules or sets, + * and (ab)used to atomically manipulate sets. + * rule->set_masks is used to distinguish between the two: + * rule->set_masks[0] == 0 + * delete single rule or set of rules, + * or reassign rules (or sets) to a different set. + * rule->set_masks[0] != 0 + * atomic disable/enable sets. + * rule->set_masks[0] contains sets to be disabled, + * rule->set_masks[1] contains sets to be enabled. 
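+ *
+ * E.g. disabling set 2 while enabling set 7 in one atomic call
+ * (editor's illustration of the encoding):
+ *
+ *	rule.set_masks[0] = 1 << 2;	// sets to disable; non-zero selects set mode
+ *	rule.set_masks[1] = 1 << 7;	// sets to enable
+ *
+ * The code below folds these into set_disable, always keeping
+ * RESVD_SET (set 31) enabled.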
+ */
+		/* there is only a simple rule passed in
+		 * (no cmds), so use a temp struct to copy
+		 */
+		struct ip_fw	temp_rule = { 0 };
+
+		if (api_version != IP_FW_CURRENT_API_VERSION) {
+			error = ipfw_convert_to_latest(sopt, &temp_rule, api_version);
+		}
+		else {
+			error = sooptcopyin(sopt, &temp_rule, sizeof(struct ip_fw),
+				sizeof(struct ip_fw) );
+		}
+
+		if (!error) {
+			/* set_masks is used to distinguish between deleting
+			 * single rules or atomically manipulating sets
+			 */
+			lck_mtx_lock(ipfw_mutex);
+
+			if (temp_rule.set_masks[0] != 0) {
+				/* set manipulation */
+				set_disable =
+				    (set_disable | temp_rule.set_masks[0]) & ~temp_rule.set_masks[1] &
+				    ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
+			}
+			else {
+				/* delete the specified rule(s) */
+				error = del_entry(&layer3_chain, temp_rule.rulenum);
+#if DEBUG_INACTIVE_RULES
+				print_chain(&layer3_chain);
+#endif
+			}
+
+			lck_mtx_unlock(ipfw_mutex);
+		}
+		break;
+	}
+	case IP_FW_ZERO:
+	case IP_FW_RESETLOG: /* using rule->rulenum */
+	{
+		/* there is only a simple rule passed in
+		 * (no cmds), so use a temp struct to copy
+		 */
+		struct ip_fw	temp_rule = { 0 };
+
+		if (api_version != IP_FW_CURRENT_API_VERSION) {
+			error = ipfw_convert_to_latest(sopt, &temp_rule, api_version);
+		}
+		else {
+			if (sopt->sopt_val != 0) {
+				error = sooptcopyin(sopt, &temp_rule, sizeof(struct ip_fw),
+					sizeof(struct ip_fw) );
+			}
+		}
+
+		if (!error) {
+			lck_mtx_lock(ipfw_mutex);
+			error = zero_entry(temp_rule.rulenum, sopt->sopt_name == IP_FW_RESETLOG);
+			lck_mtx_unlock(ipfw_mutex);
+		}
+		break;
+	}
+	default:
+		printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
+		error = EINVAL;
+	}
+
+	return (error);
+}
+
+/**
+ * dummynet needs a reference to the default rule, because rules can be
+ * deleted while packets hold a reference to them. When this happens,
+ * dummynet changes the reference to the default rule (it could well be a
+ * NULL pointer, but this way we do not need to check for the special
+ * case, plus here we have info on the default behaviour).
+ */
+struct ip_fw *ip_fw_default_rule;
+
+/*
+ * This procedure is only used to handle keepalives. It is invoked
+ * every dyn_keepalive_period seconds.
+ */
+static void
+ipfw_tick(void * __unused unused)
+{
+	int i;
+	int s;
+	ipfw_dyn_rule *q;
+	struct timeval timenow;
+
+	if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0)
+		goto done;
+
+	getmicrotime(&timenow);
+
+	lck_mtx_lock(ipfw_mutex);
+	for (i = 0 ; i < curr_dyn_buckets ; i++) {
+		for (q = ipfw_dyn_v[i] ; q ; q = q->next ) {
+			if (q->dyn_type == O_LIMIT_PARENT)
+				continue;
+			if (q->id.proto != IPPROTO_TCP)
+				continue;
+			if ( (q->state & BOTH_SYN) != BOTH_SYN)
+				continue;
+			if (TIME_LEQ( timenow.tv_sec+dyn_keepalive_interval,
+			    q->expire))
+				continue;	/* too early */
+			if (TIME_LEQ(q->expire, timenow.tv_sec))
+				continue;	/* too late, rule expired */
+
+			send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN);
+			send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0);
+		}
+	}
+	lck_mtx_unlock(ipfw_mutex);
+done:
+	timeout(ipfw_tick, NULL, dyn_keepalive_period*hz);
+}
+
+void
+ipfw_init(void)
+{
+	struct ip_fw default_rule;
+
+	/* setup locks */
+	ipfw_mutex_grp_attr = lck_grp_attr_alloc_init();
+	ipfw_mutex_grp = lck_grp_alloc_init("ipfw", ipfw_mutex_grp_attr);
+	ipfw_mutex_attr = lck_attr_alloc_init();
+	lck_attr_setdefault(ipfw_mutex_attr);
+
+	if ((ipfw_mutex = lck_mtx_alloc_init(ipfw_mutex_grp, ipfw_mutex_attr)) == NULL) {
+		printf("ipfw_init: can't alloc ipfw_mutex\n");
+		return;
+	}
+
+	layer3_chain = NULL;
+
+	bzero(&default_rule, sizeof default_rule);
+
+	default_rule.act_ofs = 0;
+	default_rule.rulenum = IPFW_DEFAULT_RULE;
+	default_rule.cmd_len = 1;
+	default_rule.set = RESVD_SET;
+
+	default_rule.cmd[0].len = 1;
+	default_rule.cmd[0].opcode =
+#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT
+				1 ?
O_ACCEPT : +#endif + O_DENY; + + if (add_rule(&layer3_chain, &default_rule)) { + printf("ipfw2: add_rule failed adding default rule\n"); + printf("ipfw2 failed initialization!!\n"); + fw_enable = 0; + } + else { + ip_fw_default_rule = layer3_chain; +#if 0 + /* Radar 3920649, don't print unncessary messages to the log */ + printf("ipfw2 initialized, divert %s, " + "rule-based forwarding enabled, default to %s, logging ", + #ifdef IPDIVERT + "enabled", + #else + "disabled", + #endif + default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny"); +#endif + + #ifdef IPFIREWALL_VERBOSE + fw_verbose = 1; + #endif + #ifdef IPFIREWALL_VERBOSE_LIMIT + verbose_limit = IPFIREWALL_VERBOSE_LIMIT; + #endif + if (fw_verbose == 0) + printf("disabled\n"); + else if (verbose_limit == 0) + printf("unlimited\n"); + else + printf("limited to %d packets/entry by default\n", + verbose_limit); + } + + ip_fw_chk_ptr = ipfw_chk; + ip_fw_ctl_ptr = ipfw_ctl; + + ipfwstringlen = strlen( ipfwstring ); + + timeout(ipfw_tick, NULL, hz); +} + +#endif /* IPFW2 */ diff --git a/bsd/netinet/ip_fw2.h b/bsd/netinet/ip_fw2.h new file mode 100644 index 000000000..43dcf98ce --- /dev/null +++ b/bsd/netinet/ip_fw2.h @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/ip_fw2.h,v 1.1.2.4 2003/07/17 06:03:39 luigi Exp $ + */ + +#ifndef _IPFW2_H +#define _IPFW2_H + +/* + * The kernel representation of ipfw rules is made of a list of + * 'instructions' (for all practical purposes equivalent to BPF + * instructions), which specify which fields of the packet + * (or its metadata) should be analysed. + * + * Each instruction is stored in a structure which begins with + * "ipfw_insn", and can contain extra fields depending on the + * instruction type (listed below). + * Note that the code is written so that individual instructions + * have a size which is a multiple of 32 bits. This means that, if + * such structures contain pointers or other 64-bit entities, + * (there is just one instance now) they may end up unaligned on + * 64-bit architectures, so the must be handled with care. + * + * "enum ipfw_opcodes" are the opcodes supported. We can have up + * to 256 different opcodes. 
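+ *
+ * A typical walk over a rule's instruction stream, for reference
+ * (editor's sketch; it mirrors the scan loops in ip_fw2.c, with
+ * handle() standing in for the per-opcode work):
+ *
+ *	int l;
+ *	ipfw_insn *cmd;
+ *
+ *	for (l = rule->cmd_len, cmd = rule->cmd; l > 0;
+ *	    l -= F_LEN(cmd), cmd += F_LEN(cmd))
+ *		handle(cmd);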
+ */
+
+enum ipfw_opcodes {		/* arguments (4 byte each)	*/
+	O_NOP,
+
+	O_IP_SRC,		/* u32 = IP			*/
+	O_IP_SRC_MASK,		/* ip = IP/mask			*/
+	O_IP_SRC_ME,		/* none				*/
+	O_IP_SRC_SET,		/* u32=base, arg1=len, bitmap	*/
+
+	O_IP_DST,		/* u32 = IP			*/
+	O_IP_DST_MASK,		/* ip = IP/mask			*/
+	O_IP_DST_ME,		/* none				*/
+	O_IP_DST_SET,		/* u32=base, arg1=len, bitmap	*/
+
+	O_IP_SRCPORT,		/* (n)port list:mask 4 byte ea	*/
+	O_IP_DSTPORT,		/* (n)port list:mask 4 byte ea	*/
+	O_PROTO,		/* arg1=protocol		*/
+
+	O_MACADDR2,		/* 2 mac addr:mask		*/
+	O_MAC_TYPE,		/* same as srcport		*/
+
+	O_LAYER2,		/* none				*/
+	O_IN,			/* none				*/
+	O_FRAG,			/* none				*/
+
+	O_RECV,			/* none				*/
+	O_XMIT,			/* none				*/
+	O_VIA,			/* none				*/
+
+	O_IPOPT,		/* arg1 = 2*u8 bitmap		*/
+	O_IPLEN,		/* arg1 = len			*/
+	O_IPID,			/* arg1 = id			*/
+
+	O_IPTOS,		/* arg1 = id			*/
+	O_IPPRECEDENCE,		/* arg1 = precedence << 5	*/
+	O_IPTTL,		/* arg1 = TTL			*/
+
+	O_IPVER,		/* arg1 = version		*/
+	O_UID,			/* u32 = id			*/
+	O_GID,			/* u32 = id			*/
+	O_ESTAB,		/* none (tcp established)	*/
+	O_TCPFLAGS,		/* arg1 = 2*u8 bitmap		*/
+	O_TCPWIN,		/* arg1 = desired win		*/
+	O_TCPSEQ,		/* u32 = desired seq.		*/
+	O_TCPACK,		/* u32 = desired seq.		*/
+	O_ICMPTYPE,		/* u32 = icmp bitmap		*/
+	O_TCPOPTS,		/* arg1 = 2*u8 bitmap		*/
+
+	O_VERREVPATH,		/* none				*/
+
+	O_PROBE_STATE,		/* none				*/
+	O_KEEP_STATE,		/* none				*/
+	O_LIMIT,		/* ipfw_insn_limit		*/
+	O_LIMIT_PARENT,		/* dyn_type, not an opcode.	*/
+
+	/*
+	 * These are really 'actions'.
+	 */
+
+	O_LOG,			/* ipfw_insn_log		*/
+	O_PROB,			/* u32 = match probability	*/
+
+	O_CHECK_STATE,		/* none				*/
+	O_ACCEPT,		/* none				*/
+	O_DENY,			/* none				*/
+	O_REJECT,		/* arg1=icmp arg (same as deny)	*/
+	O_COUNT,		/* none				*/
+	O_SKIPTO,		/* arg1=next rule number	*/
+	O_PIPE,			/* arg1=pipe number		*/
+	O_QUEUE,		/* arg1=queue number		*/
+	O_DIVERT,		/* arg1=port number		*/
+	O_TEE,			/* arg1=port number		*/
+	O_FORWARD_IP,		/* fwd sockaddr			*/
+	O_FORWARD_MAC,		/* fwd mac			*/
+
+	/*
+	 * More opcodes.
+	 */
+	O_IPSEC,		/* has ipsec history		*/
+
+	O_LAST_OPCODE		/* not an opcode!		*/
+};
+
+/*
+ * Template for instructions.
+ *
+ * ipfw_insn is used for all instructions which require no operands,
+ * a single 16-bit value (arg1), or a couple of 8-bit values.
+ *
+ * For other instructions which require different/larger arguments
+ * we have derived structures, ipfw_insn_*.
+ *
+ * The size of the instruction (in 32-bit words) is in the low
+ * 6 bits of "len". The 2 remaining bits are used to implement
+ * NOT and OR on individual instructions. Given a type, you can
+ * compute the length to be put in "len" using F_INSN_SIZE(t)
+ *
+ * F_NOT	negates the match result of the instruction.
+ *
+ * F_OR		is used to build OR blocks. By default, instructions
+ *	are evaluated as part of a logical AND. An "or" block
+ *	{ X or Y or Z } contains F_OR set in all but the last
+ *	instruction of the block. A match will cause the code
+ *	to skip past the last instruction of the block.
+ *
+ * NOTA BENE: in a couple of places we assume that
+ *	sizeof(ipfw_insn) == sizeof(u_int32_t)
+ * this needs to be fixed.
+ *
+ */
+typedef struct	_ipfw_insn {	/* template for instructions */
+	enum ipfw_opcodes	opcode:8;
+	u_int8_t	len;	/* number of 32-bit words */
+#define	F_NOT		0x80
+#define	F_OR		0x40
+#define	F_LEN_MASK	0x3f
+#define	F_LEN(cmd)	((cmd)->len & F_LEN_MASK)
+
+	u_int16_t	arg1;
+} ipfw_insn;
+
+/*
+ * The F_INSN_SIZE(type) computes the size, in 4-byte words, of
+ * a given type.
+ */
+#define	F_INSN_SIZE(t)	((sizeof (t))/sizeof(u_int32_t))
+
+/*
+ * This is used to store an array of 16-bit entries (ports etc.)
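+ *
+ * Entries are consumed in pairs, each pair being a closed range;
+ * a single port p is stored as the degenerate range {p, p}. The
+ * matching loops in ip_fw2.c then follow this sketch (editor's
+ * illustration; x is the value being tested):
+ *
+ *	for (i = cmdlen - 1, p = ports; !match && i > 0; i--, p += 2)
+ *		match = (x >= p[0] && x <= p[1]);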
+ */ +typedef struct _ipfw_insn_u16 { + ipfw_insn o; + u_int16_t ports[2]; /* there may be more */ +} ipfw_insn_u16; + +/* + * This is used to store an array of 32-bit entries + * (uid, single IPv4 addresses etc.) + */ +typedef struct _ipfw_insn_u32 { + ipfw_insn o; + u_int32_t d[1]; /* one or more */ +} ipfw_insn_u32; + +/* + * This is used to store IP addr-mask pairs. + */ +typedef struct _ipfw_insn_ip { + ipfw_insn o; + struct in_addr addr; + struct in_addr mask; +} ipfw_insn_ip; + +/* + * This is used to forward to a given address (ip). + */ +typedef struct _ipfw_insn_sa { + ipfw_insn o; + struct sockaddr_in sa; +} ipfw_insn_sa; + +/* + * This is used for MAC addr-mask pairs. + */ +typedef struct _ipfw_insn_mac { + ipfw_insn o; + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ +} ipfw_insn_mac; + +/* + * This is used for interface match rules (recv xx, xmit xx). + */ +typedef struct _ipfw_insn_if { + ipfw_insn o; + union { + struct in_addr ip; + int32_t unit; + } p; + char name[IFNAMSIZ]; +} ipfw_insn_if; + +/* + * This is used for pipe and queue actions, which need to store + * a single pointer (which can have different size on different + * architectures. + * Note that, because of previous instructions, pipe_ptr might + * be unaligned in the overall structure, so it needs to be + * manipulated with care. + */ +typedef struct _ipfw_insn_pipe { + ipfw_insn o; + void *pipe_ptr; /* XXX */ +} ipfw_insn_pipe; + +/* + * This is used for limit rules. + */ +typedef struct _ipfw_insn_limit { + ipfw_insn o; + u_int8_t _pad; + u_int8_t limit_mask; /* combination of DYN_* below */ +#define DYN_SRC_ADDR 0x1 +#define DYN_SRC_PORT 0x2 +#define DYN_DST_ADDR 0x4 +#define DYN_DST_PORT 0x8 + + u_int16_t conn_limit; +} ipfw_insn_limit; + +/* + * This is used for log instructions. + */ +typedef struct _ipfw_insn_log { + ipfw_insn o; + u_int32_t max_log; /* how many do we log -- 0 = all */ + u_int32_t log_left; /* how many left to log */ +} ipfw_insn_log; + +/* Version of this API */ +#define IP_FW_VERSION_NONE 0 +#define IP_FW_VERSION_0 10 /* old ipfw */ +#define IP_FW_VERSION_1 20 /* ipfw in Jaguar/Panther */ +#define IP_FW_VERSION_2 30 /* ipfw2 */ +#define IP_FW_CURRENT_API_VERSION IP_FW_VERSION_2 + +/* + * Here we have the structure representing an ipfw rule. + * + * It starts with a general area (with link fields and counters) + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. + * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). + * + * When assembling instruction, remember the following: + * + * + if a rule has a "keep-state" (or "limit") option, then the + * first instruction (at r->cmd) MUST BE an O_PROBE_STATE + * + if a rule has a "log" option, then the first action + * (at ACTION_PTR(r)) MUST be O_LOG + * + * NOTE: we use a simple linked list of rules because we never need + * to delete a rule without scanning the list. We do not use + * queue(3) macros for portability and readability. + */ + +struct ip_fw { + u_int32_t version; /* Version of this structure. MUST be set */ + /* by clients. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION. */ + void *context; /* Context that is usable by user processes to */ + /* identify this rule. 
*/ + struct ip_fw *next; /* linked list of rules */ + struct ip_fw *next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int8_t set; /* rule set (0..31) */ + u_int32_t set_masks[2]; /* masks for manipulating sets atomically */ +#define RESVD_SET 31 /* set for default and persistent rules */ + u_int8_t _pad; /* padding */ + + /* These fields are present in all rules. */ + u_int64_t pcnt; /* Packet counter */ + u_int64_t bcnt; /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + u_int32_t reserved_1; /* reserved - set to 0 */ + u_int32_t reserved_2; /* reserved - set to 0 */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#define ACTION_PTR(rule) \ + (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) + +#define RULESIZE(rule) (sizeof(struct ip_fw) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + +/* + * This structure is used as a flow mask and a flow id for various + * parts of the code. + */ +struct ipfw_flow_id { + u_int32_t dst_ip; + u_int32_t src_ip; + u_int16_t dst_port; + u_int16_t src_port; + u_int8_t proto; + u_int8_t flags; /* protocol-specific flags */ +}; + +/* + * Dynamic ipfw rule. + */ +typedef struct _ipfw_dyn_rule ipfw_dyn_rule; + +struct _ipfw_dyn_rule { + ipfw_dyn_rule *next; /* linked list of rules. */ + struct ip_fw *rule; /* pointer to rule */ + /* 'rule' is used to pass up the rule number (from the parent) */ + + ipfw_dyn_rule *parent; /* pointer to parent rule */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + struct ipfw_flow_id id; /* (masked) flow id */ + u_int32_t expire; /* expire time */ + u_int32_t bucket; /* which bucket in hash table */ + u_int32_t state; /* state of this rule (typically a + * combination of TCP flags) + */ + u_int32_t ack_fwd; /* most recent ACKs in forward */ + u_int32_t ack_rev; /* and reverse directions (used */ + /* to generate keepalives) */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ +}; + +/* + * Definitions for IP option names. + */ +#define IP_FW_IPOPT_LSRR 0x01 +#define IP_FW_IPOPT_SSRR 0x02 +#define IP_FW_IPOPT_RR 0x04 +#define IP_FW_IPOPT_TS 0x08 + +/* + * Definitions for TCP option names. + */ +#define IP_FW_TCPOPT_MSS 0x01 +#define IP_FW_TCPOPT_WINDOW 0x02 +#define IP_FW_TCPOPT_SACK 0x04 +#define IP_FW_TCPOPT_TS 0x08 +#define IP_FW_TCPOPT_CC 0x10 + +#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ + +/* + * Main firewall chains definitions and global var's definitions. + */ +#ifdef KERNEL + +#define IP_FW_PORT_DYNT_FLAG 0x10000 +#define IP_FW_PORT_TEE_FLAG 0x20000 +#define IP_FW_PORT_DENY_FLAG 0x40000 + +/* + * Arguments for calling ipfw_chk() and dummynet_io(). We put them + * all into a structure because this way it is easier and more + * efficient to pass variables around and extend the interface. 
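+ *
+ * Minimal calling sketch for the input path (editor's illustration;
+ * the real call sites set more fields than shown here):
+ *
+ *	struct ip_fw_args args;
+ *
+ *	bzero(&args, sizeof(args));
+ *	args.m = m;		// packet under inspection
+ *	args.oif = NULL;	// NULL oif means "input path"
+ *	off = ip_fw_chk_ptr(&args);	// 0 = pass; see IP_FW_PORT_* flags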
+ */ +struct ip_fw_args { + struct mbuf *m; /* the mbuf chain */ + struct ifnet *oif; /* output interface */ + struct sockaddr_in *next_hop; /* forward address */ + struct ip_fw *rule; /* matching rule */ + struct ether_header *eh; /* for bridged packets */ + + struct route *ro; /* for dummynet */ + struct sockaddr_in *dst; /* for dummynet */ + int flags; /* for dummynet */ + + struct ipfw_flow_id f_id; /* grabbed from IP header */ + u_int16_t divert_rule; /* divert cookie */ + u_int32_t retval; +}; + +/* + * Function definitions. + */ + +/* Firewall hooks */ +struct sockopt; +struct dn_flow_set; + +void flush_pipe_ptrs(struct dn_flow_set *match); /* used by dummynet */ +void ipfw_init(void); /* called from raw_ip.c: load_ipfw() */ + +typedef int ip_fw_chk_t (struct ip_fw_args *args); +typedef int ip_fw_ctl_t (struct sockopt *); +extern ip_fw_chk_t *ip_fw_chk_ptr; +extern ip_fw_ctl_t *ip_fw_ctl_ptr; +extern int fw_one_pass; +extern int fw_enable; +#define IPFW_LOADED (ip_fw_chk_ptr != NULL) +#endif /* KERNEL */ + +#endif /* _IPFW2_H */ diff --git a/bsd/netinet/ip_fw2_compat.c b/bsd/netinet/ip_fw2_compat.c new file mode 100644 index 000000000..1e0ee62a6 --- /dev/null +++ b/bsd/netinet/ip_fw2_compat.c @@ -0,0 +1,2253 @@ +/* IPFW2 Backward Compatibility */ + +/* Convert to and from IPFW2 structures. */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ip_fw2_compat.h" + +#define FW2_DEBUG_VERBOSE 0 + +/* + * _s_x is a structure that stores a string <-> token pairs, used in + * various places in the parser. Entries are stored in arrays, + * with an entry with s=NULL as terminator. + * The search routines are match_token() and match_value(). + * Often, an element with x=0 contains an error string. + * + */ +struct _s_x { + char const *s; + int x; +}; + +#define NO_VERSION_STR "IP_FW_VERSION_NONE" +#define VERSION_ZERO_STR "IP_FW_VERSION_0" +#define VERSION_ONE_STR "IP_FW_VERSION_1" +#define CURRENT_API_VERSION_STR "IP_FW_CURRENT_API_VERSION" + +static struct _s_x f_tcpflags[] = { + { "syn", TH_SYN }, + { "fin", TH_FIN }, + { "ack", TH_ACK }, + { "psh", TH_PUSH }, + { "rst", TH_RST }, + { "urg", TH_URG }, + { "tcp flag", 0 }, + { NULL, 0 } +}; + +static struct _s_x f_tcpopts[] = { + { "mss", IP_FW_TCPOPT_MSS }, + { "maxseg", IP_FW_TCPOPT_MSS }, + { "window", IP_FW_TCPOPT_WINDOW }, + { "sack", IP_FW_TCPOPT_SACK }, + { "ts", IP_FW_TCPOPT_TS }, + { "timestamp", IP_FW_TCPOPT_TS }, + { "cc", IP_FW_TCPOPT_CC }, + { "tcp option", 0 }, + { NULL, 0 } +}; + +/* + * IP options span the range 0 to 255 so we need to remap them + * (though in fact only the low 5 bits are significant). 
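+ *
+ * Lookups over these _s_x tables are linear scans to the NULL
+ * terminator, as in this sketch (editor's illustration; the
+ * match_token()/match_value() routines mentioned above work this way):
+ *
+ *	for (p = table; p->s != NULL; p++)
+ *		if (p->x == value)
+ *			return p->s;	// name for this value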
+ */ +static struct _s_x f_ipopts[] = { + { "ssrr", IP_FW_IPOPT_SSRR}, + { "lsrr", IP_FW_IPOPT_LSRR}, + { "rr", IP_FW_IPOPT_RR}, + { "ts", IP_FW_IPOPT_TS}, + { "ip option", 0 }, + { NULL, 0 } +}; + +static struct _s_x f_iptos[] = { + { "lowdelay", IPTOS_LOWDELAY}, + { "throughput", IPTOS_THROUGHPUT}, + { "reliability", IPTOS_RELIABILITY}, + { "mincost", IPTOS_MINCOST}, + { "congestion", IPTOS_CE}, + { "ecntransport", IPTOS_ECT}, + { "ip tos option", 0}, + { NULL, 0 } +}; + +static struct _s_x limit_masks[] = { + {"all", DYN_SRC_ADDR|DYN_SRC_PORT|DYN_DST_ADDR|DYN_DST_PORT}, + {"src-addr", DYN_SRC_ADDR}, + {"src-port", DYN_SRC_PORT}, + {"dst-addr", DYN_DST_ADDR}, + {"dst-port", DYN_DST_PORT}, + {NULL, 0} +}; + +static void +ipfw_print_fw_flags(u_int flags) +{ + /* print action */ + switch (flags & IP_FW_F_COMMAND_COMPAT) { + case IP_FW_F_ACCEPT_COMPAT: + printf("IP_FW_F_ACCEPT_COMPAT\n"); + break; + case IP_FW_F_COUNT_COMPAT: + printf("IP_FW_F_COUNT_COMPAT\n"); + break; + case IP_FW_F_PIPE_COMPAT: + printf("IP_FW_F_PIPE_COMPAT\n"); + break; + case IP_FW_F_QUEUE_COMPAT: + printf("IP_FW_F_QUEUE_COMPAT\n"); + break; + case IP_FW_F_SKIPTO_COMPAT: + printf("IP_FW_F_SKIPTO_COMPAT\n"); + break; + case IP_FW_F_DIVERT_COMPAT: + printf("IP_FW_F_DIVERT_COMPAT\n"); + break; + case IP_FW_F_TEE_COMPAT: + printf("IP_FW_F_TEE_COMPAT\n"); + break; + case IP_FW_F_FWD_COMPAT: + printf("IP_FW_F_FWD_COMPAT\n"); + break; + case IP_FW_F_DENY_COMPAT: + printf("IP_FW_F_DENY_COMPAT\n"); + break; + case IP_FW_F_REJECT_COMPAT: + printf("IP_FW_F_REJECT_COMPAT\n"); + break; + case IP_FW_F_CHECK_S_COMPAT: + printf("IP_FW_F_CHECK_S_COMPAT\n"); + break; + default: + printf("No action given\n"); + break; + } + + /* print commands */ + if (flags & IP_FW_F_IN_COMPAT) { + printf("IP_FW_F_IN_COMPAT\n"); + } + if (flags & IP_FW_F_OUT_COMPAT) { + printf("IP_FW_F_OUT_COMPAT\n"); + } + if (flags & IP_FW_F_IIFACE_COMPAT) { + printf("IP_FW_F_IIFACE_COMPAT\n"); + } + if (flags & IP_FW_F_OIFACE_COMPAT) { + printf("IP_FW_F_OIFACE_COMPAT\n"); + } + if (flags & IP_FW_F_PRN_COMPAT) { + printf("IP_FW_F_PRN_COMPAT\n"); + } + if (flags & IP_FW_F_SRNG_COMPAT) { + printf("IP_FW_F_SRNG_COMPAT\n"); + } + if (flags & IP_FW_F_DRNG_COMPAT) { + printf("IP_FW_F_DRNG_COMPAT\n"); + } + if (flags & IP_FW_F_FRAG_COMPAT) { + printf("IP_FW_F_FRAG_COMPAT\n"); + } + if (flags & IP_FW_F_IIFNAME_COMPAT) { + printf("IP_FW_F_IIFNAME_COMPAT\n"); + } + if (flags & IP_FW_F_OIFNAME_COMPAT) { + printf("IP_FW_F_OIFNAME_COMPAT\n"); + } + if (flags & IP_FW_F_INVSRC_COMPAT) { + printf("IP_FW_F_INVSRC_COMPAT\n"); + } + if (flags & IP_FW_F_INVDST_COMPAT) { + printf("IP_FW_F_INVDST_COMPAT\n"); + } + if (flags & IP_FW_F_ICMPBIT_COMPAT) { + printf("IP_FW_F_ICMPBIT_COMPAT\n"); + } + if (flags & IP_FW_F_UID_COMPAT) { + printf("IP_FW_F_UID_COMPAT\n"); + } + if (flags & IP_FW_F_RND_MATCH_COMPAT) { + printf("IP_FW_F_RND_MATCH_COMPAT\n"); + } + if (flags & IP_FW_F_SMSK_COMPAT) { + printf("IP_FW_F_SMSK_COMPAT\n"); + } + if (flags & IP_FW_F_DMSK_COMPAT) { + printf("IP_FW_F_DMSK_COMPAT\n"); + } + if (flags & IP_FW_BRIDGED_COMPAT) { + printf("IP_FW_BRIDGED_COMPAT\n"); + } + if (flags & IP_FW_F_KEEP_S_COMPAT) { + printf("IP_FW_F_KEEP_S_COMPAT\n"); + } + if (flags & IP_FW_F_CHECK_S_COMPAT) { + printf("IP_FW_F_CHECK_S_COMPAT\n"); + } + if (flags & IP_FW_F_SME_COMPAT) { + printf("IP_FW_F_SME_COMPAT\n"); + } + if (flags & IP_FW_F_DME_COMPAT) { + printf("IP_FW_F_DME_COMPAT\n"); + } +} + +static void +print_fw_version(u_int32_t api_version) +{ + switch (api_version) { + case IP_FW_VERSION_0: + 
printf("Version: %s\n", VERSION_ZERO_STR);
+		break;
+	case IP_FW_VERSION_1:
+		printf("Version: %s\n", VERSION_ONE_STR);
+		break;
+	case IP_FW_CURRENT_API_VERSION:
+		printf("Version: %s\n", CURRENT_API_VERSION_STR);
+		break;
+	case IP_FW_VERSION_NONE:
+		printf("Version: %s\n", NO_VERSION_STR);
+		break;
+	default:
+		printf("Unrecognized version\n");
+		break;
+	}
+}
+
+static void
+ipfw_print_vers1_struct(struct ip_fw_compat *vers1_rule)
+{
+	char	ipv4str[MAX_IPv4_STR_LEN];
+	print_fw_version(vers1_rule->version);
+	printf("Rule #%d\n", vers1_rule->fw_number);
+
+	ipfw_print_fw_flags(vers1_rule->fw_flg);
+
+	printf("fw_pcnt: %d\n", vers1_rule->fw_pcnt);
+	printf("fw_bcnt: %d\n", vers1_rule->fw_bcnt);
+	printf("fw_src: %s\n",
+	    inet_ntop(AF_INET, &vers1_rule->fw_src, ipv4str, sizeof(ipv4str)));
+	printf("fw_dst: %s\n",
+	    inet_ntop(AF_INET, &vers1_rule->fw_dst, ipv4str, sizeof(ipv4str)));
+	printf("fw_smsk: %s\n",
+	    inet_ntop(AF_INET, &vers1_rule->fw_smsk, ipv4str, sizeof(ipv4str)));
+	printf("fw_dmsk: %s\n",
+	    inet_ntop(AF_INET, &vers1_rule->fw_dmsk, ipv4str, sizeof(ipv4str)));
+
+	if (vers1_rule->fw_flg & IP_FW_F_ICMPBIT_COMPAT) {
+		int type_index;
+		int first = 1;
+
+		printf(" icmptype");
+
+		for (type_index = 0;
+		    type_index < (IP_FW_ICMPTYPES_DIM_COMPAT * sizeof(unsigned) * 8);
+		    ++type_index) {
+			if (vers1_rule->fw_uar_compat.fw_icmptypes[type_index / (sizeof(unsigned) * 8)] &
+			    (1U << (type_index % (sizeof(unsigned) * 8)))) {
+				printf("%c%d", first == 1 ? ' ' : ',', type_index);
+				first = 0;
+			}
+		}
+	} else {
+		int i, nsp, ndp;
+
+		nsp = IP_FW_GETNSRCP_COMPAT(vers1_rule);
+		for (i = 0; i < nsp; i++) {
+			printf("source ports: fw_uar_compat.fw_pts: %04x", vers1_rule->fw_uar_compat.fw_pts[i]);
+			if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_SRNG_COMPAT))
+				printf("-");
+			else if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_SMSK_COMPAT))
+				printf(":");
+			else
+				printf(",");
+		}
+
+		printf("\n");
+
+		ndp = IP_FW_GETNDSTP_COMPAT(vers1_rule);
+		for (i = 0; i < ndp; i++) {
+			printf("dest ports: fw_uar_compat.fw_pts: %04x", vers1_rule->fw_uar_compat.fw_pts[nsp+i]);
+			if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_DRNG_COMPAT))
+				printf("-");
+			else if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_DMSK_COMPAT))
+				printf(":");
+			else
+				printf(",");
+		}
+
+		printf("\n");
+	}
+
+	printf("fw_ipflg: %d\n", vers1_rule->fw_ipflg);
+	printf("fw_ipopt: %d\n", vers1_rule->fw_ipopt);
+	printf("fw_ipnopt: %d\n", vers1_rule->fw_ipnopt);
+	printf("fw_tcpopt: %d\n", vers1_rule->fw_tcpopt);
+	printf("fw_tcpnopt: %d\n", vers1_rule->fw_tcpnopt);
+	printf("fw_tcpf: %d\n", vers1_rule->fw_tcpf);
+	printf("fw_tcpnf: %d\n", vers1_rule->fw_tcpnf);
+	printf("timestamp: %d\n", vers1_rule->timestamp);
+
+	if ((vers1_rule->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) {
+		printf("fw_in_if: ");
+		inet_ntop(AF_INET, &vers1_rule->fw_in_if.fu_via_ip, ipv4str,
+		    sizeof(ipv4str));
+		printf("fu_via_ip: %s\n", ipv4str);
+		printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_in_if.fu_via_if_compat.name);
+		printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_in_if.fu_via_if_compat.unit);
+	} else {
+		if (vers1_rule->fw_flg & IP_FW_F_IIFACE_COMPAT) {
+			printf("fw_in_if: ");
+			printf("fu_via_ip: %s\n",
+			    inet_ntop(AF_INET, &vers1_rule->fw_in_if.fu_via_ip, ipv4str,
+			    	sizeof(ipv4str)));
+			printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_in_if.fu_via_if_compat.name);
+			printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_in_if.fu_via_if_compat.unit);
+		}
+		if (vers1_rule->fw_flg & IP_FW_F_OIFACE_COMPAT) {
+			printf("fw_out_if: ");
+			printf("fu_via_ip: %s\n",
+			    inet_ntop(AF_INET, &vers1_rule->fw_out_if.fu_via_ip,
+			    	ipv4str, sizeof(ipv4str)));
+			printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_out_if.fu_via_if_compat.name);
+			printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_out_if.fu_via_if_compat.unit);
+		}
+	}
+
+	printf("fw_prot: %d\n", vers1_rule->fw_prot);
+	printf("fw_nports: %d\n", vers1_rule->fw_nports);
+	printf("pipe_ptr: %x\n", vers1_rule->pipe_ptr);
+	printf("next_rule_ptr: %x\n", vers1_rule->next_rule_ptr);
+	printf("fw_uid: %d\n", vers1_rule->fw_uid);
+	printf("fw_logamount: %d\n", vers1_rule->fw_logamount);
+	printf("fw_loghighest: %d\n", vers1_rule->fw_loghighest);
+}
+
+static void
+print_icmptypes(ipfw_insn_u32 *cmd)
+{
+	int i;
+	char sep= ' ';
+
+	printf(" icmptypes");
+	for (i = 0; i < 32; i++) {
+		if ( (cmd->d[0] & (1 << (i))) == 0)
+			continue;
+		printf("%c%d", sep, i);
+		sep = ',';
+	}
+}
+
+/*
+ * print flags set/clear in the two bitmasks passed as parameters.
+ * There is a specialized check for f_tcpflags.
+ */
+static void
+print_flags(char const *name, ipfw_insn *cmd, struct _s_x *list)
+{
+	char const *comma = "";
+	int i;
+	uint8_t set = cmd->arg1 & 0xff;
+	uint8_t clear = (cmd->arg1 >> 8) & 0xff;
+
+	if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) {
+		printf(" setup");
+		return;
+	}
+
+	printf(" %s ", name);
+	for (i=0; list[i].x != 0; i++) {
+		if (set & list[i].x) {
+			set &= ~list[i].x;
+			printf("%s%s", comma, list[i].s);
+			comma = ",";
+		}
+		if (clear & list[i].x) {
+			clear &= ~list[i].x;
+			printf("%s!%s", comma, list[i].s);
+			comma = ",";
+		}
+	}
+}
+
+static int
+contigmask(uint8_t *p, int len)
+{
+	int i, n;
+
+	for (i=0; i<len; i++)
+		if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
+			break;
+	for (n=i+1; n < len; n++)
+		if ( (p[n/8] & (1 << (7 - (n%8)))) != 0)
+			return -1; /* mask not contiguous */
+	return i;
+}
+
+static void
+print_ip(ipfw_insn_ip *cmd)
+{
+	int len = F_LEN((ipfw_insn *)cmd);
+	uint32_t *a = ((ipfw_insn_u32 *)cmd)->d;
+	char ipv4str[MAX_IPv4_STR_LEN];
+
+	printf("%s ", cmd->o.len & F_NOT ? " not": "");
+
+	if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) {
+		printf("me");
+		return;
+	}
+
+	/*
+	 * len == 2 indicates a single IP, whereas lists of 1 or more
+	 * addr/mask pairs have len = (2n+1). We convert len to n so we
+	 * use that to count the number of entries.
+	 */
+	for (len = len / 2; len > 0; len--, a += 2) {
+		int mb =	/* mask length */
+		    (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) ?
+ 32 : contigmask((uint8_t *)&(a[1]), 32); + if (mb == 0) { /* any */ + printf("any"); + } else { /* numeric IP followed by some kind of mask */ + printf("%s", inet_ntop(AF_INET, &a[0], ipv4str, sizeof(ipv4str))); + if (mb < 0) + printf(":%s", inet_ntop(AF_INET, &a[1], ipv4str, sizeof(ipv4str))); + else if (mb < 32) + printf("/%d", mb); + } + if (len > 1) + printf(","); + } +} + +/* + * prints a MAC address/mask pair + */ +static void +print_mac(uint8_t *addr, uint8_t *mask) +{ + int l = contigmask(mask, 48); + + if (l == 0) + printf(" any"); + else { + printf(" %02x:%02x:%02x:%02x:%02x:%02x", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + if (l == -1) + printf("&%02x:%02x:%02x:%02x:%02x:%02x", + mask[0], mask[1], mask[2], + mask[3], mask[4], mask[5]); + else if (l < 48) + printf("/%d", l); + } +} + +static void +ipfw_print_vers2_struct(struct ip_fw *vers2_rule) +{ + int l; + ipfw_insn *cmd; + ipfw_insn_log *logptr = NULL; + char ipv4str[MAX_IPv4_STR_LEN]; + + print_fw_version(vers2_rule->version); + + printf("act_ofs: %d\n", vers2_rule->act_ofs); + printf("cmd_len: %d\n", vers2_rule->cmd_len); + printf("rulenum: %d\n", vers2_rule->rulenum); + printf("set: %d\n", vers2_rule->set); + printf("pcnt: %d\n", vers2_rule->pcnt); + printf("bcnt: %d\n", vers2_rule->bcnt); + printf("timestamp: %d\n", vers2_rule->timestamp); + + /* + * first print actions + */ + for (l = vers2_rule->cmd_len - vers2_rule->act_ofs, cmd = ACTION_PTR(vers2_rule); + l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch(cmd->opcode) { + case O_CHECK_STATE: + printf("check-state"); + break; + + case O_ACCEPT: + printf("allow"); + break; + + case O_COUNT: + printf("count"); + break; + + case O_DENY: + printf("deny"); + break; + + case O_REJECT: + if (cmd->arg1 == ICMP_REJECT_RST) + printf("reset"); + else if (cmd->arg1 == ICMP_UNREACH_HOST) + printf("reject"); + else + printf("unreach %u", cmd->arg1); + break; + + case O_SKIPTO: + printf("skipto %u", cmd->arg1); + break; + + case O_PIPE: + printf("pipe %u", cmd->arg1); + break; + + case O_QUEUE: + printf("queue %u", cmd->arg1); + break; + + case O_DIVERT: + printf("divert %u", cmd->arg1); + break; + + case O_TEE: + printf("tee %u", cmd->arg1); + break; + + case O_FORWARD_IP: + { + ipfw_insn_sa *s = (ipfw_insn_sa *)cmd; + + printf("fwd %s", + inet_ntop(AF_INET, &s->sa.sin_addr, ipv4str, + sizeof(ipv4str))); + if (s->sa.sin_port) + printf(",%d", s->sa.sin_port); + break; + } + + case O_LOG: /* O_LOG is printed last */ + logptr = (ipfw_insn_log *)cmd; + break; + + default: + printf("** unrecognized action %d len %d", + cmd->opcode, cmd->len); + } + } + if (logptr) { + if (logptr->max_log > 0) + printf(" log logamount %d", logptr->max_log); + else + printf(" log"); + } + + /* + * then print the body. 
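+ * The cmd[] array of an ip_fw rule stores the match opcodes first and the
+ * action opcodes last: the body is cmd[0 .. act_ofs) and the actions
+ * (already printed above) are cmd[act_ofs .. cmd_len).  Every opcode is
+ * F_LEN(cmd) 32-bit words long, which is why this loop and the one above
+ * step by F_LEN(cmd) rather than by one.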
+ */ + for (l = vers2_rule->act_ofs, cmd = vers2_rule->cmd ; + l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + /* useful alias */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; + + switch(cmd->opcode) { + case O_PROB: + break; /* done already */ + + case O_PROBE_STATE: + break; /* no need to print anything here */ + + case O_MACADDR2: + { + ipfw_insn_mac *m = (ipfw_insn_mac *)cmd; + + if (cmd->len & F_NOT) + printf(" not"); + printf(" MAC"); + print_mac(m->addr, m->mask); + print_mac(m->addr + 6, m->mask + 6); + printf("\n"); + break; + } + case O_MAC_TYPE: + { + uint16_t *p = ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("0x%04x", p[0]); + if (p[0] != p[1]) { + printf("-"); + printf("0x%04x", p[1]); + } + printf(","); + } + break; + } + case O_IP_SRC: + case O_IP_SRC_MASK: + case O_IP_SRC_ME: + print_ip((ipfw_insn_ip *)cmd); + break; + + case O_IP_DST: + case O_IP_DST_MASK: + case O_IP_DST_ME: + print_ip((ipfw_insn_ip *)cmd); + break; + + case O_IP_DSTPORT: + case O_IP_SRCPORT: + { + uint16_t *p = ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("0x%04x", p[0]); + if (p[0] != p[1]) { + printf("-"); + printf("0x%04x", p[1]); + } + printf(","); + } + break; + } + case O_PROTO: + { + printf("O_PROTO"); + + if (cmd->len & F_NOT) + printf(" not"); + + printf(" %u", cmd->arg1); + + break; + } + + default: /*options ... */ + { + if (cmd->len & F_NOT && cmd->opcode != O_IN) + printf(" not"); + switch(cmd->opcode) { + case O_FRAG: + printf("O_FRAG"); + break; + + case O_IN: + printf(cmd->len & F_NOT ? " out" : " O_IN"); + break; + + case O_LAYER2: + printf(" O_LAYER2"); + break; + case O_XMIT: + case O_RECV: + case O_VIA: + { + char const *s; + ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd; + + if (cmd->opcode == O_XMIT) + s = "O_XMIT"; + else if (cmd->opcode == O_RECV) + s = "O_RECV"; + else /* if (cmd->opcode == O_VIA) */ + s = "O_VIA"; + if (cmdif->name[0] == '\0') { + printf(" %s %s", s, + inet_ntop(AF_INET, &cmdif->p.ip, ipv4str, + sizeof(ipv4str))); + } + else if (cmdif->p.unit == -1) + printf(" %s %s*", s, cmdif->name); + else + printf(" %s %s%d", s, cmdif->name, + cmdif->p.unit); + } + break; + + case O_IPID: + if (F_LEN(cmd) == 1) + printf(" ipid %u", cmd->arg1 ); + else { + uint16_t *p = ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("0x%04x", p[0]); + if (p[0] != p[1]) { + printf("-"); + printf("0x%04x", p[1]); + } + printf(","); + } + } + + break; + + case O_IPTTL: + if (F_LEN(cmd) == 1) + printf(" ipttl %u", cmd->arg1 ); + else { + uint16_t *p = ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("0x%04x", p[0]); + if (p[0] != p[1]) { + printf("-"); + printf("0x%04x", p[1]); + } + printf(","); + } + } + + break; + + case O_IPVER: + printf(" ipver %u", cmd->arg1 ); + break; + + case O_IPPRECEDENCE: + printf(" ipprecedence %u", (cmd->arg1) >> 5 ); + break; + + case O_IPLEN: + if (F_LEN(cmd) == 1) + printf(" iplen %u", cmd->arg1 ); + else { + uint16_t *p = ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("0x%04x", p[0]); + if (p[0] != p[1]) { + printf("-"); + printf("0x%04x", p[1]); + } + printf(","); + } + } + + break; + + case O_IPOPT: + print_flags("ipoptions", cmd, f_ipopts); + break; + + case O_IPTOS: + print_flags("iptos", cmd, f_iptos); + break; + + case O_ICMPTYPE: + 
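/*
+ * ipfw2 keeps the ICMP types of a rule as one 32-bit bitmap in cmd->d[0],
+ * bit i standing for ICMP type i (types above 31 cannot be encoded in this
+ * opcode).  For example, d[0] == 0x00000109 (bits 0, 3 and 8 set) prints
+ * as " icmptypes 0,3,8".
+ */ +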
print_icmptypes((ipfw_insn_u32 *)cmd); + break; + + case O_ESTAB: + printf(" established"); + break; + + case O_TCPFLAGS: + print_flags("tcpflags", cmd, f_tcpflags); + break; + + case O_TCPOPTS: + print_flags("tcpoptions", cmd, f_tcpopts); + break; + + case O_TCPWIN: + printf(" tcpwin %d", ntohs(cmd->arg1)); + break; + + case O_TCPACK: + printf(" tcpack %ld", ntohl(cmd32->d[0])); + break; + + case O_TCPSEQ: + printf(" tcpseq %ld", ntohl(cmd32->d[0])); + break; + + case O_UID: + printf(" uid %u", cmd32->d[0]); + break; + + case O_GID: + printf(" gid %u", cmd32->d[0]); + break; + + case O_VERREVPATH: + printf(" verrevpath"); + break; + + case O_IPSEC: + printf(" ipsec"); + break; + + case O_NOP: + break; + + case O_KEEP_STATE: + printf(" keep-state"); + break; + + case O_LIMIT: + { + struct _s_x *p = limit_masks; + ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; + uint8_t x = c->limit_mask; + char const *comma = " "; + + printf(" limit"); + for (; p->x != 0 ; p++) + if ((x & p->x) == p->x) { + x &= ~p->x; + printf("%s%s", comma, p->s); + comma = ","; + } + printf(" %d", c->conn_limit); + + break; + } + + default: + printf(" [opcode %d len %d]", + cmd->opcode, cmd->len); + } /* switch */ + } /* default */ + } /* switch */ + } /* for */ +} + +/* + * helper function, updates the pointer to cmd with the length + * of the current command, and also cleans up the first word of + * the new command in case it has been clobbered before. + * from ipfw2.c + */ +static ipfw_insn * +next_cmd(ipfw_insn *cmd) +{ + cmd += F_LEN(cmd); + bzero(cmd, sizeof(*cmd)); + return cmd; +} + +/* + * A function to fill simple commands of size 1. + * Existing flags are preserved. + * from ipfw2.c + */ +static void +fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, uint16_t arg) +{ + cmd->opcode = opcode; + cmd->len = ((cmd->len) & (F_NOT | F_OR)) | 1; + cmd->arg1 = arg; +} + + +static u_int32_t +fill_compat_tcpflags(u_int32_t flags) { + u_int32_t flags_compat = 0; + + if (flags & TH_FIN) + flags_compat |= IP_FW_TCPF_FIN_COMPAT; + if (flags & TH_SYN) + flags_compat |= IP_FW_TCPF_SYN_COMPAT; + if (flags & TH_RST) + flags_compat |= IP_FW_TCPF_RST_COMPAT; + if (flags & TH_PUSH) + flags_compat |= IP_FW_TCPF_PSH_COMPAT; + if (flags & TH_ACK) + flags_compat |= IP_FW_TCPF_ACK_COMPAT; + if (flags & TH_URG) + flags_compat |= IP_FW_TCPF_URG_COMPAT; + + return flags_compat; +} + + +/* ******************************************** + * *********** Convert from Latest ************ + * ********************************************/ + +/* + * Things we're actively ignoring: + * sets, sets of addresses, blocks (NOT, OR) + */ +static void +ipfw_map_from_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) +{ + int l; + ipfw_insn *cmd; + + for (l = curr_rule->act_ofs, cmd = curr_rule->cmd ; + l > 0 ; + l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + /* useful alias */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; + + switch (cmd->opcode) { + case O_PROTO: + /* protocol */ + compat_rule->fw_prot = cmd->arg1; + break; + + case O_IP_SRC_ME: + compat_rule->fw_flg |= IP_FW_F_SME_COMPAT; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } + break; + + case O_IP_SRC_MASK: + { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + compat_rule->fw_src = ip->addr; + compat_rule->fw_smsk = ip->mask; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } + break; + } + + case O_IP_SRC: + /* one IP */ + /* source - + * for now we only deal with one address + * per rule and ignore sets of 
addresses + */ + compat_rule->fw_src.s_addr = cmd32->d[0]; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } + break; + + case O_IP_SRCPORT: + { + /* source ports */ + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, j; + + /* copy list of ports */ + for (i = F_LEN(cmd) - 1, j = 0; i > 0; i--, j++, p += 2) { + if (p[0] != p[1]) { + /* this is a range */ + compat_rule->fw_flg |= IP_FW_F_SRNG_COMPAT; + compat_rule->fw_uar_compat.fw_pts[j++] = p[0]; + compat_rule->fw_uar_compat.fw_pts[j] = p[1]; + } else { + compat_rule->fw_uar_compat.fw_pts[j] = p[0]; + } + } + IP_FW_SETNSRCP_COMPAT(compat_rule, j); + + break; + } + + case O_IP_DST_ME: + /* destination */ + compat_rule->fw_flg |= IP_FW_F_DME_COMPAT; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } + break; + + case O_IP_DST_MASK: + { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + compat_rule->fw_dst = ip->addr; + compat_rule->fw_dmsk = ip->mask; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } + break; + } + case O_IP_DST: + /* one IP */ + /* dest - + * for now we only deal with one address + * per rule, and ignore sets of addresses + */ + compat_rule->fw_dst.s_addr = cmd32->d[0]; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } + break; + + case O_IP_DSTPORT: + { + /* dest. ports */ + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, + j = IP_FW_GETNSRCP_COMPAT(compat_rule); + + /* copy list of ports */ + for (i = F_LEN(cmd) - 1; i > 0; i--, j++, p += 2) { + if (p[0] != p[1]) { + /* this is a range */ + compat_rule->fw_flg |= IP_FW_F_DRNG_COMPAT; + compat_rule->fw_uar_compat.fw_pts[j++] = p[0]; + compat_rule->fw_uar_compat.fw_pts[j] = p[1]; + } else { + compat_rule->fw_uar_compat.fw_pts[j] = p[0]; + } + } + IP_FW_SETNDSTP_COMPAT(compat_rule, (j - IP_FW_GETNSRCP_COMPAT(compat_rule))); + + break; + } + + case O_LOG: + { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + + compat_rule->fw_flg |= IP_FW_F_PRN_COMPAT; + compat_rule->fw_logamount = c->max_log; + break; + } + case O_UID: + compat_rule->fw_flg |= IP_FW_F_UID_COMPAT; + compat_rule->fw_uid = cmd32->d[0]; + break; + + case O_IN: + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_OUT_COMPAT; + } else { + compat_rule->fw_flg |= IP_FW_F_IN_COMPAT; + } + break; + + case O_KEEP_STATE: + compat_rule->fw_flg |= IP_FW_F_KEEP_S_COMPAT; + break; + + case O_LAYER2: + compat_rule->fw_flg |= IP_FW_BRIDGED_COMPAT; + break; + + case O_XMIT: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + compat_rule->fw_flg |= IP_FW_F_OIFACE_COMPAT; + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->p.ip.s_addr != 0) { + compat_rule->fw_flg |= IP_FW_F_OIFACE_COMPAT; + ifu.fu_via_ip = ifcmd->p.ip; + } else { + compat_rule->fw_flg |= IP_FW_F_OIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } + compat_rule->fw_out_if = ifu; + + break; + } + + case O_RECV: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + compat_rule->fw_flg |= IP_FW_F_IIFACE_COMPAT; + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->p.ip.s_addr != 0) { + compat_rule->fw_flg |= IP_FW_F_IIFACE_COMPAT; + ifu.fu_via_ip = ifcmd->p.ip; + } else { + 
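/* no usable IP in the instruction, so the receive interface is
+ * matched by name/unit instead, mirroring the O_XMIT case above */ +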
compat_rule->fw_flg |= IP_FW_F_IIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } + compat_rule->fw_in_if = ifu; + + break; + } + + case O_VIA: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->name[0] != '\0') { + compat_rule->fw_flg |= IP_FW_F_IIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } else { + ifu.fu_via_ip = ifcmd->p.ip; + } + compat_rule->fw_flg |= IF_FW_F_VIAHACK_COMPAT; + compat_rule->fw_out_if = compat_rule->fw_in_if = ifu; + + break; + } + + case O_FRAG: + compat_rule->fw_flg |= IP_FW_F_FRAG_COMPAT; + break; + + case O_IPOPT: + /* IP options */ + compat_rule->fw_ipopt = (cmd->arg1 & 0xff); + compat_rule->fw_ipnopt = ((cmd->arg1 >> 8) & 0xff); + break; + + case O_TCPFLAGS: + /* check for "setup" */ + if ((cmd->arg1 & 0xff) == TH_SYN && + ((cmd->arg1 >> 8) & 0xff) == TH_ACK) { + compat_rule->fw_tcpf = IP_FW_TCPF_SYN_COMPAT; + compat_rule->fw_tcpnf = IP_FW_TCPF_ACK_COMPAT; + } + else { + compat_rule->fw_tcpf = fill_compat_tcpflags(cmd->arg1 & 0xff); + compat_rule->fw_tcpnf = fill_compat_tcpflags((cmd->arg1 >> 8) & 0xff); + } + break; + + case O_TCPOPTS: + /* TCP options */ + compat_rule->fw_tcpopt = (cmd->arg1 & 0xff); + compat_rule->fw_tcpnopt = ((cmd->arg1 >> 8) & 0xff); + break; + + case O_ESTAB: + compat_rule->fw_ipflg |= IP_FW_IF_TCPEST_COMPAT; + break; + + case O_ICMPTYPE: + { + /* ICMP: copy each bit set in the 32-bit ipfw2 bitmap + * back into the compat bitmap (only types 0-31 can + * appear in this opcode) + */ + int type; + + compat_rule->fw_flg |= IP_FW_F_ICMPBIT_COMPAT; + for (type = 0; type < (int)(sizeof(uint32_t) * 8); type++) { + if ((cmd32->d[0] & (1U << type)) == 0) + continue; + + compat_rule->fw_uar_compat.fw_icmptypes[type / (sizeof(unsigned) * 8)] |= + 1 << (type % (sizeof(unsigned) * 8)); + } + break; + } + default: + break; + } /* switch */ + } /* for */ +} + +static void +ipfw_map_from_actions(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) +{ + int l; + ipfw_insn *cmd; + + for (l = curr_rule->cmd_len - curr_rule->act_ofs, cmd = ACTION_PTR(curr_rule); + l > 0 ; + l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch (cmd->opcode) { + case O_ACCEPT: + compat_rule->fw_flg |= IP_FW_F_ACCEPT_COMPAT; + break; + case O_COUNT: + compat_rule->fw_flg |= IP_FW_F_COUNT_COMPAT; + break; + case O_PIPE: + compat_rule->fw_flg |= IP_FW_F_PIPE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_QUEUE: + compat_rule->fw_flg |= IP_FW_F_QUEUE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_SKIPTO: + compat_rule->fw_flg |= IP_FW_F_SKIPTO_COMPAT; + compat_rule->fw_skipto_rule_compat = cmd->arg1; + break; + case O_DIVERT: + compat_rule->fw_flg |= IP_FW_F_DIVERT_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_TEE: + compat_rule->fw_flg |= IP_FW_F_TEE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_FORWARD_IP: + { + ipfw_insn_sa *p = (ipfw_insn_sa *)cmd; + + compat_rule->fw_flg |= IP_FW_F_FWD_COMPAT; + compat_rule->fw_fwd_ip_compat.sin_len = p->sa.sin_len; + compat_rule->fw_fwd_ip_compat.sin_family = p->sa.sin_family; + compat_rule->fw_fwd_ip_compat.sin_port = p->sa.sin_port; + compat_rule->fw_fwd_ip_compat.sin_addr = p->sa.sin_addr; + + break; + } + case O_DENY: + compat_rule->fw_flg |= IP_FW_F_DENY_COMPAT; + break; + case O_REJECT: + compat_rule->fw_flg |=
IP_FW_F_REJECT_COMPAT; + compat_rule->fw_reject_code_compat = cmd->arg1; + break; + case O_CHECK_STATE: + compat_rule->fw_flg |= IP_FW_F_CHECK_S_COMPAT; + break; + default: + break; + } + } +} + +static void +ipfw_version_latest_to_one(struct ip_fw *curr_rule, struct ip_fw_compat *rule_vers1) +{ + if (!rule_vers1) + return; + + bzero(rule_vers1, sizeof(struct ip_fw_compat)); + + rule_vers1->version = IP_FW_VERSION_1; + rule_vers1->context = curr_rule->context; + rule_vers1->fw_number = curr_rule->rulenum; + rule_vers1->fw_pcnt = curr_rule->pcnt; + rule_vers1->fw_bcnt = curr_rule->bcnt; + rule_vers1->timestamp = curr_rule->timestamp; + + /* convert actions */ + ipfw_map_from_actions(curr_rule, rule_vers1); + + /* convert commands */ + ipfw_map_from_cmds(curr_rule, rule_vers1); + +#if FW2_DEBUG_VERBOSE + ipfw_print_vers1_struct(rule_vers1); +#endif +} + +/* first convert to version one then to version zero */ +static void +ipfw_version_latest_to_zero(struct ip_fw *curr_rule, struct ip_old_fw *rule_vers0) +{ + struct ip_fw_compat rule_vers1; + + ipfw_version_latest_to_one(curr_rule, &rule_vers1); + + bzero(rule_vers0, sizeof(struct ip_old_fw)); + bcopy(&rule_vers1.fw_uar_compat, &rule_vers0->fw_uar, sizeof(rule_vers1.fw_uar_compat)); + bcopy(&rule_vers1.fw_in_if, &rule_vers0->fw_in_if, sizeof(rule_vers1.fw_in_if)); + bcopy(&rule_vers1.fw_out_if, &rule_vers0->fw_out_if, sizeof(rule_vers1.fw_out_if)); + bcopy(&rule_vers1.fw_un_compat, &rule_vers0->fw_un, sizeof(rule_vers1.fw_un_compat)); + + rule_vers0->fw_pcnt = rule_vers1.fw_pcnt; + rule_vers0->fw_bcnt = rule_vers1.fw_bcnt; + rule_vers0->fw_src = rule_vers1.fw_src; + rule_vers0->fw_dst = rule_vers1.fw_dst; + rule_vers0->fw_smsk = rule_vers1.fw_smsk; + rule_vers0->fw_dmsk = rule_vers1.fw_dmsk; + rule_vers0->fw_number = rule_vers1.fw_number; + rule_vers0->fw_flg = rule_vers1.fw_flg; + rule_vers0->fw_ipopt = rule_vers1.fw_ipopt; + rule_vers0->fw_ipnopt = rule_vers1.fw_ipnopt; + rule_vers0->fw_tcpf = rule_vers1.fw_tcpf; + rule_vers0->fw_tcpnf = rule_vers1.fw_tcpnf; + rule_vers0->timestamp = rule_vers1.timestamp; + rule_vers0->fw_prot = rule_vers1.fw_prot; + rule_vers0->fw_nports = rule_vers1.fw_nports; + rule_vers0->pipe_ptr = rule_vers1.pipe_ptr; + rule_vers0->next_rule_ptr = rule_vers1.next_rule_ptr; + + if (rule_vers1.fw_ipflg & IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB; +} + +void +ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version) +{ + switch (api_version) { + case IP_FW_VERSION_0: + { + struct ip_old_fw *rule_vers0 = old_rule; + + ipfw_version_latest_to_zero(curr_rule, rule_vers0); + break; + } + case IP_FW_VERSION_1: + { + struct ip_fw_compat *rule_vers1 = old_rule; + + ipfw_version_latest_to_one(curr_rule, rule_vers1); + break; + } + case IP_FW_CURRENT_API_VERSION: + /* ipfw2 for now, don't need to do anything */ + break; + + default: + /* unknown version */ + break; + } +} + + +/* ******************************************** + * *********** Convert to Latest ************** + * ********************************************/ + +/* from ip_fw.c */ +static int +ipfw_check_vers1_struct(struct ip_fw_compat *frwl) +{ + /* Check for invalid flag bits */ + if ((frwl->fw_flg & ~IP_FW_F_MASK_COMPAT) != 0) { + /* + printf(("%s undefined flag bits set (flags=%x)\n", + err_prefix, frwl->fw_flg)); + */ + return (EINVAL); + } + if (frwl->fw_flg == IP_FW_F_CHECK_S_COMPAT) { + /* check-state */ + return 0 ; + } + /* Must apply to incoming or outgoing (or both) */ + if (!(frwl->fw_flg &
(IP_FW_F_IN_COMPAT | IP_FW_F_OUT_COMPAT))) { + /* + printf(("%s neither in nor out\n", err_prefix)); + */ + return (EINVAL); + } + /* Empty interface name is no good */ + if (((frwl->fw_flg & IP_FW_F_IIFNAME_COMPAT) + && !*frwl->fw_in_if.fu_via_if_compat.name) + || ((frwl->fw_flg & IP_FW_F_OIFNAME_COMPAT) + && !*frwl->fw_out_if.fu_via_if_compat.name)) { + /* + printf(("%s empty interface name\n", err_prefix)); + */ + return (EINVAL); + } + /* Sanity check interface matching */ + if ((frwl->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { + ; /* allow "via" backwards compatibility */ + } else if ((frwl->fw_flg & IP_FW_F_IN_COMPAT) + && (frwl->fw_flg & IP_FW_F_OIFACE_COMPAT)) { + /* + printf(("%s outgoing interface check on incoming\n", + err_prefix)); + */ + return (EINVAL); + } + /* Sanity check port ranges */ + if ((frwl->fw_flg & IP_FW_F_SRNG_COMPAT) && IP_FW_GETNSRCP_COMPAT(frwl) < 2) { + /* + printf(("%s src range set but n_src_p=%d\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl))); + */ + return (EINVAL); + } + if ((frwl->fw_flg & IP_FW_F_DRNG_COMPAT) && IP_FW_GETNDSTP_COMPAT(frwl) < 2) { + /* + printf(("%s dst range set but n_dst_p=%d\n", + err_prefix, IP_FW_GETNDSTP_COMPAT(frwl))); + */ + return (EINVAL); + } + if (IP_FW_GETNSRCP_COMPAT(frwl) + IP_FW_GETNDSTP_COMPAT(frwl) > IP_FW_MAX_PORTS_COMPAT) { + /* + printf(("%s too many ports (%d+%d)\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl), IP_FW_GETNDSTP_COMPAT(frwl))); + */ + return (EINVAL); + } + /* + * Protocols other than TCP/UDP don't use port range + */ + if ((frwl->fw_prot != IPPROTO_TCP) && + (frwl->fw_prot != IPPROTO_UDP) && + (IP_FW_GETNSRCP_COMPAT(frwl) || IP_FW_GETNDSTP_COMPAT(frwl))) { + /* + printf(("%s port(s) specified for non TCP/UDP rule\n", + err_prefix)); + */ + return (EINVAL); + } + + /* + * Rather than modify the entry to make such entries work, + * we reject this rule and require user level utilities + * to enforce whatever policy they deem appropriate. 
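+ * For example, fw_src 10.0.0.5 with fw_smsk 255.255.255.0 leaves host
+ * bits set outside the mask (10.0.0.5 & ~255.255.255.0 is 0.0.0.5, not
+ * zero), so no packet could ever match and the rule is rejected with
+ * EINVAL below.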
+ */ + if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) || + (frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) { + /* + printf(("%s rule never matches\n", err_prefix)); + */ + return (EINVAL); + } + + if ((frwl->fw_flg & IP_FW_F_FRAG_COMPAT) && + (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) { + if (frwl->fw_nports) { + /* + printf(("%s cannot mix 'frag' and ports\n", err_prefix)); + */ + return (EINVAL); + } + if (frwl->fw_prot == IPPROTO_TCP && + frwl->fw_tcpf != frwl->fw_tcpnf) { + /* + printf(("%s cannot mix 'frag' and TCP flags\n", err_prefix)); + */ + return (EINVAL); + } + } + + /* Check command specific stuff */ + switch (frwl->fw_flg & IP_FW_F_COMMAND_COMPAT) + { + case IP_FW_F_REJECT_COMPAT: + if (frwl->fw_reject_code_compat >= 0x100 + && !(frwl->fw_prot == IPPROTO_TCP + && frwl->fw_reject_code_compat == IP_FW_REJECT_RST_COMPAT)) { + /* + printf(("%s unknown reject code\n", err_prefix)); + */ + return (EINVAL); + } + break; + case IP_FW_F_DIVERT_COMPAT: /* Diverting to port zero is invalid */ + case IP_FW_F_TEE_COMPAT: + case IP_FW_F_PIPE_COMPAT: /* piping through 0 is invalid */ + case IP_FW_F_QUEUE_COMPAT: /* piping through 0 is invalid */ + if (frwl->fw_divert_port_compat == 0) { + /* + printf(("%s can't divert to port 0\n", err_prefix)); + */ + return (EINVAL); + } + break; + case IP_FW_F_DENY_COMPAT: + case IP_FW_F_ACCEPT_COMPAT: + case IP_FW_F_COUNT_COMPAT: + case IP_FW_F_SKIPTO_COMPAT: + case IP_FW_F_FWD_COMPAT: + case IP_FW_F_UID_COMPAT: + break; + default: + /* + printf(("%s invalid command\n", err_prefix)); + */ + return (EINVAL); + } + + return 0; +} + +static void +ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) +{ + int k; + uint32_t actbuf[255], cmdbuf[255]; + ipfw_insn *action, *cmd, *src, *dst; + ipfw_insn *have_state = NULL, /* track check-state or keep-state */ + *end_action = NULL, + *end_cmd = NULL; + + if (!compat_rule || !curr_rule || !(curr_rule->cmd)) { + return; + } + + /* preemptively check the old ip_fw rule to + * make sure it's valid before starting to copy stuff + */ + if (ipfw_check_vers1_struct(compat_rule)) { + /* bad rule */ + return; + } + + bzero(actbuf, sizeof(actbuf)); /* actions go here */ + bzero(cmdbuf, sizeof(cmdbuf)); + + /* fill in action */ + action = (ipfw_insn *)actbuf; + { + u_int flag = compat_rule->fw_flg; + + action->len = 1; /* default */ + + if (flag & IP_FW_F_CHECK_S_COMPAT) { + have_state = action; + action->opcode = O_CHECK_STATE; + } + else { + switch (flag & IP_FW_F_COMMAND_COMPAT) { + case IP_FW_F_ACCEPT_COMPAT: + action->opcode = O_ACCEPT; + break; + case IP_FW_F_COUNT_COMPAT: + action->opcode = O_COUNT; + break; + case IP_FW_F_PIPE_COMPAT: + action->opcode = O_PIPE; + action->len = F_INSN_SIZE(ipfw_insn_pipe); + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_QUEUE_COMPAT: + action->opcode = O_QUEUE; + action->len = F_INSN_SIZE(ipfw_insn_pipe); + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_SKIPTO_COMPAT: + action->opcode = O_SKIPTO; + action->arg1 = compat_rule->fw_skipto_rule_compat; + break; + case IP_FW_F_DIVERT_COMPAT: + action->opcode = O_DIVERT; + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_TEE_COMPAT: + action->opcode = O_TEE; + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_FWD_COMPAT: + { + ipfw_insn_sa *p = (ipfw_insn_sa *)action; + + action->opcode = O_FORWARD_IP; + action->len = F_INSN_SIZE(ipfw_insn_sa); + + p->sa.sin_len = 
compat_rule->fw_fwd_ip_compat.sin_len; + p->sa.sin_family = compat_rule->fw_fwd_ip_compat.sin_family; + p->sa.sin_port = compat_rule->fw_fwd_ip_compat.sin_port; + p->sa.sin_addr = compat_rule->fw_fwd_ip_compat.sin_addr; + + break; + } + case IP_FW_F_DENY_COMPAT: + action->opcode = O_DENY; + action->arg1 = 0; + break; + case IP_FW_F_REJECT_COMPAT: + action->opcode = O_REJECT; + action->arg1 = compat_rule->fw_reject_code_compat; + break; + default: + action->opcode = O_NOP; + break; + } + } + + /* action is mandatory */ + if (action->opcode == O_NOP) { + return; + } + + action = next_cmd(action); + } /* end actions */ + + cmd = (ipfw_insn *)cmdbuf; + + /* this is O_CHECK_STATE, we're done */ + if (have_state) { + goto done; + } + + { + ipfw_insn *prev = NULL; + u_int flag = compat_rule->fw_flg; + + /* logging */ + if (flag & IP_FW_F_PRN_COMPAT) { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + + cmd->opcode = O_LOG; + cmd->len |= F_INSN_SIZE(ipfw_insn_log); + c->max_log = compat_rule->fw_logamount; + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* protocol */ + if (compat_rule->fw_prot != 0) { + fill_cmd(cmd, O_PROTO, compat_rule->fw_prot); + prev = cmd; + cmd = next_cmd(cmd); + } + + /* source */ + if (flag & IP_FW_F_SME_COMPAT) { + cmd->opcode = O_IP_SRC_ME; + cmd->len |= F_INSN_SIZE(ipfw_insn); + if (flag & IP_FW_F_INVSRC_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (compat_rule->fw_smsk.s_addr != 0) { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + ip->addr = compat_rule->fw_src; + ip->mask = compat_rule->fw_smsk; + cmd->opcode = O_IP_SRC_MASK; + cmd->len |= F_INSN_SIZE(ipfw_insn_ip); /* double check this */ + } else { + /* one IP */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + if (compat_rule->fw_src.s_addr == 0) { + /* any */ + cmd32->o.len &= ~F_LEN_MASK; /* zero len */ + } else { + cmd32->d[0] = compat_rule->fw_src.s_addr; + cmd32->o.opcode = O_IP_SRC; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + } + } + + if (flag & IP_FW_F_INVSRC_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + if (F_LEN(cmd) != 0) { /* !any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } + + /* source ports */ + { + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, j = 0, + nports = IP_FW_GETNSRCP_COMPAT(compat_rule), + have_range = 0; + + cmd->opcode = O_IP_SRCPORT; + for (i = 0; i < nports; i++) { + if (((flag & IP_FW_F_SRNG_COMPAT) || + (flag & IP_FW_F_SMSK_COMPAT)) && !have_range) { + p[0] = compat_rule->fw_uar_compat.fw_pts[i++]; + p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + have_range = 1; + } else { + p[0] = p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + } + p += 2; + j++; + } + + if (j > 0) { + ports->o.len |= j+1; /* leave F_NOT and F_OR untouched */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* destination */ + if (flag & IP_FW_F_DME_COMPAT) { + cmd->opcode = O_IP_DST_ME; + cmd->len |= F_INSN_SIZE(ipfw_insn); + if (flag & IP_FW_F_INVDST_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (compat_rule->fw_dmsk.s_addr != 0) { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + ip->addr = compat_rule->fw_dst; + ip->mask = compat_rule->fw_dmsk; + cmd->opcode = O_IP_DST_MASK; + cmd->len |= F_INSN_SIZE(ipfw_insn_ip); /* double check this */ + } else { + /* one IP */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + if (compat_rule->fw_dst.s_addr == 0) { + /* any */ 
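+ /*
+ * "any" is encoded by emitting no opcode at all: clearing the length
+ * bits makes F_LEN(cmd) == 0, so the F_LEN(cmd) != 0 test below skips
+ * next_cmd() and this slot is reused by whatever opcode comes next.
+ */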
+ cmd32->o.len &= ~F_LEN_MASK; /* zero len */ + } else { + cmd32->d[0] = compat_rule->fw_dst.s_addr; + cmd32->o.opcode = O_IP_DST; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + } + } + + if (flag & IP_FW_F_INVDST_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + if (F_LEN(cmd) != 0) { /* !any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } + + /* dest. ports */ + { + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i = IP_FW_GETNSRCP_COMPAT(compat_rule), + j = 0, + nports = (IP_FW_GETNDSTP_COMPAT(compat_rule) + i), + have_range = 0; + + cmd->opcode = O_IP_DSTPORT; + for (; i < nports; i++, p += 2) { + if (((flag & IP_FW_F_DRNG_COMPAT) || + (flag & IP_FW_F_DMSK_COMPAT)) && !have_range) { + /* range */ + p[0] = compat_rule->fw_uar_compat.fw_pts[i++]; + p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + have_range = 1; + } else { + p[0] = p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + } + j++; + } + + if (j > 0) { + ports->o.len |= j+1; /* leave F_NOT and F_OR untouched */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } + + if (flag & IP_FW_F_UID_COMPAT) { + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + cmd32->o.opcode = O_UID; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + cmd32->d[0] = compat_rule->fw_uid; + + prev = cmd; + cmd = next_cmd(cmd); + } + + if (flag & IP_FW_F_KEEP_S_COMPAT) { + have_state = cmd; + fill_cmd(cmd, O_KEEP_STATE, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + if (flag & IP_FW_BRIDGED_COMPAT) { + fill_cmd(cmd, O_LAYER2, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + if ((flag & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { + /* via */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_in_if; + + cmd->opcode = O_VIA; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (compat_rule->fw_flg & IP_FW_F_IIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (flag & IP_FW_F_IN_COMPAT) { + fill_cmd(cmd, O_IN, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + if (flag & IP_FW_F_OUT_COMPAT) { + /* if the previous command was O_IN, and this + * is being set as well, it's equivalent to not + * having either command, so let's back up prev + * to the cmd before it and move cmd to prev. 
+ */ + if (prev->opcode == O_IN) { + cmd = prev; + bzero(cmd, sizeof(*cmd)); + } else { + cmd->len ^= F_NOT; /* toggle F_NOT */ + fill_cmd(cmd, O_IN, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + } + if (flag & IP_FW_F_OIFACE_COMPAT) { + /* xmit */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_out_if; + + cmd->opcode = O_XMIT; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (flag & IP_FW_F_OIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } + else if (flag & IP_FW_F_IIFACE_COMPAT) { + /* recv */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_in_if; + + cmd->opcode = O_RECV; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (flag & IP_FW_F_IIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } + } + + if (flag & IP_FW_F_FRAG_COMPAT) { + fill_cmd(cmd, O_FRAG, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* IP options */ + if (compat_rule->fw_ipopt != 0 || compat_rule->fw_ipnopt != 0) { + fill_cmd(cmd, O_IPOPT, (compat_rule->fw_ipopt & 0xff) | + (compat_rule->fw_ipnopt & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } + + if (compat_rule->fw_prot == IPPROTO_TCP) { + if (compat_rule->fw_ipflg & IP_FW_IF_TCPEST_COMPAT) { + fill_cmd(cmd, O_ESTAB, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* TCP options and flags */ + if (compat_rule->fw_tcpf != 0 || compat_rule->fw_tcpnf != 0) { + if ((compat_rule->fw_tcpf & IP_FW_TCPF_SYN_COMPAT) && + compat_rule->fw_tcpnf & IP_FW_TCPF_ACK_COMPAT) { + fill_cmd(cmd, O_TCPFLAGS, (TH_SYN) | ( (TH_ACK) & 0xff) <<8); + + prev = cmd; + cmd = next_cmd(cmd); + } + else { + fill_cmd(cmd, O_TCPFLAGS, (compat_rule->fw_tcpf & 0xff) | + (compat_rule->fw_tcpnf & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } + } + if (compat_rule->fw_tcpopt != 0 || compat_rule->fw_tcpnopt != 0) { + fill_cmd(cmd, O_TCPOPTS, (compat_rule->fw_tcpopt & 0xff) | + (compat_rule->fw_tcpnopt & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } + } + + /* ICMP */ + /* XXX: check this */ + if (flag & IP_FW_F_ICMPBIT_COMPAT) { + int i; + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + cmd32->o.opcode = O_ICMPTYPE; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + + for (i = 0; i < IP_FW_ICMPTYPES_DIM_COMPAT; i++) { + cmd32->d[0] |= compat_rule->fw_uar_compat.fw_icmptypes[i]; + } + + prev = cmd; + cmd = next_cmd(cmd); + } + } /* end commands */ + +done: + /* finally, copy everything into the current + * rule buffer in the right order. 
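+ * That order is: an optional O_PROB first, then O_PROBE_STATE when the
+ * rule keeps state, then every match opcode except O_LOG and
+ * O_KEEP_STATE, then the O_KEEP_STATE command itself so it is evaluated
+ * last, and finally the actions (O_LOG first) starting at act_ofs.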
+ */ + dst = curr_rule->cmd; + + /* first, do match probability */ + if (compat_rule->fw_flg & IP_FW_F_RND_MATCH_COMPAT) { + dst->opcode = O_PROB; + dst->len = 2; + *((int32_t *)(dst+1)) = compat_rule->pipe_ptr; + dst += dst->len; + } + + /* generate O_PROBE_STATE if necessary */ + if (have_state && have_state->opcode != O_CHECK_STATE) { + fill_cmd(dst, O_PROBE_STATE, 0); + dst = next_cmd(dst); + } + + /* + * copy all commands but O_LOG, O_KEEP_STATE + */ + for (src = (ipfw_insn *)cmdbuf; src != cmd; src += k) { + k = F_LEN(src); + + switch (src->opcode) { + case O_LOG: + case O_KEEP_STATE: + break; + default: + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; + } + } + + /* + * put back the have_state command as last opcode + */ + if (have_state && have_state->opcode != O_CHECK_STATE) { + k = F_LEN(have_state); + bcopy(have_state, dst, k * sizeof(uint32_t)); + dst += k; + } + + /* + * start action section + */ + curr_rule->act_ofs = dst - curr_rule->cmd; + + /* + * put back O_LOG if necessary + */ + src = (ipfw_insn *)cmdbuf; + if (src->opcode == O_LOG) { + k = F_LEN(src); + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; + } + + /* + * copy all other actions + */ + for (src = (ipfw_insn *)actbuf; src != action; src += k) { + k = F_LEN(src); + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; + } + + curr_rule->cmd_len = (uint32_t *)dst - (uint32_t *)(curr_rule->cmd); + + return; +} + +static int +ipfw_version_one_to_version_two(struct sockopt *sopt, struct ip_fw *curr_rule, + struct ip_fw_compat *rule_vers1) +{ + int err = EINVAL; + struct ip_fw_compat *rule_ptr; + struct ip_fw_compat rule; + + if (rule_vers1) { + rule_ptr = rule_vers1; + err = 0; + } else { + /* do some basic size checking here, more extensive checking later */ + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw_compat)) + return err; + + if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw_compat), + sizeof(struct ip_fw_compat)))) { + return err; + } + + rule_ptr = &rule; + } + + /* deal with commands */ + ipfw_convert_to_cmds(curr_rule, rule_ptr); + + curr_rule->version = IP_FW_CURRENT_API_VERSION; + curr_rule->context = rule_ptr->context; + curr_rule->rulenum = rule_ptr->fw_number; + curr_rule->pcnt = rule_ptr->fw_pcnt; + curr_rule->bcnt = rule_ptr->fw_bcnt; + curr_rule->timestamp = rule_ptr->timestamp; + + +#if FW2_DEBUG_VERBOSE + ipfw_print_vers2_struct(curr_rule); +#endif /* FW2_DEBUG_VERBOSE */ + + return err; +} + +/* This converts to whatever the latest version is. Currently the + * latest version of the firewall is ipfw2. + */ +static int +ipfw_version_one_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, struct ip_fw_compat *rule_vers1) +{ + int err; + + /* if rule_vers1 is not null then this is coming from + * ipfw_version_zero_to_latest(), so pass that along; + * otherwise let ipfw_version_one_to_version_two() + * get the rule from sopt. 
*/ + err = ipfw_version_one_to_version_two(sopt, curr_rule, rule_vers1); + + return err; +} + +static void +ipfw_version_zero_to_one(struct ip_old_fw *rule_vers0, struct ip_fw_compat *rule_vers1) +{ + bzero(rule_vers1, sizeof(struct ip_fw_compat)); + bcopy(&rule_vers0->fw_uar, &rule_vers1->fw_uar_compat, sizeof(rule_vers0->fw_uar)); + bcopy(&rule_vers0->fw_in_if, &rule_vers1->fw_in_if, sizeof(rule_vers0->fw_in_if)); + bcopy(&rule_vers0->fw_out_if, &rule_vers1->fw_out_if, sizeof(rule_vers0->fw_out_if)); + bcopy(&rule_vers0->fw_un, &rule_vers1->fw_un_compat, sizeof(rule_vers0->fw_un)); + + rule_vers1->version = 10; + rule_vers1->fw_pcnt = rule_vers0->fw_pcnt; + rule_vers1->fw_bcnt = rule_vers0->fw_bcnt; + rule_vers1->fw_src = rule_vers0->fw_src; + rule_vers1->fw_dst = rule_vers0->fw_dst; + rule_vers1->fw_smsk = rule_vers0->fw_smsk; + rule_vers1->fw_dmsk = rule_vers0->fw_dmsk; + rule_vers1->fw_number = rule_vers0->fw_number; + rule_vers1->fw_flg = rule_vers0->fw_flg; + rule_vers1->fw_ipopt = rule_vers0->fw_ipopt; + rule_vers1->fw_ipnopt = rule_vers0->fw_ipnopt; + rule_vers1->fw_tcpf = rule_vers0->fw_tcpf & ~IP_OLD_FW_TCPF_ESTAB; + rule_vers1->fw_tcpnf = rule_vers0->fw_tcpnf; + rule_vers1->timestamp = rule_vers0->timestamp; + rule_vers1->fw_prot = rule_vers0->fw_prot; + rule_vers1->fw_nports = rule_vers0->fw_nports; + rule_vers1->pipe_ptr = rule_vers0->pipe_ptr; + rule_vers1->next_rule_ptr = rule_vers0->next_rule_ptr; + rule_vers1->fw_ipflg = (rule_vers0->fw_tcpf & IP_OLD_FW_TCPF_ESTAB) ? IP_FW_IF_TCPEST_COMPAT : 0; +} + +/* first convert to version one, then to version two */ +static int +ipfw_version_zero_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule) +{ + int err; + struct ip_old_fw rule_vers0; + struct ip_fw_compat rule_vers1; + + /* blank out rule_vers1 so the flush/get cases below hand a zeroed + * compat rule to the converter rather than stack garbage */ + bzero(&rule_vers1, sizeof(rule_vers1)); + + if (sopt->sopt_name == IP_OLD_FW_GET || + sopt->sopt_name == IP_OLD_FW_FLUSH || + sopt->sopt_val == NULL) { + /* In the old-style API, it was legal to not pass in a rule + * structure for certain firewall operations (e.g. flush, + * reset log). If that's the situation, we pretend we received + * a blank structure. */ + bzero(curr_rule, sizeof(struct ip_fw)); + curr_rule->version = 10; + } + else { + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_old_fw)) { + return EINVAL; + } + + err = sooptcopyin(sopt, &rule_vers0, sizeof(struct ip_old_fw), + sizeof(struct ip_old_fw)); + if (err) { + return err; + } + + ipfw_version_zero_to_one(&rule_vers0, &rule_vers1); + } + + return (ipfw_version_one_to_latest(sopt, curr_rule, &rule_vers1)); +} + +/* rule is a u_int32_t buffer[255] into which the converted + * (if necessary) rules go. + */ +int +ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, int api_version) +{ + int err = 0; + + /* the following functions copy the rules passed in and + * convert to latest structures based on version + */ + switch (api_version) { + case IP_FW_VERSION_0: + /* this is the oldest version we support */ + err = ipfw_version_zero_to_latest(sopt, curr_rule); + break; + + case IP_FW_VERSION_1: + /* this is the version supported in Panther */ + err = ipfw_version_one_to_latest(sopt, curr_rule, NULL); + break; + + case IP_FW_CURRENT_API_VERSION: + /* IPFW2 for now */ + /* do nothing here...
*/ + break; + + default: + /* unrecognized/unsupported version */ + err = EINVAL; + break; + } + + return err; +} + +int +ipfw_get_command_and_version(struct sockopt *sopt, int *command, u_int32_t *api_version) +{ + int cmd; + int err = 0; + u_int32_t vers = IP_FW_VERSION_NONE; + + /* first deal with the oldest version */ + if (sopt->sopt_name == IP_OLD_FW_GET) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_GET; + } + else if (sopt->sopt_name == IP_OLD_FW_FLUSH) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_FLUSH; + } + else if (sopt->sopt_name == IP_OLD_FW_ZERO) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_ZERO; + } + else if (sopt->sopt_name == IP_OLD_FW_ADD) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_ADD; + } + else if (sopt->sopt_name == IP_OLD_FW_DEL) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_DEL; + } + else if (sopt->sopt_name == IP_OLD_FW_RESETLOG) { + vers = IP_FW_VERSION_0; + cmd = IP_FW_RESETLOG; + } + else { + cmd = sopt->sopt_name; + } + + if (vers == IP_FW_VERSION_NONE) { + /* working off the fact that the offset + * is the same in both structs. + */ + struct ip_fw rule; + + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw)) + return EINVAL; + + if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw), + sizeof(struct ip_fw)))) { + return err; + } + + vers = rule.version; + } + + if (command) { + *command = cmd; + } + + if (api_version) { + *api_version = vers; + } + + return err; +} + diff --git a/bsd/netinet/ip_fw2_compat.h b/bsd/netinet/ip_fw2_compat.h new file mode 100644 index 000000000..f0b7da0db --- /dev/null +++ b/bsd/netinet/ip_fw2_compat.h @@ -0,0 +1,375 @@ +/* IPFW backward compatibility */ + +#ifndef _IP_FW_COMPAT_H_ +#define _IP_FW_COMPAT_H_ + + +/* prototypes */ +void ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version); +int ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *rule, int api_version); +int ipfw_get_command_and_version(struct sockopt *sopt, int *command, u_int32_t *api_version); + + +/* + * ****************************** + * ****** IPFW version one ****** + * ****************************** + */ + +/* + * This union structure identifies an interface, either explicitly + * by name or implicitly by IP address. The flags IP_FW_F_IIFNAME + * and IP_FW_F_OIFNAME say how to interpret this structure. An + * interface unit number of -1 matches any unit, while an + * IP address of 0.0.0.0 matches any interface. + * + * The receive and transmit interfaces are only compared against + * the packet if the corresponding bit (IP_FW_F_IIFACE or IP_FW_F_OIFACE) + * is set. Note some packets lack a receive or transmit interface + * (in which case the missing "interface" never matches). + */ + +union ip_fw_if_compat { + struct in_addr fu_via_ip; /* Specified by IP address */ + struct { /* Specified by interface name */ +#define FW_IFNLEN_COMPAT 10 /* need room ! was IFNAMSIZ */ + char name[FW_IFNLEN_COMPAT]; + short unit; /* -1 means match any unit */ + } fu_via_if_compat; +}; + +/* + * Format of an IP firewall descriptor + * + * fw_src, fw_dst, fw_smsk, fw_dmsk are always stored in network byte order. + * fw_flg and fw_n*p are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +struct ip_fw_compat { + u_int32_t version; /* Version of this structure. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION by clients. */ + void *context; /* Context that is usable by user processes to */ + /* identify this rule.
*/ + u_int64_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + u_short fw_number; /* Rule number */ + u_int fw_flg; /* Flags word */ +#define IP_FW_MAX_PORTS_COMPAT 10 /* A reasonable maximum */ + union { + u_short fw_pts[IP_FW_MAX_PORTS_COMPAT]; /* Array of port numbers to match */ +#define IP_FW_ICMPTYPES_MAX_COMPAT 128 +#define IP_FW_ICMPTYPES_DIM_COMPAT (IP_FW_ICMPTYPES_MAX_COMPAT / (sizeof(unsigned) * 8)) + unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM_COMPAT]; /* ICMP types bitmap */ + } fw_uar_compat; + u_int fw_ipflg; /* IP flags word */ + u_char fw_ipopt,fw_ipnopt; /* IP options set/unset */ + u_char fw_tcpopt,fw_tcpnopt; /* TCP options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ + long timestamp; /* timestamp (tv_sec) of last match */ + union ip_fw_if_compat fw_in_if, fw_out_if; /* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */ + u_short fu_pipe_nr; /* queue number (option DUMMYNET) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + struct sockaddr_in fu_fwd_ip; + } fw_un_compat; + u_char fw_prot; /* IP protocol */ + /* + * N'of src ports and # of dst ports in ports array (dst ports + * follow src ports; max of 10 ports in all; count of 0 means + * match all ports) + */ + u_char fw_nports; + void *pipe_ptr; /* flow_set ptr for dummynet pipe */ + void *next_rule_ptr ; /* next rule in case of match */ + uid_t fw_uid; /* uid to match */ + int fw_logamount; /* amount to log */ + u_int64_t fw_loghighest; /* highest number packet to log */ +}; + +/* + * extended ipfw structure... some fields in the original struct + * can be used to pass parameters up/down, namely pointers + * void *pipe_ptr + * void *next_rule_ptr + * some others can be used to pass parameters down, namely counters etc. + * u_int64_t fw_pcnt,fw_bcnt; + * long timestamp; + */ + +struct ip_fw_ext_compat { /* extended structure */ + struct ip_fw rule; /* must be at offset 0 */ + long dont_match_prob; /* 0x7fffffff means 1.0, always fail */ + u_int dyn_type; /* type for dynamic rule */ +}; + +struct ip_fw_chain_compat { + LIST_ENTRY(ip_fw_chain_compat) next; + struct ip_fw_compat *rule; +}; + +/* + * dynamic ipfw rule + */ +struct ipfw_dyn_rule_compat { + struct ipfw_dyn_rule *next ; + + struct ipfw_flow_id id ; + struct ipfw_flow_id mask ; + struct ip_fw_chain_compat *chain ; /* pointer to parent rule */ + u_int32_t type ; /* rule type */ + u_int32_t expire ; /* expire time */ + u_int64_t pcnt, bcnt; /* match counters */ + u_int32_t bucket ; /* which bucket in hash table */ + u_int32_t state ; /* state of this rule (typ. 
a */ + /* combination of TCP flags) */ +} ; + +#define IP_FW_GETNSRCP_COMPAT(rule) ((rule)->fw_nports & 0x0f) +#define IP_FW_SETNSRCP_COMPAT(rule, n) do { \ + (rule)->fw_nports &= ~0x0f; \ + (rule)->fw_nports |= (n); \ + } while (0) +#define IP_FW_GETNDSTP_COMPAT(rule) ((rule)->fw_nports >> 4) +#define IP_FW_SETNDSTP_COMPAT(rule, n) do { \ + (rule)->fw_nports &= ~0xf0; \ + (rule)->fw_nports |= (n) << 4;\ + } while (0) + +#define fw_divert_port_compat fw_un_compat.fu_divert_port +#define fw_skipto_rule_compat fw_un_compat.fu_skipto_rule +#define fw_reject_code_compat fw_un_compat.fu_reject_code +#define fw_pipe_nr_compat fw_un_compat.fu_pipe_nr +#define fw_fwd_ip_compat fw_un_compat.fu_fwd_ip + +/* + * Values for "flags" field . + */ +#define IP_FW_F_COMMAND_COMPAT 0x000000ff /* Mask for type of chain entry: */ +#define IP_FW_F_DENY_COMPAT 0x00000000 /* This is a deny rule */ +#define IP_FW_F_REJECT_COMPAT 0x00000001 /* Deny and send a response packet */ +#define IP_FW_F_ACCEPT_COMPAT 0x00000002 /* This is an accept rule */ +#define IP_FW_F_COUNT_COMPAT 0x00000003 /* This is a count rule */ +#define IP_FW_F_DIVERT_COMPAT 0x00000004 /* This is a divert rule */ +#define IP_FW_F_TEE_COMPAT 0x00000005 /* This is a tee rule */ +#define IP_FW_F_SKIPTO_COMPAT 0x00000006 /* This is a skipto rule */ +#define IP_FW_F_FWD_COMPAT 0x00000007 /* This is a "change forwarding address" rule */ +#define IP_FW_F_PIPE_COMPAT 0x00000008 /* This is a dummynet rule */ +#define IP_FW_F_QUEUE_COMPAT 0x00000009 /* This is a dummynet queue */ + +#define IP_FW_F_IN_COMPAT 0x00000100 /* Check inbound packets */ +#define IP_FW_F_OUT_COMPAT 0x00000200 /* Check outbound packets */ +#define IP_FW_F_IIFACE_COMPAT 0x00000400 /* Apply inbound interface test */ +#define IP_FW_F_OIFACE_COMPAT 0x00000800 /* Apply outbound interface test */ + +#define IP_FW_F_PRN_COMPAT 0x00001000 /* Print if this rule matches */ + +#define IP_FW_F_SRNG_COMPAT 0x00002000 /* The first two src ports are a min * + * and max range (stored in host byte * + * order). */ + +#define IP_FW_F_DRNG_COMPAT 0x00004000 /* The first two dst ports are a min * + * and max range (stored in host byte * + * order). */ + +#define IP_FW_F_FRAG_COMPAT 0x00008000 /* Fragment */ + +#define IP_FW_F_IIFNAME_COMPAT 0x00010000 /* In interface by name/unit (not IP) */ +#define IP_FW_F_OIFNAME_COMPAT 0x00020000 /* Out interface by name/unit (not IP) */ + +#define IP_FW_F_INVSRC_COMPAT 0x00040000 /* Invert sense of src check */ +#define IP_FW_F_INVDST_COMPAT 0x00080000 /* Invert sense of dst check */ + +#define IP_FW_F_ICMPBIT_COMPAT 0x00100000 /* ICMP type bitmap is valid */ + +#define IP_FW_F_UID_COMPAT 0x00200000 /* filter by uid */ + +#define IP_FW_F_RND_MATCH_COMPAT 0x00800000 /* probabilistic rule match */ +#define IP_FW_F_SMSK_COMPAT 0x01000000 /* src-port + mask */ +#define IP_FW_F_DMSK_COMPAT 0x02000000 /* dst-port + mask */ +#define IP_FW_BRIDGED_COMPAT 0x04000000 /* only match bridged packets */ +#define IP_FW_F_KEEP_S_COMPAT 0x08000000 /* keep state */ +#define IP_FW_F_CHECK_S_COMPAT 0x10000000 /* check state */ + +#define IP_FW_F_SME_COMPAT 0x20000000 /* source = me */ +#define IP_FW_F_DME_COMPAT 0x40000000 /* destination = me */ + +#define IP_FW_F_MASK_COMPAT 0x7FFFFFFF /* All possible flag bits mask */ + +/* + * Flags for the 'fw_ipflg' field, for comparing values of ip and its protocols. 
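+ * Only the TCP-established bit is defined so far; a caller tests it with
+ * (fw_ipflg & IP_FW_IF_TCPEST_COMPAT), and the converters above map it to
+ * and from the ipfw2 O_ESTAB opcode.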
+ */ +#define IP_FW_IF_TCPEST_COMPAT 0x00000020 /* established TCP connection */ +#define IP_FW_IF_TCPMSK_COMPAT 0x00000020 /* mask of all TCP values */ + +/* + * Definitions for TCP flags. + */ +#define IP_FW_TCPF_FIN_COMPAT TH_FIN +#define IP_FW_TCPF_SYN_COMPAT TH_SYN +#define IP_FW_TCPF_RST_COMPAT TH_RST +#define IP_FW_TCPF_PSH_COMPAT TH_PUSH +#define IP_FW_TCPF_ACK_COMPAT TH_ACK +#define IP_FW_TCPF_URG_COMPAT TH_URG + +/* + * For backwards compatibility with rules specifying "via iface" but + * not restricted to only "in" or "out" packets, we define this combination + * of bits to represent this configuration. + */ + +#define IF_FW_F_VIAHACK_COMPAT (IP_FW_F_IN_COMPAT|IP_FW_F_OUT_COMPAT|IP_FW_F_IIFACE_COMPAT|IP_FW_F_OIFACE_COMPAT) + +/* + * Definitions for REJECT response codes. + * Values less than 256 correspond to ICMP unreachable codes. + */ +#define IP_FW_REJECT_RST_COMPAT 0x0100 /* TCP packets: send RST */ + + +/* + * ****************************** + * ****** IPFW version zero ***** + * ****************************** + */ + +/* + * This union structure identifies an interface, either explicitly + * by name or implicitly by IP address. The flags IP_FW_F_IIFNAME + * and IP_FW_F_OIFNAME say how to interpret this structure. An + * interface unit number of -1 matches any unit, while an + * IP address of 0.0.0.0 matches any interface. + * + * The receive and transmit interfaces are only compared against + * the packet if the corresponding bit (IP_FW_F_IIFACE or IP_FW_F_OIFACE) + * is set. Note some packets lack a receive or transmit interface + * (in which case the missing "interface" never matches). + */ + +union ip_old_fw_if { + struct in_addr fu_via_ip; /* Specified by IP address */ + struct { /* Specified by interface name */ +#define OLD_FW_IFNLEN 10 /* need room ! was IFNAMSIZ */ + char name[OLD_FW_IFNLEN]; + short unit; /* -1 means match any unit */ + } fu_via_if; +}; + +/* + * Format of an IP firewall descriptor + * + * fw_src, fw_dst, fw_smsk, fw_dmsk are always stored in network byte order. + * fw_flg and fw_n*p are stored in host byte order (of course). + * Port numbers are stored in HOST byte order.
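+ * (So a rule matching destination port 80 stores fw_pts[0] = 80 as-is,
+ * while fw_dst must already be in network order, as inet_aton() or
+ * htonl() would produce.)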
+ * Warning: setsockopt() will fail if sizeof(struct ip_fw) > MLEN (108) + */ + +struct ip_old_fw { + u_int64_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk; /* Mask for src and dest IP addr */ + u_short fw_number; /* Rule number */ + u_int fw_flg; /* Flags word */ +#define IP_OLD_FW_MAX_PORTS 10 /* A reasonable maximum */ + union { + u_short fw_pts[IP_OLD_FW_MAX_PORTS]; /* Array of port numbers to match */ +#define IP_OLD_FW_ICMPTYPES_MAX 128 +#define IP_OLD_FW_ICMPTYPES_DIM (IP_OLD_FW_ICMPTYPES_MAX / (sizeof(unsigned) * 8)) + unsigned fw_icmptypes[IP_OLD_FW_ICMPTYPES_DIM]; /* ICMP types bitmap */ + } fw_uar; + u_char fw_ipopt,fw_ipnopt; /* IP options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ + long timestamp; /* timestamp (tv_sec) of last match */ + union ip_old_fw_if fw_in_if, fw_out_if; /* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */ + u_short fu_pipe_nr; /* pipe number (option DUMMYNET) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + struct sockaddr_in fu_fwd_ip; + } fw_un; + u_char fw_prot; /* IP protocol */ + u_char fw_nports; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ + void *pipe_ptr; /* Pipe ptr in case of dummynet pipe */ + void *next_rule_ptr ; /* next rule in case of match */ +}; + +#define IP_OLD_FW_GETNSRCP(rule) ((rule)->fw_nports & 0x0f) +#define IP_OLD_FW_SETNSRCP(rule, n) do { \ + (rule)->fw_nports &= ~0x0f; \ + (rule)->fw_nports |= (n); \ + } while (0) +#define IP_OLD_FW_GETNDSTP(rule) ((rule)->fw_nports >> 4) +#define IP_OLD_FW_SETNDSTP(rule, n) do { \ + (rule)->fw_nports &= ~0xf0; \ + (rule)->fw_nports |= (n) << 4;\ + } while (0) + +#define old_fw_divert_port fw_un.fu_divert_port +#define old_fw_skipto_rule fw_un.fu_skipto_rule +#define old_fw_reject_code fw_un.fu_reject_code +#define old_fw_pipe_nr fw_un.fu_pipe_nr +#define old_fw_fwd_ip fw_un.fu_fwd_ip + +/* + * Values for "flags" field . + */ +#define IP_OLD_FW_F_COMMAND 0x000000ff /* Mask for type of chain entry: */ +#define IP_OLD_FW_F_DENY 0x00000000 /* This is a deny rule */ +#define IP_OLD_FW_F_REJECT 0x00000001 /* Deny and send a response packet */ +#define IP_OLD_FW_F_ACCEPT 0x00000002 /* This is an accept rule */ +#define IP_OLD_FW_F_COUNT 0x00000003 /* This is a count rule */ +#define IP_OLD_FW_F_DIVERT 0x00000004 /* This is a divert rule */ +#define IP_OLD_FW_F_TEE 0x00000005 /* This is a tee rule */ +#define IP_OLD_FW_F_SKIPTO 0x00000006 /* This is a skipto rule */ +#define IP_OLD_FW_F_FWD 0x00000007 /* This is a "change forwarding address" rule */ +#define IP_OLD_FW_F_PIPE 0x00000008 /* This is a dummynet rule */ + +#define IP_OLD_FW_F_IN 0x00000100 /* Check inbound packets */ +#define IP_OLD_FW_F_OUT 0x00000200 /* Check outbound packets */ +#define IP_OLD_FW_F_IIFACE 0x00000400 /* Apply inbound interface test */ +#define IP_OLD_FW_F_OIFACE 0x00000800 /* Apply outbound interface test */ + +#define IP_OLD_FW_F_PRN 0x00001000 /* Print if this rule matches */ + +#define IP_OLD_FW_F_SRNG 0x00002000 /* The first two src ports are a min * + * and max range (stored in host byte * + * order). */ + +#define IP_OLD_FW_F_DRNG 0x00004000 /* The first two dst ports are a min * + * and max range (stored in host byte * + * order). 
*/ + +#define IP_OLD_FW_F_FRAG 0x00008000 /* Fragment */ + +#define IP_OLD_FW_F_IIFNAME 0x00010000 /* In interface by name/unit (not IP) */ +#define IP_OLD_FW_F_OIFNAME 0x00020000 /* Out interface by name/unit (not IP) */ + +#define IP_OLD_FW_F_INVSRC 0x00040000 /* Invert sense of src check */ +#define IP_OLD_FW_F_INVDST 0x00080000 /* Invert sense of dst check */ + +#define IP_OLD_FW_F_ICMPBIT 0x00100000 /* ICMP type bitmap is valid */ + +#define IP_OLD_FW_F_MASK 0x001FFFFF /* All possible flag bits mask */ + +/* + * For backwards compatibility with rules specifying "via iface" but + * not restricted to only "in" or "out" packets, we define this combination + * of bits to represent this configuration. + */ + +#define IF_OLD_FW_F_VIAHACK (IP_OLD_FW_F_IN|IP_OLD_FW_F_OUT|IP_OLD_FW_F_IIFACE|IP_OLD_FW_F_OIFACE) + +/* + * Definitions for TCP flags - abridged + */ +#define IP_OLD_FW_TCPF_ESTAB 0x40 + + +#endif /* _IP_FW_COMPAT_H_ */ diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 82ba9a44f..d6fbacb09 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -148,9 +148,9 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, int icmpprintfs = 0; #endif -static void icmp_reflect __P((struct mbuf *)); -static void icmp_send __P((struct mbuf *, struct mbuf *)); -static int ip_next_mtu __P((int, int)); +static void icmp_reflect(struct mbuf *); +static void icmp_send(struct mbuf *, struct mbuf *); +static int ip_next_mtu(int, int); extern struct protosw inetsw[]; @@ -159,11 +159,12 @@ extern struct protosw inetsw[]; * in response to bad packet ip. */ void -icmp_error(n, type, code, dest, destifp) - struct mbuf *n; - int type, code; - n_long dest; - struct ifnet *destifp; +icmp_error( + struct mbuf *n, + int type, + int code, + n_long dest, + struct ifnet *destifp) { register struct ip *oip = mtod(n, struct ip *), *nip; register unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2; @@ -279,8 +280,9 @@ icmp_input(m, hlen) int icmplen = ip->ip_len; register int i; struct in_ifaddr *ia; - void (*ctlfunc) __P((int, struct sockaddr *, void *)); + void (*ctlfunc)(int, struct sockaddr *, void *); int code; + char ipv4str[MAX_IPv4_STR_LEN]; /* * Locate icmp structure in mbuf, and check @@ -288,10 +290,12 @@ icmp_input(m, hlen) */ #if ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(ip->ip_src)); + char buf[MAX_IPv4_STR_LEN]; + printf("icmp_input from %s to %s, len %d\n", - buf, inet_ntoa(ip->ip_dst), icmplen); + inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)), + inet_ntop(AF_INET, &ip->ip_dst, ipv4str, sizeof(ipv4str)), + icmplen); } #endif if (icmplen < ICMP_MINLEN) { @@ -446,7 +450,9 @@ icmp_input(m, hlen) 1); #if DEBUG_MTUDISC printf("MTU for %s reduced to %d\n", - inet_ntoa(icmpsrc.sin_addr), mtu); + inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str, + sizeof(ipv4str)), + mtu); #endif if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) { /* rt->rt_rmx.rmx_mtu = @@ -537,8 +543,11 @@ icmp_input(m, hlen) (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == 0) break; - if (ia->ia_ifp == 0) + if (ia->ia_ifp == 0) { + ifafree(&ia->ia_ifa); + ia = 0; break; + } icp->icmp_type = ICMP_MASKREPLY; icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; if (ip->ip_src.s_addr == 0) { @@ -547,6 +556,7 @@ icmp_input(m, hlen) else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; } + ifafree(&ia->ia_ifa); reflect: ip->ip_len += hlen; /* since ip_input deducts this */ icmpstat.icps_reflect++; @@ -590,11 +600,12 
@@ reflect: icmpdst.sin_addr = icp->icmp_gwaddr; #if ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst)); + char buf[MAX_IPv4_STR_LEN]; printf("redirect dst %s to %s\n", - buf, inet_ntoa(icp->icmp_gwaddr)); + inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, buf, sizeof(buf)), + inet_ntop(AF_INET, &icp->icmp_gwaddr, ipv4str, + sizeof(ipv4str))); } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; @@ -657,6 +668,7 @@ icmp_reflect(m) * or anonymous), use the address which corresponds * to the incoming interface. */ + lck_mtx_lock(rt_mtx); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) break; @@ -664,6 +676,8 @@ icmp_reflect(m) t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) break; } + if (ia) + ifaref(&ia->ia_ifa); icmpdst.sin_addr = t; if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif) ia = (struct in_ifaddr *)ifaof_ifpforaddr( @@ -672,11 +686,16 @@ icmp_reflect(m) * The following happens if the packet was not addressed to us, * and was received on an interface with no IP address. */ - if (ia == (struct in_ifaddr *)0) + if (ia == (struct in_ifaddr *)0) { ia = in_ifaddrhead.tqh_first; + ifaref(&ia->ia_ifa); + } + lck_mtx_unlock(rt_mtx); t = IA_SIN(ia)->sin_addr; ip->ip_src = t; ip->ip_ttl = ip_defttl; + ifafree(&ia->ia_ifa); + ia = NULL; if (optlen > 0) { register u_char *cp; @@ -770,6 +789,7 @@ icmp_send(m, opts) register int hlen; register struct icmp *icp; struct route ro; + char ipv4str[MAX_IPv4_STR_LEN]; hlen = IP_VHL_HL(ip->ip_vhl) << 2; m->m_data += hlen; @@ -779,16 +799,17 @@ icmp_send(m, opts) icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); m->m_data -= hlen; m->m_len += hlen; - m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.rcvif = 0; m->m_pkthdr.aux = NULL; m->m_pkthdr.csum_data = 0; m->m_pkthdr.csum_flags = 0; #if ICMPPRINTFS if (icmpprintfs) { - char buf[4 * sizeof "123"]; - strcpy(buf, inet_ntoa(ip->ip_dst)); + char buf[MAX_IPv4_STR_LEN]; + printf("icmp_send dst %s src %s\n", - buf, inet_ntoa(ip->ip_src)); + inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)), + inet_ntop(AF_INET, &ip->ip_src, ipv4str, sizeof(ipv4str))); } #endif bzero(&ro, sizeof ro); @@ -893,7 +914,7 @@ badport_bandlim(int which) if (icmplim <= 0 || which > BANDLIM_MAX || which < 0) return(0); - getmicrotime(&time); + getmicrouptime(&time); secs = time.tv_sec - lticks[which].tv_sec ; @@ -959,7 +980,7 @@ __private_extern__ struct pr_usrreqs icmp_dgram_usrreqs = { pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, icmp_dgram_send, pru_sense_null, rip_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; /* Like rip_attach but without root privilege enforcement */ @@ -1059,8 +1080,7 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n /* Only IPv4 */ if (IP_VHL_V(ip->ip_vhl) != 4) goto bad; - if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len || - ip->ip_len > 65535) + if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len) goto bad; /* Bogus fragments can tie up peer resources */ if (ip->ip_off != 0) @@ -1070,12 +1090,22 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n goto bad; /* To prevent spoofing, specified source address must be one of ours */ if (ip->ip_src.s_addr != INADDR_ANY) { - if (TAILQ_EMPTY(&in_ifaddrhead)) + socket_unlock(so, 0); + 
lck_mtx_lock(rt_mtx); + if (TAILQ_EMPTY(&in_ifaddrhead)) { + lck_mtx_unlock(rt_mtx); + socket_lock(so, 0); goto bad; + } TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { - if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) + if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) { + lck_mtx_unlock(rt_mtx); + socket_lock(so, 0); goto ours; + } } + lck_mtx_unlock(rt_mtx); + socket_lock(so, 0); goto bad; } ours: diff --git a/bsd/netinet/ip_icmp.h b/bsd/netinet/ip_icmp.h index 22e119eb1..8aff81cb0 100644 --- a/bsd/netinet/ip_icmp.h +++ b/bsd/netinet/ip_icmp.h @@ -206,11 +206,9 @@ struct icmp { (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -void icmp_error __P((struct mbuf *, int, int, n_long, struct ifnet *)); -void icmp_input __P((struct mbuf *, int)); -#endif /* __APPLE_API_PRIVATE */ -#endif +#ifdef KERNEL_PRIVATE +void icmp_error(struct mbuf *, int, int, n_long, struct ifnet *); +void icmp_input(struct mbuf *, int); +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet/ip_id.c b/bsd/netinet/ip_id.c index 4e7e70980..42630415b 100644 --- a/bsd/netinet/ip_id.c +++ b/bsd/netinet/ip_id.c @@ -87,9 +87,9 @@ static u_int16_t ru_msb = 0; static long ru_reseed; static u_int32_t tmp; /* Storage for unused random */ -static u_int16_t pmod __P((u_int16_t, u_int16_t, u_int16_t)); -static void ip_initid __P((void)); -u_int16_t ip_randomid __P((void)); +static u_int16_t pmod(u_int16_t, u_int16_t, u_int16_t); +static void ip_initid(void); +u_int16_t ip_randomid(void); /* * Do a fast modular exponation, returned value will be in the range @@ -135,7 +135,7 @@ ip_initid(void) int noprime = 1; struct timeval time; - getmicrotime(&time); + getmicrouptime(&time); read_random((void *) &tmp, sizeof(tmp)); ru_x = (tmp & 0xFFFF) % RU_M; @@ -186,7 +186,7 @@ ip_randomid(void) int i, n; struct timeval time; - getmicrotime(&time); + getmicrouptime(&time); if (ru_counter >= RU_MAX || time.tv_sec > ru_reseed) ip_initid(); diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index 5ac01c9e1..4a219f43c 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -70,12 +70,13 @@ #include #include +#include #include #include #include #include -#include +#include #include #include @@ -87,6 +88,9 @@ #include #include +#include + +#include /* needed for AUTOCONFIGURING: */ #include @@ -116,6 +120,7 @@ #if IPSEC extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif int rsvp_on = 0; @@ -149,7 +154,7 @@ SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); static int nipq = 0; /* total # of reass queues */ -static int maxnipq = 0; +static int maxnipq; SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW, &maxnipq, 0, "Maximum number of IPv4 fragment reassembly queue entries"); @@ -159,6 +164,12 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, &maxfragsperpacket, 0, "Maximum number of IPv4 fragments allowed per packet"); +static int maxfrags; +SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW, + &maxfrags, 0, "Maximum number of IPv4 fragments allowed"); + +static int currentfrags = 0; + /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table @@ -203,7 +214,15 @@ SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD, (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) static struct ipq
ipq[IPREASS_NHASH]; +static TAILQ_HEAD(ipq_list, ipq) ipq_list = + TAILQ_HEAD_INITIALIZER(ipq_list); const int ipintrq_present = 1; +lck_mtx_t *ip_mutex; +lck_attr_t *ip_mutex_attr; +lck_grp_t *ip_mutex_grp; +lck_grp_attr_t *ip_mutex_grp_attr; +lck_mtx_t *inet_domain_mutex; +extern lck_mtx_t *domain_proto_mtx; #if IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, @@ -219,14 +238,14 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, /* Firewall hooks */ ip_fw_chk_t *ip_fw_chk_ptr; -ip_fw_ctl_t *ip_fw_ctl_ptr; int fw_enable = 1 ; +int fw_one_pass = 1; #if DUMMYNET -ip_dn_ctl_t *ip_dn_ctl_ptr; +ip_dn_io_t *ip_dn_io_ptr; #endif -int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)) = NULL; +int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **) = NULL; SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW, 0, "link local"); @@ -237,7 +256,7 @@ SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD, SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW, 0, "link local input"); -int ip_linklocal_in_allowbadttl = 0; +int ip_linklocal_in_allowbadttl = 1; SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW, &ip_linklocal_in_allowbadttl, 0, "Allow incoming link local packets with TTL less than 255"); @@ -258,29 +277,26 @@ static struct ip_srcrt { struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; } ip_srcrt; -struct sockaddr_in *ip_fw_fwd_addr; - #ifdef __APPLE__ extern struct mbuf* m_dup(register struct mbuf *m, int how); #endif -static void save_rte __P((u_char *, struct in_addr)); -static int ip_dooptions __P((struct mbuf *)); -static void ip_forward __P((struct mbuf *, int)); -static void ip_freef __P((struct ipq *)); +static void save_rte(u_char *, struct in_addr); +static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *, struct route *ipforward_rt); +static void ip_forward(struct mbuf *, int, struct sockaddr_in *, struct route *ipforward_rt); +static void ip_freef(struct ipq *); #if IPDIVERT #ifdef IPDIVERT_44 -static struct mbuf *ip_reass __P((struct mbuf *, - struct ipq *, struct ipq *, u_int32_t *, u_int16_t *)); +static struct mbuf *ip_reass(struct mbuf *, + struct ipq *, struct ipq *, u_int32_t *, u_int16_t *); #else -static struct mbuf *ip_reass __P((struct mbuf *, - struct ipq *, struct ipq *, u_int16_t *, u_int16_t *)); +static struct mbuf *ip_reass(struct mbuf *, + struct ipq *, struct ipq *, u_int16_t *, u_int16_t *); #endif #else -static struct mbuf *ip_reass __P((struct mbuf *, struct ipq *, struct ipq *)); +static struct mbuf *ip_reass(struct mbuf *, struct ipq *, struct ipq *); #endif -static struct in_ifaddr *ip_rtaddr __P((struct in_addr)); -void ipintr __P((void)); +void ipintr(void); #if RANDOM_IP_ID extern u_short ip_id; @@ -299,11 +315,13 @@ ip_init() register struct protosw *pr; register int i; static ip_initialized = 0; + struct timeval timenow; + if (!ip_initialized) { TAILQ_INIT(&in_ifaddrhead); - pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); + pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) @@ -318,16 +336,43 @@ ip_init() ipq[i].next = ipq[i].prev = &ipq[i]; maxnipq = nmbclusters / 32; - maxfragsperpacket = 16; + maxfrags = maxnipq * 2; + maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */ #if RANDOM_IP_ID - ip_id = time_second & 0xffff; + getmicrouptime(&timenow); + ip_id = timenow.tv_sec & 0xffff; #endif ipintrq.ifq_maxlen = ipqmaxlen; + + ipf_init(); 
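The ipq_list head declared at the start of this hunk gives reassembly an age-ordered list of fragment queues: ip_reass() inserts new queues at the head, so TAILQ_LAST() always yields the oldest queue, which ip_input() evicts once currentfrags reaches maxfrags (both shown further down). A self-contained sketch of that LRU discipline; struct frag_q is a hypothetical stand-in for struct ipq:

    #include <stdio.h>
    #include <sys/queue.h>

    /* Hypothetical stand-in for struct ipq; 'link' plays the role of the
     * ipq_list linkage added by this patch. */
    struct frag_q {
        int id;
        TAILQ_ENTRY(frag_q) link;
    };
    TAILQ_HEAD(frag_list, frag_q);

    int main(void)
    {
        struct frag_list head = TAILQ_HEAD_INITIALIZER(head);
        struct frag_q a = { 1 }, b = { 2 }, c = { 3 };

        /* New reassembly queues are inserted at the head... */
        TAILQ_INSERT_HEAD(&head, &a, link);
        TAILQ_INSERT_HEAD(&head, &b, link);
        TAILQ_INSERT_HEAD(&head, &c, link);

        /* ...so the oldest queue is always at the tail, which is the
         * victim chosen when currentfrags >= maxfrags. */
        struct frag_q *victim = TAILQ_LAST(&head, frag_list);
        printf("evict id=%d\n", victim->id);      /* prints: evict id=1 */
        TAILQ_REMOVE(&head, victim, link);        /* cf. ip_freef() */
        return 0;
    }

This replaces the old scheme of scanning the per-bucket lists for something to drop with a single O(1) tail lookup.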
+ + ip_mutex_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(ip_mutex_grp_attr); + + ip_mutex_grp = lck_grp_alloc_init("ip", ip_mutex_grp_attr); + + ip_mutex_attr = lck_attr_alloc_init(); + + lck_attr_setdefault(ip_mutex_attr); + + if ((ip_mutex = lck_mtx_alloc_init(ip_mutex_grp, ip_mutex_attr)) == NULL) { + printf("ip_init: can't alloc ip_mutex\n"); + return; + } + ip_initialized = 1; } } +static void +ip_proto_input( + protocol_family_t protocol, + mbuf_t packet) +{ + ip_input(packet); +} + /* Initialize the PF_INET domain, and add in the pre-defined protos */ void in_dinit() @@ -341,15 +386,90 @@ in_dinit() { kprintf("Initing %d protosw entries\n", in_proto_count); dp = &inetdomain; + dp->dom_flags = DOM_REENTRANT; for (i=0, pr = &inetsw[0]; idom_mtx; inetdomain_initted = 1; + + lck_mtx_unlock(domain_proto_mtx); + proto_register_input(PF_INET, ip_proto_input, NULL); + lck_mtx_lock(domain_proto_mtx); } } +__private_extern__ void +ip_proto_dispatch_in( + struct mbuf *m, + int hlen, + u_int8_t proto, + ipfilter_t inject_ipfref) +{ + struct ipfilter *filter; + int seen = (inject_ipfref == 0); + int changed_header = 0; + struct ip *ip; + + if (!TAILQ_EMPTY(&ipv4_filters)) { + ipf_ref(); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_ipfref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_input) { + errno_t result; + + if (changed_header == 0) { + changed_header = 1; + ip = mtod(m, struct ip *); + ip->ip_len = htons(ip->ip_len + hlen); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); + } + result = filter->ipf_filter.ipf_input( + filter->ipf_filter.cookie, (mbuf_t*)&m, hlen, proto); + if (result == EJUSTRETURN) { + ipf_unref(); + return; + } + if (result != 0) { + ipf_unref(); + m_freem(m); + return; + } + } + } + ipf_unref(); + } + /* + * If there isn't a specific lock for the protocol + * we're about to call, use the generic lock for AF_INET. + * otherwise let the protocol deal with its own locking + */ + ip = mtod(m, struct ip *); + + if (changed_header) { + ip->ip_len = ntohs(ip->ip_len) - hlen; + ip->ip_off = ntohs(ip->ip_off); + } + + if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) { + lck_mtx_lock(inet_domain_mutex); + (*ip_protox[ip->ip_p]->pr_input)(m, hlen); + lck_mtx_unlock(inet_domain_mutex); + } + else + (*ip_protox[ip->ip_p]->pr_input)(m, hlen); + +} + +/* + * ipforward_rt cleared in in_addroute() + * when a new route is successfully created. + */ static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; -static struct route ipforward_rt; /* * Ip input routine. Checksum and byte swap header. 
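ip_proto_dispatch_in() above presents packets to the ipv4_filters list in wire format: it adds the header length back into ip_len, converts ip_len and ip_off with htons(), and recomputes the header checksum, then undoes the conversion before calling the protocol input routine. A simplified userland sketch of just the host/wire round trip:

    #include <stdio.h>
    #include <arpa/inet.h>

    int main(void)
    {
        /* The stack keeps ip_len in host order while working on a packet;
         * filters are handed the wire (network) representation. */
        unsigned short ip_len = 1500;             /* host order */
        unsigned short wire = htons(ip_len);      /* what a filter observes */

        /* On a little-endian machine this prints wire=0xdc05. */
        printf("host=%u wire=0x%04x restored=%u\n", ip_len, wire, ntohs(wire));
        return 0;
    }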
If fragmented @@ -363,41 +483,77 @@ ip_input(struct mbuf *m) struct in_ifaddr *ia = NULL; int i, hlen, mff, checkif; u_short sum; - u_int16_t divert_cookie; /* firewall cookie */ struct in_addr pkt_dst; -#if IPDIVERT - u_int16_t divert_info = 0; /* packet divert/tee info */ -#endif - struct ip_fw_chain *rule = NULL; + u_int32_t div_info = 0; /* packet divert/tee info */ + struct ip_fw_args args; + ipfilter_t inject_filter_ref = 0; + struct m_tag *tag; + struct route ipforward_rt = { 0 }; + + lck_mtx_lock(ip_mutex); + + args.eh = NULL; + args.oif = NULL; + args.rule = NULL; + args.divert_rule = 0; /* divert cookie */ + args.next_hop = NULL; + + /* Grab info from mtags prepended to the chain */ +#if DUMMYNET + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { + struct dn_pkt_tag *dn_tag; + + dn_tag = (struct dn_pkt_tag *)(tag+1); + args.rule = dn_tag->rule; + + m_tag_delete(m, tag); + } +#endif /* DUMMYNET */ -#if IPDIVERT - /* Get and reset firewall cookie */ - divert_cookie = ip_divert_cookie; - ip_divert_cookie = 0; -#else - divert_cookie = 0; -#endif + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { + struct divert_tag *div_tag; + + div_tag = (struct divert_tag *)(tag+1); + args.divert_rule = div_tag->cookie; -#if IPFIREWALL && DUMMYNET - /* - * dummynet packet are prepended a vestigial mbuf with - * m_type = MT_DUMMYNET and m_data pointing to the matching - * rule. - */ - if (m->m_type == MT_DUMMYNET) { - rule = (struct ip_fw_chain *)(m->m_data) ; - m = m->m_next ; - ip = mtod(m, struct ip *); - hlen = IP_VHL_HL(ip->ip_vhl) << 2; - goto iphack ; - } else - rule = NULL ; -#endif + m_tag_delete(m, tag); + } + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { + struct ip_fwd_tag *ipfwd_tag; + + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); + args.next_hop = ipfwd_tag->next_hop; + m_tag_delete(m, tag); + } + #if DIAGNOSTIC if (m == NULL || (m->m_flags & M_PKTHDR) == 0) panic("ip_input no HDR"); #endif + + if (args.rule) { /* dummynet already filtered us */ + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + inject_filter_ref = ipf_get_inject_filter(m); + goto iphack ; + } + + /* + * No need to process packet twice if we've + * already seen it + */ + inject_filter_ref = ipf_get_inject_filter(m); + if (inject_filter_ref != 0) { + lck_mtx_unlock(ip_mutex); + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + ip->ip_len = ntohs(ip->ip_len) - hlen; + ip->ip_off = ntohs(ip->ip_off); + ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref); + return; + } + ipstat.ips_total++; if (m->m_pkthdr.len < sizeof(struct ip)) @@ -406,6 +562,7 @@ ip_input(struct mbuf *m) if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { ipstat.ips_toosmall++; + lck_mtx_unlock(ip_mutex); return; } ip = mtod(m, struct ip *); @@ -426,6 +583,7 @@ ip_input(struct mbuf *m) if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == 0) { ipstat.ips_badhlen++; + lck_mtx_unlock(ip_mutex); return; } ip = mtod(m, struct ip *); @@ -447,14 +605,15 @@ ip_input(struct mbuf *m) if (ip->ip_ttl != MAXTTL) { ip_linklocal_stat.iplls_in_badttl++; /* Silently drop link local traffic with bad TTL */ - if (ip_linklocal_in_allowbadttl != 0) + if (!ip_linklocal_in_allowbadttl) goto bad; } } if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0) || (apple_hwcksum_rx == 0) || - ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) -
m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */ + ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) { + m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */ + } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); @@ -526,55 +685,57 @@ iphack: if (fr_checkp) { struct mbuf *m1 = m; - if ((*fr_checkp)(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) + if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) return; ip = mtod(m = m1, struct ip *); } - if (fw_enable && ip_fw_chk_ptr) { + if (fw_enable && IPFW_LOADED) { #if IPFIREWALL_FORWARD /* * If we've been forwarded from the output side, then * skip the firewall a second time */ - if (ip_fw_fwd_addr) + if (args.next_hop) goto ours; #endif /* IPFIREWALL_FORWARD */ - /* - * See the comment in ip_output for the return values - * produced by the firewall. - */ - i = (*ip_fw_chk_ptr)(&ip, - hlen, NULL, &divert_cookie, &m, &rule, &ip_fw_fwd_addr); + + args.m = m; + i = ip_fw_chk_ptr(&args); + m = args.m; + if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ - if (m) - m_freem(m); + if (m) + m_freem(m); + lck_mtx_unlock(ip_mutex); return; - } + } ip = mtod(m, struct ip *); /* just in case m changed */ - if (i == 0 && ip_fw_fwd_addr == NULL) /* common case */ + if (i == 0 && args.next_hop == NULL) /* common case */ goto pass; #if DUMMYNET - if ((i & IP_FW_PORT_DYNT_FLAG) != 0) { - /* send packet to the appropriate pipe */ - dummynet_io(i&0xffff,DN_TO_IP_IN,m,NULL,NULL,0, rule); + if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) { + /* Send packet to the appropriate pipe */ + lck_mtx_unlock(ip_mutex); + ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args); return; } -#endif +#endif /* DUMMYNET */ #if IPDIVERT if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) { /* Divert or tee packet */ - divert_info = i; + div_info = i; goto ours; } #endif #if IPFIREWALL_FORWARD - if (i == 0 && ip_fw_fwd_addr != NULL) + if (i == 0 && args.next_hop != NULL) goto pass; #endif /* * if we get here, the packet must be dropped */ m_freem(m); + lck_mtx_unlock(ip_mutex); return; } pass: @@ -586,10 +747,8 @@ pass: * to be sent and the original packet to be freed). */ ip_nhops = 0; /* for source routed packets */ - if (hlen > sizeof (struct ip) && ip_dooptions(m)) { -#if IPFIREWALL_FORWARD - ip_fw_fwd_addr = NULL; -#endif + if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop, &ipforward_rt)) { + lck_mtx_unlock(ip_mutex); return; } @@ -616,8 +775,8 @@ pass: * Cache the destination address of the packet; this may be * changed by use of 'ipfw fwd'. */ - pkt_dst = ip_fw_fwd_addr == NULL ? - ip->ip_dst : ip_fw_fwd_addr->sin_addr; + pkt_dst = args.next_hop == NULL ? + ip->ip_dst : args.next_hop->sin_addr; /* * Enable a consistency check between the destination address @@ -635,13 +794,16 @@ pass: */ checkif = ip_checkinterface && (ipforwarding == 0) && ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) && - (ip_fw_fwd_addr == NULL); + (args.next_hop == NULL); + lck_mtx_lock(rt_mtx); TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { #define satosin(sa) ((struct sockaddr_in *)(sa)) - if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) + if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { + lck_mtx_unlock(rt_mtx); goto ours; + } /* * If the address matches, verify that the packet @@ -649,8 +811,10 @@ pass: * enabled. 
*/ if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && - (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) + (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) { + lck_mtx_unlock(rt_mtx); goto ours; + } /* * Only accept broadcast packets that arrive via the * matching interface. Reception of forwarded directed @@ -661,12 +825,17 @@ pass: if ((!checkif || ia->ia_ifp == m->m_pkthdr.rcvif) && ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) { if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == - pkt_dst.s_addr) + pkt_dst.s_addr) { + lck_mtx_unlock(rt_mtx); goto ours; - if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) + } + if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) { + lck_mtx_unlock(rt_mtx); goto ours; + } } } + lck_mtx_unlock(rt_mtx); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; if (ip_mrouter) { @@ -678,9 +847,11 @@ pass: * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. */ - if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { + if (ip_mforward && + ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { ipstat.ips_cantforward++; m_freem(m); + lck_mtx_unlock(ip_mutex); return; } @@ -701,6 +872,7 @@ pass: if (inm == NULL) { ipstat.ips_notmember++; m_freem(m); + lck_mtx_unlock(ip_mutex); return; } goto ours; @@ -719,6 +891,7 @@ pass: if (m->m_len < sizeof(struct udpiphdr) && (m = m_pullup(m, sizeof(struct udpiphdr))) == 0) { udpstat.udps_hdrops++; + lck_mtx_unlock(ip_mutex); return; } ui = mtod(m, struct udpiphdr *); @@ -738,20 +911,20 @@ pass: goto ours; } m_freem(m); + lck_mtx_unlock(ip_mutex); return; } #endif + lck_mtx_unlock(ip_mutex); /* * Not for us; forward if possible and desirable. */ if (ipforwarding == 0) { ipstat.ips_cantforward++; m_freem(m); - } else - ip_forward(m, 0); -#if IPFIREWALL_FORWARD - ip_fw_fwd_addr = NULL; -#endif + } else { + ip_forward(m, 0, args.next_hop, &ipforward_rt); + } return; ours: @@ -775,10 +948,34 @@ ours: /* If maxnipq is 0, never accept fragments. */ if (maxnipq == 0) { - ipstat.ips_fragments++; + ipstat.ips_fragments++; ipstat.ips_fragdropped++; goto bad; + } + + /* + * If we would exceed the number of fragments in queues, time out the + * oldest fragmented packet to make space. + */ + if (currentfrags >= maxfrags) { + fp = TAILQ_LAST(&ipq_list, ipq_list); + ipstat.ips_fragtimeout += fp->ipq_nfrags; + + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) { + /* + * If we match the fragment queue we were going to + * discard, drop this packet too.
+ */ + ipstat.ips_fragdropped++; + ip_freef(fp); + goto bad; } + + ip_freef(fp); + } sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); /* @@ -792,8 +989,6 @@ ours: ip->ip_p == fp->ipq_p) goto found; - fp = 0; - /* * Enforce upper bound on number of fragmented packets * for which we attempt reassembly; @@ -801,23 +996,15 @@ ours: */ if ((nipq > maxnipq) && (maxnipq > 0)) { /* - * drop something from the tail of the current queue - * before proceeding further + * drop the oldest fragment before proceeding further */ - if (ipq[sum].prev == &ipq[sum]) { /* gak */ - for (i = 0; i < IPREASS_NHASH; i++) { - if (ipq[i].prev != &ipq[i]) { - ipstat.ips_fragtimeout += - ipq[i].prev->ipq_nfrags; - ip_freef(ipq[i].prev); - break; - } - } - } else { - ipstat.ips_fragtimeout += ipq[sum].prev->ipq_nfrags; - ip_freef(ipq[sum].prev); - } + fp = TAILQ_LAST(&ipq_list, ipq_list); + ipstat.ips_fragtimeout += fp->ipq_nfrags; + ip_freef(fp); } + + fp = NULL; + found: /* * Adjust ip_len to not reflect header, @@ -827,34 +1014,34 @@ found: if (ip->ip_off & IP_MF) { /* * Make sure that fragments have a data length - * that's a non-zero multiple of 8 bytes. + * that's a non-zero multiple of 8 bytes. */ if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { ipstat.ips_toosmall++; /* XXX */ goto bad; } m->m_flags |= M_FRAG; - } else + } else { + /* Clear the flag in case packet comes from loopback */ m->m_flags &= ~M_FRAG; + } ip->ip_off <<= 3; /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf, and update - * the divert info in divert_info and args.divert_rule. + * the divert info in div_info and args.divert_rule. */ ipstat.ips_fragments++; m->m_pkthdr.header = ip; #if IPDIVERT m = ip_reass(m, - fp, &ipq[sum], &divert_info, &divert_cookie); + fp, &ipq[sum], &div_info, &args.divert_rule); #else m = ip_reass(m, fp, &ipq[sum]); #endif if (m == 0) { -#if IPFIREWALL_FORWARD - ip_fw_fwd_addr = NULL; -#endif + lck_mtx_unlock(ip_mutex); return; } ipstat.ips_reassembled++; @@ -863,7 +1050,7 @@ found: hlen = IP_VHL_HL(ip->ip_vhl) << 2; #if IPDIVERT /* Restore original checksum before diverting packet */ - if (divert_info != 0) { + if (div_info != 0) { ip->ip_len += hlen; HTONS(ip->ip_len); HTONS(ip->ip_off); @@ -881,14 +1068,14 @@ found: /* * Divert or tee packet to the divert protocol if required. * - * If divert_info is zero then cookie should be too, so we shouldn't + * If div_info is zero then cookie should be too, so we shouldn't * need to clear them here. Assume divert_packet() does so also. */ - if (divert_info != 0) { + if (div_info != 0) { struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ - if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0) + if ((div_info & IP_FW_PORT_TEE_FLAG) != 0) clone = m_dup(m, M_DONTWAIT); /* Restore packet header fields to original values */ @@ -897,13 +1084,15 @@ found: HTONS(ip->ip_off); /* Deliver packet to divert input routine */ - ip_divert_cookie = divert_cookie; - divert_packet(m, 1, divert_info & 0xffff); ipstat.ips_delivered++; + lck_mtx_unlock(ip_mutex); + divert_packet(m, 1, div_info & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ - if (clone == NULL) + if (clone == NULL) { return; + } + lck_mtx_lock(ip_mutex); m = clone; ip = mtod(m, struct ip *); } @@ -915,10 +1104,14 @@ found: * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. 
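For reference, the bucket computation behind sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id) above folds two nibbles of the source address together and XORs in the IP id. A worked sketch, assuming the conventional definition IPREASS_HMASK == IPREASS_NHASH - 1 with a power-of-two table size (the actual values are defined outside this excerpt):

    #include <stdio.h>

    #define IPREASS_NHASH 64                       /* assumed power-of-two size */
    #define IPREASS_HMASK (IPREASS_NHASH - 1)
    /* Same mixing as the macro above: the low nibble of the address in
     * bits 0-3, a nibble from the next byte in bits 4-7, XORed with the id. */
    #define IPREASS_HASH(x, y) \
        (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)

    int main(void)
    {
        unsigned int src = 0x0a00020f;             /* hypothetical ip_src */
        unsigned short id = 0x1234;                /* hypothetical ip_id */
        printf("bucket = %u\n", IPREASS_HASH(src, id));  /* prints: bucket = 27 */
        return 0;
    }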
*/ - if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR) != 0 && - ipsec4_in_reject(m, NULL)) { - ipsecstat.in_polvio++; - goto bad; + if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR) != 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject(m, NULL)) { + ipsecstat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + goto bad; + } + lck_mtx_unlock(sadb_mutex); } #endif @@ -927,49 +1120,45 @@ found: */ ipstat.ips_delivered++; { - KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - - (*ip_protox[ip->ip_p]->pr_input)(m, hlen); -#if IPFIREWALL_FORWARD - ip_fw_fwd_addr = NULL; /* tcp needed it */ -#endif + if (args.next_hop && ip->ip_p == IPPROTO_TCP) { + /* TCP needs IPFORWARD info if available */ + struct m_tag *fwd_tag; + struct ip_fwd_tag *ipfwd_tag; + + fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) { + goto bad; + } + + ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + ipfwd_tag->next_hop = args.next_hop; + + m_tag_prepend(m, fwd_tag); + + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + + lck_mtx_unlock(ip_mutex); + + /* TCP deals with its own locking */ + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); + } else { + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + + lck_mtx_unlock(ip_mutex); + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); + } + return; } bad: -#if IPFIREWALL_FORWARD - ip_fw_fwd_addr = NULL; -#endif KERNEL_DEBUG(DBG_LAYER_END, 0,0,0,0,0); + lck_mtx_unlock(ip_mutex); m_freem(m); } -/* - * IP software interrupt routine - to go away sometime soon - */ -void -ipintr(void) -{ - int s; - struct mbuf *m; - - KERNEL_DEBUG(DBG_FNC_IP_INPUT | DBG_FUNC_START, 0,0,0,0,0); - - while(1) { - s = splimp(); - IF_DEQUEUE(&ipintrq, m); - splx(s); - if (m == 0) { - KERNEL_DEBUG(DBG_FNC_IP_INPUT | DBG_FUNC_END, 0,0,0,0,0); - return; - } - - ip_input(m); - } -} - -NETISR_SET(NETISR_IP, ipintr); - /* * Take incoming datagram fragment and try to reassemble it into * whole datagram. 
If a chain for reassembly of this datagram already @@ -1037,6 +1226,7 @@ ip_reass(m, fp, where) #endif fp->ipq_div_cookie = 0; #endif + TAILQ_INSERT_HEAD(&ipq_list, fp, ipq_list); goto inserted; } else { fp->ipq_nfrags++; @@ -1100,6 +1290,7 @@ ip_reass(m, fp, where) } inserted: + currentfrags++; #if IPDIVERT /* @@ -1172,8 +1363,12 @@ inserted: for (q = nq; q != NULL; q = nq) { nq = q->m_nextpkt; q->m_nextpkt = NULL; + if (q->m_pkthdr.csum_flags & CSUM_TCP_SUM16) + m->m_pkthdr.csum_flags = 0; + else { m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; + } m_cat(m, q); } @@ -1199,6 +1394,8 @@ inserted: ip->ip_src = fp->ipq_src; ip->ip_dst = fp->ipq_dst; remque((void*)fp); + TAILQ_REMOVE(&ipq_list, fp, ipq_list); + currentfrags -= fp->ipq_nfrags; nipq--; (void) m_free(dtom(fp)); m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2); @@ -1234,14 +1431,10 @@ static void ip_freef(fp) struct ipq *fp; { - register struct mbuf *q; - - while (fp->ipq_frags) { - q = fp->ipq_frags; - fp->ipq_frags = q->m_nextpkt; - m_freem(q); - } + currentfrags -= fp->ipq_nfrags; + m_freem_list(fp->ipq_frags); remque((void*)fp); + TAILQ_REMOVE(&ipq_list, fp, ipq_list); (void) m_free(dtom(fp)); nipq--; } @@ -1255,9 +1448,8 @@ void ip_slowtimo() { register struct ipq *fp; - int s = splnet(); int i; - + lck_mtx_lock(ip_mutex); for (i = 0; i < IPREASS_NHASH; i++) { fp = ipq[i].next; if (fp == 0) @@ -1287,7 +1479,7 @@ ip_slowtimo() } } ipflow_slowtimo(); - splx(s); + lck_mtx_unlock(ip_mutex); } /* @@ -1298,12 +1490,14 @@ ip_drain() { int i; + lck_mtx_lock(ip_mutex); for (i = 0; i < IPREASS_NHASH; i++) { while (ipq[i].next != &ipq[i]) { ipstat.ips_fragdropped += ipq[i].next->ipq_nfrags; ip_freef(ipq[i].next); } } + lck_mtx_unlock(ip_mutex); in_rtqdrain(); } @@ -1311,12 +1505,16 @@ ip_drain() * Do option processing on a datagram, * possibly discarding it if bad options are encountered, * or forwarding it if source-routed. + * The pass argument is used when operating in the IPSTEALTH + * mode to tell what options to process: + * [LS]SRR (pass 0) or the others (pass 1). + * The reason for as many as two passes is that when doing IPSTEALTH, + * non-routing options should be processed only if the packet is for us. * Returns 1 if packet has been forwarded/freed, * 0 if the packet should be processed further. 
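The csum_data arithmetic in the ip_reass() hunk above relies on the Internet checksum being a ones'-complement sum: per-fragment partial sums from the hardware can simply be added, with the carries folded back in later (the CSUM_TCP_SUM16 flags are cleared outright instead, since that scheme's sums are not merged this way). A small sketch of the folding, with made-up partial sums:

    #include <stdio.h>

    /* Fold a 32-bit accumulation of 16-bit ones'-complement partial sums
     * back into 16 bits, the way the final checksum verification would. */
    static unsigned short fold32(unsigned long sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (unsigned short)sum;
    }

    int main(void)
    {
        unsigned long frag1 = 0x1f3a2;   /* made-up partial sum, fragment 1 */
        unsigned long frag2 = 0x0c4d1;   /* made-up partial sum, fragment 2 */

        /* Adding the partials first and folding once gives the same result
         * as folding each and then summing, which is why csum_data can be
         * accumulated across fragments. */
        printf("combined = 0x%04x\n", fold32(frag1 + frag2));   /* 0xb875 */
        return 0;
    }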
*/ static int -ip_dooptions(m) - struct mbuf *m; +ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop, struct route *ipforward_rt) { register struct ip *ip = mtod(m, struct ip *); register u_char *cp; @@ -1387,6 +1585,10 @@ ip_dooptions(m) */ break; } + else { + ifafree(&ia->ia_ifa); + ia = NULL; + } off--; /* 0 origin */ if (off > optlen - (int)sizeof(struct in_addr)) { /* @@ -1400,15 +1602,16 @@ ip_dooptions(m) if (!ip_dosourceroute) { if (ipforwarding) { - char buf[16]; /* aaa.bbb.ccc.ddd\0 */ + char buf[MAX_IPv4_STR_LEN]; + char buf2[MAX_IPv4_STR_LEN]; /* * Acting as a router, so generate ICMP */ nosourcerouting: - strcpy(buf, inet_ntoa(ip->ip_dst)); - log(LOG_WARNING, + log(LOG_WARNING, "attempted source route from %s to %s\n", - inet_ntoa(ip->ip_src), buf); + inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)), + inet_ntop(AF_INET, &ip->ip_dst, buf2, sizeof(buf2))); type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; @@ -1431,10 +1634,12 @@ nosourcerouting: if (opt == IPOPT_SSRR) { #define INA struct in_ifaddr * #define SA struct sockaddr * - if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) - ia = (INA)ifa_ifwithnet((SA)&ipaddr); - } else - ia = ip_rtaddr(ipaddr.sin_addr); + if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) { + ia = (INA)ifa_ifwithnet((SA)&ipaddr); + } + } else { + ia = ip_rtaddr(ipaddr.sin_addr, ipforward_rt); + } if (ia == 0) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; @@ -1443,6 +1648,8 @@ nosourcerouting: ip->ip_dst = ipaddr.sin_addr; (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), sizeof(struct in_addr)); + ifafree(&ia->ia_ifa); + ia = NULL; cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts @@ -1471,14 +1678,17 @@ nosourcerouting: * locate outgoing interface; if we're the destination, * use the incoming interface (should be same). */ - if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 && - (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { - type = ICMP_UNREACH; - code = ICMP_UNREACH_HOST; - goto bad; + if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0) { + if ((ia = ip_rtaddr(ipaddr.sin_addr, ipforward_rt)) == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; + goto bad; + } } (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), sizeof(struct in_addr)); + ifafree(&ia->ia_ifa); + ia = NULL; cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; @@ -1523,6 +1733,8 @@ nosourcerouting: (void)memcpy(sin, &IA_SIN(ia)->sin_addr, sizeof(struct in_addr)); ipt->ipt_ptr += sizeof(struct in_addr); + ifafree(&ia->ia_ifa); + ia = NULL; break; case IPOPT_TS_PRESPEC: @@ -1534,8 +1746,10 @@ nosourcerouting: } (void)memcpy(&ipaddr.sin_addr, sin, sizeof(struct in_addr)); - if (ifa_ifwithaddr((SA)&ipaddr) == 0) + if ((ia = (struct in_ifaddr*)ifa_ifwithaddr((SA)&ipaddr)) == 0) continue; + ifafree(&ia->ia_ifa); + ia = NULL; ipt->ipt_ptr += sizeof(struct in_addr); break; @@ -1552,13 +1766,15 @@ nosourcerouting: } } if (forward && ipforwarding) { - ip_forward(m, 1); + ip_forward(m, 1, next_hop, ipforward_rt); return (1); } return (0); bad: ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2; /* XXX icmp_error adds in hdr length */ + lck_mtx_unlock(ip_mutex); icmp_error(m, type, code, 0, 0); + lck_mtx_lock(ip_mutex); ipstat.ips_badoptions++; return (1); } @@ -1567,29 +1783,37 @@ bad: * Given address of next destination (final or next hop), * return internet address info of interface to be used to get there.
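The route caching used by ip_rtaddr() and ip_forward() below reuses a held route only when it exists, still matches the destination, and carries the current route_generation; otherwise the route is freed and looked up again. A userland sketch of that validity test; all types here are hypothetical stand-ins:

    #include <stdio.h>

    /* Hypothetical stand-in abstracting the struct route that callers now
     * pass into ip_rtaddr()/ip_forward()/ip_dooptions(). */
    struct route_cache {
        int valid;                       /* ro_rt != NULL */
        unsigned int dst;                /* cached destination address */
        unsigned int generation_id;      /* snapshot of route_generation */
    };

    static unsigned int route_generation = 42;  /* bumped on table changes */

    static int cache_usable(const struct route_cache *rc, unsigned int dst)
    {
        return rc->valid && rc->dst == dst &&
            rc->generation_id == route_generation;
    }

    int main(void)
    {
        struct route_cache rc = { 1, 0x0a000001, 42 };

        printf("same dst, current gen: %d\n", cache_usable(&rc, 0x0a000001)); /* 1 */
        route_generation++;              /* a routing change invalidates it */
        printf("after generation bump: %d\n", cache_usable(&rc, 0x0a000001)); /* 0 */
        return 0;
    }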
*/ -static struct in_ifaddr * -ip_rtaddr(dst) - struct in_addr dst; +struct in_ifaddr * +ip_rtaddr(dst, rt) + struct in_addr dst; + struct route *rt; { register struct sockaddr_in *sin; - sin = (struct sockaddr_in *) &ipforward_rt.ro_dst; + sin = (struct sockaddr_in *)&rt->ro_dst; - if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr || - ipforward_rt.ro_rt->generation_id != route_generation) { - if (ipforward_rt.ro_rt) { - rtfree(ipforward_rt.ro_rt); - ipforward_rt.ro_rt = 0; + lck_mtx_lock(rt_mtx); + if (rt->ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr || + rt->ro_rt->generation_id != route_generation) { + if (rt->ro_rt) { + rtfree_locked(rt->ro_rt); + rt->ro_rt = 0; } sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = dst; - rtalloc_ign(&ipforward_rt, RTF_PRCLONING); + rtalloc_ign_locked(rt, RTF_PRCLONING); } - if (ipforward_rt.ro_rt == 0) + if (rt->ro_rt == 0) { + lck_mtx_unlock(rt_mtx); return ((struct in_ifaddr *)0); - return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa); + } + + if (rt->ro_rt->rt_ifa) + ifaref(rt->ro_rt->rt_ifa); + lck_mtx_unlock(rt_mtx); + return ((struct in_ifaddr *) rt->ro_rt->rt_ifa); } /* @@ -1735,9 +1959,7 @@ u_char inetctlerrmap[PRC_NCMDS] = { * via a source route. */ static void -ip_forward(m, srcrt) - struct mbuf *m; - int srcrt; +ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route *ipforward_rt) { register struct ip *ip = mtod(m, struct ip *); register struct sockaddr_in *sin; @@ -1745,21 +1967,28 @@ ip_forward(m, srcrt) int error, type = 0, code = 0; struct mbuf *mcopy; n_long dest; + struct in_addr pkt_dst; struct ifnet *destifp; #if IPSEC struct ifnet dummyifp; #endif dest = 0; + /* + * Cache the destination address of the packet; this may be + * changed by use of 'ipfw fwd'. + */ + pkt_dst = next_hop ? 
next_hop->sin_addr : ip->ip_dst; + #if DIAGNOSTIC if (ipprintfs) printf("forward: src %lx dst %lx ttl %x\n", - (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr, + (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr, ip->ip_ttl); #endif - if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { + if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) { ipstat.ips_cantforward++; m_freem(m); return; @@ -1776,24 +2005,24 @@ ip_forward(m, srcrt) } #endif - sin = (struct sockaddr_in *)&ipforward_rt.ro_dst; - if ((rt = ipforward_rt.ro_rt) == 0 || - ip->ip_dst.s_addr != sin->sin_addr.s_addr || - ipforward_rt.ro_rt->generation_id != route_generation) { - if (ipforward_rt.ro_rt) { - rtfree(ipforward_rt.ro_rt); - ipforward_rt.ro_rt = 0; + sin = (struct sockaddr_in *)&ipforward_rt->ro_dst; + if ((rt = ipforward_rt->ro_rt) == 0 || + pkt_dst.s_addr != sin->sin_addr.s_addr || + ipforward_rt->ro_rt->generation_id != route_generation) { + if (ipforward_rt->ro_rt) { + rtfree(ipforward_rt->ro_rt); + ipforward_rt->ro_rt = 0; } sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); - sin->sin_addr = ip->ip_dst; + sin->sin_addr = pkt_dst; - rtalloc_ign(&ipforward_rt, RTF_PRCLONING); - if (ipforward_rt.ro_rt == 0) { + rtalloc_ign(ipforward_rt, RTF_PRCLONING); + if (ipforward_rt->ro_rt == 0) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); return; } - rt = ipforward_rt.ro_rt; + rt = ipforward_rt->ro_rt; } /* @@ -1842,7 +2071,7 @@ ip_forward(m, srcrt) if (rt->rt_flags & RTF_GATEWAY) dest = satosin(rt->rt_gateway)->sin_addr.s_addr; else - dest = ip->ip_dst.s_addr; + dest = pkt_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; @@ -1853,8 +2082,28 @@ ip_forward(m, srcrt) } } - error = ip_output(m, (struct mbuf *)0, &ipforward_rt, + { + if (next_hop) { + /* Pass IPFORWARD info if available */ + struct m_tag *tag; + struct ip_fwd_tag *ipfwd_tag; + + tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (tag == NULL) { + error = ENOBUFS; + m_freem(m); + return; + } + + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); + ipfwd_tag->next_hop = next_hop; + + m_tag_prepend(m, tag); + } + error = ip_output_list(m, 0, (struct mbuf *)0, ipforward_rt, IP_FORWARDING, 0); + } if (error) ipstat.ips_cantforward++; else { @@ -1863,7 +2112,7 @@ ip_forward(m, srcrt) ipstat.ips_redirectsent++; else { if (mcopy) { - ipflow_create(&ipforward_rt, mcopy); + ipflow_create(ipforward_rt, mcopy); m_freem(mcopy); } return; @@ -1892,8 +2141,8 @@ ip_forward(m, srcrt) type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; #ifndef IPSEC - if (ipforward_rt.ro_rt) - destifp = ipforward_rt.ro_rt->rt_ifp; + if (ipforward_rt->ro_rt) + destifp = ipforward_rt->ro_rt->rt_ifp; #else /* * If the packet is routed over IPsec tunnel, tell the @@ -1901,25 +2150,25 @@ ip_forward(m, srcrt) * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! 
*/ - if (ipforward_rt.ro_rt) { + if (ipforward_rt->ro_rt) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; if (ipsec_bypass) { - destifp = ipforward_rt.ro_rt->rt_ifp; + destifp = ipforward_rt->ro_rt->rt_ifp; ipstat.ips_cantfrag++; break; } - + lck_mtx_lock(sadb_mutex); sp = ipsec4_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp == NULL) - destifp = ipforward_rt.ro_rt->rt_ifp; + destifp = ipforward_rt->ro_rt->rt_ifp; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, @@ -1950,6 +2199,7 @@ ip_forward(m, srcrt) key_freesp(sp); } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ ipstat.ips_cantfrag++; @@ -1968,11 +2218,11 @@ ip_forward(m, srcrt) } void -ip_savecontrol(inp, mp, ip, m) - register struct inpcb *inp; - register struct mbuf **mp; - register struct ip *ip; - register struct mbuf *m; +ip_savecontrol( + register struct inpcb *inp, + register struct mbuf **mp, + register struct ip *ip, + register struct mbuf *m) { if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; @@ -2018,6 +2268,7 @@ ip_savecontrol(inp, mp, ip, m) struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; + ifnet_head_lock_shared(); if (((ifp = m->m_pkthdr.rcvif)) && ( ifp->if_index && (ifp->if_index <= if_index))) { sdp = (struct sockaddr_dl *)(ifnet_addrs @@ -2038,6 +2289,7 @@ makedummy: sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } + ifnet_head_done(); *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c index b522d5f95..584058aa0 100644 --- a/bsd/netinet/ip_mroute.c +++ b/bsd/netinet/ip_mroute.c @@ -71,13 +71,13 @@ #endif #ifndef MROUTING -extern u_long _ip_mcast_src __P((int vifi)); -extern int _ip_mforward __P((struct ip *ip, struct ifnet *ifp, - struct mbuf *m, struct ip_moptions *imo)); -extern int _ip_mrouter_done __P((void)); -extern int _ip_mrouter_get __P((struct socket *so, struct sockopt *sopt)); -extern int _ip_mrouter_set __P((struct socket *so, struct sockopt *sopt)); -extern int _mrt_ioctl __P((int req, caddr_t data, struct proc *p)); +extern u_long _ip_mcast_src(int vifi); +extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, + struct mbuf *m, struct ip_moptions *imo); +extern int _ip_mrouter_done(void); +extern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); +extern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); +extern int _mrt_ioctl(int req, caddr_t data, struct proc *p); /* * Dummy routines and globals used when multicast routing is not compiled in. 
@@ -215,7 +215,7 @@ ip_rsvp_force_done(so) struct socket *ip_mrouter = NULL; static struct mrtstat mrtstat; #else /* MROUTE_LKM */ -extern void X_ipip_input __P((struct mbuf *m, int iphlen)); +extern void X_ipip_input(struct mbuf *m, int iphlen); extern struct mrtstat mrtstat; static int ip_mrtproto; #endif @@ -286,13 +286,13 @@ static int have_encap_tunnel = 0; static u_long last_encap_src; static struct vif *last_encap_vif; -static u_long X_ip_mcast_src __P((int vifi)); -static int X_ip_mforward __P((struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo)); -static int X_ip_mrouter_done __P((void)); -static int X_ip_mrouter_get __P((struct socket *so, struct sockopt *m)); -static int X_ip_mrouter_set __P((struct socket *so, struct sockopt *m)); -static int X_legal_vif_num __P((int vif)); -static int X_mrt_ioctl __P((int cmd, caddr_t data)); +static u_long X_ip_mcast_src(int vifi); +static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); +static int X_ip_mrouter_done(void); +static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); +static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); +static int X_legal_vif_num(int vif); +static int X_mrt_ioctl(int cmd, caddr_t data); static int get_sg_cnt(struct sioc_sg_req *); static int get_vif_cnt(struct sioc_vif_req *); @@ -713,6 +713,8 @@ add_vif(vifcp) ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa == 0) return EADDRNOTAVAIL; ifp = ifa->ifa_ifp; + ifafree(ifa); + ifa = NULL; if (vifcp->vifc_flags & VIFF_TUNNEL) { if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { @@ -1076,14 +1078,17 @@ socket_send(s, mm, src) struct mbuf *mm; struct sockaddr_in *src; { + socket_lock(s, 1); if (s) { if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, - mm, (struct mbuf *)0) != 0) { + mm, (struct mbuf *)0, NULL) != 0) { sorwakeup(s); + socket_unlock(s, 1); return 0; } } + socket_unlock(s, 1); m_freem(mm); return -1; } @@ -1336,10 +1341,7 @@ expire_upcalls(void *unused) struct mfc *mfc, **nptr; int i; int s; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); s = splnet(); for (i = 0; i < MFCTBLSIZ; i++) { @@ -1382,7 +1384,6 @@ expire_upcalls(void *unused) } splx(s); timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); - (void) thread_funnel_set(network_flock, FALSE); } /* @@ -1646,8 +1647,6 @@ ipip_input(m, iphlen) struct ifnet *ifp = m->m_pkthdr.rcvif; register struct ip *ip = mtod(m, struct ip *); register int hlen = ip->ip_hl << 2; - register int s; - register struct ifqueue *ifq; register struct vif *vifp; if (!have_encap_tunnel) { @@ -1698,23 +1697,8 @@ ipip_input(m, iphlen) m->m_len -= IP_HDR_LEN; m->m_pkthdr.len -= IP_HDR_LEN; m->m_pkthdr.rcvif = ifp; - - ifq = &ipintrq; - s = splimp(); - if (IF_QFULL(ifq)) { - IF_DROP(ifq); - m_freem(m); - } else { - IF_ENQUEUE(ifq, m); - /* - * normally we would need a "schednetisr(NETISR_IP)" - * here but we were called by ip_input and it is going - * to loop back & try to dequeue the packet we just - * queued as soon as we return so we avoid the - * unnecessary software interrrupt. 
- } - splx(s); + + proto_inject(PF_INET, m); } /* @@ -1852,11 +1836,8 @@ tbf_reprocess_q(xvifp) void *xvifp; { register struct vif *vifp = xvifp; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); if (ip_mrouter == NULL) { - (void) thread_funnel_set(network_flock, FALSE); return; } @@ -1866,7 +1847,6 @@ tbf_reprocess_q(xvifp) if (vifp->v_tbf->tbf_q_len) timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); - (void) thread_funnel_set(network_flock, FALSE); } /* function that will selectively discard a member of the queue diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h index a667f4ea4..c9e42d75d 100644 --- a/bsd/netinet/ip_mroute.h +++ b/bsd/netinet/ip_mroute.h @@ -87,7 +87,9 @@ #define MRT_ASSERT 107 /* enable PIM assert processing */ +#ifdef KERNEL_PRIVATE #define GET_TIME(t) microtime(&t) +#endif /* KERNEL_PRIVATE */ /* * Types and macros for handling bitmaps with one bit per virtual interface. @@ -172,11 +174,14 @@ struct sioc_vif_req { u_long ibytes; /* Input byte count on vif */ u_long obytes; /* Output byte count on vif */ }; - +#ifdef PRIVATE /* * The kernel's virtual-interface structure. */ +struct tbf; +struct ifnet; +struct socket; struct vif { u_char v_flags; /* VIFF_ flags defined above */ u_char v_threshold; /* min ttl required to forward on vif*/ @@ -193,6 +198,7 @@ struct vif { u_int v_rsvp_on; /* RSVP listening on this vif */ struct socket *v_rsvpd; /* RSVP daemon socket */ }; +#endif /* * The kernel's multicast forwarding cache entry structure @@ -228,7 +234,9 @@ struct igmpmsg { u_char unused3; struct in_addr im_src, im_dst; }; +#define MFCTBLSIZ 256 +#ifdef KERNEL_PRIVATE /* * Argument structure used for pkt info. while upcall is made */ @@ -242,7 +250,6 @@ struct rtdetq { struct rtdetq *next; /* Next in list of packets */ }; -#define MFCTBLSIZ 256 #if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ #define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1)) #else @@ -270,21 +277,17 @@ struct tbf struct mbuf *tbf_t; /* tail-insertion pointer */ }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE struct sockopt; -extern int (*ip_mrouter_set) __P((struct socket *, struct sockopt *)); -extern int (*ip_mrouter_get) __P((struct socket *, struct sockopt *)); -extern int (*ip_mrouter_done) __P((void)); +extern int (*ip_mrouter_set)(struct socket *, struct sockopt *); +extern int (*ip_mrouter_get)(struct socket *, struct sockopt *); +extern int (*ip_mrouter_done)(void); #if MROUTING -extern int (*mrt_ioctl) __P((int, caddr_t)); +extern int (*mrt_ioctl)(int, caddr_t); #else -extern int (*mrt_ioctl) __P((int, caddr_t, struct proc *)); +extern int (*mrt_ioctl)(int, caddr_t, struct proc *); #endif -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ - -#endif /* _NETINET_IP_MROUTE_H_ */ +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index edee063bc..9fd7a09a1 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -65,6 +65,8 @@ #include #include #include +#include +#include #include #include @@ -76,6 +78,8 @@ #include #include +#include + #include "faith.h" #include @@ -87,16 +91,6 @@ #define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) -#if vax -#include -#endif - -#if __FreeBSD__ -#include - -static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); -#endif - #if IPSEC #include #include @@ -108,6 +102,7 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #endif /*IPSEC*/ #include +#include #if DUMMYNET #include @@ -120,42 +115,45 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); (ntohl(a.s_addr))&0xFF); #endif +#if IPSEC +extern lck_mtx_t *sadb_mutex; +#endif + u_short ip_id; -static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); -static struct ifnet *ip_multicast_if __P((struct in_addr *, int *)); -static void ip_mloopback - __P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int)); -static int ip_getmoptions - __P((struct sockopt *, struct ip_moptions *)); -static int ip_pcbopts __P((int, struct mbuf **, struct mbuf *)); -static int ip_setmoptions - __P((struct sockopt *, struct ip_moptions **)); +static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); +static struct ifnet *ip_multicast_if(struct in_addr *, int *); +static void ip_mloopback(struct ifnet *, struct mbuf *, + struct sockaddr_in *, int); +static int ip_getmoptions(struct sockopt *, struct ip_moptions *); +static int ip_pcbopts(int, struct mbuf **, struct mbuf *); +static int ip_setmoptions(struct sockopt *, struct ip_moptions **); int ip_createmoptions(struct ip_moptions **imop); int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq); int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq); -int ip_optcopy __P((struct ip *, struct ip *)); -extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **)); +int ip_optcopy(struct ip *, struct ip *); +extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **); #ifdef __APPLE__ extern struct mbuf* m_dup(register struct mbuf *m, int how); #endif -static u_long lo_dl_tag = 0; - -void in_delayed_cksum(struct mbuf *m); extern int apple_hwcksum_tx; extern u_long route_generation; extern struct protosw inetsw[]; extern struct ip_linklocal_stat ip_linklocal_stat; +extern lck_mtx_t *ip_mutex; /* temporary: for testing */ #if IPSEC extern int ipsec_bypass; #endif +static int ip_maxchainsent = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW, + &ip_maxchainsent, 0, "use dlil_output_list"); /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -163,12 +161,26 @@ extern int ipsec_bypass; * The mbuf opt, if present, will not be freed. 
*/ int -ip_output(m0, opt, ro, flags, imo) - struct mbuf *m0; - struct mbuf *opt; - struct route *ro; - int flags; - struct ip_moptions *imo; +ip_output( + struct mbuf *m0, + struct mbuf *opt, + struct route *ro, + int flags, + struct ip_moptions *imo) +{ + int error; + error = ip_output_list(m0, 0, opt, ro, flags, imo); + return error; +} + +int +ip_output_list( + struct mbuf *m0, + int packetchain, + struct mbuf *opt, + struct route *ro, + int flags, + struct ip_moptions *imo) { struct ip *ip, *mhip; struct ifnet *ifp = NULL; @@ -178,80 +190,116 @@ ip_output(m0, opt, ro, flags, imo) struct sockaddr_in *dst = NULL; struct in_ifaddr *ia = NULL; int isbroadcast, sw_csum; + struct in_addr pkt_dst; #if IPSEC struct route iproute; struct socket *so = NULL; struct secpolicy *sp = NULL; #endif - u_int16_t divert_cookie; /* firewall cookie */ #if IPFIREWALL_FORWARD int fwd_rewrite_src = 0; #endif - struct ip_fw_chain *rule = NULL; - -#if IPDIVERT - /* Get and reset firewall cookie */ - divert_cookie = ip_divert_cookie; - ip_divert_cookie = 0; -#else - divert_cookie = 0; -#endif + struct ip_fw_args args; + int didfilter = 0; + ipfilter_t inject_filter_ref = 0; + struct m_tag *tag; + struct route dn_route; + struct mbuf * packetlist; + int pktcnt = 0; + + lck_mtx_lock(ip_mutex); KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); -#if IPFIREWALL && DUMMYNET - /* - * dummynet packet are prepended a vestigial mbuf with - * m_type = MT_DUMMYNET and m_data pointing to the matching - * rule. - */ - if (m->m_type == MT_DUMMYNET) { - /* - * the packet was already tagged, so part of the - * processing was already done, and we need to go down. - * Get parameters from the header. - */ - rule = (struct ip_fw_chain *)(m->m_data) ; - opt = NULL ; - ro = & ( ((struct dn_pkt *)m)->ro ) ; - imo = NULL ; - dst = ((struct dn_pkt *)m)->dn_dst ; - ifp = ((struct dn_pkt *)m)->ifp ; - flags = ((struct dn_pkt *)m)->flags; - m0 = m = m->m_next ; -#if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { - so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); - } + packetlist = m0; + args.eh = NULL; + args.rule = NULL; + args.next_hop = NULL; + args.divert_rule = 0; /* divert cookie */ + + /* Grab info from mtags prepended to the chain */ +#if DUMMYNET + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { + struct dn_pkt_tag *dn_tag; + + dn_tag = (struct dn_pkt_tag *)(tag+1); + args.rule = dn_tag->rule; + opt = NULL; + dn_route = dn_tag->ro; + ro = &dn_route; + + imo = NULL; + dst = dn_tag->dn_dst; + ifp = dn_tag->ifp; + flags = dn_tag->flags; + + m_tag_delete(m0, tag); + } +#endif /* DUMMYNET */ + + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { + struct divert_tag *div_tag; + + div_tag = (struct divert_tag *)(tag+1); + args.divert_rule = div_tag->cookie; + + m_tag_delete(m0, tag); + } + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { + struct ip_fwd_tag *ipfwd_tag; + + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); + args.next_hop = ipfwd_tag->next_hop; + + m_tag_delete(m0, tag); + } + + m = m0; + +#if DIAGNOSTIC + if ( !m || (m->m_flags & M_PKTHDR) == 0) + panic("ip_output no HDR"); + if (!ro) + panic("ip_output no route, proto = %d", + mtod(m, struct ip *)->ip_p); #endif + + if (args.rule != NULL) { /* dummynet already saw us */ ip = mtod(m, struct ip *); hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; if (ro->ro_rt != NULL) ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; - goto sendit;
- } else - rule = NULL ; + if (ia) + ifaref(&ia->ia_ifa); +#if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); + } #endif + goto sendit; + } + #if IPSEC if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { so = ipsec_getsocket(m); (void)ipsec_setsocket(m, NULL); } #endif +loopit: + /* + * No need to proccess packet twice if we've + * already seen it + */ + inject_filter_ref = ipf_get_inject_filter(m); -#if DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("ip_output no HDR"); - if (!ro) - panic("ip_output no route, proto = %d", - mtod(m, struct ip *)->ip_p); -#endif if (opt) { m = ip_insertoptions(m, opt, &len); hlen = len; } ip = mtod(m, struct ip *); + pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst; + /* * Fill in IP header. */ @@ -281,15 +329,17 @@ ip_output(m0, opt, ro, flags, imo) * cache with IPv6. */ - if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) && - ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) && - (ifa_foraddr(ip->ip_src.s_addr) == NULL)) { - error = EADDRNOTAVAIL; - goto bad; + { + if (ro->ro_rt && (ro->ro_rt->generation_id != route_generation) && + ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && (ip->ip_src.s_addr != INADDR_ANY) && + (ifa_foraddr(ip->ip_src.s_addr) == 0)) { + error = EADDRNOTAVAIL; + goto bad; + } } if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin_family != AF_INET || - dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + dst->sin_addr.s_addr != pkt_dst.s_addr)) { rtfree(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } @@ -297,7 +347,7 @@ ip_output(m0, opt, ro, flags, imo) bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; + dst->sin_addr = pkt_dst; } /* * If routing to interface only, @@ -306,11 +356,14 @@ ip_output(m0, opt, ro, flags, imo) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) #define sintosa(sin) ((struct sockaddr *)(sin)) if (flags & IP_ROUTETOIF) { - if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && - (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + if (ia) + ifafree(&ia->ia_ifa); + if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) { + if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } } ifp = ia->ia_ifp; ip->ip_ttl = 1; @@ -332,7 +385,11 @@ ip_output(m0, opt, ro, flags, imo) error = EHOSTUNREACH; goto bad; } + if (ia) + ifafree(&ia->ia_ifa); ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) @@ -342,7 +399,7 @@ ip_output(m0, opt, ro, flags, imo) else isbroadcast = in_broadcast(dst->sin_addr, ifp); } - if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { struct in_multi *inm; m->m_flags |= M_MCAST; @@ -395,7 +452,9 @@ ip_output(m0, opt, ro, flags, imo) } } - IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); + ifnet_lock_shared(ifp); + IN_LOOKUP_MULTI(pkt_dst, ifp, inm); + ifnet_lock_done(ifp); if (inm != NULL && (imo == NULL || imo->imo_multicast_loop)) { /* @@ -403,6 +462,42 @@ ip_output(m0, opt, ro, flags, imo) * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. 
*/ + if (!TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == 0); + struct ipf_pktopts *ippo = 0, ipf_pktopts; + + if (imo) { + ippo = &ipf_pktopts; + ipf_pktopts.ippo_mcast_ifnet = imo->imo_multicast_ifp; + ipf_pktopts.ippo_mcast_ttl = imo->imo_multicast_ttl; + ipf_pktopts.ippo_mcast_loop = imo->imo_multicast_loop; + } + + lck_mtx_unlock(ip_mutex); + ipf_ref(); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + lck_mtx_lock(ip_mutex); + goto bad; + } + } + } + lck_mtx_lock(ip_mutex); + ipf_unref(); + didfilter = 1; + } ip_mloopback(ifp, m, dst, hlen); } else { @@ -429,6 +524,7 @@ ip_output(m0, opt, ro, flags, imo) imo = NULL; if (ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); + lck_mtx_unlock(ip_mutex); goto done; } } @@ -444,6 +540,7 @@ ip_output(m0, opt, ro, flags, imo) */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); + lck_mtx_unlock(ip_mutex); goto done; } @@ -466,15 +563,6 @@ ip_output(m0, opt, ro, flags, imo) #endif /* IPFIREWALL_FORWARD */ } #endif /* notdef */ - /* - * Verify that we have any chance at all of being able to queue - * the packet or packet fragments - */ - if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= - ifp->if_snd.ifq_maxlen) { - error = ENOBUFS; - goto bad; - } /* * Look for broadcast address and @@ -512,6 +600,35 @@ sendit: } } +injectit: + if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == 0); + + lck_mtx_unlock(ip_mutex); + ipf_ref(); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + lck_mtx_lock(ip_mutex); + goto bad; + } + } + } + ipf_unref(); + lck_mtx_lock(ip_mutex); + } + #if IPSEC /* temporary for testing only: bypass ipsec alltogether */ @@ -520,6 +637,8 @@ sendit: KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); + lck_mtx_lock(sadb_mutex); + /* get SP for this packet */ if (so == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); @@ -529,6 +648,7 @@ sendit: if (sp == NULL) { ipsecstat.out_inval++; KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); + lck_mtx_unlock(sadb_mutex); goto bad; } @@ -542,12 +662,14 @@ sendit: */ ipsecstat.out_polvio++; KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0); + lck_mtx_unlock(sadb_mutex); goto bad; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. 
*/ KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0); + lck_mtx_unlock(sadb_mutex); goto skip_ipsec; case IPSEC_POLICY_IPSEC: @@ -555,6 +677,7 @@ sendit: /* acquire a policy */ error = key_spdacquire(sp); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0); + lck_mtx_unlock(sadb_mutex); goto bad; } break; @@ -588,8 +711,11 @@ sendit: HTONS(ip->ip_len); HTONS(ip->ip_off); + lck_mtx_unlock(ip_mutex); error = ipsec4_output(&state, sp, flags); - + lck_mtx_unlock(sadb_mutex); + lck_mtx_lock(ip_mutex); + m0 = m = state.m; if (flags & IP_ROUTETOIF) { @@ -639,7 +765,7 @@ sendit: /* Check that there wasn't a route change and src is still valid */ if (ro->ro_rt->generation_id != route_generation) { - if (ifa_foraddr(ip->ip_src.s_addr) == NULL && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { + if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { error = EADDRNOTAVAIL; KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0); goto bad; @@ -657,7 +783,11 @@ sendit: goto bad; } } else { + if (ia) + ifafree(&ia->ia_ifa); ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; } @@ -665,6 +795,31 @@ sendit: NTOHS(ip->ip_len); NTOHS(ip->ip_off); KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); + + /* Pass to filters again */ + if (!TAILQ_EMPTY(&ipv4_filters)) { + struct ipfilter *filter; + + lck_mtx_unlock(ip_mutex); + ipf_ref(); + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { + if (filter->ipf_filter.ipf_output) { + errno_t result; + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, 0); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + lck_mtx_lock(ip_mutex); + goto bad; + } + } + } + ipf_unref(); + lck_mtx_lock(ip_mutex); + } skip_ipsec: #endif /*IPSEC*/ @@ -678,19 +833,27 @@ skip_ipsec: if (fr_checkp) { struct mbuf *m1 = m; - if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) + if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1) { + lck_mtx_unlock(ip_mutex); goto done; + } ip = mtod(m0 = m = m1, struct ip *); } /* * Check with the firewall... + * but not if we are already being fwd'd from a firewall. */ - if (fw_enable && ip_fw_chk_ptr) { + if (fw_enable && IPFW_LOADED && !args.next_hop) { struct sockaddr_in *old = dst; - off = (*ip_fw_chk_ptr)(&ip, - hlen, ifp, &divert_cookie, &m, &rule, &dst); + args.m = m; + args.next_hop = dst; + args.oif = ifp; + off = ip_fw_chk_ptr(&args); + m = args.m; + dst = args.next_hop; + /* * On return we must do the following: * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) @@ -710,13 +873,14 @@ skip_ipsec: if (m) m_freem(m); error = EACCES ; + lck_mtx_unlock(ip_mutex); goto done ; } ip = mtod(m, struct ip *); if (off == 0 && dst == old) /* common case */ goto pass ; #if DUMMYNET - if ((off & IP_FW_PORT_DYNT_FLAG) != 0) { + if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { /* * pass the pkt to dummynet. Need to include * pipe number, m, ifp, ro, dst because these are @@ -726,11 +890,16 @@ skip_ipsec: * XXX note: if the ifp or ro entry are deleted * while a pkt is in dummynet, we are in trouble! 
*/ - error = dummynet_io(off & 0xffff, DN_TO_IP_OUT, m, - ifp,ro,dst,rule, flags); + args.ro = ro; + args.dst = dst; + args.flags = flags; + + lck_mtx_unlock(ip_mutex); + error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, + &args); goto done; } -#endif +#endif /* DUMMYNET */ #if IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { struct mbuf *clone = NULL; @@ -753,8 +922,7 @@ skip_ipsec: HTONS(ip->ip_off); /* Deliver packet to divert input routine */ - ip_divert_cookie = divert_cookie; - divert_packet(m, 0, off & 0xffff); + divert_packet(m, 0, off & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ if (clone != NULL) { @@ -762,6 +930,7 @@ skip_ipsec: ip = mtod(m, struct ip *); goto pass; } + lck_mtx_unlock(ip_mutex); goto done; } #endif @@ -777,7 +946,7 @@ skip_ipsec: * And I'm babbling. */ if (off == 0 && old != dst) { - struct in_ifaddr *ia; + struct in_ifaddr *ia_fw; /* It's changed... */ /* There must be a better way to do this next line... */ @@ -800,31 +969,65 @@ skip_ipsec: * as the packet runs through ip_input() as * it is done through a ISR. */ - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) { /* * If the addr to forward to is one * of ours, we pretend to * be the destination for this packet. */ - if (IA_SIN(ia)->sin_addr.s_addr == + if (IA_SIN(ia_fw)->sin_addr.s_addr == dst->sin_addr.s_addr) break; } if (ia) { /* tell ip_input "dont filter" */ - ip_fw_fwd_addr = dst; + struct m_tag *fwd_tag; + struct ip_fwd_tag *ipfwd_tag; + + fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) { + error = ENOBUFS; + goto bad; + } + + ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + ipfwd_tag->next_hop = args.next_hop; + + m_tag_prepend(m, fwd_tag); + if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & + m->m_pkthdr.csum_flags) == 0) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; + CSUM_IP_CHECKED | CSUM_IP_VALID; + } + else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + ip->ip_sum = in_cksum(m, hlen); } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; HTONS(ip->ip_len); HTONS(ip->ip_off); - ip_input(m); + + lck_mtx_unlock(ip_mutex); + + /* we need to call dlil_output to run filters + * and resync to avoid recursion loops. + */ + if (lo_ifp) { + dlil_output(lo_ifp, PF_INET, m, 0, (struct sockaddr *)dst, 0); + } + else { + printf("ip_output: no loopback ifp for forwarding!!!\n"); + } goto done; } /* Some of the logic for this was @@ -844,7 +1047,7 @@ skip_ipsec: goto bad; } - ia = ifatoia(ro_fwd->ro_rt->rt_ifa); + ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa); ifp = ro_fwd->ro_rt->rt_ifp; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) @@ -864,7 +1067,7 @@ skip_ipsec: * interface, do it again, from the new one. 
*/ if (fwd_rewrite_src) - ip->ip_src = IA_SIN(ia)->sin_addr; + ip->ip_src = IA_SIN(ia_fw)->sin_addr; goto pass ; } #endif /* IPFIREWALL_FORWARD */ @@ -873,8 +1076,9 @@ skip_ipsec: * we have to drop the pkt */ m_freem(m); - error = EACCES; /* not sure this is the right error msg */ - goto done; + error = EACCES; /* not sure this is the right error msg */ + lck_mtx_unlock(ip_mutex); + goto done; } pass: @@ -885,13 +1089,15 @@ pass: (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { ipstat.ips_badaddr++; m_freem(m); - /* - * Simply drop the packet just like a firewall -- we do not want the - * the application to feel the pain, not yet... - * Returning ENETUNREACH like ip6_output does in some similar cases - * could startle the otherwise clueless process that specifies + /* + * Do not simply drop the packet just like a firewall -- we want + * the application to feel the pain. + * Return ENETUNREACH like ip6_output does in some similar cases. + * This can startle the otherwise clueless process that specifies * loopback as the source address. */ + error = ENETUNREACH; + lck_mtx_unlock(ip_mutex); goto done; } #endif @@ -955,14 +1161,29 @@ pass: if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) ipsec_delaux(m); #endif -#if __APPLE__ - error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, + if (packetchain == 0) { + lck_mtx_unlock(ip_mutex); + error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt, (struct sockaddr *)dst, 0); -#else - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); -#endif - goto done; + goto done; + } + else { /* packet chaining allows us to reuse the route for all packets */ + m = m->m_nextpkt; + if (m == NULL) { + if (pktcnt > ip_maxchainsent) + ip_maxchainsent = pktcnt; + /* send */ + lck_mtx_unlock(ip_mutex); + error = dlil_output_list(ifp, PF_INET, packetlist, (void *) ro->ro_rt, + (struct sockaddr *)dst, 0); + pktcnt = 0; + goto done; + + } + m0 = m; + pktcnt++; + goto loopit; + } } /* * Too large for interface; fragment if possible.
@@ -998,8 +1219,10 @@ pass: if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { in_delayed_cksum(m); - if (m == NULL) + if (m == NULL) { + lck_mtx_unlock(ip_mutex); return(ENOMEM); + } m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } @@ -1047,8 +1270,9 @@ pass: goto sendorfree; } m->m_pkthdr.len = mhlen + len; - m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.rcvif = 0; m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; + m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; HTONS(mhip->ip_off); mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { @@ -1084,6 +1308,7 @@ sendorfree: KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + lck_mtx_unlock(ip_mutex); for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; @@ -1100,14 +1325,10 @@ sendorfree: ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #endif - -#if __APPLE__ - error = dlil_output(ifptodlt(ifp, PF_INET), m, (void *) ro->ro_rt, + if ((packetchain != 0) && (pktcnt > 0)) + panic("ip_output: mix of packets in packetlist is wrong=%x", packetlist); + error = dlil_output(ifp, PF_INET, m, (void *) ro->ro_rt, (struct sockaddr *)dst, 0); -#else - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); -#endif } else m_freem(m); } @@ -1116,6 +1337,10 @@ sendorfree: ipstat.ips_fragmented++; } done: + if (ia) { + ifafree(&ia->ia_ifa); + ia = NULL; + } #if IPSEC if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { if (ro == &iproute && ro->ro_rt) { @@ -1125,7 +1350,9 @@ done: if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ip_output call free SP:%x\n", sp)); + lck_mtx_lock(sadb_mutex); key_freesp(sp); + lck_mtx_unlock(sadb_mutex); } } #endif /* IPSEC */ @@ -1134,36 +1361,128 @@ done: return (error); bad: m_freem(m0); + lck_mtx_unlock(ip_mutex); goto done; } void -in_delayed_cksum(struct mbuf *m) +in_delayed_cksum_offset(struct mbuf *m, int ip_offset) { struct ip *ip; u_short csum, offset; - ip = mtod(m, struct ip *); + + while (ip_offset > m->m_len) { + ip_offset -= m->m_len; + m = m->m_next; + if (m == NULL) { + printf("in_delayed_cksum_offset failed - ip_offset wasn't in the packet\n"); + return; + } + } + + if (ip_offset + sizeof(struct ip) > m->m_len) { + printf("delayed m_pullup, m->len: %d off: %d\n", + m->m_len, ip_offset); + /* + * XXX + * this shouldn't happen + */ + m = m_pullup(m, ip_offset + sizeof(struct ip)); + } + + /* Gross */ + if (ip_offset) { + m->m_len -= ip_offset; + m->m_data += ip_offset; + } + + ip = mtod(m, struct ip*); offset = IP_VHL_HL(ip->ip_vhl) << 2 ; csum = in_cksum_skip(m, ip->ip_len, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ + + /* Gross */ + if (ip_offset) { + if (M_LEADINGSPACE(m) < ip_offset) + panic("in_delayed_cksum_offset - chain modified!\n"); + m->m_len += ip_offset; + m->m_data -= ip_offset; + } if (offset > ip->ip_len) /* bogus offset */ return; - if (offset + sizeof(u_short) > m->m_len) { + if (offset + ip_offset + sizeof(u_short) > m->m_len) { printf("delayed m_pullup, m->len: %d off: %d p: %d\n", - m->m_len, offset, ip->ip_p); + m->m_len, offset + ip_offset, ip->ip_p); /* * XXX * this shouldn't happen, but if it does, the * correct behavior may be to insert the checksum * in the existing chain instead of rearranging it.
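For orientation, the checksum these helpers defer and later patch in is the standard Internet checksum of RFC 1071. A minimal, endian-naive sketch of the one's-complement sum that in_cksum()/in_cksum_skip() compute (illustrative only; the kernel versions are optimized and walk mbuf chains):

#include <stdint.h>
#include <stddef.h>

/* One's-complement sum over a flat buffer, per RFC 1071. */
static uint16_t
rfc1071_cksum(const void *buf, size_t len)
{
	const uint16_t *p = (const uint16_t *)buf;
	uint32_t sum = 0;

	while (len > 1) {		/* sum successive 16-bit words */
		sum += *p++;
		len -= 2;
	}
	if (len)			/* trailing odd byte, zero-padded */
		sum += *(const uint8_t *)p;
	while (sum >> 16)		/* fold the carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}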
*/ - m = m_pullup(m, offset + sizeof(u_short)); + m = m_pullup(m, offset + ip_offset + sizeof(u_short)); + } + *(u_short *)(m->m_data + offset + ip_offset) = csum; +} + +void +in_delayed_cksum(struct mbuf *m) +{ + in_delayed_cksum_offset(m, 0); +} + +void +in_cksum_offset(struct mbuf* m, size_t ip_offset) +{ + struct ip* ip = NULL; + int hlen = 0; + + while (ip_offset > m->m_len) { + ip_offset -= m->m_len; + m = m->m_next; + if (m == NULL) { + printf("in_cksum_offset failed - ip_offset wasn't in the packet\n"); + return; + } + } + + if (ip_offset + sizeof(struct ip) > m->m_len) { + printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %d\n", + m->m_len, ip_offset); + /* + * XXX + * this shouldn't happen + */ + m = m_pullup(m, ip_offset + sizeof(struct ip)); + } + + /* Gross */ + if (ip_offset) { + m->m_len -= ip_offset; + m->m_data += ip_offset; + } + + ip = mtod(m, struct ip*); + +#ifdef _IP_VHL + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); + + /* Gross */ + if (ip_offset) { + if (M_LEADINGSPACE(m) < ip_offset) + panic("in_cksum_offset - chain modified!\n"); + m->m_len += ip_offset; + m->m_data -= ip_offset; } - *(u_short *)(m->m_data + offset) = csum; } /* @@ -1193,7 +1512,7 @@ ip_insertoptions(m, opt, phlen) MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n == 0) return (m); - n->m_pkthdr.rcvif = (struct ifnet *)0; + n->m_pkthdr.rcvif = 0; n->m_pkthdr.len = m->m_pkthdr.len + optlen; m->m_len -= sizeof(struct ip); m->m_data += sizeof(struct ip); @@ -1410,24 +1729,24 @@ ip_ctloutput(so, sopt) struct mbuf *m; int optname; - if (sopt->sopt_valsize > MCLBYTES) { - error = EMSGSIZE; - break; - } - + if (sopt->sopt_valsize > MCLBYTES) { + error = EMSGSIZE; + break; + } if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; priv = (sopt->sopt_p != NULL && - suser(sopt->sopt_p->p_ucred, - &sopt->sopt_p->p_acflag) != 0) ? 0 : 1; + proc_suser(sopt->sopt_p) != 0) ? 0 : 1; if (m) { req = mtod(m, caddr_t); len = m->m_len; } optname = sopt->sopt_name; + lck_mtx_lock(sadb_mutex); error = ipsec4_set_policy(inp, optname, req, len, priv); + lck_mtx_unlock(sadb_mutex); m_freem(m); break; } @@ -1533,7 +1852,9 @@ ip_ctloutput(so, sopt) req = mtod(m, caddr_t); len = m->m_len; } + lck_mtx_lock(sadb_mutex); error = ipsec4_get_policy(sotoinpcb(so), req, len, &m); + lck_mtx_unlock(sadb_mutex); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0) @@ -1679,9 +2000,13 @@ ip_multicast_if(a, ifindexp) *ifindexp = 0; if (ntohl(a->s_addr) >> 24 == 0) { ifindex = ntohl(a->s_addr) & 0xffffff; - if (ifindex < 0 || if_index < ifindex) + ifnet_head_lock_shared(); + if (ifindex < 0 || if_index < ifindex) { + ifnet_head_done(); return NULL; + } ifp = ifindex2ifnet[ifindex]; + ifnet_head_done(); if (ifindexp) *ifindexp = ifindex; } else { @@ -1705,7 +2030,6 @@ ip_setmoptions(sopt, imop) struct ifnet *ifp = NULL; struct ip_moptions *imo = *imop; int ifindex; - int s; if (imo == NULL) { /* @@ -1756,10 +2080,8 @@ ip_setmoptions(sopt, imop) * IP address. Find the interface and confirm that * it supports multicasting.
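The multicast-option handling above and below is driven from userland through the standard IPv4 socket options; a minimal sketch of the calls this path services (illustrative only, not part of the patch; addresses hypothetical, error handling abbreviated):

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* Join a multicast group on a given local interface; returns the socket or -1. */
int
join_group(const char *group, const char *local_addr)
{
	int s = socket(AF_INET, SOCK_DGRAM, 0);
	struct in_addr ifaddr;
	struct ip_mreq mreq;

	if (s < 0)
		return -1;

	/* Pick the outgoing multicast interface by unicast address;
	 * the kernel resolves this through ip_multicast_if(). */
	ifaddr.s_addr = inet_addr(local_addr);
	if (setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr,
	    sizeof (ifaddr)) < 0)
		return -1;

	/* Join; this lands in ip_addmembership(), which checks
	 * IFF_MULTICAST and the IP_MAX_MEMBERSHIPS limit. */
	mreq.imr_multiaddr.s_addr = inet_addr(group);
	mreq.imr_interface = ifaddr;
	if (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
	    sizeof (mreq)) < 0)
		return -1;

	return s;
}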
*/ - s = splimp(); ifp = ip_multicast_if(&addr, &ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { - splx(s); error = EADDRNOTAVAIL; break; } @@ -1768,7 +2090,6 @@ ip_setmoptions(sopt, imop) imo->imo_multicast_addr = addr; else imo->imo_multicast_addr.s_addr = INADDR_ANY; - splx(s); break; case IP_MULTICAST_TTL: @@ -1900,14 +2221,12 @@ ip_addmembership( struct sockaddr_in *dst; struct ifnet *ifp = NULL; int error = 0; - int s = 0; int i; if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { error = EINVAL; return error; } - s = splimp(); /* * If no interface address was provided, use the interface of * the route to the given multicast address. @@ -1939,7 +2258,6 @@ ip_addmembership( */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; - splx(s); return error; } /* @@ -1954,12 +2272,10 @@ ip_addmembership( } if (i < imo->imo_num_memberships) { error = EADDRINUSE; - splx(s); return error; } if (i == IP_MAX_MEMBERSHIPS) { error = ETOOMANYREFS; - splx(s); return error; } /* @@ -1969,11 +2285,9 @@ ip_addmembership( if ((imo->imo_membership[i] = in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { error = ENOBUFS; - splx(s); return error; } ++imo->imo_num_memberships; - splx(s); return error; } @@ -1987,7 +2301,6 @@ ip_dropmembership( struct ip_mreq *mreq) { int error = 0; - int s = 0; struct ifnet* ifp = NULL; int i; @@ -1996,7 +2309,6 @@ ip_dropmembership( return error; } - s = splimp(); /* * If an interface address was specified, get a pointer * to its ifnet structure. @@ -2007,7 +2319,6 @@ ip_dropmembership( ifp = ip_multicast_if(&mreq->imr_interface, NULL); if (ifp == NULL) { error = EADDRNOTAVAIL; - splx(s); return error; } } @@ -2023,21 +2334,19 @@ ip_dropmembership( } if (i == imo->imo_num_memberships) { error = EADDRNOTAVAIL; - splx(s); return error; } /* * Give up the multicast address record to which the * membership points. */ - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); /* * Remove the gap in the membership array. */ for (++i; i < imo->imo_num_memberships; ++i) imo->imo_membership[i-1] = imo->imo_membership[i]; --imo->imo_num_memberships; - splx(s); return error; } @@ -2119,8 +2428,7 @@ ip_freemoptions(imo) if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) - if (imo->imo_membership[i] != NULL) - in_delmulti(imo->imo_membership[i]); + in_delmulti(&imo->imo_membership[i]); FREE(imo, M_IPMOPTS); } } @@ -2204,18 +2512,15 @@ ip_mloopback(ifp, m, dst, hlen) * a filter has tapped-in. */ - if (lo_dl_tag == 0) - dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag); - /* * Stuff the 'real' ifp into the pkthdr, to be used in matching * in ip_input(); we need the loopback ifp/dl_tag passed as args * to make the loopback driver compliant with the data link * requirements. */ - if (lo_dl_tag) { + if (lo_ifp) { copym->m_pkthdr.rcvif = ifp; - dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *) dst, 0); + dlil_output(lo_ifp, PF_INET, copym, 0, (struct sockaddr *) dst, 0); } else { printf("Warning: ip_output call to dlil_find_dltag failed!\n"); m_freem(copym); diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index d5c0f22f6..ddba0e79f 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -58,7 +58,6 @@ #define _NETINET_IP_VAR_H_ #include -#ifdef __APPLE_API_PRIVATE /* * Overlay for ip header used by other protocols (tcp, udp). 
*/ @@ -70,6 +69,7 @@ struct ipovly { struct in_addr ih_dst; /* destination internet address */ }; +#ifdef KERNEL_PRIVATE /* * Ip reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. @@ -84,7 +84,8 @@ struct ipq { struct mbuf *ipq_frags; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; u_long ipq_nfrags; - u_long reserved[3]; /* for future use */ + TAILQ_ENTRY(ipq) ipq_list; + u_long reserved[1]; /* for future use */ #if IPDIVERT #ifdef IPDIVERT_44 u_int32_t ipq_div_info; /* ipfw divert port & flags */ @@ -101,7 +102,9 @@ struct ipq { * The actual length of the options (including ipopt_dst) * is in m_len. */ +#endif /* KERNEL_PRIVATE */ #define MAX_IPOPTLEN 40 +#ifdef KERNEL_PRIVATE struct ipoption { struct in_addr ipopt_dst; /* first-hop dst if source routed */ @@ -121,9 +124,14 @@ struct ip_moptions { u_long imo_multicast_vif; /* vif num outgoing multicasts */ struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */ }; -#endif /* __APPLE_API_PRIVATE */ -#ifdef __APPLE_API_UNSTABLE +/* mbuf tag for ip_forwarding info */ +struct ip_fwd_tag { + struct sockaddr_in *next_hop; /* next_hop */ +}; + +#endif /* KERNEL_PRIVATE */ + struct ipstat { u_long ips_total; /* total packets received */ u_long ips_badsum; /* checksum bad */ @@ -155,10 +163,6 @@ struct ipstat { u_long ips_nogif; /* no match gif found */ u_long ips_badaddr; /* invalid address on header */ }; -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef __APPLE_API_PRIVATE -#ifdef KERNEL struct ip_linklocal_stat { u_long iplls_in_total; @@ -167,6 +171,7 @@ struct ip_linklocal_stat { u_long iplls_out_badttl; }; +#ifdef KERNEL_PRIVATE /* flags passed to ip_output as last parameter */ #define IP_FORWARDING 0x1 /* most of ip header exists */ #define IP_RAWOUTPUT 0x2 /* raw ip header exists */ @@ -188,55 +193,48 @@ extern int ipforwarding; /* ip forwarding */ extern struct protosw *ip_protox[]; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */ -extern int (*legal_vif_num) __P((int)); -extern u_long (*ip_mcast_src) __P((int)); +extern int (*legal_vif_num)(int); +extern u_long (*ip_mcast_src)(int); extern int rsvp_on; extern struct pr_usrreqs rip_usrreqs; -int ip_ctloutput __P((struct socket *, struct sockopt *sopt)); -void ip_drain __P((void)); -void ip_freemoptions __P((struct ip_moptions *)); -void ip_init __P((void)); -extern int (*ip_mforward) __P((struct ip *, struct ifnet *, struct mbuf *, - struct ip_moptions *)); -int ip_output __P((struct mbuf *, - struct mbuf *, struct route *, int, struct ip_moptions *)); -void ip_savecontrol __P((struct inpcb *, struct mbuf **, struct ip *, - struct mbuf *)); -void ip_slowtimo __P((void)); +int ip_ctloutput(struct socket *, struct sockopt *sopt); +void ip_drain(void); +void ip_freemoptions(struct ip_moptions *); +void ip_init(void); +extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, + struct ip_moptions *); +int ip_output(struct mbuf *, + struct mbuf *, struct route *, int, struct ip_moptions *); +int ip_output_list(struct mbuf *, int, + struct mbuf *, struct route *, int, struct ip_moptions *); +struct in_ifaddr * + ip_rtaddr(struct in_addr, struct route *); +void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, + struct mbuf *); +void ip_slowtimo(void); struct mbuf * - ip_srcroute __P((void)); -void ip_stripoptions __P((struct mbuf *, struct mbuf *)); + ip_srcroute(void); +void ip_stripoptions(struct mbuf *, struct 
mbuf *); #if RANDOM_IP_ID u_int16_t - ip_randomid __P((void)); + ip_randomid(void); #endif -int rip_ctloutput __P((struct socket *, struct sockopt *)); -void rip_ctlinput __P((int, struct sockaddr *, void *)); -void rip_init __P((void)); -void rip_input __P((struct mbuf *, int)); -int rip_output __P((struct mbuf *, struct socket *, u_long)); -void ipip_input __P((struct mbuf *, int)); -void rsvp_input __P((struct mbuf *, int)); -int ip_rsvp_init __P((struct socket *)); -int ip_rsvp_done __P((void)); -int ip_rsvp_vif_init __P((struct socket *, struct sockopt *)); -int ip_rsvp_vif_done __P((struct socket *, struct sockopt *)); -void ip_rsvp_force_done __P((struct socket *)); - -#if IPDIVERT -void div_init __P((void)); -void div_input __P((struct mbuf *, int)); -void divert_packet __P((struct mbuf *, int, int)); -extern struct pr_usrreqs div_usrreqs; -extern u_int16_t ip_divert_cookie; -#endif - -extern struct sockaddr_in *ip_fw_fwd_addr; +int rip_ctloutput(struct socket *, struct sockopt *); +void rip_ctlinput(int, struct sockaddr *, void *); +void rip_init(void); +void rip_input(struct mbuf *, int); +int rip_output(struct mbuf *, struct socket *, u_long); +int rip_unlock(struct socket *, int, int); +void ipip_input(struct mbuf *, int); +void rsvp_input(struct mbuf *, int); +int ip_rsvp_init(struct socket *); +int ip_rsvp_done(void); +int ip_rsvp_vif_init(struct socket *, struct sockopt *); +int ip_rsvp_vif_done(struct socket *, struct sockopt *); +void ip_rsvp_force_done(struct socket *); void in_delayed_cksum(struct mbuf *m); -#endif /* _KERNEL */ -#endif /* __APPLE_API_PRIVATE */ - -#endif /* !_NETINET_IP_VAR_H_ */ +#endif /* KERNEL_PRIVATE */ +#endif /* !_NETINET_IP_VAR_H_ */ diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c new file mode 100644 index 000000000..52b8a0f25 --- /dev/null +++ b/bsd/netinet/kpi_ipfilter.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include /* for definition of NULL */ +#include +#include +#include +#include +#include + +#define _IP_VHL +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * kipf_lock and kipf_ref protect the linkage of the list of IP filters + * An IP filter can be removed only when kipf_ref is zero + * If an IP filter cannot be removed because kipf_ref is not zero, then + * the IP filter is marked and kipf_delayed_remove is set so that when + * kipf_ref eventually goes down to zero, the IP filter is removed + */ +static lck_mtx_t *kipf_lock = 0; +static unsigned long kipf_ref = 0; +static unsigned long kipf_delayed_remove = 0; + +__private_extern__ struct ipfilter_list ipv4_filters = TAILQ_HEAD_INITIALIZER(ipv4_filters); +__private_extern__ struct ipfilter_list ipv6_filters = TAILQ_HEAD_INITIALIZER(ipv6_filters); +__private_extern__ struct ipfilter_list tbr_filters = TAILQ_HEAD_INITIALIZER(tbr_filters); + +__private_extern__ void +ipf_ref(void) +{ + lck_mtx_lock(kipf_lock); + kipf_ref++; + lck_mtx_unlock(kipf_lock); +} + +__private_extern__ void +ipf_unref(void) +{ + lck_mtx_lock(kipf_lock); + + if (kipf_ref == 0) + panic("ipf_unref: kipf_ref == 0\n"); + + kipf_ref--; + if (kipf_ref == 0 && kipf_delayed_remove != 0) { + struct ipfilter *filter; + + while ((filter = TAILQ_FIRST(&tbr_filters))) { + ipf_detach_func ipf_detach = filter->ipf_filter.ipf_detach; + void* cookie = filter->ipf_filter.cookie; + + TAILQ_REMOVE(filter->ipf_head, filter, ipf_link); + TAILQ_REMOVE(&tbr_filters, filter, ipf_tbr); + kipf_delayed_remove--; + + if (ipf_detach) { + lck_mtx_unlock(kipf_lock); + ipf_detach(cookie); + lck_mtx_lock(kipf_lock); + /* In case some filter got to run while we released the lock */ + if (kipf_ref != 0) + break; + } + } + } + lck_mtx_unlock(kipf_lock); +} + +static errno_t +ipf_add( + const struct ipf_filter* filter, + ipfilter_t *filter_ref, + struct ipfilter_list *head) +{ + struct ipfilter *new_filter; + if (filter->name == NULL || (filter->ipf_input == NULL && filter->ipf_output == NULL)) + return EINVAL; + + MALLOC(new_filter, struct ipfilter*, sizeof(*new_filter), M_IFADDR, M_WAITOK); + if (new_filter == NULL) + return ENOMEM; + + lck_mtx_lock(kipf_lock); + new_filter->ipf_filter = *filter; + new_filter->ipf_head = head; + + /* + * 3957298 + * Make sure third parties have a chance to filter packets before + * SharedIP. Always keep SharedIP at the end of the list.
+ */ + if (filter->name != NULL && + strcmp(filter->name, "com.apple.nke.SharedIP") == 0) { + TAILQ_INSERT_TAIL(head, new_filter, ipf_link); + } + else { + TAILQ_INSERT_HEAD(head, new_filter, ipf_link); + } + + lck_mtx_unlock(kipf_lock); + + *filter_ref = (ipfilter_t)new_filter; + return 0; +} + +errno_t +ipf_addv4( + const struct ipf_filter* filter, + ipfilter_t *filter_ref) +{ + return ipf_add(filter, filter_ref, &ipv4_filters); +} + +errno_t +ipf_addv6( + const struct ipf_filter* filter, + ipfilter_t *filter_ref) +{ + return ipf_add(filter, filter_ref, &ipv6_filters); +} + +errno_t +ipf_remove( + ipfilter_t filter_ref) +{ + struct ipfilter *match = (struct ipfilter*)filter_ref; + struct ipfilter_list *head; + + if (match == 0 || (match->ipf_head != &ipv4_filters && match->ipf_head != &ipv6_filters)) + return EINVAL; + + head = match->ipf_head; + + lck_mtx_lock(kipf_lock); + TAILQ_FOREACH(match, head, ipf_link) { + if (match == (struct ipfilter*)filter_ref) { + ipf_detach_func ipf_detach = match->ipf_filter.ipf_detach; + void* cookie = match->ipf_filter.cookie; + + /* + * Cannot detach when there are filters running + */ + if (kipf_ref) { + kipf_delayed_remove++; + TAILQ_INSERT_TAIL(&tbr_filters, match, ipf_tbr); + match->ipf_filter.ipf_input = 0; + match->ipf_filter.ipf_output = 0; + lck_mtx_unlock(kipf_lock); + } else { + TAILQ_REMOVE(head, match, ipf_link); + lck_mtx_unlock(kipf_lock); + if (ipf_detach) + ipf_detach(cookie); + FREE(match, M_IFADDR); + } + return 0; + } + } + lck_mtx_unlock(kipf_lock); + + return ENOENT; +} + +int log_for_en1 = 0; + +errno_t +ipf_inject_input( + mbuf_t data, + ipfilter_t filter_ref) +{ + struct mbuf *m = (struct mbuf*)data; + struct m_tag *mtag = 0; + struct ip *ip = mtod(m, struct ip *); + u_int8_t vers; + int hlen; + errno_t error = 0; + protocol_family_t proto; + + vers = IP_VHL_V(ip->ip_vhl); + + switch (vers) { + case 4: + proto = PF_INET; + break; + case 6: + proto = PF_INET6; + break; + default: + error = ENOTSUP; + goto done; + } + + if (filter_ref == 0 && m->m_pkthdr.rcvif == 0) { + m->m_pkthdr.rcvif = ifunit("lo0"); + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.csum_flags = 0; + if (vers == 4) { + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, hlen); + } + } + if (filter_ref != 0) { + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT, + sizeof (ipfilter_t), M_NOWAIT); + if (mtag == NULL) { + error = ENOMEM; + goto done; + } + *(ipfilter_t*)(mtag+1) = filter_ref; + m_tag_prepend(m, mtag); + } + + error = proto_inject(proto, data); + +done: + return error; +} + +static errno_t +ipf_injectv4_out( + mbuf_t data, + ipfilter_t filter_ref, + ipf_pktopts_t options) +{ + struct route ro; + struct sockaddr_in *sin = (struct sockaddr_in*)&ro.ro_dst; + struct ip *ip; + struct mbuf *m = (struct mbuf*)data; + errno_t error = 0; + struct m_tag *mtag = 0; + struct ip_moptions *imo = 0, ip_moptions; + + /* Make the IP header contiguous in the mbuf */ + if ((size_t)m->m_len < sizeof(struct ip)) { + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) return ENOMEM; + } + ip = (struct ip*)m_mtod(m); + + if (filter_ref != 0) { + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT, + sizeof (ipfilter_t), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return ENOMEM; + } + *(ipfilter_t*)(mtag+1) = filter_ref; + m_tag_prepend(m, mtag); + } + + if (options && (options->ippo_flags & IPPOF_MCAST_OPTS)) { + imo = &ip_moptions; + + bzero(imo, sizeof(struct ip_moptions)); + imo->imo_multicast_ifp =
options->ippo_mcast_ifnet; + imo->imo_multicast_ttl = options->ippo_mcast_ttl; + imo->imo_multicast_loop = options->ippo_mcast_loop; + } + + /* Fill out a route structure and get a route */ + bzero(&ro, sizeof(struct route)); + sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_family = AF_INET; + sin->sin_port = 0; + sin->sin_addr = ip->ip_dst; + rtalloc(&ro); + if (ro.ro_rt == NULL) { + m_freem(m); + return ENETUNREACH; + } + /* Send */ + error = ip_output(m, NULL, &ro, IP_ALLOWBROADCAST | IP_RAWOUTPUT, imo); + + /* Release the route */ + if (ro.ro_rt) + rtfree(ro.ro_rt); + + return error; +} + +static errno_t +ipf_injectv6_out( + mbuf_t data, + ipfilter_t filter_ref, + ipf_pktopts_t options) +{ + struct route_in6 ro; + struct sockaddr_in6 *sin6 = &ro.ro_dst; + struct ip6_hdr *ip6; + struct mbuf *m = (struct mbuf*)data; + errno_t error = 0; + struct m_tag *mtag = 0; + struct ip6_moptions *im6o = 0, ip6_moptions; + + /* Make the IP header contiguous in the mbuf */ + if ((size_t)m->m_len < sizeof(struct ip6_hdr)) { + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) return ENOMEM; + } + ip6 = (struct ip6_hdr*)m_mtod(m); + + if (filter_ref != 0) { + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT, + sizeof (ipfilter_t), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return ENOMEM; + } + *(ipfilter_t*)(mtag+1) = filter_ref; + m_tag_prepend(m, mtag); + } + + if (options && (options->ippo_flags & IPPOF_MCAST_OPTS)) { + im6o = &ip6_moptions; + + bzero(im6o, sizeof(struct ip6_moptions)); + im6o->im6o_multicast_ifp = options->ippo_mcast_ifnet; + im6o->im6o_multicast_hlim = options->ippo_mcast_ttl; + im6o->im6o_multicast_loop = options->ippo_mcast_loop; + } + + + /* Fill out a route structure and get a route */ + bzero(&ro, sizeof(struct route_in6)); + sin6->sin6_len = sizeof(struct sockaddr_in6); + sin6->sin6_family = AF_INET6; + sin6->sin6_addr = ip6->ip6_dst; +#if 0 + /* This breaks loopback multicast!
*/ + /* The scope ID should already be at s6_addr16[1] */ + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + /* Hack, pull the scope_id out of the dest addr */ + sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); + ip6->ip6_dst.s6_addr16[1] = 0; + } else + sin6->sin6_scope_id = 0; +#endif + rtalloc((struct route*)&ro); + if (ro.ro_rt == NULL) { + m_freem(m); + return ENETUNREACH; + } + + /* Send */ + error = ip6_output(m, NULL, &ro, 0, im6o, NULL, 0); + + /* Release the route */ + if (ro.ro_rt) + rtfree(ro.ro_rt); + + return error; +} + +errno_t +ipf_inject_output( + mbuf_t data, + ipfilter_t filter_ref, + ipf_pktopts_t options) +{ + struct mbuf *m = (struct mbuf*)data; + u_int8_t vers; + errno_t error = 0; + + /* Make one byte of the header contiguous in the mbuf */ + if (m->m_len < 1) { + m = m_pullup(m, 1); + if (m == NULL) { + error = ENOMEM; + goto done; + } + } + + vers = (*(u_int8_t*)m_mtod(m)) >> 4; + switch (vers) + { + case 4: + error = ipf_injectv4_out(data, filter_ref, options); + break; + case 6: + error = ipf_injectv6_out(data, filter_ref, options); + break; + default: + m_freem(m); + error = ENOTSUP; + break; + } + +done: + return error; +} + +__private_extern__ ipfilter_t +ipf_get_inject_filter(struct mbuf *m) +{ + ipfilter_t filter_ref = 0; + struct m_tag *mtag; + + mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFILT, NULL); + if (mtag) { + filter_ref = *(ipfilter_t *)(mtag+1); + + m_tag_delete(m, mtag); + } + return filter_ref; } + +__private_extern__ int +ipf_init(void) +{ + int error = 0; + lck_grp_attr_t *grp_attributes = 0; + lck_attr_t *lck_attributes = 0; + lck_grp_t *lck_grp = 0; + + grp_attributes = lck_grp_attr_alloc_init(); + if (grp_attributes == 0) { + printf("ipf_init: lck_grp_attr_alloc_init failed\n"); + error = ENOMEM; + goto done; + } + lck_grp_attr_setdefault(grp_attributes); + + lck_grp = lck_grp_alloc_init("IP Filter", grp_attributes); + if (lck_grp == 0) { + printf("ipf_init: lck_grp_alloc_init failed\n"); + error = ENOMEM; + goto done; + } + + lck_attributes = lck_attr_alloc_init(); + if (lck_attributes == 0) { + printf("ipf_init: lck_attr_alloc_init failed\n"); + error = ENOMEM; + goto done; + } + lck_attr_setdefault(lck_attributes); + + kipf_lock = lck_mtx_alloc_init(lck_grp, lck_attributes); + if (kipf_lock == 0) { + printf("ipf_init: lck_mtx_alloc_init failed\n"); + error = ENOMEM; + goto done; + } + done: + if (error != 0) { + if (kipf_lock) { + lck_mtx_free(kipf_lock, lck_grp); + kipf_lock = 0; + } + } + if (lck_grp) { + lck_grp_free(lck_grp); + lck_grp = 0; + } + if (grp_attributes) { + lck_grp_attr_free(grp_attributes); + grp_attributes = 0; + } + if (lck_attributes) { + lck_attr_free(lck_attributes); + lck_attributes = 0; + } + + return error; +} diff --git a/bsd/netinet/kpi_ipfilter.h b/bsd/netinet/kpi_ipfilter.h new file mode 100644 index 000000000..bc0ae7867 --- /dev/null +++ b/bsd/netinet/kpi_ipfilter.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file.
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/*! + @header kpi_ipfilter.h + This header defines an API to attach IP filters. IP filters may be + attached to intercept either IPv4 or IPv6 packets. The filters can + intercept all IP packets into and out of the host regardless of + interface. + */ + +#ifndef __KPI_IPFILTER__ +#define __KPI_IPFILTER__ + +#include + +/* + * ipf_pktopts + * + * Options for outgoing packets. The options need to be preserved when + * re-injecting a packet. + */ +struct ipf_pktopts { + u_int32_t ippo_flags; + ifnet_t ippo_mcast_ifnet; + int ippo_mcast_loop; + u_int8_t ippo_mcast_ttl; +}; +#define IPPOF_MCAST_OPTS 0x1 + +typedef struct ipf_pktopts* ipf_pktopts_t; + +/*! + @typedef ipf_input_func + + @discussion ipf_input_func is used to filter incoming ip packets. + The IP filter is called for packets from all interfaces. The + filter is called between when the general IP processing is + handled and when the packet is passed up to the next layer + protocol such as udp or tcp. In the case of encapsulation, such + as UDP in ESP (IPSec), your filter will be called once for ESP + and then again for UDP. This will give your filter an + opportunity to process the ESP header as well as the decrypted + packet. Offset and protocol are used to determine where in the + packet processing is currently occurring. If you're only + interested in TCP or UDP packets, just return 0 if protocol + doesn't match TCP or UDP. + @param cookie The cookie specified when your filter was attached. + @param data The reassembled ip packet, data will start at the ip + header. + @param offset An offset to the next header + (udp/tcp/icmp/esp/etc...). + @param protocol The protocol type (udp/tcp/icmp/etc...) of the IP packet. + @result Return: + 0 - The caller will continue with normal processing of the packet. + EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. + Anything Else - The caller will free the packet and stop processing. +*/ +typedef errno_t (*ipf_input_func)(void* cookie, mbuf_t *data, int offset, u_int8_t protocol); + +/*! + @typedef ipf_output_func + + @discussion ipf_output_func is used to filter outbound ip packets. + The IP filter is called for packets to all interfaces. The + filter is called before fragmentation and IPSec processing. If + you need to change the destination IP address, call + ipf_inject_output and return EJUSTRETURN. + @param cookie The cookie specified when your filter was attached. + @param data The ip packet, will contain an IP header followed by the + rest of the IP packet. + @result Return: + 0 - The caller will continue with normal processing of the packet. + EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. + Anything Else - The caller will free the packet and stop processing. +*/ +typedef errno_t (*ipf_output_func)(void* cookie, mbuf_t *data, ipf_pktopts_t options); + +/*! + @typedef ipf_detach_func + + @discussion ipf_detach_func is called to notify your filter that it + has been detached.
+ @param cookie The cookie specified when your filter was attached. +*/ +typedef void (*ipf_detach_func)(void* cookie); + +/*! + @typedef ipf_filter + @discussion This structure is used to define an IP filter for + use with the ipf_addv4 or ipf_addv6 function. + @field cookie A kext defined cookie that will be passed to all + filter functions. + @field name A filter name used for debugging purposes. + @field ipf_input The filter function to handle inbound packets. + @field ipf_output The filter function to handle outbound packets. + @field ipf_detach The filter function to notify of a detach. +*/ +struct ipf_filter { + void* cookie; + const char* name; + ipf_input_func ipf_input; + ipf_output_func ipf_output; + ipf_detach_func ipf_detach; +}; + +struct opaque_ipfilter; +typedef struct opaque_ipfilter* ipfilter_t; + +/*! + @function ipf_addv4 + @discussion Attaches an IPv4 ip filter. + @param filter A structure defining the filter. + @param filter_ref A reference to the filter used to detach it. + @result 0 on success otherwise the errno error. + */ +errno_t ipf_addv4(const struct ipf_filter* filter, ipfilter_t *filter_ref); + +/*! + @function ipf_addv6 + @discussion Attaches an IPv6 ip filter. + @param filter A structure defining the filter. + @param filter_ref A reference to the filter used to detach it. + @result 0 on success otherwise the errno error. + */ +errno_t ipf_addv6(const struct ipf_filter* filter, ipfilter_t *filter_ref); + +/*! + @function ipf_remove + @discussion Detaches an IPv4 or IPv6 filter. + @param filter_ref The reference to the filter returned from ipf_addv4 or + ipf_addv6. + @result 0 on success otherwise the errno error. + */ +errno_t ipf_remove(ipfilter_t filter_ref); + +/*! + @function ipf_inject_input + @discussion Inject an IP packet as though it had just been + reassembled in ip_input. When re-injecting a packet intercepted + by the filter's ipf_input function, an IP filter can pass its + reference to avoid processing the packet twice. This also + prevents ip filters installed before this filter from + getting a chance to process the packet. If the filter modified + the packet, it should not specify the filter ref to give other + filters a chance to process the new packet. + + Caller is responsible for freeing mbuf chain in the event that + ipf_inject_input returns an error. + @param data The complete IPv4 or IPv6 packet, receive interface must + be set. + @param filter_ref The reference to the filter injecting the data + @result 0 on success otherwise the errno error. + */ +errno_t ipf_inject_input(mbuf_t data, ipfilter_t filter_ref); + +/*! + @function ipf_inject_output + @discussion Inject an IP packet as though it had just been sent to + ip_output. When re-injecting a packet intercepted by the + filter's ipf_output function, an IP filter can pass its + reference to avoid processing the packet twice. This also + prevents ip filters installed before this filter from getting a + chance to process the packet. If the filter modified the packet, + it should not specify the filter ref to give other filters a + chance to process the new packet. + @param data The complete IPv4 or IPv6 packet. + @param filter_ref The reference to the filter injecting the data + @param options Output options for the packet + @result 0 on success otherwise the errno error. ipf_inject_output + will always free the mbuf. 
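Putting the declarations in this header together: a client fills out a struct ipf_filter, attaches it with ipf_addv4(), and keeps the returned ipfilter_t for a later ipf_remove(). A minimal sketch, assuming hypothetical attach/detach entry points and a hypothetical filter name; only the types and functions declared above are real:

#include <sys/kernel_types.h>
#include <netinet/kpi_ipfilter.h>

static ipfilter_t g_filter_ref;

/* Pass every inbound packet through unmodified. */
static errno_t
example_ipf_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol)
{
	return 0;	/* continue normal processing */
}

static void
example_ipf_detach(void *cookie)
{
	/* Last chance to release any per-filter state. */
}

static errno_t
example_attach(void)
{
	struct ipf_filter filter = {
		.cookie     = NULL,
		.name       = "com.example.ipfilter",	/* hypothetical */
		.ipf_input  = example_ipf_input,
		.ipf_output = NULL,
		.ipf_detach = example_ipf_detach,
	};
	/* ipf_addv4 copies the structure, so a stack copy is fine. */
	return ipf_addv4(&filter, &g_filter_ref);
}

static errno_t
example_detach(void)
{
	return ipf_remove(g_filter_ref);
}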
+ */ +errno_t ipf_inject_output(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options); + +#endif /* __KPI_IPFILTER__ */ diff --git a/bsd/ppc/table.h b/bsd/netinet/kpi_ipfilter_var.h similarity index 51% rename from bsd/ppc/table.h rename to bsd/netinet/kpi_ipfilter_var.h index ff4d1dc34..f77f97214 100644 --- a/bsd/ppc/table.h +++ b/bsd/netinet/kpi_ipfilter_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,20 +19,40 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1989 Next, Inc. - */ - -#ifndef _BSD_PPC_TABLE_H_ -#define _BSD_PPC_TABLE_H_ + +#ifndef __KPI_IPFILTER_VAR__ +#define __KPI_IPFILTER_VAR__ #include +#include + +#ifdef KERNEL_PRIVATE + +/* Private data structure, stripped out by ifdef tool */ +/* Implementation specific bits */ + +#include + +struct ipfilter { + TAILQ_ENTRY(ipfilter) ipf_link; + struct ipf_filter ipf_filter; + struct ipfilter_list *ipf_head; + TAILQ_ENTRY(ipfilter) ipf_tbr; +}; +TAILQ_HEAD(ipfilter_list, ipfilter); + + +extern struct ipfilter_list ipv6_filters; +extern struct ipfilter_list ipv4_filters; + +ipfilter_t ipf_get_inject_filter(struct mbuf *m); +void ipf_ref(void); +void ipf_unref(void); +int ipf_init(void); + +void ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, ipfilter_t ipfref); -#ifdef __APPLE_API_OBSOLETE -/* - * No machine dependent table calls for ppc. - */ -#endif /* __APPLE_API_OBSOLETE */ +#endif /* KERNEL_PRIVATE */ -#endif /* _BSD_PPC_TABLE_H_ */ +#endif /*__KPI_IPFILTER_VAR__*/ diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 60f2ce01b..f361be892 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -93,11 +94,19 @@ #if IPSEC extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif +extern u_long route_generation; struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; +/* control hooks for ipfw and dummynet */ +ip_fw_ctl_t *ip_fw_ctl_ptr; +#if DUMMYNET +ip_dn_ctl_t *ip_dn_ctl_ptr; +#endif /* DUMMYNET */ + /* * Nominal space allocated to a raw ip socket. */ @@ -114,6 +123,8 @@ struct inpcbinfo ripcbinfo; void rip_init() { + struct inpcbinfo *pcbinfo; + LIST_INIT(&ripcb); ripcbinfo.listhead = &ripcb; /* @@ -128,6 +139,24 @@ rip_init() (4096 * sizeof(struct inpcb)), 4096, "ripzone"); + pcbinfo = &ripcbinfo; + /* + * allocate lock group attribute and group for raw IP pcb mutexes + */ + pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); + + pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr); + + /* + * allocate the lock attribute for raw IP pcb mutexes + */ + pcbinfo->mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(pcbinfo->mtx_attr); + + if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) + return; /* pretty much dead if this fails... */ + } static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; @@ -145,8 +174,10 @@ rip_input(m, iphlen) register struct inpcb *inp; struct inpcb *last = 0; struct mbuf *opts = 0; + int skipit = 0; ripsrc.sin_addr = ip->ip_src; + lck_rw_lock_shared(ripcbinfo.mtx); LIST_FOREACH(inp, &ripcb, inp_list) { #if INET6 if ((inp->inp_vflag & INP_IPV4) == 0) @@ -162,16 +193,23 @@ rip_input(m, iphlen) continue; if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); - + #if IPSEC /* check AH/ESP integrity.
*/ - if (ipsec_bypass == 0 && n && ipsec4_in_reject_so(n, last->inp_socket)) { - m_freem(n); - ipsecstat.in_polvio++; - /* do not inject data to pcb */ - } else + skipit = 0; + if (ipsec_bypass == 0 && n) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject_so(n, last->inp_socket)) { + m_freem(n); + ipsecstat.in_polvio++; + /* do not inject data to pcb */ + skipit = 1; + } + lck_mtx_unlock(sadb_mutex); + } #endif /*IPSEC*/ - if (n) { + if (n && skipit == 0) { + int error = 0; if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) ip_savecontrol(last, &opts, ip, n); @@ -180,51 +218,60 @@ rip_input(m, iphlen) n->m_pkthdr.len -= iphlen; n->m_data += iphlen; } +// ###LOCK need to lock that socket? if (sbappendaddr(&last->inp_socket->so_rcv, (struct sockaddr *)&ripsrc, n, - opts) == 0) { - /* should notify about lost packet */ - kprintf("rip_input can't append to socket\n"); - m_freem(n); - if (opts) - m_freem(opts); - } else + opts, &error) != 0) { sorwakeup(last->inp_socket); + } + else { + if (error) { + /* should notify about lost packet */ + kprintf("rip_input can't append to socket\n"); + } + } opts = 0; } } last = inp; } + lck_rw_done(ripcbinfo.mtx); #if IPSEC /* check AH/ESP integrity. */ - if (ipsec_bypass == 0 && last && ipsec4_in_reject_so(m, last->inp_socket)) { - m_freem(m); - ipsecstat.in_polvio++; - ipstat.ips_delivered--; - /* do not inject data to pcb */ - } else + skipit = 0; + if (ipsec_bypass == 0 && last) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject_so(m, last->inp_socket)) { + m_freem(m); + ipsecstat.in_polvio++; + ipstat.ips_delivered--; + /* do not inject data to pcb */ + skipit = 1; + } + lck_mtx_unlock(sadb_mutex); + } #endif /*IPSEC*/ - if (last) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, m); - if (last->inp_flags & INP_STRIPHDR) { - m->m_len -= iphlen; - m->m_pkthdr.len -= iphlen; - m->m_data += iphlen; - } - if (sbappendaddr(&last->inp_socket->so_rcv, - (struct sockaddr *)&ripsrc, m, opts) == 0) { - kprintf("rip_input(2) can't append to socket\n"); + if (skipit == 0) { + if (last) { + if (last->inp_flags & INP_CONTROLOPTS || + last->inp_socket->so_options & SO_TIMESTAMP) + ip_savecontrol(last, &opts, ip, m); + if (last->inp_flags & INP_STRIPHDR) { + m->m_len -= iphlen; + m->m_pkthdr.len -= iphlen; + m->m_data += iphlen; + } + if (sbappendaddr(&last->inp_socket->so_rcv, + (struct sockaddr *)&ripsrc, m, opts, NULL) != 0) { + sorwakeup(last->inp_socket); + } else { + kprintf("rip_input(2) can't append to socket\n"); + } + } else { m_freem(m); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - } else { - m_freem(m); - ipstat.ips_noproto++; - ipstat.ips_delivered--; + ipstat.ips_noproto++; + ipstat.ips_delivered--; + } } } @@ -293,23 +340,27 @@ rip_output(m, so, dst) } #endif /*IPSEC*/ - return (ip_output(m, inp->inp_options, &inp->inp_route, flags, + if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->generation_id != route_generation) { + rtfree(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)0; + } + + return (ip_output_list(m, 0, inp->inp_options, &inp->inp_route, flags, inp->inp_moptions)); } -int +extern int load_ipfw() { kern_return_t err; - /* Load the kext by the identifier */ - err = kmod_load_extension("com.apple.nke.IPFirewall"); - if (err) return err; + ipfw_init(); - if (ip_fw_ctl_ptr == NULL) { - /* Wait for the kext to finish loading */ - err = tsleep(&ip_fw_ctl_ptr, PWAIT | PCATCH, "load_ipfw_kext", 
5 * 60 /* 5 seconds */); - } +#if DUMMYNET + if (!DUMMYNET_LOADED) + ip_dn_init(); +#endif /* DUMMYNET */ + err = 0; return err == 0 && ip_fw_ctl_ptr == NULL ? -1 : err; } @@ -357,10 +408,10 @@ rip_ctloutput(so, sopt) #if DUMMYNET case IP_DUMMYNET_GET: - if (ip_dn_ctl_ptr == NULL) - error = ENOPROTOOPT ; - else + if (DUMMYNET_LOADED) error = ip_dn_ctl_ptr(sopt); + else + error = ENOPROTOOPT; break ; #endif /* DUMMYNET */ @@ -428,10 +479,10 @@ rip_ctloutput(so, sopt) case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: - if (ip_dn_ctl_ptr == NULL) - error = ENOPROTOOPT ; - else + if (DUMMYNET_LOADED) error = ip_dn_ctl_ptr(sopt); + else + error = ENOPROTOOPT ; break ; #endif @@ -493,6 +544,7 @@ rip_ctlinput(cmd, sa, vip) switch (cmd) { case PRC_IFDOWN: + lck_mtx_lock(rt_mtx); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa @@ -500,7 +552,7 @@ rip_ctlinput(cmd, sa, vip) /* * in_ifscrub kills the interface route. */ - in_ifscrub(ia->ia_ifp, ia); + in_ifscrub(ia->ia_ifp, ia, 1); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right @@ -511,16 +563,20 @@ rip_ctlinput(cmd, sa, vip) break; } } + lck_mtx_unlock(rt_mtx); break; case PRC_IFUP: + lck_mtx_lock(rt_mtx); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa) break; } - if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) + if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) { + lck_mtx_unlock(rt_mtx); return; + } flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; @@ -528,7 +584,8 @@ rip_ctlinput(cmd, sa, vip) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; - err = rtinit(&ia->ia_ifa, RTM_ADD, flags); + err = rtinit_locked(&ia->ia_ifa, RTM_ADD, flags); + lck_mtx_unlock(rt_mtx); if (err == 0) ia->ia_flags |= IFA_ROUTE; break; @@ -612,15 +669,21 @@ rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *addr = (struct sockaddr_in *)nam; + struct ifaddr *ifa = NULL; if (nam->sa_len != sizeof(*addr)) return EINVAL; - if (TAILQ_EMPTY(&ifnet) || ((addr->sin_family != AF_INET) && + if (TAILQ_EMPTY(&ifnet_head) || ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) || (addr->sin_addr.s_addr && - ifa_ifwithaddr((struct sockaddr *)addr) == 0)) + (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)) { return EADDRNOTAVAIL; + } + else if (ifa) { + ifafree(ifa); + ifa = NULL; + } inp->inp_laddr = addr->sin_addr; return 0; } @@ -633,7 +696,7 @@ rip_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (nam->sa_len != sizeof(*addr)) return EINVAL; - if (TAILQ_EMPTY(&ifnet)) + if (TAILQ_EMPTY(&ifnet_head)) return EADDRNOTAVAIL; if ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) @@ -673,6 +736,33 @@ rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, return rip_output(m, so, dst); } +int +rip_unlock(struct socket *so, int refcount, int debug) +{ + int lr_saved; + struct inpcb *inp = sotoinpcb(so); +#ifdef __ppc__ + if (debug == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = debug; +#endif + if (refcount) { + if (so->so_usecount <= 0) + panic("rip_unlock: bad refcount so=%x val=%x\n", so, so->so_usecount); + so->so_usecount--; + if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { + lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + lck_rw_lock_exclusive(ripcbinfo.mtx); + in_pcbdispose(inp); + lck_rw_done(ripcbinfo.mtx); + return(0); + } + }
lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + return(0); +} + static int rip_pcblist SYSCTL_HANDLER_ARGS { @@ -685,58 +775,64 @@ rip_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + lck_rw_lock_exclusive(ripcbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { n = ripcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); + lck_rw_done(ripcbinfo.mtx); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(ripcbinfo.mtx); return EPERM; + } /* * OK, now we're committed to doing something. */ - s = splnet(); gencnt = ripcbinfo.ipi_gencnt; n = ripcbinfo.ipi_count; - splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); - if (error) + if (error) { + lck_rw_done(ripcbinfo.mtx); return error; + } /* * We are done if there is no pcb */ - if (n == 0) + if (n == 0) { + lck_rw_done(ripcbinfo.mtx); return 0; + } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == 0) + if (inp_list == 0) { + lck_rw_done(ripcbinfo.mtx); return ENOMEM; + } - s = splnet(); for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { - if (inp->inp_gencnt <= gencnt) + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; } - splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); + inpcb_to_compat(inp, &xi.xi_inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); @@ -750,14 +846,13 @@ rip_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ - s = splnet(); xig.xig_gen = ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = ripcbinfo.ipi_count; - splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); + lck_rw_done(ripcbinfo.mtx); return error; } @@ -769,5 +864,5 @@ struct pr_usrreqs rip_usrreqs = { pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h index 30479bd88..89e6ba826 100644 --- a/bsd/netinet/tcp.h +++ b/bsd/netinet/tcp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,9 +58,12 @@ #ifndef _NETINET_TCP_H_ #define _NETINET_TCP_H_ #include +#include +#include -typedef u_int32_t tcp_seq; -typedef u_int32_t tcp_cc; /* connection count per rfc1644 */ +#ifndef _POSIX_C_SOURCE +typedef __uint32_t tcp_seq; +typedef __uint32_t tcp_cc; /* connection count per rfc1644 */ #define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */ #define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */ @@ -70,19 +73,19 @@ typedef u_int32_t tcp_cc; /* connection count per rfc1644 */ * Per RFC 793, September, 1981. 
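In the struct tcphdr that follows, th_off holds the header length as a count of 32-bit words, so a consumer recovers the byte length by shifting. A minimal sketch (illustrative only):

#include <netinet/tcp.h>

/* Returns the TCP header length in bytes, or -1 if th_off is bogus. */
static int
tcp_header_len(const struct tcphdr *th)
{
	int hlen = th->th_off << 2;

	/* Legal range: sizeof(struct tcphdr) (20 bytes, th_off == 5)
	 * up to TCP_MAXHLEN (60 bytes, th_off == 15). */
	if (hlen < (int)sizeof (struct tcphdr) || hlen > TCP_MAXHLEN)
		return -1;
	return hlen;
}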
*/ struct tcphdr { - u_short th_sport; /* source port */ - u_short th_dport; /* destination port */ + unsigned short th_sport; /* source port */ + unsigned short th_dport; /* destination port */ tcp_seq th_seq; /* sequence number */ tcp_seq th_ack; /* acknowledgement number */ -#if BYTE_ORDER == LITTLE_ENDIAN - u_int th_x2:4, /* (unused) */ - th_off:4; /* data offset */ +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN + unsigned int th_x2:4, /* (unused) */ + th_off:4; /* data offset */ #endif -#if BYTE_ORDER == BIG_ENDIAN - u_int th_off:4, /* data offset */ - th_x2:4; /* (unused) */ +#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN + unsigned int th_off:4, /* data offset */ + th_x2:4; /* (unused) */ #endif - u_char th_flags; + unsigned char th_flags; #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 @@ -93,9 +96,9 @@ struct tcphdr { #define TH_CWR 0x80 #define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG|TH_ECE|TH_CWR) - u_short th_win; /* window */ - u_short th_sum; /* checksum */ - u_short th_urp; /* urgent pointer */ + unsigned short th_win; /* window */ + unsigned short th_sum; /* checksum */ + unsigned short th_urp; /* urgent pointer */ }; #define TCPOPT_EOL 0 @@ -140,6 +143,15 @@ struct tcphdr { */ #define TCP_MINMSS 216 +/* + * TCP_MINMSSOVERLOAD is defined to be 1000 which should cover any type + * of interactive TCP session. + * See tcp_subr.c tcp_minmssoverload SYSCTL declaration and tcp_input.c + * for more comments. + * Setting this to "0" disables the minmssoverload check. + */ +#define TCP_MINMSSOVERLOAD 1000 + /* * Default maximum segment size for TCP6. * With an IP6 MSS of 1280, this is 1220, @@ -158,14 +170,17 @@ struct tcphdr { #define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ #define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) /* max space left for options */ +#endif /* _POSIX_C_SOURCE */ /* * User-settable options (used with setsockopt). */ #define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#ifndef _POSIX_C_SOURCE #define TCP_MAXSEG 0x02 /* set maximum segment size */ #define TCP_NOPUSH 0x04 /* don't push last block of write */ #define TCP_NOOPT 0x08 /* don't use TCP options */ #define TCP_KEEPALIVE 0x10 /* idle time used when SO_KEEPALIVE is enabled */ +#endif /* _POSIX_C_SOURCE */ #endif diff --git a/bsd/netinet/tcp_debug.h b/bsd/netinet/tcp_debug.h index 76a05474e..1fbf37aa9 100644 --- a/bsd/netinet/tcp_debug.h +++ b/bsd/netinet/tcp_debug.h @@ -58,7 +58,7 @@ #ifndef _NETINET_TCP_DEBUG_H_ #define _NETINET_TCP_DEBUG_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE struct tcp_debug { n_time td_time; @@ -97,11 +97,6 @@ static char *tanames[] = #define TCP_NDEBUG 100 -#ifndef KERNEL -/* XXX common variables for broken applications. */ -struct tcp_debug tcp_debug[TCP_NDEBUG]; -int tcp_debx; -#endif -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ #endif /* !_NETINET_TCP_DEBUG_H_ */ diff --git a/bsd/netinet/tcp_fsm.h b/bsd/netinet/tcp_fsm.h index e4a2e6826..6bbb41490 100644 --- a/bsd/netinet/tcp_fsm.h +++ b/bsd/netinet/tcp_fsm.h @@ -99,7 +99,7 @@ #define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) #define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL_PRIVATE #ifdef TCPOUTFLAGS /* * Flags used when sending segments in tcp_output. 
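
An aside on the struct tcphdr hunk above: th_x2 and th_off are C bitfields, and bitfield packing follows host byte order, which is why the patch keys the two alternative declarations off __DARWIN_BYTE_ORDER instead of the old BYTE_ORDER macro. A minimal sketch of a layout-independent reader; the helper name and the raw-byte-pointer interface are illustrative assumptions, not part of the patch:

    #include <stdint.h>

    /* On the wire, byte 12 of a TCP header carries th_off in its high
     * nibble and th_x2 in its low nibble. Reading the raw byte sidesteps
     * the endian-dependent bitfield layout the guards above account for. */
    static inline unsigned int
    tcp_header_len(const uint8_t *th)
    {
            return ((unsigned int)(th[12] >> 4)) << 2;  /* th_off counts 32-bit words */
    }
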
@@ -121,19 +121,19 @@ static u_char tcp_outflags[TCP_NSTATES] = { TH_ACK, /* 10, TIME_WAIT */ }; #endif -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ #if KPROF -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE int tcp_acounts[TCP_NSTATES][PRU_NREQ]; -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif #ifdef TCPSTATES char *tcpstates[] = { "CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD", "ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING", - "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT", + "LAST_ACK", "FIN_WAIT_2", "TIME_WAIT" }; #endif diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index 46fe20ed1..b931b642d 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -129,6 +129,7 @@ extern int apple_hwcksum_rx; #if IPSEC extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif struct tcpstat tcpstat; @@ -188,14 +189,14 @@ struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; -static void tcp_dooptions __P((struct tcpcb *, - u_char *, int, struct tcphdr *, struct tcpopt *)); -static void tcp_pulloutofband __P((struct socket *, - struct tcphdr *, struct mbuf *, int)); -static int tcp_reass __P((struct tcpcb *, struct tcphdr *, int *, - struct mbuf *)); -static void tcp_xmit_timer __P((struct tcpcb *, int)); -static int tcp_newreno __P((struct tcpcb *, struct tcphdr *)); +static void tcp_dooptions(struct tcpcb *, + u_char *, int, struct tcphdr *, struct tcpopt *); +static void tcp_pulloutofband(struct socket *, + struct tcphdr *, struct mbuf *, int); +static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, - struct mbuf *); +static void tcp_xmit_timer(struct tcpcb *, int); +static int tcp_newreno __P((struct tcpcb *, struct tcphdr *)); /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ #if INET6 @@ -212,16 +213,39 @@ do { \ extern u_long *delack_bitmask; +extern void ipfwsyslog( int level, char *format,...); +extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ); +extern int fw_verbose; + +#define log_in_vain_log( a ) { \ + if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ + ipfwsyslog a ; \ + } \ + else log a ; \ +} + /* - * Indicate whether this ack should be delayed. We can delay the ack if - * - delayed acks are enabled and - * - there is no delayed ack timer in progress and + * Indicate whether this ack should be delayed. + * We can delay the ack if: + * - delayed acks are enabled (set to 1) and * - our last ack wasn't a 0-sized window. We never want to delay - * the ack that opens up a 0-sized window. + * the ack that opens up a 0-sized window. + * - delayed acks are enabled (set to 2, "more compatible") and + * - our last ack wasn't a 0-sized window. + * - if the peer hasn't sent us a TH_PUSH data packet (this solves 3649245) + * - the peer hasn't sent us a TH_PUSH data packet; if it did, take this as a clue that we + * need to ACK with no delay. 
This helps higher level protocols who won't send + * us more data even if the window is open because their last "segment" hasn't been ACKed + * + * */ #define DELAY_ACK(tp) \ - (tcp_delack_enabled && !callout_pending(tp->tt_delack) && \ - (tp->t_flags & TF_RXWIN0SENT) == 0) + (((tcp_delack_enabled == 1) && ((tp->t_flags & TF_RXWIN0SENT) == 0)) || \ + (((tcp_delack_enabled == 2) && (tp->t_flags & TF_RXWIN0SENT) == 0) && \ + ((thflags & TH_PUSH) == 0) && ((tp->t_flags & TF_DELACK) == 0))) + + +static int tcpdropdropablreq(struct socket *head); static int @@ -237,6 +261,7 @@ tcp_reass(tp, th, tlenp, m) struct tseg_qent *te; struct socket *so = tp->t_inpcb->inp_socket; int flags; + int dowakeup = 0; /* * Call with th==0 after become established to @@ -362,8 +387,10 @@ present: LIST_REMOVE(q, tqe_q); if (so->so_state & SS_CANTRCVMORE) m_freem(q->tqe_m); - else - sbappend(&so->so_rcv, q->tqe_m); + else { + if (sbappend(&so->so_rcv, q->tqe_m)) + dowakeup = 1; + } FREE(q, M_TSEGQ); tcp_reass_qsize--; q = nq; @@ -387,8 +414,9 @@ present: (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) | (tp->t_inpcb->inp_faddr.s_addr & 0xffff)), 0,0,0); - } - sorwakeup(so); + } + if (dowakeup) + sorwakeup(so); /* done with socket lock held */ return (flags); } @@ -407,7 +435,7 @@ tcp6_input(mp, offp) register struct mbuf *m = *mp; struct in6_ifaddr *ia6; - IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), return IPPROTO_DONE); /* * draft-itojun-ipv6-tcp-to-anycast @@ -451,13 +479,26 @@ tcp_input(m, off0) #endif int dropsocket = 0; int iss = 0; + int nosock = 0; u_long tiwin; struct tcpopt to; /* options in this segment */ struct rmxp_tao *taop; /* pointer to our TAO cache entry */ struct rmxp_tao tao_noncached; /* in case there's no cached entry */ + struct sockaddr_in *next_hop = NULL; #if TCPDEBUG short ostate = 0; #endif + struct m_tag *fwd_tag; + + /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ + fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL); + if (fwd_tag != NULL) { + struct ip_fwd_tag *ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + + next_hop = ipfwd_tag->next_hop; + m_tag_delete(m, fwd_tag); + } + #if INET6 struct ip6_hdr *ip6 = NULL; int isipv6; @@ -483,7 +524,7 @@ tcp_input(m, off0) tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) { tcpstat.tcps_rcvbadsum++; - goto drop; + goto dropnosock; } th = (struct tcphdr *)((caddr_t)ip6 + off0); @@ -500,7 +541,7 @@ tcp_input(m, off0) */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { /* XXX stat */ - goto drop; + goto dropnosock; } } else #endif /* INET6 */ @@ -579,7 +620,7 @@ tcp_input(m, off0) } if (th->th_sum) { tcpstat.tcps_rcvbadsum++; - goto drop; + goto dropnosock; } #if INET6 /* Re-initialization for later version check */ @@ -594,13 +635,13 @@ tcp_input(m, off0) off = th->th_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { tcpstat.tcps_rcvbadoff++; - goto drop; + goto dropnosock; } tlen -= off; /* tlen is used instead of ti->ti_len */ if (off > sizeof (struct tcphdr)) { #if INET6 if (isipv6) { - IP6_EXTHDR_CHECK(m, off0, off, ); + IP6_EXTHDR_CHECK(m, off0, off, return); ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); } else @@ -647,7 +688,7 @@ tcp_input(m, off0) * This is incompatible with RFC1644 extensions (T/TCP). 
*/ if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN)) - goto drop; + goto dropnosock; #endif /* @@ -673,7 +714,7 @@ tcp_input(m, off0) */ findpcb: #if IPFIREWALL_FORWARD - if (ip_fw_fwd_addr != NULL + if (next_hop != NULL #if INET6 && isipv6 == NULL /* IPv6 support is not yet */ #endif /* INET6 */ @@ -688,19 +729,18 @@ findpcb: /* * No, then it's new. Try find the ambushing socket */ - if (!ip_fw_fwd_addr->sin_port) { + if (!next_hop->sin_port) { inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, - th->th_sport, ip_fw_fwd_addr->sin_addr, + th->th_sport, next_hop->sin_addr, th->th_dport, 1, m->m_pkthdr.rcvif); } else { inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport, - ip_fw_fwd_addr->sin_addr, - ntohs(ip_fw_fwd_addr->sin_port), 1, + next_hop->sin_addr, + ntohs(next_hop->sin_port), 1, m->m_pkthdr.rcvif); } } - ip_fw_fwd_addr = NULL; } else #endif /* IPFIREWALL_FORWARD */ { @@ -716,17 +756,23 @@ findpcb: } #if IPSEC + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); #if INET6 - if (isipv6) { - if (ipsec_bypass == 0 && inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) { - ipsec6stat.in_polvio++; - goto drop; - } - } else + if (isipv6) { + if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) { + ipsec6stat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + goto dropnosock; + } + } else #endif /* INET6 */ - if (ipsec_bypass == 0 && inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) { - ipsecstat.in_polvio++; - goto drop; + if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) { + ipsecstat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + goto dropnosock; + } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ @@ -739,55 +785,88 @@ findpcb: if (inp == NULL) { if (log_in_vain) { #if INET6 - char dbuf[INET6_ADDRSTRLEN], sbuf[INET6_ADDRSTRLEN]; + char dbuf[MAX_IPv6_STR_LEN], sbuf[MAX_IPv6_STR_LEN]; #else /* INET6 */ - char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"]; + char dbuf[MAX_IPv4_STR_LEN], sbuf[MAX_IPv4_STR_LEN]; #endif /* INET6 */ #if INET6 if (isipv6) { - strcpy(dbuf, ip6_sprintf(&ip6->ip6_dst)); - strcpy(sbuf, ip6_sprintf(&ip6->ip6_src)); + inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); + inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); } else #endif - { - strcpy(dbuf, inet_ntoa(ip->ip_dst)); - strcpy(sbuf, inet_ntoa(ip->ip_src)); - } + { + inet_ntop(AF_INET, &ip->ip_dst, dbuf, sizeof(dbuf)); + inet_ntop(AF_INET, &ip->ip_src, sbuf, sizeof(sbuf)); + } switch (log_in_vain) { case 1: if(thflags & TH_SYN) log(LOG_INFO, - "Connection attempt to TCP %s:%d from %s:%d\n", - dbuf, ntohs(th->th_dport), - sbuf, - ntohs(th->th_sport)); + "Connection attempt to TCP %s:%d from %s:%d\n", + dbuf, ntohs(th->th_dport), + sbuf, + ntohs(th->th_sport)); break; case 2: log(LOG_INFO, - "Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n", - dbuf, ntohs(th->th_dport), sbuf, - ntohs(th->th_sport), thflags); + "Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n", + dbuf, ntohs(th->th_dport), sbuf, + ntohs(th->th_sport), thflags); + break; + case 3: + if ((thflags & TH_SYN) && + !(m->m_flags & (M_BCAST | M_MCAST)) && +#if INET6 + ((isipv6 && !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) || + (!isipv6 && ip->ip_dst.s_addr != ip->ip_src.s_addr)) +#else + ip->ip_dst.s_addr != ip->ip_src.s_addr +#endif + ) + log_in_vain_log((LOG_INFO, + "Stealth Mode connection attempt to TCP %s:%d from %s:%d\n", + dbuf, ntohs(th->th_dport), + sbuf, + ntohs(th->th_sport))); break; default: break; } } if (blackhole) { - switch (blackhole) { - case 1: - if (thflags & 
TH_SYN) - goto drop; - break; - case 2: - goto drop; - default: - goto drop; - } + if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type != IFT_LOOP) + switch (blackhole) { + case 1: + if (thflags & TH_SYN) + goto dropnosock; + break; + case 2: + goto dropnosock; + default: + goto dropnosock; + } } rstreason = BANDLIM_RST_CLOSEDPORT; - goto dropwithreset; + goto dropwithresetnosock; + } + so = inp->inp_socket; + if (so == NULL) { + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) + inp = NULL; // pretend we didn't find it +#if TEMPDEBUG + printf("tcp_input: no more socket for inp=%x\n", inp); +#endif + goto dropnosock; } + tcp_lock(so, 1, 2); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(so, 1, 2); + inp = NULL; // pretend we didn't find it + goto dropnosock; + } + tp = intotcpcb(inp); if (tp == 0) { rstreason = BANDLIM_RST_CLOSEDPORT; @@ -811,7 +890,6 @@ findpcb: else tiwin = th->th_win; - so = inp->inp_socket; if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { #if TCPDEBUG if (so->so_options & SO_DEBUG) { @@ -827,11 +905,10 @@ findpcb: } #endif if (so->so_options & SO_ACCEPTCONN) { - register struct tcpcb *tp0 = tp; + register struct tcpcb *tp0 = tp; struct socket *so2; -#if IPSEC struct socket *oso; -#endif + struct sockaddr_storage from; #if INET6 struct inpcb *oinp = sotoinpcb(so); #endif /* INET6 */ @@ -900,31 +977,51 @@ findpcb: } } #endif - - so2 = sonewconn(so, 0); + if (so->so_filt) { + if (isipv6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)&from; + + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = th->th_sport; + sin6->sin6_flowinfo = 0; + sin6->sin6_addr = ip6->ip6_src; + sin6->sin6_scope_id = 0; + } else { + struct sockaddr_in *sin = (struct sockaddr_in*)&from; + + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = th->th_sport; + sin->sin_addr = ip->ip_src; + } + so2 = sonewconn(so, 0, (struct sockaddr*)&from); + } else { + so2 = sonewconn(so, 0, NULL); + } if (so2 == 0) { tcpstat.tcps_listendrop++; - so2 = sodropablereq(so); - if (so2) { - if (tcp_lq_overflow) - sototcpcb(so2)->t_flags |= - TF_LQ_OVERFLOW; - tcp_drop(sototcpcb(so2), ETIMEDOUT); - so2 = sonewconn(so, 0); + if (tcpdropdropablreq(so)) { + if (so->so_filt) + so2 = sonewconn(so, 0, (struct sockaddr*)&from); + else + so2 = sonewconn(so, 0, NULL); } - if (!so2) + if (!so2) goto drop; } /* * Make sure listening socket did not get closed during socket allocation, - not only this is incorrect but it is know to cause panic - */ + * not only is this incorrect, but it is known to cause a panic + */ if (so->so_gencnt != ogencnt) goto drop; -#if IPSEC + oso = so; -#endif + tcp_unlock(so, 0, 0); /* Unlock but keep a reference on listener for now */ + so = so2; + tcp_lock(so, 1, 0); /* * This is ugly, but .... * @@ -950,7 +1047,7 @@ findpcb: } #endif /* INET6 */ inp->inp_lport = th->th_dport; - if (in_pcbinshash(inp) != 0) { + if (in_pcbinshash(inp, 0) != 0) { /* * Undo the assignments above if we failed to * put the PCB on the hash lists. @@ -962,6 +1059,8 @@ #endif /* INET6 */ inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; + tcp_lock(oso, 0, 0); /* release ref on parent */ + tcp_unlock(oso, 1, 0); goto drop; } #if IPSEC @@ -978,6 +1077,8 @@ findpcb: * Note: dropwithreset makes sure we don't * send a RST in response to a RST. 
*/ + tcp_lock(oso, 0, 0); /* release ref on parent */ + tcp_unlock(oso, 1, 0); if (thflags & TH_ACK) { tcpstat.tcps_badsyn++; rstreason = BANDLIM_RST_OPENPORT; @@ -1010,22 +1111,26 @@ findpcb: } else #endif /* INET6 */ inp->inp_options = ip_srcroute(); + tcp_lock(oso, 0, 0); #if IPSEC /* copy old policy into new socket's */ if (sotoinpcb(oso)->inp_sp) { int error = 0; + lck_mtx_lock(sadb_mutex); /* Is it a security hole here to silently fail to copy the policy? */ if (inp->inp_sp != NULL) error = ipsec_init_policy(so, &inp->inp_sp); if (error != 0 || ipsec_copy_policy(sotoinpcb(oso)->inp_sp, inp->inp_sp)) printf("tcp_input: could not copy policy\n"); + lck_mtx_unlock(sadb_mutex); } #endif + tcp_unlock(oso, 1, 0); /* now drop the reference on the listener */ tp = intotcpcb(inp); tp->t_state = TCPS_LISTEN; tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY); - + tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl; /* Compute proper scaling value from buffer space */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && TCP_MAXWIN << tp->request_r_scale < @@ -1036,6 +1141,68 @@ findpcb: } } +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif + /* + * Radar 3529618 + * This is the second part of the MSS DoS prevention code (after + * minmss on the sending side) and it deals with too many too-small + * tcp packets in too short a timeframe (1 second). + * + * For every full second we count the number of received packets + * and bytes. If we get a lot of packets per second for this connection + * (tcp_minmssoverload) we take a closer look at it and compute the + * average packet size for the past second. If that is less than + * tcp_minmss we get too many packets with very small payload, which + * is not good and burdens our system (and every packet generates + * a wakeup to the process connected to our socket). We can reasonably + * expect this to be a small-packet DoS attack meant to exhaust our CPU + * cycles. + * + * Care has to be taken for the minimum packet overload value. This + * value defines the minimum number of packets per second before we + * start to worry. It must not be set too low, to avoid killing + * interactive connections that use many small packets, such as + * telnet or SSH. + * + * Setting either tcp_minmssoverload or tcp_minmss to "0" disables + * this check. + * + * Account for the packet only if it carries payload; skip over pure ACKs, etc. + */ + if (tcp_minmss && tcp_minmssoverload && + tp->t_state == TCPS_ESTABLISHED && tlen > 0) { + if (tp->rcv_reset > tcp_now) { + tp->rcv_pps++; + tp->rcv_byps += tlen + off; + if (tp->rcv_pps > tcp_minmssoverload) { + if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) { + char ipstrbuf[MAX_IPv6_STR_LEN]; + printf("too many small tcp packets from " + "%s:%u, av. %lubyte/packet, " + "dropping connection\n", +#ifdef INET6 + isipv6 ? + inet_ntop(AF_INET6, &inp->in6p_faddr, ipstrbuf, + sizeof(ipstrbuf)) : +#endif + inet_ntop(AF_INET, &inp->inp_faddr, ipstrbuf, + sizeof(ipstrbuf)), + inp->inp_fport, + tp->rcv_byps / tp->rcv_pps); + tp = tcp_drop(tp, ECONNRESET); +/* tcpstat.tcps_minmssdrops++; */ + goto drop; + } + } + } else { + tp->rcv_reset = tcp_now + PR_SLOWHZ; + tp->rcv_pps = 1; + tp->rcv_byps = tlen + off; + } + } + /* * Segment received on connection. * Reset idle time and keep-alive timer. 
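
The small-packet accounting added above is compact but dense; the same sliding one-second window is easier to see in isolation. A hedged sketch follows: the standalone struct and the assumption that the clock advances PR_SLOWHZ (2) ticks per second are illustrative stand-ins, not kernel definitions.

    /* Illustrative model of the tp->rcv_reset / rcv_pps / rcv_byps logic above. */
    struct pps_window {
            unsigned int reset;   /* tick at which the current 1-second window ends */
            unsigned int pps;     /* packets counted in this window */
            unsigned int byps;    /* bytes counted in this window */
    };

    /* Returns nonzero when the flow looks like a small-packet DoS: more than
     * 'overload' packets in one second whose average size is below 'minmss'.
     * Setting either limit to 0 disables the check, as in the patch. */
    static int
    small_packet_overload(struct pps_window *w, unsigned int now,
        unsigned int len, unsigned int minmss, unsigned int overload)
    {
            if (minmss == 0 || overload == 0)
                    return 0;
            if (w->reset > now) {               /* still inside the window */
                    w->pps++;
                    w->byps += len;
                    if (w->pps > overload && (w->byps / w->pps) < minmss)
                            return 1;           /* caller drops the connection */
            } else {                            /* window expired: start a new one */
                    w->reset = now + 2;         /* PR_SLOWHZ ticks = 1 second (assumed) */
                    w->pps = 1;
                    w->byps = len;
            }
            return 0;
    }
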
@@ -1144,9 +1311,10 @@ findpcb: else if (tp->t_timer[TCPT_PERSIST] == 0) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; - if (so->so_snd.sb_cc) + sowwakeup(so); /* has to be done with socket lock held */ + if ((so->so_snd.sb_cc) || (tp->t_flags & TF_ACKNOW)) (void) tcp_output(tp); - sowwakeup(so); + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; } @@ -1167,7 +1335,8 @@ findpcb: * Add data to socket buffer. */ m_adj(m, drop_hdrlen); /* delayed header drop */ - sbappend(&so->so_rcv, m); + if (sbappend(&so->so_rcv, m)) + sorwakeup(so); #if INET6 if (isipv6) { KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport), @@ -1181,14 +1350,13 @@ findpcb: (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)), th->th_seq, th->th_ack, th->th_win); } - if (tcp_delack_enabled) { - TCP_DELACK_BITSET(tp->t_inpcb->hash_element); + if (DELAY_ACK(tp)) { tp->t_flags |= TF_DELACK; } else { tp->t_flags |= TF_ACKNOW; tcp_output(tp); } - sorwakeup(so); + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; } @@ -1200,6 +1368,9 @@ findpcb: * Receive window is amount of space in rcv queue, * but not less than advertised window. */ +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif { int win; win = sbspace(&so->so_rcv); @@ -1234,7 +1405,10 @@ findpcb: register struct sockaddr_in6 *sin6; #endif - if (thflags & TH_RST) +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif + if (thflags & TH_RST) goto drop; if (thflags & TH_ACK) { rstreason = BANDLIM_RST_OPENPORT; @@ -1299,6 +1473,9 @@ findpcb: } else #endif { +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_NOWAIT); if (sin == NULL) @@ -1368,7 +1545,7 @@ findpcb: * segment. Otherwise must send ACK now in case * the other side is slow starting. */ - if (tcp_delack_enabled && ((thflags & TH_FIN) || + if (DELAY_ACK(tp) && ((thflags & TH_FIN) || (tlen != 0 && #if INET6 (isipv6 && in6_localaddr(&inp->in6p_faddr)) @@ -1380,11 +1557,11 @@ findpcb: ) #endif /* INET6 */ ))) { - TCP_DELACK_BITSET(tp->t_inpcb->hash_element); tp->t_flags |= (TF_DELACK | TF_NEEDSYN); } - else + else { tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN); + } /* * Limit the `virtual advertised window' to TCP_MAXWIN @@ -1523,12 +1700,12 @@ findpcb: * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ - if (tcp_delack_enabled && tlen != 0) { - TCP_DELACK_BITSET(tp->t_inpcb->hash_element); + if (DELAY_ACK(tp) && tlen != 0) { tp->t_flags |= TF_DELACK; } - else + else { tp->t_flags |= TF_ACKNOW; + } /* * Received in SYN_SENT[*] state. * Transitions: @@ -1634,6 +1811,7 @@ trimthenstep6: } if (CC_GT(to.to_cc, tp->cc_recv)) { tp = tcp_close(tp); + tcp_unlock(so, 1, 50); goto findpcb; } else @@ -1744,6 +1922,9 @@ trimthenstep6: goto drop; } +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. 
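
The PAWS rule referenced in the comment above boils down to a single modular comparison on 32-bit timestamps. A minimal sketch, mirroring the TSTMP_LT style of comparison used in tcp_seq.h; the wrapper function is an illustrative assumption:

    #include <stdint.h>

    /* Timestamps, like sequence numbers, compare modulo 2^32. */
    #define TSTMP_LT(a, b)  ((int32_t)((a) - (b)) < 0)

    /* RFC 1323 PAWS: reject a segment whose timestamp is strictly older
     * than the newest timestamp previously recorded from this peer
     * (ts_recent == 0 means no timestamp has been recorded yet). */
    static int
    paws_reject(uint32_t ts_val, uint32_t ts_recent)
    {
            return ts_recent != 0 && TSTMP_LT(ts_val, ts_recent);
    }
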
@@ -1873,6 +2054,7 @@ trimthenstep6: SEQ_GT(th->th_seq, tp->rcv_nxt)) { iss = tcp_new_isn(tp); tp = tcp_close(tp); + tcp_unlock(so, 1, 0); goto findpcb; } /* @@ -2203,10 +2385,17 @@ process_ACK: tp->snd_wnd -= acked; ourfinisacked = 0; } + sowwakeup(so); + /* detect una wraparound */ + if (SEQ_GEQ(tp->snd_una, tp->snd_recover) && + SEQ_LT(th->th_ack, tp->snd_recover)) + tp->snd_recover = th->th_ack; + if (SEQ_GT(tp->snd_una, tp->snd_high) && + SEQ_LEQ(th->th_ack, tp->snd_high)) + tp->snd_high = th->th_ack - 1; tp->snd_una = th->th_ack; if (SEQ_LT(tp->snd_nxt, tp->snd_una)) tp->snd_nxt = tp->snd_una; - sowwakeup(so); switch (tp->t_state) { @@ -2230,6 +2419,7 @@ process_ACK: } add_to_time_wait(tp); tp->t_state = TCPS_FIN_WAIT_2; + goto drop; } break; @@ -2389,25 +2579,19 @@ dodata: /* XXX */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && TCPS_HAVEESTABLISHED(tp->t_state)) { -#ifdef __APPLE__ - if (tcp_delack_enabled) { - TCP_DELACK_BITSET(tp->t_inpcb->hash_element); + if (DELAY_ACK(tp) && ((tp->t_flags & TF_ACKNOW) == 0)) { tp->t_flags |= TF_DELACK; } -#else - if (DELAY_ACK(tp)) - callout_reset(tp->tt_delack, tcp_delacktime, - tcp_timer_delack, tp); -#endif - else + else { tp->t_flags |= TF_ACKNOW; + } tp->rcv_nxt += tlen; thflags = th->th_flags & TH_FIN; tcpstat.tcps_rcvpack++; tcpstat.tcps_rcvbyte += tlen; ND6_HINT(tp); - sbappend(&so->so_rcv, m); - sorwakeup(so); + if (sbappend(&so->so_rcv, m)) + sorwakeup(so); } else { thflags = tcp_reass(tp, th, &tlen, m); tp->t_flags |= TF_ACKNOW; @@ -2456,12 +2640,12 @@ dodata: /* XXX */ * Otherwise, since we received a FIN then no * more input can be expected, send ACK now. */ - if (tcp_delack_enabled && (tp->t_flags & TF_NEEDSYN)) { - TCP_DELACK_BITSET(tp->t_inpcb->hash_element); + if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN)) { tp->t_flags |= TF_DELACK; } - else + else { tp->t_flags |= TF_ACKNOW; + } tp->rcv_nxt++; } switch (tp->t_state) { @@ -2527,6 +2711,7 @@ dodata: /* XXX */ */ if (needoutput || (tp->t_flags & TF_ACKNOW)) (void) tcp_output(tp); + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; @@ -2560,9 +2745,11 @@ dropafterack: m_freem(m); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; - +dropwithresetnosock: + nosock = 1; dropwithreset: /* * Generate a RST, dropping incoming segment. @@ -2610,11 +2797,17 @@ dropwithreset: (tcp_seq)0, TH_RST|TH_ACK); } /* destroy temporarily created socket */ - if (dropsocket) - (void) soabort(so); + if (dropsocket) { + (void) soabort(so); + tcp_unlock(so, 1, 0); + } + else + if ((inp != NULL) && (nosock == 0)) + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; - +dropnosock: + nosock = 1; drop: /* * Drop space held by incoming segment and return. 
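
The snd_recover/snd_high wraparound guards added in process_ACK above depend on TCP's modular sequence arithmetic: subtract in 32 bits, then test the sign. The macros below mirror the tcp_seq.h definitions; the worked values in the closing comment are illustrative:

    #include <stdint.h>

    typedef uint32_t tcp_seq;

    #define SEQ_LT(a, b)    ((int32_t)((a) - (b)) <  0)
    #define SEQ_LEQ(a, b)   ((int32_t)((a) - (b)) <= 0)
    #define SEQ_GT(a, b)    ((int32_t)((a) - (b)) >  0)
    #define SEQ_GEQ(a, b)   ((int32_t)((a) - (b)) >= 0)

    /* Example: with una = 0x00000010 (just wrapped) and recover = 0xfffffff0,
     * a plain unsigned '>=' would say una < recover, but SEQ_GEQ(una, recover)
     * is true, which is the ordering the guards above need. */
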
@@ -2626,8 +2819,13 @@ drop: #endif m_freem(m); /* destroy temporarily created socket */ - if (dropsocket) - (void) soabort(so); + if (dropsocket) { + (void) soabort(so); + tcp_unlock(so, 1, 0); + } + else + if (nosock == 0) + tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); return; } @@ -3188,6 +3386,7 @@ tcp_newreno(tp, th) * is called) */ tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una); + tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); tp->snd_cwnd = ocwnd; if (SEQ_GT(onxt, tp->snd_nxt)) @@ -3201,3 +3400,78 @@ } return (0); } + +/* + * Drop a random TCP connection that hasn't been serviced yet and + * is eligible for discard. There is a one in qlen chance that + * we will return null, saying that there are no droppable + * requests. In this case, the protocol-specific code should drop + * the new request. This ensures fairness. + * + * The listening TCP socket "head" must be locked + */ +static int +tcpdropdropablreq(struct socket *head) +{ + struct socket *so; + unsigned int i, j, qlen; + static int rnd; + static struct timeval old_runtime; + static unsigned int cur_cnt, old_cnt; + struct timeval tv; + struct inpcb *inp = NULL; + + microtime(&tv); + if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { + old_runtime = tv; + old_cnt = cur_cnt / i; + cur_cnt = 0; + } + + so = TAILQ_FIRST(&head->so_incomp); + if (!so) + return 0; + + qlen = head->so_incqlen; + if (++cur_cnt > qlen || old_cnt > qlen) { + rnd = (314159 * rnd + 66329) & 0xffff; + j = ((qlen + 1) * rnd) >> 16; + + while (j-- && so) + so = TAILQ_NEXT(so, so_list); + } + /* Find a connection that is not already closing */ + while (so) { + inp = (struct inpcb *)so->so_pcb; + + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) + break; + + so = TAILQ_NEXT(so, so_list); + } + if (!so) + return 0; + + /* Let's remove this connection from the incomplete list */ + tcp_lock(so, 1, 0); + + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(so, 1, 0); + return 0; + } + sototcpcb(so)->t_flags |= TF_LQ_OVERFLOW; + head->so_incqlen--; + head->so_qlen--; + so->so_head = NULL; + TAILQ_REMOVE(&head->so_incomp, so, so_list); + so->so_usecount--; /* No more held by so_head */ + + tcp_drop(sototcpcb(so), ETIMEDOUT); + + tcp_unlock(so, 1, 0); + + return 1; + +} + + diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index e9fd4814e..36e310fd1 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -121,7 +121,14 @@ int tcp_do_newreno = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno, 0, "Enable NewReno Algorithms"); -struct mbuf *m_copym_with_hdrs __P((struct mbuf*, int, int, int, struct mbuf**, int*)); +int tcp_packet_chaining = 50; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, CTLFLAG_RW, &tcp_packet_chaining, + 0, "Enable TCP output packet chaining"); + +struct mbuf *m_copym_with_hdrs(struct mbuf*, int, int, int, struct mbuf**, int*); +static long packchain_newlist = 0; +static long packchain_looped = 0; +static long packchain_sent = 0; /* temporary: for testing */ @@ -131,7 +138,25 @@ extern int ipsec_bypass; extern int slowlink_wsize; /* window correction for slow links */ extern u_long route_generation; +extern int fw_enable; /* firewall is on: disable packet chaining */ +extern int ipsec_bypass; +extern vm_size_t so_cache_zone_element_size; + +static __inline__ u_int16_t +get_socket_id(struct socket * s) +{ + u_int16_t val; + + if (so_cache_zone_element_size == 0) { + return (0); + } + 
val = (u_int16_t)(((u_int32_t)s) / so_cache_zone_element_size); + if (val == 0) { + val = 0xffff; + } + return (val); +} /* * Tcp output routine: figure out what should be sent and send it. @@ -152,7 +177,7 @@ tcp_output(tp) register struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; - int idle, sendalot; + int idle, sendalot, howmuchsent = 0; int maxburst = TCP_MAXBURST; struct rmxp_tao *taop; struct rmxp_tao tao_noncached; @@ -160,9 +185,13 @@ tcp_output(tp) int m_off; struct mbuf *m_last = 0; struct mbuf *m_head = 0; + struct mbuf *packetlist = 0; + struct mbuf *lastpacket = 0; #if INET6 int isipv6 = tp->t_inpcb->inp_vflag & INP_IPV6 ; #endif + short packchain_listadd = 0; + u_int16_t socket_id = get_socket_id(so); /* @@ -172,11 +201,7 @@ tcp_output(tp) * to send, then transmit; otherwise, investigate further. */ idle = (tp->snd_max == tp->snd_una); -#ifdef __APPLE__ if (idle && tp->t_rcvtime >= tp->t_rxtcur) { -#else - if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) { -#endif /* * We have been idle for "a while" and no acks are * expected to clock out any data we send -- @@ -231,7 +256,7 @@ again: if ((tp->t_inpcb->inp_route.ro_rt != NULL && (tp->t_inpcb->inp_route.ro_rt->generation_id != route_generation)) || (tp->t_inpcb->inp_route.ro_rt == NULL)) { /* check that the source address is still valid */ - if (ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr) == NULL) { + if (ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr) == 0) { if (tp->t_state >= TCPS_CLOSE_WAIT) { tcp_close(tp); return(EADDRNOTAVAIL); @@ -250,6 +275,11 @@ again: } } + if (packetlist) { + error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + (so->so_options & SO_DONTROUTE), 0); + tp->t_lastchain = 0; + } if (so->so_flags & SOF_NOADDRAVAIL) return(EADDRNOTAVAIL); else @@ -323,6 +353,11 @@ again: off--, len++; if (len > 0 && tp->t_state == TCPS_SYN_SENT && taop->tao_ccsent == 0) { + if (packetlist) { + error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + (so->so_options & SO_DONTROUTE), 0); + tp->t_lastchain = 0; + } KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return 0; } @@ -363,6 +398,7 @@ again: } if (len > tp->t_maxseg) { len = tp->t_maxseg; + howmuchsent += len; sendalot = 1; } if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) @@ -469,8 +505,13 @@ again: } /* - * No reason to send a segment, just return. + * If there is no reason to send a segment, just return. + * but if there is some packets left in the packet list, send them now. */ + if (packetlist) { + error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + (so->so_options & SO_DONTROUTE), 0); + } KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return (0); @@ -634,6 +675,7 @@ send: */ flags &= ~TH_FIN; len = tp->t_maxopd - optlen - ipoptlen; + howmuchsent += len; sendalot = 1; } @@ -798,7 +840,7 @@ send: m->m_data += max_linkhdr; m->m_len = hdrlen; } - m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.rcvif = 0; #if INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); @@ -864,6 +906,20 @@ send: win = (long)TCP_MAXWIN << tp->rcv_scale; th->th_win = htons((u_short) (win>>tp->rcv_scale)); } + + /* + * Adjust the RXWIN0SENT flag - indicate that we have advertised + * a 0 window. This may cause the remote transmitter to stall. This + * flag tells soreceive() to disable delayed acknowledgements when + * draining the buffer. 
This can occur if the receiver is attempting + * to read more data than can be buffered prior to transmitting on + * the connection. + */ + if (win == 0) + tp->t_flags |= TF_RXWIN0SENT; + else + tp->t_flags &= ~TF_RXWIN0SENT; + if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; @@ -994,10 +1050,11 @@ send: goto out; } #endif /*IPSEC*/ + m->m_pkthdr.socket_id = socket_id; error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &tp->t_inpcb->in6p_route, - (so->so_options & SO_DONTROUTE), NULL, NULL); + (so->so_options & SO_DONTROUTE), NULL, NULL, 0); } else #endif /* INET6 */ { @@ -1050,9 +1107,49 @@ send: if (ipsec_bypass == 0) ipsec_setsocket(m, so); #endif /*IPSEC*/ - error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, - (so->so_options & SO_DONTROUTE), 0); - } + + /* + * The socket is kept locked while sending out packets in ip_output, even if packet chaining is not active. + */ + + m->m_pkthdr.socket_id = socket_id; + if (packetlist) { + m->m_nextpkt = NULL; + lastpacket->m_nextpkt = m; + lastpacket = m; + packchain_listadd++; + } + else { + m->m_nextpkt = NULL; + packchain_newlist++; + packetlist = lastpacket = m; + packchain_listadd=0; + } + + if ((ipsec_bypass == 0) || fw_enable || sendalot == 0 || (tp->t_state != TCPS_ESTABLISHED) || + (tp->snd_cwnd <= (tp->snd_wnd / 4)) || + (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || tp->t_force != 0 || + packchain_listadd >= tcp_packet_chaining) { + lastpacket->m_nextpkt = 0; + error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, + (so->so_options & SO_DONTROUTE), 0); + tp->t_lastchain = packchain_listadd; + packchain_sent++; + packetlist = NULL; + if (error == 0) + howmuchsent = 0; + } + else { + error = 0; + packchain_looped++; + tcpstat.tcps_sndtotal++; + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + goto again; + } + } if (error) { /* @@ -1064,15 +1161,19 @@ * No need to check for TH_FIN here because * the TF_SENTFIN flag handles that case. */ - if ((flags & TH_SYN) == 0) - tp->snd_nxt -= len; + if ((flags & TH_SYN) == 0) + tp->snd_nxt -= howmuchsent; } + howmuchsent = 0; out: if (error == ENOBUFS) { if (!tp->t_timer[TCPT_REXMT] && !tp->t_timer[TCPT_PERSIST]) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; tcp_quench(tp->t_inpcb, 0); + if (packetlist) + m_freem_list(packetlist); + tp->t_lastchain = 0; KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return (0); } @@ -1084,18 +1185,28 @@ out: * not do so here. 
*/ tcp_mtudisc(tp->t_inpcb, 0); + if (packetlist) + m_freem_list(packetlist); + tp->t_lastchain = 0; KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return 0; } if ((error == EHOSTUNREACH || error == ENETDOWN) && TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_softerror = error; + if (packetlist) + m_freem_list(packetlist); + tp->t_lastchain = 0; KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return (0); } + if (packetlist) + m_freem_list(packetlist); + tp->t_lastchain = 0; KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return (error); } +sentit: tcpstat.tcps_sndtotal++; /* @@ -1109,8 +1220,8 @@ out: tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); - KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); - if (sendalot) + KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,0,0,0,0,0); + if (sendalot && (!tcp_do_newreno || --maxburst)) goto again; return (0); } @@ -1120,7 +1231,6 @@ tcp_setpersist(tp) register struct tcpcb *tp; { int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; - int tt; if (tp->t_timer[TCPT_REXMT]) panic("tcp_setpersist: retransmit pending"); diff --git a/bsd/netinet/tcp_seq.h b/bsd/netinet/tcp_seq.h index 57f1c9262..5023c0a18 100644 --- a/bsd/netinet/tcp_seq.h +++ b/bsd/netinet/tcp_seq.h @@ -85,7 +85,7 @@ /* Macro to increment a CC: skip 0 which has a special meaning */ #define CC_INC(c) (++(c) == 0 ? ++(c) : (c)) -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * Macros to initialize tcp sequence numbers for * send and receive from initial send and receive @@ -96,15 +96,11 @@ #define tcp_sendseqinit(tp) \ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \ - (tp)->iss + (tp)->snd_recover = (tp)->snd_high = (tp)->iss #define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) /* timestamp wrap-around time */ -#endif /* __APPLE_API_PRIVATE */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern tcp_cc tcp_ccgen; /* global connection count */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_TCP_SEQ_H_ */ diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index cb7bda653..0d8a16867 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -67,11 +67,13 @@ #include #endif #include +#include #include #include #include #include #include +#include @@ -120,10 +122,12 @@ #define DBG_FNC_TCP_CLOSE NETDBG_CODE(DBG_NETTCP, ((5 << 8) | 2)) +extern int tcp_lq_overflow; /* temporary: for testing */ #if IPSEC extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif int tcp_mssdflt = TCP_MSS; @@ -149,6 +153,23 @@ int tcp_minmss = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW, &tcp_minmss , 0, "Minimum TCP Maximum Segment Size"); +/* + * Number of TCP segments per second we accept from remote host + * before we start to calculate average segment size. If average + * segment size drops below the minimum TCP MSS we assume a DoS + * attack and reset+drop the connection. Care has to be taken not to + * set this value too small, or we may kill interactive connections + * (telnet, SSH) which send many small packets. 
+ */ +#ifdef FIX_WORKAROUND_FOR_3894301 +__private_extern__ int tcp_minmssoverload = TCP_MINMSSOVERLOAD; +#else +__private_extern__ int tcp_minmssoverload = 0; +#endif +SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW, + &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to " + "be under the MINMSS Size"); + static int tcp_do_rfc1323 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); @@ -161,7 +182,7 @@ static int tcp_tcbhashsize = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); -static int do_tcpdrain = 1; +static int do_tcpdrain = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); @@ -180,8 +201,8 @@ static int tcp_isn_reseed_interval = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret"); -static void tcp_cleartaocache __P((void)); -static void tcp_notify __P((struct inpcb *, int)); +static void tcp_cleartaocache(void); +static void tcp_notify(struct inpcb *, int); /* * Target size of TCP PCB hash tables. Must be a power of two. @@ -237,7 +258,7 @@ int get_tcp_str_size() return sizeof(struct tcpcb); } -int tcp_freeq __P((struct tcpcb *tp)); +int tcp_freeq(struct tcpcb *tp); /* @@ -249,6 +270,7 @@ tcp_init() int hashsize = TCBHASHSIZE; vm_size_t str_size; int i; + struct inpcbinfo *pcbinfo; tcp_ccgen = 1; tcp_cleartaocache(); @@ -260,11 +282,12 @@ tcp_init() tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; read_random(&tcp_now, sizeof(tcp_now)); - tcp_now = tcp_now & 0x7fffffffffffffff; /* Starts tcp internal 500ms clock at a random value */ + tcp_now = tcp_now & 0x7fffffff; /* Starts tcp internal 500ms clock at a random value */ LIST_INIT(&tcb); tcbinfo.listhead = &tcb; + pcbinfo = &tcbinfo; #ifndef __APPLE__ TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); #endif @@ -301,10 +324,29 @@ if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR - tcbinfo.last_pcb = 0; dummy_tcb.t_state = TCP_NSTATES; dummy_tcb.t_flags = 0; tcbinfo.dummy_cb = (caddr_t) &dummy_tcb; + + /* + * allocate lock group attribute and group for tcp pcb mutexes + */ + pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); + pcbinfo->mtx_grp = lck_grp_alloc_init("tcppcb", pcbinfo->mtx_grp_attr); + + /* + * allocate the lock attribute for tcp pcb mutexes + */ + pcbinfo->mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(pcbinfo->mtx_attr); + + if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) { + printf("tcp_init: mutex not alloced!\n"); + return; /* pretty much dead if this fails... 
*/ + } + + in_pcb_nat_init(&tcbinfo, AF_INET, IPPROTO_TCP, SOCK_STREAM); delack_bitmask = _MALLOC((4 * hashsize)/32, M_PCB, M_WAITOK); @@ -530,7 +572,7 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags) } m->m_len = tlen; m->m_pkthdr.len = tlen; - m->m_pkthdr.rcvif = (struct ifnet *) 0; + m->m_pkthdr.rcvif = 0; nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; @@ -571,7 +613,7 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags) #endif #if INET6 if (isipv6) { - (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL); + (void)ip6_output(m, NULL, ro6, ipflags, NULL, NULL, 0); if (ro6 == &sro6 && ro6->ro_rt) { rtfree(ro6->ro_rt); ro6->ro_rt = NULL; @@ -579,7 +621,7 @@ tcp_respond(tp, ipgen, th, m, ack, seq, flags) } else #endif /* INET6 */ { - (void) ip_output(m, NULL, ro, ipflags, NULL); + (void) ip_output_list(m, 0, NULL, ro, ipflags, NULL); if (ro == &sro && ro->ro_rt) { rtfree(ro->ro_rt); ro->ro_rt = NULL; @@ -731,7 +773,6 @@ tcp_close(tp) } } #endif - KERNEL_DEBUG(DBG_FNC_TCP_CLOSE | DBG_FUNC_START, tp,0,0,0,0); switch (tp->t_state) @@ -859,7 +900,7 @@ tcp_close(tp) * mark route for deletion if no information is * cached. */ - if ((tp->t_flags & TF_LQ_OVERFLOW) && + if ((tp->t_flags & TF_LQ_OVERFLOW) && tcp_lq_overflow && ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0)){ if (rt->rt_rmx.rmx_rtt == 0) rt->rt_flags |= RTF_DELCLONE; @@ -874,7 +915,6 @@ tcp_close(tp) inp->inp_saved_ppcb = (caddr_t) tp; #endif - inp->inp_ppcb = NULL; soisdisconnected(so); #if INET6 if (INP_CHECK_SOCKAF(so, AF_INET6)) @@ -908,6 +948,9 @@ tcp_freeq(tp) void tcp_drain() { +/* + * ###LD 05/19/04 locking issue, tcpdrain is disabled, deadlock situation with tcbinfo.mtx + */ if (do_tcpdrain) { struct inpcb *inpb; @@ -922,6 +965,7 @@ tcp_drain() * where we're really low on mbufs, this is potentially * usefull. */ + lck_rw_lock_exclusive(tcbinfo.mtx); for (inpb = LIST_FIRST(tcbinfo.listhead); inpb; inpb = LIST_NEXT(inpb, inp_list)) { if ((tcpb = intotcpcb(inpb))) { @@ -934,6 +978,7 @@ tcp_drain() } } } + lck_rw_done(tcbinfo.mtx); } } @@ -953,7 +998,7 @@ tcp_notify(inp, error) { struct tcpcb *tp; - if (inp == NULL) + if (inp == NULL || (inp->inp_state == INPCB_STATE_DEAD)) return; /* pcb is gone already */ tp = (struct tcpcb *)inp->inp_ppcb; @@ -993,66 +1038,73 @@ tcp_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + lck_rw_lock_shared(tcbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { n = tcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb); + lck_rw_done(tcbinfo.mtx); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(tcbinfo.mtx); return EPERM; + } /* * OK, now we're committed to doing something. 
*/ - s = splnet(); gencnt = tcbinfo.ipi_gencnt; n = tcbinfo.ipi_count; - splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); - if (error) + if (error) { + lck_rw_done(tcbinfo.mtx); return error; + } /* * We are done if there is no pcb */ - if (n == 0) + if (n == 0) { + lck_rw_done(tcbinfo.mtx); return 0; + } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == 0) + if (inp_list == 0) { + lck_rw_done(tcbinfo.mtx); return ENOMEM; + } - s = splnet(); for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { #ifdef __APPLE__ - if (inp->inp_gencnt <= gencnt) + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) #else if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) #endif inp_list[i++] = inp; } - splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xtcpcb xt; caddr_t inp_ppcb; xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ - bcopy(inp, &xt.xt_inp, sizeof *inp); + inpcb_to_compat(inp, &xt.xt_inp); inp_ppcb = inp->inp_ppcb; - if (inp_ppcb != NULL) + if (inp_ppcb != NULL) { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); + } else bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); if (inp->inp_socket) @@ -1068,14 +1120,13 @@ tcp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ - s = splnet(); xig.xig_gen = tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = tcbinfo.ipi_count; - splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); + lck_rw_done(tcbinfo.mtx); return error; } @@ -1103,7 +1154,7 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) error = ENOENT; goto out; } - error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(struct ucred)); + error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(*(kauth_cred_t)0)); out: splx(s); return (error); @@ -1150,7 +1201,7 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS) goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred, - sizeof(struct ucred)); + sizeof(*(kauth_cred_t)0)); out: splx(s); return (error); @@ -1173,7 +1224,7 @@ tcp_ctlinput(cmd, sa, vip) struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; - void (*notify) __P((struct inpcb *, int)) = tcp_notify; + void (*notify)(struct inpcb *, int) = tcp_notify; tcp_seq icmp_seq; int s; @@ -1196,21 +1247,25 @@ else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip) { - s = splnet(); th = (struct tcphdr *)((caddr_t)ip + (IP_VHL_HL(ip->ip_vhl) << 2)); inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, 0, NULL); if (inp != NULL && inp->inp_socket != NULL) { + tcp_lock(inp->inp_socket, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(inp->inp_socket, 1, 0); + return; + } icmp_seq = htonl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_seq, tp->snd_una) && SEQ_LT(icmp_seq, tp->snd_max)) (*notify)(inp, inetctlerrmap[cmd]); + tcp_unlock(inp->inp_socket, 1, 0); } - splx(s); } else - in_pcbnotifyall(&tcb, faddr, inetctlerrmap[cmd], notify); + in_pcbnotifyall(&tcbinfo, faddr, inetctlerrmap[cmd], notify); } #if INET6 @@ -1221,7 +1276,7 @@ tcp6_ctlinput(cmd, sa, d) void *d; { struct tcphdr th; - void (*notify) __P((struct inpcb *, int)) = tcp_notify; + void (*notify)(struct inpcb *, int) = tcp_notify; 
struct ip6_hdr *ip6; struct mbuf *m; struct ip6ctlparam *ip6cp = NULL; @@ -1271,11 +1326,11 @@ tcp6_ctlinput(cmd, sa, d) bzero(&th, sizeof(th)); m_copydata(m, off, sizeof(*thp), (caddr_t)&th); - in6_pcbnotify(&tcb, sa, th.th_dport, + in6_pcbnotify(&tcbinfo, sa, th.th_dport, (struct sockaddr *)ip6cp->ip6c_src, th.th_sport, cmd, notify); } else - in6_pcbnotify(&tcb, sa, 0, (struct sockaddr *)sa6_src, + in6_pcbnotify(&tcbinfo, sa, 0, (struct sockaddr *)sa6_src, 0, cmd, notify); } #endif /* INET6 */ @@ -1586,6 +1641,7 @@ ipsec_hdrsiz_tcp(tp) if (!m) return 0; + lck_mtx_lock(sadb_mutex); #if INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); @@ -1603,7 +1659,7 @@ ipsec_hdrsiz_tcp(tp) tcp_fillheaders(tp, ip, th); hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } - + lck_mtx_unlock(sadb_mutex); m_free(m); return hdrsiz; } @@ -1647,3 +1703,88 @@ static void tcp_cleartaocache() { } + +int +tcp_lock(so, refcount, lr) + struct socket *so; + int refcount; + int lr; +{ + int lr_saved; +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; +#endif + + if (so->so_pcb) { + lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } + else { + panic("tcp_lock: so=%x NO PCB! lr=%x\n", so, lr_saved); + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + } + + if (so->so_usecount < 0) + panic("tcp_lock: so=%x so_pcb=%x lr=%x ref=%x\n", + so, so->so_pcb, lr_saved, so->so_usecount); + + if (refcount) + so->so_usecount++; + so->reserved3 = (void *)lr_saved; + return (0); +} + +int +tcp_unlock(so, refcount, lr) + struct socket *so; + int refcount; + int lr; +{ + int lr_saved; +#ifdef __ppc__ + if (lr == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = lr; +#endif + +#ifdef MORE_TCPLOCK_DEBUG + printf("tcp_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", + so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount, lr_saved); +#endif + if (refcount) + so->so_usecount--; + + if (so->so_usecount < 0) + panic("tcp_unlock: so=%x usecount=%x\n", so, so->so_usecount); + if (so->so_pcb == NULL) { + panic("tcp_unlock: so=%x NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); + lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + } + else { + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } + so->reserved4 = (void *)lr_saved; + return (0); +} + +lck_mtx_t * +tcp_getlock(so, locktype) + struct socket *so; + int locktype; +{ + struct inpcb *inp = sotoinpcb(so); + + if (so->so_pcb) { + if (so->so_usecount < 0) + panic("tcp_getlock: so=%x usecount=%x\n", so, so->so_usecount); + return(inp->inpcb_mtx); + } + else { + panic("tcp_getlock: so=%x NULL so_pcb\n", so); + return (so->so_proto->pr_domain->dom_mtx); + } +} diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index a99efeea7..b3c5388b9 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -64,6 +64,7 @@ #include #include #include +#include #include /* before tcp_seq.h, for tcp_random18() */ @@ -154,11 +155,16 @@ int cur_tw_slot = 0; u_long *delack_bitmask; -void add_to_time_wait(tp) +void add_to_time_wait_locked(tp) struct tcpcb *tp; { int tw_slot; + /* pcb list should be locked when we get here */ +#if 0 + lck_mtx_assert(tp->t_inpcb->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif + LIST_REMOVE(tp->t_inpcb, inp_list); if (tp->t_timer[TCPT_2MSL] == 0) @@ -172,6 +178,19 @@ void add_to_time_wait(tp) 
LIST_INSERT_HEAD(&time_wait_slots[tw_slot], tp->t_inpcb, inp_list); } +void add_to_time_wait(tp) + struct tcpcb *tp; +{ + struct inpcbinfo *pcbinfo = &tcbinfo; + + if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + tcp_unlock(tp->t_inpcb->inp_socket, 0, 0); + lck_rw_lock_exclusive(pcbinfo->mtx); + tcp_lock(tp->t_inpcb->inp_socket, 0, 0); + } + add_to_time_wait_locked(tp); + lck_rw_done(pcbinfo->mtx); +} @@ -182,49 +201,46 @@ void add_to_time_wait(tp) void tcp_fasttimo() { - register struct inpcb *inp; + struct inpcb *inp, *inpnxt; register struct tcpcb *tp; - register u_long i,j; - register u_long temp_mask; - register u_long elem_base = 0; - struct inpcbhead *head; - int s = splnet(); + struct inpcbinfo *pcbinfo = &tcbinfo; - static - int delack_checked = 0; + int delack_checked = 0, delack_done = 0; KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_START, 0,0,0,0,0); - if (!tcp_delack_enabled) + if (tcp_delack_enabled == 0) return; - for (i=0; i < (tcbinfo.hashsize / 32); i++) { - if (delack_bitmask[i]) { - temp_mask = 1; - for (j=0; j < 32; j++) { - if (temp_mask & delack_bitmask[i]) { - head = &tcbinfo.hashbase[elem_base + j]; - for (inp=head->lh_first; inp != 0; inp = inp->inp_hash.le_next) { - delack_checked++; - if ((tp = (struct tcpcb *)inp->inp_ppcb) && (tp->t_flags & TF_DELACK)) { - tp->t_flags &= ~TF_DELACK; - tp->t_flags |= TF_ACKNOW; - tcpstat.tcps_delack++; - (void) tcp_output(tp); - } - } - } - temp_mask <<= 1; + lck_rw_lock_shared(pcbinfo->mtx); + + /* Walk the list of valid tcpcbs and send ACKS on the ones with DELACK bit set */ + + for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; + /* NOTE: it's OK to check the tp because the pcb can't be removed while we hold pcbinfo->mtx) */ + if ((tp = (struct tcpcb *)inp->inp_ppcb) && (tp->t_flags & TF_DELACK)) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + tcp_lock(inp->inp_socket, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(inp->inp_socket, 1, 0); + continue; + } + if (tp->t_flags & TF_DELACK) { + delack_done++; + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_delack++; + (void) tcp_output(tp); } - delack_bitmask[i] = 0; - } - elem_base += 32; + tcp_unlock(inp->inp_socket, 1, 0); + } } - KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_checked,tcpstat.tcps_delack,0,0,0); - splx(s); - + KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_checked, delack_done, tcpstat.tcps_delack,0,0); + lck_rw_done(pcbinfo->mtx); } /* @@ -235,41 +251,54 @@ tcp_fasttimo() void tcp_slowtimo() { - register struct inpcb *ip, *ipnxt; - register struct tcpcb *tp; - register int i; - int s; + struct inpcb *inp, *inpnxt; + struct tcpcb *tp; + struct socket *so; + int i; #if TCPDEBUG int ostate; #endif #if KDEBUG static int tws_checked; #endif + struct inpcbinfo *pcbinfo = &tcbinfo; KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0,0,0,0,0); - s = splnet(); tcp_maxidle = tcp_keepcnt * tcp_keepintvl; - ip = tcb.lh_first; - if (ip == NULL) { - splx(s); - return; - } + lck_rw_lock_shared(pcbinfo->mtx); + /* * Search through tcb's and update active timers. 
*/ - for (; ip != NULL; ip = ipnxt) { - ipnxt = ip->inp_list.le_next; - tp = intotcpcb(ip); - if (tp == 0 || tp->t_state == TCPS_LISTEN) + for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; + + if (in_pcb_checkstate(inp, WNT_ACQUIRE,0) == WNT_STOPUSING) continue; + + so = inp->inp_socket; + tcp_lock(so, 1, 0); + + if ((in_pcb_checkstate(inp, WNT_RELEASE,1) == WNT_STOPUSING) && so->so_usecount == 1) { + tcp_unlock(so, 1, 0); + continue; + } + tp = intotcpcb(inp); + if (tp == 0 || tp->t_state == TCPS_LISTEN) { + tcp_unlock(so, 1, 0); + continue; + } /* * Bogus state when port owned by SharedIP with loopback as the * only configured interface: BlueBox does not filter loopback */ - if (tp->t_state == TCP_NSTATES) - continue; + if (tp->t_state == TCP_NSTATES) { + tcp_unlock(so, 1, 0); + continue; + } + for (i = 0; i < TCPT_NTIMERS; i++) { if (tp->t_timer[i] && --tp->t_timer[i] == 0) { @@ -292,9 +321,9 @@ tcp_slowtimo() tp->t_rcvtime++; tp->t_starttime++; if (tp->t_rtttime) - tp->t_rtttime++; + tp->t_rtttime++; tpgone: - ; + tcp_unlock(so, 1, 0); } #if KDEBUG @@ -306,16 +335,27 @@ tpgone: * Process the items in the current time-wait slot */ - for (ip = time_wait_slots[cur_tw_slot].lh_first; ip; ip = ipnxt) + for (inp = time_wait_slots[cur_tw_slot].lh_first; inp; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; #if KDEBUG tws_checked++; #endif - ipnxt = ip->inp_list.le_next; - tp = intotcpcb(ip); + + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + + tcp_lock(inp->inp_socket, 1, 0); + + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) + goto twunlock; + + tp = intotcpcb(inp); if (tp == NULL) { /* tp already closed, remove from list */ - LIST_REMOVE(ip, inp_list); - continue; +#if TEMPDEBUG + printf("tcp_slowtimo: tp is null in time-wait slot!\n"); +#endif + goto twunlock; } if (tp->t_timer[TCPT_2MSL] >= N_TIME_WAIT_SLOTS) { tp->t_timer[TCPT_2MSL] -= N_TIME_WAIT_SLOTS; @@ -324,14 +364,77 @@ tpgone: else tp->t_timer[TCPT_2MSL] = 0; - if (tp->t_timer[TCPT_2MSL] == 0) - tp = tcp_timers(tp, TCPT_2MSL); + if (tp->t_timer[TCPT_2MSL] == 0) + tp = tcp_timers(tp, TCPT_2MSL); /* tp can be returned null if tcp_close is called */ +twunlock: + tcp_unlock(inp->inp_socket, 1, 0); + } + + if (lck_rw_lock_shared_to_exclusive(pcbinfo->mtx) != 0) + lck_rw_lock_exclusive(pcbinfo->mtx); /* Upgrade failed; we lost the lock, so take it again exclusively */ + + + for (inp = tcb.lh_first; inp != NULL; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; + /* Ignore nat/SharedIP dummy pcbs */ + if (inp->inp_socket == &tcbinfo.nat_dummy_socket) + continue; + + if (inp->inp_wantcnt != WNT_STOPUSING) + continue; + + so = inp->inp_socket; + if (!lck_mtx_try_lock(inp->inpcb_mtx)) {/* skip if in use */ +#if TEMPDEBUG + printf("tcp_slowtimo so=%x STOPUSING but locked...\n", so); +#endif + continue; + } + + if (so->so_usecount == 0) + in_pcbdispose(inp); + else { + tp = intotcpcb(inp); + /* Check for embryonic socket stuck on listener queue (4023660) */ + if ((so->so_usecount == 1) && (tp->t_state == TCPS_CLOSED) && + (so->so_head != NULL) && (so->so_state & SS_INCOMP)) { + so->so_usecount--; + in_pcbdispose(inp); + } else + lck_mtx_unlock(inp->inpcb_mtx); + } } + /* Now cleanup the time wait ones */ + for (inp = time_wait_slots[cur_tw_slot].lh_first; inp; inp = inpnxt) + { + inpnxt = inp->inp_list.le_next; + + if (inp->inp_wantcnt != WNT_STOPUSING) + continue; + + so = inp->inp_socket; + if (!lck_mtx_try_lock(inp->inpcb_mtx)) /* skip if in use */ + continue; + if 
(so->so_usecount == 0) + in_pcbdispose(inp); + else { + tp = intotcpcb(inp); + /* Check for embryonic socket stuck on listener queue (4023660) */ + if ((so->so_usecount == 1) && (tp->t_state == TCPS_CLOSED) && + (so->so_head != NULL) && (so->so_state & SS_INCOMP)) { + so->so_usecount--; + in_pcbdispose(inp); + } else + lck_mtx_unlock(inp->inpcb_mtx); + } + } + + tcp_now++; if (++cur_tw_slot >= N_TIME_WAIT_SLOTS) cur_tw_slot = 0; - tcp_now++; /* for timestamps */ - splx(s); + + lck_rw_done(pcbinfo->mtx); KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); } @@ -376,6 +479,7 @@ tcp_timers(tp, timer) int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0; #endif /* INET6 */ + so_tmp = tp->t_inpcb->inp_socket; switch (timer) { @@ -388,11 +492,13 @@ tcp_timers(tp, timer) case TCPT_2MSL: if (tp->t_state != TCPS_TIME_WAIT && tp->t_rcvtime <= tcp_maxidle) { - tp->t_timer[TCPT_2MSL] = tcp_keepintvl; - add_to_time_wait(tp); + tp->t_timer[TCPT_2MSL] = (unsigned long)tcp_keepintvl; + add_to_time_wait_locked(tp); } - else + else { tp = tcp_close(tp); + return(tp); + } break; /* @@ -404,7 +510,6 @@ tcp_timers(tp, timer) if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; tcpstat.tcps_timeoutdrop++; - so_tmp = tp->t_inpcb->inp_socket; tp = tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); postevent(so_tmp, 0, EV_TIMEOUT); @@ -549,7 +654,7 @@ tcp_timers(tp, timer) if ((always_keepalive || tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { - if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + tcp_maxidle) + if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (unsigned long)tcp_maxidle) goto dropit; /* * Send a packet designed to force a response @@ -583,7 +688,6 @@ tcp_timers(tp, timer) #endif dropit: tcpstat.tcps_keepdrops++; - so_tmp = tp->t_inpcb->inp_socket; tp = tcp_drop(tp, ETIMEDOUT); postevent(so_tmp, 0, EV_TIMEOUT); break; diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index e3f5282d5..e4979a078 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -106,10 +106,11 @@ * amount of time probing, then we drop the connection. */ +#ifdef PRIVATE + /* * Time constants. */ -#ifdef __APPLE_API_PRIVATE #define TCPTV_MSL ( 30*PR_SLOWHZ) /* max seg lifetime (hah!) */ #define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ @@ -140,6 +141,7 @@ static char *tcptimers[] = { "REXMT", "PERSIST", "KEEP", "2MSL" }; #endif +#ifdef KERNEL /* * Force a time value to be in a certain range. */ @@ -151,8 +153,6 @@ static char *tcptimers[] = (tv) = (tvmax); \ } while(0) -#ifdef KERNEL - #define TCP_KEEPIDLE(tp) \ (tp->t_keepidle && (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? 
\ tp->t_keepidle : tcp_keepidle) @@ -167,13 +167,13 @@ extern int tcp_msl; extern int tcp_ttl; /* time to live for TCP segs */ extern int tcp_backoff[]; -void tcp_timer_2msl __P((void *xtp)); -void tcp_timer_keep __P((void *xtp)); -void tcp_timer_persist __P((void *xtp)); -void tcp_timer_rexmt __P((void *xtp)); -void tcp_timer_delack __P((void *xtp)); +void tcp_timer_2msl(void *xtp); +void tcp_timer_keep(void *xtp); +void tcp_timer_persist(void *xtp); +void tcp_timer_rexmt(void *xtp); +void tcp_timer_delack(void *xtp); #endif /* KERNEL */ -#endif /* __APPLE_API_PRIVATE */ - +#endif /* PRIVATE */ #endif /* !_NETINET_TCP_TIMER_H_ */ + diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index 246e031c0..d29331e28 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -104,17 +104,15 @@ */ extern char *tcpstates[]; /* XXX ??? */ -static int tcp_attach __P((struct socket *, struct proc *)); -static int tcp_connect __P((struct tcpcb *, struct sockaddr *, - struct proc *)); +static int tcp_attach(struct socket *, struct proc *); +static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *); #if INET6 -static int tcp6_connect __P((struct tcpcb *, struct sockaddr *, - struct proc *)); +static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *); #endif /* INET6 */ static struct tcpcb * - tcp_disconnect __P((struct tcpcb *)); + tcp_disconnect(struct tcpcb *); static struct tcpcb * - tcp_usrclosed __P((struct tcpcb *)); + tcp_usrclosed(struct tcpcb *); #if TCPDEBUG #define TCPDEBUG0 int ostate = 0 @@ -134,7 +132,6 @@ static struct tcpcb * static int tcp_usr_attach(struct socket *so, int proto, struct proc *p) { - int s = splnet(); int error; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = 0; @@ -155,7 +152,6 @@ tcp_usr_attach(struct socket *so, int proto, struct proc *p) tp = sototcpcb(so); out: TCPDEBUG2(PRU_ATTACH); - splx(s); return error; } @@ -169,16 +165,17 @@ out: static int tcp_usr_detach(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; TCPDEBUG0; - if (inp == 0) { - splx(s); + if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { return EINVAL; /* XXX */ } +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif tp = intotcpcb(inp); /* In case we got disconnected from the peer */ if (tp == 0) @@ -187,21 +184,19 @@ tcp_usr_detach(struct socket *so) tp = tcp_disconnect(tp); out: TCPDEBUG2(PRU_DETACH); - splx(s); return error; } #define COMMON_START() TCPDEBUG0; \ do { \ - if (inp == 0) { \ - splx(s); \ + if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \ return EINVAL; \ } \ tp = intotcpcb(inp); \ TCPDEBUG1(); \ } while(0) -#define COMMON_END(req) out: TCPDEBUG2(req); splx(s); return error; goto out +#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out /* @@ -210,7 +205,6 @@ out: static int tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -239,7 +233,6 @@ tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) static int tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -285,7 +278,6 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p) static int tcp_usr_listen(struct socket *so, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = 
sotoinpcb(so); struct tcpcb *tp; @@ -302,7 +294,6 @@ tcp_usr_listen(struct socket *so, struct proc *p) static int tcp6_usr_listen(struct socket *so, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -330,7 +321,6 @@ tcp6_usr_listen(struct socket *so, struct proc *p) static int tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -362,7 +352,6 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -419,11 +408,13 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) static int tcp_usr_disconnect(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; - + +#if 1 + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif COMMON_START(); /* In case we got disconnected from the peer */ if (tp == 0) @@ -440,7 +431,6 @@ tcp_usr_disconnect(struct socket *so) static int tcp_usr_accept(struct socket *so, struct sockaddr **nam) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = NULL; @@ -450,8 +440,7 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam) error = ECONNABORTED; goto out; } - if (inp == 0) { - splx(s); + if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { return (EINVAL); } tp = intotcpcb(inp); @@ -464,7 +453,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam) static int tcp6_usr_accept(struct socket *so, struct sockaddr **nam) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = NULL; @@ -474,8 +462,7 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) error = ECONNABORTED; goto out; } - if (inp == 0) { - splx(s); + if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { return (EINVAL); } tp = intotcpcb(inp); @@ -490,7 +477,6 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) static int tcp_usr_shutdown(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -512,7 +498,6 @@ tcp_usr_shutdown(struct socket *so) static int tcp_usr_rcvd(struct socket *so, int flags) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -536,7 +521,6 @@ static int tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -545,7 +529,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, #endif TCPDEBUG0; - if (inp == NULL) { + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { /* * OOPS! 
we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a @@ -660,7 +644,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, static int tcp_usr_abort(struct socket *so) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -670,6 +653,7 @@ tcp_usr_abort(struct socket *so) if (tp == 0) goto out; tp = tcp_drop(tp, ECONNABORTED); + so->so_usecount--; COMMON_END(PRU_ABORT); } @@ -679,7 +663,6 @@ tcp_usr_abort(struct socket *so) static int tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) { - int s = splnet(); int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; @@ -709,7 +692,7 @@ struct pr_usrreqs tcp_usrreqs = { tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; #if INET6 @@ -718,7 +701,7 @@ struct pr_usrreqs tcp6_usrreqs = { tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, - in6_mapped_sockaddr, sosend, soreceive, sopoll + in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; #endif /* INET6 */ @@ -761,29 +744,51 @@ tcp_connect(tp, nam, p) error = in_pcbladdr(inp, nam, &ifaddr); if (error) return error; + + tcp_unlock(inp->inp_socket, 0, 0); oinp = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : ifaddr->sin_addr, inp->inp_lport, 0, NULL); + + tcp_lock(inp->inp_socket, 0, 0); if (oinp) { + tcp_lock(oinp->inp_socket, 1, 0); + if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) { + tcp_unlock(oinp->inp_socket, 1, 0); + goto skip_oinp; + } + if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && otp->t_state == TCPS_TIME_WAIT && otp->t_starttime < tcp_msl && (otp->t_flags & TF_RCVD_CC)) otp = tcp_close(otp); - else + else { + printf("tcp_connect: inp=%x err=EADDRINUSE\n", inp); + tcp_unlock(oinp->inp_socket, 1, 0); return EADDRINUSE; + } + tcp_unlock(oinp->inp_socket, 1, 0); } +skip_oinp: if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr : inp->inp_laddr.s_addr) == sin->sin_addr.s_addr && inp->inp_lport == sin->sin_port) return EINVAL; + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = ifaddr->sin_addr; inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); + lck_rw_done(inp->inp_pcbinfo->mtx); /* Compute window scaling to request. 
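* (Worked example, assuming the standard BSD sizing loop that follows:
* with a 256 KB receive buffer, so_rcv.sb_hiwat = 262144, the loop
* settles on request_r_scale = 3, since 65535 << 2 = 262140 still falls
* short of the buffer while 65535 << 3 = 524280 covers it.)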
*/ while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -829,7 +834,7 @@ tcp6_connect(tp, nam, p) struct socket *so = inp->inp_socket; struct tcpcb *otp; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; - struct in6_addr *addr6; + struct in6_addr addr6; struct rmxp_tao *taop; struct rmxp_tao tao_noncached; int error; @@ -848,12 +853,14 @@ tcp6_connect(tp, nam, p) error = in6_pcbladdr(inp, nam, &addr6); if (error) return error; + tcp_unlock(inp->inp_socket, 0, 0); oinp = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) - ? addr6 + ? &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); + tcp_lock(inp->inp_socket, 0, 0); if (oinp) { if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && otp->t_state == TCPS_TIME_WAIT && @@ -863,13 +870,20 @@ tcp6_connect(tp, nam, p) else return EADDRINUSE; } + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) - inp->in6p_laddr = *addr6; + inp->in6p_laddr = addr6; inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != NULL) inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); + lck_rw_done(inp->inp_pcbinfo->mtx); /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -917,15 +931,13 @@ tcp_ctloutput(so, sopt) struct socket *so; struct sockopt *sopt; { - int error, opt, optval, s; + int error, opt, optval; struct inpcb *inp; struct tcpcb *tp; error = 0; - s = splnet(); /* XXX */ inp = sotoinpcb(so); if (inp == NULL) { - splx(s); return (ECONNRESET); } if (sopt->sopt_level != IPPROTO_TCP) { @@ -935,12 +947,10 @@ tcp_ctloutput(so, sopt) else #endif /* INET6 */ error = ip_ctloutput(so, sopt); - splx(s); return (error); } tp = intotcpcb(inp); if (tp == NULL) { - splx(s); return (ECONNRESET); } @@ -1031,7 +1041,6 @@ tcp_ctloutput(so, sopt) error = sooptcopyout(sopt, &optval, sizeof optval); break; } - splx(s); return (error); } diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index f47cd5e4c..08b897f6c 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -58,8 +58,11 @@ #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ #include +#include +#include #include -#ifdef __APPLE_API_PRIVATE + +#ifdef KERNEL_PRIVATE #define N_TIME_WAIT_SLOTS 128 /* must be power of 2 */ @@ -88,19 +91,10 @@ struct tcptemp { #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ -#ifdef __APPLE__ -#define TCP_DELACK_BITSET(hash_elem)\ -delack_bitmask[((hash_elem) >> 5)] |= 1 << ((hash_elem) & 0x1F) - -#define DELACK_BITMASK_ON 1 -#define DELACK_BITMASK_THRESH 300 -#endif - /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. 
*/ -#if KERNEL struct tcpcb { struct tsegqe_head t_segq; int t_dupacks; /* consecutive dup acks recd */ @@ -192,26 +186,109 @@ struct tcpcb { /* RFC 1644 variables */ tcp_cc cc_send; /* send connection count */ tcp_cc cc_recv; /* receive connection count */ - tcp_seq snd_recover; /* for use in fast recovery */ + tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ /* experimental */ u_long snd_cwnd_prev; /* cwnd prior to retransmit */ u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ u_long t_badrxtwin; /* window for retransmit recovery */ int t_keepidle; /* keepalive idle timer (override global if > 0) */ + int t_lastchain; /* amount of packets chained last time around */ + +/* 3529618 MSS overload prevention */ + u_long rcv_reset; + u_long rcv_pps; + u_long rcv_byps; + tcp_seq snd_high; /* for use in NewReno Fast Recovery */ + tcp_seq snd_high_prev; /* snd_high prior to retransmit */ + }; -#else -#define tcpcb otcpcb +/* + * Structure to hold TCP options that are only used during segment + * processing (in tcp_input), but not held in the tcpcb. + * It's basically used to reduce the number of parameters + * to tcp_dooptions. + */ +struct tcpopt { + u_long to_flag; /* which options are present */ +#define TOF_TS 0x0001 /* timestamp */ +#define TOF_CC 0x0002 /* CC and CCnew are exclusive */ +#define TOF_CCNEW 0x0004 +#define TOF_CCECHO 0x0008 + u_long to_tsval; + u_long to_tsecr; + tcp_cc to_cc; /* holds CC or CCnew */ + tcp_cc to_ccecho; + u_short reserved; /* unused now: was to_maxseg */ +}; -#endif +/* + * The TAO cache entry which is stored in the protocol family specific + * portion of the route metrics. + */ +struct rmxp_tao { + tcp_cc tao_cc; /* latest CC in valid SYN */ + tcp_cc tao_ccsent; /* latest CC sent to peer */ + u_short tao_mssopt; /* peer's cached MSS */ +#ifdef notyet + u_short tao_flags; /* cache status flags */ +#define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */ +#define TAOF_OK 0x0002 /* peer does understand rfc1644 */ +#define TAOF_UNDEF 0 /* we don't know yet */ +#endif /* notyet */ +}; +#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) + +#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) +#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ +#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). 
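+ * (Worked example: with TCP_RTT_SHIFT = 5 and TCP_RTTVAR_SHIFT = 4, a
+ * smoothed rtt of R ticks is stored as t_srtt = R << 5 and a variance of
+ * V ticks as t_rttvar = V << 4, so TCP_REXMTVAL computes
+ * (((R << 5) >> 3) + (V << 4)) >> 2 == R + 4*V -- the rtt + 4 * rttvar
+ * rule above, floored at t_rttmin.)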
+ * This version of the macro adapted from a paper by Lawrence + * Brakmo and Larry Peterson which outlines a problem caused + * by insufficient precision in the original implementation, + * which results in inappropriately large RTO values for very + * fast networks. + */ +#define TCP_REXMTVAL(tp) \ + max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* * Jaguar compatible TCP control block, for xtcpcb * Does not have the old fields */ struct otcpcb { +#else +struct tseg_qent; +LIST_HEAD(tsegqe_head, tseg_qent); + +struct tcpcb { +#endif /* KERNEL_PRIVATE */ struct tsegqe_head t_segq; int t_dupacks; /* consecutive dup acks recd */ struct tcptemp *unused; /* unused now: was t_template */ @@ -302,90 +379,13 @@ struct otcpcb { /* RFC 1644 variables */ tcp_cc cc_send; /* send connection count */ tcp_cc cc_recv; /* receive connection count */ - tcp_seq snd_recover; /* for use in fast recovery */ + tcp_seq snd_recover; /* for use in fast recovery */ /* experimental */ u_long snd_cwnd_prev; /* cwnd prior to retransmit */ u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ u_long t_badrxtwin; /* window for retransmit recovery */ }; - -/* - * Structure to hold TCP options that are only used during segment - * processing (in tcp_input), but not held in the tcpcb. - * It's basically used to reduce the number of parameters - * to tcp_dooptions. - */ -struct tcpopt { - u_long to_flag; /* which options are present */ -#define TOF_TS 0x0001 /* timestamp */ -#define TOF_CC 0x0002 /* CC and CCnew are exclusive */ -#define TOF_CCNEW 0x0004 -#define TOF_CCECHO 0x0008 - u_long to_tsval; - u_long to_tsecr; - tcp_cc to_cc; /* holds CC or CCnew */ - tcp_cc to_ccecho; - u_short reserved; /* unused now: was to_maxseg */ -}; - -/* - * The TAO cache entry which is stored in the protocol family specific - * portion of the route metrics. - */ -struct rmxp_tao { - tcp_cc tao_cc; /* latest CC in valid SYN */ - tcp_cc tao_ccsent; /* latest CC sent to peer */ - u_short tao_mssopt; /* peer's cached MSS */ -#ifdef notyet - u_short tao_flags; /* cache status flags */ -#define TAOF_DONT 0x0001 /* peer doesn't understand rfc1644 */ -#define TAOF_OK 0x0002 /* peer does understand rfc1644 */ -#define TAOF_UNDEF 0 /* we don't know yet */ -#endif /* notyet */ -}; -#define rmx_taop(r) ((struct rmxp_tao *)(r).rmx_filler) - -#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) -#define sototcpcb(so) (intotcpcb(sotoinpcb(so))) - -/* - * The smoothed round-trip time and estimated variance - * are stored as fixed point numbers scaled by the values below. - * For convenience, these scales are also used in smoothing the average - * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). - * With these scales, srtt has 3 bits to the right of the binary point, - * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the - * binary point, and is smoothed with an ALPHA of 0.75. - */ -#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ -#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ -#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ -#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ -#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ - -/* - * The initial retransmission should happen at rtt + 4 * rttvar. - * Because of the way we do the smoothing, srtt and rttvar - * will each average +1/2 tick of bias. 
When we compute - * the retransmit timer, we want 1/2 tick of rounding and - * 1 extra tick because of +-1/2 tick uncertainty in the - * firing of the timer. The bias will give us exactly the - * 1.5 tick we need. But, because the bias is - * statistical, we have to test that we don't drop below - * the minimum feasible timer (which is 2 ticks). - * This version of the macro adapted from a paper by Lawrence - * Brakmo and Larry Peterson which outlines a problem caused - * by insufficient precision in the original implementation, - * which results in inappropriately large RTO values for very - * fast networks. - */ -#define TCP_REXMTVAL(tp) \ - max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ - + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) -#endif /* __APPLE_API_PRIVATE */ - -#ifdef __APPLE_API_UNSTABLE /* * TCP statistics. * Many of these should be kept per connection, @@ -456,18 +456,20 @@ struct tcpstat { u_long tcps_mturesent; /* resends due to MTU discovery */ u_long tcps_listendrop; /* listen queue overflows */ }; -#endif /* __APPLE_API_UNSTABLE */ /* * TCB structure exported to user-land via sysctl(3). * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. */ -#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) struct xtcpcb { size_t xt_len; +#ifdef KERNEL_PRIVATE + struct inpcb_compat xt_inp; +#else struct inpcb xt_inp; -#if KERNEL +#endif +#ifdef KERNEL_PRIVATE struct otcpcb xt_tp; #else struct tcpcb xt_tp; @@ -475,7 +477,6 @@ struct xtcpcb { struct xsocket xt_socket; u_quad_t xt_alignment_hack; }; -#endif /* * Names for TCP sysctl objects @@ -495,6 +496,7 @@ struct xtcpcb { #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ #define TCPCTL_MAXID 14 +#ifdef KERNEL_PRIVATE #define TCPCTL_NAMES { \ { 0, 0 }, \ { "rfc1323", CTLTYPE_INT }, \ @@ -512,68 +514,72 @@ struct xtcpcb { { "v6mssdflt", CTLTYPE_INT }, \ } -#ifdef __APPLE_API_PRIVATE -#ifdef KERNEL #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_tcp); -#endif +#endif /* SYSCTL_DECL */ extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ extern struct inpcbinfo tcbinfo; extern struct tcpstat tcpstat; /* tcp statistics */ extern int tcp_mssdflt; /* XXX */ extern int tcp_minmss; -extern int tcp_delack_enabled; +extern int tcp_minmssoverload; extern int tcp_do_newreno; extern int ss_fltsz; extern int ss_fltsz_local; #ifdef __APPLE__ extern u_long tcp_now; /* for RFC 1323 timestamps */ extern int tcp_delack_enabled; -#endif +#endif /* __APPLE__ */ -void tcp_canceltimers __P((struct tcpcb *)); +void tcp_canceltimers(struct tcpcb *); struct tcpcb * - tcp_close __P((struct tcpcb *)); -void tcp_ctlinput __P((int, struct sockaddr *, void *)); -int tcp_ctloutput __P((struct socket *, struct sockopt *)); + tcp_close(struct tcpcb *); +void tcp_ctlinput(int, struct sockaddr *, void *); +int tcp_ctloutput(struct socket *, struct sockopt *); struct tcpcb * - tcp_drop __P((struct tcpcb *, int)); -void tcp_drain __P((void)); -void tcp_fasttimo __P((void)); + tcp_drop(struct tcpcb *, int); +void tcp_drain(void); +void tcp_fasttimo(void); struct rmxp_tao * - tcp_gettaocache __P((struct inpcb *)); -void tcp_init __P((void)); -void tcp_input __P((struct mbuf *, int)); -void tcp_mss __P((struct tcpcb *, int)); -int tcp_mssopt __P((struct tcpcb *)); -void tcp_drop_syn_sent __P((struct inpcb *, int)); -void tcp_mtudisc __P((struct inpcb *, int)); + tcp_gettaocache(struct inpcb *); +void tcp_init(void); +void tcp_input(struct mbuf *, int); +void tcp_mss(struct tcpcb 
*, int); +int tcp_mssopt(struct tcpcb *); +void tcp_drop_syn_sent(struct inpcb *, int); +void tcp_mtudisc(struct inpcb *, int); struct tcpcb * - tcp_newtcpcb __P((struct inpcb *)); -int tcp_output __P((struct tcpcb *)); -void tcp_quench __P((struct inpcb *, int)); -void tcp_respond __P((struct tcpcb *, void *, - struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int)); + tcp_newtcpcb(struct inpcb *); +int tcp_output(struct tcpcb *); +void tcp_quench(struct inpcb *, int); +void tcp_respond(struct tcpcb *, void *, + struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); struct rtentry * - tcp_rtlookup __P((struct inpcb *)); -void tcp_setpersist __P((struct tcpcb *)); -void tcp_slowtimo __P((void)); + tcp_rtlookup(struct inpcb *); +void tcp_setpersist(struct tcpcb *); +void tcp_slowtimo(void); struct tcptemp * - tcp_maketemplate __P((struct tcpcb *)); -void tcp_fillheaders __P((struct tcpcb *, void *, void *)); + tcp_maketemplate(struct tcpcb *); +void tcp_fillheaders(struct tcpcb *, void *, void *); struct tcpcb * - tcp_timers __P((struct tcpcb *, int)); -void tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *, - int)); + tcp_timers(struct tcpcb *, int); +void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); +int tcp_lock (struct socket *, int, int); +int tcp_unlock (struct socket *, int, int); +#ifdef _KERN_LOCKS_H_ +lck_mtx_t * tcp_getlock (struct socket *, int); +#else +void * tcp_getlock (struct socket *, int); +#endif + extern struct pr_usrreqs tcp_usrreqs; extern u_long tcp_sendspace; extern u_long tcp_recvspace; -tcp_seq tcp_new_isn __P((struct tcpcb *)); +tcp_seq tcp_new_isn(struct tcpcb *); -#endif /* KERNEL */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/bsd/netinet/tcpip.h b/bsd/netinet/tcpip.h index 79410f82d..b9830e140 100644 --- a/bsd/netinet/tcpip.h +++ b/bsd/netinet/tcpip.h @@ -58,7 +58,7 @@ #ifndef _NETINET_TCPIP_H_ #define _NETINET_TCPIP_H_ #include - +#include /* * Tcp+ip header, after ip options removed.
*/ diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index 5d7ce5151..4eafd6c8f 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -68,6 +68,7 @@ #include #include +#include #include #include @@ -91,6 +92,7 @@ #if IPSEC #include extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif /*IPSEC*/ @@ -133,9 +135,22 @@ extern int apple_hwcksum_rx; extern int esp_udp_encap_port; extern u_long route_generation; +extern void ipfwsyslog( int level, char *format,...); + +extern int fw_verbose; + +#define log_in_vain_log( a ) { \ + if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ + ipfwsyslog a ; \ + } \ + else log a ; \ +} + struct udpstat udpstat; /* from udp_var.h */ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD, &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); +SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, CTLFLAG_RD, + &udbinfo.ipi_count, 0, "Number of active PCBs"); static struct sockaddr_in udp_in = { sizeof(udp_in), AF_INET }; #if INET6 @@ -152,25 +167,23 @@ struct udp_ip6 { } udp_ip6; #endif /* INET6 */ -static void udp_append __P((struct inpcb *last, struct ip *ip, - struct mbuf *n, int off)); +static void udp_append(struct inpcb *last, struct ip *ip, + struct mbuf *n, int off); #if INET6 -static void ip_2_ip6_hdr __P((struct ip6_hdr *ip6, struct ip *ip)); +static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip); #endif -static int udp_detach __P((struct socket *so)); -static int udp_output __P((struct inpcb *, struct mbuf *, struct sockaddr *, - struct mbuf *, struct proc *)); +static int udp_detach(struct socket *so); +static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); +extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ); void udp_init() { - vm_size_t str_size; - int stat; - u_char fake_owner; - struct in_addr laddr; - struct in_addr faddr; - u_short lport; + vm_size_t str_size; + struct inpcbinfo *pcbinfo; + LIST_INIT(&udb); udbinfo.listhead = &udb; @@ -181,7 +194,21 @@ udp_init() str_size = (vm_size_t) sizeof(struct inpcb); udbinfo.ipi_zone = (void *) zinit(str_size, 80000*str_size, 8192, "udpcb"); - udbinfo.last_pcb = 0; + pcbinfo = &udbinfo; + /* + * allocate lock group attribute and group for udp pcb mutexes + */ + pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(pcbinfo->mtx_grp_attr); + + pcbinfo->mtx_grp = lck_grp_alloc_init("udppcb", pcbinfo->mtx_grp_attr); + + pcbinfo->mtx_attr = lck_attr_alloc_init(); + lck_attr_setdefault(pcbinfo->mtx_attr); + + if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) + return; /* pretty much dead if this fails... */ + in_pcb_nat_init(&udbinfo, AF_INET, IPPROTO_UDP, SOCK_DGRAM); #else udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets, @@ -227,6 +254,7 @@ udp_input(m, iphlen) int len; struct ip save_ip; struct sockaddr *append_sa; + struct inpcbinfo *pcbinfo = &udbinfo; udpstat.udps_ipackets++; @@ -325,6 +353,7 @@ doudpcksum: if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { struct inpcb *last; + lck_rw_lock_shared(pcbinfo->mtx); /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match @@ -341,6 +370,7 @@ doudpcksum: * fixing the interface. Maybe 4.5BSD will remedy this?) */ + /* * Construct sockaddr format source address. 
*/ @@ -360,39 +390,67 @@ doudpcksum: if (inp->inp_socket == &udbinfo.nat_dummy_socket) continue; #endif -#if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (inp->inp_socket == NULL) continue; + if (inp != sotoinpcb(inp->inp_socket)) + panic("udp_input: bad so back ptr inp=%x\n", inp); +#if INET6 + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; #endif - if (inp->inp_lport != uh->uh_dport) + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) { + continue; + } + + udp_lock(inp->inp_socket, 1, 0); + + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_unlock(inp->inp_socket, 1, 0); continue; + } + + if (inp->inp_lport != uh->uh_dport) { + udp_unlock(inp->inp_socket, 1, 0); + continue; + } if (inp->inp_laddr.s_addr != INADDR_ANY) { if (inp->inp_laddr.s_addr != - ip->ip_dst.s_addr) + ip->ip_dst.s_addr) { + udp_unlock(inp->inp_socket, 1, 0); continue; + } } if (inp->inp_faddr.s_addr != INADDR_ANY) { if (inp->inp_faddr.s_addr != ip->ip_src.s_addr || - inp->inp_fport != uh->uh_sport) + inp->inp_fport != uh->uh_sport) { + udp_unlock(inp->inp_socket, 1, 0); continue; + } } if (last != NULL) { struct mbuf *n; - #if IPSEC + int skipit = 0; /* check AH/ESP integrity. */ - if (ipsec_bypass == 0 && ipsec4_in_reject_so(m, last->inp_socket)) { - ipsecstat.in_polvio++; - /* do not inject data to pcb */ - } else + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject_so(m, last->inp_socket)) { + ipsecstat.in_polvio++; + /* do not inject data to pcb */ + skipit = 1; + } + lck_mtx_unlock(sadb_mutex); + } + if (skipit == 0) #endif /*IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { udp_append(last, ip, n, iphlen + sizeof(struct udphdr)); } + udp_unlock(last->inp_socket, 1, 0); } last = inp; /* @@ -406,6 +464,7 @@ doudpcksum: if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } + lck_rw_done(pcbinfo->mtx); if (last == NULL) { /* @@ -418,15 +477,23 @@ doudpcksum: } #if IPSEC /* check AH/ESP integrity. */ - if (ipsec_bypass == 0 && m && ipsec4_in_reject_so(m, last->inp_socket)) { - ipsecstat.in_polvio++; - goto bad; + if (ipsec_bypass == 0 && m) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject_so(m, last->inp_socket)) { + ipsecstat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + udp_unlock(last->inp_socket, 1, 0); + goto bad; + } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ udp_append(last, ip, m, iphlen + sizeof(struct udphdr)); + udp_unlock(last->inp_socket, 1, 0); return; } +#if IPSEC /* * UDP to port 4500 with a payload where the first four bytes are * not zero is a UDP encapsulated IPSec packet. Packets where @@ -472,6 +539,7 @@ doudpcksum: return; } } +#endif /* * Locate pcb for datagram. 
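The multicast delivery loop in the hunk above also shows the new IPSec discipline: the policy check must run under sadb_mutex, so the old inline if/else becomes a flag that is acted on only after the mutex is dropped. Reduced to its essentials (a sketch, not the verbatim kernel code; m, n, ip, iphlen, and last are as in udp_input):

/* Sketch: take the verdict under sadb_mutex, deliver outside it. */
int skipit = 0;

if (ipsec_bypass == 0) {
	lck_mtx_lock(sadb_mutex);
	if (ipsec4_in_reject_so(m, last->inp_socket)) {
		ipsecstat.in_polvio++;	/* policy violation: skip this pcb */
		skipit = 1;
	}
	lck_mtx_unlock(sadb_mutex);
}
if (skipit == 0 && (n = m_copy(m, 0, M_COPYALL)) != NULL)
	udp_append(last, ip, n, iphlen + sizeof(struct udphdr));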
@@ -480,13 +548,25 @@ doudpcksum: ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (inp == NULL) { if (log_in_vain) { - char buf[4*sizeof "123"]; - - strcpy(buf, inet_ntoa(ip->ip_dst)); - log(LOG_INFO, - "Connection attempt to UDP %s:%d from %s:%d\n", - buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src), - ntohs(uh->uh_sport)); + char buf[MAX_IPv4_STR_LEN]; + char buf2[MAX_IPv4_STR_LEN]; + + /* check src and dst address */ + if (log_in_vain != 3) + log(LOG_INFO, + "Connection attempt to UDP %s:%d from %s:%d\n", + inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)), + ntohs(uh->uh_dport), + inet_ntop(AF_INET, &ip->ip_src, buf2, sizeof(buf2)), + ntohs(uh->uh_sport)); + else if (!(m->m_flags & (M_BCAST | M_MCAST)) && + ip->ip_dst.s_addr != ip->ip_src.s_addr) + log_in_vain_log((LOG_INFO, + "Stealth Mode connection attempt to UDP %s:%d from %s:%d\n", + inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)), + ntohs(uh->uh_dport), + inet_ntop(AF_INET, &ip->ip_src, buf2, sizeof(buf2)), + ntohs(uh->uh_sport))) } udpstat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { @@ -498,18 +578,31 @@ doudpcksum: goto bad; #endif if (blackhole) - goto bad; + if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type != IFT_LOOP) + goto bad; *ip = save_ip; ip->ip_len += iphlen; icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0); KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); return; } -#if IPSEC - if (ipsec_bypass == 0 && inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) { - ipsecstat.in_polvio++; + udp_lock(inp->inp_socket, 1, 0); + + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_unlock(inp->inp_socket, 1, 0); goto bad; } +#if IPSEC + if (ipsec_bypass == 0 && inp != NULL) { + lck_mtx_lock(sadb_mutex); + if (ipsec4_in_reject_so(m, inp->inp_socket)) { + ipsecstat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + udp_unlock(inp->inp_socket, 1, 0); + goto bad; + } + lck_mtx_unlock(sadb_mutex); + } #endif /*IPSEC*/ /* @@ -545,11 +638,13 @@ doudpcksum: } else #endif append_sa = (struct sockaddr *)&udp_in; - if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) { + if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts, NULL) == 0) { udpstat.udps_fullsock++; - goto bad; } - sorwakeup(inp->inp_socket); + else { + sorwakeup(inp->inp_socket); + } + udp_unlock(inp->inp_socket, 1, 0); KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); return; bad: @@ -622,10 +717,7 @@ udp_append(last, ip, n, off) #endif append_sa = (struct sockaddr *)&udp_in; m_adj(n, off); - if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) { - m_freem(n); - if (opts) - m_freem(opts); + if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts, NULL) == 0) { udpstat.udps_fullsock++; } else sorwakeup(last->inp_socket); @@ -653,10 +745,9 @@ udp_ctlinput(cmd, sa, vip) { struct ip *ip = vip; struct udphdr *uh; - void (*notify) __P((struct inpcb *, int)) = udp_notify; + void (*notify)(struct inpcb *, int) = udp_notify; struct in_addr faddr; struct inpcb *inp; - int s; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) @@ -670,21 +761,26 @@ udp_ctlinput(cmd, sa, vip) else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip) { - s = splnet(); uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, 0, NULL); - if (inp != NULL && inp->inp_socket != NULL) + if (inp != NULL && inp->inp_socket != NULL) { + 
udp_lock(inp->inp_socket, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_unlock(inp->inp_socket, 1, 0); + return; + } (*notify)(inp, inetctlerrmap[cmd]); - splx(s); + udp_unlock(inp->inp_socket, 1, 0); + } } else - in_pcbnotifyall(&udb, faddr, inetctlerrmap[cmd], notify); + in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify); } static int udp_pcblist SYSCTL_HANDLER_ARGS { - int error, i, n, s; + int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; @@ -693,58 +789,64 @@ udp_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - if (req->oldptr == 0) { + lck_rw_lock_exclusive(udbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { n = udbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); + lck_rw_done(udbinfo.mtx); return 0; } - if (req->newptr != 0) + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(udbinfo.mtx); return EPERM; + } /* * OK, now we're committed to doing something. */ - s = splnet(); gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; - splx(s); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); - if (error) + if (error) { + lck_rw_done(udbinfo.mtx); return error; + } /* * We are done if there is no pcb */ - if (n == 0) + if (n == 0) { + lck_rw_done(udbinfo.mtx); return 0; + } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { + lck_rw_done(udbinfo.mtx); return ENOMEM; } for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { - if (inp->inp_gencnt <= gencnt) + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; } - splx(s); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ - bcopy(inp, &xi.xi_inp, sizeof *inp); + inpcb_to_compat(inp, &xi.xi_inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); error = SYSCTL_OUT(req, &xi, sizeof xi); @@ -758,14 +860,13 @@ udp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. 
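* (Sizing note: the req->oldptr == USER_ADDR_NULL pass above reserved
* room for n + n/8 entries, so the snapshot still fits if the PCB count
* grew by up to 12.5% between the two sysctl calls; re-exporting
* xig_gen below lets userland detect anything more drastic and retry.)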
*/ - s = splnet(); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; - splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); + lck_rw_done(udbinfo.mtx); return error; } @@ -774,6 +875,21 @@ SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, +static __inline__ u_int16_t +get_socket_id(struct socket * s) +{ + u_int16_t val; + + if (s == NULL) { + return (0); + } + val = (u_int16_t)(((u_int32_t)s) / sizeof(struct socket)); + if (val == 0) { + val = 0xffff; + } + return (val); +} + static int udp_output(inp, m, addr, control, p) register struct inpcb *inp; @@ -784,8 +900,12 @@ udp_output(inp, m, addr, control, p) { register struct udpiphdr *ui; register int len = m->m_pkthdr.len; - struct in_addr laddr; - int s = 0, error = 0; + struct sockaddr_in *sin, src; + struct in_addr origladdr, laddr, faddr; + u_short lport, fport; + struct sockaddr_in *ifaddr; + int error = 0, udp_dodisconnect = 0; + KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); @@ -806,8 +926,12 @@ udp_output(inp, m, addr, control, p) * Reacquire a new source address if INADDR_ANY was specified */ +#if 1 + lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); +#endif + if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->generation_id != route_generation) { - if (ifa_foraddr(inp->inp_laddr.s_addr) == NULL) { /* src address is gone */ + if (ifa_foraddr(inp->inp_laddr.s_addr) == 0) { /* src address is gone */ if (inp->inp_flags & INP_INADDR_ANY) inp->inp_faddr.s_addr = INADDR_ANY; /* new src will be set later */ else { @@ -819,23 +943,51 @@ udp_output(inp, m, addr, control, p) inp->inp_route.ro_rt = (struct rtentry *)0; } + origladdr= laddr = inp->inp_laddr; + faddr = inp->inp_faddr; + lport = inp->inp_lport; + fport = inp->inp_fport; + if (addr) { - laddr = inp->inp_laddr; - if (inp->inp_faddr.s_addr != INADDR_ANY) { + sin = (struct sockaddr_in *)addr; + if (faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; } - /* - * Must block input while temporarily connected. - */ - s = splnet(); - error = in_pcbconnect(inp, addr, p); - if (error) { - splx(s); - goto release; + if (lport == 0) { + /* + * In case we don't have a local port set, go through the full connect. + * We don't have a local port yet (ie, we can't be looked up), + * so it's not an issue if the input runs at the same time we do this. + */ + error = in_pcbconnect(inp, addr, p); + if (error) { + goto release; + } + laddr = inp->inp_laddr; + lport = inp->inp_lport; + faddr = inp->inp_faddr; + fport = inp->inp_fport; + udp_dodisconnect = 1; + } + else { + /* Fast path case + * we have a full address and a local port. + * use those info to build the packet without changing the pcb + * and interfering with the input path. See 3851370 + */ + if (laddr.s_addr == INADDR_ANY) { + if ((error = in_pcbladdr(inp, addr, &ifaddr)) != 0) + goto release; + laddr = ifaddr->sin_addr; + inp->inp_flags |= INP_INADDR_ANY; /* from pcbconnect: remember we don't care about src addr.*/ + } + + faddr = sin->sin_addr; + fport = sin->sin_port; } } else { - if (inp->inp_faddr.s_addr == INADDR_ANY) { + if (faddr.s_addr == INADDR_ANY) { error = ENOTCONN; goto release; } @@ -859,10 +1011,10 @@ udp_output(inp, m, addr, control, p) ui = mtod(m, struct udpiphdr *); bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? 
*/ ui->ui_pr = IPPROTO_UDP; - ui->ui_src = inp->inp_laddr; - ui->ui_dst = inp->inp_faddr; - ui->ui_sport = inp->inp_lport; - ui->ui_dport = inp->inp_fport; + ui->ui_src = laddr; + ui->ui_dst = faddr; + ui->ui_sport = lport; + ui->ui_dport = fport; ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); /* @@ -890,23 +1042,22 @@ udp_output(inp, m, addr, control, p) goto abort; } #endif /*IPSEC*/ - error = ip_output(m, inp->inp_options, &inp->inp_route, + m->m_pkthdr.socket_id = get_socket_id(inp->inp_socket); + error = ip_output_list(m, 0, inp->inp_options, &inp->inp_route, (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)), inp->inp_moptions); - if (addr) { + if (udp_dodisconnect) { in_pcbdisconnect(inp); - inp->inp_laddr = laddr; /* XXX rehash? */ - splx(s); + inp->inp_laddr = origladdr; /* XXX rehash? */ } KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0,0,0,0); return (error); abort: - if (addr) { + if (udp_dodisconnect) { in_pcbdisconnect(inp); - inp->inp_laddr = laddr; /* XXX rehash? */ - splx(s); + inp->inp_laddr = origladdr; /* XXX rehash? */ } release: @@ -934,15 +1085,12 @@ static int udp_abort(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) - return EINVAL; /* ??? possible? panic instead? */ + panic("udp_abort: so=%x null inp\n", so); /* ??? possible? panic instead? */ soisdisconnected(so); - s = splnet(); in_pcbdetach(inp); - splx(s); return 0; } @@ -950,20 +1098,18 @@ static int udp_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; - int error; long s; + int error; inp = sotoinpcb(so); if (inp != 0) - return EINVAL; + panic ("udp_attach so=%x inp=%x\n", so, inp); - error = soreserve(so, udp_sendspace, udp_recvspace); - if (error) - return error; - s = splnet(); error = in_pcballoc(so, &udbinfo, p); - splx(s); if (error) return error; + error = soreserve(so, udp_sendspace, udp_recvspace); + if (error) + return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; @@ -974,14 +1120,12 @@ static int udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; - int s, error; + int error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; - s = splnet(); error = in_pcbbind(inp, nam, p); - splx(s); return error; } @@ -989,17 +1133,15 @@ static int udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; - int s, error; + int error; inp = sotoinpcb(so); if (inp == 0) return EINVAL; if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; - s = splnet(); error = in_pcbconnect(inp, nam, p); - splx(s); - if (error == 0) + if (error == 0) soisconnected(so); return error; } @@ -1008,14 +1150,12 @@ static int udp_detach(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) - return EINVAL; - s = splnet(); + panic("udp_detach: so=%x null inp\n", so); /* ??? possible? panic instead? 
*/ in_pcbdetach(inp); - splx(s); + inp->inp_state = INPCB_STATE_DEAD; return 0; } @@ -1023,7 +1163,6 @@ static int udp_disconnect(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) @@ -1031,10 +1170,8 @@ udp_disconnect(struct socket *so) if (inp->inp_faddr.s_addr == INADDR_ANY) return ENOTCONN; - s = splnet(); in_pcbdisconnect(inp); inp->inp_laddr.s_addr = INADDR_ANY; - splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } @@ -1070,6 +1207,135 @@ struct pr_usrreqs udp_usrreqs = { pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown, - in_setsockaddr, sosend, soreceive, sopoll + in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; + +int +udp_lock(so, refcount, debug) + struct socket *so; + int refcount, debug; +{ + int lr_saved; +#ifdef __ppc__ + if (debug == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = debug; +#endif + + if (so->so_pcb) { + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } + else { + panic("udp_lock: so=%x NO PCB! lr=%x\n", so, lr_saved); + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + } + + if (refcount) + so->so_usecount++; + + so->reserved3= lr_saved; + return (0); +} + +int +udp_unlock(so, refcount, debug) + struct socket *so; + int refcount; + int debug; +{ + int lr_saved; + struct inpcb *inp = sotoinpcb(so); + struct inpcbinfo *pcbinfo = &udbinfo; +#ifdef __ppc__ + if (debug == 0) { + __asm__ volatile("mflr %0" : "=r" (lr_saved)); + } + else lr_saved = debug; +#endif + if (refcount) { + so->so_usecount--; +#if 0 + if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { + if (lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + in_pcbdispose(inp); + lck_rw_done(pcbinfo->mtx); + return(0); + } + } +#endif + } + if (so->so_pcb == NULL) { + panic("udp_unlock: so=%x NO PCB! lr=%x\n", so, lr_saved); + lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + } + else { + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } + + + so->reserved4 = lr_saved; + return (0); +} + +lck_mtx_t * +udp_getlock(so, locktype) + struct socket *so; + int locktype; +{ + struct inpcb *inp = sotoinpcb(so); + + + if (so->so_pcb) + return(inp->inpcb_mtx); + else { + panic("udp_getlock: so=%x NULL so_pcb\n", so); + return (so->so_proto->pr_domain->dom_mtx); + } +} + +void +udp_slowtimo() +{ + struct inpcb *inp, *inpnxt; + struct socket *so; + struct inpcbinfo *pcbinfo = &udbinfo; + + lck_rw_lock_exclusive(pcbinfo->mtx); + + for (inp = udb.lh_first; inp != NULL; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; + + /* Ignore nat/SharedIP dummy pcbs */ + if (inp->inp_socket == &udbinfo.nat_dummy_socket) + continue; + + if (inp->inp_wantcnt != WNT_STOPUSING) + continue; + + so = inp->inp_socket; + if (!lck_mtx_try_lock(inp->inpcb_mtx)) /* skip if busy, no hurry for cleanup... 
*/ + continue; + + if (so->so_usecount == 0) + in_pcbdispose(inp); + else + lck_mtx_unlock(inp->inpcb_mtx); + } + lck_rw_done(pcbinfo->mtx); +} + +int +ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ) +{ + if ( dstaddr == srcaddr ){ + return 0; + } + return 1; +} + diff --git a/bsd/netinet/udp_var.h b/bsd/netinet/udp_var.h index 4c2976f31..3a1c1873b 100644 --- a/bsd/netinet/udp_var.h +++ b/bsd/netinet/udp_var.h @@ -59,7 +59,6 @@ #include #include -#ifdef __APPLE_API_PRIVATE /* * UDP kernel structures and variables. @@ -79,9 +78,7 @@ struct udpiphdr { #define ui_sum ui_u.uh_sum #define ui_next ui_i.ih_next #define ui_prev ui_i.ih_prev -#endif /* __APPLE_API_PRIVATE */ -#ifdef __APPLE_API_UNSTABLE struct udpstat { /* input statistics: */ u_long udps_ipackets; /* total input packets */ @@ -102,7 +99,6 @@ struct udpstat { u_long udps_noportmcast; #endif }; -#endif /* __APPLE_API_UNSTABLE */ /* * Names for UDP sysctl objects @@ -114,6 +110,7 @@ struct udpstat { #define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */ #define UDPCTL_MAXID 6 +#ifdef KERNEL_PRIVATE #define UDPCTL_NAMES { \ { 0, 0 }, \ { "checksum", CTLTYPE_INT }, \ @@ -123,8 +120,6 @@ struct udpstat { { "pcblist", CTLTYPE_STRUCT }, \ } -#ifdef __APPLE_API_PRIVATE -#ifdef KERNEL SYSCTL_DECL(_net_inet_udp); extern struct pr_usrreqs udp_usrreqs; @@ -135,13 +130,20 @@ extern u_long udp_recvspace; extern struct udpstat udpstat; extern int log_in_vain; -void udp_ctlinput __P((int, struct sockaddr *, void *)); -void udp_init __P((void)); -void udp_input __P((struct mbuf *, int)); +void udp_ctlinput(int, struct sockaddr *, void *); +void udp_init(void); +void udp_input(struct mbuf *, int); -void udp_notify __P((struct inpcb *inp, int errno)); -int udp_shutdown __P((struct socket *so)); +void udp_notify(struct inpcb *inp, int errno); +int udp_shutdown(struct socket *so); +int udp_lock (struct socket *, int, int); +int udp_unlock (struct socket *, int, int); +void udp_slowtimo (void); +#ifdef _KERN_LOCKS_H_ +lck_mtx_t * udp_getlock (struct socket *, int); +#else +void * udp_getlock (struct socket *, int); #endif -#endif /* __APPLE_API_PRIVATE */ -#endif +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET_UDP_VAR_H_ */ diff --git a/bsd/netinet6/Makefile b/bsd/netinet6/Makefile index 37a4991a7..fc12c8bef 100644 --- a/bsd/netinet6/Makefile +++ b/bsd/netinet6/Makefile @@ -20,16 +20,18 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - ah.h esp_rijndael.h in6_ifattach.h ip6.h \ - ip6_var.h ipsec.h pim6.h tcp6_var.h \ - ah6.h icmp6.h in6_pcb.h ip6_ecn.h \ - ip6protosw.h ipsec6.h pim6_var.h udp6_var.h \ - esp.h in6.h in6_prefix.h \ - ipcomp.h mld6_var.h raw_ip6.h esp6.h \ - in6_gif.h in6_var.h ip6_mroute.h ipcomp6.h \ - nd6.h scope6_var.h ip6_fw.h + ah.h ipsec.h pim6.h \ + esp.h in6.h ipcomp.h raw_ip6.h \ + in6_var.h ip6_mroute.h nd6.h ip6_fw.h +PRIVATE_DATAFILES = \ + in6_pcb.h ip6_var.h pim6_var.h +PRIVATE_KERNELFILES = \ + ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \ + in6_prefix.h ip6_ecn.h ip6_fw.h \ + ip6protosw.h ipcomp6.h ipsec6.h mld6_var.h \ + raw_ip6.h scope6_var.h tcp6_var.h udp6_var.h INSTALL_MI_LIST = ${DATAFILES} @@ -37,14 +39,11 @@ INSTALL_MI_DIR = netinet6 EXPORT_MI_LIST = ${DATAFILES} -EXPORT_MI_DIR = netinet6 +EXPORT_MI_DIR = ${INSTALL_MI_DIR} -PRIVATE_DATAFILES = \ - ip6_fw.h +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} -INSTALL_MI_LCL_KERN_LIST = ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) - - 
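The netinet6 headers that follow get the same prototype modernization as the tcp/udp ones above: the pre-ANSI __P() wrapper, which let one declaration serve both K&R and ANSI compilers by expanding to an empty argument list on the former, is dropped now that ANSI C is assumed. The shape of the change, using a declaration from ah.h:

/* Before: __P((...)) expands to () on a K&R compiler, hiding the types */
extern int ah_hdrlen __P((struct secasvar *));
/* After: a plain ANSI prototype */
extern int ah_hdrlen(struct secasvar *);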
diff --git a/bsd/netinet6/ah.h b/bsd/netinet6/ah.h index 62c5eda54..3e7f8dcf8 100644 --- a/bsd/netinet6/ah.h +++ b/bsd/netinet6/ah.h @@ -55,8 +55,7 @@ struct newah { /* variable size, 32bit bound*/ /* Authentication data */ }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct secasvar; struct ah_algorithm_state { @@ -65,29 +64,28 @@ struct ah_algorithm_state { }; struct ah_algorithm { - int (*sumsiz) __P((struct secasvar *)); - int (*mature) __P((struct secasvar *)); + int (*sumsiz)(struct secasvar *); + int (*mature)(struct secasvar *); int keymin; /* in bits */ int keymax; /* in bits */ const char *name; - int (*init) __P((struct ah_algorithm_state *, struct secasvar *)); - void (*update) __P((struct ah_algorithm_state *, caddr_t, size_t)); - void (*result) __P((struct ah_algorithm_state *, caddr_t)); + int (*init)(struct ah_algorithm_state *, struct secasvar *); + void (*update)(struct ah_algorithm_state *, caddr_t, size_t); + void (*result)(struct ah_algorithm_state *, caddr_t); }; #define AH_MAXSUMSIZE 16 -extern const struct ah_algorithm *ah_algorithm_lookup __P((int)); +extern const struct ah_algorithm *ah_algorithm_lookup(int); /* cksum routines */ -extern int ah_hdrlen __P((struct secasvar *)); +extern int ah_hdrlen(struct secasvar *); -extern size_t ah_hdrsiz __P((struct ipsecrequest *)); -extern void ah4_input __P((struct mbuf *, int)); -extern int ah4_output __P((struct mbuf *, struct ipsecrequest *)); -extern int ah4_calccksum __P((struct mbuf *, caddr_t, size_t, - const struct ah_algorithm *, struct secasvar *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern size_t ah_hdrsiz(struct ipsecrequest *); +extern void ah4_input(struct mbuf *, int); +extern int ah4_output(struct mbuf *, struct ipsecrequest *); +extern int ah4_calccksum(struct mbuf *, caddr_t, size_t, + const struct ah_algorithm *, struct secasvar *); +#endif /* KERNEL_PRIVATE */ -#endif /*_NETINET6_AH_H_*/ +#endif /* _NETINET6_AH_H_ */ diff --git a/bsd/netinet6/ah6.h b/bsd/netinet6/ah6.h index 8ac8dd613..1525e3fc5 100644 --- a/bsd/netinet6/ah6.h +++ b/bsd/netinet6/ah6.h @@ -38,18 +38,16 @@ #define _NETINET6_AH6_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct secasvar; -extern int ah6_input __P((struct mbuf **, int *)); -extern int ah6_output __P((struct mbuf *, u_char *, struct mbuf *, - struct ipsecrequest *)); -extern int ah6_calccksum __P((struct mbuf *, caddr_t, size_t, - const struct ah_algorithm *, struct secasvar *)); +extern int ah6_input(struct mbuf **, int *); +extern int ah6_output(struct mbuf *, u_char *, struct mbuf *, + struct ipsecrequest *); +extern int ah6_calccksum(struct mbuf *, caddr_t, size_t, + const struct ah_algorithm *, struct secasvar *); -extern void ah6_ctlinput __P((int, struct sockaddr *, void *)); +extern void ah6_ctlinput(int, struct sockaddr *, void *); #endif /* KERNEL_PRIVATE */ #endif /*_NETINET6_AH6_H_*/ diff --git a/bsd/netinet6/ah_core.c b/bsd/netinet6/ah_core.c index 519c1b6a4..c30b9c759 100644 --- a/bsd/netinet6/ah_core.c +++ b/bsd/netinet6/ah_core.c @@ -86,57 +86,46 @@ #define HMACSIZE 16 -static int ah_sumsiz_1216 __P((struct secasvar *)); -static int ah_sumsiz_zero __P((struct secasvar *)); -static int ah_none_mature __P((struct secasvar *)); -static int ah_none_init __P((struct ah_algorithm_state *, struct secasvar *)); -static void ah_none_loop __P((struct ah_algorithm_state *, caddr_t, size_t)); -static void ah_none_result __P((struct ah_algorithm_state *, caddr_t));
-static int ah_keyed_md5_mature __P((struct secasvar *)); -static int ah_keyed_md5_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_keyed_md5_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_keyed_md5_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_keyed_sha1_mature __P((struct secasvar *)); -static int ah_keyed_sha1_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_keyed_sha1_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_keyed_sha1_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_hmac_md5_mature __P((struct secasvar *)); -static int ah_hmac_md5_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_hmac_md5_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_hmac_md5_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_hmac_sha1_mature __P((struct secasvar *)); -static int ah_hmac_sha1_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_hmac_sha1_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_hmac_sha1_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_hmac_sha2_256_mature __P((struct secasvar *)); -static int ah_hmac_sha2_256_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_hmac_sha2_256_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_hmac_sha2_256_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_hmac_sha2_384_mature __P((struct secasvar *)); -static int ah_hmac_sha2_384_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_hmac_sha2_384_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_hmac_sha2_384_result __P((struct ah_algorithm_state *, caddr_t)); -static int ah_hmac_sha2_512_mature __P((struct secasvar *)); -static int ah_hmac_sha2_512_init __P((struct ah_algorithm_state *, - struct secasvar *)); -static void ah_hmac_sha2_512_loop __P((struct ah_algorithm_state *, caddr_t, - size_t)); -static void ah_hmac_sha2_512_result __P((struct ah_algorithm_state *, caddr_t)); - -static void ah_update_mbuf __P((struct mbuf *, int, int, - const struct ah_algorithm *, struct ah_algorithm_state *)); +static int ah_sumsiz_1216(struct secasvar *); +static int ah_sumsiz_zero(struct secasvar *); +static int ah_none_mature(struct secasvar *); +static int ah_none_init(struct ah_algorithm_state *, struct secasvar *); +static void ah_none_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_none_result(struct ah_algorithm_state *, caddr_t); +static int ah_keyed_md5_mature(struct secasvar *); +static int ah_keyed_md5_init(struct ah_algorithm_state *, struct secasvar *); +static void ah_keyed_md5_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_keyed_md5_result(struct ah_algorithm_state *, caddr_t); +static int ah_keyed_sha1_mature(struct secasvar *); +static int ah_keyed_sha1_init(struct ah_algorithm_state *, struct secasvar *); +static void ah_keyed_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_keyed_sha1_result(struct ah_algorithm_state *, caddr_t); +static int ah_hmac_md5_mature(struct secasvar *); +static int ah_hmac_md5_init(struct ah_algorithm_state *, struct secasvar *); +static void ah_hmac_md5_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_hmac_md5_result(struct ah_algorithm_state *, caddr_t); +static int 
ah_hmac_sha1_mature(struct secasvar *); +static int ah_hmac_sha1_init(struct ah_algorithm_state *, struct secasvar *); +static void ah_hmac_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_hmac_sha1_result(struct ah_algorithm_state *, caddr_t); +static int ah_hmac_sha2_256_mature(struct secasvar *); +static int ah_hmac_sha2_256_init(struct ah_algorithm_state *, + struct secasvar *); +static void ah_hmac_sha2_256_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_hmac_sha2_256_result(struct ah_algorithm_state *, caddr_t); +static int ah_hmac_sha2_384_mature(struct secasvar *); +static int ah_hmac_sha2_384_init(struct ah_algorithm_state *, + struct secasvar *); +static void ah_hmac_sha2_384_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_hmac_sha2_384_result(struct ah_algorithm_state *, caddr_t); +static int ah_hmac_sha2_512_mature(struct secasvar *); +static int ah_hmac_sha2_512_init(struct ah_algorithm_state *, + struct secasvar *); +static void ah_hmac_sha2_512_loop(struct ah_algorithm_state *, caddr_t, size_t); +static void ah_hmac_sha2_512_result(struct ah_algorithm_state *, caddr_t); + +static void ah_update_mbuf(struct mbuf *, int, int, + const struct ah_algorithm *, struct ah_algorithm_state *); const struct ah_algorithm * ah_algorithm_lookup(idx) diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index e055cd53b..06340aa1e 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -48,8 +48,8 @@ #include #include -#include #include +#include #include #include @@ -91,6 +91,7 @@ #define IPLEN_FLIPPED +extern lck_mtx_t *sadb_mutex; #if INET extern struct protosw inetsw[]; @@ -110,6 +111,7 @@ ah4_input(struct mbuf *m, int off) int s; size_t stripsiz = 0; + lck_mtx_lock(sadb_mutex); #ifndef PULLDOWN_TEST if (m->m_len < off + sizeof(struct newah)) { @@ -447,17 +449,9 @@ ah4_input(struct mbuf *m, int off) ipsecstat.in_nomem++; goto fail; } - - s = splimp(); - if (IF_QFULL(&ipintrq)) { - ipsecstat.in_inval++; - splx(s); - goto fail; - } - IF_ENQUEUE(&ipintrq, m); - m = NULL; - schednetisr(NETISR_IP); /*can be skipped but to make sure*/ - splx(s); + lck_mtx_unlock(sadb_mutex); + proto_input(PF_INET, m); + lck_mtx_lock(sadb_mutex); nxt = IPPROTO_DONE; } else { /* @@ -531,7 +525,9 @@ ah4_input(struct mbuf *m, int off) ipsecstat.in_polvio++; goto fail; } - (*ip_protox[nxt]->pr_input)(m, off); + lck_mtx_unlock(sadb_mutex); + ip_proto_dispatch_in(m, off, nxt, 0); + lck_mtx_lock(sadb_mutex); } else m_freem(m); m = NULL; @@ -543,6 +539,7 @@ ah4_input(struct mbuf *m, int off) key_freesav(sav); } ipsecstat.in_success++; + lck_mtx_unlock(sadb_mutex); return; fail: @@ -551,6 +548,7 @@ fail: printf("DP ah4_input call free SA:%p\n", sav)); key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); if (m) m_freem(m); return; @@ -577,14 +575,17 @@ ah6_input(mp, offp) int s; size_t stripsiz = 0; + lck_mtx_lock(sadb_mutex); + #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof(struct ah), {lck_mtx_unlock(sadb_mutex);return IPPROTO_DONE;}); ah = (struct ah *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct newah)); if (ah == NULL) { ipseclog((LOG_DEBUG, "IPv6 AH input: can't pullup\n")); ipsec6stat.in_inval++; + lck_mtx_unlock(sadb_mutex); return IPPROTO_DONE; } #endif @@ -662,7 +663,8 @@ ah6_input(mp, offp) goto fail; } #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(struct ah) + sizoff + siz1, IPPROTO_DONE); + 
IP6_EXTHDR_CHECK(m, off, sizeof(struct ah) + sizoff + siz1, + {lck_mtx_unlock(sadb_mutex);return IPPROTO_DONE;}); #else IP6_EXTHDR_GET(ah, struct ah *, m, off, sizeof(struct ah) + sizoff + siz1); @@ -748,7 +750,8 @@ ah6_input(mp, offp) sizoff = (sav->flags & SADB_X_EXT_OLD) ? 0 : 4; IP6_EXTHDR_CHECK(m, off, sizeof(struct ah) + sizoff + siz1 - + sizeof(struct ip6_hdr), IPPROTO_DONE); + + sizeof(struct ip6_hdr), + {lck_mtx_unlock(sadb_mutex);return IPPROTO_DONE;}); nip6 = (struct ip6_hdr *)((u_char *)(ah + 1) + sizoff + siz1); if (!IN6_ARE_ADDR_EQUAL(&nip6->ip6_src, &ip6->ip6_src) @@ -849,17 +852,9 @@ ah6_input(mp, offp) ipsec6stat.in_nomem++; goto fail; } - - s = splimp(); - if (IF_QFULL(&ip6intrq)) { - ipsec6stat.in_inval++; - splx(s); - goto fail; - } - IF_ENQUEUE(&ip6intrq, m); - m = NULL; - schednetisr(NETISR_IPV6); /* can be skipped but to make sure */ - splx(s); + lck_mtx_unlock(sadb_mutex); + proto_input(PF_INET6, m); + lck_mtx_lock(sadb_mutex); nxt = IPPROTO_DONE; } else { /* @@ -933,6 +928,7 @@ ah6_input(mp, offp) key_freesav(sav); } ipsec6stat.in_success++; + lck_mtx_unlock(sadb_mutex); return nxt; fail: @@ -941,6 +937,7 @@ fail: printf("DP ah6_input call free SA:%p\n", sav)); key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); if (m) m_freem(m); return IPPROTO_DONE; @@ -1007,6 +1004,7 @@ ah6_ctlinput(cmd, sa, d) */ sa6_src = ip6cp->ip6c_src; sa6_dst = (struct sockaddr_in6 *)sa; + lck_mtx_lock(sadb_mutex); sav = key_allocsa(AF_INET6, (caddr_t)&sa6_src->sin6_addr, (caddr_t)&sa6_dst->sin6_addr, @@ -1017,6 +1015,7 @@ ah6_ctlinput(cmd, sa, d) valid++; key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); /* XXX Further validation? */ diff --git a/bsd/netinet6/ah_output.c b/bsd/netinet6/ah_output.c index 5a4f92cff..4a7940c46 100644 --- a/bsd/netinet6/ah_output.c +++ b/bsd/netinet6/ah_output.c @@ -78,7 +78,7 @@ #include #if INET -static struct in_addr *ah4_finaldst __P((struct mbuf *)); +static struct in_addr *ah4_finaldst(struct mbuf *); #endif /* diff --git a/bsd/netinet6/dest6.c b/bsd/netinet6/dest6.c index 8127ebeae..ae7a18b8a 100644 --- a/bsd/netinet6/dest6.c +++ b/bsd/netinet6/dest6.c @@ -68,7 +68,7 @@ dest6_input(mp, offp) /* validation of the length of the header */ #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), return IPPROTO_DONE); dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, sizeof(*dstopts)); @@ -78,7 +78,7 @@ dest6_input(mp, offp) dstoptlen = (dstopts->ip6d_len + 1) << 3; #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, dstoptlen, IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, dstoptlen, return IPPROTO_DONE); dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, dstoptlen); @@ -107,7 +107,7 @@ dest6_input(mp, offp) default: /* unknown option */ optlen = ip6_unknown_opt(opt, m, - opt - mtod(m, u_int8_t *)); + opt - mtod(m, u_int8_t *), 0); if (optlen == -1) return (IPPROTO_DONE); optlen += 2; diff --git a/bsd/netinet6/esp.h b/bsd/netinet6/esp.h index 295048940..1d9d0c0ba 100644 --- a/bsd/netinet6/esp.h +++ b/bsd/netinet6/esp.h @@ -67,43 +67,41 @@ struct esptail { /*variable size, 32bit bound*/ /* Authentication data (new IPsec)*/ }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct secasvar; struct esp_algorithm { size_t padbound; /* pad boundary, in byte */ int ivlenval; /* iv length, in byte */ - int (*mature) __P((struct secasvar *)); + int (*mature)(struct 
secasvar *); int keymin; /* in bits */ int keymax; /* in bits */ - int (*schedlen) __P((const struct esp_algorithm *)); + int (*schedlen)(const struct esp_algorithm *); const char *name; - int (*ivlen) __P((const struct esp_algorithm *, struct secasvar *)); - int (*decrypt) __P((struct mbuf *, size_t, - struct secasvar *, const struct esp_algorithm *, int)); - int (*encrypt) __P((struct mbuf *, size_t, size_t, - struct secasvar *, const struct esp_algorithm *, int)); + int (*ivlen)(const struct esp_algorithm *, struct secasvar *); + int (*decrypt)(struct mbuf *, size_t, + struct secasvar *, const struct esp_algorithm *, int); + int (*encrypt)(struct mbuf *, size_t, size_t, + struct secasvar *, const struct esp_algorithm *, int); /* not supposed to be called directly */ - int (*schedule) __P((const struct esp_algorithm *, struct secasvar *)); - int (*blockdecrypt) __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); - int (*blockencrypt) __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); + int (*schedule)(const struct esp_algorithm *, struct secasvar *); + int (*blockdecrypt)(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); + int (*blockencrypt)(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); }; -extern const struct esp_algorithm *esp_algorithm_lookup __P((int)); -extern int esp_max_ivlen __P((void)); +extern const struct esp_algorithm *esp_algorithm_lookup(int); +extern int esp_max_ivlen(void); /* crypt routines */ -extern int esp4_output __P((struct mbuf *, struct ipsecrequest *)); -extern void esp4_input __P((struct mbuf *, int off)); -extern size_t esp_hdrsiz __P((struct ipsecrequest *)); +extern int esp4_output(struct mbuf *, struct ipsecrequest *); +extern void esp4_input(struct mbuf *, int off); +extern size_t esp_hdrsiz(struct ipsecrequest *); -extern int esp_schedule __P((const struct esp_algorithm *, struct secasvar *)); -extern int esp_auth __P((struct mbuf *, size_t, size_t, - struct secasvar *, u_char *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern int esp_schedule(const struct esp_algorithm *, struct secasvar *); +extern int esp_auth(struct mbuf *, size_t, size_t, + struct secasvar *, u_char *); +#endif KERNEL_PRIVATE -#endif /*_NETINET6_ESP_H_*/ +#endif _NETINET6_ESP_H_ diff --git a/bsd/netinet6/esp6.h b/bsd/netinet6/esp6.h index 74b5acc91..bb2c20529 100644 --- a/bsd/netinet6/esp6.h +++ b/bsd/netinet6/esp6.h @@ -38,14 +38,12 @@ #define _NETINET6_ESP6_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern int esp6_output __P((struct mbuf *, u_char *, struct mbuf *, - struct ipsecrequest *)); -extern int esp6_input __P((struct mbuf **, int *)); +#ifdef KERNEL_PRIVATE +extern int esp6_output(struct mbuf *, u_char *, struct mbuf *, + struct ipsecrequest *); +extern int esp6_input(struct mbuf **, int *); -extern void esp6_ctlinput __P((int, struct sockaddr *, void *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*_KERNEL*/ +extern void esp6_ctlinput(int, struct sockaddr *, void *); +#endif KERNEL_PRIVATE -#endif /*_NETINET6_ESP6_H_*/ +#endif _NETINET6_ESP6_H_ diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c index 7b8b124c6..21daa3f41 100644 --- a/bsd/netinet6/esp_core.c +++ b/bsd/netinet6/esp_core.c @@ -44,6 +44,8 @@ #include #include +#include + #include #include @@ -82,49 +84,51 @@ #define DBG_LAYER_END NETDBG_CODE(DBG_NETIPSEC, 3) #define DBG_FNC_ESPAUTH NETDBG_CODE(DBG_NETIPSEC, (8 << 8)) -static int esp_null_mature 
__P((struct secasvar *)); -static int esp_null_decrypt __P((struct mbuf *, size_t, - struct secasvar *, const struct esp_algorithm *, int)); -static int esp_null_encrypt __P((struct mbuf *, size_t, size_t, - struct secasvar *, const struct esp_algorithm *, int)); -static int esp_descbc_mature __P((struct secasvar *)); -static int esp_descbc_ivlen __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_des_schedule __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_des_schedlen __P((const struct esp_algorithm *)); -static int esp_des_blockdecrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_des_blockencrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_cbc_mature __P((struct secasvar *)); -static int esp_blowfish_schedule __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_blowfish_schedlen __P((const struct esp_algorithm *)); -static int esp_blowfish_blockdecrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_blowfish_blockencrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_cast128_schedule __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_cast128_schedlen __P((const struct esp_algorithm *)); -static int esp_cast128_blockdecrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_cast128_blockencrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_3des_schedule __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_3des_schedlen __P((const struct esp_algorithm *)); -static int esp_3des_blockdecrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_3des_blockencrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -static int esp_common_ivlen __P((const struct esp_algorithm *, - struct secasvar *)); -static int esp_cbc_decrypt __P((struct mbuf *, size_t, - struct secasvar *, const struct esp_algorithm *, int)); -static int esp_cbc_encrypt __P((struct mbuf *, size_t, size_t, - struct secasvar *, const struct esp_algorithm *, int)); +extern lck_mtx_t *sadb_mutex; + +static int esp_null_mature(struct secasvar *); +static int esp_null_decrypt(struct mbuf *, size_t, + struct secasvar *, const struct esp_algorithm *, int); +static int esp_null_encrypt(struct mbuf *, size_t, size_t, + struct secasvar *, const struct esp_algorithm *, int); +static int esp_descbc_mature(struct secasvar *); +static int esp_descbc_ivlen(const struct esp_algorithm *, + struct secasvar *); +static int esp_des_schedule(const struct esp_algorithm *, + struct secasvar *); +static int esp_des_schedlen(const struct esp_algorithm *); +static int esp_des_blockdecrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_des_blockencrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_cbc_mature(struct secasvar *); +static int esp_blowfish_schedule(const struct esp_algorithm *, + struct secasvar *); +static int esp_blowfish_schedlen(const struct esp_algorithm *); +static int esp_blowfish_blockdecrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_blowfish_blockencrypt(const struct esp_algorithm *, + struct secasvar *, 
u_int8_t *, u_int8_t *); +static int esp_cast128_schedule(const struct esp_algorithm *, + struct secasvar *); +static int esp_cast128_schedlen(const struct esp_algorithm *); +static int esp_cast128_blockdecrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_cast128_blockencrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_3des_schedule(const struct esp_algorithm *, + struct secasvar *); +static int esp_3des_schedlen(const struct esp_algorithm *); +static int esp_3des_blockdecrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_3des_blockencrypt(const struct esp_algorithm *, + struct secasvar *, u_int8_t *, u_int8_t *); +static int esp_common_ivlen(const struct esp_algorithm *, + struct secasvar *); +static int esp_cbc_decrypt(struct mbuf *, size_t, + struct secasvar *, const struct esp_algorithm *, int); +static int esp_cbc_encrypt(struct mbuf *, size_t, size_t, + struct secasvar *, const struct esp_algorithm *, int); #define MAXIVLEN 16 @@ -151,11 +155,11 @@ static const struct esp_algorithm esp_algorithms[] = { esp_common_ivlen, esp_cbc_decrypt, esp_cbc_encrypt, esp_cast128_schedule, esp_cast128_blockdecrypt, esp_cast128_blockencrypt, }, - { 16, 16, esp_cbc_mature, 128, 256, esp_rijndael_schedlen, - "rijndael-cbc", - esp_common_ivlen, esp_cbc_decrypt, - esp_cbc_encrypt, esp_rijndael_schedule, - esp_rijndael_blockdecrypt, esp_rijndael_blockencrypt }, + { 16, 16, esp_cbc_mature, 128, 256, esp_aes_schedlen, + "aes-cbc", + esp_common_ivlen, esp_cbc_decrypt_aes, + esp_cbc_encrypt_aes, esp_aes_schedule, + 0, 0 }, }; const struct esp_algorithm * @@ -819,10 +823,6 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen) soff += s->m_len; s = s->m_next; } - - /* skip over empty mbuf */ - while (s && s->m_len == 0) - s = s->m_next; } m_freem(scut->m_next); @@ -1025,10 +1025,6 @@ esp_cbc_encrypt(m, off, plen, sav, algo, ivlen) soff += s->m_len; s = s->m_next; } - - /* skip over empty mbuf */ - while (s && s->m_len == 0) - s = s->m_next; } m_freem(scut->m_next); @@ -1129,7 +1125,7 @@ esp_auth(m0, skip, length, sav, sum) KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 5,0,0,0,0); return error; } - + lck_mtx_unlock(sadb_mutex); while (0 < length) { if (!m) panic("mbuf chain?"); @@ -1147,7 +1143,7 @@ esp_auth(m0, skip, length, sav, sum) } (*algo->result)(&s, sumbuf); bcopy(sumbuf, sum, siz); /*XXX*/ - + lck_mtx_lock(sadb_mutex); KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 6,0,0,0,0); return 0; } diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index 463a4182d..4b080c6b4 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -48,8 +48,8 @@ #include #include -#include #include +#include #include #include @@ -96,6 +96,7 @@ #define DBG_FNC_DECRYPT NETDBG_CODE(DBG_NETIPSEC, (7 << 8)) #define IPLEN_FLIPPED +extern lck_mtx_t *sadb_mutex; #if INET extern struct protosw inetsw[]; @@ -121,6 +122,8 @@ esp4_input(m, off) size_t esplen; int s; + lck_mtx_lock(sadb_mutex); + KERNEL_DEBUG(DBG_FNC_ESPIN | DBG_FUNC_START, 0,0,0,0,0); /* sanity check for alignment. 
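 * (A note on the esp_algorithms[] change above: the rijndael-api-fst
 *  entry becomes "aes-cbc", wired through whole-chain routines, so the
 *  per-block callbacks are deliberately NULL:
 *
 *      { 16, 16, esp_cbc_mature, 128, 256, esp_aes_schedlen,
 *        "aes-cbc",
 *        esp_common_ivlen, esp_cbc_decrypt_aes,
 *        esp_cbc_encrypt_aes, esp_aes_schedule,
 *        0, 0 },       -- no blockdecrypt/blockencrypt for AES
 *
 *  esp_aes_schedule() runs aes_decrypt_key() and aes_encrypt_key() once
 *  up front, so the sched area -- sized by esp_aes_schedlen() as
 *  sizeof(aes_ctx) -- carries a ready round-key schedule for each
 *  direction.)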
*/ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { @@ -397,16 +400,9 @@ noreplaycheck: /* Clear the csum flags, they can't be valid for the inner headers */ m->m_pkthdr.csum_flags = 0; - s = splimp(); - if (IF_QFULL(&ipintrq)) { - ipsecstat.in_inval++; - splx(s); - goto bad; - } - IF_ENQUEUE(&ipintrq, m); - m = NULL; - schednetisr(NETISR_IP); /*can be skipped but to make sure*/ - splx(s); + lck_mtx_unlock(sadb_mutex); + proto_input(PF_INET, m); + lck_mtx_lock(sadb_mutex); nxt = IPPROTO_DONE; KERNEL_DEBUG(DBG_FNC_ESPIN | DBG_FUNC_END, 2,0,0,0,0); } else { @@ -457,7 +453,9 @@ noreplaycheck: goto bad; } KERNEL_DEBUG(DBG_FNC_ESPIN | DBG_FUNC_END, 3,0,0,0,0); - (*ip_protox[nxt]->pr_input)(m, off); + lck_mtx_unlock(sadb_mutex); + ip_proto_dispatch_in(m, off, nxt, 0); + lck_mtx_lock(sadb_mutex); } else m_freem(m); m = NULL; @@ -469,6 +467,7 @@ noreplaycheck: key_freesav(sav); } ipsecstat.in_success++; + lck_mtx_unlock(sadb_mutex); return; bad: @@ -477,6 +476,7 @@ bad: printf("DP esp4_input call free SA:%p\n", sav)); key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); if (m) m_freem(m); KERNEL_DEBUG(DBG_FNC_ESPIN | DBG_FUNC_END, 4,0,0,0,0); @@ -504,6 +504,8 @@ esp6_input(mp, offp) size_t esplen; int s; + lck_mtx_lock(sadb_mutex); + /* sanity check for alignment. */ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { ipseclog((LOG_ERR, "IPv6 ESP input: packet alignment problem " @@ -513,12 +515,13 @@ esp6_input(mp, offp) } #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, ESPMAXLEN, IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, ESPMAXLEN, {lck_mtx_unlock(sadb_mutex); return IPPROTO_DONE;}); esp = (struct esp *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(esp, struct esp *, m, off, ESPMAXLEN); if (esp == NULL) { ipsec6stat.in_inval++; + lck_mtx_unlock(sadb_mutex); return IPPROTO_DONE; } #endif @@ -672,7 +675,7 @@ noreplaycheck: } #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, esplen + ivlen, IPPROTO_DONE); /*XXX*/ + IP6_EXTHDR_CHECK(m, off, esplen + ivlen, return IPPROTO_DONE); /*XXX*/ #else IP6_EXTHDR_GET(esp, struct esp *, m, off, esplen + ivlen); if (esp == NULL) { @@ -776,17 +779,9 @@ noreplaycheck: ipsec6stat.in_nomem++; goto bad; } - - s = splimp(); - if (IF_QFULL(&ip6intrq)) { - ipsec6stat.in_inval++; - splx(s); - goto bad; - } - IF_ENQUEUE(&ip6intrq, m); - m = NULL; - schednetisr(NETISR_IPV6); /*can be skipped but to make sure*/ - splx(s); + lck_mtx_unlock(sadb_mutex); + proto_input(PF_INET6, m); + lck_mtx_lock(sadb_mutex); nxt = IPPROTO_DONE; } else { /* @@ -894,6 +889,7 @@ noreplaycheck: key_freesav(sav); } ipsec6stat.in_success++; + lck_mtx_unlock(sadb_mutex); return nxt; bad: @@ -902,6 +898,7 @@ bad: printf("DP esp6_input call free SA:%p\n", sav)); key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); if (m) m_freem(m); return IPPROTO_DONE; @@ -986,6 +983,7 @@ esp6_ctlinput(cmd, sa, d) */ sa6_src = ip6cp->ip6c_src; sa6_dst = (struct sockaddr_in6 *)sa; + lck_mtx_lock(sadb_mutex); sav = key_allocsa(AF_INET6, (caddr_t)&sa6_src->sin6_addr, (caddr_t)&sa6_dst->sin6_addr, @@ -996,6 +994,7 @@ esp6_ctlinput(cmd, sa, d) valid++; key_freesav(sav); } + lck_mtx_unlock(sadb_mutex); /* XXX Further validation? 
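 * (The recurring rewrite in ah4_input, ah6_input, esp4_input and
 *  esp6_input: the splimp()/IF_QFULL/IF_ENQUEUE/schednetisr requeueing
 *  is gone, and decapsulated packets go straight back into the stack
 *  via proto_input().  Since that re-enters the input path, the global
 *  SADB lock is dropped around the call -- a sketch of the idiom, with
 *  an illustrative helper name:
 *
 *      extern lck_mtx_t *sadb_mutex;
 *
 *      static void
 *      deliver_decapsulated(int family, struct mbuf *m)
 *      {
 *              lck_mtx_unlock(sadb_mutex); -- never hold SADB across re-entry
 *              proto_input(family, m);     -- replaces IF_ENQUEUE + schednetisr
 *              lck_mtx_lock(sadb_mutex);
 *      }
 *
 *  The same constraint explains the braced failure actions now passed
 *  to IP6_EXTHDR_CHECK, e.g. {lck_mtx_unlock(sadb_mutex); return
 *  IPPROTO_DONE;} -- the macro's bail-out must release the lock too.)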
*/ diff --git a/bsd/netinet6/esp_output.c b/bsd/netinet6/esp_output.c index b3dc4d22c..b1b00cda2 100644 --- a/bsd/netinet6/esp_output.c +++ b/bsd/netinet6/esp_output.c @@ -87,8 +87,8 @@ #define DBG_FNC_ESPOUT NETDBG_CODE(DBG_NETIPSEC, (4 << 8)) #define DBG_FNC_ENCRYPT NETDBG_CODE(DBG_NETIPSEC, (5 << 8)) -static int esp_output __P((struct mbuf *, u_char *, struct mbuf *, - struct ipsecrequest *, int)); +static int esp_output(struct mbuf *, u_char *, struct mbuf *, + struct ipsecrequest *, int); extern int esp_udp_encap_port; extern u_int32_t natt_now; diff --git a/bsd/netinet6/esp_rijndael.c b/bsd/netinet6/esp_rijndael.c index fa35c593c..f2ebe936d 100644 --- a/bsd/netinet6/esp_rijndael.c +++ b/bsd/netinet6/esp_rijndael.c @@ -34,6 +34,10 @@ #include #include #include +#include +#include + +#include #include #include @@ -42,72 +46,381 @@ #include #include -#include +#include #include -/* as rijndael uses assymetric scheduled keys, we need to do it twice. */ +#define AES_BLOCKLEN 16 + +extern lck_mtx_t *sadb_mutex; + int -esp_rijndael_schedlen(algo) +esp_aes_schedlen(algo) const struct esp_algorithm *algo; { - return sizeof(keyInstance) * 2; + return sizeof(aes_ctx); } int -esp_rijndael_schedule(algo, sav) +esp_aes_schedule(algo, sav) const struct esp_algorithm *algo; struct secasvar *sav; { - keyInstance *k; - - k = (keyInstance *)sav->sched; - if (rijndael_makeKey(&k[0], DIR_DECRYPT, _KEYLEN(sav->key_enc) * 8, - _KEYBUF(sav->key_enc)) < 0) - return -1; - if (rijndael_makeKey(&k[1], DIR_ENCRYPT, _KEYLEN(sav->key_enc) * 8, - _KEYBUF(sav->key_enc)) < 0) - return -1; + aes_ctx *ctx = (aes_ctx*)sav->sched; + + gen_tabs(); + aes_decrypt_key(_KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->decrypt); + aes_encrypt_key(_KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->encrypt); + return 0; } + +/* The following 2 functions decrypt or encrypt the contents of + * the mbuf chain passed in keeping the IP and ESP header's in place, + * along with the IV. + * The code attempts to call the crypto code with the largest chunk + * of data it can based on the amount of source data in + * the current source mbuf and the space remaining in the current + * destination mbuf. The crypto code requires data to be a multiples + * of 16 bytes. A separate buffer is used when a 16 byte block spans + * mbufs. + * + * m = mbuf chain + * off = offset to ESP header + * + * local vars for source: + * soff = offset from beginning of the chain to the head of the + * current mbuf. 
+ * scut = last mbuf that contains headers to be retained + * scutoff = offset to end of the headers in scut + * s = the current mbuf + * sn = current offset to data in s (next source data to process) + * + * local vars for dest: + * d0 = head of chain + * d = current mbuf + * dn = current offset in d (next location to store result) + */ + + int -esp_rijndael_blockdecrypt(algo, sav, s, d) - const struct esp_algorithm *algo; +esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) + struct mbuf *m; + size_t off; struct secasvar *sav; - u_int8_t *s; - u_int8_t *d; + const struct esp_algorithm *algo; + int ivlen; { - cipherInstance c; - keyInstance *p; - - /* does not take advantage of CBC mode support */ - bzero(&c, sizeof(c)); - if (rijndael_cipherInit(&c, MODE_ECB, NULL) < 0) - return -1; - p = (keyInstance *)sav->sched; - if (rijndael_blockDecrypt(&c, &p[0], s, algo->padbound * 8, d) < 0) - return -1; + struct mbuf *s; + struct mbuf *d, *d0, *dp; + int soff; /* offset from the head of chain, to head of this mbuf */ + int sn, dn; /* offset from the head of the mbuf, to meat */ + size_t ivoff, bodyoff; + u_int8_t iv[AES_BLOCKLEN], *dptr; + u_int8_t sbuf[AES_BLOCKLEN], *sp; + struct mbuf *scut; + int scutoff; + int i, len; + + + if (ivlen != AES_BLOCKLEN) { + ipseclog((LOG_ERR, "esp_cbc_decrypt %s: " + "unsupported ivlen %d\n", algo->name, ivlen)); + m_freem(m); + return EINVAL; + } + + if (sav->flags & SADB_X_EXT_OLD) { + /* RFC 1827 */ + ivoff = off + sizeof(struct esp); + bodyoff = off + sizeof(struct esp) + ivlen; + } else { + ivoff = off + sizeof(struct newesp); + bodyoff = off + sizeof(struct newesp) + ivlen; + } + + if (m->m_pkthdr.len < bodyoff) { + ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n", + algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + m_freem(m); + return EINVAL; + } + if ((m->m_pkthdr.len - bodyoff) % AES_BLOCKLEN) { + ipseclog((LOG_ERR, "esp_cbc_decrypt %s: " + "payload length must be multiple of %d\n", + algo->name, AES_BLOCKLEN)); + m_freem(m); + return EINVAL; + } + + /* grab iv */ + m_copydata(m, ivoff, ivlen, iv); + + lck_mtx_unlock(sadb_mutex); + s = m; + soff = sn = dn = 0; + d = d0 = dp = NULL; + sp = dptr = NULL; + + /* skip header/IV offset */ + while (soff < bodyoff) { + if (soff + s->m_len > bodyoff) { + sn = bodyoff - soff; + break; + } + + soff += s->m_len; + s = s->m_next; + } + scut = s; + scutoff = sn; + + /* skip over empty mbuf */ + while (s && s->m_len == 0) + s = s->m_next; + + while (soff < m->m_pkthdr.len) { + /* source */ + if (sn + AES_BLOCKLEN <= s->m_len) { + /* body is continuous */ + sp = mtod(s, u_int8_t *) + sn; + len = s->m_len - sn; + len -= len % AES_BLOCKLEN; // full blocks only + } else { + /* body is non-continuous */ + m_copydata(s, sn, AES_BLOCKLEN, sbuf); + sp = sbuf; + len = AES_BLOCKLEN; // 1 block only in sbuf + } + + /* destination */ + if (!d || dn + AES_BLOCKLEN > d->m_len) { + if (d) + dp = d; + MGET(d, M_DONTWAIT, MT_DATA); + i = m->m_pkthdr.len - (soff + sn); + if (d && i > MLEN) { + MCLGET(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + m_free(d); + d = NULL; + } + } + if (!d) { + m_freem(m); + if (d0) + m_freem(d0); + lck_mtx_lock(sadb_mutex); + return ENOBUFS; + } + if (!d0) + d0 = d; + if (dp) + dp->m_next = d; + d->m_len = M_TRAILINGSPACE(d); + d->m_len -= d->m_len % AES_BLOCKLEN; + if (d->m_len > i) + d->m_len = i; + dptr = mtod(d, u_int8_t *); + dn = 0; + } + + /* adjust len if greater than space available in dest */ + if (len > d->m_len - dn) + len = d->m_len - dn; + + /* decrypt */ + 
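/* A note on the call below: len has already been clamped to a multiple
 * of AES_BLOCKLEN (16), so len >> 4 is the whole number of CBC blocks
 * handed to the crypto code in one shot.  For decryption the chaining
 * IV for the next chunk must be the last ciphertext block, so iv is
 * re-loaded from the source (sp + len - AES_BLOCKLEN) right after the
 * call; the encrypt path instead chains from the freshly written
 * destination (ivp = dptr + dn - AES_BLOCKLEN). */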
aes_decrypt_cbc(sp, iv, len >> 4, dptr + dn, + (aes_decrypt_ctx*)(&(((aes_ctx*)sav->sched)->decrypt))); + + /* udpate offsets */ + sn += len; + dn += len; + + // next iv + bcopy(sp + len - AES_BLOCKLEN, iv, AES_BLOCKLEN); + + /* find the next source block */ + while (s && sn >= s->m_len) { + sn -= s->m_len; + soff += s->m_len; + s = s->m_next; + } + + } + + /* free un-needed source mbufs and add dest mbufs to chain */ + m_freem(scut->m_next); + scut->m_len = scutoff; + scut->m_next = d0; + + /* just in case */ + bzero(iv, sizeof(iv)); + bzero(sbuf, sizeof(sbuf)); + lck_mtx_lock(sadb_mutex); + return 0; } int -esp_rijndael_blockencrypt(algo, sav, s, d) - const struct esp_algorithm *algo; +esp_cbc_encrypt_aes(m, off, plen, sav, algo, ivlen) + struct mbuf *m; + size_t off; + size_t plen; struct secasvar *sav; - u_int8_t *s; - u_int8_t *d; + const struct esp_algorithm *algo; + int ivlen; { - cipherInstance c; - keyInstance *p; - - /* does not take advantage of CBC mode support */ - bzero(&c, sizeof(c)); - if (rijndael_cipherInit(&c, MODE_ECB, NULL) < 0) - return -1; - p = (keyInstance *)sav->sched; - if (rijndael_blockEncrypt(&c, &p[1], s, algo->padbound * 8, d) < 0) - return -1; + struct mbuf *s; + struct mbuf *d, *d0, *dp; + int soff, doff; /* offset from the head of chain, to head of this mbuf */ + int sn, dn; /* offset from the head of the mbuf, to meat */ + size_t ivoff, bodyoff; + u_int8_t *ivp, *dptr; + u_int8_t sbuf[AES_BLOCKLEN], *sp; + struct mbuf *scut; + int scutoff; + int i, len; + + if (ivlen != AES_BLOCKLEN) { + ipseclog((LOG_ERR, "esp_cbc_encrypt %s: " + "unsupported ivlen %d\n", algo->name, ivlen)); + m_freem(m); + return EINVAL; + } + + if (sav->flags & SADB_X_EXT_OLD) { + /* RFC 1827 */ + ivoff = off + sizeof(struct esp); + bodyoff = off + sizeof(struct esp) + ivlen; + } else { + ivoff = off + sizeof(struct newesp); + bodyoff = off + sizeof(struct newesp) + ivlen; + } + + /* put iv into the packet */ + m_copyback(m, ivoff, ivlen, sav->iv); + ivp = sav->iv; + + if (m->m_pkthdr.len < bodyoff) { + ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n", + algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + m_freem(m); + return EINVAL; + } + if ((m->m_pkthdr.len - bodyoff) % AES_BLOCKLEN) { + ipseclog((LOG_ERR, "esp_cbc_encrypt %s: " + "payload length must be multiple of %lu\n", + algo->name, AES_BLOCKLEN)); + m_freem(m); + return EINVAL; + } + lck_mtx_unlock(sadb_mutex); + + s = m; + soff = sn = dn = 0; + d = d0 = dp = NULL; + sp = dptr = NULL; + + /* skip headers/IV */ + while (soff < bodyoff) { + if (soff + s->m_len > bodyoff) { + sn = bodyoff - soff; + break; + } + + soff += s->m_len; + s = s->m_next; + } + scut = s; + scutoff = sn; + + /* skip over empty mbuf */ + while (s && s->m_len == 0) + s = s->m_next; + + while (soff < m->m_pkthdr.len) { + /* source */ + if (sn + AES_BLOCKLEN <= s->m_len) { + /* body is continuous */ + sp = mtod(s, u_int8_t *) + sn; + len = s->m_len - sn; + len -= len % AES_BLOCKLEN; // full blocks only + } else { + /* body is non-continuous */ + m_copydata(s, sn, AES_BLOCKLEN, sbuf); + sp = sbuf; + len = AES_BLOCKLEN; // 1 block only in sbuf + } + + /* destination */ + if (!d || dn + AES_BLOCKLEN > d->m_len) { + if (d) + dp = d; + MGET(d, M_DONTWAIT, MT_DATA); + i = m->m_pkthdr.len - (soff + sn); + if (d && i > MLEN) { + MCLGET(d, M_DONTWAIT); + if ((d->m_flags & M_EXT) == 0) { + m_free(d); + d = NULL; + } + } + if (!d) { + m_freem(m); + if (d0) + m_freem(d0); + lck_mtx_lock(sadb_mutex); + return ENOBUFS; + } + if (!d0) + d0 = d; + if (dp) + 
dp->m_next = d; + + d->m_len = M_TRAILINGSPACE(d); + d->m_len -= d->m_len % AES_BLOCKLEN; + if (d->m_len > i) + d->m_len = i; + dptr = mtod(d, u_int8_t *); + dn = 0; + } + + /* adjust len if greater than space available */ + if (len > d->m_len - dn) + len = d->m_len - dn; + + /* encrypt */ + aes_encrypt_cbc(sp, ivp, len >> 4, dptr + dn, + (aes_encrypt_ctx*)(&(((aes_ctx*)sav->sched)->encrypt))); + + /* update offsets */ + sn += len; + dn += len; + + /* next iv */ + ivp = dptr + dn - AES_BLOCKLEN; // last block encrypted + + /* find the next source block and skip empty mbufs */ + while (s && sn >= s->m_len) { + sn -= s->m_len; + soff += s->m_len; + s = s->m_next; + } + + } + + /* free un-needed source mbufs and add dest mbufs to chain */ + m_freem(scut->m_next); + scut->m_len = scutoff; + scut->m_next = d0; + + /* just in case */ + bzero(sbuf, sizeof(sbuf)); + lck_mtx_lock(sadb_mutex); + key_sa_stir_iv(sav); + return 0; } diff --git a/bsd/netinet6/esp_rijndael.h b/bsd/netinet6/esp_rijndael.h index e571f820f..9d60216a9 100644 --- a/bsd/netinet6/esp_rijndael.h +++ b/bsd/netinet6/esp_rijndael.h @@ -31,12 +31,13 @@ */ #include -#ifdef __APPLE_API_PRIVATE -int esp_rijndael_schedlen __P((const struct esp_algorithm *)); -int esp_rijndael_schedule __P((const struct esp_algorithm *, - struct secasvar *)); -int esp_rijndael_blockdecrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -int esp_rijndael_blockencrypt __P((const struct esp_algorithm *, - struct secasvar *, u_int8_t *, u_int8_t *)); -#endif /* __APPLE_API_PRIVATE */ +#ifdef KERNEL_PRIVATE +int esp_aes_schedlen(const struct esp_algorithm *); +int esp_aes_schedule(const struct esp_algorithm *, struct secasvar *); +int esp_cbc_decrypt_aes(struct mbuf *, size_t, struct secasvar *, + const struct esp_algorithm *, int); +int +esp_cbc_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, + const struct esp_algorithm *, int); + +#endif KERNEL_PRIVATE diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index f686d01cd..a495a0464 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -61,21 +62,23 @@ */ #define IN6_IFSTAT_STRICT -static void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *)); -static void frag6_deq __P((struct ip6asfrag *)); -static void frag6_insque __P((struct ip6q *, struct ip6q *)); -static void frag6_remque __P((struct ip6q *)); -static void frag6_freef __P((struct ip6q *)); +static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); +static void frag6_deq(struct ip6asfrag *); +static void frag6_insque(struct ip6q *, struct ip6q *); +static void frag6_remque(struct ip6q *); +static void frag6_freef(struct ip6q *); /* XXX we eventually need splreass6, or some real semaphore */ int frag6_doing_reass; u_int frag6_nfragpackets; +static u_int frag6_nfrags; struct ip6q ip6q; /* ip6 reassemble queue */ #ifndef __APPLE__ MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); #endif +extern lck_mtx_t *inet6_domain_mutex; /* * Initialise reassembly queue and fragment identifier. */ @@ -85,6 +88,7 @@ frag6_init() struct timeval tv; ip6_maxfragpackets = nmbclusters / 32; + ip6_maxfrags = nmbclusters / 4; /* * in many cases, random() here does NOT return random number @@ -126,6 +130,8 @@ frag6_init() */ /* * Fragment input + * NOTE: this function is called with the inet6_domain_mutex held from ip6_input. + * inet6_domain_mutex is protecting he frag6 queue manipulation. 
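 * (frag6_input() also gains a global fragment budget: ip6_maxfrags,
 *  initialised in frag6_init() above to nmbclusters / 4.  It follows
 *  the ip6_maxfragpackets convention -- -1 means unlimited, 0 rejects
 *  every fragment, N > 0 caps the fragments queued.  The guard added
 *  below is equivalent to:
 *
 *      if (ip6_maxfrags >= 0 && frag6_nfrags >= (u_int)ip6_maxfrags)
 *              goto dropfrag;
 *
 *  with frag6_nfrags incremented per queued fragment and decremented in
 *  bulk via q6->ip6q_nfrag when a packet completes or its queue is
 *  freed.)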
*/ int frag6_input(mp, offp) @@ -148,7 +154,7 @@ frag6_input(mp, offp) ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); @@ -211,6 +217,16 @@ frag6_input(mp, offp) frag6_doing_reass = 1; + /* + * Enforce upper bound on number of fragments. + * If maxfrag is 0, never accept fragments. + * If maxfrag is -1, accept all fragments without limitation. + */ + if (ip6_maxfrags < 0) + ; + else if (frag6_nfrags >= (u_int)ip6_maxfrags) + goto dropfrag; + for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && @@ -253,6 +269,8 @@ frag6_input(mp, offp) q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ + + q6->ip6q_nfrag = 0; } /* @@ -431,6 +449,8 @@ insert: * the most recently active fragmented packet. */ frag6_enq(ip6af, af6->ip6af_up); + frag6_nfrags++; + q6->ip6q_nfrag++; #if 0 /* xxx */ if (q6 != ip6q.ip6q_next) { frag6_remque(q6); @@ -493,6 +513,7 @@ insert: /* this comes with no copy if the boundary is on cluster */ if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { frag6_remque(q6); + frag6_nfrags -= q6->ip6q_nfrag; FREE(q6, M_FTABLE); frag6_nfragpackets--; goto dropfrag; @@ -510,6 +531,7 @@ insert: } frag6_remque(q6); + frag6_nfrags -= q6->ip6q_nfrag; FREE(q6, M_FTABLE); frag6_nfragpackets--; @@ -571,7 +593,6 @@ frag6_freef(q6) /* restoure source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; - icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_REASSEMBLY, 0); } else @@ -580,6 +601,7 @@ frag6_freef(q6) } frag6_remque(q6); + frag6_nfrags -= q6->ip6q_nfrag; FREE(q6, M_FTABLE); frag6_nfragpackets--; } @@ -636,7 +658,7 @@ void frag6_slowtimo() { struct ip6q *q6; - int s = splnet(); + lck_mtx_lock(inet6_domain_mutex); frag6_doing_reass = 1; q6 = ip6q.ip6q_next; @@ -679,7 +701,7 @@ frag6_slowtimo() } #endif - splx(s); + lck_mtx_unlock(inet6_domain_mutex); } /* @@ -690,9 +712,11 @@ frag6_drain() { if (frag6_doing_reass) return; + lck_mtx_lock(inet6_domain_mutex); while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ frag6_freef(ip6q.ip6q_next); } + lck_mtx_unlock(inet6_domain_mutex); } diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index 654dc8b15..042bdd76d 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -68,6 +68,7 @@ #include #include +#include #include #include #include @@ -109,35 +110,6 @@ extern int ipsec_bypass; #include -#if HAVE_NRL_INPCB -/* inpcb members */ -#define in6pcb inpcb -#define in6p_laddr inp_laddr6 -#define in6p_faddr inp_faddr6 -#define in6p_icmp6filt inp_icmp6filt -#define in6p_route inp_route -#define in6p_socket inp_socket -#define in6p_flags inp_flags -#define in6p_moptions inp_moptions6 -#define in6p_outputopts inp_outputopts6 -#define in6p_ip6 inp_ipv6 -#define in6p_flowinfo inp_flowinfo -#define in6p_sp inp_sp -#define in6p_next inp_next -#define in6p_prev inp_prev -/* macro names */ -#define sotoin6pcb sotoinpcb -/* function names */ -#define in6_pcbdetach in_pcbdetach -#define in6_rtchange in_rtchange - -/* - * for KAME src sync over BSD*'s. XXX: FreeBSD (>=3) are VERY different from - * others... 
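 * (A change visible in nearly every icmp6/frag6/dest6 hunk: the last
 *  argument of IP6_EXTHDR_CHECK() used to be a value for an implicit
 *  return; it is now a statement executed on failure, which is why call
 *  sites pass "return IPPROTO_DONE", a bare "return", or a braced
 *  unlock-and-return block.  A hedged sketch of the reworked shape --
 *  the authoritative definition is in bsd/netinet6/ip6_var.h of this
 *  release, and the bookkeeping shown here is an assumption:
 *
 *      #define IP6_EXTHDR_CHECK(m, off, hlen, action)          \
 *      do {                                                    \
 *              if ((m)->m_len < (off) + (hlen)) {              \
 *                      m_freem(m);                             \
 *                      action;   -- caller-supplied bail-out   \
 *              }                                               \
 *      } while (0)
 *
 *  Passing a statement instead of a bare return value is what lets
 *  callers fold lock releases into the failure path.)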
- */ -#define in6p_ip6_nxt inp_ipv6.ip6_nxt -#endif - extern struct domain inet6domain; extern struct ip6protosw inet6sw[]; extern struct ip6protosw *ip6_protox[]; @@ -149,23 +121,26 @@ extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; - -static void icmp6_errcount __P((struct icmp6errstat *, int, int)); -static int icmp6_rip6_input __P((struct mbuf **, int)); -static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int)); -static const char *icmp6_redirect_diag __P((struct in6_addr *, - struct in6_addr *, struct in6_addr *)); +extern struct inpcbinfo ripcbinfo; +extern lck_mtx_t *ip6_mutex; +extern lck_mtx_t *nd6_mutex; + +static void icmp6_errcount(struct icmp6errstat *, int, int); +static int icmp6_rip6_input(struct mbuf **, int); +static int icmp6_ratelimit(const struct in6_addr *, const int, const int); +static const char *icmp6_redirect_diag(struct in6_addr *, + struct in6_addr *, struct in6_addr *); #ifndef HAVE_PPSRATECHECK -static int ppsratecheck __P((struct timeval *, int *, int)); +static int ppsratecheck(struct timeval *, int *, int); #endif -static struct mbuf *ni6_input __P((struct mbuf *, int)); -static struct mbuf *ni6_nametodns __P((const char *, int, int)); -static int ni6_dnsmatch __P((const char *, int, const char *, int)); -static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, - struct ifnet **, char *)); -static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, - struct ifnet *, int)); -static int icmp6_notify_error __P((struct mbuf *, int, int, int)); +static struct mbuf *ni6_input(struct mbuf *, int); +static struct mbuf *ni6_nametodns(const char *, int, int); +static int ni6_dnsmatch(const char *, int, const char *, int); +static int ni6_addrs(struct icmp6_nodeinfo *, + struct ifnet **, char *); +static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, + struct ifnet *, int); +static int icmp6_notify_error(struct mbuf *, int, int, int); #ifdef COMPAT_RFC1885 static struct route_in6 icmp6_reflect_rt; @@ -252,6 +227,7 @@ icmp6_error(m, type, code, param) icmp6stat.icp6s_error++; + lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_NOTOWNED); /* count per-type-code statistics */ icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code); @@ -263,7 +239,7 @@ icmp6_error(m, type, code, param) #endif #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), return); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); @@ -300,7 +276,7 @@ icmp6_error(m, type, code, param) struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); + IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), return); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, @@ -401,7 +377,7 @@ icmp6_input(mp, offp) int code, sum, noff; #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), return IPPROTO_DONE); /* m might change if M_LOOP. 
So, call mtod after this */ #endif @@ -659,7 +635,7 @@ icmp6_input(mp, offp) if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), - IPPROTO_DONE); + return IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) @@ -873,7 +849,7 @@ icmp6_notify_error(m, off, icmp6len, code) #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), - -1); + return -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, @@ -887,7 +863,7 @@ icmp6_notify_error(m, off, icmp6len, code) /* Detect the upper level protocol */ { - void (*ctlfunc) __P((int, struct sockaddr *, void *)); + void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); @@ -909,7 +885,7 @@ icmp6_notify_error(m, off, icmp6len, code) #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), - -1); + return -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else @@ -938,7 +914,7 @@ icmp6_notify_error(m, off, icmp6len, code) */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), - -1); + return -1); rth = (struct ip6_rthdr *)(mtod(m, caddr_t) + eoff); #else @@ -964,7 +940,7 @@ icmp6_notify_error(m, off, icmp6len, code) #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, - -1); + return -1); rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, @@ -987,7 +963,7 @@ icmp6_notify_error(m, off, icmp6len, code) #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), - -1); + return -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else @@ -1092,7 +1068,7 @@ icmp6_notify_error(m, off, icmp6len, code) icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } - ctlfunc = (void (*) __P((int, struct sockaddr *, void *))) + ctlfunc = (void (*)(int, struct sockaddr *, void *)) (ip6_protox[nxt]->pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, @@ -1209,11 +1185,15 @@ ni6_input(m, off) /* unicast/anycast, fine */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { + ifafree(&ia6->ia_ifa); + ia6 = NULL; nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } + ifafree(&ia6->ia_ifa); + ia6 = NULL; } else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) ; /* link-local multicast, fine */ else @@ -1356,7 +1336,7 @@ ni6_input(m, off) replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: - addrs = ni6_addrs(ni6, m, &ifp, subj); + addrs = ni6_addrs(ni6, &ifp, subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ @@ -1640,9 +1620,8 @@ ni6_dnsmatch(a, alen, b, blen) * calculate the number of addresses to be returned in the node info reply. 
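 * (ni6_addrs(), declared below, loses its unused struct mbuf argument,
 *  and both it and ni6_store_addrs() move from bare walks of the old
 *  ifnet list to the new locked accessors.  The traversal idiom the
 *  patch introduces:
 *
 *      ifnet_head_lock_shared();            -- pins the interface list
 *      TAILQ_FOREACH(ifp, &ifnet_head, if_list) {
 *              ifnet_lock_shared(ifp);      -- pins ifp->if_addrlist
 *              TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
 *                      -- examine ifa
 *              }
 *              ifnet_lock_done(ifp);
 *      }
 *      ifnet_head_done();
 *
 *  Early exits must release both locks, which is why the iffound path
 *  calls ifnet_lock_done() and ifnet_head_done() before returning.)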
*/ static int -ni6_addrs(ni6, m, ifpp, subj) +ni6_addrs(ni6, ifpp, subj) struct icmp6_nodeinfo *ni6; - struct mbuf *m; struct ifnet **ifpp; char *subj; { @@ -1669,9 +1648,10 @@ ni6_addrs(ni6, m, ifpp, subj) } } - for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { addrsofif = 0; + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -1724,13 +1704,16 @@ ni6_addrs(ni6, m, ifpp, subj) } addrsofif++; /* count the address */ } + ifnet_lock_done(ifp); if (iffound) { *ifpp = ifp; + ifnet_head_done(); return(addrsofif); } addrs += addrsofif; } + ifnet_head_done(); return(addrs); } @@ -1741,7 +1724,7 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) struct ifnet *ifp0; int resid; { - struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet); + struct ifnet *ifp = ifp0; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; @@ -1749,14 +1732,20 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; + struct timeval timenow; + + getmicrotime(&timenow); if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return(0); /* needless to copy */ again: - for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) - { + ifnet_head_lock_shared(); + if (ifp == NULL) ifp = TAILQ_FIRST(&ifnet_head); + + for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { @@ -1820,6 +1809,8 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; + ifnet_lock_done(ifp); + ifnet_head_done(); return(copied); } @@ -1842,8 +1833,8 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > - time_second) - ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); + timenow.tv_sec) + ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - timenow.tv_sec); else ltime = 0; } @@ -1863,9 +1854,11 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } + ifnet_lock_done(ifp); if (ifp0) /* we need search only on the specified IF */ break; } + ifnet_head_done(); if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; @@ -1910,6 +1903,7 @@ icmp6_rip6_input(mp, off) /* KAME hack: recover scopeid */ (void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif); + lck_rw_lock_shared(ripcbinfo.mtx); LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) @@ -1939,31 +1933,24 @@ icmp6_rip6_input(mp, off) m_adj(n, off); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&rip6src, - n, opts) == 0) { - /* should notify about lost packet */ - m_freem(n); - if (opts) { - m_freem(opts); - } - } else + n, opts, NULL) != 0) { sorwakeup(last->in6p_socket); + } opts = NULL; } } last = in6p; } + lck_rw_done(ripcbinfo.mtx); if (last) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, &opts, ip6, m); /* strip intermediate headers */ m_adj(m, off); if (sbappendaddr(&last->in6p_socket->so_rcv, - (struct sockaddr *)&rip6src, m, opts) == 0) { - m_freem(m); - if (opts) - m_freem(opts); - } else + (struct sockaddr *)&rip6src, m, opts, NULL) != 0) { sorwakeup(last->in6p_socket); + } } else { m_freem(m); ip6stat.ip6s_delivered--; @@ -1983,7 +1970,7 @@ icmp6_reflect(m, off) struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia; - struct in6_addr t, 
*src = 0; + struct in6_addr t, src_storage, *src = 0; int plen; int type, code; struct ifnet *outif = NULL; @@ -2105,12 +2092,14 @@ icmp6_reflect(m, off) * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ - for (ia = in6_ifaddr; ia; ia = ia->ia_next) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia; ia = ia->ia_next) if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) && (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) { src = &t; break; } + lck_mtx_unlock(nd6_mutex); if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) { /* * This is the case if the dst is our link-local address @@ -2129,7 +2118,7 @@ icmp6_reflect(m, off) * source address of the erroneous packet. */ bzero(&ro, sizeof(ro)); - src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e); + src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &src_storage, &e); if (ro.ro_rt) rtfree(ro.ro_rt); /* XXX: we could use this */ if (src == NULL) { @@ -2169,9 +2158,9 @@ icmp6_reflect(m, off) #endif /*IPSEC*/ #ifdef COMPAT_RFC1885 - ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif); + ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, 0); #else - ip6_output(m, NULL, NULL, 0, NULL, &outif); + ip6_output(m, NULL, NULL, 0, NULL, &outif, 0); #endif if (outif) icmp6_ifoutstat_inc(outif, type, code); @@ -2233,7 +2222,7 @@ icmp6_redirect_input(m, off) goto freeit; #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, icmp6len,); + IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); @@ -2410,7 +2399,7 @@ icmp6_redirect_output(m0, rt) struct rtentry *rt; { struct ifnet *ifp; /* my outgoing interface */ - struct in6_addr *ifp_ll6; + struct in6_addr ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ @@ -2444,7 +2433,7 @@ icmp6_redirect_output(m0, rt) src_sa.sin6_addr = sip6->ip6_src; /* we don't currently use sin6_scope_id, but eventually use it */ src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src); - if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) + if (nd6_is_addr_neighbor(&src_sa, ifp, 0) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ @@ -2483,7 +2472,8 @@ icmp6_redirect_output(m0, rt) IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; - ifp_ll6 = &ia->ia_addr.sin6_addr; + ifp_ll6 = ia->ia_addr.sin6_addr; + ifafree(&ia->ia_ifa); } /* get ip6 linklocal address for the router. */ @@ -2505,7 +2495,7 @@ icmp6_redirect_output(m0, rt) ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. 
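 * (Two fixes nearby share one theme: stop keeping pointers into objects
 *  whose lifetime the caller no longer controls.  icmp6_reflect() now
 *  hands in6_selectsrc() a caller-owned src_storage buffer instead of
 *  receiving a pointer into shared state, and icmp6_redirect_output()
 *  copies the link-local address out of the ifaddr by value before
 *  dropping its reference:
 *
 *      struct in6_addr ifp_ll6;            -- was: struct in6_addr *
 *      ...
 *      ifp_ll6 = ia->ia_addr.sin6_addr;    -- structure copy
 *      ifafree(&ia->ia_ifa);               -- safe: nothing dangles
 *
 *  which is why the bcopy below now takes &ifp_ll6.)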
*/ - bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); + bcopy(&ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ @@ -2545,7 +2535,7 @@ icmp6_redirect_output(m0, rt) struct nd_opt_hdr *nd_opt; char *lladdr; - rt_router = nd6_lookup(router_ll6, 0, ifp); + rt_router = nd6_lookup(router_ll6, 0, ifp, 0); if (!rt_router) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; @@ -2676,7 +2666,7 @@ noredhdropt:; if (ipsec_bypass == 0) (void)ipsec_setsocket(m, NULL); #endif /*IPSEC*/ - ip6_output(m, NULL, NULL, 0, NULL, &outif); + ip6_output(m, NULL, NULL, 0, NULL, &outif, 0); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c index f3648e697..8887a091d 100644 --- a/bsd/netinet6/in6.c +++ b/bsd/netinet6/in6.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -130,13 +131,14 @@ const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = {sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0}; -static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, - struct ifnet *, struct proc *)); -static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *, - struct sockaddr_in6 *, int)); -static void in6_unlink_ifa __P((struct in6_ifaddr *, struct ifnet *)); +static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t, + struct ifnet *, struct proc *); +static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, + struct sockaddr_in6 *, int); +static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *, int); struct in6_multihead in6_multihead; /* XXX BSS initialization */ +extern struct lck_mtx_t *nd6_mutex; /* * Subroutine for in6_ifaddloop() and in6_ifremloop(). @@ -162,7 +164,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, * which changes the outgoing interface to the loopback interface. */ - e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr, + e = rtrequest_locked(cmd, ifa->ifa_addr, ifa->ifa_addr, (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); if (e != 0) { @@ -196,7 +198,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) if (nrt->rt_refcnt <= 0) { /* XXX: we should free the entry ourselves. */ rtref(nrt); - rtfree(nrt); + rtfree_locked(nrt); } } else { /* the cmd must be RTM_ADD here */ @@ -217,13 +219,15 @@ in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; + lck_mtx_lock(rt_mtx); /* If there is no loopback entry, allocate one. */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); + rt = rtalloc1_locked(ifa->ifa_addr, 0, 0); if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) in6_ifloop_request(RTM_ADD, ifa); if (rt) rt->rt_refcnt--; + lck_mtx_unlock(rt_mtx); } /* @@ -231,7 +235,7 @@ in6_ifaddloop(struct ifaddr *ifa) * if it exists. */ static void -in6_ifremloop(struct ifaddr *ifa) +in6_ifremloop(struct ifaddr *ifa, int locked) { struct in6_ifaddr *ia; struct rtentry *rt; @@ -253,13 +257,17 @@ in6_ifremloop(struct ifaddr *ifa) * (probably p2p) interfaces. * XXX: we should avoid such a configuration in IPv6... 
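 * (in6_ifremloop() now takes a "locked" flag so callers that already
 *  hold nd6_mutex -- the lock protecting the renamed in6_ifaddrs list
 *  -- do not deadlock.  The walk below is bracketed by the conditional
 *  lock/unlock idiom:
 *
 *      if (!locked)
 *              lck_mtx_lock(nd6_mutex);
 *      for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
 *              -- count addresses equal to IFA_IN6(ifa)
 *      }
 *      if (!locked)
 *              lck_mtx_unlock(nd6_mutex);
 *
 *  The same convention appears on in6_purgeaddr(), whose new second
 *  argument is threaded through the SIOCAUTOCONF_STOP and SIOCLL_STOP
 *  loops further down.)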
*/ - for (ia = in6_ifaddr; ia; ia = ia->ia_next) { + if (!locked) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) { ia_count++; if (ia_count > 1) break; } } + if (!locked) + lck_mtx_unlock(nd6_mutex); if (ia_count == 1) { /* @@ -270,12 +278,14 @@ in6_ifremloop(struct ifaddr *ifa) * a subnet-router anycast address on an interface attahced * to a shared medium. */ - rt = rtalloc1(ifa->ifa_addr, 0, 0); + lck_mtx_lock(rt_mtx); + rt = rtalloc1_locked(ifa->ifa_addr, 0, 0); if (rt != NULL && (rt->rt_flags & RTF_HOST) != 0 && (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { rt->rt_refcnt--; in6_ifloop_request(RTM_DELETE, ifa); } + lck_mtx_unlock(rt_mtx); } } @@ -289,16 +299,23 @@ in6_ifindex2scopeid(idx) if (idx < 0 || if_index < idx) return -1; + + ifnet_head_lock_shared(); ifp = ifindex2ifnet[idx]; + ifnet_head_done(); + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; - if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) + if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { + ifnet_lock_done(ifp); return sin6->sin6_scope_id & 0xffff; + } } + ifnet_lock_done(ifp); return -1; } @@ -370,11 +387,14 @@ in6_control(so, cmd, data, ifp, p) struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; int privileged, error = 0; - u_long dl_tag; + int index; + struct timeval timenow; + + getmicrotime(&timenow); privileged = 0; #ifdef __APPLE__ - if (p == NULL || !suser(p->p_ucred, &p->p_acflag)) + if (p == NULL || !proc_suser(p)) #else if (p == NULL || !suser(p)) #endif @@ -390,6 +410,15 @@ in6_control(so, cmd, data, ifp, p) return(EOPNOTSUPP); switch (cmd) { + case SIOCAUTOCONF_START: + case SIOCAUTOCONF_STOP: + case SIOCLL_START: + case SIOCLL_STOP: + case SIOCPROTOATTACH_IN6: + case SIOCPROTODETACH_IN6: + if (!privileged) + return(EPERM); + break; case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: @@ -449,28 +478,32 @@ in6_control(so, cmd, data, ifp, p) switch (cmd) { case SIOCAUTOCONF_START: + ifnet_lock_exclusive(ifp); ifp->if_eflags |= IFEF_ACCEPT_RTADVD; + ifnet_lock_done(ifp); return (0); case SIOCAUTOCONF_STOP: { - struct ifaddr *ifa, *nifa = NULL; - + struct in6_ifaddr *ia, *nia = NULL; + + ifnet_lock_exclusive(ifp); ifp->if_eflags &= ~IFEF_ACCEPT_RTADVD; + ifnet_lock_done(ifp); - /* nuke prefix list. this may try to remove some of ifaddrs as well */ + /* nuke prefix list. 
this may try to remove some ifaddrs as well */ in6_purgeprefix(ifp); /* removed autoconfigured address from interface */ - - for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) - { - nifa = TAILQ_NEXT(ifa, ifa_list); - if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET6) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) continue; - if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_AUTOCONF) - in6_purgeaddr(ifa); + if (ia->ia6_flags & IN6_IFF_AUTOCONF) + in6_purgeaddr(&ia->ia_ifa, 1); } + lck_mtx_unlock(nd6_mutex); return (0); } @@ -491,24 +524,25 @@ in6_control(so, cmd, data, ifp, p) case SIOCLL_STOP: { - struct ifaddr *ifa, *nifa = NULL; - + struct in6_ifaddr *ia, *nia = NULL; + /* removed link local addresses from interface */ - for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) - { - nifa = TAILQ_NEXT(ifa, ifa_list); - if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET6) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) continue; - if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) - in6_purgeaddr(ifa); + if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) + in6_purgeaddr(&ia->ia_ifa, 1); } + lck_mtx_unlock(nd6_mutex); return (0); } case SIOCPROTOATTACH_IN6: - + switch (ifp->if_type) { #if IFT_BRIDGE /*OpenBSD 2.8*/ /* some of the interfaces are inherently not IPv6 capable */ @@ -517,7 +551,7 @@ in6_control(so, cmd, data, ifp, p) #endif default: - if (error = dlil_plumb_protocol(PF_INET6, ifp, &dl_tag)) + if (error = dlil_plumb_protocol(PF_INET6, ifp)) printf("SIOCPROTOATTACH_IN6: %s error=%d\n", if_name(ifp), error); break; @@ -573,7 +607,8 @@ in6_control(so, cmd, data, ifp, p) * and should be unused. */ /* we decided to obsolete this command (20000704) */ - return(EINVAL); + error = EINVAL; + goto ioctl_cleanup; case SIOCDIFADDR_IN6: /* @@ -583,8 +618,11 @@ in6_control(so, cmd, data, ifp, p) * address from the day one, we consider "remove the first one" * semantics to be not preferable. */ - if (ia == NULL) - return(EADDRNOTAVAIL); + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto ioctl_cleanup; + } + /* FALLTHROUGH */ case SIOCAIFADDR_IN6: /* @@ -592,10 +630,14 @@ in6_control(so, cmd, data, ifp, p) * the corresponding operation. 
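 * (Throughout in6_control() the scattered early returns are rewritten
 *  as "error = ...; goto ioctl_cleanup;", converging on a single exit:
 *
 *      if (ia == NULL) {
 *              error = EADDRNOTAVAIL;
 *              goto ioctl_cleanup;
 *      }
 *      ...
 *      ioctl_cleanup:
 *              return error;
 *
 *  Today the label only returns, but the single exit leaves one place
 *  to release references taken while resolving ia.  Note also the
 *  pattern of snapshotting the clock once -- getmicrotime(&timenow) --
 *  and comparing against timenow.tv_sec where this code previously read
 *  the global time_second.)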
*/ if (ifra->ifra_addr.sin6_family != AF_INET6 || - ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) - return(EAFNOSUPPORT); - if (!privileged) - return(EPERM); + ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { + error = EAFNOSUPPORT; + goto ioctl_cleanup; + } + if (!privileged) { + error = EPERM; + goto ioctl_cleanup; + } break; @@ -607,26 +649,34 @@ in6_control(so, cmd, data, ifp, p) case SIOCGIFDSTADDR_IN6: case SIOCGIFALIFETIME_IN6: /* must think again about its semantics */ - if (ia == NULL) - return(EADDRNOTAVAIL); + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto ioctl_cleanup; + } break; case SIOCSIFALIFETIME_IN6: { struct in6_addrlifetime *lt; - if (!privileged) - return(EPERM); - if (ia == NULL) - return(EADDRNOTAVAIL); + if (!privileged) { + error = EPERM; + goto ioctl_cleanup; + } + if (ia == NULL) { + error = EADDRNOTAVAIL; + goto ioctl_cleanup; + } /* sanity for overflow - beware unsigned */ lt = &ifr->ifr_ifru.ifru_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME - && lt->ia6t_vltime + time_second < time_second) { - return EINVAL; + && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME - && lt->ia6t_pltime + time_second < time_second) { - return EINVAL; + && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; } break; } @@ -639,8 +689,10 @@ in6_control(so, cmd, data, ifp, p) break; case SIOCGIFDSTADDR_IN6: - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) - return(EINVAL); + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { + error = EINVAL; + goto ioctl_cleanup; + } /* * XXX: should we check if ifa_dstaddr is NULL and return * an error? @@ -657,28 +709,34 @@ in6_control(so, cmd, data, ifp, p) break; case SIOCGIFSTAT_IN6: - if (ifp == NULL) - return EINVAL; - if (in6_ifstat == NULL || ifp->if_index >= in6_ifstatmax - || in6_ifstat[ifp->if_index] == NULL) { + if (ifp == NULL) { + error = EINVAL; + goto ioctl_cleanup; + } + index = ifp->if_index; + if (in6_ifstat == NULL || index >= in6_ifstatmax + || in6_ifstat[index] == NULL) { /* return EAFNOSUPPORT? */ bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_stat)); } else - ifr->ifr_ifru.ifru_stat = *in6_ifstat[ifp->if_index]; + ifr->ifr_ifru.ifru_stat = *in6_ifstat[index]; break; case SIOCGIFSTAT_ICMP6: - if (ifp == NULL) - return EINVAL; - if (icmp6_ifstat == NULL || ifp->if_index >= icmp6_ifstatmax || - icmp6_ifstat[ifp->if_index] == NULL) { + if (ifp == NULL) { + error = EINVAL; + goto ioctl_cleanup; + } + index = ifp->if_index; + if (icmp6_ifstat == NULL || index >= icmp6_ifstatmax || + icmp6_ifstat[index] == NULL) { /* return EAFNOSUPPORT? 
*/ bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_icmp6stat)); } else ifr->ifr_ifru.ifru_icmp6stat = - *icmp6_ifstat[ifp->if_index]; + *icmp6_ifstat[index]; break; case SIOCGIFALIFETIME_IN6: @@ -690,38 +748,44 @@ in6_control(so, cmd, data, ifp, p) /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = - time_second + ia->ia6_lifetime.ia6t_vltime; + timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = - time_second + ia->ia6_lifetime.ia6t_pltime; + timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; break; case SIOCAIFADDR_IN6: { - int i, error = 0; + int i; struct nd_prefix pr0, *pr; - if (dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET6, &dl_tag) == EPROTONOSUPPORT) { - /* Address is added without previous IPv6 configurator support (gif, stf etc...) */ - if (error = dlil_plumb_protocol(PF_INET6, ifp, &dl_tag)) { + /* Attempt to attach the protocol, in case it isn't attached */ + error = dlil_plumb_protocol(PF_INET6, ifp); + if (error) { + if (error != EEXIST) { printf("SIOCAIFADDR_IN6: %s can't plumb protocol error=%d\n", if_name(ifp), error); - return (error); + goto ioctl_cleanup; } + + /* Ignore, EEXIST */ + error = 0; + } + else { + /* PF_INET6 wasn't previously attached */ in6_if_up(ifp, NULL); } - /* * first, make or update the interface address structure, * and link it to the list. */ if ((error = in6_update_ifa(ifp, ifra, ia)) != 0) - return(error); + goto ioctl_cleanup; /* * then, make the prefix on-link on the interface. @@ -767,11 +831,12 @@ in6_control(so, cmd, data, ifp, p) * interface route. */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) - return(error); + goto ioctl_cleanup; if (pr == NULL) { log(LOG_ERR, "nd6_prelist_add succedded but " "no prefix\n"); - return(EINVAL); /* XXX panic here? */ + error = EINVAL; + goto ioctl_cleanup; } } if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) @@ -808,8 +873,7 @@ in6_control(so, cmd, data, ifp, p) * addresses, that is, this address might make * other addresses detached. */ - pfxlist_onlink_check(); - in6_post_msg(ifp, KEV_INET6_NEW_USER_ADDR, ia); + pfxlist_onlink_check(0); } break; @@ -858,23 +922,22 @@ in6_control(so, cmd, data, ifp, p) } purgeaddr: - in6_purgeaddr(&ia->ia_ifa); + in6_purgeaddr(&ia->ia_ifa, 0); break; } default: #ifdef __APPLE__ - error = dlil_ioctl(PF_INET6, ifp, cmd, (caddr_t)data); - return error; - + error = dlil_ioctl(PF_INET6, ifp, cmd, (caddr_t)data); + goto ioctl_cleanup; #else if (ifp == NULL || ifp->if_ioctl == 0) return(EOPNOTSUPP); return((*ifp->if_ioctl)(ifp, cmd, data)); #endif } - - return(0); +ioctl_cleanup: + return error; } /* @@ -893,7 +956,10 @@ in6_update_ifa(ifp, ifra, ia) struct in6_ifaddr *oia; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; + struct timeval timenow; + + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED); /* Validate parameters */ if (ifp == NULL || ifra == NULL) /* this maybe redundant */ return(EINVAL); @@ -912,6 +978,14 @@ in6_update_ifa(ifp, ifra, ia) */ if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) return(EINVAL); + /* + * Set the address family value for the mask if it was not set. + * Radar 3899482. 
+ */ + if (ifra->ifra_prefixmask.sin6_len == sizeof(struct sockaddr_in6) && + ifra->ifra_prefixmask.sin6_family == 0) { + ifra->ifra_prefixmask.sin6_family = AF_INET6; + } /* * Because the IPv6 address architecture is classless, we require * users to specify a (non 0) prefix length (mask) for a new address. @@ -985,9 +1059,11 @@ in6_update_ifa(ifp, ifra, ia) } } /* lifetime consistency check */ + + getmicrotime(&timenow); lt = &ifra->ifra_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME - && lt->ia6t_vltime + time_second < time_second) { + && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { return EINVAL; } if (lt->ia6t_vltime == 0) { @@ -1000,7 +1076,7 @@ in6_update_ifa(ifp, ifra, ia) ip6_sprintf(&ifra->ifra_addr.sin6_addr)); } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME - && lt->ia6t_pltime + time_second < time_second) { + && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { return EINVAL; } @@ -1018,7 +1094,7 @@ in6_update_ifa(ifp, ifra, ia) ia = (struct in6_ifaddr *) _MALLOC(sizeof(*ia), M_IFADDR, M_NOWAIT); if (ia == NULL) - return (ENOBUFS); + return ENOBUFS; bzero((caddr_t)ia, sizeof(*ia)); /* Initialize the address and masks */ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; @@ -1038,15 +1114,18 @@ in6_update_ifa(ifp, ifra, ia) = (struct sockaddr *)&ia->ia_prefixmask; ia->ia_ifp = ifp; - if ((oia = in6_ifaddr) != NULL) { + lck_mtx_lock(nd6_mutex); + if ((oia = in6_ifaddrs) != NULL) { for ( ; oia->ia_next; oia = oia->ia_next) continue; oia->ia_next = ia; } else - in6_ifaddr = ia; + in6_ifaddrs = ia; + lck_mtx_unlock(nd6_mutex); - TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, - ifa_list); + ifnet_lock_exclusive(ifp); + if_attach_ifa(ifp, &ia->ia_ifa); + ifnet_lock_done(ifp); } /* set prefix mask */ @@ -1108,7 +1187,7 @@ in6_update_ifa(ifp, ifra, ia) iilen = (sizeof(ia->ia_prefixmask.sin6_addr) << 3) - plen; if ((error = in6_prefix_add_ifid(iilen, ia)) != 0) { - in6_purgeaddr((struct ifaddr *)ia); + in6_purgeaddr((struct ifaddr *)ia, 0); return(error); } } @@ -1131,14 +1210,14 @@ in6_update_ifa(ifp, ifra, ia) llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; - (void)in6_addmulti(&llsol, ifp, &error); + (void)in6_addmulti(&llsol, ifp, &error, 0); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(&llsol), if_name(ifp), error); - in6_purgeaddr((struct ifaddr *)ia); + in6_purgeaddr((struct ifaddr *)ia, 0); return(error); } } @@ -1157,7 +1236,9 @@ in6_update_ifa(ifp, ifra, ia) mltaddr.sin6_addr = in6addr_linklocal_allnodes; mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); + ifnet_lock_done(ifp); if (in6m == NULL) { rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, @@ -1165,7 +1246,7 @@ in6_update_ifa(ifp, ifra, ia) (struct sockaddr *)&mltmask, RTF_UP|RTF_CLONING, /* xxx */ (struct rtentry **)0); - (void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error); + (void)in6_addmulti(&mltaddr.sin6_addr, ifp, &error, 0); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: addmulti failed for " @@ -1181,10 +1262,12 @@ in6_update_ifa(ifp, ifra, ia) #define hostnamelen strlen(hostname) if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr) == 0) { + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); + ifnet_lock_done(ifp); if (in6m == NULL && ia != NULL) { (void)in6_addmulti(&mltaddr.sin6_addr, - ifp, &error); + ifp, &error, 0); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: 
" "addmulti failed for " @@ -1210,7 +1293,9 @@ in6_update_ifa(ifp, ifra, ia) mltaddr.sin6_addr = in6addr_nodelocal_allnodes; + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); + ifnet_lock_done(ifp); if (in6m == NULL && ia_loop != NULL) { rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, @@ -1219,7 +1304,7 @@ in6_update_ifa(ifp, ifra, ia) RTF_UP, (struct rtentry **)0); (void)in6_addmulti(&mltaddr.sin6_addr, ifp, - &error); + &error, 0); if (error != 0) { log(LOG_WARNING, "in6_update_ifa: " "addmulti failed for %s on %s " @@ -1239,12 +1324,12 @@ in6_update_ifa(ifp, ifra, ia) /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = - time_second + ia->ia6_lifetime.ia6t_vltime; + timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = - time_second + ia->ia6_lifetime.ia6t_pltime; + timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; @@ -1273,13 +1358,13 @@ in6_update_ifa(ifp, ifra, ia) * anyway. */ if (hostIsNew) - in6_unlink_ifa(ia, ifp); + in6_unlink_ifa(ia, ifp, 0); return(error); } void -in6_purgeaddr(ifa) - struct ifaddr *ifa; +in6_purgeaddr( + struct ifaddr *ifa, int nd6_locked) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; @@ -1308,7 +1393,7 @@ in6_purgeaddr(ifa) } /* Remove ownaddr's loopback rtentry, if it exists. */ - in6_ifremloop(&(ia->ia_ifa)); + in6_ifremloop(&(ia->ia_ifa), nd6_locked); if (ifp->if_flags & IFF_MULTICAST) { /* @@ -1325,29 +1410,35 @@ in6_purgeaddr(ifa) ia->ia_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(llsol, ifp, in6m); + ifnet_lock_done(ifp); if (in6m) - in6_delmulti(in6m); + in6_delmulti(in6m, nd6_locked); } + in6_unlink_ifa(ia, ifp, nd6_locked); in6_post_msg(ifp, KEV_INET6_ADDR_DELETED, ia); - in6_unlink_ifa(ia, ifp); } static void -in6_unlink_ifa(ia, ifp) +in6_unlink_ifa(ia, ifp, nd6_locked) struct in6_ifaddr *ia; struct ifnet *ifp; + int nd6_locked; { int plen, iilen; struct in6_ifaddr *oia; - int s = splnet(); - TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list); + ifnet_lock_exclusive(ifp); + if_detach_ifa(ifp, &ia->ia_ifa); + ifnet_lock_done(ifp); + if (!nd6_locked) + lck_mtx_lock(nd6_mutex); oia = ia; - if (oia == (ia = in6_ifaddr)) - in6_ifaddr = ia->ia_next; + if (oia == (ia = in6_ifaddrs)) + in6_ifaddrs = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; @@ -1358,7 +1449,6 @@ in6_unlink_ifa(ia, ifp) printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n"); } } - if (oia->ia6_ifpr) { /* check for safety */ plen = in6_mask2len(&oia->ia_prefixmask.sin6_addr, NULL); iilen = (sizeof(oia->ia_prefixmask.sin6_addr) << 3) - plen; @@ -1381,36 +1471,38 @@ in6_unlink_ifa(ia, ifp) oia->ia6_ndpr = NULL; } - pfxlist_onlink_check(); + pfxlist_onlink_check(1); } + if (!nd6_locked) + lck_mtx_unlock(nd6_mutex); + /* - * release another refcnt for the link from in6_ifaddr. + * release another refcnt for the link from in6_ifaddrs. * Note that we should decrement the refcnt at least once for all *BSD. 
*/ ifafree(&oia->ia_ifa); - splx(s); } void in6_purgeif(ifp) struct ifnet *ifp; { - struct ifaddr *ifa, *nifa = NULL; + struct in6_ifaddr *ia, *nia = NULL; if (ifp == NULL || &ifp->if_addrlist == NULL) return; - for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { - nifa = TAILQ_NEXT(ifa, ifa_list); - if (ifa->ifa_addr == NULL) + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) continue; - if (ifa->ifa_addr->sa_family != AF_INET6) - continue; - in6_purgeaddr(ifa); + in6_purgeaddr(&ia->ia_ifa, 1); } + lck_mtx_unlock(nd6_mutex); in6_ifdetach(ifp); } @@ -1492,8 +1584,9 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) case SIOCALIFADDR: { struct in6_aliasreq ifra; - struct in6_addr *hostid = NULL; + struct in6_addr hostid; int prefixlen; + int hostid_found = 0; if ((iflr->flags & IFLR_PREFIX) != 0) { struct sockaddr_in6 *sin6; @@ -1506,7 +1599,8 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); if (!ifa) return EADDRNOTAVAIL; - hostid = IFA_IN6(ifa); + hostid = *IFA_IN6(ifa); + hostid_found = 1; /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) @@ -1529,22 +1623,22 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) bcopy(&iflr->addr, &ifra.ifra_addr, ((struct sockaddr *)&iflr->addr)->sa_len); - if (hostid) { + if (hostid_found) { /* fill in hostid part */ ifra.ifra_addr.sin6_addr.s6_addr32[2] = - hostid->s6_addr32[2]; + hostid.s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] = - hostid->s6_addr32[3]; + hostid.s6_addr32[3]; } if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /*XXX*/ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, ((struct sockaddr *)&iflr->dstaddr)->sa_len); - if (hostid) { + if (hostid_found) { ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = - hostid->s6_addr32[2]; + hostid.s6_addr32[2]; ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = - hostid->s6_addr32[3]; + hostid.s6_addr32[3]; } } @@ -1593,6 +1687,7 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) } } + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -1617,6 +1712,7 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) break; } + ifnet_lock_done(ifp); if (!ifa) return EADDRNOTAVAIL; ia = ifa2ia6(ifa); @@ -1700,7 +1796,6 @@ in6_ifinit(ifp, ia, sin6, newhost) int newhost; { int error = 0, plen, ifacount = 0; - int s = splimp(); struct ifaddr *ifa; /* @@ -1708,6 +1803,7 @@ in6_ifinit(ifp, ia, sin6, newhost) * if this is its first address, * and to validate the address if necessary. */ + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) @@ -1716,25 +1812,17 @@ in6_ifinit(ifp, ia, sin6, newhost) continue; ifacount++; } + ifnet_lock_done(ifp); ia->ia_addr = *sin6; if (ifacount <= 1 && -#ifdef __APPLE__ (error = dlil_ioctl(PF_INET6, ifp, SIOCSIFADDR, (caddr_t)ia))) { if (error) { - splx(s); return(error); } } -#else - ifp->if_ioctl && (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) { - splx(s); - return(error); - } -#endif - splx(s); ia->ia_ifa.ifa_metric = ifp->if_metric; @@ -1775,15 +1863,15 @@ in6_ifinit(ifp, ia, sin6, newhost) * given interface. 
*/ struct in6_multi * -in6_addmulti(maddr6, ifp, errorp) +in6_addmulti(maddr6, ifp, errorp, nd6_locked) struct in6_addr *maddr6; struct ifnet *ifp; int *errorp; + int nd6_locked; { struct in6_multi *in6m; struct sockaddr_in6 sin6; struct ifmultiaddr *ifma; - int s = splnet(); *errorp = 0; @@ -1798,7 +1886,6 @@ in6_addmulti(maddr6, ifp, errorp) sin6.sin6_addr = *maddr6; *errorp = if_addmulti(ifp, (struct sockaddr *)&sin6, &ifma); if (*errorp) { - splx(s); return 0; } @@ -1813,7 +1900,6 @@ in6_addmulti(maddr6, ifp, errorp) at interrupt time? If so, need to fix if_addmulti. XXX */ in6m = (struct in6_multi *)_MALLOC(sizeof(*in6m), M_IPMADDR, M_NOWAIT); if (in6m == NULL) { - splx(s); return (NULL); } @@ -1822,14 +1908,17 @@ in6_addmulti(maddr6, ifp, errorp) in6m->in6m_ifp = ifp; in6m->in6m_ifma = ifma; ifma->ifma_protospec = in6m; + if (nd6_locked == 0) + lck_mtx_lock(nd6_mutex); LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry); + if (nd6_locked == 0) + lck_mtx_unlock(nd6_mutex); /* * Let MLD6 know that we have joined a new IP6 multicast * group. */ mld6_start_listening(in6m); - splx(s); return(in6m); } @@ -1837,26 +1926,30 @@ in6_addmulti(maddr6, ifp, errorp) * Delete a multicast address record. */ void -in6_delmulti(in6m) - struct in6_multi *in6m; +in6_delmulti( + struct in6_multi *in6m, int nd6locked) { struct ifmultiaddr *ifma = in6m->in6m_ifma; - int s = splnet(); - if (ifma && ifma->ifma_refcount == 1) { + if (ifma && ifma->ifma_usecount == 1) { /* * No remaining claims to this record; let MLD6 know * that we are leaving the multicast group. */ mld6_stop_listening(in6m); ifma->ifma_protospec = 0; + if (nd6locked == 0) + lck_mtx_lock(nd6_mutex); LIST_REMOVE(in6m, in6m_entry); + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); FREE(in6m, M_IPMADDR); } /* XXX - should be separate API for when we have an ifma? */ - if (ifma) - if_delmultiaddr(ifma); - splx(s); + if (ifma) { + if_delmultiaddr(ifma, 0); + ifma_release(ifma); + } } /* @@ -1869,6 +1962,7 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags) { struct ifaddr *ifa; + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) @@ -1882,11 +1976,11 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags) break; } } + ifnet_lock_done(ifp); return((struct in6_ifaddr *)ifa); } - /* * find the internet address corresponding to a given interface and address. 
*/ @@ -1897,6 +1991,7 @@ in6ifa_ifpwithaddr(ifp, addr) { struct ifaddr *ifa; + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr == NULL) @@ -1906,6 +2001,7 @@ in6ifa_ifpwithaddr(ifp, addr) if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) break; } + ifnet_lock_done(ifp); return((struct in6_ifaddr *)ifa); } @@ -1973,11 +2069,15 @@ in6_localaddr(in6) if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; - for (ia = in6_ifaddr; ia; ia = ia->ia_next) + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia; ia = ia->ia_next) if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, - &ia->ia_prefixmask.sin6_addr)) + &ia->ia_prefixmask.sin6_addr)) { + lck_mtx_unlock(nd6_mutex); return 1; + } + lck_mtx_unlock(nd6_mutex); return (0); } @@ -1987,18 +2087,22 @@ in6_is_addr_deprecated(sa6) { struct in6_ifaddr *ia; - for (ia = in6_ifaddr; ia; ia = ia->ia_next) { + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &sa6->sin6_addr) && #if SCOPEDROUTING ia->ia_addr.sin6_scope_id == sa6->sin6_scope_id && #endif - (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) + (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { + lck_mtx_unlock(nd6_mutex); return(1); /* true */ + } /* XXX: do we still have to go thru the rest of the list? */ } + lck_mtx_unlock(nd6_mutex); return(0); /* false */ } @@ -2046,7 +2150,8 @@ in6_are_prefix_equal(p1, p2, len) if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) return(0); - if (p1->s6_addr[bytelen] >> (8 - bitlen) != + if (bitlen != 0 && + p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) return(0); @@ -2081,9 +2186,9 @@ in6_prefixlen2mask(maskp, len) * return the best address out of the same scope */ struct in6_ifaddr * -in6_ifawithscope(oifp, dst) - struct ifnet *oifp; - struct in6_addr *dst; +in6_ifawithscope( + struct ifnet *oifp, + struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), src_scope, best_scope = 0; int blen = -1; @@ -2103,8 +2208,8 @@ in6_ifawithscope(oifp, dst) * Comparing an interface with the outgoing interface will be done * only at the final stage of tiebreaking. */ - for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { /* * We can never take an address that breaks the scope zone * of the destination. @@ -2112,6 +2217,7 @@ in6_ifawithscope(oifp, dst) if (in6_addr2scopeid(ifp, dst) != in6_addr2scopeid(oifp, dst)) continue; + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { int tlen = -1, dscopecmp, bscopecmp, matchcmp; @@ -2328,12 +2434,17 @@ in6_ifawithscope(oifp, dst) goto replace; /* (9) */ replace: + ifaref(ifa); + if (ifa_best) + ifafree(&ifa_best->ia_ifa); ifa_best = (struct in6_ifaddr *)ifa; blen = tlen >= 0 ? tlen : in6_matchlen(IFA_IN6(ifa), dst); best_scope = in6_addrscope(&ifa_best->ia_addr.sin6_addr); } + ifnet_lock_done(ifp); } + ifnet_head_done(); /* count statistics for future improvements */ if (ifa_best == NULL) @@ -2361,9 +2472,9 @@ in6_ifawithscope(oifp, dst) * found, return the first valid address from designated IF. */ struct in6_ifaddr * -in6_ifawithifp(ifp, dst) - struct ifnet *ifp; - struct in6_addr *dst; +in6_ifawithifp( + struct ifnet *ifp, + struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; @@ -2378,6 +2489,7 @@ in6_ifawithifp(ifp, dst) * If two or more, return one which matches the dst longest. 
* If none, return one of global addresses assigned other ifs. */ + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -2410,8 +2522,10 @@ in6_ifawithifp(ifp, dst) besta = (struct in6_ifaddr *)ifa; } } - if (besta) + if (besta) { + ifnet_lock_done(ifp); return(besta); + } TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { @@ -2428,9 +2542,11 @@ in6_ifawithifp(ifp, dst) dep[1] = (struct in6_ifaddr *)ifa; continue; } - + + ifnet_lock_done(ifp); return (struct in6_ifaddr *)ifa; } + ifnet_lock_done(ifp); /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) @@ -2447,9 +2563,9 @@ extern int in6_init2done; * perform DAD when interface becomes IFF_UP. */ void -in6_if_up(ifp, ifra) - struct ifnet *ifp; - struct in6_aliasreq *ifra; +in6_if_up( + struct ifnet *ifp, + struct in6_aliasreq *ifra) { struct ifaddr *ifa; struct in6_ifaddr *ia; @@ -2464,6 +2580,7 @@ in6_if_up(ifp, ifra) in6_ifattach(ifp, NULL, ifra); dad_delay = 0; + ifnet_lock_exclusive(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -2472,11 +2589,12 @@ in6_if_up(ifp, ifra) if (ia->ia6_flags & IN6_IFF_TENTATIVE) nd6_dad_start(ifa, &dad_delay); } + ifnet_lock_done(ifp); } int -in6if_do_dad(ifp) - struct ifnet *ifp; +in6if_do_dad( + struct ifnet *ifp) { if ((ifp->if_flags & IFF_LOOPBACK) != 0) return(0); @@ -2521,12 +2639,13 @@ in6_setmaxmtu() unsigned long maxmtu = 0; struct ifnet *ifp; - for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { if ((ifp->if_flags & IFF_LOOPBACK) == 0 && nd_ifinfo[ifp->if_index].linkmtu > maxmtu) maxmtu = nd_ifinfo[ifp->if_index].linkmtu; } + ifnet_head_done(); if (maxmtu) /* update only when maxmtu is positive */ in6_maxmtu = maxmtu; } diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index d9df90b9b..47f18fed4 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -72,6 +72,13 @@ #ifndef _NETINET6_IN6_H_ #define _NETINET6_IN6_H_ #include +#include + + +#ifndef _SA_FAMILY_T +#define _SA_FAMILY_T +typedef __uint8_t sa_family_t; +#endif /* * Identification of the network protocol stack @@ -81,6 +88,7 @@ #define __KAME__ #define __KAME_VERSION "20010528/apple-darwin" +#ifndef _POSIX_C_SOURCE /* * Local port number conventions: * @@ -114,15 +122,16 @@ #define IPV6PORT_ANONMAX 65535 #define IPV6PORT_RESERVEDMIN 600 #define IPV6PORT_RESERVEDMAX (IPV6PORT_RESERVED-1) +#endif /* _POSIX_C_SOURCE */ /* * IPv6 address */ struct in6_addr { union { - u_int8_t __u6_addr8[16]; - u_int16_t __u6_addr16[8]; - u_int32_t __u6_addr32[4]; + __uint8_t __u6_addr8[16]; + __uint16_t __u6_addr16[8]; + __uint32_t __u6_addr32[4]; } __u6_addr; /* 128-bit IP6 address */ }; @@ -138,16 +147,16 @@ struct in6_addr { /* * Socket address for IPv6 */ -#if !defined(_XOPEN_SOURCE) +#ifndef _POSIX_C_SOURCE #define SIN6_LEN -#endif +#endif /* _POSIX_C_SOURCE */ struct sockaddr_in6 { - u_int8_t sin6_len; /* length of this struct(sa_family_t)*/ - u_int8_t sin6_family; /* AF_INET6 (sa_family_t) */ - u_int16_t sin6_port; /* Transport layer port # (in_port_t)*/ - u_int32_t sin6_flowinfo; /* IP6 flow information */ + __uint8_t sin6_len; /* length of this struct(sa_family_t)*/ + sa_family_t sin6_family; /* AF_INET6 (sa_family_t) */ + in_port_t sin6_port; /* Transport layer port # (in_port_t)*/ + __uint32_t sin6_flowinfo; /* IP6 flow information */ struct in6_addr sin6_addr; /* IP6 address */ - u_int32_t sin6_scope_id; 
/* scope zone index */ + __uint32_t sin6_scope_id; /* scope zone index */ }; /* @@ -165,7 +174,7 @@ struct sockaddr_in6 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} #endif -#ifdef KERNEL +#ifdef KERNEL_PRIVATE extern const struct sockaddr_in6 sa6_any; extern const struct in6_addr in6mask0; @@ -173,7 +182,7 @@ extern const struct in6_addr in6mask32; extern const struct in6_addr in6mask64; extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; -#endif /* KERNEL */ +#endif KERNEL_PRIVATE /* * Macros started with IPV6_ADDR is KAME local @@ -209,6 +218,7 @@ extern const struct in6_addr in6mask128; #define IN6ADDR_LOOPBACK_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} +#ifndef _POSIX_C_SOURCE #define IN6ADDR_NODELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} @@ -218,12 +228,15 @@ extern const struct in6_addr in6mask128; #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} +#endif /* _POSIX_C_SOURCE */ extern const struct in6_addr in6addr_any; extern const struct in6_addr in6addr_loopback; +#ifndef _POSIX_C_SOURCE extern const struct in6_addr in6addr_nodelocal_allnodes; extern const struct in6_addr in6addr_linklocal_allnodes; extern const struct in6_addr in6addr_linklocal_allrouters; +#endif /* _POSIX_C_SOURCE */ /* * Equality @@ -235,8 +248,10 @@ extern const struct in6_addr in6addr_linklocal_allrouters; #define IN6_ARE_ADDR_EQUAL(a, b) \ (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) #else +#ifndef _POSIX_C_SOURCE #define IN6_ARE_ADDR_EQUAL(a, b) \ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#endif /* _POSIX_C_SOURCE */ #endif #ifdef KERNEL /* non standard */ @@ -251,37 +266,37 @@ extern const struct in6_addr in6addr_linklocal_allrouters; * Unspecified */ #define IN6_IS_ADDR_UNSPECIFIED(a) \ - ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) + ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) /* * Loopback */ #define IN6_IS_ADDR_LOOPBACK(a) \ - ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) + ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) /* * IPv4 compatible */ #define IN6_IS_ADDR_V4COMPAT(a) \ - ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ - (*(const u_int32_t *)(const void 
*)(&(a)->s6_addr[12]) != ntohl(1))) + ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1))) /* * Mapped */ #define IN6_IS_ADDR_V4MAPPED(a) \ - ((*(const u_int32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const u_int32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff))) + ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff))) /* * KAME Scope Values @@ -368,43 +383,47 @@ extern const struct in6_addr in6addr_linklocal_allrouters; #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_preferred != 0 && \ - (a)->ia6_lifetime.ia6t_preferred < time_second) + (a)->ia6_lifetime.ia6t_preferred < timenow.tv_sec) #define IFA6_IS_INVALID(a) \ ((a)->ia6_lifetime.ia6t_expire != 0 && \ - (a)->ia6_lifetime.ia6t_expire < time_second) -#endif /* _KERNEL */ + (a)->ia6_lifetime.ia6t_expire < timenow.tv_sec) +#endif /* KERNEL */ /* * IP6 route structure */ -#ifdef __APPLE_API_PRIVATE -#if !defined(_XOPEN_SOURCE) +#ifndef _POSIX_C_SOURCE +#ifdef PRIVATE struct route_in6 { struct rtentry *ro_rt; struct sockaddr_in6 ro_dst; }; -#endif -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ +#endif /* _POSIX_C_SOURCE */ /* * Options for use with [gs]etsockopt at the IPV6 level. * First word of comment is data type; bool is stored in int. */ /* no hdrincl */ +#ifndef _POSIX_C_SOURCE #if 0 /* the followings are relic in IPv4 and hence are disabled */ #define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */ #define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */ #define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */ #define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */ #define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */ -#endif +#endif 0 #define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */ +#endif /* _POSIX_C_SOURCE */ #define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */ -#define IPV6_MULTICAST_IF 9 /* u_char; set/get IP6 multicast i/f */ -#define IPV6_MULTICAST_HOPS 10 /* u_char; set/get IP6 multicast hops */ -#define IPV6_MULTICAST_LOOP 11 /* u_char; set/get IP6 multicast loopback */ +#define IPV6_MULTICAST_IF 9 /* __uint8_t; set/get IP6 multicast i/f */ +#define IPV6_MULTICAST_HOPS 10 /* __uint8_t; set/get IP6 multicast hops */ +#define IPV6_MULTICAST_LOOP 11 /* __uint8_t; set/get IP6 mcast loopback */ #define IPV6_JOIN_GROUP 12 /* ip6_mreq; join a group membership */ #define IPV6_LEAVE_GROUP 13 /* ip6_mreq; leave a group membership */ + +#ifndef _POSIX_C_SOURCE #define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */ #define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */ /* RFC2292 options */ @@ -417,15 +436,17 @@ struct route_in6 { #define IPV6_PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */ #define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */ +#endif /* _POSIX_C_SOURCE */ #define IPV6_V6ONLY 27 /* bool; only bind INET6 at wildcard bind */ +#ifndef _POSIX_C_SOURCE #ifndef KERNEL #define IPV6_BINDV6ONLY IPV6_V6ONLY -#endif +#endif KERNEL #if 1 /*IPSEC*/ #define IPV6_IPSEC_POLICY 28 /* struct; get/set 
security policy */ -#endif +#endif 1 #define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */ #if 1 /*IPV6FIREWALL*/ @@ -434,7 +455,7 @@ struct route_in6 { #define IPV6_FW_FLUSH 32 /* flush firewall rule chain */ #define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */ #define IPV6_FW_GET 34 /* get entire firewall rule chain */ -#endif +#endif 1 /* to define items, should talk with KAME guys first, for *BSD compatibility */ @@ -472,7 +493,6 @@ struct in6_pktinfo { #define IPV6_PORTRANGE_HIGH 1 /* "high" - request firewall bypass */ #define IPV6_PORTRANGE_LOW 2 /* "low" - vouchsafe security */ -#if !defined(_XOPEN_SOURCE) /* * Definitions for inet6 sysctl operations. * @@ -481,6 +501,54 @@ struct in6_pktinfo { */ #define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */ +/* + * Names for IP sysctl objects + */ +#define IPV6CTL_FORWARDING 1 /* act as router */ +#define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ +#define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ +#ifdef notyet +#define IPV6CTL_DEFMTU 4 /* default MTU */ +#endif +#define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ +#define IPV6CTL_STATS 6 /* stats */ +#define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ +#define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ +#define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ +#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ +#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimum logging interval */ +#define IPV6CTL_ACCEPT_RTADV 12 +#define IPV6CTL_KEEPFAITH 13 +#define IPV6CTL_LOG_INTERVAL 14 +#define IPV6CTL_HDRNESTLIMIT 15 +#define IPV6CTL_DAD_COUNT 16 +#define IPV6CTL_AUTO_FLOWLABEL 17 +#define IPV6CTL_DEFMCASTHLIM 18 +#define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ +#define IPV6CTL_KAME_VERSION 20 +#define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ +#define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ +#if 0 /*obsolete*/ +#define IPV6CTL_MAPPED_ADDR 23 +#endif +#define IPV6CTL_V6ONLY 24 +#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ +#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ +#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ + +#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ +#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ +#define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ +#define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ +#define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ + +#define IPV6CTL_MAXFRAGS 41 /* max fragments */ + +/* New entries should be added here from current IPV6CTL_MAXID value. 
*/ +/* to define items, should talk with KAME guys first, for *BSD compatibility */ +#define IPV6CTL_MAXID 42 + +#ifdef KERNEL_PRIVATE #define CTL_IPV6PROTO_NAMES { \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, \ @@ -527,54 +595,6 @@ struct in6_pktinfo { { 0, 0 }, \ { "pim6", CTLTYPE_NODE }, \ } - -/* - * Names for IP sysctl objects - */ -#define IPV6CTL_FORWARDING 1 /* act as router */ -#define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/ -#define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */ -#ifdef notyet -#define IPV6CTL_DEFMTU 4 /* default MTU */ -#endif -#define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */ -#define IPV6CTL_STATS 6 /* stats */ -#define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */ -#define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */ -#define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */ -#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */ -#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */ -#define IPV6CTL_ACCEPT_RTADV 12 -#define IPV6CTL_KEEPFAITH 13 -#define IPV6CTL_LOG_INTERVAL 14 -#define IPV6CTL_HDRNESTLIMIT 15 -#define IPV6CTL_DAD_COUNT 16 -#define IPV6CTL_AUTO_FLOWLABEL 17 -#define IPV6CTL_DEFMCASTHLIM 18 -#define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */ -#define IPV6CTL_KAME_VERSION 20 -#define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */ -#define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */ -#if 0 /*obsolete*/ -#define IPV6CTL_MAPPED_ADDR 23 -#endif -#define IPV6CTL_V6ONLY 24 -#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ -#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ -#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ - -#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses (RFC3041) */ -#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ -#define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ -#define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ -#define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ - -/* New entries should be added here from current IPV6CTL_MAXID value. 
*/ -/* to define items, should talk with KAME guys first, for *BSD compatibility */ -#define IPV6CTL_MAXID 37 - -#endif /* !_XOPEN_SOURCE */ - /* * Redefinition of mbuf flags */ @@ -583,74 +603,73 @@ struct in6_pktinfo { #define M_LOOP M_PROTO4 #define M_AUTHIPDGM M_PROTO5 -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE struct cmsghdr; struct mbuf; struct ifnet; struct in6_aliasreq; -int in6_cksum __P((struct mbuf *, u_int8_t, u_int32_t, u_int32_t)); -int in6_localaddr __P((struct in6_addr *)); -int in6_addrscope __P((struct in6_addr *)); -struct in6_ifaddr *in6_ifawithscope __P((struct ifnet *, struct in6_addr *)); -struct in6_ifaddr *in6_ifawithifp __P((struct ifnet *, struct in6_addr *)); -extern void in6_if_up __P((struct ifnet *, struct in6_aliasreq *)); +int in6_cksum(struct mbuf *, __uint8_t, __uint32_t, __uint32_t); +int in6_localaddr(struct in6_addr *); +int in6_addrscope(struct in6_addr *); +struct in6_ifaddr *in6_ifawithscope(struct ifnet *, struct in6_addr *); +struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); +extern void in6_if_up(struct ifnet *, struct in6_aliasreq *); struct sockaddr; -void in6_sin6_2_sin __P((struct sockaddr_in *sin, - struct sockaddr_in6 *sin6)); -void in6_sin_2_v4mapsin6 __P((struct sockaddr_in *sin, - struct sockaddr_in6 *sin6)); -void in6_sin6_2_sin_in_sock __P((struct sockaddr *nam)); -void in6_sin_2_v4mapsin6_in_sock __P((struct sockaddr **nam)); +void in6_sin6_2_sin(struct sockaddr_in *sin, + struct sockaddr_in6 *sin6); +void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, + struct sockaddr_in6 *sin6); +void in6_sin6_2_sin_in_sock(struct sockaddr *nam); +void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif KERNEL_PRIVATE +#ifndef KERNEL __BEGIN_DECLS struct cmsghdr; -extern int inet6_option_space __P((int)); -extern int inet6_option_init __P((void *, struct cmsghdr **, int)); -extern int inet6_option_append __P((struct cmsghdr *, const u_int8_t *, - int, int)); -extern u_int8_t *inet6_option_alloc __P((struct cmsghdr *, int, int, int)); -extern int inet6_option_next __P((const struct cmsghdr *, u_int8_t **)); -extern int inet6_option_find __P((const struct cmsghdr *, u_int8_t **, int)); - -extern size_t inet6_rthdr_space __P((int, int)); -extern struct cmsghdr *inet6_rthdr_init __P((void *, int)); -extern int inet6_rthdr_add __P((struct cmsghdr *, const struct in6_addr *, - unsigned int)); -extern int inet6_rthdr_lasthop __P((struct cmsghdr *, unsigned int)); +extern int inet6_option_space(int); +extern int inet6_option_init(void *, struct cmsghdr **, int); +extern int inet6_option_append(struct cmsghdr *, const __uint8_t *, + int, int); +extern __uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int); +extern int inet6_option_next(const struct cmsghdr *, __uint8_t **); +extern int inet6_option_find(const struct cmsghdr *, __uint8_t **, int); + +extern size_t inet6_rthdr_space(int, int); +extern struct cmsghdr *inet6_rthdr_init(void *, int); +extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *, + unsigned int); +extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int); #if 0 /* not implemented yet */ -extern int inet6_rthdr_reverse __P((const struct cmsghdr *, struct cmsghdr *)); +extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *); #endif -extern int inet6_rthdr_segments 
__P((const struct cmsghdr *)); -extern struct in6_addr *inet6_rthdr_getaddr __P((struct cmsghdr *, int)); -extern int inet6_rthdr_getflags __P((const struct cmsghdr *, int)); - -extern int inet6_opt_init __P((void *, size_t)); -extern int inet6_opt_append __P((void *, size_t, int, u_int8_t, - size_t, u_int8_t, void **)); -extern int inet6_opt_finish __P((void *, size_t, int)); -extern int inet6_opt_set_val __P((void *, size_t, void *, int)); - -extern int inet6_opt_next __P((void *, size_t, int, u_int8_t *, - size_t *, void **)); -extern int inet6_opt_find __P((void *, size_t, int, u_int8_t, - size_t *, void **)); -extern int inet6_opt_get_val __P((void *, size_t, void *, int)); -extern size_t inet6_rth_space __P((int, int)); -extern void *inet6_rth_init __P((void *, int, int, int)); -extern int inet6_rth_add __P((void *, const struct in6_addr *)); -extern int inet6_rth_reverse __P((const void *, void *)); -extern int inet6_rth_segments __P((const void *)); -extern struct in6_addr *inet6_rth_getaddr __P((const void *, int)); +extern int inet6_rthdr_segments(const struct cmsghdr *); +extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int); +extern int inet6_rthdr_getflags(const struct cmsghdr *, int); + +extern int inet6_opt_init(void *, size_t); +extern int inet6_opt_append(void *, size_t, int, __uint8_t, + size_t, __uint8_t, void **); +extern int inet6_opt_finish(void *, size_t, int); +extern int inet6_opt_set_val(void *, size_t, void *, int); + +extern int inet6_opt_next(void *, size_t, int, __uint8_t *, + size_t *, void **); +extern int inet6_opt_find(void *, size_t, int, __uint8_t, + size_t *, void **); +extern int inet6_opt_get_val(void *, size_t, void *, int); +extern size_t inet6_rth_space(int, int); +extern void *inet6_rth_init(void *, int, int, int); +extern int inet6_rth_add(void *, const struct in6_addr *); +extern int inet6_rth_reverse(const void *, void *); +extern int inet6_rth_segments(const void *); +extern struct in6_addr *inet6_rth_getaddr(const void *, int); __END_DECLS - +#endif !KERNEL +#endif /* _POSIX_C_SOURCE */ #endif /* !_NETINET6_IN6_H_ */ diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c index 2886c784b..5c1138d0a 100644 --- a/bsd/netinet6/in6_gif.c +++ b/bsd/netinet6/in6_gif.c @@ -68,11 +68,11 @@ #include int -in6_gif_output(ifp, family, m, rt) - struct ifnet *ifp; - int family; /* family of the packet to be encapsulate. */ - struct mbuf *m; - struct rtentry *rt; +in6_gif_output( + struct ifnet *ifp, + int family, /* family of the packet to be encapsulated. */ + struct mbuf *m, + struct rtentry *rt) { struct gif_softc *sc = (struct gif_softc*)ifp; struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst; @@ -201,9 +201,9 @@ in6_gif_output(ifp, family, m, rt) * it is too painful to ask for resend of inner packet, to achieve * path MTU discovery for encapsulated packets. 
*/ - return(ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL)); + return(ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, 0)); #else - return(ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL)); + return(ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, 0)); #endif } diff --git a/bsd/netinet6/in6_gif.h b/bsd/netinet6/in6_gif.h index f34f963dd..6e292cd5d 100644 --- a/bsd/netinet6/in6_gif.h +++ b/bsd/netinet6/in6_gif.h @@ -34,12 +34,12 @@ #define _NETINET6_IN6_GIF_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define GIF_HLIM 30 -int in6_gif_input __P((struct mbuf **, int *)); -int in6_gif_output __P((struct ifnet *, int, struct mbuf *, struct rtentry *)); -int gif_encapcheck6 __P((const struct mbuf *, int, int, void *)); -#endif /* __APPLE_API_PRIVATE */ +int in6_gif_input(struct mbuf **, int *); +int in6_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); +int gif_encapcheck6(const struct mbuf *, int, int, void *); +#endif KERNEL_PRIVATE -#endif /*_NETINET6_IN6_GIF_H_*/ +#endif _NETINET6_IN6_GIF_H_ diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c index 2b627e522..2ccb29cd0 100644 --- a/bsd/netinet6/in6_ifattach.c +++ b/bsd/netinet6/in6_ifattach.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,7 @@ struct icmp6_ifstat **icmp6_ifstat = NULL; size_t in6_ifstatmax = 0; size_t icmp6_ifstatmax = 0; unsigned long in6_maxmtu = 0; +extern lck_mtx_t *nd6_mutex; #if IP6_AUTO_LINKLOCAL int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; @@ -76,13 +78,14 @@ int ip6_auto_linklocal = 1; /* enable by default */ extern struct inpcbinfo udbinfo; extern struct inpcbinfo ripcbinfo; +extern lck_mtx_t *rt_mtx; -static int get_rand_ifid __P((struct ifnet *, struct in6_addr *)); -static int generate_tmp_ifid __P((u_int8_t *, const u_int8_t *, u_int8_t *)); -static int get_hw_ifid __P((struct ifnet *, struct in6_addr *)); -static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *)); -static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *, struct in6_aliasreq *)); -static int in6_ifattach_loopback __P((struct ifnet *)); +static int get_rand_ifid(struct ifnet *, struct in6_addr *); +static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *); +static int get_hw_ifid(struct ifnet *, struct in6_addr *); +static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); +static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *, struct in6_aliasreq *); +static int in6_ifattach_loopback(struct ifnet *); #define EUI64_GBIT 0x01 #define EUI64_UBIT 0x02 @@ -103,9 +106,9 @@ static int in6_ifattach_loopback __P((struct ifnet *)); * We currently use MD5(hostname) for it. 
*/ static int -get_rand_ifid(ifp, in6) - struct ifnet *ifp; - struct in6_addr *in6; /* upper 64bits are preserved */ +get_rand_ifid( + struct ifnet *ifp, + struct in6_addr *in6) /* upper 64bits are preserved */ { MD5_CTX ctxt; u_int8_t digest[16]; @@ -137,9 +140,10 @@ get_rand_ifid(ifp, in6) } static int -generate_tmp_ifid(seed0, seed1, ret) - u_int8_t *seed0, *ret; - const u_int8_t *seed1; +generate_tmp_ifid( + u_int8_t *seed0, + const u_int8_t *seed1, + u_int8_t *ret) { MD5_CTX ctxt; u_int8_t seed[16], digest[16], nullbuf[8]; @@ -226,9 +230,9 @@ generate_tmp_ifid(seed0, seed1, ret) * XXX assumes single sockaddr_dl (AF_LINK address) per an interface */ static int -get_hw_ifid(ifp, in6) - struct ifnet *ifp; - struct in6_addr *in6; /* upper 64bits are preserved */ +get_hw_ifid( + struct ifnet *ifp, + struct in6_addr *in6) /* upper 64bits are preserved */ { struct ifaddr *ifa; struct sockaddr_dl *sdl; @@ -238,6 +242,8 @@ get_hw_ifid(ifp, in6) static u_int8_t allone[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + /* Why doesn't this code use ifnet_addrs? */ + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) @@ -252,10 +258,12 @@ get_hw_ifid(ifp, in6) goto found; } + ifnet_lock_done(ifp); return -1; found: + ifnet_lock_done(ifp); addr = LLADDR(sdl); addrlen = sdl->sdl_alen; @@ -265,6 +273,8 @@ found: case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: #if IFT_IEEE80211 case IFT_IEEE80211: #endif @@ -359,10 +369,10 @@ found: * sources. */ static int -get_ifid(ifp0, altifp, in6) - struct ifnet *ifp0; - struct ifnet *altifp; /* secondary EUI64 source */ - struct in6_addr *in6; +get_ifid( + struct ifnet *ifp0, + struct ifnet *altifp, /* secondary EUI64 source */ + struct in6_addr *in6) { struct ifnet *ifp; @@ -381,8 +391,8 @@ get_ifid(ifp0, altifp, in6) } /* next, try to get it from some other hardware interface */ - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { if (ifp == ifp0) continue; if (get_hw_ifid(ifp, in6) != 0) @@ -396,9 +406,11 @@ get_ifid(ifp0, altifp, in6) nd6log((LOG_DEBUG, "%s: borrow interface identifier from %s\n", if_name(ifp0), if_name(ifp))); + ifnet_head_done(); goto success; } } + ifnet_head_done(); /* last resort: get from random number source */ if (get_rand_ifid(ifp, in6) == 0) { @@ -423,22 +435,22 @@ success: } static int -in6_ifattach_linklocal(ifp, altifp, ifra_passed) - struct ifnet *ifp; - struct ifnet *altifp; /* secondary EUI64 source */ - struct in6_aliasreq *ifra_passed; +in6_ifattach_linklocal( + struct ifnet *ifp, + struct ifnet *altifp, /* secondary EUI64 source */ + struct in6_aliasreq *ifra_passed) { struct in6_ifaddr *ia; struct in6_aliasreq ifra; struct nd_prefix pr0; - int i, dl_tag, error; + int i, error; /* * configure link-local address. */ bzero(&ifra, sizeof(ifra)); - dlil_plumb_protocol(PF_INET6, ifp, &dl_tag); + dlil_plumb_protocol(PF_INET6, ifp); /* * in6_update_ifa() does not use ifra_name, but we accurately set it @@ -572,8 +584,8 @@ in6_ifattach_linklocal(ifp, altifp, ifra_passed) } static int -in6_ifattach_loopback(ifp) - struct ifnet *ifp; /* must be IFT_LOOP */ +in6_ifattach_loopback( + struct ifnet *ifp) /* must be IFT_LOOP */ { struct in6_aliasreq ifra; int error; @@ -633,11 +645,11 @@ in6_ifattach_loopback(ifp) * when ifp == NULL, the caller is responsible for filling scopeid. 
*/ int -in6_nigroup(ifp, name, namelen, in6) - struct ifnet *ifp; - const char *name; - int namelen; - struct in6_addr *in6; +in6_nigroup( + struct ifnet *ifp, + const char *name, + int namelen, + struct in6_addr *in6) { const char *p; u_char *q; @@ -680,9 +692,9 @@ in6_nigroup(ifp, name, namelen, in6) } void -in6_nigroup_attach(name, namelen) - const char *name; - int namelen; +in6_nigroup_attach( + const char *name, + int namelen) { struct ifnet *ifp; struct sockaddr_in6 mltaddr; @@ -695,12 +707,14 @@ in6_nigroup_attach(name, namelen) if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0) return; - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); + ifnet_lock_done(ifp); if (!in6m) { - if (!in6_addmulti(&mltaddr.sin6_addr, ifp, &error)) { + if (!in6_addmulti(&mltaddr.sin6_addr, ifp, &error, 0)) { nd6log((LOG_ERR, "%s: failed to join %s " "(errno=%d)\n", if_name(ifp), ip6_sprintf(&mltaddr.sin6_addr), @@ -708,12 +722,13 @@ in6_nigroup_attach(name, namelen) } } } + ifnet_head_done(); } void -in6_nigroup_detach(name, namelen) - const char *name; - int namelen; +in6_nigroup_detach( + const char *name, + int namelen) { struct ifnet *ifp; struct sockaddr_in6 mltaddr; @@ -725,13 +740,16 @@ in6_nigroup_detach(name, namelen) if (in6_nigroup(NULL, name, namelen, &mltaddr.sin6_addr) != 0) return; - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) - { + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_list) { mltaddr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mltaddr.sin6_addr, ifp, in6m); + ifnet_lock_done(ifp); if (in6m) - in6_delmulti(in6m); + in6_delmulti(in6m, 0); } + ifnet_head_done(); } /* @@ -740,16 +758,15 @@ in6_nigroup_detach(name, namelen) * XXX multiple link-local address case */ void -in6_ifattach(ifp, altifp, ifra) - struct ifnet *ifp; - struct ifnet *altifp; /* secondary EUI64 source */ - struct in6_aliasreq *ifra; +in6_ifattach( + struct ifnet *ifp, + struct ifnet *altifp, /* secondary EUI64 source */ + struct in6_aliasreq *ifra) { static size_t if_indexlim = 8; struct in6_ifaddr *ia; struct in6_addr in6; - /* * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. @@ -830,11 +847,15 @@ in6_ifattach(ifp, altifp, ifra) * XXX multiple loopback interface case. */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + struct in6_ifaddr *ia6 = NULL; in6 = in6addr_loopback; - if (in6ifa_ifpwithaddr(ifp, &in6) == NULL) { + if ((ia6 = in6ifa_ifpwithaddr(ifp, &in6)) == NULL) { if (in6_ifattach_loopback(ifp) != 0) return; } + else { + ifafree(&ia6->ia_ifa); + } } /* @@ -880,10 +901,10 @@ statinit: * from the ifnet list in bsdi. 
*/ void -in6_ifdetach(ifp) - struct ifnet *ifp; +in6_ifdetach( + struct ifnet *ifp) { - struct in6_ifaddr *ia, *oia; + struct in6_ifaddr *ia, *oia, *nia; struct ifaddr *ifa, *next; struct rtentry *rt; short rtflags; @@ -898,13 +919,17 @@ in6_ifdetach(ifp) nd6_purge(ifp); /* nuke any of IPv6 addresses we have */ - for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) - { - next = ifa->ifa_list.tqe_next; - if (ifa->ifa_addr->sa_family != AF_INET6) + + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) continue; - in6_purgeaddr(ifa); + in6_purgeaddr(&ia->ia_ifa, 1); } + lck_mtx_unlock(nd6_mutex); + + ifnet_lock_exclusive(ifp); /* undo everything done by in6_ifattach(), just in case */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) @@ -920,25 +945,28 @@ in6_ifdetach(ifp) ia = (struct in6_ifaddr *)ifa; /* remove from the routing table */ + lck_mtx_lock(rt_mtx); if ((ia->ia_flags & IFA_ROUTE) - && (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { + && (rt = rtalloc1_locked((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { rtflags = rt->rt_flags; - rtfree(rt); - rtrequest(RTM_DELETE, + rtfree_locked(rt); + rtrequest_locked(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, rtflags, (struct rtentry **)0); } + lck_mtx_unlock(rt_mtx); /* remove from the linked list */ - TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list); + if_detach_ifa(ifp, &ia->ia_ifa); ifafree(&ia->ia_ifa); /* also remove from the IPv6 address chain(itojun&jinmei) */ oia = ia; - if (oia == (ia = in6_ifaddr)) - in6_ifaddr = ia->ia_next; + lck_mtx_lock(nd6_mutex); + if (oia == (ia = in6_ifaddrs)) + in6_ifaddrs = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; @@ -950,27 +978,11 @@ in6_ifdetach(ifp) "list\n", if_name(ifp))); } } + lck_mtx_unlock(nd6_mutex); - IFAFREE(&oia->ia_ifa); - } - -#ifndef __APPLE__ - -/* This is a cause for reentrency, as those multicast addresses are - * freed both from the interface detaching and triggered by the closing of the socket - * Let the socket do the cleanup and not force it from the interface level - */ - /* leave from all multicast groups joined */ - in6_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp); - in6_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp); - for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) { - in6m_next = LIST_NEXT(in6m, in6m_entry); - if (in6m->in6m_ifp != ifp) - continue; - in6_delmulti(in6m); - in6m = NULL; + ifafree(&oia->ia_ifa); } -#endif /* __APPLE__ */ + ifnet_lock_done(ifp); /* * remove neighbor management table. 
we call it twice just to make @@ -988,20 +1000,22 @@ in6_ifdetach(ifp) sin6.sin6_family = AF_INET6; sin6.sin6_addr = in6addr_linklocal_allnodes; sin6.sin6_addr.s6_addr16[1] = htons(ifp->if_index); - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + lck_mtx_lock(rt_mtx); + rt = rtalloc1_locked((struct sockaddr *)&sin6, 0, 0UL); if (rt && rt->rt_ifp == ifp) { - rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); - rtfree(rt); + rtfree_locked(rt); } + lck_mtx_unlock(rt_mtx); } void -in6_get_tmpifid(ifp, retbuf, baseid, generate) - struct ifnet *ifp; - u_int8_t *retbuf; - const u_int8_t *baseid; - int generate; +in6_get_tmpifid( + struct ifnet *ifp, + u_int8_t *retbuf, + const u_int8_t *baseid, + int generate) { u_int8_t nullbuf[8]; struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; @@ -1022,31 +1036,18 @@ in6_get_tmpifid(ifp, retbuf, baseid, generate) bcopy(ndi->randomid, retbuf, 8); } -void -in6_tmpaddrtimer_funneled(void *ignored_arg) -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - in6_tmpaddrtimer(ignored_arg); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - extern size_t nd_ifinfo_indexlim; extern int ip6_use_tempaddr; void -in6_tmpaddrtimer(ignored_arg) - void *ignored_arg; +in6_tmpaddrtimer( + void *ignored_arg) { int i; struct nd_ifinfo *ndi; u_int8_t nullbuf[8]; int s = splnet(); - timeout(in6_tmpaddrtimer_funneled, (caddr_t)0, + timeout(in6_tmpaddrtimer, (caddr_t)0, (ip6_temp_preferred_lifetime - ip6_desync_factor - ip6_temp_regen_advance) * hz); @@ -1068,5 +1069,6 @@ in6_tmpaddrtimer(ignored_arg) } } } + splx(s); } diff --git a/bsd/netinet6/in6_ifattach.h b/bsd/netinet6/in6_ifattach.h index 307bd0b8f..f0b7d2d09 100644 --- a/bsd/netinet6/in6_ifattach.h +++ b/bsd/netinet6/in6_ifattach.h @@ -33,16 +33,14 @@ #define _NETINET6_IN6_IFATTACH_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -void in6_nigroup_attach __P((const char *, int)); -void in6_nigroup_detach __P((const char *, int)); -void in6_ifattach __P((struct ifnet *, struct ifnet *, struct in6_aliasreq *)); -void in6_ifdetach __P((struct ifnet *)); -void in6_get_tmpifid __P((struct ifnet *, u_int8_t *, const u_int8_t *, int)); -void in6_tmpaddrtimer __P((void *)); -int in6_nigroup __P((struct ifnet *, const char *, int, struct in6_addr *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#ifdef KERNEL_PRIVATE +void in6_nigroup_attach(const char *, int); +void in6_nigroup_detach(const char *, int); +void in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *); +void in6_ifdetach(struct ifnet *); +void in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int); +void in6_tmpaddrtimer(void *); +int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *); +#endif KERNEL_PRIVATE -#endif /* _NETINET6_IN6_IFATTACH_H_ */ +#endif _NETINET6_IN6_IFATTACH_H_ diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 709196f22..2e82a0376 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. 
Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -91,6 +112,9 @@ #include #include +#include +#include + #include "faith.h" #if defined(NFAITH) && NFAITH > 0 #include @@ -106,15 +130,16 @@ #include #endif #include +extern lck_mtx_t *sadb_mutex; #endif /* IPSEC */ struct in6_addr zeroin6_addr; int -in6_pcbbind(inp, nam, p) - register struct inpcb *inp; - struct sockaddr *nam; - struct proc *p; +in6_pcbbind( + struct inpcb *inp, + struct sockaddr *nam, + struct proc *p) { struct socket *so = inp->inp_socket; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; @@ -122,25 +147,36 @@ in6_pcbbind(inp, nam, p) u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); - if (!in6_ifaddr) /* XXX broken! */ + if (!in6_ifaddrs) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) return(EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; + socket_unlock(so, 0); /* keep reference */ + lck_rw_lock_exclusive(pcbinfo->mtx); if (nam) { sin6 = (struct sockaddr_in6 *)nam; - if (nam->sa_len != sizeof(*sin6)) + if (nam->sa_len != sizeof(*sin6)) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EINVAL); + } /* * family check. */ - if (nam->sa_family != AF_INET6) + if (nam->sa_family != AF_INET6) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EAFNOSUPPORT); + } /* KAME hack: embed scopeid */ - if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) + if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return EINVAL; + } /* this must be cleared for ifa_ifwithaddr() */ sin6->sin6_scope_id = 0; @@ -159,8 +195,11 @@ in6_pcbbind(inp, nam, p) struct ifaddr *ia = NULL; sin6->sin6_port = 0; /* yech... 
*/ - if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) + if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EADDRNOTAVAIL); + } /* * XXX: bind to an anycast address might accidentally @@ -171,20 +210,24 @@ in6_pcbbind(inp, nam, p) if (ia && ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { + ifafree(ia); + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EADDRNOTAVAIL); } + ifafree(ia); + ia = NULL; } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPV6PORT_RESERVED && p && -#if 0 - suser(p->p_ucred, &p->p_acflag)) -#else - ((so->so_state & SS_PRIV) == 0)) -#endif + ((so->so_state & SS_PRIV) == 0)) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EACCES); + } if (so->so_uid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { @@ -196,8 +239,11 @@ in6_pcbbind(inp, nam, p) !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || (t->inp_socket->so_options & SO_REUSEPORT) == 0) && - so->so_uid != t->inp_socket->so_uid) + so->so_uid != t->inp_socket->so_uid) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EADDRINUSE); + } if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; @@ -212,14 +258,21 @@ in6_pcbbind(inp, nam, p) (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) + INP_SOCKAF(t->inp_socket))) { + + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EADDRINUSE); + } } } t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, lport, wild); - if (t && (reuseport & t->inp_socket->so_options) == 0) + if (t && (reuseport & t->inp_socket->so_options) == 0) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return(EADDRINUSE); + } if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { struct sockaddr_in sin; @@ -233,25 +286,33 @@ in6_pcbbind(inp, nam, p) (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) + INP_SOCKAF(t->inp_socket))) { + lck_rw_done(pcbinfo->mtx); + socket_lock(so, 0); return (EADDRINUSE); + } } } inp->in6p_laddr = sin6->sin6_addr; } + socket_lock(so, 0); if (lport == 0) { int e; - if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p)) != 0) + if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) { + lck_rw_done(pcbinfo->mtx); return(e); + } } else { inp->inp_lport = lport; - if (in_pcbinshash(inp) != 0) { + if (in_pcbinshash(inp, 1) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; + lck_rw_done(pcbinfo->mtx); return (EAGAIN); } - } + } + lck_rw_done(pcbinfo->mtx); return(0); } @@ -268,12 +329,15 @@ in6_pcbbind(inp, nam, p) */ int -in6_pcbladdr(inp, nam, plocal_addr6) - register struct inpcb *inp; - struct sockaddr *nam; - struct in6_addr **plocal_addr6; +in6_pcbladdr( + struct inpcb *inp, + struct sockaddr *nam, + struct in6_addr *plocal_addr6) { - register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct in6_addr *addr6 = NULL; + struct in6_addr src_storage; + struct ifnet *ifp = NULL; int error = 0; @@ -288,7 +352,7 @@ in6_pcbladdr(inp, nam, plocal_addr6) if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) return EINVAL; - if (in6_ifaddr) { + if (in6_ifaddrs) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. 
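Every error return in the in6_pcbbind() hunks above has to undo the socket_unlock()/lck_rw_lock_exclusive() pair taken on entry, which the patch does by repeating lck_rw_done() and socket_lock() at each exit. A sketch of the same discipline with a single exit point; the function and label names are hypothetical and the validation chain is elided:

static int
bind_locked_example(struct socket *so, struct inpcbinfo *pcbinfo,
    struct sockaddr *nam)
{
	int error = 0;

	socket_unlock(so, 0);			/* keep a reference */
	lck_rw_lock_exclusive(pcbinfo->mtx);

	if (nam != NULL && nam->sa_len != sizeof (struct sockaddr_in6)) {
		error = EINVAL;
		goto out;
	}
	/* ... remaining address and port validation ... */

out:
	lck_rw_done(pcbinfo->mtx);
	socket_lock(so, 0);
	return (error);
}

Either style works; the single-exit form just keeps the lock release in one place as the validation chain grows.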
@@ -302,15 +366,16 @@ in6_pcbladdr(inp, nam, plocal_addr6) * with the address specified by setsockopt(IPV6_PKTINFO). * Is it the intended behavior? */ - *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, + addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions, &inp->in6p_route, - &inp->in6p_laddr, &error); - if (*plocal_addr6 == 0) { + &inp->in6p_laddr, &src_storage, &error); + if (addr6 == 0) { if (error == 0) error = EADDRNOTAVAIL; return(error); } + *plocal_addr6 = *addr6; /* * Don't do pcblookup call here; return interface in * plocal_addr6 @@ -333,12 +398,13 @@ in6_pcbladdr(inp, nam, plocal_addr6) */ int in6_pcbconnect(inp, nam, p) - register struct inpcb *inp; + struct inpcb *inp; struct sockaddr *nam; struct proc *p; { - struct in6_addr *addr6; - register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct in6_addr addr6; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct inpcb *pcb; int error; /* @@ -347,12 +413,15 @@ in6_pcbconnect(inp, nam, p) */ if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) return(error); - - if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, + socket_unlock(inp->inp_socket, 0); + pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) - ? addr6 : &inp->in6p_laddr, - inp->inp_lport, 0, NULL) != NULL) { + ? &addr6 : &inp->in6p_laddr, + inp->inp_lport, 0, NULL); + socket_lock(inp->inp_socket, 0); + if (pcb != NULL) { + in_pcb_checkstate(pcb, WNT_RELEASE, 0); return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { @@ -361,7 +430,13 @@ in6_pcbconnect(inp, nam, p) if (error) return (error); } - inp->in6p_laddr = *addr6; + inp->in6p_laddr = addr6; + } + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; @@ -372,6 +447,7 @@ in6_pcbconnect(inp, nam, p) (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); in_pcbrehash(inp); + lck_rw_done(inp->inp_pcbinfo->mtx); return (0); } @@ -383,13 +459,14 @@ in6_pcbconnect(inp, nam, p) * an entry to the caller for later use. 
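The try-lock sequence added to in6_pcbconnect() above avoids a socket-lock/pcbinfo-lock inversion: try the rwlock first, and only on failure drop the socket lock, block, and re-take both in a safe order. The same fragment, isolated (the wrapper name is hypothetical):

static void
pcbinfo_lock_exclusive(struct inpcb *inp)
{
	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
		/* the opposite order elsewhere is socket lock inside pcbinfo lock */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}
	/* both locks held here, acquired deadlock-free */
}

Because the socket lock can be dropped for a moment, any state examined before this point has to be revalidated once both locks are held.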
*/ struct in6_addr * -in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) - struct sockaddr_in6 *dstsock; - struct ip6_pktopts *opts; - struct ip6_moptions *mopts; - struct route_in6 *ro; - struct in6_addr *laddr; - int *errorp; +in6_selectsrc( + struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, + struct ip6_moptions *mopts, + struct route_in6 *ro, + struct in6_addr *laddr, + struct in6_addr *src_storage, + int *errorp) { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; @@ -426,7 +503,9 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); + return(src_storage); } /* @@ -455,7 +534,9 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); + return(src_storage); } /* @@ -480,7 +561,9 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&ia6->ia_addr.sin6_addr); + *src_storage = ia6->ia_addr.sin6_addr; + ifafree(&ia6->ia_ifa); + return(src_storage); } } @@ -495,17 +578,21 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); - rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); + rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); - if (ia6 == 0) + if (ia6 == 0) { + ifaref(&rt->rt_ifa); ia6 = ifatoia6(rt->rt_ifa); + } } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifaref(&rt->rt_ifa); + return(src_storage); } } @@ -546,14 +633,18 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); - if (ia6 == 0) /* xxx scope error ?*/ + if (ia6 == 0) { /* xxx scope error ?*/ + ifaref(ro->ro_rt->rt_ifa); ia6 = ifatoia6(ro->ro_rt->rt_ifa); + } } if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifaref(&rt->rt_ifa); + return(src_storage); } *errorp = EADDRNOTAVAIL; @@ -568,9 +659,9 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) * 3. The system default hoplimit. 
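The in6_selectsrc() rework above stops returning pointers into an in6_ifaddr, which could be freed once the reference was dropped, and instead copies the result into caller-provided storage and releases the ifaddr with ifafree() before returning. The caller shape after the change, mirroring the in6_pcbladdr() hunk; surrounding declarations are assumed from that context:

struct in6_addr src_storage, *src;
int error = 0;

src = in6_selectsrc(sin6, inp->in6p_outputopts, inp->in6p_moptions,
    &inp->in6p_route, &inp->in6p_laddr, &src_storage, &error);
if (src == NULL)
	return (error != 0 ? error : EADDRNOTAVAIL);
inp->in6p_laddr = *src;		/* safe: src points at caller-owned storage */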
*/ int -in6_selecthlim(in6p, ifp) - struct in6pcb *in6p; - struct ifnet *ifp; +in6_selecthlim( + struct in6pcb *in6p, + struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); @@ -585,11 +676,18 @@ void in6_pcbdisconnect(inp) struct inpcb *inp; { + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); inp->inp_fport = 0; /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); + lck_rw_done(inp->inp_pcbinfo->mtx); if (inp->inp_socket->so_state & SS_NOFDREF) in6_pcbdetach(inp); } @@ -602,27 +700,35 @@ in6_pcbdetach(inp) struct inpcbinfo *ipi = inp->inp_pcbinfo; #if IPSEC - if (inp->in6p_sp != NULL) + if (inp->in6p_sp != NULL) { + lck_mtx_lock(sadb_mutex); ipsec6_delete_pcbpolicy(inp); + lck_mtx_unlock(sadb_mutex); + } #endif /* IPSEC */ - inp->inp_gencnt = ++ipi->ipi_gencnt; - in_pcbremlists(inp); - sotoinpcb(so) = 0; - sofree(so); - - if (inp->in6p_options) - m_freem(inp->in6p_options); - ip6_freepcbopts(inp->in6p_outputopts); - ip6_freemoptions(inp->in6p_moptions); - if (inp->in6p_route.ro_rt) - rtfree(inp->in6p_route.ro_rt); - /* Check and free IPv4 related resources in case of mapped addr */ - if (inp->inp_options) - (void)m_free(inp->inp_options); - ip_freemoptions(inp->inp_moptions); - inp->inp_vflag = 0; - zfree(ipi->ipi_zone, inp); + if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) + printf("in6_pcbdetach so=%x can't be marked dead ok\n", so); + + inp->inp_state = INPCB_STATE_DEAD; + + if ((so->so_flags & SOF_PCBCLEARING) == 0) { + inp->inp_vflag = 0; + so->so_flags |= SOF_PCBCLEARING; + inp->inp_gencnt = ++ipi->ipi_gencnt; + if (inp->in6p_options) + m_freem(inp->in6p_options); + ip6_freepcbopts(inp->in6p_outputopts); + ip6_freemoptions(inp->in6p_moptions); + if (inp->in6p_route.ro_rt) + rtfree(inp->in6p_route.ro_rt); + /* Check and free IPv4 related resources in case of mapped addr */ + if (inp->inp_options) + (void)m_free(inp->inp_options); + ip_freemoptions(inp->inp_moptions); + inp->inp_moptions = NULL; + + } } struct sockaddr * @@ -684,20 +790,16 @@ in6_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { - int s; - register struct inpcb *inp; + struct inpcb *inp; struct in6_addr addr; in_port_t port; - s = splnet(); inp = sotoinpcb(so); if (!inp) { - splx(s); return EINVAL; } port = inp->inp_lport; addr = inp->in6p_laddr; - splx(s); *nam = in6_sockaddr(port, &addr); return 0; @@ -708,20 +810,16 @@ in6_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { - int s; struct inpcb *inp; struct in6_addr addr; in_port_t port; - s = splnet(); inp = sotoinpcb(so); if (!inp) { - splx(s); return EINVAL; } port = inp->inp_fport; addr = inp->in6p_faddr; - splx(s); *nam = in6_sockaddr(port, &addr); return 0; @@ -777,20 +875,21 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) * Must be called at splnet. 
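in6_pcbdetach() above no longer frees the PCB inline; it marks it dead with in_pcb_checkstate(WNT_STOPUSING), sets SOF_PCBCLEARING, and frees only the options no in-flight path can still touch, leaving the memory to be reclaimed when the last reference drains. A comment-level sketch of the WNT_* protocol as these hunks use it; the actual reclaim point lives inside in_pcb_checkstate(), not shown here:

/*
 * WNT_ACQUIRE    pin the PCB so it cannot be reclaimed under us;
 *                returns WNT_STOPUSING if it is already dying.
 * WNT_RELEASE    drop the pin; the last release of a dead PCB
 *                triggers the deferred reclaim.
 * WNT_STOPUSING  mark the PCB dead and let outstanding pins drain.
 */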
*/ void -in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify) - struct inpcbhead *head; +in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify) + struct inpcbinfo *pcbinfo; struct sockaddr *dst; const struct sockaddr *src; u_int fport_arg, lport_arg; int cmd; -// struct inpcb *(*notify) __P((struct inpcb *, int)); - void (*notify) __P((struct inpcb *, int)); +// struct inpcb *(*notify)(struct inpcb *, int); + void (*notify)(struct inpcb *, int); { struct inpcb *inp, *ninp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; - int errno, s; + int errno; + struct inpcbhead *head = pcbinfo->listhead; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) return; @@ -822,7 +921,7 @@ in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; - s = splnet(); + lck_rw_lock_shared(pcbinfo->mtx); for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { ninp = LIST_NEXT(inp, inp_list); @@ -849,14 +948,21 @@ in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify) (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) || - (fport && inp->inp_fport != fport)) + (fport && inp->inp_fport != fport)) continue; + do_notify: - if (notify) + if (notify) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + socket_lock(inp->inp_socket, 1); (*notify)(inp, errno); + (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); + socket_unlock(inp->inp_socket, 1); + } } - splx(s); + lck_rw_done(pcbinfo->mtx); } /* @@ -869,7 +975,7 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) u_int lport_arg; int wild_okay; { - register struct inpcb *inp; + struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; @@ -947,11 +1053,12 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) return (match); } } - +#ifndef APPLE +/* this is not used in Darwin */ void -in6_pcbpurgeif0(head, ifp) - struct in6pcb *head; - struct ifnet *ifp; +in6_pcbpurgeif0( + struct in6pcb *head, + struct ifnet *ifp) { struct in6pcb *in6p; struct ip6_moptions *im6o; @@ -986,6 +1093,7 @@ in6_pcbpurgeif0(head, ifp) } } } +#endif /* * Check for alternatives when higher level complains @@ -1007,9 +1115,10 @@ in6_losing(in6p) (struct sockaddr *)&in6p->in6p_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); + lck_mtx_lock(rt_mtx); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) - (void)rtrequest(RTM_DELETE, rt_key(rt), + (void)rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else @@ -1017,7 +1126,8 @@ in6_losing(in6p) * A new route can be allocated * the next time output is attempted. */ - rtfree(rt); + rtfree_locked(rt); + lck_mtx_unlock(rt_mtx); } } @@ -1026,9 +1136,9 @@ in6_losing(in6p) * and allocate a (hopefully) better one. */ void -in6_rtchange(inp, errno) - struct inpcb *inp; - int errno; +in6_rtchange( + struct inpcb *inp, + int errno) { if (inp->in6p_route.ro_rt) { rtfree(inp->in6p_route.ro_rt); @@ -1044,15 +1154,17 @@ in6_rtchange(inp, errno) * Lookup PCB in hash list. 
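The in6_pcbnotify() loop above is the template for touching every PCB on a list without the old splnet() protection: hold the pcbinfo lock shared, pin each candidate before taking its socket lock, and unpin before moving on. A self-contained sketch under the same assumptions (the function name is hypothetical; the types and calls are the ones the hunk uses):

static void
notify_all_example(struct inpcbinfo *pcbinfo,
    void (*notify)(struct inpcb *, int), int error)
{
	struct inpcb *inp, *ninp;

	lck_rw_lock_shared(pcbinfo->mtx);
	for (inp = LIST_FIRST(pcbinfo->listhead); inp != NULL; inp = ninp) {
		ninp = LIST_NEXT(inp, inp_list);
		if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
			continue;	/* dying PCB: skip it */
		socket_lock(inp->inp_socket, 1);
		(*notify)(inp, error);
		(void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
		socket_unlock(inp->inp_socket, 1);
	}
	lck_rw_done(pcbinfo->mtx);
}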
*/ struct inpcb * -in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) - struct inpcbinfo *pcbinfo; - struct in6_addr *faddr, *laddr; - u_int fport_arg, lport_arg; - int wildcard; - struct ifnet *ifp; +in6_pcblookup_hash( + struct inpcbinfo *pcbinfo, + struct in6_addr *faddr, + u_int fport_arg, + struct in6_addr *laddr, + u_int lport_arg, + int wildcard, + struct ifnet *ifp) { struct inpcbhead *head; - register struct inpcb *inp; + struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; int faith; @@ -1062,6 +1174,8 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) faith = 0; #endif + lck_rw_lock_shared(pcbinfo->mtx); + /* * First look for an exact match. */ @@ -1076,9 +1190,16 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) inp->inp_fport == fport && inp->inp_lport == lport) { /* - * Found. - */ - return (inp); + * Found. Check if pcb is still valid + */ + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (inp); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } } } if (wildcard) { @@ -1094,18 +1215,34 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) if (faith && (inp->inp_flags & INP_FAITH) == 0) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - laddr)) - return (inp); + laddr)) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (inp); + } + else { /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->mtx); + return(NULL); + } + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) local_wild = inp; } } - return (local_wild); + if (local_wild && in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->mtx); + return (local_wild); + } + else { + lck_rw_done(pcbinfo->mtx); + return (NULL); + } } /* * Not found. 
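Under the new scheme a successful in6_pcblookup_hash() returns with a WNT_ACQUIRE reference already held, so the PCB stays valid after the pcbinfo lock is dropped and the caller owes a WNT_RELEASE, as the in6_pcbconnect() hunk earlier does. A simplified caller shape, with surrounding declarations assumed:

struct inpcb *pcb;

pcb = in6_pcblookup_hash(pcbinfo, &sin6->sin6_addr, sin6->sin6_port,
    &inp->in6p_laddr, inp->inp_lport, 0, NULL);
if (pcb != NULL) {
	/* pinned: cannot be reclaimed until the reference is dropped */
	in_pcb_checkstate(pcb, WNT_RELEASE, 0);
}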
*/ + lck_rw_done(pcbinfo->mtx); return (NULL); } diff --git a/bsd/netinet6/in6_pcb.h b/bsd/netinet6/in6_pcb.h index b0ebf339e..2bae22c14 100644 --- a/bsd/netinet6/in6_pcb.h +++ b/bsd/netinet6/in6_pcb.h @@ -67,50 +67,50 @@ #define _NETINET6_IN6_PCB_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -void in6_pcbpurgeif0 __P((struct in6pcb *, struct ifnet *)); -void in6_losing __P((struct inpcb *)); -int in6_pcballoc __P((struct socket *, struct inpcbinfo *, struct proc *)); -int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct proc *)); -int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct proc *)); -void in6_pcbdetach __P((struct inpcb *)); -void in6_pcbdisconnect __P((struct inpcb *)); -int in6_pcbladdr __P((struct inpcb *, struct sockaddr *, - struct in6_addr **)); +#ifndef APPLE +//void in6_pcbpurgeif0(struct in6pcb *, struct ifnet *); +#endif +void in6_losing(struct inpcb *); +int in6_pcballoc(struct socket *, struct inpcbinfo *, struct proc *); +int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); +int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); +void in6_pcbdetach(struct inpcb *); +void in6_pcbdisconnect(struct inpcb *); +int in6_pcbladdr(struct inpcb *, struct sockaddr *, + struct in6_addr *); struct inpcb * - in6_pcblookup_local __P((struct inpcbinfo *, - struct in6_addr *, u_int, int)); + in6_pcblookup_local(struct inpcbinfo *, + struct in6_addr *, u_int, int); struct inpcb * - in6_pcblookup_hash __P((struct inpcbinfo *, + in6_pcblookup_hash(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, - u_int, int, struct ifnet *)); -void in6_pcbnotify __P((struct inpcbhead *, struct sockaddr *, + u_int, int, struct ifnet *); +void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *, u_int, const struct sockaddr *, u_int, int, - void (*)(struct inpcb *, int))); + void (*)(struct inpcb *, int)); void - in6_rtchange __P((struct inpcb *, int)); + in6_rtchange(struct inpcb *, int); struct sockaddr * - in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p)); + in6_sockaddr(in_port_t port, struct in6_addr *addr_p); struct sockaddr * - in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p)); -int in6_setpeeraddr __P((struct socket *so, struct sockaddr **nam)); -int in6_setsockaddr __P((struct socket *so, struct sockaddr **nam)); -int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam)); -int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam)); -struct in6_addr *in6_selectsrc __P((struct sockaddr_in6 *, + in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p); +int in6_setpeeraddr(struct socket *so, struct sockaddr **nam); +int in6_setsockaddr(struct socket *so, struct sockaddr **nam); +int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); +int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); +struct in6_addr *in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, - struct route_in6 *, - struct in6_addr *, int *)); -int in6_selecthlim __P((struct in6pcb *, struct ifnet *)); -int in6_pcbsetport __P((struct in6_addr *, struct inpcb *, struct proc *)); -void init_sin6 __P((struct sockaddr_in6 *sin6, struct mbuf *m)); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ + struct route_in6 *, struct in6_addr *, + struct in6_addr *, int *); +int 
in6_selecthlim(struct in6pcb *, struct ifnet *); +int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct proc *, int); +void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m); +#endif KERNEL_PRIVATE -#endif /* !_NETINET6_IN6_PCB_H_ */ +#endif !_NETINET6_IN6_PCB_H_ diff --git a/bsd/netinet6/in6_prefix.c b/bsd/netinet6/in6_prefix.c index b9096fe2b..88a945059 100644 --- a/bsd/netinet6/in6_prefix.c +++ b/bsd/netinet6/in6_prefix.c @@ -94,13 +94,14 @@ struct rr_prhead rr_prefix; #include -static void add_each_addr __P((struct socket *so, struct rr_prefix *rpp, - struct rp_addr *rap)); -static int create_ra_entry __P((struct rp_addr **rapp)); -static int add_each_prefix __P((struct socket *so, struct rr_prefix *rpp)); -static void free_rp_entries __P((struct rr_prefix *rpp)); -static int link_stray_ia6s __P((struct rr_prefix *rpp)); -static void rp_remove __P((struct rr_prefix *rpp)); +static void add_each_addr(struct socket *so, struct rr_prefix *rpp, + struct rp_addr *rap); +static int create_ra_entry(struct rp_addr **rapp); +static int add_each_prefix(struct socket *so, struct rr_prefix *rpp); +static void free_rp_entries(struct rr_prefix *rpp); +static int link_stray_ia6s(struct rr_prefix *rpp); +static void rp_remove(struct rr_prefix *rpp); +extern lck_mtx_t *prefix6_mutex; /* * Copy bits from src to tgt, from off bit for len bits. @@ -158,6 +159,7 @@ in6_prefixwithifp(struct ifnet *ifp, int plen, struct in6_addr *dst) struct ifprefix *ifpr; /* search matched prefix */ + ifnet_lock_shared(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -167,6 +169,7 @@ in6_prefixwithifp(struct ifnet *ifp, int plen, struct in6_addr *dst) if (plen <= in6_matchlen(dst, IFPR_IN6(ifpr))) break; } + ifnet_lock_done(ifp); return (ifpr); } @@ -192,6 +195,7 @@ search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr) * which matches the addr */ + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) @@ -200,13 +204,17 @@ search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr) in6_matchlen(&ipr->ipr_prefix.sin6_addr, IFA_IN6(ifa))) break; } - if (ifa == NULL) + if (ifa == NULL) { + ifnet_lock_done(ifp); return NULL; + } rpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr); - if (rpp != 0) + if (rpp != 0) { + ifnet_lock_done(ifp); return rpp; - + } + for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -217,6 +225,7 @@ search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr) IFPR_IN6(ifpr))) break; } + ifnet_lock_done(ifp); if (ifpr != NULL) log(LOG_ERR, "in6_prefix.c: search_matched_prefix: addr %s" "has no pointer to prefix %s\n", ip6_sprintf(IFA_IN6(ifa)), @@ -237,6 +246,7 @@ mark_matched_prefixes(u_long cmd, struct ifnet *ifp, struct in6_rrenumreq *irr) int matchlen, matched = 0; /* search matched prefixes */ + ifnet_lock_exclusive(ifp); /* Should if_prefixhead be protected by IPv6?? 
*/ for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -282,6 +292,7 @@ mark_matched_prefixes(u_long cmd, struct ifnet *ifp, struct in6_rrenumreq *irr) "ND autoconfigured addr?\n", ip6_sprintf(IFA_IN6(ifa))); } + ifnet_lock_done(ifp); return matched; } @@ -294,6 +305,7 @@ delmark_global_prefixes(struct ifnet *ifp, struct in6_rrenumreq *irr) struct ifprefix *ifpr; /* search matched prefixes */ + ifnet_lock_exclusive(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -305,6 +317,7 @@ delmark_global_prefixes(struct ifnet *ifp, struct in6_rrenumreq *irr) IPV6_ADDR_SCOPE_GLOBAL) ifpr2rp(ifpr)->rp_statef_delmark = 1; } + ifnet_lock_done(ifp); } /* Unmark prefixes */ @@ -314,6 +327,7 @@ unmark_prefixes(struct ifnet *ifp) struct ifprefix *ifpr; /* unmark all prefix */ + ifnet_lock_exclusive(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -324,22 +338,26 @@ unmark_prefixes(struct ifnet *ifp) ifpr2rp(ifpr)->rp_statef_addmark = 0; ifpr2rp(ifpr)->rp_statef_delmark = 0; } + ifnet_lock_done(ifp); } static void init_prefix_ltimes(struct rr_prefix *rpp) { + struct timeval timenow; + + getmicrotime(&timenow); if (rpp->rp_pltime == RR_INFINITE_LIFETIME || rpp->rp_rrf_decrprefd == 0) rpp->rp_preferred = 0; else - rpp->rp_preferred = time_second + rpp->rp_pltime; + rpp->rp_preferred = timenow.tv_sec + rpp->rp_pltime; if (rpp->rp_vltime == RR_INFINITE_LIFETIME || rpp->rp_rrf_decrvalid == 0) rpp->rp_expire = 0; else - rpp->rp_expire = time_second + rpp->rp_vltime; + rpp->rp_expire = timenow.tv_sec + rpp->rp_vltime; } static int @@ -377,6 +395,7 @@ search_ifidwithprefix(struct rr_prefix *rpp, struct in6_addr *ifid) { struct rp_addr *rap; + lck_mtx_lock(prefix6_mutex); LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry) { if (rr_are_ifid_equal(ifid, &rap->ra_ifid, @@ -384,6 +403,7 @@ search_ifidwithprefix(struct rr_prefix *rpp, struct in6_addr *ifid) rpp->rp_plen)) break; } + lck_mtx_unlock(prefix6_mutex); return rap; } @@ -407,9 +427,9 @@ assign_ra_entry(struct rr_prefix *rpp, int iilen, struct in6_ifaddr *ia) #if 0 /* Can't do this now, because rpp may be on th stack. should fix it? 
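init_prefix_ltimes() above trades the global time_second for one getmicrotime() snapshot per function, so every lifetime computed in a pass uses the same clock value. A small helper in the same spirit; the name and parameters are hypothetical:

static time_t
expiry_from_lifetime(u_int32_t lifetime_secs, int decrement)
{
	struct timeval timenow;

	if (lifetime_secs == RR_INFINITE_LIFETIME || decrement == 0)
		return (0);		/* 0 means "never expires", as above */
	getmicrotime(&timenow);
	return (timenow.tv_sec + lifetime_secs);
}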
*/ ia->ia6_ifpr = rp2ifpr(rpp); #endif - s = splnet(); + lck_mtx_lock(prefix6_mutex); LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - splx(s); + lck_mtx_unlock(prefix6_mutex); return 0; } @@ -424,7 +444,7 @@ in6_prefix_add_llifid(int iilen, struct in6_ifaddr *ia) struct rr_prefix *rpp; struct rp_addr *rap; struct socket so; - int error, s; + int error; if ((error = create_ra_entry(&rap)) != 0) return(error); @@ -435,6 +455,7 @@ in6_prefix_add_llifid(int iilen, struct in6_ifaddr *ia) /* XXX: init dummy so */ bzero(&so, sizeof(so)); /* insert into list */ + lck_mtx_lock(prefix6_mutex); LIST_FOREACH(rpp, &rr_prefix, rp_entry) { /* @@ -443,11 +464,10 @@ in6_prefix_add_llifid(int iilen, struct in6_ifaddr *ia) if (rpp->rp_ifp != ia->ia_ifp) continue; - s = splnet(); LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - splx(s); add_each_addr(&so, rpp, rap); } + lck_mtx_unlock(prefix6_mutex); return 0; } @@ -546,9 +566,9 @@ in6_prefix_remove_ifid(int iilen, struct in6_ifaddr *ia) return; rap = search_ifidwithprefix(ifpr2rp(ia->ia6_ifpr), IA6_IN6(ia)); if (rap != NULL) { - int s = splnet(); + lck_mtx_lock(prefix6_mutex); LIST_REMOVE(rap, ra_entry); - splx(s); + lck_mtx_unlock(prefix6_mutex); if (rap->ra_addr) ifafree(&rap->ra_addr->ia_ifa); FREE(rap, M_RR_ADDR); @@ -559,12 +579,13 @@ in6_prefix_remove_ifid(int iilen, struct in6_ifaddr *ia) } void -in6_purgeprefix(ifp) - struct ifnet *ifp; +in6_purgeprefix( + struct ifnet *ifp) { struct ifprefix *ifpr, *nextifpr; /* delete prefixes before ifnet goes away */ + ifnet_lock_exclusive(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = nextifpr) { @@ -574,6 +595,7 @@ in6_purgeprefix(ifp) continue; (void)delete_each_prefix(ifpr2rp(ifpr), PR_ORIG_KERNEL); } + ifnet_lock_done(ifp); } static void @@ -675,6 +697,7 @@ rrpr_update(struct socket *so, struct rr_prefix *new) int s; /* search existing prefix */ + ifnet_lock_exclusive(new->rp_ifp); for (ifpr = TAILQ_FIRST(&new->rp_ifp->if_prefixhead); ifpr; ifpr = TAILQ_NEXT(ifpr, ifpr_list)) { @@ -695,8 +718,10 @@ rrpr_update(struct socket *so, struct rr_prefix *new) * If the origin of the already-installed prefix is more * preferable than the new one, ignore installation request. */ - if (rpp->rp_origin > new->rp_origin) + if (rpp->rp_origin > new->rp_origin) { + ifnet_lock_done(new->rp_ifp); return(EPERM); + } /* update prefix information */ rpp->rp_flags.prf_ra = new->rp_flags.prf_ra; @@ -712,6 +737,7 @@ rrpr_update(struct socket *so, struct rr_prefix *new) * add rp_addr entries in new into rpp, if they have not * been already included in rpp. */ + lck_mtx_lock(prefix6_mutex); while (!LIST_EMPTY(&new->rp_addrhead)) { rap = LIST_FIRST(&new->rp_addrhead); @@ -723,10 +749,9 @@ rrpr_update(struct socket *so, struct rr_prefix *new) FREE(rap, M_RR_ADDR); continue; } - s = splnet(); LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - splx(s); } + lck_mtx_unlock(prefix6_mutex); } else { /* * We got a fresh prefix. 
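rrpr_update() above establishes the lock hierarchy for the prefix code: the interface lock is taken first and prefix6_mutex strictly inside it, with the splnet()/splx() pairs dropped. The ordering in isolation; taking the two in the opposite order anywhere else would deadlock:

ifnet_lock_exclusive(ifp);	/* outer: protects if_prefixhead */
lck_mtx_lock(prefix6_mutex);	/* inner: protects rr_prefix / rp_addrhead */
/* ... mutate the prefix and its address list ... */
lck_mtx_unlock(prefix6_mutex);
ifnet_lock_done(ifp);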
@@ -737,9 +762,11 @@ rrpr_update(struct socket *so, struct rr_prefix *new) if (rpp == NULL) { log(LOG_ERR, "in6_prefix.c: rrpr_update:%d" ": ENOBUFS for rr_prefix\n", __LINE__); + ifnet_lock_done(new->rp_ifp); return(ENOBUFS); } /* initilization */ + lck_mtx_lock(prefix6_mutex); *rpp = *new; LIST_INIT(&rpp->rp_addrhead); /* move rp_addr entries of new to rpp */ @@ -749,6 +776,7 @@ rrpr_update(struct socket *so, struct rr_prefix *new) LIST_REMOVE(rap, ra_entry); LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); } + lck_mtx_unlock(prefix6_mutex); /* let rp_ifpr.ifpr_prefix point rr_prefix. */ rpp->rp_ifpr.ifpr_prefix = (struct sockaddr *)&rpp->rp_prefix; @@ -769,10 +797,11 @@ rrpr_update(struct socket *so, struct rr_prefix *new) rp2ifpr(rpp)->ifpr_type = IN6_PREFIX_RR; } /* link rr_prefix entry to rr_prefix list */ - s = splnet(); + lck_mtx_lock(prefix6_mutex); LIST_INSERT_HEAD(&rr_prefix, rpp, rp_entry); - splx(s); + lck_mtx_unlock(prefix6_mutex); } + ifnet_lock_done(new->rp_ifp); if (!new->rp_raf_auto) return 0; @@ -782,6 +811,7 @@ rrpr_update(struct socket *so, struct rr_prefix *new) * If it existed but not pointing to the prefix yet, * init the prefix pointer. */ + lck_mtx_lock(prefix6_mutex); LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry) { if (rap->ra_addr != NULL) { @@ -791,6 +821,7 @@ rrpr_update(struct socket *so, struct rr_prefix *new) } add_each_addr(so, rpp, rap); } + lck_mtx_unlock(prefix6_mutex); return 0; } @@ -804,14 +835,14 @@ add_each_prefix(struct socket *so, struct rr_prefix *rpp) static void rp_remove(struct rr_prefix *rpp) { - int s; - s = splnet(); /* unlink rp_entry from if_prefixlist */ + lck_mtx_lock(prefix6_mutex); { struct ifnet *ifp = rpp->rp_ifp; struct ifprefix *ifpr; + ifnet_lock_exclusive(ifp); if ((ifpr = TAILQ_FIRST(&ifp->if_prefixhead)) == rp2ifpr(rpp)) TAILQ_FIRST(&ifp->if_prefixhead) = TAILQ_NEXT(ifpr, ifpr_list); @@ -825,10 +856,11 @@ rp_remove(struct rr_prefix *rpp) else printf("Couldn't unlink rr_prefix from ifp\n"); } + ifnet_lock_done(ifp); } /* unlink rp_entry from rr_prefix list */ LIST_REMOVE(rpp, rp_entry); - splx(s); + lck_mtx_unlock(prefix6_mutex); FREE(rpp, M_IP6RR); } @@ -871,6 +903,7 @@ init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr, irr->irr_u_uselen, min(ifpr->ifpr_plen - irr->irr_u_uselen, irr->irr_u_keeplen)); + lck_mtx_lock(prefix6_mutex); LIST_FOREACH(orap, &(ifpr2rp(ifpr)->rp_addrhead), ra_entry) { struct rp_addr *rap; @@ -893,6 +926,7 @@ init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr, /* Is some FlagMasks for rrf necessary? */ rpp->rp_rrf = irr->irr_rrf; rpp->rp_origin = irr->irr_origin; + lck_mtx_unlock(prefix6_mutex); return 0; } @@ -904,6 +938,7 @@ free_rp_entries(struct rr_prefix *rpp) * This func is only called with rpp on stack(not on list). 
* So no splnet() here */ + lck_mtx_lock(prefix6_mutex); while (!LIST_EMPTY(&rpp->rp_addrhead)) { struct rp_addr *rap; @@ -914,6 +949,7 @@ free_rp_entries(struct rr_prefix *rpp) ifafree(&rap->ra_addr->ia_ifa); FREE(rap, M_RR_ADDR); } + lck_mtx_unlock(prefix6_mutex); } static int @@ -925,6 +961,7 @@ add_useprefixes(struct socket *so, struct ifnet *ifp, int error = 0; /* add prefixes to each of marked prefix */ + ifnet_lock_exclusive(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = nextifpr) { nextifpr = TAILQ_NEXT(ifpr, ifpr_list); @@ -937,6 +974,7 @@ add_useprefixes(struct socket *so, struct ifnet *ifp, error = add_each_prefix(so, &rp); } } + ifnet_lock_done(ifp); /* free each rp_addr entry */ free_rp_entries(&rp); @@ -947,14 +985,20 @@ static void unprefer_prefix(struct rr_prefix *rpp) { struct rp_addr *rap; + struct timeval timenow; + getmicrotime(&timenow); + + lck_mtx_lock(prefix6_mutex); for (rap = rpp->rp_addrhead.lh_first; rap != NULL; rap = rap->ra_entry.le_next) { if (rap->ra_addr == NULL) continue; - rap->ra_addr->ia6_lifetime.ia6t_preferred = time_second; + rap->ra_addr->ia6_lifetime.ia6t_preferred = timenow.tv_sec; rap->ra_addr->ia6_lifetime.ia6t_pltime = 0; } + lck_mtx_unlock(prefix6_mutex); + } int @@ -965,29 +1009,28 @@ delete_each_prefix(struct rr_prefix *rpp, u_char origin) if (rpp->rp_origin > origin) return(EPERM); + lck_mtx_lock(prefix6_mutex); while (rpp->rp_addrhead.lh_first != NULL) { struct rp_addr *rap; int s; - s = splnet(); rap = LIST_FIRST(&rpp->rp_addrhead); if (rap == NULL) { - splx(s); break; } LIST_REMOVE(rap, ra_entry); - splx(s); if (rap->ra_addr == NULL) { FREE(rap, M_RR_ADDR); continue; } rap->ra_addr->ia6_ifpr = NULL; - in6_purgeaddr(&rap->ra_addr->ia_ifa); + in6_purgeaddr(&rap->ra_addr->ia_ifa, 0); ifafree(&rap->ra_addr->ia_ifa); FREE(rap, M_RR_ADDR); } rp_remove(rpp); + lck_mtx_unlock(prefix6_mutex); return error; } @@ -998,6 +1041,7 @@ delete_prefixes(struct ifnet *ifp, u_char origin) struct ifprefix *ifpr, *nextifpr; /* delete prefixes marked as tobe deleted */ + ifnet_lock_exclusive(ifp); for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = nextifpr) { nextifpr = TAILQ_NEXT(ifpr, ifpr_list); @@ -1007,6 +1051,7 @@ delete_prefixes(struct ifnet *ifp, u_char origin) if (ifpr2rp(ifpr)->rp_statef_delmark) (void)delete_each_prefix(ifpr2rp(ifpr), origin); } + ifnet_lock_done(ifp); } static int @@ -1060,7 +1105,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, int error = 0; /* - * Failsafe for errneous address config program. + * Failsafe for erroneous address config program. * Let's hope rrenumd don't make a mistakes. 
*/ if (ipr->ipr_origin <= PR_ORIG_RA) @@ -1132,6 +1177,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, free_rp_entries(&rp_tmp); break; } + ifnet_lock_exclusive(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) @@ -1155,8 +1201,11 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, rp_tmp.rp_plen, (sizeof(rap->ra_ifid) << 3) - rp_tmp.rp_plen); /* insert into list */ + lck_mtx_lock(prefix6_mutex); LIST_INSERT_HEAD(&rp_tmp.rp_addrhead, rap, ra_entry); + lck_mtx_unlock(prefix6_mutex); } + ifnet_lock_done(ifp); error = add_each_prefix(so, &rp_tmp); @@ -1169,39 +1218,28 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, if (rpp == NULL || ifp != rpp->rp_ifp) return (EADDRNOTAVAIL); + ifnet_lock_exclusive(ifp); error = delete_each_prefix(rpp, ipr->ipr_origin); + ifnet_lock_done(ifp); break; } bad: return error; } -void -in6_rr_timer_funneled(void *ignored_arg) -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - in6_rr_timer(ignored_arg); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - void in6_rr_timer(void *ignored_arg) { - int s; struct rr_prefix *rpp; + struct timeval timenow; - timeout(in6_rr_timer_funneled, (caddr_t)0, ip6_rr_prune * hz); + getmicrotime(&timenow); - s = splnet(); /* expire */ + lck_mtx_lock(prefix6_mutex); rpp = LIST_FIRST(&rr_prefix); while (rpp) { - if (rpp->rp_expire && rpp->rp_expire < time_second) { + if (rpp->rp_expire && rpp->rp_expire < timenow.tv_sec) { struct rr_prefix *next_rpp; next_rpp = LIST_NEXT(rpp, rp_entry); @@ -1209,9 +1247,10 @@ in6_rr_timer(void *ignored_arg) rpp = next_rpp; continue; } - if (rpp->rp_preferred && rpp->rp_preferred < time_second) + if (rpp->rp_preferred && rpp->rp_preferred < timenow.tv_sec) unprefer_prefix(rpp); rpp = LIST_NEXT(rpp, rp_entry); } - splx(s); + lck_mtx_unlock(prefix6_mutex); + timeout(in6_rr_timer, (caddr_t)0, ip6_rr_prune * hz); } diff --git a/bsd/netinet6/in6_prefix.h b/bsd/netinet6/in6_prefix.h index 29a04cff3..d235a069b 100644 --- a/bsd/netinet6/in6_prefix.h +++ b/bsd/netinet6/in6_prefix.h @@ -30,7 +30,7 @@ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct rr_prefix { struct ifprefix rp_ifpr; LIST_ENTRY(rr_prefix) rp_entry; @@ -85,8 +85,7 @@ LIST_HEAD(rr_prhead, rr_prefix); extern struct rr_prhead rr_prefix; -void in6_rr_timer __P((void *)); -void in6_rr_timer_funneled __P((void *)); -int delete_each_prefix __P((struct rr_prefix *rpp, u_char origin)); +void in6_rr_timer(void *); +int delete_each_prefix (struct rr_prefix *rpp, u_char origin); -#endif /* __APPLE_API_PRIVATE */ +#endif KERNEL_PRIVATE diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index d33c161ce..0f5281bfb 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -102,6 +102,8 @@ #include #include +#include + #if IPSEC #include #if INET6 @@ -133,12 +135,13 @@ extern struct domain inet6domain; static struct pr_usrreqs nousrreqs; +lck_mtx_t *inet6_domain_mutex; #define PR_LISTEN 0 #define PR_ABRTACPTDIS 0 extern struct domain inet6domain; -extern int in6_inithead __P((void **, int)); +extern int in6_inithead(void **, int); void in6_dinit(void); static int rip6_pr_output(struct mbuf *m, struct socket *so, struct sockaddr_in6 *, struct mbuf *); @@ -148,15 +151,21 @@ struct ip6protosw inet6sw[] = { 0, 0, 0, 0, 0, ip6_init, 0, frag6_slowtimo, frag6_drain, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 + }, -{ SOCK_DGRAM, &inet6domain, IPPROTO_UDP, 
PR_ATOMIC|PR_ADDR, +{ SOCK_DGRAM, &inet6domain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK, udp6_input, 0, udp6_ctlinput, ip6_ctloutput, 0, 0, 0, 0, 0, - 0, &udp6_usrreqs + 0, + &udp6_usrreqs, + udp_lock, udp_unlock, udp_getlock + }, -{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN, +{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_PROTOLOCK|PR_PCBLOCK|PR_DISPOSE, tcp6_input, 0, tcp6_ctlinput, tcp_ctloutput, 0, #if INET /* don't call initialization and timeout routines twice */ @@ -164,60 +173,79 @@ struct ip6protosw inet6sw[] = { #else tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, #endif - 0, &tcp6_usrreqs, + 0, + &tcp6_usrreqs, + tcp_lock, tcp_unlock, tcp_getlock + }, { SOCK_RAW, &inet6domain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, rip6_input, rip6_pr_output, rip6_ctlinput, rip6_ctloutput, 0, 0, 0, 0, 0, - 0, &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, { SOCK_RAW, &inet6domain, IPPROTO_ICMPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, icmp6_input, rip6_pr_output, rip6_ctlinput, rip6_ctloutput, 0, icmp6_init, icmp6_fasttimo, 0, 0, - 0, &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, { SOCK_RAW, &inet6domain, IPPROTO_DSTOPTS,PR_ATOMIC|PR_ADDR, dest6_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, { SOCK_RAW, &inet6domain, IPPROTO_ROUTING,PR_ATOMIC|PR_ADDR, route6_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, { SOCK_RAW, &inet6domain, IPPROTO_FRAGMENT,PR_ATOMIC|PR_ADDR, frag6_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, #if IPSEC -{ SOCK_RAW, &inet6domain, IPPROTO_AH, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inet6domain, IPPROTO_AH, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, ah6_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, #if IPSEC_ESP -{ SOCK_RAW, &inet6domain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inet6domain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, esp6_input, 0, esp6_ctlinput, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, #endif -{ SOCK_RAW, &inet6domain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inet6domain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, ipcomp6_input, 0, 0, 0, 0, 0, 0, 0, 0, - 0, &nousrreqs + 0, + &nousrreqs, + 0, 0, 0 }, #endif /* IPSEC */ #if INET @@ -225,27 +253,35 @@ struct ip6protosw inet6sw[] = { encap6_input, rip6_pr_output, 0, rip6_ctloutput, 0, encap_init, 0, 0, 0, - 0, &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, #endif /*INET*/ { SOCK_RAW, &inet6domain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, encap6_input, rip6_pr_output, 0, rip6_ctloutput, 0, encap_init, 0, 0, 0, - 0, &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, { SOCK_RAW, &inet6domain, IPPROTO_PIM, PR_ATOMIC|PR_ADDR|PR_LASTHDR, pim6_input, rip6_pr_output, 0, rip6_ctloutput, 0, - 0, 0, 0, 0, - 0, &rip6_usrreqs + 0, 0, 0, 0, + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, /* raw wildcard */ -{ SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR, +{ SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR|PR_LASTHDR, rip6_input, rip6_pr_output, 0, rip6_ctloutput, 0, 0, 0, 0, 0, - 0, &rip6_usrreqs + 0, + &rip6_usrreqs, + 0, rip_unlock, 0 }, }; @@ -276,6 +312,7 @@ in6_dinit() for (i=0, pr = &inet6sw[0]; idom_mtx; inet6domain_initted = 1; } } @@ -307,6 +344,7 @@ int ip6_defhlim = IPV6_DEFHLIM; int ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS; int ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 
0 : 1" is dangerous */ int ip6_maxfragpackets; /* initialized in frag6.c:frag6_init() */ +int ip6_maxfrags; int ip6_log_interval = 5; int ip6_hdrnestlimit = 50; /* appropriate? */ int ip6_dad_count = 1; /* DupAddrDetectionTransmits */ @@ -415,6 +453,8 @@ SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD, &ip6stat, ip6stat, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, CTLFLAG_RW, &ip6_maxfragpackets, 0, ""); +SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, + maxfrags, CTLFLAG_RW, &ip6_maxfrags, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_RW, &ip6_accept_rtadv, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, @@ -451,6 +491,9 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal, 0, ""); SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD, &rip6stat, rip6stat, ""); +SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD, + &mrt6stat, mrt6stat, ""); + /* net.inet6.icmp6 */ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, diff --git a/bsd/netinet6/in6_rmx.c b/bsd/netinet6/in6_rmx.c index 805f2a9e0..1aa407220 100644 --- a/bsd/netinet6/in6_rmx.c +++ b/bsd/netinet6/in6_rmx.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -99,9 +100,10 @@ #include #include -extern int in6_inithead __P((void **head, int off)); -static void in6_rtqtimo __P((void *rock)); -static void in6_mtutimo __P((void *rock)); +extern int in6_inithead(void **head, int off); +static void in6_rtqtimo(void *rock); +static void in6_mtutimo(void *rock); +extern lck_mtx_t *rt_mtx; #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ @@ -160,21 +162,21 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * Find out if it is because of an * ARP entry and delete it if so. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, + rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && rt2->rt_gateway && rt2->rt_gateway->sa_family == AF_LINK) { - rtrequest(RTM_DELETE, + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); } - rtfree(rt2); + rtfree_locked(rt2); } } else if (ret == NULL && rt->rt_flags & RTF_CLONING) { struct rtentry *rt2; @@ -190,7 +192,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * net route entry, 3ffe:0501:: -> if0. * This case should not raise an error. */ - rt2 = rtalloc1((struct sockaddr *)sin6, 0, + rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) @@ -200,7 +202,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, && rt2->rt_ifp == rt->rt_ifp) { ret = rt2->rt_nodes; } - rtfree(rt2); + rtfree_locked(rt2); } } return ret; @@ -252,6 +254,8 @@ static void in6_clsroute(struct radix_node *rn, struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + struct timeval timenow; + if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ @@ -268,11 +272,12 @@ in6_clsroute(struct radix_node *rn, struct radix_node_head *head) * If rtq_reallyold is 0, just delete the route without * waiting for a timeout cycle to kill it. 
*/ + getmicrotime(&timenow); if (rtq_reallyold != 0) { rt->rt_flags |= RTPRF_OURS; - rt->rt_rmx.rmx_expire = time_second + rtq_reallyold; + rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold; } else { - rtrequest(RTM_DELETE, + rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); @@ -300,15 +305,19 @@ in6_rtqkill(struct radix_node *rn, void *rock) struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; int err; + struct timeval timenow; + + getmicrotime(&timenow); + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); if (rt->rt_flags & RTPRF_OURS) { ap->found++; - if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) { + if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) { if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest(RTM_DELETE, + err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); @@ -319,9 +328,9 @@ in6_rtqkill(struct radix_node *rn, void *rock) } } else { if (ap->updating - && (rt->rt_rmx.rmx_expire - time_second + && (rt->rt_rmx.rmx_expire - timenow.tv_sec > rtq_reallyold)) { - rt->rt_rmx.rmx_expire = time_second + rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold; } ap->nextstop = lmin(ap->nextstop, @@ -335,19 +344,6 @@ in6_rtqkill(struct radix_node *rn, void *rock) #define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ static int rtq_timeout = RTQ_TIMEOUT; -static void -in6_rtqtimo_funneled(void *rock) -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - in6_rtqtimo(rock); -#endif -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - static void in6_rtqtimo(void *rock) { @@ -355,15 +351,16 @@ in6_rtqtimo(void *rock) struct rtqk_arg arg; struct timeval atv; static time_t last_adjusted_timeout = 0; - int s; + struct timeval timenow; + + getmicrotime(&timenow); arg.found = arg.killed = 0; arg.rnh = rnh; - arg.nextstop = time_second + rtq_timeout; + arg.nextstop = timenow.tv_sec + rtq_timeout; arg.draining = arg.updating = 0; - s = splnet(); + lck_mtx_lock(rt_mtx); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - splx(s); /* * Attempt to be somewhat dynamic about this: @@ -374,28 +371,27 @@ in6_rtqtimo(void *rock) * hard. 
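in6_rtqtimo() above now snapshots the clock once, walks the tree with rt_mtx held, and re-arms itself after dropping the mutex. Its skeleton, reduced to the timing and locking moves (the adaptive rtq_reallyold adjustment follows in the next hunk; the function name here is illustrative):

static void
expiry_timer_example(void *rock)
{
	struct radix_node_head *rnh = rock;
	struct rtqk_arg arg;
	struct timeval atv, timenow;

	getmicrotime(&timenow);
	bzero(&arg, sizeof (arg));
	arg.rnh = rnh;
	arg.nextstop = timenow.tv_sec + rtq_timeout;

	lck_mtx_lock(rt_mtx);
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);	/* asserts rt_mtx owned */
	lck_mtx_unlock(rt_mtx);

	atv.tv_usec = 0;
	atv.tv_sec = arg.nextstop - timenow.tv_sec;
	timeout(expiry_timer_example, rock, tvtohz(&atv));
}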
*/ if ((arg.found - arg.killed > rtq_toomany) - && (time_second - last_adjusted_timeout >= rtq_timeout) + && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout) && rtq_reallyold > rtq_minreallyold) { rtq_reallyold = 2*rtq_reallyold / 3; if (rtq_reallyold < rtq_minreallyold) { rtq_reallyold = rtq_minreallyold; } - last_adjusted_timeout = time_second; + last_adjusted_timeout = timenow.tv_sec; #if DIAGNOSTIC log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d", rtq_reallyold); #endif arg.found = arg.killed = 0; arg.updating = 1; - s = splnet(); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - splx(s); } atv.tv_usec = 0; - atv.tv_sec = arg.nextstop - time_second; - timeout(in6_rtqtimo_funneled, rock, tvtohz(&atv)); + atv.tv_sec = arg.nextstop - timenow.tv_sec; + lck_mtx_unlock(rt_mtx); + timeout(in6_rtqtimo, rock, tvtohz(&atv)); } /* @@ -411,13 +407,16 @@ in6_mtuexpire(struct radix_node *rn, void *rock) { struct rtentry *rt = (struct rtentry *)rn; struct mtuex_arg *ap = rock; + struct timeval timenow; + + getmicrotime(&timenow); /* sanity */ if (!rt) panic("rt == NULL in in6_mtuexpire"); if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { - if (rt->rt_rmx.rmx_expire <= time_second) { + if (rt->rt_rmx.rmx_expire <= timenow.tv_sec) { rt->rt_flags |= RTF_PROBEMTU; } else { ap->nextstop = lmin(ap->nextstop, @@ -430,43 +429,32 @@ in6_mtuexpire(struct radix_node *rn, void *rock) #define MTUTIMO_DEFAULT (60*1) -static void -in6_mtutimo_funneled(void *rock) -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); - in6_mtutimo(rock); -#endif -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - static void in6_mtutimo(void *rock) { struct radix_node_head *rnh = rock; struct mtuex_arg arg; struct timeval atv; - int s; + struct timeval timenow; + + getmicrotime(&timenow); arg.rnh = rnh; - arg.nextstop = time_second + MTUTIMO_DEFAULT; - s = splnet(); + arg.nextstop = timenow.tv_sec + MTUTIMO_DEFAULT; + lck_mtx_lock(rt_mtx); rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); - splx(s); atv.tv_usec = 0; atv.tv_sec = arg.nextstop; - if (atv.tv_sec < time_second) { + if (atv.tv_sec < timenow.tv_sec) { #if DIAGNOSTIC log(LOG_DEBUG, "IPv6: invalid mtu expiration time on routing table\n"); #endif - arg.nextstop = time_second + 30; /*last resort*/ + arg.nextstop = timenow.tv_sec + 30; /*last resort*/ } - atv.tv_sec -= time_second; - timeout(in6_mtutimo_funneled, rock, tvtohz(&atv)); + atv.tv_sec -= timenow.tv_sec; + lck_mtx_unlock(rt_mtx); + timeout(in6_mtutimo, rock, tvtohz(&atv)); } #if 0 diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 3f9c8e4ff..dd0b5c0b2 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,7 @@ #include #include "loop.h" +extern lck_mtx_t *rt_mtx; /* * Return an IPv6 address, which is the most appropriate for a given @@ -105,13 +107,14 @@ * an entry to the caller for later use. 
*/ struct in6_addr * -in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) - struct sockaddr_in6 *dstsock; - struct ip6_pktopts *opts; - struct ip6_moptions *mopts; - struct route_in6 *ro; - struct in6_addr *laddr; - int *errorp; +in6_selectsrc( + struct sockaddr_in6 *dstsock, + struct ip6_pktopts *opts, + struct ip6_moptions *mopts, + struct route_in6 *ro, + struct in6_addr *laddr, + struct in6_addr *src_storage, + int *errorp) { struct in6_addr *dst; struct in6_ifaddr *ia6 = 0; @@ -148,7 +151,8 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + return src_storage; } /* @@ -177,7 +181,8 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + return src_storage; } /* @@ -202,7 +207,8 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + return src_storage; } } @@ -217,7 +223,7 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); - rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL); + rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0); if (rt) { ia6 = in6_ifawithscope(rt->rt_ifp, dst); if (ia6 == 0) @@ -227,7 +233,8 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) *errorp = EADDRNOTAVAIL; return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + return src_storage; } } @@ -236,16 +243,17 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) * our src addr is taken from the i/f, else punt. 
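The route-cache handling in in6_selectsrc() above validates and, if needed, reacquires ro->ro_rt entirely under rt_mtx, so the entry cannot be freed between the check and its use. The pattern in isolation, condensed from the hunk (the full version also rebuilds ro_dst and special-cases multicast with rtalloc1_locked):

lck_mtx_lock(rt_mtx);
if (ro->ro_rt != NULL &&
    (!(ro->ro_rt->rt_flags & RTF_UP) ||
     satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) {
	rtfree_locked(ro->ro_rt);	/* stale: down or wrong destination */
	ro->ro_rt = (struct rtentry *)0;
}
if (ro->ro_rt == NULL)
	rtalloc_ign_locked((struct route *)ro, 0UL);	/* fresh lookup */
lck_mtx_unlock(rt_mtx);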
*/ if (ro) { + lck_mtx_lock(rt_mtx); if (ro->ro_rt && (!(ro->ro_rt->rt_flags & RTF_UP) || satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { - rtfree(ro->ro_rt); + rtfree_locked(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == (struct ifnet *)0) { + ro->ro_rt->rt_ifp == 0) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ @@ -256,12 +264,13 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) sa6->sin6_addr = *dst; sa6->sin6_scope_id = dstsock->sin6_scope_id; if (IN6_IS_ADDR_MULTICAST(dst)) { - ro->ro_rt = rtalloc1(&((struct route *)ro) + ro->ro_rt = rtalloc1_locked(&((struct route *)ro) ->ro_dst, 0, 0UL); } else { - rtalloc((struct route *)ro); + rtalloc_ign_locked((struct route *)ro, 0UL); } } + lck_mtx_unlock(rt_mtx); /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using @@ -272,8 +281,14 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) if (ro->ro_rt) { ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); - if (ia6 == 0) /* xxx scope error ?*/ + if (ia6 == 0) { ia6 = ifatoia6(ro->ro_rt->rt_ifa); + if (ia6) + ifaref(&ia6->ia_ifa); + } + else { + ifaref(&ia6->ia_ifa); + } } #if 0 /* @@ -291,14 +306,17 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) ia6 = ifatoia6(ifa_ifwithnet(sin6tosa(&sin6))); if (ia6 == 0) return(0); - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + return src_storage; } #endif /* 0 */ if (ia6 == 0) { *errorp = EHOSTUNREACH; /* no route */ return(0); } - return(&satosin6(&ia6->ia_addr)->sin6_addr); + *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); + return src_storage; } *errorp = EADDRNOTAVAIL; @@ -313,9 +331,9 @@ in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) * 3. The system default hoplimit. */ int -in6_selecthlim(in6p, ifp) - struct in6pcb *in6p; - struct ifnet *ifp; +in6_selecthlim( + struct in6pcb *in6p, + struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) return(in6p->in6p_hops); @@ -330,15 +348,23 @@ in6_selecthlim(in6p, ifp) * share this function by all *bsd*... */ int -in6_pcbsetport(laddr, inp, p) +in6_pcbsetport(laddr, inp, p, locked) struct in6_addr *laddr; struct inpcb *inp; struct proc *p; + int locked; { struct socket *so = inp->inp_socket; u_int16_t lport = 0, first, last, *lastport; int count, error = 0, wild = 0; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; + if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */ + if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } + } /* XXX: this is redundant when called from in6_pcbbind */ if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) @@ -351,13 +377,11 @@ in6_pcbsetport(laddr, inp, p) last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { -#ifdef __APPLE__ - if (p && (error = suser(p->p_ucred, &p->p_acflag))) -#else - if (p && (error = suser(p))) -#endif - + if (p && (error = proc_suser(p))) { + if (!locked) + lck_rw_done(pcbinfo->mtx); return error; + } first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; @@ -386,6 +410,8 @@ in6_pcbsetport(laddr, inp, p) * occurred above. 
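in6_pcbsetport() above grows a locked parameter so callers that already hold pcbinfo->mtx (in6_pcbbind) skip the acquisition, while other callers get the same try-lock back-off, per the 4052373 comment in the hunk. The conditional shape, isolated; note that every return path has to mirror the release, which the hunks do one by one:

if (!locked) {
	if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(pcbinfo->mtx);
		socket_lock(inp->inp_socket, 0);
	}
}
/* ... pick an ephemeral port, hash the PCB ... */
if (!locked)
	lck_rw_done(pcbinfo->mtx);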
*/ inp->in6p_laddr = in6addr_any; + if (!locked) + lck_rw_done(pcbinfo->mtx); return (EAGAIN); } --*lastport; @@ -407,6 +433,8 @@ in6_pcbsetport(laddr, inp, p) * occurred above. */ inp->in6p_laddr = in6addr_any; + if (!locked) + lck_rw_done(pcbinfo->mtx); return (EAGAIN); } ++*lastport; @@ -418,12 +446,16 @@ in6_pcbsetport(laddr, inp, p) } inp->inp_lport = lport; - if (in_pcbinshash(inp) != 0) { + if (in_pcbinshash(inp, 1) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; + if (!locked) + lck_rw_done(pcbinfo->mtx); return (EAGAIN); } + if (!locked) + lck_rw_done(pcbinfo->mtx); return(0); } @@ -443,17 +475,17 @@ in6_pcbsetport(laddr, inp, p) * we may want to change the function to return something other than ifp. */ int -in6_embedscope(in6, sin6, in6p, ifpp) - struct in6_addr *in6; - const struct sockaddr_in6 *sin6; +in6_embedscope( + struct in6_addr *in6, + const struct sockaddr_in6 *sin6, #ifdef HAVE_NRL_INPCB - struct inpcb *in6p; + struct inpcb *in6p, #define in6p_outputopts inp_outputopts6 #define in6p_moptions inp_moptions6 #else - struct in6pcb *in6p; + struct in6pcb *in6p, #endif - struct ifnet **ifpp; + struct ifnet **ifpp) { struct ifnet *ifp = NULL; u_int32_t scopeid; @@ -518,10 +550,10 @@ in6_embedscope(in6, sin6, in6p, ifpp) * embedded scopeid thing. */ int -in6_recoverscope(sin6, in6, ifp) - struct sockaddr_in6 *sin6; - const struct in6_addr *in6; - struct ifnet *ifp; +in6_recoverscope( + struct sockaddr_in6 *sin6, + const struct in6_addr *in6, + struct ifnet *ifp) { u_int32_t scopeid; diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index 071ad9226..787393d33 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -95,7 +95,7 @@ struct in6_addrlifetime { u_int32_t ia6t_pltime; /* prefix lifetime */ }; -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE struct in6_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp @@ -115,12 +115,11 @@ struct in6_ifaddr { * (for autoconfigured addresses only) */ }; -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ /* * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12). */ -#ifdef __APPLE_API_UNSTABLE struct in6_ifstat { u_quad_t ifs6_in_receive; /* # of total input datagram */ u_quad_t ifs6_in_hdrerr; /* # of datagrams with invalid hdr */ @@ -236,7 +235,6 @@ struct icmp6_ifstat { /* ipv6IfIcmpOutGroupMembReductions, # of output MLD done */ u_quad_t ifs6_out_mlddone; }; -#endif /* __APPLE_API_UNSTABLE */ struct in6_ifreq { char ifr_name[IFNAMSIZ]; @@ -345,7 +343,7 @@ struct in6_rrenumreq { #define irr_rrf_decrvalid irr_flags.prf_rr.decrvalid #define irr_rrf_decrprefd irr_flags.prf_rr.decrprefd -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * Given a pointer to an in6_ifaddr (ifaddr), * return a pointer to the addr as a sockaddr_in6 @@ -359,9 +357,8 @@ struct in6_rrenumreq { #define IFA_DSTIN6(x) (&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr) #define IFPR_IN6(x) (&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr) +#endif KERNEL_PRIVATE - -#ifdef __APPLE__ /* * Event data, internet6 style. 
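
Note how every early return added to in6_pcbsetport() above repeats the `if (!locked) lck_rw_done(pcbinfo->mtx)` release before bailing out. A sketch of the invariant those hunks maintain, written single-exit style for compactness (the patch itself releases inline at each return; `locked` has the same meaning as in the patch, and EPERM/EAGAIN stand in for the proc_suser() failure and port-exhaustion cases):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t pcbinfo_lock = PTHREAD_RWLOCK_INITIALIZER;

/* 'locked' means the caller already holds pcbinfo_lock; the lock is
 * only taken, and therefore only released, when it is zero. */
static int
setport_sketch(int locked, int fail_perm, int out_of_ports)
{
    int error = 0;

    if (!locked)
        pthread_rwlock_wrlock(&pcbinfo_lock);

    if (fail_perm) {
        error = EPERM;
        goto out;
    }
    if (out_of_ports) {
        error = EAGAIN;
        goto out;
    }
    /* ... choose a port and hash the pcb in here ... */
out:
    if (!locked)
        pthread_rwlock_unlock(&pcbinfo_lock);
    return error;
}

int
main(void)
{
    printf("%d %d %d\n",
        setport_sketch(0, 1, 0),
        setport_sketch(0, 0, 1),
        setport_sketch(0, 0, 0));
    return 0;
}
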
*/ @@ -391,34 +388,26 @@ struct kev_in6_data { #define KEV_INET6_NEW_RTADV_ADDR 5 /* Autoconf router advertised address has appeared */ #define KEV_INET6_DEFROUTER 6 /* Default router dectected by kernel */ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE /* Utility function used inside netinet6 kernel code for generating events */ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); -#endif -#endif /* __APPLE__ */ -#endif /* __APPLE_API_PRIVATE */ +#endif KERNEL_PRIVATE -#ifdef KERNEL #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) -#endif #define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq) #define SIOCGIFADDR_IN6 _IOWR('i', 33, struct in6_ifreq) -#ifdef KERNEL -#ifdef __APPLE_API_OBSOLETE /* * SIOCSxxx ioctls should be unused (see comments in in6.c), but * we do not shift numbers for binary compatibility. */ #define SIOCSIFDSTADDR_IN6 _IOW('i', 14, struct in6_ifreq) #define SIOCSIFNETMASK_IN6 _IOW('i', 22, struct in6_ifreq) -#endif /* __APPLE_API_OBSOLETE */ -#endif #define SIOCGIFDSTADDR_IN6 _IOWR('i', 34, struct in6_ifreq) #define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq) @@ -467,7 +456,6 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #define SIOCGETMIFCNT_IN6 _IOWR('u', 107, \ struct sioc_mif_req6) /* get pkt cnt per if */ -#ifdef KERNEL_PRIVATE /* * temporary control calls to attach/detach IP to/from an ethernet interface */ @@ -478,8 +466,6 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #define SIOCLL_STOP _IOWR('i', 131, struct in6_ifreq) /* deconfigure linklocal from interface */ #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */ #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */ -#endif KERNEL_PRIVATE - #define IN6_IFF_ANYCAST 0x01 /* anycast address */ #define IN6_IFF_TENTATIVE 0x02 /* tentative address */ @@ -503,9 +489,8 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b)) #endif -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern struct in6_ifaddr *in6_ifaddr; +#ifdef KERNEL_PRIVATE +extern struct in6_ifaddr *in6_ifaddrs; extern struct in6_ifstat **in6_ifstat; extern size_t in6_ifstatmax; @@ -514,10 +499,11 @@ extern struct icmp6_ifstat **icmp6_ifstat; extern size_t icmp6_ifstatmax; #define in6_ifstat_inc(ifp, tag) \ do { \ - if ((ifp) && (ifp)->if_index <= if_index \ - && (ifp)->if_index < in6_ifstatmax \ - && in6_ifstat && in6_ifstat[(ifp)->if_index]) { \ - in6_ifstat[(ifp)->if_index]->tag++; \ + int _z_index = ifp ? ifp->if_index : 0; \ + if ((_z_index) && _z_index <= if_index \ + && _z_index < in6_ifstatmax \ + && in6_ifstat && in6_ifstat[_z_index]) { \ + in6_ifstat[_z_index]->tag++; \ } \ } while (0) @@ -527,7 +513,7 @@ extern u_char inet6ctlerrmap[]; extern unsigned long in6_maxmtu; #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPMADDR); -#endif +#endif MALLOC_DECLARE /* * Macro for finding the internet address structure (in6_ifaddr) corresponding @@ -548,10 +534,6 @@ do { \ (ia) = (struct in6_ifaddr *)ifa; \ } while (0) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ - -#ifdef __APPLE_API_PRIVATE /* * Multi-cast membership entry. One for each group/ifp that a PCB * belongs to. 
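
The rewritten in6_ifstat_inc() macro above is worth pausing on: the old body evaluated `(ifp)->if_index` several times with no NULL check, while the new one captures the index once in a local and guards every array access. A self-contained sketch of the same macro shape, with a trivial counter array in place of the kernel's per-interface stats (ifstat_inc and NSTATS are illustrative names only):

#include <stdio.h>

struct ifnet {
    int if_index;
};

#define NSTATS 8
static int stats[NSTATS];
static int if_index_max = NSTATS - 1;    /* stands in for if_index */

/* Same shape as the patched in6_ifstat_inc(): evaluate the interface
 * expression once into a local, reject NULL and out-of-range indexes,
 * and only then touch the counter array.  do/while(0) keeps the macro
 * statement-safe. */
#define ifstat_inc(ifp) \
do { \
    int _z_index = (ifp) ? (ifp)->if_index : 0; \
    if (_z_index && _z_index <= if_index_max && _z_index < NSTATS) \
        stats[_z_index]++; \
} while (0)

int
main(void)
{
    struct ifnet en0 = { 1 };
    struct ifnet *nothing = 0;

    ifstat_inc(&en0);       /* counted */
    ifstat_inc(nothing);    /* NULL interface: silently skipped */
    printf("stats[1] = %d\n", stats[1]);
    return 0;
}
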
@@ -570,10 +552,7 @@ struct in6_multi { u_int in6m_state; /* state of the membership */ u_int in6m_timer; /* MLD6 listener report timer */ }; -#endif /* __APPLE_API_PRIVATE */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern LIST_HEAD(in6_multihead, in6_multi) in6_multihead; /* @@ -631,8 +610,8 @@ do { \ } while(0) struct in6_multi *in6_addmulti __P((struct in6_addr *, struct ifnet *, - int *)); -void in6_delmulti __P((struct in6_multi *)); + int *, int)); +void in6_delmulti __P((struct in6_multi *, int)); extern int in6_ifindex2scopeid __P((int)); extern int in6_mask2len __P((struct in6_addr *, u_char *)); extern void in6_len2mask __P((struct in6_addr *, int)); @@ -640,7 +619,7 @@ int in6_control __P((struct socket *, u_long, caddr_t, struct ifnet *, struct proc *)); int in6_update_ifa __P((struct ifnet *, struct in6_aliasreq *, struct in6_ifaddr *)); -void in6_purgeaddr __P((struct ifaddr *)); +void in6_purgeaddr __P((struct ifaddr *, int)); int in6if_do_dad __P((struct ifnet *)); void in6_purgeif __P((struct ifnet *)); void in6_savemkludge __P((struct in6_ifaddr *)); @@ -669,7 +648,6 @@ int in6_embedscope __P((struct in6_addr *, const struct sockaddr_in6 *, int in6_recoverscope __P((struct sockaddr_in6 *, const struct in6_addr *, struct ifnet *)); void in6_clearscope __P((struct in6_addr *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif KERNEL_PRIVATE -#endif /* _NETINET6_IN6_VAR_H_ */ +#endif _NETINET6_IN6_VAR_H_ diff --git a/bsd/netinet6/ip6_ecn.h b/bsd/netinet6/ip6_ecn.h index 27104fcb8..6e926018a 100644 --- a/bsd/netinet6/ip6_ecn.h +++ b/bsd/netinet6/ip6_ecn.h @@ -36,9 +36,7 @@ */ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern void ip6_ecn_ingress __P((int, u_int32_t *, const u_int32_t *)); -extern void ip6_ecn_egress __P((int, const u_int32_t *, u_int32_t *)); -#endif /* __APPLE_API_PRIVATE */ -#endif +#ifdef KERNEL_PRIVATE +extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); +extern void ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); +#endif KERNEL_PRIVATE diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c index d87abb122..d857cc307 100644 --- a/bsd/netinet6/ip6_forward.c +++ b/bsd/netinet6/ip6_forward.c @@ -66,6 +66,8 @@ #endif #include extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; +extern lck_mtx_t *ip6_mutex; #endif /* IPSEC */ #include @@ -88,9 +90,10 @@ struct route_in6 ip6_forward_rt; */ void -ip6_forward(m, srcrt) +ip6_forward(m, srcrt, locked) struct mbuf *m; int srcrt; + int locked; { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst; @@ -101,6 +104,10 @@ ip6_forward(m, srcrt) #if IPSEC struct secpolicy *sp = NULL; #endif + struct timeval timenow; + + getmicrotime(&timenow); + #if IPSEC /* @@ -110,10 +117,15 @@ ip6_forward(m, srcrt) * Don't increment ip6s_cantforward because this is the check * before forwarding packet actually. 
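
In the ip6_forward() hunk below, the inbound IPsec policy check gets bracketed with sadb_mutex, since ipsec6_in_reject() walks shared security-policy state; the statistics bump happens under the lock and the packet is freed only after the lock is dropped. A minimal pthread sketch of that bracketing, with a trivial predicate standing in for ipsec6_in_reject():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sadb_mutex = PTHREAD_MUTEX_INITIALIZER;
static int in_polvio;    /* stands in for ipsec6stat.in_polvio */

/* Trivial predicate standing in for ipsec6_in_reject(), which must
 * run with sadb_mutex held because it consults shared policy state. */
static int
policy_rejects(int pkt)
{
    return pkt < 0;
}

/* Mirrors the patched check in ip6_forward(): lock around the policy
 * lookup and the statistics bump, drop the lock, then drop the
 * packet (m_freem in the kernel) outside the lock. */
static int
forward_policy_check(int pkt)
{
    int reject;

    pthread_mutex_lock(&sadb_mutex);
    reject = policy_rejects(pkt);
    if (reject)
        in_polvio++;
    pthread_mutex_unlock(&sadb_mutex);
    return reject;
}

int
main(void)
{
    printf("pkt 1: %s\n", forward_policy_check(1) ? "drop" : "forward");
    printf("pkt -1: %s\n", forward_policy_check(-1) ? "drop" : "forward");
    return 0;
}
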
*/ - if (ipsec_bypass == 0 && ipsec6_in_reject(m, NULL)) { - ipsec6stat.in_polvio++; - m_freem(m); - return; + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject(m, NULL)) { + ipsec6stat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + m_freem(m); + return; + } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ @@ -128,8 +140,8 @@ ip6_forward(m, srcrt) IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat.ip6s_cantforward++; /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ - if (ip6_log_time + ip6_log_interval < time_second) { - ip6_log_time = time_second; + if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { + ip6_log_time = timenow.tv_sec; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -144,8 +156,12 @@ ip6_forward(m, srcrt) if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0); + if (locked) + lck_mtx_lock(ip6_mutex); return; } ip6->ip6_hlim -= IPV6_HLIMDEC; @@ -164,7 +180,7 @@ ip6_forward(m, srcrt) #if IPSEC if (ipsec_bypass != 0) goto skip_ipsec; - + lck_mtx_lock(sadb_mutex); /* get a security policy for this packet */ sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &error); @@ -178,6 +194,7 @@ ip6_forward(m, srcrt) m_freem(mcopy); #endif } + lck_mtx_unlock(sadb_mutex); m_freem(m); return; } @@ -200,6 +217,7 @@ ip6_forward(m, srcrt) m_freem(mcopy); #endif } + lck_mtx_unlock(sadb_mutex); m_freem(m); return; @@ -207,6 +225,7 @@ ip6_forward(m, srcrt) case IPSEC_POLICY_NONE: /* no need to do IPsec. */ key_freesp(sp); + lck_mtx_unlock(sadb_mutex); goto skip_ipsec; case IPSEC_POLICY_IPSEC: @@ -222,6 +241,7 @@ ip6_forward(m, srcrt) m_freem(mcopy); #endif } + lck_mtx_unlock(sadb_mutex); m_freem(m); return; } @@ -233,6 +253,7 @@ ip6_forward(m, srcrt) /* should be panic ?? */ printf("ip6_forward: Invalid policy found. 
%d\n", sp->policy); key_freesp(sp); + lck_mtx_unlock(sadb_mutex); goto skip_ipsec; } @@ -281,10 +302,12 @@ ip6_forward(m, srcrt) m_freem(mcopy); #endif } + lck_mtx_unlock(sadb_mutex); m_freem(m); return; } } + lck_mtx_unlock(sadb_mutex); skip_ipsec: #endif /* IPSEC */ @@ -308,8 +331,12 @@ ip6_forward(m, srcrt) ip6stat.ip6s_noroute++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); + if (locked) + lck_mtx_lock(ip6_mutex); } m_freem(m); return; @@ -330,8 +357,12 @@ ip6_forward(m, srcrt) ip6stat.ip6s_noroute++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); + if (locked) + lck_mtx_lock(ip6_mutex); } m_freem(m); return; @@ -352,8 +383,8 @@ ip6_forward(m, srcrt) ip6stat.ip6s_badscope++; in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); - if (ip6_log_time + ip6_log_interval < time_second) { - ip6_log_time = time_second; + if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { + ip6_log_time = timenow.tv_sec; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", @@ -362,9 +393,14 @@ ip6_forward(m, srcrt) ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } - if (mcopy) + if (mcopy) { + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); + if (locked) + lck_mtx_lock(ip6_mutex); + } m_freem(m); return; } @@ -388,6 +424,7 @@ ip6_forward(m, srcrt) * case, as we have the outgoing interface for * encapsulated packet as "rt->rt_ifp". */ + lck_mtx_lock(sadb_mutex); sp = ipsec6_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp) { @@ -396,7 +433,7 @@ ip6_forward(m, srcrt) if (ipsechdrsiz < mtu) mtu -= ipsechdrsiz; } - + lck_mtx_unlock(sadb_mutex); /* * if mtu becomes less than minimum MTU, * tell minimum MTU (and I'll need to fragment it). @@ -404,7 +441,11 @@ ip6_forward(m, srcrt) if (mtu < IPV6_MMTU) mtu = IPV6_MMTU; #endif + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); + if (locked) + lck_mtx_lock(ip6_mutex); } m_freem(m); return; @@ -435,8 +476,12 @@ ip6_forward(m, srcrt) * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. 
*/ + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); + if (locked) + lck_mtx_lock(ip6_mutex); m_freem(m); return; } @@ -449,7 +494,7 @@ ip6_forward(m, srcrt) if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; /* If ipfw says divert, we have to just drop packet */ - if ((*ip6_fw_chk_ptr)(&ip6, rt->rt_ifp, &port, &m)) { + if (ip6_fw_chk_ptr(&ip6, rt->rt_ifp, &port, &m)) { m_freem(m); goto freecopy; } @@ -503,7 +548,7 @@ ip6_forward(m, srcrt) in6_clearscope(&ip6->ip6_dst); #endif - error = nd6_output(rt->rt_ifp, origifp, m, dst, rt); + error = nd6_output(rt->rt_ifp, origifp, m, dst, rt, locked); if (error) { in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); ip6stat.ip6s_cantforward++; @@ -547,7 +592,11 @@ ip6_forward(m, srcrt) code = ICMP6_DST_UNREACH_ADDR; break; } + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, type, code, 0); + if (locked) + lck_mtx_lock(ip6_mutex); return; freecopy: diff --git a/bsd/netinet6/ip6_fw.c b/bsd/netinet6/ip6_fw.c new file mode 100644 index 000000000..db7926f14 --- /dev/null +++ b/bsd/netinet6/ip6_fw.c @@ -0,0 +1,1369 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* $FreeBSD: src/sys/netinet6/ip6_fw.c,v 1.2.2.9 2002/04/28 05:40:27 suz Exp $ */ +/* $KAME: ip6_fw.c,v 1.21 2001/01/24 01:25:32 itojun Exp $ */ + +/* + * Copyright (C) 1998, 1999, 2000 and 2001 WIDE Project. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the project nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1993 Daniel Boulet + * Copyright (c) 1994 Ugen J.S.Antsilevich + * Copyright (c) 1996 Alex Nash + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + */ + +/* + * Implement IPv6 packet firewall + */ + + +#ifdef IP6DIVERT +#error "NOT SUPPORTED IPV6 DIVERT" +#endif +#ifdef IP6FW_DIVERT_RESTART +#error "NOT SUPPORTED IPV6 DIVERT" +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +MALLOC_DEFINE(M_IP6FW, "Ip6Fw/Ip6Acct", "Ip6Fw/Ip6Acct chain's"); + +static int fw6_debug = 1; +#ifdef IPV6FIREWALL_VERBOSE +static int fw6_verbose = 1; +#else +static int fw6_verbose = 0; +#endif +#ifdef IPV6FIREWALL_VERBOSE_LIMIT +static int fw6_verbose_limit = IPV6FIREWALL_VERBOSE_LIMIT; +#else +static int fw6_verbose_limit = 0; +#endif + +LIST_HEAD (ip6_fw_head, ip6_fw_chain) ip6_fw_chain; + +#ifdef SYSCTL_NODE +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); +SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, enable, CTLFLAG_RW, + &ip6_fw_enable, 0, "Enable ip6fw"); +SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, debug, CTLFLAG_RW, &fw6_debug, 0, ""); +SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose, CTLFLAG_RW, &fw6_verbose, 0, ""); +SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &fw6_verbose_limit, 0, ""); +#endif + +#define dprintf(a) do { \ + if (fw6_debug) \ + printf a; \ + } while (0) +#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? 
sizeof(buf) - len : 0 + +static int add_entry6 __P((struct ip6_fw_head *chainptr, struct ip6_fw *frwl)); +static int del_entry6 __P((struct ip6_fw_head *chainptr, u_short number)); +static int zero_entry6 __P((struct ip6_fw *frwl)); +static struct ip6_fw *check_ip6fw_struct __P((struct ip6_fw *m)); +static int ip6opts_match __P((struct ip6_hdr **ip6, struct ip6_fw *f, + struct mbuf **m, + int *off, int *nxt, u_short *offset)); +static int port_match6 __P((u_short *portptr, int nports, u_short port, + int range_flag)); +static int tcp6flg_match __P((struct tcphdr *tcp6, struct ip6_fw *f)); +static int icmp6type_match __P((struct icmp6_hdr * icmp, struct ip6_fw * f)); +static void ip6fw_report __P((struct ip6_fw *f, struct ip6_hdr *ip6, + struct ifnet *rif, struct ifnet *oif, int off, int nxt)); + +static int ip6_fw_chk __P((struct ip6_hdr **pip6, + struct ifnet *oif, u_int16_t *cookie, struct mbuf **m)); +static int ip6_fw_ctl __P((struct sockopt *)); + +static char err_prefix[] = "ip6_fw_ctl:"; +extern lck_mtx_t *ip6_mutex; + +/* + * Returns 1 if the port is matched by the vector, 0 otherwise + */ +static +__inline int +port_match6(u_short *portptr, int nports, u_short port, int range_flag) +{ + if (!nports) + return 1; + if (range_flag) { + if (portptr[0] <= port && port <= portptr[1]) { + return 1; + } + nports -= 2; + portptr += 2; + } + while (nports-- > 0) { + if (*portptr++ == port) { + return 1; + } + } + return 0; +} + +static int +tcp6flg_match(struct tcphdr *tcp6, struct ip6_fw *f) +{ + u_char flg_set, flg_clr; + + /* + * If an established connection is required, reject packets that + * have only SYN of RST|ACK|SYN set. Otherwise, fall through to + * other flag requirements. + */ + if ((f->fw_ipflg & IPV6_FW_IF_TCPEST) && + ((tcp6->th_flags & (IPV6_FW_TCPF_RST | IPV6_FW_TCPF_ACK | + IPV6_FW_TCPF_SYN)) == IPV6_FW_TCPF_SYN)) + return 0; + + flg_set = tcp6->th_flags & f->fw_tcpf; + flg_clr = tcp6->th_flags & f->fw_tcpnf; + + if (flg_set != f->fw_tcpf) + return 0; + if (flg_clr) + return 0; + + return 1; +} + +static int +icmp6type_match(struct icmp6_hdr *icmp6, struct ip6_fw *f) +{ + int type; + + if (!(f->fw_flg & IPV6_FW_F_ICMPBIT)) + return(1); + + type = icmp6->icmp6_type; + + /* check for matching type in the bitmap */ + if (type < IPV6_FW_ICMPTYPES_DIM * sizeof(unsigned) * 8 && + (f->fw_icmp6types[type / (sizeof(unsigned) * 8)] & + (1U << (type % (8 * sizeof(unsigned)))))) + return(1); + + return(0); /* no match */ +} + +static int +is_icmp6_query(struct ip6_hdr *ip6, int off) +{ + const struct icmp6_hdr *icmp6; + int icmp6_type; + + icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); + icmp6_type = icmp6->icmp6_type; + + if (icmp6_type == ICMP6_ECHO_REQUEST || + icmp6_type == ICMP6_MEMBERSHIP_QUERY || + icmp6_type == ICMP6_WRUREQUEST || + icmp6_type == ICMP6_FQDN_QUERY || + icmp6_type == ICMP6_NI_QUERY) + return(1); + + return(0); +} + +static int +ip6opts_match(struct ip6_hdr **pip6, struct ip6_fw *f, struct mbuf **m, + int *off, int *nxt, u_short *offset) +{ + int len; + struct ip6_hdr *ip6 = *pip6; + struct ip6_ext *ip6e; + u_char opts, nopts, nopts_sve; + + opts = f->fw_ip6opt; + nopts = nopts_sve = f->fw_ip6nopt; + + *nxt = ip6->ip6_nxt; + *off = sizeof(struct ip6_hdr); + len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); + while (*off < len) { + ip6e = (struct ip6_ext *)((caddr_t) ip6 + *off); + if ((*m)->m_len < *off + sizeof(*ip6e)) + goto opts_check; /* XXX */ + + switch(*nxt) { + case IPPROTO_FRAGMENT: + if ((*m)->m_len >= *off + sizeof(struct ip6_frag)) { + struct 
ip6_frag *ip6f; + + ip6f = (struct ip6_frag *) ((caddr_t)ip6 + *off); + *offset = ip6f->ip6f_offlg & IP6F_OFF_MASK; + } + opts &= ~IPV6_FW_IP6OPT_FRAG; + nopts &= ~IPV6_FW_IP6OPT_FRAG; + *off += sizeof(struct ip6_frag); + break; + case IPPROTO_AH: + opts &= ~IPV6_FW_IP6OPT_AH; + nopts &= ~IPV6_FW_IP6OPT_AH; + *off += (ip6e->ip6e_len + 2) << 2; + break; + default: + switch (*nxt) { + case IPPROTO_HOPOPTS: + opts &= ~IPV6_FW_IP6OPT_HOPOPT; + nopts &= ~IPV6_FW_IP6OPT_HOPOPT; + break; + case IPPROTO_ROUTING: + opts &= ~IPV6_FW_IP6OPT_ROUTE; + nopts &= ~IPV6_FW_IP6OPT_ROUTE; + break; + case IPPROTO_ESP: + opts &= ~IPV6_FW_IP6OPT_ESP; + nopts &= ~IPV6_FW_IP6OPT_ESP; + break; + case IPPROTO_NONE: + opts &= ~IPV6_FW_IP6OPT_NONXT; + nopts &= ~IPV6_FW_IP6OPT_NONXT; + goto opts_check; + break; + case IPPROTO_DSTOPTS: + opts &= ~IPV6_FW_IP6OPT_OPTS; + nopts &= ~IPV6_FW_IP6OPT_OPTS; + break; + default: + goto opts_check; + break; + } + *off += (ip6e->ip6e_len + 1) << 3; + break; + } + *nxt = ip6e->ip6e_nxt; + + } + opts_check: + if (f->fw_ip6opt == f->fw_ip6nopt) /* XXX */ + return 1; + + if (opts == 0 && nopts == nopts_sve) + return 1; + else + return 0; +} + +static +__inline int +iface_match(struct ifnet *ifp, union ip6_fw_if *ifu, int byname) +{ + /* Check by name or by IP address */ + if (byname) { + /* Check unit number (-1 is wildcard) */ + if (ifu->fu_via_if.unit != -1 + && ifp->if_unit != ifu->fu_via_if.unit) + return(0); + /* Check name */ + if (strncmp(ifp->if_name, ifu->fu_via_if.name, IP6FW_IFNLEN)) + return(0); + return(1); + } else if (!IN6_IS_ADDR_UNSPECIFIED(&ifu->fu_via_ip6)) { /* Zero == wildcard */ + struct ifaddr *ia; + + ifnet_lock_shared(ifp); + for (ia = ifp->if_addrlist.tqh_first; ia; ia = ia->ifa_list.tqe_next) + { + + if (ia->ifa_addr == NULL) + continue; + if (ia->ifa_addr->sa_family != AF_INET6) + continue; + if (!IN6_ARE_ADDR_EQUAL(&ifu->fu_via_ip6, + &(((struct sockaddr_in6 *) + (ia->ifa_addr))->sin6_addr))) + continue; + ifnet_lock_done(ifp); + return(1); + } + ifnet_lock_done(ifp); + return(0); + } + return(1); +} + +static void +ip6fw_report(struct ip6_fw *f, struct ip6_hdr *ip6, + struct ifnet *rif, struct ifnet *oif, int off, int nxt) +{ + static int counter; + struct tcphdr *const tcp6 = (struct tcphdr *) ((caddr_t) ip6+ off); + struct udphdr *const udp = (struct udphdr *) ((caddr_t) ip6+ off); + struct icmp6_hdr *const icmp6 = (struct icmp6_hdr *) ((caddr_t) ip6+ off); + int count; + char *action; + char action2[32], proto[102], name[18]; + int len; + + count = f ? f->fw_pcnt : ++counter; + if (fw6_verbose_limit != 0 && count > fw6_verbose_limit) + return; + + /* Print command name */ + snprintf(SNPARGS(name, 0), "ip6fw: %d", f ? 
f->fw_number : -1); + + action = action2; + if (!f) + action = "Refuse"; + else { + switch (f->fw_flg & IPV6_FW_F_COMMAND) { + case IPV6_FW_F_DENY: + action = "Deny"; + break; + case IPV6_FW_F_REJECT: + if (f->fw_reject_code == IPV6_FW_REJECT_RST) + action = "Reset"; + else + action = "Unreach"; + break; + case IPV6_FW_F_ACCEPT: + action = "Accept"; + break; + case IPV6_FW_F_COUNT: + action = "Count"; + break; + case IPV6_FW_F_DIVERT: + snprintf(SNPARGS(action2, 0), "Divert %d", + f->fw_divert_port); + break; + case IPV6_FW_F_TEE: + snprintf(SNPARGS(action2, 0), "Tee %d", + f->fw_divert_port); + break; + case IPV6_FW_F_SKIPTO: + snprintf(SNPARGS(action2, 0), "SkipTo %d", + f->fw_skipto_rule); + break; + default: + action = "UNKNOWN"; + break; + } + } + + switch (nxt) { + case IPPROTO_TCP: + len = snprintf(SNPARGS(proto, 0), "TCP [%s]", + ip6_sprintf(&ip6->ip6_src)); + if (off > 0) + len += snprintf(SNPARGS(proto, len), ":%d ", + ntohs(tcp6->th_sport)); + else + len += snprintf(SNPARGS(proto, len), " "); + len += snprintf(SNPARGS(proto, len), "[%s]", + ip6_sprintf(&ip6->ip6_dst)); + if (off > 0) + snprintf(SNPARGS(proto, len), ":%d", + ntohs(tcp6->th_dport)); + break; + case IPPROTO_UDP: + len = snprintf(SNPARGS(proto, 0), "UDP [%s]", + ip6_sprintf(&ip6->ip6_src)); + if (off > 0) + len += snprintf(SNPARGS(proto, len), ":%d ", + ntohs(udp->uh_sport)); + else + len += snprintf(SNPARGS(proto, len), " "); + len += snprintf(SNPARGS(proto, len), "[%s]", + ip6_sprintf(&ip6->ip6_dst)); + if (off > 0) + snprintf(SNPARGS(proto, len), ":%d", + ntohs(udp->uh_dport)); + break; + case IPPROTO_ICMPV6: + if (off > 0) + len = snprintf(SNPARGS(proto, 0), "IPV6-ICMP:%u.%u ", + icmp6->icmp6_type, icmp6->icmp6_code); + else + len = snprintf(SNPARGS(proto, 0), "IPV6-ICMP "); + len += snprintf(SNPARGS(proto, len), "[%s]", + ip6_sprintf(&ip6->ip6_src)); + snprintf(SNPARGS(proto, len), " [%s]", + ip6_sprintf(&ip6->ip6_dst)); + break; + default: + len = snprintf(SNPARGS(proto, 0), "P:%d [%s]", nxt, + ip6_sprintf(&ip6->ip6_src)); + snprintf(SNPARGS(proto, len), " [%s]", + ip6_sprintf(&ip6->ip6_dst)); + break; + } + + if (oif) + log(LOG_AUTHPRIV | LOG_INFO, "%s %s %s out via %s\n", + name, action, proto, if_name(oif)); + else if (rif) + log(LOG_AUTHPRIV | LOG_INFO, "%s %s %s in via %s\n", + name, action, proto, if_name(rif)); + else + log(LOG_AUTHPRIV | LOG_INFO, "%s %s %s", + name, action, proto); + if (fw6_verbose_limit != 0 && count == fw6_verbose_limit) + log(LOG_AUTHPRIV | LOG_INFO, "ip6fw: limit reached on entry %d\n", + f ? f->fw_number : -1); +} + +/* + * Parameters: + * + * ip Pointer to packet header (struct ip6_hdr *) + * hlen Packet header length + * oif Outgoing interface, or NULL if packet is incoming + * #ifndef IP6FW_DIVERT_RESTART + * *cookie Ignore all divert/tee rules to this port (if non-zero) + * #else + * *cookie Skip up to the first rule past this rule number; + * #endif + * *m The packet; we set to NULL when/if we nuke it. + * + * Return value: + * + * 0 The packet is to be accepted and routed normally OR + * the packet was denied/rejected and has been dropped; + * in the latter case, *m is equal to NULL upon return. + * port Divert the packet to port. 
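
Before the matcher itself, it helps to see the port test it leans on. port_match6(), defined earlier in this file, treats the first two entries of a rule's port vector as an inclusive range when the range flag is set, and every other entry as an individual port; an empty vector matches anything. A user-space transcription of that logic:

#include <stdio.h>

/* Transcribes port_match6() from earlier in this file: with
 * range_flag set, ports[0]..ports[1] form an inclusive range and any
 * remaining entries are individual ports; with it clear, all entries
 * are individual ports.  An empty vector matches everything. */
static int
port_match(const unsigned short *ports, int nports, unsigned short port,
    int range_flag)
{
    if (!nports)
        return 1;
    if (range_flag) {
        if (ports[0] <= port && port <= ports[1])
            return 1;
        nports -= 2;
        ports += 2;
    }
    while (nports-- > 0) {
        if (*ports++ == port)
            return 1;
    }
    return 0;
}

int
main(void)
{
    unsigned short v[] = { 1000, 2000, 8080 };

    printf("%d %d %d\n",
        port_match(v, 3, 1500, 1),    /* inside 1000-2000: 1 */
        port_match(v, 3, 8080, 1),    /* listed after range: 1 */
        port_match(v, 3, 9090, 1));   /* neither: 0 */
    return 0;
}
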
+ */ + +static int +ip6_fw_chk(struct ip6_hdr **pip6, + struct ifnet *oif, u_int16_t *cookie, struct mbuf **m) +{ + struct ip6_fw_chain *chain; + struct ip6_fw *rule = NULL; + struct ip6_hdr *ip6 = *pip6; + struct ifnet *const rif = (*m)->m_pkthdr.rcvif; + u_short offset = 0; + int off = sizeof(struct ip6_hdr), nxt = ip6->ip6_nxt; + u_short src_port, dst_port; +#ifdef IP6FW_DIVERT_RESTART + u_int16_t skipto = *cookie; +#else + u_int16_t ignport = ntohs(*cookie); +#endif + struct timeval timenow; + + getmicrotime(&timenow); + + *cookie = 0; + /* + * Go down the chain, looking for enlightment + * #ifdef IP6FW_DIVERT_RESTART + * If we've been asked to start at a given rule immediatly, do so. + * #endif + */ + chain = LIST_FIRST(&ip6_fw_chain); +#ifdef IP6FW_DIVERT_RESTART + if (skipto) { + if (skipto >= 65535) + goto dropit; + while (chain && (chain->rule->fw_number <= skipto)) { + chain = LIST_NEXT(chain, chain); + } + if (! chain) goto dropit; + } +#endif /* IP6FW_DIVERT_RESTART */ + for (; chain; chain = LIST_NEXT(chain, chain)) { + struct ip6_fw *const f = chain->rule; + + if (oif) { + /* Check direction outbound */ + if (!(f->fw_flg & IPV6_FW_F_OUT)) + continue; + } else { + /* Check direction inbound */ + if (!(f->fw_flg & IPV6_FW_F_IN)) + continue; + } + +#define IN6_ARE_ADDR_MASKEQUAL(x,y,z) (\ + (((x)->s6_addr32[0] & (y)->s6_addr32[0]) == (z)->s6_addr32[0]) && \ + (((x)->s6_addr32[1] & (y)->s6_addr32[1]) == (z)->s6_addr32[1]) && \ + (((x)->s6_addr32[2] & (y)->s6_addr32[2]) == (z)->s6_addr32[2]) && \ + (((x)->s6_addr32[3] & (y)->s6_addr32[3]) == (z)->s6_addr32[3])) + + /* If src-addr doesn't match, not this rule. */ + if (((f->fw_flg & IPV6_FW_F_INVSRC) != 0) ^ + (!IN6_ARE_ADDR_MASKEQUAL(&ip6->ip6_src,&f->fw_smsk,&f->fw_src))) + continue; + + /* If dest-addr doesn't match, not this rule. */ + if (((f->fw_flg & IPV6_FW_F_INVDST) != 0) ^ + (!IN6_ARE_ADDR_MASKEQUAL(&ip6->ip6_dst,&f->fw_dmsk,&f->fw_dst))) + continue; + +#undef IN6_ARE_ADDR_MASKEQUAL + /* Interface check */ + if ((f->fw_flg & IF6_FW_F_VIAHACK) == IF6_FW_F_VIAHACK) { + struct ifnet *const iface = oif ? oif : rif; + + /* Backwards compatibility hack for "via" */ + if (!iface || !iface_match(iface, + &f->fw_in_if, f->fw_flg & IPV6_FW_F_OIFNAME)) + continue; + } else { + /* Check receive interface */ + if ((f->fw_flg & IPV6_FW_F_IIFACE) + && (!rif || !iface_match(rif, + &f->fw_in_if, f->fw_flg & IPV6_FW_F_IIFNAME))) + continue; + /* Check outgoing interface */ + if ((f->fw_flg & IPV6_FW_F_OIFACE) + && (!oif || !iface_match(oif, + &f->fw_out_if, f->fw_flg & IPV6_FW_F_OIFNAME))) + continue; + } + + /* Check IP options */ + if (!ip6opts_match(&ip6, f, m, &off, &nxt, &offset)) + continue; + + /* Fragments */ + if ((f->fw_flg & IPV6_FW_F_FRAG) && !offset) + continue; + + /* Check protocol; if wildcard, match */ + if (f->fw_prot == IPPROTO_IPV6) + goto got_match; + + /* If different, don't match */ + if (nxt != f->fw_prot) + continue; + +#define PULLUP_TO(len) do { \ + if ((*m)->m_len < (len) \ + && (*m = m_pullup(*m, (len))) == 0) { \ + goto dropit; \ + } \ + *pip6 = ip6 = mtod(*m, struct ip6_hdr *); \ + } while (0) + + /* Protocol specific checks */ + switch (nxt) { + case IPPROTO_TCP: + { + struct tcphdr *tcp6; + + if (offset == 1) { /* cf. RFC 1858 */ + PULLUP_TO(off + 4); /* XXX ? */ + goto bogusfrag; + } + if (offset != 0) { + /* + * TCP flags and ports aren't available in this + * packet -- if this rule specified either one, + * we consider the rule a non-match. 
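
The IN6_ARE_ADDR_MASKEQUAL macro above is the heart of rule matching: the packet address ANDed with the rule's mask must equal the rule's pre-masked address, tested 32 bits at a time. A byte-wise, portable sketch of the same comparison:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

/* Byte-wise version of the IN6_ARE_ADDR_MASKEQUAL check: the packet
 * address ANDed with the rule mask must equal the rule's pre-masked
 * address in every position. */
static int
masked_equal(const struct in6_addr *x, const struct in6_addr *mask,
    const struct in6_addr *rule)
{
    int i;

    for (i = 0; i < 16; i++) {
        if ((x->s6_addr[i] & mask->s6_addr[i]) != rule->s6_addr[i])
            return 0;
    }
    return 1;
}

int
main(void)
{
    struct in6_addr pkt, mask, rule;

    inet_pton(AF_INET6, "ffff:ffff::", &mask);    /* /32 mask */
    inet_pton(AF_INET6, "2001:db8::", &rule);     /* pre-masked */
    inet_pton(AF_INET6, "2001:db8::42", &pkt);
    printf("match: %d\n", masked_equal(&pkt, &mask, &rule));
    return 0;
}
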
+ */ + if (f->fw_nports != 0 || + f->fw_tcpf != f->fw_tcpnf) + continue; + + break; + } + PULLUP_TO(off + 14); + tcp6 = (struct tcphdr *) ((caddr_t)ip6 + off); + if (((f->fw_tcpf != f->fw_tcpnf) || + (f->fw_ipflg & IPV6_FW_IF_TCPEST)) && + !tcp6flg_match(tcp6, f)) + continue; + src_port = ntohs(tcp6->th_sport); + dst_port = ntohs(tcp6->th_dport); + goto check_ports; + } + + case IPPROTO_UDP: + { + struct udphdr *udp; + + if (offset != 0) { + /* + * Port specification is unavailable -- if this + * rule specifies a port, we consider the rule + * a non-match. + */ + if (f->fw_nports != 0) + continue; + + break; + } + PULLUP_TO(off + 4); + udp = (struct udphdr *) ((caddr_t)ip6 + off); + src_port = ntohs(udp->uh_sport); + dst_port = ntohs(udp->uh_dport); +check_ports: + if (!port_match6(&f->fw_pts[0], + IPV6_FW_GETNSRCP(f), src_port, + f->fw_flg & IPV6_FW_F_SRNG)) + continue; + if (!port_match6(&f->fw_pts[IPV6_FW_GETNSRCP(f)], + IPV6_FW_GETNDSTP(f), dst_port, + f->fw_flg & IPV6_FW_F_DRNG)) + continue; + break; + } + + case IPPROTO_ICMPV6: + { + struct icmp6_hdr *icmp; + + if (offset != 0) /* Type isn't valid */ + break; + PULLUP_TO(off + 2); + icmp = (struct icmp6_hdr *) ((caddr_t)ip6 + off); + if (!icmp6type_match(icmp, f)) + continue; + break; + } +#undef PULLUP_TO + +bogusfrag: + if (fw6_verbose) + ip6fw_report(NULL, ip6, rif, oif, off, nxt); + goto dropit; + } + +got_match: +#ifndef IP6FW_DIVERT_RESTART + /* Ignore divert/tee rule if socket port is "ignport" */ + switch (f->fw_flg & IPV6_FW_F_COMMAND) { + case IPV6_FW_F_DIVERT: + case IPV6_FW_F_TEE: + if (f->fw_divert_port == ignport) + continue; /* ignore this rule */ + break; + } + +#endif /* IP6FW_DIVERT_RESTART */ + /* Update statistics */ + f->fw_pcnt += 1; + f->fw_bcnt += ntohs(ip6->ip6_plen); + f->timestamp = timenow.tv_sec; + + /* Log to console if desired */ + if ((f->fw_flg & IPV6_FW_F_PRN) && fw6_verbose) + ip6fw_report(f, ip6, rif, oif, off, nxt); + + /* Take appropriate action */ + switch (f->fw_flg & IPV6_FW_F_COMMAND) { + case IPV6_FW_F_ACCEPT: + return(0); + case IPV6_FW_F_COUNT: + continue; + case IPV6_FW_F_DIVERT: +#ifdef IP6FW_DIVERT_RESTART + *cookie = f->fw_number; +#else + *cookie = htons(f->fw_divert_port); +#endif /* IP6FW_DIVERT_RESTART */ + return(f->fw_divert_port); + case IPV6_FW_F_TEE: + /* + * XXX someday tee packet here, but beware that you + * can't use m_copym() or m_copypacket() because + * the divert input routine modifies the mbuf + * (and these routines only increment reference + * counts in the case of mbuf clusters), so need + * to write custom routine. + */ + continue; + case IPV6_FW_F_SKIPTO: +#ifdef DIAGNOSTIC + while (chain->chain.le_next + && chain->chain.le_next->rule->fw_number + < f->fw_skipto_rule) +#else + while (chain->chain.le_next->rule->fw_number + < f->fw_skipto_rule) +#endif + chain = chain->chain.le_next; + continue; + } + + /* Deny/reject this packet using this rule */ + rule = f; + break; + } + +#ifdef DIAGNOSTIC + /* Rule 65535 should always be there and should always match */ + if (!chain) + panic("ip6_fw: chain"); +#endif + + /* + * At this point, we're going to drop the packet. 
+ * Send a reject notice if all of the following are true: + * + * - The packet matched a reject rule + * - The packet is not an ICMP packet, or is an ICMP query packet + * - The packet is not a multicast or broadcast packet + */ + if ((rule->fw_flg & IPV6_FW_F_COMMAND) == IPV6_FW_F_REJECT + && (nxt != IPPROTO_ICMPV6 || is_icmp6_query(ip6, off)) + && !((*m)->m_flags & (M_BCAST|M_MCAST)) + && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + switch (rule->fw_reject_code) { + case IPV6_FW_REJECT_RST: + { + struct tcphdr *const tcp = + (struct tcphdr *) ((caddr_t)ip6 + off); + struct { + struct ip6_hdr ip6; + struct tcphdr th; + } ti; + tcp_seq ack, seq; + int flags; + + if (offset != 0 || (tcp->th_flags & TH_RST)) + break; + + ti.ip6 = *ip6; + ti.th = *tcp; + ti.th.th_seq = ntohl(ti.th.th_seq); + ti.th.th_ack = ntohl(ti.th.th_ack); + ti.ip6.ip6_nxt = IPPROTO_TCP; + if (ti.th.th_flags & TH_ACK) { + ack = 0; + seq = ti.th.th_ack; + flags = TH_RST; + } else { + ack = ti.th.th_seq; + if (((*m)->m_flags & M_PKTHDR) != 0) { + ack += (*m)->m_pkthdr.len - off + - (ti.th.th_off << 2); + } else if (ip6->ip6_plen) { + ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) + - off - (ti.th.th_off << 2); + } else { + m_freem(*m); + *m = 0; + break; + } + seq = 0; + flags = TH_RST|TH_ACK; + } + bcopy(&ti, ip6, sizeof(ti)); + tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1), + *m, ack, seq, flags); + *m = NULL; + break; + } + default: /* Send an ICMP unreachable using code */ + if (oif) + (*m)->m_pkthdr.rcvif = oif; + lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(ip6_mutex); + icmp6_error(*m, ICMP6_DST_UNREACH, + rule->fw_reject_code, 0); + lck_mtx_lock(ip6_mutex); + *m = NULL; + break; + } + } + +dropit: + /* + * Finally, drop the packet. + */ + if (*m) { + m_freem(*m); + *m = NULL; + } + return(0); +} + +static int +add_entry6(struct ip6_fw_head *chainptr, struct ip6_fw *frwl) +{ + struct ip6_fw *ftmp = 0; + struct ip6_fw_chain *fwc = 0, *fcp, *fcpl = 0; + u_short nbr = 0; + int s; + + fwc = _MALLOC(sizeof *fwc, M_IP6FW, M_WAITOK); + ftmp = _MALLOC(sizeof *ftmp, M_IP6FW, M_WAITOK); + if (!fwc || !ftmp) { + dprintf(("%s malloc said no\n", err_prefix)); + if (fwc) FREE(fwc, M_IP6FW); + if (ftmp) FREE(ftmp, M_IP6FW); + return (ENOSPC); + } + + bcopy(frwl, ftmp, sizeof(struct ip6_fw)); + ftmp->fw_in_if.fu_via_if.name[IP6FW_IFNLEN - 1] = '\0'; + ftmp->fw_pcnt = 0L; + ftmp->fw_bcnt = 0L; + fwc->rule = ftmp; + + s = splnet(); + + if (!chainptr->lh_first) { + LIST_INSERT_HEAD(chainptr, fwc, chain); + splx(s); + return(0); + } else if (ftmp->fw_number == (u_short)-1) { + if (fwc) FREE(fwc, M_IP6FW); + if (ftmp) FREE(ftmp, M_IP6FW); + splx(s); + dprintf(("%s bad rule number\n", err_prefix)); + return (EINVAL); + } + + /* If entry number is 0, find highest numbered rule and add 100 */ + if (ftmp->fw_number == 0) { + for (fcp = chainptr->lh_first; fcp; fcp = fcp->chain.le_next) { + if (fcp->rule->fw_number != (u_short)-1) + nbr = fcp->rule->fw_number; + else + break; + } + if (nbr < (u_short)-1 - 100) + nbr += 100; + ftmp->fw_number = nbr; + } + + /* Got a valid number; now insert it, keeping the list ordered */ + for (fcp = chainptr->lh_first; fcp; fcp = fcp->chain.le_next) { + if (fcp->rule->fw_number > ftmp->fw_number) { + if (fcpl) { + LIST_INSERT_AFTER(fcpl, fwc, chain); + } else { + LIST_INSERT_HEAD(chainptr, fwc, chain); + } + break; + } else { + fcpl = fcp; + } + } + + splx(s); + return (0); +} + +static int +del_entry6(struct ip6_fw_head *chainptr, u_short number) +{ + struct ip6_fw_chain *fcp; + int s; + 
+ s = splnet(); + + fcp = chainptr->lh_first; + if (number != (u_short)-1) { + for (; fcp; fcp = fcp->chain.le_next) { + if (fcp->rule->fw_number == number) { + LIST_REMOVE(fcp, chain); + splx(s); + FREE(fcp->rule, M_IP6FW); + FREE(fcp, M_IP6FW); + return 0; + } + } + } + + splx(s); + return (EINVAL); +} + +static int +zero_entry6(struct ip6_fw *frwl) +{ + struct ip6_fw_chain *fcp; + int s; + + /* + * It's possible to insert multiple chain entries with the + * same number, so we don't stop after finding the first + * match if zeroing a specific entry. + */ + s = splnet(); + for (fcp = ip6_fw_chain.lh_first; fcp; fcp = fcp->chain.le_next) + if (!frwl || frwl->fw_number == 0 || frwl->fw_number == fcp->rule->fw_number) { + fcp->rule->fw_bcnt = fcp->rule->fw_pcnt = 0; + fcp->rule->timestamp = 0; + } + splx(s); + + if (fw6_verbose) { + if (frwl) + log(LOG_AUTHPRIV | LOG_NOTICE, + "ip6fw: Entry %d cleared.\n", frwl->fw_number); + else + log(LOG_AUTHPRIV | LOG_NOTICE, + "ip6fw: Accounting cleared.\n"); + } + + return(0); +} + +static struct ip6_fw * +check_ip6fw_struct(struct ip6_fw *frwl) +{ + /* Check for invalid flag bits */ + if ((frwl->fw_flg & ~IPV6_FW_F_MASK) != 0) { + dprintf(("%s undefined flag bits set (flags=%x)\n", + err_prefix, frwl->fw_flg)); + return (NULL); + } + /* Must apply to incoming or outgoing (or both) */ + if (!(frwl->fw_flg & (IPV6_FW_F_IN | IPV6_FW_F_OUT))) { + dprintf(("%s neither in nor out\n", err_prefix)); + return (NULL); + } + /* Empty interface name is no good */ + if (((frwl->fw_flg & IPV6_FW_F_IIFNAME) + && !*frwl->fw_in_if.fu_via_if.name) + || ((frwl->fw_flg & IPV6_FW_F_OIFNAME) + && !*frwl->fw_out_if.fu_via_if.name)) { + dprintf(("%s empty interface name\n", err_prefix)); + return (NULL); + } + /* Sanity check interface matching */ + if ((frwl->fw_flg & IF6_FW_F_VIAHACK) == IF6_FW_F_VIAHACK) { + ; /* allow "via" backwards compatibility */ + } else if ((frwl->fw_flg & IPV6_FW_F_IN) + && (frwl->fw_flg & IPV6_FW_F_OIFACE)) { + dprintf(("%s outgoing interface check on incoming\n", + err_prefix)); + return (NULL); + } + /* Sanity check port ranges */ + if ((frwl->fw_flg & IPV6_FW_F_SRNG) && IPV6_FW_GETNSRCP(frwl) < 2) { + dprintf(("%s src range set but n_src_p=%d\n", + err_prefix, IPV6_FW_GETNSRCP(frwl))); + return (NULL); + } + if ((frwl->fw_flg & IPV6_FW_F_DRNG) && IPV6_FW_GETNDSTP(frwl) < 2) { + dprintf(("%s dst range set but n_dst_p=%d\n", + err_prefix, IPV6_FW_GETNDSTP(frwl))); + return (NULL); + } + if (IPV6_FW_GETNSRCP(frwl) + IPV6_FW_GETNDSTP(frwl) > IPV6_FW_MAX_PORTS) { + dprintf(("%s too many ports (%d+%d)\n", + err_prefix, IPV6_FW_GETNSRCP(frwl), IPV6_FW_GETNDSTP(frwl))); + return (NULL); + } + /* + * Protocols other than TCP/UDP don't use port range + */ + if ((frwl->fw_prot != IPPROTO_TCP) && + (frwl->fw_prot != IPPROTO_UDP) && + (IPV6_FW_GETNSRCP(frwl) || IPV6_FW_GETNDSTP(frwl))) { + dprintf(("%s port(s) specified for non TCP/UDP rule\n", + err_prefix)); + return(NULL); + } + + /* + * Rather than modify the entry to make such entries work, + * we reject this rule and require user level utilities + * to enforce whatever policy they deem appropriate. 
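
The check that follows this comment in check_ip6fw_struct() rejects rules whose address carries bits outside its own mask, because the masked comparison shown earlier could then never succeed. A byte-wise sketch of that "rule never matches" test (the patch does it 32 bits at a time):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

/* Any rule-address bit outside the mask means the masked comparison
 * can never succeed, so the rule is rejected up front instead of
 * sitting in the chain matching nothing. */
static int
rule_can_match(const struct in6_addr *addr, const struct in6_addr *mask)
{
    int i;

    for (i = 0; i < 16; i++) {
        if (addr->s6_addr[i] & ~mask->s6_addr[i])
            return 0;    /* stray bit outside the mask */
    }
    return 1;
}

int
main(void)
{
    struct in6_addr a, m;

    inet_pton(AF_INET6, "ffff:ffff::", &m);    /* /32 mask */
    inet_pton(AF_INET6, "2001:db8::", &a);
    printf("%s\n", rule_can_match(&a, &m) ? "ok" : "never matches");
    inet_pton(AF_INET6, "2001:db8::1", &a);    /* ::1 lies outside /32 */
    printf("%s\n", rule_can_match(&a, &m) ? "ok" : "never matches");
    return 0;
}
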
+ */ + if ((frwl->fw_src.s6_addr32[0] & (~frwl->fw_smsk.s6_addr32[0])) || + (frwl->fw_src.s6_addr32[1] & (~frwl->fw_smsk.s6_addr32[1])) || + (frwl->fw_src.s6_addr32[2] & (~frwl->fw_smsk.s6_addr32[2])) || + (frwl->fw_src.s6_addr32[3] & (~frwl->fw_smsk.s6_addr32[3])) || + (frwl->fw_dst.s6_addr32[0] & (~frwl->fw_dmsk.s6_addr32[0])) || + (frwl->fw_dst.s6_addr32[1] & (~frwl->fw_dmsk.s6_addr32[1])) || + (frwl->fw_dst.s6_addr32[2] & (~frwl->fw_dmsk.s6_addr32[2])) || + (frwl->fw_dst.s6_addr32[3] & (~frwl->fw_dmsk.s6_addr32[3]))) { + dprintf(("%s rule never matches\n", err_prefix)); + return(NULL); + } + + if ((frwl->fw_flg & IPV6_FW_F_FRAG) && + (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) { + if (frwl->fw_nports) { + dprintf(("%s cannot mix 'frag' and ports\n", err_prefix)); + return(NULL); + } + if (frwl->fw_prot == IPPROTO_TCP && + frwl->fw_tcpf != frwl->fw_tcpnf) { + dprintf(("%s cannot mix 'frag' with TCP flags\n", err_prefix)); + return(NULL); + } + } + + /* Check command specific stuff */ + switch (frwl->fw_flg & IPV6_FW_F_COMMAND) + { + case IPV6_FW_F_REJECT: + if (frwl->fw_reject_code >= 0x100 + && !(frwl->fw_prot == IPPROTO_TCP + && frwl->fw_reject_code == IPV6_FW_REJECT_RST)) { + dprintf(("%s unknown reject code\n", err_prefix)); + return(NULL); + } + break; + case IPV6_FW_F_DIVERT: /* Diverting to port zero is invalid */ + case IPV6_FW_F_TEE: + if (frwl->fw_divert_port == 0) { + dprintf(("%s can't divert to port 0\n", err_prefix)); + return (NULL); + } + break; + case IPV6_FW_F_DENY: + case IPV6_FW_F_ACCEPT: + case IPV6_FW_F_COUNT: + case IPV6_FW_F_SKIPTO: + break; + default: + dprintf(("%s invalid command\n", err_prefix)); + return(NULL); + } + + return frwl; +} + +/*#####*/ +#if 0 +static int +ip6_fw_ctl(int stage, struct mbuf **mm) +{ + int error; + struct mbuf *m; + + if (stage == IPV6_FW_GET) { + struct ip6_fw_chain *fcp = ip6_fw_chain.lh_first; + *mm = m = m_get(M_WAIT, MT_DATA); /* XXX */ + if (!m) + return(ENOBUFS); + if (sizeof *(fcp->rule) > MLEN) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return(ENOBUFS); + } + } + for (; fcp; fcp = fcp->chain.le_next) { + bcopy(fcp->rule, m->m_data, sizeof *(fcp->rule)); + m->m_len = sizeof *(fcp->rule); + m->m_next = m_get(M_WAIT, MT_DATA); /* XXX */ + if (!m->m_next) { + m_freem(*mm); + return(ENOBUFS); + } + m = m->m_next; + if (sizeof *(fcp->rule) > MLEN) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) { + m_freem(*mm); + return(ENOBUFS); + } + } + m->m_len = 0; + } + return (0); + } + m = *mm; + /* only allow get calls if secure mode > 2 */ + if (securelevel > 2) { + if (m) { + (void)m_freem(m); + *mm = 0; + } + return(EPERM); + } + if (stage == IPV6_FW_FLUSH) { + while (ip6_fw_chain.lh_first != NULL && + ip6_fw_chain.lh_first->rule->fw_number != (u_short)-1) { + struct ip6_fw_chain *fcp = ip6_fw_chain.lh_first; + int s = splnet(); + LIST_REMOVE(ip6_fw_chain.lh_first, chain); + splx(s); + FREE(fcp->rule, M_IP6FW); + FREE(fcp, M_IP6FW); + } + if (m) { + (void)m_freem(m); + *mm = 0; + } + return (0); + } + if (stage == IPV6_FW_ZERO) { + error = zero_entry6(m); + if (m) { + (void)m_freem(m); + *mm = 0; + } + return (error); + } + if (m == NULL) { + printf("%s NULL mbuf ptr\n", err_prefix); + return (EINVAL); + } + + if (stage == IPV6_FW_ADD) { + struct ip6_fw *frwl = check_ip6fw_mbuf(m); + + if (!frwl) + error = EINVAL; + else + error = add_entry6(&ip6_fw_chain, frwl); + if (m) { + (void)m_freem(m); + *mm = 0; + } + return error; + } + if (stage == IPV6_FW_DEL) { + if (m->m_len != 
sizeof(struct ip6_fw)) { + dprintf(("%s len=%ld, want %lu\n", err_prefix, m->m_len, + sizeof(struct ip6_fw))); + error = EINVAL; + } else if (mtod(m, struct ip6_fw *)->fw_number == (u_short)-1) { + dprintf(("%s can't delete rule 65535\n", err_prefix)); + error = EINVAL; + } else + error = del_entry6(&ip6_fw_chain, + mtod(m, struct ip6_fw *)->fw_number); + if (m) { + (void)m_freem(m); + *mm = 0; + } + return error; + } + + dprintf(("%s unknown request %d\n", err_prefix, stage)); + if (m) { + (void)m_freem(m); + *mm = 0; + } + return (EINVAL); +} +#endif + +static int +ip6_fw_ctl(struct sockopt *sopt) +{ + int error = 0; + int spl; + int valsize; + struct ip6_fw rule; + + if (securelevel >= 3 && + (sopt->sopt_dir != SOPT_GET || sopt->sopt_name != IPV6_FW_GET)) + return (EPERM); + + /* We ALWAYS expect the client to pass in a rule structure so that we can + * check the version of the API that they are using. In the case of a + * IPV6_FW_GET operation, the first rule of the output buffer passed to us + * must have the version set. */ + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof rule) return EINVAL; + + /* save sopt->sopt_valsize */ + valsize = sopt->sopt_valsize; + if (error = sooptcopyin(sopt, &rule, sizeof(rule), sizeof(rule))) + return error; + + if (rule.version != IPV6_FW_CURRENT_API_VERSION) return EINVAL; + rule.version = 0xFFFFFFFF; /* version is meaningless once rules "make it in the door". */ + + switch (sopt->sopt_name) + { + case IPV6_FW_GET: + { + struct ip6_fw_chain *fcp; + struct ip6_fw *buf; + size_t size = 0; + + spl = splnet(); + LIST_FOREACH(fcp, &ip6_fw_chain, chain) + size += sizeof *buf; + + buf = _MALLOC(size, M_TEMP, M_WAITOK); + if (!buf) error = ENOBUFS; + else + { + struct ip6_fw *bp = buf; + LIST_FOREACH(fcp, &ip6_fw_chain, chain) + { + bcopy(fcp->rule, bp, sizeof *bp); + bp->version = IPV6_FW_CURRENT_API_VERSION; + bp++; + } + } + + splx(spl); + if (buf) + { + sopt->sopt_valsize = valsize; + error = sooptcopyout(sopt, buf, size); + FREE(buf, M_TEMP); + } + + break; + } + + case IPV6_FW_FLUSH: + spl = splnet(); + while (ip6_fw_chain.lh_first && + ip6_fw_chain.lh_first->rule->fw_number != (u_short)-1) + { + struct ip6_fw_chain *fcp = ip6_fw_chain.lh_first; + LIST_REMOVE(ip6_fw_chain.lh_first, chain); + FREE(fcp->rule, M_IP6FW); + FREE(fcp, M_IP6FW); + } + splx(spl); + break; + + case IPV6_FW_ZERO: + error = zero_entry6(&rule); + break; + + case IPV6_FW_ADD: + if (check_ip6fw_struct(&rule)) + error = add_entry6(&ip6_fw_chain, &rule); + else + error = EINVAL; + break; + + case IPV6_FW_DEL: + if (rule.fw_number == (u_short)-1) + { + dprintf(("%s can't delete rule 65535\n", err_prefix)); + error = EINVAL; + } + else + error = del_entry6(&ip6_fw_chain, rule.fw_number); + break; + + default: + dprintf(("%s invalid option %d\n", err_prefix, sopt->sopt_name)); + error = EINVAL; + } + + return error; +} + +void +ip6_fw_init(void) +{ + struct ip6_fw default_rule; + + ip6_fw_chk_ptr = ip6_fw_chk; + ip6_fw_ctl_ptr = ip6_fw_ctl; + LIST_INIT(&ip6_fw_chain); + + bzero(&default_rule, sizeof default_rule); + default_rule.fw_prot = IPPROTO_IPV6; + default_rule.fw_number = (u_short)-1; +#ifdef IPV6FIREWALL_DEFAULT_TO_ACCEPT + default_rule.fw_flg |= IPV6_FW_F_ACCEPT; +#else + default_rule.fw_flg |= IPV6_FW_F_DENY; +#endif + default_rule.fw_flg |= IPV6_FW_F_IN | IPV6_FW_F_OUT; + if (check_ip6fw_struct(&default_rule) == NULL || + add_entry6(&ip6_fw_chain, &default_rule)) + panic(__FUNCTION__); + + printf("IPv6 packet filtering initialized, "); +#ifdef 
IPV6FIREWALL_DEFAULT_TO_ACCEPT + printf("default to accept, "); +#endif +#ifndef IPV6FIREWALL_VERBOSE + printf("logging disabled\n"); +#else + if (fw6_verbose_limit == 0) + printf("unlimited logging\n"); + else + printf("logging limited to %d packets/entry\n", + fw6_verbose_limit); +#endif +} + diff --git a/bsd/netinet6/ip6_fw.h b/bsd/netinet6/ip6_fw.h index ed1a10c89..cfb2c4ab3 100644 --- a/bsd/netinet6/ip6_fw.h +++ b/bsd/netinet6/ip6_fw.h @@ -207,8 +207,7 @@ struct ip6_fw_chain { /* * Main firewall chains definitions and global var's definitions. */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define M_IP6FW M_IPFW @@ -221,14 +220,13 @@ void ip6_fw_init(void); /* Firewall hooks */ struct ip6_hdr; struct sockopt; -typedef int ip6_fw_chk_t __P((struct ip6_hdr**, struct ifnet*, - u_short *, struct mbuf**)); -typedef int ip6_fw_ctl_t __P((struct sockopt *)); +typedef int ip6_fw_chk_t(struct ip6_hdr**, struct ifnet*, + u_short *, struct mbuf**); +typedef int ip6_fw_ctl_t(struct sockopt *); extern ip6_fw_chk_t *ip6_fw_chk_ptr; extern ip6_fw_ctl_t *ip6_fw_ctl_ptr; extern int ip6_fw_enable; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* _IP6_FW_H */ diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index 2dc986a66..ccfce46ed 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -79,13 +79,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -108,10 +108,13 @@ #include #endif extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif #include +#include + #include /* we need it for NLOOP. */ @@ -125,11 +128,7 @@ extern struct ip6protosw inet6sw[]; struct ip6protosw * ip6_protox[IPPROTO_MAX]; static int ip6qmaxlen = IFQ_MAXLEN; -struct in6_ifaddr *in6_ifaddr; - -extern void in6_tmpaddrtimer_funneled(void *); -extern void nd6_timer_funneled(void *); -extern void in6_rr_timer_funneled(void *); +struct in6_ifaddr *in6_ifaddrs; int ip6_forward_srcrt; /* XXX */ int ip6_sourcecheck; /* XXX */ @@ -149,22 +148,39 @@ struct ip6stat ip6stat; #ifdef __APPLE__ struct ifqueue ip6intrq; +lck_mtx_t *ip6_mutex; +lck_mtx_t *dad6_mutex; +lck_mtx_t *nd6_mutex; +lck_mtx_t *prefix6_mutex; +lck_attr_t *ip6_mutex_attr; +lck_grp_t *ip6_mutex_grp; +lck_grp_attr_t *ip6_mutex_grp_attr; +extern lck_mtx_t *inet6_domain_mutex; #endif +extern int loopattach_done; -static void ip6_init2 __P((void *)); -static struct mbuf *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *)); +static void ip6_init2(void *); +static struct mbuf *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); -static int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *)); +static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #if PULLDOWN_TEST -static struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int)); +static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); #endif #ifdef __APPLE__ -void gifattach __P((void)); -void faithattach __P((void)); -void stfattach __P((void)); +void gifattach(void); +void faithattach(void); +void stfattach(void); #endif +static void +ip6_proto_input( + protocol_family_t protocol, + mbuf_t packet) +{ + ip6_input(packet); +} + /* * IP6 initialization: fill in IP6 protocol switch table. * All protocols not implemented in kernel go to raw IP6 protocol handler. 
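
The sockopt-based ip6_fw_ctl() above gates every request on an API version carried in the first rule structure, then poisons the field once the rule is inside the kernel. A user-space sketch of that entry check (FW_API_VERSION is a made-up stand-in for IPV6_FW_CURRENT_API_VERSION, whose real value is not shown in this patch):

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define FW_API_VERSION 20u    /* hypothetical stand-in value */

struct fw_rule {
    unsigned int version;
    unsigned short number;
};

/* Mirrors the entry check in ip6_fw_ctl(): the request must carry at
 * least one full rule whose version matches, and the field is then
 * poisoned so a stale version can never be trusted downstream. */
static int
ctl_check(const void *buf, size_t len, struct fw_rule *out)
{
    if (buf == NULL || len < sizeof(*out))
        return EINVAL;
    memcpy(out, buf, sizeof(*out));
    if (out->version != FW_API_VERSION)
        return EINVAL;
    out->version = 0xFFFFFFFF;    /* meaningless once inside */
    return 0;
}

int
main(void)
{
    struct fw_rule in = { FW_API_VERSION, 100 }, parsed;

    printf("current: %d\n", ctl_check(&in, sizeof(in), &parsed));
    in.version = 1;    /* stale client */
    printf("stale:   %d\n", ctl_check(&in, sizeof(in), &parsed));
    return 0;
}
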
@@ -175,12 +191,13 @@ ip6_init() struct ip6protosw *pr; int i; struct timeval tv; + extern lck_mtx_t *domain_proto_mtx; #if DIAGNOSTIC if (sizeof(struct protosw) != sizeof(struct ip6protosw)) panic("sizeof(protosw) != sizeof(ip6protosw)"); #endif - pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW); + pr = (struct ip6protosw *)pffindproto_locked(PF_INET6, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip6_init"); for (i = 0; i < IPPROTO_MAX; i++) @@ -193,10 +210,34 @@ ip6_init() } } + ip6_mutex_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(ip6_mutex_grp_attr); + + ip6_mutex_grp = lck_grp_alloc_init("ip6", ip6_mutex_grp_attr); + ip6_mutex_attr = lck_attr_alloc_init(); + lck_attr_setdefault(ip6_mutex_attr); + + if ((ip6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { + printf("ip6_init: can't alloc ip6_mutex\n"); + return; + } + if ((dad6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { + printf("ip6_init: can't alloc dad6_mutex\n"); + return; + } + if ((nd6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { + printf("ip6_init: can't alloc nd6_mutex\n"); + return; + } + + if ((prefix6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { + printf("ip6_init: can't alloc prefix6_mutex\n"); + return; + } + + inet6domain.dom_flags = DOM_REENTRANT; + ip6intrq.ifq_maxlen = ip6qmaxlen; -#ifndef __APPLE__ - register_netisr(NETISR_IPV6, ip6intr); -#endif nd6_init(); frag6_init(); icmp6_init(); @@ -208,32 +249,36 @@ ip6_init() ip6_flow_seq = random() ^ tv.tv_usec; microtime(&tv); ip6_desync_factor = (random() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR; - timeout(ip6_init2, (caddr_t)0, 2 * hz); + timeout(ip6_init2, (caddr_t)0, 1 * hz); + + lck_mtx_unlock(domain_proto_mtx); + proto_register_input(PF_INET6, ip6_proto_input, NULL); + lck_mtx_lock(domain_proto_mtx); } static void ip6_init2(dummy) void *dummy; { -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif /* * to route local address of p2p link to loopback, * assign loopback address first. */ + if (loopattach_done == 0) { + timeout(ip6_init2, (caddr_t)0, 1 * hz); + return; + } in6_ifattach(&loif[0], NULL, NULL); #ifdef __APPLE__ /* nd6_timer_init */ - timeout(nd6_timer_funneled, (caddr_t)0, hz); + timeout(nd6_timer, (caddr_t)0, hz); /* router renumbering prefix list maintenance */ - timeout(in6_rr_timer_funneled, (caddr_t)0, hz); + timeout(in6_rr_timer, (caddr_t)0, hz); /* timer for regeneranation of temporary addresses randomize ID */ - timeout(in6_tmpaddrtimer_funneled, (caddr_t)0, + timeout(in6_tmpaddrtimer, (caddr_t)0, (ip6_temp_preferred_lifetime - ip6_desync_factor - ip6_temp_regen_advance) * hz); @@ -264,9 +309,6 @@ ip6_init2(dummy) #endif in6_init2done = 1; -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif } #if __FreeBSD__ @@ -275,25 +317,6 @@ ip6_init2(dummy) SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL); #endif -/* - * IP6 input interrupt handling. Just pass the packet to ip6_input. 
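
ip6_init2() can now run before the loopback interface exists, so the patch re-arms its own timeout until loopattach_done is set instead of relying on the old funneled timers. A trivial sketch of that self-rescheduling shape (the kernel re-arms with timeout(ip6_init2, 0, 1 * hz); here the retry is only reported):

#include <stdio.h>

static int loopattach_done;    /* set once loopback is attached */

/* If the loopback interface is not up yet, reschedule and return
 * instead of touching loif[0] too early. */
static void
ip6_init2_sketch(void)
{
    if (!loopattach_done) {
        printf("loopback not ready, rescheduling\n");
        return;
    }
    printf("attaching IPv6 to loopback\n");
}

int
main(void)
{
    ip6_init2_sketch();    /* too early: reschedules */
    loopattach_done = 1;
    ip6_init2_sketch();    /* proceeds */
    return 0;
}
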
- */ -void -ip6intr(void) -{ - int s; - struct mbuf *m; - - for (;;) { - s = splimp(); - IF_DEQUEUE(&ip6intrq, m); - splx(s); - if (m == 0) - return; - ip6_input(m); - } -} - extern struct route_in6 ip6_forward_rt; void @@ -306,7 +329,22 @@ ip6_input(m) u_int32_t rtalert = ~0; int nxt = 0, ours = 0; struct ifnet *deliverifp = NULL; + ipfilter_t inject_ipfref = 0; + int seen; + /* + * No need to proccess packet twice if we've + * already seen it + */ + inject_ipfref = ipf_get_inject_filter(m); + if (inject_ipfref != 0) { + ip6 = mtod(m, struct ip6_hdr *); + nxt = ip6->ip6_nxt; + seen = 0; + goto injectit; + } else + seen = 1; + #if IPSEC /* * should the inner packet be considered authentic? @@ -323,6 +361,7 @@ ip6_input(m) */ ip6_delaux(m); + lck_mtx_lock(ip6_mutex); /* * mbuf statistics */ @@ -369,6 +408,7 @@ ip6_input(m) } if (n == NULL) { m_freem(m); + lck_mtx_unlock(ip6_mutex); return; /*ENOBUFS*/ } @@ -377,7 +417,8 @@ ip6_input(m) m_freem(m); m = n; } - IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /*nothing*/); + IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), + {lck_mtx_unlock(ip6_mutex); return;}); #endif if (m->m_len < sizeof(struct ip6_hdr)) { @@ -386,6 +427,7 @@ ip6_input(m) if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) { ip6stat.ip6s_toosmall++; in6_ifstat_inc(inifp, ifs6_in_hdrerr); + lck_mtx_unlock(ip6_mutex); return; } } @@ -411,8 +453,10 @@ ip6_input(m) m_freem(m); m = NULL; } - if (!m) + if (!m) { + lck_mtx_unlock(ip6_mutex); return; + } } /* @@ -502,12 +546,15 @@ ip6_input(m) */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) != 0 && IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { - if (!in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst)) { + struct in6_ifaddr *ia6; + if (!(ia6 = in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst))) { + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); /* m is already freed */ return; } + ifafree(&ia6->ia_ifa); ours = 1; deliverifp = m->m_pkthdr.rcvif; @@ -647,7 +694,6 @@ ip6_input(m) "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst))); - goto bad; } } @@ -713,6 +759,7 @@ ip6_input(m) #if 0 /*touches NULL pointer*/ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); #endif + lck_mtx_unlock(ip6_mutex); return; /* m have already been freed */ } @@ -733,6 +780,7 @@ ip6_input(m) ip6stat.ip6s_badoptions++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); @@ -746,6 +794,7 @@ ip6_input(m) sizeof(struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; + lck_mtx_unlock(ip6_mutex); return; } #endif @@ -794,14 +843,17 @@ ip6_input(m) if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { ip6stat.ip6s_cantforward++; m_freem(m); + lck_mtx_unlock(ip6_mutex); return; } if (!ours) { m_freem(m); + lck_mtx_unlock(ip6_mutex); return; } } else if (!ours) { - ip6_forward(m, 0); + ip6_forward(m, 0, 1); + lck_mtx_unlock(ip6_mutex); return; } @@ -828,12 +880,17 @@ ip6_input(m) */ ip6stat.ip6s_delivered++; in6_ifstat_inc(deliverifp, ifs6_in_deliver); + + lck_mtx_unlock(ip6_mutex); +injectit: nest = 0; while (nxt != IPPROTO_DONE) { + struct ipfilter *filter; + if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { ip6stat.ip6s_toomanyhdr++; - goto bad; + goto badunlocked; } /* @@ -843,7 +900,7 @@ ip6_input(m) if (m->m_pkthdr.len < off) { ip6stat.ip6s_tooshort++; 
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); - goto bad; + goto badunlocked; } #if 0 @@ -868,18 +925,58 @@ ip6_input(m) * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if ((ipsec_bypass == 0) && (ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0 && - ipsec6_in_reject(m, NULL)) { - ipsec6stat.in_polvio++; - goto bad; + if ((ipsec_bypass == 0) && (ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject(m, NULL)) { + ipsec6stat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + goto badunlocked; + } + lck_mtx_unlock(sadb_mutex); } #endif - nxt = (*ip6_protox[nxt]->pr_input)(&m, &off); + /* + * Call IP filter on last header only + */ + if ((ip6_protox[nxt]->pr_flags & PR_LASTHDR) != 0 && !TAILQ_EMPTY(&ipv6_filters)) { + ipf_ref(); + TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { + if (seen == 0) { + if ((struct ipfilter *)inject_ipfref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_input) { + errno_t result; + + result = filter->ipf_filter.ipf_input( + filter->ipf_filter.cookie, (mbuf_t*)&m, off, nxt); + if (result == EJUSTRETURN) { + ipf_unref(); + return; + } + if (result != 0) { + ipf_unref(); + m_freem(m); + return; + } + } + } + ipf_unref(); + } + if (!(ip6_protox[nxt]->pr_flags & PR_PROTOLOCK)) { + lck_mtx_lock(inet6_domain_mutex); + nxt = (*ip6_protox[nxt]->pr_input)(&m, &off); + lck_mtx_unlock(inet6_domain_mutex); + } + else + nxt = (*ip6_protox[nxt]->pr_input)(&m, &off); } return; bad: + lck_mtx_unlock(ip6_mutex); + badunlocked: m_freem(m); + return; } /* @@ -930,11 +1027,11 @@ ip6_hopopts_input(plenp, rtalertp, mp, offp) /* validation of the length of the header */ #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); + IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), return -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; - IP6_EXTHDR_CHECK(m, off, hbhlen, -1); + IP6_EXTHDR_CHECK(m, off, hbhlen, return -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, @@ -1009,9 +1106,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) } if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) { /* XXX stat */ + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); + lck_mtx_lock(ip6_mutex); return(-1); } optlen = IP6OPT_RTALERT_LEN; @@ -1026,9 +1125,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) } if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) { /* XXX stat */ + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); + lck_mtx_lock(ip6_mutex); return(-1); } optlen = IP6OPT_JUMBO_LEN; @@ -1040,9 +1141,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_plen) { ip6stat.ip6s_badoptions++; + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt - opthead); + lck_mtx_lock(ip6_mutex); return(-1); } @@ -1064,9 +1167,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) */ if (*plenp != 0) { ip6stat.ip6s_badoptions++; + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); + lck_mtx_lock(ip6_mutex); return(-1); } #endif @@ -1076,9 +1181,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) */ if (jumboplen <= IPV6_MAXPACKET) { ip6stat.ip6s_badoptions++; + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, 
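/*
 * Reviewer note: the dispatch loop at the bottom of ip6_input() now
 * honors a per-protocol PR_PROTOLOCK flag -- protocols that do their
 * own locking are called bare, while legacy protocols are wrapped in
 * the domain mutex.  The shape, as added above:
 */
	if (!(ip6_protox[nxt]->pr_flags & PR_PROTOLOCK)) {
		lck_mtx_lock(inet6_domain_mutex);	/* legacy protocol */
		nxt = (*ip6_protox[nxt]->pr_input)(&m, &off);
		lck_mtx_unlock(inet6_domain_mutex);
	} else {
		/* protocol marked self-locking takes its own locks */
		nxt = (*ip6_protox[nxt]->pr_input)(&m, &off);
	}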
ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); + lck_mtx_lock(ip6_mutex); return(-1); } *plenp = jumboplen; @@ -1090,9 +1197,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) goto bad; } optlen = ip6_unknown_opt(opt, m, - erroff + opt - opthead); - if (optlen == -1) + erroff + opt - opthead, 1); + if (optlen == -1) { + /* ip6_unknown opt unlocked ip6_mutex */ return(-1); + } optlen += 2; break; } @@ -1100,7 +1209,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) return(0); - bad: + bad: m_freem(m); return(-1); } @@ -1112,10 +1221,11 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) * is not continuous in order to return an ICMPv6 error. */ int -ip6_unknown_opt(optp, m, off) +ip6_unknown_opt(optp, m, off, locked) u_int8_t *optp; struct mbuf *m; int off; + int locked; { struct ip6_hdr *ip6; @@ -1127,7 +1237,11 @@ ip6_unknown_opt(optp, m, off) return(-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ ip6stat.ip6s_badoptions++; + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); + if (locked) + lck_mtx_lock(ip6_mutex); return(-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ ip6stat.ip6s_badoptions++; @@ -1135,9 +1249,14 @@ ip6_unknown_opt(optp, m, off) if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); - else + else { + if (locked) + lck_mtx_unlock(ip6_mutex); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); + if (locked) + lck_mtx_lock(ip6_mutex); + } return(-1); } @@ -1162,17 +1281,8 @@ ip6_savecontrol(in6p, mp, ip6, m) struct ip6_hdr *ip6; struct mbuf *m; { - struct proc *p = current_proc(); /* XXX */ - int privileged = 0; int rthdr_exist = 0; -#ifdef __APPLE__ - if (p && !suser(p->p_ucred, &p->p_acflag)) -#else - if (p && !suser(p)) -#endif - privileged++; - #if SO_TIMESTAMP if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) { struct timeval tv; @@ -1211,12 +1321,13 @@ ip6_savecontrol(in6p, mp, ip6, m) } /* - * IPV6_HOPOPTS socket option. We require super-user privilege - * for the option, but it might be too strict, since there might - * be some hop-by-hop options which can be returned to normal user. + * IPV6_HOPOPTS socket option. Recall that we required super-user + * privilege for the option (see ip6_ctloutput), but it might be too + * strict, since there might be some hop-by-hop options which can be + * returned to normal user. * See RFC 2292 section 6. */ - if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0 && privileged) { + if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary @@ -1224,7 +1335,7 @@ ip6_savecontrol(in6p, mp, ip6, m) * just after the IPv6 header, which fact is assured through * the IPv6 input processing. */ - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; @@ -1300,7 +1411,7 @@ ip6_savecontrol(in6p, mp, ip6, m) if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) { - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + ip6 = mtod(m, struct ip6_hdr *); int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* @@ -1364,14 +1475,6 @@ ip6_savecontrol(in6p, mp, ip6, m) if ((in6p->in6p_flags & IN6P_DSTOPTS) == 0) break; - /* - * We also require super-user privilege for - * the option. - * See the comments on IN6_HOPOPTS. 
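/*
 * Reviewer note: ip6_process_hopopts() and ip6_unknown_opt() must drop
 * ip6_mutex around icmp6_error(), since generating the ICMPv6 reply
 * re-enters the output path, which takes ip6_mutex itself.  The new
 * 'locked' parameter lets one routine serve callers on either side of
 * the lock.  The recurring shape:
 */
	if (locked)
		lck_mtx_unlock(ip6_mutex);
	icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
	if (locked)
		lck_mtx_lock(ip6_mutex);
	return (-1);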
- */ - if (!privileged) - break; - *mp = sbcreatecontrol((caddr_t)ip6e, elen, IPV6_DSTOPTS, IPPROTO_IPV6); @@ -1565,7 +1668,8 @@ ip6_nexthdr(m, off, proto, nxtp) if (m->m_pkthdr.len < off + sizeof(fh)) return -1; m_copydata(m, off, sizeof(fh), (caddr_t)&fh); - if ((ntohs(fh.ip6f_offlg) & IP6F_OFF_MASK) != 0) + /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ + if (fh.ip6f_offlg & IP6F_OFF_MASK) return -1; if (nxtp) *nxtp = fh.ip6f_nxt; diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c index 595cf6c91..84e1a08ad 100644 --- a/bsd/netinet6/ip6_mroute.c +++ b/bsd/netinet6/ip6_mroute.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -78,15 +79,16 @@ static MALLOC_DEFINE(M_MRTABLE, "mf6c", "multicast forwarding cache entry"); #define M_HASCL(m) ((m)->m_flags & M_EXT) -static int ip6_mdq __P((struct mbuf *, struct ifnet *, struct mf6c *)); -static void phyint_send __P((struct ip6_hdr *, struct mif6 *, struct mbuf *)); +static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); +static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); -static int set_pim6 __P((int *)); -static int socket_send __P((struct socket *, struct mbuf *, - struct sockaddr_in6 *)); -static int register_send __P((struct ip6_hdr *, struct mif6 *, - struct mbuf *)); +static int set_pim6(int *); +static int socket_send(struct socket *, struct mbuf *, + struct sockaddr_in6 *); +static int register_send(struct ip6_hdr *, struct mif6 *, + struct mbuf *); +extern lck_mtx_t *ip6_mutex; /* * Globals. All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static, * except for netstat or debugging purposes. @@ -112,8 +114,7 @@ u_int mrt6debug = 0; /* debug level */ #define DEBUG_PIM 0x40 #endif -static void expire_upcalls __P((void *)); -static void expire_upcalls_funneled __P((void *)); +static void expire_upcalls(void *); #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ @@ -124,7 +125,6 @@ extern struct socket *ip_mrouter; #endif #endif -static u_long lo_dl_tag = 0; /* * 'Interfaces' associated with decapsulator (so we can tell * packets that went through it from ones that get reflected @@ -208,13 +208,13 @@ u_long upcall_data[UPCALL_MAX + 1]; static void collate(); #endif /* UPCALL_TIMING */ -static int get_sg_cnt __P((struct sioc_sg_req6 *)); -static int get_mif6_cnt __P((struct sioc_mif_req6 *)); -static int ip6_mrouter_init __P((struct socket *, struct mbuf *, int)); -static int add_m6if __P((struct mif6ctl *)); -static int del_m6if __P((mifi_t *)); -static int add_m6fc __P((struct mf6cctl *)); -static int del_m6fc __P((struct mf6cctl *)); +static int get_sg_cnt(struct sioc_sg_req6 *); +static int get_mif6_cnt(struct sioc_mif_req6 *); +static int ip6_mrouter_init(struct socket *, int, int); +static int add_m6if(struct mif6ctl *); +static int del_m6if(mifi_t *); +static int add_m6fc(struct mf6cctl *); +static int del_m6fc(struct mf6cctl *); #ifndef __APPLE__ static struct callout expire_upcalls_ch; @@ -227,53 +227,66 @@ ip6_mrouter_set(so, sopt) struct socket *so; struct sockopt *sopt; { - int error = 0; - struct mbuf *m; + int error = 0; + int optval; + struct mif6ctl mifc; + struct mf6cctl mfcc; + mifi_t mifi; if (so != ip6_mrouter && sopt->sopt_name != MRT6_INIT) return (EACCES); - if (sopt->sopt_valsize > MCLBYTES) - return (EMSGSIZE); - - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ - return (error); - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - return (error); - switch 
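/*
 * Reviewer note (ip6_nexthdr above): for a pure zero/nonzero test the
 * byte swap is unnecessary -- swapping both operand and mask preserves
 * which bits coincide -- so the ntohs() is dropped and IP6F_OFF_MASK,
 * which the added comment notes is defined per-endianness, is applied
 * to the raw wire-order field:
 */
	if (fh.ip6f_offlg & IP6F_OFF_MASK)
		return -1;	/* offset != 0: not the first fragment */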
(sopt->sopt_name) { case MRT6_INIT: #if MRT6_OINIT case MRT6_OINIT: #endif - error = ip6_mrouter_init(so, m, sopt->sopt_name); + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + error = ip6_mrouter_init(so, optval, sopt->sopt_name); break; case MRT6_DONE: error = ip6_mrouter_done(); break; case MRT6_ADD_MIF: - error = add_m6if(mtod(m, struct mif6ctl *)); - break; - case MRT6_DEL_MIF: - error = del_m6if(mtod(m, mifi_t *)); + error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc)); + if (error) + break; + error = add_m6if(&mifc); break; case MRT6_ADD_MFC: - error = add_m6fc(mtod(m, struct mf6cctl *)); + error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); + if (error) + break; + error = add_m6fc(&mfcc); break; case MRT6_DEL_MFC: - error = del_m6fc(mtod(m, struct mf6cctl *)); + error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc)); + if (error) + break; + error = del_m6fc(&mfcc); + break; + case MRT6_DEL_MIF: + error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi)); + if (error) + break; + error = del_m6if(&mifi); break; case MRT6_PIM: - error = set_pim6(mtod(m, int *)); + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + error = set_pim6(&optval); break; default: error = EOPNOTSUPP; break; } - (void)m_freem(m); - return(error); + return (error); } /* @@ -328,11 +341,8 @@ get_sg_cnt(req) struct sioc_sg_req6 *req; { struct mf6c *rt; - int s; - s = splnet(); MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt); - splx(s); if (rt != NULL) { req->pktcnt = rt->mf6c_pkt_cnt; req->bytecnt = rt->mf6c_byte_cnt; @@ -382,13 +392,11 @@ set_pim6(i) * Enable multicast routing */ static int -ip6_mrouter_init(so, m, cmd) +ip6_mrouter_init(so, v, cmd) struct socket *so; - struct mbuf *m; + int v; int cmd; { - int *v; - #if MRT6DEBUG if (mrt6debug) log(LOG_DEBUG, @@ -400,12 +408,8 @@ ip6_mrouter_init(so, m, cmd) so->so_proto->pr_protocol != IPPROTO_ICMPV6) return EOPNOTSUPP; - if (!m || (m->m_len != sizeof(int *))) - return ENOPROTOOPT; - - v = mtod(m, int *); - if (*v != 1) - return ENOPROTOOPT; + if (v != 1) + return (ENOPROTOOPT); if (ip6_mrouter != NULL) return EADDRINUSE; @@ -421,7 +425,7 @@ ip6_mrouter_init(so, m, cmd) callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); #else - timeout(expire_upcalls_funneled, (caddr_t)NULL, EXPIRE_TIMEOUT); + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); #endif #if MRT6DEBUG @@ -444,9 +448,7 @@ ip6_mrouter_done() struct in6_ifreq ifr; struct mf6c *rt; struct rtdetq *rte; - int s; - s = splnet(); /* * For each phyint in use, disable promiscuous reception of all IPv6 @@ -492,11 +494,12 @@ ip6_mrouter_done() #ifndef __APPLE__ callout_stop(&expire_upcalls_ch); #else - untimeout(expire_upcalls_funneled, (caddr_t)NULL); + untimeout(expire_upcalls, (caddr_t)NULL); #endif /* * Free all multicast forwarding cache entries. 
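/*
 * Reviewer note: ip6_mrouter_set() drops the soopt_getm()/
 * soopt_mcopyin()/mtod() mbuf staging in favor of sooptcopyin(),
 * which copies the user's option buffer straight into a stack
 * variable and returns the usual errno on a short option.  Call shape
 * used for each MRT6_* case above:
 */
	int optval;

	error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval));
	if (error)
		return (error);
	/* optval is now a plain int; no mbuf to free on any exit path */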
+ *###LD 5/27 needs locking */ for (i = 0; i < MF6CTBLSIZ; i++) { rt = mf6ctable[i]; @@ -526,7 +529,6 @@ ip6_mrouter_done() ip6_mrouter = NULL; ip6_mrouter_ver = 0; - splx(s); #if MRT6DEBUG if (mrt6debug) @@ -547,7 +549,7 @@ add_m6if(mifcp) { struct mif6 *mifp; struct ifnet *ifp; - int error, s; + int error; #if notyet struct tbf *m_tbf = tbftable + mifcp->mif6c_mifi; #endif @@ -577,14 +579,11 @@ add_m6if(mifcp) if ((ifp->if_flags & IFF_MULTICAST) == 0) return EOPNOTSUPP; - s = splnet(); error = if_allmulti(ifp, 1); - splx(s); if (error) return error; } - s = splnet(); mifp->m6_flags = mifcp->mif6c_flags; mifp->m6_ifp = ifp; #if notyet @@ -596,7 +595,6 @@ add_m6if(mifcp) mifp->m6_pkt_out = 0; mifp->m6_bytes_in = 0; mifp->m6_bytes_out = 0; - splx(s); /* Adjust nummifs up if the mifi is higher than nummifs */ if (nummifs <= mifcp->mif6c_mifi) @@ -623,14 +621,12 @@ del_m6if(mifip) struct mif6 *mifp = mif6table + *mifip; mifi_t mifi; struct ifnet *ifp; - int s; if (*mifip >= nummifs) return EINVAL; if (mifp->m6_ifp == NULL) return EINVAL; - s = splnet(); if (!(mifp->m6_flags & MIFF_REGISTER)) { /* @@ -646,7 +642,7 @@ del_m6if(mifip) bzero((caddr_t)qtable[*mifip], sizeof(qtable[*mifip])); bzero((caddr_t)mifp->m6_tbf, sizeof(*(mifp->m6_tbf))); #endif - bzero((caddr_t)mifp, sizeof (*mifp)); + bzero((caddr_t)mifp, sizeof(*mifp)); /* Adjust nummifs down */ for (mifi = nummifs; mifi > 0; mifi--) @@ -654,7 +650,6 @@ del_m6if(mifip) break; nummifs = mifi; - splx(s); #if MRT6DEBUG if (mrt6debug) @@ -691,17 +686,14 @@ add_m6fc(mfccp) mfccp->mf6cc_parent); #endif - s = splnet(); rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; - splx(s); return 0; } /* * Find the entry for which the upcall was made and update */ - s = splnet(); hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, mfccp->mf6cc_mcastgrp.sin6_addr); for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) { @@ -815,7 +807,6 @@ add_m6fc(mfccp) mf6ctable[hash] = rt; } } - splx(s); return 0; } @@ -858,7 +849,6 @@ del_m6fc(mfccp) struct mf6c *rt; struct mf6c **nptr; u_long hash; - int s; origin = mfccp->mf6cc_origin; mcastgrp = mfccp->mf6cc_mcastgrp; @@ -871,7 +861,6 @@ del_m6fc(mfccp) ip6_sprintf(&mcastgrp.sin6_addr)); #endif - s = splnet(); nptr = &mf6ctable[hash]; while ((rt = *nptr) != NULL) { @@ -885,14 +874,12 @@ del_m6fc(mfccp) nptr = &rt->mf6c_next; } if (rt == NULL) { - splx(s); return EADDRNOTAVAIL; } *nptr = rt->mf6c_next; FREE(rt, M_MRTABLE); - splx(s); return 0; } @@ -903,15 +890,16 @@ socket_send(s, mm, src) struct mbuf *mm; struct sockaddr_in6 *src; { +//### LD 5/27/04 needs locking! +// if (s) { if (sbappendaddr(&s->so_rcv, (struct sockaddr *)src, - mm, (struct mbuf *)0) != 0) { + mm, (struct mbuf *)0, NULL) != 0) { sorwakeup(s); return 0; } } - m_freem(mm); return -1; } @@ -937,6 +925,7 @@ ip6_mforward(ip6, ifp, m) struct mbuf *mm; int s; mifi_t mifi; + struct timeval timenow; #if MRT6DEBUG if (mrt6debug & DEBUG_FORWARD) @@ -961,10 +950,11 @@ ip6_mforward(ip6, ifp, m) * MLD packets can be sent with the unspecified source address * (although such packets must normally set 1 to the hop limit field). 
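/*
 * Reviewer note: the splnet()/splx() pairs (and their 's' locals) are
 * deleted throughout the multicast-routing code; interrupt-priority
 * masking provides no useful exclusion on this kernel once the funnel
 * is gone.  The '###LD ... needs locking' markers above flag the spots
 * where real mutex coverage is still owed, e.g. the MFC table lookup:
 */
	/* before: */
	s = splnet();
	MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt);
	splx(s);
	/* after: bare for now, pending a dedicated lock */
	MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt);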
*/ + getmicrotime(&timenow); if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat.ip6s_cantforward++; - if (ip6_log_time + ip6_log_interval < time_second) { - ip6_log_time = time_second; + if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { + ip6_log_time = timenow.tv_sec; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -1188,20 +1178,6 @@ ip6_mforward(ip6, ifp, m) } } -static void -expire_upcalls_funneled(unused) - void *unused; -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - expire_upcalls(unused); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} - /* * Clean up cache entries if upcalls are not serviced * Call from the Slow Timeout mechanism, every half second. @@ -1262,7 +1238,7 @@ expire_upcalls(unused) callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); #else - timeout(expire_upcalls_funneled, (caddr_t)NULL, EXPIRE_TIMEOUT); + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); #endif } @@ -1483,7 +1459,8 @@ phyint_send(ip6, mifp, m) im6o.im6o_multicast_hlim = ip6->ip6_hlim; im6o.im6o_multicast_loop = 1; error = ip6_output(mb_copy, NULL, &ro, - IPV6_FORWARDING, &im6o, NULL); + IPV6_FORWARDING, &im6o, NULL, 0); + #if MRT6DEBUG if (mrt6debug & DEBUG_XMIT) @@ -1521,12 +1498,14 @@ phyint_send(ip6, mifp, m) #ifdef __APPLE__ /* Make sure the HW checksum flags are cleaned before sending the packet */ - mb_copy->m_pkthdr.rcvif = (struct ifnet *)0; + mb_copy->m_pkthdr.rcvif = 0; mb_copy->m_pkthdr.csum_data = 0; mb_copy->m_pkthdr.csum_flags = 0; - error = dlil_output(ifptodlt(ifp, PF_INET6), mb_copy, + lck_mtx_unlock(ip6_mutex); + error = dlil_output(ifp, PF_INET6, mb_copy, NULL, (struct sockaddr *)&ro.ro_dst, 0); + lck_mtx_lock(ip6_mutex); #else error = (*ifp->if_output)(ifp, mb_copy, (struct sockaddr *)&ro.ro_dst, @@ -1679,7 +1658,7 @@ pim6_input(mp, offp) * possibly the PIM REGISTER header */ #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, minlen, return IPPROTO_DONE); /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); @@ -1847,11 +1826,11 @@ pim6_input(mp, offp) #ifdef __APPLE__ - if (lo_dl_tag == 0) - dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag); - - if (lo_dl_tag) - dlil_output(lo_dl_tag, m, 0, (struct sockaddr *)&dst, 0); + if (lo_ifp) { + lck_mtx_unlock(ip6_mutex); + dlil_output(lo_ifp, PF_INET6, m, 0, (struct sockaddr *)&dst, 0); + lck_mtx_lock(ip6_mutex); + } else { printf("Warning: pim6_input call to dlil_find_dltag failed!\n"); m_freem(m); diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h index 5bae8b74e..f38b57753 100644 --- a/bsd/netinet6/ip6_mroute.h +++ b/bsd/netinet6/ip6_mroute.h @@ -51,7 +51,7 @@ /* * Multicast Routing set/getsockopt commands. 
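/*
 * Reviewer note: reads of the 'time_second' global give way to a
 * getmicrotime() snapshot taken once per call.  The rate-limited
 * logging idiom above emits at most one message per ip6_log_interval
 * seconds:
 */
	struct timeval timenow;

	getmicrotime(&timenow);
	if (ip6_log_time + ip6_log_interval < timenow.tv_sec) {
		ip6_log_time = timenow.tv_sec;
		log(LOG_DEBUG, "cannot forward ...\n");
	}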
*/ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE #define MRT6_OINIT 100 /* initialize forwarder (omrt6msg) */ #endif #define MRT6_DONE 101 /* shut down forwarder */ @@ -62,12 +62,8 @@ #define MRT6_PIM 107 /* enable pim code */ #define MRT6_INIT 108 /* initialize forwarder (mrt6msg) */ -#if BSD >= 199103 -#define GET_TIME(t) microtime(&t) -#elif defined(sun) -#define GET_TIME(t) uniqtime(&t) -#else -#define GET_TIME(t) ((t) = time) +#ifdef __APPLE__ +#define GET_TIME(t) getmicrotime(&t) #endif /* @@ -140,6 +136,7 @@ struct mrt6stat { u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ }; +#ifdef KERNEL_PRIVATE #if MRT6_OINIT /* * Struct used to communicate from kernel to multicast router @@ -160,6 +157,7 @@ struct omrt6msg { struct in6_addr im6_src, im6_dst; }; #endif +#endif KERNEL_PRIVATE /* * Structure used to communicate from kernel to multicast router. @@ -203,8 +201,7 @@ struct sioc_mif_req6 { u_quad_t obytes; /* Output byte count on mif */ }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE /* * The kernel's multicast-interface structure. */ @@ -247,6 +244,15 @@ struct mf6c { #define MF6C_INCOMPLETE_PARENT ((mifi_t)-1) +#define MF6CTBLSIZ 256 +#if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0 /* from sys:route.h */ +#define MF6CHASHMOD(h) ((h) & (MF6CTBLSIZ - 1)) +#else +#define MF6CHASHMOD(h) ((h) % MF6CTBLSIZ) +#endif + +#define MAX_UPQ6 4 /* max. no of pkts in upcall Q */ + /* * Argument structure used for pkt info. while upcall is made */ @@ -261,20 +267,14 @@ struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ }; #endif /* _NETINET_IP_MROUTE_H_ */ -#define MF6CTBLSIZ 256 -#if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0 /* from sys:route.h */ -#define MF6CHASHMOD(h) ((h) & (MF6CTBLSIZ - 1)) -#else -#define MF6CHASHMOD(h) ((h) % MF6CTBLSIZ) -#endif - -#define MAX_UPQ6 4 /* max. 
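/*
 * Reviewer note: MF6CHASHMOD (moved above, under PRIVATE) picks '&'
 * over '%' when the table size is a power of two -- (h & (N - 1)) ==
 * (h % N) exactly when N is a power of two, which the preprocessor
 * test (N & (N - 1)) == 0 detects.  Self-contained userland check:
 */
#include <assert.h>

int
main(void)
{
	unsigned h, N = 256;		/* MF6CTBLSIZ */

	assert((N & (N - 1)) == 0);	/* power of two */
	for (h = 0; h < 10000; h++)
		assert((h & (N - 1)) == (h % N));
	return 0;
}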
no of pkts in upcall Q */ +#ifdef KERNEL +extern struct mrt6stat mrt6stat; -int ip6_mrouter_set __P((struct socket *so, struct sockopt *sopt)); -int ip6_mrouter_get __P((struct socket *so, struct sockopt *sopt)); -int ip6_mrouter_done __P((void)); -int mrt6_ioctl __P((int, caddr_t)); -#endif /* __APPLE_API_PRIVATE */ +int ip6_mrouter_set(struct socket *so, struct sockopt *sopt); +int ip6_mrouter_get(struct socket *so, struct sockopt *sopt); +int ip6_mrouter_done(void); +int mrt6_ioctl(int, caddr_t); #endif /* KERNEL */ +#endif /* PRIVATE */ #endif /* !_NETINET6_IP6_MROUTE_H_ */ diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index f6023f234..c2bdcc28d 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include @@ -97,21 +98,23 @@ #endif #include extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; +extern lck_mtx_t *nd6_mutex; #endif /* IPSEC */ #include #include +#include + #ifndef __APPLE__ static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); #endif -static u_long lo_dl_tag = 0; extern u_long route_generation; - struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; @@ -120,19 +123,20 @@ struct ip6_exthdrs { struct mbuf *ip6e_dest2; }; -static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, - struct socket *, struct sockopt *sopt)); -static int ip6_setmoptions __P((int, struct inpcb *, struct mbuf *)); -static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **)); -static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int)); -static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, - struct ip6_frag **)); -static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); -static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); +static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, + struct socket *, struct sockopt *sopt); +static int ip6_setmoptions(int, struct inpcb *, struct mbuf *); +static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **); +static int ip6_copyexthdr(struct mbuf **, caddr_t, int); +static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, + struct ip6_frag **); +static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); +static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); extern int ip_createmoptions(struct ip_moptions **imop); extern int ip_addmembership(struct ip_moptions *imo, struct ip_mreq *mreq); extern int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq); +extern lck_mtx_t *ip6_mutex; /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 @@ -146,13 +150,14 @@ extern int ip_dropmembership(struct ip_moptions *imo, struct ip_mreq *mreq); * which is rt_rmx.rmx_mtu. 
*/ int -ip6_output(m0, opt, ro, flags, im6o, ifpp) - struct mbuf *m0; - struct ip6_pktopts *opt; - struct route_in6 *ro; - int flags; - struct ip6_moptions *im6o; - struct ifnet **ifpp; /* XXX: just for statistics */ +ip6_output( + struct mbuf *m0, + struct ip6_pktopts *opt, + struct route_in6 *ro, + int flags, + struct ip6_moptions *im6o, + struct ifnet **ifpp, /* XXX: just for statistics */ + int locked) { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp, *origifp; @@ -169,20 +174,26 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int needipsec = 0; + ipfilter_t inject_filter_ref; + #if IPSEC int needipsectun = 0; struct socket *so = NULL; struct secpolicy *sp = NULL; + if (!locked) + lck_mtx_lock(ip6_mutex); /* for AH processing. stupid to have "socket" variable in IP layer... */ if (ipsec_bypass == 0) { so = ipsec_getsocket(m); (void)ipsec_setsocket(m, NULL); } - ip6 = mtod(m, struct ip6_hdr *); #endif /* IPSEC */ + ip6 = mtod(m, struct ip6_hdr *); + inject_filter_ref = ipf_get_inject_filter(m); + #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ @@ -210,7 +221,8 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) #if IPSEC if (ipsec_bypass != 0) goto skip_ipsec; - + + lck_mtx_lock(sadb_mutex); /* get a security policy for this packet */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); @@ -219,6 +231,7 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) if (sp == NULL) { ipsec6stat.out_inval++; + lck_mtx_unlock(sadb_mutex); goto freehdrs; } @@ -231,6 +244,7 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) * This packet is just discarded. */ ipsec6stat.out_polvio++; + lck_mtx_unlock(sadb_mutex); goto freehdrs; case IPSEC_POLICY_BYPASS: @@ -243,6 +257,7 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); + lck_mtx_unlock(sadb_mutex); goto freehdrs; } needipsec = 1; @@ -252,6 +267,7 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) default: printf("ip6_output: Invalid policy found. 
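/*
 * Reviewer note: ip6_output() grows a trailing 'locked' flag so that
 * callers already holding ip6_mutex (the loopback, forwarding and
 * multicast paths elsewhere in this patch pass 1) can re-enter without
 * deadlock; only unlocked callers pay for the acquire/release.
 * Skeleton of the pattern (function name is illustrative):
 */
static int
output_shape(struct mbuf *m0, int locked)
{
	int error = 0;

	if (!locked)
		lck_mtx_lock(ip6_mutex);
	/* ... extension headers, routing, transmission ... */
	if (!locked)
		lck_mtx_unlock(ip6_mutex);
	return (error);
}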
%d\n", sp->policy); } + lck_mtx_unlock(sadb_mutex); skip_ipsec: #endif /* IPSEC */ @@ -361,6 +377,61 @@ ip6_output(m0, opt, ro, flags, im6o, ifpp) MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); + if (!TAILQ_EMPTY(&ipv6_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == 0); + int fixscope = 0; + struct ipf_pktopts *ippo = 0, ipf_pktopts; + + if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + ippo = &ipf_pktopts; + ippo->ippo_flags = IPPOF_MCAST_OPTS; + ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp; + ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim; + ippo->ippo_mcast_loop = im6o->im6o_multicast_loop; + } + + /* Hack: embed the scope_id in the destination */ + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && + (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) { + fixscope = 1; + ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id); + } + { + lck_mtx_unlock(ip6_mutex); + ipf_ref(); + TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { + /* + * No need to proccess packet twice if we've + * already seen it + */ + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output) { + errno_t result; + + result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + if (result == EJUSTRETURN) { + ipf_unref(); + locked = 1; /* Don't want to take lock to unlock it right away */ + goto done; + } + if (result != 0) { + ipf_unref(); + locked = 1; /* Don't want to take lock to unlock it right away */ + goto bad; + } + } + } + ipf_unref(); + lck_mtx_lock(ip6_mutex); + } + /* Hack: cleanup embedded scope_id if we put it there */ + if (fixscope) + ip6->ip6_dst.s6_addr16[1] = 0; + } + #if IPSEC if (!needipsec) goto skip_ipsec2; @@ -512,9 +583,10 @@ skip_ipsec2:; state.m = m; state.ro = (struct route *)ro; state.dst = (struct sockaddr *)dst; - + + lck_mtx_lock(sadb_mutex); error = ipsec6_output_tunnel(&state, sp, flags); - + lck_mtx_unlock(sadb_mutex); m = state.m; ro = (struct route_in6 *)state.ro; dst = (struct sockaddr_in6 *)state.dst; @@ -537,6 +609,7 @@ skip_ipsec2:; error = 0; break; } + lck_mtx_unlock(sadb_mutex); goto bad; } @@ -554,16 +627,18 @@ skip_ipsec2:; * if an interface is specified from an upper layer, * ifp must point it. */ + lck_mtx_lock(rt_mtx); if (ro->ro_rt == 0) { /* * non-bsdi always clone routes, if parent is * PRF_CLONING. */ - rtalloc((struct route *)ro); + rtalloc_ign_locked((struct route *)ro, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; + lck_mtx_unlock(rt_mtx); /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } @@ -572,6 +647,7 @@ skip_ipsec2:; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; + lck_mtx_unlock(rt_mtx); m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); @@ -652,12 +728,14 @@ skip_ipsec2:; * ``net'' ff00::/8). 
*/ if (ifp == NULL) { + lck_mtx_lock(rt_mtx); if (ro->ro_rt == 0) { - ro->ro_rt = rtalloc1((struct sockaddr *) + ro->ro_rt = rtalloc1_locked((struct sockaddr *) &ro->ro_dst, 0, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; + lck_mtx_unlock(rt_mtx); error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; @@ -665,6 +743,7 @@ skip_ipsec2:; ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; + lck_mtx_unlock(rt_mtx); } if ((flags & IPV6_FORWARDING) == 0) @@ -680,7 +759,9 @@ skip_ipsec2:; error = ENETUNREACH; goto bad; } + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); + ifnet_lock_done(ifp); if (in6m != NULL && (im6o == NULL || im6o->im6o_multicast_loop)) { /* @@ -836,7 +917,7 @@ skip_ipsec2:; u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ - if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) { + if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) { m_freem(m); goto done; } @@ -914,7 +995,7 @@ skip_ipsec2:; ipsec_delaux(m); #endif - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, 1); goto done; } else if (mtu < IPV6_MMTU) { /* @@ -1011,7 +1092,8 @@ skip_ipsec2:; } m_cat(m, m_frgpart); m->m_pkthdr.len = len + hlen + sizeof(*ip6f); - m->m_pkthdr.rcvif = (struct ifnet *)0; + m->m_pkthdr.rcvif = 0; + m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; @@ -1044,7 +1126,8 @@ sendorfree: /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif - error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); + error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, 1); + } else m_freem(m); } @@ -1053,6 +1136,8 @@ sendorfree: ip6stat.ip6s_fragmented++; done: + if (!locked) + lck_mtx_unlock(ip6_mutex); if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */ rtfree(ro->ro_rt); } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { @@ -1060,8 +1145,11 @@ done: } #if IPSEC - if (sp != NULL) + if (sp != NULL) { + lck_mtx_lock(sadb_mutex); key_freesp(sp); + lck_mtx_unlock(sadb_mutex); + } #endif /* IPSEC */ return(error); @@ -1267,6 +1355,7 @@ ip6_ctloutput(so, sopt) int optlen; struct proc *p; + level = error = optval = 0; if (sopt == NULL) panic("ip6_ctloutput: arg soopt is NULL"); else { @@ -1276,9 +1365,8 @@ ip6_ctloutput(so, sopt) optlen = sopt->sopt_valsize; p = sopt->sopt_p; } - error = optval = 0; - privileged = (p == 0 || suser(p->p_ucred, &p->p_acflag)) ? 0 : 1; + privileged = (p == 0 || proc_suser(p)) ? 
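/*
 * Reviewer note: IN6_LOOKUP_MULTI walks the interface's multicast
 * membership list, so it is now bracketed by the per-ifnet reader
 * lock; likewise the route allocations above move under rt_mtx via
 * the rtalloc*_locked variants.  The membership lookup:
 */
	ifnet_lock_shared(ifp);
	IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
	ifnet_lock_done(ifp);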
0 : 1; if (level == IPPROTO_IPV6) { switch (op) { @@ -1485,10 +1573,10 @@ do { \ size_t len = 0; struct mbuf *m; - if (sopt->sopt_valsize > MCLBYTES) { - error = EMSGSIZE; - break; - } + if (sopt->sopt_valsize > MCLBYTES) { + error = EMSGSIZE; + break; + } if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ @@ -1497,8 +1585,10 @@ do { \ req = mtod(m, caddr_t); len = m->m_len; } + lck_mtx_lock(sadb_mutex); error = ipsec6_set_policy(in6p, optname, req, len, privileged); + lck_mtx_unlock(sadb_mutex); m_freem(m); } break; @@ -1583,8 +1673,8 @@ do { \ case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: - if (optname == IPV6_HOPOPTS || - optname == IPV6_DSTOPTS || + if ((optname == IPV6_HOPOPTS || + optname == IPV6_DSTOPTS) && !privileged) return(EPERM); switch (optname) { @@ -1636,10 +1726,10 @@ do { \ struct mbuf *m = NULL; struct mbuf **mp = &m; - if (sopt->sopt_valsize > MCLBYTES) { - error = EMSGSIZE; - break; - } + if (sopt->sopt_valsize > MCLBYTES) { + error = EMSGSIZE; + break; + } error = soopt_getm(sopt, &m); /* XXX */ if (error != NULL) break; @@ -1650,7 +1740,9 @@ do { \ req = mtod(m, caddr_t); len = m->m_len; } + lck_mtx_lock(sadb_mutex); error = ipsec6_get_policy(in6p, req, len, mp); + lck_mtx_unlock(sadb_mutex); if (error == 0) error = soopt_mcopyout(sopt, m); /*XXX*/ if (error == 0 && m) @@ -1720,7 +1812,7 @@ ip6_pcbopts(pktopt, m, so, sopt) } /* set options specified by user. */ - if (p && !suser(p->p_ucred, &p->p_acflag)) + if (p && !proc_suser(p)) priv = 1; if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) { ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */ @@ -1869,10 +1961,10 @@ ip6_freepcbopts(pktopt) * Set the IP6 multicast options in response to user setsockopt(). */ static int -ip6_setmoptions(optname, in6p, m) - int optname; - struct inpcb* in6p; - struct mbuf *m; +ip6_setmoptions( + int optname, + struct inpcb* in6p, + struct mbuf *m) { int error = 0; u_int loop, ifindex; @@ -2005,7 +2097,7 @@ ip6_setmoptions(optname, in6p, m) * all multicast addresses. Only super user is allowed * to do this. */ - if (suser(p->p_ucred, &p->p_acflag)) + if (suser(kauth_cred_get(), 0)) { error = EACCES; break; @@ -2022,12 +2114,14 @@ ip6_setmoptions(optname, in6p, m) ifp = ifindex2ifnet[mreq->ipv6mr_interface]; + lck_mtx_lock(rt_mtx); TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) { if (ifa->ia_ifp == ifp) { v4req.imr_interface = IA_SIN(ifa)->sin_addr; break; } } + lck_mtx_unlock(rt_mtx); if (v4req.imr_multiaddr.s_addr == 0) { /* Interface has no IPv4 address. */ @@ -2093,6 +2187,7 @@ ip6_setmoptions(optname, in6p, m) /* * See if the membership already exists. 
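/*
 * Reviewer note: the old getsockopt guard,
 *
 *	if (optname == IPV6_HOPOPTS ||
 *	    optname == IPV6_DSTOPTS || !privileged)
 *		return (EPERM);
 *
 * denied HOPOPTS and DSTOPTS even to privileged callers and denied
 * RTHDR to unprivileged ones.  The parenthesized rewrite confines the
 * privilege check to the two options that actually need it:
 */
	if ((optname == IPV6_HOPOPTS || optname == IPV6_DSTOPTS) &&
	    !privileged)
		return (EPERM);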
*/ + lck_mtx_lock(nd6_mutex); for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) if (imm->i6mm_maddr->in6m_ifp == ifp && @@ -2101,6 +2196,7 @@ ip6_setmoptions(optname, in6p, m) break; if (imm != NULL) { error = EADDRINUSE; + lck_mtx_unlock(nd6_mutex); break; } /* @@ -2110,14 +2206,17 @@ ip6_setmoptions(optname, in6p, m) imm = _MALLOC(sizeof(*imm), M_IPMADDR, M_WAITOK); if (imm == NULL) { error = ENOBUFS; + lck_mtx_unlock(nd6_mutex); break; } if ((imm->i6mm_maddr = - in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) { + in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error, 1)) == NULL) { FREE(imm, M_IPMADDR); + lck_mtx_unlock(nd6_mutex); break; } LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); + lck_mtx_unlock(nd6_mutex); break; case IPV6_LEAVE_GROUP: @@ -2142,7 +2241,7 @@ ip6_setmoptions(optname, in6p, m) ifp = ifindex2ifnet[mreq->ipv6mr_interface]; if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { - if (suser(p->p_ucred, &p->p_acflag)) { + if (suser(kauth_cred_get(), 0)) { error = EACCES; break; } @@ -2155,12 +2254,14 @@ ip6_setmoptions(optname, in6p, m) if (ifp != NULL) { struct in_ifaddr *ifa; + lck_mtx_lock(rt_mtx); TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) { if (ifa->ia_ifp == ifp) { v4req.imr_interface = IA_SIN(ifa)->sin_addr; break; } } + lck_mtx_unlock(rt_mtx); } error = ip_dropmembership(imo, &v4req); @@ -2180,6 +2281,7 @@ ip6_setmoptions(optname, in6p, m) /* * Find the membership in the membership list. */ + lck_mtx_lock(nd6_mutex); for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || @@ -2191,6 +2293,7 @@ ip6_setmoptions(optname, in6p, m) if (imm == NULL) { /* Unable to resolve interface */ error = EADDRNOTAVAIL; + lck_mtx_unlock(nd6_mutex); break; } /* @@ -2198,7 +2301,8 @@ ip6_setmoptions(optname, in6p, m) * membership points. */ LIST_REMOVE(imm, i6mm_chain); - in6_delmulti(imm->i6mm_maddr); + in6_delmulti(imm->i6mm_maddr, 1); + lck_mtx_unlock(nd6_mutex); FREE(imm, M_IPMADDR); break; @@ -2210,6 +2314,7 @@ ip6_setmoptions(optname, in6p, m) /* * If all options have default values, no need to keep the mbuf. */ + lck_mtx_lock(nd6_mutex); if (im6o->im6o_multicast_ifp == NULL && im6o->im6o_multicast_hlim == ip6_defmcasthlim && im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && @@ -2225,6 +2330,7 @@ ip6_setmoptions(optname, in6p, m) ip_freemoptions(imo); in6p->inp_moptions = 0; } + lck_mtx_unlock(nd6_mutex); return(error); } @@ -2287,13 +2393,15 @@ ip6_freemoptions(im6o) if (im6o == NULL) return; - + + lck_mtx_lock(nd6_mutex); while ((imm = im6o->im6o_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); if (imm->i6mm_maddr) - in6_delmulti(imm->i6mm_maddr); + in6_delmulti(imm->i6mm_maddr, 1); FREE(imm, M_IPMADDR); } + lck_mtx_unlock(nd6_mutex); FREE(im6o, M_IPMOPTS); } @@ -2372,8 +2480,12 @@ ip6_setpktoptions(control, opt, priv, needcopy) ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY)) != 0) + IN6_IFF_NOTREADY)) != 0) { + if (ia6) ifafree(&ia6->ia_ifa); return(EADDRNOTAVAIL); + } + ifafree(&ia6->ia_ifa); + ia6 = NULL; } break; @@ -2519,10 +2631,10 @@ ip6_setpktoptions(control, opt, priv, needcopy) * pointer that might NOT be &loif -- easier than replicating that code here. 
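/*
 * Reviewer note: ifa_ifwithaddr() hands back a referenced ifaddr in
 * this revision, so ip6_setpktoptions() must now drop that reference
 * on every exit -- the failure leg as well as the success leg, as
 * added just above:
 */
	if (ia6 == NULL || (ia6->ia6_flags &
	    (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) != 0) {
		if (ia6)
			ifafree(&ia6->ia_ifa);	/* drop ref, then fail */
		return (EADDRNOTAVAIL);
	}
	ifafree(&ia6->ia_ifa);	/* validated; reference no longer needed */
	ia6 = NULL;		/* don't use the pointer past the release */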
*/ void -ip6_mloopback(ifp, m, dst) - struct ifnet *ifp; - struct mbuf *m; - struct sockaddr_in6 *dst; +ip6_mloopback( + struct ifnet *ifp, + struct mbuf *m, + struct sockaddr_in6 *dst) { struct mbuf *copym; struct ip6_hdr *ip6; @@ -2564,16 +2676,15 @@ ip6_mloopback(ifp, m, dst) /* Makes sure the HW checksum flags are cleaned before sending the packet */ - copym->m_pkthdr.rcvif = (struct ifnet *)0; + copym->m_pkthdr.rcvif = 0; copym->m_pkthdr.csum_data = 0; copym->m_pkthdr.csum_flags = 0; - if (lo_dl_tag == 0) - dlil_find_dltag(APPLE_IF_FAM_LOOPBACK, 0, PF_INET, &lo_dl_tag); - - if (lo_dl_tag) { + if (lo_ifp) { copym->m_pkthdr.rcvif = ifp; - dlil_output(lo_dl_tag, copym, 0, (struct sockaddr *)dst, 0); + lck_mtx_unlock(ip6_mutex); + dlil_output(lo_ifp, PF_INET6, copym, 0, (struct sockaddr *)dst, 0); + lck_mtx_lock(ip6_mutex); } else m_free(copym); #else diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index 254c8559f..96ca49088 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -69,7 +69,7 @@ #define _NETINET6_IP6_VAR_H_ #include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* * IP6 reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. @@ -91,6 +91,7 @@ struct ip6q { #if notyet u_char *ip6q_nxtp; #endif + int ip6q_nfrag; /* number of fragments */ }; struct ip6asfrag { @@ -153,6 +154,7 @@ struct ip6_pktopts { /* * Control options for incoming packets */ +#endif /* KERNEL_PRIVATE */ struct ip6stat { u_quad_t ip6s_total; /* total packets received */ @@ -215,6 +217,7 @@ struct ip6stat { u_quad_t ip6s_forward_cachemiss; }; +#ifdef KERNEL_PRIVATE #ifdef KERNEL /* * IPv6 onion peeling state. @@ -244,9 +247,7 @@ struct ip6aux { * with IPsec it may not be accurate. */ }; -#endif -#ifdef KERNEL /* flags passed to ip6_output as last parameter */ #define IPV6_DADOUTPUT 0x01 /* DAD */ #define IPV6_FORWARDING 0x02 /* most of IPv6 header exists */ @@ -267,7 +268,8 @@ extern int ip6_v6only; extern struct socket *ip6_mrouter; /* multicast routing daemon */ extern int ip6_sendredirects; /* send IP redirects when forwarding? 
*/ -extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */ +extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */ +extern int ip6_maxfrags; /* Maximum fragments in reassembly queue */ extern int ip6_sourcecheck; /* Verify source interface */ extern int ip6_sourcecheck_interval; /* Interval between log messages */ extern int ip6_accept_rtadv; /* Acts as a host not a router */ @@ -293,65 +295,65 @@ struct sockopt; struct inpcb; -int icmp6_ctloutput __P((struct socket *, struct sockopt *sopt)); +int icmp6_ctloutput(struct socket *, struct sockopt *sopt); struct in6_ifaddr; -void ip6_init __P((void)); -void ip6intr __P((void)); -void ip6_input __P((struct mbuf *)); -struct in6_ifaddr *ip6_getdstifaddr __P((struct mbuf *)); -void ip6_freepcbopts __P((struct ip6_pktopts *)); -void ip6_freemoptions __P((struct ip6_moptions *)); -int ip6_unknown_opt __P((u_int8_t *, struct mbuf *, int)); -char * ip6_get_prevhdr __P((struct mbuf *, int)); -int ip6_nexthdr __P((struct mbuf *, int, int, int *)); -int ip6_lasthdr __P((struct mbuf *, int, int, int *)); - -struct mbuf *ip6_addaux __P((struct mbuf *)); -struct mbuf *ip6_findaux __P((struct mbuf *)); -void ip6_delaux __P((struct mbuf *)); - -int ip6_mforward __P((struct ip6_hdr *, struct ifnet *, struct mbuf *)); -int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *, - u_int32_t *)); -void ip6_savecontrol __P((struct inpcb *, struct mbuf **, struct ip6_hdr *, - struct mbuf *)); -void ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *, - u_int32_t *)); -int ip6_sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); - -void ip6_forward __P((struct mbuf *, int)); - -void ip6_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in6 *)); -int ip6_output __P((struct mbuf *, struct ip6_pktopts *, +void ip6_init(void); +void ip6intr(void); +void ip6_input(struct mbuf *); +struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *); +void ip6_freepcbopts(struct ip6_pktopts *); +void ip6_freemoptions(struct ip6_moptions *); +int ip6_unknown_opt(u_int8_t *, struct mbuf *, int, int); +char * ip6_get_prevhdr(struct mbuf *, int); +int ip6_nexthdr(struct mbuf *, int, int, int *); +int ip6_lasthdr(struct mbuf *, int, int, int *); + +struct mbuf *ip6_addaux(struct mbuf *); +struct mbuf *ip6_findaux(struct mbuf *); +void ip6_delaux(struct mbuf *); + +int ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); +int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, + u_int32_t *); +void ip6_savecontrol(struct inpcb *, struct mbuf **, struct ip6_hdr *, + struct mbuf *); +void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, + u_int32_t *); +int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); + +void ip6_forward(struct mbuf *, int, int); + +void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); +int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, int, - struct ip6_moptions *, struct ifnet **)); -int ip6_ctloutput __P((struct socket *, struct sockopt *sopt)); -void init_ip6pktopts __P((struct ip6_pktopts *)); -int ip6_setpktoptions __P((struct mbuf *, struct ip6_pktopts *, int, int)); -void ip6_clearpktopts __P((struct ip6_pktopts *, int, int)); -struct ip6_pktopts *ip6_copypktopts __P((struct ip6_pktopts *, int)); -int ip6_optlen __P((struct inpcb *)); - -int route6_input __P((struct mbuf **, int *)); - -void frag6_init __P((void)); -int frag6_input __P((struct mbuf **, int *)); -void frag6_slowtimo __P((void)); -void 
frag6_drain __P((void)); - -void rip6_init __P((void)); -int rip6_input __P((struct mbuf **mp, int *offset)); -void rip6_ctlinput __P((int, struct sockaddr *, void *)); -int rip6_ctloutput __P((struct socket *so, struct sockopt *sopt)); -int rip6_output __P((struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *)); -int rip6_usrreq __P((struct socket *, - int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *)); - -int dest6_input __P((struct mbuf **, int *)); -int none_input __P((struct mbuf **, int *)); + struct ip6_moptions *, struct ifnet **, int locked); +int ip6_ctloutput(struct socket *, struct sockopt *sopt); +void init_ip6pktopts(struct ip6_pktopts *); +int ip6_setpktoptions(struct mbuf *, struct ip6_pktopts *, int, int); +void ip6_clearpktopts(struct ip6_pktopts *, int, int); +struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); +int ip6_optlen(struct inpcb *); + +int route6_input(struct mbuf **, int *); + +void frag6_init(void); +int frag6_input(struct mbuf **, int *); +void frag6_slowtimo(void); +void frag6_drain(void); + +void rip6_init(void); +int rip6_input(struct mbuf **mp, int *offset); +void rip6_ctlinput(int, struct sockaddr *, void *); +int rip6_ctloutput(struct socket *so, struct sockopt *sopt); +int rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *); +int rip6_usrreq(struct socket *, + int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *); + +int dest6_input(struct mbuf **, int *); +int none_input(struct mbuf **, int *); #endif /* KERNEL */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* !_NETINET6_IP6_VAR_H_ */ diff --git a/bsd/netinet6/ip6protosw.h b/bsd/netinet6/ip6protosw.h index beee88937..f0386fa39 100644 --- a/bsd/netinet6/ip6protosw.h +++ b/bsd/netinet6/ip6protosw.h @@ -71,7 +71,9 @@ #ifndef _NETINET6_IP6PROTOSW_H_ #define _NETINET6_IP6PROTOSW_H_ #include -#ifdef __APPLE_API_PRIVATE + +#ifdef KERNEL_PRIVATE +#include /* * Protocol switch table for IPv6. 
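/*
 * Reviewer note: the __P() wrapper -- which expanded to its argument
 * list on ANSI compilers and to () on pre-ANSI ones -- is retired
 * across these headers in favor of plain prototypes:
 */
int ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
/* was: int ip6_mforward __P((struct ip6_hdr *, struct ifnet *, struct mbuf *)); */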
@@ -124,27 +126,27 @@ struct ip6protosw { short pr_protocol; /* protocol number */ unsigned int pr_flags; /* see below */ /* protocol-protocol hooks */ - int (*pr_input) __P((struct mbuf **, int *)); + int (*pr_input)(struct mbuf **, int *); /* input to protocol (from below) */ - int (*pr_output) __P((struct mbuf *m, struct socket *so, - struct sockaddr_in6 *, struct mbuf *)); + int (*pr_output)(struct mbuf *m, struct socket *so, + struct sockaddr_in6 *, struct mbuf *); /* output to protocol (from above) */ - void (*pr_ctlinput)__P((int, struct sockaddr *, void *)); + void (*pr_ctlinput)(int, struct sockaddr *, void *); /* control input (from below) */ - int (*pr_ctloutput)__P((struct socket *, struct sockopt *)); + int (*pr_ctloutput)(struct socket *, struct sockopt *); /* control output (from above) */ /* user-protocol hook */ - int (*pr_usrreq) /* user request: see list below */ - __P((struct socket *, int, struct mbuf *, - struct mbuf *, struct mbuf *, struct proc *)); + int (*pr_usrreq)(struct socket *, int, struct mbuf *, + struct mbuf *, struct mbuf *, struct proc *); + /* user request: see list below */ /* utility hooks */ - void (*pr_init) __P((void)); /* initialization hook */ - void (*pr_fasttimo) __P((void)); + void (*pr_init)(void); /* initialization hook */ + void (*pr_fasttimo)(void); /* fast timeout (200ms) */ - void (*pr_slowtimo) __P((void)); + void (*pr_slowtimo)(void); /* slow timeout (500ms) */ - void (*pr_drain) __P((void)); + void (*pr_drain)(void); /* flush any excess space possible */ #ifdef __APPLE__ /* for compat. with IPv4 protosw */ @@ -153,12 +155,19 @@ struct ip6protosw { struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ #ifdef __APPLE__ + int (*pr_lock) (struct socket *so, int locktype, int debug); /* lock function for protocol */ + int (*pr_unlock) (struct socket *so, int locktype, int debug); /* unlock for protocol */ +#ifdef _KERN_LOCKS_H_ + lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); /* unlock for protocol */ +#else + void * (*pr_getlock) (struct socket *so, int locktype); /* unlock for protocol */ +#endif /* Filter hooks */ TAILQ_HEAD(pr6_sfilter, NFDescriptor) pr_sfilter; struct ip6protosw *pr_next; /* Chain for domain */ - u_long reserved[4]; + u_long reserved[1]; #endif }; -#endif /* __APPLE_API_PRIVATE */ -#endif +#endif KERNEL_PRIVATE +#endif _NETINET6_IP6PROTOSW_H_ diff --git a/bsd/netinet6/ipcomp.h b/bsd/netinet6/ipcomp.h index 383a67555..41ab61b85 100644 --- a/bsd/netinet6/ipcomp.h +++ b/bsd/netinet6/ipcomp.h @@ -53,18 +53,18 @@ struct ipcomp { #define IPCOMP_CPI_NEGOTIATE_MIN 256 #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE struct ipcomp_algorithm { - int (*compress) __P((struct mbuf *, struct mbuf *, size_t *)); - int (*decompress) __P((struct mbuf *, struct mbuf *, size_t *)); + int (*compress)(struct mbuf *, struct mbuf *, size_t *); + int (*decompress)(struct mbuf *, struct mbuf *, size_t *); size_t minplen; /* minimum required length for compression */ }; struct ipsecrequest; -extern const struct ipcomp_algorithm *ipcomp_algorithm_lookup __P((int)); -extern void ipcomp4_input __P((struct mbuf *, int)); -extern int ipcomp4_output __P((struct mbuf *, struct ipsecrequest *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern const struct ipcomp_algorithm *ipcomp_algorithm_lookup(int); +extern void ipcomp4_input(struct mbuf *, int); +extern int ipcomp4_output(struct mbuf *, struct ipsecrequest *); +#endif KERNEL_PRIVATE +#endif KERNEL -#endif /*_NETINET6_IPCOMP_H_*/ +#endif 
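/*
 * Reviewer note: struct ip6protosw gains three locking hooks, carved
 * out of the reserved[] padding (4 -> 1 u_longs).  On the 32-bit
 * kernels this patch targets, a function pointer and a u_long are the
 * same width, so the structure's size and layout should be unchanged
 * (my assumption; the patch does not state this).  The hooks:
 */
	int	   (*pr_lock)(struct socket *so, int locktype, int debug);
	int	   (*pr_unlock)(struct socket *so, int locktype, int debug);
	lck_mtx_t *(*pr_getlock)(struct socket *so, int locktype);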
_NETINET6_IPCOMP_H_ diff --git a/bsd/netinet6/ipcomp6.h b/bsd/netinet6/ipcomp6.h index b0ca0316e..3091dc6b5 100644 --- a/bsd/netinet6/ipcomp6.h +++ b/bsd/netinet6/ipcomp6.h @@ -38,12 +38,10 @@ #define _NETINET6_IPCOMP6_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern int ipcomp6_input __P((struct mbuf **, int *)); -extern int ipcomp6_output __P((struct mbuf *, u_char *, struct mbuf *, - struct ipsecrequest *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +#ifdef KERNEL_PRIVATE +extern int ipcomp6_input(struct mbuf **, int *); +extern int ipcomp6_output(struct mbuf *, u_char *, struct mbuf *, + struct ipsecrequest *); +#endif KERNEL_PRIVATE #endif /*_NETINET6_IPCOMP6_H_*/ diff --git a/bsd/netinet6/ipcomp_core.c b/bsd/netinet6/ipcomp_core.c index 1ca103860..38f70861c 100644 --- a/bsd/netinet6/ipcomp_core.c +++ b/bsd/netinet6/ipcomp_core.c @@ -50,7 +50,6 @@ #include #include -#include #include #include @@ -65,11 +64,11 @@ #include -static void *deflate_alloc __P((void *, u_int, u_int)); -static void deflate_free __P((void *, void *)); -static int deflate_common __P((struct mbuf *, struct mbuf *, size_t *, int)); -static int deflate_compress __P((struct mbuf *, struct mbuf *, size_t *)); -static int deflate_decompress __P((struct mbuf *, struct mbuf *, size_t *)); +static void *deflate_alloc(void *, u_int, u_int); +static void deflate_free(void *, void *); +static int deflate_common(struct mbuf *, struct mbuf *, size_t *, int); +static int deflate_compress(struct mbuf *, struct mbuf *, size_t *); +static int deflate_decompress(struct mbuf *, struct mbuf *, size_t *); /* * We need to use default window size (2^15 = 32Kbytes as of writing) for diff --git a/bsd/netinet6/ipcomp_input.c b/bsd/netinet6/ipcomp_input.c index 7ea00daf6..45821aa26 100644 --- a/bsd/netinet6/ipcomp_input.c +++ b/bsd/netinet6/ipcomp_input.c @@ -49,9 +49,9 @@ #include #include -#include #include #include +#include #include #include @@ -80,7 +80,7 @@ #define IPLEN_FLIPPED - +extern lck_mtx_t *sadb_mutex; void ipcomp4_input(struct mbuf *m, int off) { @@ -95,6 +95,7 @@ ipcomp4_input(struct mbuf *m, int off) size_t newlen, olen; struct secasvar *sav = NULL; + lck_mtx_lock(sadb_mutex); if (m->m_pkthdr.len < off + sizeof(struct ipcomp)) { ipseclog((LOG_DEBUG, "IPv4 IPComp input: assumption failed " @@ -153,7 +154,9 @@ ipcomp4_input(struct mbuf *m, int off) olen = m->m_pkthdr.len; newlen = m->m_pkthdr.len - off; + lck_mtx_unlock(sadb_mutex); error = (*algo->decompress)(m, m->m_next, &newlen); + lck_mtx_lock(sadb_mutex); if (error != 0) { if (error == EINVAL) ipsecstat.in_inval++; @@ -214,18 +217,22 @@ ipcomp4_input(struct mbuf *m, int off) ipsecstat.in_polvio++; goto fail; } - (*ip_protox[nxt]->pr_input)(m, off); - + lck_mtx_unlock(sadb_mutex); + ip_proto_dispatch_in(m, off, nxt, 0); + lck_mtx_lock(sadb_mutex); } else m_freem(m); m = NULL; ipsecstat.in_success++; + lck_mtx_unlock(sadb_mutex); return; fail: if (sav) key_freesav(sav); + + lck_mtx_unlock(sadb_mutex); if (m) m_freem(m); return; @@ -252,6 +259,7 @@ ipcomp6_input(mp, offp) m = *mp; off = *offp; + lck_mtx_lock(sadb_mutex); md = m_pulldown(m, off, sizeof(*ipcomp), NULL); if (!m) { m = NULL; /*already freed*/ @@ -291,7 +299,9 @@ ipcomp6_input(mp, offp) m->m_pkthdr.len -= sizeof(struct ipcomp); newlen = m->m_pkthdr.len - off; + lck_mtx_unlock(sadb_mutex); error = (*algo->decompress)(m, md, &newlen); + lck_mtx_lock(sadb_mutex); if (error != 0) { if (error == EINVAL) ipsec6stat.in_inval++; @@ -330,6 +340,7 @@ ipcomp6_input(mp, offp) *offp = off; *mp = 
m; ipsec6stat.in_success++; + lck_mtx_unlock(sadb_mutex); return nxt; fail: @@ -337,6 +348,7 @@ fail: m_freem(m); if (sav) key_freesav(sav); + lck_mtx_unlock(sadb_mutex); return IPPROTO_DONE; } #endif /* INET6 */ diff --git a/bsd/netinet6/ipcomp_output.c b/bsd/netinet6/ipcomp_output.c index a8a839b93..7a8d39b1d 100644 --- a/bsd/netinet6/ipcomp_output.c +++ b/bsd/netinet6/ipcomp_output.c @@ -49,9 +49,9 @@ #include #include -#include #include #include +#include #include #include @@ -78,8 +78,10 @@ #include -static int ipcomp_output __P((struct mbuf *, u_char *, struct mbuf *, - struct ipsecrequest *, int)); +extern lck_mtx_t *sadb_mutex; + +static int ipcomp_output(struct mbuf *, u_char *, struct mbuf *, + struct ipsecrequest *, int); /* * Modify the packet so that the payload is compressed. @@ -204,7 +206,9 @@ ipcomp_output(m, nexthdrp, md, isr, af) mprev->m_next = md; /* compress data part */ + lck_mtx_unlock(sadb_mutex); if ((*algo->compress)(m, md, &plen) || mprev->m_next == NULL) { + lck_mtx_lock(sadb_mutex); ipseclog((LOG_ERR, "packet compression failure\n")); m = NULL; m_freem(md0); @@ -213,6 +217,7 @@ ipcomp_output(m, nexthdrp, md, isr, af) error = EINVAL; goto fail; } + lck_mtx_lock(sadb_mutex); stat->out_comphist[sav->alg_enc]++; md = mprev->m_next; diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 9a9f6ebe8..7d8f334fd 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #include @@ -111,6 +113,8 @@ int ipsec_debug = 0; #define DBG_FNC_GETPOL_ADDR NETDBG_CODE(DBG_NETIPSEC, (2 << 8)) #define DBG_FNC_IPSEC_OUT NETDBG_CODE(DBG_NETIPSEC, (3 << 8)) +extern lck_mtx_t *sadb_mutex; +extern lck_mtx_t *ip6_mutex; struct ipsecstat ipsecstat; int ip4_ah_cleartos = 1; @@ -201,43 +205,43 @@ SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW, &ip6_esp_randpad, 0, ""); #endif /* INET6 */ -static int ipsec_setspidx_mbuf - __P((struct secpolicyindex *, u_int, u_int, struct mbuf *, int)); -static int ipsec4_setspidx_inpcb __P((struct mbuf *, struct inpcb *pcb)); +static int ipsec_setspidx_mbuf(struct secpolicyindex *, u_int, u_int, + struct mbuf *, int); +static int ipsec4_setspidx_inpcb(struct mbuf *, struct inpcb *pcb); #if INET6 -static int ipsec6_setspidx_in6pcb __P((struct mbuf *, struct in6pcb *pcb)); +static int ipsec6_setspidx_in6pcb(struct mbuf *, struct in6pcb *pcb); #endif -static int ipsec_setspidx __P((struct mbuf *, struct secpolicyindex *, int)); -static void ipsec4_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); -static int ipsec4_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); +static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int); +static void ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *, int); +static int ipsec4_setspidx_ipaddr(struct mbuf *, struct secpolicyindex *); #if INET6 -static void ipsec6_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); -static int ipsec6_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); +static void ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *, int); +static int ipsec6_setspidx_ipaddr(struct mbuf *, struct secpolicyindex *); #endif -static struct inpcbpolicy *ipsec_newpcbpolicy __P((void)); -static void ipsec_delpcbpolicy __P((struct inpcbpolicy *)); -static struct secpolicy *ipsec_deepcopy_policy __P((struct secpolicy *src)); -static int ipsec_set_policy __P((struct secpolicy **pcb_sp, - int optname, caddr_t request, size_t len, int priv)); -static 
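/*
 * Reviewer note: in both ipcomp input and output, sadb_mutex is
 * released across the (*algo->compress)/(*algo->decompress) callbacks
 * and re-taken afterwards -- presumably because the deflate code can
 * allocate and block, which must not happen under the SADB lock.  The
 * recurring bracket:
 */
	lck_mtx_unlock(sadb_mutex);
	error = (*algo->decompress)(m, m->m_next, &newlen);
	lck_mtx_lock(sadb_mutex);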
int ipsec_get_policy __P((struct secpolicy *pcb_sp, struct mbuf **mp)); -static void vshiftl __P((unsigned char *, int, int)); -static int ipsec_in_reject __P((struct secpolicy *, struct mbuf *)); -static size_t ipsec_hdrsiz __P((struct secpolicy *)); +static struct inpcbpolicy *ipsec_newpcbpolicy(void); +static void ipsec_delpcbpolicy(struct inpcbpolicy *); +static struct secpolicy *ipsec_deepcopy_policy(struct secpolicy *src); +static int ipsec_set_policy(struct secpolicy **pcb_sp, + int optname, caddr_t request, size_t len, int priv); +static int ipsec_get_policy(struct secpolicy *pcb_sp, struct mbuf **mp); +static void vshiftl(unsigned char *, int, int); +static int ipsec_in_reject(struct secpolicy *, struct mbuf *); +static size_t ipsec_hdrsiz(struct secpolicy *); #if INET -static struct mbuf *ipsec4_splithdr __P((struct mbuf *)); +static struct mbuf *ipsec4_splithdr(struct mbuf *); #endif #if INET6 -static struct mbuf *ipsec6_splithdr __P((struct mbuf *)); +static struct mbuf *ipsec6_splithdr(struct mbuf *); #endif #if INET -static int ipsec4_encapsulate __P((struct mbuf *, struct secasvar *)); +static int ipsec4_encapsulate(struct mbuf *, struct secasvar *); #endif #if INET6 -static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *)); +static int ipsec6_encapsulate(struct mbuf *, struct secasvar *); #endif -static struct mbuf *ipsec_addaux __P((struct mbuf *)); -static struct mbuf *ipsec_findaux __P((struct mbuf *)); -static void ipsec_optaux __P((struct mbuf *, struct mbuf *)); +static struct mbuf *ipsec_addaux(struct mbuf *); +static struct mbuf *ipsec_findaux(struct mbuf *); +static void ipsec_optaux(struct mbuf *, struct mbuf *); void ipsec_send_natt_keepalive(struct secasvar *sav); static int @@ -266,7 +270,7 @@ sysctl_def_policy SYSCTL_HANDLER_ARGS * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. + * others : error occurred. * others: a pointer to SP * * NOTE: IPv6 mapped adddress concern is implemented here. @@ -282,12 +286,13 @@ ipsec4_getpolicybysock(m, dir, so, error) struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); - + if (so->so_pcb == NULL) { - /* Socket may be closing or without PCB */ + printf("ipsec4_getpolicybysock: so->so_pcb == NULL\n"); return ipsec4_getpolicybyaddr(m, dir, 0, error); } @@ -460,7 +465,7 @@ ipsec4_getpolicybysock(m, dir, so, error) * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. + * others : error occurred. */ struct secpolicy * ipsec4_getpolicybyaddr(m, dir, flag, error) @@ -474,6 +479,8 @@ ipsec4_getpolicybyaddr(m, dir, flag, error) if (ipsec_bypass != 0) return 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || error == NULL) panic("ipsec4_getpolicybyaddr: NULL pointer was passed.\n"); @@ -528,7 +535,7 @@ ipsec4_getpolicybyaddr(m, dir, flag, error) * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. + * others : error occurred. 
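/*
 * Reviewer note: rather than taking sadb_mutex themselves, the ipsec
 * policy entry points now declare their locking contract at the top of
 * each function; on a DEBUG kernel this panics immediately if a caller
 * arrives without the mutex held:
 */
	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);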
* others: a pointer to SP */ struct secpolicy * @@ -542,6 +549,8 @@ ipsec6_getpolicybysock(m, dir, so, error) struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); @@ -685,7 +694,7 @@ ipsec6_getpolicybysock(m, dir, so, error) * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occured. + * others : error occurred. */ #ifndef IP_FORWARDING #define IP_FORWARDING 1 @@ -700,6 +709,8 @@ ipsec6_getpolicybyaddr(m, dir, flag, error) { struct secpolicy *sp = NULL; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || error == NULL) panic("ipsec6_getpolicybyaddr: NULL pointer was passed.\n"); @@ -1011,7 +1022,7 @@ ipsec4_get_ulp(m, spidx, needport) uh.uh_dport; return; case IPPROTO_AH: - if (m->m_pkthdr.len > off + sizeof(ip6e)) + if (off + sizeof(ip6e) > m->m_pkthdr.len) return; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); off += (ip6e.ip6e_len + 2) << 2; @@ -1183,6 +1194,8 @@ ipsec_init_policy(so, pcb_sp) { struct inpcbpolicy *new; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (so == NULL || pcb_sp == NULL) panic("ipsec_init_policy: NULL pointer was passed.\n"); @@ -1197,7 +1210,7 @@ ipsec_init_policy(so, pcb_sp) #ifdef __APPLE__ if (so->so_uid == 0) #else - if (so->so_cred != 0 && so->so_cred->pc_ucred->cr_uid == 0) + if (so->so_cred != 0 && !suser(so->so_cred->pc_ucred, NULL)) #endif new->priv = 1; else @@ -1233,6 +1246,8 @@ ipsec_copy_policy(old, new) if (ipsec_bypass != 0) return 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + sp = ipsec_deepcopy_policy(old->sp_in); if (sp) { key_freesp(new->sp_in); @@ -1323,6 +1338,8 @@ ipsec_set_policy(pcb_sp, optname, request, len, priv) struct secpolicy *newsp = NULL; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) return EINVAL; @@ -1366,6 +1383,8 @@ ipsec_get_policy(pcb_sp, mp) struct mbuf **mp; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (pcb_sp == NULL || mp == NULL) return EINVAL; @@ -1396,6 +1415,8 @@ ipsec4_set_policy(inp, optname, request, len, priv) struct secpolicy **pcb_sp; int error = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (inp == NULL || request == NULL) return EINVAL; @@ -1441,6 +1462,8 @@ ipsec4_get_policy(inp, request, len, mp) struct secpolicy *pcb_sp; int error = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (inp == NULL || request == NULL || mp == NULL) return EINVAL; @@ -1476,6 +1499,8 @@ int ipsec4_delete_pcbpolicy(inp) struct inpcb *inp; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (inp == NULL) panic("ipsec4_delete_pcbpolicy: NULL pointer was passed.\n"); @@ -1512,6 +1537,8 @@ ipsec6_set_policy(in6p, optname, request, len, priv) struct secpolicy **pcb_sp; int error = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (in6p == NULL || request == NULL) return EINVAL; @@ -1557,6 +1584,8 @@ ipsec6_get_policy(in6p, request, len, mp) struct secpolicy *pcb_sp; int error = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. 
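The lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED) lines sprinkled through these routines document and enforce the new locking contract: every SPD/SAD accessor must be entered with the SADB mutex held. Note also the corrected bounds check in ipsec4_get_ulp() above; the old test returned early when the packet was long enough to hold the extension header, the new one returns when it is too short. A minimal sketch of the assertion pattern, assuming the standard xnu locks API:

	#include <kern/locks.h>

	extern lck_mtx_t *sadb_mutex;

	static void
	example_spd_op(void)
	{
		/* Triggers an assertion failure if the calling thread does
		 * not own the mutex, catching callers that bypass the new
		 * locking contract. */
		lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);

		/* ... touch security-policy state guarded by sadb_mutex ... */
	}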
*/ if (in6p == NULL || request == NULL || mp == NULL) return EINVAL; @@ -1591,6 +1620,8 @@ int ipsec6_delete_pcbpolicy(in6p) struct in6pcb *in6p; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (in6p == NULL) panic("ipsec6_delete_pcbpolicy: NULL pointer was passed.\n"); @@ -1626,6 +1657,8 @@ ipsec_get_reqlevel(isr) u_int level = 0; u_int esp_trans_deflev, esp_net_deflev, ah_trans_deflev, ah_net_deflev; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (isr == NULL || isr->sp == NULL) panic("ipsec_get_reqlevel: NULL pointer is passed.\n"); @@ -1685,6 +1718,7 @@ ipsec_get_reqlevel(isr) level = ah_net_deflev; else level = ah_trans_deflev; + break; case IPPROTO_IPCOMP: /* * we don't really care, as IPcomp document says that @@ -1734,6 +1768,8 @@ ipsec_in_reject(sp, m) printf("ipsec_in_reject: using SP\n"); kdebug_secpolicy(sp)); + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: @@ -1814,6 +1850,8 @@ ipsec4_in_reject_so(m, so) int error; int result; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ @@ -1844,12 +1882,17 @@ ipsec4_in_reject(m, inp) struct mbuf *m; struct inpcb *inp; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (inp == NULL) return ipsec4_in_reject_so(m, NULL); if (inp->inp_socket) return ipsec4_in_reject_so(m, inp->inp_socket); else panic("ipsec4_in_reject: invalid inpcb/socket"); + + /* NOTREACHED */ + return 0; } #if INET6 @@ -1867,6 +1910,8 @@ ipsec6_in_reject_so(m, so) int error; int result; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ @@ -1896,12 +1941,17 @@ ipsec6_in_reject(m, in6p) struct mbuf *m; struct in6pcb *in6p; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (in6p == NULL) return ipsec6_in_reject_so(m, NULL); if (in6p->in6p_socket) return ipsec6_in_reject_so(m, in6p->in6p_socket); else panic("ipsec6_in_reject: invalid in6p/socket"); + + /* NOTREACHED */ + return 0; } #endif @@ -1921,6 +1971,8 @@ ipsec_hdrsiz(sp) printf("ipsec_hdrsiz: using SP\n"); kdebug_secpolicy(sp)); + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: @@ -1992,6 +2044,8 @@ ipsec4_hdrsiz(m, dir, inp) int error; size_t size; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ @@ -2034,6 +2088,8 @@ ipsec6_hdrsiz(m, dir, in6p) int error; size_t size; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL) return 0; /* XXX shoud be panic ? 
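One behavioral fix worth calling out in the ipsec_get_reqlevel() hunk above: the IPPROTO_AH case used to fall through into IPPROTO_IPCOMP, discarding the AH level it had just computed. The added break restores the intended flow; in outline (a sketch, not the full function):

	switch (isr->saidx.proto) {
	case IPPROTO_AH:
		if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
			level = ah_net_deflev;
		else
			level = ah_trans_deflev;
		break;		/* previously missing: control fell into IPCOMP */
	case IPPROTO_IPCOMP:
		/* "we don't really care", per the comment in the source */
		break;
	}

The /* NOTREACHED */ returns added after the panic() calls in ipsec4_in_reject() and ipsec6_in_reject() serve a similar hygienic purpose: they keep compilers that cannot see that panic() never returns from warning about a missing return value.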
*/ @@ -2075,6 +2131,8 @@ ipsec4_encapsulate(m, sav) size_t hlen; size_t plen; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family @@ -2194,6 +2252,8 @@ ipsec6_encapsulate(m, sav) struct ip6_hdr *ip6; size_t plen; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family @@ -2285,6 +2345,8 @@ ipsec_chkreplay(seq, sav) u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); @@ -2344,6 +2406,8 @@ ipsec_updatereplay(seq, sav) u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); @@ -2517,6 +2581,8 @@ ipsec_logsastr(sav) char *p; struct secasindex *saidx = &sav->sah->saidx; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* validity check */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family) @@ -2591,7 +2657,6 @@ ipsec4_output(state, sp, flags) struct ip *ip = NULL; struct ipsecrequest *isr = NULL; struct secasindex saidx; - int s; int error; struct sockaddr_in *dst4; struct sockaddr_in *sin; @@ -2605,6 +2670,8 @@ ipsec4_output(state, sp, flags) if (!state->dst) panic("state->dst == NULL in ipsec4_output"); + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + KERNEL_DEBUG(DBG_FNC_IPSEC_OUT | DBG_FUNC_START, 0,0,0,0,0); KEYDEBUG(KEYDEBUG_IPSEC_DATA, @@ -2687,7 +2754,6 @@ ipsec4_output(state, sp, flags) * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ - s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* @@ -2698,19 +2764,16 @@ ipsec4_output(state, sp, flags) ipseclog((LOG_ERR, "ipsec4_output: " "family mismatched between inner and outer spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); - splx(s); error = EAFNOSUPPORT; goto bad; } state->m = ipsec4_splithdr(state->m); if (!state->m) { - splx(s); error = ENOMEM; goto bad; } error = ipsec4_encapsulate(state->m, isr->sav); - splx(s); if (error) { state->m = NULL; goto bad; @@ -2743,8 +2806,7 @@ ipsec4_output(state, sp, flags) state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst4 = (struct sockaddr_in *)state->dst; } - } else - splx(s); + } state->m = ipsec4_splithdr(state->m); if (!state->m) { @@ -2841,7 +2903,8 @@ ipsec6_output_trans(state, nexthdrp, mprev, sp, flags, tun) KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_trans: applyed SP\n"); kdebug_secpolicy(sp)); - + + lck_mtx_lock(sadb_mutex); *tun = 0; for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { @@ -2900,8 +2963,10 @@ ipsec6_output_trans(state, nexthdrp, mprev, sp, flags, tun) * XXX: should we directly notify sockets via * pfctlinputs? 
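A recurring idiom in this patch shows up just below around icmp6_error(), and again around (*algo->compress)() in ipcomp_output and ip_output() in the NAT-T keepalive hunk further down: a mutex is dropped across a call that can block, take other locks, or re-enter the stack, then reacquired. Isolated, the shape is:

	lck_mtx_unlock(ip6_mutex);	/* icmp6_error() re-enters the output
					 * path and takes its own locks */
	icmp6_error(state->m, ICMP6_DST_UNREACH,
	    ICMP6_DST_UNREACH_ADMIN, 0);
	lck_mtx_lock(ip6_mutex);
	state->m = NULL;		/* icmp6_error() consumed the mbuf */

The usual caveat applies: anything guarded by the dropped mutex may change while it is released, so code after the relock must not trust pointers cached before the unlock.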
*/ + lck_mtx_unlock(ip6_mutex); icmp6_error(state->m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); + lck_mtx_lock(ip6_mutex); state->m = NULL; /* icmp6_error freed the mbuf */ goto bad; } @@ -2971,9 +3036,11 @@ ipsec6_output_trans(state, nexthdrp, mprev, sp, flags, tun) if (isr != NULL) *tun = 1; + lck_mtx_unlock(sadb_mutex); return 0; bad: + lck_mtx_unlock(sadb_mutex); m_freem(state->m); state->m = NULL; return error; @@ -2994,7 +3061,6 @@ ipsec6_output_tunnel(state, sp, flags) int error = 0; int plen; struct sockaddr_in6* dst6; - int s; if (!state) panic("state == NULL in ipsec6_output_tunnel"); @@ -3003,6 +3069,8 @@ ipsec6_output_tunnel(state, sp, flags) if (!sp) panic("sp == NULL in ipsec6_output_tunnel"); + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_tunnel: applyed SP\n"); kdebug_secpolicy(sp)); @@ -3097,7 +3165,6 @@ ipsec6_output_tunnel(state, sp, flags) * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ - s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* @@ -3108,7 +3175,6 @@ ipsec6_output_tunnel(state, sp, flags) ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "family mismatched between inner and outer, spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); - splx(s); ipsec6stat.out_inval++; error = EAFNOSUPPORT; goto bad; @@ -3116,13 +3182,11 @@ ipsec6_output_tunnel(state, sp, flags) state->m = ipsec6_splithdr(state->m); if (!state->m) { - splx(s); ipsec6stat.out_nomem++; error = ENOMEM; goto bad; } error = ipsec6_encapsulate(state->m, isr->sav); - splx(s); if (error) { state->m = 0; goto bad; @@ -3157,8 +3221,7 @@ ipsec6_output_tunnel(state, sp, flags) state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; } - } else - splx(s); + } state->m = ipsec6_splithdr(state->m); if (!state->m) { @@ -3311,6 +3374,8 @@ ipsec4_tunnel_validate(m, off, nxt0, sav) struct secpolicy *sp; struct ip *oip; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + #if DIAGNOSTIC if (m->m_len < sizeof(struct ip)) panic("too short mbuf on ipsec4_tunnel_validate"); @@ -3378,8 +3443,9 @@ ipsec4_tunnel_validate(m, off, nxt0, sav) sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst, (struct sockaddr *)&isrc, (struct sockaddr *)&idst); - if (!sp) + if (!sp) { return 0; + } key_freesp(sp); return 1; @@ -3400,6 +3466,8 @@ ipsec6_tunnel_validate(m, off, nxt0, sav) struct secpolicy *sp; struct ip6_hdr *oip6; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + #if DIAGNOSTIC if (m->m_len < sizeof(struct ip6_hdr)) panic("too short mbuf on ipsec6_tunnel_validate"); @@ -3725,9 +3793,12 @@ ipsec_send_natt_keepalive( struct mbuf *m; struct udphdr *uh; struct ip *ip; - + int error; + if ((esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0) return; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; @@ -3742,17 +3813,23 @@ ipsec_send_natt_keepalive( uh = (struct udphdr*)((char*)m_mtod(m) + sizeof(struct ip)); m->m_len = sizeof(struct udpiphdr) + 1; bzero(m_mtod(m), m->m_len); - ip->ip_len = ntohs(m->m_len); + m->m_pkthdr.len = m->m_len; + + ip->ip_len = m->m_len; ip->ip_ttl = ip_defttl; ip->ip_p = IPPROTO_UDP; ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; - uh->uh_sport = ntohs((u_short)esp_udp_encap_port); - uh->uh_dport = ntohs(sav->remote_ike_port); + uh->uh_sport = 
htons((u_short)esp_udp_encap_port); + uh->uh_dport = htons(sav->remote_ike_port); uh->uh_ulen = htons(1 + sizeof(struct udphdr)); uh->uh_sum = 0; *(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF; - if (ip_output(m, NULL, &sav->sah->sa_route, IP_NOIPSEC, NULL) == 0) + lck_mtx_unlock(sadb_mutex); + error = ip_output(m, NULL, &sav->sah->sa_route, IP_NOIPSEC, NULL); + lck_mtx_lock(sadb_mutex); + if (error == 0) sav->natt_last_activity = natt_now; + } diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index 4fcddc80c..2a4f6dcef 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -39,10 +39,9 @@ #include #include +#ifdef KERNEL_PRIVATE #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. @@ -123,8 +122,7 @@ struct secspacq { int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +#endif /* KERNEL_PRIVATE */ /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 @@ -179,7 +177,6 @@ struct secspacq { */ #define IPSEC_REPLAYWSIZE 32 -#ifdef __APPLE_API_UNSTABLE /* statistics for ipsec processing */ struct ipsecstat { u_quad_t in_success; /* succeeded inbound process */ @@ -209,8 +206,8 @@ struct ipsecstat { u_quad_t out_ahhist[256]; u_quad_t out_comphist[256]; }; -#endif /* __APPLE_API_UNSTABLE */ +#ifdef KERNEL_PRIVATE /* * Definitions for IPsec & Key sysctl operations. */ @@ -269,7 +266,6 @@ struct ipsecstat { } #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE struct ipsec_output_state { struct mbuf *m; struct route *ro; @@ -297,59 +293,56 @@ extern int ip4_esp_randpad; #define ipseclog(x) do { if (ipsec_debug) log x; } while (0) -extern struct secpolicy *ipsec4_getpolicybysock - __P((struct mbuf *, u_int, struct socket *, int *)); -extern struct secpolicy *ipsec4_getpolicybyaddr - __P((struct mbuf *, u_int, int, int *)); +extern struct secpolicy *ipsec4_getpolicybysock(struct mbuf *, u_int, + struct socket *, int *); +extern struct secpolicy *ipsec4_getpolicybyaddr(struct mbuf *, u_int, int, + int *); struct inpcb; -extern int ipsec_init_policy __P((struct socket *so, struct inpcbpolicy **)); -extern int ipsec_copy_policy - __P((struct inpcbpolicy *, struct inpcbpolicy *)); -extern u_int ipsec_get_reqlevel __P((struct ipsecrequest *)); - -extern int ipsec4_set_policy __P((struct inpcb *inp, int optname, - caddr_t request, size_t len, int priv)); -extern int ipsec4_get_policy __P((struct inpcb *inpcb, caddr_t request, - size_t len, struct mbuf **mp)); -extern int ipsec4_delete_pcbpolicy __P((struct inpcb *)); -extern int ipsec4_in_reject_so __P((struct mbuf *, struct socket *)); -extern int ipsec4_in_reject __P((struct mbuf *, struct inpcb *)); +extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); +extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); +extern u_int ipsec_get_reqlevel(struct ipsecrequest *); + +extern int ipsec4_set_policy(struct inpcb *inp, int optname, + caddr_t request, size_t len, int priv); +extern int ipsec4_get_policy(struct inpcb *inpcb, caddr_t request, + size_t len, struct mbuf **mp); +extern int ipsec4_delete_pcbpolicy(struct inpcb *); +extern int ipsec4_in_reject_so(struct mbuf *, struct socket *); +extern int ipsec4_in_reject(struct mbuf *, struct inpcb *); struct secas; struct tcpcb; -extern int ipsec_chkreplay __P((u_int32_t, struct secasvar *)); -extern int 
ipsec_updatereplay __P((u_int32_t, struct secasvar *)); +extern int ipsec_chkreplay(u_int32_t, struct secasvar *); +extern int ipsec_updatereplay(u_int32_t, struct secasvar *); -extern size_t ipsec4_hdrsiz __P((struct mbuf *, u_int, struct inpcb *)); -extern size_t ipsec_hdrsiz_tcp __P((struct tcpcb *)); +extern size_t ipsec4_hdrsiz(struct mbuf *, u_int, struct inpcb *); +extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); struct ip; -extern const char *ipsec4_logpacketstr __P((struct ip *, u_int32_t)); -extern const char *ipsec_logsastr __P((struct secasvar *)); - -extern void ipsec_dumpmbuf __P((struct mbuf *)); - -extern int ipsec4_output __P((struct ipsec_output_state *, struct secpolicy *, - int)); -extern int ipsec4_tunnel_validate __P((struct mbuf *, int, u_int, - struct secasvar *)); -extern struct mbuf *ipsec_copypkt __P((struct mbuf *)); -extern void ipsec_delaux __P((struct mbuf *)); -extern int ipsec_setsocket __P((struct mbuf *, struct socket *)); -extern struct socket *ipsec_getsocket __P((struct mbuf *)); -extern int ipsec_addhist __P((struct mbuf *, int, u_int32_t)); -extern struct ipsec_history *ipsec_gethist __P((struct mbuf *, int *)); -extern void ipsec_clearhist __P((struct mbuf *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern const char *ipsec4_logpacketstr(struct ip *, u_int32_t); +extern const char *ipsec_logsastr(struct secasvar *); + +extern void ipsec_dumpmbuf(struct mbuf *); + +extern int ipsec4_output(struct ipsec_output_state *, struct secpolicy *, int); +extern int ipsec4_tunnel_validate(struct mbuf *, int, u_int, struct secasvar *); +extern struct mbuf *ipsec_copypkt(struct mbuf *); +extern void ipsec_delaux(struct mbuf *); +extern int ipsec_setsocket(struct mbuf *, struct socket *); +extern struct socket *ipsec_getsocket(struct mbuf *); +extern int ipsec_addhist(struct mbuf *, int, u_int32_t); +extern struct ipsec_history *ipsec_gethist(struct mbuf *, int *); +extern void ipsec_clearhist(struct mbuf *); +#endif KERNEL +#endif KERNEL_PRIVATE #ifndef KERNEL -extern caddr_t ipsec_set_policy __P((char *, int)); -extern int ipsec_get_policylen __P((caddr_t)); -extern char *ipsec_dump_policy __P((caddr_t, char *)); +extern caddr_t ipsec_set_policy(char *, int); +extern int ipsec_get_policylen(caddr_t); +extern char *ipsec_dump_policy(caddr_t, char *); -extern const char *ipsec_strerror __P((void)); -#endif /*!KERNEL*/ +extern const char *ipsec_strerror(void); +#endif KERNEL -#endif /*_NETINET6_IPSEC_H_*/ +#endif _NETINET6_IPSEC_H_ diff --git a/bsd/netinet6/ipsec6.h b/bsd/netinet6/ipsec6.h index 9fd4fc75a..33bcfe125 100644 --- a/bsd/netinet6/ipsec6.h +++ b/bsd/netinet6/ipsec6.h @@ -41,8 +41,7 @@ #include #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern struct ipsecstat ipsec6stat; extern struct secpolicy ip6_def_policy; extern int ip6_esp_trans_deflev; @@ -52,35 +51,34 @@ extern int ip6_ah_net_deflev; extern int ip6_ipsec_ecn; extern int ip6_esp_randpad; -extern struct secpolicy *ipsec6_getpolicybysock - __P((struct mbuf *, u_int, struct socket *, int *)); -extern struct secpolicy *ipsec6_getpolicybyaddr - __P((struct mbuf *, u_int, int, int *)); +extern struct secpolicy *ipsec6_getpolicybysock(struct mbuf *, u_int, + struct socket *, int *); +extern struct secpolicy *ipsec6_getpolicybyaddr(struct mbuf *, u_int, int, + int *); struct inpcb; -extern int ipsec6_in_reject_so __P((struct mbuf *, struct socket *)); -extern int ipsec6_delete_pcbpolicy __P((struct inpcb *)); -extern int ipsec6_set_policy __P((struct inpcb 
*inp, int optname, - caddr_t request, size_t len, int priv)); -extern int ipsec6_get_policy - __P((struct inpcb *inp, caddr_t request, size_t len, struct mbuf **mp)); -extern int ipsec6_in_reject __P((struct mbuf *, struct inpcb *)); +extern int ipsec6_in_reject_so(struct mbuf *, struct socket *); +extern int ipsec6_delete_pcbpolicy(struct inpcb *); +extern int ipsec6_set_policy(struct inpcb *inp, int optname, + caddr_t request, size_t len, int priv); +extern int ipsec6_get_policy(struct inpcb *inp, caddr_t request, size_t len, + struct mbuf **mp); +extern int ipsec6_in_reject(struct mbuf *, struct inpcb *); struct tcp6cb; -extern size_t ipsec6_hdrsiz __P((struct mbuf *, u_int, struct inpcb *)); +extern size_t ipsec6_hdrsiz(struct mbuf *, u_int, struct inpcb *); struct ip6_hdr; -extern const char *ipsec6_logpacketstr __P((struct ip6_hdr *, u_int32_t)); +extern const char *ipsec6_logpacketstr(struct ip6_hdr *, u_int32_t); -extern int ipsec6_output_trans __P((struct ipsec_output_state *, u_char *, - struct mbuf *, struct secpolicy *, int, int *)); -extern int ipsec6_output_tunnel __P((struct ipsec_output_state *, - struct secpolicy *, int)); -extern int ipsec6_tunnel_validate __P((struct mbuf *, int, u_int, - struct secasvar *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern int ipsec6_output_trans(struct ipsec_output_state *, u_char *, + struct mbuf *, struct secpolicy *, int, int *); +extern int ipsec6_output_tunnel(struct ipsec_output_state *, + struct secpolicy *, int); +extern int ipsec6_tunnel_validate(struct mbuf *, int, u_int, + struct secasvar *); +#endif KERNEL_PRIVATE -#endif /*_NETINET6_IPSEC6_H_*/ +#endif _NETINET6_IPSEC6_H_ diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c index 7b1b091ce..df44cb746 100644 --- a/bsd/netinet6/mld6.c +++ b/bsd/netinet6/mld6.c @@ -100,6 +100,7 @@ */ #define MLD6_UNSOLICITED_REPORT_INTERVAL 10 +extern lck_mtx_t *nd6_mutex; static struct ip6_pktopts ip6_opts; static int mld6_timers_are_running; static int mld6_init_done = 0 ; @@ -107,7 +108,7 @@ static int mld6_init_done = 0 ; static struct in6_addr mld6_all_nodes_linklocal = IN6ADDR_LINKLOCAL_ALLNODES_INIT; static struct in6_addr mld6_all_routers_linklocal = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; -static void mld6_sendpkt __P((struct in6_multi *, int, const struct in6_addr *)); +static void mld6_sendpkt(struct in6_multi *, int, const struct in6_addr *); void mld6_init() @@ -137,8 +138,8 @@ mld6_init() } void -mld6_start_listening(in6m) - struct in6_multi *in6m; +mld6_start_listening( + struct in6_multi *in6m) { int s = splnet(); @@ -166,8 +167,8 @@ mld6_start_listening(in6m) } void -mld6_stop_listening(in6m) - struct in6_multi *in6m; +mld6_stop_listening( + struct in6_multi *in6m) { mld6_all_nodes_linklocal.s6_addr16[1] = htons(in6m->in6m_ifp->if_index); /* XXX */ @@ -182,9 +183,9 @@ mld6_stop_listening(in6m) } void -mld6_input(m, off) - struct mbuf *m; - int off; +mld6_input( + struct mbuf *m, + int off) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct mld6_hdr *mldh; @@ -195,7 +196,7 @@ mld6_input(m, off) int timer; /* timer value in the MLD query header */ #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(*mldh),); + IP6_EXTHDR_CHECK(m, off, sizeof(*mldh), return); mldh = (struct mld6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(mldh, struct mld6_hdr *, m, off, sizeof(*mldh)); @@ -255,6 +256,7 @@ mld6_input(m, off) * - Use the value specified in the query message as * the maximum timeout. 
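The mld6.c hunks here, together with the mld6_fasttimeo() hunk just below, show the general shape of the SMP conversion: the uniprocessor splnet()/splx() interrupt-priority brackets disappear in favor of real locks, nd6_mutex for the multicast timer walk and the new ifnet_lock_exclusive()/ifnet_lock_shared() calls for the per-interface address lists. Side by side, as a sketch:

	/* Before: single-processor protection by masking network interrupts. */
	int s = splnet();
	/* ... walk the in6_multi list and update timers ... */
	splx(s);

	/* After: a mutex that still means something on SMP. */
	lck_mtx_lock(nd6_mutex);
	/* ... walk the in6_multi list and update timers ... */
	lck_mtx_unlock(nd6_mutex);

Note also that IP6_EXTHDR_CHECK() now receives an explicit return action instead of an empty trailing argument, making the failure path visible at the call site.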
*/ + ifnet_lock_exclusive(ifp); IFP_TO_IA6(ifp, ia); if (ia == NULL) break; @@ -301,6 +303,7 @@ mld6_input(m, off) } } } + ifnet_lock_done(ifp); if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr)) mldh->mld6_addr.s6_addr16[1] = 0; /* XXX */ @@ -328,11 +331,13 @@ mld6_input(m, off) * If we belong to the group being reported, stop * our timer for that group. */ + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(mldh->mld6_addr, ifp, in6m); if (in6m) { in6m->in6m_timer = 0; /* transit to idle state */ in6m->in6m_state = MLD6_OTHERLISTENER; /* clear flag */ } + ifnet_lock_done(ifp); if (IN6_IS_ADDR_MC_LINKLOCAL(&mldh->mld6_addr)) mldh->mld6_addr.s6_addr16[1] = 0; /* XXX */ @@ -350,7 +355,6 @@ mld6_fasttimeo() { struct in6_multi *in6m; struct in6_multistep step; - int s; /* * Quick check to see if any work needs to be done, in order @@ -359,7 +363,7 @@ mld6_fasttimeo() if (!mld6_timers_are_running) return; - s = splnet(); + lck_mtx_lock(nd6_mutex); mld6_timers_are_running = 0; IN6_FIRST_MULTI(step, in6m); while (in6m != NULL) { @@ -373,14 +377,14 @@ mld6_fasttimeo() } IN6_NEXT_MULTI(step, in6m); } - splx(s); + lck_mtx_unlock(nd6_mutex); } static void -mld6_sendpkt(in6m, type, dst) - struct in6_multi *in6m; - int type; - const struct in6_addr *dst; +mld6_sendpkt( + struct in6_multi *in6m, + int type, + const struct in6_addr *dst) { struct mbuf *mh, *md; struct mld6_hdr *mldh; @@ -458,7 +462,7 @@ mld6_sendpkt(in6m, type, dst) /* increment output statictics */ icmp6stat.icp6s_outhist[type]++; - ip6_output(mh, &ip6_opts, NULL, 0, &im6o, &outif); + ip6_output(mh, &ip6_opts, NULL, 0, &im6o, &outif, 0); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); switch (type) { diff --git a/bsd/netinet6/mld6_var.h b/bsd/netinet6/mld6_var.h index dd252c18a..cd583fef1 100644 --- a/bsd/netinet6/mld6_var.h +++ b/bsd/netinet6/mld6_var.h @@ -34,8 +34,7 @@ #define _NETINET6_MLD6_VAR_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #define MLD6_RANDOM_DELAY(X) (random() % (X) + 1) @@ -45,12 +44,11 @@ #define MLD6_OTHERLISTENER 0 #define MLD6_IREPORTEDLAST 1 -void mld6_init __P((void)); -void mld6_input __P((struct mbuf *, int)); -void mld6_start_listening __P((struct in6_multi *)); -void mld6_stop_listening __P((struct in6_multi *)); -void mld6_fasttimeo __P((void)); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +void mld6_init(void); +void mld6_input(struct mbuf *, int); +void mld6_start_listening(struct in6_multi *); +void mld6_stop_listening(struct in6_multi *); +void mld6_fasttimeo(void); +#endif KERNEL_PRIVATE -#endif /* _NETINET6_MLD6_VAR_H_ */ +#endif _NETINET6_MLD6_VAR_H_ diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index baff898b4..228c6fa2e 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -50,6 +50,7 @@ #include #include #include +#include #define DONT_WARN_OBSOLETE #include @@ -110,9 +111,12 @@ struct nd_prhead nd_prefix = { 0 }; int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; static struct sockaddr_in6 all1_sa; -static void nd6_slowtimo_funneled __P((void *)); -static int regen_tmpaddr __P((struct in6_ifaddr *)); +static int regen_tmpaddr(struct in6_ifaddr *); +extern lck_mtx_t *rt_mtx; +extern lck_mtx_t *ip6_mutex; +extern lck_mtx_t *nd6_mutex; +static void nd6_slowtimo(void *ignored_arg); void nd6_init() @@ -136,12 +140,12 @@ nd6_init() nd6_init_done = 1; /* start timer */ - timeout(nd6_slowtimo_funneled, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); + timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); } void -nd6_ifattach(ifp) - 
struct ifnet *ifp; +nd6_ifattach( + struct ifnet *ifp) { /* @@ -194,15 +198,15 @@ nd6_ifattach(ifp) * changes, which means we might have to adjust the ND level MTU. */ void -nd6_setmtu(ifp) - struct ifnet *ifp; +nd6_setmtu( + struct ifnet *ifp) { #ifndef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif struct nd_ifinfo *ndi; - u_long oldmaxmtu, oldlinkmtu, dl_tag; + u_long oldmaxmtu, oldlinkmtu; /* * Make sure IPv6 is enabled for the interface first, @@ -210,8 +214,6 @@ nd6_setmtu(ifp) */ if (ifp->if_index >= nd_ifinfo_indexlim) { - if (dlil_find_dltag(ifp->if_family, ifp->if_unit, PF_INET6, &dl_tag) != EPROTONOSUPPORT) - nd6log((LOG_INFO, "setmtu for ifp=% but nd6 is not attached\n", if_name(ifp))); return; /* we're out of bound for nd_ifinfo */ } @@ -223,6 +225,8 @@ nd6_setmtu(ifp) case IFT_ARCNET: /* XXX MTU handling needs more work */ ndi->maxmtu = MIN(60480, ifp->if_mtu); break; + case IFT_L2VLAN: /* XXX what if the VLAN isn't over ethernet? */ + case IFT_IEEE8023ADLAG: case IFT_ETHER: ndi->maxmtu = MIN(ETHERMTU, ifp->if_mtu); break; @@ -272,10 +276,10 @@ nd6_setmtu(ifp) } void -nd6_option_init(opt, icmp6len, ndopts) - void *opt; - int icmp6len; - union nd_opts *ndopts; +nd6_option_init( + void *opt, + int icmp6len, + union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; @@ -292,8 +296,8 @@ nd6_option_init(opt, icmp6len, ndopts) * Take one ND option. */ struct nd_opt_hdr * -nd6_option(ndopts) - union nd_opts *ndopts; +nd6_option( + union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; @@ -344,8 +348,8 @@ nd6_option(ndopts) * multiple options of the same type. */ int -nd6_options(ndopts) - union nd_opts *ndopts; +nd6_options( + union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; @@ -424,33 +428,20 @@ skip1: * ND6 timer routine to expire default route list and prefix list */ void -nd6_timer_funneled(ignored_arg) - void *ignored_arg; +nd6_timer( + void *ignored_arg) { -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - nd6_timer(ignored_arg); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} -void -nd6_timer(ignored_arg) - void *ignored_arg; -{ - int s; struct llinfo_nd6 *ln; struct nd_defrouter *dr; struct nd_prefix *pr; - struct ifnet *ifp; + struct ifnet *ifp = NULL; struct in6_ifaddr *ia6, *nia6; struct in6_addrlifetime *lt6; + struct timeval timenow; + + getmicrotime(&timenow); - s = splnet(); - timeout(nd6_timer_funneled, (caddr_t)0, nd6_prune * hz); ln = llinfo_nd6.ln_next; while (ln && ln != &llinfo_nd6) { @@ -471,7 +462,7 @@ nd6_timer(ignored_arg) ndi = &nd_ifinfo[ifp->if_index]; dst = (struct sockaddr_in6 *)rt_key(rt); - if (ln->ln_expire > time_second) { + if (ln->ln_expire > timenow.tv_sec) { ln = next; continue; } @@ -498,10 +489,10 @@ nd6_timer(ignored_arg) case ND6_LLINFO_INCOMPLETE: if (ln->ln_asked < nd6_mmaxtries) { ln->ln_asked++; - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[ifp->if_index].retrans / 1000; nd6_ns_output(ifp, NULL, &dst->sin6_addr, - ln, 0); + ln, 0, 0); } else { struct mbuf *m = ln->ln_hold; ln->ln_hold = NULL; @@ -526,7 +517,7 @@ nd6_timer(ignored_arg) case ND6_LLINFO_REACHABLE: if (ln->ln_expire) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } break; @@ -541,23 +532,23 @@ nd6_timer(ignored_arg) /* We need NUD */ ln->ln_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; - ln->ln_expire 
= time_second + + ln->ln_expire = timenow.tv_sec + ndi->retrans / 1000; nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr, - ln, 0); + ln, 0, 0); } else { ln->ln_state = ND6_LLINFO_STALE; /* XXX */ - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } break; case ND6_LLINFO_PROBE: if (ln->ln_asked < nd6_umaxtries) { ln->ln_asked++; - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[ifp->if_index].retrans / 1000; nd6_ns_output(ifp, &dst->sin6_addr, - &dst->sin6_addr, ln, 0); + &dst->sin6_addr, ln, 0, 0); } else { next = nd6_free(rt); } @@ -567,12 +558,13 @@ nd6_timer(ignored_arg) } /* expire default router list */ + lck_mtx_lock(nd6_mutex); dr = TAILQ_FIRST(&nd_defrouter); while (dr) { - if (dr->expire && dr->expire < time_second) { + if (dr->expire && dr->expire < timenow.tv_sec) { struct nd_defrouter *t; t = TAILQ_NEXT(dr, dr_entry); - defrtrlist_del(dr); + defrtrlist_del(dr, 1); dr = t; } else { dr = TAILQ_NEXT(dr, dr_entry); @@ -586,7 +578,7 @@ nd6_timer(ignored_arg) * rather separate address lifetimes and prefix lifetimes. */ addrloop: - for (ia6 = in6_ifaddr; ia6; ia6 = nia6) { + for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) { nia6 = ia6->ia_next; /* check address lifetime */ lt6 = &ia6->ia6_lifetime; @@ -609,7 +601,7 @@ nd6_timer(ignored_arg) regen = 1; } - in6_purgeaddr(&ia6->ia_ifa); + in6_purgeaddr(&ia6->ia_ifa, 1); if (regen) goto addrloop; /* XXX: see below */ @@ -660,7 +652,7 @@ nd6_timer(ignored_arg) * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ - if (pr->ndpr_expire && pr->ndpr_expire < time_second) { + if (pr->ndpr_expire && pr->ndpr_expire < timenow.tv_sec) { struct nd_prefix *t; t = pr->ndpr_next; @@ -669,23 +661,28 @@ nd6_timer(ignored_arg) * separate. NEVER perform in6_purgeaddr here. */ - prelist_remove(pr); + prelist_remove(pr, 1); pr = t; } else pr = pr->ndpr_next; } - splx(s); + lck_mtx_unlock(nd6_mutex); + timeout(nd6_timer, (caddr_t)0, nd6_prune * hz); } static int -regen_tmpaddr(ia6) - struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */ +regen_tmpaddr( + struct in6_ifaddr *ia6) /* deprecated/invalidated temporary address */ { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; + struct timeval timenow; + + getmicrotime(&timenow); ifp = ia6->ia_ifa.ifa_ifp; + ifnet_lock_exclusive(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { @@ -726,6 +723,7 @@ regen_tmpaddr(ia6) if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } + ifnet_lock_done(ifp); if (public_ifa6 != NULL) { int e; @@ -746,14 +744,15 @@ regen_tmpaddr(ia6) * ifp goes away. */ void -nd6_purge(ifp) - struct ifnet *ifp; +nd6_purge( + struct ifnet *ifp) { struct llinfo_nd6 *ln, *nln; struct nd_defrouter *dr, *ndr, drany; struct nd_prefix *pr, *npr; /* Nuke default router list entries toward ifp */ + lck_mtx_lock(nd6_mutex); if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) { /* * The first entry of the list may be stored in @@ -762,11 +761,11 @@ nd6_purge(ifp) for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) { ndr = TAILQ_NEXT(dr, dr_entry); if (dr->ifp == ifp) - defrtrlist_del(dr); + defrtrlist_del(dr, 1); } dr = TAILQ_FIRST(&nd_defrouter); if (dr->ifp == ifp) - defrtrlist_del(dr); + defrtrlist_del(dr, 1); } /* Nuke prefix list entries toward ifp */ @@ -781,7 +780,7 @@ nd6_purge(ifp) * by itself. 
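Another systematic substitution in the nd6.c hunks above: direct reads of the time_second global give way to one explicit getmicrotime() snapshot per function. The pattern, sketched:

	struct timeval timenow;

	getmicrotime(&timenow);			/* one coherent snapshot */

	if (ln->ln_expire > timenow.tv_sec) {	/* compare against it ... */
		ln = next;
		continue;
	}
	ln->ln_expire = timenow.tv_sec + nd6_gctimer;	/* ... and set from it */

Taking the timestamp once also keeps a single pass of the timer internally consistent even if the clock advances mid-walk.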
* (jinmei@kame.net 20010129) */ - prelist_remove(pr); + prelist_remove(pr, 1); } } @@ -795,6 +794,7 @@ nd6_purge(ifp) defrouter_delreq(&drany, 0); defrouter_select(); } + lck_mtx_unlock(nd6_mutex); /* * Nuke neighbor cache entries for the ifp. @@ -820,10 +820,11 @@ nd6_purge(ifp) } struct rtentry * -nd6_lookup(addr6, create, ifp) - struct in6_addr *addr6; - int create; - struct ifnet *ifp; +nd6_lookup( + struct in6_addr *addr6, + int create, + struct ifnet *ifp, + int rt_locked) { struct rtentry *rt; struct sockaddr_in6 sin6; @@ -835,7 +836,9 @@ nd6_lookup(addr6, create, ifp) #if SCOPEDROUTING sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6); #endif - rt = rtalloc1((struct sockaddr *)&sin6, create, 0UL); + if (!rt_locked) + lck_mtx_lock(rt_mtx); + rt = rtalloc1_locked((struct sockaddr *)&sin6, create, 0UL); if (rt && (rt->rt_flags & RTF_LLINFO) == 0) { /* * This is the case for the default route. @@ -844,7 +847,7 @@ nd6_lookup(addr6, create, ifp) * interface route. */ if (create) { - rtfree(rt); + rtfree_locked(rt); rt = 0; } } @@ -861,8 +864,11 @@ nd6_lookup(addr6, create, ifp) */ struct ifaddr *ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); - if (ifa == NULL) + if (ifa == NULL) { + if (!rt_locked) + lck_mtx_unlock(rt_mtx); return(NULL); + } /* * Create a new route. RTF_LLINFO is necessary @@ -870,26 +876,35 @@ nd6_lookup(addr6, create, ifp) * destination in nd6_rtrequest which will be * called in rtrequest via ifa->ifa_rtrequest. */ - if ((e = rtrequest(RTM_ADD, (struct sockaddr *)&sin6, + if ((e = rtrequest_locked(RTM_ADD, (struct sockaddr *)&sin6, ifa->ifa_addr, (struct sockaddr *)&all1_sa, (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) & ~RTF_CLONING, - &rt)) != 0) - log(LOG_ERR, - "nd6_lookup: failed to add route for a " - "neighbor(%s), errno=%d\n", - ip6_sprintf(addr6), e); - if (rt == NULL) + &rt)) != 0) { + if (e != EEXIST) + log(LOG_ERR, + "nd6_lookup: failed to add route for a " + "neighbor(%s), errno=%d\n", + ip6_sprintf(addr6), e); + } + ifafree(ifa); + if (rt == NULL) { + if (!rt_locked) + lck_mtx_unlock(rt_mtx); return(NULL); + } if (rt->rt_llinfo) { struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; ln->ln_state = ND6_LLINFO_NOSTATE; } - } else + } else { + if (!rt_locked) + lck_mtx_unlock(rt_mtx); return(NULL); + } } rtunref(rt); /* @@ -908,13 +923,17 @@ nd6_lookup(addr6, create, ifp) rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || (ifp && rt->rt_ifa->ifa_ifp != ifp))) { + if (!rt_locked) + lck_mtx_unlock(rt_mtx); if (create) { log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n", ip6_sprintf(addr6), ifp ? if_name(ifp) : "unspec"); /* xxx more logs... kazu */ } return(NULL); - } + } + if (!rt_locked) + lck_mtx_unlock(rt_mtx); return(rt); } @@ -923,9 +942,10 @@ nd6_lookup(addr6, create, ifp) * XXX: should take care of the destination of a p2p link? */ int -nd6_is_addr_neighbor(addr, ifp) - struct sockaddr_in6 *addr; - struct ifnet *ifp; +nd6_is_addr_neighbor( + struct sockaddr_in6 *addr, + struct ifnet *ifp, + int rt_locked) { struct ifaddr *ifa; int i; @@ -946,27 +966,30 @@ nd6_is_addr_neighbor(addr, ifp) * If the address matches one of our addresses, * it should be a neighbor. 
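nd6_lookup() and nd6_is_addr_neighbor() now take a trailing rt_locked flag so that callers already holding rt_mtx do not recursively acquire it; internally the lookups switch to the rtalloc1_locked()/rtrequest_locked() variants. Usage, sketched from the call sites in this patch:

	/* Caller already inside the routing lock (e.g. nd6_cache_lladdr): */
	lck_mtx_lock(rt_mtx);
	rt = nd6_lookup(from, 0, ifp, 1);	/* 1: rt_mtx already held */
	/* ... */
	lck_mtx_unlock(rt_mtx);

	/* Unlocked caller (e.g. the SIOCGNBRINFO_IN6 ioctl): */
	rt = nd6_lookup(&nb_addr, 0, ifp, 0);	/* 0: lock taken internally */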
*/ + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_INET6) - next: continue; + continue; for (i = 0; i < 4; i++) { if ((IFADDR6(ifa).s6_addr32[i] ^ addr->sin6_addr.s6_addr32[i]) & - IFMASK6(ifa).s6_addr32[i]) - goto next; + IFMASK6(ifa).s6_addr32[i]) + continue; } + ifnet_lock_done(ifp); return(1); } + ifnet_lock_done(ifp); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ - if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) + if (nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked) != NULL) return(1); return(0); @@ -978,8 +1001,8 @@ nd6_is_addr_neighbor(addr, ifp) * Free an nd6 llinfo entry. */ struct llinfo_nd6 * -nd6_free(rt) - struct rtentry *rt; +nd6_free( + struct rtentry *rt) { struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; @@ -991,12 +1014,11 @@ nd6_free(rt) */ if (!ip6_forwarding && (ip6_accept_rtadv || (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { - int s; - s = splnet(); + lck_mtx_lock(nd6_mutex); dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, rt->rt_ifp); - if (ln && ln->ln_router || dr) { + if ((ln && ln->ln_router) || dr) { /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. @@ -1027,7 +1049,7 @@ nd6_free(rt) * before the default router selection, we perform * the check now. */ - pfxlist_onlink_check(); + pfxlist_onlink_check(1); if (dr == TAILQ_FIRST(&nd_defrouter)) { /* @@ -1043,7 +1065,7 @@ nd6_free(rt) defrouter_select(); } } - splx(s); + lck_mtx_unlock(nd6_mutex); } /* @@ -1074,12 +1096,15 @@ nd6_free(rt) * XXX cost-effective metods? */ void -nd6_nud_hint(rt, dst6, force) - struct rtentry *rt; - struct in6_addr *dst6; - int force; +nd6_nud_hint( + struct rtentry *rt, + struct in6_addr *dst6, + int force) { struct llinfo_nd6 *ln; + struct timeval timenow; + + getmicrotime(&timenow); /* * If the caller specified "rt", use that. Otherwise, resolve the @@ -1088,7 +1113,7 @@ nd6_nud_hint(rt, dst6, force) if (!rt) { if (!dst6) return; - if (!(rt = nd6_lookup(dst6, 0, NULL))) + if (!(rt = nd6_lookup(dst6, 0, NULL, 0))) return; } @@ -1116,21 +1141,23 @@ nd6_nud_hint(rt, dst6, force) ln->ln_state = ND6_LLINFO_REACHABLE; if (ln->ln_expire) - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[rt->rt_ifp->if_index].reachable; } void -nd6_rtrequest(req, rt, sa) - int req; - struct rtentry *rt; - struct sockaddr *sa; /* xxx unused */ +nd6_rtrequest( + int req, + struct rtentry *rt, + struct sockaddr *sa) /* xxx unused */ { struct sockaddr *gate = rt->rt_gateway; struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; + struct timeval timenow; + if ((rt->rt_flags & RTF_GATEWAY)) return; @@ -1148,7 +1175,7 @@ nd6_rtrequest(req, rt, sa) if (req == RTM_RESOLVE && (nd6_need_cache(ifp) == 0 || /* stf case */ - !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp))) { + !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp, 1))) { /* * FreeBSD and BSD/OS often make a cloned host route based * on a less-specific route (e.g. the default route). 
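A small readability fix in nd6_free() above: && binds tighter than ||, so the old condition already parsed the way its author intended, but the added parentheses make that explicit and silence mixed-operator warnings. A toy demonstration (illustrative only, not from the patch):

	/* Both spellings are the same parse and always agree. */
	static int
	precedence_demo(int ln_ok, int router, int dr)
	{
		int old_style = ln_ok && router || dr;
		int new_style = (ln_ok && router) || dr;

		return old_style == new_style;	/* always 1 */
	}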
@@ -1167,6 +1194,7 @@ nd6_rtrequest(req, rt, sa) return; } + getmicrotime(&timenow); switch (req) { case RTM_ADD: /* @@ -1189,7 +1217,7 @@ nd6_rtrequest(req, rt, sa) SDL(gate)->sdl_type = ifp->if_type; SDL(gate)->sdl_index = ifp->if_index; if (ln) - ln->ln_expire = time_second; + ln->ln_expire = timenow.tv_sec; #if 1 if (ln && ln->ln_expire == 0) { /* kludge for desktops */ @@ -1278,7 +1306,7 @@ nd6_rtrequest(req, rt, sa) * initialized in rtrequest(), so rt_expire is 0. */ ln->ln_state = ND6_LLINFO_NOSTATE; - ln->ln_expire = time_second; + ln->ln_expire = timenow.tv_sec; } rt->rt_flags |= RTF_LLINFO; ln->ln_next = llinfo_nd6.ln_next; @@ -1332,7 +1360,7 @@ nd6_rtrequest(req, rt, sa) llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; - if (!in6_addmulti(&llsol, ifp, &error)) { + if (!in6_addmulti(&llsol, ifp, &error, 0)) { nd6log((LOG_ERR, "%s: failed to join " "%s (errno=%d)\n", if_name(ifp), ip6_sprintf(&llsol), error)); @@ -1357,9 +1385,11 @@ nd6_rtrequest(req, rt, sa) llsol.s6_addr32[2] = htonl(1); llsol.s6_addr8[12] = 0xff; + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(llsol, ifp, in6m); + ifnet_lock_done(ifp); if (in6m) - in6_delmulti(in6m); + in6_delmulti(in6m, 0); } nd6_inuse--; ln->ln_next->ln_prev = ln->ln_prev; @@ -1370,15 +1400,15 @@ nd6_rtrequest(req, rt, sa) if (ln->ln_hold) m_freem(ln->ln_hold); ln->ln_hold = NULL; - Free((caddr_t)ln); + R_Free((caddr_t)ln); } } int -nd6_ioctl(cmd, data, ifp) - u_long cmd; - caddr_t data; - struct ifnet *ifp; +nd6_ioctl( + u_long cmd, + caddr_t data, + struct ifnet *ifp) { struct in6_drlist *drl = (struct in6_drlist *)data; struct in6_prlist *prl = (struct in6_prlist *)data; @@ -1389,15 +1419,14 @@ nd6_ioctl(cmd, data, ifp) struct nd_prefix *pr; struct rtentry *rt; int i = 0, error = 0; - int s; switch (cmd) { case SIOCGDRLST_IN6: /* * obsolete API, use sysctl under net.inet6.icmp6 */ + lck_mtx_lock(nd6_mutex); bzero(drl, sizeof(*drl)); - s = splnet(); dr = TAILQ_FIRST(&nd_defrouter); while (dr && i < DRLSTSIZ) { drl->defrouter[i].rtaddr = dr->rtaddr; @@ -1417,7 +1446,7 @@ nd6_ioctl(cmd, data, ifp) i++; dr = TAILQ_NEXT(dr, dr_entry); } - splx(s); + lck_mtx_unlock(nd6_mutex); break; case SIOCGPRLST_IN6: /* @@ -1429,7 +1458,7 @@ nd6_ioctl(cmd, data, ifp) * how about separating ioctls into two? */ bzero(prl, sizeof(*prl)); - s = splnet(); + lck_mtx_lock(nd6_mutex); pr = nd_prefix.lh_first; while (pr && i < PRLSTSIZ) { struct nd_pfxrouter *pfr; @@ -1490,8 +1519,7 @@ nd6_ioctl(cmd, data, ifp) i++; } } - splx(s); - + lck_mtx_unlock(nd6_mutex); break; case OSIOCGIFINFO_IN6: if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { @@ -1531,16 +1559,18 @@ nd6_ioctl(cmd, data, ifp) * route equals to the top of default router list */ bzero(&any, sizeof(any)); - defrouter_delreq(&any, 0); + lck_mtx_lock(nd6_mutex); + defrouter_delreq(&any, 1); defrouter_select(); + lck_mtx_unlock(nd6_mutex); /* xxx sumikawa: flush prefix list */ break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; + lck_mtx_lock(nd6_mutex); - s = splnet(); for (pr = nd_prefix.lh_first; pr; pr = next) { struct in6_ifaddr *ia, *ia_next; @@ -1550,7 +1580,7 @@ nd6_ioctl(cmd, data, ifp) continue; /* XXX */ /* do we really have to remove addresses as well? */ - for (ia = in6_ifaddr; ia; ia = ia_next) { + for (ia = in6_ifaddrs; ia; ia = ia_next) { /* ia might be removed. keep the next ptr. 
*/ ia_next = ia->ia_next; @@ -1558,11 +1588,11 @@ nd6_ioctl(cmd, data, ifp) continue; if (ia->ia6_ndpr == pr) - in6_purgeaddr(&ia->ia_ifa); + in6_purgeaddr(&ia->ia_ifa, 1); } - prelist_remove(pr); + prelist_remove(pr, 1); } - splx(s); + lck_mtx_unlock(nd6_mutex); break; } case SIOCSRTRFLUSH_IN6: @@ -1570,7 +1600,7 @@ nd6_ioctl(cmd, data, ifp) /* flush all the default routers */ struct nd_defrouter *dr, *next; - s = splnet(); + lck_mtx_lock(nd6_mutex); if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) { /* * The first entry of the list may be stored in @@ -1578,11 +1608,11 @@ nd6_ioctl(cmd, data, ifp) */ for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) { next = TAILQ_NEXT(dr, dr_entry); - defrtrlist_del(dr); + defrtrlist_del(dr, 1); } - defrtrlist_del(TAILQ_FIRST(&nd_defrouter)); + defrtrlist_del(TAILQ_FIRST(&nd_defrouter), 1); } - splx(s); + lck_mtx_unlock(nd6_mutex); break; } case SIOCGNBRINFO_IN6: @@ -1602,10 +1632,8 @@ nd6_ioctl(cmd, data, ifp) *idp = htons(ifp->if_index); } - s = splnet(); - if ((rt = nd6_lookup(&nb_addr, 0, ifp)) == NULL) { + if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) { error = EINVAL; - splx(s); break; } ln = (struct llinfo_nd6 *)rt->rt_llinfo; @@ -1613,7 +1641,6 @@ nd6_ioctl(cmd, data, ifp) nbi->asked = ln->ln_asked; nbi->isrouter = ln->ln_router; nbi->expire = ln->ln_expire; - splx(s); break; } @@ -1632,13 +1659,13 @@ nd6_ioctl(cmd, data, ifp) * on reception of inbound ND6 packets. (RS/RA/NS/redirect) */ struct rtentry * -nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code) - struct ifnet *ifp; - struct in6_addr *from; - char *lladdr; - int lladdrlen; - int type; /* ICMP6 type */ - int code; /* type dependent information */ +nd6_cache_lladdr( + struct ifnet *ifp, + struct in6_addr *from, + char *lladdr, + int lladdrlen, + int type, /* ICMP6 type */ + int code) /* type dependent information */ { struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; @@ -1648,6 +1675,7 @@ nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code) int olladdr; int llchange; int newstate = 0; + struct timeval timenow; if (!ifp) panic("ifp == NULL in nd6_cache_lladdr"); @@ -1667,8 +1695,10 @@ nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code) * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ + getmicrotime(&timenow); - rt = nd6_lookup(from, 0, ifp); + lck_mtx_lock(rt_mtx); + rt = nd6_lookup(from, 0, ifp, 1); if (!rt) { #if 0 /* nothing must be done if there's no lladdr */ @@ -1676,16 +1706,20 @@ nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code) return NULL; #endif - rt = nd6_lookup(from, 1, ifp); + rt = nd6_lookup(from, 1, ifp, 1); is_newentry = 1; } else { /* do nothing if static ndp is set */ - if (rt->rt_flags & RTF_STATIC) + if (rt->rt_flags & RTF_STATIC) { + lck_mtx_unlock(rt_mtx); return NULL; + } is_newentry = 0; } - if (!rt) + lck_mtx_unlock(rt_mtx); + + if (!rt) return NULL; if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { fail: @@ -1758,7 +1792,7 @@ fail: * we must set the timer now, although it is actually * meaningless. 
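The nd6_cache_lladdr() hunk above brackets its lookup-or-create sequence with rt_mtx, passing rt_locked = 1 into nd6_lookup(), and unlocks early on the RTF_STATIC path so a manually configured neighbor entry is never overwritten. In outline:

	lck_mtx_lock(rt_mtx);
	rt = nd6_lookup(from, 0, ifp, 1);	/* look up under the lock */
	if (rt == NULL) {
		rt = nd6_lookup(from, 1, ifp, 1);	/* miss: create entry */
		is_newentry = 1;
	} else {
		if (rt->rt_flags & RTF_STATIC) {
			lck_mtx_unlock(rt_mtx);	/* leave static NDP alone */
			return NULL;
		}
		is_newentry = 0;
	}
	lck_mtx_unlock(rt_mtx);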
*/ - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; if (ln->ln_hold) { /* @@ -1767,12 +1801,12 @@ fail: */ nd6_output(ifp, ifp, ln->ln_hold, (struct sockaddr_in6 *)rt_key(rt), - rt); + rt, 0); ln->ln_hold = NULL; } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ - ln->ln_expire = time_second; + ln->ln_expire = timenow.tv_sec; } } @@ -1856,22 +1890,23 @@ fail: * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ - if (do_update && ln->ln_router && !ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) + if (do_update && ln->ln_router && !ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + lck_mtx_lock(nd6_mutex); defrouter_select(); + lck_mtx_unlock(nd6_mutex); + } return rt; } static void -nd6_slowtimo(ignored_arg) - void *ignored_arg; +nd6_slowtimo( + void *ignored_arg) { - int s = splnet(); int i; struct nd_ifinfo *nd6if; - s = splnet(); - timeout(nd6_slowtimo_funneled, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); + lck_mtx_lock(nd6_mutex); for (i = 1; i < if_index + 1; i++) { if (!nd_ifinfo || i >= nd_ifinfo_indexlim) continue; @@ -1888,37 +1923,27 @@ nd6_slowtimo(ignored_arg) nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } - splx(s); + lck_mtx_unlock(nd6_mutex); + timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); } -static void -nd6_slowtimo_funneled(ignored_arg) - void *ignored_arg; -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - nd6_slowtimo(ignored_arg); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} #define senderr(e) { error = (e); goto bad;} int -nd6_output(ifp, origifp, m0, dst, rt0) - struct ifnet *ifp; - struct ifnet *origifp; - struct mbuf *m0; - struct sockaddr_in6 *dst; - struct rtentry *rt0; +nd6_output( + struct ifnet *ifp, + struct ifnet *origifp, + struct mbuf *m0, + struct sockaddr_in6 *dst, + struct rtentry *rt0, + int locked) { struct mbuf *m = m0; struct rtentry *rt = rt0; struct sockaddr_in6 *gw6 = NULL; struct llinfo_nd6 *ln = NULL; int error = 0; + struct timeval timenow; if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; @@ -1929,19 +1954,23 @@ nd6_output(ifp, origifp, m0, dst, rt0) /* * next hop determination. This routine is derived from ether_outpout. */ + lck_mtx_lock(rt_mtx); if (rt) { if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL)) != + if ((rt0 = rt = rtalloc1_locked((struct sockaddr *)dst, 1, 0UL)) != NULL) { rtunref(rt); if (rt->rt_ifp != ifp) { /* XXX: loop care? */ + lck_mtx_unlock(rt_mtx); return nd6_output(ifp, origifp, m0, - dst, rt); + dst, rt, locked); } - } else + } else { + lck_mtx_unlock(rt_mtx); senderr(EHOSTUNREACH); + } } if (rt->rt_flags & RTF_GATEWAY) { @@ -1955,13 +1984,14 @@ nd6_output(ifp, origifp, m0, dst, rt0) * if the gateway is our own address, which is * sometimes used to install a route to a p2p link. */ - if (!nd6_is_addr_neighbor(gw6, ifp) || + if (!nd6_is_addr_neighbor(gw6, ifp, 1) || in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { /* * We allow this kind of tricky route only * when the outgoing interface is p2p. * XXX: we may need a more generic rule here. 
*/ + lck_mtx_unlock(rt_mtx); if ((ifp->if_flags & IFF_POINTOPOINT) == 0) senderr(EHOSTUNREACH); @@ -1971,10 +2001,12 @@ nd6_output(ifp, origifp, m0, dst, rt0) if (rt->rt_gwroute == 0) goto lookup; if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL); - if ((rt = rt->rt_gwroute) == 0) + rtfree_locked(rt); rt = rt0; + lookup: rt->rt_gwroute = rtalloc1_locked(rt->rt_gateway, 1, 0UL); + if ((rt = rt->rt_gwroute) == 0) { + lck_mtx_unlock(rt_mtx); senderr(EHOSTUNREACH); + } } } } @@ -1995,10 +2027,11 @@ nd6_output(ifp, origifp, m0, dst, rt0) * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ - if (nd6_is_addr_neighbor(dst, ifp) && - (rt = nd6_lookup(&dst->sin6_addr, 1, ifp)) != NULL) + if (nd6_is_addr_neighbor(dst, ifp, 1) && + (rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 1)) != NULL) ln = (struct llinfo_nd6 *)rt->rt_llinfo; } + lck_mtx_unlock(rt_mtx); if (!ln || !rt) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) { @@ -2012,11 +2045,13 @@ nd6_output(ifp, origifp, m0, dst, rt0) goto sendpkt; /* send anyway */ } + getmicrotime(&timenow); + /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } /* @@ -2029,7 +2064,7 @@ nd6_output(ifp, origifp, m0, dst, rt0) if (ln->ln_state == ND6_LLINFO_STALE) { ln->ln_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; - ln->ln_expire = time_second + nd6_delay; + ln->ln_expire = timenow.tv_sec + nd6_delay; } /* @@ -2056,11 +2091,11 @@ nd6_output(ifp, origifp, m0, dst, rt0) ln->ln_hold = m; if (ln->ln_expire) { if (ln->ln_asked < nd6_mmaxtries && - ln->ln_expire < time_second) { + ln->ln_expire < timenow.tv_sec) { ln->ln_asked++; - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[ifp->if_index].retrans / 1000; - nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); + nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, locked); } } return(0); @@ -2075,7 +2110,12 @@ nd6_output(ifp, origifp, m0, dst, rt0) if ((ifp->if_flags & IFF_LOOPBACK) != 0) { m->m_pkthdr.rcvif = origifp; /* forwarding rules require the original scope_id */ - return (dlil_output(ifptodlt(origifp, PF_INET6), m, (caddr_t)rt, (struct sockaddr *)dst,0)); + if (locked) + lck_mtx_unlock(ip6_mutex); + error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, (struct sockaddr *)dst, 0); + if (locked) + lck_mtx_lock(ip6_mutex); + return error; } else { /* Do not allow loopback address to wind up on a wire */ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -2084,18 +2124,24 @@ nd6_output(ifp, origifp, m0, dst, rt0) IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { ip6stat.ip6s_badscope++; /* - * Simply drop the packet just like a firewall -- we do not want the - * the application to feel the pain, not yet... - * Returning ENETUNREACH like ip6_output does in some similar cases - * could startle the otherwise clueless process that specifies + * Do not simply drop the packet just like a firewall -- we want the + * the application to feel the pain. + * Return ENETUNREACH like ip6_output does in some similar cases. + * This can startle the otherwise clueless process that specifies * loopback as the source address. 
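nd6_output() now carries a locked parameter recording whether the caller holds ip6_mutex; the hand-off to the driver layer just below drops that mutex across dlil_output() and retakes it afterwards, the same drop-and-relock idiom used on the IPsec side. Sketched:

	if (locked)
		lck_mtx_unlock(ip6_mutex);	/* never hold ip6_mutex into DLIL */
	error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt,
	    (struct sockaddr *)dst, 0);
	if (locked)
		lck_mtx_lock(ip6_mutex);	/* restore the caller's invariant */
	return (error);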
*/ + error = ENETUNREACH; goto bad; } } - m->m_pkthdr.rcvif = (struct ifnet *)0; - return (dlil_output(ifptodlt(ifp, PF_INET6), m, (caddr_t)rt, (struct sockaddr *)dst, 0)); + m->m_pkthdr.rcvif = 0; + if (locked) + lck_mtx_unlock(ip6_mutex); + error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, (struct sockaddr *)dst, 0); + if (locked) + lck_mtx_lock(ip6_mutex); + return(error); #else if ((ifp->if_flags & IFF_LOOPBACK) != 0) { return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, @@ -2112,8 +2158,8 @@ nd6_output(ifp, origifp, m0, dst, rt0) #undef senderr int -nd6_need_cache(ifp) - struct ifnet *ifp; +nd6_need_cache( + struct ifnet *ifp) { /* * XXX: we currently do not make neighbor cache on any interface @@ -2127,9 +2173,8 @@ nd6_need_cache(ifp) case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: -#if IFT_L2VLAN case IFT_L2VLAN: -#endif + case IFT_IEEE8023ADLAG: #if IFT_IEEE80211 case IFT_IEEE80211: #endif @@ -2141,12 +2186,12 @@ nd6_need_cache(ifp) } int -nd6_storelladdr(ifp, rt, m, dst, desten) - struct ifnet *ifp; - struct rtentry *rt; - struct mbuf *m; - struct sockaddr *dst; - u_char *desten; +nd6_storelladdr( + struct ifnet *ifp, + struct rtentry *rt, + struct mbuf *m, + struct sockaddr *dst, + u_char *desten) { int i; struct sockaddr_dl *sdl; @@ -2155,9 +2200,8 @@ nd6_storelladdr(ifp, rt, m, dst, desten) switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: -#if IFT_L2VLAN - case IFT_L2VLAN: -#endif + case IFT_L2VLAN: + case IFT_IEEE8023ADLAG: #if IFT_IEEE80211 case IFT_IEEE80211: #endif @@ -2194,6 +2238,69 @@ nd6_storelladdr(ifp, rt, m, dst, desten) bcopy(LLADDR(sdl), desten, sdl->sdl_alen); return(1); } + +extern errno_t arp_route_to_gateway_route(const struct sockaddr *net_dest, + route_t hint, route_t *out_route); + +errno_t +nd6_lookup_ipv6( + ifnet_t ifp, + const struct sockaddr_in6 *ip6_dest, + struct sockaddr_dl *ll_dest, + size_t ll_dest_len, + route_t hint, + mbuf_t packet) +{ + route_t route = hint; + errno_t result = 0; + struct sockaddr_dl *sdl = NULL; + size_t copy_len; + + if (ip6_dest->sin6_family != AF_INET6) + return EAFNOSUPPORT; + + if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) + return ENETDOWN; + + if (hint) { + result = arp_route_to_gateway_route((const struct sockaddr*)ip6_dest, hint, &route); + if (result != 0) + return result; + } + + if ((packet->m_flags & M_MCAST) != 0) { + return dlil_resolve_multi(ifp, (const struct sockaddr*)ip6_dest, + ll_dest, ll_dest_len); + } + + if (route == NULL) { + /* this could happen, if we could not allocate memory */ + return ENOBUFS; + } + + lck_mtx_lock(rt_mtx); + + if (route->rt_gateway->sa_family != AF_LINK) { + printf("nd6_lookup_ipv6: gateway address not AF_LINK\n"); + result = EADDRNOTAVAIL; + goto done; + } + + sdl = SDL(route->rt_gateway); + if (sdl->sdl_alen == 0) { + /* this should be impossible, but we bark here for debugging */ + printf("nd6_storelladdr: sdl_alen == 0\n"); + result = EHOSTUNREACH; + } + + copy_len = sdl->sdl_len <= ll_dest_len ? 
sdl->sdl_len : ll_dest_len; + bcopy(sdl, ll_dest, copy_len); + +done: + lck_mtx_unlock(rt_mtx); + return result; +} + #ifndef __APPLE__ static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS; static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS; @@ -2215,6 +2322,7 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS return EPERM; error = 0; + lck_mtx_lock(nd6_mutex); for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { @@ -2242,6 +2350,7 @@ nd6_sysctl_drlist SYSCTL_HANDLER_ARGS if (error) break; } + lck_mtx_unlock(nd6_mutex); return error; } @@ -2257,6 +2366,8 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS return EPERM; error = 0; + lck_mtx_lock(nd6_mutex); + for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { u_short advrtrs; size_t advance; @@ -2316,6 +2427,7 @@ nd6_sysctl_prlist SYSCTL_HANDLER_ARGS if (error) break; } + lck_mtx_unlock(nd6_mutex); return error; } #endif diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h index d774afb3a..8f525ce33 100644 --- a/bsd/netinet6/nd6.h +++ b/bsd/netinet6/nd6.h @@ -41,6 +41,7 @@ #include +#ifdef KERNEL_PRIVATE struct llinfo_nd6 { struct llinfo_nd6 *ln_next; struct llinfo_nd6 *ln_prev; @@ -52,6 +53,7 @@ struct llinfo_nd6 { short ln_router; /* 2^0: ND6 router bit */ int ln_byhint; /* # of times we made it reachable by UL hint */ }; +#endif /* KERNEL_PRIVATE */ #define ND6_LLINFO_NOSTATE -2 /* @@ -185,8 +187,7 @@ struct in6_ndifreq { #define ND6_INFINITE_LIFETIME 0xffffffff -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE /* node constants */ #define MAX_REACHABLE_TIME 3600000 /* msec */ #define REACHABLE_TIME 30000 /* msec */ @@ -338,67 +339,96 @@ union nd_opts { /* XXX: need nd6_var.h?? */ /* nd6.c */ -void nd6_init __P((void)); -void nd6_ifattach __P((struct ifnet *)); -int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *)); -void nd6_option_init __P((void *, int, union nd_opts *)); -struct nd_opt_hdr *nd6_option __P((union nd_opts *)); -int nd6_options __P((union nd_opts *)); -struct rtentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *)); -void nd6_setmtu __P((struct ifnet *)); -void nd6_timer __P((void *)); -void nd6_purge __P((struct ifnet *)); -struct llinfo_nd6 *nd6_free __P((struct rtentry *)); -void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int)); -int nd6_resolve __P((struct ifnet *, struct rtentry *, - struct mbuf *, struct sockaddr *, u_char *)); -void nd6_rtrequest __P((int, struct rtentry *, struct sockaddr *)); -int nd6_ioctl __P((u_long, caddr_t, struct ifnet *)); -struct rtentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *, - char *, int, int, int)); -int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct rtentry *)); -int nd6_storelladdr __P((struct ifnet *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *)); -int nd6_need_cache __P((struct ifnet *)); +void nd6_init(void); +void nd6_ifattach(struct ifnet *); +int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int); +void nd6_option_init(void *, int, union nd_opts *); +struct nd_opt_hdr *nd6_option(union nd_opts *); +int nd6_options(union nd_opts *); +struct rtentry *nd6_lookup(struct in6_addr *, int, struct ifnet *, int); +void nd6_setmtu(struct ifnet *); +void nd6_timer(void *); +void nd6_purge(struct ifnet *); +struct llinfo_nd6 *nd6_free(struct rtentry *); +void nd6_nud_hint(struct rtentry *, struct in6_addr *, int); +int nd6_resolve(struct ifnet *, struct rtentry *, + struct mbuf *, struct sockaddr *, u_char *); +void nd6_rtrequest(int, 
struct rtentry *, struct sockaddr *); +int nd6_ioctl(u_long, caddr_t, struct ifnet *); +struct rtentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *, + char *, int, int, int); +int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, int); +int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, + struct sockaddr *, u_char *); +int nd6_need_cache(struct ifnet *); /* nd6_nbr.c */ -void nd6_na_input __P((struct mbuf *, int, int)); -void nd6_na_output __P((struct ifnet *, const struct in6_addr *, - const struct in6_addr *, u_long, int, struct sockaddr *)); -void nd6_ns_input __P((struct mbuf *, int, int)); -void nd6_ns_output __P((struct ifnet *, const struct in6_addr *, - const struct in6_addr *, struct llinfo_nd6 *, int)); -caddr_t nd6_ifptomac __P((struct ifnet *)); -void nd6_dad_start __P((struct ifaddr *, int *)); -void nd6_dad_stop __P((struct ifaddr *)); -void nd6_dad_duplicated __P((struct ifaddr *)); +void nd6_na_input(struct mbuf *, int, int); +void nd6_na_output(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, u_long, int, struct sockaddr *); +void nd6_ns_input(struct mbuf *, int, int); +void nd6_ns_output(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, struct llinfo_nd6 *, int, int); +caddr_t nd6_ifptomac(struct ifnet *); +void nd6_dad_start(struct ifaddr *, int *); +void nd6_dad_stop(struct ifaddr *); +void nd6_dad_duplicated(struct ifaddr *); /* nd6_rtr.c */ -void nd6_rs_input __P((struct mbuf *, int, int)); -void nd6_ra_input __P((struct mbuf *, int, int)); -void prelist_del __P((struct nd_prefix *)); -void defrouter_addreq __P((struct nd_defrouter *)); -void defrouter_delreq __P((struct nd_defrouter *, int)); -void defrouter_select __P((void)); -void defrtrlist_del __P((struct nd_defrouter *)); -void prelist_remove __P((struct nd_prefix *)); -int prelist_update __P((struct nd_prefix *, struct nd_defrouter *, - struct mbuf *)); -int nd6_prelist_add __P((struct nd_prefix *, struct nd_defrouter *, - struct nd_prefix **)); -int nd6_prefix_onlink __P((struct nd_prefix *)); -int nd6_prefix_offlink __P((struct nd_prefix *)); -void pfxlist_onlink_check __P((void)); -struct nd_defrouter *defrouter_lookup __P((struct in6_addr *, - struct ifnet *)); -struct nd_prefix *nd6_prefix_lookup __P((struct nd_prefix *)); -int in6_init_prefix_ltimes __P((struct nd_prefix *ndpr)); -void rt6_flush __P((struct in6_addr *, struct ifnet *)); -int nd6_setdefaultiface __P((int)); -int in6_tmpifadd __P((const struct in6_ifaddr *, int)); - -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +void nd6_rs_input(struct mbuf *, int, int); +void nd6_ra_input(struct mbuf *, int, int); +void prelist_del(struct nd_prefix *); +void defrouter_addreq(struct nd_defrouter *); +void defrouter_delreq(struct nd_defrouter *, int); +void defrouter_select(void); +void defrtrlist_del(struct nd_defrouter *, int); +void prelist_remove(struct nd_prefix *, int); +int prelist_update(struct nd_prefix *, struct nd_defrouter *, + struct mbuf *); +int nd6_prelist_add(struct nd_prefix *, struct nd_defrouter *, + struct nd_prefix **); +int nd6_prefix_onlink(struct nd_prefix *, int, int); +int nd6_prefix_offlink(struct nd_prefix *); +void pfxlist_onlink_check(int); +struct nd_defrouter *defrouter_lookup(struct in6_addr *, + struct ifnet *); +struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *); +int in6_init_prefix_ltimes(struct nd_prefix *ndpr); +void rt6_flush(struct in6_addr *, struct ifnet *); +int nd6_setdefaultiface(int); 
+int in6_tmpifadd(const struct in6_ifaddr *, int);
+#endif /* KERNEL_PRIVATE */
+
+#ifdef KERNEL
+
+/*!
+	@function nd6_lookup_ipv6
+	@discussion This function will check the routing table for a cached
+		neighbor discovery entry or trigger a neighbor discovery query
+		to resolve the IPv6 address to a link-layer address.
+
+		ND entries are stored in the routing table. This function will
+		lookup the IPv6 destination in the routing table. If the
+		destination requires forwarding to a gateway, the route of the
+		gateway will be looked up. The route entry is inspected to
+		determine if the link layer destination address is known. If
+		unknown, neighbor discovery will be used to resolve the entry.
+	@param interface The interface the packet is being sent on.
+	@param ip6_dest The IPv6 destination of the packet.
+	@param ll_dest On output, the link-layer destination.
+	@param ll_dest_len The length of the buffer for ll_dest.
+	@param hint Any routing hint passed down from the protocol.
+	@param packet The packet being transmitted.
+	@result May return an error such as EHOSTDOWN or ENETUNREACH. If
+		this function returns EJUSTRETURN, the packet has been queued
+		and will be sent when the address is resolved. If any other
+		value is returned, the caller is responsible for disposing of
+		the packet.
+ */
+errno_t nd6_lookup_ipv6(ifnet_t interface, const struct sockaddr_in6 *ip6_dest,
+	struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
+	mbuf_t packet);
+#endif /* KERNEL */
 #endif /* _NETINET6_ND6_H_ */
diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c
index ae8185854..5e968a96e 100644
--- a/bsd/netinet6/nd6_nbr.c
+++ b/bsd/netinet6/nd6_nbr.c
@@ -68,22 +68,23 @@ extern int ipsec_bypass;
 #define SDL(s) ((struct sockaddr_dl *)s)
 struct dadq;
-static struct dadq *nd6_dad_find __P((struct ifaddr *));
+static struct dadq *nd6_dad_find(struct ifaddr *);
 #ifndef __APPLE__
-static void nd6_dad_starttimer __P((struct dadq *, int));
-static void nd6_dad_stoptimer __P((struct dadq *));
+static void nd6_dad_starttimer(struct dadq *, int);
+static void nd6_dad_stoptimer(struct dadq *);
 #else
-void nd6_dad_stoptimer __P((struct ifaddr *));
+void nd6_dad_stoptimer(struct ifaddr *);
 #endif
-static void nd6_dad_timer __P((struct ifaddr *));
-static void nd6_dad_timer_funnel __P((struct ifaddr *));
-static void nd6_dad_ns_output __P((struct dadq *, struct ifaddr *));
-static void nd6_dad_ns_input __P((struct ifaddr *));
-static void nd6_dad_na_input __P((struct ifaddr *));
+static void nd6_dad_timer(struct ifaddr *);
+static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
+static void nd6_dad_ns_input(struct ifaddr *);
+static void nd6_dad_na_input(struct ifaddr *);
 
 static int dad_ignore_ns = 0;	/* ignore NS in DAD - specwise incorrect */
 static int dad_maxtry = 15;	/* max # of *tries* to transmit DAD packet */
+extern lck_mtx_t *dad6_mutex;
+extern lck_mtx_t *nd6_mutex;
 
 /*
 * Input a Neighbor Solicitation Message.
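
Judging purely from the headerdoc above, a client of nd6_lookup_ipv6() supplies a sockaddr_dl buffer, treats EJUSTRETURN as "packet queued until resolution completes", and disposes of the mbuf on any other error. A hypothetical caller might look like the following sketch; example_ipv6_frameout and its surroundings are invented for illustration and are not part of this patch:

/* Sketch only: assumes a KERNEL_PRIVATE context with an mbuf ready to send. */
static errno_t
example_ipv6_frameout(ifnet_t ifp, mbuf_t m,
	const struct sockaddr_in6 *dst, route_t hint)
{
	struct sockaddr_dl ll_dest;
	errno_t err;

	err = nd6_lookup_ipv6(ifp, dst, &ll_dest, sizeof (ll_dest), hint, m);
	if (err == EJUSTRETURN)
		return 0;	/* queued; sent when the address resolves */
	if (err != 0) {
		mbuf_freem(m);	/* other errors: caller disposes of the packet */
		return err;
	}
	/* ll_dest now holds the link-layer destination for framing. */
	return 0;
}
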
* @@ -91,9 +92,10 @@ static int dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */ * Based on RFC 2462 (duplicated address detection) */ void -nd6_ns_input(m, off, icmp6len) - struct mbuf *m; - int off, icmp6len; +nd6_ns_input( + struct mbuf *m, + int off, + int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -111,7 +113,7 @@ nd6_ns_input(m, off, icmp6len) struct sockaddr_dl *proxydl = NULL; #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, icmp6len,); + IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); @@ -333,11 +335,13 @@ nd6_ns_input(m, off, icmp6len) * Based on RFC 2462 (duplicated address detection) */ void -nd6_ns_output(ifp, daddr6, taddr6, ln, dad) - struct ifnet *ifp; - const struct in6_addr *daddr6, *taddr6; - struct llinfo_nd6 *ln; /* for source address determination */ - int dad; /* duplicated address detection */ +nd6_ns_output( + struct ifnet *ifp, + const struct in6_addr *daddr6, + const struct in6_addr *taddr6, + struct llinfo_nd6 *ln, /* for source address determination */ + int dad, /* duplicated address detection */ + int locked) { struct mbuf *m; struct ip6_hdr *ip6; @@ -513,7 +517,7 @@ nd6_ns_output(ifp, daddr6, taddr6, ln, dad) if (ipsec_bypass == 0) (void)ipsec_setsocket(m, NULL); #endif - ip6_output(m, NULL, NULL, dad ? IPV6_DADOUTPUT : 0, &im6o, &outif); + ip6_output(m, NULL, NULL, dad ? IPV6_DADOUTPUT : 0, &im6o, &outif, locked); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_neighborsolicit); @@ -532,9 +536,10 @@ nd6_ns_output(ifp, daddr6, taddr6, ln, dad) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void -nd6_na_input(m, off, icmp6len) - struct mbuf *m; - int off, icmp6len; +nd6_na_input( + struct mbuf *m, + int off, + int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -555,6 +560,7 @@ nd6_na_input(m, off, icmp6len) struct rtentry *rt; struct sockaddr_dl *sdl; union nd_opts ndopts; + struct timeval timenow; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, @@ -565,7 +571,7 @@ nd6_na_input(m, off, icmp6len) } #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, icmp6len,); + IP6_EXTHDR_CHECK(m, off, icmp6len, return); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); @@ -646,12 +652,13 @@ nd6_na_input(m, off, icmp6len) /* * If no neighbor cache entry is found, NA SHOULD silently be discarded. */ - rt = nd6_lookup(&taddr6, 0, ifp); + rt = nd6_lookup(&taddr6, 0, ifp, 0); if ((rt == NULL) || ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) || ((sdl = SDL(rt->rt_gateway)) == NULL)) goto freeit; + getmicrotime(&timenow); if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, @@ -669,11 +676,11 @@ nd6_na_input(m, off, icmp6len) ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (ln->ln_expire) - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[rt->rt_ifp->if_index].reachable; } else { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } if ((ln->ln_router = is_router) != 0) { /* @@ -681,7 +688,7 @@ nd6_na_input(m, off, icmp6len) * non-reachable to probably reachable, and might * affect the status of associated prefixes.. 
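
A pattern repeated in the hunks above: the old code read the global time_second at each use, while the new code calls getmicrotime(&timenow) once near the top of the function and derives every expiry from timenow.tv_sec, so a single consistent timestamp covers the whole pass. A compilable userspace model using gettimeofday (constants are placeholders, not the kernel's tunables):

#include <stdio.h>
#include <sys/time.h>

#define GCTIMER	86400	/* placeholder, cf. nd6_gctimer */
#define DELAY	5	/* placeholder, cf. nd6_delay */

int
main(void)
{
	struct timeval timenow;

	gettimeofday(&timenow, NULL);	/* sample the clock once */

	/* Both expiries derive from the same snapshot. */
	printf("stale gc expiry: %ld\n", (long)timenow.tv_sec + GCTIMER);
	printf("delay expiry:    %ld\n", (long)timenow.tv_sec + DELAY);
	return 0;
}
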
*/ - pfxlist_onlink_check(); + pfxlist_onlink_check(0); } } else { int llchange; @@ -727,7 +734,7 @@ nd6_na_input(m, off, icmp6len) */ if (ln->ln_state == ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } goto freeit; } else if (is_override /* (2a) */ @@ -750,13 +757,13 @@ nd6_na_input(m, off, icmp6len) ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (ln->ln_expire) { - ln->ln_expire = time_second + + ln->ln_expire = timenow.tv_sec + nd_ifinfo[ifp->if_index].reachable; } } else { if (lladdr && llchange) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = time_second + nd6_gctimer; + ln->ln_expire = timenow.tv_sec + nd6_gctimer; } } } @@ -779,21 +786,25 @@ nd6_na_input(m, off, icmp6len) * is only called under the network software interrupt * context. However, we keep it just for safety. */ - s = splnet(); + lck_mtx_lock(nd6_mutex); dr = defrouter_lookup(in6, rt->rt_ifp); - if (dr) - defrtrlist_del(dr); - else if (!ip6_forwarding && (ip6_accept_rtadv || (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { - /* - * Even if the neighbor is not in the default - * router list, the neighbor may be used - * as a next hop for some destinations - * (e.g. redirect case). So we must - * call rt6_flush explicitly. - */ - rt6_flush(&ip6->ip6_src, rt->rt_ifp); + if (dr) { + defrtrlist_del(dr, 1); + lck_mtx_unlock(nd6_mutex); + } + else { + lck_mtx_unlock(nd6_mutex); + if (!ip6_forwarding && (ip6_accept_rtadv || (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + /* + * Even if the neighbor is not in the default + * router list, the neighbor may be used + * as a next hop for some destinations + * (e.g. redirect case). So we must + * call rt6_flush explicitly. + */ + rt6_flush(&ip6->ip6_src, rt->rt_ifp); + } } - splx(s); } ln->ln_router = is_router; } @@ -805,7 +816,7 @@ nd6_na_input(m, off, icmp6len) * argument as the 1st one. 
*/ nd6_output(ifp, ifp, ln->ln_hold, - (struct sockaddr_in6 *)rt_key(rt), rt); + (struct sockaddr_in6 *)rt_key(rt), rt, 0); ln->ln_hold = 0; } @@ -828,12 +839,13 @@ nd6_na_input(m, off, icmp6len) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void -nd6_na_output(ifp, daddr6, taddr6, flags, tlladdr, sdl0) - struct ifnet *ifp; - const struct in6_addr *daddr6, *taddr6; - u_long flags; - int tlladdr; /* 1 if include target link-layer address */ - struct sockaddr *sdl0; /* sockaddr_dl (= proxy NA) or NULL */ +nd6_na_output( + struct ifnet *ifp, + const struct in6_addr *daddr6, + const struct in6_addr *taddr6, + u_long flags, + int tlladdr, /* 1 if include target link-layer address */ + struct sockaddr *sdl0) /* sockaddr_dl (= proxy NA) or NULL */ { struct mbuf *m; struct ip6_hdr *ip6; @@ -962,7 +974,7 @@ nd6_na_output(ifp, daddr6, taddr6, flags, tlladdr, sdl0) if (ipsec_bypass == 0) (void)ipsec_setsocket(m, NULL); #endif - ip6_output(m, NULL, NULL, 0, &im6o, &outif); + ip6_output(m, NULL, NULL, 0, &im6o, &outif, 0); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_neighboradvert); @@ -971,25 +983,10 @@ nd6_na_output(ifp, daddr6, taddr6, flags, tlladdr, sdl0) } caddr_t -nd6_ifptomac(ifp) - struct ifnet *ifp; +nd6_ifptomac( + struct ifnet *ifp) { - switch (ifp->if_type) { - case IFT_ARCNET: - case IFT_ETHER: - case IFT_FDDI: - case IFT_IEEE1394: -#if IFT_L2VLAN - case IFT_L2VLAN: -#endif -#if IFT_IEEE80211 - case IFT_IEEE80211: -#endif - return ((caddr_t)(ifp + 1)); - break; - default: - return NULL; - } + return ((caddr_t)ifnet_lladdr(ifp)); } TAILQ_HEAD(dadq_head, dadq); @@ -1007,40 +1004,43 @@ static struct dadq_head dadq; static int dad_init = 0; static struct dadq * -nd6_dad_find(ifa) - struct ifaddr *ifa; +nd6_dad_find( + struct ifaddr *ifa) { struct dadq *dp; - + lck_mtx_lock(dad6_mutex); for (dp = dadq.tqh_first; dp; dp = dp->dad_list.tqe_next) { - if (dp->dad_ifa == ifa) + if (dp->dad_ifa == ifa) { + lck_mtx_unlock(dad6_mutex); return dp; + } } + lck_mtx_unlock(dad6_mutex); return NULL; } #ifdef __APPLE__ void -nd6_dad_stoptimer(ifa) - struct ifaddr *ifa; +nd6_dad_stoptimer( + struct ifaddr *ifa) { - untimeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa); + untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa); } #else static void -nd6_dad_starttimer(dp, ticks) - struct dadq *dp; - int ticks; +nd6_dad_starttimer( + struct dadq *dp, + int ticks) { callout_reset(&dp->dad_timer_ch, ticks, - (void (*) __P((void *)))nd6_dad_timer, (void *)dp->dad_ifa); + (void (*)(void *))nd6_dad_timer, (void *)dp->dad_ifa); } static void -nd6_dad_stoptimer(dp) - struct dadq *dp; +nd6_dad_stoptimer( + struct dadq *dp) { callout_stop(&dp->dad_timer_ch); @@ -1051,9 +1051,9 @@ nd6_dad_stoptimer(dp) * Start Duplicated Address Detection (DAD) for specified interface address. 
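
nd6_dad_find() above now brackets its walk of the DAD queue with dad6_mutex and releases the mutex on both the hit and the miss path. A compilable userspace model of that guarded lookup, with pthread and <sys/queue.h> standing in for lck_mtx and the kernel queue macros (names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct dadq {
	TAILQ_ENTRY(dadq) dad_list;
	int dad_ifa;				/* stand-in for the ifaddr pointer */
};

static TAILQ_HEAD(, dadq) dadq_head = TAILQ_HEAD_INITIALIZER(dadq_head);
static pthread_mutex_t dad6_mutex = PTHREAD_MUTEX_INITIALIZER;

static struct dadq *
dad_find(int ifa)
{
	struct dadq *dp;

	pthread_mutex_lock(&dad6_mutex);
	TAILQ_FOREACH(dp, &dadq_head, dad_list) {
		if (dp->dad_ifa == ifa) {
			pthread_mutex_unlock(&dad6_mutex);	/* hit */
			return dp;
		}
	}
	pthread_mutex_unlock(&dad6_mutex);			/* miss */
	return NULL;
}

int
main(void)
{
	struct dadq *dp = malloc(sizeof(*dp));

	dp->dad_ifa = 7;
	pthread_mutex_lock(&dad6_mutex);
	TAILQ_INSERT_TAIL(&dadq_head, dp, dad_list);	/* cf. nd6_dad_start */
	pthread_mutex_unlock(&dad6_mutex);
	printf("found: %s\n", dad_find(7) ? "yes" : "no");
	pthread_mutex_lock(&dad6_mutex);
	TAILQ_REMOVE(&dadq_head, dp, dad_list);		/* cf. nd6_dad_stop */
	pthread_mutex_unlock(&dad6_mutex);
	free(dp);
	return 0;
}
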
*/ void -nd6_dad_start(ifa, tick) - struct ifaddr *ifa; - int *tick; /* minimum delay ticks for IFF_UP event */ +nd6_dad_start( + struct ifaddr *ifa, + int *tick) /* minimum delay ticks for IFF_UP event */ { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; @@ -1103,7 +1103,9 @@ nd6_dad_start(ifa, tick) return; } bzero(dp, sizeof(*dp)); + lck_mtx_lock(dad6_mutex); TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list); + lck_mtx_unlock(dad6_mutex); nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr))); @@ -1121,7 +1123,7 @@ nd6_dad_start(ifa, tick) dp->dad_ns_ocount = dp->dad_ns_tcount = 0; if (tick == NULL) { nd6_dad_ns_output(dp, ifa); - timeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa, + timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000); } else { int ntick; @@ -1131,7 +1133,7 @@ nd6_dad_start(ifa, tick) else ntick = *tick + random() % (hz / 2); *tick = ntick; - timeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa, + timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, ntick); } } @@ -1140,8 +1142,8 @@ nd6_dad_start(ifa, tick) * terminate DAD unconditionally. used for address removals. */ void -nd6_dad_stop(ifa) - struct ifaddr *ifa; +nd6_dad_stop( + struct ifaddr *ifa) { struct dadq *dp; @@ -1153,34 +1155,20 @@ nd6_dad_stop(ifa) return; } - untimeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa); + untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa); + lck_mtx_lock(dad6_mutex); TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); + lck_mtx_unlock(dad6_mutex); FREE(dp, M_IP6NDP); dp = NULL; ifafree(ifa); } -static void -nd6_dad_timer_funnel(ifa) - struct ifaddr *ifa; -{ - -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - nd6_dad_timer(ifa); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif - -} - static void -nd6_dad_timer(ifa) - struct ifaddr *ifa; +nd6_dad_timer( + struct ifaddr *ifa) { int s; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; @@ -1218,7 +1206,9 @@ nd6_dad_timer(ifa) nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n", if_name(ifa->ifa_ifp))); + lck_mtx_lock(dad6_mutex); TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); + lck_mtx_unlock(dad6_mutex); FREE(dp, M_IP6NDP); dp = NULL; ifafree(ifa); @@ -1231,7 +1221,7 @@ nd6_dad_timer(ifa) * We have more NS to go. Send NS packet for DAD. */ nd6_dad_ns_output(dp, ifa); - timeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa, + timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000); } else { /* @@ -1296,7 +1286,10 @@ nd6_dad_timer(ifa) if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr))); + lck_mtx_lock(dad6_mutex); TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); + lck_mtx_unlock(dad6_mutex); + in6_post_msg(ia->ia_ifp, KEV_INET6_NEW_USER_ADDR, ia); FREE(dp, M_IP6NDP); dp = NULL; ifafree(ifa); @@ -1308,8 +1301,8 @@ done: } void -nd6_dad_duplicated(ifa) - struct ifaddr *ifa; +nd6_dad_duplicated( + struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; @@ -1329,7 +1322,7 @@ nd6_dad_duplicated(ifa) ia->ia6_flags |= IN6_IFF_DUPLICATED; /* We are done with DAD, with duplicated address found. 
(failure) */ - untimeout((void (*) __P((void *)))nd6_dad_timer_funnel, (void *)ifa); + untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa); log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", @@ -1337,16 +1330,18 @@ nd6_dad_duplicated(ifa) log(LOG_ERR, "%s: manual intervention required\n", if_name(ifa->ifa_ifp)); + lck_mtx_lock(dad6_mutex); TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list); + lck_mtx_unlock(dad6_mutex); FREE(dp, M_IP6NDP); dp = NULL; ifafree(ifa); } static void -nd6_dad_ns_output(dp, ifa) - struct dadq *dp; - struct ifaddr *ifa; +nd6_dad_ns_output( + struct dadq *dp, + struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp = ifa->ifa_ifp; @@ -1366,15 +1361,14 @@ nd6_dad_ns_output(dp, ifa) } dp->dad_ns_ocount++; - nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1); + nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1, 0); } static void -nd6_dad_ns_input(ifa) - struct ifaddr *ifa; +nd6_dad_ns_input( + struct ifaddr *ifa) { struct in6_ifaddr *ia; - struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; int duplicate; @@ -1383,7 +1377,6 @@ nd6_dad_ns_input(ifa) panic("ifa == NULL in nd6_dad_ns_input"); ia = (struct in6_ifaddr *)ifa; - ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; duplicate = 0; dp = nd6_dad_find(ifa); @@ -1420,8 +1413,8 @@ nd6_dad_ns_input(ifa) } static void -nd6_dad_na_input(ifa) - struct ifaddr *ifa; +nd6_dad_na_input( + struct ifaddr *ifa) { struct dadq *dp; diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c index c3fd29bc6..6ca948351 100644 --- a/bsd/netinet6/nd6_rtr.c +++ b/bsd/netinet6/nd6_rtr.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -62,22 +63,21 @@ #define SDL(s) ((struct sockaddr_dl *)s) -static struct nd_defrouter *defrtrlist_update __P((struct nd_defrouter *)); -static struct in6_ifaddr *in6_ifadd __P((struct nd_prefix *, - struct in6_addr *)); -static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *, - struct nd_defrouter *)); -static void pfxrtr_add __P((struct nd_prefix *, struct nd_defrouter *)); -static void pfxrtr_del __P((struct nd_pfxrouter *)); -static struct nd_pfxrouter *find_pfxlist_reachable_router - __P((struct nd_prefix *)); -static void defrouter_addifreq __P((struct ifnet *)); -static void nd6_rtmsg __P((int, struct rtentry *)); +static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); +static struct in6_ifaddr *in6_ifadd(struct nd_prefix *, + struct in6_addr *); +static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *, + struct nd_defrouter *); +static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); +static void pfxrtr_del(struct nd_pfxrouter *); +static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *); +static void defrouter_addifreq(struct ifnet *); +static void nd6_rtmsg(int, struct rtentry *); -static void in6_init_address_ltimes __P((struct nd_prefix *ndpr, - struct in6_addrlifetime *lt6)); +static void in6_init_address_ltimes(struct nd_prefix *ndpr, + struct in6_addrlifetime *lt6); -static int rt6_deleteroute __P((struct radix_node *, void *)); +static int rt6_deleteroute(struct radix_node *, void *); extern int nd6_recalc_reachtm_interval; @@ -96,6 +96,9 @@ static int ip6_temp_valid_lifetime = 1800; */ int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; +extern lck_mtx_t *rt_mtx; +extern lck_mtx_t *nd6_mutex; + /* * Receive Router Solicitation Message - just for routers. 
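
The prototype churn in these files is mechanical: every pre-ANSI __P(()) wrapper and K&R-style parameter list is rewritten as a plain ANSI C declaration and definition, with no change in behavior. For a made-up function the conversion looks like this:

/* Before: __P(()) prototype plus K&R definition.
 *	int nd6_example __P((struct ifnet *, int));
 *	int nd6_example(ifp, flag) struct ifnet *ifp; int flag; { ... }
 */

/* After: ANSI prototype and definition. */
struct ifnet;					/* opaque here */
int nd6_example(struct ifnet *, int);

int
nd6_example(struct ifnet *ifp, int flag)
{
	return (ifp != (struct ifnet *)0) ? flag : 0;
}
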
 * Router solicitation/advertisement is mostly managed by userland program
@@ -104,9 +107,10 @@ int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
 * Based on RFC 2461
 */
 void
-nd6_rs_input(m, off, icmp6len)
-	struct mbuf *m;
-	int off, icmp6len;
+nd6_rs_input(
+	struct mbuf *m,
+	int off,
+	int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -146,7 +150,7 @@ nd6_rs_input(m, off, icmp6len)
 		goto freeit;
 
 #ifndef PULLDOWN_TEST
-	IP6_EXTHDR_CHECK(m, off, icmp6len,);
+	IP6_EXTHDR_CHECK(m, off, icmp6len, return);
 	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
@@ -197,9 +201,10 @@ nd6_rs_input(m, off, icmp6len)
 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
 */
 void
-nd6_ra_input(m, off, icmp6len)
-	struct mbuf *m;
-	int off, icmp6len;
+nd6_ra_input(
+	struct mbuf *m,
+	int off,
+	int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
@@ -214,6 +219,9 @@ nd6_ra_input(m, off, icmp6len)
 #endif
 	union nd_opts ndopts;
 	struct nd_defrouter *dr;
+	struct timeval timenow;
+
+	getmicrotime(&timenow);
 
 	if (ip6_accept_rtadv == 0 && ((ifp->if_eflags & IFEF_ACCEPT_RTADVD) == 0))
 		goto freeit;
@@ -234,7 +242,7 @@ nd6_ra_input(m, off, icmp6len)
 	}
 
 #ifndef PULLDOWN_TEST
-	IP6_EXTHDR_CHECK(m, off, icmp6len,);
+	IP6_EXTHDR_CHECK(m, off, icmp6len, return);
 	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
@@ -260,7 +268,7 @@ nd6_ra_input(m, off, icmp6len)
 	dr0.rtaddr = saddr6;
 	dr0.flags = nd_ra->nd_ra_flags_reserved;
 	dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
-	dr0.expire = time_second + dr0.rtlifetime;
+	dr0.expire = timenow.tv_sec + dr0.rtlifetime;
 	dr0.ifp = ifp;
 	dr0.advint = 0;		/* Mobile IPv6 */
 	dr0.advint_expire = 0;	/* Mobile IPv6 */
@@ -338,7 +346,7 @@ nd6_ra_input(m, off, icmp6len)
 			pr.ndpr_prefix.sin6_family = AF_INET6;
 			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
 			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
-			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
+			pr.ndpr_ifp = m->m_pkthdr.rcvif;
 
 			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
 			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
@@ -422,7 +430,7 @@ nd6_ra_input(m, off, icmp6len)
	 * router's neighbor cache, which might also affect our on-link
	 * detection of advertised prefixes.
*/ - pfxlist_onlink_check(); + pfxlist_onlink_check(0); } freeit: @@ -446,6 +454,8 @@ nd6_rtmsg(cmd, rt) { struct rt_addrinfo info; + lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; @@ -458,12 +468,11 @@ nd6_rtmsg(cmd, rt) } void -defrouter_addreq(new) - struct nd_defrouter *new; +defrouter_addreq( + struct nd_defrouter *new) { struct sockaddr_in6 def, mask, gate; struct rtentry *newrt = NULL; - int s; Bzero(&def, sizeof(def)); Bzero(&mask, sizeof(mask)); @@ -474,27 +483,28 @@ defrouter_addreq(new) def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; - s = splnet(); - (void)rtrequest(RTM_ADD, (struct sockaddr *)&def, + lck_mtx_lock(rt_mtx); + (void)rtrequest_locked(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt); if (newrt) { nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ rtunref(newrt); } - splx(s); + lck_mtx_unlock(rt_mtx); return; } /* Add a route to a given interface as default */ void -defrouter_addifreq(ifp) - struct ifnet *ifp; +defrouter_addifreq( + struct ifnet *ifp) { struct sockaddr_in6 def, mask; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; struct rtentry *newrt = NULL; - int error, flags; + int error; + u_long flags; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); @@ -514,8 +524,9 @@ defrouter_addifreq(ifp) return; } + lck_mtx_lock(rt_mtx); flags = ifa->ifa_flags; - error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, + error = rtrequest_locked(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, (struct sockaddr *)&mask, flags, &newrt); if (error != 0) { nd6log((LOG_ERR, @@ -532,15 +543,20 @@ defrouter_addifreq(ifp) } in6_post_msg(ifp, KEV_INET6_DEFROUTER, (struct in6_ifaddr *)ifa); } + lck_mtx_unlock(rt_mtx); + ifafree(ifa); } struct nd_defrouter * -defrouter_lookup(addr, ifp) - struct in6_addr *addr; - struct ifnet *ifp; +defrouter_lookup( + struct in6_addr *addr, + struct ifnet *ifp) { struct nd_defrouter *dr; + + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) @@ -551,9 +567,9 @@ defrouter_lookup(addr, ifp) } void -defrouter_delreq(dr, dofree) - struct nd_defrouter *dr; - int dofree; +defrouter_delreq( + struct nd_defrouter *dr, + int dofree) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; @@ -567,7 +583,8 @@ defrouter_delreq(dr, dofree) def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; - rtrequest(RTM_DELETE, (struct sockaddr *)&def, + lck_mtx_lock(rt_mtx); + rtrequest_locked(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt); @@ -579,17 +596,18 @@ defrouter_delreq(dr, dofree) * rtrequest(). */ rtref(oldrt); - rtfree(oldrt); + rtfree_locked(oldrt); } } if (dofree) /* XXX: necessary? */ FREE(dr, M_IP6NDP); + lck_mtx_unlock(rt_mtx); } void -defrtrlist_del(dr) - struct nd_defrouter *dr; +defrtrlist_del( + struct nd_defrouter *dr, int nd6locked) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; @@ -603,6 +621,8 @@ defrtrlist_del(dr) rt6_flush(&dr->rtaddr, dr->ifp); } + if (nd6locked == 0) + lck_mtx_lock(nd6_mutex); if (dr == TAILQ_FIRST(&nd_defrouter)) deldr = dr; /* The router is primary. 
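
defrtrlist_del() above acquires an nd6locked parameter so it can serve callers that already hold nd6_mutex (passing 1) as well as callers that do not (passing 0); the function takes and drops the mutex only in the latter case. A small compilable model of this take-if-needed convention, with a pthread mutex in place of lck_mtx (names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t nd6_mutex = PTHREAD_MUTEX_INITIALIZER;
static int entries = 3;

static void
list_del(int locked)
{
	if (locked == 0)
		pthread_mutex_lock(&nd6_mutex);
	entries--;				/* the protected work */
	if (locked == 0)
		pthread_mutex_unlock(&nd6_mutex);
}

int
main(void)
{
	list_del(0);				/* caller does not hold the mutex */

	pthread_mutex_lock(&nd6_mutex);
	list_del(1);				/* caller already holds it */
	pthread_mutex_unlock(&nd6_mutex);

	printf("entries now %d\n", entries);
	return 0;
}
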
*/ @@ -616,7 +636,7 @@ defrtrlist_del(dr) if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } - pfxlist_onlink_check(); + pfxlist_onlink_check(1); /* * If the router is the primary one, choose a new one. @@ -626,6 +646,9 @@ defrtrlist_del(dr) if (deldr) defrouter_select(); + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); + FREE(dr, M_IP6NDP); } @@ -642,7 +665,6 @@ defrtrlist_del(dr) void defrouter_select() { - int s = splnet(); struct nd_defrouter *dr, anydr; struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; @@ -650,9 +672,11 @@ defrouter_select() /* * Search for a (probably) reachable router from the list. */ + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { - if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) && + if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp, 0)) && (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && ND6_IS_LLINFO_PROBREACH(ln)) { /* Got it, and move it to the head */ @@ -707,21 +731,20 @@ defrouter_select() } } - splx(s); return; } static struct nd_defrouter * -defrtrlist_update(new) - struct nd_defrouter *new; +defrtrlist_update( + struct nd_defrouter *new) { struct nd_defrouter *dr, *n; - int s = splnet(); + lck_mtx_lock(nd6_mutex); if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) { /* entry exists */ if (new->rtlifetime == 0) { - defrtrlist_del(dr); + defrtrlist_del(dr, 1); dr = NULL; } else { /* override */ @@ -729,19 +752,19 @@ defrtrlist_update(new) dr->rtlifetime = new->rtlifetime; dr->expire = new->expire; } - splx(s); + lck_mtx_unlock(nd6_mutex); return(dr); } /* entry does not exist */ if (new->rtlifetime == 0) { - splx(s); + lck_mtx_unlock(nd6_mutex); return(NULL); } n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT); if (n == NULL) { - splx(s); + lck_mtx_unlock(nd6_mutex); return(NULL); } bzero(n, sizeof(*n)); @@ -755,18 +778,19 @@ defrtrlist_update(new) TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry); if (TAILQ_FIRST(&nd_defrouter) == n) defrouter_select(); - splx(s); + lck_mtx_unlock(nd6_mutex); return(n); } static struct nd_pfxrouter * -pfxrtr_lookup(pr, dr) - struct nd_prefix *pr; - struct nd_defrouter *dr; +pfxrtr_lookup( + struct nd_prefix *pr, + struct nd_defrouter *dr) { struct nd_pfxrouter *search; + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) { if (search->router == dr) break; @@ -776,12 +800,14 @@ pfxrtr_lookup(pr, dr) } static void -pfxrtr_add(pr, dr) - struct nd_prefix *pr; - struct nd_defrouter *dr; +pfxrtr_add( + struct nd_prefix *pr, + struct nd_defrouter *dr) { struct nd_pfxrouter *new; + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + new = (struct nd_pfxrouter *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) return; @@ -790,23 +816,25 @@ pfxrtr_add(pr, dr) LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); - pfxlist_onlink_check(); + pfxlist_onlink_check(1); } static void -pfxrtr_del(pfr) - struct nd_pfxrouter *pfr; +pfxrtr_del( + struct nd_pfxrouter *pfr) { + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); LIST_REMOVE(pfr, pfr_entry); FREE(pfr, M_IP6NDP); } struct nd_prefix * -nd6_prefix_lookup(pr) - struct nd_prefix *pr; +nd6_prefix_lookup( + struct nd_prefix *pr) { struct nd_prefix *search; + lck_mtx_lock(nd6_mutex); for (search = nd_prefix.lh_first; search; search = search->ndpr_next) { if (pr->ndpr_ifp == search->ndpr_ifp && pr->ndpr_plen == search->ndpr_plen && @@ -817,17 +845,19 @@ nd6_prefix_lookup(pr) break; } } + 
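
defrtrlist_update() and prelist_remove() above show the other recurring conversion: the uniprocessor "int s = splnet(); ... splx(s);" bracketing becomes lck_mtx_lock/lck_mtx_unlock of nd6_mutex, and every early return must now release the mutex instead of restoring the spl. In outline (a sketch of the shape, not the actual function, with pthreads for lck_mtx):

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t nd6_mutex = PTHREAD_MUTEX_INITIALIZER;

void *
update(void *existing, unsigned lifetime)
{
	/* was: int s = splnet(); */
	pthread_mutex_lock(&nd6_mutex);
	if (lifetime == 0) {
		/* early exit path: was splx(s); */
		pthread_mutex_unlock(&nd6_mutex);
		return NULL;
	}
	/* ... modify the default-router list here ... */
	/* was: splx(s); */
	pthread_mutex_unlock(&nd6_mutex);
	return existing;
}
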
lck_mtx_unlock(nd6_mutex); return(search); } int -nd6_prelist_add(pr, dr, newp) - struct nd_prefix *pr, **newp; - struct nd_defrouter *dr; +nd6_prelist_add( + struct nd_prefix *pr, + struct nd_defrouter *dr, + struct nd_prefix **newp) { struct nd_prefix *new = NULL; - int i, s; + int i; new = (struct nd_prefix *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT); if (new == NULL) @@ -845,16 +875,15 @@ nd6_prelist_add(pr, dr, newp) new->ndpr_prefix.sin6_addr.s6_addr32[i] &= new->ndpr_mask.s6_addr32[i]; - s = splnet(); /* link ndpr_entry to nd_prefix list */ + lck_mtx_lock(nd6_mutex); LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry); - splx(s); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { int e; - if ((e = nd6_prefix_onlink(new)) != 0) { + if ((e = nd6_prefix_onlink(new, 0, 1)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), @@ -866,16 +895,17 @@ nd6_prelist_add(pr, dr, newp) if (dr) { pfxrtr_add(new, dr); } + lck_mtx_unlock(nd6_mutex); return 0; } void -prelist_remove(pr) - struct nd_prefix *pr; +prelist_remove( + struct nd_prefix *pr, int nd6locked) { struct nd_pfxrouter *pfr, *next; - int e, s; + int e; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; @@ -900,8 +930,8 @@ prelist_remove(pr) if (pr->ndpr_refcnt > 0) return; /* notice here? */ - s = splnet(); - + if (nd6locked == 0) + lck_mtx_lock(nd6_mutex); /* unlink ndpr_entry from nd_prefix list */ LIST_REMOVE(pr, ndpr_entry); @@ -911,28 +941,29 @@ prelist_remove(pr) FREE(pfr, M_IP6NDP); } - splx(s); FREE(pr, M_IP6NDP); - pfxlist_onlink_check(); + pfxlist_onlink_check(1); + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); } int -prelist_update(new, dr, m) - struct nd_prefix *new; - struct nd_defrouter *dr; /* may be NULL */ - struct mbuf *m; +prelist_update( + struct nd_prefix *new, + struct nd_defrouter *dr, /* may be NULL */ + struct mbuf *m) { struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL; struct ifaddr *ifa; struct ifnet *ifp = new->ndpr_ifp; struct nd_prefix *pr; - int s = splnet(); int error = 0; int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; + struct timeval timenow; auth = 0; if (m) { @@ -973,7 +1004,7 @@ prelist_update(new, dr, m) (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { int e; - if ((e = nd6_prefix_onlink(pr)) != 0) { + if ((e = nd6_prefix_onlink(pr, 0, 0)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " @@ -983,9 +1014,11 @@ prelist_update(new, dr, m) /* proceed anyway. XXX: is it correct? */ } } - + + lck_mtx_lock(nd6_mutex); if (dr && pfxrtr_lookup(pr, dr) == NULL) pfxrtr_add(pr, dr); + lck_mtx_unlock(nd6_mutex); } else { struct nd_prefix *newpr = NULL; @@ -1050,6 +1083,9 @@ prelist_update(new, dr, m) * form an address. Note that even a manually configured address * should reject autoconfiguration of a new address. */ + getmicrotime(&timenow); + + ifnet_lock_exclusive(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { struct in6_ifaddr *ifa6; @@ -1092,7 +1128,7 @@ prelist_update(new, dr, m) lt6_tmp = ifa6->ia6_lifetime; storedlifetime = IFA6_IS_INVALID(ifa6) ? 
0 : - (lt6_tmp.ia6t_expire - time_second); + (lt6_tmp.ia6t_expire - timenow.tv_sec); if (TWOHOUR < new->ndpr_vltime || storedlifetime < new->ndpr_vltime) { @@ -1146,6 +1182,7 @@ prelist_update(new, dr, m) ifa6->ia6_lifetime = lt6_tmp; } + ifnet_lock_done(ifp); if (ia6_match == NULL && new->ndpr_vltime) { /* * No address matched and the valid lifetime is non-zero. @@ -1190,7 +1227,7 @@ prelist_update(new, dr, m) * of other addresses, so we check and update it. * XXX: what if address duplication happens? */ - pfxlist_onlink_check(); + pfxlist_onlink_check(0); } else { /* just set an error. do not bark here. */ error = EADDRNOTAVAIL; /* XXX: might be unused. */ @@ -1200,7 +1237,6 @@ prelist_update(new, dr, m) afteraddrconf: end: - splx(s); return error; } @@ -1210,17 +1246,19 @@ prelist_update(new, dr, m) * XXX: lengthy function name... */ static struct nd_pfxrouter * -find_pfxlist_reachable_router(pr) - struct nd_prefix *pr; +find_pfxlist_reachable_router( + struct nd_prefix *pr) { struct nd_pfxrouter *pfxrtr; struct rtentry *rt; struct llinfo_nd6 *ln; + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, - pfxrtr->router->ifp)) && + pfxrtr->router->ifp, 0)) && (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && ND6_IS_LLINFO_PROBREACH(ln)) break; /* found */ @@ -1244,7 +1282,7 @@ find_pfxlist_reachable_router(pr) * is no router around us. */ void -pfxlist_onlink_check() +pfxlist_onlink_check(int nd6locked) { struct nd_prefix *pr; struct in6_ifaddr *ifa; @@ -1253,6 +1291,9 @@ pfxlist_onlink_check() * Check if there is a prefix that has a reachable advertising * router. */ + if (nd6locked == 0) + lck_mtx_lock(nd6_mutex); + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr)) break; @@ -1327,7 +1368,7 @@ pfxlist_onlink_check() if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 && pr->ndpr_raf_onlink) { - if ((e = nd6_prefix_onlink(pr)) != 0) { + if ((e = nd6_prefix_onlink(pr, 0, 1)) != 0) { nd6log((LOG_ERR, "pfxlist_onlink_check: failed to " "make %s/%d offlink, errno=%d\n", @@ -1345,7 +1386,7 @@ pfxlist_onlink_check() * always be attached. * The precise detection logic is same as the one for prefixes. */ - for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { + for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; @@ -1362,7 +1403,7 @@ pfxlist_onlink_check() break; } if (ifa) { - for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { + for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; @@ -1376,18 +1417,20 @@ pfxlist_onlink_check() } } else { - for (ifa = in6_ifaddr; ifa; ifa = ifa->ia_next) { + for (ifa = in6_ifaddrs; ifa; ifa = ifa->ia_next) { if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; ifa->ia6_flags &= ~IN6_IFF_DETACHED; } } + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); } int -nd6_prefix_onlink(pr) - struct nd_prefix *pr; +nd6_prefix_onlink( + struct nd_prefix *pr, int rtlocked, int nd6locked) { struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; @@ -1412,6 +1455,10 @@ nd6_prefix_onlink(pr) * Although such a configuration is expected to be rare, we explicitly * allow it. 
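
prelist_update() now brackets its TAILQ_FOREACH over ifp->if_addrlist with ifnet_lock_exclusive()/ifnet_lock_done(), since the per-interface address list is no longer protected by the funnel. Modeled in userspace with a writer-held rwlock around a list traversal (types and names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

struct addr6 {
	TAILQ_ENTRY(addr6) ifa_list;
	int lifetime;
};

static TAILQ_HEAD(, addr6) if_addrlist = TAILQ_HEAD_INITIALIZER(if_addrlist);
static pthread_rwlock_t if_lock = PTHREAD_RWLOCK_INITIALIZER;

static void
update_lifetimes(int new_lt)
{
	struct addr6 *ifa;

	pthread_rwlock_wrlock(&if_lock);	/* cf. ifnet_lock_exclusive(ifp) */
	TAILQ_FOREACH(ifa, &if_addrlist, ifa_list)
		ifa->lifetime = new_lt;		/* mutation needs the writer lock */
	pthread_rwlock_unlock(&if_lock);	/* cf. ifnet_lock_done(ifp) */
}

int
main(void)
{
	struct addr6 a = { .lifetime = 0 };

	TAILQ_INSERT_TAIL(&if_addrlist, &a, ifa_list);
	update_lifetimes(3600);
	printf("lifetime %d\n", a.lifetime);
	return 0;
}
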
*/ + if (nd6locked == 0) + lck_mtx_lock(nd6_mutex); + else + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) { if (opr == pr) continue; @@ -1422,10 +1469,15 @@ nd6_prefix_onlink(pr) if (opr->ndpr_plen == pr->ndpr_plen && in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr, &opr->ndpr_prefix.sin6_addr, - pr->ndpr_plen)) + pr->ndpr_plen)) { + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); return(0); + } } + if (nd6locked == 0) + lck_mtx_unlock(nd6_mutex); /* * We prefer link-local addresses as the associated interface address. */ @@ -1435,11 +1487,13 @@ nd6_prefix_onlink(pr) IN6_IFF_ANYCAST); if (ifa == NULL) { /* XXX: freebsd does not have ifa_ifwithaf */ + ifnet_lock_exclusive(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family == AF_INET6) break; } + ifnet_lock_done(ifp); /* should we care about ia6_flags? */ } if (ifa == NULL) { @@ -1464,6 +1518,10 @@ nd6_prefix_onlink(pr) bzero(&mask6, sizeof(mask6)); mask6.sin6_len = sizeof(mask6); mask6.sin6_addr = pr->ndpr_mask; + + if (rtlocked == 0) + lck_mtx_lock(rt_mtx); + rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP; if (nd6_need_cache(ifp)) { /* explicitly set in case ifa_flags does not set the flag. */ @@ -1474,7 +1532,7 @@ nd6_prefix_onlink(pr) */ rtflags &= ~RTF_CLONING; } - error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, + error = rtrequest_locked(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix, ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { @@ -1495,12 +1553,14 @@ nd6_prefix_onlink(pr) if (rt != NULL) rtunref(rt); + if (rtlocked == 0) + lck_mtx_unlock(rt_mtx); return(error); } int -nd6_prefix_offlink(pr) - struct nd_prefix *pr; +nd6_prefix_offlink( + struct nd_prefix *pr) { int error = 0; struct ifnet *ifp = pr->ndpr_ifp; @@ -1525,7 +1585,8 @@ nd6_prefix_offlink(pr) mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); - error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, + lck_mtx_lock(rt_mtx); + error = rtrequest_locked(RTM_DELETE, (struct sockaddr *)&sa6, NULL, (struct sockaddr *)&mask6, 0, &rt); if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; @@ -1541,6 +1602,7 @@ nd6_prefix_offlink(pr) * If there's one, try to make the prefix on-link on the * interface. */ + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); for (opr = nd_prefix.lh_first; opr; opr = opr->ndpr_next) { if (opr == pr) continue; @@ -1561,7 +1623,7 @@ nd6_prefix_offlink(pr) pr->ndpr_plen)) { int e; - if ((e = nd6_prefix_onlink(opr)) != 0) { + if ((e = nd6_prefix_onlink(opr, 1, 1)) != 0) { nd6log((LOG_ERR, "nd6_prefix_offlink: failed to " "recover a prefix %s/%d from %s " @@ -1586,17 +1648,18 @@ nd6_prefix_offlink(pr) if (rt->rt_refcnt <= 0) { /* XXX: we should free the entry ourselves. 
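
The rtref()/rtfree_locked() pair that follows the comment above is deliberate: when deletion leaves a route with no references, rtrequest does not free it, so the code bumps the refcount back to one and immediately releases it, letting rtfree's zero-crossing logic reclaim the entry. A tiny refcount model of the idiom (names illustrative):

#include <stdio.h>
#include <stdlib.h>

struct rte { int refcnt; };

static void
rte_release(struct rte *rt)
{
	if (--rt->refcnt <= 0) {	/* last reference: reclaim */
		printf("freeing route\n");
		free(rt);
	}
}

int
main(void)
{
	struct rte *rt = malloc(sizeof(*rt));

	rt->refcnt = 0;			/* deletion left it unreferenced */
	if (rt->refcnt <= 0) {
		rt->refcnt++;		/* cf. rtref(rt) */
		rte_release(rt);	/* cf. rtfree_locked(rt): frees it */
	}
	return 0;
}
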
*/ rtref(rt); - rtfree(rt); + rtfree_locked(rt); } } + lck_mtx_unlock(rt_mtx); return(error); } static struct in6_ifaddr * -in6_ifadd(pr, ifid) - struct nd_prefix *pr; - struct in6_addr *ifid; /* Mobile IPv6 addition */ +in6_ifadd( + struct nd_prefix *pr, + struct in6_addr *ifid) /* Mobile IPv6 addition */ { struct ifnet *ifp = pr->ndpr_ifp; struct ifaddr *ifa; @@ -1733,9 +1796,9 @@ in6_ifadd(pr, ifid) } int -in6_tmpifadd(ia0, forcegen) - const struct in6_ifaddr *ia0; /* corresponding public address */ - int forcegen; +in6_tmpifadd( + const struct in6_ifaddr *ia0, /* corresponding public address */ + int forcegen) { struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; struct in6_ifaddr *newia; @@ -1744,6 +1807,9 @@ in6_tmpifadd(ia0, forcegen) int trylimit = 3; /* XXX: adhoc value */ u_int32_t randid[2]; time_t vltime0, pltime0; + struct timeval timenow; + + getmicrotime(&timenow); bzero(&ifra, sizeof(ifra)); strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); @@ -1790,14 +1856,14 @@ in6_tmpifadd(ia0, forcegen) */ if (ia0->ia6_lifetime.ia6t_expire != 0) { vltime0 = IFA6_IS_INVALID(ia0) ? 0 : - (ia0->ia6_lifetime.ia6t_expire - time_second); + (ia0->ia6_lifetime.ia6t_expire - timenow.tv_sec); if (vltime0 > ip6_temp_valid_lifetime) vltime0 = ip6_temp_valid_lifetime; } else vltime0 = ip6_temp_valid_lifetime; if (ia0->ia6_lifetime.ia6t_preferred != 0) { pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 : - (ia0->ia6_lifetime.ia6t_preferred - time_second); + (ia0->ia6_lifetime.ia6t_preferred - timenow.tv_sec); if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor){ pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor; @@ -1840,7 +1906,7 @@ in6_tmpifadd(ia0, forcegen) * and, in fact, we surely need the check when we create a new * temporary address due to deprecation of an old temporary address. */ - pfxlist_onlink_check(); + pfxlist_onlink_check(0); return(0); } @@ -1848,6 +1914,9 @@ in6_tmpifadd(ia0, forcegen) int in6_init_prefix_ltimes(struct nd_prefix *ndpr) { + struct timeval timenow; + + getmicrotime(&timenow); /* check if preferred lifetime > valid lifetime. RFC2462 5.5.3 (c) */ if (ndpr->ndpr_pltime > ndpr->ndpr_vltime) { nd6log((LOG_INFO, "in6_init_prefix_ltimes: preferred lifetime" @@ -1858,11 +1927,11 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr) if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_preferred = 0; else - ndpr->ndpr_preferred = time_second + ndpr->ndpr_pltime; + ndpr->ndpr_preferred = timenow.tv_sec + ndpr->ndpr_pltime; if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_expire = 0; else - ndpr->ndpr_expire = time_second + ndpr->ndpr_vltime; + ndpr->ndpr_expire = timenow.tv_sec + ndpr->ndpr_vltime; return 0; } @@ -1870,12 +1939,15 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr) static void in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) { + struct timeval timenow; + + getmicrotime(&timenow); /* Valid lifetime must not be updated unless explicitly specified. 
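
in6_init_address_ltimes(), continuing below, treats ND6_INFINITE_LIFETIME as a sentinel: an infinite valid or preferred lifetime maps to an expiry of 0 (never), anything else to now plus the lifetime. The arithmetic, isolated into a compilable snippet (the constant matches the header definition earlier in this patch):

#include <stdio.h>
#include <sys/time.h>

#define ND6_INFINITE_LIFETIME 0xffffffff

static long
expiry(unsigned long lifetime, long now)
{
	if (lifetime == ND6_INFINITE_LIFETIME)
		return 0;			/* 0 means "never expires" */
	return now + (long)lifetime;
}

int
main(void)
{
	struct timeval timenow;

	gettimeofday(&timenow, NULL);
	printf("finite:   %ld\n", expiry(3600, (long)timenow.tv_sec));
	printf("infinite: %ld\n", expiry(ND6_INFINITE_LIFETIME, (long)timenow.tv_sec));
	return 0;
}
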
 */
 	/* init ia6t_expire */
 	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_expire = 0;
 	else {
-		lt6->ia6t_expire = time_second;
+		lt6->ia6t_expire = timenow.tv_sec;
 		lt6->ia6t_expire += lt6->ia6t_vltime;
 	}
@@ -1883,7 +1955,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
 		lt6->ia6t_preferred = 0;
 	else {
-		lt6->ia6t_preferred = time_second;
+		lt6->ia6t_preferred = timenow.tv_sec;
 		lt6->ia6t_preferred += lt6->ia6t_pltime;
 	}
 }
@@ -1894,34 +1966,35 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
 * it shouldn't be called when acting as a router.
 */
 void
-rt6_flush(gateway, ifp)
-	struct in6_addr *gateway;
-	struct ifnet *ifp;
+rt6_flush(
+	struct in6_addr *gateway,
+	struct ifnet *ifp)
 {
 	struct radix_node_head *rnh = rt_tables[AF_INET6];
-	int s = splnet();
 
 	/* We only care about link-local addresses */
 	if (!IN6_IS_ADDR_LINKLOCAL(gateway)) {
-		splx(s);
 		return;
 	}
+	lck_mtx_lock(rt_mtx);
 	/* XXX: hack for KAME's link-local address kludge */
 	gateway->s6_addr16[1] = htons(ifp->if_index);
 	rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
-	splx(s);
+	lck_mtx_unlock(rt_mtx);
 }
 
 static int
-rt6_deleteroute(rn, arg)
-	struct radix_node *rn;
-	void *arg;
+rt6_deleteroute(
	struct radix_node *rn,
	void *arg)
 {
 #define SIN6(s) ((struct sockaddr_in6 *)s)
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct in6_addr *gate = (struct in6_addr *)arg;
 
+	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+
 	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
 		return(0);
@@ -1943,20 +2016,21 @@ rt6_deleteroute(rn, arg)
 	if ((rt->rt_flags & RTF_HOST) == 0)
 		return(0);
 
-	return(rtrequest(RTM_DELETE, rt_key(rt),
+	return(rtrequest_locked(RTM_DELETE, rt_key(rt),
 		rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0));
 #undef SIN6
 }
 
 int
-nd6_setdefaultiface(ifindex)
-	int ifindex;
+nd6_setdefaultiface(
+	int ifindex)
 {
 	int error = 0;
 
 	if (ifindex < 0 || if_index < ifindex)
 		return(EINVAL);
 
+	lck_mtx_lock(nd6_mutex);
 	if (nd6_defifindex != ifindex) {
 		nd6_defifindex = ifindex;
 		if (nd6_defifindex > 0)
@@ -1983,5 +2057,6 @@ nd6_setdefaultiface(ifindex)
 			scope6_setdefault(nd6_defifp);
 	}
+	lck_mtx_unlock(nd6_mutex);
 
 	return(error);
 }
diff --git a/bsd/netinet6/pim6_var.h b/bsd/netinet6/pim6_var.h
index 29abc1192..1cb8ec648 100644
--- a/bsd/netinet6/pim6_var.h
+++ b/bsd/netinet6/pim6_var.h
@@ -42,7 +42,6 @@
 * Modified by Pavlin Ivanov Radoslavov, USC/ISI, May 1998
 */
-#ifdef __APPLE_API_UNSTABLE
 struct pim6stat {
 	u_quad_t pim6s_rcv_total;	/* total PIM messages received */
 	u_quad_t pim6s_rcv_tooshort;	/* received with too few bytes */
@@ -52,15 +51,9 @@ struct pim6stat {
 	u_quad_t pim6s_rcv_badregisters; /* received invalid registers */
 	u_quad_t pim6s_snd_registers;	/* sent registers */
 };
-#endif
 
-#if (defined(KERNEL)) || (defined(_KERNEL))
-#ifdef __APPLE_API_PRIVATE
 extern struct pim6stat pim6stat;
 
-int pim6_input __P((struct mbuf **, int*));
-#endif /* __APPLE_API_PRIVATE */
-#endif /* KERNEL */
 
 /*
 * Names for PIM sysctl objects
@@ -68,8 +61,13 @@ int pim6_input __P((struct mbuf **, int*));
 #define PIM6CTL_STATS		1	/* statistics (read-only) */
 #define PIM6CTL_MAXID		2
 
+#ifdef KERNEL_PRIVATE
 #define PIM6CTL_NAMES { \
 	{ 0, 0 }, \
 	{ 0, 0 }, \
 }
-#endif /* _NETINET6_PIM6_VAR_H_ */
+
+int pim6_input(struct mbuf **, int*);
+
+#endif /* KERNEL_PRIVATE */
+#endif /* _NETINET6_PIM6_VAR_H_ */
diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c
index 97eca96dc..7e3094d30 100644
--- a/bsd/netinet6/raw_ip6.c
+++
b/bsd/netinet6/raw_ip6.c @@ -92,11 +92,13 @@ #include #endif #include +#include #if IPSEC #include #include extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; #endif /*IPSEC*/ @@ -116,6 +118,7 @@ extern struct inpcbhead ripcb; extern struct inpcbinfo ripcbinfo; extern u_long rip_sendspace; extern u_long rip_recvspace; +extern u_long route_generation; struct rip6stat rip6stat; @@ -125,9 +128,9 @@ struct rip6stat rip6stat; * mbuf chain. */ int -rip6_input(mp, offp) - struct mbuf **mp; - int *offp; +rip6_input( + struct mbuf **mp, + int *offp) { struct mbuf *m = *mp; register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -149,6 +152,7 @@ rip6_input(mp, offp) init_sin6(&rip6src, m); /* general init */ + lck_rw_lock_shared(ripcbinfo.mtx); LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->in6p_vflag & INP_IPV6) == 0) continue; @@ -176,10 +180,14 @@ rip6_input(mp, offp) /* * Check AH/ESP integrity. */ - if (ipsec_bypass == 0 && n && ipsec6_in_reject_so(n, last->inp_socket)) { - m_freem(n); - ipsec6stat.in_polvio++; - /* do not inject data into pcb */ + if (ipsec_bypass == 0 && n) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject_so(n, last->inp_socket)) { + m_freem(n); + ipsec6stat.in_polvio++; + /* do not inject data into pcb */ + } + lck_mtx_unlock(sadb_mutex); } else #endif /*IPSEC*/ if (n) { @@ -190,10 +198,7 @@ rip6_input(mp, offp) m_adj(n, *offp); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&rip6src, - n, opts) == 0) { - m_freem(n); - if (opts) - m_freem(opts); + n, opts, NULL) == 0) { rip6stat.rip6s_fullsock++; } else sorwakeup(last->in6p_socket); @@ -202,15 +207,20 @@ rip6_input(mp, offp) } last = in6p; } + lck_rw_done(ripcbinfo.mtx); #if IPSEC /* * Check AH/ESP integrity. */ - if (ipsec_bypass == 0 && last && ipsec6_in_reject_so(m, last->inp_socket)) { - m_freem(m); - ipsec6stat.in_polvio++; - ip6stat.ip6s_delivered--; - /* do not inject data into pcb */ + if (ipsec_bypass == 0 && last) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject_so(m, last->inp_socket)) { + m_freem(m); + ipsec6stat.in_polvio++; + ip6stat.ip6s_delivered--; + /* do not inject data into pcb */ + } + lck_mtx_unlock(sadb_mutex); } else #endif /*IPSEC*/ if (last) { @@ -220,10 +230,7 @@ rip6_input(mp, offp) /* strip intermediate headers */ m_adj(m, *offp); if (sbappendaddr(&last->in6p_socket->so_rcv, - (struct sockaddr *)&rip6src, m, opts) == 0) { - m_freem(m); - if (opts) - m_freem(opts); + (struct sockaddr *)&rip6src, m, opts, NULL) == 0) { rip6stat.rip6s_fullsock++; } else sorwakeup(last->in6p_socket); @@ -245,17 +252,17 @@ rip6_input(mp, offp) } void -rip6_ctlinput(cmd, sa, d) - int cmd; - struct sockaddr *sa; - void *d; +rip6_ctlinput( + int cmd, + struct sockaddr *sa, + void *d) { struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; - void (*notify) __P((struct inpcb *, int)) = in6_rtchange; + void (*notify)(struct inpcb *, int) = in6_rtchange; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) @@ -283,7 +290,7 @@ rip6_ctlinput(cmd, sa, d) sa6_src = &sa6_any; } - (void) in6_pcbnotify(&ripcb, sa, 0, (struct sockaddr *)sa6_src, + (void) in6_pcbnotify(&ripcbinfo, sa, 0, (struct sockaddr *)sa6_src, 0, cmd, notify); } @@ -292,11 +299,11 @@ rip6_ctlinput(cmd, sa, d) * Tack on options user may have setup with control call. 
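
rip6_input() above now walks the raw-PCB list under lck_rw_lock_shared(ripcbinfo.mtx), released with lck_rw_done() after the loop: many readers may deliver packets concurrently while socket creation and teardown take the lock exclusively. A userspace model with a pthread rwlock and a LIST from <sys/queue.h> (structures illustrative):

#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

struct pcb {
	LIST_ENTRY(pcb) inp_list;
	int proto;
};

static LIST_HEAD(, pcb) ripcb = LIST_HEAD_INITIALIZER(ripcb);
static pthread_rwlock_t ripcbinfo_mtx = PTHREAD_RWLOCK_INITIALIZER;

static int
deliver(int proto)
{
	struct pcb *inp;
	int matches = 0;

	pthread_rwlock_rdlock(&ripcbinfo_mtx);	/* cf. lck_rw_lock_shared */
	LIST_FOREACH(inp, &ripcb, inp_list) {
		if (inp->proto == proto)
			matches++;		/* would sbappendaddr() here */
	}
	pthread_rwlock_unlock(&ripcbinfo_mtx);	/* cf. lck_rw_done */
	return matches;
}

int
main(void)
{
	struct pcb a = { .proto = 58 };		/* 58 = ICMPv6, for flavor */

	pthread_rwlock_wrlock(&ripcbinfo_mtx);	/* writers lock exclusively */
	LIST_INSERT_HEAD(&ripcb, &a, inp_list);
	pthread_rwlock_unlock(&ripcbinfo_mtx);
	printf("delivered to %d socket(s)\n", deliver(58));
	return 0;
}
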
*/ int -rip6_output(m, so, dstsock, control) - register struct mbuf *m; - struct socket *so; - struct sockaddr_in6 *dstsock; - struct mbuf *control; +rip6_output( + register struct mbuf *m, + struct socket *so, + struct sockaddr_in6 *dstsock, + struct mbuf *control) { struct in6_addr *dst; struct ip6_hdr *ip6; @@ -383,12 +390,13 @@ rip6_output(m, so, dstsock, control) */ { struct in6_addr *in6a; + struct in6_addr storage; if ((in6a = in6_selectsrc(dstsock, optp, in6p->in6p_moptions, &in6p->in6p_route, &in6p->in6p_laddr, - &error)) == 0) { + &storage, &error)) == 0) { if (error == 0) error = EADDRNOTAVAIL; goto bad; @@ -441,8 +449,13 @@ rip6_output(m, so, dstsock, control) } #endif /*IPSEC*/ + if (in6p->in6p_route.ro_rt && in6p->in6p_route.ro_rt->generation_id != route_generation) { + rtfree(in6p->in6p_route.ro_rt); + in6p->in6p_route.ro_rt = (struct rtentry *)0; + } + error = ip6_output(m, optp, &in6p->in6p_route, 0, - in6p->in6p_moptions, &oifp); + in6p->in6p_moptions, &oifp, 0); if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); @@ -467,13 +480,19 @@ rip6_output(m, so, dstsock, control) return(error); } +static void +load_ip6fw() +{ + ip6_fw_init(); +} + /* * Raw IPv6 socket option processing. */ int -rip6_ctloutput(so, sopt) - struct socket *so; - struct sockopt *sopt; +rip6_ctloutput( + struct socket *so, + struct sockopt *sopt) { int error; @@ -491,6 +510,16 @@ rip6_ctloutput(so, sopt) switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { + case IPV6_FW_ADD: + case IPV6_FW_GET: + if (ip6_fw_ctl_ptr == 0) + load_ip6fw(); + if (ip6_fw_ctl_ptr) + error = ip6_fw_ctl_ptr(sopt); + else + error = ENOPROTOOPT; + break; + case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: @@ -508,6 +537,18 @@ rip6_ctloutput(so, sopt) case SOPT_SET: switch (sopt->sopt_name) { + case IPV6_FW_ADD: + case IPV6_FW_DEL: + case IPV6_FW_FLUSH: + case IPV6_FW_ZERO: + if (ip6_fw_ctl_ptr == 0) + load_ip6fw(); + if (ip6_fw_ctl_ptr) + error = ip6_fw_ctl_ptr(sopt); + else + error = ENOPROTOOPT; + break; + case MRT6_INIT: case MRT6_DONE: case MRT6_ADD_MIF: @@ -536,7 +577,7 @@ rip6_attach(struct socket *so, int proto, struct proc *p) inp = sotoinpcb(so); if (inp) panic("rip6_attach"); - if (p && (error = suser(p->p_ucred, &p->p_acflag)) != 0) + if (p && (error = proc_suser(p)) != 0) return error; error = soreserve(so, rip_sendspace, rip_recvspace); @@ -607,7 +648,7 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) if (nam->sa_len != sizeof(*addr)) return EINVAL; - if (TAILQ_EMPTY(&ifnet) || addr->sin6_family != AF_INET6) + if (TAILQ_EMPTY(&ifnet_head) || addr->sin6_family != AF_INET6) return EADDRNOTAVAIL; #if ENABLE_DEFAULT_SCOPE if (addr->sin6_scope_id == 0) { /* not change if specified */ @@ -621,8 +662,10 @@ rip6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) ((struct in6_ifaddr *)ia)->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { + if (ia) ifafree(ia); return(EADDRNOTAVAIL); } + ifafree(ia); inp->in6p_laddr = addr->sin6_addr; return 0; } @@ -633,6 +676,7 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) struct inpcb *inp = sotoinpcb(so); struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam; struct in6_addr *in6a = NULL; + struct in6_addr storage; int error = 0; #if ENABLE_DEFAULT_SCOPE struct sockaddr_in6 tmp; @@ -640,7 +684,7 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (nam->sa_len != sizeof(*addr)) return EINVAL; - if 
(TAILQ_EMPTY(&ifnet)) + if (TAILQ_EMPTY(&ifnet_head)) return EADDRNOTAVAIL; if (addr->sin6_family != AF_INET6) return EAFNOSUPPORT; @@ -655,7 +699,7 @@ rip6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) /* Source address selection. XXX: need pcblookup? */ in6a = in6_selectsrc(addr, inp->in6p_outputopts, inp->in6p_moptions, &inp->in6p_route, - &inp->in6p_laddr, &error); + &inp->in6p_laddr, &storage, &error); if (in6a == NULL) return (error ? error : EADDRNOTAVAIL); inp->in6p_laddr = *in6a; @@ -713,5 +757,5 @@ struct pr_usrreqs rip6_usrreqs = { pru_connect2_notsupp, in6_control, rip6_detach, rip6_disconnect, pru_listen_notsupp, in6_setpeeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, rip6_send, pru_sense_null, rip6_shutdown, - in6_setsockaddr, sosend, soreceive, sopoll + in6_setsockaddr, sosend, soreceive, pru_sopoll_notsupp }; diff --git a/bsd/netinet6/raw_ip6.h b/bsd/netinet6/raw_ip6.h index 879428b11..30cf70e60 100644 --- a/bsd/netinet6/raw_ip6.h +++ b/bsd/netinet6/raw_ip6.h @@ -48,10 +48,6 @@ struct rip6stat { u_quad_t rip6s_opackets; /* total output packets */ }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern struct rip6stat rip6stat; -#endif -#endif #endif diff --git a/bsd/netinet6/route6.c b/bsd/netinet6/route6.c index acd4263cb..9e01f4b16 100644 --- a/bsd/netinet6/route6.c +++ b/bsd/netinet6/route6.c @@ -44,8 +44,8 @@ #include -static int ip6_rthdr0 __P((struct mbuf *, struct ip6_hdr *, - struct ip6_rthdr0 *)); +static int ip6_rthdr0(struct mbuf *, struct ip6_hdr *, + struct ip6_rthdr0 *); int route6_input(mp, offp) @@ -70,7 +70,7 @@ route6_input(mp, offp) } #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(*rh), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof(*rh), return IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); rh = (struct ip6_rthdr *)((caddr_t)ip6 + off); #else @@ -91,7 +91,7 @@ route6_input(mp, offp) * due to IP6_EXTHDR_CHECK assumption, we cannot handle * very big routing header (max rhlen == 2048). */ - IP6_EXTHDR_CHECK(m, off, rhlen, IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, rhlen, return IPPROTO_DONE); #else /* * note on option length: @@ -207,11 +207,11 @@ ip6_rthdr0(m, ip6, rh0) #if COMPAT_RFC1883 if (rh0->ip6r0_slmap[index / 8] & (1 << (7 - (index % 8)))) - ip6_forward(m, IPV6_SRCRT_NEIGHBOR); + ip6_forward(m, IPV6_SRCRT_NEIGHBOR, 0); else - ip6_forward(m, IPV6_SRCRT_NOTNEIGHBOR); + ip6_forward(m, IPV6_SRCRT_NOTNEIGHBOR, 0); #else - ip6_forward(m, 1); + ip6_forward(m, 1, 0); #endif return(-1); /* m would be freed in ip6_forward() */ diff --git a/bsd/netinet6/scope6.c b/bsd/netinet6/scope6.c index ed33f804d..845695f71 100644 --- a/bsd/netinet6/scope6.c +++ b/bsd/netinet6/scope6.c @@ -56,8 +56,8 @@ static size_t if_indexlim = 8; struct scope6_id *scope6_ids = NULL; void -scope6_ifattach(ifp) - struct ifnet *ifp; +scope6_ifattach( + struct ifnet *ifp) { int s = splnet(); @@ -108,9 +108,9 @@ scope6_ifattach(ifp) } int -scope6_set(ifp, idlist) - struct ifnet *ifp; - u_int32_t *idlist; +scope6_set( + struct ifnet *ifp, + u_int32_t *idlist) { int i, s; int error = 0; @@ -159,9 +159,9 @@ scope6_set(ifp, idlist) } int -scope6_get(ifp, idlist) - struct ifnet *ifp; - u_int32_t *idlist; +scope6_get( + struct ifnet *ifp, + u_int32_t *idlist) { if (scope6_ids == NULL) /* paranoid? 
*/ return(EINVAL); @@ -233,18 +233,19 @@ struct in6_addr *addr; } int -in6_addr2scopeid(ifp, addr) - struct ifnet *ifp; /* must not be NULL */ - struct in6_addr *addr; /* must not be NULL */ +in6_addr2scopeid( + struct ifnet *ifp, /* must not be NULL */ + struct in6_addr *addr) /* must not be NULL */ { int scope = in6_addrscope(addr); + int index = ifp->if_index; if (scope6_ids == NULL) /* paranoid? */ return(0); /* XXX */ - if (ifp->if_index >= if_indexlim) + if (index >= if_indexlim) return(0); /* XXX */ -#define SID scope6_ids[ifp->if_index] +#define SID scope6_ids[index] switch(scope) { case IPV6_ADDR_SCOPE_NODELOCAL: return(-1); /* XXX: is this an appropriate value? */ @@ -265,8 +266,8 @@ in6_addr2scopeid(ifp, addr) } void -scope6_setdefault(ifp) - struct ifnet *ifp; /* note that this might be NULL */ +scope6_setdefault( + struct ifnet *ifp) /* note that this might be NULL */ { /* * Currently, this function just set the default "link" according to @@ -283,8 +284,8 @@ scope6_setdefault(ifp) } int -scope6_get_default(idlist) - u_int32_t *idlist; +scope6_get_default( + u_int32_t *idlist) { if (scope6_ids == NULL) /* paranoid? */ return(EINVAL); @@ -296,8 +297,8 @@ scope6_get_default(idlist) } u_int32_t -scope6_addr2default(addr) - struct in6_addr *addr; +scope6_addr2default( + struct in6_addr *addr) { return(scope6_ids[0].s6id_list[in6_addrscope(addr)]); } diff --git a/bsd/netinet6/scope6_var.h b/bsd/netinet6/scope6_var.h index 5831fde09..d7fd15e77 100644 --- a/bsd/netinet6/scope6_var.h +++ b/bsd/netinet6/scope6_var.h @@ -34,16 +34,14 @@ #define _NETINET6_SCOPE6_VAR_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -void scope6_ifattach __P((struct ifnet *)); -int scope6_set __P((struct ifnet *, u_int32_t *)); -int scope6_get __P((struct ifnet *, u_int32_t *)); -void scope6_setdefault __P((struct ifnet *)); -int scope6_get_default __P((u_int32_t *)); -u_int32_t scope6_in6_addrscope __P((struct in6_addr *)); -u_int32_t scope6_addr2default __P((struct in6_addr *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#ifdef KERNEL_PRIVATE +void scope6_ifattach(struct ifnet *); +int scope6_set(struct ifnet *, u_int32_t *); +int scope6_get(struct ifnet *, u_int32_t *); +void scope6_setdefault(struct ifnet *); +int scope6_get_default(u_int32_t *); +u_int32_t scope6_in6_addrscope(struct in6_addr *); +u_int32_t scope6_addr2default(struct in6_addr *); +#endif KERNEL_PRIVATE -#endif /* _NETINET6_SCOPE6_VAR_H_ */ +#endif _NETINET6_SCOPE6_VAR_H_ diff --git a/bsd/netinet6/tcp6_var.h b/bsd/netinet6/tcp6_var.h index 286307c32..5b535dda5 100644 --- a/bsd/netinet6/tcp6_var.h +++ b/bsd/netinet6/tcp6_var.h @@ -69,8 +69,7 @@ #define _NETINET_TCP6_VAR_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_tcp6); #endif @@ -78,14 +77,13 @@ SYSCTL_DECL(_net_inet6_tcp6); extern int tcp_v6mssdflt; /* XXX */ struct ip6_hdr; -void tcp6_ctlinput __P((int, struct sockaddr *, void *)); -void tcp6_init __P((void)); -int tcp6_input __P((struct mbuf **, int *)); -struct rtentry *tcp_rtlookup6 __P((struct inpcb *)); +void tcp6_ctlinput(int, struct sockaddr *, void *); +void tcp6_init(void); +int tcp6_input(struct mbuf **, int *); +struct rtentry *tcp_rtlookup6(struct inpcb *); extern struct pr_usrreqs tcp6_usrreqs; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif KERNEL_PRIVATE -#endif /* _NETINET_TCP6_VAR_H_ */ +#endif _NETINET_TCP6_VAR_H_ diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c index 8d8c6df34..226b199c0 
100644 --- a/bsd/netinet6/udp6_output.c +++ b/bsd/netinet6/udp6_output.c @@ -118,6 +118,21 @@ extern int ipsec_bypass; #define udp6stat udpstat #define udp6s_opackets udps_opackets +static __inline__ u_int16_t +get_socket_id(struct socket * s) +{ + u_int16_t val; + + if (s == NULL) { + return (0); + } + val = (u_int16_t)(((u_int32_t)s) / sizeof(struct socket)); + if (val == 0) { + val = 0xffff; + } + return (val); +} + int udp6_output(in6p, m, addr6, control, p) struct in6pcb *in6p; @@ -138,10 +153,11 @@ udp6_output(in6p, m, addr6, control, p) int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; + struct in6_addr storage; priv = 0; #ifdef __APPLE__ - if (p && !suser(p->p_ucred, &p->p_acflag)) + if (p && !proc_suser(p)) #else if (p && !suser(p)) #endif @@ -208,7 +224,7 @@ udp6_output(in6p, m, addr6, control, p) laddr = in6_selectsrc(sin6, in6p->in6p_outputopts, in6p->in6p_moptions, &in6p->in6p_route, - &in6p->in6p_laddr, &error); + &in6p->in6p_laddr, &storage, &error); } else laddr = &in6p->in6p_laddr; /* XXX */ if (laddr == NULL) { @@ -217,7 +233,7 @@ udp6_output(in6p, m, addr6, control, p) goto release; } if (in6p->in6p_lport == 0 && - (error = in6_pcbsetport(laddr, in6p, p)) != 0) + (error = in6_pcbsetport(laddr, in6p, p, 0)) != 0) goto release; } else { if (IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { @@ -300,8 +316,9 @@ udp6_output(in6p, m, addr6, control, p) goto release; } #endif /*IPSEC*/ + m->m_pkthdr.socket_id = get_socket_id(in6p->in6p_socket); error = ip6_output(m, in6p->in6p_outputopts, &in6p->in6p_route, - flags, in6p->in6p_moptions, NULL); + flags, in6p->in6p_moptions, NULL, 0); break; case AF_INET: error = EAFNOSUPPORT; diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index 9bab08c72..3be50feec 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,8 @@ #include #include extern int ipsec_bypass; +extern lck_mtx_t *sadb_mutex; +extern lck_mtx_t *nd6_mutex; #endif /*IPSEC*/ #include "faith.h" @@ -115,14 +118,26 @@ extern int ipsec_bypass; */ extern struct protosw inetsw[]; -static int in6_mcmatch __P((struct inpcb *, struct in6_addr *, struct ifnet *)); -static int udp6_detach __P((struct socket *so)); +static int in6_mcmatch(struct inpcb *, struct in6_addr *, struct ifnet *); +static int udp6_detach(struct socket *so); + + +extern void ipfwsyslog( int level, char *format,...); +extern int fw_verbose; + +#define log_in_vain_log( a ) { \ + if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ + ipfwsyslog a ; \ + } \ + else log a ; \ +} + static int -in6_mcmatch(in6p, ia6, ifp) - struct inpcb *in6p; - register struct in6_addr *ia6; - struct ifnet *ifp; +in6_mcmatch( + struct inpcb *in6p, + register struct in6_addr *ia6, + struct ifnet *ifp) { struct ip6_moptions *im6o = in6p->in6p_moptions; struct in6_multi_mship *imm; @@ -130,21 +145,25 @@ in6_mcmatch(in6p, ia6, ifp) if (im6o == NULL) return 0; + lck_mtx_lock(nd6_mutex); for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, - ia6)) + ia6)) { + lck_mtx_unlock(nd6_mutex); return 1; + } } + lck_mtx_unlock(nd6_mutex); return 0; } int -udp6_input(mp, offp) - struct mbuf **mp; - int *offp; +udp6_input( + struct mbuf **mp, + int *offp) { struct mbuf *m = *mp; register struct ip6_hdr *ip6; @@ -154,8 +173,9 
@@ udp6_input(mp, offp) int off = *offp; int plen, ulen; struct sockaddr_in6 udp_in6; + struct inpcbinfo *pcbinfo = &udbinfo; - IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), return IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); @@ -234,23 +254,42 @@ udp6_input(mp, offp) * (Algorithm copied from raw_intr().) */ last = NULL; + lck_rw_lock_shared(pcbinfo->mtx); + LIST_FOREACH(in6p, &udb, inp_list) { + if ((in6p->inp_vflag & INP_IPV6) == 0) continue; - if (in6p->in6p_lport != uh->uh_dport) + + if (in_pcb_checkstate(in6p, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + + udp_lock(in6p->in6p_socket, 1, 0); + + if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_unlock(in6p->in6p_socket, 1, 0); + continue; + } + if (in6p->in6p_lport != uh->uh_dport) { + udp_unlock(in6p->in6p_socket, 1, 0); continue; + } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst) && !in6_mcmatch(in6p, &ip6->ip6_dst, - m->m_pkthdr.rcvif)) + m->m_pkthdr.rcvif)) { + udp_unlock(in6p->in6p_socket, 1, 0); continue; + } } if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src) || - in6p->in6p_fport != uh->uh_sport) + in6p->in6p_fport != uh->uh_sport) { + udp_unlock(in6p->in6p_socket, 1, 0); continue; + } } if (last != NULL) { @@ -260,9 +299,13 @@ udp6_input(mp, offp) /* * Check AH/ESP integrity. */ - if (ipsec_bypass == 0 && ipsec6_in_reject_so(m, last->inp_socket)) - ipsec6stat.in_polvio++; + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject_so(m, last->inp_socket)) + ipsec6stat.in_polvio++; /* do not inject data into pcb */ + lck_mtx_unlock(sadb_mutex); + } else #endif /*IPSEC*/ if ((n = m_copy(m, 0, M_COPYALL)) != NULL) { @@ -281,15 +324,13 @@ udp6_input(mp, offp) m_adj(n, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, - n, opts) == 0) { - m_freem(n); - if (opts) - m_freem(opts); + n, opts, NULL) == 0) { udpstat.udps_fullsock++; } else sorwakeup(last->in6p_socket); opts = NULL; } + udp_unlock(last->in6p_socket, 1, 0); } last = in6p; /* @@ -304,6 +345,7 @@ udp6_input(mp, offp) (SO_REUSEPORT|SO_REUSEADDR)) == 0) break; } + lck_rw_done(pcbinfo->mtx); if (last == NULL) { /* @@ -321,9 +363,15 @@ udp6_input(mp, offp) /* * Check AH/ESP integrity. 
*/ - if (ipsec_bypass == 0 && ipsec6_in_reject_so(m, last->inp_socket)) { - ipsec6stat.in_polvio++; - goto bad; + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject_so(m, last->inp_socket)) { + ipsec6stat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + udp_unlock(last->in6p_socket, 1, 0); + goto bad; + } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ if (last->in6p_flags & IN6P_CONTROLOPTS @@ -333,11 +381,15 @@ udp6_input(mp, offp) m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&last->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, - m, opts) == 0) { + m, opts, NULL) == 0) { udpstat.udps_fullsock++; + m = NULL; + opts = NULL; + udp_unlock(last->in6p_socket, 1, 0); goto bad; } sorwakeup(last->in6p_socket); + udp_unlock(last->in6p_socket, 1, 0); return IPPROTO_DONE; } /* @@ -351,10 +403,17 @@ udp6_input(mp, offp) char buf[INET6_ADDRSTRLEN]; strcpy(buf, ip6_sprintf(&ip6->ip6_dst)); - log(LOG_INFO, - "Connection attempt to UDP %s:%d from %s:%d\n", - buf, ntohs(uh->uh_dport), - ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); + if (log_in_vain != 3) + log(LOG_INFO, + "Connection attempt to UDP %s:%d from %s:%d\n", + buf, ntohs(uh->uh_dport), + ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); + else if (!(m->m_flags & (M_BCAST | M_MCAST)) && + !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) + log_in_vain_log((LOG_INFO, + "Connection attempt to UDP %s:%d from %s:%d\n", + buf, ntohs(uh->uh_dport), + ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport))); } udpstat.udps_noport++; if (m->m_flags & M_MCAST) { @@ -371,9 +430,15 @@ udp6_input(mp, offp) /* * Check AH/ESP integrity. */ - if (ipsec_bypass == 0 && ipsec6_in_reject_so(m, in6p->in6p_socket)) { - ipsec6stat.in_polvio++; - goto bad; + if (ipsec_bypass == 0) { + lck_mtx_lock(sadb_mutex); + if (ipsec6_in_reject_so(m, in6p->in6p_socket)) { + ipsec6stat.in_polvio++; + lck_mtx_unlock(sadb_mutex); + in_pcb_checkstate(in6p, WNT_RELEASE, 0); + goto bad; + } + lck_mtx_unlock(sadb_mutex); } #endif /*IPSEC*/ @@ -381,6 +446,13 @@ udp6_input(mp, offp) * Construct sockaddr format source address. * Stuff source address and datagram in user buffer. 
*/ + udp_lock(in6p->in6p_socket, 1, 0); + + if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_unlock(in6p->in6p_socket, 1, 0); + goto bad; + } + init_sin6(&udp_in6, m); /* general init */ udp_in6.sin6_port = uh->uh_sport; if (in6p->in6p_flags & IN6P_CONTROLOPTS @@ -389,11 +461,15 @@ udp6_input(mp, offp) m_adj(m, off + sizeof(struct udphdr)); if (sbappendaddr(&in6p->in6p_socket->so_rcv, (struct sockaddr *)&udp_in6, - m, opts) == 0) { + m, opts, NULL) == 0) { + m = NULL; + opts = NULL; udpstat.udps_fullsock++; + udp_unlock(in6p->in6p_socket, 1, 0); goto bad; } sorwakeup(in6p->in6p_socket); + udp_unlock(in6p->in6p_socket, 1, 0); return IPPROTO_DONE; bad: if (m) @@ -404,10 +480,10 @@ bad: } void -udp6_ctlinput(cmd, sa, d) - int cmd; - struct sockaddr *sa; - void *d; +udp6_ctlinput( + int cmd, + struct sockaddr *sa, + void *d) { struct udphdr uh; struct ip6_hdr *ip6; @@ -415,7 +491,7 @@ udp6_ctlinput(cmd, sa, d) int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; - void (*notify) __P((struct inpcb *, int)) = udp_notify; + void (*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; @@ -460,11 +536,11 @@ udp6_ctlinput(cmd, sa, d) bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); - (void) in6_pcbnotify(&udb, sa, uh.uh_dport, + (void) in6_pcbnotify(&udbinfo, sa, uh.uh_dport, (struct sockaddr*)ip6cp->ip6c_src, uh.uh_sport, cmd, notify); } else - (void) in6_pcbnotify(&udb, sa, 0, (struct sockaddr *)&sa6_src, + (void) in6_pcbnotify(&udbinfo, sa, 0, (struct sockaddr *)&sa6_src, 0, cmd, notify); } @@ -482,7 +558,7 @@ udp6_getcred SYSCTL_HANDLER_ARGS if (req->newlen != sizeof(addrs)) return (EINVAL); - if (req->oldlen != sizeof(struct ucred)) + if (req->oldlen != sizeof(*(kauth_cred_t)0)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) @@ -497,7 +573,7 @@ udp6_getcred SYSCTL_HANDLER_ARGS goto out; } error = SYSCTL_OUT(req, inp->inp_socket->so_cred->pc_ucred, - sizeof(struct ucred)); + sizeof(*(kauth_cred_t)0)); out: splx(s); @@ -513,15 +589,12 @@ static int udp6_abort(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; /* ??? possible? panic instead? 
*/ soisdisconnected(so); - s = splnet(); in6_pcbdetach(inp); - splx(s); return 0; } @@ -529,22 +602,21 @@ static int udp6_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; - int s, error; + int error; inp = sotoinpcb(so); if (inp != 0) return EINVAL; + error = in_pcballoc(so, &udbinfo, p); + if (error) + return error; + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return error; } - s = splnet(); - error = in_pcballoc(so, &udbinfo, p); - splx(s); - if (error) - return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if (ip6_mapped_addr_on) @@ -565,7 +637,7 @@ static int udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; - int s, error; + int error; inp = sotoinpcb(so); if (inp == 0) @@ -586,16 +658,12 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; - s = splnet(); error = in_pcbbind(inp, (struct sockaddr *)&sin, p); - splx(s); return error; } } - s = splnet(); error = in6_pcbbind(inp, nam, p); - splx(s); return error; } @@ -603,7 +671,7 @@ static int udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; - int s, error; + int error; inp = sotoinpcb(so); if (inp == 0) @@ -619,9 +687,7 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (inp->inp_faddr.s_addr != INADDR_ANY) return EISCONN; in6_sin6_2_sin(&sin, sin6_p); - s = splnet(); error = in_pcbconnect(inp, (struct sockaddr *)&sin, p); - splx(s); if (error == 0) { inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; @@ -633,9 +699,7 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return EISCONN; - s = splnet(); error = in6_pcbconnect(inp, nam, p); - splx(s); if (error == 0) { if (ip6_mapped_addr_on || (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { /* should be non mapped addr */ inp->inp_vflag &= ~INP_IPV4; @@ -650,14 +714,11 @@ static int udp6_detach(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) return EINVAL; - s = splnet(); in6_pcbdetach(inp); - splx(s); return 0; } @@ -665,7 +726,6 @@ static int udp6_disconnect(struct socket *so) { struct inpcb *inp; - int s; inp = sotoinpcb(so); if (inp == 0) @@ -681,10 +741,8 @@ udp6_disconnect(struct socket *so) if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return ENOTCONN; - s = splnet(); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; - splx(s); so->so_state &= ~SS_ISCONNECTED; /* XXX */ return 0; } @@ -749,5 +807,5 @@ struct pr_usrreqs udp6_usrreqs = { pru_connect2_notsupp, in6_control, udp6_detach, udp6_disconnect, pru_listen_notsupp, in6_mapped_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, udp6_send, pru_sense_null, udp_shutdown, - in6_mapped_sockaddr, sosend, soreceive, sopoll + in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; diff --git a/bsd/netinet6/udp6_var.h b/bsd/netinet6/udp6_var.h index 417190c62..49e35cc55 100644 --- a/bsd/netinet6/udp6_var.h +++ b/bsd/netinet6/udp6_var.h @@ -66,18 +66,16 @@ #define _NETINET6_UDP6_VAR_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE SYSCTL_DECL(_net_inet6_udp6); extern struct pr_usrreqs udp6_usrreqs; -void udp6_ctlinput __P((int, struct sockaddr *, void *)); -int udp6_input __P((struct mbuf **, int *)); -int udp6_output __P((struct inpcb *inp, struct mbuf *m, +void 
udp6_ctlinput(int, struct sockaddr *, void *); +int udp6_input(struct mbuf **, int *); +int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, - struct proc *p)); -#endif /* __APPLE_API_PRIVATE */ -#endif + struct proc *p); +#endif KERNEL_PRIVATE -#endif /*_NETINET6_UDP6_VAR_H_*/ +#endif _NETINET6_UDP6_VAR_H_ diff --git a/bsd/netkey/Makefile b/bsd/netkey/Makefile index 0ef16274a..def3c0629 100644 --- a/bsd/netkey/Makefile +++ b/bsd/netkey/Makefile @@ -20,7 +20,13 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - key.h key_debug.h keydb.h keysock.h keyv2.h key_var.h + keysock.h + +PRIVATE_DATAFILES = \ + key_debug.h keydb.h key_var.h + +PRIVATE_KERNELFILES = \ + key.h INSTALL_MI_LIST = ${DATAFILES} @@ -30,6 +36,9 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = netkey +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + +INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} ${PRIVATE_KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index bb183c5a1..9576d7afe 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -50,6 +50,8 @@ #include #include +#include + #include #include #include @@ -111,6 +113,12 @@ #define FULLMASK 0xff +lck_grp_t *sadb_mutex_grp; +lck_grp_attr_t *sadb_mutex_grp_attr; +lck_attr_t *sadb_mutex_attr; +lck_mtx_t *sadb_mutex; +extern lck_mtx_t *nd6_mutex; + /* * Note on SA reference counting: * - SAs that are not in DEAD state will have (total external reference + 1) @@ -132,7 +140,7 @@ static u_int key_larval_lifetime = 30; /* interval to expire acquiring, 30(s)*/ static int key_blockacq_count = 10; /* counter for blocking SADB_ACQUIRE.*/ static int key_blockacq_lifetime = 20; /* lifetime for blocking SADB_ACQUIRE.*/ static int key_preferred_oldsa = 0; /* preferred old sa rather than new sa.*/ -static int natt_keepalive_interval = 29; /* interval between natt keepalives.*/ +static int natt_keepalive_interval = 20; /* interval between natt keepalives.*/ static u_int32_t acq_seq = 0; static int key_tick_init_random = 0; @@ -142,6 +150,11 @@ static LIST_HEAD(_sptree, secpolicy) sptree[IPSEC_DIR_MAX]; /* SPD */ static LIST_HEAD(_sahtree, secashead) sahtree; /* SAD */ static LIST_HEAD(_regtree, secreg) regtree[SADB_SATYPE_MAX + 1]; /* registed list */ + +#define SPIHASHSIZE 128 +#define SPIHASH(x) (((x) ^ ((x) >> 16)) % SPIHASHSIZE) +static LIST_HEAD(_spihash, secasvar) spihash[SPIHASHSIZE]; + #ifndef IPSEC_NONBLOCK_ACQUIRE static LIST_HEAD(_acqtree, secacq) acqtree; /* acquiring list */ #endif @@ -268,6 +281,10 @@ SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, prefered_oldsa, CTLFLAG_RW,\ SYSCTL_INT(_net_key, KEYCTL_NATT_KEEPALIVE_INTERVAL, natt_keepalive_interval, CTLFLAG_RW,\ &natt_keepalive_interval, 0, ""); +/* PF_KEY statistics */ +SYSCTL_STRUCT(_net_key, KEYCTL_PFKEYSTAT, pfkeystat, CTLFLAG_RD,\ + &pfkeystat, pfkeystat, ""); + #ifndef LIST_FOREACH #define LIST_FOREACH(elm, head, field) \ for (elm = LIST_FIRST(head); elm; elm = LIST_NEXT(elm, field)) @@ -365,60 +382,61 @@ struct sadb_msghdr { int extlen[SADB_EXT_MAX + 1]; }; -static struct secasvar *key_allocsa_policy __P((struct secasindex *)); -static void key_freesp_so __P((struct secpolicy **)); -static struct secasvar *key_do_allocsa_policy __P((struct secashead *, u_int)); -static void key_delsp __P((struct secpolicy *)); -static struct secpolicy *key_getsp __P((struct secpolicyindex *)); -static struct secpolicy *key_getspbyid __P((u_int32_t)); -static u_int32_t key_newreqid __P((void)); 
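The prototype churn above and below is mechanical: the K&R-compatibility wrapper `__P(())` is stripped from every declaration. `__P` comes from `<sys/cdefs.h>`; on an ANSI compiler it expands to the parenthesized parameter list, on a traditional-C compiler to an empty `()`. A minimal sketch of the convention and what each converted line amounts to (using `key_newreqid` from the block above):

```c
/* From <sys/cdefs.h> (4.4BSD lineage): keep ANSI prototypes
 * acceptable to a traditional-C preprocessor. */
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos /* ANSI C: keep the parameter list */
#else
#define __P(protos) ()     /* K&R C: degrade to an empty list */
#endif

/* Before: portable to pre-ANSI compilers. */
static u_int32_t key_newreqid __P((void));

/* After: plain ANSI prototype, as throughout this patch. */
static u_int32_t key_newreqid(void);
```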
-static struct mbuf *key_gather_mbuf __P((struct mbuf *, - const struct sadb_msghdr *, int, int, int *)); -static int key_spdadd __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static u_int32_t key_getnewspid __P((void)); -static int key_spddelete __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spddelete2 __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spdget __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spdflush __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_spddump __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static struct mbuf *key_setdumpsp __P((struct secpolicy *, - u_int8_t, u_int32_t, u_int32_t)); -static u_int key_getspreqmsglen __P((struct secpolicy *)); -static int key_spdexpire __P((struct secpolicy *)); -static struct secashead *key_newsah __P((struct secasindex *)); -static void key_delsah __P((struct secashead *)); -static struct secasvar *key_newsav __P((struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *)); -static void key_delsav __P((struct secasvar *)); -static struct secashead *key_getsah __P((struct secasindex *)); -static struct secasvar *key_checkspidup __P((struct secasindex *, u_int32_t)); -static struct secasvar *key_getsavbyspi __P((struct secashead *, u_int32_t)); -static int key_setsaval __P((struct secasvar *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_mature __P((struct secasvar *)); -static struct mbuf *key_setdumpsa __P((struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t)); -static struct mbuf *key_setsadbmsg __P((u_int8_t, u_int16_t, u_int8_t, - u_int32_t, pid_t, u_int16_t)); -static struct mbuf *key_setsadbsa __P((struct secasvar *)); -static struct mbuf *key_setsadbaddr __P((u_int16_t, - struct sockaddr *, u_int8_t, u_int16_t)); +static struct secasvar *key_allocsa_policy(struct secasindex *); +static void key_freesp_so(struct secpolicy **); +static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); +static void key_delsp(struct secpolicy *); +static struct secpolicy *key_getsp(struct secpolicyindex *); +static struct secpolicy *key_getspbyid(u_int32_t); +static u_int32_t key_newreqid(void); +static struct mbuf *key_gather_mbuf(struct mbuf *, + const struct sadb_msghdr *, int, int, int *); +static int key_spdadd(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static u_int32_t key_getnewspid(void); +static int key_spddelete(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spddelete2(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spdget(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spdflush(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spddump(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static struct mbuf *key_setdumpsp(struct secpolicy *, + u_int8_t, u_int32_t, u_int32_t); +static u_int key_getspreqmsglen(struct secpolicy *); +static int key_spdexpire(struct secpolicy *); +static struct secashead *key_newsah(struct secasindex *); +static void key_delsah(struct secashead *); +static struct secasvar *key_newsav(struct mbuf *, + const struct sadb_msghdr *, struct secashead *, int *); +static void key_delsav(struct secasvar *); +static struct secashead *key_getsah(struct secasindex *); +static struct secasvar *key_checkspidup(struct 
secasindex *, u_int32_t); +static void key_setspi __P((struct secasvar *, u_int32_t)); +static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); +static int key_setsaval(struct secasvar *, struct mbuf *, + const struct sadb_msghdr *); +static int key_mature(struct secasvar *); +static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, + u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, + u_int32_t, pid_t, u_int16_t); +static struct mbuf *key_setsadbsa(struct secasvar *); +static struct mbuf *key_setsadbaddr(u_int16_t, + struct sockaddr *, u_int8_t, u_int16_t); #if 0 -static struct mbuf *key_setsadbident __P((u_int16_t, u_int16_t, caddr_t, - int, u_int64_t)); +static struct mbuf *key_setsadbident(u_int16_t, u_int16_t, caddr_t, + int, u_int64_t); #endif -static struct mbuf *key_setsadbxsa2 __P((u_int8_t, u_int32_t, u_int32_t)); -static struct mbuf *key_setsadbxpolicy __P((u_int16_t, u_int8_t, - u_int32_t)); -static void *key_newbuf __P((const void *, u_int)); +static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); +static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, + u_int32_t); +static void *key_newbuf(const void *, u_int); #if INET6 -static int key_ismyaddr6 __P((struct sockaddr_in6 *)); +static int key_ismyaddr6(struct sockaddr_in6 *); #endif /* flags for key_cmpsaidx() */ @@ -426,79 +444,102 @@ static int key_ismyaddr6 __P((struct sockaddr_in6 *)); #define CMP_MODE_REQID 2 /* additionally HEAD, reqid, mode. */ #define CMP_REQID 3 /* additionally HEAD, reaid. */ #define CMP_EXACTLY 4 /* all elements. */ -static int key_cmpsaidx - __P((struct secasindex *, struct secasindex *, int)); - -static int key_cmpspidx_exactly - __P((struct secpolicyindex *, struct secpolicyindex *)); -static int key_cmpspidx_withmask - __P((struct secpolicyindex *, struct secpolicyindex *)); -static int key_sockaddrcmp __P((struct sockaddr *, struct sockaddr *, int)); -static int key_bbcmp __P((caddr_t, caddr_t, u_int)); -static void key_srandom __P((void)); -static u_int16_t key_satype2proto __P((u_int8_t)); -static u_int8_t key_proto2satype __P((u_int16_t)); - -static int key_getspi __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static u_int32_t key_do_getnewspi __P((struct sadb_spirange *, - struct secasindex *)); -static int key_update __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); +static int key_cmpsaidx(struct secasindex *, struct secasindex *, int); + +static int key_cmpspidx_exactly(struct secpolicyindex *, + struct secpolicyindex *); +static int key_cmpspidx_withmask(struct secpolicyindex *, + struct secpolicyindex *); +static int key_sockaddrcmp(struct sockaddr *, struct sockaddr *, int); +static int key_bbcmp(caddr_t, caddr_t, u_int); +static void key_srandom(void); +static u_int16_t key_satype2proto(u_int8_t); +static u_int8_t key_proto2satype(u_int16_t); + +static int key_getspi(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static u_int32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); +static int key_update(struct socket *, struct mbuf *, + const struct sadb_msghdr *); #if IPSEC_DOSEQCHECK -static struct secasvar *key_getsavbyseq __P((struct secashead *, u_int32_t)); +static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); #endif -static int key_add __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_setident __P((struct secashead *, struct mbuf *, - const struct sadb_msghdr *)); 
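The `spihash` table declared near the top of key.c (`SPIHASHSIZE` 128, `SPIHASH(x) = ((x ^ (x >> 16)) % SPIHASHSIZE)`) backs the new `key_setspi`/`key_getsavbyspi` lookups being declared here. A toy, user-space illustration of the bucket math, assuming the macros exactly as added in this patch:

```c
#include <stdio.h>

#define SPIHASHSIZE 128
#define SPIHASH(x)  (((x) ^ ((x) >> 16)) % SPIHASHSIZE)

int
main(void)
{
	/* Fold the high half of the SPI into the low half, then reduce
	 * modulo the table size. Since 128 keeps only the low 7 bits,
	 * the fold is what lets the upper 16 bits influence the bucket. */
	unsigned int spi1 = 0x00001234;
	unsigned int spi2 = 0x56781234; /* same low 16 bits as spi1 */

	printf("spi1 -> bucket %u\n", SPIHASH(spi1)); /* 52 */
	printf("spi2 -> bucket %u\n", SPIHASH(spi2)); /* 76 */
	return 0;
}
```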
-static struct mbuf *key_getmsgbuf_x1 __P((struct mbuf *, - const struct sadb_msghdr *)); -static int key_delete __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_get __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); - -static void key_getcomb_setlifetime __P((struct sadb_comb *)); +static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); +static int key_setident(struct secashead *, struct mbuf *, + const struct sadb_msghdr *); +static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); +static int key_delete(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); + +static void key_getcomb_setlifetime(struct sadb_comb *); #if IPSEC_ESP -static struct mbuf *key_getcomb_esp __P((void)); +static struct mbuf *key_getcomb_esp(void); #endif -static struct mbuf *key_getcomb_ah __P((void)); -static struct mbuf *key_getcomb_ipcomp __P((void)); -static struct mbuf *key_getprop __P((const struct secasindex *)); +static struct mbuf *key_getcomb_ah(void); +static struct mbuf *key_getcomb_ipcomp(void); +static struct mbuf *key_getprop(const struct secasindex *); -static int key_acquire __P((struct secasindex *, struct secpolicy *)); +static int key_acquire(struct secasindex *, struct secpolicy *); #ifndef IPSEC_NONBLOCK_ACQUIRE -static struct secacq *key_newacq __P((struct secasindex *)); -static struct secacq *key_getacq __P((struct secasindex *)); -static struct secacq *key_getacqbyseq __P((u_int32_t)); +static struct secacq *key_newacq(struct secasindex *); +static struct secacq *key_getacq(struct secasindex *); +static struct secacq *key_getacqbyseq(u_int32_t); #endif -static struct secspacq *key_newspacq __P((struct secpolicyindex *)); -static struct secspacq *key_getspacq __P((struct secpolicyindex *)); -static int key_acquire2 __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_register __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_expire __P((struct secasvar *)); -static int key_flush __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_dump __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_promisc __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)); -static int key_senderror __P((struct socket *, struct mbuf *, int)); -static int key_validate_ext __P((const struct sadb_ext *, int)); -static int key_align __P((struct mbuf *, struct sadb_msghdr *)); +static struct secspacq *key_newspacq(struct secpolicyindex *); +static struct secspacq *key_getspacq(struct secpolicyindex *); +static int key_acquire2(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_register(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_expire(struct secasvar *); +static int key_flush(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_dump(struct socket *, struct mbuf *, const struct sadb_msghdr *); +static int key_promisc(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_senderror(struct socket *, struct mbuf *, int); +static int key_validate_ext(const struct sadb_ext *, int); +static int key_align(struct mbuf *, struct sadb_msghdr *); #if 0 -static const char *key_getfqdn __P((void)); -static const char *key_getuserfqdn __P((void)); +static const char *key_getfqdn(void); 
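Prototype cleanup aside, the dominant change through the rest of key.c is the locking discipline: the `splnet()`/`splx()` brackets give way to a single `sadb_mutex` that callers must already hold, and each internal entry point documents that contract with an ownership assertion. A minimal sketch of the idiom; `key_do_something` and its includes are hypothetical stand-ins, not code from this patch:

```c
#include <kern/locks.h>
#include <sys/systm.h>          /* panic() */

extern lck_mtx_t *sadb_mutex;
struct secpolicy;

static int
key_do_something(struct secpolicy *sp)
{
	/* Debug check that the caller really holds the SADB lock;
	 * this replaces the old splnet()/splx() bracketing. */
	lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED);

	/* sanity check, in the file's own style */
	if (sp == NULL)
		panic("key_do_something: NULL pointer is passed.\n");

	/* ... mutate SPD/SAD state, already serialized by the caller ... */
	return 0;
}
```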
+static const char *key_getuserfqdn(void); #endif -static void key_sa_chgstate __P((struct secasvar *, u_int8_t)); -static struct mbuf *key_alloc_mbuf __P((int)); +static void key_sa_chgstate(struct secasvar *, u_int8_t); +static struct mbuf *key_alloc_mbuf(int); extern int ipsec_bypass; void ipsec_send_natt_keepalive(struct secasvar *sav); + +/* + * PF_KEY init + * setup locks and call raw_init() + * + */ +void +key_init(void) +{ + + int i; + + sadb_mutex_grp_attr = lck_grp_attr_alloc_init(); + sadb_mutex_grp = lck_grp_alloc_init("sadb", sadb_mutex_grp_attr); + sadb_mutex_attr = lck_attr_alloc_init(); + lck_attr_setdefault(sadb_mutex_attr); + + if ((sadb_mutex = lck_mtx_alloc_init(sadb_mutex_grp, sadb_mutex_attr)) == NULL) { + printf("key_init: can't alloc sadb_mutex\n"); + return; + } + + for (i = 0; i < SPIHASHSIZE; i++) + LIST_INIT(&spihash[i]); + + raw_init(); +} + + /* %%% IPsec policy management */ /* * allocating a SP for OUTBOUND or INBOUND packet. @@ -515,6 +556,7 @@ key_allocsp(spidx, dir) struct timeval tv; int s; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* sanity check */ if (spidx == NULL) panic("key_allocsp: NULL pointer is passed.\n"); @@ -529,7 +571,6 @@ key_allocsp(spidx, dir) } /* get a SP entry */ - s = splnet(); /*called from softclock()*/ KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("*** objects\n"); kdebug_secpolicyindex(spidx)); @@ -545,7 +586,6 @@ key_allocsp(spidx, dir) goto found; } - splx(s); return NULL; found: @@ -556,7 +596,6 @@ found: microtime(&tv); sp->lastused = tv.tv_sec; sp->refcnt++; - splx(s); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP key_allocsp cause refcnt++:%d SP:%p\n", sp->refcnt, sp)); @@ -580,13 +619,14 @@ key_gettunnel(osrc, odst, isrc, idst) struct sockaddr *os, *od, *is, *id; struct secpolicyindex spidx; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (isrc->sa_family != idst->sa_family) { ipseclog((LOG_ERR, "protocol family mismatched %d != %d\n.", isrc->sa_family, idst->sa_family)); return NULL; } - s = splnet(); /*called from softclock()*/ LIST_FOREACH(sp, &sptree[dir], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; @@ -626,14 +666,13 @@ key_gettunnel(osrc, odst, isrc, idst) goto found; } } - splx(s); + return NULL; found: microtime(&tv); sp->lastused = tv.tv_sec; sp->refcnt++; - splx(s); return sp; } @@ -651,6 +690,8 @@ key_checkrequest(isr, saidx) u_int level; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (isr == NULL || saidx == NULL) panic("key_checkrequest: NULL pointer is passed.\n"); @@ -742,6 +783,8 @@ key_allocsa_policy(saidx) const u_int *saorder_state_valid; int arraysize; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; @@ -791,6 +834,8 @@ key_do_allocsa_policy(sah, state) { struct secasvar *sav, *nextsav, *candidate, *d; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* initilize */ candidate = NULL; @@ -909,7 +954,7 @@ key_do_allocsa_policy(sah, state) * allocating a SA entry for a *INBOUND* packet. * Must call key_freesav() later. * OUT: positive: pointer to a sav. - * NULL: not found, or error occured. + * NULL: not found, or error occurred. * * In the comparison, source address will be ignored for RFC2401 conformance. 
* To quote, from section 4.1: @@ -926,15 +971,16 @@ key_allocsa(family, src, dst, proto, spi) caddr_t src, dst; u_int32_t spi; { - struct secashead *sah; - struct secasvar *sav; - u_int stateidx, state; + struct secasvar *sav, *match; + u_int stateidx, state, tmpidx, matchidx; struct sockaddr_in sin; struct sockaddr_in6 sin6; int s; const u_int *saorder_state_valid; int arraysize; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (src == NULL || dst == NULL) panic("key_allocsa: NULL pointer is passed.\n"); @@ -957,115 +1003,118 @@ key_allocsa(family, src, dst, proto, spi) * IPsec tunnel packet is received. But ESP tunnel mode is * encrypted so we can't check internal IP header. */ - s = splnet(); /*called from softclock()*/ - LIST_FOREACH(sah, &sahtree, chain) { - /* - * search a valid state list for inbound packet. - * the search order is not important. - */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { + /* + * search a valid state list for inbound packet. + * the search order is not important. + */ + match = NULL; + matchidx = arraysize; + LIST_FOREACH(sav, &spihash[SPIHASH(spi)], spihash) { + if (sav->spi != spi) + continue; + if (proto != sav->sah->saidx.proto) + continue; + if (family != sav->sah->saidx.src.ss_family || + family != sav->sah->saidx.dst.ss_family) + continue; + tmpidx = arraysize; + for (stateidx = 0; stateidx < matchidx; stateidx++) { state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, "key_allocsav"); - if (proto != sav->sah->saidx.proto) - continue; - if (spi != sav->spi) - continue; - if (family != sav->sah->saidx.src.ss_family || - family != sav->sah->saidx.dst.ss_family) - continue; + if (sav->state == state) { + tmpidx = stateidx; + break; + } + } + if (tmpidx >= matchidx) + continue; #if 0 /* don't check src */ - /* check src address */ - switch (family) { - case AF_INET: - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - bcopy(src, &sin.sin_addr, - sizeof(sin.sin_addr)); - if (key_sockaddrcmp((struct sockaddr*)&sin, - (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) - continue; - - break; - case AF_INET6: - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - bcopy(src, &sin6.sin6_addr, - sizeof(sin6.sin6_addr)); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { - /* kame fake scopeid */ - sin6.sin6_scope_id = - ntohs(sin6.sin6_addr.s6_addr16[1]); - sin6.sin6_addr.s6_addr16[1] = 0; - } - if (key_sockaddrcmp((struct sockaddr*)&sin6, - (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) - continue; - break; - default: - ipseclog((LOG_DEBUG, "key_allocsa: " - "unknown address family=%d.\n", - family)); - continue; - } + /* check src address */ + switch (family) { + case AF_INET: + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + bcopy(src, &sin.sin_addr, + sizeof(sin.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&sin, + (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) + continue; + break; + case AF_INET6: + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(sin6); + bcopy(src, &sin6.sin6_addr, + sizeof(sin6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { + /* kame fake scopeid */ + sin6.sin6_scope_id = + ntohs(sin6.sin6_addr.s6_addr16[1]); + sin6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&sin6, + (struct sockaddr *)&sav->sah->saidx.src, 0) != 
0) + continue; + break; + default: + ipseclog((LOG_DEBUG, "key_allocsa: " + "unknown address family=%d.\n", + family)); + continue; + } #endif - /* check dst address */ - switch (family) { - case AF_INET: - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - bcopy(dst, &sin.sin_addr, - sizeof(sin.sin_addr)); - if (key_sockaddrcmp((struct sockaddr*)&sin, - (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) - continue; - - break; - case AF_INET6: - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - bcopy(dst, &sin6.sin6_addr, - sizeof(sin6.sin6_addr)); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { - /* kame fake scopeid */ - sin6.sin6_scope_id = - ntohs(sin6.sin6_addr.s6_addr16[1]); - sin6.sin6_addr.s6_addr16[1] = 0; - } - if (key_sockaddrcmp((struct sockaddr*)&sin6, - (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) - continue; - break; - default: - ipseclog((LOG_DEBUG, "key_allocsa: " - "unknown address family=%d.\n", - family)); - continue; - } + /* check dst address */ + switch (family) { + case AF_INET: + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + bcopy(dst, &sin.sin_addr, + sizeof(sin.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&sin, + (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) + continue; - goto found; - } + break; + case AF_INET6: + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(sin6); + bcopy(dst, &sin6.sin6_addr, + sizeof(sin6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { + /* kame fake scopeid */ + sin6.sin6_scope_id = + ntohs(sin6.sin6_addr.s6_addr16[1]); + sin6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&sin6, + (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) + continue; + break; + default: + ipseclog((LOG_DEBUG, "key_allocsa: " + "unknown address family=%d.\n", family)); + continue; } + + match = sav; + matchidx = tmpidx; } + if (match) + goto found; /* not found */ - splx(s); return NULL; found: - sav->refcnt++; - splx(s); + match->refcnt++; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP allocsa cause refcnt++:%d SA:%p\n", - sav->refcnt, sav)); - return sav; + match->refcnt, match)); + return match; } /* @@ -1076,6 +1125,8 @@ void key_freesp(sp) struct secpolicy *sp; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sp == NULL) panic("key_freesp: NULL pointer is passed.\n"); @@ -1091,6 +1142,7 @@ key_freesp(sp) return; } +#if 0 /* * Must be called after calling key_allocsp(). * For the packet with socket. 
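The rewritten `key_allocsa` above no longer walks every SAD header: it scans one `spihash` bucket for the SPI and keeps the candidate whose state ranks earliest in `saorder_state_valid` (lower index wins, so MATURE beats DYING). A self-contained, user-space rendering of that selection loop; the names and the two-state preference array are illustrative, not the kernel's:

```c
#include <stdio.h>

/* Preference order: lower index wins (cf. saorder_state_valid). */
enum sa_state { MATURE, DYING, DEAD };
#define NSTATES 2               /* only MATURE and DYING are eligible */
static const enum sa_state pref[NSTATES] = { MATURE, DYING };

struct sa { unsigned int spi; enum sa_state state; };

int
main(void)
{
	/* One spihash bucket: the wanted SPI in two states, plus a stranger. */
	struct sa bucket[] = {
		{ 0x1234, DYING }, { 0x1234, MATURE }, { 0x9999, MATURE },
	};
	struct sa *match = NULL;
	size_t matchidx = NSTATES, tmpidx, i, s;

	for (i = 0; i < sizeof(bucket) / sizeof(bucket[0]); i++) {
		if (bucket[i].spi != 0x1234)
			continue;               /* SPI mismatch */
		tmpidx = NSTATES;
		for (s = 0; s < matchidx; s++)
			if (bucket[i].state == pref[s]) {
				tmpidx = s;
				break;
			}
		if (tmpidx >= matchidx)
			continue;               /* no better than current */
		match = &bucket[i];             /* better-ranked state */
		matchidx = tmpidx;
	}
	printf("picked entry %ld\n", (long)(match - bucket)); /* 1: MATURE */
	return 0;
}
```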
@@ -1099,6 +1151,8 @@ void key_freeso(so) struct socket *so; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL) panic("key_freeso: NULL pointer is passed.\n"); @@ -1148,11 +1202,15 @@ key_freeso(so) return; } +#endif static void key_freesp_so(sp) struct secpolicy **sp; { + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sp == NULL || *sp == NULL) panic("key_freesp_so: sp == NULL\n"); @@ -1183,6 +1241,8 @@ void key_freesav(sav) struct secasvar *sav; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sav == NULL) panic("key_freesav: NULL pointer is passed.\n"); @@ -1208,6 +1268,8 @@ key_delsp(sp) { int s; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sp == NULL) panic("key_delsp: NULL pointer is passed.\n"); @@ -1258,6 +1320,8 @@ key_getsp(spidx) { struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (spidx == NULL) panic("key_getsp: NULL pointer is passed.\n"); @@ -1285,6 +1349,8 @@ key_getspbyid(id) { struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(sp, &sptree[IPSEC_DIR_INBOUND], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; @@ -1334,6 +1400,8 @@ key_msg2sp(xpl0, len, error) { struct secpolicy *newsp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (xpl0 == NULL) panic("key_msg2sp: NULL pointer was passed.\n"); @@ -1551,6 +1619,8 @@ key_newreqid() { static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + auto_reqid = (auto_reqid == ~0 ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); @@ -1571,6 +1641,8 @@ key_sp2msg(sp) caddr_t p; struct mbuf *m; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check. */ if (sp == NULL) panic("key_sp2msg: NULL pointer was passed.\n"); @@ -1729,6 +1801,8 @@ key_spdadd(so, m, mhp) struct timeval tv; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdadd: NULL pointer is passed.\n"); @@ -1949,6 +2023,8 @@ key_getnewspid() int count = key_spi_trycnt; /* XXX */ struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* when requesting to allocate spi ranged */ while (count--) { newid = (policy_id = (policy_id == ~0 ? 1 : policy_id + 1)); @@ -1990,6 +2066,8 @@ key_spddelete(so, m, mhp) struct secpolicyindex spidx; struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddelete: NULL pointer is passed.\n"); @@ -2084,6 +2162,8 @@ key_spddelete2(so, m, mhp) u_int32_t id; struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddelete2: NULL pointer is passed.\n"); @@ -2181,6 +2261,8 @@ key_spdget(so, m, mhp) struct secpolicy *sp; struct mbuf *n; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdget: NULL pointer is passed.\n"); @@ -2213,7 +2295,7 @@ key_spdget(so, m, mhp) * send * * to KMD, and expect to receive - * with SADB_X_SPDACQUIRE if error occured, + * with SADB_X_SPDACQUIRE if error occurred, * or * * with SADB_X_SPDUPDATE from KMD by PF_KEY. 
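The invariants asserted throughout these functions trace back to the `key_init()` added earlier in this file, which builds `sadb_mutex` through the usual xnu lock-group sequence. A condensed sketch; `sadb_lock_setup` is a hypothetical wrapper name, but every call in it is the one `key_init()` makes:

```c
#include <kern/locks.h>

lck_grp_attr_t *sadb_mutex_grp_attr;
lck_grp_t      *sadb_mutex_grp;
lck_attr_t     *sadb_mutex_attr;
lck_mtx_t      *sadb_mutex;

void
sadb_lock_setup(void)   /* hypothetical wrapper; cf. key_init() */
{
	/* Group attributes -> named group -> lock attributes ->
	 * the mutex itself, in the order key_init() uses. */
	sadb_mutex_grp_attr = lck_grp_attr_alloc_init();
	sadb_mutex_grp = lck_grp_alloc_init("sadb", sadb_mutex_grp_attr);
	sadb_mutex_attr = lck_attr_alloc_init();
	lck_attr_setdefault(sadb_mutex_attr);

	sadb_mutex = lck_mtx_alloc_init(sadb_mutex_grp, sadb_mutex_attr);
	if (sadb_mutex == NULL)
		printf("sadb_lock_setup: can't alloc sadb_mutex\n");
}
```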
@@ -2230,6 +2312,8 @@ key_spdacquire(sp) struct secspacq *newspacq; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sp == NULL) panic("key_spdacquire: NULL pointer is passed.\n"); @@ -2302,6 +2386,8 @@ key_spdflush(so, m, mhp) struct secpolicy *sp; u_int dir; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdflush: NULL pointer is passed.\n"); @@ -2353,6 +2439,8 @@ key_spddump(so, m, mhp) u_int dir; struct mbuf *n; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddump: NULL pointer is passed.\n"); @@ -2391,6 +2479,8 @@ key_setdumpsp(sp, type, seq, pid) { struct mbuf *result = NULL, *m; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + m = key_setsadbmsg(type, 0, SADB_SATYPE_UNSPEC, seq, pid, sp->refcnt); if (!m) goto fail; @@ -2447,6 +2537,8 @@ key_getspreqmsglen(sp) { u_int tlen; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + tlen = sizeof(struct sadb_x_policy); /* if is the policy for ipsec ? */ @@ -2489,8 +2581,7 @@ key_spdexpire(sp) int error = -1; struct sadb_lifetime *lt; - /* XXX: Why do we lock ? */ - s = splnet(); /*called from softclock()*/ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* sanity check */ if (sp == NULL) @@ -2583,7 +2674,6 @@ key_spdexpire(sp) fail: if (result) m_freem(result); - splx(s); return error; } @@ -2599,6 +2689,8 @@ key_newsah(saidx) { struct secashead *newsah; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (saidx == NULL) panic("key_newsaidx: NULL pointer is passed.\n"); @@ -2628,6 +2720,8 @@ key_delsah(sah) int s; int zombie = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sah == NULL) panic("key_delsah: NULL pointer is passed.\n"); @@ -2706,6 +2800,8 @@ key_newsav(m, mhp, sah, errp) struct secasvar *newsav; const struct sadb_sa *xsa; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || mhp == NULL || mhp->msg == NULL || sah == NULL) panic("key_newsa: NULL pointer is passed.\n"); @@ -2720,7 +2816,7 @@ key_newsav(m, mhp, sah, errp) switch (mhp->msg->sadb_msg_type) { case SADB_GETSPI: - newsav->spi = 0; + key_setspi(newsav, 0); #if IPSEC_DOSEQCHECK /* sync sequence number */ @@ -2741,7 +2837,7 @@ key_newsav(m, mhp, sah, errp) return NULL; } xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - newsav->spi = xsa->sadb_sa_spi; + key_setspi(newsav, xsa->sadb_sa_spi); newsav->seq = mhp->msg->sadb_msg_seq; break; default: @@ -2785,6 +2881,8 @@ static void key_delsav(sav) struct secasvar *sav; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sav == NULL) panic("key_delsav: NULL pointer is passed.\n"); @@ -2795,6 +2893,9 @@ key_delsav(sav) /* remove from SA header */ if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); + + if (sav->spihash.le_prev || sav->spihash.le_next) + LIST_REMOVE(sav, spihash); if (sav->key_auth != NULL) { bzero(_KEYBUF(sav->key_auth), _KEYLEN(sav->key_auth)); @@ -2849,6 +2950,8 @@ key_getsah(saidx) { struct secashead *sah; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; @@ -2871,8 +2974,10 @@ key_checkspidup(saidx, spi) struct secasindex *saidx; u_int32_t spi; { - struct secashead *sah; struct secasvar *sav; + u_int stateidx, state; + + lck_mtx_assert(sadb_mutex, 
LCK_MTX_ASSERT_OWNED); /* check address family */ if (saidx->src.ss_family != saidx->dst.ss_family) { @@ -2881,17 +2986,35 @@ key_checkspidup(saidx, spi) } /* check all SAD */ - LIST_FOREACH(sah, &sahtree, chain) { - if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) + LIST_FOREACH(sav, &spihash[SPIHASH(spi)], spihash) { + if (sav->spi != spi) continue; - sav = key_getsavbyspi(sah, spi); - if (sav != NULL) - return sav; + for (stateidx = 0; + stateidx < _ARRAYLEN(saorder_state_alive); + stateidx++) { + state = saorder_state_alive[stateidx]; + if (sav->state == state && + key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) + return sav; + } } return NULL; } +static void +key_setspi(sav, spi) + struct secasvar *sav; + u_int32_t spi; +{ + + sav->spi = spi; + if (sav->spihash.le_prev || sav->spihash.le_next) + LIST_REMOVE(sav, spihash); + LIST_INSERT_HEAD(&spihash[SPIHASH(spi)], sav, spihash); +} + + /* * search SAD litmited alive SA, protocol, SPI. * OUT: @@ -2903,31 +3026,27 @@ key_getsavbyspi(sah, spi) struct secashead *sah; u_int32_t spi; { - struct secasvar *sav; - u_int stateidx, state; - - /* search all status */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - - state = saorder_state_alive[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "key_getsavbyspi: " - "invalid sav->state (queue: %d SA: %d)\n", - state, sav->state)); - continue; + struct secasvar *sav, *match; + u_int stateidx, state, matchidx; + + match = NULL; + matchidx = _ARRAYLEN(saorder_state_alive); + LIST_FOREACH(sav, &spihash[SPIHASH(spi)], spihash) { + if (sav->spi != spi) + continue; + if (sav->sah != sah) + continue; + for (stateidx = 0; stateidx < matchidx; stateidx++) { + state = saorder_state_alive[stateidx]; + if (sav->state == state) { + match = sav; + matchidx = stateidx; + break; } - - if (sav->spi == spi) - return sav; } } - return NULL; + return match; } /* @@ -2950,6 +3069,8 @@ key_setsaval(sav, m, mhp) int error = 0; struct timeval tv; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_setsaval: NULL pointer is passed.\n"); @@ -3235,6 +3356,8 @@ key_mature(sav) mature = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* check SPI value */ switch (sav->sah->saidx.proto) { case IPPROTO_ESP: @@ -3419,6 +3542,8 @@ key_setdumpsa(sav, type, satype, seq, pid) SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, }; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) goto fail; @@ -3825,6 +3950,7 @@ key_ismyaddr(sa) switch (sa->sa_family) { #if INET case AF_INET: + lck_mtx_lock(rt_mtx); sin = (struct sockaddr_in *)sa; for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) @@ -3833,9 +3959,11 @@ key_ismyaddr(sa) sin->sin_len == ia->ia_addr.sin_len && sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) { + lck_mtx_unlock(rt_mtx); return 1; } } + lck_mtx_unlock(rt_mtx); break; #endif #if INET6 @@ -3863,10 +3991,13 @@ key_ismyaddr6(sin6) struct in6_ifaddr *ia; struct in6_multi *in6m; - for (ia = in6_ifaddr; ia; ia = ia->ia_next) { + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia; ia = ia->ia_next) { if (key_sockaddrcmp((struct sockaddr *)&sin6, - (struct sockaddr *)&ia->ia_addr, 0) == 0) + (struct sockaddr *)&ia->ia_addr, 0) == 0) { + lck_mtx_unlock(nd6_mutex); return 1; + } /* * XXX Multicast @@ -3876,9 +4007,12 @@ 
key_ismyaddr6(sin6) */ in6m = NULL; IN6_LOOKUP_MULTI(sin6->sin6_addr, ia->ia_ifp, in6m); - if (in6m) + if (in6m) { + lck_mtx_unlock(nd6_mutex); return 1; + } } + lck_mtx_unlock(nd6_mutex); /* loopback, just for safety */ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) @@ -4187,18 +4321,6 @@ key_bbcmp(p1, p2, bits) * and do to remove or to expire. * XXX: year 2038 problem may remain. */ -void -key_timehandler_funnel(void) -{ -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); -#endif - key_timehandler(); -#ifdef __APPLE__ - (void) thread_funnel_set(network_flock, FALSE); -#endif -} void key_timehandler(void) @@ -4209,8 +4331,7 @@ key_timehandler(void) microtime(&tv); - s = splnet(); /*called from softclock()*/ - + lck_mtx_lock(sadb_mutex); /* SPD */ { struct secpolicy *sp, *nextsp; @@ -4472,12 +4593,12 @@ key_timehandler(void) natt_now++; + lck_mtx_unlock(sadb_mutex); #ifndef IPSEC_DEBUG2 /* do exchange to tick time !! */ - (void)timeout((void *)key_timehandler_funnel, (void *)0, hz); + (void)timeout((void *)key_timehandler, (void *)0, hz); #endif /* IPSEC_DEBUG2 */ - splx(s); return; } @@ -4618,6 +4739,8 @@ key_getspi(so, m, mhp) u_int32_t reqid; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_getspi: NULL pointer is passed.\n"); @@ -4710,7 +4833,7 @@ key_getspi(so, m, mhp) } /* set spi */ - newsav->spi = htonl(spi); + key_setspi(newsav, htonl(spi)); #ifndef IPSEC_NONBLOCK_ACQUIRE /* delete the entry in acqtree */ @@ -4811,6 +4934,8 @@ key_do_getnewspi(spirange, saidx) u_int32_t min, max; int count = key_spi_trycnt; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* set spi range to allocate */ if (spirange != NULL) { min = spirange->sadb_spirange_min; @@ -4896,6 +5021,8 @@ key_update(so, m, mhp) u_int32_t reqid; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_update: NULL pointer is passed.\n"); @@ -5003,9 +5130,9 @@ key_update(so, m, mhp) } /* check SA values to be mature. */ - if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { + if ((error = key_mature(sav)) != 0) { key_freesav(sav); - return key_senderror(so, m, 0); + return key_senderror(so, m, error); } { @@ -5039,6 +5166,8 @@ key_getsavbyseq(sah, seq) struct secasvar *sav; u_int state; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + state = SADB_SASTATE_LARVAL; /* search SAD with sequence number ? 
*/ @@ -5091,6 +5220,8 @@ key_add(so, m, mhp) u_int32_t reqid; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_add: NULL pointer is passed.\n"); @@ -5200,6 +5331,8 @@ key_setident(sah, m, mhp) const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sah == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_setident: NULL pointer is passed.\n"); @@ -5296,8 +5429,8 @@ key_getmsgbuf_x1(m, mhp) return n; } -static int key_delete_all __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *, u_int16_t)); +static int key_delete_all(struct socket *, struct mbuf *, + const struct sadb_msghdr *, u_int16_t); /* * SADB_DELETE processing @@ -5323,6 +5456,8 @@ key_delete(so, m, mhp) struct secasvar *sav = NULL; u_int16_t proto; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_delete: NULL pointer is passed.\n"); @@ -5427,6 +5562,8 @@ key_delete_all(so, m, mhp, proto) struct secasvar *sav, *nextsav; u_int stateidx, state; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); @@ -5513,6 +5650,8 @@ key_get(so, m, mhp) struct secasvar *sav = NULL; u_int16_t proto; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_get: NULL pointer is passed.\n"); @@ -5839,7 +5978,7 @@ key_getprop(saidx) * * to KMD, and expect to receive - * with SADB_ACQUIRE if error occured, + * with SADB_ACQUIRE if an error occurred, * or * with SADB_GETSPI * from KMD by PF_KEY. @@ -5866,6 +6005,8 @@ key_acquire(saidx, sp) int error = -1; u_int32_t seq; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (saidx == NULL) panic("key_acquire: NULL pointer is passed.\n"); @@ -6065,6 +6206,8 @@ key_getacq(saidx) { struct secacq *acq; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(acq, &acqtree, chain) { if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY)) return acq; @@ -6079,6 +6222,8 @@ key_getacqbyseq(seq) { struct secacq *acq; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(acq, &acqtree, chain) { if (acq->seq == seq) return acq; @@ -6118,6 +6263,8 @@ key_getspacq(spidx) { struct secspacq *acq; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + LIST_FOREACH(acq, &spacqtree, chain) { if (key_cmpspidx_exactly(spidx, &acq->spidx)) return acq; @@ -6152,15 +6299,17 @@ key_acquire2(so, m, mhp) u_int16_t proto; int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_acquire2: NULL pointer is passed.\n"); /* * Error message from KMd. - * We assume that if error was occured in IKEd, the length of PFKEY + * We assume that if an error occurred in IKEd, the length of PFKEY * message is equal to the size of sadb_msg structure. - * We do not raise error even if error occured in this function. + * We do not raise an error even if an error occurred in this function.
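The key_delete_all change above is one instance of the mechanical __P() removal that runs through the rest of these files. For context, __P came from <sys/cdefs.h> and was defined roughly as below, so dropping it only changes what a pre-ANSI compiler would have seen; the #ifndef guard keeps this sketch compiling where a system header already defines it.

#include <stdint.h>

#ifndef __P
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos  /* ANSI: keep the parameter list */
#else
#define __P(protos) ()      /* K&R: strip it */
#endif
#endif

struct socket;
struct mbuf;
struct sadb_msghdr;

/* before the patch: */
extern int key_delete_all_old __P((struct socket *, struct mbuf *,
	const struct sadb_msghdr *, uint16_t));
/* after the patch: */
extern int key_delete_all_new(struct socket *, struct mbuf *,
	const struct sadb_msghdr *, uint16_t);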
*/ if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { #ifndef IPSEC_NONBLOCK_ACQUIRE @@ -6266,6 +6415,8 @@ key_register(so, m, mhp) { struct secreg *reg, *newreg = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_register: NULL pointer is passed.\n"); @@ -6294,8 +6445,10 @@ key_register(so, m, mhp) } bzero((caddr_t)newreg, sizeof(*newreg)); + socket_lock(so, 1); newreg->so = so; ((struct keycb *)sotorawcb(so))->kp_registered++; + socket_unlock(so, 1); /* add regnode to regtree. */ LIST_INSERT_HEAD(®tree[mhp->msg->sadb_msg_satype], newreg, chain); @@ -6429,6 +6582,8 @@ key_freereg(so) struct secreg *reg; int i; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL) panic("key_freereg: NULL pointer is passed.\n"); @@ -6473,8 +6628,7 @@ key_expire(sav) int error = -1; struct sadb_lifetime *lt; - /* XXX: Why do we lock ? */ - s = splnet(); /*called from softclock()*/ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* sanity check */ if (sav == NULL) @@ -6606,6 +6760,8 @@ key_flush(so, m, mhp) u_int8_t state; u_int stateidx; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || mhp == NULL || mhp->msg == NULL) panic("key_flush: NULL pointer is passed.\n"); @@ -6689,6 +6845,8 @@ key_dump(so, m, mhp) struct sadb_msg *newmsg; struct mbuf *n; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_dump: NULL pointer is passed.\n"); @@ -6764,6 +6922,8 @@ key_promisc(so, m, mhp) { int olen; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_promisc: NULL pointer is passed.\n"); @@ -6780,7 +6940,8 @@ key_promisc(so, m, mhp) } else if (olen == sizeof(struct sadb_msg)) { /* enable/disable promisc mode */ struct keycb *kp; - + + socket_lock(so, 1); if ((kp = (struct keycb *)sotorawcb(so)) == NULL) return key_senderror(so, m, EINVAL); mhp->msg->sadb_msg_errno = 0; @@ -6790,8 +6951,10 @@ key_promisc(so, m, mhp) kp->kp_promisc = mhp->msg->sadb_msg_satype; break; default: + socket_unlock(so, 1); return key_senderror(so, m, EINVAL); } + socket_unlock(so, 1); /* send the original message back to everyone */ mhp->msg->sadb_msg_errno = 0; @@ -6806,8 +6969,8 @@ key_promisc(so, m, mhp) } } -static int (*key_typesw[]) __P((struct socket *, struct mbuf *, - const struct sadb_msghdr *)) = { +static int (*key_typesw[])(struct socket *, struct mbuf *, + const struct sadb_msghdr *) = { NULL, /* SADB_RESERVED */ key_getspi, /* SADB_GETSPI */ key_update, /* SADB_UPDATE */ @@ -6855,6 +7018,8 @@ key_parse(m, so) int error; int target; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (m == NULL || so == NULL) panic("key_parse: NULL pointer is passed.\n"); @@ -7105,6 +7270,8 @@ key_senderror(so, m, code) { struct sadb_msg *msg; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (m->m_len < sizeof(struct sadb_msg)) panic("invalid mbuf passed to key_senderror"); @@ -7283,7 +7450,7 @@ key_validate_ext(ext, len) } void -key_init() +key_domain_init() { int i; @@ -7315,7 +7482,7 @@ key_init() #endif #ifndef IPSEC_DEBUG2 - timeout((void *)key_timehandler_funnel, (void *)0, hz); + timeout((void *)key_timehandler, (void *)0, hz); #endif /*IPSEC_DEBUG2*/ /* initialize key statistics */ @@ -7343,6 +7510,8 @@ 
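key_register() and key_promisc() above start bracketing per-socket state with socket_lock()/socket_unlock(). The discipline the bracketing requires is that every exit from the region, error paths included, drops the lock; a pthread model of that shape, with stand-in names:

#include <errno.h>
#include <pthread.h>

struct sock_model {
	pthread_mutex_t lock;  /* socket_lock()/socket_unlock() role */
	int promisc;
};

/* Same discipline as the key_promisc() hunk: take the per-socket lock,
 * and make sure every return path, including the error one, drops it. */
int
model_set_promisc(struct sock_model *so, int satype, int have_pcb)
{
	pthread_mutex_lock(&so->lock);
	if (!have_pcb) {
		pthread_mutex_unlock(&so->lock);  /* never return while locked */
		return EINVAL;
	}
	so->promisc = satype;
	pthread_mutex_unlock(&so->lock);
	return 0;
}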
key_checktunnelsanity(sav, family, src, dst) caddr_t src; caddr_t dst; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + /* sanity check */ if (sav->sah == NULL) panic("sav->sah == NULL at key_checktunnelsanity"); @@ -7422,6 +7591,8 @@ key_sa_recordxfer(sav, m) struct secasvar *sav; struct mbuf *m; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (!sav) panic("key_sa_recordxfer called with sav == NULL"); if (!m) @@ -7473,6 +7644,7 @@ key_sa_routechange(dst) struct secashead *sah; struct route *ro; + lck_mtx_lock(sadb_mutex); LIST_FOREACH(sah, &sahtree, chain) { ro = &sah->sa_route; if (ro->ro_rt && dst->sa_len == ro->ro_dst.sa_len @@ -7481,6 +7653,7 @@ key_sa_routechange(dst) ro->ro_rt = (struct rtentry *)NULL; } } + lck_mtx_unlock(sadb_mutex); return; } @@ -7490,6 +7663,8 @@ key_sa_chgstate(sav, state) struct secasvar *sav; u_int8_t state; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (sav == NULL) panic("key_sa_chgstate called with sav == NULL"); @@ -7508,6 +7683,8 @@ key_sa_stir_iv(sav) struct secasvar *sav; { + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); + if (!sav->iv) panic("key_sa_stir_iv called with sav == NULL"); key_randomfill(sav->iv, sav->ivlen); diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h index 99cf8bee4..ce509fa9f 100644 --- a/bsd/netkey/key.h +++ b/bsd/netkey/key.h @@ -33,8 +33,7 @@ #define _NETKEY_KEY_H_ #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern struct key_cb key_cb; @@ -47,34 +46,30 @@ struct socket; struct sadb_msg; struct sadb_x_policy; -extern struct secpolicy *key_allocsp __P((struct secpolicyindex *, u_int)); -extern struct secpolicy *key_gettunnel __P((struct sockaddr *, - struct sockaddr *, struct sockaddr *, struct sockaddr *)); -extern int key_checkrequest - __P((struct ipsecrequest *isr, struct secasindex *)); -extern struct secasvar *key_allocsa __P((u_int, caddr_t, caddr_t, - u_int, u_int32_t)); -extern void key_freesp __P((struct secpolicy *)); -extern void key_freeso __P((struct socket *)); -extern void key_freesav __P((struct secasvar *)); -extern struct secpolicy *key_newsp __P((void)); -extern struct secpolicy *key_msg2sp __P((struct sadb_x_policy *, - size_t, int *)); -extern struct mbuf *key_sp2msg __P((struct secpolicy *)); -extern int key_ismyaddr __P((struct sockaddr *)); -extern int key_spdacquire __P((struct secpolicy *)); -extern void key_timehandler __P((void)); -extern u_long key_random __P((void)); -extern void key_randomfill __P((void *, size_t)); -extern void key_freereg __P((struct socket *)); -extern int key_parse __P((struct mbuf *, struct socket *)); -extern void key_init __P((void)); -extern int key_checktunnelsanity __P((struct secasvar *, u_int, - caddr_t, caddr_t)); -extern void key_sa_recordxfer __P((struct secasvar *, struct mbuf *)); -extern void key_sa_routechange __P((struct sockaddr *)); -extern void key_sa_stir_iv __P((struct secasvar *)); +extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +extern struct secpolicy *key_gettunnel(struct sockaddr *, + struct sockaddr *, struct sockaddr *, struct sockaddr *); +extern int key_checkrequest(struct ipsecrequest *isr, struct secasindex *); +extern struct secasvar *key_allocsa(u_int, caddr_t, caddr_t, + u_int, u_int32_t); +extern void key_freesp(struct secpolicy *); +extern void key_freeso(struct socket *); +extern void key_freesav(struct secasvar *); +extern struct secpolicy *key_newsp(void); +extern struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); +extern struct mbuf 
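The key.h hunk above collapses the old nested KERNEL plus __APPLE_API_PRIVATE gates into a single KERNEL_PRIVATE gate. On a hypothetical header, the resulting shape is:

#ifndef _EXAMPLE_KEYLIKE_H_
#define _EXAMPLE_KEYLIKE_H_

#ifdef KERNEL_PRIVATE
/* kernel-internal interfaces, visible only to kernel-private clients */
struct secpolicy;
extern struct secpolicy *example_newsp(void);
#endif /* KERNEL_PRIVATE */

#endif /* _EXAMPLE_KEYLIKE_H_ */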
*key_sp2msg(struct secpolicy *); +extern int key_ismyaddr(struct sockaddr *); +extern int key_spdacquire(struct secpolicy *); +extern void key_timehandler(void); +extern u_long key_random(void); +extern void key_randomfill(void *, size_t); +extern void key_freereg(struct socket *); +extern int key_parse(struct mbuf *, struct socket *); +extern void key_domain_init(void); +extern int key_checktunnelsanity(struct secasvar *, u_int, caddr_t, caddr_t); +extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); +extern void key_sa_routechange(struct sockaddr *); +extern void key_sa_stir_iv(struct secasvar *); -#endif /* __APPLE_API_PRIVATE */ -#endif /* defined(_KERNEL) */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETKEY_KEY_H_ */ diff --git a/bsd/netkey/key_debug.c b/bsd/netkey/key_debug.c index 6db61a105..5c3acffdc 100644 --- a/bsd/netkey/key_debug.c +++ b/bsd/netkey/key_debug.c @@ -55,17 +55,17 @@ #if !defined(KERNEL) || (defined(KERNEL) && defined(IPSEC_DEBUG)) -static void kdebug_sadb_prop __P((struct sadb_ext *)); -static void kdebug_sadb_identity __P((struct sadb_ext *)); -static void kdebug_sadb_supported __P((struct sadb_ext *)); -static void kdebug_sadb_lifetime __P((struct sadb_ext *)); -static void kdebug_sadb_sa __P((struct sadb_ext *)); -static void kdebug_sadb_address __P((struct sadb_ext *)); -static void kdebug_sadb_key __P((struct sadb_ext *)); -static void kdebug_sadb_x_sa2 __P((struct sadb_ext *)); +static void kdebug_sadb_prop(struct sadb_ext *); +static void kdebug_sadb_identity(struct sadb_ext *); +static void kdebug_sadb_supported(struct sadb_ext *); +static void kdebug_sadb_lifetime(struct sadb_ext *); +static void kdebug_sadb_sa(struct sadb_ext *); +static void kdebug_sadb_address(struct sadb_ext *); +static void kdebug_sadb_key(struct sadb_ext *); +static void kdebug_sadb_x_sa2(struct sadb_ext *); #ifdef KERNEL -static void kdebug_secreplay __P((struct secreplay *)); +static void kdebug_secreplay(struct secreplay *); #endif #ifndef KERNEL diff --git a/bsd/netkey/key_debug.h b/bsd/netkey/key_debug.h index ebbf17f04..a9f823d77 100644 --- a/bsd/netkey/key_debug.h +++ b/bsd/netkey/key_debug.h @@ -33,8 +33,6 @@ #define _NETKEY_KEY_DEBUG_H_ #include -#if !defined(KERNEL) || (defined(KERNEL) && defined(IPSEC_DEBUG)) - /* debug flags */ #define KEYDEBUG_STAMP 0x00000001 /* path */ #define KEYDEBUG_DATA 0x00000002 /* data */ @@ -59,11 +57,10 @@ struct sadb_msg; struct sadb_ext; -extern void kdebug_sadb __P((struct sadb_msg *)); -extern void kdebug_sadb_x_policy __P((struct sadb_ext *)); +extern void kdebug_sadb(struct sadb_msg *); +extern void kdebug_sadb_x_policy(struct sadb_ext *); #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern u_int32_t key_debug_level; struct secpolicy; @@ -72,26 +69,21 @@ struct secasindex; struct secasvar; struct secreplay; struct mbuf; -extern void kdebug_secpolicy __P((struct secpolicy *)); -extern void kdebug_secpolicyindex __P((struct secpolicyindex *)); -extern void kdebug_secasindex __P((struct secasindex *)); -extern void kdebug_secasv __P((struct secasvar *)); -extern void kdebug_mbufhdr __P((struct mbuf *)); -extern void kdebug_mbuf __P((struct mbuf *)); -#endif /* __APPLE_API_PRIVATE */ -#endif /*KERNEL*/ +extern void kdebug_secpolicy(struct secpolicy *); +extern void kdebug_secpolicyindex(struct secpolicyindex *); +extern void kdebug_secasindex(struct secasindex *); +extern void kdebug_secasv(struct secasvar *); +extern void kdebug_mbufhdr(struct mbuf *); +extern void kdebug_mbuf(struct mbuf *); +#endif KERNEL struct sockaddr; -extern 
void kdebug_sockaddr __P((struct sockaddr *)); - -extern void ipsec_hexdump __P((caddr_t, int)); -extern void ipsec_bindump __P((caddr_t, int)); +extern void kdebug_sockaddr(struct sockaddr *); -#else +extern void ipsec_hexdump(caddr_t, int); +extern void ipsec_bindump(caddr_t, int); #define KEYDEBUG(lev,arg) -#endif /*!defined(KERNEL) || (defined(KERNEL) && defined(IPSEC_DEBUG))*/ - #endif /* _NETKEY_KEY_DEBUG_H_ */ diff --git a/bsd/netkey/key_var.h b/bsd/netkey/key_var.h index aa7d7f677..7f3f45c52 100644 --- a/bsd/netkey/key_var.h +++ b/bsd/netkey/key_var.h @@ -32,7 +32,6 @@ #ifndef _NETKEY_KEY_VAR_H_ #define _NETKEY_KEY_VAR_H_ #include -#ifdef __APPLE_API_PRIVATE /* sysctl */ #define KEYCTL_DEBUG_LEVEL 1 @@ -47,9 +46,11 @@ #define KEYCTL_ESP_AUTH 10 #define KEYCTL_AH_KEYMIN 11 #define KEYCTL_PREFERED_OLDSA 12 -#define KEYCTL_NATT_KEEPALIVE_INTERVAL 13 -#define KEYCTL_MAXID 14 +#define KEYCTL_NATT_KEEPALIVE_INTERVAL 13 +#define KEYCTL_PFKEYSTAT 14 +#define KEYCTL_MAXID 15 +#ifdef KERNEL_PRIVATE #define KEYCTL_NAMES { \ { 0, 0 }, \ { "debug", CTLTYPE_INT }, \ @@ -65,6 +66,7 @@ { "ah_keymin", CTLTYPE_INT }, \ { "prefered_oldsa", CTLTYPE_INT }, \ { "natt_keepalive_interval", CTLTYPE_INT }, \ + { "pfkeystat", CTLTYPE_STRUCT }, \ } #define KEYCTL_VARS { \ @@ -90,5 +92,6 @@ #define _KEYBUF(key) ((caddr_t)((caddr_t)(key) + sizeof(struct sadb_key))) #endif /*KERNEL*/ -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NETKEY_KEY_VAR_H_ */ + diff --git a/bsd/netkey/keydb.c b/bsd/netkey/keydb.c index a96589042..8da09b877 100644 --- a/bsd/netkey/keydb.c +++ b/bsd/netkey/keydb.c @@ -51,7 +51,7 @@ MALLOC_DEFINE(M_SECA, "key mgmt", "security associations, key management"); -static void keydb_delsecasvar __P((struct secasvar *)); +static void keydb_delsecasvar(struct secasvar *); /* * secpolicy management diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h index 8c70fa95a..80b16054f 100644 --- a/bsd/netkey/keydb.h +++ b/bsd/netkey/keydb.h @@ -34,7 +34,7 @@ #include #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #include @@ -70,7 +70,7 @@ struct secashead { /* Security Association */ struct secasvar { LIST_ENTRY(secasvar) chain; - + LIST_ENTRY(secasvar) spihash; int refcnt; /* reference count */ u_int8_t state; /* Status of this Association */ @@ -144,23 +144,23 @@ struct key_cb { }; /* secpolicy */ -extern struct secpolicy *keydb_newsecpolicy __P((void)); -extern void keydb_delsecpolicy __P((struct secpolicy *)); +extern struct secpolicy *keydb_newsecpolicy(void); +extern void keydb_delsecpolicy(struct secpolicy *); /* secashead */ -extern struct secashead *keydb_newsecashead __P((void)); -extern void keydb_delsecashead __P((struct secashead *)); +extern struct secashead *keydb_newsecashead(void); +extern void keydb_delsecashead(struct secashead *); /* secasvar */ -extern struct secasvar *keydb_newsecasvar __P((void)); -extern void keydb_refsecasvar __P((struct secasvar *)); -extern void keydb_freesecasvar __P((struct secasvar *)); +extern struct secasvar *keydb_newsecasvar(void); +extern void keydb_refsecasvar(struct secasvar *); +extern void keydb_freesecasvar(struct secasvar *); /* secreplay */ -extern struct secreplay *keydb_newsecreplay __P((size_t)); -extern void keydb_delsecreplay __P((struct secreplay *)); +extern struct secreplay *keydb_newsecreplay(size_t); +extern void keydb_delsecreplay(struct secreplay *); /* secreg */ -extern struct secreg *keydb_newsecreg __P((void)); -extern void keydb_delsecreg __P((struct secreg *)); +extern struct secreg 
*keydb_newsecreg(void); +extern void keydb_delsecreg(struct secreg *); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* KERNEL */ #endif /* _NETKEY_KEYDB_H_ */ diff --git a/bsd/netkey/keysock.c b/bsd/netkey/keysock.c index 364473ea0..a65bcc690 100644 --- a/bsd/netkey/keysock.c +++ b/bsd/netkey/keysock.c @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -54,10 +55,14 @@ #include #include +extern lck_mtx_t *raw_mtx; +extern lck_mtx_t *sadb_mutex; +extern void key_init(void); + struct sockaddr key_dst = { 2, PF_KEY, }; struct sockaddr key_src = { 2, PF_KEY, }; -static int key_sendup0 __P((struct rawcb *, struct mbuf *, int)); +static int key_sendup0(struct rawcb *, struct mbuf *, int); struct pfkeystat pfkeystat; @@ -80,7 +85,6 @@ key_output(m, va_alist) { struct sadb_msg *msg; int len, error = 0; - int s; #ifndef __APPLE__ struct socket *so; va_list ap; @@ -93,6 +97,8 @@ key_output(m, va_alist) if (m == 0) panic("key_output: NULL pointer was passed.\n"); + socket_unlock(so, 0); + lck_mtx_lock(sadb_mutex); pfkeystat.out_total++; pfkeystat.out_bytes += m->m_pkthdr.len; @@ -135,14 +141,14 @@ key_output(m, va_alist) goto end; } - /*XXX giant lock*/ - s = splnet(); error = key_parse(m, so); m = NULL; - splx(s); + end: if (m) m_freem(m); + lck_mtx_unlock(sadb_mutex); + socket_lock(so, 0); return error; } @@ -157,6 +163,7 @@ key_sendup0(rp, m, promisc) { int error; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); if (promisc) { struct sadb_msg *pmsg; @@ -184,103 +191,18 @@ key_sendup0(rp, m, promisc) } if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src, - m, NULL)) { + m, NULL, &error)) { #if IPSEC_DEBUG printf("key_sendup0: sbappendaddr failed\n"); #endif pfkeystat.in_nomem++; - m_freem(m); - error = ENOBUFS; - } else - error = 0; - sorwakeup(rp->rcb_socket); + } + else { + sorwakeup(rp->rcb_socket); + } return error; } -/* XXX this interface should be obsoleted. */ -int -key_sendup(so, msg, len, target) - struct socket *so; - struct sadb_msg *msg; - u_int len; - int target; /*target of the resulting message*/ -{ - struct mbuf *m, *n, *mprev; - int tlen; - - /* sanity check */ - if (so == 0 || msg == 0) - panic("key_sendup: NULL pointer was passed.\n"); - - KEYDEBUG(KEYDEBUG_KEY_DUMP, - printf("key_sendup: \n"); - kdebug_sadb(msg)); - - /* - * we increment statistics here, just in case we have ENOBUFS - * in this function. - */ - pfkeystat.in_total++; - pfkeystat.in_bytes += len; - pfkeystat.in_msgtype[msg->sadb_msg_type]++; - - /* - * Get mbuf chain whenever possible (not clusters), - * to save socket buffer. We'll be generating many SADB_ACQUIRE - * messages to listening key sockets. If we simply allocate clusters, - * sbappendaddr() will raise ENOBUFS due to too little sbspace(). - * sbspace() computes # of actual data bytes AND mbuf region. - * - * TODO: SADB_ACQUIRE filters should be implemented. - */ - tlen = len; - m = mprev = NULL; - while (tlen > 0) { - if (tlen == len) { - MGETHDR(n, M_DONTWAIT, MT_DATA); - n->m_len = MHLEN; - } else { - MGET(n, M_DONTWAIT, MT_DATA); - n->m_len = MLEN; - } - if (!n) { - pfkeystat.in_nomem++; - return ENOBUFS; - } - if (tlen >= MCLBYTES) { /*XXX better threshold? 
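key_sendup0() above now hands sbappendaddr() an &error out parameter instead of assuming ENOBUFS on failure. Reading that calling convention from the call site (the kernel-side semantics are not shown in this excerpt), the shape is a success flag plus a reason code, modeled here on a trivial byte queue:

#include <errno.h>
#include <stddef.h>
#include <string.h>

/* Returns nonzero on success; on failure returns 0 with the reason in
 * *errp, which is the convention the new call site above relies on. */
int
queue_append(char *buf, size_t cap, size_t *used,
    const char *data, size_t len, int *errp)
{
	if (*used + len > cap) {
		*errp = ENOBUFS;  /* the old interface left callers to guess this */
		return 0;
	}
	memcpy(buf + *used, data, len);
	*used += len;
	return 1;
}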
*/ - MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { - m_free(n); - m_freem(m); - pfkeystat.in_nomem++; - return ENOBUFS; - } - n->m_len = MCLBYTES; - } - - if (tlen < n->m_len) - n->m_len = tlen; - n->m_next = NULL; - if (m == NULL) - m = mprev = n; - else { - mprev->m_next = n; - mprev = n; - } - tlen -= n->m_len; - n = NULL; - } - m->m_pkthdr.len = len; - m->m_pkthdr.rcvif = NULL; - m_copyback(m, 0, len, (caddr_t)msg); - - /* avoid duplicated statistics */ - pfkeystat.in_total--; - pfkeystat.in_bytes -= len; - pfkeystat.in_msgtype[msg->sadb_msg_type]--; - - return key_sendup_mbuf(so, m, target); -} /* so can be NULL if target != KEY_SENDUP_ONE */ int @@ -295,6 +217,7 @@ key_sendup_mbuf(so, m, target) struct rawcb *rp; int error = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); if (m == NULL) panic("key_sendup_mbuf: NULL pointer was passed.\n"); if (so == NULL && target == KEY_SENDUP_ONE) @@ -318,7 +241,8 @@ key_sendup_mbuf(so, m, target) msg = mtod(m, struct sadb_msg *); pfkeystat.in_msgtype[msg->sadb_msg_type]++; } - + + lck_mtx_lock(raw_mtx); LIST_FOREACH(rp, &rawcb_list, list) { if (rp->rcb_proto.sp_family != PF_KEY) @@ -329,7 +253,8 @@ key_sendup_mbuf(so, m, target) } kp = (struct keycb *)rp; - + + socket_lock(rp->rcb_socket, 1); /* * If you are in promiscuous mode, and when you get broadcasted * reply, you'll get two PF_KEY messages. @@ -343,15 +268,15 @@ key_sendup_mbuf(so, m, target) } /* the exact target will be processed later */ - if (so && sotorawcb(so) == rp) + if (so && sotorawcb(so) == rp) { + socket_unlock(rp->rcb_socket, 1); continue; + } sendup = 0; switch (target) { case KEY_SENDUP_ONE: /* the statement has no effect */ - if (so && sotorawcb(so) == rp) - sendup++; break; case KEY_SENDUP_ALL: sendup++; @@ -363,8 +288,12 @@ key_sendup_mbuf(so, m, target) } pfkeystat.in_msgtarget[target]++; - if (!sendup) + if (!sendup) { + socket_unlock(rp->rcb_socket, 1); continue; + } + else + sendup = 0; // clear for next iteration if ((n = m_copy(m, 0, (int)M_COPYALL)) == NULL) { #if IPSEC_DEBUG @@ -372,19 +301,26 @@ key_sendup_mbuf(so, m, target) #endif m_freem(m); pfkeystat.in_nomem++; + socket_unlock(rp->rcb_socket, 1); + lck_mtx_unlock(raw_mtx); return ENOBUFS; } - if ((error = key_sendup0(rp, n, 0)) != 0) { - m_freem(m); - return error; - } - + /* + * ignore error even if queue is full. PF_KEY does not + * guarantee the delivery of the message. + * this is important when target == KEY_SENDUP_ALL. + */ + key_sendup0(rp, n, 0); + socket_unlock(rp->rcb_socket, 1); n = NULL; } + lck_mtx_unlock(raw_mtx); if (so) { + socket_lock(so, 1); error = key_sendup0(sotorawcb(so), m, 0); + socket_unlock(so, 1); m = NULL; } else { error = 0; @@ -400,10 +336,8 @@ key_sendup_mbuf(so, m, target) static int key_abort(struct socket *so) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_abort(so); - splx(s); return error; } @@ -415,7 +349,7 @@ static int key_attach(struct socket *so, int proto, struct proc *p) { struct keycb *kp; - int s, error; + int error; if (sotorawcb(so) != 0) return EISCONN; /* XXX panic? */ @@ -424,36 +358,29 @@ key_attach(struct socket *so, int proto, struct proc *p) return ENOBUFS; bzero(kp, sizeof *kp); - /* - * The splnet() is necessary to block protocols from sending - * error notifications (like RTM_REDIRECT or RTM_LOSING) while - * this PCB is extant but incompletely initialized. - * Probably we should try to do more of this work beforehand and - * eliminate the spl. 
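The rewritten key_sendup_mbuf() above iterates the raw socket list under raw_mtx, locks each receiver's socket individually, gives every listener its own m_copy(), and deliberately ignores per-socket queue-full errors so that one slow listener cannot abort a KEY_SENDUP_ALL broadcast. A pthread model of that delivery loop, with stand-in list and callback types:

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

struct listener {
	LIST_ENTRY(listener) link;
	pthread_mutex_t lock;  /* per-socket lock, socket_lock() role */
	int (*deliver)(struct listener *, void *, size_t);
};

static LIST_HEAD(, listener) listeners = LIST_HEAD_INITIALIZER(listeners);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;  /* raw_mtx role */

int
sendup_all_model(const void *msg, size_t len)
{
	struct listener *lp;

	pthread_mutex_lock(&list_lock);
	LIST_FOREACH(lp, &listeners, link) {
		void *copy = malloc(len);           /* m_copy() role */
		if (copy == NULL) {
			pthread_mutex_unlock(&list_lock);
			return ENOBUFS;             /* a failed copy still aborts */
		}
		memcpy(copy, msg, len);
		pthread_mutex_lock(&lp->lock);
		(void)lp->deliver(lp, copy, len);   /* delivery errors ignored */
		pthread_mutex_unlock(&lp->lock);
	}
	pthread_mutex_unlock(&list_lock);
	return 0;
}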
- */ - s = splnet(); so->so_pcb = (caddr_t)kp; + kp->kp_promisc = kp->kp_registered = 0; + kp->kp_raw.rcb_laddr = &key_src; + kp->kp_raw.rcb_faddr = &key_dst; + error = raw_usrreqs.pru_attach(so, proto, p); kp = (struct keycb *)sotorawcb(so); if (error) { _FREE(kp, M_PCB); so->so_pcb = (caddr_t) 0; - splx(s); + so->so_flags |= SOF_PCBCLEARING; printf("key_usrreq: key_usrreq results %d\n", error); return error; } - kp->kp_promisc = kp->kp_registered = 0; - + socket_lock(so, 1); if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ key_cb.key_count++; key_cb.any_count++; - kp->kp_raw.rcb_laddr = &key_src; - kp->kp_raw.rcb_faddr = &key_dst; soisconnected(so); so->so_options |= SO_USELOOPBACK; + socket_unlock(so, 1); - splx(s); return 0; } @@ -464,10 +391,8 @@ key_attach(struct socket *so, int proto, struct proc *p) static int key_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */ - splx(s); return error; } @@ -478,10 +403,8 @@ key_bind(struct socket *so, struct sockaddr *nam, struct proc *p) static int key_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */ - splx(s); return error; } @@ -493,19 +416,19 @@ static int key_detach(struct socket *so) { struct keycb *kp = (struct keycb *)sotorawcb(so); - int s, error; + int error; - s = splnet(); if (kp != 0) { - if (kp->kp_raw.rcb_proto.sp_protocol - == PF_KEY) /* XXX: AF_KEY */ + if (kp->kp_raw.rcb_proto.sp_protocol == PF_KEY) /* XXX: AF_KEY */ key_cb.key_count--; key_cb.any_count--; - + socket_unlock(so, 0); + lck_mtx_lock(sadb_mutex); key_freereg(so); + lck_mtx_unlock(sadb_mutex); + socket_lock(so, 0); } error = raw_usrreqs.pru_detach(so); - splx(s); return error; } @@ -516,10 +439,8 @@ key_detach(struct socket *so) static int key_disconnect(struct socket *so) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_disconnect(so); - splx(s); return error; } @@ -530,10 +451,8 @@ key_disconnect(struct socket *so) static int key_peeraddr(struct socket *so, struct sockaddr **nam) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_peeraddr(so, nam); - splx(s); return error; } @@ -545,10 +464,8 @@ static int key_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_send(so, flags, m, nam, control, p); - splx(s); return error; } @@ -559,10 +476,8 @@ key_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, static int key_shutdown(struct socket *so) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_shutdown(so); - splx(s); return error; } @@ -573,10 +488,8 @@ key_shutdown(struct socket *so) static int key_sockaddr(struct socket *so, struct sockaddr **nam) { - int s, error; - s = splnet(); + int error; error = raw_usrreqs.pru_sockaddr(so, nam); - splx(s); return error; } @@ -587,7 +500,7 @@ struct pr_usrreqs key_usrreqs = { key_disconnect, pru_listen_notsupp, key_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp, key_send, pru_sense_null, key_shutdown, - key_sockaddr, sosend, soreceive, sopoll + key_sockaddr, sosend, soreceive, pru_sopoll_notsupp }; /* sysctl */ @@ -603,13 +516,15 @@ struct protosw keysw[] = { { SOCK_RAW, &keydomain, PF_KEY_V2, PR_ATOMIC|PR_ADDR, 0, key_output, raw_ctlinput, 0, 0, - 
raw_init, 0, 0, 0, - 0, &key_usrreqs + key_init, 0, 0, 0, + 0, + &key_usrreqs, + 0, 0, 0, } }; struct domain keydomain = - { PF_KEY, "key", key_init, 0, 0, + { PF_KEY, "key", key_domain_init, 0, 0, keysw, 0, 0,0, sizeof(struct key_cb), 0 diff --git a/bsd/netkey/keysock.h b/bsd/netkey/keysock.h index f176d1a20..c055f2488 100644 --- a/bsd/netkey/keysock.h +++ b/bsd/netkey/keysock.h @@ -62,8 +62,8 @@ struct pfkeystat { #define KEY_SENDUP_ALL 1 #define KEY_SENDUP_REGISTERED 2 +#ifdef KERNEL_PRIVATE #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE struct keycb { struct rawcb kp_raw; /* rawcb */ int kp_promisc; /* promiscuous mode */ @@ -73,16 +73,16 @@ struct keycb { extern struct pfkeystat pfkeystat; #ifdef __APPLE__ -extern int key_output __P((struct mbuf *, struct socket* so)); +extern int key_output(struct mbuf *, struct socket* so); #else -extern int key_output __P((struct mbuf *, ...)); +extern int key_output(struct mbuf *, ...); #endif -extern int key_usrreq __P((struct socket *, - int, struct mbuf *, struct mbuf *, struct mbuf *)); +extern int key_usrreq(struct socket *, + int, struct mbuf *, struct mbuf *, struct mbuf *); -extern int key_sendup __P((struct socket *, struct sadb_msg *, u_int, int)); -extern int key_sendup_mbuf __P((struct socket *, struct mbuf *, int)); -#endif /* __APPLE_API_PRIVATE */ +extern int key_sendup(struct socket *, struct sadb_msg *, u_int, int); +extern int key_sendup_mbuf(struct socket *, struct mbuf *, int); #endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /*_NETKEY_KEYSOCK_H_*/ diff --git a/bsd/nfs/Makefile b/bsd/nfs/Makefile index 55a7668eb..fa4b4198d 100644 --- a/bsd/nfs/Makefile +++ b/bsd/nfs/Makefile @@ -21,8 +21,8 @@ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ krpc.h nfs.h nfsdiskless.h nfsm_subs.h nfsmount.h nfsnode.h \ - nlminfo.h nfs_lock.h \ - nfsproto.h nfsrtt.h nfsrvcache.h nqnfs.h rpcv2.h xdr_subs.h + nfs_lock.h \ + nfsproto.h nfsrtt.h nfsrvcache.h rpcv2.h xdr_subs.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/nfs/krpc.h b/bsd/nfs/krpc.h index 16fe898a8..5d69fd6f6 100644 --- a/bsd/nfs/krpc.h +++ b/bsd/nfs/krpc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,13 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. - * - */ - + #ifndef __NFS_KRPC_H__ #define __NFS_KRPC_H__ @@ -35,12 +29,12 @@ #include #ifdef __APPLE_API_PRIVATE -int krpc_call __P((struct sockaddr_in *sin, +int krpc_call(struct sockaddr_in *sin, u_int sotype, u_int prog, u_int vers, u_int func, - struct mbuf **data, struct sockaddr_in **from)); + mbuf_t *data, struct sockaddr_in *from); -int krpc_portmap __P((struct sockaddr_in *sin, - u_int prog, u_int vers, u_int16_t *portp)); +int krpc_portmap(struct sockaddr_in *sin, + u_int prog, u_int vers, u_int proto, u_int16_t *portp); /* diff --git a/bsd/nfs/krpc_subr.c b/bsd/nfs/krpc_subr.c index 8726f59d9..694b152f5 100644 --- a/bsd/nfs/krpc_subr.c +++ b/bsd/nfs/krpc_subr.c @@ -64,12 +64,13 @@ #include #include #include -#include +#include #include #include #include #include #include +#include #include #include @@ -140,10 +141,10 @@ struct rpc_reply { * Returns non-zero error on failure. 
*/ int -krpc_portmap(sin, prog, vers, portp) - struct sockaddr_in *sin; /* server address */ - u_int prog, vers; /* host order */ - u_int16_t *portp; /* network order */ +krpc_portmap(sin, prog, vers, proto, portp) + struct sockaddr_in *sin; /* server address */ + u_int prog, vers, proto; /* host order */ + u_int16_t *portp; /* network order */ { struct sdata { u_int32_t prog; /* call program */ @@ -155,7 +156,7 @@ krpc_portmap(sin, prog, vers, portp) u_int16_t pad; u_int16_t port; } *rdata; - struct mbuf *m; + mbuf_t m; int error; /* The portmapper port is fixed. */ @@ -164,30 +165,32 @@ krpc_portmap(sin, prog, vers, portp) return 0; } - m = m_gethdr(M_WAIT, MT_DATA); - if (m == NULL) - return ENOBUFS; - m->m_len = sizeof(*sdata); - m->m_pkthdr.len = m->m_len; - sdata = mtod(m, struct sdata *); + error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &m); + if (error) + return error; + mbuf_setlen(m, sizeof(*sdata)); + mbuf_pkthdr_setlen(m, sizeof(*sdata)); + sdata = mbuf_data(m); /* Do the RPC to get it. */ sdata->prog = htonl(prog); sdata->vers = htonl(vers); - sdata->proto = htonl(IPPROTO_UDP); + sdata->proto = htonl(proto); sdata->port = 0; sin->sin_port = htons(PMAPPORT); - error = krpc_call(sin, PMAPPROG, PMAPVERS, - PMAPPROC_GETPORT, &m, NULL); + error = krpc_call(sin, SOCK_DGRAM, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, &m, NULL); if (error) return error; - rdata = mtod(m, struct rdata *); + rdata = mbuf_data(m); *portp = rdata->port; - m_freem(m); - return 0; + if (!rdata->port) + error = EPROGUNAVAIL; + + mbuf_freem(m); + return (error); } /* @@ -196,22 +199,21 @@ krpc_portmap(sin, prog, vers, portp) * the address from whence the response came is saved there. */ int -krpc_call(sa, prog, vers, func, data, from_p) +krpc_call(sa, sotype, prog, vers, func, data, from_p) struct sockaddr_in *sa; - u_int prog, vers, func; - struct mbuf **data; /* input/output */ - struct sockaddr_in **from_p; /* output */ + u_int sotype, prog, vers, func; + mbuf_t *data; /* input/output */ + struct sockaddr_in *from_p; /* output */ { - struct socket *so; + socket_t so; struct sockaddr_in *sin; - struct mbuf *m, *nam, *mhead, *mhck; + mbuf_t m, nam, mhead; struct rpc_call *call; struct rpc_reply *reply; - struct uio auio; - int error, rcvflg, timo, secs, len; + int error, timo, secs, len; static u_int32_t xid = ~0xFF; u_int16_t tport; - struct sockopt sopt; + int maxpacket = 1<<16; /* * Validate address family. @@ -222,13 +224,11 @@ krpc_call(sa, prog, vers, func, data, from_p) /* Free at end if not null. */ nam = mhead = NULL; - if (from_p) - *from_p = 0; /* * Create socket and set its recieve timeout. */ - if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0))) + if ((error = sock_socket(AF_INET, sotype, 0, 0, 0, &so))) goto out; { @@ -236,14 +236,8 @@ krpc_call(sa, prog, vers, func, data, from_p) tv.tv_sec = 1; tv.tv_usec = 0; - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_RCVTIMEO; - sopt.sopt_val = &tv; - sopt.sopt_valsize = sizeof tv; - - if (error = sosetopt(so, &sopt)) + + if ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) goto out; } @@ -252,12 +246,9 @@ krpc_call(sa, prog, vers, func, data, from_p) * Enable broadcast if necessary. 
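krpc_portmap() above now takes the transport protocol as a parameter and builds its request through the opaque mbuf_*() accessors, but the wire payload is still the fixed four-word RFC 1057 portmapper GETPORT call. Just the marshalling, with no mbufs involved:

#include <arpa/inet.h>
#include <stdint.h>

struct pmap_getport_req {
	uint32_t prog;   /* program to look up */
	uint32_t vers;   /* program version */
	uint32_t proto;  /* IPPROTO_UDP or IPPROTO_TCP; now caller-supplied */
	uint32_t port;   /* ignored in the request */
};

void
pmap_getport_fill(struct pmap_getport_req *r,
    uint32_t prog, uint32_t vers, uint32_t proto)
{
	r->prog = htonl(prog);
	r->vers = htonl(vers);
	r->proto = htonl(proto);
	r->port = 0;
}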
*/ - if (from_p) { + if (from_p && (sotype == SOCK_DGRAM)) { int on = 1; - sopt.sopt_name = SO_BROADCAST; - sopt.sopt_val = &on; - sopt.sopt_valsize = sizeof on; - if (error = sosetopt(so, &sopt)) + if ((error = sock_setsockopt(so, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)))) goto out; } @@ -266,19 +257,22 @@ krpc_call(sa, prog, vers, func, data, from_p) * because some NFS servers refuse requests from * non-reserved (non-privileged) ports. */ - m = m_getclr(M_WAIT, MT_SONAME); - sin = mtod(m, struct sockaddr_in *); - sin->sin_len = m->m_len = sizeof(*sin); + if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m))) + goto out; + sin = mbuf_data(m); + bzero(sin, sizeof(*sin)); + mbuf_setlen(m, sizeof(*sin)); + sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED; do { tport--; sin->sin_port = htons(tport); - error = sobind(so, mtod(m, struct sockaddr *)); + error = sock_bind(so, (struct sockaddr*)sin); } while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2); - m_freem(m); + mbuf_freem(m); m = NULL; if (error) { printf("bind failed\n"); @@ -288,13 +282,27 @@ krpc_call(sa, prog, vers, func, data, from_p) /* * Setup socket address for the server. */ - nam = m_get(M_WAIT, MT_SONAME); - if (nam == NULL) { - error = ENOBUFS; + if ((error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &nam))) goto out; + sin = mbuf_data(nam); + mbuf_setlen(nam, sa->sin_len); + bcopy((caddr_t)sa, (caddr_t)sin, sa->sin_len); + + if (sotype == SOCK_STREAM) { + struct timeval tv; + tv.tv_sec = 60; + tv.tv_usec = 0; + error = sock_connect(so, mbuf_data(nam), MSG_DONTWAIT); + if (error && (error != EINPROGRESS)) + goto out; + error = sock_connectwait(so, &tv); + if (error) { + if (error == EINPROGRESS) + error = ETIMEDOUT; + printf("krpc_call: error waiting for TCP socket connect: %d\n", error); + goto out; + } } - sin = mtod(nam, struct sockaddr_in *); - bcopy((caddr_t)sa, (caddr_t)sin, (nam->m_len = sa->sin_len)); /* * Prepend RPC message header. @@ -302,23 +310,31 @@ krpc_call(sa, prog, vers, func, data, from_p) m = *data; *data = NULL; #if DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) + if ((mbuf_flags(m) & MBUF_PKTHDR) == 0) panic("krpc_call: send data w/o pkthdr"); - if (m->m_pkthdr.len < m->m_len) + if (mbuf_pkthdr_len(m) < mbuf_len(m)) panic("krpc_call: pkthdr.len not set"); #endif - mhead = m_prepend(m, sizeof(*call), M_WAIT); - if (mhead == NULL) { - error = ENOBUFS; + len = sizeof(*call); + if (sotype == SOCK_STREAM) + len += 4; /* account for RPC record marker */ + mhead = m; + if ((error = mbuf_prepend(&mhead, len, MBUF_WAITOK))) + goto out; + if ((error = mbuf_pkthdr_setrcvif(mhead, NULL))) goto out; - } - mhead->m_pkthdr.len += sizeof(*call); - mhead->m_pkthdr.rcvif = NULL; /* * Fill in the RPC header */ - call = mtod(mhead, struct rpc_call *); + if (sotype == SOCK_STREAM) { + /* first, fill in RPC record marker */ + u_long *recmark = mbuf_data(mhead); + *recmark = htonl(0x80000000 | (mbuf_pkthdr_len(mhead) - 4)); + call = (struct rpc_call *)(recmark + 1); + } else { + call = mbuf_data(mhead); + } bzero((caddr_t)call, sizeof(*call)); xid++; call->rp_xid = htonl(xid); @@ -337,13 +353,20 @@ krpc_call(sa, prog, vers, func, data, from_p) */ timo = 0; for (;;) { + struct msghdr msg; + /* Send RPC request (or re-send). 
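The bind loop above walks downward from IPPORT_RESERVED until a port sticks, because some NFS servers refuse requests from non-privileged ports. The same loop over plain sockets:

#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

int
bind_reserved(int fd)
{
	struct sockaddr_in sin;
	uint16_t tport = IPPORT_RESERVED;
	int error;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = INADDR_ANY;
	do {
		sin.sin_port = htons(--tport);  /* 1023, 1022, ... */
		error = bind(fd, (struct sockaddr *)&sin, sizeof(sin)) ? errno : 0;
	} while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2);
	return error;
}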
*/ - m = m_copym(mhead, 0, M_COPYALL, M_WAIT); - if (m == NULL) { - error = ENOBUFS; + if ((error = mbuf_copym(mhead, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) goto out; + bzero(&msg, sizeof(msg)); + if (sotype == SOCK_STREAM) { + msg.msg_name = NULL; + msg.msg_namelen = 0; + } else { + msg.msg_name = mbuf_data(nam); + msg.msg_namelen = mbuf_len(nam); } - error = sosend(so, mtod(nam, struct sockaddr *), NULL, m, NULL, 0); + error = sock_sendmbuf(so, &msg, m, 0, 0); if (error) { printf("krpc_call: sosend: %d\n", error); goto out; @@ -357,31 +380,70 @@ krpc_call(sa, prog, vers, func, data, from_p) printf("RPC timeout for server " IP_FORMAT "\n", IP_LIST(&(sin->sin_addr.s_addr))); - /* - * soreceive is now conditionally using this pointer - * if present, it updates per-proc stats - */ - auio.uio_procp = NULL; - /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { - if ((from_p) && (*from_p)){ - FREE(*from_p, M_SONAME); - *from_p = NULL; - } - + size_t readlen; + if (m) { - m_freem(m); + mbuf_freem(m); m = NULL; } - auio.uio_resid = len = 1<<16; - rcvflg = 0; - - error = soreceive(so, (struct sockaddr **) from_p, &auio, &m, NULL, &rcvflg); + if (sotype == SOCK_STREAM) { + int maxretries = 60; + struct iovec_32 aio; + aio.iov_base = (uintptr_t) &len; + aio.iov_len = sizeof(u_long); + bzero(&msg, sizeof(msg)); + msg.msg_iov = (struct iovec *) &aio; + msg.msg_iovlen = 1; + do { + error = sock_receive(so, &msg, MSG_WAITALL, &readlen); + if ((error == EWOULDBLOCK) && (--maxretries <= 0)) + error = ETIMEDOUT; + } while (error == EWOULDBLOCK); + if (!error && readlen < aio.iov_len) { + /* only log a message if we got a partial word */ + if (readlen != 0) + printf("short receive (%d/%d) from server " IP_FORMAT "\n", + readlen, sizeof(u_long), IP_LIST(&(sin->sin_addr.s_addr))); + error = EPIPE; + } + if (error) + goto out; + len = ntohl(len) & ~0x80000000; + /* + * This is SERIOUS! We are out of sync with the sender + * and forcing a disconnect/reconnect is all I can do. + */ + if (len > maxpacket) { + printf("impossible packet length (%d) from server %s\n", + len, IP_LIST(&(sin->sin_addr.s_addr))); + error = EFBIG; + goto out; + } + + do { + readlen = len; + error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &readlen); + } while (error == EWOULDBLOCK); + + if (!error && (len > (int)readlen)) { + printf("short receive (%d/%d) from server %s\n", + readlen, len, IP_LIST(&(sin->sin_addr.s_addr))); + error = EPIPE; + } + } else { + len = maxpacket; + readlen = len; + bzero(&msg, sizeof(msg)); + msg.msg_name = from_p; + msg.msg_namelen = (from_p == NULL) ? 0 : sizeof(*from_p); + error = sock_receivembuf(so, &msg, &m, 0, &readlen); + } if (error == EWOULDBLOCK) { secs--; @@ -389,14 +451,14 @@ krpc_call(sa, prog, vers, func, data, from_p) } if (error) goto out; - len -= auio.uio_resid; + len = readlen; /* Does the reply contain at least a header? */ if (len < MIN_REPLY_HDR) continue; - if (m->m_len < MIN_REPLY_HDR) + if (mbuf_len(m) < MIN_REPLY_HDR) continue; - reply = mtod(m, struct rpc_reply *); + reply = mbuf_data(m); /* Is it the right reply? */ if (reply->rp_direction != htonl(RPC_REPLY)) @@ -404,7 +466,7 @@ krpc_call(sa, prog, vers, func, data, from_p) if (reply->rp_xid != htonl(xid)) continue; - + /* Was RPC accepted? (authorization OK) */ if (reply->rp_astatus != 0) { error = ntohl(reply->rp_u.rpu_errno); @@ -463,17 +525,16 @@ krpc_call(sa, prog, vers, func, data, from_p) * contiguous (fix callers instead). 
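The SOCK_STREAM path above frames each RPC in a record marker, written on the send side and parsed back in the receive loop: the high bit flags the last fragment, the low 31 bits carry the length, and the claimed length is bounded against maxpacket before it is trusted. A userland equivalent of both sides:

#include <arpa/inet.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Send side: build the marker that precedes a fragment. */
uint32_t
rpc_record_marker(uint32_t fraglen, int last)
{
	return htonl((last ? 0x80000000u : 0u) | (fraglen & 0x7fffffffu));
}

/* Receive side: read the marker with MSG_WAITALL, bound the claimed
 * length before trusting it, then read exactly that many bytes. */
int
read_rpc_record(int fd, void **out, uint32_t *outlen, uint32_t maxpacket)
{
	uint32_t marker, len;
	char *buf;

	if (recv(fd, &marker, sizeof(marker), MSG_WAITALL) !=
	    (ssize_t)sizeof(marker))
		return EPIPE;            /* short marker: stream is torn */
	len = ntohl(marker) & ~0x80000000u;
	if (len > maxpacket)
		return EFBIG;            /* impossible length: resync needed */
	if ((buf = malloc(len)) == NULL)
		return ENOBUFS;
	if (recv(fd, buf, len, MSG_WAITALL) != (ssize_t)len) {
		free(buf);
		return EPIPE;            /* short record body */
	}
	*out = buf;
	*outlen = len;
	return 0;
}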
-gwr */ #if DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) + if ((mbuf_flags(m) & MBUF_PKTHDR) == 0) panic("krpc_call: received pkt w/o header?"); #endif - len = m->m_pkthdr.len; - if (m->m_len < len) { - m = m_pullup(m, len); - if (m == NULL) { - error = ENOBUFS; + len = mbuf_pkthdr_len(m); + if (sotype == SOCK_STREAM) + len -= 4; /* the RPC record marker was read separately */ + if (mbuf_len(m) < len) { + if ((error = mbuf_pullup(&m, len))) goto out; - } - reply = mtod(m, struct rpc_reply *); + reply = mbuf_data(m); } /* @@ -484,13 +545,13 @@ krpc_call(sa, prog, vers, func, data, from_p) len += ntohl(reply->rp_u.rpu_ok.rp_auth.rp_alen); len = (len + 3) & ~3; /* XXX? */ } - m_adj(m, len); + mbuf_adj(m, len); /* result */ *data = m; out: - if (nam) m_freem(nam); - if (mhead) m_freem(mhead); - soclose(so); + if (nam) mbuf_freem(nam); + if (mhead) mbuf_freem(mhead); + sock_close(so); return error; } diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index d9385473a..b955a56e8 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,6 +63,7 @@ #define _NFS_NFS_H_ #include +#include #ifdef __APPLE_API_PRIVATE /* @@ -79,6 +80,7 @@ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ +#define NFS_TRYLATERDEL 15 /* Initial try later delay (sec) */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ @@ -86,6 +88,12 @@ #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif +#ifndef NFS_MINDIRATTRTIMO +#define NFS_MINDIRATTRTIMO 5 /* directory attribute cache timeout in sec */ +#endif +#ifndef NFS_MAXDIRATTRTIMO +#define NFS_MAXDIRATTRTIMO 60 +#endif #define NFS_WSIZE 16384 /* Def. write data size <= 16K */ #define NFS_RSIZE 16384 /* Def. read data size <= 16K */ #define NFS_DGRAM_WSIZE 8192 /* UDP Def. write data size <= 8K */ @@ -111,7 +119,7 @@ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) -#define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) +#define NFS_ISV3(v) (VFSTONFS(vnode_mount(v))->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) @@ -153,7 +161,7 @@ /* * Arguments to mount NFS */ -#define NFS_ARGSVERSION 3 /* change when nfs_args changes */ +#define NFS_ARGSVERSION 4 /* change when nfs_args changes */ struct nfs_args { int version; /* args structure version number */ struct sockaddr *addr; /* file server address */ @@ -170,11 +178,99 @@ struct nfs_args { int retrans; /* times to retry send */ int maxgrouplist; /* Max. 
size of group list */ int readahead; /* # of blocks to readahead */ - int leaseterm; /* Term (sec) of lease */ - int deadthresh; /* Retrans threshold */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ + char *hostname; /* server's name */ + /* NFS_ARGSVERSION 3 ends here */ + int acregmin; /* reg file min attr cache timeout */ + int acregmax; /* reg file max attr cache timeout */ + int acdirmin; /* dir min attr cache timeout */ + int acdirmax; /* dir max attr cache timeout */ +}; + +struct nfs_args3 { + int version; /* args structure version number */ + struct sockaddr *addr; /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + u_char *fh; /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ char *hostname; /* server's name */ }; +// LP64todo - should this move? +#ifdef KERNEL +/* LP64 version of nfs_args. all pointers and longs + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with nfs_args + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_nfs_args { + int version; /* args structure version number */ + user_addr_t addr; /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + user_addr_t fh; /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ + user_addr_t hostname; /* server's name */ + /* NFS_ARGSVERSION 3 ends here */ + int acregmin; /* reg file min attr cache timeout */ + int acregmax; /* reg file max attr cache timeout */ + int acdirmin; /* dir min attr cache timeout */ + int acdirmax; /* dir max attr cache timeout */ +}; +struct user_nfs_args3 { + int version; /* args structure version number */ + user_addr_t addr; /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + user_addr_t fh; /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. 
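NFS_ARGSVERSION moves to 4 above while the old layout survives as nfs_args3, so a mount path can honor both by checking version before touching the new ac* timeout fields at the tail. The dispatch itself is not in this excerpt; a sketch of the pattern on trimmed stand-in structs:

#include <string.h>

struct args_v3_model { int version; int timeo; /* v3 layout ends here */ };
struct args_v4_model { int version; int timeo; int acregmin; int acregmax; };

/* Copy only what the caller's declared version provides; the new tail
 * fields keep their zero defaults for older callers. */
void
read_mount_args(const void *uap, struct args_v4_model *out)
{
	memset(out, 0, sizeof(*out));
	memcpy(out, uap, sizeof(struct args_v3_model));      /* common prefix */
	if (((const struct args_v3_model *)uap)->version >= 4)
		memcpy(out, uap, sizeof(struct args_v4_model)); /* new tail too */
}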
size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ + user_addr_t hostname; /* server's name */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + /* * NFS mount option flags */ @@ -186,29 +282,31 @@ struct nfs_args { #define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */ #define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */ #define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */ -#define NFSMNT_NQNFS 0x00000100 /* Use Nqnfs protocol */ #define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */ #define NFSMNT_KERB 0x00000400 /* Use Kerberos authentication */ #define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */ -#define NFSMNT_LEASETERM 0x00001000 /* set lease term (nqnfs) */ #define NFSMNT_READAHEAD 0x00002000 /* set read ahead */ -#define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */ #define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */ #define NFSMNT_RDIRPLUS 0x00010000 /* Use Readdirplus for V3 */ #define NFSMNT_READDIRSIZE 0x00020000 /* Set readdir size */ #define NFSMNT_NOLOCKS 0x00040000 /* don't support file locking */ +#define NFSMNT_ACREGMIN 0x00100000 /* reg min attr cache timeout */ +#define NFSMNT_ACREGMAX 0x00200000 /* reg max attr cache timeout */ +#define NFSMNT_ACDIRMIN 0x00400000 /* dir min attr cache timeout */ +#define NFSMNT_ACDIRMAX 0x00800000 /* dir max attr cache timeout */ +/* + * NFS mount state flags (nm_state) + */ #define NFSSTA_LOCKTIMEO 0x00002000 /* experienced a lock req timeout */ #define NFSSTA_MOUNTED 0x00004000 /* completely mounted */ -#define NFSSTA_LOCKSWORK 0x00008000 /* lock ops have worked. */ +#define NFSSTA_LOCKSWORK 0x00008000 /* lock ops have worked. */ #define NFSSTA_TIMEO 0x00010000 /* experienced a timeout. */ #define NFSSTA_FORCE 0x00020000 /* doing a forced unmount. */ #define NFSSTA_HASWRITEVERF 0x00040000 /* Has write verifier for V3 */ #define NFSSTA_GOTPATHCONF 0x00080000 /* Got the V3 pathconf info */ #define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */ #define NFSSTA_MNTD 0x00200000 /* Mnt server for mnt point */ -#define NFSSTA_DISMINPROG 0x00400000 /* Dismount in progress */ -#define NFSSTA_DISMNT 0x00800000 /* Dismounted */ #define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */ #define NFSSTA_WANTSND 0x02000000 /* Want above */ #define NFSSTA_RCVLOCK 0x04000000 /* Rcv socket lock */ @@ -218,6 +316,14 @@ struct nfs_args { #define NFSSTA_WANTAUTH 0x40000000 /* Wants an authenticator */ #define NFSSTA_AUTHERR 0x80000000 /* Authentication error */ +/* + * NFS mount pathconf info flags (nm_fsinfo.pcflags) + */ +#define NFSPCINFO_NOTRUNC 0x01 +#define NFSPCINFO_CHOWN_RESTRICTED 0x02 +#define NFSPCINFO_CASE_INSENSITIVE 0x04 +#define NFSPCINFO_CASE_PRESERVING 0x08 + /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try and use it. @@ -228,6 +334,28 @@ struct nfsd_args { int namelen; /* Length of name */ }; +// LP64todo - should this move? +#ifdef KERNEL +/* LP64 version of nfsd_args. all pointers and longs + * grow when we're dealing with a 64-bit process. 
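The user_nfs_args/user_nfsd_args twins above exist because pointers widen to user_addr_t when a 64-bit process makes the call, and the kernel normalizes a 32-bit caller's struct into the wide layout before using it. The widening step, with stand-in types (user_addr_t_model is hypothetical):

#include <stdint.h>
#include <string.h>

typedef uint64_t user_addr_t_model;  /* hypothetical stand-in for user_addr_t */

struct nfsd_args32_model {           /* what a 32-bit process passes */
	int sock;
	uint32_t name;               /* 32-bit user pointer */
	int namelen;
};

struct user_nfsd_args_model {        /* the kernel's wide view */
	int sock;
	user_addr_t_model name;      /* widened user pointer */
	int namelen;
};

void
widen_nfsd_args(const struct nfsd_args32_model *in,
    struct user_nfsd_args_model *out)
{
	memset(out, 0, sizeof(*out));
	out->sock = in->sock;
	out->name = (user_addr_t_model)in->name;  /* zero-extend the pointer */
	out->namelen = in->namelen;
}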
+ * WARNING - keep in sync with nfsd_args + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_nfsd_args { + int sock; /* Socket to serve */ + user_addr_t name; /* Client addr for connection based sockets */ + int namelen; /* Length of name */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ @@ -253,6 +381,140 @@ struct nfsd_cargs { NFSKERBKEY_T ncd_key; /* Session key */ }; +/* + * NFS Server File Handle structures + */ + +/* NFS export handle identifies which NFS export */ +#define NFS_FH_VERSION 0x4e580000 /* 'NX00' */ +struct nfs_exphandle { + uint32_t nxh_version; /* data structure version */ + uint32_t nxh_fsid; /* File System Export ID */ + uint32_t nxh_expid; /* Export ID */ + uint16_t nxh_flags; /* export handle flags */ + uint8_t nxh_reserved; /* future use */ + uint8_t nxh_fidlen; /* length of File ID */ +}; + +/* nxh_flags */ +#define NXHF_INVALIDFH 0x0001 /* file handle is invalid */ + +#define NFS_MAX_FID_SIZE (NFS_MAX_FH_SIZE - sizeof(struct nfs_exphandle)) +#define NFSV2_MAX_FID_SIZE (NFSV2_MAX_FH_SIZE - sizeof(struct nfs_exphandle)) + +/* NFS server internal view of fhandle_t */ +struct nfs_filehandle { + int nfh_len; /* total length of file handle */ + struct nfs_exphandle nfh_xh; /* export handle */ + unsigned char nfh_fid[NFS_MAX_FID_SIZE]; /* File ID */ +}; + +/* + * NFS export data structures + */ + +struct nfs_export_net_args { + uint32_t nxna_flags; /* export flags */ + struct xucred nxna_cred; /* mapped credential for root/all user */ + struct sockaddr_storage nxna_addr; /* net address to which exported */ + struct sockaddr_storage nxna_mask; /* mask for net address */ +}; + +struct nfs_export_args { + uint32_t nxa_fsid; /* export FS ID */ + uint32_t nxa_expid; /* export ID */ + char *nxa_fspath; /* export FS path */ + char *nxa_exppath; /* export sub-path */ + uint32_t nxa_flags; /* export arg flags */ + uint32_t nxa_netcount; /* #entries in ex_nets array */ + struct nfs_export_net_args *nxa_nets; /* array of net args */ +}; + +#ifdef KERNEL +/* LP64 version of export_args */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_nfs_export_args { + uint32_t nxa_fsid; /* export FS ID */ + uint32_t nxa_expid; /* export ID */ + user_addr_t nxa_fspath; /* export FS path */ + user_addr_t nxa_exppath; /* export sub-path */ + uint32_t nxa_flags; /* export arg flags */ + uint32_t nxa_netcount; /* #entries in ex_nets array */ + user_addr_t nxa_nets; /* array of net args */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL */ + +/* nfs export arg flags */ +#define NXA_DELETE 0x0001 /* delete the specified export(s) */ +#define NXA_ADD 0x0002 /* add the specified export(s) */ +#define NXA_REPLACE 0x0003 /* delete and add the specified export(s) */ + +/* export option flags */ +#define NX_READONLY 0x0001 /* exported read-only */ +#define NX_DEFAULTEXPORT 0x0002 /* exported to the world */ +#define NX_MAPROOT 0x0004 /* map root access to anon credential */ +#define NX_MAPALL 0x0008 /* map all access to anon credential */ +#define NX_KERB 0x0010 /* exported with Kerberos uid mapping */ + +#ifdef KERNEL +struct nfs_exportfs; + +struct nfs_export_options { + uint32_t nxo_flags; /* export options */ + kauth_cred_t nxo_cred; /* mapped credential */ +}; + +/* Network address lookup element and individual 
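A server file handle in the scheme above is a fixed nfs_exphandle header followed by the per-filesystem file id, with the id capped so the pair fits the wire handle. A sketch of that packing; FH_MAX and the trimmed header are assumptions standing in for NFS_MAX_FH_SIZE and nfs_exphandle:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define FH_MAX 64  /* assumption standing in for NFS_MAX_FH_SIZE */

struct exphandle_model {  /* trimmed nfs_exphandle, same field widths */
	uint32_t version, fsid, expid;
	uint16_t flags;
	uint8_t reserved, fidlen;
};

/* Pack header plus file id into one wire handle; refuse ids that the
 * header leaves no room for. Returns the handle length, 0 on overflow. */
size_t
build_fh(unsigned char fh[FH_MAX], const struct exphandle_model *xh,
    const unsigned char *fid, size_t fidlen)
{
	if (fidlen > FH_MAX - sizeof(*xh))
		return 0;
	memcpy(fh, xh, sizeof(*xh));
	memcpy(fh + sizeof(*xh), fid, fidlen);
	return sizeof(*xh) + fidlen;
}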
export options */ +struct nfs_netopt { + struct radix_node no_rnodes[2]; /* radix tree glue */ + struct nfs_export_options no_opt; /* export options */ +}; + +/* Network export information */ +/* one of these for each exported directory */ +struct nfs_export { + LIST_ENTRY(nfs_export) nx_next; /* FS export list */ + LIST_ENTRY(nfs_export) nx_hash; /* export hash chain */ + struct nfs_export *nx_parent; /* parent export */ + uint32_t nx_id; /* export ID */ + uint32_t nx_flags; /* export flags */ + struct nfs_exportfs *nx_fs; /* exported file system */ + char *nx_path; /* exported file system sub-path */ + struct nfs_filehandle nx_fh; /* export root file handle */ + struct nfs_export_options nx_defopt; /* default options */ + uint32_t nx_expcnt; /* # exports in table */ + struct radix_node_head *nx_rtable[AF_MAX+1]; /* table of exports (netopts) */ +}; + +/* NFS exported file system info */ +/* one of these for each exported file system */ +struct nfs_exportfs { + LIST_ENTRY(nfs_exportfs) nxfs_next; /* exported file system list */ + uint32_t nxfs_id; /* exported file system ID */ + char *nxfs_path; /* exported file system path */ + LIST_HEAD(,nfs_export) nxfs_exports; /* list of exports for this file system */ +}; + +extern LIST_HEAD(nfsexpfslist, nfs_exportfs) nfs_exports; +extern lck_rw_t nfs_export_rwlock; // lock for export data structures +#define NFSEXPHASHVAL(FSID, EXPID) \ + (((FSID) >> 24) ^ ((FSID) >> 16) ^ ((FSID) >> 8) ^ (EXPID)) +#define NFSEXPHASH(FSID, EXPID) \ + (&nfsexphashtbl[NFSEXPHASHVAL((FSID),(EXPID)) & nfsexphash]) +extern LIST_HEAD(nfsexphashhead, nfs_export) *nfsexphashtbl; +extern u_long nfsexphash; + +#endif // KERNEL + /* * XXX to allow amd to include nfs.h without nfsproto.h */ @@ -290,9 +552,6 @@ struct nfsstats { int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; - int srvnqnfs_leases; - int srvnqnfs_maxleases; - int srvnqnfs_getleases; int srvvop_writes; int pageins; int pageouts; @@ -309,6 +568,7 @@ struct nfsstats { #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 +#define NFSSVC_EXPORT 0x200 /* * Flags for nfsclnt() system call. @@ -345,6 +605,7 @@ struct nfsstats { * by them and break. */ #ifdef KERNEL +#include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_NFSREQ); @@ -352,36 +613,40 @@ MALLOC_DECLARE(M_NFSMNT); MALLOC_DECLARE(M_NFSDIROFF); MALLOC_DECLARE(M_NFSRVDESC); MALLOC_DECLARE(M_NFSUID); -MALLOC_DECLARE(M_NQLEASE); MALLOC_DECLARE(M_NFSD); MALLOC_DECLARE(M_NFSBIGFH); #endif -struct uio; struct vattr; struct nameidata; /* XXX */ +struct uio; struct vnode_attr; struct nameidata; /* XXX */ struct nfsbuf; +struct nfs_vattr; +struct nfsnode; #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) +__private_extern__ int nfs_mbuf_mlen, nfs_mbuf_mhlen, + nfs_mbuf_minclsize, nfs_mbuf_mclbytes; + /* * Socket errors ignored for connectionless sockets?? 
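NFSEXPHASHVAL/NFSEXPHASH above fold the fsid bytes together, mix in the export id, and mask into the table, nfsexphash being the table mask. The same computation as a plain function:

#include <stdint.h>

unsigned long
nfs_exp_bucket(uint32_t fsid, uint32_t expid, unsigned long mask)
{
	unsigned long v = (fsid >> 24) ^ (fsid >> 16) ^ (fsid >> 8) ^ expid;
	return v & mask;  /* mask is nfsexphash, table size minus one */
}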
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ - (e) != EIO && ((s) & PR_CONNREQUIRED) == 0) + (e) != EIO && ((s)) != SOCK_STREAM) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; - struct mbuf *r_mreq; - struct mbuf *r_mrep; - struct mbuf *r_md; + mbuf_t r_mreq; + mbuf_t r_mrep; + mbuf_t r_md; caddr_t r_dpos; struct nfsmount *r_nmp; - struct vnode *r_vp; + vnode_t r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ @@ -389,7 +654,7 @@ struct nfsreq { int r_timer; /* tick counter on reply */ u_int32_t r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ - struct proc *r_procp; /* Proc that did I/O system call */ + proc_t r_procp; /* Proc that did I/O system call */ long r_lastmsg; /* time of last tprintf */ }; @@ -406,7 +671,6 @@ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; #define R_SOCKERR 0x0010 /* Fatal error on socket */ #define R_TPRINTFMSG 0x0020 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x0040 /* Must resend request */ -#define R_GETONEREP 0x0080 /* Probe for one reply only */ #define R_BUSY 0x0100 /* Locked. */ #define R_WAITING 0x0200 /* Someone waiting for lock. */ #define R_RESENDERR 0x0400 /* resend failed. */ @@ -418,7 +682,7 @@ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ -#define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ +#define NFS_UIDHASHSIZ 13 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) @@ -434,7 +698,7 @@ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; */ union nethostaddr { u_long had_inetaddr; - struct mbuf *had_nam; + mbuf_t had_nam; }; struct nfsuid { @@ -442,7 +706,7 @@ struct nfsuid { LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ - struct ucred nu_cr; /* Cred uid mapped to */ + kauth_cred_t nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. 
timestamp */ u_long nu_nickname; /* Nickname on server */ @@ -461,46 +725,50 @@ struct nfsuid { struct nfsrv_rec { STAILQ_ENTRY(nfsrv_rec) nr_link; struct sockaddr *nr_address; - struct mbuf *nr_packet; + mbuf_t nr_packet; }; #endif struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ - TAILQ_HEAD(, nfsuid) ns_uidlruhead; - struct file *ns_fp; - struct socket *ns_so; - struct mbuf *ns_nam; - struct mbuf *ns_raw; - struct mbuf *ns_rawend; - struct mbuf *ns_rec; - struct mbuf *ns_recend; - struct mbuf *ns_frag; - short ns_flag; /* modified under kernel funnel */ - short ns_nflag; /* modified under network funnel */ - int ns_solock; + lck_rw_t ns_rwlock; /* lock for most fields */ + socket_t ns_so; + mbuf_t ns_nam; + mbuf_t ns_raw; + mbuf_t ns_rawend; + mbuf_t ns_rec; + mbuf_t ns_recend; + mbuf_t ns_frag; + int ns_flag; + int ns_sotype; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; + lck_mtx_t ns_wgmutex; /* mutex for write gather fields */ + u_quad_t ns_wgtime; /* next Write deadline (usec) */ LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ - LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; + TAILQ_HEAD(, nfsuid) ns_uidlruhead; + LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; }; -/* Bits for "ns_*flag" */ -#define SLP_VALID 0x01 /* ns_flag */ -#define SLP_DOREC 0x02 /* ns_flag */ -#define SLPN_NEEDQ 0x04 /* ns_nflag */ -#define SLPN_DISCONN 0x08 /* ns_nflag */ -#define SLPN_GETSTREAM 0x10 /* ns_nflag */ -#define SLPN_LASTFRAG 0x20 /* ns_nflag */ -#define SLP_ALLFLAGS 0xff /* ns_flag && ns_nflag */ +/* Bits for "ns_flag" */ +#define SLP_VALID 0x01 /* nfs sock valid */ +#define SLP_DOREC 0x02 /* nfs sock has received data to process */ +#define SLP_NEEDQ 0x04 /* network socket has data to receive */ +#define SLP_DISCONN 0x08 /* socket needs to be zapped */ +#define SLP_GETSTREAM 0x10 /* currently in nfsrv_getstream() */ +#define SLP_LASTFRAG 0x20 /* on last fragment of RPC record */ +#define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; -extern int nfssvc_sockhead_flag; -#define SLP_INIT 0x01 -#define SLP_WANTINIT 0x02 + +/* locks for nfssvc_sock's */ +extern lck_grp_attr_t *nfs_slp_group_attr; +extern lck_attr_t *nfs_slp_lock_attr; +extern lck_grp_t *nfs_slp_rwlock_group; +extern lck_grp_t *nfs_slp_mutex_group; /* * One of these structures is allocated for each nfsd. 
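The nfssvc_sock rework above drops the funnel-era ns_flag/ns_nflag split in favor of a per-socket read/write lock (ns_rwlock) covering most fields plus a dedicated mutex (ns_wgmutex) for the write-gather state, with the lock groups exported as externs. A hedged sketch of the usual xnu bootstrap for those groups, modeled on the nfs_nbinit() changes later in this patch; the helper names and group-name strings are illustrative, and the struct nfssvc_sock definition comes from this header:

	#include <kern/locks.h>

	lck_grp_attr_t *nfs_slp_group_attr;
	lck_attr_t *nfs_slp_lock_attr;
	lck_grp_t *nfs_slp_rwlock_group;
	lck_grp_t *nfs_slp_mutex_group;

	/* one-time setup (hypothetical helper name) */
	static void
	nfs_slp_locks_bootstrap(void)
	{
		nfs_slp_group_attr = lck_grp_attr_alloc_init();
		nfs_slp_lock_attr = lck_attr_alloc_init();
		nfs_slp_rwlock_group = lck_grp_alloc_init("nfs-slp-rwlock", nfs_slp_group_attr);
		nfs_slp_mutex_group = lck_grp_alloc_init("nfs-slp-mutex", nfs_slp_group_attr);
	}

	/* per-socket setup when an nfssvc_sock is created (hypothetical helper name) */
	static void
	nfs_slp_locks_init(struct nfssvc_sock *slp)
	{
		lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
		lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
	}

Sharing a single lck_grp_attr_t between both groups matches the pattern nfs_nbinit() uses for the buffer-cache mutex below.

Likewise, the new export tables earlier in this header are keyed by (file system ID, export ID) pairs: NFSEXPHASHVAL() folds the upper three bytes of the FSID down onto the export ID, and NFSEXPHASH() masks the result into a hashinit()-style table whose size mask lives in nfsexphash. A standalone user-space sketch of the same arithmetic (the mask value and sample IDs are illustrative, not from the source):

	#include <stdint.h>
	#include <stdio.h>

	/* same byte-folding as NFSEXPHASHVAL() above */
	#define EXPHASHVAL(fsid, expid) \
		((((fsid) >> 24) ^ ((fsid) >> 16) ^ ((fsid) >> 8)) ^ (expid))

	int
	main(void)
	{
		uint32_t fsid = 0x01020304, expid;
		unsigned long mask = 63;	/* stand-in for the kernel's nfsexphash mask */

		for (expid = 0; expid < 4; expid++)
			printf("fsid 0x%08x expid %u -> chain %lu\n",
			    (unsigned)fsid, (unsigned)expid,
			    (unsigned long)(EXPHASHVAL(fsid, expid) & mask));
		return (0);
	}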
@@ -513,7 +781,7 @@ struct nfsd { u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; - struct proc *nfsd_procp; /* Proc ptr */ + proc_t nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; @@ -534,11 +802,11 @@ struct nfsrv_descript { LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ - struct mbuf *nd_mrep; /* Request mbuf list */ - struct mbuf *nd_md; /* Current dissect mbuf */ - struct mbuf *nd_mreq; /* Reply mbuf list */ - struct mbuf *nd_nam; /* and socket addr */ - struct mbuf *nd_nam2; /* return socket addr */ + mbuf_t nd_mrep; /* Request mbuf list */ + mbuf_t nd_md; /* Current dissect mbuf */ + mbuf_t nd_mreq; /* Reply mbuf list */ + mbuf_t nd_nam; /* and socket addr */ + mbuf_t nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ u_int32_t nd_procnum; /* RPC # */ int nd_stable; /* storage type */ @@ -546,19 +814,13 @@ struct nfsrv_descript { int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ - u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ - fhandle_t nd_fh; /* File handle */ - struct ucred nd_cr; /* Credentials */ + struct nfs_filehandle nd_fh; /* File handle */ + kauth_cred_t nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ -#define ND_READ LEASE_READ -#define ND_WRITE LEASE_WRITE -#define ND_CHECK 0x04 -#define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 -#define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) @@ -571,14 +833,23 @@ extern int nfsd_head_flag; * These macros compare nfsrv_descript structures. */ #define NFSW_CONTIG(o, n) \ - ((o)->nd_eoff >= (n)->nd_off && \ - !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) + (((o)->nd_eoff >= (n)->nd_off) && \ + ((o)->nd_fh.nfh_len == (n)->nd_fh.nfh_len) && \ + !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, (o)->nd_fh.nfh_len)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ - !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ + !bcmp((caddr_t)(o)->nd_cr, (caddr_t)(n)->nd_cr, \ sizeof (struct ucred))) +/* mutex for nfs server */ +extern lck_grp_t * nfsd_lck_grp; +extern lck_grp_attr_t * nfsd_lck_grp_attr; +extern lck_attr_t * nfsd_lck_attr; +extern lck_mtx_t *nfsd_mutex; + +extern int nfs_numnfsd, nfsd_waiting; + /* * Defines for WebNFS */ @@ -596,280 +867,177 @@ extern int nfsd_head_flag; * Macro for converting escape characters in WebNFS pathnames. * Should really be in libkern. */ - +#define ISHEX(c) \ + ((((c) >= 'a') && ((c) <= 'f')) || \ + (((c) >= 'A') && ((c) <= 'F')) || \ + (((c) >= '0') && ((c) <= '9'))) #define HEXTOC(c) \ ((c) >= 'a' ? ((c) - ('a' - 10)) : \ ((c) >= 'A' ? 
((c) - ('A' - 10)) : ((c) - '0'))) #define HEXSTRTOI(p) \ ((HEXTOC(p[0]) << 4) + HEXTOC(p[1])) -#define NFSDIAG 0 -#if NFSDIAG - -extern int nfs_debug; -#define NFS_DEBUG_ASYNCIO 1 /* asynchronous i/o */ -#define NFS_DEBUG_WG 2 /* server write gathering */ -#define NFS_DEBUG_RC 4 /* server request caching */ -#define NFS_DEBUG_SILLY 8 /* nfs_sillyrename (.nfsXXX aka turd files) */ -#define NFS_DEBUG_DUP 16 /* debug duplicate requests */ -#define NFS_DEBUG_ATTR 32 - -#define NFS_DPF(cat, args) \ - do { \ - if (nfs_debug & NFS_DEBUG_##cat) kprintf args; \ - } while (0) - -#else - -#define NFS_DPF(cat, args) - -#endif /* NFSDIAG */ - -int nfs_init __P((struct vfsconf *vfsp)); -int nfs_reply __P((struct nfsreq *)); -int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); -int nfs_send __P((struct socket *, struct mbuf *, struct mbuf *, - struct nfsreq *)); -int nfs_rephead __P((int, struct nfsrv_descript *, struct nfssvc_sock *, - int, int, u_quad_t *, struct mbuf **, struct mbuf **, - caddr_t *)); -int nfs_sndlock __P((struct nfsreq *)); -void nfs_sndunlock __P((struct nfsreq *)); -int nfs_slplock __P((struct nfssvc_sock *, int)); -void nfs_slpunlock __P((struct nfssvc_sock *)); -int nfs_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *)); -int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, - int)); -int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *)); -int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *, - int *)); -int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *)); -int nfs_asyncio __P((struct nfsbuf *, struct ucred *)); -int nfs_doio __P((struct nfsbuf *, struct ucred *, struct proc *)); -int nfs_readlinkrpc __P((struct vnode *, struct uio *, struct ucred *)); -int nfs_sigintr __P((struct nfsmount *, struct nfsreq *, struct proc *)); -int nfs_readdirplusrpc __P((struct vnode *, struct uio *, struct ucred *)); -int nfsm_disct __P((struct mbuf **, caddr_t *, int, int, caddr_t *)); -void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *, - struct nfs_fattr *)); -void nfsm_srvwcc __P((struct nfsrv_descript *, int, struct vattr *, int, - struct vattr *, struct mbuf **, char **)); -void nfsm_srvpostopattr __P((struct nfsrv_descript *, int, struct vattr *, - struct mbuf **, char **)); -int netaddr_match __P((int, union nethostaddr *, struct mbuf *)); -int nfs_request __P((struct vnode *, struct mbuf *, int, struct proc *, - struct ucred *, struct mbuf **, struct mbuf **, - caddr_t *, u_int64_t *)); -int nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *, - struct vattr *, int, u_int64_t *)); -int nfs_namei __P((struct nameidata *, fhandle_t *, int, - struct nfssvc_sock *, struct mbuf *, struct mbuf **, - caddr_t *, struct vnode **, struct proc *, int, int)); -void nfsm_adj __P((struct mbuf *, int, int)); -int nfsm_mbuftouio __P((struct mbuf **, struct uio *, int, caddr_t *)); -void nfsrv_initcache __P((void)); -int nfs_getauth __P((struct nfsmount *, struct nfsreq *, struct ucred *, - char **, int *, char *, int *, NFSKERBKEY_T)); -int nfs_getnickauth __P((struct nfsmount *, struct ucred *, char **, - int *, char *, int)); -int nfs_savenickauth __P((struct nfsmount *, struct ucred *, int, - NFSKERBKEY_T, struct mbuf **, char **, - struct mbuf *)); -int nfs_adv __P((struct mbuf **, caddr_t *, int, int)); -void nfs_nhinit __P((void)); -void nfs_timer __P((void*)); -u_long nfs_hash __P((nfsfh_t *, int)); -int nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *, - struct 
nfsrv_descript **)); -int nfsrv_getcache __P((struct nfsrv_descript *, struct nfssvc_sock *, - struct mbuf **)); -void nfsrv_updatecache __P((struct nfsrv_descript *, int, struct mbuf *)); -void nfsrv_cleancache __P((void)); -int nfs_bind_resv_thread_wake __P((void)); -int nfs_connect __P((struct nfsmount *, struct nfsreq *)); -void nfs_disconnect __P((struct nfsmount *)); -int nfs_getattrcache __P((struct vnode *, struct vattr *)); -int nfsm_strtmbuf __P((struct mbuf **, char **, char *, long)); -int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *, - int)); -int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *)); -void nfsrv_init __P((int)); -int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, - struct ucred *cred, struct proc *procp)); -int nfs_flushcommits(struct vnode *, struct proc *); -void nfs_clearcommit __P((struct mount *)); -int nfsrv_errmap __P((struct nfsrv_descript *, int)); -void nfsrvw_sort __P((gid_t *, int)); -void nfsrv_setcred __P((struct ucred *, struct ucred *)); -int nfs_buf_write __P((struct nfsbuf *)); -int nfsrv_object_create __P((struct vnode *)); -void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); -int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, - struct proc *, struct mbuf **)); -int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, - struct proc *p)); - -int nfsrv3_access __P((struct nfsrv_descript *nfsd, +__BEGIN_DECLS + +int nfs_init(struct vfsconf *vfsp); +void nfs_mbuf_init(void); +int nfs_reply(struct nfsreq *); +int nfs_getreq(struct nfsrv_descript *,struct nfsd *,int); +int nfs_send(socket_t, mbuf_t, mbuf_t, struct nfsreq *); +int nfs_rephead(int, struct nfsrv_descript *, struct nfssvc_sock *, + int, mbuf_t *, mbuf_t *, caddr_t *); +int nfs_sndlock(struct nfsreq *); +void nfs_sndunlock(struct nfsreq *); +int nfs_vinvalbuf(vnode_t, int, struct ucred *, proc_t, int); +int nfs_buf_page_inval(vnode_t vp, off_t offset); +int nfs_readrpc(vnode_t, struct uio *, struct ucred *, proc_t); +int nfs_writerpc(vnode_t, struct uio *, struct ucred *, proc_t, int *, int *); +int nfs_readdirrpc(vnode_t, struct uio *, struct ucred *, proc_t); +int nfs_readdirplusrpc(vnode_t, struct uio *, struct ucred *, proc_t); +int nfs_asyncio(struct nfsbuf *, struct ucred *); +int nfs_doio(struct nfsbuf *, struct ucred *, proc_t); +int nfs_readlinkrpc(vnode_t, struct uio *, struct ucred *, proc_t); +int nfs_sigintr(struct nfsmount *, struct nfsreq *, proc_t); +int nfsm_disct(mbuf_t *, caddr_t *, int, int, caddr_t *); +void nfsm_srvfattr(struct nfsrv_descript *, struct vnode_attr *, + struct nfs_fattr *); +void nfsm_srvwcc(struct nfsrv_descript *, int, struct vnode_attr *, int, + struct vnode_attr *, mbuf_t *, char **); +void nfsm_srvpostopattr(struct nfsrv_descript *, int, struct vnode_attr *, + mbuf_t *, char **); +int netaddr_match(int, union nethostaddr *, mbuf_t); +int nfs_request(vnode_t, mount_t, mbuf_t, int, proc_t, + struct ucred *, mbuf_t *, mbuf_t *, + caddr_t *, u_int64_t *); +int nfs_parsefattr(mbuf_t *, caddr_t *, int, struct nfs_vattr *); +int nfs_loadattrcache(struct nfsnode *, struct nfs_vattr *, u_int64_t *, int); +int nfsm_path_mbuftond(mbuf_t *, caddr_t *, int, int, int *, struct nameidata *); +int nfs_namei(struct nfsrv_descript *, struct vfs_context *, struct nameidata *, + struct nfs_filehandle *, mbuf_t, int, vnode_t *, + struct nfs_export **, struct nfs_export_options **); +void nfsm_adj(mbuf_t, int, int); +int nfsm_mbuftouio(mbuf_t *, struct uio *, int, caddr_t *); 
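The declarations above and below also retire the 4.4BSD __P(()) prototype-compatibility macro in favor of plain ANSI signatures bracketed by __BEGIN_DECLS/__END_DECLS. For reference, the classic <sys/cdefs.h> definition the old declarations relied on, restated here for illustration:

	#if defined(__STDC__) || defined(__cplusplus)
	#define	__P(protos)	protos		/* full-blown ANSI C prototype */
	#else
	#define	__P(protos)	()		/* traditional C parameter list */
	#endif

	struct nfsreq;
	int nfs_reply __P((struct nfsreq *));	/* old style, as deleted */
	int nfs_reply(struct nfsreq *);		/* new style, as added */

Separately, the ISHEX/HEXTOC/HEXSTRTOI macros introduced a little earlier give the server a validated path through WebNFS percent-escapes. A self-contained sketch of decoding with them; the macros are restated so the example stands alone, and the helper name is illustrative, not the kernel's actual decoder:

	#include <stdio.h>

	#define ISHEX(c) \
		((((c) >= 'a') && ((c) <= 'f')) || \
		 (((c) >= 'A') && ((c) <= 'F')) || \
		 (((c) >= '0') && ((c) <= '9')))
	#define HEXTOC(c) \
		((c) >= 'a' ? ((c) - ('a' - 10)) : \
		 ((c) >= 'A' ? ((c) - ('A' - 10)) : ((c) - '0')))
	#define HEXSTRTOI(p) \
		((HEXTOC(p[0]) << 4) + HEXTOC(p[1]))

	/* decode "%2F"-style escapes in place; returns -1 on a malformed escape */
	static int
	webnfs_unescape(char *s)
	{
		char *dst = s;

		while (*s) {
			if (*s == '%') {
				if (!ISHEX(s[1]) || !ISHEX(s[2]))
					return (-1);
				*dst++ = HEXSTRTOI((s + 1));
				s += 3;
			} else {
				*dst++ = *s++;
			}
		}
		*dst = '\0';
		return (0);
	}

	int
	main(void)
	{
		char path[] = "index%2ehtml";

		if (webnfs_unescape(path) == 0)
			printf("%s\n", path);	/* prints "index.html" */
		return (0);
	}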
+void nfsrv_initcache(void); +int nfs_getauth(struct nfsmount *, struct nfsreq *, struct ucred *, + char **, int *, char *, int *, NFSKERBKEY_T); +int nfs_getnickauth(struct nfsmount *, struct ucred *, char **, + int *, char *, int); +int nfs_savenickauth(struct nfsmount *, struct ucred *, int, + NFSKERBKEY_T, mbuf_t *, char **, + mbuf_t); +int nfs_adv(mbuf_t *, caddr_t *, int, int); +void nfs_nhinit(void); +void nfs_timer_funnel(void*); +void nfs_timer(void*); +u_long nfs_hash(u_char *, int); +int nfsrv_dorec(struct nfssvc_sock *, struct nfsd *, + struct nfsrv_descript **); +int nfsrv_getcache(struct nfsrv_descript *, struct nfssvc_sock *, + mbuf_t *); +void nfsrv_updatecache(struct nfsrv_descript *, int, mbuf_t); +void nfsrv_cleancache(void); +int nfs_bind_resv_thread_wake(void); +int nfs_connect(struct nfsmount *, struct nfsreq *); +void nfs_disconnect(struct nfsmount *); +int nfs_getattr_no_vnode(mount_t,u_char *,int,struct ucred *,proc_t,struct nfs_vattr *,u_int64_t *); +int nfs_getattr(vnode_t vp, struct nfs_vattr *nvap, struct ucred *cred, proc_t p); +int nfs_getattrcache(vnode_t, struct nfs_vattr *); +int nfs_attrcachetimeout(vnode_t); +int nfsm_strtmbuf(mbuf_t *, char **, char *, long); +int nfs_bioread(vnode_t, struct uio *, int, struct ucred *, proc_t); +int nfsm_uiotombuf(struct uio *, mbuf_t *, int, caddr_t *); +void nfsrv_init(int); +int nfs_commit(vnode_t vp, u_quad_t offset, u_int32_t count, + struct ucred *cred, proc_t procp); +int nfs_flushcommits(vnode_t, proc_t, int); +int nfs_flush(vnode_t,int,struct ucred *,proc_t,int); +void nfs_clearcommit(mount_t); +int nfsrv_errmap(struct nfsrv_descript *, int); +void nfsrvw_sort(gid_t *, int); +void nfsrv_setcred(struct ucred *, struct ucred *); +int nfs_buf_write(struct nfsbuf *); +void nfsrv_wakenfsd(struct nfssvc_sock *slp); +int nfsrv_writegather(struct nfsrv_descript **, struct nfssvc_sock *, + proc_t, mbuf_t *); +int nfs_fsinfo(struct nfsmount *, vnode_t, struct ucred *, proc_t p); +int nfs_pathconfrpc(vnode_t, struct nfsv3_pathconf *, kauth_cred_t, proc_t); +void nfs_pathconf_cache(struct nfsmount *, struct nfsv3_pathconf *); + +int nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *, - struct nfssvc_sock *, struct mbuf *, int *, - int, int)); -int nfsrv_setpublicfs __P((struct mount *, struct netexport *, - struct export_args *)); -int nfs_ispublicfh __P((fhandle_t *)); -int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct 
proc *procp, struct mbuf **mrq)); -int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, - struct nfssvc_sock *slp, struct proc *procp, - struct mbuf **mrq)); -int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_readdir __P((struct nfsrv_descript *nfsd, + proc_t procp, mbuf_t *mrq); +int nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_credcheck(struct nfsrv_descript *, struct nfs_export *, + struct nfs_export_options *); +int nfsrv_export(struct user_nfs_export_args *, struct vfs_context *); +int nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2); +int nfsrv_fhtovp(struct nfs_filehandle *, mbuf_t, int, vnode_t *, + struct nfs_export **, struct nfs_export_options **); +int nfs_ispublicfh(struct nfs_filehandle *); +int nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_pathconf(struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, proc_t procp, + mbuf_t *mrq); +void nfsrv_rcv(socket_t, caddr_t arg, int waitflag); +void nfsrv_rcv_locked(socket_t, struct nfssvc_sock *slp, int waitflag); +int nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, - struct nfssvc_sock *slp, struct proc *procp, - struct mbuf **mrq)); -int nfsrv_readlink __P((struct nfsrv_descript *nfsd, - struct nfssvc_sock *slp, struct proc *procp, - struct mbuf **mrq)); -int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_setattr __P((struct nfsrv_descript *nfsd, + proc_t procp, mbuf_t *mrq); +int nfsrv_readdirplus(struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, proc_t procp, + mbuf_t *mrq); +int nfsrv_readlink(struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, proc_t procp, + mbuf_t *mrq); +int nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, 
mbuf_t *mrq); +int nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_statfs __P((struct nfsrv_descript *nfsd, + proc_t procp, mbuf_t *mrq); +void nfsrv_slpderef(struct nfssvc_sock *slp); +int nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_symlink __P((struct nfsrv_descript *nfsd, + proc_t procp, mbuf_t *mrq); +int nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, - struct proc *procp, struct mbuf **mrq)); -void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); -void nfsrv_slpderef __P((struct nfssvc_sock *slp)); + proc_t procp, mbuf_t *mrq); +int nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, + proc_t procp, mbuf_t *mrq); +int nfsrv_vptofh( struct nfs_export *, int, struct nfs_filehandle *, + vnode_t, struct vfs_context *, struct nfs_filehandle *); -void nfs_up(struct nfsreq *, struct nfsmount *, struct proc *, - const char *, int); -void nfs_down(struct nfsreq *, struct nfsmount *, struct proc *, - const char *, int, int); +void nfs_up(struct nfsmount *, proc_t, int, const char *); +void nfs_down(struct nfsmount *, proc_t, int, int, const char *); -/* - * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG) - * But some of this code may prove useful someday... - */ -#undef NFSDIAG -#if NFSDIAG - -extern int nfstraceindx; -#define NFSTBUFSIZ 8912 -struct nfstracerec { uint i1, i2, i3, i4; }; -extern struct nfstracerec nfstracebuf[NFSTBUFSIZ]; -extern uint nfstracemask; /* 32 bits - trace points over 31 are unconditional */ - -/* 0x0000000f nfs_getattrcache trace points */ -#define NFSTRC_GAC_MISS 0x00 /* 0x00000001 cache miss */ -#define NFSTRC_GAC_HIT 0x01 /* 0x00000002 cache hit */ -#define NFSTRC_GAC_NP 0x02 /* 0x00000004 np size mismatch - vp... */ -/* 0x00000038 nfs_loadattrcache trace points */ -#define NFSTRC_LAC 0x03 /* 0x00000008 function entry point - vp */ -#define NFSTRC_LAC_INIT 0x04 /* 0x00000010 new vp & init n_mtime - vp */ -#define NFSTRC_LAC_NP 0x05 /* 0x00000020 np size mismatch - vp... */ -/* 0x000000c0 nfs_getattr trace points */ -#define NFSTRC_GA_INV 0x06 /* 0x00000040 times mismatch - vp */ -#define NFSTRC_GA_INV1 0x07 /* 0x00000080 invalidate ok - vp */ -/* 0x00000100 vmp_invalidate trace points */ -#define NFSTRC_VMP_INV 0x08 /* 0x00000100 function entry point - vmp */ -/* 0x00000200 nfs_request trace points */ -#define NFSTRC_REQ 0x09 /* 0x00000200 - alternates vp and procnum */ -/* 0x00000c00 vmp_push_range trace points */ -#define NFSTRC_VPR 0xa /* 0x00000400 entry point - vp... 
*/ -#define NFSTRC_VPR_DONE 0xb /* 0x00000800 tail exit - error # */ -/* 0x00003000 nfs_doio trace points */ -#define NFSTRC_DIO 0xc /* 0x00001000 entry point - vp */ -#define NFSTRC_DIO_DONE 0xd /* 0x00002000 exit points - vp */ -/* 0x000fc000 congestion window trace points */ -#define NFSTRC_CWND_INIT 0xe -#define NFSTRC_CWND_REPLY 0xf -#define NFSTRC_CWND_TIMER 0x10 -#define NFSTRC_CWND_REQ1 0x11 -#define NFSTRC_CWND_REQ2 0x12 -#define NFSTRC_CWND_SOFT 0x13 -/* 0xfff00000 nfs_rcvlock & nfs_rcvunlock trace points */ -#define NFSTRC_ECONN 0x14 -#define NFSTRC_RCVERR 0x15 -#define NFSTRC_REQFREE 0x16 -#define NFSTRC_NOTMINE 0x17 -#define NFSTRC_6 0x18 -#define NFSTRC_7 0x19 -#define NFSTRC_RCVLCKINTR 0x1a -#define NFSTRC_RCVALREADY 0x1b -#define NFSTRC_RCVLCKW 0x1c /* 0x10000000 seeking recieve lock (waiting) */ -#define NFSTRC_RCVLCK 0x1d /* 0x20000000 getting recieve lock */ -#define NFSTRC_RCVUNLW 0x1e /* 0x40000000 releasing rcv lock w/ wakeup */ -#define NFSTRC_RCVUNL 0x1f /* 0x80000000 releasing rcv lock w/o wakeup */ -/* trace points beyond 31 are on if any of above points are on */ -#define NFSTRC_GA_INV2 0x20 /* nfs_getattr invalidate - error# */ -#define NFSTRC_VBAD 0x21 -#define NFSTRC_REQERR 0x22 -#define NFSTRC_RPCERR 0x23 -#define NFSTRC_DISSECTERR 0x24 -#define NFSTRC_CONTINUE 0xff /* continuation record for previous entry */ - -#define NFSTRACEX(a1, a2, a3, a4) \ -( \ - nfstracebuf[nfstraceindx].i1 = (uint)(a1), \ - nfstracebuf[nfstraceindx].i2 = (uint)(a2), \ - nfstracebuf[nfstraceindx].i3 = (uint)(a3), \ - nfstracebuf[nfstraceindx].i4 = (uint)(a4), \ - nfstraceindx = (nfstraceindx + 1) % NFSTBUFSIZ, \ - 1 \ -) - -#define NFSTRACE(cnst, fptr) \ -( \ - (nfstracemask && ((cnst) > 31 || nfstracemask & 1<<(cnst))) ? \ - NFSTRACEX((cnst), (fptr), current_thread(), \ - clock_get_system_value().tv_nsec) : \ - 0 \ -) - -#define NFSTRACE4(cnst, fptr, a2, a3, a4) \ -( \ - NFSTRACE(cnst,fptr) ? \ - NFSTRACEX(NFSTRC_CONTINUE, a2, a3, a4) : \ - 0 \ -) - -#else /* NFSDIAG */ - -# define NFSTRACE(cnst, fptr) -# define NFSTRACE4(cnst, fptr, a2, a3, a4) - -#endif /* NFSDIAG */ +struct nfs_diskless; +int nfs_boot_init(struct nfs_diskless *nd, proc_t procp); +int nfs_boot_getfh(struct nfs_diskless *nd, proc_t procp, int v3, int sotype); + +__END_DECLS #endif /* KERNEL */ #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index c756be676..966cf72f5 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
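The NFSDIAG/NFSTRACE machinery deleted just above was a fixed-size ring of four-word trace records advanced modulo NFSTBUFSIZ, gated by a 32-bit mask (points numbered past 31 fired whenever any mask bit was set). A minimal user-space restatement of that ring-buffer technique; names are illustrative:

	#include <stdio.h>

	#define TBUFSIZ 8912	/* same count as the removed NFSTBUFSIZ; '%' copes with a non-power-of-two size */

	struct tracerec { unsigned i1, i2, i3, i4; };

	static struct tracerec tracebuf[TBUFSIZ];
	static unsigned traceindx;

	/* log four words and advance the cursor, wrapping at the end */
	static void
	trace4(unsigned a1, unsigned a2, unsigned a3, unsigned a4)
	{
		struct tracerec *t = &tracebuf[traceindx];

		t->i1 = a1; t->i2 = a2; t->i3 = a3; t->i4 = a4;
		traceindx = (traceindx + 1) % TBUFSIZ;
	}

	int
	main(void)
	{
		unsigned i;

		for (i = 0; i < 5; i++)
			trace4(i, i * i, i + 1, 0);
		printf("next free slot: %u\n", traceindx);
		return (0);
	}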
* * @APPLE_LICENSE_HEADER_START@ * @@ -62,27 +62,31 @@ #include #include #include -#include +#include +#include #include #include #include -#include +#include #include #include -#include +#include +#include #include #include #include #include +#include +#include #include #include #include #include -#include #include +#include #include @@ -100,16 +104,21 @@ extern int nfs_numasync; extern int nfs_ioddelwri; extern struct nfsstats nfsstats; -#define NFSBUFHASH(dvp, lbn) \ - (&nfsbufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & nfsbufhash]) +#define NFSBUFHASH(np, lbn) \ + (&nfsbufhashtbl[((long)(np) / sizeof(*(np)) + (int)(lbn)) & nfsbufhash]) LIST_HEAD(nfsbufhashhead, nfsbuf) *nfsbufhashtbl; struct nfsbuffreehead nfsbuffree, nfsbuffreemeta, nfsbufdelwri; u_long nfsbufhash; -int nfsbufhashlock, nfsbufcnt, nfsbufmin, nfsbufmax; +int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer; int nfs_nbdwrite; time_t nfsbuffreeuptimestamp; +lck_grp_t *nfs_buf_lck_grp; +lck_grp_attr_t *nfs_buf_lck_grp_attr; +lck_attr_t *nfs_buf_lck_attr; +lck_mtx_t *nfs_buf_mutex; + #define NFSBUFWRITE_THROTTLE 9 #define NFSBUF_LRU_STALE 120 #define NFSBUF_META_STALE 240 @@ -130,7 +139,7 @@ time_t nfsbuffreeuptimestamp; #define META_FREEUP_MIN_FRAC 2 #define NFS_BUF_FREEUP() \ - do { \ + do { \ /* only call nfs_buf_freeup() if it has work to do: */ \ if (((nfsbuffreecnt > nfsbufcnt/LRU_FREEUP_MIN_FRAC) || \ (nfsbuffreemetacnt > nfsbufcnt/META_FREEUP_MIN_FRAC)) && \ @@ -144,17 +153,28 @@ time_t nfsbuffreeuptimestamp; void nfs_nbinit(void) { - nfsbufhashlock = 0; - nfsbufhashtbl = hashinit(nbuf, M_TEMP, &nfsbufhash); - TAILQ_INIT(&nfsbuffree); - TAILQ_INIT(&nfsbuffreemeta); - TAILQ_INIT(&nfsbufdelwri); - nfsbufcnt = nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0; - nfsbufmin = 128; // XXX tune me! - nfsbufmax = 8192; // XXX tune me! + nfs_buf_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfs_buf_lck_grp_attr); + nfs_buf_lck_grp = lck_grp_alloc_init("nfs_buf", nfs_buf_lck_grp_attr); + + nfs_buf_lck_attr = lck_attr_alloc_init(); + + nfs_buf_mutex = lck_mtx_alloc_init(nfs_buf_lck_grp, nfs_buf_lck_attr); + + nfsbufcnt = nfsbufmetacnt = + nfsbuffreecnt = nfsbuffreemetacnt = nfsbufdelwricnt = 0; + nfsbufmin = 128; + nfsbufmax = (sane_size >> PAGE_SHIFT) / 4; + nfsbufmetamax = (sane_size >> PAGE_SHIFT) / 16; nfsneedbuffer = 0; nfs_nbdwrite = 0; nfsbuffreeuptimestamp = 0; + + nfsbufhashtbl = hashinit(nfsbufmax/4, M_TEMP, &nfsbufhash); + TAILQ_INIT(&nfsbuffree); + TAILQ_INIT(&nfsbuffreemeta); + TAILQ_INIT(&nfsbufdelwri); + } /* @@ -166,41 +186,38 @@ nfs_buf_freeup(int timer) struct nfsbuf *fbp; struct timeval now; int count; + struct nfsbuffreehead nfsbuffreeup; + + TAILQ_INIT(&nfsbuffreeup); + + lck_mtx_lock(nfs_buf_mutex); microuptime(&now); nfsbuffreeuptimestamp = now.tv_sec; - FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, count); + FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0); + count = timer ? 
nfsbuffreecnt/LRU_FREEUP_FRAC_ON_TIMER : LRU_TO_FREEUP; while ((nfsbufcnt > nfsbufmin) && (count-- > 0)) { fbp = TAILQ_FIRST(&nfsbuffree); if (!fbp) break; - if ((fbp->nb_timestamp + (2*NFSBUF_LRU_STALE)) > now.tv_sec) + if (fbp->nb_refs) + break; + if (NBUFSTAMPVALID(fbp) && + (fbp->nb_timestamp + (2*NFSBUF_LRU_STALE)) > now.tv_sec) break; nfs_buf_remfree(fbp); /* disassociate buffer from any vnode */ if (fbp->nb_vp) { - struct vnode *oldvp; if (fbp->nb_vnbufs.le_next != NFSNOLIST) { LIST_REMOVE(fbp, nb_vnbufs); fbp->nb_vnbufs.le_next = NFSNOLIST; } - oldvp = fbp->nb_vp; fbp->nb_vp = NULL; - HOLDRELE(oldvp); } LIST_REMOVE(fbp, nb_hash); - /* nuke any creds */ - if (fbp->nb_rcred != NOCRED) - crfree(fbp->nb_rcred); - if (fbp->nb_wcred != NOCRED) - crfree(fbp->nb_wcred); - /* if buf was NB_META, dump buffer */ - if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) { - FREE(fbp->nb_data, M_TEMP); - } - FREE(fbp, M_TEMP); + TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free); nfsbufcnt--; } @@ -209,36 +226,54 @@ nfs_buf_freeup(int timer) fbp = TAILQ_FIRST(&nfsbuffreemeta); if (!fbp) break; - if ((fbp->nb_timestamp + (2*NFSBUF_META_STALE)) > now.tv_sec) + if (fbp->nb_refs) + break; + if (NBUFSTAMPVALID(fbp) && + (fbp->nb_timestamp + (2*NFSBUF_META_STALE)) > now.tv_sec) break; nfs_buf_remfree(fbp); /* disassociate buffer from any vnode */ if (fbp->nb_vp) { - struct vnode *oldvp; if (fbp->nb_vnbufs.le_next != NFSNOLIST) { LIST_REMOVE(fbp, nb_vnbufs); fbp->nb_vnbufs.le_next = NFSNOLIST; } - oldvp = fbp->nb_vp; fbp->nb_vp = NULL; - HOLDRELE(oldvp); } LIST_REMOVE(fbp, nb_hash); + TAILQ_INSERT_TAIL(&nfsbuffreeup, fbp, nb_free); + nfsbufcnt--; + nfsbufmetacnt--; + } + + FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, 0); + NFSBUFCNTCHK(1); + + lck_mtx_unlock(nfs_buf_mutex); + + while ((fbp = TAILQ_FIRST(&nfsbuffreeup))) { + TAILQ_REMOVE(&nfsbuffreeup, fbp, nb_free); /* nuke any creds */ - if (fbp->nb_rcred != NOCRED) - crfree(fbp->nb_rcred); - if (fbp->nb_wcred != NOCRED) - crfree(fbp->nb_wcred); - /* if buf was NB_META, dump buffer */ - if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) { - FREE(fbp->nb_data, M_TEMP); + if (fbp->nb_rcred != NOCRED) { + kauth_cred_rele(fbp->nb_rcred); + fbp->nb_rcred = NOCRED; } + if (fbp->nb_wcred != NOCRED) { + kauth_cred_rele(fbp->nb_wcred); + fbp->nb_wcred = NOCRED; + } + /* if buf was NB_META, dump buffer */ + if (ISSET(fbp->nb_flags, NB_META) && fbp->nb_data) + kfree(fbp->nb_data, fbp->nb_bufsize); FREE(fbp, M_TEMP); - nfsbufcnt--; } - FSDBG(320, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, count); + } +/* + * remove a buffer from the freelist + * (must be called with nfs_buf_mutex held) + */ void nfs_buf_remfree(struct nfsbuf *bp) { @@ -247,7 +282,7 @@ nfs_buf_remfree(struct nfsbuf *bp) if (ISSET(bp->nb_flags, NB_DELWRI)) { nfsbufdelwricnt--; TAILQ_REMOVE(&nfsbufdelwri, bp, nb_free); - } else if (ISSET(bp->nb_flags, NB_META) && !ISSET(bp->nb_flags, NB_INVAL)) { + } else if (ISSET(bp->nb_flags, NB_META)) { nfsbuffreemetacnt--; TAILQ_REMOVE(&nfsbuffreemeta, bp, nb_free); } else { @@ -255,17 +290,33 @@ nfs_buf_remfree(struct nfsbuf *bp) TAILQ_REMOVE(&nfsbuffree, bp, nb_free); } bp->nb_free.tqe_next = NFSNOLIST; - NFSBUFCNTCHK(); + NFSBUFCNTCHK(1); } /* * check for existence of nfsbuf in cache */ +boolean_t +nfs_buf_is_incore(vnode_t vp, daddr64_t blkno) +{ + boolean_t rv; + lck_mtx_lock(nfs_buf_mutex); + if (nfs_buf_incore(vp, blkno)) + rv = TRUE; + else + rv = FALSE; + lck_mtx_unlock(nfs_buf_mutex); + return (rv); +} + +/* + * return incore buffer (must be 
called with nfs_buf_mutex held) + */ struct nfsbuf * -nfs_buf_incore(struct vnode *vp, daddr_t blkno) +nfs_buf_incore(vnode_t vp, daddr64_t blkno) { /* Search hash chain */ - struct nfsbuf * bp = NFSBUFHASH(vp, blkno)->lh_first; + struct nfsbuf * bp = NFSBUFHASH(VTONFS(vp), blkno)->lh_first; for (; bp != NULL; bp = bp->nb_hash.le_next) if (bp->nb_lblkno == blkno && bp->nb_vp == vp) { if (!ISSET(bp->nb_flags, NB_INVAL)) { @@ -285,15 +336,20 @@ nfs_buf_incore(struct vnode *vp, daddr_t blkno) * later when/if we need to write the data (again). */ int -nfs_buf_page_inval(struct vnode *vp, off_t offset) +nfs_buf_page_inval(vnode_t vp, off_t offset) { struct nfsbuf *bp; + int error = 0; + + lck_mtx_lock(nfs_buf_mutex); bp = nfs_buf_incore(vp, ubc_offtoblk(vp, offset)); if (!bp) - return (0); + goto out; FSDBG(325, bp, bp->nb_flags, bp->nb_dirtyoff, bp->nb_dirtyend); - if (ISSET(bp->nb_flags, NB_BUSY)) - return (EBUSY); + if (ISSET(bp->nb_lflags, NBL_BUSY)) { + error = EBUSY; + goto out; + } /* * If there's a dirty range in the buffer, check to * see if this page intersects with the dirty range. @@ -303,24 +359,39 @@ nfs_buf_page_inval(struct vnode *vp, off_t offset) int start = offset - NBOFF(bp); if (bp->nb_dirtyend <= start || bp->nb_dirtyoff >= (start + PAGE_SIZE)) - return (0); - return (EBUSY); + error = 0; + else + error = EBUSY; } - return (0); +out: + lck_mtx_unlock(nfs_buf_mutex); + return (error); } +/* + * set up the UPL for a buffer + * (must NOT be called with nfs_buf_mutex held) + */ int nfs_buf_upl_setup(struct nfsbuf *bp) { kern_return_t kret; upl_t upl; - int s; + int upl_flags; if (ISSET(bp->nb_flags, NB_PAGELIST)) return (0); + upl_flags = UPL_PRECIOUS; + if (! ISSET(bp->nb_flags, NB_READ)) { + /* + * We're doing a "write", so we intend to modify + * the pages we're gathering. 
+ */ + upl_flags |= UPL_WILL_MODIFY; + } kret = ubc_create_upl(bp->nb_vp, NBOFF(bp), bp->nb_bufsize, - &upl, NULL, UPL_PRECIOUS); + &upl, NULL, upl_flags); if (kret == KERN_INVALID_ARGUMENT) { /* vm object probably doesn't exist any more */ bp->nb_pagelist = NULL; @@ -334,13 +405,15 @@ nfs_buf_upl_setup(struct nfsbuf *bp) FSDBG(538, bp, NBOFF(bp), bp->nb_bufsize, bp->nb_vp); - s = splbio(); bp->nb_pagelist = upl; SET(bp->nb_flags, NB_PAGELIST); - splx(s); return (0); } +/* + * update buffer's valid/dirty info from UBC + * (must NOT be called with nfs_buf_mutex held) + */ void nfs_buf_upl_check(struct nfsbuf *bp) { @@ -390,6 +463,10 @@ nfs_buf_upl_check(struct nfsbuf *bp) FSDBG(539, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff, bp->nb_dirtyend); } +/* + * make sure that a buffer is mapped + * (must NOT be called with nfs_buf_mutex held) + */ static int nfs_buf_map(struct nfsbuf *bp) { @@ -465,18 +542,19 @@ nfs_buf_normalize_valid_range(struct nfsnode *np, struct nfsbuf *bp) pg++; bp->nb_validend = pg * PAGE_SIZE; /* clip to EOF */ - if (NBOFF(bp) + bp->nb_validend > np->n_size) + if (NBOFF(bp) + bp->nb_validend > (off_t)np->n_size) bp->nb_validend = np->n_size % bp->nb_bufsize; } /* * try to push out some delayed/uncommitted writes + * ("locked" indicates whether nfs_buf_mutex is already held) */ static void -nfs_buf_delwri_push(void) +nfs_buf_delwri_push(int locked) { struct nfsbuf *bp; - int i; + int i, error; if (TAILQ_EMPTY(&nfsbufdelwri)) return; @@ -487,63 +565,99 @@ nfs_buf_delwri_push(void) /* otherwise, try to do some of the work ourselves */ i = 0; + if (!locked) + lck_mtx_lock(nfs_buf_mutex); while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) { struct nfsnode *np = VTONFS(bp->nb_vp); nfs_buf_remfree(bp); + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN); + nfs_buf_refrele(bp); + if (error) + break; + if (!bp->nb_vp) { + /* buffer is no longer valid */ + nfs_buf_drop(bp); + continue; + } if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { /* put buffer at end of delwri list */ TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free); nfsbufdelwricnt++; - nfs_flushcommits(np->n_vnode, (struct proc *)0); + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); + nfs_flushcommits(np->n_vnode, NULL, 1); } else { - SET(bp->nb_flags, (NB_BUSY | NB_ASYNC)); + SET(bp->nb_flags, NB_ASYNC); + lck_mtx_unlock(nfs_buf_mutex); nfs_buf_write(bp); } i++; + lck_mtx_lock(nfs_buf_mutex); } + if (!locked) + lck_mtx_unlock(nfs_buf_mutex); } /* - * Get an nfs cache block. - * Allocate a new one if the block isn't currently in the cache - * and return the block marked busy. If the calling process is - * interrupted by a signal for an interruptible mount point, return - * NULL. + * Get an nfs buffer. + * + * Returns errno on error, 0 otherwise. + * Any buffer is returned in *bpp. + * + * If NBLK_ONLYVALID is set, only return buffer if found in cache. + * If NBLK_NOWAIT is set, don't wait for the buffer if it's marked BUSY. + * + * Check for existence of buffer in cache. + * Or attempt to reuse a buffer from one of the free lists. + * Or allocate a new buffer if we haven't already hit max allocation. + * Or wait for a free buffer. + * + * If available buffer found, prepare it, and return it. + * + * If the calling process is interrupted by a signal for + * an interruptible mount point, return EINTR. 
*/ -struct nfsbuf * +int nfs_buf_get( - struct vnode *vp, - daddr_t blkno, + vnode_t vp, + daddr64_t blkno, int size, - struct proc *p, - int operation) + proc_t p, + int flags, + struct nfsbuf **bpp) { struct nfsnode *np = VTONFS(vp); struct nfsbuf *bp; - int i, biosize, bufsize, rv; - struct ucred *cred; + int biosize, bufsize; + kauth_cred_t cred; int slpflag = PCATCH; + int operation = (flags & NBLK_OPMASK); + int error = 0; + struct timespec ts; - FSDBG_TOP(541, vp, blkno, size, operation); + FSDBG_TOP(541, vp, blkno, size, flags); + *bpp = NULL; bufsize = size; if (bufsize > MAXBSIZE) panic("nfs_buf_get: buffer larger than MAXBSIZE requested"); - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; - if (UBCINVALID(vp) || !UBCINFOEXISTS(vp)) - operation = BLK_META; - else if (bufsize < biosize) + if (UBCINVALID(vp) || !UBCINFOEXISTS(vp)) { + operation = NBLK_META; + } else if (bufsize < biosize) { /* reg files should always have biosize blocks */ bufsize = biosize; + } - /* if BLK_WRITE, check for too many delayed/uncommitted writes */ - if ((operation == BLK_WRITE) && (nfs_nbdwrite > ((nfsbufcnt*3)/4))) { + /* if NBLK_WRITE, check for too many delayed/uncommitted writes */ + if ((operation == NBLK_WRITE) && (nfs_nbdwrite > ((nfsbufcnt*3)/4))) { FSDBG_TOP(542, vp, blkno, nfs_nbdwrite, ((nfsbufcnt*3)/4)); /* poke the delwri list */ - nfs_buf_delwri_push(); + nfs_buf_delwri_push(0); /* sleep to let other threads run... */ tsleep(&nfs_nbdwrite, PCATCH, "nfs_nbdwrite", 1); @@ -551,45 +665,36 @@ nfs_buf_get( } loop: - /* - * Obtain a lock to prevent a race condition if the - * MALLOC() below happens to block. - */ - if (nfsbufhashlock) { - while (nfsbufhashlock) { - nfsbufhashlock = -1; - tsleep(&nfsbufhashlock, PCATCH, "nfsbufget", 0); - if (nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p)) - return (NULL); - } - goto loop; - } - nfsbufhashlock = 1; + lck_mtx_lock(nfs_buf_mutex); /* check for existence of nfsbuf in cache */ - if (bp = nfs_buf_incore(vp, blkno)) { + if ((bp = nfs_buf_incore(vp, blkno))) { /* if busy, set wanted and wait */ - if (ISSET(bp->nb_flags, NB_BUSY)) { + if (ISSET(bp->nb_lflags, NBL_BUSY)) { + if (flags & NBLK_NOWAIT) { + lck_mtx_unlock(nfs_buf_mutex); + FSDBG_BOT(541, vp, blkno, bp, 0xbcbcbcbc); + return (0); + } FSDBG_TOP(543, vp, blkno, bp, bp->nb_flags); - SET(bp->nb_flags, NB_WANTED); - /* unlock hash */ - if (nfsbufhashlock < 0) { - nfsbufhashlock = 0; - wakeup(&nfsbufhashlock); - } else - nfsbufhashlock = 0; - tsleep(bp, slpflag|(PRIBIO+1), "nfsbufget", (slpflag == PCATCH) ? 0 : 2*hz); + SET(bp->nb_lflags, NBL_WANTED); + + ts.tv_sec = 2; + ts.tv_nsec = 0; + msleep(bp, nfs_buf_mutex, slpflag|(PRIBIO+1)|PDROP, + "nfsbufget", (slpflag == PCATCH) ? 
0 : &ts); slpflag = 0; FSDBG_BOT(543, vp, blkno, bp, bp->nb_flags); - if (nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p)) { - FSDBG_BOT(541, vp, blkno, 0, EINTR); - return (NULL); + if ((error = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p))) { + FSDBG_BOT(541, vp, blkno, 0, error); + return (error); } goto loop; } if (bp->nb_bufsize != bufsize) panic("nfsbuf size mismatch"); - SET(bp->nb_flags, (NB_BUSY | NB_CACHE)); + SET(bp->nb_lflags, NBL_BUSY); + SET(bp->nb_flags, NB_CACHE); nfs_buf_remfree(bp); /* additional paranoia: */ if (ISSET(bp->nb_flags, NB_PAGELIST)) @@ -597,8 +702,15 @@ loop: goto buffer_setup; } + if (flags & NBLK_ONLYVALID) { + lck_mtx_unlock(nfs_buf_mutex); + FSDBG_BOT(541, vp, blkno, 0, 0x0000cace); + return (0); + } + /* * where to get a free buffer: + * - if meta and maxmeta reached, must reuse meta * - alloc new if we haven't reached min bufs * - if free lists are NOT empty * - if free list is stale, use it @@ -608,19 +720,24 @@ loop: * - start clearing out delwri list and try again */ - if ((nfsbufcnt > nfsbufmin) && + if ((operation == NBLK_META) && (nfsbufmetacnt >= nfsbufmetamax)) { + /* if we've hit max meta buffers, must reuse a meta buffer */ + bp = TAILQ_FIRST(&nfsbuffreemeta); + } else if ((nfsbufcnt > nfsbufmin) && (!TAILQ_EMPTY(&nfsbuffree) || !TAILQ_EMPTY(&nfsbuffreemeta))) { /* try to pull an nfsbuf off a free list */ struct nfsbuf *lrubp, *metabp; struct timeval now; microuptime(&now); - /* if the next LRU or META buffer is stale, use it */ + /* if the next LRU or META buffer is invalid or stale, use it */ lrubp = TAILQ_FIRST(&nfsbuffree); - if (lrubp && ((lrubp->nb_timestamp + NFSBUF_LRU_STALE) < now.tv_sec)) + if (lrubp && (!NBUFSTAMPVALID(lrubp) || + ((lrubp->nb_timestamp + NFSBUF_LRU_STALE) < now.tv_sec))) bp = lrubp; metabp = TAILQ_FIRST(&nfsbuffreemeta); - if (!bp && metabp && ((metabp->nb_timestamp + NFSBUF_META_STALE) < now.tv_sec)) + if (!bp && metabp && (!NBUFSTAMPVALID(metabp) || + ((metabp->nb_timestamp + NFSBUF_META_STALE) < now.tv_sec))) bp = metabp; if (!bp && (nfsbufcnt >= nfsbufmax)) { @@ -640,58 +757,67 @@ loop: bp = metabp; } } + } - if (bp) { - /* we have a buffer to reuse */ - FSDBG(544, vp, blkno, bp, bp->nb_flags); - nfs_buf_remfree(bp); - if (ISSET(bp->nb_flags, NB_DELWRI)) - panic("nfs_buf_get: delwri"); - SET(bp->nb_flags, NB_BUSY); - /* disassociate buffer from previous vnode */ - if (bp->nb_vp) { - struct vnode *oldvp; - if (bp->nb_vnbufs.le_next != NFSNOLIST) { - LIST_REMOVE(bp, nb_vnbufs); - bp->nb_vnbufs.le_next = NFSNOLIST; - } - oldvp = bp->nb_vp; - bp->nb_vp = NULL; - HOLDRELE(oldvp); - } - LIST_REMOVE(bp, nb_hash); - /* nuke any creds we're holding */ - cred = bp->nb_rcred; - if (cred != NOCRED) { - bp->nb_rcred = NOCRED; - crfree(cred); - } - cred = bp->nb_wcred; - if (cred != NOCRED) { - bp->nb_wcred = NOCRED; - crfree(cred); + if (bp) { + /* we have a buffer to reuse */ + FSDBG(544, vp, blkno, bp, bp->nb_flags); + nfs_buf_remfree(bp); + if (ISSET(bp->nb_flags, NB_DELWRI)) + panic("nfs_buf_get: delwri"); + SET(bp->nb_lflags, NBL_BUSY); + /* disassociate buffer from previous vnode */ + if (bp->nb_vp) { + if (bp->nb_vnbufs.le_next != NFSNOLIST) { + LIST_REMOVE(bp, nb_vnbufs); + bp->nb_vnbufs.le_next = NFSNOLIST; } - /* if buf will no longer be NB_META, dump old buffer */ - if ((operation != BLK_META) && - ISSET(bp->nb_flags, NB_META) && bp->nb_data) { - FREE(bp->nb_data, M_TEMP); + bp->nb_vp = NULL; + } + LIST_REMOVE(bp, nb_hash); + /* nuke any creds we're holding */ + cred = bp->nb_rcred; + if (cred != NOCRED) { + 
bp->nb_rcred = NOCRED; + kauth_cred_rele(cred); + } + cred = bp->nb_wcred; + if (cred != NOCRED) { + bp->nb_wcred = NOCRED; + kauth_cred_rele(cred); + } + /* if buf will no longer be NB_META, dump old buffer */ + if (operation == NBLK_META) { + if (!ISSET(bp->nb_flags, NB_META)) + nfsbufmetacnt++; + } else if (ISSET(bp->nb_flags, NB_META)) { + if (bp->nb_data) { + kfree(bp->nb_data, bp->nb_bufsize); bp->nb_data = NULL; } - /* re-init buf fields */ - bp->nb_error = 0; - bp->nb_validoff = bp->nb_validend = -1; - bp->nb_dirtyoff = bp->nb_dirtyend = 0; - bp->nb_valid = 0; - bp->nb_dirty = 0; + nfsbufmetacnt--; } - } - - if (!bp) { - if (nfsbufcnt < nfsbufmax) { + /* re-init buf fields */ + bp->nb_error = 0; + bp->nb_validoff = bp->nb_validend = -1; + bp->nb_dirtyoff = bp->nb_dirtyend = 0; + bp->nb_valid = 0; + bp->nb_dirty = 0; + } else { + /* no buffer to reuse */ + if ((nfsbufcnt < nfsbufmax) && + ((operation != NBLK_META) || (nfsbufmetacnt < nfsbufmetamax))) { /* just alloc a new one */ MALLOC(bp, struct nfsbuf *, sizeof(struct nfsbuf), M_TEMP, M_WAITOK); + if (!bp) { + lck_mtx_unlock(nfs_buf_mutex); + FSDBG_BOT(541, vp, blkno, 0, error); + return (ENOMEM); + } nfsbufcnt++; - NFSBUFCNTCHK(); + if (operation == NBLK_META) + nfsbufmetacnt++; + NFSBUFCNTCHK(1); /* init nfsbuf */ bzero(bp, sizeof(*bp)); bp->nb_free.tqe_next = NFSNOLIST; @@ -700,46 +826,42 @@ loop: } else { /* too many bufs... wait for buffers to free up */ FSDBG_TOP(546, vp, blkno, nfsbufcnt, nfsbufmax); - /* unlock hash */ - if (nfsbufhashlock < 0) { - nfsbufhashlock = 0; - wakeup(&nfsbufhashlock); - } else - nfsbufhashlock = 0; /* poke the delwri list */ - nfs_buf_delwri_push(); + nfs_buf_delwri_push(1); nfsneedbuffer = 1; - tsleep(&nfsneedbuffer, PCATCH, "nfsbufget", 0); + msleep(&nfsneedbuffer, nfs_buf_mutex, PCATCH|PDROP, + "nfsbufget", 0); FSDBG_BOT(546, vp, blkno, nfsbufcnt, nfsbufmax); - if (nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p)) { - FSDBG_BOT(541, vp, blkno, 0, EINTR); - return (NULL); + if ((error = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p))) { + FSDBG_BOT(541, vp, blkno, 0, error); + return (error); } goto loop; } } -setup_nfsbuf: - /* setup nfsbuf */ - bp->nb_flags = NB_BUSY; + bp->nb_lflags = NBL_BUSY; + bp->nb_flags = 0; bp->nb_lblkno = blkno; /* insert buf in hash */ - LIST_INSERT_HEAD(NFSBUFHASH(vp, blkno), bp, nb_hash); + LIST_INSERT_HEAD(NFSBUFHASH(np, blkno), bp, nb_hash); /* associate buffer with new vnode */ - VHOLD(vp); bp->nb_vp = vp; LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs); buffer_setup: + /* unlock hash */ + lck_mtx_unlock(nfs_buf_mutex); + switch (operation) { - case BLK_META: + case NBLK_META: SET(bp->nb_flags, NB_META); if ((bp->nb_bufsize != bufsize) && bp->nb_data) { - FREE(bp->nb_data, M_TEMP); + kfree(bp->nb_data, bp->nb_bufsize); bp->nb_data = NULL; bp->nb_validoff = bp->nb_validend = -1; bp->nb_dirtyoff = bp->nb_dirtyend = 0; @@ -748,37 +870,62 @@ buffer_setup: CLR(bp->nb_flags, NB_CACHE); } if (!bp->nb_data) - MALLOC(bp->nb_data, caddr_t, bufsize, M_TEMP, M_WAITOK); - if (!bp->nb_data) - panic("nfs_buf_get: null nb_data"); + bp->nb_data = kalloc(bufsize); + if (!bp->nb_data) { + /* Ack! couldn't allocate the data buffer! 
*/ + /* cleanup buffer and return error */ + lck_mtx_lock(nfs_buf_mutex); + LIST_REMOVE(bp, nb_vnbufs); + bp->nb_vnbufs.le_next = NFSNOLIST; + bp->nb_vp = NULL; + /* invalidate usage timestamp to allow immediate freeing */ + NBUFSTAMPINVALIDATE(bp); + if (bp->nb_free.tqe_next != NFSNOLIST) + panic("nfsbuf on freelist"); + TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free); + nfsbuffreecnt++; + lck_mtx_unlock(nfs_buf_mutex); + FSDBG_BOT(541, vp, blkno, 0xb00, ENOMEM); + return (ENOMEM); + } bp->nb_bufsize = bufsize; break; - case BLK_READ: - case BLK_WRITE: + case NBLK_READ: + case NBLK_WRITE: + /* + * Set or clear NB_READ now to let the UPL subsystem know + * if we intend to modify the pages or not. + */ + if (operation == NBLK_READ) { + SET(bp->nb_flags, NB_READ); + } else { + CLR(bp->nb_flags, NB_READ); + } if (bufsize < PAGE_SIZE) bufsize = PAGE_SIZE; bp->nb_bufsize = bufsize; bp->nb_validoff = bp->nb_validend = -1; - if (UBCISVALID(vp)) { + if (UBCINFOEXISTS(vp)) { /* setup upl */ if (nfs_buf_upl_setup(bp)) { /* unable to create upl */ /* vm object must no longer exist */ - /* cleanup buffer and return NULL */ + /* cleanup buffer and return error */ + lck_mtx_lock(nfs_buf_mutex); LIST_REMOVE(bp, nb_vnbufs); bp->nb_vnbufs.le_next = NFSNOLIST; bp->nb_vp = NULL; - /* clear usage timestamp to allow immediate freeing */ - bp->nb_timestamp = 0; - HOLDRELE(vp); + /* invalidate usage timestamp to allow immediate freeing */ + NBUFSTAMPINVALIDATE(bp); if (bp->nb_free.tqe_next != NFSNOLIST) panic("nfsbuf on freelist"); TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free); nfsbuffreecnt++; + lck_mtx_unlock(nfs_buf_mutex); FSDBG_BOT(541, vp, blkno, 0x2bc, EIO); - return (NULL); + return (EIO); } nfs_buf_upl_check(bp); } @@ -788,23 +935,19 @@ buffer_setup: panic("nfs_buf_get: %d unknown operation", operation); } - /* unlock hash */ - if (nfsbufhashlock < 0) { - nfsbufhashlock = 0; - wakeup(&nfsbufhashlock); - } else - nfsbufhashlock = 0; + *bpp = bp; FSDBG_BOT(541, vp, blkno, bp, bp->nb_flags); - return (bp); + return (0); } void nfs_buf_release(struct nfsbuf *bp, int freeup) { - struct vnode *vp = bp->nb_vp; + vnode_t vp = bp->nb_vp; struct timeval now; + int wakeup_needbuffer, wakeup_buffer, wakeup_nbdwrite; FSDBG_TOP(548, bp, NBOFF(bp), bp->nb_flags, bp->nb_data); FSDBG(548, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff, bp->nb_dirtyend); @@ -830,7 +973,7 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) panic("ubc_upl_unmap failed"); bp->nb_data = NULL; } - if (bp->nb_flags & (NB_ERROR | NB_INVAL)) { + if (bp->nb_flags & (NB_ERROR | NB_INVAL | NB_NOCACHE)) { if (bp->nb_flags & (NB_READ | NB_INVAL)) upl_flags = UPL_ABORT_DUMP_PAGES; else @@ -858,28 +1001,34 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) } pagelist_cleanup_done: /* was this the last buffer in the file? 
*/ - if (NBOFF(bp) + bp->nb_bufsize > VTONFS(vp)->n_size) { + if (NBOFF(bp) + bp->nb_bufsize > (off_t)(VTONFS(vp)->n_size)) { /* if so, invalidate all pages of last buffer past EOF */ - int biosize = vp->v_mount->mnt_stat.f_iosize; - off_t off, size; - off = trunc_page_64(VTONFS(vp)->n_size) + PAGE_SIZE_64; - size = trunc_page_64(NBOFF(bp) + biosize) - off; - if (size) - ubc_invalidate(vp, off, size); + int biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + off_t start, end; + start = trunc_page_64(VTONFS(vp)->n_size) + PAGE_SIZE_64; + end = trunc_page_64(NBOFF(bp) + biosize); + if (end > start) { + if (!(rv = ubc_sync_range(vp, start, end, UBC_INVALIDATE))) + printf("nfs_buf_release(): ubc_sync_range failed!\n"); + } } CLR(bp->nb_flags, NB_PAGELIST); bp->nb_pagelist = NULL; } + lck_mtx_lock(nfs_buf_mutex); + + wakeup_needbuffer = wakeup_buffer = wakeup_nbdwrite = 0; + /* Wake up any processes waiting for any buffer to become free. */ if (nfsneedbuffer) { nfsneedbuffer = 0; - wakeup(&nfsneedbuffer); + wakeup_needbuffer = 1; } /* Wake up any processes waiting for _this_ buffer to become free. */ - if (ISSET(bp->nb_flags, NB_WANTED)) { - CLR(bp->nb_flags, NB_WANTED); - wakeup(bp); + if (ISSET(bp->nb_lflags, NBL_WANTED)) { + CLR(bp->nb_lflags, NBL_WANTED); + wakeup_buffer = 1; } /* If it's not cacheable, or an error, mark it invalid. */ @@ -893,31 +1042,34 @@ pagelist_cleanup_done: bp->nb_vnbufs.le_next = NFSNOLIST; } bp->nb_vp = NULL; - HOLDRELE(vp); /* if this was a delayed write, wakeup anyone */ /* waiting for delayed writes to complete */ if (ISSET(bp->nb_flags, NB_DELWRI)) { CLR(bp->nb_flags, NB_DELWRI); - nfs_nbdwrite--; - NFSBUFCNTCHK(); - wakeup((caddr_t)&nfs_nbdwrite); + OSAddAtomic(-1, (SInt32*)&nfs_nbdwrite); + NFSBUFCNTCHK(1); + wakeup_nbdwrite = 1; } - /* clear usage timestamp to allow immediate freeing */ - bp->nb_timestamp = 0; + /* invalidate usage timestamp to allow immediate freeing */ + NBUFSTAMPINVALIDATE(bp); /* put buffer at head of free list */ if (bp->nb_free.tqe_next != NFSNOLIST) panic("nfsbuf on freelist"); SET(bp->nb_flags, NB_INVAL); - TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free); - nfsbuffreecnt++; - if (freeup) - NFS_BUF_FREEUP(); + if (ISSET(bp->nb_flags, NB_META)) { + TAILQ_INSERT_HEAD(&nfsbuffreemeta, bp, nb_free); + nfsbuffreemetacnt++; + } else { + TAILQ_INSERT_HEAD(&nfsbuffree, bp, nb_free); + nfsbuffreecnt++; + } } else if (ISSET(bp->nb_flags, NB_DELWRI)) { /* put buffer at end of delwri list */ if (bp->nb_free.tqe_next != NFSNOLIST) panic("nfsbuf on freelist"); TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free); nfsbufdelwricnt++; + freeup = 0; } else { /* update usage timestamp */ microuptime(&now); @@ -932,16 +1084,26 @@ pagelist_cleanup_done: TAILQ_INSERT_TAIL(&nfsbuffree, bp, nb_free); nfsbuffreecnt++; } - if (freeup) - NFS_BUF_FREEUP(); } - NFSBUFCNTCHK(); + NFSBUFCNTCHK(1); /* Unlock the buffer. 
*/ - CLR(bp->nb_flags, (NB_ASYNC | NB_BUSY | NB_NOCACHE | NB_STABLE | NB_IOD)); + CLR(bp->nb_flags, (NB_ASYNC | NB_NOCACHE | NB_STABLE | NB_IOD)); + CLR(bp->nb_lflags, NBL_BUSY); FSDBG_BOT(548, bp, NBOFF(bp), bp->nb_flags, bp->nb_data); + + lck_mtx_unlock(nfs_buf_mutex); + + if (wakeup_needbuffer) + wakeup(&nfsneedbuffer); + if (wakeup_buffer) + wakeup(bp); + if (wakeup_nbdwrite) + wakeup(&nfs_nbdwrite); + if (freeup) + NFS_BUF_FREEUP(); } /* @@ -953,8 +1115,12 @@ nfs_buf_iowait(struct nfsbuf *bp) { FSDBG_TOP(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error); + lck_mtx_lock(nfs_buf_mutex); + while (!ISSET(bp->nb_flags, NB_DONE)) - tsleep(bp, PRIBIO + 1, "nfs_buf_iowait", 0); + msleep(bp, nfs_buf_mutex, PRIBIO + 1, "nfs_buf_iowait", 0); + + lck_mtx_unlock(nfs_buf_mutex); FSDBG_BOT(549, bp, NBOFF(bp), bp->nb_flags, bp->nb_error); @@ -973,13 +1139,11 @@ nfs_buf_iowait(struct nfsbuf *bp) void nfs_buf_iodone(struct nfsbuf *bp) { - struct vnode *vp; FSDBG_TOP(550, bp, NBOFF(bp), bp->nb_flags, bp->nb_error); if (ISSET(bp->nb_flags, NB_DONE)) panic("nfs_buf_iodone already"); - SET(bp->nb_flags, NB_DONE); /* note that it's done */ /* * I/O was done, so don't believe * the DIRTY state from VM anymore @@ -988,21 +1152,20 @@ nfs_buf_iodone(struct nfsbuf *bp) if (!ISSET(bp->nb_flags, NB_READ)) { CLR(bp->nb_flags, NB_WRITEINPROG); - vpwakeup(bp->nb_vp); - } - - /* Wakeup the throttled write operations as needed */ - vp = bp->nb_vp; - if (vp && (vp->v_flag & VTHROTTLED) - && (vp->v_numoutput <= (NFSBUFWRITE_THROTTLE / 3))) { - vp->v_flag &= ~VTHROTTLED; - wakeup((caddr_t)&vp->v_numoutput); + /* + * vnode_writedone() takes care of waking up + * any throttled write operations + */ + vnode_writedone(bp->nb_vp); } - - if (ISSET(bp->nb_flags, NB_ASYNC)) /* if async, release it */ + if (ISSET(bp->nb_flags, NB_ASYNC)) { /* if async, release it */ + SET(bp->nb_flags, NB_DONE); /* note that it's done */ nfs_buf_release(bp, 1); - else { /* or just wakeup the buffer */ - CLR(bp->nb_flags, NB_WANTED); + } else { /* or just wakeup the buffer */ + lck_mtx_lock(nfs_buf_mutex); + SET(bp->nb_flags, NB_DONE); /* note that it's done */ + CLR(bp->nb_lflags, NBL_WANTED); + lck_mtx_unlock(nfs_buf_mutex); wakeup(bp); } @@ -1010,10 +1173,9 @@ nfs_buf_iodone(struct nfsbuf *bp) } void -nfs_buf_write_delayed(struct nfsbuf *bp) +nfs_buf_write_delayed(struct nfsbuf *bp, proc_t p) { - struct proc *p = current_proc(); - struct vnode *vp = bp->nb_vp; + vnode_t vp = bp->nb_vp; FSDBG_TOP(551, bp, NBOFF(bp), bp->nb_flags, 0); FSDBG(551, bp, bp->nb_dirtyoff, bp->nb_dirtyend, bp->nb_dirty); @@ -1028,22 +1190,21 @@ nfs_buf_write_delayed(struct nfsbuf *bp) SET(bp->nb_flags, NB_DELWRI); if (p && p->p_stats) p->p_stats->p_ru.ru_oublock++; /* XXX */ - nfs_nbdwrite++; - NFSBUFCNTCHK(); + OSAddAtomic(1, (SInt32*)&nfs_nbdwrite); + NFSBUFCNTCHK(0); /* move to dirty list */ + lck_mtx_lock(nfs_buf_mutex); if (bp->nb_vnbufs.le_next != NFSNOLIST) LIST_REMOVE(bp, nb_vnbufs); LIST_INSERT_HEAD(&VTONFS(vp)->n_dirtyblkhd, bp, nb_vnbufs); + lck_mtx_unlock(nfs_buf_mutex); } /* * If the vnode has "too many" write operations in progress * wait for them to finish the IO */ - while (vp->v_numoutput >= NFSBUFWRITE_THROTTLE) { - vp->v_flag |= VTHROTTLED; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "nfs_buf_write_delayed", 0); - } + (void)vnode_waitforwrites(vp, VNODE_ASYNC_THROTTLE, 0, 0, "nfs_buf_write_delayed"); /* * If we have too many delayed write buffers, @@ -1068,40 +1229,187 @@ nfs_buf_write_delayed(struct nfsbuf *bp) return; } +/* + * add a reference to a 
buffer so it doesn't disappear while being used + * (must be called with nfs_buf_mutex held) + */ +void +nfs_buf_refget(struct nfsbuf *bp) +{ + bp->nb_refs++; +} +/* + * release a reference on a buffer + * (must be called with nfs_buf_mutex held) + */ +void +nfs_buf_refrele(struct nfsbuf *bp) +{ + bp->nb_refs--; +} + +/* + * mark a particular buffer as BUSY + * (must be called with nfs_buf_mutex held) + */ +errno_t +nfs_buf_acquire(struct nfsbuf *bp, int flags, int slpflag, int slptimeo) +{ + errno_t error; + struct timespec ts; + + if (ISSET(bp->nb_lflags, NBL_BUSY)) { + /* + * since the mutex_lock may block, the buffer + * may become BUSY, so we need to recheck for + * a NOWAIT request + */ + if (flags & NBAC_NOWAIT) + return (EBUSY); + SET(bp->nb_lflags, NBL_WANTED); + + ts.tv_sec = (slptimeo/100); + /* the hz value is 100; which leads to 10ms */ + ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000; + + error = msleep(bp, nfs_buf_mutex, slpflag | (PRIBIO + 1), + "nfs_buf_acquire", &ts); + if (error) + return (error); + return (EAGAIN); + } + if (flags & NBAC_REMOVE) + nfs_buf_remfree(bp); + SET(bp->nb_lflags, NBL_BUSY); + + return (0); +} + +/* + * simply drop the BUSY status of a buffer + * (must be called with nfs_buf_mutex held) + */ +void +nfs_buf_drop(struct nfsbuf *bp) +{ + int need_wakeup = 0; + + if (!ISSET(bp->nb_lflags, NBL_BUSY)) + panic("nfs_buf_drop: buffer not busy!"); + if (ISSET(bp->nb_lflags, NBL_WANTED)) { + /* + * delay the actual wakeup until after we + * clear NBL_BUSY and we've dropped nfs_buf_mutex + */ + need_wakeup = 1; + } + /* Unlock the buffer. */ + CLR(bp->nb_lflags, (NBL_BUSY | NBL_WANTED)); + + if (need_wakeup) + wakeup(bp); +} + +/* + * prepare for iterating over an nfsnode's buffer list + * this lock protects the queue manipulation + * (must be called with nfs_buf_mutex held) + */ +int +nfs_buf_iterprepare(struct nfsnode *np, struct nfsbuflists *iterheadp, int flags) +{ + struct nfsbuflists *listheadp; + + if (flags & NBI_DIRTY) + listheadp = &np->n_dirtyblkhd; + else + listheadp = &np->n_cleanblkhd; + + if ((flags & NBI_NOWAIT) && (np->n_bufiterflags & NBI_ITER)) { + LIST_INIT(iterheadp); + return(EWOULDBLOCK); + } + + while (np->n_bufiterflags & NBI_ITER) { + np->n_bufiterflags |= NBI_ITERWANT; + msleep(&np->n_bufiterflags, nfs_buf_mutex, 0, "nfs_buf_iterprepare", 0); + } + if (LIST_EMPTY(listheadp)) { + LIST_INIT(iterheadp); + return(EINVAL); + } + np->n_bufiterflags |= NBI_ITER; + + iterheadp->lh_first = listheadp->lh_first; + listheadp->lh_first->nb_vnbufs.le_prev = &iterheadp->lh_first; + LIST_INIT(listheadp); + + return(0); +} + +/* + * cleanup after iterating over an nfsnode's buffer list + * this lock protects the queue manipulation + * (must be called with nfs_buf_mutex held) + */ +void +nfs_buf_itercomplete(struct nfsnode *np, struct nfsbuflists *iterheadp, int flags) +{ + struct nfsbuflists * listheadp; + struct nfsbuf *bp; + + if (flags & NBI_DIRTY) + listheadp = &np->n_dirtyblkhd; + else + listheadp = &np->n_cleanblkhd; + + while (!LIST_EMPTY(iterheadp)) { + bp = LIST_FIRST(iterheadp); + LIST_REMOVE(bp, nb_vnbufs); + LIST_INSERT_HEAD(listheadp, bp, nb_vnbufs); + } + + np->n_bufiterflags &= ~NBI_ITER; + if (np->n_bufiterflags & NBI_ITERWANT) { + np->n_bufiterflags &= ~NBI_ITERWANT; + wakeup(&np->n_bufiterflags); + } +} + /* * Vnode op for read using bio * Any similarity to readip() is purely coincidental */ int -nfs_bioread(vp, uio, ioflag, cred, getpages) - register struct vnode *vp; - register struct uio *uio; - int ioflag; - struct 
ucred *cred; - int getpages; // XXX unused! +nfs_bioread( + vnode_t vp, + struct uio *uio, + __unused int ioflag, + kauth_cred_t cred, + proc_t p) { struct nfsnode *np = VTONFS(vp); - int biosize, i; + int biosize; off_t diff; - struct nfsbuf *bp = 0, *rabp; - struct vattr vattr; - struct proc *p; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - daddr_t lbn, rabn, lastrabn = -1; + struct nfsbuf *bp = NULL, *rabp; + struct nfs_vattr nvattr; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + daddr64_t lbn, rabn, lastrabn = -1, tlbn; int bufsize; int nra, error = 0, n = 0, on = 0; - int operation = (getpages? BLK_PAGEIN : BLK_READ); caddr_t dp; - struct dirent *direntp; + struct dirent *direntp = NULL; + enum vtype vtype; + int nocachereadahead = 0; - FSDBG_TOP(514, vp, uio->uio_offset, uio->uio_resid, ioflag); + FSDBG_TOP(514, vp, uio->uio_offset, uio_uio_resid(uio), ioflag); #if DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("nfs_read mode"); #endif - if (uio->uio_resid == 0) { + if (uio_uio_resid(uio) == 0) { FSDBG_BOT(514, vp, 0xd1e0001, 0, 0); return (0); } @@ -1109,16 +1417,15 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) FSDBG_BOT(514, vp, 0xd1e0002, 0, EINVAL); return (EINVAL); } - p = uio->uio_procp; - if ((nmp->nm_flag & NFSMNT_NFSV3) && - !(nmp->nm_state & NFSSTA_GOTFSINFO)) - (void)nfs_fsinfo(nmp, vp, cred, p); - biosize = vp->v_mount->mnt_stat.f_iosize; + + if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) + nfs_fsinfo(nmp, vp, cred, p); + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; + vtype = vnode_vtype(vp); /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. - * For nqnfs, full cache consistency is maintained within the loop. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, @@ -1128,126 +1435,121 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) * attributes. * NB: This implies that cache data can be read when up to * NFS_MAXATTRTIMEO seconds out of date. If you find that you need - * current attributes this could be forced by setting n_xid to 0 - * before the VOP_GETATTR() call. + * current attributes this could be forced by setting calling + * NATTRINVALIDATE() before the nfs_getattr() call. 
*/ - if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { - if (np->n_flag & NMODIFIED) { - if (vp->v_type != VREG) { - if (vp->v_type != VDIR) - panic("nfs: bioread, not dir"); - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(514, vp, 0xd1e0003, 0, error); - return (error); - } - } - np->n_xid = 0; - error = VOP_GETATTR(vp, &vattr, cred, p); + if (np->n_flag & NNEEDINVALIDATE) { + np->n_flag &= ~NNEEDINVALIDATE; + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1); + } + if (np->n_flag & NMODIFIED) { + if (vtype != VREG) { + if (vtype != VDIR) + panic("nfs: bioread, not dir"); + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { - FSDBG_BOT(514, vp, 0xd1e0004, 0, error); + FSDBG_BOT(514, vp, 0xd1e0003, 0, error); return (error); } - if (vp->v_type == VDIR) { - /* if directory changed, purge any name cache entries */ - if (np->n_ncmtime != vattr.va_mtime.tv_sec) + } + NATTRINVALIDATE(np); + error = nfs_getattr(vp, &nvattr, cred, p); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0004, 0, error); + return (error); + } + if (vtype == VDIR) { + /* if directory changed, purge any name cache entries */ + if (nfstimespeccmp(&np->n_ncmtime, &nvattr.nva_mtime, !=)) + cache_purge(vp); + np->n_ncmtime = nvattr.nva_mtime; + } + np->n_mtime = nvattr.nva_mtime; + } else { + error = nfs_getattr(vp, &nvattr, cred, p); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0005, 0, error); + return (error); + } + if (nfstimespeccmp(&np->n_mtime, &nvattr.nva_mtime, !=)) { + if (vtype == VDIR) { + nfs_invaldir(vp); + /* purge name cache entries */ + if (nfstimespeccmp(&np->n_ncmtime, &nvattr.nva_mtime, !=)) cache_purge(vp); - np->n_ncmtime = vattr.va_mtime.tv_sec; } - np->n_mtime = vattr.va_mtime.tv_sec; - } else { - error = VOP_GETATTR(vp, &vattr, cred, p); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { - FSDBG_BOT(514, vp, 0xd1e0005, 0, error); + FSDBG_BOT(514, vp, 0xd1e0006, 0, error); return (error); } - if (np->n_mtime != vattr.va_mtime.tv_sec) { - if (vp->v_type == VDIR) { - nfs_invaldir(vp); - /* purge name cache entries */ - if (np->n_ncmtime != vattr.va_mtime.tv_sec) - cache_purge(vp); - } + if (vtype == VDIR) + np->n_ncmtime = nvattr.nva_mtime; + np->n_mtime = nvattr.nva_mtime; + } + } + + if (vnode_isnocache(vp)) { + if (!(np->n_flag & NNOCACHE)) { + if (NVALIDBUFS(np)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { - FSDBG_BOT(514, vp, 0xd1e0006, 0, error); + FSDBG_BOT(514, vp, 0xd1e000a, 0, error); return (error); } - if (vp->v_type == VDIR) - np->n_ncmtime = vattr.va_mtime.tv_sec; - np->n_mtime = vattr.va_mtime.tv_sec; } + np->n_flag |= NNOCACHE; } + } else if (np->n_flag & NNOCACHE) { + np->n_flag &= ~NNOCACHE; } - do { - /* - * Get a valid lease. If cached data is stale, flush it. 
- */ - if (nmp->nm_flag & NFSMNT_NQNFS) { - if (NQNFS_CKINVALID(vp, np, ND_READ)) { - do { - error = nqnfs_getlease(vp, ND_READ, cred, p); - } while (error == NQNFS_EXPIRED); - if (error) { - FSDBG_BOT(514, vp, 0xd1e0007, 0, error); - return (error); - } - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE) || - ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { - if (vp->v_type == VDIR) - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(514, vp, 0xd1e0008, 0, error); - return (error); - } - np->n_brev = np->n_lrev; - } - } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(514, vp, 0xd1e0009, 0, error); - return (error); - } - } - } - if ((np->n_flag & NQNFSNONCACHE) || (vp->v_flag & VNOCACHE_DATA)) { - if ((vp->v_flag & VNOCACHE_DATA) && - (np->n_dirtyblkhd.lh_first || np->n_cleanblkhd.lh_first)) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(514, vp, 0xd1e000a, 0, error); + do { + if (np->n_flag & NNOCACHE) { + switch (vtype) { + case VREG: + /* + * If we have only a block or so to read, + * just do the rpc directly. + * If we have a couple blocks or more to read, + * then we'll take advantage of readahead within + * this loop to try to fetch all the data in parallel + */ + if (!nocachereadahead && (uio_uio_resid(uio) < 2*biosize)) { + error = nfs_readrpc(vp, uio, cred, p); + FSDBG_BOT(514, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } - } - switch (vp->v_type) { - case VREG: - error = nfs_readrpc(vp, uio, cred); - FSDBG_BOT(514, vp, uio->uio_offset, uio->uio_resid, error); - return (error); + nocachereadahead = 1; + break; case VLNK: - error = nfs_readlinkrpc(vp, uio, cred); - FSDBG_BOT(514, vp, uio->uio_offset, uio->uio_resid, error); + error = nfs_readlinkrpc(vp, uio, cred, p); + FSDBG_BOT(514, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); case VDIR: break; default: - printf(" NQNFSNONCACHE: type %x unexpected\n", vp->v_type); + printf(" NFSNOCACHE: type %x unexpected\n", vtype); }; } - switch (vp->v_type) { + switch (vtype) { case VREG: lbn = uio->uio_offset / biosize; /* * Copy directly from any cached pages without grabbing the bufs. + * + * Note: for "nocache" reads, we don't copy directly from UBC + * because any cached pages will be for readahead buffers that + * need to be invalidated anyway before we finish this request. */ - if (uio->uio_segflg == UIO_USERSPACE) { - int io_resid = uio->uio_resid; + if (!(np->n_flag & NNOCACHE) && + (uio->uio_segflg == UIO_USERSPACE32 || + uio->uio_segflg == UIO_USERSPACE64 || + uio->uio_segflg == UIO_USERSPACE)) { + // LP64todo - fix this! 
+ int io_resid = uio_uio_resid(uio); diff = np->n_size - uio->uio_offset; if (diff < io_resid) io_resid = diff; @@ -1260,7 +1562,7 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) } /* count any biocache reads that we just copied directly */ if (lbn != uio->uio_offset / biosize) { - nfsstats.biocache_reads += (uio->uio_offset / biosize) - lbn; + OSAddAtomic((uio->uio_offset / biosize) - lbn, (SInt32*)&nfsstats.biocache_reads); FSDBG(514, vp, 0xcacefeed, uio->uio_offset, error); } } @@ -1280,16 +1582,23 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) continue; } lastrabn = rabn; - if ((off_t)rabn * biosize >= np->n_size) + if ((off_t)rabn * biosize >= (off_t)np->n_size) + break; + if ((np->n_flag & NNOCACHE) && + (((off_t)rabn * biosize) >= (uio->uio_offset + uio_uio_resid(uio)))) + /* for uncached readahead, don't go beyond end of request */ break; /* check if block exists and is valid. */ - rabp = nfs_buf_incore(vp, rabn); - if (rabp && nfs_buf_upl_valid_range(rabp, 0, rabp->nb_bufsize)) + error = nfs_buf_get(vp, rabn, biosize, p, NBLK_READ|NBLK_NOWAIT, &rabp); + if (error) { + FSDBG_BOT(514, vp, 0xd1e000b, 1, error); + return (error); + } + if (!rabp) + continue; + if (nfs_buf_upl_valid_range(rabp, 0, rabp->nb_bufsize)) { + nfs_buf_release(rabp, 1); continue; - rabp = nfs_buf_get(vp, rabn, biosize, p, operation); - if (!rabp) { - FSDBG_BOT(514, vp, 0xd1e000b, 0, EINTR); - return (EINTR); } if (!ISSET(rabp->nb_flags, (NB_CACHE|NB_DELWRI))) { SET(rabp->nb_flags, (NB_READ|NB_ASYNC)); @@ -1303,12 +1612,12 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) } } - if ((uio->uio_resid <= 0) || (uio->uio_offset >= np->n_size)) { - FSDBG_BOT(514, vp, uio->uio_offset, uio->uio_resid, 0xaaaaaaaa); + if ((uio_uio_resid(uio) <= 0) || (uio->uio_offset >= (off_t)np->n_size)) { + FSDBG_BOT(514, vp, uio->uio_offset, uio_uio_resid(uio), 0xaaaaaaaa); return (0); } - nfsstats.biocache_reads++; + OSAddAtomic(1, (SInt32*)&nfsstats.biocache_reads); /* * If the block is in the cache and has the required data @@ -1318,13 +1627,14 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) */ again: bufsize = biosize; - n = min((unsigned)(bufsize - on), uio->uio_resid); + // LP64todo - fix this! 
+ n = min((unsigned)(bufsize - on), uio_uio_resid(uio)); diff = np->n_size - uio->uio_offset; if (diff < n) n = diff; - bp = nfs_buf_get(vp, lbn, bufsize, p, operation); - if (!bp) { + error = nfs_buf_get(vp, lbn, bufsize, p, NBLK_READ, &bp); + if (error) { FSDBG_BOT(514, vp, 0xd1e000c, 0, EINTR); return (EINTR); } @@ -1350,8 +1660,8 @@ again: if (bp->nb_validoff < 0) { /* valid range isn't set up, so */ /* set it to what we know is valid */ - bp->nb_validoff = trunc_page_32(on); - bp->nb_validend = round_page_32(on+n); + bp->nb_validoff = trunc_page(on); + bp->nb_validend = round_page(on+n); nfs_buf_normalize_valid_range(np, bp); } goto buffer_ready; @@ -1363,11 +1673,10 @@ again: /* so write the buffer out and try again */ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL)); SET(bp->nb_flags, NB_ASYNC); - /* - * NFS has embedded ucred so crhold() risks zone corruption - */ - if (bp->nb_wcred == NOCRED) - bp->nb_wcred = crdup(cred); + if (bp->nb_wcred == NOCRED) { + kauth_cred_ref(cred); + bp->nb_wcred = cred; + } error = nfs_buf_write(bp); if (error) { FSDBG_BOT(514, vp, 0xd1e000d, 0, error); @@ -1382,20 +1691,22 @@ again: bp->nb_valid = 0; } else { /* read the page range in */ - struct iovec iov; - struct uio auio; - auio.uio_iov = &iov; - auio.uio_iovcnt = 1; - auio.uio_offset = NBOFF(bp) + firstpg * PAGE_SIZE_64; - auio.uio_resid = (lastpg - firstpg + 1) * PAGE_SIZE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_procp = p; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + NFS_BUF_MAP(bp); - iov.iov_base = bp->nb_data + firstpg * PAGE_SIZE; - iov.iov_len = auio.uio_resid; - error = nfs_readrpc(vp, &auio, cred); + auio = uio_createwithbuffer(1, (NBOFF(bp) + firstpg * PAGE_SIZE_64), + UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + error = ENOMEM; + } else { + uio_addiov(auio, CAST_USER_ADDR_T((bp->nb_data + firstpg * PAGE_SIZE)), + ((lastpg - firstpg + 1) * PAGE_SIZE)); + error = nfs_readrpc(vp, auio, cred, p); + } if (error) { + if (np->n_flag & NNOCACHE) + SET(bp->nb_flags, NB_NOCACHE); nfs_buf_release(bp, 1); FSDBG_BOT(514, vp, 0xd1e000e, 0, error); return (error); @@ -1404,10 +1715,10 @@ again: bp->nb_validoff = trunc_page_32(on); bp->nb_validend = round_page_32(on+n); nfs_buf_normalize_valid_range(np, bp); - if (auio.uio_resid > 0) { + if (uio_resid(auio) > 0) { /* if short read, must have hit EOF, */ /* so zero the rest of the range */ - bzero(iov.iov_base, auio.uio_resid); + bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio)); } /* mark the pages (successfully read) as valid */ for (pg=firstpg; pg <= lastpg; pg++) @@ -1420,13 +1731,14 @@ again: CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL)); error = nfs_doio(bp, cred, p); if (error) { + if (np->n_flag & NNOCACHE) + SET(bp->nb_flags, NB_NOCACHE); nfs_buf_release(bp, 1); FSDBG_BOT(514, vp, 0xd1e000f, 0, error); return (error); } } buffer_ready: - vp->v_lastr = lbn; /* validate read range against valid range and clip */ if (bp->nb_validend > 0) { diff = (on >= bp->nb_validend) ? 
0 : (bp->nb_validend - on); @@ -1437,11 +1749,11 @@ buffer_ready: NFS_BUF_MAP(bp); break; case VLNK: - nfsstats.biocache_readlinks++; - bp = nfs_buf_get(vp, (daddr_t)0, NFS_MAXPATHLEN, p, operation); - if (!bp) { - FSDBG_BOT(514, vp, 0xd1e0010, 0, EINTR); - return (EINTR); + OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readlinks); + error = nfs_buf_get(vp, 0, NFS_MAXPATHLEN, p, NBLK_READ, &bp); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0010, 0, error); + return (error); } if (!ISSET(bp->nb_flags, NB_CACHE)) { SET(bp->nb_flags, NB_READ); @@ -1453,21 +1765,22 @@ buffer_ready: return (error); } } - n = min(uio->uio_resid, bp->nb_validend); + // LP64todo - fix this! + n = min(uio_uio_resid(uio), bp->nb_validend); on = 0; break; case VDIR: - nfsstats.biocache_readdirs++; + OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readdirs); if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { FSDBG_BOT(514, vp, 0xde0f0001, 0, 0); return (0); } lbn = uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); - bp = nfs_buf_get(vp, lbn, NFS_DIRBLKSIZ, p, operation); - if (!bp) { - FSDBG_BOT(514, vp, 0xd1e0012, 0, EINTR); - return (EINTR); + error = nfs_buf_get(vp, lbn, NFS_DIRBLKSIZ, p, NBLK_READ, &bp); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0012, 0, error); + return (error); } if (!ISSET(bp->nb_flags, NB_CACHE)) { SET(bp->nb_flags, NB_READ); @@ -1484,16 +1797,16 @@ buffer_ready: * reading from the beginning to get all the * offset cookies. */ - for (i = 0; i <= lbn && !error; i++) { + for (tlbn = 0; tlbn <= lbn && !error; tlbn++) { if (np->n_direofoffset - && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) { + && (tlbn * NFS_DIRBLKSIZ) >= np->n_direofoffset) { FSDBG_BOT(514, vp, 0xde0f0002, 0, 0); return (0); } - bp = nfs_buf_get(vp, i, NFS_DIRBLKSIZ, p, operation); - if (!bp) { - FSDBG_BOT(514, vp, 0xd1e0013, 0, EINTR); - return (EINTR); + error = nfs_buf_get(vp, tlbn, NFS_DIRBLKSIZ, p, NBLK_READ, &bp); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0013, 0, error); + return (error); } if (!ISSET(bp->nb_flags, NB_CACHE)) { SET(bp->nb_flags, NB_READ); @@ -1511,7 +1824,7 @@ buffer_ready: * is not the block we want, we throw away the * block and go for the next one via the for loop. */ - if (error || i < lbn) + if (error || tlbn < lbn) nfs_buf_release(bp, 1); } } @@ -1534,10 +1847,12 @@ buffer_ready: if (nfs_numasync > 0 && nmp->nm_readahead > 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && - !(np->n_flag & NQNFSNONCACHE) && - !nfs_buf_incore(vp, lbn + 1)) { - rabp = nfs_buf_get(vp, lbn + 1, NFS_DIRBLKSIZ, p, - operation); + !nfs_buf_is_incore(vp, lbn + 1)) { + error = nfs_buf_get(vp, lbn + 1, NFS_DIRBLKSIZ, p, NBLK_READ|NBLK_NOWAIT, &rabp); + if (error) { + FSDBG_BOT(514, vp, 0xd1e0015, 0, error); + return (error); + } if (rabp) { if (!ISSET(rabp->nb_flags, (NB_CACHE))) { SET(rabp->nb_flags, (NB_READ | NB_ASYNC)); @@ -1555,7 +1870,8 @@ buffer_ready: * Make sure we use a signed variant of min() since * the second term may be negative. */ - n = lmin(uio->uio_resid, bp->nb_validend - on); + // LP64todo - fix this! 
+ n = lmin(uio_uio_resid(uio), bp->nb_validend - on); /* * We keep track of the directory eof in * np->n_direofoffset and chop it off as an @@ -1583,28 +1899,30 @@ buffer_ready: } break; default: - printf("nfs_bioread: type %x unexpected\n",vp->v_type); - FSDBG_BOT(514, vp, 0xd1e0015, 0, EINVAL); + printf("nfs_bioread: type %x unexpected\n", vtype); + FSDBG_BOT(514, vp, 0xd1e0016, 0, EINVAL); return (EINVAL); }; if (n > 0) { error = uiomove(bp->nb_data + on, (int)n, uio); } - switch (vp->v_type) { + switch (vtype) { case VREG: + if (np->n_flag & NNOCACHE) + SET(bp->nb_flags, NB_NOCACHE); break; case VLNK: n = 0; break; case VDIR: - if (np->n_flag & NQNFSNONCACHE) - SET(bp->nb_flags, NB_INVAL); + break; + default: break; } - nfs_buf_release(bp, 1); - } while (error == 0 && uio->uio_resid > 0 && n > 0); - FSDBG_BOT(514, vp, uio->uio_offset, uio->uio_resid, error); + nfs_buf_release(bp, 1); + } while (error == 0 && uio_uio_resid(uio) > 0 && n > 0); + FSDBG_BOT(514, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } @@ -1614,42 +1932,54 @@ buffer_ready: */ int nfs_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; + struct vnop_write_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { struct uio *uio = ap->a_uio; - struct proc *p = uio->uio_procp; - struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); - struct ucred *cred = ap->a_cred; + proc_t p; + kauth_cred_t cred; int ioflag = ap->a_ioflag; struct nfsbuf *bp; - struct vattr vattr; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - daddr_t lbn; - int biosize, bufsize, writeop; - int n, on, error = 0, iomode, must_commit; + struct nfs_vattr nvattr; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + daddr64_t lbn; + int biosize, bufsize; + int n, on, error = 0; off_t boff, start, end, cureof; - struct iovec iov; + struct iovec_32 iov; struct uio auio; - FSDBG_TOP(515, vp, uio->uio_offset, uio->uio_resid, ioflag); + FSDBG_TOP(515, vp, uio->uio_offset, uio_uio_resid(uio), ioflag); #if DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); - if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc()) + if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("nfs_write proc"); #endif - if (vp->v_type != VREG) + + p = vfs_context_proc(ap->a_context); + cred = vfs_context_ucred(ap->a_context); + + if (vnode_vtype(vp) != VREG) return (EIO); + + np->n_flag |= NWRBUSY; + + if (np->n_flag & NNEEDINVALIDATE) { + np->n_flag &= ~NNEEDINVALIDATE; + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1); + } if (np->n_flag & NWRITEERR) { - np->n_flag &= ~NWRITEERR; - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, np->n_error); + np->n_flag &= ~(NWRITEERR | NWRBUSY); + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), np->n_error); return (np->n_error); } if ((nmp->nm_flag & NFSMNT_NFSV3) && @@ -1657,17 +1987,19 @@ nfs_write(ap) (void)nfs_fsinfo(nmp, vp, cred, p); if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { - np->n_xid = 0; + NATTRINVALIDATE(np); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { + np->n_flag &= ~NWRBUSY; FSDBG_BOT(515, vp, uio->uio_offset, 0x10bad01, error); return (error); } } if (ioflag & IO_APPEND) { - np->n_xid = 0; - error = VOP_GETATTR(vp, &vattr, cred, p); + NATTRINVALIDATE(np); + error = nfs_getattr(vp, &nvattr, cred, p); if (error) { + np->n_flag &= ~NWRBUSY; FSDBG_BOT(515, vp, 
uio->uio_offset, 0x10bad02, error); return (error); } @@ -1675,71 +2007,40 @@ nfs_write(ap) } } if (uio->uio_offset < 0) { + np->n_flag &= ~NWRBUSY; FSDBG_BOT(515, vp, uio->uio_offset, 0xbad0ff, EINVAL); return (EINVAL); } - if (uio->uio_resid == 0) { - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, 0); + if (uio_uio_resid(uio) == 0) { + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), 0); return (0); } - /* - * Maybe this should be above the vnode op call, but so long as - * file servers have no limits, i don't think it matters - */ - if (p && uio->uio_offset + uio->uio_resid > - p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { - psignal(p, SIGXFSZ); - FSDBG_BOT(515, vp, uio->uio_offset, 0x2b1f, EFBIG); - return (EFBIG); - } - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; - do { - /* - * Check for a valid write lease. - */ - if ((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_WRITE)) { - do { - error = nqnfs_getlease(vp, ND_WRITE, cred, p); - } while (error == NQNFS_EXPIRED); - if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, 0x11110001, error); - return (error); - } - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { + if (vnode_isnocache(vp)) { + if (!(np->n_flag & NNOCACHE)) { + if (NVALIDBUFS(np)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, 0x11110002, error); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, 0, 0, error); return (error); } - np->n_brev = np->n_lrev; - } - } - if (ISSET(vp->v_flag, VNOCACHE_DATA) && - (np->n_dirtyblkhd.lh_first || np->n_cleanblkhd.lh_first)) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(515, vp, 0, 0, error); - return (error); } + np->n_flag |= NNOCACHE; } - if (((np->n_flag & NQNFSNONCACHE) || - ISSET(vp->v_flag, VNOCACHE_DATA)) && - uio->uio_iovcnt == 1) { - iomode = NFSV3WRITE_FILESYNC; - error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); - if (must_commit) - nfs_clearcommit(vp->v_mount); - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, error); - return (error); - } - nfsstats.biocache_writes++; + } else if (np->n_flag & NNOCACHE) { + np->n_flag &= ~NNOCACHE; + } + + do { + OSAddAtomic(1, (SInt32*)&nfsstats.biocache_writes); lbn = uio->uio_offset / biosize; on = uio->uio_offset % biosize; - n = min((unsigned)(biosize - on), uio->uio_resid); + // LP64todo - fix this + n = min((unsigned)(biosize - on), uio_uio_resid(uio)); again: bufsize = biosize; /* @@ -1748,22 +2049,22 @@ again: * either has no dirty region or that the given range is * contiguous with the existing dirty region. 
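 *
 * For reference, a worked example of the block arithmetic a few lines
 * up, assuming a typical biosize of 32768 reported by vfs_statfs():
 *
 *	uio->uio_offset = 40000
 *	lbn = 40000 / 32768 = 1		(second block of the file)
 *	on  = 40000 % 32768 = 7232	(starting offset within that block)
 *	n   = min(32768 - 7232, resid)	(bytes handled on this pass)
 *
 * so a 64KB write starting at offset 40000 takes three passes through
 * this loop: 25536 bytes, then 32768, then the remaining 7232.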
*/ - bp = nfs_buf_get(vp, lbn, bufsize, p, BLK_WRITE); - if (!bp) { - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, EINTR); - return (EINTR); + error = nfs_buf_get(vp, lbn, bufsize, p, NBLK_WRITE, &bp); + if (error) { + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); + return (error); } /* map the block because we know we're going to write to it */ NFS_BUF_MAP(bp); - if (ISSET(vp->v_flag, VNOCACHE_DATA)) - SET(bp->nb_flags, (NB_NOCACHE|NB_INVAL)); + if (np->n_flag & NNOCACHE) + SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE)); - /* - * NFS has embedded ucred so crhold() risks zone corruption - */ - if (bp->nb_wcred == NOCRED) - bp->nb_wcred = crdup(cred); + if (bp->nb_wcred == NOCRED) { + kauth_cred_ref(cred); + bp->nb_wcred = cred; + } /* * If there's already a dirty range AND dirty pages in this block we @@ -1787,7 +2088,8 @@ again: SET(bp->nb_flags, (NB_ASYNC | NB_STABLE)); error = nfs_buf_write(bp); if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, error); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } goto again; @@ -1808,7 +2110,8 @@ again: SET(bp->nb_flags, (NB_ASYNC | NB_STABLE)); error = nfs_buf_write(bp); if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, error); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } goto again; @@ -1821,7 +2124,7 @@ again: if (NBPGDIRTY(bp,lastpg)) { bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE; /* clip to EOF */ - if (NBOFF(bp) + bp->nb_dirtyend > np->n_size) + if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) bp->nb_dirtyend = np->n_size - NBOFF(bp); } else bp->nb_dirtyend = on+n; @@ -1835,16 +2138,22 @@ again: * and zero the new bytes. */ cureof = (off_t)np->n_size; - if (uio->uio_offset + n > np->n_size) { + if (uio->uio_offset + n > (off_t)np->n_size) { struct nfsbuf *eofbp = NULL; - daddr_t eofbn = np->n_size / biosize; + daddr64_t eofbn = np->n_size / biosize; int eofoff = np->n_size % biosize; int neweofoff = (uio->uio_offset + n) % biosize; FSDBG(515, 0xb1ffa000, uio->uio_offset + n, eofoff, neweofoff); - if (eofoff && eofbn < lbn && nfs_buf_incore(vp, eofbn)) - eofbp = nfs_buf_get(vp, eofbn, biosize, p, BLK_WRITE); + if (eofoff && (eofbn < lbn)) { + error = nfs_buf_get(vp, eofbn, biosize, p, NBLK_WRITE|NBLK_ONLYVALID, &eofbp); + if (error) { + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); + return (error); + } + } /* if we're extending within the same last block */ /* and the block is flagged as being cached... */ @@ -1891,8 +2200,8 @@ again: */ char *d; int i; - if (ISSET(vp->v_flag, VNOCACHE_DATA)) - SET(eofbp->nb_flags, (NB_NOCACHE|NB_INVAL)); + if (np->n_flag & NNOCACHE) + SET(eofbp->nb_flags, (NB_NOCACHE|NB_STABLE)); NFS_BUF_MAP(eofbp); FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e); d = eofbp->nb_data; @@ -1915,7 +2224,7 @@ again: * If dirtyend exceeds file size, chop it down. This should * not occur unless there is a race. 
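 *
 * (NBOFF(bp) is the buffer's base offset within the file, essentially
 * the buffer's logical block number times its size.  For example, with
 * a 32768-byte buffer at lbn 3 and np->n_size of 100000, NBOFF(bp) is
 * 98304, so nb_dirtyend is clipped to 100000 - 98304 = 1696.)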
*/ - if (NBOFF(bp) + bp->nb_dirtyend > np->n_size) + if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) bp->nb_dirtyend = np->n_size - NBOFF(bp); /* * UBC doesn't handle partial pages, so we need to make sure @@ -1979,34 +2288,39 @@ again: NFS_BUF_MAP(bp); /* setup uio for read(s) */ boff = NBOFF(bp); - auio.uio_iov = &iov; + auio.uio_iovs.iov32p = &iov; auio.uio_iovcnt = 1; +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ auio.uio_segflg = UIO_SYSSPACE; +#else + auio.uio_segflg = UIO_SYSSPACE32; +#endif auio.uio_rw = UIO_READ; - auio.uio_procp = p; if (dirtypg <= (end-1)/PAGE_SIZE) { /* there's a dirty page in the way, so just do two reads */ /* we'll read the preceding data here */ auio.uio_offset = boff + start; - auio.uio_resid = iov.iov_len = on - start; - iov.iov_base = bp->nb_data + start; - error = nfs_readrpc(vp, &auio, cred); + iov.iov_len = on - start; + uio_uio_resid_set(&auio, iov.iov_len); + iov.iov_base = (uintptr_t) bp->nb_data + start; + error = nfs_readrpc(vp, &auio, cred, p); if (error) { bp->nb_error = error; SET(bp->nb_flags, NB_ERROR); printf("nfs_write: readrpc %d", error); } - if (auio.uio_resid > 0) { - FSDBG(516, bp, iov.iov_base - bp->nb_data, auio.uio_resid, 0xd00dee01); - bzero(iov.iov_base, auio.uio_resid); + if (uio_uio_resid(&auio) > 0) { + FSDBG(516, bp, iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee01); + // LP64todo - fix this + bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio)); } /* update validoff/validend if necessary */ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) bp->nb_validoff = start; if ((bp->nb_validend < 0) || (bp->nb_validend < on)) bp->nb_validend = on; - if (np->n_size > boff + bp->nb_validend) + if ((off_t)np->n_size > boff + bp->nb_validend) bp->nb_validend = min(np->n_size - (boff + start), biosize); /* validate any pages before the write offset */ for (; start < on/PAGE_SIZE; start+=PAGE_SIZE) @@ -2037,17 +2351,19 @@ again: } else { /* now we'll read the (rest of the) data */ auio.uio_offset = boff + start; - auio.uio_resid = iov.iov_len = end - start; - iov.iov_base = bp->nb_data + start; - error = nfs_readrpc(vp, &auio, cred); + iov.iov_len = end - start; + uio_uio_resid_set(&auio, iov.iov_len); + iov.iov_base = (uintptr_t) (bp->nb_data + start); + error = nfs_readrpc(vp, &auio, cred, p); if (error) { bp->nb_error = error; SET(bp->nb_flags, NB_ERROR); printf("nfs_write: readrpc %d", error); } - if (auio.uio_resid > 0) { - FSDBG(516, bp, iov.iov_base - bp->nb_data, auio.uio_resid, 0xd00dee02); - bzero(iov.iov_base, auio.uio_resid); + if (uio_uio_resid(&auio) > 0) { + FSDBG(516, bp, iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee02); + // LP64todo - fix this + bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio)); } } /* update validoff/validend if necessary */ @@ -2055,7 +2371,7 @@ again: bp->nb_validoff = start; if ((bp->nb_validend < 0) || (bp->nb_validend < end)) bp->nb_validend = end; - if (np->n_size > boff + bp->nb_validend) + if ((off_t)np->n_size > boff + bp->nb_validend) bp->nb_validend = min(np->n_size - (boff + start), biosize); /* validate any pages before the write offset's page */ for (; start < trunc_page_32(on); start+=PAGE_SIZE) @@ -2070,44 +2386,20 @@ again: if (ISSET(bp->nb_flags, NB_ERROR)) { error = bp->nb_error; nfs_buf_release(bp, 1); - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, error); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } np->n_flag |= NMODIFIED; - /* - * Check for 
valid write lease and get one as required. - * In case nfs_buf_get() and/or nfs_buf_write() delayed us. - */ - if ((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_WRITE)) { - do { - error = nqnfs_getlease(vp, ND_WRITE, cred, p); - } while (error == NQNFS_EXPIRED); - if (error) { - nfs_buf_release(bp, 1); - FSDBG_BOT(515, vp, uio->uio_offset, 0x11220001, error); - return (error); - } - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { - nfs_buf_release(bp, 1); - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, 0x11220002, error); - return (error); - } - np->n_brev = np->n_lrev; - goto again; - } - } NFS_BUF_MAP(bp); error = uiomove((char *)bp->nb_data + on, n, uio); if (error) { SET(bp->nb_flags, NB_ERROR); nfs_buf_release(bp, 1); - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, error); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), error); return (error); } @@ -2153,36 +2445,29 @@ again: } CLR(bp->nb_flags, NB_NEEDCOMMIT); - if ((np->n_flag & NQNFSNONCACHE) || - (ioflag & IO_SYNC) || (vp->v_flag & VNOCACHE_DATA)) { + if (ioflag & IO_SYNC) { bp->nb_proc = p; error = nfs_buf_write(bp); if (error) { + np->n_flag &= ~NWRBUSY; FSDBG_BOT(515, vp, uio->uio_offset, - uio->uio_resid, error); + uio_uio_resid(uio), error); return (error); } - if (np->n_flag & NQNFSNONCACHE) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - FSDBG_BOT(515, vp, uio->uio_offset, - uio->uio_resid, error); - return (error); - } - } - } else if ((n + on) == biosize && (nmp->nm_flag & NFSMNT_NQNFS) == 0) { - bp->nb_proc = (struct proc *)0; + } else if (((n + on) == biosize) || (np->n_flag & NNOCACHE)) { + bp->nb_proc = NULL; SET(bp->nb_flags, NB_ASYNC); nfs_buf_write(bp); } else - nfs_buf_write_delayed(bp); + nfs_buf_write_delayed(bp, p); - if (np->n_needcommitcnt > (nbuf/16)) - nfs_flushcommits(vp, p); + if (np->n_needcommitcnt > (nfsbufcnt/16)) + nfs_flushcommits(vp, p, 1); - } while (uio->uio_resid > 0 && n > 0); + } while (uio_uio_resid(uio) > 0 && n > 0); - FSDBG_BOT(515, vp, uio->uio_offset, uio->uio_resid, 0); + np->n_flag &= ~NWRBUSY; + FSDBG_BOT(515, vp, uio->uio_offset, uio_uio_resid(uio), 0); return (0); } @@ -2191,60 +2476,63 @@ again: * Called with the underlying object locked. 
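 *
 * The loop below is built on the nfs_buf_iterprepare()/itercomplete()
 * helpers introduced earlier in this patch.  In sketch form the
 * iteration pattern looks like this (the real loop additionally
 * acquires each buffer with NBAC_REMOVE and may write it out):
 *
 *	lck_mtx_lock(nfs_buf_mutex);
 *	if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) {
 *		while ((bp = LIST_FIRST(&blist))) {
 *			LIST_REMOVE(bp, nb_vnbufs);
 *			LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs);
 *			... examine or acquire bp ...
 *		}
 *		nfs_buf_itercomplete(np, &blist, NBI_DIRTY);
 *	}
 *	lck_mtx_unlock(nfs_buf_mutex);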
*/ static int -nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, slptimeo) - register struct vnode *vp; - int flags; - struct ucred *cred; - struct proc *p; - int slpflag, slptimeo; +nfs_vinvalbuf_internal( + vnode_t vp, + int flags, + kauth_cred_t cred, + proc_t p, + int slpflag, + int slptimeo) { struct nfsbuf *bp; - struct nfsbuf *nbp, *blist; - int s, error = 0; + struct nfsbuflists blist; + int list, error = 0; struct nfsnode *np = VTONFS(vp); if (flags & V_SAVE) { - if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) + if ((error = nfs_flush(vp, MNT_WAIT, cred, p, + (flags & V_IGNORE_WRITEERR)))) return (error); - if (np->n_dirtyblkhd.lh_first) + if (!LIST_EMPTY(&np->n_dirtyblkhd)) panic("nfs_vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", - vp, np->n_dirtyblkhd.lh_first); + vp, LIST_FIRST(&np->n_dirtyblkhd)); } + lck_mtx_lock(nfs_buf_mutex); for (;;) { - blist = np->n_cleanblkhd.lh_first; - if (!blist) - blist = np->n_dirtyblkhd.lh_first; - if (!blist) - break; - - for (bp = blist; bp; bp = nbp) { - nbp = bp->nb_vnbufs.le_next; - s = splbio(); - if (ISSET(bp->nb_flags, NB_BUSY)) { - SET(bp->nb_flags, NB_WANTED); - FSDBG_TOP(556, vp, bp, NBOFF(bp), bp->nb_flags); - error = tsleep((caddr_t)bp, - slpflag | (PRIBIO + 1), "nfs_vinvalbuf", - slptimeo); - FSDBG_BOT(556, vp, bp, NBOFF(bp), bp->nb_flags); - splx(s); - if (error) { + list = NBI_CLEAN; + if (nfs_buf_iterprepare(np, &blist, list)) { + list = NBI_DIRTY; + if (nfs_buf_iterprepare(np, &blist, list)) + break; + } + while ((bp = LIST_FIRST(&blist))) { + LIST_REMOVE(bp, nb_vnbufs); + if (list == NBI_CLEAN) + LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs); + else + LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs); + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, NBAC_REMOVE, slpflag, slptimeo))) { + FSDBG(556, vp, bp, NBOFF(bp), bp->nb_flags); + if (error != EAGAIN) { FSDBG(554, vp, bp, -1, error); + nfs_buf_refrele(bp); + nfs_buf_itercomplete(np, &blist, list); + lck_mtx_unlock(nfs_buf_mutex); return (error); } - break; } + nfs_buf_refrele(bp); FSDBG(554, vp, bp, NBOFF(bp), bp->nb_flags); - nfs_buf_remfree(bp); - SET(bp->nb_flags, NB_BUSY); - splx(s); - if ((flags & V_SAVE) && UBCINFOEXISTS(vp) && (NBOFF(bp) < np->n_size)) { + lck_mtx_unlock(nfs_buf_mutex); + if ((flags & V_SAVE) && UBCINFOEXISTS(vp) && bp->nb_vp && + (NBOFF(bp) < (off_t)np->n_size)) { /* XXX extra paranoia: make sure we're not */ /* somehow leaving any dirty data around */ int mustwrite = 0; - int end = (NBOFF(bp) + bp->nb_bufsize >= np->n_size) ? - bp->nb_bufsize : (np->n_size - NBOFF(bp)); + int end = (NBOFF(bp) + bp->nb_bufsize > (off_t)np->n_size) ? 
+ ((off_t)np->n_size - NBOFF(bp)) : bp->nb_bufsize; if (!ISSET(bp->nb_flags, NB_PAGELIST)) { error = nfs_buf_upl_setup(bp); if (error == EINVAL) { @@ -2252,8 +2540,7 @@ nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, slptimeo) /* hopefully we don't need to do */ /* anything for this buffer */ } else if (error) - printf("nfs_vinvalbuf: upl setup failed %d\n", - error); + printf("nfs_vinvalbuf: upl setup failed %d\n", error); bp->nb_valid = bp->nb_dirty = 0; } nfs_buf_upl_check(bp); @@ -2265,8 +2552,11 @@ nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, slptimeo) mustwrite++; } bp->nb_dirty &= (1 << (round_page_32(end)/PAGE_SIZE)) - 1; - if (bp->nb_dirty) - mustwrite++; + /* also make sure we'll have a credential to do the write */ + if (mustwrite && (bp->nb_wcred == NOCRED) && (cred == NOCRED)) { + printf("nfs_vinvalbuf: found dirty buffer with no write creds\n"); + mustwrite = 0; + } if (mustwrite) { FSDBG(554, vp, bp, 0xd00dee, bp->nb_flags); if (!ISSET(bp->nb_flags, NB_PAGELIST)) @@ -2276,30 +2566,39 @@ nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, slptimeo) /* (NB_NOCACHE indicates buffer should be discarded) */ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC)); SET(bp->nb_flags, NB_STABLE | NB_NOCACHE); - /* - * NFS has embedded ucred so crhold() risks zone corruption - */ - if (bp->nb_wcred == NOCRED) - bp->nb_wcred = crdup(cred); + if (bp->nb_wcred == NOCRED) { + kauth_cred_ref(cred); + bp->nb_wcred = cred; + } error = nfs_buf_write(bp); // Note: bp has been released if (error) { FSDBG(554, bp, 0xd00dee, 0xbad, error); np->n_error = error; np->n_flag |= NWRITEERR; + /* + * There was a write error and we need to + * invalidate attrs to sync with server. + * (if this write was extending the file, + * we may no longer know the correct size) + */ + NATTRINVALIDATE(np); error = 0; } - break; + lck_mtx_lock(nfs_buf_mutex); + continue; } } SET(bp->nb_flags, NB_INVAL); - // Note: We don't want to do FREEUPs here because - // that may modify the buffer chain we're iterating! + // hold off on FREEUPs until we're done here nfs_buf_release(bp, 0); + lck_mtx_lock(nfs_buf_mutex); } + nfs_buf_itercomplete(np, &blist, list); } + lck_mtx_unlock(nfs_buf_mutex); NFS_BUF_FREEUP(); - if (np->n_dirtyblkhd.lh_first || np->n_cleanblkhd.lh_first) + if (NVALIDBUFS(np)) panic("nfs_vinvalbuf: flush failed"); return (0); } @@ -2310,17 +2609,17 @@ nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, slptimeo) * doing the flush, just wait for completion. 
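 *
 * The serialization is a flag handshake on np->n_flag; in sketch form
 * (NFLUSHINPROG marks a flush in progress, NFLUSHWANT a waiter; the
 * nfs_sigintr() error handling used below is omitted here):
 *
 *	while (np->n_flag & NFLUSHINPROG) {
 *		np->n_flag |= NFLUSHWANT;
 *		tsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo);
 *	}
 *	np->n_flag |= NFLUSHINPROG;
 *	... do the flush ...
 *	np->n_flag &= ~NFLUSHINPROG;
 *	if (np->n_flag & NFLUSHWANT) {
 *		np->n_flag &= ~NFLUSHWANT;
 *		wakeup(&np->n_flag);
 *	}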
*/ int -nfs_vinvalbuf(vp, flags, cred, p, intrflg) - struct vnode *vp; - int flags; - struct ucred *cred; - struct proc *p; - int intrflg; +nfs_vinvalbuf( + vnode_t vp, + int flags, + kauth_cred_t cred, + proc_t p, + int intrflg) { - register struct nfsnode *np = VTONFS(vp); - struct nfsmount *nmp = VFSTONFS(vp->v_mount); + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); int error = 0, slpflag, slptimeo; - int didhold = 0; + off_t size; FSDBG_TOP(554, vp, flags, intrflg, 0); @@ -2341,7 +2640,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) FSDBG_TOP(555, vp, flags, intrflg, np->n_flag); error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); FSDBG_BOT(555, vp, flags, intrflg, np->n_flag); - if (error && (error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p))) { + if (error && (error = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p))) { FSDBG_BOT(554, vp, flags, intrflg, error); return (error); } @@ -2354,7 +2653,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) error = nfs_vinvalbuf_internal(vp, flags, cred, p, slpflag, 0); while (error) { FSDBG(554, vp, 0, 0, error); - error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p); + error = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p); if (error) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { @@ -2371,13 +2670,15 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } - didhold = ubc_hold(vp); - if (didhold) { - int rv = ubc_clean(vp, 1); /* get the pages out of vm also */ + /* + * get the pages out of vm also + */ + if (UBCINFOEXISTS(vp) && (size = ubc_getsize(vp))) { + int rv = ubc_sync_range(vp, 0, size, UBC_PUSHALL | UBC_INVALIDATE); if (!rv) - panic("nfs_vinvalbuf(): ubc_clean failed!"); - ubc_rele(vp); + panic("nfs_vinvalbuf(): ubc_sync_range failed!"); } + FSDBG_BOT(554, vp, flags, intrflg, 0); return (0); } @@ -2390,7 +2691,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) int nfs_asyncio(bp, cred) struct nfsbuf *bp; - struct ucred *cred; + kauth_cred_t cred; { struct nfsmount *nmp; int i; @@ -2398,18 +2699,22 @@ nfs_asyncio(bp, cred) int slpflag = 0; int slptimeo = 0; int error, error2; + void *wakeme = NULL; + struct timespec ts; if (nfs_numasync == 0) return (EIO); FSDBG_TOP(552, bp, bp ? NBOFF(bp) : 0, bp ? bp->nb_flags : 0, 0); - nmp = ((bp != NULL) ? VFSTONFS(bp->nb_vp->v_mount) : NULL); + nmp = ((bp != NULL) ? VFSTONFS(vnode_mount(bp->nb_vp)) : NULL); again: if (nmp && nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; gotiod = FALSE; + lck_mtx_lock(nfs_iod_mutex); + /* no nfsbuf means tell nfsiod to process delwri list */ if (!bp) nfs_ioddelwri = 1; @@ -2423,21 +2728,23 @@ again: * Found one, so wake it up and tell it which * mount to process. 
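 *
 * Note the deferred-wakeup convention used with nfs_iod_mutex in this
 * function: the idle iod's address is only recorded in "wakeme" here,
 * and wakeup() is issued after the mutex has been dropped, so the
 * woken thread doesn't immediately block on the lock:
 *
 *	lck_mtx_lock(nfs_iod_mutex);
 *	... pick idle iod i, record the work ...
 *	wakeme = &nfs_iodwant[i];
 *	lck_mtx_unlock(nfs_iod_mutex);
 *	if (wakeme)
 *		wakeup(wakeme);
 *
 * The msleep() further down converts the caller's tick-based slptimeo
 * to a timespec, assuming hz == 100 (10ms per tick) as its in-line
 * comment states; the same conversion appears in nfs_buf_acquire()
 * earlier in this patch:
 *
 *	ts.tv_sec = (slptimeo/100);
 *	ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000;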
*/ - NFS_DPF(ASYNCIO, - ("nfs_asyncio: waking iod %d for mount %p\n", - i, nmp)); - nfs_iodwant[i] = (struct proc *)0; + nfs_iodwant[i] = NULL; nfs_iodmount[i] = nmp; if (nmp) nmp->nm_bufqiods++; - wakeup((caddr_t)&nfs_iodwant[i]); + wakeme = &nfs_iodwant[i]; gotiod = TRUE; break; } /* if we're just poking the delwri list, we're done */ - if (!bp) + if (!bp) { + lck_mtx_unlock(nfs_iod_mutex); + if (wakeme) + wakeup(wakeme); + FSDBG_BOT(552, bp, 0x10101010, wakeme, 0); return (0); + } /* * If none are free, we may already have an iod working on this mount @@ -2445,9 +2752,6 @@ again: */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { - NFS_DPF(ASYNCIO, - ("nfs_asyncio: %d iods are already processing mount %p\n", - nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } @@ -2470,14 +2774,18 @@ again: goto out; } FSDBG(552, bp, nmp->nm_bufqlen, 2*nfs_numasync, -1); - NFS_DPF(ASYNCIO, - ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; - error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, - "nfsaio", slptimeo); + + ts.tv_sec = (slptimeo/100); + /* the hz value is 100; which leads to 10ms */ + ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000; + + error = msleep(&nmp->nm_bufq, nfs_iod_mutex, slpflag | PRIBIO, + "nfsaio", &ts); if (error) { error2 = nfs_sigintr(nmp, NULL, bp->nb_proc); if (error2) { + lck_mtx_unlock(nfs_iod_mutex); FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, error2); return (error2); } @@ -2491,43 +2799,39 @@ again: * so check and loop if nescessary. */ if (nmp->nm_bufqiods == 0) { - NFS_DPF(ASYNCIO, - ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); + lck_mtx_unlock(nfs_iod_mutex); goto again; } } if (ISSET(bp->nb_flags, NB_READ)) { if (bp->nb_rcred == NOCRED && cred != NOCRED) { - /* - * NFS has embedded ucred. - * Can not crhold() here as that causes zone corruption - */ - bp->nb_rcred = crdup(cred); + kauth_cred_ref(cred); + bp->nb_rcred = cred; } } else { SET(bp->nb_flags, NB_WRITEINPROG); if (bp->nb_wcred == NOCRED && cred != NOCRED) { - /* - * NFS has embedded ucred. - * Can not crhold() here as that causes zone corruption - */ - bp->nb_wcred = crdup(cred); + kauth_cred_ref(cred); + bp->nb_wcred = cred; } } TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, nb_free); nmp->nm_bufqlen++; + lck_mtx_unlock(nfs_iod_mutex); + if (wakeme) + wakeup(wakeme); FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, 0); return (0); } out: + lck_mtx_unlock(nfs_iod_mutex); /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ - NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); FSDBG_BOT(552, bp, NBOFF(bp), bp->nb_flags, EIO); return (EIO); } @@ -2537,27 +2841,29 @@ out: * synchronously or from an nfsiod. 
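 *
 * In outline -- a simplification of the body below, which also handles
 * the NB_NEEDCOMMIT, per-page-dirty, and error/invalidate cases:
 *
 *	if (ISSET(bp->nb_flags, NB_READ)) {
 *		... nfs_readrpc()/nfs_readlinkrpc()/nfs_readdirrpc()
 *		    into the buffer, then fix up nb_validoff/nb_validend ...
 *	} else {
 *		... nfs_writerpc() on the buffer's dirty range
 *		    (nb_dirtyoff..nb_dirtyend), then clear that range ...
 *	}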
*/ int -nfs_doio(bp, cr, p) - struct nfsbuf *bp; - struct ucred *cr; - struct proc *p; +nfs_doio(struct nfsbuf *bp, kauth_cred_t cr, proc_t p) { - register struct uio *uiop; - register struct vnode *vp; + struct uio *uiop; + vnode_t vp; struct nfsnode *np; struct nfsmount *nmp; - int error = 0, diff, len, iomode, must_commit = 0; + int error = 0, diff, len, iomode, must_commit = 0, invalidate = 0; struct uio uio; - struct iovec io; + struct iovec_32 io; + enum vtype vtype; vp = bp->nb_vp; + vtype = vnode_vtype(vp); np = VTONFS(vp); - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); uiop = &uio; - uiop->uio_iov = &io; + uiop->uio_iovs.iov32p = &io; uiop->uio_iovcnt = 1; +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = p; +#else + uiop->uio_segflg = UIO_SYSSPACE32; +#endif /* * we've decided to perform I/O for this block, @@ -2573,31 +2879,34 @@ nfs_doio(bp, cr, p) bp->nb_dirtyend); if (ISSET(bp->nb_flags, NB_READ)) { - if (vp->v_type == VREG) + if (vtype == VREG) NFS_BUF_MAP(bp); - io.iov_len = uiop->uio_resid = bp->nb_bufsize; - io.iov_base = bp->nb_data; + io.iov_len = bp->nb_bufsize; + uio_uio_resid_set(uiop, io.iov_len); + io.iov_base = (uintptr_t) bp->nb_data; uiop->uio_rw = UIO_READ; - switch (vp->v_type) { + switch (vtype) { case VREG: uiop->uio_offset = NBOFF(bp); - nfsstats.read_bios++; - error = nfs_readrpc(vp, uiop, cr); - FSDBG(262, np->n_size, NBOFF(bp), uiop->uio_resid, error); + OSAddAtomic(1, (SInt32*)&nfsstats.read_bios); + error = nfs_readrpc(vp, uiop, cr, p); + FSDBG(262, np->n_size, NBOFF(bp), uio_uio_resid(uiop), error); if (!error) { /* update valid range */ bp->nb_validoff = 0; - if (uiop->uio_resid) { + if (uio_uio_resid(uiop) != 0) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to * the server yet. * Just zero fill the rest of the valid area. 
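 *
 * A worked example: with a 32768-byte buffer, if the read RPC comes up
 * short and returns only 20000 bytes (uio resid 12768) while np->n_size
 * says the file extends past this buffer, then
 *
 *	diff = 32768 - 12768 = 20000		(bytes actually read)
 *	len  = n_size - (NBOFF(bp) + 20000) > 0	(a hole follows)
 *
 * and the code zeroes min(len, 12768) bytes at nb_data + 20000,
 * extending nb_validend to cover the zeroed range.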
*/ - diff = bp->nb_bufsize - uiop->uio_resid; + // LP64todo - fix this + diff = bp->nb_bufsize - uio_uio_resid(uiop); len = np->n_size - (NBOFF(bp) + diff); if (len > 0) { - len = min(len, uiop->uio_resid); + // LP64todo - fix this + len = min(len, uio_uio_resid(uiop)); bzero((char *)bp->nb_data + diff, len); bp->nb_validend = diff + len; FSDBG(258, diff, len, 0, 1); @@ -2616,38 +2925,28 @@ nfs_doio(bp, cr, p) bp->nb_bufsize - bp->nb_validend, 0, 2); } } - if (p && (vp->v_flag & VTEXT) && - (((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_READ) && - np->n_lrev != np->n_brev) || - (!(nmp->nm_flag & NFSMNT_NQNFS) && - np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { - uprintf("Process killed due to text file modification\n"); - psignal(p, SIGKILL); - p->p_flag |= P_NOSWAP; - } break; case VLNK: uiop->uio_offset = (off_t)0; - nfsstats.readlink_bios++; - error = nfs_readlinkrpc(vp, uiop, cr); + OSAddAtomic(1, (SInt32*)&nfsstats.readlink_bios); + error = nfs_readlinkrpc(vp, uiop, cr, p); if (!error) { bp->nb_validoff = 0; bp->nb_validend = uiop->uio_offset; } break; case VDIR: - nfsstats.readdir_bios++; + OSAddAtomic(1, (SInt32*)&nfsstats.readdir_bios); uiop->uio_offset = NBOFF(bp); if (!(nmp->nm_flag & NFSMNT_NFSV3)) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; /* dk@farm.org */ if (nmp->nm_flag & NFSMNT_RDIRPLUS) { - error = nfs_readdirplusrpc(vp, uiop, cr); + error = nfs_readdirplusrpc(vp, uiop, cr, p); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) - error = nfs_readdirrpc(vp, uiop, cr); + error = nfs_readdirrpc(vp, uiop, cr, p); if (!error) { bp->nb_validoff = 0; bp->nb_validend = uiop->uio_offset - NBOFF(bp); @@ -2655,7 +2954,7 @@ nfs_doio(bp, cr, p) } break; default: - printf("nfs_doio: type %x unexpected\n", vp->v_type); + printf("nfs_doio: type %x unexpected\n", vtype); break; }; if (error) { @@ -2668,7 +2967,7 @@ nfs_doio(bp, cr, p) int doff, dend = 0; /* We need to make sure the pages are locked before doing I/O. 
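 *
 * "Locked" here means wiring the buffer's pages through its UPL
 * (universal page list).  The canonical sequence, as used below and in
 * nfs_vinvalbuf_internal() above:
 *
 *	if (!ISSET(bp->nb_flags, NB_PAGELIST)) {
 *		error = nfs_buf_upl_setup(bp);	(create the UPL)
 *		if (!error)
 *			nfs_buf_upl_check(bp);	(refresh valid/dirty bits)
 *	}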
*/ - if (!ISSET(bp->nb_flags, NB_META) && UBCISVALID(vp)) { + if (!ISSET(bp->nb_flags, NB_META) && UBCINFOEXISTS(vp)) { if (!ISSET(bp->nb_flags, NB_PAGELIST)) { error = nfs_buf_upl_setup(bp); if (error) { @@ -2716,7 +3015,7 @@ nfs_doio(bp, cr, p) np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); } else if (error == NFSERR_STALEWRITEVERF) - nfs_clearcommit(vp->v_mount); + nfs_clearcommit(vnode_mount(vp)); } if (!error && bp->nb_dirtyend > 0) { @@ -2724,7 +3023,7 @@ nfs_doio(bp, cr, p) u_int32_t pagemask; int firstpg, lastpg; - if (NBOFF(bp) + bp->nb_dirtyend > np->n_size) + if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) bp->nb_dirtyend = np->n_size - NBOFF(bp); NFS_BUF_MAP(bp); @@ -2747,7 +3046,7 @@ nfs_doio(bp, cr, p) while (dend < bp->nb_bufsize && NBPGDIRTY(bp,dend/PAGE_SIZE)) dend += PAGE_SIZE; /* make sure to keep dend clipped to EOF */ - if (NBOFF(bp) + dend > np->n_size) + if (NBOFF(bp) + dend > (off_t)np->n_size) dend = np->n_size - NBOFF(bp); /* calculate range of complete pages being written */ firstpg = round_page_32(doff) / PAGE_SIZE; @@ -2766,17 +3065,18 @@ nfs_doio(bp, cr, p) iomode = NFSV3WRITE_FILESYNC; /* write the dirty range */ - io.iov_len = uiop->uio_resid = dend - doff; + io.iov_len = dend - doff; + uio_uio_resid_set(uiop, io.iov_len); uiop->uio_offset = NBOFF(bp) + doff; - io.iov_base = (char *)bp->nb_data + doff; + io.iov_base = (uintptr_t) bp->nb_data + doff; uiop->uio_rw = UIO_WRITE; - nfsstats.write_bios++; + OSAddAtomic(1, (SInt32*)&nfsstats.write_bios); SET(bp->nb_flags, NB_WRITEINPROG); - error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); + error = nfs_writerpc(vp, uiop, cr, p, &iomode, &must_commit); if (must_commit) - nfs_clearcommit(vp->v_mount); + nfs_clearcommit(vnode_mount(vp)); /* clear dirty bits for pages we've written */ if (!error) bp->nb_dirty &= ~pagemask; @@ -2812,8 +3112,8 @@ nfs_doio(bp, cr, p) CLR(bp->nb_flags, NB_INVAL | NB_NOCACHE); if (!ISSET(bp->nb_flags, NB_DELWRI)) { SET(bp->nb_flags, NB_DELWRI); - nfs_nbdwrite++; - NFSBUFCNTCHK(); + OSAddAtomic(1, (SInt32*)&nfs_nbdwrite); + NFSBUFCNTCHK(0); } FSDBG(261, bp->nb_validoff, bp->nb_validend, bp->nb_bufsize, 0); @@ -2824,11 +3124,11 @@ nfs_doio(bp, cr, p) */ if (ISSET(bp->nb_flags, NB_ASYNC)) { /* move to dirty list */ - int s = splbio(); + lck_mtx_lock(nfs_buf_mutex); if (bp->nb_vnbufs.le_next != NFSNOLIST) LIST_REMOVE(bp, nb_vnbufs); LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs); - splx(s); + lck_mtx_unlock(nfs_buf_mutex); } else { SET(bp->nb_flags, NB_EINTR); } @@ -2838,6 +3138,32 @@ nfs_doio(bp, cr, p) SET(bp->nb_flags, NB_ERROR); bp->nb_error = np->n_error = error; np->n_flag |= NWRITEERR; + /* + * There was a write error and we need to + * invalidate attrs and flush buffers in + * order to sync up with the server. + * (if this write was extending the file, + * we may no longer know the correct size) + * + * But we can't call vinvalbuf while holding + * this buffer busy. Set a flag to do it after + * releasing the buffer. + * + * Note we can only invalidate in this function + * if this is an async write and so the iodone + * below will release the buffer. Also, we + * shouldn't call vinvalbuf from nfsiod because + * that may deadlock waiting for the completion + * of writes that are queued up behind this one. 
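+ *
+ * (The deferred case is picked up at the top of nfs_bioread() and
+ * nfs_write(), which this patch teaches to check the flag on entry:
+ *
+ *	if (np->n_flag & NNEEDINVALIDATE) {
+ *		np->n_flag &= ~NNEEDINVALIDATE;
+ *		nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1);
+ *	}
+ *
+ * so the invalidation runs in a context that can safely block.)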
+ */ + if (ISSET(bp->nb_flags, NB_ASYNC) && + !ISSET(bp->nb_flags, NB_IOD)) { + invalidate = 1; + } else { + /* invalidate later */ + np->n_flag |= NNEEDINVALIDATE; + } + NATTRINVALIDATE(np); } /* clear the dirty range */ bp->nb_dirtyoff = bp->nb_dirtyend = 0; @@ -2846,9 +3172,9 @@ nfs_doio(bp, cr, p) if (!error && bp->nb_dirty) { /* there are pages marked dirty that need to be written out */ - int pg, cnt, npages, off, len; + int pg, count, npages, off; - nfsstats.write_bios++; + OSAddAtomic(1, (SInt32*)&nfsstats.write_bios); NFS_BUF_MAP(bp); @@ -2871,31 +3197,32 @@ nfs_doio(bp, cr, p) for (pg=0; pg < npages; pg++) { if (!NBPGDIRTY(bp,pg)) continue; - cnt = 1; - while (((pg+cnt) < npages) && NBPGDIRTY(bp,pg+cnt)) - cnt++; - /* write cnt pages starting with page pg */ + count = 1; + while (((pg+count) < npages) && NBPGDIRTY(bp,pg+count)) + count++; + /* write count pages starting with page pg */ off = pg * PAGE_SIZE; - len = cnt * PAGE_SIZE; + len = count * PAGE_SIZE; /* clip writes to EOF */ - if (NBOFF(bp) + off + len > np->n_size) + if (NBOFF(bp) + off + len > (off_t)np->n_size) len -= (NBOFF(bp) + off + len) - np->n_size; if (len > 0) { - io.iov_len = uiop->uio_resid = len; + io.iov_len = len; + uio_uio_resid_set(uiop, io.iov_len); uiop->uio_offset = NBOFF(bp) + off; - io.iov_base = (char *)bp->nb_data + off; - error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); + io.iov_base = (uintptr_t) bp->nb_data + off; + error = nfs_writerpc(vp, uiop, cr, p, &iomode, &must_commit); if (must_commit) - nfs_clearcommit(vp->v_mount); + nfs_clearcommit(vnode_mount(vp)); if (error) break; } /* clear dirty bits */ - while (cnt--) { + while (count--) { bp->nb_dirty &= ~(1 << pg); /* leave pg on last page */ - if (cnt) pg++; + if (count) pg++; } } if (!error) { @@ -2919,5 +3246,25 @@ nfs_doio(bp, cr, p) FSDBG_BOT(256, bp->nb_validoff, bp->nb_validend, bp->nb_bufsize, error); nfs_buf_iodone(bp); + + if (invalidate) { + /* + * There was a write error and we need to + * invalidate attrs and flush buffers in + * order to sync up with the server. + * (if this write was extending the file, + * we may no longer know the correct size) + * + * But we couldn't call vinvalbuf while holding + * the buffer busy. So we call vinvalbuf() after + * releasing the buffer. + * + * Note: we don't bother calling nfs_vinvalbuf() if + * there's already a flush in progress. + */ + if (!(np->n_flag & NFLUSHINPROG)) + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cr, p, 1); + } + return (error); } diff --git a/bsd/nfs/nfs_boot.c b/bsd/nfs/nfs_boot.c index 85de9123c..2eaf21273 100644 --- a/bsd/nfs/nfs_boot.c +++ b/bsd/nfs/nfs_boot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -92,12 +92,11 @@ #include #include #include -#include -#include +#include +#include #include #include -#include #include #include @@ -119,21 +118,15 @@ #include -extern char *strchr(const char *str, int ch); #if NETHER == 0 -int nfs_boot_init(nd, procp) - struct nfs_diskless *nd; - struct proc *procp; +int nfs_boot_init(struct nfs_diskless *nd, proc_t procp) { panic("nfs_boot_init: no ether"); } -int nfs_boot_getfh(nd, procp, v3) - struct nfs_diskless *nd; - struct proc *procp; - int v3; +int nfs_boot_getfh(struct nfs_diskless *nd, proc_t procp, int v3, int sotype) { panic("nfs_boot_getfh: no ether"); } @@ -161,17 +154,17 @@ int nfs_boot_getfh(nd, procp, v3) */ /* bootparam RPC */ -static int bp_whoami __P((struct sockaddr_in *bpsin, - struct in_addr *my_ip, struct in_addr *gw_ip)); -static int bp_getfile __P((struct sockaddr_in *bpsin, char *key, - struct sockaddr_in *mdsin, char *servname, char *path)); +static int bp_whoami(struct sockaddr_in *bpsin, + struct in_addr *my_ip, struct in_addr *gw_ip); +static int bp_getfile(struct sockaddr_in *bpsin, const char *key, + struct sockaddr_in *mdsin, char *servname, char *path); /* mountd RPC */ -static int md_mount __P((struct sockaddr_in *mdsin, char *path, int v3, - u_char *fhp, u_long *fhlenp)); +static int md_mount(struct sockaddr_in *mdsin, char *path, int v3, int sotype, + u_char *fhp, u_long *fhlenp); /* other helpers */ -static int get_file_handle __P((struct nfs_dlmount *ndmntp)); +static int get_file_handle(struct nfs_dlmount *ndmntp); #define IP_FORMAT "%d.%d.%d.%d" @@ -190,9 +183,7 @@ netboot_rootpath(struct in_addr * server_ip, * Called with an empty nfs_diskless struct to be filled in. */ int -nfs_boot_init(nd, procp) - struct nfs_diskless *nd; - struct proc *procp; +nfs_boot_init(struct nfs_diskless *nd, __unused proc_t procp) { struct sockaddr_in bp_sin; boolean_t do_bpwhoami = TRUE; @@ -201,15 +192,24 @@ nfs_boot_init(nd, procp) struct in_addr my_ip; struct sockaddr_in * sin_p; + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mlen) + nfs_mbuf_init(); + /* by this point, networking must already have been configured */ if (netboot_iaddr(&my_ip) == FALSE) { printf("nfs_boot: networking is not initialized\n"); error = ENXIO; - goto failed_noswitch; + goto failed; } /* get the root path information */ MALLOC_ZONE(nd->nd_root.ndm_path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!nd->nd_root.ndm_path) { + printf("nfs_boot: can't allocate root path buffer\n"); + error = ENOMEM; + goto failed; + } sin_p = &nd->nd_root.ndm_saddr; bzero((caddr_t)sin_p, sizeof(*sin_p)); sin_p->sin_len = sizeof(*sin_p); @@ -222,8 +222,6 @@ nfs_boot_init(nd, procp) } nd->nd_private.ndm_saddr.sin_addr.s_addr = 0; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (do_bpwhoami) { struct in_addr router; /* @@ -261,6 +259,11 @@ nfs_boot_init(nd, procp) #if !defined(NO_MOUNT_PRIVATE) if (do_bpgetfile) { /* get private path */ MALLOC_ZONE(nd->nd_private.ndm_path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!nd->nd_private.ndm_path) { + printf("nfs_boot: can't allocate private path buffer\n"); + error = ENOMEM; + goto failed; + } error = bp_getfile(&bp_sin, "private", &nd->nd_private.ndm_saddr, nd->nd_private.ndm_host, @@ -269,6 +272,11 @@ nfs_boot_init(nd, procp) char * check_path = NULL; MALLOC_ZONE(check_path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!check_path) { + printf("nfs_boot: can't allocate check_path buffer\n"); + error = ENOMEM; + goto failed; + } snprintf(check_path, MAXPATHLEN, 
"%s/private", nd->nd_root.ndm_path); if ((nd->nd_root.ndm_saddr.sin_addr.s_addr == nd->nd_private.ndm_saddr.sin_addr.s_addr) @@ -288,8 +296,6 @@ nfs_boot_init(nd, procp) } #endif /* NO_MOUNT_PRIVATE */ failed: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); -failed_noswitch: return (error); } @@ -298,16 +304,12 @@ failed_noswitch: * with file handles to be filled in. */ int -nfs_boot_getfh(nd, procp, v3) - struct nfs_diskless *nd; - struct proc *procp; - int v3; +nfs_boot_getfh(struct nfs_diskless *nd, __unused proc_t procp, int v3, int sotype) { int error = 0; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - nd->nd_root.ndm_nfsv3 = v3; + nd->nd_root.ndm_sotype = sotype; error = get_file_handle(&nd->nd_root); if (error) { printf("nfs_boot: get_file_handle(v%d) root failed, %d\n", @@ -319,6 +321,7 @@ nfs_boot_getfh(nd, procp, v3) if (nd->nd_private.ndm_saddr.sin_addr.s_addr) { /* get private file handle */ nd->nd_private.ndm_nfsv3 = v3; + nd->nd_private.ndm_sotype = sotype; error = get_file_handle(&nd->nd_private); if (error) { printf("nfs_boot: get_file_handle(v%d) private failed, %d\n", @@ -327,8 +330,7 @@ nfs_boot_getfh(nd, procp, v3) } } #endif /* NO_MOUNT_PRIVATE */ - failed: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); +failed: return (error); } @@ -344,7 +346,7 @@ get_file_handle(ndmntp) * using RPC to mountd/mount */ error = md_mount(&ndmntp->ndm_saddr, ndmntp->ndm_path, ndmntp->ndm_nfsv3, - ndmntp->ndm_fh, &ndmntp->ndm_fhlen); + ndmntp->ndm_sotype, ndmntp->ndm_fh, &ndmntp->ndm_fhlen); if (error) return (error); @@ -365,23 +367,25 @@ get_file_handle(ndmntp) * Get an mbuf with the given length, and * initialize the pkthdr length field. */ -static struct mbuf * -m_get_len(int msg_len) +static int +mbuf_get_with_len(int msg_len, mbuf_t *m) { - struct mbuf *m; - m = m_gethdr(M_WAIT, MT_DATA); - if (m == NULL) - return NULL; - if (msg_len > MHLEN) { - if (msg_len > MCLBYTES) + int error; + error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, m); + if (error) + return (error); + if (msg_len > mbuf_maxlen(*m)) { + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, m); + if (error) { + mbuf_freem(*m); + return (error); + } + if (msg_len > mbuf_maxlen(*m)) panic("nfs_boot: msg_len > MCLBYTES"); - MCLGET(m, M_WAIT); - if (m == NULL) - return NULL; } - m->m_len = msg_len; - m->m_pkthdr.len = m->m_len; - return (m); + mbuf_setlen(*m, msg_len); + mbuf_pkthdr_setlen(*m, msg_len); + return (0); } @@ -438,8 +442,8 @@ bp_whoami(bpsin, my_ip, gw_ip) struct rpc_string *str; struct bp_inaddr *bia; - struct mbuf *m; - struct sockaddr_in *sin; + mbuf_t m; + struct sockaddr_in sin; int error, msg_len; int cn_len, dn_len; u_char *p; @@ -449,14 +453,14 @@ bp_whoami(bpsin, my_ip, gw_ip) * Get message buffer of sufficient size. */ msg_len = sizeof(*call); - m = m_get_len(msg_len); - if (m == NULL) - return ENOBUFS; + error = mbuf_get_with_len(msg_len, &m); + if (error) + return error; /* * Build request message for PMAPPROC_CALLIT. */ - call = mtod(m, struct whoami_call *); + call = mbuf_data(m); call->call_prog = htonl(BOOTPARAM_PROG); call->call_vers = htonl(BOOTPARAM_VERS); call->call_proc = htonl(BOOTPARAM_WHOAMI); @@ -474,32 +478,31 @@ bp_whoami(bpsin, my_ip, gw_ip) /* RPC: portmap/callit */ bpsin->sin_port = htons(PMAPPORT); - error = krpc_call(bpsin, PMAPPROG, PMAPVERS, - PMAPPROC_CALLIT, &m, &sin); + error = krpc_call(bpsin, SOCK_DGRAM, PMAPPROG, PMAPVERS, PMAPPROC_CALLIT, &m, &sin); if (error) return error; /* * Parse result message. 
*/ - msg_len = m->m_len; - lp = mtod(m, long *); + msg_len = mbuf_len(m); + lp = mbuf_data(m); /* bootparam server port (also grab from address). */ - if (msg_len < sizeof(*lp)) + if (msg_len < (int)sizeof(*lp)) goto bad; msg_len -= sizeof(*lp); bpsin->sin_port = htons((short)ntohl(*lp++)); - bpsin->sin_addr.s_addr = sin->sin_addr.s_addr; + bpsin->sin_addr.s_addr = sin.sin_addr.s_addr; /* length of encapsulated results */ - if (msg_len < (ntohl(*lp) + sizeof(*lp))) + if (msg_len < (ntohl(*lp) + (int)sizeof(*lp))) goto bad; msg_len = ntohl(*lp++); p = (char*)lp; /* client name */ - if (msg_len < sizeof(*str)) + if (msg_len < (int)sizeof(*str)) goto bad; str = (struct rpc_string *)p; cn_len = ntohl(str->len); @@ -514,7 +517,7 @@ bp_whoami(bpsin, my_ip, gw_ip) msg_len -= RPC_STR_SIZE(cn_len); /* domain name */ - if (msg_len < sizeof(*str)) + if (msg_len < (int)sizeof(*str)) goto bad; str = (struct rpc_string *)p; dn_len = ntohl(str->len); @@ -529,7 +532,7 @@ bp_whoami(bpsin, my_ip, gw_ip) msg_len -= RPC_STR_SIZE(dn_len); /* gateway address */ - if (msg_len < sizeof(*bia)) + if (msg_len < (int)sizeof(*bia)) goto bad; bia = (struct bp_inaddr *)p; if (bia->atype != htonl(1)) @@ -546,10 +549,7 @@ bad: error = EBADRPC; out: - if (sin) - FREE(sin, M_SONAME); - - m_freem(m); + mbuf_freem(m); return(error); } @@ -564,13 +564,13 @@ out: static int bp_getfile(bpsin, key, md_sin, serv_name, pathname) struct sockaddr_in *bpsin; - char *key; + const char *key; struct sockaddr_in *md_sin; char *serv_name; char *pathname; { struct rpc_string *str; - struct mbuf *m; + mbuf_t m; struct bp_inaddr *bia; struct sockaddr_in *sin; u_char *p, *q; @@ -585,14 +585,14 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) msg_len = 0; msg_len += RPC_STR_SIZE(cn_len); msg_len += RPC_STR_SIZE(key_len); - m = m_get_len(msg_len); - if (m == NULL) - return ENOBUFS; + error = mbuf_get_with_len(msg_len, &m); + if (error) + return error; /* * Build request message. */ - p = mtod(m, u_char *); + p = mbuf_data(m); bzero(p, msg_len); /* client name (hostname) */ str = (struct rpc_string *)p; @@ -605,7 +605,7 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) bcopy(key, str->data, key_len); /* RPC: bootparam/getfile */ - error = krpc_call(bpsin, BOOTPARAM_PROG, BOOTPARAM_VERS, + error = krpc_call(bpsin, SOCK_DGRAM, BOOTPARAM_PROG, BOOTPARAM_VERS, BOOTPARAM_GETFILE, &m, NULL); if (error) return error; @@ -613,11 +613,11 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) /* * Parse result message. */ - p = mtod(m, u_char *); - msg_len = m->m_len; + p = mbuf_data(m); + msg_len = mbuf_len(m); /* server name */ - if (msg_len < sizeof(*str)) + if (msg_len < (int)sizeof(*str)) goto bad; str = (struct rpc_string *)p; sn_len = ntohl(str->len); @@ -631,7 +631,7 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) msg_len -= RPC_STR_SIZE(sn_len); /* server IP address (mountd) */ - if (msg_len < sizeof(*bia)) + if (msg_len < (int)sizeof(*bia)) goto bad; bia = (struct bp_inaddr *)p; if (bia->atype != htonl(1)) @@ -649,7 +649,7 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) msg_len -= sizeof(*bia); /* server pathname */ - if (msg_len < sizeof(*str)) + if (msg_len < (int)sizeof(*str)) goto bad; str = (struct rpc_string *)p; path_len = ntohl(str->len); @@ -666,7 +666,7 @@ bad: error = EBADRPC; out: - m_freem(m); + mbuf_freem(m); return(0); } @@ -677,10 +677,11 @@ out: * Also, sets sin->sin_port to the NFS service port. 
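Many of the length checks in bp_whoami() and bp_getfile() above gain an (int) cast on sizeof. The point is subtle: msg_len is a signed int, and comparing it directly against a size_t promotes it to unsigned, so a negative length from a short or corrupt reply would slip past the test (the unsigned comparison also draws compiler warnings). A tiny demonstration, not kernel code:

#include <stdio.h>

int
main(void)
{
	int msg_len = -4;                   /* e.g. an underflowed reply length */

	if (msg_len < sizeof(long))         /* promoted to size_t: huge value */
		puts("unsigned compare rejected it");   /* never reached */
	if (msg_len < (int)sizeof(long))    /* signed compare: -4 < 8 */
		puts("signed compare rejected it");     /* reached */
	return 0;
}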
*/ static int -md_mount(mdsin, path, v3, fhp, fhlenp) +md_mount(mdsin, path, v3, sotype, fhp, fhlenp) struct sockaddr_in *mdsin; /* mountd server address */ char *path; int v3; + int sotype; u_char *fhp; u_long *fhlenp; { @@ -690,28 +691,38 @@ md_mount(mdsin, path, v3, fhp, fhlenp) u_long errno; u_char data[NFSX_V3FHMAX + sizeof(u_long)]; } *rdata; - struct mbuf *m; + mbuf_t m; int error, mlen, slen; int mntversion = v3 ? RPCMNT_VER3 : RPCMNT_VER1; + int proto = (sotype == SOCK_STREAM) ? IPPROTO_TCP : IPPROTO_UDP; + in_port_t mntport, nfsport; /* Get port number for MOUNTD. */ - error = krpc_portmap(mdsin, RPCPROG_MNT, mntversion, - &mdsin->sin_port); - if (error) return error; + error = krpc_portmap(mdsin, RPCPROG_MNT, mntversion, proto, &mntport); + if (error) + return error; + + /* Get port number for NFS use. */ + /* (If NFS/proto unavailable, don't bother with the mount call) */ + error = krpc_portmap(mdsin, NFS_PROG, v3 ? NFS_VER3 : NFS_VER2, proto, &nfsport); + if (error) + return error; + + /* Set port number for MOUNTD */ + mdsin->sin_port = mntport; slen = strlen(path); mlen = RPC_STR_SIZE(slen); - m = m_get_len(mlen); - if (m == NULL) - return ENOBUFS; - str = mtod(m, struct rpc_string *); + error = mbuf_get_with_len(mlen, &m); + if (error) + return error; + str = mbuf_data(m); str->len = htonl(slen); bcopy(path, str->data, slen); /* Do RPC to mountd. */ - error = krpc_call(mdsin, RPCPROG_MNT, mntversion, - RPCMNT_MOUNT, &m, NULL); + error = krpc_call(mdsin, sotype, RPCPROG_MNT, mntversion, RPCMNT_MOUNT, &m, NULL); if (error) return error; /* message already freed */ @@ -720,41 +731,40 @@ md_mount(mdsin, path, v3, fhp, fhlenp) * + a v2 filehandle * + a v3 filehandle length + a v3 filehandle */ - mlen = m->m_len; - if (mlen < sizeof(u_long)) + mlen = mbuf_len(m); + if (mlen < (int)sizeof(u_long)) goto bad; - rdata = mtod(m, struct rdata *); + rdata = mbuf_data(m); error = ntohl(rdata->errno); if (error) goto out; if (v3) { u_long fhlen; u_char *fh; - if (mlen < sizeof(u_long)*2) + if (mlen < (int)sizeof(u_long)*2) goto bad; fhlen = ntohl(*(u_long*)rdata->data); fh = rdata->data + sizeof(u_long); - if (mlen < (sizeof(u_long)*2 + fhlen)) + if (mlen < (int)(sizeof(u_long)*2 + fhlen)) goto bad; bcopy(fh, fhp, fhlen); *fhlenp = fhlen; } else { - if (mlen < (sizeof(u_long) + NFSX_V2FH)) + if (mlen < ((int)sizeof(u_long) + NFSX_V2FH)) goto bad; bcopy(rdata->data, fhp, NFSX_V2FH); *fhlenp = NFSX_V2FH; } /* Set port number for NFS use. */ - error = krpc_portmap(mdsin, NFS_PROG, v3 ? NFS_VER3 : NFS_VER2, - &mdsin->sin_port); + mdsin->sin_port = nfsport; goto out; bad: error = EBADRPC; out: - m_freem(m); + mbuf_freem(m); return error; } diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c index 398e6f868..762c140b0 100644 --- a/bsd/nfs/nfs_lock.c +++ b/bsd/nfs/nfs_lock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. 
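md_mount() above now asks the portmapper for both the mountd port and the NFS port before issuing the mount RPC, so a server that isn't serving NFS over the requested transport fails fast instead of after a pointless mount call. A sketch of that ordering, where resolve_port() and mount_rpc() are hypothetical stand-ins for krpc_portmap() and the RPCMNT_MOUNT call:

#include <errno.h>
#include <netinet/in.h>

extern int resolve_port(struct sockaddr_in *sa, int prog, int vers,
    int proto, in_port_t *portp);                     /* hypothetical */
extern int mount_rpc(struct sockaddr_in *sa, const char *path);  /* hypothetical */

static int
mount_with_port_checks(struct sockaddr_in *sa, const char *path,
    int mnt_prog, int mnt_vers, int nfs_prog, int nfs_vers, int proto)
{
	in_port_t mntport, nfsport;
	int error;

	/* resolve both ports up front: if NFS itself is unregistered,
	 * don't bother asking mountd for a file handle */
	if ((error = resolve_port(sa, mnt_prog, mnt_vers, proto, &mntport)))
		return (error);
	if ((error = resolve_port(sa, nfs_prog, nfs_vers, proto, &nfsport)))
		return (error);

	sa->sin_port = mntport;         /* talk to mountd */
	if ((error = mount_rpc(sa, path)))
		return (error);
	sa->sin_port = nfsport;         /* leave the address aimed at NFS */
	return (0);
}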
* * @APPLE_LICENSE_HEADER_START@ * @@ -54,22 +54,20 @@ #include #include #include /* for hz */ -#include -#include +#include #include #include /* for hz */ /* Must come after sys/malloc.h */ -#include -#include -#include -#include +#include +#include +#include /* for p_start */ +#include #include #include -#include #include #include -#include +#include -#include +#include #include @@ -84,17 +82,12 @@ #define OFF_MAX QUAD_MAX -uint64_t nfsadvlocks = 0; -struct timeval nfsadvlock_longest = {0, 0}; -struct timeval nfsadvlocks_time = {0, 0}; - /* * globals for managing the lockd fifo */ -pid_t nfslockdpid = 0; -struct file *nfslockdfp = 0; +vnode_t nfslockdvnode = 0; int nfslockdwaiting = 0; -int nfslockdfifowritten = 0; +time_t nfslockdstarttimeout = 0; int nfslockdfifolock = 0; #define NFSLOCKDFIFOLOCK_LOCKED 1 #define NFSLOCKDFIFOLOCK_WANT 2 @@ -303,20 +296,21 @@ nfs_lockxid_get(void) * (Also, if adding, try to clean up some stale entries.) */ static int -nfs_lock_pid_check(struct proc *p, int addflag, struct vnode *vp) +nfs_lock_pid_check(proc_t p, int addflag, vnode_t vp) { struct nfs_lock_pid *lp, *lplru, *lplru_next; - struct proc *plru; + proc_t plru; int error = 0; struct timeval now; /* lock hash */ loop: if (nfs_lock_pid_lock) { + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); while (nfs_lock_pid_lock) { nfs_lock_pid_lock = -1; tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0); - if ((error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p))) + if ((error = nfs_sigintr(nmp, NULL, p))) return (error); } goto loop; @@ -325,9 +319,9 @@ loop: /* Search hash chain */ error = ENOENT; - lp = NFS_LOCK_PID_HASH(p->p_pid)->lh_first; + lp = NFS_LOCK_PID_HASH(proc_pid(p))->lh_first; for (; lp != NULL; lp = lp->lp_hash.le_next) - if (lp->lp_pid == p->p_pid) { + if (lp->lp_pid == proc_pid(p)) { /* found pid... */ if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) { /* ...and it's valid */ @@ -390,15 +384,19 @@ loop: MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid), M_TEMP, M_WAITOK | M_ZERO); } - /* (re)initialize nfs_lock_pid info */ - lp->lp_pid = p->p_pid; - lp->lp_pid_start = p->p_stats->p_start; - /* insert pid in hash */ - LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash); - lp->lp_valid = 1; - lp->lp_time = now.tv_sec; - TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru); - error = 0; + if (!lp) { + error = ENOMEM; + } else { + /* (re)initialize nfs_lock_pid info */ + lp->lp_pid = proc_pid(p); + lp->lp_pid_start = p->p_stats->p_start; + /* insert pid in hash */ + LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash); + lp->lp_valid = 1; + lp->lp_time = now.tv_sec; + TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru); + error = 0; + } } /* unlock hash */ @@ -417,42 +415,46 @@ loop: * NFS advisory byte-level locks. 
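nfs_lock_pid_check() above keys its table on more than the pid: since pids are recycled, each entry also records the owning process's start time, and an entry only matches when both agree. A minimal sketch of that validity test, with illustrative types:

#include <stdbool.h>
#include <sys/types.h>
#include <sys/time.h>

struct lock_pid_entry {
	pid_t          lp_pid;
	struct timeval lp_pid_start;    /* start time of the owning process */
};

static bool
entry_matches(const struct lock_pid_entry *lp, pid_t pid,
    const struct timeval *start)
{
	/* same pid with a different start time means the pid was recycled */
	return (lp->lp_pid == pid &&
	    lp->lp_pid_start.tv_sec == start->tv_sec &&
	    lp->lp_pid_start.tv_usec == start->tv_usec);
}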
*/ int -nfs_dolock(struct vop_advlock_args *ap) -/* struct vop_advlock_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; +nfs_dolock(struct vnop_advlock_args *ap) +/* struct vnop_advlock_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + vfs_context_t a_context; }; */ { LOCKD_MSG_REQUEST msgreq; LOCKD_MSG *msg; - struct vnode *vp, *wvp; + vnode_t vp, wvp; struct nfsnode *np; int error, error1; struct flock *fl; int fmode, ioflg; - struct proc *p; struct nfsmount *nmp; - struct vattr vattr; + struct nfs_vattr nvattr; off_t start, end; struct timeval now; int timeo, endtime, lastmsg, wentdown = 0; int lockpidcheck; + kauth_cred_t cred; + proc_t p; + struct sockaddr *saddr; - p = current_proc(); + p = vfs_context_proc(ap->a_context); + cred = vfs_context_ucred(ap->a_context); vp = ap->a_vp; fl = ap->a_fl; np = VTONFS(vp); - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); if (nmp->nm_flag & NFSMNT_NOLOCKS) - return (EOPNOTSUPP); + return (ENOTSUP); /* * The NLM protocol doesn't allow the server to return an error @@ -474,11 +476,11 @@ nfs_dolock(struct vop_advlock_args *ap) return (EINVAL); } /* - * If daemon is running take a ref on its fifo + * If daemon is running take a ref on its fifo vnode */ - if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) { - if (!nfslockdwaiting) - return (EOPNOTSUPP); + if (!(wvp = nfslockdvnode)) { + if (!nfslockdwaiting && !nfslockdstarttimeout) + return (ENOTSUP); /* * Don't wake lock daemon if it hasn't been started yet and * this is an unlock request (since we couldn't possibly @@ -486,17 +488,45 @@ nfs_dolock(struct vop_advlock_args *ap) * uninformed unlock request due to closef()'s behavior of doing * unlocks on all files if a process has had a lock on ANY file. */ - if (!nfslockdfp && (fl->l_type == F_UNLCK)) + if (!nfslockdvnode && (fl->l_type == F_UNLCK)) return (EINVAL); - /* wake up lock daemon */ - (void)wakeup((void *)&nfslockdwaiting); - /* wait on nfslockdfp for a while to allow daemon to start */ - tsleep((void *)&nfslockdfp, PCATCH | PUSER, "lockd", 60*hz); - /* check for nfslockdfp and f_data */ - if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) - return (EOPNOTSUPP); + microuptime(&now); + if (nfslockdwaiting) { + /* wake up lock daemon */ + nfslockdstarttimeout = now.tv_sec + 60; + (void)wakeup((void *)&nfslockdwaiting); + } + /* wait on nfslockdvnode for a while to allow daemon to start */ + while (!nfslockdvnode && (now.tv_sec < nfslockdstarttimeout)) { + error = tsleep((void *)&nfslockdvnode, PCATCH | PUSER, "lockdstart", 2*hz); + if (error && (error != EWOULDBLOCK)) + return (error); + /* check that we still have our mount... */ + /* ...and that we still support locks */ + nmp = VFSTONFS(vnode_mount(vp)); + if (!nmp) + return (ENXIO); + if (nmp->nm_flag & NFSMNT_NOLOCKS) + return (ENOTSUP); + if (!error) + break; + microuptime(&now); + } + /* + * check for nfslockdvnode + * If it hasn't started by now, there's a problem. + */ + if (!(wvp = nfslockdvnode)) + return (ENOTSUP); + } + error = vnode_getwithref(wvp); + if (error) + return (ENOTSUP); + error = vnode_ref(wvp); + if (error) { + vnode_put(wvp); + return (ENOTSUP); } - VREF(wvp); /* * Need to check if this process has successfully acquired an NFS lock before. 
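The startup path above no longer does one fixed 60-second tsleep(): it wakes the daemon once, arms nfslockdstarttimeout, and then naps in short slices, distinguishing an explicit wakeup from a timeout so the mount state can be rechecked between naps. A userland-flavored sketch, under the assumption that sleep_on() mimics tsleep() (0 on wakeup, EWOULDBLOCK on timeout, another errno if interrupted); both names are illustrative:

#include <errno.h>
#include <time.h>

extern int sleep_on(void *channel, int seconds);  /* hypothetical tsleep() stand-in */
extern void *lockd_ready;       /* set non-NULL once the daemon registers */

static int
wait_for_lockd(time_t deadline)
{
	int error;

	while (!lockd_ready && time(NULL) < deadline) {
		error = sleep_on(&lockd_ready, 2);      /* 2-second naps */
		if (error && error != EWOULDBLOCK)
			return (error);                 /* interrupted: give up */
		if (!error)
			break;                          /* explicit wakeup */
	}
	return (lockd_ready ? 0 : ENOTSUP);
}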
@@ -504,10 +534,14 @@ nfs_dolock(struct vop_advlock_args *ap) */ lockpidcheck = nfs_lock_pid_check(p, 0, vp); if (lockpidcheck) { - if (lockpidcheck != ENOENT) + if (lockpidcheck != ENOENT) { + vnode_rele(wvp); + vnode_put(wvp); return (lockpidcheck); + } if (ap->a_op == F_UNLCK) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (0); } } @@ -532,23 +566,27 @@ nfs_dolock(struct vop_advlock_args *ap) /* need to flush, and refetch attributes to make */ /* sure we have the correct end of file offset */ if (np->n_flag & NMODIFIED) { - np->n_xid = 0; - error = nfs_vinvalbuf(vp, V_SAVE, p->p_ucred, p, 1); + NATTRINVALIDATE(np); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (error); } } - np->n_xid = 0; - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); + NATTRINVALIDATE(np); + + error = nfs_getattr(vp, &nvattr, cred, p); if (error) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (error); } start = np->n_size + fl->l_start; break; default: - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (EINVAL); } if (fl->l_len == 0) @@ -560,12 +598,14 @@ nfs_dolock(struct vop_advlock_args *ap) start += fl->l_len; } if (start < 0) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (EINVAL); } if (!NFS_ISV3(vp) && ((start >= 0x80000000) || (end >= 0x80000000))) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (EINVAL); } @@ -583,37 +623,40 @@ nfs_dolock(struct vop_advlock_args *ap) msg->lm_fl.l_start = start; if (end != -1) msg->lm_fl.l_len = end - start + 1; - msg->lm_fl.l_pid = p->p_pid; + msg->lm_fl.l_pid = proc_pid(p); if (ap->a_flags & F_WAIT) msg->lm_flags |= LOCKD_MSG_BLOCK; if (ap->a_op == F_GETLK) msg->lm_flags |= LOCKD_MSG_TEST; - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) { - vrele(wvp); + vnode_rele(wvp); + vnode_put(wvp); return (ENXIO); } - bcopy(mtod(nmp->nm_nam, struct sockaddr *), &msg->lm_addr, - min(sizeof msg->lm_addr, - mtod(nmp->nm_nam, struct sockaddr *)->sa_len)); + saddr = mbuf_data(nmp->nm_nam); + bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len)); msg->lm_fh_len = NFS_ISV3(vp) ? 
VTONFS(vp)->n_fhsize : NFSX_V2FH; bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len); if (NFS_ISV3(vp)) msg->lm_flags |= LOCKD_MSG_NFSV3; - cru2x(p->p_ucred, &msg->lm_cred); + cru2x(cred, &msg->lm_cred); microuptime(&now); lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); fmode = FFLAGS(O_WRONLY); - if ((error = VOP_OPEN(wvp, fmode, kernproc->p_ucred, p))) { - vrele(wvp); + if ((error = VNOP_OPEN(wvp, fmode, ap->a_context))) { + vnode_rele(wvp); + vnode_put(wvp); return (error); } + vnode_lock(wvp); ++wvp->v_writecount; + vnode_unlock(wvp); /* allocate unique xid */ msg->lm_xid = nfs_lockxid_get(); @@ -623,8 +666,6 @@ nfs_dolock(struct vop_advlock_args *ap) #define IO_NOMACCHECK 0; ioflg = IO_UNIT | IO_NOMACCHECK; for (;;) { - VOP_LEASE(wvp, p, kernproc->p_ucred, LEASE_WRITE); - error = 0; while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) { nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT; @@ -638,18 +679,13 @@ nfs_dolock(struct vop_advlock_args *ap) nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED; error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0, - UIO_SYSSPACE, ioflg, kernproc->p_ucred, NULL, p); - - nfslockdfifowritten = 1; + UIO_SYSSPACE32, ioflg, proc_ucred(kernproc), NULL, p); nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED; if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) { nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT; wakeup((void *)&nfslockdfifolock); } - /* wake up lock daemon */ - if (nfslockdwaiting) - (void)wakeup((void *)&nfslockdwaiting); if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) { break; @@ -694,14 +730,14 @@ wait_for_granted: break; /* check that we still have our mount... */ /* ...and that we still support locks */ - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS)) break; /* * If the mount is hung and we've requested not to hang * on remote filesystems, then bail now. */ - if ((p != NULL) && ((p->p_flag & P_NOREMOTEHANG) != 0) && + if ((p != NULL) && ((proc_noremotehang(p)) != 0) && ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) { if (fl->l_type == F_UNLCK) printf("nfs_dolock: aborting unlock request " @@ -713,7 +749,7 @@ wait_for_granted: } if (error) { /* check that we still have our mount... */ - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) { if (error == EWOULDBLOCK) error = ENXIO; @@ -722,14 +758,14 @@ wait_for_granted: /* ...and that we still support locks */ if (nmp->nm_flag & NFSMNT_NOLOCKS) { if (error == EWOULDBLOCK) - error = EOPNOTSUPP; + error = ENOTSUP; break; } - if ((error == EOPNOTSUPP) && + if ((error == ENOTSUP) && (nmp->nm_state & NFSSTA_LOCKSWORK)) { /* * We have evidence that locks work, yet lockd - * returned EOPNOTSUPP. This is probably because + * returned ENOTSUP. This is probably because * it was unable to contact the server's lockd to * send it the request. * @@ -765,7 +801,7 @@ wait_for_granted: * If the mount is hung and we've requested not to hang * on remote filesystems, then bail now. 
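The nfs_down()/nfs_up() calls above implement throttled user messaging: while lockd stays silent, at most one "lockd not responding" message goes out per nm_tprintf_delay seconds, and wentdown records that a complaint was made so the eventual reply can announce "lockd alive again". A tiny sketch of the throttle, with illustrative names:

#include <stdio.h>
#include <time.h>

/* emit at most one warning per `delay` seconds; remember that we
 * complained so a later reply can announce recovery */
static void
maybe_complain(time_t *lastmsg, time_t delay, int *wentdown)
{
	time_t now = time(NULL);

	if (delay != 0 && (*lastmsg + delay) < now) {
		*lastmsg = now;
		printf("nfs: lockd not responding\n");
		*wentdown = 1;
	}
}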
*/ - if ((p != NULL) && ((p->p_flag & P_NOREMOTEHANG) != 0) && + if ((p != NULL) && ((proc_noremotehang(p)) != 0) && ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) { if (fl->l_type == F_UNLCK) printf("nfs_dolock: aborting unlock request " @@ -779,8 +815,7 @@ wait_for_granted: (nmp->nm_tprintf_initial_delay != 0) && ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { lastmsg = now.tv_sec; - nfs_down(NULL, nmp, p, "lockd not responding", - 0, NFSSTA_LOCKTIMEO); + nfs_down(nmp, p, 0, NFSSTA_LOCKTIMEO, "lockd not responding"); wentdown = 1; } if (msgreq.lmr_errno == EINPROGRESS) { @@ -818,12 +853,10 @@ wait_for_granted: continue; } - if (wentdown) { - /* we got a reponse, so the server's lockd is OK */ - nfs_up(NULL, VFSTONFS(vp->v_mount), p, "lockd alive again", - NFSSTA_LOCKTIMEO); - wentdown = 0; - } + /* we got a reponse, so the server's lockd is OK */ + nfs_up(VFSTONFS(vnode_mount(vp)), p, NFSSTA_LOCKTIMEO, + wentdown ? "lockd alive again" : NULL); + wentdown = 0; if (msgreq.lmr_errno == EINPROGRESS) { /* got NLM_BLOCKED response */ @@ -877,7 +910,7 @@ wait_for_granted: if (!error) { /* record that NFS file locking has worked on this mount */ - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) nmp->nm_state |= NFSSTA_LOCKSWORK; /* @@ -892,10 +925,12 @@ wait_for_granted: } break; } - + nfs_lockdmsg_dequeue(&msgreq); - error1 = vn_close(wvp, FWRITE, kernproc->p_ucred, p); + error1 = VNOP_CLOSE(wvp, FWRITE, ap->a_context); + vnode_rele(wvp); + vnode_put(wvp); /* prefer any previous 'error' to our vn_close 'error1'. */ return (error != 0 ? error : error1); } @@ -905,20 +940,14 @@ wait_for_granted: * NFS advisory byte-level locks answer from the lock daemon. */ int -nfslockdans(struct proc *p, struct lockd_ans *ansp) +nfslockdans(proc_t p, struct lockd_ans *ansp) { LOCKD_MSG_REQUEST *msgreq; int error; - /* - * Let root, or someone who once was root (lockd generally - * switches to the daemon uid once it is done setting up) make - * this call. - * - * XXX This authorization check is probably not right. - */ - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0 && - p->p_cred->p_svuid != 0) + /* Let root make this call. */ + error = proc_suser(p); + if (error) return (error); /* the version should match, or we're out of sync */ @@ -972,28 +1001,38 @@ nfslockdans(struct proc *p, struct lockd_ans *ansp) * NFS advisory byte-level locks: fifo file# from the lock daemon. */ int -nfslockdfd(struct proc *p, int fd) +nfslockdfd(proc_t p, int fd) { int error; - struct file *fp, *ofp; + vnode_t vp, oldvp; - error = suser(p->p_ucred, &p->p_acflag); + error = proc_suser(p); if (error) return (error); if (fd < 0) { - fp = 0; + vp = NULL; } else { - error = getvnode(p, fd, &fp); + error = file_vnode(fd, &vp); if (error) return (error); - (void)fref(fp); + error = vnode_getwithref(vp); + if (error) + return (error); + error = vnode_ref(vp); + if (error) { + vnode_put(vp); + return (error); + } + } + oldvp = nfslockdvnode; + nfslockdvnode = vp; + if (oldvp) { + vnode_rele(oldvp); + } + (void)wakeup((void *)&nfslockdvnode); + if (vp) { + vnode_put(vp); } - ofp = nfslockdfp; - nfslockdfp = fp; - if (ofp) - (void)frele(ofp); - nfslockdpid = nfslockdfp ? 
p->p_pid : 0; - (void)wakeup((void *)&nfslockdfp); return (0); } @@ -1002,23 +1041,17 @@ nfslockdfd(struct proc *p, int fd) * lock daemon waiting for lock request */ int -nfslockdwait(struct proc *p) +nfslockdwait(proc_t p) { int error; - struct file *fp, *ofp; - if (p->p_pid != nfslockdpid) { - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); - } - if (nfslockdwaiting) + error = proc_suser(p); + if (error) + return (error); + if (nfslockdwaiting || nfslockdvnode) return (EBUSY); - if (nfslockdfifowritten) { - nfslockdfifowritten = 0; - return (0); - } + nfslockdstarttimeout = 0; nfslockdwaiting = 1; tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0); nfslockdwaiting = 0; diff --git a/bsd/nfs/nfs_lock.h b/bsd/nfs/nfs_lock.h index cc99a1fa8..512408454 100644 --- a/bsd/nfs/nfs_lock.h +++ b/bsd/nfs/nfs_lock.h @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * Redistribution and use in source and binary forms, with or without @@ -60,7 +81,7 @@ typedef struct nfs_lock_msg { u_int64_t lm_xid; /* unique message transaction ID */ struct flock lm_fl; /* The lock request. */ struct sockaddr_storage lm_addr; /* The address. */ - size_t lm_fh_len; /* The file handle length. */ + int lm_fh_len; /* The file handle length. */ struct xucred lm_cred; /* user cred for lock req */ u_int8_t lm_fh[NFS_SMALLFH]; /* The file handle. */ } LOCKD_MSG; @@ -96,7 +117,7 @@ struct lockd_ans { pid_t la_pid; /* pid of lock requester/owner */ off_t la_start; /* lock starting offset */ off_t la_len; /* lock length */ - size_t la_fh_len; /* The file handle length. */ + int la_fh_len; /* The file handle length. */ u_int8_t la_fh[NFS_SMALLFH]; /* The file handle. */ }; @@ -108,9 +129,12 @@ struct lockd_ans { #ifdef KERNEL void nfs_lockinit(void); -int nfs_dolock(struct vop_advlock_args *ap); -int nfslockdans(struct proc *p, struct lockd_ans *ansp); -int nfslockdfd(struct proc *p, int fd); -int nfslockdwait(struct proc *p); +int nfs_dolock(struct vnop_advlock_args *ap); +int nfslockdans(proc_t p, struct lockd_ans *ansp); +int nfslockdfd(proc_t p, int fd); +int nfslockdwait(proc_t p); + +extern vnode_t nfslockdvnode; +extern int nfslockdwaiting; #endif #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index b05a62cc7..36769aad4 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
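The nfslockdfd() rewrite above trades the struct file global for a vnode_t and follows the vnode KPI reference discipline: hold a short-term iocount (vnode_getwithref) while taking the long-term usecount (vnode_ref), publish the vnode, release the previous holder's usecount, and only then drop the iocount. A condensed kernel-side sketch of that sequence, using the same KPI calls as the patch (error handling trimmed to the essentials):

#include <sys/vnode.h>
#include <sys/systm.h>

extern vnode_t nfslockdvnode;

static int
install_lockd_vnode(vnode_t vp)
{
	vnode_t oldvp;
	int error;

	if (vp) {
		if ((error = vnode_getwithref(vp)))     /* iocount for the swap */
			return (error);
		if ((error = vnode_ref(vp))) {          /* usecount keeps vp alive */
			vnode_put(vp);
			return (error);
		}
	}
	oldvp = nfslockdvnode;
	nfslockdvnode = vp;
	if (oldvp)
		vnode_rele(oldvp);      /* drop the previous long-term reference */
	(void)wakeup((void *)&nfslockdvnode);
	if (vp)
		vnode_put(vp);          /* short-term iocount no longer needed */
	return (0);
}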
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,9 +63,10 @@ #include #include #include -#include -#include +#include +#include #include +#include #include #include @@ -77,8 +78,10 @@ LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl; u_long nfsnodehash; -#define TRUE 1 -#define FALSE 0 +lck_grp_t * nfs_node_hash_lck_grp; +lck_grp_attr_t * nfs_node_hash_lck_grp_attr; +lck_attr_t * nfs_node_hash_lck_attr; +lck_mtx_t *nfs_node_hash_mutex; /* * Initialize hash links for nfsnodes @@ -88,24 +91,28 @@ void nfs_nhinit(void) { nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash); + + nfs_node_hash_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfs_node_hash_lck_grp_attr); + nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", nfs_node_hash_lck_grp_attr); + + nfs_node_hash_lck_attr = lck_attr_alloc_init(); + + nfs_node_hash_mutex = lck_mtx_alloc_init(nfs_node_hash_lck_grp, nfs_node_hash_lck_attr); } /* * Compute an entry in the NFS hash table structure */ u_long -nfs_hash(fhp, fhsize) - register nfsfh_t *fhp; - int fhsize; +nfs_hash(u_char *fhp, int fhsize) { - register u_char *fhpp; - register u_long fhsum; - register int i; + u_long fhsum; + int i; - fhpp = &fhp->fh_bytes[0]; fhsum = 0; for (i = 0; i < fhsize; i++) - fhsum += *fhpp++; + fhsum += *fhp++; return (fhsum); } @@ -115,22 +122,25 @@ nfs_hash(fhp, fhsize) * In all cases, a pointer to a * nfsnode structure is returned. */ -int nfs_node_hash_lock; - int -nfs_nget(mntp, fhp, fhsize, npp) - struct mount *mntp; - register nfsfh_t *fhp; - int fhsize; - struct nfsnode **npp; +nfs_nget( + mount_t mntp, + vnode_t dvp, + struct componentname *cnp, + u_char *fhp, + int fhsize, + struct nfs_vattr *nvap, + u_int64_t *xidp, + int flags, + struct nfsnode **npp) { - struct proc *p = current_proc(); /* XXX */ struct nfsnode *np; struct nfsnodehashhead *nhpp; - register struct vnode *vp; - struct vnode *nvp; + vnode_t vp, nvp; int error; - struct mount *mp; + mount_t mp; + struct vnode_fsparam vfsp; + uint32_t vid; /* Check for unmount in progress */ if (!mntp || (mntp->mnt_kern_flag & MNTK_UNMOUNT)) { @@ -140,35 +150,39 @@ nfs_nget(mntp, fhp, fhsize, npp) nhpp = NFSNOHASH(nfs_hash(fhp, fhsize)); loop: + lck_mtx_lock(nfs_node_hash_mutex); for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { - mp = (np->n_flag & NINIT) ? np->n_mount : NFSTOV(np)->v_mount; + mp = (np->n_flag & NINIT) ? np->n_mount : vnode_mount(NFSTOV(np)); if (mntp != mp || np->n_fhsize != fhsize || - bcmp((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize)) + bcmp(fhp, np->n_fhp, fhsize)) continue; /* if the node is still being initialized, sleep on it */ if (np->n_flag & NINIT) { np->n_flag |= NWINIT; - tsleep(np, PINOD, "nfsngt", 0); + msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", 0); goto loop; } vp = NFSTOV(np); - if (vget(vp, LK_EXCLUSIVE, p)) - goto loop; - *npp = np; - return(0); - } - /* - * Obtain a lock to prevent a race condition if the getnewvnode() - * or MALLOC() below happens to block. - */ - if (nfs_node_hash_lock) { - while (nfs_node_hash_lock) { - nfs_node_hash_lock = -1; - tsleep(&nfs_node_hash_lock, PVM, "nfsngt", 0); + vid = vnode_vid(vp); + lck_mtx_unlock(nfs_node_hash_mutex); + if ((error = vnode_getwithvid(vp, vid))) { + /* + * If vnode is being reclaimed or has already + * changed identity, no need to wait. 
+ */ + return (error); + } + /* update attributes */ + error = nfs_loadattrcache(np, nvap, xidp, 0); + if (error) { + vnode_put(vp); + } else { + if (dvp && cnp && (flags & NG_MAKEENTRY)) + cache_enter(dvp, vp, cnp); + *npp = np; } - goto loop; + return(error); } - nfs_node_hash_lock = 1; /* * allocate and initialize nfsnode and stick it in the hash @@ -176,72 +190,131 @@ loop: * hash before initialization is complete will wait for it. */ MALLOC_ZONE(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK); + if (!np) { + lck_mtx_unlock(nfs_node_hash_mutex); + *npp = 0; + return (ENOMEM); + } bzero((caddr_t)np, sizeof *np); np->n_flag |= NINIT; np->n_mount = mntp; - lockinit(&np->n_lock, PINOD, "nfsnode", 0, 0); - /* lock the new nfsnode */ - lockmgr(&np->n_lock, LK_EXCLUSIVE, NULL, p); - /* Insert the nfsnode in the hash queue for its new file handle */ + /* setup node's file handle */ if (fhsize > NFS_SMALLFH) { - MALLOC_ZONE(np->n_fhp, nfsfh_t *, + MALLOC_ZONE(np->n_fhp, u_char *, fhsize, M_NFSBIGFH, M_WAITOK); - } else - np->n_fhp = &np->n_fh; - bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize); + if (!np->n_fhp) { + lck_mtx_unlock(nfs_node_hash_mutex); + FREE_ZONE(np, sizeof *np, M_NFSNODE); + *npp = 0; + return (ENOMEM); + } + } else { + np->n_fhp = &np->n_fh[0]; + } + bcopy(fhp, np->n_fhp, fhsize); np->n_fhsize = fhsize; - LIST_INSERT_HEAD(nhpp, np, n_hash); + + /* Insert the nfsnode in the hash queue for its new file handle */ np->n_flag |= NHASHED; + LIST_INSERT_HEAD(nhpp, np, n_hash); /* release lock on hash table */ - if (nfs_node_hash_lock < 0) - wakeup(&nfs_node_hash_lock); - nfs_node_hash_lock = 0; + lck_mtx_unlock(nfs_node_hash_mutex); + + /* do initial loading of attributes */ + error = nfs_loadattrcache(np, nvap, xidp, 1); + if (error) { + lck_mtx_lock(nfs_node_hash_mutex); + LIST_REMOVE(np, n_hash); + np->n_flag &= ~(NHASHED|NINIT); + if (np->n_flag & NWINIT) { + np->n_flag &= ~NWINIT; + wakeup((caddr_t)np); + } + lck_mtx_unlock(nfs_node_hash_mutex); + if (np->n_fhsize > NFS_SMALLFH) + FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); + FREE_ZONE(np, sizeof *np, M_NFSNODE); + *npp = 0; + return (error); + } + np->n_mtime = nvap->nva_mtime; + if (nvap->nva_type == VDIR) + np->n_ncmtime = nvap->nva_mtime; + NMODEINVALIDATE(np); /* now, attempt to get a new vnode */ - error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp); + vfsp.vnfs_mp = mntp; + vfsp.vnfs_vtype = nvap->nva_type; + vfsp.vnfs_str = "nfs"; + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_fsnode = np; + if (nvap->nva_type == VFIFO) + vfsp.vnfs_vops = fifo_nfsv2nodeop_p; + else if (nvap->nva_type == VBLK || nvap->nva_type == VCHR) + vfsp.vnfs_vops = spec_nfsv2nodeop_p; + else + vfsp.vnfs_vops = nfsv2_vnodeop_p; + vfsp.vnfs_markroot = (flags & NG_MARKROOT) ? 
1 : 0; + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_rdev = 0; + vfsp.vnfs_filesize = nvap->nva_size; + vfsp.vnfs_cnp = cnp; + if (dvp && cnp && (flags & NG_MAKEENTRY)) + vfsp.vnfs_flags = 0; + else + vfsp.vnfs_flags = VNFS_NOCACHE; + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp); if (error) { + lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); - np->n_flag &= ~NHASHED; + np->n_flag &= ~(NHASHED|NINIT); + if (np->n_flag & NWINIT) { + np->n_flag &= ~NWINIT; + wakeup((caddr_t)np); + } + lck_mtx_unlock(nfs_node_hash_mutex); if (np->n_fhsize > NFS_SMALLFH) - FREE_ZONE((caddr_t)np->n_fhp, np->n_fhsize, M_NFSBIGFH); + FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); FREE_ZONE(np, sizeof *np, M_NFSNODE); *npp = 0; return (error); } vp = nvp; - vp->v_data = np; np->n_vnode = vp; + vnode_addfsref(vp); + vnode_settag(vp, VT_NFS); // XXX shouldn't this be a vnode_create() parameter? *npp = np; + /* node is now initialized */ - /* node is now initialized, check if anyone's waiting for it */ + /* check if anyone's waiting on this node */ + lck_mtx_lock(nfs_node_hash_mutex); np->n_flag &= ~NINIT; if (np->n_flag & NWINIT) { np->n_flag &= ~NWINIT; wakeup((caddr_t)np); } + lck_mtx_unlock(nfs_node_hash_mutex); return (error); } + int nfs_inactive(ap) - struct vop_inactive_args /* { - struct vnode *a_vp; - struct proc *a_p; + struct vnop_inactive_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; } */ *ap; { register struct nfsnode *np; register struct sillyrename *sp; - struct proc *p = current_proc(); /* XXX */ - extern int prtactive; - struct ucred *cred; + kauth_cred_t cred; np = VTONFS(ap->a_vp); - if (prtactive && ap->a_vp->v_usecount != 0) - vprint("nfs_inactive: pushing active", ap->a_vp); - if (ap->a_vp->v_type != VDIR) { + if (vnode_vtype(ap->a_vp) != VDIR) { sp = np->n_sillyrename; np->n_sillyrename = (struct sillyrename *)0; } else @@ -252,9 +325,11 @@ nfs_inactive(ap) * Remove the silly file that was rename'd earlier */ #if DIAGNOSTIC - kprintf("nfs_inactive removing %s, dvp=%x, a_vp=%x, ap=%x, np=%x, sp=%x\n", &sp->s_name[0], (unsigned)sp->s_dvp, (unsigned)ap->a_vp, (unsigned)ap, (unsigned)np, (unsigned)sp); + kprintf("nfs_inactive removing %s, dvp=%x, a_vp=%x, ap=%x, np=%x, sp=%x\n", + &sp->s_name[0], (unsigned)sp->s_dvp, (unsigned)ap->a_vp, (unsigned)ap, + (unsigned)np, (unsigned)sp); #endif - (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); + nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, vfs_context_proc(ap->a_context), 1); np->n_size = 0; ubc_setsize(ap->a_vp, (off_t)0); nfs_removeit(sp); @@ -263,19 +338,21 @@ nfs_inactive(ap) * again if another object gets created with the same filehandle * before this vnode gets reclaimed */ + lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); np->n_flag &= ~NHASHED; + lck_mtx_unlock(nfs_node_hash_mutex); cred = sp->s_cred; if (cred != NOCRED) { sp->s_cred = NOCRED; - crfree(cred); + kauth_cred_rele(cred); } - vrele(sp->s_dvp); + vnode_rele(sp->s_dvp); FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ); + vnode_recycle(ap->a_vp); } - np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED | - NQNFSNONCACHE | NQNFSWRITE | NHASHED); - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + /* clear all flags other than these */ + np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NHASHED); return (0); } @@ -284,36 +361,23 @@ nfs_inactive(ap) */ int nfs_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; + struct vnop_reclaim_args /* { + struct vnodeop_desc *a_desc; + vnode_t 
a_vp; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - register struct nfsmount *nmp; - register struct nfsdmap *dp, *dp2; - extern int prtactive; + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct nfsdmap *dp, *dp2; - if (prtactive && vp->v_usecount != 0) - vprint("nfs_reclaim: pushing active", vp); + vnode_removefsref(vp); if (np->n_flag & NHASHED) { + lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); np->n_flag &= ~NHASHED; - } - - /* - * In case we block during FREE_ZONEs below, get the entry out - * of tbe name cache now so subsequent lookups won't find it. - */ - cache_purge(vp); - - /* - * For nqnfs, take it off the timer queue as required. - */ - nmp = VFSTONFS(vp->v_mount); - if (nmp && (nmp->nm_flag & NFSMNT_NQNFS) && np->n_timer.cqe_next != 0) { - CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); + lck_mtx_unlock(nfs_node_hash_mutex); } /* @@ -321,7 +385,7 @@ nfs_reclaim(ap) * large file handle structures that might be associated with * this nfs node. */ - if (vp->v_type == VDIR) { + if (vnode_vtype(vp) == VDIR) { dp = np->n_cookies.lh_first; while (dp) { dp2 = dp; @@ -331,66 +395,11 @@ nfs_reclaim(ap) } } if (np->n_fhsize > NFS_SMALLFH) { - FREE_ZONE((caddr_t)np->n_fhp, np->n_fhsize, M_NFSBIGFH); + FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); } + vnode_clearfsnode(vp); - FREE_ZONE(vp->v_data, sizeof (struct nfsnode), M_NFSNODE); - vp->v_data = (void *)0; + FREE_ZONE(np, sizeof(struct nfsnode), M_NFSNODE); return (0); } -/* - * Lock an nfsnode - */ -int -nfs_lock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - - /* - * Ugh, another place where interruptible mounts will get hung. - * If you make this call interruptible, then you have to fix all - * the VOP_LOCK() calls to expect interruptibility. - */ - if (vp->v_tag == VT_NON) - return (ENOENT); /* ??? -- got to check something and error, but what? */ - - return(lockmgr(&VTONFS(vp)->n_lock, ap->a_flags, &vp->v_interlock, - ap->a_p)); - -} - -/* - * Unlock an nfsnode - */ -int -nfs_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - return (lockmgr(&VTONFS(vp)->n_lock, ap->a_flags | LK_RELEASE, - &vp->v_interlock, ap->a_p)); -} - -/* - * Check for a locked nfsnode - */ -int -nfs_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - return (lockstatus(&VTONFS(ap->a_vp)->n_lock)); - -} diff --git a/bsd/nfs/nfs_nqlease.c b/bsd/nfs/nfs_nqlease.c deleted file mode 100644 index 955925a24..000000000 --- a/bsd/nfs/nfs_nqlease.c +++ /dev/null @@ -1,1353 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Rick Macklem at The University of Guelph. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)nfs_nqlease.c 8.9 (Berkeley) 5/20/95 - * FreeBSD-Id: nfs_nqlease.c,v 1.32 1997/11/07 08:53:23 phk Exp $ - */ - - -/* - * References: - * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant - * Mechanism for Distributed File Cache Consistency", - * In Proc. of the Twelfth ACM Symposium on Operating Systems - * Principals, pg. 202-210, Litchfield Park, AZ, Dec. 1989. - * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching - * in the Sprite Network File System", ACM TOCS 6(1), - * pages 134-154, February 1988. - * V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and - * Performance of Cache-Consistency Protocols", Digital - * Equipment Corporation WRL Research Report 89/5, May 1989. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -time_t nqnfsstarttime = (time_t)0; -int nqsrv_clockskew = NQ_CLOCKSKEW; -int nqsrv_writeslack = NQ_WRITESLACK; -int nqsrv_maxlease = NQ_MAXLEASE; -static int nqsrv_maxnumlease = NQ_MAXNUMLEASE; - -struct vop_lease_args; - -static int nqsrv_cmpnam __P((struct nfssvc_sock *, struct mbuf *, - struct nqhost *)); -extern void nqnfs_lease_updatetime __P((int deltat)); -static int nqnfs_vacated __P((struct vnode *vp, struct ucred *cred)); -static void nqsrv_addhost __P((struct nqhost *lph, struct nfssvc_sock *slp, - struct mbuf *nam)); -static void nqsrv_instimeq __P((struct nqlease *lp, u_long duration)); -static void nqsrv_locklease __P((struct nqlease *lp)); -static void nqsrv_send_eviction __P((struct vnode *vp, struct nqlease *lp, - struct nfssvc_sock *slp, - struct mbuf *nam, struct ucred *cred)); -static void nqsrv_unlocklease __P((struct nqlease *lp)); -static void nqsrv_waitfor_expiry __P((struct nqlease *lp)); - -/* - * Signifies which rpcs can have piggybacked lease requests - */ -int nqnfs_piggy[NFS_NPROCS] = { - 0, - 0, - ND_WRITE, - ND_READ, - 0, - ND_READ, - ND_READ, - ND_WRITE, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ND_READ, - ND_READ, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, -}; - -extern nfstype nfsv2_type[9]; -extern nfstype nfsv3_type[9]; -extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; -extern int nfsd_waiting; -extern struct nfsstats nfsstats; -extern int nfs_mount_type; - -#define TRUE 1 -#define FALSE 0 - -#ifndef NFS_NOSERVER -/* - * Get or check for a lease for "vp", based on ND_CHECK flag. - * The rules are as follows: - * - if a current non-caching lease, reply non-caching - * - if a current lease for same host only, extend lease - * - if a read cachable lease and a read lease request - * add host to list any reply cachable - * - else { set non-cachable for read-write sharing } - * send eviction notice messages to all other hosts that have lease - * wait for lease termination { either by receiving vacated messages - * from all the other hosts or expiry - * via. timeout } - * modify lease to non-cachable - * - else if no current lease, issue new one - * - reply - * - return boolean TRUE iff nam should be m_freem()'d - * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep() - * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease(). - * nqsrv_locklease() is coded such that at least one of LC_LOCKED and - * LC_WANTED is set whenever a process is tsleeping in it. The exception - * is when a new lease is being allocated, since it is not in the timer - * queue yet. 
(Ditto for the splsoftclock() and splx(s) calls) - */ -int -nqsrv_getlease(vp, duration, flags, slp, procp, nam, cachablep, frev, cred) - struct vnode *vp; - u_long *duration; - int flags; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf *nam; - int *cachablep; - u_quad_t *frev; - struct ucred *cred; -{ - register struct nqlease *lp; - register struct nqfhhashhead *lpp = 0; - register struct nqhost *lph = 0; - struct nqlease *tlp; - struct nqm **lphp; - struct vattr vattr; - fhandle_t fh; - int i, ok, error, s; - - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) - return (0); - if (*duration > nqsrv_maxlease) - *duration = nqsrv_maxlease; - error = VOP_GETATTR(vp, &vattr, cred, procp); - if (error) - return (error); - *frev = vattr.va_filerev; - s = splsoftclock(); - tlp = vp->v_lease; - if ((flags & ND_CHECK) == 0) - nfsstats.srvnqnfs_getleases++; - if (tlp == (struct nqlease *)0) { - - /* - * Find the lease by searching the hash list. - */ - fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fh.fh_fid); - if (error) { - splx(s); - return (error); - } - lpp = NQFHHASH(fh.fh_fid.fid_data); - for (lp = lpp->lh_first; lp != 0; lp = lp->lc_hash.le_next) - if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] && - fh.fh_fsid.val[1] == lp->lc_fsid.val[1] && - !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata, - fh.fh_fid.fid_len - sizeof (long))) { - /* Found it */ - lp->lc_vp = vp; - vp->v_lease = lp; - tlp = lp; - break; - } - } else - lp = tlp; - if (lp) { - if ((lp->lc_flag & LC_NONCACHABLE) || - (lp->lc_morehosts == (struct nqm *)0 && - nqsrv_cmpnam(slp, nam, &lp->lc_host))) - goto doreply; - if ((flags & ND_READ) && (lp->lc_flag & LC_WRITE) == 0) { - if (flags & ND_CHECK) - goto doreply; - if (nqsrv_cmpnam(slp, nam, &lp->lc_host)) - goto doreply; - i = 0; - if (lp->lc_morehosts) { - lph = lp->lc_morehosts->lpm_hosts; - lphp = &lp->lc_morehosts->lpm_next; - ok = 1; - } else { - lphp = &lp->lc_morehosts; - ok = 0; - } - while (ok && (lph->lph_flag & LC_VALID)) { - if (nqsrv_cmpnam(slp, nam, lph)) - goto doreply; - if (++i == LC_MOREHOSTSIZ) { - i = 0; - if (*lphp) { - lph = (*lphp)->lpm_hosts; - lphp = &((*lphp)->lpm_next); - } else - ok = 0; - } else - lph++; - } - nqsrv_locklease(lp); - if (!ok) { - MALLOC_ZONE(*lphp, struct nqm *, - sizeof(struct nqm), - M_NQMHOST, M_WAITOK); - bzero((caddr_t)*lphp, sizeof (struct nqm)); - lph = (*lphp)->lpm_hosts; - } - nqsrv_addhost(lph, slp, nam); - nqsrv_unlocklease(lp); - } else { - lp->lc_flag |= LC_NONCACHABLE; - nqsrv_locklease(lp); - nqsrv_send_eviction(vp, lp, slp, nam, cred); - nqsrv_waitfor_expiry(lp); - nqsrv_unlocklease(lp); - } -doreply: - /* - * Update the lease and return - */ - if ((flags & ND_CHECK) == 0) - nqsrv_instimeq(lp, *duration); - if (lp->lc_flag & LC_NONCACHABLE) - *cachablep = 0; - else { - *cachablep = 1; - if (flags & ND_WRITE) - lp->lc_flag |= LC_WRITTEN; - } - splx(s); - return (0); - } - splx(s); - if (flags & ND_CHECK) - return (0); - - /* - * Allocate new lease - * The value of nqsrv_maxnumlease should be set generously, so that - * the following "printf" happens infrequently. 
- */ - if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) { - printf("Nqnfs server, too many leases\n"); - do { - (void) tsleep((caddr_t)&lbolt, PSOCK, - "nqsrvnuml", 0); - } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease); - } - MALLOC_ZONE(lp, struct nqlease *, - sizeof (struct nqlease), M_NQLEASE, M_WAITOK); - bzero((caddr_t)lp, sizeof (struct nqlease)); - if (flags & ND_WRITE) - lp->lc_flag |= (LC_WRITE | LC_WRITTEN); - nqsrv_addhost(&lp->lc_host, slp, nam); - lp->lc_vp = vp; - lp->lc_fsid = fh.fh_fsid; - bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, - fh.fh_fid.fid_len - sizeof (long)); - if(!lpp) - panic("nfs_nqlease.c: Phoney lpp"); - LIST_INSERT_HEAD(lpp, lp, lc_hash); - vp->v_lease = lp; - s = splsoftclock(); - nqsrv_instimeq(lp, *duration); - splx(s); - *cachablep = 1; - if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases) - nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases; - return (0); -} - -/* - * Local lease check for server syscalls. - * Just set up args and let nqsrv_getlease() do the rest. - * nqnfs_vop_lease_check() is the VOP_LEASE() form of the same routine. - * Ifdef'd code in nfsnode.h renames these routines to whatever a particular - * OS needs. - */ -void -nqnfs_lease_check(vp, p, cred, flag) - struct vnode *vp; - struct proc *p; - struct ucred *cred; - int flag; -{ - u_long duration = 0; - int cache; - u_quad_t frev; - - (void) nqsrv_getlease(vp, &duration, ND_CHECK | flag, NQLOCALSLP, - p, (struct mbuf *)0, &cache, &frev, cred); -} - -int -nqnfs_vop_lease_check(ap) - struct vop_lease_args /* { - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; - } */ *ap; -{ - u_long duration = 0; - int cache; - u_quad_t frev; - - (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag, - NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred); - return (0); -} - -#endif /* NFS_NOSERVER */ - -/* - * Add a host to an nqhost structure for a lease. - */ -static void -nqsrv_addhost(lph, slp, nam) - register struct nqhost *lph; - struct nfssvc_sock *slp; - struct mbuf *nam; -{ - register struct sockaddr_in *saddr; - - if (slp == NQLOCALSLP) - lph->lph_flag |= (LC_VALID | LC_LOCAL); - else if (slp == nfs_udpsock) { - saddr = mtod(nam, struct sockaddr_in *); - lph->lph_flag |= (LC_VALID | LC_UDP); - lph->lph_inetaddr = saddr->sin_addr.s_addr; - lph->lph_port = saddr->sin_port; - } else if (slp == nfs_cltpsock) { - lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT); - lph->lph_flag |= (LC_VALID | LC_CLTP); - } else { - lph->lph_flag |= (LC_VALID | LC_SREF); - lph->lph_slp = slp; - slp->ns_sref++; - } -} - -/* - * Update the lease expiry time and position it in the timer queue correctly. - */ -static void -nqsrv_instimeq(lp, duration) - register struct nqlease *lp; - u_long duration; -{ - register struct nqlease *tlp; - time_t newexpiry; - struct timeval now; - - microtime(&now); - newexpiry = now.tv_sec + duration + nqsrv_clockskew; - if (lp->lc_expiry == newexpiry) - return; - if (lp->lc_timer.cqe_next != 0) { - CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); - } - lp->lc_expiry = newexpiry; - - /* - * Find where in the queue it should be. 
- */ - tlp = nqtimerhead.cqh_last; - while (tlp != (void *)&nqtimerhead && tlp->lc_expiry > newexpiry) - tlp = tlp->lc_timer.cqe_prev; -#ifdef HASNVRAM - if (tlp == nqtimerhead.cqh_last) - NQSTORENOVRAM(newexpiry); -#endif /* HASNVRAM */ - if (tlp == (void *)&nqtimerhead) { - CIRCLEQ_INSERT_HEAD(&nqtimerhead, lp, lc_timer); - } else { - CIRCLEQ_INSERT_AFTER(&nqtimerhead, tlp, lp, lc_timer); - } -} - -/* - * Compare the requesting host address with the lph entry in the lease. - * Return true iff it is the same. - * This is somewhat messy due to the union in the nqhost structure. - * The local host is indicated by the special value of NQLOCALSLP for slp. - */ -static int -nqsrv_cmpnam(slp, nam, lph) - register struct nfssvc_sock *slp; - struct mbuf *nam; - register struct nqhost *lph; -{ - register struct sockaddr_in *saddr; - struct mbuf *addr; - union nethostaddr lhaddr; - int ret; - - if (slp == NQLOCALSLP) { - if (lph->lph_flag & LC_LOCAL) - return (1); - else - return (0); - } - if (slp == nfs_udpsock || slp == nfs_cltpsock) - addr = nam; - else - addr = slp->ns_nam; - if (lph->lph_flag & LC_UDP) - ret = netaddr_match(AF_INET, &lph->lph_haddr, addr); - else if (lph->lph_flag & LC_CLTP) - ret = netaddr_match(AF_ISO, &lph->lph_claddr, addr); - else { - if ((lph->lph_slp->ns_flag & SLP_VALID) == 0) - return (0); - saddr = mtod(lph->lph_slp->ns_nam, struct sockaddr_in *); - if (saddr->sin_family == AF_INET) - lhaddr.had_inetaddr = saddr->sin_addr.s_addr; - else - lhaddr.had_nam = lph->lph_slp->ns_nam; - ret = netaddr_match(saddr->sin_family, &lhaddr, addr); - } - return (ret); -} - -/* - * Send out eviction notice messages to all other hosts for the lease. - */ -static void -nqsrv_send_eviction(vp, lp, slp, nam, cred) - struct vnode *vp; - register struct nqlease *lp; - struct nfssvc_sock *slp; - struct mbuf *nam; - struct ucred *cred; -{ - register struct nqhost *lph = &lp->lc_host; - register struct mbuf *m; - register int siz; - struct nqm *lphnext = lp->lc_morehosts; - struct mbuf *mreq, *mb, *mb2, *mheadend; - struct socket *so; - struct mbuf *nam2; - struct sockaddr_in *saddr; - nfsfh_t nfh; - fhandle_t *fhp; - caddr_t bpos, cp; - u_long xid, *tl; - int len = 1, ok = 1, i = 0; - int sotype, solock; - - while (ok && (lph->lph_flag & LC_VALID)) { - if (nqsrv_cmpnam(slp, nam, lph)) - lph->lph_flag |= LC_VACATED; - else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { - if (lph->lph_flag & LC_UDP) { - MGET(nam2, M_WAIT, MT_SONAME); - saddr = mtod(nam2, struct sockaddr_in *); - nam2->m_len = saddr->sin_len = - sizeof (struct sockaddr_in); - saddr->sin_family = AF_INET; - saddr->sin_addr.s_addr = lph->lph_inetaddr; - saddr->sin_port = lph->lph_port; - so = nfs_udpsock->ns_so; - } else if (lph->lph_flag & LC_CLTP) { - nam2 = lph->lph_nam; - so = nfs_cltpsock->ns_so; - } else if (lph->lph_slp->ns_flag & SLP_VALID) { - nam2 = (struct mbuf *)0; - so = lph->lph_slp->ns_so; - } else - goto nextone; - sotype = so->so_type; - solock = (so->so_proto->pr_flags & PR_CONNREQUIRED); - nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED, - NFSX_V3FH + NFSX_UNSIGNED); - fhp = &nfh.fh_generic; - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - VFS_VPTOFH(vp, &fhp->fh_fid); - nfsm_srvfhtom(fhp, 1); - m = mreq; - siz = 0; - while (m) { - siz += m->m_len; - m = m->m_next; - } - if (siz <= 0 || siz > NFS_MAXPACKET) { - printf("mbuf siz=%d\n",siz); - panic("Bad nfs svc reply"); - } - m = nfsm_rpchead(cred, (NFSMNT_NFSV3 | NFSMNT_NQNFS), - NQNFSPROC_EVICTED, - RPCAUTH_UNIX, 5 
* NFSX_UNSIGNED, (char *)0, - 0, (char *)NULL, mreq, siz, &mheadend, &xid); - /* - * For stream protocols, prepend a Sun RPC - * Record Mark. - */ - if (sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | - (m->m_pkthdr.len - NFSX_UNSIGNED)); - } - if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 && - (lph->lph_slp->ns_flag & SLP_VALID) == 0) || - (solock && nfs_slplock(lph->lph_slp, 0) == 0)) { - m_freem(m); - } else { - (void) nfs_send(so, nam2, m, - (struct nfsreq *)0); - if (solock) - nfs_slpunlock(lph->lph_slp); - } - if (lph->lph_flag & LC_UDP) - MFREE(nam2, m); - } -nextone: - if (++i == len) { - if (lphnext) { - i = 0; - len = LC_MOREHOSTSIZ; - lph = lphnext->lpm_hosts; - lphnext = lphnext->lpm_next; - } else - ok = 0; - } else - lph++; - } -} - -/* - * Wait for the lease to expire. - * This will occur when all clients have sent "vacated" messages to - * this server OR when it expires do to timeout. - */ -static void -nqsrv_waitfor_expiry(lp) - register struct nqlease *lp; -{ - register struct nqhost *lph; - register int i; - struct nqm *lphnext; - int len, ok; - struct timeval now; - -tryagain: - microtime(&now); - if (now.tv_sec > lp->lc_expiry) - return; - lph = &lp->lc_host; - lphnext = lp->lc_morehosts; - len = 1; - i = 0; - ok = 1; - while (ok && (lph->lph_flag & LC_VALID)) { - if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { - lp->lc_flag |= LC_EXPIREDWANTED; - (void) tsleep((caddr_t)&lp->lc_flag, PSOCK, - "nqexp", 0); - goto tryagain; - } - if (++i == len) { - if (lphnext) { - i = 0; - len = LC_MOREHOSTSIZ; - lph = lphnext->lpm_hosts; - lphnext = lphnext->lpm_next; - } else - ok = 0; - } else - lph++; - } -} - -#ifndef NFS_NOSERVER - -/* - * Nqnfs server timer that maintains the server lease queue. - * Scan the lease queue for expired entries: - * - when one is found, wakeup anyone waiting for it - * else dequeue and free - */ -void -nqnfs_serverd() -{ - register struct nqlease *lp; - register struct nqhost *lph; - struct nqlease *nextlp; - struct nqm *lphnext, *olphnext; - struct mbuf *n; - int i, len, ok; - struct timeval now; - - microtime(&now); - for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; - lp = nextlp) { - if (lp->lc_expiry >= now.tv_sec) - break; - nextlp = lp->lc_timer.cqe_next; - if (lp->lc_flag & LC_EXPIREDWANTED) { - lp->lc_flag &= ~LC_EXPIREDWANTED; - wakeup((caddr_t)&lp->lc_flag); - } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) { - /* - * Make a best effort at keeping a write caching lease long - * enough by not deleting it until it has been explicitly - * vacated or there have been no writes in the previous - * write_slack seconds since expiry and the nfsds are not - * all busy. The assumption is that if the nfsds are not - * all busy now (no queue of nfs requests), then the client - * would have been able to do at least one write to the - * file during the last write_slack seconds if it was still - * trying to push writes to the server. - */ - if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE && - ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) { - lp->lc_flag &= ~LC_WRITTEN; - nqsrv_instimeq(lp, nqsrv_writeslack); - } else { - CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); - LIST_REMOVE(lp, lc_hash); - /* - * This soft reference may no longer be valid, but - * no harm done. The worst case is if the vnode was - * recycled and has another valid lease reference, - * which is dereferenced prematurely. 
- */ - lp->lc_vp->v_lease = (struct nqlease *)0; - lph = &lp->lc_host; - lphnext = lp->lc_morehosts; - olphnext = (struct nqm *)0; - len = 1; - i = 0; - ok = 1; - while (ok && (lph->lph_flag & LC_VALID)) { - if (lph->lph_flag & LC_CLTP) - MFREE(lph->lph_nam, n); - if (lph->lph_flag & LC_SREF) - nfsrv_slpderef(lph->lph_slp); - if (++i == len) { - if (olphnext) { - FREE_ZONE((caddr_t)olphnext, - sizeof (struct nqm), - M_NQMHOST); - olphnext = (struct nqm *)0; - } - if (lphnext) { - olphnext = lphnext; - i = 0; - len = LC_MOREHOSTSIZ; - lph = lphnext->lpm_hosts; - lphnext = lphnext->lpm_next; - } else - ok = 0; - } else - lph++; - } - FREE_ZONE((caddr_t)lp, - sizeof (struct nqlease), M_NQLEASE); - if (olphnext) - FREE_ZONE((caddr_t)olphnext, - sizeof (struct nqm), M_NQMHOST); - nfsstats.srvnqnfs_leases--; - } - } - } -} - -/* - * Called from nfssvc_nfsd() for a getlease rpc request. - * Do the from/to xdr translation and call nqsrv_getlease() to - * do the real work. - */ -int -nqnfsrv_getlease(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; -{ - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct vattr va; - register struct vattr *vap = &va; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - u_quad_t frev; - caddr_t bpos; - int error = 0; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - int flags, rdonly, cache; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); - flags = fxdr_unsigned(int, *tl++); - nfsd->nd_duration = fxdr_unsigned(int, *tl); - error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, - (nfsd->nd_flag & ND_KERBAUTH), TRUE); - if (error) - nfsm_reply(0); - if (rdonly && flags == ND_WRITE) { - error = EROFS; - vput(vp); - nfsm_reply(0); - } - (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, slp, procp, - nam, &cache, &frev, cred); - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_V3FATTR + 4 * NFSX_UNSIGNED); - nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(cache); - *tl++ = txdr_unsigned(nfsd->nd_duration); - txdr_hyper(&frev, tl); - nfsm_build(fp, struct nfs_fattr *, NFSX_V3FATTR); - nfsm_srvfillattr(vap, fp); - nfsm_srvdone; -} - -/* - * Called from nfssvc_nfsd() when a "vacated" message is received from a - * client. Find the entry and expire it. - */ -int -nqnfsrv_vacated(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; -{ - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - register struct nqlease *lp; - register struct nqhost *lph; - struct nqlease *tlp = (struct nqlease *)0; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - struct nqm *lphnext; - struct mbuf *mreq, *mb; - int error = 0, i, len, ok, gotit = 0, cache = 0; - char *cp2, *bpos; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - m_freem(mrep); - /* - * Find the lease by searching the hash list. 
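- * The chain is selected by hashing the file handle's fid data; a
- * match requires both fsid words and the fid data itself to be equal.
- * Note that a "vacated" message gets no RPC reply: both the found and
- * not-found paths below return EPERM, which appears to serve as the
- * "do not reply" indication to the calling nfsd loop.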
- */ - for (lp = NQFHHASH(fhp->fh_fid.fid_data)->lh_first; lp != 0; - lp = lp->lc_hash.le_next) - if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] && - fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] && - !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata, - MAXFIDSZ)) { - /* Found it */ - tlp = lp; - break; - } - if (tlp) { - lp = tlp; - len = 1; - i = 0; - lph = &lp->lc_host; - lphnext = lp->lc_morehosts; - ok = 1; - while (ok && (lph->lph_flag & LC_VALID)) { - if (nqsrv_cmpnam(slp, nam, lph)) { - lph->lph_flag |= LC_VACATED; - gotit++; - break; - } - if (++i == len) { - if (lphnext) { - len = LC_MOREHOSTSIZ; - i = 0; - lph = lphnext->lpm_hosts; - lphnext = lphnext->lpm_next; - } else - ok = 0; - } else - lph++; - } - if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) { - lp->lc_flag &= ~LC_EXPIREDWANTED; - wakeup((caddr_t)&lp->lc_flag); - } -nfsmout: - return (EPERM); - } - return (EPERM); -} - -#endif /* NFS_NOSERVER */ - -/* - * Client get lease rpc function. - */ -int -nqnfs_getlease(vp, rwflag, cred, p) - register struct vnode *vp; - int rwflag; - struct ucred *cred; - struct proc *p; -{ - register u_long *tl; - register caddr_t cp; - register long t1, t2; - register struct nfsnode *np; - struct nfsmount *nmp; - caddr_t bpos, dpos, cp2; - struct timeval now; - time_t reqtime; - int error = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - int cachable; - u_quad_t frev; - u_int64_t xid; - - nmp = VFSTONFS(vp->v_mount); - if (!nmp) - return (ENXIO); - - nfsstats.rpccnt[NQNFSPROC_GETLEASE]++; - mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED, - &bpos); - nfsm_fhtom(vp, 1); - nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(rwflag); - *tl = txdr_unsigned(nmp->nm_leaseterm); - microtime(&now); - reqtime = now.tv_sec; - nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred, &xid); - np = VTONFS(vp); - nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); - cachable = fxdr_unsigned(int, *tl++); - reqtime += fxdr_unsigned(int, *tl++); - microtime(&now); - if (reqtime > now.tv_sec) { - nmp = VFSTONFS(vp->v_mount); - if (!nmp) { - error = ENXIO; - } else { - fxdr_hyper(tl, &frev); - nqnfs_clientlease(nmp, np, rwflag, cachable, - reqtime, frev); - nfsm_loadattr(vp, (struct vattr *)0, &xid); - } - } else - error = NQNFS_EXPIRED; - nfsm_reqdone; - return (error); -} - -/* - * Client vacated message function. 
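- * The message is built by hand rather than through the normal reply
- * path: nfsm_rpchead() prepends the RPC header, and for stream
- * transports a Sun RPC record mark is prepended as well.  The record
- * mark is a single word whose high bit flags the last fragment and
- * whose low 31 bits carry the fragment length:
- *
- *	*mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len -
- *		NFSX_UNSIGNED));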
- */ -static int -nqnfs_vacated(vp, cred) - register struct vnode *vp; - struct ucred *cred; -{ - register caddr_t cp; - register struct mbuf *m; - register int i; - register u_long *tl; - register long t2; - caddr_t bpos; - u_long xid; - int error = 0; - struct mbuf *mreq, *mb, *mb2, *mheadend; - struct nfsmount *nmp; - struct nfsreq myrep; - int connrequired; - int *flagp; - - nmp = VFSTONFS(vp->v_mount); - if (!nmp) - return (ENXIO); - nfsstats.rpccnt[NQNFSPROC_VACATED]++; - nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_FH(1)); - nfsm_fhtom(vp, 1); - m = mreq; - i = 0; - while (m) { - i += m->m_len; - m = m->m_next; - } - m = nfsm_rpchead(cred, nmp->nm_flag, NQNFSPROC_VACATED, - RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0, - 0, (char *)NULL, mreq, i, &mheadend, &xid); - if (nmp->nm_sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - - NFSX_UNSIGNED)); - } - myrep.r_flags = 0; - myrep.r_nmp = nmp; - - connrequired = (nmp->nm_soflags & PR_CONNREQUIRED); - if (connrequired) - (void) nfs_sndlock(&myrep); - - (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep); - - if (connrequired) - nfs_sndunlock(&myrep); -nfsmout: - return (error); -} - -#ifndef NFS_NOSERVER - -/* - * Called for client side callbacks - */ -int -nqnfs_callback(nmp, mrep, md, dpos) - struct nfsmount *nmp; - struct mbuf *mrep, *md; - caddr_t dpos; -{ - register struct vnode *vp; - register u_long *tl; - register long t1; - nfsfh_t nfh; - fhandle_t *fhp; - struct nfsnode *np; - struct nfsd tnfsd; - struct nfssvc_sock *slp; - struct nfsrv_descript ndesc; - register struct nfsrv_descript *nfsd = &ndesc; - struct mbuf **mrq = (struct mbuf **)0, *mb, *mreq; - int error = 0, cache = 0; - char *cp2, *bpos; - u_quad_t frev; - -#ifndef nolint - slp = NULL; -#endif - nfsd->nd_mrep = mrep; - nfsd->nd_md = md; - nfsd->nd_dpos = dpos; - error = nfs_getreq(nfsd, &tnfsd, FALSE); - if (error) - return (error); - md = nfsd->nd_md; - dpos = nfsd->nd_dpos; - if (nfsd->nd_procnum != NQNFSPROC_EVICTED) { - m_freem(mrep); - return (EPERM); - } - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - m_freem(mrep); - error = nfs_nget(nmp->nm_mountp, (nfsfh_t *)fhp, NFSX_V3FH, &np); - if (error) - return (error); - vp = NFSTOV(np); - if (np->n_timer.cqe_next != 0) { - np->n_expiry = 0; - np->n_flag |= NQNFSEVICTED; - if (nmp->nm_timerhead.cqh_first != np) { - CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); - CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); - } - } - vput(vp); - nfsm_srvdone; -} - - -/* - * Nqnfs client helper daemon. Runs once a second to expire leases. - * It also get authorization strings for "kerb" mounts. - * It must start at the beginning of the list again after any potential - * "sleep" since nfs_reclaim() called from vclean() can pull a node off - * the list asynchronously. - */ -int -nqnfs_clientd(nmp, cred, ncd, flag, argp, p) - register struct nfsmount *nmp; - struct ucred *cred; - struct nfsd_cargs *ncd; - int flag; - caddr_t argp; - struct proc *p; -{ - register struct nfsnode *np; - struct vnode *vp; - struct nfsreq myrep; - struct nfsuid *nuidp, *nnuidp; - int error = 0, vpid; - register struct nfsreq *rp; - struct timeval now; - - /* - * First initialize some variables - */ - microtime(&now); - - /* - * If an authorization string is being passed in, get it. 
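- * The authenticator and verifier are copied in from user space only
- * if they fit within the lengths recorded in the mount; on success
- * NFSSTA_HASAUTH is set and anything sleeping on nm_authlen is
- * woken, otherwise NFSSTA_AUTHERR is set instead.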
- */ - if ((flag & NFSSVC_GOTAUTH) && - (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT)) == 0) { - if (nmp->nm_state & NFSSTA_HASAUTH) - panic("cld kerb"); - if ((flag & NFSSVC_AUTHINFAIL) == 0) { - if (ncd->ncd_authlen <= nmp->nm_authlen && - ncd->ncd_verflen <= nmp->nm_verflen && - !copyin(ncd->ncd_authstr,nmp->nm_authstr,ncd->ncd_authlen)&& - !copyin(ncd->ncd_verfstr,nmp->nm_verfstr,ncd->ncd_verflen)){ - nmp->nm_authtype = ncd->ncd_authtype; - nmp->nm_authlen = ncd->ncd_authlen; - nmp->nm_verflen = ncd->ncd_verflen; -#if NFSKERB - nmp->nm_key = ncd->ncd_key; -#endif - } else - nmp->nm_state |= NFSSTA_AUTHERR; - } else - nmp->nm_state |= NFSSTA_AUTHERR; - nmp->nm_state |= NFSSTA_HASAUTH; - wakeup((caddr_t)&nmp->nm_authlen); - } else - nmp->nm_state |= NFSSTA_WAITAUTH; - - /* - * Loop every second updating queue until there is a termination sig. - */ - while ((nmp->nm_state & NFSSTA_DISMNT) == 0) { - if (nmp->nm_flag & NFSMNT_NQNFS) { - /* - * If there are no outstanding requests (and therefore no - * processes in nfs_reply) and there is data in the receive - * queue, poke for callbacks. - */ - if (nfs_reqq.tqh_first == 0 && nmp->nm_so && - nmp->nm_so->so_rcv.sb_cc > 0) { - myrep.r_flags = R_GETONEREP; - myrep.r_nmp = nmp; - myrep.r_mrep = (struct mbuf *)0; - myrep.r_procp = (struct proc *)0; - (void) nfs_reply(&myrep); - } - - /* - * Loop through the leases, updating as required. - */ - np = nmp->nm_timerhead.cqh_first; - while (np != (void *)&nmp->nm_timerhead && - (nmp->nm_state & NFSSTA_DISMINPROG) == 0) { - vp = NFSTOV(np); - vpid = vp->v_id; - if (np->n_expiry < now.tv_sec) { - if (vget(vp, LK_EXCLUSIVE, p) == 0) { - nmp->nm_inprog = vp; - if (vpid == vp->v_id) { - CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); - np->n_timer.cqe_next = 0; - if (np->n_flag & (NMODIFIED | NQNFSEVICTED)) { - if (np->n_flag & NQNFSEVICTED) { - if (vp->v_type == VDIR) - nfs_invaldir(vp); - cache_purge(vp); - (void) nfs_vinvalbuf(vp, - V_SAVE, cred, p, 0); - np->n_flag &= ~NQNFSEVICTED; - (void) nqnfs_vacated(vp, cred); - } else if (vp->v_type == VREG) { - (void) VOP_FSYNC(vp, cred, - MNT_WAIT, p); - np->n_flag &= ~NMODIFIED; - } - } - } - vrele(vp); - nmp->nm_inprog = NULLVP; - } - } else if ((np->n_expiry - NQ_RENEWAL) < now.tv_sec) { - if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE)) - == NQNFSWRITE && np->n_dirtyblkhd.lh_first && - vget(vp, LK_EXCLUSIVE, p) == 0) { - nmp->nm_inprog = vp; - if (vpid == vp->v_id && - nqnfs_getlease(vp, ND_WRITE, cred, p)==0) - np->n_brev = np->n_lrev; - vrele(vp); - nmp->nm_inprog = NULLVP; - } - } else - break; - if (np == nmp->nm_timerhead.cqh_first) - break; - np = nmp->nm_timerhead.cqh_first; - } - } - - /* - * Get an authorization string, if required. - */ - if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_DISMNT | NFSSTA_HASAUTH)) == 0) { - ncd->ncd_authuid = nmp->nm_authuid; - if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) - nmp->nm_state |= NFSSTA_WAITAUTH; - else - return (ENEEDAUTH); - } - - /* - * Wait a bit (no pun) and do it again. - */ - if ((nmp->nm_state & NFSSTA_DISMNT) == 0 && - (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) { - error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, - "nqnfstimr", hz / 3); - if (error == EINTR || error == ERESTART) - (void) dounmount(nmp->nm_mountp, 0, p); - } - } - - /* - * Finally, we can free up the mount structure. 
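- * The teardown order matters: drain the nfsuid LRU first, then clear
- * r_nmp in any outstanding requests so they fail gracefully rather
- * than dereference a freed nfsmount, wake any receive-lock waiters
- * so they notice the unmount, and only then free the structure
- * itself.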
- */ - for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { - nnuidp = nuidp->nu_lru.tqe_next; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); - FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); - } - /* - * Loop through outstanding request list and remove dangling - * references to defunct nfsmount struct - */ - for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next) - if (rp->r_nmp == nmp) - rp->r_nmp = (struct nfsmount *)0; - /* Need to wake up any rcvlock waiters so they notice the unmount. */ - if (nmp->nm_state & NFSSTA_WANTRCV) { - nmp->nm_state &= ~NFSSTA_WANTRCV; - wakeup(&nmp->nm_state); - } - FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); - if (error == EWOULDBLOCK) - error = 0; - return (error); -} - -#endif /* NFS_NOSERVER */ - -/* - * Adjust all timer queue expiry times when the time of day clock is changed. - * Called from the settimeofday() syscall. - */ -void -nqnfs_lease_updatetime(deltat) - register int deltat; -{ - struct proc *p = current_proc(); /* XXX */ - struct nqlease *lp; - struct nfsnode *np; - struct mount *mp, *nxtmp; - struct nfsmount *nmp; - int s; - - if (nqnfsstarttime != 0) - nqnfsstarttime += deltat; - s = splsoftclock(); - for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; - lp = lp->lc_timer.cqe_next) - lp->lc_expiry += deltat; - splx(s); - - /* - * Search the mount list for all nqnfs mounts and do their timer - * queues. - */ - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nxtmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nxtmp = mp->mnt_list.cqe_next; - continue; - } - if (mp->mnt_stat.f_type == nfs_mount_type) { - nmp = VFSTONFS(mp); - if (nmp->nm_flag & NFSMNT_NQNFS) { - for (np = nmp->nm_timerhead.cqh_first; - np != (void *)&nmp->nm_timerhead; - np = np->n_timer.cqe_next) { - np->n_expiry += deltat; - } - } - } - simple_lock(&mountlist_slock); - nxtmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); -} - -/* - * Lock a server lease. - */ -static void -nqsrv_locklease(lp) - struct nqlease *lp; -{ - - while (lp->lc_flag & LC_LOCKED) { - lp->lc_flag |= LC_WANTED; - (void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0); - } - lp->lc_flag |= LC_LOCKED; - lp->lc_flag &= ~LC_WANTED; -} - -/* - * Unlock a server lease. - */ -static void -nqsrv_unlocklease(lp) - struct nqlease *lp; -{ - - lp->lc_flag &= ~LC_LOCKED; - if (lp->lc_flag & LC_WANTED) - wakeup((caddr_t)lp); -} - -/* - * Update a client lease. 
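- * The per-mount timer queue is kept sorted by expiry time; the
- * insertion below scans backward from the tail on the assumption
- * that a freshly granted or renewed lease normally expires later
- * than anything already queued.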
- */ -void -nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev) - register struct nfsmount *nmp; - register struct nfsnode *np; - int rwflag, cachable; - time_t expiry; - u_quad_t frev; -{ - register struct nfsnode *tp; - - if (np->n_timer.cqe_next != 0) { - CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); - if (rwflag == ND_WRITE) - np->n_flag |= NQNFSWRITE; - } else if (rwflag == ND_READ) - np->n_flag &= ~NQNFSWRITE; - else - np->n_flag |= NQNFSWRITE; - if (cachable) - np->n_flag &= ~NQNFSNONCACHE; - else - np->n_flag |= NQNFSNONCACHE; - np->n_expiry = expiry; - np->n_lrev = frev; - tp = nmp->nm_timerhead.cqh_last; - while (tp != (void *)&nmp->nm_timerhead && tp->n_expiry > np->n_expiry) - tp = tp->n_timer.cqe_prev; - if (tp == (void *)&nmp->nm_timerhead) { - CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); - } else { - CIRCLEQ_INSERT_AFTER(&nmp->nm_timerhead, tp, np, n_timer); - } -} diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 271a85b6f..7f513b015 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,7 +68,7 @@ * 3 - build the rpc reply in an mbuf list * nb: * - do not mix the phases, since the nfsm_?? macros can return failures - * on a bad rpc or similar and do not do any vrele() or vput()'s + * on a bad rpc or similar and do not do any vnode_rele()s or vnode_put()s * * - the nfsm_reply() macro generates an nfs rpc reply with the nfs * error number iff error != 0 whereas @@ -82,30 +82,31 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #include -#include +#include #include #include #include #include #include +#include +#include +#include #include #include -#include #include #include #include #include #include -#include nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; @@ -127,10 +128,11 @@ int nfs_async = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); #endif -static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, - struct proc *, int)); -static void nfsrvw_coalesce __P((struct nfsrv_descript *, - struct nfsrv_descript *)); +static int nfsrv_authorize(vnode_t,vnode_t,kauth_action_t,vfs_context_t,struct nfs_export_options*,int); +static void nfsrvw_coalesce(struct nfsrv_descript *, struct nfsrv_descript *); + +#define THREAD_SAFE_FS(VP) \ + ((VP)->v_mount ? 
(VP)->v_mount->mnt_vtable->vfc_threadsafe : 0) /* * nfs v3 access service @@ -139,64 +141,126 @@ int nfsrv3_access(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; + vnode_t vp, dvp; + struct nfs_filehandle nfh; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, getret; + int error = 0, getret; char *cp2; - struct mbuf *mb, *mreq, *mb2; - struct vattr vattr, *vap = &vattr; - u_long testmode, nfsmode; - u_quad_t frev; + mbuf_t mb, mreq, mb2; + struct vnode_attr vattr, *vap = &vattr; + u_long nfsmode; + kauth_action_t testaction; + struct vfs_context context; + struct nfs_export *nx; + struct nfs_export_options *nxo; -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + nfsm_srvmtofh(&nfh); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, NULL); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); + nfsm_srvpostop_attr(1, NULL); return (0); } nfsmode = fxdr_unsigned(u_long, *tl); - if ((nfsmode & NFSV3ACCESS_READ) && - nfsrv_access(vp, VREAD, cred, rdonly, procp, 0)) - nfsmode &= ~NFSV3ACCESS_READ; - if (vp->v_type == VDIR) - testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | - NFSV3ACCESS_DELETE); - else - testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); - if ((nfsmode & testmode) && - nfsrv_access(vp, VWRITE, cred, rdonly, procp, 0)) - nfsmode &= ~testmode; - if (vp->v_type == VDIR) - testmode = NFSV3ACCESS_LOOKUP; - else - testmode = NFSV3ACCESS_EXECUTE; - if ((nfsmode & testmode) && - nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0)) - nfsmode &= ~testmode; - getret = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + /* + * Each NFS mode bit is tested separately. + * + * XXX this code is nominally correct, but returns a pessimistic + * rather than optimistic result. It will be necessary to add + * an NFS-specific interface to the vnode_authorize code to + * obtain good performance in the optimistic mode. 
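+ *
+ * In outline, each ACCESS bit is translated into kauth vnode actions
+ * according to the vnode's type before being tested:
+ *
+ *	READ	LIST_DIRECTORY (dir) or READ_DATA (file),
+ *		plus READ_EXTATTRIBUTES in both cases
+ *	LOOKUP	SEARCH; directories only
+ *	MODIFY	ADD_FILE|ADD_SUBDIRECTORY|DELETE_CHILD (dir), or
+ *		WRITE_DATA|WRITE_ATTRIBUTES|WRITE_EXTATTRIBUTES|
+ *		WRITE_SECURITY (file)
+ *	EXTEND	ADD_FILE|ADD_SUBDIRECTORY (dir), or
+ *		WRITE_DATA|APPEND_DATA (file)
+ *	DELETE	KAUTH_VNODE_DELETE, tested against the parent vnode
+ *	EXECUTE	KAUTH_VNODE_EXECUTE; non-directories only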
+ */ + if (nfsmode & NFSV3ACCESS_READ) { + if (vnode_isdir(vp)) { + testaction = + KAUTH_VNODE_LIST_DIRECTORY | + KAUTH_VNODE_READ_EXTATTRIBUTES; + } else { + testaction = + KAUTH_VNODE_READ_DATA | + KAUTH_VNODE_READ_EXTATTRIBUTES; + } + if (nfsrv_authorize(vp, NULL, testaction, &context, nxo, 0)) + nfsmode &= ~NFSV3ACCESS_READ; + } + if ((nfsmode & NFSV3ACCESS_LOOKUP) && + (!vnode_isdir(vp) || + nfsrv_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &context, nxo, 0))) + nfsmode &= ~NFSV3ACCESS_LOOKUP; + if (nfsmode & NFSV3ACCESS_MODIFY) { + if (vnode_isdir(vp)) { + testaction = + KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY | + KAUTH_VNODE_DELETE_CHILD; + } else { + testaction = + KAUTH_VNODE_WRITE_DATA | + KAUTH_VNODE_WRITE_ATTRIBUTES | + KAUTH_VNODE_WRITE_EXTATTRIBUTES | + KAUTH_VNODE_WRITE_SECURITY; + } + if (nfsrv_authorize(vp, NULL, testaction, &context, nxo, 0)) + nfsmode &= ~NFSV3ACCESS_MODIFY; + } + if (nfsmode & NFSV3ACCESS_EXTEND) { + if (vnode_isdir(vp)) { + testaction = + KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY; + } else { + testaction = + KAUTH_VNODE_WRITE_DATA | + KAUTH_VNODE_APPEND_DATA; + } + if (nfsrv_authorize(vp, NULL, testaction, &context, nxo, 0)) + nfsmode &= ~NFSV3ACCESS_EXTEND; + } + dvp = NULLVP; + /* + * For hard links, this answer may be wrong if the vnode + * has multiple parents with different permissions. + */ + if ((nfsmode & NFSV3ACCESS_DELETE) && + (((dvp = vnode_getparent(vp)) == NULL) || + nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, &context, nxo, 0))) { + nfsmode &= ~NFSV3ACCESS_DELETE; + } + if (dvp != NULLVP) + vnode_put(dvp); + + if ((nfsmode & NFSV3ACCESS_EXECUTE) && + (vnode_isdir(vp) || + nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, &context, nxo, 0))) + nfsmode &= ~NFSV3ACCESS_EXECUTE; + + nfsm_srv_vattr_init(vap, 1); + getret = vnode_getattr(vp, vap, &context); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -206,43 +270,51 @@ int nfsrv_getattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct vattr va; - register struct vattr *vap = &va; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; + struct nfs_fattr *fp; + struct vnode_attr va; + struct vnode_attr *vap = &va; + vnode_t vp; + struct nfs_filehandle nfh; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache; + int error = 0; char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev; + mbuf_t mb, mb2, mreq; + struct vfs_context context; + struct nfs_export *nx; + struct nfs_export_options *nxo; + int v3 = (nfsd->nd_flag & ND_NFSV3); - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + nfsm_srvmtofh(&nfh); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(0); return (0); } - nqsrv_getl(vp, ND_READ); - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); + if ((error = 
nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(0); + return (0); + } + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + nfsm_srv_vattr_init(vap, v3); + error = vnode_getattr(vp, vap, &context); + vnode_put(vp); + nfsm_reply(NFSX_FATTR(v3)); if (error) return (0); - nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); + nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(v3)); nfsm_srvfillattr(vap, fp); - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -252,33 +324,36 @@ int nfsrv_setattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, preat; - register struct vattr *vap = &va; - register struct nfsv2_sattr *sp; - register struct nfs_fattr *fp; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; + struct vnode_attr preat; + struct vnode_attr postat; + struct vnode_attr va; + struct vnode_attr *vap = &va; + struct nfsv2_sattr *sp; + struct nfs_fattr *fp; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; + int error = 0, preat_ret = 1, postat_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev; + mbuf_t mb, mb2, mreq; struct timespec guard; + struct vfs_context context; + kauth_action_t action; + uid_t saved_uid; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - VATTR_NULL(vap); + nfsm_srvmtofh(&nfh); + VATTR_INIT(vap); if (v3) { nfsm_srvsattr(vap); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); @@ -297,83 +372,99 @@ nfsrv_setattr(nfsd, slp, procp, mrq) * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) - vap->va_mode = nfstov_mode(sp->sa_mode); + VATTR_SET(vap, va_mode, nfstov_mode(sp->sa_mode)); if (sp->sa_uid != nfs_xdrneg1) - vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); + VATTR_SET(vap, va_uid, fxdr_unsigned(uid_t, sp->sa_uid)); if (sp->sa_gid != nfs_xdrneg1) - vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); + VATTR_SET(vap, va_gid, fxdr_unsigned(gid_t, sp->sa_gid)); if (sp->sa_size != nfs_xdrneg1) - vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); + VATTR_SET(vap, va_data_size, fxdr_unsigned(u_quad_t, sp->sa_size)); if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { -#ifdef notyet - fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); -#else - vap->va_atime.tv_sec = - fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); - vap->va_atime.tv_nsec = 0; -#endif + fxdr_nfsv2time(&sp->sa_atime, &vap->va_access_time); + VATTR_SET_ACTIVE(vap, va_access_time); + } + if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) { + fxdr_nfsv2time(&sp->sa_mtime, &vap->va_modify_time); + VATTR_SET_ACTIVE(vap, va_modify_time); } - if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) - fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); - } + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. + */ + saved_uid = kauth_cred_getuid(nfsd->nd_cr); + /* * Now that we have all the fields, lets do it. 
*/ - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &postat); return (0); } - nqsrv_getl(vp, ND_WRITE); + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &postat); + return (0); + } + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + if (v3) { - error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); - if (!error && gcheck && - (preat.va_ctime.tv_sec != guard.tv_sec || - preat.va_ctime.tv_nsec != guard.tv_nsec)) + nfsm_srv_pre_vattr_init(&preat, v3); + error = preat_ret = vnode_getattr(vp, &preat, &context); + if (!error && gcheck && VATTR_IS_SUPPORTED(&preat, va_change_time) && + (preat.va_change_time.tv_sec != guard.tv_sec || + preat.va_change_time.tv_nsec != guard.tv_nsec)) error = NFSERR_NOT_SYNC; + if (!preat_ret && !VATTR_ALL_SUPPORTED(&preat)) + preat_ret = 1; if (error) { - vput(vp); + vnode_put(vp); nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &postat); return (0); } } /* - * If the size is being changed write acces is required, otherwise - * just check for a read only file system. + * If the credentials were mapped, we should + * map the same values in the attributes. */ - if (vap->va_size == ((u_quad_t)((quad_t) -1))) { - if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { - error = EROFS; - goto out; - } - } else { - if (vp->v_type == VDIR) { - error = EISDIR; - goto out; - } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly, - procp, 0))) - goto out; + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nfsd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nfsd->nd_cr)); + if (kauth_cred_ismember_gid(nfsd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nfsd->nd_cr)); } - error = VOP_SETATTR(vp, vap, cred, procp); - postat_ret = VOP_GETATTR(vp, vap, cred, procp); + + /* + * Authorize the attribute changes. 
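+ * vnode_authattr() computes the kauth_action_t required by the
+ * requested attribute set; nfsrv_authorize() then checks that action
+ * against the export options before vnode_setattr() is allowed to
+ * apply the change.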
+ */ + if (((error = vnode_authattr(vp, vap, &action, &context))) || + ((error = nfsrv_authorize(vp, NULL, action, &context, nxo, 0)))) + goto out; + error = vnode_setattr(vp, vap, &context); + + nfsm_srv_vattr_init(&postat, v3); + postat_ret = vnode_getattr(vp, &postat, &context); if (!error) error = postat_ret; out: - vput(vp); + vnode_put(vp); nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); + nfsm_srvwcc_data(preat_ret, &preat, postat_ret, &postat); return (0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + nfsm_srvfillattr(&postat, fp); } - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -383,70 +474,73 @@ int nfsrv_lookup(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; + struct nfs_fattr *fp; struct nameidata nd, *ndp = &nd; -#ifdef notdef +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED struct nameidata ind; #endif - struct vnode *vp, *dirp; - nfsfh_t nfh; - fhandle_t *fhp; - register caddr_t cp; - register u_long *tl; - register long t1; + vnode_t vp, dirp = NULL; + struct nfs_filehandle dnfh, nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + caddr_t cp; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, cache, len, dirattr_ret = 1; + int error = 0, len, dirattr_ret = 1, isdotdot; int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vattr va, dirattr, *vap = &va; - u_quad_t frev; + mbuf_t mb, mb2, mreq; + struct vnode_attr va, dirattr, *vap = &va; + struct vfs_context context; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); + nfsm_srvmtofh(&dnfh); + nfsm_srvnamesiz(len, v3); - pubflag = nfs_ispublicfh(fhp); + pubflag = nfs_ispublicfh(&dnfh); - nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag); + nd.ni_cnd.cn_flags = LOCKLEAF; + error = nfsm_path_mbuftond(&md, &dpos, v3, pubflag, &len, &nd); + isdotdot = ((len == 2) && (nd.ni_cnd.cn_pnbuf[0] == '.') && (nd.ni_cnd.cn_pnbuf[1] == '.')); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &dnfh, nam, pubflag, &dirp, &nx, &nxo); -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED if (!error && pubflag) { - if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) { + if (vnode_vtype(nd.ni_vp) == VDIR && nfs_pub.np_index != NULL) { /* * Setup call to lookup() to see if we can find * the index file. Arguably, this doesn't belong * in a kernel.. Ugh. */ ind = nd; - VOP_UNLOCK(nd.ni_vp, 0, procp); ind.ni_pathlen = strlen(nfs_pub.np_index); ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf = nfs_pub.np_index; ind.ni_startdir = nd.ni_vp; - VREF(ind.ni_startdir); - error = lookup(&ind); - if (!error) { + ind.ni_usedvp = nd.ni_vp; + + if (!(error = lookup(&ind))) { /* * Found an index file. Get rid of * the old references. 
*/ if (dirp) - vrele(dirp); + vnode_put(dirp); dirp = nd.ni_vp; - vrele(nd.ni_startdir); + vnode_put(nd.ni_startdir); ndp = &ind; } else error = 0; @@ -457,18 +551,20 @@ nfsrv_lookup(nfsd, slp, procp, mrq) * filesystem. */ - if (!error && ndp->ni_vp->v_mount != nfs_pub.np_mount) { - vput(nd.ni_vp); + if (!error && vnode_mount(ndp->ni_vp) != nfs_pub.np_mount) { + vnode_put(nd.ni_vp); + nameidone(&nd); error = EPERM; } } #endif if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - procp); - vrele(dirp); + if (v3) { + nfsm_srv_vattr_init(&dirattr, v3); + dirattr_ret = vnode_getattr(dirp, &dirattr, &context); + } + vnode_put(dirp); } if (error) { @@ -476,24 +572,21 @@ nfsrv_lookup(nfsd, slp, procp, mrq) nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } + nameidone(&nd); - nqsrv_getl(ndp->ni_startdir, ND_READ); - vrele(ndp->ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; vp = ndp->ni_vp; - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); + error = nfsrv_vptofh(nx, !v3, (isdotdot ? &dnfh : NULL), vp, &context, &nfh); + if (!error) { + nfsm_srv_vattr_init(vap, v3); + error = vnode_getattr(vp, vap, &context); + } + vnode_put(vp); + nfsm_reply(NFSX_SRVFH(v3, &nfh) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } - nfsm_srvfhtom(fhp, v3); + nfsm_srvfhtom(&nfh, v3); if (v3) { nfsm_srvpostop_attr(0, vap); nfsm_srvpostop_attr(dirattr_ret, &dirattr); @@ -501,7 +594,8 @@ nfsrv_lookup(nfsd, slp, procp, mrq) nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -511,104 +605,160 @@ int nfsrv_readlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; - register struct iovec *ivp = iv; - register struct mbuf *mp; - register u_long *tl; - register long t1; + mbuf_t mp; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, i, tlen, len, getret; + int error = 0, i, tlen, len, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; - struct vnode *vp; - struct vattr attr; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; - u_quad_t frev; + mbuf_t mb, mb2, mp2, mp3, mreq; + vnode_t vp; + struct vnode_attr attr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t uiop = NULL; + char uio_buf[ UIO_SIZEOF(4) ]; + char *uio_bufp = &uio_buf[0]; + int uio_buflen = UIO_SIZEOF(4); + int mblen; + struct vfs_context context; -#ifndef nolint - mp2 = mp3 = (struct mbuf *)0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + nfsm_srvmtofh(&nfh); len = 0; i = 0; + + mp2 = mp3 = NULL; + vp = NULL; while (len < NFS_MAXPATHLEN) { - MGET(mp, M_WAIT, MT_DATA); - MCLGET(mp, M_WAIT); - mp->m_len = NFSMSIZ(mp); + mp = NULL; + if ((error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, 
&mp))) + goto out; + mblen = mbuf_maxlen(mp); + mbuf_setlen(mp, mblen); if (len == 0) mp3 = mp2 = mp; else { - mp2->m_next = mp; + if ((error = mbuf_setnext(mp2, mp))) { + mbuf_free(mp); + goto out; + } mp2 = mp; } - if ((len+mp->m_len) > NFS_MAXPATHLEN) { - mp->m_len = NFS_MAXPATHLEN-len; + if ((len + mblen) > NFS_MAXPATHLEN) { + mbuf_setlen(mp, NFS_MAXPATHLEN - len); len = NFS_MAXPATHLEN; } else - len += mp->m_len; - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - i++; - ivp++; - } - uiop->uio_iov = iv; - uiop->uio_iovcnt = i; - uiop->uio_offset = 0; - uiop->uio_resid = len; - uiop->uio_rw = UIO_READ; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - m_freem(mp3); + len += mblen; + i++; + } + if (i > 4) { + uio_buflen = UIO_SIZEOF(i); + MALLOC(uio_bufp, char*, uio_buflen, M_TEMP, M_WAITOK); + if (!uio_bufp) { + error = ENOMEM; + mbuf_freem(mp3); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, NULL); + return (0); + } + } + uiop = uio_createwithbuffer(i, 0, UIO_SYSSPACE, UIO_READ, uio_bufp, uio_buflen); + if (!uiop) { + error = ENOMEM; + mbuf_freem(mp3); + if (uio_bufp != &uio_buf[0]) { + FREE(uio_bufp, M_TEMP); + uio_bufp = &uio_buf[0]; + } + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, NULL); + return (0); + } + mp = mp3; + while (mp) { + uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), mbuf_len(mp)); + mp = mbuf_next(mp); + } + + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { + mbuf_freem(mp3); + if (uio_bufp != &uio_buf[0]) { + FREE(uio_bufp, M_TEMP); + uio_bufp = &uio_buf[0]; + } + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, NULL); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + mbuf_freem(mp3); + if (uio_bufp != &uio_buf[0]) { + FREE(uio_bufp, M_TEMP); + uio_bufp = &uio_buf[0]; + } nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); + nfsm_srvpostop_attr(1, NULL); return (0); } - if (vp->v_type != VLNK) { + if (vnode_vtype(vp) != VLNK) { if (v3) error = EINVAL; else error = ENXIO; goto out; } - nqsrv_getl(vp, ND_READ); - error = VOP_READLINK(vp, uiop, cred); + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &context, nxo, 0))) + goto out; + error = VNOP_READLINK(vp, uiop, &context); out: - getret = VOP_GETATTR(vp, &attr, cred, procp); - vput(vp); + if (vp) { + if (v3) { + nfsm_srv_vattr_init(&attr, v3); + getret = vnode_getattr(vp, &attr, &context); + } + vnode_put(vp); + } if (error) { - m_freem(mp3); + mbuf_freem(mp3); mp3 = NULL; } nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &attr); - if (error) + if (error) { + if (uio_bufp != &uio_buf[0]) + FREE(uio_bufp, M_TEMP); return (0); + } } if (!error) { - if (uiop->uio_resid > 0) { - len -= uiop->uio_resid; + if (uiop && (uio_resid(uiop) > 0)) { + // LP64todo - fix this + len -= uio_resid(uiop); tlen = nfsm_rndup(len); nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); - mb->m_next = mp3; + mbuf_setnext(mb, mp3); } - nfsm_srvdone; +nfsmout: + if (uio_bufp != &uio_buf[0]) + FREE(uio_bufp, M_TEMP); + return (error); } /* @@ -618,37 +768,35 @@ int nfsrv_read(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t 
procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct iovec *iv; - struct iovec *iv2; - register struct mbuf *m; - register struct nfs_fattr *fp; - register u_long *tl; - register long t1; - register int i; + mbuf_t m; + struct nfs_fattr *fp; + u_long *tl; + long t1; + int i; caddr_t bpos; - int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; + int error = 0, count, len, left, siz, tlen, getret; int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct mbuf *m2; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; - struct vattr va, *vap = &va; + mbuf_t mb, mb2, mreq; + mbuf_t m2; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t uiop = NULL; + char *uio_bufp = NULL; + struct vnode_attr va, *vap = &va; off_t off; - u_quad_t frev; - int didhold = 0; + char uio_buf[ UIO_SIZEOF(0) ]; + struct vfs_context context; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + nfsm_srvmtofh(&nfh); if (v3) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); @@ -657,39 +805,48 @@ nfsrv_read(nfsd, slp, procp, mrq) off = (off_t)fxdr_unsigned(u_long, *tl); } nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); + nfsm_srvpostop_attr(1, NULL); return (0); } - if (vp->v_type != VREG) { + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvpostop_attr(1, NULL); + return (0); + } + if (vnode_vtype(vp) != VREG) { if (v3) error = EINVAL; else - error = (vp->v_type == VDIR) ? EISDIR : EACCES; + error = (vnode_vtype(vp) == VDIR) ? 
EISDIR : EACCES; } + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + if (!error) { - nqsrv_getl(vp, ND_READ); - if ((error = nfsrv_access(vp, VREAD, cred, rdonly, procp, 1))) - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 1); + if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &context, nxo, 1))) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, &context, nxo, 1); } - getret = VOP_GETATTR(vp, vap, cred, procp); + nfsm_srv_vattr_init(vap, v3); + getret = vnode_getattr(vp, vap, &context); if (!error) error = getret; if (error) { - vput(vp); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } - if (off >= vap->va_size) - cnt = 0; - else if ((off + reqlen) > vap->va_size) - cnt = nfsm_rndup(vap->va_size - off); + if ((u_quad_t)off >= vap->va_data_size) + count = 0; + else if (((u_quad_t)off + reqlen) > vap->va_data_size) + count = nfsm_rndup(vap->va_data_size - off); else - cnt = reqlen; - nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); + count = reqlen; + nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(count)); if (v3) { nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); *tl++ = nfs_true; @@ -700,56 +857,57 @@ nfsrv_read(nfsd, slp, procp, mrq) fp = (struct nfs_fattr *)tl; tl += (NFSX_V2FATTR / sizeof (u_long)); } - len = left = cnt; - if (cnt > 0) { + len = left = count; + if (count > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. */ i = 0; m = m2 = mb; while (left > 0) { - siz = min(M_TRAILINGSPACE(m), left); + siz = min(mbuf_trailingspace(m), left); if (siz > 0) { left -= siz; i++; } if (left > 0) { - MGET(m, M_WAIT, MT_DATA); - MCLGET(m, M_WAIT); - m->m_len = 0; - m2->m_next = m; + m = NULL; + if ((error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &m))) + goto errorexit; + mbuf_setnext(m2, m); m2 = m; } } - MALLOC(iv, struct iovec *, i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = iv2 = iv; + MALLOC(uio_bufp, char *, UIO_SIZEOF(i), M_TEMP, M_WAITOK); + if (!uio_bufp) { + error = ENOMEM; + goto errorexit; + } + uiop = uio_createwithbuffer(i, off, UIO_SYSSPACE, UIO_READ, + uio_bufp, UIO_SIZEOF(i)); + if (!uiop) { + error = ENOMEM; + goto errorexit; + } m = mb; - left = cnt; + left = count; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_read iov"); - siz = min(M_TRAILINGSPACE(m), left); + siz = min(mbuf_trailingspace(m), left); if (siz > 0) { - iv->iov_base = mtod(m, caddr_t) + m->m_len; - iv->iov_len = siz; - m->m_len += siz; + tlen = mbuf_len(m); + uio_addiov(uiop, CAST_USER_ADDR_T((char *)mbuf_data(m) + tlen), siz); + mbuf_setlen(m, tlen + siz); left -= siz; - iv++; i++; } - m = m->m_next; - } - uiop->uio_iovcnt = i; - uiop->uio_offset = off; - uiop->uio_resid = cnt; - uiop->uio_rw = UIO_READ; - uiop->uio_segflg = UIO_SYSSPACE; - didhold = ubc_hold(vp); - error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); - off = uiop->uio_offset; - FREE((caddr_t)iv2, M_TEMP); + m = mbuf_next(m); + } + error = VNOP_READ(vp, uiop, IO_NODELOCKED, &context); + off = uio_offset(uiop); +errorexit: /* * This may seem a little weird that we drop the whole * successful read if we get an error on the getattr. @@ -761,31 +919,33 @@ nfsrv_read(nfsd, slp, procp, mrq) * postop attrs if the getattr fails. We might be able to * do that easier if we allocated separate mbufs for the data. 
*/ - if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); + nfsm_srv_vattr_init(vap, v3); + if (error || (getret = vnode_getattr(vp, vap, &context))) { if (!error) error = getret; - m_freem(mreq); - vrele(vp); + mbuf_freem(mreq); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); + if (uio_bufp != NULL) { + FREE(uio_bufp, M_TEMP); + } return (0); } - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); } else { - uiop->uio_resid = 0; - vput(vp); + uiop = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + if (!uiop) { + error = ENOMEM; + goto errorexit; + } } + vnode_put(vp); nfsm_srvfillattr(vap, fp); - len -= uiop->uio_resid; + // LP64todo - fix this + len -= uio_resid(uiop); tlen = nfsm_rndup(len); - if (cnt != tlen || tlen != len) - nfsm_adj(mb, cnt - tlen, tlen - len); + if (count != tlen || tlen != len) + nfsm_adj(mb, count - tlen, tlen - len); if (v3) { *tl++ = txdr_unsigned(len); if (len < reqlen) @@ -794,7 +954,11 @@ nfsrv_read(nfsd, slp, procp, mrq) *tl++ = nfs_false; } *tl = txdr_unsigned(len); - nfsm_srvdone; +nfsmout: + if (uio_bufp != NULL) { + FREE(uio_bufp, M_TEMP); + } + return (error); } /* @@ -804,43 +968,40 @@ int nfsrv_write(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct iovec *ivp; - register int i, cnt; - register struct mbuf *mp; - register struct nfs_fattr *fp; - struct iovec *iv; - struct vattr va, forat; - register struct vattr *vap = &va; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, len, forat_ret = 1; - int ioflags, aftat_ret = 1, retlen, zeroing, adjust; + int i, count; + mbuf_t mp; + struct nfs_fattr *fp; + struct vnode_attr va, forat; + struct vnode_attr *vap = &va; + u_long *tl; + long t1; + caddr_t bpos, tpos; + int error = 0, len, forat_ret = 1; + int ioflags, aftat_ret = 1, retlen, zeroing, adjust, tlen; int stable = NFSV3WRITE_FILESYNC; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; + mbuf_t mb, mb2, mreq; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t uiop; off_t off; - u_quad_t frev; - int didhold = 0; + char *uio_bufp = NULL; + struct vfs_context context; if (mrep == NULL) { *mrq = NULL; return (0); } - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + nfsm_srvmtofh(&nfh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); @@ -854,7 +1015,7 @@ nfsrv_write(nfsd, slp, procp, mrq) stable = NFSV3WRITE_UNSTABLE; } retlen = len = fxdr_unsigned(long, *tl); - cnt = i = 0; + count = i = 0; /* * For NFS Version 2, it is not obvious what a write of zero length @@ -867,23 +1028,32 @@ nfsrv_write(nfsd, slp, procp, mrq) while (mp) { if (mp == md) { zeroing = 0; - adjust = dpos - mtod(mp, caddr_t); - mp->m_len -= adjust; - if (mp->m_len > 0 && adjust > 0) - NFSMADV(mp, adjust); + tpos = mbuf_data(mp); + tlen = mbuf_len(mp); + adjust = dpos - tpos; + tlen -= adjust; + mbuf_setlen(mp, tlen); + if (tlen > 0 && adjust > 0) { 
+ tpos += adjust; + if ((error = mbuf_setdata(mp, tpos, tlen))) { + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + return (0); + } + } } if (zeroing) - mp->m_len = 0; - else if (mp->m_len > 0) { - i += mp->m_len; + mbuf_setlen(mp, 0); + else if ((tlen = mbuf_len(mp)) > 0) { + i += tlen; if (i > len) { - mp->m_len -= (i - len); + mbuf_setlen(mp, tlen - (i - len)); zeroing = 1; } - if (mp->m_len > 0) - cnt++; + if (mbuf_len(mp) > 0) + count++; } - mp = mp->m_next; + mp = mbuf_next(mp); } } if (len > NFS_MAXDATA || len < 0 || i < len) { @@ -892,44 +1062,65 @@ nfsrv_write(nfsd, slp, procp, mrq) nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } - if (v3) - forat_ret = VOP_GETATTR(vp, &forat, cred, procp); - if (vp->v_type != VREG) { + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + return (0); + } + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + if (v3) { + nfsm_srv_pre_vattr_init(&forat, v3); + forat_ret = vnode_getattr(vp, &forat, &context); + } + if (vnode_vtype(vp) != VREG) { if (v3) error = EINVAL; else - error = (vp->v_type == VDIR) ? EISDIR : EACCES; + error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES; } if (!error) { - nqsrv_getl(vp, ND_WRITE); - error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, &context, nxo, 1); } if (error) { - vput(vp); + vnode_put(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (len > 0) { - MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, - M_WAITOK); - uiop->uio_iov = iv = ivp; - uiop->uio_iovcnt = cnt; + MALLOC(uio_bufp, char *, UIO_SIZEOF(count), M_TEMP, M_WAITOK); + if (!uio_bufp) { + error = ENOMEM; + vnode_put(vp); + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + return (0); + } + uiop = uio_createwithbuffer(count, off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(count)); + if (!uiop) { + error = ENOMEM; + vnode_put(vp); + nfsm_reply(NFSX_WCCDATA(v3)); + nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + if (uio_bufp != NULL) { + FREE(uio_bufp, M_TEMP); + } + return (0); + } mp = mrep; while (mp) { - if (mp->m_len > 0) { - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - ivp++; - } - mp = mp->m_next; + if ((tlen = mbuf_len(mp)) > 0) + uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), tlen); + mp = mbuf_next(mp); } /* @@ -945,29 +1136,25 @@ nfsrv_write(nfsd, slp, procp, mrq) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); - uiop->uio_resid = len; - uiop->uio_rw = UIO_WRITE; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - uiop->uio_offset = off; - didhold = ubc_hold(vp); - error = VOP_WRITE(vp, uiop, ioflags, cred); - nfsstats.srvvop_writes++; - FREE((caddr_t)iv, M_TEMP); - } - aftat_ret = VOP_GETATTR(vp, vap, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + + error = VNOP_WRITE(vp, uiop, ioflags, &context); + OSAddAtomic(1, (SInt32*)(SInt32*)&nfsstats.srvvop_writes); + } + 
nfsm_srv_vattr_init(vap, v3); + aftat_ret = vnode_getattr(vp, vap, &context); + vnode_put(vp); if (!error) error = aftat_ret; nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); - if (error) + if (error) { + if (uio_bufp != NULL) { + FREE(uio_bufp, M_TEMP); + } return (0); + } nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* @@ -982,13 +1169,17 @@ nfsrv_write(nfsd, slp, procp, mrq) * but it may make the values more human readable, * for debugging purposes. */ - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); + *tl++ = txdr_unsigned(boottime_sec()); + *tl = txdr_unsigned(0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } - nfsm_srvdone; +nfsmout: + if (uio_bufp != NULL) { + FREE(uio_bufp, M_TEMP); + } + return (error); } /* @@ -1002,35 +1193,39 @@ int nfsrv_writegather(ndp, slp, procp, mrq) struct nfsrv_descript **ndp; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - register struct iovec *ivp; - register struct mbuf *mp; - register struct nfsrv_descript *wp, *nfsd, *owp, *swp; - register struct nfs_fattr *fp; - register int i; - struct iovec *iov; + mbuf_t mp; + struct nfsrv_descript *wp, *nfsd, *owp, *swp; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfs_fattr *fp; + int i; struct nfsrvw_delayhash *wpp; - struct ucred *cred; - struct vattr va, forat; - register u_long *tl; - register long t1; - caddr_t bpos, dpos; - int error = 0, rdonly, cache, len, forat_ret = 1; - int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; + kauth_cred_t cred; + struct vnode_attr va, forat; + u_long *tl; + long t1; + caddr_t bpos, dpos, tpos; + int error = 0, len, forat_ret = 1; + int ioflags, aftat_ret = 1, adjust, v3, zeroing, tlen; char *cp2; - struct mbuf *mb, *mb2, *mreq, *mrep, *md; - struct vnode *vp; - struct uio io, *uiop = &io; - u_quad_t frev, cur_usec; - int didhold; + mbuf_t mb, mb2, mreq, mrep, md; + vnode_t vp; + uio_t uiop = NULL; + char *uio_bufp = NULL; + u_quad_t cur_usec; struct timeval now; + struct vfs_context context; + + context.vc_proc = procp; #ifndef nolint i = 0; len = 0; #endif + *mrq = NULL; if (*ndp) { nfsd = *ndp; @@ -1038,7 +1233,8 @@ nfsrv_writegather(ndp, slp, procp, mrq) mrep = nfsd->nd_mrep; md = nfsd->nd_md; dpos = nfsd->nd_dpos; - cred = &nfsd->nd_cr; + cred = nfsd->nd_cr; + context.vc_ucred = cred; v3 = (nfsd->nd_flag & ND_NFSV3); LIST_INIT(&nfsd->nd_coalesce); nfsd->nd_mreq = NULL; @@ -1052,6 +1248,7 @@ nfsrv_writegather(ndp, slp, procp, mrq) * Now, get the write header.. */ nfsm_srvmtofh(&nfsd->nd_fh); + /* XXX shouldn't we be checking for invalid FHs before doing any more work? 
*/ if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &nfsd->nd_off); @@ -1078,25 +1275,32 @@ nfsrv_writegather(ndp, slp, procp, mrq) while (mp) { if (mp == md) { zeroing = 0; - adjust = dpos - mtod(mp, caddr_t); - mp->m_len -= adjust; - if (mp->m_len > 0 && adjust > 0) - NFSMADV(mp, adjust); + tpos = mbuf_data(mp); + tlen = mbuf_len(mp); + adjust = dpos - tpos; + tlen -= adjust; + mbuf_setlen(mp, tlen); + if (tlen > 0 && adjust > 0) { + tpos += adjust; + if ((error = mbuf_setdata(mp, tpos, tlen))) + goto nfsmout; + } } if (zeroing) - mp->m_len = 0; + mbuf_setlen(mp, 0); else { - i += mp->m_len; + tlen = mbuf_len(mp); + i += tlen; if (i > len) { - mp->m_len -= (i - len); + mbuf_setlen(mp, tlen - (i - len)); zeroing = 1; } } - mp = mp->m_next; + mp = mbuf_next(mp); } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: - m_freem(mrep); + mbuf_freem(mrep); mrep = NULL; error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED, v3); @@ -1104,36 +1308,34 @@ nfsmout: nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsd->nd_mreq = mreq; nfsd->nd_mrep = NULL; - nfsd->nd_time = 0; + nfsd->nd_time = 1; } /* * Add this entry to the hash and time queues. */ - s = splsoftclock(); + lck_mtx_lock(&slp->ns_wgmutex); owp = NULL; wp = slp->ns_tq.lh_first; while (wp && wp->nd_time < nfsd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } - NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff)); if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_tq); } else { LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } if (nfsd->nd_mrep) { - wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); + wpp = NWDELAYHASH(slp, nfsd->nd_fh.nfh_fid); owp = NULL; wp = wpp->lh_first; - while (wp && - bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + while (wp && !nfsrv_fhmatch(&nfsd->nd_fh, &wp->nd_fh)) { owp = wp; wp = wp->nd_hash.le_next; } - while (wp && wp->nd_off < nfsd->nd_off && - !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + while (wp && (wp->nd_off < nfsd->nd_off) && + nfsrv_fhmatch(&nfsd->nd_fh, &wp->nd_fh)) { owp = wp; wp = wp->nd_hash.le_next; } @@ -1153,48 +1355,52 @@ nfsmout: LIST_INSERT_HEAD(wpp, nfsd, nd_hash); } } - splx(s); + } else { + lck_mtx_lock(&slp->ns_wgmutex); } /* - * Now, do VOP_WRITE()s for any one(s) that need to be done now + * Now, do VNOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). */ loop1: microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { owp = nfsd->nd_tq.le_next; if (nfsd->nd_time > cur_usec) break; if (nfsd->nd_mreq) continue; - NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); LIST_REMOVE(nfsd, nd_hash); - splx(s); mrep = nfsd->nd_mrep; nfsd->nd_mrep = NULL; - cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); forat_ret = aftat_ret = 1; - error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, - nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + error = nfsrv_fhtovp(&nfsd->nd_fh, nfsd->nd_nam, TRUE, &vp, &nx, &nxo); if (!error) { - if (v3) - forat_ret = VOP_GETATTR(vp, &forat, cred, procp); - if (vp->v_type != VREG) { + error = nfsrv_credcheck(nfsd, nx, nxo); + if (error) + vnode_put(vp); + } + cred = nfsd->nd_cr; + context.vc_ucred = cred; + if (!error) { + if (v3) { + nfsm_srv_pre_vattr_init(&forat, v3); + forat_ret = vnode_getattr(vp, &forat, &context); + } + if (vnode_vtype(vp) != VREG) { if (v3) error = EINVAL; else - error = (vp->v_type == VDIR) ? 
EISDIR : EACCES; + error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES; } } else vp = NULL; if (!error) { - nqsrv_getl(vp, ND_WRITE); - error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, &context, nxo, 1); } if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) @@ -1203,48 +1409,43 @@ loop1: ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); - uiop->uio_rw = UIO_WRITE; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - uiop->uio_offset = nfsd->nd_off; - uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; - didhold = 0; - if (uiop->uio_resid > 0) { + + if (!error && ((nfsd->nd_eoff - nfsd->nd_off) > 0)) { mp = mrep; i = 0; while (mp) { - if (mp->m_len > 0) + if (mbuf_len(mp) > 0) i++; - mp = mp->m_next; + mp = mbuf_next(mp); } - uiop->uio_iovcnt = i; - MALLOC(iov, struct iovec *, i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = ivp = iov; - mp = mrep; - while (mp) { - if (mp->m_len > 0) { - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - ivp++; + + MALLOC(uio_bufp, char *, UIO_SIZEOF(i), M_TEMP, M_WAITOK); + if (uio_bufp) + uiop = uio_createwithbuffer(i, nfsd->nd_off, UIO_SYSSPACE, + UIO_WRITE, uio_bufp, UIO_SIZEOF(i)); + if (!uio_bufp || !uiop) + error = ENOMEM; + if (!error) { + mp = mrep; + while (mp) { + if ((tlen = mbuf_len(mp)) > 0) + uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), tlen); + mp = mbuf_next(mp); } - mp = mp->m_next; + error = VNOP_WRITE(vp, uiop, ioflags, &context); + OSAddAtomic(1, (SInt32*)&nfsstats.srvvop_writes); } - if (!error) { - didhold = ubc_hold(vp); - error = VOP_WRITE(vp, uiop, ioflags, cred); - nfsstats.srvvop_writes++; + if (uio_bufp) { + FREE(uio_bufp, M_TEMP); + uio_bufp = NULL; } - FREE((caddr_t)iov, M_TEMP); } - m_freem(mrep); + mbuf_freem(mrep); mrep = NULL; if (vp) { - aftat_ret = VOP_GETATTR(vp, &va, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + nfsm_srv_pre_vattr_init(&va, v3); + aftat_ret = vnode_getattr(vp, &va, &context); + vnode_put(vp); } /* @@ -1253,7 +1454,6 @@ loop1: */ swp = nfsd; do { - NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff)); if (error) { nfsm_writereply(NFSX_WCCDATA(v3), v3); if (v3) { @@ -1273,8 +1473,8 @@ loop1: * but it may make the values more human readable, * for debugging purposes. */ - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); + *tl++ = txdr_unsigned(boottime_sec()); + *tl = txdr_unsigned(0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(&va, fp); @@ -1288,38 +1488,32 @@ loop1: * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ - s = splsoftclock(); if (nfsd != swp) { - nfsd->nd_time = 0; + nfsd->nd_time = 1; LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } nfsd = swp->nd_coalesce.lh_first; if (nfsd) { LIST_REMOVE(nfsd, nd_tq); } - splx(s); } while (nfsd); - s = splsoftclock(); - swp->nd_time = 0; + swp->nd_time = 1; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); - splx(s); goto loop1; } - splx(s); /* * Search for a reply to return. */ - s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) if (nfsd->nd_mreq) { - NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); *mrq = nfsd->nd_mreq; *ndp = nfsd; break; } - splx(s); + slp->ns_wgtime = slp->ns_tq.lh_first ? 
slp->ns_tq.lh_first->nd_time : 0; + lck_mtx_unlock(&slp->ns_wgmutex); return (0); } @@ -1329,19 +1523,16 @@ loop1: * - merge nfsd->nd_mrep into owp->nd_mrep * - update the nd_eoff and nd_stable for owp * - put nfsd on owp's nd_coalesce list - * NB: Must be called at splsoftclock(). */ static void -nfsrvw_coalesce(owp, nfsd) - register struct nfsrv_descript *owp; - register struct nfsrv_descript *nfsd; +nfsrvw_coalesce( + struct nfsrv_descript *owp, + struct nfsrv_descript *nfsd) { - register int overlap; - register struct mbuf *mp; + int overlap, error; + mbuf_t mp, mpnext; struct nfsrv_descript *p; - NFS_DPF(WG, ("C%03x-%03x", - nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_hash); LIST_REMOVE(nfsd, nd_tq); if (owp->nd_eoff < nfsd->nd_eoff) { @@ -1349,14 +1540,17 @@ nfsrvw_coalesce(owp, nfsd) if (overlap < 0) panic("nfsrv_coalesce: bad off"); if (overlap > 0) - m_adj(nfsd->nd_mrep, overlap); + mbuf_adj(nfsd->nd_mrep, overlap); mp = owp->nd_mrep; - while (mp->m_next) - mp = mp->m_next; - mp->m_next = nfsd->nd_mrep; + while ((mpnext = mbuf_next(mp))) + mp = mpnext; + error = mbuf_setnext(mp, nfsd->nd_mrep); + if (error) + panic("nfsrvw_coalesce: mbuf_setnext failed: %d", error); owp->nd_eoff = nfsd->nd_eoff; - } else - m_freem(nfsd->nd_mrep); + } else { + mbuf_freem(nfsd->nd_mrep); + } nfsd->nd_mrep = NULL; if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) owp->nd_stable = NFSV3WRITE_FILESYNC; @@ -1380,13 +1574,15 @@ nfsrvw_coalesce(owp, nfsd) * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) + * + * XXX ILLEGAL */ void nfsrvw_sort(list, num) - register gid_t *list; - register int num; + gid_t *list; + int num; { - register int i, j; + int i, j; gid_t v; /* Insertion sort. */ @@ -1401,16 +1597,17 @@ nfsrvw_sort(list, num) /* * copy credentials making sure that the result can be compared with bcmp(). 
+ * + * XXX ILLEGAL */ void -nfsrv_setcred(incred, outcred) - register struct ucred *incred, *outcred; +nfsrv_setcred(kauth_cred_t incred, kauth_cred_t outcred) { - register int i; + int i; - bzero((caddr_t)outcred, sizeof (struct ucred)); + bzero((caddr_t)outcred, sizeof (*outcred)); outcred->cr_ref = 1; - outcred->cr_uid = incred->cr_uid; + outcred->cr_uid = kauth_cred_getuid(incred); outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; @@ -1425,67 +1622,85 @@ int nfsrv_create(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register struct nfsv2_sattr *sp; - register u_long *tl; + struct nfs_fattr *fp; + struct vnode_attr dirfor, diraft, postat; + struct vnode_attr va; + struct vnode_attr *vap = &va; + struct nfsv2_sattr *sp; + u_long *tl; struct nameidata nd; - register caddr_t cp; - register long t1; + caddr_t cp; + long t1; caddr_t bpos; - int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; + int error = 0, rdev, len, tsize, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev, tempsize; + mbuf_t mb, mb2, mreq; + vnode_t vp, dvp, dirp = NULL; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + u_quad_t tempsize; u_char cverf[NFSX_V3CREATEVERF]; + struct vfs_context context; + uid_t saved_uid; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. 
+ */ + saved_uid = kauth_cred_getuid(nfsd->nd_cr); #ifndef nolint rdev = 0; #endif nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; + vp = dvp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, v3); + nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) { + nd.ni_cnd.cn_nameiop = 0; nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - vrele(dirp); + vnode_put(dirp); return (0); } - VATTR_NULL(vap); + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + VATTR_INIT(vap); + if (v3) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSV3CREATE_GUARDED: - if (nd.ni_vp) { + if (vp) { error = EEXIST; break; } @@ -1496,179 +1711,229 @@ nfsrv_create(nfsd, slp, procp, mrq) nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); bcopy(cp, cverf, NFSX_V3CREATEVERF); exclusive_flag = 1; - if (nd.ni_vp == NULL) - vap->va_mode = 0; + if (vp == NULL) + VATTR_SET(vap, va_mode, 0); break; }; - vap->va_type = VREG; + VATTR_SET(vap, va_type, VREG); } else { + enum vtype v_type; + nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); - if (vap->va_type == VNON) - vap->va_type = VREG; - vap->va_mode = nfstov_mode(sp->sa_mode); - switch (vap->va_type) { + v_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); + if (v_type == VNON) + v_type = VREG; + VATTR_SET(vap, va_type, v_type); + VATTR_SET(vap, va_mode, nfstov_mode(sp->sa_mode)); + + switch (v_type) { case VREG: tsize = fxdr_unsigned(long, sp->sa_size); if (tsize != -1) - vap->va_size = (u_quad_t)tsize; + VATTR_SET(vap, va_data_size, (u_quad_t)tsize); break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(long, sp->sa_size); break; + default: + break; }; } /* - * Iff doesn't exist, create it + * If it doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? */ - if (nd.ni_vp == NULL) { + if (vp == NULL) { + kauth_acl_t xacl = NULL; + + /* + * If the credentials were mapped, we should + * map the same values in the attributes. 
+ */ + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nfsd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nfsd->nd_cr)); + if (kauth_cred_ismember_gid(nfsd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nfsd->nd_cr)); + } + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context, nxo, 0); + + /* construct ACL and handle inheritance */ + if (!error) { + error = kauth_acl_inherit(dvp, + NULL, + &xacl, + 0 /* !isdir */, + &context); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); + } + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + + /* validate new-file security information */ + if (!error) { + error = vnode_authattr_new(dvp, vap, 0, &context); + if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) { + /* + * Most NFS servers just ignore the UID/GID attributes, so we + * try ignoring them if that'll help the request succeed. + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = vnode_authattr_new(dvp, vap, 0, &context); + } + } + if (vap->va_type == VREG || vap->va_type == VSOCK) { - vrele(nd.ni_startdir); - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + + if (!error) + error = VNOP_CREATE(dvp, &vp, &nd.ni_cnd, vap, &context); + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, &context); + + if (xacl != NULL) + kauth_acl_free(xacl); + if (!error) { - nfsrv_object_create(nd.ni_vp); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; if (exclusive_flag) { exclusive_flag = 0; - VATTR_NULL(vap); - bcopy(cverf, (caddr_t)&vap->va_atime, + VATTR_INIT(vap); + bcopy(cverf, (caddr_t)&vap->va_access_time, NFSX_V3CREATEVERF); - error = VOP_SETATTR(nd.ni_vp, vap, cred, - procp); + VATTR_SET_ACTIVE(vap, va_access_time); + // skip authorization, as this is an + // NFS internal implementation detail. 
+ error = vnode_setattr(vp, vap, &context); } } + } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { - if (vap->va_type == VCHR && rdev == 0xffffffff) - vap->va_type = VFIFO; + if (vap->va_type == VCHR && rdev == (int)0xffffffff) + VATTR_SET(vap, va_type, VFIFO); if (vap->va_type != VFIFO && - (error = suser(cred, (u_short *)0))) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); + (error = suser(nfsd->nd_cr, (u_short *)0))) { nfsm_reply(0); - return (error); } else - vap->va_rdev = (dev_t)rdev; - nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))) { - vrele(nd.ni_startdir); + VATTR_SET(vap, va_rdev, (dev_t)rdev); + + error = VNOP_MKNOD(dvp, &vp, &nd.ni_cnd, vap, &context); + + if (xacl != NULL) + kauth_acl_free(xacl); + + if (error) { nfsm_reply(0); } + if (vp) { + vnode_recycle(vp); + vnode_put(vp); + vp = NULL; + } nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = cred; - if ((error = lookup(&nd))) { - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - nfsm_reply(0); + nd.ni_cnd.cn_flags &= ~LOCKPARENT; + nd.ni_cnd.cn_context = &context; + nd.ni_startdir = dvp; + nd.ni_usedvp = dvp; + error = lookup(&nd); + if (!error) { + if (nd.ni_cnd.cn_flags & ISSYMLINK) + error = EINVAL; } - nfsrv_object_create(nd.ni_vp); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - error = EINVAL; + if (error) nfsm_reply(0); - } } else { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); error = ENXIO; } - vp = nd.ni_vp; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); } else { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - vp = nd.ni_vp; - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (vap->va_size != -1) { - error = nfsrv_access(vp, VWRITE, cred, - (nd.ni_cnd.cn_flags & RDONLY), procp, 0); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); + + if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) { + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, + &context, nxo, 0); if (!error) { - nqsrv_getl(vp, ND_WRITE); - tempsize = vap->va_size; - VATTR_NULL(vap); - vap->va_size = tempsize; - error = VOP_SETATTR(vp, vap, cred, - procp); + tempsize = vap->va_data_size; + VATTR_INIT(vap); + VATTR_SET(vap, va_data_size, tempsize); + error = vnode_setattr(vp, vap, &context); } - if (error) - vput(vp); - } else { - if (error) - vput(vp); /* make sure we catch the EEXIST for nfsv3 */ } } if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - 
vput(vp); + error = nfsrv_vptofh(nx, !v3, NULL, vp, &context, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postat, v3); + error = vnode_getattr(vp, &postat, &context); + } } + if (vp) + vnode_put(vp); + if (v3) { if (exclusive_flag && !error && - bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) + bcmp(cverf, (caddr_t)&postat.va_access_time, NFSX_V3CREATEVERF)) error = EEXIST; - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); + dirp = NULL; } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); + nfsm_reply(NFSX_SRVFH(v3, &nfh) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); + if (v3) { if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + nfsm_srvpostop_fh(&nfh); + nfsm_srvpostop_attr(0, &postat); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { - nfsm_srvfhtom(fhp, v3); + nfsm_srvfhtom(&nfh, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + nfsm_srvfillattr(&postat, fp); } return (0); nfsmout: - if (dirp) - vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - } - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vput(nd.ni_vp); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + } + if (dirp) + vnode_put(dirp); return (error); } @@ -1679,156 +1944,218 @@ int nfsrv_mknod(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register u_long *tl; + struct vnode_attr dirfor, diraft, postat; + struct vnode_attr va; + struct vnode_attr *vap = &va; + u_long *tl; struct nameidata nd; - register long t1; + long t1; caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int error = 0, len, dirfor_ret = 1, diraft_ret = 1; u_long major, minor; enum vtype vtyp; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; + mbuf_t mb, mb2, mreq; + vnode_t vp, dvp, dirp = NULL; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context hacked_context; /* XXX should we have this? */ + struct vfs_context context; + uid_t saved_uid; + kauth_acl_t xacl = NULL; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + hacked_context.vc_proc = procp; + hacked_context.vc_ucred = proc_ucred(procp); + + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. 
+ */ + saved_uid = kauth_cred_getuid(nfsd->nd_cr); + vp = dvp = NULL; nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, 1); + nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); - if (dirp) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); + nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_path_mbuftond(&md, &dpos, 1, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); + if (dirp) { + nfsm_srv_pre_vattr_init(&dirfor, 1); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } if (error) { + nd.ni_cnd.cn_nameiop = 0; nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - vrele(dirp); + vnode_put(dirp); return (0); } + dvp = nd.ni_dvp; + vp = nd.ni_vp; + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vtyp = nfsv3tov_type(*tl); if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; error = NFSERR_BADTYPE; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); goto out; } - VATTR_NULL(vap); + VATTR_INIT(vap); nfsm_srvsattr(vap); + if (vtyp == VCHR || vtyp == VBLK) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_long, *tl++); minor = fxdr_unsigned(u_long, *tl); - vap->va_rdev = makedev(major, minor); + VATTR_SET(vap, va_rdev, makedev(major, minor)); } /* - * Iff doesn't exist, create it. + * If it doesn't exist, create it. */ - if (nd.ni_vp) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + if (vp) { error = EEXIST; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); goto out; } - vap->va_type = vtyp; - if (vtyp == VSOCK) { - vrele(nd.ni_startdir); - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); - if (!error) - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - } else { - if (vtyp != VFIFO && (error = suser(cred, (u_short *)0))) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - goto out; - } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))) { - vrele(nd.ni_startdir); - goto out; + VATTR_SET(vap, va_type, vtyp); + + /* + * If the credentials were mapped, we should + * map the same values in the attributes. 
+ */ + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nfsd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nfsd->nd_cr)); + if (kauth_cred_ismember_gid(nfsd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nfsd->nd_cr)); + } + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context, nxo, 0); + + /* construct ACL and handle inheritance */ + if (!error) { + error = kauth_acl_inherit(dvp, + NULL, + &xacl, + 0 /* !isdir */, + &context); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); + } + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + + /* validate new-file security information */ + if (!error) { + error = vnode_authattr_new(dvp, vap, 0, &context); + if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) { + /* + * Most NFS servers just ignore the UID/GID attributes, so we + * try ignoring them if that'll help the request succeed. + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = vnode_authattr_new(dvp, vap, 0, &context); + } + } + + if (vtyp == VSOCK) { + error = VNOP_CREATE(dvp, &vp, &nd.ni_cnd, vap, &context); + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, &context); + } else { + if (vtyp != VFIFO && (error = suser(nfsd->nd_cr, (u_short *)0))) { + goto out1; + } + if ((error = VNOP_MKNOD(dvp, &vp, &nd.ni_cnd, vap, &context))) { + goto out1; + } + if (vp) { + vnode_recycle(vp); + vnode_put(vp); + vp = NULL; } nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = procp->p_ucred; + nd.ni_cnd.cn_flags &= ~LOCKPARENT; + nd.ni_cnd.cn_context = &hacked_context; + nd.ni_startdir = dvp; + nd.ni_usedvp = dvp; error = lookup(&nd); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - if (error) - goto out; - if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - error = EINVAL; + if (!error) { + vp = nd.ni_vp; + if (nd.ni_cnd.cn_flags & ISSYMLINK) + error = EINVAL; } } +out1: + if (xacl != NULL) + kauth_acl_free(xacl); out: - vp = nd.ni_vp; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); + if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + error = nfsrv_vptofh(nx, 0, NULL, vp, &context, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postat, 1); + error = vnode_getattr(vp, &postat, &context); + } } - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); - nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); + if (vp) + vnode_put(vp); + + nfsm_srv_vattr_init(&diraft, 1); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); + dirp = NULL; + + nfsm_reply(NFSX_SRVFH(1, &nfh) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + nfsm_srvpostop_fh(&nfh); + nfsm_srvpostop_attr(0, &postat); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); 
return (0); nfsmout: - if (dirp) - vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - } - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vput(nd.ni_vp); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + } + if (dirp) + vnode_put(dirp); return (error); } @@ -1839,84 +2166,86 @@ int nfsrv_remove(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; - register u_long *tl; - register long t1; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int error = 0, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *dirp; - struct vattr dirfor, diraft; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; + mbuf_t mb, mreq; + vnode_t vp, dvp, dirp = NULL; + struct vnode_attr dirfor, diraft; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context context; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + dvp = vp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, v3); -#ifndef nolint - vp = (struct vnode *)0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else - vrele(dirp); + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; + } } if (!error) { + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp->v_type == VDIR) { + + if (vnode_vtype(vp) == VDIR) error = EPERM; /* POSIX */ - goto out; - } - /* - * The root of a mounted filesystem cannot be deleted. - */ - if (vp->v_flag & VROOT) { + else if (vnode_isvroot(vp)) + /* + * The root of a mounted filesystem cannot be deleted. 
+ */ error = EBUSY; - goto out; - } -out: - if (!error) { - nqsrv_getl(nd.ni_dvp, ND_WRITE); - nqsrv_getl(vp, ND_WRITE); + else + error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, &context, nxo, 0); - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + if (!error) + error = VNOP_REMOVE(dvp, vp, &nd.ni_cnd, 0, &context); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); - } + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(vp); + vnode_put(dvp); } - if (dirp && v3) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + if (dirp) { + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -1926,107 +2255,163 @@ int nfsrv_rename(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register long t1; + kauth_cred_t saved_cred = NULL; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; + int error = 0, fromlen, tolen; + int fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mreq; + char *cp2, *frompath = NULL, *topath = NULL; + mbuf_t mb, mreq; struct nameidata fromnd, tond; - struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; - struct vnode *tdirp = (struct vnode *)0; - struct vattr fdirfor, fdiraft, tdirfor, tdiraft; - nfsfh_t fnfh, tnfh; - fhandle_t *ffhp, *tfhp; - u_quad_t frev; - uid_t saved_uid; + vnode_t fvp, tvp, tdvp, fdvp, fdirp = NULL; + vnode_t tdirp = NULL; + struct vnode_attr fdirfor, fdiraft, tdirfor, tdiraft; + struct nfs_filehandle fnfh, tnfh; + struct nfs_export *fnx, *tnx; + struct nfs_export_options *fnxo, *tnxo; + enum vtype fvtype, tvtype; + int holding_mntlock; + mount_t locked_mp; + struct vfs_context context; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; #ifndef nolint - fvp = (struct vnode *)0; + fvp = (vnode_t)0; #endif - ffhp = &fnfh.fh_generic; - tfhp = &tnfh.fh_generic; - fromnd.ni_cnd.cn_nameiop = 0; - tond.ni_cnd.cn_nameiop = 0; - nfsm_srvmtofh(ffhp); - nfsm_srvnamesiz(len); + + /* + * these need to be set before + * calling any nfsm_xxxx macros + * since they may take us out + * through the error path + */ + holding_mntlock = 0; + fvp = tvp = NULL; + fdvp = tdvp = NULL; + locked_mp = NULL; + + nfsm_srvmtofh(&fnfh); + nfsm_srvnamesiz(fromlen, v3); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &fromlen, &fromnd); + if (error) { + nfsm_reply(0); + return (0); + } + frompath = fromnd.ni_cnd.cn_pnbuf; + nfsm_srvmtofh(&tnfh); + nfsm_strsiz(tolen, NFS_MAXNAMLEN, v3); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &tolen, &tond); + if (error) { + nfsm_reply(0); + FREE_ZONE(frompath, MAXPATHLEN, M_NAMEI); + return (0); + } + topath = tond.ni_cnd.cn_pnbuf; + /* * Remember our original uid so that we can reset cr_uid 
before * the second nfs_namei() call, in case it is remapped. */ - saved_uid = cred->cr_uid; - fromnd.ni_cnd.cn_cred = cred; + saved_cred = nfsd->nd_cr; + kauth_cred_ref(saved_cred); +retry: fromnd.ni_cnd.cn_nameiop = DELETE; - fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; - error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, - &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + fromnd.ni_cnd.cn_flags = WANTPARENT; + + fromnd.ni_cnd.cn_pnbuf = frompath; + frompath = NULL; + fromnd.ni_cnd.cn_pnlen = MAXPATHLEN; + fromnd.ni_cnd.cn_flags |= HASBUF; + + error = nfs_namei(nfsd, &context, &fromnd, &fnfh, nam, FALSE, &fdirp, &fnx, &fnxo); + if (error) + goto out; + fdvp = fromnd.ni_dvp; + fvp = fromnd.ni_vp; + if (fdirp) { - if (v3) - fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, - procp); - else { - vrele(fdirp); - fdirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&fdirfor, v3); + fdirfor_ret = vnode_getattr(fdirp, &fdirfor, &context); + } else { + vnode_put(fdirp); + fdirp = NULL; } } - if (error) { - nfsm_reply(2 * NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); - nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); - if (fdirp) - vrele(fdirp); - return (0); + fvtype = vnode_vtype(fvp); + + /* reset credential if it was remapped */ + if (nfsd->nd_cr != saved_cred) { + kauth_cred_rele(nfsd->nd_cr); + nfsd->nd_cr = saved_cred; + kauth_cred_ref(nfsd->nd_cr); } - fvp = fromnd.ni_vp; - nfsm_srvmtofh(tfhp); - nfsm_strsiz(len2, NFS_MAXNAMLEN); - cred->cr_uid = saved_uid; - tond.ni_cnd.cn_cred = cred; + tond.ni_cnd.cn_nameiop = RENAME; - tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; - error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, - &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); - if (tdirp) { - if (v3) - tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, - procp); - else { - vrele(tdirp); - tdirp = (struct vnode *)0; - } - } + tond.ni_cnd.cn_flags = WANTPARENT; + + tond.ni_cnd.cn_pnbuf = topath; + topath = NULL; + tond.ni_cnd.cn_pnlen = MAXPATHLEN; + tond.ni_cnd.cn_flags |= HASBUF; + + if (fvtype == VDIR) + tond.ni_cnd.cn_flags |= WILLBEDIR; + + error = nfs_namei(nfsd, &context, &tond, &tnfh, nam, FALSE, &tdirp, &tnx, &tnxo); if (error) { - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out1; + /* + * Translate error code for rename("dir1", "dir2/."). + */ + if (error == EISDIR && fvtype == VDIR) { + if (v3) + error = EINVAL; + else + error = ENOTEMPTY; + } + goto out; } tdvp = tond.ni_dvp; - tvp = tond.ni_vp; + tvp = tond.ni_vp; + + if (tdirp) { + if (v3) { + nfsm_srv_pre_vattr_init(&tdirfor, v3); + tdirfor_ret = vnode_getattr(tdirp, &tdirfor, &context); + } else { + vnode_put(tdirp); + tdirp = NULL; + } + } + if (tvp != NULL) { - if (fvp->v_type == VDIR && tvp->v_type != VDIR) { + tvtype = vnode_vtype(tvp); + + if (fvtype == VDIR && tvtype != VDIR) { if (v3) error = EEXIST; else error = EISDIR; goto out; - } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { + } else if (fvtype != VDIR && tvtype == VDIR) { if (v3) error = EEXIST; else error = ENOTDIR; goto out; } - if (tvp->v_type == VDIR && tvp->v_mountedhere) { + if (tvtype == VDIR && vnode_mountedhere(tvp)) { if (v3) error = EXDEV; else @@ -2034,95 +2419,346 @@ nfsrv_rename(nfsd, slp, procp, mrq) goto out; } } - if (fvp->v_type == VDIR && fvp->v_mountedhere) { + if (fvp == tdvp) { + if (v3) + error = EINVAL; + else + error = ENOTEMPTY; + goto out; + } + + /* + * Authorization. 
+ * + * If tvp is a directory and not the same as fdvp, or tdvp is not the same as fdvp, + * the node is moving between directories and we need rights to remove from the + * old and add to the new. + * + * If tvp already exists and is not a directory, we need to be allowed to delete it. + * + * Note that we do not inherit when renaming. XXX this needs to be revisited to + * implement the deferred-inherit bit. + */ + { + int moving = 0; + + error = 0; + if ((tvp != NULL) && vnode_isdir(tvp)) { + if (tvp != fdvp) + moving = 1; + } else if (tdvp != fdvp) { + moving = 1; + } + if (moving) { + /* moving out of fdvp, must have delete rights */ + if ((error = nfsrv_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, &context, fnxo, 0)) != 0) + goto auth_exit; + /* moving into tdvp or tvp, must have rights to add */ + if ((error = nfsrv_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp, + NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + &context, tnxo, 0)) != 0) + goto auth_exit; + } else { + /* node staying in same directory, must be allowed to add new name */ + if ((error = nfsrv_authorize(fdvp, NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + &context, fnxo, 0)) != 0) + goto auth_exit; + } + /* overwriting tvp */ + if ((tvp != NULL) && !vnode_isdir(tvp) && + ((error = nfsrv_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, &context, tnxo, 0)) != 0)) + goto auth_exit; + + /* XXX more checks? */ + +auth_exit: + /* authorization denied */ + if (error != 0) + goto out; + } + + if ((vnode_mount(fvp) != vnode_mount(tdvp)) || + (tvp && (vnode_mount(fvp) != vnode_mount(tvp)))) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } - if (fvp->v_mount != tdvp->v_mount) { + /* + * The following edge case is caught here: + * (to cannot be a descendent of from) + * + * o fdvp + * / + * / + * o fvp + * \ + * \ + * o tdvp + * / + * / + * o tvp + */ + if (tdvp->v_parent == fvp) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } - if (fvp == tdvp) + if (fvtype == VDIR && vnode_mountedhere(fvp)) { if (v3) - error = EINVAL; + error = EXDEV; else error = ENOTEMPTY; + goto out; + } /* * If source is the same as the destination (that is the - * same vnode) then there is nothing to do. - * (fixed to have POSIX semantics - CSM 3/2/98) + * same vnode) then there is nothing to do... + * EXCEPT if the underlying file system supports case + * insensitivity and is case preserving. In this case + * the file system needs to handle the special case of + * getting the same vnode as target (fvp) and source (tvp). + * + * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE + * and _PC_CASE_PRESERVING can have this exception, and they need to + * handle the special case of getting the same vnode as target and + * source. NOTE: Then the target is unlocked going into vnop_rename, + * so not to cause locking problems. There is a single reference on tvp. 
+ * + * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE + * that correct behaviour then is just to remove the source (link) */ - if (fvp == tvp) - error = -1; -out: - if (!error) { - nqsrv_getl(fromnd.ni_dvp, ND_WRITE); - nqsrv_getl(tdvp, ND_WRITE); - if (tvp) - nqsrv_getl(tvp, ND_WRITE); - error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, - tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + if ((fvp == tvp) && (fdvp == tdvp)) { + if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) { + goto out; + } + } + + if (holding_mntlock && vnode_mount(fvp) != locked_mp) { + /* + * we're holding a reference and lock + * on locked_mp, but it no longer matches + * what we want to do... so drop our hold + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp != fdvp && fvtype == VDIR) { + /* + * serialize renames that re-shape + * the tree... if holding_mntlock is + * set, then we're ready to go... + * otherwise we + * first need to drop the iocounts + * we picked up, second take the + * lock to serialize the access, + * then finally start the lookup + * process over with the lock held + */ + if (!holding_mntlock) { + /* + * need to grab a reference on + * the mount point before we + * drop all the iocounts... once + * the iocounts are gone, the mount + * could follow + */ + locked_mp = vnode_mount(fvp); + mount_ref(locked_mp, 0); + + /* make a copy of to path to pass to nfs_namei() again */ + MALLOC_ZONE(topath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (topath) + bcopy(tond.ni_cnd.cn_pnbuf, topath, tolen + 1); + + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + + /* make a copy of from path to pass to nfs_namei() again */ + MALLOC_ZONE(frompath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (frompath) + bcopy(fromnd.ni_cnd.cn_pnbuf, frompath, fromlen + 1); + + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromnd); + + vnode_put(fvp); + vnode_put(fdvp); + + if (fdirp) { + vnode_put(fdirp); + fdirp = NULL; + } + if (tdirp) { + vnode_put(tdirp); + tdirp = NULL; + } + mount_lock_renames(locked_mp); + holding_mntlock = 1; + + fvp = tvp = NULL; + fdvp = tdvp = NULL; + + fdirfor_ret = tdirfor_ret = 1; + + if (!topath || !frompath) { + /* we couldn't allocate a path, so bail */ + error = ENOMEM; + goto out; + } + + goto retry; + } } else { - VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); + /* + * when we dropped the iocounts to take + * the lock, we allowed the identity of + * the various vnodes to change... if they did, + * we may no longer be dealing with a rename + * that reshapes the tree... once we're holding + * the iocounts, the vnodes can't change type + * so we're free to drop the lock at this point + * and continue on + */ + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + } + + // save these off so we can later verify that fvp is the same + char *oname; + vnode_t oparent; + oname = fvp->v_name; + oparent = fvp->v_parent; + + error = VNOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, + tond.ni_dvp, tond.ni_vp, &tond.ni_cnd, &context); + /* + * fix up name & parent pointers. 
note that we first + * check that fvp has the same name/parent pointers it + * had before the rename call... this is a 'weak' check + * at best... + */ + if (oname == fvp->v_name && oparent == fvp->v_parent) { + int update_flags; + update_flags = VNODE_UPDATE_NAME; + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags); + } +out: + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp) { + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); if (tvp) - vput(tvp); - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - if (error == -1) - error = 0; + vnode_put(tvp); + vnode_put(tdvp); + + tdvp = NULL; + } + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromnd); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + + fdvp = NULL; } - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); - tond.ni_cnd.cn_flags &= ~HASBUF; -out1: if (fdirp) { - fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); - vrele(fdirp); + nfsm_srv_vattr_init(&fdiraft, v3); + fdiraft_ret = vnode_getattr(fdirp, &fdiraft, &context); + vnode_put(fdirp); + fdirp = NULL; } if (tdirp) { - tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); - vrele(tdirp); + nfsm_srv_vattr_init(&tdiraft, v3); + tdiraft_ret = vnode_getattr(tdirp, &tdiraft, &context); + vnode_put(tdirp); + tdirp = NULL; } - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); - fromnd.ni_cnd.cn_flags &= ~HASBUF; nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } + if (frompath) + FREE_ZONE(frompath, MAXPATHLEN, M_NAMEI); + if (topath) + FREE_ZONE(topath, MAXPATHLEN, M_NAMEI); + if (saved_cred) + kauth_cred_rele(saved_cred); return (0); nfsmout: + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + } + if (tdvp) { + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + } + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromnd); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + } if (fdirp) - vrele(fdirp); + vnode_put(fdirp); if (tdirp) - vrele(tdirp); - if (tond.ni_cnd.cn_nameiop) { - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); - tond.ni_cnd.cn_flags &= ~HASBUF; - } - if (fromnd.ni_cnd.cn_nameiop) { - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, - fromnd.ni_cnd.cn_pnlen, M_NAMEI); - fromnd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - } + vnode_put(tdirp); + if (frompath) + FREE_ZONE(frompath, MAXPATHLEN, M_NAMEI); + if (topath) + FREE_ZONE(topath, MAXPATHLEN, M_NAMEI); + if (saved_cred) + kauth_cred_rele(saved_cred); return (error); } @@ -2133,96 +2769,116 @@ int nfsrv_link(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct 
nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; - register u_long *tl; - register long t1; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; + int error = 0, len, dirfor_ret = 1, diraft_ret = 1; int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *xp, *dirp = (struct vnode *)0; - struct vattr dirfor, diraft, at; - nfsfh_t nfh, dnfh; - fhandle_t *fhp, *dfhp; - u_quad_t frev; - - fhp = &nfh.fh_generic; - dfhp = &dnfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvmtofh(dfhp); - nfsm_srvnamesiz(len); - if ((error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + mbuf_t mb, mreq; + vnode_t vp, xp, dvp, dirp = NULL; + struct vnode_attr dirfor, diraft, at; + struct nfs_filehandle nfh, dnfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context context; + + vp = xp = dvp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvmtofh(&dnfh); + nfsm_srvnamesiz(len, v3); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } - if (vp->v_type == VDIR) { + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + nfsm_srvpostop_attr(getret, &at); + nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + return (0); + } + + /* we're not allowed to link to directories... */ + if (vnode_vtype(vp) == VDIR) { error = EPERM; /* POSIX */ goto out1; } - nd.ni_cnd.cn_cred = cred; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + /* ...or to anything that kauth doesn't want us to (eg. 
immutable items) */ + if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, &context, nxo, 0)) != 0) + goto out1; + nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; - error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &dnfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) goto out1; + dvp = nd.ni_dvp; xp = nd.ni_vp; - if (xp != NULL) { + + if (xp != NULL) error = EEXIST; - goto out; - } - xp = nd.ni_dvp; - if (vp->v_mount != xp->v_mount) + else if (vnode_mount(vp) != vnode_mount(dvp)) error = EXDEV; -out: - if (!error) { - nqsrv_getl(vp, ND_WRITE); - nqsrv_getl(xp, ND_WRITE); - error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - } + else + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context, nxo, 0); + + if (!error) + error = VNOP_LINK(vp, dvp, &nd.ni_cnd, &context); + + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (xp) + vnode_put(xp); + vnode_put(dvp); out1: - if (v3) - getret = VOP_GETATTR(vp, &at, cred, procp); + if (v3) { + nfsm_srv_vattr_init(&at, v3); + getret = vnode_getattr(vp, &at, &context); + } if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); } - vrele(vp); + vnode_put(vp); + nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -2232,142 +2888,198 @@ int nfsrv_symlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; + struct vnode_attr dirfor, diraft, postat; struct nameidata nd; - register struct vattr *vap = &va; - register u_long *tl; - register long t1; + struct vnode_attr va; + struct vnode_attr *vap = &va; + u_long *tl; + long t1; struct nfsv2_sattr *sp; - char *bpos, *pathcp = (char *)0, *cp2; - struct uio io; - struct iovec iv; - int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; + char *bpos, *linkdata = NULL, *cp2; + int error = 0, len, linkdatalen; + int dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); - struct mbuf *mb, *mreq, *mb2; - struct vnode *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; + mbuf_t mb, mreq, mb2; + vnode_t vp, dvp, dirp = NULL; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + struct 
vfs_context context; + uid_t saved_uid; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. + */ + saved_uid = kauth_cred_getuid(nfsd->nd_cr); nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; + vp = dvp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, v3); + nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + nd.ni_cnd.cn_flags = LOCKPARENT; + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; } } - if (error) - goto out; - VATTR_NULL(vap); + if (error) { + nd.ni_cnd.cn_nameiop = 0; + goto out1; + } + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + VATTR_INIT(vap); if (v3) nfsm_srvsattr(vap); - nfsm_strsiz(len2, NFS_MAXPATHLEN); - MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); - iv.iov_base = pathcp; - iv.iov_len = len2; - io.uio_resid = len2; - io.uio_offset = 0; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; - nfsm_mtouio(&io, len2); + nfsm_strsiz(linkdatalen, NFS_MAXPATHLEN, v3); + MALLOC(linkdata, caddr_t, linkdatalen + 1, M_TEMP, M_WAITOK); + if (!linkdata) { + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + vnode_put(nd.ni_dvp); + vnode_put(nd.ni_vp); + error = ENOMEM; + goto out; + } + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + vnode_put(nd.ni_dvp); + vnode_put(nd.ni_vp); + error = ENOMEM; + goto out; + } + uio_addiov(auio, CAST_USER_ADDR_T(linkdata), linkdatalen); + nfsm_mtouio(auio, linkdatalen); if (!v3) { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); - } - *(pathcp + len2) = '\0'; - if (nd.ni_vp) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); + VATTR_SET(vap, va_mode, fxdr_unsigned(u_short, sp->sa_mode)); + } + *(linkdata + linkdatalen) = '\0'; + if (vp) { error = EEXIST; goto out; } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); - if (error) - vrele(nd.ni_startdir); - else { - if (v3) { - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); - nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = cred; - error = lookup(&nd); - if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); + + /* + * If the credentials were mapped, we should + * map the same values in the attributes. 
+ */ + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nfsd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nfsd->nd_cr)); + if (kauth_cred_ismember_gid(nfsd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nfsd->nd_cr)); + } + VATTR_SET(vap, va_type, VLNK); + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context, nxo, 0); + + /* validate given attributes */ + if (!error) { + error = vnode_authattr_new(dvp, vap, 0, &context); + if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) { + /* + * Most NFS servers just ignore the UID/GID attributes, so we + * try ignoring them if that'll help the request succeed. + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = vnode_authattr_new(dvp, vap, 0, &context); + } + } + if (!error) + error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, vap, linkdata, &context); + + if (!error && v3) { + if (vp == NULL) { + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW); + nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); + nd.ni_cnd.cn_context = &context; + nd.ni_startdir = dvp; + nd.ni_usedvp = dvp; + error = lookup(&nd); if (!error) - error = VOP_GETATTR(nd.ni_vp, vap, cred, - procp); - vput(nd.ni_vp); + vp = nd.ni_vp; + } + if (!error) { + error = nfsrv_vptofh(nx, !v3, NULL, vp, &context, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postat, v3); + error = vnode_getattr(vp, &postat, &context); + } } - } else - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; } out: - if (pathcp) - FREE(pathcp, M_TEMP); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + nd.ni_cnd.cn_nameiop = 0; + + if (vp) + vnode_put(vp); + vnode_put(dvp); +out1: + if (linkdata) + FREE(linkdata, M_TEMP); if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + nfsm_reply(NFSX_SRVFH(v3, &nfh) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + nfsm_srvpostop_fh(&nfh); + nfsm_srvpostop_attr(0, &postat); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } return (0); nfsmout: if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); } if (dirp) - vrele(dirp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - if (pathcp) - FREE(pathcp, M_TEMP); + vnode_put(dirp); + if (linkdata) + FREE(linkdata, M_TEMP); return (error); } @@ -2378,112 +3090,199 @@ int nfsrv_mkdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; 
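/*
 * A pattern worth noting in this conversion: each handler now bundles
 * the request's proc and credential into a struct vfs_context and
 * passes that single argument to vnode_getattr()/VNOP_*() calls,
 * replacing the old (cred, procp) argument pairs. A minimal sketch of
 * the idiom; the helper name is hypothetical, the patch itself simply
 * open-codes the two assignments in every handler:
 */
static void
nfsrv_init_context(struct nfsrv_descript *nfsd, proc_t procp,
	struct vfs_context *ctx)
{
	ctx->vc_proc = procp;		/* process issuing the RPC */
	ctx->vc_ucred = nfsd->nd_cr;	/* credential decoded from the RPC */
}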
caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register struct nfs_fattr *fp; + struct vnode_attr dirfor, diraft, postat; + struct vnode_attr va; + struct vnode_attr *vap = &va; + struct nfs_fattr *fp; struct nameidata nd; - register caddr_t cp; - register u_long *tl; - register long t1; + caddr_t cp; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int error = 0, len; + int dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; + mbuf_t mb, mb2, mreq; + vnode_t vp, dvp, dirp = NULL; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context context; + uid_t saved_uid; + kauth_acl_t xacl = NULL; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. + */ + saved_uid = kauth_cred_getuid(nfsd->nd_cr); + + nd.ni_cnd.cn_nameiop = 0; + vp = dvp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, v3); + nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) { + nd.ni_cnd.cn_nameiop = 0; nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - vrele(dirp); + vnode_put(dirp); return (0); } - VATTR_NULL(vap); + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + VATTR_INIT(vap); if (v3) { nfsm_srvsattr(vap); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - vap->va_mode = nfstov_mode(*tl++); + VATTR_SET(vap, va_mode, nfstov_mode(*tl++)); } - vap->va_type = VDIR; - vp = nd.ni_vp; + VATTR_SET(vap, va_type, VDIR); + if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); error = EEXIST; goto out; } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + + /* + * If the credentials were mapped, we should + * map the same values in the attributes. 
+ */ + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nfsd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nfsd->nd_cr)); + if (kauth_cred_ismember_gid(nfsd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nfsd->nd_cr)); + } + + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, &context, nxo, 0); + + /* construct ACL and handle inheritance */ if (!error) { - vp = nd.ni_vp; - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + error = kauth_acl_inherit(dvp, + NULL, + &xacl, /* isdir */ + 1, + &context); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); + } + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + + /* validate new-file security information */ + if (!error) { + error = vnode_authattr_new(dvp, vap, 0, &context); + if (error && (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid))) { + /* + * Most NFS servers just ignore the UID/GID attributes, so we + * try ignoring them if that'll help the request succeed. + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = vnode_authattr_new(dvp, vap, 0, &context); + } + } + + if (!error) + error = VNOP_MKDIR(dvp, &vp, &nd.ni_cnd, vap, &context); + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, &context); + + if (xacl != NULL) + kauth_acl_free(xacl); + + if (!error) { + error = nfsrv_vptofh(nx, !v3, NULL, vp, &context, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postat, v3); + error = vnode_getattr(vp, &postat, &context); + } + vnode_put(vp); + vp = NULL; } + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(dvp); out: + nd.ni_cnd.cn_nameiop = 0; + if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); + nfsm_reply(NFSX_SRVFH(v3, &nfh) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + nfsm_srvpostop_fh(&nfh); + nfsm_srvpostop_attr(0, &postat); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { - nfsm_srvfhtom(fhp, v3); + nfsm_srvfhtom(&nfh, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + nfsm_srvfillattr(&postat, fp); } return (0); nfsmout: + if (nd.ni_cnd.cn_nameiop) { + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + vnode_put(dvp); + if (vp) + vnode_put(vp); + } if (dirp) - vrele(dirp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); + vnode_put(dirp); return (error); } @@ -2494,110 +3293,120 @@ int nfsrv_rmdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = 
nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register long t1; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; + int error = 0, len; + int dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - struct vattr dirfor, diraft; - nfsfh_t nfh; - fhandle_t *fhp; + mbuf_t mb, mreq; + vnode_t vp, dvp, dirp = NULL; + struct vnode_attr dirfor, diraft; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; struct nameidata nd; - u_quad_t frev; + struct vfs_context context; + + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + vp = dvp = NULL; + nfsm_srvmtofh(&nfh); + nfsm_srvnamesiz(len, v3); - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + error = nfsm_path_mbuftond(&md, &dpos, v3, FALSE, &len, &nd); + if (!error) + error = nfs_namei(nfsd, &context, &nd, &nfh, nam, FALSE, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (v3) { + nfsm_srv_pre_vattr_init(&dirfor, v3); + dirfor_ret = vnode_getattr(dirp, &dirfor, &context); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - vrele(dirp); + vnode_put(dirp); return (0); } + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp->v_type != VDIR) { + + if (vnode_vtype(vp) != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ - if (nd.ni_dvp == vp) { + if (dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ - if (vp->v_flag & VROOT) + if (vnode_isvroot(vp)) error = EBUSY; + if (!error) + error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, &context, nxo, 0); + if (!error) + error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, &context); out: - if (!error) { - nqsrv_getl(nd.ni_dvp, ND_WRITE); - nqsrv_getl(vp, ND_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); - } + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); + if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&diraft, v3); + diraft_ret = vnode_getattr(dirp, &diraft, &context); + vnode_put(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } - nfsm_srvdone; +nfsmout: + return (error); } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR - * - calls VOP_READDIR() + * - calls VNOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. 
- * - it only knows that it has encountered eof when the VOP_READDIR() + * - it only knows that it has encountered eof when the VNOP_READDIR() * reads nothing * - as such one readdir rpc will return eof false although you are there * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. - * NB: It is tempting to set eof to true if the VOP_READDIR() reads less + * NB: It is tempting to set eof to true if the VNOP_READDIR() reads less * than requested, but this may not apply to all filesystems. For * example, client NFS does not { although it is never remote mounted * anyhow } @@ -2608,49 +3417,44 @@ out: * the EOF flag. For readdirplus, the maxcount is the same, and the * dircount includes all that except for the entry attributes and handles. */ -struct flrep { - nfsuint64 fl_off; - u_long fl_postopok; - u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; - u_long fl_fhok; - u_long fl_fhsize; - u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; -}; int nfsrv_readdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register char *bp, *be; - register struct mbuf *mp; - register struct dirent *dp; - register caddr_t cp; - register u_long *tl; - register long t1; + char *bp, *be; + mbuf_t mp; + struct direntry *dp; + caddr_t cp; + u_long *tl; + long t1; caddr_t bpos; - struct mbuf *mb, *mb2, *mreq, *mp2; + mbuf_t mb, mb2, mreq, mp2; char *cpos, *cend, *cp2, *rbuf; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io; - struct iovec iv; + vnode_t vp; + struct vnode_attr at; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; - int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies = 0; + int siz, count, fullsiz, eofflag, nentries = 0; int v3 = (nfsd->nd_flag & ND_NFSV3); - u_quad_t frev, off, toff, verf; - u_long *cookies = NULL, *cookiep; + u_quad_t off, toff, verf; + nfsuint64 tquad; + int vnopflag; + struct vfs_context context; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF; + + nfsm_srvmtofh(&nfh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); @@ -2662,99 +3466,88 @@ nfsrv_readdir(nfsd, slp, procp, mrq) toff = fxdr_unsigned(u_quad_t, *tl++); } off = toff; - cnt = fxdr_unsigned(int, *tl); - siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + count = fxdr_unsigned(int, *tl); + siz = ((count + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } - nqsrv_getl(vp, ND_READ); + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; if (v3) { - 
error = getret = VOP_GETATTR(vp, &at, cred, procp); + nfsm_srv_vattr_init(&at, v3); + error = getret = vnode_getattr(vp, &at, &context); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; } if (!error) - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, &context, nxo, 0); if (error) { - vput(vp); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } - VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); -again: - iv.iov_base = rbuf; - iv.iov_len = fullsiz; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_offset = (off_t)off; - io.uio_resid = fullsiz; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; - eofflag = 0; - - if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; - } - if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { - FREE((caddr_t)rbuf, M_TEMP); + if (!rbuf) { + error = ENOMEM; + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } - error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); - off = (off_t)io.uio_offset; - /* - * We cannot set the error in the case where there are no cookies - * and no error, only, as FreeBSD. In the scenario the client is - * calling us back being told there were "more" entries on last readdir - * return, and we have no more entries, our VOP_READDIR can give - * cookies = NULL and no error. This is due to a zero size to MALLOC - * returning NULL unlike FreeBSD which returns a pointer. - * With FreeBSD it makes sense if the MALLOC failed and you get in that - * bind. For us, we need something more. Thus, we should make sure we - * had some cookies to return, but no pointer and no error for EPERM case. - * Otherwise, go thru normal processing of sending back the eofflag. This check - * is also legit on first call to the routine by client since . and .. - * should be returned. Make same change to nfsrv_readdirplus. 
- */ - if ((ncookies != 0) && !cookies && !error) - error = NFSERR_PERM; - + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + error = ENOMEM; + FREE(rbuf, M_TEMP); + vnode_put(vp); + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, &at); + return (0); + } +again: + uio_reset(auio, off, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz); + + eofflag = 0; + error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, &context); + off = uio_offset(auio); + if (v3) { - getret = VOP_GETATTR(vp, &at, cred, procp); + nfsm_srv_vattr_init(&at, v3); + getret = vnode_getattr(vp, &at, &context); if (!error) error = getret; } - VOP_UNLOCK(vp, 0, procp); if (error) { - vrele(vp); - _FREE((caddr_t)rbuf, M_TEMP); - if (cookies) - _FREE((caddr_t)cookies, M_TEMP); + vnode_put(vp); + FREE(rbuf, M_TEMP); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } - if (io.uio_resid) { - siz -= io.uio_resid; + if (uio_resid(auio) != 0) { + // LP64todo - fix this + siz -= uio_resid(auio); /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - vrele(vp); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { @@ -2766,8 +3559,7 @@ again: nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; *tl = nfs_true; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); + FREE(rbuf, M_TEMP); return (0); } } @@ -2778,32 +3570,19 @@ again: */ cpos = rbuf; cend = rbuf + siz; - dp = (struct dirent *)cpos; - cookiep = cookies; -#ifdef __FreeBSD__ - /* - * For some reason FreeBSD's ufs_readdir() chooses to back the - * directory offset up to a block boundary, so it is necessary to - * skip over the records that preceed the requested offset. This - * requires the assumption that file offset cookies monotonically - * increase. - */ - while (cpos < cend && ncookies > 0 && - (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { -#else - while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { -#endif + dp = (struct direntry *)cpos; + while (dp->d_fileno == 0 && cpos < cend && nentries > 0) { cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - if (cpos >= cend || ncookies == 0) { + if (cpos >= cend || nentries == 0) { toff = off; siz = fullsiz; goto again; } + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; @@ -2814,36 +3593,42 @@ again: len = 2 * NFSX_UNSIGNED; mp = mp2 = mb; bp = bpos; - be = bp + M_TRAILINGSPACE(mp); + be = bp + mbuf_trailingspace(mp); /* Loop through the records and build reply */ - while (cpos < cend && ncookies > 0) { + while (cpos < cend && nentries > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; + if (!v3 && (nlen > NFS_MAXNAMLEN)) + nlen = NFS_MAXNAMLEN; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); if (v3) len += 2 * NFSX_UNSIGNED; - if (len > cnt) { + if (len > count) { eofflag = 0; break; } /* * Build the directory record xdr from - * the dirent entry. + * the direntry entry. 
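+	 * For v3 the 64-bit fileid (and later the cookie) is emitted as
+	 * two XDR words via txdr_hyper(); v2 still truncates to one word.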
*/ nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; + nfsm_clget; if (v3) { + txdr_hyper(&dp->d_fileno, &tquad); + *tl = tquad.nfsuquad[0]; + bp += NFSX_UNSIGNED; nfsm_clget; - *tl = 0; + *tl = tquad.nfsuquad[1]; + bp += NFSX_UNSIGNED; + } else { + *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; } nfsm_clget; - *tl = txdr_unsigned(dp->d_fileno); - bp += NFSX_UNSIGNED; - nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; @@ -2865,23 +3650,25 @@ again: /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; - nfsm_clget; - /* Finish off the record */ + /* Finish off the record with the cookie */ + nfsm_clget; if (v3) { - *tl = 0; + txdr_hyper(&dp->d_seekoff, &tquad); + *tl = tquad.nfsuquad[0]; bp += NFSX_UNSIGNED; nfsm_clget; + *tl = tquad.nfsuquad[1]; + bp += NFSX_UNSIGNED; + } else { + *tl = txdr_unsigned(dp->d_seekoff); + bp += NFSX_UNSIGNED; } - *tl = txdr_unsigned(*cookiep); - bp += NFSX_UNSIGNED; } cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -2893,135 +3680,150 @@ again: bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) - mp->m_len = bp - mtod(mp, caddr_t); + mbuf_setlen(mp, bp - (char*)mbuf_data(mp)); } else - mp->m_len += bp - bpos; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); - nfsm_srvdone; + mbuf_setlen(mp, mbuf_len(mp) + (bp - bpos)); + FREE(rbuf, M_TEMP); +nfsmout: + return (error); } +struct flrep { + nfsuint64 fl_off; + u_long fl_postopok; + u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; + u_long fl_fhok; + u_long fl_fhsize; + u_long fl_nfh[NFSX_V3FHMAX / sizeof (u_long)]; +}; + int nfsrv_readdirplus(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register char *bp, *be; - register struct mbuf *mp; - register struct dirent *dp; - register caddr_t cp; - register u_long *tl; - register long t1; + char *bp, *be; + mbuf_t mp; + struct direntry *dp; + caddr_t cp; + u_long *tl; + long t1; caddr_t bpos; - struct mbuf *mb, *mb2, *mreq, *mp2; + mbuf_t mb, mb2, mreq, mp2; char *cpos, *cend, *cp2, *rbuf; - struct vnode *vp, *nvp; + vnode_t vp, nvp; struct flrep fl; - nfsfh_t nfh; - fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; - struct uio io; - struct iovec iv; - struct vattr va, at, *vap = &va; + struct nfs_filehandle dnfh, *nfhp = (struct nfs_filehandle *)&fl.fl_fhsize; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + struct vnode_attr va, at, *vap = &va; struct nfs_fattr *fp; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; - int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies = 0; - u_quad_t frev, off, toff, verf; - u_long *cookies = NULL, *cookiep; - void *file; + int siz, count, fullsiz, eofflag, dirlen, nentries = 0, isdotdot; + u_quad_t off, toff, verf; + nfsuint64 tquad; + int vnopflag; + struct vfs_context context; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF; + vp = NULL; + nfsm_srvmtofh(&dnfh); nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, 
&verf); tl += 2; siz = fxdr_unsigned(int, *tl++); - cnt = fxdr_unsigned(int, *tl); + count = fxdr_unsigned(int, *tl); off = toff; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + if ((error = nfsrv_fhtovp(&dnfh, nam, TRUE, &vp, &nx, &nxo))) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } - error = getret = VOP_GETATTR(vp, &at, cred, procp); + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + nfsm_srv_vattr_init(&at, 1); + error = getret = vnode_getattr(vp, &at, &context); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; - if (!error) { - nqsrv_getl(vp, ND_READ); - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); - } + if (!error) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, &context, nxo, 0); if (error) { - vput(vp); + vnode_put(vp); + vp = NULL; nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } - VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + if (!rbuf) { + error = ENOMEM; + vnode_put(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + error = ENOMEM; + FREE(rbuf, M_TEMP); + vnode_put(vp); + vp = NULL; + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + return (0); + } again: - iv.iov_base = rbuf; - iv.iov_len = fullsiz; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_offset = (off_t)off; - io.uio_resid = fullsiz; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; + uio_reset(auio, off, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz); eofflag = 0; - if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; - } - if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { - FREE((caddr_t)rbuf, M_TEMP); - nfsm_reply(NFSX_V3POSTOPATTR); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); - off = (u_quad_t)io.uio_offset; - getret = VOP_GETATTR(vp, &at, cred, procp); - VOP_UNLOCK(vp, 0, procp); - /* - * See nfsrv_readdir comment above on this - */ - if ((ncookies != 0) && !cookies && !error) - error = NFSERR_PERM; + error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, &context); + off = uio_offset(auio); + nfsm_srv_vattr_init(&at, 1); + getret = vnode_getattr(vp, &at, &context); if (!error) error = getret; if (error) { - vrele(vp); - if (cookies) - _FREE((caddr_t)cookies, M_TEMP); - _FREE((caddr_t)rbuf, M_TEMP); + vnode_put(vp); + vp = NULL; + FREE(rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } - if (io.uio_resid) { - siz -= io.uio_resid; + if (uio_resid(auio) != 0) { + // LP64todo - fix this + siz -= uio_resid(auio); /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - vrele(vp); + vnode_put(vp); + vp = NULL; nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); @@ -3030,8 +3832,7 @@ again: tl += 2; *tl++ = nfs_false; *tl = nfs_true; - FREE((caddr_t)cookies, M_TEMP); - 
FREE((caddr_t)rbuf, M_TEMP); + FREE(rbuf, M_TEMP); return (0); } } @@ -3042,27 +3843,13 @@ again: */ cpos = rbuf; cend = rbuf + siz; - dp = (struct dirent *)cpos; - cookiep = cookies; -#ifdef __FreeBSD__ - /* - * For some reason FreeBSD's ufs_readdir() chooses to back the - * directory offset up to a block boundary, so it is necessary to - * skip over the records that preceed the requested offset. This - * requires the assumption that file offset cookies monotonically - * increase. - */ - while (cpos < cend && ncookies > 0 && - (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { -#else - while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { -#endif + dp = (struct direntry *)cpos; + while (dp->d_fileno == 0 && cpos < cend && nentries > 0) { cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - if (cpos >= cend || ncookies == 0) { + if (cpos >= cend || nentries == 0) { toff = off; siz = fullsiz; goto again; @@ -3070,70 +3857,56 @@ again: /* * Probe one of the directory entries to see if the filesystem - * supports VGET. See later comment for VFS_VGET changes. + * supports VGET. */ - if (vp->v_tag == VT_UFS) - file = (void *) dp->d_fileno; - else { - file = &dp->d_fileno; - } - - if (error = VFS_VGET(vp->v_mount, file, &nvp)) { - if (error == EOPNOTSUPP) /* let others get passed back */ + if ((error = VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, &context))) { + if (error == ENOTSUP) /* let others get passed back */ error = NFSERR_NOTSUPP; - vrele(vp); - _FREE((caddr_t)cookies, M_TEMP); - _FREE((caddr_t)rbuf, M_TEMP); + vnode_put(vp); + vp = NULL; + FREE(rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } - vput(nvp); + vnode_put(nvp); dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; - nfsm_reply(cnt); + nfsm_reply(count); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); mp = mp2 = mb; bp = bpos; - be = bp + M_TRAILINGSPACE(mp); + be = bp + mbuf_trailingspace(mp); /* Loop through the records and build reply */ - while (cpos < cend && ncookies > 0) { + while (cpos < cend && nentries > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; /* * Got to get the vnode for lookup per entry. - * HFS+/volfs and others use address of file identifier to VGET - * UFS, nullfs, umapfs use inode (u_int32_t) - * until they are consistent, we must differentiate now. - * UFS is the only one of the latter class that is exported. - * Note this will be pulled out as we resolve the VGET issue - * of which it should use u_in32_t or addresses. */ - - if (vp->v_tag == VT_UFS) - file = (void *) dp->d_fileno; - else - file = &dp->d_fileno; - - if (VFS_VGET(vp->v_mount, file, &nvp)) + if (VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, &context)) goto invalid; - bzero((caddr_t)nfhp, NFSX_V3FH); - nfhp->fh_fsid = - nvp->v_mount->mnt_stat.f_fsid; - if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { - vput(nvp); + isdotdot = ((dp->d_namlen == 2) && + (dp->d_name[0] == '.') && (dp->d_name[1] == '.')); + if (nfsrv_vptofh(nx, 0, (isdotdot ? 
&dnfh : NULL), nvp, &context, nfhp)) { + // XXX file handle is optional, so we should be able to + // XXX return this entry without the file handle + vnode_put(nvp); goto invalid; } - if (VOP_GETATTR(nvp, vap, cred, procp)) { - vput(nvp); + nfsm_srv_vattr_init(vap, 1); + if (vnode_getattr(nvp, vap, &context)) { + // XXX attributes are optional, so we should be able to + // XXX return this entry without the attributes + vnode_put(nvp); goto invalid; } - vput(nvp); + vnode_put(nvp); /* * If either the dircount or maxcount will be @@ -3141,35 +3914,37 @@ again: * are calculated conservatively, including all * XDR overheads. */ - len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + + len += (8 * NFSX_UNSIGNED + nlen + rem + nfhp->nfh_len + NFSX_V3POSTOPATTR); dirlen += (6 * NFSX_UNSIGNED + nlen + rem); - if (len > cnt || dirlen > fullsiz) { + if (len > count || dirlen > fullsiz) { eofflag = 0; break; } /* * Build the directory record xdr from - * the dirent entry. + * the direntry entry. */ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); - fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); + fl.fl_fhsize = txdr_unsigned(nfhp->nfh_len); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; - fl.fl_off.nfsuquad[0] = 0; - fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); + txdr_hyper(&dp->d_seekoff, &fl.fl_off); nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; + nfsm_clget; - *tl = 0; + txdr_hyper(&dp->d_fileno, &tquad); + *tl = tquad.nfsuquad[0]; bp += NFSX_UNSIGNED; nfsm_clget; - *tl = txdr_unsigned(dp->d_fileno); + *tl = tquad.nfsuquad[1]; bp += NFSX_UNSIGNED; + nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; @@ -3196,7 +3971,7 @@ again: /* * Now copy the flrep structure out. */ - xfer = sizeof (struct flrep); + xfer = sizeof(struct flrep) - sizeof(fl.fl_nfh) + fl.fl_fhsize; cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; @@ -3213,11 +3988,11 @@ again: } invalid: cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - vrele(vp); + vnode_put(vp); + vp = NULL; nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; @@ -3229,12 +4004,14 @@ invalid: bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) - mp->m_len = bp - mtod(mp, caddr_t); + mbuf_setlen(mp, bp - (char*)mbuf_data(mp)); } else - mp->m_len += bp - bpos; - FREE((caddr_t)cookies, M_TEMP); - FREE((caddr_t)rbuf, M_TEMP); - nfsm_srvdone; + mbuf_setlen(mp, mbuf_len(mp) + (bp - bpos)); + FREE(rbuf, M_TEMP); +nfsmout: + if (vp) + vnode_put(vp); + return (error); } /* @@ -3244,63 +4021,66 @@ int nfsrv_commit(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr bfor, aft; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; + struct vnode_attr bfor, aft; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; + int error = 0, for_ret = 1, aft_ret = 1, count; char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev, off; - int didhold; + mbuf_t mb, mb2, mreq; + u_quad_t off; + struct vfs_context context; -#ifndef nolint - cache = 0; 
-#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); + nfsm_srvmtofh(&nfh); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* - * XXX At this time VOP_FSYNC() does not accept offset and byte + * XXX At this time VNOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). */ fxdr_hyper(tl, &off); tl += 2; - cnt = fxdr_unsigned(int, *tl); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + count = fxdr_unsigned(int, *tl); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); return (0); } - for_ret = VOP_GETATTR(vp, &bfor, cred, procp); - didhold = ubc_hold(vp); - error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); - aft_ret = VOP_GETATTR(vp, &aft, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + nfsm_reply(2 * NFSX_UNSIGNED); + nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); + return (0); + } + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + nfsm_srv_pre_vattr_init(&bfor, 1); + for_ret = vnode_getattr(vp, &bfor, &context); + error = VNOP_FSYNC(vp, MNT_WAIT, &context); + nfsm_srv_vattr_init(&aft, 1); + aft_ret = vnode_getattr(vp, &aft, &context); + vnode_put(vp); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); + *tl++ = txdr_unsigned(boottime_sec()); + *tl = txdr_unsigned(0); } else return (0); - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -3310,44 +4090,55 @@ int nfsrv_statfs(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct statfs *sf; - register struct nfs_statfs *sfp; - register u_long *tl; - register long t1; + struct vfs_attr va; + struct nfs_statfs *sfp; + u_long *tl; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, getret = 1; + int error = 0, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - struct statfs statfs; - u_quad_t frev, tval; + mbuf_t mb, mb2, mreq; + vnode_t vp; + struct vnode_attr at; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + u_quad_t tval; + off_t blksize; + struct vfs_context context; -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + nfsm_srvmtofh(&nfh); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } - sf = &statfs; - error = VFS_STATFS(vp->v_mount, sf, procp); - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); + context.vc_proc = procp; + context.vc_ucred 
= nfsd->nd_cr; + + VFSATTR_INIT(&va); + VFSATTR_WANTED(&va, f_blocks); + VFSATTR_WANTED(&va, f_bavail); + VFSATTR_WANTED(&va, f_files); + VFSATTR_WANTED(&va, f_ffree); + error = vfs_getattr(vnode_mount(vp), &va, &context); + blksize = vnode_mount(vp)->mnt_vfsstat.f_bsize; + nfsm_srv_vattr_init(&at, v3); + getret = vnode_getattr(vp, &at, &context); + vnode_put(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); @@ -3355,30 +4146,25 @@ nfsrv_statfs(nfsd, slp, procp, mrq) return (0); nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); if (v3) { - tval = (u_quad_t)(unsigned long)sf->f_blocks; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; + tval = (u_quad_t)(va.f_blocks * blksize); txdr_hyper(&tval, &sfp->sf_tbytes); - tval = (u_quad_t)(unsigned long)sf->f_bfree; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; + tval = (u_quad_t)(va.f_bfree * blksize); txdr_hyper(&tval, &sfp->sf_fbytes); - tval = (u_quad_t)(unsigned long)sf->f_bavail; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; + tval = (u_quad_t)(va.f_bavail * blksize); txdr_hyper(&tval, &sfp->sf_abytes); - sfp->sf_tfiles.nfsuquad[0] = 0; - sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); - sfp->sf_ffiles.nfsuquad[0] = 0; - sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); - sfp->sf_afiles.nfsuquad[0] = 0; - sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); + txdr_hyper(&va.f_files, &sfp->sf_tfiles); + txdr_hyper(&va.f_ffree, &sfp->sf_ffiles); + txdr_hyper(&va.f_ffree, &sfp->sf_afiles); sfp->sf_invarsec = 0; } else { sfp->sf_tsize = txdr_unsigned(NFS_V2MAXDATA); - sfp->sf_bsize = txdr_unsigned(sf->f_bsize); - sfp->sf_blocks = txdr_unsigned(sf->f_blocks); - sfp->sf_bfree = txdr_unsigned(sf->f_bfree); - sfp->sf_bavail = txdr_unsigned(sf->f_bavail); + sfp->sf_bsize = txdr_unsigned((unsigned)blksize); + sfp->sf_blocks = txdr_unsigned((unsigned)va.f_blocks); + sfp->sf_bfree = txdr_unsigned((unsigned)va.f_bfree); + sfp->sf_bavail = txdr_unsigned((unsigned)va.f_bavail); } - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -3388,39 +4174,44 @@ int nfsrv_fsinfo(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register struct nfsv3_fsinfo *sip; - register long t1; + u_long *tl; + struct nfsv3_fsinfo *sip; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, getret = 1, pref, max; + int error = 0, getret = 1, prefsize, maxsize; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; + mbuf_t mb, mb2, mreq; + vnode_t vp; + struct vnode_attr at; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context context; -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + nfsm_srvmtofh(&nfh); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); + 
nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + nfsm_srv_vattr_init(&at, 1); + getret = vnode_getattr(vp, &at, &context); + vnode_put(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); @@ -3430,17 +4221,18 @@ nfsrv_fsinfo(nfsd, slp, procp, mrq) * There should be file system VFS OP(s) to get this information. * For now, assume our usual NFS defaults. */ - if (slp->ns_so->so_type == SOCK_DGRAM) - max = pref = NFS_MAXDGRAMDATA; - else - max = pref = NFS_MAXDATA; - sip->fs_rtmax = txdr_unsigned(max); - sip->fs_rtpref = txdr_unsigned(pref); + if (slp->ns_sotype == SOCK_DGRAM) { + maxsize = NFS_MAXDGRAMDATA; + prefsize = NFS_PREFDGRAMDATA; + } else + maxsize = prefsize = NFS_MAXDATA; + sip->fs_rtmax = txdr_unsigned(maxsize); + sip->fs_rtpref = txdr_unsigned(prefsize); sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); - sip->fs_wtmax = txdr_unsigned(max); - sip->fs_wtpref = txdr_unsigned(pref); + sip->fs_wtmax = txdr_unsigned(maxsize); + sip->fs_wtpref = txdr_unsigned(prefsize); sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); - sip->fs_dtpref = txdr_unsigned(pref); + sip->fs_dtpref = txdr_unsigned(prefsize); sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; sip->fs_timedelta.nfsv3_sec = 0; @@ -3448,7 +4240,8 @@ nfsrv_fsinfo(nfsd, slp, procp, mrq) sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -3458,51 +4251,56 @@ int nfsrv_pathconf(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; + proc_t procp; + mbuf_t *mrq; { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; + mbuf_t mrep = nfsd->nd_mrep, md = nfsd->nd_md; + mbuf_t nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register struct nfsv3_pathconf *pc; - register long t1; + u_long *tl; + struct nfsv3_pathconf *pc; + long t1; caddr_t bpos; - int error = 0, rdonly, cache, getret = 1, linkmax, namemax; + int error = 0, getret = 1, linkmax, namemax; int chownres, notrunc, case_sensitive, case_preserving; char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; + mbuf_t mb, mb2, mreq; + vnode_t vp; + struct vnode_attr at; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct vfs_context context; -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { + nfsm_srvmtofh(&nfh); + if ((error = nfsrv_fhtovp(&nfh, nam, TRUE, &vp, &nx, &nxo))) { + nfsm_reply(NFSX_UNSIGNED); + nfsm_srvpostop_attr(getret, &at); + return (0); + } + if ((error = nfsrv_credcheck(nfsd, nx, nxo))) { + vnode_put(vp); nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } - error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); + context.vc_proc = procp; + context.vc_ucred = nfsd->nd_cr; + + error = VNOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax, &context); if (!error) - error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); + error = VNOP_PATHCONF(vp, _PC_NAME_MAX, &namemax, &context); if (!error) - error = 
VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); + error = VNOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres, &context); if (!error) - error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc); + error = VNOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc, &context); if (!error) - error = VOP_PATHCONF(vp, _PC_CASE_SENSITIVE, &case_sensitive); + error = VNOP_PATHCONF(vp, _PC_CASE_SENSITIVE, &case_sensitive, &context); if (!error) - error = VOP_PATHCONF(vp, _PC_CASE_PRESERVING, &case_preserving); - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); + error = VNOP_PATHCONF(vp, _PC_CASE_PRESERVING, &case_preserving, &context); + nfsm_srv_vattr_init(&at, 1); + getret = vnode_getattr(vp, &at, &context); + vnode_put(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) @@ -3516,7 +4314,8 @@ nfsrv_pathconf(nfsd, slp, procp, mrq) pc->pc_caseinsensitive = txdr_unsigned(!case_sensitive); pc->pc_casepreserving = txdr_unsigned(case_preserving); - nfsm_srvdone; +nfsmout: + return (error); } /* @@ -3524,22 +4323,19 @@ nfsrv_pathconf(nfsd, slp, procp, mrq) */ /* ARGSUSED */ int -nfsrv_null(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_null( + struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + __unused proc_t procp, + mbuf_t *mrq) { - struct mbuf *mrep = nfsd->nd_mrep; + mbuf_t mrep = nfsd->nd_mrep; caddr_t bpos; - int error = NFSERR_RETVOID, cache; - struct mbuf *mb, *mreq; - u_quad_t frev; + int error = NFSERR_RETVOID; + mbuf_t mb, mreq; -#ifndef nolint - cache = 0; -#endif nfsm_reply(0); +nfsmout: return (0); } @@ -3548,83 +4344,79 @@ nfsrv_null(nfsd, slp, procp, mrq) */ /* ARGSUSED */ int -nfsrv_noop(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_noop( + struct nfsrv_descript *nfsd, + struct nfssvc_sock *slp, + __unused proc_t procp, + mbuf_t *mrq) { - struct mbuf *mrep = nfsd->nd_mrep; + mbuf_t mrep = nfsd->nd_mrep; caddr_t bpos; - int error, cache; - struct mbuf *mb, *mreq; - u_quad_t frev; + int error; + mbuf_t mb, mreq; -#ifndef nolint - cache = 0; -#endif if (nfsd->nd_repstat) error = nfsd->nd_repstat; else error = EPROCUNAVAIL; nfsm_reply(0); +nfsmout: return (0); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use - * vn_writechk() and VOP_ACCESS() for two reasons. + * vnode_authorize() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. - - * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS() + * + * The exception to rule 2 is EPERM. If a file is IMMUTABLE, vnode_authorize() + * will return EPERM instead of EACCES. EPERM is always an error. 
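+ * (The owner override is implemented below by retrying an EACCES
+ * result against the file owner's UID obtained via vnode_getattr().)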
*/ static int -nfsrv_access(vp, flags, cred, rdonly, p, override) - register struct vnode *vp; - int flags; - register struct ucred *cred; - int rdonly; - struct proc *p; - int override; +nfsrv_authorize( + vnode_t vp, + vnode_t dvp, + kauth_action_t action, + vfs_context_t context, + struct nfs_export_options *nxo, + int override) { - struct vattr vattr; + struct vnode_attr vattr; int error; - if (flags & VWRITE) { - /* Just vn_writechk() changed to check rdonly */ + + if (action & KAUTH_VNODE_WRITE_RIGHTS) { /* - * Disallow write attempts on read-only file systems; + * Disallow write attempts on read-only exports; * unless the file is a socket or a block or character * device resident on the file system. */ - if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { - switch (vp->v_type) { + if (nxo->nxo_flags & NX_READONLY) { + switch (vnode_vtype(vp)) { case VREG: case VDIR: case VLNK: case VCPLX: return (EROFS); + default: + break; } } - /* - * If there's shared text associated with - * the inode, we can't allow writing. - */ - if (vp->v_flag & VTEXT) - return (ETXTBSY); } - if ((error = VOP_GETATTR(vp, &vattr, cred, p))) - return (error); - error = VOP_ACCESS(vp, flags, cred, p); + error = vnode_authorize(vp, dvp, action, context); /* * Allow certain operations for the owner (reads and writes * on files that are already open). Picking up from FreeBSD. */ - if (override && error == EACCES && cred->cr_uid == vattr.va_uid) - error = 0; + if (override && (error == EACCES)) { + VATTR_INIT(&vattr); + VATTR_WANTED(&vattr, va_uid); + if ((vnode_getattr(vp, &vattr, context) == 0) && + (kauth_cred_getuid(vfs_context_ucred(context)) == vattr.va_uid)) + error = 0; + } return error; } #endif /* NFS_NOSERVER */ diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index ff2f55066..3f36830fa 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,18 +66,19 @@ #include #include #include -#include +#include +#include #include -#include +#include #include #include #include #include #include -#include #include #include -#include +#include +#include #include #include @@ -96,7 +97,6 @@ #include #include #include -#include #include @@ -110,9 +110,6 @@ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ (int)(B), (int)(C), (int)(D), (int)(E), 0) -#define TRUE 1 -#define FALSE 0 - /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. 
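+ * (i.e. a Van Jacobson-style estimator; the nfs_backoff table below
+ * additionally scales the timeout on successive retransmits)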
* Use the mean and mean deviation of rtt for the appropriate type of rpc @@ -138,8 +135,7 @@ extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, rpc_auth_kerb; -extern u_long nfs_prog, nqnfs_prog; -extern time_t nqnfsstarttime; +extern u_long nfs_prog; extern struct nfsstats nfsstats; extern int nfsv3_procid[NFS_NPROCS]; extern int nfs_ticks; @@ -154,8 +150,7 @@ extern u_long nfs_xidwrap; * 4 - write */ static int proct[NFS_NPROCS] = { - 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, - 0, 0, 0, + 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0 }; /* @@ -177,27 +172,23 @@ static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; int nfsrtton = 0; struct nfsrtt nfsrtt; -static int nfs_msg __P((struct proc *, const char *, const char *, int)); -static int nfs_rcvlock __P((struct nfsreq *)); -static void nfs_rcvunlock __P((struct nfsreq *)); -static int nfs_receive __P((struct nfsreq *rep, struct mbuf **aname, - struct mbuf **mp)); -static int nfs_reconnect __P((struct nfsreq *rep)); -static void nfs_repbusy(struct nfsreq *rep); -static struct nfsreq * nfs_repnext(struct nfsreq *rep); +static int nfs_rcvlock(struct nfsreq *); +static void nfs_rcvunlock(struct nfsreq *); +static int nfs_receive(struct nfsreq *rep, mbuf_t *mp); +static int nfs_reconnect(struct nfsreq *rep); static void nfs_repdequeue(struct nfsreq *rep); /* XXX */ boolean_t current_thread_aborted(void); -kern_return_t thread_terminate(thread_act_t); +kern_return_t thread_terminate(thread_t); #ifndef NFS_NOSERVER -static int nfsrv_getstream __P((struct nfssvc_sock *,int)); +static int nfsrv_getstream(struct nfssvc_sock *,int); -int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, +int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, - struct proc *procp, - struct mbuf **mreqp)) = { + proc_t procp, + mbuf_t *mreqp) = { nfsrv_null, nfsrv_getattr, nfsrv_setattr, @@ -220,137 +211,10 @@ int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, nfsrv_fsinfo, nfsrv_pathconf, nfsrv_commit, - nqnfsrv_getlease, - nqnfsrv_vacated, - nfsrv_noop, nfsrv_noop }; #endif /* NFS_NOSERVER */ -/* - * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG) - * But some of this code may prove useful someday... - */ -#undef NFSDIAG -#if NFSDIAG -int nfstraceindx = 0; -struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}}; - -#define NFSTRACESUSPENDERS -#ifdef NFSTRACESUSPENDERS -uint nfstracemask = 0xfff00200; -int nfstracexid = -1; -uint onfstracemask = 0; -int nfstracesuspend = -1; -#define NFSTRACE_SUSPEND \ - { \ - if (nfstracemask) { \ - onfstracemask = nfstracemask; \ - nfstracemask = 0; \ - } \ - } -#define NFSTRACE_RESUME \ - { \ - nfstracesuspend = -1; \ - if (!nfstracemask) \ - nfstracemask = onfstracemask; \ - } -#define NFSTRACE_STARTSUSPENDCOUNTDOWN \ - { \ - nfstracesuspend = (nfstraceindx+100) % NFSTBUFSIZ; \ - } -#define NFSTRACE_SUSPENDING (nfstracesuspend != -1) -#define NFSTRACE_SUSPENSEOVER \ - (nfstracesuspend > 100 ? 
\ - (nfstraceindx >= nfstracesuspend || \ - nfstraceindx < nfstracesuspend - 100) : \ - (nfstraceindx >= nfstracesuspend && \ - nfstraceindx < nfstracesuspend + 8192 - 100)) -#else -uint nfstracemask = 0; -#endif /* NFSTRACESUSPENDERS */ - -int nfsprnttimo = 1; - -int nfsodata[1024]; -int nfsoprocnum, nfsolen; -int nfsbt[32], nfsbtlen; - -#if defined(__ppc__) -int -backtrace(int *where, int size) -{ - int register sp, *fp, numsaved; - - __asm__ volatile("mr %0,r1" : "=r" (sp)); - - fp = (int *)*((int *)sp); - size /= sizeof(int); - for (numsaved = 0; numsaved < size; numsaved++) { - *where++ = fp[2]; - if ((int)fp <= 0) - break; - fp = (int *)*fp; - } - return (numsaved); -} -#elif defined(__i386__) -int -backtrace() -{ - return (0); /* Till someone implements a real routine */ -} -#else -#error architecture not implemented. -#endif - -void -nfsdup(struct nfsreq *rep) -{ - int *ip, i, first = 1, end; - char *s, b[240]; - struct mbuf *mb; - - if ((nfs_debug & NFS_DEBUG_DUP) == 0) - return; - /* last mbuf in chain will be nfs content */ - for (mb = rep->r_mreq; mb->m_next; mb = mb->m_next) - ; - if (rep->r_procnum == nfsoprocnum && mb->m_len == nfsolen && - !bcmp((caddr_t)nfsodata, mb->m_data, nfsolen)) { - s = b + sprintf(b, "nfsdup x=%x p=%d h=", rep->r_xid, - rep->r_procnum); - end = (int)(VTONFS(rep->r_vp)->n_fhp); - ip = (int *)(end & ~3); - end += VTONFS(rep->r_vp)->n_fhsize; - while ((int)ip < end) { - i = *ip++; - if (first) { /* avoid leading zeroes */ - if (i == 0) - continue; - first = 0; - s += sprintf(s, "%x", i); - } else - s += sprintf(s, "%08x", i); - } - if (first) - sprintf(s, "%x", 0); - else /* eliminate trailing zeroes */ - while (*--s == '0') - *s = 0; - /* - * set a breakpoint here and you can view the - * current backtrace and the one saved in nfsbt - */ - kprintf("%s\n", b); - } - nfsoprocnum = rep->r_procnum; - nfsolen = mb->m_len; - bcopy(mb->m_data, (caddr_t)nfsodata, mb->m_len); - nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt)); -} -#endif /* NFSDIAG */ - /* * attempt to bind a socket to a reserved port @@ -358,7 +222,7 @@ nfsdup(struct nfsreq *rep) static int nfs_bind_resv(struct nfsmount *nmp) { - struct socket *so = nmp->nm_so; + socket_t so = nmp->nm_so; struct sockaddr_in sin; int error; u_short tport; @@ -372,7 +236,7 @@ nfs_bind_resv(struct nfsmount *nmp) tport = IPPORT_RESERVED - 1; sin.sin_port = htons(tport); - while (((error = sobind(so, (struct sockaddr *) &sin)) == EADDRINUSE) && + while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) && (--tport > IPPORT_RESERVED / 2)) sin.sin_port = htons(tport); return (error); @@ -385,7 +249,10 @@ int nfs_resv_mounts = 0; static int nfs_bind_resv_thread_state = 0; #define NFS_BIND_RESV_THREAD_STATE_INITTED 1 #define NFS_BIND_RESV_THREAD_STATE_RUNNING 2 -static struct slock nfs_bind_resv_slock; +lck_grp_t *nfs_bind_resv_lck_grp; +lck_grp_attr_t *nfs_bind_resv_lck_grp_attr; +lck_attr_t *nfs_bind_resv_lck_attr; +lck_mtx_t *nfs_bind_resv_mutex; struct nfs_bind_resv_request { TAILQ_ENTRY(nfs_bind_resv_request) brr_chain; struct nfsmount *brr_nmp; @@ -400,28 +267,25 @@ static void nfs_bind_resv_thread(void) { struct nfs_bind_resv_request *brreq; - boolean_t funnel_state; - funnel_state = thread_funnel_set(network_flock, TRUE); nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_RUNNING; while (nfs_resv_mounts > 0) { - simple_lock(&nfs_bind_resv_slock); + lck_mtx_lock(nfs_bind_resv_mutex); while ((brreq = TAILQ_FIRST(&nfs_bind_resv_request_queue))) { TAILQ_REMOVE(&nfs_bind_resv_request_queue, brreq, 
brr_chain); - simple_unlock(&nfs_bind_resv_slock); + lck_mtx_unlock(nfs_bind_resv_mutex); brreq->brr_error = nfs_bind_resv(brreq->brr_nmp); wakeup(brreq); - simple_lock(&nfs_bind_resv_slock); + lck_mtx_lock(nfs_bind_resv_mutex); } - simple_unlock(&nfs_bind_resv_slock); - (void)tsleep((caddr_t)&nfs_bind_resv_request_queue, PSOCK, + msleep((caddr_t)&nfs_bind_resv_request_queue, + nfs_bind_resv_mutex, PSOCK | PDROP, "nfs_bind_resv_request_queue", 0); } nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED; - (void) thread_funnel_set(network_flock, funnel_state); - (void) thread_terminate(current_act()); + (void) thread_terminate(current_thread()); } int @@ -445,7 +309,11 @@ nfs_bind_resv_nopriv(struct nfsmount *nmp) if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_RUNNING) { if (nfs_bind_resv_thread_state < NFS_BIND_RESV_THREAD_STATE_INITTED) { - simple_lock_init(&nfs_bind_resv_slock); + nfs_bind_resv_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfs_bind_resv_lck_grp_attr); + nfs_bind_resv_lck_grp = lck_grp_alloc_init("nfs_bind_resv", nfs_bind_resv_lck_grp_attr); + nfs_bind_resv_lck_attr = lck_attr_alloc_init(); + nfs_bind_resv_mutex = lck_mtx_alloc_init(nfs_bind_resv_lck_grp, nfs_bind_resv_lck_attr); TAILQ_INIT(&nfs_bind_resv_request_queue); nfs_bind_resv_thread_state = NFS_BIND_RESV_THREAD_STATE_INITTED; } @@ -456,9 +324,9 @@ nfs_bind_resv_nopriv(struct nfsmount *nmp) brreq.brr_nmp = nmp; brreq.brr_error = 0; - simple_lock(&nfs_bind_resv_slock); + lck_mtx_lock(nfs_bind_resv_mutex); TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain); - simple_unlock(&nfs_bind_resv_slock); + lck_mtx_unlock(nfs_bind_resv_mutex); error = nfs_bind_resv_thread_wake(); if (error) { @@ -467,7 +335,7 @@ nfs_bind_resv_nopriv(struct nfsmount *nmp) return (error); } - (void) tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0); + tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0); return (brreq.brr_error); } @@ -477,30 +345,29 @@ nfs_bind_resv_nopriv(struct nfsmount *nmp) * We do not free the sockaddr if error. */ int -nfs_connect(nmp, rep) - struct nfsmount *nmp; - struct nfsreq *rep; +nfs_connect( + struct nfsmount *nmp, + __unused struct nfsreq *rep) { - struct socket *so; - int s, error, rcvreserve, sndreserve; + socket_t so; + int error, rcvreserve, sndreserve; struct sockaddr *saddr; + struct timeval timeo; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - nmp->nm_so = (struct socket *)0; - saddr = mtod(nmp->nm_nam, struct sockaddr *); - error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype, - nmp->nm_soproto); + nmp->nm_so = 0; + saddr = mbuf_data(nmp->nm_nam); + error = sock_socket(saddr->sa_family, nmp->nm_sotype, + nmp->nm_soproto, 0, 0, &nmp->nm_so); if (error) { goto bad; } so = nmp->nm_so; - nmp->nm_soflags = so->so_proto->pr_flags; /* * Some servers require that the client port be a reserved port number. */ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { - struct proc *p; + proc_t p; /* * sobind() requires current_proc() to have superuser privs. * If this bind is part of a reconnect, and the current proc @@ -508,7 +375,7 @@ nfs_connect(nmp, rep) * a kernel thread to process. 
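 *
 * A rough sketch of that hand-off (this just summarizes
 * nfs_bind_resv_nopriv() above, it is not new logic): the
 * unprivileged caller queues a request, wakes the privileged
 * thread, and sleeps until the bind result shows up in brr_error:
 *
 *	brreq.brr_nmp = nmp;
 *	brreq.brr_error = 0;
 *	lck_mtx_lock(nfs_bind_resv_mutex);
 *	TAILQ_INSERT_TAIL(&nfs_bind_resv_request_queue, &brreq, brr_chain);
 *	lck_mtx_unlock(nfs_bind_resv_mutex);
 *	error = nfs_bind_resv_thread_wake();
 *	if (!error)
 *		tsleep((caddr_t)&brreq, PSOCK, "nfsbindresv", 0);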
*/ if ((nmp->nm_state & NFSSTA_MOUNTED) && - (p = current_proc()) && suser(p->p_ucred, &p->p_acflag)) { + (p = current_proc()) && suser(kauth_cred_get(), 0)) { /* request nfs_bind_resv_thread() to do bind */ error = nfs_bind_resv_nopriv(nmp); } else { @@ -523,51 +390,40 @@ nfs_connect(nmp, rep) * unconnected for servers that reply from a port other than NFS_PORT. */ if (nmp->nm_flag & NFSMNT_NOCONN) { - if (nmp->nm_soflags & PR_CONNREQUIRED) { + if (nmp->nm_sotype == SOCK_STREAM) { error = ENOTCONN; goto bad; } } else { - error = soconnect(so, mtod(nmp->nm_nam, struct sockaddr *)); - if (error) { + struct timeval tv; + tv.tv_sec = 2; + tv.tv_usec = 0; + error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT); + if (error && error != EINPROGRESS) { goto bad; } - - /* - * Wait for the connection to complete. Cribbed from the - * connect system call but with the wait timing out so - * that interruptible mounts don't hang here for a long time. - */ - s = splnet(); - while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - (void) tsleep((caddr_t)&so->so_timeo, PSOCK, - "nfscon", 2 * hz); - if ((so->so_state & SS_ISCONNECTING) && - so->so_error == 0 && rep && - (error = nfs_sigintr(nmp, rep, rep->r_procp))) { - so->so_state &= ~SS_ISCONNECTING; - splx(s); + + while ((error = sock_connectwait(so, &tv)) == EINPROGRESS) { + if (rep && (error = nfs_sigintr(nmp, rep, rep->r_procp))) { goto bad; } } - if (so->so_error) { - error = so->so_error; - so->so_error = 0; - splx(s); - goto bad; - } - splx(s); } + /* * Always time out on receive; this allows us to reconnect the * socket to deal with network changes. */ - so->so_rcv.sb_timeo = (2 * hz); + timeo.tv_usec = 0; + timeo.tv_sec = 2; + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { - so->so_snd.sb_timeo = (5 * hz); + timeo.tv_sec = 5; } else { - so->so_snd.sb_timeo = 0; + timeo.tv_sec = 0; } + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (nmp->nm_sotype == SOCK_DGRAM) { sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 3; rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * @@ -577,34 +433,18 @@ nfs_connect(nmp, rep) rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * (nmp->nm_readahead > 0 ?
nmp->nm_readahead + 1 : 2); } else { + int proto; + int on = 1; + + sock_gettype(so, NULL, NULL, &proto); if (nmp->nm_sotype != SOCK_STREAM) panic("nfscon sotype"); - if (so->so_proto->pr_flags & PR_CONNREQUIRED) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); - } - if (so->so_proto->pr_protocol == IPPROTO_TCP) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + // Assume that SOCK_STREAM always requires a connection + sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); + + if (proto == IPPROTO_TCP) { + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); } sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) * 3; @@ -616,14 +456,16 @@ nfs_connect(nmp, rep) sndreserve = NFS_MAXSOCKBUF; if (rcvreserve > NFS_MAXSOCKBUF) rcvreserve = NFS_MAXSOCKBUF; - error = soreserve(so, sndreserve, rcvreserve); + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &sndreserve, sizeof(sndreserve)); + if (error) { + goto bad; + } + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &rcvreserve, sizeof(rcvreserve)); if (error) { goto bad; } - so->so_rcv.sb_flags |= SB_NOINTR; - so->so_snd.sb_flags |= SB_NOINTR; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + sock_nointerrupt(so, 1); /* Initialize other non-zero congestion variables */ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = @@ -637,7 +479,6 @@ nfs_connect(nmp, rep) return (0); bad: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); nfs_disconnect(nmp); return (error); } @@ -652,11 +493,10 @@ bad: * nb: Must be called with the nfs_sndlock() set on the mount point. */ static int -nfs_reconnect(rep) - register struct nfsreq *rep; +nfs_reconnect(struct nfsreq *rep) { - register struct nfsreq *rp; - register struct nfsmount *nmp = rep->r_nmp; + struct nfsreq *rp; + struct nfsmount *nmp = rep->r_nmp; int error; nfs_disconnect(nmp); @@ -665,8 +505,9 @@ nfs_reconnect(rep) return (EINTR); if (error == EIO) return (EIO); - nfs_down(rep, rep->r_nmp, rep->r_procp, "can not connect", - error, NFSSTA_TIMEO); + nfs_down(rep->r_nmp, rep->r_procp, error, NFSSTA_TIMEO, + "can not connect"); + rep->r_flags |= R_TPRINTFMSG; if (!(nmp->nm_state & NFSSTA_MOUNTED)) { /* we're not yet completely mounted and */ /* we can't reconnect, so we fail */ @@ -674,10 +515,9 @@ nfs_reconnect(rep) } if ((error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp))) return (error); - (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); + tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } - NFS_DPF(DUP, ("nfs_reconnect RESEND\n")); /* * Loop through outstanding request list and fix up all requests * on old socket. @@ -693,19 +533,16 @@ nfs_reconnect(rep) * NFS disconnect. Clean up and unlink. 
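 *
 * The body below clears nm_so first (presumably so that concurrent
 * tests of nmp->nm_so see the socket go away immediately), then
 * shuts the socket down in both directions before closing it:
 *
 *	so = nmp->nm_so;
 *	nmp->nm_so = 0;
 *	sock_shutdown(so, 2);
 *	sock_close(so);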
*/ void -nfs_disconnect(nmp) - register struct nfsmount *nmp; +nfs_disconnect(struct nfsmount *nmp) { - register struct socket *so; + socket_t so; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); if (nmp->nm_so) { so = nmp->nm_so; - nmp->nm_so = (struct socket *)0; - soshutdown(so, 2); - soclose(so); + nmp->nm_so = 0; + sock_shutdown(so, 2); + sock_close(so); } - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); } /* @@ -723,65 +560,61 @@ nfs_disconnect(nmp) */ int nfs_send(so, nam, top, rep) - register struct socket *so; - struct mbuf *nam; - register struct mbuf *top; + socket_t so; + mbuf_t nam; + mbuf_t top; struct nfsreq *rep; { struct sockaddr *sendnam; - int error, error2, soflags, flags; - int xidqueued = 0; + int error, error2, sotype, flags; + u_long xidqueued = 0; struct nfsreq *rp; - char savenametolog[MNAMELEN]; + char savenametolog[MAXPATHLEN]; + struct msghdr msg; if (rep) { error = nfs_sigintr(rep->r_nmp, rep, rep->r_procp); if (error) { - m_freem(top); + mbuf_freem(top); return (error); } if ((so = rep->r_nmp->nm_so) == NULL) { rep->r_flags |= R_MUSTRESEND; - m_freem(top); + mbuf_freem(top); return (0); } rep->r_flags &= ~R_MUSTRESEND; - soflags = rep->r_nmp->nm_soflags; TAILQ_FOREACH(rp, &nfs_reqq, r_chain) if (rp == rep) break; if (rp) xidqueued = rp->r_xid; - } else - soflags = so->so_proto->pr_flags; - if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED) || + } + sock_gettype(so, NULL, &sotype, NULL); + if ((sotype == SOCK_STREAM) || (sock_isconnected(so)) || (nam == 0)) sendnam = (struct sockaddr *)0; else - sendnam = mtod(nam, struct sockaddr *); + sendnam = mbuf_data(nam); - if (so->so_type == SOCK_SEQPACKET) + if (sotype == SOCK_SEQPACKET) flags = MSG_EOR; else flags = 0; -#if NFSDIAG - if (rep) - nfsdup(rep); -#endif /* - * Save the name here in case mount point goes away when we switch - * funnels. The name is using local stack and is large, but don't + * Save the name here in case mount point goes away if we block. + * The name is using local stack and is large, but don't * want to block if we malloc. */ if (rep) strncpy(savenametolog, - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname, - MNAMELEN); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error = sosend(so, sendnam, (struct uio *)0, top, - (struct mbuf *)0, flags); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname, + MAXPATHLEN - 1); + bzero(&msg, sizeof(msg)); + msg.msg_name = (caddr_t)sendnam; + msg.msg_namelen = sendnam == 0 ? 0 : sendnam->sa_len; + error = sock_sendmbuf(so, &msg, top, flags, NULL); if (error) { if (rep) { @@ -803,8 +636,6 @@ nfs_send(so, nam, top, rep) error = error2; } else { rep->r_flags |= R_MUSTRESEND; - NFS_DPF(DUP, - ("nfs_send RESEND error=%d\n", error)); } } else log(LOG_INFO, "nfsd send error %d\n", error); @@ -830,29 +661,22 @@ nfs_send(so, nam, top, rep) * we have read any of it, even if the system call has been interrupted. 
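 *
 * For SOCK_STREAM mounts the loop below reassembles Sun RPC records
 * from the record-marking stream: each fragment is preceded by a
 * 4-byte mark whose high bit flags the final fragment and whose low
 * 31 bits carry the fragment length,
 *
 *	lastfragment = ntohl(fraglen) & 0x80000000;
 *	fraglen = ntohl(fraglen) & ~0x80000000;
 *
 * and the fragments are chained together with mbuf_setnext() until
 * the final one completes the record.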
*/ static int -nfs_receive(rep, aname, mp) - register struct nfsreq *rep; - struct mbuf **aname; - struct mbuf **mp; +nfs_receive(struct nfsreq *rep, mbuf_t *mp) { - register struct socket *so; - struct uio auio; - struct iovec aio; - register struct mbuf *m; - struct mbuf *control; - u_long len; - struct sockaddr **getnam; - struct sockaddr *tmp_nam; - struct mbuf *mhck; - struct sockaddr_in *sin; - int error, error2, sotype, rcvflg; - struct proc *p = current_proc(); /* XXX */ + socket_t so; + struct iovec_32 aio; + mbuf_t m, mlast; + u_long len, fraglen; + int error, error2, sotype; + proc_t p = current_proc(); /* XXX */ + struct msghdr msg; + size_t rcvlen; + int lastfragment; /* * Set up arguments for soreceive() */ - *mp = (struct mbuf *)0; - *aname = (struct mbuf *)0; + *mp = NULL; sotype = rep->r_nmp->nm_sotype; /* @@ -893,12 +717,11 @@ tryagain: goto tryagain; } while (rep->r_flags & R_MUSTRESEND) { - m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); - nfsstats.rpcretries++; - NFS_DPF(DUP, - ("nfs_receive RESEND %s\n", - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname)); - error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); + error = mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m); + if (!error) { + OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries); + error = nfs_send(so, rep->r_nmp->nm_nam, m, rep); + } /* * we also hold rcv lock so rep is still * legit this point @@ -914,127 +737,115 @@ tryagain: } nfs_sndunlock(rep); if (sotype == SOCK_STREAM) { - aio.iov_base = (caddr_t) &len; - aio.iov_len = sizeof(u_long); - auio.uio_iov = &aio; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_offset = 0; - auio.uio_resid = sizeof(u_long); - auio.uio_procp = p; - do { - rcvflg = MSG_WAITALL; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - error = soreceive(so, (struct sockaddr **)0, &auio, - (struct mbuf **)0, (struct mbuf **)0, &rcvflg); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (!rep->r_nmp) /* if unmounted then bailout */ - goto shutout; - if (error == EWOULDBLOCK && rep) { - error2 = nfs_sigintr(rep->r_nmp, rep, p); - if (error2) - error = error2; - } - } while (error == EWOULDBLOCK); - if (!error && auio.uio_resid > 0) { - log(LOG_INFO, - "short receive (%d/%d) from nfs server %s\n", - sizeof(u_long) - auio.uio_resid, - sizeof(u_long), - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); - error = EPIPE; - } - if (error) - goto errout; - len = ntohl(len) & ~0x80000000; - /* - * This is SERIOUS! We are out of sync with the sender - * and forcing a disconnect/reconnect is all I can do. 
- */ - if (len > NFS_MAXPACKET) { - log(LOG_ERR, "%s (%d) from nfs server %s\n", - "impossible packet length", - len, - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); - error = EFBIG; - goto errout; - } - auio.uio_resid = len; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - do { - rcvflg = MSG_WAITALL; - error = soreceive(so, (struct sockaddr **)0, - &auio, mp, (struct mbuf **)0, &rcvflg); - if (!rep->r_nmp) /* if unmounted then bailout */ { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - goto shutout; - } - } while (error == EWOULDBLOCK || error == EINTR || - error == ERESTART); - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + error = 0; + len = 0; + lastfragment = 0; + mlast = NULL; + while (!error && !lastfragment) { + aio.iov_base = (uintptr_t) &fraglen; + aio.iov_len = sizeof(u_long); + bzero(&msg, sizeof(msg)); + msg.msg_iov = (struct iovec *) &aio; + msg.msg_iovlen = 1; + do { + error = sock_receive(so, &msg, MSG_WAITALL, &rcvlen); + if (!rep->r_nmp) /* if unmounted then bailout */ + goto shutout; + if (error == EWOULDBLOCK && rep) { + error2 = nfs_sigintr(rep->r_nmp, rep, p); + if (error2) + error = error2; + } + } while (error == EWOULDBLOCK); + if (!error && rcvlen < aio.iov_len) { + /* only log a message if we got a partial word */ + if (rcvlen != 0) + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + rcvlen, sizeof(u_long), + vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname); + error = EPIPE; + } + if (error) + goto errout; + lastfragment = ntohl(fraglen) & 0x80000000; + fraglen = ntohl(fraglen) & ~0x80000000; + len += fraglen; + /* + * This is SERIOUS! We are out of sync with the sender + * and forcing a disconnect/reconnect is all I can do. + */ + if (len > NFS_MAXPACKET) { + log(LOG_ERR, "%s (%d) from nfs server %s\n", + "impossible RPC record length", len, + vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname); + error = EFBIG; + goto errout; + } - if (!error && auio.uio_resid > 0) { - log(LOG_INFO, - "short receive (%d/%d) from nfs server %s\n", - len - auio.uio_resid, len, - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); - error = EPIPE; + m = NULL; + do { + rcvlen = fraglen; + error = sock_receivembuf(so, NULL, &m, MSG_WAITALL, &rcvlen); + if (!rep->r_nmp) /* if unmounted then bailout */ { + goto shutout; + } + } while (error == EWOULDBLOCK || error == EINTR || + error == ERESTART); + + if (!error && fraglen > rcvlen) { + log(LOG_INFO, + "short receive (%d/%d) from nfs server %s\n", + rcvlen, fraglen, + vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname); + error = EPIPE; + mbuf_freem(m); + } + if (!error) { + if (!*mp) { + *mp = m; + mlast = m; + } else { + error = mbuf_setnext(mlast, m); + if (error) { + printf("nfs_receive: mbuf_setnext failed %d\n", error); + mbuf_freem(m); + } + } + while (mbuf_next(mlast)) + mlast = mbuf_next(mlast); + } } } else { - /* - * NB: Since uio_resid is big, MSG_WAITALL is ignored - * and soreceive() will return when it has either a - * control msg or a data msg. - * We have no use for control msg., but must grab them - * and then throw them away so we know what is going - * on. 
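 *
 * (The rewritten datagram path below gets the same effect with a
 * single call that hands the datagram back as an mbuf chain,
 *
 *	rcvlen = 100000000;
 *	error = sock_receivembuf(so, &msg, mp, 0, &rcvlen);
 *
 * so the control-message handling described above is no longer
 * needed.)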
- */ - auio.uio_resid = len = 100000000; /* Anything Big */ - auio.uio_procp = p; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + bzero(&msg, sizeof(msg)); do { - control = NULL; - rcvflg = 0; - error = soreceive(so, (struct sockaddr **)0, - &auio, mp, &control, &rcvflg); - if (control) - m_freem(control); + rcvlen = 100000000; + error = sock_receivembuf(so, &msg, mp, 0, &rcvlen); if (!rep->r_nmp) /* if unmounted then bailout */ { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); goto shutout; } if (error == EWOULDBLOCK && rep) { error2 = nfs_sigintr(rep->r_nmp, rep, p); if (error2) { - thread_funnel_switch(NETWORK_FUNNEL, - KERNEL_FUNNEL); return (error2); } } - } while (error == EWOULDBLOCK || - (!error && *mp == NULL && control)); - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + } while (error == EWOULDBLOCK); - if ((rcvflg & MSG_EOR) == 0) + if ((msg.msg_flags & MSG_EOR) == 0) printf("Egad!!\n"); if (!error && *mp == NULL) error = EPIPE; - len -= auio.uio_resid; + len = rcvlen; } errout: if (error && error != EINTR && error != ERESTART) { - m_freem(*mp); - *mp = (struct mbuf *)0; + mbuf_freem(*mp); + *mp = NULL; if (error != EPIPE) log(LOG_INFO, - "receive error %d from nfs server %s\n", - error, - rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); + "receive error %d from nfs server %s\n", error, + vfs_statfs(rep->r_nmp->nm_mountp)->f_mntfromname); error = nfs_sndlock(rep); if (!error) { error = nfs_reconnect(rep); @@ -1060,35 +871,18 @@ errout: return (ENXIO); so = rep->r_nmp->nm_so; } - if (so->so_state & SS_ISCONNECTED) - getnam = (struct sockaddr **)0; - else - getnam = &tmp_nam;; - auio.uio_resid = len = 1000000; - auio.uio_procp = p; - - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + bzero(&msg, sizeof(msg)); + len = 0; do { - rcvflg = 0; - error = soreceive(so, getnam, &auio, mp, - (struct mbuf **)0, &rcvflg); - - if ((getnam) && (*getnam)) { - MGET(mhck, M_WAIT, MT_SONAME); - mhck->m_len = (*getnam)->sa_len; - sin = mtod(mhck, struct sockaddr_in *); - bcopy(*getnam, sin, sizeof(struct sockaddr_in)); - mhck->m_hdr.mh_len = sizeof(struct sockaddr_in); - FREE(*getnam, M_SONAME); - *aname = mhck; - } + rcvlen = 1000000; + error = sock_receivembuf(so, &msg, mp, 0, &rcvlen); if (!rep->r_nmp) /* if unmounted then bailout */ - goto dgramout; + goto shutout; if (error) { error2 = nfs_sigintr(rep->r_nmp, rep, p); if (error2) { error = error2; - goto dgramout; + goto shutout; } } /* Reconnect for all errors. We may be receiving @@ -1099,8 +893,6 @@ errout: * although TCP doesn't seem to. */ if (error) { - thread_funnel_switch(NETWORK_FUNNEL, - KERNEL_FUNNEL); error2 = nfs_sndlock(rep); if (!error2) { error2 = nfs_reconnect(rep); @@ -1114,19 +906,13 @@ errout: } else { error = error2; } - thread_funnel_switch(KERNEL_FUNNEL, - NETWORK_FUNNEL); } } while (error == EWOULDBLOCK); - -dgramout: - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - len -= auio.uio_resid; } shutout: if (error) { - m_freem(*mp); - *mp = (struct mbuf *)0; + mbuf_freem(*mp); + *mp = NULL; } return (error); } @@ -1141,11 +927,10 @@ int nfs_reply(myrep) struct nfsreq *myrep; { - register struct nfsreq *rep; - register struct nfsmount *nmp = myrep->r_nmp; - register long t1; - struct mbuf *mrep, *md; - struct mbuf *nam; + struct nfsreq *rep; + struct nfsmount *nmp = myrep->r_nmp; + long t1; + mbuf_t mrep, md; u_long rxid, *tl; caddr_t dpos, cp2; int error; @@ -1185,15 +970,14 @@ nfs_reply(myrep) * Get the next Rpc reply off the socket. 
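 * The sender address is no longer requested here -- the old code
 * asked nfs_receive() for it only to free it immediately, so the
 * call is now simply
 *
 *	error = nfs_receive(myrep, &mrep);
 *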
Assume myrep->r_nmp * is still intact by checks done in nfs_rcvlock. */ - /* XXX why do we ask for nam here? we don't use it! */ - error = nfs_receive(myrep, &nam, &mrep); - if (nam) - m_freem(nam); + error = nfs_receive(myrep, &mrep); /* * Bailout asap if nfsmount struct gone (unmounted). */ if (!myrep->r_nmp) { FSDBG(530, myrep->r_xid, myrep, nmp, -2); + if (mrep) + mbuf_freem(mrep); return (ENXIO); } if (error) { @@ -1201,25 +985,31 @@ nfs_reply(myrep) nfs_rcvunlock(myrep); /* Bailout asap if nfsmount struct gone (unmounted). */ - if (!myrep->r_nmp) + if (!myrep->r_nmp) { + if (mrep) + mbuf_freem(mrep); return (ENXIO); + } /* * Ignore routing errors on connectionless protocols?? */ - if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { - if (nmp->nm_so) - nmp->nm_so->so_error = 0; - if (myrep->r_flags & R_GETONEREP) - return (0); + if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) { + if (nmp->nm_so) { + int clearerror; + int optlen = sizeof(clearerror); + sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen); + } continue; } + if (mrep) + mbuf_freem(mrep); return (error); } /* * We assume all is fine, but if we did not have an error - * and mrep is 0, better not dereference it. nfs_receieve + * and mrep is 0, better not dereference it. nfs_receive * calls soreceive which carefully sets error=0 when it got * errors on sbwait (tsleep). In most cases, I assume that's * so we could go back again. In tcp case, EPIPE is returned. @@ -1240,27 +1030,15 @@ nfs_reply(myrep) * Get the xid and check that it is an rpc reply */ md = mrep; - dpos = mtod(md, caddr_t); + dpos = mbuf_data(md); nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); rxid = *tl++; if (*tl != rpc_reply) { -#ifndef NFS_NOSERVER - if (nmp->nm_flag & NFSMNT_NQNFS) { - if (nqnfs_callback(nmp, mrep, md, dpos)) - nfsstats.rpcinvalid++; - } else { - nfsstats.rpcinvalid++; - m_freem(mrep); - } -#else - nfsstats.rpcinvalid++; - m_freem(mrep); -#endif + OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid); + mbuf_freem(mrep); nfsmout: if (nmp->nm_state & NFSSTA_RCVLOCK) nfs_rcvunlock(myrep); - if (myrep->r_flags & R_GETONEREP) - return (0); /* this path used by NQNFS */ continue; } @@ -1291,7 +1069,7 @@ nfsmout: panic("nfs_reply: proct[%d] is zero", rep->r_procnum); rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; - rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; + rt->fsid = vfs_statfs(nmp->nm_mountp)->f_fsid; microtime(&rt->tstamp); // XXX unused if (rep->r_flags & R_TIMING) rt->rtt = rep->r_rtt; @@ -1350,8 +1128,8 @@ nfsmout: * If it's mine, get out. */ if (rep == 0) { - nfsstats.rpcunexpected++; - m_freem(mrep); + OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected); + mbuf_freem(mrep); } else if (rep == myrep) { if (rep->r_mrep == NULL) panic("nfs_reply: nil r_mrep"); @@ -1359,8 +1137,6 @@ nfsmout: } FSDBG(530, myrep->r_xid, myrep, rep, rep ? 
rep->r_xid : myrep->r_flags); - if (myrep->r_flags & R_GETONEREP) - return (0); /* this path used by NQNFS */ } } @@ -1375,32 +1151,31 @@ nfsmout: * nb: always frees up mreq mbuf list */ int -nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) - struct vnode *vp; - struct mbuf *mrest; +nfs_request(vp, mp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) + vnode_t vp; + mount_t mp; + mbuf_t mrest; int procnum; - struct proc *procp; - struct ucred *cred; - struct mbuf **mrp; - struct mbuf **mdp; + proc_t procp; + kauth_cred_t cred; + mbuf_t *mrp; + mbuf_t *mdp; caddr_t *dposp; u_int64_t *xidp; { - register struct mbuf *m, *mrep, *m2; - register struct nfsreq *rep, *rp; - register u_long *tl; - register int i; + mbuf_t m, mrep, m2; + struct nfsreq re, *rep; + u_long *tl; + int i; struct nfsmount *nmp; - struct mbuf *md, *mheadend; - struct nfsnode *np; + mbuf_t md, mheadend; char nickv[RPCX_NICKVERF]; - time_t reqtime, waituntil; + time_t waituntil; caddr_t dpos, cp2; - int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; - int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; + int t1, error = 0, mrest_len, auth_len, auth_type; + int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0; int verf_len, verf_type; u_long xid; - u_quad_t frev; char *auth_str, *verf_str; NFSKERBKEY_T key; /* save session key */ int nmsotype; @@ -1410,15 +1185,16 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) *mrp = NULL; if (xidp) *xidp = 0; + nmp = VFSTONFS(mp); - MALLOC_ZONE(rep, struct nfsreq *, - sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); + rep = &re; - nmp = VFSTONFS(vp->v_mount); + if (vp) + nmp = VFSTONFS(vnode_mount(vp)); if (nmp == NULL || (nmp->nm_state & (NFSSTA_FORCE|NFSSTA_TIMEO)) == (NFSSTA_FORCE|NFSSTA_TIMEO)) { - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); + mbuf_freem(mrest); return (ENXIO); } nmsotype = nmp->nm_sotype; @@ -1435,8 +1211,8 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) i = 0; m = mrest; while (m) { - i += m->m_len; - m = m->m_next; + i += mbuf_len(m); + m = mbuf_next(m); } mrest_len = i; @@ -1444,10 +1220,10 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) * Get the RPC header with authorization. */ kerbauth: - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp; if (!nmp) { FSDBG_BOT(531, error, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); + mbuf_freem(mrest); return (ENXIO); } verf_str = auth_str = (char *)0; @@ -1458,24 +1234,20 @@ kerbauth: bzero((caddr_t)key, sizeof (key)); if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, &auth_len, verf_str, verf_len)) { - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp; if (!nmp) { FSDBG_BOT(531, 2, vp, error, rep); - FREE_ZONE((caddr_t)rep, - sizeof (struct nfsreq), M_NFSREQ); - m_freem(mrest); + mbuf_freem(mrest); return (ENXIO); } error = nfs_getauth(nmp, rep, cred, &auth_str, &auth_len, verf_str, &verf_len, key); - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? 
VFSTONFS(vnode_mount(vp)) : rep->r_nmp; if (!error && !nmp) error = ENXIO; if (error) { FSDBG_BOT(531, 2, vp, error, rep); - FREE_ZONE((caddr_t)rep, - sizeof (struct nfsreq), M_NFSREQ); - m_freem(mrest); + mbuf_freem(mrest); return (error); } } @@ -1487,25 +1259,35 @@ kerbauth: nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 5 * NFSX_UNSIGNED; } - m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, - auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); - if (xidp) - *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32); + error = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, + auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid, &m); if (auth_str) _FREE(auth_str, M_TEMP); + if (error) { + mbuf_freem(mrest); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); + return (error); + } + if (xidp) + *xidp = ntohl(xid) + ((u_int64_t)nfs_xidwrap << 32); /* * For stream protocols, insert a Sun RPC Record Mark. */ if (nmsotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | - (m->m_pkthdr.len - NFSX_UNSIGNED)); + error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); + if (error) { + mbuf_freem(m); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); + return (error); + } + *((u_long*)mbuf_data(m)) = + htonl(0x80000000 | (mbuf_pkthdr_len(m) - NFSX_UNSIGNED)); } rep->r_mreq = m; rep->r_xid = xid; tryagain: - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp; if (nmp && (nmp->nm_flag & NFSMNT_SOFT)) rep->r_retry = nmp->nm_retry; else @@ -1520,18 +1302,13 @@ tryagain: /* * Do the client side RPC. */ - nfsstats.rpcrequests++; + OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests); /* * Chain request into list of outstanding requests. Be sure * to put it LAST so timer finds oldest requests first. */ - s = splsoftclock(); TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); - /* Get send time for nqnfs */ - microtime(&now); - reqtime = now.tv_sec; - /* * If backing off another request or avoiding congestion, don't * send this one now but let timer do it. If not timing a request, @@ -1540,9 +1317,8 @@ tryagain: if (nmp && nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) { - int connrequired = (nmp->nm_soflags & PR_CONNREQUIRED); + int connrequired = (nmp->nm_sotype == SOCK_STREAM); - splx(s); if (connrequired) error = nfs_sndlock(rep); @@ -1558,19 +1334,19 @@ tryagain: rep->r_flags |= R_SENT; } - m2 = m_copym(m, 0, M_COPYALL, M_WAIT); - error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); + error = mbuf_copym(m, 0, MBUF_COPYALL, MBUF_WAITOK, &m2); + if (!error) + error = nfs_send(nmp->nm_so, nmp->nm_nam, m2, rep); if (connrequired) nfs_sndunlock(rep); } - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp; if (error) { if (nmp) nmp->nm_sent -= NFS_CWNDSCALE; rep->r_flags &= ~R_SENT; } } else { - splx(s); rep->r_rtt = -1; } @@ -1585,7 +1361,7 @@ tryagain: */ nfs_repdequeue(rep); - nmp = VFSTONFS(vp->v_mount); + nmp = vp ? VFSTONFS(vnode_mount(vp)) : rep->r_nmp; /* * Decrement the outstanding request count. @@ -1603,16 +1379,16 @@ tryagain: * tprintf a response. */ if (!error) - nfs_up(rep, nmp, procp, "is alive again", NFSSTA_TIMEO); + nfs_up(nmp, procp, NFSSTA_TIMEO, + (rep->r_flags & R_TPRINTFMSG) ? 
"is alive again" : NULL); mrep = rep->r_mrep; md = rep->r_md; dpos = rep->r_dpos; if (!error && !nmp) error = ENXIO; if (error) { - m_freem(rep->r_mreq); + mbuf_freem(rep->r_mreq); FSDBG_BOT(531, error, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1626,18 +1402,19 @@ tryagain: else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { if (!failed_auth) { failed_auth++; - mheadend->m_next = (struct mbuf *)0; - m_freem(mrep); - m_freem(rep->r_mreq); - goto kerbauth; + error = mbuf_setnext(mheadend, NULL); + mbuf_freem(mrep); + mbuf_freem(rep->r_mreq); + if (!error) + goto kerbauth; + printf("nfs_request: mbuf_setnext failed\n"); } else error = EAUTH; } else error = EACCES; - m_freem(mrep); - m_freem(rep->r_mreq); + mbuf_freem(mrep); + mbuf_freem(rep->r_mreq); FSDBG_BOT(531, error, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1660,25 +1437,17 @@ tryagain: error = fxdr_unsigned(int, *tl); if ((nmp->nm_flag & NFSMNT_NFSV3) && error == NFSERR_TRYLATER) { - m_freem(mrep); + mbuf_freem(mrep); error = 0; microuptime(&now); waituntil = now.tv_sec + trylater_delay; - NFS_DPF(DUP, - ("nfs_request %s flag=%x trylater_cnt=%x waituntil=%lx trylater_delay=%x\n", - nmp->nm_mountp->mnt_stat.f_mntfromname, - nmp->nm_flag, trylater_cnt, waituntil, - trylater_delay)); while (now.tv_sec < waituntil) { - (void)tsleep((caddr_t)&lbolt, - PSOCK, "nqnfstry", 0); + tsleep((caddr_t)&lbolt, PSOCK, "nfstrylater", 0); microuptime(&now); } trylater_delay *= 2; if (trylater_delay > 60) trylater_delay = 60; - if (trylater_cnt < 7) - trylater_cnt++; goto tryagain; } @@ -1686,7 +1455,7 @@ tryagain: * If the File Handle was stale, invalidate the * lookup cache, just in case. */ - if (error == ESTALE) + if ((error == ESTALE) && vp) cache_purge(vp); if (nmp->nm_flag & NFSMNT_NFSV3) { *mrp = mrep; @@ -1694,49 +1463,26 @@ tryagain: *dposp = dpos; error |= NFSERR_RETERR; } else { - m_freem(mrep); + mbuf_freem(mrep); error &= ~NFSERR_RETERR; } - m_freem(rep->r_mreq); + mbuf_freem(rep->r_mreq); FSDBG_BOT(531, error, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, - sizeof (struct nfsreq), M_NFSREQ); return (error); } - /* - * For nqnfs, get any lease in reply - */ - if (nmp->nm_flag & NFSMNT_NQNFS) { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - if (*tl) { - np = VTONFS(vp); - nqlflag = fxdr_unsigned(int, *tl); - nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); - cachable = fxdr_unsigned(int, *tl++); - reqtime += fxdr_unsigned(int, *tl++); - microtime(&now); - if (reqtime > now.tv_sec) { - fxdr_hyper(tl, &frev); - nqnfs_clientlease(nmp, np, nqlflag, - cachable, reqtime, frev); - } - } - } *mrp = mrep; *mdp = md; *dposp = dpos; - m_freem(rep->r_mreq); + mbuf_freem(rep->r_mreq); FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (0); } - m_freem(mrep); + mbuf_freem(mrep); error = EPROTONOSUPPORT; nfsmout: - m_freem(rep->r_mreq); + mbuf_freem(rep->r_mreq); FSDBG_BOT(531, error, rep->r_xid, nmp, rep); - FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1746,36 +1492,47 @@ nfsmout: * siz arg. 
is used to decide if adding a cluster is worthwhile */ int -nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) +nfs_rephead(siz, nd, slp, err, mrq, mbp, bposp) int siz; struct nfsrv_descript *nd; struct nfssvc_sock *slp; int err; - int cache; - u_quad_t *frev; - struct mbuf **mrq; - struct mbuf **mbp; + mbuf_t *mrq; + mbuf_t *mbp; caddr_t *bposp; { - register u_long *tl; - register struct mbuf *mreq; + u_long *tl; + mbuf_t mreq; caddr_t bpos; - struct mbuf *mb, *mb2; + mbuf_t mb, mb2; + int error, mlen; - MGETHDR(mreq, M_WAIT, MT_DATA); - mb = mreq; /* * If this is a big reply, use a cluster else * try and leave leading space for the lower level headers. */ siz += RPC_REPLYSIZ; - if (siz >= MINCLSIZE) { - MCLGET(mreq, M_WAIT); - } else - mreq->m_data += max_hdr; - tl = mtod(mreq, u_long *); - mreq->m_len = 6 * NFSX_UNSIGNED; - bpos = ((caddr_t)tl) + mreq->m_len; + if (siz >= nfs_mbuf_minclsize) { + error = mbuf_getpacket(MBUF_WAITOK, &mreq); + } else { + error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mreq); + } + if (error) { + /* unable to allocate packet */ + /* XXX nfsstat? */ + return (error); + } + mb = mreq; + tl = mbuf_data(mreq); + mlen = 6 * NFSX_UNSIGNED; + if (siz < nfs_mbuf_minclsize) { + /* leave space for lower level headers */ + tl += 80/sizeof(*tl); /* XXX max_hdr? XXX */ + mbuf_setdata(mreq, tl, mlen); + } else { + mbuf_setlen(mreq, mlen); + } + bpos = ((caddr_t)tl) + mlen; *tl++ = txdr_unsigned(nd->nd_retxid); *tl++ = rpc_reply; if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { @@ -1783,7 +1540,8 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) if (err & NFSERR_AUTHERR) { *tl++ = rpc_autherr; *tl = txdr_unsigned(err & ~NFSERR_AUTHERR); - mreq->m_len -= NFSX_UNSIGNED; + mlen -= NFSX_UNSIGNED; + mbuf_setlen(mreq, mlen); bpos -= NFSX_UNSIGNED; } else { *tl++ = rpc_mismatch; @@ -1798,12 +1556,14 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) * verifier back, otherwise just RPCAUTH_NULL. */ if (nd->nd_flag & ND_KERBFULL) { - register struct nfsuid *nuidp; + struct nfsuid *nuidp; struct timeval ktvin, ktvout; + uid_t uid = kauth_cred_getuid(nd->nd_cr); - for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first; + lck_rw_lock_shared(&slp->ns_rwlock); + for (nuidp = NUIDHASH(slp, uid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid && + if (kauth_cred_getuid(nuidp->nu_cr) == uid && (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nd->nd_nam2))) break; @@ -1827,11 +1587,12 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) *tl = ktvout.tv_sec; nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = ktvout.tv_usec; - *tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid); + *tl++ = txdr_unsigned(kauth_cred_getuid(nuidp->nu_cr)); } else { *tl++ = 0; *tl++ = 0; } + lck_rw_done(&slp->ns_rwlock); } else { *tl++ = 0; *tl++ = 0; @@ -1843,13 +1604,9 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) case EPROGMISMATCH: *tl = txdr_unsigned(RPC_PROGMISMATCH); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - if (nd->nd_flag & ND_NQNFS) { - *tl++ = txdr_unsigned(3); - *tl = txdr_unsigned(3); - } else { - *tl++ = txdr_unsigned(2); - *tl = txdr_unsigned(3); - } + // XXX hard coded versions + *tl++ = txdr_unsigned(2); + *tl = txdr_unsigned(3); break; case EPROCUNAVAIL: *tl = txdr_unsigned(RPC_PROCUNAVAIL); @@ -1867,30 +1624,16 @@ nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) *tl = 0; } break; - }; - } - - /* - * For nqnfs, piggyback lease as requested. 
- */ - if ((nd->nd_flag & ND_NQNFS) && err == 0) { - if (nd->nd_flag & ND_LEASE) { - nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE); - *tl++ = txdr_unsigned(cache); - *tl++ = txdr_unsigned(nd->nd_duration); - txdr_hyper(frev, tl); - } else { - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = 0; } } + if (mrq != NULL) *mrq = mreq; *mbp = mb; *bposp = bpos; - if (err != 0 && err != NFSERR_RETVOID) - nfsstats.srvrpc_errs++; + if (err != 0 && err != NFSERR_RETVOID) { + OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs); + } return (0); } @@ -1918,8 +1661,7 @@ nfs_softterm(struct nfsreq *rep) } void -nfs_timer_funnel(arg) - void * arg; +nfs_timer_funnel(void * arg) { (void) thread_funnel_set(kernel_flock, TRUE); nfs_timer(arg); @@ -1930,25 +1672,22 @@ nfs_timer_funnel(arg) /* * Ensure rep isn't in use by the timer, then dequeue it. */ -void +static void nfs_repdequeue(struct nfsreq *rep) { - int s; while ((rep->r_flags & R_BUSY)) { rep->r_flags |= R_WAITING; tsleep(rep, PSOCK, "repdeq", 0); } - s = splsoftclock(); TAILQ_REMOVE(&nfs_reqq, rep, r_chain); - splx(s); } /* * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not * free()'d out from under it. */ -void +static void nfs_repbusy(struct nfsreq *rep) { @@ -1960,7 +1699,7 @@ nfs_repbusy(struct nfsreq *rep) /* * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied. */ -struct nfsreq * +static struct nfsreq * nfs_repnext(struct nfsreq *rep) { struct nfsreq * nextrep; @@ -1991,55 +1730,27 @@ nfs_repnext(struct nfsreq *rep) * sure to set the r_retry field to 0 (implies nm_retry == 0). */ void -nfs_timer(arg) - void *arg; /* never used */ +nfs_timer(__unused void *arg) { - register struct nfsreq *rep; - register struct mbuf *m; - register struct socket *so; - register struct nfsmount *nmp; - register int timeo; - int s, error; + struct nfsreq *rep; + mbuf_t m; + socket_t so; + struct nfsmount *nmp; + int timeo; + int error; #ifndef NFS_NOSERVER - static long lasttime = 0; - register struct nfssvc_sock *slp; + struct nfssvc_sock *slp; u_quad_t cur_usec; #endif /* NFS_NOSERVER */ -#if NFSDIAG - int rttdiag; -#endif int flags, rexmit, cwnd, sent; u_long xid; struct timeval now; - s = splnet(); - /* - * XXX If preemptable threads are implemented the spls used for the - * outstanding request queue must be replaced with mutexes. 
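 *
 * (In the rewrite the spls are simply gone: the timer instead pins
 * the request it is visiting with R_BUSY, via nfs_repbusy() and
 * nfs_repnext(), and nfs_repdequeue() waits for that flag to clear
 * before unlinking -- roughly:
 *
 *	while ((rep->r_flags & R_BUSY)) {
 *		rep->r_flags |= R_WAITING;
 *		tsleep(rep, PSOCK, "repdeq", 0);
 *	}
 *	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
 * )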
- */ -#ifdef NFSTRACESUSPENDERS - if (NFSTRACE_SUSPENDING) { - TAILQ_FOREACH(rep, &nfs_reqq, r_chain) - if (rep->r_xid == nfstracexid) - break; - if (!rep) { - NFSTRACE_RESUME; - } else if (NFSTRACE_SUSPENSEOVER) { - NFSTRACE_SUSPEND; - } - } -#endif rep = TAILQ_FIRST(&nfs_reqq); if (rep != NULL) nfs_repbusy(rep); microuptime(&now); for ( ; rep != NULL ; rep = nfs_repnext(rep)) { -#ifdef NFSTRACESUSPENDERS - if (rep->r_mrep && !NFSTRACE_SUSPENDING) { - nfstracexid = rep->r_xid; - NFSTRACE_STARTSUSPENDCOUNTDOWN; - } -#endif nmp = rep->r_nmp; if (!nmp) /* unmounted */ continue; @@ -2051,12 +1762,13 @@ nfs_timer(arg) (rep->r_rexmit > 2 || (rep->r_flags & R_RESENDERR)) && rep->r_lastmsg + nmp->nm_tprintf_delay < now.tv_sec) { rep->r_lastmsg = now.tv_sec; - nfs_down(rep, rep->r_nmp, rep->r_procp, "not responding", - 0, NFSSTA_TIMEO); + nfs_down(rep->r_nmp, rep->r_procp, 0, NFSSTA_TIMEO, + "not responding"); + rep->r_flags |= R_TPRINTFMSG; if (!(nmp->nm_state & NFSSTA_MOUNTED)) { /* we're not yet completely mounted and */ /* we can't complete an RPC, so we fail */ - nfsstats.rpctimeouts++; + OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts); nfs_softterm(rep); continue; } @@ -2083,7 +1795,7 @@ nfs_timer(arg) * and never allow r_rexmit to be more than NFS_MAXREXMIT. */ if (rep->r_rexmit >= rep->r_retry) { /* too many */ - nfsstats.rpctimeouts++; + OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts); nfs_softterm(rep); continue; } @@ -2100,29 +1812,12 @@ nfs_timer(arg) * Resend it * Set r_rtt to -1 in case we fail to send it now. */ -#if NFSDIAG - rttdiag = rep->r_rtt; -#endif rep->r_rtt = -1; - if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && - ((nmp->nm_flag & NFSMNT_DUMBTIMR) || + if (((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || nmp->nm_sent < nmp->nm_cwnd) && - (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ - - struct proc *p = current_proc(); - -#if NFSDIAG - if (rep->r_flags & R_SENT && nfsprnttimo && - nmp->nm_timeouts >= nfsprnttimo) { - int t = proct[rep->r_procnum]; - if (t) - NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d A=%d D=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum, nmp->nm_srtt[t-1], nmp->nm_sdrtt[t-1])); - else - NFS_DPF(DUP, ("nfs_timer %s nmtm=%d tms=%d rtt=%d tm=%d p=%d\n", nmp->nm_mountp->mnt_stat.f_mntfromname, nmp->nm_timeo, nmp->nm_timeouts, rttdiag, timeo, rep->r_procnum)); - } - nfsdup(rep); -#endif /* NFSDIAG */ + (mbuf_copym(rep->r_mreq, 0, MBUF_COPYALL, MBUF_DONTWAIT, &m) == 0)){ + struct msghdr msg; /* * Iff first send, start timing * else turn timing off, backoff timer @@ -2143,61 +1838,61 @@ nfs_timer(arg) nmp->nm_cwnd >>= 1; if (nmp->nm_cwnd < NFS_CWNDSCALE) nmp->nm_cwnd = NFS_CWNDSCALE; - nfsstats.rpcretries++; + OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries); } else { rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - - if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) - error = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, 0, 0, p); - else - error = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, mtod(nmp->nm_nam, struct sockaddr *), 0, p); - - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + bzero(&msg, sizeof(msg)); + if ((nmp->nm_flag & NFSMNT_NOCONN) == NFSMNT_NOCONN) { + msg.msg_name = mbuf_data(nmp->nm_nam); + msg.msg_namelen = mbuf_len(nmp->nm_nam); + } + error = sock_sendmbuf(so, &msg, m, MSG_DONTWAIT, NULL); FSDBG(535, xid, 
error, sent, cwnd); if (error) { - if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) - so->so_error = 0; - rep->r_flags = flags | R_RESENDERR; - rep->r_rexmit = rexmit; - nmp->nm_cwnd = cwnd; - nmp->nm_sent = sent; - if (flags & R_SENT) - nfsstats.rpcretries--; + if (error == EWOULDBLOCK) { + rep->r_flags = flags; + rep->r_rexmit = rexmit; + nmp->nm_cwnd = cwnd; + nmp->nm_sent = sent; + rep->r_xid = xid; + } + else { + if (NFSIGNORE_SOERROR(nmp->nm_sotype, error)) { + int clearerror; + int optlen = sizeof(clearerror); + sock_getsockopt(nmp->nm_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen); + } + rep->r_flags = flags | R_RESENDERR; + rep->r_rexmit = rexmit; + nmp->nm_cwnd = cwnd; + nmp->nm_sent = sent; + if (flags & R_SENT) + OSAddAtomic(-1, (SInt32*)&nfsstats.rpcretries); + } } else rep->r_rtt = 0; } } microuptime(&now); #ifndef NFS_NOSERVER - /* - * Call the nqnfs server timer once a second to handle leases. - */ - if (lasttime != now.tv_sec) { - lasttime = now.tv_sec; - nqnfs_serverd(); - } - /* * Scan the write gathering queues for writes that need to be * completed now. */ cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; + lck_mtx_lock(nfsd_mutex); TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { - if (LIST_FIRST(&slp->ns_tq) && - LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) + if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) nfsrv_wakenfsd(slp); } + lck_mtx_unlock(nfsd_mutex); #endif /* NFS_NOSERVER */ - splx(s); if (nfsbuffreeuptimestamp + 30 <= now.tv_sec) { /* @@ -2224,12 +1919,12 @@ int nfs_sigintr(nmp, rep, p) struct nfsmount *nmp; struct nfsreq *rep; - struct proc *p; + proc_t p; { - struct uthread *curr_td; sigset_t pending_sigs; int context_good = 0; struct nfsmount *repnmp; + extern proc_t kernproc; if (nmp == NULL) return (ENXIO); @@ -2249,7 +1944,7 @@ nfs_sigintr(nmp, rep, p) (NFSSTA_FORCE|NFSSTA_TIMEO)) return (EIO); /* Someone is unmounting us, go soft and mark it. */ - if ((repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT)) { + if (repnmp->nm_mountp->mnt_kern_flag & MNTK_FRCUNMOUNT) { repnmp->nm_flag |= NFSMNT_SOFT; nmp->nm_state |= NFSSTA_FORCE; } @@ -2257,7 +1952,7 @@ nfs_sigintr(nmp, rep, p) * If the mount is hung and we've requested not to hang * on remote filesystems, then bail now. */ - if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0 && + if (p != NULL && (proc_noremotehang(p)) != 0 && (repnmp->nm_state & NFSSTA_TIMEO) != 0) return (EIO); } @@ -2265,30 +1960,13 @@ if (p == NULL) return (0); - /* - * XXX: Since nfs doesn't have a good shot at getting the current - * thread we take a guess. (only struct proc * are passed to VOPs) - * What we do is look at the current thread, if it belongs to the - * passed in proc pointer then we have a "good/accurate" context - * and can make an accurate guess as to what to do. - * However if we have a bad context we have to make due with what - * is in the proc struct which may not be as up to date as we'd - * like. - * This is ok because the process will call us with the correct - * context after a short timeout while waiting for a response. - */ - curr_td = (struct uthread *)get_bsdthread_info(current_act()); - if (curr_td->uu_proc == p) - context_good = 1; - if (context_good && current_thread_aborted()) + /* If this thread belongs to the kernel task, the abort check is not needed */ + if ((current_proc() != kernproc) && current_thread_aborted()) { return (EINTR); + } /* mask off thread and process blocked signals.
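 * proc_pendingsignals() folds together the per-thread and
 * per-process signal bookkeeping that used to be open-coded here,
 * so the check below reduces to a single call:
 *
 *	pending_sigs = proc_pendingsignals(p, NFSINT_SIGMASK);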
*/ - if (context_good) - pending_sigs = curr_td->uu_siglist & ~curr_td->uu_sigmask; - else - pending_sigs = p->p_siglist; - /* mask off process level and NFS ignored signals. */ - pending_sigs &= ~p->p_sigignore & NFSINT_SIGMASK; + + pending_sigs = proc_pendingsignals(p, NFSINT_SIGMASK); if (pending_sigs && (nmp->nm_flag & NFSMNT_INT) != 0) return (EINTR); return (0); @@ -2304,8 +1982,8 @@ int nfs_sndlock(rep) struct nfsreq *rep; { - register int *statep; - struct proc *p; + int *statep; + proc_t p; int error, slpflag = 0, slptimeo = 0; if (rep->r_nmp == NULL) @@ -2320,10 +1998,9 @@ nfs_sndlock(rep) if (error) return (error); *statep |= NFSSTA_WANTSND; - if (p != NULL && (p->p_flag & P_NOREMOTEHANG) != 0) + if (p != NULL && (proc_noremotehang(p)) != 0) slptimeo = hz; - (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), - "nfsndlck", slptimeo); + tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsndlck", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; @@ -2346,7 +2023,7 @@ void nfs_sndunlock(rep) struct nfsreq *rep; { - register int *statep; + int *statep; if (rep->r_nmp == NULL) return; @@ -2361,10 +2038,9 @@ nfs_sndunlock(rep) } static int -nfs_rcvlock(rep) - register struct nfsreq *rep; +nfs_rcvlock(struct nfsreq *rep) { - register int *statep; + int *statep; int error, slpflag, slptimeo = 0; /* make sure we still have our mountpoint */ @@ -2398,10 +2074,9 @@ nfs_rcvlock(rep) * call nfs_sigintr periodically above. */ if (rep->r_procp != NULL && - (rep->r_procp->p_flag & P_NOREMOTEHANG) != 0) + (proc_noremotehang(rep->r_procp)) != 0) slptimeo = hz; - (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), - "nfsrcvlk", slptimeo); + tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; @@ -2417,7 +2092,7 @@ nfs_rcvlock(rep) } /* * nfs_reply will handle it if reply already arrived. - * (We may have slept or been preempted while on network funnel). + * (We may have slept or been preempted). */ FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *statep); *statep |= NFSSTA_RCVLOCK; @@ -2428,10 +2103,9 @@ nfs_rcvlock(rep) * Unlock the stream socket for others. */ static void -nfs_rcvunlock(rep) - register struct nfsreq *rep; +nfs_rcvunlock(struct nfsreq *rep) { - register int *statep; + int *statep; if (rep->r_nmp == NULL) return; @@ -2453,71 +2127,77 @@ nfs_rcvunlock(rep) * Socket upcall routine for the nfsd sockets. * The caddr_t arg is a pointer to the "struct nfssvc_sock". * Essentially do as much as possible non-blocking, else punt and it will - * be called with M_WAIT from an nfsd. - */ - /* - * Needs to run under network funnel + * be called with MBUF_WAITOK from an nfsd. 
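 *
 * Locking sketch (summarizing the two functions below): the upcall
 * entry takes the per-socket rwlock exclusively and hands off to the
 * locked worker, which is itself responsible for dropping ns_rwlock
 * on the MBUF_DONTWAIT (upcall) path:
 *
 *	lck_rw_lock_exclusive(&slp->ns_rwlock);
 *	nfsrv_rcv_locked(so, slp, waitflag);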
*/ void -nfsrv_rcv(so, arg, waitflag) - struct socket *so; - caddr_t arg; - int waitflag; +nfsrv_rcv(socket_t so, caddr_t arg, int waitflag) { - register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; - register struct mbuf *m; - struct mbuf *mp, *mhck; - struct sockaddr *nam; - struct uio auio; - int flags, ns_nflag=0, error; - struct sockaddr_in *sin; + struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; - if ((slp->ns_flag & SLP_VALID) == 0) + if (!nfs_numnfsd || !(slp->ns_flag & SLP_VALID)) return; + + lck_rw_lock_exclusive(&slp->ns_rwlock); + nfsrv_rcv_locked(so, slp, waitflag); + /* Note: ns_rwlock gets dropped when called with MBUF_DONTWAIT */ +} +void +nfsrv_rcv_locked(socket_t so, struct nfssvc_sock *slp, int waitflag) +{ + mbuf_t m, mp, mhck, m2; + int ns_flag=0, error; + struct msghdr msg; + size_t bytes_read; + + if ((slp->ns_flag & SLP_VALID) == 0) { + if (waitflag == MBUF_DONTWAIT) + lck_rw_done(&slp->ns_rwlock); + return; + } + #ifdef notdef /* * Define this to test for nfsds handling this under heavy load. */ - if (waitflag == M_DONTWAIT) { - ns_nflag = SLPN_NEEDQ; + if (waitflag == MBUF_DONTWAIT) { + ns_flag = SLP_NEEDQ; goto dorecs; } #endif - auio.uio_procp = NULL; - if (so->so_type == SOCK_STREAM) { + if (slp->ns_sotype == SOCK_STREAM) { /* * If there are already records on the queue, defer soreceive() * to an nfsd so that there is feedback to the TCP layer that * the nfs servers are heavily loaded. */ - if (slp->ns_rec && waitflag == M_DONTWAIT) { - ns_nflag = SLPN_NEEDQ; + if (slp->ns_rec && waitflag == MBUF_DONTWAIT) { + ns_flag = SLP_NEEDQ; goto dorecs; } /* * Do soreceive(). */ - auio.uio_resid = 1000000000; - flags = MSG_DONTWAIT; - error = soreceive(so, (struct sockaddr **) 0, &auio, &mp, (struct mbuf **)0, &flags); - if (error || mp == (struct mbuf *)0) { + bytes_read = 1000000000; + error = sock_receivembuf(so, NULL, &mp, MSG_DONTWAIT, &bytes_read); + if (error || mp == NULL) { if (error == EWOULDBLOCK) - ns_nflag = SLPN_NEEDQ; + ns_flag = SLP_NEEDQ; else - ns_nflag = SLPN_DISCONN; + ns_flag = SLP_DISCONN; goto dorecs; } m = mp; if (slp->ns_rawend) { - slp->ns_rawend->m_next = m; - slp->ns_cc += 1000000000 - auio.uio_resid; + if ((error = mbuf_setnext(slp->ns_rawend, m))) + panic("nfsrv_rcv: mbuf_setnext failed %d\n", error); + slp->ns_cc += bytes_read; } else { slp->ns_raw = m; - slp->ns_cc = 1000000000 - auio.uio_resid; + slp->ns_cc = bytes_read; } - while (m->m_next) - m = m->m_next; + while ((m2 = mbuf_next(m))) + m = m2; slp->ns_rawend = m; /* @@ -2526,48 +2206,59 @@ nfsrv_rcv(so, arg, waitflag) error = nfsrv_getstream(slp, waitflag); if (error) { if (error == EPERM) - ns_nflag = SLPN_DISCONN; + ns_flag = SLP_DISCONN; else - ns_nflag = SLPN_NEEDQ; + ns_flag = SLP_NEEDQ; } } else { + struct sockaddr_storage nam; + + bzero(&msg, sizeof(msg)); + msg.msg_name = (caddr_t)&nam; + msg.msg_namelen = sizeof(nam); + do { - auio.uio_resid = 1000000000; - flags = MSG_DONTWAIT | MSG_NEEDSA; - nam = 0; - mp = 0; - error = soreceive(so, &nam, &auio, &mp, - (struct mbuf **)0, &flags); - + bytes_read = 1000000000; + error = sock_receivembuf(so, &msg, &mp, MSG_DONTWAIT | MSG_NEEDSA, &bytes_read); if (mp) { - if (nam) { - MGET(mhck, M_WAIT, MT_SONAME); - mhck->m_len = nam->sa_len; - sin = mtod(mhck, struct sockaddr_in *); - bcopy(nam, sin, sizeof(struct sockaddr_in)); - mhck->m_hdr.mh_len = sizeof(struct sockaddr_in); - + if (msg.msg_name && (mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &mhck) == 0)) { + mbuf_setlen(mhck, nam.ss_len); + bcopy(&nam, mbuf_data(mhck), 
nam.ss_len); m = mhck; - m->m_next = mp; - } else + if (mbuf_setnext(m, mp)) { + /* trouble... just drop it */ + printf("nfsrv_rcv: mbuf_setnext failed\n"); + mbuf_free(mhck); + m = mp; + } + } else { m = mp; + } if (slp->ns_recend) - slp->ns_recend->m_nextpkt = m; + mbuf_setnextpkt(slp->ns_recend, m); else slp->ns_rec = m; slp->ns_recend = m; - m->m_nextpkt = (struct mbuf *)0; - } - if (nam) { - FREE(nam, M_SONAME); + mbuf_setnextpkt(m, NULL); } +#if 0 if (error) { - if ((so->so_proto->pr_flags & PR_CONNREQUIRED) + /* + * This may be needed in the future to support + * non-byte-stream connection-oriented protocols + * such as SCTP. + */ + /* + * This (slp->ns_sotype == SOCK_STREAM) should really + * be a check for PR_CONNREQUIRED. + */ + if ((slp->ns_sotype == SOCK_STREAM) && error != EWOULDBLOCK) { - ns_nflag = SLPN_DISCONN; + ns_flag = SLP_DISCONN; goto dorecs; } } +#endif } while (mp); } @@ -2575,13 +2266,16 @@ nfsrv_rcv(so, arg, waitflag) * Now try and process the request records, non-blocking. */ dorecs: - if (ns_nflag) - slp->ns_nflag |= ns_nflag; - if (waitflag == M_DONTWAIT && - (slp->ns_rec || (slp->ns_nflag & (SLPN_NEEDQ | SLPN_DISCONN)))) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - nfsrv_wakenfsd(slp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + if (ns_flag) + slp->ns_flag |= ns_flag; + if (waitflag == MBUF_DONTWAIT) { + int wake = (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))); + lck_rw_done(&slp->ns_rwlock); + if (wake && nfs_numnfsd) { + lck_mtx_lock(nfsd_mutex); + nfsrv_wakenfsd(slp); + lck_mtx_unlock(nfsd_mutex); + } } } @@ -2592,51 +2286,55 @@ dorecs: */ static int nfsrv_getstream(slp, waitflag) - register struct nfssvc_sock *slp; + struct nfssvc_sock *slp; int waitflag; { - register struct mbuf *m, **mpp; - register char *cp1, *cp2; - register int len; - struct mbuf *om, *m2, *recm; + mbuf_t m; + char *cp1, *cp2, *mdata; + int len, mlen, error; + mbuf_t om, m2, recm; u_long recmark; - if (slp->ns_nflag & SLPN_GETSTREAM) + if (slp->ns_flag & SLP_GETSTREAM) panic("nfs getstream"); - slp->ns_nflag |= SLPN_GETSTREAM; + slp->ns_flag |= SLP_GETSTREAM; for (;;) { if (slp->ns_reclen == 0) { if (slp->ns_cc < NFSX_UNSIGNED) { - slp->ns_nflag &= ~SLPN_GETSTREAM; + slp->ns_flag &= ~SLP_GETSTREAM; return (0); } m = slp->ns_raw; - if (m->m_len >= NFSX_UNSIGNED) { - bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); - m->m_data += NFSX_UNSIGNED; - m->m_len -= NFSX_UNSIGNED; + mdata = mbuf_data(m); + mlen = mbuf_len(m); + if (mlen >= NFSX_UNSIGNED) { + bcopy(mdata, (caddr_t)&recmark, NFSX_UNSIGNED); + mdata += NFSX_UNSIGNED; + mlen -= NFSX_UNSIGNED; + mbuf_setdata(m, mdata, mlen); } else { cp1 = (caddr_t)&recmark; - cp2 = mtod(m, caddr_t); + cp2 = mdata; while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { - while (m->m_len == 0) { - m = m->m_next; - cp2 = mtod(m, caddr_t); + while (mlen == 0) { + m = mbuf_next(m); + cp2 = mbuf_data(m); + mlen = mbuf_len(m); } *cp1++ = *cp2++; - m->m_data++; - m->m_len--; + mlen--; + mbuf_setdata(m, cp2, mlen); } } slp->ns_cc -= NFSX_UNSIGNED; recmark = ntohl(recmark); slp->ns_reclen = recmark & ~0x80000000; if (recmark & 0x80000000) - slp->ns_nflag |= SLPN_LASTFRAG; + slp->ns_flag |= SLP_LASTFRAG; else - slp->ns_nflag &= ~SLPN_LASTFRAG; + slp->ns_flag &= ~SLP_LASTFRAG; if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { - slp->ns_nflag &= ~SLPN_GETSTREAM; + slp->ns_flag &= ~SLP_GETSTREAM; return (EPERM); } } @@ -2650,63 +2348,83 @@ nfsrv_getstream(slp, waitflag) recm = NULL; if 
(slp->ns_cc == slp->ns_reclen) { recm = slp->ns_raw; - slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; + slp->ns_raw = slp->ns_rawend = NULL; slp->ns_cc = slp->ns_reclen = 0; } else if (slp->ns_cc > slp->ns_reclen) { len = 0; m = slp->ns_raw; - om = (struct mbuf *)0; + mlen = mbuf_len(m); + mdata = mbuf_data(m); + om = NULL; while (len < slp->ns_reclen) { - if ((len + m->m_len) > slp->ns_reclen) { - m2 = m_copym(m, 0, slp->ns_reclen - len, - waitflag); - if (m2) { - if (om) { - om->m_next = m2; - recm = slp->ns_raw; - } else - recm = m2; - m->m_data += slp->ns_reclen - len; - m->m_len -= slp->ns_reclen - len; - len = slp->ns_reclen; - } else { - slp->ns_nflag &= ~SLPN_GETSTREAM; + if ((len + mlen) > slp->ns_reclen) { + if (mbuf_copym(m, 0, slp->ns_reclen - len, waitflag, &m2)) { + slp->ns_flag &= ~SLP_GETSTREAM; return (EWOULDBLOCK); } - } else if ((len + m->m_len) == slp->ns_reclen) { + if (om) { + if (mbuf_setnext(om, m2)) { + /* trouble... just drop it */ + printf("nfsrv_getstream: mbuf_setnext failed\n"); + mbuf_freem(m2); + slp->ns_flag &= ~SLP_GETSTREAM; + return (EWOULDBLOCK); + } + recm = slp->ns_raw; + } else { + recm = m2; + } + mdata += slp->ns_reclen - len; + mlen -= slp->ns_reclen - len; + mbuf_setdata(m, mdata, mlen); + len = slp->ns_reclen; + } else if ((len + mlen) == slp->ns_reclen) { om = m; - len += m->m_len; - m = m->m_next; + len += mlen; + m = mbuf_next(m); recm = slp->ns_raw; - om->m_next = (struct mbuf *)0; + if (mbuf_setnext(om, NULL)) { + printf("nfsrv_getstream: mbuf_setnext failed 2\n"); + slp->ns_flag &= ~SLP_GETSTREAM; + return (EWOULDBLOCK); + } + mlen = mbuf_len(m); + mdata = mbuf_data(m); } else { om = m; - len += m->m_len; - m = m->m_next; + len += mlen; + m = mbuf_next(m); + mlen = mbuf_len(m); + mdata = mbuf_data(m); } } slp->ns_raw = m; slp->ns_cc -= len; slp->ns_reclen = 0; } else { - slp->ns_nflag &= ~SLPN_GETSTREAM; + slp->ns_flag &= ~SLP_GETSTREAM; return (0); } /* * Accumulate the fragments into a record. 
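 * The walk below appends recm to the ns_frag chain with
 * mbuf_setnext(); once a fragment carrying the SLP_LASTFRAG mark
 * completes the record, ns_frag is moved onto the ns_rec list
 * (linked by mbuf_setnextpkt()) for nfsrv_dorec() to pick up.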
*/ - mpp = &slp->ns_frag; - while (*mpp) - mpp = &((*mpp)->m_next); - *mpp = recm; - if (slp->ns_nflag & SLPN_LASTFRAG) { + if (slp->ns_frag == NULL) { + slp->ns_frag = recm; + } else { + m = slp->ns_frag; + while ((m2 = mbuf_next(m))) + m = m2; + if ((error = mbuf_setnext(m, recm))) + panic("nfsrv_getstream: mbuf_setnext failed 3, %d\n", error); + } + if (slp->ns_flag & SLP_LASTFRAG) { if (slp->ns_recend) - slp->ns_recend->m_nextpkt = slp->ns_frag; + mbuf_setnextpkt(slp->ns_recend, slp->ns_frag); else slp->ns_rec = slp->ns_frag; slp->ns_recend = slp->ns_frag; - slp->ns_frag = (struct mbuf *)0; + slp->ns_frag = NULL; } } } @@ -2716,39 +2434,42 @@ nfsrv_getstream(slp, waitflag) */ int nfsrv_dorec(slp, nfsd, ndp) - register struct nfssvc_sock *slp; + struct nfssvc_sock *slp; struct nfsd *nfsd; struct nfsrv_descript **ndp; { - register struct mbuf *m; - register struct mbuf *nam; - register struct nfsrv_descript *nd; + mbuf_t m; + mbuf_t nam; + struct nfsrv_descript *nd; int error; *ndp = NULL; - if ((slp->ns_flag & SLP_VALID) == 0 || - (m = slp->ns_rec) == (struct mbuf *)0) + if ((slp->ns_flag & SLP_VALID) == 0 || (slp->ns_rec == NULL)) return (ENOBUFS); - slp->ns_rec = m->m_nextpkt; + MALLOC_ZONE(nd, struct nfsrv_descript *, + sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); + if (!nd) + return (ENOMEM); + m = slp->ns_rec; + slp->ns_rec = mbuf_nextpkt(m); if (slp->ns_rec) - m->m_nextpkt = (struct mbuf *)0; + mbuf_setnextpkt(m, NULL); else - slp->ns_recend = (struct mbuf *)0; - if (m->m_type == MT_SONAME) { + slp->ns_recend = NULL; + if (mbuf_type(m) == MBUF_TYPE_SONAME) { nam = m; - m = m->m_next; - nam->m_next = NULL; + m = mbuf_next(m); + if ((error = mbuf_setnext(nam, NULL))) + panic("nfsrv_dorec: mbuf_setnext failed %d\n", error); } else nam = NULL; - MALLOC_ZONE(nd, struct nfsrv_descript *, - sizeof (struct nfsrv_descript), M_NFSRVDESC, M_WAITOK); nd->nd_md = nd->nd_mrep = m; nd->nd_nam2 = nam; - nd->nd_dpos = mtod(m, caddr_t); + nd->nd_dpos = mbuf_data(m); error = nfs_getreq(nd, nfsd, TRUE); if (error) { if (nam) - m_freem(nam); + mbuf_freem(nam); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); return (error); } @@ -2764,26 +2485,32 @@ nfsrv_dorec(slp, nfsd, ndp) */ int nfs_getreq(nd, nfsd, has_header) - register struct nfsrv_descript *nd; + struct nfsrv_descript *nd; struct nfsd *nfsd; int has_header; { - register int len, i; - register u_long *tl; - register long t1; - struct uio uio; - struct iovec iov; + int len, i; + u_long *tl; + long t1; + uio_t uiop; caddr_t dpos, cp2, cp; u_long nfsvers, auth_type; uid_t nickuid; - int error = 0, nqnfs = 0, ticklen; - struct mbuf *mrep, *md; - register struct nfsuid *nuidp; + int error = 0, ticklen; + mbuf_t mrep, md; + struct nfsuid *nuidp; + uid_t user_id; + gid_t group_id; + int ngroups; + struct ucred temp_cred; struct timeval tvin, tvout, now; + char uio_buf[ UIO_SIZEOF(1) ]; #if 0 /* until encrypted keys are implemented */ NFSKERBKEYSCHED_T keys; /* stores key schedule */ #endif + nd->nd_cr = NULL; + mrep = nd->nd_mrep; md = nd->nd_md; dpos = nd->nd_dpos; @@ -2791,7 +2518,7 @@ nfs_getreq(nd, nfsd, has_header) nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED); nd->nd_retxid = fxdr_unsigned(u_long, *tl++); if (*tl++ != rpc_call) { - m_freem(mrep); + mbuf_freem(mrep); return (EBADRPC); } } else @@ -2804,31 +2531,23 @@ nfs_getreq(nd, nfsd, has_header) return (0); } if (*tl != nfs_prog) { - if (*tl == nqnfs_prog) - nqnfs++; - else { - nd->nd_repstat = EPROGUNAVAIL; - nd->nd_procnum = NFSPROC_NOOP; - return (0); - } + nd->nd_repstat = 
EPROGUNAVAIL; + nd->nd_procnum = NFSPROC_NOOP; + return (0); } tl++; nfsvers = fxdr_unsigned(u_long, *tl++); - if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) || - (nfsvers != NQNFS_VER3 && nqnfs)) { + if ((nfsvers < NFS_VER2) || (nfsvers > NFS_VER3)) { nd->nd_repstat = EPROGMISMATCH; nd->nd_procnum = NFSPROC_NOOP; return (0); } - if (nqnfs) - nd->nd_flag = (ND_NFSV3 | ND_NQNFS); else if (nfsvers == NFS_VER3) nd->nd_flag = ND_NFSV3; nd->nd_procnum = fxdr_unsigned(u_long, *tl++); if (nd->nd_procnum == NFSPROC_NULL) return (0); - if (nd->nd_procnum >= NFS_NPROCS || - (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) || + if ((nd->nd_procnum >= NFS_NPROCS) || (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) { nd->nd_repstat = EPROCUNAVAIL; nd->nd_procnum = NFSPROC_NOOP; @@ -2839,7 +2558,7 @@ nfs_getreq(nd, nfsd, has_header) auth_type = *tl++; len = fxdr_unsigned(int, *tl++); if (len < 0 || len > RPCAUTH_MAXSIZ) { - m_freem(mrep); + mbuf_freem(mrep); return (EBADRPC); } @@ -2850,34 +2569,42 @@ nfs_getreq(nd, nfsd, has_header) if (auth_type == rpc_auth_unix) { len = fxdr_unsigned(int, *++tl); if (len < 0 || len > NFS_MAXNAMLEN) { - m_freem(mrep); + mbuf_freem(mrep); return (EBADRPC); } + bzero(&temp_cred, sizeof(temp_cred)); nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); - bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred)); - nd->nd_cr.cr_ref = 1; - nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); - nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); + user_id = fxdr_unsigned(uid_t, *tl++); + group_id = fxdr_unsigned(gid_t, *tl++); + temp_cred.cr_groups[0] = group_id; len = fxdr_unsigned(int, *tl); if (len < 0 || len > RPCAUTH_UNIXGIDS) { - m_freem(mrep); + mbuf_freem(mrep); return (EBADRPC); } nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED); for (i = 1; i <= len; i++) if (i < NGROUPS) - nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); + temp_cred.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); else tl++; - nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); - if (nd->nd_cr.cr_ngroups > 1) - nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups); + ngroups = (len >= NGROUPS) ? 
NGROUPS : (len + 1); + if (ngroups > 1) + nfsrvw_sort(&temp_cred.cr_groups[0], ngroups); len = fxdr_unsigned(int, *++tl); if (len < 0 || len > RPCAUTH_MAXSIZ) { - m_freem(mrep); + mbuf_freem(mrep); return (EBADRPC); } + temp_cred.cr_uid = user_id; + temp_cred.cr_ngroups = ngroups; + nd->nd_cr = kauth_cred_create(&temp_cred); + if (nd->nd_cr == NULL) { + nd->nd_repstat = ENOMEM; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } if (len > 0) nfsm_adv(nfsm_rndup(len)); } else if (auth_type == rpc_auth_kerb) { @@ -2885,19 +2612,23 @@ nfs_getreq(nd, nfsd, has_header) case RPCAKN_FULLNAME: ticklen = fxdr_unsigned(int, *tl); *((u_long *)nfsd->nfsd_authstr) = *tl; - uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED; - nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED; - if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { - m_freem(mrep); + uiop = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!uiop) { + nd->nd_repstat = ENOMEM; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } + + // LP64todo - fix this + nfsd->nfsd_authlen = (nfsm_rndup(ticklen) + (NFSX_UNSIGNED * 2)); + if ((nfsm_rndup(ticklen) + NFSX_UNSIGNED) > (len - 2 * NFSX_UNSIGNED)) { + mbuf_freem(mrep); return (EBADRPC); } - uio.uio_offset = 0; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_segflg = UIO_SYSSPACE; - iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4]; - iov.iov_len = RPCAUTH_MAXSIZ - 4; - nfsm_mtouio(&uio, uio.uio_resid); + uio_addiov(uiop, CAST_USER_ADDR_T(&nfsd->nfsd_authstr[4]), RPCAUTH_MAXSIZ - 4); + // LP64todo - fix this + nfsm_mtouio(uiop, uio_resid(uiop)); nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); if (*tl++ != rpc_auth_kerb || fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) { @@ -2942,7 +2673,7 @@ nfs_getreq(nd, nfsd, has_header) for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nickuid && + if (kauth_cred_getuid(nuidp->nu_cr) == nickuid && (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nd->nd_nam2))) @@ -2976,7 +2707,21 @@ nfs_getreq(nd, nfsd, has_header) nd->nd_procnum = NFSPROC_NOOP; return (0); } - nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr); + bzero(&temp_cred, sizeof(temp_cred)); + ngroups = nuidp->nu_cr->cr_ngroups; + for (i = 0; i < ngroups; i++) + temp_cred.cr_groups[i] = nuidp->nu_cr->cr_groups[i]; + if (ngroups > 1) + nfsrvw_sort(&temp_cred.cr_groups[0], ngroups); + + temp_cred.cr_uid = kauth_cred_getuid(nuidp->nu_cr); + temp_cred.cr_ngroups = ngroups; + nd->nd_cr = kauth_cred_create(&temp_cred); + if (!nd->nd_cr) { + nd->nd_repstat = ENOMEM; + nd->nd_procnum = NFSPROC_NOOP; + return (0); + } nd->nd_flag |= ND_KERBNICK; }; } else { @@ -2985,23 +2730,12 @@ nfs_getreq(nd, nfsd, has_header) return (0); } - /* - * For nqnfs, get piggybacked lease request. - */ - if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - nd->nd_flag |= fxdr_unsigned(int, *tl); - if (nd->nd_flag & ND_LEASE) { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - nd->nd_duration = fxdr_unsigned(int, *tl); - } else - nd->nd_duration = NQ_MINLEASE; - } else - nd->nd_duration = NQ_MINLEASE; nd->nd_md = md; nd->nd_dpos = dpos; return (0); nfsmout: + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); return (error); } @@ -3009,36 +2743,46 @@ nfsmout: * Search for a sleeping nfsd and wake it up. * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the * running nfsds will go look for the work in the nfssvc_sock list. 
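+ * The socket's ns_rwlock is taken exclusive while a waiting nfsd
+ * is handed the socket (ns_sref bumped, nfsd_slp set) and dropped
+ * before wakeup() so the woken thread can acquire it itself.  If
+ * no nfsd is waiting, SLP_DOREC marks the socket as having work
+ * and NFSD_CHECKSLP tells the running nfsds to scan the socket list.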
+ * Note: Must be called with nfsd_mutex held. */ void -nfsrv_wakenfsd(slp) - struct nfssvc_sock *slp; +nfsrv_wakenfsd(struct nfssvc_sock *slp) { - register struct nfsd *nd; + struct nfsd *nd; if ((slp->ns_flag & SLP_VALID) == 0) return; - TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) { - if (nd->nfsd_flag & NFSD_WAITING) { - nd->nfsd_flag &= ~NFSD_WAITING; - if (nd->nfsd_slp) - panic("nfsd wakeup"); - slp->ns_sref++; - nd->nfsd_slp = slp; - wakeup((caddr_t)nd); - return; + + lck_rw_lock_exclusive(&slp->ns_rwlock); + + if (nfsd_waiting) { + TAILQ_FOREACH(nd, &nfsd_head, nfsd_chain) { + if (nd->nfsd_flag & NFSD_WAITING) { + nd->nfsd_flag &= ~NFSD_WAITING; + if (nd->nfsd_slp) + panic("nfsd wakeup"); + slp->ns_sref++; + nd->nfsd_slp = slp; + lck_rw_done(&slp->ns_rwlock); + wakeup((caddr_t)nd); + return; + } } } + slp->ns_flag |= SLP_DOREC; + + lck_rw_done(&slp->ns_rwlock); + nfsd_head_flag |= NFSD_CHECKSLP; } #endif /* NFS_NOSERVER */ static int -nfs_msg(p, server, msg, error) - struct proc *p; - const char *server, *msg; - int error; +nfs_msg(proc_t p, + const char *server, + const char *msg, + int error) { tpr_t tpr; @@ -3056,51 +2800,43 @@ nfs_msg(p, server, msg, error) } void -nfs_down(rep, nmp, proc, msg, error, flags) - struct nfsreq *rep; +nfs_down(nmp, proc, error, flags, msg) struct nfsmount *nmp; - struct proc *proc; - const char *msg; + proc_t proc; int error, flags; + const char *msg; { if (nmp == NULL) return; if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) { - vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, - VQ_NOTRESP, 0); + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0); nmp->nm_state |= NFSSTA_TIMEO; } if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) { - vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, - VQ_NOTRESPLOCK, 0); + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 0); nmp->nm_state |= NFSSTA_LOCKTIMEO; } - if (rep) - rep->r_flags |= R_TPRINTFMSG; - nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); + nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); } void -nfs_up(rep, nmp, proc, msg, flags) - struct nfsreq *rep; +nfs_up(nmp, proc, flags, msg) struct nfsmount *nmp; - struct proc *proc; - const char *msg; + proc_t proc; int flags; + const char *msg; { if (nmp == NULL) return; - if ((rep == NULL) || (rep->r_flags & R_TPRINTFMSG) != 0) - nfs_msg(proc, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); + if (msg) + nfs_msg(proc, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) { nmp->nm_state &= ~NFSSTA_TIMEO; - vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, - VQ_NOTRESP, 1); + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); } if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; - vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, - VQ_NOTRESPLOCK, 1); + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESPLOCK, 1); } } diff --git a/bsd/nfs/nfs_srvcache.c b/bsd/nfs/nfs_srvcache.c index 9e7007ddb..a4ce111ae 100644 --- a/bsd/nfs/nfs_srvcache.c +++ b/bsd/nfs/nfs_srvcache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -67,14 +67,15 @@ */ #include #include -#include +#include #include #include #include -#include +#include #include #include #include /* for dup_sockaddr */ +#include #include #if ISO @@ -96,8 +97,10 @@ LIST_HEAD(nfsrvhash, nfsrvcache) *nfsrvhashtbl; TAILQ_HEAD(nfsrvlru, nfsrvcache) nfsrvlruhead; u_long nfsrvhash; -#define TRUE 1 -#define FALSE 0 +lck_grp_t *nfsrv_reqcache_lck_grp; +lck_grp_attr_t *nfsrv_reqcache_lck_grp_attr; +lck_attr_t *nfsrv_reqcache_lck_attr; +lck_mtx_t *nfsrv_reqcache_mutex; #define NETFAMILY(rp) \ (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO) @@ -129,9 +132,6 @@ static int nonidempotent[NFS_NPROCS] = { FALSE, FALSE, FALSE, - FALSE, - FALSE, - FALSE, }; /* True iff the rpc reply is an nfs status ONLY! */ @@ -162,6 +162,12 @@ static int nfsv2_repstat[NFS_NPROCS] = { void nfsrv_initcache() { + /* init nfs server request cache mutex */ + nfsrv_reqcache_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfsrv_reqcache_lck_grp_attr); + nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", nfsrv_reqcache_lck_grp_attr); + nfsrv_reqcache_lck_attr = lck_attr_alloc_init(); + nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, nfsrv_reqcache_lck_attr); nfsrvhashtbl = hashinit(desirednfsrvcache, M_NFSD, &nfsrvhash); TAILQ_INIT(&nfsrvlruhead); @@ -183,15 +189,15 @@ nfsrv_initcache() */ int nfsrv_getcache(nd, slp, repp) - register struct nfsrv_descript *nd; + struct nfsrv_descript *nd; struct nfssvc_sock *slp; - struct mbuf **repp; + mbuf_t *repp; { - register struct nfsrvcache *rp; - struct mbuf *mb; + struct nfsrvcache *rp; + mbuf_t mb; struct sockaddr_in *saddr; caddr_t bpos; - int ret; + int ret, error; /* * Don't cache recent requests for reliable transport protocols. 
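/*
 * Editorial sketch (not part of the patch): the request cache below
 * guards entries with nfsrv_reqcache_mutex plus a hand-rolled
 * RC_LOCKED/RC_WANTED sleep lock, where tsleep()/wakeup() on the
 * entry's address act like a condition-variable wait/broadcast.  A
 * minimal user-space analogue, with hypothetical names:
 */
#include <pthread.h>

#define RC_LOCKED	0x01
#define RC_WANTED	0x02

struct cache_entry {
	int rc_flag;
};

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cache_cv    = PTHREAD_COND_INITIALIZER;

/* Call with cache_mutex held; returns with the entry locked. */
static void
entry_lock(struct cache_entry *rp)
{
	while (rp->rc_flag & RC_LOCKED) {
		rp->rc_flag |= RC_WANTED;
		pthread_cond_wait(&cache_cv, &cache_mutex); /* ~ tsleep(rp) */
	}
	rp->rc_flag |= RC_LOCKED;
}

/* Call with cache_mutex held. */
static void
entry_unlock(struct cache_entry *rp)
{
	rp->rc_flag &= ~RC_LOCKED;
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		pthread_cond_broadcast(&cache_cv); /* ~ wakeup(rp) */
	}
}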
@@ -199,12 +205,12 @@ nfsrv_getcache(nd, slp, repp) */ if (!nd->nd_nam2) return (RC_DOIT); + lck_mtx_lock(nfsrv_reqcache_mutex); loop: for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; rp = rp->rc_hash.le_next) { if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { - NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff)); if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); @@ -219,20 +225,23 @@ loop: if (rp->rc_state == RC_UNUSED) panic("nfsrv cache"); if (rp->rc_state == RC_INPROG) { - nfsstats.srvcache_inproghits++; + OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_inproghits); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { - nfsstats.srvcache_nonidemdonehits++; - nfs_rephead(0, nd, slp, rp->rc_status, - 0, (u_quad_t *)0, repp, &mb, &bpos); + OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits); + nfs_rephead(0, nd, slp, rp->rc_status, repp, &mb, &bpos); ret = RC_REPLY; } else if (rp->rc_flag & RC_REPMBUF) { - nfsstats.srvcache_nonidemdonehits++; - *repp = m_copym(rp->rc_reply, 0, M_COPYALL, - M_WAIT); - ret = RC_REPLY; + OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits); + error = mbuf_copym(rp->rc_reply, 0, MBUF_COPYALL, MBUF_WAITOK, repp); + if (error) { + printf("nfsrv cache: reply copym failed for nonidem request hit\n"); + ret = RC_DROPIT; + } else { + ret = RC_REPLY; + } } else { - nfsstats.srvcache_idemdonehits++; + OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_idemdonehits); rp->rc_state = RC_INPROG; ret = RC_DOIT; } @@ -241,18 +250,31 @@ loop: rp->rc_flag &= ~RC_WANTED; wakeup((caddr_t)rp); } + lck_mtx_unlock(nfsrv_reqcache_mutex); return (ret); } } - nfsstats.srvcache_misses++; - NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff)); + OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_misses); if (numnfsrvcache < desirednfsrvcache) { + /* try to allocate a new entry */ MALLOC(rp, struct nfsrvcache *, sizeof *rp, M_NFSD, M_WAITOK); - bzero((char *)rp, sizeof *rp); - numnfsrvcache++; - rp->rc_flag = RC_LOCKED; + if (rp) { + bzero((char *)rp, sizeof *rp); + numnfsrvcache++; + rp->rc_flag = RC_LOCKED; + } } else { + rp = NULL; + } + if (!rp) { + /* try to reuse the least recently used entry */ rp = nfsrvlruhead.tqh_first; + if (!rp) { + /* no entry to reuse? 
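+ * (the MALLOC above failed and the LRU list is still empty,
+ * i.e. nothing has been cached yet, so there is nothing to recycle)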
*/ + /* OK, we just won't be able to cache this request */ + lck_mtx_unlock(nfsrv_reqcache_mutex); + return (RC_DOIT); + } while ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); @@ -262,15 +284,15 @@ loop: LIST_REMOVE(rp, rc_hash); TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru); if (rp->rc_flag & RC_REPMBUF) - m_freem(rp->rc_reply); + mbuf_freem(rp->rc_reply); if (rp->rc_flag & RC_NAM) - MFREE(rp->rc_nam, mb); + mbuf_freem(rp->rc_nam); rp->rc_flag &= (RC_LOCKED | RC_WANTED); } TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru); rp->rc_state = RC_INPROG; rp->rc_xid = nd->nd_retxid; - saddr = mtod(nd->nd_nam, struct sockaddr_in *); + saddr = mbuf_data(nd->nd_nam); switch (saddr->sin_family) { case AF_INET: rp->rc_flag |= RC_INETADDR; @@ -278,8 +300,11 @@ loop: break; case AF_ISO: default: - rp->rc_flag |= RC_NAM; - rp->rc_nam = m_copym(nd->nd_nam, 0, M_COPYALL, M_WAIT); + error = mbuf_copym(nd->nd_nam, 0, MBUF_COPYALL, MBUF_WAITOK, &rp->rc_nam); + if (error) + printf("nfsrv cache: nam copym failed\n"); + else + rp->rc_flag |= RC_NAM; break; }; rp->rc_proc = nd->nd_procnum; @@ -289,6 +314,7 @@ loop: rp->rc_flag &= ~RC_WANTED; wakeup((caddr_t)rp); } + lck_mtx_unlock(nfsrv_reqcache_mutex); return (RC_DOIT); } @@ -297,20 +323,21 @@ loop: */ void nfsrv_updatecache(nd, repvalid, repmbuf) - register struct nfsrv_descript *nd; + struct nfsrv_descript *nd; int repvalid; - struct mbuf *repmbuf; + mbuf_t repmbuf; { - register struct nfsrvcache *rp; + struct nfsrvcache *rp; + int error; if (!nd->nd_nam2) return; + lck_mtx_lock(nfsrv_reqcache_mutex); loop: for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != 0; rp = rp->rc_hash.le_next) { if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc && netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { - NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff)); if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0); @@ -328,9 +355,9 @@ loop: rp->rc_status = nd->nd_repstat; rp->rc_flag |= RC_REPSTATUS; } else { - rp->rc_reply = m_copym(repmbuf, - 0, M_COPYALL, M_WAIT); - rp->rc_flag |= RC_REPMBUF; + error = mbuf_copym(repmbuf, 0, MBUF_COPYALL, MBUF_WAITOK, &rp->rc_reply); + if (!error) + rp->rc_flag |= RC_REPMBUF; } } rp->rc_flag &= ~RC_LOCKED; @@ -338,10 +365,11 @@ loop: rp->rc_flag &= ~RC_WANTED; wakeup((caddr_t)rp); } + lck_mtx_unlock(nfsrv_reqcache_mutex); return; } } - NFS_DPF(RC, ("L%03x", nd->nd_retxid & 0xfff)); + lck_mtx_unlock(nfsrv_reqcache_mutex); } /* @@ -350,8 +378,9 @@ loop: void nfsrv_cleancache() { - register struct nfsrvcache *rp, *nextrp; + struct nfsrvcache *rp, *nextrp; + lck_mtx_lock(nfsrv_reqcache_mutex); for (rp = nfsrvlruhead.tqh_first; rp != 0; rp = nextrp) { nextrp = rp->rc_lru.tqe_next; LIST_REMOVE(rp, rc_hash); @@ -359,6 +388,7 @@ nfsrv_cleancache() _FREE(rp, M_NFSD); } numnfsrvcache = 0; + lck_mtx_unlock(nfsrv_reqcache_mutex); } #endif /* NFS_NOSERVER */ diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 836b85f0f..d0c970018 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -66,25 +66,25 @@ */ #include #include +#include #include #include -#include -#include -#include -#include +#include +#include +#include #include #include #include #include #include -#include +#include #include +#include +#include +#include #include #include -#include - -#include #include #include @@ -96,7 +96,6 @@ #include #include #include -#include #include #include @@ -129,7 +128,9 @@ u_long nfs_xdrneg1; u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; -u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; +u_long nfs_prog, nfs_true, nfs_false; +__private_extern__ int nfs_mbuf_mlen = 0, nfs_mbuf_mhlen = 0, + nfs_mbuf_minclsize = 0, nfs_mbuf_mclbytes = 0; /* And other global data */ static u_long nfs_xid = 0; @@ -144,15 +145,28 @@ enum vtype nv3tov_type[8]= { int nfs_mount_type; int nfs_ticks; +lck_grp_t *nfsd_lck_grp; +lck_grp_attr_t *nfsd_lck_grp_attr; +lck_attr_t *nfsd_lck_attr; +lck_mtx_t *nfsd_mutex; + +lck_grp_attr_t *nfs_slp_group_attr; +lck_attr_t *nfs_slp_lock_attr; +lck_grp_t *nfs_slp_rwlock_group; +lck_grp_t *nfs_slp_mutex_group; + struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; -int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; -struct nfs_bufq nfs_bufq; -struct nqtimerhead nqtimerhead; -struct nqfhhashhead *nqfhhashtbl; -u_long nqfhhash; + +struct nfsexpfslist nfs_exports; +struct nfsexphashhead *nfsexphashtbl; +u_long nfsexphash; +lck_grp_attr_t *nfs_export_group_attr; +lck_attr_t *nfs_export_lock_attr; +lck_grp_t *nfs_export_rwlock_group; +lck_rw_t nfs_export_rwlock; #ifndef NFS_NOSERVER /* @@ -181,9 +195,6 @@ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, - NFSPROC_NOOP, - NFSPROC_NOOP, - NFSPROC_NOOP, NFSPROC_NOOP }; @@ -214,10 +225,7 @@ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, - NFSV2PROC_NOOP, - NFSV2PROC_NOOP, - NFSV2PROC_NOOP, - NFSV2PROC_NOOP, + NFSV2PROC_NOOP }; #ifndef NFS_NOSERVER @@ -581,12 +589,7 @@ static short *nfsrv_v3errmap[] = { #endif /* NFS_NOSERVER */ extern struct nfsrtt nfsrtt; -extern time_t nqnfsstarttime; -extern int nqsrv_clockskew; -extern int nqsrv_writeslack; -extern int nqsrv_maxlease; extern struct nfsstats nfsstats; -extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; @@ -600,46 +603,20 @@ LIST_HEAD(nfsnodehashhead, nfsnode); * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ -struct mbuf * -nfsm_reqh(vp, procid, hsiz, bposp) - struct vnode *vp; - u_long procid; - int hsiz; - caddr_t *bposp; +int +nfsm_reqh(int hsiz, caddr_t *bposp, mbuf_t *mbp) { - register struct mbuf *mb; - register u_long *tl; - register caddr_t bpos; - struct mbuf *mb2; - struct nfsmount *nmp; - int nqflag; - - MGET(mb, M_WAIT, MT_DATA); - if (hsiz >= MINCLSIZE) - MCLGET(mb, M_WAIT); - mb->m_len = 0; - bpos = mtod(mb, caddr_t); + int error; - /* - * For NQNFS, add lease request. 
- */ - if (vp) { - nmp = VFSTONFS(vp->v_mount); - if (nmp && (nmp->nm_flag & NFSMNT_NQNFS)) { - nqflag = NQNFS_NEEDLEASE(vp, procid); - if (nqflag) { - nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); - *tl++ = txdr_unsigned(nqflag); - *tl = txdr_unsigned(nmp->nm_leaseterm); - } else { - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = 0; - } - } - } - /* Finally, return values */ - *bposp = bpos; - return (mb); + *mbp = NULL; + if (hsiz >= nfs_mbuf_minclsize) + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, mbp); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, mbp); + if (error) + return (error); + *bposp = mbuf_data(*mbp); + return (0); } /* @@ -648,10 +625,10 @@ nfsm_reqh(vp, procid, hsiz, bposp) * come from outside of the kernel. * Returns the head of the mbuf list. */ -struct mbuf * +int nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, - verf_str, mrest, mrest_len, mbp, xidp) - register struct ucred *cr; + verf_str, mrest, mrest_len, mbp, xidp, mreqp) + kauth_cred_t cr; int nmflag; int procid; int auth_type; @@ -659,31 +636,40 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, char *auth_str; int verf_len; char *verf_str; - struct mbuf *mrest; + mbuf_t mrest; int mrest_len; - struct mbuf **mbp; + mbuf_t *mbp; u_long *xidp; + mbuf_t *mreqp; { - register struct mbuf *mb; - register u_long *tl; - register caddr_t bpos; - register int i; - struct mbuf *mreq, *mb2; - int siz, grpsiz, authsiz; + mbuf_t mb; + u_long *tl; + caddr_t bpos; + int i, error, len; + mbuf_t mreq, mb2; + int siz, grpsiz, authsiz, mlen; struct timeval tv; authsiz = nfsm_rndup(auth_len); - MGETHDR(mb, M_WAIT, MT_DATA); - if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { - MCLGET(mb, M_WAIT); - } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { - MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); + len = authsiz + 10 * NFSX_UNSIGNED; + if (len >= nfs_mbuf_minclsize) { + error = mbuf_getpacket(MBUF_WAITOK, &mb); } else { - MH_ALIGN(mb, 8 * NFSX_UNSIGNED); + error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mb); + if (!error) { + if (len < nfs_mbuf_mhlen) + mbuf_align_32(mb, len); + else + mbuf_align_32(mb, 8 * NFSX_UNSIGNED); + } + } + if (error) { + /* unable to allocate packet */ + /* XXX nfsstat? */ + return (error); } - mb->m_len = 0; mreq = mb; - bpos = mtod(mb, caddr_t); + bpos = mbuf_data(mb); /* * First the RPC header. @@ -714,16 +700,11 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; - if (nmflag & NFSMNT_NQNFS) { - *tl++ = txdr_unsigned(NQNFS_PROG); - *tl++ = txdr_unsigned(NQNFS_VER3); - } else { - *tl++ = txdr_unsigned(NFS_PROG); - if (nmflag & NFSMNT_NFSV3) - *tl++ = txdr_unsigned(NFS_VER3); - else - *tl++ = txdr_unsigned(NFS_VER2); - } + *tl++ = txdr_unsigned(NFS_PROG); + if (nmflag & NFSMNT_NFSV3) + *tl++ = txdr_unsigned(NFS_VER3); + else + *tl++ = txdr_unsigned(NFS_VER2); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else @@ -739,7 +720,7 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, nfsm_build(tl, u_long *, auth_len); *tl++ = 0; /* stamp ?? 
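 * (the AUTH_UNIX stamp field is an arbitrary caller-chosen
 * value per RFC 1057, so zero is acceptable)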
*/ *tl++ = 0; /* NULL hostname */ - *tl++ = txdr_unsigned(cr->cr_uid); + *tl++ = txdr_unsigned(kauth_cred_getuid(cr)); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); @@ -748,19 +729,28 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, break; case RPCAUTH_KERB4: siz = auth_len; + mlen = mbuf_len(mb); while (siz > 0) { - if (M_TRAILINGSPACE(mb) == 0) { - MGET(mb2, M_WAIT, MT_DATA); - if (siz >= MINCLSIZE) - MCLGET(mb2, M_WAIT); - mb->m_next = mb2; + if (mbuf_trailingspace(mb) == 0) { + mb2 = NULL; + if (siz >= nfs_mbuf_minclsize) + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); + if (!error) + error = mbuf_setnext(mb, mb2); + if (error) { + mbuf_freem(mreq); + return (error); + } mb = mb2; - mb->m_len = 0; - bpos = mtod(mb, caddr_t); + mlen = 0; + bpos = mbuf_data(mb); } - i = min(siz, M_TRAILINGSPACE(mb)); + i = min(siz, mbuf_trailingspace(mb)); bcopy(auth_str, bpos, i); - mb->m_len += i; + mlen += i; + mbuf_setlen(mb, mlen); auth_str += i; bpos += i; siz -= i; @@ -768,7 +758,8 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; - mb->m_len += siz; + mlen += siz; + mbuf_setlen(mb, mlen); } break; }; @@ -778,22 +769,31 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, */ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (verf_str) { + mlen = mbuf_len(mb); *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { - if (M_TRAILINGSPACE(mb) == 0) { - MGET(mb2, M_WAIT, MT_DATA); - if (siz >= MINCLSIZE) - MCLGET(mb2, M_WAIT); - mb->m_next = mb2; + if (mbuf_trailingspace(mb) == 0) { + mb2 = NULL; + if (siz >= nfs_mbuf_minclsize) + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); + if (!error) + error = mbuf_setnext(mb, mb2); + if (error) { + mbuf_freem(mreq); + return (error); + } mb = mb2; - mb->m_len = 0; - bpos = mtod(mb, caddr_t); + mlen = 0; + bpos = mbuf_data(mb); } - i = min(siz, M_TRAILINGSPACE(mb)); + i = min(siz, mbuf_trailingspace(mb)); bcopy(verf_str, bpos, i); - mb->m_len += i; + mlen += i; + mbuf_setlen(mb, mlen); verf_str += i; bpos += i; siz -= i; @@ -801,17 +801,24 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; - mb->m_len += siz; + mlen += siz; + mbuf_setlen(mb, mlen); } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } - mb->m_next = mrest; - mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; - mreq->m_pkthdr.rcvif = (struct ifnet *)0; + error = mbuf_pkthdr_setrcvif(mreq, 0); + if (!error) + error = mbuf_setnext(mb, mrest); + if (error) { + mbuf_freem(mreq); + return (error); + } + mbuf_pkthdr_setlen(mreq, authsiz + 10 * NFSX_UNSIGNED + mrest_len); *mbp = mb; - return (mreq); + *mreqp = mreq; + return (0); } /* @@ -819,62 +826,56 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) - struct mbuf **mrep; - register struct uio *uiop; + mbuf_t *mrep; + struct uio *uiop; int siz; caddr_t *dpos; { - register char *mbufcp, *uiocp; - register int xfer, left, len; - register struct mbuf *mp; + char *mbufcp, *uiocp; + int xfer, left, len; + mbuf_t mp; long uiosiz, rem; int 
error = 0; mp = *mrep; mbufcp = *dpos; - len = mtod(mp, caddr_t)+mp->m_len-mbufcp; + len = (caddr_t)mbuf_data(mp) + mbuf_len(mp) - mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { - if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) + if (uiop->uio_iovcnt <= 0 || uiop->uio_iovs.iov32p == NULL) return (EFBIG); - left = uiop->uio_iov->iov_len; - uiocp = uiop->uio_iov->iov_base; + // LP64todo - fix this! + left = uio_iov_len(uiop); + uiocp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { - mp = mp->m_next; + mp = mbuf_next(mp); if (mp == NULL) return (EBADRPC); - mbufcp = mtod(mp, caddr_t); - len = mp->m_len; + mbufcp = mbuf_data(mp); + len = mbuf_len(mp); } xfer = (left > len) ? len : left; -#ifdef notdef - /* Not Yet.. */ - if (uiop->uio_iov->iov_op != NULL) - (*(uiop->uio_iov->iov_op)) - (mbufcp, uiocp, xfer); + if (UIO_SEG_IS_USER_SPACE(uiop->uio_segflg)) + copyout(mbufcp, CAST_USER_ADDR_T(uiocp), xfer); else -#endif - if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); - else - copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; - uiop->uio_resid -= xfer; + uio_uio_resid_add(uiop, -xfer); } - if (uiop->uio_iov->iov_len <= siz) { + if (uio_iov_len(uiop) <= (size_t)siz) { uiop->uio_iovcnt--; - uiop->uio_iov++; + uio_next_iov(uiop); } else { - uiop->uio_iov->iov_base += uiosiz; - uiop->uio_iov->iov_len -= uiosiz; + uio_iov_base_add(uiop, uiosiz); + uio_iov_len_add(uiop, -uiosiz); } siz -= uiosiz; } @@ -895,78 +896,84 @@ nfsm_mbuftouio(mrep, uiop, siz, dpos) */ int nfsm_uiotombuf(uiop, mq, siz, bpos) - register struct uio *uiop; - struct mbuf **mq; + struct uio *uiop; + mbuf_t *mq; int siz; caddr_t *bpos; { - register char *uiocp; - register struct mbuf *mp, *mp2; - register int xfer, left, mlen; - int uiosiz, clflg, rem; + char *uiocp; + mbuf_t mp, mp2; + int xfer, left, mlen, mplen; + int uiosiz, clflg, rem, error; char *cp; if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); - if (siz > MLEN) /* or should it >= MCLBYTES ?? */ + if (siz > nfs_mbuf_mlen) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; + mplen = mbuf_len(mp); while (siz > 0) { - left = uiop->uio_iov->iov_len; - uiocp = uiop->uio_iov->iov_base; + // LP64todo - fix this! + left = uio_iov_len(uiop); + uiocp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); if (left > siz) left = siz; uiosiz = left; while (left > 0) { - mlen = M_TRAILINGSPACE(mp); + mlen = mbuf_trailingspace(mp); if (mlen == 0) { - MGET(mp, M_WAIT, MT_DATA); + mp = NULL; if (clflg) - MCLGET(mp, M_WAIT); - mp->m_len = 0; - mp2->m_next = mp; + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); + if (!error) + error = mbuf_setnext(mp2, mp); + if (error) + return (error); + mplen = 0; mp2 = mp; - mlen = M_TRAILINGSPACE(mp); + mlen = mbuf_trailingspace(mp); } xfer = (left > mlen) ? mlen : left; -#ifdef notdef - /* Not Yet.. 
*/ - if (uiop->uio_iov->iov_op != NULL) - (*(uiop->uio_iov->iov_op)) - (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); + if (UIO_SEG_IS_USER_SPACE(uiop->uio_segflg)) + copyin(CAST_USER_ADDR_T(uiocp), (caddr_t)mbuf_data(mp) + mplen, xfer); else -#endif - if (uiop->uio_segflg == UIO_SYSSPACE) - bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); - else - copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); - mp->m_len += xfer; + bcopy(uiocp, (caddr_t)mbuf_data(mp) + mplen, xfer); + mplen += xfer; + mbuf_setlen(mp, mplen); left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; - uiop->uio_resid -= xfer; + uio_uio_resid_add(uiop, -xfer); } - uiop->uio_iov->iov_base += uiosiz; - uiop->uio_iov->iov_len -= uiosiz; + uio_iov_base_add(uiop, uiosiz); + uio_iov_len_add(uiop, -uiosiz); siz -= uiosiz; } if (rem > 0) { - if (rem > M_TRAILINGSPACE(mp)) { - MGET(mp, M_WAIT, MT_DATA); - mp->m_len = 0; - mp2->m_next = mp; + if (rem > mbuf_trailingspace(mp)) { + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); + if (!error) + error = mbuf_setnext(mp2, mp); + if (error) + return (error); + mplen = 0; } - cp = mtod(mp, caddr_t)+mp->m_len; + cp = (caddr_t)mbuf_data(mp) + mplen; for (left = 0; left < rem; left++) *cp++ = '\0'; - mp->m_len += rem; + mplen += rem; + mbuf_setlen(mp, mplen); *bpos = cp; - } else - *bpos = mtod(mp, caddr_t)+mp->m_len; + } else { + *bpos = (caddr_t)mbuf_data(mp) + mplen; + } *mq = mp; return (0); } @@ -979,60 +986,73 @@ nfsm_uiotombuf(uiop, mq, siz, bpos) */ int nfsm_disct(mdp, dposp, siz, left, cp2) - struct mbuf **mdp; + mbuf_t *mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { - register struct mbuf *mp, *mp2; - register int siz2, xfer; - register caddr_t p; + mbuf_t mp, mp2; + int siz2, xfer, error, mp2len; + caddr_t p, mp2data; mp = *mdp; while (left == 0) { - *mdp = mp = mp->m_next; + *mdp = mp = mbuf_next(mp); if (mp == NULL) return (EBADRPC); - left = mp->m_len; - *dposp = mtod(mp, caddr_t); + left = mbuf_len(mp); + *dposp = mbuf_data(mp); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; - } else if (mp->m_next == NULL) { + } else if (mbuf_next(mp) == NULL) { return (EBADRPC); - } else if (siz > MHLEN) { + } else if (siz > nfs_mbuf_mhlen) { panic("nfs S too big"); } else { - MGET(mp2, M_WAIT, MT_DATA); - mp2->m_next = mp->m_next; - mp->m_next = mp2; - mp->m_len -= left; + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mp2); + if (error) + return (error); + error = mbuf_setnext(mp2, mbuf_next(mp)); + if (!error) + error = mbuf_setnext(mp, mp2); + if (error) { + mbuf_free(mp2); + return (error); + } + mbuf_setlen(mp, mbuf_len(mp) - left); mp = mp2; - *cp2 = p = mtod(mp, caddr_t); + *cp2 = p = mbuf_data(mp); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; - mp2 = mp->m_next; + mp2 = mbuf_next(mp); + mp2data = mbuf_data(mp2); + mp2len = mbuf_len(mp2); /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); - xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; + xfer = (siz2 > mp2len) ? 
mp2len : siz2; if (xfer > 0) { - bcopy(mtod(mp2, caddr_t), p, xfer); - NFSMADV(mp2, xfer); - mp2->m_len -= xfer; + bcopy(mp2data, p, xfer); + mp2data += xfer; + mp2len -= xfer; + mbuf_setdata(mp2, mp2data, mp2len); p += xfer; siz2 -= xfer; } - if (siz2 > 0) - mp2 = mp2->m_next; + if (siz2 > 0) { + mp2 = mbuf_next(mp2); + mp2data = mbuf_data(mp2); + mp2len = mbuf_len(mp2); + } } - mp->m_len = siz; + mbuf_setlen(mp, siz); *mdp = mp2; - *dposp = mtod(mp2, caddr_t); + *dposp = mp2data; } return (0); } @@ -1042,25 +1062,25 @@ nfsm_disct(mdp, dposp, siz, left, cp2) */ int nfs_adv(mdp, dposp, offs, left) - struct mbuf **mdp; + mbuf_t *mdp; caddr_t *dposp; int offs; int left; { - register struct mbuf *m; - register int s; + mbuf_t m; + int s; m = *mdp; s = left; while (s < offs) { offs -= s; - m = m->m_next; + m = mbuf_next(m); if (m == NULL) return (EBADRPC); - s = m->m_len; + s = mbuf_len(m); } *mdp = m; - *dposp = mtod(m, caddr_t)+offs; + *dposp = (caddr_t)mbuf_data(m) + offs; return (0); } @@ -1069,64 +1089,74 @@ nfs_adv(mdp, dposp, offs, left) */ int nfsm_strtmbuf(mb, bpos, cp, siz) - struct mbuf **mb; + mbuf_t *mb; char **bpos; char *cp; long siz; { - register struct mbuf *m1 = 0, *m2; - long left, xfer, len, tlen; + mbuf_t m1 = NULL, m2; + long left, xfer, len, tlen, mlen; u_long *tl; - int putsize; + int putsize, error; putsize = 1; m2 = *mb; - left = M_TRAILINGSPACE(m2); - if (left > 0) { + left = mbuf_trailingspace(m2); + if (left >= NFSX_UNSIGNED) { tl = ((u_long *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; - m2->m_len += NFSX_UNSIGNED; + len = mbuf_len(m2); + len += NFSX_UNSIGNED; + mbuf_setlen(m2, len); if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; - m2->m_len += left; + len += left; + mbuf_setlen(m2, len); left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { - MGET(m1, M_WAIT, MT_DATA); - if (siz > MLEN) - MCLGET(m1, M_WAIT); - m1->m_len = NFSMSIZ(m1); - m2->m_next = m1; + m1 = NULL; + if (siz > nfs_mbuf_mlen) + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &m1); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &m1); + if (!error) + error = mbuf_setnext(m2, m1); + if (error) + return (error); + mlen = mbuf_maxlen(m1); + mbuf_setlen(m1, mlen); m2 = m1; - tl = mtod(m1, u_long *); + tl = mbuf_data(m1); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); - m1->m_len -= NFSX_UNSIGNED; + mlen -= NFSX_UNSIGNED; + mbuf_setlen(m1, mlen); tlen = NFSX_UNSIGNED; putsize = 0; } - if (siz < m1->m_len) { + if (siz < mlen) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { - xfer = len = m1->m_len; + xfer = len = mlen; } bcopy(cp, (caddr_t) tl, xfer); - m1->m_len = len+tlen; + mbuf_setlen(m1, len + tlen); siz -= xfer; cp += xfer; } *mb = m1; - *bpos = mtod(m1, caddr_t)+m1->m_len; + *bpos = (caddr_t)mbuf_data(m1) + mbuf_len(m1); return (0); } @@ -1134,10 +1164,9 @@ nfsm_strtmbuf(mb, bpos, cp, siz) * Called once to initialize data structures... */ int -nfs_init(vfsp) - struct vfsconf *vfsp; +nfs_init(struct vfsconf *vfsp) { - register int i; + int i; /* * Check to see if major data structures haven't bloated. 
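/*
 * Editorial sketch (not part of the patch): the idea behind the
 * nfsm_disct() rework above is a "pull-up" that guarantees the
 * caller siz contiguous bytes to dissect, copying across chained
 * buffers only when the data straddles a boundary.  A simplified
 * stand-in with hypothetical types (struct buf plays the mbuf):
 */
#include <stddef.h>
#include <string.h>

struct buf {
	struct buf *next;
	char *data;
	size_t len;
};

/*
 * Return a pointer to siz contiguous bytes starting at offset *off
 * in chain *bp, using scratch (assumed >= siz bytes) when a copy is
 * unavoidable.  NULL means the chain ran out of data (cf. EBADRPC).
 */
static char *
pullup(struct buf **bp, size_t *off, size_t siz, char *scratch)
{
	struct buf *b = *bp;
	size_t left = b->len - *off;
	char *p = scratch;

	if (left >= siz) {		/* already contiguous */
		char *cp = b->data + *off;
		*off += siz;
		return (cp);
	}
	while (siz > 0) {		/* copy across buffer boundaries */
		size_t n;
		if (b == NULL)
			return (NULL);
		n = (left < siz) ? left : siz;
		memcpy(p, b->data + *off, n);
		p += n;
		siz -= n;
		*off += n;
		if (siz > 0) {		/* advance to the next buffer */
			b = b->next;
			*off = 0;
			left = (b != NULL) ? b->len : 0;
		}
	}
	*bp = b;
	return (scratch);
}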
@@ -1158,6 +1187,7 @@ nfs_init(vfsp) printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } + nfs_mount_type = vfsp->vfc_typenum; nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); @@ -1170,37 +1200,57 @@ nfs_init(vfsp) rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); - nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); + nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { - nfs_iodwant[i] = (struct proc *)0; + nfs_iodwant[i] = NULL; nfs_iodmount[i] = (struct nfsmount *)0; } + /* init nfsiod mutex */ + nfs_iod_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfs_iod_lck_grp_attr); + nfs_iod_lck_grp = lck_grp_alloc_init("nfs_iod", nfs_iod_lck_grp_attr); + nfs_iod_lck_attr = lck_attr_alloc_init(); + nfs_iod_mutex = lck_mtx_alloc_init(nfs_iod_lck_grp, nfs_iod_lck_attr); + nfs_nbinit(); /* Init the nfsbuf table */ nfs_nhinit(); /* Init the nfsnode table */ nfs_lockinit(); /* Init the nfs lock state */ + #ifndef NFS_NOSERVER + /* init nfsd mutex */ + nfsd_lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(nfsd_lck_grp_attr); + nfsd_lck_grp = lck_grp_alloc_init("nfsd", nfsd_lck_grp_attr); + nfsd_lck_attr = lck_attr_alloc_init(); + nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, nfsd_lck_attr); + + /* init slp rwlock */ + nfs_slp_lock_attr = lck_attr_alloc_init(); + nfs_slp_group_attr = lck_grp_attr_alloc_init(); + nfs_slp_rwlock_group = lck_grp_alloc_init("nfs-slp-rwlock", nfs_slp_group_attr); + nfs_slp_mutex_group = lck_grp_alloc_init("nfs-slp-mutex", nfs_slp_group_attr); + + /* init export data structures */ + nfsexphashtbl = hashinit(8, M_TEMP, &nfsexphash); + LIST_INIT(&nfs_exports); + nfs_export_lock_attr = lck_attr_alloc_init(); + nfs_export_group_attr = lck_grp_attr_alloc_init(); + nfs_export_rwlock_group = lck_grp_alloc_init("nfs-export-rwlock", nfs_export_group_attr); + lck_rw_init(&nfs_export_rwlock, nfs_export_rwlock_group, nfs_export_lock_attr); + + lck_mtx_lock(nfsd_mutex); nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ + lck_mtx_unlock(nfsd_mutex); #endif - /* - * Initialize the nqnfs server stuff. - */ - if (nqnfsstarttime == 0) { - nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease - + nqsrv_clockskew + nqsrv_writeslack; - NQLOADNOVRAM(nqnfsstarttime); - CIRCLEQ_INIT(&nqtimerhead); - nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); - } - /* * Initialize reply list and start timer */ @@ -1208,83 +1258,52 @@ nfs_init(vfsp) nfs_timer(0); - -/* XXX CSM 12/4/97 Where are these declared in FreeBSD? */ -#ifdef notyet - /* - * Set up lease_check and lease_updatetime so that other parts - * of the system can call us, if we are loadable. - */ -#ifndef NFS_NOSERVER - default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; -#endif - lease_updatetime = nfs_lease_updatetime; -#endif vfsp->vfc_refcount++; /* make us non-unloadable */ return (0); } /* - * Attribute cache routines. 
- * nfs_loadattrcache() - loads or updates the cache contents from attributes - * that are on the mbuf list - * nfs_getattrcache() - returns valid attributes if found in cache, returns - * error otherwise + * initialize NFS's cache of mbuf constants */ +void +nfs_mbuf_init(void) +{ + struct mbuf_stat ms; + + mbuf_stats(&ms); + nfs_mbuf_mlen = ms.mlen; + nfs_mbuf_mhlen = ms.mhlen; + nfs_mbuf_minclsize = ms.minclsize; + nfs_mbuf_mclbytes = ms.mclbytes; +} /* - * Load the attribute cache (that lives in the nfsnode entry) with - * the values on the mbuf list and - * Iff vap not NULL - * copy the attributes to *vaper + * Parse the attributes that are in the mbuf list and store them in *nvap. */ int -nfs_loadattrcache(vpp, mdp, dposp, vaper, dontshrink, xidp) - struct vnode **vpp; - struct mbuf **mdp; - caddr_t *dposp; - struct vattr *vaper; - int dontshrink; - u_int64_t *xidp; +nfs_parsefattr(mbuf_t *mdp, caddr_t *dposp, int v3, struct nfs_vattr *nvap) { - register struct vnode *vp = *vpp; - register struct vattr *vap; - register struct nfs_fattr *fp; - register struct nfsnode *np; - register long t1; + struct nfs_fattr *fp; + long t1; caddr_t cp2; int error = 0, rdev; - struct mbuf *md; - enum vtype vtyp; + mbuf_t md; + enum vtype vtype; u_short vmode; - struct timespec mtime; - struct timeval now; - struct vnode *nvp; - int v3; - - FSDBG_TOP(527, vp, 0, *xidp >> 32, *xidp); - if (!VFSTONFS(vp->v_mount)) { - FSDBG_BOT(527, ENXIO, 1, 0, *xidp); - return (ENXIO); - } - - v3 = NFS_ISV3(vp); md = *mdp; - t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; + t1 = ((caddr_t)mbuf_data(md) + mbuf_len(md)) - *dposp; if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2))) { - FSDBG_BOT(527, error, 2, 0, *xidp); return (error); } fp = (struct nfs_fattr *)cp2; if (v3) { - vtyp = nfsv3tov_type(fp->fa_type); + vtype = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); - fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { - vtyp = nfsv2tov_type(fp->fa_type); + vtype = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX @@ -1305,26 +1324,79 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper, dontshrink, xidp) * contain any type information (while also introduing sockets * and FIFOs for fa_type). */ - if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) - vtyp = IFTOVT(vmode); + if (vtype == VNON || (vtype == VREG && (vmode & S_IFMT) != 0)) + vtype = IFTOVT(vmode); rdev = fxdr_unsigned(long, fp->fa2_rdev); - fxdr_nfsv2time(&fp->fa2_mtime, &mtime); - /* * Really ugly NFSv2 kludge. 
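 * (NFSv2 has no FIFO file type, so some servers encode FIFOs as
 * character specials with an all-ones device number; map those
 * back to VFIFO here)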
*/ - if (vtyp == VCHR && rdev == 0xffffffff) - vtyp = VFIFO; + if (vtype == VCHR && rdev == (int)0xffffffff) + vtype = VFIFO; + } + + nvap->nva_type = vtype; + nvap->nva_mode = (vmode & 07777); + nvap->nva_rdev = (dev_t)rdev; + nvap->nva_nlink = (uint64_t)fxdr_unsigned(u_long, fp->fa_nlink); + nvap->nva_uid = fxdr_unsigned(uid_t, fp->fa_uid); + nvap->nva_gid = fxdr_unsigned(gid_t, fp->fa_gid); + if (v3) { + fxdr_hyper(&fp->fa3_size, &nvap->nva_size); + nvap->nva_blocksize = 16*1024; + fxdr_hyper(&fp->fa3_used, &nvap->nva_bytes); + fxdr_hyper(&fp->fa3_fileid, &nvap->nva_fileid); + fxdr_nfsv3time(&fp->fa3_atime, &nvap->nva_atime); + fxdr_nfsv3time(&fp->fa3_mtime, &nvap->nva_mtime); + fxdr_nfsv3time(&fp->fa3_ctime, &nvap->nva_ctime); + } else { + nvap->nva_size = fxdr_unsigned(u_long, fp->fa2_size); + nvap->nva_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); + nvap->nva_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; + nvap->nva_fileid = (uint64_t)fxdr_unsigned(u_long, fp->fa2_fileid); + fxdr_nfsv2time(&fp->fa2_atime, &nvap->nva_atime); + fxdr_nfsv2time(&fp->fa2_mtime, &nvap->nva_mtime); + fxdr_nfsv2time(&fp->fa2_ctime, &nvap->nva_ctime); + } + + return (0); +} + +/* + * Load the attribute cache (that lives in the nfsnode entry) with + * the value pointed to by nvap, unless the file type in the attribute + * cache doesn't match the file type in the nvap, in which case log a + * warning and return ESTALE. + * + * If the dontshrink flag is set, then it's not safe to call ubc_setsize() + * to shrink the size of the file. + */ +int +nfs_loadattrcache( + struct nfsnode *np, + struct nfs_vattr *nvap, + u_int64_t *xidp, + int dontshrink) +{ + mount_t mp; + vnode_t vp; + struct timeval now; + struct nfs_vattr *npnvap; + + if (np->n_flag & NINIT) { + vp = NULL; + mp = np->n_mount; + } else { + vp = NFSTOV(np); + mp = vnode_mount(vp); + } + + FSDBG_TOP(527, vp, np, *xidp >> 32, *xidp); + + if (!VFSTONFS(mp)) { + FSDBG_BOT(527, ENXIO, 1, 0, *xidp); + return (ENXIO); } - /* - * If v_type == VNON it is a new node, so fill in the v_type, - * n_mtime fields. Check to see if it represents a special - * device, and if so, check for a possible alias. Once the - * correct vnode has been obtained, fill in the rest of the - * information. - */ - np = VTONFS(vp); if (*xidp < np->n_xid) { /* * We have already updated attributes with a response from @@ -1336,407 +1408,417 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper, dontshrink, xidp) * to indicate the attributes were dropped - only getattr * cares - it needs to retry the rpc. */ - np->n_xid = 0; + NATTRINVALIDATE(np); FSDBG_BOT(527, 0, np, np->n_xid, *xidp); *xidp = 0; return (0); } - if (vp->v_type != vtyp) { - if (vp->v_type != VNON) { - /* - * The filehandle has changed type on us. This can be - * caused by either the server not having unique filehandles - * or because another client has removed the previous - * filehandle and a new object (of a different type) - * has been created with the same filehandle. - * - * We can't simply switch the type on the vnode because - * there may be type-specific fields that need to be - * cleaned up or set up. - * - * So, what should we do with this vnode? - * - * About the best we can do is log a warning and return - * an error. ESTALE is about the closest error, but it - * is a little strange that we come up with this error - * internally instead of simply passing it through from - * the server. Hopefully, the vnode will be reclaimed - * soon so the filehandle can be reincarnated as the new - * object type. 
- */ - printf("nfs loadattrcache vnode changed type, was %d now %d", vp->v_type, vtyp); - FSDBG_BOT(527, ESTALE, 3, 0, *xidp); - return (ESTALE); - } else { - vp->v_type = vtyp; - } - if (vp->v_type == VFIFO) { - vp->v_op = fifo_nfsv2nodeop_p; - } - if (vp->v_type == VCHR || vp->v_type == VBLK) { - vp->v_op = spec_nfsv2nodeop_p; - nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); - if (nvp) { - /* - * Discard unneeded vnode, but save its nfsnode. - * Since the nfsnode does not have a lock, its - * vnode lock has to be carried over. - */ - nvp->v_vnlock = vp->v_vnlock; - vp->v_vnlock = NULL; - nvp->v_data = vp->v_data; - vp->v_data = NULL; - vp->v_op = spec_vnodeop_p; - vrele(vp); - vgone(vp); - /* - * Reinitialize aliased node. - */ - np->n_vnode = nvp; - *vpp = vp = nvp; - } - } - np->n_mtime = mtime.tv_sec; - if (vp->v_type == VDIR) - np->n_ncmtime = mtime.tv_sec; - FSDBG(527, vp, np->n_mtime, 0, 0); - } - np->n_xid = *xidp; - vap = &np->n_vattr; - vap->va_type = vtyp; - vap->va_mode = (vmode & 07777); - vap->va_rdev = (dev_t)rdev; - vap->va_mtime = mtime; - vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; - if (v3) { - vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); - vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); - vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); - fxdr_hyper(&fp->fa3_size, &vap->va_size); - vap->va_blocksize = 16*1024; - fxdr_hyper(&fp->fa3_used, &vap->va_bytes); - vap->va_fileid = fxdr_unsigned(int, fp->fa3_fileid.nfsuquad[1]); - fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); - fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); - vap->va_flags = 0; - vap->va_filerev = 0; - } else { - vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); - vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); - vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); - vap->va_size = fxdr_unsigned(u_long, fp->fa2_size); - vap->va_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); - vap->va_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; - vap->va_fileid = fxdr_unsigned(long, fp->fa2_fileid); - fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); - vap->va_flags = 0; - vap->va_ctime.tv_sec = fxdr_unsigned(long, fp->fa2_ctime.nfsv2_sec); - vap->va_ctime.tv_nsec = 0; - vap->va_gen = fxdr_unsigned(u_long, fp->fa2_ctime.nfsv2_usec); - vap->va_filerev = 0; + if (vp && (nvap->nva_type != vnode_vtype(vp))) { + /* + * The filehandle has changed type on us. This can be + * caused by either the server not having unique filehandles + * or because another client has removed the previous + * filehandle and a new object (of a different type) + * has been created with the same filehandle. + * + * We can't simply switch the type on the vnode because + * there may be type-specific fields that need to be + * cleaned up or set up. + * + * So, what should we do with this vnode? + * + * About the best we can do is log a warning and return + * an error. ESTALE is about the closest error, but it + * is a little strange that we come up with this error + * internally instead of simply passing it through from + * the server. Hopefully, the vnode will be reclaimed + * soon so the filehandle can be reincarnated as the new + * object type. 
+ */ + printf("nfs loadattrcache vnode changed type, was %d now %d\n", + vnode_vtype(vp), nvap->nva_type); + FSDBG_BOT(527, ESTALE, 3, 0, *xidp); + return (ESTALE); } microuptime(&now); np->n_attrstamp = now.tv_sec; + np->n_xid = *xidp; - if (UBCINFOMISSING(vp) || UBCINFORECLAIMED(vp)) { - if (UBCINFORECLAIMED(vp) && ISSET(vp->v_flag, (VXLOCK|VORECLAIM))) { - // vnode is being vclean'ed, abort - FSDBG_BOT(527, ENXIO, 1, 0, *xidp); - return (ENXIO); - } - if ((error = ubc_info_init(vp))) { /* VREG */ - FSDBG_BOT(527, error, 3, 0, *xidp); - return(error); - } - } - - if (vap->va_size != np->n_size) { - FSDBG(527, vp, vap->va_size, np->n_size, - (vap->va_type == VREG) | - (np->n_flag & NMODIFIED ? 6 : 4)); - if (vap->va_type == VREG) { - int orig_size; + npnvap = &np->n_vattr; + nvap->nva_fsid = vfs_statfs(mp)->f_fsid.val[0]; + bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); - orig_size = np->n_size; - if (np->n_flag & NMODIFIED) { - if (vap->va_size < np->n_size) - vap->va_size = np->n_size; - else - np->n_size = vap->va_size; + if (vp) { + if (nvap->nva_size != np->n_size) { + FSDBG(527, vp, nvap->nva_size, np->n_size, + (nvap->nva_type == VREG) | + (np->n_flag & NMODIFIED ? 6 : 4)); + if (nvap->nva_type == VREG) { + int orig_size = np->n_size; + if (np->n_flag & NMODIFIED) { + if (nvap->nva_size < np->n_size) + nvap->nva_size = np->n_size; + else + np->n_size = nvap->nva_size; + } else + np->n_size = nvap->nva_size; + if (!UBCINFOEXISTS(vp) || + (dontshrink && np->n_size < (u_quad_t)ubc_getsize(vp))) { + nvap->nva_size = np->n_size = orig_size; + NATTRINVALIDATE(np); + } else { + ubc_setsize(vp, (off_t)np->n_size); /* XXX */ + } } else - np->n_size = vap->va_size; - if (!UBCINFOEXISTS(vp) || - dontshrink && np->n_size < ubc_getsize(vp)) { - vap->va_size = np->n_size = orig_size; - np->n_xid = 0; - } else { - ubc_setsize(vp, (off_t)np->n_size); /* XXX */ - } - } else - np->n_size = vap->va_size; + np->n_size = nvap->nva_size; + } + } else { + np->n_size = nvap->nva_size; } - if (vaper != NULL) { - bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); - if (np->n_flag & NCHG) { - if (np->n_flag & NACC) - vaper->va_atime = np->n_atim; - if (np->n_flag & NUPD) - vaper->va_mtime = np->n_mtim; - } + if (np->n_flag & NCHG) { + if (np->n_flag & NACC) + nvap->nva_atime = np->n_atim; + if (np->n_flag & NUPD) + nvap->nva_mtime = np->n_mtim; } + FSDBG_BOT(527, 0, np, 0, *xidp); return (0); } /* - * Check the time stamp - * If the cache is valid, copy contents to *vap and return 0 - * otherwise return an error + * Calculate the attribute timeout based on + * how recently the file has been modified. */ int -nfs_getattrcache(vp, vaper) - register struct vnode *vp; - struct vattr *vaper; +nfs_attrcachetimeout(vnode_t vp) { - register struct nfsnode *np = VTONFS(vp); - register struct vattr *vap; - struct timeval now, nowup; - int32_t timeo; + struct nfsnode *np = VTONFS(vp); + struct nfsmount *nmp; + struct timeval now; + int isdir, timeo; - if (np->n_xid == 0) { - FSDBG(528, vp, 0, 0, 0); - nfsstats.attrcache_misses++; - return (ENOENT); - } + if (!(nmp = VFSTONFS(vnode_mount(vp)))) + return (0); + + isdir = vnode_isdir(vp); - /* Set attribute timeout based on how recently the file has been modified. */ if ((np)->n_flag & NMODIFIED) - timeo = NFS_MINATTRTIMO; + timeo = isdir ? 
nmp->nm_acdirmin : nmp->nm_acregmin; else { /* Note that if the client and server clocks are way out of sync, */ /* timeout will probably get clamped to a min or max value */ microtime(&now); - timeo = (now.tv_sec - (np)->n_mtime) / 10; - if (timeo < NFS_MINATTRTIMO) - timeo = NFS_MINATTRTIMO; - else if (timeo > NFS_MAXATTRTIMO) - timeo = NFS_MAXATTRTIMO; + timeo = (now.tv_sec - (np)->n_mtime.tv_sec) / 10; + if (isdir) { + if (timeo < nmp->nm_acdirmin) + timeo = nmp->nm_acdirmin; + else if (timeo > nmp->nm_acdirmax) + timeo = nmp->nm_acdirmax; + } else { + if (timeo < nmp->nm_acregmin) + timeo = nmp->nm_acregmin; + else if (timeo > nmp->nm_acregmax) + timeo = nmp->nm_acregmax; + } } + return (timeo); +} + +/* + * Check the time stamp + * If the cache is valid, copy contents to *nvaper and return 0 + * otherwise return an error + */ +int +nfs_getattrcache(vp, nvaper) + vnode_t vp; + struct nfs_vattr *nvaper; +{ + struct nfsnode *np = VTONFS(vp); + struct nfs_vattr *nvap; + struct timeval nowup; + int32_t timeo; + + if (!NATTRVALID(np)) { + FSDBG(528, vp, 0, 0, 0); + OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); + return (ENOENT); + } + + timeo = nfs_attrcachetimeout(vp); + microuptime(&nowup); if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { FSDBG(528, vp, 0, 0, 1); - nfsstats.attrcache_misses++; + OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); return (ENOENT); } FSDBG(528, vp, 0, 0, 2); - nfsstats.attrcache_hits++; - vap = &np->n_vattr; + OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_hits); + nvap = &np->n_vattr; - if (vap->va_size != np->n_size) { - FSDBG(528, vp, vap->va_size, np->n_size, - (vap->va_type == VREG) | + if (nvap->nva_size != np->n_size) { + FSDBG(528, vp, nvap->nva_size, np->n_size, + (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); - if (vap->va_type == VREG) { + if (nvap->nva_type == VREG) { if (np->n_flag & NMODIFIED) { - if (vap->va_size < np->n_size) - vap->va_size = np->n_size; + if (nvap->nva_size < np->n_size) + nvap->nva_size = np->n_size; else - np->n_size = vap->va_size; + np->n_size = nvap->nva_size; } else - np->n_size = vap->va_size; + np->n_size = nvap->nva_size; ubc_setsize(vp, (off_t)np->n_size); /* XXX */ } else - np->n_size = vap->va_size; + np->n_size = nvap->nva_size; } - bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); + bcopy((caddr_t)nvap, (caddr_t)nvaper, sizeof(struct nfs_vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) - vaper->va_atime = np->n_atim; + nvaper->nva_atime = np->n_atim; if (np->n_flag & NUPD) - vaper->va_mtime = np->n_mtim; + nvaper->nva_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* - * Set up nameidata for a lookup() call and do it. - * - * If pubflag is set, this call is done for a lookup operation on the - * public filehandle. In that case we allow crossing mountpoints and - * absolute pathnames. However, the caller is expected to check that - * the lookup result is within the public fs, and deny access if - * it is not. + * Extract a lookup path from the given mbufs and store it in + * a newly allocated buffer saved in the given nameidata structure. + * exptected string length given as *lenp and final string length + * (after any WebNFS processing) is returned in *lenp. 
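+ * With pubflag set, WebNFS escapes are handled while copying: a
+ * leading byte >= 0x80 selects a "native" path (0x80) or fails,
+ * and %xx escapes are decoded to single bytes, with the returned
+ * length reduced by the escape bytes consumed (webcnt).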
*/ int -nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) - register struct nameidata *ndp; - fhandle_t *fhp; - int len; - struct nfssvc_sock *slp; - struct mbuf *nam; - struct mbuf **mdp; - caddr_t *dposp; - struct vnode **retdirp; - struct proc *p; - int kerbflag, pubflag; +nfsm_path_mbuftond( + mbuf_t *mdp, + caddr_t *dposp, + __unused int v3, + __unused int pubflag, + int* lenp, + struct nameidata *ndp) { - register int i, rem; - register struct mbuf *md; - register char *fromcp, *tocp, *cp; - struct iovec aiov; - struct uio auio; - struct vnode *dp; - int error, rdonly, linklen; + int i, len, len2, rem, error = 0; + mbuf_t md; + char *fromcp, *tocp; struct componentname *cnp = &ndp->ni_cnd; - int olen = len; - char *tmppn; - - *retdirp = (struct vnode *)0; +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED + int webcnt = 0, digitcnt = 0; + char hexdigits[2]; +#endif - if (len > MAXPATHLEN - 1) + len = *lenp; + if (len > (MAXPATHLEN - 1)) return (ENAMETOOLONG); - MALLOC_ZONE(cnp->cn_pnbuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + /* + * Get a buffer for the name to be translated, and copy the + * name into the buffer. + */ + MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!cnp->cn_pnbuf) + return (ENOMEM); cnp->cn_pnlen = MAXPATHLEN; + cnp->cn_flags |= HASBUF; /* - * Copy the name from the mbuf list to ndp->ni_pnbuf - * and set the various ndp fields appropriately. + * Copy the name from the mbuf list to the string + * + * Along the way, take note of any WebNFS characters + * and convert any % escapes. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; - rem = mtod(md, caddr_t) + md->m_len - fromcp; - cnp->cn_hash = 0; + rem = (caddr_t)mbuf_data(md) + mbuf_len(md) - fromcp; for (i = 1; i <= len; i++) { while (rem == 0) { - md = md->m_next; + md = mbuf_next(md); if (md == NULL) { error = EBADRPC; goto out; } - fromcp = mtod(md, caddr_t); - rem = md->m_len; + fromcp = mbuf_data(md); + rem = mbuf_len(md); } -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notdef - if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED + if (pubflag) { + if ((i == 1) && ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START)) { + switch ((unsigned char)*fromcp) { + case WEBNFS_NATIVE_CHAR: + /* + * 'Native' path for us is the same + * as a path according to the NFS spec, + * just skip the escape char. + */ + webcnt++; + fromcp++; + rem--; + /* next iteration of for loop */ + continue; + /* + * More may be added in the future, range 0x80-0xff. + * Don't currently support security query lookup (0x81). + */ + default: + error = EIO; + goto out; + } + } + if (digitcnt) { + /* We're expecting hex digits */ + if (!ISHEX(*fromcp)) { + error = ENOENT; + goto out; + } + digitcnt--; + hexdigits[digitcnt ? 0 : 1] = *fromcp++; + if (!digitcnt) + *tocp++ = HEXSTRTOI(hexdigits); + rem--; + /* next iteration of for loop */ + continue; + } else if (*fromcp == WEBNFS_ESC_CHAR) { + /* + * We can't really look at the next couple + * bytes here safely/easily, so we note that + * the next two characters should be hex + * digits and later save them in hexdigits[]. + * When we've got both, we'll convert it. 
+ */ + digitcnt = 2; + webcnt += 2; + fromcp++; + rem--; + /* next iteration of for loop */ + continue; + } + } + if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) #else - if (*fromcp == '\0' || *fromcp == '/') { + if (*fromcp == '\0' || *fromcp == '/') #endif + { error = EACCES; goto out; } - cnp->cn_hash += (unsigned char)*fromcp * i; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; - len = nfsm_rndup(len)-len; - if (len > 0) { - if (rem >= len) - *dposp += len; - else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) + len2 = nfsm_rndup(len)-len; + if (len2 > 0) { + if (rem >= len2) + *dposp += len2; + else if ((error = nfs_adv(mdp, dposp, len2, rem)) != 0) goto out; } +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED + if (pubflag) { + if (digitcnt) { + /* The string ended in the middle of an escape! */ + error = ENOENT; + goto out; + } + len -= webcnt; + } +#endif + +out: + if (error) { + if (cnp->cn_pnbuf) + FREE_ZONE(cnp->cn_pnbuf, MAXPATHLEN, M_NAMEI); + cnp->cn_flags &= ~HASBUF; + } else { + ndp->ni_pathlen = len; + *lenp = len; + } + return (error); +} + +/* + * Set up nameidata for a lookup() call and do it. + * + * If pubflag is set, this call is done for a lookup operation on the + * public filehandle. In that case we allow crossing mountpoints and + * absolute pathnames. However, the caller is expected to check that + * the lookup result is within the public fs, and deny access if + * it is not. + */ +int +nfs_namei( + struct nfsrv_descript *nfsd, + struct vfs_context *ctx, + struct nameidata *ndp, + struct nfs_filehandle *nfhp, + mbuf_t nam, + int pubflag, + vnode_t *retdirp, + struct nfs_export **nxp, + struct nfs_export_options **nxop) +{ +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED + char *cp; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + int linklen, olen = ndp->ni_pathlen; +#endif + vnode_t dp; + int error; + struct componentname *cnp = &ndp->ni_cnd; + char *tmppn; + + *retdirp = NULL; + /* * Extract and set starting directory. */ - error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, - nam, &rdonly, kerbflag, pubflag); + error = nfsrv_fhtovp(nfhp, nam, pubflag, &dp, nxp, nxop); if (error) goto out; - if (dp->v_type != VDIR) { - vrele(dp); + error = nfsrv_credcheck(nfsd, *nxp, *nxop); + if (error || (vnode_vtype(dp) != VDIR)) { + vnode_put(dp); error = ENOTDIR; goto out; } - if (rdonly) + ctx->vc_ucred = nfsd->nd_cr; + ndp->ni_cnd.cn_context = ctx; + + if (*nxop && ((*nxop)->nxo_flags & NX_READONLY)) cnp->cn_flags |= RDONLY; *retdirp = dp; -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet - if (pubflag) { - /* - * Oh joy. For WebNFS, handle those pesky '%' escapes, - * and the 'native path' indicator. - */ - - assert(olen <= MAXPATHLEN - 1); - - MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - fromcp = cnp->cn_pnbuf; - tocp = cp; - if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { - switch ((unsigned char)*fromcp) { - case WEBNFS_NATIVE_CHAR: - /* - * 'Native' path for us is the same - * as a path according to the NFS spec, - * just skip the escape char. - */ - fromcp++; - break; - /* - * More may be added in the future, range 0x80-0xff - */ - default: - error = EIO; - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - goto out; - } - } - /* - * Translate the '%' escapes, URL-style. 
- */ - while (*fromcp != '\0') { - if (*fromcp == WEBNFS_ESC_CHAR) { - if (fromcp[1] != '\0' && fromcp[2] != '\0') { - fromcp++; - *tocp++ = HEXSTRTOI(fromcp); - fromcp += 2; - continue; - } else { - error = ENOENT; - FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); - goto out; - } - } else - *tocp++ = *fromcp++; - } - *tocp = '\0'; - - tmppn = cnp->cn_pnbuf; - long len = cnp->cn_pnlen; - cnp->cn_pnbuf = cp; - cnp->cn_pnlen = MAXPATHLEN; - FREE_ZONE(tmppn, len, M_NAMEI); - - } -#endif - - ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; - ndp->ni_segflg = UIO_SYSSPACE; - -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; - if (cnp->cn_pnbuf[0] == '/') + if (cnp->cn_pnbuf[0] == '/') { + vnode_put(dp); dp = rootvnode; + error = vnode_get(dp); + if (error) { + *retdirp = NULL; + goto out; + } + } } else { cnp->cn_flags |= NOCROSSMOUNT; } @@ -1744,8 +1826,7 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) cnp->cn_flags |= NOCROSSMOUNT; #endif - cnp->cn_proc = p; - VREF(dp); + ndp->ni_usedvp = dp; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; @@ -1760,54 +1841,67 @@ nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) * Check for encountering a symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { - nfsrv_object_create(ndp->ni_vp); - if (cnp->cn_flags & (SAVENAME | SAVESTART)) { - cnp->cn_flags |= HASBUF; - return (0); - } - break; + return (0); } else { - if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) - VOP_UNLOCK(ndp->ni_dvp, 0, p); -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED if (!pubflag) { #endif - vrele(ndp->ni_dvp); - vput(ndp->ni_vp); - ndp->ni_vp = NULL; + if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) + vnode_put(ndp->ni_dvp); + if (ndp->ni_vp) { + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULL; + } error = EINVAL; break; -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULL; error = ELOOP; break; } /* XXX assert(olen <= MAXPATHLEN - 1); */ - if (ndp->ni_pathlen > 1) + if (ndp->ni_pathlen > 1) { MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - else + if (!cp) { + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULL; + error = ENOMEM; + break; + } + } else { cp = cnp->cn_pnbuf; - aiov.iov_base = cp; - aiov.iov_len = MAXPATHLEN; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - auio.uio_resid = MAXPATHLEN; - error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); + } + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULL; + if (ndp->ni_pathlen > 1) + FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + error = ENOMEM; + break; + } + uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); + error = VNOP_READLINK(ndp->ni_vp, auio, cnp->cn_context); if (error) { badlink: + vnode_put(ndp->ni_vp); + ndp->ni_vp = NULL; if (ndp->ni_pathlen > 1) FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); break; } - linklen = MAXPATHLEN - auio.uio_resid; + linklen = MAXPATHLEN - 
uio_resid(auio); if (linklen == 0) { error = ENOENT; goto badlink; @@ -1826,15 +1920,20 @@ badlink: } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; - vput(ndp->ni_vp); + + vnode_put(ndp->ni_vp); dp = ndp->ni_dvp; + ndp->ni_dvp = NULL; + /* * Check if root directory should replace current directory. */ if (cnp->cn_pnbuf[0] == '/') { - vrele(dp); + vnode_put(dp); dp = ndp->ni_rootdir; - VREF(dp); + error = vnode_get(dp); + if (error) + break; } #endif } @@ -1854,13 +1953,13 @@ out: */ void nfsm_adj(mp, len, nul) - struct mbuf *mp; - register int len; + mbuf_t mp; + int len; int nul; { - register struct mbuf *m; - register int count, i; - register char *cp; + mbuf_t m, mnext; + int count, i, mlen; + char *cp; /* * Trim from tail. Scan the mbuf chain, @@ -1872,15 +1971,18 @@ nfsm_adj(mp, len, nul) count = 0; m = mp; for (;;) { - count += m->m_len; - if (m->m_next == (struct mbuf *)0) + mlen = mbuf_len(m); + count += mlen; + mnext = mbuf_next(m); + if (mnext == NULL) break; - m = m->m_next; + m = mnext; } - if (m->m_len > len) { - m->m_len -= len; + if (mlen > len) { + mlen -= len; + mbuf_setlen(m, mlen); if (nul > 0) { - cp = mtod(m, caddr_t)+m->m_len-nul; + cp = (caddr_t)mbuf_data(m) + mlen - nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } @@ -1894,20 +1996,22 @@ nfsm_adj(mp, len, nul) * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ - for (m = mp; m; m = m->m_next) { - if (m->m_len >= count) { - m->m_len = count; + for (m = mp; m; m = mbuf_next(m)) { + mlen = mbuf_len(m); + if (mlen >= count) { + mlen = count; + mbuf_setlen(m, count); if (nul > 0) { - cp = mtod(m, caddr_t)+m->m_len-nul; + cp = (caddr_t)mbuf_data(m) + mlen - nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } - count -= m->m_len; + count -= mlen; } - for (m = m->m_next;m;m = m->m_next) - m->m_len = 0; + for (m = mbuf_next(m); m; m = mbuf_next(m)) + mbuf_setlen(m, 0); } /* @@ -1918,15 +2022,15 @@ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; - register struct vattr *before_vap; + struct vnode_attr *before_vap; int after_ret; - struct vattr *after_vap; - struct mbuf **mbp; + struct vnode_attr *after_vap; + mbuf_t *mbp; char **bposp; { - register struct mbuf *mb = *mbp, *mb2; - register char *bpos = *bposp; - register u_long *tl; + mbuf_t mb = *mbp, mb2; + char *bpos = *bposp; + u_long *tl; if (before_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); @@ -1934,11 +2038,11 @@ nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) } else { nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; - txdr_hyper(&(before_vap->va_size), tl); + txdr_hyper(&(before_vap->va_data_size), tl); tl += 2; - txdr_nfsv3time(&(before_vap->va_mtime), tl); + txdr_nfsv3time(&(before_vap->va_modify_time), tl); tl += 2; - txdr_nfsv3time(&(before_vap->va_ctime), tl); + txdr_nfsv3time(&(before_vap->va_change_time), tl); } *bposp = bpos; *mbp = mb; @@ -1949,14 +2053,14 @@ void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; - struct vattr *after_vap; - struct mbuf **mbp; + struct vnode_attr *after_vap; + mbuf_t *mbp; char **bposp; { - register struct mbuf *mb = *mbp, *mb2; - register char *bpos = *bposp; - register u_long *tl; - register struct nfs_fattr *fp; + mbuf_t mb = *mbp, mb2; + char *bpos = *bposp; + u_long *tl; + struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); @@ -1973,117 +2077,640 @@ 
nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) void nfsm_srvfattr(nfsd, vap, fp) - register struct nfsrv_descript *nfsd; - register struct vattr *vap; - register struct nfs_fattr *fp; + struct nfsrv_descript *nfsd; + struct vnode_attr *vap; + struct nfs_fattr *fp; { + // XXX Should we assert here that all fields are supported? + fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); - txdr_hyper(&vap->va_size, &fp->fa3_size); - txdr_hyper(&vap->va_bytes, &fp->fa3_used); + txdr_hyper(&vap->va_data_size, &fp->fa3_size); + txdr_hyper(&vap->va_data_alloc, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); - fp->fa3_fileid.nfsuquad[0] = 0; - fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); - txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); - txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); - txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); + txdr_hyper(&vap->va_fileid, &fp->fa3_fileid); + txdr_nfsv3time(&vap->va_access_time, &fp->fa3_atime); + txdr_nfsv3time(&vap->va_modify_time, &fp->fa3_mtime); + txdr_nfsv3time(&vap->va_change_time, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); - fp->fa2_size = txdr_unsigned(vap->va_size); - fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); + fp->fa2_size = txdr_unsigned(vap->va_data_size); + fp->fa2_blocksize = txdr_unsigned(vap->va_iosize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); - fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); + fp->fa2_blocks = txdr_unsigned(vap->va_data_alloc / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); - txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); - txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); - txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); + txdr_nfsv2time(&vap->va_access_time, &fp->fa2_atime); + txdr_nfsv2time(&vap->va_modify_time, &fp->fa2_mtime); + txdr_nfsv2time(&vap->va_change_time, &fp->fa2_ctime); + } +} + +/* + * Build hash lists of net addresses and hang them off the NFS export. + * Called by nfsrv_export() to set up the lists of export addresses. 
+ */ +static int +nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) +{ + struct nfs_export_net_args nxna; + struct nfs_netopt *no; + struct radix_node_head *rnh; + struct radix_node *rn; + struct sockaddr *saddr, *smask; + struct domain *dom; + int i, error; + unsigned int net; + user_addr_t uaddr; + kauth_cred_t cred; + struct ucred temp_cred; + + uaddr = unxa->nxa_nets; + for (net = 0; net < unxa->nxa_netcount; net++, uaddr += sizeof(nxna)) { + error = copyin(uaddr, &nxna, sizeof(nxna)); + if (error) + return (error); + + if (nxna.nxna_flags & (NX_MAPROOT|NX_MAPALL)) { + bzero(&temp_cred, sizeof(temp_cred)); + temp_cred.cr_uid = nxna.nxna_cred.cr_uid; + temp_cred.cr_ngroups = nxna.nxna_cred.cr_ngroups; + for (i=0; i < nxna.nxna_cred.cr_ngroups && i < NGROUPS; i++) + temp_cred.cr_groups[i] = nxna.nxna_cred.cr_groups[i]; + + cred = kauth_cred_create(&temp_cred); + if (!cred) + return (ENOMEM); + } else { + cred = NULL; + } + + if (nxna.nxna_addr.ss_len == 0) { + /* No address means this is a default/world export */ + if (nx->nx_flags & NX_DEFAULTEXPORT) + return (EEXIST); + nx->nx_flags |= NX_DEFAULTEXPORT; + nx->nx_defopt.nxo_flags = nxna.nxna_flags; + nx->nx_defopt.nxo_cred = cred; + nx->nx_expcnt++; + continue; + } + + i = sizeof(struct nfs_netopt); + i += nxna.nxna_addr.ss_len + nxna.nxna_mask.ss_len; + MALLOC(no, struct nfs_netopt *, i, M_NETADDR, M_WAITOK); + if (!no) + return (ENOMEM); + bzero(no, sizeof(struct nfs_netopt)); + no->no_opt.nxo_flags = nxna.nxna_flags; + no->no_opt.nxo_cred = cred; + + saddr = (struct sockaddr *)(no + 1); + bcopy(&nxna.nxna_addr, saddr, nxna.nxna_addr.ss_len); + if (nxna.nxna_mask.ss_len) { + smask = (struct sockaddr *)((caddr_t)saddr + nxna.nxna_addr.ss_len); + bcopy(&nxna.nxna_mask, smask, nxna.nxna_mask.ss_len); + } else { + smask = NULL; + } + i = saddr->sa_family; + if ((rnh = nx->nx_rtable[i]) == 0) { + /* + * Seems silly to initialize every AF when most are not + * used, do so on demand here + */ + for (dom = domains; dom; dom = dom->dom_next) + if (dom->dom_family == i && dom->dom_rtattach) { + dom->dom_rtattach((void **)&nx->nx_rtable[i], + dom->dom_rtoffset); + break; + } + if ((rnh = nx->nx_rtable[i]) == 0) { + kauth_cred_rele(cred); + _FREE(no, M_NETADDR); + return (ENOBUFS); + } + } + rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, no->no_rnodes); + if (rn == 0) { + /* + * One of the reasons that rnh_addaddr may fail is that + * the entry already exists. To check for this case, we + * look up the entry to see if it is there. If so, we + * do not need to make a new entry but do continue. + */ + int matched = 0; + rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); + if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && + (((struct nfs_netopt *)rn)->no_opt.nxo_flags == nxna.nxna_flags)) { + kauth_cred_t cred2 = ((struct nfs_netopt *)rn)->no_opt.nxo_cred; + if (cred && cred2 && (cred->cr_uid == cred2->cr_uid) && + (cred->cr_ngroups == cred2->cr_ngroups)) { + for (i=0; i < cred2->cr_ngroups && i < NGROUPS; i++) + if (cred->cr_groups[i] != cred2->cr_groups[i]) + break; + if (i >= cred2->cr_ngroups || i >= NGROUPS) + matched = 1; + } + } + kauth_cred_rele(cred); + _FREE(no, M_NETADDR); + if (matched) + continue; + return (EPERM); + } + nx->nx_expcnt++; + } + + return (0); +} + +/* + * In order to properly track an export's netopt count, we need to pass + * an additional argument to nfsrv_free_netopt() so that it can decrement + * the export's netopt count. 
+ */ +struct nfsrv_free_netopt_arg { + uint32_t *cnt; + struct radix_node_head *rnh; +}; + +static int +nfsrv_free_netopt(struct radix_node *rn, void *w) +{ + struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w; + struct radix_node_head *rnh = fna->rnh; + uint32_t *cnt = fna->cnt; + struct nfs_netopt *nno = (struct nfs_netopt *)rn; + + (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); + if (nno->no_opt.nxo_cred) + kauth_cred_rele(nno->no_opt.nxo_cred); + _FREE((caddr_t)rn, M_NETADDR); + *cnt -= 1; + return (0); +} + +/* + * Free the net address hash lists that are hanging off the mount points. + */ +static void +nfsrv_free_addrlist(struct nfs_export *nx) +{ + int i; + struct radix_node_head *rnh; + struct nfsrv_free_netopt_arg fna; + + for (i = 0; i <= AF_MAX; i++) + if ( (rnh = nx->nx_rtable[i]) ) { + fna.rnh = rnh; + fna.cnt = &nx->nx_expcnt; + (*rnh->rnh_walktree)(rnh, nfsrv_free_netopt, (caddr_t)&fna); + _FREE((caddr_t)rnh, M_RTABLE); + nx->nx_rtable[i] = 0; + } +} + +void enablequotas(struct mount *mp, vfs_context_t ctx); // XXX + +int +nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) +{ + int error = 0, pathlen; + struct nfs_exportfs *nxfs, *nxfs2, *nxfs3; + struct nfs_export *nx, *nx2, *nx3; + struct nfs_filehandle nfh; + struct nameidata mnd, xnd; + vnode_t mvp = NULL, xvp = NULL; + mount_t mp; + char path[MAXPATHLEN]; + int expisroot; + + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) + return (error); + + lck_rw_lock_exclusive(&nfs_export_rwlock); + + // first check if we've already got an exportfs with the given ID + LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) { + if (nxfs->nxfs_id == unxa->nxa_fsid) + break; + } + if (nxfs) { + /* verify exported FS path matches given path */ + if (strcmp(path, nxfs->nxfs_path)) { + error = EEXIST; + goto unlock_out; + } + mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); + /* find exported FS root vnode */ + NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, nxfs->nxfs_path, ctx); + error = namei(&mnd); + if (error) + goto unlock_out; + mvp = mnd.ni_vp; + /* make sure it's (still) the root of a file system */ + if ((mvp->v_flag & VROOT) == 0) { + error = EINVAL; + goto out; + } + /* sanity check: this should be same mount */ + if (mp != vnode_mount(mvp)) { + error = EINVAL; + goto out; + } + } else { + /* no current exported file system with that ID */ + if (!(unxa->nxa_flags & NXA_ADD)) { + error = ENOENT; + goto unlock_out; + } + + /* find exported FS root vnode */ + NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, path, ctx); + error = namei(&mnd); + if (error) + goto unlock_out; + mvp = mnd.ni_vp; + /* make sure it's the root of a file system */ + if ((mvp->v_flag & VROOT) == 0) { + error = EINVAL; + goto out; + } + mp = vnode_mount(mvp); + + /* make sure the file system is NFS-exportable */ + nfh.nfh_len = NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(mvp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (!error && (nfh.nfh_len > (int)NFS_MAX_FID_SIZE)) + error = EIO; + if (error) + goto out; + + /* add an exportfs for it */ + MALLOC(nxfs, struct nfs_exportfs *, sizeof(struct nfs_exportfs), M_TEMP, M_WAITOK); + if (!nxfs) { + error = ENOMEM; + goto out; + } + bzero(nxfs, sizeof(struct nfs_exportfs)); + nxfs->nxfs_id = unxa->nxa_fsid; + MALLOC(nxfs->nxfs_path, char*, pathlen, M_TEMP, M_WAITOK); + if (!nxfs->nxfs_path) { + FREE(nxfs, M_TEMP); + error = ENOMEM; + goto out; + } + bcopy(path, nxfs->nxfs_path, pathlen); + /* insert into 
list in reverse-sorted order */ + nxfs3 = NULL; + LIST_FOREACH(nxfs2, &nfs_exports, nxfs_next) { + if (strcmp(nxfs->nxfs_path, nxfs2->nxfs_path) > 0) + break; + nxfs3 = nxfs2; + } + if (nxfs2) + LIST_INSERT_BEFORE(nxfs2, nxfs, nxfs_next); + else if (nxfs3) + LIST_INSERT_AFTER(nxfs3, nxfs, nxfs_next); + else + LIST_INSERT_HEAD(&nfs_exports, nxfs, nxfs_next); + + /* make sure any quotas are enabled before we export the file system */ + enablequotas(mp, ctx); } + + if (unxa->nxa_exppath) { + error = copyinstr(unxa->nxa_exppath, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) + goto out; + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + if (nx->nx_id == unxa->nxa_expid) + break; + } + if (nx) { + /* verify exported FS path matches given path */ + if (strcmp(path, nx->nx_path)) { + error = EEXIST; + goto out; + } + } else { + /* no current export with that ID */ + if (!(unxa->nxa_flags & NXA_ADD)) { + error = ENOENT; + goto out; + } + /* add an export for it */ + MALLOC(nx, struct nfs_export *, sizeof(struct nfs_export), M_TEMP, M_WAITOK); + if (!nx) { + error = ENOMEM; + goto out1; + } + bzero(nx, sizeof(struct nfs_export)); + nx->nx_id = unxa->nxa_expid; + nx->nx_fs = nxfs; + MALLOC(nx->nx_path, char*, pathlen, M_TEMP, M_WAITOK); + if (!nx->nx_path) { + error = ENOMEM; + FREE(nx, M_TEMP); + nx = NULL; + goto out1; + } + bcopy(path, nx->nx_path, pathlen); + /* insert into list in reverse-sorted order */ + nx3 = NULL; + LIST_FOREACH(nx2, &nxfs->nxfs_exports, nx_next) { + if (strcmp(nx->nx_path, nx2->nx_path) > 0) + break; + nx3 = nx2; + } + if (nx2) + LIST_INSERT_BEFORE(nx2, nx, nx_next); + else if (nx3) + LIST_INSERT_AFTER(nx3, nx, nx_next); + else + LIST_INSERT_HEAD(&nxfs->nxfs_exports, nx, nx_next); + /* insert into hash */ + LIST_INSERT_HEAD(NFSEXPHASH(nxfs->nxfs_id, nx->nx_id), nx, nx_hash); + + /* + * We don't allow nested exports. Check if the new entry + * nests with the entries before and after or if there's an + * entry for the file system root and subdirs. 
+ */ + error = 0; + if ((nx3 && !strncmp(nx3->nx_path, nx->nx_path, pathlen - 1) && + (nx3->nx_path[pathlen-1] == '/')) || + (nx2 && !strncmp(nx2->nx_path, nx->nx_path, strlen(nx2->nx_path)) && + (nx->nx_path[strlen(nx2->nx_path)] == '/'))) + error = EINVAL; + if (!error) { + /* check export conflict with fs root export and vice versa */ + expisroot = !nx->nx_path[0] || + ((nx->nx_path[0] == '.') && !nx->nx_path[1]); + LIST_FOREACH(nx2, &nxfs->nxfs_exports, nx_next) { + if (expisroot) { + if (nx2 != nx) + break; + } else if (!nx2->nx_path[0]) + break; + else if ((nx2->nx_path[0] == '.') && !nx2->nx_path[1]) + break; + } + if (nx2) + error = EINVAL; + } + if (error) { + printf("nfsrv_export: attempt to register nested exports: %s/%s\n", + nxfs->nxfs_path, nx->nx_path); + goto out1; + } + + /* find export root vnode */ + if (!nx->nx_path[0] || ((nx->nx_path[0] == '.') && !nx->nx_path[1])) { + /* exporting file system's root directory */ + xvp = mvp; + vnode_get(xvp); + } else { + xnd.ni_cnd.cn_nameiop = LOOKUP; + xnd.ni_cnd.cn_flags = LOCKLEAF; + xnd.ni_pathlen = pathlen - 1; + xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf = path; + xnd.ni_startdir = mvp; + xnd.ni_usedvp = mvp; + xnd.ni_cnd.cn_context = ctx; + error = lookup(&xnd); + if (error) + goto out1; + xvp = xnd.ni_vp; + } + + if (vnode_vtype(xvp) != VDIR) { + error = EINVAL; + vnode_put(xvp); + goto out1; + } + + /* grab file handle */ + nx->nx_fh.nfh_xh.nxh_version = NFS_FH_VERSION; + nx->nx_fh.nfh_xh.nxh_fsid = nx->nx_fs->nxfs_id; + nx->nx_fh.nfh_xh.nxh_expid = nx->nx_id; + nx->nx_fh.nfh_xh.nxh_flags = 0; + nx->nx_fh.nfh_xh.nxh_reserved = 0; + nx->nx_fh.nfh_len = NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(xvp, &nx->nx_fh.nfh_len, &nx->nx_fh.nfh_fid[0], NULL); + if (!error && (nx->nx_fh.nfh_len > (int)NFS_MAX_FID_SIZE)) { + error = EIO; + } else { + nx->nx_fh.nfh_xh.nxh_fidlen = nx->nx_fh.nfh_len; + nx->nx_fh.nfh_len += sizeof(nx->nx_fh.nfh_xh); + } + + vnode_put(xvp); + if (error) + goto out1; + } + } else { + nx = NULL; + } + + /* perform the export changes */ + if (unxa->nxa_flags & NXA_DELETE) { + if (!nx) { + /* delete all exports on this file system */ + while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) { + LIST_REMOVE(nx, nx_next); + LIST_REMOVE(nx, nx_hash); + /* delete all netopts for this export */ + nfsrv_free_addrlist(nx); + nx->nx_flags &= ~NX_DEFAULTEXPORT; + if (nx->nx_defopt.nxo_cred) { + kauth_cred_rele(nx->nx_defopt.nxo_cred); + nx->nx_defopt.nxo_cred = NULL; + } + FREE(nx->nx_path, M_TEMP); + FREE(nx, M_TEMP); + } + goto out1; + } else { + /* delete all netopts for this export */ + nfsrv_free_addrlist(nx); + nx->nx_flags &= ~NX_DEFAULTEXPORT; + if (nx->nx_defopt.nxo_cred) { + kauth_cred_rele(nx->nx_defopt.nxo_cred); + nx->nx_defopt.nxo_cred = NULL; + } + } + } + if (unxa->nxa_flags & NXA_ADD) { + error = nfsrv_hang_addrlist(nx, unxa); + if (!error) + mp->mnt_flag |= MNT_EXPORTED; + } + +out1: + if (nx && !nx->nx_expcnt) { + /* export has no export options */ + LIST_REMOVE(nx, nx_next); + LIST_REMOVE(nx, nx_hash); + FREE(nx->nx_path, M_TEMP); + FREE(nx, M_TEMP); + } + if (LIST_EMPTY(&nxfs->nxfs_exports)) { + /* exported file system has no more exports */ + LIST_REMOVE(nxfs, nxfs_next); + FREE(nxfs->nxfs_path, M_TEMP); + FREE(nxfs, M_TEMP); + mp->mnt_flag &= ~MNT_EXPORTED; + } + +out: + if (mvp) { + vnode_put(mvp); + nameidone(&mnd); + } +unlock_out: + lck_rw_done(&nfs_export_rwlock); + return (error); +} + +static struct nfs_export_options * +nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) +{ + struct 
nfs_export_options *nxo = NULL; + struct nfs_netopt *no = NULL; + struct radix_node_head *rnh; + struct sockaddr *saddr; + + /* Lookup in the export list first. */ + if (nam != NULL) { + saddr = mbuf_data(nam); + rnh = nx->nx_rtable[saddr->sa_family]; + if (rnh != NULL) { + no = (struct nfs_netopt *) + (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); + if (no && no->no_rnodes->rn_flags & RNF_ROOT) + no = NULL; + if (no) + nxo = &no->no_opt; + } + } + /* If no address match, use the default if it exists. */ + if ((nxo == NULL) && (nx->nx_flags & NX_DEFAULTEXPORT)) + nxo = &nx->nx_defopt; + return (nxo); +} + +/* find an export for the given handle */ +static struct nfs_export * +nfsrv_fhtoexport(struct nfs_filehandle *nfhp) +{ + struct nfs_export *nx; + nx = NFSEXPHASH(nfhp->nfh_xh.nxh_fsid, nfhp->nfh_xh.nxh_expid)->lh_first; + for (; nx; nx = LIST_NEXT(nx, nx_hash)) { + if (nx->nx_fs->nxfs_id != nfhp->nfh_xh.nxh_fsid) + continue; + if (nx->nx_id != nfhp->nfh_xh.nxh_expid) + continue; + break; + } + return nx; } /* - * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) - * - look up fsid in mount list (if not found ret error) - * - get vp and export rights by calling VFS_FHTOVP() - * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon - * - if not lockflag unlock it with VOP_UNLOCK() + * nfsrv_fhtovp() - convert FH to vnode and export info */ int -nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) - fhandle_t *fhp; - int lockflag; - struct vnode **vpp; - struct ucred *cred; - struct nfssvc_sock *slp; - struct mbuf *nam; - int *rdonlyp; - int kerbflag; - int pubflag; +nfsrv_fhtovp( + struct nfs_filehandle *nfhp, + mbuf_t nam, + __unused int pubflag, + vnode_t *vpp, + struct nfs_export **nxp, + struct nfs_export_options **nxop) { - struct proc *p = current_proc(); /* XXX */ - register struct mount *mp; - register int i; - struct ucred *credanon; - int error, exflags; + int error; + struct mount *mp; - *vpp = (struct vnode *)0; + *vpp = NULL; + *nxp = NULL; + *nxop = NULL; -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet - if (nfs_ispublicfh(fhp)) { + if (nfhp->nfh_xh.nxh_version != NFS_FH_VERSION) { + /* file handle format not supported */ + return (ESTALE); + } + if (nfhp->nfh_len > NFS_MAX_FH_SIZE) + return (EBADRPC); + if (nfhp->nfh_len < (int)sizeof(nfhp->nfh_xh)) + return (ESTALE); + if (nfhp->nfh_xh.nxh_flags & NXHF_INVALIDFH) + return (ESTALE); + +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED + if (nfs_ispublicfh(nfhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); - fhp = &nfs_pub.np_handle; + nfhp = &nfs_pub.np_handle; } #endif - mp = vfs_getvfs(&fhp->fh_fsid); + *nxp = nfsrv_fhtoexport(nfhp); + if (!*nxp) + return (ESTALE); + + /* Get the export option structure for this tuple. */ + *nxop = nfsrv_export_lookup(*nxp, nam); + if (nam && (*nxop == NULL)) + return (EACCES); + + /* find mount structure */ + mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); if (!mp) return (ESTALE); - error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); + + error = VFS_FHTOVP(mp, nfhp->nfh_xh.nxh_fidlen, &nfhp->nfh_fid[0], vpp, NULL); if (error) return (error); /* vnode pointer should be good at this point or ... */ if (*vpp == NULL) return (ESTALE); - /* - * Check/setup credentials. 
- */ - if (exflags & MNT_EXKERB) { - if (!kerbflag) { - vput(*vpp); - return (NFSERR_AUTHERR | AUTH_TOOWEAK); - } - } else if (kerbflag) { - vput(*vpp); - return (NFSERR_AUTHERR | AUTH_TOOWEAK); - } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { - cred->cr_uid = credanon->cr_uid; - for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) - cred->cr_groups[i] = credanon->cr_groups[i]; - cred->cr_ngroups = i; - } - if (exflags & MNT_EXRDONLY) - *rdonlyp = 1; - else - *rdonlyp = 0; - - nfsrv_object_create(*vpp); + return (0); +} - if (!lockflag) - VOP_UNLOCK(*vpp, 0, p); +/* + * nfsrv_credcheck() - check/map credentials according to given export options + */ +int +nfsrv_credcheck( + struct nfsrv_descript *nfsd, + __unused struct nfs_export *nx, + struct nfs_export_options *nxo) +{ + if (nxo && nxo->nxo_cred) { + if ((nxo->nxo_flags & NX_MAPALL) || + ((nxo->nxo_flags & NX_MAPROOT) && !suser(nfsd->nd_cr, NULL))) { + kauth_cred_rele(nfsd->nd_cr); + nfsd->nd_cr = nxo->nxo_cred; + kauth_cred_ref(nfsd->nd_cr); + } + } return (0); } @@ -2094,17 +2721,84 @@ nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) * transformed this to all zeroes in both cases, so check for it. */ int -nfs_ispublicfh(fhp) - fhandle_t *fhp; +nfs_ispublicfh(struct nfs_filehandle *nfhp) { - char *cp = (char *)fhp; - int i; - - for (i = 0; i < NFSX_V3FH; i++) + char *cp = (char *)nfhp; + unsigned int i; + + if (nfhp->nfh_len == 0) + return (TRUE); + if (nfhp->nfh_len != NFSX_V2FH) + return (FALSE); + for (i = 0; i < NFSX_V2FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } + +/* + * nfsrv_vptofh() - convert vnode to file handle for given export + * + * If the caller is passing in a vnode for a ".." directory entry, + * they can pass a directory NFS file handle (dnfhp) which will be + * checked against the root export file handle. If it matches, we + * refuse to provide the file handle for the out-of-export directory. + */ +int +nfsrv_vptofh( + struct nfs_export *nx, + int v2, + struct nfs_filehandle *dnfhp, + vnode_t vp, + struct vfs_context *ctx, + struct nfs_filehandle *nfhp) +{ + int error; + + nfhp->nfh_xh.nxh_version = NFS_FH_VERSION; + nfhp->nfh_xh.nxh_fsid = nx->nx_fs->nxfs_id; + nfhp->nfh_xh.nxh_expid = nx->nx_id; + nfhp->nfh_xh.nxh_flags = 0; + nfhp->nfh_xh.nxh_reserved = 0; + + if (v2) + bzero(&nfhp->nfh_fid[0], NFSV2_MAX_FID_SIZE); + + /* if directory FH matches export root, return invalid FH */ + if (dnfhp && nfsrv_fhmatch(dnfhp, &nx->nx_fh)) { + nfhp->nfh_len = v2 ? NFSX_V2FH : sizeof(nfhp->nfh_xh); + nfhp->nfh_xh.nxh_fidlen = 0; + nfhp->nfh_xh.nxh_flags = NXHF_INVALIDFH; + return (0); + } + + nfhp->nfh_len = v2 ? NFSV2_MAX_FID_SIZE : NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(vp, &nfhp->nfh_len, &nfhp->nfh_fid[0], ctx); + if (error) + return (error); + if (nfhp->nfh_len > (int)(v2 ? 
NFSV2_MAX_FID_SIZE : NFS_MAX_FID_SIZE)) + return (EOVERFLOW); + nfhp->nfh_xh.nxh_fidlen = nfhp->nfh_len; + nfhp->nfh_len += sizeof(nfhp->nfh_xh); + if (v2 && (nfhp->nfh_len < NFSX_V2FH)) + nfhp->nfh_len = NFSX_V2FH; + + return (0); +} + +int +nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2) +{ + int len1, len2; + + len1 = sizeof(fh1->nfh_xh) + fh1->nfh_xh.nxh_fidlen; + len2 = sizeof(fh2->nfh_xh) + fh2->nfh_xh.nxh_fidlen; + if (len1 != len2) + return (0); + if (bcmp(&fh1->nfh_xh, &fh2->nfh_xh, len1)) + return (0); + return (1); +} #endif /* NFS_NOSERVER */ /* @@ -2118,13 +2812,13 @@ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; - struct mbuf *nam; + mbuf_t nam; { - register struct sockaddr_in *inetaddr; + struct sockaddr_in *inetaddr; switch (family) { case AF_INET: - inetaddr = mtod(nam, struct sockaddr_in *); + inetaddr = mbuf_data(nam); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); @@ -2132,10 +2826,10 @@ netaddr_match(family, haddr, nam) #if ISO case AF_ISO: { - register struct sockaddr_iso *isoaddr1, *isoaddr2; + struct sockaddr_iso *isoaddr1, *isoaddr2; - isoaddr1 = mtod(nam, struct sockaddr_iso *); - isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); + isoaddr1 = mbuf_data(nam); + isoaddr2 = mbuf_data(haddr->had_nam); if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && @@ -2150,19 +2844,19 @@ netaddr_match(family, haddr, nam) return (0); } -static nfsuint64 nfs_nullcookie = { 0, 0 }; +static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(np, off, add) - register struct nfsnode *np; + struct nfsnode *np; off_t off; int add; { - register struct nfsdmap *dp, *dp2; - register int pos; + struct nfsdmap *dp, *dp2; + int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { @@ -2176,9 +2870,10 @@ nfs_getcookie(np, off, add) dp = np->n_cookies.lh_first; if (!dp) { if (add) { - MALLOC_ZONE(dp, struct nfsdmap *, - sizeof (struct nfsdmap), - M_NFSDIROFF, M_WAITOK); + MALLOC_ZONE(dp, struct nfsdmap *, sizeof(struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + if (!dp) + return ((nfsuint64 *)0); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else @@ -2192,9 +2887,10 @@ nfs_getcookie(np, off, add) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { - MALLOC_ZONE(dp2, struct nfsdmap *, - sizeof (struct nfsdmap), - M_NFSDIROFF, M_WAITOK); + MALLOC_ZONE(dp2, struct nfsdmap *, sizeof(struct nfsdmap), + M_NFSDIROFF, M_WAITOK); + if (!dp2) + return ((nfsuint64 *)0); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; @@ -2217,12 +2913,12 @@ nfs_getcookie(np, off, add) */ void nfs_invaldir(vp) - register struct vnode *vp; + vnode_t vp; { - register struct nfsnode *np = VTONFS(vp); + struct nfsnode *np = VTONFS(vp); #if DIAGNOSTIC - if (vp->v_type != VDIR) + if (vnode_vtype(vp) != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; @@ -2238,33 +2934,38 @@ nfs_invaldir(vp) * dirty block list as NB_DELWRI, all this takes is clearing the NB_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. 
*/ -void -nfs_clearcommit(mp) - struct mount *mp; +static int +nfs_clearcommit_callout(vnode_t vp, __unused void *arg) { - register struct vnode *vp, *nvp; - register struct nfsbuf *bp, *nbp; - struct nfsnode *np; - int s; - - s = splbio(); -loop: - for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { - if (vp->v_mount != mp) /* Paranoia */ - goto loop; - nvp = vp->v_mntvnodes.le_next; - np = VTONFS(vp); - for (bp = np->n_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->nb_vnbufs.le_next; - if ((bp->nb_flags & (NB_BUSY | NB_DELWRI | NB_NEEDCOMMIT)) - == (NB_DELWRI | NB_NEEDCOMMIT)) { - bp->nb_flags &= ~NB_NEEDCOMMIT; - np->n_needcommitcnt--; - CHECK_NEEDCOMMITCNT(np); - } + struct nfsnode *np = VTONFS(vp); + struct nfsbuflists blist; + struct nfsbuf *bp; + + lck_mtx_lock(nfs_buf_mutex); + if (nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) { + lck_mtx_unlock(nfs_buf_mutex); + return (VNODE_RETURNED); + } + LIST_FOREACH(bp, &blist, nb_vnbufs) { + if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) + continue; + if ((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) + == (NB_DELWRI | NB_NEEDCOMMIT)) { + bp->nb_flags &= ~NB_NEEDCOMMIT; + np->n_needcommitcnt--; } + nfs_buf_drop(bp); } - splx(s); + CHECK_NEEDCOMMITCNT(np); + nfs_buf_itercomplete(np, &blist, NBI_DIRTY); + lck_mtx_unlock(nfs_buf_mutex); + return (VNODE_RETURNED); +} + +void +nfs_clearcommit(mount_t mp) +{ + vnode_iterate(mp, VNODE_NOLOCK_INTERNAL, nfs_clearcommit_callout, NULL); } #ifndef NFS_NOSERVER @@ -2275,9 +2976,9 @@ loop: int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; - register int err; + int err; { - register short *defaulterrp, *errp; + short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { @@ -2297,16 +2998,5 @@ nfsrv_errmap(nd, err) return (NFSERR_IO); } -/* XXX CSM 11/25/97 Revisit when Ramesh merges vm with buffer cache */ -#define vfs_object_create(v, p, c, l) (0) - -int -nfsrv_object_create(struct vnode *vp) { - struct proc *curproc = current_proc(); - - if ((vp == NULL) || (vp->v_type != VREG)) - return 1; - return vfs_object_create(vp, curproc, curproc?curproc->p_ucred:NULL, 1); -} #endif /* NFS_NOSERVER */ diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index bfa8f08aa..bf82ef6bc 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,27 +66,29 @@ #include #endif #include -#include +#include #include #include -#include -#include -#include +#include +#include +#include /* for fdflags */ +#include #include #include #include #include -#include +#include #include #include #include #include -#include #include #include #include #include -#include +#include +#include +#include #include @@ -103,45 +105,40 @@ #include #include #include -#include #include #include +extern void unix_syscall_return(int); + /* Global defs. 
*/ -extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, +extern int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd, struct nfssvc_sock *slp, - struct proc *procp, - struct mbuf **mreqp)); + proc_t procp, + mbuf_t *mreqp); extern int nfs_numasync; extern int nfs_ioddelwri; -extern time_t nqnfsstarttime; -extern int nqsrv_writeslack; extern int nfsrtton; extern struct nfsstats nfsstats; extern int nfsrvw_procrastinate; extern int nfsrvw_procrastinate_v3; + struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; static int nuidhash_max = NFS_MAXUIDHASH; -static void nfsrv_zapsock __P((struct nfssvc_sock *slp)); -static int nfssvc_iod __P((struct proc *)); - -#define TRUE 1 -#define FALSE 0 +static void nfsrv_zapsock(struct nfssvc_sock *slp); +static int nfssvc_iod(proc_t); +static int nfskerb_clientd(struct nfsmount *, struct nfsd_cargs *, int, user_addr_t, proc_t); static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; #ifndef NFS_NOSERVER int nfsd_waiting = 0; static struct nfsdrt nfsdrt; -static int nfs_numnfsd = 0; -static int notstarted = 1; -static int modify_flag = 0; -static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd, - int cacherep)); -static int nfssvc_addsock __P((struct file *, struct mbuf *, - struct proc *)); -static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *)); +int nfs_numnfsd = 0; +static void nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep); +static int nfssvc_addsock(socket_t, mbuf_t, proc_t); +static int nfssvc_nfsd(struct nfsd_srvargs *,user_addr_t, proc_t); +static int nfssvc_export(user_addr_t, proc_t); static int nfs_privport = 0; /* XXX CSM 11/25/97 Upgrade sysctl.h someday */ @@ -159,45 +156,95 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate /* * Get file handle system call */ -#ifndef _SYS_SYSPROTO_H_ -struct getfh_args { - char *fname; - fhandle_t *fhp; -}; -#endif int -getfh(p, uap) - struct proc *p; - register struct getfh_args *uap; +getfh(proc_t p, struct getfh_args *uap, __unused int *retval) { - register struct vnode *vp; - fhandle_t fh; + vnode_t vp; + struct nfs_filehandle nfh; int error; struct nameidata nd; + struct vfs_context context; + char path[MAXPATHLEN], *ptr; + u_int pathlen; + struct nfs_exportfs *nxfs; + struct nfs_export *nx; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); - if(error) + error = proc_suser(p); + if (error) + return (error); + + error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, uap->fname, p); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, path, &context); error = namei(&nd); if (error) return (error); + nameidone(&nd); + vp = nd.ni_vp; - bzero((caddr_t)&fh, sizeof(fh)); - fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fh.fh_fid); - vput(vp); + + // find exportfs that matches f_mntonname + lck_rw_lock_shared(&nfs_export_rwlock); + ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; + LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) { + if (!strcmp(nxfs->nxfs_path, ptr)) + break; + } + if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { + error = EINVAL; + goto out; + } + // find export that best matches remainder of path + ptr = path + strlen(nxfs->nxfs_path); + while (*ptr && (*ptr == '/')) + ptr++; + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + int len = 
strlen(nx->nx_path); + if (len == 0) // we've hit the export entry for the root directory + break; + if (!strncmp(nx->nx_path, ptr, len)) + break; + } + if (!nx) { + error = EINVAL; + goto out; + } + + bzero(&nfh, sizeof(nfh)); + nfh.nfh_xh.nxh_version = NFS_FH_VERSION; + nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id; + nfh.nfh_xh.nxh_expid = nx->nx_id; + nfh.nfh_xh.nxh_flags = 0; + nfh.nfh_xh.nxh_reserved = 0; + nfh.nfh_len = NFS_MAX_FID_SIZE; + error = VFS_VPTOFH(vp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (nfh.nfh_len > (int)NFS_MAX_FID_SIZE) + error = EOVERFLOW; + nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; + nfh.nfh_len += sizeof(nfh.nfh_xh); + +out: + lck_rw_done(&nfs_export_rwlock); + vnode_put(vp); if (error) return (error); - error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)); + error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh)); return (error); } #endif /* NFS_NOSERVER */ +extern struct fileops vnops; + /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. @@ -205,39 +252,30 @@ getfh(p, uap) * warning: do not remove the suser() call or this becomes one giant * security hole. */ -#ifndef _SYS_SYSPROTO_H_ -struct fhopen_args { - const struct fhandle *u_fhp; - int flags; -}; -#endif int -fhopen(p, uap, retval) - struct proc *p; - register struct fhopen_args *uap; - register_t *retval; +fhopen( proc_t p, + struct fhopen_args *uap, + register_t *retval) { - struct mount *mp; - struct vnode *vp; - struct fhandle fhp; - struct vattr vat; - struct vattr *vap = &vat; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; struct flock lf; - struct file *fp; - register struct filedesc *fdp = p->p_fd; - int fmode, mode, error, type; - struct file *nfp; + struct fileproc *fp, *nfp; + int fmode, error, type; int indx; - struct ucred *credanon; - int exflags; - struct ucred *cred = p->p_ucred; - int didhold = 0; - extern struct fileops vnops; + kauth_cred_t cred = proc_ucred(p); + struct vfs_context context; + kauth_action_t action; + + context.vc_proc = p; + context.vc_ucred = cred; /* * Must be super user */ - error = suser(cred, &p->p_acflag); + error = suser(cred, 0); if (error) return (error); @@ -245,98 +283,76 @@ fhopen(p, uap, retval) /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); - error = copyin((void*)uap->u_fhp, &fhp, sizeof(fhp)); + + error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); + if (error) + return (error); + if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || + (nfh.nfh_len > (int)NFS_MAX_FH_SIZE)) + return (EINVAL); + error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); if (error) return (error); - /* find the mount point */ - mp = vfs_getvfs(&fhp.fh_fsid); - if (mp == NULL) - return (ESTALE); - /* now give me my vnode, it gets returned to me locked */ -/* XXX CSM need to split VFS_CHECKEXP out of VFS_FHTOVP? */ - error = VFS_FHTOVP(mp, &fhp.fh_fid, NULL, &vp, &exflags, &credanon); + + lck_rw_lock_shared(&nfs_export_rwlock); + /* now give me my vnode, it gets returned to me with a reference */ + error = nfsrv_fhtovp(&nfh, NULL, TRUE, &vp, &nx, &nxo); + lck_rw_done(&nfs_export_rwlock); if (error) return (error); + /* - * from now on we have to make sure not - * to forget about the vnode - * any error that causes an abort must vput(vp) - * just set error = err and 'goto bad;'. + * From now on we have to make sure not + * to forget about the vnode. 
+ * Any error that causes an abort must vnode_put(vp). + * Just set error = err and 'goto bad;'. */ /* * from vn_open */ - if (vp->v_type == VSOCK) { + if (vnode_vtype(vp) == VSOCK) { error = EOPNOTSUPP; goto bad; } - if (UBCINFOEXISTS(vp) && ((didhold = ubc_hold(vp)) == 0)) { - error = ENOENT; + /* disallow write operations on directories */ + if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { + error = EISDIR; goto bad; } - if (fmode & FREAD && fmode & (FWRITE | O_TRUNC)) { - int err = 0; - if (vp->v_type == VDIR) - err = EISDIR; - else - err = vn_writechk(vp); - if (err && !(error = VOP_ACCESS(vp, VREAD, cred, p))) - error = err; - if (error || (error = VOP_ACCESS(vp, VREAD|VWRITE, cred, p))) - goto bad; - } else if (fmode & FREAD) { - if ((error = VOP_ACCESS(vp, VREAD, cred, p))) - goto bad; - } else if (fmode & (FWRITE | O_TRUNC)) { - if (vp->v_type == VDIR) { - error = EISDIR; - goto bad; - } - if ((error = vn_writechk(vp)) || - (error = VOP_ACCESS(vp, VWRITE, cred, p))) - goto bad; - } - if (fmode & O_TRUNC) { - VOP_UNLOCK(vp, 0, p); /* XXX */ - VOP_LEASE(vp, p, cred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VATTR_NULL(vap); - vap->va_size = 0; - error = VOP_SETATTR(vp, vap, cred, p); - if (error) - goto bad; - } + /* compute action to be authorized */ + action = 0; + if (fmode & FREAD) + action |= KAUTH_VNODE_READ_DATA; + if (fmode & (FWRITE | O_TRUNC)) + action |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(vp, NULL, action, &context)) != 0) + goto bad; - error = VOP_OPEN(vp, fmode, cred, p); - if (error) + if ((error = VNOP_OPEN(vp, fmode, &context))) + goto bad; + if ((error = vnode_ref_ext(vp, fmode))) goto bad; - if (fmode & FWRITE) - if (++vp->v_writecount <= 0) - panic("fhopen: v_writecount"); /* * end of vn_open code */ + // starting here... error paths should call vn_close/vnode_put if ((error = falloc(p, &nfp, &indx)) != 0) { - if (fmode & FWRITE) - vp->v_writecount--; + vn_close(vp, fmode & FMASK, cred, p); goto bad; } fp = nfp; - /* - * Hold an extra reference to avoid having fp ripped out - * from under us while we block in the lock op - */ - fref(fp); - nfp->f_data = (caddr_t)vp; - nfp->f_flag = fmode & FMASK; - nfp->f_ops = &vnops; - nfp->f_type = DTYPE_VNODE; + fp->f_fglob->fg_flag = fmode & FMASK; + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + + // XXX do we really need to support this with fhopen()? 
if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -348,34 +364,26 @@ fhopen(p, uap, retval) type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp, 0, p); - if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, - type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdrelse(p, indx); - /* - * release our private reference - */ - frele(fp); - + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, &context))) { + vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p); + fp_free(p, indx, fp); return (error); } - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - fp->f_flag |= FHASLOCK; + fp->f_fglob->fg_flag |= FHASLOCK; } - VOP_UNLOCK(vp, 0, p); + vnode_put(vp); + + proc_fdlock(p); *fdflags(p, indx) &= ~UF_RESERVED; - frele(fp); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + *retval = indx; return (0); bad: - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); + vnode_put(vp); return (error); } @@ -386,22 +394,13 @@ bad: * - remains in the kernel as an nfsd * - remains in the kernel as an nfsiod */ -#ifndef _SYS_SYSPROTO_H_ -struct nfssvc_args { - int flag; - caddr_t argp; -}; -#endif int -nfssvc(p, uap) - struct proc *p; - register struct nfssvc_args *uap; +nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) { #ifndef NFS_NOSERVER struct nameidata nd; - struct file *fp; - struct mbuf *nam; - struct nfsd_args nfsdarg; + mbuf_t nam; + struct user_nfsd_args user_nfsdarg; struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; struct nfsd_cargs ncd; struct nfsd *nfsd; @@ -409,6 +408,9 @@ nfssvc(p, uap) struct nfsuid *nuidp; struct nfsmount *nmp; struct timeval now; + socket_t so; + struct vfs_context context; + struct ucred temp_cred; #endif /* NFS_NOSERVER */ int error; @@ -417,13 +419,9 @@ nfssvc(p, uap) /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); + error = proc_suser(p); if(error) return (error); - while (nfssvc_sockhead_flag & SLP_INIT) { - nfssvc_sockhead_flag |= SLP_WANTINIT; - (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); - } if (uap->flag & NFSSVC_BIOD) error = nfssvc_iod(p); #ifdef NFS_NOSERVER @@ -431,18 +429,26 @@ nfssvc(p, uap) error = ENXIO; #else /* !NFS_NOSERVER */ else if (uap->flag & NFSSVC_MNTD) { + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)); if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - UIO_USERSPACE, ncd.ncd_dirp, p); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + (proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), + CAST_USER_ADDR_T(ncd.ncd_dirp), &context); error = namei(&nd); if (error) return (error); - if ((nd.ni_vp->v_flag & VROOT) == 0) + nameidone(&nd); + + if (vnode_isvroot(nd.ni_vp) == 0) error = EINVAL; - nmp = VFSTONFS(nd.ni_vp->v_mount); - vput(nd.ni_vp); + nmp = VFSTONFS(vnode_mount(nd.ni_vp)); + vnode_put(nd.ni_vp); if (error) return (error); @@ -450,28 +456,45 @@ nfssvc(p, uap) (uap->flag & NFSSVC_GOTAUTH) == 0) return (0); nmp->nm_state |= NFSSTA_MNTD; - error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag, - uap->argp, p); + error = nfskerb_clientd(nmp, &ncd, uap->flag, uap->argp, p); } else if (uap->flag & NFSSVC_ADDSOCK) { - error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); + } else { + struct nfsd_args tmp_args; + error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); + if (error == 0) { + user_nfsdarg.sock = tmp_args.sock; + user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); + user_nfsdarg.namelen = tmp_args.namelen; + } + } if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + /* get the socket */ + error = file_socket(user_nfsdarg.sock, &so); if (error) return (error); - /* - * Get the client address for connected sockets. - */ - if (nfsdarg.name == NULL || nfsdarg.namelen == 0) - nam = (struct mbuf *)0; - else { - error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen, - MT_SONAME); - if (error) + /* Get the client address for connected sockets. */ + if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { + nam = NULL; + } else { + error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); + if (error) { + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); return (error); + } } - error = nfssvc_addsock(fp, nam, p); - } else { + /* + * nfssvc_addsock() will grab a retain count on the socket + * to keep the socket from being closed when nfsd closes its + * file descriptor for it. 
+ */ + error = nfssvc_addsock(so, nam, p); + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); + } else if (uap->flag & NFSSVC_NFSD) { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) return (error); @@ -486,14 +509,14 @@ nfssvc(p, uap) */ for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid && + if (kauth_cred_getuid(nuidp->nu_cr) == nsd->nsd_cr.cr_uid && (!nfsd->nfsd_nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp), &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2))) break; } if (nuidp) { - nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr,nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } else { /* @@ -507,23 +530,36 @@ nfssvc(p, uap) } else nuidp = (struct nfsuid *)0; if ((slp->ns_flag & SLP_VALID) == 0) { - if (nuidp) + if (nuidp) { FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + slp->ns_numuids--; + } } else { if (nuidp == (struct nfsuid *)0) { nuidp = slp->ns_uidlruhead.tqh_first; + if (!nuidp) + return (ENOMEM); LIST_REMOVE(nuidp, nu_hash); TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); } nuidp->nu_flag = 0; - nuidp->nu_cr = nsd->nsd_cr; - if (nuidp->nu_cr.cr_ngroups > NGROUPS) - nuidp->nu_cr.cr_ngroups = NGROUPS; - nuidp->nu_cr.cr_ref = 1; + + if (nsd->nsd_cr.cr_ngroups > NGROUPS) + nsd->nsd_cr.cr_ngroups = NGROUPS; + + nfsrv_setcred(&nsd->nsd_cr, &temp_cred); + nuidp->nu_cr = kauth_cred_create(&temp_cred); + + if (!nuidp->nu_cr) { + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (ENOMEM); + } nuidp->nu_timestamp = nsd->nsd_timestamp; microtime(&now); nuidp->nu_expire = now.tv_sec + nsd->nsd_ttl; @@ -535,8 +571,7 @@ nfssvc(p, uap) if (nfsd->nfsd_nd->nd_nam2) { struct sockaddr_in *saddr; - saddr = mtod(nfsd->nfsd_nd->nd_nam2, - struct sockaddr_in *); + saddr = mbuf_data(nfsd->nfsd_nd->nd_nam2); switch (saddr->sin_family) { case AF_INET: nuidp->nu_flag |= NU_INETADDR; @@ -546,9 +581,15 @@ nfssvc(p, uap) case AF_ISO: default: nuidp->nu_flag |= NU_NAM; - nuidp->nu_nam = m_copym( - nfsd->nfsd_nd->nd_nam2, 0, - M_COPYALL, M_WAIT); + error = mbuf_copym(nfsd->nfsd_nd->nd_nam2, 0, + MBUF_COPYALL, MBUF_WAITOK, + &nuidp->nu_nam); + if (error) { + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID); + slp->ns_numuids--; + return (error); + } break; }; } @@ -556,8 +597,8 @@ nfssvc(p, uap) nu_lru); LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), nuidp, nu_hash); - nfsrv_setcred(&nuidp->nu_cr, - &nfsd->nfsd_nd->nd_cr); + nfsrv_setcred(nuidp->nu_cr, + nfsd->nfsd_nd->nd_cr); nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; } } @@ -565,6 +606,10 @@ nfssvc(p, uap) if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd)) nfsd->nfsd_flag |= NFSD_AUTHFAIL; error = nfssvc_nfsd(nsd, uap->argp, p); + } else if (uap->flag & NFSSVC_EXPORT) { + error = nfssvc_export(uap->argp, p); + } else { + error = EINVAL; } #endif /* NFS_NOSERVER */ if (error == EINTR || error == ERESTART) @@ -572,57 +617,156 @@ nfssvc(p, uap) return (error); } +/* + * NFSKERB client helper daemon. + * Gets authorization strings for "kerb" mounts. 
+ */ +static int +nfskerb_clientd( + struct nfsmount *nmp, + struct nfsd_cargs *ncd, + int flag, + user_addr_t argp, + proc_t p) +{ + struct nfsuid *nuidp, *nnuidp; + int error = 0; + struct nfsreq *rp; + struct timeval now; + + /* + * First initialize some variables + */ + microtime(&now); + + /* + * If an authorization string is being passed in, get it. + */ + if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_state & NFSSTA_MOUNTED) && + ((nmp->nm_state & NFSSTA_WAITAUTH) == 0)) { + if (nmp->nm_state & NFSSTA_HASAUTH) + panic("cld kerb"); + if ((flag & NFSSVC_AUTHINFAIL) == 0) { + if (ncd->ncd_authlen <= nmp->nm_authlen && + ncd->ncd_verflen <= nmp->nm_verflen && + !copyin(CAST_USER_ADDR_T(ncd->ncd_authstr),nmp->nm_authstr,ncd->ncd_authlen)&& + !copyin(CAST_USER_ADDR_T(ncd->ncd_verfstr),nmp->nm_verfstr,ncd->ncd_verflen)){ + nmp->nm_authtype = ncd->ncd_authtype; + nmp->nm_authlen = ncd->ncd_authlen; + nmp->nm_verflen = ncd->ncd_verflen; +#if NFSKERB + nmp->nm_key = ncd->ncd_key; +#endif + } else + nmp->nm_state |= NFSSTA_AUTHERR; + } else + nmp->nm_state |= NFSSTA_AUTHERR; + nmp->nm_state |= NFSSTA_HASAUTH; + wakeup((caddr_t)&nmp->nm_authlen); + } else { + nmp->nm_state |= NFSSTA_WAITAUTH; + } + + /* + * Loop every second updating queue until there is a termination sig. + */ + while (nmp->nm_state & NFSSTA_MOUNTED) { + /* Get an authorization string, if required. */ + if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH)) == 0) { + ncd->ncd_authuid = nmp->nm_authuid; + if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) + nmp->nm_state |= NFSSTA_WAITAUTH; + else + return (ENEEDAUTH); + } + /* Wait a bit (no pun) and do it again. */ + if ((nmp->nm_state & NFSSTA_MOUNTED) && + (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) { + error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, + "nfskrbtimr", hz / 3); + if (error == EINTR || error == ERESTART) + dounmount(nmp->nm_mountp, 0, p); + } + } + + /* + * Finally, we can free up the mount structure. + */ + for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID); + } + /* + * Loop through outstanding request list and remove dangling + * references to defunct nfsmount struct + */ + for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next) + if (rp->r_nmp == nmp) + rp->r_nmp = (struct nfsmount *)0; + /* Need to wake up any rcvlock waiters so they notice the unmount. */ + if (nmp->nm_state & NFSSTA_WANTRCV) { + nmp->nm_state &= ~NFSSTA_WANTRCV; + wakeup(&nmp->nm_state); + } + FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); + if (error == EWOULDBLOCK) + error = 0; + return (error); +} + #ifndef NFS_NOSERVER /* * Adds a socket to the list for servicing by nfsds. 
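
nfskerb_clientd() above parks in tsleep() for roughly a third of a second at a time and returns ENEEDAUTH whenever user space must produce a fresh authenticator, which bounces the helper daemon back out of the kernel. A portable pthread analogue of that wait-and-bounce loop, with hypothetical names standing in for the nfsmount state bits:

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

struct helper {
	pthread_mutex_t lock;
	pthread_cond_t  wake;
	bool            mounted;
	bool            need_auth;
};

/* Loop until unmount; return nonzero when the user-space helper
 * must fetch a new authenticator (the ENEEDAUTH bounce above). */
static int
helper_loop(struct helper *h)
{
	struct timespec ts;
	int need = 0;

	pthread_mutex_lock(&h->lock);
	while (h->mounted) {
		if (h->need_auth) {
			h->need_auth = false;
			need = 1;          /* caller copies the request out */
			break;
		}
		clock_gettime(CLOCK_REALTIME, &ts);
		ts.tv_nsec += 333000000;   /* ~hz/3, as in the kernel loop */
		if (ts.tv_nsec >= 1000000000) {
			ts.tv_sec++;
			ts.tv_nsec -= 1000000000;
		}
		pthread_cond_timedwait(&h->wake, &h->lock, &ts);
	}
	pthread_mutex_unlock(&h->lock);
	return need;
}
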
*/ static int -nfssvc_addsock(fp, mynam, p) - struct file *fp; - struct mbuf *mynam; - struct proc *p; +nfssvc_addsock( + socket_t so, + mbuf_t mynam, + __unused proc_t p) { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - struct nfssvc_sock *tslp; - int error, s; - - so = (struct socket *)fp->f_data; - tslp = (struct nfssvc_sock *)0; + int siz; + struct nfssvc_sock *slp; + struct nfssvc_sock *tslp = NULL; + int error, sodomain, sotype, soprotocol, on = 1; + struct timeval timeo; + + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mlen) + nfs_mbuf_init(); + + sock_gettype(so, &sodomain, &sotype, &soprotocol); + /* * Add it to the list, as required. */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (so->so_proto->pr_protocol == IPPROTO_UDP) { + if (soprotocol == IPPROTO_UDP) { tslp = nfs_udpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #if ISO - } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { + } else if (soprotocol == ISOPROTO_CLTP) { tslp = nfs_cltpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if (!tslp || (tslp->ns_flag & SLP_VALID)) { + mbuf_freem(mynam); return (EPERM); } #endif /* ISO */ } /* reserve buffer space for 2 maximally-sized packets */ siz = NFS_MAXPACKET; - if (so->so_type == SOCK_STREAM) + if (sotype == SOCK_STREAM) siz += sizeof (u_long); siz *= 2; if (siz > NFS_MAXSOCKBUF) siz = NFS_MAXSOCKBUF; - error = soreserve(so, siz, siz); - if (error) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + if ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &siz, sizeof(siz))) || + (error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &siz, sizeof(siz)))) { + mbuf_freem(mynam); return (error); } @@ -631,62 +775,54 @@ nfssvc_addsock(fp, mynam, p) * reserve some space. For datagram sockets, this can get called * repeatedly for the same socket, but that isn't harmful. 
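
The rewrite above replaces direct soreserve()/sosetopt() manipulation with the sock_setsockopt() KPI: buffer space for two maximum-size packets, then (just below) SO_KEEPALIVE and TCP_NODELAY for stream sockets and zeroed send/receive timeouts. The same configuration expressed in user space with setsockopt(2), as a sketch:

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/time.h>

/* Tune a server socket the way nfssvc_addsock() does: large
 * send/receive buffers, keepalive and NODELAY on stream sockets,
 * and zeroed (i.e. fully blocking) I/O timeouts. */
static int
tune_server_socket(int s, int sotype, int bufsiz)
{
	int on = 1;
	struct timeval timeo = { 0, 0 };

	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &bufsiz, sizeof(bufsiz)) ||
	    setsockopt(s, SOL_SOCKET, SO_RCVBUF, &bufsiz, sizeof(bufsiz)))
		return -1;
	if (sotype == SOCK_STREAM) {
		setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
		setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
	}
	setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
	setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
	return 0;
}
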
*/ - if (so->so_type == SOCK_STREAM) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sotype == SOCK_STREAM) { + sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); } - if (so->so_proto->pr_domain->dom_family == AF_INET && - so->so_proto->pr_protocol == IPPROTO_TCP) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + if (sodomain == AF_INET && soprotocol == IPPROTO_TCP) { + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); } - so->so_rcv.sb_flags &= ~SB_NOINTR; - so->so_rcv.sb_timeo = 0; - so->so_snd.sb_flags &= ~SB_NOINTR; - so->so_snd.sb_timeo = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (tslp) + sock_nointerrupt(so, 0); + + timeo.tv_usec = 0; + timeo.tv_sec = 0; + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + + if (tslp) { slp = tslp; - else { + lck_mtx_lock(nfsd_mutex); + } else { MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); + if (!slp) { + mbuf_freem(mynam); + return (ENOMEM); + } bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); + lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr); TAILQ_INIT(&slp->ns_uidlruhead); + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); } + + sock_retain(so); /* grab a retain count on the socket */ slp->ns_so = so; + slp->ns_sotype = sotype; slp->ns_nam = mynam; - slp->ns_fp = fp; - (void)fref(fp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - s = splnet(); + + socket_lock(so, 1); so->so_upcallarg = (caddr_t)slp; so->so_upcall = nfsrv_rcv; so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */ - slp->ns_nflag = SLPN_NEEDQ; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - slp->ns_flag = SLP_VALID; + socket_unlock(so, 1); + + slp->ns_flag = SLP_VALID | SLP_NEEDQ; + nfsrv_wakenfsd(slp); - splx(s); + lck_mtx_unlock(nfsd_mutex); + return (0); } @@ -697,86 +833,96 @@ nfssvc_addsock(fp, mynam, p) static int nfssvc_nfsd(nsd, argp, p) struct nfsd_srvargs *nsd; - caddr_t argp; - struct proc *p; + user_addr_t argp; + proc_t p; { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; + mbuf_t m, mreq; + struct nfssvc_sock *slp; struct nfsd *nfsd = nsd->nsd_nfsd; struct nfsrv_descript *nd = NULL; - struct mbuf *mreq; - int error = 0, cacherep, s, sotype, writes_todo; - int procrastinate; + int error = 0, cacherep, writes_todo; + int siz, procrastinate; u_quad_t cur_usec; struct timeval now; + boolean_t funnel_state; #ifndef nolint cacherep = RC_DOIT; writes_todo = 0; #endif - s = splnet(); if (nfsd == (struct nfsd *)0) { MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); + if (!nfsd) + return (ENOMEM); nsd->nsd_nfsd = nfsd; bzero((caddr_t)nfsd, sizeof (struct nfsd)); nfsd->nfsd_procp = p; + lck_mtx_lock(nfsd_mutex); TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); nfs_numnfsd++; + lck_mtx_unlock(nfsd_mutex); } + + funnel_state = 
thread_funnel_set(kernel_flock, FALSE); + /* * Loop getting rpc requests until SIGKILL. */ for (;;) { if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) { - while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + lck_mtx_lock(nfsd_mutex); + while ((nfsd->nfsd_slp == NULL) && !(nfsd_head_flag & NFSD_CHECKSLP)) { nfsd->nfsd_flag |= NFSD_WAITING; nfsd_waiting++; - error = tsleep((caddr_t)nfsd, PSOCK | PCATCH, - "nfsd", 0); + error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", 0); nfsd_waiting--; - if (error) + if (error) { + lck_mtx_unlock(nfsd_mutex); goto done; + } } - if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) != 0) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; - slp = slp->ns_chain.tqe_next) { + if ((nfsd->nfsd_slp == NULL) && (nfsd_head_flag & NFSD_CHECKSLP)) { + TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { + lck_rw_lock_shared(&slp->ns_rwlock); if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) == (SLP_VALID | SLP_DOREC)) { + if (lck_rw_lock_shared_to_exclusive(&slp->ns_rwlock)) { + /* upgrade failed and we lost the lock; take exclusive and recheck */ + lck_rw_lock_exclusive(&slp->ns_rwlock); + if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) + != (SLP_VALID | SLP_DOREC)) { + /* flags no longer set, so skip this socket */ + lck_rw_done(&slp->ns_rwlock); + continue; + } + } slp->ns_flag &= ~SLP_DOREC; slp->ns_sref++; nfsd->nfsd_slp = slp; + lck_rw_done(&slp->ns_rwlock); break; } + lck_rw_done(&slp->ns_rwlock); } if (slp == 0) nfsd_head_flag &= ~NFSD_CHECKSLP; } - if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0) + lck_mtx_unlock(nfsd_mutex); + if ((slp = nfsd->nfsd_slp) == NULL) continue; + lck_rw_lock_exclusive(&slp->ns_rwlock); if (slp->ns_flag & SLP_VALID) { - nfs_slplock(slp, 1); - if (slp->ns_nflag & SLPN_DISCONN) { + if (slp->ns_flag & SLP_DISCONN) { nfsrv_zapsock(slp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - } else if (slp->ns_nflag & SLPN_NEEDQ) { - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - slp->ns_nflag &= ~SLPN_NEEDQ; - nfsrv_rcv(slp->ns_so, (caddr_t)slp, - M_WAIT); - } else - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + } else if (slp->ns_flag & SLP_NEEDQ) { + slp->ns_flag &= ~SLP_NEEDQ; + nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); + } error = nfsrv_dorec(slp, nfsd, &nd); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - nfs_slpunlock(slp); microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - if (error && slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + if (error && slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { error = 0; cacherep = RC_DOIT; writes_todo = 1; @@ -784,6 +930,7 @@ nfssvc_nfsd(nsd, argp, p) writes_todo = 0; nfsd->nfsd_flag |= NFSD_REQINPROG; } + lck_rw_done(&slp->ns_rwlock); } else { error = 0; slp = nfsd->nfsd_slp; @@ -791,19 +938,18 @@ nfssvc_nfsd(nsd, argp, p) if (error || (slp->ns_flag & SLP_VALID) == 0) { if (nd) { if (nd->nd_nam2) - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } - nfsd->nfsd_slp = (struct nfssvc_sock *)0; + nfsd->nfsd_slp = NULL; nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsrv_slpderef(slp); continue; } - splx(s); - so = slp->ns_so; - sotype = so->so_type; if (nd) { microuptime(&nd->nd_starttime); if (nd->nd_nam2) @@ -816,42 +962,22 @@ nfssvc_nfsd(nsd, argp, p) */ if (nfsd->nfsd_flag & NFSD_NEEDAUTH) { nfsd->nfsd_flag &= ~NFSD_NEEDAUTH; - 
nsd->nsd_haddr = mtod(nd->nd_nam, - struct sockaddr_in *)->sin_addr.s_addr; + nsd->nsd_haddr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; nsd->nsd_authlen = nfsd->nfsd_authlen; nsd->nsd_verflen = nfsd->nfsd_verflen; - if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr, + if (!copyout(nfsd->nfsd_authstr,CAST_USER_ADDR_T(nsd->nsd_authstr), nfsd->nfsd_authlen) && - !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr, + !copyout(nfsd->nfsd_verfstr, CAST_USER_ADDR_T(nsd->nsd_verfstr), nfsd->nfsd_verflen) && - !copyout((caddr_t)nsd, argp, sizeof (*nsd))) + !copyout((caddr_t)nsd, argp, sizeof (*nsd))) { + thread_funnel_set(kernel_flock, funnel_state); return (ENEEDAUTH); + } cacherep = RC_DROPIT; } else cacherep = nfsrv_getcache(nd, slp, &mreq); - /* - * Check for just starting up for NQNFS and send - * fake "try again later" replies to the NQNFS clients. - */ - microtime(&now); - if (notstarted && nqnfsstarttime <= now.tv_sec) { - if (modify_flag) { - nqnfsstarttime = now.tv_sec + nqsrv_writeslack; - modify_flag = 0; - } else - notstarted = 0; - } - if (notstarted) { - if ((nd->nd_flag & ND_NQNFS) == 0) - cacherep = RC_DROPIT; - else if (nd->nd_procnum != NFSPROC_WRITE) { - nd->nd_procnum = NFSPROC_NOOP; - nd->nd_repstat = NQNFS_TRYLATER; - cacherep = RC_DOIT; - } else - modify_flag = 1; - } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { + if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { nfsd->nfsd_flag &= ~NFSD_AUTHFAIL; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); @@ -859,18 +985,20 @@ nfssvc_nfsd(nsd, argp, p) } else if (nfs_privport) { /* Check if source port is privileged */ u_short port; - struct sockaddr *nam = mtod(nd->nd_nam, struct sockaddr*); + struct sockaddr *nam = mbuf_data(nd->nd_nam); struct sockaddr_in *sin; sin = (struct sockaddr_in *)nam; port = ntohs(sin->sin_port); if (port >= IPPORT_RESERVED && nd->nd_procnum != NFSPROC_NULL) { + char strbuf[MAX_IPv4_STR_LEN]; nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); cacherep = RC_DOIT; printf("NFS request from unprivileged port (%s:%d)\n", - (char *)(inet_ntoa(sin->sin_addr)), port); + inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)), + port); } } @@ -887,94 +1015,100 @@ nfssvc_nfsd(nsd, argp, p) procrastinate = nfsrvw_procrastinate_v3; else procrastinate = nfsrvw_procrastinate; - if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE && - procrastinate > 0 && !notstarted)) - error = nfsrv_writegather(&nd, slp, - nfsd->nfsd_procp, &mreq); + lck_rw_lock_shared(&nfs_export_rwlock); + if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) + error = nfsrv_writegather(&nd, slp, nfsd->nfsd_procp, &mreq); else - error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, - slp, nfsd->nfsd_procp, &mreq); + error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, nfsd->nfsd_procp, &mreq); + lck_rw_done(&nfs_export_rwlock); if (mreq == NULL) break; if (error) { - if (nd->nd_procnum != NQNFSPROC_VACATED) - nfsstats.srv_errs++; + OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs); nfsrv_updatecache(nd, FALSE, mreq); if (nd->nd_nam2) { - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); nd->nd_nam2 = NULL; } break; } - nfsstats.srvrpccnt[nd->nd_procnum]++; + OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]); nfsrv_updatecache(nd, TRUE, mreq); - nd->nd_mrep = (struct mbuf *)0; + nd->nd_mrep = NULL; case RC_REPLY: m = mreq; siz = 0; while (m) { - siz += m->m_len; - m = m->m_next; + siz += mbuf_len(m); + m = mbuf_next(m); } if (siz <= 0 || siz > NFS_MAXPACKET) { 
printf("mbuf siz=%d\n",siz); panic("Bad nfs svc reply"); } m = mreq; - m->m_pkthdr.len = siz; - m->m_pkthdr.rcvif = (struct ifnet *)0; + mbuf_pkthdr_setlen(m, siz); + error = mbuf_pkthdr_setrcvif(m, NULL); + if (error) + panic("nfsd setrcvif failed: %d", error); /* * For stream protocols, prepend a Sun RPC * Record Mark. */ - if (sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | siz); + if (slp->ns_sotype == SOCK_STREAM) { + error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); + if (!error) + *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz); } - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - (void) nfs_slplock(slp, 1); - if (slp->ns_flag & SLP_VALID) - error = nfs_send(so, nd->nd_nam2, m, NULL); - else { - error = EPIPE; - m_freem(m); + if (!error) { + if (slp->ns_flag & SLP_VALID) { + error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL); + } else { + error = EPIPE; + mbuf_freem(m); + } + } else { + mbuf_freem(m); } mreq = NULL; if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); + nfsd_rt(slp->ns_sotype, nd, cacherep); if (nd->nd_nam2) { - MFREE(nd->nd_nam2, m); + mbuf_freem(nd->nd_nam2); nd->nd_nam2 = NULL; } if (nd->nd_mrep) { - m_freem(nd->nd_mrep); + mbuf_freem(nd->nd_mrep); nd->nd_mrep = NULL; } - if (error == EPIPE) + if (error == EPIPE) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - nfs_slpunlock(slp); + lck_rw_done(&slp->ns_rwlock); + } if (error == EINTR || error == ERESTART) { - FREE_ZONE((caddr_t)nd, - sizeof *nd, M_NFSRVDESC); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); + FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nfsrv_slpderef(slp); - s = splnet(); goto done; } break; case RC_DROPIT: if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - m_freem(nd->nd_mrep); - m_freem(nd->nd_nam2); + nfsd_rt(slp->ns_sotype, nd, cacherep); + mbuf_freem(nd->nd_mrep); + mbuf_freem(nd->nd_nam2); nd->nd_mrep = nd->nd_nam2 = NULL; break; }; if (nd) { if (nd->nd_mrep) - m_freem(nd->nd_mrep); + mbuf_freem(nd->nd_mrep); if (nd->nd_nam2) - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); + if (nd->nd_cr) + kauth_cred_rele(nd->nd_cr); FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); nd = NULL; } @@ -986,34 +1120,71 @@ nfssvc_nfsd(nsd, argp, p) microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - s = splsoftclock(); - if (slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { + if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) { cacherep = RC_DOIT; writes_todo = 1; - } else + } else { writes_todo = 0; - splx(s); + } } while (writes_todo); - s = splnet(); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + lck_rw_lock_exclusive(&slp->ns_rwlock); if (nfsrv_dorec(slp, nfsd, &nd)) { - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + lck_rw_done(&slp->ns_rwlock); nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsd->nfsd_slp = NULL; nfsrv_slpderef(slp); - } else - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + } else { + lck_rw_done(&slp->ns_rwlock); + } } done: + thread_funnel_set(kernel_flock, funnel_state); + lck_mtx_lock(nfsd_mutex); TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); - splx(s); - _FREE((caddr_t)nfsd, M_NFSD); + FREE(nfsd, M_NFSD); nsd->nsd_nfsd = (struct nfsd *)0; if (--nfs_numnfsd == 0) nfsrv_init(TRUE); /* Reinitialize everything */ + lck_mtx_unlock(nfsd_mutex); return (error); } + +static int +nfssvc_export(user_addr_t argp, proc_t p) +{ + int error = 0, is_64bit; + struct user_nfs_export_args unxa; + struct 
vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + is_64bit = IS_64BIT_PROCESS(p); + + /* copy in pointers to path and export args */ + if (is_64bit) { + error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); + } else { + struct nfs_export_args tnxa; + error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); + if (error == 0) { + /* munge into LP64 version of nfs_export_args structure */ + unxa.nxa_fsid = tnxa.nxa_fsid; + unxa.nxa_expid = tnxa.nxa_expid; + unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); + unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); + unxa.nxa_flags = tnxa.nxa_flags; + unxa.nxa_netcount = tnxa.nxa_netcount; + unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); + } + } + if (error) + return (error); + + error = nfsrv_export(&unxa, &context); + + return (error); +} + #endif /* NFS_NOSERVER */ int nfs_defect = 0; @@ -1022,14 +1193,8 @@ int nfs_defect = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); #endif -#ifndef _SYS_SYSPROTO_H_ -struct nfsclnt_args { - int flag; - caddr_t argp; -}; -#endif int -nfsclnt(struct proc *p, struct nfsclnt_args *uap) +nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) { struct lockd_ans la; int error; @@ -1042,7 +1207,7 @@ nfsclnt(struct proc *p, struct nfsclnt_args *uap) return (error != 0 ? error : nfslockdans(p, &la)); } if (uap->flag == NFSCLNT_LOCKDFD) - return (nfslockdfd(p, (int)uap->argp)); + return (nfslockdfd(p, CAST_DOWN(int, uap->argp))); return EINVAL; } @@ -1055,12 +1220,9 @@ static int nfssvc_iod_continue(int); * Never returns unless it fails or gets killed. */ static int -nfssvc_iod(p) - struct proc *p; +nfssvc_iod(__unused proc_t p) { register int i, myiod; - struct nfsmount *nmp; - int error = 0; struct uthread *ut; /* @@ -1079,106 +1241,131 @@ nfssvc_iod(p) /* stuff myiod into uthread to get off local stack for continuation */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); ut->uu_state.uu_nfs_myiod = myiod; /* squirrel away for continuation */ nfssvc_iod_continue(0); /* NOTREACHED */ - + return (0); } /* * Continuation for Asynchronous I/O daemons for client nfs. */ static int -nfssvc_iod_continue(error) +nfssvc_iod_continue(int error) { register struct nfsbuf *bp; register int i, myiod; struct nfsmount *nmp; struct uthread *ut; - struct proc *p; + proc_t p; /* * real myiod is stored in uthread, recover it */ - ut = (struct uthread *)get_bsdthread_info(current_act()); + ut = (struct uthread *)get_bsdthread_info(current_thread()); myiod = ut->uu_state.uu_nfs_myiod; - p = current_proc(); + p = current_proc(); // XXX /* * Just loop around doin our stuff until SIGKILL * - actually we don't loop with continuations... 
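
In the reply path a few hunks above, stream-socket replies get a 4-byte Sun RPC record mark prepended: htonl(0x80000000 | siz), where the high bit flags the last fragment and the low 31 bits carry the fragment length. A small sketch of building and parsing that mark:

#include <stdint.h>
#include <arpa/inet.h>

#define RPC_LAST_FRAG 0x80000000u

/* Build the record mark prepended to each reply on a stream
 * socket: high bit = last fragment, low 31 bits = length. */
static uint32_t
rpc_record_mark(uint32_t len)
{
	return htonl(RPC_LAST_FRAG | (len & 0x7fffffffu));
}

/* Parse a received mark back into (last, length). */
static void
rpc_parse_mark(uint32_t wire, int *last, uint32_t *len)
{
	uint32_t host = ntohl(wire);

	*last = (host & RPC_LAST_FRAG) != 0;
	*len  = host & 0x7fffffffu;
}

Datagram transports skip the mark entirely, since each request or reply is already delimited by the packet boundary.
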
*/ + lck_mtx_lock(nfs_iod_mutex); for (;;) { while (((nmp = nfs_iodmount[myiod]) == NULL || nmp->nm_bufq.tqh_first == NULL) && error == 0 && nfs_ioddelwri == 0) { if (nmp) nmp->nm_bufqiods--; - nfs_iodwant[myiod] = p; + nfs_iodwant[myiod] = p; // XXX this doesn't need to be a proc_t nfs_iodmount[myiod] = NULL; - error = tsleep0((caddr_t)&nfs_iodwant[myiod], - PWAIT | PCATCH, "nfsidl", 0, nfssvc_iod_continue); - /* NOTREACHED */ + error = msleep0((caddr_t)&nfs_iodwant[myiod], nfs_iod_mutex, + PWAIT | PCATCH | PDROP, "nfsidl", 0, nfssvc_iod_continue); + lck_mtx_lock(nfs_iod_mutex); } if (error) { nfs_asyncdaemon[myiod] = 0; if (nmp) nmp->nm_bufqiods--; nfs_iodwant[myiod] = NULL; nfs_iodmount[myiod] = NULL; + lck_mtx_unlock(nfs_iod_mutex); nfs_numasync--; if (error == EINTR || error == ERESTART) error = 0; unix_syscall_return(error); } if (nmp != NULL) { - while ((bp = nmp->nm_bufq.tqh_first) != NULL) { + while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) { /* Take one off the front of the list */ TAILQ_REMOVE(&nmp->nm_bufq, bp, nb_free); bp->nb_free.tqe_next = NFSNOLIST; nmp->nm_bufqlen--; if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { nmp->nm_bufqwant = FALSE; + lck_mtx_unlock(nfs_iod_mutex); wakeup(&nmp->nm_bufq); + } else { + lck_mtx_unlock(nfs_iod_mutex); } + + SET(bp->nb_flags, NB_IOD); if (ISSET(bp->nb_flags, NB_READ)) - (void) nfs_doio(bp, bp->nb_rcred, (struct proc *)0); + nfs_doio(bp, bp->nb_rcred, NULL); else - (void) nfs_doio(bp, bp->nb_wcred, (struct proc *)0); + nfs_doio(bp, bp->nb_wcred, NULL); + lck_mtx_lock(nfs_iod_mutex); /* * If there are more than one iod on this mount, then defect * so that the iods can be shared out fairly between the mounts */ if (nfs_defect && nmp->nm_bufqiods > 1) { - NFS_DPF(ASYNCIO, - ("nfssvc_iod: iod %d defecting from mount %p\n", - myiod, nmp)); nfs_iodmount[myiod] = NULL; nmp->nm_bufqiods--; break; } } } + lck_mtx_unlock(nfs_iod_mutex); + if (nfs_ioddelwri) { i = 0; nfs_ioddelwri = 0; + lck_mtx_lock(nfs_buf_mutex); while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) { struct nfsnode *np = VTONFS(bp->nb_vp); nfs_buf_remfree(bp); + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN); + nfs_buf_refrele(bp); + if (error) + break; + if (!bp->nb_vp) { + /* buffer is no longer valid */ + nfs_buf_drop(bp); + continue; + } if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { /* put buffer at end of delwri list */ TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free); nfsbufdelwricnt++; - nfs_flushcommits(np->n_vnode, (struct proc *)0); + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); + nfs_flushcommits(np->n_vnode, NULL, 1); } else { - SET(bp->nb_flags, (NB_BUSY | NB_ASYNC | NB_IOD)); + SET(bp->nb_flags, (NB_ASYNC | NB_IOD)); + lck_mtx_unlock(nfs_buf_mutex); nfs_buf_write(bp); } i++; + lck_mtx_lock(nfs_buf_mutex); } + lck_mtx_unlock(nfs_buf_mutex); } + + lck_mtx_lock(nfs_iod_mutex); } } @@ -1190,52 +1377,23 @@ nfssvc_iod_continue(error) * reassigned during cleanup. 
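
The iod loop above holds nfs_iod_mutex only while manipulating the mount's buffer queue and drops it around each nfs_doio() call, reacquiring it before the next dequeue. The same hand-over-hand shape with pthreads, using hypothetical types:

#include <pthread.h>
#include <sys/queue.h>

struct buf {
	TAILQ_ENTRY(buf) link;
};
TAILQ_HEAD(bufq, buf);

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;

/* hypothetical I/O routine standing in for nfs_doio() */
static void do_io(struct buf *bp) { (void)bp; }

/* Drain a queue the way the iod loop above does: the lock covers
 * only queue manipulation and is dropped across the actual I/O. */
static void
drain(struct bufq *q)
{
	struct buf *bp;

	pthread_mutex_lock(&q_lock);
	while ((bp = TAILQ_FIRST(q)) != NULL) {
		TAILQ_REMOVE(q, bp, link);
		pthread_mutex_unlock(&q_lock);  /* never hold the lock across I/O */
		do_io(bp);
		pthread_mutex_lock(&q_lock);
	}
	pthread_mutex_unlock(&q_lock);
}
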
*/ static void -nfsrv_zapsock(slp) - register struct nfssvc_sock *slp; +nfsrv_zapsock(struct nfssvc_sock *slp) { - register struct nfsuid *nuidp, *nnuidp; - register struct nfsrv_descript *nwp, *nnwp; - struct socket *so; - struct file *fp; - struct mbuf *m; - int s; + socket_t so; + if ((slp->ns_flag & SLP_VALID) == 0) + return; slp->ns_flag &= ~SLP_ALLFLAGS; - slp->ns_nflag &= ~SLP_ALLFLAGS; - fp = slp->ns_fp; - if (fp) { - slp->ns_fp = (struct file *)0; - so = slp->ns_so; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - so->so_upcall = NULL; - so->so_rcv.sb_flags &= ~SB_UPCALL; - soshutdown(so, 2); - if (slp->ns_nam) - MFREE(slp->ns_nam, m); - m_freem(slp->ns_raw); - m_freem(slp->ns_rec); - slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - closef(fp, (struct proc *)0); - for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; - nuidp = nnuidp) { - nnuidp = nuidp->nu_lru.tqe_next; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); - if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - FREE_ZONE((caddr_t)nuidp, - sizeof (struct nfsuid), M_NFSUID); - } - s = splsoftclock(); - for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { - nnwp = nwp->nd_tq.le_next; - LIST_REMOVE(nwp, nd_tq); - FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); - } - LIST_INIT(&slp->ns_tq); - splx(s); - } + + so = slp->ns_so; + if (so == NULL) + return; + + socket_lock(so, 1); + so->so_upcall = NULL; + so->so_rcv.sb_flags &= ~SB_UPCALL; + socket_unlock(so, 1); + sock_shutdown(so, SHUT_RDWR); } /* @@ -1246,7 +1404,7 @@ int nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) register struct nfsmount *nmp; struct nfsreq *rep; - struct ucred *cred; + kauth_cred_t cred; char **auth_str; int *auth_len; char *verf_str; @@ -1265,13 +1423,16 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) return (error); } } - nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH); + nmp->nm_state &= ~NFSSTA_WANTAUTH; MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); + if (!*auth_str) + return (ENOMEM); nmp->nm_authstr = *auth_str; nmp->nm_authlen = RPCAUTH_MAXSIZ; nmp->nm_verfstr = verf_str; nmp->nm_verflen = *verf_len; - nmp->nm_authuid = cred->cr_uid; + nmp->nm_authuid = kauth_cred_getuid(cred); + nmp->nm_state &= ~NFSSTA_WAITAUTH; wakeup((caddr_t)&nmp->nm_authstr); /* @@ -1287,7 +1448,7 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) error = EAUTH; } if (error) - _FREE((caddr_t)*auth_str, M_TEMP); + FREE(*auth_str, M_TEMP); else { *auth_len = nmp->nm_authlen; *verf_len = nmp->nm_verflen; @@ -1306,13 +1467,13 @@ nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) * Get a nickname authenticator and verifier. 
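
The rewritten nfsrv_zapsock() above clears the socket upcall and the SB_UPCALL flag under the socket lock before calling sock_shutdown(), so no new receive upcall can start once teardown begins. A compact sketch of that detach-then-shutdown ordering, with hypothetical names:

#include <pthread.h>
#include <stddef.h>

struct evsock {
	pthread_mutex_t lock;
	void          (*upcall)(struct evsock *, void *);
	void           *arg;
};

/* Two-phase teardown: detach the callback under the lock first,
 * so no new upcall can be dispatched, then shut the underlying
 * channel down outside the lock. */
static void
evsock_zap(struct evsock *es)
{
	pthread_mutex_lock(&es->lock);
	es->upcall = NULL;
	es->arg = NULL;
	pthread_mutex_unlock(&es->lock);
	/* ...now safe to shutdown() and release the socket... */
}
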
*/ int -nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) - struct nfsmount *nmp; - struct ucred *cred; - char **auth_str; - int *auth_len; - char *verf_str; - int verf_len; +nfs_getnickauth( + struct nfsmount *nmp, + kauth_cred_t cred, + char **auth_str, + int *auth_len, + char *verf_str, + __unused int verf_len) { register struct nfsuid *nuidp; register u_long *nickp, *verfp; @@ -1322,22 +1483,25 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) if (verf_len < (4 * NFSX_UNSIGNED)) panic("nfs_getnickauth verf too small"); #endif - for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first; + for (nuidp = NMUIDHASH(nmp, kauth_cred_getuid(cred))->lh_first; nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == cred->cr_uid) + if (kauth_cred_getuid(nuidp->nu_cr) == kauth_cred_getuid(cred)) break; } microtime(&now); if (!nuidp || nuidp->nu_expire < now.tv_sec) return (EACCES); + MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); + if (!nickp) + return (ENOMEM); + /* * Move to the end of the lru list (end of lru == most recently used). */ TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); - MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); *nickp = txdr_unsigned(nuidp->nu_nickname); *auth_str = (char *)nickp; @@ -1378,17 +1542,17 @@ nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) int nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) register struct nfsmount *nmp; - struct ucred *cred; + kauth_cred_t cred; int len; NFSKERBKEY_T key; - struct mbuf **mdp; + mbuf_t *mdp; char **dposp; - struct mbuf *mrep; + mbuf_t mrep; { register struct nfsuid *nuidp; register u_long *tl; register long t1; - struct mbuf *md = *mdp; + mbuf_t md = *mdp; struct timeval ktvin, ktvout, now; u_long nick; char *dpos = *dposp, *cp2; @@ -1422,20 +1586,27 @@ nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep) sizeof (struct nfsuid), M_NFSUID, M_WAITOK); } else { + nuidp = NULL; + } + if (!nuidp) { nuidp = nmp->nm_uidlruhead.tqh_first; + if (!nuidp) { + error = ENOMEM; + goto nfsmout; + } LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, - nu_lru); + TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); + kauth_cred_rele(nuidp->nu_cr); } nuidp->nu_flag = 0; - nuidp->nu_cr.cr_uid = cred->cr_uid; + kauth_cred_ref(cred); + nuidp->nu_cr = cred; nuidp->nu_expire = now.tv_sec + NFS_KERBTTL; nuidp->nu_timestamp = ktvout; nuidp->nu_nickname = nick; bcopy(key, nuidp->nu_key, sizeof (key)); - TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, - nu_lru); - LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid), + TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); + LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_getuid(cred)), nuidp, nu_hash); } } else @@ -1449,57 +1620,75 @@ nfsmout: #ifndef NFS_NOSERVER /* - * Derefence a server socket structure. If it has no more references and - * is no longer valid, you can throw it away. + * cleanup and release a server socket structure. */ -void -nfsrv_slpderef(slp) - register struct nfssvc_sock *slp; +static void +nfsrv_slpfree(struct nfssvc_sock *slp) { - if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { - TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); - } -} + struct nfsuid *nuidp, *nnuidp; + struct nfsrv_descript *nwp, *nnwp; -/* - * Lock a socket against others. 
- */ -int -nfs_slplock(slp, wait) - register struct nfssvc_sock *slp; - int wait; -{ - int *statep = &slp->ns_solock; + if (slp->ns_so) { + sock_release(slp->ns_so); + slp->ns_so = NULL; + } + if (slp->ns_nam) + mbuf_free(slp->ns_nam); + if (slp->ns_raw) + mbuf_freem(slp->ns_raw); + if (slp->ns_rec) + mbuf_freem(slp->ns_rec); + slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL; + + for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; + nuidp = nnuidp) { + nnuidp = nuidp->nu_lru.tqe_next; + LIST_REMOVE(nuidp, nu_hash); + TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); + if (nuidp->nu_flag & NU_NAM) + mbuf_freem(nuidp->nu_nam); + kauth_cred_rele(nuidp->nu_cr); + FREE_ZONE((caddr_t)nuidp, + sizeof (struct nfsuid), M_NFSUID); + } - if (!wait && (*statep & NFSSTA_SNDLOCK)) - return(0); /* already locked, fail */ - while (*statep & NFSSTA_SNDLOCK) { - *statep |= NFSSTA_WANTSND; - (void) tsleep((caddr_t)statep, PZERO - 1, "nfsslplck", 0); + for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { + nnwp = nwp->nd_tq.le_next; + LIST_REMOVE(nwp, nd_tq); + if (nwp->nd_cr) + kauth_cred_rele(nwp->nd_cr); + FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); } - *statep |= NFSSTA_SNDLOCK; - return (1); + LIST_INIT(&slp->ns_tq); + + lck_rw_destroy(&slp->ns_rwlock, nfs_slp_rwlock_group); + lck_mtx_destroy(&slp->ns_wgmutex, nfs_slp_mutex_group); + FREE(slp, M_NFSSVC); } /* - * Unlock the stream socket for others. + * Derefence a server socket structure. If it has no more references and + * is no longer valid, you can throw it away. */ void -nfs_slpunlock(slp) - struct nfssvc_sock *slp; +nfsrv_slpderef(struct nfssvc_sock *slp) { - int *statep = &slp->ns_solock; - - if ((*statep & NFSSTA_SNDLOCK) == 0) - panic("nfs slpunlock"); - *statep &= ~NFSSTA_SNDLOCK; - if (*statep & NFSSTA_WANTSND) { - *statep &= ~NFSSTA_WANTSND; - wakeup((caddr_t)statep); + lck_mtx_lock(nfsd_mutex); + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref--; + if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { + lck_rw_done(&slp->ns_rwlock); + lck_mtx_unlock(nfsd_mutex); + return; } + + TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); + lck_mtx_unlock(nfsd_mutex); + + nfsrv_slpfree(slp); } + /* * Initialize the data structures for the server. 
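
nfsrv_slpderef() above now frees the socket structure only when the reference count reaches zero and SLP_VALID is clear, and it unlinks the structure from the global list under nfsd_mutex before calling nfsrv_slpfree(). A sketch of the drop-and-maybe-free pattern (the global-list unlink is elided here; types are hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	pthread_mutex_t lock;
	int             sref;
	bool            valid;
};

static void
obj_free(struct obj *o)
{
	pthread_mutex_destroy(&o->lock);
	free(o);
}

/* Drop one reference; the object is torn down only once it is
 * both unreferenced and marked invalid, mirroring nfsrv_slpderef().
 * The real code also removes it from a global list (under a second
 * mutex) before freeing, so no new lookup can find it. */
static void
obj_deref(struct obj *o)
{
	pthread_mutex_lock(&o->lock);
	o->sref--;
	if (o->sref > 0 || o->valid) {
		pthread_mutex_unlock(&o->lock);
		return;
	}
	pthread_mutex_unlock(&o->lock);
	obj_free(o);
}
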
* Handshake with any new nfsds starting up to avoid any chance of @@ -1509,22 +1698,24 @@ void nfsrv_init(terminating) int terminating; { - register struct nfssvc_sock *slp, *nslp; + struct nfssvc_sock *slp, *nslp; - if (nfssvc_sockhead_flag & SLP_INIT) - panic("nfsd init"); - nfssvc_sockhead_flag |= SLP_INIT; if (terminating) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { - nslp = slp->ns_chain.tqe_next; - if (slp->ns_flag & SLP_VALID) + for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != 0; slp = nslp) { + nslp = TAILQ_NEXT(slp, ns_chain); + if (slp->ns_flag & SLP_VALID) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + } TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); + /* grab the lock one final time in case anyone's using it */ + lck_rw_lock_exclusive(&slp->ns_rwlock); + nfsrv_slpfree(slp); } nfsrv_cleancache(); /* And clear out server cache */ -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet +/* XXX Revisit when enabling WebNFS */ +#ifdef WEBNFS_ENABLED } else nfs_pub.np_valid = 0; #else @@ -1532,26 +1723,33 @@ nfsrv_init(terminating) #endif TAILQ_INIT(&nfssvc_sockhead); - nfssvc_sockhead_flag &= ~SLP_INIT; - if (nfssvc_sockhead_flag & SLP_WANTINIT) { - nfssvc_sockhead_flag &= ~SLP_WANTINIT; - wakeup((caddr_t)&nfssvc_sockhead); - } TAILQ_INIT(&nfsd_head); nfsd_head_flag &= ~NFSD_CHECKSLP; MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); - TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + if (nfs_udpsock) { + bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_udpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); + TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate UDP socket\n"); + } +#if ISO MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); - TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + if (nfs_cltpsock) { + bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); + lck_rw_init(&nfs_cltpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr); + TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); + TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + } else { + printf("nfsrv_init() failed to allocate CLTP socket\n"); + } +#endif } /* @@ -1575,13 +1773,11 @@ nfsd_rt(sotype, nd, cacherep) rt->flag = DRT_CACHEDROP; if (sotype == SOCK_STREAM) rt->flag |= DRT_TCP; - if (nd->nd_flag & ND_NQNFS) - rt->flag |= DRT_NQNFS; else if (nd->nd_flag & ND_NFSV3) rt->flag |= DRT_NFSV3; rt->proc = nd->nd_procnum; - if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET) - rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr; + if (((struct sockaddr *)mbuf_data(nd->nd_nam))->sa_family == AF_INET) + rt->ipadr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr; else rt->ipadr = INADDR_ANY; microuptime(&now); diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index b08612aa8..6d72e61e0 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,16 +64,18 @@ #include #include #include -#include -#include -#include +#include /* for fs rooting to update rootdir in fdp */ +#include +#include #include #include #include -#include -#include +#include +#include #include #include +#include +#include #include #include @@ -94,30 +96,22 @@ #include #include #include -#include +#include -extern int nfs_mountroot __P((void)); +extern int nfs_mountroot(void); extern int nfs_ticks; extern int nfs_mount_type; extern int nfs_resv_mounts; struct nfsstats nfsstats; -static int nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, - struct proc *); +static int nfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); /* XXX CSM 11/25/97 Upgrade sysctl.h someday */ #ifdef notyet SYSCTL_NODE(_vfs, MOUNT_NFS, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, ""); #endif -#if NFSDIAG -int nfs_debug; -/* XXX CSM 11/25/97 Upgrade sysctl.h someday */ -#ifdef notyet -SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); -#endif -#endif SYSCTL_DECL(_vfs_generic_nfs); SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW, 0, @@ -131,27 +125,18 @@ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_generic_nfs_client, NFS_TPRINTF_DELAY, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); -static int nfs_iosize __P((struct nfsmount *nmp)); -static int mountnfs __P((struct nfs_args *,struct mount *, - struct mbuf *,char *,char *,struct vnode **)); -static int nfs_mount __P(( struct mount *mp, char *path, caddr_t data, - struct nameidata *ndp, struct proc *p)); -static int nfs_start __P(( struct mount *mp, int flags, - struct proc *p)); -static int nfs_unmount __P(( struct mount *mp, int mntflags, - struct proc *p)); -static int nfs_root __P(( struct mount *mp, struct vnode **vpp)); -static int nfs_quotactl __P(( struct mount *mp, int cmds, uid_t uid, - caddr_t arg, struct proc *p)); -static int nfs_statfs __P(( struct mount *mp, struct statfs *sbp, - struct proc *p)); -static int nfs_sync __P(( struct mount *mp, int waitfor, - struct ucred *cred, struct proc *p)); -static int nfs_vptofh __P(( struct vnode *vp, struct fid *fhp)); -static int nfs_fhtovp __P((struct mount *mp, struct fid *fhp, - struct mbuf *nam, struct vnode **vpp, - int *exflagsp, struct ucred **credanonp)); -static int nfs_vget __P((struct mount *, void *, struct vnode **)); +static int nfs_iosize(struct nfsmount *nmp); +static int mountnfs(struct user_nfs_args *,mount_t,mbuf_t,proc_t,vnode_t *); +static int nfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t context); +static int nfs_start(mount_t mp, int flags, vfs_context_t context); +static int nfs_unmount(mount_t mp, int mntflags, vfs_context_t context); +static int nfs_root(mount_t mp, vnode_t *vpp, vfs_context_t context); +static int nfs_statfs(mount_t mp, struct vfsstatfs *sbp, vfs_context_t context); +static int nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context); +static int nfs_sync( mount_t mp, int waitfor, vfs_context_t context); +static int nfs_vptofh(vnode_t vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); +static int nfs_fhtovp(mount_t mp, int fhlen, unsigned char *fhp, vnode_t *vpp, vfs_context_t context); +static int nfs_vget(mount_t , ino64_t, vnode_t *, vfs_context_t context); /* @@ -162,36 +147,24 @@ struct vfsops nfs_vfsops = { nfs_start, nfs_unmount, nfs_root, - nfs_quotactl, - 
nfs_statfs, + NULL, /* quotactl */ + nfs_vfs_getattr, nfs_sync, nfs_vget, nfs_fhtovp, nfs_vptofh, nfs_init, - nfs_sysctl + nfs_sysctl, + NULL /* setattr */ }; -/* XXX CSM 11/25/97 Mysterious kernel.h ld crud */ -#ifdef notyet -VFS_SET(nfs_vfsops, nfs, MOUNT_NFS, VFCF_NETWORK); -#endif -void nfsargs_ntoh __P((struct nfs_args *)); static int -nfs_mount_diskless __P((struct nfs_dlmount *, char *, int, struct vnode **, - struct mount **)); +nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *); #if !defined(NO_MOUNT_PRIVATE) static int -nfs_mount_diskless_private __P((struct nfs_dlmount *, char *, int, - struct vnode **, struct mount **)); +nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *); #endif /* NO_MOUNT_PRIVATE */ -static void nfs_convert_oargs __P((struct nfs_args *args, - struct onfs_args *oargs)); -#if NFSDIAG -int nfsreqqusers = 0; -extern int nfsbtlen, nfsbtcpu, nfsbtthread, nfsbt[32]; -#endif static int nfs_iosize(nmp) struct nfsmount* nmp; @@ -213,76 +186,52 @@ static int nfs_iosize(nmp) return (trunc_page_32(iosize)); } -static void nfs_convert_oargs(args,oargs) - struct nfs_args *args; - struct onfs_args *oargs; -{ - args->version = NFS_ARGSVERSION; - args->addr = oargs->addr; - args->addrlen = oargs->addrlen; - args->sotype = oargs->sotype; - args->proto = oargs->proto; - args->fh = oargs->fh; - args->fhsize = oargs->fhsize; - args->flags = oargs->flags; - args->wsize = oargs->wsize; - args->rsize = oargs->rsize; - args->readdirsize = oargs->readdirsize; - args->timeo = oargs->timeo; - args->retrans = oargs->retrans; - args->maxgrouplist = oargs->maxgrouplist; - args->readahead = oargs->readahead; - args->leaseterm = oargs->leaseterm; - args->deadthresh = oargs->deadthresh; - args->hostname = oargs->hostname; -} - /* * nfs statfs call */ int -nfs_statfs(mp, sbp, p) - struct mount *mp; - register struct statfs *sbp; - struct proc *p; +nfs_statfs(mount_t mp, struct vfsstatfs *sbp, vfs_context_t context) { - register struct vnode *vp; - register struct nfs_statfs *sfp; - register caddr_t cp; - register u_long *tl; - register long t1, t2; + proc_t p = vfs_context_proc(context); + vnode_t vp; + struct nfs_statfs *sfp; + caddr_t cp; + u_long *tl; + long t1, t2; caddr_t bpos, dpos, cp2; struct nfsmount *nmp = VFSTONFS(mp); int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - struct ucred *cred; - extern int nfs_mount_type; + mbuf_t mreq, mrep, md, mb, mb2; u_int64_t xid; + kauth_cred_t cred; + struct ucred temp_cred; #ifndef nolint sfp = (struct nfs_statfs *)0; #endif vp = nmp->nm_dvp; - if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) + if ((error = vnode_get(vp))) return(error); - cred = crget(); - cred->cr_ngroups = 1; + + bzero(&temp_cred, sizeof(temp_cred)); + temp_cred.cr_ngroups = 1; + cred = kauth_cred_create(&temp_cred); + if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) - (void)nfs_fsinfo(nmp, vp, cred, p); - nfsstats.rpccnt[NFSPROC_FSSTAT]++; - nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); + nfs_fsinfo(nmp, vp, cred, p); + nfsm_reqhead(NFSX_FH(v3)); + if (error) { + kauth_cred_rele(cred); + vnode_put(vp); + return (error); + } + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_FSSTAT]); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_FSSTAT, p, cred, &xid); if (v3 && mrep) - nfsm_postop_attr(vp, retattr, &xid); + nfsm_postop_attr_update(vp, v3, retattr, &xid); nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); -/* XXX CSM 12/2/97 Cleanup when/if 
we integrate FreeBSD mount.h */ -#ifdef notyet - sbp->f_type = MOUNT_NFS; -#else - sbp->f_type = nfs_mount_type; -#endif sbp->f_flags = nmp->nm_flag; sbp->f_iosize = nfs_iosize(nmp); if (v3) { @@ -328,13 +277,226 @@ nfs_statfs(mp, sbp, p) sbp->f_files = 0; sbp->f_ffree = 0; } - if (sbp != &mp->mnt_stat) { - bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); - bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); - } nfsm_reqdone; - VOP_UNLOCK(vp, 0, p); - crfree(cred); + kauth_cred_rele(cred); + vnode_put(vp); + return (error); +} + +/* + * The nfs_statfs code is complicated, and used by mountnfs(), so leave it as-is + * and handle VFS_GETATTR by calling nfs_statfs and copying fields. + */ +static int +nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) +{ + int error = 0; + + if (VFSATTR_IS_ACTIVE(fsap, f_bsize) || + VFSATTR_IS_ACTIVE(fsap, f_iosize) || + VFSATTR_IS_ACTIVE(fsap, f_blocks) || + VFSATTR_IS_ACTIVE(fsap, f_bfree) || + VFSATTR_IS_ACTIVE(fsap, f_bavail) || + VFSATTR_IS_ACTIVE(fsap, f_bused) || + VFSATTR_IS_ACTIVE(fsap, f_files) || + VFSATTR_IS_ACTIVE(fsap, f_ffree)) { + struct vfsstatfs sb; + + error = nfs_statfs(mp, &sb, context); + if (!error) { + VFSATTR_RETURN(fsap, f_bsize, sb.f_bsize); + VFSATTR_RETURN(fsap, f_iosize, sb.f_iosize); + VFSATTR_RETURN(fsap, f_blocks, sb.f_blocks); + VFSATTR_RETURN(fsap, f_bfree, sb.f_bfree); + VFSATTR_RETURN(fsap, f_bavail, sb.f_bavail); + VFSATTR_RETURN(fsap, f_bused, sb.f_blocks - sb.f_bfree); + VFSATTR_RETURN(fsap, f_files, sb.f_files); + VFSATTR_RETURN(fsap, f_ffree, sb.f_ffree); + } + } + + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + struct nfsmount *nmp; + struct nfsv3_pathconf pc; + u_int32_t caps, valid; + vnode_t vp; + int v3; + + if (!(nmp = VFSTONFS(mp))) + return (ENXIO); + vp = nmp->nm_dvp; + v3 = (nmp->nm_flag & NFSMNT_NFSV3); + + /* + * The capabilities[] array defines what this volume supports. + * + * The valid[] array defines which bits this code understands + * the meaning of (whether the volume has that capability or not). + * Any zero bits here means "I don't know what you're asking about" + * and the caller cannot tell whether that capability is + * present or not. 
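
As the comment above spells out, the capabilities[] and valid[] arrays form a paired bitmask scheme: a capability bit is meaningful only when the corresponding valid bit is set, so a caller effectively gets a three-way answer. A sketch of reading such a pair, with hypothetical flag names:

#include <stdint.h>

#define CAP_SYMLINKS  0x1u
#define CAP_HARDLINKS 0x2u

struct caps {
	uint32_t capabilities;  /* bit set => feature present */
	uint32_t valid;         /* bit set => the bit above is meaningful */
};

/* Returns 1 = supported, 0 = not supported, -1 = unknown (valid
 * bit clear), which is how the arrays filled in below must be read. */
static int
cap_query(const struct caps *c, uint32_t bit)
{
	if (!(c->valid & bit))
		return -1;
	return (c->capabilities & bit) ? 1 : 0;
}
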
+ */ + caps = valid = 0; + if (v3) { + /* try to get fsinfo if we haven't already */ + if (!(nmp->nm_state & NFSSTA_GOTFSINFO)) { + nfs_fsinfo(nmp, vp, vfs_context_ucred(context), + vfs_context_proc(context)); + if (!(nmp = VFSTONFS(vnode_mount(vp)))) + return (ENXIO); + } + if (nmp->nm_state & NFSSTA_GOTFSINFO) { + /* fsinfo indicates (non)support of links and symlinks */ + valid |= VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS; + if (nmp->nm_fsinfo.fsproperties & NFSV3FSINFO_SYMLINK) + caps |= VOL_CAP_FMT_SYMBOLICLINKS; + if (nmp->nm_fsinfo.fsproperties & NFSV3FSINFO_LINK) + caps |= VOL_CAP_FMT_HARDLINKS; + /* if fsinfo indicates all pathconf info is the same, */ + /* we can use it to report case attributes */ + if ((nmp->nm_fsinfo.fsproperties & NFSV3FSINFO_HOMOGENEOUS) && + !(nmp->nm_state & NFSSTA_GOTPATHCONF)) { + /* no cached pathconf info, try to get now */ + error = nfs_pathconfrpc(vp, &pc, + vfs_context_ucred(context), + vfs_context_proc(context)); + if (!(nmp = VFSTONFS(vnode_mount(vp)))) + return (ENXIO); + if (!error) { + /* all files have the same pathconf info, */ + /* so cache a copy of the results */ + nfs_pathconf_cache(nmp, &pc); + } + } + if (nmp->nm_state & NFSSTA_GOTPATHCONF) { + valid |= VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING; + if (!(nmp->nm_fsinfo.pcflags & + NFSPCINFO_CASE_INSENSITIVE)) + caps |= VOL_CAP_FMT_CASE_SENSITIVE; + if (nmp->nm_fsinfo.pcflags & + NFSPCINFO_CASE_PRESERVING) + caps |= VOL_CAP_FMT_CASE_PRESERVING; + } + /* Is server's max file size at least 2TB? */ + if (nmp->nm_fsinfo.maxfilesize >= 0x20000000000ULL) + caps |= VOL_CAP_FMT_2TB_FILESIZE; + } else { + /* + * NFSv3 supports 64 bits of file size. + * Without FSINFO from the server, we'll + * just assume maxfilesize >= 2TB + */ + caps |= VOL_CAP_FMT_2TB_FILESIZE; + } + } + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = + // VOL_CAP_FMT_PERSISTENTOBJECTIDS | + // VOL_CAP_FMT_SYMBOLICLINKS | + // VOL_CAP_FMT_HARDLINKS | + // VOL_CAP_FMT_JOURNAL | + // VOL_CAP_FMT_JOURNAL_ACTIVE | + // VOL_CAP_FMT_NO_ROOT_TIMES | + // VOL_CAP_FMT_SPARSE_FILES | + // VOL_CAP_FMT_ZERO_RUNS | + // VOL_CAP_FMT_CASE_SENSITIVE | + // VOL_CAP_FMT_CASE_PRESERVING | + // VOL_CAP_FMT_FAST_STATFS | + // VOL_CAP_FMT_2TB_FILESIZE | + caps; + fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + // VOL_CAP_FMT_SYMBOLICLINKS | + // VOL_CAP_FMT_HARDLINKS | + // VOL_CAP_FMT_JOURNAL | + // VOL_CAP_FMT_JOURNAL_ACTIVE | + // VOL_CAP_FMT_NO_ROOT_TIMES | + // VOL_CAP_FMT_SPARSE_FILES | + // VOL_CAP_FMT_ZERO_RUNS | + // VOL_CAP_FMT_CASE_SENSITIVE | + // VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + valid; + + /* + * We don't support most of the interfaces. + * + * We MAY support locking, but we don't have any easy way of probing. + * We can tell if there's no lockd running or if locks have been + * disabled for a mount, so we can definitely answer NO in that case. + * Any attempt to send a request to lockd to test for locking support + * may cause the lazily-launched locking daemons to be started + * unnecessarily. So we avoid that. However, we do record if we ever + * successfully perform a lock operation on a mount point, so if it + * looks like lock ops have worked, we do report that we support them. 
+ */ + caps = valid = 0; + if ((!nfslockdvnode && !nfslockdwaiting) || + (nmp->nm_flag & NFSMNT_NOLOCKS)) { + /* locks disabled on this mount, so they definitely won't work */ + valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK; + } else if (nmp->nm_state & NFSSTA_LOCKSWORK) { + caps = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK; + valid = VOL_CAP_INT_ADVLOCK | VOL_CAP_INT_FLOCK; + } + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = + // VOL_CAP_INT_SEARCHFS | + // VOL_CAP_INT_ATTRLIST | + // VOL_CAP_INT_NFSEXPORT | + // VOL_CAP_INT_READDIRATTR | + // VOL_CAP_INT_EXCHANGEDATA | + // VOL_CAP_INT_COPYFILE | + // VOL_CAP_INT_ALLOCATE | + // VOL_CAP_INT_VOL_RENAME | + // VOL_CAP_INT_ADVLOCK | + // VOL_CAP_INT_FLOCK | + // VOL_CAP_INT_EXTENDED_SECURITY | + // VOL_CAP_INT_USERACCESS | + caps; + fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + // VOL_CAP_INT_ADVLOCK | + // VOL_CAP_INT_FLOCK | + // VOL_CAP_INT_EXTENDED_SECURITY | + // VOL_CAP_INT_USERACCESS | + valid; + + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; + + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + fsap->f_attributes.validattr.commonattr = 0; + fsap->f_attributes.validattr.volattr = + ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.validattr.dirattr = 0; + fsap->f_attributes.validattr.fileattr = 0; + fsap->f_attributes.validattr.forkattr = 0; + + fsap->f_attributes.nativeattr.commonattr = 0; + fsap->f_attributes.nativeattr.volattr = + ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.nativeattr.dirattr = 0; + fsap->f_attributes.nativeattr.fileattr = 0; + fsap->f_attributes.nativeattr.forkattr = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + return (error); } @@ -343,55 +505,60 @@ nfs_statfs(mp, sbp, p) */ int nfs_fsinfo(nmp, vp, cred, p) - register struct nfsmount *nmp; - register struct vnode *vp; - struct ucred *cred; - struct proc *p; + struct nfsmount *nmp; + vnode_t vp; + kauth_cred_t cred; + proc_t p; { - register struct nfsv3_fsinfo *fsp; - register caddr_t cp; - register long t1, t2; - register u_long *tl, pref, max; + struct nfsv3_fsinfo *fsp; + caddr_t cp; + long t1, t2; + u_long *tl; + int prefsize, maxsize; caddr_t bpos, dpos, cp2; int error = 0, retattr; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; u_int64_t xid; - nfsstats.rpccnt[NFSPROC_FSINFO]++; - nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); + nfsm_reqhead(NFSX_FH(1)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_FSINFO]); nfsm_fhtom(vp, 1); nfsm_request(vp, NFSPROC_FSINFO, p, cred, &xid); if (mrep) { - nfsm_postop_attr(vp, retattr, &xid); + nfsm_postop_attr_update(vp, 1, retattr, &xid); } if (!error) { nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); - pref = fxdr_unsigned(u_long, fsp->fs_wtpref); - if (pref < nmp->nm_wsize) - nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & + prefsize = fxdr_unsigned(u_long, fsp->fs_wtpref); + if (prefsize < nmp->nm_wsize) + nmp->nm_wsize = (prefsize + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); - max = 
fxdr_unsigned(u_long, fsp->fs_wtmax); - if (max < nmp->nm_wsize) { - nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); + maxsize = fxdr_unsigned(u_long, fsp->fs_wtmax); + if (maxsize < nmp->nm_wsize) { + nmp->nm_wsize = maxsize & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) - nmp->nm_wsize = max; + nmp->nm_wsize = maxsize; } - pref = fxdr_unsigned(u_long, fsp->fs_rtpref); - if (pref < nmp->nm_rsize) - nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & + prefsize = fxdr_unsigned(u_long, fsp->fs_rtpref); + if (prefsize < nmp->nm_rsize) + nmp->nm_rsize = (prefsize + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); - max = fxdr_unsigned(u_long, fsp->fs_rtmax); - if (max < nmp->nm_rsize) { - nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); + maxsize = fxdr_unsigned(u_long, fsp->fs_rtmax); + if (maxsize < nmp->nm_rsize) { + nmp->nm_rsize = maxsize & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) - nmp->nm_rsize = max; + nmp->nm_rsize = maxsize; } - pref = fxdr_unsigned(u_long, fsp->fs_dtpref); - if (pref < nmp->nm_readdirsize) - nmp->nm_readdirsize = pref; - if (max < nmp->nm_readdirsize) { - nmp->nm_readdirsize = max; + prefsize = fxdr_unsigned(u_long, fsp->fs_dtpref); + if (prefsize < nmp->nm_readdirsize) + nmp->nm_readdirsize = prefsize; + if (maxsize < nmp->nm_readdirsize) { + nmp->nm_readdirsize = maxsize; } + fxdr_hyper(&fsp->fs_maxfilesize, &nmp->nm_fsinfo.maxfilesize); + nmp->nm_fsinfo.fsproperties = fxdr_unsigned(u_long, fsp->fs_properties); nmp->nm_state |= NFSSTA_GOTFSINFO; } nfsm_reqdone; @@ -415,17 +582,16 @@ int nfs_mountroot() { struct nfs_diskless nd; - struct vattr attr; - struct mount *mp; - struct vnode *vp; - struct proc *procp; - long n; + struct nfs_vattr nvattr; + mount_t mp; + vnode_t vp; + proc_t procp; int error; #if !defined(NO_MOUNT_PRIVATE) - struct mount *mppriv; - struct vnode *vppriv; + mount_t mppriv; + vnode_t vppriv; #endif /* NO_MOUNT_PRIVATE */ - int v3; + int v3, sotype; procp = current_proc(); /* XXX */ @@ -440,11 +606,15 @@ nfs_mountroot() panic("nfs_boot_init failed with %d\n", error); } - /* try NFSv3 first, if that fails then try NFSv2 */ + /* + * Try NFSv3 first, then fallback to NFSv2. + * Likewise, try TCP first, then fall back to UDP. + */ v3 = 1; + sotype = SOCK_STREAM; tryagain: - error = nfs_boot_getfh(&nd, procp, v3); + error = nfs_boot_getfh(&nd, procp, v3, sotype); if (error) { if (error == EHOSTDOWN || error == EHOSTUNREACH) { if (nd.nd_root.ndm_path) @@ -456,49 +626,66 @@ tryagain: return (error); } if (v3) { - printf("nfs_boot_getfh(v3) failed with %d, trying v2...\n", error); + if (sotype == SOCK_STREAM) { + printf("nfs_boot_getfh(v3,TCP) failed with %d, trying UDP...\n", error); + sotype = SOCK_DGRAM; + goto tryagain; + } + printf("nfs_boot_getfh(v3,UDP) failed with %d, trying v2...\n", error); v3 = 0; + sotype = SOCK_STREAM; + goto tryagain; + } else if (sotype == SOCK_STREAM) { + printf("nfs_boot_getfh(v2,TCP) failed with %d, trying UDP...\n", error); + sotype = SOCK_DGRAM; goto tryagain; } - panic("nfs_boot_getfh(v2) failed with %d\n", error); + panic("nfs_boot_getfh(v2,UDP) failed with %d\n", error); } /* * Create the root mount point. 
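
nfs_mountroot() below now walks a four-step fallback ladder, trying NFSv3 over TCP, then v3/UDP, then v2/TCP, then v2/UDP before panicking. The same ladder expressed as a compact loop, with a hypothetical try_mount() standing in for the boot and mount steps:

#include <stdio.h>

/* hypothetical stand-in for nfs_boot_getfh()/nfs_mount_diskless() */
static int try_mount(int v3, int tcp) { (void)v3; (void)tcp; return -1; }

/* Walk the fallback ladder: v3/TCP -> v3/UDP -> v2/TCP -> v2/UDP. */
static int
mount_with_fallback(void)
{
	int v3, tcp, error = -1;

	for (v3 = 1; v3 >= 0 && error; v3--)
		for (tcp = 1; tcp >= 0 && error; tcp--) {
			error = try_mount(v3, tcp);
			if (error)
				printf("mount v%d/%s failed (%d)\n",
				    v3 ? 3 : 2, tcp ? "TCP" : "UDP", error);
		}
	return error;
}

The kernel version unrolls this with the tryagain label and explicit sotype resets, but the search order is the same.
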
*/ #if !defined(NO_MOUNT_PRIVATE) - if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_RDONLY, &vp, &mp))) { + if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_RDONLY|MNT_ROOTFS, &vp, &mp))) #else - if (error = nfs_mount_diskless(&nd.nd_root, "/", NULL, &vp, &mp)) { + if ((error = nfs_mount_diskless(&nd.nd_root, "/", MNT_ROOTFS, &vp, &mp))) #endif /* NO_MOUNT_PRIVATE */ + { if (v3) { - printf("nfs_mount_diskless(v3) failed with %d, trying v2...\n", error); + if (sotype == SOCK_STREAM) { + printf("nfs_mount_diskless(v3,TCP) failed with %d, trying UDP...\n", error); + sotype = SOCK_DGRAM; + goto tryagain; + } + printf("nfs_mount_diskless(v3,UDP) failed with %d, trying v2...\n", error); v3 = 0; + sotype = SOCK_STREAM; + goto tryagain; + } else if (sotype == SOCK_STREAM) { + printf("nfs_mount_diskless(v2,TCP) failed with %d, trying UDP...\n", error); + sotype = SOCK_DGRAM; goto tryagain; } - panic("nfs_mount_diskless root failed with %d\n", error); + panic("nfs_mount_diskless(v2,UDP) root failed with %d\n", error); } printf("root on %s\n", (char *)&nd.nd_root.ndm_host); - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - vfs_unbusy(mp, procp); + vfs_unbusy(mp); + mount_list_add(mp); rootvp = vp; #if !defined(NO_MOUNT_PRIVATE) if (nd.nd_private.ndm_saddr.sin_addr.s_addr) { error = nfs_mount_diskless_private(&nd.nd_private, "/private", - NULL, &vppriv, &mppriv); + 0, &vppriv, &mppriv); if (error) { panic("nfs_mount_diskless private failed with %d\n", error); } printf("private on %s\n", (char *)&nd.nd_private.ndm_host); - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mppriv, mnt_list); - simple_unlock(&mountlist_slock); - vfs_unbusy(mppriv, procp); + vfs_unbusy(mppriv); + mount_list_add(mppriv); } #endif /* NO_MOUNT_PRIVATE */ @@ -509,10 +696,8 @@ tryagain: FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI); /* Get root attributes (for the time). */ - error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp); + error = nfs_getattr(vp, &nvattr, kauth_cred_get(), procp); if (error) panic("nfs_mountroot: getattr for root"); - n = attr.va_mtime.tv_sec; - inittodr(n); return (0); } @@ -520,56 +705,60 @@ tryagain: * Internal version of mount system call for diskless setup. */ static int -nfs_mount_diskless(ndmntp, mntname, mntflag, vpp, mpp) - struct nfs_dlmount *ndmntp; - char *mntname; - int mntflag; - struct vnode **vpp; - struct mount **mpp; +nfs_mount_diskless( + struct nfs_dlmount *ndmntp, + const char *mntname, + int mntflag, + vnode_t *vpp, + mount_t *mpp) { - struct nfs_args args; - struct mount *mp; - struct mbuf *m; + struct user_nfs_args args; + mount_t mp; + mbuf_t m; int error; - struct proc *procp; + proc_t procp; procp = current_proc(); /* XXX */ if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_host, &mp))) { - printf("nfs_mountroot: NFS not configured"); + printf("nfs_mount_diskless: NFS not configured"); return (error); } - mp->mnt_flag = mntflag; + + mp->mnt_flag |= mntflag; + if (!(mntflag & MNT_RDONLY)) + mp->mnt_flag &= ~MNT_RDONLY; /* Initialize mount args. 
*/ bzero((caddr_t) &args, sizeof(args)); - args.addr = (struct sockaddr *)&ndmntp->ndm_saddr; - args.addrlen = args.addr->sa_len; - args.sotype = SOCK_DGRAM; - args.fh = ndmntp->ndm_fh; + args.addr = CAST_USER_ADDR_T(&ndmntp->ndm_saddr); + args.addrlen = ndmntp->ndm_saddr.sin_len; + args.sotype = ndmntp->ndm_sotype; + args.fh = CAST_USER_ADDR_T(&ndmntp->ndm_fh[0]); args.fhsize = ndmntp->ndm_fhlen; - args.hostname = ndmntp->ndm_host; + args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_host); args.flags = NFSMNT_RESVPORT; if (ndmntp->ndm_nfsv3) args.flags |= NFSMNT_NFSV3; - MGET(m, M_DONTWAIT, MT_SONAME); - bcopy((caddr_t)args.addr, mtod(m, caddr_t), - (m->m_len = args.addr->sa_len)); - if ((error = mountnfs(&args, mp, m, mntname, args.hostname, vpp))) { - printf("nfs_mountroot: mount %s failed: %d", mntname, error); - mp->mnt_vfc->vfc_refcount--; - - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, procp); - - FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m); + if (error) { + printf("nfs_mount_diskless: mbuf_get(soname) failed"); + return (error); + } + mbuf_setlen(m, ndmntp->ndm_saddr.sin_len); + bcopy((caddr_t)args.addr, mbuf_data(m), ndmntp->ndm_saddr.sin_len); + if ((error = mountnfs(&args, mp, m, procp, vpp))) { + printf("nfs_mountroot: mount %s failed: %d\n", mntname, error); + // XXX vfs_rootmountfailed(mp); + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); + vfs_unbusy(mp); + mount_lock_destroy(mp); + FREE_ZONE(mp, sizeof(struct mount), M_MOUNT); return (error); } -#if 0 /* Causes incorrect reporting of "mounted on" */ - (void) copystr(args.hostname, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0); -#endif /* 0 */ *mpp = mp; return (0); } @@ -580,23 +769,26 @@ nfs_mount_diskless(ndmntp, mntname, mntflag, vpp, mpp) * separately in diskless setup */ static int -nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp) - struct nfs_dlmount *ndmntp; - char *mntname; - int mntflag; - struct vnode **vpp; - struct mount **mpp; +nfs_mount_diskless_private( + struct nfs_dlmount *ndmntp, + const char *mntname, + int mntflag, + vnode_t *vpp, + mount_t *mpp) { - struct nfs_args args; - struct mount *mp; - struct mbuf *m; + struct user_nfs_args args; + mount_t mp; + mbuf_t m; int error; - struct proc *procp; - struct vfsconf *vfsp; + proc_t procp; + struct vfstable *vfsp; struct nameidata nd; - struct vnode *vp; + vnode_t vp; + struct vfs_context context; procp = current_proc(); /* XXX */ + context.vc_proc = procp; + context.vc_ucred = kauth_cred_get(); { /* @@ -605,49 +797,55 @@ nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp) */ struct filedesc *fdp; /* pointer to file descriptor state */ fdp = procp->p_fd; - mountlist.cqh_first->mnt_flag |= MNT_ROOTFS; + mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; /* Get the vnode for '/'. Set fdp->fd_cdir to reference it. 
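Both diskless paths now hand the server address to mountnfs() packaged in an MBUF_TYPE_SONAME mbuf. A sketch of that packaging step using the mbuf KPI calls visible above (the header names are my assumption):

#include <sys/kpi_mbuf.h>	/* assumed header for the mbuf_* KPI */
#include <netinet/in.h>

/* Copy a fixed-size socket address into a fresh soname mbuf. */
static int
sockaddr_to_mbuf(struct sockaddr_in *sin, mbuf_t *mp)
{
	mbuf_t m;
	int error;

	error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m);
	if (error)
		return (error);			/* allocation failed */
	mbuf_setlen(m, sin->sin_len);		/* record payload length */
	bcopy(sin, mbuf_data(m), sin->sin_len);	/* copy the address in */
	*mp = m;
	return (0);
}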
*/ - if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) + if (VFS_ROOT(mountlist.tqh_first, &rootvnode, NULL)) panic("cannot find root vnode"); - VREF(rootvnode); + error = vnode_ref(rootvnode); + if (error) { + printf("nfs_mountroot: vnode_ref() failed on root vnode!\n"); + return (error); + } fdp->fd_cdir = rootvnode; - VOP_UNLOCK(rootvnode, 0, procp); fdp->fd_rdir = NULL; } /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, - mntname, procp); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + mntname, &context); if ((error = namei(&nd))) { - printf("nfs_mountroot: private namei failed!"); + printf("nfs_mountroot: private namei failed!\n"); return (error); } { - /* undo VREF in mimic main()! */ - vrele(rootvnode); + /* undo vnode_ref() in mimic main()! */ + vnode_rele(rootvnode); } + nameidone(&nd); vp = nd.ni_vp; - if ((error = vinvalbuf(vp, V_SAVE, procp->p_ucred, procp, 0, 0))) { - vput(vp); + + if ((error = VNOP_FSYNC(vp, MNT_WAIT, &context)) || + (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) { + vnode_put(vp); return (error); } - if (vp->v_type != VDIR) { - vput(vp); + if (vnode_vtype(vp) != VDIR) { + vnode_put(vp); return (ENOTDIR); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, "nfs")) break; if (vfsp == NULL) { - printf("nfs_mountroot: private NFS not configured"); - vput(vp); + printf("nfs_mountroot: private NFS not configured\n"); + vnode_put(vp); return (ENODEV); } - if (vp->v_mountedhere != NULL) { - vput(vp); + if (vnode_mountedhere(vp) != NULL) { + vnode_put(vp); return (EBUSY); } @@ -655,51 +853,64 @@ nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp) * Allocate and initialize the filesystem. */ mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + if (!mp) { + printf("nfs_mountroot: unable to allocate mount structure\n"); + vnode_put(vp); + return (ENOMEM); + } bzero((char *)mp, (u_long)sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - (void)vfs_busy(mp, LK_NOWAIT, 0, procp); - LIST_INIT(&mp->mnt_vnodelist); - mp->mnt_op = vfsp->vfc_vfsops; - mp->mnt_vfc = vfsp; + mount_lock_init(mp); + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); + (void)vfs_busy(mp, LK_NOWAIT); + TAILQ_INIT(&mp->mnt_vnodelist); + mount_list_lock(); vfsp->vfc_refcount++; - mp->mnt_stat.f_type = vfsp->vfc_typenum; + mount_list_unlock(); + mp->mnt_vtable = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + // mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag = mntflag; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSNAMELEN-1); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; - mp->mnt_stat.f_owner = procp->p_ucred->cr_uid; - (void) copystr(mntname, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0); - (void) copystr(ndmntp->ndm_host, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); + (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MNAMELEN - 1, 0); + (void) copystr(ndmntp->ndm_host, mp->mnt_vfsstat.f_mntfromname, MNAMELEN - 1, 0); /* Initialize mount args. 
*/ bzero((caddr_t) &args, sizeof(args)); - args.addr = (struct sockaddr *)&ndmntp->ndm_saddr; - args.addrlen = args.addr->sa_len; - args.sotype = SOCK_DGRAM; - args.fh = ndmntp->ndm_fh; + args.addr = CAST_USER_ADDR_T(&ndmntp->ndm_saddr); + args.addrlen = ndmntp->ndm_saddr.sin_len; + args.sotype = ndmntp->ndm_sotype; + args.fh = CAST_USER_ADDR_T(ndmntp->ndm_fh); args.fhsize = ndmntp->ndm_fhlen; - args.hostname = ndmntp->ndm_host; + args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_host); args.flags = NFSMNT_RESVPORT; if (ndmntp->ndm_nfsv3) args.flags |= NFSMNT_NFSV3; - MGET(m, M_DONTWAIT, MT_SONAME); - bcopy((caddr_t)args.addr, mtod(m, caddr_t), - (m->m_len = args.addr->sa_len)); - if ((error = mountnfs(&args, mp, m, mntname, args.hostname, &vp))) { - printf("nfs_mountroot: mount %s failed: %d", mntname, error); - mp->mnt_vfc->vfc_refcount--; - - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, procp); - + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_SONAME, &m); + if (error) { + printf("nfs_mount_diskless_private: mbuf_get(soname) failed"); + return (error); + } + mbuf_setlen(m, ndmntp->ndm_saddr.sin_len); + bcopy((caddr_t)args.addr, mbuf_data(m), ndmntp->ndm_saddr.sin_len); + if ((error = mountnfs(&args, mp, m, procp, &vp))) { + printf("nfs_mountroot: mount %s failed: %d\n", mntname, error); + mount_list_lock(); + vfsp->vfc_refcount--; + mount_list_unlock(); + vfs_unbusy(mp); + mount_lock_destroy(mp); FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); return (error); } @@ -714,66 +925,88 @@ nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp) * VFS Operations. * * mount system call - * It seems a bit dumb to copyinstr() the host and path here and then - * bcopy() them in mountnfs(), but I wanted to detect errors before - * doing the sockargs() call because sockargs() allocates an mbuf and - * an error after that means that I have to release the mbuf. */ -/* ARGSUSED */ static int -nfs_mount(mp, path, data, ndp, p) - struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +nfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t context) { - int error; - struct nfs_args args; - struct mbuf *nam; - struct vnode *vp; - char pth[MNAMELEN], hst[MNAMELEN]; + proc_t p = vfs_context_proc(context); + int error, argsvers; + struct user_nfs_args args; + struct nfs_args tempargs; + mbuf_t nam; size_t len; u_char nfh[NFSX_V3FHMAX]; + char *mntfrom; - error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); + error = copyin(data, (caddr_t)&argsvers, sizeof (argsvers)); if (error) return (error); - if (args.version != NFS_ARGSVERSION) { -#ifndef NO_COMPAT_PRELITE2 - /* - * If the argument version is unknown, then assume the - * caller is a pre-lite2 4.4BSD client and convert its - * arguments. 
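The rewritten nfs_mount() above peeks at the leading version word first, then (in the switch that follows) re-copies the full structure at the size that the version and the caller's ABI imply. Condensed, with the struct names used in the surrounding code:

/* Two-stage copyin: every args variant starts with its version word. */
static int
copyin_nfs_args(vfs_context_t ctx, user_addr_t data,
    struct user_nfs_args *args, struct nfs_args *args32)
{
	int error, vers;
	size_t len;

	error = copyin(data, &vers, sizeof(vers));	/* stage 1: version */
	if (error)
		return (error);

	switch (vers) {
	case 3:		/* older layout: no attrcache timeout fields */
		len = vfs_context_is64bit(ctx) ?
		    sizeof(struct user_nfs_args3) : sizeof(struct nfs_args3);
		break;
	case 4:
		len = vfs_context_is64bit(ctx) ?
		    sizeof(*args) : sizeof(*args32);
		break;
	default:
		return (EPROGMISMATCH);		/* unknown version word */
	}
	/* stage 2: copy the whole variant at the right size */
	return (copyin(data, vfs_context_is64bit(ctx) ?
	    (caddr_t)args : (caddr_t)args32, len));
}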
- */ - struct onfs_args oargs; - error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args)); - if (error) - return (error); - nfs_convert_oargs(&args,&oargs); -#else /* NO_COMPAT_PRELITE2 */ + + switch (argsvers) { + case 3: + if (vfs_context_is64bit(context)) + error = copyin(data, (caddr_t)&args, sizeof (struct user_nfs_args3)); + else + error = copyin(data, (caddr_t)&tempargs, sizeof (struct nfs_args3)); + break; + case 4: + if (vfs_context_is64bit(context)) + error = copyin(data, (caddr_t)&args, sizeof (args)); + else + error = copyin(data, (caddr_t)&tempargs, sizeof (tempargs)); + break; + default: return (EPROGMISMATCH); -#endif /* !NO_COMPAT_PRELITE2 */ } - if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) - return (EINVAL); - error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error) return (error); - error = copyinstr(path, pth, MNAMELEN-1, &len); + + if (!vfs_context_is64bit(context)) { + args.version = tempargs.version; + args.addrlen = tempargs.addrlen; + args.sotype = tempargs.sotype; + args.proto = tempargs.proto; + args.fhsize = tempargs.fhsize; + args.flags = tempargs.flags; + args.wsize = tempargs.wsize; + args.rsize = tempargs.rsize; + args.readdirsize = tempargs.readdirsize; + args.timeo = tempargs.timeo; + args.retrans = tempargs.retrans; + args.maxgrouplist = tempargs.maxgrouplist; + args.readahead = tempargs.readahead; + args.leaseterm = tempargs.leaseterm; + args.deadthresh = tempargs.deadthresh; + args.addr = CAST_USER_ADDR_T(tempargs.addr); + args.fh = CAST_USER_ADDR_T(tempargs.fh); + args.hostname = CAST_USER_ADDR_T(tempargs.hostname); + if (argsvers >= 4) { + args.acregmin = tempargs.acregmin; + args.acregmax = tempargs.acregmax; + args.acdirmin = tempargs.acdirmin; + args.acdirmax = tempargs.acdirmax; + } + } + + if (args.fhsize > NFSX_V3FHMAX) + return (EINVAL); + error = copyin(args.fh, (caddr_t)nfh, args.fhsize); if (error) return (error); - bzero(&pth[len], MNAMELEN - len); - error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); + + mntfrom = &vfs_statfs(mp)->f_mntfromname[0]; + error = copyinstr(args.hostname, mntfrom, MAXPATHLEN-1, &len); if (error) return (error); - bzero(&hst[len], MNAMELEN - len); + bzero(&mntfrom[len], MAXPATHLEN - len); + /* sockargs() call must be after above copyin() calls */ - error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME); + error = sockargs(&nam, args.addr, args.addrlen, MBUF_TYPE_SONAME); if (error) return (error); - args.fh = nfh; - error = mountnfs(&args, mp, nam, pth, hst, &vp); + + args.fh = CAST_USER_ADDR_T(&nfh[0]); + error = mountnfs(&args, mp, nam, p, &vp); return (error); } @@ -781,29 +1014,19 @@ nfs_mount(mp, path, data, ndp, p) * Common code for mount and mountroot */ static int -mountnfs(argp, mp, nam, pth, hst, vpp) - register struct nfs_args *argp; - register struct mount *mp; - struct mbuf *nam; - char *pth, *hst; - struct vnode **vpp; +mountnfs( + struct user_nfs_args *argp, + mount_t mp, + mbuf_t nam, + proc_t p, + vnode_t *vpp) { - register struct nfsmount *nmp; + struct nfsmount *nmp; struct nfsnode *np; int error, maxio; - struct vattr attrs; - struct proc *curproc; - - /* - * turning off NQNFS until we have further testing - * with UBC changes, in particular, nfs_pagein and nfs_pageout. - * Those have NQNFS defined out in conjunction with this - * returning an error. Remove when fully tested. - */ - if (argp->flags & NFSMNT_NQNFS) { - error = NFSERR_NOTSUPP; - goto bad2; - } + struct nfs_vattr nvattrs; + struct vfs_context context; /* XXX get from caller? 
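For a 32-bit caller the arguments arrive in the narrow layout, so each scalar is copied across by hand and every embedded user pointer is widened with CAST_USER_ADDR_T, as in the block above. The thunk in miniature (trailing fields elided):

/* Widen a 32-bit nfs_args into the 64-bit-clean user_nfs_args. */
static void
nfs_args_widen(const struct nfs_args *in, struct user_nfs_args *out)
{
	out->version = in->version;	/* scalars copy straight across */
	out->addrlen = in->addrlen;
	out->sotype  = in->sotype;
	out->proto   = in->proto;
	out->fhsize  = in->fhsize;
	out->flags   = in->flags;
	/* ...remaining scalar fields handled the same way... */
	out->addr     = CAST_USER_ADDR_T(in->addr);	/* user pointers */
	out->fh       = CAST_USER_ADDR_T(in->fh);	/* must be      */
	out->hostname = CAST_USER_ADDR_T(in->hostname);	/* promoted     */
}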
*/ + u_int64_t xid; /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes @@ -812,30 +1035,25 @@ mountnfs(argp, mp, nam, pth, hst, vpp) if (argp->sotype == SOCK_STREAM) argp->flags &= ~NFSMNT_NOCONN; - if (mp->mnt_flag & MNT_UPDATE) { + if (vfs_flags(mp) & MNT_UPDATE) { nmp = VFSTONFS(mp); /* update paths, file handles, etc, here XXX */ - m_freem(nam); + mbuf_freem(nam); return (0); } else { MALLOC_ZONE(nmp, struct nfsmount *, sizeof (struct nfsmount), M_NFSMNT, M_WAITOK); + if (!nmp) { + mbuf_freem(nam); + return (ENOMEM); + } bzero((caddr_t)nmp, sizeof (struct nfsmount)); TAILQ_INIT(&nmp->nm_uidlruhead); TAILQ_INIT(&nmp->nm_bufq); - mp->mnt_data = (qaddr_t)nmp; + vfs_setfsprivate(mp, nmp); } - vfs_getnewfsid(mp); - nmp->nm_mountp = mp; - nmp->nm_flag = argp->flags; - if (nmp->nm_flag & NFSMNT_NQNFS) - /* - * We have to set mnt_maxsymlink to a non-zero value so - * that COMPAT_43 routines will know that we are setting - * the d_type field in directories (and can zero it for - * unsuspecting binaries). - */ - mp->mnt_maxsymlinklen = 1; + + /* setup defaults */ nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; if (argp->sotype == SOCK_DGRAM) { @@ -848,18 +1066,21 @@ mountnfs(argp, mp, nam, pth, hst, vpp) nmp->nm_readdirsize = NFS_READDIRSIZE; nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_readahead = NFS_DEFRAHEAD; - nmp->nm_leaseterm = NQ_DEFLEASE; - nmp->nm_deadthresh = NQ_DEADTHRESH; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; - CIRCLEQ_INIT(&nmp->nm_timerhead); - nmp->nm_inprog = NULLVP; - bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); - bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); + nmp->nm_acregmin = NFS_MINATTRTIMO; + nmp->nm_acregmax = NFS_MAXATTRTIMO; + nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; + nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; + + vfs_getnewfsid(mp); + nmp->nm_mountp = mp; + vfs_setauthopaque(mp); + nmp->nm_flag = argp->flags; nmp->nm_nam = nam; if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { @@ -922,16 +1143,30 @@ mountnfs(argp, mp, nam, pth, hst, vpp) if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; - if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && - argp->leaseterm <= NQ_MAXLEASE) - nmp->nm_leaseterm = argp->leaseterm; - if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && - argp->deadthresh <= NQ_NEVERDEAD) - nmp->nm_deadthresh = argp->deadthresh; + + if (argp->version >= 4) { + if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) + nmp->nm_acregmin = argp->acregmin; + if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) + nmp->nm_acregmax = argp->acregmax; + if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) + nmp->nm_acdirmin = argp->acdirmin; + if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) + nmp->nm_acdirmax = argp->acdirmax; + if (nmp->nm_acregmin > nmp->nm_acregmax) + nmp->nm_acregmin = nmp->nm_acregmax; + if (nmp->nm_acdirmin > nmp->nm_acdirmax) + nmp->nm_acdirmin = nmp->nm_acdirmax; + } + /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mlen) + nfs_mbuf_init(); + /* * For Connection based sockets (TCP,...) defer the connect until * the first request, in case the server is not responding. 
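Earlier in this hunk the version-4 arguments add attribute-cache timeout bounds; a bound is honored only when its flag is set and the value is non-negative, and each minimum is then clamped to its maximum. For the regular-file pair that reduces to the sketch below (so acregmin=60 with acregmax=30 yields an effective 30/30):

static void
set_acreg_bounds(struct nfsmount *nmp, int flags, int acregmin, int acregmax)
{
	if ((flags & NFSMNT_ACREGMIN) && acregmin >= 0)
		nmp->nm_acregmin = acregmin;	/* flagged and sane */
	if ((flags & NFSMNT_ACREGMAX) && acregmax >= 0)
		nmp->nm_acregmax = acregmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;	/* keep min <= max */
}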
@@ -940,6 +1175,21 @@ mountnfs(argp, mp, nam, pth, hst, vpp) (error = nfs_connect(nmp, (struct nfsreq *)0))) goto bad; + /* + * Get file attributes for the mountpoint. These are needed + * in order to properly create the root vnode. + */ + // LP64todo - fix CAST_DOWN of argp->fh + error = nfs_getattr_no_vnode(mp, CAST_DOWN(caddr_t, argp->fh), argp->fhsize, + proc_ucred(p), p, &nvattrs, &xid); + if (error) { + /* + * we got problems... we couldn't get the attributes + * from the NFS server... so the mount fails. + */ + goto bad; + } + /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward @@ -948,31 +1198,24 @@ mountnfs(argp, mp, nam, pth, hst, vpp) * this problem, because one can identify root inodes by their * number == ROOTINO (2). */ - error = nfs_nget(mp, (nfsfh_t *)argp->fh, argp->fhsize, &np); + error = nfs_nget(mp, NULL, NULL, CAST_DOWN(caddr_t, argp->fh), argp->fhsize, + &nvattrs, &xid, NG_MARKROOT, &np); if (error) goto bad; /* * save this vnode pointer. That way nfs_unmount() - * does not need to call nfs_net() just get it to drop + * does not need to call nfs_nget() just get it to drop * this vnode reference. */ nmp->nm_dvp = *vpp = NFSTOV(np); - - /* - * Get file attributes for the mountpoint. This has the side - * effect of filling in (*vpp)->v_type with the correct value. - */ - curproc = current_proc(); - error = VOP_GETATTR(*vpp, &attrs, curproc->p_ucred, curproc); + /* get usecount and drop iocount */ + error = vnode_ref(*vpp); if (error) { - /* - * we got problems... we couldn't get the attributes - * from the NFS server... so the mount fails. - */ - vput(*vpp); + vnode_put(*vpp); goto bad; } + vnode_put(*vpp); /* * Set the mount point's block I/O size. @@ -980,13 +1223,25 @@ mountnfs(argp, mp, nam, pth, hst, vpp) * the server about what its preferred I/O sizes are. */ if (nmp->nm_flag & NFSMNT_NFSV3) - nfs_fsinfo(nmp, *vpp, curproc->p_ucred, curproc); - mp->mnt_stat.f_iosize = nfs_iosize(nmp); + nfs_fsinfo(nmp, *vpp, proc_ucred(p), p); + vfs_statfs(mp)->f_iosize = nfs_iosize(nmp); /* - * Lose the lock but keep the ref. + * V3 mounts give us a (relatively) reliable remote access(2) + * call, so advertise the fact. + * + * XXX this may not be the best way to go, as the granularity + * offered isn't a good match to our needs. */ - VOP_UNLOCK(*vpp, 0, curproc); + if (nmp->nm_flag & NFSMNT_NFSV3) + vfs_setauthopaqueaccess(mp); + + /* + * Do statfs to ensure static info gets set to reasonable values. + */ + context.vc_proc = p; + context.vc_ucred = proc_ucred(p); + nfs_statfs(mp, vfs_statfs(mp), &context); if (nmp->nm_flag & NFSMNT_RESVPORT) nfs_resv_mounts++; @@ -995,8 +1250,7 @@ mountnfs(argp, mp, nam, pth, hst, vpp) bad: nfs_disconnect(nmp); FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); -bad2: - m_freem(nam); + mbuf_freem(nam); return (error); } @@ -1005,13 +1259,13 @@ bad2: * unmount system call */ static int -nfs_unmount(mp, mntflags, p) - struct mount *mp; - int mntflags; - struct proc *p; +nfs_unmount( + mount_t mp, + int mntflags, + __unused vfs_context_t context) { register struct nfsmount *nmp; - struct vnode *vp; + vnode_t vp; int error, flags = 0; nmp = VFSTONFS(mp); @@ -1029,19 +1283,12 @@ nfs_unmount(mp, mntflags, p) * Goes something like this.. * - Call vflush() to clear out vnodes for this file system, * except for the swap files. Deal with them in 2nd pass. - * It will do vgone making the vnode VBAD at that time. 
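One pattern worth isolating from the mountnfs() changes above: nfs_nget() returns the root with a short-term iocount, which is converted into a long-term usecount via vnode_ref() before the iocount is dropped — success or not — with vnode_put():

/* Convert an iocount into a persistent usecount (vnode_rele() later). */
static int
hold_vnode(vnode_t vp)
{
	int error = vnode_ref(vp);	/* take the long-term reference */
	vnode_put(vp);			/* drop the iocount either way */
	return (error);
}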
* - Decrement reference on the vnode representing remote root. * - Close the socket * - Free up the data structures */ vp = nmp->nm_dvp; - /* - * Must handshake with nqnfs_clientd() if it is active. - */ - nmp->nm_state |= NFSSTA_DISMINPROG; - while (nmp->nm_inprog != NULLVP) - (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); /* * vflush will check for busy vnodes on mountpoint. * Will do the right thing for MNT_FORCE. That is, we should @@ -1051,24 +1298,13 @@ nfs_unmount(mp, mntflags, p) if (mntflags & MNT_FORCE) { error = vflush(mp, NULLVP, flags); /* locks vp in the process */ } else { - if (vp->v_usecount > 1) { - nmp->nm_state &= ~NFSSTA_DISMINPROG; + if (vnode_isinuse(vp, 1)) return (EBUSY); - } error = vflush(mp, vp, flags); } - - if (error) { - nmp->nm_state &= ~NFSSTA_DISMINPROG; + if (error) return (error); - } - /* - * We are now committed to the unmount. - * For NQNFS, let the server daemon free the nfsmount structure. - */ - if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) - nmp->nm_state |= NFSSTA_DISMNT; nmp->nm_state &= ~NFSSTA_MOUNTED; if (nmp->nm_flag & NFSMNT_RESVPORT) { if (--nfs_resv_mounts == 0) @@ -1077,38 +1313,24 @@ nfs_unmount(mp, mntflags, p) /* * Release the root vnode reference held by mountnfs() - * vflush did the vgone for us when we didn't skip over - * it in the MNT_FORCE case. (Thus vp can't be locked when - * called vflush in non-skip vp case.) */ - vrele(vp); - if (!(mntflags & MNT_FORCE)) - vgone(vp); - mp->mnt_data = 0; /* don't want to end up using stale vp */ + vnode_rele(vp); + + (void)vflush(mp, NULLVP, FORCECLOSE); + vfs_setfsprivate(mp, 0); /* don't want to end up using stale vp */ + nfs_disconnect(nmp); - m_freem(nmp->nm_nam); + mbuf_freem(nmp->nm_nam); - if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) { - register struct nfsreq *rp; + if ((nmp->nm_flag & NFSMNT_KERB) == 0) { + struct nfsreq *rp; /* * Loop through outstanding request list and remove dangling * references to defunct nfsmount struct */ -#if NFSDIAG && 0 - if (hw_atomic_add(&nfsreqqusers, 1) != 1) - nfsatompanic("unmount add"); - nfsbtlen = backtrace(&nfsbt, sizeof(nfsbt)); - nfsbtcpu = cpu_number(); - nfsbtthread = (int)(current_thread()); -#endif - for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next) if (rp->r_nmp == nmp) rp->r_nmp = (struct nfsmount *)0; -#if NFSDIAG && 0 - if (hw_atomic_sub(&nfsreqqusers, 1) != 0) - nfsatompanic("unmount sub"); -#endif /* Need to wake up any rcvlock waiters so they notice the unmount. */ if (nmp->nm_state & NFSSTA_WANTRCV) { nmp->nm_state &= ~NFSSTA_WANTRCV; @@ -1123,163 +1345,147 @@ nfs_unmount(mp, mntflags, p) * Return root of a filesystem */ static int -nfs_root(mp, vpp) - struct mount *mp; - struct vnode **vpp; +nfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t context) { - register struct vnode *vp; + vnode_t vp; struct nfsmount *nmp; - int error, vpid; + int error; + u_long vpid; nmp = VFSTONFS(mp); vp = nmp->nm_dvp; - vpid = vp->v_id; - while (error = vget(vp, LK_EXCLUSIVE, current_proc())) { - /* vget may return ENOENT if the dir changes while in vget */ - /* If that happens, try vget again, else return the error */ - if ((error != ENOENT) || (vp->v_id == vpid)) + vpid = vnode_vid(vp); + while ((error = vnode_getwithvid(vp, vpid))) { + /* vnode_get() may return ENOENT if the dir changes. */ + /* If that happens, just try it again, else return the error. 
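nfs_root() above guards against vnode recycling with the vid handshake: capture the vnode id, attempt vnode_getwithvid(), and if that fails with ENOENT because the id has moved on, retry against the new id. In isolation:

/* Take an iocount on a vnode that may be recycled underneath us. */
static int
get_root_vnode(vnode_t vp)
{
	u_long vid = vnode_vid(vp);
	int error;

	while ((error = vnode_getwithvid(vp, vid))) {
		/* ENOENT with an unchanged vid is a real failure */
		if ((error != ENOENT) || (vnode_vid(vp) == vid))
			return (error);
		vid = vnode_vid(vp);	/* id changed: retry with new vid */
	}
	return (0);		/* iocount held; vnode_put() when done */
}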
*/ + if ((error != ENOENT) || (vnode_vid(vp) == vpid)) return (error); - vpid = vp->v_id; + vpid = vnode_vid(vp); } - if (vp->v_type == VNON) - vp->v_type = VDIR; - vp->v_flag |= VROOT; *vpp = vp; return (0); } -extern int syncprt; - /* * Flush out the buffer cache */ -/* ARGSUSED */ + +struct nfs_sync_cargs { + vfs_context_t context; + int waitfor; + int error; +}; + static int -nfs_sync(mp, waitfor, cred, p) - struct mount *mp; - int waitfor; - struct ucred *cred; - struct proc *p; +nfs_sync_callout(vnode_t vp, void *arg) { - register struct vnode *vp; - int error, allerror = 0; + struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg; + int error; - /* - * Force stale buffer cache information to be flushed. - */ -loop: - LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - int didhold; - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - if (VOP_ISLOCKED(vp) || LIST_FIRST(&VTONFS(vp)->n_dirtyblkhd) == NULL) - continue; - if (vget(vp, LK_EXCLUSIVE, p)) - goto loop; - didhold = ubc_hold(vp); - error = VOP_FSYNC(vp, cred, waitfor, p); - if (error) - allerror = error; - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); - } - return (allerror); + if (LIST_EMPTY(&VTONFS(vp)->n_dirtyblkhd)) + return (VNODE_RETURNED); + if (VTONFS(vp)->n_flag & NWRBUSY) + return (VNODE_RETURNED); + + error = nfs_flush(vp, cargs->waitfor, + vfs_context_ucred(cargs->context), + vfs_context_proc(cargs->context), 0); + if (error) + cargs->error = error; + + return (VNODE_RETURNED); +} + +static int +nfs_sync(mount_t mp, int waitfor, vfs_context_t context) +{ + struct nfs_sync_cargs cargs; + + cargs.waitfor = waitfor; + cargs.context = context; + cargs.error = 0; + + vnode_iterate(mp, 0, nfs_sync_callout, &cargs); + + return (cargs.error); } /* * NFS flat namespace lookup. * Currently unsupported. */ -/* ARGSUSED */ +/*ARGSUSED*/ static int -nfs_vget(mp, ino, vpp) - struct mount *mp; - void *ino; /* XXX void* or ino_t? */ - struct vnode **vpp; +nfs_vget( + __unused mount_t mp, + __unused ino64_t ino, + __unused vnode_t *vpp, + __unused vfs_context_t context) { - return (EOPNOTSUPP); + return (ENOTSUP); } /* * At this point, this should never happen */ -/* ARGSUSED */ +/*ARGSUSED*/ static int -nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) - register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; +nfs_fhtovp( + __unused mount_t mp, + __unused int fhlen, + __unused unsigned char *fhp, + __unused vnode_t *vpp, + __unused vfs_context_t context) { - return (EINVAL); + return (ENOTSUP); } /* * Vnode pointer to File handle, should never happen either */ -/* ARGSUSED */ +/*ARGSUSED*/ static int -nfs_vptofh(vp, fhp) - struct vnode *vp; - struct fid *fhp; +nfs_vptofh( + __unused vnode_t vp, + __unused int *fhlenp, + __unused unsigned char *fhp, + __unused vfs_context_t context) { - return (EINVAL); + return (ENOTSUP); } /* * Vfs start routine, a no-op. 
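The rewritten nfs_sync() above no longer walks mnt_vnodelist by hand; it bundles its parameters into a small cargs struct and lets vnode_iterate() drive a callback, which reports per-vnode disposition with VNODE_RETURNED. The same shape, applied to a hypothetical pass that merely counts dirty vnodes:

struct count_cargs {
	int dirty;			/* dirty vnodes seen so far */
};

static int
count_callout(vnode_t vp, void *arg)
{
	struct count_cargs *cargs = (struct count_cargs *)arg;

	if (!LIST_EMPTY(&VTONFS(vp)->n_dirtyblkhd))
		cargs->dirty++;
	return (VNODE_RETURNED);	/* release vp, keep iterating */
}

static int
count_dirty(mount_t mp)
{
	struct count_cargs cargs = { 0 };

	vnode_iterate(mp, 0, count_callout, &cargs);
	return (cargs.dirty);
}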
*/ -/* ARGSUSED */ +/*ARGSUSED*/ static int -nfs_start(mp, flags, p) - struct mount *mp; - int flags; - struct proc *p; +nfs_start( + __unused mount_t mp, + __unused int flags, + __unused vfs_context_t context) { return (0); } -/* - * Do operations associated with quotas, not supported - */ -/* ARGSUSED */ -static int -nfs_quotactl(mp, cmd, uid, arg, p) - struct mount *mp; - int cmd; - uid_t uid; - caddr_t arg; - struct proc *p; -{ - - return (EOPNOTSUPP); -} - /* * Do that sysctl thang... */ static int -nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen, struct proc *p) +nfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context) { int error = 0, val; - struct sysctl_req *req; + struct sysctl_req *req = NULL; struct vfsidctl vc; - struct mount *mp; - struct nfsmount *nmp; + struct user_vfsidctl user_vc; + mount_t mp; + struct nfsmount *nmp = NULL; struct vfsquery vq; + boolean_t is_64_bit; /* * All names at this level are terminal. @@ -1287,23 +1493,41 @@ nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, if(namelen > 1) return ENOTDIR; /* overloaded */ + is_64_bit = vfs_context_is64bit(context); + /* common code for "new style" VFS_CTL sysctl, get the mount. */ switch (name[0]) { case VFS_CTL_TIMEO: case VFS_CTL_QUERY: case VFS_CTL_NOLOCKS: - req = oldp; - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) - return (error); - mp = vfs_getvfs(&vc.vc_fsid); + req = CAST_DOWN(struct sysctl_req *, oldp); + if (is_64_bit) { + error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); + if (error) + return (error); + mp = vfs_getvfs(&user_vc.vc_fsid); + } + else { + error = SYSCTL_IN(req, &vc, sizeof(vc)); + if (error) + return (error); + mp = vfs_getvfs(&vc.vc_fsid); + } if (mp == NULL) return (ENOENT); nmp = VFSTONFS(mp); if (nmp == NULL) return (ENOENT); bzero(&vq, sizeof(vq)); - VCTLTOREQ(&vc, req); + req->newidx = 0; + if (is_64_bit) { + req->newptr = user_vc.vc_ptr; + req->newlen = (size_t)user_vc.vc_len; + } + else { + req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); + req->newlen = vc.vc_len; + } } switch(name[0]) { @@ -1331,12 +1555,12 @@ nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, return 0; case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 
1 : 0; - if (req->oldptr != NULL) { + if (req->oldptr != USER_ADDR_NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } - if (req->newptr != NULL) { + if (req->newptr != USER_ADDR_NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); @@ -1347,7 +1571,7 @@ nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, } break; case VFS_CTL_QUERY: - if ((nmp->nm_state & NFSSTA_TIMEO)) + if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) @@ -1355,13 +1579,13 @@ nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: - if (req->oldptr != NULL) { + if (req->oldptr != USER_ADDR_NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } - if (req->newptr != NULL) { + if (req->newptr != USER_ADDR_NULL) { error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index a1f5b2a3d..c858df061 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -67,26 +67,28 @@ #include #include #include -#include -#include +#include +#include +#include #include -#include +#include #include -#include -#include +#include #include #include #include -#include +#include +#include +#include +#include #include #include -#include -#include #include #include +#include #include #include @@ -99,7 +101,6 @@ #include #include #include -#include #include #include @@ -121,121 +122,85 @@ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ (int)(B), (int)(C), (int)(D), (int)(E), 0) -#define TRUE 1 -#define FALSE 0 - -#define NFS_FREE_PNBUF(CNP) \ - do { \ - char *tmp = (CNP)->cn_pnbuf; \ - (CNP)->cn_pnbuf = NULL; \ - (CNP)->cn_flags &= ~HASBUF; \ - FREE_ZONE(tmp, (CNP)->cn_pnlen, M_NAMEI); \ - } while (0) - - -static int nfsspec_read __P((struct vop_read_args *)); -static int nfsspec_write __P((struct vop_write_args *)); -static int nfsfifo_read __P((struct vop_read_args *)); -static int nfsfifo_write __P((struct vop_write_args *)); -static int nfsspec_close __P((struct vop_close_args *)); -static int nfsfifo_close __P((struct vop_close_args *)); -#define nfs_poll vop_nopoll -static int nfs_ioctl __P((struct vop_ioctl_args *)); -static int nfs_select __P((struct vop_select_args *)); -static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *)); -static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); -static int nfs_lookup __P((struct vop_lookup_args *)); -static int nfs_create __P((struct vop_create_args *)); -static int nfs_mknod __P((struct vop_mknod_args *)); -static int nfs_open __P((struct vop_open_args *)); -static int nfs_close __P((struct vop_close_args *)); -static int nfs_access __P((struct vop_access_args *)); -static int nfs_getattr __P((struct vop_getattr_args *)); -static int nfs_setattr __P((struct vop_setattr_args *)); -static int nfs_read __P((struct vop_read_args *)); -static int nfs_mmap __P((struct vop_mmap_args *)); -static int nfs_fsync __P((struct vop_fsync_args *)); -static int nfs_remove __P((struct vop_remove_args *)); -static int 
nfs_link __P((struct vop_link_args *)); -static int nfs_rename __P((struct vop_rename_args *)); -static int nfs_mkdir __P((struct vop_mkdir_args *)); -static int nfs_rmdir __P((struct vop_rmdir_args *)); -static int nfs_symlink __P((struct vop_symlink_args *)); -static int nfs_readdir __P((struct vop_readdir_args *)); -static int nfs_bmap __P((struct vop_bmap_args *)); -static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); -static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); -static int nfsspec_access __P((struct vop_access_args *)); -static int nfs_readlink __P((struct vop_readlink_args *)); -static int nfs_print __P((struct vop_print_args *)); -static int nfs_pathconf __P((struct vop_pathconf_args *)); -static int nfs_advlock __P((struct vop_advlock_args *)); -static int nfs_blkatoff __P((struct vop_blkatoff_args *)); -static int nfs_valloc __P((struct vop_valloc_args *)); -static int nfs_vfree __P((struct vop_vfree_args *)); -static int nfs_truncate __P((struct vop_truncate_args *)); -static int nfs_update __P((struct vop_update_args *)); -static int nfs_pagein __P((struct vop_pagein_args *)); -static int nfs_pageout __P((struct vop_pageout_args *)); -static int nfs_blktooff __P((struct vop_blktooff_args *)); -static int nfs_offtoblk __P((struct vop_offtoblk_args *)); -static int nfs_cmap __P((struct vop_cmap_args *)); +static int nfsspec_read(struct vnop_read_args *); +static int nfsspec_write(struct vnop_write_args *); +static int nfsfifo_read(struct vnop_read_args *); +static int nfsfifo_write(struct vnop_write_args *); +static int nfsspec_close(struct vnop_close_args *); +static int nfsfifo_close(struct vnop_close_args *); +static int nfs_ioctl(struct vnop_ioctl_args *); +static int nfs_select(struct vnop_select_args *); +static int nfs_setattrrpc(vnode_t,struct vnode_attr *,kauth_cred_t,proc_t); +static int nfs_lookup(struct vnop_lookup_args *); +static int nfs_create(struct vnop_create_args *); +static int nfs_mknod(struct vnop_mknod_args *); +static int nfs_open(struct vnop_open_args *); +static int nfs_close(struct vnop_close_args *); +static int nfs_access(struct vnop_access_args *); +static int nfs_vnop_getattr(struct vnop_getattr_args *); +static int nfs_setattr(struct vnop_setattr_args *); +static int nfs_read(struct vnop_read_args *); +static int nfs_mmap(struct vnop_mmap_args *); +static int nfs_fsync(struct vnop_fsync_args *); +static int nfs_remove(struct vnop_remove_args *); +static int nfs_link(struct vnop_link_args *); +static int nfs_rename(struct vnop_rename_args *); +static int nfs_mkdir(struct vnop_mkdir_args *); +static int nfs_rmdir(struct vnop_rmdir_args *); +static int nfs_symlink(struct vnop_symlink_args *); +static int nfs_readdir(struct vnop_readdir_args *); +static int nfs_lookitup(vnode_t,char *,int,kauth_cred_t,proc_t,struct nfsnode **); +static int nfs_sillyrename(vnode_t,vnode_t,struct componentname *,kauth_cred_t,proc_t); +static int nfs_readlink(struct vnop_readlink_args *); +static int nfs_pathconf(struct vnop_pathconf_args *); +static int nfs_advlock(struct vnop_advlock_args *); +static int nfs_pagein(struct vnop_pagein_args *); +static int nfs_pageout(struct vnop_pageout_args *); +static int nfs_blktooff(struct vnop_blktooff_args *); +static int nfs_offtoblk(struct vnop_offtoblk_args *); +static int nfs_blockmap(struct vnop_blockmap_args *); /* * Global vfs data structures for nfs */ -vop_t **nfsv2_vnodeop_p; +vnop_t **nfsv2_vnodeop_p; static struct 
vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { - { &vop_default_desc, (vop_t *)vn_default_error }, - { &vop_lookup_desc, (vop_t *)nfs_lookup }, /* lookup */ - { &vop_create_desc, (vop_t *)nfs_create }, /* create */ - { &vop_mknod_desc, (vop_t *)nfs_mknod }, /* mknod */ - { &vop_open_desc, (vop_t *)nfs_open }, /* open */ - { &vop_close_desc, (vop_t *)nfs_close }, /* close */ - { &vop_access_desc, (vop_t *)nfs_access }, /* access */ - { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */ - { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */ - { &vop_read_desc, (vop_t *)nfs_read }, /* read */ - { &vop_write_desc, (vop_t *)nfs_write }, /* write */ - { &vop_lease_desc, (vop_t *)nfs_lease_check }, /* lease */ - { &vop_ioctl_desc, (vop_t *)nfs_ioctl }, /* ioctl */ - { &vop_select_desc, (vop_t *)nfs_select }, /* select */ - { &vop_revoke_desc, (vop_t *)nfs_revoke }, /* revoke */ - { &vop_mmap_desc, (vop_t *)nfs_mmap }, /* mmap */ - { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */ - { &vop_seek_desc, (vop_t *)nfs_seek }, /* seek */ - { &vop_remove_desc, (vop_t *)nfs_remove }, /* remove */ - { &vop_link_desc, (vop_t *)nfs_link }, /* link */ - { &vop_rename_desc, (vop_t *)nfs_rename }, /* rename */ - { &vop_mkdir_desc, (vop_t *)nfs_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (vop_t *)nfs_rmdir }, /* rmdir */ - { &vop_symlink_desc, (vop_t *)nfs_symlink }, /* symlink */ - { &vop_readdir_desc, (vop_t *)nfs_readdir }, /* readdir */ - { &vop_readlink_desc, (vop_t *)nfs_readlink }, /* readlink */ - { &vop_abortop_desc, (vop_t *)nop_abortop }, /* abortop */ - { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */ - { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */ - { &vop_bmap_desc, (vop_t *)nfs_bmap }, /* bmap */ - { &vop_strategy_desc, (vop_t *)err_strategy }, /* strategy */ - { &vop_print_desc, (vop_t *)nfs_print }, /* print */ - { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (vop_t *)nfs_pathconf }, /* pathconf */ - { &vop_advlock_desc, (vop_t *)nfs_advlock }, /* advlock */ - { &vop_blkatoff_desc, (vop_t *)nfs_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (vop_t *)nfs_valloc }, /* valloc */ - { &vop_reallocblks_desc, (vop_t *)nfs_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (vop_t *)nfs_vfree }, /* vfree */ - { &vop_truncate_desc, (vop_t *)nfs_truncate }, /* truncate */ - { &vop_update_desc, (vop_t *)nfs_update }, /* update */ - { &vop_bwrite_desc, (vop_t *)err_bwrite }, /* bwrite */ - { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */ - { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */ - { &vop_copyfile_desc, (vop_t *)err_copyfile }, /* Copyfile */ - { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */ + { &vnop_default_desc, (vnop_t *)vn_default_error }, + { &vnop_lookup_desc, (vnop_t *)nfs_lookup }, /* lookup */ + { &vnop_create_desc, (vnop_t *)nfs_create }, /* create */ + { &vnop_mknod_desc, (vnop_t *)nfs_mknod }, /* mknod */ + { &vnop_open_desc, (vnop_t *)nfs_open }, /* open */ + { &vnop_close_desc, (vnop_t *)nfs_close }, /* close */ + { &vnop_access_desc, (vnop_t *)nfs_access }, /* access */ + { &vnop_getattr_desc, (vnop_t *)nfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (vnop_t *)nfs_setattr }, /* setattr */ + { &vnop_read_desc, (vnop_t *)nfs_read }, /* read 
*/ + { &vnop_write_desc, (vnop_t *)nfs_write }, /* write */ + { &vnop_ioctl_desc, (vnop_t *)nfs_ioctl }, /* ioctl */ + { &vnop_select_desc, (vnop_t *)nfs_select }, /* select */ + { &vnop_revoke_desc, (vnop_t *)nfs_revoke }, /* revoke */ + { &vnop_mmap_desc, (vnop_t *)nfs_mmap }, /* mmap */ + { &vnop_fsync_desc, (vnop_t *)nfs_fsync }, /* fsync */ + { &vnop_remove_desc, (vnop_t *)nfs_remove }, /* remove */ + { &vnop_link_desc, (vnop_t *)nfs_link }, /* link */ + { &vnop_rename_desc, (vnop_t *)nfs_rename }, /* rename */ + { &vnop_mkdir_desc, (vnop_t *)nfs_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (vnop_t *)nfs_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (vnop_t *)nfs_symlink }, /* symlink */ + { &vnop_readdir_desc, (vnop_t *)nfs_readdir }, /* readdir */ + { &vnop_readlink_desc, (vnop_t *)nfs_readlink }, /* readlink */ + { &vnop_inactive_desc, (vnop_t *)nfs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (vnop_t *)nfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (vnop_t *)err_strategy }, /* strategy */ + { &vnop_pathconf_desc, (vnop_t *)nfs_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (vnop_t *)nfs_advlock }, /* advlock */ + { &vnop_bwrite_desc, (vnop_t *)err_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (vnop_t *)nfs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (vnop_t *)nfs_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (vnop_t *)err_copyfile }, /* Copyfile */ + { &vnop_blktooff_desc, (vnop_t *)nfs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (vnop_t *)nfs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (vnop_t *)nfs_blockmap }, /* blockmap */ { NULL, NULL } }; struct vnodeopv_desc nfsv2_vnodeop_opv_desc = @@ -247,58 +212,42 @@ VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ -vop_t **spec_nfsv2nodeop_p; +vnop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = { - { &vop_default_desc, (vop_t *)vn_default_error }, - { &vop_lookup_desc, (vop_t *)spec_lookup }, /* lookup */ - { &vop_create_desc, (vop_t *)spec_create }, /* create */ - { &vop_mknod_desc, (vop_t *)spec_mknod }, /* mknod */ - { &vop_open_desc, (vop_t *)spec_open }, /* open */ - { &vop_close_desc, (vop_t *)nfsspec_close }, /* close */ - { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */ - { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */ - { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */ - { &vop_read_desc, (vop_t *)nfsspec_read }, /* read */ - { &vop_write_desc, (vop_t *)nfsspec_write }, /* write */ - { &vop_lease_desc, (vop_t *)spec_lease_check }, /* lease */ - { &vop_ioctl_desc, (vop_t *)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (vop_t *)spec_select }, /* select */ - { &vop_revoke_desc, (vop_t *)spec_revoke }, /* revoke */ - { &vop_mmap_desc, (vop_t *)spec_mmap }, /* mmap */ - { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */ - { &vop_seek_desc, (vop_t *)spec_seek }, /* seek */ - { &vop_remove_desc, (vop_t *)spec_remove }, /* remove */ - { &vop_link_desc, (vop_t *)spec_link }, /* link */ - { &vop_rename_desc, (vop_t *)spec_rename }, /* rename */ - { &vop_mkdir_desc, (vop_t *)spec_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (vop_t *)spec_rmdir }, /* rmdir */ - { &vop_symlink_desc, (vop_t *)spec_symlink }, /* symlink */ - { &vop_readdir_desc, (vop_t *)spec_readdir }, /* readdir */ - { &vop_readlink_desc, (vop_t *)spec_readlink }, /* readlink */ - { &vop_abortop_desc, (vop_t *)spec_abortop }, /* abortop */ - { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (vop_t *)nfs_reclaim 
}, /* reclaim */ - { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */ - { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */ - { &vop_bmap_desc, (vop_t *)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (vop_t *)spec_strategy }, /* strategy */ - { &vop_print_desc, (vop_t *)nfs_print }, /* print */ - { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (vop_t *)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (vop_t *)spec_advlock }, /* advlock */ - { &vop_blkatoff_desc, (vop_t *)spec_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (vop_t *)spec_valloc }, /* valloc */ - { &vop_reallocblks_desc, (vop_t *)spec_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (vop_t *)spec_vfree }, /* vfree */ - { &vop_truncate_desc, (vop_t *)spec_truncate }, /* truncate */ - { &vop_update_desc, (vop_t *)nfs_update }, /* update */ - { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */ - { &vop_devblocksize_desc, (vop_t *)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */ - { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */ - { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */ + { &vnop_default_desc, (vnop_t *)vn_default_error }, + { &vnop_lookup_desc, (vnop_t *)spec_lookup }, /* lookup */ + { &vnop_create_desc, (vnop_t *)spec_create }, /* create */ + { &vnop_mknod_desc, (vnop_t *)spec_mknod }, /* mknod */ + { &vnop_open_desc, (vnop_t *)spec_open }, /* open */ + { &vnop_close_desc, (vnop_t *)nfsspec_close }, /* close */ + { &vnop_getattr_desc, (vnop_t *)nfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (vnop_t *)nfs_setattr }, /* setattr */ + { &vnop_read_desc, (vnop_t *)nfsspec_read }, /* read */ + { &vnop_write_desc, (vnop_t *)nfsspec_write }, /* write */ + { &vnop_ioctl_desc, (vnop_t *)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (vnop_t *)spec_select }, /* select */ + { &vnop_revoke_desc, (vnop_t *)spec_revoke }, /* revoke */ + { &vnop_mmap_desc, (vnop_t *)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (vnop_t *)nfs_fsync }, /* fsync */ + { &vnop_remove_desc, (vnop_t *)spec_remove }, /* remove */ + { &vnop_link_desc, (vnop_t *)spec_link }, /* link */ + { &vnop_rename_desc, (vnop_t *)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (vnop_t *)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (vnop_t *)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (vnop_t *)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (vnop_t *)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (vnop_t *)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (vnop_t *)nfs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (vnop_t *)nfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (vnop_t *)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (vnop_t *)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (vnop_t *)spec_advlock }, /* advlock */ + { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (vnop_t *)nfs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (vnop_t *)nfs_pageout }, /* Pageout */ + { &vnop_blktooff_desc, (vnop_t *)nfs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (vnop_t *)nfs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (vnop_t *)nfs_blockmap }, /* blockmap */ { NULL, NULL } }; struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = @@ -307,57 +256,42 @@ struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = 
VNODEOP_SET(spec_nfsv2nodeop_opv_desc); #endif -vop_t **fifo_nfsv2nodeop_p; +vnop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = { - { &vop_default_desc, (vop_t *)vn_default_error }, - { &vop_lookup_desc, (vop_t *)fifo_lookup }, /* lookup */ - { &vop_create_desc, (vop_t *)fifo_create }, /* create */ - { &vop_mknod_desc, (vop_t *)fifo_mknod }, /* mknod */ - { &vop_open_desc, (vop_t *)fifo_open }, /* open */ - { &vop_close_desc, (vop_t *)nfsfifo_close }, /* close */ - { &vop_access_desc, (vop_t *)nfsspec_access }, /* access */ - { &vop_getattr_desc, (vop_t *)nfs_getattr }, /* getattr */ - { &vop_setattr_desc, (vop_t *)nfs_setattr }, /* setattr */ - { &vop_read_desc, (vop_t *)nfsfifo_read }, /* read */ - { &vop_write_desc, (vop_t *)nfsfifo_write }, /* write */ - { &vop_lease_desc, (vop_t *)fifo_lease_check }, /* lease */ - { &vop_ioctl_desc, (vop_t *)fifo_ioctl }, /* ioctl */ - { &vop_select_desc, (vop_t *)fifo_select }, /* select */ - { &vop_revoke_desc, (vop_t *)fifo_revoke }, /* revoke */ - { &vop_mmap_desc, (vop_t *)fifo_mmap }, /* mmap */ - { &vop_fsync_desc, (vop_t *)nfs_fsync }, /* fsync */ - { &vop_seek_desc, (vop_t *)fifo_seek }, /* seek */ - { &vop_remove_desc, (vop_t *)fifo_remove }, /* remove */ - { &vop_link_desc, (vop_t *)fifo_link }, /* link */ - { &vop_rename_desc, (vop_t *)fifo_rename }, /* rename */ - { &vop_mkdir_desc, (vop_t *)fifo_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (vop_t *)fifo_rmdir }, /* rmdir */ - { &vop_symlink_desc, (vop_t *)fifo_symlink }, /* symlink */ - { &vop_readdir_desc, (vop_t *)fifo_readdir }, /* readdir */ - { &vop_readlink_desc, (vop_t *)fifo_readlink }, /* readlink */ - { &vop_abortop_desc, (vop_t *)fifo_abortop }, /* abortop */ - { &vop_inactive_desc, (vop_t *)nfs_inactive }, /* inactive */ - { &vop_reclaim_desc, (vop_t *)nfs_reclaim }, /* reclaim */ - { &vop_lock_desc, (vop_t *)nfs_lock }, /* lock */ - { &vop_unlock_desc, (vop_t *)nfs_unlock }, /* unlock */ - { &vop_bmap_desc, (vop_t *)fifo_bmap }, /* bmap */ - { &vop_strategy_desc, (vop_t *)fifo_strategy }, /* strategy */ - { &vop_print_desc, (vop_t *)nfs_print }, /* print */ - { &vop_islocked_desc, (vop_t *)nfs_islocked }, /* islocked */ - { &vop_pathconf_desc, (vop_t *)fifo_pathconf }, /* pathconf */ - { &vop_advlock_desc, (vop_t *)fifo_advlock }, /* advlock */ - { &vop_blkatoff_desc, (vop_t *)fifo_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (vop_t *)fifo_valloc }, /* valloc */ - { &vop_reallocblks_desc, (vop_t *)fifo_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (vop_t *)fifo_vfree }, /* vfree */ - { &vop_truncate_desc, (vop_t *)fifo_truncate }, /* truncate */ - { &vop_update_desc, (vop_t *)nfs_update }, /* update */ - { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */ - { &vop_pagein_desc, (vop_t *)nfs_pagein }, /* Pagein */ - { &vop_pageout_desc, (vop_t *)nfs_pageout }, /* Pageout */ - { &vop_blktooff_desc, (vop_t *)nfs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (vop_t *)nfs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (vop_t *)nfs_cmap }, /* cmap */ + { &vnop_default_desc, (vnop_t *)vn_default_error }, + { &vnop_lookup_desc, (vnop_t *)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (vnop_t *)fifo_create }, /* create */ + { &vnop_mknod_desc, (vnop_t *)fifo_mknod }, /* mknod */ + { &vnop_open_desc, (vnop_t *)fifo_open }, /* open */ + { &vnop_close_desc, (vnop_t *)nfsfifo_close }, /* close */ + { &vnop_getattr_desc, (vnop_t *)nfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (vnop_t *)nfs_setattr }, /* setattr */ + 
{ &vnop_read_desc, (vnop_t *)nfsfifo_read }, /* read */ + { &vnop_write_desc, (vnop_t *)nfsfifo_write }, /* write */ + { &vnop_ioctl_desc, (vnop_t *)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (vnop_t *)fifo_select }, /* select */ + { &vnop_revoke_desc, (vnop_t *)fifo_revoke }, /* revoke */ + { &vnop_mmap_desc, (vnop_t *)fifo_mmap }, /* mmap */ + { &vnop_fsync_desc, (vnop_t *)nfs_fsync }, /* fsync */ + { &vnop_remove_desc, (vnop_t *)fifo_remove }, /* remove */ + { &vnop_link_desc, (vnop_t *)fifo_link }, /* link */ + { &vnop_rename_desc, (vnop_t *)fifo_rename }, /* rename */ + { &vnop_mkdir_desc, (vnop_t *)fifo_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (vnop_t *)fifo_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (vnop_t *)fifo_symlink }, /* symlink */ + { &vnop_readdir_desc, (vnop_t *)fifo_readdir }, /* readdir */ + { &vnop_readlink_desc, (vnop_t *)fifo_readlink }, /* readlink */ + { &vnop_inactive_desc, (vnop_t *)nfs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (vnop_t *)nfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (vnop_t *)fifo_strategy }, /* strategy */ + { &vnop_pathconf_desc, (vnop_t *)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (vnop_t *)fifo_advlock }, /* advlock */ + { &vnop_bwrite_desc, (vnop_t *)vn_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (vnop_t *)nfs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (vnop_t *)nfs_pageout }, /* Pageout */ + { &vnop_blktooff_desc, (vnop_t *)nfs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (vnop_t *)nfs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (vnop_t *)nfs_blockmap }, /* blockmap */ { NULL, NULL } }; struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = @@ -366,29 +300,35 @@ struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); #endif -static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, - struct componentname *cnp, - struct vattr *vap)); -static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, - struct ucred *cred, struct proc *proc)); -static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, - int fnamelen, struct vnode *tdvp, - char *tnameptr, int tnamelen, - struct ucred *cred, struct proc *proc)); -static int nfs_renameit __P((struct vnode *sdvp, - struct componentname *scnp, - struct sillyrename *sp)); +static int nfs_mknodrpc(vnode_t dvp, vnode_t *vpp, + struct componentname *cnp, + struct vnode_attr *vap, + kauth_cred_t cred, proc_t p); +static int nfs_removerpc(vnode_t dvp, char *name, int namelen, + kauth_cred_t cred, proc_t proc); +static int nfs_renamerpc(vnode_t fdvp, char *fnameptr, + int fnamelen, vnode_t tdvp, + char *tnameptr, int tnamelen, + kauth_cred_t cred, proc_t proc); /* * Global variables */ +extern u_long nfs_xdrneg1; extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; -struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +proc_t nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; + +lck_grp_t *nfs_iod_lck_grp; +lck_grp_attr_t *nfs_iod_lck_grp_attr; +lck_attr_t *nfs_iod_lck_attr; +lck_mtx_t *nfs_iod_mutex; + int nfs_numasync = 0; int nfs_ioddelwri = 0; + #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; @@ -514,8 +454,7 @@ static const short errortooutcome[ELAST+1] = { static short -nfs_pageouterrorhandler(error) - int error; +nfs_pageouterrorhandler(int error) { if (error > ELAST) return(DUMP); @@ -524,16 +463,16 @@ nfs_pageouterrorhandler(error) } static int 
-nfs3_access_otw(struct vnode *vp, +nfs3_access_otw(vnode_t vp, int wmode, - struct proc *p, - struct ucred *cred) + proc_t p, + kauth_cred_t cred) { const int v3 = 1; u_long *tl; int error = 0, attrflag; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; caddr_t bpos, dpos, cp2; register long t1, t2; register caddr_t cp; @@ -542,20 +481,22 @@ nfs3_access_otw(struct vnode *vp, u_int64_t xid; struct timeval now; - nfsstats.rpccnt[NFSPROC_ACCESS]++; - nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_ACCESS]); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, p, cred, &xid); if (mrep) { - nfsm_postop_attr(vp, attrflag, &xid); + nfsm_postop_attr_update(vp, 1, attrflag, &xid); } if (!error) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); np->n_mode = rmode; - np->n_modeuid = cred->cr_uid; + np->n_modeuid = kauth_cred_getuid(cred); microuptime(&now); np->n_modestamp = now.tv_sec; } @@ -571,19 +512,20 @@ nfs3_access_otw(struct vnode *vp, */ static int nfs_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; + struct vnop_access_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_mode; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - int error = 0; + vnode_t vp = ap->a_vp; + int error = 0, dorpc; u_long mode, wmode; int v3 = NFS_ISV3(vp); struct nfsnode *np = VTONFS(vp); struct timeval now; + kauth_cred_t cred; /* * For nfs v3, do an access rpc, otherwise you are stuck emulating @@ -594,23 +536,50 @@ nfs_access(ap) * in the cache. */ if (v3) { - if (ap->a_mode & VREAD) - mode = NFSV3ACCESS_READ; - else - mode = 0; - if (vp->v_type == VDIR) { - if (ap->a_mode & VWRITE) - mode |= NFSV3ACCESS_MODIFY | - NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE; - if (ap->a_mode & VEXEC) + /* + * Convert KAUTH primitives to NFS access rights. 
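nfs3_access_otw() above stamps the rights the server returned into the nfsnode, along with the requesting uid and an uptime timestamp, so later checks can be answered from cache. The freshness test applied further down (after the NMODEVALID() validity check) reduces to:

/* Cache answers only for the same uid, within the timeout, and when
 * the cached rights already cover everything being asked for. */
static int
access_cache_hit(struct nfsnode *np, kauth_cred_t cred, u_long mode)
{
	struct timeval now;

	microuptime(&now);
	return ((now.tv_sec < np->n_modestamp + nfsaccess_cache_timeout) &&
	    (kauth_cred_getuid(cred) == np->n_modeuid) &&
	    ((np->n_mode & mode) == mode));
}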
+ */ + mode = 0; + if (vnode_isdir(vp)) { + /* directory */ + if (ap->a_action & + (KAUTH_VNODE_LIST_DIRECTORY | + KAUTH_VNODE_READ_EXTATTRIBUTES)) + mode |= NFSV3ACCESS_READ; + if (ap->a_action & KAUTH_VNODE_SEARCH) mode |= NFSV3ACCESS_LOOKUP; + if (ap->a_action & + (KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY)) + mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND; + if (ap->a_action & KAUTH_VNODE_DELETE_CHILD) + mode |= NFSV3ACCESS_MODIFY; } else { - if (ap->a_mode & VWRITE) + /* file */ + if (ap->a_action & + (KAUTH_VNODE_READ_DATA | + KAUTH_VNODE_READ_EXTATTRIBUTES)) + mode |= NFSV3ACCESS_READ; + if (ap->a_action & KAUTH_VNODE_WRITE_DATA) mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND; - if (ap->a_mode & VEXEC) + if (ap->a_action & KAUTH_VNODE_APPEND_DATA) + mode |= NFSV3ACCESS_EXTEND; + if (ap->a_action & KAUTH_VNODE_EXECUTE) mode |= NFSV3ACCESS_EXECUTE; } - /* XXX safety belt, only make blanket request if caching */ + /* common */ + if (ap->a_action & KAUTH_VNODE_DELETE) + mode |= NFSV3ACCESS_DELETE; + if (ap->a_action & + (KAUTH_VNODE_WRITE_ATTRIBUTES | + KAUTH_VNODE_WRITE_EXTATTRIBUTES | + KAUTH_VNODE_WRITE_SECURITY)) + mode |= NFSV3ACCESS_MODIFY; + /* XXX this is pretty dubious */ + if (ap->a_action & KAUTH_VNODE_CHANGE_OWNER) + mode |= NFSV3ACCESS_MODIFY; + + /* if caching, always ask for every right */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | @@ -618,44 +587,50 @@ nfs_access(ap) } else wmode = mode; + cred = vfs_context_ucred(ap->a_context); + /* * Does our cached result allow us to give a definite yes to * this request? */ - microuptime(&now); - if (now.tv_sec < np->n_modestamp + nfsaccess_cache_timeout && - ap->a_cred->cr_uid == np->n_modeuid && - (np->n_mode & mode) == mode) { - /* nfsstats.accesscache_hits++; */ - } else { + dorpc = 1; + if (NMODEVALID(np)) { + microuptime(&now); + if ((now.tv_sec < (np->n_modestamp + nfsaccess_cache_timeout)) && + (kauth_cred_getuid(cred) == np->n_modeuid) && + ((np->n_mode & mode) == mode)) { + /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_hits); */ + dorpc = 0; + } + } + if (dorpc) { + /* Either a no, or a don't know. Go to the wire. */ + /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */ + error = nfs3_access_otw(vp, wmode, vfs_context_proc(ap->a_context), cred); + } + if (!error) { /* - * Either a no, or a don't know. Go to the wire. + * If we asked for DELETE but didn't get it, the server + * may simply not support returning that bit (possible + * on UNIX systems). So, we'll assume that it is OK, + * and just let any subsequent delete action fail if it + * really isn't deletable. */ - /* nfsstats.accesscache_misses++; */ - error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred); - if (!error) { - if ((np->n_mode & mode) != mode) - error = EACCES; - } + if ((mode & NFSV3ACCESS_DELETE) && + !(np->n_mode & NFSV3ACCESS_DELETE)) + np->n_mode |= NFSV3ACCESS_DELETE; + if ((np->n_mode & mode) != mode) + error = EACCES; } - } else - return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */ - /* - * Disallow write attempts on filesystems mounted read-only; - * unless the file is a socket, fifo, or a block or character - * device resident on the filesystem. - * CSM - moved EROFS check down per NetBSD rev 1.71. So you - * get the correct error value with layered filesystems. - * EKN - moved the return(error) below this so it does get called. 
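A subtlety in the rights check above: some servers never return the DELETE bit at all, so a missing DELETE grant is folded back in as permission rather than treated as denial, leaving the eventual remove RPC to be the real judge. The final decision is then a superset test:

static int
access_decide(struct nfsnode *np, u_long mode)
{
	/* tolerate servers that cannot report the DELETE bit */
	if ((mode & NFSV3ACCESS_DELETE) &&
	    !(np->n_mode & NFSV3ACCESS_DELETE))
		np->n_mode |= NFSV3ACCESS_DELETE;

	/* cached rights must cover every right requested */
	return (((np->n_mode & mode) == mode) ? 0 : EACCES);
}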
- */ - if (!error && (ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { - switch (vp->v_type) { - case VREG: case VDIR: case VLNK: - error = EROFS; - default: - break; + } else { + /* v2 */ + if ((ap->a_action & KAUTH_VNODE_WRITE_RIGHTS) && vfs_isrdonly(vnode_mount(vp))) { + error = EROFS; + } else { + error = 0; } } + return (error); } @@ -670,82 +645,69 @@ nfs_access(ap) static int nfs_open(ap) - struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; + struct vnop_open_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_mode; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - struct vattr vattr; + struct nfs_vattr nvattr; + kauth_cred_t cred; + proc_t p; + enum vtype vtype; int error; - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { + vtype = vnode_vtype(vp); + if (vtype != VREG && vtype != VDIR && vtype != VLNK) { return (EACCES); } - /* - * Get a valid lease. If cached data is stale, flush it. - */ - if (nmp->nm_flag & NFSMNT_NQNFS) { - if (NQNFS_CKINVALID(vp, np, ND_READ)) { - do { - error = nqnfs_getlease(vp, ND_READ, ap->a_cred, - ap->a_p); - } while (error == NQNFS_EXPIRED); - if (error) + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + if (np->n_flag & NNEEDINVALIDATE) { + np->n_flag &= ~NNEEDINVALIDATE; + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1); + } + if (np->n_flag & NMODIFIED) { + if ((error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) == EINTR) return (error); - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { - if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, - ap->a_p, 1)) == EINTR) - return (error); - np->n_brev = np->n_lrev; - } + NATTRINVALIDATE(np); + if (vtype == VDIR) + np->n_direofoffset = 0; + error = nfs_getattr(vp, &nvattr, cred, p); + if (error) + return (error); + if (vtype == VDIR) { + /* if directory changed, purge any name cache entries */ + if (nfstimespeccmp(&np->n_ncmtime, &nvattr.nva_mtime, !=)) + cache_purge(vp); + np->n_ncmtime = nvattr.nva_mtime; } + np->n_mtime = nvattr.nva_mtime; } else { - if (np->n_flag & NMODIFIED) { - if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, - ap->a_p, 1)) == EINTR) - return (error); - np->n_xid = 0; - if (vp->v_type == VDIR) + error = nfs_getattr(vp, &nvattr, cred, p); + if (error) + return (error); + if (nfstimespeccmp(&np->n_mtime, &nvattr.nva_mtime, !=)) { + if (vtype == VDIR) { np->n_direofoffset = 0; - error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); - if (error) - return (error); - if (vp->v_type == VDIR) { - /* if directory changed, purge any name cache entries */ - if (np->n_ncmtime != vattr.va_mtime.tv_sec) + nfs_invaldir(vp); + /* purge name cache entries */ + if (nfstimespeccmp(&np->n_ncmtime, &nvattr.nva_mtime, !=)) cache_purge(vp); - np->n_ncmtime = vattr.va_mtime.tv_sec; } - np->n_mtime = vattr.va_mtime.tv_sec; - } else { - error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); - if (error) + if ((error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1)) == EINTR) return (error); - if (np->n_mtime != vattr.va_mtime.tv_sec) { - if (vp->v_type == VDIR) { - np->n_direofoffset = 0; - nfs_invaldir(vp); - /* purge name cache entries */ - if (np->n_ncmtime != vattr.va_mtime.tv_sec) - cache_purge(vp); - } - if ((error = nfs_vinvalbuf(vp, V_SAVE, - ap->a_cred, ap->a_p, 1)) == EINTR) - return (error); - if (vp->v_type == 
VDIR) - np->n_ncmtime = vattr.va_mtime.tv_sec; - np->n_mtime = vattr.va_mtime.tv_sec; - } + if (vtype == VDIR) + np->n_ncmtime = nvattr.nva_mtime; + np->n_mtime = nvattr.nva_mtime; } } - if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) - np->n_xid = 0; /* For Open/Close consistency */ + NATTRINVALIDATE(np); /* For Open/Close consistency */ return (0); } @@ -770,27 +732,28 @@ nfs_open(ap) * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * them. - * for NQNFS - do nothing now, since 2 is dealt with via leases and - * 1 should be dealt with via an fsync() system call for - * cases where write errors are important. */ /* ARGSUSED */ static int nfs_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; + kauth_cred_t cred; + proc_t p; int error = 0; - if (vp->v_type == VREG) { + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + if (vnode_vtype(vp) == VREG) { #if DIAGNOSTIC register struct sillyrename *sp = np->n_sillyrename; if (sp) @@ -798,23 +761,16 @@ nfs_close(ap) &sp->s_name[0], (unsigned)(sp->s_dvp), (unsigned)vp, (unsigned)ap, (unsigned)np, (unsigned)sp); #endif - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); - if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && - (np->n_flag & NMODIFIED)) { - int getlock = !VOP_ISLOCKED(vp); - if (getlock) { - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - if (!error && !VFSTONFS(vp->v_mount)) { - VOP_UNLOCK(vp, 0, ap->a_p); - error = ENXIO; - } - if (error) - return (error); - } + if (np->n_flag & NNEEDINVALIDATE) { + np->n_flag &= ~NNEEDINVALIDATE; + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1); + } + if (np->n_flag & NMODIFIED) { if (NFS_ISV3(vp)) { - error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p); + error = nfs_flush(vp, MNT_WAIT, cred, p, 0); /* * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes @@ -822,11 +778,9 @@ nfs_close(ap) */ /* np->n_flag &= ~NMODIFIED; */ } else { - error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); } - np->n_xid = 0; - if (getlock) - VOP_UNLOCK(vp, 0, ap->a_p); + NATTRINVALIDATE(np); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; @@ -836,31 +790,99 @@ nfs_close(ap) return (error); } + +int +nfs_getattr_no_vnode( + mount_t mp, + u_char *fhp, + int fhsize, + kauth_cred_t cred, + proc_t p, + struct nfs_vattr *nvap, + u_int64_t *xidp) +{ + mbuf_t mreq, mrep, md, mb, mb2; + caddr_t bpos, dpos; + int t2; + u_long *tl; + caddr_t cp; + struct nfsmount *nmp = VFSTONFS(mp); + int v3 = (nmp->nm_flag & NFSMNT_NFSV3); + int hsiz; + int error = 0; + + // XXX fix this to use macros once the macros get cleaned up + //nfsm_reqhead(NFSX_FH(v3)); + hsiz = NFSX_FH(v3); + mb = NULL; + if (hsiz >= nfs_mbuf_minclsize) + error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mb); + else + error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb); + if (error) + return (error); + bpos = mbuf_data(mb); + mreq = mb; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_GETATTR]); + //nfsm_fhtom(vp, v3); + if (v3) { 
+ t2 = nfsm_rndup(fhsize) + NFSX_UNSIGNED; + if (t2 <= mbuf_trailingspace(mb)) { + nfsm_build(tl, u_long *, t2); + *tl++ = txdr_unsigned(fhsize); + *(tl + ((t2>>2) - 2)) = 0; + bcopy((caddr_t)fhp,(caddr_t)tl, fhsize); + } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (caddr_t)fhp, fhsize))) { + error = t2; + mbuf_freem(mreq); + goto nfsmout; + } + } else { + nfsm_build(cp, caddr_t, NFSX_V2FH); + bcopy((caddr_t)fhp, cp, NFSX_V2FH); + } + //nfsm_request(vp, NFSPROC_GETATTR, p, cred, xidp); + if ((error = nfs_request(NULL, mp, mreq, NFSPROC_GETATTR, p, cred, &mrep, &md, &dpos, xidp))) { + if (error & NFSERR_RETERR) + error &= ~NFSERR_RETERR; + else + goto nfsmout; + } + if (!error) { + //nfsm_loadattr(vp, nvap, xidp); + error = nfs_parsefattr(&md, &dpos, v3, nvap); + if (error) { + mbuf_freem(mrep); + goto nfsmout; + } + } + nfsm_reqdone; + return (error); +} + /* * nfs getattr call from vfs. */ -static int -nfs_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +int +nfs_getattr( + vnode_t vp, + struct nfs_vattr *nvap, + kauth_cred_t cred, + proc_t p) { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - register caddr_t cp; - register u_long *tl; - register int t1, t2; + struct nfsnode *np = VTONFS(vp); + caddr_t cp; + u_long *tl; + int t1, t2; caddr_t bpos, dpos; int error = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; int v3; u_int64_t xid; int avoidfloods; - - FSDBG_TOP(513, np->n_size, np, np->n_vattr.va_size, np->n_flag); + + FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag); + /* * Update local times for special files. */ @@ -869,29 +891,35 @@ nfs_getattr(ap) /* * First look in the cache. */ - if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) { - FSDBG_BOT(513, np->n_size, 0, np->n_vattr.va_size, np->n_flag); + if ((error = nfs_getattrcache(vp, nvap)) == 0) { + FSDBG_BOT(513, np->n_size, 0, np->n_vattr.nva_size, np->n_flag); return (0); } if (error != ENOENT) { - FSDBG_BOT(513, np->n_size, error, np->n_vattr.va_size, + FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag); return (error); } - if (!VFSTONFS(vp->v_mount)) { - FSDBG_BOT(513, np->n_size, ENXIO, np->n_vattr.va_size, np->n_flag); + if (!VFSTONFS(vnode_mount(vp))) { + FSDBG_BOT(513, np->n_size, ENXIO, np->n_vattr.nva_size, np->n_flag); return (ENXIO); } v3 = NFS_ISV3(vp); error = 0; - if (v3 && nfsaccess_cache_timeout > 0) { - /* nfsstats.accesscache_misses++; */ - if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, - ap->a_cred)) + /* + * Try to get both the attributes and access info by making an + * ACCESS call and seeing if it returns updated attributes. + * But don't bother if we aren't caching access info or if the + * attributes returned wouldn't be cached. 
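	 * (An NFSv3 ACCESS reply carries post-op attributes, so a single
	 * RPC can refresh both the access cache and the attribute cache;
	 * if the nfs_getattrcache() retry below then hits, we have
	 * effectively gotten the GETATTR for free.)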
+ */ + if (v3 && (nfsaccess_cache_timeout > 0) && + (nfs_attrcachetimeout(vp) > 0)) { + /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */ + if ((error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, p, cred))) return (error); - if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) + if ((error = nfs_getattrcache(vp, nvap)) == 0) return (0); if (error != ENOENT) return (error); @@ -899,14 +927,18 @@ nfs_getattr(ap) } avoidfloods = 0; tryagain: - nfsstats.rpccnt[NFSPROC_GETATTR]++; - nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); + nfsm_reqhead(NFSX_FH(v3)); + if (error) { + FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag); + return (error); + } + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_GETATTR]); nfsm_fhtom(vp, v3); - nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred, &xid); + nfsm_request(vp, NFSPROC_GETATTR, p, cred, &xid); if (!error) { - nfsm_loadattr(vp, ap->a_vap, &xid); + nfsm_loadattr(vp, v3, nvap, &xid); if (!xid) { /* out-of-order rpc - attributes were dropped */ - m_freem(mrep); + mbuf_freem(mrep); mrep = NULL; FSDBG(513, -1, np, np->n_xid << 32, np->n_xid); if (avoidfloods++ < 100) @@ -916,27 +948,72 @@ tryagain: */ panic("nfs_getattr: getattr flood\n"); } - if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) { + if (nfstimespeccmp(&np->n_mtime, &nvap->nva_mtime, !=)) { + enum vtype vtype = vnode_vtype(vp); FSDBG(513, -1, np, -1, vp); - if (vp->v_type == VDIR) { + if (vtype == VDIR) { nfs_invaldir(vp); /* purge name cache entries */ - if (np->n_ncmtime != ap->a_vap->va_mtime.tv_sec) + if (nfstimespeccmp(&np->n_ncmtime, &nvap->nva_mtime, !=)) cache_purge(vp); } - error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, - ap->a_p, 1); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); FSDBG(513, -1, np, -2, error); if (!error) { - if (vp->v_type == VDIR) - np->n_ncmtime = ap->a_vap->va_mtime.tv_sec; - np->n_mtime = ap->a_vap->va_mtime.tv_sec; + if (vtype == VDIR) + np->n_ncmtime = nvap->nva_mtime; + np->n_mtime = nvap->nva_mtime; } } } nfsm_reqdone; - FSDBG_BOT(513, np->n_size, -1, np->n_vattr.va_size, error); + FSDBG_BOT(513, np->n_size, -1, np->n_vattr.nva_size, error); + return (error); +} + + +static int +nfs_vnop_getattr( + struct vnop_getattr_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) +{ + int error; + struct nfs_vattr nva; + struct vnode_attr *vap = ap->a_vap; + + error = nfs_getattr(ap->a_vp, &nva, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); + if (error) + return (error); + + /* copy nva to *a_vap */ + VATTR_RETURN(vap, va_type, nva.nva_type); + VATTR_RETURN(vap, va_mode, nva.nva_mode); + VATTR_RETURN(vap, va_rdev, nva.nva_rdev); + VATTR_RETURN(vap, va_uid, nva.nva_uid); + VATTR_RETURN(vap, va_gid, nva.nva_gid); + VATTR_RETURN(vap, va_nlink, nva.nva_nlink); + VATTR_RETURN(vap, va_fileid, nva.nva_fileid); + VATTR_RETURN(vap, va_data_size, nva.nva_size); + VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes); + VATTR_RETURN(vap, va_iosize, nva.nva_blocksize); /* should this just be f_iosize? 
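+					 * (note: the buffer code in this file takes
+					 * its biosize from vfs_statfs()->f_iosize,
+					 * so the two values may disagree)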
*/ + VATTR_RETURN(vap, va_fsid, nva.nva_fsid); + vap->va_access_time.tv_sec = nva.nva_atime.tv_sec; + vap->va_access_time.tv_nsec = nva.nva_atime.tv_nsec; + VATTR_SET_SUPPORTED(vap, va_access_time); + vap->va_modify_time.tv_sec = nva.nva_mtime.tv_sec; + vap->va_modify_time.tv_nsec = nva.nva_mtime.tv_nsec; + VATTR_SET_SUPPORTED(vap, va_modify_time); + vap->va_change_time.tv_sec = nva.nva_ctime.tv_sec; + vap->va_change_time.tv_nsec = nva.nva_ctime.tv_nsec; + VATTR_SET_SUPPORTED(vap, va_change_time); + return (error); } @@ -945,158 +1022,174 @@ tryagain: */ static int nfs_setattr(ap) - struct vop_setattr_args /* { + struct vnop_setattr_args /* { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - register struct vattr *vap = ap->a_vap; + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct vnode_attr *vap = ap->a_vap; int error = 0; u_quad_t tsize; + kauth_cred_t cred; + proc_t p; #ifndef nolint tsize = (u_quad_t)0; #endif -#ifdef XXX /* enable this code soon! (but test it first) */ - /* - * Setting of flags is not supported. - */ - if (vap->va_flags != VNOVAL) - return (EOPNOTSUPP); -#endif - - /* - * Disallow write attempts if the filesystem is mounted read-only. - */ - if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || - vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || - vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && - (vp->v_mount->mnt_flag & MNT_RDONLY)) + /* Setting of flags is not supported. */ + if (VATTR_IS_ACTIVE(vap, va_flags)) + return (ENOTSUP); + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_data_size); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + + /* Disallow write attempts if the filesystem is mounted read-only. */ + if ((VATTR_IS_ACTIVE(vap, va_flags) || VATTR_IS_ACTIVE(vap, va_mode) || + VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) || + VATTR_IS_ACTIVE(vap, va_access_time) || + VATTR_IS_ACTIVE(vap, va_modify_time)) && + vnode_vfsisrdonly(vp)) return (EROFS); - if (vap->va_size != VNOVAL) { - switch (vp->v_type) { + + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + switch (vnode_vtype(vp)) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: - if (vap->va_mtime.tv_sec == VNOVAL && - vap->va_atime.tv_sec == VNOVAL && - vap->va_mode == (u_short)VNOVAL && - vap->va_uid == (uid_t)VNOVAL && - vap->va_gid == (gid_t)VNOVAL) + if (!VATTR_IS_ACTIVE(vap, va_modify_time) && + !VATTR_IS_ACTIVE(vap, va_access_time) && + !VATTR_IS_ACTIVE(vap, va_mode) && + !VATTR_IS_ACTIVE(vap, va_uid) && + !VATTR_IS_ACTIVE(vap, va_gid)) return (0); - vap->va_size = VNOVAL; + VATTR_CLEAR_ACTIVE(vap, va_data_size); break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. 
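			 * (checked again here because a pure size change
			 * arrives with only va_data_size active, which the
			 * blanket EROFS test above does not cover)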
*/ - if (vp->v_mount->mnt_flag & MNT_RDONLY) + if (vnode_vfsisrdonly(vp)) return (EROFS); - FSDBG_TOP(512, np->n_size, vap->va_size, - np->n_vattr.va_size, np->n_flag); + FSDBG_TOP(512, np->n_size, vap->va_data_size, + np->n_vattr.nva_size, np->n_flag); if (np->n_flag & NMODIFIED) { - if (vap->va_size == 0) - error = nfs_vinvalbuf(vp, 0, - ap->a_cred, ap->a_p, 1); + if (vap->va_data_size == 0) + error = nfs_vinvalbuf(vp, 0, cred, p, 1); else - error = nfs_vinvalbuf(vp, V_SAVE, - ap->a_cred, ap->a_p, 1); + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) { printf("nfs_setattr: nfs_vinvalbuf %d\n", error); - FSDBG_BOT(512, np->n_size, vap->va_size, - np->n_vattr.va_size, -1); + FSDBG_BOT(512, np->n_size, vap->va_data_size, + np->n_vattr.nva_size, -1); return (error); } - } else if (np->n_size > vap->va_size) { /* shrinking? */ - daddr_t obn, bn; - int biosize; + } else if (np->n_size > vap->va_data_size) { /* shrinking? */ + daddr64_t obn, bn; + int biosize, neweofoff, mustwrite; struct nfsbuf *bp; - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; obn = (np->n_size - 1) / biosize; - bn = vap->va_size / biosize; - for ( ; obn >= bn; obn--) - if (nfs_buf_incore(vp, obn)) { - bp = nfs_buf_get(vp, obn, biosize, 0, BLK_READ); - if (!bp) + bn = vap->va_data_size / biosize; + for ( ; obn >= bn; obn--) { + if (!nfs_buf_is_incore(vp, obn)) + continue; + error = nfs_buf_get(vp, obn, biosize, 0, NBLK_READ, &bp); + if (error) continue; - if (obn == bn) { - int neweofoff, mustwrite; - mustwrite = 0; - neweofoff = vap->va_size - NBOFF(bp); - /* check for any dirty data before the new EOF */ - if (bp->nb_dirtyend && bp->nb_dirtyoff < neweofoff) { + if (obn != bn) { + FSDBG(512, bp, bp->nb_flags, 0, obn); + SET(bp->nb_flags, NB_INVAL); + nfs_buf_release(bp, 1); + continue; + } + mustwrite = 0; + neweofoff = vap->va_data_size - NBOFF(bp); + /* check for any dirty data before the new EOF */ + if (bp->nb_dirtyend && bp->nb_dirtyoff < neweofoff) { /* clip dirty range to EOF */ if (bp->nb_dirtyend > neweofoff) - bp->nb_dirtyend = neweofoff; + bp->nb_dirtyend = neweofoff; mustwrite++; - } - bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1; - if (bp->nb_dirty) + } + bp->nb_dirty &= (1 << round_page_32(neweofoff)/PAGE_SIZE) - 1; + if (bp->nb_dirty) mustwrite++; - if (mustwrite) { - /* gotta write out dirty data before invalidating */ - /* (NB_STABLE indicates that data writes should be FILESYNC) */ - /* (NB_NOCACHE indicates buffer should be discarded) */ - CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ)); - SET(bp->nb_flags, NB_STABLE | NB_NOCACHE); + if (!mustwrite) { + FSDBG(512, bp, bp->nb_flags, 0, obn); + SET(bp->nb_flags, NB_INVAL); + nfs_buf_release(bp, 1); + continue; + } + /* gotta write out dirty data before invalidating */ + /* (NB_STABLE indicates that data writes should be FILESYNC) */ + /* (NB_NOCACHE indicates buffer should be discarded) */ + CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ)); + SET(bp->nb_flags, NB_STABLE | NB_NOCACHE); + if (bp->nb_wcred == NOCRED) { + kauth_cred_ref(cred); + bp->nb_wcred = cred; + } + error = nfs_buf_write(bp); + // Note: bp has been released + if (error) { + FSDBG(512, bp, 0xd00dee, 0xbad, error); + np->n_error = error; + np->n_flag |= NWRITEERR; /* - * NFS has embedded ucred so crhold() risks zone corruption + * There was a write error and we need to + * invalidate attrs and flush buffers in + * order to sync up with the server. 
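+					 * NATTRINVALIDATE() below forces the next
+					 * nfs_getattr() to go back to the server.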
+ * (if this write was extending the file, + * we may no longer know the correct size) */ - if (bp->nb_wcred == NOCRED) - bp->nb_wcred = crdup(ap->a_cred); - error = nfs_buf_write(bp); - // Note: bp has been released - if (error) { - FSDBG(512, bp, 0xd00dee, 0xbad, error); - np->n_error = error; - np->n_flag |= NWRITEERR; - error = 0; - } - bp = NULL; - } + NATTRINVALIDATE(np); + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cred, p, 1); + error = 0; } - if (bp) { - FSDBG(512, bp, bp->nb_flags, 0, obn); - SET(bp->nb_flags, NB_INVAL); - nfs_buf_release(bp, 1); - } - } + } } tsize = np->n_size; - np->n_size = np->n_vattr.va_size = vap->va_size; - ubc_setsize(vp, (off_t)vap->va_size); /* XXX error? */ - }; - } else if ((vap->va_mtime.tv_sec != VNOVAL || - vap->va_atime.tv_sec != VNOVAL) && - (np->n_flag & NMODIFIED) && vp->v_type == VREG) { - error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); + np->n_size = np->n_vattr.nva_size = vap->va_data_size; + ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */ + } + } else if ((VATTR_IS_ACTIVE(vap, va_modify_time) || + VATTR_IS_ACTIVE(vap, va_access_time)) && + (np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) { + error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error == EINTR) return (error); } - error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); - FSDBG_BOT(512, np->n_size, vap->va_size, np->n_vattr.va_size, error); - if (error && vap->va_size != VNOVAL) { + if (VATTR_IS_ACTIVE(vap, va_mode)) { + NMODEINVALIDATE(np); + } + error = nfs_setattrrpc(vp, vap, cred, p); + FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error); + if (error && VATTR_IS_ACTIVE(vap, va_data_size)) { /* make every effort to resync file size w/ server... */ - int err = 0; /* preserve "error" for return */ + int err; /* preserve "error" for return */ - printf("nfs_setattr: nfs_setattrrpc %d\n", error); - np->n_size = np->n_vattr.va_size = tsize; + np->n_size = np->n_vattr.nva_size = tsize; ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ - vap->va_size = tsize; - err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); - if (err) - printf("nfs_setattr1: nfs_setattrrpc %d\n", err); + vap->va_data_size = tsize; + err = nfs_setattrrpc(vp, vap, cred, p); + printf("nfs_setattr: nfs_setattrrpc %d %d\n", error, err); } return (error); } @@ -1106,10 +1199,10 @@ nfs_setattr(ap) */ static int nfs_setattrrpc(vp, vap, cred, procp) - register struct vnode *vp; - register struct vattr *vap; - struct ucred *cred; - struct proc *procp; + vnode_t vp; + struct vnode_attr *vap; + kauth_cred_t cred; + proc_t procp; { register struct nfsv2_sattr *sp; register caddr_t cp; @@ -1117,20 +1210,22 @@ nfs_setattrrpc(vp, vap, cred, procp) caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccpostattr = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; int v3; u_int64_t xid; struct timeval now; - if (!VFSTONFS(vp->v_mount)) + if (!VFSTONFS(vnode_mount(vp))) return (ENXIO); v3 = NFS_ISV3(vp); - nfsstats.rpccnt[NFSPROC_SETATTR]++; - nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_SATTR(v3)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_SETATTR]); nfsm_fhtom(vp, v3); if (v3) { - if (vap->va_mode != (u_short)VNOVAL) { + if (VATTR_IS_ACTIVE(vap, va_mode)) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); @@ -1138,7 +1233,7 @@ nfs_setattrrpc(vp, vap, cred, procp) nfsm_build(tl, u_long *, 
NFSX_UNSIGNED); *tl = nfs_false; } - if (vap->va_uid != (uid_t)VNOVAL) { + if (VATTR_IS_ACTIVE(vap, va_uid)) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); @@ -1146,7 +1241,7 @@ nfs_setattrrpc(vp, vap, cred, procp) nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } - if (vap->va_gid != (gid_t)VNOVAL) { + if (VATTR_IS_ACTIVE(vap, va_gid)) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); @@ -1154,20 +1249,20 @@ nfs_setattrrpc(vp, vap, cred, procp) nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } - if (vap->va_size != VNOVAL) { + if (VATTR_IS_ACTIVE(vap, va_data_size)) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; - txdr_hyper(&vap->va_size, tl); + txdr_hyper(&vap->va_data_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } microtime(&now); - if (vap->va_atime.tv_sec != VNOVAL) { - if (vap->va_atime.tv_sec != now.tv_sec) { + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + if (vap->va_access_time.tv_sec != now.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); - txdr_nfsv3time(&vap->va_atime, tl); + txdr_nfsv3time(&vap->va_access_time, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); @@ -1176,11 +1271,11 @@ nfs_setattrrpc(vp, vap, cred, procp) nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } - if (vap->va_mtime.tv_sec != VNOVAL) { - if (vap->va_mtime.tv_sec != now.tv_sec) { + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + if (vap->va_modify_time.tv_sec != now.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); - txdr_nfsv3time(&vap->va_mtime, tl); + txdr_nfsv3time(&vap->va_modify_time, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); @@ -1192,42 +1287,55 @@ nfs_setattrrpc(vp, vap, cred, procp) nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { + struct timespec neg1time = { -1, -1 }; nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - if (vap->va_mode == (u_short)VNOVAL) - sp->sa_mode = VNOVAL; - else - sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); - if (vap->va_uid == (uid_t)VNOVAL) - sp->sa_uid = VNOVAL; + if (VATTR_IS_ACTIVE(vap, va_mode)) + sp->sa_mode = vtonfsv2_mode(vnode_vtype(vp), vap->va_mode); else + sp->sa_mode = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_uid)) sp->sa_uid = txdr_unsigned(vap->va_uid); - if (vap->va_gid == (gid_t)VNOVAL) - sp->sa_gid = VNOVAL; else + sp->sa_uid = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_gid)) sp->sa_gid = txdr_unsigned(vap->va_gid); - sp->sa_size = txdr_unsigned(vap->va_size); - txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); - txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + else + sp->sa_gid = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_data_size)) + sp->sa_size = txdr_unsigned(vap->va_data_size); + else + sp->sa_size = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + txdr_nfsv2time(&vap->va_access_time, &sp->sa_atime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_atime); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + txdr_nfsv2time(&vap->va_modify_time, &sp->sa_mtime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_mtime); + } } nfsm_request(vp, NFSPROC_SETATTR, procp, cred, &xid); if (v3) { - time_t premtime = 0; + struct timespec premtime = { 0, 0 }; if (mrep) { - nfsm_wcc_data(vp, premtime, 
wccpostattr, &xid); + nfsm_wcc_data(vp, &premtime, wccpostattr, &xid); } /* if file hadn't changed, update cached mtime */ - if (VTONFS(vp)->n_mtime == premtime) { - VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(vp)->n_mtime, &premtime, ==)) { + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.nva_mtime; } /* if directory hadn't changed, update namecache mtime */ - if ((vp->v_type == VDIR) && (VTONFS(vp)->n_ncmtime == premtime)) { - VTONFS(vp)->n_ncmtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; + if ((vnode_vtype(vp) == VDIR) && + nfstimespeccmp(&VTONFS(vp)->n_ncmtime, &premtime, ==)) { + VTONFS(vp)->n_ncmtime = VTONFS(vp)->n_vattr.nva_mtime; } if (!wccpostattr) - VTONFS(vp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(vp)); } else { if (mrep) { - nfsm_loadattr(vp, (struct vattr *)0, &xid); + nfsm_loadattr(vp, v3, NULL, &xid); } } nfsm_reqdone; @@ -1241,258 +1349,234 @@ nfs_setattrrpc(vp, vap, cred, procp) */ static int nfs_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - register struct componentname *cnp = ap->a_cnp; - register struct vnode *dvp = ap->a_dvp; - register struct vnode **vpp = ap->a_vpp; - register int flags = cnp->cn_flags; - register struct vnode *newvp; - register u_long *tl; - register caddr_t cp; - register long t1, t2; + struct componentname *cnp = ap->a_cnp; + vnode_t dvp = ap->a_dvp; + vnode_t *vpp = ap->a_vpp; + int flags = cnp->cn_flags; + vnode_t newvp; + u_long *tl; + caddr_t cp; + long t1, t2; caddr_t bpos, dpos, cp2; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; long len; - nfsfh_t *fhp; - struct nfsnode *np; - int lockparent, wantparent, error = 0, attrflag, fhsize; + u_char *fhp; + struct nfsnode *dnp, *np; + int wantparent, error, attrflag, dattrflag, fhsize, fhisdvp; int v3 = NFS_ISV3(dvp); - struct proc *p = cnp->cn_proc; - int unlockdvp = 0; - u_int64_t xid; - struct vattr vattr; + u_int64_t xid, dxid; + struct nfs_vattr nvattr; + kauth_cred_t cred; + proc_t p; + int ngflags; - if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) - return (EROFS); *vpp = NULLVP; - if (dvp->v_type != VDIR) - return (ENOTDIR); - lockparent = flags & LOCKPARENT; + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + wantparent = flags & (LOCKPARENT|WANTPARENT); - np = VTONFS(dvp); + dnp = VTONFS(dvp); - if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred, p) && - (np->n_ncmtime != vattr.va_mtime.tv_sec)) { + error = nfs_getattr(dvp, &nvattr, cred, p); + if (error) + goto error_return; + if (nfstimespeccmp(&dnp->n_ncmtime, &nvattr.nva_mtime, !=)) { /* * This directory has changed on us. * Purge any name cache entries. */ cache_purge(dvp); - np->n_ncmtime = vattr.va_mtime.tv_sec; + dnp->n_ncmtime = nvattr.nva_mtime; } - if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { - int vpid; - - newvp = *vpp; - vpid = newvp->v_id; - - /* - * See the comment starting `Step through' in ufs/ufs_lookup.c - * for an explanation of the locking protocol - */ - - /* - * Note: we need to make sure to get a lock/ref on newvp - * before we possibly go off to the server in VOP_ACCESS. 
- */ - if (dvp == newvp) { - VREF(newvp); - error = 0; - } else if (flags & ISDOTDOT) { - VOP_UNLOCK(dvp, 0, p); - error = vget(newvp, LK_EXCLUSIVE, p); - if (!error) - error = vn_lock(dvp, LK_EXCLUSIVE, p); - } else { - error = vget(newvp, LK_EXCLUSIVE, p); - if (error) - VOP_UNLOCK(dvp, 0, p); - } - - if (error) - goto cache_lookup_out; - - if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p))) { - if (dvp == newvp) - vrele(newvp); - else - vput(newvp); - *vpp = NULLVP; - goto error_return; - } - - if ((dvp != newvp) && (!lockparent || !(flags & ISLASTCN))) - VOP_UNLOCK(dvp, 0, p); + error = cache_lookup(dvp, vpp, cnp); + switch (error) { + case ENOENT: + /* negative cache entry same as cache miss */ + error = 0; + /* FALLTHROUGH */ + case 0: + /* cache miss */ + break; + case -1: + /* cache hit, not really an error */ + { + struct vnop_access_args naa; + + OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_hits); + + /* check for directory access */ + naa.a_vp = dvp; + naa.a_action = KAUTH_VNODE_SEARCH; + naa.a_context = ap->a_context; + + /* compute actual success/failure based on accessibility */ + error = nfs_access(&naa); + } + /* FALLTHROUGH */ + default: + /* unexpected error from cache_lookup */ + goto error_return; + } + + /* check for lookup of "." */ + if ((cnp->cn_nameptr[0] == '.') && (cnp->cn_namelen == 1)) { + /* skip lookup, we know who we are */ + fhisdvp = 1; + fhp = NULL; + fhsize = 0; + mrep = NULL; + goto found; + } - if (vpid == newvp->v_id) { - nfsstats.lookupcache_hits++; - if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) - cnp->cn_flags |= SAVENAME; - error = 0; /* ignore any from VOP_GETATTR */ + /* do we know this name is too long? */ + if (v3) { + /* For NFSv3: need uniform pathconf info to test pc_namemax */ + struct nfsmount *nmp = VFSTONFS(vnode_mount(dvp)); + if (!nmp) { + error = ENXIO; goto error_return; } - vput(newvp); - if ((dvp != newvp) && lockparent && (flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); -cache_lookup_out: - error = vn_lock(dvp, LK_EXCLUSIVE, p); - *vpp = NULLVP; - if (error) + if (((nmp->nm_state & (NFSSTA_GOTFSINFO|NFSSTA_GOTPATHCONF)) == + (NFSSTA_GOTFSINFO|NFSSTA_GOTPATHCONF)) && + (nmp->nm_fsinfo.fsproperties & NFSV3FSINFO_HOMOGENEOUS) && + (cnp->cn_namelen > (long)nmp->nm_fsinfo.namemax)) { + error = ENAMETOOLONG; goto error_return; + } + } else if (cnp->cn_namelen > NFS_MAXNAMLEN) { + error = ENAMETOOLONG; + goto error_return; } error = 0; newvp = NULLVP; - nfsstats.lookupcache_misses++; - nfsstats.rpccnt[NFSPROC_LOOKUP]++; + + OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_misses); len = cnp->cn_namelen; - nfsm_reqhead(dvp, NFSPROC_LOOKUP, - NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + if (error) + goto error_return; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_LOOKUP]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN, v3); /* nfsm_request for NFSv2 causes you to goto to nfsmout upon errors */ - nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_request(dvp, NFSPROC_LOOKUP, p, cred, &xid); if (error) { if (mrep) { - nfsm_postop_attr(dvp, attrflag, &xid); - m_freem(mrep); + nfsm_postop_attr_update(dvp, v3, dattrflag, &xid); + mbuf_freem(mrep); } goto nfsmout; } + + /* get the filehandle */ nfsm_getfh(fhp, fhsize, v3); + /* is the file handle the same as this directory's file handle? 
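+	 * (if so, we just take an extra reference on dvp below via
+	 * vnode_get() instead of creating a new node with nfs_nget())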
*/ + fhisdvp = NFS_CMPFH(dnp, fhp, fhsize); + + /* get attributes */ + if (v3) { + dxid = xid; + nfsm_postop_attr_get(v3, attrflag, &nvattr); + nfsm_postop_attr_update(dvp, v3, dattrflag, &dxid); + if (!attrflag && (!fhisdvp || !dattrflag)) { + /* We need valid attributes in order */ + /* to call nfs_nget/vnode_create(). */ + error = nfs_getattr_no_vnode(vnode_mount(dvp), + fhp, fhsize, cred, p, &nvattr, &xid); + if (error) { + mbuf_freem(mrep); + goto error_return; + } + } + } else { + nfsm_attr_get(v3, &nvattr); + } + +found: /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { - if (NFS_CMPFH(np, fhp, fhsize)) { - m_freem(mrep); + if (fhisdvp) { + mbuf_freem(mrep); error = EISDIR; goto error_return; } - if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) { - m_freem(mrep); + error = nfs_nget(vnode_mount(dvp), dvp, cnp, fhp, fhsize, + &nvattr, &xid, 0, &np); + if (error) { + mbuf_freem(mrep); goto error_return; } - newvp = NFSTOV(np); - if (v3) { - u_int64_t dxid = xid; + *vpp = NFSTOV(np); + mbuf_freem(mrep); - nfsm_postop_attr(newvp, attrflag, &xid); - nfsm_postop_attr(dvp, attrflag, &dxid); - if (np->n_xid == 0) { - /* - * VFS currently requires that we have valid - * attributes when returning success. - */ - error = VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p); - if (error) { - m_freem(mrep); - vput(newvp); - goto error_return; - } - } - } else - nfsm_loadattr(newvp, (struct vattr *)0, &xid); - *vpp = newvp; - m_freem(mrep); - cnp->cn_flags |= SAVENAME; - if (!lockparent) - VOP_UNLOCK(dvp, 0, p); - error = 0; goto error_return; } - if (NFS_CMPFH(np, fhp, fhsize)) { - VREF(dvp); - newvp = dvp; - } else if (flags & ISDOTDOT) { - VOP_UNLOCK(dvp, 0, p); - error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); + if ((cnp->cn_flags & MAKEENTRY) && + (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) + ngflags = NG_MAKEENTRY; + else + ngflags = 0; + + if (fhisdvp) { + error = vnode_get(dvp); if (error) { - m_freem(mrep); - vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); + mbuf_freem(mrep); goto error_return; } - newvp = NFSTOV(np); - if (!lockparent || !(flags & ISLASTCN)) - unlockdvp = 1; /* keep dvp locked until after postops */ - if (error = vn_lock(dvp, LK_EXCLUSIVE, p)) { - m_freem(mrep); - vput(newvp); - goto error_return; + newvp = dvp; + /* test fhp to see if we have valid attributes in nvattr */ + if (fhp && (dnp->n_xid <= xid)) { + error = nfs_loadattrcache(dnp, &nvattr, &xid, 0); + if (error) { + vnode_put(dvp); + mbuf_freem(mrep); + goto error_return; + } } } else { - if ((error = nfs_nget(dvp->v_mount, fhp, fhsize, &np))) { - m_freem(mrep); + error = nfs_nget(vnode_mount(dvp), dvp, cnp, fhp, fhsize, + &nvattr, &xid, ngflags, &np); + if (error) { + mbuf_freem(mrep); goto error_return; } - if (!lockparent || !(flags & ISLASTCN)) - unlockdvp = 1; /* keep dvp locked until after postops */ newvp = NFSTOV(np); } - if (v3) { - u_int64_t dxid = xid; - - nfsm_postop_attr(newvp, attrflag, &xid); - nfsm_postop_attr(dvp, attrflag, &dxid); - if (np->n_xid == 0) { - /* - * VFS currently requires that we have valid - * attributes when returning success. 
- */ - error = VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p); - if (error) { - if (unlockdvp) - VOP_UNLOCK(dvp, 0, p); - m_freem(mrep); - vput(newvp); - goto error_return; - } - } - } else - nfsm_loadattr(newvp, (struct vattr *)0, &xid); - if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) - cnp->cn_flags |= SAVENAME; - if ((cnp->cn_flags & MAKEENTRY) && - (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { - cache_enter(dvp, newvp, cnp); - } *vpp = newvp; +// if (error == 0 && *vpp != NULL && *vpp != dvp) +// nfs_unlock(VTONFS(*vpp)); + nfsm_reqdone; - if (unlockdvp) - VOP_UNLOCK(dvp, 0, p); if (error) { - if (newvp != NULLVP) { - if (newvp == dvp) - vrele(newvp); - else - vput(newvp); - *vpp = NULLVP; - } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { - if (dvp->v_mount && (dvp->v_mount->mnt_flag & MNT_RDONLY)) + if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) error = EROFS; else error = EJUSTRETURN; - if (!lockparent) - VOP_UNLOCK(dvp, 0, p); } - if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) - cnp->cn_flags |= SAVENAME; } error_return: + if (error && *vpp) { + vnode_put(*vpp); + *vpp = NULLVP; + } return (error); } @@ -1502,18 +1586,19 @@ error_return: */ static int nfs_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; + struct vnop_read_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - - if (vp->v_type != VREG) + if (vnode_vtype(ap->a_vp) != VREG) return (EPERM); - return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); + return (nfs_bioread(ap->a_vp, ap->a_uio, ap->a_ioflag, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context))); } @@ -1522,17 +1607,18 @@ nfs_read(ap) */ static int nfs_readlink(ap) - struct vop_readlink_args /* { - struct vnode *a_vp; + struct vnop_readlink_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - - if (vp->v_type != VLNK) + if (vnode_vtype(ap->a_vp) != VLNK) return (EPERM); - return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); + return (nfs_bioread(ap->a_vp, ap->a_uio, 0, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context))); } /* @@ -1540,33 +1626,36 @@ nfs_readlink(ap) * Called by nfs_doio() from below the buffer cache. 
*/ int -nfs_readlinkrpc(vp, uiop, cred) - register struct vnode *vp; - struct uio *uiop; - struct ucred *cred; +nfs_readlinkrpc( + vnode_t vp, + struct uio *uiop, + kauth_cred_t cred, + proc_t p) { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; int v3; u_int64_t xid; - if (!VFSTONFS(vp->v_mount)) + if (!VFSTONFS(vnode_mount(vp))) return (ENXIO); v3 = NFS_ISV3(vp); - nfsstats.rpccnt[NFSPROC_READLINK]++; - nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); + nfsm_reqhead(NFSX_FH(v3)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_READLINK]); nfsm_fhtom(vp, v3); - nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred, &xid); + nfsm_request(vp, NFSPROC_READLINK, p, cred, &xid); if (v3 && mrep) - nfsm_postop_attr(vp, attrflag, &xid); + nfsm_postop_attr_update(vp, v3, attrflag, &xid); if (!error) { - nfsm_strsiz(len, NFS_MAXPATHLEN); - if (len == NFS_MAXPATHLEN) { + nfsm_strsiz(len, NFS_MAXPATHLEN, v3); + if (len >= NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); #if DIAGNOSTIC if (!np) @@ -1586,37 +1675,41 @@ nfs_readlinkrpc(vp, uiop, cred) * Ditto above */ int -nfs_readrpc(vp, uiop, cred) - register struct vnode *vp; - struct uio *uiop; - struct ucred *cred; +nfs_readrpc( + vnode_t vp, + struct uio *uiop, + kauth_cred_t cred, + proc_t p) { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof = 0, attrflag; int v3, nmrsize; u_int64_t xid; - FSDBG_TOP(536, vp, uiop->uio_offset, uiop->uio_resid, 0); - nmp = VFSTONFS(vp->v_mount); + FSDBG_TOP(536, vp, uiop->uio_offset, uio_uio_resid(uiop), 0); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); v3 = NFS_ISV3(vp); nmrsize = nmp->nm_rsize; - tsiz = uiop->uio_resid; + // LP64todo - fix this + tsiz = uio_uio_resid(uiop); if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3) { - FSDBG_BOT(536, vp, uiop->uio_offset, uiop->uio_resid, EFBIG); + FSDBG_BOT(536, vp, uiop->uio_offset, uio_uio_resid(uiop), EFBIG); return (EFBIG); } while (tsiz > 0) { - nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmrsize) ? 
nmrsize : tsiz; - nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED * 3); + if (error) + break; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_READ]); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { @@ -1628,26 +1721,26 @@ nfs_readrpc(vp, uiop, cred) *tl = 0; } FSDBG(536, vp, uiop->uio_offset, len, 0); - nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred, &xid); + nfsm_request(vp, NFSPROC_READ, p, cred, &xid); if (v3) { if (mrep) { - nfsm_postop_attr(vp, attrflag, &xid); + nfsm_postop_attr_update(vp, v3, attrflag, &xid); } if (error) { - m_freem(mrep); + mbuf_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else { if (mrep) { - nfsm_loadattr(vp, (struct vattr *)0, &xid); + nfsm_loadattr(vp, v3, NULL, &xid); } } if (mrep) { - nfsm_strsiz(retlen, nmrsize); + nfsm_strsiz(retlen, nmrsize, 0); nfsm_mtouio(uiop, retlen); - m_freem(mrep); + mbuf_freem(mrep); } else { retlen = 0; } @@ -1659,7 +1752,7 @@ nfs_readrpc(vp, uiop, cred) tsiz = 0; } nfsmout: - FSDBG_BOT(536, vp, eof, uiop->uio_resid, error); + FSDBG_BOT(536, vp, eof, uio_uio_resid(uiop), error); return (error); } @@ -1667,47 +1760,52 @@ nfsmout: * nfs write call */ int -nfs_writerpc(vp, uiop, cred, iomode, must_commit) - register struct vnode *vp; - register struct uio *uiop; - struct ucred *cred; - int *iomode, *must_commit; +nfs_writerpc( + vnode_t vp, + struct uio *uiop, + kauth_cred_t cred, + proc_t p, + int *iomode, + int *must_commit) { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; struct nfsmount *nmp; int error = 0, len, tsiz, updatemtime = 0, wccpostattr = 0, rlen, commit; int v3, committed = NFSV3WRITE_FILESYNC; u_int64_t xid; + mount_t mp; #if DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs_writerpc: iovcnt > 1"); #endif - FSDBG_TOP(537, vp, uiop->uio_offset, uiop->uio_resid, *iomode); - nmp = VFSTONFS(vp->v_mount); + FSDBG_TOP(537, vp, uiop->uio_offset, uio_uio_resid(uiop), *iomode); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); v3 = NFS_ISV3(vp); *must_commit = 0; - tsiz = uiop->uio_resid; + // LP64todo - fix this + tsiz = uio_uio_resid(uiop); if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3) { - FSDBG_BOT(537, vp, uiop->uio_offset, uiop->uio_resid, EFBIG); + FSDBG_BOT(537, vp, uiop->uio_offset, uio_uio_resid(uiop), EFBIG); return (EFBIG); } while (tsiz > 0) { - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) { error = ENXIO; break; } - nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; - nfsm_reqhead(vp, NFSPROC_WRITE, - NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_reqhead(NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); + if (error) + break; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_WRITE]); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); @@ -1723,15 +1821,15 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) *tl = txdr_unsigned(len); FSDBG(537, vp, uiop->uio_offset, len, 0); nfsm_uiotom(uiop, len); - nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred, &xid); - nmp = VFSTONFS(vp->v_mount); + nfsm_request(vp, NFSPROC_WRITE, p, cred, &xid); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) error = ENXIO; if (v3) { if (mrep) { - time_t premtime; - nfsm_wcc_data(vp, premtime, wccpostattr, &xid); - if (VTONFS(vp)->n_mtime == premtime) + struct timespec premtime; + nfsm_wcc_data(vp, &premtime, wccpostattr, &xid); + if (nfstimespeccmp(&VTONFS(vp)->n_mtime, &premtime, ==)) updatemtime = 1; } if (!error) { @@ -1743,10 +1841,10 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) break; } else if (rlen < len) { backup = len - rlen; - uiop->uio_iov->iov_base -= backup; - uiop->uio_iov->iov_len += backup; + uio_iov_base_add(uiop, -backup); + uio_iov_len_add(uiop, backup); uiop->uio_offset -= backup; - uiop->uio_resid += backup; + uio_uio_resid_add(uiop, backup); len = rlen; } commit = fxdr_unsigned(int, *tl++); @@ -1773,13 +1871,13 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) } } else { if (mrep) { - nfsm_loadattr(vp, (struct vattr *)0, &xid); + nfsm_loadattr(vp, v3, NULL, &xid); } } if (updatemtime) - VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; - m_freem(mrep); + VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.nva_mtime; + mbuf_freem(mrep); /* * we seem to have a case where we end up looping on shutdown * and taking down nfs servers. For V3, error cases, there is @@ -1792,12 +1890,12 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) tsiz -= len; } nfsmout: - if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) + if ((mp = vnode_mount(vp)) && (vfs_flags(mp) & MNT_ASYNC)) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) - uiop->uio_resid = tsiz; - FSDBG_BOT(537, vp, committed, uiop->uio_resid, error); + uio_uio_resid_set(uiop, tsiz); + FSDBG_BOT(537, vp, committed, uio_uio_resid(uiop), error); return (error); } @@ -1807,100 +1905,121 @@ nfsmout: * mode set to specify the file type and the size field for rdev. 
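 * (NFS v3 has a real MKNOD procedure, so there the device numbers go
 * out as explicit major/minor words instead)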
*/ static int -nfs_mknodrpc(dvp, vpp, cnp, vap) - register struct vnode *dvp; - register struct vnode **vpp; - register struct componentname *cnp; - register struct vattr *vap; +nfs_mknodrpc( + vnode_t dvp, + vnode_t *vpp, + struct componentname *cnp, + struct vnode_attr *vap, + kauth_cred_t cred, + proc_t p) { register struct nfsv2_sattr *sp; - register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; - struct vnode *newvp = (struct vnode *)0; + vnode_t newvp = (vnode_t)0; struct nfsnode *np = (struct nfsnode *)0; - struct vattr vattr; + struct nfs_vattr nvattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccpostattr = 0, gotvp = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; u_long rdev; u_int64_t xid; int v3 = NFS_ISV3(dvp); + int gotuid, gotgid; - if (vap->va_type == VCHR || vap->va_type == VBLK) + if (!VATTR_IS_ACTIVE(vap, va_type)) + return (EINVAL); + if (vap->va_type == VCHR || vap->va_type == VBLK) { + if (!VATTR_IS_ACTIVE(vap, va_rdev)) + return (EINVAL); rdev = txdr_unsigned(vap->va_rdev); - else if (vap->va_type == VFIFO || vap->va_type == VSOCK) + } else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { - VOP_ABORTOP(dvp, cnp); - vput(dvp); - return (EOPNOTSUPP); + return (ENOTSUP); } - if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) { - VOP_ABORTOP(dvp, cnp); - vput(dvp); + nfsm_reqhead(NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); + if (error) return (error); - } - nfsstats.rpccnt[NFSPROC_MKNOD]++; - nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + - + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); + + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_data_size); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); + + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_MKNOD]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN, v3); if (v3) { - nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); + nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl++ = vtonfsv3_type(vap->va_type); - sp3 = (struct nfsv3_sattr *)tl; - nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); + nfsm_v3sattr(vap); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { + struct timespec neg1time = { -1, -1 }; nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); - sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); - sp->sa_gid = txdr_unsigned(vattr.va_gid); + sp->sa_mode = vtonfsv2_mode(vap->va_type, + (VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600)); + sp->sa_uid = gotuid ? (u_long)txdr_unsigned(vap->va_uid) : nfs_xdrneg1; + sp->sa_gid = gotgid ? 
(u_long)txdr_unsigned(vap->va_gid) : nfs_xdrneg1; sp->sa_size = rdev; - txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); - txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); - } - nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred, &xid); - if (!error) { - nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); - if (!gotvp) { - if (newvp) { - vput(newvp); - newvp = (struct vnode *)0; - } + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + txdr_nfsv2time(&vap->va_access_time, &sp->sa_atime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_atime); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + txdr_nfsv2time(&vap->va_modify_time, &sp->sa_mtime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_mtime); + } + } + nfsm_request(dvp, NFSPROC_MKNOD, p, cred, &xid); + /* XXX no EEXIST kludge here? */ + if (!error) { + nfsm_mtofh(dvp, cnp, newvp, v3, &xid, gotvp); + if (!gotvp) { error = nfs_lookitup(dvp, cnp->cn_nameptr, - cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); + cnp->cn_namelen, cred, p, &np); if (!error) newvp = NFSTOV(np); } } if (v3 && mrep) - nfsm_wcc_data(dvp, premtime, wccpostattr, &xid); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &xid); + if (!error && (gotuid || gotgid) && + (!newvp || nfs_getattrcache(newvp, &nvattr) || + (gotuid && (nvattr.nva_uid != vap->va_uid)) || + (gotgid && (nvattr.nva_gid != vap->va_gid)))) { + /* clear ID bits if server didn't use them (or we can't tell) */ + VATTR_CLEAR_SUPPORTED(vap, va_uid); + VATTR_CLEAR_SUPPORTED(vap, va_gid); + } nfsm_reqdone; if (error) { if (newvp) - vput(newvp); + vnode_put(newvp); } else { - if (cnp->cn_flags & MAKEENTRY) - cache_enter(dvp, newvp, cnp); *vpp = newvp; } VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; - vput(dvp); - NFS_FREE_PNBUF(cnp); + NATTRINVALIDATE(VTONFS(dvp)); return (error); } @@ -1911,20 +2030,21 @@ nfs_mknodrpc(dvp, vpp, cnp, vap) /* ARGSUSED */ static int nfs_mknod(ap) - struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; + struct vnop_mknod_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - struct vnode *newvp; int error; - error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); - if (!error && newvp) - vput(newvp); - *ap->a_vpp = 0; + error = nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); + return (error); } @@ -1934,50 +2054,66 @@ static u_long create_verf; */ static int nfs_create(ap) - struct vop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; + struct vnop_create_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct nfsv2_sattr *sp; - register struct nfsv3_sattr *sp3; - register u_long *tl; - register caddr_t cp; - register long t1, t2; + vnode_t dvp = ap->a_dvp; + struct vnode_attr *vap = ap->a_vap; + struct componentname *cnp = 
ap->a_cnp; + struct nfs_vattr nvattr; + struct nfsv2_sattr *sp; + u_long *tl; + caddr_t cp; + long t1, t2; struct nfsnode *np = (struct nfsnode *)0; - struct vnode *newvp = (struct vnode *)0; + vnode_t newvp = (vnode_t)0; caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0, gotvp = 0, fmode = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - struct vattr vattr; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; int v3 = NFS_ISV3(dvp); + int gotuid, gotgid; u_int64_t xid; + kauth_cred_t cred; + proc_t p; + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + if (!VATTR_IS_ACTIVE(vap, va_type)) + return (EINVAL); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) - return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); + return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap, cred, p)); + + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_data_size); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); - if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) { - VOP_ABORTOP(dvp, cnp); - vput(dvp); - return (error); - } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: - nfsstats.rpccnt[NFSPROC_CREATE]++; - nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + + nfsm_reqhead(NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_CREATE]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN, v3); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { @@ -1990,35 +2126,39 @@ again: *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); - nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); - sp3 = (struct nfsv3_sattr *)tl; - nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); + nfsm_v3sattr(vap); } } else { + struct timespec neg1time = { -1, -1 }; nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); - sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); - sp->sa_gid = txdr_unsigned(vattr.va_gid); + sp->sa_mode = vtonfsv2_mode(vap->va_type, + (VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600)); + sp->sa_uid = gotuid ? (u_long)txdr_unsigned(vap->va_uid) : nfs_xdrneg1; + sp->sa_gid = gotgid ? 
(u_long)txdr_unsigned(vap->va_gid) : nfs_xdrneg1; sp->sa_size = 0; - txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); - txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + txdr_nfsv2time(&vap->va_access_time, &sp->sa_atime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_atime); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + txdr_nfsv2time(&vap->va_modify_time, &sp->sa_mtime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_mtime); + } } - nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_request(dvp, NFSPROC_CREATE, p, cred, &xid); if (!error) { - nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); + nfsm_mtofh(dvp, cnp, newvp, v3, &xid, gotvp); if (!gotvp) { - if (newvp) { - vput(newvp); - newvp = (struct vnode *)0; - } error = nfs_lookitup(dvp, cnp->cn_nameptr, - cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); + cnp->cn_namelen, cred, p, &np); if (!error) newvp = NFSTOV(np); } } if (v3 && mrep) - nfsm_wcc_data(dvp, premtime, wccpostattr, &xid); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &xid); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { @@ -2026,25 +2166,36 @@ again: goto again; } if (newvp) - vput(newvp); + vnode_put(newvp); } else if (v3 && (fmode & O_EXCL)) { - error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); + error = nfs_setattrrpc(newvp, vap, cred, p); + if (error && (gotuid || gotgid)) { + /* it's possible the server didn't like our attempt to set IDs. */ + /* so, let's try it again without those */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = nfs_setattrrpc(newvp, vap, cred, p); + } if (error) - vput(newvp); + vnode_put(newvp); } if (!error) { - if (cnp->cn_flags & MAKEENTRY) - cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; - vput(dvp); - NFS_FREE_PNBUF(cnp); + NATTRINVALIDATE(VTONFS(dvp)); + if (!error && (gotuid || gotgid) && + (!newvp || nfs_getattrcache(newvp, &nvattr) || + (gotuid && (nvattr.nva_uid != vap->va_uid)) || + (gotgid && (nvattr.nva_gid != vap->va_gid)))) { + /* clear ID bits if server didn't use them (or we can't tell) */ + VATTR_CLEAR_SUPPORTED(vap, va_uid); + VATTR_CLEAR_SUPPORTED(vap, va_gid); + } return (error); } @@ -2053,7 +2204,7 @@ again: * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
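 * (the "silly rename" moves the file to a hidden temporary name on the
 * server, so its data stays reachable until the last close even though
 * the protocol itself is stateless)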
- * - If v_usecount > 1 + * - If vnode_isinuse() * If a rename is not already in the works * call nfs_sillyrename() to set it up * else @@ -2061,53 +2212,35 @@ again: */ static int nfs_remove(ap) - struct vop_remove_args /* { + struct vnop_remove_args /* { struct vnodeop_desc *a_desc; - struct vnode * a_dvp; - struct vnode * a_vp; - struct componentname * a_cnp; + vnode_t a_dvp; + vnode_t a_vp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; - register struct componentname *cnp = ap->a_cnp; - register struct nfsnode *np = VTONFS(vp); + vnode_t vp = ap->a_vp; + vnode_t dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct nfsnode *np = VTONFS(vp); int error = 0, gofree = 0; - struct vattr vattr; + struct nfs_vattr nvattr; + kauth_cred_t cred; + proc_t p; -#if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("nfs_remove: no name"); - if (vp->v_usecount < 1) - panic("nfs_remove: bad v_usecount"); -#endif + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); - if (UBCISVALID(vp)) { - /* regular files */ - if (UBCINFOEXISTS(vp)) - gofree = (ubc_isinuse(vp, 1)) ? 0 : 1; - else { - /* dead or dying vnode.With vnode locking panic instead of error */ - vput(dvp); - vput(vp); - NFS_FREE_PNBUF(cnp); - return (EIO); - } - } else { - /* UBC not in play */ - if (vp->v_usecount == 1) - gofree = 1; - } - if ((ap->a_cnp->cn_flags & NODELETEBUSY) && !gofree) { + gofree = vnode_isinuse(vp, 0) ? 0 : 1; + if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && !gofree) { /* Caller requested Carbon delete semantics, but file is busy */ - vput(dvp); - vput(vp); - NFS_FREE_PNBUF(cnp); return (EBUSY); } if (gofree || (np->n_sillyrename && - VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && - vattr.va_nlink > 1)) { + nfs_getattr(vp, &nvattr, cred, p) == 0 && + nvattr.nva_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is @@ -2118,13 +2251,13 @@ nfs_remove(ap) * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ - error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); + error = nfs_vinvalbuf(vp, 0, cred, p, 1); np->n_size = 0; ubc_setsize(vp, (off_t)0); /* XXX check error */ /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, - cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); + cnp->cn_namelen, cred, p); /* * Kludge City: If the first reply to the remove rpc is lost.. 
* the reply to the retransmitted request will be ENOENT @@ -2139,19 +2272,21 @@ nfs_remove(ap) * again if another object gets created with the same filehandle * before this vnode gets reclaimed */ + lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); np->n_flag &= ~NHASHED; + lck_mtx_unlock(nfs_node_hash_mutex); + } + if (!error && !np->n_sillyrename) { + /* clear flags now: won't get nfs_inactive for recycled vnode */ + /* clear all flags other than these */ + np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NHASHED); + vnode_recycle(vp); } } else if (!np->n_sillyrename) { - error = nfs_sillyrename(dvp, vp, cnp); + error = nfs_sillyrename(dvp, vp, cnp, cred, p); } - np->n_xid = 0; - vput(dvp); - - VOP_UNLOCK(vp, 0, cnp->cn_proc); - NFS_FREE_PNBUF(cnp); - ubc_uncache(vp); - vrele(vp); + NATTRINVALIDATE(np); return (error); } @@ -2160,12 +2295,9 @@ nfs_remove(ap) * nfs file remove rpc called from nfs_inactive */ int -nfs_removeit(sp) - register struct sillyrename *sp; +nfs_removeit(struct sillyrename *sp) { - - return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, - (struct proc *)0)); + return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* @@ -2173,41 +2305,42 @@ nfs_removeit(sp) */ static int nfs_removerpc(dvp, name, namelen, cred, proc) - register struct vnode *dvp; + vnode_t dvp; char *name; int namelen; - struct ucred *cred; - struct proc *proc; + kauth_cred_t cred; + proc_t proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; int v3; u_int64_t xid; - if (!VFSTONFS(dvp->v_mount)) + if (!VFSTONFS(vnode_mount(dvp))) return (ENXIO); v3 = NFS_ISV3(dvp); - nfsstats.rpccnt[NFSPROC_REMOVE]++; - nfsm_reqhead(dvp, NFSPROC_REMOVE, - NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_REMOVE]); nfsm_fhtom(dvp, v3); - nfsm_strtom(name, namelen, NFS_MAXNAMLEN); + nfsm_strtom(name, namelen, NFS_MAXNAMLEN, v3); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred, &xid); if (v3 && mrep) - nfsm_wcc_data(dvp, premtime, wccpostattr, &xid); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &xid); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(dvp)); return (error); } @@ -2216,34 +2349,40 @@ nfs_removerpc(dvp, name, namelen, cred, proc) */ static int nfs_rename(ap) - struct vop_rename_args /* { - struct vnode *a_fdvp; - struct vnode *a_fvp; + struct vnop_rename_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_fdvp; + vnode_t a_fvp; struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; + vnode_t a_tdvp; + vnode_t a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ *ap; { - register struct vnode *fvp = ap->a_fvp; - register struct vnode *tvp = ap->a_tvp; - register struct vnode *fdvp = ap->a_fdvp; - register struct vnode *tdvp = ap->a_tdvp; - register struct componentname *tcnp = ap->a_tcnp; - register struct 
componentname *fcnp = ap->a_fcnp; + vnode_t fvp = ap->a_fvp; + vnode_t tvp = ap->a_tvp; + vnode_t fdvp = ap->a_fdvp; + vnode_t tdvp = ap->a_tdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; int error, inuse=0; + mount_t fmp, tdmp, tmp; + struct nfsnode *tnp; + kauth_cred_t cred; + proc_t p; + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + tnp = tvp ? VTONFS(tvp) : NULL; -#if DIAGNOSTIC - if ((tcnp->cn_flags & HASBUF) == 0 || - (fcnp->cn_flags & HASBUF) == 0) - panic("nfs_rename: no name"); -#endif /* Check for cross-device rename */ - if ((fvp->v_mount != tdvp->v_mount) || - (tvp && (fvp->v_mount != tvp->v_mount))) { + fmp = vnode_mount(fvp); + tmp = tvp ? vnode_mount(tvp) : NULL; + tdmp = vnode_mount(tdvp); + if ((fmp != tdmp) || (tvp && (fmp != tmp))) { error = EXDEV; - if (tvp) - VOP_UNLOCK(tvp, 0, tcnp->cn_proc); goto out; } @@ -2255,66 +2394,54 @@ nfs_rename(ap) * links or case-variants) */ if (tvp && tvp != fvp) { - if (UBCISVALID(tvp)) { - /* regular files */ - if (UBCINFOEXISTS(tvp)) - inuse = (ubc_isinuse(tvp, 1)) ? 1 : 0; - else { - /* dead or dying vnode.With vnode locking panic instead of error */ - error = EIO; - VOP_UNLOCK(tvp, 0, tcnp->cn_proc); - goto out; - } - } else { - /* UBC not in play */ - if (tvp->v_usecount > 1) - inuse = 1; - } + inuse = vnode_isinuse(tvp, 0); } - if (inuse && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR) { - if (error = nfs_sillyrename(tdvp, tvp, tcnp)) { + if (inuse && !tnp->n_sillyrename && vnode_vtype(tvp) != VDIR) { + if ((error = nfs_sillyrename(tdvp, tvp, tcnp, cred, p))) { /* sillyrename failed. Instead of pressing on, return error */ - VOP_UNLOCK(tvp, 0, tcnp->cn_proc); goto out; /* should not be ENOENT. */ } else { /* sillyrename succeeded.*/ - VOP_UNLOCK(tvp, 0, tcnp->cn_proc); - ubc_uncache(tvp); /* get the nfs turd file to disappear */ - vrele(tvp); tvp = NULL; } } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, - tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, - tcnp->cn_proc); + tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, cred, p); - if (!error && tvp && tvp != fvp && !VTONFS(tvp)->n_sillyrename) { + /* + * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. + */ + if (error == ENOENT) + error = 0; + + if (!error && tvp && tvp != fvp && !tnp->n_sillyrename) { /* * remove nfsnode from hash now so we can't accidentally find it * again if another object gets created with the same filehandle * before this vnode gets reclaimed */ - LIST_REMOVE(VTONFS(tvp), n_hash); - VTONFS(tvp)->n_flag &= ~NHASHED; + lck_mtx_lock(nfs_node_hash_mutex); + LIST_REMOVE(tnp, n_hash); + tnp->n_flag &= ~NHASHED; + lck_mtx_unlock(nfs_node_hash_mutex); } + /* purge the old name cache entries and enter the new one */ cache_purge(fvp); if (tvp) { cache_purge(tvp); - VOP_UNLOCK(tvp, 0, tcnp->cn_proc); - ubc_uncache(tvp); /* get the nfs turd file to disappear */ + if (!error && !tnp->n_sillyrename) { + /* clear flags now: won't get nfs_inactive for recycled vnode */ + /* clear all flags other than these */ + tnp->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NHASHED); + vnode_recycle(tvp); + } } - + if (!error) + cache_enter(tdvp, fvp, tcnp); + out: - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vrele(tvp); /* already unlocked */ - vrele(fdvp); - vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. 
*/ @@ -2324,74 +2451,62 @@ out: } /* - * nfs file rename rpc called from nfs_remove() above - */ -static int -nfs_renameit(sdvp, scnp, sp) - struct vnode *sdvp; - struct componentname *scnp; - register struct sillyrename *sp; -{ - return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, - sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); -} - -/* - * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). + * Do an nfs rename rpc. Called from nfs_rename() and nfs_sillyrename(). */ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) - register struct vnode *fdvp; + vnode_t fdvp; char *fnameptr; int fnamelen; - register struct vnode *tdvp; + vnode_t tdvp; char *tnameptr; int tnamelen; - struct ucred *cred; - struct proc *proc; + kauth_cred_t cred; + proc_t proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccpostattr = 0, twccpostattr = 0; - time_t fpremtime = 0, tpremtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct timespec fpremtime = { 0, 0 }, tpremtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; int v3; u_int64_t xid; - if (!VFSTONFS(fdvp->v_mount)) + if (!VFSTONFS(vnode_mount(fdvp))) return (ENXIO); v3 = NFS_ISV3(fdvp); - nfsstats.rpccnt[NFSPROC_RENAME]++; - nfsm_reqhead(fdvp, NFSPROC_RENAME, - (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + + nfsm_reqhead((NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_RENAME]); nfsm_fhtom(fdvp, v3); - nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); + nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN, v3); nfsm_fhtom(tdvp, v3); - nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); + nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN, v3); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred, &xid); if (v3 && mrep) { u_int64_t txid = xid; - nfsm_wcc_data(fdvp, fpremtime, fwccpostattr, &xid); - nfsm_wcc_data(tdvp, tpremtime, twccpostattr, &txid); + nfsm_wcc_data(fdvp, &fpremtime, fwccpostattr, &xid); + nfsm_wcc_data(tdvp, &tpremtime, twccpostattr, &txid); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(fdvp)->n_ncmtime == fpremtime) - VTONFS(fdvp)->n_ncmtime = VTONFS(fdvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(fdvp)->n_ncmtime, &fpremtime, ==)) + VTONFS(fdvp)->n_ncmtime = VTONFS(fdvp)->n_vattr.nva_mtime; if (!fwccpostattr) - VTONFS(fdvp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(fdvp)); VTONFS(tdvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(tdvp)->n_ncmtime == tpremtime) - VTONFS(tdvp)->n_ncmtime = VTONFS(tdvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(tdvp)->n_ncmtime, &tpremtime, ==)) + VTONFS(tdvp)->n_ncmtime = VTONFS(tdvp)->n_vattr.nva_mtime; if (!twccpostattr) - VTONFS(tdvp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(tdvp)); return (error); } @@ -2400,44 +2515,36 @@ nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) */ static int nfs_link(ap) - struct vop_link_args /* { - struct vnode *a_vp; - struct vnode *a_tdvp; + struct vnop_link_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vnode_t a_tdvp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct vnode *tdvp = ap->a_tdvp; - register struct componentname *cnp = ap->a_cnp; - register u_long *tl; - 
register caddr_t cp; - register long t1, t2; + vnode_t vp = ap->a_vp; + vnode_t tdvp = ap->a_tdvp; + struct componentname *cnp = ap->a_cnp; + u_long *tl; + caddr_t cp; + long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0, attrflag = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - int v3, didhold; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; + int v3; u_int64_t xid; + kauth_cred_t cred; + proc_t p; - if (vp->v_mount != tdvp->v_mount) { - VOP_ABORTOP(vp, cnp); - vput(tdvp); + if (vnode_mount(vp) != vnode_mount(tdvp)) { return (EXDEV); } - /* need to get vnode lock for vp before calling VOP_FSYNC() */ - if (error = vn_lock(vp, LK_EXCLUSIVE, cnp->cn_proc)) { - VOP_ABORTOP(vp, cnp); - vput(tdvp); - return (error); - } + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); - if (!VFSTONFS(vp->v_mount)) { - VOP_UNLOCK(vp, 0, cnp->cn_proc); - VOP_ABORTOP(vp, cnp); - vput(tdvp); - return (ENXIO); - } v3 = NFS_ISV3(vp); /* @@ -2445,37 +2552,32 @@ nfs_link(ap) * doesn't get "out of sync" with the server. * XXX There should be a better way! */ - didhold = ubc_hold(vp); - VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); - VOP_UNLOCK(vp, 0, cnp->cn_proc); + nfs_flush(vp, MNT_WAIT, cred, p, 0); - nfsstats.rpccnt[NFSPROC_LINK]++; - nfsm_reqhead(vp, NFSPROC_LINK, - NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + nfsm_reqhead(NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_LINK]); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); - nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); - nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN, v3); + nfsm_request(vp, NFSPROC_LINK, p, cred, &xid); if (v3 && mrep) { u_int64_t txid = xid; - nfsm_postop_attr(vp, attrflag, &xid); - nfsm_wcc_data(tdvp, premtime, wccpostattr, &txid); + nfsm_postop_attr_update(vp, v3, attrflag, &xid); + nfsm_wcc_data(tdvp, &premtime, wccpostattr, &txid); } nfsm_reqdone; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) - VTONFS(vp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(vp)); /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(tdvp)->n_ncmtime == premtime) - VTONFS(tdvp)->n_ncmtime = VTONFS(tdvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(tdvp)->n_ncmtime, &premtime, ==)) + VTONFS(tdvp)->n_ncmtime = VTONFS(tdvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(tdvp)->n_xid = 0; - if (didhold) - ubc_rele(vp); - vput(tdvp); - NFS_FREE_PNBUF(cnp); + NATTRINVALIDATE(VTONFS(tdvp)); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
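The "Kludge: Map EEXIST => 0" above, together with the matching ENOENT mappings in the remove/rename/rmdir hunks, all handle the same retransmission hazard: these RPCs are not idempotent, so if the reply to the first transmission is lost, the server has already performed the operation and the reply to the retried request reports a spurious failure. A minimal sketch of the pattern follows; the helper itself is hypothetical, though the NFSPROC_* constants and error mappings mirror the hunks above:

	/*
	 * Sketch only: squash the spurious error a retransmitted,
	 * non-idempotent NFS RPC can return.  rpc_fixup_retry_error()
	 * is an illustrative helper, not part of this patch.
	 */
	static int
	rpc_fixup_retry_error(int procnum, int error)
	{
		switch (procnum) {
		case NFSPROC_REMOVE:
		case NFSPROC_RENAME:
		case NFSPROC_RMDIR:
			if (error == ENOENT)	/* retry found it already gone */
				error = 0;
			break;
		case NFSPROC_LINK:
		case NFSPROC_SYMLINK:
		case NFSPROC_MKDIR:
			if (error == EEXIST)	/* retry found it already made */
				error = 0;
			break;
		}
		return (error);
	}

The cost of the mapping is that a genuine EEXIST/ENOENT raced in by another client is masked as well, which is why the symlink and mkdir paths below confirm the result with nfs_lookitup() and a vnode-type check before trusting it.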
*/ @@ -2489,76 +2591,127 @@ nfs_link(ap) */ static int nfs_symlink(ap) - struct vop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; + struct vnop_symlink_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; char *a_target; + vfs_context_t a_context; } */ *ap; { - register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct nfsv2_sattr *sp; - register struct nfsv3_sattr *sp3; - register u_long *tl; - register caddr_t cp; - register long t1, t2; + vnode_t dvp = ap->a_dvp; + struct vnode_attr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct nfs_vattr nvattr; + struct nfsv2_sattr *sp; + u_long *tl; + caddr_t cp; + long t1, t2; caddr_t bpos, dpos, cp2; - int slen, error = 0, wccpostattr = 0, gotvp; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - struct vnode *newvp = (struct vnode *)0; + int slen, error = 0, wccpostattr = 0, gotvp = 0; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; + vnode_t newvp = (vnode_t)0; int v3 = NFS_ISV3(dvp); + int gotuid, gotgid; u_int64_t xid; + kauth_cred_t cred; + proc_t p; + struct nfsnode *np = NULL; + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); - nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); - nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + + nfsm_reqhead(NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); + if (error) + return (error); + + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_data_size); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); + + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_SYMLINK]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN, v3); if (v3) { - nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); - nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, - cnp->cn_cred->cr_gid); + nfsm_v3sattr(vap); } - nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); + nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN, v3); if (!v3) { + struct timespec neg1time = { -1, -1 }; nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); - sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); - sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); - sp->sa_size = -1; - txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); - txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + sp->sa_mode = vtonfsv2_mode(VLNK, + (VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600)); + sp->sa_uid = gotuid ? (u_long)txdr_unsigned(vap->va_uid) : nfs_xdrneg1; + sp->sa_gid = gotgid ? 
(u_long)txdr_unsigned(vap->va_gid) : nfs_xdrneg1; + sp->sa_size = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + txdr_nfsv2time(&vap->va_access_time, &sp->sa_atime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_atime); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + txdr_nfsv2time(&vap->va_modify_time, &sp->sa_mtime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_mtime); + } } - nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_request(dvp, NFSPROC_SYMLINK, p, cred, &xid); if (v3 && mrep) { u_int64_t dxid = xid; if (!error) - nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); - nfsm_wcc_data(dvp, premtime, wccpostattr, &dxid); + nfsm_mtofh(dvp, cnp, newvp, v3, &xid, gotvp); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &dxid); } nfsm_reqdone; - if (newvp) - vput(newvp); VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; - vput(dvp); - NFS_FREE_PNBUF(cnp); + NATTRINVALIDATE(VTONFS(dvp)); + /* - * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. + * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry + * if we can succeed in looking up the symlink. */ - if (error == EEXIST) - error = 0; + if ((error == EEXIST) || (!error && !gotvp)) { + if (newvp) { + vnode_put(newvp); + newvp = NULL; + } + error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cred, p, &np); + if (!error) { + newvp = NFSTOV(np); + if (vnode_vtype(newvp) != VLNK) + error = EEXIST; + } + } + if (!error && (gotuid || gotgid) && + (!newvp || nfs_getattrcache(newvp, &nvattr) || + (gotuid && (nvattr.nva_uid != vap->va_uid)) || + (gotgid && (nvattr.nva_gid != vap->va_gid)))) { + /* clear ID bits if server didn't use them (or we can't tell) */ + VATTR_CLEAR_SUPPORTED(vap, va_uid); + VATTR_CLEAR_SUPPORTED(vap, va_gid); + } + if (error) { + if (newvp) + vnode_put(newvp); + } else { + *ap->a_vpp = newvp; + } return (error); } @@ -2567,96 +2720,121 @@ nfs_symlink(ap) */ static int nfs_mkdir(ap) - struct vop_mkdir_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; + struct vnop_mkdir_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct nfsv2_sattr *sp; - register struct nfsv3_sattr *sp3; - register u_long *tl; - register caddr_t cp; - register long t1, t2; - register int len; + vnode_t dvp = ap->a_dvp; + struct vnode_attr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct nfs_vattr nvattr; + struct nfsv2_sattr *sp; + u_long *tl; + caddr_t cp; + long t1, t2; + int len; struct nfsnode *np = (struct nfsnode *)0; - struct vnode *newvp = (struct vnode *)0; + vnode_t newvp = (vnode_t)0; caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0; - time_t premtime = 0; + struct timespec premtime = { 0, 0 }; int gotvp = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - struct vattr vattr; + mbuf_t mreq, mrep, md, mb, mb2; int v3 = NFS_ISV3(dvp); + int gotuid, gotgid; u_int64_t xid, dxid; + kauth_cred_t cred; + proc_t p; + + cred = 
vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); - if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) { - VOP_ABORTOP(dvp, cnp); - vput(dvp); - return (error); - } len = cnp->cn_namelen; - nfsstats.rpccnt[NFSPROC_MKDIR]++; - nfsm_reqhead(dvp, NFSPROC_MKDIR, - NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); + if (error) + return (error); + + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + VATTR_SET_SUPPORTED(vap, va_data_size); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); + + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_MKDIR]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); + nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN, v3); if (v3) { - nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); - nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); + nfsm_v3sattr(vap); } else { + struct timespec neg1time = { -1, -1 }; nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); - sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); - sp->sa_gid = txdr_unsigned(vattr.va_gid); - sp->sa_size = -1; - txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); - txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); + sp->sa_mode = vtonfsv2_mode(VDIR, + (VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600)); + sp->sa_uid = gotuid ? (u_long)txdr_unsigned(vap->va_uid) : nfs_xdrneg1; + sp->sa_gid = gotgid ? (u_long)txdr_unsigned(vap->va_gid) : nfs_xdrneg1; + sp->sa_size = nfs_xdrneg1; + if (VATTR_IS_ACTIVE(vap, va_access_time)) { + txdr_nfsv2time(&vap->va_access_time, &sp->sa_atime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_atime); + } + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { + txdr_nfsv2time(&vap->va_modify_time, &sp->sa_mtime); + } else { + txdr_nfsv2time(&neg1time, &sp->sa_mtime); + } } - nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_request(dvp, NFSPROC_MKDIR, p, cred, &xid); dxid = xid; if (!error) - nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); + nfsm_mtofh(dvp, cnp, newvp, v3, &xid, gotvp); if (v3 && mrep) - nfsm_wcc_data(dvp, premtime, wccpostattr, &dxid); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &dxid); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(dvp)); /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. 
*/ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { - vput(newvp); - newvp = (struct vnode *)0; + vnode_put(newvp); + newvp = NULL; } - error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, - cnp->cn_proc, &np); + error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cred, p, &np); if (!error) { newvp = NFSTOV(np); - if (newvp->v_type != VDIR) + if (vnode_vtype(newvp) != VDIR) error = EEXIST; } } + if (!error && (gotuid || gotgid) && + (!newvp || nfs_getattrcache(newvp, &nvattr) || + (gotuid && (nvattr.nva_uid != vap->va_uid)) || + (gotgid && (nvattr.nva_gid != vap->va_gid)))) { + /* clear ID bits if server didn't use them (or we can't tell) */ + VATTR_CLEAR_SUPPORTED(vap, va_uid); + VATTR_CLEAR_SUPPORTED(vap, va_gid); + } if (error) { if (newvp) - vput(newvp); + vnode_put(newvp); } else { - if (cnp->cn_flags & MAKEENTRY) - cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } - vput(dvp); - NFS_FREE_PNBUF(cnp); return (error); } @@ -2665,40 +2843,48 @@ nfs_mkdir(ap) */ static int nfs_rmdir(ap) - struct vop_rmdir_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; + struct vnop_rmdir_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; - register struct componentname *cnp = ap->a_cnp; - register u_long *tl; - register caddr_t cp; - register long t1, t2; + vnode_t vp = ap->a_vp; + vnode_t dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + u_long *tl; + caddr_t cp; + long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; int v3 = NFS_ISV3(dvp); u_int64_t xid; + kauth_cred_t cred; + proc_t p; - nfsstats.rpccnt[NFSPROC_RMDIR]++; - nfsm_reqhead(dvp, NFSPROC_RMDIR, - NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_RMDIR]); nfsm_fhtom(dvp, v3); - nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); - nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred, &xid); + nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN, v3); + nfsm_request(dvp, NFSPROC_RMDIR, p, cred, &xid); if (v3 && mrep) - nfsm_wcc_data(dvp, premtime, wccpostattr, &xid); + nfsm_wcc_data(dvp, &premtime, wccpostattr, &xid); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ - if (VTONFS(dvp)->n_ncmtime == premtime) - VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.va_mtime.tv_sec; + if (nfstimespeccmp(&VTONFS(dvp)->n_ncmtime, &premtime, ==)) + VTONFS(dvp)->n_ncmtime = VTONFS(dvp)->n_vattr.nva_mtime; if (!wccpostattr) - VTONFS(dvp)->n_xid = 0; + NATTRINVALIDATE(VTONFS(dvp)); cache_purge(vp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
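The create, symlink, and mkdir hunks above all speak the same struct vnode_attr protocol: the caller flags the attributes it wants via the active bits, the filesystem answers with VATTR_SET_SUPPORTED for whatever it actually handled (clearing va_uid/va_gid again if it cannot show the server honored them), and any field left inactive is encoded as -1 (nfs_xdrneg1) in the NFSv2 sattr so the server supplies the default. Condensed from those hunks into one helper — pack_v2_sattr() itself is a hypothetical consolidation, but the macros and fields are the ones used above:

	/*
	 * Sketch: NFSv2 sattr packing as done by the create/symlink/mkdir
	 * hunks.  -1 (nfs_xdrneg1, or { -1, -1 } for times) means
	 * "server, use your default"; 0600 is the fallback mode.
	 */
	static void
	pack_v2_sattr(struct nfsv2_sattr *sp, struct vnode_attr *vap, enum vtype vt)
	{
		struct timespec neg1time = { -1, -1 };

		sp->sa_mode = vtonfsv2_mode(vt,
		    VATTR_IS_ACTIVE(vap, va_mode) ? vap->va_mode : 0600);
		sp->sa_uid = VATTR_IS_ACTIVE(vap, va_uid) ?
		    (u_long)txdr_unsigned(vap->va_uid) : nfs_xdrneg1;
		sp->sa_gid = VATTR_IS_ACTIVE(vap, va_gid) ?
		    (u_long)txdr_unsigned(vap->va_gid) : nfs_xdrneg1;
		sp->sa_size = nfs_xdrneg1; /* nfs_create uses 0 here for regular files */
		txdr_nfsv2time(VATTR_IS_ACTIVE(vap, va_access_time) ?
		    &vap->va_access_time : &neg1time, &sp->sa_atime);
		txdr_nfsv2time(VATTR_IS_ACTIVE(vap, va_modify_time) ?
		    &vap->va_modify_time : &neg1time, &sp->sa_mtime);
	}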
@@ -2711,12 +2897,11 @@ nfs_rmdir(ap) * again if another object gets created with the same filehandle * before this vnode gets reclaimed */ + lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(VTONFS(vp), n_hash); VTONFS(vp)->n_flag &= ~NHASHED; + lck_mtx_unlock(nfs_node_hash_mutex); } - vput(vp); - vput(dvp); - NFS_FREE_PNBUF(cnp); return (error); } @@ -2725,36 +2910,41 @@ nfs_rmdir(ap) */ static int nfs_readdir(ap) - struct vop_readdir_args /* { - struct vnode *a_vp; + struct vnop_readdir_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long **a_cookies; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - register struct uio *uio = ap->a_uio; + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct uio *uio = ap->a_uio; int tresid, error; - struct vattr vattr; + struct nfs_vattr nvattr; + kauth_cred_t cred; + proc_t p; - if (vp->v_type != VDIR) + if (vnode_vtype(vp) != VDIR) return (EPERM); + + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); + /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { - if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { - if (NQNFS_CKCACHABLE(vp, ND_READ)) { - nfsstats.direofcache_hits++; - return (0); - } - } else if (!VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp)) { - if (np->n_mtime == vattr.va_mtime.tv_sec) { - nfsstats.direofcache_hits++; + if (!nfs_getattr(vp, &nvattr, cred, p)) { + if (nfstimespeccmp(&np->n_mtime, &nvattr.nva_mtime, ==)) { + OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_hits); return (0); } - if (np->n_ncmtime != vattr.va_mtime.tv_sec) { + if (nfstimespeccmp(&np->n_ncmtime, &nvattr.nva_mtime, !=)) { /* directory changed, purge any name cache entries */ cache_purge(vp); } @@ -2764,11 +2954,12 @@ nfs_readdir(ap) /* * Call nfs_bioread() to do the real work. */ - tresid = uio->uio_resid; - error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); + // LP64todo - fix this + tresid = uio_uio_resid(uio); + error = nfs_bioread(vp, uio, 0, cred, p); - if (!error && uio->uio_resid == tresid) - nfsstats.direofcache_misses++; + if (!error && uio_uio_resid(uio) == tresid) + OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_misses); return (error); } @@ -2777,20 +2968,20 @@ nfs_readdir(ap) * Called from below the buffer cache by nfs_doio(). 
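Both readdir parsers below have to reconcile NFSv3 names, which can be longer than struct dirent's d_name will hold, with the fixed-size records they build: where the old code rejected len > NFS_MAXNAMLEN with EBADRPC, the new code clips the name to MAXNAMLEN and advances over the leftover bytes (skiplen) so it stays in sync with the XDR stream. The shape of that step, with xdr_copy()/xdr_skip() as hypothetical stand-ins for the nfsm_mtouio/nfsm_adv plumbing used in the real hunks (XDR padding elided for brevity):

	/*
	 * Sketch of the name-truncation step in the readdir hunks below.
	 * The xdr_* helpers and xdrbuf type are illustrative; only the
	 * clip-and-skip logic mirrors this patch.
	 */
	static int
	decode_entry_name(struct xdrbuf *xb, struct dirent *dp, int len)
	{
		int skiplen = 0;

		if (len <= 0)
			return (EBADRPC);		/* malformed reply */
		if (len > MAXNAMLEN) {
			skiplen = len - MAXNAMLEN;	/* tail that won't fit */
			len = MAXNAMLEN;
		}
		xdr_copy(xb, dp->d_name, len);		/* keep what fits */
		dp->d_name[len] = '\0';			/* null terminate */
		dp->d_namlen = len;
		if (skiplen)
			xdr_skip(xb, skiplen);		/* stay in sync */
		return (0);
	}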
*/ int -nfs_readdirrpc(vp, uiop, cred) - struct vnode *vp; - register struct uio *uiop; - struct ucred *cred; - +nfs_readdirrpc( + vnode_t vp, + struct uio *uiop, + kauth_cred_t cred, + proc_t p) { - register int len, left; + register int len, skiplen, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + mbuf_t mreq, mrep, md, mb, mb2; nfsuint64 cookie; struct nfsmount *nmp; struct nfsnode *dnp = VTONFS(vp); @@ -2805,10 +2996,10 @@ nfs_readdirrpc(vp, uiop, cred) #endif #if DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || - (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) + (uio_uio_resid(uiop) & (NFS_DIRBLKSIZ - 1))) panic("nfs_readdirrpc: bad uio"); #endif - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); v3 = NFS_ISV3(vp); @@ -2828,9 +3019,10 @@ nfs_readdirrpc(vp, uiop, cred) * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { - nfsstats.rpccnt[NFSPROC_READDIR]++; - nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + - NFSX_READDIR(v3)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_READDIR(v3)); + if (error) + goto nfsmout; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_READDIR]); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); @@ -2843,17 +3035,17 @@ nfs_readdirrpc(vp, uiop, cred) *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmreaddirsize); - nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred, &xid); + nfsm_request(vp, NFSPROC_READDIR, p, cred, &xid); if (v3) { if (mrep) { - nfsm_postop_attr(vp, attrflag, &xid); + nfsm_postop_attr_update(vp, v3, attrflag, &xid); } if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { - m_freem(mrep); + mbuf_freem(mrep); goto nfsmout; } } else if (!mrep) { @@ -2874,27 +3066,36 @@ nfs_readdirrpc(vp, uiop, cred) fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } - if (len <= 0 || len > NFS_MAXNAMLEN) { + /* Note: v3 supports longer names, but struct dirent doesn't */ + /* so we just truncate the names to fit */ + if (len <= 0) { error = EBADRPC; - m_freem(mrep); + mbuf_freem(mrep); goto nfsmout; } + if (len > MAXNAMLEN) { + skiplen = len - MAXNAMLEN; + len = MAXNAMLEN; + } else { + skiplen = 0; + } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { + if ((tlen + (int)DIRHDSIZ) > left) { dp->d_reclen += left; - uiop->uio_iov->iov_base += left; - uiop->uio_iov->iov_len -= left; + uio_iov_base_add(uiop, left); + uio_iov_len_add(uiop, -left); uiop->uio_offset += left; - uiop->uio_resid -= left; + uio_uio_resid_add(uiop, -left); blksiz = 0; } - if ((tlen + DIRHDSIZ) > uiop->uio_resid) + if ((tlen + (int)DIRHDSIZ) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { - dp = (struct dirent *)uiop->uio_iov->iov_base; + // LP64todo - fix this! 
+ dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; @@ -2903,19 +3104,28 @@ nfs_readdirrpc(vp, uiop, cred) if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; - uiop->uio_resid -= DIRHDSIZ; - uiop->uio_iov->iov_base += DIRHDSIZ; - uiop->uio_iov->iov_len -= DIRHDSIZ; +#if LP64KERN + uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); + uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); +#else + uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); + uio_iov_len_add(uiop, -((int)DIRHDSIZ)); +#endif + uio_iov_base_add(uiop, DIRHDSIZ); nfsm_mtouio(uiop, len); - cp = uiop->uio_iov->iov_base; + // LP64todo - fix this! + cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); tlen -= len; *cp = '\0'; /* null terminate */ - uiop->uio_iov->iov_base += tlen; - uiop->uio_iov->iov_len -= tlen; + uio_iov_base_add(uiop, tlen); + uio_iov_len_add(uiop, -tlen); uiop->uio_offset += tlen; - uiop->uio_resid -= tlen; - } else + uio_uio_resid_add(uiop, -tlen); + } else { nfsm_adv(nfsm_rndup(len)); + } + if (skiplen) + nfsm_adv(nfsm_rndup(skiplen)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { @@ -2938,7 +3148,7 @@ nfs_readdirrpc(vp, uiop, cred) nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } - m_freem(mrep); + mbuf_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ @@ -2947,10 +3157,10 @@ nfs_readdirrpc(vp, uiop, cred) if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; - uiop->uio_iov->iov_base += left; - uiop->uio_iov->iov_len -= left; + uio_iov_base_add(uiop, left); + uio_iov_len_add(uiop, -left); uiop->uio_offset += left; - uiop->uio_resid -= left; + uio_uio_resid_add(uiop, -left); } /* @@ -2960,10 +3170,11 @@ nfs_readdirrpc(vp, uiop, cred) if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { - if (uiop->uio_resid > 0) + if (uio_uio_resid(uiop) > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - *cookiep = cookie; + if (cookiep) + *cookiep = cookie; } nfsmout: return (error); @@ -2973,46 +3184,47 @@ nfsmout: * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
*/ int -nfs_readdirplusrpc(vp, uiop, cred) - struct vnode *vp; - register struct uio *uiop; - struct ucred *cred; +nfs_readdirplusrpc( + vnode_t vp, + struct uio *uiop, + kauth_cred_t cred, + proc_t p) { - register int len, left; - register struct dirent *dp; - register u_long *tl; - register caddr_t cp; - register long t1, t2; - register struct vnode *newvp; - register nfsuint64 *cookiep; - caddr_t bpos, dpos, cp2, dpossav1, dpossav2; - struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; - struct nameidata nami, *ndp = &nami; - struct componentname *cnp = &ndp->ni_cnd; + int len, skiplen, left; + struct dirent *dp; + u_long *tl; + caddr_t cp; + long t1, t2; + vnode_t newvp; + nfsuint64 *cookiep; + caddr_t bpos, dpos, cp2; + mbuf_t mreq, mrep, md, mb, mb2; + struct componentname cn, *cnp = &cn; nfsuint64 cookie; struct nfsmount *nmp; struct nfsnode *dnp = VTONFS(vp), *np; - nfsfh_t *fhp; + u_char *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize, nmreaddirsize, nmrsize; u_int64_t xid, savexid; + struct nfs_vattr nvattr; #ifndef nolint dp = (struct dirent *)0; #endif #if DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || - (uiop->uio_resid & (DIRBLKSIZ - 1))) + (uio_uio_resid(uiop) & (DIRBLKSIZ - 1))) panic("nfs_readdirplusrpc: bad uio"); #endif - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) return (ENXIO); nmreaddirsize = nmp->nm_readdirsize; nmrsize = nmp->nm_rsize; - ndp->ni_dvp = vp; + bzero(cnp, sizeof(*cnp)); newvp = NULLVP; /* @@ -3029,9 +3241,10 @@ nfs_readdirplusrpc(vp, uiop, cred) * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { - nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; - nfsm_reqhead(vp, NFSPROC_READDIRPLUS, - NFSX_FH(1) + 6 * NFSX_UNSIGNED); + nfsm_reqhead(NFSX_FH(1) + 6 * NFSX_UNSIGNED); + if (error) + goto nfsmout; + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_READDIRPLUS]); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; @@ -3040,14 +3253,13 @@ nfs_readdirplusrpc(vp, uiop, cred) *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmreaddirsize); *tl = txdr_unsigned(nmrsize); - nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred, - &xid); + nfsm_request(vp, NFSPROC_READDIRPLUS, p, cred, &xid); savexid = xid; if (mrep) { - nfsm_postop_attr(vp, attrflag, &xid); + nfsm_postop_attr_update(vp, 1, attrflag, &xid); } if (error) { - m_freem(mrep); + mbuf_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); @@ -3060,27 +3272,36 @@ nfs_readdirplusrpc(vp, uiop, cred) nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); - if (len <= 0 || len > NFS_MAXNAMLEN) { + /* Note: v3 supports longer names, but struct dirent doesn't */ + /* so we just truncate the names to fit */ + if (len <= 0) { error = EBADRPC; - m_freem(mrep); + mbuf_freem(mrep); goto nfsmout; } + if (len > MAXNAMLEN) { + skiplen = len - MAXNAMLEN; + len = MAXNAMLEN; + } else { + skiplen = 0; + } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { + if ((tlen + (int)DIRHDSIZ) > left) { dp->d_reclen += left; - uiop->uio_iov->iov_base += left; - uiop->uio_iov->iov_len -= left; + uio_iov_base_add(uiop, left); + uio_iov_len_add(uiop, -left); uiop->uio_offset += left; - uiop->uio_resid -= left; + uio_uio_resid_add(uiop, -left); blksiz = 
0; } - if ((tlen + DIRHDSIZ) > uiop->uio_resid) + if ((tlen + (int)DIRHDSIZ) > uio_uio_resid(uiop)) bigenough = 0; if (bigenough) { - dp = (struct dirent *)uiop->uio_iov->iov_base; + // LP64todo - fix this! + dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; @@ -3089,21 +3310,30 @@ nfs_readdirplusrpc(vp, uiop, cred) if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; - uiop->uio_resid -= DIRHDSIZ; - uiop->uio_iov->iov_base += DIRHDSIZ; - uiop->uio_iov->iov_len -= DIRHDSIZ; - cnp->cn_nameptr = uiop->uio_iov->iov_base; +#if LP64KERN + uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); + uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); +#else + uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); + uio_iov_len_add(uiop, -((int)DIRHDSIZ)); +#endif + uio_iov_base_add(uiop, DIRHDSIZ); + // LP64todo - fix this! + cnp->cn_nameptr = CAST_DOWN(caddr_t, uio_iov_base(uiop)); cnp->cn_namelen = len; nfsm_mtouio(uiop, len); - cp = uiop->uio_iov->iov_base; + cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); tlen -= len; *cp = '\0'; - uiop->uio_iov->iov_base += tlen; - uiop->uio_iov->iov_len -= tlen; + uio_iov_base_add(uiop, tlen); + uio_iov_len_add(uiop, -tlen); uiop->uio_offset += tlen; - uiop->uio_resid -= tlen; - } else + uio_uio_resid_add(uiop, -tlen); + } else { nfsm_adv(nfsm_rndup(len)); + } + if (skiplen) + nfsm_adv(nfsm_rndup(skiplen)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; @@ -3118,56 +3348,54 @@ nfs_readdirplusrpc(vp, uiop, cred) */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { - dpossav1 = dpos; - mdsav1 = md; - nfsm_adv(NFSX_V3FATTR); + /* grab attributes */ + nfsm_attr_get(1, &nvattr); + dp->d_type = IFTODT(VTTOIF(nvattr.nva_type)); + /* check for file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { - VREF(vp); - newvp = vp; - np = dnp; + error = vnode_ref(vp); + if (error) { + doit = 0; + } else { + newvp = vp; + np = dnp; + } } else if (!bigenough || (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')) { /* + * XXXmacko I don't think this ".." thing is a problem anymore. * don't doit if we can't guarantee * that this entry is NOT ".." because * we would have to drop the lock on * the directory before getting the - * (lock on) the ".." vnode... and we + * lock on the ".." vnode... and we * don't want to drop the dvp lock in * the middle of a readdirplus. 
*/ doit = 0; } else { - if ((error = nfs_nget(vp->v_mount, fhp, - fhsize, &np))) + cnp->cn_hash = 0; + + error = nfs_nget(vnode_mount(vp), vp, cnp, + fhp, fhsize, &nvattr, &xid, + NG_MAKEENTRY, &np); + if (error) doit = 0; else newvp = NFSTOV(np); } } - if (doit && bigenough) { - dpossav2 = dpos; - dpos = dpossav1; - mdsav2 = md; - md = mdsav1; + /* update attributes if not already updated */ + if (doit && bigenough && (np->n_xid <= savexid)) { xid = savexid; - nfsm_loadattr(newvp, (struct vattr *)0, &xid); - dpos = dpossav2; - md = mdsav2; - dp->d_type = - IFTODT(VTTOIF(np->n_vattr.va_type)); - ndp->ni_vp = newvp; - cnp->cn_hash = 0; - for (cp = cnp->cn_nameptr, i = 1; i <= len; - i++, cp++) - cnp->cn_hash += (unsigned char)*cp * i; - cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); + nfs_loadattrcache(np, &nvattr, &xid, 0); + /* any error can be ignored */ } } else { /* Just skip over the file handle */ @@ -3177,9 +3405,9 @@ nfs_readdirplusrpc(vp, uiop, cred) } if (newvp != NULLVP) { if (newvp == vp) - vrele(newvp); + vnode_rele(newvp); else - vput(newvp); + vnode_put(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); @@ -3192,7 +3420,7 @@ nfs_readdirplusrpc(vp, uiop, cred) nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } - m_freem(mrep); + mbuf_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ @@ -3201,10 +3429,10 @@ nfs_readdirplusrpc(vp, uiop, cred) if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; - uiop->uio_iov->iov_base += left; - uiop->uio_iov->iov_len -= left; + uio_iov_base_add(uiop, left); + uio_iov_len_add(uiop, -left); uiop->uio_offset += left; - uiop->uio_resid -= left; + uio_uio_resid_add(uiop, -left); } /* @@ -3214,19 +3442,13 @@ nfs_readdirplusrpc(vp, uiop, cred) if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { - if (uiop->uio_resid > 0) + if (uio_uio_resid(uiop) > 0) printf("EEK! 
readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - *cookiep = cookie; + if (cookiep) + *cookiep = cookie; } nfsmout: - if (newvp != NULLVP) { - if (newvp == vp) - vrele(newvp); - else - vput(newvp); - newvp = NULLVP; - } return (error); } @@ -3244,37 +3466,44 @@ nfsmout: static char sillyrename_name[] = ".nfsAAA%04x4.4"; static int -nfs_sillyrename(dvp, vp, cnp) - struct vnode *dvp, *vp; - struct componentname *cnp; +nfs_sillyrename( + vnode_t dvp, + vnode_t vp, + struct componentname *cnp, + kauth_cred_t cred, + proc_t p) { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; - struct ucred *cred; + kauth_cred_t tmpcred; int i, j, k; cache_purge(vp); np = VTONFS(vp); #if DIAGNOSTIC - if (vp->v_type == VDIR) + if (vnode_vtype(vp) == VDIR) panic("nfs_sillyrename: dir"); #endif MALLOC_ZONE(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); - sp->s_cred = crdup(cnp->cn_cred); + if (!sp) + return (ENOMEM); + kauth_cred_ref(cred); + sp->s_cred = cred; sp->s_dvp = dvp; - VREF(dvp); + error = vnode_ref(dvp); + if (error) + goto bad_norele; /* Fudge together a funny name */ - pid = cnp->cn_proc->p_pid; + pid = proc_pid(p); sp->s_namlen = sprintf(sp->s_name, sillyrename_name, pid); /* Try lookitups until we get one that isn't there */ i = j = k = 0; - while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, - cnp->cn_proc, (struct nfsnode **)0) == 0) { + while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, p, NULL) == 0) { if (sp->s_name[4]++ >= 'z') sp->s_name[4] = 'A'; if (++i > ('z' - 'A' + 1)) { @@ -3302,10 +3531,11 @@ nfs_sillyrename(dvp, vp, cnp) } } /* now, do the rename */ - if ((error = nfs_renameit(dvp, cnp, sp))) + error = nfs_renamerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, + dvp, sp->s_name, sp->s_namlen, sp->s_cred, p); + if (error) goto bad; - error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, - cnp->cn_proc, &np); + error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, p, &np); #if DIAGNOSTIC kprintf("sillyrename: %s, vp=%x, np=%x, dvp=%x\n", &sp->s_name[0], (unsigned)vp, (unsigned)np, (unsigned)dvp); @@ -3313,10 +3543,11 @@ nfs_sillyrename(dvp, vp, cnp) np->n_sillyrename = sp; return (0); bad: - vrele(sp->s_dvp); - cred = sp->s_cred; + vnode_rele(sp->s_dvp); +bad_norele: + tmpcred = sp->s_cred; sp->s_cred = NOCRED; - crfree(cred); + kauth_cred_rele(tmpcred); FREE_ZONE((caddr_t)sp, sizeof (struct sillyrename), M_NFSREQ); return (error); } @@ -3331,81 +3562,119 @@ bad: */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) - register struct vnode *dvp; + vnode_t dvp; char *name; int len; - struct ucred *cred; - struct proc *procp; + kauth_cred_t cred; + proc_t procp; struct nfsnode **npp; { - register u_long *tl; - register caddr_t cp; - register long t1, t2; - struct vnode *newvp = (struct vnode *)0; + u_long *tl; + caddr_t cp; + long t1, t2; + vnode_t newvp = (vnode_t)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; - nfsfh_t *nfhp; + mbuf_t mreq, mrep, md, mb, mb2; + u_char *nfhp; int v3; - u_int64_t xid; + u_int64_t xid, dxid, savedxid; + struct nfs_vattr nvattr; - if (!VFSTONFS(dvp->v_mount)) + if (!VFSTONFS(vnode_mount(dvp))) return (ENXIO); v3 = NFS_ISV3(dvp); - nfsstats.rpccnt[NFSPROC_LOOKUP]++; - nfsm_reqhead(dvp, NFSPROC_LOOKUP, - NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + nfsm_reqhead(NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); + if (error) + 
return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_LOOKUP]); nfsm_fhtom(dvp, v3); - nfsm_strtom(name, len, NFS_MAXNAMLEN); + nfsm_strtom(name, len, NFS_MAXNAMLEN, v3); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred, &xid); if (npp && !error) { + savedxid = xid; nfsm_getfh(nfhp, fhlen, v3); + /* get attributes */ + if (v3) { + nfsm_postop_attr_get(v3, attrflag, &nvattr); + if (!attrflag) { + /* We need valid attributes in order */ + /* to call nfs_nget/vnode_create(). */ + error = nfs_getattr_no_vnode(vnode_mount(dvp), + nfhp, fhlen, cred, procp, &nvattr, &xid); + if (error) { + mbuf_freem(mrep); + goto nfsmout; + } + } + dxid = savedxid; + nfsm_postop_attr_update(dvp, v3, attrflag, &dxid); + } else { + nfsm_attr_get(v3, &nvattr); + } if (*npp) { np = *npp; - if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { - FREE_ZONE((caddr_t)np->n_fhp, - np->n_fhsize, M_NFSBIGFH); - np->n_fhp = &np->n_fh; - } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) - MALLOC_ZONE(np->n_fhp, nfsfh_t *, - fhlen, M_NFSBIGFH, M_WAITOK); - bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); + if (fhlen != np->n_fhsize) { + u_char *oldbuf = (np->n_fhsize > NFS_SMALLFH) ? np->n_fhp : NULL; + if (fhlen > NFS_SMALLFH) { + MALLOC_ZONE(np->n_fhp, u_char *, fhlen, M_NFSBIGFH, M_WAITOK); + if (!np->n_fhp) { + np->n_fhp = oldbuf; + error = ENOMEM; + mbuf_freem(mrep); + goto nfsmout; + } + } else { + np->n_fhp = &np->n_fh[0]; + } + if (oldbuf) { + FREE_ZONE(oldbuf, np->n_fhsize, M_NFSBIGFH); + } + } + bcopy(nfhp, np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); + error = nfs_loadattrcache(np, &nvattr, &xid, 0); + if (error) { + mbuf_freem(mrep); + goto nfsmout; + } } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { - VREF(dvp); newvp = dvp; + if (dnp->n_xid <= savedxid) { + dxid = savedxid; + error = nfs_loadattrcache(dnp, &nvattr, &dxid, 0); + if (error) { + mbuf_freem(mrep); + goto nfsmout; + } + } } else { - error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); + struct componentname cn, *cnp = &cn; + bzero(cnp, sizeof(*cnp)); + cnp->cn_nameptr = name; + cnp->cn_namelen = len; + + error = nfs_nget(vnode_mount(dvp), dvp, cnp, nfhp, fhlen, + &nvattr, &xid, NG_MAKEENTRY, &np); if (error) { - m_freem(mrep); + mbuf_freem(mrep); return (error); } newvp = NFSTOV(np); } - if (v3) { - nfsm_postop_attr(newvp, attrflag, &xid); - if (!attrflag && *npp == NULL) { - m_freem(mrep); - if (newvp == dvp) - vrele(newvp); - else - vput(newvp); - return (ENOENT); - } - } else - nfsm_loadattr(newvp, (struct vattr *)0, &xid); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { - if (newvp) + if (newvp) { if (newvp == dvp) - vrele(newvp); + vnode_rele(newvp); else - vput(newvp); + vnode_put(newvp); + } } else *npp = np; } @@ -3416,38 +3685,40 @@ nfs_lookitup(dvp, name, len, cred, procp, npp) * Nfs Version 3 commit rpc */ int -nfs_commit(vp, offset, cnt, cred, procp) - register struct vnode *vp; +nfs_commit(vp, offset, count, cred, procp) + vnode_t vp; u_quad_t offset; - int cnt; - struct ucred *cred; - struct proc *procp; + u_int32_t count; + kauth_cred_t cred; + proc_t procp; { - register caddr_t cp; - register u_long *tl; - register int t1, t2; - register struct nfsmount *nmp = VFSTONFS(vp->v_mount); + caddr_t cp; + u_long *tl; + int t1, t2; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); caddr_t bpos, dpos, cp2; int error = 0, wccpostattr = 0; - time_t premtime = 0; - struct mbuf *mreq, *mrep, *md, *mb, *mb2; + struct timespec premtime = { 0, 0 }; + mbuf_t mreq, mrep, md, mb, mb2; u_int64_t xid; - FSDBG(521, 
vp, offset, cnt, nmp->nm_state); + FSDBG(521, vp, offset, count, nmp->nm_state); if (!nmp) return (ENXIO); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) return (0); - nfsstats.rpccnt[NFSPROC_COMMIT]++; - nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); + nfsm_reqhead(NFSX_FH(1)); + if (error) + return (error); + OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[NFSPROC_COMMIT]); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; - *tl = txdr_unsigned(cnt); + *tl = txdr_unsigned(count); nfsm_request(vp, NFSPROC_COMMIT, procp, cred, &xid); if (mrep) { - nfsm_wcc_data(vp, premtime, wccpostattr, &xid); + nfsm_wcc_data(vp, &premtime, wccpostattr, &xid); /* XXX can we do anything useful with the wcc info? */ } if (!error) { @@ -3464,34 +3735,19 @@ nfs_commit(vp, offset, cnt, cred, procp) } static int -nfs_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; - int *a_runb; - } */ *ap; +nfs_blockmap( + __unused struct vnop_blockmap_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + off_t a_foffset; + size_t a_size; + daddr64_t *a_bpn; + size_t *a_run; + void *a_poff; + int a_flags; + } */ *ap) { - register struct vnode *vp = ap->a_vp; - int devBlockSize = DEV_BSIZE; - - if (ap->a_vpp != NULL) - *ap->a_vpp = vp; - if (ap->a_bnp != NULL) { - if (!vp->v_mount) - return (ENXIO); - *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize, - devBlockSize); - } - if (ap->a_runp != NULL) - *ap->a_runp = 0; -#ifdef notyet - if (ap->a_runb != NULL) - *ap->a_runb = 0; -#endif - return (0); + return (ENOTSUP); } /* @@ -3499,50 +3755,55 @@ nfs_bmap(ap) * * NB Currently unsupported. */ -/* ARGSUSED */ +/*ARGSUSED*/ static int -nfs_mmap(ap) - struct vop_mmap_args /* { - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +nfs_mmap( + __unused struct vnop_mmap_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflags; + kauth_cred_t a_cred; + proc_t a_p; + } */ *ap) { return (EINVAL); } /* - * fsync vnode op. Just call nfs_flush(). + * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) - struct vop_fsync_args /* { + struct vnop_fsync_args /* { struct vnodeop_desc *a_desc; - struct vnode * a_vp; - struct ucred * a_cred; - int a_waitfor; - struct proc * a_p; + vnode_t a_vp; + int a_waitfor; + vfs_context_t a_context; } */ *ap; { - return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p)); + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + proc_t p = vfs_context_proc(ap->a_context); + struct nfsnode *np = VTONFS(ap->a_vp); + int error; + + np->n_flag |= NWRBUSY; + error = nfs_flush(ap->a_vp, ap->a_waitfor, cred, p, 0); + np->n_flag &= ~NWRBUSY; + return (error); } int -nfs_flushcommits(struct vnode *vp, struct proc *p) +nfs_flushcommits(vnode_t vp, proc_t p, int nowait) { struct nfsnode *np = VTONFS(vp); - struct nfsbuf *bp, *nbp; - int i, s, error = 0, retv, bvecpos, wcred_set; + struct nfsbuf *bp; + struct nfsbuflists blist, commitlist; + int error = 0, retv, wcred_set, flags; u_quad_t off, endoff, toff; - struct ucred* wcred; - struct nfsbuf **bvec = NULL; -#define NFS_COMMITBVECSIZ 20 -#define NFS_MAXCOMMITBVECSIZ 1024 - struct nfsbuf *bvec_on_stack[NFS_COMMITBVECSIZ]; - int bvecsize = NFS_MAXCOMMITBVECSIZ; + u_int32_t count; + kauth_cred_t wcred = NULL; FSDBG_TOP(557, vp, np, 0, 0); @@ -3552,15 +3813,15 @@ nfs_flushcommits(struct vnode *vp, struct proc *p) * yet. 
The byte range is worked out for as many nfsbufs as we can handle * and the commit rpc is done. */ - if (np->n_dirtyblkhd.lh_first) + if (!LIST_EMPTY(&np->n_dirtyblkhd)) np->n_flag |= NMODIFIED; off = (u_quad_t)-1; endoff = 0; - bvecpos = 0; wcred_set = 0; + LIST_INIT(&commitlist); - if (!VFSTONFS(vp->v_mount)) { + if (!VFSTONFS(vnode_mount(vp))) { error = ENXIO; goto done; } @@ -3568,95 +3829,96 @@ nfs_flushcommits(struct vnode *vp, struct proc *p) error = EINVAL; goto done; } - s = splbio(); - /* - * Allocate space to remember the list of bufs to commit. It is - * important to use M_NOWAIT here to avoid a race with nfs_write - */ - MALLOC(bvec, struct nfsbuf **, - bvecsize * sizeof(struct nfsbuf *), M_TEMP, - M_NOWAIT); - if (bvec == NULL) { - bvec = bvec_on_stack; - bvecsize = NFS_COMMITBVECSIZ; - } - for (bp = np->n_dirtyblkhd.lh_first; bp && bvecpos < bvecsize; bp = nbp) { - nbp = bp->nb_vnbufs.le_next; - - if (((bp->nb_flags & (NB_BUSY | NB_DELWRI | NB_NEEDCOMMIT)) - != (NB_DELWRI | NB_NEEDCOMMIT))) - continue; - - nfs_buf_remfree(bp); - SET(bp->nb_flags, NB_BUSY); - /* - * we need a upl to see if the page has been - * dirtied (think mmap) since the unstable write, and - * also to prevent vm from paging it during our commit rpc - */ - if (!ISSET(bp->nb_flags, NB_PAGELIST)) { - retv = nfs_buf_upl_setup(bp); - if (retv) { - /* unable to create upl */ - /* vm object must no longer exist */ - /* this could be fatal if we need */ - /* to write the data again, we'll see... */ - printf("nfs_flushcommits: upl create failed %d\n", retv); - bp->nb_valid = bp->nb_dirty = 0; + flags = NBI_DIRTY; + if (nowait) + flags |= NBI_NOWAIT; + lck_mtx_lock(nfs_buf_mutex); + if (!nfs_buf_iterprepare(np, &blist, flags)) { + while ((bp = LIST_FIRST(&blist))) { + LIST_REMOVE(bp, nb_vnbufs); + LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs); + error = nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0); + if (error) + continue; + if (((bp->nb_flags & (NB_DELWRI | NB_NEEDCOMMIT)) + != (NB_DELWRI | NB_NEEDCOMMIT))) { + nfs_buf_drop(bp); + continue; } - } - nfs_buf_upl_check(bp); + nfs_buf_remfree(bp); + lck_mtx_unlock(nfs_buf_mutex); + /* + * we need a upl to see if the page has been + * dirtied (think mmap) since the unstable write, and + * also to prevent vm from paging it during our commit rpc + */ + if (!ISSET(bp->nb_flags, NB_PAGELIST)) { + retv = nfs_buf_upl_setup(bp); + if (retv) { + /* unable to create upl */ + /* vm object must no longer exist */ + /* this could be fatal if we need */ + /* to write the data again, we'll see... */ + printf("nfs_flushcommits: upl create failed %d\n", retv); + bp->nb_valid = bp->nb_dirty = 0; + } + } + nfs_buf_upl_check(bp); + lck_mtx_lock(nfs_buf_mutex); - FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty); - FSDBG(557, bp->nb_validoff, bp->nb_validend, - bp->nb_dirtyoff, bp->nb_dirtyend); + FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty); + FSDBG(557, bp->nb_validoff, bp->nb_validend, + bp->nb_dirtyoff, bp->nb_dirtyend); - /* - * We used to check for dirty pages here; if there were any - * we'd abort the commit and force the entire buffer to be - * written again. - * - * Instead of doing that, we now go ahead and commit the dirty - * range, and then leave the buffer around with dirty pages - * that will be written out later. - */ + /* + * We used to check for dirty pages here; if there were any + * we'd abort the commit and force the entire buffer to be + * written again. 
+ * + * Instead of doing that, we now go ahead and commit the dirty + * range, and then leave the buffer around with dirty pages + * that will be written out later. + */ - /* in case blocking calls were made, re-evaluate nbp */ - nbp = bp->nb_vnbufs.le_next; + /* + * Work out if all buffers are using the same cred + * so we can deal with them all with one commit. + * + * XXX creds in bp's must be obtained by kauth_cred_ref on + * the same original cred in order for them to be equal. + */ + if (wcred_set == 0) { + wcred = bp->nb_wcred; + if (wcred == NOCRED) + panic("nfs: needcommit w/out wcred"); + wcred_set = 1; + } else if ((wcred_set == 1) && wcred != bp->nb_wcred) { + wcred_set = -1; + } + SET(bp->nb_flags, NB_WRITEINPROG); - /* - * Work out if all buffers are using the same cred - * so we can deal with them all with one commit. - */ - if (wcred_set == 0) { - wcred = bp->nb_wcred; - if (wcred == NOCRED) - panic("nfs: needcommit w/out wcred"); - wcred_set = 1; - } else if ((wcred_set == 1) && crcmp(wcred, bp->nb_wcred)) { - wcred_set = -1; + /* + * A list of these buffers is kept so that the + * second loop knows which buffers have actually + * been committed. This is necessary, since there + * may be a race between the commit rpc and new + * uncommitted writes on the file. + */ + LIST_REMOVE(bp, nb_vnbufs); + LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs); + toff = NBOFF(bp) + bp->nb_dirtyoff; + if (toff < off) + off = toff; + toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff); + if (toff > endoff) + endoff = toff; } - SET(bp->nb_flags, NB_WRITEINPROG); - - /* - * A list of these buffers is kept so that the - * second loop knows which buffers have actually - * been committed. This is necessary, since there - * may be a race between the commit rpc and new - * uncommitted writes on the file. - */ - bvec[bvecpos++] = bp; - toff = NBOFF(bp) + bp->nb_dirtyoff; - if (toff < off) - off = toff; - toff += (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff); - if (toff > endoff) - endoff = toff; + nfs_buf_itercomplete(np, &blist, NBI_DIRTY); } - splx(s); + lck_mtx_unlock(nfs_buf_mutex); - if (bvecpos == 0) { + if (LIST_EMPTY(&commitlist)) { error = ENOBUFS; goto done; } @@ -3667,74 +3929,78 @@ nfs_flushcommits(struct vnode *vp, struct proc *p) * one call for all of them, otherwise commit each one * separately. */ - if (wcred_set == 1) - retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); - else { + if (wcred_set == 1) { + /* + * Note, it's possible the commit range could be >2^32-1. + * If it is, we'll send one commit that covers the whole file. + */ + if ((endoff - off) > 0xffffffff) + count = 0; + else + count = (endoff - off); + retv = nfs_commit(vp, off, count, wcred, p); + } else { retv = 0; - - for (i = 0; i < bvecpos; i++) { - off_t off, size; - bp = bvec[i]; - off = NBOFF(bp) + bp->nb_dirtyoff; - size = (u_quad_t)(bp->nb_dirtyend - bp->nb_dirtyoff); - retv = nfs_commit(vp, off, (int)size, bp->nb_wcred, p); - if (retv) break; + LIST_FOREACH(bp, &commitlist, nb_vnbufs) { + toff = NBOFF(bp) + bp->nb_dirtyoff; + count = bp->nb_dirtyend - bp->nb_dirtyoff; + retv = nfs_commit(vp, toff, count, bp->nb_wcred, p); + if (retv) + break; } } if (retv == NFSERR_STALEWRITEVERF) - nfs_clearcommit(vp->v_mount); + nfs_clearcommit(vnode_mount(vp)); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. 
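
The clamp above exists because the on-the-wire COMMIT count is only 32 bits, and per RFC 1813 a count of zero means a flush from offset to end of file. Restated as a self-contained helper:

#include <stdint.h>

/* 32-bit COMMIT count for the byte range [off, endoff);
 * 0 means "commit the whole file" when the range is too wide. */
static uint32_t
commit_count(uint64_t off, uint64_t endoff)
{
    uint64_t len = endoff - off;
    return (len > 0xffffffffULL) ? 0 : (uint32_t)len;
}
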
*/ - for (i = 0; i < bvecpos; i++) { - bp = bvec[i]; + while ((bp = LIST_FIRST(&commitlist))) { + LIST_REMOVE(bp, nb_vnbufs); FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty); - CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG)); - np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); if (retv) { + /* move back to dirty list */ + lck_mtx_lock(nfs_buf_mutex); + LIST_INSERT_HEAD(&VTONFS(vp)->n_dirtyblkhd, bp, nb_vnbufs); + lck_mtx_unlock(nfs_buf_mutex); nfs_buf_release(bp, 1); - } else { - s = splbio(); - vp->v_numoutput++; + continue; + } - if (ISSET(bp->nb_flags, NB_DELWRI)) { - nfs_nbdwrite--; - NFSBUFCNTCHK(); - wakeup((caddr_t)&nfs_nbdwrite); - } - CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI)); - /* if block still has dirty pages, we don't want it to */ - /* be released in nfs_buf_iodone(). So, don't set NB_ASYNC. */ - if (!bp->nb_dirty) - SET(bp->nb_flags, NB_ASYNC); + vnode_startwrite(vp); + if (ISSET(bp->nb_flags, NB_DELWRI)) { + OSAddAtomic(-1, (SInt32*)&nfs_nbdwrite); + NFSBUFCNTCHK(0); + wakeup(&nfs_nbdwrite); + } + CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI)); + /* if block still has dirty pages, we don't want it to */ + /* be released in nfs_buf_iodone(). So, don't set NB_ASYNC. */ + if (!bp->nb_dirty) + SET(bp->nb_flags, NB_ASYNC); - /* move to clean list */ - if (bp->nb_vnbufs.le_next != NFSNOLIST) - LIST_REMOVE(bp, nb_vnbufs); - LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs); + /* move to clean list */ + lck_mtx_lock(nfs_buf_mutex); + LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs); + lck_mtx_unlock(nfs_buf_mutex); - bp->nb_dirtyoff = bp->nb_dirtyend = 0; - splx(s); + bp->nb_dirtyoff = bp->nb_dirtyend = 0; - nfs_buf_iodone(bp); - if (bp->nb_dirty) { - /* throw it back in as a delayed write buffer */ - CLR(bp->nb_flags, NB_DONE); - nfs_buf_write_delayed(bp); - } + nfs_buf_iodone(bp); + if (bp->nb_dirty) { + /* throw it back in as a delayed write buffer */ + CLR(bp->nb_flags, NB_DONE); + nfs_buf_write_delayed(bp, p); } } done: - if (bvec != NULL && bvec != bvec_on_stack) - _FREE(bvec, M_TEMP); FSDBG_BOT(557, vp, np, 0, error); return (error); } @@ -3744,18 +4010,20 @@ done: * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ -static int -nfs_flush(vp, cred, waitfor, p) - register struct vnode *vp; - struct ucred *cred; - int waitfor; - struct proc *p; +int +nfs_flush( + vnode_t vp, + int waitfor, + __unused kauth_cred_t cred, + proc_t p, + int ignore_writeerr) { struct nfsnode *np = VTONFS(vp); - struct nfsbuf *bp, *nbp; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int i, s, error = 0, error2, slptimeo = 0, slpflag = 0; - int passone = 1; + struct nfsbuf *bp; + struct nfsbuflists blist; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + int error = 0, error2, slptimeo = 0, slpflag = 0; + int flags, passone = 1; FSDBG_TOP(517, vp, np, waitfor, 0); @@ -3774,81 +4042,92 @@ nfs_flush(vp, cred, waitfor, p) * dirty buffers. Then wait for all writes to complete. */ again: - FSDBG(518, np->n_dirtyblkhd.lh_first, np->n_flag, 0, 0); - if (np->n_dirtyblkhd.lh_first) + lck_mtx_lock(nfs_buf_mutex); + FSDBG(518, LIST_FIRST(&np->n_dirtyblkhd), np->n_flag, 0, 0); + if (!LIST_EMPTY(&np->n_dirtyblkhd)) np->n_flag |= NMODIFIED; - if (!VFSTONFS(vp->v_mount)) { + if (!VFSTONFS(vnode_mount(vp))) { + lck_mtx_unlock(nfs_buf_mutex); error = ENXIO; goto done; } /* Start/do any write(s) that are required. 
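
Below, nfs_flush() makes two passes over the dirty buffers: pass one skips anything that merely needs a commit and issues asynchronous writes, then batches the commits via nfs_flushcommits(); pass two sets NB_STABLE, which (per the comment in the loop) forces FILESYNC writes so nothing is left needing a commit. For context, the NFSv3 stability levels from RFC 1813, with a hypothetical helper mapping the pass to the stability the eventual write can use (the real choice happens later, in nfs_buf_write()/nfs_doio()):

/* NFSv3 stable_how values (RFC 1813) */
enum stable_how {
    NFSV3WRITE_UNSTABLE = 0,    /* server may cache; needs COMMIT later */
    NFSV3WRITE_DATASYNC = 1,    /* data on stable storage, attrs maybe not */
    NFSV3WRITE_FILESYNC = 2     /* data and metadata on stable storage */
};

/* Hypothetical mapping of flush pass to write stability. */
static enum stable_how
flush_stability(int passone)
{
    return passone ? NFSV3WRITE_UNSTABLE : NFSV3WRITE_FILESYNC;
}
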
*/ -loop: - s = splbio(); - for (bp = np->n_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->nb_vnbufs.le_next; - if (ISSET(bp->nb_flags, NB_BUSY)) { - FSDBG(524, bp, waitfor, passone, bp->nb_flags); - if (waitfor != MNT_WAIT || passone) - continue; - SET(bp->nb_flags, NB_WANTED); - error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), - "nfsfsync", slptimeo); - splx(s); - if (error) { - error2 = nfs_sigintr(VFSTONFS(vp->v_mount), - (struct nfsreq *)0, p); - if (error2) { - error = error2; - goto done; - } - if (slpflag == PCATCH) { - slpflag = 0; - slptimeo = 2 * hz; + if (!nfs_buf_iterprepare(np, &blist, NBI_DIRTY)) { + while ((bp = LIST_FIRST(&blist))) { + LIST_REMOVE(bp, nb_vnbufs); + LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs); + flags = (passone || (waitfor != MNT_WAIT)) ? NBAC_NOWAIT : 0; + if (flags != NBAC_NOWAIT) + nfs_buf_refget(bp); + while ((error = nfs_buf_acquire(bp, flags, slpflag, slptimeo))) { + FSDBG(524, bp, flags, bp->nb_lflags, bp->nb_flags); + if (error == EBUSY) + break; + if (error) { + error2 = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p); + if (error2) { + if (flags != NBAC_NOWAIT) + nfs_buf_refrele(bp); + nfs_buf_itercomplete(np, &blist, NBI_DIRTY); + lck_mtx_unlock(nfs_buf_mutex); + error = error2; + goto done; + } + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } } } - goto loop; - } - if (!ISSET(bp->nb_flags, NB_DELWRI)) - panic("nfs_fsync: not dirty"); - FSDBG(525, bp, passone, 0, bp->nb_flags); - if ((passone || (waitfor != MNT_WAIT)) && ISSET(bp->nb_flags, NB_NEEDCOMMIT)) - continue; - nfs_buf_remfree(bp); - if (ISSET(bp->nb_flags, NB_ERROR)) { - np->n_error = bp->nb_error ? bp->nb_error : EIO; - np->n_flag |= NWRITEERR; - nfs_buf_release(bp, 1); - continue; - } - if (passone) - SET(bp->nb_flags, NB_BUSY|NB_ASYNC); - else { - /* the NB_STABLE forces this to be written FILESYNC */ - SET(bp->nb_flags, NB_BUSY|NB_ASYNC|NB_STABLE); + if (flags != NBAC_NOWAIT) + nfs_buf_refrele(bp); + if (error == EBUSY) + continue; + if (!bp->nb_vp) { + /* buffer is no longer valid */ + nfs_buf_drop(bp); + continue; + } + if (!ISSET(bp->nb_flags, NB_DELWRI)) + panic("nfs_flush: not dirty"); + FSDBG(525, bp, passone, bp->nb_lflags, bp->nb_flags); + if ((passone || (waitfor != MNT_WAIT)) && + ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { + nfs_buf_drop(bp); + continue; + } + nfs_buf_remfree(bp); + lck_mtx_unlock(nfs_buf_mutex); + if (ISSET(bp->nb_flags, NB_ERROR)) { + np->n_error = bp->nb_error ? 
bp->nb_error : EIO; + np->n_flag |= NWRITEERR; + nfs_buf_release(bp, 1); + lck_mtx_lock(nfs_buf_mutex); + continue; + } + SET(bp->nb_flags, NB_ASYNC); + if (!passone) { + /* NB_STABLE forces this to be written FILESYNC */ + SET(bp->nb_flags, NB_STABLE); + } + nfs_buf_write(bp); + lck_mtx_lock(nfs_buf_mutex); } - splx(s); - nfs_buf_write(bp); - goto loop; + nfs_buf_itercomplete(np, &blist, NBI_DIRTY); } - splx(s); + lck_mtx_unlock(nfs_buf_mutex); if (waitfor == MNT_WAIT) { - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - error = tsleep((caddr_t)&vp->v_numoutput, - slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); - if (error) { - error2 = nfs_sigintr(VFSTONFS(vp->v_mount), - (struct nfsreq *)0, p); - if (error2) { - error = error2; + while ((error = vnode_waitforwrites(vp, 0, slpflag, slptimeo, "nfsflush"))) { + error2 = nfs_sigintr(VFSTONFS(vnode_mount(vp)), NULL, p); + if (error2) { + error = error2; goto done; - } - if (slpflag == PCATCH) { + } + if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; - } } } } @@ -3857,7 +4136,7 @@ loop: /* loop while it looks like there are still buffers to be */ /* commited and nfs_flushcommits() seems to be handling them. */ while (np->n_needcommitcnt) - if (nfs_flushcommits(vp, p)) + if (nfs_flushcommits(vp, p, 0)) break; } @@ -3866,14 +4145,12 @@ loop: goto again; } - if (waitfor == MNT_WAIT) { - if (np->n_dirtyblkhd.lh_first) { - goto again; - } + if ((waitfor == MNT_WAIT) && !LIST_EMPTY(&np->n_dirtyblkhd)) { + goto again; } FSDBG(526, np->n_flag, np->n_error, 0, 0); - if (np->n_flag & NWRITEERR) { + if (!ignore_writeerr && (np->n_flag & NWRITEERR)) { error = np->n_error; np->n_flag &= ~NWRITEERR; } @@ -3883,155 +4160,182 @@ done: } /* - * Return POSIX pathconf information applicable to nfs. - * - * The NFS V2 protocol doesn't support this, so just return EINVAL - * for V2. + * Do an nfs pathconf rpc. */ -/* ARGSUSED */ -static int -nfs_pathconf(ap) - struct vop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - int *a_retval; - } */ *ap; +int +nfs_pathconfrpc( + vnode_t vp, + struct nfsv3_pathconf *pc, + kauth_cred_t cred, + proc_t procp) { + mbuf_t mreq, mrep, md, mb, mb2; + caddr_t bpos, dpos, cp, cp2; + int32_t t1, t2; + u_long *tl; + u_int64_t xid; + int attrflag, error = 0; + struct nfsv3_pathconf *mpc; - return (EINVAL); -} + /* fetch pathconf info from server */ + nfsm_reqhead(NFSX_FH(1)); + if (error) + return (error); + nfsm_fhtom(vp, 1); + nfsm_request(vp, NFSPROC_PATHCONF, procp, cred, &xid); + nfsm_postop_attr_update(vp, 1, attrflag, &xid); + if (!error) { + nfsm_dissect(mpc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); + pc->pc_linkmax = fxdr_unsigned(long, mpc->pc_linkmax); + pc->pc_namemax = fxdr_unsigned(long, mpc->pc_namemax); + pc->pc_chownrestricted = fxdr_unsigned(long, mpc->pc_chownrestricted); + pc->pc_notrunc = fxdr_unsigned(long, mpc->pc_notrunc); + pc->pc_caseinsensitive = fxdr_unsigned(long, mpc->pc_caseinsensitive); + pc->pc_casepreserving = fxdr_unsigned(long, mpc->pc_casepreserving); + } + nfsm_reqdone; -/* - * NFS advisory byte-level locks (client) - */ -static int -nfs_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; -{ - return (nfs_dolock(ap)); + return (error); } -/* - * Print out the contents of an nfsnode. 
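
nfs_pathconfrpc() above pulls six XDR words out of the PATHCONF3 reply with fxdr_unsigned (a byte-order conversion). A standalone sketch of the same decode, assuming the RFC 1813 field order that struct nfsv3_pathconf mirrors:

#include <stdint.h>
#include <arpa/inet.h>          /* ntohl */

struct pathconf_reply {         /* cf. struct nfsv3_pathconf */
    uint32_t linkmax, namemax;
    uint32_t notrunc, chownrestricted;
    uint32_t caseinsensitive, casepreserving;
};

/* Decode the PATHCONF3resok body (post-op attributes already consumed);
 * 'wire' points at six big-endian 32-bit words. */
static void
decode_pathconf(const uint32_t wire[6], struct pathconf_reply *pc)
{
    pc->linkmax         = ntohl(wire[0]);
    pc->namemax         = ntohl(wire[1]);
    pc->notrunc         = ntohl(wire[2]);
    pc->chownrestricted = ntohl(wire[3]);
    pc->caseinsensitive = ntohl(wire[4]);
    pc->casepreserving  = ntohl(wire[5]);
}
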
- */ -static int -nfs_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; +void +nfs_pathconf_cache(struct nfsmount *nmp, struct nfsv3_pathconf *pc) { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - - printf("tag VT_NFS, fileid %ld fsid 0x%lx", - np->n_vattr.va_fileid, np->n_vattr.va_fsid); - if (vp->v_type == VFIFO) - fifo_printinfo(vp); - printf("\n"); - return (0); + nmp->nm_state |= NFSSTA_GOTPATHCONF; + nmp->nm_fsinfo.linkmax = pc->pc_linkmax; + nmp->nm_fsinfo.namemax = pc->pc_namemax; + nmp->nm_fsinfo.pcflags = 0; + if (pc->pc_notrunc) + nmp->nm_fsinfo.pcflags |= NFSPCINFO_NOTRUNC; + if (pc->pc_chownrestricted) + nmp->nm_fsinfo.pcflags |= NFSPCINFO_CHOWN_RESTRICTED; + if (pc->pc_caseinsensitive) + nmp->nm_fsinfo.pcflags |= NFSPCINFO_CASE_INSENSITIVE; + if (pc->pc_casepreserving) + nmp->nm_fsinfo.pcflags |= NFSPCINFO_CASE_PRESERVING; } /* - * NFS directory offset lookup. - * Currently unsupported. + * Return POSIX pathconf information applicable to nfs. + * + * The NFS V2 protocol doesn't support this, so just return EINVAL + * for V2. */ +/* ARGSUSED */ static int -nfs_blkatoff(ap) - struct vop_blkatoff_args /* { - struct vnode *a_vp; - off_t a_offset; - char **a_res; - struct buf **a_bpp; +nfs_pathconf(ap) + struct vnop_pathconf_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_name; + register_t *a_retval; + vfs_context_t a_context; } */ *ap; { + vnode_t vp = ap->a_vp; + struct nfsmount *nmp; + struct nfsv3_pathconf pc; + int error = 0, cached; -#if DIAGNOSTIC - printf("nfs_blkatoff: unimplemented!!"); -#endif - return (EOPNOTSUPP); -} + nmp = VFSTONFS(vnode_mount(vp)); + if (!nmp) + return (ENXIO); + if (!NFS_ISV3(vp)) + return (EINVAL); -/* - * NFS flat namespace allocation. - * Currently unsupported. - */ -static int -nfs_valloc(ap) - struct vop_valloc_args /* { - struct vnode *a_pvp; - int a_mode; - struct ucred *a_cred; - struct vnode **a_vpp; - } */ *ap; -{ + switch (ap->a_name) { + case _PC_LINK_MAX: + case _PC_NAME_MAX: + case _PC_CHOWN_RESTRICTED: + case _PC_NO_TRUNC: + case _PC_CASE_SENSITIVE: + case _PC_CASE_PRESERVING: + break; + default: + /* don't bother contacting the server if we know the answer */ + return (EINVAL); + } - return (EOPNOTSUPP); -} + if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) { + /* no pathconf info cached */ + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + proc_t p = vfs_context_proc(ap->a_context); + error = nfs_pathconfrpc(vp, &pc, cred, p); + if (error) + return (error); + nmp = VFSTONFS(vnode_mount(vp)); + if (!nmp) + return (ENXIO); + if (!(nmp->nm_state & NFSSTA_GOTFSINFO)) { + nfs_fsinfo(nmp, vp, cred, p); + nmp = VFSTONFS(vnode_mount(vp)); + if (!nmp) + return (ENXIO); + } + if ((nmp->nm_state & NFSSTA_GOTFSINFO) && + (nmp->nm_fsinfo.fsproperties & NFSV3FSINFO_HOMOGENEOUS)) { + /* all files have the same pathconf info, */ + /* so cache a copy of the results */ + nfs_pathconf_cache(nmp, &pc); + } + } -/* - * NFS flat namespace free. - * Currently unsupported. - */ -static int -nfs_vfree(ap) - struct vop_vfree_args /* { - struct vnode *a_pvp; - ino_t a_ino; - int a_mode; - } */ *ap; -{ + cached = (nmp->nm_state & NFSSTA_GOTPATHCONF); + + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = cached ? nmp->nm_fsinfo.linkmax : pc.pc_linkmax; + break; + case _PC_NAME_MAX: + *ap->a_retval = cached ? 
nmp->nm_fsinfo.namemax : pc.pc_namemax; + break; + case _PC_CHOWN_RESTRICTED: + if (cached) + *ap->a_retval = (nmp->nm_fsinfo.pcflags & NFSPCINFO_CHOWN_RESTRICTED) ? 1 : 0; + else + *ap->a_retval = pc.pc_chownrestricted; + break; + case _PC_NO_TRUNC: + if (cached) + *ap->a_retval = (nmp->nm_fsinfo.pcflags & NFSPCINFO_NOTRUNC) ? 1 : 0; + else + *ap->a_retval = pc.pc_notrunc; + break; + case _PC_CASE_SENSITIVE: + if (cached) + *ap->a_retval = (nmp->nm_fsinfo.pcflags & NFSPCINFO_CASE_INSENSITIVE) ? 0 : 1; + else + *ap->a_retval = !pc.pc_caseinsensitive; + break; + case _PC_CASE_PRESERVING: + if (cached) + *ap->a_retval = (nmp->nm_fsinfo.pcflags & NFSPCINFO_CASE_PRESERVING) ? 1 : 0; + else + *ap->a_retval = pc.pc_casepreserving; + break; + default: + error = EINVAL; + } -#if DIAGNOSTIC - printf("nfs_vfree: unimplemented!!"); -#endif - return (EOPNOTSUPP); + return (error); } /* - * NFS file truncation. + * NFS advisory byte-level locks (client) */ static int -nfs_truncate(ap) - struct vop_truncate_args /* { - struct vnode *a_vp; - off_t a_length; +nfs_advlock(ap) + struct vnop_advlock_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; int a_flags; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - - /* Use nfs_setattr */ -#if DIAGNOSTIC - printf("nfs_truncate: unimplemented!!"); -#endif - return (EOPNOTSUPP); -} - -/* - * NFS update. - */ -static int -nfs_update(ap) - struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_ta; - struct timeval *a_tm; - int a_waitfor; - } */ *ap; -{ - - /* Use nfs_setattr */ -#if DIAGNOSTIC - printf("nfs_update: unimplemented!!"); -#endif - return (EOPNOTSUPP); + return (nfs_dolock(ap)); } /* @@ -4040,44 +4344,43 @@ nfs_update(ap) int nfs_buf_write(struct nfsbuf *bp) { - int s; int oldflags = bp->nb_flags, rv = 0; - off_t off; - struct vnode *vp = bp->nb_vp; - struct ucred *cr; - struct proc *p = current_proc(); + vnode_t vp = bp->nb_vp; + struct nfsnode *np = VTONFS(vp); + kauth_cred_t cr; + proc_t p = current_proc(); // XXX FSDBG_TOP(553, bp, NBOFF(bp), bp->nb_flags, 0); - if (!ISSET(bp->nb_flags, NB_BUSY)) + if (!ISSET(bp->nb_lflags, NBL_BUSY)) panic("nfs_buf_write: buffer is not busy???"); - s = splbio(); CLR(bp->nb_flags, (NB_READ|NB_DONE|NB_ERROR|NB_DELWRI)); if (ISSET(oldflags, NB_DELWRI)) { - nfs_nbdwrite--; - NFSBUFCNTCHK(); - wakeup((caddr_t)&nfs_nbdwrite); + OSAddAtomic(-1, (SInt32*)&nfs_nbdwrite); + NFSBUFCNTCHK(0); + wakeup(&nfs_nbdwrite); } /* move to clean list */ if (ISSET(oldflags, (NB_ASYNC|NB_DELWRI))) { + lck_mtx_lock(nfs_buf_mutex); if (bp->nb_vnbufs.le_next != NFSNOLIST) LIST_REMOVE(bp, nb_vnbufs); LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs); + lck_mtx_unlock(nfs_buf_mutex); } + vnode_startwrite(vp); - vp->v_numoutput++; if (p && p->p_stats) p->p_stats->p_ru.ru_oublock++; - splx(s); /* * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the request. 
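
Earlier in this hunk, nfs_pathconf_cache() packs the four boolean pathconf answers into the one-byte nm_fsinfo.pcflags, and the switch above tests those bits when a cached copy exists (which is only safe when the server reported NFSV3FSINFO_HOMOGENEOUS, i.e. every file gives the same answers). A restatement of the packing with illustrative bit values (the real NFSPCINFO_* constants live in the NFS headers):

#include <stdint.h>

#define PC_NOTRUNC           0x01   /* cf. NFSPCINFO_NOTRUNC */
#define PC_CHOWN_RESTRICTED  0x02   /* cf. NFSPCINFO_CHOWN_RESTRICTED */
#define PC_CASE_INSENSITIVE  0x04   /* cf. NFSPCINFO_CASE_INSENSITIVE */
#define PC_CASE_PRESERVING   0x08   /* cf. NFSPCINFO_CASE_PRESERVING */

static uint8_t
pack_pcflags(int notrunc, int chownrestricted,
             int caseinsensitive, int casepreserving)
{
    uint8_t f = 0;
    if (notrunc)         f |= PC_NOTRUNC;
    if (chownrestricted) f |= PC_CHOWN_RESTRICTED;
    if (caseinsensitive) f |= PC_CASE_INSENSITIVE;
    if (casepreserving)  f |= PC_CASE_PRESERVING;
    return f;
}
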
*/ if (ISSET(bp->nb_flags, NB_ASYNC)) - p = (struct proc *)0; + p = NULL; if (ISSET(bp->nb_flags, NB_READ)) cr = bp->nb_rcred; else @@ -4089,14 +4392,34 @@ nfs_buf_write(struct nfsbuf *bp) rv = nfs_buf_iowait(bp); /* move to clean list */ if (oldflags & NB_DELWRI) { - s = splbio(); + lck_mtx_lock(nfs_buf_mutex); if (bp->nb_vnbufs.le_next != NFSNOLIST) LIST_REMOVE(bp, nb_vnbufs); LIST_INSERT_HEAD(&VTONFS(vp)->n_cleanblkhd, bp, nb_vnbufs); - splx(s); + lck_mtx_unlock(nfs_buf_mutex); } + oldflags = bp->nb_flags; FSDBG_BOT(553, bp, NBOFF(bp), bp->nb_flags, rv); + if (cr) { + kauth_cred_ref(cr); + } nfs_buf_release(bp, 1); + if (ISSET(oldflags, NB_ERROR) && !(np->n_flag & NFLUSHINPROG)) { + /* + * There was a write error and we need to + * invalidate attrs and flush buffers in + * order to sync up with the server. + * (if this write was extending the file, + * we may no longer know the correct size) + * + * But we couldn't call vinvalbuf while holding + * the buffer busy. So we call vinvalbuf() after + * releasing the buffer. + */ + nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, cr, p, 1); + } + if (cr) + kauth_cred_rele(cr); return (rv); } @@ -4104,79 +4427,17 @@ nfs_buf_write(struct nfsbuf *bp) return (rv); } -/* - * nfs special file access vnode op. - * Essentially just get vattr and then imitate iaccess() since the device is - * local to the client. - */ -static int -nfsspec_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; -{ - register struct vattr *vap; - register gid_t *gp; - register struct ucred *cred = ap->a_cred; - struct vnode *vp = ap->a_vp; - mode_t mode = ap->a_mode; - struct vattr vattr; - register int i; - int error; - - /* - * Disallow write attempts on filesystems mounted read-only; - * unless the file is a socket, fifo, or a block or character - * device resident on the filesystem. - */ - if ((mode & VWRITE) && vp->v_mount && (vp->v_mount->mnt_flag & MNT_RDONLY)) { - switch (vp->v_type) { - case VREG: case VDIR: case VLNK: - return (EROFS); - } - } - /* - * If you're the super-user, - * you always get access. - */ - if (cred->cr_uid == 0) - return (0); - vap = &vattr; - error = VOP_GETATTR(vp, vap, cred, ap->a_p); - if (error) - return (error); - /* - * Access check is based on only one of owner, group, public. - * If not owner, then check group. If not a member of the - * group, then check public access. - */ - if (cred->cr_uid != vap->va_uid) { - mode >>= 3; - gp = cred->cr_groups; - for (i = 0; i < cred->cr_ngroups; i++, gp++) - if (vap->va_gid == *gp) - goto found; - mode >>= 3; -found: - ; - } - error = (vap->va_mode & mode) == mode ? 0 : EACCES; - return (error); -} - /* * Read wrapper for special devices. 
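
The removed nfsspec_access() above carried the classic owner/group/other permission algorithm (presumably superseded by the kauth-based access checks used elsewhere in this patch): pick exactly one permission class for the caller, then require every requested bit in that class. A user-space restatement:

#include <errno.h>
#include <sys/types.h>

/* 'req' holds the requested bits in the owner position (e.g. 0400|0200). */
static int
access_check(uid_t uid, const gid_t *groups, int ngroups,
             uid_t fuid, gid_t fgid, mode_t fmode, mode_t req)
{
    int i;

    if (uid == 0)
        return 0;               /* super-user always gets access */
    if (uid != fuid) {
        req >>= 3;              /* not the owner: try the group bits */
        for (i = 0; i < ngroups; i++)
            if (groups[i] == fgid)
                goto found;
        req >>= 3;              /* not in the group: the "other" bits */
found:      ;
    }
    return ((fmode & req) == req) ? 0 : EACCES;
}
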
*/ static int nfsspec_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; + struct vnop_read_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); @@ -4189,7 +4450,7 @@ nfsspec_read(ap) microtime(&now); np->n_atim.tv_sec = now.tv_sec; np->n_atim.tv_nsec = now.tv_usec * 1000; - return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); + return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap)); } /* @@ -4197,11 +4458,12 @@ nfsspec_read(ap) */ static int nfsspec_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; + struct vnop_write_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); @@ -4214,7 +4476,7 @@ nfsspec_write(ap) microtime(&now); np->n_mtim.tv_sec = now.tv_sec; np->n_mtim.tv_nsec = now.tv_usec * 1000; - return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); + return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -4224,45 +4486,51 @@ nfsspec_write(ap) */ static int nfsspec_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + struct vnop_close_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - struct vattr vattr; + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct vnode_attr vattr; + mount_t mp; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; - if (vp->v_usecount == 1 && vp->v_mount && - (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - VATTR_NULL(&vattr); - if (np->n_flag & NACC) - vattr.va_atime = np->n_atim; - if (np->n_flag & NUPD) - vattr.va_mtime = np->n_mtim; - (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) { + VATTR_INIT(&vattr); + if (np->n_flag & NACC) { + vattr.va_access_time = np->n_atim; + VATTR_SET_ACTIVE(&vattr, va_access_time); + } + if (np->n_flag & NUPD) { + vattr.va_modify_time = np->n_mtim; + VATTR_SET_ACTIVE(&vattr, va_modify_time); + } + vnode_setattr(vp, &vattr, ap->a_context); } } - return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); + return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap)); } +extern vnop_t **fifo_vnodeop_p; + /* * Read wrapper for fifos. 
*/ static int nfsfifo_read(ap) - struct vop_read_args /* { - struct vnode *a_vp; + struct vnop_read_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { - extern vop_t **fifo_vnodeop_p; register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval now; @@ -4273,7 +4541,7 @@ nfsfifo_read(ap) microtime(&now); np->n_atim.tv_sec = now.tv_sec; np->n_atim.tv_nsec = now.tv_usec * 1000; - return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); + return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap)); } /* @@ -4281,14 +4549,14 @@ nfsfifo_read(ap) */ static int nfsfifo_write(ap) - struct vop_write_args /* { - struct vnode *a_vp; + struct vnop_write_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; + int a_ioflag; + vfs_context_t a_context; } */ *ap; { - extern vop_t **fifo_vnodeop_p; register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval now; @@ -4299,7 +4567,7 @@ nfsfifo_write(ap) microtime(&now); np->n_mtim.tv_sec = now.tv_sec; np->n_mtim.tv_nsec = now.tv_usec * 1000; - return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); + return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -4309,18 +4577,18 @@ nfsfifo_write(ap) */ static int nfsfifo_close(ap) - struct vop_close_args /* { - struct vnode *a_vp; - int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + struct vnop_close_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct nfsnode *np = VTONFS(vp); - struct vattr vattr; + vnode_t vp = ap->a_vp; + struct nfsnode *np = VTONFS(vp); + struct vnode_attr vattr; struct timeval now; - extern vop_t **fifo_vnodeop_p; + mount_t mp; if (np->n_flag & (NACC | NUPD)) { microtime(&now); @@ -4333,22 +4601,34 @@ nfsfifo_close(ap) np->n_mtim.tv_nsec = now.tv_usec * 1000; } np->n_flag |= NCHG; - if (vp->v_usecount == 1 && vp->v_mount && - (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - VATTR_NULL(&vattr); - if (np->n_flag & NACC) - vattr.va_atime = np->n_atim; - if (np->n_flag & NUPD) - vattr.va_mtime = np->n_mtim; - (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); + if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) { + VATTR_INIT(&vattr); + if (np->n_flag & NACC) { + vattr.va_access_time = np->n_atim; + VATTR_SET_ACTIVE(&vattr, va_access_time); + } + if (np->n_flag & NUPD) { + vattr.va_modify_time = np->n_mtim; + VATTR_SET_ACTIVE(&vattr, va_modify_time); + } + vnode_setattr(vp, &vattr, ap->a_context); } } - return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); + return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } +/*ARGSUSED*/ static int -nfs_ioctl(ap) - struct vop_ioctl_args *ap; +nfs_ioctl( + __unused struct vnop_ioctl_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + u_long a_command; + caddr_t a_data; + int a_fflag; + kauth_cred_t a_cred; + proc_t a_p; + } */ *ap) { /* @@ -4358,9 +4638,18 @@ nfs_ioctl(ap) return (ENOTTY); } +/*ARGSUSED*/ static int -nfs_select(ap) - struct vop_select_args *ap; +nfs_select( + __unused struct vnop_select_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_which; + int a_fflags; + kauth_cred_t a_cred; + void *a_wql; + proc_t a_p; + } */ *ap) { /* @@ -4376,32 +4665,32 @@ nfs_select(ap) */ static int nfs_pagein(ap) - struct vop_pagein_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t 
a_pl_offset, - off_t a_f_offset, - size_t a_size, - struct ucred *a_cred, - int a_flags + struct vnop_pagein_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_f_offset; + size_t a_size; + int a_flags; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; upl_t pl = ap->a_pl; size_t size= ap->a_size; off_t f_offset = ap->a_f_offset; vm_offset_t pl_offset = ap->a_pl_offset; int flags = ap->a_flags; - struct ucred *cred; + kauth_cred_t cred; + proc_t p; struct nfsnode *np = VTONFS(vp); int biosize, xsize, iosize; - struct vattr vattr; - struct proc *p = current_proc(); struct nfsmount *nmp; int error = 0; vm_offset_t ioaddr; struct uio auio; - struct iovec aiov; + struct iovec_32 aiov; struct uio * uio = &auio; int nofreeupl = flags & UPL_NOCOMMIT; upl_page_info_t *plinfo; @@ -4413,7 +4702,7 @@ nfs_pagein(ap) if (UBCINVALID(vp)) { printf("nfs_pagein: invalid vnode 0x%x", (int)vp); if (!nofreeupl) - (void) ubc_upl_abort(pl, NULL); + (void) ubc_upl_abort(pl, 0); return (EPERM); } UBCINFOCHECK("nfs_pagein", vp); @@ -4421,25 +4710,31 @@ nfs_pagein(ap) if (size <= 0) { printf("nfs_pagein: invalid size %d", size); if (!nofreeupl) - (void) ubc_upl_abort(pl, NULL); + (void) ubc_upl_abort(pl, 0); return (EINVAL); } - if (f_offset < 0 || f_offset >= np->n_size || (f_offset & PAGE_MASK_64)) { + if (f_offset < 0 || f_offset >= (off_t)np->n_size || (f_offset & PAGE_MASK_64)) { if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); return (EINVAL); } + cred = ubc_getcred(vp); if (cred == NOCRED) - cred = ap->a_cred; + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); auio.uio_offset = f_offset; +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ auio.uio_segflg = UIO_SYSSPACE; +#else + auio.uio_segflg = UIO_SYSSPACE32; +#endif auio.uio_rw = UIO_READ; - auio.uio_procp = NULL; + auio.uio_procp = p; - nmp = VFSTONFS(vp->v_mount); + nmp = VFSTONFS(vnode_mount(vp)); if (!nmp) { if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, @@ -4448,7 +4743,7 @@ nfs_pagein(ap) } if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) (void)nfs_fsinfo(nmp, vp, cred, p); - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; plinfo = ubc_upl_pageinfo(pl); ubc_upl_map(pl, &ioaddr); @@ -4462,35 +4757,35 @@ nfs_pagein(ap) * before sending the next one. * XXX Should we align these requests to block boundaries? */ - iosize = min(biosize, xsize); - uio->uio_resid = iosize; + iosize = min(biosize, xsize); aiov.iov_len = iosize; - aiov.iov_base = (caddr_t)ioaddr; - auio.uio_iov = &aiov; + aiov.iov_base = (uintptr_t)ioaddr; + auio.uio_iovs.iov32p = &aiov; auio.uio_iovcnt = 1; + uio_uio_resid_set(&auio, iosize); - FSDBG(322, uio->uio_offset, uio->uio_resid, ioaddr, xsize); -// XXX #warning our nfs_pagein does not support NQNFS + FSDBG(322, uio->uio_offset, uio_uio_resid(uio), ioaddr, xsize); /* * With UBC we get here only when the file data is not in the VM * page cache, so go ahead and read in. 
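
The pagein path here reads the UPL's backing range in biosize chunks and, as the comment just below notes, zero-fills whatever a short read leaves behind, since uio_resid > 0 after the RPC means a hole or EOF. A user-space sketch of the same loop shape, with pread() standing in for nfs_readrpc():

#include <string.h>
#include <unistd.h>
#include <sys/types.h>

static int
pagein_fill(int fd, char *buf, off_t off, size_t size, size_t biosize)
{
    while (size > 0) {
        size_t chunk = size < biosize ? size : biosize;
        ssize_t got = pread(fd, buf, chunk, off);
        if (got < 0)
            return -1;                   /* error: caller aborts the UPL */
        if ((size_t)got < chunk)         /* hole or EOF: zero the rest */
            memset(buf + got, 0, chunk - (size_t)got);
        buf  += chunk;
        off  += (off_t)chunk;
        size -= chunk;
    }
    return 0;
}
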
*/ -#ifdef UBC_DEBUG - upl_ubc_alias_set(pl, current_act(), 2); -#endif /* UBC_DEBUG */ - nfsstats.pageins++; +#ifdef UPL_DEBUG + upl_ubc_alias_set(pl, current_thread(), 2); +#endif /* UPL_DEBUG */ + OSAddAtomic(1, (SInt32*)&nfsstats.pageins); - error = nfs_readrpc(vp, uio, cred); + error = nfs_readrpc(vp, uio, cred, p); if (!error) { - if (uio->uio_resid) { + if (uio_uio_resid(uio)) { /* * If uio_resid > 0, there is a hole in the file * and no writes after the hole have been pushed * to the server yet... or we're at the EOF * Just zero fill the rest of the valid area. */ - int zcnt = uio->uio_resid; + // LP64todo - fix this + int zcnt = uio_uio_resid(uio); int zoff = iosize - zcnt; bzero((char *)ioaddr + zoff, zcnt); @@ -4499,21 +4794,11 @@ nfs_pagein(ap) } ioaddr += iosize; xsize -= iosize; - } else - FSDBG(322, uio->uio_offset, uio->uio_resid, error, -1); - - nmp = VFSTONFS(vp->v_mount); - if (p && (vp->v_flag & VTEXT) && nmp && - ((nmp->nm_flag & NFSMNT_NQNFS && - NQNFS_CKINVALID(vp, np, ND_READ) && - np->n_lrev != np->n_brev) || - (!(nmp->nm_flag & NFSMNT_NQNFS) && - np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { - uprintf("Process killed due to text file modification\n"); - psignal(p, SIGKILL); - p->p_flag |= P_NOSWAP; + } else { + FSDBG(322, uio->uio_offset, uio_uio_resid(uio), error, -1); } + nmp = VFSTONFS(vnode_mount(vp)); } while (error == 0 && xsize > 0); ubc_upl_unmap(pl); @@ -4539,36 +4824,36 @@ nfs_pagein(ap) */ static int nfs_pageout(ap) - struct vop_pageout_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t a_pl_offset, - off_t a_f_offset, - size_t a_size, - struct ucred *a_cred, - int a_flags + struct vnop_pageout_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_f_offset; + size_t a_size; + int a_flags; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; upl_t pl = ap->a_pl; size_t size= ap->a_size; off_t f_offset = ap->a_f_offset; vm_offset_t pl_offset = ap->a_pl_offset; int flags = ap->a_flags; - int ioflag = ap->a_flags; - struct proc *p = current_proc(); struct nfsnode *np = VTONFS(vp); - register struct ucred *cred; + kauth_cred_t cred; + proc_t p; struct nfsbuf *bp; - struct nfsmount *nmp = VFSTONFS(vp->v_mount); - daddr_t lbn; - int n = 0, on, error = 0, iomode, must_commit, s; + struct nfsmount *nmp = VFSTONFS(vnode_mount(vp)); + daddr64_t lbn; + int error = 0, iomode, must_commit; off_t off; vm_offset_t ioaddr; struct uio auio; - struct iovec aiov; + struct iovec_32 aiov; int nofreeupl = flags & UPL_NOCOMMIT; - int biosize, iosize, pgsize, xsize; + size_t biosize, iosize, pgsize, xsize; FSDBG(323, f_offset, size, pl, pl_offset); @@ -4595,7 +4880,7 @@ nfs_pageout(ap) ubc_upl_abort(pl, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY); return (ENXIO); } - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(vnode_mount(vp))->f_iosize; /* * Check to see whether the buffer is incore. @@ -4608,10 +4893,11 @@ nfs_pageout(ap) if (off + xsize > f_offset + size) xsize = f_offset + size - off; lbn = ubc_offtoblk(vp, off); - s = splbio(); - if (bp = nfs_buf_incore(vp, lbn)) { - FSDBG(323, off, 1, bp, bp->nb_flags); - if (ISSET(bp->nb_flags, NB_BUSY)) { + lck_mtx_lock(nfs_buf_mutex); + if ((bp = nfs_buf_incore(vp, lbn))) { + FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags); + if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) { + lck_mtx_unlock(nfs_buf_mutex); /* no panic. 
just tell vm we are busy */ if (!nofreeupl) ubc_upl_abort(pl, 0); @@ -4619,8 +4905,8 @@ nfs_pageout(ap) } if (bp->nb_dirtyend > 0) { /* - * if there's a dirty range in the buffer, check to - * see if it extends beyond the pageout region + * if there's a dirty range in the buffer, check + * to see if it extends beyond the pageout region * * if the dirty region lies completely within the * pageout region, we just invalidate the buffer @@ -4638,7 +4924,7 @@ nfs_pageout(ap) start = off; end = off + xsize; /* clip end to EOF */ - if (end > np->n_size) + if (end > (off_t)np->n_size) end = np->n_size; start -= boff; end -= boff; @@ -4646,6 +4932,8 @@ nfs_pageout(ap) (bp->nb_dirtyend > end)) { /* not gonna be able to clip the dirty region */ FSDBG(323, vp, bp, 0xd00deebc, EBUSY); + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); if (!nofreeupl) ubc_upl_abort(pl, 0); return (EBUSY); @@ -4659,24 +4947,29 @@ nfs_pageout(ap) bp->nb_dirtyoff = max(bp->nb_dirtyoff, end); FSDBG(323, bp, bp->nb_dirtyoff, bp->nb_dirtyend, 0xd00dee00); /* we're leaving this block dirty */ + nfs_buf_drop(bp); + lck_mtx_unlock(nfs_buf_mutex); continue; } } nfs_buf_remfree(bp); - SET(bp->nb_flags, (NB_BUSY | NB_INVAL)); + lck_mtx_unlock(nfs_buf_mutex); + SET(bp->nb_flags, NB_INVAL); if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { CLR(bp->nb_flags, NB_NEEDCOMMIT); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); } nfs_buf_release(bp, 1); + } else { + lck_mtx_unlock(nfs_buf_mutex); } - splx(s); } cred = ubc_getcred(vp); if (cred == NOCRED) - cred = ap->a_cred; + cred = vfs_context_ucred(ap->a_context); + p = vfs_context_proc(ap->a_context); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; @@ -4685,11 +4978,10 @@ nfs_pageout(ap) UPL_ABORT_FREE_ON_EMPTY); return (np->n_error); } - if ((nmp->nm_flag & NFSMNT_NFSV3) && - !(nmp->nm_state & NFSSTA_GOTFSINFO)) - (void)nfs_fsinfo(nmp, vp, cred, p); + if ((nmp->nm_flag & NFSMNT_NFSV3) && !(nmp->nm_state & NFSSTA_GOTFSINFO)) + nfs_fsinfo(nmp, vp, cred, p); - if (f_offset < 0 || f_offset >= np->n_size || + if (f_offset < 0 || f_offset >= (off_t)np->n_size || f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) { if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, @@ -4700,7 +4992,7 @@ nfs_pageout(ap) ubc_upl_map(pl, &ioaddr); ioaddr += pl_offset; - if (f_offset + size > np->n_size) + if ((u_quad_t)f_offset + size > np->n_size) xsize = np->n_size - f_offset; else xsize = size; @@ -4718,16 +5010,20 @@ nfs_pageout(ap) * contents past end of the file before * releasing it in the VM page cache */ - if (f_offset < np->n_size && f_offset + size > np->n_size) { + if ((u_quad_t)f_offset < np->n_size && (u_quad_t)f_offset + size > np->n_size) { size_t io = np->n_size - f_offset; bzero((caddr_t)(ioaddr + io), size - io); FSDBG(321, np->n_size, f_offset, f_offset + io, size - io); } auio.uio_offset = f_offset; +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ auio.uio_segflg = UIO_SYSSPACE; +#else + auio.uio_segflg = UIO_SYSSPACE32; +#endif auio.uio_rw = UIO_READ; - auio.uio_procp = NULL; + auio.uio_procp = p; do { /* @@ -4737,23 +5033,23 @@ nfs_pageout(ap) * XXX Should we align these requests to block boundaries? 
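
The incore-buffer handling earlier in this hunk decides, per buffer, whether its dirty byte range can coexist with the pageout: a range completely inside the pageout region lets the buffer be invalidated, a range sticking out both ends returns EBUSY, and a range overlapping one end is clipped so only the part outside the region stays dirty. A simplified standalone restatement of that decision (the kernel version also converts to buffer-relative offsets and clips end to EOF):

#include <errno.h>
#include <sys/types.h>

/* Dirty range [*doff, *dend) vs. pageout region [start, end).
 * Returns 0 on success; *doff == *dend == 0 means invalidate. */
static int
clip_dirty_range(off_t start, off_t end, off_t *doff, off_t *dend)
{
    if (*doff >= start && *dend <= end) {
        *doff = *dend = 0;      /* fully covered: buffer can be invalidated */
        return 0;
    }
    if (*doff < start && *dend > end)
        return EBUSY;           /* sticks out both sides: cannot clip */
    if (*doff < start)
        *dend = start;          /* keep only the part before the region */
    else
        *doff = end;            /* keep only the part after the region */
    return 0;
}
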
*/ iosize = min(biosize, xsize); - auio.uio_resid = iosize; + uio_uio_resid_set(&auio, iosize); aiov.iov_len = iosize; - aiov.iov_base = (caddr_t)ioaddr; - auio.uio_iov = &aiov; + aiov.iov_base = (uintptr_t)ioaddr; + auio.uio_iovs.iov32p = &aiov; auio.uio_iovcnt = 1; - FSDBG(323, auio.uio_offset, auio.uio_resid, ioaddr, xsize); -// XXX #warning our nfs_pageout does not support NQNFS - nfsstats.pageouts++; + FSDBG(323, auio.uio_offset, uio_uio_resid(&auio), ioaddr, xsize); + OSAddAtomic(1, (SInt32*)&nfsstats.pageouts); + + vnode_startwrite(vp); - vp->v_numoutput++; /* NMODIFIED would be set here if doing unstable writes */ iomode = NFSV3WRITE_FILESYNC; - error = nfs_writerpc(vp, &auio, cred, &iomode, &must_commit); + error = nfs_writerpc(vp, &auio, cred, p, &iomode, &must_commit); if (must_commit) - nfs_clearcommit(vp->v_mount); - vpwakeup(vp); + nfs_clearcommit(vnode_mount(vp)); + vnode_writedone(vp); if (error) goto cleanup; /* Note: no need to check uio_resid, because */ @@ -4790,7 +5086,7 @@ cleanup: if (!nofreeupl) { /* otherwise stacked file system has to handle this */ if (error) { - int abortflags; + int abortflags = 0; short action = nfs_pageouterrorhandler(error); switch (action) { @@ -4810,8 +5106,7 @@ cleanup: case RETRYWITHSLEEP: abortflags = UPL_ABORT_FREE_ON_EMPTY; /* pri unused. PSOCK for placeholder. */ - (void) tsleep(&lbolt, PSOCK, - "nfspageout", 0); + tsleep(&lbolt, PSOCK, "nfspageout", 0); break; case SEVER: /* not implemented */ default: @@ -4833,55 +5128,47 @@ cleanup: /* Blktooff derives file offset given a logical block number */ static int nfs_blktooff(ap) - struct vop_blktooff_args /* { - struct vnode *a_vp; - daddr_t a_lblkno; - off_t *a_offset; + struct vnop_blktooff_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + daddr64_t a_lblkno; + off_t *a_offset; } */ *ap; { int biosize; - register struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; + mount_t mp = vnode_mount(vp); - if (!vp->v_mount) + if (!mp) return (ENXIO); - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(mp)->f_iosize; - *ap->a_offset = (off_t)ap->a_lblkno * biosize; + *ap->a_offset = (off_t)(ap->a_lblkno * biosize); return (0); } static int nfs_offtoblk(ap) - struct vop_offtoblk_args /* { - struct vnode *a_vp; - off_t a_offset; - daddr_t *a_lblkno; + struct vnop_offtoblk_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + off_t a_offset; + daddr64_t *a_lblkno; } */ *ap; { int biosize; - register struct vnode *vp = ap->a_vp; + vnode_t vp = ap->a_vp; + mount_t mp = vnode_mount(vp); - if (!vp->v_mount) + if (!mp) return (ENXIO); - biosize = vp->v_mount->mnt_stat.f_iosize; + biosize = vfs_statfs(mp)->f_iosize; - *ap->a_lblkno = (daddr_t)(ap->a_offset / biosize); + *ap->a_lblkno = (daddr64_t)(ap->a_offset / biosize); return (0); } -static int -nfs_cmap(ap) - struct vop_cmap_args /* { - struct vnode *a_vp; - off_t a_offset; - size_t a_size; - daddr_t *a_bpn; - size_t *a_run; - void *a_poff; - } */ *ap; -{ - return (EOPNOTSUPP); -} + diff --git a/bsd/nfs/nfsdiskless.h b/bsd/nfs/nfsdiskless.h index 3fa123d7f..c5292026d 100644 --- a/bsd/nfs/nfsdiskless.h +++ b/bsd/nfs/nfsdiskless.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -111,33 +111,11 @@ struct nfs_dlmount { char ndm_host[MNAMELEN]; /* Host name for mount pt */ char *ndm_path; /* path name for mount pt */ u_long ndm_nfsv3; /* NFSv3 or NFSv2? 
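
nfs_blktooff()/nfs_offtoblk() above are pure arithmetic on the mount's f_iosize; the substantive change in this hunk is the widening from daddr_t to daddr64_t. The whole mapping is:

#include <stdint.h>

static int64_t blktooff(int64_t lblkno, int biosize) { return lblkno * (int64_t)biosize; }
static int64_t offtoblk(int64_t offset, int biosize) { return offset / (int64_t)biosize; }
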
*/ + u_long ndm_sotype; /* SOCK_STREAM or SOCK_DGRAM? */ u_long ndm_fhlen; /* length of file handle */ u_char ndm_fh[NFSX_V3FHMAX]; /* The file's file handle */ }; -/* - * Old arguments to mount NFS - */ -struct onfs_args { - struct sockaddr *addr; /* file server address */ - int addrlen; /* length of address */ - int sotype; /* Socket type */ - int proto; /* and Protocol */ - u_char *fh; /* File handle to be mounted */ - int fhsize; /* Size, in bytes, of fh */ - int flags; /* flags */ - int wsize; /* write size in bytes */ - int rsize; /* read size in bytes */ - int readdirsize; /* readdir size in bytes */ - int timeo; /* initial timeout in .1 secs */ - int retrans; /* times to retry send */ - int maxgrouplist; /* Max. size of group list */ - int readahead; /* # of blocks to readahead */ - int leaseterm; /* Term (sec) of lease */ - int deadthresh; /* Retrans threshold */ - char *hostname; /* server's name */ -}; - struct nfs_diskless { struct nfs_dlmount nd_root; /* Mount info for root */ struct nfs_dlmount nd_private; /* Mount info for private */ diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h index cc1ac71b2..1e8de1dd5 100644 --- a/bsd/nfs/nfsm_subs.h +++ b/bsd/nfs/nfsm_subs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -75,25 +75,12 @@ /* * First define what the actual subs. return */ -struct mbuf *nfsm_reqh __P((struct vnode *vp, u_long procid, int hsiz, - caddr_t *bposp)); -struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, +int nfsm_reqh(int hsiz, caddr_t *bposp, mbuf_t *mbp); +int nfsm_rpchead(struct ucred *cr, int nmflag, int procid, int auth_type, int auth_len, char *auth_str, int verf_len, char *verf_str, - struct mbuf *mrest, int mrest_len, - struct mbuf **mbp, u_long *xidp)); - -#define M_HASCL(m) ((m)->m_flags & M_EXT) -#define NFSMINOFF(m) \ - if (M_HASCL(m)) \ - (m)->m_data = (m)->m_ext.ext_buf; \ - else if ((m)->m_flags & M_PKTHDR) \ - (m)->m_data = (m)->m_pktdat; \ - else \ - (m)->m_data = (m)->m_dat -#define NFSMADV(m, s) (m)->m_data += (s) -#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \ - (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN)) + mbuf_t mrest, int mrest_len, + mbuf_t *mbp, u_long *xidp, mbuf_t *mreqp); /* * Now for the macros that do the simple stuff and call the functions @@ -109,27 +96,31 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, */ #define nfsm_build(a,c,s) \ - { if ((s) > M_TRAILINGSPACE(mb)) { \ - MGET(mb2, M_WAIT, MT_DATA); \ - if ((s) > MLEN) \ - panic("build > MLEN"); \ - mb->m_next = mb2; \ + { if ((s) > mbuf_trailingspace(mb)) { \ + int __nfsm_error; \ + __nfsm_error = mbuf_get(MBUF_WAITOK, MBUF_TYPE_DATA, &mb2); \ + if (__nfsm_error) \ + panic("nfsm_build mbuf_get error %d", __nfsm_error); \ + if ((s) > mbuf_maxlen(mb2)) \ + panic("nfsm_build size error"); \ + __nfsm_error = mbuf_setnext(mb, mb2); \ + if (__nfsm_error) \ + panic("nfsm_build mbuf_setnext error %d", __nfsm_error); \ mb = mb2; \ - mb->m_len = 0; \ - bpos = mtod(mb, caddr_t); \ + bpos = mbuf_data(mb); \ } \ (a) = (c)(bpos); \ - mb->m_len += (s); \ + mbuf_setlen(mb, (mbuf_len(mb) + (s))); \ bpos += (s); } #define nfsm_dissect(a, c, s) \ - { t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + { t1 = ((caddr_t)mbuf_data(md)) + mbuf_len(md) - dpos; \ if (t1 >= (s)) { \ (a) = (c)(dpos); \ dpos += (s); \ } else if ((t1 = nfsm_disct(&md, &dpos, (s), t1, &cp2))) { \ error = t1; \ - m_freem(mrep); \ + 
mbuf_freem(mrep); \ goto nfsmout; \ } else { \ (a) = (c)cp2; \ @@ -138,7 +129,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, #define nfsm_fhtom(v, v3) \ { if (v3) { \ t2 = nfsm_rndup(VTONFS(v)->n_fhsize) + NFSX_UNSIGNED; \ - if (t2 <= M_TRAILINGSPACE(mb)) { \ + if (t2 <= mbuf_trailingspace(mb)) { \ nfsm_build(tl, u_long *, t2); \ *tl++ = txdr_unsigned(VTONFS(v)->n_fhsize); \ *(tl + ((t2>>2) - 2)) = 0; \ @@ -147,7 +138,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, \ (caddr_t)VTONFS(v)->n_fhp, VTONFS(v)->n_fhsize))) { \ error = t2; \ - m_freem(mreq); \ + mbuf_freem(mreq); \ goto nfsmout; \ } \ } else { \ @@ -157,47 +148,67 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, #define nfsm_srvfhtom(f, v3) \ { if (v3) { \ - nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FH); \ - *tl++ = txdr_unsigned(NFSX_V3FH); \ - bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED + (unsigned)(f)->nfh_len); \ + *tl++ = txdr_unsigned((f)->nfh_len); \ + bcopy((caddr_t)&(f)->nfh_xh, (caddr_t)tl, (f)->nfh_len); \ } else { \ nfsm_build(cp, caddr_t, NFSX_V2FH); \ - bcopy((caddr_t)(f), cp, NFSX_V2FH); \ + bcopy((caddr_t)&(f)->nfh_xh, cp, NFSX_V2FH); \ } } #define nfsm_srvpostop_fh(f) \ - { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3FH); \ + { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED + (unsigned)(f)->nfh_len); \ *tl++ = nfs_true; \ - *tl++ = txdr_unsigned(NFSX_V3FH); \ - bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ + *tl++ = txdr_unsigned((f)->nfh_len); \ + bcopy((caddr_t)&(f)->nfh_xh, (caddr_t)tl, (f)->nfh_len); \ } -#define nfsm_mtofh(d, v, v3, f, x) \ - { struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \ - if (v3) { \ +#define nfsm_mtofh(d, cnp, v, v3, xp, f) \ + { \ + struct nfsnode *ttnp; u_char *ttfhp = NULL; \ + int ttfhsize = 0, ttgotfh = 1, ttgotattr = 1, ttgotnode = 0; \ + struct nfs_vattr ttvattr; \ + (v) = NULL; \ + /* XXX would be nice to not bail to nfsmout on error */ \ + if (v3) { /* check for file handle */ \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - (f) = fxdr_unsigned(int, *tl); \ - } else \ - (f) = 1; \ - if (f) { \ + ttgotfh = fxdr_unsigned(int, *tl); \ + } \ + if (ttgotfh) { \ + /* get file handle */ \ nfsm_getfh(ttfhp, ttfhsize, (v3)); \ - if ((t1 = nfs_nget((d)->v_mount, ttfhp, ttfhsize, \ - &ttnp))) { \ - error = t1; \ - m_freem(mrep); \ - goto nfsmout; \ - } \ - (v) = NFSTOV(ttnp); \ } \ - if (v3) { \ + if (v3) { /* check for attributes */ \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - if (f) \ - (f) = fxdr_unsigned(int, *tl); \ - else if (fxdr_unsigned(int, *tl)) \ + ttgotattr = fxdr_unsigned(int, *tl); \ + } \ + /* get attributes */ \ + if (ttgotattr) { \ + if (!ttgotfh) { \ nfsm_adv(NFSX_V3FATTR); \ + } else { \ + nfsm_attr_get(v3, &ttvattr); \ + } \ + } else if (ttgotfh) { \ + /* We need valid attributes in order */ \ + /* to call nfs_nget/vnode_create(). 
*/ \ + t1 = nfs_getattr_no_vnode(vnode_mount(d), \ + ttfhp, ttfhsize, cred, p, &ttvattr, xp); \ + if (t1) \ + ttgotattr = 0; \ + } \ + if (ttgotfh && ttgotattr) { \ + int ttngflags = NG_MAKEENTRY; \ + if ((t1 = nfs_nget(vnode_mount(d), d, cnp, ttfhp, ttfhsize, \ + &ttvattr, xp, ttngflags, &ttnp))) { \ + error = t1; \ + ttgotnode = 0; \ + } else { \ + ttgotnode = 1; \ + (v) = NFSTOV(ttnp); \ + } \ } \ - if (f) \ - nfsm_loadattr((v), (struct vattr *)0, (x)); \ + (f) = ttgotnode; \ } #define nfsm_getfh(f, s, v3) \ @@ -205,38 +216,72 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (((s) = fxdr_unsigned(int, *tl)) <= 0 || \ (s) > NFSX_V3FHMAX) { \ - m_freem(mrep); \ + mbuf_freem(mrep); \ error = EBADRPC; \ goto nfsmout; \ } \ - } else \ + } else { \ (s) = NFSX_V2FH; \ - nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); } + } \ + nfsm_dissect((f), u_char *, nfsm_rndup(s)); } -#define nfsm_loadattr(v, a, x) \ - { struct vnode *ttvp = (v); \ - if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a), 0, \ - (x)))) { \ +#define nfsm_loadattr(v, v3, a, x) \ + { struct nfs_vattr ttvattr; \ + if ((t1 = nfs_parsefattr(&md, &dpos, v3, &ttvattr))) { \ error = t1; \ - m_freem(mrep); \ + mbuf_freem(mrep); \ goto nfsmout; \ } \ - (v) = ttvp; } + if ((t1 = nfs_loadattrcache(VTONFS(v), &ttvattr, (x), 0))) { \ + error = t1; \ + mbuf_freem(mrep); \ + goto nfsmout; \ + } \ + if (a) { \ + bcopy(&ttvattr, (a), sizeof(ttvattr)); \ + } \ + } -#define nfsm_postop_attr(v, f, x) \ - { struct vnode *ttvp = (v); \ +#define nfsm_attr_get(v3, vap) \ + { \ + if ((t1 = nfs_parsefattr(&md, &dpos, v3, vap))) { \ + error = t1; \ + mbuf_freem(mrep); \ + goto nfsmout; \ + } \ + } + +#define nfsm_postop_attr_get(v3, f, vap) \ + { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (((f) = fxdr_unsigned(int, *tl))) { \ - if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \ - (struct vattr *)0, 1, (x)))) { \ + if ((t1 = nfs_parsefattr(&md, &dpos, v3, vap))) { \ error = t1; \ (f) = 0; \ - m_freem(mrep); \ + mbuf_freem(mrep); \ + goto nfsmout; \ + } \ + } } + +#define nfsm_postop_attr_update(v, v3, f, x) \ + { \ + nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ + if (((f) = fxdr_unsigned(int, *tl))) { \ + struct nfs_vattr ttvattr; \ + if ((t1 = nfs_parsefattr(&md, &dpos, v3, &ttvattr))) { \ + error = t1; \ + (f) = 0; \ + mbuf_freem(mrep); \ + goto nfsmout; \ + } \ + if ((t1 = nfs_loadattrcache(VTONFS(v), &ttvattr, (x), 1))) { \ + error = t1; \ + (f) = 0; \ + mbuf_freem(mrep); \ goto nfsmout; \ } \ if (*(x) == 0) \ (f) = 0; \ - (v) = ttvp; \ } } #define nfsm_wcc_data(v, premtime, newpostattr, x) \ @@ -244,29 +289,84 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); \ - (premtime) = fxdr_unsigned(time_t, *(tl + 2)); \ + (premtime)->tv_sec = fxdr_unsigned(time_t, *(tl + 2)); \ + (premtime)->tv_nsec = fxdr_unsigned(time_t, *(tl + 3)); \ } else { \ - (premtime) = 0; \ + (premtime)->tv_sec = 0; \ + (premtime)->tv_nsec = 0; \ } \ - nfsm_postop_attr((v), (newpostattr), (x)); \ + nfsm_postop_attr_update((v), 1, (newpostattr), (x)); \ } -#define nfsm_v3sattr(s, a, u, g) \ - { (s)->sa_modetrue = nfs_true; \ - (s)->sa_mode = vtonfsv3_mode((a)->va_mode); \ - (s)->sa_uidtrue = nfs_true; \ - (s)->sa_uid = txdr_unsigned(u); \ - (s)->sa_gidtrue = nfs_true; \ - (s)->sa_gid = txdr_unsigned(g); \ - (s)->sa_sizefalse = nfs_false; \ - (s)->sa_atimetype = 
txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ - (s)->sa_mtimetype = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ +#define nfsm_v3sattr(vap) \ + {\ + struct timeval now; \ + if (VATTR_IS_ACTIVE(vap, va_mode)) { \ + nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned(vap->va_mode); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if (VATTR_IS_ACTIVE(vap, va_uid)) { \ + nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned(vap->va_uid); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if (VATTR_IS_ACTIVE(vap, va_gid)) { \ + nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + *tl = txdr_unsigned(vap->va_gid); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { \ + nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); \ + *tl++ = nfs_true; \ + txdr_hyper(&vap->va_data_size, tl); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = nfs_false; \ + } \ + microtime(&now); \ + if (VATTR_IS_ACTIVE(vap, va_access_time)) { \ + if (vap->va_access_time.tv_sec != now.tv_sec) { \ + nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); \ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); \ + txdr_nfsv3time(&vap->va_access_time, tl); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ + if (VATTR_IS_ACTIVE(vap, va_modify_time)) { \ + if (vap->va_modify_time.tv_sec != now.tv_sec) { \ + nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); \ + *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); \ + txdr_nfsv3time(&vap->va_modify_time, tl); \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); \ + } \ + } else { \ + nfsm_build(tl, u_long *, NFSX_UNSIGNED); \ + *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); \ + } \ } -#define nfsm_strsiz(s,m) \ +#define nfsm_strsiz(s,m,v3) \ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \ - if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \ - m_freem(mrep); \ + (s) = fxdr_unsigned(long,*tl); \ + if (!(v3) && ((s) > (m))) { \ + mbuf_freem(mrep); \ error = EBADRPC; \ goto nfsmout; \ } } @@ -278,9 +378,10 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, nfsm_reply(0); \ } } -#define nfsm_srvnamesiz(s) \ +#define nfsm_srvnamesiz(s,v3) \ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \ - if (((s) = fxdr_unsigned(long,*tl)) > NFS_MAXNAMLEN) \ + (s) = fxdr_unsigned(long,*tl); \ + if (!(v3) && ((s) > NFS_MAXNAMLEN)) \ error = NFSERR_NAMETOL; \ if ((s) <= 0) \ error = EBADRPC; \ @@ -292,67 +393,50 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, if ((s) > 0 && \ (t1 = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \ error = t1; \ - m_freem(mrep); \ + mbuf_freem(mrep); \ goto nfsmout; \ } #define nfsm_uiotom(p,s) \ if ((t1 = nfsm_uiotombuf((p),&mb,(s),&bpos))) { \ error = t1; \ - m_freem(mreq); \ + mbuf_freem(mreq); \ goto nfsmout; \ } -#define nfsm_reqhead(v,a,s) \ - mb = mreq = nfsm_reqh((v),(a),(s),&bpos) +#define nfsm_reqhead(s) \ + error = nfsm_reqh((s), &bpos, &mreq); \ + mb = mreq; -#define nfsm_reqdone m_freem(mrep); \ +#define nfsm_reqdone mbuf_freem(mrep); \ nfsmout: #define nfsm_rndup(a) (((a)+3)&(~0x3)) -/* -* We seem to see cases mainly on shutdown where the vnode got recycled -* on use 
while waiting on server. Maybe nfs vnode locking will help if -* we implement that, but for now, check for bad vnodes and return an -* error. This call spot should catch most of them. Note that NFSv2 -* just goes to nfsmout here, while nfsV3 goes back to caller's next -* line for post-processing. It will do a nfsm_reqdone also making -* m_freem(mrep). Wondering if some of our freeing problems could be -* due to nfsv3 calling nfsm_reqdone unlike nfsv2. Separate problem. -*/ #define nfsm_request(v, t, p, c, x) \ - { \ - int nfsv3; \ - if (!VFSTONFS((v)->v_mount)) { \ - error = ENXIO; \ - goto nfsmout; \ - } \ - nfsv3 = (VFSTONFS((v)->v_mount))->nm_flag & NFSMNT_NFSV3; \ - if ((error = nfs_request((v), mreq, (t), (p), \ + if ((error = nfs_request((v), vnode_mount(v), mreq, (t), (p), \ (c), &mrep, &md, &dpos, (x)))) { \ if (error & NFSERR_RETERR) \ error &= ~NFSERR_RETERR; \ else \ goto nfsmout; \ - } \ } -#define nfsm_strtom(a,s,m) \ - if ((s) > (m)) { \ - m_freem(mreq); \ +#define nfsm_strtom(a,s,m,v3) \ + if (!(v3) && ((s) > (m))) { \ + mbuf_freem(mreq); \ error = ENAMETOOLONG; \ goto nfsmout; \ } \ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \ - if (t2 <= M_TRAILINGSPACE(mb)) { \ + if (t2 <= mbuf_trailingspace(mb)) { \ nfsm_build(tl,u_long *,t2); \ *tl++ = txdr_unsigned(s); \ *(tl+((t2>>2)-2)) = 0; \ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \ } else if ((t2 = nfsm_strtmbuf(&mb, &bpos, (a), (s)))) { \ error = t2; \ - m_freem(mreq); \ + mbuf_freem(mreq); \ goto nfsmout; \ } @@ -364,68 +448,102 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, { \ nfsd->nd_repstat = error; \ if (error && !(nfsd->nd_flag & ND_NFSV3)) \ - (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ - mrq, &mb, &bpos); \ + nfs_rephead(0, nfsd, slp, error, mrq, &mb, &bpos); \ else \ - (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ - mrq, &mb, &bpos); \ - m_freem(mrep); \ + nfs_rephead((s), nfsd, slp, error, mrq, &mb, &bpos); \ + mbuf_freem(mrep); \ mrep = NULL; \ mreq = *mrq; \ if (error && (!(nfsd->nd_flag & ND_NFSV3) || \ - error == EBADRPC)) \ - return(0); \ + error == EBADRPC)) { \ + error = 0; \ + goto nfsmout; \ + } \ } #define nfsm_writereply(s, v3) \ { \ nfsd->nd_repstat = error; \ if (error && !(v3)) \ - (void) nfs_rephead(0, nfsd, slp, error, cache, &frev, \ - &mreq, &mb, &bpos); \ + nfs_rephead(0, nfsd, slp, error, &mreq, &mb, &bpos); \ else \ - (void) nfs_rephead((s), nfsd, slp, error, cache, &frev, \ - &mreq, &mb, &bpos); \ + nfs_rephead((s), nfsd, slp, error, &mreq, &mb, &bpos); \ } #define nfsm_adv(s) \ - { t1 = mtod(md, caddr_t)+md->m_len-dpos; \ + { t1 = ((caddr_t)mbuf_data(md)) + mbuf_len(md) - dpos; \ if (t1 >= (s)) { \ dpos += (s); \ } else if ((t1 = nfs_adv(&md, &dpos, (s), t1))) { \ error = t1; \ - m_freem(mrep); \ + mbuf_freem(mrep); \ goto nfsmout; \ } } #define nfsm_srvmtofh(f) \ - { if (nfsd->nd_flag & ND_NFSV3) { \ + { \ + if (nfsd->nd_flag & ND_NFSV3) { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - if (fxdr_unsigned(int, *tl) != NFSX_V3FH) { \ + (f)->nfh_len = fxdr_unsigned(int, *tl); \ + if (((f)->nfh_len < (int)sizeof(struct nfs_exphandle)) || \ + ((f)->nfh_len > NFSX_V3FHMAX)) { \ error = EBADRPC; \ nfsm_reply(0); \ } \ + } else { \ + (f)->nfh_len = NFSX_V2FH; \ } \ - nfsm_dissect(tl, u_long *, NFSX_V3FH); \ - bcopy((caddr_t)tl, (caddr_t)(f), NFSX_V3FH); \ - if ((nfsd->nd_flag & ND_NFSV3) == 0) \ - nfsm_adv(NFSX_V2FH - NFSX_V3FH); \ + nfsm_dissect(tl, u_long *, (f)->nfh_len); \ + bcopy((caddr_t)tl, (caddr_t)&(f)->nfh_xh, (f)->nfh_len); \ } #define nfsm_clget \ 
if (bp >= be) { \ + int __nfsm_error, __nfsm_len; \ if (mp == mb) \ - mp->m_len += bp-bpos; \ - MGET(mp, M_WAIT, MT_DATA); \ - MCLGET(mp, M_WAIT); \ - mp->m_len = NFSMSIZ(mp); \ - mp2->m_next = mp; \ + mbuf_setlen(mp, mbuf_len(mp) + bp - bpos); \ + mp = NULL; \ + __nfsm_error = mbuf_mclget(MBUF_WAITOK, MBUF_TYPE_DATA, &mp); \ + if (__nfsm_error) \ + panic("nfsm_clget: mbuf_mclget error %d", __nfsm_error); \ + __nfsm_len = mbuf_maxlen(mp); \ + mbuf_setlen(mp, __nfsm_len); \ + __nfsm_error = mbuf_setnext(mp2, mp); \ + if (__nfsm_error) \ + panic("nfsm_clget: mbuf_setnext error %d", __nfsm_error); \ mp2 = mp; \ - bp = mtod(mp, caddr_t); \ - be = bp+mp->m_len; \ + bp = mbuf_data(mp); \ + be = bp + __nfsm_len; \ } \ tl = (u_long *)bp +#define nfsm_srv_vattr_init(vap, v3) \ + { \ + VATTR_INIT(vap); \ + VATTR_WANTED((vap), va_type); \ + VATTR_WANTED((vap), va_mode); \ + VATTR_WANTED((vap), va_nlink); \ + VATTR_WANTED((vap), va_uid); \ + VATTR_WANTED((vap), va_gid); \ + VATTR_WANTED((vap), va_data_size); \ + VATTR_WANTED((vap), va_data_alloc); \ + VATTR_WANTED((vap), va_rdev); \ + VATTR_WANTED((vap), va_fsid); \ + VATTR_WANTED((vap), va_fileid); \ + VATTR_WANTED((vap), va_access_time); \ + VATTR_WANTED((vap), va_modify_time); \ + VATTR_WANTED((vap), va_change_time); \ + if (!v3) VATTR_WANTED((vap), va_iosize); \ + } + +#define nfsm_srv_pre_vattr_init(vap, v3) \ + { \ + VATTR_INIT(vap); \ + VATTR_WANTED((vap), va_data_size); \ + VATTR_WANTED((vap), va_modify_time); \ + VATTR_WANTED((vap), va_change_time); \ + } + #define nfsm_srvfillattr(a, f) \ nfsm_srvfattr(nfsd, (a), (f)) @@ -437,48 +555,49 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, #define nfsm_srvsattr(a) \ { \ - struct timeval now; \ + struct timespec now; \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - (a)->va_mode = nfstov_mode(*tl); \ + VATTR_SET(a, va_mode, nfstov_mode(*tl)); \ } \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - (a)->va_uid = fxdr_unsigned(uid_t, *tl); \ + VATTR_SET(a, va_uid, fxdr_unsigned(uid_t, *tl)); \ } \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - (a)->va_gid = fxdr_unsigned(gid_t, *tl); \ + VATTR_SET(a, va_gid, fxdr_unsigned(gid_t, *tl)); \ } \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); \ - fxdr_hyper(tl, &(a)->va_size); \ + fxdr_hyper(tl, &(a)->va_data_size); \ + VATTR_SET_ACTIVE(a, va_data_size); \ } \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ - microtime(&now); \ + nanotime(&now); \ switch (fxdr_unsigned(int, *tl)) { \ case NFSV3SATTRTIME_TOCLIENT: \ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); \ - fxdr_nfsv3time(tl, &(a)->va_atime); \ + fxdr_nfsv3time(tl, &(a)->va_access_time); \ + VATTR_SET_ACTIVE(a, va_access_time); \ break; \ case NFSV3SATTRTIME_TOSERVER: \ - (a)->va_atime.tv_sec = now.tv_sec; \ - (a)->va_atime.tv_nsec = now.tv_usec * 1000; \ + VATTR_SET(a, va_access_time, now); \ break; \ }; \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ switch (fxdr_unsigned(int, *tl)) { \ case NFSV3SATTRTIME_TOCLIENT: \ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); \ - fxdr_nfsv3time(tl, &(a)->va_mtime); \ + fxdr_nfsv3time(tl, &(a)->va_modify_time); \ + VATTR_SET_ACTIVE(a, va_modify_time); \ break; \ case NFSV3SATTRTIME_TOSERVER: \ - (a)->va_mtime.tv_sec = now.tv_sec; \ - (a)->va_mtime.tv_nsec = 
now.tv_usec * 1000; \ + VATTR_SET(a, va_modify_time, now); \ break; \ }; } diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index 577dde099..0c97699ad 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -74,14 +74,13 @@ struct nfsmount { int nm_flag; /* Flags for soft/hard... */ int nm_state; /* Internal state flags */ - struct mount *nm_mountp; /* Vfs structure for this filesystem */ + mount_t nm_mountp; /* Vfs structure for this filesystem */ int nm_numgrps; /* Max. size of groupslist */ struct vnode *nm_dvp; /* root directory vnode pointer */ - struct socket *nm_so; /* Rpc socket */ + socket_t nm_so; /* Rpc socket */ int nm_sotype; /* Type of socket */ int nm_soproto; /* and protocol */ - int nm_soflags; /* pr_flags for socket protocol */ - struct mbuf *nm_nam; /* Addr of server */ + mbuf_t nm_nam; /* Addr of server */ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ int nm_retry; /* Max retries */ int nm_srtt[4]; /* Timers for rpcs */ @@ -89,14 +88,14 @@ struct nfsmount { int nm_sent; /* Request send count */ int nm_cwnd; /* Request send window */ int nm_timeouts; /* Request timeouts */ - int nm_deadthresh; /* Threshold of timeouts-->dead server*/ int nm_rsize; /* Max size of read rpc */ int nm_wsize; /* Max size of write rpc */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ - int nm_leaseterm; /* Term (sec) for NQNFS lease */ - CIRCLEQ_HEAD(, nfsnode) nm_timerhead; /* Head of lease timer queue */ - struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */ + int nm_acregmin; /* reg file min attr cache timeout */ + int nm_acregmax; /* reg file max attr cache timeout */ + int nm_acdirmin; /* dir min attr cache timeout */ + int nm_acdirmax; /* dir max attr cache timeout */ uid_t nm_authuid; /* Uid for authenticator */ int nm_authtype; /* Authenticator type */ int nm_authlen; /* and length */ @@ -114,14 +113,21 @@ struct nfsmount { int nm_bufqiods; /* number of iods processing queue */ int nm_tprintf_initial_delay; /* delay first "server down" */ int nm_tprintf_delay; /* delay between "server down" */ + struct { /* fsinfo & (homogenous) pathconf info */ + u_int64_t maxfilesize; /* max size of a file */ + u_long linkmax; /* max # hard links to an object */ + u_long namemax; /* max length of filename component */ + u_char pcflags; /* boolean pathconf properties */ + u_char fsproperties; /* fsinfo properties */ + } nm_fsinfo; }; #if defined(KERNEL) /* - * Convert mount ptr to nfsmount ptr. + * Convert mount_t to struct nfsmount* */ -#define VFSTONFS(mp) ((mp) ? ((struct nfsmount *)((mp)->mnt_data)) : NULL) +#define VFSTONFS(mp) ((mp) ? ((struct nfsmount *)vfs_fsprivate(mp)) : NULL) #ifndef NFS_TPRINTF_INITIAL_DELAY #define NFS_TPRINTF_INITIAL_DELAY 12 diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index 53f821217..ada189445 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,8 +69,6 @@ #ifndef _NFS_NFS_H_ #include #endif -#include - /* * Silly rename structure that hangs off the nfsnode until the name @@ -78,7 +76,7 @@ */ struct sillyrename { struct ucred *s_cred; - struct vnode *s_dvp; + vnode_t s_dvp; long s_namlen; char s_name[20]; }; @@ -107,9 +105,11 @@ struct nfsbuf { LIST_ENTRY(nfsbuf) nb_vnbufs; /* vnode's nfsbuf chain */ TAILQ_ENTRY(nfsbuf) nb_free; /* free list position if not active. */ volatile long nb_flags; /* NB_* flags. */ - time_t nb_timestamp; /* buffer timestamp */ + volatile long nb_lflags; /* NBL_* flags. */ + volatile long nb_refs; /* outstanding references. */ long nb_bufsize; /* buffer size */ - daddr_t nb_lblkno; /* logical block number. */ + daddr64_t nb_lblkno; /* logical block number. */ + time_t nb_timestamp; /* buffer timestamp */ int nb_error; /* errno value. */ u_int32_t nb_valid; /* valid pages in buf */ u_int32_t nb_dirty; /* dirty pages in buf */ @@ -118,20 +118,27 @@ struct nfsbuf { int nb_dirtyoff; /* offset in buffer of dirty region. */ int nb_dirtyend; /* offset of end of dirty region. */ caddr_t nb_data; /* mapped buffer */ - struct vnode * nb_vp; /* device vnode */ - struct proc * nb_proc; /* associated proc; NULL if kernel. */ + vnode_t nb_vp; /* device vnode */ + proc_t nb_proc; /* associated proc; NULL if kernel. */ struct ucred * nb_rcred; /* read credentials reference */ struct ucred * nb_wcred; /* write credentials reference */ void * nb_pagelist; /* upl */ }; +/* + * These flags are kept in b_lflags... + * nfs_buf_mutex must be held before examining/updating + */ +#define NBL_BUSY 0x00000001 /* I/O in progress. */ +#define NBL_WANTED 0x00000002 /* Process wants this buffer. */ + /* * These flags are kept in nb_flags and they're (purposefully) * very similar to the B_* flags for struct buf. + * nfs_buf_mutex is not needed to examine/update these. */ #define NB_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define NB_ASYNC 0x00000004 /* Start I/O, do not wait. */ -#define NB_BUSY 0x00000010 /* I/O in progress. */ #define NB_CACHE 0x00000020 /* Bread found us in the cache. */ #define NB_STABLE 0x00000040 /* write FILESYNC not UNSTABLE. */ #define NB_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ @@ -143,19 +150,41 @@ struct nfsbuf { #define NB_NOCACHE 0x00008000 /* Do not cache block after use. */ #define NB_READ 0x00100000 /* Read buffer. */ #define NB_PAGELIST 0x00400000 /* Buffer describes pagelist I/O. */ -#define NB_WANTED 0x00800000 /* Process wants this buffer. */ #define NB_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define NB_WRITEINPROG 0x01000000 /* Write in progress. */ #define NB_META 0x40000000 /* buffer contains meta-data. */ #define NB_IOD 0x80000000 /* buffer being handled by nfsiod. */ - +/* Flags for operation type in nfs_buf_get() */ +#define NBLK_READ 0x00000001 /* buffer for read */ +#define NBLK_WRITE 0x00000002 /* buffer for write */ +#define NBLK_META 0x00000004 /* buffer for metadata */ +#define NBLK_OPMASK 0x00000007 /* operation mask */ +/* modifiers for above flags... 
*/ +#define NBLK_NOWAIT 0x40000000 /* don't wait on busy buffer */ +#define NBLK_ONLYVALID 0x80000000 /* only return cached buffer */ + +/* These flags are used for nfsbuf iterating */ +#define NBI_ITER 0x01 /* iteration in progress */ +#define NBI_ITERWANT 0x02 /* waiting to iterate */ +#define NBI_CLEAN 0x04 /* requesting clean buffers */ +#define NBI_DIRTY 0x08 /* requesting dirty buffers */ +#define NBI_NOWAIT 0x10 /* don't block on NBI_ITER */ + +/* Flags for nfs_buf_acquire */ +#define NBAC_NOWAIT 0x01 /* Don't wait if buffer is busy */ +#define NBAC_REMOVE 0x02 /* Remove from free list once buffer is acquired */ + +/* some convenience macros... */ #define NBOFF(BP) ((off_t)(BP)->nb_lblkno * (off_t)(BP)->nb_bufsize) #define NBPGVALID(BP,P) (((BP)->nb_valid >> (P)) & 0x1) #define NBPGDIRTY(BP,P) (((BP)->nb_dirty >> (P)) & 0x1) #define NBPGVALID_SET(BP,P) ((BP)->nb_valid |= (1 << (P))) #define NBPGDIRTY_SET(BP,P) ((BP)->nb_dirty |= (1 << (P))) +#define NBUFSTAMPVALID(BP) ((BP)->nb_timestamp != ~0) +#define NBUFSTAMPINVALIDATE(BP) ((BP)->nb_timestamp = ~0) + #define NFS_BUF_MAP(BP) \ do { \ if (!(BP)->nb_data && nfs_buf_map(BP)) \ @@ -167,33 +196,58 @@ TAILQ_HEAD(nfsbuffreehead, nfsbuf); #define NFSNOLIST ((struct nfsbuf *)0xdeadbeef) -extern int nfsbufhashlock, nfsbufcnt, nfsbufmin, nfsbufmax; +extern lck_mtx_t *nfs_buf_mutex; +extern int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; extern int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer; extern int nfs_nbdwrite; extern struct nfsbuffreehead nfsbuffree, nfsbufdelwri; extern time_t nfsbuffreeuptimestamp; -#define NFSBUFCNTCHK() \ +#define NFSBUFCNTCHK(locked) \ do { \ + if (!locked) lck_mtx_lock(nfs_buf_mutex); \ if ( (nfsbufcnt < 0) || \ (nfsbufcnt > nfsbufmax) || \ + (nfsbufmetacnt < 0) || \ + (nfsbufmetacnt > nfsbufmetamax) || \ + (nfsbufmetacnt > nfsbufcnt) || \ (nfsbuffreecnt < 0) || \ (nfsbuffreecnt > nfsbufmax) || \ (nfsbuffreecnt > nfsbufcnt) || \ (nfsbuffreemetacnt < 0) || \ (nfsbuffreemetacnt > nfsbufmax) || \ (nfsbuffreemetacnt > nfsbufcnt) || \ + (nfsbuffreemetacnt > nfsbufmetamax) || \ + (nfsbuffreemetacnt > nfsbufmetacnt) || \ (nfsbufdelwricnt < 0) || \ (nfsbufdelwricnt > nfsbufmax) || \ (nfsbufdelwricnt > nfsbufcnt) || \ (nfs_nbdwrite < 0) || \ (nfs_nbdwrite > nfsbufcnt) || \ 0) \ - panic("nfsbuf count error: max %d cnt %d free %d meta %d delwr %d bdw %d\n", \ - nfsbufmax, nfsbufcnt, nfsbuffreecnt, nfsbuffreemetacnt, \ + panic("nfsbuf count error: max %d meta %d cnt %d meta %d free %d meta %d delwr %d bdw %d\n", \ + nfsbufmax, nfsbufmetamax, nfsbufcnt, nfsbufmetacnt, nfsbuffreecnt, nfsbuffreemetacnt, \ nfsbufdelwricnt, nfs_nbdwrite); \ + if (!locked) lck_mtx_unlock(nfs_buf_mutex); \ } while (0) +struct nfs_vattr { + enum vtype nva_type; /* vnode type (for create) */ + u_short nva_mode; /* files access mode and type */ + dev_t nva_rdev; /* device the special file represents */ + uid_t nva_uid; /* owner user id */ + gid_t nva_gid; /* owner group id */ + uint32_t nva_fsid; /* file system id (dev for now) */ + uint64_t nva_nlink; /* number of references to file */ + uint64_t nva_fileid; /* file id */ + uint64_t nva_size; /* file size in bytes */ + uint64_t nva_bytes; /* bytes of disk space held by file */ + uint32_t nva_blocksize; /* blocksize preferred for i/o */ + struct timespec nva_atime; /* time of last access */ + struct timespec nva_mtime; /* time of last modification */ + struct timespec nva_ctime; /* time file changed */ +}; + /* * The nfsnode is the nfs equivalent to ufs's 
inode. Any similarity * is purely coincidental. @@ -202,30 +256,25 @@ extern time_t nfsbuffreeuptimestamp; * An nfsnode is 'named' by its file handle. (nget/nfs_node.c) * If this structure exceeds 256 bytes (it is currently 256 using 4.4BSD-Lite * type definitions), file handles of > 32 bytes should probably be split out - * into a separate MALLOC()'d data structure. (Reduce the size of nfsfh_t by - * changing the definition in sys/mount.h of NFS_SMALLFH.) + * into a separate MALLOC()'d data structure. (Reduce the size of nfsnode.n_fh + * by changing the definition in nfsproto.h of NFS_SMALLFH.) * NB: Hopefully the current order of the fields is such that everything will * be well aligned and, therefore, tightly packed. */ struct nfsnode { - struct lock__bsd__ n_lock; /* the vnode lock */ LIST_ENTRY(nfsnode) n_hash; /* Hash chain */ - CIRCLEQ_ENTRY(nfsnode) n_timer; /* Nqnfs timer chain */ u_quad_t n_size; /* Current size of file */ - u_quad_t n_brev; /* Modify rev when cached */ - u_quad_t n_lrev; /* Modify rev for lease */ - struct vattr n_vattr; /* Vnode attribute cache */ + struct nfs_vattr n_vattr; /* Vnode attribute cache */ time_t n_attrstamp; /* Attr. cache timestamp */ u_int32_t n_mode; /* ACCESS mode cache */ uid_t n_modeuid; /* credentials having mode */ time_t n_modestamp; /* mode cache timestamp */ - time_t n_mtime; /* Prev modify time. */ - time_t n_ncmtime; /* namecache modify time. */ - time_t n_expiry; /* Lease expiry time */ - nfsfh_t *n_fhp; /* NFS File Handle */ + struct timespec n_mtime; /* Prev modify time. */ + struct timespec n_ncmtime; /* namecache modify time. */ + u_char *n_fhp; /* NFS File Handle */ union { - struct vnode *n_vp; /* associated vnode */ - struct mount *n_mp; /* associated mount (NINIT) */ + vnode_t n_vp; /* associated vnode */ + mount_t n_mp; /* associated mount (NINIT) */ } n_un0; struct lockf *n_lockf; /* Locking record of file */ int n_error; /* Save write error value */ @@ -243,13 +292,19 @@ struct nfsnode { } n_un3; short n_fhsize; /* size in bytes, of fh */ short n_flag; /* Flag for locking.. */ - nfsfh_t n_fh; /* Small File Handle */ + u_char n_fh[NFS_SMALLFH];/* Small File Handle */ u_int64_t n_xid; /* last xid to loadattr */ struct nfsbuflists n_cleanblkhd; /* clean blocklist head */ struct nfsbuflists n_dirtyblkhd; /* dirty blocklist head */ int n_needcommitcnt;/* # bufs that need committing */ + int n_bufiterflags; /* buf iterator flags */ }; +#define nfstimespeccmp(tvp, uvp, cmp) \ + (((tvp)->tv_sec == (uvp)->tv_sec) ? 
\ + ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) + #define CHECK_NEEDCOMMITCNT(np) \ do { \ if ((np)->n_needcommitcnt < 0) { \ @@ -274,9 +329,9 @@ struct nfsnode { #define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */ #define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x0008 /* Flag write errors so close will know */ -#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */ -#define NQNFSWRITE 0x0040 /* Write lease */ -#define NQNFSEVICTED 0x0080 /* Has been evicted */ +#define NNEEDINVALIDATE 0x0010 /* need to call vinvalbuf() */ +#define NNOCACHE 0x0020 /* all bufs are uncached */ +#define NWRBUSY 0x0040 /* node in write/fsync */ #define NACC 0x0100 /* Special file accessed */ #define NUPD 0x0200 /* Special file updated */ #define NCHG 0x0400 /* Special file times changed */ @@ -284,64 +339,86 @@ struct nfsnode { #define NINIT 0x2000 /* node is being initialized */ #define NWINIT 0x4000 /* someone waiting for init to complete */ +#define NATTRVALID(np) ((np)->n_attrstamp != ~0) +#define NATTRINVALIDATE(np) ((np)->n_attrstamp = ~0) +#define NMODEVALID(np) ((np)->n_modestamp != ~0) +#define NMODEINVALIDATE(np) ((np)->n_modestamp = ~0) + +#define NVALIDBUFS(np) (!LIST_EMPTY(&(np)->n_dirtyblkhd) || \ + !LIST_EMPTY(&(np)->n_cleanblkhd)) + +/* + * NFS-specific flags for nfs_vinvalbuf/nfs_flush + */ +#define V_IGNORE_WRITEERR 0x8000 + +/* + * Flags for nfs_nget() + */ +#define NG_MARKROOT 0x0001 /* mark vnode as root of FS */ +#define NG_MAKEENTRY 0x0002 /* add name cache entry for vnode */ + /* * Convert between nfsnode pointers and vnode pointers */ -#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data) -#define NFSTOV(np) ((struct vnode *)(np)->n_vnode) +#define VTONFS(vp) ((struct nfsnode *)vnode_fsnode(vp)) +#define NFSTOV(np) ((np)->n_vnode) + +/* nfsnode hash table mutex */ +extern lck_mtx_t *nfs_node_hash_mutex; /* - * Queue head for nfsiod's + * nfsiod structures */ -extern TAILQ_HEAD(nfs_bufq, buf) nfs_bufq; -extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; +extern proc_t nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; +extern lck_grp_t *nfs_iod_lck_grp; +extern lck_grp_attr_t *nfs_iod_lck_grp_attr; +extern lck_attr_t *nfs_iod_lck_attr; +extern lck_mtx_t *nfs_iod_mutex; #if defined(KERNEL) -typedef int vop_t __P((void *)); -extern vop_t **fifo_nfsv2nodeop_p; -extern vop_t **nfsv2_vnodeop_p; -extern vop_t **spec_nfsv2nodeop_p; +typedef int vnop_t(void *); +extern vnop_t **fifo_nfsv2nodeop_p; +extern vnop_t **nfsv2_vnodeop_p; +extern vnop_t **spec_nfsv2nodeop_p; /* * Prototypes for NFS vnode operations */ -int nfs_write __P((struct vop_write_args *)); -#define nfs_lease_check ((int (*) __P((struct vop_lease_args *)))nullop) -#define nqnfs_vop_lease_check lease_check -int nqnfs_vop_lease_check __P((struct vop_lease_args *)); -#define nfs_revoke vop_revoke -#define nfs_seek ((int (*) __P((struct vop_seek_args *)))nullop) -int nfs_inactive __P((struct vop_inactive_args *)); -int nfs_reclaim __P((struct vop_reclaim_args *)); -int nfs_lock __P((struct vop_lock_args *)); -int nfs_unlock __P((struct vop_unlock_args *)); -int nfs_islocked __P((struct vop_islocked_args *)); - -#define nfs_reallocblks \ - ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp) +int nfs_write(struct vnop_write_args *); +#define nfs_revoke nop_revoke +#define nfs_seek ((int (*)(struct vnop_seek_args *))nullop) //XXXdead? 
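The NATTRVALID/NATTRINVALIDATE macros above reserve a timestamp of ~0 as a "no cached attributes" sentinel, and nfstimespeccmp() orders two timespecs by seconds first, with nanoseconds only breaking ties. A minimal sketch of how a consumer might combine the two, assuming a per-mount freshness timeout of the kind the attribute-cache code (nfs_getattrcache() in this release) applies; nfs_attr_is_fresh, nfs_mtime_changed, and attr_timeo are illustrative names, not part of this patch:

static int
nfs_attr_is_fresh(struct nfsnode *np, time_t attr_timeo, time_t now)
{
	if (!NATTRVALID(np))		/* n_attrstamp == ~0: nothing cached */
		return (0);
	/* cached attributes are usable only inside the timeout window */
	return ((now - np->n_attrstamp) < attr_timeo);
}

static int
nfs_mtime_changed(struct nfsnode *np)
{
	/* seconds compared first, nanoseconds only break ties */
	return (nfstimespeccmp(&np->n_mtime, &np->n_ncmtime, >));
}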
+int nfs_inactive(struct vnop_inactive_args *); +int nfs_reclaim(struct vnop_reclaim_args *); -/* other stuff */ -int nfs_removeit __P((struct sillyrename *)); -int nfs_nget __P((struct mount *,nfsfh_t *,int,struct nfsnode **)); -nfsuint64 *nfs_getcookie __P((struct nfsnode *, off_t, int)); -void nfs_invaldir __P((struct vnode *)); -#define nqnfs_lease_updatetime lease_updatetime +/* other stuff */ +int nfs_removeit(struct sillyrename *); +int nfs_nget(mount_t,vnode_t,struct componentname *,u_char *,int,struct nfs_vattr *,u_int64_t *,int,struct nfsnode **); +nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int); +void nfs_invaldir(vnode_t); /* nfsbuf functions */ void nfs_nbinit(void); void nfs_buf_remfree(struct nfsbuf *); -struct nfsbuf * nfs_buf_incore(struct vnode *, daddr_t); -struct nfsbuf * nfs_buf_get(struct vnode *, daddr_t, int, struct proc *, int); +boolean_t nfs_buf_is_incore(vnode_t, daddr64_t); +struct nfsbuf * nfs_buf_incore(vnode_t, daddr64_t); +int nfs_buf_get(vnode_t, daddr64_t, int, proc_t, int, struct nfsbuf **); int nfs_buf_upl_setup(struct nfsbuf *bp); void nfs_buf_upl_check(struct nfsbuf *bp); void nfs_buf_release(struct nfsbuf *, int); int nfs_buf_iowait(struct nfsbuf *); void nfs_buf_iodone(struct nfsbuf *); -void nfs_buf_write_delayed(struct nfsbuf *); +void nfs_buf_write_delayed(struct nfsbuf *, proc_t); void nfs_buf_freeup(int); +void nfs_buf_refget(struct nfsbuf *bp); +void nfs_buf_refrele(struct nfsbuf *bp); +void nfs_buf_drop(struct nfsbuf *); +errno_t nfs_buf_acquire(struct nfsbuf *, int, int, int); +int nfs_buf_iterprepare(struct nfsnode *, struct nfsbuflists *, int); +void nfs_buf_itercomplete(struct nfsnode *, struct nfsbuflists *, int); #endif /* KERNEL */ diff --git a/bsd/nfs/nfsproto.h b/bsd/nfs/nfsproto.h index eebd0ee26..d44115245 100644 --- a/bsd/nfs/nfsproto.h +++ b/bsd/nfs/nfsproto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -85,6 +85,7 @@ #define NFS_VER3 3 #define NFS_V2MAXDATA 8192 #define NFS_MAXDGRAMDATA 16384 +#define NFS_PREFDGRAMDATA 8192 #define NFS_MAXDATA (60*1024) // XXX not ready for 64K-128K #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 @@ -145,11 +146,9 @@ #define NFSX_V2STATFS 20 /* specific to NFS Version 3 */ -#define NFSX_V3FH (sizeof (fhandle_t)) /* size this server uses */ #define NFSX_V3FHMAX 64 /* max. allowed by protocol */ #define NFSX_V3FATTR 84 #define NFSX_V3SATTR 60 /* max. all fields filled in */ -#define NFSX_V3SRVSATTR (sizeof (struct nfsv3_sattr)) #define NFSX_V3POSTOPATTR (NFSX_V3FATTR + NFSX_UNSIGNED) #define NFSX_V3WCCDATA (NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED) #define NFSX_V3COOKIEVERF 8 @@ -162,7 +161,7 @@ /* variants for both versions */ #define NFSX_FH(v3) ((v3) ? (NFSX_V3FHMAX + NFSX_UNSIGNED) : \ NFSX_V2FH) -#define NFSX_SRVFH(v3) ((v3) ? NFSX_V3FH : NFSX_V2FH) +#define NFSX_SRVFH(v3,FH) ((v3) ? (FH)->nfh_len : NFSX_V2FH) #define NFSX_FATTR(v3) ((v3) ? NFSX_V3FATTR : NFSX_V2FATTR) #define NFSX_PREOPATTR(v3) ((v3) ? (7 * NFSX_UNSIGNED) : 0) #define NFSX_POSTOPATTR(v3) ((v3) ? 
(NFSX_V3FATTR + NFSX_UNSIGNED) : 0) @@ -206,13 +205,8 @@ #endif /* !NFS_PROGRAM */ -/* And leasing (nqnfs) procedure numbers (must be last) */ -#define NQNFSPROC_GETLEASE 22 -#define NQNFSPROC_VACATED 23 -#define NQNFSPROC_EVICTED 24 - -#define NFSPROC_NOOP 25 -#define NFS_NPROCS 26 +#define NFSPROC_NOOP 22 +#define NFS_NPROCS 23 /* Actual Version 2 procedure numbers */ #define NFSV2PROC_NULL 0 @@ -264,8 +258,8 @@ /* Conversion macros */ #define vtonfsv2_mode(t,m) \ - txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : \ - MAKEIMODE((t), (m))) + txdr_unsigned(((t) == VFIFO) ? vnode_makeimode(VCHR, (m)) : \ + vnode_makeimode((t), (m))) #define vtonfsv3_mode(m) txdr_unsigned((m) & 07777) #define nfstov_mode(a) (fxdr_unsigned(u_short, (a))&07777) #define vtonfsv2_type(a) txdr_unsigned(nfsv2_type[((long)(a))]) @@ -292,11 +286,6 @@ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, #ifndef NFS_SMALLFH #define NFS_SMALLFH 64 #endif -union nfsfh { - fhandle_t fh_generic; - u_char fh_bytes[NFS_SMALLFH]; -}; -typedef union nfsfh nfsfh_t; struct nfsv2_time { u_long nfsv2_sec; @@ -405,21 +394,6 @@ struct nfsv2_sattr { nfstime2 sa_mtime; }; -/* - * NFS Version 3 sattr structure for the new node creation case. - */ -struct nfsv3_sattr { - u_long sa_modetrue; - u_long sa_mode; - u_long sa_uidtrue; - u_long sa_uid; - u_long sa_gidtrue; - u_long sa_gid; - u_long sa_sizefalse; - u_long sa_atimetype; - u_long sa_mtimetype; -}; - struct nfs_statfs { union { struct { diff --git a/bsd/nfs/nfsrtt.h b/bsd/nfs/nfsrtt.h index 1cebaf787..8e00d78ca 100644 --- a/bsd/nfs/nfsrtt.h +++ b/bsd/nfs/nfsrtt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -104,7 +104,6 @@ struct nfsrtt { /* * Bits for the flags field. */ -#define DRT_NQNFS 0x01 /* Rpc used Nqnfs protocol */ #define DRT_TCP 0x02 /* Client used TCP transport */ #define DRT_CACHEREPLY 0x04 /* Reply was from recent request cache */ #define DRT_CACHEDROP 0x08 /* Rpc request dropped, due to recent reply */ diff --git a/bsd/nfs/nfsrvcache.h b/bsd/nfs/nfsrvcache.h index 1e7f97766..b18671041 100644 --- a/bsd/nfs/nfsrvcache.h +++ b/bsd/nfs/nfsrvcache.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -77,7 +77,7 @@ struct nfsrvcache { LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */ u_long rc_xid; /* rpc id number */ union { - struct mbuf *ru_repmb; /* Reply mbuf list OR */ + mbuf_t ru_repmb; /* Reply mbuf list OR */ int ru_repstat; /* Reply status */ } rc_un; union nethostaddr rc_haddr; /* Host address */ @@ -107,7 +107,6 @@ struct nfsrvcache { #define RC_WANTED 0x02 #define RC_REPSTATUS 0x04 #define RC_REPMBUF 0x08 -#define RC_NQNFS 0x10 #define RC_INETADDR 0x20 #define RC_NAM 0x40 diff --git a/bsd/nfs/nlminfo.h b/bsd/nfs/nlminfo.h deleted file mode 100644 index d149664da..000000000 --- a/bsd/nfs/nlminfo.h +++ /dev/null @@ -1,52 +0,0 @@ -/*- - * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Berkeley Software Design Inc's name may not be used to endorse or - * promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from BSDI nlminfo.h,v 2.1 1998/03/18 01:30:38 don Exp - * $FreeBSD: src/sys/nfsclient/nlminfo.h,v 1.1 2001/04/17 20:45:22 alfred Exp $ - */ - -#include - -#ifdef __APPLE_API_PRIVATE - -/* - * Misc NLM information, some needed for the master lockd process, and some - * needed by every process doing nlm based locking. - */ -struct nlminfo { - /* these are used by any process doing nlm locking */ - int msg_seq; /* sequence counter for lock requests */ - int retcode; /* return code for lock requests */ - int set_getlk; - int getlk_pid; - off_t getlk_start; - off_t getlk_len; - struct timeval pid_start; /* process starting time */ - struct timeval nlm_lockstart; /* XXX debug */ -}; - -extern void nlminfo_release(struct proc *p); -#endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/nfs/nqnfs.h b/bsd/nfs/nqnfs.h deleted file mode 100644 index bb432511a..000000000 --- a/bsd/nfs/nqnfs.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Rick Macklem at The University of Guelph. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)nqnfs.h 8.3 (Berkeley) 3/30/95 - * FreeBSD-Id: nqnfs.h,v 1.14 1997/08/16 19:16:05 wollman Exp $ - */ - - -#ifndef _NFS_NQNFS_H_ -#define _NFS_NQNFS_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -/* - * Definitions for NQNFS (Not Quite NFS) cache consistency protocol. - */ - -/* Tunable constants */ -#define NQ_CLOCKSKEW 3 /* Clock skew factor (sec) */ -#define NQ_WRITESLACK 5 /* Delay for write cache flushing */ -#define NQ_MAXLEASE 60 /* Max lease duration (sec) */ -#define NQ_MINLEASE 5 /* Min lease duration (sec) */ -#define NQ_DEFLEASE 30 /* Default lease duration (sec) */ -#define NQ_RENEWAL 3 /* Time before expiry (sec) to renew */ -#define NQ_TRYLATERDEL 15 /* Initial try later delay (sec) */ -#define NQ_MAXNUMLEASE 2048 /* Upper bound on number of server leases */ -#define NQ_DEADTHRESH NQ_NEVERDEAD /* Default nm_deadthresh */ -#define NQ_NEVERDEAD 9 /* Greater than max. nm_timeouts */ -#define NQLCHSZ 256 /* Server hash table size */ - -#define NQNFS_PROG 300105 /* As assigned by Sun */ -#define NQNFS_VER3 3 -#define NQNFS_EVICTSIZ 156 /* Size of eviction request in bytes */ - -/* - * Definitions used for saving the "last lease expires" time in Non-volatile - * RAM on the server. The default definitions below assume that NOVRAM is not - * available. - */ -#ifdef HASNVRAM -# undef HASNVRAM -#endif -#define NQSTORENOVRAM(t) -#define NQLOADNOVRAM(t) - -/* - * Defn and structs used on the server to maintain state for current leases. - * The list of host(s) that hold the lease are kept as nqhost structures. - * The first one lives in nqlease and any others are held in a linked - * list of nqm structures hanging off of nqlease. - * - * Each nqlease structure is chained into two lists. The first is a list - * ordered by increasing expiry time for nqsrv_timer() and the second is a chain - * hashed on lc_fh. 
- */ -#define LC_MOREHOSTSIZ 10 - -struct nqhost { - union { - struct { - u_short udp_flag; - u_short udp_port; - union nethostaddr udp_haddr; - } un_udp; - struct { - u_short connless_flag; - u_short connless_spare; - union nethostaddr connless_haddr; - } un_connless; - struct { - u_short conn_flag; - u_short conn_spare; - struct nfssvc_sock *conn_slp; - } un_conn; - } lph_un; -}; -#define lph_flag lph_un.un_udp.udp_flag -#define lph_port lph_un.un_udp.udp_port -#define lph_haddr lph_un.un_udp.udp_haddr -#define lph_inetaddr lph_un.un_udp.udp_haddr.had_inetaddr -#define lph_claddr lph_un.un_connless.connless_haddr -#define lph_nam lph_un.un_connless.connless_haddr.had_nam -#define lph_slp lph_un.un_conn.conn_slp - -struct nqlease { - LIST_ENTRY(nqlease) lc_hash; /* Fhandle hash list */ - CIRCLEQ_ENTRY(nqlease) lc_timer; /* Timer queue list */ - time_t lc_expiry; /* Expiry time (sec) */ - struct nqhost lc_host; /* Host that got lease */ - struct nqm *lc_morehosts; /* Other hosts that share read lease */ - fsid_t lc_fsid; /* Fhandle */ - char lc_fiddata[MAXFIDSZ]; - struct vnode *lc_vp; /* Soft reference to associated vnode */ -}; -#define lc_flag lc_host.lph_un.un_udp.udp_flag - -/* lc_flag bits */ -#define LC_VALID 0x0001 /* Host address valid */ -#define LC_WRITE 0x0002 /* Write cache */ -#define LC_NONCACHABLE 0x0004 /* Non-cachable lease */ -#define LC_LOCKED 0x0008 /* Locked */ -#define LC_WANTED 0x0010 /* Lock wanted */ -#define LC_EXPIREDWANTED 0x0020 /* Want lease when expired */ -#define LC_UDP 0x0040 /* Host address for udp socket */ -#define LC_CLTP 0x0080 /* Host address for other connectionless */ -#define LC_LOCAL 0x0100 /* Host is server */ -#define LC_VACATED 0x0200 /* Host has vacated lease */ -#define LC_WRITTEN 0x0400 /* Recently wrote to the leased file */ -#define LC_SREF 0x0800 /* Holds a nfssvc_sock reference */ - -struct nqm { - struct nqm *lpm_next; - struct nqhost lpm_hosts[LC_MOREHOSTSIZ]; -}; - -/* - * Special value for slp for local server calls. - */ -#define NQLOCALSLP ((struct nfssvc_sock *) -1) - -/* - * Server side macros. - */ -#define nqsrv_getl(v, l) \ - (void) nqsrv_getlease((v), &nfsd->nd_duration, \ - ((nfsd->nd_flag & ND_LEASE) ? (nfsd->nd_flag & ND_LEASE) : \ - ((l) | ND_CHECK)), \ - slp, procp, nfsd->nd_nam, &cache, &frev, cred) - -/* - * Client side macros that check for a valid lease. - */ -#define NQNFS_CKINVALID(v, n, f) \ - ((time.tv_sec > (n)->n_expiry && \ - VFSTONFS((v)->v_mount)->nm_timeouts < VFSTONFS((v)->v_mount)->nm_deadthresh) \ - || ((f) == ND_WRITE && ((n)->n_flag & NQNFSWRITE) == 0)) - -#define NQNFS_CKCACHABLE(v, f) \ - ((time.tv_sec <= VTONFS(v)->n_expiry || \ - VFSTONFS((v)->v_mount)->nm_timeouts >= VFSTONFS((v)->v_mount)->nm_deadthresh) \ - && (VTONFS(v)->n_flag & NQNFSNONCACHE) == 0 && \ - ((f) == ND_READ || (VTONFS(v)->n_flag & NQNFSWRITE))) - -#define NQNFS_NEEDLEASE(v, p) \ - (time.tv_sec > VTONFS(v)->n_expiry ? \ - ((VTONFS(v)->n_flag & NQNFSEVICTED) ? 0 : nqnfs_piggy[p]) : \ - (((time.tv_sec + NQ_RENEWAL) > VTONFS(v)->n_expiry && \ - nqnfs_piggy[p]) ? \ - ((VTONFS(v)->n_flag & NQNFSWRITE) ? \ - ND_WRITE : nqnfs_piggy[p]) : 0)) - -/* - * List head for timer queue. - */ -extern CIRCLEQ_HEAD(nqtimerhead, nqlease) nqtimerhead; - -/* - * List head for the file handle hash table. - */ -#define NQFHHASH(f) \ - (&nqfhhashtbl[(*((u_long *)(f))) & nqfhhash]) -extern LIST_HEAD(nqfhhashhead, nqlease) *nqfhhashtbl; -extern u_long nqfhhash; - -/* - * Nqnfs return status numbers. 
- */ -#define NQNFS_EXPIRED 500 -#define NQNFS_TRYLATER 501 - -#if defined(KERNEL) -void nqnfs_lease_check __P((struct vnode *, struct proc *, struct ucred *, int)); -void nqnfs_lease_updatetime __P((int)); -int nqsrv_getlease __P((struct vnode *, u_long *, int, - struct nfssvc_sock *, struct proc *, - struct mbuf *, int *, u_quad_t *, - struct ucred *)); -int nqnfs_getlease __P((struct vnode *,int,struct ucred *,struct proc *)); -int nqnfs_callback __P((struct nfsmount *,struct mbuf *,struct mbuf *,caddr_t)); -int nqnfs_clientd __P((struct nfsmount *,struct ucred *,struct nfsd_cargs *,int,caddr_t,struct proc *)); -struct nfsnode; -void nqnfs_clientlease __P((struct nfsmount *, struct nfsnode *, int, int, time_t, u_quad_t)); -void nqnfs_serverd __P((void)); -int nqnfsrv_getlease __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **)); -int nqnfsrv_vacated __P((struct nfsrv_descript *, struct nfssvc_sock *, struct proc *, struct mbuf **)); -#endif - -#endif /* __APPLE_API_PRIVATE */ -#endif /* _NFS_NQNFS_H_ */ diff --git a/bsd/nfs/rpcv2.h b/bsd/nfs/rpcv2.h index d7a7a7df3..7bd0cca92 100644 --- a/bsd/nfs/rpcv2.h +++ b/bsd/nfs/rpcv2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -79,7 +79,6 @@ #define RPCAUTH_UNIX 1 #define RPCAUTH_SHORT 2 #define RPCAUTH_KERB4 4 -#define RPCAUTH_NQNFS 300000 #define RPCAUTH_MAXSIZ 400 #define RPCVERF_MAXSIZ 12 /* For Kerb, can actually be 400 */ #define RPCAUTH_UNIXGIDS 16 diff --git a/bsd/nfs/xdr_subs.h b/bsd/nfs/xdr_subs.h index e786cb8c7..f6bc3748e 100644 --- a/bsd/nfs/xdr_subs.h +++ b/bsd/nfs/xdr_subs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,10 +22,7 @@ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. @@ -40,8 +37,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. + * This product includes software developed by the University of + * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -58,9 +55,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)xdr_subs.h 8.3 (Berkeley) 3/30/95 + * @(#)xdr_subs.h 8.3 (Berkeley) 3/30/95 * FreeBSD-Id: xdr_subs.h,v 1.9 1997/02/22 09:42:53 peter Exp $ */ + #ifndef _NFS_XDR_SUBS_H_ diff --git a/bsd/ppc/Makefile b/bsd/ppc/Makefile index 790667692..633b7a521 100644 --- a/bsd/ppc/Makefile +++ b/bsd/ppc/Makefile @@ -8,15 +8,21 @@ include $(MakeInc_cmd) include $(MakeInc_def) DATAFILES = \ - cpu.h disklabel.h endian.h exec.h label_t.h param.h profile.h \ - psl.h ptrace.h reboot.h reg.h setjmp.h signal.h spl.h \ - table.h types.h ucontext.h user.h vmparam.h + endian.h param.h profile.h \ + setjmp.h signal.h \ + types.h ucontext.h vmparam.h _types.h + +KERNELFILES = \ + endian.h param.h profile.h \ + signal.h \ + types.h vmparam.h _types.h INSTALL_MD_LIST = ${DATAFILES} +INSTALL_MD_LCL_LIST = ${DATAFILES} disklabel.h INSTALL_MD_DIR = ppc -EXPORT_MD_LIST = ${DATAFILES} +EXPORT_MD_LIST = ${KERNELFILES} EXPORT_MD_DIR = ppc diff --git a/bsd/ppc/_types.h b/bsd/ppc/_types.h new file mode 100644 index 000000000..337362194 --- /dev/null +++ b/bsd/ppc/_types.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _BSD_PPC__TYPES_H_ +#define _BSD_PPC__TYPES_H_ + +/* + * This header file contains integer types. It's intended to also contain + * floating point and other arithmetic types, as needed, later. + */ + +#ifdef __GNUC__ +typedef __signed char __int8_t; +#else /* !__GNUC__ */ +typedef char __int8_t; +#endif /* !__GNUC__ */ +typedef unsigned char __uint8_t; +typedef short __int16_t; +typedef unsigned short __uint16_t; +typedef int __int32_t; +typedef unsigned int __uint32_t; +typedef long long __int64_t; +typedef unsigned long long __uint64_t; + +typedef long __darwin_intptr_t; +typedef unsigned int __darwin_natural_t; + +/* + * The rune type below is declared to be an ``int'' instead of the more natural + * ``unsigned long'' or ``long''. Two things are happening here. It is not + * unsigned so that EOF (-1) can be naturally assigned to it and used. Also, + * it looks like 10646 will be a 31 bit standard. This means that if your + * ints cannot hold 32 bits, you will be in trouble. The reason an int was + * chosen over a long is that the is*() and to*() routines take ints (says + * ANSI C), but they use __darwin_ct_rune_t instead of int. By changing it + * here, you lose a bit of ANSI conformance, but your programs will still + * work. + * + * NOTE: rune_t is not covered by ANSI nor other standards, and should not + * be instantiated outside of lib/libc/locale. Use wchar_t. wchar_t and + * rune_t must be the same type.
Also wint_t must be no narrower than + * wchar_t, and should also be able to hold all members of the largest + * character set plus one extra value (WEOF). wint_t must be at least 16 bits. + */ + +typedef int __darwin_ct_rune_t; /* ct_rune_t */ + +/* + * mbstate_t is an opaque object to keep conversion state, during multibyte + * stream conversions. The content must not be referenced by user programs. + */ +typedef union { + char __mbstate8[128]; + long long _mbstateL; /* for alignment */ +} __mbstate_t; + +typedef __mbstate_t __darwin_mbstate_t; /* mbstate_t */ + +#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__) +typedef __PTRDIFF_TYPE__ __darwin_ptrdiff_t; /* ptr1 - ptr2 */ +#else +typedef int __darwin_ptrdiff_t; /* ptr1 - ptr2 */ +#endif /* __GNUC__ */ + +#if defined(__GNUC__) && defined(__SIZE_TYPE__) +typedef __SIZE_TYPE__ __darwin_size_t; /* sizeof() */ +#else +typedef unsigned long __darwin_size_t; /* sizeof() */ +#endif + +#ifdef KERNEL +typedef char * __darwin_va_list; /* va_list */ +#else /* !KERNEL */ +#if (__GNUC__ > 2) +typedef __builtin_va_list __darwin_va_list; /* va_list */ +#else +typedef char * __darwin_va_list; /* va_list */ +#endif +#endif /* KERNEL */ + +#if defined(__GNUC__) && defined(__WCHAR_TYPE__) +typedef __WCHAR_TYPE__ __darwin_wchar_t; /* wchar_t */ +#else +typedef __darwin_ct_rune_t __darwin_wchar_t; /* wchar_t */ +#endif + +typedef __darwin_wchar_t __darwin_rune_t; /* rune_t */ + +#if defined(__GNUC__) && defined(__WINT_TYPE__) +typedef __WINT_TYPE__ __darwin_wint_t; /* wint_t */ +#else +typedef __darwin_ct_rune_t __darwin_wint_t; /* wint_t */ +#endif + +typedef unsigned long __darwin_clock_t; /* clock() */ +typedef __uint32_t __darwin_socklen_t; /* socklen_t (duh) */ +typedef long __darwin_ssize_t; /* byte count or error */ +typedef long __darwin_time_t; /* time() */ + +#endif /* _BSD_PPC__TYPES_H_ */ diff --git a/bsd/ppc/disklabel.h b/bsd/ppc/disklabel.h index 02a84a604..9d97865f0 100644 --- a/bsd/ppc/disklabel.h +++ b/bsd/ppc/disklabel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,13 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* -* - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. - * - */ - + #ifndef _BSD_PPC_DISKLABEL_H_ #define _BSD_PPC_DISKLABEL_H_ diff --git a/bsd/ppc/endian.h b/bsd/ppc/endian.h index 984cdb588..72808459e 100644 --- a/bsd/ppc/endian.h +++ b/bsd/ppc/endian.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + /* * Copyright (c) 1995 NeXT Computer, Inc. All rights reserved. * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. @@ -64,55 +86,25 @@ #define _QUAD_HIGHWORD 0 #define _QUAD_LOWWORD 1 -#if defined(KERNEL) || !defined(_POSIX_SOURCE) /* * Definitions for byte order, according to byte significance from low * address to high. */ -#define LITTLE_ENDIAN 1234 /* LSB first: i386, vax */ -#define BIG_ENDIAN 4321 /* MSB first: 68000, ibm, net, ppc */ -#define PDP_ENDIAN 3412 /* LSB first in word, MSW first in long */ - -#define BYTE_ORDER BIG_ENDIAN +#define __DARWIN_LITTLE_ENDIAN 1234 /* LSB first: i386, vax */ +#define __DARWIN_BIG_ENDIAN 4321 /* MSB first: 68000, ibm, net, ppc */ +#define __DARWIN_PDP_ENDIAN 3412 /* LSB first in word, MSW first in long */ -#include +#define __DARWIN_BYTE_ORDER __DARWIN_BIG_ENDIAN -#ifndef __ASSEMBLER__ -__BEGIN_DECLS -unsigned long htonl __P((unsigned long)); -unsigned short htons __P((unsigned short)); -unsigned long ntohl __P((unsigned long)); -unsigned short ntohs __P((unsigned short)); -__END_DECLS -#endif /* __ASSEMBLER__ */ - -/* - * Macros for network/external number representation conversion. - */ -#if BYTE_ORDER == BIG_ENDIAN && !defined(lint) -#define ntohl(x) (x) -#define ntohs(x) (x) -#define htonl(x) (x) -#define htons(x) (x) +#if defined(KERNEL) || !defined(_POSIX_C_SOURCE) -#define NTOHL(x) (x) -#define NTOHS(x) (x) -#define HTONL(x) (x) -#define HTONS(x) (x) +#define LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN +#define BIG_ENDIAN __DARWIN_BIG_ENDIAN +#define PDP_ENDIAN __DARWIN_PDP_ENDIAN -#else +#define BYTE_ORDER __DARWIN_BYTE_ORDER -#include - -#define ntohl(x) NXSwapBigLongToHost(x) -#define ntohs(x) NXSwapBigShortToHost(x) -#define htonl(x) NXSwapHostLongToBig(x) -#define htons(x) NXSwapHostShortToBig(x) +#include -#define NTOHL(x) (x) = ntohl((u_long)x) -#define NTOHS(x) (x) = ntohs((u_short)x) -#define HTONL(x) (x) = htonl((u_long)x) -#define HTONS(x) (x) = htons((u_short)x) -#endif -#endif /* defined(KERNEL) || !defined(_POSIX_SOURCE) */ +#endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ #endif /* !_PPC_ENDIAN_H_ */ diff --git a/bsd/ppc/exec.h b/bsd/ppc/exec.h index 339bac2c7..86024c6d6 100644 --- a/bsd/ppc/exec.h +++ b/bsd/ppc/exec.h @@ -45,7 +45,7 @@ #include -#ifdef __APPLE_API_OBSOLETE +#ifdef BSD_KERNEL_PRIVATE /* Size of a page in an object file. */ #define __LDPGSZ 4096 @@ -96,13 +96,7 @@ struct exec { unsigned int a_drsize; /* data relocation size */ }; -/* - * Address of ps_strings structure (in user space). - */ -#define PS_STRINGS \ - ((struct ps_strings *)(USRSTACK - sizeof(struct ps_strings))) - -#endif /* __APPLE_API_OBSOLETE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _BSD_PPC_EXEC_H_ */ diff --git a/bsd/ppc/param.h b/bsd/ppc/param.h index fa8f2cf46..ece487f07 100644 --- a/bsd/ppc/param.h +++ b/bsd/ppc/param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,22 +20,17 @@ * @APPLE_LICENSE_HEADER_END@ */ /* Copyright (c) 1993,1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. 
- * - */ #ifndef _PPC_PARAM_H_ #define _PPC_PARAM_H_ /* * Round p (pointer or byte index) up to a correctly-aligned value for all - * data types (int, long, ...). The result is u_int and must be cast to - * any desired pointer type. + * data types (int, long, ...). The result is unsigned int and must be + * cast to any desired pointer type. */ #define ALIGNBYTES 3 -#define ALIGN(p) (((u_int)(p) + ALIGNBYTES) &~ ALIGNBYTES) +#define ALIGN(p) (((unsigned int)(p) + ALIGNBYTES) &~ ALIGNBYTES) #define NBPG 4096 /* bytes/page */ #define PGOFSET (NBPG-1) /* byte offset into page */ diff --git a/bsd/ppc/reboot.h b/bsd/ppc/reboot.h index 0a47e6a49..576b8658c 100644 --- a/bsd/ppc/reboot.h +++ b/bsd/ppc/reboot.h @@ -28,8 +28,7 @@ /* * Empty file (publicly) */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Use most significant 16 bits to avoid collisions with * machine independent flags. @@ -43,9 +42,8 @@ #define RB_BOOTNEXT 0x00400000 /* reboot into NeXT */ #define RB_BOOTDOS 0x00800000 /* reboot into DOS */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _BSD_PPC_REBOOT_H_ */ diff --git a/bsd/ppc/reg.h b/bsd/ppc/reg.h index 58f1be653..b45306d2c 100644 --- a/bsd/ppc/reg.h +++ b/bsd/ppc/reg.h @@ -26,17 +26,13 @@ #ifndef _BSD_PPC_REG_H_ #define _BSD_PPC_REG_H_ -#include -#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* Index into the thread_state */ #define SP 3 #define PC 0 -#endif /* __APPLE_API_PRIVATE */ - #endif /* KERNEL_PRIVATE */ #endif /* _BSD_PPC_REG_H_ */ diff --git a/bsd/ppc/setjmp.h b/bsd/ppc/setjmp.h index f7b318d92..cb9c7cd33 100644 --- a/bsd/ppc/setjmp.h +++ b/bsd/ppc/setjmp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -33,44 +33,69 @@ struct _jmp_buf { struct sigcontext sigcontext; /* kernel state preserved by set/longjmp */ - unsigned long vmask __attribute__((aligned(8))); /* vector mask register */ - unsigned long vreg[32 * 4] __attribute__((aligned(16))); + unsigned int vmask __attribute__((aligned(8))); /* vector mask register */ + unsigned int vreg[32 * 4] __attribute__((aligned(16))); /* 32 128-bit vector registers */ }; /* * _JBLEN is number of ints required to save the following: - * r1, r2, r13-r31, lr, cr, ctr, xer, sig == 26 ints - * fr14 - fr31 = 18 doubles = 36 ints + * r1, r2, r13-r31, lr, cr, ctr, xer, sig == 26 register_t sized + * fr14 - fr31 = 18 doubles * vmask, 32 vector registers = 129 ints * 2 ints to get all the elements aligned + * + * register_t is 2 ints for ppc64 threads */ +#define _JBLEN64 (26*2 + 18*2 + 129 + 1) +#define _JBLEN32 (26 + 18*2 + 129 + 1) +#define _JBLEN_MAX _JBLEN64 -#define _JBLEN (26 + 36 + 129 + 1) +/* + * Locally scoped sizes + */ +#if defined(__ppc64__) +#define _JBLEN _JBLEN64 +#else +#define _JBLEN _JBLEN32 +#endif #if defined(KERNEL) -typedef struct sigcontext jmp_buf[1]; -typedef struct __sigjmp_buf { - int __storage[_JBLEN + 1] __attribute__((aligned(8))); - } sigjmp_buf[1]; +typedef struct sigcontext32 jmp_buf32[1]; +typedef struct __sigjmp_buf32 { + int __storage[_JBLEN32 + 1] __attribute__((aligned(8))); + } sigjmp_buf32[1]; + +typedef struct sigcontext64 jmp_buf64[1]; +typedef struct __sigjmp_buf64 { + int __storage[_JBLEN64 + 1] __attribute__((aligned(8))); + } sigjmp_buf64[1]; + +/* + * JMM - have to decide how the kernel will deal with this. 
+ * For now, hard-code the 32-bit types. + */ +typedef struct sigcontext32 jmp_buf[1]; +typedef struct __sigjmp_buf32 sigjmp_buf[1]; + #else typedef int jmp_buf[_JBLEN]; typedef int sigjmp_buf[_JBLEN + 1]; #endif __BEGIN_DECLS -extern int setjmp __P((jmp_buf env)); -extern void longjmp __P((jmp_buf env, int val)); +extern int setjmp(jmp_buf env); +extern void longjmp(jmp_buf env, int val); #ifndef _ANSI_SOURCE -int sigsetjmp __P((sigjmp_buf env, int val)); -void siglongjmp __P((sigjmp_buf env, int val)); +int _setjmp(jmp_buf env); +void _longjmp(jmp_buf, int val); +int sigsetjmp(sigjmp_buf env, int val); +void siglongjmp(sigjmp_buf env, int val); #endif /* _ANSI_SOURCE */ -#if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE) -int _setjmp __P((jmp_buf env)); -void _longjmp __P((jmp_buf, int val)); -void longjmperror __P((void)); +#if !defined(_ANSI_SOURCE) && !defined(_POSIX_C_SOURCE) +void longjmperror(void); #endif /* neither ANSI nor POSIX */ __END_DECLS diff --git a/bsd/ppc/signal.h b/bsd/ppc/signal.h index ef4138630..fee82c365 100644 --- a/bsd/ppc/signal.h +++ b/bsd/ppc/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,13 +63,35 @@ typedef enum { * to the handler to allow it to properly restore state if * a non-standard exit is performed. */ +struct sigcontext32 { + int sc_onstack; /* sigstack state to restore */ + int sc_mask; /* signal mask to restore */ + int sc_ir; /* pc */ + int sc_psw; /* processor status word */ + int sc_sp; /* stack pointer if sc_regs == NULL */ + void *sc_regs; /* (kernel private) saved state */ +}; + +struct sigcontext64 { + int sc_onstack; /* sigstack state to restore */ + int sc_mask; /* signal mask to restore */ + long long sc_ir; /* pc */ + long long sc_psw; /* processor status word */ + long long sc_sp; /* stack pointer if sc_regs == NULL */ + void *sc_regs; /* (kernel private) saved state */ +}; + +/* + * LP64todo - Have to decide how to handle this. + * For now, just duplicate the 32-bit context as the generic one. + */ struct sigcontext { int sc_onstack; /* sigstack state to restore */ int sc_mask; /* signal mask to restore */ - int sc_ir; /* pc */ + int sc_ir; /* pc */ int sc_psw; /* processor status word */ int sc_sp; /* stack pointer if sc_regs == NULL */ - void *sc_regs; /* (kernel private) saved state */ + void *sc_regs; /* (kernel private) saved state */ }; #endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/ppc/spl.h b/bsd/ppc/spl.h deleted file mode 100644 index 01d0c0b21..000000000 --- a/bsd/ppc/spl.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _BSD_PPC_SPL_H_ -#define _BSD_PPC_SPL_H_ - -#ifdef KERNEL -#ifndef __ASSEMBLER__ -/* - * Machine-dependent SPL definitions. - * - */ -typedef unsigned spl_t; - -extern unsigned sploff(void); -extern unsigned splhigh(void); -extern unsigned splsched(void); -extern unsigned splclock(void); -extern unsigned splpower(void); -extern unsigned splvm(void); -extern unsigned splbio(void); -extern unsigned splimp(void); -extern unsigned spltty(void); -extern unsigned splnet(void); -extern unsigned splsoftclock(void); - -extern void spllo(void); -extern void splon(unsigned level); -extern void splx(unsigned level); -extern void spln(unsigned level); -#define splstatclock() splhigh() - -#endif /* __ASSEMBLER__ */ - -#endif - -#endif /* _BSD_PPC_SPL_H_ */ diff --git a/bsd/ppc/types.h b/bsd/ppc/types.h index f370e9bf1..58b77b5a3 100644 --- a/bsd/ppc/types.h +++ b/bsd/ppc/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,26 +61,66 @@ #define _MACHTYPES_H_ #ifndef __ASSEMBLER__ +#include #include /* * Basic integral types. Omit the typedef if * not possible for a machine/compiler combination. */ +#ifndef _INT8_T +#define _INT8_T typedef __signed char int8_t; +#endif typedef unsigned char u_int8_t; +#ifndef _INT16_T +#define _INT16_T typedef short int16_t; +#endif typedef unsigned short u_int16_t; +#ifndef _INT32_T +#define _INT32_T typedef int int32_t; +#endif typedef unsigned int u_int32_t; +#ifndef _INT64_T +#define _INT64_T typedef long long int64_t; +#endif typedef unsigned long long u_int64_t; +#if defined(__ppc64__) +typedef int64_t register_t; +#else typedef int32_t register_t; +#endif -typedef long int intptr_t; -typedef unsigned long int uintptr_t; +#ifndef _INTPTR_T +#define _INTPTR_T +typedef __darwin_intptr_t intptr_t; +#endif +#ifndef _UINTPTR_T +#define _UINTPTR_T +typedef unsigned long uintptr_t; +#endif +/* with LP64 support pointers and longs from user address space may vary */ +/* in size depending on the type of process (currently 32 or 64-bit, but */ +/* may change in the future). These types are used for reserving the largest */ +/* possible size. 
*/ +// LP64todo - typedef mach_vm_address_t user_addr_t; /* varying length pointers from user space */ +// LP64todo - typedef mach_vm_size_t user_size_t; /* varying length values from user space (unsigned) */ +typedef u_int64_t user_addr_t; +typedef u_int64_t user_size_t; +typedef int64_t user_ssize_t; +typedef int64_t user_long_t; +typedef u_int64_t user_ulong_t; +typedef int64_t user_time_t; +#define USER_ADDR_NULL ((user_addr_t) 0) +#define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr))) + +#ifndef __offsetof #define __offsetof(type, field) ((size_t)(&((type *)0)->field)) +#endif #endif /* __ASSEMBLER__ */ #endif /* _MACHTYPES_H_ */ diff --git a/bsd/ppc/ucontext.h b/bsd/ppc/ucontext.h index 77c459244..ab434a1d4 100644 --- a/bsd/ppc/ucontext.h +++ b/bsd/ppc/ucontext.h @@ -24,27 +24,44 @@ #define _PPC_UCONTEXT_H_ -#include +#include +#ifndef _POSIX_C_SOURCE struct mcontext { - ppc_exception_state_t es; - ppc_thread_state_t ss; - ppc_float_state_t fs; - ppc_vector_state_t vs; + struct ppc_exception_state es; + struct ppc_thread_state ss; + struct ppc_float_state fs; + struct ppc_vector_state vs; }; - #define PPC_MCONTEXT_SIZE (PPC_THREAD_STATE_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int) +#else /* _POSIX_C_SOURCE */ +struct __darwin_mcontext { + struct __darwin_ppc_exception_state es; + struct __darwin_ppc_thread_state ss; + struct __darwin_ppc_float_state fs; + struct __darwin_ppc_vector_state vs; +}; +#endif /* _POSIX_C_SOURCE */ -typedef struct mcontext * mcontext_t; +#ifndef _MCONTEXT_T +#define _MCONTEXT_T +typedef __darwin_mcontext_t mcontext_t; +#endif +#ifndef _POSIX_C_SOURCE struct mcontext64 { - ppc_exception_state_t es; - ppc_thread_state64_t ss; - ppc_float_state_t fs; - ppc_vector_state_t vs; + struct ppc_exception_state64 es; + struct ppc_thread_state64 ss; + struct ppc_float_state fs; + struct ppc_vector_state vs; }; #define PPC_MCONTEXT64_SIZE (PPC_THREAD_STATE64_COUNT + PPC_FLOAT_STATE_COUNT + PPC_EXCEPTION_STATE_COUNT + PPC_VECTOR_STATE_COUNT) * sizeof(int) +#ifndef _MCONTEXT64_T +#define _MCONTEXT64_T typedef struct mcontext64 * mcontext64_t; +#endif + +#endif /* _POSIX_C_SOURCE */ #endif /* _PPC_UCONTEXT_H_ */ diff --git a/bsd/ppc/user.h b/bsd/ppc/user.h deleted file mode 100644 index 5914cf757..000000000 --- a/bsd/ppc/user.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (C) 1989, NeXT, Inc. - * - * bsd/ppc/user.h - * - * We can use the default definition of u, so this file is empty. 
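The user_addr_t family defined in the types.h hunk above is the pattern this patch uses wherever the kernel must hold a pointer or long coming from a 32- or 64-bit user process; a minimal userland illustration (the typedef and macros are copied from the hunk, the harness is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t user_addr_t;   /* always 64 bits, even for 32-bit callers */
    #define USER_ADDR_NULL          ((user_addr_t)0)
    #define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr)))

    int main(void)
    {
        int local = 0;
        /* the cast goes through uintptr_t, so a 32-bit pointer is zero-extended */
        user_addr_t ua = CAST_USER_ADDR_T(&local);
        printf("user_addr_t is %zu bytes regardless of the process ABI\n", sizeof ua);
        return ua == USER_ADDR_NULL;   /* 0 here: the address is non-NULL */
    }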
- */ - -#warning ---- Empty bsd/ppc/user.h ---- diff --git a/bsd/ppc/vmparam.h b/bsd/ppc/vmparam.h index 1f762947f..013608b64 100644 --- a/bsd/ppc/vmparam.h +++ b/bsd/ppc/vmparam.h @@ -25,7 +25,13 @@ #include -#define USRSTACK 0xc0000000 +#define USRSTACK (0xc0000000) + +/* + * put the default 64-bit stack at the max address + * (minus one 32-bit address space for other incidentals) + */ +#define USRSTACK64 (MACH_VM_MAX_ADDRESS - VM_MAX_ADDRESS) /* * Virtual memory related constants, all in bytes diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 9505779d9..946ec4094 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -19,30 +19,94 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +# In both the framework PrivateHeader area and /usr/include/sys DATAFILES = \ appleapiopts.h acct.h aio.h attr.h \ - buf.h callout.h cdefs.h clist.h conf.h \ - dir.h dirent.h disk.h disklabel.h disktab.h dkstat.h dmap.h domain.h \ - errno.h ev.h event.h exec.h fcntl.h file.h filedesc.h filio.h gmon.h ioccom.h ioctl.h \ - ioctl_compat.h ipc.h kernel.h kern_event.h ktrace.h loadable_fs.h lock.h lockf.h mach_swapon.h malloc.h \ - kdebug.h linker_set.h md5.h kern_control.h \ - mbuf.h mman.h mount.h msgbuf.h mtio.h namei.h netport.h param.h paths.h \ - proc.h protosw.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \ + buf.h cdefs.h conf.h \ + dir.h dirent.h disk.h dkstat.h \ + errno.h ev.h event.h fcntl.h file.h filedesc.h filio.h gmon.h \ + ioccom.h ioctl.h \ + ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h loadable_fs.h lock.h lockf.h \ + kauth.h kdebug.h md5.h kern_control.h malloc.h \ + mbuf.h mman.h mount.h msg.h msgbuf.h mtio.h netport.h param.h paths.h pipe.h poll.h \ + proc.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \ select.h sem.h semaphore.h shm.h signal.h signalvar.h socket.h socketvar.h sockio.h stat.h \ - syscall.h sysctl.h syslimits.h syslog.h systm.h sys_domain.h termios.h time.h \ + syscall.h sysctl.h syslimits.h syslog.h sys_domain.h termios.h time.h \ timeb.h times.h tprintf.h trace.h tty.h ttychars.h ttycom.h \ ttydefaults.h ttydev.h types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ - user.h utfconv.h utsname.h ux_exception.h vadvise.h vcmd.h version.h \ - vm.h vmmeter.h vmparam.h vnioctl.h vnode.h vnode_if.h vstat.h wait.h + user.h utfconv.h utsname.h vadvise.h vcmd.h version.h \ + vm.h vmmeter.h vmparam.h vnioctl.h vnode.h vnode_if.h vstat.h wait.h xattr.h \ + _types.h _endian.h domain.h protosw.h + +# Only in the framework PrivateHeader area +PRIVATE_DATAFILES = \ + disklabel.h \ + ipcs.h \ + sem_internal.h \ + shm_internal.h \ + ux_exception.h \ + ktrace.h \ + vnioctl.h + +# KERNELFILES will appear only in the kernel framework +KERNELFILES = \ + appleapiopts.h attr.h \ + buf.h cdefs.h conf.h \ + dir.h dirent.h disk.h dkstat.h \ + errno.h ev.h event.h fcntl.h file.h filedesc.h filio.h \ + ioccom.h ioctl.h \ + ioctl_compat.h kernel.h kernel_types.h kern_event.h lock.h lockf.h \ + kauth.h kdebug.h md5.h kern_control.h malloc.h namei.h \ + mman.h mbuf.h mount.h mtio.h netport.h param.h paths.h \ + proc.h queue.h quota.h random.h resource.h resourcevar.h \ + select.h signal.h socket.h socketvar.h sockio.h stat.h \ + sysctl.h syslimits.h syslog.h systm.h sys_domain.h time.h \ + types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ + utfconv.h version.h \ + vm.h vmparam.h vnode.h vnode_if.h xattr.h \ + _types.h _endian.h protosw.h domain.h \ + kpi_mbuf.h kpi_socket.h kpi_socketfilter.h \ + ttycom.h termios.h + + 
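The USRSTACK64 definition in the vmparam.h hunk above parks the default 64-bit stack one full 32-bit address space below the top of the Mach VM range; schematically (a sketch with stand-in constants, not the real ppc64 values, which come from the Mach VM headers):

    #include <stdio.h>

    /* stand-in values for illustration only */
    #define MACH_VM_MAX_ADDRESS 0x0000001000000000ULL   /* assumed 64-bit ceiling */
    #define VM_MAX_ADDRESS      0x00000000fffff000ULL   /* assumed 32-bit ceiling */
    #define USRSTACK64          (MACH_VM_MAX_ADDRESS - VM_MAX_ADDRESS)

    int main(void)
    {
        printf("USRSTACK64 = 0x%016llx\n", (unsigned long long)USRSTACK64);
        return 0;
    }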
+# Only in the private kernel framework +PRIVATE_KERNELFILES = \ + disktab.h \ + file_internal.h \ + mach_swapon.h \ + msgbuf.h \ + eventvar.h \ + mount_internal.h \ + proc_internal.h \ + ptrace_internal.h \ + vnode_internal.h \ + signalvar.h \ + tty.h ttychars.h \ + ttydefaults.h ttydev.h \ + user.h \ + ubc_internal.h \ + uio_internal.h \ + vfs_context.h + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = sys -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} syscall.h ktrace.h linker_set.h EXPORT_MI_DIR = sys +# /System/Library/Frameworks/System.framework/PrivateHeaders +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + +# /System/Library/Frameworks/Kernel.framework/PrivateHeaders + +INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} + +# /System/Library/Frameworks/Kernel.framework/Headers + +INSTALL_KF_MI_LIST = ${KERNELFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/miscfs/specfs/lockf.h b/bsd/sys/_endian.h similarity index 53% rename from bsd/miscfs/specfs/lockf.h rename to bsd/sys/_endian.h index c10b0b2d8..8d0c683b8 100644 --- a/bsd/miscfs/specfs/lockf.h +++ b/bsd/sys/_endian.h @@ -1,4 +1,27 @@ /* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Copyright (c) 1995 NeXT Computer, Inc. All rights reserved. * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ @@ -19,14 +42,10 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* - * Copyright (c) 1991, 1993 + * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. All rights reserved. * - * This code is derived from software contributed to Berkeley by - * Scooter Morris at Genentech Inc. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -54,61 +73,60 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)lockf.h 8.2 (Berkeley) 10/26/94 */ -#ifndef _UFS_LOCKF_H_ -#define _UFS_LOCKF_H_ -#include +#ifndef _SYS__ENDIAN_H_ +#define _SYS__ENDIAN_H_ + +#include -#ifdef __APPLE_API_PRIVATE /* - * The lockf structure is a kernel structure which contains the information - * associated with a byte range lock. The lockf structures are linked into - * the specinfo structure. Locks are sorted by the starting byte of the lock for - * efficiency. + * Macros for network/external number representation conversion. 
*/ -TAILQ_HEAD(locklist, lockf); +#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN && !defined(lint) +#define ntohl(x) (x) +#define ntohs(x) (x) +#define htonl(x) (x) +#define htons(x) (x) -struct specinfo; -struct lockf { - short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */ - short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ - off_t lf_start; /* Byte # of the start of the lock */ - off_t lf_end; /* Byte # of the end of the lock (-1=EOF) */ - caddr_t lf_id; /* Id of the resource holding the lock */ - struct specinfo *lf_specinfo; /* Back pointer to the specinfo */ - struct lockf *lf_next; /* Pointer to the next lock on this info */ - struct locklist lf_blkhd; /* List of requests blocked on this lock */ - TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ -}; +#if defined(KERNEL) || !defined(_POSIX_C_SOURCE) +#define NTOHL(x) (x) +#define NTOHS(x) (x) +#define HTONL(x) (x) +#define HTONS(x) (x) +#endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ -/* Maximum length of sleep chains to traverse to try and detect deadlock. */ -#define MAXDEPTH 50 +#else -__BEGIN_DECLS -void spec_lf_addblock __P((struct lockf *, struct lockf *)); -int spec_lf_clearlock __P((struct lockf *)); -int spec_lf_findoverlap __P((struct lockf *, - struct lockf *, int, struct lockf ***, struct lockf **)); -struct lockf * - spec_lf_getblock __P((struct lockf *)); -int spec_lf_getlock __P((struct lockf *, struct flock *)); -int spec_lf_setlock __P((struct lockf *)); -void spec_lf_split __P((struct lockf *, struct lockf *)); -void spec_lf_wakelock __P((struct lockf *)); -__END_DECLS - -#ifdef LOCKF_DEBUG -extern int lockf_debug; +#if !defined(__ASSEMBLER__) +#include +#include + __BEGIN_DECLS -void spec_lf_print __P((char *, struct lockf *)); -void spec_lf_printlist __P((char *, struct lockf *)); +uint16_t ntohs(uint16_t); +uint16_t htons(uint16_t); +uint32_t ntohl(uint32_t); +uint32_t htonl(uint32_t); __END_DECLS -#endif +#endif /* !defined(__ASSEMBLER__) */ + +#define ntohs(x) NXSwapBigShortToHost(x) +#define htons(x) NXSwapHostShortToBig(x) -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _UFS_LOCKF_H_ */ +#if defined(__LP64__) +#define ntohl(x) NXSwapBigIntToHost(x) +#define htonl(x) NXSwapHostIntToBig(x) +#else +#define ntohl(x) NXSwapBigLongToHost(x) +#define htonl(x) NXSwapHostLongToBig(x) +#endif /* defined(__LP64__) */ +#if defined(KERNEL) || !defined(_POSIX_C_SOURCE) +#define NTOHL(x) (x) = ntohl((u_long)x) +#define NTOHS(x) (x) = ntohs((u_short)x) +#define HTONL(x) (x) = htonl((u_long)x) +#define HTONS(x) (x) = htons((u_short)x) +#endif /* defined(KERNEL) || !defined(_POSIX_C_SOURCE) */ +#endif /* __DARWIN_BYTE_ORDER != __DARWIN_BIG_ENDIAN || defined(lint) */ +#endif /* !_SYS__ENDIAN_H_ */ diff --git a/bsd/sys/_types.h b/bsd/sys/_types.h new file mode 100644 index 000000000..addbb39b1 --- /dev/null +++ b/bsd/sys/_types.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
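On big-endian builds the _endian.h hunk above makes ntohl/htonl identity macros, and on little-endian builds NXSwap calls; the observable contract is the same either way, as this small round-trip shows (a userland sketch using the standard prototypes):

    #include <arpa/inet.h>
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t host = 0x11223344;
        uint32_t net  = htonl(host);   /* identity on big-endian ppc, byte swap on i386 */
        assert(ntohl(net) == host);    /* round-trips on either byte order */
        return 0;
    }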
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS__TYPES_H_ +#define _SYS__TYPES_H_ + +#include +#include + +/* Forward references */ +#ifndef _POSIX_C_SOURCE +struct mcontext; +struct mcontext64; +#else /* _POSIX_C_SOURCE */ +struct __darwin_mcontext; +#endif /* _POSIX_C_SOURCE */ + +/* pthread opaque structures */ +#if defined(__LP64__) +#define __PTHREAD_SIZE__ 1168 +#define __PTHREAD_ATTR_SIZE__ 56 +#define __PTHREAD_MUTEXATTR_SIZE__ 8 +#define __PTHREAD_MUTEX_SIZE__ 56 +#define __PTHREAD_CONDATTR_SIZE__ 8 +#define __PTHREAD_COND_SIZE__ 40 +#define __PTHREAD_ONCE_SIZE__ 8 +#define __PTHREAD_RWLOCK_SIZE__ 192 +#define __PTHREAD_RWLOCKATTR_SIZE__ 16 +#else /* __LP64__ */ +#define __PTHREAD_SIZE__ 596 +#define __PTHREAD_ATTR_SIZE__ 36 +#define __PTHREAD_MUTEXATTR_SIZE__ 8 +#define __PTHREAD_MUTEX_SIZE__ 40 +#define __PTHREAD_CONDATTR_SIZE__ 4 +#define __PTHREAD_COND_SIZE__ 24 +#define __PTHREAD_ONCE_SIZE__ 4 +#define __PTHREAD_RWLOCK_SIZE__ 124 +#define __PTHREAD_RWLOCKATTR_SIZE__ 12 +#endif /* __LP64__ */ + +struct __darwin_pthread_handler_rec +{ + void (*__routine)(void *); /* Routine to call */ + void *__arg; /* Argument to pass */ + struct __darwin_pthread_handler_rec *__next; +}; +struct _opaque_pthread_attr_t { long __sig; char __opaque[__PTHREAD_ATTR_SIZE__]; }; +struct _opaque_pthread_cond_t { long __sig; char __opaque[__PTHREAD_COND_SIZE__]; }; +struct _opaque_pthread_condattr_t { long __sig; char __opaque[__PTHREAD_CONDATTR_SIZE__]; }; +struct _opaque_pthread_mutex_t { long __sig; char __opaque[__PTHREAD_MUTEX_SIZE__]; }; +struct _opaque_pthread_mutexattr_t { long __sig; char __opaque[__PTHREAD_MUTEXATTR_SIZE__]; }; +struct _opaque_pthread_once_t { long __sig; char __opaque[__PTHREAD_ONCE_SIZE__]; }; +struct _opaque_pthread_rwlock_t { long __sig; char __opaque[__PTHREAD_RWLOCK_SIZE__]; }; +struct _opaque_pthread_rwlockattr_t { long __sig; char __opaque[__PTHREAD_RWLOCKATTR_SIZE__]; }; +struct _opaque_pthread_t { long __sig; struct __darwin_pthread_handler_rec *__cleanup_stack; char __opaque[__PTHREAD_SIZE__]; }; + +/* + * Type definitions; takes common type definitions that must be used + * in multiple header files due to [XSI], removes them from the system + * space, and puts them in the implementation space. + */ + +#ifdef __cplusplus +#ifdef __GNUG__ +#define __DARWIN_NULL __null +#else /* ! __GNUG__ */ +#ifdef __LP64__ +#define __DARWIN_NULL (0L) +#else /* !__LP64__ */ +#define __DARWIN_NULL 0 +#endif /* __LP64__ */ +#endif /* __GNUG__ */ +#else /* ! __cplusplus */ +#define __DARWIN_NULL ((void *)0) +#endif /* __cplusplus */ + +typedef __int64_t __darwin_blkcnt_t; /* total blocks */ +typedef __int32_t __darwin_blksize_t; /* preferred block size */ +typedef __int32_t __darwin_dev_t; /* dev_t */ +typedef unsigned int __darwin_fsblkcnt_t; /* Used by statvfs and fstatvfs */ +typedef unsigned int __darwin_fsfilcnt_t; /* Used by statvfs and fstatvfs */ +typedef __uint32_t __darwin_gid_t; /* [???] 
process and group IDs */ +typedef __uint32_t __darwin_id_t; /* [XSI] pid_t, uid_t, or gid_t*/ +typedef __uint32_t __darwin_ino_t; /* [???] Used for inodes */ +typedef __darwin_natural_t __darwin_mach_port_name_t; /* Used by mach */ +typedef __darwin_mach_port_name_t __darwin_mach_port_t; /* Used by mach */ +#ifndef _POSIX_C_SOURCE +typedef struct mcontext *__darwin_mcontext_t; /* [???] machine context */ +typedef struct mcontext64 *__darwin_mcontext64_t; /* [???] machine context */ +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_mcontext *__darwin_mcontext_t; /* [???] machine context */ +#endif /* _POSIX_C_SOURCE */ +typedef __uint16_t __darwin_mode_t; /* [???] Some file attributes */ +typedef __int64_t __darwin_off_t; /* [???] Used for file sizes */ +typedef __int32_t __darwin_pid_t; /* [???] process and group IDs */ +typedef struct _opaque_pthread_attr_t + __darwin_pthread_attr_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_cond_t + __darwin_pthread_cond_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_condattr_t + __darwin_pthread_condattr_t; /* [???] Used for pthreads */ +typedef unsigned long __darwin_pthread_key_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_mutex_t + __darwin_pthread_mutex_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_mutexattr_t + __darwin_pthread_mutexattr_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_once_t + __darwin_pthread_once_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_rwlock_t + __darwin_pthread_rwlock_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_rwlockattr_t + __darwin_pthread_rwlockattr_t; /* [???] Used for pthreads */ +typedef struct _opaque_pthread_t + *__darwin_pthread_t; /* [???] Used for pthreads */ +typedef __uint32_t __darwin_sigset_t; /* [???] signal set */ +typedef __int32_t __darwin_suseconds_t; /* [???] microseconds */ +typedef __uint32_t __darwin_uid_t; /* [???] user IDs */ +typedef __uint32_t __darwin_useconds_t; /* [???] microseconds */ +typedef unsigned char __darwin_uuid_t[16]; + +/* Structure used in sigaltstack call. */ +#ifndef _POSIX_C_SOURCE +struct sigaltstack +#else /* _POSIX_C_SOURCE */ +struct __darwin_sigaltstack +#endif /* _POSIX_C_SOURCE */ +{ + void *ss_sp; /* signal stack base */ + __darwin_size_t ss_size; /* signal stack length */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ +}; +#ifndef _POSIX_C_SOURCE +typedef struct sigaltstack __darwin_stack_t; /* [???] signal stack */ +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_sigaltstack __darwin_stack_t; /* [???] signal stack */ +#endif /* _POSIX_C_SOURCE */ + +/* user context */ +#ifndef _POSIX_C_SOURCE +struct ucontext +#else /* _POSIX_C_SOURCE */ +struct __darwin_ucontext +#endif /* _POSIX_C_SOURCE */ +{ + int uc_onstack; + __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ + __darwin_stack_t uc_stack; /* stack used by this context */ +#ifndef _POSIX_C_SOURCE + struct ucontext *uc_link; /* pointer to resuming context */ +#else /* _POSIX_C_SOURCE */ + struct __darwin_ucontext *uc_link; /* pointer to resuming context */ +#endif /* _POSIX_C_SOURCE */ + __darwin_size_t uc_mcsize; /* size of the machine context passed in */ + __darwin_mcontext_t uc_mcontext; /* pointer to machine specific context */ +}; +#ifndef _POSIX_C_SOURCE +typedef struct ucontext __darwin_ucontext_t; /* [???] user context */ +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ucontext __darwin_ucontext_t; /* [???] 
user context */ +#endif /* _POSIX_C_SOURCE */ + +#ifndef _POSIX_C_SOURCE +struct ucontext64 { + int uc_onstack; + __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ + __darwin_stack_t uc_stack; /* stack used by this context */ + struct ucontext64 *uc_link; /* pointer to resuming context */ + __darwin_size_t uc_mcsize; /* size of the machine context passed in */ + __darwin_mcontext64_t uc_mcontext64; /* pointer to machine specific context */ +}; +typedef struct ucontext64 __darwin_ucontext64_t; /* [???] user context */ +#endif /* _POSIX_C_SOURCE */ + +#ifdef KERNEL +#ifndef offsetof +#define offsetof(type, member) ((size_t)(&((type *)0)->member)) +#endif /* offsetof */ +#endif /* KERNEL */ +#endif /* _SYS__TYPES_H_ */ diff --git a/bsd/sys/acct.h b/bsd/sys/acct.h index 01aa44369..1cd61259b 100644 --- a/bsd/sys/acct.h +++ b/bsd/sys/acct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,7 +63,7 @@ #define _SYS_ACCT_H_ #include - +#include /* * Accounting structures; these use a comp_t type which is a 3 bits base 8 * exponent, 13 bit fraction ``floating point'' number. Units are 1/AHZ @@ -100,7 +100,11 @@ struct acct { #ifdef KERNEL #ifdef __APPLE_API_PRIVATE extern struct vnode *acctp; -int acct_process __P((struct proc *p)); + +__BEGIN_DECLS +int acct_process(struct proc *p); +__END_DECLS + #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h index 57b3e3d17..f2d41b32c 100644 --- a/bsd/sys/aio.h +++ b/bsd/sys/aio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,6 +32,7 @@ #define _SYS_AIO_H_ #include +#include struct aiocb { int aio_fildes; /* File descriptor */ @@ -43,6 +44,29 @@ struct aiocb { int aio_lio_opcode; /* Operation to be performed */ }; +// LP64todo - should this move? +#ifdef KERNEL + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_aiocb { + int aio_fildes; /* File descriptor */ + off_t aio_offset; /* File offset */ + user_addr_t aio_buf; /* Location of buffer */ + user_size_t aio_nbytes; /* Length of transfer */ + int aio_reqprio; /* Request priority offset */ + struct user_sigevent aio_sigevent; /* Signal number and value */ + int aio_lio_opcode; /* Operation to be performed */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + /* * aio_cancel() return values */ @@ -102,7 +126,9 @@ struct aiocb { * notification is given when the operation is complete */ -#define O_SYNC 0x0 /* queued IO is completed as if by fsync() */ +#ifndef O_SYNC /* XXX investigate documentation error */ +#define O_SYNC 0x0080 /* queued IO is completed as if by fsync() */ +#endif #if 0 /* O_DSYNC - NOT SUPPORTED */ #define O_DSYNC 0x1 /* queued async IO is completed as if by fdatasync() */ #endif @@ -112,6 +138,8 @@ struct aiocb { * Prototypes */ +__BEGIN_DECLS + /* * Attempt to cancel one or more asynchronous I/O requests currently outstanding * against file descriptor fd. 
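The comment continued below documents aio_cancel(); from user space the call is used roughly like this (a sketch of standard POSIX AIO usage; the file name is arbitrary):

    #include <aio.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        static char buffer[64];
        struct aiocb cb;

        memset(&cb, 0, sizeof cb);
        cb.aio_fildes = open("/etc/hosts", O_RDONLY);
        if (cb.aio_fildes < 0)
            return 1;
        cb.aio_buf    = buffer;
        cb.aio_nbytes = sizeof buffer;
        cb.aio_offset = 0;

        if (aio_read(&cb) == 0)
            (void)aio_cancel(cb.aio_fildes, NULL);   /* cancel all I/O queued on the fd */

        close(cb.aio_fildes);
        return 0;
    }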
The aiocbp argument points to the asynchronous I/O @@ -223,5 +251,7 @@ int lio_listio( int mode, struct aiocb *const aiocblist[], int nent, struct sigevent *sigp ); +__END_DECLS + #endif /* KERNEL */ #endif /* _SYS_AIO_H_ */ diff --git a/bsd/sys/aio_kern.h b/bsd/sys/aio_kern.h index 6c113a744..09401f0b3 100644 --- a/bsd/sys/aio_kern.h +++ b/bsd/sys/aio_kern.h @@ -38,15 +38,15 @@ struct aio_workq_entry { TAILQ_ENTRY( aio_workq_entry ) aio_workq_link; struct proc *procp; /* user proc that queued this request */ - struct aiocb *uaiocbp; /* pointer passed in from user land */ - struct aiocb *fsyncp; /* not NULL means this request must complete */ + user_addr_t uaiocbp; /* pointer passed in from user land */ + user_addr_t fsyncp; /* not NULL means this request must complete */ /* before an aio_fsync call can proceed. */ vm_map_t aio_map; /* user land map we have a reference to */ - ssize_t returnval; /* return value from read / write request */ + user_ssize_t returnval; /* return value from read / write request */ int errorval; /* error value from read / write request */ int flags; long group_tag; /* identifier used to group IO requests */ - struct aiocb aiocb; /* copy of aiocb from user land */ + struct user_aiocb aiocb; /* copy of aiocb from user land */ }; typedef struct aio_workq_entry aio_workq_entry; @@ -66,11 +66,27 @@ typedef struct aio_workq_entry aio_workq_entry; /* waiting for one or more active IO requests to */ /* complete */ +/* + * Prototypes + */ + +__private_extern__ void +_aio_close(struct proc *p, int fd); + +__private_extern__ void +_aio_exit(struct proc *p); + +__private_extern__ void +_aio_exec(struct proc *p); + +__private_extern__ void +_aio_create_worker_threads(int num); + +__private_extern__ void +aio_init(void); -__private_extern__ void _aio_close( struct proc *p, int fd ); -__private_extern__ void _aio_exit( struct proc *p ); -__private_extern__ void _aio_exec( struct proc *p ); -__private_extern__ void _aio_create_worker_threads( int num ); +task_t +get_aiotask(void); #endif /* KERNEL */ diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index 40a4a586c..80a7512c2 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,21 +32,14 @@ #include #ifdef __APPLE_API_UNSTABLE -#ifndef _SYS_TYPES_H_ #include -#endif -#ifndef _SYS_UCRED_H #include -#endif -#ifndef _SYS_MOUNT_H_ -#include -#endif -#ifndef _SYS_TIME_H_ #include -#endif +#include #define FSOPT_NOFOLLOW 0x00000001 -#define FSOPT_NOINMEMUPDATE 0x00000002 +#define FSOPT_NOINMEMUPDATE 0x00000002 +#define FSOPT_REPORT_FULLSIZE 0x00000004 /* we currently aren't anywhere near this amount for a valid * fssearchblock.sizeofsearchparams1 or fssearchblock.sizeofsearchparams2 @@ -92,8 +85,8 @@ typedef struct attribute_set { } attribute_set_t; typedef struct attrreference { - long attr_dataoffset; - size_t attr_length; + int32_t attr_dataoffset; + u_int32_t attr_length; } attrreference_t; /* XXX PPD This is derived from HFSVolumePriv.h and should perhaps be referenced from there? 
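The attrreference_t fixed just above is how getattrlist() hands back variable-length data: attr_dataoffset is measured from the attrreference_t itself. A minimal userland consumer (a sketch; the reply struct layout is the caller's own):

    #include <sys/attr.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    struct name_reply {
        u_int32_t       length;     /* total bytes returned, filled in by the kernel */
        attrreference_t name_ref;   /* locates the object name */
        char            name[256];  /* string storage */
    };

    int main(void)
    {
        struct attrlist   al;
        struct name_reply r;

        memset(&al, 0, sizeof al);
        al.bitmapcount = ATTR_BIT_MAP_COUNT;
        al.commonattr  = ATTR_CMN_NAME;
        if (getattrlist("/tmp", &al, &r, sizeof r, 0) != 0)
            return 1;
        /* the offset is relative to the attrreference_t, not the buffer start */
        printf("%.*s\n", (int)r.name_ref.attr_length,
               (char *)&r.name_ref + r.name_ref.attr_dataoffset);
        return 0;
    }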
*/ @@ -117,6 +110,11 @@ typedef struct vol_capabilities_attr { vol_capabilities_set_t valid; } vol_capabilities_attr_t; +/* + * XXX this value needs to be raised - 3893388 + */ +#define ATTR_MAX_BUFFER 8192 + /* * VOL_CAP_FMT_PERSISTENTOBJECTIDS: When set, the volume has object IDs * that are persistent (retain their values even when the volume is @@ -174,27 +172,33 @@ typedef struct vol_capabilities_attr { * the statfs information in its in-memory structures should set this bit. * A volume that must always read from disk or always perform a network * transaction should not set this bit. + * + * VOL_CAP_FMT_2TB_FILESIZE: When set, the volume format supports file + * size upto 2TB. This bit does not necessarily mean that the file + * system does not support file size more than 2TB. This bit does + * not mean that the currently available space on the volume is 2TB. */ -#define VOL_CAP_FMT_PERSISTENTOBJECTIDS 0x00000001 -#define VOL_CAP_FMT_SYMBOLICLINKS 0x00000002 -#define VOL_CAP_FMT_HARDLINKS 0x00000004 -#define VOL_CAP_FMT_JOURNAL 0x00000008 -#define VOL_CAP_FMT_JOURNAL_ACTIVE 0x00000010 -#define VOL_CAP_FMT_NO_ROOT_TIMES 0x00000020 -#define VOL_CAP_FMT_SPARSE_FILES 0x00000040 -#define VOL_CAP_FMT_ZERO_RUNS 0x00000080 -#define VOL_CAP_FMT_CASE_SENSITIVE 0x00000100 -#define VOL_CAP_FMT_CASE_PRESERVING 0x00000200 -#define VOL_CAP_FMT_FAST_STATFS 0x00000400 +#define VOL_CAP_FMT_PERSISTENTOBJECTIDS 0x00000001 +#define VOL_CAP_FMT_SYMBOLICLINKS 0x00000002 +#define VOL_CAP_FMT_HARDLINKS 0x00000004 +#define VOL_CAP_FMT_JOURNAL 0x00000008 +#define VOL_CAP_FMT_JOURNAL_ACTIVE 0x00000010 +#define VOL_CAP_FMT_NO_ROOT_TIMES 0x00000020 +#define VOL_CAP_FMT_SPARSE_FILES 0x00000040 +#define VOL_CAP_FMT_ZERO_RUNS 0x00000080 +#define VOL_CAP_FMT_CASE_SENSITIVE 0x00000100 +#define VOL_CAP_FMT_CASE_PRESERVING 0x00000200 +#define VOL_CAP_FMT_FAST_STATFS 0x00000400 +#define VOL_CAP_FMT_2TB_FILESIZE 0x00000800 /* * VOL_CAP_INT_SEARCHFS: When set, the volume implements the - * searchfs() system call (the VOP_SEARCHFS vnode operation). + * searchfs() system call (the vnop_searchfs vnode operation). * * VOL_CAP_INT_ATTRLIST: When set, the volume implements the - * getattrlist() and setattrlist() system calls (VOP_GETATTRLIST - * and VOP_SETATTRLIST vnode operations) for the volume, files, + * getattrlist() and setattrlist() system calls (vnop_getattrlist + * and vnop_setattrlist vnode operations) for the volume, files, * and directories. The volume may or may not implement the * readdirattr() system call. XXX Is there any minimum set * of attributes that should be supported? To determine the @@ -205,17 +209,17 @@ typedef struct vol_capabilities_attr { * of NFS volumes. * * VOL_CAP_INT_READDIRATTR: When set, the volume implements the - * readdirattr() system call (VOP_READDIRATTR vnode operation). + * readdirattr() system call (vnop_readdirattr vnode operation). * * VOL_CAP_INT_EXCHANGEDATA: When set, the volume implements the - * exchangedata() system call (VOP_EXCHANGE vnode operation). + * exchangedata() system call (VNOP_EXCHANGE vnode operation). * * VOL_CAP_INT_COPYFILE: When set, the volume implements the * VOP_COPYFILE vnode operation. (XXX There should be a copyfile() * system call in .) * * VOL_CAP_INT_ALLOCATE: When set, the volume implements the - * VOP_ALLOCATE vnode operation, which means it implements the + * VNOP_ALLOCATE vnode operation, which means it implements the * F_PREALLOCATE selector of fcntl(2). 
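Probing the VOL_CAP_* bits described above follows the same getattrlist() pattern, requesting ATTR_VOL_CAPABILITIES into a vol_capabilities_attr_t; a bit is only meaningful when it is also set in the valid mask (a sketch; VOL_CAPABILITIES_FORMAT is the format-capability index defined alongside these bits):

    #include <sys/attr.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    struct caps_reply {
        u_int32_t               length;
        vol_capabilities_attr_t caps;
    };

    int main(void)
    {
        struct attrlist   al;
        struct caps_reply r;

        memset(&al, 0, sizeof al);
        al.bitmapcount = ATTR_BIT_MAP_COUNT;
        al.volattr     = ATTR_VOL_INFO | ATTR_VOL_CAPABILITIES;  /* ATTR_VOL_INFO is required */
        if (getattrlist("/", &al, &r, sizeof r, 0) != 0)
            return 1;
        if (r.caps.valid[VOL_CAPABILITIES_FORMAT] &
            r.caps.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_JOURNAL)
            printf("root volume format supports journaling\n");
        return 0;
    }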
* * VOL_CAP_INT_VOL_RENAME: When set, the volume implements the @@ -223,38 +227,38 @@ typedef struct vol_capabilities_attr { * The volume can be renamed by setting ATTR_VOL_NAME with setattrlist(). * * VOL_CAP_INT_ADVLOCK: When set, the volume implements POSIX style - * byte range locks via VOP_ADVLOCK (accessible from fcntl(2)). + * byte range locks via vnop_advlock (accessible from fcntl(2)). * * VOL_CAP_INT_FLOCK: When set, the volume implements whole-file flock(2) - * style locks via VOP_ADVLOCK. This includes the O_EXLOCK and O_SHLOCK + * style locks via vnop_advlock. This includes the O_EXLOCK and O_SHLOCK * flags of the open(2) call. * */ -#define VOL_CAP_INT_SEARCHFS 0x00000001 -#define VOL_CAP_INT_ATTRLIST 0x00000002 -#define VOL_CAP_INT_NFSEXPORT 0x00000004 -#define VOL_CAP_INT_READDIRATTR 0x00000008 -#define VOL_CAP_INT_EXCHANGEDATA 0x00000010 -#define VOL_CAP_INT_COPYFILE 0x00000020 -#define VOL_CAP_INT_ALLOCATE 0x00000040 -#define VOL_CAP_INT_VOL_RENAME 0x00000080 -#define VOL_CAP_INT_ADVLOCK 0x00000100 -#define VOL_CAP_INT_FLOCK 0x00000200 +#define VOL_CAP_INT_SEARCHFS 0x00000001 +#define VOL_CAP_INT_ATTRLIST 0x00000002 +#define VOL_CAP_INT_NFSEXPORT 0x00000004 +#define VOL_CAP_INT_READDIRATTR 0x00000008 +#define VOL_CAP_INT_EXCHANGEDATA 0x00000010 +#define VOL_CAP_INT_COPYFILE 0x00000020 +#define VOL_CAP_INT_ALLOCATE 0x00000040 +#define VOL_CAP_INT_VOL_RENAME 0x00000080 +#define VOL_CAP_INT_ADVLOCK 0x00000100 +#define VOL_CAP_INT_FLOCK 0x00000200 +#define VOL_CAP_INT_EXTENDED_SECURITY 0x00000400 +#define VOL_CAP_INT_USERACCESS 0x00000800 typedef struct vol_attributes_attr { attribute_set_t validattr; attribute_set_t nativeattr; } vol_attributes_attr_t; -#define DIR_MNTSTATUS_MNTPOINT 0x00000001 - #define ATTR_CMN_NAME 0x00000001 #define ATTR_CMN_DEVID 0x00000002 #define ATTR_CMN_FSID 0x00000004 #define ATTR_CMN_OBJTYPE 0x00000008 #define ATTR_CMN_OBJTAG 0x00000010 #define ATTR_CMN_OBJID 0x00000020 -#define ATTR_CMN_OBJPERMANENTID 0x00000040 +#define ATTR_CMN_OBJPERMANENTID 0x00000040 #define ATTR_CMN_PAROBJID 0x00000080 #define ATTR_CMN_SCRIPT 0x00000100 #define ATTR_CMN_CRTIME 0x00000200 @@ -267,12 +271,13 @@ typedef struct vol_attributes_attr { #define ATTR_CMN_GRPID 0x00010000 #define ATTR_CMN_ACCESSMASK 0x00020000 #define ATTR_CMN_FLAGS 0x00040000 -#define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 -#define ATTR_CMN_NAMEDATTRLIST 0x00100000 #define ATTR_CMN_USERACCESS 0x00200000 +#define ATTR_CMN_EXTENDED_SECURITY 0x00400000 +#define ATTR_CMN_UUID 0x00800000 +#define ATTR_CMN_GRPUUID 0x01000000 #define ATTR_CMN_VALIDMASK 0x003FFFFF -#define ATTR_CMN_SETMASK 0x0007FF00 +#define ATTR_CMN_SETMASK 0x01C7FF00 #define ATTR_CMN_VOLSETMASK 0x00006700 #define ATTR_VOL_FSTYPE 0x00000001 @@ -280,19 +285,19 @@ typedef struct vol_attributes_attr { #define ATTR_VOL_SIZE 0x00000004 #define ATTR_VOL_SPACEFREE 0x00000008 #define ATTR_VOL_SPACEAVAIL 0x00000010 -#define ATTR_VOL_MINALLOCATION 0x00000020 -#define ATTR_VOL_ALLOCATIONCLUMP 0x00000040 -#define ATTR_VOL_IOBLOCKSIZE 0x00000080 +#define ATTR_VOL_MINALLOCATION 0x00000020 +#define ATTR_VOL_ALLOCATIONCLUMP 0x00000040 +#define ATTR_VOL_IOBLOCKSIZE 0x00000080 #define ATTR_VOL_OBJCOUNT 0x00000100 #define ATTR_VOL_FILECOUNT 0x00000200 #define ATTR_VOL_DIRCOUNT 0x00000400 -#define ATTR_VOL_MAXOBJCOUNT 0x00000800 +#define ATTR_VOL_MAXOBJCOUNT 0x00000800 #define ATTR_VOL_MOUNTPOINT 0x00001000 #define ATTR_VOL_NAME 0x00002000 #define ATTR_VOL_MOUNTFLAGS 0x00004000 -#define ATTR_VOL_MOUNTEDDEVICE 0x00008000 -#define ATTR_VOL_ENCODINGSUSED 0x00010000 
-#define ATTR_VOL_CAPABILITIES 0x00020000 +#define ATTR_VOL_MOUNTEDDEVICE 0x00008000 +#define ATTR_VOL_ENCODINGSUSED 0x00010000 +#define ATTR_VOL_CAPABILITIES 0x00020000 #define ATTR_VOL_ATTRIBUTES 0x40000000 #define ATTR_VOL_INFO 0x80000000 @@ -303,7 +308,8 @@ typedef struct vol_attributes_attr { /* File/directory attributes: */ #define ATTR_DIR_LINKCOUNT 0x00000001 #define ATTR_DIR_ENTRYCOUNT 0x00000002 -#define ATTR_DIR_MOUNTSTATUS 0x00000004 +#define ATTR_DIR_MOUNTSTATUS 0x00000004 +#define DIR_MNTSTATUS_MNTPOINT 0x00000001 #define ATTR_DIR_VALIDMASK 0x00000007 #define ATTR_DIR_SETMASK 0x00000000 @@ -311,18 +317,14 @@ typedef struct vol_attributes_attr { #define ATTR_FILE_LINKCOUNT 0x00000001 #define ATTR_FILE_TOTALSIZE 0x00000002 #define ATTR_FILE_ALLOCSIZE 0x00000004 -#define ATTR_FILE_IOBLOCKSIZE 0x00000008 -#define ATTR_FILE_CLUMPSIZE 0x00000010 +#define ATTR_FILE_IOBLOCKSIZE 0x00000008 #define ATTR_FILE_DEVTYPE 0x00000020 -#define ATTR_FILE_FILETYPE 0x00000040 #define ATTR_FILE_FORKCOUNT 0x00000080 #define ATTR_FILE_FORKLIST 0x00000100 -#define ATTR_FILE_DATALENGTH 0x00000200 -#define ATTR_FILE_DATAALLOCSIZE 0x00000400 -#define ATTR_FILE_DATAEXTENTS 0x00000800 -#define ATTR_FILE_RSRCLENGTH 0x00001000 -#define ATTR_FILE_RSRCALLOCSIZE 0x00002000 -#define ATTR_FILE_RSRCEXTENTS 0x00004000 +#define ATTR_FILE_DATALENGTH 0x00000200 +#define ATTR_FILE_DATAALLOCSIZE 0x00000400 +#define ATTR_FILE_RSRCLENGTH 0x00001000 +#define ATTR_FILE_RSRCALLOCSIZE 0x00002000 #define ATTR_FILE_VALIDMASK 0x00007FFF #define ATTR_FILE_SETMASK 0x00000020 @@ -333,14 +335,25 @@ typedef struct vol_attributes_attr { #define ATTR_FORK_VALIDMASK 0x00000003 #define ATTR_FORK_SETMASK 0x00000000 -#define SRCHFS_START 0x00000001 +/* Obsolete, implemented, not supported */ +#define ATTR_CMN_NAMEDATTRCOUNT 0x00080000 /* not implemented */ +#define ATTR_CMN_NAMEDATTRLIST 0x00100000 /* not implemented */ +#define ATTR_FILE_CLUMPSIZE 0x00000010 /* obsolete */ +#define ATTR_FILE_FILETYPE 0x00000040 /* always zero */ +#define ATTR_FILE_DATAEXTENTS 0x00000800 /* obsolete, HFS-specific */ +#define ATTR_FILE_RSRCEXTENTS 0x00004000 /* obsolete, HFS-specific */ + +/* + * Searchfs + */ +#define SRCHFS_START 0x00000001 #define SRCHFS_MATCHPARTIALNAMES 0x00000002 -#define SRCHFS_MATCHDIRS 0x00000004 -#define SRCHFS_MATCHFILES 0x00000008 -#define SRCHFS_SKIPLINKS 0x00000010 -#define SRCHFS_SKIPINVISIBLE 0x00000020 -#define SRCHFS_SKIPPACKAGES 0x00000040 -#define SRCHFS_SKIPINAPPROPRIATE 0x00000080 +#define SRCHFS_MATCHDIRS 0x00000004 +#define SRCHFS_MATCHFILES 0x00000008 +#define SRCHFS_SKIPLINKS 0x00000010 +#define SRCHFS_SKIPINVISIBLE 0x00000020 +#define SRCHFS_SKIPPACKAGES 0x00000040 +#define SRCHFS_SKIPINAPPROPRIATE 0x00000080 #define SRCHFS_NEGATEPARAMS 0x80000000 #define SRCHFS_VALIDOPTIONSMASK 0x800000FF @@ -358,6 +371,37 @@ struct fssearchblock { struct attrlist searchattrs; }; +#ifdef KERNEL +/* LP64 version of fssearchblock. all pointers and longs + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with fssearchblock + */ +// LP64todo - should this move? 
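The "pointers and longs grow" warning above is easiest to see with sizeof: a pointer-based layout changes size with the caller's ABI, while the user_addr_t layout stays fixed (a sketch with hypothetical struct names):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t user_addr_t;   /* fixed width, as in this patch */
    typedef uint64_t user_size_t;

    struct args_ptr  {                  /* 8 or 16 bytes, ABI dependent */
        void  *returnattrs;
        size_t returnbuffersize;
    };
    struct args_wide {                  /* 16 bytes for every caller */
        user_addr_t returnattrs;
        user_size_t returnbuffersize;
    };

    int main(void)
    {
        printf("pointer-based: %zu bytes\n", sizeof(struct args_ptr));
        printf("user_addr_t-based: %zu bytes\n", sizeof(struct args_wide));
        return 0;
    }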
+ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_fssearchblock { + user_addr_t returnattrs; + user_addr_t returnbuffer; + user_size_t returnbuffersize; + user_ulong_t maxmatches; + struct timeval timelimit; + user_addr_t searchparams1; + user_size_t sizeofsearchparams1; + user_addr_t searchparams2; + user_size_t sizeofsearchparams2; + struct attrlist searchattrs; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + + +#endif // KERNEL + struct searchstate { u_char reserved[556]; // sizeof( SearchState ) diff --git a/bsd/sys/audit.h b/bsd/sys/audit.h deleted file mode 100644 index 5b53aa206..000000000 --- a/bsd/sys/audit.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -#ifndef _SYS_AUDIT_H -#define _SYS_AUDIT_H - -#include -#include -#include -#include -#include - -#define AUDIT_RECORD_MAGIC 0x828a0f1b -#define MAX_AUDIT_RECORDS 20 -#define MAX_AUDIT_RECORD_SIZE 4096 - -/* - * Define the masks for the classes of audit events. - */ -#define AU_NULL 0x00000000 -#define AU_FREAD 0x00000001 -#define AU_FWRITE 0x00000002 -#define AU_FACCESS 0x00000004 -#define AU_FMODIFY 0x00000008 -#define AU_FCREATE 0x00000010 -#define AU_FDELETE 0x00000020 -#define AU_CLOSE 0x00000040 -#define AU_PROCESS 0x00000080 -#define AU_NET 0x00000100 -#define AU_IPC 0x00000200 -#define AU_NONAT 0x00000400 -#define AU_ADMIN 0x00000800 -#define AU_LOGIN 0x00001000 -#define AU_TFM 0x00002000 -#define AU_APPL 0x00004000 -#define AU_SETL 0x00008000 -#define AU_IFLOAT 0x00010000 -#define AU_PRIV 0x00020000 -#define AU_MAC_RW 0x00040000 -#define AU_XCONN 0x00080000 -#define AU_XCREATE 0x00100000 -#define AU_XDELETE 0x00200000 -#define AU_XIFLOAT 0x00400000 -#define AU_XPRIVS 0x00800000 -#define AU_XPRIVF 0x01000000 -#define AU_XMOVE 0x02000000 -#define AU_XDACF 0x04000000 -#define AU_XMACF 0x08000000 -#define AU_XSECATTR 0x10000000 -#define AU_IOCTL 0x20000000 -#define AU_EXEC 0x40000000 -#define AU_OTHER 0x80000000 -#define AU_ALL 0xffffffff - -/* - * IPC types - */ -#define AT_IPC_MSG ((u_char)1) /* message IPC id */ -#define AT_IPC_SEM ((u_char)2) /* semaphore IPC id */ -#define AT_IPC_SHM ((u_char)3) /* shared mem IPC id */ - -/* - * Audit conditions. - */ -#define AUC_UNSET 0 -#define AUC_AUDITING 1 -#define AUC_NOAUDIT 2 -#define AUC_DISABLED -1 - -/* - * auditon(2) commands. 
- */ -#define A_GETPOLICY 2 -#define A_SETPOLICY 3 -#define A_GETKMASK 4 -#define A_SETKMASK 5 -#define A_GETQCTRL 6 -#define A_SETQCTRL 7 -#define A_GETCWD 8 -#define A_GETCAR 9 -#define A_GETSTAT 12 -#define A_SETSTAT 13 -#define A_SETUMASK 14 -#define A_SETSMASK 15 -#define A_GETCOND 20 -#define A_SETCOND 21 -#define A_GETCLASS 22 -#define A_SETCLASS 23 -#define A_GETPINFO 24 -#define A_SETPMASK 25 -#define A_SETFSIZE 26 -#define A_GETFSIZE 27 -#define A_GETPINFO_ADDR 28 -#define A_GETKAUDIT 29 -#define A_SETKAUDIT 30 - -/* - * Audit policy controls. - */ -#define AUDIT_CNT 0x0001 -#define AUDIT_AHLT 0x0002 -#define AUDIT_ARGV 0x0004 -#define AUDIT_ARGE 0x0008 -#define AUDIT_PASSWD 0x0010 -#define AUDIT_SEQ 0x0020 -#define AUDIT_WINDATA 0x0040 -#define AUDIT_USER 0x0080 -#define AUDIT_GROUP 0x0100 -#define AUDIT_TRAIL 0x0200 -#define AUDIT_PATH 0x0400 - -typedef uid_t au_id_t; -typedef pid_t au_asid_t; -typedef u_int16_t au_event_t; -typedef u_int16_t au_emod_t; -typedef u_int32_t au_class_t; - -struct au_tid { - dev_t port; - u_int32_t machine; -}; -typedef struct au_tid au_tid_t; - -struct au_tid_addr { - dev_t at_port; - u_int32_t at_type; - u_int32_t at_addr[4]; -}; -typedef struct au_tid_addr au_tid_addr_t; - -struct au_mask { - unsigned int am_success; /* success bits */ - unsigned int am_failure; /* failure bits */ -}; -typedef struct au_mask au_mask_t; - -struct auditinfo { - au_id_t ai_auid; /* Audit user ID */ - au_mask_t ai_mask; /* Audit masks */ - au_tid_t ai_termid; /* Terminal ID */ - au_asid_t ai_asid; /* Audit session ID */ -}; -typedef struct auditinfo auditinfo_t; - -struct auditinfo_addr { - au_id_t ai_auid; /* Audit user ID */ - au_mask_t ai_mask; /* Audit masks */ - au_tid_addr_t ai_termid; /* Terminal ID */ - au_asid_t ai_asid; /* Audit session ID */ -}; -typedef struct auditinfo_addr auditinfo_addr_t; - -/* Token and record structures */ - -struct au_token { - u_char *t_data; - size_t len; - TAILQ_ENTRY(au_token) tokens; -}; -typedef struct au_token token_t; - -struct au_record { - char used; /* Is this record currently being used */ - int desc; /* The descriptor associated with this record */ - TAILQ_HEAD(, au_token) token_q; /* queue of BSM tokens */ - u_char *data; - size_t len; - LIST_ENTRY(au_record) au_rec_q; -}; -typedef struct au_record au_record_t; - -#ifndef KERNEL -#include - -__BEGIN_DECLS -int audit (const void *, int); -int auditon (int, void *, int); -int auditsvc (int, int); -int auditctl (const char *); -int getauid (au_id_t *); -int setauid (const au_id_t *); -int getaudit (struct auditinfo *); -int setaudit (const struct auditinfo *); -int getaudit_addr (struct auditinfo_addr *, int); -int setaudit_addr (const struct auditinfo_addr *, int); -__END_DECLS -#endif /* !KERNEL */ - -#endif /* !_SYS_AUDIT_H */ diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index 86ceeeb96..91aa77cf7 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,226 +63,388 @@ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ -#include - -#ifdef KERNEL -#include -#include -#include #include +#include +#include + -#ifdef __APPLE_API_PRIVATE - -#define NOLIST ((struct buf *)0x87654321) - -/* - * The buffer header describes an I/O operation in the kernel. - */ -struct buf { - LIST_ENTRY(buf) b_hash; /* Hash chain. */ - LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. 
*/ - TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ - struct proc *b_proc; /* Associated proc; NULL if kernel. */ - volatile long b_flags; /* B_* flags. */ - int b_error; /* Errno value. */ - long b_bufsize; /* Allocated buffer size. */ - long b_bcount; /* Valid bytes in buffer. */ - long b_resid; /* Remaining I/O. */ - dev_t b_dev; /* Device associated with buffer. */ - struct { - caddr_t b_addr; /* Memory, superblocks, indirect etc.*/ - } b_un; - void *b_saveaddr; /* Original b_addr for physio. */ - daddr_t b_lblkno; /* Logical block number. */ - daddr_t b_blkno; /* Underlying physical block number. */ - /* Function to call upon completion. */ - void (*b_iodone) __P((struct buf *)); - struct vnode *b_vp; /* Device vnode. */ - int b_dirtyoff; /* Offset in buffer of dirty region. */ - int b_dirtyend; /* Offset of end of dirty region. */ - int b_validoff; /* Offset in buffer of valid region. */ - int b_validend; /* Offset of end of valid region. */ - struct ucred *b_rcred; /* Read credentials reference. */ - struct ucred *b_wcred; /* Write credentials reference. */ - int b_timestamp; /* timestamp for queuing operation */ - long b_vectorcount; /* number of vectors in b_vectorlist */ - void *b_vectorlist; /* vector list for I/O */ - void *b_pagelist; /* to save pagelist info */ - long b_vects[2]; /* vectorlist when b_vectorcount is 1 */ - long b_whichq; /* the free list the buffer belongs to */ - TAILQ_ENTRY(buf) b_act; /* Device driver queue when active */ - void *b_drvdata; /* Device driver private use */ -}; - -/* - * For portability with historic industry practice, the cylinder number has - * to be maintained in the `b_resid' field. - */ -#define b_cylinder b_resid /* Cylinder number for disksort(). */ - -/* Device driver compatibility definitions. */ -#define b_active b_bcount /* Driver queue head: drive active. */ -#define b_data b_un.b_addr /* b_un.b_addr is not changeable. */ -#define b_errcnt b_resid /* Retry count while I/O in progress. */ -#define iodone biodone /* Old name for biodone. */ -#define iowait biowait /* Old name for biowait. */ - -/* cluster_io definitions for use with io bufs */ -#define b_uploffset b_bufsize -#define b_trans_head b_freelist.tqe_prev -#define b_trans_next b_freelist.tqe_next -#define b_real_bp b_saveaddr -#define b_iostate b_rcred - -/* journaling uses this cluster i/o field for its own - * purposes because meta data buf's should never go - * through the clustering code. - */ -#define b_transaction b_vectorlist - - - -/* - * These flags are kept in b_flags. - */ -#define B_AGE 0x00000001 /* Move to age queue when I/O done. */ -#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ -#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ -#define B_BAD 0x00000008 /* Bad block revectoring in progress. */ -#define B_BUSY 0x00000010 /* I/O in progress. */ -#define B_CACHE 0x00000020 /* Bread found us in the cache. */ -#define B_CALL 0x00000040 /* Call b_iodone from biodone. */ -#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ -#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ -#define B_DONE 0x00000200 /* I/O completed. */ -#define B_EINTR 0x00000400 /* I/O was interrupted */ -#define B_ERROR 0x00000800 /* I/O error occurred. */ -#define B_WASDIRTY 0x00001000 /* page was found dirty in the VM cache */ -#define B_INVAL 0x00002000 /* Does not contain valid info. */ -#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ -#define B_NOCACHE 0x00008000 /* Do not cache block after use. 
 */
-#define B_PAGEOUT	0x00010000	/* Page out indicator... */
-#define B_PGIN		0x00020000	/* Pagein op, so swap() can count it. */
-#define B_PHYS		0x00040000	/* I/O to user memory. */
-#define B_RAW		0x00080000	/* Set by physio for raw transfers. */
-#define B_READ		0x00100000	/* Read buffer. */
-#define B_TAPE		0x00200000	/* Magnetic tape I/O. */
-#define B_PAGELIST	0x00400000	/* Buffer describes pagelist I/O. */
-#define B_WANTED	0x00800000	/* Process wants this buffer. */
 #define B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
-#define B_WRITEINPROG	0x01000000	/* Write in progress. */
-#define B_HDRALLOC	0x02000000	/* zone allocated buffer header */
-#define B_NORELSE	0x04000000	/* don't brelse() in bwrite() */
-#define B_NEED_IODONE	0x08000000
-	/* need to do a biodone on the */
-	/* real_bp associated with a cluster_io */
-#define B_COMMIT_UPL	0x10000000
-	/* commit pages in upl when */
-	/* I/O completes/fails */
-#define B_ZALLOC	0x20000000	/* b_data is zalloc()ed */
-#define B_META		0x40000000	/* buffer contains meta-data. */
-#define B_VECTORLIST	0x80000000	/* Used by device drivers. */
-
-
-/*
- * Zero out the buffer's data area.
- */
-#define clrbuf(bp) { \
-	bzero((bp)->b_data, (u_int)(bp)->b_bcount); \
-	(bp)->b_resid = 0; \
-}
-
-/* Flags to low-level allocation routines. */
-#define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
-#define B_SYNC		0x02	/* Do all allocations synchronously. */
-#define B_NOBUFF	0x04	/* Do not allocate struct buf */
-
-/* Flags for operation type in getblk() */
-#define BLK_READ	0x01	/* buffer for read */
-#define BLK_WRITE	0x02	/* buffer for write */
-#define BLK_PAGEIN	0x04	/* buffer for pagein */
-#define BLK_PAGEOUT	0x08	/* buffer for pageout */
-#define BLK_META	0x10	/* buffer for metadata */
-#define BLK_CLREAD	0x20	/* buffer for cluster read */
-#define BLK_CLWRITE	0x40	/* buffer for cluster write */
+#define B_READ		0x00000001	/* Read buffer. */
+#define B_ASYNC		0x00000002	/* Start I/O, do not wait. */
+#define B_NOCACHE	0x00000004	/* Do not cache block after use. */
+#define B_DELWRI	0x00000008	/* Delay I/O until buffer reused. */
+#define B_LOCKED	0x00000010	/* Locked in core (not reusable). */
+#define B_PHYS		0x00000020	/* I/O to user memory. */
+#define B_CLUSTER	0x00000040	/* UPL based I/O generated by cluster layer */
+#define B_PAGEIO	0x00000080	/* Page in/out */
+#define B_META		0x00000100	/* buffer contains meta-data. */
+/*
+ * make sure when adding flags
+ * that the new flags don't overlap the definitions
+ * in buf_internal.h
+ */
-extern int nbuf;		/* The number of buffer headers */
-extern struct buf *buf;	/* The buffer headers. */
+__BEGIN_DECLS
-#endif /* __APPLE_API_PRIVATE */
+/*
+ * mark the buffer associated with buf_t
+ * as AGED with respect to the LRU cache
+ */
+void	buf_markaged(buf_t);
+/*
+ * mark the buffer associated with buf_t
+ * as invalid... on release, it will go
+ * directly to the free list
+ */
+void	buf_markinvalid(buf_t);
-#ifdef __APPLE_API_UNSTABLE
-/* Macros to clear/set/test flags. */
-#define SET(t, f)	(t) |= (f)
-#define CLR(t, f)	(t) &= ~(f)
-#define ISSET(t, f)	((t) & (f))
-#endif /* __APPLE_API_UNSTABLE */
+/*
+ * mark the buffer associated with buf_t
+ * as a delayed write...
+ */
+void	buf_markdelayed(buf_t);
-#ifdef __APPLE_API_PRIVATE
 /*
- * Definitions for the buffer free lists.
+ * mark the buffer associated with buf_t
+ * as having been interrupted... EINTR
 */
-#define BQUEUES		6	/* number of free buffer queues */
+void	buf_markeintr(buf_t);
-#define BQ_LOCKED	0	/* super-blocks &c */
-#define BQ_LRU		1	/* lru, useful buffers */
-#define BQ_AGE		2	/* rubbish */
-#define BQ_EMPTY	3	/* buffer headers with no memory */
-#define BQ_META		4	/* buffer containing metadata */
-#define BQ_LAUNDRY	5	/* buffers that need cleaning */
-#endif /* __APPLE_API_PRIVATE */
+/*
+ * returns 1 if the buffer associated with buf_t
+ * contains valid data... 0 if it does not
+ */
+int	buf_valid(buf_t);
-__BEGIN_DECLS
-#ifdef __APPLE_API_UNSTABLE
-int	allocbuf __P((struct buf *, int));
-void	bawrite __P((struct buf *));
-void	bdwrite __P((struct buf *));
-void	biodone __P((struct buf *));
-int	biowait __P((struct buf *));
-int	bread __P((struct vnode *, daddr_t, int,
-	    struct ucred *, struct buf **));
-int	meta_bread __P((struct vnode *, daddr_t, int,
-	    struct ucred *, struct buf **));
-int	breada __P((struct vnode *, daddr_t, int, daddr_t, int,
-	    struct ucred *, struct buf **));
-int	breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
-	    struct ucred *, struct buf **));
-int	meta_breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
-	    struct ucred *, struct buf **));
-void	brelse __P((struct buf *));
-void	bremfree __P((struct buf *));
-void	bufinit __P((void));
-void	bwillwrite __P((void));
-int	bwrite __P((struct buf *));
-struct buf *getblk __P((struct vnode *, daddr_t, int, int, int, int));
-struct buf *geteblk __P((int));
-struct buf *incore __P((struct vnode *, daddr_t));
-u_int	minphys __P((struct buf *bp));
-int	physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *, int));
-int	count_busy_buffers __P((void));
-struct buf *alloc_io_buf __P((struct vnode *, int));
-void	free_io_buf __P((struct buf *));
-void	reassignbuf __P((struct buf *, struct vnode *));
-#endif /* __APPLE_API_UNSTABLE */
-__END_DECLS
+/*
+ * returns 1 if the buffer was already valid
+ * in the cache... i.e. no I/O was performed
+ * returns 0 otherwise
+ */
+int	buf_fromcache(buf_t);
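As a sketch of how these accessors compose in filesystem code (a hypothetical helper, not part of the patch; buf_error, buf_resid and buf_seterror are declared just below):

    /*
     * hypothetical I/O-completion helper written against
     * the opaque buf_t accessors declared in this header
     */
    static void
    example_iodone(buf_t bp, void *arg)
    {
        errno_t error = buf_error(bp);

        (void)arg;
        if (error == 0 && buf_resid(bp) != 0) {
            buf_seterror(bp, EIO);      /* treat a short transfer as an error */
            error = EIO;
        }
        if (error != 0)
            buf_markinvalid(bp);        /* don't let bad data linger in the cache */
    }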
no I/O was performed + * returns 0 otherwise + */ +int buf_fromcache(buf_t); + +/* + * returns the UPL associated with buf_t + */ +void * buf_upl(buf_t); + +/* + * returns the offset into the UPL + * associated with buf_t which is to be + * used as the base offset for this I/O + */ +uint32_t buf_uploffset(buf_t); + +/* + * returns read credential associated with buf_t + * a reference is taken which must be explicilty dropped + */ +ucred_t buf_rcred(buf_t); + +/* + * returns write credential associated with buf_t + * a reference is taken which must be explicilty dropped + */ +ucred_t buf_wcred(buf_t); + +/* + * returns process handle associated with buf_t + * i.e identity of task that issued the I/O + */ +proc_t buf_proc(buf_t); + +uint32_t buf_dirtyoff(buf_t); +uint32_t buf_dirtyend(buf_t); +void buf_setdirtyoff(buf_t, uint32_t); +void buf_setdirtyend(buf_t, uint32_t); + +/* + * return the errno value associated with buf_t + */ +errno_t buf_error(buf_t); + +/* + * set errno on buf_t + */ +void buf_seterror(buf_t, errno_t); + +/* + * set specified flags on buf_t + * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO + */ +void buf_setflags(buf_t, int32_t); + +/* + * clear specified flags on buf_t + * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO + */ +void buf_clearflags(buf_t, int32_t); + +/* + * return external flags associated with buf_t + * B_CLUSTER/B_PHYS/B_LOCKED/B_DELWRI/B_ASYNC/B_READ/B_WRITE/B_META/B_PAGEIO + */ +int32_t buf_flags(buf_t); + +/* + * clears I/O related flags (both internal and + * external) associated with buf_t and allows + * the following to be set... + * B_READ/B_WRITE/B_ASYNC/B_NOCACHE + */ +void buf_reset(buf_t, int32_t); + +/* + * insure that the data storage associated with buf_t + * is addressable + */ +errno_t buf_map(buf_t, caddr_t *); + +/* + * release our need to have the storage associated + * with buf_t in an addressable state + */ +errno_t buf_unmap(buf_t); + +/* + * set driver specific data for buf_t + */ +void buf_setdrvdata(buf_t, void *); + +/* + * retrieve driver specific data associated with buf_t + */ +void * buf_drvdata(buf_t); + +/* + * set fs specific data for buf_t + */ +void buf_setfsprivate(buf_t, void *); + +/* + * retrieve driver specific data associated with buf_t + */ +void * buf_fsprivate(buf_t); + +/* + * retrieve the phsyical block number associated with buf_t + */ +daddr64_t buf_blkno(buf_t); + +/* + * retrieve the logical block number associated with buf_t + * i.e. the block number derived from the file offset + */ +daddr64_t buf_lblkno(buf_t); + +/* + * set the phsyical block number associated with buf_t + */ +void buf_setblkno(buf_t, daddr64_t); + +/* + * set the logical block number associated with buf_t + * i.e. the block number derived from the file offset + */ +void buf_setlblkno(buf_t, daddr64_t); + +/* + * retrieve the count of valid bytes associated with buf_t + */ +uint32_t buf_count(buf_t); + +/* + * retrieve the size of the data store assoicated with buf_t + */ +uint32_t buf_size(buf_t); + +/* + * retrieve the residual I/O count assoicated with buf_t + * i.e. 
number of bytes that have not yet been completed + */ +uint32_t buf_resid(buf_t); + +/* + * set the count of bytes associated with buf_t + * typically used to set the size of the I/O to be performed + */ +void buf_setcount(buf_t, uint32_t); + +/* + * set the size of the buffer store associated with buf_t + * typically used when providing private storage to buf_t + */ +void buf_setsize(buf_t, uint32_t); + +/* + * set the size in bytes of the unfinished I/O associated with buf_t + */ +void buf_setresid(buf_t, uint32_t); + +/* + * associate kernel addressable storage with buf_t + */ +void buf_setdataptr(buf_t, uintptr_t); + +/* + * retrieve pointer to buffer associated with buf_t + * if non-NULL, then it is guaranteed to be kernel addressable + * size of buffer can be retrieved via buf_size + * size of valid data can be retrieved via buf_count + * if NULL, then use buf_map/buf_unmap to manage access to the underlying storage + */ +uintptr_t buf_dataptr(buf_t); -#ifdef __APPLE_API_PRIVATE /* - * Stats on usefulness of the buffer cache + * return the vnode_t associated with buf_t */ -struct bufstats { - long bufs_incore; /* found incore */ - long bufs_busyincore; /* found incore. was busy */ - long bufs_vmhits; /* not incore. found in VM */ - long bufs_miss; /* not incore. not in VM */ - long bufs_sleeps; /* buffer starvation */ - long bufs_eblk; /* Calls to geteblk */ - long bufs_iobufmax; /* Max. number of IO buffers used */ - long bufs_iobufinuse; /* number of IO buffers in use */ - long bufs_iobufsleeps; /* IO buffer starvation */ -}; -#endif /* __APPLE_API_PRIVATE */ +vnode_t buf_vnode(buf_t); + +/* + * assign vnode_t to buf_t... the + * device currently associated with + * buf_t is not changed. + */ +void buf_setvnode(buf_t, vnode_t); + +/* + * return the dev_t associated with buf_t + */ +dev_t buf_device(buf_t); + +/* + * assign the dev_t associated with vnode_t + * to buf_t + */ +errno_t buf_setdevice(buf_t, vnode_t); + +errno_t buf_strategy(vnode_t, void *); + +/* + * flags for buf_invalblkno + */ +#define BUF_WAIT 0x01 + +errno_t buf_invalblkno(vnode_t, daddr64_t, int); + + +/* + * return the callback function pointer + * if the callback is still valid + * returns NULL if a buffer that was not + * allocated via buf_alloc is specified + * or if a callback has not been set or + * it has already fired... + */ +void * buf_callback(buf_t); + +/* + * assign a one-shot callback function (driven from biodone) + * to a buf_t allocated via buf_alloc... a caller specified + * arg is passed to the callback function + */ +errno_t buf_setcallback(buf_t, void (*)(buf_t, void *), void *); + +/* + * add a upl_t to a buffer allocated via buf_alloc + * and set the offset into the upl_t (must be page + * aligned). + */ +errno_t buf_setupl(buf_t, upl_t, uint32_t); + +/* + * allocate a buf_t that is a clone of the buf_t + * passed in, but whose I/O range is a subset... + * if a callback routine is specified, it will + * be called from buf_biodone with the bp and + * arg specified.
+ * it must be freed via buf_free + */ +buf_t buf_clone(buf_t, int, int, void (*)(buf_t, void *), void *); + +/* + * allocate a buf_t associated with vnode_t + * that has NO storage associated with it + * but is suitable for use in issuing I/Os + * after storage has been assigned via buf_setdataptr + * or buf_setupl + */ +buf_t buf_alloc(vnode_t); + +/* + * free a buf_t that was allocated via buf_alloc + * any private storage associated with buf_t is the + * responsibility of the caller to release + */ +void buf_free(buf_t); + +/* + * flags for buf_invalidateblks + */ +#define BUF_WRITE_DATA 0x0001 /* write data blocks first */ +#define BUF_SKIP_META 0x0002 /* skip over metadata blocks */ + +int buf_invalidateblks(vnode_t, int, int, int); +/* + * flags for buf_flushdirtyblks and buf_iterate + */ +#define BUF_SKIP_NONLOCKED 0x01 +#define BUF_SKIP_LOCKED 0x02 + +void buf_flushdirtyblks(vnode_t, int, int, char *); +void buf_iterate(vnode_t, int (*)(buf_t, void *), int, void *); + +#define BUF_RETURNED 0 +#define BUF_RETURNED_DONE 1 +#define BUF_CLAIMED 2 +#define BUF_CLAIMED_DONE 3 + +/* + * zero the storage associated with buf_t + */ +void buf_clear(buf_t); + +errno_t buf_bawrite(buf_t); +errno_t buf_bdwrite(buf_t); +errno_t buf_bwrite(buf_t); + +void buf_biodone(buf_t); +errno_t buf_biowait(buf_t); +void buf_brelse(buf_t); + +errno_t buf_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *); +errno_t buf_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *); +errno_t buf_meta_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *); +errno_t buf_meta_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *); + +u_int minphys(buf_t bp); +int physio(void (*)(buf_t), buf_t, dev_t, int , u_int (*)(buf_t), struct uio *, int ); + + +/* + * Flags for operation type in getblk() + */ +#define BLK_READ 0x01 /* buffer for read */ +#define BLK_WRITE 0x02 /* buffer for write */ +#define BLK_META 0x10 /* buffer for metadata */ +/* + * modifier for above flags... if set, getblk will only return + * a bp that is already valid... i.e. found in the cache + */ +#define BLK_ONLYVALID 0x80000000 + +/* timeout is in msecs */ +buf_t buf_getblk(vnode_t, daddr64_t, int, int, int, int); +buf_t buf_geteblk(int); + +__END_DECLS + + +/* Macros to clear/set/test flags. */ +#define SET(t, f) (t) |= (f) +#define CLR(t, f) (t) &= ~(f) +#define ISSET(t, f) ((t) & (f)) + -#endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h new file mode 100644 index 000000000..e06f99253 --- /dev/null +++ b/bsd/sys/buf_internal.h @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
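To make the new opaque buf_t API in buf.h concrete, here is a minimal sketch (not part of this patch) of how a filesystem might read a metadata block through the cache and reach its storage; the function name, parameters, and error policy are invented for illustration:

	#include <sys/buf.h>

	/* Hypothetical: read one metadata block, inspect it, release it. */
	static errno_t
	my_fs_read_block(vnode_t devvp, daddr64_t blkno, int blksize, ucred_t cred)
	{
		buf_t	bp;
		caddr_t	data;
		errno_t	error;

		error = buf_meta_bread(devvp, blkno, blksize, cred, &bp);
		if (error) {
			buf_brelse(bp);		/* a buffer is handed back even on error */
			return error;
		}
		if (buf_dataptr(bp) != 0) {
			/* storage is already kernel addressable */
			data = (caddr_t)buf_dataptr(bp);
			/* ... inspect buf_count(bp) valid bytes at 'data' ... */
		} else if ((error = buf_map(bp, &data)) == 0) {
			/* ... inspect 'data', then drop our addressability claim ... */
			(void)buf_unmap(bp);
		}
		buf_brelse(bp);
		return error;
	}

The buf_dataptr() check mirrors the comment above: a NULL data pointer means the caller must bracket access with buf_map/buf_unmap.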
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)buf.h 8.9 (Berkeley) 3/30/95 + */ + +#ifndef _SYS_BUF_INTERNAL_H_ +#define _SYS_BUF_INTERNAL_H_ + +#include + +#ifdef KERNEL +#include +#include +#include +#include +#include +#include + + +extern lck_mtx_t *buf_mtxp; +#define NOLIST ((struct buf *)0x87654321) + +/* + * The buffer header describes an I/O operation in the kernel. + */ +struct buf { + LIST_ENTRY(buf) b_hash; /* Hash chain. */ + LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ + TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ + int b_timestamp; /* timestamp for queuing operation */ + long b_whichq; /* the free list the buffer belongs to */ + volatile long b_flags; /* B_* flags. */ + volatile long b_lflags; /* BL_BUSY | BL_WANTED flags... protected by buf_mtx */ + int b_error; /* errno value. */ + long b_bufsize; /* Allocated buffer size. */ + long b_bcount; /* Valid bytes in buffer. */ + long b_resid; /* Remaining I/O. */ + dev_t b_dev; /* Device associated with buffer. */ + uintptr_t b_datap; /* Memory, superblocks, indirect etc.*/ + daddr64_t b_lblkno; /* Logical block number. */ + daddr64_t b_blkno; /* Underlying physical block number. */ + void (*b_iodone)(buf_t, void *); /* Function to call upon completion. */ + vnode_t b_vp; /* Device vnode. 
*/ + struct ucred *b_rcred; /* Read credentials reference. */ + struct ucred *b_wcred; /* Write credentials reference. */ + void * b_upl; /* Pointer to UPL */ + buf_t b_real_bp; /* used to track bp generated through cluster_bp */ + TAILQ_ENTRY(buf) b_act; /* Device driver queue when active */ + void * b_drvdata; /* Device driver private use */ + void * b_fsprivate; /* filesystem private use */ + void * b_transaction; /* journal private use */ + int b_dirtyoff; /* Offset in buffer of dirty region. */ + int b_dirtyend; /* Offset of end of dirty region. */ + int b_validoff; /* Offset in buffer of valid region. */ + int b_validend; /* Offset of end of valid region. */ + proc_t b_proc; /* Associated proc; NULL if kernel. */ +#ifdef JOE_DEBUG + void * b_owner; + int b_tag; + void * b_lastbrelse; + int b_stackbrelse[6]; + int b_stackgetblk[6]; +#endif +}; + + +/* cluster_io definitions for use with io bufs */ +#define b_uploffset b_bufsize +#define b_trans_head b_freelist.tqe_prev +#define b_trans_next b_freelist.tqe_next +#define b_iostate b_rcred + +/* + * These flags are kept in b_lflags... + * buf_mtxp must be held before examining/updating + */ +#define BL_BUSY 0x00000001 /* I/O in progress. */ +#define BL_WANTED 0x00000002 /* Process wants this buffer. */ +#define BL_IOBUF 0x00000004 /* buffer allocated via 'buf_alloc' */ + + +/* + * mask used by buf_flags... these are the readable external flags + */ +#define BUF_X_RDFLAGS (B_CLUSTER | B_PHYS | B_LOCKED | B_DELWRI | B_ASYNC |\ + B_READ | B_WRITE | B_META | B_PAGEIO) +/* + * mask used by buf_clearflags/buf_setflags... these are the writable external flags + */ +#define BUF_X_WRFLAGS (B_LOCKED | B_NOCACHE | B_ASYNC | B_READ | B_WRITE | B_PAGEIO) + +/* + * These flags are kept in b_flags... access is lockless + * External flags are defined in buf.h and cannot overlap + * the internal flags + * + * these flags are internal... their definition may change + */ +#define B_CACHE 0x00010000 /* getblk found us in the cache. */ +#define B_DONE 0x00020000 /* I/O completed. */ +#define B_INVAL 0x00040000 /* Does not contain valid info. */ +#define B_ERROR 0x00080000 /* I/O error occurred. */ +#define B_EINTR 0x00100000 /* I/O was interrupted */ +#define B_AGE 0x00200000 /* Move to age queue when I/O done. */ +#define B_FILTER 0x00400000 /* call b_iodone from biodone as an in-line filter */ +#define B_CALL 0x00800000 /* Call b_iodone from biodone, assumes b_iodone consumes bp */ +#define B_RAW 0x01000000 /* Set by physio for raw transfers. */ +#define B_WASDIRTY 0x02000000 /* page was found dirty in the VM cache */ +#define B_HDRALLOC 0x04000000 /* zone allocated buffer header */ +#define B_ZALLOC 0x08000000 /* b_datap is zalloc()ed */ +/* + * private flags used by the journal layer + */ +#define B_NORELSE 0x10000000 /* don't brelse() in bwrite() */ +/* + * private flags used by the cluster layer + */ +#define B_NEED_IODONE 0x20000000 /* need biodone on the real_bp associated with a cluster_io */ +#define B_COMMIT_UPL 0x40000000 /* commit/abort the UPL on I/O success/failure */ +/* + * can we deprecate? + */ +#define B_TAPE 0x80000000 /* Magnetic tape I/O. */ + + +/* Flags to low-level allocation routines. */ +#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ +#define B_SYNC 0x02 /* Do all allocations synchronously.
*/ +#define B_NOBUFF 0x04 /* Do not allocate struct buf */ + + +extern int niobuf; /* The number of IO buffer headers for cluster IO */ +extern int nbuf; /* The number of buffer headers */ +extern struct buf *buf; /* The buffer headers. */ + + +/* + * Definitions for the buffer free lists. + */ +#define BQUEUES 6 /* number of free buffer queues */ + +#define BQ_LOCKED 0 /* super-blocks &c */ +#define BQ_LRU 1 /* lru, useful buffers */ +#define BQ_AGE 2 /* rubbish */ +#define BQ_EMPTY 3 /* buffer headers with no memory */ +#define BQ_META 4 /* buffer containing metadata */ +#define BQ_LAUNDRY 5 /* buffers that need cleaning */ + + +__BEGIN_DECLS + +buf_t alloc_io_buf(vnode_t, int); +void free_io_buf(buf_t); + +int allocbuf(struct buf *, int); +void bufinit(void); + +void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void **, void **); + +/* + * Flags for buf_acquire + */ +#define BAC_NOWAIT 0x01 /* Don't wait if buffer is busy */ +#define BAC_REMOVE 0x02 /* Remove from free list once buffer is acquired */ +#define BAC_SKIP_NONLOCKED 0x04 /* Don't return LOCKED buffers */ +#define BAC_SKIP_LOCKED 0x08 /* Only return LOCKED buffers */ + +void cluster_init(void); +void buf_drop(buf_t); +errno_t buf_acquire(buf_t, int, int, int); + +int count_busy_buffers(void); +int count_lock_queue(void); + + +__END_DECLS + + +/* + * Stats on usefulness of the buffer cache + */ +struct bufstats { + long bufs_incore; /* found incore */ + long bufs_busyincore; /* found incore. was busy */ + long bufs_vmhits; /* not incore. found in VM */ + long bufs_miss; /* not incore. not in VM */ + long bufs_sleeps; /* buffer starvation */ + long bufs_eblk; /* Calls to geteblk */ + long bufs_iobufmax; /* Max. number of IO buffers used */ + long bufs_iobufinuse; /* number of IO buffers in use */ + long bufs_iobufsleeps; /* IO buffer starvation */ +}; + +#endif /* KERNEL */ +#endif /* !_SYS_BUF_INTERNAL_H_ */ diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 3ec2547df..46dc8ec7f 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -158,6 +158,38 @@ #define __unused #endif +/* + * GCC 2.95 provides `__restrict' as an extension to C90 to support the + * C99-specific `restrict' type qualifier. We happen to use `__restrict' as + * a way to define the `restrict' type qualifier without disturbing older + * software that is unaware of C99 keywords. + */ +#if !(__GNUC__ == 2 && __GNUC_MINOR__ == 95) +#if __STDC_VERSION__ < 199901 +#define __restrict +#else +#define __restrict restrict +#endif +#endif + +/* + * Compiler-dependent macros to declare that functions take printf-like + * or scanf-like arguments. They are null except for versions of gcc + * that are known to support the features properly. Functions declared + * with these attributes will cause compilation warnings if there is a + * mismatch between the format string and subsequent function parameter + * types.
*/ +#if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7 +#define __printflike(fmtarg, firstvararg) \ + __attribute__((__format__ (__printf__, fmtarg, firstvararg))) +#define __scanflike(fmtarg, firstvararg) \ + __attribute__((__format__ (__scanf__, fmtarg, firstvararg))) +#else +#define __printflike(fmtarg, firstvararg) +#define __scanflike(fmtarg, firstvararg) +#endif + #define __IDSTRING(name,string) static const char name[] __unused = string #ifndef __COPYRIGHT @@ -176,4 +208,153 @@ #define __PROJECT_VERSION(s) __IDSTRING(project_version,s) #endif +/* + * The __DARWIN_ALIAS macro is used to do symbol renaming; + * it allows old code to use the old symbol, thus maintaining binary + * compatibility, while new code can use a new improved version of the + * same function. + * + * By default newly compiled code will actually get the same symbols + * that the old code did. Defining any of _APPLE_C_SOURCE, _XOPEN_SOURCE, + * or _POSIX_C_SOURCE will give you the new symbols. Defining _XOPEN_SOURCE + * or _POSIX_C_SOURCE also restricts the available symbols to a subset of + * Apple's APIs. + * + * __DARWIN_ALIAS is used by itself if the function signature has not + * changed; it is used along with a #ifdef check for __DARWIN_UNIX03 + * if the signature has changed. Because the __LP64__ environment + * only supports UNIX03 semantics it causes __DARWIN_UNIX03 to be + * defined, but causes __DARWIN_ALIAS to do no symbol mangling. + */ + +#if !defined(__DARWIN_UNIX03) +#if defined(_APPLE_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE) || defined(__LP64__) +#if defined(_NONSTD_SOURCE) +#error "Can't define both _NONSTD_SOURCE and any of _APPLE_C_SOURCE, _XOPEN_SOURCE, _POSIX_C_SOURCE, or __LP64__" +#endif /* _NONSTD_SOURCE */ +#define __DARWIN_UNIX03 1 +#elif defined(_NONSTD_SOURCE) +#define __DARWIN_UNIX03 0 +#else /* default */ +#define __DARWIN_UNIX03 0 +#endif /* _APPLE_C_SOURCE || _XOPEN_SOURCE || _POSIX_C_SOURCE || __LP64__ */ +#endif /* !__DARWIN_UNIX03 */ + +#if __DARWIN_UNIX03 && !defined(__LP64__) +#define __DARWIN_ALIAS(sym) __asm("_" __STRING(sym) "$UNIX2003") +#else +#define __DARWIN_ALIAS(sym) +#endif + + +/* + * POSIX.1 requires that the macros we test be defined before any standard + * header file is included. This permits us to convert values for feature + * testing, as necessary, using only _POSIX_C_SOURCE. + * + * Here's a quick run-down of the versions: + * defined(_POSIX_SOURCE) 1003.1-1988 + * _POSIX_C_SOURCE == 1L 1003.1-1990 + * _POSIX_C_SOURCE == 2L 1003.2-1992 C Language Binding Option + * _POSIX_C_SOURCE == 199309L 1003.1b-1993 + * _POSIX_C_SOURCE == 199506L 1003.1c-1995, 1003.1i-1995, + * and the omnibus ISO/IEC 9945-1: 1996 + * _POSIX_C_SOURCE == 200112L 1003.1-2001 + * + * In addition, the X/Open Portability Guide, which is now the Single UNIX + * Specification, defines a feature-test macro which indicates the version of + * that specification, and which subsumes _POSIX_C_SOURCE. + */ + +/* Deal with IEEE Std. 1003.1-1990, in which _POSIX_C_SOURCE == 1L. */ +#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE == 1L +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199009L +#endif + +/* Deal with IEEE Std. 1003.2-1992, in which _POSIX_C_SOURCE == 2L. */ +#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE == 2L +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199209L +#endif + +/* Deal with various X/Open Portability Guides and Single UNIX Spec.
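A brief illustration of how these cdefs macros are meant to be used in a declaration (not part of the patch; fs_log and my_compat_call are invented names, while the attribute and aliasing behavior follow the definitions above):

	#include <sys/cdefs.h>

	__BEGIN_DECLS
	/* warn when the format string and arguments disagree:
	 * argument 1 is the format, variable arguments start at 2 */
	void	fs_log(const char *fmt, ...) __printflike(1, 2);

	/* old binaries keep the legacy symbol; code built with
	 * __DARWIN_UNIX03 (and not __LP64__) binds to
	 * _my_compat_call$UNIX2003 instead */
	int	my_compat_call(int arg) __DARWIN_ALIAS(my_compat_call);
	__END_DECLS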
*/ +#ifdef _XOPEN_SOURCE +#if _XOPEN_SOURCE - 0L >= 600L +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200112L +#elif _XOPEN_SOURCE - 0L >= 500L +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 199506L +#endif +#endif + +/* + * Deal with all versions of POSIX. The ordering relative to the tests above is + * important. + */ +#if defined(_POSIX_SOURCE) && !defined(_POSIX_C_SOURCE) +#define _POSIX_C_SOURCE 198808L +#endif + +/* + * long long is not supported in c89 (__STRICT_ANSI__), but g++ -ansi and + * c99 still want long longs. While not perfect, we allow long longs for + * g++. + */ +#define __DARWIN_NO_LONG_LONG (defined(__STRICT_ANSI__) \ + && (__STDC_VERSION__-0 < 199901L) \ + && !defined(__GNUG__)) + +/* + * Long double compatibility macros allow selecting variant symbols based + * on the old (compatible) 64-bit long doubles, or the new 128-bit + * long doubles. This applies only to ppc; i386 already has long double + * support, while ppc64 doesn't have any backwards history. + */ +#if defined(__ppc__) +# if defined(__LDBL_MANT_DIG__) && defined(__DBL_MANT_DIG__) && \ + __LDBL_MANT_DIG__ > __DBL_MANT_DIG__ +# if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0 < 1040 +# define __DARWIN_LDBL_COMPAT(x) __asm("_" __STRING(x) "$LDBLStub") +# else +# define __DARWIN_LDBL_COMPAT(x) __asm("_" __STRING(x) "$LDBL128") +# endif +# define __DARWIN_LDBL_COMPAT2(x) __asm("_" __STRING(x) "$LDBL128") +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0 +# else +# define __DARWIN_LDBL_COMPAT(x) /* nothing */ +# define __DARWIN_LDBL_COMPAT2(x) /* nothing */ +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 1 +# endif +#elif defined(__i386__) || defined(__ppc64__) +# define __DARWIN_LDBL_COMPAT(x) /* nothing */ +# define __DARWIN_LDBL_COMPAT2(x) /* nothing */ +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0 +#else +# error Unknown architecture +#endif + +/* + * Structure alignment control macros. These specify how certain + * shared structures should be aligned. Some may need backward + * compatible legacy (POWER) alignment, while others may need + * forward compatible (NATURAL) alignment. + */ +#if !defined(__DARWIN_ALIGN_POWER) +#if defined(__ppc64__) +#define __DARWIN_ALIGN_POWER 1 +#else +#define __DARWIN_ALIGN_POWER 0 +#endif +#endif /* __DARWIN_ALIGN_POWER */ + +#if !defined(__DARWIN_ALIGN_NATURAL) +#if defined(__ppc__) && defined(KERNEL) +#define __DARWIN_ALIGN_NATURAL 1 +#else +#define __DARWIN_ALIGN_NATURAL 0 +#endif +#endif /* __DARWIN_ALIGN_NATURAL */ + #endif /* !_CDEFS_H_ */ diff --git a/bsd/sys/clist.h b/bsd/sys/clist.h index 64cb2eaea..5503f994a 100644 --- a/bsd/sys/clist.h +++ b/bsd/sys/clist.h @@ -58,10 +58,8 @@ #ifndef _SYS_CLIST_H_ #define _SYS_CLIST_H_ -#include -#ifdef __APPLE_API_PRIVATE -#ifdef KERNEL +#ifdef KERNEL_PRIVATE struct cblock { struct cblock *c_next; /* next cblock in queue */ @@ -71,8 +69,7 @@ struct cblock { extern struct cblock *cfree, *cfreelist; extern int cfreecount, nclist; -#endif /* KERNEL */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _SYS_CLIST_H_ */ diff --git a/bsd/sys/conf.h b/bsd/sys/conf.h index 9e7e80687..7dd5ad281 100644 --- a/bsd/sys/conf.h +++ b/bsd/sys/conf.h @@ -76,28 +76,36 @@ struct tty; struct uio; struct vnode; -#ifdef __APPLE_API_UNSTABLE +/* + * Types for d_type. + * These are returned by ioctl FIODTYPE + */ +#define D_TAPE 1 +#define D_DISK 2 +#define D_TTY 3 + +#ifdef KERNEL /* * Device switch function types.
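Since the patch moves D_TAPE/D_DISK/D_TTY outside the KERNEL guard, user space can now interpret them via the FIODTYPE ioctl (declared in <sys/filio.h>). A small illustrative classifier, not from the patch:

	#include <sys/ioctl.h>
	#include <sys/filio.h>
	#include <sys/conf.h>

	static const char *
	device_kind(int fd)
	{
		int type;

		/* FIODTYPE reports the d_type of the underlying device */
		if (ioctl(fd, FIODTYPE, &type) == -1)
			return "not a device";
		switch (type) {
		case D_TAPE:	return "tape";
		case D_DISK:	return "disk";
		case D_TTY:	return "tty";
		default:	return "other";
		}
	}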
*/ -typedef int open_close_fcn_t __P((dev_t dev, int flags, int devtype, - struct proc *p)); - -typedef struct tty *d_devtotty_t __P((dev_t dev)); - -typedef void strategy_fcn_t __P((struct buf *bp)); -typedef int ioctl_fcn_t __P((dev_t dev, u_long cmd, caddr_t data, - int fflag, struct proc *p)); -typedef int dump_fcn_t (); /* parameters vary by architecture */ -typedef int psize_fcn_t __P((dev_t dev)); -typedef int read_write_fcn_t __P((dev_t dev, struct uio *uio, int ioflag)); -typedef int stop_fcn_t __P((struct tty *tp, int rw)); -typedef int reset_fcn_t __P((int uban)); -typedef int select_fcn_t __P((dev_t dev, int which, void * wql, struct proc *p)); -typedef int mmap_fcn_t __P(()); -typedef int getc_fcn_t __P((dev_t dev)); -typedef int putc_fcn_t __P((dev_t dev, char c)); -typedef int d_poll_t __P((dev_t dev, int events, struct proc *p)); +typedef int open_close_fcn_t(dev_t dev, int flags, int devtype, + struct proc *p); + +typedef struct tty *d_devtotty_t(dev_t dev); + +typedef void strategy_fcn_t(struct buf *bp); +typedef int ioctl_fcn_t(dev_t dev, u_long cmd, caddr_t data, + int fflag, struct proc *p); +typedef int dump_fcn_t(void); /* parameters vary by architecture */ +typedef int psize_fcn_t(dev_t dev); +typedef int read_write_fcn_t(dev_t dev, struct uio *uio, int ioflag); +typedef int stop_fcn_t(struct tty *tp, int rw); +typedef int reset_fcn_t(int uban); +typedef int select_fcn_t(dev_t dev, int which, void * wql, struct proc *p); +typedef int mmap_fcn_t(void); +typedef int getc_fcn_t(dev_t dev); +typedef int putc_fcn_t(dev_t dev, char c); +typedef int d_poll_t(dev_t dev, int events, struct proc *p); #define d_open_t open_close_fcn_t #define d_close_t open_close_fcn_t @@ -113,8 +121,8 @@ typedef int d_poll_t __P((dev_t dev, int events, struct proc *p)); #define d_putc_t putc_fcn_t __BEGIN_DECLS -int enodev (); /* avoid actual prototype for multiple use */ -void enodev_strat(); +int enodev(void); +void enodev_strat(void); __END_DECLS /* @@ -134,12 +142,6 @@ __END_DECLS #define eno_putc ((putc_fcn_t *)&enodev) #define eno_select ((select_fcn_t *)&enodev) -/* - * Types for d_type. - */ -#define D_TAPE 1 -#define D_DISK 2 -#define D_TTY 3 /* * Block device switch table @@ -154,14 +156,13 @@ struct bdevsw { int d_type; }; -#ifdef KERNEL d_devtotty_t nodevtotty; d_write_t nowrite; -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern struct bdevsw bdevsw[]; -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ /* * Contents of empty bdevsw slot. @@ -170,7 +171,6 @@ extern struct bdevsw bdevsw[]; { eno_opcl, eno_opcl, eno_strat, eno_ioctl, \ eno_dump, eno_psize, 0 } -#endif /* KERNEL */ /* * Character device switch table @@ -180,23 +180,22 @@ struct cdevsw { open_close_fcn_t *d_close; read_write_fcn_t *d_read; read_write_fcn_t *d_write; - ioctl_fcn_t *d_ioctl; - stop_fcn_t *d_stop; - reset_fcn_t *d_reset; + ioctl_fcn_t *d_ioctl; + stop_fcn_t *d_stop; + reset_fcn_t *d_reset; struct tty **d_ttys; select_fcn_t *d_select; - mmap_fcn_t *d_mmap; + mmap_fcn_t *d_mmap; strategy_fcn_t *d_strategy; - getc_fcn_t *d_getc; - putc_fcn_t *d_putc; - int d_type; + getc_fcn_t *d_getc; + putc_fcn_t *d_putc; + int d_type; }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern struct cdevsw cdevsw[]; -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ /* * Contents of empty cdevsw slot. 
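For orientation, this is roughly how a driver would fill in the reworked cdevsw using the new function typedefs and register it; the mydev_* entry points and attach routine are hypothetical, and the eno_* placeholders are the ones this header defines for unused slots:

	static open_close_fcn_t	mydev_open, mydev_close;
	static read_write_fcn_t	mydev_read;
	static ioctl_fcn_t	mydev_ioctl;

	static struct cdevsw mydev_cdevsw = {
		mydev_open,	/* d_open */
		mydev_close,	/* d_close */
		mydev_read,	/* d_read */
		eno_rdwrt,	/* d_write */
		mydev_ioctl,	/* d_ioctl */
		eno_stop,	/* d_stop */
		eno_reset,	/* d_reset */
		NULL,		/* d_ttys */
		eno_select,	/* d_select */
		eno_mmap,	/* d_mmap */
		eno_strat,	/* d_strategy */
		eno_getc,	/* d_getc */
		eno_putc,	/* d_putc */
		0		/* d_type */
	};

	static int
	mydev_attach(void)
	{
		/* a negative index asks cdevsw_add to pick any free major number */
		int major = cdevsw_add(-1, &mydev_cdevsw);
		return (major < 0) ? ENXIO : 0;
	}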
@@ -209,39 +208,45 @@ extern struct cdevsw cdevsw[]; (select_fcn_t *)seltrue, eno_mmap, eno_strat, eno_getc, \ eno_putc, 0 \ } + #endif /* KERNEL */ - + +#ifdef KERNEL_PRIVATE +typedef int l_open_t (dev_t dev, struct tty *tp); +typedef int l_close_t(struct tty *tp, int flags); +typedef int l_read_t (struct tty *tp, struct uio *uio, int flag); +typedef int l_write_t(struct tty *tp, struct uio *uio, int flag); +typedef int l_ioctl_t(struct tty *tp, u_long cmd, caddr_t data, int flag, + struct proc *p); +typedef int l_rint_t (int c, struct tty *tp); +typedef void l_start_t(struct tty *tp); +typedef int l_modem_t(struct tty *tp, int flag); + /* * Line discipline switch table */ struct linesw { - int (*l_open) __P((dev_t dev, struct tty *tp)); - int (*l_close) __P((struct tty *tp, int flags)); - int (*l_read) __P((struct tty *tp, struct uio *uio, - int flag)); - int (*l_write) __P((struct tty *tp, struct uio *uio, - int flag)); - int (*l_ioctl) __P((struct tty *tp, u_long cmd, caddr_t data, - int flag, struct proc *p)); - int (*l_rint) __P((int c, struct tty *tp)); - int (*l_start) __P((struct tty *tp)); - int (*l_modem) __P((struct tty *tp, int flag)); + l_open_t *l_open; + l_close_t *l_close; + l_read_t *l_read; + l_write_t *l_write; + l_ioctl_t *l_ioctl; + l_rint_t *l_rint; + l_start_t *l_start; + l_modem_t *l_modem; }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern struct linesw linesw[]; extern int nlinesw; -#endif /* __APPLE_API_PRIVATE */ -int ldisc_register __P((int , struct linesw *)); -void ldisc_deregister __P((int)); +int ldisc_register(int , struct linesw *); +void ldisc_deregister(int); #define LDISC_LOAD -1 /* Loadable line discipline */ -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ -#ifdef __APPLE_API_OBSOLETE +#ifdef BSD_KERNEL_PRIVATE /* * Swap device table */ @@ -255,11 +260,9 @@ struct swdevt { #define SW_SEQUENTIAL 0x02 #define sw_freed sw_flags /* XXX compat */ -#ifdef KERNEL extern struct swdevt swdevt[]; -#endif /* KERNEL */ -#endif /* __APPLE_API_OBSOLETE */ +#endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL @@ -271,15 +274,14 @@ extern struct swdevt swdevt[]; * else -1 */ __BEGIN_DECLS -int bdevsw_isfree __P((int)); -int bdevsw_add __P((int, struct bdevsw *)); -int bdevsw_remove __P((int, struct bdevsw *)); -int cdevsw_isfree __P((int)); -int cdevsw_add __P((int, struct cdevsw *)); -int cdevsw_remove __P((int, struct cdevsw *)); +int bdevsw_isfree(int); +int bdevsw_add(int, struct bdevsw *); +int bdevsw_remove(int, struct bdevsw *); +int cdevsw_isfree(int); +int cdevsw_add(int, struct cdevsw *); +int cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev); +int cdevsw_remove(int, struct cdevsw *); __END_DECLS #endif /* KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ - #endif /* _SYS_CONF_H_ */ diff --git a/bsd/sys/dirent.h b/bsd/sys/dirent.h index 8dc0359cc..1b4d5e501 100644 --- a/bsd/sys/dirent.h +++ b/bsd/sys/dirent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,19 +69,50 @@ #ifndef _SYS_DIRENT_H #define _SYS_DIRENT_H +#include +#include + +#ifndef _INO_T +typedef __darwin_ino_t ino_t; /* inode number */ +#define _INO_T +#endif + +#define __DARWIN_MAXNAMLEN 255 + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + struct dirent { - u_int32_t d_fileno; /* file number of entry */ - u_int16_t d_reclen; /* length of this record */ - u_int8_t d_type; /* file type, see below */ - u_int8_t d_namlen; /* length of string in d_name */ -#ifdef _POSIX_SOURCE - char d_name[255 + 1]; /* name must be no longer than this */ -#else -#define MAXNAMLEN 255 - char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */ + ino_t d_ino; /* file number of entry */ + __uint16_t d_reclen; /* length of this record */ + __uint8_t d_type; /* file type, see below */ + __uint8_t d_namlen; /* length of string in d_name */ + char d_name[__DARWIN_MAXNAMLEN + 1]; /* name must be no longer than this */ +}; + +#if __DARWIN_ALIGN_POWER +#pragma options align=reset #endif + +#ifdef KERNEL +#include + +/* Extended directory entry */ +struct direntry{ + ino64_t d_ino; /* file number of entry */ + __uint64_t d_seekoff; /* seek offset (optional, used by servers) */ + __uint16_t d_reclen; /* length of this record */ + __uint16_t d_namlen; /* length of string in d_name */ + __uint8_t d_type; /* file type, see below */ + u_char d_name[MAXPATHLEN - 1]; /* entry name (up to MAXPATHLEN - 1 bytes) */ }; +#endif + +#ifndef _POSIX_C_SOURCE +#define d_fileno d_ino /* backward compatibility */ +#define MAXNAMLEN __DARWIN_MAXNAMLEN /* * File types */ @@ -100,5 +131,6 @@ struct dirent { */ #define IFTODT(mode) (((mode) & 0170000) >> 12) #define DTTOIF(dirtype) ((dirtype) << 12) +#endif #endif /* _SYS_DIRENT_H */ diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 0d5dc53bf..6e3d535ef 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -25,6 +25,7 @@ #include #include +#include /* * Definitions @@ -54,6 +55,10 @@ * DKIOCGETMAXSEGMENTBYTECOUNTWRITE get maximum segment byte count for writes */ +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + typedef struct { char path[128]; @@ -67,6 +72,7 @@ typedef struct u_int8_t reserved0096[4]; /* reserved, clear to zero */ } dk_format_capacity_t; +/* LP64todo: not 64-bit clean */ typedef struct { dk_format_capacity_t * capacities; @@ -75,6 +81,10 @@ typedef struct u_int8_t reserved0064[8]; /* reserved, clear to zero */ } dk_format_capacities_t; +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + #define DKIOCEJECT _IO('d', 21) #define DKIOCSYNCHRONIZECACHE _IO('d', 22) @@ -98,10 +108,11 @@ typedef struct #define DKIOCGETMAXSEGMENTBYTECOUNTWRITE _IOR('d', 69, u_int64_t) #ifdef KERNEL -#define DKIOCGETISVIRTUAL _IOR('d', 72, u_int32_t) #define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, u_int32_t) #define DKIOCSETBLOCKSIZE _IOW('d', 24, u_int32_t) #define DKIOCGETBSDUNIT _IOR('d', 27, u_int32_t) +#define DKIOCISVIRTUAL _IOR('d', 72, u_int32_t) +#define DKIOCGETBASE _IOR('d', 73, u_int64_t) #endif /* KERNEL */ #endif /* _SYS_DISK_H_ */ diff --git a/bsd/sys/disklabel.h b/bsd/sys/disklabel.h index 4fa09f226..14eb3d746 100644 --- a/bsd/sys/disklabel.h +++ b/bsd/sys/disklabel.h @@ -58,6 +58,7 @@ #define _SYS_DISKLABEL_H_ #include +#include /* for daddr_t */ #ifdef __APPLE_API_OBSOLETE @@ -357,7 +358,7 @@ struct partinfo { #include __BEGIN_DECLS -struct disklabel *getdiskbyname __P((const char *)); +struct disklabel *getdiskbyname(const char *); __END_DECLS #endif diff --git a/bsd/sys/dkstat.h b/bsd/sys/dkstat.h index b0b256936..fa0060f6f 100644 --- a/bsd/sys/dkstat.h +++ b/bsd/sys/dkstat.h @@ -63,17 +63,11 @@ #ifndef _SYS_DKSTAT_H_ #define _SYS_DKSTAT_H_ -#include - -#ifdef __APPLE_API_PRIVATE - -#ifdef KERNEL +#ifdef KERNEL_PRIVATE extern long tk_cancc; extern long tk_nin; extern long tk_nout; extern long tk_rawcc; #endif -#endif /* __APPLE_API_PRIVATE */ - #endif /* _SYS_DKSTAT_H_ */ diff --git a/bsd/sys/domain.h b/bsd/sys/domain.h index e6a75966c..c55eaeccd 100644 --- a/bsd/sys/domain.h +++ b/bsd/sys/domain.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,51 +60,68 @@ #ifndef _SYS_DOMAIN_H_ #define _SYS_DOMAIN_H_ -#include +#ifdef PRIVATE +#include +#ifdef KERNEL +#include +#endif /* KERNEL */ /* * Structure per communications domain. */ +#include + /* * Forward structure declarations for function prototypes [sic]. 
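From user space, the DKIOC* requests in <sys/disk.h> above are issued with ioctl(2) against the raw device node. A sketch follows; the device path is a placeholder, and DKIOCGETBLOCKSIZE/DKIOCGETBLOCKCOUNT come from the portion of the header this hunk does not show:

	#include <fcntl.h>
	#include <unistd.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/disk.h>

	int
	main(void)
	{
		int fd = open("/dev/rdisk1", O_RDONLY);	/* placeholder device */
		u_int32_t blksize;
		u_int64_t blkcount;

		if (fd == -1)
			return 1;
		if (ioctl(fd, DKIOCGETBLOCKSIZE, &blksize) == 0 &&
		    ioctl(fd, DKIOCGETBLOCKCOUNT, &blkcount) == 0)
			printf("%llu blocks of %u bytes\n",
			    (unsigned long long)blkcount, blksize);
		(void)ioctl(fd, DKIOCSYNCHRONIZECACHE);	/* flush the write cache */
		close(fd);
		return 0;
	}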
*/ -#ifdef __APPLE_API_UNSTABLE struct mbuf; +#define DOM_REENTRANT 0x01 + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif struct domain { int dom_family; /* AF_xxx */ char *dom_name; - void (*dom_init) /* initialize domain data structures */ - __P((void)); - int (*dom_externalize) /* externalize access rights */ - __P((struct mbuf *)); - void (*dom_dispose) /* dispose of internalized rights */ - __P((struct mbuf *)); + void (*dom_init)(void); /* initialize domain data structures */ + int (*dom_externalize)(struct mbuf *); + /* externalize access rights */ + void (*dom_dispose)(struct mbuf *); + /* dispose of internalized rights */ struct protosw *dom_protosw; /* Chain of protosw's for AF */ struct domain *dom_next; - int (*dom_rtattach) /* initialize routing table */ - __P((void **, int)); + int (*dom_rtattach)(void **, int); + /* initialize routing table */ int dom_rtoffset; /* an arg to rtattach, in bits */ int dom_maxrtkey; /* for routing layer */ int dom_protohdrlen; /* Let the protocol tell us */ int dom_refs; /* # socreates outstanding */ - u_long reserved[4]; +#ifdef _KERN_LOCKS_H_ + lck_mtx_t *dom_mtx; /* domain global mutex */ +#else + void *dom_mtx; /* domain global mutex */ +#endif + u_long dom_flags; + u_long reserved[2]; }; +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + #ifdef KERNEL extern struct domain *domains; extern struct domain localdomain; + +__BEGIN_DECLS extern void net_add_domain(struct domain *dp); extern int net_del_domain(struct domain *); +__END_DECLS #define DOMAIN_SET(domain_set) -/* -#define DOMAIN_SET(name) \ - DATA_SET(domain_set, name ## domain) -*/ - -#endif -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL */ +#endif /* PRIVATE */ #endif /* _SYS_DOMAIN_H_ */ diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h index f108d3121..b19b04da4 100644 --- a/bsd/sys/errno.h +++ b/bsd/sys/errno.h @@ -66,7 +66,7 @@ #if !defined(KERNEL) && !defined(KERNEL_PRIVATE) #include __BEGIN_DECLS -extern int * __error __P((void)); +extern int * __error(void); #define errno (*__error()) __END_DECLS #endif @@ -90,7 +90,7 @@ __END_DECLS #define ENOMEM 12 /* Cannot allocate memory */ #define EACCES 13 /* Permission denied */ #define EFAULT 14 /* Bad address */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define ENOTBLK 15 /* Block device required */ #endif #define EBUSY 16 /* Device busy */ @@ -103,9 +103,7 @@ __END_DECLS #define ENFILE 23 /* Too many open files in system */ #define EMFILE 24 /* Too many open files */ #define ENOTTY 25 /* Inappropriate ioctl for device */ -#ifndef _POSIX_SOURCE #define ETXTBSY 26 /* Text file busy */ -#endif #define EFBIG 27 /* File too large */ #define ENOSPC 28 /* No space left on device */ #define ESPIPE 29 /* Illegal seek */ @@ -119,7 +117,6 @@ __END_DECLS /* non-blocking and interrupt i/o */ #define EAGAIN 35 /* Resource temporarily unavailable */ -#ifndef _POSIX_SOURCE #define EWOULDBLOCK EAGAIN /* Operation would block */ #define EINPROGRESS 36 /* Operation now in progress */ #define EALREADY 37 /* Operation already in progress */ @@ -131,12 +128,25 @@ __END_DECLS #define EPROTOTYPE 41 /* Protocol wrong type for socket */ #define ENOPROTOOPT 42 /* Protocol not available */ #define EPROTONOSUPPORT 43 /* Protocol not supported */ +#ifndef _POSIX_C_SOURCE #define ESOCKTNOSUPPORT 44 /* Socket type not supported */ -#endif /* ! _POSIX_SOURCE */ -#define ENOTSUP 45 /* Operation not supported */ -#ifndef _POSIX_SOURCE -#define EOPNOTSUPP ENOTSUP /* Operation not supported */ +#endif /* ! 
_POSIX_C_SOURCE */ +#define ENOTSUP 45 /* Operation not supported */ +#if !__DARWIN_UNIX03 && !defined(KERNEL) +/* + * This is the same for binary and source compatibility, unless compiling + * the kernel itself, or compiling __DARWIN_UNIX03; if compiling for the + * kernel, the correct value will be returned. If compiling non-POSIX + * source, the kernel return value will be converted by a stub in libc, and + * if compiling source with __DARWIN_UNIX03, the conversion in libc is not + * done, and the caller gets the expected (discrete) value. + */ +#define EOPNOTSUPP ENOTSUP /* Operation not supported on socket */ +#endif /* !__DARWIN_UNIX03 && !KERNEL */ + +#ifndef _POSIX_C_SOURCE #define EPFNOSUPPORT 46 /* Protocol family not supported */ +#endif /* _POSIX_C_SOURCE */ #define EAFNOSUPPORT 47 /* Address family not supported by protocol family */ #define EADDRINUSE 48 /* Address already in use */ #define EADDRNOTAVAIL 49 /* Can't assign requested address */ @@ -150,73 +160,94 @@ __END_DECLS #define ENOBUFS 55 /* No buffer space available */ #define EISCONN 56 /* Socket is already connected */ #define ENOTCONN 57 /* Socket is not connected */ +#ifndef _POSIX_C_SOURCE #define ESHUTDOWN 58 /* Can't send after socket shutdown */ #define ETOOMANYREFS 59 /* Too many references: can't splice */ +#endif /* _POSIX_C_SOURCE */ #define ETIMEDOUT 60 /* Operation timed out */ #define ECONNREFUSED 61 /* Connection refused */ #define ELOOP 62 /* Too many levels of symbolic links */ -#endif /* _POSIX_SOURCE */ #define ENAMETOOLONG 63 /* File name too long */ /* should be rearranged */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define EHOSTDOWN 64 /* Host is down */ +#endif /* _POSIX_C_SOURCE */ #define EHOSTUNREACH 65 /* No route to host */ -#endif /* _POSIX_SOURCE */ #define ENOTEMPTY 66 /* Directory not empty */ /* quotas & mush */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define EPROCLIM 67 /* Too many processes */ #define EUSERS 68 /* Too many users */ +#endif /* _POSIX_C_SOURCE */ #define EDQUOT 69 /* Disc quota exceeded */ /* Network File System */ #define ESTALE 70 /* Stale NFS file handle */ +#ifndef _POSIX_C_SOURCE #define EREMOTE 71 /* Too many levels of remote in path */ #define EBADRPC 72 /* RPC struct is bad */ #define ERPCMISMATCH 73 /* RPC version wrong */ #define EPROGUNAVAIL 74 /* RPC prog. not avail */ #define EPROGMISMATCH 75 /* Program version wrong */ #define EPROCUNAVAIL 76 /* Bad procedure for program */ -#endif /* _POSIX_SOURCE */ +#endif /* _POSIX_C_SOURCE */ #define ENOLCK 77 /* No locks available */ #define ENOSYS 78 /* Function not implemented */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define EFTYPE 79 /* Inappropriate file type or format */ #define EAUTH 80 /* Authentication error */ #define ENEEDAUTH 81 /* Need authenticator */ -#endif /* _POSIX_SOURCE */ /* Intelligent device errors */ #define EPWROFF 82 /* Device power is off */ #define EDEVERR 83 /* Device error, e.g.
paper out */ +#endif /* _POSIX_C_SOURCE */ -#ifndef _POSIX_SOURCE #define EOVERFLOW 84 /* Value too large to be stored in data type */ /* Program loading errors */ +#ifndef _POSIX_C_SOURCE #define EBADEXEC 85 /* Bad executable */ #define EBADARCH 86 /* Bad CPU type in executable */ #define ESHLIBVERS 87 /* Shared library version mismatch */ #define EBADMACHO 88 /* Malformed Macho file */ +#endif /* _POSIX_C_SOURCE */ #define ECANCELED 89 /* Operation canceled */ #define EIDRM 90 /* Identifier removed */ #define ENOMSG 91 /* No message of desired type */ #define EILSEQ 92 /* Illegal byte sequence */ +#ifndef _POSIX_C_SOURCE #define ENOATTR 93 /* Attribute not found */ - -#define ELAST 93 /* Must be equal largest errno */ -#endif /* _POSIX_SOURCE */ +#endif /* _POSIX_C_SOURCE */ + +#define EBADMSG 94 /* Bad message */ +#define EMULTIHOP 95 /* Reserved */ +#define ENODATA 96 /* No message available on STREAM */ +#define ENOLINK 97 /* Reserved */ +#define ENOSR 98 /* No STREAM resources */ +#define ENOSTR 99 /* Not a STREAM */ +#define EPROTO 100 /* Protocol error */ +#define ETIME 101 /* STREAM ioctl timeout */ + +#if __DARWIN_UNIX03 || defined(KERNEL) +/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */ +#define EOPNOTSUPP 102 /* Operation not supported on socket */ +#endif /* __DARWIN_UNIX03 || KERNEL */ + +#ifndef _POSIX_C_SOURCE +#define ELAST 102 /* Must be equal largest errno */ +#endif /* _POSIX_C_SOURCE */ #ifdef KERNEL /* pseudo-errors returned inside kernel to modify return to process */ -#define ERESTART -1 /* restart syscall */ -#define EJUSTRETURN -2 /* don't modify regs, just return */ +#define ERESTART (-1) /* restart syscall */ +#define EJUSTRETURN (-2) /* don't modify regs, just return */ #endif #endif /* _SYS_ERRNO_H_ */ diff --git a/bsd/sys/ev.h b/bsd/sys/ev.h index 16757b77f..39c4aeb61 100644 --- a/bsd/sys/ev.h +++ b/bsd/sys/ev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,9 +24,12 @@ #ifndef _SYS_EV_H_ #define _SYS_EV_H_ +#if !defined(__LP64__) + #include #include +#include struct eventreq { int er_type; @@ -59,8 +62,7 @@ typedef struct eventreq *er_t; #define EV_TIMEOUT 0x20000 #define EV_DMASK 0xffffff00 -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct eventqelt { TAILQ_ENTRY(eventqelt) ee_slist; @@ -68,12 +70,13 @@ struct eventqelt { struct eventreq ee_req; struct proc * ee_proc; u_int ee_flags; -#define EV_QUEUED 1 +#define EV_QUEUED 0x01 u_int ee_eventmask; - struct socket *ee_sp; }; -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +int waitevent_close(struct proc *p, struct fileproc *); +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* !defined(__LP64__) */ #endif /* _SYS_EV_H_ */ diff --git a/bsd/sys/event.h b/bsd/sys/event.h index a01242ab1..9f8d6c00a 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -50,6 +50,10 @@ #ifndef _SYS_EVENT_H_ #define _SYS_EVENT_H_ +#include +#include +#include + #define EVFILT_READ (-1) #define EVFILT_WRITE (-2) #define EVFILT_AIO (-3) /* attached to aio requests */ @@ -61,16 +65,42 @@ #define EVFILT_FS (-9) /* Filesystem events */ #define EVFILT_SYSCOUNT 9 +#define EVFILT_THREADMARKER EVFILT_SYSCOUNT /* Internal use only */ + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif struct kevent { uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ - u_short flags; - u_int fflags; - intptr_t data; + unsigned short flags; /* general flags */ + unsigned int fflags; /* filter-specific flags */ + intptr_t data; /* filter-specific data */ +#ifdef KERNEL_PRIVATE + user_addr_t udata; /* opaque user data identifier */ +#else void *udata; /* opaque user data identifier */ +#endif +}; + +#ifdef KERNEL_PRIVATE + +struct user_kevent { + uint64_t ident; /* identifier for this event */ + short filter; /* filter for event */ + unsigned short flags; /* general flags */ + unsigned int fflags; /* filter-specific flags */ + int64_t data; /* filter-specific data */ + user_addr_t udata; /* opaque user data identifier */ }; +#endif + +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp__ = (kevp); \ __kevp__->ident = (a); \ @@ -92,6 +122,7 @@ struct kevent { #define EV_CLEAR 0x0020 /* clear event state after reporting */ #define EV_SYSFLAGS 0xF000 /* reserved by system */ +#define EV_FLAG0 0x1000 /* filter-specific flag */ #define EV_FLAG1 0x2000 /* filter-specific flag */ /* returned values */ @@ -99,23 +130,46 @@ struct kevent { #define EV_ERROR 0x4000 /* error, data contains errno */ /* - * data/hint flags for EVFILT_{READ|WRITE}, shared with userspace + * Filter specific flags for EVFILT_READ + * + * The default behavior for EVFILT_READ is to make the "read" determination + * relative to the current file descriptor read pointer. The EV_POLL + * flag indicates the determination should be made via poll(2) semantics + * (which always returns true for regular files - regardless of the amount + * of unread data in the file). + * + * On input, EV_OOBAND specifies that only OOB data should be looked for. + * The returned data count is the number of bytes beyond the current OOB marker. + * + * On output, EV_OOBAND indicates that OOB data is present. + * If it was not specified as an input parameter, then the data count is the + * number of bytes before the current OOB marker. If at the marker, the + * data count indicates the number of bytes available after it. In either + * case, it's the amount of data one could expect to receive next. */ -#define NOTE_LOWAT 0x0001 /* low water mark */ +#define EV_POLL EV_FLAG0 +#define EV_OOBAND EV_FLAG1 /* - * data/hint flags for EVFILT_VNODE, shared with userspace + * data/hint fflags for EVFILT_{READ|WRITE}, shared with userspace + * + * The default behavior for EVFILT_READ is to make the determination + * relative to the current file descriptor read pointer.
+ */ +#define NOTE_LOWAT 0x00000001 /* low water mark */ +/* + * data/hint fflags for EVFILT_VNODE, shared with userspace */ -#define NOTE_DELETE 0x0001 /* vnode was removed */ -#define NOTE_WRITE 0x0002 /* data contents changed */ -#define NOTE_EXTEND 0x0004 /* size increased */ -#define NOTE_ATTRIB 0x0008 /* attributes changed */ -#define NOTE_LINK 0x0010 /* link count changed */ -#define NOTE_RENAME 0x0020 /* vnode was renamed */ -#define NOTE_REVOKE 0x0040 /* vnode access was revoked */ +#define NOTE_DELETE 0x00000001 /* vnode was removed */ +#define NOTE_WRITE 0x00000002 /* data contents changed */ +#define NOTE_EXTEND 0x00000004 /* size increased */ +#define NOTE_ATTRIB 0x00000008 /* attributes changed */ +#define NOTE_LINK 0x00000010 /* link count changed */ +#define NOTE_RENAME 0x00000020 /* vnode was renamed */ +#define NOTE_REVOKE 0x00000040 /* vnode access was revoked */ /* - * data/hint flags for EVFILT_PROC, shared with userspace + * data/hint fflags for EVFILT_PROC, shared with userspace */ #define NOTE_EXIT 0x80000000 /* process exited */ #define NOTE_FORK 0x40000000 /* process forked */ @@ -123,14 +177,36 @@ struct kevent { #define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */ #define NOTE_PDATAMASK 0x000fffff /* mask for pid */ +/* + * data/hint fflags for EVFILT_TIMER, shared with userspace. + * The default is a (repeating) interval timer with the data + * specifying the timeout interval in milliseconds. + * + * All timeouts are implicitly EV_CLEAR events. + */ +#define NOTE_SECONDS 0x00000001 /* data is seconds */ +#define NOTE_USECONDS 0x00000002 /* data is microseconds */ +#define NOTE_NSECONDS 0x00000004 /* data is nanoseconds */ +#define NOTE_ABSOLUTE 0x00000008 /* absolute timeout */ + /* ... implicit EV_ONESHOT */ + /* additional flags for EVFILT_PROC */ #define NOTE_TRACK 0x00000001 /* follow across forks */ #define NOTE_TRACKERR 0x00000002 /* could not track child */ #define NOTE_CHILD 0x00000004 /* am a child process */ -#ifdef KERNEL_PRIVATE +#ifndef KERNEL +/* Temporary solution for BootX to use inode.h till kqueue moves to vfs layer */ +#include +struct knote; +SLIST_HEAD(klist, knote); +#endif + +#ifdef KERNEL + +#ifdef KERNEL_PRIVATE #include #ifdef MALLOC_DECLARE @@ -143,32 +219,33 @@ MALLOC_DECLARE(M_KQUEUE); */ #define NOTE_SIGNAL 0x08000000 +TAILQ_HEAD(kqtailq, knote); /* a list of "queued" events */ + struct knote { - /* JMM - line these up with wait_queue_link */ -#if 0 - struct wait_queue_link kn_wql; /* wait queue linkage */ -#else + int kn_inuse; /* inuse count */ + struct kqtailq *kn_tq; /* pointer to tail queue */ + TAILQ_ENTRY(knote) kn_tqe; /* linkage for tail queue */ + struct kqueue *kn_kq; /* which kqueue we are on */ + SLIST_ENTRY(knote) kn_link; /* linkage for search list */ SLIST_ENTRY(knote) kn_selnext; /* klist element chain */ - void *kn_type; /* knote vs.
thread */ - struct klist *kn_list; /* pointer to list we are on */ - SLIST_ENTRY(knote) kn_link; /* members of kqueue */ - struct kqueue *kn_kq; /* which kqueue we are on */ -#endif - TAILQ_ENTRY(knote) kn_tqe; /* ...ready to process */ union { - struct file *p_fp; /* file data pointer */ + struct fileproc *p_fp; /* file data pointer */ struct proc *p_proc; /* proc pointer */ } kn_ptr; struct filterops *kn_fop; - int kn_status; + int kn_status; /* status bits */ int kn_sfflags; /* saved filter flags */ struct kevent kn_kevent; - intptr_t kn_sdata; /* saved data field */ caddr_t kn_hook; + int kn_hookid; + int64_t kn_sdata; /* saved data field */ + #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ #define KN_DISABLED 0x04 /* event is disabled */ -#define KN_DETACHED 0x08 /* knote is detached */ +#define KN_DROPPING 0x08 /* knote is being dropped */ +#define KN_USEWAIT 0x10 /* wait for knote use */ +#define KN_DROPWAIT 0x20 /* wait for knote drop */ #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter @@ -180,9 +257,9 @@ struct knote { struct filterops { int f_isfd; /* true if ident == filedescriptor */ - int (*f_attach) __P((struct knote *kn)); - void (*f_detach) __P((struct knote *kn)); - int (*f_event) __P((struct knote *kn, long hint)); + int (*f_attach)(struct knote *kn); + void (*f_detach)(struct knote *kn); + int (*f_event)(struct knote *kn, long hint); }; struct proc; @@ -198,42 +275,33 @@ extern void klist_init(struct klist *list); extern void knote(struct klist *list, long hint); extern int knote_attach(struct klist *list, struct knote *kn); extern int knote_detach(struct klist *list, struct knote *kn); -extern void knote_remove(struct proc *p, struct klist *list); extern void knote_fdclose(struct proc *p, int fd); -extern int kqueue_register(struct kqueue *kq, - struct kevent *kev, struct proc *p); -#else /* !KERNEL_PRIVATE */ +#endif /* !KERNEL_PRIVATE */ + +#else /* KERNEL */ -/* - * This is currently visible to userland to work around broken - * programs which pull in or . 
- */ -#include -struct knote; -SLIST_HEAD(klist, knote); -#include struct timespec; __BEGIN_DECLS -int kqueue __P((void)); -int kevent __P((int kq, const struct kevent *changelist, int nchanges, +int kqueue(void); +int kevent(int kq, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, - const struct timespec *timeout)); + const struct timespec *timeout); __END_DECLS -#include -#ifdef __APPLE_API_PRIVATE -#include +#ifdef PRIVATE +#include __BEGIN_DECLS -mach_port_t kqueue_portset_np __P((int kq)); -int kqueue_from_portset_np __P((mach_port_t portset)); +mach_port_t kqueue_portset_np(int kq); +int kqueue_from_portset_np(mach_port_t portset); __END_DECLS -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ + +#endif /* KERNEL */ -#endif /* !KERNEL_PRIVATE */ #endif /* !_SYS_EVENT_H_ */ diff --git a/bsd/sys/eventvar.h b/bsd/sys/eventvar.h index c259f7a8c..8206a7201 100644 --- a/bsd/sys/eventvar.h +++ b/bsd/sys/eventvar.h @@ -50,6 +50,7 @@ #ifndef _SYS_EVENTVAR_H_ #define _SYS_EVENTVAR_H_ +#include #include #include @@ -57,19 +58,27 @@ #define KQEXTENT 256 /* linear growth by this amount */ struct kqueue { -#if 0 - /* threads, member notes, and notes for us in parent sets */ - struct wait_queue_set kq_wqs; -#else + decl_lck_spin_data( ,kq_lock) /* kqueue lock */ int kq_state; - int kq_lock; /* space for a lock */ - TAILQ_HEAD(kqlist, knote) kq_head; /* list of pending events */ - int kq_count; /* number of pending events */ -#endif - struct selinfo kq_sel; /* JMM - parent set at some point */ - struct filedesc *kq_fdp; + int kq_count; /* number of queued events */ + struct kqtailq kq_head; /* list of queued events */ + struct kqtailq kq_inprocess; /* list of in-process events */ + struct selinfo kq_sel; /* parent select/kqueue info */ + struct filedesc *kq_fdp; + #define KQ_SEL 0x01 #define KQ_SLEEP 0x02 +#define KQ_PROCWAIT 0x04 }; +extern struct kqueue *kqueue_alloc(struct proc *); +extern void kqueue_dealloc(struct kqueue *, struct proc *); + +typedef int (*kevent_callback_t)(struct kqueue *, struct kevent *, void *); +typedef void (*kevent_continue_t)(struct kqueue *, void *, int); + +extern int kevent_register(struct kqueue *, struct kevent *, struct proc *); +extern int kevent_scan(struct kqueue *, kevent_callback_t, kevent_continue_t, + void *, struct timeval *, struct proc *); + #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/bsd/sys/exec.h b/bsd/sys/exec.h index 5d4fb571e..e3b9d6e5b 100644 --- a/bsd/sys/exec.h +++ b/bsd/sys/exec.h @@ -65,38 +65,13 @@ #include -#ifdef __APPLE_API_OBSOLETE /* - * The following structure is found at the top of the user stack of each - * user process. The ps program uses it to locate argv and environment - * strings. Programs that wish ps to display other information may modify - * it; normally ps_argvstr points to the text for argv[0], and ps_nargvstr - * is the same as the program's argc. The fields ps_envstr and ps_nenvstr - * are the equivalent for the environment. + * XXX at this point, this file only exists for backward compatibility with + * XXX software which includes instead of the more correct + * XXX and/or needs the inclusion of + * XXX as a side effect.
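The kqueue()/kevent() prototypes above pair with the EVFILT_VNODE notes defined earlier in this header. A minimal user-space watcher, with the path and error handling purely illustrative:

	#include <sys/event.h>
	#include <fcntl.h>
	#include <stdio.h>

	int
	watch_file(const char *path)
	{
		/* O_EVTONLY (see fcntl.h below) opens for notification only */
		int fd = open(path, O_EVTONLY);
		int kq = kqueue();
		struct kevent change, event;

		if (fd == -1 || kq == -1)
			return -1;
		EV_SET(&change, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
		    NOTE_WRITE | NOTE_RENAME | NOTE_DELETE, 0, NULL);
		/* one call registers the change, then blocks for one event */
		if (kevent(kq, &change, 1, &event, 1, NULL) > 0)
			printf("fflags 0x%x on fd %d\n",
			    event.fflags, (int)event.ident);
		return 0;
	}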
*/ -struct ps_strings { - char *ps_argvstr; /* first of 0 or more argument strings */ - int ps_nargvstr; /* the number of argument strings */ - char *ps_envstr; /* first of 0 or more environment strings */ - int ps_nenvstr; /* the number of environment strings */ -}; - -#endif /* __APPLE_API_OBSOLETE */ - #include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -/* - * Arguments to the exec system call. - */ -struct execve_args { - char *fname; - char **argp; - char **envp; -}; -#endif /*__APPLE_API_PRIVATE */ -#endif /* KERNEL */ - #endif /* !_SYS_EXEC_H_ */ diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index 297c7791c..0519d9522 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -69,9 +69,28 @@ * described by POSIX for ; it also includes * related kernel definitions. */ +#include +#include -#ifndef KERNEL -#include +/* We should not be exporting size_t here. Temporary for gcc bootstrapping. */ +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +#ifndef _MODE_T +typedef __darwin_mode_t mode_t; +#define _MODE_T +#endif + +#ifndef _OFF_T +typedef __darwin_off_t off_t; +#define _OFF_T +#endif + +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T #endif /* @@ -95,19 +114,20 @@ * FREAD and FWRITE are excluded from the #ifdef KERNEL so that TIOCFLUSH, * which was documented to use FREAD/FWRITE, continues to work. */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define FREAD 0x0001 #define FWRITE 0x0002 #endif #define O_NONBLOCK 0x0004 /* no delay */ #define O_APPEND 0x0008 /* set append mode */ -#ifndef _POSIX_SOURCE +#define O_SYNC 0x0080 /* synchronous writes */ +#ifndef _POSIX_C_SOURCE #define O_SHLOCK 0x0010 /* open with shared file lock */ #define O_EXLOCK 0x0020 /* open with exclusive file lock */ #define O_ASYNC 0x0040 /* signal pgrp when data ready */ -#define O_FSYNC 0x0080 /* synchronous writes */ +#define O_FSYNC O_SYNC /* source compatibility: do not use */ #define O_NOFOLLOW 0x0100 /* don't follow symlinks */ -#endif +#endif /* _POSIX_C_SOURCE */ #define O_CREAT 0x0200 /* create if nonexistent */ #define O_TRUNC 0x0400 /* truncate to zero length */ #define O_EXCL 0x0800 /* error if already exists */ @@ -116,12 +136,17 @@ #define FDEFER 0x2000 /* defer for next gc pass */ #define FHASLOCK 0x4000 /* descriptor holds advisory lock */ #endif -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define O_EVTONLY 0x8000 /* descriptor requested for event notifications only */ #endif +#ifdef KERNEL +#define FWASWRITTEN 0x10000 /* descriptor was written */ +#endif + /* defined by POSIX 1003.1; BSD default, so no bit required */ #define O_NOCTTY 0 /* don't assign controlling terminal */ +//#define O_SYNC /* ??? POSIX: Write according to synchronized I/O file integrity completion */ #ifdef KERNEL /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ @@ -139,7 +164,7 @@ * and by fcntl. We retain the F* names for the kernel f_flags field * and for backward compatibility for fcntl.
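Among the new fcntl commands that follow, F_FULLFSYNC deserves a quick illustration: unlike fsync(2), it also asks the drive itself to flush its cache to the media, and a common pattern is to fall back to plain fsync() where a filesystem does not support the full flush (sketch only):

	#include <fcntl.h>
	#include <unistd.h>

	int
	durable_sync(int fd)
	{
		if (fcntl(fd, F_FULLFSYNC) != -1)
			return 0;
		return fsync(fd);	/* e.g. a filesystem without full-flush support */
	}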
*/ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define FAPPEND O_APPEND /* kernel/compat */ #define FASYNC O_ASYNC /* kernel/compat */ #define FFSYNC O_FSYNC /* kernel */ @@ -152,7 +177,7 @@ * Flags used for copyfile(2) */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define CPF_OVERWRITE 1 #define CPF_IGNORE_MODE 2 #define CPF_MASK (CPF_OVERWRITE|CPF_IGNORE_MODE) @@ -168,13 +193,12 @@ #define F_SETFD 2 /* set file descriptor flags */ #define F_GETFL 3 /* get file status flags */ #define F_SETFL 4 /* set file status flags */ -#ifndef _POSIX_SOURCE #define F_GETOWN 5 /* get SIGIO/SIGURG proc/pgrp */ #define F_SETOWN 6 /* set SIGIO/SIGURG proc/pgrp */ -#endif #define F_GETLK 7 /* get record locking information */ #define F_SETLK 8 /* set record locking information */ #define F_SETLKW 9 /* F_SETLK; wait if blocked */ +#ifndef _POSIX_C_SOURCE #define F_CHKCLEAN 41 /* Used for regression test */ #define F_PREALLOCATE 42 /* Preallocate storage */ #define F_SETSIZE 43 /* Truncate a file without zeroing space */ @@ -186,6 +210,13 @@ #define F_LOG2PHYS 49 /* file offset to device offset */ #define F_GETPATH 50 /* return the full path of the fd */ #define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */ +#define F_PATHPKG_CHECK 52 /* find which component (if any) is a package */ +#define F_FREEZE_FS 53 /* "freeze" all fs operations */ +#define F_THAW_FS 54 /* "thaw" all fs operations */ + +// FS-specific fcntl()'s numbers begin at 0x00010000 and go up +#define FCNTL_FS_SPECIFIC_BASE 0x00010000 +#endif /* _POSIX_C_SOURCE */ /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ @@ -200,6 +231,65 @@ #define F_POSIX 0x040 /* Use POSIX semantics for lock */ #endif +/* + * [XSI] The values used for l_whence shall be defined as described + * in + */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* set file offset to offset */ +#define SEEK_CUR 1 /* set file offset to current plus offset */ +#define SEEK_END 2 /* set file offset to EOF plus offset */ +#endif /* !SEEK_SET */ + +/* + * [XSI] The symbolic names for file modes for use as values of mode_t + * shall be defined as described in + */ +#ifndef S_IFMT +/* File type */ +#define S_IFMT 0170000 /* [XSI] type of file mask */ +#define S_IFIFO 0010000 /* [XSI] named pipe (fifo) */ +#define S_IFCHR 0020000 /* [XSI] character special */ +#define S_IFDIR 0040000 /* [XSI] directory */ +#define S_IFBLK 0060000 /* [XSI] block special */ +#define S_IFREG 0100000 /* [XSI] regular */ +#define S_IFLNK 0120000 /* [XSI] symbolic link */ +#define S_IFSOCK 0140000 /* [XSI] socket */ +#ifndef _POSIX_C_SOURCE +#define S_IFWHT 0160000 /* whiteout */ +#define S_IFXATTR 0200000 /* extended attribute */ +#endif + +/* File mode */ +/* Read, write, execute/search by owner */ +#define S_IRWXU 0000700 /* [XSI] RWX mask for owner */ +#define S_IRUSR 0000400 /* [XSI] R for owner */ +#define S_IWUSR 0000200 /* [XSI] W for owner */ +#define S_IXUSR 0000100 /* [XSI] X for owner */ +/* Read, write, execute/search by group */ +#define S_IRWXG 0000070 /* [XSI] RWX mask for group */ +#define S_IRGRP 0000040 /* [XSI] R for group */ +#define S_IWGRP 0000020 /* [XSI] W for group */ +#define S_IXGRP 0000010 /* [XSI] X for group */ +/* Read, write, execute/search by others */ +#define S_IRWXO 0000007 /* [XSI] RWX mask for other */ +#define S_IROTH 0000004 /* [XSI] R for other */ +#define S_IWOTH 0000002 /* [XSI] W for other */ +#define S_IXOTH 0000001 /* [XSI] X for other */ + +#define S_ISUID 0004000 /* [XSI] set user id on execution */ 
+#define S_ISGID 0002000 /* [XSI] set group id on execution */ +#define S_ISVTX 0001000 /* [XSI] directory restricted delete */ + +#ifndef _POSIX_C_SOURCE +#define S_ISTXT S_ISVTX /* sticky bit: not supported */ +#define S_IREAD S_IRUSR /* backward compatibility */ +#define S_IWRITE S_IWUSR /* backward compatibility */ +#define S_IEXEC S_IXUSR /* backward compatibility */ +#endif +#endif /* !S_IFMT */ + +#ifndef _POSIX_C_SOURCE /* allocate flags (F_PREALLOCATE) */ #define F_ALLOCATECONTIG 0x00000002 /* allocate contiguous space */ @@ -210,6 +300,7 @@ #define F_PEOFPOSMODE 3 /* Make it past all of the SEEK pos modes so that */ /* we can keep them in sync should we desire */ #define F_VOLPOSMODE 4 /* specify volume starting position */ +#endif /* _POSIX_C_SOURCE */ /* * Advisory file segment locking data type - @@ -224,6 +315,7 @@ struct flock { }; +#ifndef _POSIX_C_SOURCE /* * advisory file read data type - * information passed by user to system @@ -234,18 +326,16 @@ struct radvisory { }; -#ifndef _POSIX_SOURCE /* lock operations for flock(2) */ #define LOCK_SH 0x01 /* shared file lock */ #define LOCK_EX 0x02 /* exclusive file lock */ #define LOCK_NB 0x04 /* don't block when locking */ #define LOCK_UN 0x08 /* unlock file */ -#endif /* fstore_t type used by F_DEALLOCATE and F_PREALLOCATE commands */ typedef struct fstore { - u_int32_t fst_flags; /* IN: flags word */ + unsigned int fst_flags; /* IN: flags word */ int fst_posmode; /* IN: indicates use of offset field */ off_t fst_offset; /* IN: start of the region */ off_t fst_length; /* IN: size of the region */ @@ -256,10 +346,34 @@ typedef struct fstore { typedef struct fbootstraptransfer { off_t fbt_offset; /* IN: offset to start read/write */ - size_t fbt_length; /* IN: number of bytes to transfer */ + size_t fbt_length; /* IN: number of bytes to transfer */ void *fbt_buffer; /* IN: buffer to be read/written */ } fbootstraptransfer_t; + +// LP64todo - should this move? +#ifdef KERNEL +/* LP64 version of fbootstraptransfer. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with fbootstraptransfer + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +typedef struct user_fbootstraptransfer { + off_t fbt_offset; /* IN: offset to start read/write */ + user_size_t fbt_length; /* IN: number of bytes to transfer */ + user_addr_t fbt_buffer; /* IN: buffer to be read/written */ +} user_fbootstraptransfer_t; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL +
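The hunk above also narrows fstore_t's fst_flags from u_int32_t to plain unsigned int. As an illustration of how these pieces fit together, here is a conventional F_PREALLOCATE calling pattern (editor's sketch, not from the patch; the fall-back to a non-contiguous request is customary rather than mandated by this header):

#include <fcntl.h>
#include <string.h>

/* try to reserve 'len' bytes past the current EOF, contiguously if possible */
int
reserve_space(int fd, off_t len)
{
    fstore_t fst;

    memset(&fst, 0, sizeof(fst));
    fst.fst_flags = F_ALLOCATECONTIG;   /* ask for contiguous space first */
    fst.fst_posmode = F_PEOFPOSMODE;    /* offset is relative to end of file */
    fst.fst_offset = 0;
    fst.fst_length = len;
    if (fcntl(fd, F_PREALLOCATE, &fst) != -1)
        return 0;
    fst.fst_flags = 0;                  /* retry without the contiguity hint */
    return (fcntl(fd, F_PREALLOCATE, &fst) == -1) ? -1 : 0;
}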
/* * For F_LOG2PHYS this information is passed back to user * Currently only devoffset is returned - that is the VOP_BMAP @@ -276,27 +390,66 @@ typedef struct fbootstraptransfer { * and a per filesystem type flag will be needed to interpret the * contiguous bytes count result from CMAP. */ +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + struct log2phys { - u_int32_t l2p_flags; /* unused so far */ + unsigned int l2p_flags; /* unused so far */ off_t l2p_contigbytes; /* unused so far */ off_t l2p_devoffset; /* bytes into device */ }; -#ifndef _POSIX_SOURCE +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + #define O_POPUP 0x80000000 /* force window to popup on open */ #define O_ALERT 0x20000000 /* small, clean popup window */ -#endif +#endif /* _POSIX_C_SOURCE */ #ifndef KERNEL + +#ifndef _POSIX_C_SOURCE +#ifndef _FILESEC_T +struct _filesec; +typedef struct _filesec *filesec_t; +#define _FILESEC_T +#endif +typedef enum { + FILESEC_OWNER = 1, + FILESEC_GROUP = 2, + FILESEC_UUID = 3, + FILESEC_MODE = 4, + FILESEC_ACL = 5, + FILESEC_GRPUUID = 6, + +/* XXX these are private to the implementation */ + FILESEC_ACL_RAW = 100, + FILESEC_ACL_ALLOCSIZE = 101 +} filesec_property_t; + +/* XXX backwards compatibility */ +#define FILESEC_GUID FILESEC_UUID +#endif /* _POSIX_C_SOURCE */ __BEGIN_DECLS -int open __P((const char *, int, ...)); -int creat __P((const char *, mode_t)); -int fcntl __P((int, int, ...)); -#ifndef _POSIX_SOURCE -int flock __P((int, int)); -#endif /* !_POSIX_SOURCE */ +int open(const char *, int, ...); +int creat(const char *, mode_t); +int fcntl(int, int, ...); +#ifndef _POSIX_C_SOURCE +int openx_np(const char *, int, filesec_t); +int flock(int, int); +filesec_t filesec_init(void); +filesec_t filesec_dup(filesec_t); +void filesec_free(filesec_t); +int filesec_get_property(filesec_t, filesec_property_t, void *); +int filesec_set_property(filesec_t, filesec_property_t, const void *); +int filesec_unset_property(filesec_t, filesec_property_t); +int filesec_query_property(filesec_t, filesec_property_t, int *); +#define _FILESEC_UNSET_PROPERTY ((void *)0) +#define _FILESEC_REMOVE_ACL ((void *)1) +#endif /* !_POSIX_C_SOURCE */ __END_DECLS #endif diff --git a/bsd/sys/file.h b/bsd/sys/file.h index 142529b92..710159af8 100644 --- a/bsd/sys/file.h +++ b/bsd/sys/file.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,141 +59,46 @@ #define _SYS_FILE_H_ #include +#include #include #include +#include +#include #ifdef KERNEL -#include #include -#include +#include +#endif -struct proc; -struct uio; -struct knote; -#ifdef __APPLE_API_UNSTABLE +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif -/* - * Kernel descriptor table. - * One entry for each open kernel vnode and socket.
- */ -struct file { - LIST_ENTRY(file) f_list;/* list of active files */ +/* for the compat sake; */ +struct extern_file { + LIST_ENTRY(extern_file) f_list; /* list of active files */ short f_flag; /* see fcntl.h */ -#define DTYPE_VNODE 1 /* file */ -#define DTYPE_SOCKET 2 /* communications endpoint */ -#define DTYPE_PSXSHM 3 /* POSIX Shared memory */ -#define DTYPE_PSXSEM 4 /* POSIX Semaphores */ -#define DTYPE_KQUEUE 5 /* kqueue */ short f_type; /* descriptor type */ short f_count; /* reference count */ short f_msgcount; /* references from message queue */ struct ucred *f_cred; /* credentials associated with descriptor */ - struct fileops { - int (*fo_read) __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, - struct proc *p)); - int (*fo_write) __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, - struct proc *p)); -#define FOF_OFFSET 1 - int (*fo_ioctl) __P((struct file *fp, u_long com, - caddr_t data, struct proc *p)); - int (*fo_select) __P((struct file *fp, int which, - void *wql, struct proc *p)); - int (*fo_close) __P((struct file *fp, struct proc *p)); - int (*fo_kqfilter) __P((struct file *fp, struct knote *kn, - struct proc *p)); - } *f_ops; + void * f_ops; off_t f_offset; caddr_t f_data; /* vnode or socket or SHM or semaphore */ }; -#ifdef __APPLE_API_PRIVATE -LIST_HEAD(filelist, file); -extern struct filelist filehead; /* head of list of open files */ -extern int maxfiles; /* kernel limit on number of open files */ -extern int nfiles; /* actual number of open files */ -#endif /* __APPLE_API_PRIVATE */ +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif +#ifdef KERNEL __BEGIN_DECLS -int fref __P((struct file *)); /* take a reference on file pointer */ -int frele __P((struct file *)); /* release a reference on file pointer */ -int fcount __P((struct file *)); /* returns the reference count */ - -static __inline int fo_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static __inline int fo_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static __inline int fo_ioctl __P((struct file *fp, u_long com, caddr_t data, - struct proc *p)); -static __inline int fo_select __P((struct file *fp, int which, void *wql, - struct proc *p)); -static __inline int fo_close __P((struct file *fp, struct proc *p)); -static __inline int fo_kqfilter __P((struct file *fp, struct knote *kn, - struct proc *p)); - -static __inline int -fo_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct proc *p) -{ - int error; - - if ((error = fref(fp)) == -1) - return (EBADF); - error = (*fp->f_ops->fo_read)(fp, uio, cred, flags, p); - frele(fp); - return (error); -} - -static __inline int -fo_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct proc *p) -{ - int error; - - if ((error = fref(fp)) == -1) - return (EBADF); - error = (*fp->f_ops->fo_write)(fp, uio, cred, flags, p); - frele(fp); - return (error); -} - -static __inline int -fo_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) -{ - int error; - - if ((error = fref(fp)) == -1) - return (EBADF); - error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); - frele(fp); - return (error); -} - -static __inline int -fo_select(struct file *fp, int which, void *wql, struct proc *p) -{ - int error; - - error = (*fp->f_ops->fo_select)(fp, which, wql, p); - return (error); -} - -static __inline int -fo_close(struct file *fp, struct proc *p) -{ - - return 
((*fp->f_ops->fo_close)(fp, p)); -} - -static __inline int -fo_kqfilter(struct file *fp, struct knote *kn, struct proc *p) -{ - return ((*fp->f_ops->fo_kqfilter)(fp, kn, p)); -} - +int file_socket(int, socket_t *); +int file_vnode(int, vnode_t *); +int file_flags(int, int *); +int file_drop(int); __END_DECLS -#endif /* __APPLE_API_UNSTABLE */ - #endif /* KERNEL */ #endif /* !_SYS_FILE_H_ */ diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h new file mode 100644 index 000000000..76dd19505 --- /dev/null +++ b/bsd/sys/file_internal.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)file.h 8.3 (Berkeley) 1/9/95 + */ + +#ifndef _SYS_FILE_INTERNAL_H_ +#define _SYS_FILE_INTERNAL_H_ + +#include +#include +#include + +#ifdef KERNEL +#include +#include +#include +#include +#include + +struct proc; +struct uio; +struct knote; +#ifdef __APPLE_API_UNSTABLE + +struct file; + + +/* + * Kernel descriptor table. + * One entry for each open kernel vnode and socket. + */ +struct fileproc { + int32_t f_flags; + int32_t f_iocount; + struct fileglob * f_fglob; + void * f_waddr; +}; + +#define FILEPROC_NULL (struct fileproc *)0 + +#define FP_INCREATE 0x0001 +#define FP_INCLOSE 0x0002 +#define FP_INSELECT 0x0004 +#define FP_INCHRREAD 0x0008 +#define FP_WRITTEN 0x0010 +#define FP_CLOSING 0x0020 +#define FP_WAITCLOSE 0x0040 +#define FP_AIOISSUED 0x0080 +#define FP_WAITEVENT 0x0100 + + +/* defns of close_internal */ +#define CLOSEINT_LOCKED 1 +#define CLOSEINT_WAITONCLOSE 2 +#define CLOSEINT_NOFDRELSE 4 +#define CLOSEINT_NOFDNOREF 8 + +struct fileglob { + LIST_ENTRY(fileglob) f_list;/* list of active files */ + LIST_ENTRY(fileglob) f_msglist;/* list of active files */ + int32_t fg_flag; /* see fcntl.h */ + int32_t fg_type; /* descriptor type */ + int32_t fg_count; /* reference count */ + int32_t fg_msgcount; /* references from message queue */ + struct ucred *fg_cred; /* credentials associated with descriptor */ + struct fileops { + int (*fo_read) __P((struct fileproc *fp, struct uio *uio, + struct ucred *cred, int flags, + struct proc *p)); + int (*fo_write) __P((struct fileproc *fp, struct uio *uio, + struct ucred *cred, int flags, + struct proc *p)); +#define FOF_OFFSET 1 + int (*fo_ioctl) __P((struct fileproc *fp, u_long com, + caddr_t data, struct proc *p)); + int (*fo_select) __P((struct fileproc *fp, int which, + void *wql, struct proc *p)); + int (*fo_close) __P((struct fileglob *fg, struct proc *p)); + int (*fo_kqfilter) __P((struct fileproc *fp, struct knote *kn, + struct proc *p)); + int (*fo_drain) (struct fileproc *fp, struct proc *p); + } *fg_ops; + off_t fg_offset; + caddr_t fg_data; /* vnode or socket or SHM or semaphore */ + lck_mtx_t fg_lock; + int32_t fg_lflags; /* file global flags */ + unsigned int fg_lockpc[4]; + unsigned int fg_unlockpc[4]; +}; + +/* file types */ +#define DTYPE_VNODE 1 /* file */ +#define DTYPE_SOCKET 2 /* communications endpoint */ +#define DTYPE_PSXSHM 3 /* POSIX Shared memory */ +#define DTYPE_PSXSEM 4 /* POSIX Semaphores */ +#define DTYPE_KQUEUE 5 /* kqueue */ +#define DTYPE_PIPE 6 /* pipe */ +#define DTYPE_FSEVENTS 7 /* fsevents */ + +/* defines for fg_lflags */ +#define FG_TERM 0x01 /* the fileglob is terminating .. */ +#define FG_INSMSGQ 0x02 /* insert to msgqueue pending .. 
*/ +#define FG_WINSMSGQ 0x04 /* wait until the fileglob is in the msgqueue */ +#define FG_RMMSGQ 0x08 /* the fileglob is being removed from msgqueue */ +#define FG_WRMMSGQ 0x10 /* wait for the fileglob to be removed from msgqueue */ + + +#ifdef __APPLE_API_PRIVATE +LIST_HEAD(filelist, fileglob); +LIST_HEAD(fmsglist, fileglob); +extern struct filelist filehead; /* head of list of open files */ +extern struct fmsglist fmsghead; /* head of list of open files */ +extern int maxfiles; /* kernel limit on number of open files */ +extern int nfiles; /* actual number of open files */ +#endif /* __APPLE_API_PRIVATE */ + + +__BEGIN_DECLS +int fo_read(struct fileproc *fp, struct uio *uio, + struct ucred *cred, int flags, struct proc *p); +int fo_write(struct fileproc *fp, struct uio *uio, + struct ucred *cred, int flags, struct proc *p); +int fo_ioctl(struct fileproc *fp, u_long com, caddr_t data, + struct proc *p); +int fo_select(struct fileproc *fp, int which, void *wql, + struct proc *p); +int fo_close(struct fileglob *fg, struct proc *p); +int fo_kqfilter(struct fileproc *fp, struct knote *kn, + struct proc *p); +void fileproc_drain(proc_t, struct fileproc *); +void fp_setflags(proc_t, struct fileproc *, int); +void fp_clearflags(proc_t, struct fileproc *, int); +int fp_drop(struct proc *p, int fd, struct fileproc *fp, int locked); +int fp_drop_written(proc_t p, int fd, struct fileproc *fp); +int fp_drop_event(proc_t p, int fd, struct fileproc *fp); +int fp_free(struct proc * p, int fd, struct fileproc * fp); +struct kqueue; +int fp_getfkq(struct proc *p, int fd, struct fileproc **resultfp, struct kqueue **resultkq); +struct psemnode; +int fp_getfpsem(struct proc *p, int fd, struct fileproc **resultfp, struct psemnode **resultpsem); +struct vnode; +int fp_getfvp(struct proc *p, int fd, struct fileproc **resultfp, struct vnode **resultvp); +struct socket; +int fp_getfsock(struct proc *p, int fd, struct fileproc **resultfp, struct socket **results); +int fp_lookup(struct proc *p, int fd, struct fileproc **resultfp, int locked); +int close_internal(struct proc *p, int fd, struct fileproc *fp, int flags); +int closef_locked(struct fileproc *fp, struct fileglob *fg, struct proc *p); +void fg_insertuipc(struct fileglob * fg); +void fg_removeuipc(struct fileglob * fg); +__END_DECLS + +#endif /* __APPLE_API_UNSTABLE */ + +#endif /* KERNEL */ + +#endif /* !_SYS_FILE_INTERNAL_H_ */ diff --git a/bsd/sys/filedesc.h b/bsd/sys/filedesc.h index f1f2fb4fd..5212cc45c 100644 --- a/bsd/sys/filedesc.h +++ b/bsd/sys/filedesc.h @@ -81,7 +81,7 @@ struct klist; struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ + struct fileproc **fd_ofiles; /* file structures for open files */ char *fd_ofileflags; /* per-process open file flags */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ @@ -95,14 +95,22 @@ struct filedesc { struct klist *fd_knlist; /* list of attached knotes */ u_long fd_knhashmask; /* size of knhash */ struct klist *fd_knhash; /* hash table for attached knotes */ + int fd_flags; }; +/* + * definitions for fd_flags; + */ +#define FD_CHROOT 0x01 /* process was chrooted... keep track even */ + /* if we're force unmounted and unable to */ + /* take a vnode_ref on fd_rdir during a fork */ +
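The fp_lookup()/fp_drop() pair declared above replaces the old fref()/frele() scheme: a successful fp_lookup() holds an I/O count on the fileproc (its f_iocount field) until fp_drop() releases it. A rough sketch of the intended in-kernel calling pattern (editor's illustration only, not from the patch):

static int
with_file(struct proc *p, int fd)
{
    struct fileproc *fp;
    int error;

    if ((error = fp_lookup(p, fd, &fp, 0)) != 0)    /* takes an iocount */
        return (error);
    /* ... fp->f_fglob stays pinned while the iocount is held ... */
    fp_drop(p, fd, fp, 0);                          /* releases the iocount */
    return (0);
}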
/* * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ -#define UF_MAPPED 0x02 /* mapped from device */ #define UF_RESERVED 0x04 /* open pending / in progress */ +#define UF_CLOSING 0x08 /* close in progress */ /* * Storage required per open file descriptor. @@ -113,24 +121,23 @@ struct filedesc { /* * Kernel global variables and routines. */ -extern int dupfdopen __P((struct filedesc *fdp, - int indx, int dfd, int mode, int error)); -extern int fdalloc __P((struct proc *p, int want, int *result)); -extern void fdrelse __P((struct proc *p, int fd)); -extern int fdavail __P((struct proc *p, int n)); -extern int fdgetf __P((struct proc *p, int fd, struct file **resultfp)); +extern int dupfdopen(struct filedesc *fdp, + int indx, int dfd, int mode, int error); +extern int fdalloc(struct proc *p, int want, int *result); +extern void fdrelse(struct proc *p, int fd); +extern int fdavail(struct proc *p, int n); #define fdfile(p, fd) \ (&(p)->p_fd->fd_ofiles[(fd)]) #define fdflags(p, fd) \ (&(p)->p_fd->fd_ofileflags[(fd)]) -extern int falloc __P((struct proc *p, - struct file **resultfp, int *resultfd)); -extern void ffree __P((struct file *fp)); +extern int falloc(struct proc *p, + struct fileproc **resultfp, int *resultfd); +extern void ffree(struct file *fp); #ifdef __APPLE_API_PRIVATE -extern struct filedesc *fdcopy __P((struct proc *p)); -extern void fdfree __P((struct proc *p)); -extern void fdexec __P((struct proc *p)); +extern struct filedesc *fdcopy(struct proc *p); +extern void fdfree(struct proc *p); +extern void fdexec(struct proc *p); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/fsctl.h b/bsd/sys/fsctl.h index 63198c7ec..0d2033f06 100644 --- a/bsd/sys/fsctl.h +++ b/bsd/sys/fsctl.h @@ -65,15 +65,12 @@ #include -/* get size of mount info struct: */ -#define FSGETMOUNTINFOSIZE _IOR('m', 1, long) - #ifndef KERNEL #include __BEGIN_DECLS -int fsctl __P((const char *, unsigned long, void *, unsigned long)); +int fsctl(const char *, unsigned long, void *, unsigned long); __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/fsevents.h b/bsd/sys/fsevents.h new file mode 100644 index 000000000..91b9f5958 --- /dev/null +++ b/bsd/sys/fsevents.h @@ -0,0 +1,88 @@ +#ifndef FSEVENT_H +#define FSEVENT_H 1 + +// Event types that you can ask to listen for +#define FSE_INVALID -1 +#define FSE_CREATE_FILE 0 +#define FSE_DELETE 1 +#define FSE_STAT_CHANGED 2 +#define FSE_RENAME 3 +#define FSE_CONTENT_MODIFIED 4 +#define FSE_EXCHANGE 5 +#define FSE_FINDER_INFO_CHANGED 6 +#define FSE_CREATE_DIR 7 +#define FSE_CHOWN 8 + +#define FSE_MAX_EVENTS 9 +#define FSE_ALL_EVENTS 998 + +#define FSE_EVENTS_DROPPED 999 + +// Actions for each event type +#define FSE_IGNORE 0 +#define FSE_REPORT 1 +#define FSE_ASK 2 // Not implemented yet +
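fsevents.h is new in this release. A watcher does not read /dev/fsevents directly: it opens the device, chooses one of the actions above for each event type, and clones a private event queue with the FSEVENTS_CLONE ioctl defined below, then read()s packed event records from the cloned descriptor. A rough user-space sketch (editor's illustration, not from the patch; requires root, and the record layout read back is release-specific):

#include <sys/fsevents.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

int
open_fsevents_clone(void)
{
    int8_t actions[FSE_MAX_EVENTS];
    fsevent_clone_args ca;
    int fd, clone_fd = -1, i;

    if ((fd = open("/dev/fsevents", O_RDONLY)) == -1)
        return -1;
    for (i = 0; i < FSE_MAX_EVENTS; i++)
        actions[i] = FSE_REPORT;        /* report every event type */
    ca.event_list = actions;
    ca.num_events = FSE_MAX_EVENTS;
    ca.event_queue_depth = 1024;
    ca.fd = &clone_fd;                  /* kernel returns the cloned fd here */
    if (ioctl(fd, FSEVENTS_CLONE, &ca) == -1)
        clone_fd = -1;
    close(fd);                          /* events are then read(2) from clone_fd */
    return clone_fd;
}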
+// The types of each of the arguments for an event +// Each type is followed by the size and then the +// data. FSE_ARG_VNODE is just a path string +#define FSE_ARG_VNODE 0x0001 // next arg is a vnode pointer +#define FSE_ARG_STRING 0x0002 // next arg is length followed by string ptr +#define FSE_ARG_PATH 0x0003 // next arg is a full path +#define FSE_ARG_INT32 0x0004 // next arg is a 32-bit int +#define FSE_ARG_INT64 0x0005 // next arg is a 64-bit int +#define FSE_ARG_RAW 0x0006 // next arg is a length followed by a void ptr +#define FSE_ARG_INO 0x0007 // next arg is the inode number (ino_t) +#define FSE_ARG_UID 0x0008 // next arg is the file's uid (uid_t) +#define FSE_ARG_DEV 0x0009 // next arg is the file's dev_t +#define FSE_ARG_MODE 0x000a // next arg is the file's mode (as an int32, file type only) +#define FSE_ARG_GID 0x000b // next arg is the file's gid (gid_t) +#define FSE_ARG_FINFO 0x000c // kernel internal only +#define FSE_ARG_DONE 0xb33f // no more arguments + +#define FSE_MAX_ARGS 12 + + +// ioctl's on /dev/fsevents +typedef struct fsevent_clone_args { + int8_t *event_list; + int32_t num_events; + int32_t event_queue_depth; + int32_t *fd; +} fsevent_clone_args; + +#define FSEVENTS_CLONE _IOW('s', 1, fsevent_clone_args) + + +// ioctl's on the cloned fd +typedef struct fsevent_dev_filter_args { + uint32_t num_devices; + dev_t *devices; +} fsevent_dev_filter_args; + +#define FSEVENTS_DEVICE_FILTER _IOW('s', 100, fsevent_dev_filter_args) + + +#ifdef KERNEL + +int need_fsevent(int type, vnode_t vp); +int add_fsevent(int type, vfs_context_t, ...); +void fsevent_unmount(struct mount *mp); + +// misc utility functions for fsevent info and pathbuffers... +typedef struct fse_info { + dev_t dev; + ino_t ino; + int32_t mode; // note: this is not a mode_t (it's 32-bits, not 16) + uid_t uid; + gid_t gid; +} fse_info; + +int get_fse_info(struct vnode *vp, fse_info *fse, vfs_context_t ctx); + +char *get_pathbuff(void); +void release_pathbuff(char *path); + +#endif /* KERNEL */ + +#endif /* FSEVENT_H */ diff --git a/bsd/net/if_slvar.h b/bsd/sys/imgact.h similarity index 54% rename from bsd/net/if_slvar.h rename to bsd/sys/imgact.h index cb3132ef1..7a6920171 100644 --- a/bsd/net/if_slvar.h +++ b/bsd/sys/imgact.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,9 +19,9 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. +/* + * Copyright (c) 1993, David Greenman + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,7 +39,7 @@ * may be used to endorse or promote products derived from this software * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE @@ -50,66 +50,50 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
- * - * @(#)if_slvar.h 8.3 (Berkeley) 2/1/94 - * */ +#ifndef _SYS_IMGACT_H_ +#define _SYS_IMGACT_H_ -#ifndef _NET_IF_SLVAR_H_ -#define _NET_IF_SLVAR_H_ -#include +#define IMG_SHSIZE 512 /* largest shell interpreter, in bytes */ -#ifndef DONT_WARN_OBSOLETE -#warning if_slvar.h is not used by the darwin kernel -#endif +struct proc; +struct nameidata; -#include -#include -#include +struct image_params { + user_addr_t ip_user_fname; /* argument */ + user_addr_t ip_user_argv; /* argument */ + user_addr_t ip_user_envv; /* argument */ + struct vnode *ip_vp; /* file */ + struct vnode_attr *ip_vattr; /* run file attributes */ + struct vnode_attr *ip_origvattr; /* invocation file attributes */ + char *ip_vdata; /* file data (up to one page) */ + int ip_flags; /* image flags */ + int ip_argc; /* argument count */ + char *ip_argv; /* argument vector beginning */ + int ip_envc; /* environment count */ + char *ip_strings; /* base address for strings */ + char *ip_strendp; /* current end pointer */ + char *ip_strendargvp; /* end of argv/start of envp */ + int ip_strspace; /* remaining space */ + user_size_t ip_arch_offset; /* subfile offset in ip_vp */ + user_size_t ip_arch_size; /* subfile length in ip_vp */ + char ip_interp_name[IMG_SHSIZE]; /* interpreter name */ -#ifdef __APPLE_API_PRIVATE + /* Next two fields are for support of Classic... */ + char *ip_p_comm; /* optional alt p->p_comm */ + char *ip_tws_cache_name; /* task working set cache */ + struct vfs_context *ip_vfs_context; /* VFS context */ + struct nameidata *ip_ndp; /* current nameidata */ + thread_t ip_vfork_thread; /* thread created, if vfork */ +}; /* - * Definitions for SLIP interface data structures - * - * (This exists so programs like slstats can get at the definition - * of sl_softc.) + * Image flags */ -struct sl_softc { - struct ifnet sc_if; /* network-visible interface */ - struct ifqueue sc_fastq; /* interactive output queue */ - struct tty *sc_ttyp; /* pointer to tty structure */ - u_char *sc_mp; /* pointer to next available buf char */ - u_char *sc_ep; /* pointer to last available buf char */ - u_char *sc_buf; /* input buffer */ - u_int sc_flags; /* see below */ - u_int sc_escape; /* =1 if last char input was FRAME_ESCAPE */ - long sc_lasttime; /* last time a char arrived */ - long sc_abortcount; /* number of abort escape chars */ - long sc_starttime; /* time of first abort in window */ - u_int sc_keepalive; /* time to decide link hang */ - u_int sc_outfill; /* time to send FRAME_END when output idle */ - /* - * Handles for scheduling outfill and - * keepalive timeouts. 
- */ -#if FB3x - struct callout_handle sc_ofhandle; - struct callout_handle sc_kahandle; -#endif - struct slcompress sc_comp; /* tcp compression data */ -}; - -/* internal flags */ -#define SC_ERROR 0x0001 /* had an input error */ -#define SC_OUTWAIT 0x0002 /* waiting for output fill */ -#define SC_KEEPALIVE 0x0004 /* input keepalive */ -#define SC_STATIC 0x0008 /* it is static unit */ - -/* visible flags */ -#define SC_COMPRESS IFF_LINK0 /* compress TCP traffic */ -#define SC_NOICMP IFF_LINK1 /* suppress ICMP traffic */ -#define SC_AUTOCOMP IFF_LINK2 /* auto-enable TCP compression */ +#define IMGPF_NONE 0x00000000 /* No flags */ +#define IMGPF_INTERPRET 0x00000001 /* Interpreter invoked */ +#define IMGPF_RESERVED1 0x00000002 /* reserved */ +#define IMGPF_WAS_64BIT 0x00000004 /* exec from a 64Bit binary */ +#define IMGPF_IS_64BIT 0x00000008 /* exec to a 64Bit binary */ -#endif /* __APPLE_API_PRIVATE */ -#endif +#endif /* !_SYS_IMGACT_H_ */ diff --git a/bsd/sys/ioctl.h b/bsd/sys/ioctl.h index a9dfad6b3..006b04d44 100644 --- a/bsd/sys/ioctl.h +++ b/bsd/sys/ioctl.h @@ -89,7 +89,7 @@ struct ttysize { #include __BEGIN_DECLS -int ioctl __P((int, unsigned long, ...)); +int ioctl(int, unsigned long, ...); __END_DECLS #endif /* !KERNEL */ #endif /* !_SYS_IOCTL_H_ */ @@ -99,9 +99,9 @@ __END_DECLS * Compatibility with old terminal driver * * Source level -> #define USE_OLD_TTY - * Kernel level -> options COMPAT_43 or COMPAT_SUNOS + * Kernel level -> options COMPAT_SUNOS */ #if defined(USE_OLD_TTY) || COMPAT_43 || defined(COMPAT_SUNOS) || \ - defined(COMPAT_SVR4) || defined(COMPAT_NEXT_3X) + defined(COMPAT_SVR4) || defined(COMPAT_NEXT_3X) || COMPAT_43_TTY #include #endif /* !_SYS_IOCTL_H_ */ diff --git a/bsd/sys/ioctl_compat.h b/bsd/sys/ioctl_compat.h index 9ff3a2bc1..11d837db0 100644 --- a/bsd/sys/ioctl_compat.h +++ b/bsd/sys/ioctl_compat.h @@ -123,26 +123,33 @@ struct sgttyb { #define EVENP 0x00000080 /* get/send even parity */ #define ANYP 0x000000c0 /* get any parity/send none */ #define NLDELAY 0x00000300 /* \n delay */ +#define TBDELAY 0x00000c00 /* horizontal tab delay */ +#define XTABS 0x00000c00 /* expand tabs on output */ +#define CRDELAY 0x00003000 /* \r delay */ +#define VTDELAY 0x00004000 /* vertical tab delay */ +#define BSDELAY 0x00008000 /* \b delay */ +#ifndef _SYS_TERMIOS_H_ +/* + * These manifest constants have the same names as those in , + * so you are not permitted to have both definitions in scope simultaneously + * in the same compilation unit. + */ #define NL0 0x00000000 #define NL1 0x00000100 /* tty 37 */ #define NL2 0x00000200 /* vt05 */ #define NL3 0x00000300 -#define TBDELAY 0x00000c00 /* horizontal tab delay */ #define TAB0 0x00000000 #define TAB1 0x00000400 /* tty 37 */ #define TAB2 0x00000800 -#define XTABS 0x00000c00 /* expand tabs on output */ -#define CRDELAY 0x00003000 /* \r delay */ #define CR0 0x00000000 #define CR1 0x00001000 /* tn 300 */ #define CR2 0x00002000 /* tty 37 */ #define CR3 0x00003000 /* concept 100 */ -#define VTDELAY 0x00004000 /* vertical tab delay */ #define FF0 0x00000000 #define FF1 0x00004000 /* tty 37 */ -#define BSDELAY 0x00008000 /* \b delay */ #define BS0 0x00000000 #define BS1 0x00008000 +#endif /* !_SYS_TERMIOS_H_ */ #define ALLDELAY (NLDELAY|TBDELAY|CRDELAY|VTDELAY|BSDELAY) #define CRTBS 0x00010000 /* do backspacing for crt */ #define PRTERA 0x00020000 /* \ ... / erase */
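The ioctl() prototype above merely loses its __P() wrapper; the calling convention is unchanged. A minimal caller, for reference (editor's sketch, not from the patch; struct winsize and TIOCGWINSZ come from the tty headers that <sys/ioctl.h> pulls in):

#include <sys/ioctl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
    struct winsize ws;

    /* ask the terminal driver for the current window dimensions */
    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == -1)
        return 1;
    printf("%u rows x %u columns\n", ws.ws_row, ws.ws_col);
    return 0;
}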
diff --git a/bsd/sys/ipc.h b/bsd/sys/ipc.h index 5c642955e..13479cf6a 100644 --- a/bsd/sys/ipc.h +++ b/bsd/sys/ipc.h @@ -64,42 +64,116 @@ * @(#)ipc.h 8.4 (Berkeley) 2/19/95 */ -#include - /* * SVID compatible ipc.h file */ #ifndef _SYS_IPC_H_ #define _SYS_IPC_H_ -struct ipc_perm { - ushort cuid; /* creator user id */ - ushort cgid; /* creator group id */ - ushort uid; /* user id */ - ushort gid; /* group id */ - ushort mode; /* r/w permission */ - ushort seq; /* sequence # (to generate unique msg/sem/shm id) */ - key_t key; /* user specified msg/sem/shm key */ +#include +#include + +#include + +/* + * [XSI] The uid_t, gid_t, mode_t, and key_t types SHALL be defined as + * described in . + */ +#ifndef _UID_T +typedef __darwin_uid_t uid_t; /* user id */ +#define _UID_T +#endif + +#ifndef _GID_T +typedef __darwin_gid_t gid_t; +#define _GID_T +#endif + +#ifndef _MODE_T +typedef __darwin_mode_t mode_t; +#define _MODE_T +#endif + +#ifndef _KEY_T +#define _KEY_T +typedef __int32_t key_t; +#endif + +/* + * Technically, we should force all code references to the new structure + * definition, not just in the standards conformance case, and leave the + * legacy interface there for binary compatibility only. Currently, we + * are only forcing this for programs requesting standards conformance. + */ +#if defined(__POSIX_C_SOURCE) || defined(kernel) || defined(__LP64__) +/* + * [XSI] Information used in determining permission to perform an IPC + * operation + */ +struct __ipc_perm_new { + uid_t uid; /* [XSI] Owner's user ID */ + gid_t gid; /* [XSI] Owner's group ID */ + uid_t cuid; /* [XSI] Creator's user ID */ + gid_t cgid; /* [XSI] Creator's group ID */ + mode_t mode; /* [XSI] Read/write permission */ + unsigned short _seq; /* Reserved for internal use */ + key_t _key; /* Reserved for internal use */ }; +#define ipc_perm __ipc_perm_new +#else /* !_POSIX_C_SOURCE */ +#define ipc_perm __ipc_perm_old +#endif /* !_POSIX_C_SOURCE */ -/* common mode bits */ -#define IPC_R 000400 /* read permission */ -#define IPC_W 000200 /* write/alter permission */ -#define IPC_M 010000 /* permission to change control info */ +#if !defined(__POSIX_C_SOURCE) && !defined(__LP64__) +/* + * Legacy structure; this structure is maintained for binary backward + * compatibility with previous versions of the interface. New code + * should not use this interface, since ID values may be truncated.
+ */ +struct __ipc_perm_old { + __uint16_t cuid; /* Creator's user ID */ + __uint16_t cgid; /* Creator's group ID */ + __uint16_t uid; /* Owner's user ID */ + __uint16_t gid; /* Owner's group ID */ + mode_t mode; /* Read/Write permission */ + __uint16_t seq; /* Reserved for internal use */ + key_t key; /* Reserved for internal use */ +}; +#endif /* !_POSIX_C_SOURCE */ + +/* + * [XSI] Definitions shall be provided for the following constants: + */ + +/* Mode bits */ +#define IPC_CREAT 001000 /* Create entry if key does not exist */ +#define IPC_EXCL 002000 /* Fail if key exists */ +#define IPC_NOWAIT 004000 /* Error if request must wait */ -/* SVID required constants (same values as system 5) */ -#define IPC_CREAT 001000 /* create entry if key does not exist */ -#define IPC_EXCL 002000 /* fail if key exists */ -#define IPC_NOWAIT 004000 /* error if request must wait */ +/* Keys */ +#define IPC_PRIVATE ((key_t)0) /* Private key */ -#define IPC_PRIVATE (key_t)0 /* private key */ +/* Control commands */ +#define IPC_RMID 0 /* Remove identifier */ +#define IPC_SET 1 /* Set options */ +#define IPC_STAT 2 /* Get options */ -#define IPC_RMID 0 /* remove identifier */ -#define IPC_SET 1 /* set options */ -#define IPC_STAT 2 /* get options */ -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifndef _POSIX_C_SOURCE + +/* common mode bits */ +#define IPC_R 000400 /* Read permission */ +#define IPC_W 000200 /* Write/alter permission */ +#define IPC_M 010000 /* Modify control info permission */ + +#endif /* !_POSIX_C_SOURCE */ + + +#ifdef BSD_KERNEL_PRIVATE +/* + * Kernel implementation details which should not be utilized by user + * space programs. + */ /* Macros to convert between ipc ids and array indices or sequence ids */ #define IPCID_TO_IX(id) ((id) & 0xffff) @@ -108,18 +182,16 @@ struct ipc_perm { struct ucred; -int ipcperm __P((struct ucred *, struct ipc_perm *, int)); -#endif /* __APPLE_API_PRIVATE */ -#else /* ! KERNEL */ - -/* XXX doesn't really belong here, but has been historical practice in SysV. */ +int ipcperm(struct ucred *, struct ipc_perm *, int); +#endif /* BSD_KERNEL_PRIVATE */ -#include +#ifndef KERNEL __BEGIN_DECLS -key_t ftok __P((const char *, int)); +/* [XSI] */ +key_t ftok(const char *, int); __END_DECLS -#endif /* KERNEL */ +#endif /* !KERNEL */ #endif /* !_SYS_IPC_H_ */ diff --git a/bsd/sys/ipcs.h b/bsd/sys/ipcs.h new file mode 100644 index 000000000..e4a6e23f4 --- /dev/null +++ b/bsd/sys/ipcs.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * NOTE: Internal ipcs.h header; all interfaces are private; if you want this + * same information from your own program, popen(3) the ipcs(2) command and + * parse its output, or your program may not work on future OS releases. + */ + +#ifndef _SYS_IPCS_H_ +#define _SYS_IPCS_H_ + +#include +#include + +#define IPCS_MAGIC 0x00000001 /* Version */ + +/* + * IPCS_command + * + * This is the IPCS command structure used for obtaining status about the + * System V IPC mechanisms. All other operations are based on the per + * subsystem (shm, msg, ipc) *ctl entry point, which can be called once + * this information is known. + */ + +struct IPCS_command { + int ipcs_magic; /* Magic number for struct layout */ + int ipcs_op; /* Operation to perform */ + int ipcs_cursor; /* Cursor for iteration functions */ + int ipcs_datalen; /* Length of ipcs_data area */ + void *ipcs_data; /* OP specific data */ +}; + +#ifdef KERNEL_PRIVATE +#include + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_IPCS_command { + int ipcs_magic; /* Magic number for struct layout */ + int ipcs_op; /* Operation to perform */ + int ipcs_cursor; /* Cursor for iteration functions */ + int ipcs_datalen; /* Length of ipcs_data area */ + user_addr_t ipcs_data; /* OP specific data */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL_PRIVATE */ + +/* + * OP code values for 'ipcs_op' + */ +#define IPCS_SHM_CONF 0x00000001 /* Obtain shared memory config */ +#define IPCS_SHM_ITER 0x00000002 /* Iterate shared memory info */ + +#define IPCS_SEM_CONF 0x00000010 /* Obtain semaphore config */ +#define IPCS_SEM_ITER 0x00000020 /* Iterate semaphore info */ + +#define IPCS_MSG_CONF 0x00000100 /* Obtain message queue config */ +#define IPCS_MSG_ITER 0x00000200 /* Iterate message queue info */ + +/* + * Sysctl oid name values + */ +#define IPCS_SHM_SYSCTL "kern.sysv.ipcs.shm" +#define IPCS_SEM_SYSCTL "kern.sysv.ipcs.sem" +#define IPCS_MSG_SYSCTL "kern.sysv.ipcs.msg" + + +#endif /* _SYS_IPCS_H_ */ diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h new file mode 100644 index 000000000..eb87187e9 --- /dev/null +++ b/bsd/sys/kauth.h @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_KAUTH_H +#define _SYS_KAUTH_H + +#include +#include + +#ifdef __APPLE_API_EVOLVING + +/* + * Identities. 
+ */ + +#define KAUTH_UID_NONE (~(uid_t)0 - 100) /* not a valid UID */ +#define KAUTH_GID_NONE (~(gid_t)0 - 100) /* not a valid GID */ + +#ifndef _KAUTH_GUID +#define _KAUTH_GUID +/* Apple-style globally unique identifier */ +typedef struct { +#define KAUTH_GUID_SIZE 16 /* 128-bit identifier */ + unsigned char g_guid[KAUTH_GUID_SIZE]; +} guid_t; +#define _GUID_T +#endif /* _KAUTH_GUID */ + +/* NT Security Identifier, structure as defined by Microsoft */ +#pragma pack(1) /* push packing of 1 byte */ +typedef struct { + u_int8_t sid_kind; + u_int8_t sid_authcount; + u_int8_t sid_authority[6]; +#define KAUTH_NTSID_MAX_AUTHORITIES 16 + u_int32_t sid_authorities[KAUTH_NTSID_MAX_AUTHORITIES]; +} ntsid_t; +#pragma pack() /* pop packing to previous packing level */ +#define _NTSID_T + +/* valid byte count inside a SID structure */ +#define KAUTH_NTSID_HDRSIZE (8) +#define KAUTH_NTSID_SIZE(_s) (KAUTH_NTSID_HDRSIZE + ((_s)->sid_authcount * sizeof(u_int32_t))) + +/* + * External lookup message payload + */ +struct kauth_identity_extlookup { + u_int32_t el_seqno; /* request sequence number */ + u_int32_t el_result; /* lookup result */ +#define KAUTH_EXTLOOKUP_SUCCESS 0 /* results here are good */ +#define KAUTH_EXTLOOKUP_BADRQ 1 /* request badly formatted */ +#define KAUTH_EXTLOOKUP_FAILURE 2 /* transient failure during lookup */ +#define KAUTH_EXTLOOKUP_FATAL 3 /* permanent failure during lookup */ +#define KAUTH_EXTLOOKUP_INPROG 100 /* request in progress */ + u_int32_t el_flags; +#define KAUTH_EXTLOOKUP_VALID_UID (1<<0) +#define KAUTH_EXTLOOKUP_VALID_UGUID (1<<1) +#define KAUTH_EXTLOOKUP_VALID_USID (1<<2) +#define KAUTH_EXTLOOKUP_VALID_GID (1<<3) +#define KAUTH_EXTLOOKUP_VALID_GGUID (1<<4) +#define KAUTH_EXTLOOKUP_VALID_GSID (1<<5) +#define KAUTH_EXTLOOKUP_WANT_UID (1<<6) +#define KAUTH_EXTLOOKUP_WANT_UGUID (1<<7) +#define KAUTH_EXTLOOKUP_WANT_USID (1<<8) +#define KAUTH_EXTLOOKUP_WANT_GID (1<<9) +#define KAUTH_EXTLOOKUP_WANT_GGUID (1<<10) +#define KAUTH_EXTLOOKUP_WANT_GSID (1<<11) +#define KAUTH_EXTLOOKUP_WANT_MEMBERSHIP (1<<12) +#define KAUTH_EXTLOOKUP_VALID_MEMBERSHIP (1<<13) +#define KAUTH_EXTLOOKUP_ISMEMBER (1<<14) + uid_t el_uid; /* user ID */ + guid_t el_uguid; /* user GUID */ + u_int32_t el_uguid_valid; /* TTL on translation result (seconds) */ + ntsid_t el_usid; /* user NT SID */ + u_int32_t el_usid_valid; /* TTL on translation result (seconds) */ + gid_t el_gid; /* group ID */ + guid_t el_gguid; /* group GUID */ + u_int32_t el_gguid_valid; /* TTL on translation result (seconds) */ + ntsid_t el_gsid; /* group SID */ + u_int32_t el_gsid_valid; /* TTL on translation result (seconds) */ + u_int32_t el_member_valid; /* TTL on group lookup result */ +}; + +#define KAUTH_EXTLOOKUP_REGISTER (0) +#define KAUTH_EXTLOOKUP_RESULT (1<<0) +#define KAUTH_EXTLOOKUP_WORKER (1<<1) + + +#ifdef KERNEL +/* + * Credentials. + */ + +#if 0 +/* + * Supplemental credential data. + * + * This interface allows us to associate arbitrary data with a credential. + * As with the credential, the data is considered immutable. 
+ */ +struct kauth_cred_supplement { + TAILQ_ENTRY(kauth_cred_supplement) kcs_link; + + int kcs_ref; /* reference count */ + int kcs_id; /* vended identifier */ + size_t kcs_size; /* size of data field */ + char kcs_data[0]; +}; + +typedef struct kauth_cred_supplement *kauth_cred_supplement_t; + +struct kauth_cred { + TAILQ_ENTRY(kauth_cred) kc_link; + + int kc_ref; /* reference count */ + uid_t kc_uid; /* effective user id */ + uid_t kc_ruid; /* real user id */ + uid_t kc_svuid; /* saved user id */ + gid_t kc_gid; /* effective group id */ + gid_t kc_rgid; /* real group id */ + gid_t kc_svgid; /* saved group id */ + + int kc_flags; +#define KAUTH_CRED_GRPOVERRIDE (1<<0) /* private group list is authoritative */ + + int kc_npvtgroups; /* private group list, advisory or authoritative */ + gid_t kc_pvtgroups[NGROUPS]; /* based on KAUTH_CRED_GRPOVERRIDE flag */ + + int kc_nsuppgroups; /* supplementary group list */ + gid_t *kc_suppgroups; + + int kc_nwhtgroups; /* whiteout group list */ + gid_t *kc_whtgroups; + + struct auditinfo cr_au; /* user auditing data */ + + int kc_nsupplement; /* entry count in supplemental data pointer array */ + kauth_cred_supplement_t *kc_supplement; +}; +#else + +/* XXX just for now */ +#include +// typedef struct ucred *kauth_cred_t; +#endif + +/* Kernel SPI for now */ +__BEGIN_DECLS +extern uid_t kauth_getuid(void); +extern uid_t kauth_getruid(void); +extern gid_t kauth_getgid(void); +extern gid_t kauth_getrgid(void); +extern kauth_cred_t kauth_cred_get(void); +extern kauth_cred_t kauth_cred_get_with_ref(void); +extern kauth_cred_t kauth_cred_proc_ref(proc_t procp); +extern kauth_cred_t kauth_cred_alloc(void); +extern kauth_cred_t kauth_cred_create(kauth_cred_t cred); +extern void kauth_cred_ref(kauth_cred_t _cred); +extern void kauth_cred_rele(kauth_cred_t _cred); +extern kauth_cred_t kauth_cred_dup(kauth_cred_t cred); +extern kauth_cred_t kauth_cred_copy_real(kauth_cred_t cred); +extern void kauth_cred_unref(kauth_cred_t _cred); +extern kauth_cred_t kauth_cred_setuid(kauth_cred_t cred, uid_t uid); +extern kauth_cred_t kauth_cred_seteuid(kauth_cred_t cred, uid_t euid); +extern kauth_cred_t kauth_cred_setgid(kauth_cred_t cred, gid_t gid); +extern kauth_cred_t kauth_cred_setegid(kauth_cred_t cred, gid_t egid); +extern kauth_cred_t kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); +extern kauth_cred_t kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); +extern kauth_cred_t kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmuid); +extern kauth_cred_t kauth_cred_find(kauth_cred_t cred); +extern int kauth_cred_getgroups(gid_t *_groups, int *_groupcount); +extern int kauth_cred_assume(uid_t _uid); +extern uid_t kauth_cred_getuid(kauth_cred_t _cred); +extern gid_t kauth_cred_getgid(kauth_cred_t _cred); +extern int kauth_cred_guid2uid(guid_t *_guid, uid_t *_uidp); +extern int kauth_cred_guid2gid(guid_t *_guid, gid_t *_gidp); +extern int kauth_cred_ntsid2uid(ntsid_t *_sid, uid_t *_uidp); +extern int kauth_cred_ntsid2gid(ntsid_t *_sid, gid_t *_gidp); +extern int kauth_cred_ntsid2guid(ntsid_t *_sid, guid_t *_guidp); +extern int kauth_cred_uid2guid(uid_t _uid, guid_t *_guidp); +extern int kauth_cred_getguid(kauth_cred_t _cred, guid_t *_guidp); +extern int kauth_cred_gid2guid(gid_t _gid, guid_t *_guidp); +extern int kauth_cred_uid2ntsid(uid_t _uid, ntsid_t *_sidp); +extern int kauth_cred_getntsid(kauth_cred_t _cred, ntsid_t *_sidp); +extern int kauth_cred_gid2ntsid(gid_t _gid, ntsid_t *_sidp); +extern int 
kauth_cred_guid2ntsid(guid_t *_guid, ntsid_t *_sidp); +extern int kauth_cred_ismember_gid(kauth_cred_t _cred, gid_t _gid, int *_resultp); +extern int kauth_cred_ismember_guid(kauth_cred_t _cred, guid_t *_guidp, int *_resultp); + +extern int kauth_cred_supplementary_register(const char *name, int *ident); +extern int kauth_cred_supplementary_add(kauth_cred_t cred, int ident, const void *data, size_t datasize); +extern int kauth_cred_supplementary_remove(kauth_cred_t cred, int ident); + +/* NOT KPI - fast path for in-kernel code only */ +extern int kauth_cred_issuser(kauth_cred_t _cred); + + +/* GUID, NTSID helpers */ +extern guid_t kauth_null_guid; +extern int kauth_guid_equal(guid_t *_guid1, guid_t *_guid2); +extern int kauth_ntsid_equal(ntsid_t *_sid1, ntsid_t *_sid2); + +extern int kauth_wellknown_guid(guid_t *_guid); +#define KAUTH_WKG_NOT 0 /* not a well-known GUID */ +#define KAUTH_WKG_OWNER 1 +#define KAUTH_WKG_GROUP 2 +#define KAUTH_WKG_NOBODY 3 +#define KAUTH_WKG_EVERYBODY 4 + +extern int cantrace(proc_t cur_procp, kauth_cred_t creds, proc_t traced_procp, int *errp); + +__END_DECLS + +#endif /* KERNEL */ + +/* + * Generic Access Control Lists. + */ +#if defined(KERNEL) || defined (_SYS_ACL_H) + +typedef u_int32_t kauth_ace_rights_t; + +/* Access Control List Entry (ACE) */ +struct kauth_ace { + guid_t ace_applicable; + u_int32_t ace_flags; +#define KAUTH_ACE_KINDMASK 0xf +#define KAUTH_ACE_PERMIT 1 +#define KAUTH_ACE_DENY 2 +#define KAUTH_ACE_AUDIT 3 /* not implemented */ +#define KAUTH_ACE_ALARM 4 /* not implemented */ +#define KAUTH_ACE_INHERITED (1<<4) +#define KAUTH_ACE_FILE_INHERIT (1<<5) +#define KAUTH_ACE_DIRECTORY_INHERIT (1<<6) +#define KAUTH_ACE_LIMIT_INHERIT (1<<7) +#define KAUTH_ACE_ONLY_INHERIT (1<<8) +#define KAUTH_ACE_SUCCESS (1<<9) /* not implemented (AUDIT/ALARM) */ +#define KAUTH_ACE_FAILURE (1<<10) /* not implemented (AUDIT/ALARM) */ + kauth_ace_rights_t ace_rights; /* scope specific */ + /* These rights are never tested, but may be present in an ACL */ +#define KAUTH_ACE_GENERIC_ALL (1<<21) +#define KAUTH_ACE_GENERIC_EXECUTE (1<<22) +#define KAUTH_ACE_GENERIC_WRITE (1<<23) +#define KAUTH_ACE_GENERIC_READ (1<<24) + +}; + +#ifndef _KAUTH_ACE +#define _KAUTH_ACE +typedef struct kauth_ace *kauth_ace_t; +#endif + + +/* Access Control List */ +struct kauth_acl { + u_int32_t acl_entrycount; + u_int32_t acl_flags; + + struct kauth_ace acl_ace[]; +}; + +/* + * XXX this value needs to be raised - 3893388 + */ +#define KAUTH_ACL_MAX_ENTRIES 128 + +/* + * The low 16 bits of the flags field are reserved for filesystem + * internal use and must be preserved by all APIs. This includes + * round-tripping flags through user-space interfaces. + */ +#define KAUTH_ACL_FLAGS_PRIVATE (0xffff) + +/* + * The high 16 bits of the flags are used to store attributes and + * to request specific handling of the ACL. + */ + +/* inheritance will be deferred until the first rename operation */ +#define KAUTH_ACL_DEFER_INHERIT (1<<16) +/* this ACL must not be overwritten as part of an inheritance operation */ +#define KAUTH_ACL_NO_INHERIT (1<<17) + +#define KAUTH_ACL_SIZE(c) (sizeof(struct kauth_acl) + (c) * sizeof(struct kauth_ace)) +#define KAUTH_ACL_COPYSIZE(p) KAUTH_ACL_SIZE((p)->acl_entrycount) + + +#ifndef _KAUTH_ACL +#define _KAUTH_ACL +typedef struct kauth_acl *kauth_acl_t; +#endif + +#ifdef KERNEL +__BEGIN_DECLS +kauth_acl_t kauth_acl_alloc(int size); +void kauth_acl_free(kauth_acl_t fsp); +__END_DECLS +#endif + + +/* + * Extended File Security. 
+ */ + +/* File Security information */ +struct kauth_filesec { + u_int32_t fsec_magic; +#define KAUTH_FILESEC_MAGIC 0x012cc16d + guid_t fsec_owner; + guid_t fsec_group; + + struct kauth_acl fsec_acl; + /* acl_entrycount that tells us the ACL is not valid */ +#define KAUTH_FILESEC_NOACL ((u_int32_t)(-1)) +}; + +/* backwards compatibility */ +#define fsec_entrycount fsec_acl.acl_entrycount +#define fsec_flags fsec_acl.acl_flags +#define fsec_ace fsec_acl.acl_ace +#define KAUTH_FILESEC_FLAGS_PRIVATE KAUTH_ACL_FLAGS_PRIVATE +#define KAUTH_FILESEC_DEFER_INHERIT KAUTH_ACL_DEFER_INHERIT +#define KAUTH_FILESEC_NO_INHERIT KAUTH_ACL_NO_INHERIT +#define KAUTH_FILESEC_NONE ((kauth_filesec_t)0) +#define KAUTH_FILESEC_WANTED ((kauth_filesec_t)1) + +#ifndef _KAUTH_FILESEC +#define _KAUTH_FILESEC +typedef struct kauth_filesec *kauth_filesec_t; +#endif + +#define KAUTH_FILESEC_SIZE(c) (sizeof(struct kauth_filesec) + (c) * sizeof(struct kauth_ace)) +#define KAUTH_FILESEC_COPYSIZE(p) KAUTH_FILESEC_SIZE(((p)->fsec_entrycount == KAUTH_FILESEC_NOACL) ? 0 : (p)->fsec_entrycount) +#define KAUTH_FILESEC_COUNT(s) ((s - sizeof(struct kauth_filesec)) / sizeof(struct kauth_ace)) + +#define KAUTH_FILESEC_XATTR "com.apple.system.Security" + +__BEGIN_DECLS +kauth_filesec_t kauth_filesec_alloc(int size); +void kauth_filesec_free(kauth_filesec_t fsp); +int kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp); +__END_DECLS + +#endif /* KERNEL || */ + + +#ifdef KERNEL +/* + * Scope management. + */ +struct kauth_scope; +typedef struct kauth_scope *kauth_scope_t; +struct kauth_listener; +typedef struct kauth_listener *kauth_listener_t; +#ifndef _KAUTH_ACTION_T +typedef int kauth_action_t; +# define _KAUTH_ACTION_T +#endif + +typedef int (* kauth_scope_callback_t)(kauth_cred_t _credential, + void *_idata, + kauth_action_t _action, + uintptr_t _arg0, + uintptr_t _arg1, + uintptr_t _arg2, + uintptr_t _arg3); + +#define KAUTH_RESULT_ALLOW (1) +#define KAUTH_RESULT_DENY (2) +#define KAUTH_RESULT_DEFER (3) + +struct kauth_acl_eval { + kauth_ace_t ae_acl; + int ae_count; + kauth_ace_rights_t ae_requested; + kauth_ace_rights_t ae_residual; + int ae_result; + int ae_options; +#define KAUTH_AEVAL_IS_OWNER (1<<0) /* authorizing operation for owner */ +#define KAUTH_AEVAL_IN_GROUP (1<<1) /* authorizing operation for groupmember */ + /* expansions for 'generic' rights bits */ + kauth_ace_rights_t ae_exp_gall; + kauth_ace_rights_t ae_exp_gread; + kauth_ace_rights_t ae_exp_gwrite; + kauth_ace_rights_t ae_exp_gexec; +}; + +typedef struct kauth_acl_eval *kauth_acl_eval_t; + +__BEGIN_DECLS +extern kauth_scope_t kauth_register_scope(const char *_identifier, kauth_scope_callback_t _callback, void *_idata); +extern void kauth_deregister_scope(kauth_scope_t _scope); +extern kauth_listener_t kauth_listen_scope(const char *_identifier, kauth_scope_callback_t _callback, void *_idata); +extern void kauth_unlisten_scope(kauth_listener_t _scope); +extern int kauth_authorize_action(kauth_scope_t _scope, kauth_cred_t _credential, kauth_action_t _action, + uintptr_t _arg0, uintptr_t _arg1, uintptr_t _arg2, uintptr_t _arg3); +extern int kauth_acl_evaluate(kauth_cred_t _credential, kauth_acl_eval_t _eval); +extern int kauth_acl_inherit(vnode_t _dvp, kauth_acl_t _initial, kauth_acl_t *_product, int _isdir, vfs_context_t _ctx); + +/* default scope handlers */ +extern int kauth_authorize_allow(kauth_cred_t _credential, void *_idata, kauth_action_t _action, + uintptr_t _arg0, uintptr_t _arg1, uintptr_t _arg2, uintptr_t _arg3); +__END_DECLS + 
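The register/listen/authorize entry points above are the heart of the new kauth KPI. A minimal passive listener on the fileop scope (the scope and its actions are defined further below) might look like this (editor's sketch, not from the patch):

#include <sys/kauth.h>

static kauth_listener_t fileop_listener;

/* matches kauth_scope_callback_t; invoked for every fileop authorization */
static int
fileop_callback(kauth_cred_t cred, void *idata, kauth_action_t action,
    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
    if (action == KAUTH_FILEOP_EXEC) {
        /* for this action, arg1 is the path (char *) to the executable */
        printf("exec of %s\n", (const char *)arg1);
    }
    /* fileop listeners are advisory; always defer to the default result */
    return (KAUTH_RESULT_DEFER);
}

void
listener_start(void)
{
    fileop_listener = kauth_listen_scope(KAUTH_SCOPE_FILEOP,
        fileop_callback, NULL);
}

void
listener_stop(void)
{
    if (fileop_listener != NULL)
        kauth_unlisten_scope(fileop_listener);
}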
+/* + * Generic scope. + */ +#define KAUTH_SCOPE_GENERIC "com.apple.kauth.generic" + +/* Actions */ +#define KAUTH_GENERIC_ISSUSER 1 + +__BEGIN_DECLS +extern int kauth_authorize_generic(kauth_cred_t credential, kauth_action_t action); +__END_DECLS + +/* + * Process/task scope. + */ +#define KAUTH_SCOPE_PROCESS "com.apple.kauth.process" + +/* Actions */ +#define KAUTH_PROCESS_CANSIGNAL 1 +#define KAUTH_PROCESS_CANTRACE 2 + +__BEGIN_DECLS +extern int kauth_authorize_process(kauth_cred_t _credential, kauth_action_t _action, + struct proc *_process, uintptr_t _arg1, uintptr_t _arg2, uintptr_t _arg3); +__END_DECLS + +/* + * Vnode operation scope. + * + * Prototype for vnode_authorize is in vnode.h + */ +#define KAUTH_SCOPE_VNODE "com.apple.kauth.vnode" + +/* + * File system operation scope. + * + */ +#define KAUTH_SCOPE_FILEOP "com.apple.kauth.fileop" + +/* Actions */ +#define KAUTH_FILEOP_OPEN 1 +#define KAUTH_FILEOP_CLOSE 2 +#define KAUTH_FILEOP_RENAME 3 +#define KAUTH_FILEOP_EXCHANGE 4 +#define KAUTH_FILEOP_LINK 5 +#define KAUTH_FILEOP_EXEC 6 + +/* + * arguments passed to KAUTH_FILEOP_OPEN listeners + * arg0 is pointer to vnode (vnode *) for given user path. + * arg1 is pointer to path (char *) passed in to open. + * arguments passed to KAUTH_FILEOP_CLOSE listeners + * arg0 is pointer to vnode (vnode *) for file to be closed. + * arg1 is pointer to path (char *) of file to be closed. + * arg2 is close flags. + * arguments passed to KAUTH_FILEOP_RENAME listeners + * arg0 is pointer to "from" path (char *). + * arg1 is pointer to "to" path (char *). + * arguments passed to KAUTH_FILEOP_EXCHANGE listeners + * arg0 is pointer to file 1 path (char *). + * arg1 is pointer to file 2 path (char *). + * arguments passed to KAUTH_FILEOP_LINK listeners + * arg0 is pointer to path to file we are linking to (char *). + * arg1 is pointer to path to the new link file (char *). + * arguments passed to KAUTH_FILEOP_EXEC listeners + * arg0 is pointer to vnode (vnode *) for executable. + * arg1 is pointer to path (char *) to executable. + */ + +/* Flag values returned to close listeners. 
*/ +#define KAUTH_FILEOP_CLOSE_MODIFIED (1<<1) + +__BEGIN_DECLS +extern int kauth_authorize_fileop_has_listeners(void); +extern int kauth_authorize_fileop(kauth_cred_t _credential, kauth_action_t _action, + uintptr_t _arg0, uintptr_t _arg1); +__END_DECLS + +#endif /* KERNEL */ + +/* Actions, also rights bits in an ACE */ + +#if defined(KERNEL) || defined (_SYS_ACL_H) +#define KAUTH_VNODE_READ_DATA (1<<1) +#define KAUTH_VNODE_LIST_DIRECTORY KAUTH_VNODE_READ_DATA +#define KAUTH_VNODE_WRITE_DATA (1<<2) +#define KAUTH_VNODE_ADD_FILE KAUTH_VNODE_WRITE_DATA +#define KAUTH_VNODE_EXECUTE (1<<3) +#define KAUTH_VNODE_SEARCH KAUTH_VNODE_EXECUTE +#define KAUTH_VNODE_DELETE (1<<4) +#define KAUTH_VNODE_APPEND_DATA (1<<5) +#define KAUTH_VNODE_ADD_SUBDIRECTORY KAUTH_VNODE_APPEND_DATA +#define KAUTH_VNODE_DELETE_CHILD (1<<6) +#define KAUTH_VNODE_READ_ATTRIBUTES (1<<7) +#define KAUTH_VNODE_WRITE_ATTRIBUTES (1<<8) +#define KAUTH_VNODE_READ_EXTATTRIBUTES (1<<9) +#define KAUTH_VNODE_WRITE_EXTATTRIBUTES (1<<10) +#define KAUTH_VNODE_READ_SECURITY (1<<11) +#define KAUTH_VNODE_WRITE_SECURITY (1<<12) +#define KAUTH_VNODE_TAKE_OWNERSHIP (1<<13) + +/* backwards compatibility only */ +#define KAUTH_VNODE_CHANGE_OWNER KAUTH_VNODE_TAKE_OWNERSHIP + +/* For Windows interoperability only */ +#define KAUTH_VNODE_SYNCHRONIZE (1<<20) + +/* (1<<21) - (1<<24) are reserved for generic rights bits */ + +/* Actions not expressed as rights bits */ +/* + * Authorizes the vnode as the target of a hard link. + */ +#define KAUTH_VNODE_LINKTARGET (1<<25) + +/* + * Indicates that other steps have been taken to authorise the action, + * but authorisation should be denied for immutable objects. + */ +#define KAUTH_VNODE_CHECKIMMUTABLE (1<<26) + +/* Action modifiers */ +/* + * The KAUTH_VNODE_ACCESS bit is passed to the callback if the authorisation + * request in progress is advisory, rather than authoritative. Listeners + * performing consequential work (i.e. not strictly checking authorisation) + * may test this flag to avoid performing unnecessary work. + * + * This bit will never be present in an ACE. + */ +#define KAUTH_VNODE_ACCESS (1<<31) + +/* + * The KAUTH_VNODE_NOIMMUTABLE bit is passed to the callback along with the + * KAUTH_VNODE_WRITE_SECURITY bit (and no others) to indicate that the + * caller wishes to change one or more of the immutable flags, and the + * state of these flags should not be considered when authorizing the request. + * The system immutable flags are only ignored when the system securelevel + * is low enough to allow their removal. + */ +#define KAUTH_VNODE_NOIMMUTABLE (1<<30) + +/* The expansions of the GENERIC bits at evaluation time */ +#define KAUTH_VNODE_GENERIC_READ_BITS (KAUTH_VNODE_READ_DATA | \ + KAUTH_VNODE_READ_ATTRIBUTES | \ + KAUTH_VNODE_READ_EXTATTRIBUTES | \ + KAUTH_VNODE_READ_SECURITY) + +#define KAUTH_VNODE_GENERIC_WRITE_BITS (KAUTH_VNODE_WRITE_DATA | \ + KAUTH_VNODE_APPEND_DATA | \ + KAUTH_VNODE_DELETE | \ + KAUTH_VNODE_DELETE_CHILD | \ + KAUTH_VNODE_WRITE_ATTRIBUTES | \ + KAUTH_VNODE_WRITE_EXTATTRIBUTES | \ + KAUTH_VNODE_WRITE_SECURITY) + +#define KAUTH_VNODE_GENERIC_EXECUTE_BITS (KAUTH_VNODE_EXECUTE) + +#define KAUTH_VNODE_GENERIC_ALL_BITS (KAUTH_VNODE_GENERIC_READ_BITS | \ + KAUTH_VNODE_GENERIC_WRITE_BITS | \ + KAUTH_VNODE_GENERIC_EXECUTE_BITS) + +/* + * Some sets of bits, defined here for convenience. 
+ */ +#define KAUTH_VNODE_WRITE_RIGHTS (KAUTH_VNODE_ADD_FILE | \ + KAUTH_VNODE_ADD_SUBDIRECTORY | \ + KAUTH_VNODE_DELETE_CHILD | \ + KAUTH_VNODE_WRITE_DATA | \ + KAUTH_VNODE_APPEND_DATA | \ + KAUTH_VNODE_DELETE | \ + KAUTH_VNODE_WRITE_ATTRIBUTES | \ + KAUTH_VNODE_WRITE_EXTATTRIBUTES | \ + KAUTH_VNODE_WRITE_SECURITY | \ + KAUTH_VNODE_TAKE_OWNERSHIP | \ + KAUTH_VNODE_LINKTARGET | \ + KAUTH_VNODE_CHECKIMMUTABLE) + + +#endif /* KERNEL || */ + +#ifdef KERNEL +#include /* lck_grp_t */ + +/* + * Debugging + * + * XXX this wouldn't be necessary if we had a *real* debug-logging system. + */ +#if 0 +# ifndef _FN_KPRINTF +# define _FN_KPRINTF +void kprintf(const char *fmt, ...); +# endif +# define KAUTH_DEBUG_ENABLE +# define K_UUID_FMT "%08x:%08x:%08x:%08x" +# define K_UUID_ARG(_u) *(int *)&_u.g_guid[0],*(int *)&_u.g_guid[4],*(int *)&_u.g_guid[8],*(int *)&_u.g_guid[12] +# define KAUTH_DEBUG(fmt, args...) do { kprintf("%s:%d: " fmt "\n", __PRETTY_FUNCTION__, __LINE__ , ##args); } while (0) +# define KAUTH_DEBUG_CTX(_c) KAUTH_DEBUG("p = %p c = %p", _c->vc_proc, _c->vc_ucred) +# define VFS_DEBUG(_ctx, _vp, fmt, args...) \ + do { \ + kprintf("%p '%s' %s:%d " fmt "\n", \ + _ctx, \ + (_vp != NULL && _vp->v_name != NULL) ? _vp->v_name : "????", \ + __PRETTY_FUNCTION__, __LINE__ , \ + ##args); \ + } while(0) +#else +# define KAUTH_DEBUG(fmt, args...) do { } while (0) +# define VFS_DEBUG(ctx, vp, fmt, args...) do { } while(0) +#endif + +/* + * Initialisation. + */ +extern lck_grp_t *kauth_lck_grp; +__BEGIN_DECLS +extern void kauth_init(void); +extern void kauth_identity_init(void); +extern void kauth_groups_init(void); +extern void kauth_cred_init(void); +extern void kauth_resolver_init(void); +__END_DECLS +#endif + +#endif /* __APPLE_API_EVOLVING */ +#endif /* _SYS_KAUTH_H */ + diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 0f00781f6..28f6456c7 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -75,9 +75,11 @@ __BEGIN_DECLS #define DBG_DRIVERS 6 #define DBG_TRACE 7 #define DBG_DLIL 8 +#define DBG_SECURITY 9 #define DBG_MISC 20 #define DBG_DYLD 31 #define DBG_QT 32 +#define DBG_APPS 33 #define DBG_MIG 255 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ @@ -96,6 +98,7 @@ __BEGIN_DECLS #define DBG_MACH_VM 0x30 /* Virtual Memory */ #define DBG_MACH_SCHED 0x40 /* Scheduler */ #define DBG_MACH_MSGID_INVALID 0x50 /* Messages - invalid */ +#define DBG_MACH_LOCKS 0x60 /* new lock APIs */ /* Codes for Scheduler (DBG_MACH_SCHED) */ #define MACH_SCHED 0x0 /* Scheduler */ @@ -211,8 +214,10 @@ __BEGIN_DECLS #define TRACEDBG_CODE(SubClass,code) KDBG_CODE(DBG_TRACE, SubClass, code) #define MISCDBG_CODE(SubClass,code) KDBG_CODE(DBG_MISC, SubClass, code) #define DLILDBG_CODE(SubClass,code) KDBG_CODE(DBG_DLIL, SubClass, code) +#define SECURITYDBG_CODE(SubClass,code) KDBG_CODE(DBG_SECURITY, SubClass, code) #define DYLDDBG_CODE(SubClass,code) KDBG_CODE(DBG_DYLD, SubClass, code) #define QTDBG_CODE(SubClass,code) KDBG_CODE(DBG_QT, SubClass, code) +#define APPSDBG_CODE(SubClass,code) KDBG_CODE(DBG_APPS, SubClass, code) /* Usage: * kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), @@ -262,6 +267,13 @@ extern void kernel_debug(unsigned int debugid, unsigned int arg1, unsigned int a extern void kernel_debug1(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5); +/* + * LP64todo - for some reason these are problematic + */ +extern void kdbg_trace_data(struct proc *proc, long *arg_pid); + +extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4); + #if KDEBUG #define KERNEL_DEBUG(x,a,b,c,d,e) \ @@ -276,18 +288,21 @@ do { \ kernel_debug1(x,a,b,c,d,e); \ } while(0) +#define __kdebug_only + #else #define KERNEL_DEBUG(x,a,b,c,d,e) #define KERNEL_DEBUG1(x,a,b,c,d,e) +#define __kdebug_only __unused #endif #endif /* __APPLE_API_UNSTABLE */ __END_DECLS -#ifdef KERNEL_PRIVATE +#ifdef PRIVATE #ifdef __APPLE_API_PRIVATE /* * private kernel_debug definitions @@ -303,8 +318,9 @@ unsigned int arg5; /* will hold current thread */ unsigned int debugid; } kd_buf; -#define KDBG_THREAD_MASK 0x7fffffff -#define KDBG_CPU_MASK 0x80000000 +#define KDBG_TIMESTAMP_MASK 0x00ffffffffffffffULL +#define KDBG_CPU_MASK 0x0f00000000000000ULL +#define KDBG_CPU_SHIFT 56 /* Debug Flags */ #define KDBG_INIT 0x1 @@ -315,6 +331,7 @@ unsigned int debugid; #define KDBG_PIDCHECK 0x10 #define KDBG_MAPINIT 0x20 #define KDBG_PIDEXCLUDE 0x40 +#define KDBG_LOCKINIT 0x80 typedef struct { unsigned int type; @@ -393,11 +410,11 @@ typedef struct int npcbufs; int bufsize; int enable; - unsigned long pcsample_beg; - unsigned long pcsample_end; + unsigned int pcsample_beg; + unsigned int pcsample_end; } pcinfo_t; #endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL_PRIVATE */ +#endif /* PRIVATE */ #endif /* !BSD_SYS_KDEBUG_H */ diff --git a/bsd/sys/kern_audit.h b/bsd/sys/kern_audit.h deleted file mode 100644 index 7475e299e..000000000 --- a/bsd/sys/kern_audit.h +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. 
Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ - -#ifndef _SYS_KERN_AUDIT_H -#define _SYS_KERN_AUDIT_H - -#ifdef KERNEL - -/* - * Audit subsystem condition flags. The audit_enabled flag is set and - * removed automatically as a result of configuring log files, and - * can be observed but should not be directly manipulated. The audit - * suspension flag permits audit to be temporarily disabled without - * reconfiguring the audit target. - */ -extern int audit_enabled; -extern int audit_suspended; - -#define BSM_SUCCESS 0 -#define BSM_FAILURE 1 -#define BSM_NOAUDIT 2 - -/* - * Define the masks for the audited arguments. - */ -#define ARG_EUID 0x0000000000000001ULL -#define ARG_RUID 0x0000000000000002ULL -#define ARG_SUID 0x0000000000000004ULL -#define ARG_EGID 0x0000000000000008ULL -#define ARG_RGID 0x0000000000000010ULL -#define ARG_SGID 0x0000000000000020ULL -#define ARG_PID 0x0000000000000040ULL -#define ARG_UID 0x0000000000000080ULL -#define ARG_AUID 0x0000000000000100ULL -#define ARG_GID 0x0000000000000200ULL -#define ARG_FD 0x0000000000000400ULL -#define UNUSED 0x0000000000000800ULL -#define ARG_FFLAGS 0x0000000000001000ULL -#define ARG_MODE 0x0000000000002000ULL -#define ARG_DEV 0x0000000000004000ULL -#define ARG_ACCMODE 0x0000000000008000ULL -#define ARG_CMODE 0x0000000000010000ULL -#define ARG_MASK 0x0000000000020000ULL -#define ARG_SIGNUM 0x0000000000040000ULL -#define ARG_LOGIN 0x0000000000080000ULL -#define ARG_SADDRINET 0x0000000000100000ULL -#define ARG_SADDRINET6 0x0000000000200000ULL -#define ARG_SADDRUNIX 0x0000000000400000ULL -#define ARG_KPATH1 0x0000000000800000ULL -#define ARG_KPATH2 0x0000000001000000ULL -#define ARG_UPATH1 0x0000000002000000ULL -#define ARG_UPATH2 0x0000000004000000ULL -#define ARG_TEXT 0x0000000008000000ULL -#define ARG_VNODE1 0x0000000010000000ULL -#define ARG_VNODE2 0x0000000020000000ULL -#define ARG_SVIPC_CMD 0x0000000040000000ULL -#define ARG_SVIPC_PERM 0x0000000080000000ULL -#define ARG_SVIPC_ID 0x0000000100000000ULL -#define ARG_SVIPC_ADDR 0x0000000200000000ULL -#define ARG_GROUPSET 0x0000000400000000ULL -#define ARG_CMD 0x0000000800000000ULL -#define ARG_SOCKINFO 0x0000001000000000ULL -#define ARG_NONE 0x0000000000000000ULL -#define ARG_ALL 0xFFFFFFFFFFFFFFFFULL - -struct vnode_au_info { - mode_t vn_mode; - uid_t vn_uid; - gid_t vn_gid; - dev_t vn_dev; - long vn_fsid; - long vn_fileid; - long vn_gen; -}; - -struct groupset { - gid_t gidset[NGROUPS]; - u_int gidset_size; -}; - -struct socket_info { - int sodomain; - int sotype; - int soprotocol; -}; - -struct audit_record { - /* Audit record header. */ - u_int32_t ar_magic; - int ar_event; - int ar_retval; /* value returned to the process */ - int ar_errno; /* return status of system call */ - struct timespec ar_starttime; - struct timespec ar_endtime; - u_int64_t ar_valid_arg; /* Bitmask of valid arguments */ - - /* Audit subject information. 
*/ - struct xucred ar_subj_cred; - uid_t ar_subj_ruid; - gid_t ar_subj_rgid; - gid_t ar_subj_egid; - uid_t ar_subj_auid; /* Audit user ID */ - pid_t ar_subj_asid; /* Audit session ID */ - pid_t ar_subj_pid; - struct au_tid ar_subj_term; - char ar_subj_comm[MAXCOMLEN + 1]; - struct au_mask ar_subj_amask; - - /* Operation arguments. */ - uid_t ar_arg_euid; - uid_t ar_arg_ruid; - uid_t ar_arg_suid; - gid_t ar_arg_egid; - gid_t ar_arg_rgid; - gid_t ar_arg_sgid; - pid_t ar_arg_pid; - uid_t ar_arg_uid; - uid_t ar_arg_auid; - gid_t ar_arg_gid; - struct groupset ar_arg_groups; - int ar_arg_fd; - int ar_arg_fflags; - mode_t ar_arg_mode; - int ar_arg_dev; - int ar_arg_accmode; - int ar_arg_cmode; - int ar_arg_mask; - u_int ar_arg_signum; - char ar_arg_login[MAXLOGNAME]; - struct sockaddr ar_arg_sockaddr; - struct socket_info ar_arg_sockinfo; - char *ar_arg_upath1; - char *ar_arg_upath2; - char *ar_arg_kpath1; - char *ar_arg_kpath2; - char *ar_arg_text; - struct au_mask ar_arg_amask; - struct vnode_au_info ar_arg_vnode1; - struct vnode_au_info ar_arg_vnode2; - int ar_arg_cmd; - int ar_arg_svipc_cmd; - struct ipc_perm ar_arg_svipc_perm; - int ar_arg_svipc_id; - void * ar_arg_svipc_addr; -}; - -/* - * In-kernel version of audit record; the basic record plus queue meta-data. - * This record can also have a pointer set to some opaque data that will - * be passed through to the audit writing mechanism. - */ -struct kaudit_record { - struct audit_record k_ar; - caddr_t k_udata; /* user data */ - u_int k_ulen; /* user data length */ - struct uthread *k_uthread; /* thread we are auditing */ - TAILQ_ENTRY(kaudit_record) k_q; -}; - -struct proc; -struct vnode; -struct componentname; - -void audit_abort(struct kaudit_record *ar); -void audit_commit(struct kaudit_record *ar, int error, - int retval); -void audit_init(void); -void audit_shutdown(void); - -struct kaudit_record *audit_new(int event, struct proc *p, - struct uthread *uthread); - -void audit_syscall_enter(unsigned short code, struct proc *proc, struct uthread *uthread); -void audit_syscall_exit(int error, struct proc *proc, - struct uthread *uthread); - -int kaudit_to_bsm(struct kaudit_record *kar, - struct au_record **pau); - -int bsm_rec_verify(caddr_t rec); - -/* - * Kernel versions of the BSM audit record functions. - */ -struct au_record *kau_open(void); -int kau_write(struct au_record *rec, token_t *m); -int kau_close(struct au_record *rec, - struct timespec *endtime, short event); -void kau_free(struct au_record *rec); -void kau_init(void); -token_t *kau_to_file(char *file, struct timeval *tv); -token_t *kau_to_header(struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header32(struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header64(struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -/* - * The remaining kernel functions are conditionally compiled in as they - * are wrapped by a macro, and the macro should be the only place in - * the source tree where these functions are referenced. 
- */ -#ifdef AUDIT -void audit_arg_accmode(int mode); -void audit_arg_cmode(int cmode); -void audit_arg_fd(int fd); -void audit_arg_fflags(int fflags); -void audit_arg_gid(gid_t gid, gid_t egid, gid_t rgid, - gid_t sgid); -void audit_arg_uid(uid_t uid, uid_t euid, uid_t ruid, - uid_t suid); -void audit_arg_groupset(gid_t *gidset, u_int gidset_size); -void audit_arg_login(char[MAXLOGNAME]); -void audit_arg_mask(int mask); -void audit_arg_mode(mode_t mode); -void audit_arg_dev(int dev); -void audit_arg_owner(uid_t uid, gid_t gid); -void audit_arg_pid(pid_t pid); -void audit_arg_signum(u_int signum); -void audit_arg_socket(int sodomain, int sotype, - int soprotocol); -void audit_arg_sockaddr(struct proc *p, - struct sockaddr *so); -void audit_arg_auid(uid_t auid); -void audit_arg_upath(struct proc *p, char *upath, - u_int64_t flags); -void audit_arg_vnpath(struct vnode *vp, u_int64_t flags); -void audit_arg_text(char *text); -void audit_arg_cmd(int cmd); -void audit_arg_svipc_cmd(int cmd); -void audit_arg_svipc_perm(struct ipc_perm *perm); -void audit_arg_svipc_id(int id); -void audit_arg_svipc_addr(void *addr); - -void audit_proc_init(struct proc *p); -void audit_proc_fork(struct proc *parent, - struct proc *child); -void audit_proc_free(struct proc *p); - -/* - * Define a macro to wrap the audit_arg_* calls by checking the global - * audit_enabled flag before performing the actual call. - */ -#define AUDIT_ARG(op, args...) do { \ - if (audit_enabled) \ - audit_arg_ ## op (args); \ - } while (0) - -#define AUDIT_CMD(audit_cmd) do { \ - if (audit_enabled) { \ - audit_cmd; \ - } \ - } while (0) - -#else /* !AUDIT */ -#define AUDIT_ARG(op, args...) do { \ - } while (0) - -#define AUDIT_CMD(audit_cmd) do { \ - } while (0) - -#endif /* AUDIT */ - -#endif /* KERNEL */ - -#endif /* !_SYS_KERN_AUDIT_H */ diff --git a/bsd/sys/kern_control.h b/bsd/sys/kern_control.h index 04b37c06a..f032e79a2 100644 --- a/bsd/sys/kern_control.h +++ b/bsd/sys/kern_control.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,220 +19,409 @@ * * @APPLE_LICENSE_HEADER_END@ */ +/*! + @header kern_control.h + This header defines an API to communicate between a kernel + extension and a process outside of the kernel. + */ +#ifndef KPI_KERN_CONTROL_H +#define KPI_KERN_CONTROL_H -#ifndef SYS_KERN_CONTROL_H -#define SYS_KERN_CONTROL_H #include -#ifdef __APPLE_API_UNSTABLE /* * Define Controller event subclass, and associated events. + * Subclass of KEV_SYSTEM_CLASS */ -/* Subclass of KEV_SYSTEM_CLASS */ -#define KEV_CTL_SUBCLASS 1 +/*! + @defined KEV_CTL_SUBCLASS + @discussion The kernel event subclass for kernel control events. +*/ +#define KEV_CTL_SUBCLASS 2 + +/*! + @defined KEV_CTL_REGISTERED + @discussion The event code indicating a new controller was + registered. The data portion will contain a ctl_event_data. +*/ +#define KEV_CTL_REGISTERED 1 /* a new controller appears */ -#define KEV_CTL_REGISTERED 1 /* a new controller appears */ -#define KEV_CTL_DEREGISTERED 2 /* a controller disappears */ +/*! + @defined KEV_CTL_DEREGISTERED + @discussion The event code indicating a controller was unregistered. + The data portion will contain a ctl_event_data. +*/ +#define KEV_CTL_DEREGISTERED 2 /* a controller disappears */ -/* All KEV_CTL_SUBCLASS events share the same header */ +/*! + @struct ctl_event_data + @discussion This structure is used for KEV_CTL_SUBCLASS kernel + events. 
+ @field ctl_id The kernel control id. + @field ctl_unit The kernel control unit. +*/ struct ctl_event_data { - u_int32_t ctl_id; + u_int32_t ctl_id; /* Kernel Controller ID */ u_int32_t ctl_unit; }; - /* * Controls destined to the Controller Manager. */ -#define CTLIOCGCOUNT _IOR('N', 1, int) /* get number of control structures registered */ +/*! + @defined CTLIOCGCOUNT + @discussion The CTLIOCGCOUNT ioctl can be used to determine the + number of kernel controllers registered. +*/ +#define CTLIOCGCOUNT _IOR('N', 2, int) /* get number of control structures registered */ + +/*! + @defined CTLIOCGINFO + @discussion The CTLIOCGINFO ioctl can be used to convert a kernel + control name to a kernel control id. +*/ +#define CTLIOCGINFO _IOWR('N', 3, struct ctl_info) /* get id from name */ + + +/*! + @defined MAX_KCTL_NAME + @discussion Kernel control names must be no longer than + MAX_KCTL_NAME. +*/ +#define MAX_KCTL_NAME 96 /* - * Controller address structure - * used to establish contact between user client and kernel controller - * sc_id/sc_unit uniquely identify each controller - * sc_id is a 32-bit "signature" obtained by developers from Apple Computer - * sc_unit is a unit number for this sc_id, and is privately used - * by the developper to identify several instances to control + * Controls destined to the Controller Manager. */ -struct sockaddr_ctl -{ - u_char sc_len; /* sizeof(struct sockaddr_ctl) */ - u_char sc_family; /* AF_SYSTEM */ - u_int16_t ss_sysaddr; /* AF_SYS_CONTROL */ - u_int32_t sc_id; /* 32-bit "signature" managed by Apple */ +/*! + @struct ctl_info + @discussion This structure is used with the CTLIOCGINFO ioctl to + translate from a kernel control name to a control id. + @field ctl_id The kernel control id, filled out upon return. + @field ctl_name The kernel control name to find. +*/ +struct ctl_info { + u_int32_t ctl_id; /* Kernel Controller ID */ + char ctl_name[MAX_KCTL_NAME]; /* Kernel Controller Name (a C string) */ +}; + + +/*! + @struct sockaddr_ctl + @discussion The controller address structure is used to establish + contact between a user client and a kernel controller. The + sc_id/sc_unit uniquely identify each controller. sc_id is a + unique identifier assigned to the controller. The identifier can + be assigned by the system at registration time or be a 32-bit + creator code obtained from Apple Computer. sc_unit is a unit + number for this sc_id, and is privately used by the kernel + controller to identify several instances of the controller. + @field sc_len The length of the structure. + @field sc_family AF_SYSTEM. + @field ss_sysaddr AF_SYS_KERNCONTROL. + @field sc_id Controller unique identifier. + @field sc_unit Kernel controller private unit number. + @field sc_reserved Reserved, must be set to zero. +*/ +struct sockaddr_ctl { + u_char sc_len; /* depends on size of bundle ID string */ + u_char sc_family; /* AF_SYSTEM */ + u_int16_t ss_sysaddr; /* AF_SYS_KERNCONTROL */ + u_int32_t sc_id; /* Controller unique identifier */ u_int32_t sc_unit; /* Developer private unit number */ u_int32_t sc_reserved[5]; }; -#endif /* __APPLE_API_UNSTABLE */ #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE -/* Reference to a controller object */ +#include + +/*! + @typedef kern_ctl_ref + @discussion A control reference is used to track an attached kernel + control. Registering a kernel control will create a kernel + control reference. This reference is required for sending data + or removing the kernel control. This reference will be passed to + callbacks for that kernel control. 
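The ctl_info and sockaddr_ctl definitions above determine the user-space connection sequence: look up the (possibly dynamic) ctl_id by name with CTLIOCGINFO, then connect a control socket. A hedged sketch follows; the control name passed in is hypothetical, and PF_SYSTEM, SYSPROTO_CONTROL, AF_SYSTEM, and AF_SYS_CONTROL are assumed to come from <sys/socket.h> and <sys/sys_domain.h>.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>
#include <sys/ioctl.h>
#include <string.h>
#include <unistd.h>

/* Returns a connected control socket, or -1 on failure. */
int
demo_connect_control(const char *name, u_int32_t unit)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

	if (fd < 0)
		return (-1);

	/* Translate the control name to its (possibly dynamic) ctl_id. */
	memset(&info, 0, sizeof(info));
	strncpy(info.ctl_name, name, sizeof(info.ctl_name) - 1);
	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {
		close(fd);
		return (-1);
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;	/* assumed from <sys/sys_domain.h> */
	addr.sc_id = info.ctl_id;
	addr.sc_unit = unit;

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		close(fd);
		return (-1);
	}
	return (fd);
}

For example, demo_connect_control("com.example.democtl", 0) would reach unit 0 of a dynamically registered control with that (hypothetical) name.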
+*/ typedef void * kern_ctl_ref; -/* Support flags for controllers */ -#define CTL_FLAG_PRIVILEGED 0x1 /* user must be root to contact controller */ +/*! + @defined CTL_FLAG_PRIVILEGED + @discussion The CTL_FLAG_PRIVILEGED flag is passed in ctl_flags. If + this flag is set, only privileged processes may attach to this + kernel control. +*/ +#define CTL_FLAG_PRIVILEGED 0x1 +/*! + @defined CTL_FLAG_REG_ID_UNIT + @discussion The CTL_FLAG_REG_ID_UNIT flag is passed to indicate that + the ctl_id specified should be used. If this flag is not + present, a unique ctl_id will be dynamically assigned to your + kernel control. The CTLIOCGINFO ioctl can be used by the client + to find the dynamically assigned id based on the control name + specified in ctl_name. +*/ +#define CTL_FLAG_REG_ID_UNIT 0x2 +/*! + @defined CTL_FLAG_REG_SOCK_STREAM + @discussion Use the CTL_FLAG_REG_SOCK_STREAM flag when a client needs to open + a socket of type SOCK_STREAM to communicate with the kernel control. + By default kernel control sockets are of type SOCK_DGRAM. +*/ +#define CTL_FLAG_REG_SOCK_STREAM 0x4 /* Data flags for controllers */ -#define CTL_DATA_NOWAKEUP 0x1 /* don't wake up client yet */ +/*! + @defined CTL_DATA_NOWAKEUP + @discussion The CTL_DATA_NOWAKEUP flag can be used for the enqueue + data and enqueue mbuf functions to indicate that the process + should not be woken up yet. This is useful when you want to + enqueue data using more than one call but only want to wake up + the client after all of the data has been enqueued. +*/ +#define CTL_DATA_NOWAKEUP 0x1 +/*! + @defined CTL_DATA_EOR + @discussion The CTL_DATA_EOR flag can be used for the enqueue + data and enqueue mbuf functions to mark the end of a record. +*/ +#define CTL_DATA_EOR 0x2 +/*! + @typedef ctl_connect_func + @discussion The ctl_connect_func is used to receive + notification of a client connecting to the kernel control. + @param kctlref The control ref for the kernel control the client is + connecting to. + @param sac The address used to connect to this control. The field sc_unit + contains the unit number of the kernel control instance the client is + connecting to. If CTL_FLAG_REG_ID_UNIT was set when the kernel control + was registered, sc_unit is the ctl_unit of the kern_ctl_reg structure. + If CTL_FLAG_REG_ID_UNIT was not set when the kernel control was + registered, sc_unit is the dynamically allocated unit number of + the new kernel control instance that is used for this connection. + @param unitinfo A place for the kernel control to store a pointer to + per-connection data. + */ +typedef errno_t (*ctl_connect_func)(kern_ctl_ref kctlref, + struct sockaddr_ctl *sac, + void **unitinfo); -/* - * Controller registration structure, given at registration time +/*! + @typedef ctl_disconnect_func + @discussion The ctl_disconnect_func is used to receive notification + that a client has disconnected from the kernel control. This + usually happens when the socket is closed. If this is the last + socket attached to your kernel control, you may unregister your + kernel control from this callback. + @param kctlref The control ref for the kernel control instance the client has + disconnected from. + @param unit The unit number of the kernel control instance the client has + disconnected from. + @param unitinfo The unitinfo value specified by the connect function + when the client connected. */ +typedef errno_t (*ctl_disconnect_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo); + +/*!
+ @typedef ctl_send_func + @discussion The ctl_send_func is used to receive data sent from + the client to the kernel control. + @param kctlref The control ref of the kernel control. + @param unit The unit number of the kernel control instance the client has + connected to. + @param unitinfo The unitinfo value specified by the connect function + when the client connected. + @param m The data sent by the client to the kernel control in an + mbuf chain. + @param flags The flags specified by the client when calling + send/sendto/sendmsg (MSG_OOB/MSG_DONTROUTE). + */ +typedef errno_t (*ctl_send_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + mbuf_t m, int flags); + +/*! + @typedef ctl_setopt_func + @discussion The ctl_setopt_func is used to handle set socket option + calls for the SYSPROTO_CONTROL option level. + @param kctlref The control ref of the kernel control. + @param unit The unit number of the kernel control instance. + @param unitinfo The unitinfo value specified by the connect function + when the client connected. + @param opt The socket option. + @param data A pointer to the socket option data. The data has + already been copied in to the kernel for you. + @param len The length of the socket option data. + */ +typedef errno_t (*ctl_setopt_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t len); + +/*! + @typedef ctl_getopt_func + @discussion The ctl_getopt_func is used to handle client get socket + option requests for the SYSPROTO_CONTROL option level. A buffer + is allocated for storage and passed to your function. The length + of that buffer is also passed. Upon return, you should set *len + to length of the buffer used. In some cases, data may be NULL. + When this happens, *len should be set to the length you would + have returned had data not been NULL. If the buffer is too small, + return an error. + @param kctlref The control ref of the kernel control. + @param unit The unit number of the kernel control instance. + @param unitinfo The unitinfo value specified by the connect function + when the client connected. + @param opt The socket option. + @param data A buffer to copy the results in to. May be NULL, see + discussion. + @param len A pointer to the length of the buffer. This should be set + to the length of the buffer used before returning. + */ +typedef errno_t (*ctl_getopt_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t *len); + +/*! + @struct kern_ctl_reg + @discussion This structure defines the properties of a kernel + control being registered. + @field ctl_name A Bundle ID string of up to MAX_KCTL_NAME bytes (including the ending zero). + This string should not be empty. + @field ctl_id The control ID may be dynamically assigned or it can be a + 32-bit creator code assigned by DTS. + For a DTS assigned creator code the CTL_FLAG_REG_ID_UNIT flag must be set. + For a dynamically assigned control ID, do not set the CTL_FLAG_REG_ID_UNIT flag. + The value of the dynamically assigned control ID is set to this field + when the registration succeeds. + @field ctl_unit A separate unit number to register multiple units that + share the same control ID with DTS assigned creator code when + the CTL_FLAG_REG_ID_UNIT flag is set. + This field is ignored for a dynamically assigned control ID. + @field ctl_flags CTL_FLAG_PRIVILEGED and/or CTL_FLAG_REG_ID_UNIT. + @field ctl_sendsize Override the default send size. 
If set to zero, + the default send size will be used, and this default value + is set to this field to be retrieved by the caller. + @field ctl_recvsize Override the default receive size. If set to + zero, the default receive size will be used, and this default value + is set to this field to be retrieved by the caller. + @field ctl_connect Specify the function to be called whenever a client + connects to the kernel control. This field must be specified. + @field ctl_disconnect Specify a function to be called whenever a + client disconnects from the kernel control. + @field ctl_send Specify a function to handle data send from the + client to the kernel control. + @field ctl_setopt Specify a function to handle set socket option + operations for the kernel control. + @field ctl_getopt Specify a function to handle get socket option + operations for the kernel control. +*/ struct kern_ctl_reg { - /* control information */ - u_int32_t ctl_id; /* unique id of the controller, provided by DTS */ - u_int32_t ctl_unit; /* unit number for the controller, for the specified id */ - /* a controller can be registered several times with the same id */ - /* but must have a different unit number */ - + /* control information */ + char ctl_name[MAX_KCTL_NAME]; + u_int32_t ctl_id; + u_int32_t ctl_unit; + /* control settings */ - u_int32_t ctl_flags; /* support flags */ - u_int32_t ctl_sendsize; /* override send/receive buffer size */ - u_int32_t ctl_recvsize; /* 0 = use default values */ + u_int32_t ctl_flags; + u_int32_t ctl_sendsize; + u_int32_t ctl_recvsize; /* Dispatch functions */ - - int (*ctl_connect) - (kern_ctl_ref ctlref, void *userdata); - /* Make contact, called when user client calls connect */ - /* the socket with the id/unit of the controller */ - - void (*ctl_disconnect) - (kern_ctl_ref ctlref, void *userdata); - /* Break contact, called when user client */ - /* closes the control socket */ - - int (*ctl_write) - (kern_ctl_ref ctlref, void *userdata, struct mbuf *m); - /* Send data to the controller, called when user client */ - /* writes data to the socket */ - - int (*ctl_set) - (kern_ctl_ref ctlref, void *userdata, int opt, void *data, size_t len); - /* set controller configuration, called when user client */ - /* calls setsockopt() for the socket */ - /* opt is the option number */ - /* data points to the data, already copied in kernel space */ - /* len is the lenght of the data buffer */ - - int (*ctl_get) - (kern_ctl_ref ctlref, void *userdata, int opt, void *data, size_t *len); - /* get controller configuration, called when user client */ - /* calls getsockopt() for the socket */ - /* opt is the option number */ - /* data points to the data buffer of max lenght len */ - /* the controller can directly copy data in the buffer space */ - /* and does not need to worry about copying out the data */ - /* as long as it respects the max buffer lenght */ - /* on input, len contains the maximum buffer length */ - /* on output, len contains the actual buffer lenght */ - /* if data is NULL on input, then, by convention, the controller */ - /* should return in len the lenght of the data it would like */ - /* to return in the subsequent call for that option */ - - /* prepare the future */ - u_int32_t ctl_reserved[4]; /* for future use if needed */ + ctl_connect_func ctl_connect; + ctl_disconnect_func ctl_disconnect; + ctl_send_func ctl_send; + ctl_setopt_func ctl_setopt; + ctl_getopt_func ctl_getopt; }; +/*! + @function ctl_register + @discussion Register a kernel control. 
This will enable clients to + connect to the kernel control using a PF_SYSTEM socket. + @param userkctl A structure defining the kernel control to be + attached. The ctl_connect callback must be specified, the other callbacks + are optional. If ctl_connect is set to zero, ctl_register fails with + the error code EINVAL. + @param kctlref Upon successful return, the kctlref will contain a + reference to the attached kernel control. This reference is used + to unregister the kernel control. This reference will also be + passed in to the callbacks each time they are called. + @result 0 - Kernel control was registered. + EINVAL - The registration structure was not valid. + ENOMEM - There was insufficient memory. + EEXIST - A controller with that id/unit is already registered. + */ +errno_t +ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref); -/* - * FUNCTION : - * Register the controller to the controller manager - * For example, can be called from a Kernel Extension Start routine - * - * PARAMETERS : - * userctl : Registration structure containing control information - * and callback functions for the controller. - * Callbacks are optional and can be null. - * A controller with all callbacks set to null would not be very useful. - * userdata : This parameter is for use by the controller and - * will be passed to every callback function - * - * RETURN CODE : - * 0 : No error - * ctlref will be filled with a control reference, - * to use in subsequent call to the controller manager - * EINVAL : Invalid registration structure - * ENOMEM : Not enough memory available to register the controller - * EEXIST : Controller id/unit already registered +/*! + @function ctl_deregister + @discussion Unregister a kernel control. A kernel extension must + unregister its kernel control(s) before unloading. If a kernel + control has clients attached, this call will fail. + @param kctlref The control reference of the control to unregister. + @result 0 - Kernel control was unregistered. + EINVAL - The kernel control reference was invalid. + EBUSY - The kernel control has clients still attached. */ - -int -ctl_register(struct kern_ctl_reg *userctl, void *userdata, kern_ctl_ref *ctlref); +errno_t +ctl_deregister(kern_ctl_ref kctlref); -/* - * FUNCTION : - * Deregister the controller - * For example, can be called from a Kernel Extension Stop routine - * - * PARAMETERS : - * ctlref : Reference to the controller previously registered - * - * RETURN CODE : - * 0 : No error, - * The controller manager no longer knows about the controller - * EINVAL : Invalid reference +/*! + @function ctl_enqueuedata + @discussion Send data from the kernel control to the client. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param data A pointer to the data to send. + @param len The length of data to send. + @param flags Send flags. CTL_DATA_NOWAKEUP is currently the only + supported flag. + @result 0 - Data was enqueued to be read by the client. + EINVAL - Invalid parameters. + EMSGSIZE - The buffer is too large. + ENOBUFS - The queue is full or there are no free mbufs. */ - -int -ctl_deregister(kern_ctl_ref ctlref); +errno_t +ctl_enqueuedata(kern_ctl_ref kctlref, u_int32_t unit, void *data, size_t len, u_int32_t flags); -/* - * FUNCTION : - * Send data to the application in contact with the controller - * ctl_enqueuedata will allocate a mbuf, copy data and enqueue it.
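Tying ctl_register to the callback typedefs above, a kernel extension's start routine might register a control roughly as sketched below. All demo_* names and the bundle-ID string are hypothetical; ctl_id and ctl_unit are left zero so a dynamic id is assigned, and only the required ctl_connect callback plus a disconnect stub are supplied.

#include <sys/kern_control.h>
#include <string.h>

static kern_ctl_ref demo_ctlref;

static errno_t
demo_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo)
{
	*unitinfo = NULL;	/* no per-connection state in this sketch */
	return (0);
}

static errno_t
demo_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo)
{
	return (0);
}

errno_t
demo_ctl_start(void)
{
	struct kern_ctl_reg reg;

	bzero(&reg, sizeof(reg));
	/* hypothetical bundle-ID style control name */
	strncpy(reg.ctl_name, "com.example.democtl", sizeof(reg.ctl_name) - 1);
	reg.ctl_connect = demo_ctl_connect;	/* required */
	reg.ctl_disconnect = demo_ctl_disconnect;
	return (ctl_register(&reg, &demo_ctlref));
}

errno_t
demo_ctl_stop(void)
{
	/* fails with EBUSY while clients remain attached */
	return (ctl_deregister(demo_ctlref));
}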
- * - * PARAMETERS : - * ctlref : Reference to the controller previously registered - * data : Data to send - * len : Length of the data (maximum lenght of MCLBYTES) - * flags : Flags used when enqueing - * CTL_DATA_NOWAKEUP = just enqueue, don't wake up client - * - * RETURN CODE : - * 0 : No error - * EINVAL: Invalid reference - * EMSGSIZE: The buffer is too large - * ENOTCONN : No user client is connected - * ENOBUFS : Socket buffer is full, or can't get a new mbuf - * The controller should re-enqueue later +/*! + @function ctl_enqueuembuf + @discussion Send data stored in an mbuf chain from the kernel + control to the client. The caller is responsible for freeing + the mbuf chain if ctl_enqueuembuf returns an error. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param m An mbuf chain containing the data to send to the client. + @param flags Send flags. CTL_DATA_NOWAKEUP is currently the only + supported flag. + @result 0 - Data was enqueued to be read by the client. + EINVAL - Invalid parameters. + ENOBUFS - The queue is full. */ - -int -ctl_enqueuedata(kern_ctl_ref ctlref, void *data, size_t len, u_int32_t flags); +errno_t +ctl_enqueuembuf(kern_ctl_ref kctlref, u_int32_t unit, mbuf_t m, u_int32_t flags); -/* - * FUNCTION : - * Send data to the application in contact with the controller - * - * PARAMETERS : - * ctlref : Reference to the controller previously registered - * m : mbuf containing the data to send - * flags : Flags used when enqueing - * CTL_DATA_NOWAKEUP = just enqueue, don't wake up client - * - * RETURN CODE : - * 0 : No error - * EINVAL: Invalid reference - * ENOTCONN : No user client is connected - * ENOBUFS : Socket buffer is full, - * The controller should either free the mbuf or re-enqueue later + +/*! + @function ctl_getenqueuespace + @discussion Retrieve the amount of space currently available for data to be sent + from the kernel control to the client. + @param kctlref The control reference of the kernel control. + @param unit The unit number of the kernel control instance. + @param space The address at which to return the currently available space. + @result 0 - The available space was returned in *space. + EINVAL - Invalid parameters. */ - -int -ctl_enqueuembuf(kern_ctl_ref ctlref, struct mbuf *m, u_int32_t flags); +errno_t +ctl_getenqueuespace(kern_ctl_ref kctlref, u_int32_t unit, size_t *space); + -#endif /* __APPLE_API_UNSTABLE */ #endif /* KERNEL */ -#endif /* SYS_KERN_CONTROL_H */ +#endif /* KPI_KERN_CONTROL_H */ diff --git a/bsd/sys/kern_event.h b/bsd/sys/kern_event.h index 5a232c02a..ca40cc713 100644 --- a/bsd/sys/kern_event.h +++ b/bsd/sys/kern_event.h @@ -20,7 +20,11 @@ * @APPLE_LICENSE_HEADER_END@ */ /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ - +/*! + @header kern_event.h + This header defines in-kernel functions for generating kernel events as well + as functions for receiving kernel events using a kernel event socket. + */ #ifndef SYS_KERN_EVENT_H #define SYS_KERN_EVENT_H @@ -41,54 +45,184 @@ * Vendor Code */ +/*! + @defined KEV_VENDOR_APPLE + @discussion Apple generated kernel events use the hard coded vendor code + value of 1. Third party kernel events use a dynamically allocated vendor + code. The vendor code can be found using the SIOCGKEVVENDOR ioctl.
+*/ #define KEV_VENDOR_APPLE 1 /* - * Definition of top-level classifications + * Definition of top-level classifications for KEV_VENDOR_APPLE */ -#define KEV_NETWORK_CLASS 1 -#define KEV_IOKIT_CLASS 2 -#define KEV_SYSTEM_CLASS 3 +/*! + @defined KEV_NETWORK_CLASS + @discussion Network kernel event class. +*/ +#define KEV_NETWORK_CLASS 1 + +/*! + @defined KEV_IOKIT_CLASS + @discussion IOKit kernel event class. +*/ +#define KEV_IOKIT_CLASS 2 + +/*! + @defined KEV_SYSTEM_CLASS + @discussion System kernel event class. +*/ +#define KEV_SYSTEM_CLASS 3 +/*! + @defined KEV_APPLESHARE_CLASS + @discussion AppleShare kernel event class. +*/ +#define KEV_APPLESHARE_CLASS 4 +/*! + @struct kern_event_msg + @discussion This structure is prepended to all kernel events. This structure + is used to determine the format of the remainder of the kernel event. + This structure will appear on all messages received on a kernel event + socket. To post a kernel event, a slightly different structure is used. + @field total_size Total size of the kernel event message including the + header. + @field vendor_code The vendor code indicates which vendor generated the + kernel event. This gives every vendor a unique set of classes and + subclasses to use. Use the SIOCGKEVVENDOR ioctl to look up vendor codes + for vendors other than Apple. Apple uses KEV_VENDOR_APPLE. + @field kev_class The class of the kernel event. + @field kev_subclass The subclass of the kernel event. + @field id Monotonically increasing value. + @field event_code The event code. + @field event_data Any additional data about this event. Format will depend + on the vendor_code, kev_class, kev_subclass, and event_code. The length + of the event_data can be determined using total_size - + KEV_MSG_HEADER_SIZE. +*/ struct kern_event_msg { - u_long total_size; /* Size of entire event msg */ - u_long vendor_code; /* For non-Apple extensibility */ - u_long kev_class; /* Layer of event source */ - u_long kev_subclass; /* Component within layer */ - u_long id; /* Monotonically increasing value */ - u_long event_code; /* unique code */ - u_long event_data[1]; /* One or more data longwords */ + u_long total_size; /* Size of entire event msg */ + u_long vendor_code; /* For non-Apple extensibility */ + u_long kev_class; /* Layer of event source */ + u_long kev_subclass; /* Component within layer */ + u_long id; /* Monotonically increasing value */ + u_long event_code; /* unique code */ + u_long event_data[1]; /* One or more data longwords */ }; -#define KEV_MSG_HEADER_SIZE (6 * sizeof(u_long)) - +/*! + @defined KEV_MSG_HEADER_SIZE + @discussion Size of the header portion of the kern_event_msg structure. This + accounts for everything right up to event_data. The size of the data can + be found by subtracting KEV_MSG_HEADER_SIZE from the total size from the + kern_event_msg. +*/ +#define KEV_MSG_HEADER_SIZE (offsetof(struct kern_event_msg, event_data[0])) +/*! + @struct kev_request + @discussion This structure is used with the SIOCSKEVFILT and SIOCGKEVFILT to + set and get the event filter setting for a kernel event socket. + @field vendor_code All kernel events that don't match this vendor code will + be ignored. KEV_ANY_VENDOR can be used to receive kernel events with any + vendor code. + @field kev_class All kernel events that don't match this class will be + ignored. KEV_ANY_CLASS can be used to receive kernel events with any + class.
+ @field kev_subclass All kernel events that don't match this subclass will be + ignored. KEV_ANY_SUBCLASS can be used to receive kernel events with any + subclass. +*/ struct kev_request { u_long vendor_code; u_long kev_class; u_long kev_subclass; }; +/*! + @defined KEV_VENDOR_CODE_MAX_STR_LEN + @discussion This define sets the maximum length of a string that can be used + to identify a vendor or kext when looking up a vendor code. +*/ +#define KEV_VENDOR_CODE_MAX_STR_LEN 200 + +/*! + @struct kev_vendor_code + @discussion This structure is used with the SIOCGKEVVENDOR ioctl to convert + from a string identifying a kext or vendor, in the form of a bundle + identifier, to a vendor code. + @field vendor_code After making the SIOCGKEVVENDOR ioctl call, this will + be filled in with the vendor code if there is one. + @field vendor_string A bundle style identifier. +*/ +struct kev_vendor_code { + u_long vendor_code; + char vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN]; +}; + + +/*! + @defined SIOCGKEVID + @discussion Retrieve the current event id. Each event generated will have + a new id. The next event to be generated will have an id of id+1.
+ @result May return ENOMEM if memory constraints prevent allocation of a new + vendor code. + */ +errno_t kev_vendor_code_find(const char *vendor_string, u_long *vender_code); + +/*! + @function kev_msg_post + @discussion Post a kernel event message. + @param event_msg A structure defining the kernel event message to post. + @result Will return zero upon success. May return a number of errors + depending on the type of failure. EINVAL indicates that there was + something wrong with the kerne event. The vendor code of the kernel + event must be assigned using kev_vendor_code_find. If the message is + too large, EMSGSIZE will be returned. + */ +errno_t kev_msg_post(struct kev_msg *event_msg); -#endif /* ___APPLE_API_UNSTABLE */ -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE +/* + * Internal version of kev_post_msg. Allows posting Apple vendor code kernel + * events. + */ +int kev_post_msg(struct kev_msg *event); LIST_HEAD(kern_event_head, kern_event_pcb); @@ -114,7 +276,7 @@ struct kern_event_pcb { #define sotoevpcb(so) ((struct kern_event_pcb *)((so)->so_pcb)) -#endif /* __APPLE_API_PRIVATE */ -#endif -#endif +#endif /* PRIVATE */ +#endif /* KERNEL */ +#endif /* SYS_KERN_EVENT_H */ diff --git a/bsd/sys/kernel.h b/bsd/sys/kernel.h index 420f56e2d..c7a2c0b77 100644 --- a/bsd/sys/kernel.h +++ b/bsd/sys/kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,7 +68,7 @@ #include -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* Global variables for the kernel. */ /* 1.1 */ @@ -79,19 +79,20 @@ extern char domainname[MAXHOSTNAMELEN]; extern int domainnamelen; /* 1.2 */ -extern struct timeval boottime; -#ifdef __APPLE_API_OBSOLETE -extern volatile struct timeval time; -extern struct timezone tz; /* XXX */ +extern int stathz; /* statistics clock's frequency */ +extern int profhz; /* profiling clock's frequency */ +#endif /* BSD_KERNEL_PRIVATE */ + + +#ifdef KERNEL_PRIVATE extern int lbolt; /* once a second sleep address */ + +extern struct timezone tz; /* XXX */ + extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ -extern int stathz; /* statistics clock's frequency */ -extern int profhz; /* profiling clock's frequency */ -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/kernel_types.h b/bsd/sys/kernel_types.h new file mode 100644 index 000000000..89eb0ed0b --- /dev/null +++ b/bsd/sys/kernel_types.h @@ -0,0 +1,127 @@ +#ifndef _KERN_SYS_KERNELTYPES_H_ +#define _KERN_SYS_KERNELTYPES_H_ + +#include +#include +#include + +#ifdef BSD_BUILD +/* Macros(?) to clear/set/test flags. 
*/ +#define SET(t, f) (t) |= (f) +#define CLR(t, f) (t) &= ~(f) +#define ISSET(t, f) ((t) & (f)) +#endif + + +typedef int errno_t; +typedef int64_t daddr64_t; + +typedef int64_t ino64_t; + +#ifndef BSD_BUILD +struct buf; +typedef struct buf * buf_t; + +struct file; +typedef struct file * file_t; + +struct ucred; +typedef struct ucred * ucred_t; + +struct mount; +typedef struct mount * mount_t; + +#ifdef TBDDDD +typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ +#endif TBDDDD + +struct vnode; +typedef struct vnode * vnode_t; + +struct proc; +typedef struct proc * proc_t; + +struct uio; +typedef struct uio * uio_t; + +struct vfs_context; +typedef struct vfs_context * vfs_context_t; + +struct vfstable; +typedef struct vfstable * vfstable_t; + +struct __ifnet; +struct __mbuf; +struct __pkthdr; +struct __socket; +struct __sockopt; +struct __ifaddr; +struct __ifmultiaddr; +struct __ifnet_filter; +struct __rtentry; + +typedef struct __ifnet* ifnet_t; +typedef struct __mbuf* mbuf_t; +typedef struct __pkthdr* pkthdr_t; +typedef struct __socket* socket_t; +typedef struct __sockopt* sockopt_t; +typedef struct __ifaddr* ifaddr_t; +typedef struct __ifmultiaddr* ifmultiaddr_t; +typedef struct __ifnet_filter* interface_filter_t; +typedef struct __rtentry* route_t; + +#else /* BSD_BUILD */ + +typedef struct buf * buf_t; +typedef struct file * file_t; +typedef struct ucred * ucred_t; +typedef struct mount * mount_t; +typedef struct vnode * vnode_t; +typedef struct proc * proc_t; +typedef struct uio * uio_t; +typedef struct user_iovec * user_iovec_t; +typedef struct vfs_context * vfs_context_t; +typedef struct vfstable * vfstable_t; + +#if KERNEL_PRIVATE +typedef struct kern_iovec * kern_iovec_t; +typedef struct ifnet* ifnet_t; +typedef struct mbuf* mbuf_t; +typedef struct pkthdr* pkthdr_t; +typedef struct socket* socket_t; +typedef struct sockopt* sockopt_t; +typedef struct ifaddr* ifaddr_t; +typedef struct ifmultiaddr* ifmultiaddr_t; +typedef struct ifnet_filter* interface_filter_t; +typedef struct rtentry* route_t; +#endif /* KERNEL_PRIVATE */ + +#endif /* !BSD_BUILD */ + +#ifndef _KAUTH_GUID +#define _KAUTH_GUID +/* Apple-style globally unique identifier */ +typedef struct { +#define KAUTH_GUID_SIZE 16 /* 128-bit identifier */ + unsigned char g_guid[KAUTH_GUID_SIZE]; +} guid_t; +#define _GUID_T +#endif /* _KAUTH_GUID */ + +#ifndef _KAUTH_ACE +#define _KAUTH_ACE +struct kauth_ace; +typedef struct kauth_ace * kauth_ace_t; +#endif +#ifndef _KAUTH_ACL +#define _KAUTH_ACL +struct kauth_acl; +typedef struct kauth_acl * kauth_acl_t; +#endif +#ifndef _KAUTH_FILESEC +#define _KAUTH_FILESEC +struct kauth_filesec; +typedef struct kauth_filesec * kauth_filesec_t; +#endif + +#endif /* !_KERN_SYS_KERNELTYPES_H_ */ diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h new file mode 100644 index 000000000..5a1d26d8e --- /dev/null +++ b/bsd/sys/kpi_mbuf.h @@ -0,0 +1,1127 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/*! + @header kpi_mbuf.h + This header defines an API for interacting with mbufs. mbufs are the + primary method of storing packets in the networking stack. + + mbufs are used to store various items in the networking stack. The + most common usage of an mbuf is to store a packet or data on a + socket waiting to be sent or received. The mbuf is a contiguous + structure with some header followed by some data. To store more data + than would fit in an mbuf, external data is used. Most mbufs with + external data use clusters to store the external data. + + mbufs can be chained; contiguous data in a packet can be found by + following the m_next chain. Packets may be bundled together using + m_nextpacket. Many parts of the stack do not properly handle chains + of packets. When in doubt, don't chain packets. + */ + +#ifndef __KPI_MBUF__ +#define __KPI_MBUF__ +#include +#include + +/*! + @enum mbuf_flags_t + @abstract Constants defining mbuf flags. Only the flags listed below + can be set or retrieved. + @constant MBUF_EXT Indicates this mbuf has external data. + @constant MBUF_PKTHDR Indicates this mbuf has a packet header. + @constant MBUF_EOR Indicates this mbuf is the end of a record. + @constant MBUF_BCAST Indicates this packet will be sent or was + received as a broadcast. + @constant MBUF_MCAST Indicates this packet will be sent or was + received as a multicast. + @constant MBUF_FRAG Indicates this packet is a fragment of a larger + packet. + @constant MBUF_FIRSTFRAG Indicates this packet is the first fragment. + @constant MBUF_LASTFRAG Indicates this packet is the last fragment. + @constant MBUF_PROMISC Indicates this packet was only received + because the interface is in promiscuous mode. This should be set + by the demux function. These packets will be discarded after + being passed to any interface filters. +*/ +enum { + MBUF_EXT = 0x0001, /* has associated external storage */ + MBUF_PKTHDR = 0x0002, /* start of record */ + MBUF_EOR = 0x0004, /* end of record */ + + MBUF_BCAST = 0x0100, /* send/received as link-level broadcast */ + MBUF_MCAST = 0x0200, /* send/received as link-level multicast */ + MBUF_FRAG = 0x0400, /* packet is a fragment of a larger packet */ + MBUF_FIRSTFRAG = 0x0800, /* packet is first fragment */ + MBUF_LASTFRAG = 0x1000, /* packet is last fragment */ + MBUF_PROMISC = 0x2000 /* packet is promiscuous */ +}; +typedef u_int32_t mbuf_flags_t; + +/*! + @enum mbuf_type_t + @abstract Types of mbufs. + @discussion Some mbufs represent packets, some represent data waiting + on sockets. Other mbufs store control data or other various + structures. The mbuf type is used to store what sort of data the + mbuf contains. + @constant MBUF_TYPE_FREE Indicates the mbuf is free and is + sitting on the queue of free mbufs. If you find that an mbuf you + have a reference to has this type, something has gone terribly + wrong. + @constant MBUF_TYPE_DATA Indicates this mbuf is being used to store + data. + @constant MBUF_TYPE_HEADER Indicates this mbuf has a packet header, + this is probably a packet.
+/*!
+    @enum mbuf_type_t
+    @abstract Types of mbufs.
+    @discussion Some mbufs represent packets, some represent data waiting
+        on sockets. Other mbufs store control data or other various
+        structures. The mbuf type is used to store what sort of data the
+        mbuf contains.
+    @constant MBUF_TYPE_FREE Indicates the mbuf is free and is
+        sitting on the queue of free mbufs. If you find that an mbuf you
+        have a reference to has this type, something has gone terribly
+        wrong.
+    @constant MBUF_TYPE_DATA Indicates this mbuf is being used to store
+        data.
+    @constant MBUF_TYPE_HEADER Indicates this mbuf has a packet header;
+        this is probably a packet.
+    @constant MBUF_TYPE_SOCKET Socket structure.
+    @constant MBUF_TYPE_PCB Protocol control block.
+    @constant MBUF_TYPE_RTABLE Routing table entry.
+    @constant MBUF_TYPE_HTABLE IMP host tables.
+    @constant MBUF_TYPE_ATABLE Address resolution table data.
+    @constant MBUF_TYPE_SONAME Socket name, usually a sockaddr of some
+        sort.
+    @constant MBUF_TYPE_FTABLE Fragment reassembly header.
+    @constant MBUF_TYPE_RIGHTS Access rights.
+    @constant MBUF_TYPE_IFADDR Interface address.
+    @constant MBUF_TYPE_CONTROL Extra-data protocol message (control
+        message).
+    @constant MBUF_TYPE_OOBDATA Out of band data.
+*/
+enum {
+    MBUF_TYPE_FREE      = 0,    /* should be on free list */
+    MBUF_TYPE_DATA      = 1,    /* dynamic (data) allocation */
+    MBUF_TYPE_HEADER    = 2,    /* packet header */
+    MBUF_TYPE_SOCKET    = 3,    /* socket structure */
+    MBUF_TYPE_PCB       = 4,    /* protocol control block */
+    MBUF_TYPE_RTABLE    = 5,    /* routing tables */
+    MBUF_TYPE_HTABLE    = 6,    /* IMP host tables */
+    MBUF_TYPE_ATABLE    = 7,    /* address resolution tables */
+    MBUF_TYPE_SONAME    = 8,    /* socket name */
+    MBUF_TYPE_SOOPTS    = 10,   /* socket options */
+    MBUF_TYPE_FTABLE    = 11,   /* fragment reassembly header */
+    MBUF_TYPE_RIGHTS    = 12,   /* access rights */
+    MBUF_TYPE_IFADDR    = 13,   /* interface address */
+    MBUF_TYPE_CONTROL   = 14,   /* extra-data protocol message */
+    MBUF_TYPE_OOBDATA   = 15    /* expedited data */
+};
+typedef u_int32_t mbuf_type_t;
+
+/*!
+    @enum mbuf_csum_request_flags_t
+    @abstract Checksum performed/requested flags.
+    @discussion Mbufs often contain packets. Some hardware supports
+        performing checksums in hardware. The stack uses these flags to
+        indicate to the driver what sort of checksumming should be
+        handled by the driver/hardware. These flags will only be set
+        if the driver indicates that it supports the corresponding
+        checksums using ifnet_set_offload.
+    @constant MBUF_CSUM_REQ_IP Indicates the IP checksum has not been
+        calculated yet.
+    @constant MBUF_CSUM_REQ_TCP Indicates the TCP checksum has not been
+        calculated yet.
+    @constant MBUF_CSUM_REQ_UDP Indicates the UDP checksum has not been
+        calculated yet.
+*/
+enum {
+#ifdef KERNEL_PRIVATE
+    MBUF_CSUM_REQ_SUM16 = 0x1000, /* Weird apple hardware checksum */
+#endif /* KERNEL_PRIVATE */
+    MBUF_CSUM_REQ_IP    = 0x0001,
+    MBUF_CSUM_REQ_TCP   = 0x0002,
+    MBUF_CSUM_REQ_UDP   = 0x0004
+};
+typedef u_int32_t mbuf_csum_request_flags_t;
+
+/*!
+    @enum mbuf_csum_performed_flags_t
+    @abstract Checksum performed/requested flags.
+    @discussion Mbufs often contain packets. Some hardware supports
+        performing checksums in hardware. The driver uses these flags to
+        communicate to the stack the checksums that were calculated in
+        hardware.
+    @constant MBUF_CSUM_DID_IP Indicates that the driver/hardware verified
+        the IP checksum in hardware.
+    @constant MBUF_CSUM_IP_GOOD Indicates whether the IP checksum was
+        good or bad. Only valid when MBUF_CSUM_DID_IP is set.
+    @constant MBUF_CSUM_DID_DATA Indicates that the TCP or UDP checksum
+        was calculated. The value for the checksum calculated in
+        hardware should be passed as the second parameter of
+        mbuf_set_csum_performed. The hardware calculated checksum value
+        can be retrieved using the second parameter passed to
+        mbuf_get_csum_performed.
+    @constant MBUF_CSUM_PSEUDO_HDR If set, this indicates that the
+        checksum value for MBUF_CSUM_DID_DATA includes the pseudo header
+        value. If this is not set, the stack will calculate the pseudo
+        header value and add that to the checksum. The value of this bit
+        is only valid when MBUF_CSUM_DID_DATA is set.
+*/ +enum { +#ifdef KERNEL_PRIVATE + MBUF_CSUM_TCP_SUM16 = MBUF_CSUM_REQ_SUM16, /* Weird apple hardware checksum */ +#endif + MBUF_CSUM_DID_IP = 0x0100, + MBUF_CSUM_IP_GOOD = 0x0200, + MBUF_CSUM_DID_DATA = 0x0400, + MBUF_CSUM_PSEUDO_HDR = 0x0800 +}; +typedef u_int32_t mbuf_csum_performed_flags_t; + +/*! + @enum mbuf_how_t + @abstract Method of allocating an mbuf. + @discussion Blocking will cause the funnel to be dropped. If the + funnel is dropped, other threads may make changes to networking + data structures. This can lead to very bad things happening. + Blocking on the input our output path can also impact + performance. There are some cases where making a blocking call + is acceptable. When in doubt, use MBUF_DONTWAIT. + @constant MBUF_WAITOK Allow a call to allocate an mbuf to block. + @constant MBUF_DONTWAIT Don't allow the mbuf allocation call to + block, if blocking is necessary fail and return immediately. +*/ +enum { + MBUF_WAITOK = 0, /* Ok to block to get memory */ + MBUF_DONTWAIT = 1 /* Don't block, fail if blocking would be required */ +}; +typedef u_int32_t mbuf_how_t; + +typedef u_int32_t mbuf_tag_id_t; +typedef u_int16_t mbuf_tag_type_t; + +/*! + @struct mbuf_stat + @discussion The mbuf_stat contains mbuf statistics. + @field mbufs Number of mbufs (free or otherwise). + @field clusters Number of clusters (free or otherwise). + @field clfree Number of free clusters. + @field drops Number of times allocation failed. + @field wait Number of times allocation blocked. + @field drain Number of times protocol drain functions were called. + @field mtypes An array of counts of each type of mbuf allocated. + @field mcfail Number of times m_copym failed. + @field mpfail Number of times m_pullup failed. + @field msize Length of an mbuf. + @field mclbytes Length of an mbuf cluster. + @field minclsize Minimum length of data to allocate a cluster. + Anything smaller than this should be placed in chained mbufs. + @field mlen Length of data in an mbuf. + @field mhlen Length of data in an mbuf with a packet header. + @field bigclusters Number of big clusters. + @field bigclfree Number of unused big clusters. + @field bigmclbytes Length of a big mbuf cluster. +*/ +struct mbuf_stat { + u_long mbufs; /* mbufs obtained from page pool */ + u_long clusters; /* clusters obtained from page pool */ + u_long clfree; /* free clusters */ + u_long drops; /* times failed to find space */ + u_long wait; /* times waited for space */ + u_long drain; /* times drained protocols for space */ + u_short mtypes[256]; /* type specific mbuf allocations */ + u_long mcfail; /* times m_copym failed */ + u_long mpfail; /* times m_pullup failed */ + u_long msize; /* length of an mbuf */ + u_long mclbytes; /* length of an mbuf cluster */ + u_long minclsize; /* min length of data to allocate a cluster */ + u_long mlen; /* length of data in an mbuf */ + u_long mhlen; /* length of data in a header mbuf */ + u_long bigclusters; /* number of big clusters */ + u_long bigclfree; /* number of big clustser free */ + u_long bigmclbytes; /* length of data in a big cluster */ +}; + +/* Parameter for m_copym to copy all bytes */ +#define MBUF_COPYALL 1000000000 + +/* Data access */ +/*! + @function mbuf_data + @discussion Returns a pointer to the start of data in this mbuf. + There may be additional data on chained mbufs. The data you're + looking for may not be contiguous if it spans more than one + mbuf. Use mbuf_len to determine the lenght of data available in + this mbuf. 
If a data structure you want to access straddles two
+        mbufs in a chain, either use mbuf_pullup to get the data
+        contiguous in one mbuf or copy the pieces of data from each mbuf
+        into a contiguous buffer. Using mbuf_pullup has the advantage
+        of not having to copy the data. On the other hand, if you don't
+        make sure there is space in the mbuf, mbuf_pullup may fail and
+        free the mbuf.
+    @param mbuf The mbuf.
+    @result A pointer to the data in the mbuf.
+ */
+void* mbuf_data(mbuf_t mbuf);
+
+/*!
+    @function mbuf_datastart
+    @discussion Returns the start of the space set aside for storing
+        data in an mbuf. An mbuf's data may come from a cluster or be
+        embedded in the mbuf structure itself. The data pointer
+        retrieved by mbuf_data may not be at the start of the data
+        (mbuf_leadingspace will be non-zero). This function will return
+        a pointer that matches mbuf_data() - mbuf_leadingspace().
+    @param mbuf The mbuf.
+    @result A pointer to the smallest possible value for data.
+ */
+void* mbuf_datastart(mbuf_t mbuf);
+
+/*!
+    @function mbuf_setdata
+    @discussion Sets the data and length values for an mbuf. The data
+        value must be in a valid range. In the case of an mbuf with a
+        cluster, the data value must point to a location in the cluster
+        and the data value plus the length must be less than the end of
+        the cluster. For data embedded directly in an mbuf (no cluster),
+        the data value must fall somewhere between the start and end of
+        the data area in the mbuf and the data plus the length must also
+        be in the same range.
+    @param mbuf The mbuf.
+    @param data The new pointer value for data.
+    @param len The new length of data in the mbuf.
+    @result 0 on success, errno error on failure.
+ */
+errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len);
+
+/*!
+    @function mbuf_align_32
+    @discussion mbuf_align_32 is a replacement for M_ALIGN and MH_ALIGN.
+        mbuf_align_32 will set the data pointer to a location aligned on
+        a four byte boundary with at least 'len' bytes between the data
+        pointer and the end of the data block.
+    @param mbuf The mbuf.
+    @param len The minimum length of space that should follow the new
+        data location.
+    @result 0 on success, errno error on failure.
+ */
+errno_t mbuf_align_32(mbuf_t mbuf, size_t len);
+
+/*!
+    @function mbuf_data_to_physical
+    @discussion mbuf_data_to_physical is a replacement for mcl_to_paddr.
+        Given a pointer returned from mbuf_data or mbuf_datastart,
+        mbuf_data_to_physical will return the physical address for that
+        block of data.
+    @param ptr A pointer to data stored in an mbuf.
+    @result The 64 bit physical address of the mbuf data or NULL if ptr
+        does not point to data stored in an mbuf.
+ */
+addr64_t mbuf_data_to_physical(void* ptr);
+
+
+/* Allocation */
+
+/*!
+    @function mbuf_get
+    @discussion Allocates an mbuf without a cluster for external data.
+    @param how Blocking or non-blocking.
+    @param type The type of the mbuf.
+    @param mbuf The mbuf.
+    @result 0 on success, errno error on failure.
+ */
+errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf);
+
+/*!
+    @function mbuf_gethdr
+    @discussion Allocates an mbuf without a cluster for external data.
+        Sets a flag to indicate there is a packet header and initializes
+        the packet header.
+    @param how Blocking or non-blocking.
+    @param type The type of the mbuf.
+    @param mbuf The mbuf.
+    @result 0 on success, errno error on failure.
+ */
+errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf);
+
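+/*
+ * Example: a minimal sketch of making a structure contiguous before
+ * reading it, per the mbuf_data/mbuf_pullup discussion above. The
+ * struct name my_hdr is a placeholder; mbuf_pullup is declared later
+ * in this header. Illustrative only; kept under #if 0.
+ */
+#if 0
+struct my_hdr { u_int32_t field; };
+
+static errno_t
+example_read_hdr(mbuf_t *m, struct my_hdr *out)
+{
+    /* Ensure the first sizeof(struct my_hdr) bytes are contiguous. On
+     * failure the chain has been freed and *m is no longer valid. */
+    errno_t err = mbuf_pullup(m, sizeof(struct my_hdr));
+    if (err != 0)
+        return err;
+    *out = *(struct my_hdr *)mbuf_data(*m);
+    return 0;
+}
+#endif
+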
+/*!
+    @function mbuf_getcluster
+    @discussion Allocate a cluster of the requested size and attach it
+        to an mbuf for use as external data. If mbuf points to a NULL
+        mbuf_t, an mbuf will be allocated for you. If mbuf points to a
+        non-NULL mbuf_t, mbuf_getcluster may return a different mbuf_t
+        than the one you passed in.
+    @param how Blocking or non-blocking.
+    @param type The type of the mbuf.
+    @param size The size of the cluster to be allocated. Supported sizes
+        for a cluster are 2048 or 4096. Any other value will return
+        EINVAL.
+    @param mbuf The mbuf the cluster will be attached to.
+    @result 0 on success, errno error on failure. If you specified NULL
+        for the mbuf, any intermediate mbuf that may have been allocated
+        will be freed. If you specify an mbuf value in *mbuf,
+        mbuf_getcluster will not free it.
+        EINVAL - Invalid parameter
+        ENOMEM - Not enough memory available
+ */
+errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, mbuf_t* mbuf);
+
+/*!
+    @function mbuf_mclget
+    @discussion Allocate a cluster and attach it to an mbuf for use as
+        external data. If mbuf points to a NULL mbuf_t, an mbuf will be
+        allocated for you. If mbuf points to a non-NULL mbuf_t,
+        mbuf_mclget may return a different mbuf_t than the one you
+        passed in.
+    @param how Blocking or non-blocking.
+    @param type The type of the mbuf.
+    @param mbuf The mbuf the cluster will be attached to.
+    @result 0 on success, errno error on failure. If you specified NULL
+        for the mbuf, any intermediate mbuf that may have been allocated
+        will be freed. If you specify an mbuf value in *mbuf,
+        mbuf_mclget will not free it.
+ */
+errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf);
+
+/*!
+    @function mbuf_allocpacket
+    @discussion Allocate an mbuf chain to store a single packet of the
+        requested length. According to the requested length, a chain of
+        mbufs will be created. The mbuf type will be set to
+        MBUF_TYPE_DATA. The caller may specify the maximum number of
+        buffer segments via maxchunks.
+    @param how Blocking or non-blocking.
+    @param packetlen The total length of the packet mbuf to be allocated.
+        The length must be greater than zero.
+    @param maxchunks An input/output pointer to the maximum number of
+        mbuf segments making up the chain. On input, if maxchunks is
+        zero, or the value pointed to by maxchunks is zero, the packet
+        will be made of as many buffer segments as necessary to fit the
+        length. The allocation will fail with ENOBUFS if the number of
+        segments requested is too small and the sum of the maximum size
+        of each individual segment is less than the packet length. On
+        output, if the allocation succeeds and maxchunks is non-zero, it
+        will point to the actual number of segments allocated.
+    @param mbuf Upon success, *mbuf will be a reference to the new mbuf.
+    @result Returns 0 upon success or the following error code:
+        EINVAL - Invalid parameter
+        ENOMEM - Not enough memory available
+        ENOBUFS - Buffers not big enough for the maximum number of chunks requested
+*/
+errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf);
+
+/*!
+    @function mbuf_getpacket
+    @discussion Allocate an mbuf, allocate and attach a cluster, and set
+        the packet header flag.
+    @param how Blocking or non-blocking.
+    @param mbuf Upon success, *mbuf will be a reference to the new mbuf.
+    @result 0 on success, errno error on failure.
+ */
+errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t* mbuf);
+
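+/*
+ * Example: a minimal sketch of building an outbound packet with
+ * mbuf_allocpacket, letting the stack pick the segment count.
+ * mbuf_copyback and mbuf_freem are declared later in this header.
+ * Illustrative only; kept under #if 0.
+ */
+#if 0
+static errno_t
+example_alloc_packet(size_t len, mbuf_t *out)
+{
+    unsigned int maxchunks = 0; /* 0: as many segments as necessary */
+    errno_t err = mbuf_allocpacket(MBUF_DONTWAIT, len, &maxchunks, out);
+    if (err != 0)
+        return err;     /* EINVAL, ENOMEM or ENOBUFS */
+    /* ... fill the chain, e.g. with mbuf_copyback(); on a later error,
+     * release the whole chain with mbuf_freem(*out). */
+    return 0;
+}
+#endif
+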
+/*!
+    @function mbuf_free
+    @discussion Frees a single mbuf. Not commonly used because it
+        doesn't touch the rest of the mbufs on the chain.
+    @param mbuf The mbuf to free.
+    @result The next mbuf in the chain.
+ */
+mbuf_t mbuf_free(mbuf_t mbuf);
+
+/*!
+    @function mbuf_freem
+    @discussion Frees a chain of mbufs linked through m_next.
+    @param mbuf The first mbuf in the chain to free.
+ */
+void mbuf_freem(mbuf_t mbuf);
+
+/*!
+    @function mbuf_freem_list
+    @discussion Frees a linked list of mbuf chains. Walks through
+        m_nextpkt and does the equivalent of mbuf_freem to each.
+    @param mbuf The first mbuf in the linked list to free.
+    @result The number of mbufs freed.
+ */
+int mbuf_freem_list(mbuf_t mbuf);
+
+/*!
+    @function mbuf_leadingspace
+    @discussion Determines the space available in the mbuf preceding
+        the current data.
+    @param mbuf The mbuf.
+    @result The number of unused bytes at the start of the mbuf.
+ */
+size_t mbuf_leadingspace(mbuf_t mbuf);
+
+/*!
+    @function mbuf_trailingspace
+    @discussion Determines the space available in the mbuf following
+        the current data.
+    @param mbuf The mbuf.
+    @result The number of unused bytes following the current data.
+ */
+size_t mbuf_trailingspace(mbuf_t mbuf);
+
+/* Manipulation */
+
+/*!
+    @function mbuf_copym
+    @discussion Copies len bytes from offset from src to a new mbuf.
+    @param src The source mbuf.
+    @param offset The offset in the mbuf to start copying from.
+    @param len The number of bytes to copy.
+    @param how To block or not to block, that is a question.
+    @param new_mbuf Upon success, the newly allocated mbuf.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_copym(mbuf_t src, size_t offset, size_t len,
+                   mbuf_how_t how, mbuf_t* new_mbuf);
+
+/*!
+    @function mbuf_dup
+    @discussion Exactly duplicates an mbuf chain.
+    @param src The source mbuf.
+    @param how Blocking or non-blocking.
+    @param new_mbuf Upon success, the newly allocated mbuf.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_dup(mbuf_t src, mbuf_how_t how, mbuf_t* new_mbuf);
+
+/*!
+    @function mbuf_prepend
+    @discussion Prepend len bytes to an mbuf. If there is space
+        (mbuf_leadingspace >= len), the mbuf's data pointer is changed
+        and the same mbuf is returned. If there is no space, a new mbuf
+        may be allocated and prepended to the mbuf chain. If the
+        operation fails, the mbuf may be freed (*mbuf will be NULL).
+    @param mbuf The mbuf to prepend data to. This may change if a new
+        mbuf must be allocated or may be NULL if the operation fails.
+    @param len The length, in bytes, to be prepended to the mbuf.
+    @param how Blocking or non-blocking.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_prepend(mbuf_t* mbuf, size_t len, mbuf_how_t how);
+
+/*!
+    @function mbuf_split
+    @discussion Split an mbuf chain at a specific offset.
+    @param src The mbuf to be split.
+    @param offset The offset in the buffer where the mbuf should be
+        split.
+    @param how Blocking or non-blocking.
+    @param new_mbuf Upon success, the second half of the split mbuf
+        chain.
+    @result 0 upon success otherwise the errno error. In the case of
+        failure, the original mbuf chain passed in to src will be
+        preserved.
+ */
+errno_t mbuf_split(mbuf_t src, size_t offset,
+                   mbuf_how_t how, mbuf_t* new_mbuf);
+
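+/*
+ * Example: a minimal sketch of reserving room for a 14-byte Ethernet
+ * header with mbuf_prepend, as described above. The header length is
+ * an assumption for illustration. Kept under #if 0.
+ */
+#if 0
+static errno_t
+example_prepend_ether(mbuf_t *m)
+{
+    errno_t err = mbuf_prepend(m, 14, MBUF_DONTWAIT);
+    if (err != 0)
+        return err; /* on failure *m may be NULL; the chain was freed */
+    /* mbuf_data(*m) now points at 14 writable bytes for the header. */
+    return 0;
+}
+#endif
+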
+/*!
+    @function mbuf_pullup
+    @discussion Move the next len bytes into the mbuf from other mbufs
+        in the chain. This is commonly used to get the IP and TCP or UDP
+        header contiguous in the first mbuf. If mbuf_pullup fails, the
+        entire mbuf chain will be freed.
+    @param mbuf The mbuf in the chain the data should be contiguous in.
+    @param len The number of bytes to pull from the next mbuf(s).
+    @result 0 upon success otherwise the errno error. In the case of an
+        error, the mbuf chain has been freed.
+ */
+errno_t mbuf_pullup(mbuf_t* mbuf, size_t len);
+
+/*!
+    @function mbuf_pulldown
+    @discussion Make length bytes at offset in the mbuf chain
+        contiguous. Nothing before offset bytes in the chain will be
+        modified. Upon return, location will be the mbuf the data is
+        contiguous in and offset will be the offset in that mbuf at
+        which the data is located. In the case of a failure, the mbuf
+        chain will be freed.
+    @param src The start of the mbuf chain.
+    @param offset Pass in a pointer to a value with the offset of the
+        data you're interested in making contiguous. Upon success, this
+        will be overwritten with the offset from the mbuf returned in
+        location.
+    @param length The length of data that should be made contiguous.
+    @param location Upon success, *location will be the mbuf the data is
+        in.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length, mbuf_t *location);
+
+/*!
+    @function mbuf_adj
+    @discussion Trims len bytes from the mbuf. If the length is greater
+        than zero, the bytes are trimmed from the front of the mbuf. If
+        the length is less than zero, the bytes are trimmed from the end
+        of the mbuf chain.
+    @param mbuf The mbuf chain to trim.
+    @param len The number of bytes to trim from the mbuf chain.
+ */
+void mbuf_adj(mbuf_t mbuf, int len);
+
+/*!
+    @function mbuf_copydata
+    @discussion Copies data out of an mbuf into a specified buffer. If
+        the data is stored in a chain of mbufs, the data will be copied
+        from each mbuf in the chain until length bytes have been copied.
+    @param mbuf The mbuf chain to copy data out of.
+    @param offset The offset into the mbuf to start copying.
+    @param length The number of bytes to copy.
+    @param out_data A pointer to the location where the data will be
+        copied.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_copydata(mbuf_t mbuf, size_t offset, size_t length, void* out_data);
+
+/*!
+    @function mbuf_copyback
+    @discussion Copies data from a buffer to an mbuf chain.
+        mbuf_copyback will grow the chain to fit the specified buffer.
+
+        If mbuf_copyback is unable to allocate enough mbufs to grow the
+        chain, ENOBUFS will be returned. The mbuf chain will be shorter
+        than expected but all of the data up to the end of the mbuf
+        chain will be valid.
+
+        If an offset is specified, mbuf_copyback will skip that many
+        bytes in the mbuf chain before starting to write the buffer in
+        to the chain. If the mbuf chain does not contain this many
+        bytes, mbufs will be allocated to create the space.
+    @param mbuf The first mbuf in the chain to copy the data in to.
+    @param offset Offset in bytes to skip before copying data.
+    @param length The length, in bytes, of the data to copy in to the
+        mbuf chain.
+    @param data A pointer to data in the kernel's address space.
+    @param how Blocking or non-blocking.
+    @result 0 upon success, EINVAL or ENOBUFS upon failure.
+ */
+errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length,
+                      const void *data, mbuf_how_t how);
+
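+/*
+ * Example: a minimal sketch of reading bytes out of a chain with
+ * mbuf_copydata and writing them back with mbuf_copyback, per the
+ * discussion above. The offsets and buffer size are arbitrary.
+ * Illustrative only; kept under #if 0.
+ */
+#if 0
+static errno_t
+example_copy_roundtrip(mbuf_t m)
+{
+    char    buf[32];
+    errno_t err;
+
+    /* Copy 32 bytes starting at offset 0, regardless of how the bytes
+     * are split across the chain. */
+    err = mbuf_copydata(m, 0, sizeof(buf), buf);
+    if (err != 0)
+        return err;
+    /* Write them back at offset 64, growing the chain if needed. */
+    return mbuf_copyback(m, 64, sizeof(buf), buf, MBUF_DONTWAIT);
+}
+#endif
+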
+#ifdef KERNEL_PRIVATE
+/*!
+    @function mbuf_mclref
+    @discussion Increment the reference count of the cluster.
+    @param mbuf The mbuf with the cluster to increment the refcount of.
+    @result 0 upon success otherwise the errno error.
+ */
+int mbuf_mclref(mbuf_t mbuf);
+
+/*!
+    @function mbuf_mclunref
+    @discussion Decrement the reference count of the cluster.
+    @param mbuf The mbuf with the cluster to decrement the refcount of.
+    @result 0 upon success otherwise the errno error.
+ */
+int mbuf_mclunref(mbuf_t mbuf);
+#endif /* KERNEL_PRIVATE */
+
+/*!
+    @function mbuf_mclhasreference
+    @discussion Check if a cluster of an mbuf is referenced by another
+        mbuf. References may be taken, for example, as a result of a
+        call to mbuf_split or mbuf_copym.
+    @param mbuf The mbuf with the cluster to test.
+    @result 0 if there is no reference by another mbuf, 1 otherwise.
+ */
+int mbuf_mclhasreference(mbuf_t mbuf);
+
+
+/* mbuf header */
+
+/*!
+    @function mbuf_next
+    @discussion Returns the next mbuf in the chain.
+    @param mbuf The mbuf.
+    @result The next mbuf in the chain.
+ */
+mbuf_t mbuf_next(mbuf_t mbuf);
+
+/*!
+    @function mbuf_setnext
+    @discussion Sets the next mbuf in the chain.
+    @param mbuf The mbuf.
+    @param next The new next mbuf.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next);
+
+/*!
+    @function mbuf_nextpkt
+    @discussion Gets the next packet from the mbuf.
+    @param mbuf The mbuf.
+    @result The nextpkt.
+ */
+mbuf_t mbuf_nextpkt(mbuf_t mbuf);
+
+/*!
+    @function mbuf_setnextpkt
+    @discussion Sets the next packet attached to this mbuf.
+    @param mbuf The mbuf.
+    @param nextpkt The new next packet.
+ */
+void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt);
+
+/*!
+    @function mbuf_len
+    @discussion Gets the length of data in this mbuf.
+    @param mbuf The mbuf.
+    @result The length.
+ */
+size_t mbuf_len(mbuf_t mbuf);
+
+/*!
+    @function mbuf_setlen
+    @discussion Sets the length of data in this mbuf. Be careful not to
+        set the length over the space available in the mbuf.
+    @param mbuf The mbuf.
+    @param len The new length.
+ */
+void mbuf_setlen(mbuf_t mbuf, size_t len);
+
+/*!
+    @function mbuf_maxlen
+    @discussion Retrieves the maximum length of data that may be stored
+        in this mbuf. This value assumes that the data pointer was set
+        to the start of the possible range for that pointer
+        (mbuf_datastart).
+    @param mbuf The mbuf.
+    @result The maximum length of data for this mbuf.
+ */
+size_t mbuf_maxlen(mbuf_t mbuf);
+
+/*!
+    @function mbuf_type
+    @discussion Gets the type of mbuf.
+    @param mbuf The mbuf.
+    @result The type.
+ */
+mbuf_type_t mbuf_type(mbuf_t mbuf);
+
+/*!
+    @function mbuf_settype
+    @discussion Sets the type of mbuf.
+    @param mbuf The mbuf.
+    @param new_type The new type.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type);
+
+/*!
+    @function mbuf_flags
+    @discussion Returns the set flags.
+    @param mbuf The mbuf.
+    @result The flags.
+ */
+mbuf_flags_t mbuf_flags(mbuf_t mbuf);
+
+/*!
+    @function mbuf_setflags
+    @discussion Sets the flags on the mbuf.
+    @param mbuf The mbuf.
+    @param flags The flags that should be set, all other flags will be
+        cleared.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags);
+
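+/*
+ * Example: a minimal sketch of setting one flag without disturbing the
+ * others, using the read-modify-write idiom the accessors above imply
+ * (see also mbuf_setflags_mask, declared next). Illustrative only;
+ * kept under #if 0.
+ */
+#if 0
+static errno_t
+example_mark_bcast(mbuf_t m)
+{
+    /* Read the current flags, OR in MBUF_BCAST, write them back. */
+    return mbuf_setflags(m, mbuf_flags(m) | MBUF_BCAST);
+}
+#endif
+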
+/*!
+    @function mbuf_setflags_mask
+    @discussion Useful for setting or clearing individual flags. Easier
+        than calling mbuf_setflags(m, mbuf_flags(m) | M_FLAG).
+    @param mbuf The mbuf.
+    @param flags The flags that should be set or cleared.
+    @param mask The mask controlling which flags will be modified.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags,
+                           mbuf_flags_t mask);
+
+/*!
+    @function mbuf_copy_pkthdr
+    @discussion Copies the packet header from src to dest.
+    @param dest The mbuf to which the packet header will be copied.
+    @param src The mbuf from which the packet header will be copied.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_copy_pkthdr(mbuf_t dest, mbuf_t src);
+
+/*!
+    @function mbuf_pkthdr_len
+    @discussion Returns the length as reported by the packet header.
+    @param mbuf The mbuf containing the packet header.
+    @result The length, in bytes, of the packet.
+ */
+size_t mbuf_pkthdr_len(mbuf_t mbuf);
+
+/*!
+    @function mbuf_pkthdr_setlen
+    @discussion Sets the length of the packet in the packet header.
+    @param mbuf The mbuf containing the packet header.
+    @param len The new length of the packet.
+ */
+void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len);
+
+/*!
+    @function mbuf_pkthdr_rcvif
+    @discussion Returns a reference to the interface the packet was
+        received on. Increments the reference count of the interface
+        before returning. Caller is responsible for releasing
+        the reference by calling ifnet_release.
+    @param mbuf The mbuf containing the packet header.
+    @result A reference to the interface.
+ */
+ifnet_t mbuf_pkthdr_rcvif(mbuf_t mbuf);
+
+/*!
+    @function mbuf_pkthdr_setrcvif
+    @discussion Sets the interface the packet was received on.
+    @param mbuf The mbuf containing the packet header.
+    @param ifnet A reference to an interface.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifnet);
+
+/*!
+    @function mbuf_pkthdr_header
+    @discussion Returns a pointer to the packet header.
+    @param mbuf The mbuf containing the packet header.
+    @result A pointer to the packet header.
+ */
+void* mbuf_pkthdr_header(mbuf_t mbuf);
+
+/*!
+    @function mbuf_pkthdr_setheader
+    @discussion Sets the pointer to the packet header.
+    @param mbuf The mbuf containing the packet header.
+    @param header A pointer to the header.
+ */
+void mbuf_pkthdr_setheader(mbuf_t mbuf, void* header);
+#ifdef KERNEL_PRIVATE
+
+/* mbuf aux data */
+
+/*!
+    @function mbuf_aux_add
+    @discussion Adds auxiliary data in the form of an mbuf.
+    @param mbuf The mbuf to add aux data to.
+    @param family The protocol family of the aux data to add.
+    @param type The mbuf type of the aux data to add.
+    @param aux_mbuf The aux mbuf allocated for you.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_aux_add(mbuf_t mbuf, int family, mbuf_type_t type, mbuf_t *aux_mbuf);
+
+/*!
+    @function mbuf_aux_find
+    @discussion Finds auxiliary data attached to an mbuf.
+    @param mbuf The mbuf to find aux data on.
+    @param family The protocol family of the aux data to find.
+    @param type The mbuf type of the aux data to find.
+    @result The aux data mbuf or NULL if there isn't one.
+ */
+mbuf_t mbuf_aux_find(mbuf_t mbuf, int family, mbuf_type_t type);
+
+/*!
+    @function mbuf_aux_delete
+    @discussion Free an mbuf used as aux data and disassociate it from
+        the mbuf.
+    @param mbuf The mbuf to find aux data on.
+    @param aux The aux data to free.
+ */
+void mbuf_aux_delete(mbuf_t mbuf, mbuf_t aux);
+#endif /* KERNEL_PRIVATE */
+
+/* Checksums */
+
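+/*
+ * Example: a minimal sketch of looking up and releasing a packet's
+ * receive interface. Assumes ifnet_release() as named in the
+ * mbuf_pkthdr_rcvif discussion above (declared in kpi_interface.h).
+ * Illustrative only; kept under #if 0.
+ */
+#if 0
+static void
+example_inspect_rcvif(mbuf_t m)
+{
+    ifnet_t ifp = mbuf_pkthdr_rcvif(m); /* takes a reference */
+    if (ifp != NULL) {
+        /* ... inspect the interface ... */
+        ifnet_release(ifp);             /* drop the reference */
+    }
+}
+#endif
+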
+/*!
+    @function mbuf_inbound_modified
+    @discussion This function will clear the checksum flags to indicate
+        that a hardware checksum should not be used. Any filter
+        modifying data should call this function on an mbuf before
+        passing the packet up the stack. If a filter modifies a packet
+        in a way that affects any checksum, the filter is responsible
+        for either modifying the checksum to compensate for the changes
+        or verifying the checksum before making the changes and then
+        modifying the data and calculating a new checksum only if the
+        original checksum was valid.
+    @param mbuf The mbuf that has been modified.
+ */
+void mbuf_inbound_modified(mbuf_t mbuf);
+
+/*!
+    @function mbuf_outbound_finalize
+    @discussion This function will "finalize" the packet, allowing your
+        code to inspect the final packet.
+
+        There are a number of operations that are performed in hardware,
+        such as calculating checksums. This function will perform in
+        software the various operations that were scheduled to be done
+        in hardware. Future operations may include IPSec processing or
+        vlan support. If you are redirecting a packet to a new interface
+        which may not have the same hardware support or encapsulating
+        the packet, you should call this function to force the stack to
+        calculate and fill out the checksums. This will bypass hardware
+        checksums but give you a complete packet to work with. If you
+        need to inspect aspects of the packet which may be generated by
+        hardware, you must call this function to get an approximate
+        final packet. If you plan to modify the packet in any way, you
+        should call this function.
+
+        This function should be called before modifying any outbound
+        packets.
+
+        This function may be called at various levels, in some cases
+        additional headers may have already been prepended, such as the
+        case of a packet seen by an interface filter. To handle this,
+        the caller must pass the protocol family of the packet as well
+        as the offset from the start of the packet to the protocol
+        header.
+    @param mbuf The mbuf that should be finalized.
+    @param protocol_family The protocol family of the packet in the
+        mbuf.
+    @param protocol_offset The offset from the start of the mbuf to the
+        protocol header. For an IP packet with an ethernet header, this
+        would be the length of an ethernet header.
+ */
+void mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family,
+                            size_t protocol_offset);
+
+/*!
+    @function mbuf_set_vlan_tag
+    @discussion This function is used by interfaces that support vlan
+        tagging in hardware. This function will set properties in the
+        mbuf to indicate which vlan the packet was received for.
+    @param mbuf The mbuf containing the packet.
+    @param vlan The vlan tag to store in the mbuf.
+    @result 0 upon success otherwise the errno error.
+ */
+errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan);
+
+/*!
+    @function mbuf_get_vlan_tag
+    @discussion This function is used by drivers that support hardware
+        vlan tagging to determine which vlan this packet belongs to. To
+        differentiate between the case where the vlan tag is zero and
+        the case where there is no vlan tag, this function will return
+        ENXIO when there is no vlan.
+    @param mbuf The mbuf containing the packet.
+    @param vlan Upon success, the vlan tag from the mbuf.
+    @result 0 upon success otherwise the errno error. ENXIO indicates
+        that the vlan tag is not set.
+ */
+errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan);
+
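+/*
+ * Example: a minimal sketch of a driver transmit path checking for a
+ * hardware vlan tag, treating ENXIO as "no tag" per the discussion
+ * above. Illustrative only; kept under #if 0.
+ */
+#if 0
+static void
+example_tx_vlan(mbuf_t m)
+{
+    u_int16_t tag;
+    errno_t err = mbuf_get_vlan_tag(m, &tag);
+
+    if (err == 0) {
+        /* Packet belongs to vlan 'tag'; note a tag of zero is still a
+         * valid tag here. */
+    } else if (err == ENXIO) {
+        /* No vlan tag at all; send the frame untagged. */
+    }
+}
+#endif
+
+/*!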
+ @function mbuf_clear_vlan_tag + @discussion This function will clear any vlan tag associated with + the mbuf. + @param mbuf The mbuf containing the packet. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_clear_vlan_tag(mbuf_t mbuf); + +#ifdef KERNEL_PRIVATE +/*! + @function mbuf_set_csum_requested + @discussion This function is used by the stack to indicate which + checksums should be calculated in hardware. The stack normally + sets these flags as the packet is processed in the outbound + direction. Just before send the packe to the interface, the + stack will look at these flags and perform any checksums in + software that are not supported by the interface. + @param mbuf The mbuf containing the packet. + @param request Flags indicating which checksums are being requested + for this packet. + @param value This parameter is currently unsupported. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_set_csum_requested(mbuf_t mbuf, + mbuf_csum_request_flags_t request, u_int32_t value); +#endif + +/*! + @function mbuf_get_csum_requested + @discussion This function is used by the driver to determine which + checksum operations should be performed in hardware. + @param mbuf The mbuf containing the packet. + @param request Flags indicating which checksums are being requested + for this packet. + @param value This parameter is currently unsupported. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_get_csum_requested(mbuf_t mbuf, + mbuf_csum_request_flags_t *request, u_int32_t *value); + +/*! + @function mbuf_clear_csum_requested + @discussion This function clears the checksum request flags. + @param mbuf The mbuf containing the packet. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_clear_csum_requested(mbuf_t mbuf); + +/*! + @function mbuf_set_csum_performed + @discussion This is used by the driver to indicate to the stack which + checksum operations were performed in hardware. + @param mbuf The mbuf containing the packet. + @param flags Flags indicating which hardware checksum operations + were performed. + @param value If the MBUF_CSUM_DID_DATA flag is set, value should be + set to the value of the TCP or UDP header as calculated by the + hardware. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_set_csum_performed(mbuf_t mbuf, + mbuf_csum_performed_flags_t flags, u_int32_t value); + +#ifdef KERNEL_PRIVATE +/*! + @function mbuf_get_csum_performed + @discussion This is used by the stack to determine which checksums + were calculated in hardware on the inbound path. + @param mbuf The mbuf containing the packet. + @param flags Flags indicating which hardware checksum operations + were performed. + @param value If the MBUF_CSUM_DID_DATA flag is set, value will be + set to the value of the TCP or UDP header as calculated by the + hardware. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_get_csum_performed(mbuf_t mbuf, + mbuf_csum_performed_flags_t *flags, u_int32_t *value); +#endif + +/*! + @function mbuf_clear_csum_performed + @discussion Clears the hardware checksum flags and values. + @param mbuf The mbuf containing the packet. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_clear_csum_performed(mbuf_t mbuf); + +/* mbuf tags */ + +/*! + @function mbuf_tag_id_find + @discussion Lookup the module id for a string. If there is no module + id assigned to this string, a new module id will be assigned. 
+ The string should be the bundle id of the kext. In the case of a + tag that will be shared across multiple kexts, a common bundle id + style string should be used. + + The lookup operation is not optimized. A module should call this + function once during startup and chache the module id. The module id + will not be resassigned until the machine reboots. + @param module_string A unique string identifying your module. + Example: com.apple.nke.SharedIP. + @param module_id Upon return, a unique identifier for use with + mbuf_tag_* functions. This identifier is valid until the machine + is rebooted. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_tag_id_find(const char *module_string, + mbuf_tag_id_t *module_id); + +/*! + @function mbuf_tag_allocate + @discussion Allocate an mbuf tag. Mbuf tags allow various portions + of the stack to tag mbufs with data that will travel with the + mbuf through the stack. + + Tags may only be added to mbufs with packet headers + (MBUF_PKTHDR flag is set). Mbuf tags are freed when the mbuf is + freed or when mbuf_tag_free is called. + @param mbuf The mbuf to attach this tag to. + @param module_id A module identifier returned by mbuf_tag_id_find. + @param type A 16 bit type value. For a given module_id, you can use + a number of different tag types. + @param length The length, in bytes, to allocate for storage that + will be associated with this tag on this mbuf. + @param how Indicate whether you want to block and wait for memory if + memory is not immediately available. + @param data_p Upon successful return, *data_p will point to the + buffer allocated for the mtag. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type, size_t length, + mbuf_how_t how, void** data_p); + +/*! + @function mbuf_tag_find + @discussion Find the data associated with an mbuf tag. + @param mbuf The mbuf the tag is attached to. + @param module_id A module identifier returned by mbuf_tag_id_find. + @param type The 16 bit type of the tag to find. + @param length Upon success, the length of data will be store in + *length. + @param data_p Upon successful return, *data_p will point to the + buffer allocated for the mtag. + @result 0 upon success otherwise the errno error. + */ +errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type, size_t *length, void** data_p); + +/*! + @function mbuf_tag_free + @discussion Frees a previously allocated mbuf tag. + @param mbuf The mbuf the tag was allocated on. + @param module_id The ID of the tag to free. + @param type The type of the tag to free. + */ +void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type); + +/* mbuf stats */ + +/*! + @function mbuf_stats + @discussion Get the mbuf statistics. + @param stats Storage to copy the stats in to. 
+ */ +void mbuf_stats(struct mbuf_stat* stats); + + + +/* IF_QUEUE interaction */ + +#define IF_ENQUEUE_MBUF(ifq, m) { \ + mbuf_setnextpkt((m), 0); \ + if ((ifq)->ifq_tail == 0) \ + (ifq)->ifq_head = (m); \ + else \ + mbuf_setnextpkt((mbuf_t)(ifq)->ifq_tail, (m)); \ + (ifq)->ifq_tail = (m); \ + (ifq)->ifq_len++; \ +} +#define IF_PREPEND_MBUF(ifq, m) { \ + mbuf_setnextpkt((m), (ifq)->ifq_head); \ + if ((ifq)->ifq_tail == 0) \ + (ifq)->ifq_tail = (m); \ + (ifq)->ifq_head = (m); \ + (ifq)->ifq_len++; \ +} +#define IF_DEQUEUE_MBUF(ifq, m) { \ + (m) = (ifq)->ifq_head; \ + if (m) { \ + if (((ifq)->ifq_head = mbuf_nextpkt((m))) == 0) \ + (ifq)->ifq_tail = 0; \ + mbuf_setnextpkt((m), 0); \ + (ifq)->ifq_len--; \ + } \ +} + + +#endif diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h new file mode 100644 index 000000000..13c56414f --- /dev/null +++ b/bsd/sys/kpi_socket.h @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/*! + @header kpi_socket.h + This header defines an API for creating and interacting with sockets + in the kernel. It is possible to create sockets in the kernel + without an associated file descriptor. In some cases, a reference to + the socket may be known while the file descriptor is not. These + functions can be used for interacting with sockets in the kernel. + The API is similar to the user space socket API. + */ +#ifndef __KPI_SOCKET__ +#define __KPI_SOCKET__ + +#include +#include + +struct timeval; + +/*! + @typedef sock_upcall + + @discussion sock_upcall is used by a socket to notify an in kernel + client that data is waiting. Instead of making blocking calls in + the kernel, a client can specify an upcall which will be called + when data is available or the socket is ready for sending. + + Calls to your upcall function are not serialized and may be + called concurrently from multiple threads in the kernel. + + Your upcall function will be called when: + + @param so A reference to the socket that's ready. + @param cookie The cookie passed in when the socket was created. + @param waitf Indicates whether or not it's safe to block. +*/ +typedef void (*sock_upcall)(socket_t so, void* cookie, int waitf); + +/*! + @function sock_accept + @discussion Accepts an incoming connection on a socket. See 'man 2 + accept' for more information. Allocating a socket in this manner + creates a socket with no associated file descriptor. + @param so The listening socket you'd like to accept a connection on. + @param from A pointer to a socket address that will be filled in + with the address the connection is from. + @param fromlen Maximum length of from. 
+    @param flags Supports MSG_DONTWAIT and MSG_USEUPCALL. If
+        MSG_DONTWAIT is set, accept will return EWOULDBLOCK if there are
+        no connections ready to be accepted. If MSG_USEUPCALL is set,
+        the created socket will use the same upcall function attached to
+        the original socket.
+    @param callback A notifier function to be called when an event
+        occurs on the socket. This may be NULL.
+    @param cookie A cookie passed directly to the callback.
+    @param new_so Upon success, *new_so will be a reference to a new
+        socket for tracking the connection.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen,
+                    int flags, sock_upcall callback, void* cookie,
+                    socket_t *new_so);
+
+/*!
+    @function sock_bind
+    @discussion Binds a socket to a specific address. See 'man 2 bind'
+        for more information.
+    @param so The socket to be bound.
+    @param to The local address the socket should be bound to.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_bind(socket_t so, const struct sockaddr *to);
+
+/*!
+    @function sock_connect
+    @discussion Initiates a connection on the socket. See 'man 2
+        connect' for more information.
+    @param so The socket to be connected.
+    @param to The remote address the socket should connect to.
+    @param flags Flags for connecting. The only flag supported so far is
+        MSG_DONTWAIT. MSG_DONTWAIT will perform a non-blocking connect.
+        sock_connect will return immediately with EINPROGRESS. The
+        upcall, if supplied, will be called when the connection is
+        completed.
+    @result 0 on success, EINPROGRESS for a non-blocking connect that
+        has not completed, otherwise the errno error.
+ */
+errno_t sock_connect(socket_t so, const struct sockaddr *to, int flags);
+
+#ifdef KERNEL_PRIVATE
+/*!
+    This function was added to support NFS. NFS does something funny,
+    setting a short timeout and checking to see if it should abort the
+    connect every two seconds. Ideally, NFS would use the upcall to be
+    notified when the connect is complete.
+
+    If you feel you need to use this function, please contact us to
+    explain why.
+
+    @function sock_connectwait
+    @discussion Allows a caller to wait on a socket connect.
+    @param so The socket being connected.
+    @param tv The amount of time to wait.
+    @result 0 on success otherwise the errno error. EINPROGRESS will be
+        returned if the connection did not complete in the timeout
+        specified.
+ */
+errno_t sock_connectwait(socket_t so, const struct timeval *tv);
+#endif /* KERNEL_PRIVATE */
+
+/*!
+    @function sock_getpeername
+    @discussion Retrieves the remote address of a connected socket. See
+        'man 2 getpeername'.
+    @param so The socket.
+    @param peername Storage for the peer name.
+    @param peernamelen Length of storage for the peer name.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_getpeername(socket_t so, struct sockaddr *peername, int peernamelen);
+
+/*!
+    @function sock_getsockname
+    @discussion Retrieves the local address of a socket. See 'man 2
+        getsockname'.
+    @param so The socket.
+    @param sockname Storage for the local name.
+    @param socknamelen Length of storage for the socket name.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_getsockname(socket_t so, struct sockaddr *sockname, int socknamelen);
+
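+/*
+ * Example: a minimal sketch of creating a kernel TCP socket and
+ * starting a non-blocking connect, using sock_connect above together
+ * with sock_socket and sock_close (declared later in this header).
+ * The sockaddr_in setup is assumed done by the caller. Illustrative
+ * only; kept under #if 0.
+ */
+#if 0
+static errno_t
+example_connect(struct sockaddr_in *sin, socket_t *out)
+{
+    errno_t err;
+
+    err = sock_socket(PF_INET, SOCK_STREAM, IPPROTO_TCP, NULL, NULL, out);
+    if (err != 0)
+        return err;
+    /* MSG_DONTWAIT: returns EINPROGRESS while the handshake runs. */
+    err = sock_connect(*out, (const struct sockaddr *)sin, MSG_DONTWAIT);
+    if (err != 0 && err != EINPROGRESS) {
+        sock_close(*out);
+        return err;
+    }
+    return 0;
+}
+#endif
+
+/*!
+    @function sock_getsockopt
+    @discussion Retrieves a socket option. See 'man 2 getsockopt'.
+    @param so The socket.
+    @param level Level of the socket option.
+    @param optname The option name.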
+ @param optval The option value. + @param optlen The length of optval, returns the actual length. + @result 0 on success otherwise the errno error. + */ +errno_t sock_getsockopt(socket_t so, int level, int optname, void *optval, int *optlen); + +/*! + @function sock_ioctl + @discussion Performs an ioctl operation on a socket. See 'man 2 ioctl'. + @param so The socket. + @param request The ioctl name. + @param argp The argument. + @result 0 on success otherwise the errno error. + */ +errno_t sock_ioctl(socket_t so, unsigned long request, void *argp); + +/*! + @function sock_setsockopt + @discussion Sets a socket option. See 'man 2 setsockopt'. + @param so The socket. + @param level Level of the socket option. + @param optname The option name. + @param optval The option value. + @param optlen The length of optval. + @result 0 on success otherwise the errno error. + */ +errno_t sock_setsockopt(socket_t so, int level, int optname, const void *optval, int optlen); + +/*! + @function sock_listen + @discussion Indicate that the socket should start accepting incoming + connections. See 'man 2 listen'. + @param so The socket. + @param backlog The maximum length of the queue of pending connections. + @result 0 on success otherwise the errno error. + */ +errno_t sock_listen(socket_t so, int backlog); + +/*! + @function sock_receive + @discussion Receive data from a socket. Similar to recvmsg. See 'man + 2 recvmsg' for more information about receiving data. + @param so The socket. + @param msg The msg describing how the data should be received. + @param flags See 'man 2 recvmsg'. + @param recvdlen Number of bytes received, same as return value of + userland recvmsg. + @result 0 on success, EWOULDBLOCK if non-blocking and operation + would cause the thread to block, otherwise the errno error. + */ +errno_t sock_receive(socket_t so, struct msghdr *msg, int flags, size_t *recvdlen); + +/*! + @function sock_receivembuf + @discussion Receive data from a socket. Similar to sock_receive + though data is returned as a chain of mbufs. See 'man 2 recvmsg' + for more information about receiving data. + @param so The socket. + @param msg The msg describing how the data should be received. May + be NULL. The msg_iov is ignored. + @param data Upon return *data will be a reference to an mbuf chain + containing the data received. This eliminates copying the data + out of the mbufs. Caller is responsible for freeing the mbufs. + @param flags See 'man 2 recvmsg'. + @param recvlen Maximum number of bytes to receive in the mbuf chain. + Upon return, this value will be set to the number of bytes + received, same as return value of userland recvmsg. + @result 0 on success, EWOULDBLOCK if non-blocking and operation + would cause the thread to block, otherwise the errno error. + */ +errno_t sock_receivembuf(socket_t so, struct msghdr *msg, mbuf_t *data, int flags, size_t *recvlen); + +/*! + @function sock_send + @discussion Send data on a socket. Similar to sendmsg. See 'man 2 + sendmsg' for more information about sending data. + @param so The socket. + @param msg The msg describing how the data should be sent. Any + pointers must point to data in the kernel. + @param flags See 'man 2 sendmsg'. + @param sentlen The number of bytes sent. + @result 0 on success, EWOULDBLOCK if non-blocking and operation + would cause the thread to block, otherwise the errno error. + */ +errno_t sock_send(socket_t so, const struct msghdr *msg, int flags, size_t *sentlen); + +/*! 
+ @function sock_sendmbuf + @discussion Send data in an mbuf on a socket. Similar to sock_send + only the data to be sent is taken from the mbuf chain. + @param so The socket. + @param msg The msg describing how the data should be sent. The + msg_iov is ignored. msg may be NULL. + @param data The mbuf chain of data to send. + @param flags See 'man 2 sendmsg'. + @param sentlen The number of bytes sent. + @result 0 on success, EWOULDBLOCK if non-blocking and operation + would cause the thread to block, otherwise the errno error. + Regardless of return value, the mbuf chain 'data' will be freed. + */ +errno_t sock_sendmbuf(socket_t so, const struct msghdr *msg, mbuf_t data, int flags, size_t *sentlen); + +/*! + @function sock_shutdown + @discussion Shutdown one or both directions of a connection. See + 'man 2 shutdown' for more information. + @param so The socket. + @param how SHUT_RD - shutdown receive. SHUT_WR - shutdown send. SHUT_RDWR - shutdown both. + @result 0 on success otherwise the errno error. + */ +errno_t sock_shutdown(socket_t so, int how); + +/*! + @function sock_socket + @discussion Allocate a socket. Allocating a socket in this manner + creates a socket with no associated file descriptor. For more + information, see 'man 2 socket'. + @param domain The socket domain (PF_INET, etc...). + @param type The socket type (SOCK_STREAM, SOCK_DGRAM, etc...). + @param protocol The socket protocol. + @param callback A notifier function to be called when an event + occurs on the socket. This may be NULL. + @param cookie A cookie passed directly to the callback. + @param new_so Upon success, a reference to the new socket. + @result 0 on success otherwise the errno error. + */ +errno_t sock_socket(int domain, int type, int protocol, sock_upcall callback, + void* cookie, socket_t *new_so); + +/*! + @function sock_close + @discussion Close the socket. + @param so The socket to close. This should only ever be a socket + created with sock_socket. Closing a socket created in user space + using sock_close may leave a file descriptor pointing to the closed + socket, resulting in undefined behavior. + */ +void sock_close(socket_t so); + +/*! + @function sock_retain + @discussion Prevents the socket from closing + @param so The socket to close. Increment a retain count on the + socket, preventing it from being closed when sock_close is + called. This is used when a File Descriptor is passed (and + closed) from userland and the kext wants to keep ownership of + that socket. It is used in conjunction with + sock_release(socket_t so). + */ +void sock_retain(socket_t so); + +/*! + @function sock_release + @discussion Decrement the retain count and close the socket if the + retain count reaches zero. + @param so The socket to release. This is used to release ownership + on a socket acquired with sock_retain. When the last retain + count is reached, this will call sock_close to close the socket. + */ +void sock_release(socket_t so); + +/*! + @function sock_setpriv + @discussion Set the privileged bit in the socket. Allows for + operations that require root privileges. + @param so The socket on which to modify the SS_PRIV flag. + @param on Indicate whether or not the SS_PRIV flag should be set. + @result 0 on success otherwise the errno error. + */ +errno_t sock_setpriv(socket_t so, int on); + +/*! + @function sock_isconnected + @discussion Returns whether or not the socket is connected. + @param so The socket to check. + @result 0 - socket is not connected. 1 - socket is connected. 
+ */
+int sock_isconnected(socket_t so);
+
+/*!
+    @function sock_isnonblocking
+    @discussion Returns whether or not the socket is non-blocking. In
+        the context of this KPI, non-blocking means that functions to
+        perform operations on a socket will not wait for completion.
+
+        To enable or disable blocking, use the FIONBIO ioctl. The
+        parameter is an int. If the int is zero, the socket will block.
+        If the parameter is non-zero, the socket will not block.
+    @param so The socket to check.
+    @result 0 - socket will block. 1 - socket will not block.
+ */
+int sock_isnonblocking(socket_t so);
+
+/*!
+    @function sock_gettype
+    @discussion Retrieves information about the socket. This is the same
+        information that was used to create the socket. If any of the
+        parameters following so are NULL, that information is not
+        retrieved.
+    @param so The socket to check.
+    @param domain The domain of the socket (PF_INET, etc...). May be NULL.
+    @param type The socket type (SOCK_STREAM, SOCK_DGRAM, etc...). May be NULL.
+    @param protocol The socket protocol. May be NULL.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_gettype(socket_t so, int *domain, int *type, int *protocol);
+
+#ifdef KERNEL_PRIVATE
+/*!
+    @function sock_nointerrupt
+    @discussion Disables interrupt on socket buffers (sets SB_NOINTR on
+        send and receive socket buffers).
+    @param so The socket to modify.
+    @param on Indicate whether or not the SB_NOINTR flag should be set.
+    @result 0 on success otherwise the errno error.
+ */
+errno_t sock_nointerrupt(socket_t so, int on);
+#endif /* KERNEL_PRIVATE */
+#endif /* __KPI_SOCKET__ */
diff --git a/bsd/sys/kpi_socketfilter.h b/bsd/sys/kpi_socketfilter.h
new file mode 100644
index 000000000..efc3f75a2
--- /dev/null
+++ b/bsd/sys/kpi_socketfilter.h
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*!
+    @header kpi_socketfilter.h
+    This header defines an API for intercepting communications at the
+    socket layer.
+
+    For the most part, socket filters want to do three things: Filter
+    data in and out, watch for state changes, and intercept a few calls
+    for security. The number of function pointers supplied by a socket
+    filter has been significantly reduced. The filter no longer has any
+    knowledge of socket buffers. The filter no longer intercepts nearly
+    every internal socket call. There are two data filters, an in
+    filter, and an out filter. The in filter occurs before data is
+    placed in the receive socket buffer. This is done to avoid waking
+    the process unnecessarily. The out filter occurs before the data is
+    appended to the send socket buffer. This should cover inbound and
+    outbound data.
For monitoring state changes, we've added a notify + function that will be called when various events that the filter can + not intercept occur. In addition, we've added a few functions that a + filter may use to intercept common operations. These functions are: + connect (inbound), connect (outbound), bind, set socket option, + get socket option, and listen. Bind, listen, connect in, and connect + out could be used together to build a fairly comprehensive firewall + without having to do much with individual packets. + */ +#ifndef __KPI_SOCKETFILTER__ +#define __KPI_SOCKETFILTER__ + +#include +#include + +struct sockaddr; + +/*! + @enum sflt_flags + @abstract Constants defining mbuf flags. Only the flags listed below + can be set or retreieved. + @constant SFLT_GLOBAL Indicates this socket filter should be + attached to all new sockets when they're created. + @constant SFLT_PROG Indicates this socket filter should be attached + only when request by the application using the SO_NKE socket + option. +*/ +enum { + SFLT_GLOBAL = 0x01, + SFLT_PROG = 0x02 +}; +typedef u_int32_t sflt_flags; + +/*! + @typedef sflt_handle + @abstract A 4 byte identifier used with the SO_NKE socket option to + identify the socket filter to be attached. +*/ +typedef u_int32_t sflt_handle; + +/*! + @enum sflt_event_t + @abstract Events notify a filter of state changes and other various + events related to the socket. These events can not be prevented + or intercepted, only observed. + @constant sock_evt_connected Indicates this socket has moved to the + connected state. + @constant sock_evt_disconnected Indicates this socket has moved to + the disconnected state. + @constant sock_evt_flush_read The read socket buffer has been + flushed. + @constant sock_evt_shutdown The read and or write side(s) of the + connection have been shutdown. The param will point to an + integer that indicates the direction that has been shutdown. See + 'man 2 shutdown' for more information. + @constant sock_evt_cantrecvmore Indicates the socket can not receive + more data. + @constant sock_evt_cantsendmore Indicates the socket can not send + more data. + @constant sock_evt_closing Indicates the socket is closing. +*/ +enum { + sock_evt_connecting = 1, + sock_evt_connected = 2, + sock_evt_disconnecting = 3, + sock_evt_disconnected = 4, + sock_evt_flush_read = 5, + sock_evt_shutdown = 6, /* param points to an integer specifying how (read, write, or both) see man 2 shutdown */ + sock_evt_cantrecvmore = 7, + sock_evt_cantsendmore = 8, + sock_evt_closing = 9 +}; +typedef u_int32_t sflt_event_t; + +/*! + @enum sflt_data_flag_t + @abstract Inbound and outbound data filters may handle many + different types of incoming and outgoing data. These flags help + distinguish between normal data, out-of-band data, and records. + @constant sock_data_filt_flag_oob Indicates this data is out-of-band + data. + @constant sock_data_filt_flag_record Indicates this data is a + record. This flag is only ever seen on inbound data. +*/ +enum { + sock_data_filt_flag_oob = 1, + sock_data_filt_flag_record = 2 +}; +typedef u_int32_t sflt_data_flag_t; + +/*! + @typedef sf_unregistered_func + + @discussion sf_unregistered_func is called to notify the filter it + has been unregistered. This is the last function the stack will + call and this function will only be called once all other + function calls in to your filter have completed. Once this + function has been called, your kext may safely unload. + @param handle The socket filter handle used to identify this filter. 
+*/
+typedef void (*sf_unregistered_func)(sflt_handle handle);
+
+/*!
+	@typedef sf_attach_func
+
+	@discussion sf_attach_func is called to notify the filter it has
+		been attached to a socket. The filter may allocate memory for
+		this attachment and use the cookie to track it. This function is
+		called in one of two cases:
+		1) You've installed a global filter and a new socket was created.
+		2) Your non-global socket filter is being attached using the SO_NKE
+			socket option.
+	@param cookie Used to allow the socket filter to set the cookie for
+		this attachment.
+	@param so The socket the filter is being attached to.
+	@result If you return a non-zero value, your filter will not be
+		attached to this socket.
+*/
+typedef errno_t (*sf_attach_func)(void **cookie, socket_t so);
+
+/*!
+	@typedef sf_detach_func
+
+	@discussion sf_detach_func is called to notify the filter it has
+		been detached from a socket. If the filter allocated any memory
+		for this attachment, it should be freed. This function will
+		be called when the socket is disposed of.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+*/
+typedef void (*sf_detach_func)(void *cookie, socket_t so);
+
+/*!
+	@typedef sf_notify_func
+
+	@discussion sf_notify_func is called to notify the filter of various
+		state changes and other events occurring on the socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param event The type of event that has occurred.
+	@param param Additional information about the event.
+*/
+typedef void (*sf_notify_func)(void *cookie, socket_t so,
+		sflt_event_t event, void *param);
+
+/*!
+	@typedef sf_getpeername_func
+
+	@discussion sf_getpeername_func is called to allow a filter to
+		intercept the getpeername function. When called, sa will
+		point to a pointer to a socket address that was malloced
+		in zone M_SONAME. If you want to replace this address, either
+		modify the current copy or allocate a new one and free the
+		old one.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param sa A pointer to a socket address pointer.
+	@result If you return a non-zero value, processing will stop. If
+		you return EJUSTRETURN, no further filters will be called
+		but a result of zero will be returned to the caller of
+		getpeername.
+*/
+typedef int (*sf_getpeername_func)(void *cookie, socket_t so,
+		struct sockaddr **sa);
+
+/*!
+	@typedef sf_getsockname_func
+
+	@discussion sf_getsockname_func is called to allow a filter to
+		intercept the getsockname function. When called, sa will
+		point to a pointer to a socket address that was malloced
+		in zone M_SONAME. If you want to replace this address, either
+		modify the current copy or allocate a new one and free the
+		old one.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param sa A pointer to a socket address pointer.
+	@result If you return a non-zero value, processing will stop. If
+		you return EJUSTRETURN, no further filters will be called
+		but a result of zero will be returned to the caller of
+		getsockname.
+*/
+typedef int (*sf_getsockname_func)(void *cookie, socket_t so,
+		struct sockaddr **sa);
+
+/*!
+	@typedef sf_data_in_func
+
+	@discussion sf_data_in_func is called to filter incoming data. If your
+		filter intercepts data for later reinjection, it must queue all incoming
+		data to preserve the order of the data. Use sock_inject_data_in to later
+		reinject this data if you return EJUSTRETURN. Warning: This filter is on
+		the data path. Do not spend excessive time. Do not wait for data on
+		another socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param from The address the data is from, may be NULL if the socket
+		is connected.
+	@param data The data being received. Control data may appear in the
+		mbuf chain, be sure to check the mbuf types to find control
+		data.
+	@param control Control data being passed separately from the data.
+	@param flags Flags to indicate if this is out of band data or a
+		record.
+	@result Return:
+		0 - The caller will continue with normal processing of the data.
+		EJUSTRETURN - The caller will stop processing the data, the data will not be freed.
+		Anything Else - The caller will free the data and stop processing.
+*/
+typedef errno_t (*sf_data_in_func)(void *cookie, socket_t so,
+		const struct sockaddr *from, mbuf_t *data,
+		mbuf_t *control, sflt_data_flag_t flags);
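+
+/*
+ * Editor's illustration, not part of the original header: a minimal
+ * pass-through sf_data_in_func that only counts inbound bytes. The
+ * names my_data_in and my_cookie_t are hypothetical, and the sketch
+ * assumes the inbound chain carries a packet header (so that the
+ * mbuf_pkthdr_len KPI applies) and that the byte count lives in the
+ * attachment cookie set up by the attach function.
+ *
+ *	static errno_t
+ *	my_data_in(void *cookie, socket_t so, const struct sockaddr *from,
+ *	    mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags)
+ *	{
+ *		my_cookie_t *state = cookie;
+ *
+ *		state->bytes_in += mbuf_pkthdr_len(*data);
+ *		return 0;	// let the stack process the data normally
+ *	}
+ */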
+
+/*!
+	@typedef sf_data_out_func
+
+	@discussion sf_data_out_func is called to filter outbound data. If
+		your filter intercepts data for later reinjection, it must queue
+		all outbound data to preserve the order of the data when
+		reinjecting. Use sock_inject_data_out to later reinject this
+		data.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param to The address the data is being sent to, may be NULL if the
+		socket is connected.
+	@param data The data being sent. Control data may appear in the
+		mbuf chain, be sure to check the mbuf types to find control
+		data.
+	@param control Control data being passed separately from the data.
+	@param flags Flags to indicate if this is out of band data or a
+		record.
+	@result Return:
+		0 - The caller will continue with normal processing of the data.
+		EJUSTRETURN - The caller will stop processing the data, the data will not be freed.
+		Anything Else - The caller will free the data and stop processing.
+*/
+typedef errno_t (*sf_data_out_func)(void *cookie, socket_t so,
+		const struct sockaddr *to, mbuf_t *data,
+		mbuf_t *control, sflt_data_flag_t flags);
+
+/*!
+	@typedef sf_connect_in_func
+
+	@discussion sf_connect_in_func is called to filter inbound connections. A
+		protocol will call this before accepting an incoming connection and
+		placing it on the queue of completed connections. Warning: This filter
+		is on the data path. Do not spend excessive time. Do not wait for data on
+		another socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param from The address the incoming connection is from.
+	@result Return:
+		0 - The caller will continue with normal processing of the connection.
+		Anything Else - The caller will reject the incoming connection.
+*/
+typedef errno_t (*sf_connect_in_func)(void *cookie, socket_t so,
+		const struct sockaddr *from);
+
+/*!
+	@typedef sf_connect_out_func
+
+	@discussion sf_connect_out_func is called to filter outbound
+		connections. A protocol will call this before initiating an
+		outbound connection.
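+
+		For illustration only (my_connect_out is a hypothetical name,
+		and the sketch assumes <netinet/in.h> for struct sockaddr_in):
+		a filter could reject outbound telnet connections like
+
+			static errno_t
+			my_connect_out(void *cookie, socket_t so,
+			    const struct sockaddr *to)
+			{
+				const struct sockaddr_in *sin =
+				    (const struct sockaddr_in *)to;
+
+				if (sin->sin_family == AF_INET &&
+				    sin->sin_port == htons(23))
+					return EPERM;	// reject the connection
+				return 0;		// allow the connection
+			}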
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param to The remote address of the outbound connection.
+	@result Return:
+		0 - The caller will continue with normal processing of the connection.
+		Anything Else - The caller will reject the outbound connection.
+*/
+typedef errno_t (*sf_connect_out_func)(void *cookie, socket_t so,
+		const struct sockaddr *to);
+
+/*!
+	@typedef sf_bind_func
+
+	@discussion sf_bind_func is called before performing a bind
+		operation on a socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param to The local address the socket will be bound to.
+	@result Return:
+		0 - The caller will continue with normal processing of the bind.
+		Anything Else - The caller will reject the bind.
+*/
+typedef errno_t (*sf_bind_func)(void *cookie, socket_t so,
+		const struct sockaddr *to);
+
+/*!
+	@typedef sf_setoption_func
+
+	@discussion sf_setoption_func is called before performing setsockopt
+		on a socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param opt The socket option to set.
+	@result Return:
+		0 - The caller will continue with normal processing of the setsockopt.
+		Anything Else - The caller will stop processing and return this error.
+*/
+typedef errno_t (*sf_setoption_func)(void *cookie, socket_t so,
+		sockopt_t opt);
+
+/*!
+	@typedef sf_getoption_func
+
+	@discussion sf_getoption_func is called before performing getsockopt
+		on a socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param opt The socket option to get.
+	@result Return:
+		0 - The caller will continue with normal processing of the getsockopt.
+		Anything Else - The caller will stop processing and return this error.
+*/
+typedef errno_t (*sf_getoption_func)(void *cookie, socket_t so,
+		sockopt_t opt);
+
+/*!
+	@typedef sf_listen_func
+
+	@discussion sf_listen_func is called before performing listen
+		on a socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@result Return:
+		0 - The caller will continue with normal processing of listen.
+		Anything Else - The caller will stop processing and return this error.
+*/
+typedef errno_t (*sf_listen_func)(void *cookie, socket_t so);
+
+/*!
+	@typedef sf_ioctl_func
+
+	@discussion sf_ioctl_func is called before performing an ioctl
+		on a socket.
+	@param cookie Cookie value specified when the filter attach was
+		called.
+	@param so The socket the filter is attached to.
+	@param request The ioctl name.
+	@param argp A pointer to the ioctl parameter.
+	@result Return:
+		0 - The caller will continue with normal processing of this ioctl.
+		Anything Else - The caller will stop processing and return this error.
+*/
+typedef errno_t (*sf_ioctl_func)(void *cookie, socket_t so,
+		u_int32_t request, const char* argp);
+
+/*!
+	@struct sflt_filter
+	@discussion This structure is used to define a socket filter.
+	@field sf_handle A value used to find socket filters by
+		applications. An application can use this value to specify that
+		this filter should be attached when using the SO_NKE socket
+		option.
+	@field sf_flags Indicate whether this filter should be attached to
+		all new sockets or just those that request the filter be
+		attached using the SO_NKE socket option.
+	@field sf_name A name used for debug purposes.
+	@field sf_unregistered Your function for being notified when your
+		filter has been unregistered.
+	@field sf_attach Your function for handling attaches to sockets.
+	@field sf_detach Your function for handling detaches from sockets.
+	@field sf_notify Your function for handling events. May be null.
+	@field sf_data_in Your function for handling incoming data. May be
+		null.
+	@field sf_data_out Your function for handling outgoing data. May be
+		null.
+	@field sf_connect_in Your function for handling inbound
+		connections. May be null.
+	@field sf_connect_out Your function for handling outbound
+		connections. May be null.
+	@field sf_bind Your function for handling binds. May be null.
+	@field sf_setoption Your function for handling setsockopt. May be null.
+	@field sf_getoption Your function for handling getsockopt. May be null.
+	@field sf_listen Your function for handling listen. May be null.
+	@field sf_ioctl Your function for handling ioctls. May be null.
+*/
+struct sflt_filter {
+	sflt_handle		sf_handle;
+	int			sf_flags;
+	char*			sf_name;
+
+	sf_unregistered_func	sf_unregistered;
+	sf_attach_func		sf_attach;
+	sf_detach_func		sf_detach;
+
+	sf_notify_func		sf_notify;
+	sf_getpeername_func	sf_getpeername;
+	sf_getsockname_func	sf_getsockname;
+	sf_data_in_func		sf_data_in;
+	sf_data_out_func	sf_data_out;
+	sf_connect_in_func	sf_connect_in;
+	sf_connect_out_func	sf_connect_out;
+	sf_bind_func		sf_bind;
+	sf_setoption_func	sf_setoption;
+	sf_getoption_func	sf_getoption;
+	sf_listen_func		sf_listen;
+	sf_ioctl_func		sf_ioctl;
+};
+
+/*!
+	@function sflt_register
+	@discussion Registers a socket filter. See 'man 2 socket' for a
+		description of domain, type, and protocol.
+	@param filter A structure describing the filter.
+	@param domain The protocol domain these filters will be attached to.
+	@param type The socket type these filters will be attached to.
+	@param protocol The protocol these filters will be attached to.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t sflt_register(const struct sflt_filter *filter, int domain,
+		int type, int protocol);
+
+/*!
+	@function sflt_unregister
+	@discussion Unregisters a socket filter. This will not detach the
+		socket filter from all sockets it may be attached to at the
+		time, it will just prevent the socket filter from being attached
+		to any new sockets.
+	@param handle The sf_handle of the socket filter to unregister.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t sflt_unregister(sflt_handle handle);
+
+/*!
+	@function sflt_attach
+	@discussion Attaches a socket filter to the specified socket. A
+		filter must be registered before it can be attached.
+	@param socket The socket the filter should be attached to.
+	@param handle The handle of the registered filter to be attached.
+	@result 0 on success otherwise the errno error.
+ */
+errno_t sflt_attach(socket_t socket, sflt_handle handle);
+
+/*!
+	@function sflt_detach
+	@discussion Detaches a socket filter from a specified socket.
+	@param socket The socket the filter should be detached from.
+	@param handle The handle of the registered filter to be detached.
+	@result 0 on success otherwise the errno error.
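+
+		For context, an end-to-end sketch of the register/attach
+		lifecycle (editor's illustration; MY_HANDLE, my_unregistered,
+		my_attach and my_detach are hypothetical):
+
+			static struct sflt_filter my_filter = {
+				MY_HANDLE,		// sf_handle
+				SFLT_GLOBAL,		// sf_flags
+				"com.example.myfilter",	// sf_name
+				my_unregistered,
+				my_attach,
+				my_detach
+				// remaining fields left NULL
+			};
+
+			// typically called from a kext start routine:
+			errno_t err = sflt_register(&my_filter, PF_INET,
+			    SOCK_STREAM, IPPROTO_TCP);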
+ */
+errno_t sflt_detach(socket_t socket, sflt_handle handle);
+
+/* Functions for manipulating sockets */
+/*
+ * Inject data into the receive buffer of the socket as if it
+ * had come from the network.
+ *
+ * flags should match sflt_data_flag_t
+ */
+
+/*!
+	@function sock_inject_data_in
+	@discussion Inject data into the receive buffer of the socket as if
+		it had come from the network.
+	@param so The socket to inject the data on.
+	@param from The address the data is from, only necessary on
+		un-connected sockets. A copy of the address will be made, caller
+		is responsible for freeing the address after calling this
+		function.
+	@param data The data and possibly control mbufs.
+	@param control The separate control mbufs.
+	@param flags Flags indicating the type of data.
+	@result 0 on success otherwise the errno error. If the function
+		returns an error, the caller is responsible for freeing the
+		mbuf.
+ */
+errno_t sock_inject_data_in(socket_t so, const struct sockaddr* from,
+		mbuf_t data, mbuf_t control, sflt_data_flag_t flags);
+
+/*!
+	@function sock_inject_data_out
+	@discussion Inject data into the send buffer of the socket as if it
+		had come from the client.
+	@param so The socket to inject the data on.
+	@param to The address the data should be sent to, only necessary on
+		un-connected sockets. The caller is responsible for freeing the
+		to address after sock_inject_data_out returns.
+	@param data The data and possibly control mbufs.
+	@param control The separate control mbufs.
+	@param flags Flags indicating the type of data.
+	@result 0 on success otherwise the errno error. The data and control
+		values are always freed regardless of return value.
+ */
+errno_t sock_inject_data_out(socket_t so, const struct sockaddr* to,
+		mbuf_t data, mbuf_t control, sflt_data_flag_t flags);
+
+
+/*
+ * sockopt_t accessors
+ */
+
+enum {
+	sockopt_get = 1,
+	sockopt_set = 2
+};
+typedef u_int8_t sockopt_dir;
+
+/*!
+	@function sockopt_direction
+	@discussion Retrieves the direction of the socket option (Get or
+		Set).
+	@param sopt The socket option.
+	@result sockopt_get or sockopt_set.
+ */
+sockopt_dir sockopt_direction(sockopt_t sopt);
+
+/*!
+	@function sockopt_level
+	@discussion Retrieves the socket option level. (SOL_SOCKET, etc).
+	@param sopt The socket option.
+	@result The socket option level. See man 2 setsockopt
+ */
+int sockopt_level(sockopt_t sopt);
+
+/*!
+	@function sockopt_name
+	@discussion Retrieves the socket option name. (SO_SNDBUF, etc).
+	@param sopt The socket option.
+	@result The socket option name. See man 2 setsockopt
+ */
+int sockopt_name(sockopt_t sopt);
+
+/*!
+	@function sockopt_valsize
+	@discussion Retrieves the size of the socket option data.
+	@param sopt The socket option.
+	@result The length, in bytes, of the data.
+ */
+size_t sockopt_valsize(sockopt_t sopt);
+
+/*!
+	@function sockopt_copyin
+	@discussion Copies the data from the socket option to a buffer.
+	@param sopt The socket option.
+	@param data A pointer to the buffer to copy the data in to.
+	@param length The number of bytes to copy.
+	@result An errno error or zero upon success.
+ */
+errno_t sockopt_copyin(sockopt_t sopt, void *data, size_t length);
+
+/*!
+	@function sockopt_copyout
+	@discussion Copies the data from a buffer to a socket option.
+	@param sopt The socket option.
+	@param data A pointer to the buffer to copy the data out of.
+	@param length The number of bytes to copy.
+	@result An errno error or zero upon success.
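+
+		For illustration of the accessor pair in an sf_setoption_func
+		(editor's sketch; my_setoption and MY_SNDBUF_LIMIT are
+		hypothetical):
+
+			static errno_t
+			my_setoption(void *cookie, socket_t so, sockopt_t opt)
+			{
+				int val;
+
+				if (sockopt_level(opt) == SOL_SOCKET &&
+				    sockopt_name(opt) == SO_SNDBUF &&
+				    sockopt_valsize(opt) == sizeof(val) &&
+				    sockopt_copyin(opt, &val, sizeof(val)) == 0 &&
+				    val > MY_SNDBUF_LIMIT)
+					return EPERM;	// veto the setsockopt
+				return 0;
+			}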
+ */ +errno_t sockopt_copyout(sockopt_t sopt, void *data, size_t length); + +#endif diff --git a/bsd/sys/ktrace.h b/bsd/sys/ktrace.h index ce39adb2d..f07a9a8d1 100644 --- a/bsd/sys/ktrace.h +++ b/bsd/sys/ktrace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,15 +61,12 @@ #include -#if defined(MACH_KERNEL_PRIVATE) +#ifdef MACH_KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE -void ktrsyscall(void *, int, int, void *, int); +void ktrsyscall(void *, int, int, u_int64_t *, int); void ktrsysret(void *, int, int, int, int); -#endif /* __APPLE_API_PRIVATE */ #else - #ifdef __APPLE_API_UNSTABLE /* * operations to ktrace system call (KTROP(op)) @@ -85,6 +82,8 @@ void ktrsysret(void *, int, int, int, int); /* * ktrace record header + * + * LP64todo: not 64-bit safe */ struct ktr_header { int ktr_len; /* length of buf */ @@ -115,7 +114,7 @@ struct ktr_syscall { /* * followed by ktr_narg register_t */ - register_t ktr_args[1]; + u_int64_t ktr_args[1]; }; /* @@ -194,21 +193,20 @@ struct ktr_csw { #ifdef KERNEL #ifdef __APPLE_API_PRIVATE -void ktrnamei __P((struct vnode *,char *)); -void ktrcsw __P((struct vnode *, int, int, int)); -void ktrpsig __P((struct vnode *, int, sig_t, sigset_t *, int, int)); -void ktrgenio __P((struct vnode *, int, enum uio_rw, - struct uio *, int, int)); -void ktrsyscall __P((struct proc *, int, int, register_t args[], int)); -void ktrsysret __P((struct proc *, int, int, register_t, int)); +void ktrnamei(struct vnode *,char *); +void ktrcsw(struct vnode *, int, int); +void ktrpsig(struct vnode *, int, sig_t, sigset_t *, int); +void ktrgenio(struct vnode *, int, enum uio_rw, struct uio *, int); +void ktrsyscall(struct proc *, int, int, u_int64_t args[]); +void ktrsysret(struct proc *, int, int, register_t); #endif /* __APPLE_API_PRIVATE */ #else #include __BEGIN_DECLS -int ktrace __P((const char *, int, int, pid_t)); -int utrace __P((const void *, size_t)); +int ktrace(const char *, int, int, pid_t); +int utrace(const void *, size_t); __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/loadable_fs.h b/bsd/sys/loadable_fs.h index 1d198c74d..95f1c727d 100644 --- a/bsd/sys/loadable_fs.h +++ b/bsd/sys/loadable_fs.h @@ -37,9 +37,7 @@ #ifndef _SYS_LOADABLE_FS_ #define _SYS_LOADABLE_FS_ -#include -#ifdef __APPLE_API_UNSTABLE /* * Constants for Loadabls FS Utilities (in "/System/Library/Filesystems") * @@ -116,5 +114,4 @@ #define MNTOPT_FS "filesystem=" /* e.g. "filesystem=DOS" */ #define MNTOPT_REMOVABLE "removable" -#endif /* __APPLE_API_UNSTABLE */ #endif /* _SYS_LOADABLE_FS_ */ diff --git a/bsd/sys/lock.h b/bsd/sys/lock.h index c866f9570..5364f66e1 100644 --- a/bsd/sys/lock.h +++ b/bsd/sys/lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,76 +63,69 @@ #define _SYS_LOCK_H_ #include +#include +#include #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE -#include -#include -#if defined(simple_lock_init) -#undef simple_lock_init -#endif -#define simple_lock_init(l) usimple_lock_init((l),0) - -#if defined(simple_lock) -#undef simple_lock -#endif -#define simple_lock(l) ((void) 1) +#include -#if defined(simple_unlock) -#undef simple_unlock -#endif -#define simple_unlock(l) ((void) 1) - -#if defined(simple_lock_try) -#undef simple_lock_try -#endif -#define simple_lock_try(l) 1 #if defined(thread_sleep_simple_lock) #undef thread_sleep_simple_lock #endif #define thread_sleep_simple_lock(l, e, i) thread_sleep_funnel((e), (i)) -#endif /* __APPLE_API_UNSTABLE */ -#else /* KERNEL */ - -#ifndef _MACHINE_SIMPLE_LOCK_DATA_ -#define _MACHINE_SIMPLE_LOCK_DATA_ - -#include - -struct slock{ - volatile unsigned int lock_data[10]; -}; -typedef struct slock simple_lock_data_t; -typedef struct slock *simple_lock_t; -#define decl_simple_lock_data(class,name) \ -class simple_lock_data_t name; - -#endif /* _MACHINE_SIMPLE_LOCK_DATA_ */ #endif /* KERNEL */ -#ifdef __APPLE_API_UNSTABLE +#ifdef BSD_KERNEL_PRIVATE /* * The general lock structure. Provides for multiple shared locks, * upgrading from shared to exclusive, and sleeping until the lock * can be gained. The simple locks are defined in . */ struct lock__bsd__ { - simple_lock_data_t - lk_interlock; /* lock on remaining fields */ + void * lk_interlock[10]; /* lock on remaining fields */ u_int lk_flags; /* see below */ int lk_sharecount; /* # of accepted shared locks */ int lk_waitcount; /* # of processes sleeping for lock */ short lk_exclusivecount; /* # of recursive exclusive locks */ short lk_prio; /* priority at which to sleep */ - char *lk_wmesg; /* resource sleeping (for tsleep) */ + const char *lk_wmesg; /* resource sleeping (for tsleep) */ int lk_timo; /* maximum sleep time (for tsleep) */ pid_t lk_lockholder; /* pid of exclusive lock holder */ void *lk_lockthread; /* thread which acquired excl lock */ }; + +// LP64todo - should this move? + +/* LP64 version of lock__bsd__. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with lock__bsd__ + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_lock__bsd__ { + user_addr_t lk_interlock[10]; /* lock on remaining fields */ + u_int lk_flags; /* see below */ + int lk_sharecount; /* # of accepted shared locks */ + int lk_waitcount; /* # of processes sleeping for lock */ + short lk_exclusivecount; /* # of recursive exclusive locks */ + short lk_prio; /* priority at which to sleep */ + user_addr_t lk_wmesg; /* resource sleeping (for tsleep) */ + int lk_timo; /* maximum sleep time (for tsleep) */ + pid_t lk_lockholder; /* pid of exclusive lock holder */ + user_addr_t lk_lockthread; /* thread which acquired excl lock */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + /* * Lock request types: * LK_SHARED - get one of many possible shared locks. 
If a process @@ -231,12 +224,12 @@ struct lock__bsd__ { struct proc; -void lockinit __P((struct lock__bsd__ *, int prio, char *wmesg, int timo, - int flags)); -int lockmgr __P((struct lock__bsd__ *, u_int flags, - simple_lock_t, struct proc *p)); -int lockstatus __P((struct lock__bsd__ *)); +void lockinit(struct lock__bsd__ *, int prio, const char *wmesg, int timo, + int flags); +int lockmgr(struct lock__bsd__ *, u_int flags, + void *, struct proc *p); +int lockstatus(struct lock__bsd__ *); -#endif /* __APPLE_API_UNSTABLE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _SYS_LOCK_H_ */ diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index 6461cea8e..7c3e814d0 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,7 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. @@ -35,10 +34,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -56,59 +51,63 @@ * SUCH DAMAGE. * * @(#)lockf.h 8.1 (Berkeley) 6/11/93 + * $FreeBSD: src/sys/sys/lockf.h,v 1.16 2004/04/07 04:19:49 imp Exp $ */ #ifndef _SYS_LOCKF_H_ #define _SYS_LOCKF_H_ -#include -#include +#include +#include + +struct vnop_advlock_args; +struct vnode; + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_LOCKF); +#endif -#ifdef __APPLE_API_PRIVATE /* * The lockf structure is a kernel structure which contains the information * associated with a byte range lock. The lockf structures are linked into * the inode structure. Locks are sorted by the starting byte of the lock for * efficiency. 
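 *
 * (Editor's illustration, not part of the patch: a filesystem that
 * keeps no private byte-range locking state could implement its
 * advisory-lock operation by forwarding to lf_advlock(); the name
 * myfs_vnop_advlock is hypothetical.
 *
 *	static int
 *	myfs_vnop_advlock(struct vnop_advlock_args *ap)
 *	{
 *		return (lf_advlock(ap));
 *	}
 * )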
 */
+TAILQ_HEAD(locklist, lockf);
+
+#if __DARWIN_ALIGN_POWER
+#pragma options align=power
+#endif
+
 struct lockf {
-	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
+	short	lf_flags;	 /* Semantics: F_POSIX, F_FLOCK, F_WAIT */
 	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
-	off_t	lf_start;	 /* The byte # of the start of the lock */
-	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
-	caddr_t	lf_id;		 /* The id of the resource holding the lock */
-	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
-	struct	lockf *lf_next;	 /* A pointer to the next lock on this inode */
-	struct	lockf *lf_block; /* The list of blocked locks */
+	off_t	lf_start;	 /* Byte # of the start of the lock */
+	off_t	lf_end;		 /* Byte # of the end of the lock (-1=EOF) */
+	caddr_t	lf_id;		 /* Id of the resource holding the lock */
+	struct	lockf **lf_head; /* Back pointer to the head of the lockf list */
+	struct	vnode *lf_vnode; /* Back pointer to the inode */
+	struct	lockf *lf_next;	 /* Pointer to the next lock on this inode */
+	struct	locklist lf_blkhd;	/* List of requests blocked on this lock */
+	TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
 };
 
+#if __DARWIN_ALIGN_POWER
+#pragma options align=reset
+#endif
+
 /* Maximum length of sleep chains to traverse to try and detect deadlock. */
 #define MAXDEPTH 50
 
 __BEGIN_DECLS
-void	lf_addblock __P((struct lockf *, struct lockf *));
-int	lf_advlock __P((struct lockf **,
-	    off_t, caddr_t, int, struct flock *, int));
-int	lf_clearlock __P((struct lockf *));
-int	lf_findoverlap __P((struct lockf *,
-	    struct lockf *, int, struct lockf ***, struct lockf **));
-struct lockf *
-	lf_getblock __P((struct lockf *));
-int	lf_getlock __P((struct lockf *, struct flock *));
-int	lf_setlock __P((struct lockf *));
-void	lf_split __P((struct lockf *, struct lockf *));
-void	lf_wakelock __P((struct lockf *));
-__END_DECLS
-#if LOCKF_DEBUG
-extern int lockf_debug;
+int	lf_advlock(struct vnop_advlock_args *);
 
-__BEGIN_DECLS
-void	lf_print __P((char *, struct lockf *));
-void	lf_printlist __P((char *, struct lockf *));
-__END_DECLS
-#endif /* LOCKF_DEBUG */
+#ifdef LOCKF_DEBUG
+void	lf_print(char *, struct lockf *);
+void	lf_printlist(char *, struct lockf *);
+#endif
 
-#endif /* __APPLE_API_PRIVATE */
+__END_DECLS
 
 #endif /* !_SYS_LOCKF_H_ */
diff --git a/bsd/sys/mach_swapon.h b/bsd/sys/mach_swapon.h
index fcba2d61b..152a9265b 100644
--- a/bsd/sys/mach_swapon.h
+++ b/bsd/sys/mach_swapon.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -23,11 +23,7 @@
 * Copyright (c) 1989,1995 NeXT, Inc.
 * All rights reserved.
 *
- * The NEXTSTEP Software License Agreement specifies the terms
- * and conditions for redistribution.
- *
 */
-
 #ifndef	_MACH_SWAPON_H
 #define	_MACH_SWAPON_H
diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h
index 0e68c6663..1bb501e9c 100644
--- a/bsd/sys/malloc.h
+++ b/bsd/sys/malloc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -61,8 +61,8 @@ #include -#define KMEMSTATS +#ifdef KERNEL /* * flags to malloc */ @@ -70,6 +70,11 @@ #define M_NOWAIT 0x0001 #define M_ZERO 0x0004 /* bzero the allocation */ + +#ifdef BSD_KERNEL_PRIVATE + +#define KMEMSTATS + /* * Types of memory to be allocated (not all are used by us) */ @@ -92,7 +97,7 @@ #define M_CRED 16 /* credentials */ #define M_PGRP 17 /* process group header */ #define M_SESSION 18 /* session header */ -#define M_IOV 19 /* large iov's */ +#define M_IOV32 19 /* large iov's for 32 bit process */ #define M_MOUNT 20 /* vfs mount struct */ #define M_FHANDLE 21 /* network file handle */ #define M_NFSREQ 22 /* NFS request header */ @@ -111,7 +116,7 @@ #define M_VMPVENT 35 /* VM phys-virt mapping entry */ #define M_VMPAGER 36 /* XXX: VM pager struct */ #define M_VMPGDATA 37 /* XXX: VM pager private data */ -#define M_FILE 38 /* Open file structure */ +#define M_FILEPROC 38 /* Open file structure */ #define M_FILEDESC 39 /* Open file descriptor table */ #define M_LOCKF 40 /* Byte-range locking structures */ #define M_PROC 41 /* Proc structures */ @@ -120,8 +125,8 @@ #define M_LFSNODE 44 /* LFS vnode private part */ #define M_FFSNODE 45 /* FFS vnode private part */ #define M_MFSNODE 46 /* MFS vnode private part */ -#define M_NQLEASE 47 /* Nqnfs lease */ -#define M_NQMHOST 48 /* Nqnfs host address table */ +#define M_NQLEASE 47 /* XXX: Nqnfs lease */ +#define M_NQMHOST 48 /* XXX: Nqnfs host address table */ #define M_NETADDR 49 /* Export host address structure */ #define M_NFSSVC 50 /* Nfs server structure */ #define M_NFSUID 51 /* Nfs uid mapping structure */ @@ -169,8 +174,34 @@ #define M_JNL_TR 92 /* Journaling: "struct transaction" */ #define M_SPECINFO 93 /* special file node */ #define M_KQUEUE 94 /* kqueue */ +#define M_HFSDIRHINT 95 /* HFS directory hint */ +#define M_CLRDAHEAD 96 /* storage for cluster read-ahead state */ +#define M_CLWRBEHIND 97 /* storage for cluster write-behind state */ +#define M_IOV64 98 /* large iov's for 64 bit process */ +#define M_FILEGLOB 99 /* fileglobal */ +#define M_KAUTH 100 /* kauth subsystem */ +#define M_DUMMYNET 101 /* dummynet */ +#define M_UNSAFEFS 102 /* storage for vnode lock state for unsafe FS */ -#define M_LAST 95 /* Must be last type + 1 */ +#else /* BSD_KERNEL_PRIVATE */ + +#define M_RTABLE 5 /* routing tables */ +#define M_IFADDR 9 /* interface address (IOFireWireIP)*/ +#define M_LOCKF 40 /* Byte-range locking structures (msdos) */ +#define M_TEMP 80 /* misc temporary data buffers */ +#define M_HFSMNT 75 /* HFS mount structure (afpfs) */ +#define M_KAUTH 100 /* kauth subsystem (smb) */ +#define M_SONAME 11 /* socket name (smb) */ +#define M_PCB 4 /* protocol control block (smb) */ +#define M_UDFNODE 84 /* UDF inodes (udf)*/ +#define M_UDFMNT 85 /* UDF mount structures (udf)*/ + +#endif /* BSD_KERNEL_PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE + + +#define M_LAST 103 /* Must be last type + 1 */ /* Strings corresponding to types of memory */ /* Must be in synch with the #defines above */ @@ -194,7 +225,7 @@ "cred", /* 16 M_CRED */ \ "pgrp", /* 17 M_PGRP */ \ "session", /* 18 M_SESSION */ \ - "iov", /* 19 M_IOV */ \ + "iov32", /* 19 M_IOV32 */ \ "mount", /* 20 M_MOUNT */ \ "fhandle", /* 21 M_FHANDLE */ \ "NFS req", /* 22 M_NFSREQ */ \ @@ -213,7 +244,7 @@ "VM pvmap", /* 35 M_VMPVENT */ \ "VM pager", /* 36 M_VMPAGER */ \ "VM pgdata", /* 37 M_VMPGDATA */ \ - "file", /* 38 M_FILE */ \ + "fileproc", /* 38 M_FILEPROC */ \ "file desc", /* 39 M_FILEDESC */ \ "lockf", /* 40 M_LOCKF */ \ "proc", /* 
41 M_PROC */ \ @@ -269,7 +300,15 @@ "Journal", /* 91 M_JNL_JNL */\ "Transaction", /* 92 M_JNL_TR */\ "specinfo", /* 93 M_SPECINFO */\ - "kqueue" /* 94 M_KQUEUE */\ + "kqueue", /* 94 M_KQUEUE */\ + "HFS dirhint", /* 95 M_HFSDIRHINT */ \ + "cluster_read", /* 96 M_CLRDAHEAD */ \ + "cluster_write",/* 97 M_CLWRBEHIND */ \ + "iov64", /* 98 M_IOV64 */ \ + "fileglob", /* 99 M_FILEGLOB */ \ + "kauth", /* 100 M_KAUTH */ \ + "dummynet", /* 101 M_DUMMYNET */ \ + "unsafe_fsnode" /* 102 M_UNSAFEFS */ \ } struct kmemstats { @@ -285,10 +324,9 @@ struct kmemstats { long ks_spare; }; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE extern struct kmemstats kmemstats[]; -#endif /* __APPLE_API_PRIVATE */ + +#endif /* BSD_KERNEL_PRIVATE */ /* * The malloc/free primatives used @@ -306,24 +344,24 @@ extern struct kmemstats kmemstats[]; #define FREE_ZONE(addr, size, type) \ _FREE_ZONE((void *)addr, size, type) -extern void *_MALLOC __P(( +extern void *_MALLOC( size_t size, int type, - int flags)); + int flags); -extern void _FREE __P(( +extern void _FREE( void *addr, - int type)); + int type); -extern void *_MALLOC_ZONE __P(( +extern void *_MALLOC_ZONE( size_t size, int type, - int flags)); + int flags); -extern void _FREE_ZONE __P(( +extern void _FREE_ZONE( void *elem, size_t size, - int type)); + int type); #endif /* KERNEL */ diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index 94efd165a..d7a417160 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -72,8 +72,13 @@ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ +#include #include + +#ifdef KERNEL_PRIVATE + #include +#include /* * Mbufs are of a single size, MSIZE (machine/param.h), which @@ -83,7 +88,6 @@ * at least MINCLSIZE of data must be stored. */ -#ifdef __APPLE_API_UNSTABLE #define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ @@ -117,6 +121,16 @@ struct m_hdr { short mh_flags; /* flags; see below */ }; +/* + * Packet tag structure (see below for details). 
+ */ +struct m_tag { + SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ + u_int16_t m_tag_type; /* Module specific type */ + u_int16_t m_tag_len; /* Length of data */ + u_int32_t m_tag_id; /* Module ID */ +}; + /* record/packet header in first mbuf of chain; valid if M_PKTHDR set */ struct pkthdr { int len; /* total packet length */ @@ -133,11 +147,11 @@ struct pkthdr { struct mbuf *aux; /* extra data buffer; ipsec/others */ #ifdef KERNEL_PRIVATE u_short vlan_tag; /* VLAN tag, host byte order */ - u_short reserved_1; /* for future use */ + u_short socket_id; /* socket id */ #else KERNEL_PRIVATE - void *reserved1; /* for future use */ + u_int reserved1; /* for future use */ #endif KERNEL_PRIVATE - void *reserved2; /* for future use */ + SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ }; @@ -194,10 +208,12 @@ struct mbuf { #define M_FRAG 0x0400 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x0800 /* packet is first fragment */ #define M_LASTFRAG 0x1000 /* packet is last fragment */ +#define M_PROMISC 0x2000 /* packet is promiscuous (shouldn't go to stack) */ /* flags copied when copying m_pkthdr */ -#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ - M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG) +#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO2|M_PROTO3 | \ + M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG | \ + M_FIRSTFRAG|M_LASTFRAG|M_PROMISC) /* flags indicating hw checksum support and sw checksum requirements [freebsd4.1]*/ #define CSUM_IP 0x0001 /* will csum IP */ @@ -214,7 +230,6 @@ struct mbuf { #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ -#ifdef KERNEL_PRIVATE /* * Note: see also IF_HWASSIST_CSUM defined in */ @@ -242,8 +257,11 @@ struct mbuf { #define MT_IFADDR 13 /* interface address */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ +#define MT_TAG 16 /* volatile metadata associated to pkts */ #define MT_MAX 32 /* enough? */ +#ifdef KERNEL_PRIVATE + /* flags to m_get/MGET */ /* Need to include malloc.h to get right options for malloc */ #include @@ -251,8 +269,6 @@ struct mbuf { #define M_DONTWAIT M_NOWAIT #define M_WAIT M_WAITOK -#ifdef __APPLE_API_PRIVATE - /* * mbuf utility macros: * @@ -261,14 +277,14 @@ struct mbuf { * drivers. */ +#ifdef _KERN_LOCKS_H_ +extern lck_mtx_t * mbuf_mlock; +#else +extern void * mbuf_mlock; +#endif -extern -decl_simple_lock_data(, mbuf_slock); -#define MBUF_LOCK() usimple_lock(&mbuf_slock); -#define MBUF_UNLOCK() usimple_unlock(&mbuf_slock); -#define MBUF_LOCKINIT() simple_lock_init(&mbuf_slock); - -#endif /* __APPLE_API_PRIVATE */ +#define MBUF_LOCK() lck_mtx_lock(mbuf_mlock); +#define MBUF_UNLOCK() lck_mtx_unlock(mbuf_mlock); /* * mbuf allocation/deallocation macros: @@ -287,11 +303,7 @@ decl_simple_lock_data(, mbuf_slock); #define MCHECK(m) #endif -#ifdef __APPLE_API_PRIVATE extern struct mbuf *mfree; /* mbuf free list */ -extern simple_lock_data_t mbuf_slock; -#endif /* __APPLE_API_PRIVATE */ - #define MGET(m, how, type) ((m) = m_get((how), (type))) @@ -320,6 +332,16 @@ union mcluster { #define MCLGET(m, how) ((m) = m_mclget(m, how)) +/* + * Mbuf big cluster + */ + +union mbigcluster { + union mbigcluster *mbc_next; + char mbc_buf[NBPG]; +}; + + #define MCLHASREFERENCE(m) m_mclhasreference(m) /* @@ -381,10 +403,35 @@ union mcluster { /* compatiblity with 4.3 */ #define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) +#endif /* KERNEL_PRIVATE */ + /* * Mbuf statistics. 
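 *
 * (Editor's illustration: user space can typically read a snapshot of
 * this structure via sysctl; the "kern.ipc.mbstat" name is assumed
 * here rather than guaranteed by this header.
 *
 *	struct mbstat mbs;
 *	size_t len = sizeof(mbs);
 *
 *	if (sysctlbyname("kern.ipc.mbstat", &mbs, &len, NULL, 0) == 0)
 *		printf("%lu mbufs, %lu clusters\n",
 *		    mbs.m_mbufs, mbs.m_clusters);
 * )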
 */
+/* LP64todo - not 64-bit safe */
 struct mbstat {
+	u_long	m_mbufs;	/* mbufs obtained from page pool */
+	u_long	m_clusters;	/* clusters obtained from page pool */
+	u_long	m_spare;	/* spare field */
+	u_long	m_clfree;	/* free clusters */
+	u_long	m_drops;	/* times failed to find space */
+	u_long	m_wait;		/* times waited for space */
+	u_long	m_drain;	/* times drained protocols for space */
+	u_short	m_mtypes[256];	/* type specific mbuf allocations */
+	u_long	m_mcfail;	/* times m_copym failed */
+	u_long	m_mpfail;	/* times m_pullup failed */
+	u_long	m_msize;	/* length of an mbuf */
+	u_long	m_mclbytes;	/* length of an mbuf cluster */
+	u_long	m_minclsize;	/* min length of data to allocate a cluster */
+	u_long	m_mlen;		/* length of data in an mbuf */
+	u_long	m_mhlen;	/* length of data in a header mbuf */
+	u_long	m_bigclusters;	/* big clusters obtained from page pool */
+	u_long	m_bigclfree;	/* free big clusters */
+	u_long	m_bigmclbytes;	/* length of an mbuf big cluster */
+};
+
+/* Compatibility with 10.3 */
+struct ombstat {
 	u_long	m_mbufs;	/* mbufs obtained from page pool */
 	u_long	m_clusters;	/* clusters obtained from page pool */
 	u_long	m_spare;	/* spare field */
@@ -401,6 +448,7 @@ struct mbstat {
 	u_long	m_mlen;		/* length of data in an mbuf */
 	u_long	m_mhlen;	/* length of data in a header mbuf */
 };
+#ifdef KERNEL_PRIVATE
 
 /*
  * pkthdr.aux type tags.
@@ -423,50 +471,136 @@ extern int max_protohdr; /* largest protocol header */
 extern int max_hdr;		/* largest link+protocol header */
 extern int max_datalen;		/* MHLEN - max_hdr */
 
-struct	mbuf *m_copym __P((struct mbuf *, int, int, int));
-struct	mbuf *m_split __P((struct mbuf *, int, int));
-struct	mbuf *m_free __P((struct mbuf *));
-struct	mbuf *m_get __P((int, int));
-struct	mbuf *m_getpacket __P((void));
-struct	mbuf *m_getclr __P((int, int));
-struct	mbuf *m_gethdr __P((int, int));
-struct	mbuf *m_prepend __P((struct mbuf *, int, int));
-struct	mbuf *m_prepend_2 __P((struct mbuf *, int, int));
-struct	mbuf *m_pullup __P((struct mbuf *, int));
-struct	mbuf *m_retry __P((int, int));
-struct	mbuf *m_retryhdr __P((int, int));
-void	m_adj __P((struct mbuf *, int));
-int	m_clalloc __P((int, int));
-void	m_freem __P((struct mbuf *));
-int	m_freem_list __P((struct mbuf *));
-struct	mbuf *m_devget __P((char *, int, int, struct ifnet *, void (*)()));
-char	*mcl_to_paddr __P((char *));
-struct	mbuf *m_pulldown __P((struct mbuf*, int, int, int*));
-struct	mbuf *m_aux_add __P((struct mbuf *, int, int));
-struct	mbuf *m_aux_find __P((struct mbuf *, int, int));
-void	m_aux_delete __P((struct mbuf *, struct mbuf *));
-
-struct	mbuf *m_mclget __P((struct mbuf *, int));
-caddr_t	m_mclalloc __P((int));
-void	m_mclfree __P((caddr_t p));
-int	m_mclhasreference __P((struct mbuf *));
-void	m_copy_pkthdr __P((struct mbuf *, struct mbuf*));
-
-int	m_mclref __P((struct mbuf *));
-int	m_mclunref __P((struct mbuf *));
-
-void *	m_mtod __P((struct mbuf *));
-struct	mbuf * m_dtom __P((void *));
-int	m_mtocl __P((void *));
-union	mcluster *m_cltom __P((int ));
-
-int	m_trailingspace __P((struct mbuf *));
-int	m_leadingspace __P((struct mbuf *));
-
-void	m_mchtype __P((struct mbuf *m, int t));
-
-void	m_mcheck __P((struct mbuf*));
+__BEGIN_DECLS
+struct	mbuf *m_copym(struct mbuf *, int, int, int);
+struct	mbuf *m_split(struct mbuf *, int, int);
+struct	mbuf *m_free(struct mbuf *);
+struct	mbuf *m_get(int, int);
+struct	mbuf *m_getpacket(void);
+struct	mbuf *m_getclr(int, int);
+struct	mbuf *m_gethdr(int, int);
+struct	mbuf *m_prepend(struct mbuf *, int, int);
+struct	mbuf *m_prepend_2(struct mbuf *, int, int);
+struct	mbuf *m_pullup(struct mbuf *, int);
+struct	mbuf *m_retry(int, int);
+struct	mbuf *m_retryhdr(int, int);
+void	m_adj(struct mbuf *, int);
+void	m_freem(struct mbuf *);
+int	m_freem_list(struct mbuf *);
+struct	mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(const void *, void *, size_t));
+char	*mcl_to_paddr(char *);
+struct	mbuf *m_pulldown(struct mbuf*, int, int, int*);
+struct	mbuf *m_aux_add(struct mbuf *, int, int);
+struct	mbuf *m_aux_find(struct mbuf *, int, int);
+void	m_aux_delete(struct mbuf *, struct mbuf *);
+
+struct	mbuf *m_mclget(struct mbuf *, int);
+caddr_t	m_mclalloc(int);
+void	m_mclfree(caddr_t p);
+int	m_mclhasreference(struct mbuf *);
+void	m_copy_pkthdr(struct mbuf *, struct mbuf*);
+
+int	m_mclref(struct mbuf *);
+int	m_mclunref(struct mbuf *);
+
+void *	m_mtod(struct mbuf *);
+struct	mbuf * m_dtom(void *);
+int	m_mtocl(void *);
+union	mcluster *m_cltom(int );
+
+int	m_trailingspace(struct mbuf *);
+int	m_leadingspace(struct mbuf *);
+
+void	m_mchtype(struct mbuf *m, int t);
+void	m_mcheck(struct mbuf*);
+
+void	m_copyback(struct mbuf *, int , int , caddr_t);
+void	m_copydata(struct mbuf *, int , int , caddr_t);
+struct	mbuf* m_dup(struct mbuf *m, int how);
+void	m_cat(struct mbuf *, struct mbuf *);
+struct	mbuf *m_copym_with_hdrs(struct mbuf*, int, int, int, struct mbuf**, int*);
+struct	mbuf *m_getpackets(int, int, int);
+struct	mbuf * m_getpackethdrs(int , int );
+struct	mbuf* m_getpacket_how(int );
+struct	mbuf * m_getpackets_internal(unsigned int *, int , int , int , size_t);
+struct	mbuf * m_allocpacket_internal(unsigned int * , size_t , unsigned int *, int , int , size_t );
+
+__END_DECLS
+/*
+ Packets may have annotations attached by affixing a list of "packet
+ tags" to the pkthdr structure. Packet tags are dynamically allocated
+ semi-opaque data structures that have a fixed header (struct m_tag)
+ that specifies the size of the memory block and an <id, type> pair that
+ identifies it. The id identifies the module and the type identifies the
+ type of data for that module. The id of zero is reserved for the kernel.
+
+ Note that the packet tag returned by m_tag_alloc has the default
+ memory alignment implemented by malloc. To reference private data one
+ can use a construct like:
+
+	struct m_tag *mtag = m_tag_alloc(...);
+	struct foo *p = (struct foo *)(mtag+1);
+
+ if the alignment of struct m_tag is sufficient for referencing members
+ of struct foo. Otherwise it is necessary to embed struct m_tag within
+ the private data structure to ensure proper alignment; e.g.
+
+	struct foo {
+		struct m_tag	tag;
+		...
+	};
+	struct foo *p = (struct foo *) m_tag_alloc(...);
+	struct m_tag *mtag = &p->tag;
+ */
+
+#define KERNEL_MODULE_TAG_ID	0
+
+enum {
+	KERNEL_TAG_TYPE_NONE		= 0,
+	KERNEL_TAG_TYPE_DUMMYNET	= 1,
+	KERNEL_TAG_TYPE_DIVERT		= 2,
+	KERNEL_TAG_TYPE_IPFORWARD	= 3,
+	KERNEL_TAG_TYPE_IPFILT		= 4
+};
+
+/*
+ * As a temporary and low impact solution to replace the even uglier
+ * approach used so far in some parts of the network stack (which relies
+ * on global variables), packet tag-like annotations are stored in MT_TAG
+ * mbufs (or lookalikes) prepended to the actual mbuf chain.
+ *
+ * m_type	= MT_TAG
+ * m_flags	= m_tag_id
+ * m_next	= next buffer in chain.
+ *
+ * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines.
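+ *
+ * (Editor's illustration of the real packet tag routines declared
+ * below, as opposed to the MT_TAG stopgap described above; MY_TAG_TYPE
+ * and struct my_tag_data are hypothetical, and KERNEL_MODULE_TAG_ID is
+ * used only as an example id.
+ *
+ *	struct m_tag *mtag;
+ *
+ *	mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, MY_TAG_TYPE,
+ *	    sizeof(struct my_tag_data), M_DONTWAIT);
+ *	if (mtag != NULL) {
+ *		((struct my_tag_data *)(mtag + 1))->value = 1;
+ *		m_tag_prepend(m, mtag);
+ *	}
+ *	...
+ *	mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, MY_TAG_TYPE, NULL);
+ * )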
+ */ +#define _m_tag_id m_hdr.mh_flags + +__BEGIN_DECLS + +/* Packet tag routines */ +struct m_tag *m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait); +void m_tag_free(struct m_tag *); +void m_tag_prepend(struct mbuf *, struct m_tag *); +void m_tag_unlink(struct mbuf *, struct m_tag *); +void m_tag_delete(struct mbuf *, struct m_tag *); +void m_tag_delete_chain(struct mbuf *, struct m_tag *); +struct m_tag *m_tag_locate(struct mbuf *,u_int32_t id, u_int16_t type, + struct m_tag *); +struct m_tag *m_tag_copy(struct m_tag *, int wait); +int m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int wait); +void m_tag_init(struct mbuf *); +struct m_tag *m_tag_first(struct mbuf *); +struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); + +__END_DECLS + +#endif /* KERNEL */ + +#endif /* KERNEL_PRIVATE */ +#ifdef KERNEL +#include #endif -#endif /* __APPLE_API_UNSTABLE */ #endif /* !_SYS_MBUF_H_ */ diff --git a/bsd/sys/md5.h b/bsd/sys/md5.h index 5ae59a40b..f825f0aaa 100644 --- a/bsd/sys/md5.h +++ b/bsd/sys/md5.h @@ -47,7 +47,7 @@ char * MD5End(MD5_CTX *, char *); char * MD5File(const char *, char *); char * MD5Data(const unsigned char *, unsigned int, char *); #ifdef KERNEL -void MD5Transform __P((u_int32_t [4], const unsigned char [64])); +void MD5Transform(u_int32_t [4], const unsigned char [64]); #endif __END_DECLS #endif /* !KERNEL || __APPLE_API_PRIVATE */ diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h index 7907a55b7..aeaab5a7f 100644 --- a/bsd/sys/mman.h +++ b/bsd/sys/mman.h @@ -55,127 +55,185 @@ * @(#)mman.h 8.1 (Berkeley) 6/2/93 */ +/* + * Currently unsupported: + * + * [TYM] POSIX_TYPED_MEM_ALLOCATE + * [TYM] POSIX_TYPED_MEM_ALLOCATE_CONTIG + * [TYM] POSIX_TYPED_MEM_MAP_ALLOCATABLE + * [TYM] struct posix_typed_mem_info + * [TYM] posix_mem_offset() + * [TYM] posix_typed_mem_get_info() + * [TYM] posix_typed_mem_open() + */ + #ifndef _SYS_MMAN_H_ #define _SYS_MMAN_H_ #include -#include +#include + +#include + +/* + * [various] The mode_t, off_t, and size_t types shall be defined as + * described in + */ +#ifndef _MODE_T +typedef __darwin_mode_t mode_t; +#define _MODE_T +#endif + +#ifndef _OFF_T +typedef __darwin_off_t off_t; +#define _OFF_T +#endif + +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + /* * Protections are chosen from these bits, or-ed together */ -#define PROT_NONE 0x00 /* no permissions */ -#define PROT_READ 0x01 /* pages can be read */ -#define PROT_WRITE 0x02 /* pages can be written */ -#define PROT_EXEC 0x04 /* pages can be executed */ +#define PROT_NONE 0x00 /* [MC2] no permissions */ +#define PROT_READ 0x01 /* [MC2] pages can be read */ +#define PROT_WRITE 0x02 /* [MC2] pages can be written */ +#define PROT_EXEC 0x04 /* [MC2] pages can be executed */ /* * Flags contain sharing type and options. * Sharing types; choose one. 
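 *
 * (Editor's illustration: a typical shared file mapping and flush using
 * these flags and the prototypes declared later in this header; fd is
 * assumed to be a valid open file descriptor and len a page-aligned
 * size.
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, (off_t)0);
 *	if (p != MAP_FAILED) {
 *		...
 *		msync(p, len, MS_SYNC);
 *		munmap(p, len);
 *	}
 * )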
*/ -#define MAP_SHARED 0x0001 /* share changes */ -#define MAP_PRIVATE 0x0002 /* changes are private */ +#define MAP_SHARED 0x0001 /* [MF|SHM] share changes */ +#define MAP_PRIVATE 0x0002 /* [MF|SHM] changes are private */ +#ifndef _POSIX_C_SOURCE #define MAP_COPY MAP_PRIVATE /* Obsolete */ +#endif /* !_POSIX_C_SOURCE */ /* * Other flags */ -#define MAP_FIXED 0x0010 /* map addr must be exactly as requested */ +#define MAP_FIXED 0x0010 /* [MF|SHM] interpret addr exactly */ +#ifndef _POSIX_C_SOURCE #define MAP_RENAME 0x0020 /* Sun: rename private pages to file */ #define MAP_NORESERVE 0x0040 /* Sun: don't reserve needed swap area */ #define MAP_RESERVED0080 0x0080 /* previously unimplemented MAP_INHERIT */ #define MAP_NOEXTEND 0x0100 /* for MAP_FILE, don't change file size */ #define MAP_HASSEMAPHORE 0x0200 /* region may contain semaphores */ +#endif /* !_POSIX_C_SOURCE */ -#ifdef _P1003_1B_VISIBLE /* * Process memory locking */ -#define MCL_CURRENT 0x0001 /* Lock only current memory */ -#define MCL_FUTURE 0x0002 /* Lock all future memory as well */ - -#endif /* _P1003_1B_VISIBLE */ +#define MCL_CURRENT 0x0001 /* [ML] Lock only current memory */ +#define MCL_FUTURE 0x0002 /* [ML] Lock all future memory as well */ /* * Error return from mmap() */ -#define MAP_FAILED ((void *)-1) +#define MAP_FAILED ((void *)-1) /* [MF|SHM] mmap failed */ /* * msync() flags */ -#define MS_SYNC 0x0000 /* msync synchronously */ -#define MS_ASYNC 0x0001 /* return immediately */ -#define MS_INVALIDATE 0x0002 /* invalidate all cached data */ +#define MS_ASYNC 0x0001 /* [MF|SIO] return immediately */ +#define MS_INVALIDATE 0x0002 /* [MF|SIO] invalidate all cached data */ +#define MS_SYNC 0x0010 /* [MF|SIO] msync synchronously */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define MS_KILLPAGES 0x0004 /* invalidate pages, leave mapped */ #define MS_DEACTIVATE 0x0008 /* deactivate pages, leave mapped */ -#endif /* * Mapping type */ #define MAP_FILE 0x0000 /* map from file (default) */ #define MAP_ANON 0x1000 /* allocated from memory, swap space */ +#endif /* !_POSIX_C_SOURCE */ + /* * Advice to madvise */ -#define MADV_NORMAL 0 /* no further special treatment */ -#define MADV_RANDOM 1 /* expect random page references */ -#define MADV_SEQUENTIAL 2 /* expect sequential page references */ -#define MADV_WILLNEED 3 /* will need these pages */ -#define MADV_DONTNEED 4 /* dont need these pages */ -#define MADV_FREE 5 /* dont need these pages, and junk contents */ -#define POSIX_MADV_NORMAL MADV_NORMAL -#define POSIX_MADV_RANDOM MADV_RANDOM -#define POSIX_MADV_SEQUENTIAL MADV_SEQUENTIAL -#define POSIX_MADV_WILLNEED MADV_WILLNEED -#define POSIX_MADV_DONTNEED MADV_DONTNEED +#define POSIX_MADV_NORMAL 0 /* [MC1] no further special treatment */ +#define POSIX_MADV_RANDOM 1 /* [MC1] expect random page refs */ +#define POSIX_MADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */ +#define POSIX_MADV_WILLNEED 3 /* [MC1] will need these pages */ +#define POSIX_MADV_DONTNEED 4 /* [MC1] dont need these pages */ + +#ifndef _POSIX_C_SOURCE +#define MADV_NORMAL POSIX_MADV_NORMAL +#define MADV_RANDOM POSIX_MADV_RANDOM +#define MADV_SEQUENTIAL POSIX_MADV_SEQUENTIAL +#define MADV_WILLNEED POSIX_MADV_WILLNEED +#define MADV_DONTNEED POSIX_MADV_DONTNEED +#define MADV_FREE 5 /* pages unneeded, discard contents */ /* * Return bits from mincore */ -#define MINCORE_INCORE 0x1 /* Page is incore */ -#define MINCORE_REFERENCED 0x2 /* Page has been referenced by us */ -#define MINCORE_MODIFIED 0x4 /* Page has been modified by us */ -#define 
MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */ -#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */ +#define MINCORE_INCORE 0x1 /* Page is incore */ +#define MINCORE_REFERENCED 0x2 /* Page has been referenced by us */ +#define MINCORE_MODIFIED 0x4 /* Page has been modified by us */ +#define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */ +#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */ +#endif /* !_POSIX_C_SOURCE */ -#ifndef KERNEL -#include +#ifndef KERNEL __BEGIN_DECLS -#ifdef _P1003_1B_VISIBLE -int mlockall __P((int)); -int munlockall __P((void)); -#endif /* _P1003_1B_VISIBLE */ -int mlock __P((const void *, size_t)); -#ifndef _MMAP_DECLARED -#define _MMAP_DECLARED -void * mmap __P((void *, size_t, int, int, int, off_t)); +/* [ML] */ +int mlockall(int); +int munlockall(void); +/* [MR] */ +int mlock(const void *, size_t); +#ifndef _MMAP +#define _MMAP +/* [MC3]*/ +void * mmap(void *, size_t, int, int, int, off_t) __DARWIN_ALIAS(mmap); #endif -int mprotect __P((const void *, size_t, int)); -int msync __P((void *, size_t, int)); -int munlock __P((const void *, size_t)); -int munmap __P((void *, size_t)); -int shm_open __P((const char *, int, ...)); -int shm_unlink __P((const char *)); -int posix_madvise __P((void *, size_t, int)); -#ifndef _POSIX_SOURCE -#ifdef __APPLE_API_PRIVATE -int load_shared_file __P((char *, caddr_t, u_long, - caddr_t *, int, sf_mapping_t *, int *)); -int reset_shared_file __P((caddr_t *, int, sf_mapping_t *)); -int new_system_shared_regions __P((void)); -#endif /* __APPLE_API_PRIVATE */ -int madvise __P((void *, size_t, int)); -int mincore __P((const void *, size_t, char *)); -int minherit __P((void *, size_t, int)); +/* [MPR] */ +int mprotect(void *, size_t, int) __DARWIN_ALIAS(mprotect); +/* [MF|SIO] */ +int msync(void *, size_t, int) __DARWIN_ALIAS(msync); +/* [MR] */ +int munlock(const void *, size_t); +/* [MC3]*/ +int munmap(void *, size_t) __DARWIN_ALIAS(munmap); +/* [SHM] */ +int shm_open(const char *, int, ...); +int shm_unlink(const char *); +/* [ADV] */ +int posix_madvise(void *, size_t, int); + +#ifndef _POSIX_C_SOURCE +int madvise(void *, size_t, int); +int mincore(const void *, size_t, char *); +int minherit(void *, size_t, int); #endif __END_DECLS -#endif /* !KERNEL */ +#else /* KERNEL */ + +void pshm_cache_init(void); /* for bsd_init() */ + +/* + * XXX routine exported by posix_shm.c, but never used there, only used in + * XXX kern_mman.c in the implementation of mmap(). + */ +struct mmap_args; +struct fileproc; +int pshm_mmap(struct proc *p, struct mmap_args *uap, user_addr_t *retval, + struct fileproc *fp, off_t pageoff); +/* Really need to overhaul struct fileops to avoid this... */ +struct pshmnode; +int pshm_stat(struct pshmnode *pnode, struct stat *sb); +struct fileproc; +int pshm_truncate(struct proc *p, struct fileproc *fp, int fd, off_t length, register_t *retval); + +#endif /* KERNEL */ #endif /* !_SYS_MMAN_H_ */ diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index 539742c58..58d5cc0ea 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -59,28 +59,19 @@
 #define _SYS_MOUNT_H_
 
 #include
+#include
+#include	/* needed for vol_capabilities_attr_t */
+
 #ifndef KERNEL
+#include
 #include
+#include	/* XXX needed for user builds */
+#else
+#include
 #endif
 
-#include
-#include
-#include
-#include	/* XXX for AF_MAX */
 
 typedef struct fsid { int32_t val[2]; } fsid_t;	/* file system id type */
 
-/*
- * File identifier.
- * These are unique per filesystem on a single machine.
- */
-#define	MAXFIDSZ	16
-
-struct fid {
-	u_short		fid_len;		/* length of data in bytes */
-	u_short		fid_reserved;		/* force longword alignment */
-	char		fid_data[MAXFIDSZ];	/* data (variable length) */
-};
-
 /*
  * file system statistics
  */
@@ -88,6 +79,9 @@ struct fid {
 #define	MFSNAMELEN	15	/* length of fs type name, not inc. null */
 #define	MNAMELEN	90	/* length of buffer for returned name */
 
+/*
+ * LP64 - WARNING - must be kept in sync with struct user_statfs in mount_internal.h.
+ */
 struct statfs {
 	short	f_otype;	/* TEMPORARY SHADOW COPY OF f_type */
 	short	f_oflags;	/* TEMPORARY SHADOW COPY OF f_flags */
@@ -116,38 +110,116 @@ struct statfs {
 #endif
 };
 
-#ifdef __APPLE_API_PRIVATE
+
+#define MFSTYPENAMELEN	16	/* length of fs type name including null */
+
+#if __DARWIN_ALIGN_POWER
+#pragma options align=power
+#endif
+
+struct vfsstatfs {
+	uint32_t	f_bsize;	/* fundamental file system block size */
+	size_t		f_iosize;	/* optimal transfer block size */
+	uint64_t	f_blocks;	/* total data blocks in file system */
+	uint64_t	f_bfree;	/* free blocks in fs */
+	uint64_t	f_bavail;	/* free blocks avail to non-superuser */
+	uint64_t	f_bused;	/* blocks in use */
+	uint64_t	f_files;	/* total file nodes in file system */
+	uint64_t	f_ffree;	/* free file nodes in fs */
+	fsid_t		f_fsid;		/* file system id */
+	uid_t		f_owner;	/* user that mounted the filesystem */
+	uint64_t	f_flags;	/* copy of mount exported flags */
+	char		f_fstypename[MFSTYPENAMELEN];	/* fs type name (including null) */
+	char		f_mntonname[MAXPATHLEN];	/* directory on which mounted */
+	char		f_mntfromname[MAXPATHLEN];	/* mounted filesystem */
+	uint32_t	f_fssubtype;	/* fs sub-type (flavor) */
+	void		*f_reserved[2];	/* For future use == 0 */
+};
+
+#if __DARWIN_ALIGN_POWER
+#pragma options align=reset
+#endif
+
+#define VFSATTR_INIT(s)			((s)->f_supported = (s)->f_active = 0LL)
+#define VFSATTR_SET_SUPPORTED(s, a)	((s)->f_supported |= VFSATTR_ ## a)
+#define VFSATTR_IS_SUPPORTED(s, a)	((s)->f_supported & VFSATTR_ ## a)
+#define VFSATTR_CLEAR_ACTIVE(s, a)	((s)->f_active &= ~VFSATTR_ ## a)
+#define VFSATTR_IS_ACTIVE(s, a)		((s)->f_active & VFSATTR_ ## a)
+#define VFSATTR_ALL_SUPPORTED(s)	(((s)->f_active & (s)->f_supported) == (s)->f_active)
+#define VFSATTR_WANTED(s, a)		((s)->f_active |= VFSATTR_ ## a)
+#define VFSATTR_RETURN(s, a, x)		do { (s)-> a = (x); VFSATTR_SET_SUPPORTED(s, a);} while(0)
+
+#define VFSATTR_f_objcount		(1LL<< 0)
+#define VFSATTR_f_filecount		(1LL<< 1)
+#define VFSATTR_f_dircount		(1LL<< 2)
+#define VFSATTR_f_maxobjcount		(1LL<< 3)
+#define VFSATTR_f_bsize			(1LL<< 4)
+#define VFSATTR_f_iosize		(1LL<< 5)
+#define VFSATTR_f_blocks		(1LL<< 6)
+#define VFSATTR_f_bfree			(1LL<< 7)
+#define VFSATTR_f_bavail		(1LL<< 8)
+#define VFSATTR_f_bused			(1LL<< 9)
+#define VFSATTR_f_files			(1LL<< 10)
+#define VFSATTR_f_ffree			(1LL<< 11)
+#define VFSATTR_f_fsid			(1LL<< 12)
+#define VFSATTR_f_owner			(1LL<< 13)
+#define VFSATTR_f_capabilities		(1LL<< 14)
+#define VFSATTR_f_attributes		(1LL<< 15)
+#define VFSATTR_f_create_time		(1LL<< 16)
+#define VFSATTR_f_modify_time		(1LL<< 17)
+#define 
VFSATTR_f_access_time (1LL<< 18) +#define VFSATTR_f_backup_time (1LL<< 19) +#define VFSATTR_f_fssubtype (1LL<< 20) +#define VFSATTR_f_vol_name (1LL<< 21) +#define VFSATTR_f_signature (1LL<< 22) +#define VFSATTR_f_carbon_fsid (1LL<< 23) + /* - * Structure per mounted file system. Each mounted file system has an - * array of operations and an instance record. The file systems are - * put on a doubly linked list. + * New VFS_STAT argument structure. */ -LIST_HEAD(vnodelst, vnode); - -struct mount { - CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ - struct vfsops *mnt_op; /* operations on fs */ - struct vfsconf *mnt_vfc; /* configuration info */ - struct vnode *mnt_vnodecovered; /* vnode we mounted on */ - struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ - struct lock__bsd__ mnt_lock; /* mount structure lock */ - int mnt_flag; /* flags */ - int mnt_kern_flag; /* kernel only flags */ - int mnt_maxsymlinklen; /* max size of short symlink */ - struct statfs mnt_stat; /* cache of filesystem stats */ - qaddr_t mnt_data; /* private data */ - /* Cached values of the IO constraints for the device */ - union { - u_int32_t mntu_maxreadcnt; /* Max. byte count for read */ - void *mntu_xinfo_ptr; /* points at extended IO constraints */ - } mnt_un; /* if MNTK_IO_XINFO is set */ -#define mnt_maxreadcnt mnt_un.mntu_maxreadcnt -#define mnt_xinfo_ptr mnt_un.mntu_xinfo_ptr - u_int32_t mnt_maxwritecnt; /* Max. byte count for write */ - u_int16_t mnt_segreadcnt; /* Max. segment count for read */ - u_int16_t mnt_segwritecnt; /* Max. segment count for write */ +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + +struct vfs_attr { + uint64_t f_supported; + uint64_t f_active; + + uint64_t f_objcount; /* number of filesystem objects in volume */ + uint64_t f_filecount; /* ... files */ + uint64_t f_dircount; /* ... directories */ + uint64_t f_maxobjcount; /* maximum number of filesystem objects */ + + uint32_t f_bsize; /* block size for the below size values */ + size_t f_iosize; /* optimal transfer block size */ + uint64_t f_blocks; /* total data blocks in file system */ + uint64_t f_bfree; /* free blocks in fs */ + uint64_t f_bavail; /* free blocks avail to non-superuser */ + uint64_t f_bused; /* blocks in use */ + uint64_t f_files; /* total file nodes in file system */ + uint64_t f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + + vol_capabilities_attr_t f_capabilities; + vol_attributes_attr_t f_attributes; + + struct timespec f_create_time; /* creation time */ + struct timespec f_modify_time; /* last modification time */ + struct timespec f_access_time; /* time of last access */ + struct timespec f_backup_time; /* last backup time */ + + uint32_t f_fssubtype; /* filesystem subtype */ + + char *f_vol_name; /* volume name */ + + uint16_t f_signature; /* used for ATTR_VOL_SIGNATURE, Carbon's FSVolumeInfo.signature */ + uint16_t f_carbon_fsid; /* same as Carbon's FSVolumeInfo.filesystemID */ }; -#endif /* __APPLE_API_PRIVATE */ + +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif /* * User specifiable flags. 
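/*
 * [Editorial aside -- illustrative sketch, not part of the original patch.]
 * The vfs_attr structure and the VFSATTR_* macros above form a
 * wanted/supported handshake: the caller initializes the descriptor and
 * marks the attributes it wants; the filesystem answers each one it can
 * with VFSATTR_RETURN(), which stores the value and sets the matching
 * f_supported bit. A minimal sketch under stated assumptions -- the two
 * function names and the constant values are invented:
 */
static int
example_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, vfs_context_t context)
{
	if (VFSATTR_IS_ACTIVE(fsap, f_bsize))
		VFSATTR_RETURN(fsap, f_bsize, 4096);	/* invented value */
	if (VFSATTR_IS_ACTIVE(fsap, f_objcount))
		VFSATTR_RETURN(fsap, f_objcount, 1000);	/* invented value */
	/* attributes never answered keep their f_supported bit clear */
	return (0);
}

static void
example_vfsattr_caller(mount_t mp, vfs_context_t ctx)
{
	struct vfs_attr va;

	VFSATTR_INIT(&va);		/* clear f_supported and f_active */
	VFSATTR_WANTED(&va, f_bsize);
	VFSATTR_WANTED(&va, f_objcount);
	if (vfs_getattr(mp, &va, ctx) == 0 &&
	    VFSATTR_IS_SUPPORTED(&va, f_objcount)) {
		/* va.f_objcount is valid here */
	}
}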
@@ -162,18 +234,20 @@ struct mount { #define MNT_UNION 0x00000020 /* union with underlying filesystem */ #define MNT_ASYNC 0x00000040 /* file system written asynchronously */ #define MNT_DONTBROWSE 0x00100000 /* file system is not appropriate path to user data */ -#define MNT_UNKNOWNPERMISSIONS 0x00200000 /* no known mapping for uid/gid in permissions information on disk */ +#define MNT_IGNORE_OWNERSHIP 0x00200000 /* VFS will ignore ownership information on filesystem + * objects */ #define MNT_AUTOMOUNTED 0x00400000 /* filesystem was mounted by automounter */ #define MNT_JOURNALED 0x00800000 /* filesystem is journaled */ +#define MNT_NOUSERXATTR 0x01000000 /* Don't allow user extended attributes */ +#define MNT_DEFWRITE 0x02000000 /* filesystem should defer writes */ + +/* backwards compatibility only */ +#define MNT_UNKNOWNPERMISSIONS MNT_IGNORE_OWNERSHIP /* * NFS export related mount flags. */ -#define MNT_EXRDONLY 0x00000080 /* exported read only */ #define MNT_EXPORTED 0x00000100 /* file system is exported */ -#define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ -#define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ -#define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ /* * Flags set by internal operations. @@ -182,7 +256,6 @@ struct mount { #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_DOVOLFS 0x00008000 /* FS supports volfs */ -#define MNT_FIXEDSCRIPTENCODING 0x10000000 /* FS supports only fixed script encoding [HFS] */ /* * XXX I think that this could now become (~(MNT_CMDFLAGS)) @@ -190,11 +263,11 @@ struct mount { */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_NODEV | MNT_UNION | \ - MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ - MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ + MNT_ASYNC | MNT_EXPORTED | \ MNT_LOCAL | MNT_QUOTA | \ MNT_ROOTFS | MNT_DOVOLFS | MNT_DONTBROWSE | \ - MNT_UNKNOWNPERMISSIONS | MNT_AUTOMOUNTED | MNT_JOURNALED | MNT_FIXEDSCRIPTENCODING ) + MNT_UNKNOWNPERMISSIONS | MNT_AUTOMOUNTED | MNT_JOURNALED | \ + MNT_DEFWRITE) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. @@ -202,28 +275,12 @@ struct mount { * External filesystem control flags. */ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ -#define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ -#define MNT_CMDFLAGS (MNT_UPDATE|MNT_DELEXPORT|MNT_RELOAD|MNT_FORCE) +#define MNT_CMDFLAGS (MNT_UPDATE|MNT_RELOAD|MNT_FORCE) + + -/* - * Internal filesystem control flags stored in mnt_kern_flag. - * - * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed - * past the mount point. This keeps the subtree stable during mounts - * and unmounts. - */ -#define MNTK_VIRTUALDEV 0x00200000 /* mounted on a virtual device i.e. 
a disk image */ -#define MNTK_ROOTDEV 0x00400000 /* this filesystem resides on the same device as the root */ -#define MNTK_IO_XINFO 0x00800000 /* mnt_un.mntu_ioptr has a malloc associated with it */ -#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ -#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ -#define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ -#if REV_ENDIAN_FS -#define MNT_REVEND 0x08000000 /* Reverse endian FS */ -#endif /* REV_ENDIAN_FS */ -#define MNTK_FRCUNMOUNT 0x10000000 /* Forced unmount wanted. */ /* * Sysctl CTL_VFS definitions. * @@ -240,8 +297,7 @@ struct mount { #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ -#define VFS_FMOD_WATCH 3 /* block waiting for the next modified file */ -#define VFS_FMOD_WATCH_ENABLE 4 /* 1==enable, 0==disable */ +#define VFS_SET_PACKAGE_EXTS 3 /* set package extension list */ /* * Flags for various system call interfaces. @@ -251,46 +307,24 @@ struct mount { #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ -/* - * Generic file handle - */ -struct fhandle { - fsid_t fh_fsid; /* File system id of mount point */ - struct fid fh_fid; /* File sys specific id */ -}; -typedef struct fhandle fhandle_t; -/* - * Export arguments for local filesystem mount calls. - */ -struct export_args { - int ex_flags; /* export related flags */ - uid_t ex_root; /* mapping for root uid */ - struct ucred ex_anon; /* mapping for anonymous user */ - struct sockaddr *ex_addr; /* net address to which exported */ - int ex_addrlen; /* and the net address length */ - struct sockaddr *ex_mask; /* mask of valid bits in saddr */ - int ex_masklen; /* and the smask length */ -}; +#ifndef KERNEL +struct mount; +typedef struct mount * mount_t; +struct vnode; +typedef struct vnode * vnode_t; +#endif -#ifdef __APPLE_API_UNSTABLE -/* - * Filesystem configuration information. One of these exists for each - * type of filesystem supported by the kernel. These are searched at - * mount time to identify the requested filesystem. - */ struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ - int (*vfc_mountroot)(void); /* if != NULL, routine to mount root */ + int (*vfc_mountroot)(mount_t, vnode_t); /* if != NULL, routine to mount root */ struct vfsconf *vfc_next; /* next in list */ }; -#endif /*__APPLE_API_UNSTABLE */ - struct vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on. */ @@ -299,9 +333,45 @@ struct vfsidctl { u_int32_t vc_spare[12]; /* spare (must be zero). */ }; + /* vfsidctl API version. */ #define VFS_CTL_VERS1 0x01 +#ifdef KERNEL +// LP64todo - should this move? + +/* LP64 version of vfsconf. all pointers + * grow when we're dealing with a 64-bit process. 
+ * WARNING - keep in sync with vfsconf + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_vfsconf { + user_addr_t vfc_vfsops; /* filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* filesystem type name */ + int vfc_typenum; /* historic filesystem type number */ + int vfc_refcount; /* number mounted of this type */ + int vfc_flags; /* permanent flags */ + user_addr_t vfc_mountroot; /* if != NULL, routine to mount root */ + user_addr_t vfc_next; /* next in list */ +}; + +struct user_vfsidctl { + int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ + fsid_t vc_fsid; /* fsid to operate on. */ + user_addr_t vc_ptr; /* pointer to data structure. */ + user_size_t vc_len; /* sizeof said structure. */ + u_int32_t vc_spare[12]; /* spare (must be zero). */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL */ + /* * New style VFS sysctls, do not reuse/conflict with the namespace for * private sysctls. @@ -325,10 +395,9 @@ struct vfsquery { #define VQ_MOUNT 0x0008 /* new filesystem arrived */ #define VQ_UNMOUNT 0x0010 /* filesystem has left */ #define VQ_DEAD 0x0020 /* filesystem is dead, needs force unmount */ -#define VQ_ASSIST 0x0040 /* filesystem needs assistance from external - program */ +#define VQ_ASSIST 0x0040 /* filesystem needs assistance from external program */ #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ -#define VQ_FLAG0100 0x0100 /* placeholder */ +#define VQ_UPDATE 0x0100 /* filesystem information has changed */ #define VQ_FLAG0200 0x0200 /* placeholder */ #define VQ_FLAG0400 0x0400 /* placeholder */ #define VQ_FLAG0800 0x0800 /* placeholder */ @@ -337,128 +406,176 @@ struct vfsquery { #define VQ_FLAG4000 0x4000 /* placeholder */ #define VQ_FLAG8000 0x8000 /* placeholder */ -#ifdef KERNEL -/* Point a sysctl request at a vfsidctl's data. */ -#define VCTLTOREQ(vc, req) \ - do { \ - (req)->newptr = (vc)->vc_ptr; \ - (req)->newlen = (vc)->vc_len; \ - (req)->newidx = 0; \ - } while (0) -#endif #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE -extern int maxvfsconf; /* highest defined filesystem type */ -extern struct vfsconf *vfsconf; /* head of list of filesystem types */ -extern int maxvfsslots; /* Maximum slots available to be used */ -extern int numused_vfsslots; /* number of slots already used */ -int vfsconf_add __P((struct vfsconf *)); -int vfsconf_del __P((char *)); +/* Structure for setting device IO parameters per mount point */ +struct vfsioattr { + u_int32_t io_maxreadcnt; /* Max. byte count for read */ + u_int32_t io_maxwritecnt; /* Max. byte count for write */ + u_int32_t io_segreadcnt; /* Max. segment count for read */ + u_int32_t io_segwritecnt; /* Max. segment count for write */ + u_int32_t io_maxsegreadsize; /* Max. segment read size */ + u_int32_t io_maxsegwritesize; /* Max. segment write size */ + u_int32_t io_devblocksize; /* the underlying device block size */ + void * io_reserved[3]; /* extended attribute information */ +}; + /* - * Operations supported on mounted file system. + * Filesystem Registration information */ -#ifdef __STDC__ -struct nameidata; -struct mbuf; -#endif + +#define VFS_TBLTHREADSAFE 0x01 +#define VFS_TBLFSNODELOCK 0x02 +#define VFS_TBLNOTYPENUM 0x08 +#define VFS_TBLLOCALVOL 0x10 +#define VFS_TBL64BITREADY 0x20 + +struct vfs_fsentry { + struct vfsops * vfe_vfsops; /* vfs operations */ + int vfe_vopcnt; /* # of vnodeopv_desc being registered (reg, spec, fifo ...) 
*/ + struct vnodeopv_desc ** vfe_opvdescs; /* null terminated; */ + int vfe_fstypenum; /* historic filesystem type number */ + char vfe_fsname[MFSNAMELEN]; /* filesystem type name */ + uint32_t vfe_flags; /* defines the FS capabilities */ + void * vfe_reserv[2]; /* reserved for future use; set this to zero*/ + }; + + struct vfsops { - int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, - struct nameidata *ndp, struct proc *p)); - int (*vfs_start) __P((struct mount *mp, int flags, - struct proc *p)); - int (*vfs_unmount) __P((struct mount *mp, int mntflags, - struct proc *p)); - int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); - int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, - caddr_t arg, struct proc *p)); - int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, - struct proc *p)); - int (*vfs_sync) __P((struct mount *mp, int waitfor, - struct ucred *cred, struct proc *p)); - int (*vfs_vget) __P((struct mount *mp, void *ino, - struct vnode **vpp)); - int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, - struct mbuf *nam, struct vnode **vpp, - int *exflagsp, struct ucred **credanonp)); - int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); - int (*vfs_init) __P((struct vfsconf *)); - int (*vfs_sysctl) __P((int *, u_int, void *, size_t *, void *, - size_t, struct proc *)); + int (*vfs_mount)(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); + int (*vfs_start)(struct mount *mp, int flags, vfs_context_t context); + int (*vfs_unmount)(struct mount *mp, int mntflags, vfs_context_t context); + int (*vfs_root)(struct mount *mp, struct vnode **vpp, vfs_context_t context); + int (*vfs_quotactl)(struct mount *mp, int cmds, uid_t uid, caddr_t arg, vfs_context_t context); + int (*vfs_getattr)(struct mount *mp, struct vfs_attr *, vfs_context_t context); +/* int (*vfs_statfs)(struct mount *mp, struct vfsstatfs *sbp, vfs_context_t context);*/ + int (*vfs_sync)(struct mount *mp, int waitfor, vfs_context_t context); + int (*vfs_vget)(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); + int (*vfs_fhtovp)(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, + vfs_context_t context); + int (*vfs_vptofh)(struct vnode *vp, int *fhlen, unsigned char *fhp, vfs_context_t context); + int (*vfs_init)(struct vfsconf *); + int (*vfs_sysctl)(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t context); + int (*vfs_setattr)(struct mount *mp, struct vfs_attr *, vfs_context_t context); + void *vfs_reserved[7]; }; -#define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ - (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) -#define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) -#define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) -#define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) -#define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) -#define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) -#define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) -#define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) -#define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \ - (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED) -#define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) /* - * Network address lookup element + * flags passed into vfs_iterate */ -struct netcred { - struct radix_node netc_rnodes[2]; - int netc_exflags; - struct ucred netc_anon; 
-}; /* - * Network export information + * return values from callback */ -struct netexport { - struct netcred ne_defexported; /* Default export */ - struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ -}; +#define VFS_RETURNED 0 /* done with vnode, reference can be dropped */ +#define VFS_RETURNED_DONE 1 /* done with vnode, reference can be dropped, terminate iteration */ +#define VFS_CLAIMED 2 /* don't drop reference */ +#define VFS_CLAIMED_DONE 3 /* don't drop reference, terminate iteration */ + +__BEGIN_DECLS /* - * exported vnode operations + * prototypes for exported VFS operations */ -int vfs_busy __P((struct mount *, int, struct slock *, struct proc *)); -int vfs_export __P((struct mount *, struct netexport *, - struct export_args *)); -struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *, - struct mbuf *)); -void vfs_getnewfsid __P((struct mount *)); -struct mount *vfs_getvfs __P((fsid_t *)); -int vfs_mountedon __P((struct vnode *)); -void vfs_unbusy __P((struct mount *, struct proc *)); -#ifdef __APPLE_API_PRIVATE -int vfs_mountroot __P((void)); -int vfs_rootmountalloc __P((char *, char *, struct mount **)); -void vfs_unmountall __P((void)); -int safedounmount(struct mount *, int, struct proc *); -int dounmount(struct mount *, int, struct proc *); +extern int VFS_MOUNT(mount_t, vnode_t, user_addr_t, vfs_context_t); +extern int VFS_START(mount_t, int, vfs_context_t); +extern int VFS_UNMOUNT(mount_t, int, vfs_context_t); +extern int VFS_ROOT(mount_t, vnode_t *, vfs_context_t); +extern int VFS_QUOTACTL(mount_t, int, uid_t, caddr_t, vfs_context_t); +extern int VFS_SYNC(mount_t, int, vfs_context_t); +extern int VFS_VGET(mount_t, ino64_t, vnode_t *, vfs_context_t); +extern int VFS_FHTOVP(mount_t, int, unsigned char *, vnode_t *, vfs_context_t); +extern int VFS_VPTOFH(vnode_t, int *, unsigned char *, vfs_context_t); + +/* The file system registration KPI */ +int vfs_fsadd(struct vfs_fsentry *, vfstable_t *); +int vfs_fsremove(vfstable_t); +int vfs_iterate(int, int (*)(struct mount *, void *), void *);
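/*
 * [Editorial aside -- illustrative sketch, not part of the original patch.]
 * A vfs_iterate() callback: the VFS_RETURNED/VFS_CLAIMED codes above tell
 * the iterator whether it may drop its reference on the mount and whether
 * to keep walking the mount list. The callback name, the 0 "flags"
 * argument, and the use of MNT_LOCAL are assumptions for illustration:
 */
static int
example_count_local_mounts(struct mount *mp, void *arg)
{
	int *nlocal = (int *)arg;	/* caller-supplied accumulator */

	if (vfs_flags(mp) & MNT_LOCAL)
		(*nlocal)++;
	return (VFS_RETURNED);		/* reference may be dropped; continue */
}

/* usage: int n = 0; vfs_iterate(0, example_count_local_mounts, &n); */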
+ +uint64_t vfs_flags(mount_t); +void vfs_setflags(mount_t, uint64_t); +void vfs_clearflags(mount_t, uint64_t); + +int vfs_issynchronous(mount_t); +int vfs_iswriteupgrade(mount_t); +int vfs_isupdate(mount_t); +int vfs_isreload(mount_t); +int vfs_isforce(mount_t); +int vfs_isrdonly(mount_t); +int vfs_isrdwr(mount_t); +int vfs_authopaque(mount_t); +int vfs_authopaqueaccess(mount_t); +void vfs_setauthopaque(mount_t); +void vfs_setauthopaqueaccess(mount_t); +void vfs_clearauthopaque(mount_t); +void vfs_clearauthopaqueaccess(mount_t); +int vfs_extendedsecurity(mount_t); +void vfs_setextendedsecurity(mount_t); +void vfs_clearextendedsecurity(mount_t); +void vfs_setlocklocal(mount_t); + + + +uint32_t vfs_maxsymlen(mount_t); +void vfs_setmaxsymlen(mount_t, uint32_t); +void * vfs_fsprivate(mount_t); +void vfs_setfsprivate(mount_t, void *mntdata); + +struct vfsstatfs * vfs_statfs(mount_t); +int vfs_update_vfsstat(mount_t, vfs_context_t); +int vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); +int vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); + +int vfs_typenum(mount_t); +void vfs_name(mount_t, char *); +int vfs_devblocksize(mount_t); +void vfs_ioattr(mount_t, struct vfsioattr *); +void vfs_setioattr(mount_t, struct vfsioattr *); +int vfs_64bitready(mount_t); + + +int vfs_busy(mount_t, int); +void vfs_unbusy(mount_t); + +void vfs_getnewfsid(struct mount *); +mount_t vfs_getvfs(fsid_t *); +mount_t vfs_getvfs_by_mntonname(u_char *); +int vfs_mountedon(struct vnode *); + void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); void vfs_event_init(void); -#endif /* __APPLE_API_PRIVATE */ -extern CIRCLEQ_HEAD(mntlist, mount) mountlist; -extern struct slock mountlist_slock; +__END_DECLS + +#endif /* KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ -#else /* !KERNEL */ +#ifndef KERNEL + +/* + * Generic file handle + */ +#define NFS_MAX_FH_SIZE 64 +#define NFSV2_MAX_FH_SIZE 32 +struct fhandle { + int fh_len; /* length of file handle */ + unsigned char fh_data[NFS_MAX_FH_SIZE]; /* file handle value */ +}; +typedef struct fhandle fhandle_t; -#include __BEGIN_DECLS -int fhopen __P((const struct fhandle *, int)); -int fstatfs __P((int, struct statfs *)); -int getfh __P((const char *, fhandle_t *)); -int getfsstat __P((struct statfs *, long, int)); -int getmntinfo __P((struct statfs **, int)); -int mount __P((const char *, const char *, int, void *)); -int statfs __P((const char *, struct statfs *)); -int unmount __P((const char *, int)); -int getvfsbyname __P((const char *, struct vfsconf *)); +int fhopen(const struct fhandle *, int); +int fstatfs(int, struct statfs *); +int getfh(const char *, fhandle_t *); +int getfsstat(struct statfs *, int, int); +int getmntinfo(struct statfs **, int); +int mount(const char *, const char *, int, void *); +int statfs(const char *, struct statfs *); +int unmount(const char *, int); +int getvfsbyname(const char *, struct vfsconf *); __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h new file mode 100644 index 000000000..8eacce4ea --- /dev/null +++ b/bsd/sys/mount_internal.h @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mount.h 8.21 (Berkeley) 5/20/95 + */ + +#ifndef _SYS_MOUNT_INTERNAL_H_ +#define _SYS_MOUNT_INTERNAL_H_ + +#include +#ifndef KERNEL +#include +#else +#include +#include +#endif +#include +#include +#include +#include /* XXX for AF_MAX */ +#include /* XXX for AF_MAX */ +#include +#include + +/* + * Structure per mounted file system. Each mounted file system has an + * array of operations and an instance record. The file systems are + * put on a doubly linked list. + */ +TAILQ_HEAD(vnodelst, vnode); + +struct mount { + TAILQ_ENTRY(mount) mnt_list; /* mount list */ + int32_t mnt_count; /* reference on the mount */ + lck_mtx_t mnt_mlock; /* mutex that protects mount point */ + struct vfsops *mnt_op; /* operations on fs */ + struct vfstable *mnt_vtable; /* configuration info */ + struct vnode *mnt_vnodecovered; /* vnode we mounted on */ + struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ + struct vnodelst mnt_workerqueue; /* list of vnodes this mount */ + struct vnodelst mnt_newvnodes; /* list of vnodes this mount */ + int mnt_flag; /* flags */ + int mnt_kern_flag; /* kernel only flags */ + int mnt_lflag; /* mount life cycle flags */ + int mnt_maxsymlinklen; /* max size of short symlink */ + struct vfsstatfs mnt_vfsstat; /* cache of filesystem stats */ + qaddr_t mnt_data; /* private data */ + /* Cached values of the IO constraints for the device */ + u_int32_t mnt_maxreadcnt; /* Max. byte count for read */ + u_int32_t mnt_maxwritecnt; /* Max. byte count for write */ + u_int32_t mnt_segreadcnt; /* Max. segment count for read */ + u_int32_t mnt_segwritecnt; /* Max. segment count for write */ + u_int32_t mnt_maxsegreadsize; /* Max. segment read size */ + u_int32_t mnt_maxsegwritesize; /* Max. 
segment write size */ + u_int32_t mnt_devblocksize; /* the underlying device block size */ + lck_rw_t mnt_rwlock; /* mutex readwrite lock */ + lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ + vnode_t mnt_devvp; /* the device mounted on for local file systems */ + int32_t mnt_crossref; /* references to cover lookups crossing into mp */ + int32_t mnt_iterref; /* references to cover iterations; drained makes it negative */ + + /* XXX 3762912 hack to support HFS filesystem 'owner' */ + uid_t mnt_fsowner; + gid_t mnt_fsgroup; +}; + +/* XXX 3762912 hack to support HFS filesystem 'owner' */ +#define vfs_setowner(_mp, _uid, _gid) do {(_mp)->mnt_fsowner = (_uid); (_mp)->mnt_fsgroup = (_gid); } while (0) + + +/* mount point to which dead vnodes point */ +extern struct mount * dead_mountp; + +/* + * Internal filesystem control flags stored in mnt_kern_flag. + * + * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed + * past the mount point. This keeps the subtree stable during mounts + * and unmounts. + * + * Note: We are counting down on new bit assignments. This is + * because the bits here were broken out from the high bits + * of the mount flags. + */ +#define MNTK_LOCK_LOCAL 0x00100000 /* advisory locking is done above the VFS itself */ +#define MNTK_VIRTUALDEV 0x00200000 /* mounted on a virtual device i.e. a disk image */ +#define MNTK_ROOTDEV 0x00400000 /* this filesystem resides on the same device as the root */ +#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ +#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ +#define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ +#if REV_ENDIAN_FS +#define MNT_REVEND 0x08000000 /* Reverse endian FS */ +#endif /* REV_ENDIAN_FS */ +#define MNTK_FRCUNMOUNT 0x10000000 /* Forced unmount wanted. */ +#define MNTK_AUTH_OPAQUE 0x20000000 /* authorisation decisions are not made locally */ +#define MNTK_AUTH_OPAQUE_ACCESS 0x40000000 /* VNOP_ACCESS is reliable for remote auth */ +#define MNTK_EXTENDED_SECURITY 0x80000000 /* extended security supported */ + +#define MNT_LBUSY 0x00000001 /* mount is busy */ +#define MNT_LUNMOUNT 0x00000002 /* mount in unmount */ +#define MNT_LFORCE 0x00000004 /* mount in forced unmount */ +#define MNT_LDRAIN 0x00000008 /* mount in drain */ +#define MNT_LITER 0x00000010 /* mount in iteration */ +#define MNT_LNEWVN 0x00000020 /* mount has new vnodes created */ +#define MNT_LWAIT 0x00000040 /* wait for unmount op */ +#define MNT_LITERWAIT 0x00000080 /* waiting for iteration to finish */ +#define MNT_LDEAD 0x00000100 /* mount already unmounted */ + + +/* + * Generic file handle + */ +#define NFS_MAX_FH_SIZE 64 +#define NFSV2_MAX_FH_SIZE 32 +struct fhandle { + int fh_len; /* length of file handle */ + unsigned char fh_data[NFS_MAX_FH_SIZE]; /* file handle value */ +}; +typedef struct fhandle fhandle_t; + + + +/* + * Filesystem configuration information. One of these exists for each + * type of filesystem supported by the kernel. These are searched at + * mount time to identify the requested filesystem.
+ */ +struct vfstable { +/* THE FOLLOWING MUST BE KEPT THE SAME FOR user compat with sysctl */ + struct vfsops *vfc_vfsops; /* filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* filesystem type name */ + int vfc_typenum; /* historic filesystem type number */ + int vfc_refcount; /* number mounted of this type */ + int vfc_flags; /* permanent flags */ + int (*vfc_mountroot)(mount_t, vnode_t, vfs_context_t); /* if != NULL, routine to mount root */ + struct vfstable *vfc_next; /* next in list */ +/* Everything above must be kept the same for user compat with sysctl */ + int vfc_threadsafe; /* FS is thread & preemption safe */ + lck_mtx_t vfc_lock; /* for non-threaded file systems */ + int vfc_vfsflags; /* for optional types */ + void * vfc_descptr; /* desc table allocated address */ + int vfc_descsize; /* size allocated for desc table */ + int vfc_64bitready; /* The file system is ready for 64-bit */ +}; + +#define VFC_VFSLOCALARGS 0x02 +#define VFC_VFSGENERICARGS 0x04 +#define VFC_VFSNATIVEXATTR 0x10 + + +extern int maxvfsconf; /* highest defined filesystem type */ +extern struct vfstable *vfsconf; /* head of list of filesystem types */ +extern int maxvfsslots; /* Maximum slots available to be used */ +extern int numused_vfsslots; /* number of slots already used */ + +/* the following two are xnu private */ +struct vfstable * vfstable_add(struct vfstable *); +int vfstable_del(struct vfstable *); + + +struct vfsmount_args { + union { + struct { + char * mnt_fspec; + void * mnt_fsdata; + } mnt_localfs_args; + struct { + void * mnt_fsdata; /* FS specific */ + } mnt_remotefs_args; + } mountfs_args; +}; + + +/* + * LP64 version of statfs structure. + * NOTE - must be kept in sync with struct statfs in mount.h + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_statfs { + short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ + short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ + user_long_t f_bsize; /* fundamental file system block size */ + user_long_t f_iosize; /* optimal transfer block size */ + user_long_t f_blocks; /* total data blocks in file system */ + user_long_t f_bfree; /* free blocks in fs */ + user_long_t f_bavail; /* free blocks avail to non-superuser */ + user_long_t f_files; /* total file nodes in file system */ + user_long_t f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + short f_reserved1; /* spare for later */ + short f_type; /* type of filesystem */ + user_long_t f_flags; /* copy of mount exported flags */ + user_long_t f_reserved2[2]; /* reserved for future use */ + char f_fstypename[MFSNAMELEN]; /* fs type name */ + char f_mntonname[MNAMELEN]; /* directory on which mounted */ + char f_mntfromname[MNAMELEN];/* mounted filesystem */ +#if COMPAT_GETFSSTAT + char f_reserved3[0]; /* For alignment */ + user_long_t f_reserved4[0]; /* For future use */ +#else + char f_reserved3; /* For alignment */ + user_long_t f_reserved4[4]; /* For future use */ +#endif +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +__BEGIN_DECLS + +extern TAILQ_HEAD(mntlist, mount) mountlist; +void mount_list_lock(void); +void mount_list_unlock(void); +void mount_lock_init(mount_t); +void mount_lock_destroy(mount_t); +void mount_lock(mount_t); +void mount_unlock(mount_t); +void mount_lock_renames(mount_t); +void mount_unlock_renames(mount_t); +void mount_ref(mount_t, int); +void mount_drop(mount_t, int); + +/* vfs_rootmountalloc should be kept as a private api */
+errno_t vfs_rootmountalloc(const char *, const char *, mount_t *mpp); +errno_t vfs_init_io_attributes(vnode_t, mount_t); + +int vfs_mountroot(void); +void vfs_unmountall(void); +int safedounmount(struct mount *, int, struct proc *); +int dounmount(struct mount *, int, struct proc *); + +/* xnu internal api */ +void mount_dropcrossref(mount_t, vnode_t, int); +int validfsnode(mount_t); +mount_t mount_lookupby_volfsid(int, int); +mount_t mount_list_lookupby_fsid(fsid_t *, int, int); +int mount_iterref(mount_t, int); +int mount_isdrained(mount_t, int); +void mount_iterdrop(mount_t); +void mount_iterdrain(mount_t); +void mount_iterreset(mount_t); + +__END_DECLS + +#endif /* !_SYS_MOUNT_INTERNAL_H_ */ diff --git a/bsd/sys/msg.h b/bsd/sys/msg.h index 76e3f1c27..d53ebd98c 100644 --- a/bsd/sys/msg.h +++ b/bsd/sys/msg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -45,53 +45,174 @@ #include -#ifdef __APPLE_API_UNSTABLE +#include +#include + +/* + * [XSI] All of the symbols from <sys/ipc.h> SHALL be defined when this + * header is included */ #include /* - * The MSG_NOERROR identifier value, the msqid_ds struct and the msg struct - * are as defined by the SV API Intel 386 Processor Supplement. + * [XSI] The pid_t, time_t, key_t, size_t, and ssize_t types shall be + * defined as described in <sys/types.h>. + * + * NOTE: The definition of the key_t type is implicit from the + * inclusion of <sys/ipc.h> + */ +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T +#endif + +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif + +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +#ifndef _SSIZE_T +#define _SSIZE_T +typedef __darwin_ssize_t ssize_t; +#endif + +/* [XSI] Used for the number of messages in the message queue */ +typedef unsigned long msgqnum_t; + +/* [XSI] Used for the number of bytes allowed in a message queue */ +typedef unsigned long msglen_t; + + +/* + * Possible values for the fifth parameter to msgrcv(), in addition to the + * IPC_NOWAIT flag, which is permitted. */ +#define MSG_NOERROR 010000 /* [XSI] No error if big message */ + -#define MSG_NOERROR 010000 /* don't complain about too long msgs */ - -struct msqid_ds { - struct ipc_perm msg_perm; /* msg queue permission bits */ - struct msg *msg_first; /* first message in the queue */ - struct msg *msg_last; /* last message in the queue */ - u_long msg_cbytes; /* number of bytes in use on the queue */ - u_long msg_qnum; /* number of msgs in the queue */ - u_long msg_qbytes; /* max # of bytes on the queue */ - pid_t msg_lspid; /* pid of last msgsnd() */ - pid_t msg_lrpid; /* pid of last msgrcv() */ - time_t msg_stime; /* time of last msgsnd() */ - long msg_pad1; - time_t msg_rtime; /* time of last msgrcv() */ - long msg_pad2; - time_t msg_ctime; /* time of last msgctl() */ - long msg_pad3; - long msg_pad4[4]; +/* + * Technically, we should force all code references to the new structure + * definition, not in just the standards conformance case, and leave the + * legacy interface there for binary compatibility only. Currently, we + * are only forcing this for programs requesting standards conformance. + */ +#if defined(__POSIX_C_SOURCE) || defined(kernel) || defined(__LP64__) +/* + * Structure used internally. + * + * Structure whose address is passed as the third parameter to msgctl() + * when the second parameter is IPC_SET or IPC_STAT.
In the case of the + * IPC_SET command, only the msg_perm.{uid|gid|perm} and msg_qbytes are + * honored. In the case of IPC_STAT, only the fields indicated as [XSI] + * mandated fields are guaranteed to be meaningful: DO NOT depend on the + * contents of the other fields. + * + * NOTES: Reserved fields are not preserved across IPC_SET/IPC_STAT. + */ +struct __msqid_ds_new { + struct __ipc_perm_new msg_perm; /* [XSI] msg queue permissions */ + __int32_t msg_first; /* RESERVED: kernel use only */ + __int32_t msg_last; /* RESERVED: kernel use only */ + msglen_t msg_cbytes; /* # of bytes on the queue */ + msgqnum_t msg_qnum; /* [XSI] number of msgs on the queue */ + msglen_t msg_qbytes; /* [XSI] max bytes on the queue */ + pid_t msg_lspid; /* [XSI] pid of last msgsnd() */ + pid_t msg_lrpid; /* [XSI] pid of last msgrcv() */ + time_t msg_stime; /* [XSI] time of last msgsnd() */ + __int32_t msg_pad1; /* RESERVED: DO NOT USE */ + time_t msg_rtime; /* [XSI] time of last msgrcv() */ + __int32_t msg_pad2; /* RESERVED: DO NOT USE */ + time_t msg_ctime; /* [XSI] time of last msgctl() */ + __int32_t msg_pad3; /* RESERVED: DO NOT USE */ + __int32_t msg_pad4[4]; /* RESERVED: DO NOT USE */ +}; +#define msqid_ds __msqid_ds_new +#else /* !_POSIX_C_SOURCE */ +#define msqid_ds __msqid_ds_old +#endif /* !_POSIX_C_SOURCE */ + +#if !defined(__POSIX_C_SOURCE) && !defined(__LP64__) +struct __msqid_ds_old { + struct __ipc_perm_old msg_perm; /* [XSI] msg queue permissions */ + __int32_t msg_first; /* RESERVED: kernel use only */ + __int32_t msg_last; /* RESERVED: kernel use only */ + msglen_t msg_cbytes; /* # of bytes on the queue */ + msgqnum_t msg_qnum; /* [XSI] number of msgs on the queue */ + msglen_t msg_qbytes; /* [XSI] max bytes on the queue */ + pid_t msg_lspid; /* [XSI] pid of last msgsnd() */ + pid_t msg_lrpid; /* [XSI] pid of last msgrcv() */ + time_t msg_stime; /* [XSI] time of last msgsnd() */ + __int32_t msg_pad1; /* RESERVED: DO NOT USE */ + time_t msg_rtime; /* [XSI] time of last msgrcv() */ + __int32_t msg_pad2; /* RESERVED: DO NOT USE */ + time_t msg_ctime; /* [XSI] time of last msgctl() */ + __int32_t msg_pad3; /* RESERVED: DO NOT USE */ + __int32_t msg_pad4[4]; /* RESERVED: DO NOT USE */ }; +#endif /* !_POSIX_C_SOURCE */ + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#include + +// LP64todo - should this move?
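/*
 * [Editorial aside -- illustrative user-level sketch, not part of the
 * original patch.] The queue lifecycle behind the msqid_ds variants in
 * this header; error handling is elided and the key, mode, and message
 * layout are invented. Note that msgsnd()/msgrcv() lengths count only
 * mtext, not the leading mtype field.
 */
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct example_msg { long mtype; char mtext[64]; };

int
example_msg_roundtrip(void)
{
	struct example_msg m = { 1, "hello" };
	struct msqid_ds info;
	int id = msgget((key_t)0x1234, IPC_CREAT | 0600);

	msgsnd(id, &m, strlen(m.mtext) + 1, 0);
	msgctl(id, IPC_STAT, &info);		/* info.msg_qnum is now 1 */
	msgrcv(id, &m, sizeof(m.mtext), 1, MSG_NOERROR);
	return (msgctl(id, IPC_RMID, NULL));	/* tear the queue down */
}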
+ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_msqid_ds { + struct ipc_perm msg_perm; /* [XSI] msg queue permissions */ + struct msg *msg_first; /* first message in the queue */ + struct msg *msg_last; /* last message in the queue */ + msglen_t msg_cbytes; /* # of bytes on the queue */ + msgqnum_t msg_qnum; /* [XSI] number of msgs on the queue */ + msglen_t msg_qbytes; /* [XSI] max bytes on the queue */ + pid_t msg_lspid; /* [XSI] pid of last msgsnd() */ + pid_t msg_lrpid; /* [XSI] pid of last msgrcv() */ + user_time_t msg_stime; /* [XSI] time of last msgsnd() */ + __int32_t msg_pad1; /* RESERVED: DO NOT USE */ + user_time_t msg_rtime; /* [XSI] time of last msgrcv() */ + __int32_t msg_pad2; /* RESERVED: DO NOT USE */ + user_time_t msg_ctime; /* [XSI] time of last msgctl() */ + __int32_t msg_pad3; /* RESERVED: DO NOT USE */ + __int32_t msg_pad4[4]; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ + + +#ifndef _POSIX_C_SOURCE +#ifdef __APPLE_API_UNSTABLE +/* XXX kernel only; protect with macro later */ struct msg { - struct msg *msg_next; /* next msg in the chain */ - long msg_type; /* type of this message */ - /* >0 -> type of this message */ - /* 0 -> free header */ - u_short msg_ts; /* size of this message */ - short msg_spot; /* location of start of msg in buffer */ + struct msg *msg_next; /* next msg in the chain */ + long msg_type; /* type of this message */ + /* >0 -> type of this message */ + /* 0 -> free header */ + unsigned short msg_ts; /* size of this message */ + short msg_spot; /* location of msg start in buffer */ }; /* - * Structure describing a message. The SVID doesn't suggest any - * particular name for this structure. There is a reference in the - * msgop man page that reads "The structure mymsg is an example of what - * this user defined buffer might look like, and includes the following - * members:". This sentence is followed by two lines equivalent - * to the mtype and mtext field declarations below. It isn't clear - * if "mymsg" refers to the naem of the structure type or the name of an - * instance of the structure... + * Example structure describing a message whose address is to be passed as + * the second argument to the functions msgrcv() and msgsnd(). The only + * actual hard requirement is that the first field be of type long, and + * contain the message type. The user is encouraged to define their own + * application specific structure; this definition is included solely for + * backward compatibility with existing source code. */ struct mymsg { long mtype; /* message type (+ve integer) */ @@ -158,26 +279,30 @@ struct msgmap { /* 0..(MSGSEG-1) -> index of next segment */ }; +/* The following four externs really, really need to die; should be static */ extern char *msgpool; /* MSGMAX byte long msg buffer pool */ extern struct msgmap *msgmaps; /* MSGSEG msgmap structures */ extern struct msg *msghdrs; /* MSGTQL msg headers */ -extern struct msqid_ds *msqids; /* MSGMNI msqid_ds struct's */ +extern struct user_msqid_ds *msqids; /* MSGMNI user_msqid_ds struct's */ #define MSG_LOCKED 01000 /* Is this msqid_ds locked?
*/ -#endif /* KERNEL */ +#endif /* KERNEL */ +#endif /* __APPLE_API_UNSTABLE */ +#endif /* !_POSIX_C_SOURCE */ #ifndef KERNEL -#include __BEGIN_DECLS -int msgsys __P((int, ...)); -int msgctl __P((int, int, struct msqid_ds *)); -int msgget __P((key_t, int)); -int msgsnd __P((int, void *, size_t, int)); -int msgrcv __P((int, void*, size_t, long, int)); +#ifndef _POSIX_C_SOURCE +int msgsys(int, ...); +#endif /* !_POSIX_C_SOURCE */ +int msgctl(int, int, struct msqid_ds *) __DARWIN_ALIAS(msgctl); +int msgget(key_t, int); +ssize_t msgrcv(int, void *, size_t, long, int); +int msgsnd(int, const void *, size_t, int); __END_DECLS + #endif /* !KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ #endif /* !_SYS_MSG_H_ */ diff --git a/bsd/sys/mtio.h b/bsd/sys/mtio.h index ab2f39e65..e6516a033 100644 --- a/bsd/sys/mtio.h +++ b/bsd/sys/mtio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,6 +59,7 @@ #define _SYS_MTIO_H_ #include +#include #ifdef __APPLE_API_OBSOLETE @@ -67,6 +68,7 @@ */ /* structure for MTIOCTOP - mag tape op command */ +/* LP64todo - not 64-bit safe */ struct mtop { short mt_op; /* operations defined below */ daddr_t mt_count; /* how many of them */ @@ -91,7 +93,7 @@ struct mtop { #define MTSETDNSTY 15 /* set density code for current mode */ /* structure for MTIOCGET - mag tape get status command */ - +/* LP64todo - not 64-bit safe */ struct mtget { short mt_type; /* type of magtape device */ /* the following two registers are grossly device dependent */ diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h index 1c5ce5991..775a41030 100644 --- a/bsd/sys/namei.h +++ b/bsd/sys/namei.h @@ -60,34 +60,22 @@ #include -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL +#define LOCKLEAF 0x0004 /* lock inode on return */ +#define LOCKPARENT 0x0008 /* want parent vnode returned */ +#define WANTPARENT 0x0010 /* want parent vnode returned */ +#endif + + +#ifdef BSD_KERNEL_PRIVATE #include #include +#include +#include +#include -/* - * Lookup parameters: this structure describes the subset of - * information from the nameidata structure that is passed - * through the VOP interface. - */ -struct componentname { - /* - * Arguments to lookup. - */ - u_long cn_nameiop; /* namei operation */ - u_long cn_flags; /* flags to namei */ - struct proc *cn_proc; /* process requesting lookup */ - struct ucred *cn_cred; /* credentials */ - /* - * Shared between lookup and commit routines. - */ - char *cn_pnbuf; /* pathname buffer */ - long cn_pnlen; /* length of allocated buffer */ - char *cn_nameptr; /* pointer to looked up name */ - long cn_namelen; /* length of looked up component */ - u_long cn_hash; /* hash value of looked up name */ - long cn_consume; /* chars to consume in lookup() */ -}; +#define PATHBUFLEN 256 /* * Encapsulation of namei parameters. @@ -96,17 +84,14 @@ struct nameidata { /* * Arguments to namei/lookup. */ - caddr_t ni_dirp; /* pathname pointer */ + user_addr_t ni_dirp; /* pathname pointer */ enum uio_seg ni_segflg; /* location of pathname */ - /* u_long ni_nameiop; namei operation */ - /* u_long ni_flags; flags to namei */ - /* struct proc *ni_proc; process requesting lookup */ /* * Arguments to lookup. 
*/ - /* struct ucred *ni_cred; credentials */ struct vnode *ni_startdir; /* starting directory */ struct vnode *ni_rootdir; /* logical root directory */ + struct vnode *ni_usedvp; /* directory passed in via USEDVP */ /* * Results: returned from/manipulated by lookup */ @@ -117,116 +102,106 @@ struct nameidata { */ u_int ni_pathlen; /* remaining chars in path */ char *ni_next; /* next location in pathname */ + char ni_pathbuf[PATHBUFLEN]; u_long ni_loopcnt; /* count of symlinks encountered */ + struct componentname ni_cnd; }; #ifdef KERNEL -/* - * namei operations - */ -#define LOOKUP 0 /* perform name lookup only */ -#define CREATE 1 /* setup for file creation */ -#define DELETE 2 /* setup for file deletion */ -#define RENAME 3 /* setup for file renaming */ -#define OPMASK 3 /* mask for operation */ /* * namei operational modifier flags, stored in ni_cnd.flags */ -#define LOCKLEAF 0x0004 /* lock inode on return */ -#define LOCKPARENT 0x0008 /* want parent vnode returned locked */ -#define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ #define NOCACHE 0x0020 /* name must not be left in cache */ -#define FOLLOW 0x0040 /* follow symbolic links */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define SHAREDLEAF 0x0080 /* OK to have shared leaf lock */ #define MODMASK 0x00fc /* mask of operational modifiers */ /* * Namei parameter descriptors. * - * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP. - * If the caller of namei sets the flag (for example execve wants to - * know the name of the program that is being executed), then it must - * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must - * be freed by either the commit routine or the VOP_ABORT routine. * SAVESTART is set only by the callers of namei. It implies SAVENAME * plus the addition of saving the parent directory that contains the * name in ni_startdir. It allows repeated calls to lookup for the * name being sought. The caller is responsible for releasing the * buffer and for vrele'ing ni_startdir. */ -#define NOCROSSMOUNT 0x000100 /* do not cross mount points */ -#define RDONLY 0x000200 /* lookup with read-only semantics */ -#define HASBUF 0x000400 /* has allocated pathname buffer */ -#define SAVENAME 0x000800 /* save pathanme buffer */ -#define SAVESTART 0x001000 /* save starting directory */ -#define ISDOTDOT 0x002000 /* current component name is .. 
*/ -#define MAKEENTRY 0x004000 /* entry is to be added to name cache */ -#define ISLASTCN 0x008000 /* this is last component of pathname */ -#define ISSYMLINK 0x010000 /* symlink needs interpretation */ -#define ISWHITEOUT 0x020000 /* found whiteout */ -#define DOWHITEOUT 0x040000 /* do whiteouts */ -#define WILLBEDIR 0x080000 /* new files will be dirs; allow trailing / */ -#define AUDITVNPATH1 0x100000 /* audit the path/vnode info */ -#define AUDITVNPATH2 0x200000 /* audit the path/vnode info */ -#define USEDVP 0x400000 /* start the lookup at ndp.ni_dvp */ -#define NODELETEBUSY 0x800000 /* donot delete busy files (Carbon semantic) */ -#define PARAMASK 0x3fff00 /* mask of parameter descriptors */ +#define NOCROSSMOUNT 0x00000100 /* do not cross mount points */ +#define RDONLY 0x00000200 /* lookup with read-only semantics */ +#define HASBUF 0x00000400 /* has allocated pathname buffer */ +#define SAVENAME 0x00000800 /* save pathname buffer */ +#define SAVESTART 0x00001000 /* save starting directory */ +#define ISSYMLINK 0x00010000 /* symlink needs interpretation */ +#define DONOTAUTH 0x00020000 /* do not authorize during lookup */ +#define WILLBEDIR 0x00080000 /* new files will be dirs; allow trailing / */ +#define AUDITVNPATH1 0x00100000 /* audit the path/vnode info */ +#define AUDITVNPATH2 0x00200000 /* audit the path/vnode info */ +#define USEDVP 0x00400000 /* start the lookup at ndp.ni_dvp */ +#define PARAMASK 0x003fff00 /* mask of parameter descriptors */ +#define FSNODELOCKHELD 0x01000000 + /* * Initialization of a nameidata structure. */ -#define NDINIT(ndp, op, flags, segflg, namep, p) { \ +#define NDINIT(ndp, op, flags, segflg, namep, ctx) { \ (ndp)->ni_cnd.cn_nameiop = op; \ (ndp)->ni_cnd.cn_flags = flags; \ - (ndp)->ni_segflg = segflg; \ + if ((segflg) == UIO_USERSPACE) { \ + (ndp)->ni_segflg = ((IS_64BIT_PROCESS(vfs_context_proc(ctx))) ? UIO_USERSPACE64 : UIO_USERSPACE32); \ + } \ + else if ((segflg) == UIO_SYSSPACE) { \ + (ndp)->ni_segflg = UIO_SYSSPACE32; \ + } \ + else { \ + (ndp)->ni_segflg = segflg; \ + } \ (ndp)->ni_dirp = namep; \ - (ndp)->ni_cnd.cn_proc = p; \ + (ndp)->ni_cnd.cn_context = ctx; \ } #endif /* KERNEL */
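/*
 * [Editorial aside -- illustrative kernel-side sketch, not part of the
 * original patch.] The usual shape of a lookup under the reworked
 * interface, where NDINIT() takes a vfs_context_t instead of the old
 * proc pointer. LOOKUP and FOLLOW are assumed to still be available
 * from the vnode headers (this patch removes their definitions from
 * namei.h); the function itself is invented.
 */
static int
example_lookup(user_addr_t path, vfs_context_t ctx, vnode_t *vpp)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, path, ctx);
	if ((error = namei(&nd)) != 0)
		return (error);
	*vpp = nd.ni_vp;	/* lookup result stays referenced for the caller */
	nameidone(&nd);		/* release pathname buffer / lookup state */
	return (0);
}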
/* * This structure describes the elements in the cache of recent - * names looked up by namei. NCHNAMLEN is sized to make structure - * size a power of two to optimize malloc's. Minimum reasonable - * size is 15. + * names looked up by namei. */ #define NCHNAMLEN 31 /* maximum name segment length we bother with */ +#define NCHASHMASK 0x7fffffff struct namecache { - LIST_ENTRY(namecache) nc_hash; /* hash chain */ - TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */ - struct vnode *nc_dvp; /* vnode of parent of name */ - u_long nc_dvpid; /* capability number of nc_dvp */ - struct vnode *nc_vp; /* vnode the name refers to */ - u_long nc_vpid; /* capability number of nc_vp */ - char *nc_name; /* segment name */ + TAILQ_ENTRY(namecache) nc_entry; /* chain of all entries */ + LIST_ENTRY(namecache) nc_hash; /* hash chain */ + LIST_ENTRY(namecache) nc_child; /* chain of ncp's that are children of a vp */ + union { + LIST_ENTRY(namecache) nc_link; /* chain of ncp's that 'name' a vp */ + TAILQ_ENTRY(namecache) nc_negentry; /* chain of ncp's that 'name' a vp */ + } nc_un; + vnode_t nc_dvp; /* vnode of parent of name */ + vnode_t nc_vp; /* vnode the name refers to */ + unsigned int nc_whiteout:1, /* name has whiteout applied */ + nc_hashval:31; /* hashval of stringname */ + char * nc_name; /* pointer to segment name in string cache */ }; + #ifdef KERNEL -struct mount; -extern u_long nextvnodeid; -int namei __P((struct nameidata *ndp)); -int lookup __P((struct nameidata *ndp)); -int relookup __P((struct vnode *dvp, struct vnode **vpp, - struct componentname *cnp)); - -/* namecache function prototypes */ -int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, - struct componentname *cnp)); -void cache_enter __P((struct vnode *dvp, struct vnode *vpp, - struct componentname *cnp)); -void cache_purge __P((struct vnode *vp)); -void cache_purgevfs __P((struct mount *mp)); - -// -// Global string-cache routines. You can pass zero for nc_hash -// if you don't know it (add_name() will then compute the hash). -// There are no flags for now but maybe someday. -// -char *add_name(const char *name, size_t len, u_int nc_hash, u_int flags); -int remove_name(const char *name); +int namei(struct nameidata *ndp); +void nameidone(struct nameidata *); +int lookup(struct nameidata *ndp); +int relookup(struct vnode *dvp, struct vnode **vpp, + struct componentname *cnp); + +/* + * namecache function prototypes + */ +void cache_purgevfs(mount_t mp); +int cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, + vfs_context_t context, int *trailing_slash, int *dp_authorized); + +void vnode_cache_credentials(vnode_t vp, vfs_context_t context); +void vnode_uncache_credentials(vnode_t vp); +int reverse_lookup(vnode_t start_vp, vnode_t *lookup_vpp, + struct filedesc *fdp, vfs_context_t context, int *dp_authorized); #endif /* KERNEL */ @@ -234,15 +209,18 @@ int remove_name(const char *name); * Stats on usefulness of namei caches.
*/ struct nchstats { + long ncs_negtotal; long ncs_goodhits; /* hits that we can really use */ long ncs_neghits; /* negative hits that we can use */ long ncs_badhits; /* hits we must drop */ - long ncs_falsehits; /* hits with id mismatch */ long ncs_miss; /* misses */ - long ncs_long; /* long names that ignore cache */ long ncs_pass2; /* names found with passes == 2 */ long ncs_2passes; /* number of times we attempt it */ + long ncs_stolen; + long ncs_enters; + long ncs_deletes; + long ncs_badvid; }; -#endif /* __APPLE_API_UNSTABLE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_SYS_NAMEI_H_ */ diff --git a/bsd/sys/param.h b/bsd/sys/param.h index 61d1806c3..f9b6aafc2 100644 --- a/bsd/sys/param.h +++ b/bsd/sys/param.h @@ -70,9 +70,11 @@ #define NeXTBSD 1995064 /* NeXTBSD version (year, month, release) */ #define NeXTBSD4_0 0 /* NeXTBSD 4.0 */ +#include + #ifndef NULL -#define NULL 0 -#endif +#define NULL __DARWIN_NULL +#endif /* ! NULL */ #ifndef LOCORE #include @@ -137,6 +139,7 @@ #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PTTYBLOCK 0x200 /* for tty SIGTTOU and SIGTTIN blocking */ +#define PDROP 0x400 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ @@ -175,16 +178,20 @@ /* * File system parameters and macros. * - * The file system is made out of blocks of at most MAXBSIZE units, with + * The file system is made out of blocks of at most MAXPHYS units, with * smaller units (fragments) only in the last direct block. MAXBSIZE * primarily determines the size of buffers in the buffer pool. It may be - * made larger without any effect on existing file systems; however making - * it smaller make make some file systems unmountable. + * made larger than MAXPHYS without any effect on existing file systems; + * however making it smaller may make some file systems unmountable. + * We set this to track the value of (MAX_UPL_TRANSFER*PAGE_SIZE) from + * osfmk/mach/memory_object_types.h to bound it at the maximum UPL size. */ -#define MAXBSIZE MAXPHYS +#define MAXBSIZE (256 * 4096) #define MAXPHYSIO MAXPHYS #define MAXFRAG 8 +#define MAXPHYSIO_WIRED (16 * 1024 * 1024) + /* * MAXPATHLEN defines the longest permissable path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer diff --git a/bsd/sys/pipe.h b/bsd/sys/pipe.h new file mode 100644 index 000000000..c999c7dbf --- /dev/null +++ b/bsd/sys/pipe.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1996 John S. Dyson + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice immediately at the beginning of the file, without modification, + * this list of conditions, and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Absolutely no warranty of function or purpose is made by the author + * John S. Dyson. + * 4. This work was done expressly for inclusion into FreeBSD. Other use + * is allowed if this notation is included. + * 5. Modifications may be freely made to this file if the above conditions + * are met. + * + * $FreeBSD: src/sys/sys/pipe.h,v 1.24 2003/08/13 20:01:38 alc Exp $ + */ + +#ifndef _SYS_PIPE_H_ +#define _SYS_PIPE_H_ + +#ifdef KERNEL +#include +#endif +#include /* for TAILQ macros */ +#include +#include + +/* + * Pipe buffer size, keep moderate in value, pipes take kva space. + */ +#ifndef PIPE_SIZE +#define PIPE_SIZE 16384 +#endif + +#ifndef BIG_PIPE_SIZE +#define BIG_PIPE_SIZE (64*1024) +#endif + +#ifndef SMALL_PIPE_SIZE +#define SMALL_PIPE_SIZE PAGE_SIZE +#endif + +/* + * PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger + * than PIPE_BUF. + */ +#ifndef PIPE_MINDIRECT +#define PIPE_MINDIRECT 8192 +#endif + +#define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1) + +/* + * Pipe buffer information. + * Separate in, out, cnt are used to simplify calculations. + * Buffered write is active when the buffer.cnt field is set. + */ +struct pipebuf { + u_int cnt; /* number of chars currently in buffer */ + u_int in; /* in pointer */ + u_int out; /* out pointer */ + u_int size; /* size of buffer */ + caddr_t buffer; /* kva of buffer */ +}; + + +#ifdef PIPE_DIRECT +/* + * Information to support direct transfers between processes for pipes. + */ +/* LP64todo - not 64bit safe */ +struct pipemapping { + vm_offset_t kva; /* kernel virtual address */ + vm_size_t cnt; /* number of chars in buffer */ + vm_size_t pos; /* current position of transfer */ + int npages; /* number of pages */ + vm_page_t ms[PIPENPAGES]; /* pages in source process */ +}; +#endif + +/* + * Bits in pipe_state. + */ +#define PIPE_ASYNC 0x004 /* Async? I/O. */ +#define PIPE_WANTR 0x008 /* Reader wants some characters. */ +#define PIPE_WANTW 0x010 /* Writer wants space to put characters. */ +#define PIPE_WANT 0x020 /* Pipe is wanted to be run-down. */ +#define PIPE_SEL 0x040 /* Pipe has a select active. */ +#define PIPE_EOF 0x080 /* Pipe is in EOF condition. */ +#define PIPE_LOCKFL 0x100 /* Process has exclusive access to pointers/data. */ +#define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */ +#define PIPE_DIRECTW 0x400 /* Pipe direct write active. */ +#define PIPE_DIRECTOK 0x800 /* Direct mode ok. */ +#define PIPE_KNOTE 0x1000 /* Pipe has kernel events activated */ + +#ifdef KERNEL +/* + * Per-pipe data structure. + * Two of these are linked together to produce bi-directional pipes. 
+ */ +struct pipe { + struct pipebuf pipe_buffer; /* data storage */ +#ifdef PIPE_DIRECT + struct pipemapping pipe_map; /* pipe mapping for direct I/O */ +#endif + struct selinfo pipe_sel; /* for compat with select */ + pid_t pipe_pgid; /* information for async I/O */ + struct pipe *pipe_peer; /* link with other direction */ + u_int pipe_state; /* pipe status info */ + int pipe_busy; /* busy flag, mostly to handle rundown sanely */ +#ifdef MAC + struct label *pipe_label; /* pipe MAC label - shared */ +#endif + TAILQ_HEAD(,eventqelt) pipe_evlist; + lck_mtx_t *pipe_mtxp; /* shared mutex between both pipes */ +}; + +#define PIPE_MTX(pipe) ((pipe)->pipe_mtxp) + +#define PIPE_LOCK(pipe) lck_mtx_lock(PIPE_MTX(pipe)) +#define PIPE_UNLOCK(pipe) lck_mtx_unlock(PIPE_MTX(pipe)) +#define PIPE_LOCK_ASSERT(pipe, type) lck_mtx_assert(PIPE_MTX(pipe), (type)) + +__BEGIN_DECLS +extern int pipe_stat(struct pipe *, struct stat *); +__END_DECLS + +#endif /* KERNEL */ + +#endif /* !_SYS_PIPE_H_ */ diff --git a/bsd/sys/poll.h b/bsd/sys/poll.h index 443469915..7c1077722 100644 --- a/bsd/sys/poll.h +++ b/bsd/sys/poll.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -51,9 +51,6 @@ #ifndef _SYS_POLL_H_ #define _SYS_POLL_H_ -#include - -#ifdef __APPLE_API_PRIVATE /* * This file is intended to be compatable with the traditional poll.h. */ @@ -61,12 +58,6 @@ /* * Requestable events. If poll(2) finds any of these set, they are * copied to revents on return. - * XXX Note that FreeBSD doesn't make much distinction between POLLPRI - * and POLLRDBAND since none of the file types have distinct priority - * bands - and only some have an urgent "mode". - * XXX Note POLLIN isn't really supported in true SVSV terms. Under SYSV - * POLLIN includes all of normal, band and urgent data. Most poll handlers - * on FreeBSD only treat it as "normal" data. */ #define POLLIN 0x0001 /* any readable data available */ #define POLLPRI 0x0002 /* OOB/Urgent readable data */ @@ -78,7 +69,7 @@ /* * FreeBSD extensions: polling on a regular file might return one - * of these events (currently only supported on UFS). + * of these events (currently only supported on local filesystems). */ #define POLLEXTEND 0x0200 /* file may have been extended */ #define POLLATTRIB 0x0400 /* file attributes may have changed */ @@ -96,6 +87,29 @@ #define POLLSTANDARD (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLRDBAND|\ POLLWRBAND|POLLERR|POLLHUP|POLLNVAL) -#endif /* __APPLE_API_PRIVATE */ +struct pollfd +{ + int fd; + short events; + short revents; +}; + +typedef unsigned int nfds_t; + +#if !defined(KERNEL) + +#include + +__BEGIN_DECLS + +/* + * This is defined here (instead of ) because this is where + * traditional SVR4 code will look to find it. + */ +extern int poll (struct pollfd *, nfds_t, int); + +__END_DECLS + +#endif /* !KERNEL */ #endif /* !_SYS_POLL_H_ */ diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index a7a500a5c..cbf1b3a80 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -70,168 +70,25 @@ #include #include #include +#ifdef KERNEL +#include +#endif +#include -#ifdef __APPLE_API_PRIVATE - -/* - * One structure allocated per session. - */ -struct session { - int s_count; /* Ref cnt; pgrps in session. */ - struct proc *s_leader; /* Session leader. */ - struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ - struct tty *s_ttyp; /* Controlling terminal. 
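/*
 * With struct pollfd, nfds_t and the poll() prototype now exported
 * unconditionally by <sys/poll.h> above, a traditional SVR4-style
 * caller needs no Apple-specific guards.  A small user-space sketch:
 */
#include <sys/poll.h>
#include <stdio.h>

int
wait_for_stdin(int timeout_ms)
{
	struct pollfd pfd;

	pfd.fd = 0;			/* standard input */
	pfd.events = POLLIN;
	pfd.revents = 0;

	switch (poll(&pfd, (nfds_t)1, timeout_ms)) {
	case -1:
		perror("poll");
		return (-1);
	case 0:
		return (0);		/* timed out */
	default:
		return ((pfd.revents & POLLIN) != 0);
	}
}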
*/ - pid_t s_sid; /* Session ID */ - char s_login[MAXLOGNAME]; /* Setlogin() name. */ -}; - -/* - * One structure allocated per process group. - */ -struct pgrp { - LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ - LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ - struct session *pg_session; /* Pointer to session. */ - pid_t pg_id; /* Pgrp id. */ - int pg_jobc; /* # procs qualifying pgrp for job control */ -}; - -/* - * Description of a process. - * - * This structure contains the information needed to manage a thread of - * control, known in UN*X as a process; it has references to substructures - * containing descriptions of things that the process uses, but may share - * with related processes. The process structure and the substructures - * are always addressible except for those marked "(PROC ONLY)" below, - * which might be addressible only on a processor on which the process - * is running. - */ -struct proc { - LIST_ENTRY(proc) p_list; /* List of all processes. */ - - /* substructures: */ - struct pcred *p_cred; /* Process owner's identity. */ - struct filedesc *p_fd; /* Ptr to open files structure. */ - struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ - struct plimit *p_limit; /* Process limits. */ - struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ - -#define p_ucred p_cred->pc_ucred -#define p_rlimit p_limit->pl_rlimit - - int p_flag; /* P_* flags. */ - char p_stat; /* S* process status. */ - char p_shutdownstate; - char p_pad1[2]; - - pid_t p_pid; /* Process identifier. */ - LIST_ENTRY(proc) p_pglist; /* List of processes in pgrp. */ - struct proc *p_pptr; /* Pointer to parent process. */ - LIST_ENTRY(proc) p_sibling; /* List of sibling processes. */ - LIST_HEAD(, proc) p_children; /* Pointer to list of children. */ - -/* The following fields are all zeroed upon creation in fork. */ -#define p_startzero p_oppid - - pid_t p_oppid; /* Save parent pid during ptrace. XXX */ - int p_dupfd; /* Sideways return value from fdopen. XXX */ - - /* scheduling */ - u_int p_estcpu; /* Time averaged value of p_cpticks. */ - int p_cpticks; /* Ticks of cpu time. */ - fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ - void *p_wchan; /* Sleep address. */ - char *p_wmesg; /* Reason for sleep. */ - u_int p_swtime; /* DEPRECATED (Time swapped in or out.) */ -#define p_argslen p_swtime /* Length of process arguments. */ - u_int p_slptime; /* Time since last blocked. */ - - struct itimerval p_realtimer; /* Alarm timer. */ - struct timeval p_rtime; /* Real time. */ - u_quad_t p_uticks; /* Statclock hits in user mode. */ - u_quad_t p_sticks; /* Statclock hits in system mode. */ - u_quad_t p_iticks; /* Statclock hits processing intr. */ - - int p_traceflag; /* Kernel trace points. */ - struct vnode *p_tracep; /* Trace to vnode. */ - - sigset_t p_siglist; /* DEPRECATED. */ - - struct vnode *p_textvp; /* Vnode of executable. */ - -/* End area that is zeroed on creation. */ -#define p_endzero p_hash.le_next - - /* - * Not copied, not zero'ed. - * Belongs after p_pid, but here to avoid shifting proc elements. - */ - LIST_ENTRY(proc) p_hash; /* Hash chain. */ - TAILQ_HEAD( ,eventqelt) p_evlist; - -/* The following fields are all copied upon creation in fork. */ -#define p_startcopy p_sigmask - - sigset_t p_sigmask; /* DEPRECATED */ - sigset_t p_sigignore; /* Signals being ignored. */ - sigset_t p_sigcatch; /* Signals being caught by user. */ - - u_char p_priority; /* Process priority. */ - u_char p_usrpri; /* User-priority based on p_cpu and p_nice. 
*/ - char p_nice; /* Process "nice" value. */ - char p_comm[MAXCOMLEN+1]; - - struct pgrp *p_pgrp; /* Pointer to process group. */ - -/* End area that is copied on creation. */ -#define p_endcopy p_xstat - - u_short p_xstat; /* Exit status for wait; also stop signal. */ - u_short p_acflag; /* Accounting flags. */ - struct rusage *p_ru; /* Exit information. XXX */ +#ifdef XNU_KERNEL_PRIVATE +#define PROC_DEF_ENABLED +#else +#ifndef KERNEL +#define PROC_DEF_ENABLED +#endif +#endif - int p_debugger; /* 1: can exec set-bit programs if suser */ - - void *task; /* corresponding task */ - void *sigwait_thread; /* 'thread' holding sigwait */ - struct lock__bsd__ signal_lock; /* multilple thread prot for signals*/ - boolean_t sigwait; /* indication to suspend */ - void *exit_thread; /* Which thread is exiting? */ - caddr_t user_stack; /* where user stack was allocated */ - void * exitarg; /* exit arg for proc terminate */ - void * vm_shm; /* for sysV shared memory */ - int p_argc; /* saved argc for sysctl_procargs() */ - int p_vforkcnt; /* number of outstanding vforks */ - void * p_vforkact; /* activation running this vfork proc */ - TAILQ_HEAD( , uthread) p_uthlist; /* List of uthreads */ - /* Following fields are info from SIGCHLD */ - pid_t si_pid; - u_short si_status; - u_short si_code; - uid_t si_uid; - TAILQ_HEAD( , aio_workq_entry ) aio_activeq; /* active async IO requests */ - int aio_active_count; /* entries on aio_activeq */ - TAILQ_HEAD( , aio_workq_entry ) aio_doneq; /* completed async IO requests */ - int aio_done_count; /* entries on aio_doneq */ - - struct klist p_klist; /* knote list */ - struct auditinfo *p_au; /* User auditing data */ -#if DIAGNOSTIC -#if SIGNAL_DEBUG - unsigned int lockpc[8]; - unsigned int unlockpc[8]; -#endif /* SIGNAL_DEBUG */ -#endif /* DIAGNOSTIC */ -}; +#ifdef PROC_DEF_ENABLED -#else /* !__APPLE_API_PRIVATE */ struct session; struct pgrp; struct proc; -#endif /* !__APPLE_API_PRIVATE */ -#ifdef __APPLE_API_UNSTABLE /* Exported fields for kern sysctls */ struct extern_proc { union { @@ -288,8 +145,6 @@ struct extern_proc { struct rusage *p_ru; /* Exit information. XXX */ }; -#define p_session p_pgrp->pg_session -#define p_pgid p_pgrp->pg_id /* Status values. */ #define SIDL 1 /* Process being created by fork. */ @@ -299,146 +154,129 @@ struct extern_proc { #define SZOMB 5 /* Awaiting collection by parent. */ /* These flags are kept in p_flags. */ -#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ -#define P_CONTROLT 0x00002 /* Has a controlling terminal. */ -#define P_INMEM 0x00004 /* Loaded into memory. */ -#define P_NOCLDSTOP 0x00008 /* No SIGCHLD when children stop. */ -#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ -#define P_PROFIL 0x00020 /* Has started profiling. */ -#define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */ -#define P_SINTR 0x00080 /* Sleep is interruptible. */ -#define P_SUGID 0x00100 /* Had set id privileges since last exec. */ -#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ -#define P_TIMEOUT 0x00400 /* Timing out during sleep. */ -#define P_TRACED 0x00800 /* Debugged process being traced. */ -#define P_WAITED 0x01000 /* Debugging process has waited for child. */ -#define P_WEXIT 0x02000 /* Working on exiting. */ -#define P_EXEC 0x04000 /* Process called exec. */ +#define P_ADVLOCK 0x00000001 /* Process may hold POSIX adv. 
lock */ +#define P_CONTROLT 0x00000002 /* Has a controlling terminal */ +#define P_LP64 0x00000004 /* Process is LP64 */ +#define P_NOCLDSTOP 0x00000008 /* No SIGCHLD when children stop */ + +#define P_PPWAIT 0x00000010 /* Parent waiting for chld exec/exit */ +#define P_PROFIL 0x00000020 /* Has started profiling */ +#define P_SELECT 0x00000040 /* Selecting; wakeup/waiting danger */ +#define P_CONTINUED 0x00000080 /* Process was stopped and continued */ + +#define P_SUGID 0x00000100 /* Has set privileges since last exec */ +#define P_SYSTEM 0x00000200 /* Sys proc: no sigs, stats or swap */ +#define P_TIMEOUT 0x00000400 /* Timing out during sleep */ +#define P_TRACED 0x00000800 /* Debugged process being traced */ + +#define P_WAITED 0x00001000 /* Debugging prc has waited for child */ +#define P_WEXIT 0x00002000 /* Working on exiting. */ +#define P_EXEC 0x00004000 /* Process called exec. */ /* Should be moved to machine-dependent areas. */ -#define P_OWEUPC 0x08000 /* Owe process an addupc() call at next ast. */ +#define P_OWEUPC 0x00008000 /* Owe process an addupc() call at next ast. */ -#define P_AFFINITY 0x0010000 /* xxx */ -#define P_CLASSIC 0x0020000 /* xxx */ +#define P_AFFINITY 0x00010000 /* xxx */ +#define P_CLASSIC 0x00020000 /* xxx */ /* -#define P_FSTRACE 0x10000 / * tracing via file system (elsewhere?) * / -#define P_SSTEP 0x20000 / * process needs single-step fixup ??? * / +#define P_FSTRACE 0x10000 / * tracing via file system (elsewhere?) * / +#define P_SSTEP 0x20000 / * process needs single-step fixup ??? * / */ -#define P_WAITING 0x0040000 /* process has a wait() in progress */ -#define P_KDEBUG 0x0080000 /* kdebug tracing is on for this process */ -#define P_TTYSLEEP 0x0100000 /* blocked due to SIGTTOU or SIGTTIN */ -#define P_REBOOT 0x0200000 /* Process called reboot() */ -#define P_TBE 0x0400000 /* Process is TBE */ -#define P_SIGEXC 0x0800000 /* signal exceptions */ -#define P_BTRACE 0x1000000 /* process is being branch traced */ -#define P_VFORK 0x2000000 /* process has vfork children */ -#define P_NOATTACH 0x4000000 -#define P_INVFORK 0x8000000 /* proc in vfork */ +#define P_WAITING 0x00040000 /* process has a wait() in progress */ +#define P_KDEBUG 0x00080000 /* kdebug tracing on for this process */ + +#define P_TTYSLEEP 0x00100000 /* blocked due to SIGTTOU or SIGTTIN */ +#define P_REBOOT 0x00200000 /* Process called reboot() */ +#define P_TBE 0x00400000 /* Process is TBE */ +#define P_SIGEXC 0x00800000 /* signal exceptions */ + +#define P_BTRACE 0x01000000 /* process is being branch traced */ +#define P_VFORK 0x02000000 /* process has vfork children */ +#define P_NOATTACH 0x04000000 +#define P_INVFORK 0x08000000 /* proc in vfork */ + #define P_NOSHLIB 0x10000000 /* no shared libs are in use for proc */ /* flag set on exec */ #define P_FORCEQUOTA 0x20000000 /* Force quota for root */ #define P_NOCLDWAIT 0x40000000 /* No zombies when chil procs exit */ #define P_NOREMOTEHANG 0x80000000 /* Don't hang on remote FS ops */ -#define P_NOSWAP 0 /* Obsolete: retained so that nothing breaks */ -#define P_PHYSIO 0 /* Obsolete: retained so that nothing breaks */ -#define P_FSTRACE 0 /* Obsolete: retained so that nothing breaks */ -#define P_SSTEP 0 /* Obsolete: retained so that nothing breaks */ - -/* - * Shareable process credentials (always resident). This includes a reference - * to the current user credentials as well as real and saved ids that may be - * used to change ids. - */ -struct pcred { - struct lock__bsd__ pc_lock; - struct ucred *pc_ucred; /* Current credentials. 
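/*
 * The extern_proc structure and the widened 32-bit p_flag values above
 * are what user space sees through the kern.proc sysctls, where
 * extern_proc is embedded as the kp_proc member of the kinfo_proc
 * wrapper from <sys/sysctl.h>.  A sketch of testing the new P_LP64 bit
 * for an arbitrary pid (the wrapper layout is assumed from that
 * header, not shown in this patch):
 */
#include <sys/sysctl.h>
#include <string.h>

int
pid_is_lp64(pid_t pid)
{
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, pid };
	struct kinfo_proc kp;
	size_t len = sizeof(kp);

	memset(&kp, 0, sizeof(kp));
	if (sysctl(mib, 4, &kp, &len, NULL, 0) == -1 || len == 0)
		return (-1);
	return ((kp.kp_proc.p_flag & P_LP64) != 0);
}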
*/ - uid_t p_ruid; /* Real user id. */ - uid_t p_svuid; /* Saved effective user id. */ - gid_t p_rgid; /* Real group id. */ - gid_t p_svgid; /* Saved effective group id. */ - int p_refcnt; /* Number of references. */ -}; +#define P_INMEM 0 /* Obsolete: retained for compilation */ +#define P_NOSWAP 0 /* Obsolete: retained for compilation */ +#define P_PHYSIO 0 /* Obsolete: retained for compilation */ +#define P_FSTRACE 0 /* Obsolete: retained for compilation */ +#define P_SSTEP 0 /* Obsolete: retained for compilation */ -#define pcred_readlock(p) lockmgr(&(p)->p_cred->pc_lock, \ - LK_SHARED, 0, (p)) -#define pcred_writelock(p) lockmgr(&(p)->p_cred->pc_lock, \ - LK_EXCLUSIVE, 0, (p)) -#define pcred_unlock(p) lockmgr(&(p)->p_cred->pc_lock, \ - LK_RELEASE, 0, (p)) -#endif /* __APPLE_API_UNSTABLE */ +#endif /* PROC_DEF_ENABLED */ #ifdef KERNEL - __BEGIN_DECLS -#ifdef __APPLE_API_PRIVATE -/* - * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, - * as it is used to represent "no process group". - */ -extern int nprocs, maxproc; /* Current and max number of procs. */ -__private_extern__ int hard_maxproc; /* hard limit */ - -#define PID_MAX 30000 -#define NO_PID 30001 - -#define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) -#define SESSHOLD(s) ((s)->s_count++) -#define SESSRELE(s) sessrele(s) - -#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) -extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; -extern u_long pidhash; - -#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) -extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; -extern u_long pgrphash; - -LIST_HEAD(proclist, proc); -extern struct proclist allproc; /* List of all processes. */ -extern struct proclist zombproc; /* List of zombie processes. */ -extern struct proc *initproc, *kernproc; -extern void pgdelete __P((struct pgrp *pgrp)); -extern void sessrele __P((struct session *sess)); -extern void procinit __P((void)); -__private_extern__ char *proc_core_name(const char *name, uid_t uid, pid_t pid); + +extern proc_t kernproc; + extern int proc_is_classic(struct proc *p); struct proc *current_proc_EXTERNAL(void); -#endif /* __APPLE_API_PRIVATE */ - -#ifdef __APPLE_API_UNSTABLE - -extern int isinferior(struct proc *, struct proc *); -extern struct proc *pfind __P((pid_t)); /* Find process by id. */ -__private_extern__ struct proc *pzfind(pid_t); /* Find zombie by id. */ -extern struct pgrp *pgfind __P((pid_t)); /* Find process group by id. 
*/ - -extern int chgproccnt __P((uid_t uid, int diff)); -extern int enterpgrp __P((struct proc *p, pid_t pgid, int mksess)); -extern void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); -extern int inferior __P((struct proc *p)); -extern int leavepgrp __P((struct proc *p)); -#ifdef __APPLE_API_OBSOLETE -extern void mi_switch __P((void)); -#endif /* __APPLE_API_OBSOLETE */ -extern void resetpriority __P((struct proc *)); -extern void setrunnable __P((struct proc *)); -extern void setrunqueue __P((struct proc *)); -extern int sleep __P((void *chan, int pri)); -extern int tsleep __P((void *chan, int pri, char *wmesg, int timo)); -extern int tsleep0 __P((void *chan, int pri, char *wmesg, int timo, int (*continuation)(int))); -extern int tsleep1 __P((void *chan, int pri, char *wmesg, u_int64_t abstime, int (*continuation)(int))); -extern void unsleep __P((struct proc *)); -extern void wakeup __P((void *chan)); -#endif /* __APPLE_API_UNSTABLE */ -__END_DECLS +extern int msleep(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, struct timespec * ts ); +extern void unsleep(struct proc *); +extern void wakeup(void *chan); +extern void wakeup_one(caddr_t chan); + +/* proc kpis */ +/* this routine returns the pid of the current process */ +extern int proc_selfpid(void); +/* this routine returns the pid of the parent of the current process */ +extern int proc_selfppid(void); +/* this routine returns sends a signal signum to the process identified by the pid */ +extern void proc_signal(int pid, int signum); +/* this routine checks whether any signal identified by the mask are pending in the process identified by the pid. The check is on all threads of the process. */ +extern int proc_issignal(int pid, sigset_t mask); +/* this routine returns 1 if the pid1 is inferior of pid2 */ +extern int proc_isinferior(int pid1, int pid2); +/* this routine copies the process's name of the executable to the passed in buffer. It + * is always null terminated. The size of the buffer is to be passed in as well. This + * routine is to be used typically for debugging + */ +void proc_name(int pid, char * buf, int size); +/* This routine is simillar to proc_name except it returns for current process */ +void proc_selfname(char * buf, int size); + +/* find a process with a given pid. This comes with a reference which needs to be dropped by proc_rele */ +extern proc_t proc_find(int pid); +/* returns a handle to current process which is referenced. The reference needs to be dropped with proc_rele */ +extern proc_t proc_self(void); +/* releases the held reference on the process */ +extern int proc_rele(proc_t p); +/* returns the pid of the given process */ +extern int proc_pid(proc_t); +/* returns the pid of the parent of a given process */ +extern int proc_ppid(proc_t); +/* returns 1 if the process is marked for no remote hangs */ +extern int proc_noremotehang(proc_t); +/* returns 1 is the process is marked for force quota */ +extern int proc_forcequota(proc_t); + +/* this routine returns 1 if the process is running with 64bit address space, else 0 */ +extern int proc_is64bit(proc_t); +/* is this process exiting? 
*/ +extern int proc_exiting(proc_t); +/* this routine returns error is the process is not one with super user privileges */ +int proc_suser(struct proc *p); +/* returns the ucred assicaited with the process; temporary api */ +struct ucred * proc_ucred(struct proc *p); + +/* LP64todo - figure out how to identify 64-bit processes if NULL procp */ +extern int IS_64BIT_PROCESS(proc_t); +extern int proc_pendingsignals(struct proc *, sigset_t); +extern int proc_tbe(struct proc *); + +#ifdef KERNEL_PRIVATE +extern int tsleep(void *chan, int pri, const char *wmesg, int timo); +extern int msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int64_t timo); +#endif -#ifdef __APPLE_API_OBSOLETE -/* FreeBSD source compatibility macro */ -#define PRISON_CHECK(p1, p2) (1) -#endif /* __APPLE_API_OBSOLETE */ +__END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h new file mode 100644 index 000000000..6d7c06111 --- /dev/null +++ b/bsd/sys/proc_internal.h @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */ +/*- + * Copyright (c) 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
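/*
 * The process KPIs declared above replace direct proc-structure access
 * for kernel extensions.  A minimal sketch of the reference-counted
 * lookup pattern they imply -- proc_find() returns a held reference
 * that must be dropped with proc_rele(); the logging is illustrative:
 */
static int
log_parent_of(int pid)
{
	proc_t p;
	char name[32];
	int ppid;

	p = proc_find(pid);
	if (p == NULL)
		return (-1);		/* no such process */
	ppid = proc_ppid(p);
	proc_rele(p);			/* drop the proc_find() reference */

	proc_name(pid, name, sizeof(name));
	printf("pid %d (%s) has parent %d\n", pid, name, ppid);
	return (0);
}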
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)proc_internal.h 8.15 (Berkeley) 5/19/95 + */ + +#ifndef _SYS_PROC_INTERNAL_H_ +#define _SYS_PROC_INTERNAL_H_ + +#include +__BEGIN_DECLS +#include +__END_DECLS + +/* + * One structure allocated per session. + */ +struct session { + int s_count; /* Ref cnt; pgrps in session. */ + struct proc *s_leader; /* Session leader. */ + struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ + struct tty *s_ttyp; /* Controlling terminal. */ + pid_t s_sid; /* Session ID */ + char s_login[MAXLOGNAME]; /* Setlogin() name. */ +}; + +/* + * One structure allocated per process group. + */ +struct pgrp { + LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ + LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ + struct session *pg_session; /* Pointer to session. */ + pid_t pg_id; /* Pgrp id. */ + int pg_jobc; /* # procs qualifying pgrp for job control */ +}; + +struct proc; + +#define PROC_NULL (struct proc *)0; + +#define p_session p_pgrp->pg_session +#define p_pgid p_pgrp->pg_id + +/* + * Description of a process. + * + * This structure contains the information needed to manage a thread of + * control, known in UN*X as a process; it has references to substructures + * containing descriptions of things that the process uses, but may share + * with related processes. The process structure and the substructures + * are always addressible except for those marked "(PROC ONLY)" below, + * which might be addressible only on a processor on which the process + * is running. + */ +struct proc { + LIST_ENTRY(proc) p_list; /* List of all processes. */ + + /* substructures: */ + struct ucred *p_ucred; /* Process owner's identity. */ + struct filedesc *p_fd; /* Ptr to open files structure. */ + struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ + struct plimit *p_limit; /* Process limits. */ + struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ + +#define p_rlimit p_limit->pl_rlimit + + int p_flag; /* P_* flags. */ + char p_stat; /* S* process status. */ + char p_shutdownstate; + char p_pad1[2]; + + pid_t p_pid; /* Process identifier. */ + LIST_ENTRY(proc) p_pglist; /* List of processes in pgrp. */ + struct proc *p_pptr; /* Pointer to parent process. */ + LIST_ENTRY(proc) p_sibling; /* List of sibling processes. */ + LIST_HEAD(, proc) p_children; /* Pointer to list of children. */ + +/* The following fields are all zeroed upon creation in fork. */ +#define p_startzero p_oppid + + pid_t p_oppid; /* Save parent pid during ptrace. XXX */ + int p_dupfd; /* Sideways return value from fdopen. XXX */ + + /* scheduling */ + u_int p_estcpu; /* Time averaged value of p_cpticks. */ + int p_cpticks; /* Ticks of cpu time. 
*/ + fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ + void *p_wchan; /* Sleep address. */ + char *p_wmesg; /* Reason for sleep. */ + u_int p_swtime; /* DEPRECATED (Time swapped in or out.) */ +#define p_argslen p_swtime /* Length of process arguments. */ + u_int p_slptime; /* Time since last blocked. */ + + struct itimerval p_realtimer; /* Alarm timer. */ + struct timeval p_rtime; /* Real time. */ + u_quad_t p_uticks; /* Statclock hits in user mode. */ + u_quad_t p_sticks; /* Statclock hits in system mode. */ + u_quad_t p_iticks; /* Statclock hits processing intr. */ + + int p_traceflag; /* Kernel trace points. */ + struct vnode *p_tracep; /* Trace to vnode. */ + + sigset_t p_siglist; /* DEPRECATED. */ + + struct vnode *p_textvp; /* Vnode of executable. */ + +/* End area that is zeroed on creation. */ +#define p_endzero p_hash.le_next + + /* + * Not copied, not zero'ed. + * Belongs after p_pid, but here to avoid shifting proc elements. + */ + LIST_ENTRY(proc) p_hash; /* Hash chain. */ + TAILQ_HEAD( ,eventqelt) p_evlist; + +/* The following fields are all copied upon creation in fork. */ +#define p_startcopy p_sigmask + + sigset_t p_sigmask; /* DEPRECATED */ + sigset_t p_sigignore; /* Signals being ignored. */ + sigset_t p_sigcatch; /* Signals being caught by user. */ + + u_char p_priority; /* Process priority. */ + u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ + char p_nice; /* Process "nice" value. */ + char p_comm[MAXCOMLEN+1]; + + struct pgrp *p_pgrp; /* Pointer to process group. */ + +/* End area that is copied on creation. */ +#define p_endcopy p_xstat + + u_short p_xstat; /* Exit status for wait; also stop signal. */ + u_short p_acflag; /* Accounting flags. */ + struct rusage *p_ru; /* Exit information. XXX */ + + int p_debugger; /* 1: can exec set-bit programs if suser */ + + void *task; /* corresponding task */ + void *sigwait_thread; /* 'thread' holding sigwait */ + char signal_lock[72]; + boolean_t sigwait; /* indication to suspend */ + void *exit_thread; /* Which thread is exiting? */ + user_addr_t user_stack; /* where user stack was allocated */ + void * exitarg; /* exit arg for proc terminate */ + void * vm_shm; /* for sysV shared memory */ + int p_argc; /* saved argc for sysctl_procargs() */ + int p_vforkcnt; /* number of outstanding vforks */ + void * p_vforkact; /* activation running this vfork proc */ + TAILQ_HEAD( , uthread) p_uthlist; /* List of uthreads */ + /* Following fields are info from SIGCHLD */ + pid_t si_pid; + u_short si_status; + u_short si_code; + uid_t si_uid; + TAILQ_HEAD( , aio_workq_entry ) aio_activeq; /* active async IO requests */ + int aio_active_count; /* entries on aio_activeq */ + TAILQ_HEAD( , aio_workq_entry ) aio_doneq; /* completed async IO requests */ + int aio_done_count; /* entries on aio_doneq */ + + struct klist p_klist; /* knote list */ + lck_mtx_t p_mlock; /* proc lock to protect evques */ + lck_mtx_t p_fdmlock; /* proc lock to protect evques */ + unsigned int p_fdlock_pc[4]; + unsigned int p_fdunlock_pc[4]; + int p_fpdrainwait; + int p_lflag; /* local flags */ +#if DIAGNOSTIC +#if SIGNAL_DEBUG + unsigned int lockpc[8]; + unsigned int unlockpc[8]; +#endif /* SIGNAL_DEBUG */ +#endif /* DIAGNOSTIC */ +}; + + +#define P_LDELAYTERM 0x1 /* */ +#define P_LNOZOMB 0x2 /* */ +#define P_LLOW_PRI_IO 0x4 +#define P_LPEXIT 0x8 +#define P_LBACKGROUND_IO 0x10 + +// LP64todo - should this move? +/* LP64 version of extern_proc. all pointers + * grow when we're dealing with a 64-bit process. 
+ * WARNING - keep in sync with extern_proc + * but use native alignment of 64-bit process. + */ + +#ifdef KERNEL +#include /* user_timeval, user_itimerval */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_extern_proc { + union { + struct { + user_addr_t __p_forw; /* Doubly-linked run/sleep queue. */ + user_addr_t __p_back; + } p_st1; + struct user_timeval __p_starttime; /* process start time */ + } p_un; + user_addr_t p_vmspace; /* Address space. */ + user_addr_t p_sigacts; /* Signal actions, state (PROC ONLY). */ + int p_flag; /* P_* flags. */ + char p_stat; /* S* process status. */ + pid_t p_pid; /* Process identifier. */ + pid_t p_oppid; /* Save parent pid during ptrace. XXX */ + int p_dupfd; /* Sideways return value from fdopen. XXX */ + /* Mach related */ + user_addr_t user_stack; /* where user stack was allocated */ + user_addr_t exit_thread; /* XXX Which thread is exiting? */ + int p_debugger; /* allow to debug */ + boolean_t sigwait; /* indication to suspend */ + /* scheduling */ + u_int p_estcpu; /* Time averaged value of p_cpticks. */ + int p_cpticks; /* Ticks of cpu time. */ + fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ + user_addr_t p_wchan; /* Sleep address. */ + user_addr_t p_wmesg; /* Reason for sleep. */ + u_int p_swtime; /* Time swapped in or out. */ + u_int p_slptime; /* Time since last blocked. */ + struct user_itimerval p_realtimer; /* Alarm timer. */ + struct user_timeval p_rtime; /* Real time. */ + u_quad_t p_uticks; /* Statclock hits in user mode. */ + u_quad_t p_sticks; /* Statclock hits in system mode. */ + u_quad_t p_iticks; /* Statclock hits processing intr. */ + int p_traceflag; /* Kernel trace points. */ + user_addr_t p_tracep; /* Trace to vnode. */ + int p_siglist; /* DEPRECATED */ + user_addr_t p_textvp; /* Vnode of executable. */ + int p_holdcnt; /* If non-zero, don't swap. */ + sigset_t p_sigmask; /* DEPRECATED. */ + sigset_t p_sigignore; /* Signals being ignored. */ + sigset_t p_sigcatch; /* Signals being caught by user. */ + u_char p_priority; /* Process priority. */ + u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ + char p_nice; /* Process "nice" value. */ + char p_comm[MAXCOMLEN+1]; + user_addr_t p_pgrp; /* Pointer to process group. */ + user_addr_t p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */ + u_short p_xstat; /* Exit status for wait; also stop signal. */ + u_short p_acflag; /* Accounting flags. */ + user_addr_t p_ru; /* Exit information. XXX */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif +#endif /* KERNEL */ + +/* + * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, + * as it is used to represent "no process group". + */ +extern int nprocs, maxproc; /* Current and max number of procs. */ +__private_extern__ int hard_maxproc; /* hard limit */ + +#define PID_MAX 30000 +#define NO_PID 30001 + +#define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) +#define SESSHOLD(s) ((s)->s_count++) +#define SESSRELE(s) sessrele(s) + +#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) +extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; +extern u_long pidhash; + +#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) +extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; +extern u_long pgrphash; +extern lck_grp_t * proc_lck_grp; +extern lck_grp_attr_t * proc_lck_grp_attr; +extern lck_attr_t * proc_lck_attr; + +LIST_HEAD(proclist, proc); +extern struct proclist allproc; /* List of all processes. 
*/ +extern struct proclist zombproc; /* List of zombie processes. */ +extern struct proc *initproc; +extern void pgdelete(struct pgrp *pgrp); +extern void sessrele(struct session *sess); +extern void procinit(void); +extern void proc_lock(struct proc *); +extern void proc_unlock(struct proc *); +extern void proc_fdlock(struct proc *); +extern void proc_fdunlock(struct proc *); +__private_extern__ char *proc_core_name(const char *name, uid_t uid, pid_t pid); +extern int isinferior(struct proc *, struct proc *); +extern struct proc *pfind(pid_t); /* Find process by id. */ +__private_extern__ struct proc *pzfind(pid_t); /* Find zombie by id. */ +extern struct pgrp *pgfind(pid_t); /* Find process group by id. */ + +extern int chgproccnt(uid_t uid, int diff); +extern int enterpgrp(struct proc *p, pid_t pgid, int mksess); +extern void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); +extern int inferior(struct proc *p); +extern int leavepgrp(struct proc *p); +extern void resetpriority(struct proc *); +extern void setrunnable(struct proc *); +extern void setrunqueue(struct proc *); +extern int sleep(void *chan, int pri); +extern int tsleep0(void *chan, int pri, const char *wmesg, int timo, int (*continuation)(int)); +extern int tsleep1(void *chan, int pri, const char *wmesg, u_int64_t abstime, int (*continuation)(int)); +extern int msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int timo, int (*continuation)(int)); +extern void vfork_return(thread_t th_act, struct proc *p, struct proc *p2, register_t *retval); + + +#endif /* !_SYS_PROC_INTERNAL_H_ */ diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index 80c1a3120..693c71733 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,12 +60,21 @@ #ifndef _SYS_PROTOSW_H_ #define _SYS_PROTOSW_H_ +#include +#include + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second */ + +#ifdef PRIVATE + /* Forward declare these structures referenced from prototypes below. */ struct mbuf; struct proc; struct sockaddr; struct socket; struct sockopt; +struct socket_filter; /*#ifdef _KERNEL*/ /* @@ -91,49 +100,65 @@ struct sockopt; * described below. 
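/*
 * proc_internal.h now gives each proc real mutexes (p_mlock, p_fdmlock)
 * with the accessor routines declared above.  A sketch of the intended
 * discipline -- proc_lock() serializes p_lflag updates, while file-
 * descriptor state takes the separate proc_fdlock().  The helper and
 * its policy are illustrative only:
 */
static void
set_low_pri_io(struct proc *p, int enable)
{
	proc_lock(p);
	if (enable)
		p->p_lflag |= P_LLOW_PRI_IO;
	else
		p->p_lflag &= ~P_LLOW_PRI_IO;
	proc_unlock(p);
}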
*/ -#include #include #include +#ifdef KERNEL +#include +#endif /* KERNEL */ + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif -#ifdef __APPLE_API_UNSTABLE struct protosw { short pr_type; /* socket type used for */ struct domain *pr_domain; /* domain protocol a member of */ short pr_protocol; /* protocol number */ unsigned int pr_flags; /* see below */ /* protocol-protocol hooks */ - void (*pr_input) __P((struct mbuf *, int len)); + void (*pr_input)(struct mbuf *, int len); /* input to protocol (from below) */ - int (*pr_output) __P((struct mbuf *m, struct socket *so)); + int (*pr_output)(struct mbuf *m, struct socket *so); /* output to protocol (from above) */ - void (*pr_ctlinput)__P((int, struct sockaddr *, void *)); + void (*pr_ctlinput)(int, struct sockaddr *, void *); /* control input (from below) */ - int (*pr_ctloutput)__P((struct socket *, struct sockopt *)); + int (*pr_ctloutput)(struct socket *, struct sockopt *); /* control output (from above) */ /* user-protocol hook */ void *pr_ousrreq; /* utility hooks */ - void (*pr_init) __P((void)); /* initialization hook */ - void (*pr_fasttimo) __P((void)); + void (*pr_init)(void); /* initialization hook */ + void (*pr_fasttimo)(void); /* fast timeout (200ms) */ - void (*pr_slowtimo) __P((void)); + void (*pr_slowtimo)(void); /* slow timeout (500ms) */ - void (*pr_drain) __P((void)); + void (*pr_drain)(void); /* flush any excess space possible */ #if __APPLE__ - int (*pr_sysctl)(); /* sysctl for protocol */ + int (*pr_sysctl)(int *, u_int, void *, size_t *, void *, size_t); + /* sysctl for protocol */ #endif struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ +#if __APPLE__ + int (*pr_lock) (struct socket *so, int locktype, int debug); /* lock function for protocol */ + int (*pr_unlock) (struct socket *so, int locktype, int debug); /* unlock for protocol */ +#ifdef _KERN_LOCKS_H_ + lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); +#else + void * (*pr_getlock) (struct socket *so, int locktype); +#endif +#endif #if __APPLE__ /* Implant hooks */ - TAILQ_HEAD(pr_sfilter, NFDescriptor) pr_sfilter; + TAILQ_HEAD(, socket_filter) pr_filter_head; struct protosw *pr_next; /* Chain for domain */ - u_long reserved[4]; /* Padding for future use */ + u_long reserved[1]; /* Padding for future use */ #endif }; -#define PR_SLOWHZ 2 /* 2 slow timeouts per second */ -#define PR_FASTHZ 5 /* 5 fast timeouts per second */ +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif /* * Values for pr_flags. @@ -144,13 +169,16 @@ struct protosw { * is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed * anyhow). 
*/ -#define PR_ATOMIC 0x01 /* exchange atomic messages only */ -#define PR_ADDR 0x02 /* addresses given with messages */ +#define PR_ATOMIC 0x01 /* exchange atomic messages only */ +#define PR_ADDR 0x02 /* addresses given with messages */ #define PR_CONNREQUIRED 0x04 /* connection required by protocol */ -#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ -#define PR_RIGHTS 0x10 /* passes capabilities */ -#define PR_IMPLOPCL 0x20 /* implied open/close */ -#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ +#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ +#define PR_RIGHTS 0x10 /* passes capabilities */ +#define PR_IMPLOPCL 0x20 /* implied open/close */ +#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ +#define PR_PROTOLOCK 0x80 /* protocol takes care of it's own locking */ +#define PR_PCBLOCK 0x100 /* protocol supports per pcb finer grain locking */ +#define PR_DISPOSE 0x200 /* protocol requires late lists disposal */ /* * The arguments to usrreq are: @@ -217,35 +245,31 @@ struct uio; * migrate this stuff back into the main structure. */ struct pr_usrreqs { - int (*pru_abort) __P((struct socket *so)); - int (*pru_accept) __P((struct socket *so, struct sockaddr **nam)); - int (*pru_attach) __P((struct socket *so, int proto, - struct proc *p)); - int (*pru_bind) __P((struct socket *so, struct sockaddr *nam, - struct proc *p)); - int (*pru_connect) __P((struct socket *so, struct sockaddr *nam, - struct proc *p)); - int (*pru_connect2) __P((struct socket *so1, struct socket *so2)); - int (*pru_control) __P((struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p)); - int (*pru_detach) __P((struct socket *so)); - int (*pru_disconnect) __P((struct socket *so)); - int (*pru_listen) __P((struct socket *so, struct proc *p)); - int (*pru_peeraddr) __P((struct socket *so, - struct sockaddr **nam)); - int (*pru_rcvd) __P((struct socket *so, int flags)); - int (*pru_rcvoob) __P((struct socket *so, struct mbuf *m, - int flags)); - int (*pru_send) __P((struct socket *so, int flags, struct mbuf *m, + int (*pru_abort)(struct socket *so); + int (*pru_accept)(struct socket *so, struct sockaddr **nam); + int (*pru_attach)(struct socket *so, int proto, struct proc *p); + int (*pru_bind)(struct socket *so, struct sockaddr *nam, + struct proc *p); + int (*pru_connect)(struct socket *so, struct sockaddr *nam, + struct proc *p); + int (*pru_connect2)(struct socket *so1, struct socket *so2); + int (*pru_control)(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p); + int (*pru_detach)(struct socket *so); + int (*pru_disconnect)(struct socket *so); + int (*pru_listen)(struct socket *so, struct proc *p); + int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam); + int (*pru_rcvd)(struct socket *so, int flags); + int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags); + int (*pru_send)(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, - struct proc *p)); + struct proc *p); #define PRUS_OOB 0x1 #define PRUS_EOF 0x2 #define PRUS_MORETOCOME 0x4 - int (*pru_sense) __P((struct socket *so, struct stat *sb)); - int (*pru_shutdown) __P((struct socket *so)); - int (*pru_sockaddr) __P((struct socket *so, - struct sockaddr **nam)); + int (*pru_sense)(struct socket *so, struct stat *sb); + int (*pru_shutdown)(struct socket *so); + int (*pru_sockaddr)(struct socket *so, struct sockaddr **nam); /* * These three added later, so they are out of order. 
They are used @@ -255,17 +279,19 @@ struct pr_usrreqs { * through these entry points. For protocols which still use * the generic code, these just point to those routines. */ - int (*pru_sosend) __P((struct socket *so, struct sockaddr *addr, + int (*pru_sosend)(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags)); - int (*pru_soreceive) __P((struct socket *so, + struct mbuf *control, int flags); + int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp)); - int (*pru_sopoll) __P((struct socket *so, int events, - struct ucred *cred, void *)); + struct mbuf **controlp, int *flagsp); + int (*pru_sopoll)(struct socket *so, int events, + struct ucred *cred, void *); }; +__BEGIN_DECLS + extern int pru_abort_notsupp(struct socket *so); extern int pru_accept_notsupp(struct socket *so, struct sockaddr **nam); extern int pru_attach_notsupp(struct socket *so, int proto, @@ -300,8 +326,9 @@ extern int pru_soreceive_notsupp(struct socket *so, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); extern int pru_sopoll_notsupp(struct socket *so, int events, - struct ucred *cred); + struct ucred *cred, void *); +__END_DECLS #endif /* KERNEL */ @@ -375,14 +402,20 @@ char *prcorequests[] = { #endif #ifdef KERNEL -void pfctlinput __P((int, struct sockaddr *)); -void pfctlinput2 __P((int, struct sockaddr *, void *)); -struct protosw *pffindproto __P((int family, int protocol, int type)); -struct protosw *pffindtype __P((int family, int type)); + +__BEGIN_DECLS + +void pfctlinput(int, struct sockaddr *); +void pfctlinput2(int, struct sockaddr *, void *); +struct protosw *pffindproto(int family, int protocol, int type); +struct protosw *pffindproto_locked(int family, int protocol, int type); +struct protosw *pffindtype(int family, int type); extern int net_add_proto(struct protosw *, struct domain *); extern int net_del_proto(int, int, struct domain *); +__END_DECLS + /* Temp hack to link static domains together */ #define LINK_PROTOS(psw) \ @@ -395,5 +428,6 @@ static void link_ ## psw ## _protos() \ } #endif -#endif /* __APPLE_API_UNSTABLE */ + +#endif /* PRIVATE */ #endif /* !_SYS_PROTOSW_H_ */ diff --git a/bsd/sys/ptrace.h b/bsd/sys/ptrace.h index 3ae7cdadb..61ae0448a 100644 --- a/bsd/sys/ptrace.h +++ b/bsd/sys/ptrace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
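/*
 * With the __P() wrappers gone, pr_usrreqs is a fully typed table, and
 * the pru_*_notsupp routines above exist precisely so a protocol can
 * fill the slots it does not implement.  A sketch of a skeletal
 * datagram protocol registering itself through net_add_proto(); the
 * domain and the stub behaviors are illustrative:
 */
extern struct domain exdomain;		/* assumed set up elsewhere */

static int
ex_attach(struct socket *so, int proto, struct proc *p)
{
	return (0);			/* accept every socket */
}

static int
ex_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
    struct mbuf *control, struct proc *p)
{
	m_freem(m);			/* stub: discard outbound data */
	return (0);
}

static struct pr_usrreqs ex_usrreqs = {
	.pru_abort	= pru_abort_notsupp,
	.pru_accept	= pru_accept_notsupp,
	.pru_attach	= ex_attach,
	.pru_connect2	= pru_connect2_notsupp,
	.pru_send	= ex_send,
	/* remaining slots would point at their matching notsupp stubs */
};

static struct protosw ex_proto = {
	.pr_type	= SOCK_DGRAM,
	.pr_protocol	= 0,		/* illustrative protocol number */
	.pr_flags	= PR_ATOMIC | PR_ADDR,
	.pr_usrreqs	= &ex_usrreqs,
};

static void
ex_proto_init(void)
{
	(void)net_add_proto(&ex_proto, &exdomain);
}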
* * @APPLE_LICENSE_HEADER_START@ * @@ -59,6 +59,7 @@ #define _SYS_PTRACE_H_ #include +#include #define PT_TRACE_ME 0 /* child declares it's being traced */ #define PT_READ_I 1 /* read word in child's I space */ @@ -80,21 +81,21 @@ #define PT_DENY_ATTACH 31 #define PT_FIRSTMACH 32 /* for machine-specific requests */ -#include /* machine-specific requests, if any */ + +__BEGIN_DECLS #ifdef KERNEL #ifdef __APPLE_API_PRIVATE -void proc_reparent __P((struct proc *child, struct proc *newparent)); + +void proc_reparent(struct proc *child, struct proc *newparent); #endif /* __APPLE_API_PRIVATE */ #else /* !KERNEL */ -#include - -__BEGIN_DECLS -int ptrace __P((int _request, pid_t _pid, caddr_t _addr, int _data)); -__END_DECLS +int ptrace(int _request, pid_t _pid, caddr_t _addr, int _data); #endif /* !KERNEL */ +__END_DECLS + #endif /* !_SYS_PTRACE_H_ */ diff --git a/bsd/net/netisr.h b/bsd/sys/ptrace_internal.h similarity index 63% rename from bsd/net/netisr.h rename to bsd/sys/ptrace_internal.h index f8db86f47..6d2f11907 100644 --- a/bsd/net/netisr.h +++ b/bsd/sys/ptrace_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -21,8 +21,8 @@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* - * Copyright (c) 1980, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1982, 1986, 1993, 1994 + * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -34,8 +34,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. + * This product includes software developed by the University of + * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -52,40 +52,46 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)netisr.h 8.1 (Berkeley) 6/10/93 + * @(#)uio.h 8.5 (Berkeley) 2/22/94 */ + +#ifndef _SYS_PTRACE_INTERNAL_H_ +#define _SYS_PTRACE_INTERNAL_H_ + #include +#ifdef KERNEL_PRIVATE +#include + /* - * The networking code runs as a seperate kernel task. + * Additional request flags used by shipping 3rd party products that have been + * patching ptrace. We should be able to remove these additional requests once + * the 3rd party products move to the KPis introduced in Tiger. */ +#define PT_VENDOR_REQUEST1 5561 /* reserved for 3rd party vendor */ + + +__BEGIN_DECLS + /* - * Each ``pup-level-1'' input queue has a bit in a ``netisr'' status - * word which is used to de-multiplex a single software - * interrupt used for scheduling the network code to calls - * on the lowest level routine of each protocol. + * WARNING - these are temporary KPI that allow binary compatibility with + * shipping product that must patch ptrace. These KPI will be removed in the + * next system release that follows Tiger. 
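/*
 * ptrace(2) keeps the Darwin-specific PT_DENY_ATTACH request, and its
 * prototype above now sits inside __BEGIN_DECLS for C++ callers.  The
 * classic user-space use -- a process asking never to be traced:
 */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <stdio.h>

int
deny_debuggers(void)
{
	/* pid, address and data arguments are unused for this request */
	if (ptrace(PT_DENY_ATTACH, 0, (caddr_t)0, 0) == -1) {
		perror("ptrace(PT_DENY_ATTACH)");
		return (-1);
	}
	return (0);
}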
radar - 3928003 + * + * temp_patch_ptrace - patch ptrace using new_ptrace as current implementation. + * Returns the address of the original ptrace implementation. + * + * temp_unpatch_ptrace - restore ptrace to the original implementation. Caller + * must insure all inflight ptrace requests have drained before their kext + * is unloaded. */ -#define NETISR_IP 2 /* same as AF_INET */ -#define NETISR_IMP 3 /* same as AF_IMPLINK */ -#define NETISR_NS 6 /* same as AF_NS */ -#define NETISR_ISO 7 /* same as AF_ISO */ -#define NETISR_CCITT 10 /* same as AF_CCITT */ -#define NETISR_APPLETALK 16 /* same as AF_APPLETALK */ -#define NETISR_ARP 18 /* same as AF_LINK */ -#define NETISR_BLUE 26 /* same as psuedo_AF_BLUE */ -#define NETISR_IPV6 30 /* same as AF_INET6 */ +uintptr_t temp_patch_ptrace(uintptr_t new_ptrace); +void temp_unpatch_ptrace(void); -#define NETISR_SET(a,b) +__END_DECLS -#ifdef __APPLE_API_PRIVATE -#if defined(KERNEL) && !defined(LOCORE) -extern volatile int netisr; /* scheduling bits for network */ -void wakeup(void *); -extern int dlil_input_thread_wakeup; -#define setsoftnet() (wakeup((caddr_t)&dlil_input_thread_wakeup)) -#endif /* defined(KERNEL) && !defined(LOCORE) */ -#define schednetisr(anisr) { netisr |= 1<<(anisr); setsoftnet(); } -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL */ +#endif /* !_SYS_PTRACE_INTERNAL_H_ */ diff --git a/bsd/sys/queue.h b/bsd/sys/queue.h index bd4b21341..feb16f76e 100644 --- a/bsd/sys/queue.h +++ b/bsd/sys/queue.h @@ -539,8 +539,8 @@ remque(void *a) #else /* !__GNUC__ */ -void insque __P((void *a, void *b)); -void remque __P((void *a)); +void insque(void *a, void *b); +void remque(void *a); #endif /* __GNUC__ */ diff --git a/bsd/sys/quota.h b/bsd/sys/quota.h index 2e9aa0804..691eecc88 100644 --- a/bsd/sys/quota.h +++ b/bsd/sys/quota.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,6 +62,12 @@ #define _SYS_QUOTA_H #include +#include +#ifdef KERNEL_PRIVATE +#include +#endif + +#include #ifdef __APPLE_API_UNSTABLE /* @@ -159,6 +165,34 @@ struct dqblk { u_int32_t dqb_spare[4]; /* pad struct to power of 2 */ }; +#ifdef KERNEL_PRIVATE +#include /* user_time_t */ +/* LP64 version of struct dqblk. time_t is a long and must grow when + * we're dealing with a 64-bit process. 
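/*
 * A sketch of the transitional patching KPI declared above; it exists
 * only so products that patched ptrace() keep working until they adopt
 * the Tiger KPIs.  The replacement must call through to the saved
 * original.  The handler signature used here is an assumption for
 * illustration -- only the uintptr_t exchange is from this patch:
 */
typedef int (*ptrace_fn_t)(struct proc *, void *, int *);

static ptrace_fn_t saved_ptrace;

static int
shim_ptrace(struct proc *p, void *uap, int *retval)
{
	/* observe or veto the request here, then fall through */
	return ((*saved_ptrace)(p, uap, retval));
}

static void
install_shim(void)
{
	saved_ptrace = (ptrace_fn_t)temp_patch_ptrace((uintptr_t)shim_ptrace);
}

static void
remove_shim(void)
{
	/* caller must ensure no ptrace requests remain in flight */
	temp_unpatch_ptrace();
}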
+ * WARNING - keep in sync with struct dqblk + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_dqblk { + u_int64_t dqb_bhardlimit; /* absolute limit on disk bytes alloc */ + u_int64_t dqb_bsoftlimit; /* preferred limit on disk bytes */ + u_int64_t dqb_curbytes; /* current byte count */ + u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ + u_int32_t dqb_isoftlimit; /* preferred inode limit */ + u_int32_t dqb_curinodes; /* current # allocated inodes */ + user_time_t dqb_btime; /* time limit for excessive disk use */ + user_time_t dqb_itime; /* time limit for excessive files */ + u_int32_t dqb_id; /* identifier (0 for empty entries) */ + u_int32_t dqb_spare[4]; /* pad struct to power of 2 */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif +#endif /* KERNEL_PRIVATE */ #define INITQMAGICS { \ 0xff31ff35, /* USRQUOTA */ \ @@ -211,46 +245,41 @@ dqhashshift(u_long size) #ifndef KERNEL - -#include - __BEGIN_DECLS -int quotactl __P((char *, int, int, caddr_t)); +int quotactl(char *, int, int, caddr_t); __END_DECLS #endif /* !KERNEL */ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE #include -/* - * Macros to avoid subroutine calls to trivial functions. - */ -#if DIAGNOSTIC -#define DQREF(dq) dqref(dq) -#else -#define DQREF(dq) (dq)->dq_cnt++ -#endif /* Quota file info */ struct quotafile { + lck_mtx_t qf_lock; /* quota file mutex */ struct vnode *qf_vp; /* quota file vnode */ struct ucred *qf_cred; /* quota file access cred */ int qf_shift; /* primary hash shift */ int qf_maxentries; /* size of hash table (power of 2) */ - int qf_entrycnt; /* count of active entries */ + int qf_entrycnt; /* count of active entries */ time_t qf_btime; /* block quota time limit */ time_t qf_itime; /* inode quota time limit */ + + /* the following 2 fields are protected */ + /* by the quota list lock */ char qf_qflags; /* quota specific flags */ + int qf_refcnt; /* count of dquot refs on this file */ }; /* * Flags describing the runtime state of quotas. * (in qf_qflags) */ -#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ +#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */ #define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */ +#define QTF_WANTED 0x04 /* waiting for change of state */ /* @@ -264,22 +293,28 @@ struct dquot { TAILQ_ENTRY(dquot) dq_freelist; /* free list */ u_int16_t dq_flags; /* flags, see below */ u_int16_t dq_cnt; /* count of active references */ - u_int16_t dq_spare; /* unused spare padding */ + u_int16_t dq_lflags; /* protected by the quota list lock */ u_int16_t dq_type; /* quota type of this dquot */ u_int32_t dq_id; /* identifier this applies to */ u_int32_t dq_index; /* index into quota file */ struct quotafile *dq_qfile; /* quota file that this is taken from */ struct dqblk dq_dqb; /* actual usage & quotas */ }; + +/* + * dq_lflags values + */ +#define DQ_LLOCK 0x01 /* this quota locked (no MODS) */ +#define DQ_LWANT 0x02 /* wakeup on unlock */ + /* - * Flag values. 
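/*
 * quotactl() keeps the prototype shown above, now without __P().  A
 * user-space sketch of reading the current usage for a uid; QCMD(),
 * Q_GETQUOTA and USRQUOTA are assumed from their definitions elsewhere
 * in this header:
 */
#include <sys/quota.h>
#include <stdio.h>

int
print_user_usage(char *mountpoint, uid_t uid)
{
	struct dqblk dqb;

	if (quotactl(mountpoint, QCMD(Q_GETQUOTA, USRQUOTA),
	    (int)uid, (caddr_t)&dqb) == -1) {
		perror("quotactl");
		return (-1);
	}
	printf("uid %u: %llu bytes used, soft limit %llu\n",
	    (unsigned)uid, (unsigned long long)dqb.dqb_curbytes,
	    (unsigned long long)dqb.dqb_bsoftlimit);
	return (0);
}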
+ * dq_flags values */ -#define DQ_LOCK 0x01 /* this quota locked (no MODS) */ -#define DQ_WANT 0x02 /* wakeup on unlock */ -#define DQ_MOD 0x04 /* this quota modified since read */ -#define DQ_FAKE 0x08 /* no limits here, just usage */ -#define DQ_BLKS 0x10 /* has been warned about blk limit */ -#define DQ_INODS 0x20 /* has been warned about inode limit */ +#define DQ_MOD 0x01 /* this quota modified since read */ +#define DQ_FAKE 0x02 /* no limits here, just usage */ +#define DQ_BLKS 0x04 /* has been warned about blk limit */ +#define DQ_INODS 0x08 /* has been warned about inode limit */ + /* * Shorthand notation. */ @@ -311,19 +346,27 @@ struct dquot { * on-disk dqblk data structures. */ __BEGIN_DECLS +void dqfileinit(struct quotafile *); int dqfileopen(struct quotafile *, int); void dqfileclose(struct quotafile *, int); void dqflush(struct vnode *); -int dqget(struct vnode *, u_long, struct quotafile *, int, struct dquot **); +int dqget(u_long, struct quotafile *, int, struct dquot **); void dqinit(void); void dqref(struct dquot *); -void dqrele(struct vnode *, struct dquot *); -void dqreclaim(struct vnode *, struct dquot *); -int dqsync(struct vnode *, struct dquot *); +void dqrele(struct dquot *); +void dqreclaim(struct dquot *); +int dqsync(struct dquot *); void dqsync_orphans(struct quotafile *); +void dqlock(struct dquot *); +void dqunlock(struct dquot *); + +int qf_get(struct quotafile *, int type); +void qf_put(struct quotafile *, int type); + +__private_extern__ void munge_dqblk(struct dqblk *dqblkp, struct user_dqblk *user_dqblkp, boolean_t to64); __END_DECLS -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/random.h b/bsd/sys/random.h index c8976a552..db6e0f701 100644 --- a/bsd/sys/random.h +++ b/bsd/sys/random.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,9 +24,12 @@ #define __SYS_RANDOM_H__ #include +#include #ifdef __APPLE_API_UNSTABLE +__BEGIN_DECLS void read_random(void* buffer, u_int numBytes); +__END_DECLS #endif /* __APPLE_API_UNSTABLE */ #endif /* __SYS_RANDOM_H__ */ diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h index a312ed635..12eafdc11 100644 --- a/bsd/sys/reboot.h +++ b/bsd/sys/reboot.h @@ -122,6 +122,9 @@ ((partition) << B_PARTITIONSHIFT) | B_DEVMAGIC) #endif /* __APPLE_API_OBSOLETE */ + +#ifdef BSD_KERNEL_PRIVATE #include +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _SYS_REBOOT_H_ */ diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index e2f12be0f..823fcc738 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,29 +59,92 @@ #define _SYS_RESOURCE_H_ #include +#include +#include + + +/* [XSI] The timeval structure shall be defined as described in + * + * + * NB: We use __darwin_time_t and __darwin_suseconds_t here to avoid + * improperly exposing time_t and suseconds_t into the namespace. 
+ */ +#ifndef _TIMEVAL +#define _TIMEVAL +struct timeval { + __darwin_time_t tv_sec; /* seconds */ + __darwin_suseconds_t tv_usec; /* and microseconds */ +}; +#endif + +/* The id_t type shall be defined as described in */ +#ifndef _ID_T +#define _ID_T +typedef __darwin_id_t id_t; /* can hold pid_t, gid_t, or uid_t */ +#endif + /* - * Process priority specifications to get/setpriority. + * Resource limit type (low 63 bits, excluding the sign bit) + */ +typedef __int64_t rlim_t; + + +/***** + * PRIORITY + */ + +/* + * Possible values of the first parameter to getpriority()/setpriority(), + * used to indicate the type of the second parameter. + */ +#define PRIO_PROCESS 0 /* Second argument is a PID */ +#define PRIO_PGRP 1 /* Second argument is a GID */ +#define PRIO_USER 2 /* Second argument is a UID */ + +#ifndef _POSIX_C_SOURCE +/* + * Range limitations for the value of the third parameter to setpriority(). */ #define PRIO_MIN -20 #define PRIO_MAX 20 +#endif /* !_POSIX_C_SOURCE */ -#define PRIO_PROCESS 0 -#define PRIO_PGRP 1 -#define PRIO_USER 2 -/* - * Resource utilization information. + +/***** + * RESOURCE USAGE */ -#define RUSAGE_SELF 0 -#define RUSAGE_CHILDREN -1 +/* + * Possible values of the first parameter to getrusage(), used to indicate + * the scope of the information to be returned. + */ +#define RUSAGE_SELF 0 /* Current process information */ +#define RUSAGE_CHILDREN -1 /* Current process' children */ +/* + * A structure representing an accounting of resource utilization. The + * address of an instance of this structure is the second parameter to + * getrusage(). + * + * Note: All values other than ru_utime and ru_stime are implementaiton + * defined and subject to change in a future release. Their use + * is discouraged for standards compliant programs. + */ struct rusage { struct timeval ru_utime; /* user time used */ struct timeval ru_stime; /* system time used */ +#ifdef _POSIX_C_SOURCE + long ru_opaque[14]; /* implementation defined */ +#else /* !_POSIX_C_SOURCE */ + /* + * Informational aliases for source compatibility with programs + * that need more information than that provided by standards, + * and which do not mind being OS-dependent. + */ long ru_maxrss; /* max resident set size */ -#define ru_first ru_ixrss +#define ru_first ru_ixrss /* internal: ruadd() range start */ long ru_ixrss; /* integral shared memory size */ long ru_idrss; /* integral unshared data " */ long ru_isrss; /* integral unshared stack " */ @@ -95,57 +158,107 @@ struct rusage { long ru_nsignals; /* signals received */ long ru_nvcsw; /* voluntary context switches */ long ru_nivcsw; /* involuntary " */ -#define ru_last ru_nivcsw +#define ru_last ru_nivcsw /* internal: ruadd() range end */ +#endif /* !_POSIX_C_SOURCE */ +}; + + + +// LP64todo - should this move? +#ifdef KERNEL +#include /* user_time_t */ + +/* LP64 version of struct timeval. time_t is a long and must grow when + * we're dealing with a 64-bit process. 
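/*
 * The rusage structure above now hides everything except ru_utime and
 * ru_stime behind !_POSIX_C_SOURCE, which is exactly the portable
 * subset its comment recommends.  A standards-clean sketch:
 */
#include <sys/resource.h>
#include <stdio.h>

void
report_cpu_time(void)
{
	struct rusage ru;

	if (getrusage(RUSAGE_SELF, &ru) == 0)
		printf("user %ld.%06ds, system %ld.%06ds\n",
		    (long)ru.ru_utime.tv_sec, (int)ru.ru_utime.tv_usec,
		    (long)ru.ru_stime.tv_sec, (int)ru.ru_stime.tv_usec);
}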
+ * WARNING - keep in sync with struct timeval + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_rusage_timeval { + user_time_t tv_sec; /* seconds */ + __darwin_suseconds_t tv_usec; /* and microseconds */ +}; +struct user_rusage { + struct user_rusage_timeval ru_utime; /* user time used */ + struct user_rusage_timeval ru_stime; /* system time used */ + user_long_t ru_maxrss; /* max resident set size */ + user_long_t ru_ixrss; /* integral shared memory size */ + user_long_t ru_idrss; /* integral unshared data " */ + user_long_t ru_isrss; /* integral unshared stack " */ + user_long_t ru_minflt; /* page reclaims */ + user_long_t ru_majflt; /* page faults */ + user_long_t ru_nswap; /* swaps */ + user_long_t ru_inblock; /* block input operations */ + user_long_t ru_oublock; /* block output operations */ + user_long_t ru_msgsnd; /* messages sent */ + user_long_t ru_msgrcv; /* messages received */ + user_long_t ru_nsignals; /* signals received */ + user_long_t ru_nvcsw; /* voluntary context switches */ + user_long_t ru_nivcsw; /* involuntary " */ }; +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + + +/***** + * RESOURCE LIMITS + */ + /* - * Resource limits + * Symbolic constants for resource limits; since all limits are representable + * as a type rlim_t, we are permitted to define RLIM_SAVED_* in terms of + * RLIM_INFINITY. */ -#define RLIMIT_CPU 0 /* cpu time in milliseconds */ -#define RLIMIT_FSIZE 1 /* maximum file size */ -#define RLIMIT_DATA 2 /* data size */ +#define RLIM_INFINITY (((__uint64_t)1 << 63) - 1) /* no limit */ +#define RLIM_SAVED_MAX RLIM_INFINITY /* Unrepresentable hard limit */ +#define RLIM_SAVED_CUR RLIM_INFINITY /* Unrepresentable soft limit */ + +/* + * Possible values of the first parameter to getrlimit()/setrlimit(), to + * indicate for which resource the operation is being performed. + */ +#define RLIMIT_CPU 0 /* cpu time per process, in ms */ +#define RLIMIT_FSIZE 1 /* file size */ +#define RLIMIT_DATA 2 /* data segment size */ #define RLIMIT_STACK 3 /* stack size */ #define RLIMIT_CORE 4 /* core file size */ -#define RLIMIT_RSS 5 /* resident set size */ +#define RLIMIT_AS 5 /* address space (resident set size) */ +#ifndef _POSIX_C_SOURCE +#define RLIMIT_RSS RLIMIT_AS /* source compatibility alias */ #define RLIMIT_MEMLOCK 6 /* locked-in-memory address space */ #define RLIMIT_NPROC 7 /* number of processes */ +#endif /* !_POSIX_C_SOURCE */ #define RLIMIT_NOFILE 8 /* number of open files */ +#ifndef _POSIX_C_SOURCE +#define RLIM_NLIMITS 9 /* total number of resource limits */ +#endif /* !_POSIX_C_SOURCE */ -#define RLIM_NLIMITS 9 /* number of resource limits */ - -#define RLIM_INFINITY (((u_quad_t)1 << 63) - 1) - -struct orlimit { - int32_t rlim_cur; /* current (soft) limit */ - int32_t rlim_max; /* maximum value for rlim_cur */ -}; - +/* + * A structure representing a resource limit. The address of an instance + * of this structure is the second parameter to getrlimit()/setrlimit(). + */ struct rlimit { rlim_t rlim_cur; /* current (soft) limit */ rlim_t rlim_max; /* maximum value for rlim_cur */ }; -/* Load average structure. 
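The rlim_t type, RLIM_INFINITY, and the RLIMIT_* indices above combine as in this minimal userland sketch, which raises the soft descriptor limit to the hard limit:

#include <sys/resource.h>
#include <stdio.h>

int
main(void)
{
	struct rlimit rl;

	/* Raise the soft descriptor limit to the hard limit. */
	if (getrlimit(RLIMIT_NOFILE, &rl) == -1)
		return 1;
	rl.rlim_cur = rl.rlim_max;	/* may be RLIM_INFINITY */
	if (setrlimit(RLIMIT_NOFILE, &rl) == -1)
		return 1;
	printf("soft limit now %llu\n", (unsigned long long)rl.rlim_cur);
	return 0;
}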
*/ -struct loadavg { - fixpt_t ldavg[3]; - long fscale; -}; -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -extern struct loadavg averunnable; -#define LSCALE 1000 /* scaling for "fixed point" arithmetic */ -#endif /* __APPLE_API_PRIVATE */ -#else -#include +#ifndef KERNEL __BEGIN_DECLS -int getpriority __P((int, int)); -int getrlimit __P((int, struct rlimit *)); -int getrusage __P((int, struct rusage *)); -int setpriority __P((int, int, int)); -int setrlimit __P((int, const struct rlimit *)); +int getpriority(int, id_t); +int getrlimit(int, struct rlimit *); +int getrusage(int, struct rusage *); +int setpriority(int, id_t, int); +int setrlimit(int, const struct rlimit *); __END_DECLS -#endif /* KERNEL */ +#endif /* !KERNEL */ #endif /* !_SYS_RESOURCE_H_ */ diff --git a/bsd/sys/resourcevar.h b/bsd/sys/resourcevar.h index 77b30d6fd..8593ed6ef 100644 --- a/bsd/sys/resourcevar.h +++ b/bsd/sys/resourcevar.h @@ -85,6 +85,17 @@ struct pstats { struct itimerval p_timer[3]; /* virtual-time timers */ #define pstat_endcopy p_start struct timeval p_start; /* starting time */ +#ifdef KERNEL + struct user_uprof { /* profile arguments */ + struct user_uprof *pr_next; /* multiple prof buffers allowed */ + user_addr_t pr_base; /* buffer base */ + user_size_t pr_size; /* buffer size */ + user_ulong_t pr_off; /* pc offset */ + user_ulong_t pr_scale; /* pc scaling */ + user_ulong_t pr_addr; /* temp storage for addr until AST */ + user_ulong_t pr_ticks; /* temp storage for ticks until AST */ + } user_p_prof; +#endif // KERNEL }; /* @@ -102,18 +113,21 @@ struct plimit { int p_refcnt; /* number of references */ }; +#ifdef KERNEL /* add user profiling from AST */ #define ADDUPROF(p) \ - addupc_task(p, \ - (p)->p_stats->p_prof.pr_addr, (p)->p_stats->p_prof.pr_ticks) + addupc_task(p, \ + (proc_is64bit((p)) ? (p)->p_stats->user_p_prof.pr_addr \ + : CAST_USER_ADDR_T((p)->p_stats->p_prof.pr_addr)), \ + (proc_is64bit((p)) ? (p)->p_stats->user_p_prof.pr_ticks \ + : (p)->p_stats->p_prof.pr_ticks)) -#ifdef KERNEL -void addupc_intr __P((struct proc *p, u_long pc, u_int ticks)); -void addupc_task __P((struct proc *p, u_long pc, u_int ticks)); -void calcru __P((struct proc *p, struct timeval *up, struct timeval *sp, - struct timeval *ip)); -void ruadd __P((struct rusage *ru, struct rusage *ru2)); -struct plimit *limcopy __P((struct plimit *lim)); +void addupc_intr(struct proc *p, u_long pc, u_int ticks); +void addupc_task(struct proc *p, user_addr_t pc, u_int ticks); +void calcru(struct proc *p, struct timeval *up, struct timeval *sp, + struct timeval *ip); +void ruadd(struct rusage *ru, struct rusage *ru2); +struct plimit *limcopy(struct plimit *lim); #endif /* KERNEL */ #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/sys/select.h b/bsd/sys/select.h index 6b0ea8e63..1bcd93484 100644 --- a/bsd/sys/select.h +++ b/bsd/sys/select.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -59,14 +59,90 @@ #include #include +#include -#ifdef __APPLE_API_UNSTABLE +/* + * The time_t and suseconds_t types shall be defined as described in + * + * The sigset_t type shall be defined as described in + * The timespec structure shall be defined as described in + */ +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif -__BEGIN_DECLS +#ifndef _SUSECONDS_T +#define _SUSECONDS_T +typedef __darwin_suseconds_t suseconds_t; +#endif + +#ifndef _SIGSET_T +#define _SIGSET_T +typedef __darwin_sigset_t sigset_t; +#endif + +#ifndef _TIMESPEC +#define _TIMESPEC +struct timespec { + time_t tv_sec; + long tv_nsec; +}; +#endif + +/* + * [XSI] The header shall define the fd_set type as a structure. + * [XSI] FD_CLR, FD_ISSET, FD_SET, FD_ZERO may be declared as a function, or + * defined as a macro, or both + * [XSI] FD_SETSIZE shall be defined as a macro + * + * Note: We use _FD_SET to protect all select related + * types and macros + */ +#ifndef _FD_SET +#define _FD_SET + +/* + * Select uses bit masks of file descriptors in longs. These macros + * manipulate such bit fields (the filesystem macros use chars). The + * extra protection here is to permit application redefinition above + * the default size. + */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 1024 +#endif + +#define __DARWIN_NBBY 8 /* bits in a byte */ +#define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ +#define __DARWIN_howmany(x, y) (((x) + ((y) - 1)) / (y)) /* # y's == x bits? */ + +typedef struct fd_set { + __int32_t fds_bits[__DARWIN_howmany(FD_SETSIZE, __DARWIN_NFDBITS)]; +} fd_set; + +#define FD_SET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] |= (1<<((n) % __DARWIN_NFDBITS))) +#define FD_CLR(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] &= ~(1<<((n) % __DARWIN_NFDBITS))) +#define FD_ISSET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] & (1<<((n) % __DARWIN_NFDBITS))) +#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 +/* + * Use the built-in bzero function instead of the library version so that + * we do not pollute the namespace or introduce prototype warnings. + */ +#define FD_ZERO(p) __builtin_bzero(p, sizeof(*(p))) +#else +#define FD_ZERO(p) bzero(p, sizeof(*(p))) +#endif +#ifndef _POSIX_C_SOURCE +#define FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#endif /* !_POSIX_C_SOURCE */ + +#endif /* !_FD_SET */ #ifdef KERNEL +#ifdef KERNEL_PRIVATE #include #endif +#include #include @@ -74,17 +150,10 @@ __BEGIN_DECLS * Used to maintain information about processes that wish to be * notified when I/O becomes possible. 
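A minimal sketch of the fd_set machinery defined above, waiting up to five seconds for stdin to become readable:

#include <sys/select.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	fd_set readfds;
	struct timeval tv = { 5, 0 };	/* five second timeout */

	FD_ZERO(&readfds);
	FD_SET(STDIN_FILENO, &readfds);

	switch (select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv)) {
	case -1:
		return 1;		/* error */
	case 0:
		printf("timed out\n");
		break;
	default:
		if (FD_ISSET(STDIN_FILENO, &readfds))
			printf("stdin is readable\n");
	}
	return 0;
}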
*/ +#ifdef KERNEL_PRIVATE struct selinfo { -#ifdef KERNEL - union { - struct wait_queue wait_queue; /* wait_queue for wait/wakeup */ - struct klist note; /* JMM - temporary separation */ - } si_u; -#define si_wait_queue si_u.wait_queue -#define si_note si_u.note -#else - char si_wait_queue[16]; -#endif + struct wait_queue si_wait_queue; /* wait_queue for wait/wakeup */ + struct klist si_note; /* JMM - temporary separation */ u_int si_flags; /* see below */ }; @@ -93,31 +162,38 @@ struct selinfo { #define SI_INITED 0x0008 /* selinfo has been inited */ #define SI_CLEAR 0x0010 /* selinfo has been cleared */ -#ifdef KERNEL -struct proc; +#else +struct selinfo; +#endif -void selrecord __P((struct proc *selector, struct selinfo *, void *)); -void selwakeup __P((struct selinfo *)); -void selthreadclear __P((struct selinfo *)); -#endif /* KERNEL */ +__BEGIN_DECLS + +void selrecord(proc_t selector, struct selinfo *, void *); +void selwakeup(struct selinfo *); +void selthreadclear(struct selinfo *); __END_DECLS -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL */ + #ifndef KERNEL +#ifndef _POSIX_C_SOURCE #include #ifndef __MWERKS__ #include #endif /* __MWERKS__ */ #include +#endif /* !_POSIX_C_SOURCE */ __BEGIN_DECLS #ifndef __MWERKS__ -int pselect(int, fd_set *, fd_set *, fd_set *, - const struct timespec *, const sigset_t *); +int pselect(int, fd_set * __restrict, fd_set * __restrict, + fd_set * __restrict, const struct timespec * __restrict, + const sigset_t * __restrict); #endif /* __MWERKS__ */ -int select(int, fd_set *, fd_set *, fd_set *, struct timeval *); +int select(int, fd_set * __restrict, fd_set * __restrict, + fd_set * __restrict, struct timeval * __restrict); __END_DECLS #endif /* ! KERNEL */ diff --git a/bsd/sys/sem.h b/bsd/sys/sem.h index 8db16a7a6..c31acf9ac 100644 --- a/bsd/sys/sem.h +++ b/bsd/sys/sem.h @@ -25,224 +25,191 @@ * SVID compatible sem.h file * * Author: Daniel Boulet - */ -/* * John Bellardo modified the implementation for Darwin. 12/2000 */ #ifndef _SYS_SEM_H_ #define _SYS_SEM_H_ -#include -#include -struct sem { - u_short semval; /* semaphore value */ - pid_t sempid; /* pid of last operation */ - u_short semncnt; /* # awaiting semval > cval */ - u_short semzcnt; /* # awaiting semval = 0 */ -}; - -struct semid_ds { - struct ipc_perm sem_perm; /* operation permission struct */ - struct sem *sem_base; /* pointer to first semaphore in set */ - u_short sem_nsems; /* number of sems in set */ - time_t sem_otime; /* last operation time */ - long sem_pad1; /* SVABI/386 says I need this here */ - time_t sem_ctime; /* last change time */ - /* Times measured in secs since */ - /* 00:00:00 GMT, Jan. 
1, 1970 */ - long sem_pad2; /* SVABI/386 says I need this here */ - long sem_pad3[4]; /* SVABI/386 says I need this here */ -}; - -/* - * semop's sops parameter structure - */ -struct sembuf { - u_short sem_num; /* semaphore # */ - short sem_op; /* semaphore operation */ - short sem_flg; /* operation flags */ -}; -#define SEM_UNDO 010000 - -#define MAX_SOPS 5 /* maximum # of sembuf's per semop call */ - -/* - * semctl's arg parameter structure - */ -union semun { - int val; /* value for SETVAL */ - struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */ - u_short *array; /* array for GETALL & SETALL */ -}; - -/* - * commands for semctl - */ -#define GETNCNT 3 /* Return the value of semncnt {READ} */ -#define GETPID 4 /* Return the value of sempid {READ} */ -#define GETVAL 5 /* Return the value of semval {READ} */ -#define GETALL 6 /* Return semvals into arg.array {READ} */ -#define GETZCNT 7 /* Return the value of semzcnt {READ} */ -#define SETVAL 8 /* Set the value of semval to arg.val {ALTER} */ -#define SETALL 9 /* Set semvals from arg.array {ALTER} */ +#include +#include /* - * Permissions + * [XSI] All of the symbols from SHALL be defined + * when this header is included */ -#define SEM_A 0200 /* alter permission */ -#define SEM_R 0400 /* read permission */ +#include -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -/* - * Kernel implementation stuff - */ -#define SEMVMX 32767 /* semaphore maximum value */ -#define SEMAEM 16384 /* adjust on exit max value */ /* - * Configuration parameters. SEMMNI, SEMMNS, and SEMMNU are hard limits. - * The code dynamically allocates enough memory to satisfy the current - * demand in even increments of SEMMNI_INC, SEMMNS_INC, and SEMMNU_INC. - * The code will never allocate more than the hard limits. The *_INC's - * are defined in the kernel section of the header. - */ -/* - * Configuration parameters + * [XSI] The pid_t, time_t, key_t, and size_t types shall be defined as + * described in . + * + * NOTE: The definition of the key_t type is implicit from the + * inclusion of */ -#ifndef SEMMNS /* # of semaphores in system */ -#define SEMMNS (1048576/sizeof(struct sem)) -#endif /* no more than 1M of semaphore data */ -#ifndef SEMMNI /* # of semaphore identifiers */ -#define SEMMNI SEMMNS /* max of 1 for each semaphore */ -#endif -#ifndef SEMUME -#define SEMUME 10 /* max # of undo entries per process */ +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T #endif -#ifndef SEMMNU /* # of undo structures in system */ -#define SEMMNU SEMMNS /* 1 for each semaphore. This is quite large */ -#endif /* This should be max 1 for each process */ -/* shouldn't need tuning */ -#ifndef SEMMAP -#define SEMMAP 30 /* # of entries in semaphore map */ -#endif -#ifndef SEMMSL -#define SEMMSL SEMMNS /* max # of semaphores per id */ -#endif -#ifndef SEMOPM -#define SEMOPM 100 /* max # of operations per semop call */ +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; #endif +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif /* - * Undo structure (one per process) + * Technically, we should force all code references to the new structure + * definition, not in just the standards conformance case, and leave the + * legacy interface there for binary compatibility only. Currently, we + * are only forcing this for programs requesting standards conformance. 
 */ -struct sem_undo { - struct sem_undo *un_next; /* ptr to next active undo structure */ - struct proc *un_proc; /* owner of this structure */ - short un_cnt; /* # of active entries */ - struct undo { - short un_adjval; /* adjust on exit values */ - short un_num; /* semaphore # */ - int un_id; /* semid */ - } un_ent[SEMUME]; /* undo entries */ +#if defined(__POSIX_C_SOURCE) || defined(kernel) || defined(__LP64__) +/* + * Structure used internally. + * + * This structure is exposed because standards dictate that it is used as + * the semun union member 'buf' as the fourth argument to semctl() when the + * third argument is IPC_STAT or IPC_SET. + * + * Note: only the fields sem_perm, sem_nsems, sem_otime, and sem_ctime + * are meaningful in user space. + */ +struct __semid_ds_new { + struct __ipc_perm_new sem_perm; /* [XSI] operation permission struct */ + __int32_t sem_base; /* 32 bit base ptr for semaphore set */ + unsigned short sem_nsems; /* [XSI] number of sems in set */ + time_t sem_otime; /* [XSI] last operation time */ + __int32_t sem_pad1; /* RESERVED: DO NOT USE! */ + time_t sem_ctime; /* [XSI] last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + __int32_t sem_pad2; /* RESERVED: DO NOT USE! */ + __int32_t sem_pad3[4]; /* RESERVED: DO NOT USE! */ +}; +#define semid_ds __semid_ds_new +#else /* !_POSIX_C_SOURCE */ +#define semid_ds __semid_ds_old +#endif /* !_POSIX_C_SOURCE */ + +#if !defined(__POSIX_C_SOURCE) && !defined(__LP64__) +struct __semid_ds_old { + struct __ipc_perm_old sem_perm; /* [XSI] operation permission struct */ + __int32_t sem_base; /* 32 bit base ptr for semaphore set */ + unsigned short sem_nsems; /* [XSI] number of sems in set */ + time_t sem_otime; /* [XSI] last operation time */ + __int32_t sem_pad1; /* RESERVED: DO NOT USE! */ + time_t sem_ctime; /* [XSI] last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + __int32_t sem_pad2; /* RESERVED: DO NOT USE!
 */ }; +#endif /* !_POSIX_C_SOURCE */ /* - * semaphore info struct + * Possible values for the third argument to semctl() */ -struct seminfo { - int semmap, /* # of entries in semaphore map */ - semmni, /* # of semaphore identifiers */ - semmns, /* # of semaphores in system */ - semmnu, /* # of undo structures in system */ - semmsl, /* max # of semaphores per id */ - semopm, /* max # of operations per semop call */ - semume, /* max # of undo entries per process */ - semusz, /* size in bytes of undo structure */ - semvmx, /* semaphore maximum value */ - semaem; /* adjust on exit max value */ -}; -extern struct seminfo seminfo; +#define GETNCNT 3 /* [XSI] Return the value of semncnt {READ} */ +#define GETPID 4 /* [XSI] Return the value of sempid {READ} */ +#define GETVAL 5 /* [XSI] Return the value of semval {READ} */ +#define GETALL 6 /* [XSI] Return semvals into arg.array {READ} */ +#define GETZCNT 7 /* [XSI] Return the value of semzcnt {READ} */ +#define SETVAL 8 /* [XSI] Set the value of semval to arg.val {ALTER} */ +#define SETALL 9 /* [XSI] Set semvals from arg.array {ALTER} */ -/* internal "mode" bits */ -#define SEM_ALLOC 01000 /* semaphore is allocated */ -#define SEM_DEST 02000 /* semaphore will be destroyed on last detach */ -#define SEMMNI_INC 8 /* increment value for semaphore identifiers */ -#define SEMMNS_INC 64 /* increment value for semaphores */ -#define SEMMNU_INC 32 /* increment value for undo structures */ +/* A semaphore; this is an anonymous structure, not for external use */ +struct sem { + unsigned short semval; /* semaphore value */ + pid_t sempid; /* pid of last operation */ + unsigned short semncnt; /* # awaiting semval > cval */ + unsigned short semzcnt; /* # awaiting semval == 0 */ +}; + /* - * Due to the way semaphore memory is allocated, we have to ensure that - * SEMUSZ is properly aligned. - * - * We are not doing strange semaphore memory allocation anymore, so - * these macros are no longer needed. + * Structure of array element for second argument to semop() */ +struct sembuf { + unsigned short sem_num; /* [XSI] semaphore # */ + short sem_op; /* [XSI] semaphore operation */ + short sem_flg; /* [XSI] operation flags */ +}; /* - * #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) + * Possible flag values for sem_flg */ +#define SEM_UNDO 010000 /* [XSI] Set up adjust on exit entry */ + + +#ifndef _POSIX_C_SOURCE -/* actual size of an undo structure */ /* - * #define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) + * System imposed limit on the value of the third parameter to semop(). + * This is arbitrary, and the standards unfortunately do not provide a + * way for user applications to retrieve this value (e.g. via sysconf() + * or from a manifest value in ). The value shown here is + * informational, and subject to change in future revisions. */ -#define SEMUSZ sizeof(struct sem_undo) +#define MAX_SOPS 5 /* maximum # of sembuf's per semop call */ -extern struct semid_ds *sema; /* semaphore id pool */ -extern struct sem *sem; /* semaphore pool */ -/* This is now a struct sem_undo with the new memory allocation - * extern int *semu; /* undo structure pool - */ -extern struct sem_undo *semu; /* undo structure pool */ /* - * Macro to find a particular sem_undo vector - */ -/* Until we can initialize seminfo.semusz to SEMUSZ, we hard code the size macro - * in SEMU. This should be fixed when (if) we implement dynamic pool sizes + * Union used as the fourth argument to semctl() in all cases.
Specific + * member values are used for different values of the third parameter: + * + * Command Member + * ------------------------------------------- ------ + * GETALL, SETALL array + * SETVAL val + * IPC_STAT, IPC_SET buf * - * #define SEMU(ix) ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) - */ -/* - * This macro doesn't work because we are using a staticly allocated array - * for semu now. - * #define SEMU(ix) ((struct sem_undo *)(((intptr_t)semu)+ix * SEMUSZ)) + * The union definition is intended to be defined by the user application + * in conforming applications; it is provided here for two reasons: + * + * 1) Historical source compatability for non-conforming applications + * expecting this header to declare the union type on their behalf + * + * 2) Documentation; specifically, 64 bit applications that do not pass + * this structure for 'val', or, alternately, a 64 bit type, will + * not function correctly */ -#define SEMU(ix) (&semu[ix]) +union semun { + int val; /* value for SETVAL */ + struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */ + unsigned short *array; /* array for GETALL & SETALL */ +}; +typedef union semun semun_t; /* - * Process sem_undo vectors at proc exit. + * Permissions */ -void semexit __P((struct proc *p)); +#define SEM_A 0200 /* alter permission */ +#define SEM_R 0400 /* read permission */ -/* - * Parameters to the semconfig system call - */ -typedef enum { - SEM_CONFIG_FREEZE, /* Freeze the semaphore facility. */ - SEM_CONFIG_THAW /* Thaw the semaphore facility. */ -} semconfig_ctl_t; +#endif /* !_POSIX_C_SOURCE */ -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ #ifndef KERNEL -#include __BEGIN_DECLS -int semsys __P((int, ...)); -int semctl __P((int, int, int, ...)); -int semget __P((key_t, int, int)); -int semop __P((int, struct sembuf *,unsigned)); +#ifndef _POSIX_C_SOURCE +int semsys(int, ...); +#endif /* !_POSIX_C_SOURCE */ +int semctl(int, int, int, ...) __DARWIN_ALIAS(semctl); +int semget(key_t, int, int); +int semop(int, struct sembuf *, size_t); __END_DECLS + #endif /* !KERNEL */ #endif /* !_SEM_H_ */ diff --git a/bsd/sys/sem_internal.h b/bsd/sys/sem_internal.h new file mode 100644 index 000000000..ed17b0e9b --- /dev/null +++ b/bsd/sys/sem_internal.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* $NetBSD: sem.h,v 1.5 1994/06/29 06:45:15 cgd Exp $ */ + +/* + * SVID compatible sem_internal.h file + * + * Author: Daniel Boulet + */ +/* + * John Bellardo modified the implementation for Darwin. 
12/2000 + */ + +#ifndef _SYS_SEM__INTERNALH_ +#define _SYS_SEM__INTERNALH_ + +#include +#include + + +/* + * This structure is variant for 64 bits because of sem_otime and sem_ctime. + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_semid_ds { + struct ipc_perm sem_perm; /* [XSI] operation permission struct */ + struct sem *sem_base; /* 32 bit base ptr for semaphore set */ + unsigned short sem_nsems; /* [XSI] number of sems in set */ + user_time_t sem_otime; /* [XSI] last operation time */ + __int32_t sem_pad1; /* RESERVED: DO NOT USE! */ + user_time_t sem_ctime; /* [XSI] last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + __int32_t sem_pad2; /* RESERVED: DO NOT USE! */ + __int32_t sem_pad3[4]; /* RESERVED: DO NOT USE! */ +}; + +union user_semun { + user_addr_t buf; /* buffer for IPC_STAT & IPC_SET */ + user_addr_t array; /* array for GETALL & SETALL */ +}; +typedef union user_semun user_semun_t; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + + +/* + * Kernel implementation stuff + */ +#define SEMVMX 32767 /* semaphore maximum value */ +#define SEMAEM 16384 /* adjust on exit max value */ + +/* + * Configuration parameters. SEMMNI, SEMMNS, and SEMMNU are hard limits. + * The code dynamically allocates enough memory to satisfy the current + * demand in even increments of SEMMNI_INC, SEMMNS_INC, and SEMMNU_INC. + * The code will never allocate more than the hard limits. The *_INC's + * are defined in the kernel section of the header. + */ +/* + * Configuration parameters + */ +#ifndef SEMMNS /* # of semaphores in system */ +#define SEMMNS (1048576/sizeof(struct sem)) +#endif /* no more than 1M of semaphore data */ +#ifndef SEMMNI /* # of semaphore identifiers */ +#define SEMMNI SEMMNS /* max of 1 for each semaphore */ +#endif +#ifndef SEMUME +#define SEMUME 10 /* max # of undo entries per process */ +#endif +#ifndef SEMMNU /* # of undo structures in system */ +#define SEMMNU SEMMNS /* 1 for each semaphore. 
This is quite large */ +#endif /* This should be max 1 for each process */ + +/* shouldn't need tuning */ +#ifndef SEMMAP +#define SEMMAP 30 /* # of entries in semaphore map */ +#endif +#ifndef SEMMSL +#define SEMMSL SEMMNS /* max # of semaphores per id */ +#endif +#ifndef SEMOPM +#define SEMOPM 100 /* max # of operations per semop call */ +#endif + + +/* + * Undo structure (internal: one per process) + */ +struct sem_undo { + struct sem_undo *un_next; /* ptr to next active undo structure */ + struct proc *un_proc; /* owner of this structure */ + short un_cnt; /* # of active entries */ + struct undo { + short une_adjval; /* adjust on exit values */ + short une_num; /* semaphore # */ + int une_id; /* semid */ + struct undo *une_next; /* next undo entry */ + } *un_ent; /* undo entries */ +}; + +/* + * semaphore info struct (internal; for administrative limits and ipcs) + */ +struct seminfo { + int semmap, /* # of entries in semaphore map */ + semmni, /* # of semaphore identifiers */ + semmns, /* # of semaphores in system */ + semmnu, /* # of undo structures in system */ + semmsl, /* max # of semaphores per id */ + semopm, /* max # of operations per semop call */ + semume, /* max # of undo entries per process */ + semusz, /* size in bytes of undo structure */ + semvmx, /* semaphore maximum value */ + semaem; /* adjust on exit max value */ +}; +extern struct seminfo seminfo; + +/* internal "mode" bits */ +#define SEM_ALLOC 01000 /* semaphore is allocated */ +#define SEM_DEST 02000 /* semaphore will be destroyed on last detach */ + +#define SEMMNI_INC 8 /* increment value for semaphore identifiers */ +#define SEMMNS_INC 64 /* increment value for semaphores */ +#define SEMMNU_INC 32 /* increment value for undo structures */ + +/* + * Due to the way semaphore memory is allocated, we have to ensure that + * SEMUSZ is properly aligned. + * + * We are not doing strange semaphore memory allocation anymore, so + * these macros are no longer needed. + */ + +/* + * #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) + */ + +/* actual size of an undo structure */ +/* + * #define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) + */ +#define SEMUSZ sizeof(struct sem_undo) + +extern struct user_semid_ds *sema; /* semaphore id pool */ +extern struct sem *sem_pool; /* semaphore pool */ +/* This is now a struct sem_undo with the new memory allocation + * extern int *semu; // undo structure pool + */ +extern struct sem_undo *semu; /* undo structure pool */ + +/* + * Macro to find a particular sem_undo vector + */ +/* Until we can initialize seminfo.semusz to SEMUSZ, we hard code the size macro + * in SEMU. This should be fixed when (if) we implement dynamic pool sizes + * + * #define SEMU(ix) ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) + */ +/* + * This macro doesn't work because we are using a statically allocated array + * for semu now. + * #define SEMU(ix) ((struct sem_undo *)(((intptr_t)semu)+ix * SEMUSZ)) + */ +#define SEMU(ix) (&semu[ix]) + + +/* + * Process sem_undo vectors at proc exit. + */ +void semexit(struct proc *p); + +/* + * Parameters to the semconfig system call + */ +typedef enum { + SEM_CONFIG_FREEZE, /* Freeze the semaphore facility. */ + SEM_CONFIG_THAW /* Thaw the semaphore facility.
 */ +} semconfig_ctl_t; + + +#endif /* !_SYS_SEM__INTERNALH_ */ diff --git a/bsd/sys/semaphore.h b/bsd/sys/semaphore.h index 7a5ea7ed4..10ba6378b 100644 --- a/bsd/sys/semaphore.h +++ b/bsd/sys/semaphore.h @@ -54,6 +54,8 @@ int sem_unlink(const char *); int sem_wait(sem_t *); __END_DECLS -#endif /* KERNEL */ +#else /* KERNEL */ +void psem_cache_init(void); +#endif /* KERNEL */ #endif /* _SYS_SEMAPHORE_H_ */ diff --git a/bsd/sys/shm.h b/bsd/sys/shm.h index f86d8aae8..dc3fc2b56 100644 --- a/bsd/sys/shm.h +++ b/bsd/sys/shm.h @@ -59,64 +59,124 @@ #ifndef _SYS_SHM_H_ #define _SYS_SHM_H_ -#include -#include +#include +#include + +/* + * [XSI] All of the symbols from SHALL be defined + * when this header is included + */ #include -#define SHM_RDONLY 010000 /* Attach read-only (else read-write) */ -#define SHM_RND 020000 /* Round attach address to SHMLBA */ -#define SHMLBA NBPG /* Segment low boundary address multiple */ +/* + * [XSI] The pid_t, time_t, key_t, and size_t types shall be defined as + * described in . + * + * NOTE: The definition of the key_t type is implicit from the + * inclusion of + */ +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T +#endif + +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif + +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +/* + * [XSI] The unsigned integer type used for the number of current attaches + * that MUST be able to store values at least as large as a type unsigned + * short. + */ +typedef unsigned short shmatt_t; + + +/* + * Possible flag values which may be OR'ed into the third argument to + * shmat() + */ +#define SHM_RDONLY 010000 /* [XSI] Attach read-only (else read-write) */ +#define SHM_RND 020000 /* [XSI] Round attach address to SHMLBA */ + +/* + * This value is symbolic, and generally not expected to be used by user + * programs directly, although such use is permitted by the standard. Its + * value in our implementation is equal to the number of bytes per page. + * + * NOTE: We DO NOT obtain this value from the appropriate system + * headers at this time, to avoid the resulting namespace + * pollution, which is why we discourage its use. + */ +#define SHMLBA 4096 /* [XSI] Segment low boundary address multiple*/ /* "official" access mode definitions; somewhat braindead since you have to specify (SHM_* >> 3) for group and (SHM_* >> 6) for world permissions */ #define SHM_R (IPC_R) #define SHM_W (IPC_W) - -struct shmid_ds { - struct ipc_perm shm_perm; /* operation permission structure */ - int shm_segsz; /* size of segment in bytes */ - pid_t shm_lpid; /* process ID of last shared memory op */ - pid_t shm_cpid; /* process ID of creator */ - short shm_nattch; /* number of current attaches */ - time_t shm_atime; /* time of last shmat() */ - time_t shm_dtime; /* time of last shmdt() */ - time_t shm_ctime; /* time of last change by shmctl() */ - void *shm_internal; /* sysv stupidity */ -}; - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE /* - * System 5 style catch-all structure for shared memory constants that - * might be of interest to user programs. Do we really want/need this? + * Technically, we should force all code references to the new structure + * definition, not in just the standards conformance case, and leave the + * legacy interface there for binary compatibility only. Currently, we + * are only forcing this for programs requesting standards conformance.
 */ -struct shminfo { - int shmmax, /* max shared memory segment size (bytes) */ - shmmin, /* min shared memory segment size (bytes) */ - shmmni, /* max number of shared memory identifiers */ - shmseg, /* max shared memory segments per process */ - shmall; /* max amount of shared memory (pages) */ +#if defined(__POSIX_C_SOURCE) || defined(kernel) || defined(__LP64__) +/* + * Structure used internally. + * + * This structure is exposed because standards dictate that it is used as + * the third argument to shmctl(). + * + * NOTE: The field shm_internal is not meaningful in user space, + * and must not be used there. + */ +struct __shmid_ds_new { + struct __ipc_perm_new shm_perm; /* [XSI] Operation permission value */ + size_t shm_segsz; /* [XSI] Size of segment in bytes */ + pid_t shm_lpid; /* [XSI] PID of last shared memory op */ + pid_t shm_cpid; /* [XSI] PID of creator */ + short shm_nattch; /* [XSI] Number of current attaches */ + time_t shm_atime; /* [XSI] Time of last shmat() */ + time_t shm_dtime; /* [XSI] Time of last shmdt() */ + time_t shm_ctime; /* [XSI] Time of last shmctl() change */ + void *shm_internal; /* reserved for kernel use */ }; -extern struct shminfo shminfo; -extern struct shmid_ds *shmsegs; +#define shmid_ds __shmid_ds_new +#else /* !_POSIX_C_SOURCE */ +#define shmid_ds __shmid_ds_old +#endif /* !_POSIX_C_SOURCE */ -struct proc; - -void shmexit __P((struct proc *)); -void shmfork __P((struct proc *, struct proc *)); -__private_extern__ void shmexec __P((struct proc *)); -#endif /* __APPLE_API_PRIVATE */ -#else /* !KERNEL */ +#if !defined(__POSIX_C_SOURCE) && !defined(__LP64__) +struct __shmid_ds_old { + struct __ipc_perm_old shm_perm; /* [XSI] Operation permission value */ + size_t shm_segsz; /* [XSI] Size of segment in bytes */ + pid_t shm_lpid; /* [XSI] PID of last shared memory op */ + pid_t shm_cpid; /* [XSI] PID of creator */ + short shm_nattch; /* [XSI] Number of current attaches */ + time_t shm_atime; /* [XSI] Time of last shmat() */ + time_t shm_dtime; /* [XSI] Time of last shmdt() */ + time_t shm_ctime; /* [XSI] Time of last shmctl() change */ + void *shm_internal; /* reserved for kernel use */ +}; +#endif /* !_POSIX_C_SOURCE */ -#include +#ifndef KERNEL __BEGIN_DECLS -int shmsys __P((int, ...)); -void *shmat __P((int, void *, int)); -int shmget __P((key_t, int, int)); -int shmctl __P((int, int, struct shmid_ds *)); -int shmdt __P((void *)); +#ifndef _POSIX_C_SOURCE +int shmsys(int, ...); +#endif /* !_POSIX_C_SOURCE */ +void *shmat (int, const void *, int); +int shmctl(int, int, struct shmid_ds *) __DARWIN_ALIAS(shmctl); +int shmdt(const void *); +int shmget(key_t, size_t, int); __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/shm_internal.h b/bsd/sys/shm_internal.h new file mode 100644 index 000000000..e0bd76189 --- /dev/null +++ b/bsd/sys/shm_internal.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file.
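A minimal userland sketch of the shmget()/shmat()/shmdt()/shmctl() interface declared above (one page of anonymous shared memory, marked for destruction after detach):

#include <sys/ipc.h>
#include <sys/shm.h>
#include <string.h>

int
main(void)
{
	int shmid;
	void *addr;

	/* One page of private shared memory. */
	if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600)) == -1)
		return 1;
	if ((addr = shmat(shmid, NULL, 0)) == (void *)-1)
		return 1;

	memset(addr, 0, 4096);		/* segment is now usable */

	(void)shmdt(addr);
	(void)shmctl(shmid, IPC_RMID, NULL);	/* destroy on last detach */
	return 0;
}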
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* $NetBSD: shm.h,v 1.15 1994/06/29 06:45:17 cgd Exp $ */ + +/* + * Copyright (c) 1994 Adam Glass + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Adam Glass. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * As defined+described in "X/Open System Interfaces and Headers" + * Issue 4, p. XXX + */ + +#ifndef _SYS_SHM_INTERNALH_ +#define _SYS_SHM_INTERNALH_ + +#include +#include + +#include + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_shmid_ds { + struct ipc_perm shm_perm; /* operation permission structure */ + user_size_t shm_segsz; /* size of segment in bytes */ + pid_t shm_lpid; /* PID of last shared memory op */ + pid_t shm_cpid; /* PID of creator */ + short shm_nattch; /* number of current attaches */ + time_t shm_atime; /* time of last shmat() */ + time_t shm_dtime; /* time of last shmdt() */ + time_t shm_ctime; /* time of last change by shmctl() */ + user_addr_t shm_internal; /* reserved for kernel use */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +/* + * System 5 style catch-all structure for shared memory constants that + * might be of interest to user programs. Also part of the ipcs interface. + * Note: use of user_ssize_t intentional: permits 32 bit ipcs to provide + * information about 64 bit programs shared segments. 
+ */ +struct shminfo { + user_ssize_t shmmax; /* max shm segment size (bytes) */ + user_ssize_t shmmin; /* min shm segment size (bytes) */ + user_ssize_t shmmni; /* max number of shm identifiers */ + user_ssize_t shmseg; /* max shm segments per process */ + user_ssize_t shmall; /* max amount of shm (pages) */ +}; + +#ifdef KERNEL +extern struct shminfo shminfo; +extern struct user_shmid_ds *shmsegs; + +struct proc; + +__BEGIN_DECLS + +void shmexit(struct proc *); +int shmfork(struct proc *, struct proc *); +__private_extern__ void shmexec(struct proc *); + +__END_DECLS + +#endif /* kernel */ + +#endif /* !_SYS_SHM_INTERNALH_ */ diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index b074ab459..0fa8fb1cb 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,9 +63,10 @@ #ifndef _SYS_SIGNAL_H_ #define _SYS_SIGNAL_H_ +#include #include -#if !defined(_ANSI_SOURCE) && !defined(_POSIX_SOURCE) +#if !defined(_ANSI_SOURCE) && !defined(_POSIX_C_SOURCE) #define NSIG 32 /* counting 0; could be 33 (mask is 1-32) */ #endif @@ -75,77 +76,122 @@ #define SIGINT 2 /* interrupt */ #define SIGQUIT 3 /* quit */ #define SIGILL 4 /* illegal instruction (not reset when caught) */ -#if !defined(_POSIX_SOURCE) #define SIGTRAP 5 /* trace trap (not reset when caught) */ -#endif #define SIGABRT 6 /* abort() */ -#if !defined(_POSIX_SOURCE) +#if defined(_POSIX_C_SOURCE) +#define SIGPOLL 7 /* pollable event ([XSR] generated, not supported) */ +#else /* !_POSIX_C_SOURCE */ #define SIGIOT SIGABRT /* compatibility */ #define SIGEMT 7 /* EMT instruction */ -#endif +#endif /* !_POSIX_C_SOURCE */ #define SIGFPE 8 /* floating point exception */ #define SIGKILL 9 /* kill (cannot be caught or ignored) */ -#if !defined(_POSIX_SOURCE) #define SIGBUS 10 /* bus error */ -#endif #define SIGSEGV 11 /* segmentation violation */ -#if !defined(_POSIX_SOURCE) #define SIGSYS 12 /* bad argument to system call */ -#endif #define SIGPIPE 13 /* write on a pipe with no one to read it */ #define SIGALRM 14 /* alarm clock */ #define SIGTERM 15 /* software termination signal from kill */ -#if !defined(_POSIX_SOURCE) #define SIGURG 16 /* urgent condition on IO channel */ -#endif #define SIGSTOP 17 /* sendable stop signal not from tty */ #define SIGTSTP 18 /* stop signal from tty */ #define SIGCONT 19 /* continue a stopped process */ #define SIGCHLD 20 /* to parent on child stop or exit */ #define SIGTTIN 21 /* to readers pgrp upon background tty read */ #define SIGTTOU 22 /* like TTIN for output if (tp->t_local<OSTOP) */ -#if !defined(_POSIX_SOURCE) +#if !defined(_POSIX_C_SOURCE) #define SIGIO 23 /* input/output possible signal */ +#endif #define SIGXCPU 24 /* exceeded CPU time limit */ #define SIGXFSZ 25 /* exceeded file size limit */ #define SIGVTALRM 26 /* virtual time alarm */ #define SIGPROF 27 /* profiling time alarm */ +#if !defined(_POSIX_C_SOURCE) #define SIGWINCH 28 /* window size changes */ #define SIGINFO 29 /* information request */ #endif #define SIGUSR1 30 /* user defined signal 1 */ #define SIGUSR2 31 /* user defined signal 2 */ -#if defined(_ANSI_SOURCE) || defined(__cplusplus) +#if defined(_ANSI_SOURCE) || defined(_POSIX_C_SOURCE) || defined(__cplusplus) /* * Language spec sez we must list exactly one parameter, even though we * actually supply three. Ugh! 
+ * SIG_HOLD is chosen to avoid KERN_SIG_* values in */ #define SIG_DFL (void (*)(int))0 #define SIG_IGN (void (*)(int))1 -#define SIG_ERR (void (*)(int))-1 +#define SIG_HOLD (void (*)(int))5 +#define SIG_ERR ((void (*)(int))-1) #else -#define SIG_DFL (void (*)())0 -#define SIG_IGN (void (*)())1 -#define SIG_ERR (void (*)())-1 +/* DO NOT REMOVE THE COMMENTED OUT int: fixincludes needs to see them */ +#define SIG_DFL (void (*)(/*int*/))0 +#define SIG_IGN (void (*)(/*int*/))1 +#define SIG_HOLD (void (*)(/*int*/))5 +#define SIG_ERR ((void (*)(/*int*/))-1) #endif #ifndef _ANSI_SOURCE -#include +#include + +#ifndef _MCONTEXT_T +#define _MCONTEXT_T +typedef __darwin_mcontext_t mcontext_t; +#endif + +#ifndef _POSIX_C_SOURCE +#ifndef _MCONTEXT64_T +#define _MCONTEXT64_T +typedef __darwin_mcontext64_t mcontext64_t; +#endif +#endif /* _POSIX_C_SOURCE */ + +#ifndef _PID_T +#define _PID_T +typedef __darwin_pid_t pid_t; +#endif + +#ifndef _PTHREAD_ATTR_T +#define _PTHREAD_ATTR_T +typedef __darwin_pthread_attr_t pthread_attr_t; +#endif + +#ifndef _SIGSET_T +#define _SIGSET_T +typedef __darwin_sigset_t sigset_t; +#endif + +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +#ifndef _UCONTEXT_T +#define _UCONTEXT_T +typedef __darwin_ucontext_t ucontext_t; +#endif -typedef unsigned int sigset_t; +#ifndef _POSIX_C_SOURCE +#ifndef _UCONTEXT64_T +#define _UCONTEXT64_T +typedef __darwin_ucontext64_t ucontext64_t; +#endif +#endif /* _POSIX_C_SOURCE */ + +#ifndef _UID_T +#define _UID_T +typedef __darwin_uid_t uid_t; +#endif union sigval { /* Members as suggested by Annex C of POSIX 1003.1b. */ - int sigval_int; - void *sigval_ptr; + int sival_int; + void *sival_ptr; }; -#define SIGEV_NONE 0 /* No async notification */ +#define SIGEV_NONE 0 /* No async notification */ #define SIGEV_SIGNAL 1 /* aio - completion notification */ -#ifdef __APPLE_API_PRIVATE #define SIGEV_THREAD 3 /* A notification function will be called to perform notification */ -#endif /*__APPLE_API_PRIVATE */ struct sigevent { int sigev_notify; /* Notification type */ @@ -155,19 +201,73 @@ struct sigevent { pthread_attr_t *sigev_notify_attributes; /* Notification attributes */ }; +// LP64todo - should this move? 
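The sigevent structure above is how a caller describes the completion notification it wants; a minimal sketch against the POSIX AIO interface (assuming <aio.h> is available; the file path is an arbitrary placeholder):

#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>

static char buf[512];

int
main(void)
{
	struct aiocb cb;

	memset(&cb, 0, sizeof(cb));
	if ((cb.aio_fildes = open("/tmp/scratch", O_WRONLY | O_CREAT, 0644)) == -1)
		return 1;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf);

	/* Ask for SIGUSR1 when the write completes; the handler can
	 * recover the request address from si_value.sival_ptr. */
	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	cb.aio_sigevent.sigev_signo = SIGUSR1;
	cb.aio_sigevent.sigev_value.sival_ptr = &cb;

	return (aio_write(&cb) == -1);
}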
+#ifdef BSD_KERNEL_PRIVATE + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +union user_sigval { + struct { + int pad; /* assumes Motorola byte order */ + int sival_int; + } size_equivalent; + user_addr_t sival_ptr; +}; + +struct user_sigevent { + int sigev_notify; /* Notification type */ + int sigev_signo; /* Signal number */ + union user_sigval sigev_value; /* Signal value */ + user_addr_t sigev_notify_function; /* Notify function */ + user_addr_t sigev_notify_attributes; /* Notify attributes */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* BSD_KERNEL_PRIVATE */ + typedef struct __siginfo { int si_signo; /* signal number */ int si_errno; /* errno association */ int si_code; /* signal code */ - int si_pid; /* sending process */ - unsigned int si_uid; /* sender's ruid */ + pid_t si_pid; /* sending process */ + uid_t si_uid; /* sender's ruid */ int si_status; /* exit value */ void *si_addr; /* faulting instruction */ union sigval si_value; /* signal value */ long si_band; /* band event for SIGPOLL */ - unsigned int pad[7]; /* Reserved for Future Use */ + unsigned long pad[7]; /* Reserved for Future Use */ } siginfo_t; +#ifdef BSD_KERNEL_PRIVATE + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +typedef struct __user_siginfo { + int si_signo; /* signal number */ + int si_errno; /* errno association */ + int si_code; /* signal code */ + pid_t si_pid; /* sending process */ + uid_t si_uid; /* sender's ruid */ + int si_status; /* exit value */ + user_addr_t si_addr; /* faulting instruction */ + union user_sigval si_value; /* signal value */ + user_long_t si_band; /* band event for SIGPOLL */ + user_ulong_t pad[7]; /* Reserved for Future Use */ +} user_siginfo_t; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* BSD_KERNEL_PRIVATE */ + /* * Incase of SIGILL and SIGFPE, si_addr contains the address of * faulting instruction. @@ -181,38 +281,68 @@ typedef struct __siginfo { /* Values for si_code */ /* Codes for SIGILL */ +#ifndef _POSIX_C_SOURCE #define ILL_NOOP 0 /* if only I knew... */ -#define ILL_ILLOPC 1 /* illegal opcode */ -#define ILL_ILLTRP 2 /* illegal trap */ -#define ILL_PRVOPC 3 /* privileged opcode */ +#endif +#define ILL_ILLOPC 1 /* [XSI] illegal opcode */ +#define ILL_ILLTRP 2 /* [XSI] illegal trap */ +#define ILL_PRVOPC 3 /* [XSI] privileged opcode */ +#define ILL_ILLOPN 4 /* [XSI] illegal operand -NOTIMP */ +#define ILL_ILLADR 5 /* [XSI] illegal addressing mode -NOTIMP */ +#define ILL_PRVREG 6 /* [XSI] privileged register -NOTIMP */ +#define ILL_COPROC 7 /* [XSI] coprocessor error -NOTIMP */ +#define ILL_BADSTK 8 /* [XSI] internal stack error -NOTIMP */ /* Codes for SIGFPE */ +#ifndef _POSIX_C_SOURCE #define FPE_NOOP 0 /* if only I knew...
*/ -#define FPE_FLTDIV 1 /* floating point divide by zero */ -#define FPE_FLTOVF 2 /* floating point overflow */ -#define FPE_FLTUND 3 /* floating point underflow */ -#define FPE_FLTRES 4 /* floating point inexact result */ -#define FPE_FLTINV 5 /* invalid floating point operation */ +#endif +#define FPE_FLTDIV 1 /* [XSI] floating point divide by zero */ +#define FPE_FLTOVF 2 /* [XSI] floating point overflow */ +#define FPE_FLTUND 3 /* [XSI] floating point underflow */ +#define FPE_FLTRES 4 /* [XSI] floating point inexact result */ +#define FPE_FLTINV 5 /* [XSI] invalid floating point operation */ +#define FPE_FLTSUB 6 /* [XSI] subscript out of range -NOTIMP */ +#define FPE_INTDIV 7 /* [XSI] integer divide by zero -NOTIMP */ +#define FPE_INTOVF 8 /* [XSI] integer overflow -NOTIMP */ /* Codes for SIGSEGV */ +#ifndef _POSIX_C_SOURCE #define SEGV_NOOP 0 /* if only I knew... */ -#define SEGV_MAPERR 1 /* address not mapped to object */ -#define SEGV_ACCERR 2 /* invalid permissions for mapped to object */ +#endif +#define SEGV_MAPERR 1 /* [XSI] address not mapped to object */ +#define SEGV_ACCERR 2 /* [XSI] invalid permission for mapped object */ /* Codes for SIGBUS */ +#ifndef _POSIX_C_SOURCE #define BUS_NOOP 0 /* if only I knew... */ -#define BUS_ADRALN 1 /* invalid address alignment */ +#endif +#define BUS_ADRALN 1 /* [XSI] Invalid address alignment */ +#define BUS_ADRERR 2 /* [XSI] Nonexistent physical address -NOTIMP */ +#define BUS_OBJERR 3 /* [XSI] Object-specific HW error - NOTIMP */ + +/* Codes for SIGTRAP */ +#define TRAP_BRKPT 1 /* [XSI] Process breakpoint -NOTIMP */ +#define TRAP_TRACE 2 /* [XSI] Process trace trap -NOTIMP */ /* Codes for SIGCHLD */ +#ifndef _POSIX_C_SOURCE #define CLD_NOOP 0 /* if only I knew... */ -#define CLD_EXITED 1 /* child has exited */ -#define CLD_KILLED 2 - /* child has terminated abnormally and did not create a core file */ -#define CLD_DUMPED 3 - /* child has terminated abnormally and create a core file */ -#define CLD_TRAPPED 4 /* traced child has trapped */ -#define CLD_STOPPED 5 /* child has stopped */ -#define CLD_CONTINUED 6 /* stopped child has continued */ +#endif +#define CLD_EXITED 1 /* [XSI] child has exited */ +#define CLD_KILLED 2 /* [XSI] terminated abnormally, no core file */ +#define CLD_DUMPED 3 /* [XSI] terminated abnormally, core file */ +#define CLD_TRAPPED 4 /* [XSI] traced child has trapped */ +#define CLD_STOPPED 5 /* [XSI] child has stopped */ +#define CLD_CONTINUED 6 /* [XSI] stopped child has continued */ + +/* Codes for SIGPOLL */ +#define POLL_IN 1 /* [XSR] Data input available */ +#define POLL_OUT 2 /* [XSR] Output buffers available */ +#define POLL_MSG 3 /* [XSR] Input message available */ +#define POLL_ERR 4 /* [XSR] I/O error */ +#define POLL_PRI 5 /* [XSR] High priority input available */ +#define POLL_HUP 6 /* [XSR] Device disconnected */ /* union for signal handlers */ union __sigaction_u { @@ -237,24 +367,63 @@ struct sigaction { sigset_t sa_mask; /* signal mask to apply */ int sa_flags; /* see signal options below */ }; + +#ifdef BSD_KERNEL_PRIVATE +#include + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +union __user_sigaction_u { + user_addr_t __sa_handler; + user_addr_t __sa_sigaction; +}; + +struct user_sigaction { + union __user_sigaction_u __sigaction_u; /* signal handler */ + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +struct __user_sigaction { + union __user_sigaction_u __sigaction_u; /* signal handler */ + user_addr_t sa_tramp; /* 
signal trampoline */ + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#undef SIG_DFL +#undef SIG_IGN +#undef SIG_ERR +#define SIG_DFL ((user_addr_t)0LL) +#define SIG_IGN ((user_addr_t)1LL) +#define SIG_ERR ((user_addr_t)-1LL) + +#endif /* BSD_KERNEL_PRIVATE */ + + /* if SA_SIGINFO is set, sa_sigaction is to be used instead of sa_handler. */ #define sa_handler __sigaction_u.__sa_handler #define sa_sigaction __sigaction_u.__sa_sigaction -#if !defined(_POSIX_SOURCE) #define SA_ONSTACK 0x0001 /* take signal on signal stack */ #define SA_RESTART 0x0002 /* restart system on signal return */ #define SA_DISABLE 0x0004 /* disable taking signals on alternate stack */ #define SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ +#define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ #define SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ #define SA_NOCLDWAIT 0x0020 /* don't keep zombies around */ #define SA_SIGINFO 0x0040 /* signal handler with SA_SIGINFO args */ +#ifndef _POSIX_C_SOURCE #define SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */ /* This will provide 64bit register set in a 32bit user address space */ #define SA_64REGSET 0x0200 /* signal handler with SA_SIGINFO args with 64bit regs information */ -#endif -#define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ +#endif /* !_POSIX_C_SOURCE */ /* * Flags for sigprocmask: @@ -264,32 +433,48 @@ struct sigaction { #define SIG_SETMASK 3 /* set specified signal set */ /* POSIX 1003.1b required values. */ -#define SI_USER 0x10001 -#define SI_QUEUE 0x10002 -#define SI_TIMER 0x10003 -#define SI_ASYNCIO 0x10004 -#define SI_MESGQ 0x10005 - -#if !defined(_POSIX_SOURCE) -#include -typedef void (*sig_t) __P((int)); /* type of signal function */ +#define SI_USER 0x10001 /* [CX] signal from kill() */ +#define SI_QUEUE 0x10002 /* [CX] signal from sigqueue() */ +#define SI_TIMER 0x10003 /* [CX] timer expiration */ +#define SI_ASYNCIO 0x10004 /* [CX] aio request completion */ +#define SI_MESGQ 0x10005 /* [CX] from message arrival on empty queue */ + +#ifndef _POSIX_C_SOURCE +typedef void (*sig_t)(int); /* type of signal function */ +#endif /* * Structure used in sigaltstack call. */ -struct sigaltstack { - char *ss_sp; /* signal stack base */ - int ss_size; /* signal stack length */ - int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ +#ifdef BSD_KERNEL_PRIVATE + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_sigaltstack { + user_addr_t ss_sp; /* signal stack base */ + user_size_t ss_size; /* signal stack length */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ }; -typedef struct sigaltstack stack_t; +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* BSD_KERNEL_PRIVATE */ + +#ifndef _STACK_T +#define _STACK_T +typedef __darwin_stack_t stack_t; +#endif #define SS_ONSTACK 0x0001 /* take signal on signal stack */ #define SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ #define MINSIGSTKSZ 32768 /* (32K)minimum allowable stack */ #define SIGSTKSZ 131072 /* (128K)recommended stack size */ +#ifndef _POSIX_C_SOURCE /* * 4.3 compatibility: * Signal vector "template" used in sigvec call. @@ -308,6 +493,7 @@ struct sigvec { #define SV_SIGINFO SA_SIGINFO #define sv_onstack sv_flags /* isn't compatibility wonderful! */ +#endif /* !_POSIX_C_SOURCE */ /* * Structure used in sigstack call.
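Pulling the pieces above together, a minimal sketch that installs a three-argument SA_SIGINFO handler and decodes the siginfo_t it receives:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void
on_child(int sig, siginfo_t *info, void *uap)
{
	(void)sig;
	(void)uap;
	/* si_pid/si_uid identify the sender; si_code says why.
	 * (printf is not async-signal-safe; fine for a demo only.) */
	if (info->si_code == CLD_EXITED)
		printf("pid %d exited, status %d\n",
		    (int)info->si_pid, info->si_status);
}

int
main(void)
{
	struct sigaction sa;

	sa.sa_sigaction = on_child;	/* three-argument form */
	sa.sa_flags = SA_SIGINFO;	/* deliver a siginfo_t */
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGCHLD, &sa, NULL) == -1)
		return 1;
	pause();			/* wait for a child to exit */
	return 0;
}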
@@ -317,14 +503,14 @@ struct sigstack { int ss_onstack; /* current status */ }; +#ifndef _POSIX_C_SOURCE /* * Macro for converting signal number to a mask suitable for * sigblock(). */ #define sigmask(m) (1 << ((m)-1)) -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * signals delivered on a per-thread basis. */ @@ -333,12 +519,11 @@ struct sigstack { sigmask(SIGFPE)|sigmask(SIGBUS)|\ sigmask(SIGSEGV)|sigmask(SIGSYS)|\ sigmask(SIGPIPE)) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #define BADSIG SIG_ERR -#endif /* !_POSIX_SOURCE */ +#endif /* !_POSIX_C_SOURCE */ #endif /* !_ANSI_SOURCE */ /* @@ -346,6 +531,6 @@ struct sigstack { * defined by . */ __BEGIN_DECLS -void (*signal __P((int, void (*) __P((int))))) __P((int)); +void (*signal(int, void (*)(int)))(int); __END_DECLS #endif /* !_SYS_SIGNAL_H_ */ diff --git a/bsd/sys/signalvar.h b/bsd/sys/signalvar.h index 1e6e46ed8..cd0f01880 100644 --- a/bsd/sys/signalvar.h +++ b/bsd/sys/signalvar.h @@ -60,7 +60,7 @@ #include -#ifdef __APPLE_API_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Kernel signal definitions and data structures, * not exported to user programs. @@ -71,8 +71,8 @@ * (not necessarily resident). */ struct sigacts { - sig_t ps_sigact[NSIG]; /* disposition of signals */ - sig_t ps_trampact[NSIG]; /* disposition of signals */ + user_addr_t ps_sigact[NSIG]; /* disposition of signals */ + user_addr_t ps_trampact[NSIG]; /* disposition of signals */ sigset_t ps_catchmask[NSIG]; /* signals to be blocked */ sigset_t ps_sigonstack; /* signals to take on sigstack */ sigset_t ps_sigintr; /* signals that interrupt syscalls */ @@ -81,7 +81,7 @@ struct sigacts { sigset_t ps_siginfo; /* signals that want SA_SIGINFO args */ sigset_t ps_oldmask; /* saved mask from before sigpause */ int ps_flags; /* signal flags, below */ - struct sigaltstack ps_sigstk; /* sp & on stack state variable */ + struct user_sigaltstack ps_sigstk; /* sp, length & flags */ int ps_sig; /* for core dump/debugger XXX */ int ps_code; /* for core dump/debugger XXX */ int ps_addr; /* for core dump/debugger XXX */ @@ -93,10 +93,14 @@ struct sigacts { #define SAS_OLDMASK 0x01 /* need to restore mask before pause */ #define SAS_ALTSTACK 0x02 /* have alternate signal stack */ -/* additional signal action values, used only temporarily/internally */ -#define SIG_CATCH (void (*)())2 -#define SIG_HOLD (void (*)())3 -#define SIG_WAIT (void (*)())4 +/* + * Additional signal action values, used only temporarily/internally; these + * values should be non-intersecting with values defined in signal.h, e.g.: + * SIG_IGN, SIG_DFL, SIG_ERR, SIG_HOLD.
+ */ +#define KERN_SIG_CATCH (void (*)(int))2 +#define KERN_SIG_HOLD (void (*)(int))3 +#define KERN_SIG_WAIT (void (*)(int))4 #define pgsigio(pgid, sig, notused) \ { \ @@ -187,37 +191,47 @@ int sigprop[NSIG + 1] = { #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) -#ifdef KERNEL /* * Machine-independent functions: */ -int coredump __P((struct proc *p)); -void execsigs __P((struct proc *p, thread_act_t thr_act)); -void gsignal __P((int pgid, int sig)); -int issignal __P((struct proc *p)); -int CURSIG __P((struct proc *p)); -int clear_procsiglist __P((struct proc *p, int bit)); -int clear_procsigmask __P((struct proc *p, int bit)); -int set_procsigmask __P((struct proc *p, int bit)); -void tty_pgsignal __P((struct pgrp *pgrp, int sig)); -void postsig __P((int sig)); -void siginit __P((struct proc *p)); -void trapsignal __P((struct proc *p, int sig, unsigned code)); -void pt_setrunnable __P((struct proc *p)); +int signal_lock(struct proc *); +int signal_unlock(struct proc *); +int coredump(struct proc *p); +void execsigs(struct proc *p, thread_t thread); +void gsignal(int pgid, int sig); +int issignal(struct proc *p); +int CURSIG(struct proc *p); +int clear_procsiglist(struct proc *p, int bit); +int clear_procsigmask(struct proc *p, int bit); +int set_procsigmask(struct proc *p, int bit); +void tty_pgsignal(struct pgrp *pgrp, int sig); +void postsig(int sig); +void siginit(struct proc *p); +void trapsignal(struct proc *p, int sig, unsigned code); +void pt_setrunnable(struct proc *p); /* * Machine-dependent functions: */ -void sendsig __P((struct proc *, sig_t action, int sig, - int returnmask, u_long code)); - -#ifdef __APPLE_API_UNSTABLE -void psignal __P((struct proc *p, int sig)); -void pgsignal __P((struct pgrp *pgrp, int sig, int checkctty)); -#endif /* __APPLE_API_UNSTABLE */ - -#endif /* KERNEL */ - -#endif /* __APPLE_API_PRIVATE */ +void sendsig(struct proc *, /*sig_t*/ user_addr_t action, int sig, + int returnmask, u_long code); + +void psignal(struct proc *p, int sig); +void pgsignal(struct pgrp *pgrp, int sig, int checkctty); +void threadsignal(thread_t sig_actthread, int signum, u_long code); +int thread_issignal(proc_t p, thread_t th, sigset_t mask); +void psignal_vfork(struct proc *p, task_t new_task, thread_t thr_act, + int signum); +void psignal_vtalarm(struct proc *); +void psignal_xcpu(struct proc *); +void psignal_sigprof(struct proc *); +void psignal_lock(struct proc *, int, int); +void signal_setast(thread_t sig_actthread); + +/* XXX not really very "inline"... */ +__inline__ void sig_lock_to_exit(struct proc *p); +__inline__ int sig_try_locked(struct proc *p); + +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_SYS_SIGNALVAR_H_ */ diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 957cabb8d..65567cc12 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,9 +60,9 @@ #ifndef _SYS_SOCKET_H_ #define _SYS_SOCKET_H_ -#ifndef __APPLE__ -#include -#endif +#include +#include + #define _NO_NAMESPACE_POLLUTION #include #undef _NO_NAMESPACE_POLLUTION @@ -74,10 +74,52 @@ /* * Data types. 
*/ -typedef u_char sa_family_t; -#ifdef _BSD_SOCKLEN_T_ -typedef _BSD_SOCKLEN_T_ socklen_t; -#undef _BSD_SOCKLEN_T_ +#ifndef _GID_T +typedef __darwin_gid_t gid_t; +#define _GID_T +#endif + +#ifndef _OFF_T +typedef __darwin_off_t off_t; +#define _OFF_T +#endif + +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T +#endif + +#ifndef _SA_FAMILY_T +#define _SA_FAMILY_T +typedef __uint8_t sa_family_t; +#endif + +#ifndef _SOCKLEN_T +#define _SOCKLEN_T +typedef __darwin_socklen_t socklen_t; +#endif + +/* XXX Not explicitly defined by POSIX, but function return types are */ +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +/* XXX Not explicitly defined by POSIX, but function return types are */ +#ifndef _SSIZE_T +#define _SSIZE_T +typedef __darwin_ssize_t ssize_t; +#endif + +/* + * [XSI] The iovec structure shall be defined as described in . + */ +#ifndef _STRUCT_IOVEC +#define _STRUCT_IOVEC +struct iovec { + void * iov_base; /* [XSI] Base address of I/O memory region */ + size_t iov_len; /* [XSI] Size of region iov_base points to */ +}; #endif /* @@ -86,7 +128,9 @@ typedef _BSD_SOCKLEN_T_ socklen_t; #define SOCK_STREAM 1 /* stream socket */ #define SOCK_DGRAM 2 /* datagram socket */ #define SOCK_RAW 3 /* raw-protocol interface */ +#ifndef _POSIX_C_SOURCE #define SOCK_RDM 4 /* reliably-delivered message */ +#endif /* !_POSIX_C_SOURCE */ #define SOCK_SEQPACKET 5 /* sequenced packet stream */ /* @@ -98,9 +142,14 @@ typedef _BSD_SOCKLEN_T_ socklen_t; #define SO_KEEPALIVE 0x0008 /* keep connections alive */ #define SO_DONTROUTE 0x0010 /* just use interface addresses */ #define SO_BROADCAST 0x0020 /* permit sending of broadcast msgs */ +#ifndef _POSIX_C_SOURCE #define SO_USELOOPBACK 0x0040 /* bypass hardware when possible */ -#define SO_LINGER 0x0080 /* linger on close if data present */ +#define SO_LINGER 0x0080 /* linger on close if data present (in ticks) */ +#else +#define SO_LINGER 0x1080 /* linger on close if data present (in seconds) */ +#endif /* !_POSIX_C_SOURCE */ #define SO_OOBINLINE 0x0100 /* leave received OOB data in line */ +#ifndef _POSIX_C_SOURCE #define SO_REUSEPORT 0x0200 /* allow local address & port reuse */ #define SO_TIMESTAMP 0x0400 /* timestamp received dgram traffic */ #ifndef __APPLE__ @@ -111,6 +160,7 @@ typedef _BSD_SOCKLEN_T_ socklen_t; #define SO_WANTMORE 0x4000 /* APPLE: Give hint when more data ready */ #define SO_WANTOOBFLAG 0x8000 /* APPLE: Want OOB in MSG_FLAG on receive */ #endif +#endif /* !_POSIX_C_SOURCE */ /* * Additional options, not kept in so_options. @@ -123,13 +173,18 @@ typedef _BSD_SOCKLEN_T_ socklen_t; #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ERROR 0x1007 /* get error status and clear */ #define SO_TYPE 0x1008 /* get socket type */ +#ifndef _POSIX_C_SOURCE /*efine SO_PRIVSTATE 0x1009 get/deny privileged state */ #ifdef __APPLE__ #define SO_NREAD 0x1020 /* APPLE: get 1st-packet byte count */ #define SO_NKE 0x1021 /* APPLE: Install socket-level NKE */ #define SO_NOSIGPIPE 0x1022 /* APPLE: No SIGPIPE on EPIPE */ #define SO_NOADDRERR 0x1023 /* APPLE: Returns EADDRNOTAVAIL when src is not available anymore */ +#define SO_NWRITE 0x1024 /* APPLE: Get number of bytes currently in send socket buffer */ +#define SO_LINGER_SEC 0x1080 /* linger on close if data present (in seconds) */ #endif +#endif /* !_POSIX_C_SOURCE */ + /* * Structure used for manipulating linger option. */ @@ -154,9 +209,12 @@ struct accept_filter_arg { * Address families. 
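Note the two SO_LINGER encodings above: the traditional 0x0080 value keeps its historical clock-tick interpretation, while the strict-POSIX build and the Apple-specific SO_LINGER_SEC alias (0x1080) take seconds. A hedged user-space sketch of these options; the helper names are mine, not from the header:

    #include <sys/socket.h>

    /* Block in close(2) for at most 'seconds' while unsent data drains.
     * SO_LINGER_SEC takes l_linger in seconds; plain SO_LINGER is in
     * ticks unless the code is compiled with _POSIX_C_SOURCE, in which
     * case SO_LINGER itself maps to the seconds variant. */
    static int linger_close(int fd, int seconds)
    {
        struct linger l;

        l.l_onoff = 1;                 /* enable lingering */
        l.l_linger = seconds;
        return setsockopt(fd, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof (l));
    }

    /* Trade SIGPIPE for an EPIPE error on writes to a dead connection. */
    static int quiet_epipe(int fd)
    {
        int on = 1;
        return setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof (on));
    }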
*/ #define AF_UNSPEC 0 /* unspecified */ -#define AF_LOCAL 1 /* local to host (pipes) */ -#define AF_UNIX AF_LOCAL /* backward compatibility */ +#define AF_UNIX 1 /* local to host (pipes) */ +#ifndef _POSIX_C_SOURCE +#define AF_LOCAL AF_UNIX /* backward compatibility */ +#endif /* !_POSIX_C_SOURCE */ #define AF_INET 2 /* internetwork: UDP, TCP, etc. */ +#ifndef _POSIX_C_SOURCE #define AF_IMPLINK 3 /* arpanet imp addresses */ #define AF_PUP 4 /* pup protocols: e.g. BSP */ #define AF_CHAOS 5 /* mit CHAOS protocols */ @@ -188,7 +246,9 @@ struct accept_filter_arg { #define AF_ISDN 28 /* Integrated Services Digital Network*/ #define AF_E164 AF_ISDN /* CCITT E.164 recommendation */ #define pseudo_AF_KEY 29 /* Internal key-management function */ +#endif /* !_POSIX_C_SOURCE */ #define AF_INET6 30 /* IPv6 */ +#ifndef _POSIX_C_SOURCE #define AF_NATM 31 /* native ATM access */ #ifdef __APPLE__ #define AF_SYSTEM 32 /* Kernel event messages */ @@ -200,20 +260,28 @@ struct accept_filter_arg { #define pseudo_AF_HDRCMPLT 35 /* Used by BPF to not rewrite headers * in interface output routine */ +#ifdef PRIVATE +#define AF_AFP 36 /* Used by AFP */ +#else +#define AF_RESERVED_36 36 /* Reserved for internal usage */ +#endif + #ifndef __APPLE__ #define AF_NETGRAPH 32 /* Netgraph sockets */ #endif -#define AF_MAX 36 +#define AF_MAX 37 +#endif /* !_POSIX_C_SOURCE */ /* - * Structure used by kernel to store most - * addresses. + * [XSI] Structure used by kernel to store most addresses. */ struct sockaddr { - u_char sa_len; /* total length */ - u_char sa_family; /* address family */ - char sa_data[14]; /* actually longer; address value */ + __uint8_t sa_len; /* total length */ + sa_family_t sa_family; /* [XSI] address family */ + char sa_data[14]; /* [XSI] addr value (actually larger) */ }; + +#ifndef _POSIX_C_SOURCE #define SOCK_MAXADDRLEN 255 /* longest possible addresses */ /* @@ -221,24 +289,30 @@ struct sockaddr { * information in raw sockets. 
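Because sa_len leads the structure, 4.4BSD-style consumers can walk a packed buffer of variable-length addresses without knowing each family's concrete sockaddr type. A sketch under that assumption (the function is illustrative; real routing-socket consumers usually also round each sa_len up to a 4-byte boundary, omitted here):

    #include <sys/socket.h>

    /* Count the IPv4/IPv6 records in a buffer of packed sockaddrs.
     * sa_len is each record's full length, so the length byte, not
     * the family, drives the iteration. */
    static int count_inet_addrs(const char *buf, size_t buflen)
    {
        const char *p = buf;
        const char *end = buf + buflen;
        int n = 0;

        while (p + 2 <= end) {         /* need sa_len and sa_family */
            const struct sockaddr *sa =
                (const struct sockaddr *)(const void *)p;

            if (sa->sa_len < 2 || p + sa->sa_len > end)
                break;                 /* malformed record; stop */
            if (sa->sa_family == AF_INET || sa->sa_family == AF_INET6)
                n++;
            p += sa->sa_len;           /* sa_len advances the cursor */
        }
        return n;
    }

(size_t itself is typedef'd by this very hunk, so <sys/socket.h> alone suffices.)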
*/ struct sockproto { - u_short sp_family; /* address family */ - u_short sp_protocol; /* protocol */ + __uint16_t sp_family; /* address family */ + __uint16_t sp_protocol; /* protocol */ }; +#endif /* !_POSIX_C_SOURCE*/ /* * RFC 2553: protocol-independent placeholder for socket addresses */ #define _SS_MAXSIZE 128 -#define _SS_ALIGNSIZE (sizeof(int64_t)) -#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(u_char) - sizeof(sa_family_t)) -#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(u_char) - sizeof(sa_family_t) - \ +#define _SS_ALIGNSIZE (sizeof(__int64_t)) +#define _SS_PAD1SIZE \ + (_SS_ALIGNSIZE - sizeof(__uint8_t) - sizeof(sa_family_t)) +#define _SS_PAD2SIZE \ + (_SS_MAXSIZE - sizeof(__uint8_t) - sizeof(sa_family_t) - \ _SS_PAD1SIZE - _SS_ALIGNSIZE) +/* + * [XSI] sockaddr_storage + */ struct sockaddr_storage { - u_char ss_len; /* address length */ - sa_family_t ss_family; /* address family */ + __uint8_t ss_len; /* address length */ + sa_family_t ss_family; /* [XSI] address family */ char __ss_pad1[_SS_PAD1SIZE]; - int64_t __ss_align; /* force desired structure storage alignment */ + __int64_t __ss_align; /* force structure storage alignment */ char __ss_pad2[_SS_PAD2SIZE]; }; @@ -284,6 +358,12 @@ struct sockaddr_storage { #define PF_SYSTEM AF_SYSTEM #define PF_NETBIOS AF_NETBIOS #define PF_PPP AF_PPP +#ifdef PRIVATE +#define PF_AFP AF_AFP +#else +#define PF_RESERVED_36 AF_RESERVED_36 +#endif + #else #define PF_ATM AF_ATM #define PF_NETGRAPH AF_NETGRAPH @@ -291,6 +371,12 @@ struct sockaddr_storage { #define PF_MAX AF_MAX +/* + * These do not have socket-layer support: + */ +#define PF_VLAN ((uint32_t)0x766c616e) /* 'vlan' */ +#define PF_BOND ((uint32_t)0x626f6e64) /* 'bond' */ + /* * Definitions for network related sysctl, CTL_NET. * @@ -301,6 +387,8 @@ struct sockaddr_storage { */ #define NET_MAXID AF_MAX + +#ifdef KERNEL_PRIVATE #define CTL_NET_NAMES { \ { 0, 0 }, \ { "local", CTLTYPE_NODE }, \ @@ -339,6 +427,7 @@ struct sockaddr_storage { { "ppp", CTLTYPE_NODE }, \ { "hdrcomplete", CTLTYPE_NODE }, \ } +#endif KERNEL_PRIVATE /* * PF_ROUTE - Routing table @@ -348,37 +437,74 @@ struct sockaddr_storage { * Fifth: type of info, defined below * Sixth: flag(s) to mask with for NET_RT_FLAGS */ -#define NET_RT_DUMP 1 /* dump; may limit to a.f. */ -#define NET_RT_FLAGS 2 /* by flags, e.g. RESOLVING */ -#define NET_RT_IFLIST 3 /* survey interface list */ -#define NET_RT_MAXID 4 - +#define NET_RT_DUMP 1 /* dump; may limit to a.f. */ +#define NET_RT_FLAGS 2 /* by flags, e.g. RESOLVING */ +#define NET_RT_IFLIST 3 /* survey interface list */ +#define NET_RT_STAT 4 /* routing statistics */ +#define NET_RT_TRASH 5 /* routes not in table but not freed */ +#define NET_RT_IFLIST2 6 /* interface list with addresses */ +#define NET_RT_DUMP2 7 /* dump; may limit to a.f. */ +#define NET_RT_MAXID 8 + +#ifdef KERNEL_PRIVATE #define CTL_NET_RT_NAMES { \ { 0, 0 }, \ { "dump", CTLTYPE_STRUCT }, \ { "flags", CTLTYPE_STRUCT }, \ { "iflist", CTLTYPE_STRUCT }, \ + { "stat", CTLTYPE_STRUCT }, \ + { "trash", CTLTYPE_INT }, \ + { "iflist2", CTLTYPE_STRUCT }, \ + { "dump2", CTLTYPE_STRUCT }, \ } +#endif KERNEL_PRIVATE + /* * Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 /* - * Message header for recvmsg and sendmsg calls. + * [XSI] Message header for recvmsg and sendmsg calls. * Used value-result for recvmsg, value only for sendmsg. 
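sockaddr_storage exists so one buffer can receive any of these families: _SS_MAXSIZE fixes its size at 128 bytes and __ss_align forces 64-bit alignment of the tail. An illustrative use, assuming only the standard getpeername(2) declared later in this header (the helper name is mine):

    #include <string.h>
    #include <sys/socket.h>

    /* One stack buffer serves every address family the kernel may
     * hand back; the caller branches on ss_family afterwards. */
    static int peer_family(int fd)
    {
        struct sockaddr_storage ss;
        socklen_t len = sizeof (ss);

        memset(&ss, 0, sizeof (ss));
        if (getpeername(fd, (struct sockaddr *)&ss, &len) == -1)
            return -1;
        return ss.ss_family;           /* AF_INET, AF_INET6, ... */
    }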
*/ struct msghdr { - caddr_t msg_name; /* optional address */ + void *msg_name; /* [XSI] optional address */ + socklen_t msg_namelen; /* [XSI] size of address */ + struct iovec *msg_iov; /* [XSI] scatter/gather array */ + int msg_iovlen; /* [XSI] # elements in msg_iov */ + void *msg_control; /* [XSI] ancillary data, see below */ + socklen_t msg_controllen; /* [XSI] ancillary data buffer len */ + int msg_flags; /* [XSI] flags on received message */ +}; + +// LP64todo - should this move? +#ifdef KERNEL +/* LP64 version of struct msghdr. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with struct msghdr + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_msghdr { + user_addr_t msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ - struct iovec *msg_iov; /* scatter/gather array */ - u_int msg_iovlen; /* # elements in msg_iov */ - caddr_t msg_control; /* ancillary data, see below */ + user_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user_addr_t msg_control; /* ancillary data, see below */ socklen_t msg_controllen; /* ancillary data buffer len */ int msg_flags; /* flags on received message */ }; +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL + #define MSG_OOB 0x1 /* process out-of-band data */ #define MSG_PEEK 0x2 /* peek at incoming message */ #define MSG_DONTROUTE 0x4 /* send without using routing tables */ @@ -386,6 +512,7 @@ struct msghdr { #define MSG_TRUNC 0x10 /* data discarded before delivery */ #define MSG_CTRUNC 0x20 /* control data lost before delivery */ #define MSG_WAITALL 0x40 /* wait for full request or error */ +#ifndef _POSIX_C_SOURCE #define MSG_DONTWAIT 0x80 /* this message should be nonblocking */ #define MSG_EOF 0x100 /* data completes connection */ #ifdef __APPLE__ @@ -398,6 +525,13 @@ struct msghdr { #endif #define MSG_COMPAT 0x8000 /* used in sendit() */ #define MSG_NEEDSA 0x10000 /* Fail receive if socket address cannot be allocated */ +#ifdef KERNEL_PRIVATE +#define MSG_NBIO 0x20000 /* FIONBIO mode, used by fifofs */ +#endif +#ifdef KERNEL +#define MSG_USEUPCALL 0x80000000 /* Inherit upcall in sock_accept */ +#endif +#endif /* !_POSIX_C_SOURCE */ /* * Header for ancillary data objects in msg_control buffer. @@ -406,12 +540,13 @@ struct msghdr { * of message elements headed by cmsghdr structures. 
*/ struct cmsghdr { - socklen_t cmsg_len; /* data byte count, including hdr */ - int cmsg_level; /* originating protocol */ - int cmsg_type; /* protocol-specific type */ -/* followed by u_char cmsg_data[]; */ + socklen_t cmsg_len; /* [XSI] data byte count, including hdr */ + int cmsg_level; /* [XSI] originating protocol */ + int cmsg_type; /* [XSI] protocol-specific type */ +/* followed by unsigned char cmsg_data[]; */ }; +#ifndef _POSIX_C_SOURCE #ifndef __APPLE__ /* * While we may have more groups than this, the cmsgcred struct must @@ -436,32 +571,35 @@ struct cmsgcred { gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ }; #endif +#endif /* !_POSIX_C_SOURCE */ /* given pointer to struct cmsghdr, return pointer to data */ -#define CMSG_DATA(cmsg) ((u_char *)(cmsg) + \ +#define CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ ALIGN(sizeof(struct cmsghdr))) /* given pointer to struct cmsghdr, return pointer to next cmsghdr */ #define CMSG_NXTHDR(mhdr, cmsg) \ - (((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len) + \ + (((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len) + \ ALIGN(sizeof(struct cmsghdr)) > \ - (caddr_t)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \ - (struct cmsghdr *)NULL : \ - (struct cmsghdr *)((caddr_t)(cmsg) + ALIGN((cmsg)->cmsg_len))) + (unsigned char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \ + (struct cmsghdr *)0 /* NULL */ : \ + (struct cmsghdr *)((unsigned char *)(cmsg) + ALIGN((cmsg)->cmsg_len))) #define CMSG_FIRSTHDR(mhdr) ((struct cmsghdr *)(mhdr)->msg_control) +#ifndef _POSIX_C_SOURCE /* RFC 2292 additions */ - #define CMSG_SPACE(l) (ALIGN(sizeof(struct cmsghdr)) + ALIGN(l)) #define CMSG_LEN(l) (ALIGN(sizeof(struct cmsghdr)) + (l)) #ifdef KERNEL #define CMSG_ALIGN(n) ALIGN(n) #endif +#endif /* !_POSIX_C_SOURCE */ /* "Socket"-level control message types: */ #define SCM_RIGHTS 0x01 /* access rights (array of int) */ +#ifndef _POSIX_C_SOURCE #define SCM_TIMESTAMP 0x02 /* timestamp (struct timeval) */ #define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ @@ -469,21 +607,23 @@ struct cmsgcred { * 4.3 compat sockaddr, move to compat file later */ struct osockaddr { - u_short sa_family; /* address family */ + __uint16_t sa_family; /* address family */ char sa_data[14]; /* up to 14 bytes of direct address */ }; /* * 4.3-compat message header (move to compat file later). */ + // LP64todo - fix this. should msg_iov be * iovec_64? struct omsghdr { - caddr_t msg_name; /* optional address */ - int msg_namelen; /* size of address */ - struct iovec *msg_iov; /* scatter/gather array */ - int msg_iovlen; /* # elements in msg_iov */ - caddr_t msg_accrights; /* access rights sent/received */ - int msg_accrightslen; + void *msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + struct iovec *msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + void *msg_accrights; /* access rights sent/rcvd */ + int msg_accrightslen; }; +#endif /* !_POSIX_C_SOURCE */ /* * howto arguments for shutdown(2), specified by Posix.1g. 
@@ -492,6 +632,7 @@ struct omsghdr { #define SHUT_WR 1 /* shut down the writing side */ #define SHUT_RDWR 2 /* shut down both sides */ +#ifndef _POSIX_C_SOURCE #if SENDFILE /* * sendfile(2) header/trailer struct @@ -503,37 +644,49 @@ struct sf_hdtr { int trl_cnt; /* number of trailer iovec's */ }; #endif +#endif /* !_POSIX_C_SOURCE */ #ifndef KERNEL - -#include - __BEGIN_DECLS -int accept __P((int, struct sockaddr *, socklen_t *)); -int bind __P((int, const struct sockaddr *, socklen_t)); -int connect __P((int, const struct sockaddr *, socklen_t)); -int getpeername __P((int, struct sockaddr *, socklen_t *)); -int getsockname __P((int, struct sockaddr *, socklen_t *)); -int getsockopt __P((int, int, int, void *, int *)); -int listen __P((int, int)); -ssize_t recv __P((int, void *, size_t, int)); -ssize_t recvfrom __P((int, void *, size_t, int, struct sockaddr *, socklen_t *)); -ssize_t recvmsg __P((int, struct msghdr *, int)); -ssize_t send __P((int, const void *, size_t, int)); -ssize_t sendto __P((int, const void *, - size_t, int, const struct sockaddr *, socklen_t)); -ssize_t sendmsg __P((int, const struct msghdr *, int)); +int accept(int, struct sockaddr * __restrict, socklen_t * __restrict) + __DARWIN_ALIAS(accept); +int bind(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(bind); +int connect(int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS( connect); +int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict) + __DARWIN_ALIAS(getpeername); +int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict) + __DARWIN_ALIAS(getsockname); +int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict); +int listen(int, int) __DARWIN_ALIAS(listen); +ssize_t recv(int, void *, size_t, int) __DARWIN_ALIAS(recv); +ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, + socklen_t * __restrict) __DARWIN_ALIAS(recvfrom); +ssize_t recvmsg(int, struct msghdr *, int) __DARWIN_ALIAS(recvmsg); +ssize_t send(int, const void *, size_t, int) __DARWIN_ALIAS(send); +ssize_t sendmsg(int, const struct msghdr *, int) __DARWIN_ALIAS(sendmsg); +ssize_t sendto(int, const void *, size_t, + int, const struct sockaddr *, socklen_t) __DARWIN_ALIAS(sendto); +int setsockopt(int, int, int, const void *, socklen_t); +int shutdown(int, int); +int socket(int, int, int); +int socketpair(int, int, int, int *) __DARWIN_ALIAS(socketpair); +/* + * NOTIMP: + * int sockatmark(int s); + */ + +#ifndef _POSIX_C_SOURCE #if SENDFILE -int sendfile __P((int, int, off_t, size_t, struct sf_hdtr *, off_t *, int)); +int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int); #endif -int setsockopt __P((int, int, int, const void *, socklen_t)); -int shutdown __P((int, int)); -int socket __P((int, int, int)); -int socketpair __P((int, int, int, int *)); - -void pfctlinput __P((int, struct sockaddr *)); +void pfctlinput(int, struct sockaddr *); +#endif /* !_POSIX_C_SOURCE */ __END_DECLS #endif /* !KERNEL */ +#ifdef KERNEL +#include +#endif + #endif /* !_SYS_SOCKET_H_ */ diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index b3d700a9f..f069bf4ac 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -65,17 +65,19 @@ #include /* for struct selinfo */ #include #include +#include + /* * Hacks to get around compiler complaints */ struct mbuf; -struct kextcb; +struct socket_filter_entry; struct protosw; struct sockif; struct sockutil; +#ifdef KERNEL_PRIVATE /* strings for sleep message: */ -#ifdef __APPLE_API_UNSTABLE extern char netio[], netcon[], netcls[]; #define SOCKET_CACHE_ON #define SO_CACHE_FLUSH_INTERVAL 1 /* Seconds */ @@ -90,8 +92,11 @@ extern char netio[], netcon[], netcls[]; * handle on protocol and pointer to protocol * private data and error information. */ +#endif /* KERNEL_PRIVATE */ + typedef u_quad_t so_gen_t; +#ifdef KERNEL_PRIVATE #ifndef __APPLE__ /* We don't support BSD style socket filters */ struct accept_filter; @@ -147,9 +152,9 @@ struct socket { #endif struct selinfo sb_sel; /* process selecting read/write */ short sb_flags; /* flags, see below */ - short sb_timeo; /* timeout for read/write */ - void *reserved1; /* for future use if needed */ - void *reserved2; + struct timeval sb_timeo; /* timeout for read/write */ + void *reserved1; /* for future use if needed */ + void *reserved2; } so_rcv, so_snd; #define SB_MAX (256*1024) /* default for max chars in sockbuf */ #define SB_LOCK 0x01 /* lock on data queue */ @@ -169,7 +174,7 @@ struct socket { caddr_t so_tpcb; /* Wisc. protocol control block - XXX unused? */ #endif - void (*so_upcall) __P((struct socket *so, caddr_t arg, int waitf)); + void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); caddr_t so_upcallarg; /* Arg for above */ uid_t so_uid; /* who opened the socket */ /* NB: generation count must not be first; easiest to make it last. */ @@ -191,16 +196,20 @@ struct socket { struct mbuf *so_temp; /* Holding area for outbound frags */ /* Plug-in support - make the socket interface overridable */ struct mbuf *so_tail; - struct kextcb *so_ext; /* NKE hook */ + struct socket_filter_entry *so_filt; /* NKE hook */ u_long so_flags; /* Flags */ -#define SOF_NOSIGPIPE 0x00000001 -#define SOF_NOADDRAVAIL 0x00000002 /* returns EADDRNOTAVAIL if src address is gone */ - void *reserved2; - void *reserved3; - void *reserved4; +#define SOF_NOSIGPIPE 0x00000001 +#define SOF_NOADDRAVAIL 0x00000002 /* returns EADDRNOTAVAIL if src address is gone */ +#define SOF_PCBCLEARING 0x00000004 /* pru_disconnect done, no need to call pru_detach */ + int so_usecount; /* refcounting of socket use */; + int so_retaincnt; + u_int32_t so_filteruse; /* usecount for the socket filters */ + void *reserved3; /* Temporarily in use/debug: last socket lock LR */ + void *reserved4; /* Temporarily in use/debug: last socket unlock LR */ + #endif }; -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ /* * Socket state bits. @@ -220,6 +229,7 @@ struct socket { #define SS_INCOMP 0x800 /* Unaccepted, incomplete connection */ #define SS_COMP 0x1000 /* unaccepted, complete connection */ #define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ +#define SS_DRAINING 0x4000 /* close waiting for blocked system calls to drain */ /* * Externalized form of struct socket used by the sysctl(3) interface. @@ -253,11 +263,11 @@ struct xsocket { uid_t so_uid; /* XXX */ }; +#ifdef KERNEL_PRIVATE /* * Macros for sockets and socket buffering. */ -#ifdef __APPLE__ -#ifdef __APPLE_API_UNSTABLE + #define sbtoso(sb) (sb->sb_so) /* @@ -265,17 +275,20 @@ struct xsocket { * These are macros on FreeBSD. 
On Darwin the * implementation is in bsd/kern/uipc_socket2.c */ -int sb_notify __P((struct sockbuf *sb)); -long sbspace __P((struct sockbuf *sb)); -int sosendallatonce __P((struct socket *so)); -int soreadable __P((struct socket *so)); -int sowriteable __P((struct socket *so)); -void sballoc __P((struct sockbuf *sb, struct mbuf *m)); -void sbfree __P((struct sockbuf *sb, struct mbuf *m)); -int sblock __P((struct sockbuf *sb, int wf)); -void sbunlock __P((struct sockbuf *sb)); -void sorwakeup __P((struct socket * so)); -void sowwakeup __P((struct socket * so)); + +__BEGIN_DECLS +int sb_notify(struct sockbuf *sb); +long sbspace(struct sockbuf *sb); +int sosendallatonce(struct socket *so); +int soreadable(struct socket *so); +int sowriteable(struct socket *so); +void sballoc(struct sockbuf *sb, struct mbuf *m); +void sbfree(struct sockbuf *sb, struct mbuf *m); +int sblock(struct sockbuf *sb, int wf); +void sbunlock(struct sockbuf *sb, int locked); +void sorwakeup(struct socket * so); +void sowwakeup(struct socket * so); +__END_DECLS /* * Socket extension mechanism: control block hooks: @@ -294,10 +307,11 @@ struct kextcb }; #define EXT_NULL 0x0 /* STATE: Not in use */ #define sotokextcb(so) (so ? so->so_ext : 0) -#endif /* __APPLE___ */ #ifdef KERNEL +#define SO_FILT_HINT_LOCKED 0x1 + /* * Argument structure for sosetopt et seq. This is in the KERNEL * section because it will never be visible to user code. @@ -307,7 +321,7 @@ struct sockopt { enum sopt_dir sopt_dir; /* is this a get or a set? */ int sopt_level; /* second arg of [gs]etsockopt */ int sopt_name; /* third arg of [gs]etsockopt */ - void *sopt_val; /* fourth arg of [gs]etsockopt */ + user_addr_t sopt_val; /* fourth arg of [gs]etsockopt */ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ struct proc *sopt_p; /* calling process or null if kernel */ }; @@ -340,109 +354,101 @@ struct ucred; struct uio; struct knote; -/* - * File operations on sockets. 
- */ -int soo_read __P((struct file *fp, struct uio *uio, struct ucred *cred, - int flags, struct proc *p)); -int soo_write __P((struct file *fp, struct uio *uio, struct ucred *cred, - int flags, struct proc *p)); -int soo_close __P((struct file *fp, struct proc *p)); -int soo_ioctl __P((struct file *fp, u_long cmd, caddr_t data, - struct proc *p)); -int soo_stat __P((struct socket *so, struct stat *ub)); -int soo_select __P((struct file *fp, int which, void * wql, struct proc *p)); -int soo_kqfilter __P((struct file *fp, struct knote *kn, struct proc *p)); - - /* * From uipc_socket and friends */ -struct sockaddr *dup_sockaddr __P((struct sockaddr *sa, int canwait)); -int getsock __P((struct filedesc *fdp, int fd, struct file **fpp)); -int sockargs __P((struct mbuf **mp, caddr_t buf, int buflen, int type)); -int getsockaddr __P((struct sockaddr **namp, caddr_t uaddr, size_t len)); -void sbappend __P((struct sockbuf *sb, struct mbuf *m)); -int sbappendaddr __P((struct sockbuf *sb, struct sockaddr *asa, - struct mbuf *m0, struct mbuf *control)); -int sbappendcontrol __P((struct sockbuf *sb, struct mbuf *m0, - struct mbuf *control)); -void sbappendrecord __P((struct sockbuf *sb, struct mbuf *m0)); -void sbcheck __P((struct sockbuf *sb)); -void sbcompress __P((struct sockbuf *sb, struct mbuf *m, struct mbuf *n)); +__BEGIN_DECLS +struct sockaddr *dup_sockaddr(struct sockaddr *sa, int canwait); +int getsock(struct filedesc *fdp, int fd, struct file **fpp); +int sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type); +int getsockaddr(struct sockaddr **namp, user_addr_t uaddr, size_t len); +int sbappend(struct sockbuf *sb, struct mbuf *m); +int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control, int *error_out); +int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control, int *error_out); +int sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +void sbcheck(struct sockbuf *sb); +int sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * - sbcreatecontrol __P((caddr_t p, int size, int type, int level)); -void sbdrop __P((struct sockbuf *sb, int len)); -void sbdroprecord __P((struct sockbuf *sb)); -void sbflush __P((struct sockbuf *sb)); -void sbinsertoob __P((struct sockbuf *sb, struct mbuf *m0)); -void sbrelease __P((struct sockbuf *sb)); -int sbreserve __P((struct sockbuf *sb, u_long cc)); -void sbtoxsockbuf __P((struct sockbuf *sb, struct xsockbuf *xsb)); -int sbwait __P((struct sockbuf *sb)); -int sb_lock __P((struct sockbuf *sb)); -int soabort __P((struct socket *so)); -int soaccept __P((struct socket *so, struct sockaddr **nam)); -struct socket *soalloc __P((int waitok, int dom, int type)); -int sobind __P((struct socket *so, struct sockaddr *nam)); -void socantrcvmore __P((struct socket *so)); -void socantsendmore __P((struct socket *so)); -int soclose __P((struct socket *so)); -int soconnect __P((struct socket *so, struct sockaddr *nam)); -int soconnect2 __P((struct socket *so1, struct socket *so2)); -int socreate __P((int dom, struct socket **aso, int type, int proto)); -void sodealloc __P((struct socket *so)); -int sodisconnect __P((struct socket *so)); -void sofree __P((struct socket *so)); -int sogetopt __P((struct socket *so, struct sockopt *sopt)); -void sohasoutofband __P((struct socket *so)); -void soisconnected __P((struct socket *so)); -void soisconnecting __P((struct socket *so)); -void soisdisconnected __P((struct socket *so)); -void soisdisconnecting __P((struct socket *so)); 
-int solisten __P((struct socket *so, int backlog)); + sbcreatecontrol(caddr_t p, int size, int type, int level); +void sbdrop(struct sockbuf *sb, int len); +void sbdroprecord(struct sockbuf *sb); +void sbflush(struct sockbuf *sb); +int sbinsertoob(struct sockbuf *sb, struct mbuf *m0); +void sbrelease(struct sockbuf *sb); +int sbreserve(struct sockbuf *sb, u_long cc); +void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); +int sbwait(struct sockbuf *sb); +int sb_lock(struct sockbuf *sb); +int soabort(struct socket *so); +int soaccept(struct socket *so, struct sockaddr **nam); +int soacceptlock (struct socket *so, struct sockaddr **nam, int dolock); +struct socket *soalloc(int waitok, int dom, int type); +int sobind(struct socket *so, struct sockaddr *nam); +void socantrcvmore(struct socket *so); +void socantsendmore(struct socket *so); +int soclose(struct socket *so); +int soconnect(struct socket *so, struct sockaddr *nam); +int soconnectlock (struct socket *so, struct sockaddr *nam, int dolock); +int soconnect2(struct socket *so1, struct socket *so2); +int socreate(int dom, struct socket **aso, int type, int proto); +void sodealloc(struct socket *so); +int sodisconnect(struct socket *so); +void sofree(struct socket *so); +int sogetopt(struct socket *so, struct sockopt *sopt); +void sohasoutofband(struct socket *so); +void soisconnected(struct socket *so); +void soisconnecting(struct socket *so); +void soisdisconnected(struct socket *so); +void soisdisconnecting(struct socket *so); +int solisten(struct socket *so, int backlog); struct socket * - sodropablereq __P((struct socket *head)); + sodropablereq(struct socket *head); struct socket * - sonewconn __P((struct socket *head, int connstatus)); -int sooptcopyin __P((struct sockopt *sopt, void *buf, size_t len, - size_t minlen)); -int sooptcopyout __P((struct sockopt *sopt, void *buf, size_t len)); + sonewconn(struct socket *head, int connstatus, const struct sockaddr* from); +int sooptcopyin(struct sockopt *sopt, void *data, size_t len, size_t minlen); +int sooptcopyout(struct sockopt *sopt, void *data, size_t len); +int socket_lock(struct socket *so, int refcount); +int socket_unlock(struct socket *so, int refcount); /* * XXX; prepare mbuf for (__FreeBSD__ < 3) routines. * Used primarily in IPSec and IPv6 code. 
*/ -int soopt_getm __P((struct sockopt *sopt, struct mbuf **mp)); -int soopt_mcopyin __P((struct sockopt *sopt, struct mbuf *m)); -int soopt_mcopyout __P((struct sockopt *sopt, struct mbuf *m)); +int soopt_getm(struct sockopt *sopt, struct mbuf **mp); +int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m); +int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m); -int sopoll __P((struct socket *so, int events, struct ucred *cred, void *wql)); -int soreceive __P((struct socket *so, struct sockaddr **paddr, +int sopoll(struct socket *so, int events, struct ucred *cred, void *wql); +int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp)); -int soreserve __P((struct socket *so, u_long sndcc, u_long rcvcc)); -void sorflush __P((struct socket *so)); -int sosend __P((struct socket *so, struct sockaddr *addr, struct uio *uio, - struct mbuf *top, struct mbuf *control, int flags)); + struct mbuf **controlp, int *flagsp); +int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); +void sorflush(struct socket *so); +int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags); -int sosetopt __P((struct socket *so, struct sockopt *sopt)); -int soshutdown __P((struct socket *so, int how)); -void sotoxsocket __P((struct socket *so, struct xsocket *xso)); -void sowakeup __P((struct socket *so, struct sockbuf *sb)); +int sosetopt(struct socket *so, struct sockopt *sopt); +int soshutdown(struct socket *so, int how); +void sotoxsocket(struct socket *so, struct xsocket *xso); +void sowakeup(struct socket *so, struct sockbuf *sb); +int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p); #ifndef __APPLE__ /* accept filter functions */ -int accept_filt_add __P((struct accept_filter *filt)); -int accept_filt_del __P((char *name)); -struct accept_filter * accept_filt_get __P((char *name)); +int accept_filt_add(struct accept_filter *filt); +int accept_filt_del(char *name); +struct accept_filter * accept_filt_get(char *name); #ifdef ACCEPT_FILTER_MOD -int accept_filt_generic_mod_event __P((module_t mod, int event, void *data)); +int accept_filt_generic_mod_event(module_t mod, int event, void *data); SYSCTL_DECL(_net_inet_accf); #endif /* ACCEPT_FILTER_MOD */ #endif /* !defined(__APPLE__) */ +__END_DECLS + #endif /* KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ #endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h index 5a96e324b..701b94c1b 100644 --- a/bsd/sys/sockio.h +++ b/bsd/sys/sockio.h @@ -58,6 +58,8 @@ #ifndef _SYS_SOCKIO_H_ #define _SYS_SOCKIO_H_ +#include + #include /* Socket ioctl's. 
*/ @@ -69,8 +71,10 @@ #define SIOCSPGRP _IOW('s', 8, int) /* set process group */ #define SIOCGPGRP _IOR('s', 9, int) /* get process group */ -#define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */ -#define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */ +#if 0 +#define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */ +#define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */ +#endif #define SIOCSIFADDR _IOW('i', 12, struct ifreq) /* set ifnet address */ #define OSIOCGIFADDR _IOWR('i', 13, struct ifreq) /* get ifnet address */ @@ -98,9 +102,13 @@ #define SIOCGIFDSTADDR _IOWR('i', 34, struct ifreq) /* get p-p address */ #define SIOCGIFBRDADDR _IOWR('i', 35, struct ifreq) /* get broadcast addr */ #define SIOCGIFCONF _IOWR('i', 36, struct ifconf) /* get ifnet list */ +#ifdef KERNEL_PRIVATE +#define SIOCGIFCONF64 _IOWR('i', 36, struct ifconf64) /* get ifnet list */ +#endif KERNEL_PRIVATE #define SIOCGIFNETMASK _IOWR('i', 37, struct ifreq) /* get net addr mask */ #define SIOCAUTOADDR _IOWR('i', 38, struct ifreq) /* autoconf address */ #define SIOCAUTONETMASK _IOW('i', 39, struct ifreq) /* autoconf netmask */ +#define SIOCARPIPLL _IOWR('i', 40, struct ifreq) /* arp for IPv4LL address */ #define SIOCADDMULTI _IOW('i', 49, struct ifreq) /* add m'cast addr */ @@ -111,6 +119,9 @@ #define SIOCSIFPHYS _IOW('i', 54, struct ifreq) /* set IF wire */ #define SIOCSIFMEDIA _IOWR('i', 55, struct ifreq) /* set net media */ #define SIOCGIFMEDIA _IOWR('i', 56, struct ifmediareq) /* get net media */ +#ifdef KERNEL_PRIVATE +#define SIOCGIFMEDIA64 _IOWR('i', 56, struct ifmediareq64) /* get net media (64-bit) */ +#endif KERNEL_PRIVATE #define SIOCSIFGENERIC _IOW('i', 57, struct ifreq) /* generic IF set op */ #define SIOCGIFGENERIC _IOWR('i', 58, struct ifreq) /* generic IF get op */ #define SIOCRSLVMULTI _IOWR('i', 59, struct rslvmulti_req) @@ -124,33 +135,40 @@ #define SIOCSLIFPHYADDR _IOW('i', 66, struct if_laddrreq) /* set gif addrs */ #define SIOCGLIFPHYADDR _IOWR('i', 67, struct if_laddrreq) /* get gif addrs */ - - - - - - -#ifdef KERNEL_PRIVATE +#define SIOCGIFDEVMTU _IOWR('i', 68, struct ifreq) /* get if ifdevmtu */ +#define SIOCSIFALTMTU _IOW('i', 69, struct ifreq) /* set if alternate mtu */ +#define SIOCGIFALTMTU _IOWR('i', 72, struct ifreq) /* get if alternate mtu */ +#define SIOCSIFBOND _IOW('i', 70, struct ifreq) /* set bond if config */ +#define SIOCGIFBOND _IOWR('i', 71, struct ifreq) /* get bond if config */ #define SIOCIFCREATE _IOWR('i', 120, struct ifreq) /* create clone if */ #define SIOCIFDESTROY _IOW('i', 121, struct ifreq) /* destroy clone if */ -#if 0 +#define SIOCSIFVLAN _IOW('i', 126, struct ifreq) /* set VLAN config */ +#define SIOCGIFVLAN _IOWR('i', 127, struct ifreq) /* get VLAN config */ +#define SIOCSETVLAN SIOCSIFVLAN +#define SIOCGETVLAN SIOCGIFVLAN +#ifdef KERNEL_PRIVATE +#define SIOCSIFDEVMTU SIOCSIFALTMTU /* deprecated */ +#endif KERNEL_PRIVATE + +#ifdef PRIVATE +#ifdef KERNEL #define SIOCIFGCLONERS _IOWR('i', 129, struct if_clonereq) /* get cloners */ -#endif 0 -#define SIOCSETVLAN _IOW('i', 126, struct ifreq) /* set VLAN config */ -#define SIOCGETVLAN _IOWR('i', 127, struct ifreq) /* get VLAN config */ +#define SIOCIFGCLONERS64 _IOWR('i', 129, struct if_clonereq64) /* get cloners */ +#endif KERNEL /* * temporary control calls to attach/detach IP to/from an ethernet interface */ #define SIOCPROTOATTACH _IOWR('i', 80, struct ifreq) /* attach proto to interface */ #define SIOCPROTODETACH _IOWR('i', 81, struct ifreq) /* detach proto from 
interface */ -#endif /* KERNEL_PRIVATE */ +#endif /* PRIVATE */ #define SIOCGIFASYNCMAP _IOWR('i', 124, struct ifreq) /* get ppp asyncmap */ #define SIOCSIFASYNCMAP _IOW('i', 125, struct ifreq) /* set ppp asyncmap */ +#ifdef PRIVATE #define SIOCSETOT _IOW('s', 128, int) /* set socket for LibOT */ - +#endif /* PRIVATE */ #endif /* !_SYS_SOCKIO_H_ */ diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index ffba038f3..3acb7c4a9 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,116 +64,305 @@ #ifndef _SYS_STAT_H_ #define _SYS_STAT_H_ -#include +#include +#include + +/* + * [XSI] The blkcnt_t, blksize_t, dev_t, ino_t, mode_t, nlink_t, uid_t, + * gid_t, off_t, and time_t types shall be defined as described in + * . + */ +#ifndef _BLKCNT_T +typedef __darwin_blkcnt_t blkcnt_t; +#define _BLKCNT_T +#endif + +#ifndef _BLKSIZE_T +typedef __darwin_blksize_t blksize_t; +#define _BLKSIZE_T +#endif + +#ifndef _DEV_T +typedef __darwin_dev_t dev_t; /* device number */ +#define _DEV_T +#endif + +#ifndef _INO_T +typedef __darwin_ino_t ino_t; /* inode number */ +#define _INO_T +#endif + +#ifndef _MODE_T +typedef __darwin_mode_t mode_t; +#define _MODE_T +#endif + +#ifndef _NLINK_T +typedef __uint16_t nlink_t; /* link count */ +#define _NLINK_T +#endif + +#ifndef _UID_T +typedef __darwin_uid_t uid_t; /* user id */ +#define _UID_T +#endif + +#ifndef _GID_T +typedef __darwin_gid_t gid_t; +#define _GID_T +#endif -#ifndef _POSIX_SOURCE +#ifndef _OFF_T +typedef __darwin_off_t off_t; +#define _OFF_T +#endif + +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif + +/* [XSI] The timespec structure may be defined as described in */ +#ifndef _TIMESPEC +#define _TIMESPEC +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* and nanoseconds */ +}; +// LP64todo - should this move? +#ifdef KERNEL +/* LP64 version of struct timespec. time_t is a long and must grow when + * we're dealing with a 64-bit process. + * WARNING - keep in sync with struct timespec + */ +struct user_timespec { + user_time_t tv_sec; /* seconds */ + __int64_t tv_nsec; /* and nanoseconds */ +}; +#endif // KERNEL +#endif /* _TIMESPEC */ + + +#ifndef _POSIX_C_SOURCE +/* + * XXX So deprecated, it would make your head spin + * + * The old stat structure. In fact, this is not used by the kernel at all, + * and should not be used by user space, and should be removed from this + * header file entirely (along with the unused cvtstat() prototype in + * vnode_internal.h). 
+ */ struct ostat { - u_int16_t st_dev; /* inode's device */ - ino_t st_ino; /* inode's number */ - mode_t st_mode; /* inode protection mode */ - nlink_t st_nlink; /* number of hard links */ - u_int16_t st_uid; /* user ID of the file's owner */ - u_int16_t st_gid; /* group ID of the file's group */ - u_int16_t st_rdev; /* device type */ - int32_t st_size; /* file size, in bytes */ + __uint16_t st_dev; /* inode's device */ + ino_t st_ino; /* inode's number */ + mode_t st_mode; /* inode protection mode */ + nlink_t st_nlink; /* number of hard links */ + __uint16_t st_uid; /* user ID of the file's owner */ + __uint16_t st_gid; /* group ID of the file's group */ + __uint16_t st_rdev; /* device type */ + __int32_t st_size; /* file size, in bytes */ struct timespec st_atimespec; /* time of last access */ struct timespec st_mtimespec; /* time of last data modification */ struct timespec st_ctimespec; /* time of last file status change */ - int32_t st_blksize; /* optimal blocksize for I/O */ - int32_t st_blocks; /* blocks allocated for file */ - u_int32_t st_flags; /* user defined flags for file */ - u_int32_t st_gen; /* file generation number */ + __int32_t st_blksize; /* optimal blocksize for I/O */ + __int32_t st_blocks; /* blocks allocated for file */ + __uint32_t st_flags; /* user defined flags for file */ + __uint32_t st_gen; /* file generation number */ }; -#endif /* !_POSIX_SOURCE */ +#endif /* !_POSIX_C_SOURCE */ +/* + * [XSI] This structure is used as the second parameter to the fstat(), + * lstat(), and stat() functions. + */ struct stat { - dev_t st_dev; /* inode's device */ - ino_t st_ino; /* inode's number */ - mode_t st_mode; /* inode protection mode */ - nlink_t st_nlink; /* number of hard links */ - uid_t st_uid; /* user ID of the file's owner */ - gid_t st_gid; /* group ID of the file's group */ - dev_t st_rdev; /* device type */ -#ifndef _POSIX_SOURCE + dev_t st_dev; /* [XSI] ID of device containing file */ + ino_t st_ino; /* [XSI] File serial number */ + mode_t st_mode; /* [XSI] Mode of file (see below) */ + nlink_t st_nlink; /* [XSI] Number of hard links */ + uid_t st_uid; /* [XSI] User ID of the file */ + gid_t st_gid; /* [XSI] Group ID of the file */ + dev_t st_rdev; /* [XSI] Device ID */ +#ifndef _POSIX_C_SOURCE struct timespec st_atimespec; /* time of last access */ struct timespec st_mtimespec; /* time of last data modification */ - struct timespec st_ctimespec; /* time of last file status change */ + struct timespec st_ctimespec; /* time of last status change */ #else - time_t st_atime; /* time of last access */ - long st_atimensec; /* nsec of last access */ - time_t st_mtime; /* time of last data modification */ - long st_mtimensec; /* nsec of last data modification */ - time_t st_ctime; /* time of last file status change */ - long st_ctimensec; /* nsec of last file status change */ -#endif - off_t st_size; /* file size, in bytes */ - int64_t st_blocks; /* blocks allocated for file */ - u_int32_t st_blksize; /* optimal blocksize for I/O */ - u_int32_t st_flags; /* user defined flags for file */ - u_int32_t st_gen; /* file generation number */ - int32_t st_lspare; - int64_t st_qspare[2]; + time_t st_atime; /* [XSI] Time of last access */ + long st_atimensec; /* nsec of last access */ + time_t st_mtime; /* [XSI] Last data modification time */ + long st_mtimensec; /* last data modification nsec */ + time_t st_ctime; /* [XSI] Time of last status change */ + long st_ctimensec; /* nsec of last status change */ +#endif + off_t st_size; /* [XSI] file size, in bytes */ + 
blkcnt_t st_blocks; /* [XSI] blocks allocated for file */
+ blksize_t st_blksize; /* [XSI] optimal blocksize for I/O */
+ __uint32_t st_flags; /* user defined flags for file */
+ __uint32_t st_gen; /* file generation number */
+ __int32_t st_lspare; /* RESERVED: DO NOT USE! */
+ __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! */
 };
+// LP64todo - should this move?
+#ifdef KERNEL
+#include
-#ifndef _POSIX_SOURCE
-#define st_atime st_atimespec.tv_sec
-#define st_mtime st_mtimespec.tv_sec
-#define st_ctime st_ctimespec.tv_sec
+/* LP64 version of struct stat. time_t (see timespec) is a long and must
+ * grow when we're dealing with a 64-bit process.
+ * WARNING - keep in sync with struct stat
+ */
+#if __DARWIN_ALIGN_NATURAL
+#pragma options align=natural
 #endif
-#define S_ISUID 0004000 /* set user id on execution */
-#define S_ISGID 0002000 /* set group id on execution */
-#ifndef _POSIX_SOURCE
-#define S_ISTXT 0001000 /* sticky bit */
+struct user_stat {
+ dev_t st_dev; /* [XSI] ID of device containing file */
+ ino_t st_ino; /* [XSI] File serial number */
+ mode_t st_mode; /* [XSI] Mode of file (see below) */
+ nlink_t st_nlink; /* [XSI] Number of hard links */
+ uid_t st_uid; /* [XSI] User ID of the file */
+ gid_t st_gid; /* [XSI] Group ID of the file */
+ dev_t st_rdev; /* [XSI] Device ID */
+#ifndef _POSIX_C_SOURCE
+ struct user_timespec st_atimespec; /* time of last access */
+ struct user_timespec st_mtimespec; /* time of last data modification */
+ struct user_timespec st_ctimespec; /* time of last status change */
+#else
+ user_time_t st_atime; /* [XSI] Time of last access */
+ __int64_t st_atimensec; /* nsec of last access */
+ user_time_t st_mtime; /* [XSI] Last data modification */
+ __int64_t st_mtimensec; /* last data modification nsec */
+ user_time_t st_ctime; /* [XSI] Time of last status change */
+ __int64_t st_ctimensec; /* nsec of last status change */
 #endif
+ off_t st_size; /* [XSI] File size, in bytes */
+ blkcnt_t st_blocks; /* [XSI] Blocks allocated for file */
+ blksize_t st_blksize; /* [XSI] Optimal blocksize for I/O */
+ __uint32_t st_flags; /* user defined flags for file */
+ __uint32_t st_gen; /* file generation number */
+ __int32_t st_lspare; /* RESERVED: DO NOT USE! */
+ __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! */
+};
+
+#if __DARWIN_ALIGN_NATURAL
+#pragma options align=reset
+#endif
+
+extern void munge_stat(struct stat *sbp, struct user_stat *usbp);
-#define S_IRWXU 0000700 /* RWX mask for owner */
-#define S_IRUSR 0000400 /* R for owner */
-#define S_IWUSR 0000200 /* W for owner */
-#define S_IXUSR 0000100 /* X for owner */
+#endif // KERNEL
-#ifndef _POSIX_SOURCE
-#define S_IREAD S_IRUSR
-#define S_IWRITE S_IWUSR
-#define S_IEXEC S_IXUSR
+
+#ifndef _POSIX_C_SOURCE
+#define st_atime st_atimespec.tv_sec
+#define st_mtime st_mtimespec.tv_sec
+#define st_ctime st_ctimespec.tv_sec
 #endif
-#define S_IRWXG 0000070 /* RWX mask for group */
-#define S_IRGRP 0000040 /* R for group */
-#define S_IWGRP 0000020 /* W for group */
-#define S_IXGRP 0000010 /* X for group */
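With st_blocks and st_blksize retyped to the [XSI] blkcnt_t/blksize_t and the timespec fields exposed in the default namespace, callers see the same data under either spelling: st_mtime is a macro for st_mtimespec.tv_sec just above. An illustrative reader, not part of the patch:

    #include <stdio.h>
    #include <sys/stat.h>

    /* Fetch metadata and read the [XSI] fields declared above. */
    static void print_meta(const char *path)
    {
        struct stat st;

        if (stat(path, &st) == -1) {
            perror(path);
            return;
        }
        printf("%s: %lld bytes, blksize %d, mtime %ld.%09ld\n",
               path,
               (long long)st.st_size,       /* off_t */
               (int)st.st_blksize,          /* blksize_t */
               (long)st.st_mtimespec.tv_sec,
               (long)st.st_mtimespec.tv_nsec);
    }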
+/*
+ * [XSI] The following are symbolic names for the values of type mode_t. They
+ * are bitmap values.
+ */
+#ifndef S_IFMT
+/* File type */
+#define S_IFMT 0170000 /* [XSI] type of file mask */
+#define S_IFIFO 0010000 /* [XSI] named pipe (fifo) */
+#define S_IFCHR 0020000 /* [XSI] character special */
+#define S_IFDIR 0040000 /* [XSI] directory */
+#define S_IFBLK 0060000 /* [XSI] block special */
+#define S_IFREG 0100000 /* [XSI] regular */
+#define S_IFLNK 0120000 /* [XSI] symbolic link */
+#define S_IFSOCK 0140000 /* [XSI] socket */
+#ifndef _POSIX_C_SOURCE
+#define S_IFWHT 0160000 /* whiteout */
+#define S_IFXATTR 0200000 /* extended attribute */
+#endif
-#define S_IRWXO 0000007 /* RWX mask for other */
-#define S_IROTH 0000004 /* R for other */
-#define S_IWOTH 0000002 /* W for other */
-#define S_IXOTH 0000001 /* X for other */
+/* File mode */
+/* Read, write, execute/search by owner */
+#define S_IRWXU 0000700 /* [XSI] RWX mask for owner */
+#define S_IRUSR 0000400 /* [XSI] R for owner */
+#define S_IWUSR 0000200 /* [XSI] W for owner */
+#define S_IXUSR 0000100 /* [XSI] X for owner */
+/* Read, write, execute/search by group */
+#define S_IRWXG 0000070 /* [XSI] RWX mask for group */
+#define S_IRGRP 0000040 /* [XSI] R for group */
+#define S_IWGRP 0000020 /* [XSI] W for group */
+#define S_IXGRP 0000010 /* [XSI] X for group */
+/* Read, write, execute/search by others */
+#define S_IRWXO 0000007 /* [XSI] RWX mask for other */
+#define S_IROTH 0000004 /* [XSI] R for other */
+#define S_IWOTH 0000002 /* [XSI] W for other */
+#define S_IXOTH 0000001 /* [XSI] X for other */
-#ifndef _POSIX_SOURCE
-#define S_IFMT 0170000 /* type of file mask */
-#define S_IFIFO 0010000 /* named pipe (fifo) */
-#define S_IFCHR 0020000 /* character special */
-#define S_IFDIR 0040000 /* directory */
-#define S_IFBLK 0060000 /* block special */
-#define S_IFREG 0100000 /* regular */
-#define S_IFLNK 0120000 /* symbolic link */
-#define S_IFSOCK 0140000 /* socket */
-#define S_IFWHT 0160000 /* whiteout */
-#define S_ISVTX 0001000 /* save swapped text even after use */
+#define S_ISUID 0004000 /* [XSI] set user id on execution */
+#define S_ISGID 0002000 /* [XSI] set group id on execution */
+#define S_ISVTX 0001000 /* [XSI] directory restricted delete */
+
+#ifndef _POSIX_C_SOURCE
+#define S_ISTXT S_ISVTX /* sticky bit: not supported */
+#define S_IREAD S_IRUSR /* backward compatibility */
+#define S_IWRITE S_IWUSR /* backward compatibility */
+#define S_IEXEC S_IXUSR /* backward compatibility */
 #endif
+#endif /* !S_IFMT */
-#define S_ISDIR(m) (((m) & 0170000) == 0040000) /* directory */
-#define S_ISCHR(m) (((m) & 0170000) == 0020000) /* char special */
+/*
+ * [XSI] The following macros shall be provided to test whether a file is
+ * of the specified type. The value m supplied to the macros is the value
+ * of st_mode from a stat structure. The macro shall evaluate to a non-zero
+ * value if the test is true; 0 if the test is false.
+ */
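The S_IS* predicates defined next reduce to masking st_mode with S_IFMT and comparing against one S_IF* constant, as this sketch makes explicit; note that S_IFLNK can only be observed via lstat(2), since stat(2) follows the link. Helper names are illustrative, not from the header:

    #include <sys/stat.h>

    /* Classify st_mode by isolating the file-type bits. */
    static const char *file_kind(mode_t m)
    {
        switch (m & S_IFMT) {
        case S_IFREG:  return "regular file";
        case S_IFDIR:  return "directory";
        case S_IFLNK:  return "symlink";
        case S_IFSOCK: return "socket";
        case S_IFIFO:  return "fifo";
        case S_IFCHR:  return "character device";
        case S_IFBLK:  return "block device";
        default:       return "other";
        }
    }

    /* 0755 spelled with the symbolic permission names. */
    static int make_public_dir(const char *path)
    {
        return mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP |
                           S_IROTH | S_IXOTH);
    }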
 #define S_ISBLK(m) (((m) & 0170000) == 0060000) /* block special */
-#define S_ISREG(m) (((m) & 0170000) == 0100000) /* regular file */
+#define S_ISCHR(m) (((m) & 0170000) == 0020000) /* char special */
+#define S_ISDIR(m) (((m) & 0170000) == 0040000) /* directory */
 #define S_ISFIFO(m) (((m) & 0170000) == 0010000) /* fifo or socket */
-#ifndef _POSIX_SOURCE
+#define S_ISREG(m) (((m) & 0170000) == 0100000) /* regular file */
 #define S_ISLNK(m) (((m) & 0170000) == 0120000) /* symbolic link */
 #define S_ISSOCK(m) (((m) & 0170000) == 0140000) /* socket */
+#ifndef _POSIX_C_SOURCE
 #define S_ISWHT(m) (((m) & 0170000) == 0160000) /* whiteout */
+#define S_ISXATTR(m) (((m) & 0200000) == 0200000) /* extended attribute */
 #endif
-#ifndef _POSIX_SOURCE
+/*
+ * [XSI] The implementation may implement message queues, semaphores, or
+ * shared memory objects as distinct file types. The following macros
+ * shall be provided to test whether a file is of the specified type.
+ * The value of the buf argument supplied to the macros is a pointer to
+ * a stat structure. The macro shall evaluate to a non-zero value if
+ * the specified object is implemented as a distinct file type and the
+ * specified file type is contained in the stat structure referenced by
+ * buf. Otherwise, the macro shall evaluate to zero.
+ *
+ * NOTE: The current implementation does not do this, although
+ * this may change in future revisions, and so currently only
+ * provides these macros to ensure source compatibility with
+ * implementations which do.
+ */
+#define S_TYPEISMQ(buf) (0) /* Test for a message queue */
+#define S_TYPEISSEM(buf) (0) /* Test for a semaphore */
+#define S_TYPEISSHM(buf) (0) /* Test for a shared memory object */
+
+/*
+ * [TYM] The implementation may implement typed memory objects as distinct
+ * file types, and the following macro shall test whether a file is of the
+ * specified type. The value of the buf argument supplied to the macros is
+ * a pointer to a stat structure. The macro shall evaluate to a non-zero
+ * value if the specified object is implemented as a distinct file type and
+ * the specified file type is contained in the stat structure referenced by
+ * buf. Otherwise, the macro shall evaluate to zero.
+ *
+ * NOTE: The current implementation does not do this, although
+ * this may change in future revisions, and so currently only
+ * provides this macro to ensure source compatibility with
+ * implementations which do.
+ */ +#define S_TYPEISTMO(buf) (0) /* Test for a typed memory object */ + + +#ifndef _POSIX_C_SOURCE #define ACCESSPERMS (S_IRWXU|S_IRWXG|S_IRWXO) /* 0777 */ /* 7777 */ #define ALLPERMS (S_ISUID|S_ISGID|S_ISTXT|S_IRWXU|S_IRWXG|S_IRWXO) @@ -211,21 +400,36 @@ struct stat { #endif #ifndef KERNEL -#include __BEGIN_DECLS -int chmod __P((const char *, mode_t)); -int fstat __P((int, struct stat *)); -int mkdir __P((const char *, mode_t)); -int mkfifo __P((const char *, mode_t)); -int stat __P((const char *, struct stat *)); -mode_t umask __P((mode_t)); -#ifndef _POSIX_SOURCE -int chflags __P((const char *, u_long)); -int fchflags __P((int, u_long)); -int fchmod __P((int, mode_t)); -int lstat __P((const char *, struct stat *)); +/* [XSI] */ +int chmod(const char *, mode_t); +int fchmod(int, mode_t); +int fstat(int, struct stat *); +int lstat(const char *, struct stat *); +int mkdir(const char *, mode_t); +int mkfifo(const char *, mode_t); +int stat(const char *, struct stat *); +int mknod(const char *, mode_t, dev_t); +mode_t umask(mode_t); + +#ifndef _POSIX_C_SOURCE +#ifndef _FILESEC_T +struct _filesec; +typedef struct _filesec *filesec_t; +#define _FILESEC_T #endif +int chflags(const char *, __uint32_t); +int chmodx_np(const char *, filesec_t); +int fchflags(int, __uint32_t); +int fchmodx_np(int, filesec_t); +int fstatx_np(int, struct stat *, filesec_t); +int lstatx_np(const char *, struct stat *, filesec_t); +int mkdirx_np(const char *, filesec_t); +int mkfifox_np(const char *, filesec_t); +int statx_np(const char *, struct stat *, filesec_t); +int umaskx_np(filesec_t); +#endif /* POSIX_C_SOURCE */ __END_DECLS #endif #endif /* !_SYS_STAT_H_ */ diff --git a/bsd/sys/sys_domain.h b/bsd/sys/sys_domain.h index b9582eca3..788b71d96 100644 --- a/bsd/sys/sys_domain.h +++ b/bsd/sys/sys_domain.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,6 +26,7 @@ #include +#include /* Kernel Events Protocol */ #define SYSPROTO_EVENT 1 /* kernel events protocol */ @@ -45,15 +46,17 @@ struct sockaddr_sys #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern struct domain systemdomain; /* built in system domain protocols init function */ -int kern_event_init(); -int kern_control_init(); +__BEGIN_DECLS +int kern_event_init(void); +int kern_control_init(void); +__END_DECLS -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* KERNEL */ #endif /* _SYSTEM_DOMAIN_H_ */ diff --git a/bsd/sys/syscall.h b/bsd/sys/syscall.h index 120dee023..d23b0ecf1 100644 --- a/bsd/sys/syscall.h +++ b/bsd/sys/syscall.h @@ -1,341 +1,456 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
+ * @APPLE_LICENSE_HEADER_START@ * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1992, 1995-1999 Apple Computer, Inc. All Rights Reserved */ -/* - * - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + * + * + * System call switch table. * + * DO NOT EDIT-- this file is automatically generated. + * created from syscalls.master */ -#include -#ifdef __APPLE_API_PRIVATE +#ifndef _SYS_SYSCALL_H_ +#define _SYS_SYSCALL_H_ -#define SYS_syscall 0 -#define SYS_exit 1 -#define SYS_fork 2 -#define SYS_read 3 -#define SYS_write 4 -#define SYS_open 5 -#define SYS_close 6 -#define SYS_wait4 7 - /* 8 is old creat */ -#define SYS_link 9 -#define SYS_unlink 10 - /* 11 is obsolete execv */ -#define SYS_chdir 12 -#define SYS_fchdir 13 -#define SYS_mknod 14 -#define SYS_chmod 15 -#define SYS_chown 16 - /* 17 is obsolete sbreak */ +#include +#ifdef __APPLE_API_PRIVATE +#define SYS_syscall 0 +#define SYS_exit 1 +#define SYS_fork 2 +#define SYS_read 3 +#define SYS_write 4 +#define SYS_open 5 +#define SYS_close 6 +#define SYS_wait4 7 + /* 8 old creat */ +#define SYS_link 9 +#define SYS_unlink 10 + /* 11 old execv */ +#define SYS_chdir 12 +#define SYS_fchdir 13 +#define SYS_mknod 14 +#define SYS_chmod 15 +#define SYS_chown 16 +#define SYS_obreak 17 #if COMPAT_GETFSSTAT - /* 18 is old getfsstat */ +#define SYS_ogetfsstat 18 +#else +#define SYS_getfsstat 18 +#endif + /* 19 old lseek */ +#define SYS_getpid 20 + /* 21 old mount */ + /* 22 old umount */ +#define SYS_setuid 23 +#define SYS_getuid 24 +#define SYS_geteuid 25 +#define SYS_ptrace 26 +#define SYS_recvmsg 27 +#define SYS_sendmsg 28 +#define SYS_recvfrom 29 +#define SYS_accept 30 +#define SYS_getpeername 31 +#define SYS_getsockname 32 +#define SYS_access 33 +#define SYS_chflags 34 +#define SYS_fchflags 35 +#define SYS_sync 36 +#define SYS_kill 37 + /* 38 old stat */ +#define SYS_getppid 39 + /* 40 old lstat */ +#define SYS_dup 41 +#define SYS_pipe 42 +#define SYS_getegid 43 +#define SYS_profil 44 +#define SYS_ktrace 45 +#define SYS_sigaction 46 +#define SYS_getgid 47 +#define SYS_sigprocmask 48 +#define SYS_getlogin 49 +#define SYS_setlogin 50 +#define SYS_acct 51 +#define SYS_sigpending 52 +#define SYS_sigaltstack 53 +#define SYS_ioctl 
54 +#define SYS_reboot 55 +#define SYS_revoke 56 +#define SYS_symlink 57 +#define SYS_readlink 58 +#define SYS_execve 59 +#define SYS_umask 60 +#define SYS_chroot 61 + /* 62 old fstat */ + /* 63 used internally , reserved */ + /* 64 old getpagesize */ +#define SYS_msync 65 +#define SYS_vfork 66 + /* 67 old vread */ + /* 68 old vwrite */ +#define SYS_sbrk 69 +#define SYS_sstk 70 + /* 71 old mmap */ +#define SYS_ovadvise 72 +#define SYS_munmap 73 +#define SYS_mprotect 74 +#define SYS_madvise 75 + /* 76 old vhangup */ + /* 77 old vlimit */ +#define SYS_mincore 78 +#define SYS_getgroups 79 +#define SYS_setgroups 80 +#define SYS_getpgrp 81 +#define SYS_setpgid 82 +#define SYS_setitimer 83 + /* 84 old wait */ +#define SYS_swapon 85 +#define SYS_getitimer 86 + /* 87 old gethostname */ + /* 88 old sethostname */ +#define SYS_getdtablesize 89 +#define SYS_dup2 90 + /* 91 old getdopt */ +#define SYS_fcntl 92 +#define SYS_select 93 + /* 94 old setdopt */ +#define SYS_fsync 95 +#define SYS_setpriority 96 +#define SYS_socket 97 +#define SYS_connect 98 + /* 99 old accept */ +#define SYS_getpriority 100 + /* 101 old send */ + /* 102 old recv */ +#ifdef __ppc__ + /* 103 old sigreturn */ +#else +#define SYS_sigreturn 103 +#endif +#define SYS_bind 104 +#define SYS_setsockopt 105 +#define SYS_listen 106 + /* 107 old vtimes */ + /* 108 old sigvec */ + /* 109 old sigblock */ + /* 110 old sigsetmask */ +#define SYS_sigsuspend 111 + /* 112 old sigstack */ + /* 113 old recvmsg */ + /* 114 old sendmsg */ + /* 115 old vtrace */ +#ifdef __ppc__ +#define SYS_ppc_gettimeofday 116 +#define SYS_gettimeofday 116 +#else +#define SYS_gettimeofday 116 +#endif +#define SYS_getrusage 117 +#define SYS_getsockopt 118 + /* 119 old resuba */ +#define SYS_readv 120 +#define SYS_writev 121 +#define SYS_settimeofday 122 +#define SYS_fchown 123 +#define SYS_fchmod 124 + /* 125 old recvfrom */ + /* 126 old setreuid */ + /* 127 old setregid */ +#define SYS_rename 128 + /* 129 old truncate */ + /* 130 old ftruncate */ +#define SYS_flock 131 +#define SYS_mkfifo 132 +#define SYS_sendto 133 +#define SYS_shutdown 134 +#define SYS_socketpair 135 +#define SYS_mkdir 136 +#define SYS_rmdir 137 +#define SYS_utimes 138 +#define SYS_futimes 139 +#define SYS_adjtime 140 + /* 141 old getpeername */ + /* 142 old gethostid */ + /* 143 old sethostid */ + /* 144 old getrlimit */ + /* 145 old setrlimit */ + /* 146 old killpg */ +#define SYS_setsid 147 + /* 148 old setquota */ + /* 149 old qquota */ + /* 150 old getsockname */ +#define SYS_getpgid 151 +#define SYS_setprivexec 152 +#define SYS_pread 153 +#define SYS_pwrite 154 +#if NFSSERVER +#define SYS_nfssvc 155 #else -#define SYS_getfsstat 18 + /* 155 */ #endif - /* 19 is old lseek */ -#define SYS_getpid 20 - /* 21 is obsolete mount */ - /* 22 is obsolete umount */ -#define SYS_setuid 23 -#define SYS_getuid 24 -#define SYS_geteuid 25 -#define SYS_ptrace 26 -#define SYS_recvmsg 27 -#define SYS_sendmsg 28 -#define SYS_recvfrom 29 -#define SYS_accept 30 -#define SYS_getpeername 31 -#define SYS_getsockname 32 -#define SYS_access 33 -#define SYS_chflags 34 -#define SYS_fchflags 35 -#define SYS_sync 36 -#define SYS_kill 37 - /* 38 is old stat */ -#define SYS_getppid 39 - /* 40 is old lstat */ -#define SYS_dup 41 -#define SYS_pipe 42 -#define SYS_getegid 43 -#define SYS_profil 44 -#define SYS_ktrace 45 -#define SYS_sigaction 46 -#define SYS_getgid 47 -#define SYS_sigprocmask 48 -#define SYS_getlogin 49 -#define SYS_setlogin 50 -#define SYS_acct 51 -#define SYS_sigpending 52 -#define SYS_sigaltstack 53 
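/*
 * [Editor's aside -- illustration only, not part of the patch.]  The SYS_
 * constants in this header are the numbers accepted by the syscall(2)
 * wrapper.  A minimal userland sketch, assuming only the standard
 * syscall(2) interface from <unistd.h>; the function name below is
 * hypothetical:
 */
#include <sys/syscall.h>
#include <unistd.h>

int
example_getpid_by_number(void)
{
	/* Equivalent to calling getpid() through libc. */
	return (int)syscall(SYS_getpid);
}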
-#define SYS_ioctl 54 -#define SYS_reboot 55 -#define SYS_revoke 56 -#define SYS_symlink 57 -#define SYS_readlink 58 -#define SYS_execve 59 -#define SYS_umask 60 -#define SYS_chroot 61 - /* 62 is old fstat */ - /* 63 is unused */ - /* 64 is old getpagesize */ -#define SYS_msync 65 -#define SYS_vfork 66 - /* 67 is obsolete vread */ - /* 68 is obsolete vwrite */ -#define SYS_sbrk 69 -#define SYS_sstk 70 - /* 71 is old mmap */ - /* 72 is obsolete vadvise */ -#define SYS_munmap 73 -#define SYS_mprotect 74 -#define SYS_madvise 75 - /* 76 is obsolete vhangup */ - /* 77 is obsolete vlimit */ -#define SYS_mincore 78 -#define SYS_getgroups 79 -#define SYS_setgroups 80 -#define SYS_getpgrp 81 -#define SYS_setpgid 82 -#define SYS_setitimer 83 - /* 84 is old wait */ -#define SYS_swapon 85 -#define SYS_getitimer 86 - /* 87 is old gethostname */ - /* 88 is old sethostname */ -#define SYS_getdtablesize 89 -#define SYS_dup2 90 -#define SYS_fcntl 92 -#define SYS_select 93 - /* 94 is obsolete setdopt */ -#define SYS_fsync 95 -#define SYS_setpriority 96 -#define SYS_socket 97 -#define SYS_connect 98 - /* 99 is old accept */ -#define SYS_getpriority 100 - /* 101 is old send */ - /* 102 is old recv */ -#ifndef __ppc__ -#define SYS_sigreturn 103 + /* 156 old getdirentries */ +#define SYS_statfs 157 +#define SYS_fstatfs 158 +#define SYS_unmount 159 + /* 160 old async_daemon */ +#if NFSCLIENT +#define SYS_getfh 161 +#else + /* 161 */ #endif -#define SYS_bind 104 -#define SYS_setsockopt 105 -#define SYS_listen 106 - /* 107 is obsolete vtimes */ - /* 108 is old sigvec */ - /* 109 is old sigblock */ - /* 110 is old sigsetmask */ -#define SYS_sigsuspend 111 - /* 112 is old sigstack */ - /* 113 is old recvmsg */ - /* 114 is old sendmsg */ - /* 115 is obsolete vtrace */ -#define SYS_gettimeofday 116 -#define SYS_getrusage 117 -#define SYS_getsockopt 118 - /* 119 is obsolete resuba */ -#define SYS_readv 120 -#define SYS_writev 121 -#define SYS_settimeofday 122 -#define SYS_fchown 123 -#define SYS_fchmod 124 - /* 125 is old recvfrom */ - /* 126 is old setreuid */ - /* 127 is old setregid */ -#define SYS_rename 128 - /* 129 is old truncate */ - /* 130 is old ftruncate */ -#define SYS_flock 131 -#define SYS_mkfifo 132 -#define SYS_sendto 133 -#define SYS_shutdown 134 -#define SYS_socketpair 135 -#define SYS_mkdir 136 -#define SYS_rmdir 137 -#define SYS_utimes 138 -#define SYS_futimes 139 -#define SYS_adjtime 140 - /* 141 is old getpeername */ - /* 142 is old gethostid */ - /* 143 is old sethostid */ - /* 144 is old getrlimit */ - /* 145 is old setrlimit */ - /* 146 is old killpg */ -#define SYS_setsid 147 - /* 148 is obsolete setquota */ - /* 149 is obsolete quota */ - /* 150 is old getsockname */ -#define SYS_getpgid 151 -#define SYS_setprivexec 152 -#define SYS_pread 153 -#define SYS_pwrite 154 -#define SYS_nfssvc 155 - /* 156 is old getdirentries */ -#define SYS_statfs 157 -#define SYS_fstatfs 158 -#define SYS_unmount 159 - /* 160 is obsolete async_daemon */ -#define SYS_getfh 161 - /* 162 is old getdomainname */ - /* 163 is old setdomainname */ - /* 164 is obsolete pcfs_mount */ -#define SYS_quotactl 165 - /* 166 is obsolete exportfs */ -#define SYS_mount 167 - /* 168 is obsolete ustat */ - /* 169 is unused */ -#define SYS_table 170 - /* 171 is old wait_3 */ - /* 172 is obsolete rpause */ - /* 173 is unused */ - /* 174 is obsolete getdents */ -#define SYS_gc_control 175 -#define SYS_add_profil 176 - /* 177 is unused */ - /* 178 is unused */ - /* 179 is unused */ -#define SYS_kdebug_trace 180 -#define SYS_setgid 181 
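/*
 * [Editor's aside -- illustration only, not part of the patch.]  Tracing
 * tools often need to map raw syscall numbers back to names; a hypothetical
 * helper built on a few of the constants defined in this header:
 */
#include <sys/syscall.h>

static const char *
syscall_name(int num)
{
	switch (num) {
	case SYS_read:		return "read";
	case SYS_write:		return "write";
	case SYS_open:		return "open";
	case SYS_close:		return "close";
	case SYS_kdebug_trace:	return "kdebug_trace";
	default:		return "unknown";
	}
}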
-#define SYS_setegid 182 -#define SYS_seteuid 183 + /* 162 old getdomainname */ + /* 163 old setdomainname */ + /* 164 */ +#define SYS_quotactl 165 + /* 166 old exportfs */ +#define SYS_mount 167 + /* 168 old ustat */ + /* 169 */ +#define SYS_table 170 + /* 171 old wait3 */ + /* 172 old rpause */ +#define SYS_waitid 173 + /* 174 old getdents */ + /* 175 old gc_control */ +#define SYS_add_profil 176 + /* 177 */ + /* 178 */ + /* 179 */ +#define SYS_kdebug_trace 180 +#define SYS_setgid 181 +#define SYS_setegid 182 +#define SYS_seteuid 183 #ifdef __ppc__ -#define SYS_sigreturn 184 +#define SYS_sigreturn 184 +#else + /* 184 */ #endif - /* 185 is unused */ - /* 186 is unused */ - /* 187 is unused */ -#define SYS_stat 188 -#define SYS_fstat 189 -#define SYS_lstat 190 -#define SYS_pathconf 191 -#define SYS_fpathconf 192 + /* 185 */ + /* 186 */ + /* 187 */ +#define SYS_stat 188 +#define SYS_fstat 189 +#define SYS_lstat 190 +#define SYS_pathconf 191 +#define SYS_fpathconf 192 #if COMPAT_GETFSSTAT -#define SYS_getfsstat 193 +#define SYS_getfsstat 193 +#else + /* 193 */ #endif -#define SYS_getrlimit 194 -#define SYS_setrlimit 195 -#define SYS_getdirentries 196 -#define SYS_mmap 197 -#define SYS___syscall 198 -#define SYS_lseek 199 -#define SYS_truncate 200 -#define SYS_ftruncate 201 -#define SYS___sysctl 202 -#define SYS_mlock 203 -#define SYS_munlock 204 -#define SYS_undelete 205 -#define SYS_ATsocket 206 -#define SYS_ATgetmsg 207 -#define SYS_ATputmsg 208 -#define SYS_ATPsndreq 209 -#define SYS_ATPsndrsp 210 -#define SYS_ATPgetreq 211 -#define SYS_ATPgetrsp 212 - /* 213 is reserved for AppleTalk */ -#define SYS_kqueue_from_portset_np 214 -#define SYS_kqueue_portset_np 215 -#define SYS_mkcomplex 216 -#define SYS_statv 217 -#define SYS_lstatv 218 -#define SYS_fstatv 219 -#define SYS_getattrlist 220 -#define SYS_setattrlist 221 -#define SYS_getdirentriesattr 222 -#define SYS_exchangedata 223 -#define SYS_checkuseraccess 224 -#define SYS_searchfs 225 - - /* 226 - 230 are reserved for HFS expansion */ - /* 231 - 241 are reserved */ -#define SYS_fsctl 242 - /* 243 - 246 are reserved */ -#define SYS_nfsclnt 247 /* from freebsd, for lockd */ -#define SYS_fhopen 248 /* from freebsd, for lockd */ - /* 249 is reserved */ -#define SYS_minherit 250 -#define SYS_semsys 251 -#define SYS_msgsys 252 -#define SYS_shmsys 253 -#define SYS_semctl 254 -#define SYS_semget 255 -#define SYS_semop 256 -#define SYS_semconfig 257 -#define SYS_msgctl 258 -#define SYS_msgget 259 -#define SYS_msgsnd 260 -#define SYS_msgrcv 261 -#define SYS_shmat 262 -#define SYS_shmctl 263 -#define SYS_shmdt 264 -#define SYS_shmget 265 -#define SYS_shm_open 266 -#define SYS_shm_unlink 267 -#define SYS_sem_open 268 -#define SYS_sem_close 269 -#define SYS_sem_unlink 270 -#define SYS_sem_wait 271 -#define SYS_sem_trywait 272 -#define SYS_sem_post 273 -#define SYS_sem_getvalue 274 -#define SYS_sem_init 275 -#define SYS_sem_destroy 276 - /* 277 - 295 are reserved */ -#define SYS_load_shared_file 296 -#define SYS_reset_shared_file 297 -#define SYS_new_system_shared_regions 298 - /* 299 - 309 are reserved */ -#define SYS_getsid 310 - /* 311 - 312 are reserved */ -#define SYS_aio_fsync 313 -#define SYS_aio_return 314 -#define SYS_aio_suspend 315 -#define SYS_aio_cancel 316 -#define SYS_aio_error 317 -#define SYS_aio_read 318 -#define SYS_aio_write 319 -#define SYS_lio_listio 320 - /* 321 - 323 are reserved */ -#define SYS_mlockall 324 -#define SYS_munlockall 325 - /* 326 is reserved */ -#define SYS_issetugid 327 -#define SYS___pthread_kill 328 -#define 
SYS_pthread_sigmask 329 -#define SYS_sigwait 330 - -#define SYS_audit 350 /* submit user space audit records */ -#define SYS_auditon 351 /* audit subsystem control */ - /* 352 is unused; used to be auditsvc */ -#define SYS_getauid 353 -#define SYS_setauid 354 -#define SYS_getaudit 355 -#define SYS_setaudit 356 -#define SYS_getaudit_addr 357 -#define SYS_setaudit_addr 358 -#define SYS_auditctl 359 /* audit file control */ +#define SYS_getrlimit 194 +#define SYS_setrlimit 195 +#define SYS_getdirentries 196 +#define SYS_mmap 197 + /* 198 __syscall */ +#define SYS_lseek 199 +#define SYS_truncate 200 +#define SYS_ftruncate 201 +#define SYS___sysctl 202 +#define SYS_mlock 203 +#define SYS_munlock 204 +#define SYS_undelete 205 +#ifdef __ppc__ +#define SYS_ATsocket 206 +#define SYS_ATgetmsg 207 +#define SYS_ATputmsg 208 +#define SYS_ATPsndreq 209 +#define SYS_ATPsndrsp 210 +#define SYS_ATPgetreq 211 +#define SYS_ATPgetrsp 212 + /* 213 Reserved for AppleTalk */ +#else +#define SYS_ATsocket 206 +#define SYS_ATgetmsg 207 +#define SYS_ATputmsg 208 +#define SYS_ATPsndreq 209 +#define SYS_ATPsndrsp 210 +#define SYS_ATPgetreq 211 +#define SYS_ATPgetrsp 212 + /* 213 Reserved for AppleTalk */ +#endif /* __ppc__ */ +#define SYS_kqueue_from_portset_np 214 +#define SYS_kqueue_portset_np 215 +#define SYS_mkcomplex 216 +#define SYS_statv 217 +#define SYS_lstatv 218 +#define SYS_fstatv 219 +#define SYS_getattrlist 220 +#define SYS_setattrlist 221 +#define SYS_getdirentriesattr 222 +#define SYS_exchangedata 223 +#ifdef __APPLE_API_OBSOLETE +#define SYS_checkuseraccess 224 +#else + /* 224 HFS checkuseraccess check access to a file */ +#endif /* __APPLE_API_OBSOLETE */ +#define SYS_searchfs 225 +#define SYS_delete 226 +#define SYS_copyfile 227 + /* 228 */ + /* 229 */ +#define SYS_poll 230 +#define SYS_watchevent 231 +#define SYS_waitevent 232 +#define SYS_modwatch 233 +#define SYS_getxattr 234 +#define SYS_fgetxattr 235 +#define SYS_setxattr 236 +#define SYS_fsetxattr 237 +#define SYS_removexattr 238 +#define SYS_fremovexattr 239 +#define SYS_listxattr 240 +#define SYS_flistxattr 241 +#define SYS_fsctl 242 +#define SYS_initgroups 243 + /* 244 */ + /* 245 */ + /* 246 */ +#if NFSCLIENT +#define SYS_nfsclnt 247 +#define SYS_fhopen 248 +#else + /* 247 */ + /* 248 */ +#endif + /* 249 */ +#define SYS_minherit 250 +#define SYS_semsys 251 +#define SYS_msgsys 252 +#define SYS_shmsys 253 +#define SYS_semctl 254 +#define SYS_semget 255 +#define SYS_semop 256 +#define SYS_semconfig 257 +#define SYS_msgctl 258 +#define SYS_msgget 259 +#define SYS_msgsnd 260 +#define SYS_msgrcv 261 +#define SYS_shmat 262 +#define SYS_shmctl 263 +#define SYS_shmdt 264 +#define SYS_shmget 265 +#define SYS_shm_open 266 +#define SYS_shm_unlink 267 +#define SYS_sem_open 268 +#define SYS_sem_close 269 +#define SYS_sem_unlink 270 +#define SYS_sem_wait 271 +#define SYS_sem_trywait 272 +#define SYS_sem_post 273 +#define SYS_sem_getvalue 274 +#define SYS_sem_init 275 +#define SYS_sem_destroy 276 +#define SYS_open_extended 277 +#define SYS_umask_extended 278 +#define SYS_stat_extended 279 +#define SYS_lstat_extended 280 +#define SYS_fstat_extended 281 +#define SYS_chmod_extended 282 +#define SYS_fchmod_extended 283 +#define SYS_access_extended 284 +#define SYS_settid 285 +#define SYS_gettid 286 +#define SYS_setsgroups 287 +#define SYS_getsgroups 288 +#define SYS_setwgroups 289 +#define SYS_getwgroups 290 +#define SYS_mkfifo_extended 291 +#define SYS_mkdir_extended 292 +#define SYS_identitysvc 293 + /* 294 */ + /* 295 */ +#define SYS_load_shared_file 296 
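/*
 * [Editor's aside -- illustration only, not part of the patch.]  The new
 * extended-attribute syscalls above (SYS_getxattr .. SYS_flistxattr) are
 * normally reached through the Tiger-era <sys/xattr.h> wrappers; a minimal
 * sketch, assuming those userland wrappers and their Darwin signatures (the
 * function name is hypothetical):
 */
#include <sys/types.h>
#include <sys/xattr.h>
#include <stdio.h>
#include <string.h>

int
print_xattr_names(const char *path)
{
	char names[1024];
	/* listxattr() fills the buffer with NUL-terminated attribute names. */
	ssize_t len = listxattr(path, names, sizeof(names), XATTR_NOFOLLOW);
	if (len < 0)
		return -1;
	for (ssize_t off = 0; off < len; off += (ssize_t)strlen(names + off) + 1)
		printf("%s\n", names + off);
	return 0;
}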
+#define SYS_reset_shared_file 297 +#define SYS_new_system_shared_regions 298 +#define SYS_shared_region_map_file_np 299 +#define SYS_shared_region_make_private_np 300 + /* 301 */ + /* 302 */ + /* 303 */ + /* 304 */ + /* 305 */ + /* 306 */ + /* 307 */ + /* 308 */ + /* 309 */ +#define SYS_getsid 310 +#define SYS_settid_with_pid 311 + /* 312 */ +#define SYS_aio_fsync 313 +#define SYS_aio_return 314 +#define SYS_aio_suspend 315 +#define SYS_aio_cancel 316 +#define SYS_aio_error 317 +#define SYS_aio_read 318 +#define SYS_aio_write 319 +#define SYS_lio_listio 320 + /* 321 */ + /* 322 */ + /* 323 */ +#define SYS_mlockall 324 +#define SYS_munlockall 325 + /* 326 */ +#define SYS_issetugid 327 +#define SYS___pthread_kill 328 +#define SYS_pthread_sigmask 329 +#define SYS_sigwait 330 +#define SYS___disable_threadsignal 331 +#define SYS___pthread_markcancel 332 +#define SYS___pthread_canceled 333 +#define SYS___semwait_signal 334 +#define SYS_utrace 335 + /* 336 */ + /* 337 */ + /* 338 */ + /* 339 */ + /* 340 */ + /* 341 */ + /* 342 */ + /* 343 */ + /* 344 */ + /* 345 */ + /* 346 */ + /* 347 */ + /* 348 */ + /* 349 */ +#define SYS_audit 350 +#define SYS_auditon 351 + /* 352 */ +#define SYS_getauid 353 +#define SYS_setauid 354 +#define SYS_getaudit 355 +#define SYS_setaudit 356 +#define SYS_getaudit_addr 357 +#define SYS_setaudit_addr 358 +#define SYS_auditctl 359 + /* 360 */ + /* 361 */ +#define SYS_kqueue 362 +#define SYS_kevent 363 +#define SYS_lchown 364 + /* 365 */ + /* 366 */ + /* 367 */ + /* 368 */ + /* 369 */ +#define SYS_MAXSYSCALL 370 -#define SYS_kqueue 362 -#define SYS_kevent 363 #endif /* __APPLE_API_PRIVATE */ - +#endif /* !_SYS_SYSCALL_H_ */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index 241f9c132..e6fb7b1b6 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,15 +64,21 @@ /* * These are for the eproc structure defined below. */ +#include + #include #ifndef KERNEL #include #include +#else +#include #endif - -#include #include +#include + +#ifdef BSD_KERNEL_PRIVATE #include +#endif /* * Definitions for sysctl call. The sysctl call uses a hierarchical name @@ -125,7 +131,6 @@ struct ctlname { #define OID_AUTO (-1) #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, void *arg1, int arg2, \ struct sysctl_req *req) @@ -135,15 +140,15 @@ struct ctlname { */ struct sysctl_req { struct proc *p; - int lock; - void *oldptr; + int lock; + user_addr_t oldptr; size_t oldlen; size_t oldidx; - int (*oldfunc)(struct sysctl_req *, const void *, size_t); - void *newptr; + int (*oldfunc)(struct sysctl_req *, const void *, size_t); + user_addr_t newptr; size_t newlen; size_t newidx; - int (*newfunc)(struct sysctl_req *, void *, size_t); + int (*newfunc)(struct sysctl_req *, void *, size_t); }; SLIST_HEAD(sysctl_oid_list, sysctl_oid); @@ -167,6 +172,8 @@ struct sysctl_oid { #define SYSCTL_IN(r, p, l) (r->newfunc)(r, p, l) #define SYSCTL_OUT(r, p, l) (r->oldfunc)(r, p, l) +__BEGIN_DECLS + int sysctl_handle_int SYSCTL_HANDLER_ARGS; int sysctl_handle_long SYSCTL_HANDLER_ARGS; int sysctl_handle_quad SYSCTL_HANDLER_ARGS; @@ -181,6 +188,8 @@ int sysctl_handle_opaque SYSCTL_HANDLER_ARGS; void sysctl_register_oid(struct sysctl_oid *oidp); void sysctl_unregister_oid(struct sysctl_oid *oidp); +__END_DECLS + /* Declare an oid to allow child oids to be added to it. 
*/ #define SYSCTL_DECL(name) \ extern struct sysctl_oid_list sysctl_##name##_children @@ -244,9 +253,30 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); #define SYSCTL_PROC(parent, nbr, name, access, ptr, arg, handler, fmt, descr) \ SYSCTL_OID(parent, nbr, name, access, \ ptr, arg, handler, fmt, descr) -#endif /* __APPLE_API_UNSTABLE */ + + +extern struct sysctl_oid_list sysctl__children; +SYSCTL_DECL(_kern); +SYSCTL_DECL(_sysctl); +SYSCTL_DECL(_vm); +SYSCTL_DECL(_vfs); +SYSCTL_DECL(_net); +SYSCTL_DECL(_debug); +SYSCTL_DECL(_hw); +SYSCTL_DECL(_machdep); +SYSCTL_DECL(_user); + #endif /* KERNEL */ +#ifdef XNU_KERNEL_PRIVATE +#define SYSCTL_DEF_ENABLED +#else +#ifndef KERNEL +#define SYSCTL_DEF_ENABLED +#endif +#endif + +#ifdef SYSCTL_DEF_ENABLED /* * Top-level identifiers */ @@ -308,21 +338,21 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); #define KERN_MAXFILESPERPROC 29 /* int: max open files per proc */ #define KERN_MAXPROCPERUID 30 /* int: max processes per uid */ #define KERN_DUMPDEV 31 /* dev_t: device to dump on */ -#define KERN_IPC 32 /* node: anything related to IPC */ -#define KERN_DUMMY 33 /* unused */ -#define KERN_PS_STRINGS 34 /* int: address of PS_STRINGS */ -#define KERN_USRSTACK 35 /* int: address of USRSTACK */ -#define KERN_LOGSIGEXIT 36 /* int: do we log sigexit procs? */ +#define KERN_IPC 32 /* node: anything related to IPC */ +#define KERN_DUMMY 33 /* unused */ +#define KERN_PS_STRINGS 34 /* int: address of PS_STRINGS */ +#define KERN_USRSTACK32 35 /* int: address of USRSTACK */ +#define KERN_LOGSIGEXIT 36 /* int: do we log sigexit procs? */ #define KERN_SYMFILE 37 /* string: kernel symbol filename */ #define KERN_PROCARGS 38 #define KERN_PCSAMPLES 39 /* node: pc sampling */ #define KERN_NETBOOT 40 /* int: are we netbooted? 1=yes,0=no */ #define KERN_PANICINFO 41 /* node: panic UI information */ -#define KERN_SYSV 42 /* node: panic UI information */ +#define KERN_SYSV 42 /* node: System V IPC information */ #define KERN_AFFINITY 43 /* xxx */ #define KERN_CLASSIC 44 /* xxx */ #define KERN_CLASSICHANDLER 45 /* xxx */ -#define KERN_AIOMAX 46 /* int: max aio requests */ +#define KERN_AIOMAX 46 /* int: max aio requests */ #define KERN_AIOPROCMAX 47 /* int: max aio requests per process */ #define KERN_AIOTHREADS 48 /* int: max aio worker threads */ #ifdef __APPLE_API_UNSTABLE @@ -331,8 +361,20 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); #define KERN_COREFILE 50 /* string: corefile format string */ #define KERN_COREDUMP 51 /* int: whether to coredump at all */ #define KERN_SUGID_COREDUMP 52 /* int: whether to dump SUGID cores */ -#define KERN_MAXID 53 /* number of valid kern ids */ - +#define KERN_PROCDELAYTERM 53 /* int: set/reset current proc for delayed termination during shutdown */ +#define KERN_SHREG_PRIVATIZABLE 54 /* int: can shared regions be privatized ? 
*/ +#define KERN_PROC_LOW_PRI_IO 55 /* int: set/reset current proc for low priority I/O */ +#define KERN_LOW_PRI_WINDOW 56 /* int: set/reset throttle window - milliseconds */ +#define KERN_LOW_PRI_DELAY 57 /* int: set/reset throttle delay - milliseconds */ +#define KERN_POSIX 58 /* node: posix tunables */ +#define KERN_USRSTACK64 59 /* LP64 user stack query */ +#define KERN_MAXID 60 /* number of valid kern ids */ + +#if defined(__LP64__) +#define KERN_USRSTACK KERN_USRSTACK64 +#else +#define KERN_USRSTACK KERN_USRSTACK32 +#endif /* KERN_KDEBUG types */ #define KERN_KDEFLAGS 1 @@ -364,8 +406,7 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); /* KERN_PANICINFO types */ #define KERN_PANICINFO_MAXSIZE 1 /* quad: panic UI image size limit */ -#define KERN_PANICINFO_IMAGE16 2 /* string: path to the panic UI (16 bit) */ -#define KERN_PANICINFO_IMAGE32 3 /* string: path to the panic UI (32 bit) */ +#define KERN_PANICINFO_IMAGE 2 /* panic UI in 8-bit kraw format */ /* * KERN_SYSV identifiers @@ -417,7 +458,7 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); { "dumpdev", CTLTYPE_STRUCT }, /* we lie; don't print as int */ \ { "ipc", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ - { "ps_strings", CTLTYPE_INT }, \ + { "dummy", CTLTYPE_INT }, \ { "usrstack", CTLTYPE_INT }, \ { "logsigexit", CTLTYPE_INT }, \ { "symfile",CTLTYPE_STRING },\ @@ -435,7 +476,13 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); { "procargs2",CTLTYPE_STRUCT }, \ { "corefile",CTLTYPE_STRING }, \ { "coredump", CTLTYPE_INT }, \ - { "sugid_coredump", CTLTYPE_INT } \ + { "sugid_coredump", CTLTYPE_INT }, \ + { "delayterm", CTLTYPE_INT }, \ + { "shreg_private", CTLTYPE_INT }, \ + { "proc_low_pri_io", CTLTYPE_INT }, \ + { "low_pri_window", CTLTYPE_INT }, \ + { "low_pri_delay", CTLTYPE_INT }, \ + { "posix", CTLTYPE_NODE } \ } /* @@ -460,13 +507,31 @@ void sysctl_unregister_oid(struct sysctl_oid *oidp); * KERN_PROC subtype ops return arrays of augmented proc structures: */ #ifdef __APPLE_API_UNSTABLE + +struct _pcred { + char pc_lock[72]; /* opaque content */ + struct ucred *pc_ucred; /* Current credentials. */ + uid_t p_ruid; /* Real user id. */ + uid_t p_svuid; /* Saved effective user id. */ + gid_t p_rgid; /* Real group id. */ + gid_t p_svgid; /* Saved effective group id. */ + int p_refcnt; /* Number of references. */ +}; + +struct _ucred { + int32_t cr_ref; /* reference count */ + uid_t cr_uid; /* effective user id */ + short cr_ngroups; /* number of groups */ + gid_t cr_groups[NGROUPS]; /* groups */ +}; + struct kinfo_proc { struct extern_proc kp_proc; /* proc structure */ struct eproc { struct proc *e_paddr; /* address of proc */ struct session *e_sess; /* session pointer */ - struct pcred e_pcred; /* process credentials */ - struct ucred e_ucred; /* current credentials */ + struct _pcred e_pcred; /* process credentials */ + struct _ucred e_ucred; /* current credentials */ struct vmspace e_vm; /* address space */ pid_t e_ppid; /* parent process id */ pid_t e_pgid; /* process group id */ @@ -480,14 +545,74 @@ struct kinfo_proc { short e_xrssize; /* text rss */ short e_xccount; /* text references */ short e_xswrss; - long e_flag; + int32_t e_flag; #define EPROC_CTTY 0x01 /* controlling tty vnode active */ #define EPROC_SLEADER 0x02 /* session leader */ #define COMAPT_MAXLOGNAME 12 char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ - long e_spare[4]; + int32_t e_spare[4]; } kp_eproc; }; + +#ifdef BSD_KERNEL_PRIVATE +#include + +// LP64todo - should this move? + +/* LP64 version of _pcred. 
all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with _pcred + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_pcred { + char pc_lock[72]; /* opaque content */ + user_addr_t pc_ucred; /* Current credentials. */ + uid_t p_ruid; /* Real user id. */ + uid_t p_svuid; /* Saved effective user id. */ + gid_t p_rgid; /* Real group id. */ + gid_t p_svgid; /* Saved effective group id. */ + int p_refcnt; /* Number of references. */ +}; + +/* LP64 version of kinfo_proc. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with kinfo_proc + */ +struct user_kinfo_proc { + struct user_extern_proc kp_proc; /* proc structure */ + struct user_eproc { + user_addr_t e_paddr; /* address of proc */ + user_addr_t e_sess; /* session pointer */ + struct user_pcred e_pcred; /* process credentials */ + struct _ucred e_ucred; /* current credentials */ + struct user_vmspace e_vm; /* address space */ + pid_t e_ppid; /* parent process id */ + pid_t e_pgid; /* process group id */ + short e_jobc; /* job control counter */ + dev_t e_tdev; /* controlling tty dev */ + pid_t e_tpgid; /* tty process group id */ + user_addr_t e_tsess; /* tty session pointer */ + char e_wmesg[WMESGLEN+1]; /* wchan message */ + segsz_t e_xsize; /* text size */ + short e_xrssize; /* text rss */ + short e_xccount; /* text references */ + short e_xswrss; + int32_t e_flag; + char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ + int32_t e_spare[4]; + } kp_eproc; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* BSD_KERNEL_PRIVATE */ + #endif /* __APPLE_API_UNSTABLE */ /* @@ -508,15 +633,60 @@ struct kinfo_proc { */ #define VM_METER 1 /* struct vmmeter */ #define VM_LOADAVG 2 /* struct loadavg */ -#define VM_MAXID 3 /* number of valid vm ids */ +/* + * Note: "3" was skipped sometime ago and should probably remain unused + * to avoid any new entry from being accepted by older kernels... + */ #define VM_MACHFACTOR 4 /* struct loadavg with mach factor*/ +#define VM_SWAPUSAGE 5 /* total swap usage */ +#define VM_MAXID 6 /* number of valid vm ids */ #define CTL_VM_NAMES { \ { 0, 0 }, \ { "vmmeter", CTLTYPE_STRUCT }, \ - { "loadavg", CTLTYPE_STRUCT } \ + { "loadavg", CTLTYPE_STRUCT }, \ + { 0, 0 }, /* placeholder for "3" (see comment above) */ \ + { "machfactor", CTLTYPE_STRUCT }, \ + { "swapusage", CTLTYPE_STRUCT } \ } +struct xsw_usage { + u_int64_t xsu_total; + u_int64_t xsu_avail; + u_int64_t xsu_used; + u_int32_t xsu_pagesize; + boolean_t xsu_encrypted; +}; + +#ifdef __APPLE_API_PRIVATE +/* Load average structure. Use of fixpt_t assume in scope. */ +/* XXX perhaps we should protect fixpt_t, and define it here (or discard it) */ +struct loadavg { + fixpt_t ldavg[3]; + long fscale; +}; +extern struct loadavg averunnable; +#define LSCALE 1000 /* scaling for "fixed point" arithmetic */ + +// LP64todo - should this move? +#ifdef BSD_KERNEL_PRIVATE + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif +struct user_loadavg { + fixpt_t ldavg[3]; + user_long_t fscale; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* __APPLE_API_PRIVATE */ + + /* * CTL_HW identifiers */ @@ -577,18 +747,24 @@ struct kinfo_proc { } /* - * These are the support HW selectors for sysctlbyname. Parameters that are byte count or frequencies are 64 bit numbers. + * These are the support HW selectors for sysctlbyname. 
Parameters that are byte counts or frequencies are 64 bit numbers. * All other parameters are 32 bit numbers. * * hw.memsize - The number of bytes of physical memory in the system. * - * hw.ncpu - The number maximum number of processor that could be available this boot. + * hw.ncpu - The maximum number of processors that could be available this boot. * Use this value for sizing of static per processor arrays; i.e. processor load statistics. * - * hw.activecpu - The number of cpus currently available for executing threads. + * hw.activecpu - The number of processors currently available for executing threads. * Use this number to determine the number of threads to create in SMP aware applications. * This number can change when power management modes are changed. - * + * + * hw.physicalcpu - The number of physical processors available in the current power management mode. + * hw.physicalcpu_max - The maximum number of physical processors that could be available this boot. + * + * hw.logicalcpu - The number of logical processors available in the current power management mode. + * hw.logicalcpu_max - The maximum number of logical processors that could be available this boot. + * * hw.tbfrequency - This gives the time base frequency used by the OS and is the basis of all timing services. * In general it is better to use mach's or higher level timing services, but this value * is needed to convert the PPC Time Base registers to real time. @@ -605,6 +781,9 @@ struct kinfo_proc { * hw.cpusubtype - These values should be used to determine what processor family the running cpu is from so that * the best binary can be chosen, or the best dynamic code generated. They should not be used * to determine if a given processor feature is available. + * hw.cputhreadtype - This value will be present if the processor supports threads. Like hw.cpusubtype this selector + * should not be used to infer features, and only used to name the processor's thread architecture. + * The values are defined in * * hw.byteorder - Gives the byte order of the processor. 4321 for big endian, 1234 for little. * @@ -699,20 +878,6 @@ struct kinfo_proc { #define CTL_DEBUG_MAXID 20 #ifdef KERNEL -#ifdef __APPLE_API_UNSTABLE - -extern struct sysctl_oid_list sysctl__children; -SYSCTL_DECL(_kern); -SYSCTL_DECL(_sysctl); -SYSCTL_DECL(_vm); -SYSCTL_DECL(_vfs); -SYSCTL_DECL(_net); -SYSCTL_DECL(_debug); -SYSCTL_DECL(_hw); -SYSCTL_DECL(_machdep); -SYSCTL_DECL(_user); - - #ifdef DEBUG /* * CTL_DEBUG variables. @@ -736,6 +901,7 @@ extern struct ctldebug debug10, debug11, debug12, debug13, debug14; extern struct ctldebug debug15, debug16, debug17, debug18, debug19; #endif /* DEBUG */ +#ifdef BSD_KERNEL_PRIVATE extern char machine[]; extern char osrelease[]; extern char ostype[]; @@ -747,18 +913,10 @@ void sysctl_unregister_set(struct linker_set *lsp); void sysctl_mib_init(void); int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *newp, size_t newlen); -int userland_sysctl(struct proc *p, int *name, u_int namelen, void *old, - size_t *oldlenp, int inkernel, void *newp, size_t newlen, +int userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t old, - size_t *oldlenp, int inkernel, user_addr_t newp, size_t newlen, size_t *retval); -/* - * Sysctl handling within the kernel. - * - * May be called with either or no funnel held; will take and - * switch funnels as required. 
- */ -int sysctlbyname __P((const char *, void *, size_t *, void *, size_t)); - /* * Internal sysctl function calling convention: * @@ -769,24 +927,30 @@ int sysctlbyname __P((const char *, void *, size_t *, void *, size_t)); * the name. */ typedef int (sysctlfn) - __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); - -int sysctl_int __P((void *, size_t *, void *, size_t, int *)); -int sysctl_rdint __P((void *, size_t *, void *, int)); -int sysctl_quad __P((void *, size_t *, void *, size_t, quad_t *)); -int sysctl_rdquad __P((void *, size_t *, void *, quad_t)); -int sysctl_string __P((void *, size_t *, void *, size_t, char *, int)); -int sysctl_rdstring __P((void *, size_t *, void *, char *)); -int sysctl_rdstruct __P((void *, size_t *, void *, void *, int)); - -#endif /* __APPLE_API_UNSTABLE */ + (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, struct proc *); + +int sysctl_int(user_addr_t, size_t *, user_addr_t, size_t, int *); +int sysctl_rdint(user_addr_t, size_t *, user_addr_t, int); +int sysctl_quad(user_addr_t, size_t *, user_addr_t, size_t, quad_t *); +int sysctl_rdquad(void *, size_t *, void *, quad_t); +int sysctl_string(user_addr_t, size_t *, user_addr_t, size_t, char *, int); +int sysctl_trstring(user_addr_t, size_t *, user_addr_t, size_t, char *, int); +int sysctl_rdstring(user_addr_t, size_t *, user_addr_t, char *); +int sysctl_rdstruct(user_addr_t, size_t *, user_addr_t, void *, int); + +#endif /* BSD_KERNEL_PRIVATE */ #else /* !KERNEL */ -#include __BEGIN_DECLS -int sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); -int sysctlbyname __P((const char *, void *, size_t *, void *, size_t)); -int sysctlnametomib __P((const char *, int *, size_t *)); +int sysctl(int *, u_int, void *, size_t *, void *, size_t); +int sysctlbyname(const char *, void *, size_t *, void *, size_t); +int sysctlnametomib(const char *, int *, size_t *); __END_DECLS + #endif /* KERNEL */ + + +#endif /* SYSCTL_DEF_ENABLED */ + + #endif /* !_SYS_SYSCTL_H_ */ diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h new file mode 100644 index 000000000..b8d73190d --- /dev/null +++ b/bsd/sys/sysent.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_SYSENT_H_ +#define _SYS_SYSENT_H_ + +#include +#include +#ifdef __ppc__ +#include +#endif + +#ifdef KERNEL_PRIVATE +#ifdef __APPLE_API_PRIVATE + +typedef int32_t sy_call_t(struct proc *, void *, int *); +typedef void sy_munge_t(const void *, void *); + +extern struct sysent { /* system call table */ + int16_t sy_narg; /* number of args */ + int8_t sy_cancel; /* cancellation type */ + int8_t sy_funnel; /* funnel type */ + sy_call_t *sy_call; /* implementing function */ + sy_munge_t *sy_arg_munge32; /* system call arguments munger for 32-bit process */ + sy_munge_t *sy_arg_munge64; /* system call arguments munger for 64-bit process */ + int32_t sy_return_type; /* system call return type */ +} sysent[]; + +/* sy_funnel flag bits */ +#define FUNNEL_MASK 0x00ff +#define UNSAFE_64BIT 0x0100 + +/* + * Valid values for sy_cancel + */ +#define _SYSCALL_CANCEL_NONE 0 /* Not a cancellation point */ +#define _SYSCALL_CANCEL_PRE 1 /* Can be cancelled on entry itself */ +#define _SYSCALL_CANCEL_POST 2 /* Can only be cancelled after syscall is run */ + +/* + * Valid values for sy_return_type + */ +#define _SYSCALL_RET_NONE 0 +#define _SYSCALL_RET_INT_T 1 +#define _SYSCALL_RET_UINT_T 2 +#define _SYSCALL_RET_OFF_T 3 +#define _SYSCALL_RET_ADDR_T 4 +#define _SYSCALL_RET_SIZE_T 5 +#define _SYSCALL_RET_SSIZE_T 6 + +extern int nsysent; + +#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ + +#endif /* !_SYS_SYSENT_H_ */ diff --git a/bsd/sys/syslimits.h b/bsd/sys/syslimits.h index dcf4cf403..187fd0791 100644 --- a/bsd/sys/syslimits.h +++ b/bsd/sys/syslimits.h @@ -77,10 +77,10 @@ #define PATH_MAX 1024 /* max bytes in pathname */ #define PIPE_BUF 512 /* max bytes for atomic pipe writes */ -#define BC_BASE_MAX INT_MAX /* max ibase/obase values in bc(1) */ -#define BC_DIM_MAX 65535 /* max array elements in bc(1) */ -#define BC_SCALE_MAX INT_MAX /* max scale value in bc(1) */ -#define BC_STRING_MAX INT_MAX /* max const string length in bc(1) */ +#define BC_BASE_MAX 99 /* max ibase/obase values in bc(1) */ +#define BC_DIM_MAX 2048 /* max array elements in bc(1) */ +#define BC_SCALE_MAX 99 /* max scale value in bc(1) */ +#define BC_STRING_MAX 1000 /* max const string length in bc(1) */ #define COLL_WEIGHTS_MAX 2 /* max weights for order keyword */ #define EQUIV_CLASS_MAX 2 #define EXPR_NEST_MAX 32 /* max expressions nested in expr(1) */ diff --git a/bsd/sys/syslog.h b/bsd/sys/syslog.h index aa0564e6e..4b8618692 100644 --- a/bsd/sys/syslog.h +++ b/bsd/sys/syslog.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -59,6 +59,7 @@ #define _SYS_SYSLOG_H_ #include +#include #define _PATH_LOG "/var/run/syslog" @@ -95,40 +96,40 @@ typedef struct _code { } CODE; CODE prioritynames[] = { - "alert", LOG_ALERT, - "crit", LOG_CRIT, - "debug", LOG_DEBUG, - "emerg", LOG_EMERG, - "err", LOG_ERR, - "error", LOG_ERR, /* DEPRECATED */ - "info", LOG_INFO, - "none", INTERNAL_NOPRI, /* INTERNAL */ - "notice", LOG_NOTICE, - "panic", LOG_EMERG, /* DEPRECATED */ - "warn", LOG_WARNING, /* DEPRECATED */ - "warning", LOG_WARNING, - NULL, -1, + { "alert", LOG_ALERT }, + { "crit", LOG_CRIT }, + { "debug", LOG_DEBUG }, + { "emerg", LOG_EMERG }, + { "err", LOG_ERR }, + { "error", LOG_ERR }, /* DEPRECATED */ + { "info", LOG_INFO }, + { "none", INTERNAL_NOPRI }, /* INTERNAL */ + { "notice", LOG_NOTICE }, + { "panic", LOG_EMERG }, /* DEPRECATED */ + { "warn", LOG_WARNING }, /* DEPRECATED */ + { "warning", LOG_WARNING }, + { 0, -1 } }; #endif /* facility codes */ -#define LOG_KERN (0<<3) /* kernel messages */ -#define LOG_USER (1<<3) /* random user-level messages */ -#define LOG_MAIL (2<<3) /* mail system */ -#define LOG_DAEMON (3<<3) /* system daemons */ -#define LOG_AUTH (4<<3) /* security/authorization messages */ -#define LOG_SYSLOG (5<<3) /* messages generated internally by syslogd */ -#define LOG_LPR (6<<3) /* line printer subsystem */ -#define LOG_NEWS (7<<3) /* network news subsystem */ -#define LOG_UUCP (8<<3) /* UUCP subsystem */ -#define LOG_CRON (9<<3) /* clock daemon */ -#define LOG_AUTHPRIV (10<<3) /* security/authorization messages (private) */ -#define LOG_FTP (11<<3) /* ftp daemon */ -#define LOG_NETINFO (12<<3) /* NetInfo */ +#define LOG_KERN (0<<3) /* kernel messages */ +#define LOG_USER (1<<3) /* random user-level messages */ +#define LOG_MAIL (2<<3) /* mail system */ +#define LOG_DAEMON (3<<3) /* system daemons */ +#define LOG_AUTH (4<<3) /* security/authorization messages */ +#define LOG_SYSLOG (5<<3) /* messages generated internally by syslogd */ +#define LOG_LPR (6<<3) /* line printer subsystem */ +#define LOG_NEWS (7<<3) /* network news subsystem */ +#define LOG_UUCP (8<<3) /* UUCP subsystem */ +#define LOG_CRON (9<<3) /* clock daemon */ +#define LOG_AUTHPRIV (10<<3) /* security/authorization messages (private) */ +#define LOG_FTP (11<<3) /* ftp daemon */ +#define LOG_NETINFO (12<<3) /* NetInfo */ #define LOG_REMOTEAUTH (13<<3) /* remote authentication/authorization */ -#define LOG_INSTALL (14<<3) /* installer subsystem */ +#define LOG_INSTALL (14<<3) /* installer subsystem */ +#define LOG_RAS (15<<3) /* Remote Access Service (VPN / PPP) */ - /* other codes through 15 reserved for system use */ #define LOG_LOCAL0 (16<<3) /* reserved for local use */ #define LOG_LOCAL1 (17<<3) /* reserved for local use */ #define LOG_LOCAL2 (18<<3) /* reserved for local use */ @@ -138,39 +139,43 @@ CODE prioritynames[] = { #define LOG_LOCAL6 (22<<3) /* reserved for local use */ #define LOG_LOCAL7 (23<<3) /* reserved for local use */ -#define LOG_NFACILITIES 24 /* current number of facilities */ +#define LOG_LAUNCHD (24<<3) /* launchd - general bootstrap daemon */ + +#define LOG_NFACILITIES 25 /* current number of facilities */ #define LOG_FACMASK 0x03f8 /* mask to extract facility part */ /* facility of pri */ #define LOG_FAC(p) (((p) & LOG_FACMASK) >> 3) #ifdef SYSLOG_NAMES CODE facilitynames[] = { - "auth", LOG_AUTH, - "authpriv", LOG_AUTHPRIV, - "cron", LOG_CRON, - "daemon", LOG_DAEMON, - "ftp", LOG_FTP, - "install", LOG_INSTALL, - "kern", LOG_KERN, - "lpr", LOG_LPR, - "mail", 
LOG_MAIL, - "mark", INTERNAL_MARK, /* INTERNAL */ - "netinfo", LOG_NETINFO, - "remoteauth", LOG_REMOTEAUTH, - "news", LOG_NEWS, - "security", LOG_AUTH, /* DEPRECATED */ - "syslog", LOG_SYSLOG, - "user", LOG_USER, - "uucp", LOG_UUCP, - "local0", LOG_LOCAL0, - "local1", LOG_LOCAL1, - "local2", LOG_LOCAL2, - "local3", LOG_LOCAL3, - "local4", LOG_LOCAL4, - "local5", LOG_LOCAL5, - "local6", LOG_LOCAL6, - "local7", LOG_LOCAL7, - NULL, -1, + { "auth", LOG_AUTH }, + { "authpriv", LOG_AUTHPRIV }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "install", LOG_INSTALL }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "mark", INTERNAL_MARK }, /* INTERNAL */ + { "netinfo", LOG_NETINFO }, + { "ras", LOG_RAS }, + { "remoteauth", LOG_REMOTEAUTH }, + { "news", LOG_NEWS }, + { "security", LOG_AUTH }, /* DEPRECATED */ + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER }, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { "launchd", LOG_LAUNCHD }, + { 0, -1 } }; #endif @@ -199,25 +204,19 @@ CODE facilitynames[] = { #define LOG_NOWAIT 0x10 /* don't wait for console forks: DEPRECATED */ #define LOG_PERROR 0x20 /* log to stderr as well */ -#include - #ifndef KERNEL - -/* - * Don't use va_list in the vsyslog() prototype. Va_list is typedef'd in two - * places ( and ), so if we include one - * of them here we may collide with the utility's includes. It's unreasonable - * for utilities to have to include one of them to include syslog.h, so we get - * _BSD_VA_LIST_ from and use it. - */ -#include +#ifndef _POSIX_C_SOURCE +#include /* for __darwin_va_list */ +#endif /* _POSIX_C_SOURCE */ __BEGIN_DECLS -void closelog __P((void)); -void openlog __P((const char *, int, int)); -int setlogmask __P((int)); -void syslog __P((int, const char *, ...)); -void vsyslog __P((int, const char *, _BSD_VA_LIST_)); +void closelog(void); +void openlog(const char *, int, int); +int setlogmask(int); +void syslog(int, const char *, ...) __DARWIN_LDBL_COMPAT(syslog); +#ifndef _POSIX_C_SOURCE +void vsyslog(int, const char *, __darwin_va_list) __DARWIN_LDBL_COMPAT(vsyslog); +#endif /* _POSIX_C_SOURCE */ __END_DECLS #else /* !KERNEL */ @@ -303,9 +302,11 @@ struct reg_desc { #endif /* __APPLE_API_OBSOLETE */ -void logpri __P((int)); -void log __P((int, const char *, ...)); -void addlog __P((const char *, ...)); +__BEGIN_DECLS +void logpri(int); +void log(int, const char *, ...); +void addlog(const char *, ...); +__END_DECLS #endif /* !KERNEL */ #endif /* !_SYS_SYSLOG_H_ */ diff --git a/bsd/sys/sysproto.h b/bsd/sys/sysproto.h new file mode 100644 index 000000000..9bc5c5f86 --- /dev/null +++ b/bsd/sys/sysproto.h @@ -0,0 +1,1610 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + * + * + * System call switch table. + * + * DO NOT EDIT-- this file is automatically generated. + * created from syscalls.master + */ + +#ifndef _SYS_SYSPROTO_H_ +#define _SYS_SYSPROTO_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef KERNEL +#ifdef __APPLE_API_PRIVATE +#ifdef __ppc__ +#define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ + ? 0 : sizeof(uint64_t) - sizeof(t)) +#else +#define PAD_(t) (sizeof(register_t) <= sizeof(t) \ + ? 0 : sizeof(register_t) - sizeof(t)) +#endif +#if BYTE_ORDER == LITTLE_ENDIAN +#define PADL_(t) 0 +#define PADR_(t) PAD_(t) +#else +#define PADL_(t) PAD_(t) +#define PADR_(t) 0 +#endif + +__BEGIN_DECLS +#ifndef __MUNGE_ONCE +#define __MUNGE_ONCE +#ifdef __ppc__ +void munge_w(const void *, void *); +void munge_ww(const void *, void *); +void munge_www(const void *, void *); +void munge_wwww(const void *, void *); +void munge_wwwww(const void *, void *); +void munge_wwwwww(const void *, void *); +void munge_wwwwwww(const void *, void *); +void munge_wwwwwwww(const void *, void *); +void munge_d(const void *, void *); +void munge_dd(const void *, void *); +void munge_ddd(const void *, void *); +void munge_dddd(const void *, void *); +void munge_ddddd(const void *, void *); +void munge_dddddd(const void *, void *); +void munge_ddddddd(const void *, void *); +void munge_dddddddd(const void *, void *); +void munge_wl(const void *, void *); +void munge_wlw(const void *, void *); +void munge_wwwl(const void *, void *); +void munge_wwwwl(const void *, void *); +void munge_wwwwwl(const void *, void *); +void munge_wsw(const void *, void *); +void munge_wws(const void *, void *); +void munge_wwwsw(const void *, void *); +#else +#define munge_w NULL +#define munge_ww NULL +#define munge_www NULL +#define munge_wwww NULL +#define munge_wwwww NULL +#define munge_wwwwww NULL +#define munge_wwwwwww NULL +#define munge_wwwwwwww NULL +#define munge_d NULL +#define munge_dd NULL +#define munge_ddd NULL +#define munge_dddd NULL +#define munge_ddddd NULL +#define munge_dddddd NULL +#define munge_ddddddd NULL +#define munge_dddddddd NULL +#define munge_wl NULL +#define munge_wlw NULL +#define munge_wwwl NULL +#define munge_wwwwl NULL +#define munge_wwwwwl NULL +#define munge_wsw NULL +#define munge_wws NULL +#define munge_wwwsw NULL +#endif // __ppc__ +#endif /* !__MUNGE_ONCE */ + +struct nosys_args { + register_t dummy; +}; +struct exit_args { + char rval_l_[PADL_(int)]; int rval; char rval_r_[PADR_(int)]; +}; +struct fork_args { + register_t dummy; +}; +struct read_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; + char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; +}; +struct write_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; + char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char 
nbyte_r_[PADR_(user_size_t)]; +}; +struct open_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; +}; +struct close_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; +}; +struct wait4_args { + char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; + char status_l_[PADL_(user_addr_t)]; user_addr_t status; char status_r_[PADR_(user_addr_t)]; + char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; + char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)]; +}; +struct link_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char link_l_[PADL_(user_addr_t)]; user_addr_t link; char link_r_[PADR_(user_addr_t)]; +}; +struct unlink_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct chdir_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct fchdir_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; +}; +struct mknod_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char dev_l_[PADL_(int)]; int dev; char dev_r_[PADR_(int)]; +}; +struct chmod_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; +}; +struct chown_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; + char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)]; +}; +struct obreak_args { + char nsize_l_[PADL_(char *)]; char * nsize; char nsize_r_[PADR_(char *)]; +}; +#if COMPAT_GETFSSTAT +struct ogetfsstat_args { + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +#else +struct getfsstat_args { + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +#endif +struct getpid_args { + register_t dummy; +}; +struct setuid_args { + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; +}; +struct getuid_args { + register_t dummy; +}; +struct geteuid_args { + register_t dummy; +}; +struct ptrace_args { + char req_l_[PADL_(int)]; int req; char req_r_[PADR_(int)]; + char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char data_l_[PADL_(int)]; int data; char data_r_[PADR_(int)]; +}; +struct recvmsg_args { + char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; + char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct sendmsg_args { + char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; + char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct recvfrom_args { + char 
s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; + char fromlenaddr_l_[PADL_(user_addr_t)]; user_addr_t fromlenaddr; char fromlenaddr_r_[PADR_(user_addr_t)]; +}; +struct accept_args { + char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; + char anamelen_l_[PADL_(user_addr_t)]; user_addr_t anamelen; char anamelen_r_[PADR_(user_addr_t)]; +}; +struct getpeername_args { + char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; + char asa_l_[PADL_(user_addr_t)]; user_addr_t asa; char asa_r_[PADR_(user_addr_t)]; + char alen_l_[PADL_(user_addr_t)]; user_addr_t alen; char alen_r_[PADR_(user_addr_t)]; +}; +struct getsockname_args { + char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; + char asa_l_[PADL_(user_addr_t)]; user_addr_t asa; char asa_r_[PADR_(user_addr_t)]; + char alen_l_[PADL_(user_addr_t)]; user_addr_t alen; char alen_r_[PADR_(user_addr_t)]; +}; +struct access_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct chflags_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct fchflags_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct sync_args { + register_t dummy; +}; +struct kill_args { + char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; + char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)]; +}; +struct getppid_args { + register_t dummy; +}; +struct dup_args { + char fd_l_[PADL_(u_int)]; u_int fd; char fd_r_[PADR_(u_int)]; +}; +struct pipe_args { + register_t dummy; +}; +struct getegid_args { + register_t dummy; +}; +struct profil_args { + char bufbase_l_[PADL_(user_addr_t)]; user_addr_t bufbase; char bufbase_r_[PADR_(user_addr_t)]; + char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; + char pcoffset_l_[PADL_(user_ulong_t)]; user_ulong_t pcoffset; char pcoffset_r_[PADR_(user_ulong_t)]; + char pcscale_l_[PADL_(u_int)]; u_int pcscale; char pcscale_r_[PADR_(u_int)]; +}; +struct ktrace_args { + char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)]; + char ops_l_[PADL_(int)]; int ops; char ops_r_[PADR_(int)]; + char facs_l_[PADL_(int)]; int facs; char facs_r_[PADR_(int)]; + char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; +}; +struct sigaction_args { + char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)]; + char nsa_l_[PADL_(user_addr_t)]; user_addr_t nsa; char nsa_r_[PADR_(user_addr_t)]; + char osa_l_[PADL_(user_addr_t)]; user_addr_t osa; char osa_r_[PADR_(user_addr_t)]; +}; +struct getgid_args { + register_t dummy; +}; +struct sigprocmask_args { + char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; + char mask_l_[PADL_(user_addr_t)]; user_addr_t mask; char mask_r_[PADR_(user_addr_t)]; + char omask_l_[PADL_(user_addr_t)]; user_addr_t omask; char omask_r_[PADR_(user_addr_t)]; +}; +struct getlogin_args { + char 
namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)]; + char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; +}; +struct setlogin_args { + char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)]; +}; +struct acct_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct sigpending_args { + char osv_l_[PADL_(user_addr_t)]; user_addr_t osv; char osv_r_[PADR_(user_addr_t)]; +}; +struct sigaltstack_args { + char nss_l_[PADL_(user_addr_t)]; user_addr_t nss; char nss_r_[PADR_(user_addr_t)]; + char oss_l_[PADL_(user_addr_t)]; user_addr_t oss; char oss_r_[PADR_(user_addr_t)]; +}; +struct ioctl_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char com_l_[PADL_(user_ulong_t)]; user_ulong_t com; char com_r_[PADR_(user_ulong_t)]; + char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; +}; +struct reboot_args { + char opt_l_[PADL_(int)]; int opt; char opt_r_[PADR_(int)]; + char command_l_[PADL_(user_addr_t)]; user_addr_t command; char command_r_[PADR_(user_addr_t)]; +}; +struct revoke_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct symlink_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char link_l_[PADL_(user_addr_t)]; user_addr_t link; char link_r_[PADR_(user_addr_t)]; +}; +struct readlink_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char count_l_[PADL_(int)]; int count; char count_r_[PADR_(int)]; +}; +struct execve_args { + char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)]; + char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)]; + char envp_l_[PADL_(user_addr_t)]; user_addr_t envp; char envp_r_[PADR_(user_addr_t)]; +}; +struct umask_args { + char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)]; +}; +struct chroot_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct msync_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct vfork_args { + register_t dummy; +}; +struct sbrk_args { + char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)]; +}; +struct sstk_args { + char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)]; +}; +struct ovadvise_args { + register_t dummy; +}; +struct munmap_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; +}; +struct mprotect_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; +}; +struct madvise_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char behav_l_[PADL_(int)]; int behav; char behav_r_[PADR_(int)]; +}; +struct mincore_args { + char 
+struct mincore_args {
+	char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)];
+	char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)];
+	char vec_l_[PADL_(user_addr_t)]; user_addr_t vec; char vec_r_[PADR_(user_addr_t)];
+};
+struct getgroups_args {
+	char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)];
+	char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)];
+};
+struct setgroups_args {
+	char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)];
+	char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)];
+};
+struct getpgrp_args {
+	register_t dummy;
+};
+struct setpgid_args {
+	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
+	char pgid_l_[PADL_(int)]; int pgid; char pgid_r_[PADR_(int)];
+};
+struct setitimer_args {
+	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
+	char itv_l_[PADL_(user_addr_t)]; user_addr_t itv; char itv_r_[PADR_(user_addr_t)];
+	char oitv_l_[PADL_(user_addr_t)]; user_addr_t oitv; char oitv_r_[PADR_(user_addr_t)];
+};
+struct swapon_args {
+	register_t dummy;
+};
+struct getitimer_args {
+	char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)];
+	char itv_l_[PADL_(user_addr_t)]; user_addr_t itv; char itv_r_[PADR_(user_addr_t)];
+};
+struct getdtablesize_args {
+	register_t dummy;
+};
+struct dup2_args {
+	char from_l_[PADL_(u_int)]; u_int from; char from_r_[PADR_(u_int)];
+	char to_l_[PADL_(u_int)]; u_int to; char to_r_[PADR_(u_int)];
+};
+struct fcntl_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)];
+	char arg_l_[PADL_(user_long_t)]; user_long_t arg; char arg_r_[PADR_(user_long_t)];
+};
+struct select_args {
+	char nd_l_[PADL_(int)]; int nd; char nd_r_[PADR_(int)];
+	char in_l_[PADL_(user_addr_t)]; user_addr_t in; char in_r_[PADR_(user_addr_t)];
+	char ou_l_[PADL_(user_addr_t)]; user_addr_t ou; char ou_r_[PADR_(user_addr_t)];
+	char ex_l_[PADL_(user_addr_t)]; user_addr_t ex; char ex_r_[PADR_(user_addr_t)];
+	char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)];
+};
+struct fsync_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+};
+struct setpriority_args {
+	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
+	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
+	char prio_l_[PADL_(int)]; int prio; char prio_r_[PADR_(int)];
+};
+struct socket_args {
+	char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)];
+	char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)];
+	char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)];
+};
+struct connect_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)];
+	char namelen_l_[PADL_(socklen_t)]; socklen_t namelen; char namelen_r_[PADR_(socklen_t)];
+};
+struct getpriority_args {
+	char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)];
+	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
+};
+#ifdef __ppc__
+#else
+struct sigreturn_args {
+	char sigcntxp_l_[PADL_(struct sigcontext *)]; struct sigcontext * sigcntxp; char sigcntxp_r_[PADR_(struct sigcontext *)];
+};
+#endif
+struct bind_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)];
+	char namelen_l_[PADL_(socklen_t)]; socklen_t namelen; char namelen_r_[PADR_(socklen_t)];
+};
+struct setsockopt_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)];
+	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
+	char val_l_[PADL_(user_addr_t)]; user_addr_t val; char val_r_[PADR_(user_addr_t)];
+	char valsize_l_[PADL_(socklen_t)]; socklen_t valsize; char valsize_r_[PADR_(socklen_t)];
+};
+struct listen_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char backlog_l_[PADL_(int)]; int backlog; char backlog_r_[PADR_(int)];
+};
+struct sigsuspend_args {
+	char mask_l_[PADL_(sigset_t)]; sigset_t mask; char mask_r_[PADR_(sigset_t)];
+};
+#ifdef __ppc__
+struct ppc_gettimeofday_args {
+	char tp_l_[PADL_(user_addr_t)]; user_addr_t tp; char tp_r_[PADR_(user_addr_t)];
+	char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)];
+};
+#else
+struct gettimeofday_args {
+	char tp_l_[PADL_(user_addr_t)]; user_addr_t tp; char tp_r_[PADR_(user_addr_t)];
+	char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)];
+};
+#endif
+struct getrusage_args {
+	char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)];
+	char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)];
+};
+struct getsockopt_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)];
+	char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)];
+	char val_l_[PADL_(user_addr_t)]; user_addr_t val; char val_r_[PADR_(user_addr_t)];
+	char avalsize_l_[PADL_(user_addr_t)]; user_addr_t avalsize; char avalsize_r_[PADR_(user_addr_t)];
+};
+struct readv_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)];
+	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
+};
+struct writev_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)];
+	char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)];
+};
+struct settimeofday_args {
+	char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)];
+	char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)];
+};
+struct fchown_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)];
+	char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)];
+};
+struct fchmod_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
+};
+struct rename_args {
+	char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)];
+	char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)];
+};
+struct flock_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
+};
+struct mkfifo_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
+};
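/*
 * [Editor's note - illustration, not part of the patch] The __ppc__ branch
 * above gives the PPC flavor of gettimeofday its own argument structure while
 * other architectures keep the plain one. Either way the handler sees user
 * pointers only as user_addr_t integers and reaches user space exclusively
 * through copyout(). A sketch of the shape (names hypothetical; a real
 * handler must also convert the timeval for 32- vs 64-bit callers):
 */
int
example_gettimeofday(struct proc *p, struct gettimeofday_args *uap, int *retval)
{
	struct timeval tv = { 0, 0 };		/* would be filled from the clock */

	*retval = 0;
	if (uap->tp != 0)			/* NULL check is an integer compare */
		return (copyout(&tv, uap->tp, sizeof(tv)));
	return (0);
}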
+struct sendto_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)];
+	char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)];
+	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+	char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)];
+	char tolen_l_[PADL_(socklen_t)]; socklen_t tolen; char tolen_r_[PADR_(socklen_t)];
+};
+struct shutdown_args {
+	char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)];
+	char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)];
+};
+struct socketpair_args {
+	char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)];
+	char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)];
+	char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)];
+	char rsv_l_[PADL_(user_addr_t)]; user_addr_t rsv; char rsv_r_[PADR_(user_addr_t)];
+};
+struct mkdir_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)];
+};
+struct rmdir_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+};
+struct utimes_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char tptr_l_[PADL_(user_addr_t)]; user_addr_t tptr; char tptr_r_[PADR_(user_addr_t)];
+};
+struct futimes_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char tptr_l_[PADL_(user_addr_t)]; user_addr_t tptr; char tptr_r_[PADR_(user_addr_t)];
+};
+struct adjtime_args {
+	char delta_l_[PADL_(user_addr_t)]; user_addr_t delta; char delta_r_[PADR_(user_addr_t)];
+	char olddelta_l_[PADL_(user_addr_t)]; user_addr_t olddelta; char olddelta_r_[PADR_(user_addr_t)];
+};
+struct setsid_args {
+	register_t dummy;
+};
+struct getpgid_args {
+	char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)];
+};
+struct setprivexec_args {
+	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+};
+struct pread_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)];
+	char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)];
+	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
+};
+struct pwrite_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)];
+	char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)];
+	char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)];
+};
+#if NFSSERVER
+struct nfssvc_args {
+	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+	char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)];
+};
+#else
+#endif
+struct statfs_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)];
+};
+struct fstatfs_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)];
+};
+struct unmount_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
+#if NFSCLIENT
+struct getfh_args {
+	char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)];
+	char fhp_l_[PADL_(user_addr_t)]; user_addr_t fhp; char fhp_r_[PADR_(user_addr_t)];
+};
+#else
+#endif
+struct quotactl_args {
+	char
path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; + char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; +}; +struct mount_args { + char type_l_[PADL_(user_addr_t)]; user_addr_t type; char type_r_[PADR_(user_addr_t)]; + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; +}; +struct waitid_args { + char idtype_l_[PADL_(idtype_t)]; idtype_t idtype; char idtype_r_[PADR_(idtype_t)]; + char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)]; + char infop_l_[PADL_(user_addr_t)]; user_addr_t infop; char infop_r_[PADR_(user_addr_t)]; + char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; +}; +struct add_profil_args { + char bufbase_l_[PADL_(user_addr_t)]; user_addr_t bufbase; char bufbase_r_[PADR_(user_addr_t)]; + char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; + char pcoffset_l_[PADL_(user_ulong_t)]; user_ulong_t pcoffset; char pcoffset_r_[PADR_(user_ulong_t)]; + char pcscale_l_[PADL_(u_int)]; u_int pcscale; char pcscale_r_[PADR_(u_int)]; +}; +struct kdebug_trace_args { + char code_l_[PADL_(int)]; int code; char code_r_[PADR_(int)]; + char arg1_l_[PADL_(int)]; int arg1; char arg1_r_[PADR_(int)]; + char arg2_l_[PADL_(int)]; int arg2; char arg2_r_[PADR_(int)]; + char arg3_l_[PADL_(int)]; int arg3; char arg3_r_[PADR_(int)]; + char arg4_l_[PADL_(int)]; int arg4; char arg4_r_[PADR_(int)]; + char arg5_l_[PADL_(int)]; int arg5; char arg5_r_[PADR_(int)]; +}; +struct setgid_args { + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; +}; +struct setegid_args { + char egid_l_[PADL_(gid_t)]; gid_t egid; char egid_r_[PADR_(gid_t)]; +}; +struct seteuid_args { + char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)]; +}; +#ifdef __ppc__ +struct sigreturn_args { + char uctx_l_[PADL_(user_addr_t)]; user_addr_t uctx; char uctx_r_[PADR_(user_addr_t)]; + char infostyle_l_[PADL_(int)]; int infostyle; char infostyle_r_[PADR_(int)]; +}; +#else +#endif +struct stat_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; +}; +struct fstat_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; +}; +struct lstat_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; +}; +struct pathconf_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; +}; +struct fpathconf_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; +}; +#if COMPAT_GETFSSTAT +struct getfsstat_args { + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char bufsize_l_[PADL_(user_long_t)]; user_long_t bufsize; char bufsize_r_[PADR_(user_long_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +#else +#endif +struct getrlimit_args { + char 
which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; + char rlp_l_[PADL_(user_addr_t)]; user_addr_t rlp; char rlp_r_[PADR_(user_addr_t)]; +}; +struct setrlimit_args { + char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; + char rlp_l_[PADL_(user_addr_t)]; user_addr_t rlp; char rlp_r_[PADR_(user_addr_t)]; +}; +struct getdirentries_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; + char count_l_[PADL_(u_int)]; u_int count; char count_r_[PADR_(u_int)]; + char basep_l_[PADL_(user_addr_t)]; user_addr_t basep; char basep_r_[PADR_(user_addr_t)]; +}; +struct mmap_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)]; +}; +struct lseek_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; + char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; +}; +struct truncate_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; +}; +struct ftruncate_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; +}; +struct __sysctl_args { + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; + char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; + char old_l_[PADL_(user_addr_t)]; user_addr_t old; char old_r_[PADR_(user_addr_t)]; + char oldlenp_l_[PADL_(user_addr_t)]; user_addr_t oldlenp; char oldlenp_r_[PADR_(user_addr_t)]; + char new_l_[PADL_(user_addr_t)]; user_addr_t new; char new_r_[PADR_(user_addr_t)]; + char newlen_l_[PADL_(user_size_t)]; user_size_t newlen; char newlen_r_[PADR_(user_size_t)]; +}; +struct mlock_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; +}; +struct munlock_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; +}; +struct undelete_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +#ifdef __ppc__ +struct ATsocket_args { + char proto_l_[PADL_(int)]; int proto; char proto_r_[PADR_(int)]; +}; +struct ATgetmsg_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char ctlptr_l_[PADL_(void *)]; void * ctlptr; char ctlptr_r_[PADR_(void *)]; + char datptr_l_[PADL_(void *)]; void * datptr; char datptr_r_[PADR_(void *)]; + char flags_l_[PADL_(int *)]; int * flags; char flags_r_[PADR_(int *)]; +}; +struct ATputmsg_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char ctlptr_l_[PADL_(void *)]; void * ctlptr; char ctlptr_r_[PADR_(void *)]; + char datptr_l_[PADL_(void *)]; void * datptr; char datptr_r_[PADR_(void *)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct ATPsndreq_args { + 
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char buf_l_[PADL_(unsigned char *)]; unsigned char * buf; char buf_r_[PADR_(unsigned char *)]; + char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)]; + char nowait_l_[PADL_(int)]; int nowait; char nowait_r_[PADR_(int)]; +}; +struct ATPsndrsp_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char respbuff_l_[PADL_(unsigned char *)]; unsigned char * respbuff; char respbuff_r_[PADR_(unsigned char *)]; + char resplen_l_[PADL_(int)]; int resplen; char resplen_r_[PADR_(int)]; + char datalen_l_[PADL_(int)]; int datalen; char datalen_r_[PADR_(int)]; +}; +struct ATPgetreq_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char buf_l_[PADL_(unsigned char *)]; unsigned char * buf; char buf_r_[PADR_(unsigned char *)]; + char buflen_l_[PADL_(int)]; int buflen; char buflen_r_[PADR_(int)]; +}; +struct ATPgetrsp_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char bdsp_l_[PADL_(unsigned char *)]; unsigned char * bdsp; char bdsp_r_[PADR_(unsigned char *)]; +}; +#else +#endif /* __ppc__ */ +struct kqueue_from_portset_np_args { + char portset_l_[PADL_(int)]; int portset; char portset_r_[PADR_(int)]; +}; +struct kqueue_portset_np_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; +}; +struct getattrlist_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; + char attributeBuffer_l_[PADL_(user_addr_t)]; user_addr_t attributeBuffer; char attributeBuffer_r_[PADR_(user_addr_t)]; + char bufferSize_l_[PADL_(user_size_t)]; user_size_t bufferSize; char bufferSize_r_[PADR_(user_size_t)]; + char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; +}; +struct setattrlist_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; + char attributeBuffer_l_[PADL_(user_addr_t)]; user_addr_t attributeBuffer; char attributeBuffer_r_[PADR_(user_addr_t)]; + char bufferSize_l_[PADL_(user_size_t)]; user_size_t bufferSize; char bufferSize_r_[PADR_(user_size_t)]; + char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; +}; +struct getdirentriesattr_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; + char buffer_l_[PADL_(user_addr_t)]; user_addr_t buffer; char buffer_r_[PADR_(user_addr_t)]; + char buffersize_l_[PADL_(user_size_t)]; user_size_t buffersize; char buffersize_r_[PADR_(user_size_t)]; + char count_l_[PADL_(user_addr_t)]; user_addr_t count; char count_r_[PADR_(user_addr_t)]; + char basep_l_[PADL_(user_addr_t)]; user_addr_t basep; char basep_r_[PADR_(user_addr_t)]; + char newstate_l_[PADL_(user_addr_t)]; user_addr_t newstate; char newstate_r_[PADR_(user_addr_t)]; + char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; +}; +struct exchangedata_args { + char path1_l_[PADL_(user_addr_t)]; user_addr_t path1; char path1_r_[PADR_(user_addr_t)]; + char path2_l_[PADL_(user_addr_t)]; user_addr_t path2; char path2_r_[PADR_(user_addr_t)]; + char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; +}; +#ifdef __APPLE_API_OBSOLETE +struct checkuseraccess_args { + char 
path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char userid_l_[PADL_(uid_t)]; uid_t userid; char userid_r_[PADR_(uid_t)]; + char groups_l_[PADL_(gid_t *)]; gid_t * groups; char groups_r_[PADR_(gid_t *)]; + char ngroups_l_[PADL_(int)]; int ngroups; char ngroups_r_[PADR_(int)]; + char accessrequired_l_[PADL_(int)]; int accessrequired; char accessrequired_r_[PADR_(int)]; + char options_l_[PADL_(u_long)]; u_long options; char options_r_[PADR_(u_long)]; +}; +#else +#endif /* __APPLE_API_OBSOLETE */ +struct searchfs_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char searchblock_l_[PADL_(user_addr_t)]; user_addr_t searchblock; char searchblock_r_[PADR_(user_addr_t)]; + char nummatches_l_[PADL_(user_addr_t)]; user_addr_t nummatches; char nummatches_r_[PADR_(user_addr_t)]; + char scriptcode_l_[PADL_(user_ulong_t)]; user_ulong_t scriptcode; char scriptcode_r_[PADR_(user_ulong_t)]; + char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; + char state_l_[PADL_(user_addr_t)]; user_addr_t state; char state_r_[PADR_(user_addr_t)]; +}; +struct delete_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct copyfile_args { + char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; + char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; +struct poll_args { + char fds_l_[PADL_(user_addr_t)]; user_addr_t fds; char fds_r_[PADR_(user_addr_t)]; + char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)]; + char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; +}; +struct watchevent_args { + char u_req_l_[PADL_(struct eventreq *)]; struct eventreq * u_req; char u_req_r_[PADR_(struct eventreq *)]; + char u_eventmask_l_[PADL_(int)]; int u_eventmask; char u_eventmask_r_[PADR_(int)]; +}; +struct waitevent_args { + char u_req_l_[PADL_(struct eventreq *)]; struct eventreq * u_req; char u_req_r_[PADR_(struct eventreq *)]; + char tv_l_[PADL_(struct timeval *)]; struct timeval * tv; char tv_r_[PADR_(struct timeval *)]; +}; +struct modwatch_args { + char u_req_l_[PADL_(struct eventreq *)]; struct eventreq * u_req; char u_req_r_[PADR_(struct eventreq *)]; + char u_eventmask_l_[PADL_(int)]; int u_eventmask; char u_eventmask_r_[PADR_(int)]; +}; +struct getxattr_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; + char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; + char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; + char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)]; + char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; +}; +struct fgetxattr_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; + char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; + char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; + char position_l_[PADL_(uint32_t)]; uint32_t position; char 
position_r_[PADR_(uint32_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct setxattr_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)];
+	char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)];
+	char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)];
+	char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct fsetxattr_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)];
+	char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)];
+	char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)];
+	char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct removexattr_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct fremovexattr_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct listxattr_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)];
+	char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct flistxattr_args {
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+	char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)];
+	char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)];
+	char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)];
+};
+struct fsctl_args {
+	char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)];
+	char cmd_l_[PADL_(user_ulong_t)]; user_ulong_t cmd; char cmd_r_[PADR_(user_ulong_t)];
+	char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)];
+	char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)];
+};
+struct initgroups_args {
+	char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)];
+	char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)];
+	char gmuid_l_[PADL_(int)]; int gmuid; char gmuid_r_[PADR_(int)];
+};
+#if NFSCLIENT
+struct nfsclnt_args {
+	char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+	char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)];
+};
+struct fhopen_args {
+	char u_fhp_l_[PADL_(user_addr_t)]; user_addr_t u_fhp; char u_fhp_r_[PADR_(user_addr_t)];
+	char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
+#else
+#endif
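/*
 * [Editor's note - illustration, not part of the patch] The xattr family
 * above shows the common pattern: buffers and attribute names arrive as
 * user_addr_t, sizes as user_size_t, and byte counts are returned through the
 * retval pointer (declared user_ssize_t * in the prototypes below) while the
 * int return carries only an errno. A sketch of a handler consuming
 * flistxattr_args (payload and names hypothetical):
 */
int
example_flistxattr(struct proc *p, struct flistxattr_args *uap, user_ssize_t *retval)
{
	static const char names[] = "com.example.attr";	/* made-up attribute list */
	int error;

	if (uap->bufsize == 0) {		/* size probe, as with listxattr(2) */
		*retval = (user_ssize_t)sizeof(names);
		return (0);
	}
	if (uap->bufsize < sizeof(names))
		return (ERANGE);
	error = copyout(names, uap->namebuf, sizeof(names));
	if (error == 0)
		*retval = (user_ssize_t)sizeof(names);
	return (error);
}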
+struct minherit_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; + char inherit_l_[PADL_(int)]; int inherit; char inherit_r_[PADR_(int)]; +}; +struct semsys_args { + char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; + char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; + char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; + char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; + char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)]; +}; +struct msgsys_args { + char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; + char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; + char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; + char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; + char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)]; +}; +struct shmsys_args { + char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; + char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; + char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; + char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; +}; +struct semctl_args { + char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)]; + char semnum_l_[PADL_(int)]; int semnum; char semnum_r_[PADR_(int)]; + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; +}; +struct semget_args { + char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; + char nsems_l_[PADL_(int)]; int nsems; char nsems_r_[PADR_(int)]; + char semflg_l_[PADL_(int)]; int semflg; char semflg_r_[PADR_(int)]; +}; +struct semop_args { + char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)]; + char sops_l_[PADL_(user_addr_t)]; user_addr_t sops; char sops_r_[PADR_(user_addr_t)]; + char nsops_l_[PADL_(int)]; int nsops; char nsops_r_[PADR_(int)]; +}; +struct semconfig_args { + char flag_l_[PADL_(semconfig_ctl_t)]; semconfig_ctl_t flag; char flag_r_[PADR_(semconfig_ctl_t)]; +}; +struct msgctl_args { + char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; +}; +struct msgget_args { + char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; + char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; +}; +struct msgsnd_args { + char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; + char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; + char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; + char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; +}; +struct msgrcv_args { + char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; + char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; + char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; + char msgtyp_l_[PADL_(user_long_t)]; user_long_t msgtyp; char msgtyp_r_[PADR_(user_long_t)]; + char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; +}; +struct shmat_args { + char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)]; + char shmaddr_l_[PADL_(user_addr_t)]; user_addr_t shmaddr; char shmaddr_r_[PADR_(user_addr_t)]; + char shmflg_l_[PADL_(int)]; int shmflg; char 
shmflg_r_[PADR_(int)]; +}; +struct shmctl_args { + char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)]; + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; +}; +struct shmdt_args { + char shmaddr_l_[PADL_(user_addr_t)]; user_addr_t shmaddr; char shmaddr_r_[PADR_(user_addr_t)]; +}; +struct shmget_args { + char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; + char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; + char shmflg_l_[PADL_(int)]; int shmflg; char shmflg_r_[PADR_(int)]; +}; +struct shm_open_args { + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; + char oflag_l_[PADL_(int)]; int oflag; char oflag_r_[PADR_(int)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; +}; +struct shm_unlink_args { + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; +}; +struct sem_open_args { + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; + char oflag_l_[PADL_(int)]; int oflag; char oflag_r_[PADR_(int)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char value_l_[PADL_(int)]; int value; char value_r_[PADR_(int)]; +}; +struct sem_close_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; +}; +struct sem_unlink_args { + char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; +}; +struct sem_wait_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; +}; +struct sem_trywait_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; +}; +struct sem_post_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; +}; +struct sem_getvalue_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; + char sval_l_[PADL_(user_addr_t)]; user_addr_t sval; char sval_r_[PADR_(user_addr_t)]; +}; +struct sem_init_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; + char phsared_l_[PADL_(int)]; int phsared; char phsared_r_[PADR_(int)]; + char value_l_[PADL_(u_int)]; u_int value; char value_r_[PADR_(u_int)]; +}; +struct sem_destroy_args { + char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; +}; +struct open_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct umask_extended_args { + char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct stat_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; + char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char 
xsecurity_size_r_[PADR_(user_addr_t)]; +}; +struct lstat_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; + char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; +}; +struct fstat_extended_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; + char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; +}; +struct chmod_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct fchmod_extended_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct access_extended_args { + char entries_l_[PADL_(user_addr_t)]; user_addr_t entries; char entries_r_[PADR_(user_addr_t)]; + char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; + char results_l_[PADL_(user_addr_t)]; user_addr_t results; char results_r_[PADR_(user_addr_t)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; +}; +struct settid_args { + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; +}; +struct gettid_args { + char uidp_l_[PADL_(user_addr_t)]; user_addr_t uidp; char uidp_r_[PADR_(user_addr_t)]; + char gidp_l_[PADL_(user_addr_t)]; user_addr_t gidp; char gidp_r_[PADR_(user_addr_t)]; +}; +struct setsgroups_args { + char setlen_l_[PADL_(int)]; int setlen; char setlen_r_[PADR_(int)]; + char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; +}; +struct getsgroups_args { + char setlen_l_[PADL_(user_addr_t)]; user_addr_t setlen; char setlen_r_[PADR_(user_addr_t)]; + char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; +}; +struct setwgroups_args { + char setlen_l_[PADL_(int)]; int setlen; char setlen_r_[PADR_(int)]; + char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; +}; +struct getwgroups_args { + char setlen_l_[PADL_(user_addr_t)]; user_addr_t setlen; char setlen_r_[PADR_(user_addr_t)]; + char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; +}; +struct mkfifo_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; + char mode_l_[PADL_(int)]; int mode; 
char mode_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct mkdir_extended_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; + char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; +}; +struct identitysvc_args { + char opcode_l_[PADL_(int)]; int opcode; char opcode_r_[PADR_(int)]; + char message_l_[PADL_(user_addr_t)]; user_addr_t message; char message_r_[PADR_(user_addr_t)]; +}; +struct load_shared_file_args { + char filename_l_[PADL_(char *)]; char * filename; char filename_r_[PADR_(char *)]; + char mfa_l_[PADL_(caddr_t)]; caddr_t mfa; char mfa_r_[PADR_(caddr_t)]; + char mfs_l_[PADL_(u_long)]; u_long mfs; char mfs_r_[PADR_(u_long)]; + char ba_l_[PADL_(caddr_t *)]; caddr_t * ba; char ba_r_[PADR_(caddr_t *)]; + char map_cnt_l_[PADL_(int)]; int map_cnt; char map_cnt_r_[PADR_(int)]; + char mappings_l_[PADL_(sf_mapping_t *)]; sf_mapping_t * mappings; char mappings_r_[PADR_(sf_mapping_t *)]; + char flags_l_[PADL_(int *)]; int * flags; char flags_r_[PADR_(int *)]; +}; +struct reset_shared_file_args { + char ba_l_[PADL_(caddr_t *)]; caddr_t * ba; char ba_r_[PADR_(caddr_t *)]; + char map_cnt_l_[PADL_(int)]; int map_cnt; char map_cnt_r_[PADR_(int)]; + char mappings_l_[PADL_(sf_mapping_t *)]; sf_mapping_t * mappings; char mappings_r_[PADR_(sf_mapping_t *)]; +}; +struct new_system_shared_regions_args { + register_t dummy; +}; +struct shared_region_map_file_np_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char mappingCount_l_[PADL_(uint32_t)]; uint32_t mappingCount; char mappingCount_r_[PADR_(uint32_t)]; + char mappings_l_[PADL_(user_addr_t)]; user_addr_t mappings; char mappings_r_[PADR_(user_addr_t)]; + char slide_p_l_[PADL_(user_addr_t)]; user_addr_t slide_p; char slide_p_r_[PADR_(user_addr_t)]; +}; +struct shared_region_make_private_np_args { + char rangeCount_l_[PADL_(uint32_t)]; uint32_t rangeCount; char rangeCount_r_[PADR_(uint32_t)]; + char ranges_l_[PADL_(user_addr_t)]; user_addr_t ranges; char ranges_r_[PADR_(user_addr_t)]; +}; +struct getsid_args { + char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; +}; +struct settid_with_pid_args { + char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; + char assume_l_[PADL_(int)]; int assume; char assume_r_[PADR_(int)]; +}; +struct aio_fsync_args { + char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)]; + char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct aio_return_args { + char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct aio_suspend_args { + char aiocblist_l_[PADL_(user_addr_t)]; user_addr_t aiocblist; char aiocblist_r_[PADR_(user_addr_t)]; + char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)]; + char timeoutp_l_[PADL_(user_addr_t)]; user_addr_t timeoutp; char timeoutp_r_[PADR_(user_addr_t)]; +}; +struct aio_cancel_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct aio_error_args { + char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct aio_read_args { + 
char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct aio_write_args { + char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; +}; +struct lio_listio_args { + char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char aiocblist_l_[PADL_(user_addr_t)]; user_addr_t aiocblist; char aiocblist_r_[PADR_(user_addr_t)]; + char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)]; + char sigp_l_[PADL_(user_addr_t)]; user_addr_t sigp; char sigp_r_[PADR_(user_addr_t)]; +}; +struct mlockall_args { + char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; +}; +struct munlockall_args { + char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; +}; +struct issetugid_args { + register_t dummy; +}; +struct __pthread_kill_args { + char thread_port_l_[PADL_(int)]; int thread_port; char thread_port_r_[PADR_(int)]; + char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)]; +}; +struct pthread_sigmask_args { + char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; + char set_l_[PADL_(user_addr_t)]; user_addr_t set; char set_r_[PADR_(user_addr_t)]; + char oset_l_[PADL_(user_addr_t)]; user_addr_t oset; char oset_r_[PADR_(user_addr_t)]; +}; +struct sigwait_args { + char set_l_[PADL_(user_addr_t)]; user_addr_t set; char set_r_[PADR_(user_addr_t)]; + char sig_l_[PADL_(user_addr_t)]; user_addr_t sig; char sig_r_[PADR_(user_addr_t)]; +}; +struct __disable_threadsignal_args { + char value_l_[PADL_(int)]; int value; char value_r_[PADR_(int)]; +}; +struct __pthread_markcancel_args { + char thread_port_l_[PADL_(int)]; int thread_port; char thread_port_r_[PADR_(int)]; +}; +struct __pthread_canceled_args { + char action_l_[PADL_(int)]; int action; char action_r_[PADR_(int)]; +}; +struct __semwait_signal_args { + char cond_sem_l_[PADL_(int)]; int cond_sem; char cond_sem_r_[PADR_(int)]; + char mutex_sem_l_[PADL_(int)]; int mutex_sem; char mutex_sem_r_[PADR_(int)]; + char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; + char relative_l_[PADL_(int)]; int relative; char relative_r_[PADR_(int)]; + char tv_sec_l_[PADL_(time_t)]; time_t tv_sec; char tv_sec_r_[PADR_(time_t)]; + char tv_nsec_l_[PADL_(int32_t)]; int32_t tv_nsec; char tv_nsec_r_[PADR_(int32_t)]; +}; +struct utrace_args { + char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; + char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; +}; +struct audit_args { + char record_l_[PADL_(user_addr_t)]; user_addr_t record; char record_r_[PADR_(user_addr_t)]; + char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; +}; +struct auditon_args { + char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; + char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; + char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; +}; +struct getauid_args { + char auid_l_[PADL_(user_addr_t)]; user_addr_t auid; char auid_r_[PADR_(user_addr_t)]; +}; +struct setauid_args { + char auid_l_[PADL_(user_addr_t)]; user_addr_t auid; char auid_r_[PADR_(user_addr_t)]; +}; +struct getaudit_args { + char auditinfo_l_[PADL_(user_addr_t)]; user_addr_t auditinfo; char auditinfo_r_[PADR_(user_addr_t)]; +}; +struct setaudit_args { + char auditinfo_l_[PADL_(user_addr_t)]; user_addr_t auditinfo; char auditinfo_r_[PADR_(user_addr_t)]; +}; +struct getaudit_addr_args { + char auditinfo_addr_l_[PADL_(user_addr_t)]; user_addr_t auditinfo_addr; char 
auditinfo_addr_r_[PADR_(user_addr_t)]; + char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; +}; +struct setaudit_addr_args { + char auditinfo_addr_l_[PADL_(user_addr_t)]; user_addr_t auditinfo_addr; char auditinfo_addr_r_[PADR_(user_addr_t)]; + char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; +}; +struct auditctl_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; +}; +struct kqueue_args { + register_t dummy; +}; +struct kevent_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char changelist_l_[PADL_(user_addr_t)]; user_addr_t changelist; char changelist_r_[PADR_(user_addr_t)]; + char nchanges_l_[PADL_(int)]; int nchanges; char nchanges_r_[PADR_(int)]; + char eventlist_l_[PADL_(user_addr_t)]; user_addr_t eventlist; char eventlist_r_[PADR_(user_addr_t)]; + char nevents_l_[PADL_(int)]; int nevents; char nevents_r_[PADR_(int)]; + char timeout_l_[PADL_(user_addr_t)]; user_addr_t timeout; char timeout_r_[PADR_(user_addr_t)]; +}; +struct lchown_args { + char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; + char owner_l_[PADL_(uid_t)]; uid_t owner; char owner_r_[PADR_(uid_t)]; + char group_l_[PADL_(gid_t)]; gid_t group; char group_r_[PADR_(gid_t)]; +}; +int nosys(struct proc *, struct nosys_args *, int *); +void exit(struct proc *, struct exit_args *, int *); +int fork(struct proc *, struct fork_args *, int *); +int read(struct proc *, struct read_args *, user_ssize_t *); +int write(struct proc *, struct write_args *, user_ssize_t *); +int open(struct proc *, struct open_args *, int *); +int close(struct proc *, struct close_args *, int *); +int wait4(struct proc *, struct wait4_args *, int *); +int link(struct proc *, struct link_args *, int *); +int unlink(struct proc *, struct unlink_args *, int *); +int chdir(struct proc *, struct chdir_args *, int *); +int fchdir(struct proc *, struct fchdir_args *, int *); +int mknod(struct proc *, struct mknod_args *, int *); +int chmod(struct proc *, struct chmod_args *, int *); +int chown(struct proc *, struct chown_args *, int *); +int obreak(struct proc *, struct obreak_args *, int *); +#if COMPAT_GETFSSTAT +int ogetfsstat(struct proc *, struct ogetfsstat_args *, int *); +#else +int getfsstat(struct proc *, struct getfsstat_args *, int *); +#endif +int getpid(struct proc *, struct getpid_args *, int *); +int setuid(struct proc *, struct setuid_args *, int *); +int getuid(struct proc *, struct getuid_args *, int *); +int geteuid(struct proc *, struct geteuid_args *, int *); +int ptrace(struct proc *, struct ptrace_args *, int *); +int recvmsg(struct proc *, struct recvmsg_args *, int *); +int sendmsg(struct proc *, struct sendmsg_args *, int *); +int recvfrom(struct proc *, struct recvfrom_args *, int *); +int accept(struct proc *, struct accept_args *, int *); +int getpeername(struct proc *, struct getpeername_args *, int *); +int getsockname(struct proc *, struct getsockname_args *, int *); +int access(struct proc *, struct access_args *, int *); +int chflags(struct proc *, struct chflags_args *, int *); +int fchflags(struct proc *, struct fchflags_args *, int *); +int sync(struct proc *, struct sync_args *, int *); +int kill(struct proc *, struct kill_args *, int *); +int getppid(struct proc *, struct getppid_args *, int *); +int dup(struct proc *, struct dup_args *, int *); +int pipe(struct proc *, struct pipe_args *, int *); +int getegid(struct proc *, struct getegid_args *, int *); +int profil(struct proc *, struct 
profil_args *, int *); +int ktrace(struct proc *, struct ktrace_args *, int *); +int sigaction(struct proc *, struct sigaction_args *, int *); +int getgid(struct proc *, struct getgid_args *, int *); +int sigprocmask(struct proc *, struct sigprocmask_args *, int *); +int getlogin(struct proc *, struct getlogin_args *, int *); +int setlogin(struct proc *, struct setlogin_args *, int *); +int acct(struct proc *, struct acct_args *, int *); +int sigpending(struct proc *, struct sigpending_args *, int *); +int sigaltstack(struct proc *, struct sigaltstack_args *, int *); +int ioctl(struct proc *, struct ioctl_args *, int *); +int reboot(struct proc *, struct reboot_args *, int *); +int revoke(struct proc *, struct revoke_args *, int *); +int symlink(struct proc *, struct symlink_args *, int *); +int readlink(struct proc *, struct readlink_args *, int *); +int execve(struct proc *, struct execve_args *, int *); +int umask(struct proc *, struct umask_args *, int *); +int chroot(struct proc *, struct chroot_args *, int *); +int msync(struct proc *, struct msync_args *, int *); +int vfork(struct proc *, struct vfork_args *, int *); +int sbrk(struct proc *, struct sbrk_args *, int *); +int sstk(struct proc *, struct sstk_args *, int *); +int ovadvise(struct proc *, struct ovadvise_args *, int *); +int munmap(struct proc *, struct munmap_args *, int *); +int mprotect(struct proc *, struct mprotect_args *, int *); +int madvise(struct proc *, struct madvise_args *, int *); +int mincore(struct proc *, struct mincore_args *, int *); +int getgroups(struct proc *, struct getgroups_args *, int *); +int setgroups(struct proc *, struct setgroups_args *, int *); +int getpgrp(struct proc *, struct getpgrp_args *, int *); +int setpgid(struct proc *, struct setpgid_args *, int *); +int setitimer(struct proc *, struct setitimer_args *, int *); +int swapon(struct proc *, struct swapon_args *, int *); +int getitimer(struct proc *, struct getitimer_args *, int *); +int getdtablesize(struct proc *, struct getdtablesize_args *, int *); +int dup2(struct proc *, struct dup2_args *, int *); +int fcntl(struct proc *, struct fcntl_args *, int *); +int select(struct proc *, struct select_args *, int *); +int fsync(struct proc *, struct fsync_args *, int *); +int setpriority(struct proc *, struct setpriority_args *, int *); +int socket(struct proc *, struct socket_args *, int *); +int connect(struct proc *, struct connect_args *, int *); +int getpriority(struct proc *, struct getpriority_args *, int *); +#ifdef __ppc__ +#else +int sigreturn(struct proc *, struct sigreturn_args *, int *); +#endif +int bind(struct proc *, struct bind_args *, int *); +int setsockopt(struct proc *, struct setsockopt_args *, int *); +int listen(struct proc *, struct listen_args *, int *); +int sigsuspend(struct proc *, struct sigsuspend_args *, int *); +#ifdef __ppc__ +int ppc_gettimeofday(struct proc *, struct ppc_gettimeofday_args *, int *); +#else +int gettimeofday(struct proc *, struct gettimeofday_args *, int *); +#endif +int getrusage(struct proc *, struct getrusage_args *, int *); +int getsockopt(struct proc *, struct getsockopt_args *, int *); +int readv(struct proc *, struct readv_args *, user_ssize_t *); +int writev(struct proc *, struct writev_args *, user_ssize_t *); +int settimeofday(struct proc *, struct settimeofday_args *, int *); +int fchown(struct proc *, struct fchown_args *, int *); +int fchmod(struct proc *, struct fchmod_args *, int *); +int rename(struct proc *, struct rename_args *, int *); +int flock(struct proc *, struct 
flock_args *, int *); +int mkfifo(struct proc *, struct mkfifo_args *, int *); +int sendto(struct proc *, struct sendto_args *, int *); +int shutdown(struct proc *, struct shutdown_args *, int *); +int socketpair(struct proc *, struct socketpair_args *, int *); +int mkdir(struct proc *, struct mkdir_args *, int *); +int rmdir(struct proc *, struct rmdir_args *, int *); +int utimes(struct proc *, struct utimes_args *, int *); +int futimes(struct proc *, struct futimes_args *, int *); +int adjtime(struct proc *, struct adjtime_args *, int *); +int setsid(struct proc *, struct setsid_args *, int *); +int getpgid(struct proc *, struct getpgid_args *, int *); +int setprivexec(struct proc *, struct setprivexec_args *, int *); +int pread(struct proc *, struct pread_args *, user_ssize_t *); +int pwrite(struct proc *, struct pwrite_args *, user_ssize_t *); +#if NFSSERVER +int nfssvc(struct proc *, struct nfssvc_args *, int *); +#else +#endif +int statfs(struct proc *, struct statfs_args *, int *); +int fstatfs(struct proc *, struct fstatfs_args *, int *); +int unmount(struct proc *, struct unmount_args *, int *); +#if NFSCLIENT +int getfh(struct proc *, struct getfh_args *, int *); +#else +#endif +int quotactl(struct proc *, struct quotactl_args *, int *); +int mount(struct proc *, struct mount_args *, int *); +int waitid(struct proc *, struct waitid_args *, int *); +int add_profil(struct proc *, struct add_profil_args *, int *); +int kdebug_trace(struct proc *, struct kdebug_trace_args *, int *); +int setgid(struct proc *, struct setgid_args *, int *); +int setegid(struct proc *, struct setegid_args *, int *); +int seteuid(struct proc *, struct seteuid_args *, int *); +#ifdef __ppc__ +int sigreturn(struct proc *, struct sigreturn_args *, int *); +#else +#endif +int stat(struct proc *, struct stat_args *, int *); +int fstat(struct proc *, struct fstat_args *, int *); +int lstat(struct proc *, struct lstat_args *, int *); +int pathconf(struct proc *, struct pathconf_args *, int *); +int fpathconf(struct proc *, struct fpathconf_args *, int *); +#if COMPAT_GETFSSTAT +int getfsstat(struct proc *, struct getfsstat_args *, int *); +#else +#endif +int getrlimit(struct proc *, struct getrlimit_args *, int *); +int setrlimit(struct proc *, struct setrlimit_args *, int *); +int getdirentries(struct proc *, struct getdirentries_args *, int *); +int mmap(struct proc *, struct mmap_args *, user_addr_t *); +int lseek(struct proc *, struct lseek_args *, off_t *); +int truncate(struct proc *, struct truncate_args *, int *); +int ftruncate(struct proc *, struct ftruncate_args *, int *); +int __sysctl(struct proc *, struct __sysctl_args *, int *); +int mlock(struct proc *, struct mlock_args *, int *); +int munlock(struct proc *, struct munlock_args *, int *); +int undelete(struct proc *, struct undelete_args *, int *); +#ifdef __ppc__ +int ATsocket(struct proc *, struct ATsocket_args *, int *); +int ATgetmsg(struct proc *, struct ATgetmsg_args *, int *); +int ATputmsg(struct proc *, struct ATputmsg_args *, int *); +int ATPsndreq(struct proc *, struct ATPsndreq_args *, int *); +int ATPsndrsp(struct proc *, struct ATPsndrsp_args *, int *); +int ATPgetreq(struct proc *, struct ATPgetreq_args *, int *); +int ATPgetrsp(struct proc *, struct ATPgetrsp_args *, int *); +#else +#endif /* __ppc__ */ +int kqueue_from_portset_np(struct proc *, struct kqueue_from_portset_np_args *, int *); +int kqueue_portset_np(struct proc *, struct kqueue_portset_np_args *, int *); +int getattrlist(struct proc *, struct getattrlist_args *, 
int *); +int setattrlist(struct proc *, struct setattrlist_args *, int *); +int getdirentriesattr(struct proc *, struct getdirentriesattr_args *, int *); +int exchangedata(struct proc *, struct exchangedata_args *, int *); +#ifdef __APPLE_API_OBSOLETE +int checkuseraccess(struct proc *, struct checkuseraccess_args *, int *); +#else +#endif /* __APPLE_API_OBSOLETE */ +int searchfs(struct proc *, struct searchfs_args *, int *); +int delete(struct proc *, struct delete_args *, int *); +int copyfile(struct proc *, struct copyfile_args *, int *); +int poll(struct proc *, struct poll_args *, int *); +int watchevent(struct proc *, struct watchevent_args *, int *); +int waitevent(struct proc *, struct waitevent_args *, int *); +int modwatch(struct proc *, struct modwatch_args *, int *); +int getxattr(struct proc *, struct getxattr_args *, user_ssize_t *); +int fgetxattr(struct proc *, struct fgetxattr_args *, user_ssize_t *); +int setxattr(struct proc *, struct setxattr_args *, int *); +int fsetxattr(struct proc *, struct fsetxattr_args *, int *); +int removexattr(struct proc *, struct removexattr_args *, int *); +int fremovexattr(struct proc *, struct fremovexattr_args *, int *); +int listxattr(struct proc *, struct listxattr_args *, user_ssize_t *); +int flistxattr(struct proc *, struct flistxattr_args *, user_ssize_t *); +int fsctl(struct proc *, struct fsctl_args *, int *); +int initgroups(struct proc *, struct initgroups_args *, int *); +#if NFSCLIENT +int nfsclnt(struct proc *, struct nfsclnt_args *, int *); +int fhopen(struct proc *, struct fhopen_args *, int *); +#else +#endif +int minherit(struct proc *, struct minherit_args *, int *); +int semsys(struct proc *, struct semsys_args *, int *); +int msgsys(struct proc *, struct msgsys_args *, int *); +int shmsys(struct proc *, struct shmsys_args *, int *); +int semctl(struct proc *, struct semctl_args *, int *); +int semget(struct proc *, struct semget_args *, int *); +int semop(struct proc *, struct semop_args *, int *); +int semconfig(struct proc *, struct semconfig_args *, int *); +int msgctl(struct proc *, struct msgctl_args *, int *); +int msgget(struct proc *, struct msgget_args *, int *); +int msgsnd(struct proc *, struct msgsnd_args *, int *); +int msgrcv(struct proc *, struct msgrcv_args *, user_ssize_t *); +int shmat(struct proc *, struct shmat_args *, int *); +int shmctl(struct proc *, struct shmctl_args *, int *); +int shmdt(struct proc *, struct shmdt_args *, int *); +int shmget(struct proc *, struct shmget_args *, int *); +int shm_open(struct proc *, struct shm_open_args *, int *); +int shm_unlink(struct proc *, struct shm_unlink_args *, int *); +int sem_open(struct proc *, struct sem_open_args *, user_addr_t *); +int sem_close(struct proc *, struct sem_close_args *, int *); +int sem_unlink(struct proc *, struct sem_unlink_args *, int *); +int sem_wait(struct proc *, struct sem_wait_args *, int *); +int sem_trywait(struct proc *, struct sem_trywait_args *, int *); +int sem_post(struct proc *, struct sem_post_args *, int *); +int sem_getvalue(struct proc *, struct sem_getvalue_args *, int *); +int sem_init(struct proc *, struct sem_init_args *, int *); +int sem_destroy(struct proc *, struct sem_destroy_args *, int *); +int open_extended(struct proc *, struct open_extended_args *, int *); +int umask_extended(struct proc *, struct umask_extended_args *, int *); +int stat_extended(struct proc *, struct stat_extended_args *, int *); +int lstat_extended(struct proc *, struct lstat_extended_args *, int *); +int fstat_extended(struct 
proc *, struct fstat_extended_args *, int *); +int chmod_extended(struct proc *, struct chmod_extended_args *, int *); +int fchmod_extended(struct proc *, struct fchmod_extended_args *, int *); +int access_extended(struct proc *, struct access_extended_args *, int *); +int settid(struct proc *, struct settid_args *, int *); +int gettid(struct proc *, struct gettid_args *, int *); +int setsgroups(struct proc *, struct setsgroups_args *, int *); +int getsgroups(struct proc *, struct getsgroups_args *, int *); +int setwgroups(struct proc *, struct setwgroups_args *, int *); +int getwgroups(struct proc *, struct getwgroups_args *, int *); +int mkfifo_extended(struct proc *, struct mkfifo_extended_args *, int *); +int mkdir_extended(struct proc *, struct mkdir_extended_args *, int *); +int identitysvc(struct proc *, struct identitysvc_args *, int *); +int load_shared_file(struct proc *, struct load_shared_file_args *, int *); +int reset_shared_file(struct proc *, struct reset_shared_file_args *, int *); +int new_system_shared_regions(struct proc *, struct new_system_shared_regions_args *, int *); +int shared_region_map_file_np(struct proc *, struct shared_region_map_file_np_args *, int *); +int shared_region_make_private_np(struct proc *, struct shared_region_make_private_np_args *, int *); +int getsid(struct proc *, struct getsid_args *, int *); +int settid_with_pid(struct proc *, struct settid_with_pid_args *, int *); +int aio_fsync(struct proc *, struct aio_fsync_args *, int *); +int aio_return(struct proc *, struct aio_return_args *, user_ssize_t *); +int aio_suspend(struct proc *, struct aio_suspend_args *, int *); +int aio_cancel(struct proc *, struct aio_cancel_args *, int *); +int aio_error(struct proc *, struct aio_error_args *, int *); +int aio_read(struct proc *, struct aio_read_args *, int *); +int aio_write(struct proc *, struct aio_write_args *, int *); +int lio_listio(struct proc *, struct lio_listio_args *, int *); +int mlockall(struct proc *, struct mlockall_args *, int *); +int munlockall(struct proc *, struct munlockall_args *, int *); +int issetugid(struct proc *, struct issetugid_args *, int *); +int __pthread_kill(struct proc *, struct __pthread_kill_args *, int *); +int pthread_sigmask(struct proc *, struct pthread_sigmask_args *, int *); +int sigwait(struct proc *, struct sigwait_args *, int *); +int __disable_threadsignal(struct proc *, struct __disable_threadsignal_args *, int *); +int __pthread_markcancel(struct proc *, struct __pthread_markcancel_args *, int *); +int __pthread_canceled(struct proc *, struct __pthread_canceled_args *, int *); +int __semwait_signal(struct proc *, struct __semwait_signal_args *, int *); +int utrace(struct proc *, struct utrace_args *, int *); +int audit(struct proc *, struct audit_args *, int *); +int auditon(struct proc *, struct auditon_args *, int *); +int getauid(struct proc *, struct getauid_args *, int *); +int setauid(struct proc *, struct setauid_args *, int *); +int getaudit(struct proc *, struct getaudit_args *, int *); +int setaudit(struct proc *, struct setaudit_args *, int *); +int getaudit_addr(struct proc *, struct getaudit_addr_args *, int *); +int setaudit_addr(struct proc *, struct setaudit_addr_args *, int *); +int auditctl(struct proc *, struct auditctl_args *, int *); +int kqueue(struct proc *, struct kqueue_args *, int *); +int kevent(struct proc *, struct kevent_args *, int *); +int lchown(struct proc *, struct lchown_args *, int *); + +__END_DECLS +#undef PAD_ +#undef PADL_ +#undef PADR_ + +#endif /* 
__APPLE_API_PRIVATE */ +#endif /* KERNEL */ + +#endif /* !_SYS_SYSPROTO_H_ */ diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h index 3110e2679..e81bed439 100644 --- a/bsd/sys/systm.h +++ b/bsd/sys/systm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -95,46 +95,44 @@ #include #include #include +#include +#include +#ifdef BSD_KERNEL_PRIVATE #include #include -#include #include +#endif +#include __BEGIN_DECLS +#ifdef KERNEL +#include +#endif #include +#include __END_DECLS -#ifdef __APPLE_API_PRIVATE -extern int securelevel; /* system security level */ -extern const char *panicstr; /* panic message */ +#ifdef BSD_KERNEL_PRIVATE extern char version[]; /* system version */ extern char copyright[]; /* system copyright */ -extern struct sysent { /* system call table */ - int16_t sy_narg; /* number of args */ - int8_t sy_parallel;/* can execute in parallel */ - int8_t sy_funnel; /* funnel type */ - int32_t (*sy_call)(); /* implementing function */ -} sysent[]; -extern int nsysent; - extern int boothowto; /* reboot flags, from console subsystem */ extern int show_space; extern int nblkdev; /* number of entries in bdevsw */ extern int nchrdev; /* number of entries in cdevsw */ -extern dev_t rootdev; /* root device */ -extern struct vnode *rootvp; /* vnode equivalent to above */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ -#ifdef __APPLE_API_UNSTABLE +#ifdef KERNEL_PRIVATE #define NO_FUNNEL 0 #define KERNEL_FUNNEL 1 -#define NETWORK_FUNNEL 2 +extern int securelevel; /* system security level */ +extern dev_t rootdev; /* root device */ +extern struct vnode *rootvp; /* vnode equivalent to above */ extern funnel_t * kernel_flock; -extern funnel_t * network_flock; -#endif /* __APPLE_API_UNSTABLE */ + +#endif /* KERNEL_PRIVATE */ #define SYSINIT(a,b,c,d,e) #define MALLOC_DEFINE(a,b,c) @@ -146,95 +144,85 @@ extern funnel_t * network_flock; * General function declarations. 
*/ __BEGIN_DECLS -int nullop __P((void)); -int enodev (); /* avoid actual prototype for multiple use */ -void enodev_strat(); -int nulldev(); -int enoioctl __P((void)); -int enxio __P((void)); -int eopnotsupp __P((void)); -int einval __P((void)); - -#ifdef __APPLE_API_UNSTABLE -int seltrue __P((dev_t dev, int which, struct proc *p)); +int nullop(void); +int nulldev(void); +int enoioctl(void); +int enxio(void); +int eopnotsupp(void); +int einval(void); + +#ifdef BSD_KERNEL_PRIVATE +int seltrue(dev_t dev, int which, struct proc *p); +void ttyprintf(struct tty *, const char *, ...); +void realitexpire(void *); +int hzto(struct timeval *tv); #endif /* BSD_KERNEL_PRIVATE */ -void *hashinit __P((int count, int type, u_long *hashmask)); -int nosys __P((struct proc *, void *, register_t *)); +void *hashinit(int count, int type, u_long *hashmask); + +void tablefull(const char *); + +int kvprintf(char const *, void (*)(int, void*), void *, int, + __darwin_va_list); + +void uprintf(const char *, ...); + + +void ovbcopy(const void *from, void *to, size_t len); +int copywithin(void *saddr, void *daddr, size_t len); + +int fubyte(user_addr_t addr); +int fuibyte(user_addr_t addr); +int subyte(user_addr_t addr, int byte); +int suibyte(user_addr_t addr, int byte); +long fuword(user_addr_t addr); +long fuiword(user_addr_t addr); +int suword(user_addr_t addr, long word); +int suiword(user_addr_t addr, long word); +int64_t fulong(user_addr_t addr); +int sulong(user_addr_t addr, int64_t longword); +uint64_t fuulong(user_addr_t addr); +int suulong(user_addr_t addr, uint64_t ulongword); +#define fusize(_a) ((user_size_t)fulong(_a)) +#define susize(_a, _s) sulong((_a), (_s)) +#define fuptr(_a) ((user_addr_t)fulong(_a)) +#define suptr(_a, _p) sulong((_a), (_p)) +int useracc(user_addr_t addr, user_size_t len, int prot); -#ifdef __GNUC__ -volatile void panic __P((const char *, ...)); -#else -void panic __P((const char *, ...)); -#endif -void tablefull __P((const char *)); -void log __P((int, const char *, ...)); -void kprintf __P((const char *, ...)); -void ttyprintf __P((struct tty *, const char *, ...)); - -int kvprintf __P((char const *, void (*)(int, void*), void *, int, - _BSD_VA_LIST_)); - -int snprintf __P((char *, size_t, const char *, ...)); -int sprintf __P((char *buf, const char *, ...)); -void uprintf __P((const char *, ...)); -void vprintf __P((const char *, _BSD_VA_LIST_)); -int vsnprintf __P((char *, size_t, const char *, _BSD_VA_LIST_)); -int vsprintf __P((char *buf, const char *, _BSD_VA_LIST_)); - -void bcopy __P((const void *from, void *to, size_t len)); -void ovbcopy __P((const void *from, void *to, size_t len)); -void bzero __P((void *buf, size_t len)); - -int copystr __P((void *kfaddr, void *kdaddr, size_t len, size_t *done)); -int copyinstr __P((void *udaddr, void *kaddr, size_t len, size_t *done)); -int copyoutstr __P((void *kaddr, void *udaddr, size_t len, size_t *done)); -int copyin __P((void *udaddr, void *kaddr, size_t len)); -int copyout __P((void *kaddr, void *udaddr, size_t len)); -int copywithin __P((void *saddr, void *daddr, size_t len)); - -int fubyte __P((void *base)); -#ifdef notdef -int fuibyte __P((void *base)); -#endif -int subyte __P((void *base, int byte)); -int suibyte __P((void *base, int byte)); -long fuword __P((void *base)); -long fuiword __P((void *base)); -int suword __P((void *base, long word)); -int suiword __P((void *base, long word)); - -#ifdef __APPLE_API_UNSTABLE -int hzto __P((struct timeval *tv)); typedef void (*timeout_fcn_t)(void *); -void timeout __P((void 
(*)(void *), void *arg, int ticks)); -void untimeout __P((void (*)(void *), void *arg)); -void realitexpire __P((void *)); -#endif /* __APPLE_API_UNSTABLE */ +#ifdef KERNEL_PRIVATE +void timeout(void (*)(void *), void *arg, int ticks); +void untimeout(void (*)(void *), void *arg); +#endif /* KERNEL_PRIVATE */ +void bsd_timeout(void (*)(void *), void *arg, struct timespec * ts); +void bsd_untimeout(void (*)(void *), void *arg); -#ifdef __APPLE_API_PRIVATE -void bsd_hardclock __P((boolean_t usermode, caddr_t pc, int numticks)); -void gatherstats __P((boolean_t usermode, caddr_t pc)); +void set_fsblocksize(struct vnode *); -void initclocks __P((void)); +#ifdef BSD_KERNEL_PRIVATE +int vslock(user_addr_t addr, user_size_t len); +int vsunlock(user_addr_t addr, user_size_t len, int dirtied); +int clone_system_shared_regions(int shared_regions_active, + int chain_regions, + int base_vnode); -void startprofclock __P((struct proc *)); -void stopprofclock __P((struct proc *)); -void setstatclockrate __P((int hzrate)); -#ifdef DDB -/* debugger entry points */ -int Debugger __P((void)); /* in DDB only */ -#endif +extern kern_return_t bsd_exception(int, exception_data_type_t codes[], int); +extern void bsdinit_task(void); +void bsd_hardclock(boolean_t usermode, caddr_t pc, int numticks); +void gatherstats(boolean_t usermode, caddr_t pc); -void set_fsblocksize __P((struct vnode *)); -#endif /* __APPLE_API_PRIVATE */ +void initclocks(void); -void addlog __P((const char *, ...)); -void printf __P((const char *, ...)); +void startprofclock(struct proc *); +void stopprofclock(struct proc *); +void setstatclockrate(int hzrate); -extern boolean_t thread_funnel_switch(int oldfnl, int newfnl); +struct time_value; +void get_procrustime(struct time_value *tv); + +void load_init_program(struct proc *p); +#endif /* BSD_KERNEL_PRIVATE */ -#include __END_DECLS diff --git a/bsd/sys/table.h b/bsd/sys/table.h deleted file mode 100644 index a59713b4e..000000000 --- a/bsd/sys/table.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Mach Operating System - * Copyright (c) 1986 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -#ifndef _SYS_TABLE_ -#define _SYS_TABLE_ - -#include - -#warning obsolete header! Please delete the include from your sources. 
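[The systm.h hunk above retypes the user-memory fetch/store accessors (fubyte, fuword, fulong, suword, and friends) to take an abstract user_addr_t rather than a raw pointer, so one kernel interface serves both 32-bit and 64-bit user processes. A minimal sketch of how such an accessor is typically called, assuming only the declarations above; read_user_flag is a hypothetical helper, and note that fuword()'s -1 error return is ambiguous with a legitimate -1 value, which is why copyin() is preferred when that matters:

    #include <sys/errno.h>          /* EFAULT */

    /* Hypothetical helper: fetch one long flag word from user space. */
    static int
    read_user_flag(user_addr_t uaddr, long *out)
    {
        long v = fuword(uaddr);     /* single-word fetch from user space */
        if (v == -1)
            return (EFAULT);        /* fault, or possibly a genuine -1; see caveat */
        *out = v;
        return (0);
    }
]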
- -#ifdef KERNEL_PRIVATE - -#ifdef __APPLE_API_OBSOLETE -#include -#include - -#define TBL_LOADAVG 3 /* (no index) */ -#define TBL_ARGUMENTS 6 /* index by process ID */ -#define TBL_PROCINFO 10 /* index by proc table slot */ -#define TBL_MACHFACTOR 11 /* index by cpu number */ -#define TBL_CPUINFO 12 /* (no index), generic CPU info */ - -/* - * Machine specific table id base - */ -#define TBL_MACHDEP_BASE 0x4000 /* Machine dependent codes start here */ - -/* - * Return codes from machine dependent calls - */ -#define TBL_MACHDEP_NONE 0 /* Not handled by machdep code */ -#define TBL_MACHDEP_OKAY 1 /* Handled by machdep code */ -#define TBL_MACHDEP_BAD -1 /* Bad status from machdep code */ - - - -/* - * TBL_LOADAVG data layout - * (used by TBL_MACHFACTOR too) - */ -struct tbl_loadavg -{ - long tl_avenrun[3]; - int tl_lscale; /* 0 scale when floating point */ -}; - -/* - * TBL_PROCINFO data layout - */ -#define PI_COMLEN 19 /* length of command string */ -struct tbl_procinfo -{ - int pi_uid; /* user ID */ - int pi_pid; /* proc ID */ - int pi_ppid; /* parent proc ID */ - int pi_pgrp; /* proc group ID */ - int pi_ttyd; /* controlling terminal number */ - int pi_status; /* process status: */ -#define PI_EMPTY 0 /* no process */ -#define PI_ACTIVE 1 /* active process */ -#define PI_EXITING 2 /* exiting */ -#define PI_ZOMBIE 3 /* zombie */ - int pi_flag; /* other random flags */ - char pi_comm[PI_COMLEN+1]; - /* short command name */ -}; - -/* - * TBL_CPUINFO data layout - */ -struct tbl_cpuinfo -{ - int ci_swtch; /* # context switches */ - int ci_intr; /* # interrupts */ - int ci_syscall; /* # system calls */ - int ci_traps; /* # system traps */ - int ci_hz; /* # ticks per second */ - int ci_phz; /* profiling hz */ - int ci_cptime[CPUSTATES]; /* cpu state times */ -}; - - - -#ifdef KERNEL -/* - * Machine specific procedure prototypes. - */ -int machine_table(int id, int index, caddr_t addr, int nel, u_int lel, int set); -int machine_table_setokay(int id); -#endif /* KERNEL */ - -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* KERNEL_PRIVATE */ -#endif /* _SYS_TABLE_ */ - diff --git a/bsd/sys/termios.h b/bsd/sys/termios.h index 71afa6606..47ea1c9ec 100644 --- a/bsd/sys/termios.h +++ b/bsd/sys/termios.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -58,6 +58,8 @@ #ifndef _SYS_TERMIOS_H_ #define _SYS_TERMIOS_H_ +#include + /* * Special Control Characters * @@ -67,33 +69,33 @@ */ #define VEOF 0 /* ICANON */ #define VEOL 1 /* ICANON */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VEOL2 2 /* ICANON together with IEXTEN */ #endif #define VERASE 3 /* ICANON */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VWERASE 4 /* ICANON together with IEXTEN */ #endif #define VKILL 5 /* ICANON */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VREPRINT 6 /* ICANON together with IEXTEN */ #endif /* 7 spare 1 */ #define VINTR 8 /* ISIG */ #define VQUIT 9 /* ISIG */ #define VSUSP 10 /* ISIG */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VDSUSP 11 /* ISIG together with IEXTEN */ #endif #define VSTART 12 /* IXON, IXOFF */ #define VSTOP 13 /* IXON, IXOFF */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VLNEXT 14 /* IEXTEN */ #define VDISCARD 15 /* IEXTEN */ #endif #define VMIN 16 /* !ICANON */ #define VTIME 17 /* !ICANON */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define VSTATUS 18 /* ICANON together with IEXTEN */ /* 19 spare 2 */ #endif @@ -103,7 +105,7 @@ #define _POSIX_VDISABLE 0xff #endif -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define CCEQ(val, c) ((c) == (val) ? (val) != _POSIX_VDISABLE : 0) #endif @@ -121,25 +123,75 @@ #define ICRNL 0x00000100 /* map CR to NL (ala CRMOD) */ #define IXON 0x00000200 /* enable output flow control */ #define IXOFF 0x00000400 /* enable input flow control */ -#ifndef _POSIX_SOURCE #define IXANY 0x00000800 /* any char will restart after stop */ +#ifndef _POSIX_C_SOURCE #define IMAXBEL 0x00002000 /* ring bell on input queue full */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ /* * Output flags - software output processing */ #define OPOST 0x00000001 /* enable following output processing */ -#ifndef _POSIX_SOURCE #define ONLCR 0x00000002 /* map NL to CR-NL (ala CRMOD) */ +#ifndef _POSIX_C_SOURCE #define OXTABS 0x00000004 /* expand tabs to spaces */ #define ONOEOT 0x00000008 /* discard EOT's (^D) on output */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ +/* + * The following block of features is unimplemented. Use of these flags in + * programs will currently result in unexpected behaviour. + * + * - Begin unimplemented features + */ +#define OCRNL 0x00000010 /* map CR to NL on output */ +#define ONOCR 0x00000020 /* no CR output at column 0 */ +#define ONLRET 0x00000040 /* NL performs CR function */ +#define OFILL 0x00000080 /* use fill characters for delay */ +#define NLDLY 0x00000300 /* \n delay */ +#define TABDLY 0x00000c00 /* horizontal tab delay */ +#define CRDLY 0x00003000 /* \r delay */ +#define FFDLY 0x00004000 /* form feed delay */ +#define BSDLY 0x00008000 /* \b delay */ +#define VTDLY 0x00010000 /* vertical tab delay */ +#define OFDEL 0x00020000 /* fill is DEL, else NUL */ +#if !defined(_SYS_IOCTL_COMPAT_H_) || defined(_POSIX_C_SOURCE) +/* + * These manifest constants have the same names as those in the header + * <sys/ioctl_compat.h>, so you are not permitted to have both definitions + * in scope simultaneously in the same compilation unit. Nevertheless, + * they are required to be in scope when _POSIX_C_SOURCE is requested; + * this means that including the <sys/ioctl_compat.h> header before this + * one when _POSIX_C_SOURCE is in scope will result in redefinitions. We + * attempt to maintain these as the same values so as to avoid this being + * an outright error in most compilers. 
+ */ +#define NL0 0x00000000 +#define NL1 0x00000100 +#define NL2 0x00000200 +#define NL3 0x00000300 +#define TAB0 0x00000000 +#define TAB1 0x00000400 +#define TAB2 0x00000800 +#define TAB3 0x00000c00 +#define CR0 0x00000000 +#define CR1 0x00001000 +#define CR2 0x00002000 +#define CR3 0x00003000 +#define FF0 0x00000000 +#define FF1 0x00004000 +#define BS0 0x00000000 +#define BS1 0x00008000 +#define VT0 0x00000000 +#define VT1 0x00010000 +#endif /* !_SYS_IOCTL_COMPAT_H_ */ +/* + * + End unimplemented features + */ /* * Control flags - hardware control of terminal */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define CIGNORE 0x00000001 /* ignore control flags */ #endif #define CSIZE 0x00000300 /* character size mask */ @@ -153,7 +205,7 @@ #define PARODD 0x00002000 /* odd parity, else even */ #define HUPCL 0x00004000 /* hang up on last close */ #define CLOCAL 0x00008000 /* ignore modem status lines */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define CCTS_OFLOW 0x00010000 /* CTS flow control of output */ #define CRTSCTS (CCTS_OFLOW | CRTS_IFLOW) #define CRTS_IFLOW 0x00020000 /* RTS flow control of input */ @@ -172,30 +224,30 @@ * input flag. */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define ECHOKE 0x00000001 /* visual erase for line kill */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ #define ECHOE 0x00000002 /* visually erase chars */ #define ECHOK 0x00000004 /* echo NL after line kill */ #define ECHO 0x00000008 /* enable echoing */ #define ECHONL 0x00000010 /* echo NL even if ECHO is off */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define ECHOPRT 0x00000020 /* visual erase mode for hardcopy */ #define ECHOCTL 0x00000040 /* echo control chars as ^(Char) */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ #define ISIG 0x00000080 /* enable signals INTR, QUIT, [D]SUSP */ #define ICANON 0x00000100 /* canonicalize input lines */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define ALTWERASE 0x00000200 /* use alternate WERASE algorithm */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ #define IEXTEN 0x00000400 /* enable DISCARD and LNEXT */ #define EXTPROC 0x00000800 /* external processing */ #define TOSTOP 0x00400000 /* stop background jobs from output */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define FLUSHO 0x00800000 /* output being flushed (state) */ #define NOKERNINFO 0x02000000 /* no kernel output from VSTATUS */ #define PENDIN 0x20000000 /* XXX retype pending input (state) */ -#endif /*_POSIX_SOURCE */ +#endif /*_POSIX_C_SOURCE */ #define NOFLSH 0x80000000 /* don't flush after interrupt */ typedef unsigned long tcflag_t; @@ -212,13 +264,43 @@ struct termios { speed_t c_ospeed; /* output speed */ }; +#ifdef KERNEL +typedef unsigned long long user_tcflag_t; +typedef unsigned long long user_speed_t; + +/* + * LP64 version of struct termios. tcflag_t and speed_t are long and must + * grow when we're dealing with a 64-bit process. 
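[The input, output, and control flags defined in this hunk are consumed through tcgetattr()/tcsetattr(), declared further down in this header. A minimal user-space sketch, assuming fd refers to a terminal; make_raw is a hypothetical helper, and cfmakeraw() is the non-POSIX convenience routine also declared below:

    #include <termios.h>

    /* Hypothetical helper: switch fd to raw mode, saving the old state. */
    static int
    make_raw(int fd, struct termios *saved)
    {
        struct termios t;

        if (tcgetattr(fd, &t) == -1)
            return (-1);
        *saved = t;                 /* keep the old state for later restoration */
        cfmakeraw(&t);              /* clears ICANON, ECHO, ISIG, OPOST, etc. */
        t.c_cc[VMIN] = 1;           /* read() completes after one byte */
        t.c_cc[VTIME] = 0;          /* no inter-byte timer */
        return (tcsetattr(fd, TCSAFLUSH, &t));
    }

Restoring the terminal is then just tcsetattr(fd, TCSAFLUSH, saved).]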
+ * WARNING - keep in sync with struct termios + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_termios { + user_tcflag_t c_iflag; /* input flags */ + user_tcflag_t c_oflag; /* output flags */ + user_tcflag_t c_cflag; /* control flags */ + user_tcflag_t c_lflag; /* local flags */ + cc_t c_cc[NCCS]; /* control chars */ + user_speed_t c_ispeed; /* input speed */ + user_speed_t c_ospeed; /* output speed */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL */ + /* * Commands passed to tcsetattr() for setting the termios structure. */ #define TCSANOW 0 /* make change immediate */ #define TCSADRAIN 1 /* drain output, then change */ #define TCSAFLUSH 2 /* drain output, flush input */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define TCSASOFT 0x10 /* flag - don't alter h.w. state */ #endif @@ -241,7 +323,7 @@ struct termios { #define B9600 9600 #define B19200 19200 #define B38400 38400 -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define B7200 7200 #define B14400 14400 #define B28800 28800 @@ -251,7 +333,7 @@ struct termios { #define B230400 230400 #define EXTA 19200 #define EXTB 38400 -#endif /* !_POSIX_SOURCE */ +#endif /* !_POSIX_C_SOURCE */ #ifndef KERNEL @@ -266,26 +348,26 @@ struct termios { #include __BEGIN_DECLS -speed_t cfgetispeed __P((const struct termios *)); -speed_t cfgetospeed __P((const struct termios *)); -int cfsetispeed __P((struct termios *, speed_t)); -int cfsetospeed __P((struct termios *, speed_t)); -int tcgetattr __P((int, struct termios *)); -int tcsetattr __P((int, int, const struct termios *)); -int tcdrain __P((int)); -int tcflow __P((int, int)); -int tcflush __P((int, int)); -int tcsendbreak __P((int, int)); +speed_t cfgetispeed(const struct termios *); +speed_t cfgetospeed(const struct termios *); +int cfsetispeed(struct termios *, speed_t); +int cfsetospeed(struct termios *, speed_t); +int tcgetattr(int, struct termios *); +int tcsetattr(int, int, const struct termios *); +int tcdrain(int); +int tcflow(int, int); +int tcflush(int, int); +int tcsendbreak(int, int); -#ifndef _POSIX_SOURCE -void cfmakeraw __P((struct termios *)); -int cfsetspeed __P((struct termios *, speed_t)); -#endif /* !_POSIX_SOURCE */ +#ifndef _POSIX_C_SOURCE +void cfmakeraw(struct termios *); +int cfsetspeed(struct termios *, speed_t); +#endif /* !_POSIX_C_SOURCE */ __END_DECLS #endif /* !KERNEL */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE /* * Include tty ioctl's that aren't just for backwards compatibility @@ -300,6 +382,6 @@ __END_DECLS */ #endif /* !_SYS_TERMIOS_H_ */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #include #endif diff --git a/bsd/sys/time.h b/bsd/sys/time.h index 49bdecbbc..a7791b3c3 100644 --- a/bsd/sys/time.h +++ b/bsd/sys/time.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,27 +58,145 @@ #ifndef _SYS_TIME_H_ #define _SYS_TIME_H_ -#include -#include +#include +#include + +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif + +#ifndef _SUSECONDS_T +#define _SUSECONDS_T +typedef __darwin_suseconds_t suseconds_t; +#endif + /* * Structure returned by gettimeofday(2) system call, * and used in other calls. 
*/ +#ifndef _TIMEVAL +#define _TIMEVAL struct timeval { - int32_t tv_sec; /* seconds */ - int32_t tv_usec; /* and microseconds */ + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* and microseconds */ +}; +#endif /* _TIMEVAL */ + +/* + * Structure used as a parameter by getitimer(2) and setitimer(2) system + * calls. + */ +struct itimerval { + struct timeval it_interval; /* timer interval */ + struct timeval it_value; /* current value */ }; +/* + * Names of the interval timers, and structure + * defining a timer setting. + */ +#define ITIMER_REAL 0 +#define ITIMER_VIRTUAL 1 +#define ITIMER_PROF 2 + + +/* + * [XSI] The fd_set type shall be defined as described in . + * + * Note: We use _FD_SET to protect all select related + * types and macros + */ +#ifndef _FD_SET +#define _FD_SET + +/* + * Select uses bit masks of file descriptors in longs. These macros + * manipulate such bit fields (the filesystem macros use chars). The + * extra protection here is to permit application redefinition above + * the default size. + */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 1024 +#endif + +#define __DARWIN_NBBY 8 /* bits in a byte */ +#define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ +#define __DARWIN_howmany(x, y) (((x) + ((y) - 1)) / (y)) /* # y's == x bits? */ + +__BEGIN_DECLS +typedef struct fd_set { + __int32_t fds_bits[__DARWIN_howmany(FD_SETSIZE, __DARWIN_NFDBITS)]; +} fd_set; +__END_DECLS + +#define FD_SET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] |= (1<<((n) % __DARWIN_NFDBITS))) +#define FD_CLR(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] &= ~(1<<((n) % __DARWIN_NFDBITS))) +#define FD_ISSET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] & (1<<((n) % __DARWIN_NFDBITS))) +#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 +/* + * Use the built-in bzero function instead of the library version so that + * we do not pollute the namespace or introduce prototype warnings. + */ +#define FD_ZERO(p) __builtin_bzero(p, sizeof(*(p))) +#else +#define FD_ZERO(p) bzero(p, sizeof(*(p))) +#endif +#ifndef _POSIX_C_SOURCE +#define FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#endif /* !_POSIX_C_SOURCE */ + +#endif /* !_FD_SET */ + + +#ifndef _POSIX_C_SOURCE /* * Structure defined by POSIX.4 to be like a timeval. */ -#ifndef _TIMESPEC_DECLARED -#define _TIMESPEC_DECLARED +#ifndef _TIMESPEC +#define _TIMESPEC struct timespec { time_t tv_sec; /* seconds */ + long tv_nsec; /* and nanoseconds */ +}; + +#ifdef KERNEL +// LP64todo - should this move? +#include /* user_time_t */ + +/* LP64 version of struct timeval. time_t is a long and must grow when + * we're dealing with a 64-bit process. + * WARNING - keep in sync with struct timeval + */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_timeval { + user_time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* and microseconds */ +}; + +struct user_itimerval { + struct user_timeval it_interval; /* timer interval */ + struct user_timeval it_value; /* current value */ +}; + +/* LP64 version of struct timespec. time_t is a long and must grow when + * we're dealing with a 64-bit process. 
+ * WARNING - keep in sync with struct timespec + */ +struct user_timespec { + user_time_t tv_sec; /* seconds */ int32_t tv_nsec; /* and nanoseconds */ }; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif // KERNEL #endif #define TIMEVAL_TO_TIMESPEC(tv, ts) { \ @@ -102,8 +220,6 @@ struct timezone { #define DST_EET 5 /* Eastern European dst */ #define DST_CAN 6 /* Canada */ -#define time_second time.tv_sec - /* Operations on timevals. */ #define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0 #define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) @@ -132,19 +248,6 @@ struct timezone { #define timevalcmp(l, r, cmp) timercmp(l, r, cmp) /* freebsd */ -/* - * Names of the interval timers, and structure - * defining a timer setting. - */ -#define ITIMER_REAL 0 -#define ITIMER_VIRTUAL 1 -#define ITIMER_PROF 2 - -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ -}; - /* * Getkerninfo clock information structure */ @@ -155,39 +258,56 @@ struct clockinfo { int stathz; /* statistics clock frequency */ int profhz; /* profiling clock frequency */ }; +#endif /* ! _POSIX_C_SOURCE */ -#include #ifdef KERNEL -void microtime __P((struct timeval *tv)); -void microuptime __P((struct timeval *tv)); + +#ifndef _POSIX_C_SOURCE +__BEGIN_DECLS +void microtime(struct timeval *tv); +void microuptime(struct timeval *tv); #define getmicrotime(a) microtime(a) #define getmicrouptime(a) microuptime(a) -void nanotime __P((struct timespec *ts)); -void nanouptime __P((struct timespec *ts)); +void nanotime(struct timespec *ts); +void nanouptime(struct timespec *ts); #define getnanotime(a) nanotime(a) #define getnanouptime(a) nanouptime(a) -#ifdef __APPLE_API_PRIVATE -int itimerfix __P((struct timeval *tv)); -int itimerdecr __P((struct itimerval *itp, int usec)); -#endif /* __APPLE_API_PRIVATE */ +void timevaladd(struct timeval *t1, struct timeval *t2); +void timevalsub(struct timeval *t1, struct timeval *t2); +void timevalfix(struct timeval *t1); +#ifdef BSD_KERNEL_PRIVATE +time_t boottime_sec(void); +void inittodr(time_t base); +int itimerfix(struct timeval *tv); +int itimerdecr(struct itimerval *itp, int usec); +#endif /* BSD_KERNEL_PRIVATE */ + +__END_DECLS + +#endif /* ! _POSIX_C_SOURCE */ #else /* !KERNEL */ + +__BEGIN_DECLS + +#ifndef _POSIX_C_SOURCE #include -#ifndef _POSIX_SOURCE -#include +int adjtime(const struct timeval *, struct timeval *); +int futimes(int, const struct timeval *); +int settimeofday(const struct timeval *, const struct timezone *); +#endif /* ! 
_POSIX_C_SOURCE */ + +int getitimer(int, struct itimerval *); +int gettimeofday(struct timeval * __restrict, struct timezone * __restrict); +int select(int, fd_set * __restrict, fd_set * __restrict, + fd_set * __restrict, struct timeval * __restrict); +int setitimer(int, const struct itimerval * __restrict, + struct itimerval * __restrict); +int utimes(const char *, const struct timeval *); -__BEGIN_DECLS -int adjtime __P((const struct timeval *, struct timeval *)); -int futimes __P((int, const struct timeval *)); -int getitimer __P((int, struct itimerval *)); -int gettimeofday __P((struct timeval *, struct timezone *)); -int setitimer __P((int, const struct itimerval *, struct itimerval *)); -int settimeofday __P((const struct timeval *, const struct timezone *)); -int utimes __P((const char *, const struct timeval *)); __END_DECLS -#endif /* !POSIX */ #endif /* !KERNEL */ diff --git a/bsd/sys/timeb.h b/bsd/sys/timeb.h index 9277d37d7..604a4fabd 100644 --- a/bsd/sys/timeb.h +++ b/bsd/sys/timeb.h @@ -64,17 +64,30 @@ #define _SYS_TIMEB_H_ #include +#include +#include -#ifdef __APPLE_API_OBSOLETE +/* [XSI] The time_t type shall be defined as described in */ +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif -/* The ftime(2) system call structure -- deprecated. */ +/* + * [XSI] Structure whose address is passed as the first parameter to ftime() + */ struct timeb { - time_t time; /* seconds since the Epoch */ - unsigned short millitm; /* + milliseconds since the Epoch */ - short timezone; /* minutes west of CUT */ - short dstflag; /* DST == non-zero */ + time_t time; /* [XSI] Seconds since the Epoch */ + unsigned short millitm; /* [XSI] Milliseconds since the Epoch */ + short timezone; /* [XSI] Minutes west of CUT */ + short dstflag; /* [XSI] non-zero if DST in effect */ }; -#endif /* __APPLE_API_OBSOLETE */ +#ifndef KERNEL +__BEGIN_DECLS +/* [XSI] Legacy interface */ +int ftime(struct timeb *); +__END_DECLS +#endif /* !KERNEL */ #endif /* !_SYS_TIMEB_H_ */ diff --git a/bsd/sys/times.h b/bsd/sys/times.h index 01d0a3734..0cd072e49 100644 --- a/bsd/sys/times.h +++ b/bsd/sys/times.h @@ -63,25 +63,29 @@ #ifndef _SYS_TIMES_H_ #define _SYS_TIMES_H_ -#include +#include +#include +#include -#ifndef _BSD_CLOCK_T_DEFINED_ -#define _BSD_CLOCK_T_DEFINED_ -typedef _BSD_CLOCK_T_ clock_t; +/* [XSI] The clock_t type shall be defined as described in */ +#ifndef _CLOCK_T +#define _CLOCK_T +typedef __darwin_clock_t clock_t; #endif +/* + * [XSI] Structure whose address is passed as the first parameter to times() + */ struct tms { - clock_t tms_utime; /* User CPU time */ - clock_t tms_stime; /* System CPU time */ - clock_t tms_cutime; /* User CPU time of terminated child procs */ - clock_t tms_cstime; /* System CPU time of terminated child procs */ + clock_t tms_utime; /* [XSI] User CPU time */ + clock_t tms_stime; /* [XSI] System CPU time */ + clock_t tms_cutime; /* [XSI] Terminated children user CPU time */ + clock_t tms_cstime; /* [XSI] Terminated children System CPU time */ }; #ifndef KERNEL -#include - __BEGIN_DECLS -clock_t times __P((struct tms *)); +clock_t times(struct tms *); __END_DECLS #endif #endif /* !_SYS_TIMES_H_ */ diff --git a/bsd/sys/tprintf.h b/bsd/sys/tprintf.h index 8eaa93748..16f35aa38 100644 --- a/bsd/sys/tprintf.h +++ b/bsd/sys/tprintf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -59,15 +59,18 @@ #define _SYS_TPRINTF_H_ #include +#include #ifdef __APPLE_API_UNSTABLE typedef struct session *tpr_t; -tpr_t tprintf_open __P((struct proc *)); -void tprintf_close __P((tpr_t)); +__BEGIN_DECLS +tpr_t tprintf_open(struct proc *); +void tprintf_close(tpr_t); +void tprintf(tpr_t, const char *fmt, ...); +__END_DECLS -void tprintf __P((tpr_t, const char *fmt, ...)); #endif /* __APPLE_API_UNSTABLE */ #endif /* !_SYS_TPRINTF_H_ */ diff --git a/bsd/sys/trace.h b/bsd/sys/trace.h index c77ef1ca8..04d279aa5 100644 --- a/bsd/sys/trace.h +++ b/bsd/sys/trace.h @@ -133,7 +133,7 @@ extern struct proc *traceproc; extern int tracewhich, tracebuf[TRCSIZ]; extern u_int tracex; extern char traceflags[TR_NFLAGS]; -#define pack(v,b) (((v)->v_mount->mnt_stat.f_fsid.val[0])<<16)|(b) +#define pack(v,b) (((v)->v_mount->mnt_vfsstat.f_fsid.val[0])<<16)|(b) #define trace(a,b,c) { \ if (traceflags[a]) \ trace1(a,b,c); \ diff --git a/bsd/sys/tty.h b/bsd/sys/tty.h index c24e3083b..b74c3f510 100644 --- a/bsd/sys/tty.h +++ b/bsd/sys/tty.h @@ -68,22 +68,8 @@ #include #include /* For struct selinfo. */ -#ifdef __APPLE_API_UNSTABLE -#ifndef __APPLE__ -/* - * Clists are character lists, which is a variable length linked list - * of cblocks, with a count of the number of characters in the list. - */ -struct clist { - int c_cc; /* Number of characters in the clist. */ - int c_cbcount; /* Number of cblocks. */ - int c_cbmax; /* Max # cblocks allowed for this clist. */ - int c_cbreserved; /* # cblocks reserved for this clist. */ - char *c_cf; /* Pointer to the first cblock. */ - char *c_cl; /* Pointer to the last cblock. */ -}; -#else /* __APPLE__ */ +#ifdef KERNEL /* * NetBSD Clists are actually ring buffers. The c_cc, c_cf, c_cl fields have * exactly the same behaviour as in true clists. @@ -106,7 +92,6 @@ struct clist { #define TTYCLSIZE 1024 #endif -#endif /* __APPLE__ */ /* * Per-tty structure. @@ -134,11 +119,11 @@ struct tty { struct termios t_termios; /* Termios state. */ struct winsize t_winsize; /* Window size. */ /* Start output. */ - void (*t_oproc) __P((struct tty *)); + void (*t_oproc)(struct tty *); /* Stop output. */ - void (*t_stop) __P((struct tty *, int)); + void (*t_stop)(struct tty *, int); /* Set hardware state. */ - int (*t_param) __P((struct tty *, struct termios *)); + int (*t_param)(struct tty *, struct termios *); void *t_sc; /* XXX: net/if_sl.c:sl_softc. */ int t_column; /* Tty output column. */ int t_rocount, t_rocol; /* Tty. */ @@ -173,11 +158,13 @@ struct tty { #define TTYHOG 1024 #endif -#ifdef KERNEL #define TTMAXHIWAT roundup(2048, CBSIZE) #define TTMINHIWAT roundup(100, CBSIZE) #define TTMAXLOWAT 256 #define TTMINLOWAT 32 +#else +struct tty; +struct clist; #endif /* KERNEL */ /* These flags are kept in t_state. */ @@ -247,6 +234,7 @@ struct speedtab { #define TTY_OE 0x04000000 /* Overrun error */ #define TTY_BI 0x08000000 /* Break condition */ +#ifdef KERNEL /* Is tp controlling terminal for p? */ #define isctty(p, tp) \ ((p)->p_session == (tp)->t_session && (p)->p_flag & P_CONTROLT) @@ -265,87 +253,63 @@ struct speedtab { #define TSA_PTS_READ(tp) ((void *)&(tp)->t_canq) -#ifdef KERNEL __BEGIN_DECLS -#ifndef __APPLE__ -extern struct tty *constty; /* Temporary virtual console. 
*/ - -int b_to_q __P((char *cp, int cc, struct clist *q)); -void catq __P((struct clist *from, struct clist *to)); -void clist_alloc_cblocks __P((struct clist *q, int ccmax, int ccres)); -void clist_free_cblocks __P((struct clist *q)); -/* void clist_init __P((void)); */ /* defined in systm.h for main() */ -int getc __P((struct clist *q)); -void ndflush __P((struct clist *q, int cc)); -int ndqb __P((struct clist *q, int flag)); -char *nextc __P((struct clist *q, char *cp, int *c)); -int putc __P((int c, struct clist *q)); -int q_to_b __P((struct clist *q, char *cp, int cc)); -int unputc __P((struct clist *q)); - -int ttcompat __P((struct tty *tp, int com, caddr_t data, int flag)); -int ttsetcompat __P((struct tty *tp, int *com, caddr_t data, struct termios *term)); -#else /* __APPLE__ */ -int b_to_q __P((u_char *cp, int cc, struct clist *q)); -void catq __P((struct clist *from, struct clist *to)); -void clist_init __P((void)); -int getc __P((struct clist *q)); -void ndflush __P((struct clist *q, int cc)); -int ndqb __P((struct clist *q, int flag)); -u_char *firstc __P((struct clist *clp, int *c)); -u_char *nextc __P((struct clist *q, u_char *cp, int *c)); -int putc __P((int c, struct clist *q)); -int q_to_b __P((struct clist *q, u_char *cp, int cc)); -int unputc __P((struct clist *q)); -int clalloc __P((struct clist *clp, int size, int quot)); -void clfree __P((struct clist *clp)); +int b_to_q(const u_char *cp, int cc, struct clist *q); +void catq(struct clist *from, struct clist *to); +void clist_init(void); +int getc(struct clist *q); +void ndflush(struct clist *q, int cc); +int ndqb(struct clist *q, int flag); +u_char *firstc (struct clist *clp, int *c); +u_char *nextc(struct clist *q, u_char *cp, int *c); +int putc(int c, struct clist *q); +int q_to_b(struct clist *q, u_char *cp, int cc); +int unputc(struct clist *q); +int clalloc(struct clist *clp, int size, int quot); +void clfree(struct clist *clp); +void cinit(void); +void clrbits(u_char *cp, int off, int len); #ifdef KERNEL_PRIVATE -int ttcompat __P((struct tty *tp, u_long com, caddr_t data, int flag, - struct proc *p)); -int ttsetcompat __P((struct tty *tp, u_long *com, caddr_t data, struct termios *term)); +int ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, + struct proc *p); +int ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term); #endif /* KERNEL_PRIVATE */ -#endif /* __APPLE__ */ -void termioschars __P((struct termios *t)); -int tputchar __P((int c, struct tty *tp)); -#ifndef __APPLE__ -int ttioctl __P((struct tty *tp, int com, void *data, int flag)); -#else -int ttioctl __P((struct tty *tp, u_long com, caddr_t data, int flag, - struct proc *p)); -#endif -int ttread __P((struct tty *tp, struct uio *uio, int flag)); -void ttrstrt __P((void *tp)); -int ttyselect __P((struct tty *tp, int rw, void * wql, struct proc *p)); -int ttselect __P((dev_t dev, int rw, void * wql, struct proc *p)); -void ttsetwater __P((struct tty *tp)); -int ttspeedtab __P((int speed, struct speedtab *table)); -int ttstart __P((struct tty *tp)); -void ttwakeup __P((struct tty *tp)); -int ttwrite __P((struct tty *tp, struct uio *uio, int flag)); -void ttwwakeup __P((struct tty *tp)); -void ttyblock __P((struct tty *tp)); -void ttychars __P((struct tty *tp)); -int ttycheckoutq __P((struct tty *tp, int wait)); -int ttyclose __P((struct tty *tp)); -void ttyflush __P((struct tty *tp, int rw)); -void ttyinfo __P((struct tty *tp)); -int ttyinput __P((int c, struct tty *tp)); -int ttylclose __P((struct tty *tp, int flag)); 
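[The mass change running through this tty.h hunk, and through the whole patch, is the removal of the __P(()) wrapper from every prototype, visible in the deleted declarations on either side of this point. __P comes from <sys/cdefs.h> and exists only to keep headers compilable under pre-ANSI compilers; its definition is approximately:

    #if defined(__STDC__) || defined(__cplusplus)
    #define __P(protos)     protos      /* ANSI C: keep the parameter list */
    #else
    #define __P(protos)     ()          /* traditional K&R C: strip it */
    #endif

    /* So the old form */
    int ttywait __P((struct tty *tp));
    /* expands under an ANSI compiler to exactly what the patch now writes: */
    int ttywait(struct tty *tp);

With K&R support abandoned, spelling the prototypes directly loses nothing and lets the compiler check arguments unconditionally.]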
-int ttymodem __P((struct tty *tp, int flag)); -int ttyopen __P((dev_t device, struct tty *tp)); -int ttysleep __P((struct tty *tp, - void *chan, int pri, char *wmesg, int timeout)); -int ttywait __P((struct tty *tp)); -struct tty *ttymalloc __P((void)); -void ttyfree __P((struct tty *)); +void termioschars(struct termios *t); +int tputchar(int c, struct tty *tp); +int ttioctl(struct tty *tp, u_long com, caddr_t data, int flag, + struct proc *p); +int ttread(struct tty *tp, struct uio *uio, int flag); +void ttrstrt(void *tp); +int ttyselect(struct tty *tp, int rw, void * wql, struct proc *p); +int ttselect(dev_t dev, int rw, void * wql, struct proc *p); +void ttsetwater(struct tty *tp); +int ttspeedtab(int speed, struct speedtab *table); +int ttstart(struct tty *tp); +void ttwakeup(struct tty *tp); +int ttwrite(struct tty *tp, struct uio *uio, int flag); +void ttwwakeup(struct tty *tp); +void ttyblock(struct tty *tp); +void ttychars(struct tty *tp); +int ttycheckoutq(struct tty *tp, int wait); +int ttyclose(struct tty *tp); +void ttyflush(struct tty *tp, int rw); +void ttyinfo(struct tty *tp); +int ttyinput(int c, struct tty *tp); +int ttylclose(struct tty *tp, int flag); +int ttymodem(struct tty *tp, int flag); +int ttyopen(dev_t device, struct tty *tp); +int ttysleep(struct tty *tp, + void *chan, int pri, const char *wmesg, int timeout); +int ttywait(struct tty *tp); +struct tty *ttymalloc(void); +void ttyfree(struct tty *); __END_DECLS #endif /* KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ #endif /* !_SYS_TTY_H_ */ diff --git a/bsd/sys/ttycom.h b/bsd/sys/ttycom.h index 5baac4b80..b7c4d8b3c 100644 --- a/bsd/sys/ttycom.h +++ b/bsd/sys/ttycom.h @@ -104,6 +104,12 @@ struct winsize { #define TIOCSETA _IOW('t', 20, struct termios) /* set termios struct */ #define TIOCSETAW _IOW('t', 21, struct termios) /* drain output, set */ #define TIOCSETAF _IOW('t', 22, struct termios) /* drn out, fls in, set */ +#ifdef KERNEL +#define TIOCGETA_64 _IOR('t', 19, struct user_termios) +#define TIOCSETA_64 _IOW('t', 20, struct user_termios) +#define TIOCSETAW_64 _IOW('t', 21, struct user_termios) +#define TIOCSETAF_64 _IOW('t', 22, struct user_termios) +#endif /* KERNEL */ #define TIOCGETD _IOR('t', 26, int) /* get line discipline */ #define TIOCSETD _IOW('t', 27, int) /* set line discipline */ /* 127-124 compat */ diff --git a/bsd/sys/ttydefaults.h b/bsd/sys/ttydefaults.h index b11fbb6d3..078882c79 100644 --- a/bsd/sys/ttydefaults.h +++ b/bsd/sys/ttydefaults.h @@ -83,7 +83,7 @@ #define CEOL 0xff /* XXX avoid _POSIX_VDISABLE */ #define CERASE 0177 #define CINTR CTRL('c') -#define CSTATUS 0xff /* XXX avoid _POSIX_VDISABLE */ +#define CSTATUS CTRL('t') #define CKILL CTRL('u') #define CMIN 1 #define CQUIT 034 /* FS, ^\ */ diff --git a/bsd/sys/types.h b/bsd/sys/types.h index 4a05f931d..d9f9d810a 100644 --- a/bsd/sys/types.h +++ b/bsd/sys/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -70,96 +70,205 @@ /* Machine type dependent parameters. 
*/ #include +#include -#include #include -#ifndef _POSIX_SOURCE -typedef unsigned char u_char; -typedef unsigned short u_short; -typedef unsigned int u_int; -typedef unsigned long u_long; -typedef unsigned short ushort; /* Sys V compatibility */ -typedef unsigned int uint; /* Sys V compatibility */ -#endif - -typedef u_int64_t u_quad_t; /* quads */ -typedef int64_t quad_t; -typedef quad_t * qaddr_t; - -typedef char * caddr_t; /* core address */ -typedef int32_t daddr_t; /* disk address */ -typedef int32_t dev_t; /* device number */ -typedef u_int32_t fixpt_t; /* fixed point number */ -typedef u_int32_t gid_t; /* group id */ -typedef u_int32_t in_addr_t; /* base type for internet address */ -typedef u_int16_t in_port_t; -typedef u_int32_t ino_t; /* inode number */ -typedef long key_t; /* IPC key (for Sys V IPC) */ -typedef u_int16_t mode_t; /* permissions */ -typedef u_int16_t nlink_t; /* link count */ -typedef quad_t off_t; /* file offset */ -typedef int32_t pid_t; /* process id */ -typedef quad_t rlim_t; /* resource limit */ -typedef int32_t segsz_t; /* segment size */ -typedef int32_t swblk_t; /* swap offset */ -typedef u_int32_t uid_t; /* user id */ -typedef u_int32_t useconds_t; /* microseconds (unsigned) */ - -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE +typedef unsigned char u_char; +typedef unsigned short u_short; +typedef unsigned int u_int; +#ifndef _U_LONG +typedef unsigned long u_long; +#define _U_LONG +#endif +typedef unsigned short ushort; /* Sys V compatibility */ +typedef unsigned int uint; /* Sys V compatibility */ +#endif + +typedef u_int64_t u_quad_t; /* quads */ +typedef int64_t quad_t; +typedef quad_t * qaddr_t; + +typedef char * caddr_t; /* core address */ +typedef int32_t daddr_t; /* disk address */ + +#ifndef _DEV_T +typedef __darwin_dev_t dev_t; /* device number */ +#define _DEV_T +#endif + +typedef u_int32_t fixpt_t; /* fixed point number */ + +#ifndef _BLKCNT_T +typedef __darwin_blkcnt_t blkcnt_t; +#define _BLKCNT_T +#endif + +#ifndef _BLKSIZE_T +typedef __darwin_blksize_t blksize_t; +#define _BLKSIZE_T +#endif + +#ifndef _GID_T +typedef __darwin_gid_t gid_t; +#define _GID_T +#endif + +#ifndef _IN_ADDR_T +#define _IN_ADDR_T +typedef __uint32_t in_addr_t; /* base type for internet address */ +#endif + +#ifndef _IN_PORT_T +#define _IN_PORT_T +typedef __uint16_t in_port_t; +#endif + +#ifndef _INO_T +typedef __darwin_ino_t ino_t; /* inode number */ +#define _INO_T +#endif + +#ifndef _KEY_T +#define _KEY_T +typedef __int32_t key_t; /* IPC key (for Sys V IPC) */ +#endif + +#ifndef _MODE_T +typedef __darwin_mode_t mode_t; +#define _MODE_T +#endif + +#ifndef _NLINK_T +typedef __uint16_t nlink_t; /* link count */ +#define _NLINK_T +#endif + +#ifndef _ID_T +#define _ID_T +typedef __darwin_id_t id_t; /* can hold pid_t, gid_t, or uid_t */ +#endif + +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T +#endif + +#ifndef _OFF_T +typedef __darwin_off_t off_t; +#define _OFF_T +#endif + +typedef int32_t segsz_t; /* segment size */ +typedef int32_t swblk_t; /* swap offset */ + +#ifndef _UID_T +typedef __darwin_uid_t uid_t; /* user id */ +#define _UID_T +#endif + +#ifndef _ID_T +typedef __darwin_id_t id_t; +#define _ID_T +#endif + +#ifndef _POSIX_C_SOURCE /* Major, minor numbers, dev_t's. 
*/ #define major(x) ((int32_t)(((u_int32_t)(x) >> 24) & 0xff)) #define minor(x) ((int32_t)((x) & 0xffffff)) #define makedev(x,y) ((dev_t)(((x) << 24) | (y))) #endif -#ifndef _BSD_CLOCK_T_DEFINED_ -#define _BSD_CLOCK_T_DEFINED_ -typedef _BSD_CLOCK_T_ clock_t; +#ifndef _CLOCK_T +#define _CLOCK_T +typedef __darwin_clock_t clock_t; #endif -#ifndef _BSD_SIZE_T_DEFINED_ -#define _BSD_SIZE_T_DEFINED_ -typedef _BSD_SIZE_T_ size_t; +#ifndef _SIZE_T +#define _SIZE_T +/* DO NOT REMOVE THIS COMMENT: fixincludes needs to see + * _GCC_SIZE_T */ +typedef __darwin_size_t size_t; #endif -#ifndef _BSD_SSIZE_T_DEFINED_ -#define _BSD_SSIZE_T_DEFINED_ -typedef _BSD_SSIZE_T_ ssize_t; +#ifndef _SSIZE_T +#define _SSIZE_T +typedef __darwin_ssize_t ssize_t; #endif -#ifndef _BSD_TIME_T_DEFINED_ -#define _BSD_TIME_T_DEFINED_ -typedef _BSD_TIME_T_ time_t; +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; #endif -#ifndef _POSIX_SOURCE -#define NBBY 8 /* number of bits in a byte */ +#ifndef _USECONDS_T +#define _USECONDS_T +typedef __darwin_useconds_t useconds_t; +#endif + +#ifndef _SUSECONDS_T +#define _SUSECONDS_T +typedef __darwin_suseconds_t suseconds_t; +#endif + +#ifndef _POSIX_C_SOURCE +/* + * This code is present here in order to maintain historical backward + * compatibility, and is intended to be removed at some point in the + * future; please include <sys/select.h> instead. + */ +#define NBBY 8 /* bits in a byte */ +#define NFDBITS (sizeof(__int32_t) * NBBY) /* bits per mask */ +#define howmany(x, y) (((x) + ((y) - 1)) / (y)) /* # y's == x bits? */ +typedef __int32_t fd_mask; + + +/* + * Note: We use _FD_SET to protect all select related + * types and macros + */ +#ifndef _FD_SET +#define _FD_SET /* * Select uses bit masks of file descriptors in longs. These macros - * manipulate such bit fields (the filesystem macros use chars). + * manipulate such bit fields (the filesystem macros use chars). The + * extra protection here is to permit application redefinition above + * the default size. */ #ifndef FD_SETSIZE #define FD_SETSIZE 1024 #endif -typedef int32_t fd_mask; -#define NFDBITS (sizeof(fd_mask) * NBBY) /* bits per mask */ - -#ifndef howmany -#define howmany(x, y) (((x) + ((y) - 1)) / (y)) -#endif +#define __DARWIN_NBBY 8 /* bits in a byte */ +#define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ +#define __DARWIN_howmany(x, y) (((x) + ((y) - 1)) / (y)) /* # y's == x bits? */ +__BEGIN_DECLS typedef struct fd_set { - fd_mask fds_bits[howmany(FD_SETSIZE, NFDBITS)]; + __int32_t fds_bits[__DARWIN_howmany(FD_SETSIZE, __DARWIN_NFDBITS)]; } fd_set; +__END_DECLS -#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= (1 << ((n) % NFDBITS))) -#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS))) -#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS))) -#define FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#define FD_SET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] |= (1<<((n) % __DARWIN_NFDBITS))) +#define FD_CLR(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] &= ~(1<<((n) % __DARWIN_NFDBITS))) +#define FD_ISSET(n, p) ((p)->fds_bits[(n)/__DARWIN_NFDBITS] & (1<<((n) % __DARWIN_NFDBITS))) +#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 +/* + * Use the built-in bzero function instead of the library version so that + * we do not pollute the namespace or introduce prototype warnings. 
+ */ +#define FD_ZERO(p) __builtin_bzero(p, sizeof(*(p))) +#else #define FD_ZERO(p) bzero(p, sizeof(*(p))) +#endif +#ifndef _POSIX_C_SOURCE +#define FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#endif /* !_POSIX_C_SOURCE */ + +#endif /* !_FD_SET */ + #if defined(__STDC__) && defined(KERNEL) /* @@ -177,49 +286,64 @@ struct tty; struct uio; #endif -#endif /* !_POSIX_SOURCE */ +#endif /* !_POSIX_C_SOURCE */ #endif /* __ASSEMBLER__ */ -struct _pthread_handler_rec -{ - void (*routine)(void *); /* Routine to call */ - void *arg; /* Argument to pass */ - struct _pthread_handler_rec *next; -}; - #ifndef __POSIX_LIB__ -#define __PTHREAD_SIZE__ 596 -#define __PTHREAD_ATTR_SIZE__ 36 -#define __PTHREAD_MUTEXATTR_SIZE__ 8 -#define __PTHREAD_MUTEX_SIZE__ 40 -#define __PTHREAD_CONDATTR_SIZE__ 4 -#define __PTHREAD_COND_SIZE__ 24 -#define __PTHREAD_ONCE_SIZE__ 4 -#define __PTHREAD_RWLOCK_SIZE__ 124 -#define __PTHREAD_RWLOCKATTR_SIZE__ 12 - - -typedef struct _opaque_pthread_t { long sig; struct _pthread_handler_rec *cleanup_stack; char opaque[__PTHREAD_SIZE__];} *pthread_t; - -typedef struct _opaque_pthread_attr_t { long sig; char opaque[__PTHREAD_ATTR_SIZE__]; } pthread_attr_t; - -typedef struct _opaque_pthread_mutexattr_t { long sig; char opaque[__PTHREAD_MUTEXATTR_SIZE__]; } pthread_mutexattr_t; - -typedef struct _opaque_pthread_mutex_t { long sig; char opaque[__PTHREAD_MUTEX_SIZE__]; } pthread_mutex_t; - -typedef struct _opaque_pthread_condattr_t { long sig; char opaque[__PTHREAD_CONDATTR_SIZE__]; } pthread_condattr_t; - -typedef struct _opaque_pthread_cond_t { long sig; char opaque[__PTHREAD_COND_SIZE__]; } pthread_cond_t; - -typedef struct _opaque_pthread_rwlockattr_t { long sig; char opaque[__PTHREAD_RWLOCKATTR_SIZE__]; } pthread_rwlockattr_t; +#ifndef _PTHREAD_ATTR_T +#define _PTHREAD_ATTR_T +typedef __darwin_pthread_attr_t pthread_attr_t; +#endif +#ifndef _PTHREAD_COND_T +#define _PTHREAD_COND_T +typedef __darwin_pthread_cond_t pthread_cond_t; +#endif +#ifndef _PTHREAD_CONDATTR_T +#define _PTHREAD_CONDATTR_T +typedef __darwin_pthread_condattr_t pthread_condattr_t; +#endif +#ifndef _PTHREAD_MUTEX_T +#define _PTHREAD_MUTEX_T +typedef __darwin_pthread_mutex_t pthread_mutex_t; +#endif +#ifndef _PTHREAD_MUTEXATTR_T +#define _PTHREAD_MUTEXATTR_T +typedef __darwin_pthread_mutexattr_t pthread_mutexattr_t; +#endif +#ifndef _PTHREAD_ONCE_T +#define _PTHREAD_ONCE_T +typedef __darwin_pthread_once_t pthread_once_t; +#endif +#ifndef _PTHREAD_RWLOCK_T +#define _PTHREAD_RWLOCK_T +typedef __darwin_pthread_rwlock_t pthread_rwlock_t; +#endif +#ifndef _PTHREAD_RWLOCKATTR_T +#define _PTHREAD_RWLOCKATTR_T +typedef __darwin_pthread_rwlockattr_t pthread_rwlockattr_t; +#endif +#ifndef _PTHREAD_T +#define _PTHREAD_T +typedef __darwin_pthread_t pthread_t; +#endif -typedef struct _opaque_pthread_rwlock_t { long sig; char opaque[__PTHREAD_RWLOCK_SIZE__]; } pthread_rwlock_t; +#endif /* __POSIX_LIB__ */ -typedef struct { long sig; char opaque[__PTHREAD_ONCE_SIZE__]; } pthread_once_t; +#ifndef _PTHREAD_KEY_T +#define _PTHREAD_KEY_T +typedef __darwin_pthread_key_t pthread_key_t; +#endif -#endif /* __POSIX_LIB__ */ +/* statvfs and fstatvfs */ +#ifndef _FSBLKCNT_T +#define _FSBLKCNT_T +typedef __darwin_fsblkcnt_t fsblkcnt_t; +#endif -typedef unsigned long pthread_key_t; /* Opaque 'pointer' */ +#ifndef _FSFILCNT_T +#define _FSFILCNT_T +typedef __darwin_fsfilcnt_t fsfilcnt_t; +#endif #endif /* !_SYS_TYPES_H_ */ diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h index 0e401f6f7..46be17aa7 100644 --- a/bsd/sys/ubc.h +++ b/bsd/sys/ubc.h @@ -1,5 +1,5 
@@ /* - * Copyright (c) 1999, 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,10 +20,6 @@ * @APPLE_LICENSE_HEADER_END@ */ /* - * File: ubc.h - * Author: Umesh Vaishampayan [umeshv@apple.com] - * 05-Aug-1999 umeshv Created. - * * Header file for Unified Buffer Cache. * */ @@ -32,141 +28,58 @@ #define _SYS_UBC_H_ #include -#include -#include -#include - #include - +#include +#include #include -#define UBC_INFO_NULL ((struct ubc_info *) 0) -#define UBC_NOINFO ((struct ubc_info *)0xDEADD1ED) -#ifdef __APPLE_API_PRIVATE -extern struct zone *ubc_info_zone; +/* defns for ubc_sync_range() and ubc_msync */ -/* - * The following data structure keeps the information to associate - * a vnode to the correspondig VM objects. - */ - -struct ubc_info { - memory_object_t ui_pager; /* pager */ - memory_object_control_t ui_control; /* VM control for the pager */ - long ui_flags; /* flags */ - struct vnode *ui_vnode; /* The vnode for this ubc_info */ - struct ucred *ui_ucred; /* holds credentials for NFS paging */ - int ui_refcount;/* ref count on the ubc_info */ - off_t ui_size; /* file size for the vnode */ - long ui_mapped; /* is it currently mapped */ - void *ui_owner; /* for recursive ubc_busy */ -}; - -/* Defines for ui_flags */ -#define UI_NONE 0x00000000 /* none */ -#define UI_HASPAGER 0x00000001 /* has a pager associated */ -#define UI_INITED 0x00000002 /* newly initialized vnode */ -#define UI_HASOBJREF 0x00000004 /* hold a reference on object */ -#define UI_WASMAPPED 0x00000008 /* vnode was mapped */ -#define UI_DONTCACHE 0x00000010 /* do not cache object */ -#define UI_BUSY 0x00000020 /* for VM synchronization */ -#define UI_WANTED 0x00000040 /* for VM synchronization */ - -#endif /* __APPLE_API_PRIVATE */ - -#ifdef __APPLE_API_EVOLVING -/* - * exported primitives for loadable file systems. - */ +#define UBC_PUSHDIRTY 0x01 /* clean any dirty pages in the specified range to the backing store */ +#define UBC_PUSHALL 0x02 /* push both dirty and precious pages to the backing store */ +#define UBC_INVALIDATE 0x04 /* invalidate pages in the specified range... 
may be used with UBC_PUSHDIRTY/ALL */ +#define UBC_SYNC 0x08 /* wait for I/Os generated by UBC_PUSHDIRTY to complete */ __BEGIN_DECLS -int ubc_info_init __P((struct vnode *)); -void ubc_info_deallocate __P((struct ubc_info *)); -int ubc_setsize __P((struct vnode *, off_t)); -off_t ubc_getsize __P((struct vnode *)); -int ubc_uncache __P((struct vnode *)); -int ubc_umount __P((struct mount *)); -void ubc_unmountall __P(()); -int ubc_setcred __P((struct vnode *, struct proc *)); -struct ucred *ubc_getcred __P((struct vnode *)); -memory_object_t ubc_getpager __P((struct vnode *)); -memory_object_control_t ubc_getobject __P((struct vnode *, int)); -int ubc_setpager __P((struct vnode *, memory_object_t)); -int ubc_setflags __P((struct vnode *, int)); -int ubc_clearflags __P((struct vnode *, int)); -int ubc_issetflags __P((struct vnode *, int)); -off_t ubc_blktooff __P((struct vnode *, daddr_t)); -daddr_t ubc_offtoblk __P((struct vnode *, off_t)); -int ubc_clean __P((struct vnode *, int)); -int ubc_pushdirty __P((struct vnode *)); -int ubc_pushdirty_range __P((struct vnode *, off_t, off_t)); -int ubc_hold __P((struct vnode *)); -void ubc_rele __P((struct vnode *)); -void ubc_map __P((struct vnode *)); -int ubc_destroy_named __P((struct vnode *)); -int ubc_release_named __P((struct vnode *)); -int ubc_invalidate __P((struct vnode *, off_t, size_t)); -int ubc_isinuse __P((struct vnode *, int)); - -int ubc_page_op __P((struct vnode *, off_t, int, ppnum_t *, int *)); -/* cluster IO routines */ -int cluster_read __P((struct vnode *, struct uio *, off_t, int, int)); -int advisory_read __P((struct vnode *, off_t, off_t, int, int)); -int cluster_write __P((struct vnode *, struct uio*, off_t, off_t, - off_t, off_t, int, int)); -int cluster_push __P((struct vnode *)); -int cluster_release __P((struct vnode *)); -int cluster_pageout __P((struct vnode *, upl_t, vm_offset_t, off_t, int, - off_t, int, int)); -int cluster_pagein __P((struct vnode *, upl_t, vm_offset_t, off_t, int, - off_t, int, int)); -int cluster_bp __P((struct buf *)); -int cluster_copy_upl_data __P((struct uio *, upl_t, int, int)); -int cluster_copy_ubc_data __P((struct vnode *, struct uio *, int *, int)); +off_t ubc_blktooff(struct vnode *, daddr64_t); +daddr64_t ubc_offtoblk(struct vnode *, off_t); +off_t ubc_getsize(struct vnode *); +int ubc_setsize(struct vnode *, off_t); -/* UPL routines */ -int ubc_create_upl __P((struct vnode *, off_t, long, upl_t *, - upl_page_info_t **, int)); -int ubc_upl_map __P((upl_t, vm_offset_t *)); -int ubc_upl_unmap __P((upl_t)); -int ubc_upl_commit __P((upl_t)); -int ubc_upl_commit_range __P((upl_t, vm_offset_t, vm_size_t, int)); -int ubc_upl_abort __P((upl_t, int)); -int ubc_upl_abort_range __P((upl_t, vm_offset_t, vm_size_t, int)); -upl_page_info_t *ubc_upl_pageinfo __P((upl_t)); -__END_DECLS +struct ucred *ubc_getcred(struct vnode *); +int ubc_setcred(struct vnode *, struct proc *); -#define UBCINFOMISSING(vp) \ - ((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_INFO_NULL)) +int ubc_sync_range(vnode_t, off_t, off_t, int); +errno_t ubc_msync(vnode_t, off_t, off_t, off_t *, int); +int ubc_pages_resident(vnode_t); -#define UBCINFORECLAIMED(vp) \ - ((vp) && ((vp)->v_type == VREG) && ((vp)->v_ubcinfo == UBC_NOINFO)) -#define UBCINFOEXISTS(vp) \ - ((vp) && ((vp)->v_type == VREG) && \ - ((vp)->v_ubcinfo) && ((vp)->v_ubcinfo != UBC_NOINFO)) +/* cluster IO routines */ +int advisory_read(vnode_t, off_t, off_t, int); -#define UBCISVALID(vp) \ - ((vp) && ((vp)->v_type == VREG) && !((vp)->v_flag & VSYSTEM)) 
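[The UBC_* flags above parameterize ubc_msync(), whose declaration this hunk adds. A kernel-internal sketch using only the interface as declared here, assuming a valid vnode_t vp; flush_and_invalidate is a hypothetical file-system helper:

    /* Hypothetical helper: synchronously push dirty pages in
     * [off, off + len) to the backing store, then invalidate them. */
    static errno_t
    flush_and_invalidate(vnode_t vp, off_t off, off_t len)
    {
        off_t resid_off = 0;    /* on failure, first offset not pushed */

        return ubc_msync(vp, off, off + len, &resid_off,
            UBC_PUSHDIRTY | UBC_SYNC | UBC_INVALIDATE);
    }

Per the flag comments, UBC_SYNC waits for the I/O generated by UBC_PUSHDIRTY, and UBC_INVALIDATE may be combined with either push variant.]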
+int cluster_read(vnode_t, struct uio *, off_t, int); +int cluster_write(vnode_t, struct uio *, off_t, off_t, off_t, off_t, int); +int cluster_pageout(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int); +int cluster_pagein(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int); +int cluster_push(vnode_t, int); +int cluster_bp(buf_t); +void cluster_zero(upl_t, vm_offset_t, int, buf_t); -#define UBCINVALID(vp) \ - (((vp) == NULL) || ((vp) && ((vp)->v_type != VREG)) \ - || ((vp) && ((vp)->v_flag & VSYSTEM))) -#define UBCINFOCHECK(fun, vp) \ - if ((vp) && ((vp)->v_type == VREG) && \ - (((vp)->v_ubcinfo == UBC_INFO_NULL) \ - || ((vp)->v_ubcinfo == UBC_NOINFO))) \ - panic("%s: lost ubc_info", (fun)); +/* UPL routines */ +int ubc_create_upl(vnode_t, off_t, long, upl_t *, upl_page_info_t **, int); +int ubc_upl_map(upl_t, upl_offset_t *); +int ubc_upl_unmap(upl_t); +int ubc_upl_commit(upl_t); +int ubc_upl_commit_range(upl_t, upl_offset_t, upl_size_t, int); +int ubc_upl_abort(upl_t, int); +int ubc_upl_abort_range(upl_t, upl_offset_t, upl_size_t, int); -/* Flags for ubc_getobject() */ -#define UBC_FLAGS_NONE 0x0000 -#define UBC_HOLDOBJECT 0x0001 -#define UBC_FOR_PAGEOUT 0x0002 +upl_page_info_t *ubc_upl_pageinfo(upl_t); -#endif /* __APPLE_API_EVOLVING */ +__END_DECLS #endif /* _SYS_UBC_H_ */ diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h new file mode 100644 index 000000000..1362f30ee --- /dev/null +++ b/bsd/sys/ubc_internal.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 1999-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * File: ubc.h + * Author: Umesh Vaishampayan [umeshv@apple.com] + * 05-Aug-1999 umeshv Created. + * + * Header file for Unified Buffer Cache. 
+ * + */ + +#ifndef _SYS_UBC_INTERNAL_H_ +#define _SYS_UBC_INTERNAL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + + +#define UBC_INFO_NULL ((struct ubc_info *) 0) + + +extern struct zone *ubc_info_zone; + + +#define MAX_CLUSTERS 4 /* maximum number of vfs clusters per vnode */ + +struct cl_extent { + daddr64_t b_addr; + daddr64_t e_addr; +}; + +struct cl_wextent { + daddr64_t b_addr; + daddr64_t e_addr; + int io_nocache; +}; + +struct cl_readahead { + lck_mtx_t cl_lockr; + daddr64_t cl_lastr; /* last block read by client */ + daddr64_t cl_maxra; /* last block prefetched by the read ahead */ + int cl_ralen; /* length of last prefetch */ +}; + +struct cl_writebehind { + lck_mtx_t cl_lockw; + int cl_hasbeenpaged; /* if set, indicates pager has cleaned pages associated with this file */ + void * cl_scmap; /* pointer to sparse cluster map */ + int cl_scdirty; /* number of dirty pages in the sparse cluster map */ + int cl_number; /* number of packed write behind clusters currently valid */ + struct cl_wextent cl_clusters[MAX_CLUSTERS]; /* packed write behind clusters */ +}; + + +/* + * The following data structure keeps the information to associate + * a vnode to the corresponding VM objects. + */ +struct ubc_info { + memory_object_t ui_pager; /* pager */ + memory_object_control_t ui_control; /* VM control for the pager */ + long ui_flags; /* flags */ + vnode_t *ui_vnode; /* The vnode for this ubc_info */ + ucred_t *ui_ucred; /* holds credentials for NFS paging */ + off_t ui_size; /* file size for the vnode */ + + struct cl_readahead *cl_rahead; /* cluster read ahead context */ + struct cl_writebehind *cl_wbehind; /* cluster write behind context */ +}; + +/* Defines for ui_flags */ +#define UI_NONE 0x00000000 /* none */ +#define UI_HASPAGER 0x00000001 /* has a pager associated */ +#define UI_INITED 0x00000002 /* newly initialized vnode */ +#define UI_HASOBJREF 0x00000004 /* hold a reference on object */ +#define UI_WASMAPPED 0x00000008 /* vnode was mapped */ +#define UI_ISMAPPED 0x00000010 /* vnode is currently mapped */ + +/* + * exported primitives for loadable file systems.
+ */ + +__BEGIN_DECLS +__private_extern__ int ubc_umount(struct mount *mp); +__private_extern__ void ubc_unmountall(void); +__private_extern__ memory_object_t ubc_getpager(struct vnode *); +__private_extern__ int ubc_map(struct vnode *, int); +__private_extern__ int ubc_destroy_named(struct vnode *); + +/* internal only */ +__private_extern__ void cluster_release(struct ubc_info *); + + +/* Flags for ubc_getobject() */ +#define UBC_FLAGS_NONE 0x0000 +#define UBC_HOLDOBJECT 0x0001 +#define UBC_FOR_PAGEOUT 0x0002 + +memory_object_control_t ubc_getobject(struct vnode *, int); + +int ubc_info_init(struct vnode *); +void ubc_info_deallocate (struct ubc_info *); + +int ubc_isinuse(struct vnode *, int); + +int ubc_page_op(vnode_t, off_t, int, ppnum_t *, int *); +int ubc_range_op(vnode_t, off_t, off_t, int, int *); + + +int cluster_copy_upl_data(struct uio *, upl_t, int, int); +int cluster_copy_ubc_data(vnode_t, struct uio *, int *, int); + + +int UBCINFOMISSING(vnode_t); +int UBCINFORECLAIMED(vnode_t); +int UBCINFOEXISTS(vnode_t); +int UBCISVALID(vnode_t); +int UBCINVALID(vnode_t); +int UBCINFOCHECK(const char *, vnode_t); + +__END_DECLS + + +#endif /* _SYS_UBC_INTERNAL_H_ */ + diff --git a/bsd/sys/ucontext.h b/bsd/sys/ucontext.h index dd97a8cef..f231226a2 100644 --- a/bsd/sys/ucontext.h +++ b/bsd/sys/ucontext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,29 +23,58 @@ #ifndef _SYS_UCONTEXT_H_ #define _SYS_UCONTEXT_H_ +#include +#include #include -struct ucontext { - int uc_onstack; - sigset_t uc_sigmask; /* signal mask used by this context */ - stack_t uc_stack; /* stack used by this context */ - struct ucontext *uc_link; /* pointer to resuming context */ - size_t uc_mcsize; /* size of the machine context passed in */ - mcontext_t uc_mcontext; /* machine specific context */ -}; +#ifndef _SIGSET_T +#define _SIGSET_T +typedef __darwin_sigset_t sigset_t; +#endif + +#ifndef _STACK_T +#define _STACK_T +typedef __darwin_stack_t stack_t; +#endif + +#ifndef _UCONTEXT_T +#define _UCONTEXT_T +#ifndef _POSIX_C_SOURCE +typedef struct ucontext ucontext_t; +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ucontext ucontext_t; +#endif /* _POSIX_C_SOURCE */ +#endif +#ifndef _POSIX_C_SOURCE +#ifndef _UCONTEXT64_T +#define _UCONTEXT64_T +typedef struct ucontext64 ucontext64_t; +#endif +#endif /* _POSIX_C_SOURCE */ -typedef struct ucontext ucontext_t; +#ifdef KERNEL +#include /* user_addr_t, user_size_t */ -struct ucontext64 { - int uc_onstack; - sigset_t uc_sigmask; /* signal mask used by this context */ - stack_t uc_stack; /* stack used by this context */ - struct ucontext *uc_link; /* pointer to resuming context */ - size_t uc_mcsize; /* size of the machine context passed in */ - mcontext64_t uc_mcontext64; /* machine specific context */ +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +/* kernel representation of struct ucontext64 for 64 bit processes */ +struct user_ucontext64 { + int uc_onstack; + sigset_t uc_sigmask; /* signal mask */ + struct user_sigaltstack uc_stack; /* stack */ + user_addr_t uc_link; /* ucontext pointer */ + user_size_t uc_mcsize; /* mcontext size */ + user_addr_t uc_mcontext64; /* machine context */ }; -typedef struct ucontext64 ucontext64_t; +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +typedef struct user_ucontext64 user_ucontext64_t; +#endif /* KERNEL */ #endif /* _SYS_UCONTEXT_H_ */ diff 
--git a/bsd/sys/ucred.h b/bsd/sys/ucred.h index b7f6be44d..7da8c28eb 100644 --- a/bsd/sys/ucred.h +++ b/bsd/sys/ucred.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,48 +59,59 @@ #define _SYS_UCRED_H_ #include +#include #include +#include #ifdef __APPLE_API_UNSTABLE + /* - * Credentials. + * In-kernel credential structure. + * + * Note that this structure should not be used outside the kernel, nor should + * it or copies of it be exported outside. */ struct ucred { + TAILQ_ENTRY(ucred) cr_link; /* never modify this without KAUTH_CRED_HASH_LOCK */ u_long cr_ref; /* reference count */ + + /* credential hash depends on everything from this point on (see kauth_cred_get_hashkey) */ uid_t cr_uid; /* effective user id */ - short cr_ngroups; /* number of groups */ - gid_t cr_groups[NGROUPS]; /* groups */ + uid_t cr_ruid; /* real user id */ + uid_t cr_svuid; /* saved user id */ + short cr_ngroups; /* number of groups in advisory list */ + gid_t cr_groups[NGROUPS]; /* advisory group list */ + gid_t cr_rgid; /* real group id */ + gid_t cr_svgid; /* saved group id */ + uid_t cr_gmuid; /* user id for group membership purposes */ + struct auditinfo cr_au; /* user auditing data */ }; +typedef struct ucred *kauth_cred_t; + /* * This is the external representation of struct ucred. */ struct xucred { u_int cr_version; /* structure layout version */ uid_t cr_uid; /* effective user id */ - short cr_ngroups; /* number of groups */ - gid_t cr_groups[NGROUPS]; /* groups */ + short cr_ngroups; /* number of advisory groups */ + gid_t cr_groups[NGROUPS]; /* advisory group list */ }; #define XUCRED_VERSION 0 #define cr_gid cr_groups[0] -#define NOCRED ((struct ucred *)0) /* no credential available */ -#define FSCRED ((struct ucred *)-1) /* filesystem credential */ +#define NOCRED ((kauth_cred_t )0) /* no credential available */ +#define FSCRED ((kauth_cred_t )-1) /* filesystem credential */ #ifdef KERNEL -#define crhold(cr) \ -{ \ - if (++(cr)->cr_ref == 0) \ - panic("crhold"); \ -} - -struct ucred *crcopy __P((struct ucred *cr)); -struct ucred *crdup __P((struct ucred *cr)); -void crfree __P((struct ucred *cr)); -struct ucred *crget __P((void)); -int crcmp __P((struct ucred *cr1, struct ucred *cr2)); -int suser __P((struct ucred *cred, u_short *acflag)); -void cru2x __P((struct ucred *cr, struct xucred *xcr)); - +#ifdef __APPLE_API_OBSOLETE +__BEGIN_DECLS +int crcmp(kauth_cred_t cr1, kauth_cred_t cr2); +int suser(kauth_cred_t cred, u_short *acflag); +int set_security_token(struct proc * p); +void cru2x(kauth_cred_t cr, struct xucred *xcr); +__END_DECLS +#endif /* __APPLE_API_OBSOLETE */ #endif /* KERNEL */ #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/uio.h b/bsd/sys/uio.h index 6e61edc6a..b133e43a0 100644 --- a/bsd/sys/uio.h +++ b/bsd/sys/uio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,56 +58,213 @@ #ifndef _SYS_UIO_H_ #define _SYS_UIO_H_ +#include +#include + +/* + * [XSI] The ssize_t and size_t types shall be defined as described + * in . + */ +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif + +#ifndef _SSIZE_T +#define _SSIZE_T +typedef __darwin_ssize_t ssize_t; +#endif + /* - * XXX - * iov_base should be a void *. 
+ * [XSI] Structure whose address is passed as the second parameter to the + * readv() and writev() functions. */ +#ifndef _STRUCT_IOVEC +#define _STRUCT_IOVEC struct iovec { - char *iov_base; /* Base address. */ - size_t iov_len; /* Length. */ + void * iov_base; /* [XSI] Base address of I/O memory region */ + size_t iov_len; /* [XSI] Size of region iov_base points to */ }; +#endif -enum uio_rw { UIO_READ, UIO_WRITE }; -/* Segment flag values. */ -enum uio_seg { - UIO_USERSPACE, /* kernel address is virtual, to/from user virtual */ - UIO_USERISPACE, /* kernel address is virtual, to/from user virtual */ - UIO_SYSSPACE, /* kernel address is virtual, to/from system virtual */ - UIO_PHYS_USERSPACE, /* kernel address is physical, to/from user virtual */ - UIO_PHYS_SYSSPACE, /* kernel address is physical, to/from system virtual */ -}; +#ifndef _POSIX_C_SOURCE +/* + * IO direction for uio_t. + * UIO_READ - data moves into iovec(s) associated with uio_t + * UIO_WRITE - data moves out of iovec(s) associated with uio_t + */ +enum uio_rw { UIO_READ, UIO_WRITE }; +#endif #ifdef KERNEL -struct uio { - struct iovec *uio_iov; - int uio_iovcnt; - off_t uio_offset; - int uio_resid; - enum uio_seg uio_segflg; - enum uio_rw uio_rw; - struct proc *uio_procp; + +/* + * XXX This all really wants a uio_internal.h + */ + +#include + + +/* + * user / kernel address space type flags. + * WARNING - make sure to check when adding flags! Be sure new flags + * don't overlap the definitions in uio_internal.h + * NOTES - + * UIO_USERSPACE is equivalent to UIO_USERSPACE32, but UIO_USERSPACE32 + * is preferred. UIO_USERSPACE remains for backwards compatibility. + * UIO_SYSSPACE is equivalent to UIO_SYSSPACE32, but UIO_SYSSPACE32 + * is preferred. UIO_SYSSPACE remains for backwards compatibility. + */ +enum uio_seg { + UIO_USERSPACE = 0, /* kernel address is virtual, to/from user virtual */ + UIO_SYSSPACE = 2, /* kernel address is virtual, to/from system virtual */ + UIO_USERSPACE32 = 5, /* kernel address is virtual, to/from user 32-bit virtual */ + UIO_USERSPACE64 = 8, /* kernel address is virtual, to/from user 64-bit virtual */ + UIO_SYSSPACE32 = 11 /* kernel address is virtual, to/from system virtual */ }; +#define UIO_SEG_IS_USER_SPACE( a_uio_seg ) \ + ( (a_uio_seg) == UIO_USERSPACE64 || (a_uio_seg) == UIO_USERSPACE32 || \ + (a_uio_seg) == UIO_USERSPACE ) + + +__BEGIN_DECLS + +/* + * uio_create - create a uio_t. + * Space is allocated to hold up to a_iovcount number of iovecs. The uio_t + * is not fully initialized until all iovecs are added using uio_addiov calls. + * a_iovcount is the maximum number of iovecs you may add. + */ +uio_t uio_create( int a_iovcount, /* max number of iovecs */ + off_t a_offset, /* current offset */ + int a_spacetype, /* type of address space */ + int a_iodirection ); /* read or write flag */ + +/* + * uio_reset - reset a uio_t. + * Reset the given uio_t to initial values. The uio_t is not fully initialized + * until all iovecs are added using uio_addiov calls. + * The a_iovcount value passed to uio_create is the maximum number of + * iovecs you may add. + */ +void uio_reset( uio_t a_uio, + off_t a_offset, /* current offset */ + int a_spacetype, /* type of address space */ + int a_iodirection ); /* read or write flag */ + +/* + * uio_duplicate - allocate a new uio and make a copy of the given uio_t. + * may return NULL. + */ +uio_t uio_duplicate( uio_t a_uio ); + + +/* + * uio_free - free a uio_t allocated via uio_create.
+ */ +void uio_free( uio_t a_uio ); + +/* + * uio_addiov - add an iovec to the given uio_t. You may call this up to + * the a_iovcount number that was passed to uio_create. + * returns 0 if the add was successful, else non-zero. + */ +int uio_addiov( uio_t a_uio, user_addr_t a_baseaddr, user_size_t a_length ); + +/* + * uio_getiov - get iovec data associated with the given uio_t. Use + * a_index to iterate over each iovec (0 to (uio_iovcnt(uio_t) - 1)). + * a_baseaddr_p and a_length_p may be NULL. + * returns -1 when a_index is out of range or invalid uio_t. + * returns 0 when data is returned. + */ +int uio_getiov( uio_t a_uio, + int a_index, + user_addr_t * a_baseaddr_p, + user_size_t * a_length_p ); + +/* + * uio_update - update the given uio_t for a_count of completed IO. + * This call decrements the current iovec length and residual IO value, + * and increments the current iovec base address and offset value. + */ +void uio_update( uio_t a_uio, user_size_t a_count ); + +/* + * uio_resid - return the residual IO value for the given uio_t + */ +user_ssize_t uio_resid( uio_t a_uio ); + +/* + * uio_setresid - set the residual IO value for the given uio_t + */ +void uio_setresid( uio_t a_uio, user_ssize_t a_value ); + +/* + * uio_iovcnt - return count of active iovecs for the given uio_t + */ +int uio_iovcnt( uio_t a_uio ); + +/* + * uio_offset - return the current offset value for the given uio_t + */ +off_t uio_offset( uio_t a_uio ); + +/* + * uio_setoffset - set the current offset value for the given uio_t + */ +void uio_setoffset( uio_t a_uio, off_t a_offset ); + +/* + * uio_rw - return the read / write flag for the given uio_t + */ +int uio_rw( uio_t a_uio ); + +/* + * uio_setrw - set the read / write flag for the given uio_t + */ +void uio_setrw( uio_t a_uio, int a_value ); + +/* + * uio_isuserspace - return a non-zero value if the address space + * flag is for a user address space (could be 32 or 64 bit). + */ +int uio_isuserspace( uio_t a_uio ); + +/* + * uio_curriovbase - return the base address of the current iovec associated + * with the given uio_t. May return 0. + */ +user_addr_t uio_curriovbase( uio_t a_uio ); + +/* + * uio_curriovlen - return the length value of the current iovec associated + * with the given uio_t. + */ +user_size_t uio_curriovlen( uio_t a_uio ); + /* * Limits */ #define UIO_MAXIOV 1024 /* max 1K of iov's */ #define UIO_SMALLIOV 8 /* 8 on stack, else malloc */ -extern int uiomove __P((caddr_t cp, int n, struct uio *uio)); -extern int uiomove64 __P((unsigned long long cp, int n, struct uio *uio)); -extern int ureadc __P((int c, struct uio *uio)); -extern int uwritec __P((struct uio *uio)); +extern int uiomove(caddr_t cp, int n, struct uio *uio); +extern int uiomove64(unsigned long long cp, int n, struct uio *uio); +extern int ureadc(int c, struct uio *uio); +extern int uwritec(struct uio *uio); +__END_DECLS #endif /* KERNEL */ #ifndef KERNEL -#include __BEGIN_DECLS -ssize_t readv __P((int, const struct iovec *, int)); -ssize_t writev __P((int, const struct iovec *, int)); +ssize_t readv(int, const struct iovec *, int); +ssize_t writev(int, const struct iovec *, int); __END_DECLS #endif /* !KERNEL */ + #endif /* !_SYS_UIO_H_ */ diff --git a/bsd/sys/uio_internal.h b/bsd/sys/uio_internal.h new file mode 100644 index 000000000..a2a7cc0f5 --- /dev/null +++ b/bsd/sys/uio_internal.h @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1982, 1986, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uio.h 8.5 (Berkeley) 2/22/94 + */ + +#ifndef _SYS_UIO_INTERNAL_H_ +#define _SYS_UIO_INTERNAL_H_ + +#include + +#ifdef KERNEL_PRIVATE +#include +#include + +/* + * user / kernel address space type flags. + * WARNING - make sure to check when adding flags! 
Be sure new flags + * don't overlap the definitions in uio.h + */ +// UIO_USERSPACE 0 defined in uio.h +#define UIO_USERISPACE 1 +// UIO_SYSSPACE 2 defined in uio.h +#define UIO_PHYS_USERSPACE 3 +#define UIO_PHYS_SYSSPACE 4 +// UIO_USERSPACE32 5 defined in uio.h +#define UIO_USERISPACE32 6 +#define UIO_PHYS_USERSPACE32 7 +// UIO_USERSPACE64 8 defined in uio.h +#define UIO_USERISPACE64 9 +#define UIO_PHYS_USERSPACE64 10 +// UIO_SYSSPACE32 11 defined in uio.h +#define UIO_PHYS_SYSSPACE32 12 +#define UIO_SYSSPACE64 13 +#define UIO_PHYS_SYSSPACE64 14 + +__BEGIN_DECLS +struct user_iovec; + +// uio_iovsaddr was __private_extern__ temporary change for 3777436 +struct user_iovec * uio_iovsaddr( uio_t a_uio ); +__private_extern__ void uio_calculateresid( uio_t a_uio ); +__private_extern__ void uio_setcurriovlen( uio_t a_uio, user_size_t a_value ); +// uio_spacetype was __private_extern__ temporary change for 3777436 +int uio_spacetype( uio_t a_uio ); +__private_extern__ uio_t + uio_createwithbuffer( int a_iovcount, off_t a_offset, int a_spacetype, + int a_iodirection, void *a_buf_p, int a_buffer_size ); + +/* use kern_iovec for system space requests */ +struct kern_iovec { + u_int32_t iov_base; /* Base address. */ + u_int32_t iov_len; /* Length. */ +}; + +/* use user_iovec for user space requests */ +struct user_iovec { + user_addr_t iov_base; /* Base address. */ + user_size_t iov_len; /* Length. */ +}; + +#if 1 // LP64todo - remove this after kexts adopt the new KPI +#define uio_iov uio_iovs.iovp +#define iovec_32 kern_iovec +#define iovec_64 user_iovec +#define iov32p kiovp +#define iov64p uiovp +#endif + +union iovecs { + struct iovec *iovp; + struct kern_iovec *kiovp; + struct user_iovec *uiovp; +}; + +/* WARNING - use accessor calls for uio_iov and uio_resid since these */ +/* fields vary depending on the originating address space. */ +struct uio { + union iovecs uio_iovs; /* current iovec */ + int uio_iovcnt; /* active iovecs */ + off_t uio_offset; + int uio_resid; /* compatibility uio_resid (pre-LP64) */ + enum uio_seg uio_segflg; + enum uio_rw uio_rw; + proc_t uio_procp; /* obsolete - not used! */ + user_ssize_t uio_resid_64; + int uio_size; /* size for use with kfree */ + int uio_max_iovs; /* max number of iovecs this uio_t can hold */ + u_int32_t uio_flags; +}; + +/* values for uio_flags */ +#define UIO_FLAGS_INITED 0x00000001 +#define UIO_FLAGS_WE_ALLOCED 0x00000002 + +__END_DECLS + +/* + * UIO_SIZEOF - return the amount of space a uio_t requires to + * contain the given number of iovecs. Use this macro to + * create a stack buffer that can be passed to uio_createwithbuffer.
+ */ +#define UIO_SIZEOF( a_iovcount ) \ + ( sizeof(struct uio) + (sizeof(struct user_iovec) * (a_iovcount)) ) + +#define UIO_IS_64_BIT_SPACE( a_uio_t ) \ + ( (a_uio_t)->uio_segflg == UIO_USERSPACE64 || (a_uio_t)->uio_segflg == UIO_USERISPACE64 || \ + (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE64 || (a_uio_t)->uio_segflg == UIO_SYSSPACE64 || \ + (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE64 ) + +#define UIO_IS_32_BIT_SPACE( a_uio_t ) \ + ( (a_uio_t)->uio_segflg == UIO_USERSPACE || (a_uio_t)->uio_segflg == UIO_USERISPACE || \ + (a_uio_t)->uio_segflg == UIO_SYSSPACE || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE || \ + (a_uio_t)->uio_segflg == UIO_USERISPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE32 || \ + (a_uio_t)->uio_segflg == UIO_SYSSPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE32 || \ + (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE || (a_uio_t)->uio_segflg == UIO_USERSPACE32 ) + +#define UIO_IS_USER_SPACE32( a_uio_t ) \ + ( (a_uio_t)->uio_segflg == UIO_USERSPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE32 || \ + (a_uio_t)->uio_segflg == UIO_USERISPACE32 ) +#define UIO_IS_USER_SPACE64( a_uio_t ) \ + ( (a_uio_t)->uio_segflg == UIO_USERSPACE64 || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE64 || \ + (a_uio_t)->uio_segflg == UIO_USERISPACE64 ) +#define UIO_IS_USER_SPACE( a_uio_t ) \ + ( UIO_IS_USER_SPACE32((a_uio_t)) || UIO_IS_USER_SPACE64((a_uio_t)) || \ + (a_uio_t)->uio_segflg == UIO_USERSPACE || (a_uio_t)->uio_segflg == UIO_USERISPACE || \ + (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE ) + + +/* + * W A R N I N G!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * anything in this section will be removed. please move to the uio KPI + */ + +#if 1 // UIO_KPI - WARNING OBSOLETE!!!! LP64todo - remove these!!!! +// DO NOT USE THESE +#define IS_UIO_USER_SPACE32( segflg ) \ + ( (segflg) == UIO_USERSPACE32 || (segflg) == UIO_PHYS_USERSPACE32 || \ + (segflg) == UIO_USERISPACE32 ) +#define IS_UIO_USER_SPACE64( segflg ) \ + ( (segflg) == UIO_USERSPACE64 || (segflg) == UIO_PHYS_USERSPACE64 || \ + (segflg) == UIO_USERISPACE64 ) +#define IS_UIO_USER_SPACE( segflg ) \ + ( IS_UIO_USER_SPACE32((segflg)) || IS_UIO_USER_SPACE64((segflg)) || \ + (segflg) == UIO_USERSPACE || (segflg) == UIO_USERISPACE || \ + (segflg) == UIO_PHYS_USERSPACE ) + +#define IS_UIO_SYS_SPACE32( segflg ) \ + ( (segflg) == UIO_SYSSPACE32 || (segflg) == UIO_PHYS_SYSSPACE32 || \ + (segflg) == UIO_SYSSPACE || (segflg) == UIO_PHYS_SYSSPACE ) +#define IS_UIO_SYS_SPACE64( segflg ) \ + ( (segflg) == UIO_SYSSPACE64 || (segflg) == UIO_PHYS_SYSSPACE64 ) +#define IS_UIO_SYS_SPACE( segflg ) \ + ( IS_UIO_SYS_SPACE32((segflg)) || IS_UIO_SYS_SPACE64((segflg)) ) + +#define IS_OBSOLETE_UIO_SEGFLG(segflg) \ + ( (segflg) == UIO_USERSPACE || (segflg) == UIO_USERISPACE || \ + (segflg) == UIO_SYSSPACE || (segflg) == UIO_PHYS_USERSPACE || \ + (segflg) == UIO_PHYS_SYSSPACE ) +#define IS_VALID_UIO_SEGFLG(segflg) \ + ( IS_UIO_USER_SPACE((segflg)) || IS_UIO_SYS_SPACE((segflg)) ) + +/* accessor routines for uio and embedded iovecs */ +// WARNING all these are OBSOLETE!!!! 
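/*
 * A minimal sketch of the stack-buffer pattern UIO_SIZEOF() supports;
 * "kbuf", "klen" and "offset" are illustrative names, and the
 * CAST_USER_ADDR_T() conversion is an assumption about the caller's
 * environment rather than part of this header:
 *
 *	char	uio_buf[ UIO_SIZEOF(1) ];
 *	uio_t	auio;
 *
 *	auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, UIO_READ,
 *	    &uio_buf[0], sizeof(uio_buf));
 *	if (auio != NULL)
 *		(void) uio_addiov(auio, CAST_USER_ADDR_T(kbuf), klen);
 *
 * Since the uio_t lives entirely in the caller's stack buffer, no
 * uio_free() / kfree() is needed when the I/O completes.
 */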
+static inline int64_t uio_uio_resid( struct uio *a_uiop ); +static inline void uio_uio_resid_add( struct uio *a_uiop, int64_t a_amount ); +static inline void uio_uio_resid_set( struct uio *a_uiop, int64_t a_value ); + +static inline void uio_iov_base_add( struct uio *a_uiop, int64_t a_amount ); +static inline void uio_iov_base_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ); +static inline void uio_iov_len_add( struct uio *a_uiop, int64_t a_amount ); +static inline void uio_iov_len_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ); +static inline u_int64_t uio_iov_len( struct uio *a_uiop ); +static inline u_int64_t uio_iov_len_at( struct uio *a_uiop, int a_index ); +static inline u_int64_t uio_iov_base( struct uio *a_uiop ); +static inline u_int64_t uio_iov_base_at( struct uio *a_uiop, int a_index ); +static inline void uio_next_iov( struct uio *a_uiop ); +static inline void uio_iov_len_set( struct uio *a_uiop, u_int64_t a_value ); +static inline void uio_iov_len_set_at( struct uio *a_uiop, u_int64_t a_value, int a_index ); + + +static inline int64_t uio_uio_resid( struct uio *a_uiop ) +{ +//#warning obsolete - use uio_resid call + return( (int64_t)a_uiop->uio_resid ); +} + +static inline void uio_uio_resid_add( struct uio *a_uiop, int64_t a_amount ) +{ +//#warning obsolete - use uio_update or uio_addiov or uio_setresid if in kernel and you must + a_uiop->uio_resid += ((int32_t) a_amount); +} + +static inline void uio_uio_resid_set( struct uio *a_uiop, int64_t a_value ) +{ +//#warning obsolete - use uio_update or uio_addiov or uio_setresid if in kernel and you must + a_uiop->uio_resid = a_value; +} + +static inline u_int64_t uio_iov_base( struct uio *a_uiop ) +{ +//#warning obsolete - use uio_curriovbase call + return(uio_iov_base_at(a_uiop, 0)); +} + +static inline u_int64_t uio_iov_base_at( struct uio *a_uiop, int a_index ) +{ +//#warning obsolete - use uio_curriovbase call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + /* user space iovec was most likely a struct iovec so we must cast to uintptr_t first */ + return((u_int64_t)((uintptr_t)a_uiop->uio_iovs.iov32p[a_index].iov_base)); + } + if (IS_UIO_SYS_SPACE32(a_uiop->uio_segflg)) { + return((u_int64_t)a_uiop->uio_iovs.iov32p[a_index].iov_base); + } + if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + return(a_uiop->uio_iovs.iov64p[a_index].iov_base); + } + return(0); +} + +static inline u_int64_t uio_iov_len( struct uio *a_uiop ) +{ +//#warning obsolete - use uio_curriovlen call + return(uio_iov_len_at(a_uiop, 0)); +} + +static inline u_int64_t uio_iov_len_at( struct uio *a_uiop, int a_index ) +{ +//#warning obsolete - use uio_curriovlen call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || + IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || + IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + return((u_int64_t)a_uiop->uio_iovs.iov32p[a_index].iov_len); + } + if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + return(a_uiop->uio_iovs.iov64p[a_index].iov_len); + } + return(0); +} + +static inline void uio_iov_len_set_at( struct uio *a_uiop, u_int64_t a_value, int a_index ) +{ +//#warning obsolete - use uio_addiov call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || + IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || + IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov32p[a_index].iov_len = a_value; + } + else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || 
IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov64p[a_index].iov_len = a_value; + } + return; +} + +static inline void uio_iov_len_set( struct uio *a_uiop, u_int64_t a_value ) +{ +//#warning obsolete - use uio_addiov call + return(uio_iov_len_set_at(a_uiop, a_value, 0)); +} + +static inline void uio_iov_len_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ) +{ +//#warning obsolete - use uio_addiov call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || + IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || + IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov32p[a_index].iov_len += ((int32_t) a_amount); + } + else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov64p[a_index].iov_len += a_amount; + } + return; +} + +static inline void uio_iov_len_add( struct uio *a_uiop, int64_t a_amount ) +{ +//#warning obsolete - use uio_addiov call + return(uio_iov_len_add_at(a_uiop, a_amount, 0)); +} + +static inline void uio_iov_base_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ) +{ +//#warning obsolete - use uio_addiov call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || + IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || + IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov32p[a_index].iov_base += ((int32_t) a_amount); + } + else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov64p[a_index].iov_base += a_amount; + } + return; +} + +static inline void uio_iov_base_add( struct uio *a_uiop, int64_t a_amount ) +{ +//#warning obsolete - use uio_addiov call + return(uio_iov_base_add_at(a_uiop, a_amount, 0)); +} + +static inline void uio_next_iov( struct uio *a_uiop ) +{ +//#warning obsolete - use uio_update call + if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || + IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || + IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov32p++; + } + else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { + a_uiop->uio_iovs.iov64p++; + } + return; +} + +/* + * WARNING - this routine relies on iovec_64 being larger than iovec_32 and will + * not work if you are going to initialize an array of iovec_64 as an array of + * iovec_32 then pass that array in a uio (since uio_iov is always expected to + * be an array of like sized iovecs - see how uio_next_iov gets to the next iovec) + */ +static inline void init_iovec( u_int64_t a_base, + u_int64_t a_len, + struct iovec_64 *a_iovp, + int is_64bit_process ) +{ +//#warning obsolete - use uio_create call + if (is_64bit_process) { + a_iovp->iov_base = a_base; + a_iovp->iov_len = a_len; + } + else { + struct iovec_32 *a_iov32p = (struct iovec_32 *) a_iovp; + a_iov32p->iov_base = a_base; + a_iov32p->iov_len = a_len; + } + return; +} + +#define INIT_UIO_BASE( uiop, iovcnt, offset, resid, rw, procp ) \ +{ \ + (uiop)->uio_iovcnt = (iovcnt); \ + (uiop)->uio_offset = (offset); \ + (uiop)->uio_resid = (resid); \ + (uiop)->uio_rw = (rw); \ + (uiop)->uio_procp = (procp); \ +} +#define INIT_UIO_USER32( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ +{ \ + (uiop)->uio_iovs.iov32p = (iovp); \ + (uiop)->uio_segflg = UIO_USERSPACE; \ + INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ +} +#define INIT_UIO_USER64( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ +{ \ + (uiop)->uio_iovs.iov64p = (iovp); \ + (uiop)->uio_segflg = UIO_USERSPACE64; \ + INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ +} 
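/*
 * A sketch of how legacy callers pick between the 32- and 64-bit
 * initializers above; "iov32", "iov64", "off", "resid" and "p" are
 * illustrative names, and IS_64BIT_PROCESS() is the same test that
 * INIT_UIO_USERSPACE below applies:
 *
 *	struct uio auio;
 *
 *	if (IS_64BIT_PROCESS(p))
 *		INIT_UIO_USER64(&auio, &iov64, 1, off, resid, UIO_READ, p);
 *	else
 *		INIT_UIO_USER32(&auio, &iov32, 1, off, resid, UIO_READ, p);
 */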
+#define INIT_UIO_SYS32( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ +{ \ + (uiop)->uio_iovs.iov32p = (iovp); \ + (uiop)->uio_segflg = UIO_SYSSPACE; \ + INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ +} +#define INIT_UIO_USERSPACE( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ +{ \ + if (IS_64BIT_PROCESS((procp))) { \ + (uiop)->uio_iovs.iov64p = (iovp); \ + (uiop)->uio_segflg = UIO_USERSPACE64; \ + } \ + else { \ + (uiop)->uio_iovs.iov32p = (struct iovec_32 *)(iovp); \ + (uiop)->uio_segflg = UIO_USERSPACE; \ + } \ + INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ +} +#define INIT_UIO_SYSSPACE( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ +{ \ + if (0) { /* we do not support 64-bit system space yet */ \ + (uiop)->uio_iovs.iov64p = (iovp); \ + (uiop)->uio_segflg = UIO_SYSSPACE64; \ + } \ + else { \ + (uiop)->uio_iovs.iov32p = (struct iovec_32 *)(iovp); \ + (uiop)->uio_segflg = UIO_SYSSPACE; \ + } \ + INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ +} +#endif // UIO_KPI - WARNING OBSOLETE!!!! + + +#endif /* KERNEL */ +#endif /* !_SYS_UIO_INTERNAL_H_ */ diff --git a/bsd/sys/un.h b/bsd/sys/un.h index e3e4cdb5c..d200eff87 100644 --- a/bsd/sys/un.h +++ b/bsd/sys/un.h @@ -58,34 +58,53 @@ #define _SYS_UN_H_ #include +#include +#include + +/* [XSI] The sa_family_t type shall be defined as described in */ +#ifndef _SA_FAMILY_T +#define _SA_FAMILY_T +typedef __uint8_t sa_family_t; +#endif /* - * Definitions for UNIX IPC domain. + * [XSI] Definitions for UNIX IPC domain. */ struct sockaddr_un { - u_char sun_len; /* sockaddr len including null */ - u_char sun_family; /* AF_UNIX */ - char sun_path[104]; /* path name (gag) */ + unsigned char sun_len; /* sockaddr len including null */ + sa_family_t sun_family; /* [XSI] AF_UNIX */ + char sun_path[104]; /* [XSI] path name (gag) */ }; +#ifndef _POSIX_C_SOURCE +/* Socket options. 
*/ +#define LOCAL_PEERCRED 0x001 /* retrieve peer credentials */ +#endif /* !_POSIX_C_SOURCE */ + + #ifdef KERNEL -#ifdef __APPLE_API_PRIVATE +#ifdef PRIVATE +__BEGIN_DECLS struct mbuf; struct socket; -int uipc_usrreq __P((struct socket *so, int req, struct mbuf *m, - struct mbuf *nam, struct mbuf *control)); -int unp_connect2 __P((struct socket *so, struct socket *so2)); -void unp_dispose __P((struct mbuf *m)); -int unp_externalize __P((struct mbuf *rights)); -void unp_init __P((void)); +int uipc_usrreq(struct socket *so, int req, struct mbuf *m, + struct mbuf *nam, struct mbuf *control); +int uipc_ctloutput (struct socket *so, struct sockopt *sopt); +int unp_connect2(struct socket *so, struct socket *so2); +void unp_dispose(struct mbuf *m); +int unp_externalize(struct mbuf *rights); +void unp_init(void); extern struct pr_usrreqs uipc_usrreqs; -#endif /* __APPLE_API_PRIVATE */ +__END_DECLS +#endif /* PRIVATE */ #else /* !KERNEL */ +#ifndef _POSIX_C_SOURCE /* actual length of an initialized sockaddr_un */ #define SUN_LEN(su) \ (sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path)) +#endif /* !_POSIX_C_SOURCE */ #endif /* KERNEL */ diff --git a/bsd/sys/unistd.h b/bsd/sys/unistd.h index d455b4db7..74fcc4411 100644 --- a/bsd/sys/unistd.h +++ b/bsd/sys/unistd.h @@ -58,9 +58,6 @@ #ifndef _SYS_UNISTD_H_ #define _SYS_UNISTD_H_ -/* compile-time symbolic constants */ -#define _POSIX_JOB_CONTROL /* implementation supports job control */ - /* * Although we have saved user/group IDs, we do not use them in setuid * as described in POSIX 1003.1, because the feature does not work for @@ -71,46 +68,74 @@ #define _POSIX_SAVED_IDS /* saved set-user-ID and set-group-ID */ #endif -#define _POSIX_VERSION 198808L -#define _POSIX2_VERSION 199212L +#define _POSIX_VERSION 200112L +#define _POSIX2_VERSION 200112L /* execution-time symbolic constants */ - /* chown requires appropriate privileges */ -#define _POSIX_CHOWN_RESTRICTED 1 - /* too-long path components generate errors */ -#define _POSIX_NO_TRUNC 1 /* may disable terminal special characters */ #ifndef _POSIX_VDISABLE #define _POSIX_VDISABLE ((unsigned char)'\377') #endif -#define _POSIX_THREADS -#define _POSIX_THREAD_ATTR_STACKADDR -#define _POSIX_THREAD_ATTR_STACKSIZE -#define _POSIX_THREAD_PRIORITY_SCHEDULING -#define _POSIX_THREAD_PRIO_INHERIT -#define _POSIX_THREAD_PRIO_PROTECT - #define _POSIX_THREAD_KEYS_MAX 128 /* access function */ #define F_OK 0 /* test for existence of file */ -#define X_OK 0x01 /* test for execute or search permission */ -#define W_OK 0x02 /* test for write permission */ -#define R_OK 0x04 /* test for read permission */ +#define X_OK (1<<0) /* test for execute or search permission */ +#define W_OK (1<<1) /* test for write permission */ +#define R_OK (1<<2) /* test for read permission */ + +#ifndef _POSIX_C_SOURCE +/* + * Extended access functions. + * Note that we depend on these matching the definitions in sys/kauth.h, + * but with the bits shifted left by 8.
+ */ +#define _READ_OK (1<<9) /* read file data / read directory */ +#define _WRITE_OK (1<<10) /* write file data / add file to directory */ +#define _EXECUTE_OK (1<<11) /* execute file / search in directory*/ +#define _DELETE_OK (1<<12) /* delete file / delete directory */ +#define _APPEND_OK (1<<13) /* append to file / add subdirectory to directory */ +#define _RMFILE_OK (1<<14) /* - / remove file from directory */ +#define _RATTR_OK (1<<15) /* read basic attributes */ +#define _WATTR_OK (1<<16) /* write basic attributes */ +#define _REXT_OK (1<<17) /* read extended attributes */ +#define _WEXT_OK (1<<18) /* write extended attributes */ +#define _RPERM_OK (1<<19) /* read permissions */ +#define _WPERM_OK (1<<20) /* write permissions */ +#define _CHOWN_OK (1<<21) /* change ownership */ + +#define _ACCESS_EXTENDED_MASK (_READ_OK | _WRITE_OK | _EXECUTE_OK | \ + _DELETE_OK | _APPEND_OK | \ + _RMFILE_OK | _REXT_OK | \ + _WEXT_OK | _RATTR_OK | _WATTR_OK | _RPERM_OK | \ + _WPERM_OK | _CHOWN_OK) +#endif /* whence values for lseek(2) */ +#ifndef SEEK_SET #define SEEK_SET 0 /* set file offset to offset */ #define SEEK_CUR 1 /* set file offset to current plus offset */ #define SEEK_END 2 /* set file offset to EOF plus offset */ +#endif /* !SEEK_SET */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE /* whence values for lseek(2); renamed by POSIX 1003.1 */ #define L_SET SEEK_SET #define L_INCR SEEK_CUR #define L_XTND SEEK_END #endif +#ifndef _POSIX_C_SOURCE +struct accessx_descriptor { + unsigned ad_name_offset; + int ad_flags; + int ad_pad[2]; +}; +#define ACCESSX_MAX_DESCRIPTORS 100 +#define ACCESSX_MAX_TABLESIZE (16 * 1024) +#endif + /* configurable pathname variables */ #define _PC_LINK_MAX 1 #define _PC_MAX_CANON 2 @@ -122,16 +147,15 @@ #define _PC_NO_TRUNC 8 #define _PC_VDISABLE 9 -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE #define _PC_NAME_CHARS_MAX 10 #define _PC_CASE_SENSITIVE 11 #define _PC_CASE_PRESERVING 12 +#define _PC_EXTENDED_SECURITY_NP 13 +#define _PC_AUTH_OPAQUE_NP 14 #endif /* configurable system strings */ #define _CS_PATH 1 -/* async IO support */ -#define _POSIX_ASYNCHRONOUS_IO - #endif /* !_SYS_UNISTD_H_ */ diff --git a/bsd/sys/unpcb.h b/bsd/sys/unpcb.h index 408cc93d1..2a5f8af33 100644 --- a/bsd/sys/unpcb.h +++ b/bsd/sys/unpcb.h @@ -60,6 +60,7 @@ #include #include #include +#include /* * Protocol control block for an active @@ -86,9 +87,13 @@ * so that changes in the sockbuf may be computed to modify * back pressure on the sender accordingly. */ -#ifdef __APPLE_API_PRIVATE -typedef u_quad_t unp_gen_t; + +typedef u_quad_t unp_gen_t; + +#ifdef PRIVATE LIST_HEAD(unp_head, unpcb); +#ifdef KERNEL +#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) struct unpcb { LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */ @@ -102,31 +107,67 @@ struct unpcb { int unp_cc; /* copy of rcv.sb_cc */ int unp_mbcnt; /* copy of rcv.sb_mbcnt */ unp_gen_t unp_gencnt; /* generation count of this instance */ + int unp_flags; /* flags */ + struct xucred unp_peercred; /* peer credentials, if applicable */ }; +#endif /* KERNEL */ -#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) -#endif /* __APPLE_API_PRIVATE */ +/* + * Flags in unp_flags. + * + * UNP_HAVEPC - indicates that the unp_peercred member is filled in + * and is really the credentials of the connected peer. This is used + * to determine whether the contents should be sent to the user or + * not. 
+ * + * UNP_HAVEPCCACHED - indicates that the unp_peercred member is filled + * in, but does *not* contain the credentials of the connected peer + * (there may not even be a peer). This is set in unp_listen() when + * it fills in unp_peercred for later consumption by unp_connect(). + */ +#define UNP_HAVEPC 0x001 +#define UNP_HAVEPCCACHED 0x002 + +#ifdef KERNEL +struct unpcb_compat { +#else /* KERNEL */ +#define unpcb_compat unpcb +struct unpcb { +#endif /* KERNEL */ + LIST_ENTRY(unpcb_compat) unp_link; /* glue on list of all PCBs */ + struct socket *unp_socket; /* pointer back to socket */ + struct vnode *unp_vnode; /* if associated with file */ + ino_t unp_ino; /* fake inode number */ + struct unpcb_compat *unp_conn; /* control block of connected socket */ + struct unp_head unp_refs; /* referencing socket linked list */ + LIST_ENTRY(unpcb_compat) unp_reflink; /* link in unp_refs list */ + struct sockaddr_un *unp_addr; /* bound address of socket */ + int unp_cc; /* copy of rcv.sb_cc */ + int unp_mbcnt; /* copy of rcv.sb_mbcnt */ + unp_gen_t unp_gencnt; /* generation count of this instance */ +}; /* Hack alert -- this structure depends on . */ -#ifdef _SYS_SOCKETVAR_H_ -#ifdef __APPLE_API_UNSTABLE -struct xunpcb { - size_t xu_len; /* length of this structure */ - struct unpcb *xu_unpp; /* to help netstat, fstat */ - struct unpcb xu_unp; /* our information */ - union { - struct sockaddr_un xuu_addr; /* our bound address */ - char xu_dummy1[256]; - } xu_au; -#define xu_addr xu_au.xuu_addr - union { - struct sockaddr_un xuu_caddr; /* their bound address */ - char xu_dummy2[256]; - } xu_cau; -#define xu_caddr xu_cau.xuu_caddr - struct xsocket xu_socket; - u_quad_t xu_alignment_hack; +#ifdef _SYS_SOCKETVAR_H_ +struct xunpcb { + size_t xu_len; /* length of this structure */ + struct unpcb_compat *xu_unpp; /* to help netstat, fstat */ + struct unpcb_compat xu_unp; /* our information */ + union { + struct sockaddr_un xuu_addr; /* our bound address */ + char xu_dummy1[256]; + } xu_au; +#define xu_addr xu_au.xuu_addr + union { + struct sockaddr_un xuu_caddr; /* their bound address */ + char xu_dummy2[256]; + } xu_cau; +#define xu_caddr xu_cau.xuu_caddr + struct xsocket xu_socket; + u_quad_t xu_alignment_hack; }; +#endif /* _SYS_SOCKETVAR_H_ */ +#endif /* PRIVATE */ struct xunpgen { size_t xug_len; @@ -134,7 +175,5 @@ struct xunpgen { unp_gen_t xug_gen; so_gen_t xug_sogen; }; -#endif /* __APPLE_API_UNSTABLE */ -#endif /* _SYS_SOCKETVAR_H_ */ #endif /* _SYS_UNPCB_H_ */ diff --git a/bsd/sys/user.h b/bsd/sys/user.h index 8c1a005ab..b90a52481 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -68,14 +68,16 @@ #include #endif #include +#ifdef KERNEL_PRIVATE #include +#endif #include /* XXX */ #include #ifdef KERNEL - #ifdef __APPLE_API_PRIVATE -struct nlminfo; +#include + /* * Per-thread U area. */ @@ -83,39 +85,44 @@ struct uthread { int *uu_ar0; /* address of users saved R0 */ /* syscall parameters, results and catches */ - int uu_arg[8]; /* arguments to current system call */ + u_int64_t uu_arg[8]; /* arguments to current system call */ int *uu_ap; /* pointer to arglist */ int uu_rval[2]; /* thread exception handling */ int uu_code; /* ``code'' to trap */ - char uu_cursig; /* p_cursig for exc. */ - struct nlminfo *uu_nlminfo; /* for rpc.lockd */ - /* support for syscalls which use continuations */ + char uu_cursig; /* p_cursig for exc. 
*/ + /* support for select - across system calls */ + struct _select { + u_int32_t *ibits, *obits; /* bits to select on */ + uint nbytes; /* number of bytes in ibits and obits */ + wait_queue_set_t wqset; /* cached across select calls */ + size_t allocsize; /* ...size of select cache */ + u_int64_t abstime; + int poll; + int error; + int count; + char * wql; + } uu_select; /* saved state for select() */ + /* to support continuations */ union { - struct _select { - u_int32_t *ibits, *obits; /* bits to select on */ - uint nbytes; /* number of bytes in ibits and obits */ - u_int64_t abstime; - int poll; - int error; - int count; - int nfcount; - char * wql; - int allocsize; /* select allocated size */ - } ss_select; /* saved state for select() */ - struct _wait { - int f; - } ss_wait; /* saved state for wait?() */ - struct _owait { - int pid; - int *status; - int options; - struct rusage *rusage; - } ss_owait; - int uu_nfs_myiod; /* saved state for nfsd */ + int uu_nfs_myiod; /* saved state for nfsd */ + struct _kevent_scan { + kevent_callback_t call; /* per-event callback */ + kevent_continue_t cont; /* whole call continuation */ + uint64_t deadline; /* computed deadline for operation */ + void *data; /* caller's private data */ + } ss_kevent_scan; /* saved state for kevent_scan() */ + struct _kevent { + struct _kevent_scan scan;/* space for the generic data */ + struct fileproc *fp; /* fileproc we hold iocount on */ + int fd; /* filedescriptor for kq */ + register_t *retval; /* place to store return val */ + user_addr_t eventlist; /* user-level event list address */ + int eventcount; /* user-level event count */ + int eventout; /* number of events output */ + } ss_kevent; /* saved state for kevent() */ } uu_state; - /* internal support for continuation framework */ int (*uu_continuation)(int); int uu_pri; @@ -123,27 +130,48 @@ struct uthread { int uu_flag; struct proc * uu_proc; void * uu_userstate; - wait_queue_sub_t uu_wqsub; sigset_t uu_siglist; /* signals pending for the thread */ sigset_t uu_sigwait; /* sigwait on this thread*/ sigset_t uu_sigmask; /* signal mask for the thread */ sigset_t uu_oldmask; /* signal mask saved before sigpause */ - thread_act_t uu_act; + thread_t uu_act; sigset_t uu_vforkmask; /* saved signal mask during vfork */ TAILQ_ENTRY(uthread) uu_list; /* List of uthreads in proc */ struct kaudit_record *uu_ar; /* audit record */ struct task* uu_aio_task; /* target task for async io */ + + /* network support for dlil layer locking */ + u_int32_t dlil_incremented_read; + lck_mtx_t *uu_mtx; + + int uu_lowpri_delay; + + struct ucred *uu_ucred; /* per thread credential */ + int uu_defer_reclaims; + vnode_t uu_vreclaims; + +#ifdef JOE_DEBUG + int uu_iocount; + int uu_vpindex; + void * uu_vps[32]; +#endif }; typedef struct uthread * uthread_t; /* Definition of uu_flag */ -#define USAS_OLDMASK 0x1 /* need to restore mask before pause */ -#define UNO_SIGMASK 0x2 /* exited thread; invalid sigmask */ -/* Kept same as in proc */ -#define P_VFORK 0x2000000 /* process has vfork children */ +#define UT_SAS_OLDMASK 0x00000001 /* need to restore mask before pause */ +#define UT_NO_SIGMASK 0x00000002 /* exited thread; invalid sigmask */ +#define UT_NOTCANCELPT 0x00000004 /* not a cancelation point */ +#define UT_CANCEL 0x00000008 /* thread marked for cancel */ +#define UT_CANCELED 0x00000010 /* thread cancelled */ +#define UT_CANCELDISABLE 0x00000020 /* thread cancel disabled */ + +#define UT_VFORK 0x02000000 /* thread has vfork children */ +#define UT_SETUID 0x04000000 /* thread is 
settugid() */ +#define UT_WASSETUID 0x08000000 /* thread was settugid() (in vfork) */ #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/sys/utfconv.h b/bsd/sys/utfconv.h index 18db6b179..1feafbf5b 100644 --- a/bsd/sys/utfconv.h +++ b/bsd/sys/utfconv.h @@ -37,13 +37,16 @@ #define UTF_PRECOMPOSED 0x08 /* generate precomposed UCS-2 */ __BEGIN_DECLS -size_t utf8_encodelen __P((const u_int16_t *, size_t, u_int16_t, int)); +size_t utf8_encodelen(const u_int16_t *, size_t, u_int16_t, int); -int utf8_encodestr __P((const u_int16_t *, size_t, u_int8_t *, size_t *, - size_t, u_int16_t, int)); +int utf8_encodestr(const u_int16_t *, size_t, u_int8_t *, size_t *, + size_t, u_int16_t, int); + +int utf8_decodestr(const u_int8_t *, size_t, u_int16_t *,size_t *, + size_t, u_int16_t, int); + +int utf8_validatestr(const u_int8_t*, size_t); -int utf8_decodestr __P((const u_int8_t *, size_t, u_int16_t *,size_t *, - size_t, u_int16_t, int)); __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/utsname.h b/bsd/sys/utsname.h index 35779be0d..8f3d2e2f8 100644 --- a/bsd/sys/utsname.h +++ b/bsd/sys/utsname.h @@ -61,20 +61,20 @@ #ifndef _SYS_UTSNAME_H #define _SYS_UTSNAME_H +#include + #define _SYS_NAMELEN 256 struct utsname { - char sysname[_SYS_NAMELEN]; /* Name of OS */ - char nodename[_SYS_NAMELEN]; /* Name of this network node */ - char release[_SYS_NAMELEN]; /* Release level */ - char version[_SYS_NAMELEN]; /* Version level */ - char machine[_SYS_NAMELEN]; /* Hardware type */ + char sysname[_SYS_NAMELEN]; /* [XSI] Name of OS */ + char nodename[_SYS_NAMELEN]; /* [XSI] Name of this network node */ + char release[_SYS_NAMELEN]; /* [XSI] Release level */ + char version[_SYS_NAMELEN]; /* [XSI] Version level */ + char machine[_SYS_NAMELEN]; /* [XSI] Hardware type */ }; -#include - __BEGIN_DECLS -int uname __P((struct utsname *)); +int uname(struct utsname *); __END_DECLS #endif /* !_SYS_UTSNAME_H */ diff --git a/bsd/sys/ux_exception.h b/bsd/sys/ux_exception.h index 2b55b6cb5..2f4372642 100644 --- a/bsd/sys/ux_exception.h +++ b/bsd/sys/ux_exception.h @@ -57,6 +57,10 @@ extern mach_port_name_t ux_exception_port; +boolean_t machine_exception(int exception, int code, int subcode, + int *unix_signal, int *unix_code); +void ux_handler_init(void); + #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/version.h b/bsd/sys/version.h index 308e5f5f4..a11538fb6 100644 --- a/bsd/sys/version.h +++ b/bsd/sys/version.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * diff --git a/bsd/sys/vfs_context.h b/bsd/sys/vfs_context.h new file mode 100644 index 000000000..931271de2 --- /dev/null +++ b/bsd/sys/vfs_context.h @@ -0,0 +1,14 @@ +#ifndef _BSD_SYS_VFS_CONTEXT_H_ +#define _BSD_SYS_VFS_CONTEXT_H_ + +#include +#include +#include +#include + +struct vfs_context { + proc_t vc_proc; + ucred_t vc_ucred; +}; + +#endif /* !_BSD_SYS_VFS_CONTEXT_H_ */ diff --git a/bsd/sys/vm.h b/bsd/sys/vm.h index 1718e1369..2ff69a04b 100644 --- a/bsd/sys/vm.h +++ b/bsd/sys/vm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,6 +63,9 @@ #define _SYS_VM_H #include +#include + +#ifdef BSD_KERNEL_PRIVATE /* Machine specific config stuff */ #if defined(KERNEL) && !defined(MACH_USER_API) @@ -71,7 +74,6 @@ #include #endif -#ifdef __APPLE_API_OBSOLETE /* * Shareable process virtual address space. * May eventually be merged with vm_map. @@ -92,22 +94,55 @@ struct vmspace { caddr_t vm_maxsaddr; /* user VA at max stack growth */ }; -#else /* __APPLE_API_OBSOLETE */ +#ifdef KERNEL +// LP64todo - should this move? +/* LP64 version of vmspace. all pointers + * grow when we're dealing with a 64-bit process. + * WARNING - keep in sync with vmspace + */ + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_vmspace { + int vm_refcnt; /* number of references */ + user_addr_t vm_shm; /* SYS5 shared memory private data XXX */ + segsz_t vm_rssize; /* current resident set size in pages */ + segsz_t vm_swrss; /* resident set size before last swap */ + segsz_t vm_tsize; /* text size (pages) XXX */ + segsz_t vm_dsize; /* data size (pages) XXX */ + segsz_t vm_ssize; /* stack size (pages) */ + user_addr_t vm_taddr; /* user virtual address of text XXX */ + user_addr_t vm_daddr; /* user virtual address of data XXX */ + user_addr_t vm_maxsaddr; /* user VA at max stack growth */ +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL */ + +#include + +#else /* BSD_KERNEL_PRIVATE */ /* just to keep kinfo_proc happy */ +/* NOTE: Pointer fields are size variant for LP64 */ struct vmspace { - int32_t dummy[10]; + int32_t dummy; + caddr_t dummy2; + int32_t dummy3[5]; + caddr_t dummy4[3]; }; -#endif /* __APPLE_API_OBSOLETE */ -#ifdef KERNEL +#endif /* BSD_KERNEL_PRIVATE */ -#ifdef __APPLE_API_PRIVATE -#ifdef BSD_BUILD -#include -#endif /* BSD_BUILD */ -#endif /* __APPLE_API_PRIVATE */ +#ifdef KERNEL +__BEGIN_DECLS struct proc *current_proc(void); +__END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/vmmeter.h b/bsd/sys/vmmeter.h index 0564670bc..caa2fbc15 100644 --- a/bsd/sys/vmmeter.h +++ b/bsd/sys/vmmeter.h @@ -110,9 +110,6 @@ struct vmmeter { u_int v_inactive_target; /* number of pages desired inactive */ u_int v_inactive_count; /* number of pages inactive */ }; -#ifdef KERNEL -extern struct vmmeter cnt; -#endif /* systemwide totals computed every five seconds */ struct vmtotal diff --git a/bsd/sys/vnioctl.h b/bsd/sys/vnioctl.h index e3a3729a6..37bb0de23 100644 --- a/bsd/sys/vnioctl.h +++ b/bsd/sys/vnioctl.h @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ /* * Copyright (c) 1988 University of Utah. 
* Copyright (c) 1990, 1993 @@ -46,10 +67,9 @@ #define _SYS_VNIOCTL_H_ #include +#include -#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE /* * Ioctl definitions for file (vnode) disk pseudo-device. */ @@ -62,10 +82,28 @@ typedef enum { struct vn_ioctl { char * vn_file; /* pathname of file to mount */ - int vn_size; /* (returned) size of disk */ + int vn_size; /* (returned) size of disk */ vncontrol_t vn_control; }; +#ifdef KERNEL_PRIVATE + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=natural +#endif + +struct user_vn_ioctl { + u_int64_t vn_file; /* pathname of file to mount */ + int vn_size; /* (returned) size of disk */ + vncontrol_t vn_control; +}; + +#if __DARWIN_ALIGN_NATURAL +#pragma options align=reset +#endif + +#endif /* KERNEL_PRIVATE */ + /* * Before you can use a unit, it must be configured with VNIOCSET. * The configuration persists across opens and closes of the device; @@ -74,11 +112,16 @@ struct vn_ioctl { */ #define VNIOCATTACH _IOWR('F', 0, struct vn_ioctl) /* attach file */ #define VNIOCDETACH _IOWR('F', 1, struct vn_ioctl) /* detach disk */ -#define VNIOCGSET _IOWR('F', 2, u_long ) /* set global option */ -#define VNIOCGCLEAR _IOWR('F', 3, u_long ) /* reset --//-- */ -#define VNIOCUSET _IOWR('F', 4, u_long ) /* set unit option */ -#define VNIOCUCLEAR _IOWR('F', 5, u_long ) /* reset --//-- */ +#define VNIOCGSET _IOWR('F', 2, u_int32_t ) /* set global option */ +#define VNIOCGCLEAR _IOWR('F', 3, u_int32_t ) /* reset --//-- */ +#define VNIOCUSET _IOWR('F', 4, u_int32_t ) /* set unit option */ +#define VNIOCUCLEAR _IOWR('F', 5, u_int32_t ) /* reset --//-- */ #define VNIOCSHADOW _IOWR('F', 6, struct vn_ioctl) /* attach shadow */ +#ifdef KERNEL_PRIVATE +#define VNIOCATTACH64 _IOWR('F', 0, struct user_vn_ioctl) /* attach file - LP64 */ +#define VNIOCDETACH64 _IOWR('F', 1, struct user_vn_ioctl) /* detach disk - LP64 */ +#define VNIOCSHADOW64 _IOWR('F', 6, struct user_vn_ioctl) /* attach shadow - LP64 */ +#endif /* KERNEL_PRIVATE */ #define VN_LABELS 0x1 /* Use disk(/slice) labels */ #define VN_FOLLOW 0x2 /* Debug flow in vn driver */ @@ -87,8 +130,5 @@ struct vn_ioctl { #define VN_DONTCLUSTER 0x10 /* Don't cluster */ #define VN_RESERVE 0x20 /* Pre-reserve swap */ -#endif /* __APPLE_API_PRIVATE */ - -#endif /* KERNEL_PRIVATE */ #endif /* _SYS_VNIOCTL_H_*/ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index a6f13a11b..9bac1aec0 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,19 +60,11 @@ #include #include -#include -#include - -#include -#include - -#include #ifdef KERNEL -#include -#include -#endif /* KERNEL */ +#include +#include +#endif -#ifdef __APPLE_API_PRIVATE /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, @@ -96,177 +88,300 @@ enum vtagtype { VT_UNION, VT_HFS, VT_VOLFS, VT_DEVFS, VT_WEBDAV, VT_UDF, VT_AFP, VT_CDDA, VT_CIFS,VT_OTHER}; + /* - * Each underlying filesystem allocates its own private area and hangs - * it from v_data. If non-null, this area is freed in getnewvnode(). 
+ * flags for VNOP_BLOCKMAP */ -LIST_HEAD(buflists, buf); +#define VNODE_READ 0x01 +#define VNODE_WRITE 0x02 -#define MAX_CLUSTERS 4 /* maximum number of vfs clusters per vnode */ -struct v_cluster { - unsigned int start_pg; - unsigned int last_pg; -}; -struct v_padded_clusters { - long v_pad; - struct v_cluster v_c[MAX_CLUSTERS]; -}; +/* flags for VNOP_ALLOCATE */ +#define PREALLOCATE 0x00000001 /* preallocate allocation blocks */ +#define ALLOCATECONTIG 0x00000002 /* allocate contigious space */ +#define ALLOCATEALL 0x00000004 /* allocate all requested space */ + /* or no space at all */ +#define FREEREMAINDER 0x00000008 /* deallocate allocated but */ + /* unfilled blocks */ +#define ALLOCATEFROMPEOF 0x00000010 /* allocate from the physical eof */ +#define ALLOCATEFROMVOL 0x00000020 /* allocate from the volume offset */ /* - * Reading or writing any of these items requires holding the appropriate lock. - * v_freelist is locked by the global vnode_free_list simple lock. - * v_mntvnodes is locked by the global mntvnodes simple lock. - * v_flag, v_usecount, v_holdcount and v_writecount are - * locked by the v_interlock simple lock. + * Token indicating no attribute value yet assigned. some user source uses this */ -struct vnode { - u_long v_flag; /* vnode flags (see below) */ - long v_usecount; /* reference count of users */ - long v_holdcnt; /* page & buffer references */ - daddr_t v_lastr; /* last read (read-ahead) */ - u_long v_id; /* capability identifier */ - struct mount *v_mount; /* ptr to vfs we are in */ - int (**v_op)(void *); /* vnode operations vector */ - TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ - LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ - struct buflists v_cleanblkhd; /* clean blocklist head */ - struct buflists v_dirtyblkhd; /* dirty blocklist head */ - long v_numoutput; /* num of writes in progress */ - enum vtype v_type; /* vnode type */ - union { - struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ - struct socket *vu_socket; /* unix ipc (VSOCK) */ - struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ - struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ - char *vu_name; /* name (only for VREG) */ - } v_un; - struct ubc_info *v_ubcinfo; /* valid for (VREG) */ - struct nqlease *v_lease; /* Soft reference to lease */ - void *v_scmap; /* pointer to sparse cluster map */ - int v_scdirty; /* number of dirty pages in the sparse cluster map */ - daddr_t v_ciosiz; /* real size of I/O for cluster */ - int v_clen; /* length of current cluster */ - int v_ralen; /* Read-ahead length */ - daddr_t v_maxra; /* last readahead block */ - union { - simple_lock_data_t v_ilk; /* lock on usecount and flag */ - struct v_padded_clusters v_cl; /* vfs cluster IO */ - } v_un1; -#define v_clusters v_un1.v_cl.v_c -#define v_interlock v_un1.v_ilk - - struct lock__bsd__ *v_vnlock; /* used for non-locking fs's */ - long v_writecount; /* reference count of writers */ - enum vtagtype v_tag; /* type of underlying data */ - void *v_data; /* private data for fs */ -}; -#define v_mountedhere v_un.vu_mountedhere -#define v_socket v_un.vu_socket -#define v_specinfo v_un.vu_specinfo -#define v_fifoinfo v_un.vu_fifoinfo +#define VNOVAL (-1) -// NOTE: Do not use these macros. They are for vfs internal use only. -#define VNAME(vp) ((char *)((vp)->v_type == VREG ? (vp)->v_un.vu_name : (vp)->v_scmap)) -#define VPARENT(vp) ((struct vnode *)((vp)->v_type == VREG ? (vp)->v_un1.v_cl.v_pad : (vp)->v_scdirty)) +#ifdef KERNEL +/* + * Flags for ioflag. 
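The VNOP_ALLOCATE flags defined above are designed to be OR-ed together; for example, an all-or-nothing contiguous reservation taken from the volume's physical end of file could be requested as:

        int alloc_flags = PREALLOCATE | ALLOCATECONTIG | ALLOCATEALL | ALLOCATEFROMPEOF;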
+ */ +#define IO_UNIT 0x0001 /* do I/O as atomic unit */ +#define IO_APPEND 0x0002 /* append write to end */ +#define IO_SYNC 0x0004 /* do I/O synchronously */ +#define IO_NODELOCKED 0x0008 /* underlying node already locked */ +#define IO_NDELAY 0x0010 /* FNDELAY flag set in file table */ +#define IO_NOZEROFILL 0x0020 /* F_SETSIZE fcntl uses to prevent zero filling */ +#define IO_TAILZEROFILL 0x0040 /* zero fills at the tail of write */ +#define IO_HEADZEROFILL 0x0080 /* zero fills at the head of write */ +#define IO_NOZEROVALID 0x0100 /* do not zero fill if valid page */ +#define IO_NOZERODIRTY 0x0200 /* do not zero fill if page is dirty */ +#define IO_CLOSE 0x0400 /* I/O issued from close path */ +#define IO_NOCACHE 0x0800 /* same effect as VNOCACHE_DATA, but only for this 1 I/O */ +#define IO_RAOFF 0x1000 /* same effect as VRAOFF, but only for this 1 I/O */ +#define IO_DEFWRITE 0x2000 /* defer write if vfs.defwrite is set */ /* - * Vnode flags. + * Component Name: this structure describes the pathname + * information that is passed through the VNOP interface. */ -#define VROOT 0x000001 /* root of its file system */ -#define VTEXT 0x000002 /* vnode is a pure text prototype */ -#define VSYSTEM 0x000004 /* vnode being used by kernel */ -#define VISTTY 0x000008 /* vnode represents a tty */ -#define VWASMAPPED 0x000010 /* vnode was mapped before */ -#define VTERMINATE 0x000020 /* terminating memory object */ -#define VTERMWANT 0x000040 /* wating for memory object death */ -#define VMOUNT 0x000080 /* mount operation in progress */ -#define VXLOCK 0x000100 /* vnode is locked to change underlying type */ -#define VXWANT 0x000200 /* process is waiting for vnode */ -#define VBWAIT 0x000400 /* waiting for output to complete */ -#define VALIASED 0x000800 /* vnode has an alias */ -#define VORECLAIM 0x001000 /* vm object is being reclaimed */ -#define VNOCACHE_DATA 0x002000 /* don't keep data cached once it's been consumed */ -#define VSTANDARD 0x004000 /* vnode obtained from common pool */ -#define VAGE 0x008000 /* Insert vnode at head of free list */ -#define VRAOFF 0x010000 /* read ahead disabled */ -#define VUINIT 0x020000 /* ubc_info being initialized */ -#define VUWANT 0x040000 /* process is wating for VUINIT */ -#define VUINACTIVE 0x080000 /* UBC vnode is on inactive list */ -#define VHASDIRTY 0x100000 /* UBC vnode may have 1 or more */ - /* delayed dirty pages that need to be flushed at the next 'sync' */ -#define VSWAP 0x200000 /* vnode is being used as swapfile */ -#define VTHROTTLED 0x400000 /* writes or pageouts have been throttled */ - /* wakeup tasks waiting when count falls below threshold */ -#define VNOFLUSH 0x800000 /* don't vflush() if SKIPSYSTEM */ -#define VDELETED 0x1000000 /* this vnode is being deleted */ -#define VFULLFSYNC 0x2000000 /* ask the drive to write the data to the media */ -#define VHASBEENPAGED 0x4000000 /* vnode has been recently paged to */ +struct componentname { + /* + * Arguments to lookup. + */ + u_long cn_nameiop; /* lookup operation */ + u_long cn_flags; /* flags (see below) */ +#ifdef BSD_KERNEL_PRIVATE + vfs_context_t cn_context; + void * pad_obsolete2; + +/* XXX use of these defines are deprecated */ +#define cn_proc (cn_context->vc_proc + 0) /* non-lvalue */ +#define cn_cred (cn_context->vc_ucred + 0) /* non-lvalue */ + +#else + void * obsolete1; /* use vfs_context_t */ + void * obsolete2; /* use vfs_context_t */ +#endif + /* + * Shared between lookup and commit routines. 
+ */ + char *cn_pnbuf; /* pathname buffer */ + long cn_pnlen; /* length of allocated buffer */ + char *cn_nameptr; /* pointer to looked up name */ + long cn_namelen; /* length of looked up component */ + u_long cn_hash; /* hash value of looked up name */ + long cn_consume; /* chars to consume in lookup() */ +}; /* - * Vnode attributes. A field value of VNOVAL represents a field whose value - * is unavailable (getattr) or which is not to be changed (setattr). + * component name operations (for VNOP_LOOKUP) */ -struct vattr { - enum vtype va_type; /* vnode type (for create) */ - u_short va_mode; /* files access mode and type */ - short va_nlink; /* number of references to file */ - uid_t va_uid; /* owner user id */ - gid_t va_gid; /* owner group id */ - long va_fsid; /* file system id (dev for now) */ - long va_fileid; /* file id */ - u_quad_t va_size; /* file size in bytes */ - long va_blocksize; /* blocksize preferred for i/o */ - struct timespec va_atime; /* time of last access */ - struct timespec va_mtime; /* time of last modification */ - struct timespec va_ctime; /* time file changed */ - u_long va_gen; /* generation number of file */ - u_long va_flags; /* flags defined for file */ - dev_t va_rdev; /* device the special file represents */ - u_quad_t va_bytes; /* bytes of disk space held by file */ - u_quad_t va_filerev; /* file modification number */ - u_int va_vaflags; /* operations flags, see below */ - long va_spare; /* remain quad aligned */ +#define LOOKUP 0 /* perform name lookup only */ +#define CREATE 1 /* setup for file creation */ +#define DELETE 2 /* setup for file deletion */ +#define RENAME 3 /* setup for file renaming */ +#define OPMASK 3 /* mask for operation */ + +/* + * component name operational modifier flags + */ +#define FOLLOW 0x0040 /* follow symbolic links */ + +/* + * component name parameter descriptors. + */ +#define ISDOTDOT 0x002000 /* current component name is .. */ +#define MAKEENTRY 0x004000 /* entry is to be added to name cache */ +#define ISLASTCN 0x008000 /* this is last component of pathname */ +#define ISWHITEOUT 0x020000 /* found whiteout */ +#define DOWHITEOUT 0x040000 /* do whiteouts */ + + + +/* The following structure specifies a vnode for creation */ +struct vnode_fsparam { + struct mount * vnfs_mp; /* mount point to which this vnode_t is part of */ + enum vtype vnfs_vtype; /* vnode type */ + const char * vnfs_str; /* File system Debug aid */ + struct vnode * vnfs_dvp; /* The parent vnode */ + void * vnfs_fsnode; /* inode */ + int (**vnfs_vops)(void *); /* vnode dispatch table */ + int vnfs_markroot; /* is this a root vnode in FS (not a system wide one) */ + int vnfs_marksystem; /* is a system vnode */ + dev_t vnfs_rdev; /* dev_t for block or char vnodes */ + off_t vnfs_filesize; /* that way no need for getattr in UBC */ + struct componentname * vnfs_cnp; /* component name to add to namecache */ + uint32_t vnfs_flags; /* flags */ }; +#define VNFS_NOCACHE 0x01 /* do not add to name cache at this time */ +#define VNFS_CANTCACHE 0x02 /* never add this instance to the name cache */ + +#define VNCREATE_FLAVOR 0 +#define VCREATESIZE sizeof(struct vnode_fsparam) + /* - * Flags for va_vaflags. + * Vnode attributes, new-style. + * + * The vnode_attr structure is used to transact attribute changes and queries + * with the filesystem. + * + * Note that this structure may be extended, but existing fields must not move. 
*/ -#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ -#define VA_EXCLUSIVE 0x02 /* exclusive create request */ + +#define VATTR_INIT(v) do {(v)->va_supported = (v)->va_active = 0ll; (v)->va_vaflags = 0;} while(0) +#define VATTR_SET_ACTIVE(v, a) ((v)->va_active |= VNODE_ATTR_ ## a) +#define VATTR_SET_SUPPORTED(v, a) ((v)->va_supported |= VNODE_ATTR_ ## a) +#define VATTR_IS_SUPPORTED(v, a) ((v)->va_supported & VNODE_ATTR_ ## a) +#define VATTR_CLEAR_ACTIVE(v, a) ((v)->va_active &= ~VNODE_ATTR_ ## a) +#define VATTR_CLEAR_SUPPORTED(v, a) ((v)->va_supported &= ~VNODE_ATTR_ ## a) +#define VATTR_IS_ACTIVE(v, a) ((v)->va_active & VNODE_ATTR_ ## a) +#define VATTR_ALL_SUPPORTED(v) (((v)->va_active & (v)->va_supported) == (v)->va_active) +#define VATTR_INACTIVE_SUPPORTED(v) do {(v)->va_active &= ~(v)->va_supported; (v)->va_supported = 0;} while(0) +#define VATTR_SET(v, a, x) do { (v)-> a = (x); VATTR_SET_ACTIVE(v, a);} while(0) +#define VATTR_WANTED(v, a) VATTR_SET_ACTIVE(v, a) +#define VATTR_RETURN(v, a, x) do { (v)-> a = (x); VATTR_SET_SUPPORTED(v, a);} while(0) +#define VATTR_NOT_RETURNED(v, a) (VATTR_IS_ACTIVE(v, a) && !VATTR_IS_SUPPORTED(v, a)) /* - * Flags for ioflag. + * Two macros to simplify conditional checking in kernel code. + */ +#define VATTR_IS(v, a, x) (VATTR_IS_SUPPORTED(v, a) && (v)-> a == (x)) +#define VATTR_IS_NOT(v, a, x) (VATTR_IS_SUPPORTED(v, a) && (v)-> a != (x)) + +#define VNODE_ATTR_va_rdev (1LL<< 0) /* 00000001 */ +#define VNODE_ATTR_va_nlink (1LL<< 1) /* 00000002 */ +#define VNODE_ATTR_va_total_size (1LL<< 2) /* 00000004 */ +#define VNODE_ATTR_va_total_alloc (1LL<< 3) /* 00000008 */ +#define VNODE_ATTR_va_data_size (1LL<< 4) /* 00000010 */ +#define VNODE_ATTR_va_data_alloc (1LL<< 5) /* 00000020 */ +#define VNODE_ATTR_va_iosize (1LL<< 6) /* 00000040 */ +#define VNODE_ATTR_va_uid (1LL<< 7) /* 00000080 */ +#define VNODE_ATTR_va_gid (1LL<< 8) /* 00000100 */ +#define VNODE_ATTR_va_mode (1LL<< 9) /* 00000200 */ +#define VNODE_ATTR_va_flags (1LL<<10) /* 00000400 */ +#define VNODE_ATTR_va_acl (1LL<<11) /* 00000800 */ +#define VNODE_ATTR_va_create_time (1LL<<12) /* 00001000 */ +#define VNODE_ATTR_va_access_time (1LL<<13) /* 00002000 */ +#define VNODE_ATTR_va_modify_time (1LL<<14) /* 00004000 */ +#define VNODE_ATTR_va_change_time (1LL<<15) /* 00008000 */ +#define VNODE_ATTR_va_backup_time (1LL<<16) /* 00010000 */ +#define VNODE_ATTR_va_fileid (1LL<<17) /* 00020000 */ +#define VNODE_ATTR_va_linkid (1LL<<18) /* 00040000 */ +#define VNODE_ATTR_va_parentid (1LL<<19) /* 00080000 */ +#define VNODE_ATTR_va_fsid (1LL<<20) /* 00100000 */ +#define VNODE_ATTR_va_filerev (1LL<<21) /* 00200000 */ +#define VNODE_ATTR_va_gen (1LL<<22) /* 00400000 */ +#define VNODE_ATTR_va_encoding (1LL<<23) /* 00800000 */ +#define VNODE_ATTR_va_type (1LL<<24) /* 01000000 */ +#define VNODE_ATTR_va_name (1LL<<25) /* 02000000 */ +#define VNODE_ATTR_va_uuuid (1LL<<26) /* 04000000 */ +#define VNODE_ATTR_va_guuid (1LL<<27) /* 08000000 */ +#define VNODE_ATTR_va_nchildren (1LL<<28) /* 10000000 */ + +#define VNODE_ATTR_BIT(n) (VNODE_ATTR_ ## n) +/* + * Read-only attributes. 
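The VATTR_* macros above implement a request/response handshake over the va_active and va_supported bitmaps: the caller marks the attributes it wants, the filesystem marks the ones it actually filled, and the caller checks for gaps. A minimal sketch of both sides, assuming a valid vnode and context; vnode_getattr() is declared later in this header, and the my_* names are illustrative:

/* caller side: ask for the data fork size */
static int
query_data_size(vnode_t vp, vfs_context_t ctx, uint64_t *sizep)
{
        struct vnode_attr va;
        int error;

        VATTR_INIT(&va);
        VATTR_WANTED(&va, va_data_size);
        if ((error = vnode_getattr(vp, &va, ctx)) != 0)
                return (error);
        if (!VATTR_IS_SUPPORTED(&va, va_data_size))
                return (ENOTSUP);       /* filesystem could not supply it */
        *sizep = va.va_data_size;
        return (0);
}

/* filesystem side: a vnop_getattr handler returns only what it has */
struct my_inode { uint64_t size; uint64_t nlink; };     /* illustrative */

static int
my_fs_vnop_getattr(struct vnop_getattr_args *ap)
{
        struct vnode_attr *vap = ap->a_vap;
        struct my_inode *ip = vnode_fsnode(ap->a_vp);

        VATTR_RETURN(vap, va_data_size, ip->size);
        VATTR_RETURN(vap, va_nlink, ip->nlink);
        /* attributes still active but not marked supported show up via
           VATTR_NOT_RETURNED()/VATTR_ALL_SUPPORTED() on the caller side */
        return (0);
}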
+ */ +#define VNODE_ATTR_RDONLY (VNODE_ATTR_BIT(va_rdev) | \ + VNODE_ATTR_BIT(va_nlink) | \ + VNODE_ATTR_BIT(va_total_size) | \ + VNODE_ATTR_BIT(va_total_alloc) | \ + VNODE_ATTR_BIT(va_data_alloc) | \ + VNODE_ATTR_BIT(va_iosize) | \ + VNODE_ATTR_BIT(va_fileid) | \ + VNODE_ATTR_BIT(va_linkid) | \ + VNODE_ATTR_BIT(va_parentid) | \ + VNODE_ATTR_BIT(va_fsid) | \ + VNODE_ATTR_BIT(va_filerev) | \ + VNODE_ATTR_BIT(va_gen) | \ + VNODE_ATTR_BIT(va_name) | \ + VNODE_ATTR_BIT(va_type) | \ + VNODE_ATTR_BIT(va_nchildren)) +/* + * Attributes that can be applied to a new file object. */ -#define IO_UNIT 0x01 /* do I/O as atomic unit */ -#define IO_APPEND 0x02 /* append write to end */ -#define IO_SYNC 0x04 /* do I/O synchronously */ -#define IO_NODELOCKED 0x08 /* underlying node already locked */ -#define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ -#define IO_NOZEROFILL 0x20 /* F_SETSIZE fcntl uses to prevent zero filling */ -#define IO_TAILZEROFILL 0x40 /* zero fills at the tail of write */ -#define IO_HEADZEROFILL 0x80 /* zero fills at the head of write */ -#define IO_NOZEROVALID 0x100 /* do not zero fill if valid page */ -#define IO_NOZERODIRTY 0x200 /* do not zero fill if page is dirty */ +#define VNODE_ATTR_NEWOBJ (VNODE_ATTR_BIT(va_rdev) | \ + VNODE_ATTR_BIT(va_uid) | \ + VNODE_ATTR_BIT(va_gid) | \ + VNODE_ATTR_BIT(va_mode) | \ + VNODE_ATTR_BIT(va_flags) | \ + VNODE_ATTR_BIT(va_acl) | \ + VNODE_ATTR_BIT(va_create_time) | \ + VNODE_ATTR_BIT(va_modify_time) | \ + VNODE_ATTR_BIT(va_change_time) | \ + VNODE_ATTR_BIT(va_encoding) | \ + VNODE_ATTR_BIT(va_type) | \ + VNODE_ATTR_BIT(va_uuuid) | \ + VNODE_ATTR_BIT(va_guuid)) + +struct vnode_attr { + /* bitfields */ + uint64_t va_supported; + uint64_t va_active; + + /* + * Control flags. The low 16 bits are reserved for the + * ioflags being passed for truncation operations. + */ + int va_vaflags; + + /* traditional stat(2) parameter fields */ + dev_t va_rdev; /* device id (device nodes only) */ + uint64_t va_nlink; /* number of references to this file */ + uint64_t va_total_size; /* size in bytes of all forks */ + uint64_t va_total_alloc; /* disk space used by all forks */ + uint64_t va_data_size; /* size in bytes of the main(data) fork */ + uint64_t va_data_alloc; /* disk space used by the main(data) fork */ + uint32_t va_iosize; /* optimal I/O blocksize */ + + /* file security information */ + uid_t va_uid; /* owner UID */ + gid_t va_gid; /* owner GID */ + mode_t va_mode; /* posix permissions */ + uint32_t va_flags; /* file flags */ + struct kauth_acl *va_acl; /* access control list */ + + /* timestamps */ + struct timespec va_create_time; /* time of creation */ + struct timespec va_access_time; /* time of last access */ + struct timespec va_modify_time; /* time of last data modification */ + struct timespec va_change_time; /* time of last metadata change */ + struct timespec va_backup_time; /* time of last backup */ + + /* file parameters */ + uint64_t va_fileid; /* file unique ID in filesystem */ + uint64_t va_linkid; /* file link unique ID */ + uint64_t va_parentid; /* parent ID */ + uint32_t va_fsid; /* filesystem ID */ + uint64_t va_filerev; /* file revision counter */ /* XXX */ + uint32_t va_gen; /* file generation count */ /* XXX - relationship of + * these two? 
*/ + /* misc parameters */ + uint32_t va_encoding; /* filename encoding script */ + + enum vtype va_type; /* file type (create only) */ + char * va_name; /* Name for ATTR_CMN_NAME; MAXPATHLEN bytes */ + guid_t va_uuuid; /* file owner UUID */ + guid_t va_guuid; /* file group UUID */ + + uint64_t va_nchildren; /* Number of items in a directory */ + /* Meaningful for directories only */ + + /* add new fields here only */ +}; /* - * Modes. Some values same as Ixxx entries from inode.h for now. + * Flags for va_vaflags. */ -#define VSUID 04000 /* set user id on execution */ -#define VSGID 02000 /* set group id on execution */ -#define VSVTX 01000 /* save swapped text even after use */ -#define VREAD 00400 /* read, write, execute permissions */ -#define VWRITE 00200 -#define VEXEC 00100 +#define VA_UTIMES_NULL 0x010000 /* utimes argument was NULL */ +#define VA_EXCLUSIVE 0x020000 /* exclusive create request */ + + /* - * Token indicating no attribute value yet assigned. + * Modes. Some values same as Ixxx entries from inode.h for now. */ -#define VNOVAL (-1) +#define VSUID 0x800 /*04000*/ /* set user id on execution */ +#define VSGID 0x400 /*02000*/ /* set group id on execution */ +#define VSVTX 0x200 /*01000*/ /* save swapped text even after use */ +#define VREAD 0x100 /*00400*/ /* read, write, execute permissions */ +#define VWRITE 0x080 /*00200*/ +#define VEXEC 0x040 /*00100*/ + -#ifdef KERNEL /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). @@ -277,6 +392,7 @@ extern int vttoif_tab[]; #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) + /* * Flags to various vnode functions. */ @@ -284,146 +400,37 @@ extern int vttoif_tab[]; #define FORCECLOSE 0x0002 /* vflush: force file closeure */ #define WRITECLOSE 0x0004 /* vflush: only close writeable files */ #define SKIPSWAP 0x0008 /* vflush: skip vnodes marked VSWAP */ +#define SKIPROOT 0x0010 /* vflush: skip root vnodes marked VROOT */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ -#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ +#define REVOKEALL 0x0001 /* vnop_revoke: revoke all aliases */ -/* flags for vop_allocate */ -#define PREALLOCATE 0x00000001 /* preallocate allocation blocks */ -#define ALLOCATECONTIG 0x00000002 /* allocate contigious space */ -#define ALLOCATEALL 0x00000004 /* allocate all requested space */ - /* or no space at all */ -#define FREEREMAINDER 0x00000008 /* deallocate allocated but */ - /* unfilled blocks */ -#define ALLOCATEFROMPEOF 0x00000010 /* allocate from the physical eof */ -#define ALLOCATEFROMVOL 0x00000020 /* allocate from the volume offset */ +/* VNOP_REMOVE: do not delete busy files (Carbon remove file semantics) */ +#define VNODE_REMOVE_NODELETEBUSY 0x0001 -#if DIAGNOSTIC -#define VATTR_NULL(vap) vattr_null(vap) -#define HOLDRELE(vp) holdrele(vp) -#define VHOLD(vp) vhold(vp) +/* VNOP_READDIR flags: */ +#define VNODE_READDIR_EXTENDED 0x0001 /* use extended directory entries */ +#define VNODE_READDIR_REQSEEKOFF 0x0002 /* requires seek offset (cookies) */ -void holdrele __P((struct vnode *)); -void vattr_null __P((struct vattr *)); -void vhold __P((struct vnode *)); -#else -#define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ -#define HOLDRELE(vp) holdrele(vp) /* decrease buf or page ref */ -extern __inline void 
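MAKEIMODE above folds a vnode type and permission bits into a stat-style mode word, for example:

        int imode = MAKEIMODE(VREG, 0644);      /* VTTOIF(VREG) | 0644, i.e. S_IFREG | 0644 */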
holdrele(struct vnode *vp) -{ - simple_lock(&vp->v_interlock); - vp->v_holdcnt--; - simple_unlock(&vp->v_interlock); -} -#define VHOLD(vp) vhold(vp) /* increase buf or page ref */ -extern __inline void vhold(struct vnode *vp) -{ - simple_lock(&vp->v_interlock); - if (++vp->v_holdcnt <= 0) - panic("vhold: v_holdcnt"); - simple_unlock(&vp->v_interlock); -} -#endif /* DIAGNOSTIC */ - -#define VREF(vp) vref(vp) -void vref __P((struct vnode *)); -#define NULLVP ((struct vnode *)NULL) -/* - * Global vnode data. - */ -extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ -extern int desiredvnodes; /* number of vnodes desired */ -extern struct vattr va_null; /* predefined null vattr structure */ +#define NULLVP ((struct vnode *)NULL) /* * Macro/function to check for client cache inconsistency w.r.t. leasing. */ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ -#endif /* KERNEL */ - -/* - * Mods for exensibility. - */ - -/* - * Flags for vdesc_flags: - */ -#define VDESC_MAX_VPS 16 -/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ -#define VDESC_VP0_WILLRELE 0x0001 -#define VDESC_VP1_WILLRELE 0x0002 -#define VDESC_VP2_WILLRELE 0x0004 -#define VDESC_VP3_WILLRELE 0x0008 -#define VDESC_NOMAP_VPP 0x0100 -#define VDESC_VPP_WILLRELE 0x0200 -/* - * VDESC_NO_OFFSET is used to identify the end of the offset list - * and in places where no such field exists. - */ -#define VDESC_NO_OFFSET -1 -/* - * This structure describes the vnode operation taking place. - */ -struct vnodeop_desc { - int vdesc_offset; /* offset in vector--first for speed */ - char *vdesc_name; /* a readable name for debugging */ - int vdesc_flags; /* VDESC_* flags */ - - /* - * These ops are used by bypass routines to map and locate arguments. - * Creds and procs are not needed in bypass routines, but sometimes - * they are useful to (for example) transport layers. - * Nameidata is useful because it has a cred in it. - */ - int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ - int vdesc_vpp_offset; /* return vpp location */ - int vdesc_cred_offset; /* cred location, if any */ - int vdesc_proc_offset; /* proc location, if any */ - int vdesc_componentname_offset; /* if any */ - /* - * Finally, we've got a list of private data (about each operation) - * for each transport layer. (Support to manage this list is not - * yet part of BSD.) - */ - caddr_t *vdesc_transports; -}; +#ifndef BSD_KERNEL_PRIVATE +struct vnodeop_desc; +#endif -#endif /* __APPLE_API_PRIVATE */ - -#ifdef KERNEL - -#ifdef __APPLE_API_PRIVATE -/* - * A list of all the operation descs. - */ -extern struct vnodeop_desc *vnodeop_descs[]; - -/* - * Interlock for scanning list of vnodes attached to a mountpoint - */ -extern struct slock mntvnode_slock; - -/* - * This macro is very helpful in defining those offsets in the vdesc struct. - * - * This is stolen from X11R4. I ingored all the fancy stuff for - * Crays, so if you decide to port this to such a serious machine, - * you might want to consult Intrisics.h's XtOffset{,Of,To}. - */ -#define VOPARG_OFFSET(p_type,field) \ - ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) -#define VOPARG_OFFSETOF(s_type,field) \ - VOPARG_OFFSET(s_type*,field) -#define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ - ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) +extern int desiredvnodes; /* number of vnodes desired */ /* @@ -442,94 +449,195 @@ struct vnodeopv_desc { /* * A default routine which just returns an error. 
*/ -int vn_default_error __P((void)); +int vn_default_error(void); /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ -struct vop_generic_args { +struct vnop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; +#ifndef _KAUTH_ACTION_T +typedef int kauth_action_t; +# define _KAUTH_ACTION_T +#endif + +#include + +__BEGIN_DECLS + +errno_t vnode_create(int, size_t, void *, vnode_t *); +int vnode_addfsref(vnode_t); +int vnode_removefsref(vnode_t); + +int vnode_hasdirtyblks(vnode_t); +int vnode_hascleanblks(vnode_t); +#define VNODE_ASYNC_THROTTLE 18 +/* timeout is in 10 msecs and not hz tick based */ +int vnode_waitforwrites(vnode_t, int, int, int, char *); +void vnode_startwrite(vnode_t); +void vnode_writedone(vnode_t); + +enum vtype vnode_vtype(vnode_t); +uint32_t vnode_vid(vnode_t); +mount_t vnode_mountedhere(vnode_t vp); +mount_t vnode_mount(vnode_t); +dev_t vnode_specrdev(vnode_t); +void * vnode_fsnode(vnode_t); +void vnode_clearfsnode(vnode_t); + +int vnode_isvroot(vnode_t); +int vnode_issystem(vnode_t); +int vnode_ismount(vnode_t); +int vnode_isreg(vnode_t); +int vnode_isdir(vnode_t); +int vnode_islnk(vnode_t); +int vnode_isfifo(vnode_t); +int vnode_isblk(vnode_t); +int vnode_ischr(vnode_t); + +int vnode_ismountedon(vnode_t); +void vnode_setmountedon(vnode_t); +void vnode_clearmountedon(vnode_t); + +int vnode_isnocache(vnode_t); +void vnode_setnocache(vnode_t); +void vnode_clearnocache(vnode_t); +int vnode_isnoreadahead(vnode_t); +void vnode_setnoreadahead(vnode_t); +void vnode_clearnoreadahead(vnode_t); +/* left only for compat reasons as User code depends on this from getattrlist, for example */ +void vnode_settag(vnode_t, int); +int vnode_tag(vnode_t); +int vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); +int vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); + +#ifdef BSD_KERNEL_PRIVATE + /* - * VOCALL calls an op given an ops vector. We break it out because BSD's - * vclean changes the ops vector and then wants to call ops with the old - * vector. + * Indicate that a file has multiple hard links. VFS will always call + * VNOP_LOOKUP on this vnode. Volfs will always ask for its parent + * object ID (instead of using the v_parent pointer).
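vnode_create() above replaces the old getnewvnode() path: the filesystem describes the vnode it needs in the vnode_fsparam structure defined earlier and gets back a usable vnode. A minimal sketch, with the my_fs names and dispatch table as assumptions:

extern int (**my_fs_vnodeop_p)(void *);         /* assumed dispatch table */

static int
my_fs_get_vnode(mount_t mp, void *inode, vnode_t dvp,
    struct componentname *cnp, off_t filesize, vnode_t *vpp)
{
        struct vnode_fsparam vfsp;

        bzero(&vfsp, sizeof (vfsp));
        vfsp.vnfs_mp = mp;
        vfsp.vnfs_vtype = VREG;
        vfsp.vnfs_str = "my_fs";                /* debug aid only */
        vfsp.vnfs_dvp = dvp;
        vfsp.vnfs_fsnode = inode;
        vfsp.vnfs_vops = my_fs_vnodeop_p;
        vfsp.vnfs_filesize = filesize;          /* spares UBC a getattr */
        vfsp.vnfs_cnp = cnp;
        vfsp.vnfs_flags = (cnp == NULL) ? VNFS_NOCACHE : 0;

        return (vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp));
}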
*/ -#define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) +void vnode_set_hard_link(vnode_t vp); + +vnode_t vnode_parent(vnode_t); +void vnode_setparent(vnode_t, vnode_t); +char * vnode_name(vnode_t); +void vnode_setname(vnode_t, char *); +int vnode_isnoflush(vnode_t); +void vnode_setnoflush(vnode_t); +void vnode_clearnoflush(vnode_t); +#endif + +uint32_t vnode_vfsmaxsymlen(vnode_t); +int vnode_vfsisrdonly(vnode_t); +int vnode_vfstypenum(vnode_t); +void vnode_vfsname(vnode_t, char *); +int vnode_vfs64bitready(vnode_t); + +proc_t vfs_context_proc(vfs_context_t); +ucred_t vfs_context_ucred(vfs_context_t); +int vfs_context_issuser(vfs_context_t); +int vfs_context_pid(vfs_context_t); +int vfs_context_issignal(vfs_context_t, sigset_t); +int vfs_context_suser(vfs_context_t); +int vfs_context_is64bit(vfs_context_t); +vfs_context_t vfs_context_create(vfs_context_t); +int vfs_context_rele(vfs_context_t); + + +int vflush(struct mount *mp, struct vnode *skipvp, int flags); +int vnode_get(vnode_t); +int vnode_getwithvid(vnode_t, int); +int vnode_put(vnode_t); +int vnode_ref(vnode_t); +void vnode_rele(vnode_t); +int vnode_isinuse(vnode_t, int); +void vnode_lock(vnode_t); +void vnode_unlock(vnode_t); +int vnode_recycle(vnode_t); +void vnode_reclaim(vnode_t); + +#define VNODE_UPDATE_PARENT 0x01 +#define VNODE_UPDATE_NAME 0x02 +#define VNODE_UPDATE_CACHE 0x04 +void vnode_update_identity(vnode_t vp, vnode_t dvp, char *name, int name_len, int name_hashval, int flags); + +int vn_bwrite(struct vnop_bwrite_args *ap); + +int vnode_authorize(vnode_t /*vp*/, vnode_t /*dvp*/, kauth_action_t, vfs_context_t); +int vnode_authattr(vnode_t, struct vnode_attr *, kauth_action_t *, vfs_context_t); +int vnode_authattr_new(vnode_t /*dvp*/, struct vnode_attr *, int /*noauth*/, vfs_context_t); +errno_t vnode_close(vnode_t, int, vfs_context_t); + +int vn_getpath(struct vnode *vp, char *pathbuf, int *len); /* - * This call works for vnodes in the kernel. + * Flags for the vnode_lookup and vnode_open */ -#define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) -#define VDESC(OP) (& __CONCAT(OP,_desc)) -#define VOFFSET(OP) (VDESC(OP)->vdesc_offset) +#define VNODE_LOOKUP_NOFOLLOW 0x01 +#define VNODE_LOOKUP_NOCROSSMOUNT 0x02 +#define VNODE_LOOKUP_DOWHITEOUT 0x04 -#endif /* __APPLE_API_PRIVATE */ +errno_t vnode_lookup(const char *, int, vnode_t *, vfs_context_t); +errno_t vnode_open(const char *, int, int, int, vnode_t *, vfs_context_t); /* - * Finally, include the default set of vnode operations. + * exported vnode operations */ -#include +int vnode_iterate(struct mount *, int, int (*)(struct vnode *, void *), void *); /* - * vnode manipulation functions. 
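vnode_open()/vnode_close() above give kernel code an open(2)-like way to obtain and release a vnode without a file descriptor. A minimal sketch, assuming the fmode/cmode arguments follow open(2) conventions and that a valid vfs_context_t is at hand:

static int
create_and_write_stub(const char *path, vfs_context_t ctx)
{
        vnode_t vp = NULL;
        int error;

        error = vnode_open(path, O_CREAT | O_WRONLY, 0644,
            VNODE_LOOKUP_NOFOLLOW, &vp, ctx);
        if (error)
                return (error);
        /* ... issue I/O against vp here (e.g. VNOP_WRITE) ... */
        return (vnode_close(vp, FWRITE, ctx));
}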
+ * flags passed into vnode_iterate */ -struct file; -struct mount; -struct nameidata; -struct ostat; -struct proc; +#define VNODE_RELOAD 0x01 +#define VNODE_WAIT 0x02 +#define VNODE_WRITEABLE 0x04 +#define VNODE_WITHID 0x08 +#define VNODE_NOLOCK_INTERNAL 0x10 +#define VNODE_NODEAD 0x20 +#define VNODE_NOSUSPEND 0x40 +#define VNODE_ITERATE_ALL 0x80 +#define VNODE_ITERATE_ACTIVE 0x100 +#define VNODE_ITERATE_INACTIVE 0x200 + +/* + * return values from callback + */ +#define VNODE_RETURNED 0 /* done with vnode, reference can be dropped */ +#define VNODE_RETURNED_DONE 1 /* done with vnode, reference can be dropped, terminate iteration */ +#define VNODE_CLAIMED 2 /* don't drop reference */ +#define VNODE_CLAIMED_DONE 3 /* don't drop reference, terminate iteration */ + + struct stat; -struct ucred; -struct uio; -struct vattr; -struct vnode; -struct vop_bwrite_args; - -#ifdef __APPLE_API_EVOLVING -int bdevvp __P((dev_t dev, struct vnode **vpp)); -void cvtstat __P((struct stat *st, struct ostat *ost)); -int getnewvnode __P((enum vtagtype tag, - struct mount *mp, int (**vops)(void *), struct vnode **vpp)); -void insmntque __P((struct vnode *vp, struct mount *mp)); -void vattr_null __P((struct vattr *vap)); -int vcount __P((struct vnode *vp)); -int vflush __P((struct mount *mp, struct vnode *skipvp, int flags)); -int vget __P((struct vnode *vp, int lockflag, struct proc *p)); -void vgone __P((struct vnode *vp)); -int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, - struct proc *p, int slpflag, int slptimeo)); -void vprint __P((char *label, struct vnode *vp)); -int vrecycle __P((struct vnode *vp, struct slock *inter_lkp, - struct proc *p)); -int vn_bwrite __P((struct vop_bwrite_args *ap)); -int vn_close __P((struct vnode *vp, - int flags, struct ucred *cred, struct proc *p)); -int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); -int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); -#ifndef __APPLE_API_PRIVATE -__private_extern__ int - vn_open_modflags __P((struct nameidata *ndp, int *fmode, int cmode)); -#endif /* __APPLE_API_PRIVATE */ -int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, - int len, off_t offset, enum uio_seg segflg, int ioflg, - struct ucred *cred, int *aresid, struct proc *p)); -int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); -int vop_noislocked __P((struct vop_islocked_args *)); -int vop_nolock __P((struct vop_lock_args *)); -int vop_nounlock __P((struct vop_unlock_args *)); -int vop_revoke __P((struct vop_revoke_args *)); -struct vnode * - checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); -void vput __P((struct vnode *vp)); -void vrele __P((struct vnode *vp)); -int vaccess __P((mode_t file_mode, uid_t uid, gid_t gid, - mode_t acc_mode, struct ucred *cred)); -int getvnode __P((struct proc *p, int fd, struct file **fpp)); -#endif /* __APPLE_API_EVOLVING */ +int vn_stat(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx); +int vn_stat_noauth(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx); +int vn_revoke(vnode_t vp, int flags, vfs_context_t); +/* XXX BOGUS */ +int vaccess(mode_t file_mode, uid_t uid, gid_t gid, + mode_t acc_mode, struct ucred *cred); + + +/* namecache function prototypes */ +int cache_lookup(vnode_t dvp, vnode_t *vpp, struct componentname *cnp); +void cache_enter(vnode_t dvp, vnode_t vp, struct componentname *cnp); +void cache_purge(vnode_t vp); +void cache_purge_negatives(vnode_t vp); + +/* + * Global string-cache 
routines. You can pass zero for nc_hash + * if you don't know it (add_name() will then compute the hash). + * There are no flags for now but maybe someday. + */ +char *vfs_addname(const char *name, size_t len, u_int nc_hash, u_int flags); +int vfs_removename(const char *name); + +__END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h index abfc5beb5..aa1201b5f 100644 --- a/bsd/sys/vnode_if.h +++ b/bsd/sys/vnode_if.h @@ -68,685 +68,507 @@ #define _SYS_VNODE_IF_H_ #include - -#ifdef __APPLE_API_UNSTABLE -extern struct vnodeop_desc vop_default_desc; - - -struct vop_lookup_args { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; -}; -extern struct vnodeop_desc vop_lookup_desc; -#define VOP_LOOKUP(dvp, vpp, cnp) _VOP_LOOKUP(dvp, vpp, cnp) -static __inline int _VOP_LOOKUP(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) -{ - struct vop_lookup_args a; - a.a_desc = VDESC(vop_lookup); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - return (VCALL(dvp, VOFFSET(vop_lookup), &a)); -} - -struct vop_cachedlookup_args { +#include +#include +#include +#ifdef BSD_KERNEL_PRIVATE +#include +#endif +#include + + +#ifdef KERNEL + +extern struct vnodeop_desc vnop_default_desc; +extern struct vnodeop_desc vnop_lookup_desc; +extern struct vnodeop_desc vnop_create_desc; +extern struct vnodeop_desc vnop_whiteout_desc; +extern struct vnodeop_desc vnop_mknod_desc; +extern struct vnodeop_desc vnop_open_desc; +extern struct vnodeop_desc vnop_close_desc; +extern struct vnodeop_desc vnop_access_desc; +extern struct vnodeop_desc vnop_getattr_desc; +extern struct vnodeop_desc vnop_setattr_desc; +extern struct vnodeop_desc vnop_getattrlist_desc; +extern struct vnodeop_desc vnop_setattrlist_desc; +extern struct vnodeop_desc vnop_read_desc; +extern struct vnodeop_desc vnop_write_desc; +extern struct vnodeop_desc vnop_ioctl_desc; +extern struct vnodeop_desc vnop_select_desc; +extern struct vnodeop_desc vnop_exchange_desc; +extern struct vnodeop_desc vnop_revoke_desc; +extern struct vnodeop_desc vnop_mmap_desc; +extern struct vnodeop_desc vnop_mnomap_desc; +extern struct vnodeop_desc vnop_fsync_desc; +extern struct vnodeop_desc vnop_remove_desc; +extern struct vnodeop_desc vnop_link_desc; +extern struct vnodeop_desc vnop_rename_desc; +extern struct vnodeop_desc vnop_mkdir_desc; +extern struct vnodeop_desc vnop_rmdir_desc; +extern struct vnodeop_desc vnop_symlink_desc; +extern struct vnodeop_desc vnop_readdir_desc; +extern struct vnodeop_desc vnop_readdirattr_desc; +extern struct vnodeop_desc vnop_readlink_desc; +extern struct vnodeop_desc vnop_inactive_desc; +extern struct vnodeop_desc vnop_reclaim_desc; +extern struct vnodeop_desc vnop_print_desc; +extern struct vnodeop_desc vnop_pathconf_desc; +extern struct vnodeop_desc vnop_advlock_desc; +extern struct vnodeop_desc vnop_truncate_desc; +extern struct vnodeop_desc vnop_allocate_desc; +extern struct vnodeop_desc vnop_pagein_desc; +extern struct vnodeop_desc vnop_pageout_desc; +extern struct vnodeop_desc vnop_devblocksize_desc; +extern struct vnodeop_desc vnop_searchfs_desc; +extern struct vnodeop_desc vnop_copyfile_desc; +extern struct vnodeop_desc vnop_blktooff_desc; +extern struct vnodeop_desc vnop_offtoblk_desc; +extern struct vnodeop_desc vnop_blockmap_desc; +extern struct vnodeop_desc vnop_strategy_desc; +extern struct vnodeop_desc vnop_bwrite_desc; + +__BEGIN_DECLS +/* + *# + *#% lookup dvp L ? ? 
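A filesystem typically binds the vnop_*_desc descriptors above to its own handlers in a table that is then registered with the VFS; the sketch below assumes the vnodeopv_entry_desc/vnodeopv_desc layouts carried over from the old interface, and the my_fs names are illustrative:

extern int my_fs_vnop_lookup(struct vnop_lookup_args *);        /* assumed */
extern int my_fs_vnop_open(struct vnop_open_args *);            /* assumed */

int (**my_fs_vnodeop_p)(void *);

static struct vnodeopv_entry_desc my_fs_vnodeop_entries[] = {
        { &vnop_default_desc, (int (*)(void *))vn_default_error },
        { &vnop_lookup_desc,  (int (*)(void *))my_fs_vnop_lookup },
        { &vnop_open_desc,    (int (*)(void *))my_fs_vnop_open },
        { (struct vnodeop_desc *)NULL, (int (*)(void *))NULL }
};

struct vnodeopv_desc my_fs_vnodeop_opv_desc =
        { &my_fs_vnodeop_p, my_fs_vnodeop_entries };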
+ *#% lookup vpp - L - + */ +struct vnop_lookup_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_cachedlookup_desc; -#define VOP_CACHEDLOOKUP(dvp, vpp, cnp) _VOP_CACHEDLOOKUP(dvp, vpp, cnp) -static __inline int _VOP_CACHEDLOOKUP(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) -{ - struct vop_cachedlookup_args a; - a.a_desc = VDESC(vop_cachedlookup); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - return (VCALL(dvp, VOFFSET(vop_cachedlookup), &a)); -} - -struct vop_create_args { +extern errno_t VNOP_LOOKUP(vnode_t, vnode_t *, struct componentname *, vfs_context_t); + + +/* + *# + *#% create dvp L L L + *#% create vpp - L - + *# + */ + +struct vnop_create_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_create_desc; -#define VOP_CREATE(dvp, vpp, cnp, vap) _VOP_CREATE(dvp, vpp, cnp, vap) -static __inline int _VOP_CREATE(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) -{ - struct vop_create_args a; - a.a_desc = VDESC(vop_create); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - a.a_vap = vap; - return (VCALL(dvp, VOFFSET(vop_create), &a)); -} - -struct vop_whiteout_args { +extern errno_t VNOP_CREATE(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); + +/* + *# + *#% whiteout dvp L L L + *#% whiteout cnp - - - + *#% whiteout flag - - - + *# + */ +struct vnop_whiteout_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; + vnode_t a_dvp; struct componentname *a_cnp; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_whiteout_desc; -#define VOP_WHITEOUT(dvp, cnp, flags) _VOP_WHITEOUT(dvp, cnp, flags) -static __inline int _VOP_WHITEOUT(struct vnode *dvp, struct componentname *cnp, int flags) -{ - struct vop_whiteout_args a; - a.a_desc = VDESC(vop_whiteout); - a.a_dvp = dvp; - a.a_cnp = cnp; - a.a_flags = flags; - return (VCALL(dvp, VOFFSET(vop_whiteout), &a)); -} - -struct vop_mknod_args { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; -}; -extern struct vnodeop_desc vop_mknod_desc; -#define VOP_MKNOD(dvp, vpp, cnp, vap) _VOP_MKNOD(dvp, vpp, cnp, vap) -static __inline int _VOP_MKNOD(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) -{ - struct vop_mknod_args a; - a.a_desc = VDESC(vop_mknod); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - a.a_vap = vap; - return (VCALL(dvp, VOFFSET(vop_mknod), &a)); -} - -struct vop_mkcomplex_args { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - u_long a_type; +extern errno_t VNOP_WHITEOUT(vnode_t, struct componentname *, int, vfs_context_t); + +/* + *# + *#% mknod dvp L U U + *#% mknod vpp - X - + *# + */ +struct vnop_mknod_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_mkcomplex_desc; -#define VOP_MKCOMPLEX(dvp, vpp, cnp, vap, type) _VOP_MKCOMPLEX(dvp, vpp, cnp, vap, type) -static __inline int _VOP_MKCOMPLEX(struct vnode 
*dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap, u_long type) -{ - struct vop_mkcomplex_args a; - a.a_desc = VDESC(vop_mkcomplex); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - a.a_vap = vap; - a.a_type = type; - return (VCALL(dvp, VOFFSET(vop_mkcomplex), &a)); -} - -struct vop_open_args { +extern errno_t VNOP_MKNOD(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); + +/* + *# + *#% open vp L L L + *# + */ +struct vnop_open_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_open_desc; -#define VOP_OPEN(vp, mode, cred, p) _VOP_OPEN(vp, mode, cred, p) -static __inline int _VOP_OPEN(struct vnode *vp, int mode, struct ucred *cred, struct proc *p) -{ - struct vop_open_args a; - a.a_desc = VDESC(vop_open); - a.a_vp = vp; - a.a_mode = mode; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_open), &a)); -} - -struct vop_close_args { +extern errno_t VNOP_OPEN(vnode_t, int, vfs_context_t); + +/* + *# + *#% close vp U U U + *# + */ +struct vnop_close_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_close_desc; -#define VOP_CLOSE(vp, fflag, cred, p) _VOP_CLOSE(vp, fflag, cred, p) -static __inline int _VOP_CLOSE(struct vnode *vp, int fflag, struct ucred *cred, struct proc *p) -{ - struct vop_close_args a; - a.a_desc = VDESC(vop_close); - a.a_vp = vp; - a.a_fflag = fflag; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_close), &a)); -} - -struct vop_access_args { +extern errno_t VNOP_CLOSE(vnode_t, int, vfs_context_t); + +/* + *# + *#% access vp L L L + *# + */ +struct vnop_access_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vnode_t a_vp; + int a_action; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_access_desc; -#define VOP_ACCESS(vp, mode, cred, p) _VOP_ACCESS(vp, mode, cred, p) -static __inline int _VOP_ACCESS(struct vnode *vp, int mode, struct ucred *cred, struct proc *p) -{ - struct vop_access_args a; - a.a_desc = VDESC(vop_access); - a.a_vp = vp; - a.a_mode = mode; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_access), &a)); -} - -struct vop_getattr_args { +extern errno_t VNOP_ACCESS(vnode_t, int, vfs_context_t); + + +/* + *# + *#% getattr vp = = = + *# + */ +struct vnop_getattr_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_getattr_desc; -#define VOP_GETATTR(vp, vap, cred, p) _VOP_GETATTR(vp, vap, cred, p) -static __inline int _VOP_GETATTR(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct proc *p) -{ - struct vop_getattr_args a; - a.a_desc = VDESC(vop_getattr); - a.a_vp = vp; - a.a_vap = vap; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_getattr), &a)); -} - -struct vop_setattr_args { +extern errno_t VNOP_GETATTR(vnode_t, struct vnode_attr *, vfs_context_t); + +/* + *# + *#% setattr vp L L L + *# + */ +struct vnop_setattr_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t 
a_context; }; -extern struct vnodeop_desc vop_setattr_desc; -#define VOP_SETATTR(vp, vap, cred, p) _VOP_SETATTR(vp, vap, cred, p) -static __inline int _VOP_SETATTR(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct proc *p) -{ - struct vop_setattr_args a; - a.a_desc = VDESC(vop_setattr); - a.a_vp = vp; - a.a_vap = vap; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_setattr), &a)); -} - -struct vop_getattrlist_args { +extern errno_t VNOP_SETATTR(vnode_t, struct vnode_attr *, vfs_context_t); + +/* + *# + *#% getattrlist vp = = = + *# + */ +struct vnop_getattrlist_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_getattrlist_desc; -#define VOP_GETATTRLIST(vp, alist, uio, cred, p) _VOP_GETATTRLIST(vp, alist, uio, cred, p) -static __inline int _VOP_GETATTRLIST(struct vnode *vp, struct attrlist *alist, struct uio *uio, struct ucred *cred, struct proc *p) -{ - struct vop_getattrlist_args a; - a.a_desc = VDESC(vop_getattrlist); - a.a_vp = vp; - a.a_alist = alist; - a.a_uio = uio; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_getattrlist), &a)); -} - -struct vop_setattrlist_args { +extern errno_t VNOP_GETATTRLIST(vnode_t, struct attrlist *, struct uio *, int, vfs_context_t); + + +/* + *# + *#% setattrlist vp L L L + *# + */ +struct vnop_setattrlist_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_setattrlist_desc; -#define VOP_SETATTRLIST(vp, alist, uio, cred, p) _VOP_SETATTRLIST(vp, alist, uio, cred, p) -static __inline int _VOP_SETATTRLIST(struct vnode *vp, struct attrlist *alist, struct uio *uio, struct ucred *cred, struct proc *p) -{ - struct vop_setattrlist_args a; - a.a_desc = VDESC(vop_setattrlist); - a.a_vp = vp; - a.a_alist = alist; - a.a_uio = uio; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_setattrlist), &a)); -} - -struct vop_read_args { +extern errno_t VNOP_SETATTRLIST(vnode_t, struct attrlist *, struct uio *, int, vfs_context_t); + + +/* + *# + *#% read vp L L L + *# + */ +struct vnop_read_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_read_desc; -#define VOP_READ(vp, uio, ioflag, cred) _VOP_READ(vp, uio, ioflag, cred) -static __inline int _VOP_READ(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) -{ - struct vop_read_args a; - a.a_desc = VDESC(vop_read); - a.a_vp = vp; - a.a_uio = uio; - a.a_ioflag = ioflag; - a.a_cred = cred; - { - int _err; - extern int ubc_hold(struct vnode *vp); - extern void ubc_rele(struct vnode *vp); - int _didhold = ubc_hold(vp); - _err = VCALL(vp, VOFFSET(vop_read), &a); - if (_didhold) - ubc_rele(vp); - return (_err); - } -} - -struct vop_write_args { +extern errno_t VNOP_READ(vnode_t, struct uio *, int, vfs_context_t); + + +/* + *# + *#% write vp L L L + *# + */ +struct vnop_write_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_write_desc; -#define VOP_WRITE(vp, uio, ioflag, cred) _VOP_WRITE(vp, uio, ioflag, 
cred) -static __inline int _VOP_WRITE(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) -{ - struct vop_write_args a; - a.a_desc = VDESC(vop_write); - a.a_vp = vp; - a.a_uio = uio; - a.a_ioflag = ioflag; - a.a_cred = cred; - { - int _err; - extern int ubc_hold(struct vnode *vp); - extern void ubc_rele(struct vnode *vp); - int _didhold = ubc_hold(vp); - _err = VCALL(vp, VOFFSET(vop_write), &a); - if (_didhold) - ubc_rele(vp); - return (_err); - } -} - -struct vop_lease_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; -}; -extern struct vnodeop_desc vop_lease_desc; -#define VOP_LEASE(vp, p, cred, flag) _VOP_LEASE(vp, p, cred, flag) -static __inline int _VOP_LEASE(struct vnode *vp, struct proc *p, struct ucred *cred, int flag) -{ - struct vop_lease_args a; - a.a_desc = VDESC(vop_lease); - a.a_vp = vp; - a.a_p = p; - a.a_cred = cred; - a.a_flag = flag; - return (VCALL(vp, VOFFSET(vop_lease), &a)); -} - -struct vop_ioctl_args { +extern errno_t VNOP_WRITE(vnode_t, struct uio *, int, vfs_context_t); + + +/* + *# + *#% ioctl vp U U U + *# + */ +struct vnop_ioctl_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; u_long a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_ioctl_desc; -#define VOP_IOCTL(vp, command, data, fflag, cred, p) _VOP_IOCTL(vp, command, data, fflag, cred, p) -static __inline int _VOP_IOCTL(struct vnode *vp, u_long command, caddr_t data, int fflag, struct ucred *cred, struct proc *p) -{ - struct vop_ioctl_args a; - a.a_desc = VDESC(vop_ioctl); - a.a_vp = vp; - a.a_command = command; - a.a_data = data; - a.a_fflag = fflag; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_ioctl), &a)); -} - -struct vop_select_args { +extern errno_t VNOP_IOCTL(vnode_t, u_long, caddr_t, int, vfs_context_t); + + +/* + *# + *#% select vp U U U + *# + */ +struct vnop_select_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_which; int a_fflags; - struct ucred *a_cred; void *a_wql; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_select_desc; -#define VOP_SELECT(vp, which, fflags, cred, wql, p) _VOP_SELECT(vp, which, fflags, cred, wql, p) -static __inline int _VOP_SELECT(struct vnode *vp, int which, int fflags, struct ucred *cred, void *wql, struct proc *p) -{ - struct vop_select_args a; - a.a_desc = VDESC(vop_select); - a.a_vp = vp; - a.a_which = which; - a.a_fflags = fflags; - a.a_cred = cred; - a.a_wql = wql; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_select), &a)); -} - -struct vop_exchange_args { - struct vnodeop_desc *a_desc; - struct vnode *a_fvp; - struct vnode *a_tvp; - struct ucred *a_cred; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_exchange_desc; -#define VOP_EXCHANGE(fvp, tvp, cred, p) _VOP_EXCHANGE(fvp, tvp, cred, p) -static __inline int _VOP_EXCHANGE(struct vnode *fvp, struct vnode *tvp, struct ucred *cred, struct proc *p) -{ - struct vop_exchange_args a; - a.a_desc = VDESC(vop_exchange); - a.a_fvp = fvp; - a.a_tvp = tvp; - a.a_cred = cred; - a.a_p = p; - return (VCALL(fvp, VOFFSET(vop_exchange), &a)); -} - -struct vop_kqfilt_add_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct knote *a_kn; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_kqfilt_add_desc; -#define VOP_KQFILT_ADD(vp, kn, p) _VOP_KQFILT_ADD(vp, kn, p) -static __inline int _VOP_KQFILT_ADD(struct vnode *vp, 
struct knote *kn, struct proc *p) -{ - struct vop_kqfilt_add_args a; - a.a_desc = VDESC(vop_kqfilt_add); - a.a_vp = vp; - a.a_kn = kn; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_kqfilt_add), &a)); -} - -struct vop_kqfilt_remove_args { +extern errno_t VNOP_SELECT(vnode_t, int, int, void *, vfs_context_t); + + +/* + *# + *#% exchange fvp L L L + *#% exchange tvp L L L + *# + */ +struct vnop_exchange_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - uintptr_t a_ident; - struct proc *a_p; + vnode_t a_fvp; + vnode_t a_tvp; + int a_options; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_kqfilt_remove_desc; -#define VOP_KQFILT_REMOVE(vp, ident, p) _VOP_KQFILT_REMOVE(vp, ident, p) -static __inline int _VOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, struct proc *p) -{ - struct vop_kqfilt_remove_args a; - a.a_desc = VDESC(vop_kqfilt_remove); - a.a_vp = vp; - a.a_ident = ident; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_kqfilt_remove), &a)); -} - -struct vop_revoke_args { +extern errno_t VNOP_EXCHANGE(vnode_t, vnode_t, int, vfs_context_t); + + +/* + *# + *#% revoke vp U U U + *# + */ +struct vnop_revoke_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_revoke_desc; -#define VOP_REVOKE(vp, flags) _VOP_REVOKE(vp, flags) -static __inline int _VOP_REVOKE(struct vnode *vp, int flags) -{ - struct vop_revoke_args a; - a.a_desc = VDESC(vop_revoke); - a.a_vp = vp; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_revoke), &a)); -} - -struct vop_mmap_args { +extern errno_t VNOP_REVOKE(vnode_t, int, vfs_context_t); + + +/* + *# + *# mmap - vp U U U + *# + */ +struct vnop_mmap_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_fflags; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_mmap_desc; -#define VOP_MMAP(vp, fflags, cred, p) _VOP_MMAP(vp, fflags, cred, p) -static __inline int _VOP_MMAP(struct vnode *vp, int fflags, struct ucred *cred, struct proc *p) -{ - struct vop_mmap_args a; - a.a_desc = VDESC(vop_mmap); - a.a_vp = vp; - a.a_fflags = fflags; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_mmap), &a)); -} - -struct vop_fsync_args { +extern errno_t VNOP_MMAP(vnode_t, int, vfs_context_t); + +/* + *# + *# mnomap - vp U U U + *# + */ +struct vnop_mnomap_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct ucred *a_cred; - int a_waitfor; - struct proc *a_p; + vnode_t a_vp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_fsync_desc; -#define VOP_FSYNC(vp, cred, waitfor, p) _VOP_FSYNC(vp, cred, waitfor, p) -static __inline int _VOP_FSYNC(struct vnode *vp, struct ucred *cred, int waitfor, struct proc *p) -{ - struct vop_fsync_args a; - a.a_desc = VDESC(vop_fsync); - a.a_vp = vp; - a.a_cred = cred; - a.a_waitfor = waitfor; - a.a_p = p; - { - int _err; - extern int ubc_hold(struct vnode *vp); - extern void ubc_rele(struct vnode *vp); - int _didhold = ubc_hold(vp); - _err = VCALL(vp, VOFFSET(vop_fsync), &a); - if (_didhold) - ubc_rele(vp); - return (_err); - } -} - -struct vop_seek_args { +extern errno_t VNOP_MNOMAP(vnode_t, vfs_context_t); + + +/* + *# + *#% fsync vp L L L + *# + */ +struct vnop_fsync_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - off_t a_oldoff; - off_t a_newoff; - struct ucred *a_cred; + vnode_t a_vp; + int a_waitfor; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_seek_desc; -#define VOP_SEEK(vp, oldoff, 
newoff, cred) _VOP_SEEK(vp, oldoff, newoff, cred) -static __inline int _VOP_SEEK(struct vnode *vp, off_t oldoff, off_t newoff, struct ucred *cred) -{ - struct vop_seek_args a; - a.a_desc = VDESC(vop_seek); - a.a_vp = vp; - a.a_oldoff = oldoff; - a.a_newoff = newoff; - a.a_cred = cred; - return (VCALL(vp, VOFFSET(vop_seek), &a)); -} - -struct vop_remove_args { +extern errno_t VNOP_FSYNC(vnode_t, int, vfs_context_t); + + +/* + *# + *#% remove dvp L U U + *#% remove vp L U U + *# + */ +struct vnop_remove_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode *a_vp; + vnode_t a_dvp; + vnode_t a_vp; struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_remove_desc; -#define VOP_REMOVE(dvp, vp, cnp) _VOP_REMOVE(dvp, vp, cnp) -static __inline int _VOP_REMOVE(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) -{ - struct vop_remove_args a; - a.a_desc = VDESC(vop_remove); - a.a_dvp = dvp; - a.a_vp = vp; - a.a_cnp = cnp; - return (VCALL(dvp, VOFFSET(vop_remove), &a)); -} - -struct vop_link_args { +extern errno_t VNOP_REMOVE(vnode_t, vnode_t, struct componentname *, int, vfs_context_t); + + +/* + *# + *#% link vp U U U + *#% link tdvp L U U + *# + */ +struct vnop_link_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct vnode *a_tdvp; + vnode_t a_vp; + vnode_t a_tdvp; struct componentname *a_cnp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_link_desc; -#define VOP_LINK(vp, tdvp, cnp) _VOP_LINK(vp, tdvp, cnp) -static __inline int _VOP_LINK(struct vnode *vp, struct vnode *tdvp, struct componentname *cnp) -{ - struct vop_link_args a; - a.a_desc = VDESC(vop_link); - a.a_vp = vp; - a.a_tdvp = tdvp; - a.a_cnp = cnp; - return (VCALL(vp, VOFFSET(vop_link), &a)); -} - -struct vop_rename_args { +extern errno_t VNOP_LINK(vnode_t, vnode_t, struct componentname *, vfs_context_t); + + +/* + *# + *#% rename fdvp U U U + *#% rename fvp U U U + *#% rename tdvp L U U + *#% rename tvp X U U + *# + */ +struct vnop_rename_args { struct vnodeop_desc *a_desc; - struct vnode *a_fdvp; - struct vnode *a_fvp; + vnode_t a_fdvp; + vnode_t a_fvp; struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; + vnode_t a_tdvp; + vnode_t a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_rename_desc; -#define VOP_RENAME(fdvp, fvp, fcnp, tdvp, tvp, tcnp) _VOP_RENAME(fdvp, fvp, fcnp, tdvp, tvp, tcnp) -static __inline int _VOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp) -{ - struct vop_rename_args a; - a.a_desc = VDESC(vop_rename); - a.a_fdvp = fdvp; - a.a_fvp = fvp; - a.a_fcnp = fcnp; - a.a_tdvp = tdvp; - a.a_tvp = tvp; - a.a_tcnp = tcnp; - return (VCALL(fdvp, VOFFSET(vop_rename), &a)); -} - -struct vop_mkdir_args { +extern errno_t VNOP_RENAME(vnode_t, vnode_t, struct componentname *, vnode_t, vnode_t, struct componentname *, vfs_context_t); + + +/* + *# + *#% mkdir dvp L U U + *#% mkdir vpp - L - + *# + */ +struct vnop_mkdir_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; + vnode_t a_dvp; + vnode_t *a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; -}; -extern struct vnodeop_desc vop_mkdir_desc; -#define VOP_MKDIR(dvp, vpp, cnp, vap) _VOP_MKDIR(dvp, vpp, cnp, vap) -static __inline int _VOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) -{ - struct vop_mkdir_args a; - a.a_desc = 
VDESC(vop_mkdir); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - a.a_vap = vap; - return (VCALL(dvp, VOFFSET(vop_mkdir), &a)); -} - -struct vop_rmdir_args { + struct vnode_attr *a_vap; + vfs_context_t a_context; + }; +extern errno_t VNOP_MKDIR(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); + + +/* + *# + *#% rmdir dvp L U U + *#% rmdir vp L U U + *# + */ +struct vnop_rmdir_args { struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode *a_vp; + vnode_t a_dvp; + vnode_t a_vp; struct componentname *a_cnp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_rmdir_desc; -#define VOP_RMDIR(dvp, vp, cnp) _VOP_RMDIR(dvp, vp, cnp) -static __inline int _VOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) -{ - struct vop_rmdir_args a; - a.a_desc = VDESC(vop_rmdir); - a.a_dvp = dvp; - a.a_vp = vp; - a.a_cnp = cnp; - return (VCALL(dvp, VOFFSET(vop_rmdir), &a)); -} - -struct vop_symlink_args { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - char *a_target; +extern errno_t VNOP_RMDIR(vnode_t, vnode_t, struct componentname *, vfs_context_t); + + +/* + *# + *#% symlink dvp L U U + *#% symlink vpp - U - + *# + */ +struct vnop_symlink_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + char *a_target; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_symlink_desc; -#define VOP_SYMLINK(dvp, vpp, cnp, vap, target) _VOP_SYMLINK(dvp, vpp, cnp, vap, target) -static __inline int _VOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap, char *target) -{ - struct vop_symlink_args a; - a.a_desc = VDESC(vop_symlink); - a.a_dvp = dvp; - a.a_vpp = vpp; - a.a_cnp = cnp; - a.a_vap = vap; - a.a_target = target; - return (VCALL(dvp, VOFFSET(vop_symlink), &a)); -} - -struct vop_readdir_args { +extern errno_t VNOP_SYMLINK(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, char *, vfs_context_t); + + +/* + *# + *#% readdir vp L L L + *# + * + * When VNOP_READDIR is called from the NFS Server, the nfs_data + * argument is non-NULL. + * + * The value of nfs_eofflag should be set to TRUE if the end of + * the directory was reached while reading. + * + * The directory seek offsets (cookies) are returned to the NFS client and + * may be used later to restart a directory read part way through + * the directory. There is one cookie returned for each directory + * entry returned and its size is determined from nfs_sizeofcookie. + * The value of the cookie should be the logical offset within the + * directory where the on-disc version of the appropriate directory + * entry starts. Memory for the cookies is allocated from M_TEMP + * and it is freed by the caller of VNOP_READDIR.
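To make that contract concrete, here is a minimal sketch of a filesystem readdir handler written against the vnop_readdir_args layout defined just below. It is illustrative only: myfs_vnop_readdir, its single hard-wired entry, and the include set are hypothetical, not part of this patch.

#include <sys/types.h>
#include <sys/systm.h>		/* bzero() and common kernel prototypes */
#include <sys/dirent.h>		/* struct dirent, DT_DIR */
#include <sys/uio.h>		/* uiomove() */
#include <sys/vnode_if.h>	/* struct vnop_readdir_args */

static int
myfs_vnop_readdir(struct vnop_readdir_args *ap)
{
	struct dirent de;
	int error;

	/*
	 * A real filesystem would iterate its on-disk entries starting
	 * at the offset carried in ap->a_uio; this sketch emits one
	 * synthetic entry and then reports end-of-directory.  A fixed
	 * d_reclen is used for simplicity.
	 */
	bzero(&de, sizeof (de));
	de.d_fileno = 2;		/* hypothetical root inode number */
	de.d_type = DT_DIR;
	de.d_namlen = 1;
	de.d_name[0] = '.';		/* already NUL-terminated by bzero */
	de.d_reclen = sizeof (de);

	error = uiomove((caddr_t)&de, sizeof (de), ap->a_uio);
	if (error)
		return (error);

	*ap->a_eofflag = 1;		/* no entries remain */
	*ap->a_numdirent = 1;		/* entries copied out on this call */
	return (0);
}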
+ * + */ + +struct vnop_readdir_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct uio *a_uio; - struct ucred *a_cred; + int a_flags; int *a_eofflag; - int *a_ncookies; - u_long **a_cookies; + int *a_numdirent; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_readdir_desc; -#define VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies) _VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies) -static __inline int _VOP_READDIR(struct vnode *vp, struct uio *uio, struct ucred *cred, int *eofflag, int *ncookies, u_long **cookies) -{ - struct vop_readdir_args a; - a.a_desc = VDESC(vop_readdir); - a.a_vp = vp; - a.a_uio = uio; - a.a_cred = cred; - a.a_eofflag = eofflag; - a.a_ncookies = ncookies; - a.a_cookies = cookies; - return (VCALL(vp, VOFFSET(vop_readdir), &a)); -} - -struct vop_readdirattr_args { +extern errno_t VNOP_READDIR(vnode_t, struct uio *, int, int *, int *, vfs_context_t); + + +/* + *# + *#% readdirattr vp L L L + *# + */ +struct vnop_readdirattr_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; u_long a_maxcount; @@ -754,480 +576,154 @@ struct vop_readdirattr_args { u_long *a_newstate; int *a_eofflag; u_long *a_actualcount; - u_long **a_cookies; - struct ucred *a_cred; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_readdirattr_desc; -#define VOP_READDIRATTR(vp, alist, uio, maxcount, options, newstate, eofflag, actualcount, cookies, cred) _VOP_READDIRATTR(vp, alist, uio, maxcount, options, newstate, eofflag, actualcount, cookies, cred) -static __inline int _VOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_long maxcount, u_long options, u_long *newstate, int *eofflag, u_long *actualcount, u_long **cookies, struct ucred *cred) -{ - struct vop_readdirattr_args a; - a.a_desc = VDESC(vop_readdirattr); - a.a_vp = vp; - a.a_alist = alist; - a.a_uio = uio; - a.a_maxcount = maxcount; - a.a_options = options; - a.a_newstate = newstate; - a.a_eofflag = eofflag; - a.a_actualcount = actualcount; - a.a_cookies = cookies; - a.a_cred = cred; - return (VCALL(vp, VOFFSET(vop_readdirattr), &a)); -} - -struct vop_readlink_args { +extern errno_t VNOP_READDIRATTR(vnode_t, struct attrlist *, struct uio *, u_long, u_long, u_long *, int *, u_long *, vfs_context_t); + + +/* + *# + *#% readlink vp L L L + *# + */ +struct vnop_readlink_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; struct uio *a_uio; - struct ucred *a_cred; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_readlink_desc; -#define VOP_READLINK(vp, uio, cred) _VOP_READLINK(vp, uio, cred) -static __inline int _VOP_READLINK(struct vnode *vp, struct uio *uio, struct ucred *cred) -{ - struct vop_readlink_args a; - a.a_desc = VDESC(vop_readlink); - a.a_vp = vp; - a.a_uio = uio; - a.a_cred = cred; - return (VCALL(vp, VOFFSET(vop_readlink), &a)); -} - -struct vop_abortop_args { - struct vnodeop_desc *a_desc; - struct vnode *a_dvp; - struct componentname *a_cnp; -}; -extern struct vnodeop_desc vop_abortop_desc; -#define VOP_ABORTOP(dvp, cnp) _VOP_ABORTOP(dvp, cnp) -static __inline int _VOP_ABORTOP(struct vnode *dvp, struct componentname *cnp) -{ - struct vop_abortop_args a; - a.a_desc = VDESC(vop_abortop); - a.a_dvp = dvp; - a.a_cnp = cnp; - return (VCALL(dvp, VOFFSET(vop_abortop), &a)); -} - -struct vop_inactive_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_inactive_desc; -#define 
VOP_INACTIVE(vp, p) _VOP_INACTIVE(vp, p) -static __inline int _VOP_INACTIVE(struct vnode *vp, struct proc *p) -{ - struct vop_inactive_args a; - a.a_desc = VDESC(vop_inactive); - a.a_vp = vp; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_inactive), &a)); -} - -struct vop_reclaim_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_reclaim_desc; -#define VOP_RECLAIM(vp, p) _VOP_RECLAIM(vp, p) -static __inline int _VOP_RECLAIM(struct vnode *vp, struct proc *p) -{ - struct vop_reclaim_args a; - a.a_desc = VDESC(vop_reclaim); - a.a_vp = vp; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_reclaim), &a)); -} - -struct vop_lock_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_lock_desc; -#define VOP_LOCK(vp, flags, p) _VOP_LOCK(vp, flags, p) -static __inline int _VOP_LOCK(struct vnode *vp, int flags, struct proc *p) -{ - struct vop_lock_args a; - a.a_desc = VDESC(vop_lock); - a.a_vp = vp; - a.a_flags = flags; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_lock), &a)); -} - -struct vop_unlock_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_unlock_desc; -#define VOP_UNLOCK(vp, flags, p) _VOP_UNLOCK(vp, flags, p) -static __inline int _VOP_UNLOCK(struct vnode *vp, int flags, struct proc *p) -{ - struct vop_unlock_args a; - a.a_desc = VDESC(vop_unlock); - a.a_vp = vp; - a.a_flags = flags; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_unlock), &a)); -} - -struct vop_bmap_args { +extern errno_t VNOP_READLINK(vnode_t, struct uio *, vfs_context_t); + + +/* + *# + *#% inactive vp L U U + *# + */ +struct vnop_inactive_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - daddr_t a_bn; - struct vnode **a_vpp; - daddr_t *a_bnp; - int *a_runp; + vnode_t a_vp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_bmap_desc; -#define VOP_BMAP(vp, bn, vpp, bnp, runp) _VOP_BMAP(vp, bn, vpp, bnp, runp) -static __inline int _VOP_BMAP(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr_t *bnp, int *runp) -{ - struct vop_bmap_args a; - a.a_desc = VDESC(vop_bmap); - a.a_vp = vp; - a.a_bn = bn; - a.a_vpp = vpp; - a.a_bnp = bnp; - a.a_runp = runp; - return (VCALL(vp, VOFFSET(vop_bmap), &a)); -} - -struct vop_print_args { +extern errno_t VNOP_INACTIVE(vnode_t, vfs_context_t); + + +/* + *# + *#% reclaim vp U U U + *# + */ +struct vnop_reclaim_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_print_desc; -#define VOP_PRINT(vp) _VOP_PRINT(vp) -static __inline int _VOP_PRINT(struct vnode *vp) -{ - struct vop_print_args a; - a.a_desc = VDESC(vop_print); - a.a_vp = vp; - return (VCALL(vp, VOFFSET(vop_print), &a)); -} - -struct vop_islocked_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; -}; -extern struct vnodeop_desc vop_islocked_desc; -#define VOP_ISLOCKED(vp) _VOP_ISLOCKED(vp) -static __inline int _VOP_ISLOCKED(struct vnode *vp) -{ - struct vop_islocked_args a; - a.a_desc = VDESC(vop_islocked); - a.a_vp = vp; - return (VCALL(vp, VOFFSET(vop_islocked), &a)); -} - -struct vop_pathconf_args { +extern errno_t VNOP_RECLAIM(vnode_t, vfs_context_t); + + +/* + *# + *#% pathconf vp L L L + *# + */ +struct vnop_pathconf_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; int a_name; register_t *a_retval; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_pathconf_desc; 
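The new vnop_pathconf_args layout above replaces the old credential/proc pair with a single vfs_context_t. As a hedged illustration of a handler for that contract (myfs_vnop_pathconf and its limits are hypothetical, not taken from this patch):

#include <sys/errno.h>		/* EINVAL */
#include <sys/unistd.h>		/* _PC_* names */
#include <sys/vnode_if.h>	/* struct vnop_pathconf_args */

static int
myfs_vnop_pathconf(struct vnop_pathconf_args *ap)
{
	/* ap->a_context is available for credential checks; unused here. */
	switch (ap->a_name) {
	case _PC_LINK_MAX:		/* max hard links to one file */
		*ap->a_retval = 32767;	/* hypothetical fs limit */
		return (0);
	case _PC_NAME_MAX:		/* longest filename component */
		*ap->a_retval = 255;
		return (0);
	case _PC_CHOWN_RESTRICTED:	/* chown requires privilege */
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);	/* name not supported here */
	}
}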
-#define VOP_PATHCONF(vp, name, retval) _VOP_PATHCONF(vp, name, retval) -static __inline int _VOP_PATHCONF(struct vnode *vp, int name, register_t *retval) -{ - struct vop_pathconf_args a; - a.a_desc = VDESC(vop_pathconf); - a.a_vp = vp; - a.a_name = name; - a.a_retval = retval; - return (VCALL(vp, VOFFSET(vop_pathconf), &a)); -} - -struct vop_advlock_args { +extern errno_t VNOP_PATHCONF(vnode_t, int, register_t *, vfs_context_t); /* register_t??????? */ + + +/* + *# + *#% advlock vp U U U + *# + */ +struct vnop_advlock_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_advlock_desc; -#define VOP_ADVLOCK(vp, id, op, fl, flags) _VOP_ADVLOCK(vp, id, op, fl, flags) -static __inline int _VOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags) -{ - struct vop_advlock_args a; - a.a_desc = VDESC(vop_advlock); - a.a_vp = vp; - a.a_id = id; - a.a_op = op; - a.a_fl = fl; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_advlock), &a)); -} - -struct vop_blkatoff_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - off_t a_offset; - char **a_res; - struct buf **a_bpp; -}; -extern struct vnodeop_desc vop_blkatoff_desc; -#define VOP_BLKATOFF(vp, offset, res, bpp) _VOP_BLKATOFF(vp, offset, res, bpp) -static __inline int _VOP_BLKATOFF(struct vnode *vp, off_t offset, char **res, struct buf **bpp) -{ - struct vop_blkatoff_args a; - a.a_desc = VDESC(vop_blkatoff); - a.a_vp = vp; - a.a_offset = offset; - a.a_res = res; - a.a_bpp = bpp; - return (VCALL(vp, VOFFSET(vop_blkatoff), &a)); -} - -struct vop_valloc_args { - struct vnodeop_desc *a_desc; - struct vnode *a_pvp; - int a_mode; - struct ucred *a_cred; - struct vnode **a_vpp; -}; -extern struct vnodeop_desc vop_valloc_desc; -#define VOP_VALLOC(pvp, mode, cred, vpp) _VOP_VALLOC(pvp, mode, cred, vpp) -static __inline int _VOP_VALLOC(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) -{ - struct vop_valloc_args a; - a.a_desc = VDESC(vop_valloc); - a.a_pvp = pvp; - a.a_mode = mode; - a.a_cred = cred; - a.a_vpp = vpp; - return (VCALL(pvp, VOFFSET(vop_valloc), &a)); -} - -struct vop_reallocblks_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct cluster_save *a_buflist; -}; -extern struct vnodeop_desc vop_reallocblks_desc; -#define VOP_REALLOCBLKS(vp, buflist) _VOP_REALLOCBLKS(vp, buflist) -static __inline int _VOP_REALLOCBLKS(struct vnode *vp, struct cluster_save *buflist) -{ - struct vop_reallocblks_args a; - a.a_desc = VDESC(vop_reallocblks); - a.a_vp = vp; - a.a_buflist = buflist; - return (VCALL(vp, VOFFSET(vop_reallocblks), &a)); -} - -struct vop_vfree_args { - struct vnodeop_desc *a_desc; - struct vnode *a_pvp; - ino_t a_ino; - int a_mode; -}; -extern struct vnodeop_desc vop_vfree_desc; -#define VOP_VFREE(pvp, ino, mode) _VOP_VFREE(pvp, ino, mode) -static __inline int _VOP_VFREE(struct vnode *pvp, ino_t ino, int mode) -{ - struct vop_vfree_args a; - a.a_desc = VDESC(vop_vfree); - a.a_pvp = pvp; - a.a_ino = ino; - a.a_mode = mode; - return (VCALL(pvp, VOFFSET(vop_vfree), &a)); -} - -struct vop_truncate_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - off_t a_length; - int a_flags; - struct ucred *a_cred; - struct proc *a_p; -}; -extern struct vnodeop_desc vop_truncate_desc; -#define VOP_TRUNCATE(vp, length, flags, cred, p) _VOP_TRUNCATE(vp, length, flags, cred, p) -static __inline int _VOP_TRUNCATE(struct vnode *vp, off_t length, int 
flags, struct ucred *cred, struct proc *p) -{ - struct vop_truncate_args a; - a.a_desc = VDESC(vop_truncate); - a.a_vp = vp; - a.a_length = length; - a.a_flags = flags; - a.a_cred = cred; - a.a_p = p; - { - int _err; - extern int ubc_hold(struct vnode *vp); - extern void ubc_rele(struct vnode *vp); - int _didhold = ubc_hold(vp); - _err = VCALL(vp, VOFFSET(vop_truncate), &a); - if (_didhold) - ubc_rele(vp); - return (_err); - } -} - -struct vop_allocate_args { +extern errno_t VNOP_ADVLOCK(vnode_t, caddr_t, int, struct flock *, int, vfs_context_t); + +/* + *# + *#% allocate vp L L L + *# + */ +struct vnop_allocate_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; off_t a_length; u_int32_t a_flags; off_t *a_bytesallocated; off_t a_offset; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_allocate_desc; -#define VOP_ALLOCATE(vp, length, flags, bytesallocated, offset, cred, p) _VOP_ALLOCATE(vp, length, flags, bytesallocated, offset, cred, p) -static __inline int _VOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesallocated, off_t offset, struct ucred *cred, struct proc *p) -{ - struct vop_allocate_args a; - a.a_desc = VDESC(vop_allocate); - a.a_vp = vp; - a.a_length = length; - a.a_flags = flags; - a.a_bytesallocated = bytesallocated; - a.a_offset = offset; - a.a_cred = cred; - a.a_p = p; - return (VCALL(vp, VOFFSET(vop_allocate), &a)); -} - -struct vop_update_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; -}; -extern struct vnodeop_desc vop_update_desc; -#define VOP_UPDATE(vp, access, modify, waitfor) _VOP_UPDATE(vp, access, modify, waitfor) -static __inline int _VOP_UPDATE(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor) -{ - struct vop_update_args a; - a.a_desc = VDESC(vop_update); - a.a_vp = vp; - a.a_access = access; - a.a_modify = modify; - a.a_waitfor = waitfor; - return (VCALL(vp, VOFFSET(vop_update), &a)); -} - -struct vop_pgrd_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; -}; -extern struct vnodeop_desc vop_pgrd_desc; -#define VOP_PGRD(vp, uio, cred) _VOP_PGRD(vp, uio, cred) -static __inline int _VOP_PGRD(struct vnode *vp, struct uio *uio, struct ucred *cred) -{ - struct vop_pgrd_args a; - a.a_desc = VDESC(vop_pgrd); - a.a_vp = vp; - a.a_uio = uio; - a.a_cred = cred; - return (VCALL(vp, VOFFSET(vop_pgrd), &a)); -} - -struct vop_pgwr_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - vm_offset_t a_offset; -}; -extern struct vnodeop_desc vop_pgwr_desc; -#define VOP_PGWR(vp, uio, cred, offset) _VOP_PGWR(vp, uio, cred, offset) -static __inline int _VOP_PGWR(struct vnode *vp, struct uio *uio, struct ucred *cred, vm_offset_t offset) -{ - struct vop_pgwr_args a; - a.a_desc = VDESC(vop_pgwr); - a.a_vp = vp; - a.a_uio = uio; - a.a_cred = cred; - a.a_offset = offset; - return (VCALL(vp, VOFFSET(vop_pgwr), &a)); -} - -struct vop_pagein_args { +extern errno_t VNOP_ALLOCATE(vnode_t, off_t, u_int32_t, off_t *, off_t, vfs_context_t); + +/* + *# + *#% pagein vp = = = + *# + */ +struct vnop_pagein_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; upl_t a_pl; vm_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; - struct ucred *a_cred; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_pagein_desc; -#define VOP_PAGEIN(vp, 
pl, pl_offset, f_offset, size, cred, flags) _VOP_PAGEIN(vp, pl, pl_offset, f_offset, size, cred, flags) -static __inline int _VOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, struct ucred *cred, int flags) -{ - struct vop_pagein_args a; - a.a_desc = VDESC(vop_pagein); - a.a_vp = vp; - a.a_pl = pl; - a.a_pl_offset = pl_offset; - a.a_f_offset = f_offset; - a.a_size = size; - a.a_cred = cred; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_pagein), &a)); -} - -struct vop_pageout_args { +extern errno_t VNOP_PAGEIN(vnode_t, upl_t, vm_offset_t, off_t, size_t, int, vfs_context_t); /* vm_offset_t ? */ + + +/* + *# + *#% pageout vp = = = + *# + */ +struct vnop_pageout_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; upl_t a_pl; vm_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; - struct ucred *a_cred; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_pageout_desc; -#define VOP_PAGEOUT(vp, pl, pl_offset, f_offset, size, cred, flags) _VOP_PAGEOUT(vp, pl, pl_offset, f_offset, size, cred, flags) -static __inline int _VOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, struct ucred *cred, int flags) -{ - struct vop_pageout_args a; - a.a_desc = VDESC(vop_pageout); - a.a_vp = vp; - a.a_pl = pl; - a.a_pl_offset = pl_offset; - a.a_f_offset = f_offset; - a.a_size = size; - a.a_cred = cred; - a.a_flags = flags; - return (VCALL(vp, VOFFSET(vop_pageout), &a)); -} - -struct vop_devblocksize_args { +extern errno_t VNOP_PAGEOUT(vnode_t, upl_t, vm_offset_t, off_t, size_t, int, vfs_context_t); + + +#ifdef BSD_KERNEL_PRIVATE +/* + *#% devblocksize vp = = = + *# + */ +struct vnop_devblocksize_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; register_t *a_retval; }; -extern struct vnodeop_desc vop_devblocksize_desc; -#define VOP_DEVBLOCKSIZE(vp, retval) _VOP_DEVBLOCKSIZE(vp, retval) -static __inline int _VOP_DEVBLOCKSIZE(struct vnode *vp, register_t *retval) -{ - struct vop_devblocksize_args a; - a.a_desc = VDESC(vop_devblocksize); - a.a_vp = vp; - a.a_retval = retval; - return (VCALL(vp, VOFFSET(vop_devblocksize), &a)); -} - -struct vop_searchfs_args { +#endif /* BSD_KERNEL_PRIVATE */ + +/* + *# + *#% searchfs vp L L L + *# + */ +struct vnop_searchfs_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; void *a_searchparams1; void *a_searchparams2; struct attrlist *a_searchattrs; @@ -1239,145 +735,156 @@ struct vop_searchfs_args { u_long a_options; struct uio *a_uio; struct searchstate *a_searchstate; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_searchfs_desc; -#define VOP_SEARCHFS(vp, searchparams1, searchparams2, searchattrs, maxmatches, timelimit, returnattrs, nummatches, scriptcode, options, uio, searchstate) _VOP_SEARCHFS(vp, searchparams1, searchparams2, searchattrs, maxmatches, timelimit, returnattrs, nummatches, scriptcode, options, uio, searchstate) -static __inline int _VOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, u_long maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, u_long *nummatches, u_long scriptcode, u_long options, struct uio *uio, struct searchstate *searchstate) -{ - struct vop_searchfs_args a; - a.a_desc = VDESC(vop_searchfs); - a.a_vp = vp; - a.a_searchparams1 = searchparams1; - a.a_searchparams2 = searchparams2; - a.a_searchattrs = searchattrs; - a.a_maxmatches = maxmatches; - a.a_timelimit = timelimit; - 
a.a_returnattrs = returnattrs; - a.a_nummatches = nummatches; - a.a_scriptcode = scriptcode; - a.a_options = options; - a.a_uio = uio; - a.a_searchstate = searchstate; - return (VCALL(vp, VOFFSET(vop_searchfs), &a)); -} - -struct vop_copyfile_args { +extern errno_t VNOP_SEARCHFS(vnode_t, void *, void *, struct attrlist *, u_long, struct timeval *, struct attrlist *, u_long *, u_long, u_long, struct uio *, struct searchstate *, vfs_context_t); + + +/* + *# + *#% copyfile fvp U U U + *#% copyfile tdvp L U U + *#% copyfile tvp X U U + *# + */ +struct vnop_copyfile_args { struct vnodeop_desc *a_desc; - struct vnode *a_fvp; - struct vnode *a_tdvp; - struct vnode *a_tvp; + vnode_t a_fvp; + vnode_t a_tdvp; + vnode_t a_tvp; struct componentname *a_tcnp; int a_mode; int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_copyfile_desc; -#define VOP_COPYFILE(fvp, tdvp, tvp, tcnp, mode, flags) _VOP_COPYFILE(fvp, tdvp, tvp, tcnp, mode, flags) -static __inline int _VOP_COPYFILE(struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, int mode, int flags) -{ - struct vop_copyfile_args a; - a.a_desc = VDESC(vop_copyfile); - a.a_fvp = fvp; - a.a_tdvp = tdvp; - a.a_tvp = tvp; - a.a_tcnp = tcnp; - a.a_mode = mode; - a.a_flags = flags; - return (VCALL(fvp, VOFFSET(vop_copyfile), &a)); -} - -struct vop_blktooff_args { +extern errno_t VNOP_COPYFILE(vnode_t, vnode_t, vnode_t, struct componentname *, int, int, vfs_context_t); + + +struct vnop_getxattr_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; - daddr_t a_lblkno; + vnode_t a_vp; + char * a_name; + uio_t a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_getxattr_desc; +extern errno_t VNOP_GETXATTR(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t); + +struct vnop_setxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + char * a_name; + uio_t a_uio; + int a_options; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_setxattr_desc; +extern errno_t VNOP_SETXATTR(vnode_t, const char *, uio_t, int, vfs_context_t); + +struct vnop_removexattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + char * a_name; + int a_options; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_removexattr_desc; +extern errno_t VNOP_REMOVEXATTR(vnode_t, const char *, int, vfs_context_t); + +struct vnop_listxattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + uio_t a_uio; + size_t *a_size; + int a_options; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_listxattr_desc; +extern errno_t VNOP_LISTXATTR(vnode_t, uio_t, size_t *, int, vfs_context_t); + + +/* + *# + *#% blktooff vp = = = + *# + */ +struct vnop_blktooff_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + daddr64_t a_lblkno; off_t *a_offset; }; -extern struct vnodeop_desc vop_blktooff_desc; -#define VOP_BLKTOOFF(vp, lblkno, offset) _VOP_BLKTOOFF(vp, lblkno, offset) -static __inline int _VOP_BLKTOOFF(struct vnode *vp, daddr_t lblkno, off_t *offset) -{ - struct vop_blktooff_args a; - a.a_desc = VDESC(vop_blktooff); - a.a_vp = vp; - a.a_lblkno = lblkno; - a.a_offset = offset; - return (VCALL(vp, VOFFSET(vop_blktooff), &a)); -} - -struct vop_offtoblk_args { +extern errno_t VNOP_BLKTOOFF(vnode_t, daddr64_t, off_t *); + + +/* + *# + *#% offtoblk vp = = = + *# + */ +struct vnop_offtoblk_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; }; -extern struct 
vnodeop_desc vop_offtoblk_desc; -#define VOP_OFFTOBLK(vp, offset, lblkno) _VOP_OFFTOBLK(vp, offset, lblkno) -static __inline int _VOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr_t *lblkno) -{ - struct vop_offtoblk_args a; - a.a_desc = VDESC(vop_offtoblk); - a.a_vp = vp; - a.a_offset = offset; - a.a_lblkno = lblkno; - return (VCALL(vp, VOFFSET(vop_offtoblk), &a)); -} - -struct vop_cmap_args { +extern errno_t VNOP_OFFTOBLK(vnode_t, off_t, daddr64_t *); + + +/* + *# + *#% blockmap vp L L L + *# + */ +struct vnop_blockmap_args { struct vnodeop_desc *a_desc; - struct vnode *a_vp; + vnode_t a_vp; off_t a_foffset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; + int a_flags; + vfs_context_t a_context; }; -extern struct vnodeop_desc vop_cmap_desc; -#define VOP_CMAP(vp, foffset, size, bpn, run, poff) _VOP_CMAP(vp, foffset, size, bpn, run, poff) -static __inline int _VOP_CMAP(struct vnode *vp, off_t foffset, size_t size, daddr_t *bpn, size_t *run, void *poff) -{ - struct vop_cmap_args a; - a.a_desc = VDESC(vop_cmap); - a.a_vp = vp; - a.a_foffset = foffset; - a.a_size = size; - a.a_bpn = bpn; - a.a_run = run; - a.a_poff = poff; - return (VCALL(vp, VOFFSET(vop_cmap), &a)); -} - -/* Special cases: */ -#include -#include +extern errno_t VNOP_BLOCKMAP(vnode_t, off_t, size_t, daddr64_t *, size_t *, void *, + int, vfs_context_t); -struct vop_strategy_args { +struct vnop_strategy_args { struct vnodeop_desc *a_desc; struct buf *a_bp; }; -extern struct vnodeop_desc vop_strategy_desc; -#define VOP_STRATEGY(bp) _VOP_STRATEGY(bp) -static __inline int _VOP_STRATEGY(struct buf *bp) -{ - struct vop_strategy_args a; - a.a_desc = VDESC(vop_strategy); - a.a_bp = bp; - return (VCALL(bp->b_vp, VOFFSET(vop_strategy), &a)); -} - -struct vop_bwrite_args { +extern errno_t VNOP_STRATEGY(struct buf *bp); + +struct vnop_bwrite_args { struct vnodeop_desc *a_desc; - struct buf *a_bp; + buf_t a_bp; }; -extern struct vnodeop_desc vop_bwrite_desc; -#define VOP_BWRITE(bp) _VOP_BWRITE(bp) -static __inline int _VOP_BWRITE(struct buf *bp) -{ - struct vop_bwrite_args a; - a.a_desc = VDESC(vop_bwrite); - a.a_bp = bp; - return (VCALL(bp->b_vp, VOFFSET(vop_bwrite), &a)); -} - -/* End of special cases. */ - -#endif /* __APPLE_API_UNSTABLE */ +extern errno_t VNOP_BWRITE(buf_t); + + +struct vnop_kqfilt_add_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct knote *a_kn; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_kqfilt_add_desc; +extern errno_t VNOP_KQFILT_ADD(vnode_t , struct knote *, vfs_context_t); + +struct vnop_kqfilt_remove_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + uintptr_t a_ident; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_kqfilt_remove_desc; +errno_t VNOP_KQFILT_REMOVE(vnode_t , uintptr_t , vfs_context_t); + +__END_DECLS + +#endif /* KERNEL */ + #endif /* !_SYS_VNODE_IF_H_ */ diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h new file mode 100644 index 000000000..df02742df --- /dev/null +++ b/bsd/sys/vnode_internal.h @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vnode.h 8.17 (Berkeley) 5/20/95 + */ + +#ifndef _SYS_VNODE_INTERNAL_H_ +#define _SYS_VNODE_INTERNAL_H_ + +#define INTERIM_FSNODE_LOCK 1 + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +struct lockf; + +LIST_HEAD(buflists, buf); + + +struct unsafe_fsnode { + lck_mtx_t fsnodelock; + int32_t fsnode_count; + void * fsnodeowner; +}; + +/* + * Reading or writing any of these items requires holding the appropriate lock. 
+ * v_freelist is locked by the global vnode_list_lock + * v_mntvnodes is locked by the mount_lock + * v_nclinks and v_ncchildren are protected by the global name_cache_lock + * v_cleanblkhd and v_dirtyblkhd and v_iterblkflags are locked via the global buf_mtxp + * the rest of the structure is protected by the vnode_lock + */ +struct vnode { + lck_mtx_t v_lock; /* vnode mutex */ + TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ + TAILQ_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ + LIST_HEAD(, namecache) v_nclinks; /* name cache entries that name this vnode */ + LIST_HEAD(, namecache) v_ncchildren; /* name cache entries that regard us as their parent */ + vnode_t v_defer_reclaimlist; /* in case we have to defer the reclaim to avoid recursion */ + u_long v_flag; /* vnode flags (see below) */ + u_short v_lflag; /* vnode local and named ref flags */ + u_char v_iterblkflags; /* buf iterator flags */ + u_char v_references; /* number of times io_count has been granted */ + int32_t v_kusecount; /* count of in-kernel refs */ + int32_t v_usecount; /* reference count of users */ + int32_t v_iocount; /* iocounters */ + void * v_owner; /* act that owns the vnode */ + enum vtype v_type; /* vnode type */ + u_long v_id; /* identity of vnode contents */ + union { + struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ + struct socket *vu_socket; /* unix ipc (VSOCK) */ + struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ + struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ + struct ubc_info *vu_ubcinfo; /* valid for (VREG) */ + } v_un; + struct buflists v_cleanblkhd; /* clean blocklist head */ + struct buflists v_dirtyblkhd; /* dirty blocklist head */ + kauth_cred_t v_cred; + int v_cred_timestamp; + long v_numoutput; /* num of writes in progress */ + long v_writecount; /* reference count of writers */ + char * v_name; /* name component of the vnode */ + vnode_t v_parent; /* pointer to parent vnode */ +#ifdef INTERIM_FSNODE_LOCK + struct lockf *v_lockf; /* advisory lock list head */ + struct unsafe_fsnode *v_unsafefs; /* pointer to struct used to lock */ +#endif /* vnodes on unsafe filesystems */ + int (**v_op)(void *); /* vnode operations vector */ + enum vtagtype v_tag; /* type of underlying data */ + mount_t v_mount; /* ptr to vfs we are in */ + void * v_data; /* private data for fs */ +}; + +#define v_mountedhere v_un.vu_mountedhere +#define v_socket v_un.vu_socket +#define v_specinfo v_un.vu_specinfo +#define v_fifoinfo v_un.vu_fifoinfo +#define v_ubcinfo v_un.vu_ubcinfo + + +/* + * v_iterblkflags + */ +#define VBI_ITER 0x1 +#define VBI_ITERWANT 0x2 +#define VBI_CLEAN 0x4 +#define VBI_DIRTY 0x8 +#define VBI_NEWBUF 0x10 + + +/* + * v_lflags + */ +#define VL_SUSPENDED 0x0001 /* vnode is suspended */ +#define VL_DRAIN 0x0002 /* vnode is being drained */ +#define VL_TERMINATE 0x0004 /* vnode is marked for termination */ +#define VL_TERMWANT 0x0008 /* waiting for vnode termination to complete */ +#define VL_DEAD 0x0010 /* vnode is dead and completed recycle */ +#define VL_MARKTERM 0x0020 /* vnode is marked to be terminated */ +#define VL_MOUNTDEAD 0x0040 /* v_mountedhere is dead */ +#define VL_NEEDINACTIVE 0x0080 /* delay VNOP_INACTIVE until iocount goes to 0 */ + +#define VNAMED_UBC 0x2000 /* ubc named reference */ +#define VNAMED_MOUNT 0x4000 /* mount point named reference */ +#define VNAMED_FSHASH 0x8000 /* FS hash named reference */ + + +/* + * v_flags + */ +#define VROOT 0x000001 /* root of its file system */ +#define VTEXT 0x000002 /* vnode is a pure text prototype */ +#define VSYSTEM
0x000004 /* vnode being used by kernel */ +#define VISTTY 0x000008 /* vnode represents a tty */ +#define VWASMAPPED 0x000010 /* vnode was mapped before */ +#define VTERMINATE 0x000020 /* terminating memory object */ +#define VTERMWANT 0x000040 /* waiting for memory object death */ +#define VMOUNT 0x000080 /* mount operation in progress */ +#define VBWAIT 0x000100 /* waiting for output to complete */ +#define VALIASED 0x000200 /* vnode has an alias */ +#define VNOCACHE_DATA 0x000400 /* don't keep data cached once it's been consumed */ +#define VSTANDARD 0x000800 /* vnode obtained from common pool */ +#define VAGE 0x001000 /* Insert vnode at head of free list */ +#define VRAOFF 0x002000 /* read ahead disabled */ +#define VNCACHEABLE 0x004000 /* vnode is allowed to be put back in name cache */ +#define VUINACTIVE 0x008000 /* UBC vnode is on inactive list */ +#define VSWAP 0x010000 /* vnode is being used as swapfile */ +#define VTHROTTLED 0x020000 /* writes or pageouts have been throttled */ + /* wakeup tasks waiting when count falls below threshold */ +#define VNOFLUSH 0x040000 /* don't vflush() if SKIPSYSTEM */ +#define VLOCKLOCAL 0x080000 /* this vnode does adv locking in vfs */ +#define VISHARDLINK 0x100000 /* hard link needs special processing on lookup and in volfs */ + +#define VCRED_EXPIRED 2 /* number of seconds to keep cached credential valid */ + + +/* + * Global vnode data. + */ +extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ + + +/* + * Mods for extensibility. + */ + +/* + * Flags for vdesc_flags: + */ +#define VDESC_MAX_VPS 16 +/* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ +#define VDESC_VP0_WILLRELE 0x0001 +#define VDESC_VP1_WILLRELE 0x0002 +#define VDESC_VP2_WILLRELE 0x0004 +#define VDESC_VP3_WILLRELE 0x0008 +#define VDESC_NOMAP_VPP 0x0100 +#define VDESC_VPP_WILLRELE 0x0200 + +/* + * VDESC_NO_OFFSET is used to identify the end of the offset list + * and in places where no such field exists. + */ +#define VDESC_NO_OFFSET -1 + +/* + * This structure describes the vnode operation taking place. + */ +struct vnodeop_desc { + int vdesc_offset; /* offset in vector--first for speed */ + char *vdesc_name; /* a readable name for debugging */ + int vdesc_flags; /* VDESC_* flags */ + + /* + * These ops are used by bypass routines to map and locate arguments. + * Creds and procs are not needed in bypass routines, but sometimes + * they are useful to (for example) transport layers. + * Nameidata is useful because it has a cred in it. + */ + int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ + int vdesc_vpp_offset; /* return vpp location */ + int vdesc_cred_offset; /* cred location, if any */ + int vdesc_proc_offset; /* proc location, if any */ + int vdesc_componentname_offset; /* if any */ + int vdesc_context_offset; /* context location, if any */ + /* + * Finally, we've got a list of private data (about each operation) + * for each transport layer. (Support to manage this list is not + * yet part of BSD.) + */ + caddr_t *vdesc_transports; +}; + +/* + * A list of all the operation descs. + */ +extern struct vnodeop_desc *vnodeop_descs[]; + +/* + * Interlock for scanning list of vnodes attached to a mountpoint + */ +extern void * mntvnode_slock; + +/* + * This macro is very helpful in defining those offsets in the vdesc struct. + * + * This is stolen from X11R4. I ignored all the fancy stuff for + * Crays, so if you decide to port this to such a serious machine, + * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
+ */ +#define VOPARG_OFFSET(p_type,field) \ + ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) +#define VOPARG_OFFSETOF(s_type,field) \ + VOPARG_OFFSET(s_type*,field) +#define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ + ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) + + + +/* + * VOCALL calls an op given an ops vector. We break it out because BSD's + * vclean changes the ops vector and then wants to call ops with the old + * vector. + */ +#define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) + +/* + * This call works for vnodes in the kernel. + */ +#define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) +#define VDESC(OP) (& __CONCAT(OP,_desc)) +#define VOFFSET(OP) (VDESC(OP)->vdesc_offset) + + + +int build_path(vnode_t first_vp, char *buff, int buflen, int *outlen); +int bdevvp(dev_t dev, struct vnode **vpp); +void cvtstat(struct stat *st, struct ostat *ost); +void vprint(const char *label, struct vnode *vp); + + +__private_extern__ int is_package_name(char *name, int len); +__private_extern__ int set_package_extensions_table(void *data, int nentries, int maxwidth); +int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, + int len, off_t offset, enum uio_seg segflg, int ioflg, + struct ucred *cred, int *aresid, struct proc *p); +int vn_rdwr_64(enum uio_rw rw, struct vnode *vp, uint64_t base, + int64_t len, off_t offset, enum uio_seg segflg, + int ioflg, struct ucred *cred, int *aresid, + struct proc *p); +void fifo_printinfo(struct vnode *vp); +int vn_lock(struct vnode *vp, int flags, struct proc *p); +int vn_open(struct nameidata *ndp, int fmode, int cmode); +int vn_open_modflags(struct nameidata *ndp, int *fmode, int cmode); +int vn_open_auth(struct nameidata *ndp, int *fmode, struct vnode_attr *); +int vn_close(vnode_t, int flags, struct ucred *cred, struct proc *p); + +#define VN_CREATE_NOAUTH (1<<0) +#define VN_CREATE_NOINHERIT (1<<1) +errno_t vn_create(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, int flags, vfs_context_t); + + +int vn_getxattr(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t); +int vn_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t); +int vn_removexattr(vnode_t, const char *, int, vfs_context_t); +int vn_listxattr(vnode_t, uio_t, size_t *, int, vfs_context_t); + +void name_cache_lock(void); +void name_cache_unlock(void); + +char * vnode_getname(vnode_t vp); +void vnode_putname(char *name); + +vnode_t vnode_getparent(vnode_t vp); + +int vn_pathconf(vnode_t, int, register_t *, vfs_context_t); + +void vnode_list_lock(void); +void vnode_list_unlock(void); +int vnode_ref_ext(vnode_t, int); +void vnode_rele_ext(vnode_t, int, int); +void vnode_rele_internal(vnode_t, int, int, int); +int vnode_getwithref(vnode_t); +int vnode_put_locked(vnode_t); + +int vnode_issock(vnode_t); + +void unlock_fsnode(vnode_t, int *); +int lock_fsnode(vnode_t, int *); + +errno_t vnode_resume(vnode_t); + +errno_t vnode_size(vnode_t, off_t *, vfs_context_t); +errno_t vnode_setsize(vnode_t, off_t, int ioflag, vfs_context_t); +int vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); + +void SPECHASH_LOCK(void); +void SPECHASH_UNLOCK(void); + +int check_cdevmounted(dev_t, enum vtype, int *); + +void vnode_authorize_init(void); + +#endif /* !_SYS_VNODE_INTERNAL_H_ */ diff --git a/bsd/sys/vstat.h b/bsd/sys/vstat.h index 83b87ed23..4a8817c8d 100644 --- a/bsd/sys/vstat.h +++ b/bsd/sys/vstat.h @@ -36,7 +36,7 @@ #include #include -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE struct vstat { fsid_t vst_volid; /* volume 
identifier */ @@ -49,7 +49,7 @@ struct vstat { gid_t vst_gid; /* group ID of the file's group */ dev_t vst_dev; /* inode's device */ dev_t vst_rdev; /* device type */ -#ifndef _POSIX_SOURCE +#ifndef _POSIX_C_SOURCE struct timespec vst_atimespec; /* time of last access */ struct timespec vst_mtimespec; /* time of last data modification */ struct timespec vst_ctimespec; /* time of last file status change */ @@ -67,7 +67,7 @@ struct vstat { u_int32_t vst_flags; /* user defined flags for file */ }; -#endif /* ! _POSIX_SOURCE */ +#endif /* ! _POSIX_C_SOURCE */ #endif /* __APPLE_API_OBSOLETE */ #endif /* !_SYS_VSTAT_H_ */ diff --git a/bsd/sys/wait.h b/bsd/sys/wait.h index 76bf41a3b..34aba1bb8 100644 --- a/bsd/sys/wait.h +++ b/bsd/sys/wait.h @@ -58,51 +58,119 @@ #ifndef _SYS_WAIT_H_ #define _SYS_WAIT_H_ +#include <sys/cdefs.h> +#include <sys/_types.h> + /* * This file holds definitions relevant to the wait4 system call * and the alternate interfaces that use it (wait, wait3, waitpid). */ +/* + * [XSI] The type idtype_t shall be defined as an enumeration type whose + * possible values shall include at least P_ALL, P_PID, and P_PGID. + */ +typedef enum { + P_ALL, + P_PID, + P_PGID +} idtype_t; + +/* + * [XSI] The id_t and pid_t types shall be defined as described + * in <sys/types.h> + */ +#ifndef _PID_T +typedef __darwin_pid_t pid_t; +#define _PID_T +#endif + +#ifndef _ID_T +typedef __darwin_id_t id_t; +#define _ID_T +#endif + +/* + * [XSI] The siginfo_t type shall be defined as described in <signal.h> + * [XSI] The rusage structure shall be defined as described in <sys/resource.h> + * [XSI] Inclusion of the <sys/wait.h> header may also make visible all + * symbols from <signal.h> and <sys/resource.h> + * + * NOTE: This requirement is currently being satisfied by the direct + * inclusion of <sys/signal.h> and <sys/resource.h>, below. + * + * Software should not depend on the exposure of anything other + * than the types siginfo_t and struct rusage as a result of + * this inclusion. If you depend on any types or manifest + * values other than siginfo_t and struct rusage from either of + * those files, you should explicitly include them yourself, as + * well, or in future releases your software may not compile + * without modification. + */ +#include <sys/signal.h> /* [XSI] for siginfo_t */ +#include <sys/resource.h> /* [XSI] for struct rusage */ + +/* + * Option bits for the third argument of wait4. WNOHANG causes the + * wait to not hang if there are no stopped or terminated processes, rather + * returning an error indication in this case (pid==0). WUNTRACED + * indicates that the caller should receive status about untraced children + * which stop due to signals. If children are stopped and a wait without + * this option is done, it is as though they were still running... nothing + * about them is returned. + */ +#define WNOHANG 0x01 /* [XSI] don't hang in wait/no child to reap */ +#define WUNTRACED 0x02 /* [XSI] notify on stopped, untraced children */ + /* * Macros to test the exit status returned by wait * and extract the relevant values.
*/ -#ifdef _POSIX_SOURCE #define _W_INT(i) (i) #else #define _W_INT(w) (*(int *)&(w)) /* convert union wait to int */ #define WCOREFLAG 0200 +#endif /* _POSIX_C_SOURCE */ -#endif /* _POSIX_SOURCE */ - +/* These macros are permitted, as they are in the implementation namespace */ #define _WSTATUS(x) (_W_INT(x) & 0177) #define _WSTOPPED 0177 /* _WSTATUS if process is stopped */ + +/* + * [XSI] The <sys/wait.h> header shall define the following macros for + * analysis of process status values + */ +#define WEXITSTATUS(x) (_W_INT(x) >> 8) +#define WIFCONTINUED(x) (x == 0x13) /* 0x13 == SIGCONT */ +#define WIFEXITED(x) (_WSTATUS(x) == 0) +#define WIFSIGNALED(x) (_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0) #define WIFSTOPPED(x) (_WSTATUS(x) == _WSTOPPED) #define WSTOPSIG(x) (_W_INT(x) >> 8) -#define WIFSIGNALED(x) (_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0) #define WTERMSIG(x) (_WSTATUS(x)) -#define WIFEXITED(x) (_WSTATUS(x) == 0) -#define WEXITSTATUS(x) (_W_INT(x) >> 8) -#if !defined(_POSIX_SOURCE) +#if !defined(_POSIX_C_SOURCE) #define WCOREDUMP(x) (_W_INT(x) & WCOREFLAG) #define W_EXITCODE(ret, sig) ((ret) << 8 | (sig)) #define W_STOPCODE(sig) ((sig) << 8 | _WSTOPPED) -#endif /* !defined(_POSIX_SOURCE) */ +#endif /* !defined(_POSIX_C_SOURCE) */ /* - * Option bits for the third argument of wait4. WNOHANG causes the - * wait to not hang if there are no stopped or terminated processes, rather - * returning an error indication in this case (pid==0). WUNTRACED - * indicates that the caller should receive status about untraced children - * which stop due to signals. If children are stopped and a wait without - * this option is done, it is as though they were still running... nothing - * about them is returned. + * [XSI] The following symbolic constants shall be defined as possible + * values for the fourth argument to waitid(). */ -#define WNOHANG 1 /* don't hang in wait */ -#define WUNTRACED 2 /* tell about stopped, untraced children */ +/* WNOHANG already defined for wait4() */ +/* WUNTRACED defined for wait4() but not for waitid() */ +#define WEXITED 0x04 /* [XSI] Processes which have exited */ +#ifdef _POSIX_C_SOURCE +/* waitid() parameter */ +#define WSTOPPED 0x08 /* [XSI] Any child stopped by signal receipt */ +#endif +#define WCONTINUED 0x10 /* [XSI] Any child stopped then continued */ +#define WNOWAIT 0x20 /* [XSI] Leave process returned waitable */ + -#if !defined(_POSIX_C_SOURCE) /* POSIX extensions and 4.2/4.3 compatibility: */ /* @@ -125,13 +193,13 @@ union wait { * Terminated process status. */ struct { -#if BYTE_ORDER == LITTLE_ENDIAN +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN unsigned int w_Termsig:7, /* termination signal */ w_Coredump:1, /* core dump indicator */ w_Retcode:8, /* exit code if w_termsig==0 */ w_Filler:16; /* upper bits filler */ #endif -#if BYTE_ORDER == BIG_ENDIAN +#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN unsigned int w_Filler:16, /* upper bits filler */ w_Retcode:8, /* exit code if w_termsig==0 */ w_Coredump:1, /* core dump indicator */ @@ -144,12 +212,12 @@ union wait { * with the WUNTRACED option bit.
*/ struct { -#if BYTE_ORDER == LITTLE_ENDIAN +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN unsigned int w_Stopval:8, /* == W_STOPPED if stopped */ w_Stopsig:8, /* signal that stopped us */ w_Filler:16; /* upper bits filler */ #endif -#if BYTE_ORDER == BIG_ENDIAN +#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN unsigned int w_Filler:16, /* upper bits filler */ w_Stopsig:8, /* signal that stopped us */ w_Stopval:8; /* == W_STOPPED if stopped */ @@ -162,22 +230,24 @@ union wait { #define w_stopval w_S.w_Stopval #define w_stopsig w_S.w_Stopsig +/* + * Stopped state value; cannot use waitid() parameter of the same name + * in the same scope + */ #define WSTOPPED _WSTOPPED -#endif /* !defined(_POSIX_SOURCE) */ +#endif /* !defined(_POSIX_C_SOURCE) */ #ifndef KERNEL -#include -#include - __BEGIN_DECLS -struct rusage; /* forward declaration */ - -pid_t wait __P((int *)); -pid_t waitpid __P((pid_t, int *, int)); -#if !defined(_POSIX_SOURCE) -pid_t wait3 __P((int *, int, struct rusage *)); -pid_t wait4 __P((pid_t, int *, int, struct rusage *)); -#endif /* !defined(_POSIX_SOURCE) */ +pid_t wait(int *); +pid_t waitpid(pid_t, int *, int); +#ifndef _ANSI_SOURCE +int waitid(idtype_t, id_t, siginfo_t *, int); +#endif /* !_ANSI_SOURCE */ +#if !defined(_POSIX_C_SOURCE) +pid_t wait3(int *, int, struct rusage *); +pid_t wait4(pid_t, int *, int, struct rusage *); +#endif /* !defined(_POSIX_C_SOURCE) */ __END_DECLS #endif #endif /* !_SYS_WAIT_H_ */ diff --git a/bsd/sys/xattr.h b/bsd/sys/xattr.h new file mode 100644 index 000000000..6628bbeee --- /dev/null +++ b/bsd/sys/xattr.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_XATTR_H_ +#define _SYS_XATTR_H_ + +#include + +/* Options for pathname based xattr calls */ +#define XATTR_NOFOLLOW 0x0001 /* Don't follow symbolic links */ + +/* Options for setxattr calls */ +#define XATTR_CREATE 0x0002 /* set the value, fail if attr already exists */ +#define XATTR_REPLACE 0x0004 /* set the value, fail if attr does not exist */ + +/* Set this to bypass authorization checking (eg. 
if doing auth-related work) */ +#define XATTR_NOSECURITY 0x0008 + +#define XATTR_MAXNAMELEN 127 + +#define XATTR_FINDERINFO_NAME "com.apple.FinderInfo" + +#define XATTR_RESOURCEFORK_NAME "com.apple.ResourceFork" + + +#ifdef KERNEL +__BEGIN_DECLS +int xattr_protected(const char *); +int xattr_validatename(const char *); +__END_DECLS +#endif /* KERNEL */ + +#ifndef KERNEL +__BEGIN_DECLS + +ssize_t getxattr(const char *path, const char *name, void *value, size_t size, u_int32_t position, int options); + +ssize_t fgetxattr(int fd, const char *name, void *value, size_t size, u_int32_t position, int options); + +int setxattr(const char *path, const char *name, const void *value, size_t size, u_int32_t position, int options); + +int fsetxattr(int fd, const char *name, const void *value, size_t size, u_int32_t position, int options); + +int removexattr(const char *path, const char *name, int options); + +int fremovexattr(int fd, const char *name, int options); + +ssize_t listxattr(const char *path, char *namebuff, size_t size, int options); + +ssize_t flistxattr(int fd, char *namebuff, size_t size, int options); + +__END_DECLS +#endif /* KERNEL */ + +#endif /* _SYS_XATTR_H_ */ diff --git a/bsd/ufs/ffs/ffs_alloc.c b/bsd/ufs/ffs/ffs_alloc.c index 624d1bdd5..275808fd4 100644 --- a/bsd/ufs/ffs/ffs_alloc.c +++ b/bsd/ufs/ffs/ffs_alloc.c @@ -59,10 +59,11 @@ #include #include -#include +#include #include -#include -#include +#include +#include +#include #include #include #include @@ -82,18 +83,18 @@ extern u_long nextgennumber; -static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); -static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t)); -static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, - int)); -static ino_t ffs_dirpref __P((struct inode *)); -static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); -static void ffs_fserr __P((struct fs *, u_int, char *)); +static ufs_daddr_t ffs_alloccg(struct inode *, int, ufs_daddr_t, int); +static ufs_daddr_t ffs_alloccgblk(struct fs *, struct cg *, ufs_daddr_t); +static ufs_daddr_t ffs_clusteralloc(struct inode *, int, ufs_daddr_t, int); +static ino_t ffs_dirpref(struct inode *); +static ufs_daddr_t ffs_fragextend(struct inode *, int, long, int, int); +static void ffs_fserr(struct fs *, u_int, char *); static u_long ffs_hashalloc - __P((struct inode *, int, long, int, u_int32_t (*)())); -static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); -static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t, - int)); + (struct inode *, int, long, int, u_int32_t (*)()); +static ino_t ffs_nodealloccg(struct inode *, int, ufs_daddr_t, int); +static ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); +static void ffs_clusteracct + (struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt); /* * Allocate a block in the file system. 
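Before the ffs_alloc.c hunks continue, a usage sketch for the xattr calls declared in sys/xattr.h above. This is hypothetical userspace code, not part of the patch: the path and attribute name are invented, and error handling is minimal.

#include <sys/xattr.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *path = "/tmp/example";	/* hypothetical file */
	const char *value = "green";
	char buf[64];
	ssize_t len;

	/* Create the attribute; XATTR_CREATE fails if it already exists. */
	if (setxattr(path, "com.example.color", value, strlen(value),
	    0 /* position: used only with the resource fork */,
	    XATTR_CREATE) != 0) {
		perror("setxattr");
		return (1);
	}

	/* Read it back without following a trailing symlink. */
	len = getxattr(path, "com.example.color", buf, sizeof (buf),
	    0, XATTR_NOFOLLOW);
	if (len >= 0)
		printf("color = %.*s\n", (int)len, buf);
	return (0);
}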
@@ -118,7 +119,7 @@ ffs_alloc(ip, lbn, bpref, size, cred, bnp) register struct inode *ip; ufs_daddr_t lbn, bpref; int size; - struct ucred *cred; + kauth_cred_t cred; ufs_daddr_t *bnp; { register struct fs *fs; @@ -138,9 +139,9 @@ ffs_alloc(ip, lbn, bpref, size, cred, bnp) #endif /* DIAGNOSTIC */ if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) goto nospace; - if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) + if (suser(cred, NULL) && freespace(fs, fs->fs_minfree) <= 0) goto nospace; - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); #if QUOTA if (error = chkdq(ip, (int64_t)size, cred, 0)) return (error); @@ -166,7 +167,7 @@ ffs_alloc(ip, lbn, bpref, size, cred, bnp) (void) chkdq(ip, (int64_t)-size, cred, FORCE); #endif /* QUOTA */ nospace: - ffs_fserr(fs, cred->cr_uid, "file system full"); + ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } @@ -184,7 +185,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) ufs_daddr_t lbprev; ufs_daddr_t bpref; int osize, nsize; - struct ucred *cred; + kauth_cred_t cred; struct buf **bpp; { register struct fs *fs; @@ -206,7 +207,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) if (cred == NOCRED) panic("ffs_realloccg: missing credential\n"); #endif /* DIAGNOSTIC */ - if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0) + if (suser(cred, NULL) != 0 && freespace(fs, fs->fs_minfree) <= 0) goto nospace; if ((bprev = ip->i_db[lbprev]) == 0) { printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", @@ -216,16 +217,16 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) /* * Allocate the extra space in the buffer. 
*/ - if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) { - brelse(bp); + if (error = (int)buf_bread(ITOV(ip), (daddr64_t)((unsigned)lbprev), osize, NOCRED, &bp)) { + buf_brelse(bp); return (error); } - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); #if QUOTA if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0)) { - brelse(bp); + buf_brelse(bp); return (error); } #endif /* QUOTA */ @@ -234,13 +235,13 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) */ cg = dtog(fs, bprev); if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) { - if (bp->b_blkno != fsbtodb(fs, bno)) + if ((ufs_daddr_t)buf_blkno(bp) != fsbtodb(fs, bno)) panic("bad blockno"); ip->i_blocks += btodb(nsize - osize, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); - bp->b_flags |= B_DONE; - bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize); + buf_setflags(bp, B_DONE); + bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); *bpp = bp; return (0); } @@ -295,7 +296,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, (u_int32_t (*)())ffs_alloccg); if (bno > 0) { - bp->b_blkno = fsbtodb(fs, bno); + buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, bno))); ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), @@ -303,8 +304,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) ip->i_blocks += btodb(nsize - osize, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); - bp->b_flags |= B_DONE; - bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize); + buf_setflags(bp, B_DONE); + bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); *bpp = bp; return (0); } @@ -314,12 +315,12 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) */ (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE); #endif /* QUOTA */ - brelse(bp); + buf_brelse(bp); nospace: /* * no space available */ - ffs_fserr(fs, cred->cr_uid, "file system full"); + ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } @@ -343,12 +344,6 @@ nospace: int doasyncfree = 1; int doreallocblks = 1; -int -ffs_reallocblks(ap) - struct vop_reallocblks_args *ap; -{ - return (ENOSPC); -} /* * Allocate an inode in the file system. @@ -366,23 +361,21 @@ ffs_reallocblks(ap) * available inode is located. 
*/ int -ffs_valloc(ap) - struct vop_valloc_args /* { - struct vnode *a_pvp; - int a_mode; - struct ucred *a_cred; - struct vnode **a_vpp; - } */ *ap; +ffs_valloc( + struct vnode *pvp, + mode_t mode, + kauth_cred_t cred, + struct vnode **vpp) + { - register struct vnode *pvp = ap->a_pvp; register struct inode *pip; register struct fs *fs; register struct inode *ip; - mode_t mode = ap->a_mode; + struct timeval tv; ino_t ino, ipref; int cg, error; - *ap->a_vpp = NULL; + *vpp = NULL; pip = VTOI(pvp); fs = pip->i_fs; if (fs->fs_cstotal.cs_nifree == 0) @@ -409,12 +402,14 @@ ffs_valloc(ap) ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg); if (ino == 0) goto noinodes; - error = VFS_VGET(pvp->v_mount, (void *)ino, ap->a_vpp); + + error = ffs_vget_internal(pvp->v_mount, ino, vpp, NULL, NULL, mode, 0); if (error) { - VOP_VFREE(pvp, ino, mode); + ffs_vfree(pvp, ino, mode); return (error); } - ip = VTOI(*ap->a_vpp); + ip = VTOI(*vpp); + if (ip->i_mode) { printf("mode = 0%o, inum = %d, fs = %s\n", ip->i_mode, ip->i_number, fs->fs_fsmnt); @@ -429,12 +424,13 @@ ffs_valloc(ap) /* * Set up a new generation number for this inode. */ - if (++nextgennumber < (u_long)time.tv_sec) - nextgennumber = time.tv_sec; + microtime(&tv); + if (++nextgennumber < (u_long)tv.tv_sec) + nextgennumber = tv.tv_sec; ip->i_gen = nextgennumber; return (0); noinodes: - ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes"); + ffs_fserr(fs, kauth_cred_getuid(cred), "out of inodes"); uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); return (ENOSPC); } @@ -753,6 +749,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) register struct fs *fs; register struct cg *cgp; struct buf *bp; + struct timeval tv; long bno; int frags, bbase; int i, error; @@ -772,13 +769,13 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) return (NULL); } /* read corresponding cylinder group info */ - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) { byte_swap_cgin(cgp, fs); @@ -790,10 +787,11 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp->cg_time = time.tv_sec; + microtime(&tv); + cgp->cg_time = tv.tv_sec; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) if (isclr(cg_blksfree(cgp), bno + i)) { @@ -801,7 +799,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } /* @@ -827,7 +825,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (bprev); } @@ -847,6 +845,7 @@ ffs_alloccg(ip, cg, bpref, size) register struct fs *fs; register struct cg *cgp; struct buf *bp; + struct timeval tv; register int i; int error, bno, frags, allocsiz; #if REV_ENDIAN_FS @@ -858,13 +857,13 @@ ffs_alloccg(ip, cg, bpref, size) fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (NULL); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, 
(daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -875,17 +874,18 @@ ffs_alloccg(ip, cg, bpref, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp->cg_time = time.tv_sec; + microtime(&tv); + cgp->cg_time = tv.tv_sec; if (size == fs->fs_bsize) { bno = ffs_alloccgblk(fs, cgp, bpref); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (bno); } /* @@ -907,7 +907,7 @@ ffs_alloccg(ip, cg, bpref, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } bno = ffs_alloccgblk(fs, cgp, bpref); @@ -924,7 +924,7 @@ ffs_alloccg(ip, cg, bpref, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (bno); } bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); @@ -933,7 +933,7 @@ ffs_alloccg(ip, cg, bpref, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } for (i = 0; i < frags; i++) @@ -949,7 +949,7 @@ ffs_alloccg(ip, cg, bpref, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (cg * fs->fs_fpg + bno); } @@ -1097,10 +1097,10 @@ ffs_clusteralloc(ip, cg, bpref, len) fs = ip->i_fs; if (fs->fs_maxcluster[cg] < len) return (NULL); - if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, - NOCRED, &bp)) + if (buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), (int)fs->fs_cgsize, + NOCRED, &bp)) goto fail; - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -1196,11 +1196,11 @@ ffs_clusteralloc(ip, cg, bpref, len) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (bno); fail: - brelse(bp); + buf_brelse(bp); return (0); } @@ -1223,6 +1223,7 @@ ffs_nodealloccg(ip, cg, ipref, mode) register struct fs *fs; register struct cg *cgp; struct buf *bp; + struct timeval tv; int error, start, len, loc, map, i; #if REV_ENDIAN_FS struct vnode *vp=ITOV(ip); @@ -1233,13 +1234,13 @@ ffs_nodealloccg(ip, cg, ipref, mode) fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nifree == 0) return (NULL); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -1249,11 +1250,12 @@ ffs_nodealloccg(ip, cg, ipref, mode) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (NULL); } - cgp->cg_time = time.tv_sec; + microtime(&tv); + cgp->cg_time = tv.tv_sec; if (ipref) { ipref %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ipref)) @@ -1300,7 +1302,7 @@ gotit: if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (cg * fs->fs_ipg + ipref); } @@ -1311,6 +1313,7 @@ gotit: * free map. 
If a fragment is deallocated, a possible * block reassembly is checked. */ +void ffs_blkfree(ip, bno, size) register struct inode *ip; ufs_daddr_t bno; @@ -1319,6 +1322,7 @@ ffs_blkfree(ip, bno, size) register struct fs *fs; register struct cg *cgp; struct buf *bp; + struct timeval tv; ufs_daddr_t blkno; int i, error, cg, blk, frags, bbase; #if REV_ENDIAN_FS @@ -1326,6 +1330,7 @@ ffs_blkfree(ip, bno, size) struct mount *mp=vp->v_mount; int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ + fs = ip->i_fs; if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", @@ -1338,13 +1343,13 @@ ffs_blkfree(ip, bno, size) ffs_fserr(fs, ip->i_uid, "bad block"); return; } - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return; } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -1354,10 +1359,11 @@ ffs_blkfree(ip, bno, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return; } - cgp->cg_time = time.tv_sec; + microtime(&tv); + cgp->cg_time = tv.tv_sec; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); @@ -1423,7 +1429,7 @@ ffs_blkfree(ip, bno, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); } #if DIAGNOSTIC @@ -1454,13 +1460,13 @@ ffs_checkblk(ip, bno, size) } if ((u_int)bno >= fs->fs_size) panic("checkblk: bad block %d", bno); - error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, dtog(fs, bno)))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return; } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -1470,7 +1476,7 @@ ffs_checkblk(ip, bno, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return; } bno = dtogd(fs, bno); @@ -1488,7 +1494,7 @@ ffs_checkblk(ip, bno, size) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (!free); } #endif /* DIAGNOSTIC */ @@ -1499,38 +1505,32 @@ ffs_checkblk(ip, bno, size) * The specified inode is placed back in the free map. 
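Every cylinder-group update in the hunks above now stamps cgp->cg_time from a local microtime() snapshot instead of reading the retired global time. A userspace analogue of the new idiom, with gettimeofday() standing in for the kernel's microtime():

#include <stdio.h>
#include <sys/time.h>

static long stamp_cg_time(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);   /* stands in for microtime(&tv) */
    return (long)tv.tv_sec;    /* what the patched code stores in cg_time */
}

int main(void)
{
    printf("cg_time = %ld\n", stamp_cg_time());
    return 0;
}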
*/ int -ffs_vfree(ap) - struct vop_vfree_args /* { - struct vnode *a_pvp; - ino_t a_ino; - int a_mode; - } */ *ap; +ffs_vfree(struct vnode *vp, ino_t ino, int mode) { register struct fs *fs; register struct cg *cgp; register struct inode *pip; - ino_t ino = ap->a_ino; struct buf *bp; + struct timeval tv; int error, cg; #if REV_ENDIAN_FS - struct vnode *vp=ap->a_pvp; struct mount *mp=vp->v_mount; int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ - pip = VTOI(ap->a_pvp); + pip = VTOI(vp); fs = pip->i_fs; if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", pip->i_dev, ino, fs->fs_fsmnt); cg = ino_to_cg(fs, ino); - error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), - (int)fs->fs_cgsize, NOCRED, &bp); + error = (int)buf_bread(pip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), + (int)fs->fs_cgsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (0); } - cgp = (struct cg *)bp->b_data; + cgp = (struct cg *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -1540,10 +1540,11 @@ ffs_vfree(ap) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (0); } - cgp->cg_time = time.tv_sec; + microtime(&tv); + cgp->cg_time = tv.tv_sec; ino %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ino)) { printf("dev = 0x%x, ino = %d, fs = %s\n", @@ -1557,7 +1558,7 @@ ffs_vfree(ap) cgp->cg_cs.cs_nifree++; fs->fs_cstotal.cs_nifree++; fs->fs_cs(fs, cg).cs_nifree++; - if ((ap->a_mode & IFMT) == IFDIR) { + if ((mode & IFMT) == IFDIR) { cgp->cg_cs.cs_ndir--; fs->fs_cstotal.cs_ndir--; fs->fs_cs(fs, cg).cs_ndir--; @@ -1567,7 +1568,7 @@ ffs_vfree(ap) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - bdwrite(bp); + buf_bdwrite(bp); return (0); } @@ -1641,11 +1642,8 @@ ffs_mapsearch(fs, cgp, bpref, allocsiz) * * Cnt == 1 means free; cnt == -1 means allocating. */ -ffs_clusteracct(fs, cgp, blkno, cnt) - struct fs *fs; - struct cg *cgp; - ufs_daddr_t blkno; - int cnt; +static void +ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt) { int32_t *sump; int32_t *lp; diff --git a/bsd/ufs/ffs/ffs_balloc.c b/bsd/ufs/ffs/ffs_balloc.c index 37cf82024..5a0cf7bcf 100644 --- a/bsd/ufs/ffs/ffs_balloc.c +++ b/bsd/ufs/ffs/ffs_balloc.c @@ -58,15 +58,16 @@ #include #include #include -#include +#include #include +#include #include -#include +#include #include #include #if REV_ENDIAN_FS -#include +#include #endif /* REV_ENDIAN_FS */ #include @@ -88,14 +89,14 @@ * by allocating the physical blocks on a device given * the inode and the logical block number in a file. 
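The bulk of the churn in these files swaps direct struct buf field access for opaque accessor KPIs: bp->b_blkno becomes buf_blkno()/buf_setblkno(), bp->b_data becomes buf_dataptr(), flag twiddling becomes buf_setflags(), and brelse()/bread()/bwrite()/bdwrite() grow buf_ prefixes. A sketch of the general opaque-handle pattern being adopted; the struct layout below is invented for illustration, not xnu's struct buf:

#include <stdint.h>
#include <stdio.h>

typedef struct mybuf {             /* hidden from KPI clients in the real tree */
    int64_t  blkno;
    void    *data;
    uint32_t flags;
} *mybuf_t;

static int64_t mybuf_blkno(mybuf_t bp)               { return bp->blkno; }
static void    mybuf_setblkno(mybuf_t bp, int64_t b) { bp->blkno = b; }
static void   *mybuf_dataptr(mybuf_t bp)             { return bp->data; }
static void    mybuf_setflags(mybuf_t bp, uint32_t f){ bp->flags |= f; }

int main(void)
{
    char page[512];
    struct mybuf b = { 0, page, 0 };

    mybuf_setblkno(&b, 1234);   /* was: bp->b_blkno = 1234; */
    mybuf_setflags(&b, 0x2);    /* was: bp->b_flags |= B_DONE; */
    printf("blk %lld data %p\n", (long long)mybuf_blkno(&b), mybuf_dataptr(&b));
    return 0;
}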
*/ -ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) - register struct inode *ip; - register ufs_daddr_t lbn; - int size; - struct ucred *cred; - struct buf **bpp; - int flags; - int * blk_alloc; +ffs_balloc( + register struct inode *ip, + register ufs_daddr_t lbn, + int size, + kauth_cred_t cred, + struct buf **bpp, + int flags, + int * blk_alloc) { register struct fs *fs; register ufs_daddr_t nb; @@ -107,8 +108,8 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int devBlockSize=0; int alloc_buffer = 1; -#if REV_ENDIAN_FS struct mount *mp=vp->v_mount; +#if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ @@ -148,19 +149,20 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); - /* adjust the innode size we just grew */ + /* adjust the inode size we just grew */ /* it is in nb+1 as nb starts from 0 */ ip->i_size = (nb + 1) * fs->fs_bsize; - if (UBCISVALID(vp)) - ubc_setsize(vp, (off_t)ip->i_size); /* XXX check error */ - ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); + ubc_setsize(vp, (off_t)ip->i_size); + + ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; + if ((flags & B_SYNC) || (!alloc_buffer)) { if (!alloc_buffer) - SET(bp->b_flags, B_NOCACHE); - bwrite(bp); + buf_setflags(bp, B_NOCACHE); + buf_bwrite(bp); } else - bdwrite(bp); + buf_bdwrite(bp); /* note that bp is already released here */ } } @@ -171,9 +173,9 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { if (alloc_buffer) { - error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); + error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (error); } *bpp = bp; @@ -188,9 +190,9 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) nsize = fragroundup(fs, size); if (nsize <= osize) { if (alloc_buffer) { - error = bread(vp, lbn, osize, NOCRED, &bp); + error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); return (error); } ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -207,14 +209,19 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); - ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); + ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; - if(!alloc_buffer) { - SET(bp->b_flags, B_NOCACHE); + + /* adjust the inode size we just grew */ + ip->i_size = (lbn * fs->fs_bsize) + size; + ubc_setsize(vp, (off_t)ip->i_size); + + if (!alloc_buffer) { + buf_setflags(bp, B_NOCACHE); if (flags & B_SYNC) - bwrite(bp); + buf_bwrite(bp); else - bdwrite(bp); + buf_bdwrite(bp); } else *bpp = bp; return (0); @@ -231,10 +238,11 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) if (error) return (error); if (alloc_buffer) { - bp = getblk(vp, lbn, nsize, 0, 0, BLK_WRITE); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & B_CLRBUF) - clrbuf(bp); + bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE); + buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb))); + + if (flags & B_CLRBUF) + buf_clear(bp); } ip->i_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -270,16 +278,16 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) return (error); nb = newb; *allocblk++ = nb; - bp = 
getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, BLK_META); - bp->b_blkno = fsbtodb(fs, nb); - clrbuf(bp); + bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); + buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); + buf_clear(bp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); - } else if ((error = bwrite(bp)) != 0) { + buf_bdwrite(bp); + } else if ((error = buf_bwrite(bp)) != 0) { goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; @@ -290,13 +298,12 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { - error = meta_bread(vp, - indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); + error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); goto fail; } - bap = (ufs_daddr_t *)bp->b_data; + bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = NXSwapLong(bap[indirs[i].in_off]); @@ -310,29 +317,29 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) break; i += 1; if (nb != 0) { - brelse(bp); + buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - brelse(bp); + buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, BLK_META); - nbp->b_blkno = fsbtodb(fs, nb); - clrbuf(nbp); + nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); + buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); + buf_clear(nbp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(nbp); - } else if (error = bwrite(nbp)) { - brelse(bp); + buf_bdwrite(nbp); + } else if (error = buf_bwrite(nbp)) { + buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS @@ -349,9 +356,9 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) * delayed write. */ if (flags & B_SYNC) { - bwrite(bp); + buf_bwrite(bp); } else { - bdwrite(bp); + buf_bdwrite(bp); } } /* @@ -361,7 +368,7 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - brelse(bp); + buf_brelse(bp); goto fail; } nb = newb; @@ -380,15 +387,16 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) * delayed write. 
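The write-policy choice repeated around the indirect-block allocations above: an MNT_ASYNC mount always takes the delayed-write path (forcing error to 0), while elsewhere the caller's B_SYNC flag picks between buf_bwrite() and buf_bdwrite(). A loose model covering both decision points, with illustrative flag values:

#include <stdio.h>

#define MNT_ASYNC 0x1
#define B_SYNC    0x2

static const char *pick_write(int mnt_flag, int io_flags)
{
    if (mnt_flag & MNT_ASYNC)
        return "buf_bdwrite";     /* delayed write, error forced to 0 */
    return (io_flags & B_SYNC) ? "buf_bwrite"    /* synchronous */
                               : "buf_bdwrite";  /* delayed */
}

int main(void)
{
    printf("%s\n", pick_write(MNT_ASYNC, 0));
    printf("%s\n", pick_write(0, B_SYNC));
    return 0;
}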
*/ if ((flags & B_SYNC)) { - bwrite(bp); + buf_bwrite(bp); } else { - bdwrite(bp); + buf_bdwrite(bp); } if(alloc_buffer ) { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, BLK_WRITE); - nbp->b_blkno = fsbtodb(fs, nb); + nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); + buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); + if (flags & B_CLRBUF) - clrbuf(nbp); + buf_clear(nbp); } if (blk_alloc) { *blk_alloc = fs->fs_bsize; } @@ -398,19 +406,19 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags, blk_alloc) return (0); } - brelse(bp); + buf_brelse(bp); if (alloc_buffer) { - if (flags & B_CLRBUF) { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); - if (error) { - brelse(nbp); - goto fail; + if (flags & B_CLRBUF) { + error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp); + if (error) { + buf_brelse(nbp); + goto fail; + } + } else { + nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); + buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); } - } else { - nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, BLK_WRITE); - nbp->b_blkno = fsbtodb(fs, nb); - } - *bpp = nbp; + *bpp = nbp; } return (0); fail: @@ -425,8 +433,7 @@ fail: if (allocib != NULL) *allocib = 0; if (deallocated) { - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); - + devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. @@ -441,7 +448,7 @@ fail: /* * ffs_blkalloc allocates a disk block for ffs_pageout(), as a consequence - * it does no breads (that could lead to deadlock as the page may be already + * it does no buf_breads (that could lead to deadlock as the page may be already * marked busy as it is being paged out. Also important to note that we are not * growing the file in pageouts. So ip->i_size cannot increase by this call * due to the way UBC works. * * Do not call with B_CLRBUF flags as this should only be called * from pageouts */ -ffs_blkalloc(ip, lbn, size, cred, flags) - register struct inode *ip; - ufs_daddr_t lbn; - int size; - struct ucred *cred; - int flags; +ffs_blkalloc( + struct inode *ip, + ufs_daddr_t lbn, + int size, + kauth_cred_t cred, + int flags) { register struct fs *fs; register ufs_daddr_t nb; @@ -466,8 +473,8 @@ ffs_blkalloc(ip, lbn, size, cred, flags) int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int devBlockSize=0; -#if REV_ENDIAN_FS struct mount *mp=vp->v_mount; +#if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ @@ -544,16 +551,16 @@ ffs_blkalloc(ip, lbn, size, cred, flags) return (error); nb = newb; *allocblk++ = nb; - bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, BLK_META); - bp->b_blkno = fsbtodb(fs, nb); - clrbuf(bp); + bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); + buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); + buf_clear(bp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); - } else if (error = bwrite(bp)) { + buf_bdwrite(bp); + } else if (error = buf_bwrite(bp)) { goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; @@ -564,13 +571,12 @@ ffs_blkalloc(ip, lbn, size, cred, flags) * Fetch through the indirect blocks, allocating as necessary.
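The loop that follows walks the indirs[] chain computed for the target logical block, reading or allocating one indirect block per level. A rough model of how a logical block number maps onto the direct/single/double/triple indirect ranges; the NDADDR and NINDIR values below are examples for a 4K, 32-bit-pointer layout, not constants from this tree:

#include <stdio.h>

#define NDADDR 12            /* direct blocks held in the inode */
#define NINDIR 1024          /* pointers per indirect block: bsize / 4 */

static int indir_level(long lbn)
{
    long n = lbn - NDADDR;

    if (n < 0) return 0;                          /* direct */
    if (n < NINDIR) return 1;                     /* single indirect */
    n -= NINDIR;
    if (n < (long)NINDIR * NINDIR) return 2;      /* double indirect */
    return 3;                                     /* triple indirect */
}

int main(void)
{
    long probes[] = { 0, 11, 12, 1035, 1036, 1049612 };

    for (int i = 0; i < 6; i++)
        printf("lbn %ld -> level %d\n", probes[i], indir_level(probes[i]));
    return 0;
}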
*/ for (i = 1;;) { - error = meta_bread(vp, - indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); + error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); + buf_brelse(bp); goto fail; } - bap = (ufs_daddr_t *)bp->b_data; + bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = NXSwapLong(bap[indirs[i].in_off]); @@ -584,29 +590,29 @@ ffs_blkalloc(ip, lbn, size, cred, flags) break; i += 1; if (nb != 0) { - brelse(bp); + buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - brelse(bp); + buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; - nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, BLK_META); - nbp->b_blkno = fsbtodb(fs, nb); - clrbuf(nbp); + nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); + buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); + buf_clear(nbp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(nbp); - } else if (error = bwrite(nbp)) { - brelse(bp); + buf_bdwrite(nbp); + } else if (error = buf_bwrite(nbp)) { + buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS @@ -623,9 +629,9 @@ ffs_blkalloc(ip, lbn, size, cred, flags) * delayed write. */ if (flags & B_SYNC) { - bwrite(bp); + buf_bwrite(bp); } else { - bdwrite(bp); + buf_bdwrite(bp); } } /* @@ -635,7 +641,7 @@ ffs_blkalloc(ip, lbn, size, cred, flags) pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - brelse(bp); + buf_brelse(bp); goto fail; } nb = newb; @@ -654,13 +660,13 @@ ffs_blkalloc(ip, lbn, size, cred, flags) * delayed write. */ if (flags & B_SYNC) { - bwrite(bp); + buf_bwrite(bp); } else { - bdwrite(bp); + buf_bdwrite(bp); } return (0); } - brelse(bp); + buf_brelse(bp); return (0); fail: /* @@ -674,8 +680,7 @@ fail: if (allocib != NULL) *allocib = 0; if (deallocated) { - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); - + devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. 
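The (daddr64_t)((unsigned)lbn) casts sprinkled through both allocators above are not noise: widening a 32-bit block number straight to the new 64-bit daddr64_t would sign-extend once the high bit is set, while bouncing through unsigned zero-extends. A standalone demonstration:

#include <stdio.h>
#include <stdint.h>

typedef int64_t daddr64_t;

int main(void)
{
    int32_t lbn = (int32_t)0x80000001;  /* a block number with the high bit set */
    daddr64_t wrong = (daddr64_t)lbn;             /* sign-extends: negative */
    daddr64_t right = (daddr64_t)((uint32_t)lbn); /* zero-extends: correct */

    printf("wrong: %lld\nright: %lld\n", (long long)wrong, (long long)right);
    return 0;
}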
diff --git a/bsd/ufs/ffs/ffs_extern.h b/bsd/ufs/ffs/ffs_extern.h index 95fefe238..c833abc20 100644 --- a/bsd/ufs/ffs/ffs_extern.h +++ b/bsd/ufs/ffs/ffs_extern.h @@ -80,13 +80,12 @@ #endif /* __APPLE_API_UNSTABLE */ struct buf; -struct fid; struct fs; struct inode; struct mount; struct nameidata; struct proc; -struct statfs; +struct vfsstatfs; struct timeval; struct ucred; struct uio; @@ -96,51 +95,46 @@ struct vfsconf; #ifdef __APPLE_API_PRIVATE __BEGIN_DECLS -int ffs_alloc __P((struct inode *, - ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *)); -int ffs_balloc __P((struct inode *, - ufs_daddr_t, int, struct ucred *, struct buf **, int, int *)); -int ffs_blkatoff __P((struct vop_blkatoff_args *)); -int ffs_blkfree __P((struct inode *, ufs_daddr_t, long)); -ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *)); -int ffs_bmap __P((struct vop_bmap_args *)); -void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); -int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, - struct vnode **, int *, struct ucred **)); -void ffs_fragacct __P((struct fs *, int, int32_t [], int)); -int ffs_fsync __P((struct vop_fsync_args *)); -int ffs_init __P((struct vfsconf *)); -int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); -int ffs_mount __P((struct mount *, - char *, caddr_t, struct nameidata *, struct proc *)); -int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *)); -int ffs_mountroot __P((void)); -int ffs_read __P((struct vop_read_args *)); -int ffs_reallocblks __P((struct vop_reallocblks_args *)); -int ffs_realloccg __P((struct inode *, - ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **)); -int ffs_reclaim __P((struct vop_reclaim_args *)); -void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); -int ffs_statfs __P((struct mount *, struct statfs *, struct proc *)); -int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *)); -int ffs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, - struct proc *)); -int ffs_truncate __P((struct vop_truncate_args *)); -int ffs_unmount __P((struct mount *, int, struct proc *)); -int ffs_update __P((struct vop_update_args *)); -int ffs_valloc __P((struct vop_valloc_args *)); -int ffs_vfree __P((struct vop_vfree_args *)); -int ffs_vget __P((struct mount *, void *, struct vnode **)); -int ffs_vptofh __P((struct vnode *, struct fid *)); -int ffs_write __P((struct vop_write_args *)); -int ffs_pagein __P((struct vop_pagein_args *)); -int ffs_pageout __P((struct vop_pageout_args *)); -int ffs_blktooff __P((struct vop_blktooff_args *)); -int ffs_offtoblk __P((struct vop_offtoblk_args *)); +int ffs_fsync_internal(vnode_t, int); + +int ffs_blkatoff(vnode_t, off_t, char **, buf_t *); + +int ffs_alloc(struct inode *, + ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *); +int ffs_balloc(struct inode *, + ufs_daddr_t, int, struct ucred *, struct buf **, int, int *); +void ffs_blkfree(struct inode *, ufs_daddr_t, long); +ufs_daddr_t ffs_blkpref(struct inode *, ufs_daddr_t, int, ufs_daddr_t *); +void ffs_clrblock(struct fs *, u_char *, ufs_daddr_t); +int ffs_fhtovp(struct mount *, int, unsigned char *, struct vnode **, vfs_context_t); +void ffs_fragacct(struct fs *, int, int32_t [], int); +int ffs_fsync(struct vnop_fsync_args *); +int ffs_init(struct vfsconf *); +int ffs_isblock(struct fs *, u_char *, ufs_daddr_t); +int ffs_mount(struct mount *, vnode_t , user_addr_t, vfs_context_t); +int ffs_mountfs(struct vnode *, struct mount *, vfs_context_t); +int 
ffs_mountroot(mount_t, vnode_t, vfs_context_t); +int ffs_read(struct vnop_read_args *); +int ffs_realloccg(struct inode *, + ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **); +int ffs_reclaim(struct vnop_reclaim_args *); +void ffs_setblock(struct fs *, u_char *, ufs_daddr_t); +int ffs_vfs_getattr(struct mount *, struct vfs_attr *, vfs_context_t); +int ffs_vfs_setattr(struct mount *, struct vfs_attr *, vfs_context_t); +int ffs_sync(struct mount *, int, vfs_context_t); +int ffs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); +int ffs_unmount(struct mount *, int, vfs_context_t); +int ffs_update(struct vnode *, struct timeval *, struct timeval *, int); +int ffs_valloc(vnode_t dvp, mode_t mode, kauth_cred_t cred, vnode_t *vpp); +int ffs_vfree(struct vnode *vp, ino_t ino, int mode); +int ffs_vget(struct mount *, ino64_t, struct vnode **, vfs_context_t); +int ffs_vptofh(struct vnode *, int *, unsigned char *, vfs_context_t); +int ffs_write(struct vnop_write_args *); +int ffs_pagein(struct vnop_pagein_args *); +int ffs_pageout(struct vnop_pageout_args *); +int ffs_blktooff(struct vnop_blktooff_args *); +int ffs_offtoblk(struct vnop_offtoblk_args *); -#if DIAGNOSTIC -void ffs_checkoverlap __P((struct buf *, struct inode *)); -#endif __END_DECLS extern int (**ffs_vnodeop_p)(void *); diff --git a/bsd/ufs/ffs/ffs_inode.c b/bsd/ufs/ffs/ffs_inode.c index f84771f2c..4ee62c22b 100644 --- a/bsd/ufs/ffs/ffs_inode.c +++ b/bsd/ufs/ffs/ffs_inode.c @@ -60,11 +60,11 @@ #include #include -#include -#include +#include +#include /* for accessing p_stats */ #include -#include -#include +#include +#include #include #include #include @@ -87,8 +87,8 @@ #include #endif /* REV_ENDIAN_FS */ -static int ffs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, - ufs_daddr_t, int, long *)); +static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t, + ufs_daddr_t, int, long *); /* * Update the access, modified, and inode change times as specified by the @@ -100,25 +100,20 @@ static int ffs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, * complete. 
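The ffs_extern.h hunks above also retire the __P(()) wrapper in favor of plain ANSI prototypes. For reference, what the dropped macro did, reconstructed from the classic BSD cdefs idiom rather than copied from this tree:

/* On ANSI compilers the parameter list survives; on pre-ANSI (K&R)
 * compilers it collapses to an empty list. */
#if defined(__STDC__) || defined(__cplusplus)
#define __P(protos) protos      /* full prototype */
#else
#define __P(protos) ()          /* K&R: no parameter information */
#endif

/* Old style, as removed above: */
int ffs_alloc_old __P((int, long));
/* New style, as added above: */
int ffs_alloc_new(int cg, long pref);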
*/ int -ffs_update(ap) - struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; - } */ *ap; +ffs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor) { register struct fs *fs; struct buf *bp; struct inode *ip; - int error; + struct timeval tv; + errno_t error; #if REV_ENDIAN_FS - struct mount *mp=(ap->a_vp)->v_mount; + struct mount *mp=(vp)->v_mount; int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ - ip = VTOI(ap->a_vp); - if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { + ip = VTOI(vp); + if (vp->v_mount->mnt_flag & MNT_RDONLY) { ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); @@ -127,13 +122,15 @@ ffs_update(ap) (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) return (0); if (ip->i_flag & IN_ACCESS) - ip->i_atime = ap->a_access->tv_sec; + ip->i_atime = access->tv_sec; if (ip->i_flag & IN_UPDATE) { - ip->i_mtime = ap->a_modify->tv_sec; + ip->i_mtime = modify->tv_sec; ip->i_modrev++; } - if (ip->i_flag & IN_CHANGE) - ip->i_ctime = time.tv_sec; + if (ip->i_flag & IN_CHANGE) { + microtime(&tv); + ip->i_ctime = tv.tv_sec; + } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); fs = ip->i_fs; /* @@ -144,62 +141,50 @@ ffs_update(ap) ip->i_din.di_ouid = ip->i_uid; /* XXX */ ip->i_din.di_ogid = ip->i_gid; /* XXX */ } /* XXX */ - if (error = bread(ip->i_devvp, - fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + if (error = buf_bread(ip->i_devvp, + (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))), (int)fs->fs_bsize, NOCRED, &bp)) { - brelse(bp); - return (error); + buf_brelse(bp); + return ((int)error); } #if REV_ENDIAN_FS if (rev_endian) - byte_swap_inode_out(ip, ((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number))); + byte_swap_inode_out(ip, ((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number))); else { #endif /* REV_ENDIAN_FS */ - *((struct dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)) = ip->i_din; + *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ - if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) - return (bwrite(bp)); + if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + return ((int)buf_bwrite(bp)); else { - bdwrite(bp); + buf_bdwrite(bp); return (0); } } + #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ -/* - * Truncate the inode oip to at most length size, freeing the - * disk blocks. 
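The ffs_update() rewrite above keeps the original timestamp contract: IN_ACCESS and IN_UPDATE consume the caller-supplied access/modify times, while IN_CHANGE is stamped from a fresh microtime() snapshot. A userspace model, with invented flag values and gettimeofday() in place of microtime():

#include <stdio.h>
#include <sys/time.h>

#define IN_ACCESS 0x1
#define IN_UPDATE 0x2
#define IN_CHANGE 0x4

struct toy_inode { long atime, mtime, ctime; int flag; };

static void toy_update(struct toy_inode *ip, long access, long modify)
{
    struct timeval tv;

    if (ip->flag & IN_ACCESS)
        ip->atime = access;          /* caller-provided */
    if (ip->flag & IN_UPDATE)
        ip->mtime = modify;          /* caller-provided */
    if (ip->flag & IN_CHANGE) {
        gettimeofday(&tv, NULL);     /* kernel uses microtime(&tv) */
        ip->ctime = tv.tv_sec;
    }
    ip->flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
}

int main(void)
{
    struct toy_inode ip = { 0, 0, 0, IN_ACCESS | IN_CHANGE };

    toy_update(&ip, 111, 222);
    printf("a=%ld m=%ld c=%ld flag=%d\n", ip.atime, ip.mtime, ip.ctime, ip.flag);
    return 0;
}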
- */ -ffs_truncate(ap) - struct vop_truncate_args /* { - struct vnode *a_vp; - off_t a_length; - int a_flags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; + +int +ffs_truncate_internal(vnode_t ovp, off_t length, int flags, ucred_t cred) { - register struct vnode *ovp = ap->a_vp; + struct inode *oip; + struct fs *fs; ufs_daddr_t lastblock; - register struct inode *oip; ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; - off_t length = ap->a_length; - register struct fs *fs; - struct buf *bp; - int offset, size, level; - long count, nblocks, vflags, blocksreleased = 0; - struct timeval tv; - register int i; - int aflags, error, allerror; - off_t osize; - int devBlockSize=0; + buf_t bp; + int offset, size, level, i; + long count, nblocks, vflags, blocksreleased = 0; + struct timeval tv; + int aflags, error, allerror; + off_t osize; + int devBlockSize=0; #if QUOTA int64_t change; /* in bytes */ #endif /* QUOTA */ @@ -213,7 +198,7 @@ ffs_truncate(ap) if (length > fs->fs_maxfilesize) return (EFBIG); - tv = time; + microtime(&tv); if (ovp->v_type == VLNK && oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { #if DIAGNOSTIC @@ -223,12 +208,12 @@ ffs_truncate(ap) bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); oip->i_size = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOP_UPDATE(ovp, &tv, &tv, 1)); + return (ffs_update(ovp, &tv, &tv, 1)); } if (oip->i_size == length) { oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOP_UPDATE(ovp, &tv, &tv, 0)); + return (ffs_update(ovp, &tv, &tv, 0)); } #if QUOTA if (error = getinoquota(oip)) @@ -245,25 +230,24 @@ ffs_truncate(ap) offset = blkoff(fs, length - 1); lbn = lblkno(fs, length - 1); aflags = B_CLRBUF; - if (ap->a_flags & IO_SYNC) + if (flags & IO_SYNC) aflags |= B_SYNC; - if (error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, - aflags , 0)) + if (error = ffs_balloc(oip, lbn, offset + 1, cred, &bp, aflags, 0)) return (error); oip->i_size = length; if (UBCINFOEXISTS(ovp)) { - bp->b_flags |= B_INVAL; - bwrite(bp); + buf_markinvalid(bp); + buf_bwrite(bp); ubc_setsize(ovp, (off_t)length); } else { if (aflags & B_SYNC) - bwrite(bp); + buf_bwrite(bp); else - bawrite(bp); + buf_bawrite(bp); } oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOP_UPDATE(ovp, &tv, &tv, 1)); + return (ffs_update(ovp, &tv, &tv, 1)); } /* * Shorten the size of the file. If the file is not being @@ -275,33 +259,34 @@ ffs_truncate(ap) if (UBCINFOEXISTS(ovp)) ubc_setsize(ovp, (off_t)length); - vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; - allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); - + vflags = ((length > 0) ? 
BUF_WRITE_DATA : 0) | BUF_SKIP_META; + if (vflags & BUF_WRITE_DATA) + ffs_fsync_internal(ovp, MNT_WAIT); + allerror = buf_invalidateblks(ovp, vflags, 0, 0); + offset = blkoff(fs, length); if (offset == 0) { oip->i_size = length; } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; - if (ap->a_flags & IO_SYNC) + if (flags & IO_SYNC) aflags |= B_SYNC; - if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, - aflags, 0)) + if (error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags, 0)) return (error); oip->i_size = length; size = blksize(fs, oip, lbn); - bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + bzero((char *)buf_dataptr(bp) + offset, (u_int)(size - offset)); allocbuf(bp, size); if (UBCINFOEXISTS(ovp)) { - bp->b_flags |= B_INVAL; - bwrite(bp); + buf_markinvalid(bp); + buf_bwrite(bp); } else { if (aflags & B_SYNC) - bwrite(bp); + buf_bwrite(bp); else - bawrite(bp); + buf_bawrite(bp); } } /* @@ -314,7 +299,8 @@ ffs_truncate(ap) lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); - VOP_DEVBLOCKSIZE(oip->i_devvp,&devBlockSize); + + devBlockSize = vfs_devblocksize(vnode_mount(ovp)); nblocks = btodb(fs->fs_bsize, devBlockSize); /* @@ -332,7 +318,7 @@ ffs_truncate(ap) for (i = NDADDR - 1; i > lastblock; i--) oip->i_db[i] = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; - if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT)) + if (error = ffs_update(ovp, &tv, &tv, MNT_WAIT)) allerror = error; /* * Having written the new inode to disk, save its new configuration @@ -343,8 +329,12 @@ ffs_truncate(ap) bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); oip->i_size = osize; - vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; - allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); + + vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META; + + if (vflags & BUF_WRITE_DATA) + ffs_fsync_internal(ovp, MNT_WAIT); + allerror = buf_invalidateblks(ovp, vflags, 0, 0); /* * Indirect blocks first. @@ -424,7 +414,7 @@ done: if (newblks[i] != oip->i_db[i]) panic("itrunc2"); if (length == 0 && - (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) + (vnode_hasdirtyblks(ovp) || vnode_hascleanblks(ovp))) panic("itrunc3"); #endif /* DIAGNOSTIC */ /* @@ -468,10 +458,10 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) ufs_daddr_t *copy, nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; - int error = 0, allerror = 0; + errno_t error = 0, allerror = 0; int devBlockSize=0; -#if REV_ENDIAN_FS struct mount *mp=vp->v_mount; +#if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ @@ -486,7 +476,8 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) last = lastbn; if (lastbn > 0) last /= factor; - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); + + devBlockSize = vfs_devblocksize(mp); nblocks = btodb(fs->fs_bsize, devBlockSize); /* Doing a MALLOC here is asking for trouble. We can still @@ -494,51 +485,52 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) * low on memory and block in MALLOC */ - tbp = geteblk(fs->fs_bsize); - copy = (ufs_daddr_t *)tbp->b_data; + tbp = buf_geteblk(fs->fs_bsize); + copy = (ufs_daddr_t *)buf_dataptr(tbp); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. 
Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have - * the on disk address, so we have to set the b_blkno field - * explicitly instead of letting bread do everything for us. + * the on disk address, so we have to set the blkno field + * explicitly instead of letting buf_bread do everything for us. */ vp = ITOV(ip); - bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, BLK_META); - if (bp->b_flags & (B_DONE | B_DELWRI)) { + bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, 0, 0, BLK_META); + + if (buf_valid(bp)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); current_proc()->p_stats->p_ru.ru_inblock++; /* pay for read */ - bp->b_flags |= B_READ; - if (bp->b_bcount > bp->b_bufsize) + buf_setflags(bp, B_READ); + if (buf_count(bp) > buf_size(bp)) panic("ffs_indirtrunc: bad buffer size"); - bp->b_blkno = dbn; - VOP_STRATEGY(bp); - error = biowait(bp); + buf_setblkno(bp, (daddr64_t)((unsigned)dbn)); + VNOP_STRATEGY(bp); + error = buf_biowait(bp); } if (error) { - brelse(bp); + buf_brelse(bp); *countp = 0; - brelse(tbp); - return (error); + buf_brelse(tbp); + return ((int)error); } - bap = (ufs_daddr_t *)bp->b_data; + bap = (ufs_daddr_t *)buf_dataptr(bp); bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); bzero((caddr_t)&bap[last + 1], (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t)); if (last == -1) - bp->b_flags |= B_INVAL; + buf_markinvalid(bp); if (last != -1 && (vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = bwrite(bp); + error = buf_bwrite(bp); if (error) allerror = error; } @@ -591,8 +583,8 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) blocksreleased += blkcount; } } - brelse(tbp); + buf_brelse(tbp); *countp = blocksreleased; - return (allerror); + return ((int)allerror); } diff --git a/bsd/ufs/ffs/ffs_subr.c b/bsd/ufs/ffs/ffs_subr.c index c023a273d..5d466f630 100644 --- a/bsd/ufs/ffs/ffs_subr.c +++ b/bsd/ufs/ffs/ffs_subr.c @@ -58,7 +58,7 @@ #include #include #if REV_ENDIAN_FS -#include +#include #endif /* REV_ENDIAN_FS */ #ifndef KERNEL @@ -67,7 +67,7 @@ #else #include -#include +#include #include #include #include @@ -84,14 +84,9 @@ * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. 
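ffs_blkatoff, rewritten in the next hunk, still splits the byte offset into a logical block number and an offset within that block via lblkno() and blkoff(). Modeled here for a power-of-two block size, with BSHIFT/BMASK standing in for the fs_bshift and mask fields of struct fs:

#include <stdio.h>

#define BSHIFT 12                      /* 4K blocks, for example */
#define BMASK  ((1 << BSHIFT) - 1)

int main(void)
{
    long long offset = 13000;
    long lbn  = (long)(offset >> BSHIFT);   /* lblkno(fs, offset) */
    long boff = (long)(offset & BMASK);     /* blkoff(fs, offset) */

    printf("offset %lld -> lbn %ld, offset-in-block %ld\n", offset, lbn, boff);
    return 0;
}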
*/ +__private_extern__ int -ffs_blkatoff(ap) - struct vop_blkatoff_args /* { - struct vnode *a_vp; - off_t a_offset; - char **a_res; - struct buf **a_bpp; - } */ *ap; +ffs_blkatoff(vnode_t vp, off_t offset, char **res, buf_t *bpp) { struct inode *ip; register struct fs *fs; @@ -99,28 +94,28 @@ ffs_blkatoff(ap) ufs_daddr_t lbn; int bsize, error; #if REV_ENDIAN_FS - struct mount *mp=(ap->a_vp)->v_mount; + struct mount *mp = vnode_mount(vp); int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ - ip = VTOI(ap->a_vp); + ip = VTOI(vp); fs = ip->i_fs; - lbn = lblkno(fs, ap->a_offset); + lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); - *ap->a_bpp = NULL; - if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { - brelse(bp); + *bpp = NULL; + if (error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), bsize, NOCRED, &bp)) { + buf_brelse(bp); return (error); } #if REV_ENDIAN_FS if (rev_endian) - byte_swap_dir_block_in(bp->b_data, bp->b_bcount); + byte_swap_dir_block_in((char *)buf_dataptr(bp), buf_count(bp)); #endif /* REV_ENDIAN_FS */ - if (ap->a_res) - *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); - *ap->a_bpp = bp; + if (res) + *res = (char *)buf_dataptr(bp) + blkoff(fs, offset); + *bpp = bp; return (0); } #endif @@ -160,59 +155,6 @@ ffs_fragacct(fs, fragmap, fraglist, cnt) } } -#if defined(KERNEL) && DIAGNOSTIC -void -ffs_checkoverlap(bp, ip) - struct buf *bp; - struct inode *ip; -{ - register struct buf *ebp, *ep; - register ufs_daddr_t start, last; - struct vnode *vp; -#ifdef NeXT - int devBlockSize=0; -#endif /* NeXT */ - - ebp = &buf[nbuf]; - start = bp->b_blkno; -#ifdef NeXT - VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize); - last = start + btodb(bp->b_bcount, devBlockSize) - 1; -#else - last = start + btodb(bp->b_bcount) - 1; -#endif /* NeXT */ - for (ep = buf; ep < ebp; ep++) { - if (ep == bp || (ep->b_flags & B_INVAL) || - ep->b_vp == NULLVP) - continue; - if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, - NULL)) - continue; - if (vp != ip->i_devvp) - continue; - /* look for overlap */ -#ifdef NeXT - if (ep->b_bcount == 0 || ep->b_blkno > last || - ep->b_blkno + btodb(ep->b_bcount, devBlockSize) <= start) - continue; - vprint("Disk overlap", vp); - (void)printf("\tstart %d, end %d overlap start %d, end %d\n", - start, last, ep->b_blkno, - ep->b_blkno + btodb(ep->b_bcount, devBlockSize) - 1); -#else - if (ep->b_bcount == 0 || ep->b_blkno > last || - ep->b_blkno + btodb(ep->b_bcount) <= start) - continue; - vprint("Disk overlap", vp); - (void)printf("\tstart %d, end %d overlap start %d, end %d\n", - start, last, ep->b_blkno, - ep->b_blkno + btodb(ep->b_bcount) - 1); -#endif /* NeXT */ - panic("Disk buffer overlap"); - } -} -#endif /* DIAGNOSTIC */ - /* * block operations * @@ -241,6 +183,8 @@ ffs_isblock(fs, cp, h) default: panic("ffs_isblock"); } + /* NOTREACHED */ + return 0; } /* diff --git a/bsd/ufs/ffs/ffs_vfsops.c b/bsd/ufs/ffs/ffs_vfsops.c index ab6ebe668..06d21d70a 100644 --- a/bsd/ufs/ffs/ffs_vfsops.c +++ b/bsd/ufs/ffs/ffs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -60,9 +60,11 @@ #include #include #include +#include #include -#include +#include #include +#include #include #include #include @@ -88,7 +90,7 @@ #include #endif /* REV_ENDIAN_FS */ -int ffs_sbupdate __P((struct ufsmount *, int)); +int ffs_sbupdate(struct ufsmount *, int); struct vfsops ufs_vfsops = { ffs_mount, @@ -96,68 +98,53 @@ struct vfsops ufs_vfsops = { ffs_unmount, ufs_root, ufs_quotactl, - ffs_statfs, + ffs_vfs_getattr, ffs_sync, ffs_vget, ffs_fhtovp, ffs_vptofh, ffs_init, ffs_sysctl, + ffs_vfs_setattr, + {0} }; extern u_long nextgennumber; +union _qcvt { + int64_t qcvt; + int32_t val[2]; +}; +#define SETHIGH(q, h) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_HIGHWORD] = (h); \ + (q) = tmp.qcvt; \ +} +#define SETLOW(q, l) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_LOWWORD] = (l); \ + (q) = tmp.qcvt; \ +} + /* * Called by main() when ufs is going to be mounted as root. */ -ffs_mountroot() +int +ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) { - extern struct vnode *rootvp; - struct fs *fs; - struct mount *mp; struct proc *p = current_proc(); /* XXX */ - struct ufsmount *ump; - u_int size; - int error; + int error; - /* - * Get vnode for rootdev. - */ - if (error = bdevvp(rootdev, &rootvp)) { - printf("ffs_mountroot: can't setup bdevvp"); - return (error); - } - if (error = vfs_rootmountalloc("ufs", "root_device", &mp)) { - vrele(rootvp); /* release the reference from bdevvp() */ - return (error); - } - - /* Must set the MNT_ROOTFS flag before doing the actual mount */ - mp->mnt_flag |= MNT_ROOTFS; - /* Set asynchronous flag by default */ - mp->mnt_flag |= MNT_ASYNC; + vfs_setflags(mp, MNT_ASYNC); - if (error = ffs_mountfs(rootvp, mp, p)) { - mp->mnt_vfc->vfc_refcount--; + if (error = ffs_mountfs(rvp, mp, context)) + return (error); - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, p); + (void)ffs_statfs(mp, vfs_statfs(mp), NULL); - vrele(rootvp); /* release the reference from bdevvp() */ - FREE_ZONE(mp, sizeof (struct mount), M_MOUNT); - return (error); - } - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - ump = VFSTOUFS(mp); - fs = ump->um_fs; - (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); - (void)ffs_statfs(mp, &mp->mnt_stat, p); - vfs_unbusy(mp, p); - inittodr(fs->fs_time); return (0); } @@ -167,33 +154,33 @@ ffs_mountroot() * mount system call */ int -ffs_mount(mp, path, data, ndp, p) - register struct mount *mp; - char *path; - caddr_t data; - struct nameidata *ndp; - struct proc *p; +ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context) { - struct vnode *devvp; - struct ufs_args args; + struct proc *p = vfs_context_proc(context); struct ufsmount *ump; register struct fs *fs; u_int size; - int error, flags; + int error = 0, flags; mode_t accessmode; int ronly; int reload = 0; - if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) - return (error); /* - * If updating, check whether changing from read-only to - * read/write; if there is no device name, that's all we do. + * If updating, check whether changing from read-write to + * read-only; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + /* + * Flush any dirty data. 
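The union _qcvt helpers added above exist to set one 32-bit half of a 64-bit quantity in place; the kernel indexes the halves with _QUAD_HIGHWORD/_QUAD_LOWWORD precisely because the word order is endian-dependent. A standalone version that assumes a little-endian layout for the demo only:

#include <stdio.h>
#include <stdint.h>

union qcvt { int64_t q; int32_t val[2]; };

/* Little-endian word order assumed here; the kernel derives these
 * indices from machine headers instead of hardcoding them. */
#define QUAD_LOWWORD  0
#define QUAD_HIGHWORD 1

int main(void)
{
    union qcvt tmp;

    tmp.q = 0;
    tmp.val[QUAD_HIGHWORD] = 1;     /* SETHIGH(q, 1) */
    tmp.val[QUAD_LOWWORD]  = 2;     /* SETLOW(q, 2)  */
    printf("q = 0x%016llx\n", (unsigned long long)tmp.q);
    return 0;
}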
+ */ + VFS_SYNC(mp, MNT_WAIT, context); + /* + * Check for and optionally get rid of files open + * for writing. + */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; @@ -212,7 +199,7 @@ ffs_mount(mp, path, data, ndp, p) if ((mp->mnt_flag & MNT_RELOAD) || ronly) reload = 1; if ((reload) && - (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p))) + (error = ffs_reload(mp, vfs_context_ucred(context), p))) return (error); /* replace the ronly after load */ fs->fs_ronly = ronly; @@ -234,89 +221,91 @@ ffs_mount(mp, path, data, ndp, p) } if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { - /* - * If upgrade to read-write by non-root, then verify - * that user has necessary permissions on the device. - */ - if (p->p_ucred->cr_uid != 0) { - devvp = ump->um_devvp; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - if (error = VOP_ACCESS(devvp, VREAD | VWRITE, - p->p_ucred, p)) { - VOP_UNLOCK(devvp, 0, p); - return (error); - } - VOP_UNLOCK(devvp, 0, p); - } fs->fs_ronly = 0; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } - if (args.fspec == 0) { - /* - * Process export requests. - */ - return (vfs_export(mp, &ump->um_export, &args.export)); + if (devvp == 0) { + return(0); } } - /* - * Not an update, or updating the name: look up the name - * and verify that it refers to a sensible block device. - */ - NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); - if (error = namei(ndp)) - return (error); - devvp = ndp->ni_vp; - - if (devvp->v_type != VBLK) { - vrele(devvp); - return (ENOTBLK); - } - if (major(devvp->v_rdev) >= nblkdev) { - vrele(devvp); - return (ENXIO); - } - /* - * If mount by non-root, then verify that user has necessary - * permissions on the device. - */ - if (p->p_ucred->cr_uid != 0) { - accessmode = VREAD; - if ((mp->mnt_flag & MNT_RDONLY) == 0) - accessmode |= VWRITE; - vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) { - vput(devvp); - return (error); - } - VOP_UNLOCK(devvp, 0, p); - } if ((mp->mnt_flag & MNT_UPDATE) == 0) - error = ffs_mountfs(devvp, mp, p); + error = ffs_mountfs(devvp, mp, context); else { if (devvp != ump->um_devvp) error = EINVAL; /* needs translation */ - else - vrele(devvp); } if (error) { - vrele(devvp); return (error); } ump = VFSTOUFS(mp); fs = ump->um_fs; - (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, - (size_t *)&size); - bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); - bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, - MNAMELEN); - (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - (size_t *)&size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)ffs_statfs(mp, &mp->mnt_stat, p); + bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt)); + strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1); + (void)ffs_statfs(mp, &mp->mnt_vfsstat, p); return (0); } + +struct ffs_reload_cargs { + struct vnode *devvp; + kauth_cred_t cred; + struct fs *fs; + struct proc *p; + int error; +#if REV_ENDIAN_FS + int rev_endian; +#endif /* REV_ENDIAN_FS */ +}; + + +static int +ffs_reload_callback(struct vnode *vp, void *cargs) +{ + struct inode *ip; + struct buf *bp; + struct fs *fs; + struct ffs_reload_cargs *args; + + args = (struct ffs_reload_cargs *)cargs; + + /* + * flush all the buffers associated with this node + */ + if (buf_invalidateblks(vp, 0, 0, 0)) + panic("ffs_reload: dirty2"); + + /* + * Step 6: re-read inode data + */ + ip = VTOI(vp); + fs = args->fs; + + if (args->error 
= (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))), + (int)fs->fs_bsize, NOCRED, &bp)) { + buf_brelse(bp); + + return (VNODE_RETURNED_DONE); + } + +#if REV_ENDIAN_FS + if (args->rev_endian) { + byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + + ino_to_fsbo(fs, ip->i_number)), ip); + } else { +#endif /* REV_ENDIAN_FS */ + ip->i_din = *((struct dinode *)buf_dataptr(bp) + + ino_to_fsbo(fs, ip->i_number)); +#if REV_ENDIAN_FS + } +#endif /* REV_ENDIAN_FS */ + + buf_brelse(bp); + + return (VNODE_RETURNED); +} + + /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must @@ -330,19 +319,16 @@ ffs_mount(mp, path, data, ndp, p) * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ -ffs_reload(mountp, cred, p) - register struct mount *mountp; - struct ucred *cred; - struct proc *p; +ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) { - register struct vnode *vp, *nvp, *devvp; - struct inode *ip; + register struct vnode *devvp; void *space; struct buf *bp; struct fs *fs, *newfs; int i, blks, size, error; u_int64_t maxfilesize; /* XXX */ int32_t *lp; + struct ffs_reload_cargs args; #if REV_ENDIAN_FS int rev_endian = (mountp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ @@ -353,18 +339,18 @@ ffs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + if (buf_invalidateblks(devvp, 0, 0, 0)) panic("ffs_reload: dirty1"); /* * Step 2: re-read superblock from disk. */ - VOP_DEVBLOCKSIZE(devvp,&size); + size = vfs_devblocksize(mountp); - if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) { - brelse(bp); + if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) { + buf_brelse(bp); return (error); } - newfs = (struct fs *)bp->b_data; + newfs = (struct fs *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) { byte_swap_sbin(newfs); @@ -377,7 +363,7 @@ ffs_reload(mountp, cred, p) byte_swap_sbout(newfs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mountp)->um_fs; @@ -391,12 +377,12 @@ ffs_reload(mountp, cred, p) newfs->fs_contigdirs = fs->fs_contigdirs; bcopy(newfs, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) - bp->b_flags |= B_INVAL; + buf_markinvalid(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_sbout(newfs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat(fs); maxfilesize = 0x100000000ULL; /* 4GB */ @@ -411,26 +397,26 @@ ffs_reload(mountp, cred, p) size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; - if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, - NOCRED, &bp)) { - brelse(bp); + if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size, + NOCRED, &bp)) { + buf_brelse(bp); return (error); } #if REV_ENDIAN_FS if (rev_endian) { /* csum swaps */ - byte_swap_ints((int *)bp->b_data, size / sizeof(int)); + byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int)); } #endif /* REV_ENDIAN_FS */ - bcopy(bp->b_data, space, (u_int)size); + bcopy((char *)buf_dataptr(bp), space, (u_int)size); #if REV_ENDIAN_FS if (rev_endian) { /* csum swaps */ - byte_swap_ints((int *)bp->b_data, size / sizeof(int)); + byte_swap_ints((int 
*)buf_dataptr(bp), size / sizeof(int)); } #endif /* REV_ENDIAN_FS */ space = (char *) space + size; - brelse(bp); + buf_brelse(bp); } /* * We no longer know anything about clusters per cylinder group. @@ -440,112 +426,72 @@ ffs_reload(mountp, cred, p) for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } - -loop: - simple_lock(&mntvnode_slock); - for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - if (vp->v_mount != mountp) { - simple_unlock(&mntvnode_slock); - goto loop; - } - nvp = vp->v_mntvnodes.le_next; - /* - * Step 4: invalidate all inactive vnodes. - */ - if (vrecycle(vp, &mntvnode_slock, p)) - goto loop; - /* - * Step 5: invalidate all cached file data. - */ - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { - goto loop; - } - if (vinvalbuf(vp, 0, cred, p, 0, 0)) - panic("ffs_reload: dirty2"); - /* - * Step 6: re-read inode data for all active vnodes. - */ - ip = VTOI(vp); - if (error = - bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int)fs->fs_bsize, NOCRED, &bp)) { - brelse(bp); - vput(vp); - return (error); - } #if REV_ENDIAN_FS - if (rev_endian) { - byte_swap_inode_in(((struct dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)), ip); - } else { + args.rev_endian = rev_endian; #endif /* REV_ENDIAN_FS */ - ip->i_din = *((struct dinode *)bp->b_data + - ino_to_fsbo(fs, ip->i_number)); -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - brelse(bp); - vput(vp); - simple_lock(&mntvnode_slock); - } - simple_unlock(&mntvnode_slock); - return (0); + args.devvp = devvp; + args.cred = cred; + args.fs = fs; + args.p = p; + args.error = 0; + /* + * ffs_reload_callback will be called for each vnode + * hung off of this mount point that can't be recycled... + * vnode_iterate will recycle those that it can (the VNODE_RELOAD option) + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args); + + return (args.error); } /* * Common code for mount and mountroot */ int -ffs_mountfs(devvp, mp, p) - register struct vnode *devvp; +ffs_mountfs(devvp, mp, context) + struct vnode *devvp; struct mount *mp; - struct proc *p; + vfs_context_t context; { - register struct ufsmount *ump; + struct ufsmount *ump; struct buf *bp; - register struct fs *fs; + struct fs *fs; dev_t dev; struct buf *cgbp; struct cg *cgp; int32_t clustersumoff; void *space; - int error, i, blks, size, ronly; + int error, i, blks, ronly; + u_int32_t size; int32_t *lp; - struct ucred *cred; - extern struct vnode *rootvp; + kauth_cred_t cred; u_int64_t maxfilesize; /* XXX */ u_int dbsize = DEV_BSIZE; #if REV_ENDIAN_FS int rev_endian=0; #endif /* REV_ENDIAN_FS */ dev = devvp->v_rdev; - cred = p ? p->p_ucred : NOCRED; - /* - * Disallow multiple mounts of the same device. - * Disallow mounting of a device that is currently in use - * (except for root, which might share swap device for miniroot). - * Flush out any old buffers remaining from a previous use. - */ - if (error = vfs_mountedon(devvp)) - return (error); - if (vcount(devvp) > 1 && devvp != rootvp) - return (EBUSY); - if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) - return (error); + cred = vfs_context_ucred(context); - ronly = (mp->mnt_flag & MNT_RDONLY) != 0; - if (error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p)) - return (error); + ronly = vfs_isrdonly(mp); + bp = NULL; + ump = NULL; - VOP_DEVBLOCKSIZE(devvp,&size); + /* Advisory locking should be handled at the VFS layer */ + vfs_setlocklocal(mp); - bp = NULL; - ump = NULL; - if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, cred, &bp)) + /* Obtain the actual device block size */ + if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) { + error = ENXIO; goto out; - fs = (struct fs *)bp->b_data; + } + + if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), + SBSIZE, cred, &bp)) + goto out; + fs = (struct fs *)buf_dataptr(bp); #if REV_ENDIAN_FS if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { @@ -610,22 +556,14 @@ ffs_mountfs(devvp, mp, p) if(dbsize <= 0 ) { kprintf("device blocksize computaion failed\n"); } else { - if (VOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize, - FWRITE, NOCRED, p) != 0) { + if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize, + FWRITE, context) != 0) { kprintf("failed to set device blocksize\n"); } /* force the specfs to reread blocksize from size() */ set_fsblocksize(devvp); } - /* cache the IO attributes */ - error = vfs_init_io_attributes(devvp, mp); - if (error) { - printf("ffs_mountfs: vfs_init_io_attributes returned %d\n", - error); - goto out; - } - /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { #if REV_ENDIAN_FS @@ -642,12 +580,12 @@ ffs_mountfs(devvp, mp, p) * to avoid further corruption. PR#2216969 */ if (ronly == 0){ - if (error = bread (devvp, fsbtodb(fs, cgtod(fs, 0)), - (int)fs->fs_cgsize, NOCRED, &cgbp)) { - brelse(cgbp); + if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))), + (int)fs->fs_cgsize, NOCRED, &cgbp)) { + buf_brelse(cgbp); goto out; } - cgp = (struct cg *)cgbp->b_data; + cgp = (struct cg *)buf_dataptr(cgbp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_cgin(cgp,fs); @@ -657,7 +595,7 @@ ffs_mountfs(devvp, mp, p) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(cgbp); + buf_brelse(cgbp); goto out; } if (cgp->cg_clustersumoff != 0) { @@ -675,21 +613,21 @@ ffs_mountfs(devvp, mp, p) if (rev_endian) byte_swap_cgout(cgp,fs); #endif /* REV_ENDIAN_FS */ - brelse(cgbp); + buf_brelse(cgbp); } ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); - bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); + bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) - bp->b_flags |= B_INVAL; + buf_markinvalid(bp); #if REV_ENDIAN_FS if (rev_endian) byte_swap_sbout(fs); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; @@ -704,18 +642,18 @@ ffs_mountfs(devvp, mp, p) size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; - if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, - cred, &bp)) { + if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), + size, cred, &bp)) { _FREE(fs->fs_csp, M_UFSMNT); goto out; } - bcopy(bp->b_data, space, (u_int)size); + bcopy((char *)buf_dataptr(bp), space, (u_int)size); #if REV_ENDIAN_FS if (rev_endian) byte_swap_ints((int *) space, size / sizeof(int)); #endif /* REV_ENDIAN_FS */ space = (char *)space + size; - brelse(bp); + buf_brelse(bp); bp = NULL; } if 
(fs->fs_contigsumsize > 0) { @@ -735,8 +673,8 @@ ffs_mountfs(devvp, mp, p) fs->fs_avgfpdir = AFPDIR; /* XXX End of compatibility */ mp->mnt_data = (qaddr_t)ump; - mp->mnt_stat.f_fsid.val[0] = (long)dev; - mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; + mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); /* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */ mp->mnt_maxsymlinklen = 60; #if REV_ENDIAN_FS @@ -750,8 +688,7 @@ ffs_mountfs(devvp, mp, p) ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) - ump->um_qfiles[i].qf_vp = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + dqfileinit(&ump->um_qfiles[i]); ffs_oldfscompat(fs); ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = 0x100000000ULL; /* 4GB */ @@ -767,12 +704,10 @@ ffs_mountfs(devvp, mp, p) return (0); out: if (bp) - brelse(bp); - (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); + buf_brelse(bp); if (ump) { _FREE(ump->um_fs, M_UFSMNT); _FREE(ump, M_UFSMNT); - mp->mnt_data = (qaddr_t)0; } return (error); } @@ -809,11 +744,12 @@ ffs_oldfscompat(fs) * unmount system call */ int -ffs_unmount(mp, mntflags, p) +ffs_unmount(mp, mntflags, context) struct mount *mp; int mntflags; - struct proc *p; + vfs_context_t context; { + struct proc *p = vfs_context_proc(context); register struct ufsmount *ump; register struct fs *fs; int error, flags; @@ -829,6 +765,7 @@ ffs_unmount(mp, mntflags, p) return (error); ump = VFSTOUFS(mp); fs = ump->um_fs; + if (fs->fs_ronly == 0) { fs->fs_clean = 1; if (error = ffs_sbupdate(ump, MNT_WAIT)) { @@ -843,20 +780,10 @@ ffs_unmount(mp, mntflags, p) #endif /* notyet */ } } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; - error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, - NOCRED, p); - if (error && !force) - return (error); - vrele(ump->um_devvp); - _FREE(fs->fs_csp, M_UFSMNT); _FREE(fs, M_UFSMNT); _FREE(ump, M_UFSMNT); - mp->mnt_data = (qaddr_t)0; -#if REV_ENDIAN_FS - mp->mnt_flag &= ~MNT_REVEND; -#endif /* REV_ENDIAN_FS */ + return (0); } @@ -907,7 +834,7 @@ ffs_flushfiles(mp, flags, p) (rootvp->v_usecount > (1 + quotafilecnt))) { error = EBUSY; /* root dir is still open */ } - vput(rootvp); + vnode_put(rootvp); } if (error && (flags & FORCECLOSE) == 0) return (error); @@ -915,7 +842,7 @@ ffs_flushfiles(mp, flags, p) for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_qfiles[i].qf_vp == NULLVP) continue; - quotaoff(p, mp, i); + quotaoff(mp, i); } /* * Here we fall through to vflush again to ensure @@ -932,10 +859,10 @@ ffs_flushfiles(mp, flags, p) * Get file system statistics. 
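
From here on, every entry point that used to take a separate (cred, proc) pair takes a single vfs_context_t, and the pieces are recovered with accessors only where still needed, as ffs_unmount() does above with vfs_context_proc() and ffs_mountfs() does with vfs_context_ucred(). A sketch of the shape, with illustrative names:

static int
myfs_unmount(mount_t mp, int mntflags, vfs_context_t context)
{
        struct proc *p = vfs_context_proc(context);     /* calling process */
        kauth_cred_t cred = vfs_context_ucred(context); /* caller's credential */
        int force = (mntflags & MNT_FORCE);

        /* ... flush files (using p, cred, force) and free mount state ... */
        return (0);
}
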
*/ int -ffs_statfs(mp, sbp, p) +ffs_statfs(mp, sbp, context) struct mount *mp; - register struct statfs *sbp; - struct proc *p; + register struct vfsstatfs *sbp; + vfs_context_t context; { register struct ufsmount *ump; register struct fs *fs; @@ -946,22 +873,256 @@ ffs_statfs(mp, sbp, p) panic("ffs_statfs"); sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; - sbp->f_blocks = fs->fs_dsize; - sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + - fs->fs_cstotal.cs_nffree; - sbp->f_bavail = freespace(fs, fs->fs_minfree); - sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; - sbp->f_ffree = fs->fs_cstotal.cs_nifree; - if (sbp != &mp->mnt_stat) { - sbp->f_type = mp->mnt_vfc->vfc_typenum; - bcopy((caddr_t)mp->mnt_stat.f_mntonname, - (caddr_t)&sbp->f_mntonname[0], MNAMELEN); - bcopy((caddr_t)mp->mnt_stat.f_mntfromname, - (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize); + sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag + + fs->fs_cstotal.cs_nffree)); + sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree)); + sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO)); + sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree); + return (0); +} + +int +ffs_vfs_getattr(mp, fsap, context) + struct mount *mp; + struct vfs_attr *fsap; + vfs_context_t context; +{ + struct ufsmount *ump; + struct fs *fs; + kauth_cred_t cred; + struct vnode *devvp; + struct buf *bp; + struct ufslabel *ulp; + char *offset; + int bs, error, length; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + cred = vfs_context_ucred(context); + + VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize); + VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize); + VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize)); + VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long) + (fs->fs_cstotal.cs_nbfree * fs->fs_frag + + fs->fs_cstotal.cs_nffree))); + VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs, + fs->fs_minfree))); + VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long) + (fs->fs_ncg * fs->fs_ipg - ROOTINO))); + VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long) + fs->fs_cstotal.cs_nifree)); + + if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) { + fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0]; + fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1]; + VFSATTR_SET_SUPPORTED(fsap, f_fsid); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + devvp = ump->um_devvp; + bs = vfs_devblocksize(mp); + + if (error = (int)buf_meta_bread(devvp, + (daddr64_t)(UFS_LABEL_OFFSET / bs), + MAX(bs, UFS_LABEL_SIZE), cred, &bp)) { + if (bp) + buf_brelse(bp); + return (error); + } + + /* + * Since the disklabel is read directly by older user space + * code, make sure this buffer won't remain in the cache when + * we release it. 
+ */ + buf_setflags(bp, B_NOCACHE); + + offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs); + ulp = (struct ufslabel *)offset; + + if (ufs_label_check(ulp)) { + length = ulp->ul_namelen; +#if REV_ENDIAN_FS + if (mp->mnt_flag & MNT_REVEND) + length = NXSwapShort(length); +#endif + if (length > 0 && length <= UFS_MAX_LABEL_NAME) { + bcopy(ulp->ul_name, fsap->f_vol_name, length); + fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0'; + fsap->f_vol_name[length] = '\0'; + } + } + + buf_brelse(bp); + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS ; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] + = VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] + = 0; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] + = 0; + + /* Capabilities we know about: */ + fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE; + fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK ; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); } + + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + fsap->f_attributes.validattr.commonattr = 0; + fsap->f_attributes.validattr.volattr = + ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.validattr.dirattr = 0; + fsap->f_attributes.validattr.fileattr = 0; + fsap->f_attributes.validattr.forkattr = 0; + + fsap->f_attributes.nativeattr.commonattr = 0; + fsap->f_attributes.nativeattr.volattr = + ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.nativeattr.dirattr = 0; + fsap->f_attributes.nativeattr.fileattr = 0; + fsap->f_attributes.nativeattr.forkattr = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + return (0); } + +int +ffs_vfs_setattr(mp, fsap, context) + struct mount *mp; + struct vfs_attr *fsap; + vfs_context_t context; +{ + struct ufsmount *ump; + struct vnode *devvp; + struct buf *bp; + struct ufslabel *ulp; + kauth_cred_t cred; + char *offset; + int bs, error; + + + ump = VFSTOUFS(mp); + cred = vfs_context_ucred(context); + + if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { + devvp = ump->um_devvp; + bs = vfs_devblocksize(mp); + if (error = buf_meta_bread(devvp, + (daddr64_t)(UFS_LABEL_OFFSET / bs), + MAX(bs, UFS_LABEL_SIZE), cred, &bp)) { + if (bp) + buf_brelse(bp); + return (error); + } + + /* + * Since the disklabel is read directly by older user space + * code, make sure this buffer won't remain in the cache when + * we release it. 
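
The new ffs_vfs_getattr() above follows the vfs_attr protocol: cheap attributes are returned unconditionally with VFSATTR_RETURN(), expensive ones (here the volume label, which costs a disk read) are computed only when VFSATTR_IS_ACTIVE() says the caller asked for them, and every answered attribute is flagged with VFSATTR_SET_SUPPORTED(). A skeleton reduced to two attributes; the names are illustrative, and it assumes the caller supplied the usual MAXPATHLEN buffer behind f_vol_name:

static int
myfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context)
{
        /* VFSATTR_RETURN sets the value and marks it supported in one step */
        VFSATTR_RETURN(fsap, f_bsize, 4096);

        if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
                /* only do the expensive work when it was requested */
                strncpy(fsap->f_vol_name, "untitled", MAXPATHLEN);
                VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
        }
        return (0);
}
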
+ */ + buf_setflags(bp, B_NOCACHE); + + /* Validate the label structure; init if not valid */ + offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs); + ulp = (struct ufslabel *)offset; + if (!ufs_label_check(ulp)) + ufs_label_init(ulp); + + /* Copy new name over existing name */ + ulp->ul_namelen = strlen(fsap->f_vol_name); +#if REV_ENDIAN_FS + if (mp->mnt_flag & MNT_REVEND) + ulp->ul_namelen = NXSwapShort(ulp->ul_namelen); +#endif + bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen); + ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0'; + ulp->ul_name[ulp->ul_namelen] = '\0'; + + /* Update the checksum */ + ulp->ul_checksum = 0; + ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp)); + + /* Write the label back to disk */ + buf_bwrite(bp); + bp = NULL; + + VFSATTR_SET_SUPPORTED(fsap, f_vol_name); + } + + return (0); + } +struct ffs_sync_cargs { + vfs_context_t context; + int waitfor; + int error; +}; + + +static int +ffs_sync_callback(struct vnode *vp, void *cargs) +{ + struct inode *ip; + struct ffs_sync_cargs *args; + int error; + + args = (struct ffs_sync_cargs *)cargs; + + ip = VTOI(vp); + + if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) { + error = VNOP_FSYNC(vp, args->waitfor, args->context); + + if (error) + args->error = error; + + } + return (VNODE_RETURNED); +} + /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; @@ -970,17 +1131,17 @@ ffs_statfs(mp, sbp, p) * Note: we are always called with the filesystem marked `MPBUSY'. */ int -ffs_sync(mp, waitfor, cred, p) +ffs_sync(mp, waitfor, context) struct mount *mp; int waitfor; - struct ucred *cred; - struct proc *p; + vfs_context_t context; { struct vnode *nvp, *vp; - struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; + struct timeval tv; int error, allerror = 0; + struct ffs_sync_cargs args; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ @@ -990,58 +1151,23 @@ ffs_sync(mp, waitfor, cred, p) /* * Write back each (modified) inode. */ - simple_lock(&mntvnode_slock); -loop: - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = nvp) { - int didhold = 0; + args.context = context; + args.waitfor = waitfor; + args.error = 0; + /* + * ffs_sync_callback will be called for each vnode + * hung off of this mount point... the vnode will be + * properly referenced and unreferenced around the callback + */ + vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args); - /* - * If the vnode that we are about to sync is no longer - * associated with this mount point, start over. - */ - if (vp->v_mount != mp) - goto loop; - simple_lock(&vp->v_interlock); - nvp = vp->v_mntvnodes.le_next; - ip = VTOI(vp); - - // restart our whole search if this guy is locked - // or being reclaimed. 
- if (ip == NULL || vp->v_flag & (VXLOCK|VORECLAIM)) { - simple_unlock(&vp->v_interlock); - continue; - } + if (args.error) + allerror = args.error; - if ((vp->v_type == VNON) || - ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - vp->v_dirtyblkhd.lh_first == NULL && !(vp->v_flag & VHASDIRTY))) { - simple_unlock(&vp->v_interlock); - continue; - } - simple_unlock(&mntvnode_slock); - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) - goto loop; - continue; - } - didhold = ubc_hold(vp); - if (error = VOP_FSYNC(vp, cred, waitfor, p)) - allerror = error; - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); - simple_lock(&mntvnode_slock); - } - simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) + if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context)) allerror = error; #if QUOTA qsync(mp); @@ -1051,7 +1177,8 @@ loop: */ if (fs->fs_fmod != 0) { fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; + microtime(&tv); + fs->fs_time = tv.tv_sec; if (error = ffs_sbupdate(ump, waitfor)) allerror = error; } @@ -1065,10 +1192,25 @@ loop: * done by the calling routine. */ int -ffs_vget(mp, inop, vpp) - struct mount *mp; - void *inop; - struct vnode **vpp; +ffs_vget(mp, ino, vpp, context) + mount_t mp; + ino64_t ino; + vnode_t *vpp; + vfs_context_t context; +{ + return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0)); +} + + +int +ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted) + mount_t mp; + ino_t ino; + vnode_t *vpp; + vnode_t dvp; + struct componentname *cnp; + int mode; + int fhwanted; { struct proc *p = current_proc(); /* XXX */ struct fs *fs; @@ -1076,58 +1218,56 @@ ffs_vget(mp, inop, vpp) struct ufsmount *ump; struct buf *bp; struct vnode *vp; - ino_t ino; + struct vnode_fsparam vfsp; + struct timeval tv; + enum vtype vtype; dev_t dev; int i, type, error = 0; - ino = (ino_t) inop; - ump = VFSTOUFS(mp); - dev = ump->um_dev; - + *vpp = NULL; + ump = VFSTOUFS(mp); + dev = ump->um_dev; +#if 0 /* Check for unmount in progress */ if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - *vpp = NULL; return (EPERM); } +#endif + /* + * Allocate a new inode... do it before we check the + * cache, because the MALLOC_ZONE may block + */ + type = M_FFSNODE; + MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); - /* check in the inode hash */ + /* + * check in the inode hash + */ if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { + /* + * found it... get rid of the allocation + * that we didn't need and return + * the 'found' vnode + */ + FREE_ZONE(ip, sizeof(struct inode), type); vp = *vpp; - UBCINFOCHECK("ffs_vget", vp); return (0); } - + bzero((caddr_t)ip, sizeof(struct inode)); /* - * Not in inode hash. - * Allocate a new vnode/inode. + * lock the inode */ - type = ump->um_devvp->v_tag == VT_MFS ? 
M_MFSNODE : M_FFSNODE; /* XXX */ - MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); - bzero((caddr_t)ip, sizeof(struct inode)); - lockinit(&ip->i_lock, PINOD, "inode", 0, 0); - /* lock the inode */ - lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p); +// lockinit(&ip->i_lock, PINOD, "inode", 0, 0); +// lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p); ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; - SET(ip->i_flag, IN_ALLOC); #if QUOTA for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif - - /* - * We could have blocked in MALLOC_ZONE. Check for the race. - */ - if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { - /* lost the race, clean up */ - FREE_ZONE(ip, sizeof(struct inode), type); - vp = *vpp; - UBCINFOCHECK("ffs_vget", vp); - return (0); - } - + SET(ip->i_flag, IN_ALLOC); /* * Put it onto its hash chain locked so that other requests for * this inode will block if they arrive while we are sleeping waiting @@ -1137,49 +1277,100 @@ ffs_vget(mp, inop, vpp) ufs_ihashins(ip); /* Read in the disk contents for the inode, copy into the inode. */ - if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), - (int)fs->fs_bsize, NOCRED, &bp)) { - brelse(bp); + if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))), + (int)fs->fs_bsize, NOCRED, &bp)) { + buf_brelse(bp); goto errout; } #if REV_ENDIAN_FS if (mp->mnt_flag & MNT_REVEND) { - byte_swap_inode_in(((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)),ip); + byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip); } else { - ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)); } #else - ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)); #endif /* REV_ENDIAN_FS */ - brelse(bp); - - if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) - goto errout; + buf_brelse(bp); + + if (mode == 0) + vtype = IFTOVT(ip->i_mode); + else + vtype = IFTOVT(mode); + + if (vtype == VNON) { + if (fhwanted) { + /* NFS is in play */ + error = ESTALE; + goto errout; + } else { + error = ENOENT; + goto errout; + } + } - vp->v_data = ip; - ip->i_vnode = vp; + vfsp.vnfs_mp = mp; + vfsp.vnfs_vtype = vtype; + vfsp.vnfs_str = "ufs"; + vfsp.vnfs_dvp = dvp; + vfsp.vnfs_fsnode = ip; + vfsp.vnfs_cnp = cnp; + + if (mode == 0) + vfsp.vnfs_filesize = ip->i_din.di_size; + else + vfsp.vnfs_filesize = 0; + + if (vtype == VFIFO ) + vfsp.vnfs_vops = FFS_FIFOOPS; + else if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_vops = ffs_specop_p; + else + vfsp.vnfs_vops = ffs_vnodeop_p; + + if (vtype == VBLK || vtype == VCHR) + vfsp.vnfs_rdev = ip->i_rdev; + else + vfsp.vnfs_rdev = 0; + + if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) + vfsp.vnfs_flags = 0; + else + vfsp.vnfs_flags = VNFS_NOCACHE; /* - * Initialize the vnode from the inode, check for aliases. - * Note that the underlying vnode may have changed. + * Tag root directory */ - if (error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp)) { - vput(vp); - *vpp = NULL; - goto out; - } + vfsp.vnfs_markroot = (ip->i_number == ROOTINO); + vfsp.vnfs_marksystem = 0; + + if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp))) + goto errout; + /* * Finish inode initialization now that aliasing has been resolved. 
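
Above, getnewvnode() plus the ufs_vinit() fix-up pass is replaced by one vnode_create() call: the filesystem fills in a vnode_fsparam descriptor (type, vnop vector, file size, name-cache policy, root/system marks) before the vnode ever exists. Reduced to the regular-file case, with illustrative names:

static int
make_vnode(mount_t mp, void *fsnode, off_t filesize,
    int (**vops)(void *), vnode_t *vpp)
{
        struct vnode_fsparam vfsp;

        bzero(&vfsp, sizeof(vfsp));
        vfsp.vnfs_mp = mp;
        vfsp.vnfs_vtype = VREG;
        vfsp.vnfs_str = "myfs";
        vfsp.vnfs_fsnode = fsnode;      /* per-file private data */
        vfsp.vnfs_vops = vops;
        vfsp.vnfs_filesize = filesize;
        vfsp.vnfs_flags = VNFS_NOCACHE; /* stay out of the name cache */

        return (vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp));
}

Note that ffs_vget_internal() above only sets VNFS_NOCACHE when it has no componentname worth entering, which is why the flag is computed from dvp/cnp there.
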
*/ ip->i_devvp = ump->um_devvp; - VREF(ip->i_devvp); + ip->i_vnode = vp; + + vnode_ref(ip->i_devvp); + vnode_addfsref(vp); + vnode_settag(vp, VT_UFS); + + /* + * Initialize modrev times + */ + microtime(&tv); + SETHIGH(ip->i_modrev, tv.tv_sec); + SETLOW(ip->i_modrev, tv.tv_usec * 4294); + /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { - if (++nextgennumber < (u_long)time.tv_sec) - nextgennumber = time.tv_sec; + if (++nextgennumber < (u_long)tv.tv_sec) + nextgennumber = tv.tv_sec; ip->i_gen = nextgennumber; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; @@ -1192,24 +1383,22 @@ ffs_vget(mp, inop, vpp) ip->i_uid = ip->i_din.di_ouid; /* XXX */ ip->i_gid = ip->i_din.di_ogid; /* XXX */ } /* XXX */ - - if (UBCINFOMISSING(vp) || UBCINFORECLAIMED(vp)) - ubc_info_init(vp); *vpp = vp; -out: CLR(ip->i_flag, IN_ALLOC); + if (ISSET(ip->i_flag, IN_WALLOC)) wakeup(ip); - return (error); + + return (0); errout: ufs_ihashrem(ip); - CLR(ip->i_flag, IN_ALLOC); + if (ISSET(ip->i_flag, IN_WALLOC)) wakeup(ip); FREE_ZONE(ip, sizeof(struct inode), type); - *vpp = NULL; + return (error); } @@ -1218,47 +1407,66 @@ errout: * * Have to be really careful about stale file handles: * - check that the inode number is valid - * - call ffs_vget() to get the locked inode + * - call vget to get the locked inode * - check for an unallocated inode (i_mode == 0) - * - check that the given client host has export rights and return - * those rights via. exflagsp and credanonp */ int -ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) +ffs_fhtovp(mp, fhlen, fhp, vpp, context) register struct mount *mp; - struct fid *fhp; - struct mbuf *nam; + int fhlen; + unsigned char *fhp; struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; + vfs_context_t context; { register struct ufid *ufhp; + register struct inode *ip; + struct vnode *nvp; struct fs *fs; + int error; + if (fhlen < (int)sizeof(struct ufid)) + return (EINVAL); ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); - return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); + error = ffs_vget_internal(mp, ufhp->ufid_ino, &nvp, NULL, NULL, 0, 1); + if (error) { + *vpp = NULLVP; + return (error); + } + ip = VTOI(nvp); + if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) { + vnode_put(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ -ffs_vptofh(vp, fhp) +int +ffs_vptofh(vp, fhlenp, fhp, context) struct vnode *vp; - struct fid *fhp; + int *fhlenp; + unsigned char *fhp; + vfs_context_t context; { register struct inode *ip; register struct ufid *ufhp; + if (*fhlenp < (int)sizeof(struct ufid)) + return (EOVERFLOW); ip = VTOI(vp); ufhp = (struct ufid *)fhp; - ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; + *fhlenp = sizeof(struct ufid); return (0); } @@ -1276,14 +1484,8 @@ ffs_init(vfsp) /* * fast filesystem related variables. 
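
ffs_fhtovp()/ffs_vptofh() above drop the old struct fid overlay (struct ufid loses its ufid_len/ufid_pad header in inode.h further down) in favor of a raw byte buffer with an explicit in/out length, and staleness is now detected inside the filesystem by comparing the stored generation number. The vptofh side in sketch form; the names and node layout are illustrative assumptions:

struct myfid {
        ino_t   fid_ino;        /* file's inode number */
        int32_t fid_gen;        /* generation, guards against inode reuse */
};

struct mynode {
        ino_t   n_ino;
        int32_t n_gen;
};

static int
myfs_vptofh(vnode_t vp, int *fhlenp, unsigned char *fhp, vfs_context_t context)
{
        struct mynode *np = vnode_fsnode(vp);   /* per-file private data */
        struct myfid *fidp = (struct myfid *)fhp;

        if (*fhlenp < (int)sizeof(struct myfid))
                return (EOVERFLOW);             /* caller's buffer too small */
        fidp->fid_ino = np->n_ino;
        fidp->fid_gen = np->n_gen;
        *fhlenp = sizeof(struct myfid);         /* report the bytes used */
        return (0);
}
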
*/ -ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, vfs_context_t context) { extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree; @@ -1304,7 +1506,7 @@ ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) case FFS_ASYNCFREE: return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree)); default: - return (EOPNOTSUPP); + return (ENOTSUP); } /* NOTREACHED */ } @@ -1336,18 +1538,18 @@ ffs_sbupdate(mp, waitfor) size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; - bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), - size, 0, 0, BLK_META); - bcopy(space, bp->b_data, (u_int)size); + bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), + size, 0, 0, BLK_META); + bcopy(space, (char *)buf_dataptr(bp), (u_int)size); #if REV_ENDIAN_FS if (rev_endian) { - byte_swap_ints((int *)bp->b_data, size / sizeof(int)); + byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int)); } #endif /* REV_ENDIAN_FS */ space = (char *)space + size; if (waitfor != MNT_WAIT) - bawrite(bp); - else if (error = bwrite(bp)) + buf_bawrite(bp); + else if (error = (int)buf_bwrite(bp)) allerror = error; } /* @@ -1357,11 +1559,12 @@ ffs_sbupdate(mp, waitfor) */ if (allerror) return (allerror); - VOP_DEVBLOCKSIZE(mp->um_devvp,&devBlockSize); - bp = getblk(mp->um_devvp, (SBOFF/devBlockSize), (int)fs->fs_sbsize, 0, 0, BLK_META); - bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + devBlockSize = vfs_devblocksize(mp->um_mountp); + + bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META); + bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ - dfs = (struct fs *)bp->b_data; /* XXX */ + dfs = (struct fs *)buf_dataptr(bp); /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ dfs->fs_nrpos = -1; /* XXX */ #if REV_ENDIAN_FS @@ -1371,7 +1574,7 @@ ffs_sbupdate(mp, waitfor) * fields get moved */ if (rev_endian) { - byte_swap_sbout((struct fs *)bp->b_data); + byte_swap_sbout((struct fs *)buf_dataptr(bp)); } #endif /* REV_ENDIAN_FS */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ @@ -1396,8 +1599,8 @@ ffs_sbupdate(mp, waitfor) } #endif /* REV_ENDIAN_FS */ if (waitfor != MNT_WAIT) - bawrite(bp); - else if (error = bwrite(bp)) + buf_bawrite(bp); + else if (error = (int)buf_bwrite(bp)) allerror = error; return (allerror); diff --git a/bsd/ufs/ffs/ffs_vnops.c b/bsd/ufs/ffs/ffs_vnops.c index 2e216c4f6..b8dec359e 100644 --- a/bsd/ufs/ffs/ffs_vnops.c +++ b/bsd/ufs/ffs/ffs_vnops.c @@ -62,11 +62,10 @@ #include #include #include -#include #include #include -#include -#include +#include +#include #include #include #include @@ -77,7 +76,6 @@ #include #include -#include #include #include #include @@ -96,60 +94,43 @@ /* Global vfs data structures for ufs. 
*/ int (**ffs_vnodeop_p)(void *); struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)ufs_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)ufs_create }, /* create */ - { &vop_whiteout_desc, (VOPFUNC)ufs_whiteout }, /* whiteout */ - { &vop_mknod_desc, (VOPFUNC)ufs_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)ufs_open }, /* open */ - { &vop_close_desc, (VOPFUNC)ufs_close }, /* close */ - { &vop_access_desc, (VOPFUNC)ufs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)ffs_read }, /* read */ - { &vop_write_desc, (VOPFUNC)ffs_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)ufs_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)ufs_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)ufs_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)ufs_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)ufs_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)ufs_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)ufs_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)ufs_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)ufs_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)ufs_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)ufs_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)ufs_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)ufs_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)ufs_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)nop_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)ufs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)ufs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)ufs_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)ufs_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)ufs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)ufs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)ufs_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)ufs_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)ffs_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)ffs_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)ffs_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)ffs_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)ffs_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)ffs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ - { &vop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)ufs_cmap }, /* cmap */ - { &vop_getattrlist_desc, (VOPFUNC)ufs_getattrlist }, /* getattrlist */ - { &vop_setattrlist_desc, (VOPFUNC)ufs_setattrlist }, /* setattrlist */ - { &vop_kqfilt_add_desc, (VOPFUNC)ufs_kqfilt_add }, /* kqfilt_add */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)ufs_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)ufs_create }, /* create */ + { &vnop_whiteout_desc, (VOPFUNC)ufs_whiteout }, /* whiteout */ + { &vnop_mknod_desc, (VOPFUNC)ufs_mknod }, 
/* mknod */ + { &vnop_open_desc, (VOPFUNC)ufs_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)ufs_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)ffs_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)ffs_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)ufs_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)ufs_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)ufs_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)ufs_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)ufs_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)ufs_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)ufs_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)ufs_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)ufs_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)ufs_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)ufs_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)ufs_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)ufs_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)ufs_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ + { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)ufs_blockmap }, /* blockmap */ + { &vnop_kqfilt_add_desc, (VOPFUNC)ufs_kqfilt_add }, /* kqfilt_add */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc ffs_vnodeop_opv_desc = @@ -157,57 +138,42 @@ struct vnodeopv_desc ffs_vnodeop_opv_desc = int (**ffs_specop_p)(void *); struct vnodeopv_entry_desc ffs_specop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vop_close_desc, (VOPFUNC)ufsspec_close }, /* close */ - { &vop_access_desc, (VOPFUNC)ufs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)ufsspec_read }, /* read */ - { &vop_write_desc, (VOPFUNC)ufsspec_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)spec_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)spec_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)spec_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { 
&vop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)spec_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)ufs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)ufs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)spec_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)ufs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)ufs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)spec_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)spec_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)spec_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)ffs_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)spec_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)ffs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ - { &vop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ - { &vop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)spec_cmap }, /* cmap */ + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)ufsspec_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)ufsspec_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)ufsspec_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_devblocksize_desc, (VOPFUNC)spec_devblocksize }, /* devblocksize */ + { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* 
Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ + { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc ffs_specop_opv_desc = @@ -216,57 +182,42 @@ struct vnodeopv_desc ffs_specop_opv_desc = #if FIFO int (**ffs_fifoop_p)(void *); struct vnodeopv_entry_desc ffs_fifoop_entries[] = { - { &vop_default_desc, (VOPFUNC)vn_default_error }, - { &vop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vop_create_desc, (VOPFUNC)fifo_create }, /* create */ - { &vop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ - { &vop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vop_close_desc, (VOPFUNC)ufsfifo_close }, /* close */ - { &vop_access_desc, (VOPFUNC)ufs_access }, /* access */ - { &vop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vop_read_desc, (VOPFUNC)ufsfifo_read }, /* read */ - { &vop_write_desc, (VOPFUNC)ufsfifo_write }, /* write */ - { &vop_lease_desc, (VOPFUNC)fifo_lease_check }, /* lease */ - { &vop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ - { &vop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ - { &vop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vop_seek_desc, (VOPFUNC)fifo_seek }, /* seek */ - { &vop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ - { &vop_link_desc, (VOPFUNC)fifo_link }, /* link */ - { &vop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ - { &vop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ - { &vop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ - { &vop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ - { &vop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ - { &vop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ - { &vop_abortop_desc, (VOPFUNC)fifo_abortop }, /* abortop */ - { &vop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ - { &vop_lock_desc, (VOPFUNC)ufs_lock }, /* lock */ - { &vop_unlock_desc, (VOPFUNC)ufs_unlock }, /* unlock */ - { &vop_bmap_desc, (VOPFUNC)fifo_bmap }, /* bmap */ - { &vop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ - { &vop_print_desc, (VOPFUNC)ufs_print }, /* print */ - { &vop_islocked_desc, (VOPFUNC)ufs_islocked }, /* islocked */ - { &vop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ - { &vop_blkatoff_desc, (VOPFUNC)fifo_blkatoff }, /* blkatoff */ - { &vop_valloc_desc, (VOPFUNC)fifo_valloc }, /* valloc */ - { &vop_reallocblks_desc, (VOPFUNC)fifo_reallocblks }, /* reallocblks */ - { &vop_vfree_desc, (VOPFUNC)ffs_vfree }, /* vfree */ - { &vop_truncate_desc, (VOPFUNC)fifo_truncate }, /* truncate */ - { &vop_update_desc, (VOPFUNC)ffs_update }, /* update */ - { &vop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ - { &vop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vop_cmap_desc, (VOPFUNC)ufs_cmap }, /* cmap */ - { &vop_kqfilt_add_desc, (VOPFUNC)ufsfifo_kqfilt_add }, /* kqfilt_add */ + { 
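
All three dispatch tables in this file move from vop_* to vnop_* descriptors and shed the operations the new VFS performs centrally (lock/unlock, access, lease, seek, abortop, truncate, update, and others), while advlock now points at err_advlock because the VFS keeps advisory locks itself (see vfs_setlocklocal() in ffs_mountfs() above). The table mechanism is unchanged in shape; a minimal sketch with illustrative handlers:

static int myfs_lookup(void *);
static int myfs_open(void *);
static int myfs_close(void *);

int (**myfs_vnodeop_p)(void *);

struct vnodeopv_entry_desc myfs_vnodeop_entries[] = {
        /* operations without an entry fall through to the default */
        { &vnop_default_desc, (VOPFUNC)vn_default_error },
        { &vnop_lookup_desc, (VOPFUNC)myfs_lookup },    /* lookup */
        { &vnop_open_desc, (VOPFUNC)myfs_open },        /* open */
        { &vnop_close_desc, (VOPFUNC)myfs_close },      /* close */
        { (struct vnodeop_desc *)NULL, (int (*)())NULL }
};
struct vnodeopv_desc myfs_vnodeop_opv_desc =
        { &myfs_vnodeop_p, myfs_vnodeop_entries };
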
&vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ + { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ + { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)ufsfifo_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)ufsfifo_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)ufsfifo_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ + { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC)fifo_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, + { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ + { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ + { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)ufs_blockmap }, /* blockmap */ + { &vnop_kqfilt_add_desc, (VOPFUNC)ufsfifo_kqfilt_add }, /* kqfilt_add */ { (struct vnodeop_desc*)NULL, (int(*)())NULL } }; struct vnodeopv_desc ffs_fifoop_opv_desc = @@ -287,82 +238,34 @@ int doclusterwrite = 0; /* ARGSUSED */ int ffs_fsync(ap) - struct vop_fsync_args /* { + struct vnop_fsync_args /* { struct vnode *a_vp; - struct ucred *a_cred; int a_waitfor; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; + return(ffs_fsync_internal(ap->a_vp, ap->a_waitfor)); +} + + +int +ffs_fsync_internal(vnode_t vp, int waitfor) +{ struct timeval tv; - struct buf *nbp; - int s; - struct inode *ip = VTOI(vp); - int retry = 0; + int wait = (waitfor == MNT_WAIT); /* * Write out any clusters. */ - cluster_push(vp); + cluster_push(vp, 0); /* * Flush all dirty buffers associated with a vnode. */ -loop: - s = splbio(); - for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY)) - continue; - if ((bp->b_flags & B_DELWRI) == 0) - panic("ffs_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - /* - * Wait for I/O associated with indirect blocks to complete, - * since there is no way to quickly wait for them below. 
- */ - if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) - (void) bawrite(bp); - else - (void) bwrite(bp); - goto loop; - } - - if (ap->a_waitfor == MNT_WAIT) { - while (vp->v_numoutput) { - vp->v_flag |= VBWAIT; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "ffs_fsync", 0); - } - - if (vp->v_dirtyblkhd.lh_first) { - /* still have some dirty buffers */ - if (retry++ > 10) { - vprint("ffs_fsync: dirty", vp); - splx(s); - /* - * Looks like the requests are not - * getting queued to the driver. - * Retrying here causes a cpu bound loop. - * Yield to the other threads and hope - * for the best. - */ - (void)tsleep((caddr_t)&vp->v_numoutput, - PRIBIO + 1, "ffs_fsync", hz/10); - retry = 0; - } else { - splx(s); - } - /* try again */ - goto loop; - } - } - splx(s); - tv = time; - return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); + buf_flushdirtyblks(vp, wait, 0, (char *)"ffs_fsync"); + microtime(&tv); + + return (ffs_update(vp, &tv, &tv, wait)); } /* @@ -370,71 +273,63 @@ loop: */ int ffs_reclaim(ap) - struct vop_reclaim_args /* { + struct vnop_reclaim_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; int error; - if (error = ufs_reclaim(vp, ap->a_p)) + if ( (error = ufs_reclaim(vp, vfs_context_proc(ap->a_context))) ) return (error); - FREE_ZONE(vp->v_data, sizeof (struct inode), - VFSTOUFS(vp->v_mount)->um_devvp->v_tag == VT_MFS ? - M_MFSNODE : M_FFSNODE); - vp->v_data = NULL; + + FREE_ZONE(vnode_fsnode(vp), sizeof (struct inode), M_FFSNODE); + + vnode_clearfsnode(vp); + return (0); } /* Blktooff converts a logical block number to a file offset */ int ffs_blktooff(ap) - struct vop_blktooff_args /* { + struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */ *ap; { register struct inode *ip; register FS *fs; - ufs_daddr_t bn; - if (ap->a_vp == NULL) return (EINVAL); - ip = VTOI(ap->a_vp); - fs = ip->I_FS; - bn = ap->a_lblkno; + fs = VTOI(ap->a_vp)->I_FS; - if ((long)bn < 0) { - panic("-ve blkno in ffs_blktooff"); - bn = -(long)bn; - } + *ap->a_offset = (off_t)lblktosize(fs, ap->a_lblkno); - *ap->a_offset = (off_t)lblktosize(fs, bn); return (0); } /* Blktooff converts a logical block number to a file offset */ int ffs_offtoblk(ap) - struct vop_offtoblk_args /* { + struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */ *ap; { - register struct inode *ip; - register FS *fs; + register FS *fs; if (ap->a_vp == NULL) return (EINVAL); - ip = VTOI(ap->a_vp); - fs = ip->I_FS; + fs = VTOI(ap->a_vp)->I_FS; + + *ap->a_lblkno = (daddr64_t)lblkno(fs, ap->a_offset); - *ap->a_lblkno = (daddr_t)lblkno(fs, ap->a_offset); return (0); } diff --git a/bsd/ufs/ufs/Makefile b/bsd/ufs/ufs/Makefile index 5717ecbea..08c53815e 100644 --- a/bsd/ufs/ufs/Makefile +++ b/bsd/ufs/ufs/Makefile @@ -20,7 +20,7 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - dinode.h dir.h inode.h lockf.h quota.h ufs_extern.h ufsmount.h + dinode.h dir.h inode.h quota.h ufs_extern.h ufsmount.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/ufs/ufs/inode.h b/bsd/ufs/ufs/inode.h index ef40be739..37a5fc619 100644 --- a/bsd/ufs/ufs/inode.h +++ b/bsd/ufs/ufs/inode.h @@ -67,6 +67,7 @@ #ifdef __APPLE_API_PRIVATE #include #include +#include #include #include #include @@ -96,8 +97,8 @@ struct inode { struct klist i_knotes; /* knotes attached to this vnode */ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. 
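
The rewritten ffs_fsync() above shows how much of the old code was buffer-cache bookkeeping: the splbio()-protected scan of v_dirtyblkhd, the B_BUSY/B_DELWRI checks, and the retry heuristic all disappear into buf_flushdirtyblks(), with cluster_push() flushing delayed-write clusters first. The whole idiom, with an illustrative name:

static int
myfs_fsync_internal(vnode_t vp, int waitfor)
{
        int wait = (waitfor == MNT_WAIT);

        cluster_push(vp, 0);                            /* push delayed-write clusters */
        buf_flushdirtyblks(vp, wait, 0, "myfs_fsync");  /* walk and wait inside the KPI */
        return (0);
}
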
*/ u_quad_t i_modrev; /* Revision level for NFS lease. */ - struct lockf *i_lockf;/* Head of byte-level lock list. */ - struct lock__bsd__ i_lock; /* Inode lock. */ + void *i_lockf; /* DEPRECATED */ + /* * Side effects; used during directory lookup. */ @@ -107,6 +108,7 @@ struct inode { doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ + daddr_t i_lastr; /* last read... read-ahead */ /* * The on-disk dinode itself. */ @@ -173,17 +175,15 @@ struct indir { (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ - (ip)->i_ctime = time.tv_sec; \ + (ip)->i_ctime = (t2)->tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } #define VN_KNOTE(vp, hint) KNOTE(&VTOI(vp)->i_knotes, (hint)) -/* This overlays the fid structure (see mount.h). */ +/* This overlays the FileID portion of NFS file handles. */ struct ufid { - u_int16_t ufid_len; /* Length of structure. */ - u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; diff --git a/bsd/ufs/ufs/lockf.h b/bsd/ufs/ufs/lockf.h deleted file mode 100644 index 92121a1f5..000000000 --- a/bsd/ufs/ufs/lockf.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Scooter Morris at Genentech Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)lockf.h 8.2 (Berkeley) 10/26/94 - */ -#ifndef _UFS_LOCKF_H_ -#define _UFS_LOCKF_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -/* - * The lockf structure is a kernel structure which contains the information - * associated with a byte range lock. The lockf structures are linked into - * the inode structure. Locks are sorted by the starting byte of the lock for - * efficiency. - */ -TAILQ_HEAD(locklist, lockf); - -struct lockf { - short lf_flags; /* Semantics: F_POSIX, F_FLOCK, F_WAIT */ - short lf_type; /* Lock type: F_RDLCK, F_WRLCK */ - off_t lf_start; /* Byte # of the start of the lock */ - off_t lf_end; /* Byte # of the end of the lock (-1=EOF) */ - caddr_t lf_id; /* Id of the resource holding the lock */ - struct inode *lf_inode; /* Back pointer to the inode */ - struct lockf *lf_next; /* Pointer to the next lock on this inode */ - struct locklist lf_blkhd; /* List of requests blocked on this lock */ - TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ -}; - -/* Maximum length of sleep chains to traverse to try and detect deadlock. */ -#define MAXDEPTH 50 - -__BEGIN_DECLS -void lf_addblock __P((struct lockf *, struct lockf *)); -int lf_clearlock __P((struct lockf *)); -int lf_findoverlap __P((struct lockf *, - struct lockf *, int, struct lockf ***, struct lockf **)); -struct lockf * - lf_getblock __P((struct lockf *)); -int lf_getlock __P((struct lockf *, struct flock *)); -int lf_setlock __P((struct lockf *)); -void lf_split __P((struct lockf *, struct lockf *)); -void lf_wakelock __P((struct lockf *)); -__END_DECLS - -#ifdef LOCKF_DEBUG -extern int lockf_debug; - -__BEGIN_DECLS -void lf_print __P((char *, struct lockf *)); -void lf_printlist __P((char *, struct lockf *)); -__END_DECLS -#endif - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! 
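
The deletion of ufs/lockf.h here, together with the i_lockf field in inode.h becoming a deprecated placeholder and every advlock table entry becoming err_advlock, is one change: byte-range and flock-style advisory locks are now tracked by the common VFS layer. A filesystem opts in once at mount time, as ffs_mountfs() does above; a minimal sketch with an illustrative name:

static int
myfs_mount_setup(mount_t mp)
{
        /*
         * Let the VFS keep advisory locks in the common layer;
         * the filesystem no longer maintains per-inode lockf lists.
         */
        vfs_setlocklocal(mp);
        return (0);
}
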
_UFS_LOCKF_H_ */ - diff --git a/bsd/ufs/ufs/quota.h b/bsd/ufs/ufs/quota.h index 13de74d26..f48b7f8f5 100644 --- a/bsd/ufs/ufs/quota.h +++ b/bsd/ufs/ufs/quota.h @@ -76,23 +76,23 @@ struct mount; struct proc; struct ucred; __BEGIN_DECLS -int chkdq __P((struct inode *, int64_t, struct ucred *, int)); -int chkdqchg __P((struct inode *, int64_t, struct ucred *, int)); -int chkiq __P((struct inode *, long, struct ucred *, int)); -int chkiqchg __P((struct inode *, long, struct ucred *, int)); -int getinoquota __P((struct inode *)); -int getquota __P((struct mount *, u_long, int, caddr_t)); -int qsync __P((struct mount *mp)); -int quotaoff __P((struct proc *, struct mount *, int)); -int quotaon __P((struct proc *, struct mount *, int, caddr_t, enum uio_seg)); -int setquota __P((struct mount *, u_long, int, caddr_t)); -int setuse __P((struct mount *, u_long, int, caddr_t)); -int ufs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); +int chkdq(struct inode *, int64_t, struct ucred *, int); +int chkdqchg(struct inode *, int64_t, struct ucred *, int); +int chkiq(struct inode *, long, struct ucred *, int); +int chkiqchg(struct inode *, long, struct ucred *, int); +int getinoquota(struct inode *); +int getquota(struct mount *, u_long, int, caddr_t); +int qsync(struct mount *mp); +int quotaoff(struct mount *, int); +int quotaon(vfs_context_t, struct mount *, int, caddr_t); +int setquota(struct mount *, u_long, int, caddr_t); +int setuse(struct mount *, u_long, int, caddr_t); +int ufs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t); __END_DECLS #if DIAGNOSTIC __BEGIN_DECLS -void chkdquot __P((struct inode *)); +void chkdquot(struct inode *); __END_DECLS #endif #endif /* KERNEL */ diff --git a/bsd/ufs/ufs/ufs_attrlist.c b/bsd/ufs/ufs/ufs_attrlist.c index 161391c1d..bea11d309 100644 --- a/bsd/ufs/ufs/ufs_attrlist.c +++ b/bsd/ufs/ufs/ufs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -28,40 +28,18 @@ #include #include -#include +#include #include #include #include +#include #include #include #include +#include #include "ufsmount.h" -/* -12345678901234567890123456789012345678901234567890123456789012345678901234567890 -*/ -enum { - UFS_ATTR_CMN_NATIVE = 0, - UFS_ATTR_CMN_SUPPORTED = 0, - UFS_ATTR_VOL_NATIVE = ATTR_VOL_NAME | - ATTR_VOL_CAPABILITIES | - ATTR_VOL_ATTRIBUTES, - UFS_ATTR_VOL_SUPPORTED = UFS_ATTR_VOL_NATIVE, - UFS_ATTR_DIR_NATIVE = 0, - UFS_ATTR_DIR_SUPPORTED = 0, - UFS_ATTR_FILE_NATIVE = 0, - UFS_ATTR_FILE_SUPPORTED = 0, - UFS_ATTR_FORK_NATIVE = 0, - UFS_ATTR_FORK_SUPPORTED = 0, - - UFS_ATTR_CMN_SETTABLE = 0, - UFS_ATTR_VOL_SETTABLE = ATTR_VOL_NAME, - UFS_ATTR_DIR_SETTABLE = 0, - UFS_ATTR_FILE_SETTABLE = 0, - UFS_ATTR_FORK_SETTABLE = 0 -}; - static char ufs_label_magic[4] = UFS_LABEL_MAGIC; /* Copied from diskdev_cmds/disklib/ufslabel.c */ @@ -90,8 +68,8 @@ reduce(int *sum) } /* Copied from diskdev_cmds/disklib/ufslabel.c */ -static unsigned short -in_cksum(void *data, int len) +__private_extern__ unsigned short +ul_cksum(void *data, int len) { u_short *w; int sum; @@ -135,7 +113,7 @@ in_cksum(void *data, int len) } /* Adapted from diskdev_cmds/disklib/ufslabel.c */ -static boolean_t +__private_extern__ boolean_t ufs_label_check(struct ufslabel *ul_p) { u_int16_t calc; @@ -166,7 +144,7 @@ ufs_label_check(struct ufslabel *ul_p) checksum = ul_p->ul_checksum; /* Remember previous checksum. 
*/ ul_p->ul_checksum = 0; - calc = in_cksum(ul_p, sizeof(*ul_p)); + calc = ul_cksum(ul_p, sizeof(*ul_p)); if (calc != checksum) { #ifdef DEBUG printf("ufslabel_check: label checksum %x (should be %x)\n", @@ -177,632 +155,16 @@ ufs_label_check(struct ufslabel *ul_p) return (TRUE); } -static void +__private_extern__ void ufs_label_init(struct ufslabel *ul_p) { + struct timeval tv; + + microtime(&tv); + bzero(ul_p, sizeof(*ul_p)); ul_p->ul_version = htonl(UFS_LABEL_VERSION); bcopy(ufs_label_magic, &ul_p->ul_magic, sizeof(ul_p->ul_magic)); - ul_p->ul_time = htonl(time.tv_sec); -} - -static int -ufs_get_label(struct vnode *vp, struct ucred *cred, char *label, - int *name_length) -{ - int error; - int devBlockSize; - struct mount *mp; - struct vnode *devvp; - struct buf *bp; - struct ufslabel *ulp; - - mp = vp->v_mount; - devvp = VFSTOUFS(mp)->um_devvp; - VOP_DEVBLOCKSIZE(devvp, &devBlockSize); - - if (error = meta_bread(devvp, (ufs_daddr_t)(UFS_LABEL_OFFSET / devBlockSize), - UFS_LABEL_SIZE, cred, &bp)) - goto out; - - /* - * Since the disklabel is read directly by older user space code, - * make sure this buffer won't remain in the cache when we release it. - * - * It would be better if that user space code was modified to get - * at the fields of the disklabel via the filesystem (such as - * getattrlist). - */ - SET(bp->b_flags, B_NOCACHE); - - ulp = (struct ufslabel *) bp->b_data; - if (ufs_label_check(ulp)) { - int length; - /* Copy the name out */ - length = ulp->ul_namelen; -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) - length = NXSwapShort(length); -#endif - if (length > 0 && length <= UFS_MAX_LABEL_NAME) { - bcopy(ulp->ul_name, label, length); - *name_length = length; - } else { - /* Return an empty name */ - *label = '\0'; - *name_length = 0; - } - } - -out: - if (bp) - brelse(bp); - return error; -} - -static int ufs_set_label(struct vnode *vp, struct ucred *cred, - const char *label, int name_length) -{ - int error; - int devBlockSize; - struct mount *mp; - struct vnode *devvp; - struct buf *bp; - struct ufslabel *ulp; - - mp = vp->v_mount; - - /* Validate the new name's length */ - if (name_length < 0 || name_length > UFS_MAX_LABEL_NAME) - return EINVAL; - - /* Read UFS_LABEL_SIZE bytes at UFS_LABEL_OFFSET */ - devvp = VFSTOUFS(mp)->um_devvp; - VOP_DEVBLOCKSIZE(devvp, &devBlockSize); - if (error = meta_bread(devvp, (ufs_daddr_t)(UFS_LABEL_OFFSET / devBlockSize), - UFS_LABEL_SIZE, cred, &bp)) - goto out; - - /* - * Since the disklabel is read directly by older user space code, - * make sure this buffer won't remain in the cache when we release it. - * - * It would be better if that user space code was modified to get - * at the fields of the disklabel via the filesystem (such as - * getattrlist). - */ - SET(bp->b_flags, B_NOCACHE); - - /* Validate the label structure; init if not valid */ - ulp = (struct ufslabel *) bp->b_data; - if (!ufs_label_check(ulp)) - ufs_label_init(ulp); - - /* Copy new name over existing name */ - ulp->ul_namelen = name_length; -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) - ulp->ul_namelen = NXSwapShort(ulp->ul_namelen); -#endif - bcopy(label, ulp->ul_name, name_length); - - /* Update the checksum */ - ulp->ul_checksum = 0; - ulp->ul_checksum = in_cksum(ulp, sizeof(*ulp)); - - /* Write the label back to disk */ - bwrite(bp); - bp = NULL; - -out: - if (bp) - brelse(bp); - return error; -} - -/* - * Pack a C-style string into an attribute buffer. Returns the new varptr. 
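The packstr()/packtext() helpers being deleted here encode the getattrlist buffer convention: each variable-length attribute occupies a fixed-size struct attrreference (offset plus length) in the fixed region, while the bytes themselves land in the trailing variable region, rounded up to 4-byte alignment. Condensed to its essentials (memcpy standing in for the original strncpy, error handling omitted), the idiom is:

    static void *
    pack_cstring(const char *s, struct attrreference *ref, void *varptr)
    {
            unsigned long length = strlen(s) + 1;       /* include the NUL */

            /* fixed region: record where the bytes live and how many */
            ref->attr_dataoffset = (char *)varptr - (char *)ref;
            ref->attr_length = length;

            /* variable region: the string itself */
            memcpy(varptr, s, length);

            /* keep the variable region 4-byte aligned for the next caller */
            return (char *)varptr + ((length + 3) & ~3);
    }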
- */ -static void * -packstr(char *s, void *attrptr, void *varptr) -{ - struct attrreference *ref = attrptr; - u_long length; - - length = strlen(s) + 1; /* String, plus terminator */ - - /* - * In the fixed-length part of buffer, store the offset and length of - * the variable-length data. - */ - ref->attr_dataoffset = (u_int8_t *)varptr - (u_int8_t *)attrptr; - ref->attr_length = length; - - /* Copy the string to variable-length part of buffer */ - (void) strncpy((unsigned char *)varptr, s, length); - - /* Advance pointer past string, and round up to multiple of 4 bytes */ - return (char *)varptr + ((length + 3) & ~3); + ul_p->ul_time = htonl(tv.tv_sec); } -/* - * Pack an unterminated string into an attribute buffer as a C-style - * string. Copies the indicated number of characters followed by a - * terminating '\0'. Returns the new varptr. - */ -static void * -packtext(u_char *text, u_int text_len, void *attrptr, void *varptr) -{ - struct attrreference *ref = attrptr; - u_long length; /* of the attribute, including terminator */ - - length = text_len + 1; /* String, plus terminator */ - - /* - * In the fixed-length part of buffer, store the offset and length of - * the variable-length data. - */ - ref->attr_dataoffset = (u_int8_t *) varptr - (u_int8_t *) attrptr; - ref->attr_length = length; - - /* Copy the string to variable-length part of buffer */ - bcopy(text, varptr, text_len); - ((char *) varptr)[text_len] = '\0'; - - /* Advance pointer past string, and round up to multiple of 4 bytes */ - return (char *) varptr + ((length + 3) & ~3); -} - -/* - * ufs_packvolattr - * - * Pack the volume-related attributes from a getattrlist call into result - * buffers. Fields are packed in order based on the bitmap masks. - * Attributes with smaller masks are packed first. - * - * The buffer pointers are updated to point past the data that was returned. - */ -static int ufs_packvolattr( - struct vnode *vp, /* The volume's vnode */ - struct ucred *cred, - struct attrlist *alist, /* Desired attributes */ - void **attrptrptr, /* Fixed-size attributes buffer */ - void **varptrptr) /* Variable-size attributes buffer */ -{ - int error; - attrgroup_t a; - void *attrptr = *attrptrptr; - void *varptr = *varptrptr; - - a = alist->volattr; - if (a) { - if (a & ATTR_VOL_NAME) { - int length; - char name[UFS_MAX_LABEL_NAME]; - - error = ufs_get_label(vp, cred, name, &length); - if (error) - return error; - - varptr = packtext(name, length, attrptr, varptr); - ++((struct attrreference *)attrptr); - } - - if (a & ATTR_VOL_CAPABILITIES) { - vol_capabilities_attr_t *vcapattrptr; - - vcapattrptr = (vol_capabilities_attr_t *) attrptr; - - /* - * Capabilities this volume format has. Note that - * we do not set VOL_CAP_FMT_PERSISTENTOBJECTIDS. - * That's because we can't resolve an inode number - * into a directory entry (parent and name), which - * Carbon would need to support PBResolveFileIDRef. 
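The comment above flags the subtlety; the two parallel bitmaps assigned just below are easy to misread. capabilities[] says what the format provides, while valid[] says which bits the filesystem is making a definite statement about: a bit set in valid[] but clear in capabilities[] means "known unsupported", which is exactly how the deleted code reports VOL_CAP_FMT_PERSISTENTOBJECTIDS. A trimmed sketch of the idiom, for a hypothetical format, using the constants from <sys/attr.h>:

    #include <sys/attr.h>

    static void
    report_format_caps(vol_capabilities_attr_t *cap)
    {
            /* what this (hypothetical) format really implements */
            cap->capabilities[VOL_CAPABILITIES_FORMAT] =
                VOL_CAP_FMT_SYMBOLICLINKS | VOL_CAP_FMT_HARDLINKS;

            /* bits we can answer yes or no for; PERSISTENTOBJECTIDS is
             * valid-but-clear, i.e. known to be unsupported */
            cap->valid[VOL_CAPABILITIES_FORMAT] =
                VOL_CAP_FMT_SYMBOLICLINKS | VOL_CAP_FMT_HARDLINKS |
                VOL_CAP_FMT_PERSISTENTOBJECTIDS;
    }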
- */ - vcapattrptr->capabilities[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_SYMBOLICLINKS | - VOL_CAP_FMT_HARDLINKS | - VOL_CAP_FMT_SPARSE_FILES | - VOL_CAP_FMT_CASE_SENSITIVE | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS ; - vcapattrptr->capabilities[VOL_CAPABILITIES_INTERFACES] - = VOL_CAP_INT_NFSEXPORT | - VOL_CAP_INT_VOL_RENAME | - VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK ; - vcapattrptr->capabilities[VOL_CAPABILITIES_RESERVED1] - = 0; - vcapattrptr->capabilities[VOL_CAPABILITIES_RESERVED2] - = 0; - - /* Capabilities we know about: */ - vcapattrptr->valid[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_PERSISTENTOBJECTIDS | - VOL_CAP_FMT_SYMBOLICLINKS | - VOL_CAP_FMT_HARDLINKS | - VOL_CAP_FMT_JOURNAL | - VOL_CAP_FMT_JOURNAL_ACTIVE | - VOL_CAP_FMT_NO_ROOT_TIMES | - VOL_CAP_FMT_SPARSE_FILES | - VOL_CAP_FMT_ZERO_RUNS | - VOL_CAP_FMT_CASE_SENSITIVE | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS ; - vcapattrptr->valid[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_SEARCHFS | - VOL_CAP_INT_ATTRLIST | - VOL_CAP_INT_NFSEXPORT | - VOL_CAP_INT_READDIRATTR | - VOL_CAP_INT_EXCHANGEDATA | - VOL_CAP_INT_COPYFILE | - VOL_CAP_INT_ALLOCATE | - VOL_CAP_INT_VOL_RENAME | - VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK ; - vcapattrptr->valid[VOL_CAPABILITIES_RESERVED1] = 0; - vcapattrptr->valid[VOL_CAPABILITIES_RESERVED2] = 0; - - ++((vol_capabilities_attr_t *)attrptr); - } - - if (a & ATTR_VOL_ATTRIBUTES) { - vol_attributes_attr_t *volattrptr; - - volattrptr = (vol_attributes_attr_t *)attrptr; - - volattrptr->validattr.commonattr = - UFS_ATTR_CMN_SUPPORTED; - volattrptr->validattr.volattr = - UFS_ATTR_VOL_SUPPORTED; - volattrptr->validattr.dirattr = - UFS_ATTR_DIR_SUPPORTED; - volattrptr->validattr.fileattr = - UFS_ATTR_FILE_SUPPORTED; - volattrptr->validattr.forkattr = - UFS_ATTR_FORK_SUPPORTED; - - volattrptr->nativeattr.commonattr = - UFS_ATTR_CMN_NATIVE; - volattrptr->nativeattr.volattr = - UFS_ATTR_VOL_NATIVE; - volattrptr->nativeattr.dirattr = - UFS_ATTR_DIR_NATIVE; - volattrptr->nativeattr.fileattr = - UFS_ATTR_FILE_NATIVE; - volattrptr->nativeattr.forkattr = - UFS_ATTR_FORK_NATIVE; - - ++((vol_attributes_attr_t *)attrptr); - } - } - - /* Update the buffer pointers to point past what we just returned */ - *attrptrptr = attrptr; - *varptrptr = varptr; - - return 0; -} - -/* - * Pack all attributes from a getattrlist or readdirattr call into - * the result buffer. For now, we only support volume attributes. - */ -static int -ufs_packattr(struct vnode *vp, struct ucred *cred, struct attrlist *alist, - void **attrptr, void **varptr) -{ - int error=0; - - if (alist->volattr != 0) - error = ufs_packvolattr(vp, cred, alist, attrptr, varptr); - - return error; -} - -/* - * Calculate the fixed-size space required to hold a set of attributes. - * For variable-length attributes, this will be the size of the - * attribute reference (an offset and length). - */ -static size_t -ufs_attrsize(struct attrlist *attrlist) -{ - size_t size; - attrgroup_t a = 0; - -#if ((ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | ATTR_CMN_OBJTYPE | \ - ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | \ - ATTR_CMN_PAROBJID | ATTR_CMN_SCRIPT | ATTR_CMN_CRTIME | \ - ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | ATTR_CMN_ACCTIME | \ - ATTR_CMN_BKUPTIME | ATTR_CMN_FNDRINFO | ATTR_CMN_OWNERID | \ - ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | ATTR_CMN_NAMEDATTRCOUNT | \ - ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS) \ - != ATTR_CMN_VALIDMASK) -#error ufs_attrsize: Missing bits in common mask computation! 
-#endif - -#if ((ATTR_VOL_FSTYPE | ATTR_VOL_SIGNATURE | ATTR_VOL_SIZE | \ - ATTR_VOL_SPACEFREE | ATTR_VOL_SPACEAVAIL | ATTR_VOL_MINALLOCATION | \ - ATTR_VOL_ALLOCATIONCLUMP | ATTR_VOL_IOBLOCKSIZE | \ - ATTR_VOL_OBJCOUNT | ATTR_VOL_FILECOUNT | ATTR_VOL_DIRCOUNT | \ - ATTR_VOL_MAXOBJCOUNT | ATTR_VOL_MOUNTPOINT | ATTR_VOL_NAME | \ - ATTR_VOL_MOUNTFLAGS | ATTR_VOL_INFO | ATTR_VOL_MOUNTEDDEVICE | \ - ATTR_VOL_ENCODINGSUSED | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES) \ - != ATTR_VOL_VALIDMASK) -#error ufs_attrsize: Missing bits in volume mask computation! -#endif - -#if ((ATTR_DIR_LINKCOUNT | ATTR_DIR_ENTRYCOUNT | ATTR_DIR_MOUNTSTATUS) \ - != ATTR_DIR_VALIDMASK) -#error ufs_attrsize: Missing bits in directory mask computation! -#endif - -#if ((ATTR_FILE_LINKCOUNT | ATTR_FILE_TOTALSIZE | ATTR_FILE_ALLOCSIZE | \ - ATTR_FILE_IOBLOCKSIZE | ATTR_FILE_CLUMPSIZE | ATTR_FILE_DEVTYPE | \ - ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST | \ - ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE | \ - ATTR_FILE_DATAEXTENTS | ATTR_FILE_RSRCLENGTH | \ - ATTR_FILE_RSRCALLOCSIZE | ATTR_FILE_RSRCEXTENTS) \ - != ATTR_FILE_VALIDMASK) -#error ufs_attrsize: Missing bits in file mask computation! -#endif - -#if ((ATTR_FORK_TOTALSIZE | ATTR_FORK_ALLOCSIZE) != ATTR_FORK_VALIDMASK) -#error ufs_attrsize: Missing bits in fork mask computation! -#endif - - size = 0; - - if ((a = attrlist->volattr) != 0) { - if (a & ATTR_VOL_NAME) - size += sizeof(struct attrreference); - if (a & ATTR_VOL_CAPABILITIES) - size += sizeof(vol_capabilities_attr_t); - if (a & ATTR_VOL_ATTRIBUTES) - size += sizeof(vol_attributes_attr_t); - }; - - /* - * Ignore common, dir, file, and fork attributes since we - * don't support those yet. - */ - - return size; -} - -/* -# -#% getattrlist vp = = = -# - vop_getattrlist { - IN struct vnode *vp; - IN struct attrlist *alist; - INOUT struct uio *uio; - IN struct ucred *cred; - IN struct proc *p; - }; - - */ -__private_extern__ int -ufs_getattrlist(struct vop_getattrlist_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct attrlist *alist = ap->a_alist; - size_t fixedblocksize; - size_t attrblocksize; - size_t attrbufsize; - void *attrbufptr; - void *attrptr; - void *varptr; - int error; - - /* - * Check the attrlist for valid inputs (i.e. be sure we understand what - * caller is asking). - */ - if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || - ((alist->commonattr & ~ATTR_CMN_VALIDMASK) != 0) || - ((alist->volattr & ~ATTR_VOL_VALIDMASK) != 0) || - ((alist->dirattr & ~ATTR_DIR_VALIDMASK) != 0) || - ((alist->fileattr & ~ATTR_FILE_VALIDMASK) != 0) || - ((alist->forkattr & ~ATTR_FORK_VALIDMASK) != 0)) - return EINVAL; - - /* - * Requesting volume information requires setting the - * ATTR_VOL_INFO bit. Also, volume info requests are - * mutually exclusive with all other info requests. - */ - if ((alist->volattr != 0) && - (((alist->volattr & ATTR_VOL_INFO) == 0) || - (alist->dirattr != 0) || (alist->fileattr != 0) || - alist->forkattr != 0)) - return EINVAL; - - /* - * Make sure caller isn't asking for an attibute we don't support. - */ - if ((alist->commonattr & ~UFS_ATTR_CMN_SUPPORTED) != 0 || - (alist->volattr & ~(UFS_ATTR_VOL_SUPPORTED | ATTR_VOL_INFO)) != 0 || - (alist->dirattr & ~UFS_ATTR_DIR_SUPPORTED) != 0 || - (alist->fileattr & ~UFS_ATTR_FILE_SUPPORTED) != 0 || - (alist->forkattr & ~UFS_ATTR_FORK_SUPPORTED) != 0) - return EOPNOTSUPP; - - /* - * Requesting volume information requires a vnode for the volume root. 
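The deleted ufs_attrsize() opens with a wall of preprocessor checks like the ones above, and the trick generalizes: OR together every bit a routine knows how to size and compare against the interface's VALIDMASK at compile time, so a newly added attribute bit breaks the build instead of being silently mis-sized. A self-contained sketch with made-up bit names:

    #define FEATURE_A       0x01
    #define FEATURE_B       0x02
    #define FEATURE_C       0x04
    #define VALIDMASK       0x07    /* every bit the interface defines */

    /* compile-time completeness check: adding a FEATURE_D to VALIDMASK
     * without teaching the sizing code about it becomes a build break */
    #if ((FEATURE_A | FEATURE_B | FEATURE_C) != VALIDMASK)
    #error sizing code is missing a feature bit
    #endif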
- */ - if (alist->volattr && (vp->v_flag & VROOT) == 0) - return EINVAL; - - fixedblocksize = ufs_attrsize(alist); - attrblocksize = fixedblocksize + (sizeof(u_long)); - if (alist->volattr & ATTR_VOL_NAME) - attrblocksize += 516; /* 512 + terminator + padding */ - attrbufsize = MIN(ap->a_uio->uio_resid, attrblocksize); - MALLOC(attrbufptr, void *, attrblocksize, M_TEMP, M_WAITOK); - attrptr = attrbufptr; - *((u_long *)attrptr) = 0; /* Set buffer length in case of errors */ - ++((u_long *)attrptr); /* skip over length field */ - varptr = ((char *)attrptr) + fixedblocksize; - - error = ufs_packattr(vp, ap->a_cred, alist, &attrptr, &varptr); - - if (error == 0) { - /* Don't return more data than was generated */ - attrbufsize = MIN(attrbufsize, (size_t) varptr - (size_t) attrbufptr); - - /* Return the actual buffer length */ - *((u_long *) attrbufptr) = attrbufsize; - - error = uiomove((caddr_t) attrbufptr, attrbufsize, ap->a_uio); - } - - FREE(attrbufptr, M_TEMP); - return error; -} - - -/* - * Unpack the volume-related attributes from a setattrlist call into the - * appropriate in-memory and on-disk structures. - */ -static int -ufs_unpackvolattr( - struct vnode *vp, - struct ucred *cred, - attrgroup_t attrs, - void *attrbufptr) -{ - int i; - int error; - attrreference_t *attrref; - - error = 0; - - if (attrs & ATTR_VOL_NAME) { - char *name; - int name_length; - - attrref = attrbufptr; - name = ((char*)attrbufptr) + attrref->attr_dataoffset; - name_length = strlen(name); - ufs_set_label(vp, cred, name, name_length); - - /* Advance buffer pointer past attribute reference */ - attrbufptr = ++attrref; - } - - return error; -} - - - -/* - * Unpack the attributes from a setattrlist call into the - * appropriate in-memory and on-disk structures. Right now, - * we only support the volume name. - */ -static int -ufs_unpackattr( - struct vnode *vp, - struct ucred *cred, - struct attrlist *alist, - void *attrbufptr) -{ - int error; - - error = 0; - - if (alist->volattr != 0) { - error = ufs_unpackvolattr(vp, cred, alist->volattr, - attrbufptr); - } - - return error; -} - - - -/* -# -#% setattrlist vp L L L -# -vop_setattrlist { - IN struct vnode *vp; - IN struct attrlist *alist; - INOUT struct uio *uio; - IN struct ucred *cred; - IN struct proc *p; -}; -*/ -__private_extern__ int -ufs_setattrlist(struct vop_setattrlist_args *ap) -{ - struct vnode *vp = ap->a_vp; - struct attrlist *alist = ap->a_alist; - size_t attrblocksize; - void *attrbufptr; - int error; - - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - - /* - * Check the attrlist for valid inputs (i.e. be sure we understand - * what caller is asking). - */ - if ((alist->bitmapcount != ATTR_BIT_MAP_COUNT) || - ((alist->commonattr & ~ATTR_CMN_SETMASK) != 0) || - ((alist->volattr & ~ATTR_VOL_SETMASK) != 0) || - ((alist->dirattr & ~ATTR_DIR_SETMASK) != 0) || - ((alist->fileattr & ~ATTR_FILE_SETMASK) != 0) || - ((alist->forkattr & ~ATTR_FORK_SETMASK) != 0)) - return EINVAL; - - /* - * Setting volume information requires setting the - * ATTR_VOL_INFO bit. Also, volume info requests are - * mutually exclusive with all other info requests. - */ - if ((alist->volattr != 0) && - (((alist->volattr & ATTR_VOL_INFO) == 0) || - (alist->dirattr != 0) || (alist->fileattr != 0) || - alist->forkattr != 0)) - return EINVAL; - - /* - * Make sure caller isn't asking for an attibute we don't support. - * Right now, all we support is setting the volume name. 
- */ - if ((alist->commonattr & ~UFS_ATTR_CMN_SETTABLE) != 0 || - (alist->volattr & ~(UFS_ATTR_VOL_SETTABLE | ATTR_VOL_INFO)) != 0 || - (alist->dirattr & ~UFS_ATTR_DIR_SETTABLE) != 0 || - (alist->fileattr & ~UFS_ATTR_FILE_SETTABLE) != 0 || - (alist->forkattr & ~UFS_ATTR_FORK_SETTABLE) != 0) - return EOPNOTSUPP; - - /* - * Setting volume information requires a vnode for the volume root. - */ - if (alist->volattr && (vp->v_flag & VROOT) == 0) - return EINVAL; - - attrblocksize = ap->a_uio->uio_resid; - if (attrblocksize < ufs_attrsize(alist)) - return EINVAL; - - MALLOC(attrbufptr, void *, attrblocksize, M_TEMP, M_WAITOK); - - error = uiomove((caddr_t)attrbufptr, attrblocksize, ap->a_uio); - if (error) - goto ErrorExit; - - error = ufs_unpackattr(vp, ap->a_cred, alist, attrbufptr); - -ErrorExit: - FREE(attrbufptr, M_TEMP); - return error; -} diff --git a/bsd/ufs/ufs/ufs_bmap.c b/bsd/ufs/ufs/ufs_bmap.c index 86cf8a596..ca7fd9352 100644 --- a/bsd/ufs/ufs/ufs_bmap.c +++ b/bsd/ufs/ufs/ufs_bmap.c @@ -67,9 +67,9 @@ #include #include #include -#include -#include -#include +#include /* for p_stats */ +#include +#include #include #include #include @@ -85,33 +85,6 @@ #include #endif /* REV_ENDIAN_FS */ -/* - * Bmap converts a the logical block number of a file to its physical block - * number on the disk. The conversion is done by using the logical block - * number to index into the array of block pointers described by the dinode. - */ -int -ufs_bmap(ap) - struct vop_bmap_args /* { - struct vnode *a_vp; - ufs_daddr_t a_bn; - struct vnode **a_vpp; - ufs_daddr_t *a_bnp; - int *a_runp; - } */ *ap; -{ - /* - * Check for underlying vnode requests and ensure that logical - * to physical mapping is requested. - */ - if (ap->a_vpp != NULL) - *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; - if (ap->a_bnp == NULL) - return (0); - - return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, - ap->a_runp)); -} /* * Indirect blocks are now on the vnode for the file. They are given negative @@ -129,7 +102,7 @@ ufs_bmap(ap) int ufs_bmaparray(vp, bn, bnp, ap, nump, runp) - struct vnode *vp; + vnode_t vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; @@ -170,7 +143,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) * don't create a block larger than the device can handle. */ *runp = 0; - maxrun = MAXPHYSIO / mp->mnt_stat.f_iosize - 1; + maxrun = MAXPHYSIO / mp->mnt_vfsstat.f_iosize - 1; } xap = ap == NULL ? a : ap; @@ -197,44 +170,54 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { - /* - * Exit the loop if there is no disk address assigned yet and - * the indirect block isn't in the cache, or if we were - * looking for an indirect block and we've found it. - */ + ufs_daddr_t *dataptr; + int bop; + + if ((metalbn = xap->in_lbn) == bn) + /* + * found the indirect block we were + * looking for... exit the loop + */ + break; + + if (daddr == 0) + bop = BLK_ONLYVALID | BLK_META; + else + bop = BLK_META; - metalbn = xap->in_lbn; - if (daddr == 0 && !incore(vp, metalbn) || metalbn == bn) + if (bp) + buf_brelse(bp); + bp = buf_getblk(vp, (daddr64_t)((unsigned)metalbn), mp->mnt_vfsstat.f_iosize, 0, 0, bop); + + if (bp == 0) { + /* + * Exit the loop if there is no disk address assigned yet and + * the indirect block isn't in the cache + */ break; + } /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. 
*/ - if (bp) - brelse(bp); - - xap->in_exists = 1; - bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, BLK_META); - if (bp->b_flags & (B_DONE | B_DELWRI)) { - trace(TR_BREADHIT, pack(vp, mp->mnt_stat.f_iosize), metalbn); + + if (buf_valid(bp)) { + trace(TR_BREADHIT, pack(vp, mp->mnt_vfsstat.f_iosize), metalbn); } -#if DIAGNOSTIC - else if (!daddr) - panic("ufs_bmaparry: indirect block not in cache"); -#endif else { - trace(TR_BREADMISS, pack(vp, mp->mnt_stat.f_iosize), metalbn); - bp->b_blkno = blkptrtodb(ump, daddr); - bp->b_flags |= B_READ; - VOP_STRATEGY(bp); + trace(TR_BREADMISS, pack(vp, mp->mnt_vfsstat.f_iosize), metalbn); + buf_setblkno(bp, blkptrtodb(ump, (daddr64_t)((unsigned)daddr))); + buf_setflags(bp, B_READ); + VNOP_STRATEGY(bp); current_proc()->p_stats->p_ru.ru_inblock++; /* XXX */ - if (error = biowait(bp)) { - brelse(bp); + if (error = (int)buf_biowait(bp)) { + buf_brelse(bp); return (error); } } - - daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; + dataptr = (ufs_daddr_t *)buf_dataptr(bp); + daddr = dataptr[xap->in_off]; #if REV_ENDIAN_FS if (rev_endian) daddr = NXSwapLong(daddr); @@ -245,16 +228,16 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, - NXSwapLong(((ufs_daddr_t *)bp->b_data)[bn - 1]), - NXSwapLong(((ufs_daddr_t *)bp->b_data)[bn])); + NXSwapLong(dataptr[bn - 1]), + NXSwapLong(dataptr[bn])); ++bn, ++*runp); } else { #endif /* REV_ENDIAN_FS */ for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, - ((ufs_daddr_t *)bp->b_data)[bn - 1], - ((ufs_daddr_t *)bp->b_data)[bn]); + dataptr[bn - 1], + dataptr[bn]); ++bn, ++*runp); #if REV_ENDIAN_FS } @@ -262,7 +245,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp) } } if (bp) - brelse(bp); + buf_brelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; @@ -352,332 +335,91 @@ ufs_getlbns(vp, bn, ap, nump) return (0); } /* - * Cmap converts a the file offset of a file to its physical block - * number on the disk And returns contiguous size for transfer. + * blockmap converts a file offset to its physical block + * number on the disk... it optionally returns the physically + * contiguous size.
*/ int -ufs_cmap(ap) - struct vop_cmap_args /* { +ufs_blockmap(ap) + struct vnop_blockmap_args /* { struct vnode *a_vp; off_t a_foffset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; + int a_flags; } */ *ap; { - struct vnode * vp = ap->a_vp; - ufs_daddr_t *bnp = ap->a_bpn; - size_t *runp = ap->a_run; - int size = ap->a_size; - daddr_t bn; - int nblks; - register struct inode *ip; + vnode_t vp = ap->a_vp; + daddr64_t * bnp = ap->a_bpn; + size_t * runp = ap->a_run; + int size = ap->a_size; + struct fs * fs; + struct inode *ip; + ufs_daddr_t lbn; ufs_daddr_t daddr = 0; - int devBlockSize=0; - struct fs *fs; - int retsize=0; - int error=0; + int devBlockSize = 0; + int retsize = 0; + int error = 0; + int nblks; ip = VTOI(vp); fs = ip->i_fs; + lbn = (ufs_daddr_t)lblkno(fs, ap->a_foffset); + devBlockSize = vfs_devblocksize(vnode_mount(vp)); - if (blkoff(fs, ap->a_foffset)) { - panic("ufs_cmap; allocation requested inside a block"); - } + if (blkoff(fs, ap->a_foffset)) + panic("ufs_blockmap; allocation requested inside a block"); - bn = (daddr_t)lblkno(fs, ap->a_foffset); - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + if (size % devBlockSize) + panic("ufs_blockmap: size is not multiple of device block size\n"); - if (size % devBlockSize) { - panic("ufs_cmap: size is not multiple of device block size\n"); - } - - if (error = VOP_BMAP(vp, bn, (struct vnode **) 0, &daddr, &nblks)) { - return(error); - } - - retsize = nblks * fs->fs_bsize; + if ((error = ufs_bmaparray(vp, lbn, &daddr, NULL, NULL, &nblks))) + return (error); if (bnp) - *bnp = daddr; + *bnp = (daddr64_t)daddr; if (ap->a_poff) *(int *)ap->a_poff = 0; - if (daddr == -1) { - if (size < fs->fs_bsize) { - retsize = fragroundup(fs, size); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - *runp = fs->fs_bsize; - } - return(0); - } - if (runp) { - if ((size < fs->fs_bsize)) { - *runp = size; - return(0); - } - if (retsize) { - retsize += fs->fs_bsize; - if(size >= retsize) - *runp = retsize; - else - *runp = size; + if (lbn < 0) { + /* + * we're dealing with the indirect blocks + * which are always fs_bsize in size + */ + retsize = (nblks + 1) * fs->fs_bsize; + } else if (daddr == -1 || nblks == 0) { + /* + * we're dealing with a 'hole'... UFS doesn't + * have a clean way to determine its size + * or + * there are no physically contiguous blocks + * so + * just return the size of the lbn we started with + */ + retsize = blksize(fs, ip, lbn); } else { - if (size < fs->fs_bsize) { - retsize = fragroundup(fs, size); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - *runp = fs->fs_bsize; - } - } - } - return (0); -} - - -#if NOTTOBEUSED -/* - * Cmap converts a the file offset of a file to its physical block - * number on the disk And returns contiguous size for transfer.
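Since ufs_blockmap() replaces both the VOP_BMAP and VOP_CMAP entry points, the run-length computation above is the part worth restating. Three cases decide how many physically contiguous bytes can be promised, and the result is always clamped to the caller's request. Condensed into a sketch with the same identifiers as the function above:

    if (lbn < 0)            /* indirect-block chain: always full blocks */
            retsize = (nblks + 1) * fs->fs_bsize;
    else if (daddr == -1 || nblks == 0)
            /* a hole, or no contiguous neighbors: just this block's size */
            retsize = blksize(fs, ip, lbn);
    else                    /* run of full blocks plus a possibly short tail */
            retsize = nblks * fs->fs_bsize + blksize(fs, ip, lbn + nblks);

    *runp = (retsize < size) ? retsize : size;  /* never more than asked */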
- */ -int -ufs_cmap(ap) - struct vop_cmap_args /* { - struct vnode *a_vp; - off_t a_foffset; - size_t a_size; - daddr_t *a_bpn; - size_t *a_run; - void *a_poff; - } */ *ap; -{ - struct vnode * vp = ap->a_vp; - ufs_daddr_t *bnp = ap->a_bpn; - size_t *runp = ap->a_run; - daddr_t bn; - int nblks, blks; - int *nump; - register struct inode *ip; - struct buf *bp; - struct ufsmount *ump; - struct mount *mp; - struct vnode *devvp; - struct indir a[NIADDR], *xap; - ufs_daddr_t daddr; - long metalbn; - int error, maxrun, num; - int devBlockSize=0; - struct fs *fs; - int size = ap->a_size; - int block_offset=0; - int retsize=0; -#if 1 - daddr_t orig_blkno; - daddr_t orig_bblkno; -#endif /* 1 */ -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); - fs = ip->i_fs; - - mp = vp->v_mount; - ump = VFSTOUFS(mp); - - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - bn = (daddr_t)lblkno(fs, ap->a_foffset); - - if (size % devBlockSize) { - panic("ufs_cmap: size is not multiple of device block size\n"); - } - - block_offset = blkoff(fs, ap->a_foffset); - if (block_offset) { - panic("ufs_cmap; allocation requested inside a block"); - } - -#if 1 - VOP_OFFTOBLK(vp, ap->a_foffset, & orig_blkno); -#endif /* 1 */ - /* less than block size and not block offset aligned */ - if ( (size < fs->fs_bsize) && fragoff(fs, size) && block_offset ) { - panic("ffs_cmap: size not a mult of fragment\n"); - } -#if 0 - if (size > fs->fs_bsize && fragoff(fs, size)) { - panic("ffs_cmap: more than bsize & not a multiple of fragment\n"); - } -#endif /* 0 */ -#if REV_ENDIAN_FS - rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - if(runp) - *runp = 0; - - if ( size > MAXPHYSIO) - size = MAXPHYSIO; - nblks = (blkroundup(fs, size))/fs->fs_bsize; - - xap = a; - num = 0; - if (error = ufs_getlbns(vp, bn, xap, &num)) - return (error); - - blks = 0; - if (num == 0) { - daddr = blkptrtodb(ump, ip->i_db[bn]); - *bnp = ((daddr == 0) ? -1 : daddr); - if (daddr && runp) { - for (++bn; bn < NDADDR && blks < nblks && - ip->i_db[bn] && - is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); - ++bn, ++blks); - - if (blks) { - retsize = lblktosize(fs, blks); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - if (size < fs->fs_bsize) { - retsize = fragroundup(fs, size); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - *runp = fs->fs_bsize; - } - } - if (ap->a_poff) - *(int *)ap->a_poff = 0; - } -#if 1 - if (VOP_BMAP(vp, orig_blkno, NULL, &orig_bblkno, NULL)) { - panic("vop_bmap failed\n"); - } - if(daddr != orig_bblkno) { - panic("vop_bmap and vop_cmap differ\n"); - } -#endif /* 1 */ - return (0); - } - - - /* Get disk address out of indirect block array */ - daddr = ip->i_ib[xap->in_off]; - - devvp = VFSTOUFS(vp->v_mount)->um_devvp; - for (bp = NULL, ++xap; --num; ++xap) { - /* - * Exit the loop if there is no disk address assigned yet - * or if we were looking for an indirect block and we've - * found it. - */ - - metalbn = xap->in_lbn; - if (daddr == 0 || metalbn == bn) - break; - /* - * We have a disk address for it, go fetch it. 
- */ - if (bp) - brelse(bp); - - xap->in_exists = 1; - bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, BLK_META); - if (bp->b_flags & (B_DONE | B_DELWRI)) { - trace(TR_BREADHIT, pack(vp, mp->mnt_stat.f_iosize), metalbn); - } - else { - trace(TR_BREADMISS, pack(vp, mp->mnt_stat.f_iosize), metalbn); - bp->b_blkno = blkptrtodb(ump, daddr); - bp->b_flags |= B_READ; - VOP_STRATEGY(bp); - current_proc()->p_stats->p_ru.ru_inblock++; /* XXX */ - if (error = biowait(bp)) { - brelse(bp); - return (error); - } - } - - daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; -#if REV_ENDIAN_FS - if (rev_endian) - daddr = NXSwapLong(daddr); -#endif /* REV_ENDIAN_FS */ - if (num == 1 && daddr && runp) { - blks = 0; -#if REV_ENDIAN_FS - if (rev_endian) { - for (bn = xap->in_off + 1; - bn < MNINDIR(ump) && blks < maxrun && - is_sequential(ump, - NXSwapLong(((ufs_daddr_t *)bp->b_data)[bn - 1]), - NXSwapLong(((ufs_daddr_t *)bp->b_data)[bn])); - ++bn, ++blks); - } else { -#endif /* REV_ENDIAN_FS */ - for (bn = xap->in_off + 1; - bn < MNINDIR(ump) && blks < maxrun && - is_sequential(ump, - ((ufs_daddr_t *)bp->b_data)[bn - 1], - ((ufs_daddr_t *)bp->b_data)[bn]); - ++bn, ++blks); -#if REV_ENDIAN_FS } -#endif /* REV_ENDIAN_FS */ + /* + * we have 1 or more blocks that are physically contiguous + * to our starting block number... the original block + (nblks - 1) + * blocks must be full sized since only the last block can be + * composed of fragments... + */ + retsize = nblks * fs->fs_bsize; + + /* + * now compute the size of the last block and add it in + */ + retsize += blksize(fs, ip, (lbn + nblks)); } + if (retsize < size) + *runp = retsize; + else + *runp = size; } - if (bp) - brelse(bp); - - daddr = blkptrtodb(ump, daddr); - *bnp = ((daddr == 0) ? -1 : daddr); - if (daddr && runp) { - if (blks) { - retsize = lblktosize(fs, blks); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - if (size < fs->fs_bsize) { - retsize = fragroundup(fs, size); - if(size >= retsize) - *runp = retsize; - else - *runp = size; - } else { - *runp = fs->fs_bsize; - } - } - - } - if (daddr && ap->a_poff) - *(int *)ap->a_poff = 0; -#if 1 - if (VOP_BMAP(vp, orig_blkno, (struct vnode **) 0, &orig_bblkno, 0)) { - panic("vop_bmap failed\n"); - } - if(daddr != orig_bblkno) { - panic("vop_bmap and vop_cmap differ\n"); - } -#endif /* 1 */ return (0); } -#endif /* NOTTOBEUSED */ diff --git a/bsd/ufs/ufs/ufs_byte_order.c b/bsd/ufs/ufs/ufs_byte_order.c index c8cf52d13..86f0b010d 100644 --- a/bsd/ufs/ufs/ufs_byte_order.c +++ b/bsd/ufs/ufs/ufs_byte_order.c @@ -339,11 +339,11 @@ byte_swap_dir_out(char *addr, int count) void byte_swap_dir_block_out(struct buf *bp) { - struct direct *ep = (struct direct *) bp->b_data; + struct direct *ep = (struct direct *) buf_dataptr(bp); int reclen, entryoffsetinblk = 0; - while (entryoffsetinblk < bp->b_bcount) { - ep = (struct direct *) (entryoffsetinblk + bp->b_data); + while (entryoffsetinblk < buf_count(bp)) { + ep = (struct direct *) (entryoffsetinblk + buf_dataptr(bp)); reclen = ep->d_reclen; entryoffsetinblk += reclen; byte_swap_int(ep->d_ino); diff --git a/bsd/ufs/ufs/ufs_byte_order.h b/bsd/ufs/ufs/ufs_byte_order.h index 12dd0badc..fda8614fd 100644 --- a/bsd/ufs/ufs/ufs_byte_order.h +++ b/bsd/ufs/ufs/ufs_byte_order.h @@ -41,26 +41,26 @@ #include #include -void byte_swap_longlongs __P((unsigned long long *, int)); -void byte_swap_ints __P((int *,
int); +void byte_swap_shorts(short *, int); -/* void byte_swap_superblock __P((struct fs *)); */ -void byte_swap_sbin __P((struct fs *)); -void byte_swap_sbout __P((struct fs *)); -void byte_swap_csum __P((struct csum *)); -void byte_swap_ocylgroup __P((struct cg *)); -void byte_swap_cgin __P((struct cg *, struct fs *)); -void byte_swap_cgout __P((struct cg *, struct fs *)); +/* void byte_swap_superblock(struct fs *); */ +void byte_swap_sbin(struct fs *); +void byte_swap_sbout(struct fs *); +void byte_swap_csum(struct csum *); +void byte_swap_ocylgroup(struct cg *); +void byte_swap_cgin(struct cg *, struct fs *); +void byte_swap_cgout(struct cg *, struct fs *); -void byte_swap_inode_in __P((struct dinode *, struct inode *)); -void byte_swap_inode_out __P((struct inode *, struct dinode *)); +void byte_swap_inode_in(struct dinode *, struct inode *); +void byte_swap_inode_out(struct inode *, struct dinode *); -void byte_swap_dir_block_in __P((char *, int)); -void byte_swap_dir_block_out __P((struct buf *)); -void byte_swap_direct __P((struct direct *)); -void byte_swap_dirtemplate_in __P((struct dirtemplate *)); -void byte_swap_minidir_in __P((struct direct *)); +void byte_swap_dir_block_in(char *, int); +void byte_swap_dir_block_out(buf_t); +void byte_swap_direct(struct direct *); +void byte_swap_dirtemplate_in(struct dirtemplate *); +void byte_swap_minidir_in(struct direct *); #endif /* __APPLE_API_PRIVATE */ #endif /* _UFS_BYTE_ORDER_H_ */ diff --git a/bsd/ufs/ufs/ufs_extern.h b/bsd/ufs/ufs/ufs_extern.h index 75469fa99..d7e9815c9 100644 --- a/bsd/ufs/ufs/ufs_extern.h +++ b/bsd/ufs/ufs/ufs_extern.h @@ -63,7 +63,6 @@ struct buf; struct direct; struct disklabel; -struct fid; struct flock; struct inode; struct mbuf; @@ -73,96 +72,84 @@ struct proc; struct ucred; struct ufs_args; struct uio; -struct vattr; +struct vnode_attr; struct vfsconf; struct vnode; __BEGIN_DECLS +int ufs_remove_internal(vnode_t, vnode_t, struct componentname *, int); +int ufs_access_internal(vnode_t, mode_t, ucred_t); + +int ffs_read_internal(vnode_t, struct uio *, int); +int ffs_write_internal(vnode_t, struct uio *, int, ucred_t); +int ffs_truncate_internal(vnode_t, off_t, int, ucred_t); + void diskerr - __P((struct buf *, char *, char *, int, int, struct disklabel *)); -void disksort __P((struct buf *, struct buf *)); -u_int dkcksum __P((struct disklabel *)); -char *readdisklabel __P((dev_t, int (*)(), struct disklabel *)); -int setdisklabel __P((struct disklabel *, struct disklabel *, u_long)); -int writedisklabel __P((dev_t, int (*)(), struct disklabel *)); + (struct buf *, char *, char *, int, int, struct disklabel *); +void disksort(struct buf *, struct buf *); +u_int dkcksum(struct disklabel *); +char *readdisklabel(dev_t, int (*)(), struct disklabel *); +int setdisklabel(struct disklabel *, struct disklabel *, u_long); +int writedisklabel(dev_t, int (*)(), struct disklabel *); -int ufs_access __P((struct vop_access_args *)); -int ufs_advlock __P((struct vop_advlock_args *)); -int ufs_bmap __P((struct vop_bmap_args *)); -int ufs_check_export __P((struct mount *, struct ufid *, struct mbuf *, - struct vnode **, int *exflagsp, struct ucred **)); -int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); -int ufs_close __P((struct vop_close_args *)); -int ufs_create __P((struct vop_create_args *)); -void ufs_dirbad __P((struct inode *, doff_t, char *)); -int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); -int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); -int ufs_direnter 
__P((struct inode *, struct vnode *,struct componentname *)); -int ufs_dirremove __P((struct vnode *, struct componentname*)); +int ufs_access(struct vnop_access_args *); +int ufs_checkpath(struct inode *, struct inode *, struct ucred *); +int ufs_close(struct vnop_close_args *); +int ufs_create(struct vnop_create_args *); +void ufs_dirbad(struct inode *, doff_t, const char *); +int ufs_dirbadentry(struct vnode *, struct direct *, int); +int ufs_dirempty(struct inode *, ino_t, struct ucred *); +int ufs_direnter(struct inode *, struct vnode *,struct componentname *); +int ufs_dirremove(struct vnode *, struct componentname*); int ufs_dirrewrite - __P((struct inode *, struct inode *, struct componentname *)); -int ufs_getattr __P((struct vop_getattr_args *)); -int ufs_getattrlist __P((struct vop_getattrlist_args *)); -int ufs_getlbns __P((struct vnode *, ufs_daddr_t, struct indir *, int *)); + (struct inode *, struct inode *, struct componentname *); +int ufs_getattr(struct vnop_getattr_args *); +int ufs_getlbns(struct vnode *, ufs_daddr_t, struct indir *, int *); struct vnode * - ufs_ihashget __P((dev_t, ino_t)); -void ufs_ihashinit __P((void)); -void ufs_ihashins __P((struct inode *)); + ufs_ihashget(dev_t, ino_t); +void ufs_ihashinit(void); +void ufs_ihashins(struct inode *); struct vnode * - ufs_ihashlookup __P((dev_t, ino_t)); -void ufs_ihashrem __P((struct inode *)); -int ufs_inactive __P((struct vop_inactive_args *)); -int ufs_init __P((struct vfsconf *)); -int ufs_ioctl __P((struct vop_ioctl_args *)); -int ufs_islocked __P((struct vop_islocked_args *)); -#if NFSSERVER -int lease_check __P((struct vop_lease_args *)); -#define ufs_lease_check lease_check -#else -#define ufs_lease_check ((int (*) __P((struct vop_lease_args *)))nullop) -#endif -int ufs_link __P((struct vop_link_args *)); -int ufs_lock __P((struct vop_lock_args *)); -int ufs_lookup __P((struct vop_lookup_args *)); -int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); -int ufs_mkdir __P((struct vop_mkdir_args *)); -int ufs_mknod __P((struct vop_mknod_args *)); -int ufs_mmap __P((struct vop_mmap_args *)); -int ufs_open __P((struct vop_open_args *)); -int ufs_pathconf __P((struct vop_pathconf_args *)); -int ufs_print __P((struct vop_print_args *)); -int ufs_readdir __P((struct vop_readdir_args *)); -int ufs_readlink __P((struct vop_readlink_args *)); -int ufs_reclaim __P((struct vnode *, struct proc *)); -int ufs_remove __P((struct vop_remove_args *)); -int ufs_rename __P((struct vop_rename_args *)); -#define ufs_revoke vop_revoke -int ufs_rmdir __P((struct vop_rmdir_args *)); -int ufs_root __P((struct mount *, struct vnode **)); -int ufs_seek __P((struct vop_seek_args *)); -int ufs_select __P((struct vop_select_args *)); -int ufs_kqfilt_add __P((struct vop_kqfilt_add_args *)); -int ufs_setattr __P((struct vop_setattr_args *)); -int ufs_setattrlist __P((struct vop_setattrlist_args *)); -int ufs_start __P((struct mount *, int, struct proc *)); -int ufs_strategy __P((struct vop_strategy_args *)); -int ufs_symlink __P((struct vop_symlink_args *)); -int ufs_unlock __P((struct vop_unlock_args *)); -int ufs_whiteout __P((struct vop_whiteout_args *)); -int ufs_vinit __P((struct mount *, - int (**)(), int (**)(), struct vnode **)); -int ufsspec_close __P((struct vop_close_args *)); -int ufsspec_read __P((struct vop_read_args *)); -int ufsspec_write __P((struct vop_write_args *)); + ufs_ihashlookup(dev_t, ino_t); +void ufs_ihashrem(struct inode *); +int ufs_inactive(struct vnop_inactive_args *); 
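The prototype churn in these headers is purely mechanical: __P() was the 4.4BSD portability macro from <sys/cdefs.h> that let one declaration serve both K&R and ANSI compilers, and once K&R support is dropped, expanding it by hand changes nothing. A simplified form of the idiom:

    /* simplified from the classic <sys/cdefs.h> */
    #if defined(__STDC__) || defined(__cplusplus)
    #define __P(protos)     protos          /* ANSI: keep the parameter list */
    #else
    #define __P(protos)     ()              /* K&R: drop it */
    #endif

    /* hence, under any ANSI compiler, these two are identical: */
    int     ufs_ihashinit __P((void));
    int     ufs_ihashinit(void);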
+int ufs_init(struct vfsconf *); +int ufs_ioctl(struct vnop_ioctl_args *); +int ufs_link(struct vnop_link_args *); +int ufs_lookup(struct vnop_lookup_args *); +int ufs_makeinode(struct vnode_attr *, struct vnode *, struct vnode **, struct componentname *); +int ufs_mkdir(struct vnop_mkdir_args *); +int ufs_mknod(struct vnop_mknod_args *); +int ufs_mmap(struct vnop_mmap_args *); +int ufs_open(struct vnop_open_args *); +int ufs_pathconf(struct vnop_pathconf_args *); +int ufs_readdir(struct vnop_readdir_args *); +int ufs_readlink(struct vnop_readlink_args *); +int ufs_reclaim(struct vnode *, struct proc *); +int ufs_remove(struct vnop_remove_args *); +int ufs_rename(struct vnop_rename_args *); +#define ufs_revoke nop_revoke +int ufs_rmdir(struct vnop_rmdir_args *); +int ufs_root(struct mount *, struct vnode **, vfs_context_t); +int ufs_select(struct vnop_select_args *); +int ufs_kqfilt_add(struct vnop_kqfilt_add_args *); +int ufs_setattr(struct vnop_setattr_args *); +int ufs_start(struct mount *, int, vfs_context_t); +int ufs_strategy(struct vnop_strategy_args *); +int ufs_symlink(struct vnop_symlink_args *); +int ufs_whiteout(struct vnop_whiteout_args *); +int ufsspec_close(struct vnop_close_args *); +int ufsspec_read(struct vnop_read_args *); +int ufsspec_write(struct vnop_write_args *); #if FIFO -int ufsfifo_read __P((struct vop_read_args *)); -int ufsfifo_write __P((struct vop_write_args *)); -int ufsfifo_close __P((struct vop_close_args *)); -int ufsfifo_kqfilt_add __P((struct vop_kqfilt_add_args *)); +int ufsfifo_read(struct vnop_read_args *); +int ufsfifo_write(struct vnop_write_args *); +int ufsfifo_close(struct vnop_close_args *); +int ufsfifo_kqfilt_add(struct vnop_kqfilt_add_args *); #endif -int ufs_blktooff __P((struct vop_blktooff_args *)); -int ufs_cmap __P((struct vop_cmap_args *)); +int ufs_blktooff(struct vnop_blktooff_args *); +int ufs_blockmap(struct vnop_blockmap_args *); __END_DECLS diff --git a/bsd/ufs/ufs/ufs_ihash.c b/bsd/ufs/ufs/ufs_ihash.c index a3da69ee5..140f8564a 100644 --- a/bsd/ufs/ufs/ufs_ihash.c +++ b/bsd/ufs/ufs/ufs_ihash.c @@ -57,7 +57,7 @@ #include #include -#include +#include #include #include #include @@ -72,7 +72,6 @@ LIST_HEAD(ihashhead, inode) *ihashtbl; u_long ihash; /* size of hash table - 1 */ #define INOHASH(device, inum) (&ihashtbl[((device) + (inum)) & ihash]) -struct slock ufs_ihash_slock; /* * Initialize inode hash table. @@ -82,7 +81,6 @@ ufs_ihashinit() { ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash); - simple_lock_init(&ufs_ihash_slock); } /* @@ -96,12 +94,9 @@ ufs_ihashlookup(dev, inum) { struct inode *ip; - simple_lock(&ufs_ihash_slock); for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) if (inum == ip->i_number && dev == ip->i_dev) break; - simple_unlock(&ufs_ihash_slock); - if (ip) return (ITOV(ip)); return (NULLVP); @@ -119,19 +114,18 @@ ufs_ihashget(dev, inum) struct proc *p = current_proc(); /* XXX */ struct inode *ip; struct vnode *vp; + uint32_t vid; loop: - simple_lock(&ufs_ihash_slock); for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) { if (inum == ip->i_number && dev == ip->i_dev) { - vp = ITOV(ip); + if (ISSET(ip->i_flag, IN_ALLOC)) { /* * inode is being created. 
Wait for it * to finish creation */ SET(ip->i_flag, IN_WALLOC); - simple_unlock(&ufs_ihash_slock); (void)tsleep((caddr_t)ip, PINOD, "ufs_ihashget", 0); goto loop; } @@ -143,18 +137,32 @@ loop: * error */ SET(ip->i_flag, IN_WTRANSIT); - simple_unlock(&ufs_ihash_slock); (void)tsleep((caddr_t)ip, PINOD, "ufs_ihashget1", 0); goto loop; } - simple_lock(&vp->v_interlock); - simple_unlock(&ufs_ihash_slock); - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) - goto loop; + vp = ITOV(ip); + /* + * the vid needs to be grabbed before we drop + * lock protecting the hash + */ + vid = vnode_vid(vp); + + /* + * we currently depend on running under the FS funnel + * when we do proper locking and advertise ourselves + * as thread safe, we'll need a lock to protect the + * hash lookup... this is where we would drop it + */ + if (vnode_getwithvid(vp, vid)) { + /* + * If vnode is being reclaimed, or has + * already changed identity, no need to wait + */ + return (NULL); + } return (vp); } } - simple_unlock(&ufs_ihash_slock); return (NULL); } @@ -166,13 +174,10 @@ void ufs_ihashins(ip) struct inode *ip; { - struct proc *p = current_proc(); struct ihashhead *ipp; - simple_lock(&ufs_ihash_slock); ipp = INOHASH(ip->i_dev, ip->i_number); LIST_INSERT_HEAD(ipp, ip, i_hash); - simple_unlock(&ufs_ihash_slock); } /* @@ -182,13 +187,9 @@ void ufs_ihashrem(ip) struct inode *ip; { - struct inode *iq; - - simple_lock(&ufs_ihash_slock); LIST_REMOVE(ip, i_hash); #if DIAGNOSTIC ip->i_hash.le_next = NULL; ip->i_hash.le_prev = NULL; #endif - simple_unlock(&ufs_ihash_slock); } diff --git a/bsd/ufs/ufs/ufs_inode.c b/bsd/ufs/ufs/ufs_inode.c index 90e6d8f93..41ae10abe 100644 --- a/bsd/ufs/ufs/ufs_inode.c +++ b/bsd/ufs/ufs/ufs_inode.c @@ -63,8 +63,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -82,14 +82,14 @@ extern int prtactive; */ int ufs_inactive(ap) - struct vop_inactive_args /* { + struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); - struct proc *p = ap->a_p; + struct proc *p = vfs_context_proc(ap->a_context); struct timeval tv; int mode, error = 0; extern int prtactive; @@ -112,25 +112,24 @@ ufs_inactive(ap) * inode from inodecache */ SET(ip->i_flag, IN_TRANSIT); - error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p); + error = ffs_truncate_internal(vp, (off_t)0, 0, NOCRED); ip->i_rdev = 0; mode = ip->i_mode; ip->i_mode = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; - VOP_VFREE(vp, ip->i_number, mode); + ffs_vfree(vp, ip->i_number, mode); } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { - tv = time; - VOP_UPDATE(vp, &tv, &tv, 0); + microtime(&tv); + ffs_update(vp, &tv, &tv, 0); } out: - VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ if (ip->i_mode == 0) - vrecycle(vp, (struct slock *)0, p); + vnode_recycle(vp); return (error); } @@ -148,24 +147,23 @@ ufs_reclaim(vp, p) if (prtactive && vp->v_usecount != 0) vprint("ufs_reclaim: pushing active", vp); + + vnode_removefsref(vp); /* * Remove the inode from its hash chain. */ ip = VTOI(vp); ufs_ihashrem(ip); - /* - * Purge old data structures associated with the inode. 
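The ihash rework above replaces the simple_lock/vget dance with the vnode-id handshake: capture the identity while the hash entry is known to match, then take the iocount with the identity check folded into a single call. The pattern in isolation, as a sketch with error handling trimmed:

    uint32_t vid;

    vid = vnode_vid(vp);            /* identity while the entry is valid */

    /* ... window in which the vnode could be reclaimed and reused ... */

    if (vnode_getwithvid(vp, vid) != 0) {
            /* recycled or re-identified: not our inode any more */
            return (NULL);
    }
    /* success: iocount held on a vnode whose identity is unchanged */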
- */ - cache_purge(vp); + if (ip->i_devvp) { struct vnode *tvp = ip->i_devvp; ip->i_devvp = NULL; - vrele(tvp); + vnode_rele(tvp); } #if QUOTA for (i = 0; i < MAXQUOTAS; i++) { if (ip->i_dquot[i] != NODQUOT) { - dqrele(vp, ip->i_dquot[i]); + dqrele(ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } diff --git a/bsd/ufs/ufs/ufs_lookup.c b/bsd/ufs/ufs/ufs_lookup.c index 1e2a64188..48bbde8c5 100644 --- a/bsd/ufs/ufs/ufs_lookup.c +++ b/bsd/ufs/ufs/ufs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,15 +64,18 @@ #include #include #include -#include -#include +#include +#include #include +#include +#include #include #include #include #include #include +#include #if REV_ENDIAN_FS #include #include @@ -101,8 +104,7 @@ int dirchk = 0; * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may - * be "."., but the caller must check to ensure it does an vrele and vput - * instead of two vputs. + * be "."., * * Overall outline of ufs_lookup: * @@ -122,10 +124,11 @@ int dirchk = 0; */ int ufs_lookup(ap) - struct vop_lookup_args /* { + struct vnop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; + vfs_context_t a_context } */ *ap; { register struct vnode *vdp; /* vnode for directory being searched */ @@ -145,27 +148,28 @@ ufs_lookup(ap) struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ - int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; - struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; - struct proc *p = cnp->cn_proc; + vfs_context_t context = ap->a_context; + kauth_cred_t cred; #if REV_ENDIAN_FS int rev_endian=0; #endif /* REV_ENDIAN_FS */ + cred = vfs_context_ucred(context); bp = NULL; slotoffset = -1; *vpp = NULL; vdp = ap->a_dvp; dp = VTOI(vdp); - lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT|WANTPARENT); + #if REV_ENDIAN_FS rev_endian=(vdp->v_mount->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ @@ -175,11 +179,6 @@ ufs_lookup(ap) */ if ((dp->i_mode & IFMT) != IFDIR) return (ENOTDIR); - if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) - return (error); - if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && - (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) - return (EROFS); /* * We now have a segment name to search for, and a directory to search. @@ -189,50 +188,10 @@ ufs_lookup(ap) * we are looking for is known already. */ if (error = cache_lookup(vdp, vpp, cnp)) { - int vpid; /* capability number of vnode */ - if (error == ENOENT) return (error); - /* - * Get the next vnode in the path. - * See comment below starting `Step through' for - * an explaination of the locking protocol. - */ - pdp = vdp; - dp = VTOI(*vpp); - vdp = *vpp; - vpid = vdp->v_id; - if (pdp == vdp) { /* lookup on "." 
*/ - VREF(vdp); - error = 0; - } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp, 0, p); - error = vget(vdp, LK_EXCLUSIVE, p); - if (!error && lockparent && (flags & ISLASTCN)) - error = vn_lock(pdp, LK_EXCLUSIVE, p); - } else { - error = vget(vdp, LK_EXCLUSIVE, p); - if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); - } - /* - * Check that the capability number did not change - * while we were waiting for the lock. - */ - if (!error) { - if (vpid == vdp->v_id) - return (0); - vput(vdp); - if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); - } - if (error = vn_lock(pdp, LK_EXCLUSIVE, p)) - return (error); - vdp = pdp; - dp = VTOI(pdp); - *vpp = NULL; + return (0); } - /* * Suppress search for slots unless creating * file and at end of pathname, in which case @@ -247,7 +206,6 @@ ufs_lookup(ap) slotneeded = (sizeof(struct direct) - MAXNAMLEN + cnp->cn_namelen + 3) &~ 3; } - /* * If there is cached information on a previous search of * this directory, pick up where we last left off. @@ -259,7 +217,7 @@ ufs_lookup(ap) * profiling time and hence has been removed in the interest * of simplicity. */ - bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_vfsstat.f_iosize - 1; if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { entryoffsetinblock = 0; @@ -268,8 +226,8 @@ ufs_lookup(ap) } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) - return (error); + (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp))) + goto out; numdirpasses = 2; nchstats.ncs_2passes++; } @@ -288,11 +246,10 @@ searchloop: if (rev_endian) byte_swap_dir_block_out(bp); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); } - if (error = - VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) - return (error); + if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) + goto out; entryoffsetinblock = 0; } /* @@ -311,7 +268,7 @@ searchloop: * directory. Complete checks can be run by patching * "dirchk" to be true. */ - ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); + ep = (struct direct *)((char *)buf_dataptr(bp) + entryoffsetinblock); if (ep->d_reclen == 0 || dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) { int i; @@ -389,7 +346,7 @@ searchloop: if (rev_endian) byte_swap_dir_block_out(bp); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); goto found; } } @@ -415,7 +372,7 @@ notfound: if (rev_endian) byte_swap_dir_block_out(bp); #endif /* REV_ENDIAN_FS */ - brelse(bp); + buf_brelse(bp); } /* * If creating, and at end of pathname and current @@ -427,12 +384,6 @@ notfound: (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_nlink != 0) { - /* - * Access for write is interpreted as allowing - * creation of files in the directory. - */ - if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) - return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, @@ -467,23 +418,20 @@ notfound: * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. - * The pathname buffer is saved so that the name - * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. 
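The slotneeded computation kept above deserves a gloss: a BSD struct direct is a fixed header (d_ino, d_reclen, d_type, d_namlen) followed by the name, its NUL, and padding to a 4-byte boundary, so sizeof(struct direct) - MAXNAMLEN + namelen, rounded up, is just "header + name + NUL, aligned". A standalone check of that arithmetic; the 8-byte header size is the conventional layout, asserted here rather than taken from the real header:

    #include <stdio.h>

    #define DIR_FIXED 8  /* d_ino(4) + d_reclen(2) + d_type(1) + d_namlen(1) */

    int
    main(void)
    {
            int namelen;

            for (namelen = 1; namelen <= 8; namelen++) {
                    /* header + name + NUL, rounded up to 4 bytes */
                    int need = (DIR_FIXED + namelen + 1 + 3) & ~3;
                    printf("namelen %d -> %d-byte record\n", namelen, need);
            }
            return 0;
    }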
*/ - cnp->cn_flags |= SAVENAME; - if (!lockparent) - VOP_UNLOCK(vdp, 0, p); - return (EJUSTRETURN); + error = EJUSTRETURN; + goto out; } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(vdp, *vpp, cnp); - return (ENOENT); + error = ENOENT; + goto out; found: if (numdirpasses == 2) @@ -514,11 +462,6 @@ found: * on and lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { - /* - * Write access to directory required to delete files. - */ - if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) - return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there @@ -530,30 +473,15 @@ found: else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { - VREF(vdp); + vnode_get(vdp); *vpp = vdp; - return (0); - } - if (error = VFS_VGET(vdp->v_mount, (void *)dp->i_ino, &tdp)) - return (error); - /* - * If directory is "sticky", then user must own - * the directory, or the file in it, else she - * may not delete it (unless she's root). This - * implements append-only directories. - */ - if ((dp->i_mode & ISVTX) && - cred->cr_uid != 0 && - cred->cr_uid != dp->i_uid && - tdp->v_type != VLNK && - VTOI(tdp)->i_uid != cred->cr_uid) { - vput(tdp); - return (EPERM); + error = 0; + goto out; } + if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) + goto out; *vpp = tdp; - if (!lockparent) - VOP_UNLOCK(vdp, 0, p); - return (0); + goto out; } /* @@ -563,25 +491,23 @@ found: * regular file, or empty directory. */ if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { - if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) - return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ - if (dp->i_number == dp->i_ino) - return (EISDIR); - if (error = VFS_VGET(vdp->v_mount, (void *)dp->i_ino, &tdp)) - return (error); + if (dp->i_number == dp->i_ino) { + error =EISDIR; + goto out; + } + if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) + goto out; *vpp = tdp; - cnp->cn_flags |= SAVENAME; - if (!lockparent) - VOP_UNLOCK(vdp, 0, p); - return (0); + + goto out; } /* - * Step through the translation in the name. We do not `vput' the + * Step through the translation in the name. We do not `vnode_put' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking @@ -601,48 +527,36 @@ found: */ pdp = vdp; if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ - if (error = VFS_VGET(vdp->v_mount, (void *)dp->i_ino, &tdp)) { - vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); - return (error); - } - if (lockparent && (flags & ISLASTCN) && - (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { - vput(tdp); - return (error); + if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) { + goto out; } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { - VREF(vdp); /* we want ourself, ie "." */ + vnode_get(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { - if (error = VFS_VGET(vdp->v_mount, (void *)dp->i_ino, &tdp)) - return (error); - if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp, 0, p); + if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) + goto out; *vpp = tdp; } - /* - * Insert name into cache if appropriate. 
- */ - if (cnp->cn_flags & MAKEENTRY) - cache_enter(vdp, *vpp, cnp); - return (0); + error = 0; +out: + return (error); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; - char *how; + const char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; (void)printf("%s: bad dir ino %d at offset %d: %s\n", - mp->mnt_stat.f_mntonname, ip->i_number, offset, how); - if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) + mp->mnt_vfsstat.f_mntonname, ip->i_number, offset, how); + if ((mp->mnt_vfsstat.f_flags & MNT_RDONLY) == 0) panic("bad dir"); } @@ -710,10 +624,6 @@ ufs_direnter(ip, dvp, cnp) register struct inode *dp; struct direct newdir; -#if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("direnter: missing name"); -#endif dp = VTOI(dvp); newdir.d_ino = ip->i_number; newdir.d_namlen = cnp->cn_namelen; @@ -728,28 +638,25 @@ ufs_direnter(ip, dvp, cnp) newdir.d_type = tmp; } # endif } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); + return (ufs_direnter2(dvp, &newdir, cnp->cn_context)); } /* * Common entry point for directory entry removal used by ufs_direnter * and ufs_whiteout */ -ufs_direnter2(dvp, dirp, cr, p) - struct vnode *dvp; - struct direct *dirp; - struct ucred *cr; - struct proc *p; +int +ufs_direnter2(struct vnode *dvp, struct direct *dirp, vfs_context_t ctx) { int newentrysize; struct inode *dp; struct buf *bp; - struct iovec aiov; - struct uio auio; + uio_t auio; u_int dsize; struct direct *ep, *nep; int error, loc, spacefree; char *dirbuf; + char uio_buf[ UIO_SIZEOF(1) ]; #if REV_ENDIAN_FS struct mount *mp=dvp->v_mount; int rev_endian=(mp->mnt_flag & MNT_REVEND); @@ -767,19 +674,14 @@ ufs_direnter2(dvp, dirp, cr, p) */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); + auio = uio_createwithbuffer(1, dp->i_offset, UIO_SYSSPACE, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(dirp), newentrysize); + + error = ffs_write_internal(dvp, auio, IO_SYNC, vfs_context_ucred(ctx)); if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + VFSTOUFS(dvp->v_mount)->um_mountp->mnt_vfsstat.f_bsize) /* XXX should grow with balloc() */ panic("ufs_direnter2: frag size"); else if (!error) { @@ -810,7 +712,7 @@ ufs_direnter2(dvp, dirp, cr, p) /* * Get the block containing the space for the new directory entry. */ - if (error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) return (error); /* * Find space for the new entry. In the simple case, the entry at @@ -861,13 +763,14 @@ ufs_direnter2(dvp, dirp, cr, p) #endif /* REV_ENDIAN_FS */ if (mp->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) - error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + error = ffs_truncate_internal(dvp, (off_t)dp->i_endoff, IO_SYNC, vfs_context_ucred(ctx)); + return (error); } @@ -903,8 +806,7 @@ ufs_dirremove(dvp, cnp) /* * Whiteout entry: set d_ino to WINO. 
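The converted ufs_direnter2() above is a tidy illustration of the new uio KPI: instead of hand-initializing a struct uio/struct iovec pair, the caller reserves UIO_SIZEOF(n) bytes of its own storage, wraps them with uio_createwithbuffer(), and appends ranges with uio_addiov(). Reduced to the moving parts (entry and offset are placeholders for the real directory entry and write position):

    char    uio_buf[UIO_SIZEOF(1)];         /* stack backing for one iovec */
    uio_t   auio;

    auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, UIO_WRITE,
                                &uio_buf[0], sizeof(uio_buf));
    uio_addiov(auio, CAST_USER_ADDR_T(&entry), sizeof(entry));
    /* auio is now ready to hand to the write path, as with the
     * ffs_write_internal() call above; the withbuffer variant needs
     * no separate free since the caller owns the storage */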
*/ - if (error = - VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) + if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; @@ -914,9 +816,9 @@ ufs_dirremove(dvp, cnp) #endif /* REV_ENDIAN_FS */ if (mp->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); @@ -926,8 +828,7 @@ ufs_dirremove(dvp, cnp) /* * First entry in block: set d_ino to zero. */ - if (error = - VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) + if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) return (error); ep->d_ino = 0; #if REV_ENDIAN_FS @@ -936,9 +837,9 @@ ufs_dirremove(dvp, cnp) #endif /* REV_ENDIAN_FS */ if (mp->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); @@ -946,7 +847,7 @@ ufs_dirremove(dvp, cnp) /* * Collapse new free space into previous entry. */ - if (error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), + if (error = ffs_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) return (error); ep->d_reclen += dp->i_reclen; @@ -956,11 +857,12 @@ ufs_dirremove(dvp, cnp) #endif /* REV_ENDIAN_FS */ if (mp->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); } @@ -979,7 +881,7 @@ ufs_dirrewrite(dp, ip, cnp) struct vnode *vdp = ITOV(dp); int error; - if (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) + if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) return (error); ep->d_ino = ip->i_number; if (vdp->v_mount->mnt_maxsymlinklen > 0) @@ -990,9 +892,9 @@ ufs_dirrewrite(dp, ip, cnp) #endif /* REV_ENDIAN_FS */ if (vdp->v_mount->mnt_flag & MNT_ASYNC) { error = 0; - bdwrite(bp); + buf_bdwrite(bp); } else { - error = VOP_BWRITE(bp); + error = VNOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); @@ -1008,10 +910,7 @@ ufs_dirrewrite(dp, ip, cnp) * NB: does not handle corrupted directories. */ int -ufs_dirempty(ip, parentino, cred) - register struct inode *ip; - ino_t parentino; - struct ucred *cred; +ufs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) { register off_t off; struct dirtemplate dbuf; @@ -1027,7 +926,7 @@ ufs_dirempty(ip, parentino, cred) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, - UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); + UIO_SYSSPACE32, IO_NODELOCKED, cred, &count, (struct proc *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. @@ -1074,15 +973,15 @@ ufs_dirempty(ip, parentino, cred) /* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. - * The target is always vput before returning. 
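Every directory-block update in ufs_dirremove and ufs_dirrewrite above follows the same write-policy split, restated once here since it recurs four times: on an MNT_ASYNC mount the buffer is delayed-written through buf_bdwrite and the error deliberately cleared, otherwise it goes out synchronously through VNOP_BWRITE, whose return value becomes the operation's result. Condensed from the hunks:

    if (mp->mnt_flag & MNT_ASYNC) {
            error = 0;
            buf_bdwrite(bp);         /* delayed write; errors surface later */
    } else {
            error = VNOP_BWRITE(bp); /* synchronous write; error returned   */
    }
    dp->i_flag |= IN_CHANGE | IN_UPDATE;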
*/ int ufs_checkpath(source, target, cred) struct inode *source, *target; - struct ucred *cred; + kauth_cred_t cred; { struct vnode *vp; int error, rootino, namlen; + int need_put = 0; struct dirtemplate dirbuf; vp = ITOV(target); @@ -1101,7 +1000,7 @@ ufs_checkpath(source, target, cred) break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE32, IO_NODELOCKED, cred, (int *)0, (struct proc *)0); if (error != 0) break; @@ -1125,17 +1024,22 @@ ufs_checkpath(source, target, cred) } if (dirbuf.dotdot_ino == rootino) break; - vput(vp); - if (error = VFS_VGET(vp->v_mount, (void *)dirbuf.dotdot_ino, &vp)) { + + if (need_put) + vnode_put(vp); + + if (error = VFS_VGET(vp->v_mount, (ino64_t)dirbuf.dotdot_ino, &vp, NULL)) { /* XXX need context */ vp = NULL; break; } + need_put = 1; } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); - if (vp != NULL) - vput(vp); + if (need_put && vp) + vnode_put(vp); + return (error); } diff --git a/bsd/ufs/ufs/ufs_quota.c b/bsd/ufs/ufs/ufs_quota.c index e17245068..5ae7280be 100644 --- a/bsd/ufs/ufs/ufs_quota.c +++ b/bsd/ufs/ufs/ufs_quota.c @@ -62,9 +62,10 @@ #include #include #include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -101,7 +102,7 @@ getinoquota(ip) */ if (ip->i_dquot[USRQUOTA] == NODQUOT && (error = - dqget(vp, ip->i_uid, &ump->um_qfiles[USRQUOTA], USRQUOTA, &ip->i_dquot[USRQUOTA])) && + dqget(ip->i_uid, &ump->um_qfiles[USRQUOTA], USRQUOTA, &ip->i_dquot[USRQUOTA])) && error != EINVAL) return (error); /* @@ -110,7 +111,7 @@ getinoquota(ip) */ if (ip->i_dquot[GRPQUOTA] == NODQUOT && (error = - dqget(vp, ip->i_gid, &ump->um_qfiles[GRPQUOTA], GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && + dqget(ip->i_gid, &ump->um_qfiles[GRPQUOTA], GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && error != EINVAL) return (error); return (0); @@ -120,11 +121,7 @@ getinoquota(ip) * Update disk usage, and take corrective action. 
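ufs_checkpath above now ascends the ".." chain with iocount-style references: vnode_put replaces vput, and the new need_put flag records whether the current vnode is one the routine obtained itself, so the caller-owned starting vnode is never released here. A condensed sketch of that loop shape, mirroring the hunk, with the directory read elided and dotdot_ino standing in for dirbuf.dotdot_ino:

    int need_put = 0;      /* becomes 1 once vp came from our own VFS_VGET */

    for (;;) {
            /* ... read the ".." entry of vp; break when done ... */
            if (need_put)
                    vnode_put(vp);          /* drop only our own iocount */
            if (error = VFS_VGET(vp->v_mount, (ino64_t)dotdot_ino, &vp, NULL)) {
                    vp = NULL;
                    break;
            }
            need_put = 1;
    }
    if (need_put && vp)
            vnode_put(vp);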
*/ int -chkdq(ip, change, cred, flags) - register struct inode *ip; - int64_t change; - struct ucred *cred; - int flags; +chkdq(struct inode *ip, int64_t change, kauth_cred_t cred, int flags) { register struct dquot *dq; register int i; @@ -142,10 +139,8 @@ chkdq(ip, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + ncurbytes = dq->dq_curbytes + change; if (ncurbytes >= 0) dq->dq_curbytes = ncurbytes; @@ -153,29 +148,32 @@ chkdq(ip, change, cred, flags) dq->dq_curbytes = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } +#warning "hack for no cred passed to chkdq()" p = current_proc(); if (cred == NOCRED) - cred = kernproc->p_ucred; - if ((flags & FORCE) == 0 && ((cred->cr_uid != 0) || (p->p_flag & P_FORCEQUOTA))) { + cred = proc_ucred(kernproc); + if ((flags & FORCE) == 0 && (suser(cred, NULL) || (proc_forcequota(p)))) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - if (error = chkdqchg(ip, change, cred, i)) + if ( (error = chkdqchg(ip, change, cred, i)) ) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + dq->dq_curbytes += change; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } @@ -185,28 +183,29 @@ chkdq(ip, change, cred, flags) * Issue an error message if appropriate. */ int -chkdqchg(ip, change, cred, type) - struct inode *ip; - int64_t change; - struct ucred *cred; - int type; +chkdqchg(struct inode *ip, int64_t change, kauth_cred_t cred, int type) { register struct dquot *dq = ip->i_dquot[type]; - u_int64_t ncurbytes = dq->dq_curbytes + change; + u_int64_t ncurbytes; + dqlock(dq); + + ncurbytes = dq->dq_curbytes + change; /* * If user would exceed their hard limit, disallow space allocation. */ if (ncurbytes >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == cred->cr_uid) { + ip->i_uid == kauth_cred_getuid(cred)) { #if 1 printf("\n%s: write failed, %s disk limit reached\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type]); #endif dq->dq_flags |= DQ_BLKS; } + dqunlock(dq); + return (EDQUOT); } /* @@ -214,31 +213,40 @@ chkdqchg(ip, change, cred, type) * allocation. Reset time limit as they cross their soft limit. 
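The chkdq hunk above retires the open-coded sleep lock on the dquot: where callers used to spin on DQ_LOCK and mark DQ_WANT before sleeping, every read-modify-write of the usage fields is now bracketed by a dqlock/dqunlock accessor pair. Side by side, using the hunk's own fields:

    /* old: hand-rolled sleep lock on the flags word */
    while (dq->dq_flags & DQ_LOCK) {
            dq->dq_flags |= DQ_WANT;
            sleep((caddr_t)dq, PINOD+1);
    }
    dq->dq_curbytes += change;
    dq->dq_flags |= DQ_MOD;

    /* new: the lock lives behind accessors, so the critical
     * section is explicit and released on every exit path */
    dqlock(dq);
    dq->dq_curbytes += change;
    dq->dq_flags |= DQ_MOD;
    dqunlock(dq);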
*/ if (ncurbytes >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { + struct timeval tv; + + microtime(&tv); if (dq->dq_curbytes < dq->dq_bsoftlimit) { - dq->dq_btime = time.tv_sec + + dq->dq_btime = tv.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_qfiles[type].qf_btime; #if 1 - if (ip->i_uid == cred->cr_uid) + if (ip->i_uid == kauth_cred_getuid(cred)) printf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type], "disk quota exceeded"); #endif + dqunlock(dq); + return (0); } - if (time.tv_sec > dq->dq_btime) { + if (tv.tv_sec > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == cred->cr_uid) { + ip->i_uid == kauth_cred_getuid(cred)) { #if 1 printf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type], "disk quota exceeded for too long"); #endif dq->dq_flags |= DQ_BLKS; } + dqunlock(dq); + return (EDQUOT); } } + dqunlock(dq); + return (0); } @@ -246,11 +254,7 @@ chkdqchg(ip, change, cred, type) * Check the inode limit, applying corrective action. */ int -chkiq(ip, change, cred, flags) - register struct inode *ip; - long change; - struct ucred *cred; - int flags; +chkiq(struct inode *ip, long change, kauth_cred_t cred, int flags) { register struct dquot *dq; register int i; @@ -267,10 +271,8 @@ chkiq(ip, change, cred, flags) for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + ncurinodes = dq->dq_curinodes + change; if (ncurinodes >= 0) dq->dq_curinodes = ncurinodes; @@ -278,29 +280,32 @@ chkiq(ip, change, cred, flags) dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } +#warning "hack for no cred passed to chkiq()" p = current_proc(); if (cred == NOCRED) - cred = kernproc->p_ucred; - if ((flags & FORCE) == 0 && ((cred->cr_uid != 0) || (p->p_flag & P_FORCEQUOTA))) { + cred = proc_ucred(kernproc); + if ((flags & FORCE) == 0 && (suser(cred, NULL) || (proc_forcequota(p)))) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - if (error = chkiqchg(ip, change, cred, i)) + if ( (error = chkiqchg(ip, change, cred, i)) ) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; + + dqunlock(dq); } return (0); } @@ -310,28 +315,29 @@ chkiq(ip, change, cred, flags) * Issue an error message if appropriate. */ int -chkiqchg(ip, change, cred, type) - struct inode *ip; - long change; - struct ucred *cred; - int type; +chkiqchg(struct inode *ip, long change, kauth_cred_t cred, int type) { register struct dquot *dq = ip->i_dquot[type]; - long ncurinodes = dq->dq_curinodes + change; + long ncurinodes; + + dqlock(dq); + ncurinodes = dq->dq_curinodes + change; /* * If user would exceed their hard limit, disallow inode allocation. 
*/ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == cred->cr_uid) { + ip->i_uid == kauth_cred_getuid(cred)) { #if 1 printf("\n%s: write failed, %s inode limit reached\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type]); #endif dq->dq_flags |= DQ_INODS; } + dqunlock(dq); + return (EDQUOT); } /* @@ -339,31 +345,40 @@ chkiqchg(ip, change, cred, type) * allocation. Reset time limit as they cross their soft limit. */ if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + struct timeval tv; + + microtime(&tv); if (dq->dq_curinodes < dq->dq_isoftlimit) { - dq->dq_itime = time.tv_sec + + dq->dq_itime = tv.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_qfiles[type].qf_itime; #if 1 - if (ip->i_uid == cred->cr_uid) + if (ip->i_uid == kauth_cred_getuid(cred)) printf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type], "inode quota exceeded"); #endif + dqunlock(dq); + return (0); } - if (time.tv_sec > dq->dq_itime) { + if (tv.tv_sec > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == cred->cr_uid) { + ip->i_uid == kauth_cred_getuid(cred)) { #if 1 printf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_stat.f_mntonname, + ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, quotatypes[type], "inode quota exceeded for too long"); #endif dq->dq_flags |= DQ_INODS; } + dqunlock(dq); + return (EDQUOT); } } + dqunlock(dq); + return (0); } @@ -380,8 +395,7 @@ chkdquot(ip) register int i; for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_qfiles[i].qf_vp == NULLVP || - (ump->um_qfiles[i].qf_qflags & (QTF_OPENING|QTF_CLOSING))) + if (ump->um_qfiles[i].qf_vp == NULLVP) continue; if (ip->i_dquot[i] == NODQUOT) { vprint("chkdquot: missing dquot", ITOV(ip)); @@ -395,140 +409,186 @@ chkdquot(ip) * Code to process quotactl commands. */ + +struct ufs_quotaon_cargs { + int error; +}; + + +static int +ufs_quotaon_callback(struct vnode *vp, void *cargs) +{ + struct ufs_quotaon_cargs *args; + + args = (struct ufs_quotaon_cargs *)cargs; + + if ( (args->error = getinoquota(VTOI(vp))) ) + return (VNODE_RETURNED_DONE); + + return (VNODE_RETURNED); +} + + /* * Q_QUOTAON - set up a quota file for a particular file system. 
*/ int -quotaon(p, mp, type, fname, segflg) - struct proc *p; +quotaon(context, mp, type, fnamep) + vfs_context_t context; struct mount *mp; register int type; - caddr_t fname; - enum uio_seg segflg; + caddr_t fnamep; { struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *vp, **vpp; - struct vnode *nextvp; - struct dquot *dq; - int error; - struct nameidata nd; + struct quotafile *qfp; + struct vnode *vp; + int error = 0; + struct ufs_quotaon_cargs args; - vpp = &ump->um_qfiles[type].qf_vp; - NDINIT(&nd, LOOKUP, FOLLOW, segflg, fname, p); - if (error = vn_open(&nd, FREAD|FWRITE, 0)) - return (error); - vp = nd.ni_vp; - VOP_UNLOCK(vp, 0, p); - if (vp->v_type != VREG) { - (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); - return (EACCES); + qfp = &ump->um_qfiles[type]; + + if ( (qf_get(qfp, QTF_OPENING)) ) + return (0); + + error = vnode_open(fnamep, FREAD|FWRITE, 0, 0, &vp, NULL); + if (error) { + goto out; } - if (*vpp != vp) - quotaoff(p, mp, type); - ump->um_qfiles[type].qf_qflags |= QTF_OPENING; - mp->mnt_flag |= MNT_QUOTA; - vp->v_flag |= VNOFLUSH; - *vpp = vp; + if (!vnode_isreg(vp)) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + error = EACCES; + goto out; + } + vfs_setflags(mp, (uint64_t)((unsigned int)MNT_QUOTA)); + vnode_setnoflush(vp); /* * Save the credential of the process that turned on quotas. */ - crhold(p->p_ucred); - ump->um_qfiles[type].qf_cred = p->p_ucred; - /* Finish initializing the quota file */ - if (error = dqfileopen(&ump->um_qfiles[type], type)) - goto exit; -#if 0 - ump->um_qfiles[type].qf_btime = MAX_DQ_TIME; - ump->um_qfiles[type].qf_itime = MAX_IQ_TIME; - if (dqget(NULLVP, 0, &ump->um_qfiles[type], type, &dq) == 0) { - if (dq->dq_btime > 0) - ump->um_qfiles[type].qf_btime = dq->dq_btime; - if (dq->dq_itime > 0) - ump->um_qfiles[type].qf_itime = dq->dq_itime; - dqrele(NULLVP, dq); - } -#endif + qfp->qf_vp = vp; + qfp->qf_cred = vfs_context_ucred(context); + kauth_cred_ref(qfp->qf_cred); + + /* + * Finish initializing the quota file + */ + if ( (error = dqfileopen(&ump->um_qfiles[type], type)) ) { + (void) vnode_close(vp, FREAD|FWRITE, NULL); + + kauth_cred_rele(qfp->qf_cred); + qfp->qf_cred = NOCRED; + qfp->qf_vp = NULLVP; + goto out; + } + qf_put(qfp, QTF_OPENING); + /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. 
+ * + * ufs_quotaon_callback will be called for each vnode open for + * 'write' (VNODE_WRITEABLE) hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback */ -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) - continue; - if (vget(vp, LK_EXCLUSIVE, p)) - goto again; - if (error = getinoquota(VTOI(vp))) { - vput(vp); - break; - } - vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } -exit: - ump->um_qfiles[type].qf_qflags &= ~QTF_OPENING; + args.error = 0; + + vnode_iterate(mp, VNODE_WRITEABLE | VNODE_WAIT, ufs_quotaon_callback, (void *)&args); + + error = args.error; + if (error) - quotaoff(p, mp, type); + quotaoff(mp, type); return (error); +out: + qf_put(qfp, QTF_OPENING); + + return (error); +} + + + +struct ufs_quotaoff_cargs { + int type; +}; + +static int +ufs_quotaoff_callback(struct vnode *vp, void *cargs) +{ + struct ufs_quotaoff_cargs *args; + struct inode *ip; + struct dquot *dq; + + args = (struct ufs_quotaoff_cargs *)cargs; + + ip = VTOI(vp); + + dq = ip->i_dquot[args->type]; + ip->i_dquot[args->type] = NODQUOT; + + dqrele(dq); + + return (VNODE_RETURNED); } /* * Q_QUOTAOFF - turn off disk quotas for a filesystem. */ int -quotaoff(p, mp, type) - struct proc *p; - struct mount *mp; - register int type; +quotaoff(struct mount *mp, register int type) { - struct vnode *vp; - struct vnode *qvp, *nextvp; + struct vnode *qvp; struct ufsmount *ump = VFSTOUFS(mp); - struct dquot *dq; - struct inode *ip; - int error; - struct ucred *cred; + struct quotafile *qfp; + int error = 0; + kauth_cred_t cred; + struct ufs_quotaoff_cargs args; + + qfp = &ump->um_qfiles[type]; + + if ( (qf_get(qfp, QTF_CLOSING)) ) + return (0); + qvp = qfp->qf_vp; + + /* + * Sync out any orphaned dirty dquot entries. + */ + dqsync_orphans(qfp); - if ((qvp = ump->um_qfiles[type].qf_vp) == NULLVP) - return (0); - ump->um_qfiles[type].qf_qflags |= QTF_CLOSING; /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed.
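Both quotaon and quotaoff now delegate the per-mount vnode walk to vnode_iterate, which supersedes the removed again:/goto loops and the list-consistency rechecks they needed. The callback contract, per the comments above: each vnode arrives unbusied and referenced for the duration of the call, and the callback steers the walk through its return value. A generic sketch under that contract; my_cargs, my_callback and do_per_vnode_work are illustrative names, the rest is the KPI exactly as the hunks use it:

    struct my_cargs {
            int error;
    };

    static int
    my_callback(struct vnode *vp, void *cargs)
    {
            struct my_cargs *args = (struct my_cargs *)cargs;

            /* vp is referenced and unbusied while we run */
            if ( (args->error = do_per_vnode_work(VTOI(vp))) )
                    return (VNODE_RETURNED_DONE);   /* stop the iteration */
            return (VNODE_RETURNED);                /* continue           */
    }

    /* caller: visit only vnodes open for write, waiting out busy ones */
    struct my_cargs args;
    args.error = 0;
    vnode_iterate(mp, VNODE_WRITEABLE | VNODE_WAIT, my_callback, (void *)&args);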
+ * + * ufs_quotaoff_callback will be called for each vnode + * hung off of this mount point + * the vnode will be in an 'unbusy' state (VNODE_WAIT) and + * properly referenced and unreferenced around the callback */ -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (vget(vp, LK_EXCLUSIVE, p)) - goto again; - ip = VTOI(vp); - dq = ip->i_dquot[type]; - ip->i_dquot[type] = NODQUOT; - dqrele(vp, dq); - vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } + args.type = type; + + vnode_iterate(mp, VNODE_WAIT, ufs_quotaoff_callback, (void *)&args); + dqflush(qvp); /* Finish tearing down the quota file */ - dqfileclose(&ump->um_qfiles[type], type); - qvp->v_flag &= ~VNOFLUSH; - error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); - ump->um_qfiles[type].qf_vp = NULLVP; - cred = ump->um_qfiles[type].qf_cred; + dqfileclose(qfp, type); + + vnode_clearnoflush(qvp); + error = vnode_close(qvp, FREAD|FWRITE, NULL); + + qfp->qf_vp = NULLVP; + cred = qfp->qf_cred; if (cred != NOCRED) { - ump->um_qfiles[type].qf_cred = NOCRED; - crfree(cred); + qfp->qf_cred = NOCRED; + kauth_cred_rele(cred); } - ump->um_qfiles[type].qf_qflags &= ~QTF_CLOSING; for (type = 0; type < MAXQUOTAS; type++) if (ump->um_qfiles[type].qf_vp != NULLVP) break; if (type == MAXQUOTAS) mp->mnt_flag &= ~MNT_QUOTA; + + qf_put(qfp, QTF_CLOSING); + return (error); } @@ -536,19 +596,24 @@ again: * Q_GETQUOTA - return current values in a dqblk structure. */ int -getquota(mp, id, type, addr) +getquota(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { struct dquot *dq; int error; - if (error = dqget(NULLVP, id, &VFSTOUFS(mp)->um_qfiles[type], type, &dq)) + if ( (error = dqget(id, &VFSTOUFS(mp)->um_qfiles[type], type, &dq)) ) return (error); - error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); - dqrele(NULLVP, dq); + dqlock(dq); + + bcopy(&dq->dq_dqb, datap, sizeof(dq->dq_dqb)); + + dqunlock(dq); + dqrele(dq); + return (error); } @@ -556,47 +621,47 @@ getquota(mp, id, type, addr) * Q_SETQUOTA - assign an entire dqblk structure. */ int -setquota(mp, id, type, addr) +setquota(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { - register struct dquot *dq; - struct dquot *ndq; + struct dquot *dq; struct ufsmount *ump = VFSTOUFS(mp); - struct dqblk newlim; + struct dqblk * newlimp = (struct dqblk *) datap; + struct timeval tv; int error; - if (error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk))) - return (error); - if (error = dqget(NULLVP, id, &ump->um_qfiles[type], type, &ndq)) + error = dqget(id, &ump->um_qfiles[type], type, &dq); + if (error) return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were * under it, but now have a soft limit and are over it. 
*/ - newlim.dqb_curbytes = dq->dq_curbytes; - newlim.dqb_curinodes = dq->dq_curinodes; + newlimp->dqb_curbytes = dq->dq_curbytes; + newlimp->dqb_curinodes = dq->dq_curinodes; if (dq->dq_id != 0) { - newlim.dqb_btime = dq->dq_btime; - newlim.dqb_itime = dq->dq_itime; + newlimp->dqb_btime = dq->dq_btime; + newlimp->dqb_itime = dq->dq_itime; + } + if (newlimp->dqb_bsoftlimit && + dq->dq_curbytes >= newlimp->dqb_bsoftlimit && + (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) { + microtime(&tv); + newlimp->dqb_btime = tv.tv_sec + ump->um_qfiles[type].qf_btime; } - if (newlim.dqb_bsoftlimit && - dq->dq_curbytes >= newlim.dqb_bsoftlimit && - (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) - newlim.dqb_btime = time.tv_sec + ump->um_qfiles[type].qf_btime; - if (newlim.dqb_isoftlimit && - dq->dq_curinodes >= newlim.dqb_isoftlimit && - (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) - newlim.dqb_itime = time.tv_sec + ump->um_qfiles[type].qf_itime; - dq->dq_dqb = newlim; + if (newlimp->dqb_isoftlimit && + dq->dq_curinodes >= newlimp->dqb_isoftlimit && + (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) { + microtime(&tv); + newlimp->dqb_itime = tv.tv_sec + ump->um_qfiles[type].qf_itime; + } + bcopy(newlimp, &dq->dq_dqb, sizeof(dq->dq_dqb)); if (dq->dq_curbytes < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) @@ -607,7 +672,10 @@ setquota(mp, id, type, addr) else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); + + dqunlock(dq); + dqrele(dq); + return (0); } @@ -615,48 +683,71 @@ setquota(mp, id, type, addr) * Q_SETUSE - set current inode and byte usage. */ int -setuse(mp, id, type, addr) +setuse(mp, id, type, datap) struct mount *mp; u_long id; int type; - caddr_t addr; + caddr_t datap; { - register struct dquot *dq; + struct dquot *dq; struct ufsmount *ump = VFSTOUFS(mp); - struct dquot *ndq; - struct dqblk usage; + struct timeval tv; int error; - - if (error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk))) - return (error); - if (error = dqget(NULLVP, id, &ump->um_qfiles[type], type, &ndq)) + struct dqblk *quotablkp = (struct dqblk *) datap; + + error = dqget(id, &ump->um_qfiles[type], type, &dq); + if (error) return (error); - dq = ndq; - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+1); - } + dqlock(dq); + /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. 
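The setquota logic above resets the block grace clock only on an upward crossing of the soft limit: the incoming limit must be nonzero, current usage must already meet it, and usage must have been under the previous soft limit (or there was none). Restated with the hunk's own names, where qf_btime abbreviates ump->um_qfiles[type].qf_btime and tv is the struct timeval filled by microtime:

    if (newlimp->dqb_bsoftlimit &&
        dq->dq_curbytes >= newlimp->dqb_bsoftlimit &&       /* now over  */
        (dq->dq_bsoftlimit == 0 ||
         dq->dq_curbytes < dq->dq_bsoftlimit)) {            /* was under */
            microtime(&tv);
            newlimp->dqb_btime = tv.tv_sec + qf_btime;      /* now + grace */
    }

The inode branch beneath it applies the identical rule to dqb_isoftlimit and qf_itime.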
*/ if (dq->dq_bsoftlimit && dq->dq_curbytes < dq->dq_bsoftlimit && - usage.dqb_curbytes >= dq->dq_bsoftlimit) - dq->dq_btime = time.tv_sec + ump->um_qfiles[type].qf_btime; + quotablkp->dqb_curbytes >= dq->dq_bsoftlimit) { + microtime(&tv); + dq->dq_btime = tv.tv_sec + ump->um_qfiles[type].qf_btime; + } if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && - usage.dqb_curinodes >= dq->dq_isoftlimit) - dq->dq_itime = time.tv_sec + ump->um_qfiles[type].qf_itime; - dq->dq_curbytes = usage.dqb_curbytes; - dq->dq_curinodes = usage.dqb_curinodes; + quotablkp->dqb_curinodes >= dq->dq_isoftlimit) { + microtime(&tv); + dq->dq_itime = tv.tv_sec + ump->um_qfiles[type].qf_itime; + } + dq->dq_curbytes = quotablkp->dqb_curbytes; + dq->dq_curinodes = quotablkp->dqb_curinodes; if (dq->dq_curbytes < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; - dqrele(NULLVP, dq); + + dqunlock(dq); + dqrele(dq); + return (0); } + + +static int +ufs_qsync_callback(struct vnode *vp, __unused void *cargs) +{ + struct inode *ip; + struct dquot *dq; + int i; + + ip = VTOI(vp); + + for (i = 0; i < MAXQUOTAS; i++) { + dq = ip->i_dquot[i]; + if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) + dqsync(dq); + } + return (VNODE_RETURNED); +} + + /* * Q_SYNC - sync quota files to disk. */ @@ -665,10 +756,7 @@ qsync(mp) struct mount *mp; { struct ufsmount *ump = VFSTOUFS(mp); - struct proc *p = current_proc(); /* XXX */ - struct vnode *vp, *nextvp; - struct dquot *dq; - int i, error; + int i; /* * Check if the mount point has any quotas. @@ -682,33 +770,14 @@ qsync(mp) /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. + * + * ufs_qsync_callback will be called for each vnode + * hung off of this mount point + * the vnode will be + * properly referenced and unreferenced around the callback */ - simple_lock(&mntvnode_slock); -again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - if (vp->v_mount != mp) - goto again; - nextvp = vp->v_mntvnodes.le_next; - simple_lock(&vp->v_interlock); - simple_unlock(&mntvnode_slock); - error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); - if (error) { - simple_lock(&mntvnode_slock); - if (error == ENOENT) - goto again; - continue; - } - for (i = 0; i < MAXQUOTAS; i++) { - dq = VTOI(vp)->i_dquot[i]; - if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) - dqsync(vp, dq); - } - vput(vp); - simple_lock(&mntvnode_slock); - if (vp->v_mntvnodes.le_next != nextvp) - goto again; - } - simple_unlock(&mntvnode_slock); + vnode_iterate(mp, 0, ufs_qsync_callback, (void *)NULL); + return (0); } @@ -716,10 +785,10 @@ again: * Q_QUOTASTAT - get quota on/off status */ int -quotastat(mp, type, addr) +quotastat(mp, type, datap) struct mount *mp; register int type; - caddr_t addr; + caddr_t datap; { struct ufsmount *ump = VFSTOUFS(mp); int error = 0; @@ -729,8 +798,7 @@ quotastat(mp, type, addr) qstat = 1; /* quotas are on for this type */ else qstat = 0; /* quotas are off for this type */ - - error = copyout ((caddr_t)&qstat, addr, sizeof(qstat)); + *((int *)datap) = qstat; return (error); } diff --git a/bsd/ufs/ufs/ufs_readwrite.c b/bsd/ufs/ufs/ufs_readwrite.c index fc4a4cafe..9aa3b92cd 100644 --- a/bsd/ufs/ufs/ufs_readwrite.c +++ b/bsd/ufs/ufs/ufs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -55,45 +55,49 @@ * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 */ +#include +#include + + #define BLKSIZE(a, b, c) blksize(a, b, c) #define FS struct fs #define I_FS i_fs -#define PGRD ffs_pgrd -#define PGRD_S "ffs_pgrd" -#define PGWR ffs_pgwr -#define PGWR_S "ffs_pgwr" + + /* * Vnode op for reading. */ /* ARGSUSED */ ffs_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp; - register struct inode *ip; - register struct uio *uio; - register FS *fs; - struct buf *bp = (struct buf *)0; + return(ffs_read_internal(ap->a_vp, ap->a_uio, ap->a_ioflag)); +} + + +int +ffs_read_internal(vnode_t vp, struct uio *uio, int ioflag) +{ + struct inode *ip; + FS *fs; + buf_t bp = (struct buf *)0; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; - int devBlockSize=0; int error; u_short mode; #if REV_ENDIAN_FS int rev_endian=0; #endif /* REV_ENDIAN_FS */ - vp = ap->a_vp; ip = VTOI(vp); mode = ip->i_mode; - uio = ap->a_uio; #if REV_ENDIAN_FS rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND); @@ -115,14 +119,13 @@ ffs_read(ap) if (uio->uio_offset > fs->fs_maxfilesize) return (EFBIG); - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - - if (UBCISVALID(vp)) { - error = cluster_read(vp, uio, (off_t)ip->i_size, - devBlockSize, 0); + if (UBCINFOEXISTS(vp)) { + error = cluster_read(vp, uio, (off_t)ip->i_size, 0); } else { - for (error = 0, bp = NULL; uio->uio_resid > 0; + for (error = 0, bp = NULL; uio_resid(uio) > 0; bp = NULL) { + char *buf_data; + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); @@ -130,46 +133,48 @@ ffs_read(ap) size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; + // LP64todo - fix this + if (uio_resid(uio) < xfersize) + xfersize = uio_resid(uio); if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= ip->i_size) - error = bread(vp, lbn, size, NOCRED, &bp); - else if (lbn - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) { + error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), size, NOCRED, &bp); + else if (lbn - 1 == ip->i_lastr && !(vp->v_flag & VRAOFF)) { int nextsize = BLKSIZE(fs, ip, nextlbn); - error = breadn(vp, lbn, - size, &nextlbn, &nextsize, 1, NOCRED, &bp); + error = (int)buf_breadn(vp, (daddr64_t)((unsigned)lbn), + size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else - error = bread(vp, lbn, size, NOCRED, &bp); + error = (int)buf_bread(vp, lbn, size, NOCRED, &bp); if (error) break; - vp->v_lastr = lbn; + ip->i_lastr = lbn; /* - * We should only get non-zero b_resid when an I/O error + * We should only get non-zero buffer resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. 
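In the non-UBC path above, each pass of the read loop sizes its transfer from the block geometry: find the logical block holding uio_offset, take the remainder of that block, then clamp to both the caller's residual and the bytes left before EOF. A self-contained worked example of that arithmetic (userspace C; 4096 stands in for fs->fs_bsize, and the lblkno/blkoff macros are spelled out as division and remainder):

    #include <stdio.h>
    #include <stdint.h>

    #define FS_BSIZE 4096                    /* stand-in for fs->fs_bsize */

    int
    main(void)
    {
            int64_t offset = 10000;          /* uio_offset     */
            int64_t resid  = 20000;          /* uio_resid(uio) */
            int64_t isize  = 12288;          /* ip->i_size     */

            int64_t lbn         = offset / FS_BSIZE;    /* lblkno()      */
            int64_t blkoffset   = offset % FS_BSIZE;    /* blkoff()      */
            int64_t xfersize    = FS_BSIZE - blkoffset; /* rest of block */
            int64_t bytesinfile = isize - offset;

            if (resid < xfersize)
                    xfersize = resid;
            if (bytesinfile < xfersize)
                    xfersize = bytesinfile;

            /* prints: lbn=2 blkoffset=1808 xfersize=2288 */
            printf("lbn=%lld blkoffset=%lld xfersize=%lld\n",
                (long long)lbn, (long long)blkoffset, (long long)xfersize);
            return 0;
    }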
*/ - size -= bp->b_resid; + size -= buf_resid(bp); if (size < xfersize) { if (size == 0) break; xfersize = size; } + buf_data = (char *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_block_in((char *)bp->b_data + blkoffset, xfersize); + byte_swap_dir_block_in(buf_data + blkoffset, xfersize); } #endif /* REV_ENDIAN_FS */ if (error = - uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio)) { + uiomove(buf_data + blkoffset, (int)xfersize, uio)) { #if REV_ENDIAN_FS if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_block_in((char *)bp->b_data + blkoffset, xfersize); + byte_swap_dir_block_in(buf_data + blkoffset, xfersize); } #endif /* REV_ENDIAN_FS */ break; @@ -177,17 +182,17 @@ ffs_read(ap) #if REV_ENDIAN_FS if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_out((char *)bp->b_data + blkoffset, xfersize); + byte_swap_dir_out(buf_data + blkoffset, xfersize); } #endif /* REV_ENDIAN_FS */ if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize || uio->uio_offset == ip->i_size)) - bp->b_flags |= B_AGE; - brelse(bp); + buf_markaged(bp); + buf_brelse(bp); } } if (bp != NULL) - brelse(bp); + buf_brelse(bp); ip->i_flag |= IN_ACCESS; return (error); } @@ -196,23 +201,26 @@ ffs_read(ap) * Vnode op for writing. */ ffs_write(ap) - struct vop_write_args /* { + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { - register struct vnode *vp; - register struct uio *uio; - register struct inode *ip; - register FS *fs; - struct buf *bp; - struct proc *p; + return(ffs_write_internal(ap->a_vp, ap->a_uio, ap->a_ioflag, vfs_context_ucred(ap->a_context))); +} + + +ffs_write_internal(vnode_t vp, struct uio *uio, int ioflag, ucred_t cred) +{ + buf_t bp; + proc_t p; + struct inode *ip; + FS *fs; ufs_daddr_t lbn; off_t osize; - int blkoffset, flags, ioflag, resid, rsd, size, xfersize; - int devBlockSize=0; + int blkoffset, flags, resid, rsd, size, xfersize; int save_error=0, save_size=0; int blkalloc = 0; int error = 0; @@ -223,9 +231,6 @@ ffs_write(ap) int rev_endian=0; #endif /* REV_ENDIAN_FS */ - ioflag = ap->a_ioflag; - uio = ap->a_uio; - vp = ap->a_vp; ip = VTOI(vp); #if REV_ENDIAN_FS rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND); @@ -256,32 +261,19 @@ ffs_write(ap) fs = ip->I_FS; if (uio->uio_offset < 0 || - (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) + (u_int64_t)uio->uio_offset + uio_resid(uio) > fs->fs_maxfilesize) return (EFBIG); - if (uio->uio_resid == 0) + if (uio_resid(uio) == 0) return (0); - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - - /* - * Maybe this should be above the vnode op call, but so long as - * file servers have no limits, I don't think it matters. 
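From this point on, ffs_write stops touching uio fields directly: residuals are read through uio_resid and written back through uio_setresid, since the field's width now depends on the caller's ABI (the // LP64todo markers flag the remaining narrowing casts). The save/restore idiom from the error paths below, condensed:

    resid = uio_resid(uio);                 /* was: resid = uio->uio_resid  */
    /* ... transfer ... */
    if (error && (ioflag & IO_UNIT)) {
            uio->uio_offset -= resid - uio_resid(uio);
            uio_setresid(uio, resid);       /* was: uio->uio_resid = resid  */
    }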
- */ - p = uio->uio_procp; - if (vp->v_type == VREG && p && - uio->uio_offset + uio->uio_resid > - p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { - psignal(p, SIGXFSZ); - return (EFBIG); - } - - resid = uio->uio_resid; + // LP64todo - fix this + resid = uio_resid(uio); osize = ip->i_size; flags = 0; if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) flags = B_SYNC; - if (UBCISVALID(vp)) { + if (UBCINFOEXISTS(vp)) { off_t filesize; off_t endofwrite; off_t local_offset; @@ -292,7 +284,8 @@ ffs_write(ap) int fblk; int loopcount; - endofwrite = uio->uio_offset + uio->uio_resid; + // LP64todo - fix this + endofwrite = uio->uio_offset + uio_resid(uio); if (endofwrite > ip->i_size) { filesize = endofwrite; @@ -303,7 +296,8 @@ ffs_write(ap) head_offset = ip->i_size; /* Go ahead and allocate the block that are going to be written */ - rsd = uio->uio_resid; + // LP64todo - fix this + rsd = uio_resid(uio); local_offset = uio->uio_offset; local_flags = 0; if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) @@ -331,7 +325,7 @@ ffs_write(ap) /* Allocate block without reading into a buf */ error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, + lbn, blkoffset + xfersize, cred, &bp, local_flags, &blkalloc); if (error) break; @@ -350,9 +344,9 @@ ffs_write(ap) if(error) { save_error = error; save_size = rsd; - uio->uio_resid -= rsd; - if (file_extended) - filesize -= rsd; + uio_setresid(uio, (uio_resid(uio) - rsd)); + if (file_extended) + filesize -= rsd; } flags = ioflag & IO_SYNC ? IO_SYNC : 0; @@ -387,17 +381,16 @@ ffs_write(ap) * we'll zero fill from the current EOF to where the write begins */ - error = cluster_write(vp, uio, osize, filesize, head_offset, local_offset, devBlockSize, flags); + error = cluster_write(vp, uio, osize, filesize, head_offset, local_offset, flags); if (uio->uio_offset > osize) { if (error && ((ioflag & IO_UNIT)==0)) - (void)VOP_TRUNCATE(vp, uio->uio_offset, - ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); + (void)ffs_truncate_internal(vp, uio->uio_offset, ioflag & IO_SYNC, cred); ip->i_size = uio->uio_offset; ubc_setsize(vp, (off_t)ip->i_size); } if(save_error) { - uio->uio_resid += save_size; + uio_setresid(uio, (uio_resid(uio) + save_size)); if(!error) error = save_error; } @@ -407,48 +400,49 @@ ffs_write(ap) if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) flags = B_SYNC; - for (error = 0; uio->uio_resid > 0;) { + for (error = 0; uio_resid(uio) > 0;) { + char *buf_data; + lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; + if (uio_resid(uio) < xfersize) + // LP64todo - fix this + xfersize = uio_resid(uio); if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags, 0); + error = ffs_balloc(ip, lbn, blkoffset + xfersize, cred, &bp, flags, 0); if (error) break; if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; - - if (UBCISVALID(vp)) - ubc_setsize(vp, (u_long)ip->i_size); /* XXX check errors */ + ubc_setsize(vp, (u_long)ip->i_size); } size = BLKSIZE(fs, ip, lbn) - buf_resid(bp); if (size < xfersize) xfersize = size; - error = - uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + buf_data = (char *)buf_dataptr(bp); + + error = uiomove(buf_data + blkoffset, (int)xfersize, uio); #if REV_ENDIAN_FS if (rev_endian &&
S_ISDIR(ip->i_mode)) { - byte_swap_dir_out((char *)bp->b_data + blkoffset, xfersize); + byte_swap_dir_out(buf_data + blkoffset, xfersize); } #endif /* REV_ENDIAN_FS */ if (doingdirectory == 0 && (ioflag & IO_SYNC)) - (void)bwrite(bp); + (void)buf_bwrite(bp); else if (xfersize + blkoffset == fs->fs_bsize) { - bp->b_flags |= B_AGE; - bdwrite(bp); + buf_markaged(bp); + buf_bdwrite(bp); } else - bdwrite(bp); + buf_bdwrite(bp); if (error || xfersize == 0) break; ip->i_flag |= IN_CHANGE | IN_UPDATE; @@ -459,20 +453,23 @@ ffs_write(ap) * we clear the setuid and setgid bits as a precaution against * tampering. */ - if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) + if (resid > uio_resid(uio) && cred && suser(cred, NULL)) ip->i_mode &= ~(ISUID | ISGID); - if (resid > uio->uio_resid) + if (resid > uio_resid(uio)) VN_KNOTE(vp, NOTE_WRITE | (file_extended ? NOTE_EXTEND : 0)); if (error) { if (ioflag & IO_UNIT) { - (void)VOP_TRUNCATE(vp, osize, - ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); - uio->uio_offset -= resid - uio->uio_resid; - uio->uio_resid = resid; + (void)ffs_truncate_internal(vp, osize, ioflag & IO_SYNC, cred); + // LP64todo - fix this + uio->uio_offset -= resid - uio_resid(uio); + uio_setresid(uio, resid); } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) - error = VOP_UPDATE(vp, (struct timeval *)&time, - (struct timeval *)&time, 1); + } else if (resid > uio_resid(uio) && (ioflag & IO_SYNC)) { + struct timeval tv; + + microtime(&tv); + error = ffs_update(vp, &tv, &tv, 1); + } return (error); } @@ -482,14 +479,14 @@ ffs_write(ap) */ /* ARGSUSED */ ffs_pagein(ap) - struct vop_pagein_args /* { + struct vnop_pagein_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; @@ -499,7 +496,6 @@ ffs_pagein(ap) vm_offset_t pl_offset = ap->a_pl_offset; int flags = ap->a_flags; register struct inode *ip; - int devBlockSize=0; int error; ip = VTOI(vp); @@ -518,10 +514,8 @@ ffs_pagein(ap) panic("%s: type %d", "ffs_pagein", vp->v_type); #endif - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + error = cluster_pagein(vp, pl, pl_offset, f_offset, size, (off_t)ip->i_size, flags); - error = cluster_pagein(vp, pl, pl_offset, f_offset, size, - (off_t)ip->i_size, devBlockSize, flags); /* ip->i_flag |= IN_ACCESS; */ return (error); } @@ -532,14 +526,14 @@ ffs_pagein(ap) * make sure the buf is not in hash queue when you return */ ffs_pageout(ap) - struct vop_pageout_args /* { + struct vnop_pageout_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; @@ -551,7 +545,6 @@ ffs_pageout(ap) register struct inode *ip; register FS *fs; int error ; - int devBlockSize=0; size_t xfer_size = 0; int local_flags=0; off_t local_offset; @@ -561,6 +554,7 @@ ffs_pageout(ap) int save_error =0, save_size=0; vm_offset_t lupl_offset; int nocommit = flags & UPL_NOCOMMIT; + int devBlockSize = 0; struct buf *bp; ip = VTOI(vp); @@ -596,7 +590,7 @@ ffs_pageout(ap) else xfer_size = size; - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(vp)); if (xfer_size & (PAGE_SIZE - 1)) { /* if not a multiple of page size @@ -607,7 +601,7 @@ ffs_pageout(ap) } /* - * once the block allocation is moved to ufs_cmap + * once the block allocation is moved to 
ufs_blockmap * we can remove all the size and offset checks above * cluster_pageout does all of this now * we need to continue to do it here so as not to @@ -625,7 +619,7 @@ ffs_pageout(ap) xsize = resid; /* Allocate block without reading into a buf */ error = ffs_blkalloc(ip, - lbn, blkoffset + xsize, ap->a_cred, + lbn, blkoffset + xsize, vfs_context_ucred(ap->a_context), local_flags); if (error) break; @@ -640,7 +634,7 @@ ffs_pageout(ap) } - error = cluster_pageout(vp, pl, pl_offset, f_offset, round_page_32(xfer_size), ip->i_size, devBlockSize, flags); + error = cluster_pageout(vp, pl, pl_offset, f_offset, round_page_32(xfer_size), ip->i_size, flags); if(save_error) { lupl_offset = size - save_size; diff --git a/bsd/ufs/ufs/ufs_vfsops.c b/bsd/ufs/ufs/ufs_vfsops.c index 06e006cf3..2285303a6 100644 --- a/bsd/ufs/ufs/ufs_vfsops.c +++ b/bsd/ufs/ufs/ufs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,10 +62,9 @@ #include #include -#include +#include #include -#include -#include +#include #include #include @@ -82,10 +81,10 @@ */ /* ARGSUSED */ int -ufs_start(mp, flags, p) +ufs_start(mp, flags, context) struct mount *mp; int flags; - struct proc *p; + vfs_context_t context; { return (0); @@ -95,14 +94,15 @@ ufs_start(mp, flags, p) * Return the root of a filesystem. */ int -ufs_root(mp, vpp) +ufs_root(mp, vpp, context) struct mount *mp; struct vnode **vpp; + vfs_context_t context; { struct vnode *nvp; int error; - if (error = VFS_VGET(mp, (void *)ROOTINO, &nvp)) + if (error = VFS_VGET(mp, (ino64_t)ROOTINO, &nvp, context)) return (error); *vpp = nvp; return (0); @@ -112,20 +112,21 @@ ufs_root(mp, vpp) * Do operations associated with quotas */ int -ufs_quotactl(mp, cmds, uid, arg, p) +ufs_quotactl(mp, cmds, uid, datap, context) struct mount *mp; int cmds; uid_t uid; - caddr_t arg; - struct proc *p; + caddr_t datap; + vfs_context_t context; { + struct proc *p = vfs_context_proc(context); int cmd, type, error; #if !QUOTA - return (EOPNOTSUPP); + return (ENOTSUP); #else if (uid == -1) - uid = p->p_cred->p_ruid; + uid = vfs_context_ucred(context)->cr_ruid; cmd = cmds >> SUBCMDSHIFT; switch (cmd) { @@ -133,40 +134,40 @@ ufs_quotactl(mp, cmds, uid, arg, p) case Q_QUOTASTAT: break; case Q_GETQUOTA: - if (uid == p->p_cred->p_ruid) + if (uid == vfs_context_ucred(context)->cr_ruid) break; /* fall through */ default: - if (error = suser(p->p_ucred, &p->p_acflag)) + if (error = vfs_context_suser(context)) return (error); } type = cmds & SUBCMDMASK; if ((u_int)type >= MAXQUOTAS) return (EINVAL); - if (vfs_busy(mp, LK_NOWAIT, 0, p)) + if (vfs_busy(mp, LK_NOWAIT)) return (0); switch (cmd) { case Q_QUOTAON: - error = quotaon(p, mp, type, arg, UIO_USERSPACE); + error = quotaon(context, mp, type, datap); break; case Q_QUOTAOFF: - error = quotaoff(p, mp, type); + error = quotaoff(mp, type); break; case Q_SETQUOTA: - error = setquota(mp, uid, type, arg); + error = setquota(mp, uid, type, datap); break; case Q_SETUSE: - error = setuse(mp, uid, type, arg); + error = setuse(mp, uid, type, datap); break; case Q_GETQUOTA: - error = getquota(mp, uid, type, arg); + error = getquota(mp, uid, type, datap); break; case Q_SYNC: @@ -174,14 +175,15 @@ ufs_quotactl(mp, cmds, uid, arg, p) break; case Q_QUOTASTAT: - error = quotastat(mp, type, arg); + error = quotastat(mp, type, datap); break; default: error = EINVAL; break; } - vfs_unbusy(mp, p); + vfs_unbusy(mp); + return 
(error); #endif } @@ -205,49 +207,3 @@ ufs_init(vfsp) return (0); } -/* - * This is the generic part of fhtovp called after the underlying - * filesystem has validated the file handle. - * - * Verify that a host should have access to a filesystem, and if so - * return a vnode for the presented file handle. - */ -int -ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp) - register struct mount *mp; - struct ufid *ufhp; - struct mbuf *nam; - struct vnode **vpp; - int *exflagsp; - struct ucred **credanonp; -{ - register struct inode *ip; - register struct netcred *np; - register struct ufsmount *ump = VFSTOUFS(mp); - struct vnode *nvp; - int error; - - /* - * Get the export permission structure for this tuple. - */ - np = vfs_export_lookup(mp, &ump->um_export, nam); - if (nam && (np == NULL)) - return (EACCES); - - if (error = VFS_VGET(mp, (void *)ufhp->ufid_ino, &nvp)) { - *vpp = NULLVP; - return (error); - } - ip = VTOI(nvp); - if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen) { - vput(nvp); - *vpp = NULLVP; - return (ESTALE); - } - *vpp = nvp; - if (np) { - *exflagsp = np->netc_exflags; - *credanonp = &np->netc_anon; - } - return (0); -} diff --git a/bsd/ufs/ufs/ufs_vnops.c b/bsd/ufs/ufs/ufs_vnops.c index f45e51d62..9e4fe6eda 100644 --- a/bsd/ufs/ufs/ufs_vnops.c +++ b/bsd/ufs/ufs/ufs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,25 +66,26 @@ #include #include #include -#include +#include #include #include #include +#include #include -#include -#include +#include +#include #include #include #include #include #include +#include #include #include #include -#include #include #include #include @@ -96,49 +97,40 @@ #include #endif /* REV_ENDIAN_FS */ -static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); -static int ufs_chown - __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); -static int filt_ufsread __P((struct knote *kn, long hint)); -static int filt_ufswrite __P((struct knote *kn, long hint)); -static int filt_ufsvnode __P((struct knote *kn, long hint)); -static void filt_ufsdetach __P((struct knote *kn)); -static int ufs_kqfilter __P((struct vop_kqfilter_args *ap)); - -union _qcvt { - int64_t qcvt; - int32_t val[2]; -}; -#define SETHIGH(q, h) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_HIGHWORD] = (h); \ - (q) = tmp.qcvt; \ -} -#define SETLOW(q, l) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_LOWWORD] = (l); \ - (q) = tmp.qcvt; \ -} + +static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct proc *); +static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t, + struct proc *); +static int filt_ufsread(struct knote *kn, long hint); +static int filt_ufswrite(struct knote *kn, long hint); +static int filt_ufsvnode(struct knote *kn, long hint); +static void filt_ufsdetach(struct knote *kn); + +#if FIFO +extern void fifo_printinfo(struct vnode *vp); +#endif /* FIFO */ +extern int ufs_direnter2(struct vnode *dvp, struct direct *dirp, + vfs_context_t ctx); + +static int ufs_readdirext(vnode_t vp, uio_t uio, int *eofflag, int *numdirent, + vfs_context_t context); /* * Create a regular file */ int ufs_create(ap) - struct vop_create_args /* { + struct vnop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { int error; -
if (error = - ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), - ap->a_dvp, ap->a_vpp, ap->a_cnp)) + if ( (error = ufs_makeinode(ap->a_vap, ap->a_dvp, ap->a_vpp, ap->a_cnp)) ) return (error); VN_KNOTE(ap->a_dvp, NOTE_WRITE); return (0); @@ -147,24 +139,37 @@ ufs_create(ap) /* * Mknod vnode call */ -/* ARGSUSED */ int ufs_mknod(ap) - struct vop_mknod_args /* { + struct vnop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { - struct vattr *vap = ap->a_vap; + struct vnode_attr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; + struct vnode *dvp = ap->a_dvp; + struct vnode *tvp; struct inode *ip; + struct componentname *cnp = ap->a_cnp; int error; - if (error = - ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, vpp, ap->a_cnp)) + /* use relookup to force correct directory hints */ + cnp->cn_flags &= ~MODMASK; + cnp->cn_flags |= (WANTPARENT | NOCACHE); + cnp->cn_nameiop = CREATE; + + (void) relookup(dvp, &tvp, cnp); + + /* get rid of reference relookup returned */ + if (tvp) + vnode_put(tvp); + + if ( (error = + ufs_makeinode(ap->a_vap, ap->a_dvp, vpp, ap->a_cnp)) ) return (error); VN_KNOTE(ap->a_dvp, NOTE_WRITE); ip = VTOI(*vpp); @@ -176,15 +181,6 @@ ufs_mknod(ap) */ ip->i_rdev = vap->va_rdev; } - /* - * Remove inode so that it will be reloaded by VFS_VGET and - * checked to see if it is an alias of an existing entry in - * the inode cache. - */ - vput(*vpp); - (*vpp)->v_type = VNON; - vgone(*vpp); - *vpp = 0; return (0); } @@ -193,14 +189,12 @@ ufs_mknod(ap) * * Nothing to do. */ -/* ARGSUSED */ int ufs_open(ap) - struct vop_open_args /* { + struct vnop_open_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { @@ -218,160 +212,77 @@ ufs_open(ap) * * Update the times on the inode. */ -/* ARGSUSED */ int ufs_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); + struct timeval tv; - simple_lock(&vp->v_interlock); - if ((!UBCISVALID(vp) && vp->v_usecount > 1) - || (UBCISVALID(vp) && ubc_isinuse(vp, 1))) - ITIMES(ip, &time, &time); - simple_unlock(&vp->v_interlock); - - if (!VOP_ISLOCKED(vp)) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - - cluster_push(vp); - - VOP_UNLOCK(vp, 0, ap->a_p); - } - return (0); -} - -int -ufs_access(ap) - struct vop_access_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct ucred *cred = ap->a_cred; - mode_t mask, mode = ap->a_mode; - register gid_t *gp; - int i, error; - - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. - */ - if (mode & VWRITE) { - switch (vp->v_type) { - case VDIR: - case VLNK: - case VREG: - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); -#if QUOTA - if (error = getinoquota(ip)) - return (error); -#endif - break; - } - } - - /* If immutable bit set, nobody gets to write it. */ - if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) - return (EPERM); - - /* Otherwise, user id 0 always gets access. 
*/ - if (cred->cr_uid == 0) - return (0); - - mask = 0; - - /* Otherwise, check the owner. */ - if (cred->cr_uid == ip->i_uid) { - if (mode & VEXEC) - mask |= S_IXUSR; - if (mode & VREAD) - mask |= S_IRUSR; - if (mode & VWRITE) - mask |= S_IWUSR; - return ((ip->i_mode & mask) == mask ? 0 : EACCES); + if (vnode_isinuse(vp, 1)) { + microtime(&tv); + ITIMES(ip, &tv, &tv); } - /* Otherwise, check the groups. */ - for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) - if (ip->i_gid == *gp) { - if (mode & VEXEC) - mask |= S_IXGRP; - if (mode & VREAD) - mask |= S_IRGRP; - if (mode & VWRITE) - mask |= S_IWGRP; - return ((ip->i_mode & mask) == mask ? 0 : EACCES); - } + cluster_push(vp, IO_CLOSE); - /* Otherwise, check everyone else. */ - if (mode & VEXEC) - mask |= S_IXOTH; - if (mode & VREAD) - mask |= S_IROTH; - if (mode & VWRITE) - mask |= S_IWOTH; - return ((ip->i_mode & mask) == mask ? 0 : EACCES); + return (0); } -/* ARGSUSED */ int ufs_getattr(ap) - struct vop_getattr_args /* { + struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); - register struct vattr *vap = ap->a_vap; + register struct vnode_attr *vap = ap->a_vap; int devBlockSize=0; + struct timeval tv; - ITIMES(ip, &time, &time); + microtime(&tv); + + ITIMES(ip, &tv, &tv); /* * Copy from inode table */ - vap->va_fsid = ip->i_dev; - vap->va_fileid = ip->i_number; - vap->va_mode = ip->i_mode & ~IFMT; - vap->va_nlink = ip->i_nlink; - vap->va_uid = ip->i_uid; - vap->va_gid = ip->i_gid; - vap->va_rdev = (dev_t)ip->i_rdev; - vap->va_size = ip->i_din.di_size; - vap->va_atime.tv_sec = ip->i_atime; - vap->va_atime.tv_nsec = ip->i_atimensec; - vap->va_mtime.tv_sec = ip->i_mtime; - vap->va_mtime.tv_nsec = ip->i_mtimensec; - vap->va_ctime.tv_sec = ip->i_ctime; - vap->va_ctime.tv_nsec = ip->i_ctimensec; - vap->va_flags = ip->i_flags; - vap->va_gen = ip->i_gen; - /* this doesn't belong here */ + VATTR_RETURN(vap, va_fsid, ip->i_dev); + VATTR_RETURN(vap, va_fileid, ip->i_number); + VATTR_RETURN(vap, va_mode, ip->i_mode & ~IFMT); + VATTR_RETURN(vap, va_nlink, ip->i_nlink); + VATTR_RETURN(vap, va_uid, ip->i_uid); + VATTR_RETURN(vap, va_gid, ip->i_gid); + VATTR_RETURN(vap, va_rdev, (dev_t)ip->i_rdev); + VATTR_RETURN(vap, va_data_size, ip->i_din.di_size); + vap->va_access_time.tv_sec = ip->i_atime; + vap->va_access_time.tv_nsec = ip->i_atimensec; + VATTR_SET_SUPPORTED(vap, va_access_time); + vap->va_modify_time.tv_sec = ip->i_mtime; + vap->va_modify_time.tv_nsec = ip->i_mtimensec; + VATTR_SET_SUPPORTED(vap, va_modify_time); + vap->va_change_time.tv_sec = ip->i_ctime; + vap->va_change_time.tv_nsec = ip->i_ctimensec; + VATTR_SET_SUPPORTED(vap, va_change_time); + VATTR_RETURN(vap, va_flags, ip->i_flags); + VATTR_RETURN(vap, va_gen, ip->i_gen); if (vp->v_type == VBLK) - vap->va_blocksize = BLKDEV_IOSIZE; + VATTR_RETURN(vap, va_iosize, BLKDEV_IOSIZE); else if (vp->v_type == VCHR) - vap->va_blocksize = MAXPHYSIO; + VATTR_RETURN(vap, va_iosize, MAXPHYSIO); else - vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); - vap->va_bytes = dbtob((u_quad_t)ip->i_blocks, devBlockSize); - vap->va_type = vp->v_type; - vap->va_filerev = ip->i_modrev; + VATTR_RETURN(vap, va_iosize, vp->v_mount->mnt_vfsstat.f_iosize); + devBlockSize = vfs_devblocksize(vnode_mount(vp)); + VATTR_RETURN(vap, va_data_alloc, 
dbtob((u_quad_t)ip->i_blocks, devBlockSize)); + VATTR_RETURN(vap, va_type, vp->v_type); + VATTR_RETURN(vap, va_filerev, ip->i_modrev); return (0); } @@ -380,109 +291,72 @@ ufs_getattr(ap) */ int ufs_setattr(ap) - struct vop_setattr_args /* { + struct vnop_setattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; + struct vnode_attr *a_vap; struct proc *a_p; + vfs_context_t a_context; } */ *ap; { - struct vattr *vap = ap->a_vap; + struct vnode_attr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); - struct ucred *cred = ap->a_cred; - struct proc *p = ap->a_p; + kauth_cred_t cred = vfs_context_ucred(ap->a_context); + struct proc *p = vfs_context_proc(ap->a_context); struct timeval atimeval, mtimeval; int error; + uid_t nuid; + gid_t ngid; /* - * Check for unsettable attributes. + * Go through the fields and update iff set. */ - if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || - (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || - (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || - ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { - return (EINVAL); - } - if (vap->va_flags != VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if (cred->cr_uid != ip->i_uid && - (error = suser(cred, &p->p_acflag))) - return (error); - if (cred->cr_uid == 0) { - if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) && - securelevel > 0) - return (EPERM); - ip->i_flags = vap->va_flags; - } else { - if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) || - (vap->va_flags & UF_SETTABLE) != vap->va_flags) - return (EPERM); - ip->i_flags &= SF_SETTABLE; - ip->i_flags |= (vap->va_flags & UF_SETTABLE); - } + if (VATTR_IS_ACTIVE(vap, va_flags)) { + ip->i_flags = vap->va_flags; ip->i_flag |= IN_CHANGE; - if (vap->va_flags & (IMMUTABLE | APPEND)) - return (0); } - if (ip->i_flags & (IMMUTABLE | APPEND)) - return (EPERM); - /* - * Go through the fields and update iff not VNOVAL. - */ - if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) + VATTR_SET_SUPPORTED(vap, va_flags); + + nuid = VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uid_t)VNOVAL; + ngid = VATTR_IS_ACTIVE(vap, va_gid) ? vap->va_gid : (gid_t)VNOVAL; + if (nuid != (uid_t)VNOVAL || ngid != (gid_t)VNOVAL) { + if ( (error = ufs_chown(vp, nuid, ngid, cred, p)) ) return (error); } - if (vap->va_size != VNOVAL) { - /* - * Disallow write attempts on read-only file systems; - * unless the file is a socket, fifo, or a block or - * character device resident on the file system. 
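ufs_getattr and ufs_setattr above adopt the vnode_attr handshake: the caller marks the fields it wants active, the filesystem answers scalar values with VATTR_RETURN (which also marks the field supported), fills multi-member fields by hand before VATTR_SET_SUPPORTED, and on the setattr side acts only on VATTR_IS_ACTIVE fields, acknowledging each one it handled. A minimal sketch of both sides, using only the macros as they appear in the hunks:

    /* getattr: report a scalar and mark it supported in one step */
    VATTR_RETURN(vap, va_nlink, ip->i_nlink);

    /* timestamps are filled member-by-member, then acknowledged */
    vap->va_modify_time.tv_sec  = ip->i_mtime;
    vap->va_modify_time.tv_nsec = ip->i_mtimensec;
    VATTR_SET_SUPPORTED(vap, va_modify_time);

    /* setattr: act only on fields the caller marked active, and
     * acknowledge what was actually handled */
    if (VATTR_IS_ACTIVE(vap, va_mode)) {
            if ((error = ufs_chmod(vp, (int)vap->va_mode, cred, p)))
                    return (error);
    }
    VATTR_SET_SUPPORTED(vap, va_mode);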
- */ - switch (vp->v_type) { - case VDIR: - return (EISDIR); - case VLNK: - case VREG: - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - break; - } - if (error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); + + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if ( (error = ffs_truncate_internal(vp, vap->va_data_size, vap->va_vaflags & 0xffff, cred)) ) return (error); } + VATTR_SET_SUPPORTED(vap, va_data_size); + ip = VTOI(vp); - if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - if (cred->cr_uid != ip->i_uid && - (error = suser(cred, &p->p_acflag)) && - ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || - (error = VOP_ACCESS(vp, VWRITE, cred, p)))) - return (error); - if (vap->va_atime.tv_sec != VNOVAL) + if (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time)) { + if (VATTR_IS_ACTIVE(vap, va_access_time)) ip->i_flag |= IN_ACCESS; - if (vap->va_mtime.tv_sec != VNOVAL) + if (VATTR_IS_ACTIVE(vap, va_modify_time)) ip->i_flag |= IN_CHANGE | IN_UPDATE; - atimeval.tv_sec = vap->va_atime.tv_sec; - atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; - mtimeval.tv_sec = vap->va_mtime.tv_sec; - mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - if (error = VOP_UPDATE(vp, &atimeval, &mtimeval, 1)) + atimeval.tv_sec = vap->va_access_time.tv_sec; + atimeval.tv_usec = vap->va_access_time.tv_nsec / 1000; + mtimeval.tv_sec = vap->va_modify_time.tv_sec; + mtimeval.tv_usec = vap->va_modify_time.tv_nsec / 1000; + if ( (error = ffs_update(vp, &atimeval, &mtimeval, 1)) ) return (error); } - error = 0; - if (vap->va_mode != (mode_t)VNOVAL) { - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (EROFS); - error = ufs_chmod(vp, (int)vap->va_mode, cred, p); + VATTR_SET_SUPPORTED(vap, va_access_time); + VATTR_SET_SUPPORTED(vap, va_modify_time); + + if (VATTR_IS_ACTIVE(vap, va_mode)) { + if ((error = ufs_chmod(vp, (int)vap->va_mode, cred, p))) + return (error); } + VATTR_SET_SUPPORTED(vap, va_mode); + VN_KNOTE(vp, NOTE_ATTRIB); - return (error); + + return (0); } /* @@ -490,24 +364,10 @@ ufs_setattr(ap) * Inode must be locked before calling. */ static int -ufs_chmod(vp, mode, cred, p) - register struct vnode *vp; - register int mode; - register struct ucred *cred; - struct proc *p; +ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) { register struct inode *ip = VTOI(vp); - int error; - if (cred->cr_uid != ip->i_uid && - (error = suser(cred, &p->p_acflag))) - return (error); - if (cred->cr_uid) { - if (vp->v_type != VDIR && (mode & S_ISTXT)) - return (EFTYPE); - if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) - return (EPERM); - } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; @@ -519,17 +379,14 @@ ufs_chmod(vp, mode, cred, p) * inode must be locked prior to call. 
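Note what disappeared from ufs_chmod above and from ufs_chown below: the in-filesystem ownership and superuser checks. Under the kauth model those policy decisions are made by the VFS layer before the vnop runs, so the filesystem only applies the change; where a residual identity test survives, it goes through an accessor rather than reaching into the ucred. The contrast, taken from the removed and added lines (the EACCES branch is illustrative):

    /* old: each vnop re-derived policy from the raw ucred */
    if (cred->cr_uid != ip->i_uid &&
        (error = suser(cred, &p->p_acflag)))
            return (error);

    /* new: authorization happened upstream; remaining uid
     * comparisons use the kauth accessor */
    if (ip->i_uid != kauth_cred_getuid(cred))
            return (EACCES);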
*/ static int -ufs_chown(vp, uid, gid, cred, p) - register struct vnode *vp; - uid_t uid; - gid_t gid; - struct ucred *cred; - struct proc *p; +ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, + struct proc *p) { register struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; + int is_member; #if QUOTA register int i; int64_t change; /* in bytes */ @@ -540,34 +397,26 @@ ufs_chown(vp, uid, gid, cred, p) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; - /* - * If we don't own the file, are trying to change the owner - * of the file, or are not a member of the target group, - * the caller must be superuser or the call fails. - */ - if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || - (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) && - (error = suser(cred, &p->p_acflag))) - return (error); ogid = ip->i_gid; ouid = ip->i_uid; #if QUOTA - if (error = getinoquota(ip)) + if ( (error = getinoquota(ip)) ) return (error); if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); + dqrele(ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); + dqrele(ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(vp)); + change = dbtob((int64_t)ip->i_blocks, devBlockSize); (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, ip->i_dquot[i]); + dqrele(ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif @@ -576,11 +425,11 @@ ufs_chown(vp, uid, gid, cred, p) #if QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); + dqrele(ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); + dqrele(ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { @@ -590,7 +439,7 @@ ufs_chown(vp, uid, gid, cred, p) (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { - dqrele(vp, ip->i_dquot[i]); + dqrele(ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } @@ -598,11 +447,11 @@ ufs_chown(vp, uid, gid, cred, p) ip->i_uid = ouid; if (getinoquota(ip) == 0) { if (ouid == uid) { - dqrele(vp, ip->i_dquot[USRQUOTA]); + dqrele(ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { - dqrele(vp, ip->i_dquot[GRPQUOTA]); + dqrele(ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); @@ -616,23 +465,17 @@ good: #endif /* QUOTA */ if (ouid != uid || ogid != gid) ip->i_flag |= IN_CHANGE; - if (ouid != uid && cred->cr_uid != 0) - ip->i_mode &= ~ISUID; - if (ogid != gid && cred->cr_uid != 0) - ip->i_mode &= ~ISGID; return (0); } -/* ARGSUSED */ int ufs_ioctl(ap) - struct vop_ioctl_args /* { + struct vnop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { @@ -648,21 +491,17 @@ ufs_ioctl(ap) vp = ap->a_vp; - VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); - ra = (struct radvisory *)(ap->a_data); ip = VTOI(vp); fs = ip->i_fs; if ((u_int64_t)ra->ra_offset >= ip->i_size) { - VOP_UNLOCK(vp, 0, ap->a_p); return (EFBIG); } - VOP_DEVBLOCKSIZE(ip->i_devvp, &devBlockSize); + devBlockSize = vfs_devblocksize(vnode_mount(vp)); + + error = advisory_read(vp, ip->i_size, 
ra->ra_offset, ra->ra_count); - error = advisory_read(vp, ip->i_size, ra->ra_offset, ra->ra_count, devBlockSize); - VOP_UNLOCK(vp, 0, ap->a_p); return (error); } default: @@ -670,19 +509,9 @@ ufs_ioctl(ap) } } -/* ARGSUSED */ int -ufs_select(ap) - struct vop_select_args /* { - struct vnode *a_vp; - int a_which; - int a_fflags; - struct ucred *a_cred; - void *a_wql; - struct proc *a_p; - } */ *ap; +ufs_select(__unused struct vnop_select_args *ap) { - /* * We should really check to see if I/O is possible. */ @@ -694,91 +523,65 @@ ufs_select(ap) * * NB Currently unsupported. */ -/* ARGSUSED */ int -ufs_mmap(ap) - struct vop_mmap_args /* { - struct vnode *a_vp; - int a_fflags; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap; +ufs_mmap(__unused struct vnop_mmap_args *ap) { - return (EINVAL); } -/* - * Seek on a file - * - * Nothing to do, so just return. - */ -/* ARGSUSED */ int -ufs_seek(ap) - struct vop_seek_args /* { +ufs_remove(ap) + struct vnop_remove_args /* { + struct vnode *a_dvp; struct vnode *a_vp; - off_t a_oldoff; - off_t a_newoff; - struct ucred *a_cred; + struct componentname *a_cnp; + int *a_flags; + vfs_context_t a_context; } */ *ap; { - - return (0); + return(ufs_remove_internal(ap->a_dvp, ap->a_vp, ap->a_cnp, ap->a_flags)); } + int -ufs_remove(ap) - struct vop_remove_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - } */ *ap; +ufs_remove_internal(vnode_t dvp, vnode_t vp, struct componentname *cnp, int flags) { struct inode *ip; - struct vnode *vp = ap->a_vp; - struct vnode *dvp = ap->a_dvp; + struct vnode *tvp; int error; - ip = VTOI(vp); - if ((ip->i_flags & (IMMUTABLE | APPEND)) || - (VTOI(dvp)->i_flags & APPEND)) { - error = EPERM; - goto out; - } - - if (ap->a_cnp->cn_flags & NODELETEBUSY) { + if (flags & VNODE_REMOVE_NODELETEBUSY) { /* Caller requested Carbon delete semantics */ - if ((!UBCISVALID(vp) && vp->v_usecount > 1) - || (UBCISVALID(vp) && ubc_isinuse(vp, 1))) { + if (vnode_isinuse(vp, 0)) { error = EBUSY; goto out; } } + cnp->cn_flags &= ~MODMASK; + cnp->cn_flags |= (WANTPARENT | NOCACHE); + cnp->cn_nameiop = DELETE; + + (void) relookup(dvp, &tvp, cnp); + + if (tvp == NULL) + return (ENOENT); + if (tvp != vp) + panic("ufs_remove_internal: relookup returned a different vp"); + /* + * get rid of reference relookup returned + */ + vnode_put(tvp); + + + ip = VTOI(vp); - if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) { + if ((error = ufs_dirremove(dvp, cnp)) == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; VN_KNOTE(vp, NOTE_DELETE); VN_KNOTE(dvp, NOTE_WRITE); } - - if (dvp != vp) - VOP_UNLOCK(vp, 0, ap->a_cnp->cn_proc); - - (void) ubc_uncache(vp); - - vrele(vp); - vput(dvp); - - return (error); - out: - if (dvp == vp) - vrele(vp); - else - vput(vp); - vput(dvp); return (error); } @@ -787,98 +590,72 @@ out: */ int ufs_link(ap) - struct vop_link_args /* { + struct vnop_link_args /* { struct vnode *a_vp; struct vnode *a_tdvp; struct componentname *a_cnp; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); struct inode *ip; struct timeval tv; int error; -#if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_link: no name"); -#endif - if (tdvp->v_mount != vp->v_mount) { - VOP_ABORTOP(tdvp, cnp); - error = EXDEV; - goto out2; - } - if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { - VOP_ABORTOP(tdvp, cnp); - 
goto out2; - } ip = VTOI(vp); + if ((nlink_t)ip->i_nlink >= LINK_MAX) { - VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } - if (ip->i_flags & (IMMUTABLE | APPEND)) { - VOP_ABORTOP(tdvp, cnp); - error = EPERM; - goto out1; - } ip->i_nlink++; ip->i_flag |= IN_CHANGE; - tv = time; - error = VOP_UPDATE(vp, &tv, &tv, 1); + microtime(&tv); + error = ffs_update(vp, &tv, &tv, 1); if (!error) error = ufs_direnter(ip, tdvp, cnp); if (error) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } - { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } VN_KNOTE(vp, NOTE_LINK); VN_KNOTE(tdvp, NOTE_WRITE); out1: - if (tdvp != vp) - VOP_UNLOCK(vp, 0, p); -out2: - vput(tdvp); return (error); } /* * whiteout vnode call */ + int ufs_whiteout(ap) - struct vop_whiteout_args /* { + struct vnop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; + vfs_context_t a_context; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; - int error; + int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); - return (EOPNOTSUPP); + return (ENOTSUP); case CREATE: /* create a new directory whiteout */ #if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif @@ -887,7 +664,7 @@ ufs_whiteout(ap) newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter2(dvp, &newdir, cnp->cn_context); break; case DELETE: @@ -901,12 +678,6 @@ ufs_whiteout(ap) error = ufs_dirremove(dvp, cnp); break; } - if (cnp->cn_flags & HASBUF) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } return (error); } @@ -937,13 +708,14 @@ ufs_whiteout(ap) */ int ufs_rename(ap) - struct vop_rename_args /* { + struct vnop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */ *ap; { struct vnode *tvp = ap->a_tvp; @@ -952,86 +724,78 @@ ufs_rename(ap) struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; - struct proc *p = fcnp->cn_proc; + vfs_context_t ctx = fcnp->cn_context; + struct proc *p = vfs_context_proc(ctx); struct inode *ip, *xp, *dp; struct dirtemplate dirbuf; struct timeval tv; - int doingdirectory = 0, oldparent = 0, newparent = 0; + ino_t doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0, ioflag; u_char namlen; + struct vnode *rl_vp = NULL; -#if DIAGNOSTIC - if ((tcnp->cn_flags & HASBUF) == 0 || - (fcnp->cn_flags & HASBUF) == 0) - panic("ufs_rename: no name"); -#endif - /* - * Check for cross-device rename. - */ - if ((fvp->v_mount != tdvp->v_mount) || - (tvp && (fvp->v_mount != tvp->v_mount))) { - error = EXDEV; -abortit: - VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ - vrele(fdvp); - vrele(fvp); - return (error); - } /* - * Check if just deleting a link name. 
+ * Check if just deleting a link name or if we've lost a race. + * If another process completes the same rename after we've looked + * up the source and have blocked looking up the target, then the + * source and target inodes may be identical now although the + * names were never linked. */ - if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || - (VTOI(tdvp)->i_flags & APPEND))) { - error = EPERM; - goto abortit; - } if (fvp == tvp) { if (fvp->v_type == VDIR) { - error = EINVAL; + /* + * Linked directories are impossible, so we must + * have lost the race. Pretend that the rename + * completed before the lookup. + */ +#ifdef UFS_RENAME_DEBUG + printf("ufs_rename: fvp == tvp for directories\n"); +#endif + error = ENOENT; goto abortit; } - /* Release destination completely. */ - VOP_ABORTOP(tdvp, tcnp); - vput(tdvp); - vput(tvp); - - /* Delete source. */ - vrele(fdvp); - vrele(fvp); - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); - fcnp->cn_nameiop = DELETE; - (void) relookup(fdvp, &fvp, fcnp); - return (VOP_REMOVE(fdvp, fvp, fcnp)); + /* + * don't need to check in here for permissions, must already have been granted + * ufs_remove_internal now does the relookup + */ + error = ufs_remove_internal(fdvp, fvp, fcnp, 0); + + return (error); } - if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) + /* + * because the vnode_authorization code may have looked up in this directory + * between the original lookup and the actual call to VNOP_RENAME, we need + * to reset the directory hints... since we haven't dropped the FSNODELOCK + * on tdvp since this whole thing started, we expect relookup to return + * tvp (which may be NULL) + */ + tcnp->cn_flags &= ~MODMASK; + tcnp->cn_flags |= (WANTPARENT | NOCACHE); + + if ( (error = relookup(tdvp, &rl_vp, tcnp)) ) + panic("ufs_rename: relookup on target returned error"); + if (rl_vp != tvp) { + /* + * Don't panic. The only way this state will be reached is if + * another rename has taken effect. In that case, it's safe + * to restart this rename and let things sort themselves out. + */ + if (rl_vp) + vnode_put(rl_vp); + error = ERESTART; goto abortit; + } + if (rl_vp) { + vnode_put(rl_vp); + rl_vp = NULL; + } dp = VTOI(fdvp); ip = VTOI(fvp); - if ((ip->i_flags & (IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { - VOP_UNLOCK(fvp, 0, p); - error = EPERM; - goto abortit; - } + if ((ip->i_mode & IFMT) == IFDIR) { - /* - * Avoid ".", "..", and aliases of "." for obvious reasons. - */ - if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || - dp == ip || (fcnp->cn_flags&ISDOTDOT) || - (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp, 0, p); + if (ip->i_flag & IN_RENAME) { error = EINVAL; goto abortit; } @@ -1040,7 +804,6 @@ abortit: doingdirectory++; } VN_KNOTE(fdvp, NOTE_WRITE); /* XXX right place? */ - vrele(fdvp); /* * When the target exists, both the directory @@ -1059,9 +822,8 @@ abortit: */ ip->i_nlink++; ip->i_flag |= IN_CHANGE; - tv = time; - if (error = VOP_UPDATE(fvp, &tv, &tv, 1)) { - VOP_UNLOCK(fvp, 0, p); + microtime(&tv); + if ( (error = ffs_update(fvp, &tv, &tv, 1)) ) { goto bad; } @@ -1075,25 +837,26 @@ abortit: * to namei, as the parent directory is unlocked by the * call to checkpath(). 
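 *
 * (Roughly, ufs_checkpath() walks ".." from the target directory
 * toward the root; if it meets the source directory on the way, the
 * rename would detach a subtree into itself, e.g. "mv /a /a/b/c",
 * so the operation is refused.)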
*/ - error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); - VOP_UNLOCK(fvp, 0, p); + if (oldparent != dp->i_number) newparent = dp->i_number; + if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; - if (xp != NULL) - vput(tvp); - if (error = ufs_checkpath(ip, dp, tcnp->cn_cred)) - goto out; - if ((tcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost to startdir"); - if (error = relookup(tdvp, &tvp, tcnp)) - goto out; + + if ( (error = ufs_checkpath(ip, dp, vfs_context_ucred(tcnp->cn_context))) ) + goto bad; + + if ( (error = relookup(tdvp, &tvp, tcnp)) ) + goto bad; + rl_vp = tvp; + dp = VTOI(tdvp); - xp = NULL; if (tvp) xp = VTOI(tvp); + else + xp = NULL; } /* * 2) If target doesn't exist, link the target @@ -1117,19 +880,18 @@ abortit: } dp->i_nlink++; dp->i_flag |= IN_CHANGE; - if (error = VOP_UPDATE(tdvp, &tv, &tv, 1)) + if ( (error = ffs_update(tdvp, &tv, &tv, 1)) ) goto bad; } - if (error = ufs_direnter(ip, tdvp, tcnp)) { + if ( (error = ufs_direnter(ip, tdvp, tcnp)) ) { if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; - (void)VOP_UPDATE(tdvp, &tv, &tv, 1); + (void)ffs_update(tdvp, &tv, &tv, 1); } goto bad; } VN_KNOTE(tdvp, NOTE_WRITE); - vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("rename: EXDEV"); @@ -1138,25 +900,13 @@ abortit: */ if (xp->i_number == ip->i_number) panic("rename: same file"); - /* - * If the parent directory is "sticky", then the user must - * own the parent directory, or the destination of the rename, - * otherwise the destination may not be changed (except by - * root). This implements append-only directories. - */ - if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && - tcnp->cn_cred->cr_uid != dp->i_uid && - xp->i_uid != tcnp->cn_cred->cr_uid) { - error = EPERM; - goto bad; - } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { - if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || + if (!ufs_dirempty(xp, dp->i_number, vfs_context_ucred(tcnp->cn_context)) || xp->i_nlink > 2) { error = ENOTEMPTY; goto bad; @@ -1170,7 +920,7 @@ abortit: error = EISDIR; goto bad; } - if (error = ufs_dirrewrite(dp, ip, tcnp)) + if ( (error = ufs_dirrewrite(dp, ip, tcnp)) ) goto bad; /* * If the target directory is in the same @@ -1183,7 +933,6 @@ abortit: dp->i_flag |= IN_CHANGE; } VN_KNOTE(tdvp, NOTE_WRITE); - vput(tdvp); /* * Adjust the link count of the target to * reflect the dirrewrite above. If this is @@ -1200,33 +949,35 @@ abortit: panic("rename: linked directory"); ioflag = ((tvp)->v_mount->mnt_flag & MNT_ASYNC) ? 0 : IO_SYNC; - error = VOP_TRUNCATE(tvp, (off_t)0, ioflag, - tcnp->cn_cred, tcnp->cn_proc); + error = ffs_truncate_internal(tvp, (off_t)0, ioflag, vfs_context_ucred(tcnp->cn_context)); } xp->i_flag |= IN_CHANGE; VN_KNOTE(tvp, NOTE_DELETE); - vput(tvp); xp = NULL; } - + if (rl_vp) + vnode_put(rl_vp); + rl_vp = NULL; + /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; - if ((fcnp->cn_flags & SAVESTART) == 0) - panic("ufs_rename: lost from startdir"); + fcnp->cn_flags |= (WANTPARENT | NOCACHE); + (void) relookup(fdvp, &fvp, fcnp); + if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); + rl_vp = fvp; } else { /* * From name has disappeared. 
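 * A concurrent remove or rename can legitimately win this race for a
 * plain file, in which case the unlink half of the work is already
 * done and returning success is harmless; only a vanished entry for a
 * directory rename indicates real trouble, hence the panic below.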
*/ if (doingdirectory) panic("rename: lost dir entry"); - vrele(ap->a_fvp); + return (0); } /* @@ -1236,7 +987,7 @@ abortit: * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IRENAME flag ensures that it cannot be moved by another + * The IN_RENAME flag ensures that it cannot be moved by another * rename. */ if (xp != ip) { @@ -1254,8 +1005,8 @@ abortit: dp->i_flag |= IN_CHANGE; error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); + UIO_SYSSPACE32, IO_NODELOCKED, + vfs_context_ucred(tcnp->cn_context), (int *)0, (struct proc *)0); if (error == 0) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (fvp->v_mount->mnt_maxsymlinklen <= 0) @@ -1275,9 +1026,9 @@ abortit: (void) vn_rdwr(UIO_WRITE, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, + (off_t)0, UIO_SYSSPACE32, IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, + vfs_context_ucred(tcnp->cn_context), (int *)0, (struct proc *)0); cache_purge(fdvp); } @@ -1291,26 +1042,24 @@ abortit: xp->i_flag &= ~IN_RENAME; } VN_KNOTE(fvp, NOTE_RENAME); - if (dp) - vput(fdvp); - if (xp) - vput(fvp); - vrele(ap->a_fvp); + + if (rl_vp) + vnode_put(rl_vp); + return (error); bad: - if (xp) - vput(ITOV(xp)); - vput(ITOV(dp)); -out: + if (rl_vp) + vnode_put(rl_vp); + if (doingdirectory) ip->i_flag &= ~IN_RENAME; - if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - vput(fvp); - } else - vrele(fvp); + + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + ip->i_flag &= ~IN_RENAME; + +abortit: return (error); } @@ -1331,15 +1080,16 @@ static struct odirtemplate omastertemplate = { */ int ufs_mkdir(ap) - struct vop_mkdir_args /* { + struct vnop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; + vfs_context_t a_context; } */ *ap; { register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; + register struct vnode_attr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; @@ -1347,10 +1097,17 @@ ufs_mkdir(ap) struct timeval tv; int error, dmode; -#if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_mkdir: no name"); -#endif + /* use relookup to force correct directory hints */ + cnp->cn_flags &= ~MODMASK; + cnp->cn_flags |= (WANTPARENT | NOCACHE); + cnp->cn_nameiop = CREATE; + + (void) relookup(dvp, &tvp, cnp); + + /* get rid of reference relookup returned */ + if (tvp) + vnode_put(tvp); + dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; @@ -1358,37 +1115,35 @@ ufs_mkdir(ap) } dmode = vap->va_mode & 0777; dmode |= IFDIR; + /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. 
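 *
 * (Writing "." and ".." before the parent entry keeps a crash window
 * benign: fsck then finds an unreferenced but fully formed directory
 * inode, rather than a live parent entry pointing at a directory that
 * does not yet contain "." or "..".)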
 */
-	if (error = VOP_VALLOC(dvp, dmode, cnp->cn_cred, &tvp))
+	if ( (error = ffs_valloc(dvp, (mode_t)dmode, vfs_context_ucred(cnp->cn_context), &tvp)) )
		goto out;
	ip = VTOI(tvp);
-	ip->i_uid = cnp->cn_cred->cr_uid;
-	ip->i_gid = dp->i_gid;
+	ip->i_uid = ap->a_vap->va_uid;
+	ip->i_gid = ap->a_vap->va_gid;
+	VATTR_SET_SUPPORTED(ap->a_vap, va_mode);
+	VATTR_SET_SUPPORTED(ap->a_vap, va_uid);
+	VATTR_SET_SUPPORTED(ap->a_vap, va_gid);
#if QUOTA
	if ((error = getinoquota(ip)) ||
-	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
-		char *tmp = cnp->cn_pnbuf;
-		cnp->cn_pnbuf = NULL;
-		cnp->cn_flags &= ~HASBUF;
-		FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
-		VOP_VFREE(tvp, ip->i_number, dmode);
-		vput(tvp);
-		vput(dvp);
+	    (error = chkiq(ip, 1, vfs_context_ucred(cnp->cn_context), 0))) {
+		ffs_vfree(tvp, ip->i_number, dmode);
+		vnode_put(tvp);
		return (error);
	}
#endif
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = dmode;
-	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
	ip->i_nlink = 2;
	if (cnp->cn_flags & ISWHITEOUT)
		ip->i_flags |= UF_OPAQUE;
-	tv = time;
-	error = VOP_UPDATE(tvp, &tv, &tv, 1);
+	microtime(&tv);
+	error = ffs_update(tvp, &tv, &tv, 1);

	/*
	 * Bump link count in parent directory
@@ -1398,7 +1153,7 @@ ufs_mkdir(ap)
	 */
	dp->i_nlink++;
	dp->i_flag |= IN_CHANGE;
-	if (error = VOP_UPDATE(dvp, &tv, &tv, 1))
+	if ( (error = ffs_update(dvp, &tv, &tv, 1)) )
		goto bad;

	/* Initialize directory with "." and ".." from static template. */
@@ -1410,14 +1165,14 @@ ufs_mkdir(ap)
	dirtemplate.dot_ino = ip->i_number;
	dirtemplate.dotdot_ino = dp->i_number;
	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
-	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
-	    IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
+	    sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE32,
+	    IO_NODELOCKED|IO_SYNC, vfs_context_ucred(cnp->cn_context), (int *)0, (struct proc *)0);
	if (error) {
		dp->i_nlink--;
		dp->i_flag |= IN_CHANGE;
		goto bad;
	}
-	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
+	if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_vfsstat.f_bsize)
		panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
	else {
		ip->i_size = DIRBLKSIZ;
@@ -1425,31 +1180,28 @@ ufs_mkdir(ap)
	}
	/* Directory set up, now install its entry in the parent directory. */
-	if (error = ufs_direnter(ip, dvp, cnp)) {
+	if ( (error = ufs_direnter(ip, dvp, cnp)) ) {
		dp->i_nlink--;
		dp->i_flag |= IN_CHANGE;
	}
bad:
	/*
-	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
+	 * No need to do an explicit vnop_truncate here, vnode_put will do it
	 * for us because we set the link count to 0.
	 */
	if (error) {
		ip->i_nlink = 0;
		ip->i_flag |= IN_CHANGE;
-		vput(tvp);
+		/*
+		 * since we're not returning tvp due to the error,
+		 * we're responsible for releasing it here
+		 */
+		vnode_put(tvp);
	} else {
		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
		*ap->a_vpp = tvp;
	};
out:
-	{
-		char *tmp = cnp->cn_pnbuf;
-		cnp->cn_pnbuf = NULL;
-		cnp->cn_flags &= ~HASBUF;
-		FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI);
-	}
-	vput(dvp);
	return (error);
}

@@ -1458,28 +1210,45 @@ out:
 */
int
ufs_rmdir(ap)
-	struct vop_rmdir_args /* {
+	struct vnop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
+		vfs_context_t a_context;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
+	struct vnode *tvp;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	int error, ioflag;

+	ip = VTOI(vp);
	dp = VTOI(dvp);
	/*
	 * No rmdir "." please.
*/ - if (dp == ip) { - vrele(dvp); - vput(vp); + if (dp == ip) return (EINVAL); - } + + + cnp->cn_flags &= ~MODMASK; + cnp->cn_flags |= (WANTPARENT | NOCACHE); + + (void) relookup(dvp, &tvp, cnp); + + if (tvp == NULL) + return (ENOENT); + if (tvp != vp) + panic("ufs_rmdir: relookup returned a different vp"); + /* + * get rid of reference relookup returned + */ + vnode_put(tvp); + + /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since @@ -1489,27 +1258,21 @@ ufs_rmdir(ap) */ error = 0; if (ip->i_nlink != 2 || - !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { + !ufs_dirempty(ip, dp->i_number, vfs_context_ucred(cnp->cn_context))) { error = ENOTEMPTY; goto out; } - if ((dp->i_flags & APPEND) || (ip->i_flags & (IMMUTABLE | APPEND))) { - error = EPERM; - goto out; - } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ - if (error = ufs_dirremove(dvp, cnp)) + if ( (error = ufs_dirremove(dvp, cnp)) ) goto out; VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); dp->i_nlink--; dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; /* * Truncate inode. The only stuff left * in the directory is "." and "..". The @@ -1523,14 +1286,10 @@ ufs_rmdir(ap) */ ip->i_nlink -= 2; ioflag = ((vp)->v_mount->mnt_flag & MNT_ASYNC) ? 0 : IO_SYNC; - error = VOP_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred, - cnp->cn_proc); + error = ffs_truncate_internal(vp, (off_t)0, ioflag, vfs_context_ucred(cnp->cn_context)); cache_purge(ITOV(ip)); out: - if (dvp) - vput(dvp); VN_KNOTE(vp, NOTE_DELETE); - vput(vp); return (error); } @@ -1539,20 +1298,20 @@ out: */ int ufs_symlink(ap) - struct vop_symlink_args /* { + struct vnop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_attr *a_vap; char *a_target; + vfs_context_t a_context; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; - if (error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, - vpp, ap->a_cnp)) + if ( (error = ufs_makeinode(ap->a_vap, ap->a_dvp, vpp, ap->a_cnp)) ) return (error); VN_KNOTE(ap->a_dvp, NOTE_WRITE); vp = *vpp; @@ -1564,9 +1323,8 @@ ufs_symlink(ap) ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, + UIO_SYSSPACE32, IO_NODELOCKED, vfs_context_ucred(ap->a_cnp->cn_context), (int *)0, (struct proc *)0); - vput(vp); return (error); } @@ -1581,49 +1339,60 @@ ufs_symlink(ap) */ int ufs_readdir(ap) - struct vop_readdir_args /* { + struct vnop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; - struct ucred *a_cred; + int a_flags; int *a_eofflag; - int *ncookies; - u_long **a_cookies; + int *a_numdirent; + vfs_context_t a_context; } */ *ap; { - register struct uio *uio = ap->a_uio; + struct uio *uio = ap->a_uio; int error; size_t count, lost; - off_t off = uio->uio_offset; - count = uio->uio_resid; + if (ap->a_flags & VNODE_READDIR_EXTENDED) { + return ufs_readdirext(ap->a_vp, uio, ap->a_eofflag, + ap->a_numdirent, ap->a_context); + } + + // LP64todo - fix this + count = uio_resid(uio); /* Make sure we don't return partial entries. 
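	 * For example, with illustrative numbers only: if DIRBLKSIZ is
	 * 512 and the caller asks for 600 bytes at offset 0, then
	 * (0 + 600) & 511 == 88, count becomes 512, and the trailing 88
	 * bytes are handed back to the caller through "lost" below.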
*/ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) return (EINVAL); - lost = uio->uio_resid - count; - uio->uio_resid = count; - uio->uio_iov->iov_len = count; + // LP64todo - fix this + lost = uio_resid(uio) - count; + uio_setresid(uio, count); + uio_iov_len_set(uio, count); # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); + error = ffs_read_internal(ap->a_vp, uio, 0); } else { struct dirent *dp, *edp; struct uio auio; - struct iovec aiov; + struct iovec_32 aiov; caddr_t dirbuf; int readcnt; u_char tmp; auio = *uio; - auio.uio_iov = &aiov; + auio.uio_iovs.iov32p = &aiov; auio.uio_iovcnt = 1; +#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ auio.uio_segflg = UIO_SYSSPACE; +#else + auio.uio_segflg = UIO_SYSSPACE32; +#endif aiov.iov_len = count; MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); - aiov.iov_base = dirbuf; - error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + aiov.iov_base = (uintptr_t)dirbuf; + error = ffs_read_internal(ap->a_vp, &auio, 0); if (error == 0) { - readcnt = count - auio.uio_resid; + // LP64todo - fix this + readcnt = count - uio_resid(&auio); edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { tmp = dp->d_namlen; @@ -1643,56 +1412,137 @@ ufs_readdir(ap) FREE(dirbuf, M_TEMP); } # else - error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); + error = ffs_read_internal(ap->a_vp, uio, 0); # endif - if (!error && ap->a_ncookies != NULL) { - struct dirent* dpStart; - struct dirent* dpEnd; - struct dirent* dp; - int ncookies; - u_long *cookies; - u_long *cookiep; - /* - * Only the NFS server uses cookies, and it loads the - * directory block into system space, so we can just look at - * it directly. - */ - if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) - panic("ufs_readdir: unexpected uio from NFS server"); - dpStart = (struct dirent *) - (uio->uio_iov->iov_base - (uio->uio_offset - off)); - dpEnd = (struct dirent *) uio->uio_iov->iov_base; - for (dp = dpStart, ncookies = 0; - dp < dpEnd && dp->d_reclen != 0; - dp = (struct dirent *)((caddr_t)dp + dp->d_reclen)) - ncookies++; - MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, - M_WAITOK); - for (dp = dpStart, cookiep = cookies; - dp < dpEnd; - dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { - off += dp->d_reclen; - *cookiep++ = (u_long) off; - } - *ap->a_ncookies = ncookies; - *ap->a_cookies = cookies; + uio_setresid(uio, (uio_resid(uio) + lost)); + if (ap->a_eofflag) + *ap->a_eofflag = (off_t)VTOI(ap->a_vp)->i_size <= uio->uio_offset; + return (error); +} + + +/* + * ufs_readdirext reads directory entries into the buffer pointed + * to by uio, in a filesystem independent format. Up to uio_resid + * bytes of data can be transferred. The data in the buffer is a + * series of packed direntry structures where each one contains the + * following entries: + * + * d_reclen: length of record + * d_ino: file number of entry + * d_seekoff: seek offset (used by NFS server, aka cookie) + * d_type: file type + * d_namlen: length of string in d_name + * d_name: null terminated file name + * + * The current position (uio_offset) refers to the next block of + * entries. The offset will only be set to a value previously + * returned by ufs_readdirext or zero. This offset does not have + * to match the number of bytes returned (in uio_resid). 
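+ *
+ * As a worked illustration (assuming struct direntry embeds a
+ * MAXPATHLEN-sized d_name, as the EXT_DIRENT_LEN() subtraction below
+ * suggests): a record for a 5-character name occupies the fixed
+ * direntry header plus the name and its terminating NUL, rounded up
+ * to the next 4-byte boundary so successive records stay naturally
+ * aligned in the caller's buffer.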
+ */
+#define EXT_DIRENT_LEN(namlen) \
+	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 3) & ~3)
+
+static int
+ufs_readdirext(vnode_t vp, uio_t uio, int *eofflag, int *numdirent,
+	__unused vfs_context_t context)
+{
+	int error;
+	size_t count, lost;
+	off_t off = uio->uio_offset;
+	struct dirent *dp, *edp;
+	struct uio auio;
+	struct iovec_32 aiov;
+	caddr_t dirbuf;
+	struct direntry *xdp;
+	int nentries = 0;
+
+	// LP64todo - fix this
+	count = uio_resid(uio);
+	/* Make sure we don't return partial entries. */
+	count -= (uio->uio_offset + count) & (DIRBLKSIZ -1);
+	if (count <= 0)
+		return (EINVAL);
+	// LP64todo - fix this
+	lost = uio_resid(uio) - count;
+	uio_setresid(uio, count);
+	uio_iov_len_set(uio, count);
+
+	auio = *uio;
+	auio.uio_iovs.iov32p = &aiov;
+	auio.uio_iovcnt = 1;
+	/* LP64todo - can't use new segment flags until the drivers are ready */
+	auio.uio_segflg = UIO_SYSSPACE;
+	aiov.iov_len = count;
+	MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK);
+	aiov.iov_base = (uintptr_t)dirbuf;
+
+	MALLOC(xdp, struct direntry *, sizeof(struct direntry), M_TEMP, M_WAITOK);
+
+	error = ffs_read_internal(vp, &auio, 0);
+	if (error)
+		goto out;
+
+	// LP64todo - fix this
+	edp = (struct dirent *)&dirbuf[count - uio_resid(&auio)];
+	for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+		u_char tmp;
+
+		tmp = dp->d_namlen;
+		dp->d_namlen = dp->d_type;
+		dp->d_type = tmp;
+#endif
+		xdp->d_reclen = EXT_DIRENT_LEN(dp->d_namlen);
+		if (xdp->d_reclen > uio_resid(uio)) {
+			break;	/* user buffer is full */
+		}
+		xdp->d_ino = dp->d_ino;
+		xdp->d_namlen = dp->d_namlen;
+		xdp->d_type = dp->d_type;
+		bcopy(dp->d_name, xdp->d_name, dp->d_namlen + 1);
+		off += dp->d_reclen;
+		xdp->d_seekoff = off;
+		error = uiomove((caddr_t)xdp, xdp->d_reclen, uio);
+		if (error) {
+			off -= dp->d_reclen;
+			break;	/* this error is unexpected */
+		}
+		nentries++;
+
+		if (dp->d_reclen > 0) {
+			dp = (struct dirent *)
+				((char *)dp + dp->d_reclen);
+		} else {
+			error = EIO;
+			break;
+		}
	}
-	uio->uio_resid += lost;
-	if (ap->a_eofflag)
-		*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
+out:
+	FREE(dirbuf, M_TEMP);
+	FREE(xdp, M_TEMP);
+
+	/* Use the on-disk dirent offset */
+	uio_setoffset(uio, off);
+	*numdirent = nentries;
+	uio_setresid(uio, (uio_resid(uio) + lost));
+	if (eofflag)
+		*eofflag = (off_t)VTOI(vp)->i_size <= uio->uio_offset;
	return (error);
}
+
/*
 * Return target name of a symbolic link
 */
int
ufs_readlink(ap)
-	struct vop_readlink_args /* {
+	struct vnop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
-		struct ucred *a_cred;
+		vfs_context_t a_context;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
@@ -1704,149 +1554,23 @@ ufs_readlink(ap)
		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
		return (0);
	}
-	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
-}
-
-/*
- * Lock an inode. If its already locked, set the WANT bit and sleep.
- */
-int
-ufs_lock(ap)
-	struct vop_lock_args /* {
-		struct vnode *a_vp;
-		int a_flags;
-		struct proc *a_p;
-	} */ *ap;
-{
-	struct vnode *vp = ap->a_vp;
-
-	if (VTOI(vp) == (struct inode *)NULL)
-		panic("inode in vnode is null\n");
-	return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock,
-		ap->a_p));
-}
-
-/*
- * Unlock an inode.
- */ -int -ufs_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE, - &vp->v_interlock, ap->a_p)); + return (ffs_read_internal(vp, ap->a_uio, 0)); } /* - * Check for a locked inode. + * prepare and issue the I/O */ -int -ufs_islocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - - return (lockstatus(&VTOI(ap->a_vp)->i_lock)); -} - -/* - * Calculate the logical to physical mapping if not done already, - * then call the device strategy routine. - */ -int +errno_t ufs_strategy(ap) - struct vop_strategy_args /* { + struct vnop_strategy_args /* { struct buf *a_bp; } */ *ap; { - register struct buf *bp = ap->a_bp; - register struct vnode *vp = bp->b_vp; - register struct inode *ip; - int error; - - ip = VTOI(vp); - if ( !(bp->b_flags & B_VECTORLIST)) { - if (vp->v_type == VBLK || vp->v_type == VCHR) - panic("ufs_strategy: spec"); - - - if (bp->b_flags & B_PAGELIST) { - /* - * if we have a page list associated with this bp, - * then go through cluste_bp since it knows how to - * deal with a page request that might span non-contiguous - * physical blocks on the disk... - */ -#if 1 - if (bp->b_blkno == bp->b_lblkno) { - if (error = VOP_BMAP(vp, bp->b_lblkno, NULL, - &bp->b_blkno, NULL)) { - bp->b_error = error; - bp->b_flags |= B_ERROR; - biodone(bp); - return (error); - } - } -#endif /* 1 */ - error = cluster_bp(bp); - vp = ip->i_devvp; - bp->b_dev = vp->v_rdev; - - return (error); - } - - if (bp->b_blkno == bp->b_lblkno) { - if (error = - VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL)) { - bp->b_error = error; - bp->b_flags |= B_ERROR; - biodone(bp); - return (error); - } - if ((long)bp->b_blkno == -1) - clrbuf(bp); - } - if ((long)bp->b_blkno == -1) { - biodone(bp); - return (0); - } - - } - - vp = ip->i_devvp; - bp->b_dev = vp->v_rdev; - VOCALL (vp->v_op, VOFFSET(vop_strategy), ap); - return (0); -} - -/* - * Print out the contents of an inode. - */ -int -ufs_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + buf_t bp = ap->a_bp; + vnode_t vp = buf_vnode(bp); + struct inode *ip = VTOI(vp); - printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number, - major(ip->i_dev), minor(ip->i_dev)); -#if FIFO - if (vp->v_type == VFIFO) - fifo_printinfo(vp); -#endif /* FIFO */ - lockmgr_printinfo(&ip->i_lock); - printf("\n"); - return (0); + return (buf_strategy(ip->i_devvp, ap)); } /* @@ -1854,11 +1578,11 @@ ufs_print(ap) */ int ufsspec_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { @@ -1866,27 +1590,27 @@ ufsspec_read(ap) * Set access flag. */ VTOI(ap->a_vp)->i_flag |= IN_ACCESS; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap)); } /* * Write wrapper for special devices. */ int -ufsspec_write(ap) - struct vop_write_args /* { +ufsspec_write( + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; - } */ *ap; + kauth_cred_t a_cred; + } */ *ap) { /* * Set update and change flags. 
*/ VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -1896,21 +1620,21 @@ ufsspec_write(ap) */ int ufsspec_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); + struct timeval tv; - simple_lock(&vp->v_interlock); - if (ap->a_vp->v_usecount > 1) - ITIMES(ip, &time, &time); - simple_unlock(&vp->v_interlock); - return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); + if (ap->a_vp->v_usecount > 1) { + microtime(&tv); + ITIMES(ip, &tv, &tv); + } + return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap)); } #if FIFO @@ -1919,11 +1643,11 @@ ufsspec_close(ap) */ int ufsfifo_read(ap) - struct vop_read_args /* { + struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */ *ap; { extern int (**fifo_vnodeop_p)(void *); @@ -1932,20 +1656,20 @@ ufsfifo_read(ap) * Set access flag. */ VTOI(ap->a_vp)->i_flag |= IN_ACCESS; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_read), ap)); } /* * Write wrapper for fifo's. */ int -ufsfifo_write(ap) - struct vop_write_args /* { +ufsfifo_write( + struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; - } */ *ap; + kauth_cred_t a_cred; + } */ *ap) { extern int (**fifo_vnodeop_p)(void *); @@ -1953,7 +1677,7 @@ ufsfifo_write(ap) * Set update and change flags. */ VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_write), ap)); } /* @@ -1961,23 +1685,24 @@ ufsfifo_write(ap) * * Update the times on the inode then do device close. 
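 *
 * (Times are refreshed below only while other users still hold the
 * fifo open, i.e. while v_usecount > 1; this mirrors the same check
 * in ufsspec_close() above.)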
*/ +int ufsfifo_close(ap) - struct vop_close_args /* { + struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */ *ap; { extern int (**fifo_vnodeop_p)(void *); struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); + struct timeval tv; - simple_lock(&vp->v_interlock); - if (ap->a_vp->v_usecount > 1) - ITIMES(ip, &time, &time); - simple_unlock(&vp->v_interlock); - return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); + if (ap->a_vp->v_usecount > 1) { + microtime(&tv); + ITIMES(ip, &tv, &tv); + } + return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } /* @@ -1987,12 +1712,12 @@ ufsfifo_close(ap) */ int ufsfifo_kqfilt_add(ap) - struct vop_kqfilt_add_args *ap; + struct vnop_kqfilt_add_args *ap; { extern int (**fifo_vnodeop_p)(void *); int error; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_add), ap); + error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap); if (error) error = ufs_kqfilt_add(ap); return (error); @@ -2006,12 +1731,12 @@ ufsfifo_kqfilt_add(ap) */ int ufsfifo_kqfilt_remove(ap) - struct vop_kqfilt_remove_args *ap; + struct vnop_kqfilt_remove_args *ap; { extern int (**fifo_vnodeop_p)(void *); int error; - error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_kqfilt_remove), ap); + error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap); if (error) error = ufs_kqfilt_remove(ap); return (error); @@ -2032,17 +1757,17 @@ static struct filterops ufsvnode_filtops = # #% kqfilt_add vp L L L # - vop_kqfilt_add + vnop_kqfilt_add IN struct vnode *vp; IN struct knote *kn; - IN struct proc *p; + IN vfs_context_t context; */ int ufs_kqfilt_add(ap) - struct vop_kqfilt_add_args /* { + struct vnop_kqfilt_add_args /* { struct vnode *a_vp; struct knote *a_kn; - struct proc *p; + vfs_context_t a_context; } */ *ap; { struct vnode *vp = ap->a_vp; @@ -2063,6 +1788,7 @@ ufs_kqfilt_add(ap) } kn->kn_hook = (caddr_t)vp; + kn->kn_hookid = vnode_vid(vp); KNOTE_ATTACH(&VTOI(vp)->i_knotes, kn); @@ -2077,75 +1803,108 @@ filt_ufsdetach(struct knote *kn) struct proc *p = current_proc(); vp = (struct vnode *)kn->kn_hook; - if (1) { /* ! KNDETACH_VNLOCKED */ - result = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (result) return; - }; + + if (vnode_getwithvid(vp, kn->kn_hookid)) + return; result = KNOTE_DETACH(&VTOI(vp)->i_knotes, kn); - - if (1) { /* ! KNDETACH_VNLOCKED */ - VOP_UNLOCK(vp, 0, p); - }; + vnode_put(vp); } -/*ARGSUSED*/ static int filt_ufsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; - struct inode *ip = VTOI(vp); + struct inode *ip; + int dropvp = 0; + int result; - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ + if (hint == 0) { + if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + dropvp = 1; + } if (hint == NOTE_REVOKE) { + /* + * filesystem is gone, so set the EOF flag and schedule + * the knote for deletion. 
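+ *
+ * (hint == 0 means we were polled rather than notified; the
+ * vnode_getwithvid() above re-validates the vnode against the
+ * vid captured in ufs_kqfilt_add(), so a vnode recycled since
+ * attach is folded into this same NOTE_REVOKE path.)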
+ */ kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } - kn->kn_data = ip->i_size - kn->kn_fp->f_offset; - return (kn->kn_data != 0); + /* poll(2) semantics dictate always returning true */ + if (kn->kn_flags & EV_POLL) { + kn->kn_data = 1; + result = 1; + } else { + ip = VTOI(vp); + kn->kn_data = ip->i_size - kn->kn_fp->f_fglob->fg_offset; + result = (kn->kn_data != 0); + } + + if (dropvp) + vnode_put(vp); + + return (result); } -/*ARGSUSED*/ static int filt_ufswrite(struct knote *kn, long hint) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - if (hint == NOTE_REVOKE) + int dropvp = 0; + + if (hint == 0) { + if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + vnode_put(kn->kn_hook); + } + if (hint == NOTE_REVOKE) { + /* + * filesystem is gone, so set the EOF flag and schedule + * the knote for deletion. + */ + kn->kn_data = 0; kn->kn_flags |= (EV_EOF | EV_ONESHOT); - - kn->kn_data = 0; - return (1); + return (1); + } + kn->kn_data = 0; + return (1); } static int filt_ufsvnode(struct knote *kn, long hint) { + if (hint == 0) { + if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else + vnode_put(kn->kn_hook); + } if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; - if (hint == NOTE_REVOKE) { - kn->kn_flags |= EV_EOF; + if ((hint == NOTE_REVOKE)) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } + return (kn->kn_fflags != 0); } /* * Return POSIX pathconf information applicable to ufs filesystems. */ +int ufs_pathconf(ap) - struct vop_pathconf_args /* { + struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; + vfs_context_t a_context; } */ *ap; { @@ -2174,172 +1933,12 @@ ufs_pathconf(ap) /* NOTREACHED */ } -/* - * Advisory record locking support - */ -int -ufs_advlock(ap) - struct vop_advlock_args /* { - struct vnode *a_vp; - caddr_t a_id; - int a_op; - struct flock *a_fl; - int a_flags; - } */ *ap; -{ - register struct inode *ip = VTOI(ap->a_vp); - register struct flock *fl = ap->a_fl; - register struct lockf *lock; - off_t start, end; - int error; - - /* - * Avoid the common case of unlocking when inode has no locks. - */ - if (ip->i_lockf == (struct lockf *)0) { - if (ap->a_op != F_SETLK) { - fl->l_type = F_UNLCK; - return (0); - } - } - /* - * Convert the flock structure into a start and end. - */ - switch (fl->l_whence) { - - case SEEK_SET: - case SEEK_CUR: - /* - * Caller is responsible for adding any necessary offset - * when SEEK_CUR is used. - */ - start = fl->l_start; - break; - - case SEEK_END: - start = ip->i_size + fl->l_start; - break; - - default: - return (EINVAL); - } - if (fl->l_len == 0) - end = -1; - else if (fl->l_len > 0) - end = start + fl->l_len - 1; - else { /* l_len is negative */ - end = start - 1; - start += fl->l_len; - } - if (start < 0) - return (EINVAL); - /* - * Create the lockf structure - */ - MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK); - lock->lf_start = start; - lock->lf_end = end; - lock->lf_id = ap->a_id; - lock->lf_inode = ip; - lock->lf_type = fl->l_type; - lock->lf_next = (struct lockf *)0; - TAILQ_INIT(&lock->lf_blkhd); - lock->lf_flags = ap->a_flags; - /* - * Do the requested operation. 
- */ - switch(ap->a_op) { - case F_SETLK: - return (lf_setlock(lock)); - - case F_UNLCK: - error = lf_clearlock(lock); - FREE(lock, M_LOCKF); - return (error); - - case F_GETLK: - error = lf_getlock(lock, fl); - FREE(lock, M_LOCKF); - return (error); - - default: - _FREE(lock, M_LOCKF); - return (EINVAL); - } - /* NOTREACHED */ -} - -/* - * Initialize the vnode associated with a new inode, handle aliased - * vnodes. - */ -int -ufs_vinit(mntp, specops, fifoops, vpp) - struct mount *mntp; - int (**specops)(); - int (**fifoops)(); - struct vnode **vpp; -{ - struct proc *p = current_proc(); /* XXX */ - struct inode *ip; - struct vnode *vp, *nvp; - - vp = *vpp; - ip = VTOI(vp); - switch(vp->v_type = IFTOVT(ip->i_mode)) { - case VCHR: - case VBLK: - vp->v_op = specops; - if (nvp = checkalias(vp, ip->i_rdev, mntp)) { - /* - * Discard unneeded vnode, but save its inode. - * Note that the lock is carried over in the inode - * to the replacement vnode. - */ - nvp->v_data = vp->v_data; - vp->v_data = NULL; - vp->v_op = spec_vnodeop_p; - vrele(vp); - vgone(vp); - /* - * Reinitialize aliased inode. - */ - vp = nvp; - ip->i_vnode = vp; - } - break; - case VFIFO: -#if FIFO - vp->v_op = fifoops; - break; -#else - return (EOPNOTSUPP); -#endif - case VREG: -#if 0 - ubc_info_init(vp); -#endif /* 0 */ - break; - default: - break; - } - if (ip->i_number == ROOTINO) - vp->v_flag |= VROOT; - /* - * Initialize modrev times - */ - SETHIGH(ip->i_modrev, time.tv_sec); - SETLOW(ip->i_modrev, time.tv_usec * 4294); - *vpp = vp; - return (0); -} - /* * Allocate a new inode. */ int -ufs_makeinode(mode, dvp, vpp, cnp) - int mode; +ufs_makeinode(vap, dvp, vpp, cnp) + struct vnode_attr *vap; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; @@ -2348,79 +1947,47 @@ ufs_makeinode(mode, dvp, vpp, cnp) struct timeval tv; struct vnode *tvp; int error; - + int is_member; + int mode; + + mode = MAKEIMODE(vap->va_type, vap->va_mode); pdir = VTOI(dvp); -#if DIAGNOSTIC - if ((cnp->cn_flags & HASBUF) == 0) - panic("ufs_makeinode: no name"); -#endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; - if (error = VOP_VALLOC(dvp, mode, cnp->cn_cred, &tvp)) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - vput(dvp); + if ( (error = ffs_valloc(dvp, (mode_t)mode, vfs_context_ucred(cnp->cn_context), &tvp)) ) return (error); - } + ip = VTOI(tvp); - ip->i_gid = pdir->i_gid; - if ((mode & IFMT) == IFLNK) - ip->i_uid = pdir->i_uid; - else - ip->i_uid = cnp->cn_cred->cr_uid; + ip->i_gid = vap->va_gid; + ip->i_uid = vap->va_uid; + VATTR_SET_SUPPORTED(vap, va_mode); + VATTR_SET_SUPPORTED(vap, va_uid); + VATTR_SET_SUPPORTED(vap, va_gid); #if QUOTA if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, cnp->cn_cred, 0))) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - VOP_VFREE(tvp, ip->i_number, mode); - vput(tvp); - vput(dvp); + (error = chkiq(ip, 1, vfs_context_ucred(cnp->cn_context), 0))) { + ffs_vfree(tvp, ip->i_number, mode); + vnode_put(tvp); return (error); } #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; - tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). 
*/ ip->i_nlink = 1; - if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && - suser(cnp->cn_cred, NULL)) - ip->i_mode &= ~ISGID; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; - /* - * initialize UBC before calling VOP_UPDATE and ufs_direnter - * Not doing so introduces probelms in handling error from - * those calls. - * It results in a "vget: stolen ubc_info" panic due to attempt - * to shutdown uninitialized UBC. - */ - if (UBCINFOMISSING(tvp) || UBCINFORECLAIMED(tvp)) - ubc_info_init(tvp); - /* * Make sure inode goes to disk before directory entry. */ - tv = time; - if (error = VOP_UPDATE(tvp, &tv, &tv, 1)) + microtime(&tv); + if ( (error = ffs_update(tvp, &tv, &tv, 1)) ) goto bad; - if (error = ufs_direnter(ip, dvp, cnp)) + if ( (error = ufs_direnter(ip, dvp, cnp)) ) goto bad; - if ((cnp->cn_flags & SAVESTART) == 0) { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } - vput(dvp); *vpp = tvp; return (0); @@ -2430,16 +1997,10 @@ bad: * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ - { - char *tmp = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, cnp->cn_pnlen, M_NAMEI); - } - vput(dvp); ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; - vput(tvp); + vnode_put(tvp); + return (error); } diff --git a/bsd/ufs/ufs/ufsmount.h b/bsd/ufs/ufs/ufsmount.h index a54746310..79a073abd 100644 --- a/bsd/ufs/ufs/ufsmount.h +++ b/bsd/ufs/ufs/ufsmount.h @@ -67,7 +67,6 @@ */ struct ufs_args { char *fspec; /* block special device to mount */ - struct export_args export; /* network export information */ }; #endif /* __APPLE_API_UNSTABLE */ @@ -78,7 +77,6 @@ struct ufs_args { */ struct mfs_args { char *fspec; /* name to export for statfs */ - struct export_args export; /* if exported MFSes are supported */ caddr_t base; /* base of file system in memory */ u_long size; /* size of file system */ }; @@ -90,7 +88,6 @@ struct mfs_args { struct fs; struct mount; struct vnode; -struct netexport; /* This structure describes the UFS specific mount structure data. 
*/ struct ufsmount { @@ -107,7 +104,6 @@ struct ufsmount { u_long um_nindir; /* indirect ptrs per block */ u_long um_bptrtodb; /* indir ptr to disk block */ u_long um_seqinc; /* inc between seq blocks */ - struct netexport um_export; /* export information */ int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */ }; diff --git a/bsd/uuid/Makefile b/bsd/uuid/Makefile new file mode 100644 index 000000000..8d5af9310 --- /dev/null +++ b/bsd/uuid/Makefile @@ -0,0 +1,60 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = \ + +INSTINC_SUBDIRS_PPC = \ + +INSTINC_SUBDIRS_I386 = \ + +EXPINC_SUBDIRS = \ + +EXPINC_SUBDIRS_PPC = \ + +EXPINC_SUBDIRS_I386 = \ + +# In both the framework PrivateHeader area and /usr/include/uuid +DATAFILES = \ + uuid.h + +# Only in the framework PrivateHeader area +PRIVATE_DATAFILES = \ + +# KERNELFILES will appear only in the kernel framework +KERNELFILES = \ + uuid.h + + +# Only in the private kernel framework +PRIVATE_KERNELFILES = \ + + +INSTALL_MI_LIST = ${DATAFILES} + +INSTALL_MI_DIR = uuid + +EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} + +EXPORT_MI_DIR = uuid + +# /System/Library/Frameworks/System.framework/PrivateHeaders +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + +# /System/Library/Frameworks/Kernel.framework/PrivateHeaders + +INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} + +# /System/Library/Frameworks/Kernel.framework/Headers + +INSTALL_KF_MI_LIST = ${KERNELFILES} + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/bsd/uuid/uuid.h b/bsd/uuid/uuid.h new file mode 100644 index 000000000..3d172d2f6 --- /dev/null +++ b/bsd/uuid/uuid.h @@ -0,0 +1,74 @@ +/* + * Public include file for the UUID library + * + * Copyright (C) 1996, 1997, 1998 Theodore Ts'o. + * + * %Begin-Header% + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * %End-Header% + */ + +#ifndef _UUID_UUID_H +#define _UUID_UUID_H + +#include + +#ifndef _UUID_T +#define _UUID_T +typedef __darwin_uuid_t uuid_t; +#endif /* _UUID_T */ + +#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \ + static const uuid_t name __attribute__ ((unused)) = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15} + +#ifdef __cplusplus +extern "C" { +#endif + +void uuid_clear(uuid_t uu); + +int uuid_compare(const uuid_t uu1, const uuid_t uu2); + +void uuid_copy(uuid_t dst, const uuid_t src); + +void uuid_generate(uuid_t out); +void uuid_generate_random(uuid_t out); +void uuid_generate_time(uuid_t out); + +int uuid_is_null(const uuid_t uu); + +int uuid_parse(const char *in, uuid_t uu); + +void uuid_unparse(const uuid_t uu, char *out); +void uuid_unparse_lower(const uuid_t uu, char *out); +void uuid_unparse_upper(const uuid_t uu, char *out); + +#ifdef __cplusplus +} +#endif + +#endif /* _UUID_UUID_H */ diff --git a/bsd/uxkern/ux_exception.c b/bsd/uxkern/ux_exception.c index 655634a46..e576ad299 100644 --- a/bsd/uxkern/ux_exception.c +++ b/bsd/uxkern/ux_exception.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -39,11 +39,12 @@ #include #include #include +#include #include +#include #include #include #include -#include #include #include @@ -51,13 +52,35 @@ #include #include +#include /* get_task_ipcspace() */ + +/* + * XXX Things that should be retrieved from Mach headers, but aren't + */ +struct ipc_object; +extern kern_return_t ipc_object_copyin(ipc_space_t space, mach_port_name_t name, + mach_msg_type_name_t msgt_name, struct ipc_object **objectp); +extern mach_msg_return_t mach_msg_receive(mach_msg_header_t *msg, + mach_msg_option_t option, mach_msg_size_t rcv_size, + mach_port_name_t rcv_name, mach_msg_timeout_t rcv_timeout, + void (*continuation)(mach_msg_return_t), + mach_msg_size_t slist_size); +extern mach_msg_return_t mach_msg_send(mach_msg_header_t *msg, + mach_msg_option_t option, mach_msg_size_t send_size, + mach_msg_timeout_t send_timeout, mach_port_name_t notify); +extern thread_t convert_port_to_thread(ipc_port_t port); +extern void ipc_port_release(ipc_port_t); + + + + /* * Unix exception handler. 
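 *
 * In outline: a kernel thread parks in mach_msg_receive() on
 * ux_exception_port; catch_exception_raise() then maps the Mach
 * exception onto a POSIX signal via ux_exception() (for instance,
 * EXC_BAD_ACCESS typically becomes SIGSEGV or SIGBUS, machine
 * dependent) and delivers it with threadsignal().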
*/ -static void ux_exception(); +static void ux_exception(int exception, int code, int subcode, + int *ux_signal, int *ux_code); -decl_simple_lock_data(static, ux_handler_init_lock) mach_port_name_t ux_exception_port; static task_t ux_handler_self; @@ -154,37 +177,33 @@ ux_handler(void) void ux_handler_init(void) { - simple_lock_init(&ux_handler_init_lock); ux_exception_port = MACH_PORT_NULL; (void) kernel_thread(kernel_task, ux_handler); - simple_lock(&ux_handler_init_lock); if (ux_exception_port == MACH_PORT_NULL) { - simple_unlock(&ux_handler_init_lock); assert_wait(&ux_exception_port, THREAD_UNINT); thread_block(THREAD_CONTINUE_NULL); } - else - simple_unlock(&ux_handler_init_lock); } kern_return_t catch_exception_raise( - mach_port_name_t exception_port, - mach_port_name_t thread_name, - mach_port_name_t task_name, - int exception, - exception_data_t code, - mach_msg_type_number_t codecnt + __unused mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + exception_data_t code, + __unused mach_msg_type_number_t codeCnt ) { task_t self = current_task(); - thread_act_t th_act; + thread_t th_act; ipc_port_t thread_port; - ipc_port_t task_port; kern_return_t result = MACH_MSG_SUCCESS; - int signal = 0; + int ux_signal = 0; u_long ucode = 0; struct uthread *ut; + mach_port_name_t thread_name = (mach_port_name_t)thread; /* XXX */ + mach_port_name_t task_name = (mach_port_name_t)task; /* XXX */ /* * Convert local thread name to global port. @@ -194,31 +213,31 @@ catch_exception_raise( MACH_MSG_TYPE_PORT_SEND, (void *) &thread_port) == MACH_MSG_SUCCESS)) { if (IPC_PORT_VALID(thread_port)) { - th_act = (thread_act_t)convert_port_to_act(thread_port); + th_act = convert_port_to_thread(thread_port); ipc_port_release(thread_port); } else { - th_act = THR_ACT_NULL; + th_act = THREAD_NULL; } /* * Catch bogus ports */ - if (th_act != THR_ACT_NULL) { + if (th_act != THREAD_NULL) { /* * Convert exception to unix signal and code. */ ut = get_bsdthread_info(th_act); ux_exception(exception, code[0], code[1], - &signal, &ucode); + &ux_signal, (int *)&ucode); /* * Send signal. */ - if (signal != 0) - threadsignal(th_act, signal, ucode); + if (ux_signal != 0) + threadsignal(th_act, ux_signal, ucode); - act_deallocate(th_act); + thread_deallocate(th_act); } else result = KERN_INVALID_ARGUMENT; @@ -230,23 +249,43 @@ catch_exception_raise( * Delete our send rights to the task and thread ports. 
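	 * (The exception message moved send rights for both ports into
	 * this task's IPC space; without the deallocations below, each
	 * delivered exception would leak one right per port.)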
*/ (void)mach_port_deallocate(get_task_ipcspace(ux_handler_self), task_name); - (void)mach_port_deallocate(get_task_ipcspace(ux_handler_self),thread_name); + (void)mach_port_deallocate(get_task_ipcspace(ux_handler_self), thread_name); return (result); } + kern_return_t -catch_exception_raise_state(mach_port_name_t exception_port, int exception, exception_data_t code, mach_msg_type_number_t codeCnt, int flavor, thread_state_t old_state, int old_stateCnt, thread_state_t new_state, int new_stateCnt) +catch_exception_raise_state( + __unused mach_port_t exception_port, + __unused exception_type_t exception, + __unused const exception_data_t code, + __unused mach_msg_type_number_t codeCnt, + __unused int *flavor, + __unused const thread_state_t old_state, + __unused mach_msg_type_number_t old_stateCnt, + __unused thread_state_t new_state, + __unused mach_msg_type_number_t *new_stateCnt) { return(KERN_INVALID_ARGUMENT); } + kern_return_t -catch_exception_raise_state_identity(mach_port_name_t exception_port, mach_port_t thread, mach_port_t task, int exception, exception_data_t code, mach_msg_type_number_t codeCnt, int flavor, thread_state_t old_state, int old_stateCnt, thread_state_t new_state, int new_stateCnt) +catch_exception_raise_state_identity( + __unused mach_port_t exception_port, + __unused mach_port_t thread, + __unused mach_port_t task, + __unused exception_type_t exception, + __unused exception_data_t code, + __unused mach_msg_type_number_t codeCnt, + __unused int *flavor, + __unused thread_state_t old_state, + __unused mach_msg_type_number_t old_stateCnt, + __unused thread_state_t new_state, + __unused mach_msg_type_number_t *new_stateCnt) { return(KERN_INVALID_ARGUMENT); } -boolean_t machine_exception(); - /* * ux_exception translates a mach exception, code and subcode to * a signal and u.u_code. Calls machine_exception (machine dependent) diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c new file mode 100644 index 000000000..7f72472a9 --- /dev/null +++ b/bsd/vfs/kpi_vfs.c @@ -0,0 +1,4626 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kpi_vfs.c + */ + +/* + * External virtual filesystem routines + */ + +#undef DIAGNOSTIC +#define DIAGNOSTIC 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#define ESUCCESS 0 +#undef mount_t +#undef vnode_t + +#define COMPAT_ONLY + + +#define THREAD_SAFE_FS(VP) \ + ((VP)->v_unsafefs ? 0 : 1) + +#define NATIVE_XATTR(VP) \ + ((VP)->v_mount ? 
(VP)->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR : 0) + +static void xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, + int thread_safe, int force); +static void xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, + vfs_context_t context, int thread_safe); + + +static void +vnode_setneedinactive(vnode_t vp) +{ + cache_purge(vp); + + vnode_lock(vp); + vp->v_lflag |= VL_NEEDINACTIVE; + vnode_unlock(vp); +} + + +int +lock_fsnode(vnode_t vp, int *funnel_state) +{ + if (funnel_state) + *funnel_state = thread_funnel_set(kernel_flock, TRUE); + + if (vp->v_unsafefs) { + if (vp->v_unsafefs->fsnodeowner == current_thread()) { + vp->v_unsafefs->fsnode_count++; + } else { + lck_mtx_lock(&vp->v_unsafefs->fsnodelock); + + if (vp->v_lflag & (VL_TERMWANT | VL_TERMINATE | VL_DEAD)) { + lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); + + if (funnel_state) + (void) thread_funnel_set(kernel_flock, *funnel_state); + return (ENOENT); + } + vp->v_unsafefs->fsnodeowner = current_thread(); + vp->v_unsafefs->fsnode_count = 1; + } + } + return (0); +} + + +void +unlock_fsnode(vnode_t vp, int *funnel_state) +{ + if (vp->v_unsafefs) { + if (--vp->v_unsafefs->fsnode_count == 0) { + vp->v_unsafefs->fsnodeowner = NULL; + lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); + } + } + if (funnel_state) + (void) thread_funnel_set(kernel_flock, *funnel_state); +} + + + +/* ====================================================================== */ +/* ************ EXTERNAL KERNEL APIS ********************************** */ +/* ====================================================================== */ + +/* + * prototypes for exported VFS operations + */ +int +VFS_MOUNT(struct mount * mp, vnode_t devvp, user_addr_t data, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_mount == 0)) + return(ENOTSUP); + + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + + if (vfs_context_is64bit(context)) { + if (vfs_64bitready(mp)) { + error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, context); + } + else { + error = ENOTSUP; + } + } + else { + error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, context); + } + + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (error); +} + +int +VFS_START(struct mount * mp, int flags, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_start == 0)) + return(ENOTSUP); + + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_start)(mp, flags, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (error); +} + +int +VFS_UNMOUNT(struct mount *mp, int flags, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_unmount == 0)) + return(ENOTSUP); + + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_unmount)(mp, flags, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (error); +} + +int +VFS_ROOT(struct mount * mp, struct vnode ** vpp, vfs_context_t context) +{ + int error; + int thread_safe; + int 
funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_root == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_root)(mp, vpp, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (error); +} + +int +VFS_QUOTACTL(struct mount *mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_quotactl == 0)) + return(ENOTSUP); + + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (error); +} + +int +VFS_GETATTR(struct mount *mp, struct vfs_attr *vfa, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_getattr == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_getattr)(mp, vfa, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + +int +VFS_SETATTR(struct mount *mp, struct vfs_attr *vfa, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_setattr == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_setattr)(mp, vfa, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + +int +VFS_SYNC(struct mount *mp, int flags, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_sync == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_sync)(mp, flags, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + +int +VFS_VGET(struct mount * mp, ino64_t ino, struct vnode **vpp, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if 
(!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + +int +VFS_FHTOVP(struct mount * mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((mp == dead_mountp) || (mp->mnt_op->vfs_fhtovp == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = mp->mnt_vtable->vfc_threadsafe; + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + +int +VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t context) +{ + int error; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if ((vp->v_mount == dead_mountp) || (vp->v_mount->mnt_op->vfs_vptofh == 0)) + return(ENOTSUP); + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, context); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return(error); +} + + +/* returns a copy of vfs type name for the mount_t */ +void +vfs_name(mount_t mp, char * buffer) +{ + strncpy(buffer, mp->mnt_vtable->vfc_name, MFSNAMELEN); +} + +/* returns vfs type number for the mount_t */ +int +vfs_typenum(mount_t mp) +{ + return(mp->mnt_vtable->vfc_typenum); +} + + +/* returns command modifier flags of mount_t ie. MNT_CMDFLAGS */ +uint64_t +vfs_flags(mount_t mp) +{ + return((uint64_t)(mp->mnt_flag & (MNT_CMDFLAGS | MNT_VISFLAGMASK))); +} + +/* set any of the command modifier flags(MNT_CMDFLAGS) in mount_t */ +void +vfs_setflags(mount_t mp, uint64_t flags) +{ + uint32_t lflags = (uint32_t)(flags & (MNT_CMDFLAGS | MNT_VISFLAGMASK)); + + mp->mnt_flag |= lflags; +} + +/* clear any of the command modifier flags(MNT_CMDFLAGS) in mount_t */ +void +vfs_clearflags(mount_t mp , uint64_t flags) +{ + uint32_t lflags = (uint32_t)(flags & (MNT_CMDFLAGS | MNT_VISFLAGMASK)); + + mp->mnt_flag &= ~lflags; +} + +/* Is the mount_t ronly and upgrade read/write requested? */ +int +vfs_iswriteupgrade(mount_t mp) /* ronly && MNTK_WANTRDWR */ +{ + return ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)); +} + + +/* Is the mount_t mounted ronly */ +int +vfs_isrdonly(mount_t mp) +{ + return (mp->mnt_flag & MNT_RDONLY); +} + +/* Is the mount_t mounted for filesystem synchronous writes? */ +int +vfs_issynchronous(mount_t mp) +{ + return (mp->mnt_flag & MNT_SYNCHRONOUS); +} + +/* Is the mount_t mounted read/write? 
 */
+int
+vfs_isrdwr(mount_t mp)
+{
+	return ((mp->mnt_flag & MNT_RDONLY) == 0);
+}
+
+
+/* Is mount_t marked for update (ie MNT_UPDATE) */
+int
+vfs_isupdate(mount_t mp)
+{
+	return (mp->mnt_flag & MNT_UPDATE);
+}
+
+
+/* Is mount_t marked for reload (ie MNT_RELOAD) */
+int
+vfs_isreload(mount_t mp)
+{
+	return ((mp->mnt_flag & MNT_UPDATE) && (mp->mnt_flag & MNT_RELOAD));
+}
+
+/* Is mount_t marked for forced unmount (ie MNT_FORCE) */
+int
+vfs_isforce(mount_t mp)
+{
+	if ((mp->mnt_flag & MNT_FORCE) || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT))
+		return(1);
+	else
+		return(0);
+}
+
+int
+vfs_64bitready(mount_t mp)
+{
+	if ((mp->mnt_vtable->vfc_64bitready))
+		return(1);
+	else
+		return(0);
+}
+
+int
+vfs_authopaque(mount_t mp)
+{
+	if ((mp->mnt_kern_flag & MNTK_AUTH_OPAQUE))
+		return(1);
+	else
+		return(0);
+}
+
+int
+vfs_authopaqueaccess(mount_t mp)
+{
+	if ((mp->mnt_kern_flag & MNTK_AUTH_OPAQUE_ACCESS))
+		return(1);
+	else
+		return(0);
+}
+
+void
+vfs_setauthopaque(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag |= MNTK_AUTH_OPAQUE;
+	mount_unlock(mp);
+}
+
+void
+vfs_setauthopaqueaccess(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag |= MNTK_AUTH_OPAQUE_ACCESS;
+	mount_unlock(mp);
+}
+
+void
+vfs_clearauthopaque(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
+	mount_unlock(mp);
+}
+
+void
+vfs_clearauthopaqueaccess(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE_ACCESS;
+	mount_unlock(mp);
+}
+
+void
+vfs_setextendedsecurity(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
+	mount_unlock(mp);
+}
+
+void
+vfs_clearextendedsecurity(mount_t mp)
+{
+	mount_lock(mp);
+	mp->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
+	mount_unlock(mp);
+}
+
+int
+vfs_extendedsecurity(mount_t mp)
+{
+	return(mp->mnt_kern_flag & MNTK_EXTENDED_SECURITY);
+}
+
+/* returns the max size of a short symlink in this mount_t */
+uint32_t
+vfs_maxsymlen(mount_t mp)
+{
+	return(mp->mnt_maxsymlinklen);
+}
+
+/* set max size of a short symlink on mount_t */
+void
+vfs_setmaxsymlen(mount_t mp, uint32_t symlen)
+{
+	mp->mnt_maxsymlinklen = symlen;
+}
+
+/* return a pointer to the RO vfs_statfs associated with mount_t */
+struct vfsstatfs *
+vfs_statfs(mount_t mp)
+{
+	return(&mp->mnt_vfsstat);
+}
+
+int
+vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
+{
+	int		error;
+
+	if ((error = VFS_GETATTR(mp, vfa, ctx)) != 0)
+		return(error);
+
+	/*
+	 * If we have a filesystem create time, use it to default some others.
+	 */
+	if (VFSATTR_IS_SUPPORTED(vfa, f_create_time)) {
+		if (VFSATTR_IS_ACTIVE(vfa, f_modify_time) && !VFSATTR_IS_SUPPORTED(vfa, f_modify_time))
+			VFSATTR_RETURN(vfa, f_modify_time, vfa->f_create_time);
+	}
+
+	return(0);
+}
+
+int
+vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx)
+{
+	int error;
+
+	if (vfs_isrdonly(mp))
+		return EROFS;
+
+	error = VFS_SETATTR(mp, vfa, ctx);
+
+	/*
+	 * If we had alternate ways of setting vfs attributes, we'd
+	 * fall back here.
+	 */
+
+	return error;
+}
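/*
 * [Editor's sketch -- not part of the patch.]  Two usage notes on the
 * KPIs above.  First, the bracketing pattern shared by all of the
 * VFS_*() wrappers: a filesystem that did not declare itself
 * thread-safe (vfc_threadsafe == 0) has its entry points serialized
 * under the kernel funnel.
 */
static int
example_vfs_call_pattern(mount_t mp, int flags, vfs_context_t ctx)
{
	int error;
	int funnel_state = 0;
	int thread_safe = mp->mnt_vtable->vfc_threadsafe;

	if (!thread_safe)
		funnel_state = thread_funnel_set(kernel_flock, TRUE);

	error = (*mp->mnt_op->vfs_start)(mp, flags, ctx);	/* any vfs op */

	if (!thread_safe)
		(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}

/*
 * Second, the caller's side of vfs_getattr(): mark the attributes
 * wanted, call, then test what came back.  Assumes the VFSATTR_INIT()
 * and VFSATTR_WANTED() request macros from <sys/mount.h>, which pair
 * with the VFSATTR_IS_SUPPORTED()/VFSATTR_RETURN() macros used above.
 */
static int
example_query_volume_times(mount_t mp, vfs_context_t ctx)
{
	struct vfs_attr va;
	int error;

	VFSATTR_INIT(&va);
	VFSATTR_WANTED(&va, f_create_time);
	VFSATTR_WANTED(&va, f_modify_time);	/* may be defaulted from create time */

	if ((error = vfs_getattr(mp, &va, ctx)) != 0)
		return (error);

	if (VFSATTR_IS_SUPPORTED(&va, f_modify_time))
		printf("modified %ld\n", (long)va.f_modify_time.tv_sec);
	return (0);
}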
+
+/* return the private data handle stored in mount_t */
+void *
+vfs_fsprivate(mount_t mp)
+{
+	return(mp->mnt_data);
+}
+
+/* set the private data handle in mount_t */
+void
+vfs_setfsprivate(mount_t mp, void *mntdata)
+{
+	mp->mnt_data = mntdata;
+}
+
+
+/*
+ * return the block size of the underlying
+ * device associated with mount_t
+ */
+int
+vfs_devblocksize(mount_t mp) {
+
+	return(mp->mnt_devblocksize);
+}
+
+
+/*
+ * return the io attributes associated with mount_t
+ */
+void
+vfs_ioattr(mount_t mp, struct vfsioattr *ioattrp)
+{
+	if (mp == NULL) {
+		ioattrp->io_maxreadcnt = MAXPHYS;
+		ioattrp->io_maxwritecnt = MAXPHYS;
+		ioattrp->io_segreadcnt = 32;
+		ioattrp->io_segwritecnt = 32;
+		ioattrp->io_maxsegreadsize = MAXPHYS;
+		ioattrp->io_maxsegwritesize = MAXPHYS;
+		ioattrp->io_devblocksize = DEV_BSIZE;
+	} else {
+		ioattrp->io_maxreadcnt = mp->mnt_maxreadcnt;
+		ioattrp->io_maxwritecnt = mp->mnt_maxwritecnt;
+		ioattrp->io_segreadcnt = mp->mnt_segreadcnt;
+		ioattrp->io_segwritecnt = mp->mnt_segwritecnt;
+		ioattrp->io_maxsegreadsize = mp->mnt_maxsegreadsize;
+		ioattrp->io_maxsegwritesize = mp->mnt_maxsegwritesize;
+		ioattrp->io_devblocksize = mp->mnt_devblocksize;
+	}
+	ioattrp->io_reserved[0] = 0;
+	ioattrp->io_reserved[1] = 0;
+	ioattrp->io_reserved[2] = 0;
+}
+
+
+/*
+ * set the IO attributes associated with mount_t
+ */
+void
+vfs_setioattr(mount_t mp, struct vfsioattr * ioattrp)
+{
+	if (mp == NULL)
+		return;
+	mp->mnt_maxreadcnt = ioattrp->io_maxreadcnt;
+	mp->mnt_maxwritecnt = ioattrp->io_maxwritecnt;
+	mp->mnt_segreadcnt = ioattrp->io_segreadcnt;
+	mp->mnt_segwritecnt = ioattrp->io_segwritecnt;
+	mp->mnt_maxsegreadsize = ioattrp->io_maxsegreadsize;
+	mp->mnt_maxsegwritesize = ioattrp->io_maxsegwritesize;
+	mp->mnt_devblocksize = ioattrp->io_devblocksize;
+}
+
+/*
+ * Add a new filesystem into the kernel, as described by the passed-in
+ * vfs_fsentry structure.  It fills in the vnode
+ * dispatch vector that is to be passed in when vnodes are created.
+ * It returns a handle to be used when the filesystem is removed.
+ */
+typedef int (*PFI)(void *);
+extern int vfs_opv_numops;
+errno_t
+vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle)
+{
+	struct vfstable	*newvfstbl = NULL;
+	int	i,j;
+	int	(***opv_desc_vector_p)(void *);
+	int	(**opv_desc_vector)(void *);
+	struct vnodeopv_entry_desc	*opve_descp;
+	int desccount;
+	int descsize;
+	PFI *descptr;
+
+	/*
+	 * This routine is responsible for all the initialization that would
+	 * ordinarily be done as part of the system startup;
+	 */
+
+	if (vfe == (struct vfs_fsentry *)0)
+		return(EINVAL);
+
+	desccount = vfe->vfe_vopcnt;
+	if ((desccount <= 0) || (desccount > 5) || (vfe->vfe_vfsops == (struct vfsops *)NULL)
+	    || (vfe->vfe_opvdescs == (struct vnodeopv_desc **)NULL))
+		return(EINVAL);
+
+
+	MALLOC(newvfstbl, void *, sizeof(struct vfstable), M_TEMP,
+	       M_WAITOK);
+	bzero(newvfstbl, sizeof(struct vfstable));
+	newvfstbl->vfc_vfsops = vfe->vfe_vfsops;
+	strncpy(&newvfstbl->vfc_name[0], vfe->vfe_fsname, MFSNAMELEN);
+	if ((vfe->vfe_flags & VFS_TBLNOTYPENUM))
+		newvfstbl->vfc_typenum = maxvfsconf++;
+	else
+		newvfstbl->vfc_typenum = vfe->vfe_fstypenum;
+
+	newvfstbl->vfc_refcount = 0;
+	newvfstbl->vfc_flags = 0;
+	newvfstbl->vfc_mountroot = NULL;
+	newvfstbl->vfc_next = NULL;
+	newvfstbl->vfc_threadsafe = 0;
+	newvfstbl->vfc_vfsflags = 0;
+	if (vfe->vfe_flags & VFS_TBL64BITREADY)
+		newvfstbl->vfc_64bitready = 1;
+	if (vfe->vfe_flags & VFS_TBLTHREADSAFE)
+		newvfstbl->vfc_threadsafe = 1;
+	if (vfe->vfe_flags & VFS_TBLFSNODELOCK)
+		newvfstbl->vfc_threadsafe = 1;
+	if ((vfe->vfe_flags & VFS_TBLLOCALVOL) == VFS_TBLLOCALVOL)
+		newvfstbl->vfc_flags |= MNT_LOCAL;
+	if (vfe->vfe_flags & VFS_TBLLOCALVOL)
+		newvfstbl->vfc_vfsflags |= VFC_VFSLOCALARGS;
+	else
+		newvfstbl->vfc_vfsflags |= VFC_VFSGENERICARGS;
+
+
+	/*
+	 * Allocate and init the vectors.
+	 * Also handle backwards compatibility.
+	 *
+	 * We allocate one large block to hold all
+	 * vnode operation vectors stored contiguously.
+	 */
+	/* XXX - shouldn't be M_TEMP */
+
+	descsize = desccount * vfs_opv_numops * sizeof(PFI);
+	MALLOC(descptr, PFI *, descsize,
+	       M_TEMP, M_WAITOK);
+	bzero(descptr, descsize);
+
+	newvfstbl->vfc_descptr = descptr;
+	newvfstbl->vfc_descsize = descsize;
+
+
+	for (i = 0; i < desccount; i++ ) {
+		opv_desc_vector_p = vfe->vfe_opvdescs[i]->opv_desc_vector_p;
+		/*
+		 * Fill in the caller's pointer to the start of the i'th vector.
+		 * They'll need to supply it when calling vnode_create.
+		 */
+		opv_desc_vector = descptr + i * vfs_opv_numops;
+		*opv_desc_vector_p = opv_desc_vector;
+
+		for (j = 0; vfe->vfe_opvdescs[i]->opv_desc_ops[j].opve_op; j++) {
+			opve_descp = &(vfe->vfe_opvdescs[i]->opv_desc_ops[j]);
+
+			/*
+			 * Sanity check:  is this operation listed
+			 * in the list of operations?  We check this
+			 * by seeing if its offset is zero.  Since
+			 * the default routine should always be listed
+			 * first, it should be the only one with a zero
+			 * offset.  Any other operation with a zero
+			 * offset is probably not listed in
+			 * vfs_op_descs, and so is probably an error.
+			 *
+			 * A panic here means the layer programmer
+			 * has committed the all-too common bug
+			 * of adding a new operation to the layer's
+			 * list of vnode operations but
+			 * not adding the operation to the system-wide
+			 * list of supported operations.
+ */ + if (opve_descp->opve_op->vdesc_offset == 0 && + opve_descp->opve_op->vdesc_offset != VOFFSET(vnop_default)) { + printf("vfs_fsadd: operation %s not listed in %s.\n", + opve_descp->opve_op->vdesc_name, + "vfs_op_descs"); + panic("vfs_fsadd: bad operation"); + } + /* + * Fill in this entry. + */ + opv_desc_vector[opve_descp->opve_op->vdesc_offset] = + opve_descp->opve_impl; + } + + + /* + * Finally, go back and replace unfilled routines + * with their default. (Sigh, an O(n^3) algorithm. I + * could make it better, but that'd be work, and n is small.) + */ + opv_desc_vector_p = vfe->vfe_opvdescs[i]->opv_desc_vector_p; + + /* + * Force every operations vector to have a default routine. + */ + opv_desc_vector = *opv_desc_vector_p; + if (opv_desc_vector[VOFFSET(vnop_default)] == NULL) + panic("vfs_fsadd: operation vector without default routine."); + for (j = 0; j < vfs_opv_numops; j++) + if (opv_desc_vector[j] == NULL) + opv_desc_vector[j] = + opv_desc_vector[VOFFSET(vnop_default)]; + + } /* end of each vnodeopv_desc parsing */ + + + + *handle = vfstable_add(newvfstbl); + + if (newvfstbl->vfc_typenum <= maxvfsconf ) + maxvfsconf = newvfstbl->vfc_typenum + 1; + numused_vfsslots++; + + if (newvfstbl->vfc_vfsops->vfs_init) + (*newvfstbl->vfc_vfsops->vfs_init)((struct vfsconf *)handle); + + FREE(newvfstbl, M_TEMP); + + return(0); +} + +/* + * Removes the filesystem from kernel. + * The argument passed in is the handle that was given when + * file system was added + */ +errno_t +vfs_fsremove(vfstable_t handle) +{ + struct vfstable * vfstbl = (struct vfstable *)handle; + void *old_desc = NULL; + errno_t err; + + /* Preflight check for any mounts */ + mount_list_lock(); + if ( vfstbl->vfc_refcount != 0 ) { + mount_list_unlock(); + return EBUSY; + } + mount_list_unlock(); + + /* + * save the old descriptor; the free cannot occur unconditionally, + * since vfstable_del() may fail. + */ + if (vfstbl->vfc_descptr && vfstbl->vfc_descsize) { + old_desc = vfstbl->vfc_descptr; + } + err = vfstable_del(vfstbl); + + /* free the descriptor if the delete was successful */ + if (err == 0 && old_desc) { + FREE(old_desc, M_TEMP); + } + + return(err); +} + +/* + * This returns a reference to mount_t + * which should be dropped using vfs_mountrele(). + * Not doing so will leak a mountpoint + * and associated data structures. 
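 *
 * [Editor's sketch -- not part of the patch.]  For reference, the
 * caller's side of vfs_fsadd() above.  The myfs_* names are
 * hypothetical; the field usage follows struct vfs_fsentry and
 * struct vnodeopv_desc exactly as vfs_fsadd() consumes them:
 *
 *	extern struct vfsops myfs_vfsops;
 *	extern struct vnodeopv_entry_desc myfs_vnodeop_entries[];
 *	int (**myfs_vnodeop_p)(void *);		(filled in by vfs_fsadd())
 *	struct vnodeopv_desc myfs_opv_desc =
 *		{ &myfs_vnodeop_p, myfs_vnodeop_entries };
 *	struct vnodeopv_desc *myfs_opvdescs[] = { &myfs_opv_desc };
 *
 *	struct vfs_fsentry vfe;
 *	vfstable_t handle;
 *
 *	bzero(&vfe, sizeof(vfe));
 *	vfe.vfe_vfsops = &myfs_vfsops;
 *	vfe.vfe_vopcnt = 1;			(1..5 vectors allowed)
 *	vfe.vfe_opvdescs = myfs_opvdescs;
 *	strncpy(vfe.vfe_fsname, "myfs", MFSNAMELEN);
 *	vfe.vfe_flags = VFS_TBLTHREADSAFE | VFS_TBL64BITREADY |
 *	    VFS_TBLNOTYPENUM;
 *	error = vfs_fsadd(&vfe, &handle);	(undo with vfs_fsremove(handle))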
+ */ +errno_t +vfs_mountref(__unused mount_t mp ) /* gives a reference */ +{ + return(0); +} + +/* This drops the reference on mount_t that was acquired */ +errno_t +vfs_mountrele(__unused mount_t mp ) /* drops reference */ +{ + return(0); +} + +int +vfs_context_pid(vfs_context_t context) +{ + return (context->vc_proc->p_pid); +} + +int +vfs_context_suser(vfs_context_t context) +{ + return (suser(context->vc_ucred, 0)); +} +int +vfs_context_issignal(vfs_context_t context, sigset_t mask) +{ + if (context->vc_proc) + return(proc_pendingsignals(context->vc_proc, mask)); + return(0); +} + +int +vfs_context_is64bit(vfs_context_t context) +{ + if (context->vc_proc) + return(proc_is64bit(context->vc_proc)); + return(0); +} + +proc_t +vfs_context_proc(vfs_context_t context) +{ + return (context->vc_proc); +} + +vfs_context_t +vfs_context_create(vfs_context_t context) +{ + struct vfs_context * newcontext; + + newcontext = (struct vfs_context *)kalloc(sizeof(struct vfs_context)); + + if (newcontext) { + if (context) { + newcontext->vc_proc = context->vc_proc; + newcontext->vc_ucred = context->vc_ucred; + } else { + newcontext->vc_proc = proc_self(); + newcontext->vc_ucred = kauth_cred_get(); + } + return(newcontext); + } + return((vfs_context_t)0); +} + +int +vfs_context_rele(vfs_context_t context) +{ + if (context) + kfree(context, sizeof(struct vfs_context)); + return(0); +} + + +ucred_t +vfs_context_ucred(vfs_context_t context) +{ + return (context->vc_ucred); +} + +/* + * Return true if the context is owned by the superuser. + */ +int +vfs_context_issuser(vfs_context_t context) +{ + return(context->vc_ucred->cr_uid == 0); +} + + +/* XXXXXXXXXXXXXX VNODE KAPIS XXXXXXXXXXXXXXXXXXXXXXXXX */ + + +/* + * Convert between vnode types and inode formats (since POSIX.1 + * defines mode word of stat structure in terms of inode formats). + */ +enum vtype +vnode_iftovt(int mode) +{ + return(iftovt_tab[((mode) & S_IFMT) >> 12]); +} + +int +vnode_vttoif(enum vtype indx) +{ + return(vttoif_tab[(int)(indx)]); +} + +int +vnode_makeimode(int indx, int mode) +{ + return (int)(VTTOIF(indx) | (mode)); +} + + +/* + * vnode manipulation functions. + */ + +/* returns system root vnode reference; It should be dropped using vrele() */ +vnode_t +vfs_rootvnode(void) +{ + int error; + + error = vnode_get(rootvnode); + if (error) + return ((vnode_t)0); + else + return rootvnode; +} + + +uint32_t +vnode_vid(vnode_t vp) +{ + return ((uint32_t)(vp->v_id)); +} + +/* returns a mount reference; drop it with vfs_mountrelease() */ +mount_t +vnode_mount(vnode_t vp) +{ + return (vp->v_mount); +} + +/* returns a mount reference iff vnode_t is a dir and is a mount point */ +mount_t +vnode_mountedhere(vnode_t vp) +{ + mount_t mp; + + if ((vp->v_type == VDIR) && ((mp = vp->v_mountedhere) != NULL) && + (mp->mnt_vnodecovered == vp)) + return (mp); + else + return (mount_t)NULL; +} + +/* returns vnode type of vnode_t */ +enum vtype +vnode_vtype(vnode_t vp) +{ + return (vp->v_type); +} + +/* returns FS specific node saved in vnode */ +void * +vnode_fsnode(vnode_t vp) +{ + return (vp->v_data); +} + +void +vnode_clearfsnode(vnode_t vp) +{ + vp->v_data = 0; +} + +dev_t +vnode_specrdev(vnode_t vp) +{ + return(vp->v_rdev); +} + + +/* Accessor functions */ +/* is vnode_t a root vnode */ +int +vnode_isvroot(vnode_t vp) +{ + return ((vp->v_flag & VROOT)? 1 : 0); +} + +/* is vnode_t a system vnode */ +int +vnode_issystem(vnode_t vp) +{ + return ((vp->v_flag & VSYSTEM)? 
1 : 0);
+}
+
+/* is a mount operation in progress on this vnode_t */
+int
+vnode_ismount(vnode_t vp)
+{
+	return ((vp->v_flag & VMOUNT)? 1 : 0);
+}
+
+/* is this vnode under recycle now */
+int
+vnode_isrecycled(vnode_t vp)
+{
+	int ret;
+
+	vnode_lock(vp);
+	ret = (vp->v_lflag & (VL_TERMINATE|VL_DEAD))? 1 : 0;
+	vnode_unlock(vp);
+	return(ret);
+}
+
+/* is vnode_t marked to not keep data cached once it's been consumed */
+int
+vnode_isnocache(vnode_t vp)
+{
+	return ((vp->v_flag & VNOCACHE_DATA)? 1 : 0);
+}
+
+/*
+ * has sequential readahead been disabled on this vnode
+ */
+int
+vnode_isnoreadahead(vnode_t vp)
+{
+	return ((vp->v_flag & VRAOFF)? 1 : 0);
+}
+
+/* is vnode_t a standard one? */
+int
+vnode_isstandard(vnode_t vp)
+{
+	return ((vp->v_flag & VSTANDARD)? 1 : 0);
+}
+
+/* don't vflush() if SKIPSYSTEM */
+int
+vnode_isnoflush(vnode_t vp)
+{
+	return ((vp->v_flag & VNOFLUSH)? 1 : 0);
+}
+
+/* is vnode_t a regular file */
+int
+vnode_isreg(vnode_t vp)
+{
+	return ((vp->v_type == VREG)? 1 : 0);
+}
+
+/* is vnode_t a directory? */
+int
+vnode_isdir(vnode_t vp)
+{
+	return ((vp->v_type == VDIR)? 1 : 0);
+}
+
+/* is vnode_t a symbolic link? */
+int
+vnode_islnk(vnode_t vp)
+{
+	return ((vp->v_type == VLNK)? 1 : 0);
+}
+
+/* is vnode_t a fifo? */
+int
+vnode_isfifo(vnode_t vp)
+{
+	return ((vp->v_type == VFIFO)? 1 : 0);
+}
+
+/* is vnode_t a block device? */
+int
+vnode_isblk(vnode_t vp)
+{
+	return ((vp->v_type == VBLK)? 1 : 0);
+}
+
+/* is vnode_t a char device? */
+int
+vnode_ischr(vnode_t vp)
+{
+	return ((vp->v_type == VCHR)? 1 : 0);
+}
+
+/* is vnode_t a socket? */
+int
+vnode_issock(vnode_t vp)
+{
+	return ((vp->v_type == VSOCK)? 1 : 0);
+}
+
+
+/* TBD: set vnode_t to not cache data after it is consumed once; used for quota */
+void
+vnode_setnocache(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag |= VNOCACHE_DATA;
+	vnode_unlock(vp);
+}
+
+void
+vnode_clearnocache(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag &= ~VNOCACHE_DATA;
+	vnode_unlock(vp);
+}
+
+void
+vnode_setnoreadahead(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag |= VRAOFF;
+	vnode_unlock(vp);
+}
+
+void
+vnode_clearnoreadahead(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag &= ~VRAOFF;
+	vnode_unlock(vp);
+}
+
+
+/* mark vnode_t to be skipped by vflush() if SKIPSYSTEM */
+void
+vnode_setnoflush(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag |= VNOFLUSH;
+	vnode_unlock(vp);
+}
+
+void
+vnode_clearnoflush(vnode_t vp)
+{
+	vnode_lock(vp);
+	vp->v_flag &= ~VNOFLUSH;
+	vnode_unlock(vp);
+}
+
+
+/* is vnode_t a block device with a FS mounted on it */
+int
+vnode_ismountedon(vnode_t vp)
+{
+	return ((vp->v_specflags & SI_MOUNTEDON)?
1 : 0); +} + +void +vnode_setmountedon(vnode_t vp) +{ + vnode_lock(vp); + vp->v_specflags |= SI_MOUNTEDON; + vnode_unlock(vp); +} + +void +vnode_clearmountedon(vnode_t vp) +{ + vnode_lock(vp); + vp->v_specflags &= ~SI_MOUNTEDON; + vnode_unlock(vp); +} + + +void +vnode_settag(vnode_t vp, int tag) +{ + vp->v_tag = tag; + +} + +int +vnode_tag(vnode_t vp) +{ + return(vp->v_tag); +} + +vnode_t +vnode_parent(vnode_t vp) +{ + + return(vp->v_parent); +} + +void +vnode_setparent(vnode_t vp, vnode_t dvp) +{ + vp->v_parent = dvp; +} + +char * +vnode_name(vnode_t vp) +{ + /* we try to keep v_name a reasonable name for the node */ + return(vp->v_name); +} + +void +vnode_setname(vnode_t vp, char * name) +{ + vp->v_name = name; +} + +/* return the registered FS name when adding the FS to kernel */ +void +vnode_vfsname(vnode_t vp, char * buf) +{ + strncpy(buf, vp->v_mount->mnt_vtable->vfc_name, MFSNAMELEN); +} + +/* return the FS type number */ +int +vnode_vfstypenum(vnode_t vp) +{ + return(vp->v_mount->mnt_vtable->vfc_typenum); +} + +int +vnode_vfs64bitready(vnode_t vp) +{ + + if ((vp->v_mount->mnt_vtable->vfc_64bitready)) + return(1); + else + return(0); +} + + + +/* return the visible flags on associated mount point of vnode_t */ +uint32_t +vnode_vfsvisflags(vnode_t vp) +{ + return(vp->v_mount->mnt_flag & MNT_VISFLAGMASK); +} + +/* return the command modifier flags on associated mount point of vnode_t */ +uint32_t +vnode_vfscmdflags(vnode_t vp) +{ + return(vp->v_mount->mnt_flag & MNT_CMDFLAGS); +} + +/* return the max symlink of short links of vnode_t */ +uint32_t +vnode_vfsmaxsymlen(vnode_t vp) +{ + return(vp->v_mount->mnt_maxsymlinklen); +} + +/* return a pointer to the RO vfs_statfs associated with vnode_t's mount point */ +struct vfsstatfs * +vnode_vfsstatfs(vnode_t vp) +{ + return(&vp->v_mount->mnt_vfsstat); +} + +/* return a handle to the FSs specific private handle associated with vnode_t's mount point */ +void * +vnode_vfsfsprivate(vnode_t vp) +{ + return(vp->v_mount->mnt_data); +} + +/* is vnode_t in a rdonly mounted FS */ +int +vnode_vfsisrdonly(vnode_t vp) +{ + return ((vp->v_mount->mnt_flag & MNT_RDONLY)? 1 : 0); +} + + +/* returns vnode ref to current working directory */ +vnode_t +current_workingdir(void) +{ + struct proc *p = current_proc(); + struct vnode * vp ; + + if ( (vp = p->p_fd->fd_cdir) ) { + if ( (vnode_getwithref(vp)) ) + return (NULL); + } + return vp; +} + +/* returns vnode ref to current root(chroot) directory */ +vnode_t +current_rootdir(void) +{ + struct proc *p = current_proc(); + struct vnode * vp ; + + if ( (vp = p->p_fd->fd_rdir) ) { + if ( (vnode_getwithref(vp)) ) + return (NULL); + } + return vp; +} + +static int +vnode_get_filesec(vnode_t vp, kauth_filesec_t *fsecp, vfs_context_t ctx) +{ + kauth_filesec_t fsec; + uio_t fsec_uio; + size_t fsec_size; + size_t xsize, rsize; + int error; + + fsec = NULL; + fsec_uio = NULL; + error = 0; + + /* find out how big the EA is */ + if (vn_getxattr(vp, KAUTH_FILESEC_XATTR, NULL, &xsize, XATTR_NOSECURITY, ctx) != 0) { + /* no EA, no filesec */ + if ((error == ENOATTR) || (error == ENOENT) || (error == EJUSTRETURN)) + error = 0; + /* either way, we are done */ + goto out; + } + + /* how many entries would fit? 
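 *
 * [Editor's aside -- not part of the patch.]  On the helpers just
 * above: current_workingdir() and current_rootdir() return with an
 * iocount held (taken via vnode_getwithref()), so the caller is
 * expected to drop it when done, e.g.:
 *
 *	vnode_t vp;
 *	if ((vp = current_workingdir()) != NULL) {
 *		... use vp, e.g. vnode_isdir(vp) ...
 *		vnode_put(vp);		(drop the iocount)
 *	}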
*/ + fsec_size = KAUTH_FILESEC_COUNT(xsize); + + /* get buffer and uio */ + if (((fsec = kauth_filesec_alloc(fsec_size)) == NULL) || + ((fsec_uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ)) == NULL) || + uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), xsize)) { + KAUTH_DEBUG(" ERROR - could not allocate iov to read ACL"); + error = ENOMEM; + goto out; + } + + /* read security attribute */ + rsize = xsize; + if ((error = vn_getxattr(vp, + KAUTH_FILESEC_XATTR, + fsec_uio, + &rsize, + XATTR_NOSECURITY, + ctx)) != 0) { + + /* no attribute - no security data */ + if ((error == ENOATTR) || (error == ENOENT) || (error == EJUSTRETURN)) + error = 0; + /* either way, we are done */ + goto out; + } + + /* + * Validate security structure. If it's corrupt, we will + * just ignore it. + */ + if (rsize < KAUTH_FILESEC_SIZE(0)) { + KAUTH_DEBUG("ACL - DATA TOO SMALL (%d)", rsize); + goto out; + } + if (fsec->fsec_magic != KAUTH_FILESEC_MAGIC) { + KAUTH_DEBUG("ACL - BAD MAGIC %x", fsec->fsec_magic); + goto out; + } + if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && + (fsec->fsec_acl.acl_entrycount > KAUTH_ACL_MAX_ENTRIES)) { + KAUTH_DEBUG("ACL - BAD ENTRYCOUNT %x", fsec->fsec_entrycount); + goto out; + } + if ((fsec->fsec_acl.acl_entrycount != KAUTH_FILESEC_NOACL) && + (KAUTH_FILESEC_SIZE(fsec->fsec_acl.acl_entrycount) > rsize)) { + KAUTH_DEBUG("ACL - BUFFER OVERFLOW (%d entries too big for %d)", fsec->fsec_acl.acl_entrycount, rsize); + goto out; + } + + *fsecp = fsec; + fsec = NULL; + error = 0; +out: + if (fsec != NULL) + kauth_filesec_free(fsec); + if (fsec_uio != NULL) + uio_free(fsec_uio); + if (error) + *fsecp = NULL; + return(error); +} + +static int +vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context_t ctx) +{ + uio_t fsec_uio; + int error; + + fsec_uio = NULL; + + if ((fsec_uio = uio_create(2, 0, UIO_SYSSPACE, UIO_WRITE)) == NULL) { + KAUTH_DEBUG(" ERROR - could not allocate iov to write ACL"); + error = ENOMEM; + goto out; + } + uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), sizeof(struct kauth_filesec) - sizeof(struct kauth_acl)); + uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), KAUTH_ACL_COPYSIZE(acl)); + error = vn_setxattr(vp, + KAUTH_FILESEC_XATTR, + fsec_uio, + XATTR_NOSECURITY, /* we have auth'ed already */ + ctx); + VFS_DEBUG(ctx, vp, "SETATTR - set ACL returning %d", error); + +out: + if (fsec_uio != NULL) + uio_free(fsec_uio); + return(error); +} + + +int +vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) +{ + kauth_filesec_t fsec; + kauth_acl_t facl; + int error; + uid_t nuid; + gid_t ngid; + + /* don't ask for extended security data if the filesystem doesn't support it */ + if (!vfs_extendedsecurity(vnode_mount(vp))) { + VATTR_CLEAR_ACTIVE(vap, va_acl); + VATTR_CLEAR_ACTIVE(vap, va_uuuid); + VATTR_CLEAR_ACTIVE(vap, va_guuid); + } + + /* + * If the caller wants size values we might have to synthesise, give the + * filesystem the opportunity to supply better intermediate results. + */ + if (VATTR_IS_ACTIVE(vap, va_data_alloc) || + VATTR_IS_ACTIVE(vap, va_total_size) || + VATTR_IS_ACTIVE(vap, va_total_alloc)) { + VATTR_SET_ACTIVE(vap, va_data_size); + VATTR_SET_ACTIVE(vap, va_data_alloc); + VATTR_SET_ACTIVE(vap, va_total_size); + VATTR_SET_ACTIVE(vap, va_total_alloc); + } + + error = VNOP_GETATTR(vp, vap, ctx); + if (error) { + KAUTH_DEBUG("ERROR - returning %d", error); + goto out; + } + + /* + * If extended security data was requested but not returned, try the fallback + * path. 
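 *
 * [Editor's sketch -- not part of the patch.]  The caller's side of
 * vnode_getattr(), for contrast: initialize the request, mark the
 * attributes wanted, then test what came back.  Assumes the
 * VATTR_INIT()/VATTR_WANTED() macros from <sys/vnode.h>, which pair
 * with the VATTR_IS_SUPPORTED()/VATTR_RETURN() macros used here:
 *
 *	struct vnode_attr va;
 *	VATTR_INIT(&va);
 *	VATTR_WANTED(&va, va_data_size);
 *	VATTR_WANTED(&va, va_uid);
 *	if ((error = vnode_getattr(vp, &va, ctx)) == 0 &&
 *	    VATTR_IS_SUPPORTED(&va, va_data_size))
 *		printf("size %lld uid %d\n",
 *		    (long long)va.va_data_size, va.va_uid);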
+ */ + if (VATTR_NOT_RETURNED(vap, va_acl) || VATTR_NOT_RETURNED(vap, va_uuuid) || VATTR_NOT_RETURNED(vap, va_guuid)) { + fsec = NULL; + + if ((vp->v_type == VDIR) || (vp->v_type == VLNK) || (vp->v_type == VREG)) { + /* try to get the filesec */ + if ((error = vnode_get_filesec(vp, &fsec, ctx)) != 0) + goto out; + } + /* if no filesec, no attributes */ + if (fsec == NULL) { + VATTR_RETURN(vap, va_acl, NULL); + VATTR_RETURN(vap, va_uuuid, kauth_null_guid); + VATTR_RETURN(vap, va_guuid, kauth_null_guid); + } else { + + /* looks good, try to return what we were asked for */ + VATTR_RETURN(vap, va_uuuid, fsec->fsec_owner); + VATTR_RETURN(vap, va_guuid, fsec->fsec_group); + + /* only return the ACL if we were actually asked for it */ + if (VATTR_IS_ACTIVE(vap, va_acl)) { + if (fsec->fsec_acl.acl_entrycount == KAUTH_FILESEC_NOACL) { + VATTR_RETURN(vap, va_acl, NULL); + } else { + facl = kauth_acl_alloc(fsec->fsec_acl.acl_entrycount); + if (facl == NULL) { + kauth_filesec_free(fsec); + error = ENOMEM; + goto out; + } + bcopy(&fsec->fsec_acl, facl, KAUTH_ACL_COPYSIZE(&fsec->fsec_acl)); + VATTR_RETURN(vap, va_acl, facl); + } + } + kauth_filesec_free(fsec); + } + } + /* + * If someone gave us an unsolicited filesec, toss it. We promise that + * we're OK with a filesystem giving us anything back, but our callers + * only expect what they asked for. + */ + if (VATTR_IS_SUPPORTED(vap, va_acl) && !VATTR_IS_ACTIVE(vap, va_acl)) { + if (vap->va_acl != NULL) + kauth_acl_free(vap->va_acl); + VATTR_CLEAR_SUPPORTED(vap, va_acl); + } + +#if 0 /* enable when we have a filesystem only supporting UUIDs */ + /* + * Handle the case where we need a UID/GID, but only have extended + * security information. + */ + if (VATTR_NOT_RETURNED(vap, va_uid) && + VATTR_IS_SUPPORTED(vap, va_uuuid) && + !kauth_guid_equal(&vap->va_uuuid, &kauth_null_guid)) { + if ((error = kauth_cred_guid2uid(&vap->va_uuuid, &nuid)) == 0) + VATTR_RETURN(vap, va_uid, nuid); + } + if (VATTR_NOT_RETURNED(vap, va_gid) && + VATTR_IS_SUPPORTED(vap, va_guuid) && + !kauth_guid_equal(&vap->va_guuid, &kauth_null_guid)) { + if ((error = kauth_cred_guid2gid(&vap->va_guuid, &ngid)) == 0) + VATTR_RETURN(vap, va_gid, ngid); + } +#endif + + /* + * Handle uid/gid == 99 and MNT_IGNORE_OWNERSHIP here. + */ + if (VATTR_IS_ACTIVE(vap, va_uid)) { + if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { + nuid = vp->v_mount->mnt_fsowner; + if (nuid == KAUTH_UID_NONE) + nuid = 99; + } else if (VATTR_IS_SUPPORTED(vap, va_uid)) { + nuid = vap->va_uid; + } else { + /* this will always be something sensible */ + nuid = vp->v_mount->mnt_fsowner; + } + if ((nuid == 99) && !vfs_context_issuser(ctx)) + nuid = kauth_cred_getuid(vfs_context_ucred(ctx)); + VATTR_RETURN(vap, va_uid, nuid); + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { + ngid = vp->v_mount->mnt_fsgroup; + if (ngid == KAUTH_GID_NONE) + ngid = 99; + } else if (VATTR_IS_SUPPORTED(vap, va_gid)) { + ngid = vap->va_gid; + } else { + /* this will always be something sensible */ + ngid = vp->v_mount->mnt_fsgroup; + } + if ((ngid == 99) && !vfs_context_issuser(ctx)) + ngid = kauth_cred_getgid(vfs_context_ucred(ctx)); + VATTR_RETURN(vap, va_gid, ngid); + } + + /* + * Synthesise some values that can be reasonably guessed. 
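 *
 * [Editor's note.]  The ownership substitution above, distilled: on
 * volumes mounted with MNT_IGNORE_OWNERSHIP, on-disk IDs are replaced
 * by the mount's fsowner/fsgroup, uid/gid 99 stands in for "unknown",
 * and a non-root caller is shown itself as the owner:
 *
 *	nuid = mp->mnt_fsowner;
 *	if (nuid == KAUTH_UID_NONE)
 *		nuid = 99;			(conventional "unknown" id)
 *	if (nuid == 99 && !vfs_context_issuser(ctx))
 *		nuid = kauth_cred_getuid(vfs_context_ucred(ctx));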
+ */ + if (!VATTR_IS_SUPPORTED(vap, va_iosize)) + VATTR_RETURN(vap, va_iosize, vp->v_mount->mnt_vfsstat.f_iosize); + + if (!VATTR_IS_SUPPORTED(vap, va_flags)) + VATTR_RETURN(vap, va_flags, 0); + + if (!VATTR_IS_SUPPORTED(vap, va_filerev)) + VATTR_RETURN(vap, va_filerev, 0); + + if (!VATTR_IS_SUPPORTED(vap, va_gen)) + VATTR_RETURN(vap, va_gen, 0); + + /* + * Default sizes. Ordering here is important, as later defaults build on earlier ones. + */ + if (!VATTR_IS_SUPPORTED(vap, va_data_size)) + VATTR_RETURN(vap, va_data_size, 0); + + /* do we want any of the possibly-computed values? */ + if (VATTR_IS_ACTIVE(vap, va_data_alloc) || + VATTR_IS_ACTIVE(vap, va_total_size) || + VATTR_IS_ACTIVE(vap, va_total_alloc)) { + /* make sure f_bsize is valid */ + if (vp->v_mount->mnt_vfsstat.f_bsize == 0) { + if ((error = vfs_update_vfsstat(vp->v_mount, ctx)) != 0) + goto out; + } + + /* default va_data_alloc from va_data_size */ + if (!VATTR_IS_SUPPORTED(vap, va_data_alloc)) + VATTR_RETURN(vap, va_data_alloc, roundup(vap->va_data_size, vp->v_mount->mnt_vfsstat.f_bsize)); + + /* default va_total_size from va_data_size */ + if (!VATTR_IS_SUPPORTED(vap, va_total_size)) + VATTR_RETURN(vap, va_total_size, vap->va_data_size); + + /* default va_total_alloc from va_total_size which is guaranteed at this point */ + if (!VATTR_IS_SUPPORTED(vap, va_total_alloc)) + VATTR_RETURN(vap, va_total_alloc, roundup(vap->va_total_size, vp->v_mount->mnt_vfsstat.f_bsize)); + } + + /* + * If we don't have a change time, pull it from the modtime. + */ + if (!VATTR_IS_SUPPORTED(vap, va_change_time) && VATTR_IS_SUPPORTED(vap, va_modify_time)) + VATTR_RETURN(vap, va_change_time, vap->va_modify_time); + + /* + * This is really only supported for the creation VNOPs, but since the field is there + * we should populate it correctly. + */ + VATTR_RETURN(vap, va_type, vp->v_type); + + /* + * The fsid can be obtained from the mountpoint directly. + */ + VATTR_RETURN(vap, va_fsid, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + +out: + + return(error); +} + +int +vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) +{ + int error, is_ownership_change=0; + + /* + * Make sure the filesystem is mounted R/W. + * If not, return an error. + */ + if (vfs_isrdonly(vp->v_mount)) + return(EROFS); + + /* + * If ownership is being ignored on this volume, we silently discard + * ownership changes. + */ + if (vp->v_mount->mnt_flag & MNT_IGNORE_OWNERSHIP) { + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + } + + if (VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { + is_ownership_change = 1; + } + + /* + * Make sure that extended security is enabled if we're going to try + * to set any. 
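 *
 * [Editor's sketch -- not part of the patch.]  A typical caller of
 * vnode_setattr(), mirroring the VATTR_INIT()/VATTR_SET() usage that
 * appears in VNOP_SETATTR()'s xattr-shadowing path later in this
 * file (a chmod-style update):
 *
 *	struct vnode_attr va;
 *	VATTR_INIT(&va);
 *	VATTR_SET(&va, va_mode, mode);
 *	error = vnode_setattr(vp, &va, ctx);
 *	(unsupported items fall back via vnode_setattr_fallback())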
+ */ + if (!vfs_extendedsecurity(vnode_mount(vp)) && + (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { + KAUTH_DEBUG("SETATTR - returning ENOTSUP to request to set extended security"); + return(ENOTSUP); + } + + error = VNOP_SETATTR(vp, vap, ctx); + + if ((error == 0) && !VATTR_ALL_SUPPORTED(vap)) + error = vnode_setattr_fallback(vp, vap, ctx); + + /* + * If we have changed any of the things about the file that are likely + * to result in changes to authorisation results, blow the vnode auth + * cache + */ + if (VATTR_IS_SUPPORTED(vap, va_mode) || + VATTR_IS_SUPPORTED(vap, va_uid) || + VATTR_IS_SUPPORTED(vap, va_gid) || + VATTR_IS_SUPPORTED(vap, va_flags) || + VATTR_IS_SUPPORTED(vap, va_acl) || + VATTR_IS_SUPPORTED(vap, va_uuuid) || + VATTR_IS_SUPPORTED(vap, va_guuid)) + vnode_uncache_credentials(vp); + // only send a stat_changed event if this is more than + // just an access time update + if (error == 0 && (vap->va_active != VNODE_ATTR_BIT(va_access_time))) { + if (need_fsevent(FSE_STAT_CHANGED, vp) || (is_ownership_change && need_fsevent(FSE_CHOWN, vp))) { + if (is_ownership_change == 0) + add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + else + add_fsevent(FSE_CHOWN, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + } + return(error); +} + +/* + * Following an operation which sets attributes (setattr, create, etc.) we may + * need to perform fallback operations to get attributes saved. + */ +int +vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) +{ + kauth_filesec_t fsec; + kauth_acl_t facl; + struct kauth_filesec lfsec; + int error; + + error = 0; + + /* + * Extended security fallback via extended attributes. + * + * Note that we do not free the filesec; the caller is expected to do this. + */ + if (VATTR_NOT_RETURNED(vap, va_acl) || + VATTR_NOT_RETURNED(vap, va_uuuid) || + VATTR_NOT_RETURNED(vap, va_guuid)) { + VFS_DEBUG(ctx, vp, "SETATTR - doing filesec fallback"); + + /* + * Fail for file types that we don't permit extended security to be set on. + */ + if ((vp->v_type != VDIR) && (vp->v_type != VLNK) && (vp->v_type != VREG)) { + VFS_DEBUG(ctx, vp, "SETATTR - Can't write ACL to file type %d", vnode_vtype(vp)); + error = EINVAL; + goto out; + } + + /* + * If we don't have all the extended security items, we need to fetch the existing + * data to perform a read-modify-write operation. + */ + fsec = NULL; + if (!VATTR_IS_ACTIVE(vap, va_acl) || + !VATTR_IS_ACTIVE(vap, va_uuuid) || + !VATTR_IS_ACTIVE(vap, va_guuid)) { + if ((error = vnode_get_filesec(vp, &fsec, ctx)) != 0) { + KAUTH_DEBUG("SETATTR - ERROR %d fetching filesec for update", error); + goto out; + } + } + /* if we didn't get a filesec, use our local one */ + if (fsec == NULL) { + KAUTH_DEBUG("SETATTR - using local filesec for new/full update"); + fsec = &lfsec; + } else { + KAUTH_DEBUG("SETATTR - updating existing filesec"); + } + /* find the ACL */ + facl = &fsec->fsec_acl; + + /* if we're using the local filesec, we need to initialise it */ + if (fsec == &lfsec) { + fsec->fsec_magic = KAUTH_FILESEC_MAGIC; + fsec->fsec_owner = kauth_null_guid; + fsec->fsec_group = kauth_null_guid; + facl->acl_entrycount = KAUTH_FILESEC_NOACL; + facl->acl_flags = 0; + } + + /* + * Update with the supplied attributes. 
+ */ + if (VATTR_IS_ACTIVE(vap, va_uuuid)) { + KAUTH_DEBUG("SETATTR - updating owner UUID"); + fsec->fsec_owner = vap->va_uuuid; + VATTR_SET_SUPPORTED(vap, va_uuuid); + } + if (VATTR_IS_ACTIVE(vap, va_guuid)) { + KAUTH_DEBUG("SETATTR - updating group UUID"); + fsec->fsec_group = vap->va_guuid; + VATTR_SET_SUPPORTED(vap, va_guuid); + } + if (VATTR_IS_ACTIVE(vap, va_acl)) { + if (vap->va_acl == NULL) { + KAUTH_DEBUG("SETATTR - removing ACL"); + facl->acl_entrycount = KAUTH_FILESEC_NOACL; + } else { + KAUTH_DEBUG("SETATTR - setting ACL with %d entries", vap->va_acl->acl_entrycount); + facl = vap->va_acl; + } + VATTR_SET_SUPPORTED(vap, va_acl); + } + + /* + * If the filesec data is all invalid, we can just remove the EA completely. + */ + if ((facl->acl_entrycount == KAUTH_FILESEC_NOACL) && + kauth_guid_equal(&fsec->fsec_owner, &kauth_null_guid) && + kauth_guid_equal(&fsec->fsec_group, &kauth_null_guid)) { + error = vn_removexattr(vp, KAUTH_FILESEC_XATTR, XATTR_NOSECURITY, ctx); + /* no attribute is ok, nothing to delete */ + if (error == ENOATTR) + error = 0; + VFS_DEBUG(ctx, vp, "SETATTR - remove filesec returning %d", error); + } else { + /* write the EA */ + error = vnode_set_filesec(vp, fsec, facl, ctx); + VFS_DEBUG(ctx, vp, "SETATTR - update filesec returning %d", error); + } + + /* if we fetched a filesec, dispose of the buffer */ + if (fsec != &lfsec) + kauth_filesec_free(fsec); + } +out: + + return(error); +} + +/* + * Definition of vnode operations. + */ + +#if 0 +/* + *# + *#% lookup dvp L ? ? + *#% lookup vpp - L - + */ +struct vnop_lookup_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + vfs_context_t a_context; +}; +#endif /* 0*/ + +errno_t +VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t context) +{ + int _err; + struct vnop_lookup_args a; + vnode_t vp; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_lookup_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + vnode_cache_credentials(dvp, context); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a); + + vp = *vpp; + + if (!thread_safe) { + if ( (cnp->cn_flags & ISLASTCN) ) { + if ( (cnp->cn_flags & LOCKPARENT) ) { + if ( !(cnp->cn_flags & FSNODELOCKHELD) ) { + /* + * leave the fsnode lock held on + * the directory, but restore the funnel... 
+ * also indicate that we need to drop the + * fsnode_lock when we're done with the + * system call processing for this path + */ + cnp->cn_flags |= FSNODELOCKHELD; + + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% create dvp L L L + *#% create vpp - L - + *# + */ + +struct vnop_create_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t context) +{ + int _err; + struct vnop_create_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_create_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_vap = vap; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a); + if (_err == 0 && !NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). + */ + xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); + } + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% whiteout dvp L L L + *#% whiteout cnp - - - + *#% whiteout flag - - - + *# + */ +struct vnop_whiteout_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t context) +{ + int _err; + struct vnop_whiteout_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_whiteout_desc; + a.a_dvp = dvp; + a.a_cnp = cnp; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + + #if 0 +/* + *# + *#% mknod dvp L U U + *#% mknod vpp - X - + *# + */ +struct vnop_mknod_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_attr * vap, vfs_context_t context) +{ + + int _err; + struct vnop_mknod_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_mknod_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_vap = vap; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_mknod_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% open vp L L L + *# + */ +struct vnop_open_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_mode; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_OPEN(vnode_t vp, int mode, vfs_context_t context) +{ + int _err; + struct vnop_open_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + 
acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + a.a_desc = &vnop_open_desc; + a.a_vp = vp; + a.a_mode = mode; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a); + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% close vp U U U + *# + */ +struct vnop_close_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t context) +{ + int _err; + struct vnop_close_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + a.a_desc = &vnop_close_desc; + a.a_vp = vp; + a.a_fflag = fflag; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a); + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% access vp L L L + *# + */ +struct vnop_access_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_action; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_ACCESS(vnode_t vp, int action, vfs_context_t context) +{ + int _err; + struct vnop_access_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + a.a_desc = &vnop_access_desc; + a.a_vp = vp; + a.a_action = action; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_access_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% getattr vp = = = + *# + */ +struct vnop_getattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t context) +{ + int _err; + struct vnop_getattr_args a; + int thread_safe; + int funnel_state; + + a.a_desc = &vnop_getattr_desc; + a.a_vp = vp; + a.a_vap = vap; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_getattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% setattr vp L L L + 
*# + */ +struct vnop_setattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct vnode_attr *a_vap; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t context) +{ + int _err; + struct vnop_setattr_args a; + int thread_safe; + int funnel_state; + + a.a_desc = &vnop_setattr_desc; + a.a_vp = vp; + a.a_vap = vap; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_setattr_desc.vdesc_offset])(&a); + + /* + * Shadow any uid/gid/mode change to the extended attribute file. + */ + if (_err == 0 && !NATIVE_XATTR(vp)) { + struct vnode_attr va; + int change = 0; + + VATTR_INIT(&va); + if (VATTR_IS_ACTIVE(vap, va_uid)) { + VATTR_SET(&va, va_uid, vap->va_uid); + change = 1; + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + VATTR_SET(&va, va_gid, vap->va_gid); + change = 1; + } + if (VATTR_IS_ACTIVE(vap, va_mode)) { + VATTR_SET(&va, va_mode, vap->va_mode); + change = 1; + } + if (change) { + vnode_t dvp; + char *vname; + + dvp = vnode_getparent(vp); + vname = vnode_getname(vp); + + xattrfile_setattr(dvp, vname, &va, context, thread_safe); + if (dvp != NULLVP) + vnode_put(dvp); + if (vname != NULL) + vnode_putname(vname); + } + } + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% getattrlist vp = = = + *# + */ +struct vnop_getattrlist_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct uio *a_uio; + int a_options; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_GETATTRLIST(vnode_t vp, struct attrlist * alist, struct uio * uio, int options, vfs_context_t context) +{ + int _err; + struct vnop_getattrlist_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_getattrlist_desc; + a.a_vp = vp; + a.a_alist = alist; + a.a_uio = uio; + a.a_options = options; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_getattrlist_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% setattrlist vp L L L + *# + */ +struct vnop_setattrlist_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct uio *a_uio; + int a_options; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_SETATTRLIST(vnode_t vp, struct attrlist * alist, struct uio * uio, int options, vfs_context_t context) +{ + int _err; + struct vnop_setattrlist_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_setattrlist_desc; + a.a_vp = vp; + a.a_alist = alist; + a.a_uio = uio; + a.a_options = options; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_setattrlist_desc.vdesc_offset])(&a); + + vnode_uncache_credentials(vp); + + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% read vp L L L + *# + */ +struct vnop_read_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t context) +{ + int _err; + struct vnop_read_args a; + int 
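/*
 * Sketch of the caller side of VNOP_SETATTR above: only attributes marked
 * active with VATTR_SET() reach the filesystem, which is also what lets the
 * wrapper shadow just uid/gid/mode onto the AppleDouble file. The
 * chmod_example() helper is hypothetical.
 */
static errno_t
chmod_example(vnode_t vp, mode_t mode, vfs_context_t ctx)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, mode);	/* marks va_mode active */
	return (VNOP_SETATTR(vp, &va, ctx));
}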
thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + + a.a_desc = &vnop_read_desc; + a.a_vp = vp; + a.a_uio = uio; + a.a_ioflag = ioflag; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a); + + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% write vp L L L + *# + */ +struct vnop_write_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t context) +{ + struct vnop_write_args a; + int _err; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + + a.a_desc = &vnop_write_desc; + a.a_vp = vp; + a.a_uio = uio; + a.a_ioflag = ioflag; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_write_desc.vdesc_offset])(&a); + + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% ioctl vp U U U + *# + */ +struct vnop_ioctl_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + u_long a_command; + caddr_t a_data; + int a_fflag; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t context) +{ + int _err; + struct vnop_ioctl_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + + if (vfs_context_is64bit(context)) { + if (!vnode_vfs64bitready(vp)) { + return(ENOTTY); + } + } + + a.a_desc = &vnop_ioctl_desc; + a.a_vp = vp; + a.a_command = command; + a.a_data = data; + a.a_fflag = fflag; + a.a_context= context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_ioctl_desc.vdesc_offset])(&a); + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% select vp U U U 
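/*
 * The funnel bracket used by the read/write/ioctl wrappers above, pulled out
 * as a sketch. thread_funnel_set() returns the previous funnel state, so the
 * bracket nests safely; funnel_bracket_example() is illustrative only.
 */
static int
funnel_bracket_example(vnode_t vp, int (*op)(vnode_t))
{
	boolean_t funnel_state;
	int error;

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	error = (*op)(vp);	/* call into the non-thread-safe filesystem */
	(void) thread_funnel_set(kernel_flock, funnel_state);
	return (error);
}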
+ *# + */ +struct vnop_select_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_which; + int a_fflags; + void *a_wql; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t context) +{ + int _err; + struct vnop_select_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + a.a_desc = &vnop_select_desc; + a.a_vp = vp; + a.a_which = which; + a.a_fflags = fflags; + a.a_context = context; + a.a_wql = wql; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + if ( (_err = lock_fsnode(vp, NULL)) ) { + (void) thread_funnel_set(kernel_flock, funnel_state); + return (_err); + } + } + } + _err = (*vp->v_op[vnop_select_desc.vdesc_offset])(&a); + if (!thread_safe) { + if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { + unlock_fsnode(vp, NULL); + } + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% exchange fvp L L L + *#% exchange tvp L L L + *# + */ +struct vnop_exchange_args { + struct vnodeop_desc *a_desc; + vnode_t a_fvp; + vnode_t a_tvp; + int a_options; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t context) +{ + int _err; + struct vnop_exchange_args a; + int thread_safe; + int funnel_state = 0; + vnode_t lock_first = NULL, lock_second = NULL; + + a.a_desc = &vnop_exchange_desc; + a.a_fvp = fvp; + a.a_tvp = tvp; + a.a_options = options; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(fvp); + + if (!thread_safe) { + /* + * Lock in vnode address order to avoid deadlocks + */ + if (fvp < tvp) { + lock_first = fvp; + lock_second = tvp; + } else { + lock_first = tvp; + lock_second = fvp; + } + if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) { + return (_err); + } + if ( (_err = lock_fsnode(lock_second, NULL)) ) { + unlock_fsnode(lock_first, &funnel_state); + return (_err); + } + } + _err = (*fvp->v_op[vnop_exchange_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(lock_second, NULL); + unlock_fsnode(lock_first, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% revoke vp U U U + *# + */ +struct vnop_revoke_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t context) +{ + struct vnop_revoke_args a; + int _err; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_revoke_desc; + a.a_vp = vp; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_revoke_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *# mmap - vp U U U + *# + */ +struct vnop_mmap_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t context) +{ + int _err; + struct vnop_mmap_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_mmap_desc; + a.a_vp = vp; + a.a_fflags = 
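/*
 * The pair-locking rule used by VNOP_EXCHANGE above (and again by
 * VNOP_RENAME below): always lock the lower-addressed vnode first so two
 * threads locking the same pair cannot deadlock. Simplified sketch; the real
 * code also handles equal and NULL vnodes, and lock_pair() is hypothetical.
 */
static int
lock_pair(vnode_t vp1, vnode_t vp2, int *funnel_state)
{
	vnode_t first = (vp1 < vp2) ? vp1 : vp2;
	vnode_t second = (vp1 < vp2) ? vp2 : vp1;
	int error;

	if ((error = lock_fsnode(first, funnel_state)))
		return (error);
	if ((error = lock_fsnode(second, NULL)))
		unlock_fsnode(first, funnel_state);
	return (error);
}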
fflags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_mmap_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *# mnomap - vp U U U + *# + */ +struct vnop_mnomap_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_MNOMAP(vnode_t vp, vfs_context_t context) +{ + int _err; + struct vnop_mnomap_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_mnomap_desc; + a.a_vp = vp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_mnomap_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% fsync vp L L L + *# + */ +struct vnop_fsync_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_waitfor; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t context) +{ + struct vnop_fsync_args a; + int _err; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_fsync_desc; + a.a_vp = vp; + a.a_waitfor = waitfor; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_fsync_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% remove dvp L U U + *#% remove vp L U U + *# + */ +struct vnop_remove_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t a_vp; + struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_context_t context) +{ + int _err; + struct vnop_remove_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_remove_desc; + a.a_dvp = dvp; + a.a_vp = vp; + a.a_cnp = cnp; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); + + if (_err == 0) { + vnode_setneedinactive(vp); + + if ( !(NATIVE_XATTR(dvp)) ) { + /* + * Remove any associated extended attribute file (._ AppleDouble file). + */ + xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 1); + } + } + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% link vp U U U + *#% link tdvp L U U + *# + */ +struct vnop_link_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vnode_t a_tdvp; + struct componentname *a_cnp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t context) +{ + int _err; + struct vnop_link_args a; + int thread_safe; + int funnel_state = 0; + + /* + * For file systems with non-native extended attributes, + * disallow linking to an existing "._" Apple Double file. + */ + if ( !NATIVE_XATTR(tdvp) && (vp->v_type == VREG)) { + char *vname; + + vname = vnode_getname(vp); + if (vname != NULL) { + _err = 0; + if (vname[0] == '.' 
&& vname[1] == '_' && vname[2] != '\0') { + _err = EPERM; + } + vnode_putname(vname); + if (_err) + return (_err); + } + } + a.a_desc = &vnop_link_desc; + a.a_vp = vp; + a.a_tdvp = tdvp; + a.a_cnp = cnp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*tdvp->v_op[vnop_link_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% rename fdvp U U U + *#% rename fvp U U U + *#% rename tdvp L U U + *#% rename tvp X U U + *# + */ +struct vnop_rename_args { + struct vnodeop_desc *a_desc; + vnode_t a_fdvp; + vnode_t a_fvp; + struct componentname *a_fcnp; + vnode_t a_tdvp; + vnode_t a_tvp; + struct componentname *a_tcnp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, + struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, + vfs_context_t context) +{ + int _err; + struct vnop_rename_args a; + int funnel_state = 0; + char smallname1[48]; + char smallname2[48]; + char *xfromname = NULL; + char *xtoname = NULL; + vnode_t lock_first = NULL, lock_second = NULL; + vnode_t fdvp_unsafe = NULLVP; + vnode_t tdvp_unsafe = NULLVP; + + a.a_desc = &vnop_rename_desc; + a.a_fdvp = fdvp; + a.a_fvp = fvp; + a.a_fcnp = fcnp; + a.a_tdvp = tdvp; + a.a_tvp = tvp; + a.a_tcnp = tcnp; + a.a_context = context; + + if (!THREAD_SAFE_FS(fdvp)) + fdvp_unsafe = fdvp; + if (!THREAD_SAFE_FS(tdvp)) + tdvp_unsafe = tdvp; + + if (fdvp_unsafe != NULLVP) { + /* + * Lock parents in vnode address order to avoid deadlocks. + * Note that it's possible for the fdvp to be unsafe, + * but the tdvp to be safe because tvp could be a directory + * in the root of a filesystem... in that case, tdvp is in the + * filesystem that this root is mounted on. + */ + if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) { + lock_first = fdvp_unsafe; + lock_second = NULL; + } else if (fdvp_unsafe < tdvp_unsafe) { + lock_first = fdvp_unsafe; + lock_second = tdvp_unsafe; + } else { + lock_first = tdvp_unsafe; + lock_second = fdvp_unsafe; + } + if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) + return (_err); + + if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { + unlock_fsnode(lock_first, &funnel_state); + return (_err); + } + + /* + * Lock both children in vnode address order to avoid deadlocks + */ + if (tvp == NULL || tvp == fvp) { + lock_first = fvp; + lock_second = NULL; + } else if (fvp < tvp) { + lock_first = fvp; + lock_second = tvp; + } else { + lock_first = tvp; + lock_second = fvp; + } + if ( (_err = lock_fsnode(lock_first, NULL)) ) + goto out1; + + if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { + unlock_fsnode(lock_first, NULL); + goto out1; + } + } + /* + * Save source and destination names (._ AppleDouble files). + * Skip if source already has a "._" prefix. + */ + if (!NATIVE_XATTR(fdvp) && + !(fcnp->cn_nameptr[0] == '.' && fcnp->cn_nameptr[1] == '_')) { + size_t len; + + /* Get source attribute file name. */ + len = fcnp->cn_namelen + 3; + if (len > sizeof(smallname1)) { + MALLOC(xfromname, char *, len, M_TEMP, M_WAITOK); + } else { + xfromname = &smallname1[0]; + } + strcpy(xfromname, "._"); + strncat(xfromname, fcnp->cn_nameptr, fcnp->cn_namelen); + xfromname[len-1] = '\0'; + + /* Get destination attribute file name. 
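/*
 * The "._" sibling-name pattern used here and by xattrfile_remove()/
 * xattrfile_setattr() below: try a small on-stack buffer first, fall back to
 * MALLOC only for long names. make_ad_name() is a hypothetical distillation.
 */
static char *
make_ad_name(const char *name, size_t namelen, char *buf, size_t buflen)
{
	char *xname = buf;
	size_t len = namelen + 3;	/* "._" + name + trailing NUL */

	if (len > buflen)
		MALLOC(xname, char *, len, M_TEMP, M_WAITOK);
	strcpy(xname, "._");
	strncat(xname, name, namelen);
	xname[len - 1] = '\0';
	return (xname);			/* caller FREEs if != buf */
}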
*/ + len = tcnp->cn_namelen + 3; + if (len > sizeof(smallname2)) { + MALLOC(xtoname, char *, len, M_TEMP, M_WAITOK); + } else { + xtoname = &smallname2[0]; + } + strcpy(xtoname, "._"); + strncat(xtoname, tcnp->cn_nameptr, tcnp->cn_namelen); + xtoname[len-1] = '\0'; + } + + _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); + + if (fdvp_unsafe != NULLVP) { + if (lock_second != NULL) + unlock_fsnode(lock_second, NULL); + unlock_fsnode(lock_first, NULL); + } + if (_err == 0) { + if (tvp && tvp != fvp) + vnode_setneedinactive(tvp); + } + + /* + * Rename any associated extended attribute file (._ AppleDouble file). + */ + if (_err == 0 && !NATIVE_XATTR(fdvp) && xfromname != NULL) { + struct nameidata fromnd, tond; + int killdest = 0; + int error; + + /* + * Get source attribute file vnode. + * Note that fdvp already has an iocount reference and + * using DELETE will take an additional reference. + */ + NDINIT(&fromnd, DELETE, NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(xfromname), context); + fromnd.ni_dvp = fdvp; + error = namei(&fromnd); + + if (error) { + /* When source doesn't exist there still may be a destination. */ + if (error == ENOENT) { + killdest = 1; + } else { + goto out; + } + } else if (fromnd.ni_vp->v_type != VREG) { + vnode_put(fromnd.ni_vp); + nameidone(&fromnd); + killdest = 1; + } + if (killdest) { + struct vnop_remove_args args; + + /* + * Get destination attribute file vnode. + * Note that tdvp already has an iocount reference. + */ + NDINIT(&tond, DELETE, NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(xtoname), context); + tond.ni_dvp = tdvp; + error = namei(&tond); + if (error) { + goto out; + } + if (tond.ni_vp->v_type != VREG) { + vnode_put(tond.ni_vp); + nameidone(&tond); + goto out; + } + args.a_desc = &vnop_remove_desc; + args.a_dvp = tdvp; + args.a_vp = tond.ni_vp; + args.a_cnp = &tond.ni_cnd; + args.a_context = context; + + if (fdvp_unsafe != NULLVP) + error = lock_fsnode(tond.ni_vp, NULL); + if (error == 0) { + error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); + + if (fdvp_unsafe != NULLVP) + unlock_fsnode(tond.ni_vp, NULL); + + if (error == 0) + vnode_setneedinactive(tond.ni_vp); + } + vnode_put(tond.ni_vp); + nameidone(&tond); + goto out; + } + + /* + * Get destination attribute file vnode. 
+ */ + NDINIT(&tond, RENAME, + NOCACHE | NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(xtoname), context); + tond.ni_dvp = tdvp; + error = namei(&tond); + + if (error) { + vnode_put(fromnd.ni_vp); + nameidone(&fromnd); + goto out; + } + a.a_desc = &vnop_rename_desc; + a.a_fdvp = fdvp; + a.a_fvp = fromnd.ni_vp; + a.a_fcnp = &fromnd.ni_cnd; + a.a_tdvp = tdvp; + a.a_tvp = tond.ni_vp; + a.a_tcnp = &tond.ni_cnd; + a.a_context = context; + + if (fdvp_unsafe != NULLVP) { + /* + * Lock in vnode address order to avoid deadlocks + */ + if (tond.ni_vp == NULL || tond.ni_vp == fromnd.ni_vp) { + lock_first = fromnd.ni_vp; + lock_second = NULL; + } else if (fromnd.ni_vp < tond.ni_vp) { + lock_first = fromnd.ni_vp; + lock_second = tond.ni_vp; + } else { + lock_first = tond.ni_vp; + lock_second = fromnd.ni_vp; + } + if ( (error = lock_fsnode(lock_first, NULL)) == 0) { + if (lock_second != NULL && (error = lock_fsnode(lock_second, NULL)) ) + unlock_fsnode(lock_first, NULL); + } + } + if (error == 0) { + error = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); + + if (fdvp_unsafe != NULLVP) { + if (lock_second != NULL) + unlock_fsnode(lock_second, NULL); + unlock_fsnode(lock_first, NULL); + } + if (error == 0) { + vnode_setneedinactive(fromnd.ni_vp); + + if (tond.ni_vp && tond.ni_vp != fromnd.ni_vp) + vnode_setneedinactive(tond.ni_vp); + } + } + vnode_put(fromnd.ni_vp); + if (tond.ni_vp) { + vnode_put(tond.ni_vp); + } + nameidone(&tond); + nameidone(&fromnd); + } +out: + if (xfromname && xfromname != &smallname1[0]) { + FREE(xfromname, M_TEMP); + } + if (xtoname && xtoname != &smallname2[0]) { + FREE(xtoname, M_TEMP); + } +out1: + if (fdvp_unsafe != NULLVP) { + if (tdvp_unsafe != NULLVP) + unlock_fsnode(tdvp_unsafe, NULL); + unlock_fsnode(fdvp_unsafe, &funnel_state); + } + return (_err); +} + + #if 0 +/* + *# + *#% mkdir dvp L U U + *#% mkdir vpp - L - + *# + */ +struct vnop_mkdir_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, + struct vnode_attr *vap, vfs_context_t context) +{ + int _err; + struct vnop_mkdir_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_mkdir_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_vap = vap; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_mkdir_desc.vdesc_offset])(&a); + if (_err == 0 && !NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). 
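/*
 * The lookup idiom the rename path above leans on: NDINIT with USEDVP plus
 * ni_dvp resolves a name relative to a directory vnode we already hold, and
 * a successful namei() returns ni_vp with an iocount the caller must drop
 * with vnode_put(). lookup_sibling() is a hypothetical illustration.
 */
static int
lookup_sibling(vnode_t dvp, const char *name, vnode_t *vpp, vfs_context_t ctx)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, NOFOLLOW | USEDVP, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(name), ctx);
	nd.ni_dvp = dvp;
	if ((error = namei(&nd)) == 0) {
		*vpp = nd.ni_vp;	/* holds an iocount for the caller */
		nameidone(&nd);
	}
	return (error);
}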
+ */ + xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); + } + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% rmdir dvp L U U + *#% rmdir vp L U U + *# + */ +struct vnop_rmdir_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t a_vp; + struct componentname *a_cnp; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_context_t context) +{ + int _err; + struct vnop_rmdir_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_rmdir_desc; + a.a_dvp = dvp; + a.a_vp = vp; + a.a_cnp = cnp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_rmdir_desc.vdesc_offset])(&a); + + if (_err == 0) { + vnode_setneedinactive(vp); + + if ( !(NATIVE_XATTR(dvp)) ) { + /* + * Remove any associated extended attribute file (._ AppleDouble file). + */ + xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 1); + } + } + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +/* + * Remove a ._ AppleDouble file + */ +#define AD_STALE_SECS (180) +static void +xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t context, int thread_safe, int force) { + vnode_t xvp; + struct nameidata nd; + char smallname[64]; + char *filename = NULL; + size_t len; + + if ((basename == NULL) || (basename[0] == '\0') || + (basename[0] == '.' && basename[1] == '_')) { + return; + } + filename = &smallname[0]; + len = snprintf(filename, sizeof(smallname), "._%s", basename); + if (len >= sizeof(smallname)) { + len++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, len, M_TEMP, M_WAITOK); + len = snprintf(filename, len, "._%s", basename); + } + NDINIT(&nd, DELETE, LOCKLEAF | NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(filename), context); + nd.ni_dvp = dvp; + if (namei(&nd) != 0) + goto out2; + + xvp = nd.ni_vp; + nameidone(&nd); + if (xvp->v_type != VREG) + goto out1; + + /* + * When creating a new object and a "._" file already + * exists, check to see if it's a stale "._" file. 
+ * + */ + if (!force) { + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_modify_time); + if (VNOP_GETATTR(xvp, &va, context) == 0 && + VATTR_IS_SUPPORTED(&va, va_data_size) && + VATTR_IS_SUPPORTED(&va, va_modify_time) && + va.va_data_size != 0) { + struct timeval tv; + + microtime(&tv); + if ((tv.tv_sec > va.va_modify_time.tv_sec) && + (tv.tv_sec - va.va_modify_time.tv_sec) > AD_STALE_SECS) { + force = 1; /* must be stale */ + } + } + } + if (force) { + struct vnop_remove_args a; + int error; + + a.a_desc = &vnop_remove_desc; + a.a_dvp = nd.ni_dvp; + a.a_vp = xvp; + a.a_cnp = &nd.ni_cnd; + a.a_context = context; + + if (!thread_safe) { + if ( (lock_fsnode(xvp, NULL)) ) + goto out1; + } + error = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); + + if (!thread_safe) + unlock_fsnode(xvp, NULL); + + if (error == 0) + vnode_setneedinactive(xvp); + } +out1: + /* Note: nd.ni_dvp's iocount is dropped by caller of VNOP_XXXX */ + vnode_put(xvp); +out2: + if (filename && filename != &smallname[0]) { + FREE(filename, M_TEMP); + } +} + +/* + * Shadow uid/gid/mode to a ._ AppleDouble file + */ +static void +xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, + vfs_context_t context, int thread_safe) { + vnode_t xvp; + struct nameidata nd; + char smallname[64]; + char *filename = NULL; + size_t len; + + if ((dvp == NULLVP) || + (basename == NULL) || (basename[0] == '\0') || + (basename[0] == '.' && basename[1] == '_')) { + return; + } + filename = &smallname[0]; + len = snprintf(filename, sizeof(smallname), "._%s", basename); + if (len >= sizeof(smallname)) { + len++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, len, M_TEMP, M_WAITOK); + len = snprintf(filename, len, "._%s", basename); + } + NDINIT(&nd, LOOKUP, NOFOLLOW | USEDVP, UIO_SYSSPACE, + CAST_USER_ADDR_T(filename), context); + nd.ni_dvp = dvp; + if (namei(&nd) != 0) + goto out2; + + xvp = nd.ni_vp; + nameidone(&nd); + + if (xvp->v_type == VREG) { + struct vnop_setattr_args a; + + a.a_desc = &vnop_setattr_desc; + a.a_vp = xvp; + a.a_vap = vap; + a.a_context = context; + + if (!thread_safe) { + if ( (lock_fsnode(xvp, NULL)) ) + goto out1; + } + (void) (*xvp->v_op[vnop_setattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(xvp, NULL); + } + } +out1: + vnode_put(xvp); +out2: + if (filename && filename != &smallname[0]) { + FREE(filename, M_TEMP); + } +} + +#if 0 +/* + *# + *#% symlink dvp L U U + *#% symlink vpp - U - + *# + */ +struct vnop_symlink_args { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + char *a_target; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, + struct vnode_attr *vap, char *target, vfs_context_t context) +{ + int _err; + struct vnop_symlink_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_symlink_desc; + a.a_dvp = dvp; + a.a_vpp = vpp; + a.a_cnp = cnp; + a.a_vap = vap; + a.a_target = target; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(dvp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { + return (_err); + } + } + _err = (*dvp->v_op[vnop_symlink_desc.vdesc_offset])(&a); + if (_err == 0 && !NATIVE_XATTR(dvp)) { + /* + * Remove stale Apple Double file (if any). 
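/*
 * Worked example of the AD_STALE_SECS test above: a non-empty "._" file
 * whose modification time is more than 180 seconds in the past is treated
 * as debris and removed before the name is reused. is_stale_ad_file() is a
 * hypothetical distillation of that check.
 */
static int
is_stale_ad_file(struct timespec *mtime)
{
	struct timeval now;

	microtime(&now);
	return ((now.tv_sec > mtime->tv_sec) &&
	    (now.tv_sec - mtime->tv_sec) > AD_STALE_SECS);
}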
+ */ + xattrfile_remove(dvp, cnp->cn_nameptr, context, thread_safe, 0); + } + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% readdir vp L L L + *# + */ +struct vnop_readdir_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_flags; + int *a_eofflag; + int *a_numdirent; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, + int *numdirent, vfs_context_t context) +{ + int _err; + struct vnop_readdir_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_readdir_desc; + a.a_vp = vp; + a.a_uio = uio; + a.a_flags = flags; + a.a_eofflag = eofflag; + a.a_numdirent = numdirent; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_readdir_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% readdirattr vp L L L + *# + */ +struct vnop_readdirattr_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct attrlist *a_alist; + struct uio *a_uio; + u_long a_maxcount; + u_long a_options; + u_long *a_newstate; + int *a_eofflag; + u_long *a_actualcount; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_long maxcount, + u_long options, u_long *newstate, int *eofflag, u_long *actualcount, vfs_context_t context) +{ + int _err; + struct vnop_readdirattr_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_readdirattr_desc; + a.a_vp = vp; + a.a_alist = alist; + a.a_uio = uio; + a.a_maxcount = maxcount; + a.a_options = options; + a.a_newstate = newstate; + a.a_eofflag = eofflag; + a.a_actualcount = actualcount; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_readdirattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% readlink vp L L L + *# + */ +struct vnop_readlink_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + vfs_context_t a_context; +}; +#endif /* 0 */ + +errno_t +VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t context) +{ + int _err; + struct vnop_readlink_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_readlink_desc; + a.a_vp = vp; + a.a_uio = uio; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_readlink_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% inactive vp L U U + *# + */ +struct vnop_inactive_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_INACTIVE(struct vnode *vp, vfs_context_t context) +{ + int _err; + struct vnop_inactive_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_inactive_desc; + a.a_vp = vp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = 
(*vp->v_op[vnop_inactive_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% reclaim vp U U U + *# + */ +struct vnop_reclaim_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_RECLAIM(struct vnode *vp, vfs_context_t context) +{ + int _err; + struct vnop_reclaim_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_reclaim_desc; + a.a_vp = vp; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_reclaim_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% pathconf vp L L L + *# + */ +struct vnop_pathconf_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_name; + register_t *a_retval; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_PATHCONF(struct vnode *vp, int name, register_t *retval, vfs_context_t context) +{ + int _err; + struct vnop_pathconf_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_pathconf_desc; + a.a_vp = vp; + a.a_name = name; + a.a_retval = retval; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_pathconf_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% advlock vp U U U + *# + */ +struct vnop_advlock_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, vfs_context_t context) +{ + int _err; + struct vnop_advlock_args a; + int thread_safe; + int funnel_state = 0; + struct uthread * uth; + + a.a_desc = &vnop_advlock_desc; + a.a_vp = vp; + a.a_id = id; + a.a_op = op; + a.a_fl = fl; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + uth = get_bsdthread_info(current_thread()); + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + /* Disallow advisory locking on non-seekable vnodes */ + if (vnode_isfifo(vp)) { + _err = err_advlock(&a); + } else { + if ((vp->v_flag & VLOCKLOCAL)) { + /* Advisory locking done at this layer */ + _err = lf_advlock(&a); + } else { + /* Advisory locking done by underlying filesystem */ + _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a); + } + } + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + + +#if 0 +/* + *# + *#% allocate vp L L L + *# + */ +struct vnop_allocate_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + off_t a_length; + u_int32_t a_flags; + off_t *a_bytesallocated; + off_t a_offset; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesallocated, off_t offset, vfs_context_t context) +{ + int _err; + struct vnop_allocate_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_allocate_desc; + a.a_vp = vp; + a.a_length = length; + a.a_flags = flags; + a.a_bytesallocated = bytesallocated; + a.a_offset = offset; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if 
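/*
 * Caller-side sketch for VNOP_ADVLOCK above, which routes fifos to
 * err_advlock(), VLOCKLOCAL vnodes to the in-kernel lf_advlock() tables, and
 * everything else to the filesystem. lock_whole_file() is hypothetical.
 */
static int
lock_whole_file(vnode_t vp, caddr_t id, vfs_context_t ctx)
{
	struct flock fl;

	fl.l_start = 0;
	fl.l_len = 0;		/* zero length means lock to EOF */
	fl.l_whence = SEEK_SET;
	fl.l_type = F_WRLCK;
	fl.l_pid = 0;
	return (VNOP_ADVLOCK(vp, id, F_SETLK, &fl, F_FLOCK | F_WAIT, ctx));
}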
(!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_allocate_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% pagein vp = = = + *# + */ +struct vnop_pagein_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_f_offset; + size_t a_size; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t context) +{ + int _err; + struct vnop_pagein_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_pagein_desc; + a.a_vp = vp; + a.a_pl = pl; + a.a_pl_offset = pl_offset; + a.a_f_offset = f_offset; + a.a_size = size; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_pagein_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% pageout vp = = = + *# + */ +struct vnop_pageout_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + upl_t a_pl; + vm_offset_t a_pl_offset; + off_t a_f_offset; + size_t a_size; + int a_flags; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t context) +{ + int _err; + struct vnop_pageout_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_pageout_desc; + a.a_vp = vp; + a.a_pl = pl; + a.a_pl_offset = pl_offset; + a.a_f_offset = f_offset; + a.a_size = size; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_pageout_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + + +#if 0 +/* + *# + *#% searchfs vp L L L + *# + */ +struct vnop_searchfs_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + void *a_searchparams1; + void *a_searchparams2; + struct attrlist *a_searchattrs; + u_long a_maxmatches; + struct timeval *a_timelimit; + struct attrlist *a_returnattrs; + u_long *a_nummatches; + u_long a_scriptcode; + u_long a_options; + struct uio *a_uio; + struct searchstate *a_searchstate; + vfs_context_t a_context; +}; + +#endif /* 0*/ +errno_t +VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, u_long maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, u_long *nummatches, u_long scriptcode, u_long options, struct uio *uio, struct searchstate *searchstate, vfs_context_t context) +{ + int _err; + struct vnop_searchfs_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_searchfs_desc; + a.a_vp = vp; + a.a_searchparams1 = searchparams1; + a.a_searchparams2 = searchparams2; + a.a_searchattrs = searchattrs; + a.a_maxmatches = maxmatches; + a.a_timelimit = timelimit; + a.a_returnattrs = returnattrs; + a.a_nummatches = nummatches; + a.a_scriptcode = scriptcode; + a.a_options = options; + a.a_uio = uio; + a.a_searchstate = searchstate; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + 
return (_err); + } + } + _err = (*vp->v_op[vnop_searchfs_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% copyfile fvp U U U + *#% copyfile tdvp L U U + *#% copyfile tvp X U U + *# + */ +struct vnop_copyfile_args { + struct vnodeop_desc *a_desc; + vnode_t a_fvp; + vnode_t a_tdvp; + vnode_t a_tvp; + struct componentname *a_tcnp; + int a_mode; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_COPYFILE(struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, + int mode, int flags, vfs_context_t context) +{ + int _err; + struct vnop_copyfile_args a; + a.a_desc = &vnop_copyfile_desc; + a.a_fvp = fvp; + a.a_tdvp = tdvp; + a.a_tvp = tvp; + a.a_tcnp = tcnp; + a.a_mode = mode; + a.a_flags = flags; + a.a_context = context; + _err = (*fvp->v_op[vnop_copyfile_desc.vdesc_offset])(&a); + return (_err); +} + + +errno_t +VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options, vfs_context_t context) +{ + struct vnop_getxattr_args a; + int error; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_getxattr_desc; + a.a_vp = vp; + a.a_name = name; + a.a_uio = uio; + a.a_size = size; + a.a_options = options; + a.a_context = context; + + thread_safe = THREAD_SAFE_FS(vp); + if (!thread_safe) { + if ( (error = lock_fsnode(vp, &funnel_state)) ) { + return (error); + } + } + error = (*vp->v_op[vnop_getxattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (error); +} + +errno_t +VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t context) +{ + struct vnop_setxattr_args a; + int error; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_setxattr_desc; + a.a_vp = vp; + a.a_name = name; + a.a_uio = uio; + a.a_options = options; + a.a_context = context; + + thread_safe = THREAD_SAFE_FS(vp); + if (!thread_safe) { + if ( (error = lock_fsnode(vp, &funnel_state)) ) { + return (error); + } + } + error = (*vp->v_op[vnop_setxattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (error); +} + +errno_t +VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t context) +{ + struct vnop_removexattr_args a; + int error; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_removexattr_desc; + a.a_vp = vp; + a.a_name = name; + a.a_options = options; + a.a_context = context; + + thread_safe = THREAD_SAFE_FS(vp); + if (!thread_safe) { + if ( (error = lock_fsnode(vp, &funnel_state)) ) { + return (error); + } + } + error = (*vp->v_op[vnop_removexattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (error); +} + +errno_t +VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t context) +{ + struct vnop_listxattr_args a; + int error; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_listxattr_desc; + a.a_vp = vp; + a.a_uio = uio; + a.a_size = size; + a.a_options = options; + a.a_context = context; + + thread_safe = THREAD_SAFE_FS(vp); + if (!thread_safe) { + if ( (error = lock_fsnode(vp, &funnel_state)) ) { + return (error); + } + } + error = (*vp->v_op[vnop_listxattr_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return (error); +} + + +#if 0 +/* + *# + *#% blktooff vp = = = + *# + */ +struct vnop_blktooff_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + daddr64_t a_lblkno; 
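/*
 * Sketch of reading an extended attribute through VNOP_GETXATTR above using
 * the uio_create()/uio_addiov() KPI; read_xattr_example() and its caller
 * supplied buffer are illustrative only.
 */
static int
read_xattr_example(vnode_t vp, const char *name, void *buf, size_t buflen,
    size_t *actual, vfs_context_t ctx)
{
	uio_t auio;
	int error;

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	uio_addiov(auio, CAST_USER_ADDR_T(buf), buflen);
	error = VNOP_GETXATTR(vp, name, auio, NULL, 0, ctx);
	if (error == 0)
		*actual = buflen - uio_resid(auio);
	uio_free(auio);
	return (error);
}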
+ off_t *a_offset; +}; +#endif /* 0*/ +errno_t +VNOP_BLKTOOFF(struct vnode *vp, daddr64_t lblkno, off_t *offset) +{ + int _err; + struct vnop_blktooff_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_blktooff_desc; + a.a_vp = vp; + a.a_lblkno = lblkno; + a.a_offset = offset; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_blktooff_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% offtoblk vp = = = + *# + */ +struct vnop_offtoblk_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + off_t a_offset; + daddr64_t *a_lblkno; +}; +#endif /* 0*/ +errno_t +VNOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr64_t *lblkno) +{ + int _err; + struct vnop_offtoblk_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = &vnop_offtoblk_desc; + a.a_vp = vp; + a.a_offset = offset; + a.a_lblkno = lblkno; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_offtoblk_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +/* + *# + *#% blockmap vp L L L + *# + */ +struct vnop_blockmap_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + off_t a_foffset; + size_t a_size; + daddr64_t *a_bpn; + size_t *a_run; + void *a_poff; + int a_flags; + vfs_context_t a_context; +}; +#endif /* 0*/ +errno_t +VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size_t *run, void *poff, int flags, vfs_context_t context) +{ + int _err; + struct vnop_blockmap_args a; + int thread_safe; + int funnel_state = 0; + struct vfs_context acontext; + + if (context == NULL) { + acontext.vc_proc = current_proc(); + acontext.vc_ucred = kauth_cred_get(); + context = &acontext; + } + a.a_desc = &vnop_blockmap_desc; + a.a_vp = vp; + a.a_foffset = foffset; + a.a_size = size; + a.a_bpn = bpn; + a.a_run = run; + a.a_poff = poff; + a.a_flags = flags; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + funnel_state = thread_funnel_set(kernel_flock, TRUE); + } + _err = (*vp->v_op[vnop_blockmap_desc.vdesc_offset])(&a); + if (!thread_safe) { + (void) thread_funnel_set(kernel_flock, funnel_state); + } + return (_err); +} + +#if 0 +struct vnop_strategy_args { + struct vnodeop_desc *a_desc; + struct buf *a_bp; +}; + +#endif /* 0*/ +errno_t +VNOP_STRATEGY(struct buf *bp) +{ + int _err; + struct vnop_strategy_args a; + a.a_desc = &vnop_strategy_desc; + a.a_bp = bp; + _err = (*buf_vnode(bp)->v_op[vnop_strategy_desc.vdesc_offset])(&a); + return (_err); +} + +#if 0 +struct vnop_bwrite_args { + struct vnodeop_desc *a_desc; + buf_t a_bp; +}; +#endif /* 0*/ +errno_t +VNOP_BWRITE(struct buf *bp) +{ + int _err; + struct vnop_bwrite_args a; + a.a_desc = &vnop_bwrite_desc; + a.a_bp = bp; + _err = (*buf_vnode(bp)->v_op[vnop_bwrite_desc.vdesc_offset])(&a); + return (_err); +} + +#if 0 +struct vnop_kqfilt_add_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct knote *a_kn; + vfs_context_t a_context; +}; +#endif +errno_t +VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t context) +{ + int _err; + struct vnop_kqfilt_add_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = VDESC(vnop_kqfilt_add); + a.a_vp = vp; + a.a_kn = kn; + a.a_context = context; + thread_safe = 
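/*
 * The two conversions above are inverses scaled by the filesystem block
 * size: on a 4096-byte-block filesystem, block 3 maps to offset 12288 and
 * any offset in 12288..16383 maps back to block 3. Round-trip sketch;
 * block_roundtrip() is hypothetical.
 */
static int
block_roundtrip(vnode_t vp, off_t offset)
{
	daddr64_t lblkno;
	off_t blkoff;
	int error;

	if ((error = VNOP_OFFTOBLK(vp, offset, &lblkno)))
		return (error);
	if ((error = VNOP_BLKTOOFF(vp, lblkno, &blkoff)))
		return (error);
	/* blkoff <= offset < blkoff + filesystem block size */
	return (0);
}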
THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_kqfilt_add_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return(_err); +} + +#if 0 +struct vnop_kqfilt_remove_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + uintptr_t a_ident; + vfs_context_t a_context; +}; +#endif +errno_t +VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t context) +{ + int _err; + struct vnop_kqfilt_remove_args a; + int thread_safe; + int funnel_state = 0; + + a.a_desc = VDESC(vnop_kqfilt_remove); + a.a_vp = vp; + a.a_ident = ident; + a.a_context = context; + thread_safe = THREAD_SAFE_FS(vp); + + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } + _err = (*vp->v_op[vnop_kqfilt_remove_desc.vdesc_offset])(&a); + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } + return(_err); +} + diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c new file mode 100644 index 000000000..7716e41e2 --- /dev/null +++ b/bsd/vfs/vfs_attrlist.c @@ -0,0 +1,1632 @@ +/* + * Copyright (c) 1995-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ATTR_TIME_SIZE -1 + +/* + * Structure describing the state of an in-progress attrlist operation. + */ +struct _attrlist_buf { + char *base; + char *fixedcursor; + char *varcursor; + ssize_t allocated; + ssize_t needed; +}; + + +/* + * Pack (count) bytes from (source) into (buf). + */ +static void +attrlist_pack_fixed(struct _attrlist_buf *ab, void *source, ssize_t count) +{ + ssize_t fit; + + /* how much room left in the buffer? 
*/ + fit = imin(count, ab->allocated - (ab->fixedcursor - ab->base)); + if (fit > 0) + bcopy(source, ab->fixedcursor, fit); + + /* always move in increments of 4 */ + ab->fixedcursor += roundup(count, 4); +} +static void +attrlist_pack_variable2(struct _attrlist_buf *ab, const void *source, ssize_t count, const void *ext, ssize_t extcount) +{ + struct attrreference ar; + ssize_t fit; + + /* pack the reference to the variable object */ + ar.attr_dataoffset = ab->varcursor - ab->fixedcursor; + ar.attr_length = count + extcount; + attrlist_pack_fixed(ab, &ar, sizeof(ar)); + + /* calculate space and pack the variable object */ + fit = imin(count, ab->allocated - (ab->varcursor - ab->base)); + if (fit > 0) { + if (source != NULL) + bcopy(source, ab->varcursor, fit); + ab->varcursor += fit; + } + fit = imin(extcount, ab->allocated - (ab->varcursor - ab->base)); + if (fit > 0) { + if (ext != NULL) + bcopy(ext, ab->varcursor, fit); + ab->varcursor += fit; + } + /* always move in increments of 4 */ + ab->varcursor = (char *)roundup((uintptr_t)ab->varcursor, 4); +} +static void +attrlist_pack_variable(struct _attrlist_buf *ab, const void *source, ssize_t count) +{ + attrlist_pack_variable2(ab, source, count, NULL, 0); +} +static void +attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count) +{ + struct attrreference ar; + ssize_t fit, space; + + + /* + * Supplied count is character count of string text, excluding trailing nul + * which we always supply here. + */ + if (source == NULL) { + count = 0; + } else if (count == 0) { + count = strlen(source); + } + + /* + * Make the reference and pack it. + * Note that this is entirely independent of how much we get into + * the buffer. + */ + ar.attr_dataoffset = ab->varcursor - ab->fixedcursor; + ar.attr_length = count + 1; + attrlist_pack_fixed(ab, &ar, sizeof(ar)); + + /* calculate how much of the string text we can copy, and do that */ + space = ab->allocated - (ab->varcursor - ab->base); + fit = imin(count, space); + if (fit > 0) + bcopy(source, ab->varcursor, fit); + /* is there room for our trailing nul? */ + if (space > fit) + ab->varcursor[fit] = '\0'; + + /* always move in increments of 4 */ + ab->varcursor += roundup(count + 1, 4); +} + +#define ATTR_PACK(b, v) attrlist_pack_fixed(b, &v, sizeof(v)) +#define ATTR_PACK_CAST(b, t, v) \ + do { \ + t _f = (t)v; \ + ATTR_PACK(b, _f); \ + } while (0) + +#define ATTR_PACK_TIME(b, v, is64) \ + do { \ + if (is64) { \ + struct user_timespec us = {v.tv_sec, v.tv_nsec}; \ + ATTR_PACK(b, us); \ + } else { \ + ATTR_PACK(b, v); \ + } \ + } while(0) + + +/* + * Table-driven setup for all valid common/volume attributes. 
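/*
 * Worked example of the packing helpers above: variable-length data is
 * described in the fixed-length region by an attrreference whose
 * attr_dataoffset is relative to the attrreference itself, and both cursors
 * advance in 4-byte units. Packing the 3-byte string "abc" therefore sets
 * attr_length = 4 (text plus trailing NUL) and consumes roundup(4, 4) = 4
 * variable bytes. A consumer recovers the data like this (hypothetical
 * helper):
 */
static const char *
attrref_to_ptr(struct attrreference *ar)
{
	/* variable-length data lives at a self-relative offset */
	return ((const char *)ar + ar->attr_dataoffset);
}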
+ */ +struct getvolattrlist_attrtab { + attrgroup_t attr; + uint64_t bits; +#define VFSATTR_BIT(b) (VFSATTR_ ## b) + ssize_t size; +}; +static struct getvolattrlist_attrtab getvolattrlist_common_tab[] = { + {ATTR_CMN_NAME, 0, sizeof(struct attrreference)}, + {ATTR_CMN_DEVID, 0, sizeof(dev_t)}, + {ATTR_CMN_FSID, 0, sizeof(fsid_t)}, + {ATTR_CMN_OBJTYPE, 0, sizeof(fsobj_type_t)}, + {ATTR_CMN_OBJTAG, 0, sizeof(fsobj_tag_t)}, + {ATTR_CMN_OBJID, 0, sizeof(fsobj_id_t)}, + {ATTR_CMN_OBJPERMANENTID, 0, sizeof(fsobj_id_t)}, + {ATTR_CMN_PAROBJID, 0, sizeof(fsobj_id_t)}, + {ATTR_CMN_SCRIPT, 0, sizeof(text_encoding_t)}, + {ATTR_CMN_CRTIME, VFSATTR_BIT(f_create_time), ATTR_TIME_SIZE}, + {ATTR_CMN_MODTIME, VFSATTR_BIT(f_modify_time), ATTR_TIME_SIZE}, + {ATTR_CMN_CHGTIME, VFSATTR_BIT(f_modify_time), ATTR_TIME_SIZE}, + {ATTR_CMN_ACCTIME, VFSATTR_BIT(f_access_time), ATTR_TIME_SIZE}, + {ATTR_CMN_BKUPTIME, VFSATTR_BIT(f_backup_time), ATTR_TIME_SIZE}, + {ATTR_CMN_FNDRINFO, 0, 32}, + {ATTR_CMN_OWNERID, 0, sizeof(uid_t)}, + {ATTR_CMN_GRPID, 0, sizeof(gid_t)}, + {ATTR_CMN_ACCESSMASK, 0, sizeof(uint32_t)}, + {ATTR_CMN_FLAGS, 0, sizeof(uint32_t)}, + {ATTR_CMN_USERACCESS, 0, sizeof(uint32_t)}, + {0, 0, 0} +}; + +static struct getvolattrlist_attrtab getvolattrlist_vol_tab[] = { + {ATTR_VOL_FSTYPE, 0, sizeof(uint32_t)}, + {ATTR_VOL_SIGNATURE, VFSATTR_BIT(f_signature), sizeof(uint32_t)}, + {ATTR_VOL_SIZE, VFSATTR_BIT(f_blocks), sizeof(off_t)}, + {ATTR_VOL_SPACEFREE, VFSATTR_BIT(f_bfree) | VFSATTR_BIT(f_bsize), sizeof(off_t)}, + {ATTR_VOL_SPACEAVAIL, VFSATTR_BIT(f_bavail) | VFSATTR_BIT(f_bsize), sizeof(off_t)}, + {ATTR_VOL_MINALLOCATION, VFSATTR_BIT(f_bsize), sizeof(off_t)}, + {ATTR_VOL_ALLOCATIONCLUMP, VFSATTR_BIT(f_bsize), sizeof(off_t)}, + {ATTR_VOL_IOBLOCKSIZE, VFSATTR_BIT(f_iosize), sizeof(uint32_t)}, + {ATTR_VOL_OBJCOUNT, VFSATTR_BIT(f_objcount), sizeof(uint32_t)}, + {ATTR_VOL_FILECOUNT, VFSATTR_BIT(f_filecount), sizeof(uint32_t)}, + {ATTR_VOL_DIRCOUNT, VFSATTR_BIT(f_dircount), sizeof(uint32_t)}, + {ATTR_VOL_MAXOBJCOUNT, VFSATTR_BIT(f_maxobjcount), sizeof(uint32_t)}, + {ATTR_VOL_MOUNTPOINT, 0, sizeof(struct attrreference)}, + {ATTR_VOL_NAME, VFSATTR_BIT(f_vol_name), sizeof(struct attrreference)}, + {ATTR_VOL_MOUNTFLAGS, 0, sizeof(uint32_t)}, + {ATTR_VOL_MOUNTEDDEVICE, 0, sizeof(struct attrreference)}, + {ATTR_VOL_ENCODINGSUSED, 0, sizeof(uint64_t)}, + {ATTR_VOL_CAPABILITIES, VFSATTR_BIT(f_capabilities), sizeof(vol_capabilities_attr_t)}, + {ATTR_VOL_ATTRIBUTES, VFSATTR_BIT(f_attributes), sizeof(vol_attributes_attr_t)}, + {ATTR_VOL_INFO, 0, 0}, + {0, 0, 0} +}; + +static int +getvolattrlist_parsetab(struct getvolattrlist_attrtab *tab, attrgroup_t attrs, struct vfs_attr *vsp, + ssize_t *sizep, int is_64bit) +{ + attrgroup_t recognised; + + recognised = 0; + do { + /* is this attribute set? */ + if (tab->attr & attrs) { + recognised |= tab->attr; + vsp->f_active |= tab->bits; + if (tab->size == ATTR_TIME_SIZE) { + if (is_64bit) { + *sizep += sizeof(struct user_timespec); + } else { + *sizep += sizeof(struct timespec); + } + } else { + *sizep += tab->size; + } + } + } while ((++tab)->attr != 0); + + /* check to make sure that we recognised all of the passed-in attributes */ + if (attrs & ~recognised) + return(EINVAL); + return(0); +} + +/* + * Given the attributes listed in alp, configure vap to request + * the data from a filesystem. + */ +static int +getvolattrlist_setupvfsattr(struct attrlist *alp, struct vfs_attr *vsp, ssize_t *sizep, int is_64bit) +{ + int error; + + /* + * Parse the above tables. 
+ */ + *sizep = sizeof(uint32_t); /* length count */ + if (alp->commonattr && + (error = getvolattrlist_parsetab(getvolattrlist_common_tab, alp->commonattr, vsp, sizep, is_64bit)) != 0) + return(error); + if (alp->volattr && + (error = getvolattrlist_parsetab(getvolattrlist_vol_tab, alp->volattr, vsp, sizep, is_64bit)) != 0) + return(error); + + return(0); +} + +/* + * Table-driven setup for all valid common/dir/file/fork attributes against files. + */ +struct getattrlist_attrtab { + attrgroup_t attr; + uint64_t bits; +#define VATTR_BIT(b) (VNODE_ATTR_ ## b) + ssize_t size; + kauth_action_t action; +}; +static struct getattrlist_attrtab getattrlist_common_tab[] = { + {ATTR_CMN_NAME, VATTR_BIT(va_name), sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_DEVID, 0, sizeof(dev_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FSID, VATTR_BIT(va_fsid), sizeof(fsid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJTYPE, 0, sizeof(fsobj_type_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJTAG, 0, sizeof(fsobj_tag_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJID, VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OBJPERMANENTID, VATTR_BIT(va_fileid) | VATTR_BIT(va_linkid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_PAROBJID, VATTR_BIT(va_parentid), sizeof(fsobj_id_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_SCRIPT, VATTR_BIT(va_encoding), sizeof(text_encoding_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_CRTIME, VATTR_BIT(va_create_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_MODTIME, VATTR_BIT(va_modify_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_CHGTIME, VATTR_BIT(va_change_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_ACCTIME, VATTR_BIT(va_access_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_BKUPTIME, VATTR_BIT(va_backup_time), ATTR_TIME_SIZE, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FNDRINFO, 0, 32, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_OWNERID, VATTR_BIT(va_uid), sizeof(uid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_GRPID, VATTR_BIT(va_gid), sizeof(gid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_ACCESSMASK, VATTR_BIT(va_mode), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FLAGS, VATTR_BIT(va_flags), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_USERACCESS, 0, sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_EXTENDED_SECURITY, VATTR_BIT(va_acl), sizeof(struct attrreference), KAUTH_VNODE_READ_SECURITY}, + {ATTR_CMN_UUID, VATTR_BIT(va_uuuid), sizeof(guid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_GRPUUID, VATTR_BIT(va_guuid), sizeof(guid_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {0, 0, 0, 0} +}; +static struct getattrlist_attrtab getattrlist_dir_tab[] = { + {ATTR_DIR_LINKCOUNT, VATTR_BIT(va_nlink), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_DIR_ENTRYCOUNT, VATTR_BIT(va_nchildren), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + /* ATTR_DIR_ENTRYCOUNT falls back to va_nlink-2 if va_nchildren isn't supported, so request va_nlink just in case */ + {ATTR_DIR_ENTRYCOUNT, VATTR_BIT(va_nlink), 0, KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_DIR_MOUNTSTATUS, 0, sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {0, 0, 0, 0} +}; +static struct getattrlist_attrtab getattrlist_file_tab[] = { + {ATTR_FILE_LINKCOUNT, VATTR_BIT(va_nlink), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_TOTALSIZE, VATTR_BIT(va_total_size), sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + 
{ATTR_FILE_ALLOCSIZE, VATTR_BIT(va_total_alloc) | VATTR_BIT(va_total_size), sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_IOBLOCKSIZE, VATTR_BIT(va_iosize), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_DEVTYPE, VATTR_BIT(va_rdev), sizeof(dev_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_DATALENGTH, VATTR_BIT(va_total_size) | VATTR_BIT(va_data_size), sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_DATAALLOCSIZE, VATTR_BIT(va_total_alloc)| VATTR_BIT(va_data_alloc), sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_RSRCLENGTH, 0, sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_FILE_RSRCALLOCSIZE, 0, sizeof(off_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {0, 0, 0, 0} +}; + +static int +getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, struct vnode_attr *vap, + ssize_t *sizep, kauth_action_t *actionp, int is_64bit) +{ + attrgroup_t recognised; + + recognised = 0; + do { + /* is this attribute set? */ + if (tab->attr & attrs) { + recognised |= tab->attr; + vap->va_active |= tab->bits; + if (tab->size == ATTR_TIME_SIZE) { + if (is_64bit) { + *sizep += sizeof(struct user_timespec); + } else { + *sizep += sizeof(struct timespec); + } + } else { + *sizep += tab->size; + } + *actionp |= tab->action; + } + } while ((++tab)->attr != 0); + + /* check to make sure that we recognised all of the passed-in attributes */ + if (attrs & ~recognised) + return(EINVAL); + return(0); +} + +/* + * Given the attributes listed in alp, configure vap to request + * the data from a filesystem. + */ +static int +getattrlist_setupvattr(struct attrlist *alp, struct vnode_attr *vap, ssize_t *sizep, kauth_action_t *actionp, int is_64bit, int isdir) +{ + int error; + + /* + * Parse the above tables. + */ + *sizep = sizeof(uint32_t); /* length count */ + *actionp = 0; + if (alp->commonattr && + (error = getattrlist_parsetab(getattrlist_common_tab, alp->commonattr, vap, sizep, actionp, is_64bit)) != 0) + return(error); + if (isdir && alp->dirattr && + (error = getattrlist_parsetab(getattrlist_dir_tab, alp->dirattr, vap, sizep, actionp, is_64bit)) != 0) + return(error); + if (!isdir && alp->fileattr && + (error = getattrlist_parsetab(getattrlist_file_tab, alp->fileattr, vap, sizep, actionp, is_64bit)) != 0) + return(error); + + return(0); +} + + +/* + * Find something resembling a terminal component name in the mountedonname for vp + * + */ +static void +getattrlist_findnamecomp(const char *mn, const char **np, ssize_t *nl) +{ + int counting; + const char *cp; + + /* + * We're looking for the last sequence of non / characters, but + * not including any trailing / characters. + */ + *np = NULL; + *nl = 0; + counting = 0; + for (cp = mn; *cp != 0; cp++) { + if (!counting) { + /* start of run of chars */ + if (*cp != '/') { + *np = cp; + counting = 1; + } + } else { + /* end of run of chars */ + if (*cp == '/') { + *nl = cp - *np; + counting = 0; + } + } + } + /* need to close run? */ + if (counting) + *nl = cp - *np; +} + + +static int +getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, vfs_context_t ctx, int is_64bit) +{ + struct vfs_attr vs; + struct vnode_attr va; + struct _attrlist_buf ab; + int error; + ssize_t fixedsize, varsize; + const char *cnp; + ssize_t cnl; + mount_t mnt; + + ab.base = NULL; + VATTR_INIT(&va); + VFSATTR_INIT(&vs); + vs.f_vol_name = NULL; + mnt = vp->v_mount; + + + /* + * For now, the vnode must be the root of its filesystem. 
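+	 * (Callers normally satisfy this by naming the mount point itself;
+	 * an illustrative user-space request would be
+	 *	getattrlist("/Volumes/Foo", &al, buf, sizeof(buf), 0)
+	 * with only volume attributes set in al, which reaches this
+	 * function via the volattr branch of getattrlist() below.)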
+ * To relax this, we need to be able to find the root vnode of a filesystem + * from any vnode in the filesystem. + */ + if (!vnode_isvroot(vp)) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: volume attributes requested but not the root of a filesystem"); + goto out; + } + + /* + * Set up the vfs_attr structure and call the filesystem. + */ + if ((error = getvolattrlist_setupvfsattr(alp, &vs, &fixedsize, is_64bit)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); + goto out; + } + if (vs.f_active != 0) { + /* If we're going to ask for f_vol_name, allocate a buffer to point it at */ + if (VFSATTR_IS_ACTIVE(&vs, f_vol_name)) { + vs.f_vol_name = (char *) kalloc(MAXPATHLEN); + if (vs.f_vol_name == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate f_vol_name buffer"); + goto out; + } + } + + VFS_DEBUG(ctx, vp, "ATTRLIST - calling to get %016llx with supported %016llx", vs.f_active, vs.f_supported); + if ((error = vfs_getattr(mnt, &vs, ctx)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); + goto out; + } + + /* + * Did we ask for something the filesystem doesn't support? + */ + if (!VFSATTR_ALL_SUPPORTED(&vs)) { + /* default value for volume subtype */ + if (VFSATTR_IS_ACTIVE(&vs, f_fssubtype) + && !VFSATTR_IS_SUPPORTED(&vs, f_fssubtype)) + VFSATTR_RETURN(&vs, f_fssubtype, 0); + + /* + * If the file system didn't supply f_signature, then + * default it to 'BD', which is the generic signature + * that most Carbon file systems should return. + */ + if (VFSATTR_IS_ACTIVE(&vs, f_signature) + && !VFSATTR_IS_SUPPORTED(&vs, f_signature)) + VFSATTR_RETURN(&vs, f_signature, 0x4244); + + /* default for block size */ + if (VFSATTR_IS_ACTIVE(&vs, f_bsize) + && !VFSATTR_IS_SUPPORTED(&vs, f_bsize)) + VFSATTR_RETURN(&vs, f_bsize, mnt->mnt_devblocksize); + + /* check to see if our fixups were enough */ + if (!VFSATTR_ALL_SUPPORTED(&vs)) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not get all requested volume attributes"); + VFS_DEBUG(ctx, vp, "ATTRLIST - wanted %016llx got %016llx missing %016llx", + vs.f_active, vs.f_supported, vs.f_active & ~vs.f_supported); + goto out; + } + } + } + + /* + * Some fields require data from the root vp + */ + if (alp->commonattr & (ATTR_CMN_OWNERID | ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | ATTR_CMN_FLAGS | ATTR_CMN_SCRIPT)) { + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_flags); + VATTR_WANTED(&va, va_encoding); + + if ((error = vnode_getattr(vp, &va, ctx)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not fetch attributes from root vnode", vp); + goto out; + } + + if (VATTR_IS_ACTIVE(&va, va_encoding) && !VATTR_IS_SUPPORTED(&va, va_encoding)) + VATTR_RETURN(&va, va_encoding, 0x7e /* kTextEncodingMacUnicode */); + } + + /* + * Compute variable-size buffer requirements. + */ + varsize = 0; + if (alp->commonattr & ATTR_CMN_NAME) { + if (vp->v_mount->mnt_vfsstat.f_mntonname[1] == 0x00 && + vp->v_mount->mnt_vfsstat.f_mntonname[0] == '/') { + /* special case for boot volume. Use root name when it's + * available (which is the volume name) or just the mount on + * name of "/". we must do this for binary compatibility with + * pre Tiger code. 
returning nothing for the boot volume name + * breaks installers - 3961058 + */ + cnp = vnode_getname(vp); + if (cnp == NULL) { + /* just use "/" as name */ + cnp = &vp->v_mount->mnt_vfsstat.f_mntonname[0]; + } + cnl = strlen(cnp); + } + else { + getattrlist_findnamecomp(vp->v_mount->mnt_vfsstat.f_mntonname, &cnp, &cnl); + } + if (alp->commonattr & ATTR_CMN_NAME) + varsize += roundup(cnl + 1, 4); + } + if (alp->volattr & ATTR_VOL_MOUNTPOINT) + varsize += roundup(strlen(mnt->mnt_vfsstat.f_mntonname) + 1, 4); + if (alp->volattr & ATTR_VOL_NAME) { + vs.f_vol_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ + varsize += roundup(strlen(vs.f_vol_name) + 1, 4); + } + if (alp->volattr & ATTR_VOL_MOUNTEDDEVICE) + varsize += roundup(strlen(mnt->mnt_vfsstat.f_mntfromname) + 1, 4); + + /* + * Allocate a target buffer for attribute results. + * Note that since we won't ever copy out more than the caller requested, + * we never need to allocate more than they offer. + */ + ab.allocated = imin(uap->bufferSize, fixedsize + varsize); + if (ab.allocated > ATTR_MAX_BUFFER) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); + goto out; + } + MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_WAITOK); + if (ab.base == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d for copy buffer", ab.allocated); + goto out; + } + + /* + * Pack results into the destination buffer. + */ + ab.fixedcursor = ab.base + sizeof(uint32_t); + ab.varcursor = ab.base + fixedsize; + ab.needed = fixedsize + varsize; + + /* common attributes **************************************************/ + if (alp->commonattr & ATTR_CMN_NAME) + attrlist_pack_string(&ab, cnp, cnl); + if (alp->commonattr & ATTR_CMN_DEVID) + ATTR_PACK_CAST(&ab, dev_t, mnt->mnt_vfsstat.f_fsid.val[0]); + if (alp->commonattr & ATTR_CMN_FSID) + ATTR_PACK(&ab, mnt->mnt_vfsstat.f_fsid); + if (alp->commonattr & ATTR_CMN_OBJTYPE) + ATTR_PACK_CAST(&ab, fsobj_type_t, 0); + if (alp->commonattr & ATTR_CMN_OBJTAG) + ATTR_PACK_CAST(&ab, fsobj_tag_t, vp->v_tag); + if (alp->commonattr & ATTR_CMN_OBJID) { + fsobj_id_t f = {0, 0}; + ATTR_PACK(&ab, f); + } + if (alp->commonattr & ATTR_CMN_OBJPERMANENTID) { + fsobj_id_t f = {0, 0}; + ATTR_PACK(&ab, f); + } + if (alp->commonattr & ATTR_CMN_PAROBJID) { + fsobj_id_t f = {0, 0}; + ATTR_PACK(&ab, f); + } + /* note that this returns the encoding for the volume name, not the node name */ + if (alp->commonattr & ATTR_CMN_SCRIPT) + ATTR_PACK_CAST(&ab, text_encoding_t, va.va_encoding); + if (alp->commonattr & ATTR_CMN_CRTIME) + ATTR_PACK_TIME(&ab, vs.f_create_time, is_64bit); + if (alp->commonattr & ATTR_CMN_MODTIME) + ATTR_PACK_TIME(&ab, vs.f_modify_time, is_64bit); + if (alp->commonattr & ATTR_CMN_CHGTIME) + ATTR_PACK_TIME(&ab, vs.f_modify_time, is_64bit); + if (alp->commonattr & ATTR_CMN_ACCTIME) + ATTR_PACK_TIME(&ab, vs.f_access_time, is_64bit); + if (alp->commonattr & ATTR_CMN_BKUPTIME) + ATTR_PACK_TIME(&ab, vs.f_backup_time, is_64bit); + if (alp->commonattr & ATTR_CMN_FNDRINFO) { + char f[32]; + /* + * This attribute isn't really Finder Info, at least for HFS. 
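+	 * For an HFS volume these 32 bytes are the boot-block data kept in
+	 * the volume header, fetched with the HFS_GET_BOOT_INFO ioctl below;
+	 * other filesystems simply get zeroes.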
+ */ + if (vp->v_tag == VT_HFS) { + if ((error = VNOP_IOCTL(vp, HFS_GET_BOOT_INFO, (caddr_t)&f, 0, ctx)) != 0) + goto out; + } else { + /* XXX we could at least pass out the volume UUID here */ + bzero(&f, sizeof(f)); + } + attrlist_pack_fixed(&ab, f, sizeof(f)); + } + if (alp->commonattr & ATTR_CMN_OWNERID) + ATTR_PACK(&ab, va.va_uid); + if (alp->commonattr & ATTR_CMN_GRPID) + ATTR_PACK(&ab, va.va_gid); + if (alp->commonattr & ATTR_CMN_ACCESSMASK) + ATTR_PACK_CAST(&ab, uint32_t, va.va_mode); + if (alp->commonattr & ATTR_CMN_FLAGS) + ATTR_PACK(&ab, va.va_flags); + if (alp->commonattr & ATTR_CMN_USERACCESS) { /* XXX this is expensive and also duplicate work */ + uint32_t perms = 0; + if (vnode_isdir(vp)) { + if (vnode_authorize(vp, NULL, + KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, ctx) == 0) + perms |= W_OK; + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY, ctx) == 0) + perms |= R_OK; + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH, ctx) == 0) + perms |= X_OK; + } else { + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA, ctx) == 0) + perms |= W_OK; + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, ctx) == 0) + perms |= R_OK; + if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE, ctx) == 0) + perms |= X_OK; + } + KAUTH_DEBUG("ATTRLIST - returning user access %x", perms); + ATTR_PACK(&ab, perms); + } + + /* volume attributes **************************************************/ + + if (alp->volattr & ATTR_VOL_FSTYPE) + ATTR_PACK_CAST(&ab, uint32_t, vfs_typenum(mnt)); + if (alp->volattr & ATTR_VOL_SIGNATURE) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_signature); + if (alp->volattr & ATTR_VOL_SIZE) + ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_blocks); + if (alp->volattr & ATTR_VOL_SPACEFREE) + ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_bfree); + if (alp->volattr & ATTR_VOL_SPACEAVAIL) + ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_bavail); + if (alp->volattr & ATTR_VOL_MINALLOCATION) + ATTR_PACK_CAST(&ab, off_t, vs.f_bsize); + if (alp->volattr & ATTR_VOL_ALLOCATIONCLUMP) + ATTR_PACK_CAST(&ab, off_t, vs.f_bsize); /* not strictly true */ + if (alp->volattr & ATTR_VOL_IOBLOCKSIZE) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_iosize); + if (alp->volattr & ATTR_VOL_OBJCOUNT) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_objcount); + if (alp->volattr & ATTR_VOL_FILECOUNT) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_filecount); + if (alp->volattr & ATTR_VOL_DIRCOUNT) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_dircount); + if (alp->volattr & ATTR_VOL_MAXOBJCOUNT) + ATTR_PACK_CAST(&ab, uint32_t, vs.f_maxobjcount); + if (alp->volattr & ATTR_VOL_MOUNTPOINT) + attrlist_pack_string(&ab, mnt->mnt_vfsstat.f_mntonname, 0); + if (alp->volattr & ATTR_VOL_NAME) + attrlist_pack_string(&ab, vs.f_vol_name, 0); + if (alp->volattr & ATTR_VOL_MOUNTFLAGS) + ATTR_PACK_CAST(&ab, uint32_t, mnt->mnt_flag); + if (alp->volattr & ATTR_VOL_MOUNTEDDEVICE) + attrlist_pack_string(&ab, mnt->mnt_vfsstat.f_mntfromname, 0); + if (alp->volattr & ATTR_VOL_ENCODINGSUSED) + ATTR_PACK_CAST(&ab, uint64_t, ~0LL); /* return all encodings */ + if (alp->volattr & ATTR_VOL_CAPABILITIES) { + /* fix up volume capabilities */ + if (vfs_extendedsecurity(mnt)) { + vs.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXTENDED_SECURITY; + } else { + vs.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] &= ~VOL_CAP_INT_EXTENDED_SECURITY; + } + 
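+		/*
+		 * Whether or not the filesystem reported it, the
+		 * extended-security interface bit is now accurate,
+		 * so always mark it valid.
+		 */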
vs.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXTENDED_SECURITY; + ATTR_PACK(&ab, vs.f_capabilities); + } + if (alp->volattr & ATTR_VOL_ATTRIBUTES) { + /* fix up volume attribute information */ + if (vfs_extendedsecurity(mnt)) { + vs.f_attributes.validattr.commonattr |= (ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | ATTR_CMN_GRPUUID); + } else { + vs.f_attributes.validattr.commonattr &= ~(ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | ATTR_CMN_GRPUUID); + vs.f_attributes.nativeattr.commonattr &= ~(ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | ATTR_CMN_GRPUUID); + } + ATTR_PACK(&ab, vs.f_attributes); + } + + /* diagnostic */ + if ((ab.fixedcursor - ab.base) != fixedsize) + panic("packed field size mismatch; allocated %d but packed %d for common %08x vol %08x", + fixedsize, ab.fixedcursor - ab.base, alp->commonattr, alp->volattr); + if (ab.varcursor != (ab.base + ab.needed)) + panic("packed variable field size mismatch; used %d but expected %d", ab.varcursor - ab.base, ab.needed); + + /* + * In the compatible case, we report the smaller of the required and returned sizes. + * If the FSOPT_REPORT_FULLSIZE option is supplied, we report the full (required) size + * of the result buffer, even if we copied less out. The caller knows how big a buffer + * they gave us, so they can always check for truncation themselves. + */ + *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + + error = copyout(ab.base, uap->attributeBuffer, ab.allocated); + +out: + if (vs.f_vol_name != NULL) + kfree(vs.f_vol_name, MAXPATHLEN); + if (ab.base != NULL) + FREE(ab.base, M_TEMP); + VFS_DEBUG(ctx, vp, "ATTRLIST - returning %d", error); + return(error); +} + +/* + * Obtain attribute information about a filesystem object. + */ +int +getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *retval) +{ + struct attrlist al; + struct vnode_attr va; + struct vfs_context context, *ctx; + struct nameidata nd; + struct _attrlist_buf ab; + vnode_t vp; + u_long nameiflags; + kauth_action_t action; + ssize_t fixedsize, varsize; + const char *cnp; + char *vname = NULL; + ssize_t cnl; + int error; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + ctx = &context; + vp = NULL; + error = 0; + VATTR_INIT(&va); + va.va_name = NULL; + ab.base = NULL; + cnp = "unknown"; + cnl = 0; + + /* + * Look up the file. + */ + nameiflags = AUDITVNPATH1; + if (!(uap->options & FSOPT_NOFOLLOW)) + nameiflags |= FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, &context); + + if ((error = namei(&nd)) != 0) + goto out; + vp = nd.ni_vp; + nameidone(&nd); + + /* + * Fetch the attribute request. + */ + if ((error = copyin(uap->alist, &al, sizeof(al))) != 0) + goto out; + if (al.bitmapcount != ATTR_BIT_MAP_COUNT) { + error = EINVAL; + goto out; + } + + VFS_DEBUG(ctx, vp, "%p ATTRLIST - %s request common %08x vol %08x file %08x dir %08x fork %08x %sfollow on '%s'", + vp, p->p_comm, al.commonattr, al.volattr, al.fileattr, al.dirattr, al.forkattr, + (uap->options & FSOPT_NOFOLLOW) ? "no":"", vp->v_name); + + /* + * It is legal to request volume or file attributes, + * but not both. 
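+	 * For example, a single call asking for both ATTR_VOL_NAME and
+	 * ATTR_FILE_TOTALSIZE is rejected with EINVAL below; two separate
+	 * calls are required.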
+ */ + if (al.volattr) { + if (al.fileattr || al.dirattr || al.forkattr) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: mixed volume/file/directory/fork attributes"); + goto out; + } + /* handle volume attribute request */ + error = getvolattrlist(vp, uap, &al, &context, proc_is64bit(p)); + goto out; + } + + /* + * Set up the vnode_attr structure and authorise. + */ + if ((error = getattrlist_setupvattr(&al, &va, &fixedsize, &action, proc_is64bit(p), vnode_isdir(vp))) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); + goto out; + } + if ((error = vnode_authorize(vp, NULL, action, &context)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: authorisation failed/denied"); + goto out; + } + + if (va.va_active != 0) { + /* + * If we're going to ask for va_name, allocate a buffer to point it at + */ + if (VATTR_IS_ACTIVE(&va, va_name)) { + va.va_name = (char *) kalloc(MAXPATHLEN); + if (va.va_name == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: cannot allocate va_name buffer"); + goto out; + } + } + + /* + * Call the filesystem. + */ + if ((error = vnode_getattr(vp, &va, &context)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); + goto out; + } + + /* did we ask for something the filesystem doesn't support? */ + if (!VATTR_ALL_SUPPORTED(&va)) { + + /* + * There are a couple of special cases. If we are after object IDs, + * we can make do with va_fileid. + */ + if ((al.commonattr & (ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID)) && !VATTR_IS_SUPPORTED(&va, va_linkid)) + VATTR_CLEAR_ACTIVE(&va, va_linkid); /* forget we wanted this */ + /* + * Many (most?) filesystems don't know their parent object id. We can get it the + * hard way. + */ + if ((al.commonattr & ATTR_CMN_PAROBJID) && !VATTR_IS_SUPPORTED(&va, va_parentid)) + VATTR_CLEAR_ACTIVE(&va, va_parentid); + /* + * And we can report datasize/alloc from total. + */ + if ((al.fileattr & ATTR_FILE_DATALENGTH) && !VATTR_IS_SUPPORTED(&va, va_data_size)) + VATTR_CLEAR_ACTIVE(&va, va_data_size); + if ((al.fileattr & ATTR_FILE_DATAALLOCSIZE) && !VATTR_IS_SUPPORTED(&va, va_data_alloc)) + VATTR_CLEAR_ACTIVE(&va, va_data_alloc); + + /* + * If we don't have an encoding, go with UTF-8 + */ + if ((al.commonattr & ATTR_CMN_SCRIPT) && !VATTR_IS_SUPPORTED(&va, va_encoding)) + VATTR_RETURN(&va, va_encoding, 0x7e /* kTextEncodingMacUnicode */); + + /* + * If we don't have a name, we'll get one from the vnode or mount point. + */ + if ((al.commonattr & ATTR_CMN_NAME) && !VATTR_IS_SUPPORTED(&va, va_name)) { + VATTR_CLEAR_ACTIVE(&va, va_name); + } + + /* + * We used to return va_nlink-2 for ATTR_DIR_ENTRYCOUNT. The va_nchildren + * field is preferred, but we'll fall back to va_nlink-2 for compatibility + * with file systems which haven't adopted va_nchildren. Note: the "- 2" + * reflects the "." and ".." entries which are reported via POSIX APIs, but + * not via Carbon (since they don't in fact exist in HFS). + */ + if ((al.dirattr & ATTR_DIR_ENTRYCOUNT) && !VATTR_IS_SUPPORTED(&va, va_nchildren) && + VATTR_IS_SUPPORTED(&va, va_nlink)) { + VATTR_RETURN(&va, va_nchildren, va.va_nlink - 2); + } + + /* check again */ + if (!VATTR_ALL_SUPPORTED(&va)) { + error = ENOTSUP; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not get all requested file attributes"); + VFS_DEBUG(ctx, vp, "ATTRLIST - have %016llx wanted %016llx missing %016llx", + va.va_supported, va.va_active, va.va_active & ~va.va_supported); + goto out; + } + } + } + + /* + * Compute variable-space requirements. 
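+	 * Each variable-length attribute was charged an attrreference in the
+	 * fixed-size region by the tables above; here we charge its payload,
+	 * rounded up to 4 bytes.  A 5-character name, for instance, adds
+	 * roundup(5 + 1, 4) == 8 bytes to varsize.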
+ */ + varsize = 0; /* length count */ + if (al.commonattr & ATTR_CMN_NAME) { + if (VATTR_IS_SUPPORTED(&va, va_name)) { + va.va_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ + cnp = va.va_name; + cnl = strlen(cnp); + } else { + if (vnode_isvroot(vp)) { + if (vp->v_mount->mnt_vfsstat.f_mntonname[1] == 0x00 && + vp->v_mount->mnt_vfsstat.f_mntonname[0] == '/') { + /* special case for boot volume. Use root name when it's + * available (which is the volume name) or just the mount on + * name of "/". we must do this for binary compatibility with + * pre Tiger code. returning nothing for the boot volume name + * breaks installers - 3961058 + */ + cnp = vname = vnode_getname(vp); + if (cnp == NULL) { + /* just use "/" as name */ + cnp = &vp->v_mount->mnt_vfsstat.f_mntonname[0]; + } + cnl = strlen(cnp); + } + else { + getattrlist_findnamecomp(vp->v_mount->mnt_vfsstat.f_mntonname, &cnp, &cnl); + } + } else { + cnp = vname = vnode_getname(vp); + cnl = 0; + if (cnp != NULL) { + cnl = strlen(cnp); + } + } + } + varsize += roundup(cnl + 1, 4); + } + + /* + * We have a kauth_acl_t but we will be returning a kauth_filesec_t. + * + * XXX This needs to change at some point; since the blob is opaque in + * user-space this is OK. + */ + if ((al.commonattr & ATTR_CMN_EXTENDED_SECURITY) && + VATTR_IS_SUPPORTED(&va, va_acl) && + (va.va_acl != NULL)) + varsize += roundup(KAUTH_FILESEC_SIZE(va.va_acl->acl_entrycount), 4); + + /* + * Allocate a target buffer for attribute results. + * Note that since we won't ever copy out more than the caller requested, + * we never need to allocate more than they offer. + */ + ab.allocated = imin(uap->bufferSize, fixedsize + varsize); + if (ab.allocated > ATTR_MAX_BUFFER) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); + goto out; + } + MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_WAITOK); + if (ab.base == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d for copy buffer", ab.allocated); + goto out; + } + + /* + * Pack results into the destination buffer. + */ + ab.fixedcursor = ab.base + sizeof(uint32_t); + ab.varcursor = ab.base + fixedsize; + ab.needed = fixedsize + varsize; + + /* common attributes **************************************************/ + if (al.commonattr & ATTR_CMN_NAME) + attrlist_pack_string(&ab, cnp, cnl); + if (al.commonattr & ATTR_CMN_DEVID) + ATTR_PACK_CAST(&ab, dev_t, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + if (al.commonattr & ATTR_CMN_FSID) + ATTR_PACK(&ab, vp->v_mount->mnt_vfsstat.f_fsid); + if (al.commonattr & ATTR_CMN_OBJTYPE) + ATTR_PACK_CAST(&ab, fsobj_type_t, vp->v_type); + if (al.commonattr & ATTR_CMN_OBJTAG) + ATTR_PACK_CAST(&ab, fsobj_tag_t, vp->v_tag); + if (al.commonattr & ATTR_CMN_OBJID) { + fsobj_id_t f; + /* + * Carbon can't deal with us reporting the target ID + * for links. So we ask the filesystem to give us the + * source ID as well, and if it gives us one, we use + * it instead. + */ + if (VATTR_IS_SUPPORTED(&va, va_linkid)) { + f.fid_objno = va.va_linkid; + } else { + f.fid_objno = va.va_fileid; + } + f.fid_generation = 0; + ATTR_PACK(&ab, f); + } + if (al.commonattr & ATTR_CMN_OBJPERMANENTID) { + fsobj_id_t f; + /* + * Carbon can't deal with us reporting the target ID + * for links. So we ask the filesystem to give us the + * source ID as well, and if it gives us one, we use + * it instead. 
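+	 * (This is why the ATTR_CMN_OBJID and ATTR_CMN_OBJPERMANENTID table
+	 * entries above request va_linkid alongside va_fileid.)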
+		 */
+		if (VATTR_IS_SUPPORTED(&va, va_linkid)) {
+			f.fid_objno = va.va_linkid;
+		} else {
+			f.fid_objno = va.va_fileid;
+		}
+		f.fid_generation = 0;
+		ATTR_PACK(&ab, f);
+	}
+	if (al.commonattr & ATTR_CMN_PAROBJID) {
+		fsobj_id_t f;
+		/*
+		 * If the filesystem doesn't know the parent ID, we can
+		 * try to get it via v->v_parent.  Don't need to worry
+		 * about links here, as we don't allow hardlinks to
+		 * directories.
+		 */
+		if (VATTR_IS_SUPPORTED(&va, va_parentid)) {
+			f.fid_objno = va.va_parentid;
+		} else {
+			struct vnode_attr lva;
+			vnode_t	pvp;
+
+			pvp = vnode_getparent(vp);
+
+			if (pvp == NULLVP) {
+				error = ENOTSUP;
+				goto out;
+			}
+			VATTR_INIT(&lva);
+			VATTR_WANTED(&lva, va_fileid);
+			error = vnode_getattr(pvp, &lva, &context);
+			vnode_put(pvp);
+
+			if (error != 0)
+				goto out;
+			f.fid_objno = lva.va_fileid;
+		}
+		f.fid_generation = 0;
+		ATTR_PACK(&ab, f);
+	}
+	if (al.commonattr & ATTR_CMN_SCRIPT)
+		ATTR_PACK_CAST(&ab, text_encoding_t, va.va_encoding);
+	if (al.commonattr & ATTR_CMN_CRTIME)
+		ATTR_PACK_TIME(&ab, va.va_create_time, proc_is64bit(p));
+	if (al.commonattr & ATTR_CMN_MODTIME)
+		ATTR_PACK_TIME(&ab, va.va_modify_time, proc_is64bit(p));
+	if (al.commonattr & ATTR_CMN_CHGTIME)
+		ATTR_PACK_TIME(&ab, va.va_change_time, proc_is64bit(p));
+	if (al.commonattr & ATTR_CMN_ACCTIME)
+		ATTR_PACK_TIME(&ab, va.va_access_time, proc_is64bit(p));
+	if (al.commonattr & ATTR_CMN_BKUPTIME)
+		ATTR_PACK_TIME(&ab, va.va_backup_time, proc_is64bit(p));
+	if (al.commonattr & ATTR_CMN_FNDRINFO) {
+		uio_t	auio;
+		size_t	fisize;
+		char	uio_buf[UIO_SIZEOF(1)];
+
+		fisize = imin(32, ab.allocated - (ab.fixedcursor - ab.base));
+		if (fisize > 0) {
+			if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, uio_buf, sizeof(uio_buf))) == NULL) {
+				error = ENOMEM;
+				goto out;
+			} else {
+				uio_addiov(auio, CAST_USER_ADDR_T(ab.fixedcursor), fisize);
+				error = vn_getxattr(vp, XATTR_FINDERINFO_NAME, auio, &fisize, XATTR_NOSECURITY, &context);
+				uio_free(auio);
+			}
+			if (error != 0) {
+				if ((error == ENOENT) || (error == ENOATTR) || (error == ENOTSUP) || (error == EPERM)) {
+					VFS_DEBUG(ctx, vp, "ATTRLIST - No system.finderinfo attribute, returning zeroes");
+					bzero(ab.fixedcursor, 32);
+					error = 0;
+				} else {
+					VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: reading system.finderinfo attribute");
+					goto out;
+				}
+			}
+		} else {
+			VFS_DEBUG(ctx, vp, "ATTRLIST - no room in caller buffer for FINDERINFO");
+		}
+		ab.fixedcursor += 32;
+	}
+	if (al.commonattr & ATTR_CMN_OWNERID)
+		ATTR_PACK(&ab, va.va_uid);
+	if (al.commonattr & ATTR_CMN_GRPID)
+		ATTR_PACK(&ab, va.va_gid);
+	if (al.commonattr & ATTR_CMN_ACCESSMASK)
+		ATTR_PACK_CAST(&ab, uint32_t, va.va_mode);
+	if (al.commonattr & ATTR_CMN_FLAGS)
+		ATTR_PACK(&ab, va.va_flags);
+	if (al.commonattr & ATTR_CMN_USERACCESS) {	/* this is expensive */
+		uint32_t	perms = 0;
+		if (vnode_isdir(vp)) {
+			if (vnode_authorize(vp, NULL,
+				KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, &context) == 0)
+				perms |= W_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_LIST_DIRECTORY, &context) == 0)
+				perms |= R_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_SEARCH, &context) == 0)
+				perms |= X_OK;
+		} else {
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA, &context) == 0)
+				perms |= W_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_READ_DATA, &context) == 0)
+				perms |= R_OK;
+			if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_EXECUTE,
&context) == 0) + perms |= X_OK; + } + VFS_DEBUG(ctx, vp, "ATTRLIST - granting perms %d", perms); + ATTR_PACK(&ab, perms); + } + if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) { + if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { + struct kauth_filesec fsec; + /* + * We want to return a kauth_filesec (for now), but all we have is a kauth_acl. + */ + fsec.fsec_magic = KAUTH_FILESEC_MAGIC; + fsec.fsec_owner = kauth_null_guid; + fsec.fsec_group = kauth_null_guid; + attrlist_pack_variable2(&ab, &fsec, ((char *)&fsec.fsec_acl - (char *)&fsec), va.va_acl, KAUTH_ACL_COPYSIZE(va.va_acl)); + } else { + attrlist_pack_variable(&ab, NULL, 0); + } + } + if (al.commonattr & ATTR_CMN_UUID) { + if (!VATTR_IS_SUPPORTED(&va, va_uuuid)) { + ATTR_PACK(&ab, kauth_null_guid); + } else { + ATTR_PACK(&ab, va.va_uuuid); + } + } + if (al.commonattr & ATTR_CMN_GRPUUID) { + if (!VATTR_IS_SUPPORTED(&va, va_guuid)) { + ATTR_PACK(&ab, kauth_null_guid); + } else { + ATTR_PACK(&ab, va.va_guuid); + } + } + + /* directory attributes **************************************************/ + if (vnode_isdir(vp)) { + if (al.dirattr & ATTR_DIR_LINKCOUNT) /* full count of entries */ + ATTR_PACK_CAST(&ab, uint32_t, va.va_nlink); + if (al.dirattr & ATTR_DIR_ENTRYCOUNT) + ATTR_PACK_CAST(&ab, uint32_t, va.va_nchildren); + if (al.dirattr & ATTR_DIR_MOUNTSTATUS) + ATTR_PACK_CAST(&ab, uint32_t, (vp->v_flag & VROOT) ? DIR_MNTSTATUS_MNTPOINT : 0); + } + + /* file attributes **************************************************/ + if (!vnode_isdir(vp)) { + if (al.fileattr & ATTR_FILE_LINKCOUNT) + ATTR_PACK_CAST(&ab, uint32_t, va.va_nlink); + if (al.fileattr & ATTR_FILE_TOTALSIZE) + ATTR_PACK(&ab, va.va_total_size); + if (al.fileattr & ATTR_FILE_ALLOCSIZE) + ATTR_PACK(&ab, va.va_total_alloc); + if (al.fileattr & ATTR_FILE_IOBLOCKSIZE) + ATTR_PACK(&ab, va.va_iosize); + if (al.fileattr & ATTR_FILE_CLUMPSIZE) + ATTR_PACK_CAST(&ab, uint32_t, 0); /* XXX value is deprecated */ + if (al.fileattr & ATTR_FILE_DEVTYPE) { + if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { + ATTR_PACK(&ab, vp->v_specinfo->si_rdev); + } else { + ATTR_PACK_CAST(&ab, uint32_t, 0); + } + } + if (al.fileattr & ATTR_FILE_DATALENGTH) { + if (VATTR_IS_SUPPORTED(&va, va_data_size)) { + ATTR_PACK(&ab, va.va_data_size); + } else { + ATTR_PACK(&ab, va.va_total_size); + } + } + if (al.fileattr & ATTR_FILE_DATAALLOCSIZE) { + if (VATTR_IS_SUPPORTED(&va, va_data_alloc)) { + ATTR_PACK(&ab, va.va_data_alloc); + } else { + ATTR_PACK(&ab, va.va_total_alloc); + } + } + /* fetch resource fork size/allocation via xattr interface */ + if (al.fileattr & (ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)) { + size_t rsize; + if ((error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, &rsize, XATTR_NOSECURITY, &context)) != 0) { + if ((error == ENOENT) || (error == ENOATTR) || (error == ENOTSUP) || (error == EPERM)) { + rsize = 0; + error = 0; + } else { + goto out; + } + } + if (al.fileattr & ATTR_FILE_RSRCLENGTH) + ATTR_PACK_CAST(&ab, off_t, rsize); + if (al.fileattr & ATTR_FILE_RSRCALLOCSIZE) { + uint32_t blksize = vp->v_mount->mnt_vfsstat.f_bsize; + if (blksize == 0) + blksize = 512; + ATTR_PACK_CAST(&ab, off_t, (roundup(rsize, blksize))); + } + } + } + + /* diagnostic */ + if ((ab.fixedcursor - ab.base) != fixedsize) + panic("packed field size mismatch; allocated %d but packed %d for common %08x vol %08x", + fixedsize, ab.fixedcursor - ab.base, al.commonattr, al.volattr); + if (ab.varcursor != (ab.base + ab.needed)) + panic("packed variable field size mismatch; used %d but expected 
%d", ab.varcursor - ab.base, ab.needed); + + /* + * In the compatible case, we report the smaller of the required and returned sizes. + * If the FSOPT_REPORT_FULLSIZE option is supplied, we report the full (required) size + * of the result buffer, even if we copied less out. The caller knows how big a buffer + * they gave us, so they can always check for truncation themselves. + */ + *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + + error = copyout(ab.base, uap->attributeBuffer, ab.allocated); + +out: + if (va.va_name) + kfree(va.va_name, MAXPATHLEN); + if (vname) + vnode_putname(vname); + if (vp) + vnode_put(vp); + if (ab.base != NULL) + FREE(ab.base, M_TEMP); + if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) + kauth_acl_free(va.va_acl); + + VFS_DEBUG(ctx, vp, "ATTRLIST - returning %d", error); + return(error); +} + +static int +attrlist_unpack_fixed(char **cursor, char *end, void *buf, ssize_t size) +{ + /* make sure we have enough source data */ + if ((*cursor) + size > end) + return(EINVAL); + + bcopy(*cursor, buf, size); + *cursor += size; + return(0); +} + +#define ATTR_UNPACK(v) do {if ((error = attrlist_unpack_fixed(&cursor, bufend, &v, sizeof(v))) != 0) goto out;} while(0); +#define ATTR_UNPACK_CAST(t, v) do { t _f; ATTR_UNPACK(_f); v = _f;} while(0) +#define ATTR_UNPACK_TIME(v, is64) \ + do { \ + if (is64) { \ + struct user_timespec us; \ + ATTR_UNPACK(us); \ + v.tv_sec = us.tv_sec; \ + v.tv_nsec = us.tv_nsec; \ + } else { \ + ATTR_UNPACK(v); \ + } \ + } while(0) + + +/* + * Write attributes. + */ +int +setattrlist(struct proc *p, register struct setattrlist_args *uap, __unused register_t *retval) +{ + struct attrlist al; + struct vfs_context context, *ctx; + struct vnode_attr va; + struct attrreference ar; + struct nameidata nd; + vnode_t vp; + u_long nameiflags; + kauth_action_t action; + char *user_buf, *cursor, *bufend, *fndrinfo, *cp, *volname; + int proc_is64, error; + uint32_t nace; + kauth_filesec_t rfsec; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + ctx = &context; + vp = NULL; + user_buf = NULL; + fndrinfo = NULL; + volname = NULL; + error = 0; + proc_is64 = proc_is64bit(p); + VATTR_INIT(&va); + + + /* + * Look up the file. + */ + nameiflags = 0; + if ((uap->options & FSOPT_NOFOLLOW) == 0) + nameiflags |= FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, uap->path, &context); + if ((error = namei(&nd)) != 0) + goto out; + vp = nd.ni_vp; + nameidone(&nd); + + /* + * Fetch the attribute set and validate. + */ + if ((error = copyin(uap->alist, (caddr_t) &al, sizeof (al)))) + goto out; + if (al.bitmapcount != ATTR_BIT_MAP_COUNT) { + error = EINVAL; + goto out; + } + + VFS_DEBUG(ctx, vp, "%p ATTRLIST - %s set common %08x vol %08x file %08x dir %08x fork %08x %sfollow on '%s'", + vp, p->p_comm, al.commonattr, al.volattr, al.fileattr, al.dirattr, al.forkattr, + (uap->options & FSOPT_NOFOLLOW) ? 
"no":"", vp->v_name); + + if (al.volattr) { + if ((al.volattr & ~ATTR_VOL_SETMASK) || + (al.commonattr & ~ATTR_CMN_VOLSETMASK) || + al.fileattr || + al.forkattr) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: attempt to set invalid volume attributes"); + goto out; + } + } else { + if ((al.commonattr & ~ATTR_CMN_SETMASK) || + (al.fileattr & ~ATTR_FILE_SETMASK) || + (al.dirattr & ~ATTR_DIR_SETMASK) || + (al.forkattr & ~ATTR_FORK_SETMASK)) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: attempt to set invalid file/folder attributes"); + goto out; + } + } + + /* + * Make the naive assumption that the caller has supplied a reasonable buffer + * size. We could be more careful by pulling in the fixed-size region, checking + * the attrref structures, then pulling in the variable section. + * We need to reconsider this for handling large ACLs, as they should probably be + * brought directly into a buffer. Multiple copyins will make this slower though. + * + * We could also map the user buffer if it is larger than some sensible mimimum. + */ + if (uap->bufferSize > ATTR_MAX_BUFFER) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size %d too large", uap->bufferSize); + error = ENOMEM; + goto out; + } + MALLOC(user_buf, char *, uap->bufferSize, M_TEMP, M_WAITOK); + if (user_buf == NULL) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not allocate %d bytes for buffer", uap->bufferSize); + error = ENOMEM; + goto out; + } + if ((error = copyin(uap->attributeBuffer, user_buf, uap->bufferSize)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer copyin failed"); + goto out; + } + VFS_DEBUG(ctx, vp, "ATTRLIST - copied in %d bytes of user attributes to %p", uap->bufferSize, user_buf); + + /* + * Unpack the argument buffer. + */ + cursor = user_buf; + bufend = cursor + uap->bufferSize; + + /* common */ + if (al.commonattr & ATTR_CMN_SCRIPT) { + ATTR_UNPACK(va.va_encoding); + VATTR_SET_ACTIVE(&va, va_encoding); + } + if (al.commonattr & ATTR_CMN_CRTIME) { + ATTR_UNPACK_TIME(va.va_create_time, proc_is64); + VATTR_SET_ACTIVE(&va, va_create_time); + } + if (al.commonattr & ATTR_CMN_MODTIME) { + ATTR_UNPACK_TIME(va.va_modify_time, proc_is64); + VATTR_SET_ACTIVE(&va, va_modify_time); + } + if (al.commonattr & ATTR_CMN_CHGTIME) { + ATTR_UNPACK_TIME(va.va_change_time, proc_is64); + VATTR_SET_ACTIVE(&va, va_change_time); + } + if (al.commonattr & ATTR_CMN_ACCTIME) { + ATTR_UNPACK_TIME(va.va_access_time, proc_is64); + VATTR_SET_ACTIVE(&va, va_access_time); + } + if (al.commonattr & ATTR_CMN_BKUPTIME) { + ATTR_UNPACK_TIME(va.va_backup_time, proc_is64); + VATTR_SET_ACTIVE(&va, va_backup_time); + } + if (al.commonattr & ATTR_CMN_FNDRINFO) { + if ((cursor + 32) > bufend) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - not enough data supplied for FINDERINFO"); + goto out; + } + fndrinfo = cursor; + cursor += 32; + } + if (al.commonattr & ATTR_CMN_OWNERID) { + ATTR_UNPACK(va.va_uid); + VATTR_SET_ACTIVE(&va, va_uid); + } + if (al.commonattr & ATTR_CMN_GRPID) { + ATTR_UNPACK(va.va_gid); + VATTR_SET_ACTIVE(&va, va_gid); + } + if (al.commonattr & ATTR_CMN_ACCESSMASK) { + ATTR_UNPACK_CAST(uint32_t, va.va_mode); + VATTR_SET_ACTIVE(&va, va_mode); + } + if (al.commonattr & ATTR_CMN_FLAGS) { + ATTR_UNPACK(va.va_flags); + VATTR_SET_ACTIVE(&va, va_flags); + } + if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) { + + /* + * We are (for now) passed a kauth_filesec_t, but all we want from + * it is the ACL. 
+ */ + cp = cursor; + ATTR_UNPACK(ar); + cp += ar.attr_dataoffset; + rfsec = (kauth_filesec_t)cp; + if (((char *)(rfsec + 1) > bufend) || /* no space for acl */ + (rfsec->fsec_magic != KAUTH_FILESEC_MAGIC) || /* bad magic */ + (KAUTH_FILESEC_COPYSIZE(rfsec) != ar.attr_length) || /* size does not match */ + ((cp + KAUTH_FILESEC_COPYSIZE(rfsec)) > bufend)) { /* ACEs overrun buffer */ + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad ACL supplied", ar.attr_length); + goto out; + } + nace = rfsec->fsec_entrycount; + if (nace == KAUTH_FILESEC_NOACL) + nace = 0; + if (nace > KAUTH_ACL_MAX_ENTRIES) { /* ACL size invalid */ + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad ACL supplied"); + goto out; + } + nace = rfsec->fsec_acl.acl_entrycount; + if (nace == KAUTH_FILESEC_NOACL) { + /* deleting ACL */ + VATTR_SET(&va, va_acl, NULL); + } else { + + if (nace > KAUTH_ACL_MAX_ENTRIES) { /* ACL size invalid */ + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: supplied ACL is too large"); + goto out; + } + VATTR_SET(&va, va_acl, &rfsec->fsec_acl); + } + } + if (al.commonattr & ATTR_CMN_UUID) { + ATTR_UNPACK(va.va_uuuid); + VATTR_SET_ACTIVE(&va, va_uuuid); + } + if (al.commonattr & ATTR_CMN_GRPUUID) { + ATTR_UNPACK(va.va_guuid); + VATTR_SET_ACTIVE(&va, va_guuid); + } + + /* volume */ + if (al.volattr & ATTR_VOL_INFO) { + if (al.volattr & ATTR_VOL_NAME) { + volname = cursor; + ATTR_UNPACK(ar); + volname += ar.attr_dataoffset; + if ((volname + ar.attr_length) > bufend) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: volume name too big for caller buffer"); + goto out; + } + /* guarantee NUL termination */ + volname[ar.attr_length - 1] = 0; + } + } + + /* file */ + if (al.fileattr & ATTR_FILE_DEVTYPE) { + /* XXX does it actually make any sense to change this? */ + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - XXX device type change not implemented"); + goto out; + } + + /* + * Validate and authorize. + */ + action = 0; + if ((va.va_active != 0LL) && ((error = vnode_authattr(vp, &va, &action, &context)) != 0)) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: attribute changes refused: %d", error); + goto out; + } + /* + * We can auth file Finder Info here. HFS volume FinderInfo is really boot data, + * and will be auth'ed by the FS. + */ + if (fndrinfo != NULL) { + if (al.volattr & ATTR_VOL_INFO) { + if (vp->v_tag != VT_HFS) { + error = EINVAL; + goto out; + } + } else { + action |= KAUTH_VNODE_WRITE_ATTRIBUTES; + } + } + + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, &context)) != 0)) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: authorization failed"); + goto out; + } + + /* + * Write the attributes if we have any. + */ + if ((va.va_active != 0LL) && ((error = vnode_setattr(vp, &va, &context)) != 0)) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); + goto out; + } + + /* + * Write the Finder Info if we have any. 
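+	 * Volume FinderInfo goes to HFS via the HFS_SET_BOOT_INFO ioctl;
+	 * per-object FinderInfo is written as the com.apple.FinderInfo
+	 * extended attribute (XATTR_FINDERINFO_NAME) below.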
+ */ + if (fndrinfo != NULL) { + if (al.volattr & ATTR_VOL_INFO) { + if (vp->v_tag == VT_HFS) { + error = VNOP_IOCTL(vp, HFS_SET_BOOT_INFO, (caddr_t)fndrinfo, 0, &context); + if (error != 0) + goto out; + } else { + /* XXX should never get here */ + } + } else { + /* write Finder Info EA */ + uio_t auio; + char uio_buf[UIO_SIZEOF(1)]; + + if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, uio_buf, sizeof(uio_buf))) == NULL) { + error = ENOMEM; + } else { + uio_addiov(auio, CAST_USER_ADDR_T(fndrinfo), 32); + error = vn_setxattr(vp, XATTR_FINDERINFO_NAME, auio, XATTR_NOSECURITY, &context); + uio_free(auio); + } + + if (error == 0 && need_fsevent(FSE_FINDER_INFO_CHANGED, vp)) { + add_fsevent(FSE_FINDER_INFO_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + + if (error != 0) { + goto out; + } + } + } + + /* + * Set the volume name, if we have one + */ + if (volname != NULL) + { + struct vfs_attr vs; + + VFSATTR_INIT(&vs); + + vs.f_vol_name = volname; /* References the setattrlist buffer directly */ + VFSATTR_WANTED(&vs, f_vol_name); + + if ((error = vfs_setattr(vp->v_mount, &vs, ctx)) != 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setting volume name failed"); + goto out; + } + + if (!VFSATTR_ALL_SUPPORTED(&vs)) { + error = EINVAL; + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not set volume name"); + goto out; + } + } + + /* all done and successful */ + +out: + if (vp != NULL) + vnode_put(vp); + if (user_buf != NULL) + FREE(user_buf, M_TEMP); + VFS_DEBUG(ctx, vp, "ATTRLIST - set returning %d", error); + return(error); +} diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 8919319e4..5371c4b3a 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,9 +58,6 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * The NEXTSTEP Software License Agreement specifies the terms - * and conditions for redistribution. 
- * * @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94 */ @@ -73,48 +70,55 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include -#include +#include #if DIAGNOSTIC #include #endif /* DIAGNOSTIC */ #include #include +#include + +#include #include #include +#if BALANCE_QUEUES static __inline__ void bufqinc(int q); static __inline__ void bufqdec(int q); +#endif -static int do_breadn_for_type(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, - int *rasizes, int nrablks, struct ucred *cred, struct buf **bpp, int queuetype); -static struct buf *getnewbuf(int slpflag, int slptimeo, int *queue); -static int bcleanbuf(struct buf *bp); -static int brecover_data(struct buf *bp); -extern void vwakeup(); +static int bcleanbuf(buf_t bp); +static int brecover_data(buf_t bp); +static boolean_t incore(vnode_t vp, daddr64_t blkno); +static buf_t incore_locked(vnode_t vp, daddr64_t blkno); +/* timeout is in msecs */ +static buf_t getnewbuf(int slpflag, int slptimeo, int *queue); +static void bremfree_locked(buf_t bp); +static void buf_reassign(buf_t bp, vnode_t newvp); +static errno_t buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo); +static int buf_iterprepare(vnode_t vp, struct buflists *, int flags); +static void buf_itercomplete(vnode_t vp, struct buflists *, int flags); -extern int niobuf; /* The number of IO buffer headers for cluster IO */ -int blaundrycnt; +__private_extern__ int bdwrite_internal(buf_t, int); /* zone allocated buffer headers */ -static zone_t buf_hdr_zone; -static int buf_hdr_count; +static void bufzoneinit(void); +static void bcleanbuf_thread_init(void); +static void bcleanbuf_thread(void); + +static zone_t buf_hdr_zone; +static int buf_hdr_count; -#if TRACE -struct proc *traceproc; -int tracewhich, tracebuf[TRCSIZ]; -u_int tracex; -char traceflags[TR_NFLAGS]; -#endif /* TRACE */ /* * Definitions for the buffer hash lists. @@ -129,38 +133,60 @@ struct bufstats bufstats; /* Number of delayed write buffers */ int nbdwrite = 0; +int blaundrycnt = 0; -/* - * Insq/Remq for the buffer hash lists. - */ -#if 0 -#define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) -#define bremhash(bp) LIST_REMOVE(bp, b_hash) -#endif /* 0 */ - -TAILQ_HEAD(ioqueue, buf) iobufqueue; -TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; +static TAILQ_HEAD(ioqueue, buf) iobufqueue; +static TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; static int needbuffer; static int need_iobuffer; +static lck_grp_t *buf_mtx_grp; +static lck_attr_t *buf_mtx_attr; +static lck_grp_attr_t *buf_mtx_grp_attr; +static lck_mtx_t *iobuffer_mtxp; +static lck_mtx_t *buf_mtxp; + +static __inline__ int +buf_timestamp(void) +{ + struct timeval t; + microuptime(&t); + return (t.tv_sec); +} + /* * Insq/Remq for the buffer free lists. 
*/ +#if BALANCE_QUEUES #define binsheadfree(bp, dp, whichq) do { \ TAILQ_INSERT_HEAD(dp, bp, b_freelist); \ bufqinc((whichq)); \ (bp)->b_whichq = whichq; \ - (bp)->b_timestamp = time.tv_sec; \ + (bp)->b_timestamp = buf_timestamp(); \ } while (0) #define binstailfree(bp, dp, whichq) do { \ TAILQ_INSERT_TAIL(dp, bp, b_freelist); \ bufqinc((whichq)); \ (bp)->b_whichq = whichq; \ - (bp)->b_timestamp = time.tv_sec; \ + (bp)->b_timestamp = buf_timestamp(); \ + } while (0) +#else +#define binsheadfree(bp, dp, whichq) do { \ + TAILQ_INSERT_HEAD(dp, bp, b_freelist); \ + (bp)->b_whichq = whichq; \ + (bp)->b_timestamp = buf_timestamp(); \ } while (0) +#define binstailfree(bp, dp, whichq) do { \ + TAILQ_INSERT_TAIL(dp, bp, b_freelist); \ + (bp)->b_whichq = whichq; \ + (bp)->b_timestamp = buf_timestamp(); \ + } while (0) +#endif + + #define BHASHENTCHECK(bp) \ if ((bp)->b_hash.le_prev != (struct buf **)0xdeadbeef) \ panic("%x: b_hash.le_prev is not deadbeef", (bp)); @@ -178,12 +204,6 @@ static int need_iobuffer; (bp)->b_vnbufs.le_next = NOLIST; \ } -simple_lock_data_t bufhashlist_slock; /* lock on buffer hash list */ - -/* number of per vnode, "in flight" buffer writes */ -#define BUFWRITE_THROTTLE 9 - - /* * Time in seconds before a buffer on a list is * considered as a stale buffer @@ -196,9 +216,11 @@ int lru_is_stale = LRU_IS_STALE; int age_is_stale = AGE_IS_STALE; int meta_is_stale = META_IS_STALE; + + /* LIST_INSERT_HEAD() with assertions */ static __inline__ void -blistenterhead(struct bufhashhdr * head, struct buf * bp) +blistenterhead(struct bufhashhdr * head, buf_t bp) { if ((bp->b_hash.le_next = (head)->lh_first) != NULL) (head)->lh_first->b_hash.le_prev = &(bp)->b_hash.le_next; @@ -209,16 +231,9 @@ blistenterhead(struct bufhashhdr * head, struct buf * bp) } static __inline__ void -binshash(struct buf *bp, struct bufhashhdr *dp) +binshash(buf_t bp, struct bufhashhdr *dp) { - struct buf *nbp; - - simple_lock(&bufhashlist_slock); - -#if 0 - if((bad = incore(bp->b_vp, bp->b_lblkno))) - panic("binshash: already incore bp 0x%x, bad 0x%x\n", bp, bad); -#endif /* 0 */ + buf_t nbp; BHASHENTCHECK(bp); @@ -229,13 +244,11 @@ binshash(struct buf *bp, struct bufhashhdr *dp) } blistenterhead(dp, bp); - simple_unlock(&bufhashlist_slock); } static __inline__ void -bremhash(struct buf *bp) +bremhash(buf_t bp) { - simple_lock(&bufhashlist_slock); if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef) panic("bremhash le_prev is deadbeef"); if (bp->b_hash.le_next == bp) @@ -244,324 +257,1262 @@ bremhash(struct buf *bp) if (bp->b_hash.le_next != NULL) bp->b_hash.le_next->b_hash.le_prev = bp->b_hash.le_prev; *bp->b_hash.le_prev = (bp)->b_hash.le_next; - simple_unlock(&bufhashlist_slock); } -/* - * Remove a buffer from the free list it's on - */ -void -bremfree(bp) - struct buf *bp; -{ - struct bqueues *dp = NULL; - int whichq = -1; - /* - * We only calculate the head of the freelist when removing - * the last element of the list as that is the only time that - * it is needed (e.g. to reset the tail pointer). - * - * NB: This makes an assumption about how tailq's are implemented. - */ - if (bp->b_freelist.tqe_next == NULL) { - for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) - if (dp->tqh_last == &bp->b_freelist.tqe_next) - break; - if (dp == &bufqueues[BQUEUES]) - panic("bremfree: lost tail"); - } - TAILQ_REMOVE(dp, bp, b_freelist); - whichq = bp->b_whichq; - bufqdec(whichq); - bp->b_whichq = -1; - bp->b_timestamp = 0; -} -/* - * Associate a buffer with a vnode. 
- */ -static void -bgetvp(vp, bp) - register struct vnode *vp; - register struct buf *bp; -{ - if (bp->b_vp != vp) - panic("bgetvp: not free"); - VHOLD(vp); - bp->b_vp = vp; - if (vp->v_type == VBLK || vp->v_type == VCHR) - bp->b_dev = vp->v_rdev; - else - bp->b_dev = NODEV; - /* - * Insert onto list for new vnode. - */ - bufinsvn(bp, &vp->v_cleanblkhd); +int +buf_valid(buf_t bp) { + + if ( (bp->b_flags & (B_DONE | B_DELWRI)) ) + return 1; + return 0; } -/* - * Disassociate a buffer from a vnode. - */ -static void -brelvp(bp) - register struct buf *bp; -{ - struct vnode *vp; +int +buf_fromcache(buf_t bp) { - if (bp->b_vp == (struct vnode *) 0) - panic("brelvp: NULL vp"); - /* - * Delete from old vnode list, if on one. - */ - if (bp->b_vnbufs.le_next != NOLIST) - bufremvn(bp); - vp = bp->b_vp; - bp->b_vp = (struct vnode *) 0; - HOLDRELE(vp); + if ( (bp->b_flags & B_CACHE) ) + return 1; + return 0; } -/* - * Reassign a buffer from one vnode to another. - * Used to assign file specific control information - * (indirect blocks) to the vnode to which they belong. - */ void -reassignbuf(bp, newvp) - register struct buf *bp; - register struct vnode *newvp; -{ - register struct buflists *listheadp; +buf_markinvalid(buf_t bp) { + + SET(bp->b_flags, B_INVAL); +} - if (newvp == NULL) { - printf("reassignbuf: NULL"); - return; - } - /* - * Delete from old vnode list, if on one. - */ - if (bp->b_vnbufs.le_next != NOLIST) - bufremvn(bp); - /* - * If dirty, put on list of dirty buffers; - * otherwise insert onto list of clean buffers. - */ - if (ISSET(bp->b_flags, B_DELWRI)) - listheadp = &newvp->v_dirtyblkhd; - else - listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); +void +buf_markdelayed(buf_t bp) { + + SET(bp->b_flags, B_DELWRI); + buf_reassign(bp, bp->b_vp); } -static __inline__ void -bufhdrinit(struct buf *bp) -{ - bzero((char *)bp, sizeof *bp); - bp->b_dev = NODEV; - bp->b_rcred = NOCRED; - bp->b_wcred = NOCRED; - bp->b_vnbufs.le_next = NOLIST; - bp->b_flags = B_INVAL; +void +buf_markeintr(buf_t bp) { + + SET(bp->b_flags, B_EINTR); +} - return; +void +buf_markaged(buf_t bp) { + + SET(bp->b_flags, B_AGE); } -/* - * Initialize buffers and hash links for buffers. 
- */ -__private_extern__ void -bufinit() -{ - register struct buf *bp; - register struct bqueues *dp; - register int i; - int metabuf; - long whichq; - static void bufzoneinit(); - static void bcleanbuf_thread_init(); +errno_t +buf_error(buf_t bp) { + + return (bp->b_error); +} - /* Initialize the buffer queues ('freelists') and the hash table */ - for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) - TAILQ_INIT(dp); - bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); +void +buf_seterror(buf_t bp, errno_t error) { - simple_lock_init(&bufhashlist_slock ); + if ((bp->b_error = error)) + SET(bp->b_flags, B_ERROR); + else + CLR(bp->b_flags, B_ERROR); +} - metabuf = nbuf/8; /* reserved for meta buf */ +void +buf_setflags(buf_t bp, int32_t flags) { - /* Initialize the buffer headers */ - for (i = 0; i < nbuf; i++) { - bp = &buf[i]; - bufhdrinit(bp); + SET(bp->b_flags, (flags & BUF_X_WRFLAGS)); +} - /* - * metabuf buffer headers on the meta-data list and - * rest of the buffer headers on the empty list - */ - if (--metabuf) - whichq = BQ_META; - else - whichq = BQ_EMPTY; +void +buf_clearflags(buf_t bp, int32_t flags) { - BLISTNONE(bp); - dp = &bufqueues[whichq]; - binsheadfree(bp, dp, whichq); - binshash(bp, &invalhash); - } + CLR(bp->b_flags, (flags & BUF_X_WRFLAGS)); +} - for (; i < nbuf + niobuf; i++) { - bp = &buf[i]; - bufhdrinit(bp); - binsheadfree(bp, &iobufqueue, -1); - } +int32_t +buf_flags(buf_t bp) { + + return ((bp->b_flags & BUF_X_RDFLAGS)); +} - printf("using %d buffer headers and %d cluster IO buffer headers\n", - nbuf, niobuf); +void +buf_reset(buf_t bp, int32_t io_flags) { + + CLR(bp->b_flags, (B_READ | B_WRITE | B_ERROR | B_DONE | B_INVAL | B_ASYNC | B_NOCACHE)); + SET(bp->b_flags, (io_flags & (B_ASYNC | B_READ | B_WRITE | B_NOCACHE))); - /* Set up zones used by the buffer cache */ - bufzoneinit(); + bp->b_error = 0; +} - /* start the bcleanbuf() thread */ - bcleanbuf_thread_init(); +uint32_t +buf_count(buf_t bp) { + + return (bp->b_bcount); +} -#if 0 /* notyet */ - { - static void bufq_balance_thread_init(); - /* create a thread to do dynamic buffer queue balancing */ - bufq_balance_thread_init(); - } -#endif /* notyet */ +void +buf_setcount(buf_t bp, uint32_t bcount) { + + bp->b_bcount = bcount; } -static struct buf * -bio_doread(vp, blkno, size, cred, async, queuetype) - struct vnode *vp; - daddr_t blkno; - int size; - struct ucred *cred; - int async; - int queuetype; -{ - register struct buf *bp; - struct proc *p = current_proc(); +uint32_t +buf_size(buf_t bp) { + + return (bp->b_bufsize); +} - bp = getblk(vp, blkno, size, 0, 0, queuetype); +void +buf_setsize(buf_t bp, uint32_t bufsize) { + + bp->b_bufsize = bufsize; +} - /* - * If buffer does not have data valid, start a read. - * Note that if buffer is B_INVAL, getblk() won't return it. - * Therefore, it's valid if it's I/O has completed or been delayed. - */ - if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { - /* Start I/O for the buffer (keeping credentials). */ - SET(bp->b_flags, B_READ | async); - if (cred != NOCRED && bp->b_rcred == NOCRED) { - /* - * NFS has embedded ucred. - * Can not crhold() here as that causes zone corruption - */ - bp->b_rcred = crdup(cred); - } +uint32_t +buf_resid(buf_t bp) { + + return (bp->b_resid); +} - VOP_STRATEGY(bp); +void +buf_setresid(buf_t bp, uint32_t resid) { + + bp->b_resid = resid; +} - trace(TR_BREADMISS, pack(vp, size), blkno); +uint32_t +buf_dirtyoff(buf_t bp) { - /* Pay for the read. 
*/ - if (p && p->p_stats) - p->p_stats->p_ru.ru_inblock++; /* XXX */ - } else if (async) { - brelse(bp); - } + return (bp->b_dirtyoff); +} - trace(TR_BREADHIT, pack(vp, size), blkno); +uint32_t +buf_dirtyend(buf_t bp) { - return (bp); + return (bp->b_dirtyend); } -/* - * Read a disk block. - * This algorithm described in Bach (p.54). - */ -int -bread(vp, blkno, size, cred, bpp) - struct vnode *vp; - daddr_t blkno; - int size; - struct ucred *cred; - struct buf **bpp; -{ - register struct buf *bp; - /* Get buffer for block. */ - bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_READ); +void +buf_setdirtyoff(buf_t bp, uint32_t dirtyoff) { + + bp->b_dirtyoff = dirtyoff; +} - /* Wait for the read to complete, and return result. */ - return (biowait(bp)); +void +buf_setdirtyend(buf_t bp, uint32_t dirtyend) { + + bp->b_dirtyend = dirtyend; } -/* - * Read a disk block. [bread() for meta-data] - * This algorithm described in Bach (p.54). - */ -int -meta_bread(vp, blkno, size, cred, bpp) - struct vnode *vp; - daddr_t blkno; - int size; - struct ucred *cred; - struct buf **bpp; -{ - register struct buf *bp; +uintptr_t +buf_dataptr(buf_t bp) { + + return (bp->b_datap); +} - /* Get buffer for block. */ - bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_META); +void +buf_setdataptr(buf_t bp, uintptr_t data) { + + bp->b_datap = data; +} + +vnode_t +buf_vnode(buf_t bp) { + + return (bp->b_vp); +} + +void +buf_setvnode(buf_t bp, vnode_t vp) { + + bp->b_vp = vp; +} + + +void * +buf_callback(buf_t bp) +{ + if ( !(bp->b_lflags & BL_IOBUF) ) + return ((void *) NULL); + if ( !(bp->b_flags & B_CALL) ) + return ((void *) NULL); + + return ((void *)bp->b_iodone); +} + + +errno_t +buf_setcallback(buf_t bp, void (*callback)(buf_t, void *), void *transaction) +{ + + if ( !(bp->b_lflags & BL_IOBUF) ) + return (EINVAL); + + if (callback) + bp->b_flags |= (B_CALL | B_ASYNC); + else + bp->b_flags &= ~B_CALL; + bp->b_transaction = transaction; + bp->b_iodone = callback; + + return (0); +} + +errno_t +buf_setupl(buf_t bp, upl_t upl, uint32_t offset) +{ + + if ( !(bp->b_lflags & BL_IOBUF) ) + return (EINVAL); + + if (upl) + bp->b_flags |= B_CLUSTER; + else + bp->b_flags &= ~B_CLUSTER; + bp->b_upl = upl; + bp->b_uploffset = offset; + + return (0); +} + +buf_t +buf_clone(buf_t bp, int io_offset, int io_size, void (*iodone)(buf_t, void *), void *arg) +{ + buf_t io_bp; + + if (io_offset < 0 || io_size < 0) + return (NULL); + + if ((unsigned)(io_offset + io_size) > (unsigned)bp->b_bcount) + return (NULL); + + if (bp->b_flags & B_CLUSTER) { + if (io_offset && ((bp->b_uploffset + io_offset) & PAGE_MASK)) + return (NULL); + + if (((bp->b_uploffset + io_offset + io_size) & PAGE_MASK) && ((io_offset + io_size) < bp->b_bcount)) + return (NULL); + } + io_bp = alloc_io_buf(bp->b_vp, 0); + + io_bp->b_flags = bp->b_flags & (B_COMMIT_UPL | B_META | B_PAGEIO | B_CLUSTER | B_PHYS | B_ASYNC | B_READ); + + if (iodone) { + io_bp->b_transaction = arg; + io_bp->b_iodone = iodone; + io_bp->b_flags |= B_CALL; + } + if (bp->b_flags & B_CLUSTER) { + io_bp->b_upl = bp->b_upl; + io_bp->b_uploffset = bp->b_uploffset + io_offset; + } else { + io_bp->b_datap = (uintptr_t)(((char *)bp->b_datap) + io_offset); + } + io_bp->b_bcount = io_size; + + return (io_bp); +} + + + +void +buf_setfilter(buf_t bp, void (*filter)(buf_t, void *), void *transaction, + void **old_iodone, void **old_transaction) +{ + if (old_iodone) + *old_iodone = (void *)(bp->b_iodone); + if (old_transaction) + *old_transaction = (void *)(bp->b_transaction); + + bp->b_transaction = 
transaction; + bp->b_iodone = filter; + bp->b_flags |= B_FILTER; +} + + +daddr64_t +buf_blkno(buf_t bp) { + + return (bp->b_blkno); +} + +daddr64_t +buf_lblkno(buf_t bp) { + + return (bp->b_lblkno); +} + +void +buf_setblkno(buf_t bp, daddr64_t blkno) { + + bp->b_blkno = blkno; +} + +void +buf_setlblkno(buf_t bp, daddr64_t lblkno) { + + bp->b_lblkno = lblkno; +} + +dev_t +buf_device(buf_t bp) { + + return (bp->b_dev); +} + +errno_t +buf_setdevice(buf_t bp, vnode_t vp) { + + if ((vp->v_type != VBLK) && (vp->v_type != VCHR)) + return EINVAL; + bp->b_dev = vp->v_rdev; + + return 0; +} + + +void * +buf_drvdata(buf_t bp) { + + return (bp->b_drvdata); +} + +void +buf_setdrvdata(buf_t bp, void *drvdata) { + + bp->b_drvdata = drvdata; +} + +void * +buf_fsprivate(buf_t bp) { + + return (bp->b_fsprivate); +} + +void +buf_setfsprivate(buf_t bp, void *fsprivate) { + + bp->b_fsprivate = fsprivate; +} + +ucred_t +buf_rcred(buf_t bp) { + + return (bp->b_rcred); +} + +ucred_t +buf_wcred(buf_t bp) { + + return (bp->b_wcred); +} + +void * +buf_upl(buf_t bp) { + + return (bp->b_upl); +} + +uint32_t +buf_uploffset(buf_t bp) { + + return ((uint32_t)(bp->b_uploffset)); +} + +proc_t +buf_proc(buf_t bp) { + + return (bp->b_proc); +} + + +errno_t +buf_map(buf_t bp, caddr_t *io_addr) +{ + buf_t real_bp; + vm_offset_t vaddr; + kern_return_t kret; + + if ( !(bp->b_flags & B_CLUSTER)) { + *io_addr = (caddr_t)bp->b_datap; + return (0); + } + real_bp = (buf_t)(bp->b_real_bp); + + if (real_bp && real_bp->b_datap) { + /* + * b_real_bp is only valid if B_CLUSTER is SET + * if it's non-zero, then someone did a cluster_bp call + * if the backing physical pages were already mapped + * in before the call to cluster_bp (non-zero b_datap), + * then we just use that mapping + */ + *io_addr = (caddr_t)real_bp->b_datap; + return (0); + } + kret = ubc_upl_map(bp->b_upl, &vaddr); /* Map it in */ + + if (kret != KERN_SUCCESS) { + *io_addr = 0; + + return(ENOMEM); + } + vaddr += bp->b_uploffset; + + *io_addr = (caddr_t)vaddr; + + return (0); +} + +errno_t +buf_unmap(buf_t bp) +{ + buf_t real_bp; + kern_return_t kret; + + if ( !(bp->b_flags & B_CLUSTER)) + return (0); + /* + * see buf_map for the explanation + */ + real_bp = (buf_t)(bp->b_real_bp); + + if (real_bp && real_bp->b_datap) + return (0); + + if (bp->b_lflags & BL_IOBUF) { + /* + * when we commit these pages, we'll hit + * it with UPL_COMMIT_INACTIVE which + * will clear the reference bit that got + * turned on when we touched the mapping + */ + bp->b_flags |= B_AGE; + } + kret = ubc_upl_unmap(bp->b_upl); + + if (kret != KERN_SUCCESS) + return (EINVAL); + return (0); +} + + +void +buf_clear(buf_t bp) { + caddr_t baddr; + + if (buf_map(bp, &baddr) == 0) { + bzero(baddr, bp->b_bcount); + buf_unmap(bp); + } + bp->b_resid = 0; +} + + + +/* + * Read or write a buffer that is not contiguous on disk. + * buffer is marked done/error at the conclusion + */ +static int +buf_strategy_fragmented(vnode_t devvp, buf_t bp, off_t f_offset, size_t contig_bytes) +{ + vnode_t vp = buf_vnode(bp); + buf_t io_bp; /* For reading or writing a single block */ + int io_direction; + int io_resid; + size_t io_contig_bytes; + daddr64_t io_blkno; + int error = 0; + int bmap_flags; + + /* + * save our starting point... the bp was already mapped + * in buf_strategy before we got called + * no sense doing it again. + */ + io_blkno = bp->b_blkno; + /* + * Make sure we redo this mapping for the next I/O
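/*
 * [Editorial sketch -- not part of the xnu-792 patch.]  buf_map()/buf_unmap(),
 * just defined, replace direct b_data access for B_CLUSTER buffers, whose
 * pages live in a UPL rather than a permanent kernel mapping.  A minimal,
 * hypothetical caller (the myfs_* name is illustrative; buf_count() is
 * assumed to be the b_bcount accessor from the same KPI family):
 */
static errno_t
myfs_sum_buffer(buf_t bp, uint32_t *sum)
{
        caddr_t  data;
        errno_t  error;
        uint32_t i, s = 0;

        if ((error = buf_map(bp, &data)))       /* maps the UPL when B_CLUSTER is set */
                return (error);                 /* ENOMEM if the map failed */

        for (i = 0; i < buf_count(bp); i++)
                s += (unsigned char)data[i];
        *sum = s;

        return (buf_unmap(bp));                 /* a no-op for non-cluster buffers */
}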
+ * i.e. this can never be a 'permanent' mapping + */ + bp->b_blkno = bp->b_lblkno; + + /* + * Get an io buffer to do the deblocking + */ + io_bp = alloc_io_buf(devvp, 0); + + io_bp->b_lblkno = bp->b_lblkno; + io_bp->b_datap = bp->b_datap; + io_resid = bp->b_bcount; + io_direction = bp->b_flags & B_READ; + io_contig_bytes = contig_bytes; + + if (bp->b_flags & B_READ) + bmap_flags = VNODE_READ; + else + bmap_flags = VNODE_WRITE; + + for (;;) { + if (io_blkno == -1) + /* + * this is unexpected, but we'll allow for it + */ + bzero((caddr_t)io_bp->b_datap, (int)io_contig_bytes); + else { + io_bp->b_bcount = io_contig_bytes; + io_bp->b_bufsize = io_contig_bytes; + io_bp->b_resid = io_contig_bytes; + io_bp->b_blkno = io_blkno; + + buf_reset(io_bp, io_direction); + /* + * Call the device to do the I/O and wait for it + */ + if ((error = VNOP_STRATEGY(io_bp))) + break; + if ((error = (int)buf_biowait(io_bp))) + break; + if (io_bp->b_resid) { + io_resid -= (io_contig_bytes - io_bp->b_resid); + break; + } + } + if ((io_resid -= io_contig_bytes) == 0) + break; + f_offset += io_contig_bytes; + io_bp->b_datap += io_contig_bytes; + + /* + * Map the current position to a physical block number + */ + if ((error = VNOP_BLOCKMAP(vp, f_offset, io_resid, &io_blkno, &io_contig_bytes, NULL, bmap_flags, NULL))) + break; + } + buf_free(io_bp); + + if (error) + buf_seterror(bp, error); + bp->b_resid = io_resid; + /* + * This I/O is now complete + */ + buf_biodone(bp); + + return error; +} + + +/* + * struct vnop_strategy_args { + * struct buf *a_bp; + * } *ap; + */ +errno_t +buf_strategy(vnode_t devvp, void *ap) +{ + buf_t bp = ((struct vnop_strategy_args *)ap)->a_bp; + vnode_t vp = bp->b_vp; + int bmap_flags; + errno_t error; + + if (vp == NULL || vp->v_type == VCHR || vp->v_type == VBLK) + panic("buf_strategy: b_vp == NULL || vtype == VCHR | VBLK\n"); + /* + * associate the physical device + * with this buf_t even if we don't + * end up issuing the I/O... + */ + bp->b_dev = devvp->v_rdev; + + if (bp->b_flags & B_READ) + bmap_flags = VNODE_READ; + else + bmap_flags = VNODE_WRITE; + + if ( !(bp->b_flags & B_CLUSTER)) { + + if ( (bp->b_upl) ) { + /* + * we have a UPL associated with this bp + * go through cluster_bp which knows how + * to deal with filesystem block sizes + * that aren't equal to the page size + */ + return (cluster_bp(bp)); + } + if (bp->b_blkno == bp->b_lblkno) { + off_t f_offset; + size_t contig_bytes; + + if ((error = VNOP_BLKTOOFF(vp, bp->b_lblkno, &f_offset))) { + buf_seterror(bp, error); + buf_biodone(bp); + + return (error); + } + if ((error = VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL))) { + buf_seterror(bp, error); + buf_biodone(bp); + + return (error); + } + if (bp->b_blkno == -1) + buf_clear(bp); + else if ((long)contig_bytes < bp->b_bcount) + return (buf_strategy_fragmented(devvp, bp, f_offset, contig_bytes)); + } + if (bp->b_blkno == -1) { + buf_biodone(bp); + return (0); + } + } + /* + * we can issue the I/O because... + * either B_CLUSTER is set which + * means that the I/O is properly set + * up to be a multiple of the page size, or + * we were able to successfully set up the + * physical block mapping + */ + return (VOCALL(devvp->v_op, VOFFSET(vnop_strategy), ap)); +}
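/*
 * [Editorial sketch -- not part of the patch.]  buf_strategy() above is
 * designed to be called from a filesystem's own strategy entry point: it
 * resolves the logical block through VNOP_BLKTOOFF()/VNOP_BLOCKMAP(),
 * zero-fills holes, hands fragmented extents to buf_strategy_fragmented(),
 * and finally calls through to the underlying device.  A minimal caller,
 * assuming a hypothetical myfs mount that caches its device vnode:
 */
static int
myfs_vnop_strategy(struct vnop_strategy_args *ap)
{
        buf_t bp = ap->a_bp;
        struct myfsmount *mymp = MYFS_VTOM(buf_vnode(bp));  /* hypothetical helper */

        return (buf_strategy(mymp->mym_devvp, ap));
}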
+ + + +buf_t +buf_alloc(vnode_t vp) +{ + return(alloc_io_buf(vp, 0)); +} + +void +buf_free(buf_t bp) { + + free_io_buf(bp); +} + + + +void +buf_iterate(vnode_t vp, int (*callout)(buf_t, void *), int flags, void *arg) { + buf_t bp; + int retval; + struct buflists local_iterblkhd; + int lock_flags = BAC_NOWAIT | BAC_REMOVE; + + if (flags & BUF_SKIP_LOCKED) + lock_flags |= BAC_SKIP_LOCKED; + if (flags & BUF_SKIP_NONLOCKED) + lock_flags |= BAC_SKIP_NONLOCKED; + + lck_mtx_lock(buf_mtxp); + + if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY)) { + lck_mtx_unlock(buf_mtxp); + return; + } + while (!LIST_EMPTY(&local_iterblkhd)) { + bp = LIST_FIRST(&local_iterblkhd); + LIST_REMOVE(bp, b_vnbufs); + LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs); + + if (buf_acquire_locked(bp, lock_flags, 0, 0)) + continue; + + lck_mtx_unlock(buf_mtxp); + + retval = callout(bp, arg); + + switch (retval) { + case BUF_RETURNED: + buf_brelse(bp); + break; + case BUF_CLAIMED: + break; + case BUF_RETURNED_DONE: + buf_brelse(bp); + lck_mtx_lock(buf_mtxp); + goto out; + case BUF_CLAIMED_DONE: + lck_mtx_lock(buf_mtxp); + goto out; + } + lck_mtx_lock(buf_mtxp); + } +out: + buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY); + + lck_mtx_unlock(buf_mtxp); +}
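/*
 * [Editorial sketch -- not part of the patch.]  A buf_iterate() callout is
 * handed each dirty buffer already acquired (BAC_REMOVE | BAC_NOWAIT) and
 * reports its disposition with the BUF_* codes consumed by the switch above.
 * A hypothetical callout that pushes delayed writes and releases the rest
 * (buf_flags() is assumed to be the b_flags accessor from this KPI family):
 */
static int
myfs_push_one(buf_t bp, void *arg)
{
        (void)arg;

        if (buf_flags(bp) & B_DELWRI) {
                (void)buf_bawrite(bp);  /* consumes the buffer */
                return (BUF_CLAIMED);
        }
        return (BUF_RETURNED);          /* the iterator will buf_brelse() it */
}

/* usage: buf_iterate(vp, myfs_push_one, BUF_SKIP_LOCKED, NULL); */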
+ + +/* + * Flush out and invalidate all buffers associated with a vnode. + */ +int +buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo) +{ + buf_t bp; + int error = 0; + int must_rescan = 1; + struct buflists local_iterblkhd; + + lck_mtx_lock(buf_mtxp); + + for (;;) { + if (must_rescan == 0) + /* + * the lists may not be empty, but all that's left at this + * point are metadata or B_LOCKED buffers which are being + * skipped... we know this because we made it through both + * the clean and dirty lists without dropping buf_mtxp... + * each time we drop buf_mtxp we bump "must_rescan" + */ + break; + if (LIST_EMPTY(&vp->v_cleanblkhd) && LIST_EMPTY(&vp->v_dirtyblkhd)) + break; + must_rescan = 0; + /* + * iterate the clean list + */ + if (buf_iterprepare(vp, &local_iterblkhd, VBI_CLEAN)) { + goto try_dirty_list; + } + while (!LIST_EMPTY(&local_iterblkhd)) { + bp = LIST_FIRST(&local_iterblkhd); + + LIST_REMOVE(bp, b_vnbufs); + LIST_INSERT_HEAD(&vp->v_cleanblkhd, bp, b_vnbufs); + + /* + * some filesystems distinguish meta data blocks with a negative logical block # + */ + if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META))) + continue; + + if ( (error = (int)buf_acquire_locked(bp, BAC_REMOVE | BAC_SKIP_LOCKED, slpflag, slptimeo)) ) { + if (error == EDEADLK) + /* + * this buffer was marked B_LOCKED... + * we didn't drop buf_mtxp, so we + * don't need to rescan + */ + continue; + if (error == EAGAIN) { + /* + * found a busy buffer... we blocked and + * dropped buf_mtxp, so we're going to + * need to rescan after this pass is completed + */ + must_rescan++; + continue; + } + /* + * got some kind of 'real' error out of the msleep + * in buf_acquire_locked, terminate the scan and return the error + */ + buf_itercomplete(vp, &local_iterblkhd, VBI_CLEAN); + + lck_mtx_unlock(buf_mtxp); + return (error); + } + lck_mtx_unlock(buf_mtxp); + + SET(bp->b_flags, B_INVAL); + buf_brelse(bp); + + lck_mtx_lock(buf_mtxp); + + /* + * by dropping buf_mtxp, we allow new + * buffers to be added to the vnode list(s) + * we'll have to rescan at least once more + * if the queues aren't empty + */ + must_rescan++; + } + buf_itercomplete(vp, &local_iterblkhd, VBI_CLEAN); + +try_dirty_list: + /* + * Now iterate on dirty blks + */ + if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY)) { + continue; + } + while (!LIST_EMPTY(&local_iterblkhd)) { + bp = LIST_FIRST(&local_iterblkhd); + + LIST_REMOVE(bp, b_vnbufs); + LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs); + + /* + * some filesystems distinguish meta data blocks with a negative logical block # + */ + if ((flags & BUF_SKIP_META) && (bp->b_lblkno < 0 || ISSET(bp->b_flags, B_META))) + continue; + + if ( (error = (int)buf_acquire_locked(bp, BAC_REMOVE | BAC_SKIP_LOCKED, slpflag, slptimeo)) ) { + if (error == EDEADLK) + /* + * this buffer was marked B_LOCKED... + * we didn't drop buf_mtxp, so we + * don't need to rescan + */ + continue; + if (error == EAGAIN) { + /* + * found a busy buffer... we blocked and + * dropped buf_mtxp, so we're going to + * need to rescan after this pass is completed + */ + must_rescan++; + continue; + } + /* + * got some kind of 'real' error out of the msleep + * in buf_acquire_locked, terminate the scan and return the error + */ + buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY); + + lck_mtx_unlock(buf_mtxp); + return (error); + } + lck_mtx_unlock(buf_mtxp); + + SET(bp->b_flags, B_INVAL); + + if (ISSET(bp->b_flags, B_DELWRI) && (flags & BUF_WRITE_DATA)) + (void) VNOP_BWRITE(bp); + else + buf_brelse(bp); + + lck_mtx_lock(buf_mtxp); + /* + * by dropping buf_mtxp, we allow new + * buffers to be added to the vnode list(s) + * we'll have to rescan at least once more + * if the queues aren't empty + */ + must_rescan++; + } + buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY); + } + lck_mtx_unlock(buf_mtxp); + + return (0); +} + +void +buf_flushdirtyblks(vnode_t vp, int wait, int flags, char *msg) { + buf_t bp; + int writes_issued = 0; + errno_t error; + int busy = 0; + struct buflists local_iterblkhd; + int lock_flags = BAC_NOWAIT | BAC_REMOVE; + + if (flags & BUF_SKIP_LOCKED) + lock_flags |= BAC_SKIP_LOCKED; + if (flags & BUF_SKIP_NONLOCKED) + lock_flags |= BAC_SKIP_NONLOCKED; +loop: + lck_mtx_lock(buf_mtxp); + + if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY) == 0) { + while (!LIST_EMPTY(&local_iterblkhd)) { + bp = LIST_FIRST(&local_iterblkhd); + LIST_REMOVE(bp, b_vnbufs); + LIST_INSERT_HEAD(&vp->v_dirtyblkhd, bp, b_vnbufs); + + if ((error = buf_acquire_locked(bp, lock_flags, 0, 0)) == EBUSY) + busy++; + if (error) + continue; + lck_mtx_unlock(buf_mtxp); + + bp->b_flags &= ~B_LOCKED; + + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below.
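/*
 * [Editorial sketch -- not part of the patch.]  Typical teardown usage of
 * the two routines defined here: push the dirty queue, then invalidate
 * whatever remains, writing stragglers (BUF_WRITE_DATA) instead of
 * dropping them:
 *
 *      buf_flushdirtyblks(vp, 1, 0, "myfs_unmount");          (wait == 1)
 *      error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
 *
 * slpflag/slptimeo of 0 request an uninterruptible, untimed wait.
 */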
+ */ + if ((bp->b_vp == vp) || (wait == 0)) + (void) buf_bawrite(bp); + else + (void) VNOP_BWRITE(bp); + writes_issued++; + + lck_mtx_lock(buf_mtxp); + } + buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY); + } + lck_mtx_unlock(buf_mtxp); + + if (wait) { + (void)vnode_waitforwrites(vp, 0, 0, 0, msg); + + if (vp->v_dirtyblkhd.lh_first && busy) { + /* + * we had one or more BUSY buffers on + * the dirtyblock list... most likely + * these are due to delayed writes that + * were moved to the bclean queue but + * have not yet been 'written'. + * if we issued some writes on the + * previous pass, we try again immediately + * if we didn't, we'll sleep for some time + * to allow the state to change... + */ + if (writes_issued == 0) { + (void)tsleep((caddr_t)&vp->v_numoutput, + PRIBIO + 1, "vnode_flushdirtyblks", hz/20); + } + writes_issued = 0; + busy = 0; + + goto loop; + } + } +} + + +/* + * called with buf_mtxp held... + * this lock protects the queue manipulation + */ +static int +buf_iterprepare(vnode_t vp, struct buflists *iterheadp, int flags) +{ + struct buflists * listheadp; + + if (flags & VBI_DIRTY) + listheadp = &vp->v_dirtyblkhd; + else + listheadp = &vp->v_cleanblkhd; + + while (vp->v_iterblkflags & VBI_ITER) { + vp->v_iterblkflags |= VBI_ITERWANT; + msleep(&vp->v_iterblkflags, buf_mtxp, 0, "buf_iterprepare", 0); + } + if (LIST_EMPTY(listheadp)) { + LIST_INIT(iterheadp); + return(EINVAL); + } + vp->v_iterblkflags |= VBI_ITER; + + iterheadp->lh_first = listheadp->lh_first; + listheadp->lh_first->b_vnbufs.le_prev = &iterheadp->lh_first; + LIST_INIT(listheadp); + + return(0); +} + +/* + * called with buf_mtxp held... + * this lock protects the queue manipulation + */ +static void +buf_itercomplete(vnode_t vp, struct buflists *iterheadp, int flags) +{ + struct buflists * listheadp; + buf_t bp; + + if (flags & VBI_DIRTY) + listheadp = &vp->v_dirtyblkhd; + else + listheadp = &vp->v_cleanblkhd; + + while (!LIST_EMPTY(iterheadp)) { + bp = LIST_FIRST(iterheadp); + LIST_REMOVE(bp, b_vnbufs); + LIST_INSERT_HEAD(listheadp, bp, b_vnbufs); + } + vp->v_iterblkflags &= ~VBI_ITER; + + if (vp->v_iterblkflags & VBI_ITERWANT) { + vp->v_iterblkflags &= ~VBI_ITERWANT; + wakeup(&vp->v_iterblkflags); + } +} + + +static void +bremfree_locked(buf_t bp) +{ + struct bqueues *dp = NULL; + int whichq = -1; + + /* + * We only calculate the head of the freelist when removing + * the last element of the list as that is the only time that + * it is needed (e.g. to reset the tail pointer). + * + * NB: This makes an assumption about how tailq's are implemented. + */ + if (bp->b_freelist.tqe_next == NULL) { + for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) + if (dp->tqh_last == &bp->b_freelist.tqe_next) + break; + if (dp == &bufqueues[BQUEUES]) + panic("bremfree: lost tail"); + } + TAILQ_REMOVE(dp, bp, b_freelist); + whichq = bp->b_whichq; +#if BALANCE_QUEUES + bufqdec(whichq); +#endif + bp->b_whichq = -1; + bp->b_timestamp = 0; +} + +/* + * Associate a buffer with a vnode. + */ +static void +bgetvp(vnode_t vp, buf_t bp) +{ + + if (bp->b_vp != vp) + panic("bgetvp: not free"); + + if (vp->v_type == VBLK || vp->v_type == VCHR) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; + /* + * Insert onto list for new vnode. + */ + lck_mtx_lock(buf_mtxp); + bufinsvn(bp, &vp->v_cleanblkhd); + lck_mtx_unlock(buf_mtxp); +} + +/* + * Disassociate a buffer from a vnode. 
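/*
 * [Editorial sketch -- not part of the patch.]  buf_iterprepare() and
 * buf_itercomplete() above implement a "steal the list" idiom: while
 * holding buf_mtxp, the vnode's clean or dirty list head is spliced onto
 * a local list so the walker can drop and retake the mutex per buffer
 * without losing its place; VBI_ITER/VBI_ITERWANT serialize concurrent
 * walkers.  Every caller in this file has the shape:
 *
 *      lck_mtx_lock(buf_mtxp);
 *      if (buf_iterprepare(vp, &local_iterblkhd, VBI_DIRTY) == 0) {
 *              while (!LIST_EMPTY(&local_iterblkhd)) {
 *                      ... move one bp back onto vp's list, acquire it,
 *                          drop buf_mtxp, do the work, relock ...
 *              }
 *              buf_itercomplete(vp, &local_iterblkhd, VBI_DIRTY);
 *      }
 *      lck_mtx_unlock(buf_mtxp);
 */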
+ */ +static void +brelvp(buf_t bp) +{ + vnode_t vp; + + if ((vp = bp->b_vp) == (vnode_t)NULL) + panic("brelvp: NULL vp"); + /* + * Delete from old vnode list, if on one. + */ + lck_mtx_lock(buf_mtxp); + if (bp->b_vnbufs.le_next != NOLIST) + bufremvn(bp); + lck_mtx_unlock(buf_mtxp); + + bp->b_vp = (vnode_t)NULL; +} + +/* + * Reassign a buffer from one vnode to another. + * Used to assign file specific control information + * (indirect blocks) to the vnode to which they belong. + */ +static void +buf_reassign(buf_t bp, vnode_t newvp) +{ + register struct buflists *listheadp; - /* Wait for the read to complete, and return result. */ - return (biowait(bp)); + if (newvp == NULL) { + printf("buf_reassign: NULL"); + return; + } + lck_mtx_lock(buf_mtxp); + + /* + * Delete from old vnode list, if on one. + */ + if (bp->b_vnbufs.le_next != NOLIST) + bufremvn(bp); + /* + * If dirty, put on list of dirty buffers; + * otherwise insert onto list of clean buffers. + */ + if (ISSET(bp->b_flags, B_DELWRI)) + listheadp = &newvp->v_dirtyblkhd; + else + listheadp = &newvp->v_cleanblkhd; + bufinsvn(bp, listheadp); + + lck_mtx_unlock(buf_mtxp); } -/* - * Read-ahead multiple disk blocks. The first is sync, the rest async. - */ -int -breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp) - struct vnode *vp; - daddr_t blkno; int size; - daddr_t rablks[]; int rasizes[]; - int nrablks; - struct ucred *cred; - struct buf **bpp; +static __inline__ void +bufhdrinit(buf_t bp) { - return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_READ)); + bzero((char *)bp, sizeof *bp); + bp->b_dev = NODEV; + bp->b_rcred = NOCRED; + bp->b_wcred = NOCRED; + bp->b_vnbufs.le_next = NOLIST; + bp->b_flags = B_INVAL; + + return; } /* - * Read-ahead multiple disk blocks. The first is sync, the rest async. - * [breadn() for meta-data] + * Initialize buffers and hash links for buffers. 
*/ -int -meta_breadn(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp) - struct vnode *vp; - daddr_t blkno; int size; - daddr_t rablks[]; int rasizes[]; - int nrablks; - struct ucred *cred; - struct buf **bpp; +__private_extern__ void +bufinit() { - return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_META)); + buf_t bp; + struct bqueues *dp; + int i; + int metabuf; + long whichq; + + /* Initialize the buffer queues ('freelists') and the hash table */ + for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) + TAILQ_INIT(dp); + bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); + + metabuf = nbuf/8; /* reserved for meta buf */ + + /* Initialize the buffer headers */ + for (i = 0; i < nbuf; i++) { + bp = &buf[i]; + bufhdrinit(bp); + + /* + * metabuf buffer headers on the meta-data list and + * rest of the buffer headers on the empty list + */ + if (--metabuf) + whichq = BQ_META; + else + whichq = BQ_EMPTY; + + BLISTNONE(bp); + dp = &bufqueues[whichq]; + binsheadfree(bp, dp, whichq); + binshash(bp, &invalhash); + } + + for (; i < nbuf + niobuf; i++) { + bp = &buf[i]; + bufhdrinit(bp); + binsheadfree(bp, &iobufqueue, -1); + } + + /* + * allocate lock group attribute and group + */ + buf_mtx_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(buf_mtx_grp_attr); + buf_mtx_grp = lck_grp_alloc_init("buffer cache", buf_mtx_grp_attr); + + /* + * allocate the lock attribute + */ + buf_mtx_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(buf_mtx_attr); + + /* + * allocate and initialize mutex's for the buffer and iobuffer pools + */ + buf_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr); + iobuffer_mtxp = lck_mtx_alloc_init(buf_mtx_grp, buf_mtx_attr); + + if (iobuffer_mtxp == NULL) + panic("couldn't create iobuffer mutex"); + + if (buf_mtxp == NULL) + panic("couldn't create buf mutex"); + + /* + * allocate and initialize cluster specific global locks... + */ + cluster_init(); + + printf("using %d buffer headers and %d cluster IO buffer headers\n", + nbuf, niobuf); + + /* Set up zones used by the buffer cache */ + bufzoneinit(); + + /* start the bcleanbuf() thread */ + bcleanbuf_thread_init(); + +#if BALANCE_QUEUES + { + static void bufq_balance_thread_init(); + /* create a thread to do dynamic buffer queue balancing */ + bufq_balance_thread_init(); + } +#endif /* notyet */ +} + +static struct buf * +bio_doread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, int async, int queuetype) +{ + buf_t bp; + + bp = buf_getblk(vp, blkno, size, 0, 0, queuetype); + + /* + * If buffer does not have data valid, start a read. + * Note that if buffer is B_INVAL, buf_getblk() won't return it. + * Therefore, it's valid if it's I/O has completed or been delayed. + */ + if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { + struct proc *p; + + p = current_proc(); + + /* Start I/O for the buffer (keeping credentials). */ + SET(bp->b_flags, B_READ | async); + if (cred != NOCRED && bp->b_rcred == NOCRED) { + kauth_cred_ref(cred); + bp->b_rcred = cred; + } + + VNOP_STRATEGY(bp); + + trace(TR_BREADMISS, pack(vp, size), blkno); + + /* Pay for the read. */ + if (p && p->p_stats) + p->p_stats->p_ru.ru_inblock++; /* XXX */ + + if (async) { + /* + * since we asked for an ASYNC I/O + * the biodone will do the brelse + * we don't want to pass back a bp + * that we don't 'own' + */ + bp = NULL; + } + } else if (async) { + buf_brelse(bp); + bp = NULL; + } + + trace(TR_BREADHIT, pack(vp, size), blkno); + + return (bp); } /* - * Perform the reads for breadn() and meta_breadn(). 
+ * Perform the reads for buf_breadn() and buf_meta_breadn(). * Trivial modification to the breada algorithm presented in Bach (p.55). */ -static int -do_breadn_for_type(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, int *rasizes, - int nrablks, struct ucred *cred, struct buf **bpp, int queuetype) +static errno_t +do_breadn_for_type(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, + int nrablks, ucred_t cred, buf_t *bpp, int queuetype) { - register struct buf *bp; - int i; + buf_t bp; + int i; bp = *bpp = bio_doread(vp, blkno, size, cred, 0, queuetype); @@ -578,38 +1529,73 @@ do_breadn_for_type(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablks, i } /* Otherwise, we had to start a read for it; wait until it's valid. */ - return (biowait(bp)); + return (buf_biowait(bp)); } + /* - * Read with single-block read-ahead. Defined in Bach (p.55), but - * implemented as a call to breadn(). - * XXX for compatibility with old file systems. + * Read a disk block. + * This algorithm described in Bach (p.54). */ -int -breada(vp, blkno, size, rablkno, rabsize, cred, bpp) - struct vnode *vp; - daddr_t blkno; int size; - daddr_t rablkno; int rabsize; - struct ucred *cred; - struct buf **bpp; +errno_t +buf_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) +{ + buf_t bp; + + /* Get buffer for block. */ + bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_READ); + + /* Wait for the read to complete, and return result. */ + return (buf_biowait(bp)); +} + +/* + * Read a disk block. [bread() for meta-data] + * This algorithm described in Bach (p.54). + */ +errno_t +buf_meta_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) +{ + buf_t bp; + + /* Get buffer for block. */ + bp = *bpp = bio_doread(vp, blkno, size, cred, 0, BLK_META); + + /* Wait for the read to complete, and return result. */ + return (buf_biowait(bp)); +} + +/* + * Read-ahead multiple disk blocks. The first is sync, the rest async. + */ +errno_t +buf_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, ucred_t cred, buf_t *bpp) { + return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_READ)); +} - return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp)); +/* + * Read-ahead multiple disk blocks. The first is sync, the rest async. + * [buf_breadn() for meta-data] + */ +errno_t +buf_meta_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, ucred_t cred, buf_t *bpp) +{ + return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_META)); } /* * Block write. Described in Bach (p.56) */ -int -bwrite(bp) - struct buf *bp; +errno_t +buf_bwrite(buf_t bp) { - int rv, sync, wasdelayed; - struct proc *p = current_proc(); - struct vnode *vp = bp->b_vp; + int sync, wasdelayed; + errno_t rv; + proc_t p = current_proc(); + vnode_t vp = bp->b_vp; - if (bp->b_data == 0) { + if (bp->b_datap == 0) { if (brecover_data(bp) == 0) return (0); } @@ -617,10 +1603,9 @@ bwrite(bp) sync = !ISSET(bp->b_flags, B_ASYNC); wasdelayed = ISSET(bp->b_flags, B_DELWRI); CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI)); - if (wasdelayed) { - nbdwrite--; - wakeup((caddr_t)&nbdwrite); - } + + if (wasdelayed) + OSAddAtomic(-1, &nbdwrite); if (!sync) { /* @@ -630,25 +1615,24 @@ bwrite(bp) * be properly notified that its I/O has completed. 
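/*
 * [Editorial sketch -- not part of the patch.]  The classic consumer of the
 * buf_bread()/buf_meta_bread() pair defined above: read one metadata block,
 * copy it out, release the buffer.  MYFS_SB_BLOCK, MYFS_BLOCKSIZE and the
 * myfs_* names are hypothetical:
 */
static errno_t
myfs_read_superblock(vnode_t devvp, ucred_t cred, struct myfs_sb *sbp)
{
        buf_t   bp;
        errno_t error;

        error = buf_meta_bread(devvp, MYFS_SB_BLOCK, MYFS_BLOCKSIZE, cred, &bp);
        if (error) {
                buf_brelse(bp);         /* the buffer is returned even on error */
                return (error);
        }
        bcopy((void *)buf_dataptr(bp), sbp, sizeof (*sbp));
        buf_brelse(bp);
        return (0);
}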
*/ if (wasdelayed) - reassignbuf(bp, vp); + buf_reassign(bp, vp); else if (p && p->p_stats) p->p_stats->p_ru.ru_oublock++; /* XXX */ } - trace(TR_BUFWRITE, pack(vp, bp->b_bcount), bp->b_lblkno); /* Initiate disk write. Make sure the appropriate party is charged. */ - SET(bp->b_flags, B_WRITEINPROG); - vp->v_numoutput++; + + OSAddAtomic(1, &vp->v_numoutput); - VOP_STRATEGY(bp); + VNOP_STRATEGY(bp); if (sync) { /* * If I/O was synchronous, wait for it to complete. */ - rv = biowait(bp); + rv = buf_biowait(bp); /* * Pay for the I/O operation, if it's not been paid for, and @@ -656,7 +1640,7 @@ bwrite(bp) * were payed for above.) */ if (wasdelayed) - reassignbuf(bp, vp); + buf_reassign(bp, vp); else if (p && p->p_stats) p->p_stats->p_ru.ru_oublock++; /* XXX */ @@ -664,7 +1648,7 @@ bwrite(bp) /* Release the buffer. */ // XXXdbg - only if the unused bit is set if (!ISSET(bp->b_flags, B_NORELSE)) { - brelse(bp); + buf_brelse(bp); } else { CLR(bp->b_flags, B_NORELSE); } @@ -677,9 +1661,9 @@ bwrite(bp) int vn_bwrite(ap) - struct vop_bwrite_args *ap; + struct vnop_bwrite_args *ap; { - return (bwrite(ap->a_bp)); + return (buf_bwrite(ap->a_bp)); } /* @@ -697,17 +1681,15 @@ vn_bwrite(ap) * * Note: With the abilitty to allocate additional buffer * headers, we can get in to the situation where "too" many - * bdwrite()s can create situation where the kernel can create - * buffers faster than the disks can service. Doing a bawrite() in - * cases were we have "too many" outstanding bdwrite()s avoids that. + * buf_bdwrite()s can create a situation where the kernel can create + * buffers faster than the disks can service. Doing a buf_bawrite() in + * cases where we have "too many" outstanding buf_bdwrite()s avoids that. */ __private_extern__ int -bdwrite_internal(bp, return_error) - struct buf *bp; - int return_error; +bdwrite_internal(buf_t bp, int return_error) { - struct proc *p = current_proc(); - struct vnode *vp = bp->b_vp; + proc_t p = current_proc(); + vnode_t vp = bp->b_vp; /* * If the block hasn't been seen before: @@ -719,265 +1701,250 @@ bdwrite_internal(bp, return_error) SET(bp->b_flags, B_DELWRI); if (p && p->p_stats) p->p_stats->p_ru.ru_oublock++; /* XXX */ - nbdwrite ++; - reassignbuf(bp, vp); + OSAddAtomic(1, &nbdwrite); + buf_reassign(bp, vp); } /* If this is a tape block, write it the block now. */ if (ISSET(bp->b_flags, B_TAPE)) { - /* bwrite(bp); */ - VOP_BWRITE(bp); + VNOP_BWRITE(bp); return (0); } /* - * If the vnode has "too many" write operations in progress - * wait for them to finish the IO - */ - while (vp->v_numoutput >= BUFWRITE_THROTTLE) { - vp->v_flag |= VTHROTTLED; - (void)tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "bdwrite", 0); - } - - /* - * If we have too many delayed write buffers, - * more than we can "safely" handle, just fall back to - * doing the async write + * if we're not LOCKED, but the total number of delayed writes + * has climbed above 75% of the total buffers in the system + * return an error if the caller has indicated that it can + * handle one in this case, otherwise schedule the I/O now + * this is done to prevent us from allocating tons of extra + * buffers when dealing with virtual disks (i.e. DiskImages), + * because additional buffers are dynamically allocated to prevent + * deadlocks from occurring + * + * however, can't do a buf_bawrite() if the LOCKED bit is set because the + * buffer is part of a transaction and can't go to disk until + * the LOCKED bit is cleared.
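/*
 * [Editorial sketch -- not part of the patch.]  A metadata update in the
 * delayed-write style described above: read, modify in place, buf_bdwrite().
 * The block stays cached with B_DELWRI set and reaches disk later, or is
 * pushed immediately once delayed writes pass the 75% threshold.  The
 * myfs_* name and the stamp layout are hypothetical:
 */
static errno_t
myfs_stamp_block(vnode_t devvp, daddr64_t blk, int blksize, uint32_t stamp)
{
        buf_t   bp;
        errno_t error;

        if ((error = buf_meta_bread(devvp, blk, blksize, NOCRED, &bp))) {
                buf_brelse(bp);
                return (error);
        }
        bcopy(&stamp, (void *)buf_dataptr(bp), sizeof (stamp));

        return (buf_bdwrite(bp));       /* marks B_DELWRI and releases bp */
}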
*/ - if (nbdwrite < 0) - panic("bdwrite: Negative nbdwrite"); - - // can't do a bawrite() if the LOCKED bit is set because the - // buffer is part of a transaction and can't go to disk until - // the LOCKED bit is cleared. if (!ISSET(bp->b_flags, B_LOCKED) && nbdwrite > ((nbuf/4)*3)) { if (return_error) return (EAGAIN); - else - bawrite(bp); - return (0); + /* + * If the vnode has "too many" write operations in progress + * wait for them to finish the IO + */ + (void)vnode_waitforwrites(vp, VNODE_ASYNC_THROTTLE, 0, 0, (char *)"buf_bdwrite"); + + return (buf_bawrite(bp)); } /* Otherwise, the "write" is done, so mark and release the buffer. */ SET(bp->b_flags, B_DONE); - brelse(bp); + buf_brelse(bp); return (0); } -void -bdwrite(bp) - struct buf *bp; +errno_t +buf_bdwrite(buf_t bp) { - (void) bdwrite_internal(bp, 0); + return (bdwrite_internal(bp, 0)); } /* - * Asynchronous block write; just an asynchronous bwrite(). + * Asynchronous block write; just an asynchronous buf_bwrite(). * * Note: With the abilitty to allocate additional buffer * headers, we can get in to the situation where "too" many - * bawrite()s can create situation where the kernel can create + * buf_bawrite()s can create situation where the kernel can create * buffers faster than the disks can service. * We limit the number of "in flight" writes a vnode can have to * avoid this. */ static int -bawrite_internal(bp, throttle) - struct buf *bp; - int throttle; +bawrite_internal(buf_t bp, int throttle) { - struct vnode *vp = bp->b_vp; + vnode_t vp = bp->b_vp; if (vp) { - /* - * If the vnode has "too many" write operations in progress - * wait for them to finish the IO - */ - while (vp->v_numoutput >= BUFWRITE_THROTTLE) { - if (throttle) { - vp->v_flag |= VTHROTTLED; - (void)tsleep((caddr_t)&vp->v_numoutput, - PRIBIO + 1, "bawrite", 0); - } else - return (EWOULDBLOCK); - } + if (throttle) + /* + * If the vnode has "too many" write operations in progress + * wait for them to finish the IO + */ + (void)vnode_waitforwrites(vp, VNODE_ASYNC_THROTTLE, 0, 0, (const char *)"buf_bawrite"); + else if (vp->v_numoutput >= VNODE_ASYNC_THROTTLE) + /* + * return to the caller and + * let him decide what to do + */ + return (EWOULDBLOCK); } - SET(bp->b_flags, B_ASYNC); - VOP_BWRITE(bp); - return (0); -} -void -bawrite(bp) - struct buf *bp; -{ - (void) bawrite_internal(bp, 1); + return (VNOP_BWRITE(bp)); } -/* - * bwillwrite: - * - * Called prior to the locking of any vnodes when we are expecting to - * write. We do not want to starve the buffer cache with too many - * dirty buffers so we block here. By blocking prior to the locking - * of any vnodes we attempt to avoid the situation where a locked vnode - * prevents the various system daemons from flushing related buffers. - */ - -void -bwillwrite(void) +errno_t +buf_bawrite(buf_t bp) { - /* XXX To be implemented later */ + return (bawrite_internal(bp, 1)); } + /* * Release a buffer on to the free lists. * Described in Bach (p. 46). 
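/*
 * [Editorial note -- not part of the patch.]  The three write flavors now
 * exported, side by side:
 *
 *      buf_bwrite(bp)   synchronous unless B_ASYNC was already set;
 *                       the caller sees the I/O result.
 *      buf_bdwrite(bp)  delayed: mark B_DELWRI and return at once; may
 *                       degrade to buf_bawrite() under buffer pressure.
 *      buf_bawrite(bp)  asynchronous: starts the I/O immediately, but
 *                       throttles the caller once the vnode already has
 *                       VNODE_ASYNC_THROTTLE writes in flight.
 */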
*/ void -brelse(bp) - struct buf *bp; +buf_brelse(buf_t bp) { struct bqueues *bufq; - int s; - long whichq; + long whichq; + upl_t upl; + int need_wakeup = 0; + int need_bp_wakeup = 0; + + + if (bp->b_whichq != -1 || !(bp->b_lflags & BL_BUSY)) + panic("buf_brelse: bad buffer = %x\n", bp); + +#ifdef JOE_DEBUG + bp->b_stackbrelse[0] = __builtin_return_address(0); + bp->b_stackbrelse[1] = __builtin_return_address(1); + bp->b_stackbrelse[2] = __builtin_return_address(2); + bp->b_stackbrelse[3] = __builtin_return_address(3); + bp->b_stackbrelse[4] = __builtin_return_address(4); + bp->b_stackbrelse[5] = __builtin_return_address(5); + + bp->b_lastbrelse = current_thread(); + bp->b_tag = 0; +#endif + if (bp->b_lflags & BL_IOBUF) { + free_io_buf(bp); + return; + } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_START, - bp->b_lblkno * PAGE_SIZE, (int)bp, (int)bp->b_data, + bp->b_lblkno * PAGE_SIZE, (int)bp, (int)bp->b_datap, bp->b_flags, 0); trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); - // if we're invalidating a buffer that has the B_CALL bit - // set then call the b_iodone function so it gets cleaned - // up properly. - // + /* + * if we're invalidating a buffer that has the B_FILTER bit + * set then call the b_iodone function so it gets cleaned + * up properly. + * + * the HFS journal code depends on this + */ if (ISSET(bp->b_flags, B_META) && ISSET(bp->b_flags, B_INVAL)) { - if (ISSET(bp->b_flags, B_CALL) && !ISSET(bp->b_flags, B_DELWRI)) { - panic("brelse: CALL flag set but not DELWRI! bp 0x%x\n", bp); - } - if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */ - void (*iodone_func)(struct buf *) = bp->b_iodone; + if (ISSET(bp->b_flags, B_FILTER)) { /* if necessary, call out */ + void (*iodone_func)(struct buf *, void *) = bp->b_iodone; + void *arg = (void *)bp->b_transaction; - CLR(bp->b_flags, B_CALL); /* but note callout done */ + CLR(bp->b_flags, B_FILTER); /* but note callout done */ bp->b_iodone = NULL; + bp->b_transaction = NULL; if (iodone_func == NULL) { panic("brelse: bp @ 0x%x has NULL b_iodone!\n", bp); } - (*iodone_func)(bp); + (*iodone_func)(bp, arg); } } - - /* IO is done. Cleanup the UPL state */ - if (!ISSET(bp->b_flags, B_META) - && UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) { + /* + * I/O is done. 
Cleanup the UPL state + */ + upl = bp->b_upl; + + if ( !ISSET(bp->b_flags, B_META) && UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) { kern_return_t kret; - upl_t upl; int upl_flags; - if ( !ISSET(bp->b_flags, B_PAGELIST)) { + if ( (upl == NULL) ) { if ( !ISSET(bp->b_flags, B_INVAL)) { kret = ubc_create_upl(bp->b_vp, - ubc_blktooff(bp->b_vp, bp->b_lblkno), - bp->b_bufsize, - &upl, - NULL, - UPL_PRECIOUS); + ubc_blktooff(bp->b_vp, bp->b_lblkno), + bp->b_bufsize, + &upl, + NULL, + UPL_PRECIOUS); + if (kret != KERN_SUCCESS) - panic("brelse: Failed to get pagelists"); -#ifdef UBC_DEBUG + panic("brelse: Failed to create UPL"); +#ifdef UPL_DEBUG upl_ubc_alias_set(upl, bp, 5); -#endif /* UBC_DEBUG */ - } else - upl = (upl_t) 0; +#endif /* UPL_DEBUG */ + } } else { - upl = bp->b_pagelist; - - if (bp->b_data) { + if (bp->b_datap) { kret = ubc_upl_unmap(upl); if (kret != KERN_SUCCESS) - panic("kernel_upl_unmap failed"); - bp->b_data = 0; + panic("ubc_upl_unmap failed"); + bp->b_datap = (uintptr_t)NULL; } } if (upl) { if (bp->b_flags & (B_ERROR | B_INVAL)) { - if (bp->b_flags & (B_READ | B_INVAL)) + if (bp->b_flags & (B_READ | B_INVAL)) upl_flags = UPL_ABORT_DUMP_PAGES; else upl_flags = 0; + ubc_upl_abort(upl, upl_flags); } else { - if (ISSET(bp->b_flags, B_NEEDCOMMIT)) - upl_flags = UPL_COMMIT_CLEAR_DIRTY ; - else if (ISSET(bp->b_flags, B_DELWRI | B_WASDIRTY)) - upl_flags = UPL_COMMIT_SET_DIRTY ; - else - upl_flags = UPL_COMMIT_CLEAR_DIRTY ; + if (ISSET(bp->b_flags, B_DELWRI | B_WASDIRTY)) + upl_flags = UPL_COMMIT_SET_DIRTY ; + else + upl_flags = UPL_COMMIT_CLEAR_DIRTY ; + ubc_upl_commit_range(upl, 0, bp->b_bufsize, upl_flags | - UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); + UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); } - s = splbio(); - CLR(bp->b_flags, B_PAGELIST); - bp->b_pagelist = 0; - splx(s); + bp->b_upl = NULL; } } else { - if(ISSET(bp->b_flags, B_PAGELIST)) - panic("brelse: pagelist set for non VREG; vp=%x", bp->b_vp); + if ( (upl) ) + panic("brelse: UPL set for non VREG; vp=%x", bp->b_vp); } - /* Wake up any processes waiting for any buffer to become free. */ - if (needbuffer) { - needbuffer = 0; - wakeup(&needbuffer); - } - - /* Wake up any proceeses waiting for _this_ buffer to become free. */ - if (ISSET(bp->b_flags, B_WANTED)) { - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - } - - /* Block disk interrupts. */ - s = splbio(); - /* - * Determine which queue the buffer should be on, then put it there. + * If it's locked, don't report an error; try again later. */ - - /* If it's locked, don't report an error; try again later. */ if (ISSET(bp->b_flags, (B_LOCKED|B_ERROR)) == (B_LOCKED|B_ERROR)) CLR(bp->b_flags, B_ERROR); - - /* If it's not cacheable, or an error, mark it invalid. */ + /* + * If it's not cacheable, or an error, mark it invalid. + */ if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR))) SET(bp->b_flags, B_INVAL); - + if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) { /* * If it's invalid or empty, dissociate it from its vnode * and put on the head of the appropriate queue. */ - if (bp->b_vp) - brelvp(bp); - if (ISSET(bp->b_flags, B_DELWRI)) { - CLR(bp->b_flags, B_DELWRI); - nbdwrite--; - wakeup((caddr_t)&nbdwrite); - } + if (bp->b_vp) + brelvp(bp); + + if (ISSET(bp->b_flags, B_DELWRI)) + OSAddAtomic(-1, &nbdwrite); + + CLR(bp->b_flags, (B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE)); + /* + * Determine which queue the buffer should be on, then put it there. 
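/*
 * [Editorial sketch -- not part of the patch.]  The B_FILTER callout honored
 * above is installed with buf_setfilter(), defined earlier in this file; a
 * journaling layer can interpose on a buffer and later restore what it
 * displaced (jnl_end_txn and txn are hypothetical names):
 *
 *      void *old_iodone, *old_transaction;
 *
 *      buf_setfilter(bp, jnl_end_txn, txn, &old_iodone, &old_transaction);
 *
 * The filter is intended to run once -- on I/O completion, or here in
 * buf_brelse() when a B_META buffer is invalidated -- after which B_FILTER
 * is cleared.
 */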
+ */ if (bp->b_bufsize <= 0) whichq = BQ_EMPTY; /* no data */ else if (ISSET(bp->b_flags, B_META)) whichq = BQ_META; /* meta-data */ else whichq = BQ_AGE; /* invalid data */ - bufq = &bufqueues[whichq]; + + lck_mtx_lock(buf_mtxp); + binsheadfree(bp, bufq, whichq); } else { /* @@ -992,19 +1959,52 @@ brelse(bp) whichq = BQ_AGE; /* stale but valid data */ else whichq = BQ_LRU; /* valid data */ - bufq = &bufqueues[whichq]; + + CLR(bp->b_flags, (B_AGE | B_ASYNC | B_NOCACHE)); + + lck_mtx_lock(buf_mtxp); + binstailfree(bp, bufq, whichq); } + if (needbuffer) { + /* + * needbuffer is a global + * we're currently using buf_mtxp to protect it + * delay doing the actual wakeup until after + * we drop buf_mtxp + */ + needbuffer = 0; + need_wakeup = 1; + } + if (ISSET(bp->b_lflags, BL_WANTED)) { + /* + * delay the actual wakeup until after we + * clear BL_BUSY and we've dropped buf_mtxp + */ + need_bp_wakeup = 1; + } + /* + * Unlock the buffer. + */ + CLR(bp->b_lflags, (BL_BUSY | BL_WANTED)); - /* Unlock the buffer. */ - CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE)); - - /* Allow disk interrupts. */ - splx(s); + lck_mtx_unlock(buf_mtxp); + if (need_wakeup) { + /* + * Wake up any processes waiting for any buffer to become free. + */ + wakeup(&needbuffer); + } + if (need_bp_wakeup) { + /* + * Wake up any proceeses waiting for _this_ buffer to become free. + */ + wakeup(bp); + } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_END, - (int)bp, (int)bp->b_data, bp->b_flags, 0, 0); + (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0); } /* @@ -1014,10 +2014,25 @@ brelse(bp) * we normally don't return the buffer, unless the caller explicitly * wants us to. */ -struct buf * -incore(vp, blkno) - struct vnode *vp; - daddr_t blkno; +static boolean_t +incore(vnode_t vp, daddr64_t blkno) +{ + boolean_t retval; + + lck_mtx_lock(buf_mtxp); + + if (incore_locked(vp, blkno)) + retval = TRUE; + else + retval = FALSE; + lck_mtx_unlock(buf_mtxp); + + return (retval); +} + + +static buf_t +incore_locked(vnode_t vp, daddr64_t blkno) { struct buf *bp; @@ -1026,10 +2041,10 @@ incore(vp, blkno) /* Search hash chain */ for (; bp != NULL; bp = bp->b_hash.le_next) { if (bp->b_lblkno == blkno && bp->b_vp == vp && - !ISSET(bp->b_flags, B_INVAL)) + !ISSET(bp->b_flags, B_INVAL)) { return (bp); + } } - return (0); } @@ -1043,112 +2058,123 @@ incore(vp, blkno) * correct size. It is up to the caller to insure that the * cached blocks be of the correct size. 
*/ -struct buf * -getblk(vp, blkno, size, slpflag, slptimeo, operation) - register struct vnode *vp; - daddr_t blkno; - int size, slpflag, slptimeo, operation; +buf_t +buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int operation) { - struct buf *bp; - int s, err; + buf_t bp; + int err; upl_t upl; upl_page_info_t *pl; kern_return_t kret; - int error=0; - int pagedirty = 0; + int ret_only_valid; + struct timespec ts; + int upl_flags; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_START, - blkno * PAGE_SIZE, size, operation, 0, 0); -start: + (int)(blkno * PAGE_SIZE), size, operation, 0, 0); - s = splbio(); - if ((bp = incore(vp, blkno))) { - /* Found in the Buffer Cache */ - if (ISSET(bp->b_flags, B_BUSY)) { - /* but is busy */ + ret_only_valid = operation & BLK_ONLYVALID; + operation &= ~BLK_ONLYVALID; +start: + lck_mtx_lock(buf_mtxp); +start_locked: + if ((bp = incore_locked(vp, blkno))) { + /* + * Found in the Buffer Cache + */ + if (ISSET(bp->b_lflags, BL_BUSY)) { + /* + * but is busy + */ switch (operation) { case BLK_READ: case BLK_WRITE: case BLK_META: - SET(bp->b_flags, B_WANTED); + SET(bp->b_lflags, BL_WANTED); bufstats.bufs_busyincore++; - err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk", - slptimeo); - splx(s); + + /* + * don't retake the mutex after being awakened... + * the time out is in msecs + */ + ts.tv_sec = (slptimeo/1000); + ts.tv_nsec = (slptimeo % 1000) * 10 * NSEC_PER_USEC * 1000; + + err = msleep(bp, buf_mtxp, slpflag | PDROP | (PRIBIO + 1), "buf_getblk", &ts); + /* * Callers who call with PCATCH or timeout are * willing to deal with the NULL pointer */ - if (err && ((slpflag & PCATCH) || - ((err == EWOULDBLOCK) && slptimeo))) + if (err && ((slpflag & PCATCH) || ((err == EWOULDBLOCK) && slptimeo))) return (NULL); goto start; /*NOTREACHED*/ break; - case BLK_PAGEIN: - /* pagein operation must not use getblk */ - panic("getblk: pagein for incore busy buffer"); - splx(s); - /*NOTREACHED*/ - break; - - case BLK_PAGEOUT: - /* pageout operation must not use getblk */ - panic("getblk: pageout for incore busy buffer"); - splx(s); - /*NOTREACHED*/ - break; - default: - panic("getblk: %d unknown operation 1", operation); + /* + * unknown operation requested + */ + panic("getblk: paging or unknown operation for incore busy buffer - %x\n", operation); /*NOTREACHED*/ break; } } else { - /* not busy */ - SET(bp->b_flags, (B_BUSY | B_CACHE)); - bremfree(bp); + /* + * buffer in core and not busy + */ + if ( (bp->b_upl) ) + panic("buffer has UPL, but not marked BUSY: %x", bp); + SET(bp->b_lflags, BL_BUSY); + SET(bp->b_flags, B_CACHE); +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 1; +#endif + bremfree_locked(bp); bufstats.bufs_incore++; - splx(s); + + lck_mtx_unlock(buf_mtxp); - allocbuf(bp, size); - if (ISSET(bp->b_flags, B_PAGELIST)) - panic("pagelist buffer is not busy"); + if ( !ret_only_valid) + allocbuf(bp, size); + upl_flags = 0; switch (operation) { - case BLK_READ: case BLK_WRITE: - if (UBCISVALID(bp->b_vp) && bp->b_bufsize) { + /* + * "write" operation: let the UPL subsystem + * know that we intend to modify the buffer + * cache pages we're gathering. 
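/*
 * [Editorial sketch -- not part of the patch.]  The new BLK_ONLYVALID
 * modifier handled above turns buf_getblk() into a pure cache probe: when
 * the block is not already resident, NULL comes back instead of a freshly
 * allocated buffer.  A hypothetical probe:
 */
static int
myfs_block_is_cached(vnode_t vp, daddr64_t lblkno, int blksize)
{
        buf_t bp;

        bp = buf_getblk(vp, lblkno, blksize, 0, 0, BLK_META | BLK_ONLYVALID);
        if (bp == NULL)
                return (0);
        buf_brelse(bp);         /* it came back BL_BUSY; must be released */
        return (1);
}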
+ */ + upl_flags |= UPL_WILL_MODIFY; + case BLK_READ: + upl_flags |= UPL_PRECIOUS; + if (UBCINFOEXISTS(bp->b_vp) && bp->b_bufsize) { kret = ubc_create_upl(vp, - ubc_blktooff(vp, bp->b_lblkno), - bp->b_bufsize, - &upl, - &pl, - UPL_PRECIOUS); + ubc_blktooff(vp, bp->b_lblkno), + bp->b_bufsize, + &upl, + &pl, + upl_flags); if (kret != KERN_SUCCESS) - panic("Failed to get pagelists"); + panic("Failed to create UPL"); - SET(bp->b_flags, B_PAGELIST); - bp->b_pagelist = upl; + bp->b_upl = upl; - if (!upl_valid_page(pl, 0)) { - if (vp->v_tag != VT_NFS) - panic("getblk: incore buffer without valid page"); - CLR(bp->b_flags, B_CACHE); - } + if (upl_valid_page(pl, 0)) { + if (upl_dirty_page(pl, 0)) + SET(bp->b_flags, B_WASDIRTY); + else + CLR(bp->b_flags, B_WASDIRTY); + } else + CLR(bp->b_flags, (B_DONE | B_CACHE | B_WASDIRTY | B_DELWRI)); - if (upl_dirty_page(pl, 0)) - SET(bp->b_flags, B_WASDIRTY); - else - CLR(bp->b_flags, B_WASDIRTY); + kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data)); if (kret != KERN_SUCCESS) - panic("getblk: ubc_upl_map() failed with (%d)", - kret); - if (bp->b_data == 0) - panic("ubc_upl_map mapped 0"); + panic("getblk: ubc_upl_map() failed with (%d)", kret); } break; @@ -1157,35 +2183,42 @@ start: * VM is not involved in IO for the meta data * buffer already has valid data */ - if(bp->b_data == 0) - panic("bp->b_data null incore buf=%x", bp); - break; - - case BLK_PAGEIN: - case BLK_PAGEOUT: - panic("getblk: paging operation 1"); break; default: - panic("getblk: %d unknown operation 2", operation); + panic("getblk: paging or unknown operation for incore buffer- %d\n", operation); /*NOTREACHED*/ break; } } } else { /* not incore() */ int queue = BQ_EMPTY; /* Start with no preference */ - splx(s); - if ((operation == BLK_META) || (UBCINVALID(vp)) || - !(UBCINFOEXISTS(vp))) { - operation = BLK_META; + if (ret_only_valid) { + lck_mtx_unlock(buf_mtxp); + return (NULL); } + + if ((UBCINVALID(vp)) || !(UBCINFOEXISTS(vp))) + operation = BLK_META; + if ((bp = getnewbuf(slpflag, slptimeo, &queue)) == NULL) - goto start; - if (incore(vp, blkno)) { + goto start_locked; + + /* + * getnewbuf may block for a number of different reasons... + * if it does, it's then possible for someone else to + * create a buffer for the same block and insert it into + * the hash... 
if we see it incore at this point we dump + * the buffer we were working on and start over + */ + if (incore_locked(vp, blkno)) { SET(bp->b_flags, B_INVAL); binshash(bp, &invalhash); - brelse(bp); + + lck_mtx_unlock(buf_mtxp); + + buf_brelse(bp); goto start; } /* @@ -1194,15 +2227,11 @@ start: */ /* - * if it is meta, the queue may be set to other - * type so reset as well as mark it to be B_META + * mark the buffer as B_META if indicated * so that when buffer is released it will goto META queue - * Also, if the vnode is not VREG, then it is META */ - if (operation == BLK_META) { - SET(bp->b_flags, B_META); - queue = BQ_META; - } + if (operation == BLK_META) + SET(bp->b_flags, B_META); bp->b_blkno = bp->b_lblkno = blkno; bp->b_vp = vp; @@ -1212,158 +2241,138 @@ start: */ binshash(bp, BUFHASH(vp, blkno)); - s = splbio(); + lck_mtx_unlock(buf_mtxp); + bgetvp(vp, bp); - splx(s); allocbuf(bp, size); + upl_flags = 0; switch (operation) { case BLK_META: - /* buffer data is invalid */ - - if(bp->b_data == 0) - panic("bp->b_data is null %x",bp); - - bufstats.bufs_miss++; - - /* wakeup the buffer */ - CLR(bp->b_flags, B_WANTED); - wakeup(bp); + /* + * buffer data is invalid... + * + * I don't want to have to retake buf_mtxp, + * so the miss and vmhits counters are done + * with Atomic updates... all other counters + * in bufstats are protected with either + * buf_mtxp or iobuffer_mtxp + */ + OSAddAtomic(1, &bufstats.bufs_miss); break; - case BLK_READ: case BLK_WRITE: + /* + * "write" operation: let the UPL subsystem know + * that we intend to modify the buffer cache pages + * we're gathering. + */ + upl_flags |= UPL_WILL_MODIFY; + case BLK_READ: + { off_t f_offset; + size_t contig_bytes; + int bmap_flags; - if (ISSET(bp->b_flags, B_PAGELIST)) - panic("B_PAGELIST in bp=%x",bp); + if ( (bp->b_upl) ) + panic("bp already has UPL: %x",bp); + f_offset = ubc_blktooff(vp, blkno); + + upl_flags |= UPL_PRECIOUS; kret = ubc_create_upl(vp, - ubc_blktooff(vp, blkno), - bp->b_bufsize, - &upl, - &pl, - UPL_PRECIOUS); - if (kret != KERN_SUCCESS) - panic("Failed to get pagelists"); + f_offset, + bp->b_bufsize, + &upl, + &pl, + upl_flags); -#ifdef UBC_DEBUG + if (kret != KERN_SUCCESS) + panic("Failed to create UPL"); +#ifdef UPL_DEBUG upl_ubc_alias_set(upl, bp, 4); -#endif /* UBC_DEBUG */ - bp->b_pagelist = upl; - - SET(bp->b_flags, B_PAGELIST); +#endif /* UPL_DEBUG */ + bp->b_upl = upl; if (upl_valid_page(pl, 0)) { - SET(bp->b_flags, B_CACHE | B_DONE); - bufstats.bufs_vmhits++; - - pagedirty = upl_dirty_page(pl, 0); - if (pagedirty) - SET(bp->b_flags, B_WASDIRTY); - - if (vp->v_tag == VT_NFS) { - off_t f_offset; - int valid_size; + if (operation == BLK_READ) + bmap_flags = VNODE_READ; + else + bmap_flags = VNODE_WRITE; - bp->b_validoff = 0; - bp->b_dirtyoff = 0; + SET(bp->b_flags, B_CACHE | B_DONE); - f_offset = ubc_blktooff(vp, blkno); + OSAddAtomic(1, &bufstats.bufs_vmhits); - if (f_offset > vp->v_ubcinfo->ui_size) { - CLR(bp->b_flags, (B_CACHE|B_DONE|B_WASDIRTY)); - bp->b_validend = 0; - bp->b_dirtyend = 0; - } else { - valid_size = min(((unsigned int)(vp->v_ubcinfo->ui_size - f_offset)), PAGE_SIZE); - bp->b_validend = valid_size; + bp->b_validoff = 0; + bp->b_dirtyoff = 0; - if (pagedirty) - bp->b_dirtyend = valid_size; - else - bp->b_dirtyend = 0; + if (upl_dirty_page(pl, 0)) { + /* page is dirty */ + SET(bp->b_flags, B_WASDIRTY); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_NONE, - bp->b_validend, bp->b_dirtyend, - (int)vp->v_ubcinfo->ui_size, 0, 0); - } + bp->b_validend = bp->b_bcount; + 
bp->b_dirtyend = bp->b_bcount; } else { - bp->b_validoff = 0; - bp->b_dirtyoff = 0; - - if (pagedirty) { - /* page is dirty */ - bp->b_validend = bp->b_bcount; - bp->b_dirtyend = bp->b_bcount; - } else { - /* page is clean */ - bp->b_validend = bp->b_bcount; - bp->b_dirtyend = 0; - } - } - error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); - if(error) { - panic("getblk: VOP_BMAP failed"); - /*NOTREACHED*/ - /* - * XXX: We probably should invalidate the VM Page - */ - bp->b_error = error; - SET(bp->b_flags, (B_ERROR | B_INVAL)); - /* undo B_DONE that was set before upl_commit() */ - CLR(bp->b_flags, B_DONE); - brelse(bp); - return (0); + /* page is clean */ + bp->b_validend = bp->b_bcount; + bp->b_dirtyend = 0; } + /* + * try to recreate the physical block number associated with + * this buffer... + */ + if (VNOP_BLOCKMAP(vp, f_offset, bp->b_bcount, &bp->b_blkno, &contig_bytes, NULL, bmap_flags, NULL)) + panic("getblk: VNOP_BLOCKMAP failed"); + /* + * if the extent represented by this buffer + * is not completely physically contiguous on + * disk, than we can't cache the physical mapping + * in the buffer header + */ + if ((long)contig_bytes < bp->b_bcount) + bp->b_blkno = bp->b_lblkno; } else { - bufstats.bufs_miss++; - } - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data)); - if (kret != KERN_SUCCESS) { - panic("getblk: ubc_upl_map() " - "failed with (%d)", kret); + OSAddAtomic(1, &bufstats.bufs_miss); } - if (bp->b_data == 0) - panic("kernel_upl_map mapped 0"); - - break; + kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); - case BLK_PAGEIN: - case BLK_PAGEOUT: - panic("getblk: paging operation 2"); + if (kret != KERN_SUCCESS) + panic("getblk: ubc_upl_map() failed with (%d)", kret); break; + } default: - panic("getblk: %d unknown operation 3", operation); + panic("getblk: paging or unknown operation - %x", operation); /*NOTREACHED*/ break; } } - - if (bp->b_data == NULL) - panic("getblk: bp->b_addr is null"); - - if (bp->b_bufsize & 0xfff) { - if (ISSET(bp->b_flags, B_META) && (bp->b_bufsize & 0x1ff)) - panic("getblk: bp->b_bufsize = %d", bp->b_bufsize); - } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_END, - (int)bp, (int)bp->b_data, bp->b_flags, 3, 0); - + (int)bp, (int)bp->b_datap, bp->b_flags, 3, 0); + +#ifdef JOE_DEBUG + bp->b_stackgetblk[0] = __builtin_return_address(0); + bp->b_stackgetblk[1] = __builtin_return_address(1); + bp->b_stackgetblk[2] = __builtin_return_address(2); + bp->b_stackgetblk[3] = __builtin_return_address(3); + bp->b_stackgetblk[4] = __builtin_return_address(4); + bp->b_stackgetblk[5] = __builtin_return_address(5); +#endif return (bp); } /* * Get an empty, disassociated buffer of given size. 
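/*
 * [Editorial sketch -- not part of the patch.]  buf_geteblk() below hands
 * back an anonymous buffer (no vnode, hashed invalid) whose storage is
 * sized by allocbuf(); it serves as scratch space returned through the
 * normal release path.  A hypothetical use:
 */
static void
myfs_with_scratch(int size)
{
        buf_t bp = buf_geteblk(size);   /* loops until a header is available */

        bzero((void *)buf_dataptr(bp), size);
        /* ... use the scratch space ... */
        buf_brelse(bp);
}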
*/ -struct buf * -geteblk(size) +buf_t +buf_geteblk(size) int size; { - struct buf *bp; - int queue = BQ_EMPTY; + buf_t bp; + int queue = BQ_EMPTY; + + lck_mtx_lock(buf_mtxp); while ((bp = getnewbuf(0, 0, &queue)) == 0) ; @@ -1375,9 +2384,12 @@ geteblk(size) /* XXX need to implement logic to deal with other queues */ binshash(bp, &invalhash); - allocbuf(bp, size); bufstats.bufs_eblk++; + lck_mtx_unlock(buf_mtxp); + + allocbuf(bp, size); + return (bp); } @@ -1429,7 +2441,7 @@ getbufzone(size_t size) if ((size % 512) || (size < MINMETA) || (size > MAXMETA)) panic("getbufzone: incorect size = %d", size); - for (i = 0; meta_zones[i].mz_size != 0; i++) { + for (i = 0; meta_zones[i].mz_size != 0; i++) { if (meta_zones[i].mz_size >= size) break; } @@ -1450,91 +2462,69 @@ getbufzone(size_t size) */ int -allocbuf(bp, size) - struct buf *bp; - int size; +allocbuf(buf_t bp, int size) { vm_size_t desired_size; desired_size = roundup(size, CLBYTES); - if(desired_size < PAGE_SIZE) + if (desired_size < PAGE_SIZE) desired_size = PAGE_SIZE; if (desired_size > MAXBSIZE) panic("allocbuf: buffer larger than MAXBSIZE requested"); if (ISSET(bp->b_flags, B_META)) { - kern_return_t kret; zone_t zprev, z; - size_t nsize = roundup(size, MINMETA); - - if (bp->b_data) { - vm_offset_t elem = (vm_offset_t)bp->b_data; - - if (ISSET(bp->b_flags, B_ZALLOC)) - if (bp->b_bufsize <= MAXMETA) { - if (bp->b_bufsize < nsize) { - /* reallocate to a bigger size */ - - zprev = getbufzone(bp->b_bufsize); - if (nsize <= MAXMETA) { - desired_size = nsize; - z = getbufzone(nsize); - bp->b_data = (caddr_t)zalloc(z); - if(bp->b_data == 0) - panic("allocbuf: zalloc() returned NULL"); - } else { - kret = kmem_alloc(kernel_map, &bp->b_data, desired_size); - if (kret != KERN_SUCCESS) - panic("allocbuf: kmem_alloc() 0 returned %d", kret); - if(bp->b_data == 0) - panic("allocbuf: null b_data 0"); - CLR(bp->b_flags, B_ZALLOC); - } - bcopy((const void *)elem, bp->b_data, bp->b_bufsize); - zfree(zprev, elem); + int nsize = roundup(size, MINMETA); + + if (bp->b_datap) { + vm_offset_t elem = (vm_offset_t)bp->b_datap; + + if (ISSET(bp->b_flags, B_ZALLOC)) { + if (bp->b_bufsize < nsize) { + /* reallocate to a bigger size */ + + zprev = getbufzone(bp->b_bufsize); + if (nsize <= MAXMETA) { + desired_size = nsize; + z = getbufzone(nsize); + bp->b_datap = (uintptr_t)zalloc(z); } else { - desired_size = bp->b_bufsize; + bp->b_datap = (uintptr_t)NULL; + kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + CLR(bp->b_flags, B_ZALLOC); } - } else - panic("allocbuf: B_ZALLOC set incorrectly"); - else - if (bp->b_bufsize < desired_size) { + bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); + zfree(zprev, (void *)elem); + } else { + desired_size = bp->b_bufsize; + } + + } else { + if ((vm_size_t)bp->b_bufsize < desired_size) { /* reallocate to a bigger size */ - kret = kmem_alloc(kernel_map, &bp->b_data, desired_size); - if (kret != KERN_SUCCESS) - panic("allocbuf: kmem_alloc() returned %d", kret); - if(bp->b_data == 0) - panic("allocbuf: null b_data"); - bcopy((const void *)elem, bp->b_data, bp->b_bufsize); + bp->b_datap = (uintptr_t)NULL; + kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); kmem_free(kernel_map, elem, bp->b_bufsize); } else { desired_size = bp->b_bufsize; } + } } else { /* new allocation */ if (nsize <= MAXMETA) { desired_size = nsize; z = getbufzone(nsize); - bp->b_data = (caddr_t)zalloc(z); - if(bp->b_data == 0) - panic("allocbuf: 
zalloc() returned NULL 2"); + bp->b_datap = (uintptr_t)zalloc(z); SET(bp->b_flags, B_ZALLOC); - } else { - kret = kmem_alloc(kernel_map, &bp->b_data, desired_size); - if (kret != KERN_SUCCESS) - panic("allocbuf: kmem_alloc() 2 returned %d", kret); - if(bp->b_data == 0) - panic("allocbuf: null b_data 2"); - } + } else + kmem_alloc(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); } } - - if (ISSET(bp->b_flags, B_META) && (bp->b_data == 0)) - panic("allocbuf: bp->b_data is NULL, buf @ 0x%x", bp); - bp->b_bufsize = desired_size; bp->b_bcount = size; + return (0); } @@ -1554,30 +2544,33 @@ allocbuf(bp, size) * Initialize the fields and disassociate the buffer from the vnode. * Remove the buffer from the hash. Return the buffer and the queue * on which it was found. + * + * buf_mtxp is held upon entry + * returns with buf_mtxp locked */ -static struct buf * -getnewbuf(slpflag, slptimeo, queue) - int slpflag, slptimeo; - int *queue; +static buf_t +getnewbuf(int slpflag, int slptimeo, int * queue) { - register struct buf *bp; - register struct buf *lru_bp; - register struct buf *age_bp; - register struct buf *meta_bp; - register int age_time, lru_time, bp_time, meta_time; - int s; - int req = *queue; /* save it for restarts */ + buf_t bp; + buf_t lru_bp; + buf_t age_bp; + buf_t meta_bp; + int age_time, lru_time, bp_time, meta_time; + int req = *queue; /* save it for restarts */ + struct timespec ts; start: - s = splbio(); - - /* invalid request gets empty queue */ + /* + * invalid request gets empty queue + */ if ((*queue > BQUEUES) || (*queue < 0) || (*queue == BQ_LAUNDRY) || (*queue == BQ_LOCKED)) *queue = BQ_EMPTY; - /* (*queue == BQUEUES) means no preference */ + /* + * (*queue == BQUEUES) means no preference + */ if (*queue != BQUEUES) { /* Try for the requested queue first */ bp = bufqueues[*queue].tqh_first; @@ -1600,10 +2593,13 @@ start: *queue = BQ_EMPTY; goto found; } + lck_mtx_unlock(buf_mtxp); - /* Create a new temparory buffer header */ + /* Create a new temporary buffer header */ bp = (struct buf *)zalloc(buf_hdr_zone); + lck_mtx_lock(buf_mtxp); + if (bp) { bufhdrinit(bp); BLISTNONE(bp); @@ -1614,15 +2610,16 @@ start: buf_hdr_count++; goto found; } - - /* Log this error condition */ - printf("getnewbuf: No useful buffers"); + bufstats.bufs_sleeps++; /* wait for a free buffer of any kind */ needbuffer = 1; - bufstats.bufs_sleeps++; - tsleep(&needbuffer, slpflag|(PRIBIO+1), "getnewbuf", slptimeo); - splx(s); + /* hz value is 100 */ + ts.tv_sec = (slptimeo/1000); + /* the hz value is 100; which leads to 10ms */ + ts.tv_nsec = (slptimeo % 1000) * NSEC_PER_USEC * 1000 * 10; + msleep(&needbuffer, buf_mtxp, slpflag|(PRIBIO+1), (char *)"getnewbuf", &ts); + return (0); } @@ -1638,8 +2635,10 @@ start: bp = age_bp; *queue = BQ_AGE; } else { /* buffer available on both AGE and LRU */ - age_time = time.tv_sec - age_bp->b_timestamp; - lru_time = time.tv_sec - lru_bp->b_timestamp; + int t = buf_timestamp(); + + age_time = t - age_bp->b_timestamp; + lru_time = t - lru_bp->b_timestamp; if ((age_time < 0) || (lru_time < 0)) { /* time set backwards */ bp = age_bp; *queue = BQ_AGE; @@ -1662,8 +2661,10 @@ start: bp = meta_bp; *queue = BQ_META; } else if (meta_bp) { - bp_time = time.tv_sec - bp->b_timestamp; - meta_time = time.tv_sec - meta_bp->b_timestamp; + int t = buf_timestamp(); + + bp_time = t - bp->b_timestamp; + meta_time = t - meta_bp->b_timestamp; if (!(bp_time < 0) && !(meta_time < 0)) { /* time not set backwards */ @@ -1678,138 +2679,256 @@ start: } } } - - if (bp == NULL) - 
panic("getnewbuf: null bp"); - found: - if (ISSET(bp->b_flags, B_LOCKED)) { - panic("getnewbuf: bp @ 0x%x is LOCKED! (flags 0x%x)\n", bp, bp->b_flags); - } - - if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef) - panic("getnewbuf: le_prev is deadbeef, buf @ 0x%x", bp); - - if(ISSET(bp->b_flags, B_BUSY)) - panic("getnewbuf reusing BUSY buf @ 0x%x", bp); + if (ISSET(bp->b_flags, B_LOCKED) || ISSET(bp->b_lflags, BL_BUSY)) + panic("getnewbuf: bp @ 0x%x is LOCKED or BUSY! (flags 0x%x)\n", bp, bp->b_flags); /* Clean it */ if (bcleanbuf(bp)) { - /* bawrite() issued, buffer not ready */ - splx(s); + /* + * moved to the laundry thread, buffer not ready + */ *queue = req; goto start; } - splx(s); return (bp); } -#include -#include -#include /* * Clean a buffer. * Returns 0 is buffer is ready to use, - * Returns 1 if issued a bawrite() to indicate + * Returns 1 if issued a buf_bawrite() to indicate * that the buffer is not ready. + * + * buf_mtxp is held upon entry + * returns with buf_mtxp locked */ static int -bcleanbuf(struct buf *bp) +bcleanbuf(buf_t bp) { - int s; - struct ucred *cred; - int hdralloc = 0; + ucred_t cred; - s = splbio(); /* Remove from the queue */ - bremfree(bp); + bremfree_locked(bp); /* Buffer is no longer on free lists. */ - SET(bp->b_flags, B_BUSY); - - /* Check whether the buffer header was "allocated" */ - if (ISSET(bp->b_flags, B_HDRALLOC)) - hdralloc = 1; - - if (bp->b_hash.le_prev == (struct buf **)0xdeadbeef) - panic("bcleanbuf: le_prev is deadbeef"); - + SET(bp->b_lflags, BL_BUSY); +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 2; +#endif /* * If buffer was a delayed write, start the IO by queuing * it on the LAUNDRY queue, and return 1 */ if (ISSET(bp->b_flags, B_DELWRI)) { - splx(s); binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY); blaundrycnt++; + + lck_mtx_unlock(buf_mtxp); + wakeup(&blaundrycnt); /* and give it a chance to run */ (void)thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock(buf_mtxp); return (1); } + bremhash(bp); + + lck_mtx_unlock(buf_mtxp); + + BLISTNONE(bp); + /* + * disassociate us from our vnode, if we had one... + */ + if (bp->b_vp) + brelvp(bp); + + if (ISSET(bp->b_flags, B_META)) { + vm_offset_t elem; + + elem = (vm_offset_t)bp->b_datap; + bp->b_datap = (uintptr_t)0xdeadbeef; + + if (ISSET(bp->b_flags, B_ZALLOC)) { + zone_t z; + + z = getbufzone(bp->b_bufsize); + zfree(z, (void *)elem); + } else + kmem_free(kernel_map, elem, bp->b_bufsize); + } + + trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); + + /* clear out various other fields */ + bp->b_bufsize = 0; + bp->b_datap = (uintptr_t)NULL; + bp->b_upl = (void *)NULL; + /* + * preserve the state of whether this buffer + * was allocated on the fly or not... + * the only other flag that should be set at + * this point is BL_BUSY... 
+ */
+#ifdef JOE_DEBUG
+ bp->b_owner = current_thread();
+ bp->b_tag = 3;
+#endif
+ bp->b_lflags = BL_BUSY;
+ bp->b_flags = (bp->b_flags & B_HDRALLOC);
+ bp->b_dev = NODEV;
+ bp->b_blkno = bp->b_lblkno = 0;
+ bp->b_iodone = NULL;
+ bp->b_error = 0;
+ bp->b_resid = 0;
+ bp->b_bcount = 0;
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ bp->b_validoff = bp->b_validend = 0;
+
+ /* nuke any credentials we were holding */
+ cred = bp->b_rcred;
+ if (cred != NOCRED) {
+ bp->b_rcred = NOCRED;
+ kauth_cred_rele(cred);
+ }
+ cred = bp->b_wcred;
+ if (cred != NOCRED) {
+ bp->b_wcred = NOCRED;
+ kauth_cred_rele(cred);
+ }
+ lck_mtx_lock(buf_mtxp);
+
+ return (0);
+}
+
+
+
+errno_t
+buf_invalblkno(vnode_t vp, daddr64_t lblkno, int flags)
+{
+ buf_t bp;
+ errno_t error;
+
+ lck_mtx_lock(buf_mtxp);
+relook:
+ if ((bp = incore_locked(vp, lblkno)) == (struct buf *)0) {
+ lck_mtx_unlock(buf_mtxp);
+ return (0);
+ }
+ if (ISSET(bp->b_lflags, BL_BUSY)) {
+ if ( !ISSET(flags, BUF_WAIT)) {
+ lck_mtx_unlock(buf_mtxp);
+ return (EBUSY);
+ }
+ SET(bp->b_lflags, BL_WANTED);
+
+ error = msleep((caddr_t)bp, buf_mtxp, (PRIBIO + 1), (char *)"buf_invalblkno", 0);
+
+ if (error)
+ return (error);
+ goto relook;
+ }
+ bremfree_locked(bp);
+ SET(bp->b_lflags, BL_BUSY);
+ SET(bp->b_flags, B_INVAL);
+#ifdef JOE_DEBUG
+ bp->b_owner = current_thread();
+ bp->b_tag = 4;
+#endif
+ lck_mtx_unlock(buf_mtxp);
+ buf_brelse(bp);
+
+ return (0);
+}
+
+
+void
+buf_drop(buf_t bp)
+{
+ int need_wakeup = 0;
+
+ lck_mtx_lock(buf_mtxp);
+
+ if (ISSET(bp->b_lflags, BL_WANTED)) {
+ /*
+ * delay the actual wakeup until after we
+ * clear BL_BUSY and we've dropped buf_mtxp
+ */
+ need_wakeup = 1;
+ }
+ /*
+ * Unlock the buffer.
+ */
+ CLR(bp->b_lflags, (BL_BUSY | BL_WANTED));
 
- if (bp->b_vp)
- brelvp(bp);
- bremhash(bp);
- BLISTNONE(bp);
+ lck_mtx_unlock(buf_mtxp);
 
- splx(s);
+ if (need_wakeup) {
+ /*
+ * Wake up any processes waiting for _this_ buffer to become free.
+ */
+ wakeup(bp);
+ }
+}
 
- if (ISSET(bp->b_flags, B_META)) {
- vm_offset_t elem = (vm_offset_t)bp->b_data;
- if (elem == 0)
- panic("bcleanbuf: NULL bp->b_data B_META buffer");
 
- if (ISSET(bp->b_flags, B_ZALLOC)) {
- if (bp->b_bufsize <= MAXMETA) {
- zone_t z;
+errno_t
+buf_acquire(buf_t bp, int flags, int slpflag, int slptimeo) {
+ errno_t error;
 
- z = getbufzone(bp->b_bufsize);
- bp->b_data = (caddr_t)0xdeadbeef;
- zfree(z, elem);
- CLR(bp->b_flags, B_ZALLOC);
- } else
- panic("bcleanbuf: B_ZALLOC set incorrectly");
- } else {
- bp->b_data = (caddr_t)0xdeadbeef;
- kmem_free(kernel_map, elem, bp->b_bufsize);
- }
- }
+ lck_mtx_lock(buf_mtxp);
 
- trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
+ error = buf_acquire_locked(bp, flags, slpflag, slptimeo);
 
- /* disassociate us from our vnode, if we had one... 
*/ - s = splbio(); + lck_mtx_unlock(buf_mtxp); - /* clear out various other fields */ - bp->b_bufsize = 0; - bp->b_data = 0; - bp->b_flags = B_BUSY; - if (hdralloc) - SET(bp->b_flags, B_HDRALLOC); - bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = 0; - bp->b_iodone = 0; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_validoff = bp->b_validend = 0; + return (error); +} - /* nuke any credentials we were holding */ - cred = bp->b_rcred; - if (cred != NOCRED) { - bp->b_rcred = NOCRED; - crfree(cred); + +static errno_t +buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo) +{ + errno_t error; + struct timespec ts; + + if (ISSET(bp->b_flags, B_LOCKED)) { + if ((flags & BAC_SKIP_LOCKED)) + return (EDEADLK); + } else { + if ((flags & BAC_SKIP_NONLOCKED)) + return (EDEADLK); } - cred = bp->b_wcred; - if (cred != NOCRED) { - bp->b_wcred = NOCRED; - crfree(cred); + if (ISSET(bp->b_lflags, BL_BUSY)) { + /* + * since the mutex_lock may block, the buffer + * may become BUSY, so we need to + * recheck for a NOWAIT request + */ + if (flags & BAC_NOWAIT) + return (EBUSY); + SET(bp->b_lflags, BL_WANTED); + + /* the hz value is 100; which leads to 10ms */ + ts.tv_sec = (slptimeo/100); + ts.tv_nsec = (slptimeo % 100) * 10 * NSEC_PER_USEC * 1000; + error = msleep((caddr_t)bp, buf_mtxp, slpflag | (PRIBIO + 1), (char *)"buf_acquire", &ts); + + if (error) + return (error); + return (EAGAIN); } - splx(s); + if (flags & BAC_REMOVE) + bremfree_locked(bp); + SET(bp->b_lflags, BL_BUSY); +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 5; +#endif return (0); } @@ -1818,16 +2937,15 @@ bcleanbuf(struct buf *bp) * Wait for operations on the buffer to complete. * When they do, extract and return the I/O's error value. */ -int -biowait(bp) - struct buf *bp; +errno_t +buf_biowait(buf_t bp) { - int s; + lck_mtx_lock(buf_mtxp); - s = splbio(); while (!ISSET(bp->b_flags, B_DONE)) - tsleep(bp, PRIBIO + 1, "biowait", 0); - splx(s); + (void) msleep(bp, buf_mtxp, (PRIBIO+1), (char *)"buf_biowait", 0); + + lck_mtx_unlock(buf_mtxp); /* check for interruption of I/O (e.g. via NFS), then errors. */ if (ISSET(bp->b_flags, B_EINTR)) { @@ -1851,102 +2969,138 @@ biowait(bp) * process, invokes a procedure specified in the buffer structure" ] * * In real life, the pagedaemon (or other system processes) wants - * to do async stuff to, and doesn't want the buffer brelse()'d. + * to do async stuff to, and doesn't want the buffer buf_brelse()'d. * (for swap pager, that puts swap buffers on the free lists (!!!), * for the vn device, that puts malloc'd buffers on the free lists!) 
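+ *
+ * a short sketch of the callout convention (my_iodone is a
+ * hypothetical callback, not part of this change): the issuer
+ * arms the buffer before starting the I/O
+ *
+ *	void my_iodone(buf_t bp, void *arg);	assumed signature
+ *
+ *	bp->b_iodone = my_iodone;
+ *	bp->b_transaction = arg;
+ *	SET(bp->b_flags, B_CALL);	or B_FILTER for a filter-only hook
+ *
+ * buf_biodone() clears B_CALL/B_FILTER (they are one-shot) and calls
+ * my_iodone(bp, arg); with B_CALL the callback owns the buffer, with
+ * B_FILTER the cleanup continues in buf_biodone()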
*/ +extern struct timeval priority_IO_timestamp_for_root; +extern int hard_throttle_on_root; + void -biodone(bp) - struct buf *bp; +buf_biodone(buf_t bp) { - boolean_t funnel_state; - struct vnode *vp; - extern struct timeval priority_IO_timestamp_for_root; - extern int hard_throttle_on_root; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START, - (int)bp, (int)bp->b_data, bp->b_flags, 0, 0); + (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0); if (ISSET(bp->b_flags, B_DONE)) panic("biodone already"); - SET(bp->b_flags, B_DONE); /* note that it's done */ - /* - * I/O was done, so don't believe - * the DIRTY state from VM anymore - */ - CLR(bp->b_flags, B_WASDIRTY); - if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW)) - vwakeup(bp); /* wake up reader */ - if (kdebug_enable) { - int code = DKIO_DONE; + int code = DKIO_DONE; - if (bp->b_flags & B_READ) - code |= DKIO_READ; - if (bp->b_flags & B_ASYNC) - code |= DKIO_ASYNC; + if (bp->b_flags & B_READ) + code |= DKIO_READ; + if (bp->b_flags & B_ASYNC) + code |= DKIO_ASYNC; - if (bp->b_flags & B_META) - code |= DKIO_META; - else if (bp->b_flags & (B_PGIN | B_PAGEOUT)) - code |= DKIO_PAGING; + if (bp->b_flags & B_META) + code |= DKIO_META; + else if (bp->b_flags & B_PAGEIO) + code |= DKIO_PAGING; - KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - (unsigned int)bp, (unsigned int)bp->b_vp, - bp->b_resid, bp->b_error, 0); + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, + (unsigned int)bp, (unsigned int)bp->b_vp, + bp->b_resid, bp->b_error, 0); } - - /* Wakeup the throttled write operations as needed */ - vp = bp->b_vp; - if (vp - && (vp->v_flag & VTHROTTLED) - && (vp->v_numoutput <= (BUFWRITE_THROTTLE / 3))) { - vp->v_flag &= ~VTHROTTLED; - wakeup((caddr_t)&vp->v_numoutput); - } - if ((bp->b_flags & B_PGIN) && (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) { - priority_IO_timestamp_for_root = time; + if ((bp->b_vp != NULLVP) && + ((bp->b_flags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && + (bp->b_vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) { + microuptime(&priority_IO_timestamp_for_root); hard_throttle_on_root = 0; } - if (ISSET(bp->b_flags, B_CALL)) { /* if necessary, call out */ - void (*iodone_func)(struct buf *) = bp->b_iodone; + /* + * I/O was done, so don't believe + * the DIRTY state from VM anymore + */ + CLR(bp->b_flags, B_WASDIRTY); - CLR(bp->b_flags, B_CALL); /* but note callout done */ + if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW)) + /* + * wake up any writer's blocked + * on throttle or waiting for I/O + * to drain + */ + vnode_writedone(bp->b_vp); + + if (ISSET(bp->b_flags, (B_CALL | B_FILTER))) { /* if necessary, call out */ + void (*iodone_func)(struct buf *, void *) = bp->b_iodone; + void *arg = (void *)bp->b_transaction; + int callout = ISSET(bp->b_flags, B_CALL); + + CLR(bp->b_flags, (B_CALL | B_FILTER)); /* filters and callouts are one-shot */ bp->b_iodone = NULL; + bp->b_transaction = NULL; if (iodone_func == NULL) { panic("biodone: bp @ 0x%x has NULL b_iodone!\n", bp); } else { - (*iodone_func)(bp); + if (callout) + SET(bp->b_flags, B_DONE); /* note that it's done */ + (*iodone_func)(bp, arg); } - } else if (ISSET(bp->b_flags, B_ASYNC)) /* if async, release it */ - brelse(bp); - else { /* or just wakeup the buffer */ - CLR(bp->b_flags, B_WANTED); - wakeup(bp); + if (callout) + /* + * assumes that the call back function takes + * ownership of the bp and deals with releasing it if necessary + */ + 
goto biodone_done;
+ /*
+ * in this case the callback function is acting
+ * strictly as a filter... it does not take
+ * ownership of the bp and is expecting us
+ * to finish cleaning up... this is currently used
+ * by the HFS journaling code
+ */
 }
+ if (ISSET(bp->b_flags, B_ASYNC)) { /* if async, release it */
+ SET(bp->b_flags, B_DONE); /* note that it's done */
 
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END,
- (int)bp, (int)bp->b_data, bp->b_flags, 0, 0);
+ buf_brelse(bp);
+ } else { /* or just wakeup the buffer */
+ /*
+ * by taking the mutex, we serialize
+ * the buf owner calling buf_biowait so that we'll
+ * only see him in one of 2 states...
+ * state 1: B_DONE wasn't set and he's
+ * blocked in msleep
+ * state 2: he's blocked trying to take the
+ * mutex before looking at B_DONE
+ * BL_WANTED is cleared in case anyone else
+ * is blocked waiting for the buffer... note
+ * that we haven't cleared B_BUSY yet, so if
+ * they do get to run, they're going to re-set
+ * BL_WANTED and go back to sleep
+ */
+ lck_mtx_lock(buf_mtxp);
 
- thread_funnel_set(kernel_flock, funnel_state);
+ CLR(bp->b_lflags, BL_WANTED);
+ SET(bp->b_flags, B_DONE); /* note that it's done */
+
+ lck_mtx_unlock(buf_mtxp);
+
+ wakeup(bp);
+ }
+biodone_done:
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END,
+ (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0);
 }
 
 /*
  * Return a count of buffers on the "locked" queue.
  */
 int
-count_lock_queue()
+count_lock_queue(void)
 {
- register struct buf *bp;
- register int n = 0;
+ buf_t bp;
+ int n = 0;
+
+ lck_mtx_lock(buf_mtxp);
 
 for (bp = bufqueues[BQ_LOCKED].tqh_first; bp;
 bp = bp->b_freelist.tqe_next)
 n++;
+ lck_mtx_unlock(buf_mtxp);
+
 return (n);
 }
 
@@ -1954,13 +3108,13 @@ count_lock_queue()
 * Return a count of 'busy' buffers. Used at the time of shutdown. 
*/ int -count_busy_buffers() +count_busy_buffers(void) { - register struct buf *bp; - register int nbusy = 0; + buf_t bp; + int nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) - if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) + if (!ISSET(bp->b_flags, B_INVAL) && ISSET(bp->b_lflags, BL_BUSY)) nbusy++; return (nbusy); } @@ -1974,107 +3128,350 @@ count_busy_buffers() void vfs_bufstats() { - int s, i, j, count; - register struct buf *bp; - register struct bqueues *dp; - int counts[MAXBSIZE/CLBYTES+1]; - static char *bname[BQUEUES] = - { "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" }; + int i, j, count; + register struct buf *bp; + register struct bqueues *dp; + int counts[MAXBSIZE/CLBYTES+1]; + static char *bname[BQUEUES] = + { "LOCKED", "LRU", "AGE", "EMPTY", "META", "LAUNDRY" }; + + for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { + count = 0; + for (j = 0; j <= MAXBSIZE/CLBYTES; j++) + counts[j] = 0; + + lck_mtx_lock(buf_mtxp); + + for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) { + counts[bp->b_bufsize/CLBYTES]++; + count++; + } + lck_mtx_unlock(buf_mtxp); + + printf("%s: total-%d", bname[i], count); + for (j = 0; j <= MAXBSIZE/CLBYTES; j++) + if (counts[j] != 0) + printf(", %d-%d", j * CLBYTES, counts[j]); + printf("\n"); + } +} +#endif /* DIAGNOSTIC */ + +#define NRESERVEDIOBUFS 64 + + +buf_t +alloc_io_buf(vnode_t vp, int priv) +{ + buf_t bp; + + lck_mtx_lock(iobuffer_mtxp); + + while (((niobuf - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse) && !priv) || + (bp = iobufqueue.tqh_first) == NULL) { + bufstats.bufs_iobufsleeps++; + + need_iobuffer = 1; + (void) msleep(&need_iobuffer, iobuffer_mtxp, (PRIBIO+1), (const char *)"alloc_io_buf", 0); + } + TAILQ_REMOVE(&iobufqueue, bp, b_freelist); + + bufstats.bufs_iobufinuse++; + if (bufstats.bufs_iobufinuse > bufstats.bufs_iobufmax) + bufstats.bufs_iobufmax = bufstats.bufs_iobufinuse; + + lck_mtx_unlock(iobuffer_mtxp); + + /* + * initialize various fields + * we don't need to hold the mutex since the buffer + * is now private... the vp should have a reference + * on it and is not protected by this mutex in any event + */ + bp->b_timestamp = 0; + bp->b_proc = NULL; + + bp->b_datap = 0; + bp->b_flags = 0; + bp->b_lflags = BL_BUSY | BL_IOBUF; + bp->b_blkno = bp->b_lblkno = 0; +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 6; +#endif + bp->b_iodone = NULL; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_bcount = 0; + bp->b_bufsize = 0; + bp->b_upl = NULL; + bp->b_vp = vp; + + if (vp && (vp->v_type == VBLK || vp->v_type == VCHR)) + bp->b_dev = vp->v_rdev; + else + bp->b_dev = NODEV; + + return (bp); +} + + +void +free_io_buf(buf_t bp) +{ + int need_wakeup = 0; + + /* + * put buffer back on the head of the iobufqueue + */ + bp->b_vp = NULL; + bp->b_flags = B_INVAL; + + lck_mtx_lock(iobuffer_mtxp); + + binsheadfree(bp, &iobufqueue, -1); + + if (need_iobuffer) { + /* + * Wake up any processes waiting because they need an io buffer + * + * do the wakeup after we drop the mutex... it's possible that the + * wakeup will be superfluous if need_iobuffer gets set again and + * another thread runs this path, but it's highly unlikely, doesn't + * hurt, and it means we don't hold up I/O progress if the wakeup blocks + * trying to grab a task related lock... 
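+ *
+ * reduced to a sketch (generic names, not part of this change),
+ * the pattern is: decide under the lock, wake up after it
+ *
+ *	int do_wakeup = 0;
+ *
+ *	lck_mtx_lock(mtxp);
+ *	if (waiter_flag) {
+ *		waiter_flag = 0;
+ *		do_wakeup = 1;		record the decision while locked
+ *	}
+ *	lck_mtx_unlock(mtxp);
+ *	if (do_wakeup)
+ *		wakeup(chan);		wakeup issued outside the lock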
+ */ + need_iobuffer = 0; + need_wakeup = 1; + } + bufstats.bufs_iobufinuse--; + + lck_mtx_unlock(iobuffer_mtxp); + + if (need_wakeup) + wakeup(&need_iobuffer); +} + + + +/* + * If getnewbuf() calls bcleanbuf() on the same thread + * there is a potential for stack overrun and deadlocks. + * So we always handoff the work to a worker thread for completion + */ +#include +#include +#include + + +static void +bcleanbuf_thread_init(void) +{ + /* create worker thread */ + kernel_thread(kernel_task, bcleanbuf_thread); +} + +static void +bcleanbuf_thread(void) +{ + struct buf *bp; + int error = 0; + int loopcnt = 0; + + for (;;) { + lck_mtx_lock(buf_mtxp); + + while (blaundrycnt == 0) + (void)msleep((void *)&blaundrycnt, buf_mtxp, PRIBIO, "blaundry", 0); + + bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY]); + /* + * Remove from the queue + */ + bremfree_locked(bp); + blaundrycnt--; + + lck_mtx_unlock(buf_mtxp); + /* + * do the IO + */ + error = bawrite_internal(bp, 0); + + if (error) { + lck_mtx_lock(buf_mtxp); + + binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY); + blaundrycnt++; + + lck_mtx_unlock(buf_mtxp); + + if (loopcnt > 10) { + (void)tsleep((void *)&blaundrycnt, PRIBIO, "blaundry", 1); + loopcnt = 0; + } else { + (void)thread_block(THREAD_CONTINUE_NULL); + loopcnt++; + } + } + } +} + + +static int +brecover_data(buf_t bp) +{ + int upl_offset; + upl_t upl; + upl_page_info_t *pl; + kern_return_t kret; + vnode_t vp = bp->b_vp; + int upl_flags; + + + if ( !UBCINFOEXISTS(vp) || bp->b_bufsize == 0) + goto dump_buffer; + + upl_flags = UPL_PRECIOUS; + if (! (buf_flags(bp) & B_READ)) { + /* + * "write" operation: let the UPL subsystem know + * that we intend to modify the buffer cache pages we're + * gathering. + */ + upl_flags |= UPL_WILL_MODIFY; + } + + kret = ubc_create_upl(vp, + ubc_blktooff(vp, bp->b_lblkno), + bp->b_bufsize, + &upl, + &pl, + upl_flags); + if (kret != KERN_SUCCESS) + panic("Failed to create UPL"); + + for (upl_offset = 0; upl_offset < bp->b_bufsize; upl_offset += PAGE_SIZE) { + + if (!upl_valid_page(pl, upl_offset / PAGE_SIZE) || !upl_dirty_page(pl, upl_offset / PAGE_SIZE)) { + ubc_upl_abort(upl, 0); + goto dump_buffer; + } + } + bp->b_upl = upl; + + kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); + + if (kret != KERN_SUCCESS) + panic("getblk: ubc_upl_map() failed with (%d)", kret); + return (1); + +dump_buffer: + bp->b_bufsize = 0; + SET(bp->b_flags, B_INVAL); + buf_brelse(bp); + + return(0); +} + + + +/* + * disabled for now + */ + +#if FLUSH_QUEUES + +#define NFLUSH 32 + +static int +bp_cmp(void *a, void *b) +{ + buf_t *bp_a = *(buf_t **)a, + *bp_b = *(buf_t **)b; + daddr64_t res; - for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { - count = 0; - for (j = 0; j <= MAXBSIZE/CLBYTES; j++) - counts[j] = 0; - s = splbio(); - for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) { - counts[bp->b_bufsize/CLBYTES]++; - count++; - } - splx(s); - printf("%s: total-%d", bname[i], count); - for (j = 0; j <= MAXBSIZE/CLBYTES; j++) - if (counts[j] != 0) - printf(", %d-%d", j * CLBYTES, counts[j]); - printf("\n"); - } + // don't have to worry about negative block + // numbers so this is ok to do. 
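+ // (aside, not part of this change: the cast to int below is only
+ // safe while the difference fits in 32 bits; a comparison-based
+ // variant of the same qsort comparator avoids that assumption:
+ //
+ //	if (bp_a->b_blkno < bp_b->b_blkno) return (-1);
+ //	if (bp_a->b_blkno > bp_b->b_blkno) return (1);
+ //	return (0);
+ //
+ // either form yields the ordering qsort needs)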
+ // + res = (bp_a->b_blkno - bp_b->b_blkno); + + return (int)res; } -#endif /* DIAGNOSTIC */ -#define NRESERVEDIOBUFS 64 -__private_extern__ struct buf * -alloc_io_buf(vp, priv) - struct vnode *vp; - int priv; +int +bflushq(int whichq, mount_t mp) { - register struct buf *bp; - int s; + buf_t bp, next; + int i, buf_count; + int total_writes = 0; + static buf_t flush_table[NFLUSH]; - s = splbio(); - - while (niobuf - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse && !priv) { - need_iobuffer = 1; - bufstats.bufs_iobufsleeps++; - (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf", 0); + if (whichq < 0 || whichq >= BQUEUES) { + return (0); } - while ((bp = iobufqueue.tqh_first) == NULL) { - need_iobuffer = 1; - bufstats.bufs_iobufsleeps++; - (void) tsleep(&need_iobuffer, (PRIBIO+1), "alloc_io_buf1", 0); - } + restart: + lck_mtx_lock(buf_mtxp); - TAILQ_REMOVE(&iobufqueue, bp, b_freelist); - bp->b_timestamp = 0; + bp = TAILQ_FIRST(&bufqueues[whichq]); - /* clear out various fields */ - bp->b_flags = B_BUSY; - bp->b_blkno = bp->b_lblkno = 0; + for (buf_count = 0; bp; bp = next) { + next = bp->b_freelist.tqe_next; + + if (bp->b_vp == NULL || bp->b_vp->v_mount != mp) { + continue; + } - bp->b_iodone = 0; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_bufsize = 0; - bp->b_vp = vp; + if (ISSET(bp->b_flags, B_DELWRI) && !ISSET(bp->b_lflags, BL_BUSY)) { - if (vp->v_type == VBLK || vp->v_type == VCHR) - bp->b_dev = vp->v_rdev; - else - bp->b_dev = NODEV; - bufstats.bufs_iobufinuse++; - if (bufstats.bufs_iobufinuse > bufstats.bufs_iobufmax) - bufstats.bufs_iobufmax = bufstats.bufs_iobufinuse; - splx(s); + bremfree_locked(bp); +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 7; +#endif + SET(bp->b_lflags, BL_BUSY); + flush_table[buf_count] = bp; + buf_count++; + total_writes++; - return (bp); -} + if (buf_count >= NFLUSH) { + lck_mtx_unlock(buf_mtxp); -__private_extern__ void -free_io_buf(bp) - struct buf *bp; -{ - int s; + qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp); - s = splbio(); - /* put buffer back on the head of the iobufqueue */ - bp->b_vp = NULL; - bp->b_flags = B_INVAL; + for (i = 0; i < buf_count; i++) { + buf_bawrite(flush_table[i]); + } + goto restart; + } + } + } + lck_mtx_unlock(buf_mtxp); - binsheadfree(bp, &iobufqueue, -1); + if (buf_count > 0) { + qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp); - /* Wake up any processes waiting for any buffer to become free. 
*/
- if (need_iobuffer) {
- need_iobuffer = 0;
- wakeup(&need_iobuffer);
+ for (i = 0; i < buf_count; i++) {
+ buf_bawrite(flush_table[i]);
+ }
 }
- bufstats.bufs_iobufinuse--;
- splx(s);
+
+ return (total_writes);
 }
+#endif
 
-/* disabled for now */
+
+#if BALANCE_QUEUES
 
 /* XXX move this to a separate file */
+
+/*
+ * NOTE: THIS CODE HAS NOT BEEN UPDATED
+ * WITH RESPECT TO THE NEW LOCKING MODEL
+ */
+
+
 /*
  * Dynamic Scaling of the Buffer Queues
  */
@@ -2170,6 +3567,27 @@ static __inline__ int initbufqscan(void);
 static __inline__ int nextbufq(int q);
 static void buqlimprt(int all);
+
+static __inline__ void
+bufqinc(int q)
+{
+ if ((q < 0) || (q >= BQUEUES))
+ return;
+
+ bufqlim[q].bl_num++;
+ return;
+}
+
+static __inline__ void
+bufqdec(int q)
+{
+ if ((q < 0) || (q >= BQUEUES))
+ return;
+
+ bufqlim[q].bl_num--;
+ return;
+}
+
 static void
 bufq_balance_thread_init()
 {
@@ -2236,11 +3654,8 @@ bufq_balance_thread_init()
 static void
 bufqscan_thread()
 {
- boolean_t funnel_state;
 int moretodo = 0;
 
- funnel_state = thread_funnel_set(kernel_flock, TRUE);
-
 for(;;) {
 do {
 int q; /* buffer queue to process */
@@ -2259,8 +3674,6 @@ bufqscan_thread()
 (void)tsleep((void *)&bufqscanwait, PRIBIO, "bufqscanwait", 60 * hz);
 moretodo = 0;
 }
-
- (void) thread_funnel_set(kernel_flock, FALSE);
 }
 
 /* Seed for the buffer queue balancing */
@@ -2288,7 +3701,7 @@ balancebufq(int q)
 {
 int moretodo = 0;
 int s = splbio();
- int n;
+ int n, t;
 
 /* reject invalid q */
 if ((q < 0) || (q >= BQUEUES))
@@ -2314,6 +3727,8 @@ balancebufq(int q)
 moretodo |= btrimempty(n);
 goto out;
 }
+
+ t = buf_timestamp();
 
 for (; n > 0; n--) {
 struct buf *bp = bufqueues[q].tqh_first;
@@ -2321,14 +3736,14 @@
 break;
 
 /* check if it's stale */
- if ((time.tv_sec - bp->b_timestamp) > bufqlim[q].bl_stale) {
+ if ((t - bp->b_timestamp) > bufqlim[q].bl_stale) {
 if (bcleanbuf(bp)) {
- /* bawrite() issued, bp not ready */
+ /* buf_bawrite() issued, bp not ready */
 moretodo = 1;
 } else {
 /* release the cleaned buffer to BQ_EMPTY */
 SET(bp->b_flags, B_INVAL);
- brelse(bp);
+ buf_brelse(bp);
 }
 } else
 break;
@@ -2350,26 +3765,6 @@ btrimempty(int n)
 return (0);
 }
 
-static __inline__ void
-bufqinc(int q)
-{
- if ((q < 0) || (q >= BQUEUES))
- return;
-
- bufqlim[q].bl_num++;
- return;
-}
-
-static __inline__ void
-bufqdec(int q)
-{
- if ((q < 0) || (q >= BQUEUES))
- return;
-
- bufqlim[q].bl_num--;
- return;
-}
-
 static void
 buqlimprt(int all)
 {
@@ -2393,183 +3788,6 @@ buqlimprt(int all)
 }
 }
 
-/*
- * If the getnewbuf() calls bcleanbuf() on the same thread
- * there is a potential for stack overrun and deadlocks. 
- * So we always handoff the work to worker thread for completion - */ - -static void -bcleanbuf_thread_init() -{ - static void bcleanbuf_thread(); - - /* create worker thread */ - kernel_thread(kernel_task, bcleanbuf_thread); -} - -static void -bcleanbuf_thread() -{ - boolean_t funnel_state; - struct buf *bp; - int error = 0; - int loopcnt = 0; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - -doit: - while (blaundrycnt == 0) - (void)tsleep((void *)&blaundrycnt, PRIBIO, "blaundry", 60 * hz); - bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY]); - /* Remove from the queue */ - bremfree(bp); - blaundrycnt--; - - /* do the IO */ - error = bawrite_internal(bp, 0); - if (error) { - binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY); - blaundrycnt++; - if (loopcnt > 10) { - (void)tsleep((void *)&blaundrycnt, PRIBIO, "blaundry", 1); - loopcnt = 0; - } else { - (void)thread_block(THREAD_CONTINUE_NULL); - loopcnt++; - } - } - /* start again */ - goto doit; - - (void) thread_funnel_set(kernel_flock, funnel_state); -} - - -static int -brecover_data(struct buf *bp) -{ - upl_t upl; - upl_page_info_t *pl; - int upl_offset; - kern_return_t kret; - struct vnode *vp = bp->b_vp; - - if (vp->v_tag == VT_NFS) - /* - * NFS currently deals with this case - * in a slightly different manner... - * continue to let it do so - */ - return(1); - - if (!UBCISVALID(vp) || bp->b_bufsize == 0) - goto dump_buffer; - - kret = ubc_create_upl(vp, - ubc_blktooff(vp, bp->b_lblkno), - bp->b_bufsize, - &upl, - &pl, - UPL_PRECIOUS); - if (kret != KERN_SUCCESS) - panic("Failed to get pagelists"); - - for (upl_offset = 0; upl_offset < bp->b_bufsize; upl_offset += PAGE_SIZE) { - - if (!upl_valid_page(pl, upl_offset / PAGE_SIZE) || !upl_dirty_page(pl, upl_offset / PAGE_SIZE)) { - ubc_upl_abort(upl, 0); - goto dump_buffer; - } - } - SET(bp->b_flags, B_PAGELIST); - bp->b_pagelist = upl; - - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data)); - if (kret != KERN_SUCCESS) - panic("getblk: ubc_upl_map() failed with (%d)", kret); - if (bp->b_data == 0) - panic("ubc_upl_map mapped 0"); - - return (1); - -dump_buffer: - bp->b_bufsize = 0; - SET(bp->b_flags, B_INVAL); - brelse(bp); - - return(0); -} - - -static int -bp_cmp(void *a, void *b) -{ - struct buf *bp_a = *(struct buf **)a, - *bp_b = *(struct buf **)b; - daddr_t res; - - // don't have to worry about negative block - // numbers so this is ok to do. 
- // - res = (bp_a->b_blkno - bp_b->b_blkno); - - return (int)res; -} - -#define NFLUSH 32 - -int -bflushq(int whichq, struct mount *mp) -{ - struct buf *bp, *next; - int i, buf_count, s; - int counter=0, total_writes=0; - static struct buf *flush_table[NFLUSH]; - - if (whichq < 0 || whichq >= BQUEUES) { - return; - } - - - restart: - bp = TAILQ_FIRST(&bufqueues[whichq]); - for(buf_count=0; bp; bp=next) { - next = bp->b_freelist.tqe_next; - - if (bp->b_vp == NULL || bp->b_vp->v_mount != mp) { - continue; - } - - if ((bp->b_flags & B_DELWRI) && (bp->b_flags & B_BUSY) == 0) { - if (whichq != BQ_LOCKED && (bp->b_flags & B_LOCKED)) { - panic("bflushq: bp @ 0x%x is locked!\n", bp); - } - - bremfree(bp); - bp->b_flags |= B_BUSY; - flush_table[buf_count] = bp; - buf_count++; - total_writes++; - - if (buf_count >= NFLUSH) { - qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp); - - for(i=0; i < buf_count; i++) { - bawrite(flush_table[i]); - } - - goto restart; - } - } - } +#endif - if (buf_count > 0) { - qsort(flush_table, buf_count, sizeof(struct buf *), bp_cmp); - for(i=0; i < buf_count; i++) { - bawrite(flush_table[i]); - } - } - return total_writes; -} diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index 85a9f2c7c..8cb282de6 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -61,11 +61,13 @@ #include #include #include -#include -#include +#include +#include #include #include #include +#include +#include /* * Name caching works as follows: @@ -79,10 +81,6 @@ * If it is a "negative" entry, (i.e. for a name that is known NOT to * exist) the vnode pointer will be NULL. * - * For simplicity (and economy of storage), names longer than - * a maximum length of NCHNAMLEN are not cached; they occur - * infrequently in any case, and are almost never of interest. - * * Upon reaching the last segment of a path, if the reference * is for DELETE, or NOCACHE is set (rewrite), and the * name is located in the cache, it will be dropped. @@ -91,59 +89,704 @@ /* * Structures associated with name cacheing. */ -#define NCHHASH(dvp, hash_val) \ - (&nchashtbl[((u_long)(dvp) ^ ((dvp)->v_id ^ (hash_val))) & nchash]) + LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ +u_long nchashmask; u_long nchash; /* size of hash table - 1 */ long numcache; /* number of cache entries allocated */ -TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ +int desiredNodes; +int desiredNegNodes; +TAILQ_HEAD(, namecache) nchead; /* chain of all name cache entries */ +TAILQ_HEAD(, namecache) neghead; /* chain of only negative cache entries */ struct nchstats nchstats; /* cache effectiveness statistics */ -u_long nextvnodeid = 0; -int doingcache = 1; /* 1 => enable the cache */ + +/* vars for name cache list lock */ +lck_grp_t * namecache_lck_grp; +lck_grp_attr_t * namecache_lck_grp_attr; +lck_attr_t * namecache_lck_attr; +lck_mtx_t * namecache_mtx_lock; + +static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp); +static int remove_name_locked(const char *); +static char *add_name_locked(const char *, size_t, u_int, u_int); +static void init_string_table(void); +static void cache_delete(struct namecache *, int); +static void dump_string_table(void); + +static void init_crc32(void); +static unsigned int crc32tab[256]; + + +#define NCHHASH(dvp, hash_val) \ + (&nchashtbl[(dvp->v_id ^ (hash_val)) & nchashmask]) + + + +// +// This function builds the path to a filename in "buff". The +// length of the buffer *INCLUDING* the trailing zero byte is +// returned in outlen. 
NOTE: the length includes the trailing +// zero byte and thus the length is one greater than what strlen +// would return. This is important and lots of code elsewhere +// in the kernel assumes this behavior. +// +int +build_path(vnode_t first_vp, char *buff, int buflen, int *outlen) +{ + vnode_t vp = first_vp; + char *end, *str; + int len, ret=0, counter=0; + + end = &buff[buflen-1]; + *end = '\0'; + + /* + * if this is the root dir of a file system... + */ + if (vp && (vp->v_flag & VROOT) && vp->v_mount) { + /* + * then if it's the root fs, just put in a '/' and get out of here + */ + if (vp->v_mount->mnt_flag & MNT_ROOTFS) { + *--end = '/'; + goto out; + } else { + /* + * else just use the covered vnode to get the mount path + */ + vp = vp->v_mount->mnt_vnodecovered; + } + } + name_cache_lock(); + + while (vp && vp->v_parent != vp) { + /* + * the maximum depth of a file system hierarchy is MAXPATHLEN/2 + * (with single-char names separated by slashes). we panic if + * we've ever looped more than that. + */ + if (counter++ > MAXPATHLEN/2) { + panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp); + } + str = vp->v_name; + + if (str == NULL) { + if (vp->v_parent != NULL) { + ret = EINVAL; + } + break; + } + len = strlen(str); + + /* + * check that there's enough space (make sure to include space for the '/') + */ + if ((end - buff) < (len + 1)) { + ret = ENOSPC; + break; + } + /* + * copy it backwards + */ + str += len; + + for (; len > 0; len--) { + *--end = *--str; + } + /* + * put in the path separator + */ + *--end = '/'; + + /* + * walk up the chain (as long as we're not the root) + */ + if (vp == first_vp && (vp->v_flag & VROOT)) { + if (vp->v_mount && vp->v_mount->mnt_vnodecovered) { + vp = vp->v_mount->mnt_vnodecovered->v_parent; + } else { + vp = NULLVP; + } + } else { + vp = vp->v_parent; + } + /* + * check if we're crossing a mount point and + * switch the vp if we are. + */ + if (vp && (vp->v_flag & VROOT) && vp->v_mount) { + vp = vp->v_mount->mnt_vnodecovered; + } + } + name_cache_unlock(); +out: + /* + * slide it down to the beginning of the buffer + */ + memmove(buff, end, &buff[buflen] - end); + + *outlen = &buff[buflen] - end; // length includes the trailing zero byte + + return ret; +} + /* - * Delete an entry from its hash list and move it to the front - * of the LRU list for immediate reuse. - * - * NOTE: THESE MACROS CAN BLOCK (in the call to remove_name()) - * SO BE CAREFUL IF YOU HOLD POINTERS TO nclruhead OR - * nchashtbl. + * return NULLVP if vp's parent doesn't + * exist, or we can't get a valid iocount + * else return the parent of vp */ -#if DIAGNOSTIC -#define PURGE(ncp) { \ - if (ncp->nc_hash.le_prev == 0) \ - panic("namecache purge le_prev"); \ - if (ncp->nc_hash.le_next == ncp) \ - panic("namecache purge le_next"); \ - LIST_REMOVE(ncp, nc_hash); \ - ncp->nc_hash.le_prev = 0; \ - TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ - TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \ - /* this has to come last because it could block */ \ - remove_name(ncp->nc_name); \ - ncp->nc_name = NULL; \ +vnode_t +vnode_getparent(vnode_t vp) +{ + vnode_t pvp = NULLVP; + int pvid; + + name_cache_lock(); + /* + * v_parent is stable behind the name_cache lock + * however, the only thing we can really guarantee + * is that we've grabbed a valid iocount on the + * parent of 'vp' at the time we took the name_cache lock... 
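+ *
+ * the capture-and-revalidate shape, as a sketch (a reduction of the
+ * code below, not an addition to it):
+ *
+ *	name_cache_lock();
+ *	pvp = vp->v_parent;		stable only while the lock is held
+ *	pvid = pvp->v_id;		capture its identity
+ *	name_cache_unlock();
+ *
+ *	if (vnode_getwithvid(pvp, pvid) != 0)
+ *		pvp = NULL;		recycled in the window; caller gets NULL
+ *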
+ * once we drop the lock, vp could get re-parented + */ + if ( (pvp = vp->v_parent) != NULLVP ) { + pvid = pvp->v_id; + + name_cache_unlock(); + + if (vnode_getwithvid(pvp, pvid) != 0) + pvp = NULL; + } else + name_cache_unlock(); + + return (pvp); +} + +char * +vnode_getname(vnode_t vp) +{ + char *name = NULL; + + name_cache_lock(); + + if (vp->v_name) + name = add_name_locked(vp->v_name, strlen(vp->v_name), 0, 0); + name_cache_unlock(); + + return (name); } -#else -#define PURGE(ncp) { \ - LIST_REMOVE(ncp, nc_hash); \ - ncp->nc_hash.le_prev = 0; \ - TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ - TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \ - /* this has to come last because it could block */ \ - remove_name(ncp->nc_name); \ - ncp->nc_name = NULL; \ + +void +vnode_putname(char *name) +{ + name_cache_lock(); + + remove_name_locked(name); + + name_cache_unlock(); +} + + +/* + * if VNODE_UPDATE_PARENT, and we can take + * a reference on dvp, then update vp with + * it's new parent... if vp already has a parent, + * then drop the reference vp held on it + * + * if VNODE_UPDATE_NAME, + * then drop string ref on v_name if it exists, and if name is non-NULL + * then pick up a string reference on name and record it in v_name... + * optionally pass in the length and hashval of name if known + * + * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp + */ +void +vnode_update_identity(vnode_t vp, vnode_t dvp, char *name, int name_len, int name_hashval, int flags) +{ + struct namecache *ncp; + vnode_t old_parentvp = NULLVP; + + + if (flags & VNODE_UPDATE_PARENT) { + if (dvp && vnode_ref(dvp) != 0) + dvp = NULLVP; + } else + dvp = NULLVP; + name_cache_lock(); + + if ( (flags & VNODE_UPDATE_NAME) && (name != vp->v_name) ) { + if (vp->v_name != NULL) { + remove_name_locked(vp->v_name); + vp->v_name = NULL; + } + if (name && *name) { + if (name_len == 0) + name_len = strlen(name); + vp->v_name = add_name_locked(name, name_len, name_hashval, 0); + } + } + if (flags & VNODE_UPDATE_PARENT) { + if (dvp != vp && dvp != vp->v_parent) { + old_parentvp = vp->v_parent; + vp->v_parent = dvp; + dvp = NULLVP; + + if (old_parentvp) + flags |= VNODE_UPDATE_CACHE; + } + } + if (flags & VNODE_UPDATE_CACHE) { + while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) + cache_delete(ncp, 1); + } + name_cache_unlock(); + + if (dvp != NULLVP) + vnode_rele(dvp); + + if (old_parentvp) { + struct uthread *ut; + + ut = get_bsdthread_info(current_thread()); + + /* + * indicated to vnode_rele that it shouldn't do a + * vnode_reclaim at this time... instead it will + * chain the vnode to the uu_vreclaims list... + * we'll be responsible for calling vnode_reclaim + * on each of the vnodes in this list... + */ + ut->uu_defer_reclaims = 1; + ut->uu_vreclaims = NULLVP; + + while ( (vp = old_parentvp) != NULLVP ) { + + vnode_lock(vp); + + vnode_rele_internal(vp, 0, 0, 1); + + /* + * check to see if the vnode is now in the state + * that would have triggered a vnode_reclaim in vnode_rele + * if it is, we save it's parent pointer and then NULL + * out the v_parent field... we'll drop the reference + * that was held on the next iteration of this loop... + * this short circuits a potential deep recursion if we + * have a long chain of parents in this state... 
+ * we'll sit in this loop until we run into
+ * a parent in this chain that is not in this state
+ *
+ * make our check and the vnode_rele atomic
+ * with respect to the current vnode we're working on
+ * by holding the vnode lock
+ * if vnode_rele deferred the vnode_reclaim and has put
+ * this vnode on the list to be reaped by us, then
+ * it has left this vnode with an iocount == 1
+ */
+ if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
+ ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
+ /*
+ * vnode_rele wanted to do a vnode_reclaim on this vnode
+ * it should be sitting on the head of the uu_vreclaims chain
+ * pull the parent pointer now so that when we do the
+ * vnode_reclaim for each of the vnodes in the uu_vreclaims
+ * list, we won't recurse back through here
+ */
+ name_cache_lock();
+ old_parentvp = vp->v_parent;
+ vp->v_parent = NULLVP;
+ name_cache_unlock();
+ } else {
+ /*
+ * we're done... we ran into a vnode that isn't
+ * being terminated
+ */
+ old_parentvp = NULLVP;
+ }
+ vnode_unlock(vp);
+ }
+ ut->uu_defer_reclaims = 0;
+
+ while ( (vp = ut->uu_vreclaims) != NULLVP) {
+ ut->uu_vreclaims = vp->v_defer_reclaimlist;
+
+ /*
+ * vnode_put will drive the vnode_reclaim if
+ * we are still the only reference on this vnode
+ */
+ vnode_put(vp);
+ }
+ }
 }
 
+
 /*
+ * Mark a vnode as having multiple hard links. HFS makes use of this
+ * because it keeps track of each link separately, and wants to know
+ * which link was actually used.
+ *
+ * This will cause the name cache to force a VNOP_LOOKUP on the vnode
+ * so that HFS can post-process the lookup. Also, volfs will call
+ * VNOP_GETATTR2 to determine the parent, instead of using v_parent.
+ */
+void vnode_set_hard_link(vnode_t vp)
+{
+ vnode_lock(vp);
+
+ /*
+ * In theory, we're changing the vnode's identity as far as the
+ * name cache is concerned, so we ought to grab the name cache lock
+ * here. However, there is already a race, and grabbing the name
+ * cache lock only makes the race window slightly smaller.
+ *
+ * The race happens because the vnode already exists in the name
+ * cache, and could be found by one thread before another thread
+ * can set the hard link flag.
+ */
+
+ vp->v_flag |= VISHARDLINK;
+
+ vnode_unlock(vp);
+}
+
+
+void vnode_uncache_credentials(vnode_t vp)
+{
+ kauth_cred_t ucred = NULL;
+
+ if (vp->v_cred) {
+ vnode_lock(vp);
+
+ ucred = vp->v_cred;
+ vp->v_cred = NULL;
+
+ vnode_unlock(vp);
+
+ if (ucred)
+ kauth_cred_rele(ucred);
+ }
+}
+
+
+void vnode_cache_credentials(vnode_t vp, vfs_context_t context)
+{
+ kauth_cred_t ucred;
+ kauth_cred_t tcred = NOCRED;
+ struct timeval tv;
+
+ ucred = vfs_context_ucred(context);
+
+ if (vp->v_cred != ucred || (vp->v_mount->mnt_kern_flag & MNTK_AUTH_OPAQUE)) {
+ vnode_lock(vp);
+
+ microuptime(&tv);
+ vp->v_cred_timestamp = tv.tv_sec;
+
+ if (vp->v_cred != ucred) {
+ kauth_cred_ref(ucred);
+
+ tcred = vp->v_cred;
+ vp->v_cred = ucred;
+ }
+ vnode_unlock(vp);
+
+ if (tcred)
+ kauth_cred_rele(tcred);
+ }
+}
+
+/* reverse_lookup - lookup by walking back up the parent chain while leveraging
+ * use of the name cache lock in order to protect our starting vnode.
+ * NOTE - assumes you already have search access to starting point. 
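+ *
+ * the per-directory fast-path test used below reduces to this
+ * sketch (a reduction of the code that follows, not an addition):
+ *
+ *	if (auth_opaque && ((tv.tv_sec - dp->v_cred_timestamp) > VCRED_EXPIRED))
+ *		break;			cached credential has expired
+ *	if (dp->v_cred != ucred)
+ *		break;			cached credential is someone else's
+ *	*dp_authorized = 1;		safe to skip vnode_authorize here
+ *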
+ * returns 0 when we have reached the root, current working dir, or chroot root + * + */ +int +reverse_lookup(vnode_t start_vp, vnode_t *lookup_vpp, struct filedesc *fdp, vfs_context_t context, int *dp_authorized) +{ + int vid, done = 0; + int auth_opaque = 0; + vnode_t dp = start_vp; + vnode_t vp = NULLVP; + kauth_cred_t ucred; + struct timeval tv; + + ucred = vfs_context_ucred(context); + *lookup_vpp = start_vp; + + name_cache_lock(); + + if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & MNTK_AUTH_OPAQUE) ) { + auth_opaque = 1; + microuptime(&tv); + } + for (;;) { + *dp_authorized = 0; + + if (auth_opaque && ((tv.tv_sec - dp->v_cred_timestamp) > VCRED_EXPIRED)) + break; + if (dp->v_cred != ucred) + break; + /* + * indicate that we're allowed to traverse this directory... + * even if we bail for some reason, this information is valid and is used + * to avoid doing a vnode_authorize + */ + *dp_authorized = 1; + + if ((dp->v_flag & VROOT) != 0 || /* Hit "/" */ + (dp == fdp->fd_cdir) || /* Hit process's working directory */ + (dp == fdp->fd_rdir)) { /* Hit process chroot()-ed root */ + done = 1; + break; + } + + if ( (vp = dp->v_parent) == NULLVP) + break; + + dp = vp; + *lookup_vpp = dp; + } /* for (;;) */ + + vid = dp->v_id; + + name_cache_unlock(); + + if (done == 0 && dp != start_vp) { + if (vnode_getwithvid(dp, vid) != 0) { + *lookup_vpp = start_vp; + } + } + + return((done == 1) ? 0 : -1); +} + +int +cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, vfs_context_t context, int *trailing_slash, int *dp_authorized) +{ + char *cp; /* pointer into pathname argument */ + int vid, vvid; + int auth_opaque = 0; + vnode_t vp = NULLVP; + vnode_t tdp = NULLVP; + kauth_cred_t ucred; + struct timeval tv; + unsigned int hash; + + ucred = vfs_context_ucred(context); + *trailing_slash = 0; + + name_cache_lock(); + + + if ( dp->v_mount && (dp->v_mount->mnt_kern_flag & MNTK_AUTH_OPAQUE) ) { + auth_opaque = 1; + microuptime(&tv); + } + for (;;) { + /* + * Search a directory. + * + * The cn_hash value is for use by cache_lookup + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. + */ + hash = 0; + cp = cnp->cn_nameptr; + + while (*cp && (*cp != '/')) { + hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + } + /* + * the crc generator can legitimately generate + * a 0... however, 0 for us means that we + * haven't computed a hash, so use 1 instead + */ + if (hash == 0) + hash = 1; + cnp->cn_hash = hash; + cnp->cn_namelen = cp - cnp->cn_nameptr; + + ndp->ni_pathlen -= cnp->cn_namelen; + ndp->ni_next = cp; + + /* + * Replace multiple slashes by a single slash and trailing slashes + * by a null. This must be done before VNOP_LOOKUP() because some + * fs's don't know about trailing slashes. Remember if there were + * trailing slashes to handle symlinks, existing non-directories + * and non-existing files that won't be directories specially later. + */ + while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { + cp++; + ndp->ni_pathlen--; + + if (*cp == '\0') { + *trailing_slash = 1; + *ndp->ni_next = '\0'; + } + } + ndp->ni_next = cp; + + cnp->cn_flags &= ~(MAKEENTRY | ISLASTCN | ISDOTDOT); + + if (*cp == '\0') + cnp->cn_flags |= ISLASTCN; + + if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' 
&& cnp->cn_nameptr[0] == '.') + cnp->cn_flags |= ISDOTDOT; + + *dp_authorized = 0; + + if (auth_opaque && ((tv.tv_sec - dp->v_cred_timestamp) > VCRED_EXPIRED)) + break; + + if (dp->v_cred != ucred) + break; + /* + * indicate that we're allowed to traverse this directory... + * even if we fail the cache lookup or decide to bail for + * some other reason, this information is valid and is used + * to avoid doing a vnode_authorize before the call to VNOP_LOOKUP + */ + *dp_authorized = 1; + + if ( (cnp->cn_flags & (ISLASTCN | ISDOTDOT)) ) { + if (cnp->cn_nameiop != LOOKUP) + break; + if (cnp->cn_flags & (LOCKPARENT | NOCACHE | ISDOTDOT)) + break; + } + if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP) + break; + + if ( (cnp->cn_flags & ISLASTCN) ) + break; + + if (vp->v_type != VDIR) { + if (vp->v_type != VLNK) + vp = NULL; + break; + } + if (vp->v_mountedhere && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) + break; + + dp = vp; + vp = NULLVP; + + cnp->cn_nameptr = ndp->ni_next + 1; + ndp->ni_pathlen--; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + } + if (vp != NULLVP) + vvid = vp->v_id; + vid = dp->v_id; + + name_cache_unlock(); + + + if ((vp != NULLVP) && (vp->v_type != VLNK) && + ((cnp->cn_flags & (ISLASTCN | LOCKPARENT | WANTPARENT | SAVESTART)) == ISLASTCN)) { + /* + * if we've got a child and it's the last component, and + * the lookup doesn't need to return the parent then we + * can skip grabbing an iocount on the parent, since all + * we're going to do with it is a vnode_put just before + * we return from 'lookup'. If it's a symbolic link, + * we need the parent in case the link happens to be + * a relative pathname. + */ + tdp = dp; + dp = NULLVP; + } else { +need_dp: + /* + * return the last directory we looked at + * with an io reference held + */ + if (dp == ndp->ni_usedvp) { + /* + * if this vnode matches the one passed in via USEDVP + * than this context already holds an io_count... just + * use vnode_get to get an extra ref for lookup to play + * with... can't use the getwithvid variant here because + * it will block behind a vnode_drain which would result + * in a deadlock (since we already own an io_count that the + * vnode_drain is waiting on)... vnode_get grabs the io_count + * immediately w/o waiting... it always succeeds + */ + vnode_get(dp); + } else if ( (vnode_getwithvid(dp, vid)) ) { + /* + * failure indicates the vnode + * changed identity or is being + * TERMINATED... in either case + * punt this lookup + */ + return (ENOENT); + } + } + if (vp != NULLVP) { + if ( (vnode_getwithvid(vp, vvid)) ) { + vp = NULLVP; + + /* + * can't get reference on the vp we'd like + * to return... if we didn't grab a reference + * on the directory (due to fast path bypass), + * then we need to do it now... 
we can't return + * with both ni_dvp and ni_vp NULL, and no + * error condition + */ + if (dp == NULLVP) { + dp = tdp; + goto need_dp; + } + } + } + ndp->ni_dvp = dp; + ndp->ni_vp = vp; + + return (0); +} + + +static vnode_t +cache_lookup_locked(vnode_t dvp, struct componentname *cnp) +{ + register struct namecache *ncp; + register struct nchashhead *ncpp; + register long namelen = cnp->cn_namelen; + char *nameptr = cnp->cn_nameptr; + unsigned int hashval = (cnp->cn_hash & NCHASHMASK); + vnode_t vp; + + ncpp = NCHHASH(dvp, cnp->cn_hash); + LIST_FOREACH(ncp, ncpp, nc_hash) { + if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { + if (memcmp(ncp->nc_name, nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) + break; + } + } + if (ncp == 0) + /* + * We failed to find an entry + */ + return (NULL); + + vp = ncp->nc_vp; + if (vp && (vp->v_flag & VISHARDLINK)) { + /* + * The file system wants a VNOP_LOOKUP on this vnode + */ + vp = NULL; + } + + return (vp); } @@ -152,26 +795,30 @@ int doingcache = 1; /* 1 => enable the cache */ // hash part of a componentname. // static unsigned int -hash_string(const char *str, int len) +hash_string(const char *cp, int len) { - unsigned int i, hashval = 0; + unsigned hash = 0; - if (len == 0) { - for(i=1; *str != 0; i++, str++) { - hashval += (unsigned char)*str * i; - } + if (len) { + while (len--) { + hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + } } else { - for(i=len; i > 0; i--, str++) { - hashval += (unsigned char)*str * (len - i + 1); - } + while (*cp != '\0') { + hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + } } - - return hashval; + /* + * the crc generator can legitimately generate + * a 0... however, 0 for us means that we + * haven't computed a hash, so use 1 instead + */ + if (hash == 0) + hash = 1; + return hash; } - - /* * Lookup an entry in the cache * @@ -193,74 +840,73 @@ cache_lookup(dvp, vpp, cnp) struct vnode **vpp; struct componentname *cnp; { - register struct namecache *ncp, *nnp; + register struct namecache *ncp; register struct nchashhead *ncpp; register long namelen = cnp->cn_namelen; char *nameptr = cnp->cn_nameptr; + unsigned int hashval = (cnp->cn_hash & NCHASHMASK); + uint32_t vid; + vnode_t vp; - if (!doingcache) { - cnp->cn_flags &= ~MAKEENTRY; - return (0); - } + name_cache_lock(); ncpp = NCHHASH(dvp, cnp->cn_hash); - for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { - nnp = ncp->nc_hash.le_next; - - if (ncp->nc_dvp == dvp && - strncmp(ncp->nc_name, nameptr, namelen) == 0 && - ncp->nc_name[namelen] == 0) { - /* Make sure the vp isn't stale. 
*/ - if ((ncp->nc_dvpid != dvp->v_id) || - (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) { - nchstats.ncs_falsehits++; - PURGE(ncp); - continue; - } - break; + LIST_FOREACH(ncp, ncpp, nc_hash) { + if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { + if (memcmp(ncp->nc_name, nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) + break; } } - /* We failed to find an entry */ if (ncp == 0) { nchstats.ncs_miss++; + name_cache_unlock(); return (0); } /* We don't want to have an entry, so dump it */ if ((cnp->cn_flags & MAKEENTRY) == 0) { nchstats.ncs_badhits++; - PURGE(ncp); + cache_delete(ncp, 1); + name_cache_unlock(); return (0); } + vp = ncp->nc_vp; /* We found a "positive" match, return the vnode */ - if (ncp->nc_vp) { - if (ncp->nc_vp->v_flag & (VUINIT|VXLOCK|VTERMINATE|VORECLAIM)) { - PURGE(ncp); - return (0); - } - + if (vp) { nchstats.ncs_goodhits++; - TOUCH(ncp); - *vpp = ncp->nc_vp; + + vid = vp->v_id; + name_cache_unlock(); + + if (vnode_getwithvid(vp, vid)) { + name_cache_lock(); + nchstats.ncs_badvid++; + name_cache_unlock(); + return (0); + } + *vpp = vp; return (-1); } /* We found a negative match, and want to create it, so purge */ - if (cnp->cn_nameiop == CREATE) { + if (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) { nchstats.ncs_badhits++; - PURGE(ncp); + cache_delete(ncp, 1); + name_cache_unlock(); return (0); } /* * We found a "negative" match, ENOENT notifies client of this match. - * The nc_vpid field records whether this is a whiteout. + * The nc_whiteout field records whether this is a whiteout. */ nchstats.ncs_neghits++; - TOUCH(ncp); - cnp->cn_flags |= ncp->nc_vpid; + + if (ncp->nc_whiteout) + cnp->cn_flags |= ISWHITEOUT; + name_cache_unlock(); return (ENOENT); } @@ -273,57 +919,69 @@ cache_enter(dvp, vp, cnp) struct vnode *vp; struct componentname *cnp; { - register struct namecache *ncp; + register struct namecache *ncp, *negp; register struct nchashhead *ncpp; - if (!doingcache) - return; + if (cnp->cn_hash == 0) + cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); + + name_cache_lock(); + /* if the entry is for -ve caching vp is null */ + if ((vp != NULLVP) && (LIST_FIRST(&vp->v_nclinks))) { + /* + * someone beat us to the punch.. + * this vnode is already in the cache + */ + name_cache_unlock(); + return; + } /* * We allocate a new entry if we are less than the maximum - * allowed and the one at the front of the LRU list is in use. - * Otherwise we use the one at the front of the LRU list. + * allowed and the one at the front of the list is in use. + * Otherwise we use the one at the front of the list. */ - if (numcache < desiredvnodes && - ((ncp = nclruhead.tqh_first) == NULL || - ncp->nc_hash.le_prev != 0)) { - /* Add one more entry */ - ncp = (struct namecache *) - _MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK); + if (numcache < desiredNodes && + ((ncp = nchead.tqh_first) == NULL || + ncp->nc_hash.le_prev != 0)) { + /* + * Allocate one more entry + */ + ncp = (struct namecache *)_MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK); numcache++; - } else if (ncp = nclruhead.tqh_first) { - /* reuse an old entry */ - TAILQ_REMOVE(&nclruhead, ncp, nc_lru); + } else { + /* + * reuse an old entry + */ + ncp = TAILQ_FIRST(&nchead); + TAILQ_REMOVE(&nchead, ncp, nc_entry); + if (ncp->nc_hash.le_prev != 0) { -#if DIAGNOSTIC - if (ncp->nc_hash.le_next == ncp) - panic("cache_enter: le_next"); -#endif - LIST_REMOVE(ncp, nc_hash); - remove_name(ncp->nc_name); - ncp->nc_name = NULL; - ncp->nc_hash.le_prev = 0; + /* + * still in use... 
we need to + * delete it before re-using it + */ + nchstats.ncs_stolen++; + cache_delete(ncp, 0); } - } else { - /* give up */ - return; } + nchstats.ncs_enters++; /* * Fill in cache info, if vp is NULL this is a "negative" cache entry. - * For negative entries, we have to record whether it is a whiteout. - * the whiteout flag is stored in the nc_vpid field which is - * otherwise unused. */ ncp->nc_vp = vp; - if (vp) - ncp->nc_vpid = vp->v_id; - else - ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT; ncp->nc_dvp = dvp; - ncp->nc_dvpid = dvp->v_id; - ncp->nc_name = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); - TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); + ncp->nc_hashval = cnp->cn_hash; + ncp->nc_whiteout = FALSE; + ncp->nc_name = add_name_locked(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); + + /* + * make us the newest entry in the cache + * i.e. we'll be the last to be stolen + */ + TAILQ_INSERT_TAIL(&nchead, ncp, nc_entry); + ncpp = NCHHASH(dvp, cnp->cn_hash); #if DIAGNOSTIC { @@ -334,124 +992,294 @@ cache_enter(dvp, vp, cnp) panic("cache_enter: duplicate"); } #endif + /* + * make us available to be found via lookup + */ LIST_INSERT_HEAD(ncpp, ncp, nc_hash); + + if (vp) { + /* + * add to the list of name cache entries + * that point at vp + */ + LIST_INSERT_HEAD(&vp->v_nclinks, ncp, nc_un.nc_link); + } else { + /* + * this is a negative cache entry (vp == NULL) + * stick it on the negative cache list + * and record the whiteout state + */ + TAILQ_INSERT_TAIL(&neghead, ncp, nc_un.nc_negentry); + + if (cnp->cn_flags & ISWHITEOUT) + ncp->nc_whiteout = TRUE; + nchstats.ncs_negtotal++; + + if (nchstats.ncs_negtotal > desiredNegNodes) { + /* + * if we've reached our desired limit + * of negative cache entries, delete + * the oldest + */ + negp = TAILQ_FIRST(&neghead); + TAILQ_REMOVE(&neghead, negp, nc_un.nc_negentry); + + cache_delete(negp, 1); + } + } + /* + * add us to the list of name cache entries that + * are children of dvp + */ + LIST_INSERT_HEAD(&dvp->v_ncchildren, ncp, nc_child); + + name_cache_unlock(); } + +/* + * Initialize CRC-32 remainder table. 
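+ *
+ * the table drives the name hash, folding in one octet at a time:
+ *
+ *	hash ^= crc32tab[(hash >> 24) ^ octet];
+ *
+ * note the xor-only fold: a strict CRC-32 would also shift, i.e.
+ * crc = (crc << 8) ^ crc32tab[(crc >> 24) ^ octet], so this is a
+ * hash built from CRC machinery rather than a true CRC.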
+ */ +static void init_crc32(void) +{ + /* + * the CRC-32 generator polynomial is: + * x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^10 + * + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + */ + unsigned int crc32_polynomial = 0x04c11db7; + unsigned int i,j; + + /* + * pre-calculate the CRC-32 remainder for each possible octet encoding + */ + for (i = 0; i < 256; i++) { + unsigned int crc_rem = i << 24; + + for (j = 0; j < 8; j++) { + if (crc_rem & 0x80000000) + crc_rem = (crc_rem << 1) ^ crc32_polynomial; + else + crc_rem = (crc_rem << 1); + } + crc32tab[i] = crc_rem; + } +} + + /* * Name cache initialization, from vfs_init() when we are booting */ void -nchinit() +nchinit(void) +{ + desiredNegNodes = (desiredvnodes / 10); + desiredNodes = desiredvnodes + desiredNegNodes; + + TAILQ_INIT(&nchead); + TAILQ_INIT(&neghead); + + init_crc32(); + + nchashtbl = hashinit(MAX(4096, (2 *desiredNodes)), M_CACHE, &nchash); + nchashmask = nchash; + nchash++; + + init_string_table(); + + /* Allocate mount list lock group attribute and group */ + namecache_lck_grp_attr= lck_grp_attr_alloc_init(); + lck_grp_attr_setstat(namecache_lck_grp_attr); + + namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr); + + /* Allocate mount list lock attribute */ + namecache_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(namecache_lck_attr); + + /* Allocate mount list lock */ + namecache_mtx_lock = lck_mtx_alloc_init(namecache_lck_grp, namecache_lck_attr); + + +} + +void +name_cache_lock(void) { - static void init_string_table(void); + lck_mtx_lock(namecache_mtx_lock); +} - TAILQ_INIT(&nclruhead); - nchashtbl = hashinit(MAX(4096, desiredvnodes), M_CACHE, &nchash); +void +name_cache_unlock(void) +{ + lck_mtx_unlock(namecache_mtx_lock); - init_string_table(); } int resize_namecache(u_int newsize) { - struct nchashhead *new_table; - struct nchashhead *old_table; - struct nchashhead *old_head, *head; - struct namecache *entry, *next; - uint32_t i; - u_long new_mask, old_mask; + struct nchashhead *new_table; + struct nchashhead *old_table; + struct nchashhead *old_head, *head; + struct namecache *entry, *next; + uint32_t i, hashval; + int dNodes, dNegNodes; + u_long new_size, old_size; + + dNegNodes = (newsize / 10); + dNodes = newsize + dNegNodes; // we don't support shrinking yet - if (newsize < nchash) { + if (dNodes < desiredNodes) { return 0; } + new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask); + new_size = nchashmask + 1; - new_table = hashinit(newsize, M_CACHE, &new_mask); if (new_table == NULL) { return ENOMEM; } + name_cache_lock(); // do the switch! 
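+ // (aside, not part of this change: hashinit() rounds the table
+ // size to a power of two and returns size-1 through its third
+ // argument, so a bucket is always selected by masking, e.g.
+ //
+ //	struct nchashhead *head = &new_table[hashval & nchashmask];
+ //
+ // which is why NCHHASH() masks with nchashmask while nchash
+ // holds the table size)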
old_table = nchashtbl; nchashtbl = new_table; - old_mask = nchash; - nchash = new_mask; + old_size = nchash; + nchash = new_size; // walk the old table and insert all the entries into // the new table // - for(i=0; i <= old_mask; i++) { + for(i=0; i < old_size; i++) { old_head = &old_table[i]; for (entry=old_head->lh_first; entry != NULL; entry=next) { // // XXXdbg - Beware: this assumes that hash_string() does // the same thing as what happens in // lookup() over in vfs_lookup.c - head = NCHHASH(entry->nc_dvp, hash_string(entry->nc_name, 0)); - + hashval = hash_string(entry->nc_name, 0); + entry->nc_hashval = hashval; + head = NCHHASH(entry->nc_dvp, hashval); + next = entry->nc_hash.le_next; LIST_INSERT_HEAD(head, entry, nc_hash); } } + desiredNodes = dNodes; + desiredNegNodes = dNegNodes; + name_cache_unlock(); FREE(old_table, M_CACHE); return 0; } +static void +cache_delete(struct namecache *ncp, int age_entry) +{ + nchstats.ncs_deletes++; + + if (ncp->nc_vp) { + LIST_REMOVE(ncp, nc_un.nc_link); + } else { + TAILQ_REMOVE(&neghead, ncp, nc_un.nc_negentry); + nchstats.ncs_negtotal--; + } + LIST_REMOVE(ncp, nc_child); + + LIST_REMOVE(ncp, nc_hash); + /* + * this field is used to indicate + * that the entry is in use and + * must be deleted before it can + * be reused... + */ + ncp->nc_hash.le_prev = NULL; + + if (age_entry) { + /* + * make it the next one available + * for cache_enter's use + */ + TAILQ_REMOVE(&nchead, ncp, nc_entry); + TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry); + } + remove_name_locked(ncp->nc_name); + ncp->nc_name = NULL; +} + + +/* + * purge the entry associated with the + * specified vnode from the name cache + */ +void +cache_purge(vnode_t vp) +{ + struct namecache *ncp; + + if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL)) + return; + + name_cache_lock(); + while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) + cache_delete(ncp, 1); + while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) ) + cache_delete(ncp, 1); + + name_cache_unlock(); +} /* - * Invalidate a all entries to particular vnode. - * - * We actually just increment the v_id, that will do it. The entries will - * be purged by lookup as they get found. If the v_id wraps around, we - * need to ditch the entire cache, to avoid confusion. No valid vnode will - * ever have (v_id == 0). + * Purge all negative cache entries that are children of the + * given vnode. A case-insensitive file system (or any file + * system that has multiple equivalent names for the same + * directory entry) can use this when creating or renaming + * to remove negative entries that may no longer apply. */ void -cache_purge(vp) - struct vnode *vp; +cache_purge_negatives(vnode_t vp) { struct namecache *ncp; - struct nchashhead *ncpp; - vp->v_id = ++nextvnodeid; - if (nextvnodeid != 0) - return; - for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { - while (ncp = ncpp->lh_first) - PURGE(ncp); - } - vp->v_id = ++nextvnodeid; + name_cache_lock(); + + LIST_FOREACH(ncp, &vp->v_ncchildren, nc_child) + if (ncp->nc_vp == NULL) + cache_delete(ncp , 1); + + name_cache_unlock(); } /* * Flush all entries referencing a particular filesystem. * * Since we need to check it anyway, we will flush all the invalid - * entriess at the same time. + * entries at the same time. 
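One detail of the resize walk above worth isolating: each entry's forward pointer has to be captured before LIST_INSERT_HEAD rewrites the entry's linkage, or the traversal of the old chain is lost. A compilable toy version; the node type and hash are made up for the sketch:

#include <sys/queue.h>
#include <stdio.h>

struct node {
        LIST_ENTRY(node) hash;
        unsigned int key;
};
LIST_HEAD(bucket, node);

static void rehash(struct bucket *old, unsigned long old_size,
                   struct bucket *new, unsigned long new_mask)
{
        struct node *entry, *next;
        unsigned long i;

        for (i = 0; i < old_size; i++) {
                for (entry = LIST_FIRST(&old[i]); entry != NULL; entry = next) {
                        next = LIST_NEXT(entry, hash); /* save before relinking */
                        LIST_INSERT_HEAD(&new[entry->key & new_mask], entry, hash);
                }
        }
}

int main(void)
{
        struct bucket old[2], new[4];
        struct node a = { .key = 5 }, b = { .key = 1 };
        unsigned long i;

        for (i = 0; i < 2; i++) LIST_INIT(&old[i]);
        for (i = 0; i < 4; i++) LIST_INIT(&new[i]);
        LIST_INSERT_HEAD(&old[1], &a, hash);
        LIST_INSERT_HEAD(&old[1], &b, hash);

        rehash(old, 2, new, 3);         /* both keys mask to new bucket 1 */
        printf("bucket1 head key: %u\n", LIST_FIRST(&new[1])->key);
        return 0;
}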
*/ void cache_purgevfs(mp) struct mount *mp; { struct nchashhead *ncpp; - struct namecache *ncp, *nnp; + struct namecache *ncp; + name_cache_lock(); /* Scan hash tables for applicable entries */ - for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { - for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { - nnp = ncp->nc_hash.le_next; - if (ncp->nc_dvpid != ncp->nc_dvp->v_id || - (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) || - ncp->nc_dvp->v_mount == mp) { - PURGE(ncp); + for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { +restart: + for (ncp = ncpp->lh_first; ncp != 0; ncp = ncp->nc_hash.le_next) { + if (ncp->nc_dvp->v_mount == mp) { + cache_delete(ncp, 0); + goto restart; } } } + name_cache_unlock(); } @@ -476,7 +1304,7 @@ typedef struct string_t { static int -resize_string_ref_table() +resize_string_ref_table(void) { struct stringhead *new_table; struct stringhead *old_table; @@ -533,11 +1361,23 @@ init_string_table(void) char * -add_name(const char *name, size_t len, u_int hashval, u_int flags) +vfs_addname(const char *name, size_t len, u_int hashval, u_int flags) +{ + char * ptr; + + name_cache_lock(); + ptr = add_name_locked(name, len, hashval, flags); + name_cache_unlock(); + + return(ptr); +} + +static char * +add_name_locked(const char *name, size_t len, u_int hashval, __unused u_int flags) { struct stringhead *head; string_t *entry; - int chain_len = 0; + uint32_t chain_len = 0; // // If the table gets more than 3/4 full, resize it @@ -547,14 +1387,13 @@ add_name(const char *name, size_t len, u_int hashval, u_int flags) printf("failed to resize the hash table.\n"); } } - if (hashval == 0) { - hashval = hash_string(name, len); + hashval = hash_string(name, 0); } head = &string_ref_table[hashval & string_table_mask]; for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) { - if (strncmp(entry->str, name, len) == 0 && entry->str[len] == '\0') { + if (memcmp(entry->str, name, len) == 0 && entry->str[len] == '\0') { entry->refcount++; num_dups++; break; @@ -573,11 +1412,11 @@ add_name(const char *name, size_t len, u_int hashval, u_int flags) filled_buckets++; } - LIST_INSERT_HEAD(head, entry, hash_chain); entry->str = (char *)((char *)entry + sizeof(string_t)); strncpy(entry->str, name, len); entry->str[len] = '\0'; entry->refcount = 1; + LIST_INSERT_HEAD(head, entry, hash_chain); if (chain_len > max_chain_len) { max_chain_len = chain_len; @@ -591,11 +1430,26 @@ add_name(const char *name, size_t len, u_int hashval, u_int flags) } int -remove_name(const char *nameref) +vfs_removename(const char *nameref) +{ + int i; + + name_cache_lock(); + i = remove_name_locked(nameref); + name_cache_unlock(); + + return(i); + +} + + +static int +remove_name_locked(const char *nameref) { struct stringhead *head; string_t *entry; uint32_t hashval; + char * ptr; hashval = hash_string(nameref, 0); head = &string_ref_table[hashval & string_table_mask]; @@ -607,6 +1461,7 @@ remove_name(const char *nameref) if (head->lh_first == NULL) { filled_buckets--; } + ptr = entry->str; entry->str = NULL; nstrings--; @@ -628,12 +1483,14 @@ dump_string_table(void) { struct stringhead *head; string_t *entry; - int i; + u_long i; - for(i=0; i <= string_table_mask; i++) { + name_cache_lock(); + for (i = 0; i <= string_table_mask; i++) { head = &string_ref_table[i]; for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) { printf("%6d - %s\n", entry->refcount, entry->str); } } + name_cache_unlock(); } diff --git a/bsd/vfs/vfs_cluster.c 
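Stepping back to the string table that this hunk wraps with vfs_addname()/vfs_removename(): it is a refcounted intern table, where re-adding a name just bumps a count and removal frees only at zero. A single-chain userland reduction follows, with a pthread mutex standing in for name_cache_lock(); everything else here is illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sref {
        struct sref *next;
        int refcount;
        char str[];                     /* name stored inline, as in the patch */
};

static struct sref *chain;              /* one bucket, for brevity */
static pthread_mutex_t slock = PTHREAD_MUTEX_INITIALIZER;

static char *addname(const char *name, size_t len)
{
        struct sref *e;

        pthread_mutex_lock(&slock);
        for (e = chain; e != NULL; e = e->next)
                if (memcmp(e->str, name, len) == 0 && e->str[len] == '\0')
                        break;          /* already interned: share it */
        if (e == NULL) {
                e = malloc(sizeof(*e) + len + 1);
                memcpy(e->str, name, len);
                e->str[len] = '\0';
                e->refcount = 0;
                e->next = chain;
                chain = e;
        }
        e->refcount++;
        pthread_mutex_unlock(&slock);
        return e->str;
}

static void removename(const char *nameref)
{
        struct sref **pp, *e;

        pthread_mutex_lock(&slock);
        for (pp = &chain; (e = *pp) != NULL; pp = &e->next) {
                if (e->str == nameref) {
                        if (--e->refcount == 0) {
                                *pp = e->next;  /* last reference: unchain and free */
                                free(e);
                        }
                        break;
                }
        }
        pthread_mutex_unlock(&slock);
}

int main(void)
{
        char *p = addname("vnode", 5);
        char *q = addname("vnode", 5);  /* same storage, refcount now 2 */

        printf("%s %s (shared: %d)\n", p, q, p == q);
        removename(p);
        removename(q);                  /* freed here */
        return 0;
}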
b/bsd/vfs/vfs_cluster.c index 616f09e1c..29a38b7c2 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,26 +56,34 @@ */ #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include +#include #include #include -#include -#include +#include #include #include +#include +#include + +#include +#include +#include #include + + #define CL_READ 0x01 #define CL_ASYNC 0x02 #define CL_COMMIT 0x04 @@ -87,6 +95,7 @@ #define CL_DEV_MEMORY 0x200 #define CL_PRESERVE 0x400 #define CL_THROTTLE 0x800 +#define CL_KEEPCACHED 0x1000 struct clios { @@ -96,57 +105,188 @@ struct clios { int io_wanted; /* someone is sleeping waiting for a change in state */ }; +static lck_grp_t *cl_mtx_grp; +static lck_attr_t *cl_mtx_attr; +static lck_grp_attr_t *cl_mtx_grp_attr; +static lck_mtx_t *cl_mtxp; + + +static int cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int non_rounded_size, + int flags, buf_t real_bp, struct clios *iostate); +static int cluster_iodone(buf_t bp, void *dummy); +static int cluster_rd_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize); +static int cluster_hard_throttle_on(vnode_t vp); + +static int cluster_read_x(vnode_t vp, struct uio *uio, off_t filesize, int flags); +static int cluster_write_x(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, + off_t headOff, off_t tailOff, int flags); +static int cluster_nocopy_read(vnode_t vp, struct uio *uio, off_t filesize); +static int cluster_nocopy_write(vnode_t vp, struct uio *uio, off_t newEOF); +static int cluster_phys_read(vnode_t vp, struct uio *uio, off_t filesize); +static int cluster_phys_write(vnode_t vp, struct uio *uio, off_t newEOF); +static int cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, int xsize, int flags); -static void cluster_zero(upl_t upl, vm_offset_t upl_offset, - int size, struct buf *bp); -static int cluster_read_x(struct vnode *vp, struct uio *uio, - off_t filesize, int devblocksize, int flags); -static int cluster_write_x(struct vnode *vp, struct uio *uio, - off_t oldEOF, off_t newEOF, off_t headOff, - off_t tailOff, int devblocksize, int flags); -static int cluster_nocopy_read(struct vnode *vp, struct uio *uio, - off_t filesize, int devblocksize, int flags); -static int cluster_nocopy_write(struct vnode *vp, struct uio *uio, - off_t newEOF, int devblocksize, int flags); -static int cluster_phys_read(struct vnode *vp, struct uio *uio, - off_t filesize, int devblocksize, int flags); -static int cluster_phys_write(struct vnode *vp, struct uio *uio, - off_t newEOF, int devblocksize, int flags); -static int cluster_align_phys_io(struct vnode *vp, struct uio *uio, - addr64_t usr_paddr, int xsize, int devblocksize, int flags); -static int cluster_push_x(struct vnode *vp, off_t EOF, unsigned int first, unsigned int last, int can_delay); -static int cluster_try_push(struct vnode *vp, off_t EOF, int can_delay, int push_all); - -static int sparse_cluster_switch(struct vnode *vp, off_t EOF); -static int sparse_cluster_push(struct vnode *vp, off_t EOF, int push_all); -static int sparse_cluster_add(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last); +static void cluster_rd_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct cl_readahead *ra); + +static int 
cluster_push_x(vnode_t vp, struct cl_extent *, off_t EOF, int flags); +static void cluster_push_EOF(vnode_t vp, off_t EOF); + +static int cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int can_delay, int push_all); + +static void sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF); +static void sparse_cluster_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_all); +static void sparse_cluster_add(struct cl_writebehind *, vnode_t vp, struct cl_extent *, off_t EOF); static kern_return_t vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, int *setcountp); -static kern_return_t vfs_drt_unmark_pages(void **cmapp, off_t offset, u_int length); static kern_return_t vfs_drt_get_cluster(void **cmapp, off_t *offsetp, u_int *lengthp); static kern_return_t vfs_drt_control(void **cmapp, int op_type); -int ubc_page_op_with_control __P((memory_object_control_t, off_t, int, ppnum_t *, int *)); - +int is_file_clean(vnode_t, off_t); /* * throttle the number of async writes that * can be outstanding on a single vnode * before we issue a synchronous write */ -#define ASYNC_THROTTLE 18 -#define HARD_THROTTLE_MAXCNT 1 -#define HARD_THROTTLE_MAXSIZE (64 * 1024) +#define HARD_THROTTLE_MAXCNT 0 +#define HARD_THROTTLE_MAXSIZE (64 * 1024) int hard_throttle_on_root = 0; struct timeval priority_IO_timestamp_for_root; +void +cluster_init(void) { + /* + * allocate lock group attribute and group + */ + cl_mtx_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(cl_mtx_grp_attr); + cl_mtx_grp = lck_grp_alloc_init("cluster I/O", cl_mtx_grp_attr); + + /* + * allocate the lock attribute + */ + cl_mtx_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(clf_mtx_attr); + + /* + * allocate and initialize mutex's used to protect updates and waits + * on the cluster_io context + */ + cl_mtxp = lck_mtx_alloc_init(cl_mtx_grp, cl_mtx_attr); + + if (cl_mtxp == NULL) + panic("cluster_init: failed to allocate cl_mtxp"); +} + + + +#define CLW_ALLOCATE 0x01 +#define CLW_RETURNLOCKED 0x02 +/* + * if the read ahead context doesn't yet exist, + * allocate and initialize it... + * the vnode lock serializes multiple callers + * during the actual assignment... first one + * to grab the lock wins... the other callers + * will release the now unnecessary storage + * + * once the context is present, try to grab (but don't block on) + * the lock associated with it... if someone + * else currently owns it, than the read + * will run without read-ahead. this allows + * multiple readers to run in parallel and + * since there's only 1 read ahead context, + * there's no real loss in only allowing 1 + * reader to have read-ahead enabled. + */ +static struct cl_readahead * +cluster_get_rap(vnode_t vp) +{ + struct ubc_info *ubc; + struct cl_readahead *rap; + + ubc = vp->v_ubcinfo; + + if ((rap = ubc->cl_rahead) == NULL) { + MALLOC_ZONE(rap, struct cl_readahead *, sizeof *rap, M_CLRDAHEAD, M_WAITOK); + + bzero(rap, sizeof *rap); + rap->cl_lastr = -1; + lck_mtx_init(&rap->cl_lockr, cl_mtx_grp, cl_mtx_attr); + + vnode_lock(vp); + + if (ubc->cl_rahead == NULL) + ubc->cl_rahead = rap; + else { + lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp); + FREE_ZONE((void *)rap, sizeof *rap, M_CLRDAHEAD); + rap = ubc->cl_rahead; + } + vnode_unlock(vp); + } + if (lck_mtx_try_lock(&rap->cl_lockr) == TRUE) + return(rap); + + return ((struct cl_readahead *)NULL); +} + + +/* + * if the write behind context doesn't yet exist, + * and CLW_ALLOCATE is specified, allocate and initialize it... 
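cluster_get_rap() above is an instance of a general lazy-initialization pattern: allocate the context without holding the lock, then take the lock and either install the new context or discard it if another thread won the race. Boiled down with pthreads below; the unlocked first read mirrors the kernel code, and in portable userland C that read would want to be atomic:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { long cl_lastr; };          /* illustrative payload */

static struct ctx *installed;
static pthread_mutex_t vlock = PTHREAD_MUTEX_INITIALIZER; /* the vnode lock's role */

static struct ctx *get_ctx(void)
{
        struct ctx *c;

        if ((c = installed) == NULL) {
                c = calloc(1, sizeof(*c));      /* optimistic: may lose the race */
                c->cl_lastr = -1;

                pthread_mutex_lock(&vlock);
                if (installed == NULL) {
                        installed = c;          /* first caller wins */
                } else {
                        free(c);                /* loser releases its copy... */
                        c = installed;          /* ...and uses the winner's */
                }
                pthread_mutex_unlock(&vlock);
        }
        return c;
}

int main(void)
{
        printf("ctx %p == ctx %p\n", (void *)get_ctx(), (void *)get_ctx());
        return 0;
}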
+ * the vnode lock serializes multiple callers + * during the actual assignment... first one + * to grab the lock wins... the other callers + * will release the now unnecessary storage + * + * if CLW_RETURNLOCKED is set, grab (blocking if necessary) + * the lock associated with the write behind context before + * returning + */ + +static struct cl_writebehind * +cluster_get_wbp(vnode_t vp, int flags) +{ + struct ubc_info *ubc; + struct cl_writebehind *wbp; + + ubc = vp->v_ubcinfo; + + if ((wbp = ubc->cl_wbehind) == NULL) { + + if ( !(flags & CLW_ALLOCATE)) + return ((struct cl_writebehind *)NULL); + + MALLOC_ZONE(wbp, struct cl_writebehind *, sizeof *wbp, M_CLWRBEHIND, M_WAITOK); + + bzero(wbp, sizeof *wbp); + lck_mtx_init(&wbp->cl_lockw, cl_mtx_grp, cl_mtx_attr); + + vnode_lock(vp); + + if (ubc->cl_wbehind == NULL) + ubc->cl_wbehind = wbp; + else { + lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp); + FREE_ZONE((void *)wbp, sizeof *wbp, M_CLWRBEHIND); + wbp = ubc->cl_wbehind; + } + vnode_unlock(vp); + } + if (flags & CLW_RETURNLOCKED) + lck_mtx_lock(&wbp->cl_lockw); + + return (wbp); +} + + static int -cluster_hard_throttle_on(vp) - struct vnode *vp; +cluster_hard_throttle_on(vnode_t vp) { - static struct timeval hard_throttle_maxelapsed = { 0, 300000 }; + static struct timeval hard_throttle_maxelapsed = { 0, 200000 }; if (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV) { struct timeval elapsed; @@ -154,7 +294,7 @@ cluster_hard_throttle_on(vp) if (hard_throttle_on_root) return(1); - elapsed = time; + microuptime(&elapsed); timevalsub(&elapsed, &priority_IO_timestamp_for_root); if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <)) @@ -165,27 +305,24 @@ cluster_hard_throttle_on(vp) static int -cluster_iodone(bp) - struct buf *bp; +cluster_iodone(buf_t bp, __unused void *dummy) { - int b_flags; - int error; - int total_size; - int total_resid; - int upl_offset; - int zero_offset; - upl_t upl; - struct buf *cbp; - struct buf *cbp_head; - struct buf *cbp_next; - struct buf *real_bp; - struct vnode *vp; - struct clios *iostate; - int commit_size; - int pg_offset; - - - cbp_head = (struct buf *)(bp->b_trans_head); + int b_flags; + int error; + int total_size; + int total_resid; + int upl_offset; + int zero_offset; + upl_t upl; + buf_t cbp; + buf_t cbp_head; + buf_t cbp_next; + buf_t real_bp; + struct clios *iostate; + int commit_size; + int pg_offset; + + cbp_head = (buf_t)(bp->b_trans_head); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START, (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); @@ -209,13 +346,15 @@ cluster_iodone(bp) cbp = cbp_head; upl_offset = cbp->b_uploffset; - upl = cbp->b_pagelist; + upl = cbp->b_upl; b_flags = cbp->b_flags; real_bp = cbp->b_real_bp; - vp = cbp->b_vp; zero_offset= cbp->b_validend; iostate = (struct clios *)cbp->b_iostate; + if (real_bp) + real_bp->b_dev = cbp->b_dev; + while (cbp) { if ((cbp->b_flags & B_ERROR) && error == 0) error = cbp->b_error; @@ -232,15 +371,15 @@ cluster_iodone(bp) if (zero_offset) cluster_zero(upl, zero_offset, PAGE_SIZE - (zero_offset & PAGE_MASK), real_bp); - if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput <= (ASYNC_THROTTLE / 3))) { - vp->v_flag &= ~VTHROTTLED; - wakeup((caddr_t)&vp->v_numoutput); - } if (iostate) { + int need_wakeup = 0; + /* * someone has issued multiple I/Os asynchrounsly * and is waiting for them to complete (streaming) */ + lck_mtx_lock(cl_mtxp); + if (error && iostate->io_error == 0) iostate->io_error = error; @@ -252,8 +391,12 @@ cluster_iodone(bp) * this io stream to change */ iostate->io_wanted 
= 0; - wakeup((caddr_t)&iostate->io_wanted); + need_wakeup = 1; } + lck_mtx_unlock(cl_mtxp); + + if (need_wakeup) + wakeup((caddr_t)&iostate->io_wanted); } if ((b_flags & B_NEED_IODONE) && real_bp) { if (error) { @@ -262,7 +405,7 @@ cluster_iodone(bp) } real_bp->b_resid = total_resid; - biodone(real_bp); + buf_biodone(real_bp); } if (error == 0 && total_resid) error = EIO; @@ -273,17 +416,27 @@ cluster_iodone(bp) if (error || (b_flags & B_NOCACHE)) { int upl_abort_code; + int page_in = 0; + int page_out = 0; - if ((b_flags & B_PAGEOUT) && (error != ENXIO)) /* transient error */ + if (b_flags & B_PAGEIO) { + if (b_flags & B_READ) + page_in = 1; + else + page_out = 1; + } + if (b_flags & B_CACHE) /* leave pages in the cache unchanged on error */ upl_abort_code = UPL_ABORT_FREE_ON_EMPTY; - else if (b_flags & B_PGIN) - upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR; + else if (page_out && (error != ENXIO)) /* transient error */ + upl_abort_code = UPL_ABORT_FREE_ON_EMPTY; + else if (page_in) + upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR; else upl_abort_code = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES; ubc_upl_abort_range(upl, upl_offset - pg_offset, commit_size, - upl_abort_code); - + upl_abort_code); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, (int)upl, upl_offset - pg_offset, commit_size, 0x80000000|upl_abort_code, 0); @@ -291,11 +444,8 @@ cluster_iodone(bp) } else { int upl_commit_flags = UPL_COMMIT_FREE_ON_EMPTY; - if (b_flags & B_PHYS) { - if (b_flags & B_READ) - upl_commit_flags |= UPL_COMMIT_SET_DIRTY; - } else if ( !(b_flags & B_PAGEOUT)) - upl_commit_flags |= UPL_COMMIT_CLEAR_DIRTY; + if ((b_flags & B_PHYS) && (b_flags & B_READ)) + upl_commit_flags |= UPL_COMMIT_SET_DIRTY; if (b_flags & B_AGE) upl_commit_flags |= UPL_COMMIT_INACTIVATE; @@ -307,27 +457,24 @@ cluster_iodone(bp) (int)upl, upl_offset - pg_offset, commit_size, upl_commit_flags, 0); } - } else + } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, (int)upl, upl_offset, 0, error, 0); + } return (error); } -static void -cluster_zero(upl, upl_offset, size, bp) - upl_t upl; - vm_offset_t upl_offset; - int size; - struct buf *bp; +void +cluster_zero(upl_t upl, vm_offset_t upl_offset, int size, buf_t bp) { upl_page_info_t *pl; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_START, upl_offset, size, (int)bp, 0, 0); - if (bp == NULL || bp->b_data == NULL) { + if (bp == NULL || bp->b_datap == 0) { pl = ubc_upl_pageinfo(upl); @@ -349,62 +496,83 @@ cluster_zero(upl, upl_offset, size, bp) upl_offset += zero_cnt; } } else - bzero((caddr_t)((vm_offset_t)bp->b_data + upl_offset), size); + bzero((caddr_t)((vm_offset_t)bp->b_datap + upl_offset), size); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_END, upl_offset, size, 0, 0, 0); } + static int -cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, real_bp, iostate) - struct vnode *vp; - upl_t upl; - vm_offset_t upl_offset; - off_t f_offset; - int non_rounded_size; - int devblocksize; - int flags; - struct buf *real_bp; - struct clios *iostate; +cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int non_rounded_size, + int flags, buf_t real_bp, struct clios *iostate) { - struct buf *cbp; - u_int size; - u_int io_size; - int io_flags; - int error = 0; - int retval = 0; - struct buf *cbp_head = 0; - struct buf *cbp_tail = 0; - int buf_count = 0; - int pg_count; - int pg_offset; - u_int max_iosize; - u_int max_vectors; - int priv; - int zero_offset = 0; - int async_throttle; - - 
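The need_wakeup dance in cluster_iodone() above is the classic "signal after unlock" shape: record under the mutex that a waiter exists, drop the mutex, then issue the wakeup, so the waiter does not wake straight into a held lock. The same shape with a pthread condvar; the struct mirrors the clios fields used in this file, the rest is illustrative (the waiting side appears in a later sketch):

#include <pthread.h>

struct clios {
        pthread_mutex_t mtx;
        pthread_cond_t cv;
        int io_issued;
        int io_completed;
        int io_wanted;
};

static void io_complete(struct clios *st, int bytes)
{
        int need_wakeup = 0;

        pthread_mutex_lock(&st->mtx);
        st->io_completed += bytes;

        if (st->io_wanted) {
                /*
                 * someone is waiting on this stream to change;
                 * the local flag must be set to 1 here, or the
                 * waiter is never signalled
                 */
                st->io_wanted = 0;
                need_wakeup = 1;
        }
        pthread_mutex_unlock(&st->mtx);

        if (need_wakeup)                /* signal only after dropping the lock */
                pthread_cond_broadcast(&st->cv);
}

int main(void)
{
        struct clios st = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0 };

        io_complete(&st, 4096);
        return 0;
}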
if (devblocksize) - size = (non_rounded_size + (devblocksize - 1)) & ~(devblocksize - 1); - else - size = non_rounded_size; + buf_t cbp; + u_int size; + u_int io_size; + int io_flags; + int bmap_flags; + int error = 0; + int retval = 0; + buf_t cbp_head = NULL; + buf_t cbp_tail = NULL; + int trans_count = 0; + u_int pg_count; + int pg_offset; + u_int max_iosize; + u_int max_vectors; + int priv; + int zero_offset = 0; + int async_throttle = 0; + mount_t mp; + + mp = vp->v_mount; + + if (mp->mnt_devblocksize > 1) { + /* + * round the requested size up so that this I/O ends on a + * page boundary in case this is a 'write'... if the filesystem + * has blocks allocated to back the page beyond the EOF, we want to + * make sure to write out the zero's that are sitting beyond the EOF + * so that in case the filesystem doesn't explicitly zero this area + * if a hole is created via a lseek/write beyond the current EOF, + * it will return zeros when it's read back from the disk. If the + * physical allocation doesn't extend for the whole page, we'll + * only write/read from the disk up to the end of this allocation + * via the extent info returned from the VNOP_BLOCKMAP call. + */ + pg_offset = upl_offset & PAGE_MASK; + size = (((non_rounded_size + pg_offset) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - pg_offset; + } else { + /* + * anyone advertising a blocksize of 1 byte probably + * can't deal with us rounding up the request size + * AFP is one such filesystem/device + */ + size = non_rounded_size; + } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_START, (int)f_offset, size, upl_offset, flags, 0); - if (flags & CL_READ) { - io_flags = (B_VECTORLIST | B_READ); + io_flags = (B_READ); + bmap_flags = VNODE_READ; - vfs_io_attributes(vp, B_READ, &max_iosize, &max_vectors); + max_iosize = mp->mnt_maxreadcnt; + max_vectors = mp->mnt_segreadcnt; } else { - io_flags = (B_VECTORLIST | B_WRITEINPROG); + io_flags = 0; + bmap_flags = VNODE_WRITE; - vfs_io_attributes(vp, B_WRITE, &max_iosize, &max_vectors); + max_iosize = mp->mnt_maxwritecnt; + max_vectors = mp->mnt_segwritecnt; } + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 22)) | DBG_FUNC_NONE, max_iosize, max_vectors, mp->mnt_devblocksize, 0, 0); + /* - * make sure the maximum iosize are at least the size of a page - * and that they are multiples of the page size + * make sure the maximum iosize is a + * multiple of the page size */ max_iosize &= ~PAGE_MASK; @@ -414,20 +582,20 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, max_iosize = HARD_THROTTLE_MAXSIZE; async_throttle = HARD_THROTTLE_MAXCNT; } else - async_throttle = ASYNC_THROTTLE; + async_throttle = VNODE_ASYNC_THROTTLE; } if (flags & CL_AGE) io_flags |= B_AGE; if (flags & CL_DUMP) io_flags |= B_NOCACHE; - if (flags & CL_PAGEIN) - io_flags |= B_PGIN; - if (flags & CL_PAGEOUT) - io_flags |= B_PAGEOUT; + if (flags & (CL_PAGEIN | CL_PAGEOUT)) + io_flags |= B_PAGEIO; if (flags & CL_COMMIT) io_flags |= B_COMMIT_UPL; if (flags & CL_PRESERVE) io_flags |= B_PHYS; + if (flags & CL_KEEPCACHED) + io_flags |= B_CACHE; if ((flags & CL_READ) && ((upl_offset + non_rounded_size) & PAGE_MASK) && (!(flags & CL_NOZERO))) { /* @@ -440,50 +608,117 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, zero_offset = upl_offset + non_rounded_size; } while (size) { - int vsize; - int i; - int pg_resid; - int num_contig; - daddr_t lblkno; - daddr_t blkno; + int pg_resid; + daddr64_t blkno; + daddr64_t lblkno; if (size > max_iosize) io_size = max_iosize; else io_size = size; 
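The rounding cluster_io() now does up front, checked in isolation: extend the transfer so it ends on a page boundary, measured from the offset within the first page. The values below are arbitrary; the expression is the one in the hunk above.

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int main(void)
{
        unsigned int upl_offset = 5000;         /* arbitrary starting offset */
        unsigned int non_rounded_size = 10000;  /* arbitrary request */
        unsigned int pg_offset = upl_offset & PAGE_MASK;
        unsigned int size =
            (((non_rounded_size + pg_offset) + (PAGE_SIZE - 1)) & ~PAGE_MASK) - pg_offset;

        /* the transfer now ends exactly on a page boundary */
        assert(((upl_offset + size) & PAGE_MASK) == 0);
        /* and never shrinks the original request */
        assert(size >= non_rounded_size);

        printf("%u rounded to %u\n", non_rounded_size, size);
        return 0;
}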
- - if (error = VOP_CMAP(vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL)) { - if (error == EOPNOTSUPP) - panic("VOP_CMAP Unimplemented"); + + if ((error = VNOP_BLOCKMAP(vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL, bmap_flags, NULL))) { break; } + if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) + real_bp->b_blkno = blkno; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 24)) | DBG_FUNC_NONE, (int)f_offset, (int)blkno, io_size, zero_offset, 0); - if ( (!(flags & CL_READ) && (long)blkno == -1) || io_size == 0) { + if (io_size == 0) { + /* + * vnop_blockmap didn't return an error... however, it did + * return an extent size of 0 which means we can't + * make forward progress on this I/O... a hole in the + * file would be returned as a blkno of -1 with a non-zero io_size + * a real extent is returned with a blkno != -1 and a non-zero io_size + */ + error = EINVAL; + break; + } + if ( !(flags & CL_READ) && blkno == -1) { + off_t e_offset; + + /* + * we're writing into a 'hole' + */ if (flags & CL_PAGEOUT) { + /* + * if we got here via cluster_pageout + * then just error the request and return + * the 'hole' should already have been covered + */ error = EINVAL; break; - }; - - /* Try paging out the page individually before - giving up entirely and dumping it (it could - be mapped in a "hole" and require allocation - before the I/O: + } + if ( !(flags & CL_COMMIT)) { + /* + * currently writes always request the commit to happen + * as part of the io completion... however, if the CL_COMMIT + * flag isn't specified, than we can't issue the abort_range + * since the call site is going to abort or commit the same upl.. + * in this case we can only return an error + */ + error = EINVAL; + break; + } + /* + * we can get here if the cluster code happens to + * pick up a page that was dirtied via mmap vs + * a 'write' and the page targets a 'hole'... + * i.e. the writes to the cluster were sparse + * and the file was being written for the first time + * + * we can also get here if the filesystem supports + * 'holes' that are less than PAGE_SIZE.... because + * we can't know if the range in the page that covers + * the 'hole' has been dirtied via an mmap or not, + * we have to assume the worst and try to push the + * entire page to storage. + * + * Try paging out the page individually before + * giving up entirely and dumping it (the pageout + * path will insure that the zero extent accounting + * has been taken care of before we get back into cluster_io) */ - ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY); - if (ubc_pushdirty_range(vp, f_offset, PAGE_SIZE_64) == 0) { - error = EINVAL; + ubc_upl_abort_range(upl, trunc_page(upl_offset), PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY); + + e_offset = round_page_64(f_offset + 1); + + if (ubc_sync_range(vp, f_offset, e_offset, UBC_PUSHDIRTY) == 0) { + error = EINVAL; break; - }; - - f_offset += PAGE_SIZE_64; - upl_offset += PAGE_SIZE; - size -= PAGE_SIZE; + } + io_size = e_offset - f_offset; + + f_offset += io_size; + upl_offset += io_size; + + if (size >= io_size) + size -= io_size; + else + size = 0; + /* + * keep track of how much of the original request + * that we've actually completed... non_rounded_size + * may go negative due to us rounding the request + * to a page size multiple (i.e. 
size > non_rounded_size) + */ + non_rounded_size -= io_size; + + if (non_rounded_size <= 0) { + /* + * we've transferred all of the data in the original + * request, but we were unable to complete the tail + * of the last page because the file didn't have + * an allocation to back that portion... this is ok. + */ + size = 0; + } continue; } - lblkno = (daddr_t)(f_offset / PAGE_SIZE_64); + lblkno = (daddr64_t)(f_offset / PAGE_SIZE_64); /* * we have now figured out how much I/O we can do - this is in 'io_size' * pg_offset is the starting point in the first page for the I/O @@ -495,7 +730,7 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, /* * currently, can't deal with reading 'holes' in file */ - if ((long)blkno == -1) { + if (blkno == -1) { error = EINVAL; break; } @@ -506,7 +741,7 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, } else pg_count = (io_size + pg_offset + (PAGE_SIZE - 1)) / PAGE_SIZE; - if ((flags & CL_READ) && (long)blkno == -1) { + if ((flags & CL_READ) && blkno == -1) { int bytes_to_zero; /* @@ -518,7 +753,7 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, /* * if this upl contains the EOF and it is not a multiple of PAGE_SIZE * than 'zero_offset' will be non-zero - * if the 'hole' returned by VOP_CMAP extends all the way to the eof + * if the 'hole' returned by vnop_blockmap extends all the way to the eof * (indicated by the io_size finishing off the I/O request for this UPL) * than we're not going to issue an I/O for the * last page in this upl... we need to zero both the hole and the tail @@ -574,26 +809,39 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, upl_offset += io_size; f_offset += io_size; size -= io_size; + /* + * keep track of how much of the original request + * that we've actually completed... non_rounded_size + * may go negative due to us rounding the request + * to a page size multiple (i.e. size > non_rounded_size) + */ + non_rounded_size -= io_size; + if (non_rounded_size <= 0) { + /* + * we've transferred all of the data in the original + * request, but we were unable to complete the tail + * of the last page because the file didn't have + * an allocation to back that portion... this is ok. + */ + size = 0; + } if (cbp_head && pg_count) goto start_io; continue; - } else if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) { - real_bp->b_blkno = blkno; } - if (pg_count > max_vectors) { - io_size -= (pg_count - max_vectors) * PAGE_SIZE; - - if (io_size < 0) { + if (((pg_count - max_vectors) * PAGE_SIZE) > io_size) { io_size = PAGE_SIZE - pg_offset; pg_count = 1; - } else + } else { + io_size -= (pg_count - max_vectors) * PAGE_SIZE; pg_count = max_vectors; + } } - if ( !(vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)) + if ( !(mp->mnt_kern_flag & MNTK_VIRTUALDEV)) /* * if we're not targeting a virtual device i.e. 
a disk image * it's safe to dip into the reserve pool since real devices @@ -611,51 +859,44 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, cbp = alloc_io_buf(vp, priv); - if (flags & CL_PAGEOUT) { + u_int i; + for (i = 0; i < pg_count; i++) { - int s; - struct buf *bp; - - s = splbio(); - if (bp = incore(vp, lblkno + i)) { - if (!ISSET(bp->b_flags, B_BUSY)) { - bremfree(bp); - SET(bp->b_flags, (B_BUSY | B_INVAL)); - splx(s); - brelse(bp); - } else - panic("BUSY bp found in cluster_io"); - } - splx(s); + if (buf_invalblkno(vp, lblkno + i, 0) == EBUSY) + panic("BUSY bp found in cluster_io"); } } if (flags & CL_ASYNC) { - cbp->b_flags |= (B_CALL | B_ASYNC); - cbp->b_iodone = (void *)cluster_iodone; + if (buf_setcallback(cbp, (void *)cluster_iodone, NULL)) + panic("buf_setcallback failed\n"); } cbp->b_flags |= io_flags; cbp->b_lblkno = lblkno; cbp->b_blkno = blkno; cbp->b_bcount = io_size; - cbp->b_pagelist = upl; - cbp->b_uploffset = upl_offset; - cbp->b_trans_next = (struct buf *)0; - if (cbp->b_iostate = (void *)iostate) + if (buf_setupl(cbp, upl, upl_offset)) + panic("buf_setupl failed\n"); + + cbp->b_trans_next = (buf_t)NULL; + + if ((cbp->b_iostate = (void *)iostate)) /* * caller wants to track the state of this * io... bump the amount issued against this stream */ iostate->io_issued += io_size; - if (flags & CL_READ) + if (flags & CL_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 26)) | DBG_FUNC_NONE, - cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0); - else + (int)cbp->b_lblkno, (int)cbp->b_blkno, upl_offset, io_size, 0); + } + else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 27)) | DBG_FUNC_NONE, - cbp->b_lblkno, cbp->b_blkno, upl_offset, io_size, 0); + (int)cbp->b_lblkno, (int)cbp->b_blkno, upl_offset, io_size, 0); + } if (cbp_head) { cbp_tail->b_trans_next = cbp; @@ -664,14 +905,30 @@ cluster_io(vp, upl, upl_offset, f_offset, non_rounded_size, devblocksize, flags, cbp_head = cbp; cbp_tail = cbp; } - (struct buf *)(cbp->b_trans_head) = cbp_head; - buf_count++; + (buf_t)(cbp->b_trans_head) = cbp_head; + trans_count++; upl_offset += io_size; f_offset += io_size; size -= io_size; + /* + * keep track of how much of the original request + * that we've actually completed... non_rounded_size + * may go negative due to us rounding the request + * to a page size multiple (i.e. size > non_rounded_size) + */ + non_rounded_size -= io_size; - if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || buf_count > 8)) || size == 0) { + if (non_rounded_size <= 0) { + /* + * we've transferred all of the data in the original + * request, but we were unable to complete the tail + * of the last page because the file didn't have + * an allocation to back that portion... this is ok. 
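For reference, the list mechanics behind cbp_head/cbp_tail above, detached from the I/O itself: each component buffer is appended through b_trans_next, every member points back at the head through b_trans_head, and trans_count gates when the chain is finally issued. A toy with made-up types:

#include <stdio.h>
#include <stdlib.h>

struct cbuf {
        struct cbuf *trans_next;        /* forward chain of the transaction */
        struct cbuf *trans_head;        /* every member knows the head */
        int bcount;
};

int main(void)
{
        struct cbuf *head = NULL, *tail = NULL;
        int trans_count = 0, i;

        for (i = 0; i < 3; i++) {
                struct cbuf *cbp = calloc(1, sizeof(*cbp));

                cbp->bcount = 4096;
                if (head) {
                        tail->trans_next = cbp; /* append at the tail */
                        tail = cbp;
                } else {
                        head = cbp;             /* first buffer starts the chain */
                        tail = cbp;
                }
                cbp->trans_head = head;
                trans_count++;
        }
        printf("%d buffers, head self-consistent: %d\n",
            trans_count, head->trans_head == head);
        return 0;
}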
+ */ + size = 0; + } + if ( (!(upl_offset & PAGE_MASK) && !(flags & CL_DEV_MEMORY) && ((flags & CL_ASYNC) || trans_count > 8)) || size == 0) { /* * if we have no more I/O to issue or * the current I/O we've prepared fully @@ -687,7 +944,7 @@ start_io: cbp_head->b_flags |= B_NEED_IODONE; cbp_head->b_real_bp = real_bp; } else - cbp_head->b_real_bp = (struct buf *)NULL; + cbp_head->b_real_bp = (buf_t)NULL; if (size == 0) { /* @@ -700,39 +957,40 @@ start_io: } else cbp_head->b_validend = 0; - if (flags & CL_THROTTLE) { - while (vp->v_numoutput >= async_throttle) { - vp->v_flag |= VTHROTTLED; - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "cluster_io", 0); - } - } + if (flags & CL_THROTTLE) + (void)vnode_waitforwrites(vp, async_throttle, 0, 0, (char *)"cluster_io"); + for (cbp = cbp_head; cbp;) { - struct buf * cbp_next; + buf_t cbp_next; - if (io_flags & B_WRITEINPROG) - cbp->b_vp->v_numoutput++; + if ( !(io_flags & B_READ)) + vnode_startwrite(vp); cbp_next = cbp->b_trans_next; - (void) VOP_STRATEGY(cbp); + (void) VNOP_STRATEGY(cbp); cbp = cbp_next; } if ( !(flags & CL_ASYNC)) { + int dummy; + for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) - biowait(cbp); - - if (error = cluster_iodone(cbp_head)) { - if ((flags & CL_PAGEOUT) && (error == ENXIO)) - retval = 0; /* drop the error */ - else - retval = error; - error = 0; + buf_biowait(cbp); + + if ((error = cluster_iodone(cbp_head, (void *)&dummy))) { + if ((flags & (CL_PAGEOUT | CL_KEEPCACHED) == CL_PAGEOUT) && (error == ENXIO)) + error = 0; /* drop the error */ + else { + if (retval == 0) + retval = error; + error = 0; + } } } - cbp_head = (struct buf *)0; - cbp_tail = (struct buf *)0; + cbp_head = (buf_t)NULL; + cbp_tail = (buf_t)NULL; - buf_count = 0; + trans_count = 0; } } if (error) { @@ -741,7 +999,7 @@ start_io: io_size = 0; for (cbp = cbp_head; cbp;) { - struct buf * cbp_next; + buf_t cbp_next; upl_offset -= cbp->b_bcount; size += cbp->b_bcount; @@ -752,11 +1010,15 @@ start_io: cbp = cbp_next; } if (iostate) { + int need_wakeup = 0; + /* * update the error condition for this stream * since we never really issued the io * just go ahead and adjust it back */ + lck_mtx_lock(cl_mtxp); + if (iostate->io_error == 0) iostate->io_error = error; iostate->io_issued -= io_size; @@ -767,8 +1029,12 @@ start_io: * this io stream to change */ iostate->io_wanted = 0; - wakeup((caddr_t)&iostate->io_wanted); + need_wakeup = 0; } + lck_mtx_unlock(cl_mtxp); + + if (need_wakeup) + wakeup((caddr_t)&iostate->io_wanted); } pg_offset = upl_offset & PAGE_MASK; abort_size = (size + pg_offset + (PAGE_SIZE - 1)) & ~PAGE_MASK; @@ -797,7 +1063,7 @@ start_io: real_bp->b_flags |= B_ERROR; real_bp->b_error = error; - biodone(real_bp); + buf_biodone(real_bp); } if (retval == 0) retval = error; @@ -810,12 +1076,7 @@ start_io: static int -cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize) - struct vnode *vp; - off_t f_offset; - u_int size; - off_t filesize; - int devblocksize; +cluster_rd_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize) { int pages_in_prefetch; @@ -836,7 +1097,7 @@ cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize) size = filesize - f_offset; pages_in_prefetch = (size + (PAGE_SIZE - 1)) / PAGE_SIZE; - advisory_read(vp, filesize, f_offset, size, devblocksize); + advisory_read(vp, filesize, f_offset, size); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 49)) | DBG_FUNC_END, (int)f_offset + size, pages_in_prefetch, 0, 1, 0); @@ -847,45 +1108,41 @@ cluster_rd_prefetch(vp, f_offset, size, filesize, devblocksize) static void 
-cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize) - struct vnode *vp; - daddr_t b_lblkno; - daddr_t e_lblkno; - off_t filesize; - int devblocksize; +cluster_rd_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct cl_readahead *rap) { - daddr_t r_lblkno; - off_t f_offset; - int size_of_prefetch; + daddr64_t r_addr; + off_t f_offset; + int size_of_prefetch; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START, - b_lblkno, e_lblkno, vp->v_lastr, 0, 0); + (int)extent->b_addr, (int)extent->e_addr, (int)rap->cl_lastr, 0, 0); - if (b_lblkno == vp->v_lastr && b_lblkno == e_lblkno) { + if (extent->b_addr == rap->cl_lastr && extent->b_addr == extent->e_addr) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, - vp->v_ralen, vp->v_maxra, vp->v_lastr, 0, 0); + rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 0, 0); return; } - if (vp->v_lastr == -1 || (b_lblkno != vp->v_lastr && b_lblkno != (vp->v_lastr + 1) && - (b_lblkno != (vp->v_maxra + 1) || vp->v_ralen == 0))) { - vp->v_ralen = 0; - vp->v_maxra = 0; + if (rap->cl_lastr == -1 || (extent->b_addr != rap->cl_lastr && extent->b_addr != (rap->cl_lastr + 1) && + (extent->b_addr != (rap->cl_maxra + 1) || rap->cl_ralen == 0))) { + rap->cl_ralen = 0; + rap->cl_maxra = 0; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, - vp->v_ralen, vp->v_maxra, vp->v_lastr, 1, 0); + rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 1, 0); return; } - if (e_lblkno < vp->v_maxra) { - if ((vp->v_maxra - e_lblkno) > (MAX_UPL_TRANSFER / 4)) { + if (extent->e_addr < rap->cl_maxra) { + if ((rap->cl_maxra - extent->e_addr) > (MAX_UPL_TRANSFER / 4)) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, - vp->v_ralen, vp->v_maxra, vp->v_lastr, 2, 0); + rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 2, 0); return; } } - r_lblkno = max(e_lblkno, vp->v_maxra) + 1; - f_offset = (off_t)r_lblkno * PAGE_SIZE_64; + r_addr = max(extent->e_addr, rap->cl_maxra) + 1; + f_offset = (off_t)(r_addr * PAGE_SIZE_64); size_of_prefetch = 0; @@ -893,39 +1150,40 @@ cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize) if (size_of_prefetch) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, - vp->v_ralen, vp->v_maxra, vp->v_lastr, 3, 0); + rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 3, 0); return; } if (f_offset < filesize) { - vp->v_ralen = vp->v_ralen ? min(MAX_UPL_TRANSFER, vp->v_ralen << 1) : 1; + daddr64_t read_size; - if (((e_lblkno + 1) - b_lblkno) > vp->v_ralen) - vp->v_ralen = min(MAX_UPL_TRANSFER, (e_lblkno + 1) - b_lblkno); + rap->cl_ralen = rap->cl_ralen ? 
min(MAX_UPL_TRANSFER, rap->cl_ralen << 1) : 1; - size_of_prefetch = cluster_rd_prefetch(vp, f_offset, vp->v_ralen * PAGE_SIZE, filesize, devblocksize); + read_size = (extent->e_addr + 1) - extent->b_addr; + + if (read_size > rap->cl_ralen) { + if (read_size > MAX_UPL_TRANSFER) + rap->cl_ralen = MAX_UPL_TRANSFER; + else + rap->cl_ralen = read_size; + } + size_of_prefetch = cluster_rd_prefetch(vp, f_offset, rap->cl_ralen * PAGE_SIZE, filesize); if (size_of_prefetch) - vp->v_maxra = (r_lblkno + size_of_prefetch) - 1; + rap->cl_maxra = (r_addr + size_of_prefetch) - 1; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_END, - vp->v_ralen, vp->v_maxra, vp->v_lastr, 4, 0); + rap->cl_ralen, (int)rap->cl_maxra, (int)rap->cl_lastr, 4, 0); } int -cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags) - struct vnode *vp; - upl_t upl; - vm_offset_t upl_offset; - off_t f_offset; - int size; - off_t filesize; - int devblocksize; - int flags; +cluster_pageout(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, + int size, off_t filesize, int flags) { int io_size; int rounded_size; off_t max_size; int local_flags; + struct cl_writebehind *wbp; if (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV) /* @@ -944,6 +1202,8 @@ cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, fla local_flags |= CL_ASYNC; if ((flags & UPL_NOCOMMIT) == 0) local_flags |= CL_COMMIT; + if ((flags & UPL_KEEPCACHED)) + local_flags |= CL_KEEPCACHED; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE, @@ -988,22 +1248,16 @@ cluster_pageout(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, fla ubc_upl_abort_range(upl, upl_offset + rounded_size, size - rounded_size, UPL_ABORT_FREE_ON_EMPTY); } - vp->v_flag |= VHASBEENPAGED; + if ((wbp = cluster_get_wbp(vp, 0)) != NULL) + wbp->cl_hasbeenpaged = 1; - return (cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize, - local_flags, (struct buf *)0, (struct clios *)0)); + return (cluster_io(vp, upl, upl_offset, f_offset, io_size, + local_flags, (buf_t)NULL, (struct clios *)NULL)); } int -cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flags) - struct vnode *vp; - upl_t upl; - vm_offset_t upl_offset; - off_t f_offset; - int size; - off_t filesize; - int devblocksize; - int flags; +cluster_pagein(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, + int size, off_t filesize, int flags) { u_int io_size; int rounded_size; @@ -1048,42 +1302,45 @@ cluster_pagein(vp, upl, upl_offset, f_offset, size, filesize, devblocksize, flag ubc_upl_abort_range(upl, upl_offset + rounded_size, size - rounded_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); - retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, devblocksize, - local_flags | CL_READ | CL_PAGEIN, (struct buf *)0, (struct clios *)0); + retval = cluster_io(vp, upl, upl_offset, f_offset, io_size, + local_flags | CL_READ | CL_PAGEIN, (buf_t)NULL, (struct clios *)NULL); - if (retval == 0) { - int b_lblkno; - int e_lblkno; + if (retval == 0 && !(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF)) { + struct cl_readahead *rap; - b_lblkno = (int)(f_offset / PAGE_SIZE_64); - e_lblkno = (int) - ((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64); + rap = cluster_get_rap(vp); - if (!(flags & UPL_NORDAHEAD) && !(vp->v_flag & VRAOFF) && rounded_size == PAGE_SIZE) { - /* - * we haven't read the last page in of the file yet - * so let's try to read ahead if we're in - * a sequential access pattern - */ - cluster_rd_ahead(vp, b_lblkno, 
e_lblkno, filesize, devblocksize); + if (rap != NULL) { + struct cl_extent extent; + + extent.b_addr = (daddr64_t)(f_offset / PAGE_SIZE_64); + extent.e_addr = (daddr64_t)((f_offset + ((off_t)io_size - 1)) / PAGE_SIZE_64); + + if (rounded_size == PAGE_SIZE) { + /* + * we haven't read the last page in of the file yet + * so let's try to read ahead if we're in + * a sequential access pattern + */ + cluster_rd_ahead(vp, &extent, filesize, rap); + } + rap->cl_lastr = extent.e_addr; + + lck_mtx_unlock(&rap->cl_lockr); } - vp->v_lastr = e_lblkno; } return (retval); } int -cluster_bp(bp) - struct buf *bp; +cluster_bp(buf_t bp) { off_t f_offset; int flags; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START, - (int)bp, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); + (int)bp, (int)bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); - if (bp->b_pagelist == (upl_t) 0) - panic("cluster_bp: can't handle NULL upl yet\n"); if (bp->b_flags & B_READ) flags = CL_ASYNC | CL_READ; else @@ -1091,207 +1348,196 @@ cluster_bp(bp) f_offset = ubc_blktooff(bp->b_vp, bp->b_lblkno); - return (cluster_io(bp->b_vp, bp->b_pagelist, 0, f_offset, bp->b_bcount, 0, flags, bp, (struct clios *)0)); + return (cluster_io(bp->b_vp, bp->b_upl, 0, f_offset, bp->b_bcount, flags, bp, (struct clios *)NULL)); } int -cluster_write(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t oldEOF; - off_t newEOF; - off_t headOff; - off_t tailOff; - int devblocksize; - int flags; +cluster_write(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t headOff, off_t tailOff, int xflags) { int prev_resid; - int clip_size; + u_int clip_size; off_t max_io_size; - struct iovec *iov; int upl_size; int upl_flags; upl_t upl; int retval = 0; + int flags; + flags = xflags; + + if (vp->v_flag & VNOCACHE_DATA) + flags |= IO_NOCACHE; + + if ( (!(flags & IO_NOCACHE)) || (!uio) || (!UIO_SEG_IS_USER_SPACE(uio->uio_segflg))) { + /* + * go do a write through the cache if one of the following is true.... + * NOCACHE is not true + * there is no uio structure or it doesn't target USERSPACE + */ + return (cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags)); + } + +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ - if (vp->v_flag & VHASBEENPAGED) - { - /* - * this vnode had pages cleaned to it by - * the pager which indicates that either - * it's not very 'hot', or the system is - * being overwhelmed by a lot of dirty - * data being delayed in the VM cache... - * in either event, we'll push our remaining - * delayed data at this point... this will - * be more efficient than paging out 1 page at - * a time, and will also act as a throttle - * by delaying this client from writing any - * more data until all his delayed data has - * at least been queued to the uderlying driver. - */ - cluster_push(vp); - - vp->v_flag &= ~VHASBEENPAGED; - } - - if ( (!(vp->v_flag & VNOCACHE_DATA)) || (!uio) || (uio->uio_segflg != UIO_USERSPACE)) - { - /* - * go do a write through the cache if one of the following is true.... 
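The window arithmetic in cluster_rd_ahead() a little above, run on its own: the prefetch length doubles on each sequential hit, never exceeds MAX_UPL_TRANSFER pages, and is bumped up to at least the size of the triggering read. The field names follow the new cl_readahead; the demo access pattern is arbitrary:

#include <stdio.h>

#define MAX_UPL_TRANSFER 256

struct cl_readahead {
        long cl_lastr;
        long cl_maxra;
        int cl_ralen;
};

static int next_window(struct cl_readahead *rap, long b_addr, long e_addr)
{
        long read_size = (e_addr + 1) - b_addr;

        /* double the window, clamped to the UPL limit */
        rap->cl_ralen = rap->cl_ralen ? (rap->cl_ralen << 1) : 1;
        if (rap->cl_ralen > MAX_UPL_TRANSFER)
                rap->cl_ralen = MAX_UPL_TRANSFER;

        /* never prefetch less than the read that triggered us */
        if (read_size > rap->cl_ralen)
                rap->cl_ralen = read_size > MAX_UPL_TRANSFER ?
                    MAX_UPL_TRANSFER : (int)read_size;
        return rap->cl_ralen;
}

int main(void)
{
        struct cl_readahead ra = { -1, 0, 0 };
        long addr;

        for (addr = 0; addr < 40; addr += 8)    /* sequential 8-page reads */
                printf("pages %ld..%ld -> prefetch %d pages\n",
                    addr, addr + 7, next_window(&ra, addr, addr + 7));
        return 0;
}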
- * NOCACHE is not true - * there is no uio structure or it doesn't target USERSPACE - */ - return (cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)); - } - - while (uio->uio_resid && uio->uio_offset < newEOF && retval == 0) - { - /* - * we know we have a resid, so this is safe - * skip over any emtpy vectors - */ - iov = uio->uio_iov; - - while (iov->iov_len == 0) { - uio->uio_iov++; - uio->uio_iovcnt--; - iov = uio->uio_iov; - } - upl_size = PAGE_SIZE; - upl_flags = UPL_QUERY_OBJECT_TYPE; - - if ((vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, - &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) - { + while (uio_resid(uio) && uio->uio_offset < newEOF && retval == 0) { + u_int64_t iov_len; + u_int64_t iov_base; + /* - * the user app must have passed in an invalid address + * we know we have a resid, so this is safe + * skip over any emtpy vectors */ - return (EFAULT); - } - - /* - * We check every vector target but if it is physically - * contiguous space, we skip the sanity checks. - */ - if (upl_flags & UPL_PHYS_CONTIG) - { - if (flags & IO_HEADZEROFILL) - { - flags &= ~IO_HEADZEROFILL; - - if (retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, devblocksize, IO_HEADZEROFILL)) - return(retval); - } + iov_len = uio_iov_len(uio); - retval = cluster_phys_write(vp, uio, newEOF, devblocksize, flags); + while (iov_len == 0) { + uio_next_iov(uio); + uio->uio_iovcnt--; + iov_len = uio_iov_len(uio); + } + iov_base = uio_iov_base(uio); + + upl_size = PAGE_SIZE; + upl_flags = UPL_QUERY_OBJECT_TYPE; + + // LP64todo - fix this! + if ((vm_map_get_upl(current_map(), + CAST_DOWN(vm_offset_t, iov_base) & ~PAGE_MASK, + &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) { + /* + * the user app must have passed in an invalid address + */ + return (EFAULT); + } - if (uio->uio_resid == 0 && (flags & IO_TAILZEROFILL)) - { - return (cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, devblocksize, IO_HEADZEROFILL)); - } - } - else if ((uio->uio_resid < PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL))) - { /* - * we're here because we're don't have a physically contiguous target buffer - * go do a write through the cache if one of the following is true.... - * the total xfer size is less than a page... - * we're being asked to ZEROFILL either the head or the tail of the I/O... + * We check every vector target but if it is physically + * contiguous space, we skip the sanity checks. */ - return (cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags)); - } - else if (((int)uio->uio_offset & PAGE_MASK) || ((int)iov->iov_base & PAGE_MASK)) - { - if (((int)uio->uio_offset & PAGE_MASK) == ((int)iov->iov_base & PAGE_MASK)) - { - /* - * Bring the file offset write up to a pagesize boundary - * this will also bring the base address to a page boundary - * since they both are currently on the same offset within a page - * note: if we get here, uio->uio_resid is greater than PAGE_SIZE - * so the computed clip_size must always be less than the current uio_resid - */ - clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64)); - - /* - * Fake the resid going into the cluster_write_x call - * and restore it on the way out. 
- */ - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - else - { - /* - * can't get both the file offset and the buffer offset aligned to a page boundary - * so fire an I/O through the cache for this entire vector - */ - clip_size = iov->iov_len; - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - } - else - { - /* - * If we come in here, we know the offset into - * the file is on a pagesize boundary and the - * target buffer address is also on a page boundary - */ - max_io_size = newEOF - uio->uio_offset; - clip_size = uio->uio_resid; - if (iov->iov_len < clip_size) - clip_size = iov->iov_len; - if (max_io_size < clip_size) - clip_size = max_io_size; - - if (clip_size < PAGE_SIZE) - { - /* - * Take care of tail end of write in this vector - */ - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - else - { - /* round clip_size down to a multiple of pagesize */ - clip_size = clip_size & ~(PAGE_MASK); - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags); - if ((retval == 0) && uio->uio_resid) - retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - } /* end else */ - } /* end while */ + if (upl_flags & UPL_PHYS_CONTIG) { + int zflags; + + zflags = flags & ~IO_TAILZEROFILL; + zflags |= IO_HEADZEROFILL; + + if (flags & IO_HEADZEROFILL) { + /* + * in case we have additional vectors, we don't want to do this again + */ + flags &= ~IO_HEADZEROFILL; + + if ((retval = cluster_write_x(vp, (struct uio *)0, 0, uio->uio_offset, headOff, 0, zflags))) + return(retval); + } + retval = cluster_phys_write(vp, uio, newEOF); + + if (uio_resid(uio) == 0 && (flags & IO_TAILZEROFILL)) { + return (cluster_write_x(vp, (struct uio *)0, 0, tailOff, uio->uio_offset, 0, zflags)); + } + } + else if ((uio_resid(uio) < PAGE_SIZE) || (flags & (IO_TAILZEROFILL | IO_HEADZEROFILL))) { + /* + * we're here because we're don't have a physically contiguous target buffer + * go do a write through the cache if one of the following is true.... + * the total xfer size is less than a page... + * we're being asked to ZEROFILL either the head or the tail of the I/O... + */ + return (cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags)); + } + // LP64todo - fix this! + else if (((int)uio->uio_offset & PAGE_MASK) || (CAST_DOWN(int, iov_base) & PAGE_MASK)) { + if (((int)uio->uio_offset & PAGE_MASK) == (CAST_DOWN(int, iov_base) & PAGE_MASK)) { + /* + * Bring the file offset write up to a pagesize boundary + * this will also bring the base address to a page boundary + * since they both are currently on the same offset within a page + * note: if we get here, uio->uio_resid is greater than PAGE_SIZE + * so the computed clip_size must always be less than the current uio_resid + */ + clip_size = (PAGE_SIZE - (uio->uio_offset & PAGE_MASK_64)); + + /* + * Fake the resid going into the cluster_write_x call + * and restore it on the way out. 
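The prev_resid bookkeeping repeated throughout this function reduces to one idiom: fake a smaller resid for the inner call, then restore the total minus whatever the inner call actually consumed. In plain integers, with inner_write() as a stand-in that consumes everything it is offered:

#include <assert.h>

static int resid;                       /* stands in for uio_resid(uio) */

static void inner_write(int want)
{
        resid -= want;                  /* pretend everything was consumed */
}

static void clipped_write(int clip_size)
{
        int prev_resid = resid;

        resid = clip_size;              /* fake the resid going in */
        inner_write(clip_size);
        /* restore: total, less what the inner call actually consumed */
        resid = prev_resid - (clip_size - resid);
}

int main(void)
{
        resid = 10000;
        clipped_write(4096);
        assert(resid == 10000 - 4096);  /* only the clipped span was charged */
        return 0;
}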
+ */ + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } else { + /* + * can't get both the file offset and the buffer offset aligned to a page boundary + * so fire an I/O through the cache for this entire vector + */ + // LP64todo - fix this + clip_size = iov_len; + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } + } else { + /* + * If we come in here, we know the offset into + * the file is on a pagesize boundary and the + * target buffer address is also on a page boundary + */ + max_io_size = newEOF - uio->uio_offset; + // LP64todo - fix this + clip_size = uio_resid(uio); + if (iov_len < clip_size) + // LP64todo - fix this! + clip_size = iov_len; + if (max_io_size < clip_size) + clip_size = max_io_size; + + if (clip_size < PAGE_SIZE) { + /* + * Take care of tail end of write in this vector + */ + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } else { + /* round clip_size down to a multiple of pagesize */ + clip_size = clip_size & ~(PAGE_MASK); + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_nocopy_write(vp, uio, newEOF); + + if ((retval == 0) && uio_resid(uio)) + retval = cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } + } /* end else */ + } /* end while */ + return(retval); } static int -cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t newEOF; - int devblocksize; - int flags; +cluster_nocopy_write(vnode_t vp, struct uio *uio, off_t newEOF) { upl_t upl; upl_page_info_t *pl; - off_t upl_f_offset; vm_offset_t upl_offset; - off_t max_io_size; int io_size; int io_flag; int upl_size; @@ -1299,15 +1545,16 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) int pages_in_pl; int upl_flags; kern_return_t kret; - struct iovec *iov; int i; int force_data_sync; int error = 0; struct clios iostate; + struct cl_writebehind *wbp; + struct iovec *iov; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START, - (int)uio->uio_offset, (int)uio->uio_resid, - (int)newEOF, devblocksize, 0); + (int)uio->uio_offset, (int)uio_resid(uio), + (int)newEOF, 0, 0); /* * When we enter this routine, we know @@ -1315,8 +1562,13 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) * -- the resid is a page multiple * -- the resid will not exceed iov_len */ - cluster_try_push(vp, newEOF, 0, 1); + + if ((wbp = cluster_get_wbp(vp, CLW_RETURNLOCKED)) != NULL) { + cluster_try_push(wbp, vp, newEOF, 0, 1); + + lck_mtx_unlock(&wbp->cl_lockw); + } iostate.io_completed = 0; iostate.io_issued = 0; iostate.io_error = 0; @@ -1324,13 +1576,15 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) iov = uio->uio_iov; - while (uio->uio_resid && uio->uio_offset < newEOF && error == 0) { - io_size = uio->uio_resid; + while (uio_resid(uio) && uio->uio_offset < newEOF && error == 0) { + io_size = uio_resid(uio); if (io_size > (MAX_UPL_TRANSFER * PAGE_SIZE)) 
io_size = MAX_UPL_TRANSFER * PAGE_SIZE; - upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK; + // LP64todo - fix this! + upl_offset = CAST_DOWN(vm_offset_t, iov->iov_base) & PAGE_MASK; + upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 76)) | DBG_FUNC_START, @@ -1342,8 +1596,9 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; + // LP64todo - fix this! kret = vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, + CAST_DOWN(vm_offset_t, iov->iov_base) & ~PAGE_MASK, &upl_size, &upl, NULL, @@ -1427,10 +1682,14 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) * if there are already too many outstanding writes * wait until some complete before issuing the next */ + lck_mtx_lock(cl_mtxp); + while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_nocopy_write", 0); } + lck_mtx_unlock(cl_mtxp); + if (iostate.io_error) { /* * one of the earlier writes we issued ran into a hard error @@ -1450,15 +1709,15 @@ cluster_nocopy_write(vp, uio, newEOF, devblocksize, flags) (int)upl_offset, (int)uio->uio_offset, io_size, io_flag, 0); error = cluster_io(vp, upl, upl_offset, uio->uio_offset, - io_size, devblocksize, io_flag, (struct buf *)0, &iostate); + io_size, io_flag, (buf_t)NULL, &iostate); iov->iov_len -= io_size; - iov->iov_base += io_size; - uio->uio_resid -= io_size; + ((u_int32_t)iov->iov_base) += io_size; + uio_setresid(uio, (uio_resid(uio) - io_size)); uio->uio_offset += io_size; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END, - (int)upl_offset, (int)uio->uio_offset, (int)uio->uio_resid, error, 0); + (int)upl_offset, (int)uio->uio_offset, (int)uio_resid(uio), error, 0); } /* end while */ @@ -1467,10 +1726,14 @@ wait_for_writes: * make sure all async writes issued as part of this stream * have completed before we return */ + lck_mtx_lock(cl_mtxp); + while (iostate.io_issued != iostate.io_completed) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_write", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_nocopy_write", 0); } + lck_mtx_unlock(cl_mtxp); + if (iostate.io_error) error = iostate.io_error; @@ -1482,12 +1745,7 @@ wait_for_writes: static int -cluster_phys_write(vp, uio, newEOF, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t newEOF; - int devblocksize; - int flags; +cluster_phys_write(vnode_t vp, struct uio *uio, off_t newEOF) { upl_page_info_t *pl; addr64_t src_paddr; @@ -1500,19 +1758,33 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) int pages_in_pl; int upl_flags; kern_return_t kret; - struct iovec *iov; int error = 0; + u_int64_t iov_base; + int devblocksize; + struct cl_writebehind *wbp; + devblocksize = vp->v_mount->mnt_devblocksize; /* * When we enter this routine, we know * -- the resid will not exceed iov_len * -- the vector target address is physcially contiguous */ - cluster_try_push(vp, newEOF, 0, 1); + if ((wbp = cluster_get_wbp(vp, CLW_RETURNLOCKED)) != NULL) { - iov = uio->uio_iov; - io_size = iov->iov_len; - upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK; + cluster_try_push(wbp, vp, newEOF, 0, 1); + + lck_mtx_unlock(&wbp->cl_lockw); + } 
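And the waiting side of the iostate pair (the completion side appears in an earlier sketch): the issuing thread sleeps while more than two windows' worth of async I/O is outstanding, with msleep()/wakeup() rendered here as a condvar wait. The window constant mirrors the 2 * MAX_UPL_TRANSFER * PAGE_SIZE bound above:

#include <pthread.h>

struct clios {
        pthread_mutex_t mtx;
        pthread_cond_t cv;
        int io_issued;
        int io_completed;
        int io_wanted;
};

#define WINDOW (2 * 256 * 4096)         /* 2 * MAX_UPL_TRANSFER * PAGE_SIZE */

static void wait_for_room(struct clios *st)
{
        pthread_mutex_lock(&st->mtx);
        while ((st->io_issued - st->io_completed) > WINDOW) {
                st->io_wanted = 1;      /* ask the completion side to signal us */
                pthread_cond_wait(&st->cv, &st->mtx);
        }
        pthread_mutex_unlock(&st->mtx);
}

int main(void)
{
        struct clios st = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0, 0 };

        wait_for_room(&st);             /* nothing outstanding: returns at once */
        return 0;
}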
+#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + // LP64todo - fix this! + io_size = uio_iov_len(uio); + iov_base = uio_iov_base(uio); + upl_offset = CAST_DOWN(upl_offset_t, iov_base) & PAGE_MASK; upl_needed_size = upl_offset + io_size; pages_in_pl = 0; @@ -1520,8 +1792,9 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) upl_flags = UPL_FILE_IO | UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; + // LP64todo - fix this! kret = vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, + CAST_DOWN(upl_offset_t, iov_base) & ~PAGE_MASK, &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0); if (kret != KERN_SUCCESS) { @@ -1536,12 +1809,12 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) * This is a failure in the physical memory case. */ if (upl_size < upl_needed_size) { - kernel_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); + ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); return(EINVAL); } pl = ubc_upl_pageinfo(upl); - src_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + ((addr64_t)((u_int)iov->iov_base & PAGE_MASK)); + src_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + ((addr64_t)(iov_base & PAGE_MASK)); while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) { int head_size; @@ -1551,7 +1824,7 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) if (head_size > io_size) head_size = io_size; - error = cluster_align_phys_io(vp, uio, src_paddr, head_size, devblocksize, 0); + error = cluster_align_phys_io(vp, uio, src_paddr, head_size, 0); if (error) { ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); @@ -1570,21 +1843,21 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) * issue a synchronous write to cluster_io */ error = cluster_io(vp, upl, upl_offset, uio->uio_offset, - io_size, 0, CL_DEV_MEMORY, (struct buf *)0, (struct clios *)0); + io_size, CL_DEV_MEMORY, (buf_t)NULL, (struct clios *)NULL); } if (error == 0) { /* * The cluster_io write completed successfully, * update the uio structure */ - uio->uio_resid -= io_size; - iov->iov_len -= io_size; - iov->iov_base += io_size; + uio_setresid(uio, (uio_resid(uio) - io_size)); + uio_iov_len_add(uio, -io_size); + uio_iov_base_add(uio, io_size); uio->uio_offset += io_size; src_paddr += io_size; if (tail_size) - error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, devblocksize, 0); + error = cluster_align_phys_io(vp, uio, src_paddr, tail_size, 0); } /* * just release our hold on the physically contiguous @@ -1597,55 +1870,71 @@ cluster_phys_write(vp, uio, newEOF, devblocksize, flags) static int -cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t oldEOF; - off_t newEOF; - off_t headOff; - off_t tailOff; - int devblocksize; - int flags; +cluster_write_x(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t headOff, off_t tailOff, int flags) { upl_page_info_t *pl; upl_t upl; - vm_offset_t upl_offset; + vm_offset_t upl_offset = 0; int upl_size; off_t upl_f_offset; int pages_in_upl; int start_offset; int xfer_resid; int io_size; - int io_flags; int io_offset; int bytes_to_zero; int bytes_to_move; kern_return_t kret; int retval = 0; - int uio_resid; + int io_resid; long long total_size; long long zero_cnt; off_t zero_off; long long zero_cnt1; off_t zero_off1; - daddr_t 
start_blkno; - daddr_t last_blkno; + struct cl_extent cl; int intersection; + struct cl_writebehind *wbp; + if ((wbp = cluster_get_wbp(vp, 0)) != NULL) + { + if (wbp->cl_hasbeenpaged) { + /* + * this vnode had pages cleaned to it by + * the pager which indicates that either + * it's not very 'hot', or the system is + * being overwhelmed by a lot of dirty + * data being delayed in the VM cache... + * in either event, we'll push our remaining + * delayed data at this point... this will + * be more efficient than paging out 1 page at + * a time, and will also act as a throttle + * by delaying this client from writing any + * more data until all its delayed data has + * at least been queued to the underlying driver. + */ + if (wbp->cl_number || wbp->cl_scmap) + cluster_push_EOF(vp, newEOF); + wbp->cl_hasbeenpaged = 0; + } + } if (uio) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, (int)oldEOF, (int)newEOF, 0); + (int)uio->uio_offset, uio_resid(uio), (int)oldEOF, (int)newEOF, 0); - uio_resid = uio->uio_resid; + // LP64todo - fix this + io_resid = uio_resid(uio); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START, 0, 0, (int)oldEOF, (int)newEOF, 0); - uio_resid = 0; + io_resid = 0; } zero_cnt = 0; zero_cnt1 = 0; + zero_off = 0; + zero_off1 = 0; if (flags & IO_HEADZEROFILL) { /* @@ -1667,26 +1956,27 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) } if (flags & IO_TAILZEROFILL) { if (uio) { - zero_off1 = uio->uio_offset + uio->uio_resid; + // LP64todo - fix this + zero_off1 = uio->uio_offset + uio_resid(uio); if (zero_off1 < tailOff) zero_cnt1 = tailOff - zero_off1; } } if (zero_cnt == 0 && uio == (struct uio *) 0) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END, - retval, 0, 0, 0, 0); - return (0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END, + retval, 0, 0, 0, 0); + return (0); } - while ((total_size = (uio_resid + zero_cnt + zero_cnt1)) && retval == 0) { + while ((total_size = (io_resid + zero_cnt + zero_cnt1)) && retval == 0) { /* * for this iteration of the loop, figure out where our starting point is */ if (zero_cnt) { start_offset = (int)(zero_off & PAGE_MASK_64); upl_f_offset = zero_off - start_offset; - } else if (io_resid) { + } else if (io_resid) { start_offset = (int)(uio->uio_offset & PAGE_MASK_64); upl_f_offset = uio->uio_offset - start_offset; } else { @@ -1699,12 +1989,11 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) if (total_size > (MAX_UPL_TRANSFER * PAGE_SIZE)) total_size = MAX_UPL_TRANSFER * PAGE_SIZE; - start_blkno = (daddr_t)(upl_f_offset / PAGE_SIZE_64); + cl.b_addr = (daddr64_t)(upl_f_offset / PAGE_SIZE_64); - if (uio && !(vp->v_flag & VNOCACHE_DATA) && - (flags & (IO_SYNC | IO_HEADZEROFILL | IO_TAILZEROFILL)) == 0) { + if (uio && ((flags & (IO_NOCACHE | IO_SYNC | IO_HEADZEROFILL | IO_TAILZEROFILL)) == 0)) { /* - * assumption... 
total_size <= io_resid * because IO_HEADZEROFILL and IO_TAILZEROFILL not set */ if ((start_offset + total_size) > (MAX_UPL_TRANSFER * PAGE_SIZE)) @@ -1716,7 +2005,7 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) if (retval) break; - uio_resid -= (total_size - xfer_resid); + io_resid -= (total_size - xfer_resid); total_size = xfer_resid; start_offset = (int)(uio->uio_offset & PAGE_MASK_64); upl_f_offset = uio->uio_offset - start_offset; @@ -1760,12 +2049,17 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_START, upl_size, io_size, total_size, 0, 0); + /* + * Gather the pages from the buffer cache. + * The UPL_WILL_MODIFY flag lets the UPL subsystem know + * that we intend to modify these pages. + */ kret = ubc_create_upl(vp, - upl_f_offset, - upl_size, - &upl, - &pl, - UPL_SET_LITE); + upl_f_offset, + upl_size, + &upl, + &pl, + UPL_SET_LITE | UPL_WILL_MODIFY); if (kret != KERN_SUCCESS) panic("cluster_write: failed to get pagelist"); @@ -1785,8 +2079,8 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) if ((upl_f_offset + read_size) > newEOF) read_size = newEOF - upl_f_offset; - retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, devblocksize, - CL_READ, (struct buf *)0, (struct clios *)0); + retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, + CL_READ, (buf_t)NULL, (struct clios *)NULL); if (retval) { /* * we had an error during the read which causes us to abort @@ -1795,7 +2089,9 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) * there state and mark the failed page in error */ ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES); - ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); + + if (upl_size > PAGE_SIZE) + ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, (int)upl, 0, 0, retval, 0); @@ -1819,8 +2115,8 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) if ((upl_f_offset + upl_offset + read_size) > newEOF) read_size = newEOF - (upl_f_offset + upl_offset); - retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, devblocksize, - CL_READ, (struct buf *)0, (struct clios *)0); + retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, + CL_READ, (buf_t)NULL, (struct clios *)NULL); if (retval) { /* * we had an error during the read which causes us to abort @@ -1829,7 +2125,9 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) * modifying there state and mark the failed page in error */ ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES); - ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); + + if (upl_size > PAGE_SIZE) + ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, (int)upl, 0, 0, retval, 0); @@ -1868,8 +2166,8 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) zero_off += bytes_to_zero; io_offset += bytes_to_zero; } - if (xfer_resid && uio_resid) { - bytes_to_move = min(uio_resid, xfer_resid); + if (xfer_resid && io_resid) { + bytes_to_move = min(io_resid, xfer_resid); retval = cluster_copy_upl_data(uio, upl, io_offset, bytes_to_move); @@ -1880,7 +2178,7 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) 
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, (int)upl, 0, 0, retval, 0); } else { - uio_resid -= bytes_to_move; + io_resid -= bytes_to_move; xfer_resid -= bytes_to_move; io_offset += bytes_to_move; } @@ -1936,15 +2234,21 @@ cluster_write_x(vp, uio, oldEOF, newEOF, headOff, tailOff, devblocksize, flags) */ goto issue_io; check_cluster: + /* + * take the lock to protect our accesses + * of the writebehind and sparse cluster state + */ + wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED); + /* * calculate the last logical block number * that this delayed I/O encompassed */ - last_blkno = (upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64; + cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64); - if (vp->v_flag & VHASDIRTY) { + if (wbp->cl_scmap) { - if ( !(vp->v_flag & VNOCACHE_DATA)) { + if ( !(flags & IO_NOCACHE)) { /* * we've fallen into the sparse * cluster method of delaying dirty pages @@ -1958,7 +2262,9 @@ check_cluster: ubc_upl_commit_range(upl, 0, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - sparse_cluster_add(vp, newEOF, start_blkno, last_blkno); + sparse_cluster_add(wbp, vp, &cl, newEOF); + + lck_mtx_unlock(&wbp->cl_lockw); continue; } @@ -1980,8 +2286,9 @@ check_cluster: */ upl_size = 0; } - sparse_cluster_push(vp, ubc_getsize(vp), 1); + sparse_cluster_push(wbp, vp, newEOF, 1); + wbp->cl_number = 0; /* * no clusters of either type present at this point * so just go directly to start_new_cluster since @@ -1993,13 +2300,13 @@ check_cluster: } upl_offset = 0; - if (vp->v_clen == 0) + if (wbp->cl_number == 0) /* * no clusters currently present */ goto start_new_cluster; - for (cl_index = 0; cl_index < vp->v_clen; cl_index++) { + for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) { /* * check each cluster that we currently hold * try to merge some or all of this write into @@ -2007,42 +2314,42 @@ check_cluster: * any portion of the write remains, start a * new cluster */ - if (start_blkno >= vp->v_clusters[cl_index].start_pg) { + if (cl.b_addr >= wbp->cl_clusters[cl_index].b_addr) { /* * the current write starts at or after the current cluster */ - if (last_blkno <= (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) { + if (cl.e_addr <= (wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER)) { /* * we have a write that fits entirely * within the existing cluster limits */ - if (last_blkno > vp->v_clusters[cl_index].last_pg) + if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr) /* * update our idea of where the cluster ends */ - vp->v_clusters[cl_index].last_pg = last_blkno; + wbp->cl_clusters[cl_index].e_addr = cl.e_addr; break; } - if (start_blkno < (vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER)) { + if (cl.b_addr < (wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER)) { /* * we have a write that starts in the middle of the current cluster * but extends beyond the cluster's limit... we know this because * of the previous checks * we'll extend the current cluster to the max - * and update the start_blkno for the current write to reflect that + * and update the b_addr for the current write to reflect that * the head of it was absorbed into this cluster... 
* note that we'll always have a leftover tail in this case since * full absorbtion would have occurred in the clause above */ - vp->v_clusters[cl_index].last_pg = vp->v_clusters[cl_index].start_pg + MAX_UPL_TRANSFER; + wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + MAX_UPL_TRANSFER; if (upl_size) { - int start_pg_in_upl; + daddr64_t start_pg_in_upl; - start_pg_in_upl = upl_f_offset / PAGE_SIZE_64; + start_pg_in_upl = (daddr64_t)(upl_f_offset / PAGE_SIZE_64); - if (start_pg_in_upl < vp->v_clusters[cl_index].last_pg) { - intersection = (vp->v_clusters[cl_index].last_pg - start_pg_in_upl) * PAGE_SIZE; + if (start_pg_in_upl < wbp->cl_clusters[cl_index].e_addr) { + intersection = (int)((wbp->cl_clusters[cl_index].e_addr - start_pg_in_upl) * PAGE_SIZE); ubc_upl_commit_range(upl, upl_offset, intersection, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); @@ -2051,7 +2358,7 @@ check_cluster: upl_size -= intersection; } } - start_blkno = vp->v_clusters[cl_index].last_pg; + cl.b_addr = wbp->cl_clusters[cl_index].e_addr; } /* * we come here for the case where the current write starts @@ -2065,16 +2372,16 @@ check_cluster: /* * the current write starts in front of the cluster we're currently considering */ - if ((vp->v_clusters[cl_index].last_pg - start_blkno) <= MAX_UPL_TRANSFER) { + if ((wbp->cl_clusters[cl_index].e_addr - cl.b_addr) <= MAX_UPL_TRANSFER) { /* * we can just merge the new request into * this cluster and leave it in the cache * since the resulting cluster is still * less than the maximum allowable size */ - vp->v_clusters[cl_index].start_pg = start_blkno; + wbp->cl_clusters[cl_index].b_addr = cl.b_addr; - if (last_blkno > vp->v_clusters[cl_index].last_pg) { + if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr) { /* * the current write completely * envelops the existing cluster and since @@ -2082,7 +2389,7 @@ check_cluster: * we can just use the start and last blocknos of the write * to generate the cluster limits */ - vp->v_clusters[cl_index].last_pg = last_blkno; + wbp->cl_clusters[cl_index].e_addr = cl.e_addr; } break; } @@ -2096,16 +2403,16 @@ check_cluster: * get an intersection with the current write * */ - if (last_blkno > vp->v_clusters[cl_index].last_pg - MAX_UPL_TRANSFER) { + if (cl.e_addr > wbp->cl_clusters[cl_index].e_addr - MAX_UPL_TRANSFER) { /* * the current write extends into the proposed cluster * clip the length of the current write after first combining it's * tail with the newly shaped cluster */ - vp->v_clusters[cl_index].start_pg = vp->v_clusters[cl_index].last_pg - MAX_UPL_TRANSFER; + wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - MAX_UPL_TRANSFER; if (upl_size) { - intersection = (last_blkno - vp->v_clusters[cl_index].start_pg) * PAGE_SIZE; + intersection = (int)((cl.e_addr - wbp->cl_clusters[cl_index].b_addr) * PAGE_SIZE); if (intersection > upl_size) /* @@ -2119,7 +2426,7 @@ check_cluster: UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); upl_size -= intersection; } - last_blkno = vp->v_clusters[cl_index].start_pg; + cl.e_addr = wbp->cl_clusters[cl_index].b_addr; } /* * if we get here, there was no way to merge @@ -2130,14 +2437,14 @@ check_cluster: */ } } - if (cl_index < vp->v_clen) + if (cl_index < wbp->cl_number) /* * we found an existing cluster(s) that we * could entirely merge this I/O into */ goto delay_io; - if (vp->v_clen < MAX_CLUSTERS && !(vp->v_flag & VNOCACHE_DATA)) + if (wbp->cl_number < MAX_CLUSTERS && !(flags & IO_NOCACHE)) /* * we didn't find an 
existing cluster to * merge into, but there's room to start @@ -2151,16 +2458,23 @@ check_cluster: * pushing one of the existing ones... if none of * them are able to be pushed, we'll switch * to the sparse cluster mechanism - * cluster_try_push updates v_clen to the + * cluster_try_push updates cl_number to the * number of remaining clusters... and * returns the number of currently unused clusters */ - if (vp->v_flag & VNOCACHE_DATA) - can_delay = 0; - else - can_delay = 1; + int ret_cluster_try_push = 0; + /* if writes are not deferred, call cluster push immediately */ + if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE)) { + if (flags & IO_NOCACHE) + can_delay = 0; + else + can_delay = 1; + + ret_cluster_try_push = cluster_try_push(wbp, vp, newEOF, can_delay, 0); + } - if (cluster_try_push(vp, newEOF, can_delay, 0) == 0) { + /* execute the following regardless of whether writes are deferred or not */ + if (ret_cluster_try_push == 0) { /* * no more room in the normal cluster mechanism * so let's switch to the more expansive but expensive @@ -2175,8 +2489,10 @@ check_cluster: ubc_upl_commit_range(upl, upl_offset, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - sparse_cluster_switch(vp, newEOF); - sparse_cluster_add(vp, newEOF, start_blkno, last_blkno); + sparse_cluster_switch(wbp, vp, newEOF); + sparse_cluster_add(wbp, vp, &cl, newEOF); + + lck_mtx_unlock(&wbp->cl_lockw); continue; } @@ -2189,208 +2505,218 @@ check_cluster: * however, we don't want to push so much out that the write throttle kicks in and * hangs this thread up until some of the I/O completes... */ - while (vp->v_clen && (vp->v_numoutput <= (ASYNC_THROTTLE / 2))) - cluster_try_push(vp, newEOF, 0, 0); + if (!((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE)) { + while (wbp->cl_number && (vp->v_numoutput <= (VNODE_ASYNC_THROTTLE / 2))) + cluster_try_push(wbp, vp, newEOF, 0, 0); + } start_new_cluster: - if (vp->v_clen == 0) - vp->v_ciosiz = devblocksize; - - vp->v_clusters[vp->v_clen].start_pg = start_blkno; - vp->v_clusters[vp->v_clen].last_pg = last_blkno; - vp->v_clen++; + wbp->cl_clusters[wbp->cl_number].b_addr = cl.b_addr; + wbp->cl_clusters[wbp->cl_number].e_addr = cl.e_addr; + if (flags & IO_NOCACHE) + wbp->cl_clusters[wbp->cl_number].io_nocache = 1; + else + wbp->cl_clusters[wbp->cl_number].io_nocache = 0; + wbp->cl_number++; delay_io: if (upl_size) ubc_upl_commit_range(upl, upl_offset, upl_size, UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); + + lck_mtx_unlock(&wbp->cl_lockw); + continue; issue_io: /* + * we don't hold the vnode lock at this point + * + * because we had to ask for a UPL that provides currently non-present pages, the + * UPL has been automatically set to clear the dirty flags (both software and hardware) + * upon committing it... this is not the behavior we want since it's possible for + * pages currently present as part of a mapped file to be dirtied while the I/O is in flight. * in order to maintain some semblance of coherency with mapped writes - * we need to write the cluster back out as a multiple of the PAGESIZE - * unless the cluster encompasses the last page of the file... in this - * case we'll round out to the nearest device block boundary + * we need to drop the current upl and pick it back up with COPYOUT_FROM set + * so that we correctly deal with a change in state of the hardware modify bit... + * we do this via cluster_push_x... 
by passing along the IO_SYNC flag, we force + * cluster_push_x to wait until all the I/Os have completed... cluster_push_x is also + * responsible for generating the correct sized I/O(s) */ - io_size = upl_size; - - if ((upl_f_offset + io_size) > newEOF) { - io_size = newEOF - upl_f_offset; - io_size = (io_size + (devblocksize - 1)) & ~(devblocksize - 1); - } - - if (flags & IO_SYNC) - io_flags = CL_THROTTLE | CL_COMMIT | CL_AGE; - else - io_flags = CL_THROTTLE | CL_COMMIT | CL_AGE | CL_ASYNC; + ubc_upl_commit_range(upl, 0, upl_size, + UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY); - if (vp->v_flag & VNOCACHE_DATA) - io_flags |= CL_DUMP; + cl.e_addr = (upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64; - retval = cluster_io(vp, upl, 0, upl_f_offset, io_size, devblocksize, - io_flags, (struct buf *)0, (struct clios *)0); + retval = cluster_push_x(vp, &cl, newEOF, flags); } } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END, - retval, 0, uio_resid, 0, 0); + retval, 0, io_resid, 0, 0); return (retval); } int -cluster_read(vp, uio, filesize, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t filesize; - int devblocksize; - int flags; +cluster_read(vnode_t vp, struct uio *uio, off_t filesize, int xflags) { int prev_resid; - int clip_size; + u_int clip_size; off_t max_io_size; - struct iovec *iov; int upl_size; int upl_flags; upl_t upl; int retval = 0; + int flags; + flags = xflags; - if (!((vp->v_flag & VNOCACHE_DATA) && (uio->uio_segflg == UIO_USERSPACE))) - { - /* - * go do a read through the cache if one of the following is true.... - * NOCACHE is not true - * the uio request doesn't target USERSPACE - */ - return (cluster_read_x(vp, uio, filesize, devblocksize, flags)); - } - - while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) - { - /* - * we know we have a resid, so this is safe - * skip over any emtpy vectors - */ - iov = uio->uio_iov; - - while (iov->iov_len == 0) { - uio->uio_iov++; - uio->uio_iovcnt--; - iov = uio->uio_iov; - } - upl_size = PAGE_SIZE; - upl_flags = UPL_QUERY_OBJECT_TYPE; - - if ((vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, - &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) - { - /* - * the user app must have passed in an invalid address + if (vp->v_flag & VNOCACHE_DATA) + flags |= IO_NOCACHE; + if (vp->v_flag & VRAOFF) + flags |= IO_RAOFF; + + if (!((flags & IO_NOCACHE) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg))) { + /* + * go do a read through the cache if one of the following is true.... + * NOCACHE is not true + * the uio request doesn't target USERSPACE */ - return (EFAULT); - } - - /* - * We check every vector target but if it is physically - * contiguous space, we skip the sanity checks. - */ - if (upl_flags & UPL_PHYS_CONTIG) - { - retval = cluster_phys_read(vp, uio, filesize, devblocksize, flags); - } - else if (uio->uio_resid < PAGE_SIZE) - { + return (cluster_read_x(vp, uio, filesize, flags)); + } + +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + while (uio_resid(uio) && uio->uio_offset < filesize && retval == 0) { + u_int64_t iov_len; + u_int64_t iov_base; + /* - * we're here because we're don't have a physically contiguous target buffer - * go do a read through the cache if - * the total xfer size is less than a page... 
+ * we know we have a resid, so this is safe + * skip over any empty vectors */ + iov_len = uio_iov_len(uio); + + while (iov_len == 0) { + uio_next_iov(uio); + uio->uio_iovcnt--; + iov_len = uio_iov_len(uio); + } + iov_base = uio_iov_base(uio); + upl_size = PAGE_SIZE; + upl_flags = UPL_QUERY_OBJECT_TYPE; + + // LP64todo - fix this! + if ((vm_map_get_upl(current_map(), + CAST_DOWN(vm_offset_t, iov_base) & ~PAGE_MASK, + &upl_size, &upl, NULL, NULL, &upl_flags, 0)) != KERN_SUCCESS) { + /* + * the user app must have passed in an invalid address + */ + return (EFAULT); + } + + /* + * We check every vector target but if it is physically + * contiguous space, we skip the sanity checks. */ + if (upl_flags & UPL_PHYS_CONTIG) { + retval = cluster_phys_read(vp, uio, filesize); + } + else if (uio_resid(uio) < PAGE_SIZE) { + /* + * we're here because we don't have a physically contiguous target buffer + * go do a read through the cache if + * the total xfer size is less than a page... + */ + return (cluster_read_x(vp, uio, filesize, flags)); + } + // LP64todo - fix this! + else if (((int)uio->uio_offset & PAGE_MASK) || (CAST_DOWN(int, iov_base) & PAGE_MASK)) { + if (((int)uio->uio_offset & PAGE_MASK) == (CAST_DOWN(int, iov_base) & PAGE_MASK)) { + /* + * Bring the file offset read up to a pagesize boundary + * this will also bring the base address to a page boundary + * since they both are currently on the same offset within a page + * note: if we get here, uio->uio_resid is greater than PAGE_SIZE + * so the computed clip_size must always be less than the current uio_resid + */ + clip_size = (PAGE_SIZE - (int)(uio->uio_offset & PAGE_MASK_64)); + + /* + * Fake the resid going into the cluster_read_x call + * and restore it on the way out. 
+ */ + prev_resid = uio_resid(uio); + // LP64todo - fix this + uio_setresid(uio, clip_size); + + retval = cluster_read_x(vp, uio, filesize, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } else { + /* + * can't get both the file offset and the buffer offset aligned to a page boundary + * so fire an I/O through the cache for this entire vector + */ + // LP64todo - fix this! + clip_size = iov_len; + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_read_x(vp, uio, filesize, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } + } else { + /* + * If we come in here, we know the offset into + * the file is on a pagesize boundary + */ + max_io_size = filesize - uio->uio_offset; + // LP64todo - fix this + clip_size = uio_resid(uio); + if (iov_len < clip_size) + clip_size = iov_len; + if (max_io_size < clip_size) + clip_size = (int)max_io_size; + + if (clip_size < PAGE_SIZE) { + /* + * Take care of the tail end of the read in this vector. + */ + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); - max_io_size = filesize - uio->uio_offset; - clip_size = uio->uio_resid; - if (iov->iov_len < clip_size) - clip_size = iov->iov_len; - if (max_io_size < clip_size) - clip_size = (int)max_io_size; - - if (clip_size < PAGE_SIZE) - { - /* - * Take care of the tail end of the read in this vector. - */ - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_read_x(vp, uio, filesize, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - else - { - /* round clip_size down to a multiple of pagesize */ - clip_size = clip_size & ~(PAGE_MASK); - prev_resid = uio->uio_resid; - uio->uio_resid = clip_size; - retval = cluster_nocopy_read(vp, uio, filesize, devblocksize, flags); - if ((retval==0) && uio->uio_resid) - retval = cluster_read_x(vp, uio, filesize, devblocksize, flags); - uio->uio_resid = prev_resid - (clip_size - uio->uio_resid); - } - } /* end else */ - } /* end while */ + retval = cluster_read_x(vp, uio, filesize, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } else { + /* round clip_size down to a multiple of pagesize */ + clip_size = clip_size & ~(PAGE_MASK); + // LP64todo - fix this + prev_resid = uio_resid(uio); + uio_setresid(uio, clip_size); + + retval = cluster_nocopy_read(vp, uio, filesize); + + if ((retval==0) && uio_resid(uio)) + retval = cluster_read_x(vp, uio, filesize, flags); + + uio_setresid(uio, prev_resid - (clip_size - uio_resid(uio))); + } + } /* end else */ + } /* end while */ return(retval); } static int -cluster_read_x(vp, uio, filesize, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t filesize; - int devblocksize; - int flags; +cluster_read_x(vnode_t vp, struct uio *uio, off_t filesize, int flags) { upl_page_info_t *pl; upl_t upl; @@ -2400,44 +2726,49 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) int start_offset; int start_pg; int last_pg; - int uio_last; + int uio_last = 0; int pages_in_upl; off_t max_size; off_t last_ioread_offset; off_t last_request_offset; u_int size_of_prefetch; - int io_size; + u_int io_size; kern_return_t kret; int error = 0; int retval = 0; - u_int b_lblkno; - u_int e_lblkno; - struct clios iostate; u_int max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE; u_int rd_ahead_enabled = 1; u_int prefetch_enabled = 1; - + struct cl_readahead * rap; + struct clios iostate; + struct cl_extent extent; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 
32)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0); + (int)uio->uio_offset, uio_resid(uio), (int)filesize, 0, 0); + + // LP64todo - fix this + last_request_offset = uio->uio_offset + uio_resid(uio); - if (cluster_hard_throttle_on(vp)) { + if ((flags & (IO_RAOFF|IO_NOCACHE)) || + ((last_request_offset & ~PAGE_MASK_64) == (uio->uio_offset & ~PAGE_MASK_64))) { rd_ahead_enabled = 0; - prefetch_enabled = 0; + rap = NULL; + } else { + if (cluster_hard_throttle_on(vp)) { + rd_ahead_enabled = 0; + prefetch_enabled = 0; - max_rd_size = HARD_THROTTLE_MAXSIZE; + max_rd_size = HARD_THROTTLE_MAXSIZE; + } + if ((rap = cluster_get_rap(vp)) == NULL) + rd_ahead_enabled = 0; } - if (vp->v_flag & (VRAOFF|VNOCACHE_DATA)) - rd_ahead_enabled = 0; - - last_request_offset = uio->uio_offset + uio->uio_resid; - if (last_request_offset > filesize) last_request_offset = filesize; - b_lblkno = (u_int)(uio->uio_offset / PAGE_SIZE_64); - e_lblkno = (u_int)((last_request_offset - 1) / PAGE_SIZE_64); + extent.b_addr = uio->uio_offset / PAGE_SIZE_64; + extent.e_addr = (last_request_offset - 1) / PAGE_SIZE_64; - if (vp->v_ralen && (vp->v_lastr == b_lblkno || (vp->v_lastr + 1) == b_lblkno)) { + if (rap != NULL && rap->cl_ralen && (rap->cl_lastr == extent.b_addr || (rap->cl_lastr + 1) == extent.b_addr)) { /* * determine if we already have a read-ahead in the pipe courtesy of the * last read systemcall that was issued... @@ -2445,7 +2776,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) * with respect to any read-ahead that might be necessary to * garner all the data needed to complete this read systemcall */ - last_ioread_offset = (vp->v_maxra * PAGE_SIZE_64) + PAGE_SIZE_64; + last_ioread_offset = (rap->cl_maxra * PAGE_SIZE_64) + PAGE_SIZE_64; if (last_ioread_offset < uio->uio_offset) last_ioread_offset = (off_t)0; @@ -2454,7 +2785,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) } else last_ioread_offset = (off_t)0; - while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) { + while (uio_resid(uio) && uio->uio_offset < filesize && retval == 0) { /* * compute the size of the upl needed to encompass * the requested read... limit each call to cluster_io @@ -2467,12 +2798,13 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) upl_f_offset = uio->uio_offset - (off_t)start_offset; max_size = filesize - uio->uio_offset; - if ((off_t)((unsigned int)uio->uio_resid) < max_size) - io_size = uio->uio_resid; + // LP64todo - fix this! 
+ if ((off_t)((unsigned int)uio_resid(uio)) < max_size) + io_size = uio_resid(uio); else io_size = max_size; - if (!(vp->v_flag & VNOCACHE_DATA)) { + if (!(flags & IO_NOCACHE)) { while (io_size) { u_int io_resid; @@ -2497,7 +2829,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) if (size_of_prefetch > max_rd_size) size_of_prefetch = max_rd_size; - size_of_prefetch = cluster_rd_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize, devblocksize); + size_of_prefetch = cluster_rd_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize); last_ioread_offset += (off_t)(size_of_prefetch * PAGE_SIZE); @@ -2534,16 +2866,17 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) * we're already finished the I/O for this read request * let's see if we should do a read-ahead */ - cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize); + cluster_rd_ahead(vp, &extent, filesize, rap); } } if (retval) break; if (io_size == 0) { - if (e_lblkno < vp->v_lastr) - vp->v_maxra = 0; - vp->v_lastr = e_lblkno; - + if (rap != NULL) { + if (extent.e_addr < rap->cl_lastr) + rap->cl_maxra = 0; + rap->cl_lastr = extent.e_addr; + } break; } start_offset = (int)(uio->uio_offset & PAGE_MASK_64); @@ -2563,11 +2896,11 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) (int)upl, (int)upl_f_offset, upl_size, start_offset, 0); kret = ubc_create_upl(vp, - upl_f_offset, - upl_size, - &upl, - &pl, - UPL_SET_LITE); + upl_f_offset, + upl_size, + &upl, + &pl, + UPL_SET_LITE); if (kret != KERN_SUCCESS) panic("cluster_read: failed to get pagelist"); @@ -2618,7 +2951,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) */ error = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, - io_size, devblocksize, CL_READ | CL_ASYNC, (struct buf *)0, &iostate); + io_size, CL_READ | CL_ASYNC, (buf_t)NULL, &iostate); } if (error == 0) { /* @@ -2643,8 +2976,9 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) if (val_size > max_size) val_size = max_size; - if (val_size > uio->uio_resid) - val_size = uio->uio_resid; + if (val_size > uio_resid(uio)) + // LP64todo - fix this + val_size = uio_resid(uio); if (last_ioread_offset == 0) last_ioread_offset = uio->uio_offset + val_size; @@ -2656,7 +2990,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) * pre-fetch I/O... 
the I/O latency will overlap * with the copying of the data */ - size_of_prefetch = cluster_rd_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize, devblocksize); + size_of_prefetch = cluster_rd_prefetch(vp, last_ioread_offset, size_of_prefetch, filesize); last_ioread_offset += (off_t)(size_of_prefetch * PAGE_SIZE); @@ -2671,16 +3005,22 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) * explicitly disabled it */ if (rd_ahead_enabled) - cluster_rd_ahead(vp, b_lblkno, e_lblkno, filesize, devblocksize); - - if (e_lblkno < vp->v_lastr) - vp->v_maxra = 0; - vp->v_lastr = e_lblkno; + cluster_rd_ahead(vp, &extent, filesize, rap); + + if (rap != NULL) { + if (extent.e_addr < rap->cl_lastr) + rap->cl_maxra = 0; + rap->cl_lastr = extent.e_addr; + } } + lck_mtx_lock(cl_mtxp); + while (iostate.io_issued != iostate.io_completed) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_read_x", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_x", 0); } + lck_mtx_unlock(cl_mtxp); + if (iostate.io_error) error = iostate.io_error; else @@ -2697,7 +3037,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START, (int)upl, start_pg * PAGE_SIZE, io_size, error, 0); - if (error || (vp->v_flag & VNOCACHE_DATA)) + if (error || (flags & IO_NOCACHE)) ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); else @@ -2737,7 +3077,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) if (upl_dirty_page(pl, cur_pg)) commit_flags |= UPL_COMMIT_SET_DIRTY; - if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA)) + if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (flags & IO_NOCACHE)) ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); else @@ -2758,7 +3098,7 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) if (upl_dirty_page(pl, cur_pg)) commit_flags |= UPL_COMMIT_SET_DIRTY; - if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (vp->v_flag & VNOCACHE_DATA)) + if ( !(commit_flags & UPL_COMMIT_SET_DIRTY) && (flags & IO_NOCACHE)) ubc_upl_abort_range(upl, cur_pg * PAGE_SIZE, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); else @@ -2782,21 +3122,38 @@ cluster_read_x(vp, uio, filesize, devblocksize, flags) } if (retval == 0) retval = error; + + if ( uio_resid(uio) ) { + if (cluster_hard_throttle_on(vp)) { + rd_ahead_enabled = 0; + prefetch_enabled = 0; + + max_rd_size = HARD_THROTTLE_MAXSIZE; + } else { + if (rap != NULL) + rd_ahead_enabled = 1; + prefetch_enabled = 1; + + max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE; + } + } + } + if (rap != NULL) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END, + (int)uio->uio_offset, uio_resid(uio), rap->cl_lastr, retval, 0); + + lck_mtx_unlock(&rap->cl_lockr); + } else { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END, + (int)uio->uio_offset, uio_resid(uio), 0, retval, 0); } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, vp->v_lastr, retval, 0); return (retval); } static int -cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) - struct vnode *vp; - struct uio *uio; - off_t filesize; - int devblocksize; - int flags; +cluster_nocopy_read(vnode_t vp, struct uio *uio, off_t filesize) { upl_t upl; upl_page_info_t *pl; @@ -2812,13 +3169,15 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) int i; int force_data_sync; int retval = 0; + int 
no_zero_fill = 0; + int abort_flag = 0; struct clios iostate; u_int max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE; u_int max_rd_ahead = MAX_UPL_TRANSFER * PAGE_SIZE * 2; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, (int)filesize, devblocksize, 0); + (int)uio->uio_offset, uio_resid(uio), (int)filesize, 0, 0); /* * When we enter this routine, we know @@ -2834,18 +3193,22 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) iov = uio->uio_iov; - if (cluster_hard_throttle_on(vp)) { - max_rd_size = HARD_THROTTLE_MAXSIZE; - max_rd_ahead = HARD_THROTTLE_MAXSIZE - 1; - } - while (uio->uio_resid && uio->uio_offset < filesize && retval == 0) { + while (uio_resid(uio) && uio->uio_offset < filesize && retval == 0) { + if (cluster_hard_throttle_on(vp)) { + max_rd_size = HARD_THROTTLE_MAXSIZE; + max_rd_ahead = HARD_THROTTLE_MAXSIZE - 1; + } else { + max_rd_size = MAX_UPL_TRANSFER * PAGE_SIZE; + max_rd_ahead = MAX_UPL_TRANSFER * PAGE_SIZE * 2; + } max_io_size = filesize - uio->uio_offset; - if (max_io_size < (off_t)((unsigned int)uio->uio_resid)) + // LP64todo - fix this + if (max_io_size < (off_t)((unsigned int)uio_resid(uio))) io_size = max_io_size; else - io_size = uio->uio_resid; + io_size = uio_resid(uio); /* * First look for pages already in the cache @@ -2889,20 +3252,34 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) */ goto wait_for_reads; - upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK; + // LP64todo - fix this! + upl_offset = CAST_DOWN(vm_offset_t, iov->iov_base) & PAGE_MASK; upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START, (int)upl_offset, upl_needed_size, (int)iov->iov_base, io_size, 0); + if (upl_offset == 0 && ((io_size & PAGE_MASK) == 0)) { + no_zero_fill = 1; + abort_flag = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY; + } else { + no_zero_fill = 0; + abort_flag = UPL_ABORT_FREE_ON_EMPTY; + } for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) { pages_in_pl = 0; upl_size = upl_needed_size; upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; - kret = vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, - &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, force_data_sync); + if (no_zero_fill) + upl_flags |= UPL_NOZEROFILL; + if (force_data_sync) + upl_flags |= UPL_FORCE_DATA_SYNC; + + // LP64todo - fix this! 
+ kret = vm_map_create_upl(current_map(), + (vm_map_offset_t)(CAST_DOWN(vm_offset_t, iov->iov_base) & ~PAGE_MASK), + &upl_size, &upl, NULL, &pages_in_pl, &upl_flags); if (kret != KERN_SUCCESS) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, @@ -2926,8 +3303,7 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) if (i == pages_in_pl) break; - ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, - UPL_ABORT_FREE_ON_EMPTY); + ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, abort_flag); } if (force_data_sync >= 3) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, @@ -2942,8 +3318,7 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) io_size = (upl_size - (int)upl_offset) & ~PAGE_MASK; if (io_size == 0) { - ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, - UPL_ABORT_FREE_ON_EMPTY); + ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, abort_flag); goto wait_for_reads; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, @@ -2955,10 +3330,14 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) * if there are already too many outstanding reads * wait until some have completed before issuing the next read */ + lck_mtx_lock(cl_mtxp); + while ((iostate.io_issued - iostate.io_completed) > max_rd_ahead) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_nocopy_read", 0); } + lck_mtx_unlock(cl_mtxp); + if (iostate.io_error) { /* * one of the earlier reads we issued ran into a hard error @@ -2967,29 +3346,27 @@ cluster_nocopy_read(vp, uio, filesize, devblocksize, flags) * go wait for any other reads to complete before * returning the error to the caller */ - ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, - UPL_ABORT_FREE_ON_EMPTY); + ubc_upl_abort_range(upl, (upl_offset & ~PAGE_MASK), upl_size, abort_flag); goto wait_for_reads; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START, (int)upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0); - retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, - io_size, devblocksize, + retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, CL_PRESERVE | CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO, - (struct buf *)0, &iostate); + (buf_t)NULL, &iostate); /* * update the uio structure */ - iov->iov_base += io_size; + ((u_int32_t)iov->iov_base) += io_size; iov->iov_len -= io_size; - uio->uio_resid -= io_size; + uio_setresid(uio, (uio_resid(uio) - io_size)); uio->uio_offset += io_size; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END, - (int)upl, (int)uio->uio_offset, (int)uio->uio_resid, retval, 0); + (int)upl, (int)uio->uio_offset, (int)uio_resid(uio), retval, 0); } /* end while */ @@ -2998,60 +3375,77 @@ wait_for_reads: * make sure all async reads that are part of this stream * have completed before we return */ + lck_mtx_lock(cl_mtxp); + while (iostate.io_issued != iostate.io_completed) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_nocopy_read", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_nocopy_read", 0); } + lck_mtx_unlock(cl_mtxp); + if (iostate.io_error) retval = iostate.io_error; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_END, - (int)uio->uio_offset, (int)uio->uio_resid, 6, retval, 0); + (int)uio->uio_offset, (int)uio_resid(uio), 6, retval, 0); return (retval); } static int -cluster_phys_read(vp, uio, filesize, devblocksize, flags) 
- struct vnode *vp; - struct uio *uio; - off_t filesize; - int devblocksize; - int flags; +cluster_phys_read(vnode_t vp, struct uio *uio, off_t filesize) { upl_page_info_t *pl; upl_t upl; vm_offset_t upl_offset; addr64_t dst_paddr; off_t max_size; - int io_size; +#if LP64KERN + int64_t io_size; + u_int64_t iov_len; + u_int64_t iov_base; +#else + int io_size; + uint iov_len; + uint iov_base; +#endif int tail_size; int upl_size; int upl_needed_size; int pages_in_pl; int upl_flags; kern_return_t kret; - struct iovec *iov; struct clios iostate; int error; + int devblocksize; + devblocksize = vp->v_mount->mnt_devblocksize; /* * When we enter this routine, we know * -- the resid will not exceed iov_len * -- the target address is physically contiguous */ - iov = uio->uio_iov; +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + iov_len = uio_iov_len(uio); + iov_base = uio_iov_base(uio); max_size = filesize - uio->uio_offset; - if (max_size > (off_t)((unsigned int)iov->iov_len)) - io_size = iov->iov_len; + // LP64todo - fix this! + if (max_size < 0 || (u_int64_t)max_size > iov_len) + io_size = iov_len; else io_size = max_size; - upl_offset = (vm_offset_t)iov->iov_base & PAGE_MASK; + // LP64todo - fix this! + upl_offset = CAST_DOWN(vm_offset_t, iov_base) & PAGE_MASK; upl_needed_size = upl_offset + io_size; error = 0; @@ -3060,7 +3454,7 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) upl_flags = UPL_FILE_IO | UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE; kret = vm_map_get_upl(current_map(), - (vm_offset_t)iov->iov_base & ~PAGE_MASK, + CAST_DOWN(vm_offset_t, iov_base) & ~PAGE_MASK, &upl_size, &upl, NULL, &pages_in_pl, &upl_flags, 0); if (kret != KERN_SUCCESS) { @@ -3079,7 +3473,7 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) } pl = ubc_upl_pageinfo(upl); - dst_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + ((addr64_t)((u_int)iov->iov_base & PAGE_MASK)); + dst_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + ((addr64_t)(iov_base & PAGE_MASK)); while (((uio->uio_offset & (devblocksize - 1)) || io_size < devblocksize) && io_size) { int head_size; @@ -3089,7 +3483,7 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) if (head_size > io_size) head_size = io_size; - error = cluster_align_phys_io(vp, uio, dst_paddr, head_size, devblocksize, CL_READ); + error = cluster_align_phys_io(vp, uio, dst_paddr, head_size, CL_READ); if (error) { ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); @@ -3123,22 +3517,25 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) * if there are already too many outstanding reads * wait until some have completed before issuing the next */ + lck_mtx_lock(cl_mtxp); + while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_UPL_TRANSFER * PAGE_SIZE)) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_phys_read", 0); } + lck_mtx_unlock(cl_mtxp); - error = cluster_io(vp, upl, upl_offset, uio->uio_offset, xsize, 0, + error = cluster_io(vp, upl, upl_offset, uio->uio_offset, xsize, CL_READ | CL_NOZERO | CL_DEV_MEMORY | CL_ASYNC, - (struct buf *)0, &iostate); + (buf_t)NULL, &iostate); /* * The cluster_io read was issued successfully, * update the uio structure */ if (error == 0) { - uio->uio_resid -= xsize; - iov->iov_len -= xsize; - iov->iov_base += xsize; + 
uio_setresid(uio, (uio_resid(uio) - xsize)); + uio_iov_base_add(uio, xsize); + uio_iov_len_add(uio, -xsize); uio->uio_offset += xsize; dst_paddr += xsize; upl_offset += xsize; @@ -3149,15 +3546,19 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) * make sure all async reads that are part of this stream * have completed before we proceed */ + lck_mtx_lock(cl_mtxp); + while (iostate.io_issued != iostate.io_completed) { iostate.io_wanted = 1; - tsleep((caddr_t)&iostate.io_wanted, PRIBIO + 1, "cluster_phys_read", 0); + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_phys_read", 0); } - if (iostate.io_error) { + lck_mtx_unlock(cl_mtxp); + + if (iostate.io_error) error = iostate.io_error; - } + if (error == 0 && tail_size) - error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, devblocksize, CL_READ); + error = cluster_align_phys_io(vp, uio, dst_paddr, tail_size, CL_READ); /* * just release our hold on the physically contiguous @@ -3174,12 +3575,7 @@ cluster_phys_read(vp, uio, filesize, devblocksize, flags) * the completed pages will be released into the VM cache */ int -advisory_read(vp, filesize, f_offset, resid, devblocksize) - struct vnode *vp; - off_t filesize; - off_t f_offset; - int resid; - int devblocksize; +advisory_read(vnode_t vp, off_t filesize, off_t f_offset, int resid) { upl_page_info_t *pl; upl_t upl; @@ -3197,11 +3593,11 @@ advisory_read(vp, filesize, f_offset, resid, devblocksize) int issued_io; int skip_range; - if (!UBCINFOEXISTS(vp)) + if ( !UBCINFOEXISTS(vp)) return(EINVAL); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START, - (int)f_offset, resid, (int)filesize, devblocksize, 0); + (int)f_offset, resid, (int)filesize, 0, 0); while (resid && f_offset < filesize && retval == 0) { /* @@ -3258,11 +3654,11 @@ advisory_read(vp, filesize, f_offset, resid, devblocksize) (int)upl, (int)upl_f_offset, upl_size, start_offset, 0); kret = ubc_create_upl(vp, - upl_f_offset, - upl_size, - &upl, - &pl, - UPL_RET_ONLY_ABSENT | UPL_SET_LITE); + upl_f_offset, + upl_size, + &upl, + &pl, + UPL_RET_ONLY_ABSENT | UPL_SET_LITE); if (kret != KERN_SUCCESS) return(retval); issued_io = 0; @@ -3322,8 +3718,8 @@ advisory_read(vp, filesize, f_offset, resid, devblocksize) /* * issue an asynchronous read to cluster_io */ - retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, devblocksize, - CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (struct buf *)0, (struct clios *)0); + retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, + CL_ASYNC | CL_READ | CL_COMMIT | CL_AGE, (buf_t)NULL, (struct clios *)NULL); issued_io = 1; } @@ -3347,88 +3743,144 @@ advisory_read(vp, filesize, f_offset, resid, devblocksize) int -cluster_push(vp) - struct vnode *vp; +cluster_push(vnode_t vp, int flags) { - int retval; + int retval; + struct cl_writebehind *wbp; - if (!UBCINFOEXISTS(vp) || (vp->v_clen == 0 && !(vp->v_flag & VHASDIRTY))) - return(0); + if ( !UBCINFOEXISTS(vp)) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, flags, 0, -1, 0); + return (0); + } + /* return if deferred write is set */ + if (((unsigned int)vfs_flags(vp->v_mount) & MNT_DEFWRITE) && (flags & IO_DEFWRITE)) { + return (0); + } + if ((wbp = cluster_get_wbp(vp, CLW_RETURNLOCKED)) == NULL) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, flags, 0, -2, 0); + return (0); + } + if (wbp->cl_number == 0 && wbp->cl_scmap == NULL) { + lck_mtx_unlock(&wbp->cl_lockw); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, 
flags, 0, -3, 0); + return(0); + } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START, - vp->v_flag & VHASDIRTY, vp->v_clen, 0, 0, 0); + (int)wbp->cl_scmap, wbp->cl_number, flags, 0, 0); - if (vp->v_flag & VHASDIRTY) { - sparse_cluster_push(vp, ubc_getsize(vp), 1); + if (wbp->cl_scmap) { + sparse_cluster_push(wbp, vp, ubc_getsize(vp), 1); - vp->v_clen = 0; retval = 1; } else - retval = cluster_try_push(vp, ubc_getsize(vp), 0, 1); + retval = cluster_try_push(wbp, vp, ubc_getsize(vp), 0, 1); + + lck_mtx_unlock(&wbp->cl_lockw); + + if (flags & IO_SYNC) + (void)vnode_waitforwrites(vp, 0, 0, 0, (char *)"cluster_push"); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END, - vp->v_flag & VHASDIRTY, vp->v_clen, retval, 0, 0); + (int)wbp->cl_scmap, wbp->cl_number, retval, 0, 0); return (retval); } -int -cluster_release(vp) - struct vnode *vp; +__private_extern__ void +cluster_release(struct ubc_info *ubc) { - off_t offset; - u_int length; + struct cl_writebehind *wbp; + struct cl_readahead *rap; + + if ((wbp = ubc->cl_wbehind)) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, (int)ubc, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); + + if (wbp->cl_scmap) + vfs_drt_control(&(wbp->cl_scmap), 0); + } else { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, (int)ubc, 0, 0, 0, 0); + } - if (vp->v_flag & VHASDIRTY) { - vfs_drt_control(&(vp->v_scmap), 0); + rap = ubc->cl_rahead; - vp->v_flag &= ~VHASDIRTY; + if (wbp != NULL) { + lck_mtx_destroy(&wbp->cl_lockw, cl_mtx_grp); + FREE_ZONE((void *)wbp, sizeof *wbp, M_CLWRBEHIND); + } + if ((rap = ubc->cl_rahead)) { + lck_mtx_destroy(&rap->cl_lockr, cl_mtx_grp); + FREE_ZONE((void *)rap, sizeof *rap, M_CLRDAHEAD); } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_END, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + ubc->cl_rahead = NULL; + ubc->cl_wbehind = NULL; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_END, (int)ubc, (int)rap, (int)wbp, 0, 0); +} + + +static void +cluster_push_EOF(vnode_t vp, off_t EOF) +{ + struct cl_writebehind *wbp; + + wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START, + (int)wbp->cl_scmap, wbp->cl_number, (int)EOF, 0, 0); + + if (wbp->cl_scmap) + sparse_cluster_push(wbp, vp, EOF, 1); + else + cluster_try_push(wbp, vp, EOF, 0, 1); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END, + (int)wbp->cl_scmap, wbp->cl_number, 0, 0, 0); + + lck_mtx_unlock(&wbp->cl_lockw); } static int -cluster_try_push(vp, EOF, can_delay, push_all) - struct vnode *vp; - off_t EOF; - int can_delay; - int push_all; +cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int can_delay, int push_all) { int cl_index; int cl_index1; int min_index; int cl_len; - int cl_total; int cl_pushed = 0; - struct v_cluster l_clusters[MAX_CLUSTERS]; + struct cl_wextent l_clusters[MAX_CLUSTERS]; /* + * the write behind context exists and has + * already been locked... 
+ * * make a local 'sorted' copy of the clusters - * and clear vp->v_clen so that new clusters can + * and clear wbp->cl_number so that new clusters can * be developed */ - for (cl_index = 0; cl_index < vp->v_clen; cl_index++) { - for (min_index = -1, cl_index1 = 0; cl_index1 < vp->v_clen; cl_index1++) { - if (vp->v_clusters[cl_index1].start_pg == vp->v_clusters[cl_index1].last_pg) + for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) { + for (min_index = -1, cl_index1 = 0; cl_index1 < wbp->cl_number; cl_index1++) { + if (wbp->cl_clusters[cl_index1].b_addr == wbp->cl_clusters[cl_index1].e_addr) continue; if (min_index == -1) min_index = cl_index1; - else if (vp->v_clusters[cl_index1].start_pg < vp->v_clusters[min_index].start_pg) + else if (wbp->cl_clusters[cl_index1].b_addr < wbp->cl_clusters[min_index].b_addr) min_index = cl_index1; } if (min_index == -1) break; - l_clusters[cl_index].start_pg = vp->v_clusters[min_index].start_pg; - l_clusters[cl_index].last_pg = vp->v_clusters[min_index].last_pg; + l_clusters[cl_index].b_addr = wbp->cl_clusters[min_index].b_addr; + l_clusters[cl_index].e_addr = wbp->cl_clusters[min_index].e_addr; + l_clusters[cl_index].io_nocache = wbp->cl_clusters[min_index].io_nocache; - vp->v_clusters[min_index].start_pg = vp->v_clusters[min_index].last_pg; + wbp->cl_clusters[min_index].b_addr = wbp->cl_clusters[min_index].e_addr; } - cl_len = cl_index; - vp->v_clen = 0; + wbp->cl_number = 0; + + cl_len = cl_index; if (can_delay && cl_len == MAX_CLUSTERS) { int i; @@ -3444,8 +3896,8 @@ cluster_try_push(vp, EOF, can_delay, push_all) * * check to make sure that all the clusters except the last one are 'full'... and that each cluster * is adjacent to the next (i.e. we're looking for sequential writes) they were sorted above - * so we can just make a simple pass through up, to but not including the last one... - * note that last_pg is not inclusive, so it will be equal to the start_pg of the next cluster if they + * so we can just make a simple pass through, up to, but not including the last one... + * note that e_addr is not inclusive, so it will be equal to the b_addr of the next cluster if they * are sequential * * we let the last one be partial as long as it was adjacent to the previous one... @@ -3453,100 +3905,113 @@ cluster_try_push(vp, EOF, can_delay, push_all) * of order... if this occurs at the tail of the last cluster, we don't want to fall into the sparse cluster world... */ for (i = 0; i < MAX_CLUSTERS - 1; i++) { - if ((l_clusters[i].last_pg - l_clusters[i].start_pg) != MAX_UPL_TRANSFER) + if ((l_clusters[i].e_addr - l_clusters[i].b_addr) != MAX_UPL_TRANSFER) goto dont_try; - if (l_clusters[i].last_pg != l_clusters[i+1].start_pg) + if (l_clusters[i].e_addr != l_clusters[i+1].b_addr) goto dont_try; } } + /* + * drop the lock while we're firing off the I/Os... + * this is safe since I'm working off of a private sorted copy + * of the clusters, and I'm going to re-evaluate the public + * state after I retake the lock + */ + lck_mtx_unlock(&wbp->cl_lockw); + for (cl_index = 0; cl_index < cl_len; cl_index++) { + int flags; + struct cl_extent cl; + /* - * try to push each cluster in turn... cluster_push_x may not - * push the cluster if can_delay is TRUE and the cluster doesn't - * meet the critera for an immediate push + * try to push each cluster in turn... 
*/ - if (cluster_push_x(vp, EOF, l_clusters[cl_index].start_pg, l_clusters[cl_index].last_pg, can_delay)) { - l_clusters[cl_index].start_pg = 0; - l_clusters[cl_index].last_pg = 0; + if (l_clusters[cl_index].io_nocache) + flags = IO_NOCACHE; + else + flags = 0; + cl.b_addr = l_clusters[cl_index].b_addr; + cl.e_addr = l_clusters[cl_index].e_addr; - cl_pushed++; + cluster_push_x(vp, &cl, EOF, flags); - if (push_all == 0) - break; - } + l_clusters[cl_index].b_addr = 0; + l_clusters[cl_index].e_addr = 0; + + cl_pushed++; + + if (push_all == 0) + break; } + lck_mtx_lock(&wbp->cl_lockw); + dont_try: if (cl_len > cl_pushed) { /* * we didn't push all of the clusters, so * lets try to merge them back in to the vnode */ - if ((MAX_CLUSTERS - vp->v_clen) < (cl_len - cl_pushed)) { + if ((MAX_CLUSTERS - wbp->cl_number) < (cl_len - cl_pushed)) { /* * we picked up some new clusters while we were trying to - * push the old ones (I don't think this can happen because - * I'm holding the lock, but just in case)... the sum of the + * push the old ones... this can happen because I've dropped + * the vnode lock... the sum of the * leftovers plus the new cluster count exceeds our ability * to represent them, so switch to the sparse cluster mechanism + * + * collect the active public clusters... */ - - /* - * first collect the new clusters sitting in the vp - */ - sparse_cluster_switch(vp, EOF); + sparse_cluster_switch(wbp, vp, EOF); for (cl_index = 0, cl_index1 = 0; cl_index < cl_len; cl_index++) { - if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg) + if (l_clusters[cl_index].b_addr == l_clusters[cl_index].e_addr) continue; - vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg; - vp->v_clusters[cl_index1].last_pg = l_clusters[cl_index].last_pg; + wbp->cl_clusters[cl_index1].b_addr = l_clusters[cl_index].b_addr; + wbp->cl_clusters[cl_index1].e_addr = l_clusters[cl_index].e_addr; + wbp->cl_clusters[cl_index1].io_nocache = l_clusters[cl_index].io_nocache; cl_index1++; } /* * update the cluster count */ - vp->v_clen = cl_index1; + wbp->cl_number = cl_index1; /* * and collect the original clusters that were moved into the * local storage for sorting purposes */ - sparse_cluster_switch(vp, EOF); + sparse_cluster_switch(wbp, vp, EOF); } else { /* * we've got room to merge the leftovers back in * just append them starting at the next 'hole' - * represented by vp->v_clen + * represented by wbp->cl_number */ - for (cl_index = 0, cl_index1 = vp->v_clen; cl_index < cl_len; cl_index++) { - if (l_clusters[cl_index].start_pg == l_clusters[cl_index].last_pg) + for (cl_index = 0, cl_index1 = wbp->cl_number; cl_index < cl_len; cl_index++) { + if (l_clusters[cl_index].b_addr == l_clusters[cl_index].e_addr) continue; - vp->v_clusters[cl_index1].start_pg = l_clusters[cl_index].start_pg; - vp->v_clusters[cl_index1].last_pg = l_clusters[cl_index].last_pg; + wbp->cl_clusters[cl_index1].b_addr = l_clusters[cl_index].b_addr; + wbp->cl_clusters[cl_index1].e_addr = l_clusters[cl_index].e_addr; + wbp->cl_clusters[cl_index1].io_nocache = l_clusters[cl_index].io_nocache; cl_index1++; } /* * update the cluster count */ - vp->v_clen = cl_index1; + wbp->cl_number = cl_index1; } } - return(MAX_CLUSTERS - vp->v_clen); + return(MAX_CLUSTERS - wbp->cl_number); } static int -cluster_push_x(vp, EOF, first, last, can_delay) - struct vnode *vp; - off_t EOF; - unsigned int first; - unsigned int last; - int can_delay; +cluster_push_x(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags) { upl_page_info_t *pl; upl_t upl; 
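Two conventions in cluster_try_push above are worth making explicit before the cluster_push_x body. Cluster extents are half-open, so e_addr names the first page past the dirty range and two back-to-back clusters satisfy prev.e_addr == next.b_addr; and the private copy is built by repeated minimum selection, so l_clusters comes out sorted by b_addr with every consumed source slot emptied (b_addr == e_addr). The following is a minimal standalone sketch of that pattern, not kernel code; the extent type and the MAX_UPL_TRANSFER value are illustrative stand-ins.

    #include <stdint.h>

    #define MAX_UPL_TRANSFER 256                /* pages in a "full" cluster; illustrative */

    struct extent { int64_t b_addr, e_addr; };  /* half-open page range [b_addr, e_addr) */

    /*
     * Build a sorted copy of 'src' in 'dst' by repeated minimum selection,
     * emptying each source slot as it is consumed (b_addr == e_addr marks
     * an empty slot, as in cluster_try_push).  Returns the count copied.
     */
    static int
    sort_extents(struct extent *src, int n, struct extent *dst)
    {
            int i, j, min, cnt = 0;

            for (i = 0; i < n; i++) {
                    for (min = -1, j = 0; j < n; j++) {
                            if (src[j].b_addr == src[j].e_addr)
                                    continue;               /* already consumed */
                            if (min == -1 || src[j].b_addr < src[min].b_addr)
                                    min = j;
                    }
                    if (min == -1)
                            break;
                    dst[cnt++] = src[min];
                    src[min].b_addr = src[min].e_addr;      /* mark slot consumed */
            }
            return cnt;
    }

    /*
     * The "can_delay" test: every extent but the last must be full, and each
     * must abut the next.  Sorted order makes this one linear pass, and the
     * half-open convention makes adjacency a simple equality.
     */
    static int
    is_sequential_run(const struct extent *l, int n)
    {
            int i;

            for (i = 0; i < n - 1; i++)
                    if (l[i].e_addr - l[i].b_addr != MAX_UPL_TRANSFER ||
                        l[i].e_addr != l[i + 1].b_addr)
                            return 0;
            return 1;
    }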
@@ -3560,19 +4025,21 @@ cluster_push_x(vp, EOF, first, last, can_delay) int io_flags; int upl_flags; int size; + int error = 0; + int retval; kern_return_t kret; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_START, - vp->v_clen, first, last, EOF, 0); + (int)cl->b_addr, (int)cl->e_addr, (int)EOF, flags, 0); - if ((pages_in_upl = last - first) == 0) { + if ((pages_in_upl = (int)(cl->e_addr - cl->b_addr)) == 0) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 0, 0, 0, 0); - return (1); + return (0); } upl_size = pages_in_upl * PAGE_SIZE; - upl_f_offset = (off_t)((unsigned long long)first * PAGE_SIZE_64); + upl_f_offset = (off_t)(cl->b_addr * PAGE_SIZE_64); if (upl_f_offset + upl_size >= EOF) { @@ -3584,7 +4051,7 @@ cluster_push_x(vp, EOF, first, last, can_delay) */ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 1, 0, 0, 0); - return(1); + return(0); } size = EOF - upl_f_offset; @@ -3595,7 +4062,19 @@ cluster_push_x(vp, EOF, first, last, can_delay) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_START, upl_size, size, 0, 0, 0); - if (vp->v_flag & VNOCACHE_DATA) + /* + * by asking for UPL_COPYOUT_FROM and UPL_RET_ONLY_DIRTY, we get the following desirable behavior + * + * - only pages that are currently dirty are returned... these are the ones we need to clean + * - the hardware dirty bit is cleared when the page is gathered into the UPL... the software dirty bit is set + * - if we have to abort the I/O for some reason, the software dirty bit is left set since we didn't clean the page + * - when we commit the page, the software dirty bit is cleared... the hardware dirty bit is untouched so that if + * someone dirties this page while the I/O is in progress, we don't lose track of the new state + * + * when the I/O completes, we no longer ask for an explicit clear of the DIRTY state (either soft or hard) + */ + + if ((vp->v_flag & VNOCACHE_DATA) || (flags & IO_NOCACHE)) upl_flags = UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY | UPL_SET_LITE | UPL_WILL_BE_DUMPED; else upl_flags = UPL_COPYOUT_FROM | UPL_RET_ONLY_DIRTY | UPL_SET_LITE; @@ -3629,7 +4108,7 @@ cluster_push_x(vp, EOF, first, last, can_delay) ubc_upl_abort(upl, 0); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 2, 0, 0, 0); - return(1); + return(0); } for (last_pg = 0; last_pg < pages_in_upl; ) { @@ -3671,116 +4150,128 @@ cluster_push_x(vp, EOF, first, last, can_delay) io_size = min(size, (last_pg - start_pg) * PAGE_SIZE); - if (vp->v_flag & VNOCACHE_DATA) - io_flags = CL_THROTTLE | CL_COMMIT | CL_ASYNC | CL_DUMP; - else - io_flags = CL_THROTTLE | CL_COMMIT | CL_ASYNC; + io_flags = CL_THROTTLE | CL_COMMIT; + + if ( !(flags & IO_SYNC)) + io_flags |= CL_ASYNC; + + retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, + io_flags, (buf_t)NULL, (struct clios *)NULL); - cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, io_size, vp->v_ciosiz, io_flags, (struct buf *)0, (struct clios *)0); + if (error == 0 && retval) + error = retval; size -= io_size; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 51)) | DBG_FUNC_END, 1, 3, 0, 0, 0); - return(1); + return(error); } -static int -sparse_cluster_switch(struct vnode *vp, off_t EOF) +/* + * sparse_cluster_switch is called with the write behind lock held + */ +static void +sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF) { - int cl_index; + int cl_index; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_START, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_START, 
(int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); - if ( !(vp->v_flag & VHASDIRTY)) { - vp->v_flag |= VHASDIRTY; - vp->v_scdirty = 0; - vp->v_scmap = 0; - } - for (cl_index = 0; cl_index < vp->v_clen; cl_index++) { - int flags; - int start_pg; - int last_pg; + if (wbp->cl_scmap == NULL) + wbp->cl_scdirty = 0; + + for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) { + int flags; + struct cl_extent cl; + + for (cl.b_addr = wbp->cl_clusters[cl_index].b_addr; cl.b_addr < wbp->cl_clusters[cl_index].e_addr; cl.b_addr++) { - for (start_pg = vp->v_clusters[cl_index].start_pg; start_pg < vp->v_clusters[cl_index].last_pg; start_pg++) { + if (ubc_page_op(vp, (off_t)(cl.b_addr * PAGE_SIZE_64), 0, 0, &flags) == KERN_SUCCESS) { + if (flags & UPL_POP_DIRTY) { + cl.e_addr = cl.b_addr + 1; - if (ubc_page_op(vp, (off_t)(((off_t)start_pg) * PAGE_SIZE_64), 0, 0, &flags) == KERN_SUCCESS) { - if (flags & UPL_POP_DIRTY) - sparse_cluster_add(vp, EOF, start_pg, start_pg + 1); + sparse_cluster_add(wbp, vp, &cl, EOF); + } } } } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_END, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + wbp->cl_number = 0; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); } -static int -sparse_cluster_push(struct vnode *vp, off_t EOF, int push_all) +/* + * sparse_cluster_push is called with the write behind lock held + */ +static void +sparse_cluster_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_all) { - unsigned int first; - unsigned int last; - off_t offset; - u_int length; + struct cl_extent cl; + off_t offset; + u_int length; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, (int)vp, (int)vp->v_scmap, vp->v_scdirty, push_all, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, push_all, 0); if (push_all) - vfs_drt_control(&(vp->v_scmap), 1); + vfs_drt_control(&(wbp->cl_scmap), 1); for (;;) { - if (vfs_drt_get_cluster(&(vp->v_scmap), &offset, &length) != KERN_SUCCESS) { - vp->v_flag &= ~VHASDIRTY; - vp->v_clen = 0; + if (vfs_drt_get_cluster(&(wbp->cl_scmap), &offset, &length) != KERN_SUCCESS) break; - } - first = (unsigned int)(offset / PAGE_SIZE_64); - last = (unsigned int)((offset + length) / PAGE_SIZE_64); - cluster_push_x(vp, EOF, first, last, 0); + cl.b_addr = (daddr64_t)(offset / PAGE_SIZE_64); + cl.e_addr = (daddr64_t)((offset + length) / PAGE_SIZE_64); + + wbp->cl_scdirty -= (int)(cl.e_addr - cl.b_addr); - vp->v_scdirty -= (last - first); + cluster_push_x(vp, &cl, EOF, 0); if (push_all == 0) break; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); } -static int -sparse_cluster_add(struct vnode *vp, off_t EOF, daddr_t first, daddr_t last) +/* + * sparse_cluster_add is called with the write behind lock held + */ +static void +sparse_cluster_add(struct cl_writebehind *wbp, vnode_t vp, struct cl_extent *cl, off_t EOF) { - u_int new_dirty; - u_int length; - off_t offset; + u_int new_dirty; + u_int length; + off_t offset; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_START, (int)vp->v_scmap, vp->v_scdirty, first, last, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_START, (int)wbp->cl_scmap, wbp->cl_scdirty, (int)cl->b_addr, (int)cl->e_addr, 0); - offset = (off_t)first * PAGE_SIZE_64; - length = (last - first) * PAGE_SIZE; + offset = 
(off_t)(cl->b_addr * PAGE_SIZE_64); + length = ((u_int)(cl->e_addr - cl->b_addr)) * PAGE_SIZE; - while (vfs_drt_mark_pages(&(vp->v_scmap), offset, length, &new_dirty) != KERN_SUCCESS) { + while (vfs_drt_mark_pages(&(wbp->cl_scmap), offset, length, &new_dirty) != KERN_SUCCESS) { /* * no room left in the map * only a partial update was done * push out some pages and try again */ - vp->v_scdirty += new_dirty; + wbp->cl_scdirty += new_dirty; - sparse_cluster_push(vp, EOF, 0); + sparse_cluster_push(wbp, vp, EOF, 0); offset += (new_dirty * PAGE_SIZE_64); length -= (new_dirty * PAGE_SIZE); } - vp->v_scdirty += new_dirty; + wbp->cl_scdirty += new_dirty; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_END, (int)vp, (int)vp->v_scmap, vp->v_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); } static int -cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int xsize, int devblocksize, int flags) +cluster_align_phys_io(vnode_t vp, struct uio *uio, addr64_t usr_paddr, int xsize, int flags) { struct iovec *iov; upl_page_info_t *pl; @@ -3788,15 +4279,28 @@ cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int addr64_t ubc_paddr; kern_return_t kret; int error = 0; + int did_read = 0; + int abort_flags; + int upl_flags; iov = uio->uio_iov; + upl_flags = UPL_SET_LITE; + if (! (flags & CL_READ)) { + /* + * "write" operation: let the UPL subsystem know + * that we intend to modify the buffer cache pages + * we're gathering. + */ + upl_flags |= UPL_WILL_MODIFY; + } + kret = ubc_create_upl(vp, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, &upl, &pl, - UPL_SET_LITE); + upl_flags); if (kret != KERN_SUCCESS) return(EINVAL); @@ -3805,13 +4309,14 @@ cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int /* * issue a synchronous read to cluster_io */ - error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize, - CL_READ, (struct buf *)0, (struct clios *)0); + error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, + CL_READ, (buf_t)NULL, (struct clios *)NULL); if (error) { ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); return(error); } + did_read = 1; } ubc_paddr = ((addr64_t)upl_phys_page(pl, 0) << 12) + (addr64_t)(uio->uio_offset & PAGE_MASK_64); @@ -3832,16 +4337,21 @@ cluster_align_phys_io(struct vnode *vp, struct uio *uio, addr64_t usr_paddr, int /* * issue a synchronous write to cluster_io */ - error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, devblocksize, - 0, (struct buf *)0, (struct clios *)0); + error = cluster_io(vp, upl, 0, uio->uio_offset & ~PAGE_MASK_64, PAGE_SIZE, + 0, (buf_t)NULL, (struct clios *)NULL); } if (error == 0) { uio->uio_offset += xsize; - iov->iov_base += xsize; - iov->iov_len -= xsize; - uio->uio_resid -= xsize; + uio_iov_base_add(uio, xsize); + uio_iov_len_add(uio, -xsize); + uio_setresid(uio, (uio_resid(uio) - xsize)); } - ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); + if (did_read) + abort_flags = UPL_ABORT_FREE_ON_EMPTY; + else + abort_flags = UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_DUMP_PAGES; + + ubc_upl_abort_range(upl, 0, PAGE_SIZE, abort_flags); return (error); } @@ -3857,27 +4367,40 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int xsize) int segflg; int retval = 0; upl_page_info_t *pl; - boolean_t funnel_state = FALSE; - 
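cluster_align_phys_io above is a textbook single-page read-modify-write: it creates a UPL for the one page straddling the caller's misaligned offset, pre-reads it through cluster_io only when the page's existing contents are needed, copies the user's bytes by physical address, and then, per the new did_read/abort_flags logic, dumps the page on release only if no read ever populated it. Below is a hedged sketch of the offset arithmetic alone; the real code decides whether to pre-read from the state of the cached page, whereas this toy uses "does the write cover the whole page" as a stand-in criterion.

    #include <stdint.h>
    #include <stdio.h>

    #define PG_SIZE 4096ULL
    #define PG_MASK (PG_SIZE - 1)

    /*
     * For a transfer of 'xsize' bytes (at most one page) at byte offset
     * 'off', find the single page being modified and decide whether it
     * must be read before the copy lands in it.
     */
    static void
    plan_partial_page_write(uint64_t off, uint64_t xsize)
    {
            uint64_t page_base = off & ~PG_MASK;    /* page-aligned start    */
            uint64_t page_off  = off &  PG_MASK;    /* byte offset into page */
            int      need_read = (page_off != 0 || xsize < PG_SIZE);

            printf("page 0x%llx: copy %llu bytes at +%llu, pre-read=%d\n",
                (unsigned long long)page_base, (unsigned long long)xsize,
                (unsigned long long)page_off, need_read);
    }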
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, upl_offset, xsize, 0); - - if (xsize >= (16 * 1024)) - funnel_state = thread_funnel_set(kernel_flock, FALSE); + (int)uio->uio_offset, uio_resid(uio), upl_offset, xsize, 0); segflg = uio->uio_segflg; switch(segflg) { + case UIO_USERSPACE32: + case UIO_USERISPACE32: + uio->uio_segflg = UIO_PHYS_USERSPACE32; + break; + case UIO_USERSPACE: case UIO_USERISPACE: uio->uio_segflg = UIO_PHYS_USERSPACE; break; + case UIO_USERSPACE64: + case UIO_USERISPACE64: + uio->uio_segflg = UIO_PHYS_USERSPACE64; + break; + + case UIO_SYSSPACE32: + uio->uio_segflg = UIO_PHYS_SYSSPACE32; + break; + case UIO_SYSSPACE: uio->uio_segflg = UIO_PHYS_SYSSPACE; break; + + case UIO_SYSSPACE64: + uio->uio_segflg = UIO_PHYS_SYSSPACE64; + break; } pl = ubc_upl_pageinfo(upl); @@ -3899,47 +4422,56 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int xsize) } uio->uio_segflg = segflg; - if (funnel_state == TRUE) - thread_funnel_set(kernel_flock, TRUE); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, retval, segflg, 0); + (int)uio->uio_offset, uio_resid(uio), retval, segflg, 0); return (retval); } int -cluster_copy_ubc_data(struct vnode *vp, struct uio *uio, int *io_resid, int mark_dirty) +cluster_copy_ubc_data(vnode_t vp, struct uio *uio, int *io_resid, int mark_dirty) { int segflg; int io_size; int xsize; int start_offset; - off_t f_offset; int retval = 0; memory_object_control_t control; - int op_flags = UPL_POP_SET | UPL_POP_BUSY; - boolean_t funnel_state = FALSE; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_START, - (int)uio->uio_offset, uio->uio_resid, 0, *io_resid, 0); + (int)uio->uio_offset, uio_resid(uio), 0, *io_resid, 0); control = ubc_getobject(vp, UBC_FLAGS_NONE); if (control == MEMORY_OBJECT_CONTROL_NULL) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, retval, 3, 0); + (int)uio->uio_offset, uio_resid(uio), retval, 3, 0); return(0); } - if (mark_dirty) - op_flags |= UPL_POP_DIRTY; - segflg = uio->uio_segflg; switch(segflg) { + case UIO_USERSPACE32: + case UIO_USERISPACE32: + uio->uio_segflg = UIO_PHYS_USERSPACE32; + break; + + case UIO_USERSPACE64: + case UIO_USERISPACE64: + uio->uio_segflg = UIO_PHYS_USERSPACE64; + break; + + case UIO_SYSSPACE32: + uio->uio_segflg = UIO_PHYS_SYSSPACE32; + break; + + case UIO_SYSSPACE64: + uio->uio_segflg = UIO_PHYS_SYSSPACE64; + break; + case UIO_USERSPACE: case UIO_USERISPACE: uio->uio_segflg = UIO_PHYS_USERSPACE; @@ -3949,44 +4481,28 @@ cluster_copy_ubc_data(struct vnode *vp, struct uio *uio, int *io_resid, int mark uio->uio_segflg = UIO_PHYS_SYSSPACE; break; } - io_size = *io_resid; - start_offset = (int)(uio->uio_offset & PAGE_MASK_64); - f_offset = uio->uio_offset - start_offset; - xsize = min(PAGE_SIZE - start_offset, io_size); - - while (io_size && retval == 0) { - ppnum_t pgframe; - - if (ubc_page_op_with_control(control, f_offset, op_flags, &pgframe, 0) != KERN_SUCCESS) - break; - - if (funnel_state == FALSE && io_size >= (16 * 1024)) - funnel_state = thread_funnel_set(kernel_flock, FALSE); - retval = uiomove64((addr64_t)(((addr64_t)pgframe << 12) + start_offset), xsize, uio); + if ( (io_size = *io_resid) ) { + start_offset = (int)(uio->uio_offset & PAGE_MASK_64); + xsize = uio_resid(uio); - ubc_page_op_with_control(control, f_offset, UPL_POP_CLR | UPL_POP_BUSY, 0, 0); - - io_size -= xsize; - start_offset = 0; - f_offset = uio->uio_offset; - xsize = 
min(PAGE_SIZE, io_size); + retval = memory_object_control_uiomove(control, uio->uio_offset - start_offset, + uio, start_offset, io_size, mark_dirty); + xsize -= uio_resid(uio); + io_size -= xsize; } uio->uio_segflg = segflg; *io_resid = io_size; - if (funnel_state == TRUE) - thread_funnel_set(kernel_flock, TRUE); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 34)) | DBG_FUNC_END, - (int)uio->uio_offset, uio->uio_resid, retval, 0x80000000 | segflg, 0); + (int)uio->uio_offset, uio_resid(uio), retval, 0x80000000 | segflg, 0); return(retval); } int -is_file_clean(struct vnode *vp, off_t filesize) +is_file_clean(vnode_t vp, off_t filesize) { off_t f_offset; int flags; @@ -4168,7 +4684,6 @@ struct vfs_drt_clustermap { * lastclean, iskips */ -static void vfs_drt_sanity(struct vfs_drt_clustermap *cmap); static kern_return_t vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp); static kern_return_t vfs_drt_free_map(struct vfs_drt_clustermap *cmap); static kern_return_t vfs_drt_search_index(struct vfs_drt_clustermap *cmap, @@ -4321,8 +4836,6 @@ vfs_drt_alloc_map(struct vfs_drt_clustermap **cmapp) static kern_return_t vfs_drt_free_map(struct vfs_drt_clustermap *cmap) { - kern_return_t ret; - kmem_free(kernel_map, (vm_offset_t)cmap, (cmap->scm_modulus == DRT_HASH_SMALL_MODULUS) ? DRT_SMALL_ALLOCATION : DRT_LARGE_ALLOCATION); return(KERN_SUCCESS); @@ -4335,8 +4848,7 @@ vfs_drt_free_map(struct vfs_drt_clustermap *cmap) static kern_return_t vfs_drt_search_index(struct vfs_drt_clustermap *cmap, u_int64_t offset, int *indexp) { - kern_return_t kret; - int index, i, tries; + int index, i; offset = DRT_ALIGN_ADDRESS(offset); index = DRT_HASH(cmap, offset); @@ -4513,7 +5025,7 @@ vfs_drt_do_mark_pages( } } DRT_HASH_SET_COUNT(cmap, index, ecount); -next: + offset += pgcount * PAGE_SIZE; length -= pgcount * PAGE_SIZE; } @@ -4556,11 +5068,13 @@ vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, int *setcountp) return(vfs_drt_do_mark_pages(cmapp, offset, length, setcountp, 1)); } +#if 0 static kern_return_t vfs_drt_unmark_pages(void **cmapp, off_t offset, u_int length) { return(vfs_drt_do_mark_pages(cmapp, offset, length, NULL, 0)); } +#endif /* * Get a cluster of dirty pages. @@ -4687,12 +5201,22 @@ vfs_drt_control(void **cmapp, int op_type) * Emit a summary of the state of the clustermap into the trace buffer * along with some caller-provided data. */ +#if KDEBUG static void -vfs_drt_trace(struct vfs_drt_clustermap *cmap, int code, int arg1, int arg2, int arg3, int arg4) +vfs_drt_trace(__unused struct vfs_drt_clustermap *cmap, int code, int arg1, int arg2, int arg3, int arg4) { KERNEL_DEBUG(code, arg1, arg2, arg3, arg4, 0); } +#else +static void +vfs_drt_trace(__unused struct vfs_drt_clustermap *cmap, __unused int code, + __unused int arg1, __unused int arg2, __unused int arg3, + __unused int arg4) +{ +} +#endif +#if 0 /* * Perform basic sanity check on the hash entry summary count * vs. the actual bits set in the entry. @@ -4715,3 +5239,4 @@ vfs_drt_sanity(struct vfs_drt_clustermap *cmap) panic("bits_on = %d, index = %d\n", bits_on, index); } } +#endif diff --git a/bsd/vfs/vfs_conf.c b/bsd/vfs/vfs_conf.c index 7f0274f33..e02553392 100644 --- a/bsd/vfs/vfs_conf.c +++ b/bsd/vfs/vfs_conf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -57,8 +57,8 @@ #include #include -#include -#include +#include +#include /* * These define the root filesystem, device, and root filesystem type. @@ -71,7 +71,9 @@ int (*mountroot)() = NULL; * Set up the initial array of known filesystem types. */ extern struct vfsops ufs_vfsops; +#if FFS extern int ffs_mountroot(); +#endif extern struct vfsops mfs_vfsops; extern int mfs_mountroot(); extern struct vfsops hfs_vfsops; @@ -90,60 +92,60 @@ extern struct vfsops devfs_vfsops; /* * Set up the filesystem operations for vnodes. */ -static struct vfsconf vfsconflist[] = { +static struct vfstable vfsconflist[] = { /* HFS/HFS+ Filesystem */ #if HFS - { &hfs_vfsops, "hfs", 17, 0, MNT_LOCAL | MNT_DOVOLFS, hfs_mountroot, NULL }, + { &hfs_vfsops, "hfs", 17, 0, MNT_LOCAL | MNT_DOVOLFS, hfs_mountroot, NULL, 0, {0}, VFC_VFSLOCALARGS, 0, 0 }, #endif /* Fast Filesystem */ #if FFS - { &ufs_vfsops, "ufs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL }, + { &ufs_vfsops, "ufs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL, 0, {0}, VFC_VFSLOCALARGS, 0, 0 }, #endif /* ISO9660 (aka CDROM) Filesystem */ #if CD9660 - { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL }, + { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL, 0, {0}, VFC_VFSLOCALARGS, 0, 0 }, #endif /* Memory-based Filesystem */ #if MFS - { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL }, + { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Sun-compatible Network Filesystem */ #if NFSCLIENT - { &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL }, + { &nfs_vfsops, "nfs", 2, 0, 0, nfs_mountroot, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Andrew Filesystem */ #if AFS - { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL }, + { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Loopback (Minimal) Filesystem Layer */ #if NULLFS - { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL }, + { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Union (translucent) Filesystem */ #if UNION - { &union_vfsops, "union", 15, 0, 0, NULL, NULL }, + { &union_vfsops, "union", 15, 0, 0, NULL, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* File Descriptor Filesystem */ #if FDESC - { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL }, + { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Volume ID Filesystem */ #if VOLFS - { &volfs_vfsops, "volfs", 18, 0, 0, NULL, NULL }, + { &volfs_vfsops, "volfs", 18, 0, 0, NULL, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif /* Device Filesystem */ #if DEVFS - { &devfs_vfsops, "devfs", 19, 0, 0, NULL, NULL }, + { &devfs_vfsops, "devfs", 19, 0, 0, NULL, NULL, 0, {0}, VFC_VFSGENERICARGS , 0, 0}, #endif {0}, @@ -169,7 +171,7 @@ static struct vfsconf vfsconflist[] = { int maxvfsslots = sizeof(vfsconflist) / sizeof (struct vfsconf); int numused_vfsslots = 0; int maxvfsconf = sizeof(vfsconflist) / sizeof (struct vfsconf); -struct vfsconf *vfsconf = vfsconflist; +struct vfstable *vfsconf = vfsconflist; /* * @@ -178,9 +180,11 @@ struct vfsconf *vfsconf = vfsconflist; * vectors. It is NULL terminated. 
* */ +#if FFS extern struct vnodeopv_desc ffs_vnodeop_opv_desc; extern struct vnodeopv_desc ffs_specop_opv_desc; extern struct vnodeopv_desc ffs_fifoop_opv_desc; +#endif extern struct vnodeopv_desc mfs_vnodeop_opv_desc; extern struct vnodeopv_desc dead_vnodeop_opv_desc; extern struct vnodeopv_desc fifo_vnodeop_opv_desc; @@ -203,10 +207,12 @@ extern struct vnodeopv_desc devfs_vnodeop_opv_desc; extern struct vnodeopv_desc devfs_spec_vnodeop_opv_desc; struct vnodeopv_desc *vfs_opv_descs[] = { +#if FFS &ffs_vnodeop_opv_desc, &ffs_specop_opv_desc, #if FIFO &ffs_fifoop_opv_desc, +#endif #endif &dead_vnodeop_opv_desc, #if FIFO diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c new file mode 100644 index 000000000..5949b796a --- /dev/null +++ b/bsd/vfs/vfs_fsevents.c @@ -0,0 +1,1402 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// where all our structs and defines come from +#include + + +typedef struct kfs_event_arg { + u_int16_t type; + u_int16_t len; + union { + struct vnode *vp; + char *str; + void *ptr; + int32_t int32; + dev_t dev; + ino_t ino; + int32_t mode; + uid_t uid; + gid_t gid; + } data; +}kfs_event_arg; + +#define KFS_NUM_ARGS FSE_MAX_ARGS +typedef struct kfs_event { + int32_t type; // type code of this event + u_int32_t refcount; // number of clients referencing this + pid_t pid; // pid of the process that did the op + kfs_event_arg args[KFS_NUM_ARGS]; +} kfs_event; + + +typedef struct fs_event_watcher { + SLIST_ENTRY(fs_event_watcher) link; + int8_t *event_list; // the events we're interested in + int32_t num_events; + dev_t *devices_to_watch; // only report events from these devices + uint32_t num_devices; + int32_t flags; + kfs_event **event_queue; + int32_t eventq_size; // number of event pointers in queue + int32_t rd, wr; // indices to the event_queue + int32_t blockers; +} fs_event_watcher; + +// fs_event_watcher flags +#define WATCHER_DROPPED_EVENTS 0x0001 +#define WATCHER_CLOSING 0x0002 + +static SLIST_HEAD(watch_list, fs_event_watcher) watch_list_head = { NULL }; + + +#define MAX_KFS_EVENTS 2048 + +// this array holds each pending event +static kfs_event fs_event_buf[MAX_KFS_EVENTS]; +static int free_event_idx = 0; +static int fs_event_init = 0; + +// +// this array records whether anyone is interested in a +// particular type of event. 
if no one is, we bail out +// early from the event delivery +// +static int16_t fs_event_type_watchers[FSE_MAX_EVENTS]; + +static int watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse); + +// +// Locks +// +static lck_grp_attr_t * fsevent_group_attr; +static lck_attr_t * fsevent_lock_attr; +static lck_grp_t * fsevent_mutex_group; + +static lck_grp_t * fsevent_rw_group; + +static lck_rw_t fsevent_big_lock; // always grab this first +static lck_mtx_t watch_list_lock; +static lck_mtx_t event_buf_lock; + + +static void init_pathbuff(void); + + +static void +fsevents_internal_init(void) +{ + int i; + + if (fs_event_init++ != 0) { + return; + } + + for(i=0; i < FSE_MAX_EVENTS; i++) { + fs_event_type_watchers[i] = 0; + } + + for(i=0; i < MAX_KFS_EVENTS; i++) { + fs_event_buf[i].type = FSE_INVALID; + fs_event_buf[i].refcount = 0; + } + + SLIST_INIT(&watch_list_head); + + fsevent_lock_attr = lck_attr_alloc_init(); + fsevent_group_attr = lck_grp_attr_alloc_init(); + fsevent_mutex_group = lck_grp_alloc_init("fsevent-mutex", fsevent_group_attr); + fsevent_rw_group = lck_grp_alloc_init("fsevent-rw", fsevent_group_attr); + + lck_mtx_init(&watch_list_lock, fsevent_mutex_group, fsevent_lock_attr); + lck_mtx_init(&event_buf_lock, fsevent_mutex_group, fsevent_lock_attr); + + lck_rw_init(&fsevent_big_lock, fsevent_rw_group, fsevent_lock_attr); + + init_pathbuff(); +} + +static void +lock_watch_list(void) +{ + lck_mtx_lock(&watch_list_lock); +} + +static void +unlock_watch_list(void) +{ + lck_mtx_unlock(&watch_list_lock); +} + +static void +lock_fs_event_buf(void) +{ + lck_mtx_lock(&event_buf_lock); +} + +static void +unlock_fs_event_buf(void) +{ + lck_mtx_unlock(&event_buf_lock); +} + +// forward prototype +static void do_free_event(kfs_event *kfse); + +static int +watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev) +{ + unsigned int i; + + // if there is no list of devices to watch, then always + // say we're interested so we'll report all events from + // all devices + if (watcher->devices_to_watch == NULL) { + return 1; + } + + for(i=0; i < watcher->num_devices; i++) { + if (dev == watcher->devices_to_watch[i]) { + // found a match! that means we want events + // from this device. + return 1; + } + } + + // if we're here it's not in the devices_to_watch[] + // list so that means we do not care about it + return 0; +} + + +int +need_fsevent(int type, vnode_t vp) +{ + fs_event_watcher *watcher; + dev_t dev; + + if (fs_event_type_watchers[type] == 0) + return (0); + dev = (dev_t)(vp->v_mount->mnt_vfsstat.f_fsid.val[0]); + + lock_watch_list(); + + SLIST_FOREACH(watcher, &watch_list_head, link) { + if (watcher->event_list[type] == FSE_REPORT && watcher_cares_about_dev(watcher, dev)) { + unlock_watch_list(); + return (1); + } + } + unlock_watch_list(); + + return (0); +} + + +int +add_fsevent(int type, vfs_context_t ctx, ...) +{ + struct proc *p = vfs_context_proc(ctx); + int i, arg_idx, num_deliveries=0; + kfs_event_arg *kea; + kfs_event *kfse; + fs_event_watcher *watcher; + va_list ap; + int error = 0; + dev_t dev = 0; + + va_start(ap, ctx); + + // if no one cares about this type of event, bail out + if (fs_event_type_watchers[type] == 0) { + va_end(ap); + return 0; + } + + lck_rw_lock_shared(&fsevent_big_lock); + + // find a free event and snag it for our use + // NOTE: do not do anything that would block until + // the lock is dropped.
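The free-slot hunt that follows is a bounded circular probe over the static fs_event_buf table: starting at free_event_idx, it examines at most MAX_KFS_EVENTS slots looking for one still marked FSE_INVALID, with the whole scan run under event_buf_lock. A minimal user-space rendering of the same probe, with an illustrative table size and free marker:

    #define TBL_SIZE  8                     /* stands in for MAX_KFS_EVENTS */
    #define SLOT_FREE 0                     /* stands in for FSE_INVALID    */

    static int table[TBL_SIZE];             /* all slots start out free     */
    static int free_idx;                    /* where the last scan left off */

    /*
     * Probe at most TBL_SIZE slots starting at free_idx; in the kernel the
     * whole scan runs under event_buf_lock.  Returns a slot index, or -1
     * when every slot is busy (the "event queue is full" case).
     */
    static int
    grab_free_slot(void)
    {
            int i, idx;

            for (i = 0; i < TBL_SIZE; i++) {
                    idx = (free_idx + i) % TBL_SIZE;
                    if (table[idx] == SLOT_FREE) {
                            table[idx] = 1;  /* claim: a real event type  */
                            free_idx++;      /* rotate the starting point */
                            return idx;
                    }
            }
            return -1;
    }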
+ lock_fs_event_buf(); + + for(i=0; i < MAX_KFS_EVENTS; i++) { + if (fs_event_buf[(free_event_idx + i) % MAX_KFS_EVENTS].type == FSE_INVALID) { + break; + } + } + + if (i >= MAX_KFS_EVENTS) { + // yikes! no free slots + unlock_fs_event_buf(); + va_end(ap); + + lock_watch_list(); + SLIST_FOREACH(watcher, &watch_list_head, link) { + watcher->flags |= WATCHER_DROPPED_EVENTS; + wakeup((caddr_t)watcher); + } + unlock_watch_list(); + lck_rw_done(&fsevent_big_lock); + + printf("fs_events: add_event: event queue is full! dropping events.\n"); + return ENOSPC; + } + + kfse = &fs_event_buf[(free_event_idx + i) % MAX_KFS_EVENTS]; + + free_event_idx++; + + kfse->type = type; + kfse->refcount = 0; + kfse->pid = p->p_pid; + + unlock_fs_event_buf(); // at this point it's safe to unlock + + // + // now process the arguments passed in and copy them into + // the kfse + // + arg_idx = 0; + while(arg_idx < KFS_NUM_ARGS) { + kea = &kfse->args[arg_idx++]; + kea->type = va_arg(ap, int32_t); + + if (kea->type == FSE_ARG_DONE) { + break; + } + + switch(kea->type) { + case FSE_ARG_VNODE: { + // this expands out into multiple arguments to the client + struct vnode *vp; + struct vnode_attr va; + + kea->data.vp = vp = va_arg(ap, struct vnode *); + if (kea->data.vp == NULL) { + panic("add_fsevent: you can't pass me a NULL vnode ptr (type %d)!\n", + kfse->type); + } + + if (vnode_ref_ext(kea->data.vp, O_EVTONLY) != 0) { + kea->type = FSE_ARG_DONE; + + error = EINVAL; + goto clean_up; + } + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + if (vnode_getattr(kea->data.vp, &va, ctx) != 0) { + vnode_rele_ext(kea->data.vp, O_EVTONLY, 0); + kea->type = FSE_ARG_DONE; + + error = EINVAL; + goto clean_up; + } + + kea++; + kea->type = FSE_ARG_DEV; + kea->data.dev = dev = (dev_t)va.va_fsid; + + kea++; + kea->type = FSE_ARG_INO; + kea->data.ino = (ino_t)va.va_fileid; + + kea++; + kea->type = FSE_ARG_MODE; + kea->data.mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode; + + kea++; + kea->type = FSE_ARG_UID; + kea->data.uid = va.va_uid; + + kea++; + kea->type = FSE_ARG_GID; + kea->data.gid = va.va_gid; + arg_idx += 5; + break; + } + + case FSE_ARG_FINFO: { + fse_info *fse; + + fse = va_arg(ap, fse_info *); + + kea->type = FSE_ARG_DEV; + kea->data.dev = dev = (dev_t)fse->dev; + + kea++; + kea->type = FSE_ARG_INO; + kea->data.ino = (ino_t)fse->ino; + + kea++; + kea->type = FSE_ARG_MODE; + kea->data.mode = (int32_t)fse->mode; + + kea++; + kea->type = FSE_ARG_UID; + kea->data.uid = (uid_t)fse->uid; + + kea++; + kea->type = FSE_ARG_GID; + kea->data.gid = (uid_t)fse->gid; + arg_idx += 4; + break; + } + + case FSE_ARG_STRING: + kea->len = (int16_t)(va_arg(ap, int32_t) & 0xffff); + kea->data.str = vfs_addname(va_arg(ap, char *), kea->len, 0, 0); + break; + + case FSE_ARG_INT32: + kea->data.int32 = va_arg(ap, int32_t); + break; + + case FSE_ARG_INT64: + printf("fs_events: 64-bit args not implemented.\n"); +// kea->data.int64 = va_arg(ap, int64_t); + break; + + case FSE_ARG_RAW: + kea->len = (int16_t)(va_arg(ap, int32_t) & 0xffff); + MALLOC(kea->data.ptr, void *, kea->len, M_TEMP, M_WAITOK); + memcpy(kea->data.ptr, va_arg(ap, void *), kea->len); + break; + + case FSE_ARG_DEV: + kea->data.dev = dev = va_arg(ap, dev_t); + break; + + case FSE_ARG_MODE: + kea->data.mode = va_arg(ap, int32_t); + break; + + case FSE_ARG_INO: + kea->data.ino = va_arg(ap, ino_t); + break; + + case FSE_ARG_UID: + kea->data.uid = va_arg(ap, 
uid_t); + break; + + case FSE_ARG_GID: + kea->data.gid = va_arg(ap, gid_t); + break; + + default: + printf("add_fsevent: unknown type %d\n", kea->type); + // just skip one 32-bit word and hope we sync up... + (void)va_arg(ap, int32_t); + } + } + + va_end(ap); + + // + // now we have to go and let everyone who + // is interested in this type of event know about it... + // + lock_watch_list(); + + SLIST_FOREACH(watcher, &watch_list_head, link) { + if (watcher->event_list[type] == FSE_REPORT && watcher_cares_about_dev(watcher, dev)) { + if (watcher_add_event(watcher, kfse) == 0) { + num_deliveries++; + } + } + } + + unlock_watch_list(); + + clean_up: + // just in case no one was interested after all... + if (num_deliveries == 0) { + do_free_event(kfse); + free_event_idx = (int)(kfse - &fs_event_buf[0]); + } + + lck_rw_done(&fsevent_big_lock); + return error; +} + +static void +do_free_event(kfs_event *kfse) +{ + int i; + kfs_event_arg *kea, all_args[KFS_NUM_ARGS]; + + lock_fs_event_buf(); + + // mark this fsevent as invalid + kfse->type = FSE_INVALID; + + // make a copy of this so we can free things without + // holding the fs_event_buf lock + // + memcpy(&all_args[0], &kfse->args[0], sizeof(all_args)); + + // and just to be anal, set this so that there are no args + kfse->args[0].type = FSE_ARG_DONE; + + free_event_idx = (kfse - fs_event_buf); + + unlock_fs_event_buf(); + + for(i=0; i < KFS_NUM_ARGS; i++) { + kea = &all_args[i]; + if (kea->type == FSE_ARG_DONE) { + break; + } + + switch(kea->type) { + case FSE_ARG_VNODE: + vnode_rele_ext(kea->data.vp, O_EVTONLY, 0); + break; + case FSE_ARG_STRING: + vfs_removename(kea->data.str); + break; + case FSE_ARG_RAW: + FREE(kea->data.ptr, M_TEMP); + break; + } + } +} + + +static int +add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_event_watcher **watcher_out) +{ + int i; + fs_event_watcher *watcher; + + if (eventq_size < 0 || eventq_size > MAX_KFS_EVENTS) { + eventq_size = MAX_KFS_EVENTS; + } + + // Note: the event_queue follows the fs_event_watcher struct + // in memory so we only have to do one allocation + MALLOC(watcher, + fs_event_watcher *, + sizeof(fs_event_watcher) + eventq_size * sizeof(kfs_event *), + M_TEMP, M_WAITOK); + + watcher->event_list = event_list; + watcher->num_events = num_events; + watcher->devices_to_watch = NULL; + watcher->num_devices = 0; + watcher->flags = 0; + watcher->event_queue = (kfs_event **)&watcher[1]; + watcher->eventq_size = eventq_size; + watcher->rd = 0; + watcher->wr = 0; + watcher->blockers = 0; + + lock_watch_list(); + + // now update the global list of who's interested in + // events of a particular type...
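One allocation detail in add_watcher deserves a sketch: the watcher struct and its event-pointer queue come from a single MALLOC, with the queue located at &watcher[1], immediately after the struct. A hedged illustration, with plain malloc standing in for the kernel allocator:

    #include <stdlib.h>

    struct event;                            /* opaque for this sketch */

    struct watcher {
            struct event **event_queue;      /* points just past the struct */
            int            eventq_size;
            int            rd, wr;           /* ring indices, start empty   */
    };

    /*
     * One malloc covers the struct and its queue; &w[1] is the first byte
     * after the struct, which is where the queue slots were placed.
     */
    static struct watcher *
    watcher_alloc(int eventq_size)
    {
            struct watcher *w;

            w = malloc(sizeof(*w) + eventq_size * sizeof(struct event *));
            if (w == NULL)
                    return NULL;

            w->event_queue = (struct event **)&w[1];
            w->eventq_size = eventq_size;
            w->rd = w->wr = 0;
            return w;
    }

The loop that now follows implements the comment above, bumping the per-type fs_event_type_watchers counts for each event this watcher asked to receive.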
+ for(i=0; i < num_events; i++) { + if (event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) { + fs_event_type_watchers[i]++; + } + } + + SLIST_INSERT_HEAD(&watch_list_head, watcher, link); + + unlock_watch_list(); + + *watcher_out = watcher; + + return 0; +} + +static void +remove_watcher(fs_event_watcher *target) +{ + int i; + fs_event_watcher *watcher; + kfs_event *kfse; + + lck_rw_lock_shared(&fsevent_big_lock); + + lock_watch_list(); + + SLIST_FOREACH(watcher, &watch_list_head, link) { + if (watcher == target) { + SLIST_REMOVE(&watch_list_head, watcher, fs_event_watcher, link); + + for(i=0; i < watcher->num_events; i++) { + if (watcher->event_list[i] != FSE_IGNORE && i < FSE_MAX_EVENTS) { + fs_event_type_watchers[i]--; + } + } + + unlock_watch_list(); + + // drain the event_queue + for(i=watcher->rd; i != watcher->wr; i=(i+1) % watcher->eventq_size) { + kfse = watcher->event_queue[i]; + + if (OSAddAtomic(-1, (SInt32 *)&kfse->refcount) == 1) { + do_free_event(kfse); + } + } + + if (watcher->event_list) { + FREE(watcher->event_list, M_TEMP); + watcher->event_list = NULL; + } + if (watcher->devices_to_watch) { + FREE(watcher->devices_to_watch, M_TEMP); + watcher->devices_to_watch = NULL; + } + FREE(watcher, M_TEMP); + + lck_rw_done(&fsevent_big_lock); + return; + } + } + + unlock_watch_list(); + lck_rw_done(&fsevent_big_lock); +} + + +static int +watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse) +{ + if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) { + watcher->flags |= WATCHER_DROPPED_EVENTS; + wakeup((caddr_t)watcher); + return ENOSPC; + } + + watcher->event_queue[watcher->wr] = kfse; + OSAddAtomic(1, (SInt32 *)&kfse->refcount); + watcher->wr = (watcher->wr + 1) % watcher->eventq_size; + + // wake up the watcher if he's waiting! + wakeup((caddr_t)watcher); + + return 0; +} + + +static int +fmod_watch(fs_event_watcher *watcher, struct uio *uio) +{ + int i, error=0, last_full_event_resid; + kfs_event *kfse; + kfs_event_arg *kea; + uint16_t tmp16; + + // LP64todo - fix this + last_full_event_resid = uio_resid(uio); + + // need at least 2048 bytes of space (maxpathlen + 1 event buf) + if (uio_resid(uio) < 2048 || watcher == NULL) { + return EINVAL; + } + + + if (watcher->rd == watcher->wr) { + if (watcher->flags & WATCHER_CLOSING) { + return 0; + } + OSAddAtomic(1, (SInt32 *)&watcher->blockers); + + // there's nothing to do, go to sleep + error = tsleep((caddr_t)watcher, PUSER|PCATCH, "fsevents_empty", 0); + + OSAddAtomic(-1, (SInt32 *)&watcher->blockers); + + if (error != 0 || (watcher->flags & WATCHER_CLOSING)) { + return error; + } + } + + // if we dropped events, return that as an event first + if (watcher->flags & WATCHER_DROPPED_EVENTS) { + int32_t val = FSE_EVENTS_DROPPED; + + error = uiomove((caddr_t)&val, sizeof(int32_t), uio); + if (error == 0) { + val = 0; // a fake pid + error = uiomove((caddr_t)&val, sizeof(int32_t), uio); + + tmp16 = FSE_ARG_DONE; // makes it a consistent msg + error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio); + } + + if (error) { + return error; + } + + watcher->flags &= ~WATCHER_DROPPED_EVENTS; + } + +// check if the next chunk of data will fit in the user's +// buffer. if not, just goto get_out which will return +// the number of bytes worth of events that we did read. +// this leaves the event that didn't fit in the queue. 
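Both halves of the watcher queue protocol are visible at this point: watcher_add_event treats event_queue as a ring that is full when (wr + 1) % eventq_size == rd, deliberately sacrificing one slot so that rd == wr can unambiguously mean empty, and fmod_watch rewinds uio_resid to the last fully copied event when the next one will not fit (the CHECK_UPTR macro defined just below). A compressed standalone sketch of that ring discipline, with illustrative types:

    #include <stddef.h>

    #define QSIZE 8

    struct ring {
            void *q[QSIZE];
            int   rd, wr;            /* consumer / producer indices */
    };

    /*
     * Full when advancing wr would land on rd: one slot is sacrificed so
     * that rd == wr can unambiguously mean "empty".
     */
    static int
    ring_put(struct ring *r, void *e)
    {
            if ((r->wr + 1) % QSIZE == r->rd)
                    return -1;               /* full: caller drops the event */
            r->q[r->wr] = e;
            r->wr = (r->wr + 1) % QSIZE;
            return 0;
    }

    static void *
    ring_get(struct ring *r)
    {
            void *e;

            if (r->rd == r->wr)
                    return NULL;             /* empty */
            e = r->q[r->rd];
            r->rd = (r->rd + 1) % QSIZE;
            return e;
    }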
+// + // LP64todo - fix this +#define CHECK_UPTR(size) if (size > (unsigned)uio_resid(uio)) { \ + uio_setresid(uio, last_full_event_resid); \ + goto get_out; \ + } + + for (; uio_resid(uio) > 0 && watcher->rd != watcher->wr; ) { + kfse = watcher->event_queue[watcher->rd]; + + // copy out the type of the event + CHECK_UPTR(sizeof(int32_t)); + if ((error = uiomove((caddr_t)&kfse->type, sizeof(int32_t), uio)) != 0) { + goto get_out; + } + + // now copy out the pid of the person that changed the file + CHECK_UPTR(sizeof(pid_t)); + if ((error = uiomove((caddr_t)&kfse->pid, sizeof(pid_t), uio)) != 0) { + goto get_out; + } + + error = 0; + for(i=0; i < KFS_NUM_ARGS && error == 0; i++) { + char *pathbuff; + int pathbuff_len; + + kea = &kfse->args[i]; + + tmp16 = (uint16_t)kea->type; + CHECK_UPTR(sizeof(uint16_t)); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + if (error || kea->type == FSE_ARG_DONE) { + break; + } + + switch(kea->type) { + case FSE_ARG_VNODE: + pathbuff = get_pathbuff(); + pathbuff_len = MAXPATHLEN; + if (kea->data.vp == NULL) { + printf("fmod_watch: whoa... vp == NULL (%d)!\n", kfse->type); + i--; + release_pathbuff(pathbuff); + continue; + } + + if (vn_getpath(kea->data.vp, pathbuff, &pathbuff_len) != 0 || pathbuff[0] == '\0') { +// printf("fmod_watch: vn_getpath failed! vp 0x%x vname 0x%x (%s) vparent 0x%x\n", +// kea->data.vp, +// VNAME(kea->data.vp), +// VNAME(kea->data.vp) ? VNAME(kea->data.vp) : "", +// VPARENT(kea->data.vp)); + } + CHECK_UPTR(sizeof(uint16_t)); + tmp16 = (uint16_t)pathbuff_len; + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + + CHECK_UPTR((unsigned)pathbuff_len); + error = uiomove((caddr_t)pathbuff, pathbuff_len, uio); + release_pathbuff(pathbuff); + break; + + + case FSE_ARG_STRING: + tmp16 = (int32_t)kea->len; + CHECK_UPTR(sizeof(uint16_t)); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + + CHECK_UPTR(kea->len); + error = uiomove((caddr_t)kea->data.str, kea->len, uio); + break; + + case FSE_ARG_INT32: + CHECK_UPTR(sizeof(uint16_t) + sizeof(int32_t)); + tmp16 = sizeof(int32_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.int32, sizeof(int32_t), uio); + break; + + case FSE_ARG_INT64: + printf("fs_events: 64-bit args not implemented on copyout.\n"); +// CHECK_UPTR(sizeof(uint16_t) + sizeof(int64_t)); +// tmp16 = sizeof(int64_t); +// error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); +// error = uiomove((caddr_t)&kea->data.int64, sizeof(int64_t), uio); + break; + + case FSE_ARG_RAW: + tmp16 = (uint16_t)kea->len; + CHECK_UPTR(sizeof(uint16_t)); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + + CHECK_UPTR(kea->len); + error = uiomove((caddr_t)kea->data.ptr, kea->len, uio); + break; + + case FSE_ARG_DEV: + CHECK_UPTR(sizeof(uint16_t) + sizeof(dev_t)); + tmp16 = sizeof(dev_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.dev, sizeof(dev_t), uio); + break; + + case FSE_ARG_INO: + CHECK_UPTR(sizeof(uint16_t) + sizeof(ino_t)); + tmp16 = sizeof(ino_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.ino, sizeof(ino_t), uio); + break; + + case FSE_ARG_MODE: + // XXXdbg - NOTE: we use 32-bits for the mode, not + // 16-bits like a real mode_t + CHECK_UPTR(sizeof(uint16_t) + sizeof(int32_t)); + tmp16 = sizeof(int32_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.mode, sizeof(int32_t), uio); + break; + + case 
FSE_ARG_UID: + CHECK_UPTR(sizeof(uint16_t) + sizeof(uid_t)); + tmp16 = sizeof(uid_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.uid, sizeof(uid_t), uio); + break; + + case FSE_ARG_GID: + CHECK_UPTR(sizeof(uint16_t) + sizeof(gid_t)); + tmp16 = sizeof(gid_t); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + error = uiomove((caddr_t)&kea->data.gid, sizeof(gid_t), uio); + break; + + default: + printf("fmod_watch: unknown arg type %d.\n", kea->type); + break; + } + } + + // make sure that we always end with a FSE_ARG_DONE + if (i >= KFS_NUM_ARGS) { + tmp16 = FSE_ARG_DONE; + CHECK_UPTR(sizeof(uint16_t)); + error = uiomove((caddr_t)&tmp16, sizeof(uint16_t), uio); + } + + + // LP64todo - fix this + last_full_event_resid = uio_resid(uio); + + watcher->rd = (watcher->rd + 1) % watcher->eventq_size; + + if (OSAddAtomic(-1, (SInt32 *)&kfse->refcount) == 1) { + do_free_event(kfse); + } + } + + get_out: + return error; +} + + +// release any references we might have on vnodes which are +// the mount point passed to us (so that it can be cleanly +// unmounted). +// +// since we don't want to lose the events we'll convert the +// vnode refs to the full path, inode #, and uid. +// +void +fsevent_unmount(struct mount *mp) +{ + int i, j; + kfs_event *kfse; + kfs_event_arg *kea; + + lck_rw_lock_exclusive(&fsevent_big_lock); + lock_fs_event_buf(); + + for(i=0; i < MAX_KFS_EVENTS; i++) { + if (fs_event_buf[i].type == FSE_INVALID) { + continue; + } + + kfse = &fs_event_buf[i]; + for(j=0; j < KFS_NUM_ARGS; j++) { + kea = &kfse->args[j]; + if (kea->type == FSE_ARG_DONE) { + break; + } + + if (kea->type == FSE_ARG_VNODE && kea->data.vp->v_mount == mp) { + struct vnode *vp; + char *pathbuff; + int pathbuff_len; + + vp = kea->data.vp; + pathbuff = get_pathbuff(); + pathbuff_len = MAXPATHLEN; + + if (vn_getpath(vp, pathbuff, &pathbuff_len) != 0 || pathbuff[0] == '\0') { + char *vname; + + vname = vnode_getname(vp); + + printf("fsevent_unmount: vn_getpath failed! vp 0x%x vname 0x%x (%s) vparent 0x%x\n", + vp, vname, vname ? 
vname : "", vp->v_parent); + + if (vname) + vnode_putname(vname); + } + + // switch the type of the string + kea->type = FSE_ARG_STRING; + kea->data.str = vfs_addname(pathbuff, pathbuff_len, 0, 0); + kea->len = pathbuff_len; + release_pathbuff(pathbuff); + + // and finally let go of the reference on the vnode + vnode_rele_ext(vp, O_EVTONLY, 0); + } + } + } + + unlock_fs_event_buf(); + lck_rw_done(&fsevent_big_lock); +} + + +// +// /dev/fsevents device code +// +static int fsevents_installed = 0; +static struct lock__bsd__ fsevents_lck; + +typedef struct fsevent_handle { + fs_event_watcher *watcher; + struct selinfo si; +} fsevent_handle; + + +static int +fseventsf_read(struct fileproc *fp, struct uio *uio, + __unused kauth_cred_t *cred, __unused int flags, + __unused struct proc *p) +{ + fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; + int error; + + error = fmod_watch(fseh->watcher, uio); + + return error; +} + +static int +fseventsf_write(__unused struct fileproc *fp, __unused struct uio *uio, + __unused kauth_cred_t *cred, __unused int flags, + __unused struct proc *p) +{ + return EIO; +} + + +static int +fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, struct proc *p) +{ + fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; + int ret = 0; + pid_t pid = 0; + fsevent_dev_filter_args *devfilt_args=(fsevent_dev_filter_args *)data; + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return 0; + + case FSEVENTS_DEVICE_FILTER: { + int new_num_devices; + dev_t *devices_to_watch, *tmp=NULL; + + if (devfilt_args->num_devices > 256) { + ret = EINVAL; + break; + } + + new_num_devices = devfilt_args->num_devices; + if (new_num_devices == 0) { + tmp = fseh->watcher->devices_to_watch; + + lock_watch_list(); + fseh->watcher->devices_to_watch = NULL; + fseh->watcher->num_devices = new_num_devices; + unlock_watch_list(); + + if (tmp) { + FREE(tmp, M_TEMP); + } + break; + } + + MALLOC(devices_to_watch, dev_t *, + new_num_devices * sizeof(dev_t), + M_TEMP, M_WAITOK); + if (devices_to_watch == NULL) { + ret = ENOMEM; + break; + } + + ret = copyin(CAST_USER_ADDR_T(devfilt_args->devices), + (void *)devices_to_watch, + new_num_devices * sizeof(dev_t)); + if (ret) { + FREE(devices_to_watch, M_TEMP); + break; + } + + lock_watch_list(); + fseh->watcher->num_devices = new_num_devices; + tmp = fseh->watcher->devices_to_watch; + fseh->watcher->devices_to_watch = devices_to_watch; + unlock_watch_list(); + + if (tmp) { + FREE(tmp, M_TEMP); + } + + break; + } + + default: + ret = EINVAL; + break; + } + + return (ret); +} + + +static int +fseventsf_select(struct fileproc *fp, int which, void *wql, struct proc *p) +{ + fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; + int ready = 0; + + if ((which != FREAD) || (fseh->watcher->flags & WATCHER_CLOSING)) { + return 0; + } + + + // if there's nothing in the queue, we're not ready + if (fseh->watcher->rd == fseh->watcher->wr) { + ready = 0; + } else { + ready = 1; + } + + if (!ready) { + selrecord(p, &fseh->si, wql); + } + + return ready; +} + + +static int +fseventsf_stat(struct fileproc *fp, struct stat *sb, struct proc *p) +{ + return ENOTSUP; +} + + +static int +fseventsf_close(struct fileglob *fg, struct proc *p) +{ + fsevent_handle *fseh = (struct fsevent_handle *)fg->fg_data; + + remove_watcher(fseh->watcher); + + fg->fg_data = NULL; + fseh->watcher = NULL; + FREE(fseh, M_TEMP); + + return 0; +} + +int +fseventsf_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p) +{ + // XXXdbg 
+ return 0; +} + + +static int +fseventsf_drain(struct fileproc *fp, struct proc *p) +{ + int counter = 0; + fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; + + fseh->watcher->flags |= WATCHER_CLOSING; + + // if there are people still waiting, sleep for 10ms to + // let them clean up and get out of there. however we + // also don't want to get stuck forever so if they don't + // exit after 5 seconds we're tearing things down anyway. + while(fseh->watcher->blockers && counter++ < 500) { + // issue wakeup in case anyone is blocked waiting for an event + // do this each time we wakeup in case the blocker missed + // the wakeup due to the unprotected test of WATCHER_CLOSING + // and decision to tsleep in fmod_watch... this bit of + // latency is a decent tradeoff against not having to + // take and drop a lock in fmod_watch + wakeup((caddr_t)fseh->watcher); + + tsleep((caddr_t)fseh->watcher, PRIBIO, "watcher-close", 1); + } + + return 0; +} + + +static int +fseventsopen(dev_t dev, int flag, int mode, struct proc *p) +{ + if (!is_suser()) { + return EPERM; + } + + return 0; +} + +static int +fseventsclose(dev_t dev, int flag, int mode, struct proc *p) +{ + return 0; +} + +static int +fseventsread(dev_t dev, struct uio *uio, int ioflag) +{ + return EIO; +} + +static int +fseventswrite(dev_t dev, struct uio *uio, int ioflag) +{ + return EIO; +} + + +static struct fileops fsevents_fops = { + fseventsf_read, + fseventsf_write, + fseventsf_ioctl, + fseventsf_select, + fseventsf_close, + fseventsf_kqfilter, + fseventsf_drain +}; + + + +static int +fseventsioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct fileproc *f; + int fd, error; + fsevent_handle *fseh = NULL; + fsevent_clone_args *fse_clone_args=(fsevent_clone_args *)data; + int8_t *event_list; + + switch (cmd) { + case FSEVENTS_CLONE: + if (fse_clone_args->num_events < 0 || fse_clone_args->num_events > 4096) { + return EINVAL; + } + + MALLOC(fseh, fsevent_handle *, sizeof(fsevent_handle), + M_TEMP, M_WAITOK); + memset(fseh, 0, sizeof(fsevent_handle)); + + MALLOC(event_list, int8_t *, + fse_clone_args->num_events * sizeof(int8_t), + M_TEMP, M_WAITOK); + + error = copyin(CAST_USER_ADDR_T(fse_clone_args->event_list), + (void *)event_list, + fse_clone_args->num_events * sizeof(int8_t)); + if (error) { + FREE(event_list, M_TEMP); + FREE(fseh, M_TEMP); + return error; + } + + error = add_watcher(event_list, + fse_clone_args->num_events, + fse_clone_args->event_queue_depth, + &fseh->watcher); + if (error) { + FREE(event_list, M_TEMP); + FREE(fseh, M_TEMP); + return error; + } + + error = falloc(p, &f, &fd); + if (error) { + FREE(event_list, M_TEMP); + FREE(fseh, M_TEMP); + return (error); + } + proc_fdlock(p); + f->f_fglob->fg_flag = FREAD | FWRITE; + f->f_fglob->fg_type = DTYPE_FSEVENTS; + f->f_fglob->fg_ops = &fsevents_fops; + f->f_fglob->fg_data = (caddr_t) fseh; + proc_fdunlock(p); + copyout((void *)&fd, CAST_USER_ADDR_T(fse_clone_args->fd), sizeof(int32_t)); + proc_fdlock(p); + *fdflags(p, fd) &= ~UF_RESERVED; + fp_drop(p, fd, f, 1); + proc_fdunlock(p); + break; + + default: + error = EINVAL; + break; + } + + return error; +} + +static int +fseventsselect(dev_t dev, int rw, struct proc *p) +{ + return 0; +} + +static void +fsevents_wakeup(fsevent_handle *fseh) +{ + wakeup((caddr_t)fseh); + selwakeup(&fseh->si); +} + + +/* + * A struct describing which functions will get invoked for certain + * actions. 
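+ * (entry points the fsevents device does not implement are wired to the + * nulldev / eno_* stubs in the table below)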
+ */ +static struct cdevsw fsevents_cdevsw = +{ + fseventsopen, /* open */ + fseventsclose, /* close */ + fseventsread, /* read */ + fseventswrite, /* write */ + fseventsioctl, /* ioctl */ + nulldev, /* stop */ + nulldev, /* reset */ + NULL, /* tty's */ + eno_select, /* select */ + eno_mmap, /* mmap */ + eno_strat, /* strategy */ + eno_getc, /* getc */ + eno_putc, /* putc */ + 0 /* type */ +}; + + +/* + * Called to initialize our device, + * and to register ourselves with devfs + */ + +void +fsevents_init(void) +{ + int ret; + + if (fsevents_installed) { + return; + } + + fsevents_installed = 1; + + lockinit(&fsevents_lck, PLOCK, "fsevents", 0, 0); + + ret = cdevsw_add(-1, &fsevents_cdevsw); + if (ret < 0) { + fsevents_installed = 0; + return; + } + + devfs_make_node(makedev (ret, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0644, "fsevents", 0); + + fsevents_internal_init(); +} + + + +// +// XXXdbg - temporary path buffer handling +// +#define NUM_PATH_BUFFS 16 +static char path_buff[NUM_PATH_BUFFS][MAXPATHLEN]; +static char path_buff_inuse[NUM_PATH_BUFFS]; + +static lck_grp_attr_t * pathbuff_group_attr; +static lck_attr_t * pathbuff_lock_attr; +static lck_grp_t * pathbuff_mutex_group; +static lck_mtx_t pathbuff_lock; + +static void +init_pathbuff(void) +{ + pathbuff_lock_attr = lck_attr_alloc_init(); + pathbuff_group_attr = lck_grp_attr_alloc_init(); + pathbuff_mutex_group = lck_grp_alloc_init("pathbuff-mutex", pathbuff_group_attr); + + lck_mtx_init(&pathbuff_lock, pathbuff_mutex_group, pathbuff_lock_attr); +} + +static void +lock_pathbuff(void) +{ + lck_mtx_lock(&pathbuff_lock); +} + +static void +unlock_pathbuff(void) +{ + lck_mtx_unlock(&pathbuff_lock); +} + + +char * +get_pathbuff(void) +{ + int i; + + lock_pathbuff(); + for(i=0; i < NUM_PATH_BUFFS; i++) { + if (path_buff_inuse[i] == 0) { + break; + } + } + + if (i >= NUM_PATH_BUFFS) { + char *path; + + unlock_pathbuff(); + MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + return path; + } + + path_buff_inuse[i] = 1; + unlock_pathbuff(); + return &path_buff[i][0]; +} + +void +release_pathbuff(char *path) +{ + int i; + + if (path == NULL) { + return; + } + + lock_pathbuff(); + for(i=0; i < NUM_PATH_BUFFS; i++) { + if (path == &path_buff[i][0]) { + path_buff[i][0] = '\0'; + path_buff_inuse[i] = 0; + unlock_pathbuff(); + return; + } + } + + unlock_pathbuff(); + + // if we get here then it wasn't one of our temp buffers + FREE_ZONE(path, MAXPATHLEN, M_NAMEI); +} + +int +get_fse_info(struct vnode *vp, fse_info *fse, vfs_context_t ctx) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + if (vnode_getattr(vp, &va, ctx) != 0) { + return -1; + } + + fse->dev = (dev_t)va.va_fsid; + fse->ino = (ino_t)va.va_fileid; + fse->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode; + fse->uid = (uid_t)va.va_uid; + fse->gid = (gid_t)va.va_gid; + + return 0; +} diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c index 5074582f7..4c4aabb22 100644 --- a/bsd/vfs/vfs_init.c +++ b/bsd/vfs/vfs_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -62,14 +62,13 @@ #include -#include +#include #include #include -#include +#include #include #include #include -#include #include #include @@ -106,7 +105,7 @@ int vn_default_error() { - return (EOPNOTSUPP); + return (ENOTSUP); } /* @@ -172,7 +171,7 @@ vfs_opv_init() */ if (opve_descp->opve_op->vdesc_offset == 0 && opve_descp->opve_op->vdesc_offset != - VOFFSET(vop_default)) { + VOFFSET(vnop_default)) { printf("operation %s not listed in %s.\n", opve_descp->opve_op->vdesc_name, "vfs_op_descs"); @@ -195,13 +194,13 @@ vfs_opv_init() /* * Force every operations vector to have a default routine. */ - if (opv_desc_vector[VOFFSET(vop_default)]==NULL) { + if (opv_desc_vector[VOFFSET(vnop_default)]==NULL) { panic("vfs_opv_init: operation vector without default routine."); } for (k = 0; kvfs_vfsops is encountered. */ - vattr_null(&va_null); numused_vfsslots = maxtypenum = 0; for (vfsp = vfsconf, i = 0; i < maxvfsconf; i++, vfsp++) { if (vfsp->vfc_vfsops == (struct vfsops *)0) @@ -280,14 +382,84 @@ vfsinit() if (maxtypenum <= vfsp->vfc_typenum) maxtypenum = vfsp->vfc_typenum + 1; (*vfsp->vfc_vfsops->vfs_init)(vfsp); + + lck_mtx_init(&vfsp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr); + numused_vfsslots++; } /* next vfc_typenum to be used */ maxvfsconf = maxtypenum; + + /* + * Initialize the vnop authorization scope. + */ + vnode_authorize_init(); + + /* + * create a mount point for dead vnodes + */ + MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), + M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; + mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; + mp->mnt_devblocksize = DEV_BSIZE; + + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); + mp->mnt_flag = MNT_LOCAL; + mp->mnt_lflag = MNT_LDEAD; + mount_lock_init(mp); + dead_mountp = mp; +} + +void +vnode_list_lock() +{ + lck_mtx_lock(vnode_list_mtx_lock); +} + +void +vnode_list_unlock() +{ + lck_mtx_unlock(vnode_list_mtx_lock); +} + +void +mount_list_lock() +{ + lck_mtx_lock(mnt_list_mtx_lock); +} + +void +mount_list_unlock() +{ + lck_mtx_unlock(mnt_list_mtx_lock); +} + +void +mount_lock_init(mount_t mp) +{ + lck_mtx_init(&mp->mnt_mlock, mnt_lck_grp, mnt_lck_attr); + lck_mtx_init(&mp->mnt_renamelock, mnt_lck_grp, mnt_lck_attr); + lck_rw_init(&mp->mnt_rwlock, mnt_lck_grp, mnt_lck_attr); +} + +void +mount_lock_destroy(mount_t mp) +{ + lck_mtx_destroy(&mp->mnt_mlock, mnt_lck_grp); + lck_mtx_destroy(&mp->mnt_renamelock, mnt_lck_grp); + lck_rw_destroy(&mp->mnt_rwlock, mnt_lck_grp); } + /* - * Name: vfsconf_add + * Name: vfstable_add * * Description: Add a filesystem to the vfsconf list at the first * unused slot. If no slots are available, return an @@ -305,15 +477,12 @@ vfsinit() * * Warning: This code assumes that vfsconf[0] is non-empty. 
*/ -int -vfsconf_add(struct vfsconf *nvfsp) +struct vfstable * +vfstable_add(struct vfstable *nvfsp) { int slot; - struct vfsconf *slotp; + struct vfstable *slotp; - if (nvfsp == NULL) /* overkill */ - return (-1); - /* * Find the next empty slot; we recognize an empty slot by a * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must @@ -325,7 +494,7 @@ vfsconf_add(struct vfsconf *nvfsp) } if (slot == maxvfsslots) { /* out of static slots; allocate one instead */ - MALLOC(slotp, struct vfsconf *, sizeof(struct vfsconf), + MALLOC(slotp, struct vfstable *, sizeof(struct vfstable), M_TEMP, M_WAITOK); } else { slotp = &vfsconf[slot]; @@ -338,7 +507,8 @@ vfsconf_add(struct vfsconf *nvfsp) * Note; Takes advantage of the fact that 'slot' was left * with the value of 'maxvfslots' in the allocation case. */ - bcopy(nvfsp, slotp, sizeof(struct vfsconf)); + bcopy(nvfsp, slotp, sizeof(struct vfstable)); + lck_mtx_init(&slotp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr); if (slot != 0) { slotp->vfc_next = vfsconf[slot - 1].vfc_next; vfsconf[slot - 1].vfc_next = slotp; @@ -347,22 +517,11 @@ vfsconf_add(struct vfsconf *nvfsp) } numused_vfsslots++; - /* - * Call through the ->vfs_init(); use slotp instead of nvfsp, - * so that if the FS cares where it's instance record is, it - * can find it later. - * - * XXX All code that calls ->vfs_init treats it as if it - * XXX returns a "void', and can never fail. - */ - if (nvfsp->vfc_vfsops->vfs_init) - (*nvfsp->vfc_vfsops->vfs_init)(slotp); - - return(0); + return(slotp); } /* - * Name: vfsconf_del + * Name: vfstable_del * * Description: Remove a filesystem from the vfsconf list by name. * If no such filesystem exists, return an error. @@ -375,30 +534,30 @@ vfsconf_add(struct vfsconf *nvfsp) * Notes: Hopefully all filesystems have unique names. */ int -vfsconf_del(char * fs_name) +vfstable_del(struct vfstable * vtbl) { - struct vfsconf **vcpp; - struct vfsconf *vcdelp; + struct vfstable **vcpp; + struct vfstable *vcdelp; /* - * Traverse the list looking for fs_name; if found, *vcpp + * Traverse the list looking for vtbl; if found, *vcpp * will contain the address of the pointer to the entry to * be removed. */ for( vcpp = &vfsconf; *vcpp; vcpp = &(*vcpp)->vfc_next) { - if (strcmp( (*vcpp)->vfc_name, fs_name) == 0) + if (*vcpp == vtbl) break; } - if (*vcpp == NULL) { - /* XXX need real error code for entry not found */ - return(-1); - } + if (*vcpp == NULL) + return(ESRCH); /* vtbl not on vfsconf list */ /* Unlink entry */ vcdelp = *vcpp; *vcpp = (*vcpp)->vfc_next; + lck_mtx_destroy(&vcdelp->vfc_lock, fsconf_lck_grp); + /* * Is this an entry from our static table? We find out by * seeing if the pointer to the object to be deleted places @@ -406,7 +565,7 @@ vfsconf_del(char * fs_name) */ if (vcdelp >= vfsconf && vcdelp < (vfsconf + maxvfsslots)) { /* Y */ /* Mark as empty for vfscon_add() */ - bzero(vcdelp, sizeof(struct vfsconf)); + bzero(vcdelp, sizeof(struct vfstable)); numused_vfsslots--; } else { /* N */ /* @@ -420,3 +579,16 @@ vfsconf_del(char * fs_name) return(0); } + +void +SPECHASH_LOCK(void) +{ + lck_mtx_lock(spechash_mtx_lock); +} + +void +SPECHASH_UNLOCK(void) +{ + lck_mtx_unlock(spechash_mtx_lock); +} + diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 19c28f39a..4389d214b 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1995-2004 Apple Computer, Inc. All rights reserved. 
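vfstable_add()/vfstable_del() above keep a fixed array of statically allocated slots, fall back to MALLOC() when the array is full, and on deletion decide between "bzero the slot for reuse" and "free the heap copy" by checking whether the pointer falls inside the static array. A compact user-space sketch of that registration-table pattern (list maintenance simplified to head insertion; names are illustrative):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define MAXSLOTS 8

struct entry {                /* illustrative stand-in for struct vfstable */
    void         *ops;        /* NULL marks an empty static slot */
    struct entry *next;
};

static struct entry table[MAXSLOTS];
static struct entry *head;    /* singly linked list threaded through slots */

struct entry *
table_add(const struct entry *nentry)
{
    int slot;
    struct entry *slotp;

    for (slot = 0; slot < MAXSLOTS; slot++)
        if (table[slot].ops == NULL)
            break;
    if (slot == MAXSLOTS)     /* out of static slots; allocate one instead */
        slotp = malloc(sizeof(*slotp));
    else
        slotp = &table[slot];

    memcpy(slotp, nentry, sizeof(*slotp));
    slotp->next = head;
    head = slotp;
    return slotp;
}

int
table_del(struct entry *victim)
{
    struct entry **epp;

    for (epp = &head; *epp; epp = &(*epp)->next)
        if (*epp == victim)
            break;
    if (*epp == NULL)
        return ESRCH;         /* not on the list */
    *epp = victim->next;      /* unlink */

    /* static slot or heap allocation? decide by address range */
    if (victim >= table && victim < table + MAXSLOTS)
        memset(victim, 0, sizeof(*victim));   /* mark empty for table_add() */
    else
        free(victim);
    return 0;
}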
* * @APPLE_LICENSE_HEADER_START@ * @@ -34,19 +34,18 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include +#include #include #include #include #include -#include -#include +#include #include #include @@ -81,6 +80,13 @@ static int end_transaction(transaction *tr, int force_it); static void abort_transaction(journal *jnl, transaction *tr); static void dump_journal(journal *jnl); +static __inline__ void lock_journal(journal *jnl); +static __inline__ void unlock_journal(journal *jnl); +static __inline__ void lock_oldstart(journal *jnl); +static __inline__ void unlock_oldstart(journal *jnl); + + + // // 3105942 - Coalesce writes to the same block on journal replay @@ -178,6 +184,49 @@ calc_checksum(char *ptr, int len) return (~cksum); } +// +// Journal Locking +// +lck_grp_attr_t * jnl_group_attr; +lck_attr_t * jnl_lock_attr; +lck_grp_t * jnl_mutex_group; + +void +journal_init() +{ + jnl_lock_attr = lck_attr_alloc_init(); + jnl_group_attr = lck_grp_attr_alloc_init(); + jnl_mutex_group = lck_grp_alloc_init("jnl-mutex", jnl_group_attr); + + /* Turn on lock debugging */ + //lck_attr_setdebug(jnl_lock_attr); +} + +static __inline__ void +lock_journal(journal *jnl) +{ + lck_mtx_lock(&jnl->jlock); +} + +static __inline__ void +unlock_journal(journal *jnl) +{ + lck_mtx_unlock(&jnl->jlock); +} + +static __inline__ void +lock_oldstart(journal *jnl) +{ + lck_mtx_lock(&jnl->old_start_lock); +} + +static __inline__ void +unlock_oldstart(journal *jnl) +{ + lck_mtx_unlock(&jnl->old_start_lock); +} + + #define JNL_WRITE 0x0001 #define JNL_READ 0x0002 @@ -196,29 +245,23 @@ static size_t do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction) { int err, io_sz=0, curlen=len; - struct buf *bp; - int max_iosize=0, max_vectors; + buf_t bp; + int max_iosize = 128 * 1024; + struct vfsioattr ioattr; if (*offset < 0 || *offset > jnl->jhdr->size) { panic("jnl: do_jnl_io: bad offset 0x%llx (max 0x%llx)\n", *offset, jnl->jhdr->size); } + vfs_ioattr(vnode_mount(jnl->jdev), &ioattr); + + if (direction & JNL_WRITE) + max_iosize = ioattr.io_maxwritecnt; + else if (direction & JNL_READ) + max_iosize = ioattr.io_maxreadcnt; again: bp = alloc_io_buf(jnl->jdev, 1); - if (direction & JNL_WRITE) { - bp->b_flags |= 0; // don't have to set any flags (was: B_WRITEINPROG) - jnl->jdev->v_numoutput++; - vfs_io_attributes(jnl->jdev, B_WRITE, &max_iosize, &max_vectors); - } else if (direction & JNL_READ) { - bp->b_flags |= B_READ; - vfs_io_attributes(jnl->jdev, B_READ, &max_iosize, &max_vectors); - } - - if (max_iosize == 0) { - max_iosize = 128 * 1024; - } - if (*offset + (off_t)curlen > jnl->jhdr->size && *offset != 0 && jnl->jhdr->size != 0) { if (*offset == jnl->jhdr->size) { *offset = jnl->jhdr->jhdr_size; @@ -239,21 +282,24 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction panic("jnl: request for i/o to jnl-header without JNL_HEADER flag set! 
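do_journal_io() treats the journal as a circular region: an I/O that would run past jhdr->size is clipped at the end, and the remainder restarts at jhdr_size, since block zero holds the journal header. A sketch of just the offset arithmetic, under the assumption that the caller loops until the full length is issued (the real routine also special-cases an offset already sitting at the end and panics on header overwrites without JNL_HEADER):

#include <stdio.h>
#include <stdint.h>

/* Clip one I/O against the end of a circular journal region.
 * The header occupies [0, hdr_size), so wrapping restarts at hdr_size.
 * Returns the length to issue now; *offset is advanced, wrapping when
 * the request ends exactly at the region end.  Illustrative only.
 */
static size_t
clip_journal_io(int64_t *offset, size_t len, int64_t size, int64_t hdr_size)
{
    size_t curlen = len;

    if (*offset + (int64_t)curlen > size)
        curlen = (size_t)(size - *offset);   /* issue the tail piece first */

    *offset += (int64_t)curlen;
    if (*offset >= size)
        *offset = hdr_size;                  /* wrap past the header */
    return curlen;
}

int
main(void)
{
    int64_t off = 900;
    size_t done, want = 300;

    while (want > 0) {
        done = clip_journal_io(&off, want, 1024, 512);
        printf("wrote %zu bytes, next offset %lld\n", done, (long long)off);
        want -= done;
    }
    return 0;
}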
(len %d, data %p)\n", curlen, data); } - bp->b_bufsize = curlen; - bp->b_bcount = curlen; - bp->b_data = data; - bp->b_blkno = (daddr_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size); - bp->b_lblkno = (daddr_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size); - - err = VOP_STRATEGY(bp); + if (direction & JNL_READ) + buf_setflags(bp, B_READ); + else { + /* + * don't have to set any flags + */ + vnode_startwrite(jnl->jdev); + } + buf_setsize(bp, curlen); + buf_setcount(bp, curlen); + buf_setdataptr(bp, (uintptr_t)data); + buf_setblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size)); + buf_setlblkno(bp, (daddr64_t) ((jnl->jdev_offset + *offset) / (off_t)jnl->jhdr->jhdr_size)); + + err = VNOP_STRATEGY(bp); if (!err) { - err = biowait(bp); + err = (int)buf_biowait(bp); } - - bp->b_data = NULL; - bp->b_bufsize = bp->b_bcount = 0; - bp->b_blkno = bp->b_lblkno = -1; - free_io_buf(bp); if (err) { @@ -303,11 +349,14 @@ write_journal_header(journal *jnl) static int num_err_prints = 0; int ret; off_t jhdr_offset = 0; - + struct vfs_context context; + + context.vc_proc = current_proc(); + context.vc_ucred = NOCRED; // // XXXdbg note: this ioctl doesn't seem to do anything on firewire disks. // - ret = VOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NOCRED, current_proc()); + ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); if (ret != 0) { // // Only print this error if it's a different error than the @@ -345,7 +394,7 @@ write_journal_header(journal *jnl) // on an IDE bus analyzer with Larry Barras so while it // may seem obscure, it's not. // - VOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NOCRED, current_proc()); + VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); return 0; } @@ -365,12 +414,16 @@ free_old_stuff(journal *jnl) { transaction *tr, *next; - for(tr=jnl->tr_freeme; tr; tr=next) { - next = tr->next; - FREE_ZONE(tr, sizeof(transaction), M_JNL_TR); + lock_oldstart(jnl); + tr = jnl->tr_freeme; + jnl->tr_freeme = NULL; + unlock_oldstart(jnl); + + for(; tr; tr=next) { + next = tr->next; + FREE_ZONE(tr, sizeof(transaction), M_JNL_TR); } - jnl->tr_freeme = NULL; } @@ -382,7 +435,7 @@ free_old_stuff(journal *jnl) // not initiate any new i/o's or allocate/free memory. // static void -buffer_flushed_callback(struct buf *bp) +buffer_flushed_callback(struct buf *bp, void *arg) { transaction *tr; journal *jnl; @@ -390,29 +443,12 @@ buffer_flushed_callback(struct buf *bp) int i, bufsize; - //printf("jnl: buf flush: bp @ 0x%x l/blkno %d/%d vp 0x%x tr @ 0x%x\n", - // bp, bp->b_lblkno, bp->b_blkno, bp->b_vp, bp->b_transaction); + //printf("jnl: buf flush: bp @ 0x%x l/blkno %qd/%qd vp 0x%x tr @ 0x%x\n", + // bp, buf_lblkno(bp), buf_blkno(bp), buf_vnode(bp), arg); // snarf out the bits we want - bufsize = bp->b_bufsize; - tr = bp->b_transaction; - - bp->b_iodone = NULL; // don't call us for this guy again - bp->b_transaction = NULL; - - // - // This is what biodone() would do if it didn't call us. - // NOTE: THIS CODE *HAS* TO BE HERE! - // - if (ISSET(bp->b_flags, B_ASYNC)) { /* if async, release it */ - brelse(bp); - } else { /* or just wakeup the buffer */ - CLR(bp->b_flags, B_WANTED); - wakeup(bp); - } - - // NOTE: from here on out we do *NOT* touch bp anymore. - + bufsize = buf_size(bp); + tr = (transaction *)arg; // then we've already seen it if (tr == NULL) { @@ -431,7 +467,7 @@ buffer_flushed_callback(struct buf *bp) // update the number of blocks that have been flushed. 
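The reworked free_old_stuff() above shows a common reaping idiom: detach the whole tr_freeme list while holding old_start_lock, then walk and free the nodes after dropping it, so that FREE_ZONE() never runs under the mutex. A minimal sketch with pthreads and malloc()/free() standing in for the kernel primitives:

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *next; };

static struct node *freeme;                 /* protected by list_lock */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

void
reap_free_list(void)
{
    struct node *n, *next;

    /* detach the whole list under the lock... */
    pthread_mutex_lock(&list_lock);
    n = freeme;
    freeme = NULL;
    pthread_mutex_unlock(&list_lock);

    /* ...then free the nodes without holding it */
    for (; n; n = next) {
        next = n->next;
        free(n);
    }
}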
// this buf may represent more than one block so take // that into account. - tr->num_flushed += bufsize; + OSAddAtomic(bufsize, &tr->num_flushed); // if this transaction isn't done yet, just return as @@ -440,11 +476,23 @@ buffer_flushed_callback(struct buf *bp) return; } + // this will single thread checking the transaction + lock_oldstart(jnl); + + if (tr->total_bytes == 0xfbadc0de) { + // then someone beat us to it... + unlock_oldstart(jnl); + return; + } + + // mark this so that we're the owner of dealing with the + // cleanup for this transaction + tr->total_bytes = 0xfbadc0de; + //printf("jnl: tr 0x%x (0x%llx 0x%llx) in jnl 0x%x completed.\n", // tr, tr->journal_start, tr->journal_end, jnl); // find this entry in the old_start[] index and mark it completed - simple_lock(&jnl->old_start_lock); for(i=0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) { if ((jnl->old_start[i] & ~(0x8000000000000000LL)) == tr->journal_start) { @@ -456,7 +504,7 @@ buffer_flushed_callback(struct buf *bp) panic("jnl: buffer_flushed: did not find tr w/start @ %lld (tr 0x%x, jnl 0x%x)\n", tr->journal_start, tr, jnl); } - simple_unlock(&jnl->old_start_lock); + unlock_oldstart(jnl); // if we are here then we need to update the journal header @@ -478,10 +526,12 @@ buffer_flushed_callback(struct buf *bp) jnl->completed_trs = ctr->next; } + lock_oldstart(jnl); next = jnl->completed_trs; // this starts us over again ctr->next = jnl->tr_freeme; jnl->tr_freeme = ctr; ctr = NULL; + unlock_oldstart(jnl); } else if (tr->journal_end == ctr->journal_start) { ctr->journal_start = tr->journal_start; next = jnl->completed_trs; // this starts us over again @@ -496,9 +546,6 @@ buffer_flushed_callback(struct buf *bp) } } - // at this point no one should be using this guy anymore - tr->total_bytes = 0xfbadc0de; - // if this is true then we didn't merge with anyone // so link ourselves in at the head of the completed // transaction list. @@ -525,8 +572,10 @@ buffer_flushed_callback(struct buf *bp) } else { // if we're here this tr got merged with someone else so // put it on the list to be free'd + lock_oldstart(jnl); tr->next = jnl->tr_freeme; jnl->tr_freeme = tr; + unlock_oldstart(jnl); } } @@ -578,47 +627,47 @@ swap_block_list_header(journal *jnl, block_list_header *blhdr) static int update_fs_block(journal *jnl, void *block_ptr, off_t fs_block, size_t bsize) { - int ret; + int ret; struct buf *oblock_bp=NULL; // first read the block we want. - ret = meta_bread(jnl->fsdev, (daddr_t)fs_block, bsize, NOCRED, &oblock_bp); + ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); if (ret != 0) { printf("jnl: update_fs_block: error reading fs block # %lld! (ret %d)\n", fs_block, ret); if (oblock_bp) { - brelse(oblock_bp); + buf_brelse(oblock_bp); oblock_bp = NULL; } // let's try to be aggressive here and just re-write the block - oblock_bp = getblk(jnl->fsdev, (daddr_t)fs_block, bsize, 0, 0, BLK_META); + oblock_bp = buf_getblk(jnl->fsdev, (daddr64_t)fs_block, bsize, 0, 0, BLK_META); if (oblock_bp == NULL) { - printf("jnl: update_fs_block: getblk() for %lld failed! failing update.\n", fs_block); + printf("jnl: update_fs_block: buf_getblk() for %lld failed! failing update.\n", fs_block); return -1; } } // make sure it's the correct size. 
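Because buffer_flushed_callback() can now run concurrently for several buffers of the same transaction, the code elects exactly one cleanup owner: it takes old_start_lock, and the first arrival writes the sentinel 0xfbadc0de into tr->total_bytes so later arrivals bail out. A sketch of that claim-once idiom (mutex-based, as in the kernel; an atomic compare-and-swap would serve equally well):

#include <pthread.h>
#include <stdint.h>

#define CLAIMED 0xfbadc0deU     /* sentinel: cleanup already owned */

struct txn {
    uint32_t total_bytes;       /* real byte count until claimed */
    pthread_mutex_t lock;
};

/* Returns 1 exactly once per transaction, no matter how many
 * completion callbacks race here; the winner does the cleanup. */
int
claim_cleanup(struct txn *tr)
{
    int won = 0;

    pthread_mutex_lock(&tr->lock);
    if (tr->total_bytes != CLAIMED) {
        tr->total_bytes = CLAIMED;    /* mark it so later callers bail out */
        won = 1;
    }
    pthread_mutex_unlock(&tr->lock);
    return won;
}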
- if (oblock_bp->b_bufsize != bsize) { - brelse(oblock_bp); + if (buf_size(oblock_bp) != bsize) { + buf_brelse(oblock_bp); return -1; } // copy the journal data over top of it - memcpy(oblock_bp->b_data, block_ptr, bsize); + memcpy((void *)buf_dataptr(oblock_bp), block_ptr, bsize); - if ((ret = VOP_BWRITE(oblock_bp)) != 0) { + if ((ret = VNOP_BWRITE(oblock_bp)) != 0) { printf("jnl: update_fs_block: failed to update block %lld (ret %d)\n", fs_block,ret); return ret; } // and now invalidate it so that if someone else wants to read // it in a different size they'll be able to do it. - ret = meta_bread(jnl->fsdev, (daddr_t)fs_block, bsize, NOCRED, &oblock_bp); + ret = buf_meta_bread(jnl->fsdev, (daddr64_t)fs_block, bsize, NOCRED, &oblock_bp); if (oblock_bp) { - oblock_bp->b_flags |= B_INVAL; - brelse(oblock_bp); + buf_markinvalid(oblock_bp); + buf_brelse(oblock_bp); } return 0; @@ -781,7 +830,8 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num if (prev_block_end > block_end) { off_t new_num = block_end / jhdr_size; size_t new_size = prev_block_end - block_end; - size_t new_offset = (*buf_ptr)[blk_index-1].jnl_offset + (block_end - prev_block_start); + + new_offset = (*buf_ptr)[blk_index-1].jnl_offset + (block_end - prev_block_start); err = insert_block(jnl, buf_ptr, blk_index, new_num, new_size, new_offset, num_buckets_ptr, num_full_ptr, 0); if (err < 0) { @@ -815,7 +865,7 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num overlap = block_end - (*buf_ptr)[index].block_num*jhdr_size; if (overlap > 0) { if (overlap % jhdr_size != 0) { - panic("jnl: do_overlap: overlap of %d is not multiple of %d\n", overlap, jhdr_size); + panic("jnl: do_overlap: overlap of %lld is not multiple of %d\n", overlap, jhdr_size); } // if we partially overlap this entry, adjust its block number, jnl offset, and size @@ -873,7 +923,6 @@ static int add_block(journal *jnl, struct bucket **buf_ptr, off_t block_num, size_t size, size_t offset, int *num_buckets_ptr, int *num_full_ptr) { int blk_index, overwriting; - size_t jhdr_size = jnl->jhdr->jhdr_size; // on return from lookup_bucket(), blk_index is the index into the table where block_num should be // inserted (or the index of the elem to overwrite). @@ -902,10 +951,9 @@ static int replay_journal(journal *jnl) { int i, ret, orig_checksum, checksum, max_bsize; - struct buf *oblock_bp; block_list_header *blhdr; off_t offset; - char *buf, *block_ptr=NULL; + char *buff, *block_ptr=NULL; struct bucket *co_buf; int num_buckets = STARTING_BUCKETS, num_full; @@ -922,7 +970,7 @@ replay_journal(journal *jnl) } // allocate memory for the header_block. we'll read each blhdr into this - if (kmem_alloc(kernel_map, (vm_offset_t *)&buf, jnl->jhdr->blhdr_size)) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, jnl->jhdr->blhdr_size)) { printf("jnl: replay_journal: no memory for block buffer! 
(%d bytes)\n", jnl->jhdr->blhdr_size); return -1; @@ -946,13 +994,13 @@ replay_journal(journal *jnl) while(jnl->jhdr->start != jnl->jhdr->end) { offset = jnl->jhdr->start; - ret = read_journal_data(jnl, &offset, buf, jnl->jhdr->blhdr_size); + ret = read_journal_data(jnl, &offset, buff, jnl->jhdr->blhdr_size); if (ret != jnl->jhdr->blhdr_size) { printf("jnl: replay_journal: Could not read block list header block @ 0x%llx!\n", offset); goto bad_replay; } - blhdr = (block_list_header *)buf; + blhdr = (block_list_header *)buff; orig_checksum = blhdr->checksum; blhdr->checksum = 0; @@ -966,15 +1014,15 @@ replay_journal(journal *jnl) checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); } if (checksum != orig_checksum) { - printf("jnl: replay_journal: bad block list header @ 0x%llx (checksum 0x%x != 0x%x)\n", - offset, orig_checksum, checksum); - goto bad_replay; - } + printf("jnl: replay_journal: bad block list header @ 0x%llx (checksum 0x%x != 0x%x)\n", + offset, orig_checksum, checksum); + goto bad_replay; + } if ( blhdr->max_blocks <= 0 || blhdr->max_blocks > 2048 || blhdr->num_blocks <= 0 || blhdr->num_blocks > blhdr->max_blocks) { - printf("jnl: replay_journal: bad looking journal entry: max: %d num: %d\n", - blhdr->max_blocks, blhdr->num_blocks); - goto bad_replay; + printf("jnl: replay_journal: bad looking journal entry: max: %d num: %d\n", + blhdr->max_blocks, blhdr->num_blocks); + goto bad_replay; } for(i=1; i < blhdr->num_blocks; i++) { @@ -1094,7 +1142,7 @@ replay_journal(journal *jnl) FREE(co_buf, M_TEMP); co_buf = NULL; - kmem_free(kernel_map, (vm_offset_t)buf, jnl->jhdr->blhdr_size); + kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size); return 0; bad_replay: @@ -1104,7 +1152,7 @@ replay_journal(journal *jnl) if (co_buf) { FREE(co_buf, M_TEMP); } - kmem_free(kernel_map, (vm_offset_t)buf, jnl->jhdr->blhdr_size); + kmem_free(kernel_map, (vm_offset_t)buff, jnl->jhdr->blhdr_size); return -1; } @@ -1190,10 +1238,14 @@ journal_create(struct vnode *jvp, void *arg) { journal *jnl; - int ret, phys_blksz; + int phys_blksz; + struct vfs_context context; + + context.vc_proc = current_proc(); + context.vc_ucred = FSCRED; /* Get the real physical block size. */ - if (VOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, FSCRED, NULL)) { + if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) { return NULL; } @@ -1218,7 +1270,7 @@ journal_create(struct vnode *jvp, jnl->flush = flush; jnl->flush_arg = arg; jnl->flags = (flags & JOURNAL_OPTION_FLAGS_MASK); - simple_lock_init(&jnl->old_start_lock); + lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { printf("jnl: create: could not allocate space for header buffer (%d bytes)\n", phys_blksz); @@ -1242,7 +1294,7 @@ journal_create(struct vnode *jvp, // jnl->jhdr->start = jnl->jhdr->size - (phys_blksz*3); // jnl->jhdr->end = jnl->jhdr->size - (phys_blksz*3); - lockinit(&jnl->jlock, PINOD, "journal", 0, 0); + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); if (write_journal_header(jnl) != 0) { printf("jnl: journal_create: failed to write journal header.\n"); @@ -1273,11 +1325,15 @@ journal_open(struct vnode *jvp, void *arg) { journal *jnl; - int orig_blksz=0, phys_blksz, blhdr_size; + int orig_blksz=0, phys_blksz; int orig_checksum, checksum; + struct vfs_context context; + + context.vc_proc = current_proc(); + context.vc_ucred = FSCRED; /* Get the real physical block size. 
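replay_journal() verifies each block_list_header the same way it was summed at write time: save the stored checksum, zero the field, recompute over the first BLHDR_CHECKSUM_SIZE bytes, and compare. A self-contained sketch of that zero-the-field idiom; the rolling sum below follows the shape of the journal's calc_checksum(), but treat the exact recurrence as an assumption rather than a quote:

#include <stdint.h>

struct hdr {
    uint32_t checksum;    /* must be zero while the sum is computed */
    uint32_t num_blocks;
    uint32_t bytes_used;
};

/* rolling checksum in the style of the journal's calc_checksum() */
static uint32_t
calc_checksum(const char *ptr, int len)
{
    int i;
    uint32_t cksum = 0;

    for (i = 0; i < len; i++, ptr++)
        cksum = (cksum << 8) ^ (cksum + *(const unsigned char *)ptr);
    return ~cksum;
}

/* verify a header whose checksum field lies inside the summed range */
int
hdr_checksum_ok(struct hdr *h)
{
    uint32_t orig = h->checksum;
    uint32_t now;

    h->checksum = 0;                    /* field was 0 when summed at write time */
    now = calc_checksum((const char *)h, sizeof(*h));
    h->checksum = orig;                 /* restore for the caller */
    return now == orig;
}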
*/ - if (VOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, FSCRED, NULL)) { + if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) { return NULL; } @@ -1302,7 +1358,7 @@ journal_open(struct vnode *jvp, jnl->flush = flush; jnl->flush_arg = arg; jnl->flags = (flags & JOURNAL_OPTION_FLAGS_MASK); - simple_lock_init(&jnl->old_start_lock); + lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { printf("jnl: create: could not allocate space for header buffer (%d bytes)\n", phys_blksz); @@ -1362,7 +1418,7 @@ journal_open(struct vnode *jvp, orig_blksz = phys_blksz; phys_blksz = jnl->jhdr->jhdr_size; - if (VOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, FSCRED, NULL)) { + if (VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context)) { printf("jnl: could not set block size to %d bytes.\n", phys_blksz); goto bad_journal; } @@ -1420,7 +1476,7 @@ journal_open(struct vnode *jvp, } if (orig_blksz != 0) { - VOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, FSCRED, NULL); + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context); phys_blksz = orig_blksz; if (orig_blksz < jnl->jhdr->jhdr_size) { printf("jnl: open: jhdr_size is %d but orig phys blk size is %d. switching.\n", @@ -1436,14 +1492,14 @@ journal_open(struct vnode *jvp, // set this now, after we've replayed the journal size_up_tbuffer(jnl, tbuffer_size, phys_blksz); - lockinit(&jnl->jlock, PINOD, "journal", 0, 0); + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); return jnl; bad_journal: if (orig_blksz != 0) { phys_blksz = orig_blksz; - VOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, FSCRED, NULL); + VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context); } kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz); bad_kmem_alloc: @@ -1464,14 +1520,8 @@ journal_close(journal *jnl) // jnl->flags |= JOURNAL_CLOSE_PENDING; - if (jnl->owner != current_act()) { - int ret; - - ret = lockmgr(&jnl->jlock, LK_EXCLUSIVE|LK_RETRY, NULL, current_proc()); - if (ret != 0) { - printf("jnl: close: locking the journal (0x%x) failed %d.\n", jnl, ret); - return; - } + if (jnl->owner != current_thread()) { + lock_journal(jnl); } // @@ -1619,7 +1669,7 @@ check_free_space(journal *jnl, int desired_size) // entries until there is enough space for the next transaction. 
// old_start_empty = 1; - simple_lock(&jnl->old_start_lock); + lock_oldstart(jnl); for(i=0; i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0]); i++) { int counter; @@ -1630,12 +1680,12 @@ check_free_space(journal *jnl, int desired_size) jnl->old_start[i], jnl); } - simple_unlock(&jnl->old_start_lock); + unlock_oldstart(jnl); if (jnl->flush) { jnl->flush(jnl->flush_arg); } tsleep((caddr_t)jnl, PRIBIO, "check_free_space1", 1); - simple_lock(&jnl->old_start_lock); + lock_oldstart(jnl); } if (jnl->old_start[i] == 0) { @@ -1646,11 +1696,13 @@ check_free_space(journal *jnl, int desired_size) jnl->jhdr->start = jnl->old_start[i]; jnl->old_start[i] = 0; if (free_space(jnl) > desired_size) { + unlock_oldstart(jnl); write_journal_header(jnl); + lock_oldstart(jnl); break; } } - simple_unlock(&jnl->old_start_lock); + unlock_oldstart(jnl); // if we bumped the start, loop and try again if (i < sizeof(jnl->old_start)/sizeof(jnl->old_start[0])) { @@ -1691,7 +1743,6 @@ journal_start_transaction(journal *jnl) { int ret; transaction *tr; - int prev_priv; CHECK_JOURNAL(jnl); @@ -1699,27 +1750,23 @@ journal_start_transaction(journal *jnl) return EINVAL; } - if (jnl->owner == current_act()) { + if (jnl->owner == current_thread()) { if (jnl->active_tr == NULL) { - panic("jnl: start_tr: active_tr is NULL (jnl @ 0x%x, owner 0x%x, current_act 0x%x\n", - jnl, jnl->owner, current_act()); + panic("jnl: start_tr: active_tr is NULL (jnl @ 0x%x, owner 0x%x, current_thread 0x%x\n", + jnl, jnl->owner, current_thread()); } jnl->nested_count++; return 0; } - ret = lockmgr(&jnl->jlock, LK_EXCLUSIVE|LK_RETRY, NULL, current_proc()); - if (ret != 0) { - printf("jnl: start_tr: locking the journal (0x%x) failed %d.\n", jnl, ret); - return EINVAL; - } + lock_journal(jnl); if (jnl->owner != NULL || jnl->nested_count != 0 || jnl->active_tr != NULL) { panic("jnl: start_tr: owner 0x%x, nested count 0x%x, active_tr 0x%x jnl @ 0x%x\n", jnl->owner, jnl->nested_count, jnl->active_tr, jnl); } - jnl->owner = current_act(); + jnl->owner = current_thread(); jnl->nested_count = 1; free_old_stuff(jnl); @@ -1743,15 +1790,13 @@ journal_start_transaction(journal *jnl) memset(tr, 0, sizeof(transaction)); tr->tbuffer_size = jnl->tbuffer_size; - thread_wire_internal(host_priv_self(), current_act(), TRUE, &prev_priv); + if (kmem_alloc(kernel_map, (vm_offset_t *)&tr->tbuffer, tr->tbuffer_size)) { FREE_ZONE(tr, sizeof(transaction), M_JNL_TR); printf("jnl: start transaction failed: no tbuffer mem\n"); ret = ENOMEM; - thread_wire_internal(host_priv_self(), current_act(), prev_priv, NULL); goto bad_start; } - thread_wire_internal(host_priv_self(), current_act(), prev_priv, NULL); // journal replay code checksum check depends on this. memset(tr->tbuffer, 0, BLHDR_CHECKSUM_SIZE); @@ -1774,7 +1819,7 @@ journal_start_transaction(journal *jnl) bad_start: jnl->owner = NULL; jnl->nested_count = 0; - lockmgr(&jnl->jlock, LK_RELEASE, NULL, current_proc()); + unlock_journal(jnl); return ret; } @@ -1792,35 +1837,35 @@ journal_modify_block_start(journal *jnl, struct buf *bp) // XXXdbg - for debugging I want this to be true. later it may // not be necessary. - if ((bp->b_flags & B_META) == 0) { + if ((buf_flags(bp) & B_META) == 0) { panic("jnl: modify_block_start: bp @ 0x%x is not a meta-data block! (jnl 0x%x)\n", bp, jnl); } tr = jnl->active_tr; CHECK_TRANSACTION(tr); - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { panic("jnl: modify_block_start: called w/out a transaction! 
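journal_start_transaction() above supports nesting: if the calling thread already owns the journal it only bumps nested_count, otherwise it takes jlock and installs itself as owner. The unlocked jnl->owner comparison is benign because only the owner itself can ever observe a match. A pthread sketch of the same re-entrant ownership scheme (names are illustrative):

#include <pthread.h>

struct jlock {
    pthread_mutex_t mtx;
    pthread_t owner;
    int owner_valid;
    int nested;
};

void
txn_enter(struct jlock *j)
{
    /* unlocked peek is safe: only the owning thread can see a match */
    if (j->owner_valid && pthread_equal(j->owner, pthread_self())) {
        j->nested++;            /* re-entry by the owning thread */
        return;
    }
    pthread_mutex_lock(&j->mtx);
    j->owner = pthread_self();
    j->owner_valid = 1;
    j->nested = 1;
}

void
txn_exit(struct jlock *j)
{
    /* only ever called by the owner, so no lock needed to adjust nesting */
    if (--j->nested > 0)        /* still inside an outer transaction */
        return;
    j->owner_valid = 0;
    pthread_mutex_unlock(&j->mtx);
}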
jnl 0x%x, owner 0x%x, curact 0x%x\n", - jnl, jnl->owner, current_act()); + jnl, jnl->owner, current_thread()); } free_old_stuff(jnl); - //printf("jnl: mod block start (bp 0x%x vp 0x%x l/blkno %d/%d bsz %d; total bytes %d)\n", - // bp, bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_bufsize, tr->total_bytes); + //printf("jnl: mod block start (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d; total bytes %d)\n", + // bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes); // can't allow blocks that aren't an even multiple of the // underlying block size. - if ((bp->b_bufsize % jnl->jhdr->jhdr_size) != 0) { + if ((buf_size(bp) % jnl->jhdr->jhdr_size) != 0) { panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n", - bp->b_bufsize, jnl->jhdr->jhdr_size); + buf_size(bp), jnl->jhdr->jhdr_size); return -1; } // make sure that this transaction isn't bigger than the whole journal - if (tr->total_bytes+bp->b_bufsize >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) { + if (tr->total_bytes+buf_size(bp) >= (jnl->jhdr->size - jnl->jhdr->jhdr_size)) { panic("jnl: transaction too big (%d >= %lld bytes, bufsize %d, tr 0x%x bp 0x%x)\n", - tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), bp->b_bufsize, tr, bp); + tr->total_bytes, (tr->jnl->jhdr->size - jnl->jhdr->jhdr_size), buf_size(bp), tr, bp); return -1; } @@ -1828,14 +1873,17 @@ journal_modify_block_start(journal *jnl, struct buf *bp) // it out before we muck with it because it has data that belongs // (presumably) to another transaction. // - if ((bp->b_flags & B_DELWRI) && (bp->b_flags & B_LOCKED) == 0) { + if ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI) { - // this will cause it to not be brelse()'d - bp->b_flags |= B_NORELSE; - VOP_BWRITE(bp); - } + if (buf_flags(bp) & B_ASYNC) { + panic("modify_block_start: bp @ 0x% has async flag set!\n", bp); + } - bp->b_flags |= B_LOCKED; + // this will cause it to not be buf_brelse()'d + buf_setflags(bp, B_NORELSE); + VNOP_BWRITE(bp); + } + buf_setflags(bp, B_LOCKED); return 0; } @@ -1853,11 +1901,11 @@ journal_modify_block_abort(journal *jnl, struct buf *bp) // // if there's no active transaction then we just want to - // call brelse() and return since this is just a block + // call buf_brelse() and return since this is just a block // that happened to be modified as part of another tr. // if (tr == NULL) { - brelse(bp); + buf_brelse(bp); return 0; } @@ -1867,9 +1915,9 @@ journal_modify_block_abort(journal *jnl, struct buf *bp) CHECK_TRANSACTION(tr); - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { panic("jnl: modify_block_abort: called w/out a transaction! jnl 0x%x, owner 0x%x, curact 0x%x\n", - jnl, jnl->owner, current_act()); + jnl, jnl->owner, current_thread()); } free_old_stuff(jnl); @@ -1880,9 +1928,9 @@ journal_modify_block_abort(journal *jnl, struct buf *bp) for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) { for(i=1; i < blhdr->num_blocks; i++) { if (bp == blhdr->binfo[i].bp) { - if (bp->b_bufsize != blhdr->binfo[i].bsize) { + if (buf_size(bp) != blhdr->binfo[i].bsize) { panic("jnl: bp @ 0x%x changed size on me! (%d vs. %d, jnl 0x%x)\n", - bp, bp->b_bufsize, blhdr->binfo[i].bsize, jnl); + bp, buf_size(bp), blhdr->binfo[i].bsize, jnl); } break; } @@ -1901,10 +1949,10 @@ journal_modify_block_abort(journal *jnl, struct buf *bp) // on it and so we need to keep it locked in memory. 
// if (blhdr == NULL) { - bp->b_flags &= ~(B_LOCKED); + buf_clearflags(bp, B_LOCKED); } - brelse(bp); + buf_brelse(bp); return 0; } @@ -1926,19 +1974,18 @@ journal_modify_block_end(journal *jnl, struct buf *bp) tr = jnl->active_tr; CHECK_TRANSACTION(tr); - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { panic("jnl: modify_block_end: called w/out a transaction! jnl 0x%x, owner 0x%x, curact 0x%x\n", - jnl, jnl->owner, current_act()); + jnl, jnl->owner, current_thread()); } free_old_stuff(jnl); - //printf("jnl: mod block end: (bp 0x%x vp 0x%x l/blkno %d/%d bsz %d, total bytes %d)\n", - // bp, bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_bufsize, tr->total_bytes); + //printf("jnl: mod block end: (bp 0x%x vp 0x%x l/blkno %qd/%qd bsz %d, total bytes %d)\n", + // bp, buf_vnode(bp), buf_lblkno(bp), buf_blkno(bp), buf_size(bp), tr->total_bytes); - if ((bp->b_flags & B_LOCKED) == 0) { + if ((buf_flags(bp) & B_LOCKED) == 0) { panic("jnl: modify_block_end: bp 0x%x not locked! jnl @ 0x%x\n", bp, jnl); - bp->b_flags |= B_LOCKED; } // first check if it's already part of this transaction @@ -1947,9 +1994,9 @@ journal_modify_block_end(journal *jnl, struct buf *bp) for(i=1; i < blhdr->num_blocks; i++) { if (bp == blhdr->binfo[i].bp) { - if (bp->b_bufsize != blhdr->binfo[i].bsize) { + if (buf_size(bp) != blhdr->binfo[i].bsize) { panic("jnl: bp @ 0x%x changed size on me! (%d vs. %d, jnl 0x%x)\n", - bp, bp->b_bufsize, blhdr->binfo[i].bsize, jnl); + bp, buf_size(bp), blhdr->binfo[i].bsize, jnl); } break; } @@ -1964,11 +2011,10 @@ journal_modify_block_end(journal *jnl, struct buf *bp) if (blhdr == NULL && prev && (prev->num_blocks+1) <= prev->max_blocks - && (prev->bytes_used+bp->b_bufsize) <= tr->tbuffer_size) { + && (prev->bytes_used+buf_size(bp)) <= tr->tbuffer_size) { blhdr = prev; } else if (blhdr == NULL) { block_list_header *nblhdr; - int prev_priv; if (prev == NULL) { panic("jnl: modify block end: no way man, prev == NULL?!?, jnl 0x%x, bp 0x%x\n", jnl, bp); @@ -1981,12 +2027,10 @@ journal_modify_block_end(journal *jnl, struct buf *bp) // through prev->binfo[0].bnum. that's a skanky way to do things but // avoids having yet another linked list of small data structures to manage. - thread_wire_internal(host_priv_self(), current_act(), TRUE, &prev_priv); if (kmem_alloc(kernel_map, (vm_offset_t *)&nblhdr, tr->tbuffer_size)) { panic("jnl: end_tr: no space for new block tr @ 0x%x (total bytes: %d)!\n", tr, tr->total_bytes); } - thread_wire_internal(host_priv_self(), current_act(), prev_priv, NULL); // journal replay code checksum check depends on this. 
memset(nblhdr, 0, BLHDR_CHECKSUM_SIZE); @@ -2015,23 +2059,27 @@ journal_modify_block_end(journal *jnl, struct buf *bp) // copy the data into the in-memory transaction buffer blkptr = (char *)&((char *)blhdr)[tbuffer_offset]; - memcpy(blkptr, bp->b_data, bp->b_bufsize); + memcpy(blkptr, buf_dataptr(bp), buf_size(bp)); // if this is true then this is a new block we haven't seen if (i >= blhdr->num_blocks) { - vget(bp->b_vp, 0, current_proc()); + int bsize; + vnode_t vp; + + vp = buf_vnode(bp); + vnode_ref(vp); + bsize = buf_size(bp); - blhdr->binfo[i].bnum = (off_t)((unsigned)bp->b_blkno); - blhdr->binfo[i].bsize = bp->b_bufsize; + blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); + blhdr->binfo[i].bsize = bsize; blhdr->binfo[i].bp = bp; - blhdr->bytes_used += bp->b_bufsize; - tr->total_bytes += bp->b_bufsize; + blhdr->bytes_used += bsize; + tr->total_bytes += bsize; blhdr->num_blocks++; } - - bdwrite(bp); + buf_bdwrite(bp); return 0; } @@ -2040,6 +2088,7 @@ int journal_kill_block(journal *jnl, struct buf *bp) { int i; + int bflags; block_list_header *blhdr; transaction *tr; @@ -2052,44 +2101,49 @@ journal_kill_block(journal *jnl, struct buf *bp) tr = jnl->active_tr; CHECK_TRANSACTION(tr); - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { panic("jnl: modify_block_end: called w/out a transaction! jnl 0x%x, owner 0x%x, curact 0x%x\n", - jnl, jnl->owner, current_act()); + jnl, jnl->owner, current_thread()); } free_old_stuff(jnl); - if ((bp->b_flags & B_LOCKED) == 0) { - panic("jnl: kill block: bp 0x%x not locked! jnl @ 0x%x\n", bp, jnl); - } + bflags = buf_flags(bp); + + if ( !(bflags & B_LOCKED)) + panic("jnl: modify_block_end: called with bp not B_LOCKED"); + /* + * bp must be BL_BUSY and B_LOCKED + */ // first check if it's already part of this transaction for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) { for(i=1; i < blhdr->num_blocks; i++) { if (bp == blhdr->binfo[i].bp) { - bp->b_flags &= ~B_LOCKED; + vnode_t vp; - // this undoes the vget() in journal_modify_block_end() - vrele(bp->b_vp); + buf_clearflags(bp, B_LOCKED); - // if the block has the DELWRI and CALL bits sets, then + // this undoes the vnode_ref() in journal_modify_block_end() + vp = buf_vnode(bp); + vnode_rele_ext(vp, 0, 1); + + // if the block has the DELWRI and FILTER bits sets, then // things are seriously weird. if it was part of another // transaction then journal_modify_block_start() should // have force it to be written. // - if ((bp->b_flags & B_DELWRI) && (bp->b_flags & B_CALL)) { - panic("jnl: kill block: this defies all logic! bp 0x%x\n", bp); - } else { - tr->num_killed += bp->b_bufsize; - } - - if (bp->b_flags & B_BUSY) { - brelse(bp); - } - + //if ((bflags & B_DELWRI) && (bflags & B_FILTER)) { + // panic("jnl: kill block: this defies all logic! bp 0x%x\n", bp); + //} else { + tr->num_killed += buf_size(bp); + //} blhdr->binfo[i].bp = NULL; blhdr->binfo[i].bnum = (off_t)-1; + + buf_brelse(bp); + break; } } @@ -2106,9 +2160,9 @@ journal_kill_block(journal *jnl, struct buf *bp) static int journal_binfo_cmp(void *a, void *b) { - block_info *bi_a = (struct block_info *)a, - *bi_b = (struct block_info *)b; - daddr_t res; + block_info *bi_a = (struct block_info *)a; + block_info *bi_b = (struct block_info *)b; + daddr64_t res; if (bi_a->bp == NULL) { return 1; @@ -2120,7 +2174,7 @@ journal_binfo_cmp(void *a, void *b) // don't have to worry about negative block // numbers so this is ok to do. 
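journal_binfo_cmp() above sorts a transaction's block_info entries by physical block number, pushing killed entries (bp == NULL) to the end. The sketch below does the same with qsort(); it uses explicit comparisons instead of the kernel's subtract-and-truncate, which also removes any risk of overflow on very large 64-bit block numbers:

#include <stdio.h>
#include <stdlib.h>

struct binfo {
    long long bnum;     /* -1 with bp == NULL for killed blocks */
    void *bp;
};

/* killed entries (bp == NULL) sort last; live entries by block number */
static int
binfo_cmp(const void *a, const void *b)
{
    const struct binfo *bi_a = a, *bi_b = b;

    if (bi_a->bp == NULL)
        return 1;
    if (bi_b->bp == NULL)
        return -1;
    if (bi_a->bnum < bi_b->bnum)
        return -1;
    return bi_a->bnum > bi_b->bnum;
}

int
main(void)
{
    static int dummy;   /* any non-NULL bp will do for the demo */
    struct binfo v[] = { {7, &dummy}, {-1, NULL}, {2, &dummy}, {5, &dummy} };
    int i;

    qsort(v, 4, sizeof(v[0]), binfo_cmp);
    for (i = 0; i < 4; i++)
        printf("%lld\n", v[i].bnum);   /* prints 2 5 7 -1 */
    return 0;
}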
// - res = (bi_a->bp->b_blkno - bi_b->bp->b_blkno); + res = (buf_blkno(bi_a->bp) - buf_blkno(bi_b->bp)); return (int)res; } @@ -2130,6 +2184,7 @@ static int end_transaction(transaction *tr, int force_it) { int i, j, ret, amt; + errno_t errno; off_t end; journal *jnl = tr->jnl; struct buf *bp; @@ -2144,7 +2199,7 @@ end_transaction(transaction *tr, int force_it) // just save off the transaction pointer and return. if (tr->total_bytes == jnl->jhdr->blhdr_size) { jnl->cur_tr = tr; - return; + return 0; } // if our transaction buffer isn't very full, just hang @@ -2159,7 +2214,7 @@ end_transaction(transaction *tr, int force_it) && (tr->total_bytes <= ((tr->tbuffer_size*tr->num_blhdrs) - tr->tbuffer_size/8))) { jnl->cur_tr = tr; - return; + return 0; } @@ -2182,10 +2237,10 @@ end_transaction(transaction *tr, int force_it) // file system flush routine until it is (or we panic). // i = 0; - simple_lock(&jnl->old_start_lock); + lock_oldstart(jnl); while ((jnl->old_start[0] & 0x8000000000000000LL) != 0) { if (jnl->flush) { - simple_unlock(&jnl->old_start_lock); + unlock_oldstart(jnl); if (jnl->flush) { jnl->flush(jnl->flush_arg); @@ -2194,9 +2249,9 @@ end_transaction(transaction *tr, int force_it) // yield the cpu so others can get in to clear the lock bit (void)tsleep((void *)jnl, PRIBIO, "jnl-old-start-sleep", 1); - simple_lock(&jnl->old_start_lock); + lock_oldstart(jnl); } - if (i++ >= 100) { + if (i++ >= 500) { panic("jnl: transaction that started at 0x%llx is not completing! jnl 0x%x\n", jnl->old_start[0] & (~0x8000000000000000LL), jnl); } @@ -2209,14 +2264,17 @@ end_transaction(transaction *tr, int force_it) memcpy(&jnl->old_start[0], &jnl->old_start[1], sizeof(jnl->old_start)-sizeof(jnl->old_start[0])); jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] = tr->journal_start | 0x8000000000000000LL; - simple_unlock(&jnl->old_start_lock); + unlock_oldstart(jnl); // for each block, make sure that the physical block # is set for(blhdr=tr->blhdr; blhdr; blhdr=next) { for(i=1; i < blhdr->num_blocks; i++) { - + daddr64_t blkno; + daddr64_t lblkno; + struct vnode *vp; + bp = blhdr->binfo[i].bp; if (bp == NULL) { // only true if a block was "killed" if (blhdr->binfo[i].bnum != (off_t)-1) { @@ -2225,25 +2283,40 @@ end_transaction(transaction *tr, int force_it) } continue; } - - if (bp->b_vp == NULL && bp->b_lblkno == bp->b_blkno) { - panic("jnl: end_tr: DANGER! bp @ 0x%x w/null vp and l/blkno = %d/%d\n", - bp, bp->b_lblkno, bp->b_blkno); + vp = buf_vnode(bp); + blkno = buf_blkno(bp); + lblkno = buf_lblkno(bp); + + if (vp == NULL && lblkno == blkno) { + printf("jnl: end_tr: bad news! bp @ 0x%x w/null vp and l/blkno = %qd/%qd. aborting the transaction (tr 0x%x jnl 0x%x).\n", + bp, lblkno, blkno, tr, jnl); + goto bad_journal; } // if the lblkno is the same as blkno and this bp isn't // associated with the underlying file system device then // we need to call bmap() to get the actual physical block. 
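end_transaction() and buffer_flushed_callback() share the old_start[] ring, where bit 63 of each entry marks a transaction whose buffers are still in flight and the low 63 bits hold its journal start offset. The encoding is sound only because journal offsets never approach 2^63. A tiny sketch of the tag-bit arithmetic:

#include <stdint.h>
#include <stdio.h>

#define BUSY 0x8000000000000000ULL   /* bit 63: transaction still in flight */

static inline uint64_t mark_busy(uint64_t off)  { return off | BUSY; }
static inline uint64_t clear_busy(uint64_t off) { return off & ~BUSY; }
static inline int      is_busy(uint64_t off)    { return (off & BUSY) != 0; }

int
main(void)
{
    uint64_t e = mark_busy(0x4000);          /* start offset 0x4000, in flight */

    printf("busy=%d offset=0x%llx\n", is_busy(e),
        (unsigned long long)clear_busy(e));  /* busy=1 offset=0x4000 */
    e = clear_busy(e);                       /* I/O completion clears the bit */
    printf("busy=%d\n", is_busy(e));         /* busy=0 */
    return 0;
}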
// - if ((bp->b_lblkno == bp->b_blkno) && (bp->b_vp != jnl->fsdev)) { - if (VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL) != 0) { - printf("jnl: end_tr: can't bmap the bp @ 0x%x, jnl 0x%x\n", bp, jnl); + if ((lblkno == blkno) && (vp != jnl->fsdev)) { + off_t f_offset; + size_t contig_bytes; + + if (VNOP_BLKTOOFF(vp, lblkno, &f_offset)) { + printf("jnl: end_tr: vnop_blktooff failed @ 0x%x, jnl 0x%x\n", bp, jnl); + goto bad_journal; + } + if (VNOP_BLOCKMAP(vp, f_offset, buf_count(bp), &blkno, &contig_bytes, NULL, 0, NULL)) { + printf("jnl: end_tr: can't blockmap the bp @ 0x%x, jnl 0x%x\n", bp, jnl); goto bad_journal; } + if ((uint32_t)contig_bytes < buf_count(bp)) { + printf("jnl: end_tr: blk not physically contiguous on disk@ 0x%x, jnl 0x%x\n", bp, jnl); + goto bad_journal; + } + buf_setblkno(bp, blkno); } - // update this so we write out the correct physical block number! - blhdr->binfo[i].bnum = (off_t)((unsigned)bp->b_blkno); + blhdr->binfo[i].bnum = (off_t)(blkno); } next = (block_list_header *)((long)blhdr->binfo[0].bnum); @@ -2301,53 +2374,52 @@ end_transaction(transaction *tr, int force_it) continue; } - ret = meta_bread(blhdr->binfo[i].bp->b_vp, - (daddr_t)blhdr->binfo[i].bp->b_lblkno, - blhdr->binfo[i].bp->b_bufsize, + errno = buf_meta_bread(buf_vnode(blhdr->binfo[i].bp), + buf_lblkno(blhdr->binfo[i].bp), + buf_size(blhdr->binfo[i].bp), NOCRED, &bp); - if (ret == 0 && bp != NULL) { + if (errno == 0 && bp != NULL) { struct vnode *save_vp; - + void *cur_filter; + if (bp != blhdr->binfo[i].bp) { panic("jnl: end_tr: got back a different bp! (bp 0x%x should be 0x%x, jnl 0x%x\n", bp, blhdr->binfo[i].bp, jnl); } - if ((bp->b_flags & (B_LOCKED|B_DELWRI)) != (B_LOCKED|B_DELWRI)) { + if ((buf_flags(bp) & (B_LOCKED|B_DELWRI)) != (B_LOCKED|B_DELWRI)) { if (jnl->flags & JOURNAL_CLOSE_PENDING) { - brelse(bp); + buf_clearflags(bp, B_LOCKED); + buf_brelse(bp); continue; } else { - panic("jnl: end_tr: !!!DANGER!!! bp 0x%x flags (0x%x) not LOCKED & DELWRI\n", bp, bp->b_flags); + panic("jnl: end_tr: !!!DANGER!!! 
bp 0x%x flags (0x%x) not LOCKED & DELWRI\n", bp, buf_flags(bp)); } } + save_vp = buf_vnode(bp); - if (bp->b_iodone != NULL) { - panic("jnl: bp @ 0x%x (blkno %d, vp 0x%x) has non-null iodone (0x%x) buffflushcb 0x%x\n", - bp, bp->b_blkno, bp->b_vp, bp->b_iodone, buffer_flushed_callback); - } - - save_vp = bp->b_vp; + buf_setfilter(bp, buffer_flushed_callback, tr, &cur_filter, NULL); - bp->b_iodone = buffer_flushed_callback; - bp->b_transaction = tr; - bp->b_flags |= B_CALL; - bp->b_flags &= ~(B_LOCKED); + if (cur_filter) { + panic("jnl: bp @ 0x%x (blkno %qd, vp 0x%x) has non-null iodone (0x%x) buffflushcb 0x%x\n", + bp, buf_blkno(bp), save_vp, cur_filter, buffer_flushed_callback); + } + buf_clearflags(bp, B_LOCKED); // kicking off the write here helps performance - bawrite(bp); - // XXXdbg this is good for testing: bdwrite(bp); - //bdwrite(bp); + buf_bawrite(bp); + // XXXdbg this is good for testing: buf_bdwrite(bp); + //buf_bdwrite(bp); - // this undoes the vget() in journal_modify_block_end() - vrele(save_vp); - + // this undoes the vnode_ref() in journal_modify_block_end() + vnode_rele_ext(save_vp, 0, 1); } else { printf("jnl: end_transaction: could not find block %Ld vp 0x%x!\n", blhdr->binfo[i].bnum, blhdr->binfo[i].bp); if (bp) { - brelse(bp); + buf_clearflags(bp, B_LOCKED); + buf_brelse(bp); } } } @@ -2366,6 +2438,7 @@ end_transaction(transaction *tr, int force_it) bad_journal: jnl->flags |= JOURNAL_INVALID; + jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] &= ~0x8000000000000000LL; abort_transaction(jnl, tr); return -1; } @@ -2373,7 +2446,8 @@ end_transaction(transaction *tr, int force_it) static void abort_transaction(journal *jnl, transaction *tr) { - int i, ret; + int i; + errno_t errno; block_list_header *blhdr, *next; struct buf *bp; struct vnode *save_vp; @@ -2389,33 +2463,35 @@ abort_transaction(journal *jnl, transaction *tr) if (blhdr->binfo[i].bp == NULL) { continue; } - - ret = meta_bread(blhdr->binfo[i].bp->b_vp, - (daddr_t)blhdr->binfo[i].bp->b_lblkno, - blhdr->binfo[i].bp->b_bufsize, + if ( (buf_vnode(blhdr->binfo[i].bp) == NULL) || + !(buf_flags(blhdr->binfo[i].bp) & B_LOCKED) ) { + continue; + } + + errno = buf_meta_bread(buf_vnode(blhdr->binfo[i].bp), + buf_lblkno(blhdr->binfo[i].bp), + buf_size(blhdr->binfo[i].bp), NOCRED, &bp); - if (ret == 0) { + if (errno == 0) { if (bp != blhdr->binfo[i].bp) { panic("jnl: abort_tr: got back a different bp! (bp 0x%x should be 0x%x, jnl 0x%x\n", bp, blhdr->binfo[i].bp, jnl); } - // clear the locked bit and the delayed-write bit. we - // don't want these blocks going to disk. - bp->b_flags &= ~(B_LOCKED|B_DELWRI); - bp->b_flags |= B_INVAL; - save_vp = bp->b_vp; - - brelse(bp); + // releasing a bp marked invalid + // also clears the locked and delayed state + buf_markinvalid(bp); + save_vp = buf_vnode(bp); - vrele(save_vp); + buf_brelse(bp); + vnode_rele_ext(save_vp, 0, 1); } else { printf("jnl: abort_tr: could not find block %Ld vp 0x%x!\n", blhdr->binfo[i].bnum, blhdr->binfo[i].bp); if (bp) { - brelse(bp); + buf_brelse(bp); } } } @@ -2438,7 +2514,7 @@ int journal_end_transaction(journal *jnl) { int ret; - transaction *tr; + transaction *tr; CHECK_JOURNAL(jnl); @@ -2446,9 +2522,9 @@ journal_end_transaction(journal *jnl) return 0; } - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { panic("jnl: end_tr: I'm not the owner! 
jnl 0x%x, owner 0x%x, curact 0x%x\n", - jnl, jnl->owner, current_act()); + jnl, jnl->owner, current_thread()); } free_old_stuff(jnl); @@ -2462,8 +2538,6 @@ journal_end_transaction(journal *jnl) if (jnl->flags & JOURNAL_INVALID) { if (jnl->active_tr) { - transaction *tr; - if (jnl->cur_tr != NULL) { panic("jnl: journal @ 0x%x has active tr (0x%x) and cur tr (0x%x)\n", jnl, jnl->active_tr, jnl->cur_tr); @@ -2475,7 +2549,7 @@ journal_end_transaction(journal *jnl) } jnl->owner = NULL; - lockmgr(&jnl->jlock, LK_RELEASE, NULL, current_proc()); + unlock_journal(jnl); return EINVAL; } @@ -2492,7 +2566,7 @@ journal_end_transaction(journal *jnl) ret = end_transaction(tr, 0); jnl->owner = NULL; - lockmgr(&jnl->jlock, LK_RELEASE, NULL, current_proc()); + unlock_journal(jnl); return ret; } @@ -2509,14 +2583,10 @@ journal_flush(journal *jnl) return -1; } - if (jnl->owner != current_act()) { + if (jnl->owner != current_thread()) { int ret; - ret = lockmgr(&jnl->jlock, LK_EXCLUSIVE|LK_RETRY, NULL, current_proc()); - if (ret != 0) { - printf("jnl: flush: locking the journal (0x%x) failed %d.\n", jnl, ret); - return -1; - } + lock_journal(jnl); need_signal = 1; } @@ -2531,7 +2601,7 @@ journal_flush(journal *jnl) } if (need_signal) { - lockmgr(&jnl->jlock, LK_RELEASE, NULL, current_proc()); + unlock_journal(jnl); } return 0; @@ -2546,3 +2616,9 @@ journal_active(journal *jnl) return (jnl->active_tr == NULL) ? 0 : 1; } + +void * +journal_owner(journal *jnl) +{ + return jnl->owner; +} diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index 05606b1ba..cf87d421e 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -30,11 +30,12 @@ #define _SYS_VFS_JOURNAL_H_ #include +#include #ifdef __APPLE_API_UNSTABLE #include -#include +#include typedef struct block_info { off_t bnum; // block # on the file system device @@ -94,7 +95,7 @@ typedef struct journal_header { * In memory structure about the journal. */ typedef struct journal { - struct lock__bsd__ jlock; + lck_mtx_t jlock; // protects the struct journal data struct vnode *jdev; // vnode of the device where the journal lives off_t jdev_offset; // byte offset to the start of the journal @@ -118,11 +119,11 @@ typedef struct journal { transaction *tr_freeme; // transaction structs that need to be free'd - volatile off_t active_start; // the active start that we only keep in memory - simple_lock_data_t old_start_lock; // guard access - volatile off_t old_start[16]; // this is how we do lazy start update + volatile off_t active_start; // the active start that we only keep in memory + lck_mtx_t old_start_lock; // protects the old_start + volatile off_t old_start[16]; // this is how we do lazy start update - int last_flush_err; // last error from flushing the cache + int last_flush_err; // last error from flushing the cache } journal; /* internal-only journal flags (top 16 bits) */ @@ -134,10 +135,16 @@ typedef struct journal { /* journal_open/create options are always in the low-16 bits */ #define JOURNAL_OPTION_FLAGS_MASK 0x0000ffff +__BEGIN_DECLS /* * Prototypes. */ +/* + * Call journal_init() to initialize the journaling code (sets up lock attributes) + */ +void journal_init(void); + /* * Call journal_create() to create a new journal. You only * call this once, typically at file system creation time. @@ -200,7 +207,7 @@ journal *journal_open(struct vnode *jvp, * It flushes any outstanding transactions and makes sure the * journal is in a consistent state. 
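journal_flush() above takes jlock only when the caller is not already the transaction owner, and remembers (need_signal) whether it must drop the lock on the way out. A short sketch of that conditional-locking shape, with pthreads standing in for the journal mutex:

#include <pthread.h>

struct jnl {
    pthread_mutex_t lock;
    pthread_t owner;
    int owner_valid;
};

void
flush(struct jnl *j)
{
    int took_lock = 0;

    if (!(j->owner_valid && pthread_equal(j->owner, pthread_self()))) {
        pthread_mutex_lock(&j->lock);   /* not the owner: serialize with it */
        took_lock = 1;
    }

    /* ... do the flush work; we hold the journal either way ... */

    if (took_lock)
        pthread_mutex_unlock(&j->lock);
}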
*/ -void journal_close(journal *journal); +void journal_close(journal *journalp); /* * flags for journal_create/open. only can use @@ -238,6 +245,9 @@ int journal_end_transaction(journal *jnl); int journal_active(journal *jnl); int journal_flush(journal *jnl); +void *journal_owner(journal *jnl); // compare against current_thread() + +__END_DECLS #endif /* __APPLE_API_UNSTABLE */ #endif /* !_SYS_VFS_JOURNAL_H_ */ diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index d65c10b90..d10ba8fd3 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -66,14 +66,16 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include +#include #include #include /* For _PC_NAME_MAX */ +#include +#include #include @@ -81,7 +83,8 @@ #include #endif -static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); + +static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); /* * Convert a pathname into a pointer to a locked inode. @@ -103,6 +106,7 @@ static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); * if symbolic link, massage name in buffer and continue * } */ + int namei(ndp) register struct nameidata *ndp; @@ -110,16 +114,16 @@ namei(ndp) register struct filedesc *fdp; /* pointer to file descriptor state */ register char *cp; /* pointer into pathname argument */ register struct vnode *dp; /* the directory we are searching */ - struct iovec aiov; /* uio for reading symbolic links */ - struct uio auio; - int error, linklen; + uio_t auio; + int error; struct componentname *cnp = &ndp->ni_cnd; - struct proc *p = cnp->cn_proc; + vfs_context_t ctx = cnp->cn_context; + struct proc *p = vfs_context_proc(ctx); char *tmppn; + char uio_buf[ UIO_SIZEOF(1) ]; - ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #if DIAGNOSTIC - if (!cnp->cn_cred || !cnp->cn_proc) + if (!vfs_context_ucred(ctx) || !p) panic ("namei: bad cred/proc"); if (cnp->cn_nameiop & (~OPMASK)) panic ("namei: nameiop contaminated with flags"); @@ -133,17 +137,34 @@ namei(ndp) * name into the buffer. 
*/ if ((cnp->cn_flags & HASBUF) == 0) { - MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, - MAXPATHLEN, M_NAMEI, M_WAITOK); - cnp->cn_pnlen = MAXPATHLEN; - cnp->cn_flags |= HASBUF; + cnp->cn_pnbuf = &ndp->ni_pathbuf; + cnp->cn_pnlen = PATHBUFLEN; } - if (ndp->ni_segflg == UIO_SYSSPACE) - error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, - MAXPATHLEN, (size_t *)&ndp->ni_pathlen); - else +#if LP64_DEBUG + if (IS_VALID_UIO_SEGFLG(ndp->ni_segflg) == 0) { + panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + +retry_copy: + if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, - MAXPATHLEN, (size_t *)&ndp->ni_pathlen); + cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); + else + error = copystr(CAST_DOWN(void *, ndp->ni_dirp), cnp->cn_pnbuf, + cnp->cn_pnlen, (size_t *)&ndp->ni_pathlen); + + if (error == ENAMETOOLONG && !(cnp->cn_flags & HASBUF)) { + MALLOC_ZONE(cnp->cn_pnbuf, caddr_t, + MAXPATHLEN, M_NAMEI, M_WAITOK); + + cnp->cn_flags |= HASBUF; + cnp->cn_pnlen = MAXPATHLEN; + + goto retry_copy; + } + if (error) + goto error_out; /* If we are auditing the kernel pathname, save the user pathname */ if (cnp->cn_flags & AUDITVNPATH1) @@ -154,19 +175,9 @@ namei(ndp) /* * Do not allow empty pathnames */ - if (!error && *cnp->cn_pnbuf == '\0') + if (*cnp->cn_pnbuf == '\0') { error = ENOENT; - - if (!error && ((dp = fdp->fd_cdir) == NULL)) - error = EPERM; /* 3382843 */ - - if (error) { - tmppn = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI); - ndp->ni_vp = NULL; - return (error); + goto error_out; } ndp->ni_loopcnt = 0; #if KTRACE @@ -175,113 +186,155 @@ namei(ndp) #endif /* - * Get starting point for the translation. + * determine the starting point for the translation. */ - if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL) - ndp->ni_rootdir = rootvnode; - if (ndp->ni_cnd.cn_flags & USEDVP) { - dp = ndp->ni_dvp; - ndp->ni_dvp = NULL; - } else { - dp = fdp->fd_cdir; + if ((ndp->ni_rootdir = fdp->fd_rdir) == NULLVP) { + if ( !(fdp->fd_flags & FD_CHROOT)) + ndp->ni_rootdir = rootvnode; } + cnp->cn_nameptr = cnp->cn_pnbuf; - VREF(dp); - for (;;) { - /* - * Check if root directory should replace current directory. - * Done at start of translation and after symbolic link. 
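namei() now copies the user path into a small buffer embedded in the nameidata first and upgrades to a MAXPATHLEN zone allocation only when the copy reports ENAMETOOLONG, so the common short-path case costs no allocation at all. A user-space sketch of the two-stage copy (copystr_() is a stand-in for the kernel copy routines; buffer sizes are illustrative):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define SHORTLEN 32            /* stand-in for the embedded PATHBUFLEN buffer */
#define MAXLEN   1024          /* stand-in for MAXPATHLEN */

/* copystr stand-in: copy up to maxlen bytes incl. NUL, ENAMETOOLONG if not */
static int
copystr_(const char *src, char *dst, size_t maxlen, size_t *done)
{
    size_t n = strlen(src) + 1;

    if (n > maxlen)
        return ENAMETOOLONG;
    memcpy(dst, src, n);
    if (done)
        *done = n;
    return 0;
}

/* try the small embedded buffer first; retry once with a big one */
char *
copy_path(const char *path, char *shortbuf, int *error)
{
    char *buf = shortbuf;
    size_t buflen = SHORTLEN, len;

    *error = copystr_(path, buf, buflen, &len);
    if (*error == ENAMETOOLONG) {
        buf = malloc(MAXLEN);       /* rare long path: upgrade the buffer */
        if (buf == NULL) {
            *error = ENOMEM;
            return NULL;
        }
        buflen = MAXLEN;
        *error = copystr_(path, buf, buflen, &len);
    }
    if (*error) {
        if (buf != shortbuf)
            free(buf);
        return NULL;
    }
    return buf;
}

As with the HASBUF flag, the caller's only cleanup rule is: free the buffer iff it is not the embedded one.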
- */ - cnp->cn_nameptr = cnp->cn_pnbuf; - if (*(cnp->cn_nameptr) == '/') { - vrele(dp); - while (*(cnp->cn_nameptr) == '/') { - cnp->cn_nameptr++; - ndp->ni_pathlen--; - } - dp = ndp->ni_rootdir; - VREF(dp); + ndp->ni_usedvp = NULLVP; + + if (*(cnp->cn_nameptr) == '/') { + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; } + dp = ndp->ni_rootdir; + } else if (cnp->cn_flags & USEDVP) { + dp = ndp->ni_dvp; + ndp->ni_usedvp = dp; + } else + dp = fdp->fd_cdir; + + if (dp == NULLVP) { + error = ENOENT; + goto error_out; + } + ndp->ni_dvp = NULLVP; + ndp->ni_vp = NULLVP; + + for (;;) { + int need_newpathbuf; + int linklen; + ndp->ni_startdir = dp; - if (error = lookup(ndp)) { - long len = cnp->cn_pnlen; - tmppn = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmppn, len, M_NAMEI); - return (error); + + if ( (error = lookup(ndp)) ) { + goto error_out; } /* * Check for symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { - if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { - tmppn = cnp->cn_pnbuf; - cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI); - } else { - cnp->cn_flags |= HASBUF; - } return (0); } - if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) - VOP_UNLOCK(ndp->ni_dvp, 0, p); + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } - if (ndp->ni_pathlen > 1) { + if (ndp->ni_pathlen > 1 || !(cnp->cn_flags & HASBUF)) + need_newpathbuf = 1; + else + need_newpathbuf = 0; + + if (need_newpathbuf) { MALLOC_ZONE(cp, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); } else { cp = cnp->cn_pnbuf; } - aiov.iov_base = cp; - aiov.iov_len = MAXPATHLEN; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - auio.uio_resid = MAXPATHLEN; - if (error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred)) { - if (ndp->ni_pathlen > 1) + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + + uio_addiov(auio, CAST_USER_ADDR_T(cp), MAXPATHLEN); + + error = VNOP_READLINK(ndp->ni_vp, auio, ctx); + if (error) { + if (need_newpathbuf) FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); break; } - linklen = MAXPATHLEN - auio.uio_resid; - if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { - if (ndp->ni_pathlen > 1) + // LP64todo - fix this + linklen = MAXPATHLEN - uio_resid(auio); + if (linklen + ndp->ni_pathlen > MAXPATHLEN) { + if (need_newpathbuf) FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); + error = ENAMETOOLONG; break; } - if (ndp->ni_pathlen > 1) { + if (need_newpathbuf) { long len = cnp->cn_pnlen; + tmppn = cnp->cn_pnbuf; bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); cnp->cn_pnbuf = cp; cnp->cn_pnlen = MAXPATHLEN; - FREE_ZONE(tmppn, len, M_NAMEI); + + if ( (cnp->cn_flags & HASBUF) ) + FREE_ZONE(tmppn, len, M_NAMEI); + else + cnp->cn_flags |= HASBUF; } else cnp->cn_pnbuf[linklen] = '\0'; + ndp->ni_pathlen += linklen; - vput(ndp->ni_vp); + cnp->cn_nameptr = cnp->cn_pnbuf; + + /* + * starting point for 'relative' + * symbolic link path + */ dp = ndp->ni_dvp; - } + /* + * get rid of references returned via 'lookup' + */ + vnode_put(ndp->ni_vp); + vnode_put(ndp->ni_dvp); + + ndp->ni_vp = NULLVP; + ndp->ni_dvp = NULLVP; - tmppn = cnp->cn_pnbuf; + /* + * Check if symbolic link restarts us at the root + */ + if (*(cnp->cn_nameptr) == '/') { + while 
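The symlink branch above splices the link target in front of the unresolved remainder of the path: VNOP_READLINK() fills a fresh buffer, bcopy() appends ndp->ni_next (ni_pathlen bytes, trailing NUL included) just past the link text, and the combined string becomes the new pathname, with ni_loopcnt bounding the expansion at MAXSYMLINKS. A sketch of just the splice step (sizes and names are illustrative):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXPATH 1024

/* Splice a symlink target in front of the unresolved remainder of a path.
 * 'rest' points at the unconsumed part (including its NUL); returns a
 * malloc'd buffer the caller frees, or NULL with *err set on failure.
 */
char *
splice_link(const char *target, const char *rest, int *err)
{
    size_t linklen = strlen(target);
    size_t restlen = strlen(rest) + 1;    /* keep the NUL, as ni_pathlen does */
    char *cp;

    if (linklen + restlen > MAXPATH) {
        *err = ENAMETOOLONG;
        return NULL;
    }
    cp = malloc(MAXPATH);
    if (cp == NULL) {
        *err = ENOMEM;
        return NULL;
    }
    memcpy(cp, target, linklen);
    memcpy(cp + linklen, rest, restlen);  /* bcopy(ni_next, cp + linklen, ...) */
    *err = 0;
    return cp;
}

int
main(void)
{
    int err;
    char *p = splice_link("/private/var", "/log/system.log", &err);

    if (p) {
        printf("%s\n", p);    /* /private/var/log/system.log */
        free(p);
    }
    return 0;
}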
(*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } + if ((dp = ndp->ni_rootdir) == NULLVP) { + error = ENOENT; + goto error_out; + } + } + } + /* + * only come here if we fail to handle a SYMLINK... + * if either ni_dvp or ni_vp is non-NULL, then + * we need to drop the iocount that was picked + * up in the lookup routine + */ + if (ndp->ni_dvp) + vnode_put(ndp->ni_dvp); + if (ndp->ni_vp) + vnode_put(ndp->ni_vp); + error_out: + if ( (cnp->cn_flags & HASBUF) ) { + cnp->cn_flags &= ~HASBUF; + FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); + } cnp->cn_pnbuf = NULL; - cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmppn, cnp->cn_pnlen, M_NAMEI); + ndp->ni_vp = NULLVP; - vrele(ndp->ni_dvp); - vput(ndp->ni_vp); - ndp->ni_vp = NULL; return (error); } + /* * Search a pathname. * This is a very central and rather complicated routine. @@ -310,7 +363,7 @@ namei(ndp) * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent - * call VOP_LOOKUP routine for next component name + * call VNOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, @@ -324,111 +377,67 @@ int lookup(ndp) register struct nameidata *ndp; { - register char *cp; /* pointer into pathname argument */ - register struct vnode *dp = 0; /* the directory we are searching */ - struct vnode *tdp; /* saved dp */ - struct mount *mp; /* mount table entry */ - int namemax = 0; /* maximun number of bytes for filename returned by pathconf() */ - int docache; /* == 0 do not cache last component */ + register char *cp; /* pointer into pathname argument */ + vnode_t tdp; /* saved dp */ + vnode_t dp; /* the directory we are searching */ + mount_t mp; /* mount table entry */ + int docache = 1; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ - int dp_unlocked = 0; /* 1 => dp already VOP_UNLOCK()-ed */ int rdonly; /* lookup read-only flag bit */ int trailing_slash = 0; + int dp_authorized = 0; int error = 0; struct componentname *cnp = &ndp->ni_cnd; - struct proc *p = cnp->cn_proc; - int i; + vfs_context_t ctx = cnp->cn_context; /* * Setup: break out flag bits into variables. */ + if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { + if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) + docache = 0; + } wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); - docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; - if (cnp->cn_nameiop == DELETE || - (wantparent && cnp->cn_nameiop != CREATE && - cnp->cn_nameiop != LOOKUP)) - docache = 0; rdonly = cnp->cn_flags & RDONLY; - ndp->ni_dvp = NULL; cnp->cn_flags &= ~ISSYMLINK; + cnp->cn_consume = 0; + dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; - vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); - cnp->cn_consume = 0; -dirloop: - /* - * Search a new directory. - * - * The cn_hash value is for use by vfs_cache. - * The last component of the filename is left accessible via - * cnp->cn_nameptr for callers that need the name. Callers needing - * the name set the SAVENAME flag. When done, they assume - * responsibility for freeing the pathname buffer. 
- */ - { - register unsigned int hash; - register unsigned int ch; - register int i; - - hash = 0; - cp = cnp->cn_nameptr; - ch = *cp; - if (ch == '\0') { - cnp->cn_namelen = 0; - goto emptyname; - } + cp = cnp->cn_nameptr; - for (i = 1; (ch != '/') && (ch != '\0'); i++) { - hash += ch * i; - ch = *(++cp); - } - cnp->cn_hash = hash; - } - cnp->cn_namelen = cp - cnp->cn_nameptr; - if (cnp->cn_namelen > NCHNAMLEN) { - if (VOP_PATHCONF(dp, _PC_NAME_MAX, &namemax)) - namemax = NAME_MAX; - if (cnp->cn_namelen > namemax) { - error = ENAMETOOLONG; + if (*cp == '\0') { + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; goto bad; } + goto emptyname; } -#ifdef NAMEI_DIAGNOSTIC - { char c = *cp; - *cp = '\0'; - printf("{%s}: ", cnp->cn_nameptr); - *cp = c; } -#endif - ndp->ni_pathlen -= cnp->cn_namelen; - ndp->ni_next = cp; +dirloop: + ndp->ni_vp = NULLVP; - /* - * Replace multiple slashes by a single slash and trailing slashes - * by a null. This must be done before VOP_LOOKUP() because some - * fs's don't know about trailing slashes. Remember if there were - * trailing slashes to handle symlinks, existing non-directories - * and non-existing files that won't be directories specially later. - */ - trailing_slash = 0; - while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { - cp++; - ndp->ni_pathlen--; - if (*cp == '\0') { - trailing_slash = 1; - *ndp->ni_next = '\0'; - } + if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized)) ) { + dp = NULLVP; + goto bad; + } + if ((cnp->cn_flags & ISLASTCN)) { + if (docache) + cnp->cn_flags |= MAKEENTRY; + } else + cnp->cn_flags |= MAKEENTRY; + + dp = ndp->ni_dvp; + + if (ndp->ni_vp != NULLVP) { + /* + * cache_lookup_path returned a non-NULL ni_vp then, + * we're guaranteed that the dp is a VDIR, it's + * been authorized, and vp is not ".." + */ + goto returned_from_lookup_path; } - ndp->ni_next = cp; - - cnp->cn_flags |= MAKEENTRY; - if (*cp == '\0' && docache == 0) - cnp->cn_flags &= ~MAKEENTRY; - - if (*ndp->ni_next == 0) - cnp->cn_flags |= ISLASTCN; - else - cnp->cn_flags &= ~ISLASTCN; /* * Handle "..": two special cases. @@ -440,62 +449,91 @@ dirloop: * vnode which was mounted on so we take the * .. in the other file system. */ - if (cnp->cn_namelen == 2 && - cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') { - cnp->cn_flags |= ISDOTDOT; - + if ( (cnp->cn_flags & ISDOTDOT) ) { for (;;) { - if (dp == ndp->ni_rootdir || dp == rootvnode) { - ndp->ni_dvp = dp; + if (dp == ndp->ni_rootdir || dp == rootvnode) { + ndp->ni_dvp = dp; ndp->ni_vp = dp; - VREF(dp); + /* + * we're pinned at the root + * we've already got one reference on 'dp' + * courtesy of cache_lookup_path... take + * another one for the ".." + * if we fail to get the new reference, we'll + * drop our original down in 'bad' + */ + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto bad; + } goto nextname; } if ((dp->v_flag & VROOT) == 0 || (cnp->cn_flags & NOCROSSMOUNT)) - break; + break; if (dp->v_mount == NULL) { /* forced umount */ - error = EBADF; + error = EBADF; goto bad; } - tdp = dp; - dp = dp->v_mount->mnt_vnodecovered; - vput(tdp); - VREF(dp); - vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + dp = tdp->v_mount->mnt_vnodecovered; + + vnode_put(tdp); + + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; + goto bad; + } + ndp->ni_dvp = dp; + dp_authorized = 0; } - } else { - cnp->cn_flags &= ~ISDOTDOT; } /* * We now have a segment name to search for, and a directory to search. 
*/ unionlookup: - ndp->ni_dvp = dp; - ndp->ni_vp = NULL; - if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) { -#if DIAGNOSTIC - if (ndp->ni_vp != NULL) - panic("leaf should be empty"); -#endif -#ifdef NAMEI_DIAGNOSTIC - printf("not found\n"); -#endif + ndp->ni_vp = NULLVP; + + if (dp->v_type != VDIR) { + error = ENOTDIR; + goto lookup_error; + } + if ( !(dp_authorized || (cnp->cn_flags & DONOTAUTH)) ) { + if ( (error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx)) ) + goto lookup_error; + } + if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) { +lookup_error: if ((error == ENOENT) && (dp->v_flag & VROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(dp, NULL); + } tdp = dp; - dp = dp->v_mount->mnt_vnodecovered; - vput(tdp); - VREF(dp); - vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); + dp = tdp->v_mount->mnt_vnodecovered; + + vnode_put(tdp); + + if ( (vnode_getwithref(dp)) ) { + dp = NULLVP; + error = ENOENT; + goto bad; + } + ndp->ni_dvp = dp; + dp_authorized = 0; goto unionlookup; } if (error != EJUSTRETURN) goto bad; + + if (ndp->ni_vp != NULLVP) + panic("leaf should be empty"); + /* * If creating and at end of pathname, then can consider * allowing file to be created. @@ -504,27 +542,31 @@ unionlookup: error = EROFS; goto bad; } - if (*cp == '\0' && trailing_slash && - !(cnp->cn_flags & WILLBEDIR)) { + if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the - * (possibly locked) directory inode in ndp->ni_dvp. + * referenced directory vnode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { + if ( (vnode_get(ndp->ni_dvp)) ) { + error = ENOENT; + goto bad; + } ndp->ni_startdir = ndp->ni_dvp; - VREF(ndp->ni_startdir); } + if (!wantparent) + vnode_put(ndp->ni_dvp); + if (kdebug_enable) kdebug_lookup(ndp->ni_dvp, cnp); return (0); } -#ifdef NAMEI_DIAGNOSTIC - printf("found\n"); -#endif +returned_from_lookup_path: + dp = ndp->ni_vp; /* * Take into account any additional components consumed by @@ -536,48 +578,81 @@ unionlookup: ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } else { - int isdot_or_dotdot; + if (dp->v_name == NULL || dp->v_parent == NULLVP) { + int isdot_or_dotdot; + int update_flags = 0; - isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); + isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); - if (VNAME(ndp->ni_vp) == NULL && isdot_or_dotdot == 0) { - VNAME(ndp->ni_vp) = add_name(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); - } - if (VPARENT(ndp->ni_vp) == NULL && isdot_or_dotdot == 0) { - if (vget(ndp->ni_dvp, 0, p) == 0) { - VPARENT(ndp->ni_vp) = ndp->ni_dvp; + if (isdot_or_dotdot == 0) { + if (dp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); + } + } + + if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) { + /* + * missing from name cache, but should + * be in it... this can happen if volfs + * causes the vnode to be created or the + * name cache entry got recycled but the + * vnode didn't... 
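Throughout the hunk above, the old VREF/vrele/vput lifecycle is replaced by iocounts: vnode_getwithref() (or vnode_get() when a reference is already held) takes one and fails if the vnode is being reclaimed, and vnode_put() drops it. A sketch of the caller-side discipline, using the KPIs exactly as they are used above (use_vnode_briefly is a hypothetical helper):

static int
use_vnode_briefly(vnode_t vp)
{
	int error;

	/* fails if vp is dead or draining, instead of blocking on a vnode lock */
	if ((error = vnode_getwithref(vp)))
		return (error);

	/* ... safe to apply VNOP_xxx operations to vp here ... */

	vnode_put(vp);			/* drop the iocount when done */
	return (0);
}

The error-return style is what lets lookup() turn a dying vnode into ENOENT rather than stalling the path walk.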
+ * check to make sure that ni_dvp is valid + * cache_lookup_path may return a NULL + */ + if (ndp->ni_dvp != NULL) + cache_enter(ndp->ni_dvp, dp, cnp); } - } } - dp = ndp->ni_vp; /* - * Check to see if the vnode has been mounted on; + * Check to see if the vnode has been mounted on... * if so find the root of the mounted file system. */ - while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && - (cnp->cn_flags & NOCROSSMOUNT) == 0) { - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - error = ENOENT; - goto bad2; - } - VOP_UNLOCK(dp, 0, p); - error = VFS_ROOT(mp, &tdp); - vfs_unbusy(mp, p); - if (error) { - dp_unlocked = 1; /* Signal error path 'dp' has already been unlocked */ - goto bad2; - }; - vrele(dp); - ndp->ni_vp = dp = tdp; +check_mounted_on: + if ((dp->v_type == VDIR) && dp->v_mountedhere && + ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { + + vnode_lock(dp); + + if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { + + mp->mnt_crossref++; + vnode_unlock(dp); + + if (vfs_busy(mp, 0)) { + mount_dropcrossref(mp, dp, 0); + goto check_mounted_on; + } + error = VFS_ROOT(mp, &tdp, ctx); + /* + * mount_dropcrossref does a vnode_put + * on dp if the 3rd arg is non-zero + */ + mount_dropcrossref(mp, dp, 1); + dp = NULL; + vfs_unbusy(mp); + + if (error) { + goto bad2; + } + ndp->ni_vp = dp = tdp; + + goto check_mounted_on; + } + vnode_unlock(dp); } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && - ((cnp->cn_flags & FOLLOW) || trailing_slash || - *ndp->ni_next == '/')) { + ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; return (0); } @@ -591,7 +666,7 @@ unionlookup: goto bad2; } trailing_slash = 0; - } + } nextname: /* @@ -605,7 +680,14 @@ nextname: cnp->cn_nameptr++; ndp->ni_pathlen--; } - vrele(ndp->ni_dvp); + vnode_put(ndp->ni_dvp); + + cp = cnp->cn_nameptr; + + if (*cp == '\0') + goto emptyname; + + vnode_put(dp); goto dirloop; } @@ -618,22 +700,32 @@ nextname: goto bad2; } if (cnp->cn_flags & SAVESTART) { + /* + * note that we already hold a reference + * on both dp and ni_dvp, but for some reason + * can't get another one... in this case we + * need to do vnode_put on dp in 'bad2' + */ + if ( (vnode_get(ndp->ni_dvp)) ) { + error = ENOENT; + goto bad2; + } ndp->ni_startdir = ndp->ni_dvp; - VREF(ndp->ni_startdir); } - if (!wantparent) - vrele(ndp->ni_dvp); + if (!wantparent && ndp->ni_dvp) + vnode_put(ndp->ni_dvp); + if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(vnpath, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(vnpath, dp, ARG_VNODE2); - if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp, 0, p); + if (kdebug_enable) kdebug_lookup(dp, cnp); return (0); emptyname: + cnp->cn_namelen = 0; /* * A degenerate name (e.g. / or "") which is a way of * talking about a directory, e.g. like "/." or ".". @@ -647,34 +739,55 @@ emptyname: goto bad; } if (wantparent) { + /* + * note that we already hold a reference + * on dp, but for some reason can't + * get another one... 
in this case we + * need to do vnode_put on dp in 'bad' + */ + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto bad; + } ndp->ni_dvp = dp; - VREF(dp); } cnp->cn_flags &= ~ISDOTDOT; cnp->cn_flags |= ISLASTCN; ndp->ni_next = cp; ndp->ni_vp = dp; + if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(vnpath, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(vnpath, dp, ARG_VNODE2); - if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) - VOP_UNLOCK(dp, 0, p); if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); bad2: - if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0') - VOP_UNLOCK(ndp->ni_dvp, 0, p); - vrele(ndp->ni_dvp); + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } + if (ndp->ni_dvp) + vnode_put(ndp->ni_dvp); + if (dp) + vnode_put(dp); + ndp->ni_vp = NULLVP; + + if (kdebug_enable) + kdebug_lookup(dp, cnp); + return (error); + bad: - if (dp_unlocked) { - vrele(dp); - } else { - vput(dp); - }; - ndp->ni_vp = NULL; + if ((cnp->cn_flags & FSNODELOCKHELD)) { + cnp->cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } + if (dp) + vnode_put(dp); + ndp->ni_vp = NULLVP; + if (kdebug_enable) kdebug_lookup(dp, cnp); return (error); @@ -689,9 +802,7 @@ relookup(dvp, vpp, cnp) struct vnode *dvp, **vpp; struct componentname *cnp; { - struct proc *p = cnp->cn_proc; struct vnode *dp = 0; /* the directory we are searching */ - int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; @@ -699,41 +810,21 @@ relookup(dvp, vpp, cnp) int i, newhash; /* DEBUG: check name hash */ char *cp; /* DEBUG: check name ptr/len */ #endif + vfs_context_t ctx = cnp->cn_context;; /* * Setup: break out flag bits into variables. */ wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); - docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; - if (cnp->cn_nameiop == DELETE || - (wantparent && cnp->cn_nameiop != CREATE)) - docache = 0; rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; - dp = dvp; - vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); -/* dirloop: */ - /* - * Search a new directory. - * - * The cn_hash value is for use by vfs_cache. - * The last component of the filename is left accessible via - * cnp->cn_nameptr for callers that need the name. Callers needing - * the name set the SAVENAME flag. When done, they assume - * responsibility for freeing the pathname buffer. - */ -#ifdef NAMEI_DIAGNOSTIC - for (i=1, newhash = 0, cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) - newhash += (unsigned char)*cp * i; - if (newhash != cnp->cn_hash) - panic("relookup: bad hash"); - if (cnp->cn_namelen != cp - cnp->cn_nameptr) - panic ("relookup: bad len"); - if (*cp != 0) - panic("relookup: not last component"); - printf("{%s}: ", cnp->cn_nameptr); -#endif + if (cnp->cn_flags & NOCACHE) + cnp->cn_flags &= ~MAKEENTRY; + else + cnp->cn_flags |= MAKEENTRY; + + dp = dvp; /* * Check for degenerate name (e.g. / or "") @@ -749,27 +840,26 @@ relookup(dvp, vpp, cnp) error = ENOTDIR; goto bad; } - if (!(cnp->cn_flags & LOCKLEAF)) - VOP_UNLOCK(dp, 0, p); + if ( (vnode_get(dp)) ) { + error = ENOENT; + goto bad; + } *vpp = dp; + if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); } - - if (cnp->cn_flags & ISDOTDOT) - panic ("relookup: lookup on dot-dot"); - /* * We now have a segment name to search for, and a directory to search. 
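The check_mounted_on logic introduced above replaces the old vfs_busy(..., LK_NOWAIT) loop for crossing mount points. Stripped of the interleaved diff context, its shape is roughly the following sketch (cross_mounts is a hypothetical wrapper around the KPIs and fields used above):

static int
cross_mounts(vnode_t *dpp, vfs_context_t ctx)
{
	vnode_t dp = *dpp, tdp;
	mount_t mp;
	int error;

	while (dp->v_type == VDIR && dp->v_mountedhere) {
		vnode_lock(dp);
		if ((mp = dp->v_mountedhere) == NULL) {	/* lost a race with unmount */
			vnode_unlock(dp);
			break;
		}
		mp->mnt_crossref++;			/* pin mp while vfs_busy may block */
		vnode_unlock(dp);

		if (vfs_busy(mp, 0)) {			/* unmount in progress: retry */
			mount_dropcrossref(mp, dp, 0);
			continue;
		}
		error = VFS_ROOT(mp, &tdp, ctx);	/* root of the covering fs */
		mount_dropcrossref(mp, dp, 1);		/* non-zero 3rd arg also vnode_put()s dp */
		vfs_unbusy(mp);

		if (error) {
			*dpp = NULLVP;			/* dp's iocount is already gone */
			return (error);
		}
		dp = tdp;				/* keep going from the new root */
	}
	*dpp = dp;
	return (0);
}

The mnt_crossref count keeps the mount structure alive across the potentially blocking vfs_busy(), and the third argument to mount_dropcrossref() folds the vnode_put() of the covered vnode into the same call.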
*/ - if (error = VOP_LOOKUP(dp, vpp, cnp)) { + if ( (error = VNOP_LOOKUP(dp, vpp, cnp, ctx)) ) { + if (error != EJUSTRETURN) + goto bad; #if DIAGNOSTIC if (*vpp != NULL) panic("leaf should be empty"); #endif - if (error != EJUSTRETURN) - goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. @@ -778,9 +868,6 @@ relookup(dvp, vpp, cnp) error = EROFS; goto bad; } - /* ASSERT(dvp == ndp->ni_startdir) */ - if (cnp->cn_flags & SAVESTART) - VREF(dvp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the @@ -807,25 +894,36 @@ relookup(dvp, vpp, cnp) goto bad2; } /* ASSERT(dvp == ndp->ni_startdir) */ - if (cnp->cn_flags & SAVESTART) - VREF(dvp); - if (!wantparent) - vrele(dvp); - if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp, 0, p); return (0); bad2: - if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp, 0, p); - vrele(dvp); -bad: - vput(dp); + vnode_put(dp); +bad: *vpp = NULL; + return (error); } +/* + * Free pathname buffer + */ +void +nameidone(struct nameidata *ndp) +{ + if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) { + ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; + unlock_fsnode(ndp->ni_dvp, NULL); + } + if (ndp->ni_cnd.cn_flags & HASBUF) { + char *tmp = ndp->ni_cnd.cn_pnbuf; + + ndp->ni_cnd.cn_pnbuf = NULL; + ndp->ni_cnd.cn_flags &= ~HASBUF; + FREE_ZONE(tmp, ndp->ni_cnd.cn_pnlen, M_NAMEI); + } +} + #define NUMPARMS 23 @@ -834,7 +932,7 @@ kdebug_lookup(dp, cnp) struct vnode *dp; struct componentname *cnp; { - register int i, n; + register unsigned int i, n; register int dbg_namelen; register int save_dbg_namelen; register char *dbg_nameptr; diff --git a/bsd/vfs/vfs_quota.c b/bsd/vfs/vfs_quota.c index 51bd854f8..118b7492d 100644 --- a/bsd/vfs/vfs_quota.c +++ b/bsd/vfs/vfs_quota.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,13 +62,32 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +/* vars for quota file lock */ +lck_grp_t * qf_lck_grp; +lck_grp_attr_t * qf_lck_grp_attr; +lck_attr_t * qf_lck_attr; + +/* vars for quota list lock */ +lck_grp_t * quota_list_lck_grp; +lck_grp_attr_t * quota_list_lck_grp_attr; +lck_attr_t * quota_list_lck_attr; +lck_mtx_t * quota_list_mtx_lock; + +/* Routines to lock and unlock the quota global data */ +static void dq_list_lock(void); +static void dq_list_unlock(void); + +static void dq_lock_internal(struct dquot *dq); +static void dq_unlock_internal(struct dquot *dq); + static u_int32_t quotamagic[MAXQUOTAS] = INITQMAGICS; @@ -80,20 +99,26 @@ static u_int32_t quotamagic[MAXQUOTAS] = INITQMAGICS; LIST_HEAD(dqhash, dquot) *dqhashtbl; u_long dqhash; +#define DQUOTINC 5 /* minimum free dquots desired */ +long numdquot, desireddquot = DQUOTINC; + /* * Dquot free list. */ -#define DQUOTINC 5 /* minimum free dquots desired */ TAILQ_HEAD(dqfreelist, dquot) dqfreelist; -long numdquot, desireddquot = DQUOTINC; - /* - * Dquot dirty orphans list. 
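The new nameidone() above centralizes the pathname-buffer teardown that callers of namei() previously open-coded with FREE_ZONE and HASBUF manipulation. The expected pairing looks roughly like this sketch, under the assumption of the NDINIT/namei interfaces used elsewhere in this patch (lookup_and_use is hypothetical):

static int
lookup_and_use(const char *path, vfs_context_t ctx)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
	if ((error = namei(&nd)))
		return (error);		/* namei's error_out path freed the buffer */

	/* ... nd.ni_vp comes back holding an iocount ... */

	vnode_put(nd.ni_vp);		/* drop the iocount on the result */
	nameidone(&nd);			/* release the HASBUF pathname buffer */
	return (0);
}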
+ * Dquot dirty orphans list
  */
 TAILQ_HEAD(dqdirtylist, dquot) dqdirtylist;
 
-static int dqlookup(struct quotafile *, u_long, struct dqblk *, u_int32_t *);
+static int dqlookup(struct quotafile *, u_long, struct dqblk *, u_int32_t *);
+static int dqsync_locked(struct dquot *dq);
+
+static void qf_lock(struct quotafile *);
+static void qf_unlock(struct quotafile *);
+static int qf_ref(struct quotafile *);
+static void qf_rele(struct quotafile *);
 
 /*
@@ -106,11 +131,245 @@ dqinit()
 	dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash);
 	TAILQ_INIT(&dqfreelist);
 	TAILQ_INIT(&dqdirtylist);
+
+	/*
+	 * Allocate quota list lock group attribute and group
+	 */
+	quota_list_lck_grp_attr= lck_grp_attr_alloc_init();
+	lck_grp_attr_setstat(quota_list_lck_grp_attr);
+	quota_list_lck_grp = lck_grp_alloc_init("quota list", quota_list_lck_grp_attr);
+
+	/*
+	 * Allocate quota list lock attribute
+	 */
+	quota_list_lck_attr = lck_attr_alloc_init();
+	//lck_attr_setdebug(quota_list_lck_attr);
+
+	/*
+	 * Allocate quota list lock
+	 */
+	quota_list_mtx_lock = lck_mtx_alloc_init(quota_list_lck_grp, quota_list_lck_attr);
+
+
+	/*
+	 * allocate quota file lock group attribute and group
+	 */
+	qf_lck_grp_attr= lck_grp_attr_alloc_init();
+	lck_grp_attr_setstat(qf_lck_grp_attr);
+	qf_lck_grp = lck_grp_alloc_init("quota file", qf_lck_grp_attr);
+
+	/*
+	 * Allocate quota file lock attribute
+	 */
+	qf_lck_attr = lck_attr_alloc_init();
+	//lck_attr_setdebug(qf_lck_attr);
+}
+
+
+
+void
+dq_list_lock(void)
+{
+	lck_mtx_lock(quota_list_mtx_lock);
+}
+
+void
+dq_list_unlock(void)
+{
+	lck_mtx_unlock(quota_list_mtx_lock);
+}
+
+
+/*
+ * must be called with the quota_list_lock held
+ */
+void
+dq_lock_internal(struct dquot *dq)
+{
+	while (dq->dq_lflags & DQ_LLOCK) {
+		dq->dq_lflags |= DQ_LWANT;
+		msleep(&dq->dq_lflags, quota_list_mtx_lock, PVFS, "dq_lock_internal", 0);
+	}
+	dq->dq_lflags |= DQ_LLOCK;
+}
+
+/*
+ * must be called with the quota_list_lock held
+ */
+void
+dq_unlock_internal(struct dquot *dq)
+{
+	int wanted = dq->dq_lflags & DQ_LWANT;
+
+	dq->dq_lflags &= ~(DQ_LLOCK | DQ_LWANT);
+
+	if (wanted)
+		wakeup(&dq->dq_lflags);
+}
+
+void
+dqlock(struct dquot *dq) {
+
+	lck_mtx_lock(quota_list_mtx_lock);
+
+	dq_lock_internal(dq);
+
+	lck_mtx_unlock(quota_list_mtx_lock);
+}
+
+void
+dqunlock(struct dquot *dq) {
+
+	lck_mtx_lock(quota_list_mtx_lock);
+
+	dq_unlock_internal(dq);
+
+	lck_mtx_unlock(quota_list_mtx_lock);
+}
+
+
+
+int
+qf_get(struct quotafile *qfp, int type)
+{
+	int error = 0;
+
+	dq_list_lock();
+
+	switch (type) {
+
+	case QTF_OPENING:
+		while ( (qfp->qf_qflags & (QTF_OPENING | QTF_CLOSING)) ) {
+			if ( (qfp->qf_qflags & QTF_OPENING) ) {
+				error = EBUSY;
+				break;
+			}
+			if ( (qfp->qf_qflags & QTF_CLOSING) ) {
+				qfp->qf_qflags |= QTF_WANTED;
+				msleep(&qfp->qf_qflags, quota_list_mtx_lock, PVFS, "qf_get", 0);
+			}
+		}
+		if (qfp->qf_vp != NULLVP)
+			error = EBUSY;
+		if (error == 0)
+			qfp->qf_qflags |= QTF_OPENING;
+		break;
+
+	case QTF_CLOSING:
+		if ( (qfp->qf_qflags & QTF_CLOSING) ) {
+			error = EBUSY;
+			break;
+		}
+		qfp->qf_qflags |= QTF_CLOSING;
+
+		while ( (qfp->qf_qflags & QTF_OPENING) || qfp->qf_refcnt ) {
+			qfp->qf_qflags |= QTF_WANTED;
+			msleep(&qfp->qf_qflags, quota_list_mtx_lock, PVFS, "qf_get", 0);
+		}
+		if (qfp->qf_vp == NULLVP) {
+			qfp->qf_qflags &= ~QTF_CLOSING;
+			error = EBUSY;
+		}
+		break;
+	}
+	dq_list_unlock();
+
+	return (error);
+}
+
+void
+qf_put(struct quotafile *qfp, int type)
+{
+
+	dq_list_lock();
+
+	switch (type) {
+
+	case QTF_OPENING:
+	case QTF_CLOSING:
+		qfp->qf_qflags &= ~type;
+		break;
+	}
+	if ( (qfp->qf_qflags & QTF_WANTED) ) {
+		qfp->qf_qflags &= ~QTF_WANTED;
+		wakeup(&qfp->qf_qflags);
+	}
+	dq_list_unlock();
+}
+
+
+static void
+qf_lock(struct quotafile *qfp)
+{
+	lck_mtx_lock(&qfp->qf_lock);
+}
+
+static void
+qf_unlock(struct quotafile *qfp)
+{
+	lck_mtx_unlock(&qfp->qf_lock);
+}
+
+
+/*
+ * take a reference on the quota file while we're
+ * in dqget... this will prevent a quota_off from
+ * occurring while we're potentially playing with
+ * the quota file... the quota_off will stall until
+ * all the current references 'die'... once we start
+ * into quota_off, all new references will be rejected;
+ * we also don't want any dqgets being processed while
+ * we're in the middle of the quota_on... once we've
+ * actually got the quota file open and the associated
+ * struct quotafile inited, we can let them come through
+ *
+ * quota list lock must be held on entry
+ */
+static int
+qf_ref(struct quotafile *qfp)
+{
+	int error = 0;
+
+	if ( (qfp->qf_qflags & (QTF_OPENING | QTF_CLOSING)) || (qfp->qf_vp == NULLVP) )
+		error = EINVAL;
+	else
+		qfp->qf_refcnt++;
+
+	return (error);
+}
+
+/*
+ * drop our reference and wakeup any waiters if
+ * we were the last one holding a ref
+ *
+ * quota list lock must be held on entry
+ */
+static void
+qf_rele(struct quotafile *qfp)
+{
+	qfp->qf_refcnt--;
+
+	if ( (qfp->qf_qflags & QTF_WANTED) && qfp->qf_refcnt == 0) {
+		qfp->qf_qflags &= ~QTF_WANTED;
+		wakeup(&qfp->qf_qflags);
+	}
+}
+
+
+void
+dqfileinit(struct quotafile *qfp)
+{
+	qfp->qf_vp = NULLVP;
+	qfp->qf_qflags = 0;
+
+	lck_mtx_init(&qfp->qf_lock, qf_lck_grp, qf_lck_attr);
+}
 
 /*
  * Initialize a quota file
+ *
+ * must be called with the quota file lock held
  */
 int
 dqfileopen(qfp, type)
@@ -118,39 +377,38 @@ dqfileopen(qfp, type)
 	int type;
 {
 	struct dqfilehdr header;
-	struct vattr vattr;
-	struct iovec aiov;
-	struct uio auio;
-	int error;
+	struct vfs_context context;
+	off_t file_size;
+	uio_t auio;
+	int error = 0;
+	char uio_buf[ UIO_SIZEOF(1) ];
 
+	context.vc_proc = current_proc();
+	context.vc_ucred = qfp->qf_cred;
+
 	/* Obtain the file size */
-	error = VOP_GETATTR(qfp->qf_vp, &vattr, qfp->qf_cred, current_proc());
-	if (error)
-		return (error);
+	if ((error = vnode_size(qfp->qf_vp, &file_size, &context)) != 0)
+		goto out;
 
 	/* Read the file header */
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	aiov.iov_base = (caddr_t)&header;
-	aiov.iov_len = sizeof (header);
-	auio.uio_resid = sizeof (header);
-	auio.uio_offset = (off_t)(0);
-	auio.uio_segflg = UIO_SYSSPACE;
-	auio.uio_rw = UIO_READ;
-	auio.uio_procp = (struct proc *)0;
-	error = VOP_READ(qfp->qf_vp, &auio, 0, qfp->qf_cred);
+	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+	    &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, CAST_USER_ADDR_T(&header), sizeof (header));
+	error = VNOP_READ(qfp->qf_vp, auio, 0, &context);
 	if (error)
-		return (error);
-	else if (auio.uio_resid)
-		return (EINVAL);
-
+		goto out;
+	else if (uio_resid(auio)) {
+		error = EINVAL;
+		goto out;
+	}
 	/* Sanity check the quota file header. */
 	if ((header.dqh_magic != quotamagic[type]) ||
 	    (header.dqh_version > QF_VERSION) ||
 	    (!powerof2(header.dqh_maxentries)) ||
-	    (header.dqh_maxentries > (vattr.va_size / sizeof(struct dqblk))))
-		return (EINVAL);
-
+	    (header.dqh_maxentries > (file_size / sizeof(struct dqblk)))) {
+		error = EINVAL;
+		goto out;
+	}
 	/* Set up the time limits for this quota.
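dq_lock_internal()/dq_unlock_internal() above, and qf_get()'s QTF_WANTED handshake, are both instances of the same idiom: a flag-based sleep lock built from msleep()/wakeup() underneath an external mutex, which msleep() atomically drops and retakes. The idiom in generic form (struct flagged and the F_* bits are illustrative only):

#define F_LOCKED 0x01
#define F_WANTED 0x02

struct flagged {
	int	fl_flags;
};

/* caller holds 'mtx'; msleep drops and retakes it while sleeping */
static void
flag_lock(struct flagged *fp, lck_mtx_t *mtx)
{
	while (fp->fl_flags & F_LOCKED) {
		fp->fl_flags |= F_WANTED;
		msleep(&fp->fl_flags, mtx, PVFS, "flag_lock", 0);
	}
	fp->fl_flags |= F_LOCKED;
}

/* caller holds 'mtx' */
static void
flag_unlock(struct flagged *fp)
{
	int wanted = fp->fl_flags & F_WANTED;

	fp->fl_flags &= ~(F_LOCKED | F_WANTED);
	if (wanted)
		wakeup(&fp->fl_flags);
}

Because msleep() releases the mutex while sleeping, the while loop (not an if) is essential: the flag must be retested after every wakeup.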
*/ if (header.dqh_btime > 0) qfp->qf_btime = header.dqh_btime; @@ -165,44 +423,33 @@ dqfileopen(qfp, type) qfp->qf_maxentries = header.dqh_maxentries; qfp->qf_entrycnt = header.dqh_entrycnt; qfp->qf_shift = dqhashshift(header.dqh_maxentries); - - return (0); +out: + return (error); } /* * Close down a quota file */ void -dqfileclose(qfp, type) - struct quotafile *qfp; - int type; +dqfileclose(struct quotafile *qfp, __unused int type) { struct dqfilehdr header; - struct iovec aiov; - struct uio auio; - - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t)&header; - aiov.iov_len = sizeof (header); - auio.uio_resid = sizeof (header); - auio.uio_offset = (off_t)(0); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_READ; - auio.uio_procp = (struct proc *)0; - if (VOP_READ(qfp->qf_vp, &auio, 0, qfp->qf_cred) == 0) { - header.dqh_entrycnt = qfp->qf_entrycnt; + struct vfs_context context; + uio_t auio; + char uio_buf[ UIO_SIZEOF(1) ]; + + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(&header), sizeof (header)); - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t)&header; - aiov.iov_len = sizeof (header); - auio.uio_resid = sizeof (header); - auio.uio_offset = (off_t)(0); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_procp = (struct proc *)0; - (void) VOP_WRITE(qfp->qf_vp, &auio, 0, qfp->qf_cred); + context.vc_proc = current_proc(); + context.vc_ucred = qfp->qf_cred; + + if (VNOP_READ(qfp->qf_vp, auio, 0, &context) == 0) { + header.dqh_entrycnt = qfp->qf_entrycnt; + uio_reset(auio, 0, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, CAST_USER_ADDR_T(&header), sizeof (header)); + (void) VNOP_WRITE(qfp->qf_vp, auio, 0, &context); } } @@ -212,92 +459,249 @@ dqfileclose(qfp, type) * reading the information from the file if necessary. */ int -dqget(vp, id, qfp, type, dqp) - struct vnode *vp; +dqget(id, qfp, type, dqp) u_long id; struct quotafile *qfp; register int type; struct dquot **dqp; { - struct proc *p = current_proc(); /* XXX */ struct dquot *dq; + struct dquot *ndq = NULL; + struct dquot *fdq = NULL; struct dqhash *dqh; struct vnode *dqvp; int error = 0; - dqvp = qfp->qf_vp; - if (id == 0 || dqvp == NULLVP || (qfp->qf_qflags & QTF_CLOSING)) { + if ( id == 0 || qfp->qf_vp == NULLVP ) { + *dqp = NODQUOT; + return (EINVAL); + } + dq_list_lock(); + + if ( (qf_ref(qfp)) ) { + dq_list_unlock(); + + *dqp = NODQUOT; + return (EINVAL); + } + if ( (dqvp = qfp->qf_vp) == NULLVP ) { + qf_rele(qfp); + dq_list_unlock(); + *dqp = NODQUOT; return (EINVAL); } + dqh = DQHASH(dqvp, id); + +relookup: /* * Check the cache first. */ - dqh = DQHASH(dqvp, id); for (dq = dqh->lh_first; dq; dq = dq->dq_hash.le_next) { if (dq->dq_id != id || dq->dq_qfile->qf_vp != dqvp) continue; + + dq_lock_internal(dq); + /* + * dq_lock_internal may drop the quota_list_lock to msleep, so + * we need to re-evaluate the identity of this dq + */ + if (dq->dq_id != id || dq->dq_qfile == NULL || + dq->dq_qfile->qf_vp != dqvp) { + dq_unlock_internal(dq); + goto relookup; + } /* * Cache hit with no references. Take * the structure off the free list. 
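dqfileclose() above reuses a single stack-backed uio for a read-modify-write of the on-disk header, flipping its direction with uio_reset(). The same pattern in isolation (update_header_count is a hypothetical helper; the uio and VNOP KPIs are the ones used above):

static int
update_header_count(vnode_t vp, vfs_context_t ctx, u_int32_t newcnt)
{
	struct dqfilehdr hdr;
	uio_t auio;
	char uio_buf[ UIO_SIZEOF(1) ];
	int error;

	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(&hdr), sizeof (hdr));

	if ((error = VNOP_READ(vp, auio, 0, ctx)))
		return (error);
	if (uio_resid(auio))
		return (EINVAL);		/* short read: not a valid header */

	hdr.dqh_entrycnt = newcnt;		/* the modify step */

	uio_reset(auio, 0, UIO_SYSSPACE, UIO_WRITE);	/* rewind, flip direction */
	uio_addiov(auio, CAST_USER_ADDR_T(&hdr), sizeof (hdr));

	return (VNOP_WRITE(vp, auio, 0, ctx));
}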
*/ - if (dq->dq_cnt == 0) { + if (dq->dq_cnt++ == 0) { if (dq->dq_flags & DQ_MOD) TAILQ_REMOVE(&dqdirtylist, dq, dq_freelist); else TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); } - DQREF(dq); + dq_unlock_internal(dq); + + if (fdq != NULL) { + /* + * we grabbed this from the free list in the first pass + * but we found the dq we were looking for in + * the cache the 2nd time through + * so stick it back on the free list and return the cached entry + */ + TAILQ_INSERT_HEAD(&dqfreelist, fdq, dq_freelist); + } + qf_rele(qfp); + dq_list_unlock(); + + if (ndq != NULL) { + /* + * we allocated this in the first pass + * but we found the dq we were looking for in + * the cache the 2nd time through so free it + */ + _FREE(ndq, M_DQUOT); + } *dqp = dq; + return (0); } /* * Not in cache, allocate a new one. */ - if (dqfreelist.tqh_first == NODQUOT && + if (TAILQ_EMPTY(&dqfreelist) && numdquot < MAXQUOTAS * desiredvnodes) desireddquot += DQUOTINC; - if (numdquot < desireddquot) { - dq = (struct dquot *)_MALLOC(sizeof *dq, M_DQUOT, M_WAITOK); - bzero((char *)dq, sizeof *dq); - numdquot++; + + if (fdq != NULL) { + /* + * we captured this from the free list + * in the first pass through, so go + * ahead and use it + */ + dq = fdq; + fdq = NULL; + } else if (numdquot < desireddquot) { + if (ndq == NULL) { + /* + * drop the quota list lock since MALLOC may block + */ + dq_list_unlock(); + + ndq = (struct dquot *)_MALLOC(sizeof *dq, M_DQUOT, M_WAITOK); + bzero((char *)ndq, sizeof *dq); + + dq_list_lock(); + /* + * need to look for the entry again in the cache + * since we dropped the quota list lock and + * someone else may have beaten us to creating it + */ + goto relookup; + } else { + /* + * we allocated this in the first pass through + * and we're still under out target, so go + * ahead and use it + */ + dq = ndq; + ndq = NULL; + numdquot++; + } } else { - if ((dq = dqfreelist.tqh_first) == NULL) { + if (TAILQ_EMPTY(&dqfreelist)) { + qf_rele(qfp); + dq_list_unlock(); + + if (ndq) { + /* + * we allocated this in the first pass through + * but we're now at the limit of our cache size + * so free it + */ + _FREE(ndq, M_DQUOT); + } tablefull("dquot"); *dqp = NODQUOT; return (EUSERS); } - if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) - panic("free dquot isn't"); + dq = TAILQ_FIRST(&dqfreelist); + + dq_lock_internal(dq); + + if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) { + /* + * we lost the race while we weren't holding + * the quota list lock... dq_lock_internal + * will drop it to msleep... this dq has been + * reclaimed... go find another + */ + dq_unlock_internal(dq); + + /* + * need to look for the entry again in the cache + * since we dropped the quota list lock and + * someone else may have beaten us to creating it + */ + goto relookup; + } TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); - LIST_REMOVE(dq, dq_hash); + + if (dq->dq_qfile != NULL) { + LIST_REMOVE(dq, dq_hash); + dq->dq_qfile = NULL; + dq->dq_id = 0; + } + dq_unlock_internal(dq); + + /* + * because we may have dropped the quota list lock + * in the call to dq_lock_internal, we need to + * relookup in the hash in case someone else + * caused a dq with this identity to be created... + * if we don't find it, we'll use this one + */ + fdq = dq; + goto relookup; } + /* + * we've either freshly allocated a dq + * or we've atomically pulled it out of + * the hash and freelists... no one else + * can have a reference, which means no + * one else can be trying to use this dq + */ + dq_lock_internal(dq); + /* * Initialize the contents of the dquot structure. 
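The fdq/ndq juggling in dqget() above follows a standard pattern: any time the quota list lock is dropped (around _MALLOC, or inside dq_lock_internal()), the cache must be searched again, and a speculatively allocated or captured entry is kept for the retry and released if the retry finds a winner. Reduced to its skeleton (the entry type, hash_find/hash_insert, and list_mtx are illustrative only):

struct entry {
	u_long	e_id;
	/* ... */
};

static lck_mtx_t *list_mtx;			/* assumed initialized elsewhere */
static struct entry *hash_find(u_long);		/* hypothetical */
static void hash_insert(struct entry *);	/* hypothetical */

static struct entry *
get_entry(u_long id)
{
	struct entry *e, *ne = NULL;

	lck_mtx_lock(list_mtx);
relookup:
	if ((e = hash_find(id)) != NULL) {
		lck_mtx_unlock(list_mtx);
		if (ne != NULL)
			_FREE(ne, M_TEMP);	/* surplus from an earlier pass */
		return (e);
	}
	if (ne == NULL) {
		lck_mtx_unlock(list_mtx);	/* _MALLOC may block */
		ne = _MALLOC(sizeof (*ne), M_TEMP, M_WAITOK);
		lck_mtx_lock(list_mtx);
		goto relookup;			/* the lock was dropped: re-check */
	}
	e = ne;					/* lock held since the last miss */
	e->e_id = id;
	hash_insert(e);
	lck_mtx_unlock(list_mtx);
	return (e);
}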
*/ - if (vp != dqvp) - vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); - LIST_INSERT_HEAD(dqh, dq, dq_hash); - DQREF(dq); - dq->dq_flags = DQ_LOCK; + dq->dq_cnt = 1; + dq->dq_flags = 0; dq->dq_id = id; dq->dq_qfile = qfp; dq->dq_type = type; + /* + * once we insert it in the hash and + * drop the quota_list_lock, it can be + * 'found'... however, we're still holding + * the dq_lock which will keep us from doing + * anything with it until we've finished + * initializing it... + */ + LIST_INSERT_HEAD(dqh, dq, dq_hash); + dq_list_unlock(); + + if (ndq) { + /* + * we allocated this in the first pass through + * but we didn't need it, so free it after + * we've droped the quota list lock + */ + _FREE(ndq, M_DQUOT); + } + error = dqlookup(qfp, id, &dq->dq_dqb, &dq->dq_index); - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, p); - if (dq->dq_flags & DQ_WANT) - wakeup((caddr_t)dq); - dq->dq_flags = 0; /* * I/O error in reading quota file, release * quota structure and reflect problem to caller. */ if (error) { + dq_list_lock(); + + dq->dq_id = 0; + dq->dq_qfile = NULL; LIST_REMOVE(dq, dq_hash); - dqrele(vp, dq); + + dq_unlock_internal(dq); + qf_rele(qfp); + dq_list_unlock(); + + dqrele(dq); + *dqp = NODQUOT; return (error); } @@ -309,18 +713,26 @@ dqget(vp, id, qfp, type, dqp) dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; if (dq->dq_id != 0) { + struct timeval tv; + + microtime(&tv); if (dq->dq_btime == 0) - dq->dq_btime = time.tv_sec + qfp->qf_btime; + dq->dq_btime = tv.tv_sec + qfp->qf_btime; if (dq->dq_itime == 0) - dq->dq_itime = time.tv_sec + qfp->qf_itime; + dq->dq_itime = tv.tv_sec + qfp->qf_itime; } + dq_list_lock(); + dq_unlock_internal(dq); + qf_rele(qfp); + dq_list_unlock(); + *dqp = dq; return (0); } /* * Lookup a dqblk structure for the specified identifier and - * quota file. If there is no enetry for this identifier then + * quota file. If there is no entry for this identifier then * one is inserted. The actual hash table index is returned. 
*/ static int @@ -331,22 +743,20 @@ dqlookup(qfp, id, dqb, index) u_int32_t *index; { struct vnode *dqvp; - struct ucred *cred; - struct iovec aiov; - struct uio auio; + struct vfs_context context; + uio_t auio; int i, skip, last; u_long mask; int error = 0; + char uio_buf[ UIO_SIZEOF(1) ]; + + + qf_lock(qfp); - if (id == 0) - return (EINVAL); dqvp = qfp->qf_vp; - cred = qfp->qf_cred; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; + context.vc_proc = current_proc(); + context.vc_ucred = qfp->qf_cred; mask = qfp->qf_maxentries - 1; i = dqhash1(id, qfp->qf_shift, mask); @@ -355,17 +765,14 @@ dqlookup(qfp, id, dqb, index) for (last = (i + (qfp->qf_maxentries-1) * skip) & mask; i != last; i = (i + skip) & mask) { - - aiov.iov_base = (caddr_t)dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)dqoffset(i); - auio.uio_rw = UIO_READ; - error = VOP_READ(dqvp, &auio, 0, cred); + auio = uio_createwithbuffer(1, dqoffset(i), UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(dqb), sizeof (struct dqblk)); + error = VNOP_READ(dqvp, auio, 0, &context); if (error) { printf("dqlookup: error %d looking up id %d at index %d\n", error, id, i); break; - } else if (auio.uio_resid) { + } else if (uio_resid(auio)) { error = EIO; printf("dqlookup: error looking up id %d at index %d\n", id, i); break; @@ -381,13 +788,10 @@ dqlookup(qfp, id, dqb, index) /* * Write back to reserve entry for this id */ - aiov.iov_base = (caddr_t)dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)dqoffset(i); - auio.uio_rw = UIO_WRITE; - error = VOP_WRITE(dqvp, &auio, 0, cred); - if (auio.uio_resid && error == 0) + uio_reset(auio, dqoffset(i), UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, CAST_USER_ADDR_T(dqb), sizeof (struct dqblk)); + error = VNOP_WRITE(dqvp, auio, 0, &context); + if (uio_resid(auio) && error == 0) error = EIO; if (error == 0) ++qfp->qf_entrycnt; @@ -397,62 +801,66 @@ dqlookup(qfp, id, dqb, index) if (dqb->dqb_id == id) break; } - + qf_unlock(qfp); + *index = i; /* remember index so we don't have to recompute it later */ + return (error); } -/* - * Obtain a reference to a dquot. - */ -void -dqref(dq) - struct dquot *dq; -{ - - dq->dq_cnt++; -} /* * Release a reference to a dquot. */ void -dqrele(vp, dq) - struct vnode *vp; - register struct dquot *dq; +dqrele(struct dquot *dq) { if (dq == NODQUOT) return; + dqlock(dq); + if (dq->dq_cnt > 1) { dq->dq_cnt--; + + dqunlock(dq); return; } if (dq->dq_flags & DQ_MOD) - (void) dqsync(vp, dq); - if (--dq->dq_cnt > 0) - return; + (void) dqsync_locked(dq); + dq->dq_cnt--; + + dq_list_lock(); TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); + dq_unlock_internal(dq); + dq_list_unlock(); } /* * Release a reference to a dquot but don't do any I/O. 
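dqlookup() above probes the on-disk table with double hashing: a start slot from dqhash1(), an odd stride 'skip', and a power-of-two table size, so the sequence (i + n*skip) & mask visits every slot exactly once before wrapping. A small user-space check of that coverage property (the constants stand in for the hash results):

#include <assert.h>
#include <string.h>

int
main(void)
{
	unsigned mask = 8 - 1;		/* maxentries = 8, a power of two */
	unsigned i = 5, skip = 3;	/* stride must be odd */
	char seen[8];
	unsigned n;

	memset(seen, 0, sizeof(seen));
	for (n = 0; n < 8; n++, i = (i + skip) & mask)
		seen[i] = 1;
	for (n = 0; n < 8; n++)
		assert(seen[n]);	/* every slot probed exactly once */
	return 0;
}

The stride being odd makes it coprime with the power-of-two table size, which is what guarantees the full cycle.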
*/ void -dqreclaim(vp, dq) - struct vnode *vp; - register struct dquot *dq; +dqreclaim(register struct dquot *dq) { + if (dq == NODQUOT) return; - if (--dq->dq_cnt > 0) - return; + dq_list_lock(); + dq_lock_internal(dq); + if (--dq->dq_cnt > 0) { + dq_unlock_internal(dq); + dq_list_unlock(); + return; + } if (dq->dq_flags & DQ_MOD) TAILQ_INSERT_TAIL(&dqdirtylist, dq, dq_freelist); else TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); + + dq_unlock_internal(dq); + dq_list_unlock(); } /* @@ -463,80 +871,108 @@ dqsync_orphans(qfp) struct quotafile *qfp; { struct dquot *dq; - + + dq_list_lock(); loop: TAILQ_FOREACH(dq, &dqdirtylist, dq_freelist) { - if ((dq->dq_flags & DQ_MOD) == 0) - panic("dqsync_orphans: dirty dquot isn't"); + if (dq->dq_qfile != qfp) + continue; + + dq_lock_internal(dq); + + if (dq->dq_qfile != qfp) { + /* + * the identity of this dq changed while + * the quota_list_lock was dropped + * dq_lock_internal can drop it to msleep + */ + dq_unlock_internal(dq); + goto loop; + } + if ((dq->dq_flags & DQ_MOD) == 0) { + /* + * someone cleaned and removed this from + * the dq from the dirty list while the + * quota_list_lock was dropped + */ + dq_unlock_internal(dq); + goto loop; + } if (dq->dq_cnt != 0) panic("dqsync_orphans: dquot in use"); - if (dq->dq_qfile == qfp) { - TAILQ_REMOVE(&dqdirtylist, dq, dq_freelist); + TAILQ_REMOVE(&dqdirtylist, dq, dq_freelist); - dq->dq_cnt++; - (void) dqsync(NULLVP, dq); - dq->dq_cnt--; + dq_list_unlock(); + /* + * we're still holding the dqlock at this point + * with the reference count == 0 + * we shouldn't be able + * to pick up another one since we hold dqlock + */ + (void) dqsync_locked(dq); + + dq_list_lock(); - if ((dq->dq_cnt == 0) && (dq->dq_flags & DQ_MOD) == 0) - TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); + TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); - goto loop; - } + dq_unlock_internal(dq); + goto loop; + } + dq_list_unlock(); +} + +int +dqsync(struct dquot *dq) +{ + int error = 0; + + if (dq != NODQUOT) { + dqlock(dq); + + if ( (dq->dq_flags & DQ_MOD) ) + error = dqsync_locked(dq); + + dqunlock(dq); } + return (error); } + /* * Update the disk quota in the quota file. 
*/ int -dqsync(vp, dq) - struct vnode *vp; - struct dquot *dq; +dqsync_locked(struct dquot *dq) { struct proc *p = current_proc(); /* XXX */ + struct vfs_context context; struct vnode *dqvp; - struct iovec aiov; - struct uio auio; + uio_t auio; int error; + char uio_buf[ UIO_SIZEOF(1) ]; - if (dq == NODQUOT) - panic("dqsync: dquot"); - if ((dq->dq_flags & DQ_MOD) == 0) + if (dq->dq_id == 0) { + dq->dq_flags &= ~DQ_MOD; return (0); - if (dq->dq_id == 0) - return(0); - if ((dqvp = dq->dq_qfile->qf_vp) == NULLVP) - panic("dqsync: file"); - if (vp != dqvp) - vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); - while (dq->dq_flags & DQ_LOCK) { - dq->dq_flags |= DQ_WANT; - sleep((caddr_t)dq, PINOD+2); - if ((dq->dq_flags & DQ_MOD) == 0) { - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, p); - return (0); - } } - dq->dq_flags |= DQ_LOCK; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = (caddr_t)&dq->dq_dqb; - aiov.iov_len = sizeof (struct dqblk); - auio.uio_resid = sizeof (struct dqblk); - auio.uio_offset = (off_t)dqoffset(dq->dq_index); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_rw = UIO_WRITE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dqvp, &auio, 0, dq->dq_qfile->qf_cred); - if (auio.uio_resid && error == 0) + if (dq->dq_qfile == NULL) + panic("dqsync: NULL dq_qfile"); + if ((dqvp = dq->dq_qfile->qf_vp) == NULLVP) + panic("dqsync: NULL qf_vp"); + + auio = uio_createwithbuffer(1, dqoffset(dq->dq_index), UIO_SYSSPACE, + UIO_WRITE, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(&dq->dq_dqb), sizeof (struct dqblk)); + + context.vc_proc = p; + context.vc_ucred = dq->dq_qfile->qf_cred; + + error = VNOP_WRITE(dqvp, auio, 0, &context); + if (uio_resid(auio) && error == 0) error = EIO; - if (dq->dq_flags & DQ_WANT) - wakeup((caddr_t)dq); - dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); - if (vp != dqvp) - VOP_UNLOCK(dqvp, 0, p); + dq->dq_flags &= ~DQ_MOD; + return (error); } @@ -555,6 +991,8 @@ dqflush(vp) * file off their hash chains (they will eventually * fall off the head of the free list and be re-used). */ + dq_list_lock(); + for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { for (dq = dqh->lh_first; dq; dq = nextdq) { nextdq = dq->dq_hash.le_next; @@ -563,7 +1001,32 @@ dqflush(vp) if (dq->dq_cnt) panic("dqflush: stray dquot"); LIST_REMOVE(dq, dq_hash); - dq->dq_qfile = (struct quotafile *)0; + dq->dq_qfile = NULL; } } + dq_list_unlock(); +} + +/* + * LP64 support for munging dqblk structure. + * XXX conversion of user_time_t to time_t loses precision; not an issue for + * XXX us now, since we are only ever setting 32 bits worth of time into it. 
+ */ +__private_extern__ void +munge_dqblk(struct dqblk *dqblkp, struct user_dqblk *user_dqblkp, boolean_t to64) +{ + if (to64) { + /* munge kernel (32 bit) dqblk into user (64 bit) dqblk */ + bcopy((caddr_t)dqblkp, (caddr_t)user_dqblkp, offsetof(struct dqblk, dqb_btime)); + user_dqblkp->dqb_id = dqblkp->dqb_id; + user_dqblkp->dqb_itime = dqblkp->dqb_itime; + user_dqblkp->dqb_btime = dqblkp->dqb_btime; + } + else { + /* munge user (64 bit) dqblk into kernel (32 bit) dqblk */ + bcopy((caddr_t)user_dqblkp, (caddr_t)dqblkp, offsetof(struct dqblk, dqb_btime)); + dqblkp->dqb_id = user_dqblkp->dqb_id; + dqblkp->dqb_itime = user_dqblkp->dqb_itime; /* XXX - lose precision */ + dqblkp->dqb_btime = user_dqblkp->dqb_btime; /* XXX - lose precision */ + } } diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index e91eab165..a01ac6c45 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -69,24 +69,30 @@ #include #include -#include -#include +#include +#include +#include #include -#include +#include +#include #include #include #include -#include +#include #include #include #include #include #include -#include +#include #include #include #include #include +#include +#include +#include +#include #include #include @@ -99,6 +105,11 @@ #include #include +extern lck_grp_t *vnode_lck_grp; +extern lck_attr_t *vnode_lck_attr; + + +extern lck_mtx_t * mnt_list_mtx_lock; enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, @@ -109,15 +120,39 @@ int vttoif_tab[9] = { S_IFSOCK, S_IFIFO, S_IFMT, }; -static void vfree(struct vnode *vp); -static void vinactive(struct vnode *vp); -static int vnreclaim(int count); -extern kern_return_t - adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); +extern int ubc_isinuse_locked(vnode_t, int, int); +extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval); + +static void vnode_list_add(vnode_t); +static void vnode_list_remove(vnode_t); + +static errno_t vnode_drain(vnode_t); +static void vgone(vnode_t); +static void vclean(vnode_t vp, int flag, proc_t p); +static void vnode_reclaim_internal(vnode_t, int, int); + +static void vnode_dropiocount (vnode_t, int); +static errno_t vnode_getiocount(vnode_t vp, int locked, int vid, int vflags); +static int vget_internal(vnode_t, int, int); + +static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); +static int vnode_reload(vnode_t); +static int vnode_isinuse_locked(vnode_t, int, int); + +static void insmntque(vnode_t vp, mount_t mp); +mount_t mount_list_lookupby_fsid(fsid_t *, int, int); +static int mount_getvfscnt(void); +static int mount_fillfsids(fsid_t *, int ); +static void vnode_iterate_setup(mount_t); +static int vnode_umount_preflight(mount_t, vnode_t, int); +static int vnode_iterate_prepare(mount_t); +static int vnode_iterate_reloadq(mount_t); +static void vnode_iterate_clear(mount_t); TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list; /* vnode inactive list */ struct mntlist mountlist; /* mounted filesystem list */ +static int nummounts = 0; #if DIAGNOSTIC #define VLISTCHECK(fun, vp, list) \ @@ -166,31 +201,13 @@ struct mntlist mountlist; /* mounted filesystem list */ inactivevnodes--; \ } while(0) -#define VORECLAIM_ENABLE(vp) \ - do { \ - if (ISSET((vp)->v_flag, VORECLAIM)) \ - panic("vm_object_reclaim already"); \ - 
SET((vp)->v_flag, VORECLAIM); \ - } while(0) - -#define VORECLAIM_DISABLE(vp) \ - do { \ - CLR((vp)->v_flag, VORECLAIM); \ - if (ISSET((vp)->v_flag, VXWANT)) { \ - CLR((vp)->v_flag, VXWANT); \ - wakeup((caddr_t)(vp)); \ - } \ - } while(0) - /* * Have to declare first two locks as actual data even if !MACH_SLOCKS, since * a pointers to them get passed around. */ -simple_lock_data_t mountlist_slock; -simple_lock_data_t mntvnode_slock; -decl_simple_lock_data(,mntid_slock); -decl_simple_lock_data(,vnode_free_list_slock); -decl_simple_lock_data(,spechash_slock); +void * mntvnode_slock; +void * mntid_slock; +void * spechash_slock; /* * vnodetarget is the amount of vnodes we expect to get back @@ -198,7 +215,7 @@ decl_simple_lock_data(,spechash_slock); * As vnreclaim() is a mainly cpu bound operation for faster * processers this number could be higher. * Having this number too high introduces longer delays in - * the execution of getnewvnode(). + * the execution of new_vnode(). */ unsigned long vnodetarget; /* target for vnreclaim() */ #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */ @@ -229,19 +246,11 @@ unsigned long vnodetarget; /* target for vnreclaim() */ * Initialize the vnode management data structures. */ __private_extern__ void -vntblinit() +vntblinit(void) { - extern struct lock__bsd__ exchangelock; - - simple_lock_init(&mountlist_slock); - simple_lock_init(&mntvnode_slock); - simple_lock_init(&mntid_slock); - simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); - simple_lock_init(&vnode_free_list_slock); TAILQ_INIT(&vnode_inactive_list); - CIRCLEQ_INIT(&mountlist); - lockinit(&exchangelock, PVFS, "exchange", 0, 0); + TAILQ_INIT(&mountlist); if (!vnodetarget) vnodetarget = VNODE_FREE_TARGET; @@ -268,531 +277,858 @@ reset_vmobjectcache(unsigned int val1, unsigned int val2) return(adjust_vm_object_cache(oval, nval)); } -/* - * Mark a mount point as busy. Used to synchronize access and to delay - * unmounting. Interlock is not released on failure. - */ + +/* the timeout is in 10 msecs */ int -vfs_busy(mp, flags, interlkp, p) - struct mount *mp; - int flags; - struct slock *interlkp; - struct proc *p; -{ - int lkflags; +vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, char *msg) { + int error = 0; + struct timespec ts; - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - if (flags & LK_NOWAIT) - return (ENOENT); - mp->mnt_kern_flag |= MNTK_MWAIT; - if (interlkp) - simple_unlock(interlkp); - /* - * Since all busy locks are shared except the exclusive - * lock granted when unmounting, the only place that a - * wakeup needs to be done is at the release of the - * exclusive lock at the end of dounmount. 
- */ - sleep((caddr_t)mp, PVFS); - if (interlkp) - simple_lock(interlkp); - return (ENOENT); + KERNEL_DEBUG(0x3010280 | DBG_FUNC_START, (int)vp, output_target, vp->v_numoutput, 0, 0); + + if (vp->v_numoutput > output_target) { + + slpflag &= ~PDROP; + + vnode_lock(vp); + + while ((vp->v_numoutput > output_target) && error == 0) { + if (output_target) + vp->v_flag |= VTHROTTLED; + else + vp->v_flag |= VBWAIT; + ts.tv_sec = (slptimeout/100); + ts.tv_nsec = (slptimeout % 1000) * 10 * NSEC_PER_USEC * 1000 ; + error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts); + } + vnode_unlock(vp); } - lkflags = LK_SHARED; - if (interlkp) - lkflags |= LK_INTERLOCK; - if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) - panic("vfs_busy: unexpected lock failure"); - return (0); + KERNEL_DEBUG(0x3010280 | DBG_FUNC_END, (int)vp, output_target, vp->v_numoutput, error, 0); + + return error; } -/* - * Free a busy filesystem. - */ + void -vfs_unbusy(mp, p) - struct mount *mp; - struct proc *p; +vnode_startwrite(vnode_t vp) { + + OSAddAtomic(1, &vp->v_numoutput); +} + + +void +vnode_writedone(vnode_t vp) { + if (vp) { + int need_wakeup = 0; + + OSAddAtomic(-1, &vp->v_numoutput); + + vnode_lock(vp); - lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); + if (vp->v_numoutput < 0) + panic("vnode_writedone: numoutput < 0"); + + if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput < (VNODE_ASYNC_THROTTLE / 3))) { + vp->v_flag &= ~VTHROTTLED; + need_wakeup = 1; + } + if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) { + vp->v_flag &= ~VBWAIT; + need_wakeup = 1; + } + vnode_unlock(vp); + + if (need_wakeup) + wakeup((caddr_t)&vp->v_numoutput); + } } -/* - * Lookup a filesystem type, and if found allocate and initialize - * a mount structure for it. - * - * Devname is usually updated by mount(8) after booting. - */ + + int -vfs_rootmountalloc(fstypename, devname, mpp) - char *fstypename; - char *devname; - struct mount **mpp; +vnode_hasdirtyblks(vnode_t vp) { - struct proc *p = current_proc(); /* XXX */ - struct vfsconf *vfsp; - struct mount *mp; + struct cl_writebehind *wbp; - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (!strcmp(vfsp->vfc_name, fstypename)) - break; - if (vfsp == NULL) - return (ENODEV); - mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); + /* + * Not taking the buf_mtxp as there is little + * point doing it. Even if the lock is taken the + * state can change right after that. 
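vnode_startwrite()/vnode_writedone() above keep v_numoutput balanced around each asynchronous write, and vnode_waitforwrites() lets issuers throttle against it. A sketch of the intended pairing (issue_async_write and my_iodone are hypothetical; the counters and KPIs are the ones above):

static int issue_async_write(vnode_t);	/* hypothetical I/O submission */

static int
throttled_write(vnode_t vp)
{
	int error;

	/* don't build an unbounded backlog of async writes on vp */
	error = vnode_waitforwrites(vp, VNODE_ASYNC_THROTTLE, 0, 0, "throttled_write");
	if (error)
		return (error);

	vnode_startwrite(vp);			/* v_numoutput++ before the I/O is issued */
	if ((error = issue_async_write(vp)))
		vnode_writedone(vp);		/* completion will never run: undo */
	return (error);
}

/* and from the I/O completion path: */
static void
my_iodone(vnode_t vp)
{
	vnode_writedone(vp);			/* v_numoutput--, wakes VBWAIT/VTHROTTLED sleepers */
}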
If their + * needs to be a synchronization, it must be driven + * by the caller + */ + if (vp->v_dirtyblkhd.lh_first) + return (1); + + if (!UBCINFOEXISTS(vp)) + return (0); - /* Initialize the default IO constraints */ - mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; - mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + wbp = vp->v_ubcinfo->cl_wbehind; + + if (wbp && (wbp->cl_number || wbp->cl_scmap)) + return (1); - lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - (void)vfs_busy(mp, LK_NOWAIT, 0, p); - LIST_INIT(&mp->mnt_vnodelist); - mp->mnt_vfc = vfsp; - mp->mnt_op = vfsp->vfc_vfsops; - mp->mnt_flag = MNT_RDONLY; - mp->mnt_vnodecovered = NULLVP; - vfsp->vfc_refcount++; - mp->mnt_stat.f_type = vfsp->vfc_typenum; - mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); - mp->mnt_stat.f_mntonname[0] = '/'; - (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); - *mpp = mp; return (0); } -/* - * Find an appropriate filesystem to use for the root. If a filesystem - * has not been preselected, walk through the list of known filesystems - * trying those that have mountroot routines, and try them until one - * works or we have tried them all. - */ int -vfs_mountroot() +vnode_hascleanblks(vnode_t vp) { - struct vfsconf *vfsp; - extern int (*mountroot)(void); - int error; + /* + * Not taking the buf_mtxp as there is little + * point doing it. Even if the lock is taken the + * state can change right after that. If their + * needs to be a synchronization, it must be driven + * by the caller + */ + if (vp->v_cleanblkhd.lh_first) + return (1); + return (0); +} - if (mountroot != NULL) { - error = (*mountroot)(); - return (error); - } - - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { - if (vfsp->vfc_mountroot == NULL) - continue; - if ((error = (*vfsp->vfc_mountroot)()) == 0) - return (0); - if (error != EINVAL) - printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); +void +vnode_iterate_setup(mount_t mp) +{ + while (mp->mnt_lflag & MNT_LITER) { + mp->mnt_lflag |= MNT_LITERWAIT; + msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", 0); } - return (ENODEV); + + mp->mnt_lflag |= MNT_LITER; + } -/* - * Lookup a mount point by filesystem identifier. 
- */ -struct mount * -vfs_getvfs(fsid) - fsid_t *fsid; +static int +vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags) { - register struct mount *mp; + vnode_t vp; - simple_lock(&mountlist_slock); - CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { - if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { - simple_unlock(&mountlist_slock); - return (mp); + TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { + if (vp->v_type == VDIR) + continue; + if (vp == skipvp) + continue; + if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || + (vp->v_flag & VNOFLUSH))) + continue; + if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) + continue; + if ((flags & WRITECLOSE) && + (vp->v_writecount == 0 || vp->v_type != VREG)) + continue; + /* Look for busy vnode */ + if (((vp->v_usecount != 0) && + ((vp->v_usecount - vp->v_kusecount) != 0))) + return(1); } - } - simple_unlock(&mountlist_slock); - return ((struct mount *)0); + + return(0); } -/* - * Get a new unique fsid +/* + * This routine prepares iteration by moving all the vnodes to worker queue + * called with mount lock held */ -void -vfs_getnewfsid(mp) - struct mount *mp; +int +vnode_iterate_prepare(mount_t mp) { -static u_short xxxfs_mntid; + vnode_t vp; - fsid_t tfsid; - int mtype; + if (TAILQ_EMPTY(&mp->mnt_vnodelist)) { + /* nothing to do */ + return (0); + } - simple_lock(&mntid_slock); - mtype = mp->mnt_vfc->vfc_typenum; - mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); - mp->mnt_stat.f_fsid.val[1] = mtype; - if (xxxfs_mntid == 0) - ++xxxfs_mntid; - tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); - tfsid.val[1] = mtype; - if (!CIRCLEQ_EMPTY(&mountlist)) { - while (vfs_getvfs(&tfsid)) { - tfsid.val[0]++; - xxxfs_mntid++; - } - } - mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; - simple_unlock(&mntid_slock); + vp = TAILQ_FIRST(&mp->mnt_vnodelist); + vp->v_mntvnodes.tqe_prev = &(mp->mnt_workerqueue.tqh_first); + mp->mnt_workerqueue.tqh_first = mp->mnt_vnodelist.tqh_first; + mp->mnt_workerqueue.tqh_last = mp->mnt_vnodelist.tqh_last; + + TAILQ_INIT(&mp->mnt_vnodelist); + if (mp->mnt_newvnodes.tqh_first != NULL) + panic("vnode_iterate_prepare: newvnode when entering vnode"); + TAILQ_INIT(&mp->mnt_newvnodes); + + return (1); } -/* - * Set vnode attributes to VNOVAL - */ -void -vattr_null(vap) - register struct vattr *vap; + +/* called with mount lock held */ +int +vnode_iterate_reloadq(mount_t mp) { + int moved = 0; + + /* add the remaining entries in workerq to the end of mount vnode list */ + if (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { + struct vnode * mvp; + mvp = TAILQ_LAST(&mp->mnt_vnodelist, vnodelst); + + /* Joining the workerque entities to mount vnode list */ + if (mvp) + mvp->v_mntvnodes.tqe_next = mp->mnt_workerqueue.tqh_first; + else + mp->mnt_vnodelist.tqh_first = mp->mnt_workerqueue.tqh_first; + mp->mnt_workerqueue.tqh_first->v_mntvnodes.tqe_prev = mp->mnt_vnodelist.tqh_last; + mp->mnt_vnodelist.tqh_last = mp->mnt_workerqueue.tqh_last; + TAILQ_INIT(&mp->mnt_workerqueue); + } + + /* add the newvnodes to the head of mount vnode list */ + if (!TAILQ_EMPTY(&mp->mnt_newvnodes)) { + struct vnode * nlvp; + nlvp = TAILQ_LAST(&mp->mnt_newvnodes, vnodelst); + + mp->mnt_newvnodes.tqh_first->v_mntvnodes.tqe_prev = &mp->mnt_vnodelist.tqh_first; + nlvp->v_mntvnodes.tqe_next = mp->mnt_vnodelist.tqh_first; + if(mp->mnt_vnodelist.tqh_first) + mp->mnt_vnodelist.tqh_first->v_mntvnodes.tqe_prev = &nlvp->v_mntvnodes.tqe_next; + else + mp->mnt_vnodelist.tqh_last = mp->mnt_newvnodes.tqh_last; + 
mp->mnt_vnodelist.tqh_first = mp->mnt_newvnodes.tqh_first; + TAILQ_INIT(&mp->mnt_newvnodes); + moved = 1; + } - vap->va_type = VNON; - vap->va_size = vap->va_bytes = VNOVAL; - vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = - vap->va_fsid = vap->va_fileid = - vap->va_blocksize = vap->va_rdev = - vap->va_atime.tv_sec = vap->va_atime.tv_nsec = - vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = - vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = - vap->va_flags = vap->va_gen = VNOVAL; - vap->va_vaflags = 0; + return(moved); } -/* - * Routines having to do with the management of the vnode table. - */ -extern int (**dead_vnodeop_p)(void *); -static void vclean __P((struct vnode *vp, int flag, struct proc *p)); -extern void vgonel __P((struct vnode *vp, struct proc *p)); -long numvnodes, freevnodes; -long inactivevnodes; -long vnode_reclaim_tried; -long vnode_objects_reclaimed; +void +vnode_iterate_clear(mount_t mp) +{ + mp->mnt_lflag &= ~MNT_LITER; + if (mp->mnt_lflag & MNT_LITERWAIT) { + mp->mnt_lflag &= ~MNT_LITERWAIT; + wakeup(mp); + } +} -extern struct vattr va_null; -/* - * Return the next vnode from the free list. - */ int -getnewvnode(tag, mp, vops, vpp) - enum vtagtype tag; - struct mount *mp; - int (**vops)(void *); - struct vnode **vpp; +vnode_iterate(mp, flags, callout, arg) + mount_t mp; + int flags; + int (*callout)(struct vnode *, void *); + void * arg; { - struct proc *p = current_proc(); /* XXX */ struct vnode *vp; - int cnt, didretry = 0; - static int reused = 0; /* track the reuse rate */ - int reclaimhits = 0; - -retry: - simple_lock(&vnode_free_list_slock); - /* - * MALLOC a vnode if the number of vnodes has not reached the desired - * value and the number on the free list is still reasonable... - * reuse from the freelist even though we may evict a name cache entry - * to reduce the number of vnodes that accumulate.... vnodes tie up - * wired memory and are never garbage collected - */ - if (numvnodes < desiredvnodes && (freevnodes < (2 * VNODE_FREE_MIN))) { - numvnodes++; - simple_unlock(&vnode_free_list_slock); - MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK); - bzero((char *)vp, sizeof *vp); - VLISTNONE(vp); /* avoid double queue removal */ - simple_lock_init(&vp->v_interlock); - goto done; - } + int vid, retval; + int ret = 0; - /* - * Once the desired number of vnodes are allocated, - * we start reusing the vnodes. - */ - if (freevnodes < VNODE_FREE_MIN) { - /* - * if we are low on vnodes on the freelist attempt to get - * some back from the inactive list and VM object cache - */ - simple_unlock(&vnode_free_list_slock); - (void)vnreclaim(vnodetarget); - simple_lock(&vnode_free_list_slock); - } - if (numvnodes >= desiredvnodes && reused > VNODE_TOOMANY_REUSED) { - reused = 0; - if (freevnodes < VNODE_FREE_ENOUGH) { - simple_unlock(&vnode_free_list_slock); - (void)vnreclaim(vnodetarget); - simple_lock(&vnode_free_list_slock); - } - } - - for (cnt = 0, vp = vnode_free_list.tqh_first; - vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) { - if (simple_lock_try(&vp->v_interlock)) { - /* got the interlock */ - if (ISSET(vp->v_flag, VORECLAIM)) { - /* skip over the vnodes that are being reclaimed */ - simple_unlock(&vp->v_interlock); - reclaimhits++; - } else - break; - } - } + mount_lock(mp); - /* - * Unless this is a bad time of the month, at most - * the first NCPUS items on the free list are - * locked, so this is close enough to being empty. 
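The pointer surgery in vnode_iterate_reloadq() above is a pair of list splices: leftover worker-queue entries are appended to the mount's vnode list, and vnodes created during iteration are prepended. With the later queue(3) TAILQ_CONCAT macro, which is not assumed to exist in this tree (hence the open-coded form above), the same operation would read:

static void
reloadq_sketch(mount_t mp)
{
	struct vnodelst tmp;

	/* leftover worker-queue entries go to the tail of the vnode list */
	TAILQ_CONCAT(&mp->mnt_vnodelist, &mp->mnt_workerqueue, v_mntvnodes);

	/* vnodes created during iteration go to the head */
	TAILQ_INIT(&tmp);
	TAILQ_CONCAT(&tmp, &mp->mnt_newvnodes, v_mntvnodes);
	TAILQ_CONCAT(&tmp, &mp->mnt_vnodelist, v_mntvnodes);
	TAILQ_CONCAT(&mp->mnt_vnodelist, &tmp, v_mntvnodes);
}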
- */
-	if (vp == NULLVP) {
-		simple_unlock(&vnode_free_list_slock);
-		if (!(didretry++) && (vnreclaim(vnodetarget) > 0))
-			goto retry;
-		tablefull("vnode");
-		log(LOG_EMERG, "%d vnodes locked, %d desired, %d numvnodes, "
-			"%d free, %d inactive, %d being reclaimed\n",
-			cnt, desiredvnodes, numvnodes, freevnodes, inactivevnodes,
-			reclaimhits);
-		*vpp = 0;
-		return (ENFILE);
-	}
+	vnode_iterate_setup(mp);
 
-	if (vp->v_usecount)
-		panic("free vnode isn't: v_type = %d, v_usecount = %d?",
-			vp->v_type, vp->v_usecount);
+	/* if it returns 0 then there is nothing to do */
+	retval = vnode_iterate_prepare(mp);
 
-	VREMFREE("getnewvnode", vp);
-	reused++;
-	simple_unlock(&vnode_free_list_slock);
-	vp->v_lease = NULL;
-	cache_purge(vp);
-	if (vp->v_type != VBAD)
-		vgonel(vp, p);	/* clean and reclaim the vnode */
-	else
-		simple_unlock(&vp->v_interlock);
-#if DIAGNOSTIC
-	if (vp->v_data)
-		panic("cleaned vnode isn't");
-	{
-	int s = splbio();
-	if (vp->v_numoutput)
-		panic("Clean vnode has pending I/O's");
-	splx(s);
+	if (retval == 0) {
+		vnode_iterate_clear(mp);
+		mount_unlock(mp);
+		return(ret);
 	}
-#endif
-	if (UBCINFOEXISTS(vp))
-		panic("getnewvnode: ubcinfo not cleaned");
-	else
-		vp->v_ubcinfo = UBC_INFO_NULL;
+
+	/* iterate over all the vnodes */
+	while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
+		vp = TAILQ_FIRST(&mp->mnt_workerqueue);
+		TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
+		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
+		vid = vp->v_id;
+		if ((vp->v_data == NULL) || (vp->v_type == VNON) || (vp->v_mount != mp)) {
+			continue;
+		}
+		mount_unlock(mp);
 
-	if (vp->v_flag & VHASDIRTY)
-	        cluster_release(vp);
+		if ( vget_internal(vp, vid, (flags | VNODE_NODEAD | VNODE_WITHID | VNODE_NOSUSPEND))) {
+			mount_lock(mp);
+			continue;
+		}
+		if (flags & VNODE_RELOAD) {
+			/*
+			 * we're reloading the filesystem
+			 * cast out any inactive vnodes...
+			 */
+			if (vnode_reload(vp)) {
+				/* vnode will be recycled on the refcount drop */
+				vnode_put(vp);
+				mount_lock(mp);
+				continue;
+			}
+		}
 
-	// make sure all these fields are cleared out as the
-	// name/parent stuff uses them and assumes they're
-	// cleared to null/0.
-	if (vp->v_scmap != NULL) {
-	    panic("getnewvnode: vp @ 0x%x has non-null scmap.\n", vp);
+		retval = callout(vp, arg);
+
+		switch (retval) {
+		  case VNODE_RETURNED:
+		  case VNODE_RETURNED_DONE:
+			  vnode_put(vp);
+			  if (retval == VNODE_RETURNED_DONE) {
+				mount_lock(mp);
+				ret = 0;
+				goto out;
+			  }
+			  break;
+
+		  case VNODE_CLAIMED_DONE:
+			  mount_lock(mp);
+			  ret = 0;
+			  goto out;
+		  case VNODE_CLAIMED:
+		  default:
+			  break;
+		}
+		mount_lock(mp);
 	}
-	vp->v_un.vu_name = NULL;
-	vp->v_scdirty = 0;
-	vp->v_un1.v_cl.v_pad = 0;
-
-
-	vp->v_lastr = -1;
-	vp->v_ralen = 0;
-	vp->v_maxra = 0;
-	vp->v_ciosiz = 0;
-	vp->v_clen = 0;
-	vp->v_socket = 0;
 
-	/* we may have blocked, re-evaluate state */
-	simple_lock(&vnode_free_list_slock);
-	if (VONLIST(vp)) {
-		if (vp->v_usecount == 0)
-			VREMFREE("getnewvnode", vp);
-		 else if (ISSET((vp)->v_flag, VUINACTIVE))
-			VREMINACTIVE("getnewvnode", vp);
-	}
-	simple_unlock(&vnode_free_list_slock);
+out:
+	(void)vnode_iterate_reloadq(mp);
+	vnode_iterate_clear(mp);
+	mount_unlock(mp);
+	return (ret);
+}
 
-done:
-	vp->v_flag = VSTANDARD;
-	vp->v_type = VNON;
-	vp->v_tag = tag;
-	vp->v_op = vops;
-	insmntque(vp, mp);
-	*vpp = vp;
-	vp->v_usecount = 1;
-	vp->v_data = 0;
-	return (0);
+void
+mount_lock_renames(mount_t mp)
+{
+	lck_mtx_lock(&mp->mnt_renamelock);
 }
 
-/*
- * Move a vnode from one mount queue to another.
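The callout contract above is easiest to see from the caller's side: vnode_iterate() hands the callout each vnode with an iocount already held, and the return code tells it who drops that iocount. A minimal sketch under those assumptions follows; myfs_sync_callback and struct myfs_args are hypothetical names, not part of this patch.

/*
 * Sketch: driving vnode_iterate() from a filesystem.  Returning
 * VNODE_RETURNED asks vnode_iterate() to vnode_put() the vnode for
 * us; VNODE_CLAIMED would mean the callout disposed of it itself,
 * and the *_DONE variants stop the iteration early.
 */
struct myfs_args {
	int	waitfor;	/* e.g. MNT_WAIT vs MNT_NOWAIT */
	int	error;		/* first error seen, if any */
};

static int
myfs_sync_callback(vnode_t vp, void *cargs)
{
	struct myfs_args *args = (struct myfs_args *)cargs;

	/* per-vnode work goes here; vp is pinned by an iocount */
	(void)args;

	return (VNODE_RETURNED);
}

static int
myfs_sync(mount_t mp, int waitfor)
{
	struct myfs_args args = { waitfor, 0 };

	vnode_iterate(mp, 0, myfs_sync_callback, &args);

	return (args.error);
}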
- */ void -insmntque(vp, mp) - struct vnode *vp; - struct mount *mp; +mount_unlock_renames(mount_t mp) { + lck_mtx_unlock(&mp->mnt_renamelock); +} - simple_lock(&mntvnode_slock); - /* - * Delete from old mount point vnode list, if on one. - */ - if (vp->v_mount != NULL) - LIST_REMOVE(vp, v_mntvnodes); - /* - * Insert into list of vnodes for the new mount point, if available. - */ - if ((vp->v_mount = mp) != NULL) - LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); - simple_unlock(&mntvnode_slock); +void +mount_lock(mount_t mp) +{ + lck_mtx_lock(&mp->mnt_mlock); } -__inline void -vpwakeup(struct vnode *vp) +void +mount_unlock(mount_t mp) { - if (vp) { - if (--vp->v_numoutput < 0) - panic("vpwakeup: neg numoutput"); - if ((vp->v_flag & VBWAIT || vp->v_flag & VTHROTTLED) - && vp->v_numoutput <= 0) { - vp->v_flag &= ~(VBWAIT|VTHROTTLED); - wakeup((caddr_t)&vp->v_numoutput); - } - } + lck_mtx_unlock(&mp->mnt_mlock); } -/* - * Update outstanding I/O count and do wakeup if requested. - */ + void -vwakeup(bp) - register struct buf *bp; +mount_ref(mount_t mp, int locked) { - CLR(bp->b_flags, B_WRITEINPROG); - vpwakeup(bp->b_vp); + if ( !locked) + mount_lock(mp); + + mp->mnt_count++; + + if ( !locked) + mount_unlock(mp); } -/* - * Flush out and invalidate all buffers associated with a vnode. - * Called with the underlying object locked. - */ + +void +mount_drop(mount_t mp, int locked) +{ + if ( !locked) + mount_lock(mp); + + mp->mnt_count--; + + if (mp->mnt_count == 0 && (mp->mnt_lflag & MNT_LDRAIN)) + wakeup(&mp->mnt_lflag); + + if ( !locked) + mount_unlock(mp); +} + + int -vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) - register struct vnode *vp; - int flags; - struct ucred *cred; - struct proc *p; - int slpflag, slptimeo; +mount_iterref(mount_t mp, int locked) { - register struct buf *bp; - struct buf *nbp, *blist; - int s, error = 0; + int retval = 0; - if (flags & V_SAVE) { - if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) { - return (error); - } - if (vp->v_dirtyblkhd.lh_first) - panic("vinvalbuf: dirty bufs (vp 0x%x, bp 0x%x)", vp, vp->v_dirtyblkhd.lh_first); + if (!locked) + mount_list_lock(); + if (mp->mnt_iterref < 0) { + retval = 1; + } else { + mp->mnt_iterref++; } + if (!locked) + mount_list_unlock(); + return(retval); +} - for (;;) { - if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) - while (blist && blist->b_lblkno < 0) - blist = blist->b_vnbufs.le_next; - if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && - (flags & V_SAVEMETA)) - while (blist && blist->b_lblkno < 0) - blist = blist->b_vnbufs.le_next; - if (!blist) - break; +int +mount_isdrained(mount_t mp, int locked) +{ + int retval; - for (bp = blist; bp; bp = nbp) { - nbp = bp->b_vnbufs.le_next; - if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) - continue; - s = splbio(); - if (ISSET(bp->b_flags, B_BUSY)) { - SET(bp->b_flags, B_WANTED); - error = tsleep((caddr_t)bp, - slpflag | (PRIBIO + 1), "vinvalbuf", - slptimeo); - splx(s); - if (error) { - return (error); - } - break; - } - bremfree(bp); - SET(bp->b_flags, B_BUSY); - splx(s); + if (!locked) + mount_list_lock(); + if (mp->mnt_iterref < 0) + retval = 1; + else + retval = 0; + if (!locked) + mount_list_unlock(); + return(retval); +} + +void +mount_iterdrop(mount_t mp) +{ + mount_list_lock(); + mp->mnt_iterref--; + wakeup(&mp->mnt_iterref); + mount_list_unlock(); +} + +void +mount_iterdrain(mount_t mp) +{ + mount_list_lock(); + while (mp->mnt_iterref) + msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", 0 ); + /* mount iterations 
drained */ + mp->mnt_iterref = -1; + mount_list_unlock(); +} +void +mount_iterreset(mount_t mp) +{ + mount_list_lock(); + if (mp->mnt_iterref == -1) + mp->mnt_iterref = 0; + mount_list_unlock(); +} + +/* always called with mount lock held */ +int +mount_refdrain(mount_t mp) +{ + if (mp->mnt_lflag & MNT_LDRAIN) + panic("already in drain"); + mp->mnt_lflag |= MNT_LDRAIN; + + while (mp->mnt_count) + msleep((caddr_t)&mp->mnt_lflag, &mp->mnt_mlock, PVFS, "mount_drain", 0 ); + + if (mp->mnt_vnodelist.tqh_first != NULL) + panic("mount_refdrain: dangling vnode"); + + mp->mnt_lflag &= ~MNT_LDRAIN; + + return(0); +} + + +/* + * Mark a mount point as busy. Used to synchronize access and to delay + * unmounting. + */ +int +vfs_busy(mount_t mp, int flags) +{ + +restart: + if (mp->mnt_lflag & MNT_LDEAD) + return(ENOENT); + + if (mp->mnt_lflag & MNT_LUNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); + + mount_lock(mp); + + if (mp->mnt_lflag & MNT_LDEAD) { + mount_unlock(mp); + return(ENOENT); + } + if (mp->mnt_lflag & MNT_LUNMOUNT) { + mp->mnt_lflag |= MNT_LWAIT; /* - * XXX Since there are no node locks for NFS, I believe - * there is a slight chance that a delayed write will - * occur while sleeping just above, so check for it. + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. */ - if (ISSET(bp->b_flags, B_DELWRI) && (flags & V_SAVE)) { - (void) VOP_BWRITE(bp); - break; - } - - if (bp->b_flags & B_LOCKED) { - panic("vinvalbuf: bp @ 0x%x is locked!", bp); - break; - } else { - SET(bp->b_flags, B_INVAL); - } - brelse(bp); + msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", 0 ); + return (ENOENT); } + mount_unlock(mp); + } + + lck_rw_lock_shared(&mp->mnt_rwlock); + + /* + * until we are granted the rwlock, it's possible for the mount point to + * change state, so reevaluate before granting the vfs_busy + */ + if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { + lck_rw_done(&mp->mnt_rwlock); + goto restart; } - if (!(flags & V_SAVEMETA) && - (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) - panic("vinvalbuf: flush failed"); return (0); } +/* + * Free a busy filesystem. + */ + +void +vfs_unbusy(mount_t mp) +{ + lck_rw_done(&mp->mnt_rwlock); +} + + + +static void +vfs_rootmountfailed(mount_t mp) { + + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); + + vfs_unbusy(mp); + + mount_lock_destroy(mp); + + FREE_ZONE(mp, sizeof(struct mount), M_MOUNT); +} + +/* + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. 
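The vfs_busy()/vfs_unbusy() pair above is the entry bracket every consumer of a mount point needs: the shared rwlock pins the mount against dounmount(), and the restart loop re-checks MNT_LDEAD / MNT_LUNMOUNT after the lock is granted. A minimal sketch of the pairing, with do_mount_op as a hypothetical caller:

/*
 * Sketch of the canonical vfs_busy() bracket.  ENOENT from
 * vfs_busy() means the mount is dead or unmounting; with LK_NOWAIT
 * the caller is never put to sleep waiting for an unmount.
 */
static int
do_mount_op(mount_t mp)
{
	int error;

	if ((error = vfs_busy(mp, LK_NOWAIT)))
		return (error);

	/*
	 * The mount is now pinned: dounmount() cannot take the
	 * rwlock exclusive until the matching vfs_unbusy().
	 */

	vfs_unbusy(mp);
	return (0);
}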
+ */ +static mount_t +vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) +{ + mount_t mp; + + mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + + /* Initialize the default IO constraints */ + mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; + mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; + mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; + mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; + mp->mnt_devblocksize = DEV_BSIZE; + + mount_lock_init(mp); + (void)vfs_busy(mp, LK_NOWAIT); + + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); + + mp->mnt_vtable = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY | MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + //mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + + mount_list_lock(); + vfsp->vfc_refcount++; + mount_list_unlock(); + + strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); + mp->mnt_vfsstat.f_mntonname[0] = '/'; + (void) copystr((char *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0); + + return (mp); +} + +errno_t +vfs_rootmountalloc(const char *fstypename, const char *devname, mount_t *mpp) +{ + struct vfstable *vfsp; + + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + + *mpp = vfs_rootmountalloc_internal(vfsp, devname); + + if (*mpp) + return (0); + + return (ENOMEM); +} + + +/* + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. + */ +extern int (*mountroot)(void); + +int +vfs_mountroot() +{ + struct vfstable *vfsp; + struct vfs_context context; + int error; + mount_t mp; + + if (mountroot != NULL) { + /* + * used for netboot which follows a different set of rules + */ + error = (*mountroot)(); + return (error); + } + if ((error = bdevvp(rootdev, &rootvp))) { + printf("vfs_mountroot: can't setup bdevvp\n"); + return (error); + } + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) + continue; + + mp = vfs_rootmountalloc_internal(vfsp, "root_device"); + mp->mnt_devvp = rootvp; + + if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, &context)) == 0) { + mp->mnt_devvp->v_specflags |= SI_MOUNTEDON; + + vfs_unbusy(mp); + + mount_list_add(mp); + + /* + * cache the IO attributes for the underlying physical media... + * an error return indicates the underlying driver doesn't + * support all the queries necessary... however, reasonable + * defaults will have been set, so no reason to bail or care + */ + vfs_init_io_attributes(rootvp, mp); + /* + * get rid of iocount reference returned + * by bdevvp... it will have also taken + * a usecount reference which we want to keep + */ + vnode_put(rootvp); + + return (0); + } + vfs_rootmountfailed(mp); + + if (error != EINVAL) + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); + } + return (ENODEV); +} + +/* + * Lookup a mount point by filesystem identifier. 
+ */ +extern mount_t vfs_getvfs_locked(fsid_t *); + +struct mount * +vfs_getvfs(fsid) + fsid_t *fsid; +{ + return (mount_list_lookupby_fsid(fsid, 0, 0)); +} + +struct mount * +vfs_getvfs_locked(fsid) + fsid_t *fsid; +{ + return(mount_list_lookupby_fsid(fsid, 1, 0)); +} + +struct mount * +vfs_getvfs_by_mntonname(u_char *path) +{ + mount_t retmp = (mount_t)0; + mount_t mp; + + mount_list_lock(); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (!strcmp(mp->mnt_vfsstat.f_mntonname, path)) { + retmp = mp; + goto out; + } + } +out: + mount_list_unlock(); + return (retmp); +} + +/* generation number for creation of new fsids */ +u_short mntid_gen = 0; +/* + * Get a new unique fsid + */ +void +vfs_getnewfsid(mp) + struct mount *mp; +{ + + fsid_t tfsid; + int mtype; + mount_t nmp; + + mount_list_lock(); + + /* generate a new fsid */ + mtype = mp->mnt_vtable->vfc_typenum; + if (++mntid_gen == 0) + mntid_gen++; + tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); + tfsid.val[1] = mtype; + + TAILQ_FOREACH(nmp, &mountlist, mnt_list) { + while (vfs_getvfs_locked(&tfsid)) { + if (++mntid_gen == 0) + mntid_gen++; + tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen); + } + } + mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0]; + mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1]; + mount_list_unlock(); +} + +/* + * Routines having to do with the management of the vnode table. + */ +extern int (**dead_vnodeop_p)(void *); +long numvnodes, freevnodes; +long inactivevnodes; + + +/* + * Move a vnode from one mount queue to another. + */ +static void +insmntque(vnode_t vp, mount_t mp) +{ + mount_t lmp; + /* + * Delete from old mount point vnode list, if on one. + */ + if ( (lmp = vp->v_mount) != NULL) { + if ((vp->v_lflag & VNAMED_MOUNT) == 0) + panic("insmntque: vp not in mount vnode list"); + vp->v_lflag &= ~VNAMED_MOUNT; + + mount_lock(lmp); + + mount_drop(lmp, 1); + + if (vp->v_mntvnodes.tqe_next == NULL) { + if (TAILQ_LAST(&lmp->mnt_vnodelist, vnodelst) == vp) + TAILQ_REMOVE(&lmp->mnt_vnodelist, vp, v_mntvnodes); + else if (TAILQ_LAST(&lmp->mnt_newvnodes, vnodelst) == vp) + TAILQ_REMOVE(&lmp->mnt_newvnodes, vp, v_mntvnodes); + else if (TAILQ_LAST(&lmp->mnt_workerqueue, vnodelst) == vp) + TAILQ_REMOVE(&lmp->mnt_workerqueue, vp, v_mntvnodes); + } else { + vp->v_mntvnodes.tqe_next->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_prev; + *vp->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_next; + } + vp->v_mntvnodes.tqe_next = 0; + vp->v_mntvnodes.tqe_prev = 0; + mount_unlock(lmp); + return; + } + + /* + * Insert into list of vnodes for the new mount point, if available. + */ + if ((vp->v_mount = mp) != NULL) { + mount_lock(mp); + if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0)) + panic("vp already in mount list"); + if (mp->mnt_lflag & MNT_LITER) + TAILQ_INSERT_HEAD(&mp->mnt_newvnodes, vp, v_mntvnodes); + else + TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + if (vp->v_lflag & VNAMED_MOUNT) + panic("insmntque: vp already in mount vnode list"); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) + panic("insmntque: vp on the free list\n"); + vp->v_lflag |= VNAMED_MOUNT; + mount_ref(mp, 1); + mount_unlock(mp); + } +} + + /* * Create a vnode for a block device. * Used for root filesystem, argdev, and swap areas. * Also used for memory file system special devices. 
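The fsid scheme above is compact but easy to misread, so the layout is worth spelling out. Both halves encode the filesystem type; only the generation in val[0]'s minor number distinguishes mounts of the same type. A worked example, illustrative only:

/*
 * For a filesystem whose vfc_typenum is t, the n-th generation
 * probed by vfs_getnewfsid() is
 *
 *	tfsid.val[0] = makedev(nblkdev + t, n);
 *	tfsid.val[1] = t;
 *
 * so two mounts of the same type differ only in the minor half of
 * val[0], and vfs_getvfs_locked() is what detects a collision and
 * forces the generation to advance (skipping 0, which is reserved).
 */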
*/ int -bdevvp(dev, vpp) - dev_t dev; - struct vnode **vpp; +bdevvp(dev_t dev, vnode_t *vpp) { - register struct vnode *vp; - struct vnode *nvp; - int error; + vnode_t nvp; + int error; + struct vnode_fsparam vfsp; + struct vfs_context context; if (dev == NODEV) { *vpp = NULLVP; return (ENODEV); } - error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); - if (error) { + + context.vc_proc = current_proc(); + context.vc_ucred = FSCRED; + + vfsp.vnfs_mp = (struct mount *)0; + vfsp.vnfs_vtype = VBLK; + vfsp.vnfs_str = "bdevvp"; + vfsp.vnfs_dvp = 0; + vfsp.vnfs_fsnode = 0; + vfsp.vnfs_cnp = 0; + vfsp.vnfs_vops = spec_vnodeop_p; + vfsp.vnfs_rdev = dev; + vfsp.vnfs_filesize = 0; + + vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; + + vfsp.vnfs_marksystem = 0; + vfsp.vnfs_markroot = 0; + + if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp)) ) { *vpp = NULLVP; return (error); } - vp = nvp; - vp->v_type = VBLK; - if (nvp = checkalias(vp, dev, (struct mount *)0)) { - vput(vp); - vp = nvp; + if ( (error = vnode_ref(nvp)) ) { + panic("bdevvp failed: vnode_ref"); + return (error); } - *vpp = vp; + if ( (error = VNOP_FSYNC(nvp, MNT_WAIT, &context)) ) { + panic("bdevvp failed: fsync"); + return (error); + } + if ( (error = buf_invalidateblks(nvp, BUF_WRITE_DATA, 0, 0)) ) { + panic("bdevvp failed: invalidateblks"); + return (error); + } + if ( (error = VNOP_OPEN(nvp, FREAD, &context)) ) { + panic("bdevvp failed: open"); + return (error); + } + *vpp = nvp; + return (0); } @@ -804,74 +1140,72 @@ bdevvp(dev, vpp) * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ -struct vnode * -checkalias(nvp, nvp_rdev, mp) +static vnode_t +checkalias(nvp, nvp_rdev) register struct vnode *nvp; dev_t nvp_rdev; - struct mount *mp; { - struct proc *p = current_proc(); /* XXX */ struct vnode *vp; struct vnode **vpp; - struct specinfo *specinfop; - - if (nvp->v_type != VBLK && nvp->v_type != VCHR) - return (NULLVP); + int vid = 0; - MALLOC_ZONE(specinfop, struct specinfo *, sizeof(struct specinfo), - M_SPECINFO, M_WAITOK); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: - simple_lock(&spechash_slock); + SPECHASH_LOCK(); + for (vp = *vpp; vp; vp = vp->v_specnext) { - if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) - continue; + if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) { + vid = vp->v_id; + break; + } + } + SPECHASH_UNLOCK(); + + if (vp) { + if (vnode_getwithvid(vp,vid)) { + goto loop; + } + /* + * Termination state is checked in vnode_getwithvid + */ + vnode_lock(vp); + /* * Alias, but not in use, so flush it out. 
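bdevvp() above is also a template for how any filesystem creates vnodes under the new KPI: fill a struct vnode_fsparam and hand it to vnode_create(). A hedged sketch for a regular file follows; myfs_make_vnode, struct myfs_node and myfs_vnodeop_p are hypothetical, and fields not shown (vnfs_rdev, vnfs_markroot, vnfs_marksystem) are simply left zeroed:

/*
 * Sketch: creating a VREG vnode through the same vnode_create()
 * path bdevvp() uses.  The returned vnode carries an iocount that
 * the caller eventually drops with vnode_put().
 */
static int
myfs_make_vnode(mount_t mp, struct myfs_node *np, vnode_t dvp,
    struct componentname *cnp, off_t size, vnode_t *vpp)
{
	struct vnode_fsparam vfsp;

	bzero(&vfsp, sizeof(vfsp));
	vfsp.vnfs_mp = mp;
	vfsp.vnfs_vtype = VREG;
	vfsp.vnfs_str = "myfs";
	vfsp.vnfs_dvp = dvp;		/* parent, for name cache entry */
	vfsp.vnfs_cnp = cnp;		/* component name being created */
	vfsp.vnfs_fsnode = np;		/* becomes vp->v_data */
	vfsp.vnfs_vops = myfs_vnodeop_p;
	vfsp.vnfs_filesize = size;

	return (vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp));
}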
*/ - simple_lock(&vp->v_interlock); - if (vp->v_usecount == 0) { - simple_unlock(&spechash_slock); - vgonel(vp, p); + if ((vp->v_iocount == 1) && (vp->v_usecount == 0)) { + vnode_reclaim_internal(vp, 1, 0); + vnode_unlock(vp); + vnode_put(vp); goto loop; } - if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { - simple_unlock(&spechash_slock); - goto loop; - } - break; } if (vp == NULL || vp->v_tag != VT_NON) { - nvp->v_specinfo = specinfop; - specinfop = 0; /* buffer used */ + MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), + M_SPECINFO, M_WAITOK); bzero(nvp->v_specinfo, sizeof(struct specinfo)); nvp->v_rdev = nvp_rdev; + nvp->v_specflags = 0; + nvp->v_speclastr = -1; + + SPECHASH_LOCK(); nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; - simple_unlock(&spechash_slock); *vpp = nvp; + SPECHASH_UNLOCK(); + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; - vput(vp); + vnode_unlock(vp); + vnode_put(vp); } - /* Since buffer is used just return */ return (NULLVP); } - simple_unlock(&spechash_slock); - VOP_UNLOCK(vp, 0, p); - simple_lock(&vp->v_interlock); - vclean(vp, 0, p); - vp->v_op = nvp->v_op; - vp->v_tag = nvp->v_tag; - nvp->v_type = VNON; - insmntque(vp, mp); - if (specinfop) - FREE_ZONE((void *)specinfop, sizeof(struct specinfo), M_SPECINFO); return (vp); } + /* * Get a reference on a particular vnode and lock it if requested. * If the vnode was on the inactive list, remove it from the list. @@ -882,571 +1216,264 @@ loop: * and an error returned to indicate that the vnode is no longer * usable (possibly having been changed to a new file system type). */ -int -vget(vp, flags, p) - struct vnode *vp; - int flags; - struct proc *p; +static int +vget_internal(vnode_t vp, int vid, int vflags) { int error = 0; u_long vpid; - vpid = vp->v_id; // save off the original v_id - -retry: - - /* - * If the vnode is in the process of being cleaned out for - * another use, we wait for the cleaning to finish and then - * return failure. Cleaning is determined by checking that - * the VXLOCK flag is set. - */ - if ((flags & LK_INTERLOCK) == 0) - simple_lock(&vp->v_interlock); - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { - vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - (void)tsleep((caddr_t)vp, PINOD, "vget", 0); - return (ENOENT); - } - - /* - * vnode is being terminated. 
- * wait for vnode_pager_no_senders() to clear VTERMINATE - */ - if (ISSET(vp->v_flag, VTERMINATE)) { - SET(vp->v_flag, VTERMWANT); - simple_unlock(&vp->v_interlock); - (void)tsleep((caddr_t)&vp->v_ubcinfo, PINOD, "vget1", 0); - return (ENOENT); - } - - /* - * if the vnode is being initialized, - * wait for it to finish initialization - */ - if (ISSET(vp->v_flag, VUINIT)) { - SET(vp->v_flag, VUWANT); - simple_unlock(&vp->v_interlock); - (void) tsleep((caddr_t)vp, PINOD, "vget2", 0); - goto retry; - } - - simple_lock(&vnode_free_list_slock); - if (VONLIST(vp)) { - if (vp->v_usecount == 0) - VREMFREE("vget", vp); - else if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vget", vp); - } - simple_unlock(&vnode_free_list_slock); - - if (++vp->v_usecount <= 0) - panic("vget: v_usecount"); - - /* - * Recover named reference as needed - */ - if (UBCISVALID(vp) && !UBCINFOMISSING(vp) && !ubc_issetflags(vp, UI_HASOBJREF)) { - simple_unlock(&vp->v_interlock); - if (ubc_getobject(vp, UBC_HOLDOBJECT) == MEMORY_OBJECT_CONTROL_NULL) { - error = ENOENT; - goto errout; - } - simple_lock(&vp->v_interlock); - } - - if (flags & LK_TYPE_MASK) { - if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) - goto errout; - if (vpid != vp->v_id) { // make sure it's still the same vnode - vput(vp); - return ENOENT; - } - return (0); - } - - if ((flags & LK_INTERLOCK) == 0) - simple_unlock(&vp->v_interlock); + vnode_lock(vp); - if (vpid != vp->v_id) { // make sure it's still the same vnode - vrele(vp); - return ENOENT; - } - - return (0); + if (vflags & VNODE_WITHID) + vpid = vid; + else + vpid = vp->v_id; // save off the original v_id -errout: - simple_lock(&vp->v_interlock); + if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0)) + /* + * vnode to be returned only if it has writers opened + */ + error = EINVAL; + else + error = vnode_getiocount(vp, 1, vpid, vflags); - /* - * we may have blocked. Re-evaluate the state - */ - simple_lock(&vnode_free_list_slock); - if (VONLIST(vp)) { - if (vp->v_usecount == 0) - VREMFREE("vget", vp); - else if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vget", vp); - } - simple_unlock(&vnode_free_list_slock); + vnode_unlock(vp); - /* - * If the vnode was not active in the first place - * must not call vrele() as VOP_INACTIVE() is not - * required. - * So inlined part of vrele() here. - */ - if (--vp->v_usecount == 1) { - if (UBCINFOEXISTS(vp)) { - vinactive(vp); - simple_unlock(&vp->v_interlock); - return (error); - } - } - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - return (error); - } - if (vp->v_usecount < 0) - panic("vget: negative usecount (%d)", vp->v_usecount); - vfree(vp); - simple_unlock(&vp->v_interlock); return (error); } -/* - * Get a pager reference on the particular vnode. - * - * This is called from ubc_info_init() and it is asumed that - * the vnode is not on the free list. - * It is also assumed that the vnode is neither being recycled - * by vgonel nor being terminated by vnode_pager_vrele(). - * - * The vnode interlock is NOT held by the caller. 
- */ -__private_extern__ int -vnode_pager_vget(vp) - struct vnode *vp; -{ - simple_lock(&vp->v_interlock); - - UBCINFOCHECK("vnode_pager_vget", vp); - - if (ISSET(vp->v_flag, (VXLOCK|VORECLAIM|VTERMINATE))) - panic("%s: dying vnode", "vnode_pager_vget"); - - simple_lock(&vnode_free_list_slock); - /* The vnode should not be on free list */ - if (VONLIST(vp)) { - if (vp->v_usecount == 0) - panic("%s: still on list", "vnode_pager_vget"); - else if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vnode_pager_vget", vp); - } - - /* The vnode should not be on the inactive list here */ - simple_unlock(&vnode_free_list_slock); - - /* After all those checks, now do the real work :-) */ - if (++vp->v_usecount <= 0) - panic("vnode_pager_vget: v_usecount"); - simple_unlock(&vp->v_interlock); - - return (0); -} - -/* - * Stubs to use when there is no locking to be done on the underlying object. - * A minimal shared lock is necessary to ensure that the underlying object - * is not revoked while an operation is in progress. So, an active shared - * count is maintained in an auxillary vnode lock structure. - */ -int -vop_nolock(ap) - struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; -{ -#ifdef notyet - /* - * This code cannot be used until all the non-locking filesystems - * (notably NFS) are converted to properly lock and release nodes. - * Also, certain vnode operations change the locking state within - * the operation (create, mknod, remove, link, rename, mkdir, rmdir, - * and symlink). Ideally these operations should not change the - * lock state, but should be changed to let the caller of the - * function unlock them. Otherwise all intermediate vnode layers - * (such as union, umapfs, etc) must catch these functions to do - * the necessary locking at their layer. Note that the inactive - * and lookup operations also change their lock state, but this - * cannot be avoided, so these two operations will always need - * to be handled in intermediate layers. - */ - struct vnode *vp = ap->a_vp; - int vnflags, flags = ap->a_flags; - - if (vp->v_vnlock == NULL) { - if ((flags & LK_TYPE_MASK) == LK_DRAIN) - return (0); - MALLOC(vp->v_vnlock, struct lock__bsd__ *, - sizeof(struct lock__bsd__), M_TEMP, M_WAITOK); - lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); - } - switch (flags & LK_TYPE_MASK) { - case LK_DRAIN: - vnflags = LK_DRAIN; - break; - case LK_EXCLUSIVE: - case LK_SHARED: - vnflags = LK_SHARED; - break; - case LK_UPGRADE: - case LK_EXCLUPGRADE: - case LK_DOWNGRADE: - return (0); - case LK_RELEASE: - default: - panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); - } - if (flags & LK_INTERLOCK) - vnflags |= LK_INTERLOCK; - return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); -#else /* for now */ - /* - * Since we are not using the lock manager, we must clear - * the interlock here. - */ - if (ap->a_flags & LK_INTERLOCK) - simple_unlock(&ap->a_vp->v_interlock); - return (0); -#endif -} - -/* - * Decrement the active use count. - */ int -vop_nounlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; - } */ *ap; +vnode_ref(vnode_t vp) { - struct vnode *vp = ap->a_vp; - if (vp->v_vnlock == NULL) - return (0); - return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); + return (vnode_ref_ext(vp, 0)); } -/* - * Return whether or not the node is in use. 
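This patch splits vnode references into two tiers: short-term iocounts (taken by vnode_getwithvid()/vget_internal(), dropped by vnode_put()) that pin a vnode across one operation, and long-term usecounts taken by vnode_ref() below, with v_kusecount tracking kernel-only O_EVTONLY holds. A sketch of the discipline; hold_node_somewhere is a hypothetical caller that already owns an iocount:

/*
 * Sketch of the two-tier reference discipline.  vnode_ref() must be
 * called while an iocount pins the vnode; the usecount then keeps it
 * resident after the iocount is dropped.
 */
static int
hold_node_somewhere(vnode_t vp)
{
	int error;

	/* caller holds an iocount on vp (e.g. from a lookup) */
	if ((error = vnode_ref(vp)))		/* bumps v_usecount */
		return (error);
	vnode_put(vp);				/* drop the iocount */

	/* ... vp stays usable until the matching release ... */

	vnode_rele(vp);		/* may schedule VNOP_INACTIVE */
	return (0);
}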
- */ int -vop_noislocked(ap) - struct vop_islocked_args /* { - struct vnode *a_vp; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - - if (vp->v_vnlock == NULL) - return (0); - return (lockstatus(vp->v_vnlock)); -} - -/* - * Vnode reference. - */ -void -vref(vp) - struct vnode *vp; +vnode_ref_ext(vnode_t vp, int fmode) { + int error = 0; - simple_lock(&vp->v_interlock); - if (vp->v_usecount <= 0) - panic("vref used where vget required"); + vnode_lock(vp); - /* If on the inactive list, remove it from there */ - simple_lock(&vnode_free_list_slock); - if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vref", vp); - simple_unlock(&vnode_free_list_slock); + /* + * once all the current call sites have been fixed to insure they have + * taken an iocount, we can toughen this assert up and insist that the + * iocount is non-zero... a non-zero usecount doesn't insure correctness + */ + if (vp->v_iocount <= 0 && vp->v_usecount <= 0) + panic("vnode_ref_ext: vp %x has no valid reference %d, %d", vp, vp->v_iocount, vp->v_usecount); - if (++vp->v_usecount <= 0) - panic("vref v_usecount"); - simple_unlock(&vp->v_interlock); -} + /* + * if you are the owner of drain/termination, can acquire usecount + */ + if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) { + if (vp->v_owner != current_thread()) { + error = ENOENT; + goto out; + } + } + vp->v_usecount++; -static void -clean_up_name_parent_ptrs(struct vnode *vp) -{ - if (VNAME(vp) || VPARENT(vp)) { - char *tmp1; - struct vnode *tmp2; - - // do it this way so we don't block before clearing - // these fields. - tmp1 = VNAME(vp); - tmp2 = VPARENT(vp); - VNAME(vp) = NULL; - VPARENT(vp) = NULL; - - if (tmp1) { - remove_name(tmp1); + if (fmode & FWRITE) { + if (++vp->v_writecount <= 0) + panic("vnode_ref_ext: v_writecount"); } - - if (tmp2) { - vrele(tmp2); + if (fmode & O_EVTONLY) { + if (++vp->v_kusecount <= 0) + panic("vnode_ref_ext: v_kusecount"); } - } +out: + vnode_unlock(vp); + + return (error); } /* * put the vnode on appropriate free list. - * called with v_interlock held. + * called with vnode LOCKED */ static void -vfree(vp) - struct vnode *vp; +vnode_list_add(vnode_t vp) { - funnel_t *curflock; - extern int disable_funnel; - - if ((curflock = thread_funnel_get()) != kernel_flock && - !(disable_funnel && curflock != THR_FUNNEL_NULL)) - panic("Entering vfree() without kernel funnel"); /* - * if the vnode is not obtained by calling getnewvnode() we - * are not responsible for the cleanup. Just return. 
+	 * if it is already on a list or has non-zero references, return
 	 */
-	if (!(vp->v_flag & VSTANDARD)) {
+	if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0))
 		return;
-	}
-
-	if (vp->v_usecount != 0)
-		panic("vfree: v_usecount");
-
-	/* insert at tail of LRU list or at head if VAGE is set */
-	simple_lock(&vnode_free_list_slock);
+	vnode_list_lock();
 
-	// make sure the name & parent pointers get cleared out
-//	clean_up_name_parent_ptrs(vp);
-
-	if (VONLIST(vp))
-		panic("%s: vnode still on list", "vfree");
-
-	if (vp->v_flag & VAGE) {
-		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+	/*
+	 * insert at tail of LRU list or at head if VAGE or VL_DEAD is set
+	 */
+	if ((vp->v_flag & VAGE) || (vp->v_lflag & VL_DEAD)) {
+	        TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 		vp->v_flag &= ~VAGE;
-	} else
-		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	} else {
+	        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	}
 	freevnodes++;
-	simple_unlock(&vnode_free_list_slock);
-	return;
+
+	vnode_list_unlock();
 }
 
 /*
- * put the vnode on the inactive list.
- * called with v_interlock held
+ * remove the vnode from appropriate free list.
  */
 static void
-vinactive(vp)
-	struct vnode *vp;
+vnode_list_remove(vnode_t vp)
 {
-	funnel_t *curflock;
-	extern int disable_funnel;
-
-	if ((curflock = thread_funnel_get()) != kernel_flock &&
-	    !(disable_funnel && curflock != THR_FUNNEL_NULL))
-		panic("Entering vinactive() without kernel funnel");
-
-	if (!UBCINFOEXISTS(vp))
-		panic("vinactive: not a UBC vnode");
-
-	if (vp->v_usecount != 1)
-		panic("vinactive: v_usecount");
-
-	simple_lock(&vnode_free_list_slock);
-
-	if (VONLIST(vp))
-		panic("%s: vnode still on list", "vinactive");
-	VINACTIVECHECK("vinactive", vp, 0);
-
-	TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_freelist);
-	SET(vp->v_flag, VUINACTIVE);
-	CLR(vp->v_flag, (VNOCACHE_DATA | VRAOFF));
-
-	inactivevnodes++;
-	simple_unlock(&vnode_free_list_slock);
-	return;
+	/*
+	 * we want to avoid taking the list lock
+	 * in the case where we're not on the free
+	 * list... this will be true for most
+	 * directories and any currently in use files
+	 *
+	 * we're guaranteed that we can't go from
+	 * the not-on-list state to the on-list
+	 * state since we hold the vnode lock...
+	 * all calls to vnode_list_add are done
+	 * under the vnode lock... so we can
+	 * check for that condition (the prevalent one)
+	 * without taking the list lock
+	 */
+	if (VONLIST(vp)) {
+	        vnode_list_lock();
+		/*
+		 * however, we're not guaranteed that
+		 * we won't go from the on-list state
+		 * to the non-on-list state until we
+		 * hold the vnode_list_lock... this
+		 * is due to new_vnode removing vnodes
+		 * from the free list under the list_lock
+		 * w/o the vnode lock...
so we need to + * check again whether we're currently + * on the free list + */ + if (VONLIST(vp)) { + VREMFREE("vnode_list_remove", vp); + VLISTNONE(vp); + } + vnode_list_unlock(); + } } -/* - * vput(), just unlock and vrele() - */ void -vput(vp) - struct vnode *vp; +vnode_rele(vnode_t vp) { - struct proc *p = current_proc(); /* XXX */ + vnode_rele_internal(vp, 0, 0, 0); +} - simple_lock(&vp->v_interlock); - if (--vp->v_usecount == 1) { - if (UBCINFOEXISTS(vp)) { - vinactive(vp); - simple_unlock(&vp->v_interlock); - VOP_UNLOCK(vp, 0, p); - return; - } - } - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - VOP_UNLOCK(vp, 0, p); - return; - } -#if DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vput: bad ref count", vp); - panic("vput: v_usecount = %d, v_writecount = %d", - vp->v_usecount, vp->v_writecount); - } -#endif - simple_lock(&vnode_free_list_slock); - if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vref", vp); - simple_unlock(&vnode_free_list_slock); - simple_unlock(&vp->v_interlock); - VOP_INACTIVE(vp, p); - /* - * The interlock is not held and - * VOP_INCATIVE releases the vnode lock. - * We could block and the vnode might get reactivated - * Can not just call vfree without checking the state - */ - simple_lock(&vp->v_interlock); - if (!VONLIST(vp)) { - if (vp->v_usecount == 0) - vfree(vp); - else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) - vinactive(vp); - } - simple_unlock(&vp->v_interlock); +void +vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter) +{ + vnode_rele_internal(vp, fmode, dont_reenter, 0); } -/* - * Vnode release. - * If count drops to zero, call inactive routine and return to freelist. - */ + void -vrele(vp) - struct vnode *vp; +vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) { - struct proc *p = current_proc(); /* XXX */ - funnel_t *curflock; - extern int disable_funnel; + struct vfs_context context; - if ((curflock = thread_funnel_get()) != kernel_flock && - !(disable_funnel && curflock != THR_FUNNEL_NULL)) - panic("Entering vrele() without kernel funnel"); + if ( !locked) + vnode_lock(vp); - simple_lock(&vp->v_interlock); - if (--vp->v_usecount == 1) { - if (UBCINFOEXISTS(vp)) { - if ((vp->v_flag & VXLOCK) == 0) - vinactive(vp); - simple_unlock(&vp->v_interlock); - return; - } - } - if (vp->v_usecount > 0) { - simple_unlock(&vp->v_interlock); - return; + if (--vp->v_usecount < 0) + panic("vnode_rele_ext: vp %x usecount -ve : %d", vp, vp->v_usecount); + + if (fmode & FWRITE) { + if (--vp->v_writecount < 0) + panic("vnode_rele_ext: vp %x writecount -ve : %d", vp, vp->v_writecount); } -#if DIAGNOSTIC - if (vp->v_usecount < 0 || vp->v_writecount != 0) { - vprint("vrele: bad ref count", vp); - panic("vrele: ref cnt"); + if (fmode & O_EVTONLY) { + if (--vp->v_kusecount < 0) + panic("vnode_rele_ext: vp %x kusecount -ve : %d", vp, vp->v_kusecount); } -#endif - - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { - /* vnode is being cleaned, just return */ - vfree(vp); - simple_unlock(&vp->v_interlock); + if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { + /* + * vnode is still busy... 
if we're the last + * usecount, mark for a future call to VNOP_INACTIVE + * when the iocount finally drops to 0 + */ + if (vp->v_usecount == 0) { + vp->v_lflag |= VL_NEEDINACTIVE; + vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); + } + if ( !locked) + vnode_unlock(vp); return; } - - if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { - VOP_INACTIVE(vp, p); - /* - * vn_lock releases the interlock and - * VOP_INCATIVE releases the vnode lock. - * We could block and the vnode might get reactivated - * Can not just call vfree without checking the state + vp->v_flag &= ~(VNOCACHE_DATA | VRAOFF); + + if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) { + /* + * vnode is being cleaned, or + * we've requested that we don't reenter + * the filesystem on this release... in + * this case, we'll mark the vnode aged + * if it's been marked for termination */ - simple_lock(&vp->v_interlock); - if (!VONLIST(vp)) { - if (vp->v_usecount == 0) - vfree(vp); - else if ((vp->v_usecount == 1) && UBCINFOEXISTS(vp)) - vinactive(vp); + if (dont_reenter) { + if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) ) + vp->v_lflag |= VL_NEEDINACTIVE; + vp->v_flag |= VAGE; } - simple_unlock(&vp->v_interlock); - } -#if 0 - else { - vfree(vp); - simple_unlock(&vp->v_interlock); - kprintf("vrele: vn_lock() failed for vp = 0x%08x\n", vp); + vnode_list_add(vp); + if ( !locked) + vnode_unlock(vp); + return; } + /* + * at this point both the iocount and usecount + * are zero + * pick up an iocount so that we can call + * VNOP_INACTIVE with the vnode lock unheld + */ + vp->v_iocount++; +#ifdef JOE_DEBUG + record_vp(vp, 1); #endif -} - -void -vagevp(vp) - struct vnode *vp; -{ - simple_lock(&vp->v_interlock); - vp->v_flag |= VAGE; - simple_unlock(&vp->v_interlock); - return; -} - -/* - * Page or buffer structure gets a reference. - */ -void -vhold(vp) - register struct vnode *vp; -{ + vp->v_lflag &= ~VL_NEEDINACTIVE; + vnode_unlock(vp); - simple_lock(&vp->v_interlock); - vp->v_holdcnt++; - simple_unlock(&vp->v_interlock); -} + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + VNOP_INACTIVE(vp, &context); -/* - * Page or buffer structure frees a reference. - */ -void -holdrele(vp) - register struct vnode *vp; -{ + vnode_lock(vp); + /* + * because we dropped the vnode lock to call VNOP_INACTIVE + * the state of the vnode may have changed... we may have + * picked up an iocount, usecount or the MARKTERM may have + * been set... we need to reevaluate the reference counts + * to determine if we can call vnode_reclaim_internal at + * this point... 
if the reference counts are up, we'll pick
+	 * up the MARKTERM state when they get subsequently dropped
+	 */
+	if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
+	     ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
+	        struct  uthread *ut;
 
-	simple_lock(&vp->v_interlock);
-	if (vp->v_holdcnt <= 0)
-		panic("holdrele: holdcnt");
-	vp->v_holdcnt--;
-	simple_unlock(&vp->v_interlock);
+	        ut = get_bsdthread_info(current_thread());
+
+		if (ut->uu_defer_reclaims) {
+		        vp->v_defer_reclaimlist = ut->uu_vreclaims;
+			ut->uu_vreclaims = vp;
+		        goto defer_reclaim;
+		}
+		vnode_reclaim_internal(vp, 1, 0);
+	}
+	vnode_dropiocount(vp, 1);
+	vnode_list_add(vp);
+defer_reclaim:
+	if ( !locked)
+	        vnode_unlock(vp);
+	return;
 }
 
 /*
@@ -1471,34 +1498,79 @@ vflush(mp, skipvp, flags)
 	int flags;
 {
 	struct proc *p = current_proc();
-	struct vnode *vp, *nvp;
+	struct vnode *vp;
 	int busy = 0;
+	int reclaimed = 0;
+	int vid, retval;
 
-	simple_lock(&mntvnode_slock);
+	mount_lock(mp);
+	vnode_iterate_setup(mp);
+	/*
+	 * On regular unmounts(not forced) do a
+	 * quick check for vnodes to be in use. This
+	 * preserves the caching of vnodes. automounter
+	 * tries unmounting every so often to see whether
+	 * it is still busy or not.
+	 */
+	if ((flags & FORCECLOSE)==0) {
+		if (vnode_umount_preflight(mp, skipvp, flags)) {
+			vnode_iterate_clear(mp);
+			mount_unlock(mp);
+			return(EBUSY);
+		}
+	}
 loop:
-	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = vp->v_mntvnodes.le_next;
+	/* if it returns 0 then there is nothing to do */
+	retval = vnode_iterate_prepare(mp);
+
+	if (retval == 0)  {
+		vnode_iterate_clear(mp);
+		mount_unlock(mp);
+		return(retval);
+	}
+
+	/* iterate over all the vnodes */
+	while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
+		vp = TAILQ_FIRST(&mp->mnt_workerqueue);
+		TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
+		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
+		if ( (vp->v_mount != mp) || (vp == skipvp)) {
+			continue;
+		}
+		vid = vp->v_id;
+		mount_unlock(mp);
+		vnode_lock(vp);
+
+		if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) {
+			vnode_unlock(vp);
+			mount_lock(mp);
+			continue;
+		}
+
 		/*
-		 * Skip over a selected vnode.
-		 */
-		if (vp == skipvp)
+		 * If requested, skip over vnodes marked VSYSTEM.
+		 * Skip over all vnodes marked VNOFLUSH.
+		 */
+		if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) ||
+		    (vp->v_flag & VNOFLUSH))) {
+			vnode_unlock(vp);
+			mount_lock(mp);
 			continue;
-
-		simple_lock(&vp->v_interlock);
+		}
 		/*
-		 * Skip over a vnodes marked VSYSTEM or VNOFLUSH.
+		 * If requested, skip over vnodes marked VSWAP.
 		 */
-		if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) || (vp->v_flag & VNOFLUSH))) {
-			simple_unlock(&vp->v_interlock);
+		if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
+			vnode_unlock(vp);
+			mount_lock(mp);
 			continue;
 		}
 		/*
-		 * Skip over a vnodes marked VSWAP.
+		 * If requested, skip over vnodes marked VROOT.
 		 */
-		if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
-			simple_unlock(&vp->v_interlock);
+		if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) {
			vnode_unlock(vp);
+			mount_lock(mp);
 			continue;
 		}
 		/*
@@ -1507,17 +1579,27 @@ loop:
 		 */
 		if ((flags & WRITECLOSE) &&
 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
-			simple_unlock(&vp->v_interlock);
+			vnode_unlock(vp);
+			mount_lock(mp);
 			continue;
 		}
 		/*
-		 * With v_usecount == 0, all we need to do is clear
+		 * If the real usecount is 0, all we need to do is clear
 		 * out the vnode data structures and we are done.
*/ - if (vp->v_usecount == 0) { - simple_unlock(&mntvnode_slock); - vgonel(vp, p); - simple_lock(&mntvnode_slock); + if (((vp->v_usecount == 0) || + ((vp->v_usecount - vp->v_kusecount) == 0))) { + vp->v_iocount++; /* so that drain waits for * other iocounts */ +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vnode_reclaim_internal(vp, 1, 0); + vnode_dropiocount(vp, 1); + vnode_list_add(vp); + + vnode_unlock(vp); + reclaimed++; + mount_lock(mp); continue; } /* @@ -1526,51 +1608,73 @@ loop: * anonymous device. For all other files, just kill them. */ if (flags & FORCECLOSE) { - simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgonel(vp, p); + vp->v_iocount++; /* so that drain waits * for other iocounts */ +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vnode_reclaim_internal(vp, 1, 0); + vnode_dropiocount(vp, 1); + vnode_list_add(vp); + vnode_unlock(vp); } else { vclean(vp, 0, p); + vp->v_mount = 0; /*override any dead_mountp */ + vp->v_lflag &= ~VL_DEAD; vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *)0); + vnode_unlock(vp); } - simple_lock(&mntvnode_slock); + mount_lock(mp); continue; } #if DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif - simple_unlock(&vp->v_interlock); + vnode_unlock(vp); + mount_lock(mp); busy++; } - simple_unlock(&mntvnode_slock); + + /* At this point the worker queue is completed */ + if (busy && ((flags & FORCECLOSE)==0) && reclaimed) { + busy = 0; + reclaimed = 0; + (void)vnode_iterate_reloadq(mp); + /* returned with mount lock held */ + goto loop; + } + + /* if new vnodes were created in between retry the reclaim */ + if ( vnode_iterate_reloadq(mp) != 0) { + if (!(busy && ((flags & FORCECLOSE)==0))) + goto loop; + } + vnode_iterate_clear(mp); + mount_unlock(mp); + if (busy && ((flags & FORCECLOSE)==0)) return (EBUSY); return (0); } +int num_recycledvnodes=0; /* * Disassociate the underlying file system from a vnode. - * The vnode interlock is held on entry. + * The vnode lock is held on entry. */ static void -vclean(vp, flags, p) - struct vnode *vp; - int flags; - struct proc *p; +vclean(vnode_t vp, int flags, proc_t p) { + struct vfs_context context; int active; - int didhold; + int need_inactive; + int already_terminating; + kauth_cred_t ucred = NULL; - /* - * if the vnode is not obtained by calling getnewvnode() we - * are not responsible for the cleanup. Just return. - */ - if (!(vp->v_flag & VSTANDARD)) { - simple_unlock(&vp->v_interlock); - return; - } + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* * Check to see if the vnode is in use. @@ -1578,124 +1682,103 @@ vclean(vp, flags, p) * so that its count cannot fall to zero and generate a * race against ourselves to recycle it. */ - if (active = vp->v_usecount) { - /* - * active vnode can not be on the free list. - * we are about to take an extra reference on this vnode - * do the queue management as needed - * Not doing so can cause "still on list" or - * "vnreclaim: v_usecount" panic if VOP_LOCK() blocks. - */ - simple_lock(&vnode_free_list_slock); - if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vclean", vp); - simple_unlock(&vnode_free_list_slock); + active = vp->v_usecount; - if (++vp->v_usecount <= 0) - panic("vclean: v_usecount"); - } + /* + * just in case we missed sending a needed + * VNOP_INACTIVE, we'll do it now + */ + need_inactive = (vp->v_lflag & VL_NEEDINACTIVE); + + vp->v_lflag &= ~VL_NEEDINACTIVE; /* * Prevent the vnode from being recycled or * brought into use while we clean it out. 
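vflush() above is the unmount-side sweep: the preflight keeps ordinary (non-forced) unmounts from tearing down cached vnodes that turn out to be busy, while FORCECLOSE reclaims even active ones. A sketch of how a filesystem's unmount entry point would typically drive it; myfs_unmount is hypothetical:

/*
 * Sketch: translating MNT_FORCE into FORCECLOSE for vflush().
 * Without FORCECLOSE, EBUSY here aborts the unmount and leaves
 * the vnode cache intact.
 */
static int
myfs_unmount(mount_t mp, int mntflags, __unused vfs_context_t ctx)
{
	int flags = SKIPSYSTEM;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	return (vflush(mp, NULLVP, flags));
}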
*/ - if (vp->v_flag & VXLOCK) - panic("vclean: deadlock"); - vp->v_flag |= VXLOCK; + already_terminating = (vp->v_lflag & VL_TERMINATE); + + vp->v_lflag |= VL_TERMINATE; /* - * Even if the count is zero, the VOP_INACTIVE routine may still - * have the object locked while it cleans it out. The VOP_LOCK - * ensures that the VOP_INACTIVE routine is done with its work. - * For active vnodes, it ensures that no other activity can - * occur while the underlying object is being cleaned out. + * remove the vnode from any mount list + * it might be on... */ - VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); + insmntque(vp, (struct mount *)0); + ucred = vp->v_cred; + vp->v_cred = NULL; + + vnode_unlock(vp); + + if (ucred) + kauth_cred_rele(ucred); + + OSAddAtomic(1, &num_recycledvnodes); /* - * While blocked in VOP_LOCK() someone could have dropped - * reference[s] and we could land on the inactive list. - * if this vnode is on the inactive list - * take it off the list. + * purge from the name cache as early as possible... */ - simple_lock(&vnode_free_list_slock); - if (ISSET((vp)->v_flag, VUINACTIVE)) - VREMINACTIVE("vclean", vp); - simple_unlock(&vnode_free_list_slock); + cache_purge(vp); - /* Clean the pages in VM. */ if (active && (flags & DOCLOSE)) - VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); - - /* Clean the pages in VM. */ - didhold = ubc_hold(vp); - if ((active) && (didhold)) - (void)ubc_clean(vp, 0); /* do not invalidate */ + VNOP_CLOSE(vp, IO_NDELAY, &context); /* * Clean out any buffers associated with the vnode. */ if (flags & DOCLOSE) { +#if NFSCLIENT if (vp->v_tag == VT_NFS) nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); else - vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); +#endif + { + VNOP_FSYNC(vp, MNT_WAIT, &context); + buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); + } + if (UBCINFOEXISTS(vp)) + /* + * Clean the pages in VM. + */ + (void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL); } + if (UBCINFOEXISTS(vp)) + cluster_release(vp->v_ubcinfo); - if (active) - VOP_INACTIVE(vp, p); - else - VOP_UNLOCK(vp, 0, p); + if (active || need_inactive) + VNOP_INACTIVE(vp, &context); /* Destroy ubc named reference */ - if (didhold) { - ubc_rele(vp); - ubc_destroy_named(vp); - } - /* - * Make sure vp isn't on the inactive list. - */ - simple_lock(&vnode_free_list_slock); - if (ISSET((vp)->v_flag, VUINACTIVE)) { - VREMINACTIVE("vclean", vp); - } - simple_unlock(&vnode_free_list_slock); + ubc_destroy_named(vp); /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp, p)) + if (VNOP_RECLAIM(vp, &context)) panic("vclean: cannot reclaim"); // make sure the name & parent ptrs get cleaned out! - clean_up_name_parent_ptrs(vp); + vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); - cache_purge(vp); - if (vp->v_vnlock) { - struct lock__bsd__ *tmp = vp->v_vnlock; - if ((tmp->lk_flags & LK_DRAINED) == 0) - vprint("vclean: lock not drained", vp); - vp->v_vnlock = NULL; - FREE(tmp, M_TEMP); - } + vnode_lock(vp); - /* It's dead, Jim! */ + vp->v_mount = dead_mountp; vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; + vp->v_data = NULL; - insmntque(vp, (struct mount *)0); + vp->v_lflag |= VL_DEAD; - /* - * Done with purge, notify sleepers of the grim news. - */ - vp->v_flag &= ~VXLOCK; - if (vp->v_flag & VXWANT) { - vp->v_flag &= ~VXWANT; - wakeup((caddr_t)vp); + if (already_terminating == 0) { + vp->v_lflag &= ~VL_TERMINATE; + /* + * Done with purge, notify sleepers of the grim news. 
+	 */
+		if (vp->v_lflag & VL_TERMWANT) {
+		        vp->v_lflag &= ~VL_TERMWANT;
+			wakeup(&vp->v_lflag);
+		}
 	}
-
-	if (active)
-		vrele(vp);
 }
 
 /*
@@ -1703,64 +1786,53 @@ vclean(vp, flags, p)
  * and with all vnodes aliased to the requested vnode.
  */
 int
-vop_revoke(ap)
-	struct vop_revoke_args /* {
-		struct vnode *a_vp;
-		int a_flags;
-	} */ *ap;
+vn_revoke(vnode_t vp, int flags, __unused vfs_context_t a_context)
 {
-	struct vnode *vp, *vq;
-	struct proc *p = current_proc();
+	struct vnode *vq;
+	int vid;
 
 #if DIAGNOSTIC
-	if ((ap->a_flags & REVOKEALL) == 0)
-		panic("vop_revoke");
+	if ((flags & REVOKEALL) == 0)
+		panic("vnop_revoke");
 #endif
 
-	vp = ap->a_vp;
-	simple_lock(&vp->v_interlock);
-
 	if (vp->v_flag & VALIASED) {
 		/*
 		 * If a vgone (or vclean) is already in progress,
 		 * wait until it is done and return.
 		 */
-		if (vp->v_flag & VXLOCK) {
-			while (vp->v_flag & VXLOCK) {
-				vp->v_flag |= VXWANT;
-				simple_unlock(&vp->v_interlock);
-				(void)tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
-			}
-			return (0);
+		vnode_lock(vp);
+		if (vp->v_lflag & VL_TERMINATE) {
+			vnode_unlock(vp);
+			return(ENOENT);
 		}
+		vnode_unlock(vp);
 		/*
 		 * Ensure that vp will not be vgone'd while we
 		 * are eliminating its aliases.
 		 */
-		vp->v_flag |= VXLOCK;
-		simple_unlock(&vp->v_interlock);
+		SPECHASH_LOCK();
 		while (vp->v_flag & VALIASED) {
-			simple_lock(&spechash_slock);
 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 				if (vq->v_rdev != vp->v_rdev ||
 				    vq->v_type != vp->v_type || vp == vq)
 					continue;
-				simple_unlock(&spechash_slock);
-				vgone(vq);
+				vid = vq->v_id;
+				SPECHASH_UNLOCK();
+				if (vnode_getwithvid(vq,vid)){
+					SPECHASH_LOCK();
+					break;
+				}
+				vnode_reclaim_internal(vq, 0, 0);
+				vnode_put(vq);
+				SPECHASH_LOCK();
 				break;
 			}
-			if (vq == NULLVP)
-				simple_unlock(&spechash_slock);
 		}
-		/*
-		 * Remove the lock so that vgone below will
-		 * really eliminate the vnode after which time
-		 * vgone will awaken any sleepers.
-		 */
-		simple_lock(&vp->v_interlock);
-		vp->v_flag &= ~VXLOCK;
+		SPECHASH_UNLOCK();
 	}
-	vgonel(vp, p);
+	vnode_reclaim_internal(vp, 0, 0);
+
 	return (0);
 }
 
@@ -1769,197 +1841,174 @@ vop_revoke(ap)
 * Release the passed interlock if the vnode will be recycled.
 */
 int
-vrecycle(vp, inter_lkp, p)
+vnode_recycle(vp)
 	struct vnode *vp;
-	struct slock *inter_lkp;
-	struct proc *p;
 {
+	vnode_lock(vp);
 
-	simple_lock(&vp->v_interlock);
-	if (vp->v_usecount == 0) {
-		if (inter_lkp)
-			simple_unlock(inter_lkp);
-		vgonel(vp, p);
-		return (1);
-	}
-	simple_unlock(&vp->v_interlock);
-	return (0);
+	if (vp->v_iocount || vp->v_usecount) {
+		vp->v_lflag |= VL_MARKTERM;
+		vnode_unlock(vp);
+		return(0);
+	}
+	vnode_reclaim_internal(vp, 1, 0);
+	vnode_unlock(vp);
+
+	return (1);
 }
 
-/*
- * Eliminate all activity associated with a vnode
- * in preparation for reuse.
- */
-void
-vgone(vp)
-	struct vnode *vp;
+static int
+vnode_reload(vnode_t vp)
 {
-	struct proc *p = current_proc();
+	vnode_lock(vp);
 
-	simple_lock(&vp->v_interlock);
-	vgonel(vp, p);
+	if ((vp->v_iocount > 1) || vp->v_usecount) {
+		vnode_unlock(vp);
+		return(0);
+	}
+	if (vp->v_iocount <= 0)
+		panic("vnode_reload with no iocount %d", vp->v_iocount);
+
+	/* mark for release when iocount is dropped */
+	vp->v_lflag |= VL_MARKTERM;
+	vnode_unlock(vp);
+
+	return (1);
 }
 
-/*
- * vgone, with the vp interlock held.
- */
-void
-vgonel(vp, p)
-	struct vnode *vp;
-	struct proc *p;
+
+static void
+vgone(vnode_t vp)
 {
 	struct vnode *vq;
 	struct vnode *vx;
 
-	/*
-	 * if the vnode is not obtained by calling getnewvnode() we
-	 * are not responsible for the cleanup. Just return.
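vnode_recycle() above never blocks on a busy vnode: if any iocount or usecount is outstanding it only sets VL_MARKTERM, and the reclaim happens when the last reference drains. A sketch of what that contract means to a caller; invalidate_cached_node is hypothetical:

/*
 * Sketch of vnode_recycle()'s deferred contract.  Return 1 means
 * the vnode was reclaimed on the spot; 0 means VL_MARKTERM is set
 * and vnode_reclaim_internal() will run when the last iocount or
 * usecount is dropped, so the caller has nothing left to do.
 */
static void
invalidate_cached_node(vnode_t vp)
{
	if (vnode_recycle(vp) == 1) {
		/* already cleaned: v_op is dead_vnodeop_p by now */
		return;
	}
	/* busy: termination is queued behind the live references */
}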
- */ - if (!(vp->v_flag & VSTANDARD)) { - simple_unlock(&vp->v_interlock); - return; - } - - /* - * If a vgone (or vclean) is already in progress, - * wait until it is done and return. - */ - if (vp->v_flag & VXLOCK) { - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - (void)tsleep((caddr_t)vp, PINOD, "vgone", 0); - } - return; - } /* * Clean out the filesystem specific data. + * vclean also takes care of removing the + * vnode from any mount list it might be on */ - vclean(vp, DOCLOSE, p); - /* - * Delete from old mount point vnode list, if on one. - */ - if (vp->v_mount != NULL) - insmntque(vp, (struct mount *)0); + vclean(vp, DOCLOSE, current_proc()); + /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { - simple_lock(&spechash_slock); - if (*vp->v_hashchain == vp) { - *vp->v_hashchain = vp->v_specnext; - } else { - for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { - if (vq->v_specnext != vp) - continue; - vq->v_specnext = vp->v_specnext; - break; - } + SPECHASH_LOCK(); + if (*vp->v_hashchain == vp) { + *vp->v_hashchain = vp->v_specnext; + } else { + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_specnext != vp) + continue; + vq->v_specnext = vp->v_specnext; + break; + } if (vq == NULL) panic("missing bdev"); - } - if (vp->v_flag & VALIASED) { - vx = NULL; - for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { - if (vq->v_rdev != vp->v_rdev || - vq->v_type != vp->v_type) - continue; - if (vx) - break; - vx = vq; } - if (vx == NULL) - panic("missing alias"); - if (vq == NULL) - vx->v_flag &= ~VALIASED; - vp->v_flag &= ~VALIASED; - } - simple_unlock(&spechash_slock); - { - struct specinfo *tmp = vp->v_specinfo; - vp->v_specinfo = NULL; - FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO); - } - } - /* - * If it is on the freelist and not already at the head, - * move it to the head of the list. The test of the back - * pointer and the reference count of zero is because - * it will be removed from the free list by getnewvnode, - * but will not have its reference count incremented until - * after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to - * close the previous instance of the underlying object. - * So, the back pointer is explicitly set to `0xdeadb' in - * getnewvnode after removing it from the freelist to ensure - * that we do not try to move it here. - */ - if (vp->v_usecount == 0 && (vp->v_flag & VUINACTIVE) == 0) { - simple_lock(&vnode_free_list_slock); - if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && - vnode_free_list.tqh_first != vp) { - TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); - TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); - } - simple_unlock(&vnode_free_list_slock); + if (vp->v_flag & VALIASED) { + vx = NULL; + for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { + if (vq->v_rdev != vp->v_rdev || + vq->v_type != vp->v_type) + continue; + if (vx) + break; + vx = vq; + } + if (vx == NULL) + panic("missing alias"); + if (vq == NULL) + vx->v_flag &= ~VALIASED; + vp->v_flag &= ~VALIASED; + } + SPECHASH_UNLOCK(); + { + struct specinfo *tmp = vp->v_specinfo; + vp->v_specinfo = NULL; + FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO); + } } - vp->v_type = VBAD; } /* * Lookup a vnode by device number. 
*/ int -vfinddev(dev, type, vpp) - dev_t dev; - enum vtype type; - struct vnode **vpp; +check_mountedon(dev_t dev, enum vtype type, int *errorp) { - struct vnode *vp; + vnode_t vp; int rc = 0; + int vid; - simple_lock(&spechash_slock); +loop: + SPECHASH_LOCK(); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; - *vpp = vp; - rc = 1; - break; + vid = vp->v_id; + SPECHASH_UNLOCK(); + if (vnode_getwithvid(vp,vid)) + goto loop; + vnode_lock(vp); + if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { + vnode_unlock(vp); + if ((*errorp = vfs_mountedon(vp)) != 0) + rc = 1; + } else + vnode_unlock(vp); + vnode_put(vp); + return(rc); } - simple_unlock(&spechash_slock); - return (rc); + SPECHASH_UNLOCK(); + return (0); } /* * Calculate the total number of references to a special device. */ int -vcount(vp) - struct vnode *vp; +vcount(vnode_t vp) { - struct vnode *vq, *vnext; + vnode_t vq, vnext; int count; + int vid; loop: if ((vp->v_flag & VALIASED) == 0) - return (vp->v_usecount); - simple_lock(&spechash_slock); + return (vp->v_usecount - vp->v_kusecount); + + SPECHASH_LOCK(); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; + vid = vq->v_id; + SPECHASH_UNLOCK(); + + if (vnode_getwithvid(vq, vid)) { + goto loop; + } /* * Alias, but not in use, so flush it out. */ - if (vq->v_usecount == 0 && vq != vp) { - simple_unlock(&spechash_slock); - vgone(vq); + vnode_lock(vq); + if ((vq->v_usecount == 0) && (vq->v_iocount == 1) && vq != vp) { + vnode_reclaim_internal(vq, 1, 0); + vnode_unlock(vq); + vnode_put(vq); goto loop; } - count += vq->v_usecount; + count += (vq->v_usecount - vq->v_kusecount); + vnode_unlock(vq); + vnode_put(vq); + + SPECHASH_LOCK(); } - simple_unlock(&spechash_slock); + SPECHASH_UNLOCK(); + return (count); } @@ -1972,162 +2021,188 @@ static char *typename[] = { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; void -vprint(label, vp) - char *label; - register struct vnode *vp; +vprint(const char *label, struct vnode *vp) { - char buf[64]; + char sbuf[64]; if (label != NULL) printf("%s: ", label); - printf("type %s, usecount %d, writecount %d, refcount %d,", - typename[vp->v_type], vp->v_usecount, vp->v_writecount, - vp->v_holdcnt); - buf[0] = '\0'; + printf("type %s, usecount %d, writecount %d", + typename[vp->v_type], vp->v_usecount, vp->v_writecount); + sbuf[0] = '\0'; if (vp->v_flag & VROOT) - strcat(buf, "|VROOT"); + strcat(sbuf, "|VROOT"); if (vp->v_flag & VTEXT) - strcat(buf, "|VTEXT"); + strcat(sbuf, "|VTEXT"); if (vp->v_flag & VSYSTEM) - strcat(buf, "|VSYSTEM"); + strcat(sbuf, "|VSYSTEM"); if (vp->v_flag & VNOFLUSH) - strcat(buf, "|VNOFLUSH"); - if (vp->v_flag & VXLOCK) - strcat(buf, "|VXLOCK"); - if (vp->v_flag & VXWANT) - strcat(buf, "|VXWANT"); + strcat(sbuf, "|VNOFLUSH"); if (vp->v_flag & VBWAIT) - strcat(buf, "|VBWAIT"); + strcat(sbuf, "|VBWAIT"); if (vp->v_flag & VALIASED) - strcat(buf, "|VALIASED"); - if (buf[0] != '\0') - printf(" flags (%s)", &buf[1]); - if (vp->v_data == NULL) { - printf("\n"); - } else { - printf("\n\t"); - VOP_PRINT(vp); - } + strcat(sbuf, "|VALIASED"); + if (sbuf[0] != '\0') + printf(" flags (%s)", &sbuf[1]); } -#ifdef DEBUG -/* - * List all of the locked vnodes in the system. - * Called when debugging the kernel. 
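vcount() above now reports real user references, summing (v_usecount - v_kusecount) across every alias of the device so kernel-only holds don't keep a device looking open. The classic consumer is a device close path; a sketch with myfs_spec_close as a hypothetical name:

/*
 * Sketch: the traditional vcount() idiom in a special-device close
 * routine.  A result of 1 means this alias holds the only remaining
 * user reference, i.e. this is the last close of the device.
 */
static int
myfs_spec_close(vnode_t vp)
{
	if (vcount(vp) > 1)
		return (0);		/* other aliases still open */

	/* last close: tear down the underlying device state here */
	return (0);
}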
- */ -void -printlockedvnodes() -{ - struct proc *p = current_proc(); - struct mount *mp, *nmp; - struct vnode *vp; - printf("Locked vnodes\n"); - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { - if (VOP_ISLOCKED(vp)) - vprint((char *)0, vp); - } - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); +int +vn_getpath(struct vnode *vp, char *pathbuf, int *len) +{ + return build_path(vp, pathbuf, *len, len); } -#endif + + +static char *extension_table=NULL; +static int nexts; +static int max_ext_width; static int -build_path(struct vnode *vp, char *buff, int buflen, int *outlen) -{ - char *end, *str; - int i, len, ret=0, counter=0; - - end = &buff[buflen-1]; - *--end = '\0'; - - while(vp && VPARENT(vp) != vp) { - // the maximum depth of a file system hierarchy is MAXPATHLEN/2 - // (with single-char names separated by slashes). we panic if - // we've ever looped more than that. - if (counter++ > MAXPATHLEN/2) { - panic("build_path: vnode parent chain is too long! vp 0x%x\n", vp); - } - str = VNAME(vp); - if (VNAME(vp) == NULL) { - if (VPARENT(vp) != NULL) { - ret = EINVAL; - } - break; - } - - // count how long the string is - for(len=0; *str; str++, len++) - /* nothing */; +extension_cmp(void *a, void *b) +{ + return (strlen((char *)a) - strlen((char *)b)); +} - // check that there's enough space - if ((end - buff) < len) { - ret = ENOSPC; - break; - } - // copy it backwards - for(; len > 0; len--) { - *--end = *--str; +// +// This is the API LaunchServices uses to inform the kernel +// of the list of package extensions to ignore. +// +// Internally we keep the list sorted by the length of +// the extension (from longest to shortest). We sort the +// list of extensions so that we can speed up our searches +// when comparing file names -- we only compare extensions +// that could possibly fit into the file name, not all of +// them (i.e. a short 8 character name can't have an 8 +// character extension). +// +__private_extern__ int +set_package_extensions_table(void *data, int nentries, int maxwidth) +{ + char *new_exts, *ptr; + int error, i, len; + + if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { + return EINVAL; + } + + MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); + + error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth); + if (error) { + FREE(new_exts, M_TEMP); + return error; + } + + if (extension_table) { + FREE(extension_table, M_TEMP); + } + extension_table = new_exts; + nexts = nentries; + max_ext_width = maxwidth; + + qsort(extension_table, nexts, maxwidth, extension_cmp); + + return 0; +} + + +__private_extern__ int +is_package_name(char *name, int len) +{ + int i, extlen; + char *ptr, *name_ext; + + if (len <= 3) { + return 0; + } + + name_ext = NULL; + for(ptr=name; *ptr != '\0'; ptr++) { + if (*ptr == '.') { + name_ext = ptr; } + } - // put in the path separator - *--end = '/'; + // if there is no "." extension, it can't match + if (name_ext == NULL) { + return 0; + } - // walk up the chain. - vp = VPARENT(vp); + // advance over the "." + name_ext++; - // check if we're crossing a mount point and - // switch the vp if we are.
- if (vp && (vp->v_flag & VROOT)) { - vp = vp->v_mount->mnt_vnodecovered; + // now iterate over all the extensions to see if any match + ptr = &extension_table[0]; + for(i=0; i < nexts; i++, ptr+=max_ext_width) { + extlen = strlen(ptr); + if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { + // aha, a match! + return 1; } } - // slide it down to the beginning of the buffer - memmove(buff, end, &buff[buflen] - end); - - *outlen = &buff[buflen] - end; - - return ret; + // if we get here, no extension matched + return 0; } -__private_extern__ int -vn_getpath(struct vnode *vp, char *pathbuf, int *len) +int +vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *component) { - return build_path(vp, pathbuf, *len, len); -} + char *ptr, *end; + int comp=0; + + *component = -1; + if (*path != '/') { + return EINVAL; + } + + end = path + 1; + while(end < path + pathlen && *end != '\0') { + while(end < path + pathlen && *end == '/' && *end != '\0') { + end++; + } + + ptr = end; + + while(end < path + pathlen && *end != '/' && *end != '\0') { + end++; + } + + if (end > path + pathlen) { + // hmm, string wasn't null terminated + return EINVAL; + } + + *end = '\0'; + if (is_package_name(ptr, end - ptr)) { + *component = comp; + break; + } + end++; + comp++; + } + + return 0; +} /* * Top level filesystem related information gathering. */ +extern unsigned int vfs_nummntops; + int -vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) - int *name; - u_int namelen; - void *oldp; - size_t *oldlenp; - void *newp; - size_t newlen; - struct proc *p; +vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, struct proc *p) { - struct vfsconf *vfsp; + struct vfstable *vfsp; int *username; u_int usernamelen; int error; + struct vfsconf *vfsc; /* * The VFS_NUMMNTOPS shouldn't be at name[0] since @@ -2140,7 +2215,6 @@ vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) * name[1]: VFS_NUMMNTOPS */ if (namelen == 1 && name[0] == VFS_NUMMNTOPS) { - extern unsigned int vfs_nummntops; return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops)); } @@ -2148,13 +2222,18 @@ vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) if (namelen < 2) return (EISDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { + struct vfs_context context; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) - return (EOPNOTSUPP); + return (ENOTSUP); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, - oldp, oldlenp, newp, newlen, p)); + oldp, oldlenp, newp, newlen, &context)); } switch (name[1]) { case VFS_MAXTYPENUM: @@ -2166,9 +2245,27 @@ vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) - return (EOPNOTSUPP); - return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, - sizeof(struct vfsconf))); + return (ENOTSUP); + vfsc = (struct vfsconf *)vfsp; + if (proc_is64bit(p)) { + struct user_vfsconf usr_vfsc; + usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops); + bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name)); + usr_vfsc.vfc_typenum = vfsc->vfc_typenum; + usr_vfsc.vfc_refcount = vfsc->vfc_refcount; + usr_vfsc.vfc_flags = vfsc->vfc_flags; + usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot); + usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next); + return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc, + 
sizeof(usr_vfsc))); + } + else { + return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc, + sizeof(struct vfsconf))); + } + + case VFS_SET_PACKAGE_EXTS: + return set_package_extensions_table((void *)name[1], name[2], name[3]); } /* * We need to get back into the general MIB, so we need to re-prepend @@ -2179,8 +2276,8 @@ vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) M_TEMP, M_WAITOK); bcopy(name, username + 1, namelen * sizeof(*name)); username[0] = CTL_VFS; - error = userland_sysctl(p, username, usernamelen, oldp, oldlenp, 1, - newp, newlen, oldlenp); + error = userland_sysctl(p, username, usernamelen, oldp, + oldlenp, 1, newp, newlen, oldlenp); FREE(username, M_TEMP); return (error); } @@ -2193,11 +2290,9 @@ int kinfo_vdebug = 1; */ /* ARGSUSED */ int -sysctl_vnode(where, sizep, p) - char *where; - size_t *sizep; - struct proc *p; +sysctl_vnode(__unused user_addr_t where, __unused size_t *sizep) { +#if 0 struct mount *mp, *nmp; struct vnode *nvp, *vp; char *bp = where, *savebp; @@ -2212,55 +2307,46 @@ sysctl_vnode(where, sizep, p) } ewhere = where + *sizep; - simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + if (vfs_busy(mp, LK_NOWAIT)) { nmp = mp->mnt_list.cqe_next; continue; } savebp = bp; again: - simple_lock(&mntvnode_slock); - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = nvp) { + TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { - simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); bp = savebp; goto again; } - nvp = vp->v_mntvnodes.le_next; if (bp + VPTRSZ + VNODESZ > ewhere) { - simple_unlock(&mntvnode_slock); - vfs_unbusy(mp, p); + vfs_unbusy(mp); *sizep = bp - where; return (ENOMEM); } - simple_unlock(&mntvnode_slock); if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) { - vfs_unbusy(mp, p); + vfs_unbusy(mp); return (error); } bp += VPTRSZ + VNODESZ; - simple_lock(&mntvnode_slock); } - simple_unlock(&mntvnode_slock); - simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); + vfs_unbusy(mp); } - simple_unlock(&mountlist_slock); *sizep = bp - where; return (0); +#else + return(EINVAL); +#endif } /* @@ -2273,10 +2359,12 @@ vfs_mountedon(vp) struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) - return (EBUSY); + SPECHASH_LOCK(); + if (vp->v_specflags & SI_MOUNTEDON) { + error = EBUSY; + goto out; + } if (vp->v_flag & VALIASED) { - simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -2286,8 +2374,9 @@ vfs_mountedon(vp) break; } } - simple_unlock(&spechash_slock); } +out: + SPECHASH_UNLOCK(); return (error); } @@ -2298,635 +2387,89 @@ vfs_mountedon(vp) __private_extern__ void vfs_unmountall() { - struct mount *mp, *nmp; + struct mount *mp; struct proc *p = current_proc(); + int error; /* * Since this only runs when rebooting, it is not interlocked. */ - for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_prev; - (void) dounmount(mp, MNT_FORCE, p); - } -} - -/* - * Build hash lists of net addresses and hang them off the mount point. - * Called by vfs_export() to set up the lists of export addresses. 
- */ -static int -vfs_hang_addrlist(mp, nep, argp) - struct mount *mp; - struct netexport *nep; - struct export_args *argp; -{ - register struct netcred *np; - register struct radix_node_head *rnh; - register int i; - struct radix_node *rn; - struct sockaddr *saddr, *smask = 0; - struct domain *dom; - int error; - - if (argp->ex_addrlen == 0) { - if (mp->mnt_flag & MNT_DEFEXPORTED) - return (EPERM); - np = &nep->ne_defexported; - np->netc_exflags = argp->ex_flags; - np->netc_anon = argp->ex_anon; - np->netc_anon.cr_ref = 1; - mp->mnt_flag |= MNT_DEFEXPORTED; - return (0); - } - i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; - MALLOC(np, struct netcred *, i, M_NETADDR, M_WAITOK); - bzero((caddr_t)np, i); - saddr = (struct sockaddr *)(np + 1); - if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) - goto out; - if (saddr->sa_len > argp->ex_addrlen) - saddr->sa_len = argp->ex_addrlen; - if (argp->ex_masklen) { - smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); - error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); - if (error) - goto out; - if (smask->sa_len > argp->ex_masklen) - smask->sa_len = argp->ex_masklen; - } - i = saddr->sa_family; - if ((rnh = nep->ne_rtable[i]) == 0) { - /* - * Seems silly to initialize every AF when most are not - * used, do so on demand here - */ - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_family == i && dom->dom_rtattach) { - dom->dom_rtattach((void **)&nep->ne_rtable[i], - dom->dom_rtoffset); - break; - } - if ((rnh = nep->ne_rtable[i]) == 0) { - error = ENOBUFS; - goto out; + mount_list_lock(); + while(!TAILQ_EMPTY(&mountlist)) { + mp = TAILQ_LAST(&mountlist, mntlist); + mount_list_unlock(); + error = dounmount(mp, MNT_FORCE, p); + if (error) { + mount_list_lock(); + TAILQ_REMOVE(&mountlist, mp, mnt_list); + printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + continue; } + mount_list_lock(); } - rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, - np->netc_rnodes); - if (rn == 0) { - /* - * One of the reasons that rnh_addaddr may fail is that - * the entry already exists. To check for this case, we - * look up the entry to see if it is there. If so, we - * do not need to make a new entry but do return success. - */ - _FREE(np, M_NETADDR); - rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); - if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && - ((struct netcred *)rn)->netc_exflags == argp->ex_flags && - !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, - (caddr_t)&argp->ex_anon, sizeof(struct ucred))) - return (0); - return (EPERM); - } - np->netc_exflags = argp->ex_flags; - np->netc_anon = argp->ex_anon; - np->netc_anon.cr_ref = 1; - return (0); -out: - _FREE(np, M_NETADDR); - return (error); + mount_list_unlock(); } -/* ARGSUSED */ -static int -vfs_free_netcred(rn, w) - struct radix_node *rn; - caddr_t w; -{ - register struct radix_node_head *rnh = (struct radix_node_head *)w; - - (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); - _FREE((caddr_t)rn, M_NETADDR); - return (0); -} -/* - * Free the net address hash lists that are hanging off the mount points. +/* + * This routine is called from vnode_pager_no_senders() + * which in turn can be called with vnode locked by vnode_uncache() + * But it could also get called as a result of vm_object_cache_trim(). + * In that case lock state is unknown. + * AGE the vnode so that it gets recycled quickly. 
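+ * (A sketch of the handshake, for illustration only: the reclaim side
+ * is assumed to block until the pager drops its named reference, e.g.
+ *
+ *	while (vp->v_lflag & VNAMED_UBC)
+ *		msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vclean", 0);
+ *
+ * and the wakeup(&vp->v_lflag) at the end of this routine is what
+ * releases that wait.)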
*/ -static void -vfs_free_addrlist(nep) - struct netexport *nep; +__private_extern__ void +vnode_pager_vrele(struct vnode *vp) { - register int i; - register struct radix_node_head *rnh; + vnode_lock(vp); - for (i = 0; i <= AF_MAX; i++) - if (rnh = nep->ne_rtable[i]) { - (*rnh->rnh_walktree)(rnh, vfs_free_netcred, - (caddr_t)rnh); - _FREE((caddr_t)rnh, M_RTABLE); - nep->ne_rtable[i] = 0; - } -} + if (!ISSET(vp->v_lflag, VL_TERMINATE)) + panic("vnode_pager_vrele: vp not in termination"); + vp->v_lflag &= ~VNAMED_UBC; -int -vfs_export(mp, nep, argp) - struct mount *mp; - struct netexport *nep; - struct export_args *argp; -{ - int error; + if (UBCINFOEXISTS(vp)) { + struct ubc_info *uip = vp->v_ubcinfo; - if (argp->ex_flags & MNT_DELEXPORT) { - vfs_free_addrlist(nep); - mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); - } - if (argp->ex_flags & MNT_EXPORTED) { - if (error = vfs_hang_addrlist(mp, nep, argp)) - return (error); - mp->mnt_flag |= MNT_EXPORTED; + if (ISSET(uip->ui_flags, UI_WASMAPPED)) + SET(vp->v_flag, VWASMAPPED); + vp->v_ubcinfo = UBC_INFO_NULL; + + ubc_info_deallocate(uip); + } else { + panic("NO ubcinfo in vnode_pager_vrele"); } - return (0); + vnode_unlock(vp); + + wakeup(&vp->v_lflag); } -struct netcred * -vfs_export_lookup(mp, nep, nam) - register struct mount *mp; - struct netexport *nep; - struct mbuf *nam; + +#include + +errno_t +vfs_init_io_attributes(vnode_t devvp, mount_t mp) { - register struct netcred *np; - register struct radix_node_head *rnh; - struct sockaddr *saddr; + int error; + off_t readblockcnt; + off_t writeblockcnt; + off_t readmaxcnt; + off_t writemaxcnt; + off_t readsegcnt; + off_t writesegcnt; + off_t readsegsize; + off_t writesegsize; + u_long blksize; + u_int64_t temp; + struct vfs_context context; - np = NULL; - if (mp->mnt_flag & MNT_EXPORTED) { - /* - * Lookup in the export list first. - */ - if (nam != NULL) { - saddr = mtod(nam, struct sockaddr *); - rnh = nep->ne_rtable[saddr->sa_family]; - if (rnh != NULL) { - np = (struct netcred *) - (*rnh->rnh_matchaddr)((caddr_t)saddr, - rnh); - if (np && np->netc_rnodes->rn_flags & RNF_ROOT) - np = NULL; - } - } - /* - * If no address match, use the default if it exists. - */ - if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) - np = &nep->ne_defexported; - } - return (np); -} - -/* - * try to reclaim vnodes from the memory - * object cache - */ -static int -vm_object_cache_reclaim(int count) -{ - int cnt; - void vnode_pager_release_from_cache(int *); - - /* attempt to reclaim vnodes from VM object cache */ - cnt = count; - vnode_pager_release_from_cache(&cnt); - return(cnt); -} - -/* - * Release memory object reference held by inactive vnodes - * and then try to reclaim some vnodes from the memory - * object cache - */ -static int -vnreclaim(int count) -{ - int i, loopcnt; - struct vnode *vp; - int err; - struct proc *p; - - i = 0; - loopcnt = 0; - - /* Try to release "count" vnodes from the inactive list */ -restart: - if (++loopcnt > inactivevnodes) { - /* - * I did my best trying to reclaim the vnodes. - * Do not try any more as that would only lead to - * long latencies. Also in the worst case - * this can get totally CPU bound. 
- * Just fall though and attempt a reclaim of VM - * object cache - */ - goto out; - } - - simple_lock(&vnode_free_list_slock); - for (vp = TAILQ_FIRST(&vnode_inactive_list); - (vp != NULLVP) && (i < count); - vp = TAILQ_NEXT(vp, v_freelist)) { - - if (!simple_lock_try(&vp->v_interlock)) - continue; - - if (vp->v_usecount != 1) - panic("vnreclaim: v_usecount"); - - if(!UBCINFOEXISTS(vp)) { - if (vp->v_type == VBAD) { - VREMINACTIVE("vnreclaim", vp); - simple_unlock(&vp->v_interlock); - continue; - } else - panic("non UBC vnode on inactive list"); - /* Should not reach here */ - } - - /* If vnode is already being reclaimed, wait */ - if ((vp->v_flag & VXLOCK) || (vp->v_flag & VORECLAIM)) { - vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - simple_unlock(&vnode_free_list_slock); - (void)tsleep((caddr_t)vp, PINOD, "vocr", 0); - goto restart; - } - - /* - * if the vnode is being initialized, - * skip over it - */ - if (ISSET(vp->v_flag, VUINIT)) { - SET(vp->v_flag, VUWANT); - simple_unlock(&vp->v_interlock); - continue; - } - - VREMINACTIVE("vnreclaim", vp); - simple_unlock(&vnode_free_list_slock); - - if (ubc_issetflags(vp, UI_WASMAPPED)) { - /* - * We should not reclaim as it is likely - * to be in use. Let it die a natural death. - * Release the UBC reference if one exists - * and put it back at the tail. - */ - simple_unlock(&vp->v_interlock); - if (ubc_release_named(vp)) { - if (UBCINFOEXISTS(vp)) { - simple_lock(&vp->v_interlock); - if (vp->v_usecount == 1 && !VONLIST(vp)) - vinactive(vp); - simple_unlock(&vp->v_interlock); - } - } else { - simple_lock(&vp->v_interlock); - vinactive(vp); - simple_unlock(&vp->v_interlock); - } - } else { - int didhold; - - VORECLAIM_ENABLE(vp); - - /* - * scrub the dirty pages and invalidate the buffers - */ - p = current_proc(); - err = vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p); - if (err) { - /* cannot reclaim */ - simple_lock(&vp->v_interlock); - vinactive(vp); - VORECLAIM_DISABLE(vp); - i++; - simple_unlock(&vp->v_interlock); - goto restart; - } - - /* keep the vnode alive so we can kill it */ - simple_lock(&vp->v_interlock); - if(vp->v_usecount != 1) - panic("VOCR: usecount race"); - vp->v_usecount++; - simple_unlock(&vp->v_interlock); - - /* clean up the state in VM without invalidating */ - didhold = ubc_hold(vp); - if (didhold) - (void)ubc_clean(vp, 0); - - /* flush and invalidate buffers associated with the vnode */ - if (vp->v_tag == VT_NFS) - nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0); - else - vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); - - /* - * Note: for the v_usecount == 2 case, VOP_INACTIVE - * has not yet been called. Call it now while vp is - * still locked, it will also release the lock. - */ - if (vp->v_usecount == 2) - VOP_INACTIVE(vp, p); - else - VOP_UNLOCK(vp, 0, p); - - if (didhold) - ubc_rele(vp); - - /* - * destroy the ubc named reference. - * If we can't because it is held for I/Os - * in progress, just put it back on the inactive - * list and move on. Otherwise, the paging reference - * is toast (and so is this vnode?). 
- */ - if (ubc_destroy_named(vp)) { - i++; - } - simple_lock(&vp->v_interlock); - VORECLAIM_DISABLE(vp); - simple_unlock(&vp->v_interlock); - vrele(vp); /* release extra use we added here */ - } - /* inactive list lock was released, must restart */ - goto restart; - } - simple_unlock(&vnode_free_list_slock); - - vnode_reclaim_tried += i; -out: - i = vm_object_cache_reclaim(count); - vnode_objects_reclaimed += i; - - return(i); -} - -/* - * This routine is called from vnode_pager_no_senders() - * which in turn can be called with vnode locked by vnode_uncache() - * But it could also get called as a result of vm_object_cache_trim(). - * In that case lock state is unknown. - * AGE the vnode so that it gets recycled quickly. - * Check lock status to decide whether to call vput() or vrele(). - */ -__private_extern__ void -vnode_pager_vrele(struct vnode *vp) -{ - - boolean_t funnel_state; - int isvnreclaim = 1; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - /* Mark the vnode to be recycled */ - vagevp(vp); - - simple_lock(&vp->v_interlock); - /* - * If a vgone (or vclean) is already in progress, - * Do not bother with the ubc_info cleanup. - * Let the vclean deal with it. - */ - if (vp->v_flag & VXLOCK) { - CLR(vp->v_flag, VTERMINATE); - if (ISSET(vp->v_flag, VTERMWANT)) { - CLR(vp->v_flag, VTERMWANT); - wakeup((caddr_t)&vp->v_ubcinfo); - } - simple_unlock(&vp->v_interlock); - vrele(vp); - (void) thread_funnel_set(kernel_flock, funnel_state); - return; - } - - /* It's dead, Jim! */ - if (!ISSET(vp->v_flag, VORECLAIM)) { - /* - * called as a result of eviction of the memory - * object from the memory object cache - */ - isvnreclaim = 0; - - /* So serialize vnode operations */ - VORECLAIM_ENABLE(vp); - } - if (!ISSET(vp->v_flag, VTERMINATE)) - SET(vp->v_flag, VTERMINATE); - - cache_purge(vp); - - if (UBCINFOEXISTS(vp)) { - struct ubc_info *uip = vp->v_ubcinfo; - - if (ubc_issetflags(vp, UI_WASMAPPED)) - SET(vp->v_flag, VWASMAPPED); - - vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */ - simple_unlock(&vp->v_interlock); - ubc_info_deallocate(uip); - } else { - if ((vp->v_type == VBAD) && ((vp)->v_ubcinfo != UBC_INFO_NULL) - && ((vp)->v_ubcinfo != UBC_NOINFO)) { - struct ubc_info *uip = vp->v_ubcinfo; - - vp->v_ubcinfo = UBC_NOINFO; /* catch bad accesses */ - simple_unlock(&vp->v_interlock); - ubc_info_deallocate(uip); - } else { - simple_unlock(&vp->v_interlock); - } - } - - CLR(vp->v_flag, VTERMINATE); - - if (vp->v_type != VBAD){ - vgone(vp); /* revoke the vnode */ - vrele(vp); /* and drop the reference */ - } else - vrele(vp); - - if (ISSET(vp->v_flag, VTERMWANT)) { - CLR(vp->v_flag, VTERMWANT); - wakeup((caddr_t)&vp->v_ubcinfo); - } - if (!isvnreclaim) - VORECLAIM_DISABLE(vp); - (void) thread_funnel_set(kernel_flock, funnel_state); - return; -} - - -#if DIAGNOSTIC -int walk_vnodes_debug=0; - -void -walk_allvnodes() -{ - struct mount *mp, *nmp; - struct vnode *vp; - int cnt = 0; - - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - for (vp = mp->mnt_vnodelist.lh_first; - vp != NULL; - vp = vp->v_mntvnodes.le_next) { - if (vp->v_usecount < 0){ - if(walk_vnodes_debug) { - printf("vp is %x\n",vp); - } - } - } - nmp = mp->mnt_list.cqe_next; - } - for (cnt = 0, vp = vnode_free_list.tqh_first; - vp != NULLVP; cnt++, vp = vp->v_freelist.tqe_next) { - if ((vp->v_usecount < 0) && walk_vnodes_debug) { - if(walk_vnodes_debug) { - printf("vp is %x\n",vp); - } - } - } - printf("%d - free\n", cnt); - - for (cnt = 0, vp = vnode_inactive_list.tqh_first; - vp != NULLVP; 
cnt++, vp = vp->v_freelist.tqe_next) { - if ((vp->v_usecount < 0) && walk_vnodes_debug) { - if(walk_vnodes_debug) { - printf("vp is %x\n",vp); - } - } - } - printf("%d - inactive\n", cnt); -} -#endif /* DIAGNOSTIC */ - - -struct x_constraints { - u_int32_t x_maxreadcnt; - u_int32_t x_maxsegreadsize; - u_int32_t x_maxsegwritesize; -}; - - -void -vfs_io_attributes(vp, flags, iosize, vectors) - struct vnode *vp; - int flags; /* B_READ or B_WRITE */ - int *iosize; - int *vectors; -{ - struct mount *mp; - - /* start with "reasonable" defaults */ - *iosize = MAXPHYS; - *vectors = 32; - - mp = vp->v_mount; - if (mp != NULL) { - switch (flags) { - case B_READ: - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - *iosize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt; - else - *iosize = mp->mnt_maxreadcnt; - *vectors = mp->mnt_segreadcnt; - break; - case B_WRITE: - *iosize = mp->mnt_maxwritecnt; - *vectors = mp->mnt_segwritecnt; - break; - default: - break; - } - if (*iosize == 0) - *iosize = MAXPHYS; - if (*vectors == 0) - *vectors = 32; - } - return; -} + proc_t p = current_proc(); -__private_extern__ -void -vfs_io_maxsegsize(vp, flags, maxsegsize) - struct vnode *vp; - int flags; /* B_READ or B_WRITE */ - int *maxsegsize; -{ - struct mount *mp; - - /* start with "reasonable" default */ - *maxsegsize = MAXPHYS; - - mp = vp->v_mount; - if (mp != NULL) { - switch (flags) { - case B_READ: - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize; - else - /* - * if the extended info doesn't exist - * then use the maxread I/O size as the - * max segment size... this is the previous behavior - */ - *maxsegsize = mp->mnt_maxreadcnt; - break; - case B_WRITE: - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - *maxsegsize = ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize; - else - /* - * if the extended info doesn't exist - * then use the maxwrite I/O size as the - * max segment size... 
this is the previous behavior - */ - *maxsegsize = mp->mnt_maxwritecnt; - break; - default: - break; - } - if (*maxsegsize == 0) - *maxsegsize = MAXPHYS; - } -} - - -#include - - -int -vfs_init_io_attributes(devvp, mp) - struct vnode *devvp; - struct mount *mp; -{ - int error; - off_t readblockcnt; - off_t writeblockcnt; - off_t readmaxcnt; - off_t writemaxcnt; - off_t readsegcnt; - off_t writesegcnt; - off_t readsegsize; - off_t writesegsize; - u_long blksize; - - u_int64_t temp; - - struct proc *p = current_proc(); - struct ucred *cred = p->p_ucred; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); int isvirtual = 0; /* @@ -2935,67 +2478,70 @@ vfs_init_io_attributes(devvp, mp) */ int thisunit = -1; static int rootunit = -1; - extern struct vnode *rootvp; if (rootunit == -1) { - if (VOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, cred, p)) + if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, &context)) rootunit = -1; else if (rootvp == devvp) mp->mnt_kern_flag |= MNTK_ROOTDEV; } if (devvp != rootvp && rootunit != -1) { - if (VOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, cred, p) == 0) { + if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, &context) == 0) { if (thisunit == rootunit) mp->mnt_kern_flag |= MNTK_ROOTDEV; } } - if (VOP_IOCTL(devvp, DKIOCGETISVIRTUAL, (caddr_t)&isvirtual, 0, cred, p) == 0) { + /* + * force the spec device to re-cache + * the underlying block size in case + * the filesystem overrode the initial value + */ + set_fsblocksize(devvp); + + + if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, + (caddr_t)&blksize, 0, &context))) + return (error); + + mp->mnt_devblocksize = blksize; + + if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, &context) == 0) { if (isvirtual) mp->mnt_kern_flag |= MNTK_VIRTUALDEV; } - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, - (caddr_t)&readblockcnt, 0, cred, p))) - return (error); - - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, - (caddr_t)&writeblockcnt, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, + (caddr_t)&readblockcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, - (caddr_t)&readmaxcnt, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, + (caddr_t)&writeblockcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, - (caddr_t)&writemaxcnt, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, + (caddr_t)&readmaxcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, - (caddr_t)&readsegcnt, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, + (caddr_t)&writemaxcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, - (caddr_t)&writesegcnt, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, + (caddr_t)&readsegcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD, - (caddr_t)&readsegsize, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, + (caddr_t)&writesegcnt, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, - (caddr_t)&writesegsize, 0, cred, p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD, + (caddr_t)&readsegsize, 0, &context))) return (error); - if ((error = VOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, - (caddr_t)&blksize, 0, cred, 
p))) + if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, + (caddr_t)&writesegsize, 0, &context))) return (error); - - if ( !(mp->mnt_kern_flag & MNTK_IO_XINFO)) { - MALLOC(mp->mnt_xinfo_ptr, void *, sizeof(struct x_constraints), M_TEMP, M_WAITOK); - mp->mnt_kern_flag |= MNTK_IO_XINFO; - } - if (readmaxcnt) temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; else { @@ -3005,7 +2551,7 @@ vfs_init_io_attributes(devvp, mp) } else temp = MAXPHYS; } - ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxreadcnt = (u_int32_t)temp; + mp->mnt_maxreadcnt = (u_int32_t)temp; if (writemaxcnt) temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; @@ -3030,13 +2576,13 @@ vfs_init_io_attributes(devvp, mp) temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize; else temp = mp->mnt_maxreadcnt; - ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegreadsize = (u_int32_t)temp; + mp->mnt_maxsegreadsize = (u_int32_t)temp; if (writesegsize) temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize; else temp = mp->mnt_maxwritecnt; - ((struct x_constraints *)(mp->mnt_xinfo_ptr))->x_maxsegwritesize = (u_int32_t)temp; + mp->mnt_maxsegwritesize = (u_int32_t)temp; return (error); } @@ -3051,7 +2597,7 @@ vfs_event_init(void) } void -vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data) +vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data) { KNOTE(&fs_klist, event); @@ -3063,14 +2609,41 @@ vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data) static int sysctl_vfs_getvfscnt(void) { - struct mount *mp; - int ret = 0; + return(mount_getvfscnt()); +} + - simple_lock(&mountlist_slock); - CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) - ret++; - simple_unlock(&mountlist_slock); +static int +mount_getvfscnt(void) +{ + int ret; + + mount_list_lock(); + ret = nummounts; + mount_list_unlock(); return (ret); + +} + + + +static int +mount_fillfsids(fsid_t *fsidlst, int count) +{ + struct mount *mp; + int actual=0; + + actual = 0; + mount_list_lock(); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (actual <= count) { + fsidlst[actual] = mp->mnt_vfsstat.f_fsid; + actual++; + } + } + mount_list_unlock(); + return (actual); + } /* @@ -3086,13 +2659,13 @@ sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual) struct mount *mp; *actual = 0; - simple_lock(&mountlist_slock); - CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { + mount_list_lock(); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { (*actual)++; if (*actual <= count) - fsidlst[(*actual) - 1] = mp->mnt_stat.f_fsid; + fsidlst[(*actual) - 1] = mp->mnt_vfsstat.f_fsid; } - simple_unlock(&mountlist_slock); + mount_list_unlock(); return (*actual <= count ? 0 : ENOMEM); } @@ -3104,11 +2677,11 @@ sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS fsid_t *fsidlst; /* This is a readonly node. */ - if (req->newptr != NULL) + if (req->newptr != USER_ADDR_NULL) return (EPERM); /* they are querying us so just return the space required. 
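 * (This is the usual two-phase sysctl protocol; as an illustrative
 * sketch from user space, with 'mib'/'miblen' standing in for this
 * node's MIB name:
 *
 *	size_t len = 0;
 *	sysctl(mib, miblen, NULL, &len, NULL, 0);	// probe the size
 *	fsid_t *buf = malloc(len);
 *	sysctl(mib, miblen, buf, &len, NULL, 0);	// fetch the fsids
 *
 * the NULL old pointer in the first call takes the branch below.)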
*/ - if (req->oldptr == NULL) { + if (req->oldptr == USER_ADDR_NULL) { req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t); return 0; } @@ -3150,22 +2723,38 @@ static int sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS { struct vfsidctl vc; + struct user_vfsidctl user_vc; struct mount *mp; - struct statfs *sp; + struct vfsstatfs *sp; struct proc *p; int *name; int error, flags, namelen; + struct vfs_context context; + boolean_t is_64_bit; name = arg1; namelen = arg2; p = req->p; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + is_64_bit = proc_is64bit(p); - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) - return (error); - if (vc.vc_vers != VFS_CTL_VERS1) - return (EINVAL); - mp = vfs_getvfs(&vc.vc_fsid); + if (is_64_bit) { + error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); + if (error) + return (error); + if (user_vc.vc_vers != VFS_CTL_VERS1) + return (EINVAL); + mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 0); + } + else { + error = SYSCTL_IN(req, &vc, sizeof(vc)); + if (error) + return (error); + if (vc.vc_vers != VFS_CTL_VERS1) + return (EINVAL); + mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 0); + } if (mp == NULL) return (ENOENT); /* reset so that the fs specific code can fetch it. */ @@ -3176,33 +2765,137 @@ sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS * SYSCTL_IN/OUT routines. */ if (mp->mnt_op->vfs_sysctl != NULL) { - error = mp->mnt_op->vfs_sysctl(name, namelen, - req, NULL, NULL, 0, req->p); - if (error != EOPNOTSUPP) + if (is_64_bit) { + if (vfs_64bitready(mp)) { + error = mp->mnt_op->vfs_sysctl(name, namelen, + CAST_USER_ADDR_T(req), + NULL, USER_ADDR_NULL, 0, + &context); + } + else { + error = ENOTSUP; + } + } + else { + error = mp->mnt_op->vfs_sysctl(name, namelen, + CAST_USER_ADDR_T(req), + NULL, USER_ADDR_NULL, 0, + &context); + } + if (error != ENOTSUP) return (error); } switch (name[0]) { case VFS_CTL_UMOUNT: - VCTLTOREQ(&vc, req); + req->newidx = 0; + if (is_64_bit) { + req->newptr = user_vc.vc_ptr; + req->newlen = (size_t)user_vc.vc_len; + } + else { + req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); + req->newlen = vc.vc_len; + } error = SYSCTL_IN(req, &flags, sizeof(flags)); if (error) break; error = safedounmount(mp, flags, p); break; case VFS_CTL_STATFS: - VCTLTOREQ(&vc, req); + req->newidx = 0; + if (is_64_bit) { + req->newptr = user_vc.vc_ptr; + req->newlen = (size_t)user_vc.vc_len; + } + else { + req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); + req->newlen = vc.vc_len; + } error = SYSCTL_IN(req, &flags, sizeof(flags)); if (error) break; - sp = &mp->mnt_stat; + sp = &mp->mnt_vfsstat; if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) + (error = vfs_update_vfsstat(mp, &context))) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - error = SYSCTL_OUT(req, sp, sizeof(*sp)); + if (is_64_bit) { + struct user_statfs sfs; + bzero(&sfs, sizeof(sfs)); + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_bsize = (user_long_t)sp->f_bsize; + sfs.f_iosize = (user_long_t)sp->f_iosize; + sfs.f_blocks = (user_long_t)sp->f_blocks; + sfs.f_bfree = (user_long_t)sp->f_bfree; + sfs.f_bavail = (user_long_t)sp->f_bavail; + sfs.f_files = (user_long_t)sp->f_files; + sfs.f_ffree = (user_long_t)sp->f_ffree; + sfs.f_fsid = sp->f_fsid; + sfs.f_owner = sp->f_owner; + + strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); + strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); + strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); + + error = SYSCTL_OUT(req, 
&sfs, sizeof(sfs)); + } + else { + struct statfs sfs; + bzero(&sfs, sizeof(struct statfs)); + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + + /* + * It's possible for there to be more than 2^31 blocks in the filesystem, so we + * have to fudge the numbers here in that case. We inflate the blocksize in order + * to reflect the filesystem size as best we can. + */ + if (sp->f_blocks > LONG_MAX) { + int shift; + + /* + * Work out how far we have to shift the block count down to make it fit. + * Note that it's possible to have to shift so far that the resulting + * blocksize would be unreportably large. At that point, we will clip + * any values that don't fit. + * + * For safety's sake, we also ensure that f_iosize is never reported as + * being smaller than f_bsize. + */ + for (shift = 0; shift < 32; shift++) { + if ((sp->f_blocks >> shift) <= LONG_MAX) + break; + if ((sp->f_bsize << (shift + 1)) > LONG_MAX) + break; + } +#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) + sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift); + sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift); + sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift); +#undef __SHIFT_OR_CLIP + sfs.f_bsize = (long)(sp->f_bsize << shift); + sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize); + } else { + sfs.f_bsize = (long)sp->f_bsize; + sfs.f_iosize = (long)sp->f_iosize; + sfs.f_blocks = (long)sp->f_blocks; + sfs.f_bfree = (long)sp->f_bfree; + sfs.f_bavail = (long)sp->f_bavail; + } + sfs.f_files = (long)sp->f_files; + sfs.f_ffree = (long)sp->f_ffree; + sfs.f_fsid = sp->f_fsid; + sfs.f_owner = sp->f_owner; + + strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1); + strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1); + strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1); + + error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); + } break; default: - return (EOPNOTSUPP); + return (ENOTSUP); } return (error); } @@ -3247,7 +2940,7 @@ sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS struct proc *p; /* We need a pid. */ - if (req->newptr == NULL) + if (req->newptr == USER_ADDR_NULL) return (EINVAL); error = SYSCTL_IN(req, &pid, sizeof(pid)); @@ -3262,12 +2955,13 @@ sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS * Fetching the value is ok, but we only fetch if the old * pointer is given. */ - if (req->oldptr != NULL) { + if (req->oldptr != USER_ADDR_NULL) { out = !((p->p_flag & P_NOREMOTEHANG) == 0); error = SYSCTL_OUT(req, &out, sizeof(out)); return (error); } + /* XXX req->p->p_ucred -> kauth_cred_get() ??? */ /* cansignal offers us enough security. */ if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0) return (EPERM); @@ -3289,4 +2983,2811 @@ SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW, sysctl_vfs_ctlbyfsid, "ctlbyfsid"); SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW, 0, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); + + +int num_reusedvnodes=0; +static int +new_vnode(vnode_t *vpp) +{ + vnode_t vp; + int retries = 0; /* retry in case of tablefull */ + int vpid; + struct timespec ts; + +retry: + vnode_list_lock(); + + if ( !TAILQ_EMPTY(&vnode_free_list)) { + /* + * Pick the first vp for possible reuse + */ + vp = TAILQ_FIRST(&vnode_free_list); + + if (vp->v_lflag & VL_DEAD) + goto steal_this_vp; + } else + vp = NULL; + + /* + * we're either empty, or the next guy on the + * list is a valid vnode... 
if we're under the + * limit, we'll create a new vnode + */ + if (numvnodes < desiredvnodes) { + numvnodes++; + vnode_list_unlock(); + MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK); + bzero((char *)vp, sizeof *vp); + VLISTNONE(vp); /* avoid double queue removal */ + lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr); + + nanouptime(&ts); + vp->v_id = ts.tv_nsec; + vp->v_flag = VSTANDARD; + + goto done; + } + if (vp == NULL) { + /* + * we've reached the system-imposed maximum number of vnodes + * but there isn't a single one available + * wait a bit and then retry... if we can't get a vnode + * after 100 retries, then log a complaint + */ + if (++retries <= 100) { + vnode_list_unlock(); + IOSleep(1); + goto retry; + } + + vnode_list_unlock(); + tablefull("vnode"); + log(LOG_EMERG, "%d desired, %d numvnodes, " + "%d free, %d inactive\n", + desiredvnodes, numvnodes, freevnodes, inactivevnodes); + *vpp = 0; + return (ENFILE); + } +steal_this_vp: + vpid = vp->v_id; + + VREMFREE("new_vnode", vp); + VLISTNONE(vp); + + vnode_list_unlock(); + vnode_lock(vp); + + /* + * We could wait for the vnode_lock after removing the vp from the freelist + * and the vid is bumped only at the very end of reclaim. So it is possible + * that we are looking at a vnode that is being terminated. If so, skip it. + */ + if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || + VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) { + /* + * we lost the race between dropping the list lock + * and picking up the vnode_lock... someone else + * used this vnode and it is now in a new state + * so we need to go back and try again + */ + vnode_unlock(vp); + goto retry; + } + if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) { + /* + * we did a vnode_rele_ext that asked for + * us not to reenter the filesystem during + * the release even though VL_NEEDINACTIVE was + * set... we'll do it here by doing a + * vnode_get/vnode_put + * + * pick up an iocount so that we can call + * vnode_put and drive the VNOP_INACTIVE... + * vnode_put will either leave us off + * the freelist if a new ref comes in, + * or put us back on the end of the freelist + * or recycle us if we were marked for termination... 
+ * so we'll just go grab a new candidate + */ + vp->v_iocount++; +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vnode_put_locked(vp); + vnode_unlock(vp); + goto retry; + } + OSAddAtomic(1, &num_reusedvnodes); + + /* Checks for anyone racing us for recycle */ + if (vp->v_type != VBAD) { + if (vp->v_lflag & VL_DEAD) + panic("new_vnode: the vnode is VL_DEAD but not VBAD"); + + (void)vnode_reclaim_internal(vp, 1, 1); + + if ((VONLIST(vp))) + panic("new_vnode: vp on list "); + if (vp->v_usecount || vp->v_iocount || vp->v_kusecount || + (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH))) + panic("new_vnode: free vnode still referenced\n"); + if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0)) + panic("new_vnode: vnode seems to be on mount list "); + if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren)) + panic("new_vnode: vnode still hooked into the name cache"); + } + if (vp->v_unsafefs) { + lck_mtx_destroy(&vp->v_unsafefs->fsnodelock, vnode_lck_grp); + FREE_ZONE((void *)vp->v_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS); + vp->v_unsafefs = (struct unsafe_fsnode *)NULL; + } + vp->v_lflag = 0; + vp->v_writecount = 0; + vp->v_references = 0; + vp->v_iterblkflags = 0; + vp->v_flag = VSTANDARD; + /* vbad vnodes can point to dead_mountp */ + vp->v_mount = 0; + vp->v_defer_reclaimlist = (vnode_t)0; + + vnode_unlock(vp); +done: + *vpp = vp; + + return (0); +} + +void +vnode_lock(vnode_t vp) +{ + lck_mtx_lock(&vp->v_lock); +} + +void +vnode_unlock(vnode_t vp) +{ + lck_mtx_unlock(&vp->v_lock); +} + + + +int +vnode_get(struct vnode *vp) +{ + vnode_lock(vp); + + if ( (vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) ) { + vnode_unlock(vp); + return(ENOENT); + } + vp->v_iocount++; +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vnode_unlock(vp); + + return(0); +} + +int +vnode_getwithvid(vnode_t vp, int vid) +{ + return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID))); +} + +int +vnode_getwithref(vnode_t vp) +{ + return(vget_internal(vp, 0, 0)); +} + + +int +vnode_put(vnode_t vp) +{ + int retval; + + vnode_lock(vp); + retval = vnode_put_locked(vp); + vnode_unlock(vp); + + return(retval); +} + +int +vnode_put_locked(vnode_t vp) +{ + struct vfs_context context; + +retry: + if (vp->v_iocount < 1) + panic("vnode_put(%x): iocount < 1", vp); + + if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) { + vnode_dropiocount(vp, 1); + return(0); + } + if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) { + + vp->v_lflag &= ~VL_NEEDINACTIVE; + vnode_unlock(vp); + + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + VNOP_INACTIVE(vp, &context); + + vnode_lock(vp); + /* + * because we had to drop the vnode lock before calling + * VNOP_INACTIVE, the state of this vnode may have changed... + * we may pick up both VL_MARKTERM and either + * an iocount or a usecount while in the VNOP_INACTIVE call + * we don't want to call vnode_reclaim_internal on a vnode + * that has active references on it... so loop back around + * and reevaluate the state + */ + goto retry; + } + vp->v_lflag &= ~VL_NEEDINACTIVE; + + if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM) + vnode_reclaim_internal(vp, 1, 0); + + vnode_dropiocount(vp, 1); + vnode_list_add(vp); + + return(0); +} + +/* is vnode_t in use by others? 
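+ * (illustrative use of the iocount KPI above, sketched rather than
+ * taken from this patch:
+ *
+ *	int vid = vp->v_id;			// snapshot the identity
+ *	...
+ *	if (vnode_getwithvid(vp, vid) == 0) {	// fails if vp was recycled
+ *		// vp is safe to use here
+ *		vnode_put(vp);			// drop the iocount
+ *	}
+ *
+ * this is the same revalidation pattern check_mountedon() and vcount()
+ * use earlier in this file.)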
 */ +int +vnode_isinuse(vnode_t vp, int refcnt) +{ + return(vnode_isinuse_locked(vp, refcnt, 0)); +} + + +static int +vnode_isinuse_locked(vnode_t vp, int refcnt, int locked) +{ + int retval = 0; + + if (!locked) + vnode_lock(vp); + if ((vp->v_type != VREG) && (vp->v_usecount > refcnt)) { + retval = 1; + goto out; + } + if (vp->v_type == VREG) { + retval = ubc_isinuse_locked(vp, refcnt, 1); + } + +out: + if (!locked) + vnode_unlock(vp); + return(retval); +} + + +/* resume vnode_t */ +errno_t +vnode_resume(vnode_t vp) +{ + + vnode_lock(vp); + + if (vp->v_owner == current_thread()) { + vp->v_lflag &= ~VL_SUSPENDED; + vp->v_owner = 0; + vnode_unlock(vp); + wakeup(&vp->v_iocount); + } else + vnode_unlock(vp); + + return(0); +} + +static errno_t +vnode_drain(vnode_t vp) +{ + + if (vp->v_lflag & VL_DRAIN) { + panic("vnode_drain: recursive drain"); + return(ENOENT); + } + vp->v_lflag |= VL_DRAIN; + vp->v_owner = current_thread(); + + while (vp->v_iocount > 1) + msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", 0); + return(0); +} + + +/* + * if the number of recent references via vnode_getwithvid or vnode_getwithref + * exceeds this threshold, then 'UN-AGE' the vnode by removing it from + * the LRU list if it's currently on it... once the iocount and usecount both drop + * to 0, it will get put back on the end of the list, effectively making it younger + * this allows us to keep actively referenced vnodes in the list without having + * to constantly remove and add to the list each time a vnode w/o a usecount is + * referenced which costs us taking and dropping a global lock twice. + */ +#define UNAGE_THRESHHOLD 10 + +errno_t +vnode_getiocount(vnode_t vp, int locked, int vid, int vflags) +{ + int nodead = vflags & VNODE_NODEAD; + int nosusp = vflags & VNODE_NOSUSPEND; + + if (!locked) + vnode_lock(vp); + + for (;;) { + /* + * if it is a dead vnode with deadfs + */ + if (nodead && (vp->v_lflag & VL_DEAD) && ((vp->v_type == VBAD) || (vp->v_data == 0))) { + if (!locked) + vnode_unlock(vp); + return(ENOENT); + } + /* + * will return VL_DEAD ones + */ + if ((vp->v_lflag & (VL_SUSPENDED | VL_DRAIN | VL_TERMINATE)) == 0 ) { + break; + } + /* + * if suspended vnodes are to be failed + */ + if (nosusp && (vp->v_lflag & VL_SUSPENDED)) { + if (!locked) + vnode_unlock(vp); + return(ENOENT); + } + /* + * if you are the owner of drain/suspend/termination, you can acquire an iocount + * check for VL_TERMINATE; it does not set owner + */ + if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED | VL_TERMINATE)) && + (vp->v_owner == current_thread())) { + break; + } + if (vp->v_lflag & VL_TERMINATE) { + vp->v_lflag |= VL_TERMWANT; + + msleep(&vp->v_lflag, &vp->v_lock, PVFS, "vnode getiocount", 0); + } else + msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", 0); + } + if (vid != vp->v_id) { + if (!locked) + vnode_unlock(vp); + return(ENOENT); + } + if (++vp->v_references >= UNAGE_THRESHHOLD) { + vp->v_references = 0; + vnode_list_remove(vp); + } + vp->v_iocount++; +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + if (!locked) + vnode_unlock(vp); + return(0); +} + +static void +vnode_dropiocount (vnode_t vp, int locked) +{ + if (!locked) + vnode_lock(vp); + if (vp->v_iocount < 1) + panic("vnode_dropiocount(%x): v_iocount < 1", vp); + + vp->v_iocount--; +#ifdef JOE_DEBUG + record_vp(vp, -1); +#endif + if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) + wakeup(&vp->v_iocount); + + if (!locked) + vnode_unlock(vp); +} + + +void +vnode_reclaim(struct vnode * vp) +{ + vnode_reclaim_internal(vp, 0, 0);
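+	/*
+	 * locked == 0: this external entry point runs with the vnode
+	 * unlocked, so vnode_reclaim_internal() takes v_lock itself;
+	 * reuse == 0: the vnode may go back on the free list once
+	 * reclamation completes (see vnode_reclaim_internal() below).
+	 */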
+} + +__private_extern__ +void +vnode_reclaim_internal(struct vnode * vp, int locked, int reuse) +{ + int isfifo = 0; + + if (!locked) + vnode_lock(vp); + + if (vp->v_lflag & VL_TERMINATE) { + panic("vnode reclaim in progress"); + } + vp->v_lflag |= VL_TERMINATE; + + if (vnode_drain(vp)) { + panic("vnode drain failed"); + vnode_unlock(vp); + return; + } + isfifo = (vp->v_type == VFIFO); + + if (vp->v_type != VBAD) + vgone(vp); /* clean and reclaim the vnode */ + + /* + * give the vnode a new identity so + * that vnode_getwithvid will fail + * on any stale cache accesses + */ + vp->v_id++; + if (isfifo) { + struct fifoinfo * fip; + + fip = vp->v_fifoinfo; + vp->v_fifoinfo = NULL; + FREE(fip, M_TEMP); + } + + vp->v_type = VBAD; + + if (vp->v_data) + panic("vnode_reclaim_internal: cleaned vnode isn't"); + if (vp->v_numoutput) + panic("vnode_reclaim_internal: Clean vnode has pending I/O's"); + if (UBCINFOEXISTS(vp)) + panic("vnode_reclaim_internal: ubcinfo not cleaned"); + if (vp->v_parent) + panic("vnode_reclaim_internal: vparent not removed"); + if (vp->v_name) + panic("vnode_reclaim_internal: vname not removed"); + + vp->v_socket = 0; + + vp->v_lflag &= ~VL_TERMINATE; + vp->v_lflag &= ~VL_DRAIN; + vp->v_owner = 0; + + if (vp->v_lflag & VL_TERMWANT) { + vp->v_lflag &= ~VL_TERMWANT; + wakeup(&vp->v_lflag); + } + if (!reuse && vp->v_usecount == 0) + vnode_list_add(vp); + if (!locked) + vnode_unlock(vp); +} + +/* USAGE: + * The following API creates a vnode and associates all the parameters specified in the + * vnode_fsparam structure and returns a vnode handle with a reference. Device aliasing + * is handled here so checkalias is obsoleted by this. + * vnode_create(int flavor, size_t size, void * param, vnode_t *vp) + */ +int +vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) +{ + int error; + int insert = 1; + vnode_t vp; + vnode_t nvp; + vnode_t dvp; + struct componentname *cnp; + struct vnode_fsparam *param = (struct vnode_fsparam *)data; + + if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) { + if ( (error = new_vnode(&vp)) ) { + return(error); + } else { + dvp = param->vnfs_dvp; + cnp = param->vnfs_cnp; + + vp->v_op = param->vnfs_vops; + vp->v_type = param->vnfs_vtype; + vp->v_data = param->vnfs_fsnode; + vp->v_iocount = 1; + + if (param->vnfs_markroot) + vp->v_flag |= VROOT; + if (param->vnfs_marksystem) + vp->v_flag |= VSYSTEM; + else if (vp->v_type == VREG) { + /* + * only non SYSTEM vp + */ + error = ubc_info_init_withsize(vp, param->vnfs_filesize); + if (error) { +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + vp->v_mount = 0; + vp->v_op = dead_vnodeop_p; + vp->v_tag = VT_NON; + vp->v_data = NULL; + vp->v_type = VBAD; + vp->v_lflag |= VL_DEAD; + + vnode_put(vp); + return(error); + } + } +#ifdef JOE_DEBUG + record_vp(vp, 1); +#endif + if (vp->v_type == VCHR || vp->v_type == VBLK) { + + if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) { + /* + * if checkalias returns a vnode, it will be locked + * + * first get rid of the unneeded vnode we acquired + */ + vp->v_data = NULL; + vp->v_op = spec_vnodeop_p; + vp->v_type = VBAD; + vp->v_lflag = VL_DEAD; + vp->v_data = NULL; + vp->v_tag = VT_NON; + vnode_put(vp); + + /* + * switch to aliased vnode and finish + * preparing it + */ + vp = nvp; + + vclean(vp, 0, current_proc()); + vp->v_op = param->vnfs_vops; + vp->v_type = param->vnfs_vtype; + vp->v_data = param->vnfs_fsnode; + vp->v_lflag = 0; + vp->v_mount = NULL; + insmntque(vp, param->vnfs_mp); + insert = 0; + vnode_unlock(vp); + } + } + + if (vp->v_type == VFIFO) { 
struct fifoinfo *fip; + + MALLOC(fip, struct fifoinfo *, + sizeof(*fip), M_TEMP, M_WAITOK); + bzero(fip, sizeof(struct fifoinfo )); + vp->v_fifoinfo = fip; + } + /* The file systems usually pass the address of the location + * where they store the vnode pointer. Once we add the vnode to the mount + * point and the name cache it becomes discoverable, so the file system node + * will have its connection to the vnode set up by then. + */ + *vpp = vp; + + if (param->vnfs_mp) { + if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL) + vp->v_flag |= VLOCKLOCAL; + if (insert) { + /* + * enter in mount vnode list + */ + insmntque(vp, param->vnfs_mp); + } +#ifdef INTERIM_FSNODE_LOCK + if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) { + MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *, + sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK); + vp->v_unsafefs->fsnode_count = 0; + vp->v_unsafefs->fsnodeowner = (void *)NULL; + lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr); + } +#endif /* INTERIM_FSNODE_LOCK */ + } + if (dvp && vnode_ref(dvp) == 0) { + vp->v_parent = dvp; + } + if (cnp) { + if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) { + /* + * enter into name cache + * we've got the info to enter it into the name cache now + */ + cache_enter(dvp, vp, cnp); + } + vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); + } + if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) { + /* + * this vnode is being created as cacheable in the name cache + * this allows us to re-enter it in the cache + */ + vp->v_flag |= VNCACHEABLE; + } + if ((vp->v_flag & VSYSTEM) && (vp->v_type != VREG)) + panic("incorrect vnode setup"); + + return(0); + } + } + return (EINVAL); +} + +int +vnode_addfsref(vnode_t vp) +{ + vnode_lock(vp); + if (vp->v_lflag & VNAMED_FSHASH) + panic("add_fsref: vp already has named reference"); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) + panic("addfsref: vp on the free list\n"); + vp->v_lflag |= VNAMED_FSHASH; + vnode_unlock(vp); + return(0); + +} +int +vnode_removefsref(vnode_t vp) +{ + vnode_lock(vp); + if ((vp->v_lflag & VNAMED_FSHASH) == 0) + panic("remove_fsref: no named reference"); + vp->v_lflag &= ~VNAMED_FSHASH; + vnode_unlock(vp); + return(0); + +} + + +int +vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg) +{ + mount_t mp; + int ret = 0; + fsid_t * fsid_list; + int count, actualcount, i; + void * allocmem; + + count = mount_getvfscnt(); + count += 10; + + fsid_list = (fsid_t *)kalloc(count * sizeof(fsid_t)); + allocmem = (void *)fsid_list; + + actualcount = mount_fillfsids(fsid_list, count); + + for (i=0; i< actualcount; i++) { + + /* obtain the mount point with iteration reference */ + mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1); + + if(mp == (struct mount *)0) + continue; + mount_lock(mp); + if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) { + mount_unlock(mp); + mount_iterdrop(mp); + continue; + + } + mount_unlock(mp); + + /* iterate over all the vnodes */ + ret = callout(mp, arg); + + mount_iterdrop(mp); + + switch (ret) { + case VFS_RETURNED: + case VFS_RETURNED_DONE: + if (ret == VFS_RETURNED_DONE) { + ret = 0; + goto out; + } + break; + + case VFS_CLAIMED_DONE: + ret = 0; + goto out; + case VFS_CLAIMED: + default: + break; + } + ret = 0; + } + +out: + kfree(allocmem, (count * sizeof(fsid_t))); + return (ret); +} + +/* + * Update the vfsstatfs structure in the mountpoint. 
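+ * (The routine below only copies fields the filesystem marks as
+ * supported; as an illustrative sketch, a filesystem's getattr handler
+ * would answer a request with the usual VFSATTR macros, e.g.
+ *
+ *	if (VFSATTR_IS_ACTIVE(va, f_bfree))
+ *		VFSATTR_RETURN(va, f_bfree, free_block_count);
+ *
+ * where 'free_block_count' is a placeholder for the filesystem's own
+ * accounting, and VFSATTR_RETURN is assumed to both set the field and
+ * mark it supported.)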
+ */ +int +vfs_update_vfsstat(mount_t mp, vfs_context_t ctx) +{ + struct vfs_attr va; + int error; + + /* + * Request the attributes we want to propagate into + * the per-mount vfsstat structure. + */ + VFSATTR_INIT(&va); + VFSATTR_WANTED(&va, f_iosize); + VFSATTR_WANTED(&va, f_blocks); + VFSATTR_WANTED(&va, f_bfree); + VFSATTR_WANTED(&va, f_bavail); + VFSATTR_WANTED(&va, f_bused); + VFSATTR_WANTED(&va, f_files); + VFSATTR_WANTED(&va, f_ffree); + VFSATTR_WANTED(&va, f_bsize); + VFSATTR_WANTED(&va, f_fssubtype); + if ((error = vfs_getattr(mp, &va, ctx)) != 0) { + KAUTH_DEBUG("STAT - filesystem returned error %d", error); + return(error); + } + + /* + * Unpack into the per-mount structure. + * + * We only overwrite these fields, which are likely to change: + * f_blocks + * f_bfree + * f_bavail + * f_bused + * f_files + * f_ffree + * + * And these which are not, but which the FS has no other way + * of providing to us: + * f_bsize + * f_iosize + * f_fssubtype + * + */ + if (VFSATTR_IS_SUPPORTED(&va, f_bsize)) { + mp->mnt_vfsstat.f_bsize = va.f_bsize; + } else { + mp->mnt_vfsstat.f_bsize = mp->mnt_devblocksize; /* default from the device block size */ + } + if (VFSATTR_IS_SUPPORTED(&va, f_iosize)) { + mp->mnt_vfsstat.f_iosize = va.f_iosize; + } else { + mp->mnt_vfsstat.f_iosize = 1024 * 1024; /* 1MB sensible I/O size */ + } + if (VFSATTR_IS_SUPPORTED(&va, f_blocks)) + mp->mnt_vfsstat.f_blocks = va.f_blocks; + if (VFSATTR_IS_SUPPORTED(&va, f_bfree)) + mp->mnt_vfsstat.f_bfree = va.f_bfree; + if (VFSATTR_IS_SUPPORTED(&va, f_bavail)) + mp->mnt_vfsstat.f_bavail = va.f_bavail; + if (VFSATTR_IS_SUPPORTED(&va, f_bused)) + mp->mnt_vfsstat.f_bused = va.f_bused; + if (VFSATTR_IS_SUPPORTED(&va, f_files)) + mp->mnt_vfsstat.f_files = va.f_files; + if (VFSATTR_IS_SUPPORTED(&va, f_ffree)) + mp->mnt_vfsstat.f_ffree = va.f_ffree; + + /* this is unlikely to change, but has to be queried for */ + if (VFSATTR_IS_SUPPORTED(&va, f_fssubtype)) + mp->mnt_vfsstat.f_fssubtype = va.f_fssubtype; + + return(0); +} + +void +mount_list_add(mount_t mp) +{ + mount_list_lock(); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + nummounts++; + mount_list_unlock(); +} + +void +mount_list_remove(mount_t mp) +{ + mount_list_lock(); + TAILQ_REMOVE(&mountlist, mp, mnt_list); + nummounts--; + mp->mnt_list.tqe_next = 0; + mp->mnt_list.tqe_prev = 0; + mount_list_unlock(); +} + +mount_t +mount_lookupby_volfsid(int volfs_id, int withref) +{ + mount_t cur_mount = (mount_t)0; + mount_t mp ; + + mount_list_lock(); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + if (validfsnode(mp) && mp->mnt_vfsstat.f_fsid.val[0] == volfs_id) { + cur_mount = mp; + if (withref) { + if (mount_iterref(cur_mount, 1)) { + cur_mount = (mount_t)0; + mount_list_unlock(); + goto out; + } + } + break; + } + } + mount_list_unlock(); + if (withref && (cur_mount != (mount_t)0)) { + mp = cur_mount; + if (vfs_busy(mp, LK_NOWAIT) != 0) { + cur_mount = (mount_t)0; + } + mount_iterdrop(mp); + } +out: + return(cur_mount); +} + + +mount_t +mount_list_lookupby_fsid(fsid, locked, withref) + fsid_t *fsid; + int locked; + int withref; +{ + mount_t retmp = (mount_t)0; + mount_t mp; + + if (!locked) + mount_list_lock(); + TAILQ_FOREACH(mp, &mountlist, mnt_list) + if (mp->mnt_vfsstat.f_fsid.val[0] == fsid->val[0] && + mp->mnt_vfsstat.f_fsid.val[1] == fsid->val[1]) { + retmp = mp; + if (withref) { + if (mount_iterref(retmp, 1)) + retmp = (mount_t)0; + } + goto out; + } +out: + if (!locked) + mount_list_unlock(); + return (retmp); +} + +errno_t +vnode_lookup(const char *path, int flags, 
vnode_t *vpp, vfs_context_t context) +{ + struct nameidata nd; + int error; + struct vfs_context context2; + vfs_context_t ctx = context; + u_long ndflags = 0; + + if (context == NULL) { /* XXX technically an error */ + context2.vc_proc = current_proc(); + context2.vc_ucred = kauth_cred_get(); + ctx = &context2; + } + + if (flags & VNODE_LOOKUP_NOFOLLOW) + ndflags = NOFOLLOW; + else + ndflags = FOLLOW; + + if (flags & VNODE_LOOKUP_NOCROSSMOUNT) + ndflags |= NOCROSSMOUNT; + if (flags & VNODE_LOOKUP_DOWHITEOUT) + ndflags |= DOWHITEOUT; + + /* XXX AUDITVNPATH1 needed ? */ + NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + + if ((error = namei(&nd))) + return (error); + *vpp = nd.ni_vp; + nameidone(&nd); + + return (0); +} + +errno_t +vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_context_t context) +{ + struct nameidata nd; + int error; + struct vfs_context context2; + vfs_context_t ctx = context; + u_long ndflags = 0; + + if (context == NULL) { /* XXX technically an error */ + context2.vc_proc = current_proc(); + context2.vc_ucred = kauth_cred_get(); + ctx = &context2; + } + + if (flags & VNODE_LOOKUP_NOFOLLOW) + ndflags = NOFOLLOW; + else + ndflags = FOLLOW; + + if (flags & VNODE_LOOKUP_NOCROSSMOUNT) + ndflags |= NOCROSSMOUNT; + if (flags & VNODE_LOOKUP_DOWHITEOUT) + ndflags |= DOWHITEOUT; + + /* XXX AUDITVNPATH1 needed ? */ + NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + + if ((error = vn_open(&nd, fmode, cmode))) + *vpp = NULL; + else + *vpp = nd.ni_vp; + + return (error); +} + +errno_t +vnode_close(vnode_t vp, int flags, vfs_context_t context) +{ + kauth_cred_t cred; + struct proc *p; + int error; + + if (context) { + p = context->vc_proc; + cred = context->vc_ucred; + } else { + p = current_proc(); + cred = kauth_cred_get(); + } + + error = vn_close(vp, flags, cred, p); + vnode_put(vp); + return (error); +} + +errno_t +vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) +{ + struct vnode_attr va; + int error; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + error = vnode_getattr(vp, &va, ctx); + if (!error) + *sizep = va.va_data_size; + return(error); +} + +errno_t +vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, size); + va.va_vaflags = ioflag & 0xffff; + return(vnode_setattr(vp, &va, ctx)); +} + +errno_t +vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx) +{ + kauth_acl_t oacl, nacl; + int initial_acl; + errno_t error; + vnode_t vp = (vnode_t)0; + + error = 0; + oacl = nacl = NULL; + initial_acl = 0; + + KAUTH_DEBUG("%p CREATE - '%s'", dvp, cnp->cn_nameptr); + + /* + * Handle ACL inheritance. + */ + if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) { + /* save the original filesec */ + if (VATTR_IS_ACTIVE(vap, va_acl)) { + initial_acl = 1; + oacl = vap->va_acl; + } + + vap->va_acl = NULL; + if ((error = kauth_acl_inherit(dvp, + oacl, + &nacl, + vap->va_type == VDIR, + ctx)) != 0) { + KAUTH_DEBUG("%p CREATE - error %d processing inheritance", dvp, error); + return(error); + } + + /* + * If the generated ACL is NULL, then we can save ourselves some effort + * by clearing the active bit. + */ + if (nacl == NULL) { + VATTR_CLEAR_ACTIVE(vap, va_acl); + } else { + VATTR_SET(vap, va_acl, nacl); + } + } + + /* + * Check and default new attributes. 
+ * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller + * hasn't supplied them. + */ + if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) { + KAUTH_DEBUG("%p CREATE - error %d handling/defaulting attributes", dvp, error); + goto out; + } + + + /* + * Create the requested node. + */ + switch(vap->va_type) { + case VREG: + error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx); + break; + case VDIR: + error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx); + break; + case VSOCK: + case VFIFO: + case VBLK: + case VCHR: + error = VNOP_MKNOD(dvp, vpp, cnp, vap, ctx); + break; + default: + panic("vn_create: unknown vtype %d", vap->va_type); + } + if (error != 0) { + KAUTH_DEBUG("%p CREATE - error %d returned by filesystem", dvp, error); + goto out; + } + + vp = *vpp; + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + if (!VATTR_ALL_SUPPORTED(vap) && *vpp) { + KAUTH_DEBUG(" CREATE - doing fallback with ACL %p", vap->va_acl); + error = vnode_setattr_fallback(*vpp, vap, ctx); + } + if ((error != 0) && (vp != (vnode_t)0)) { + *vpp = (vnode_t) 0; + vnode_put(vp); + } + +out: + /* + * If the caller supplied a filesec in vap, it has been replaced + * now by the post-inheritance copy. We need to put the original back + * and free the inherited product. + */ + if (initial_acl) { + VATTR_SET(vap, va_acl, oacl); + } else { + VATTR_CLEAR_ACTIVE(vap, va_acl); + } + if (nacl != NULL) + kauth_acl_free(nacl); + + return(error); +} + +static kauth_scope_t vnode_scope; +static int vnode_authorize_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action, + uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3); + +typedef struct _vnode_authorize_context { + vnode_t vp; + struct vnode_attr *vap; + vnode_t dvp; + struct vnode_attr *dvap; + vfs_context_t ctx; + int flags; + int flags_valid; +#define _VAC_IS_OWNER (1<<0) +#define _VAC_IN_GROUP (1<<1) +#define _VAC_IS_DIR_OWNER (1<<2) +#define _VAC_IN_DIR_GROUP (1<<3) +} *vauth_ctx; + +void +vnode_authorize_init(void) +{ + vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL); +} + +/* + * Authorize an operation on a vnode. + * + * This is KPI, but here because it needs vnode_scope. + */ +int +vnode_authorize(vnode_t vp, vnode_t dvp, kauth_action_t action, vfs_context_t context) +{ + int error, result; + + /* + * We can't authorize against a dead vnode; allow all operations through so that + * the correct error can be returned. + */ + if (vp->v_type == VBAD) + return(0); + + error = 0; + result = kauth_authorize_action(vnode_scope, vfs_context_ucred(context), action, + (uintptr_t)context, (uintptr_t)vp, (uintptr_t)dvp, (uintptr_t)&error); + if (result == EPERM) /* traditional behaviour */ + result = EACCES; + /* did the lower layers give a better error return? */ + if ((result != 0) && (error != 0)) + return(error); + return(result); +} + +/* + * Test for vnode immutability. + * + * The 'append' flag is set when the authorization request is constrained + * to operations which only request the right to append to a file. + * + * The 'ignore' flag is set when an operation modifying the immutability flags + * is being authorized. We check the system securelevel to determine which + * immutability flags we can ignore.
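+ * + * A worked example (illustrative, not part of the original comment): a plain + * write to a file marked UF_IMMUTABLE fails because the mask (IMMUTABLE | + * APPEND) intersects the flags; an append-only write to a file marked only + * UF_APPEND succeeds because 'append' strips the append bits from the mask; + * and with 'ignore' set at securelevel <= 0 the mask collapses to zero, so + * even SF_IMMUTABLE does not block the flags change.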
+ */ +static int +vnode_immutable(struct vnode_attr *vap, int append, int ignore) +{ + int mask; + + /* start with all bits precluding the operation */ + mask = IMMUTABLE | APPEND; + + /* if appending only, remove the append-only bits */ + if (append) + mask &= ~APPEND; + + /* ignore only set when authorizing flags changes */ + if (ignore) { + if (securelevel <= 0) { + /* in insecure state, flags do not inhibit changes */ + mask = 0; + } else { + /* in secure state, user flags don't inhibit */ + mask &= ~(UF_IMMUTABLE | UF_APPEND); + } + } + KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap->va_flags, mask, append, ignore); + if ((vap->va_flags & mask) != 0) + return(EPERM); + return(0); +} + +static int +vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred) +{ + int result; + + /* default assumption is not-owner */ + result = 0; + + /* + * If the filesystem has given us a UID, we treat this as authoritative. + */ + if (vap && VATTR_IS_SUPPORTED(vap, va_uid)) { + result = (vap->va_uid == kauth_cred_getuid(cred)) ? 1 : 0; + } + /* we could test the owner UUID here if we had a policy for it */ + + return(result); +} + +static int +vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) +{ + int error; + int result; + + error = 0; + result = 0; + + /* the caller is expected to have asked the filesystem for a group at some point */ + if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) { + error = kauth_cred_ismember_gid(cred, vap->va_gid, &result); + } + /* we could test the group UUID here if we had a policy for it */ + + if (!error) + *ismember = result; + return(error); +} + +static int +vauth_file_owner(vauth_ctx vcp) +{ + int result; + + if (vcp->flags_valid & _VAC_IS_OWNER) { + result = (vcp->flags & _VAC_IS_OWNER) ? 1 : 0; + } else { + result = vauth_node_owner(vcp->vap, vcp->ctx->vc_ucred); + + /* cache our result */ + vcp->flags_valid |= _VAC_IS_OWNER; + if (result) { + vcp->flags |= _VAC_IS_OWNER; + } else { + vcp->flags &= ~_VAC_IS_OWNER; + } + } + return(result); +} + +static int +vauth_file_ingroup(vauth_ctx vcp, int *ismember) +{ + int error; + + if (vcp->flags_valid & _VAC_IN_GROUP) { + *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0; + error = 0; + } else { + error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember); + + if (!error) { + /* cache our result */ + vcp->flags_valid |= _VAC_IN_GROUP; + if (*ismember) { + vcp->flags |= _VAC_IN_GROUP; + } else { + vcp->flags &= ~_VAC_IN_GROUP; + } + } + + } + return(error); +} + +static int +vauth_dir_owner(vauth_ctx vcp) +{ + int result; + + if (vcp->flags_valid & _VAC_IS_DIR_OWNER) { + result = (vcp->flags & _VAC_IS_DIR_OWNER) ? 1 : 0; + } else { + result = vauth_node_owner(vcp->dvap, vcp->ctx->vc_ucred); + + /* cache our result */ + vcp->flags_valid |= _VAC_IS_DIR_OWNER; + if (result) { + vcp->flags |= _VAC_IS_DIR_OWNER; + } else { + vcp->flags &= ~_VAC_IS_DIR_OWNER; + } + } + return(result); +} + +static int +vauth_dir_ingroup(vauth_ctx vcp, int *ismember) +{ + int error; + + if (vcp->flags_valid & _VAC_IN_DIR_GROUP) { + *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 
1 : 0; + error = 0; + } else { + error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember); + + if (!error) { + /* cache our result */ + vcp->flags_valid |= _VAC_IN_DIR_GROUP; + if (*ismember) { + vcp->flags |= _VAC_IN_DIR_GROUP; + } else { + vcp->flags &= ~_VAC_IN_DIR_GROUP; + } + } + } + return(error); +} + +/* + * Test the posix permissions in (vap) to determine whether (credential) + * may perform (action) + */ +static int +vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir) +{ + struct vnode_attr *vap; + int needed, error, owner_ok, group_ok, world_ok, ismember; +#ifdef KAUTH_DEBUG_ENABLE + const char *where; +# define _SETWHERE(c) where = c; +#else +# define _SETWHERE(c) +#endif + + /* checking file or directory? */ + if (on_dir) { + vap = vcp->dvap; + } else { + vap = vcp->vap; + } + + error = 0; + + /* + * We want to do as little work here as possible. So first we check + * which sets of permissions grant us the access we need, and avoid checking + * whether specific permissions grant access when more generic ones would. + */ + + /* owner permissions */ + needed = 0; + if (action & VREAD) + needed |= S_IRUSR; + if (action & VWRITE) + needed |= S_IWUSR; + if (action & VEXEC) + needed |= S_IXUSR; + owner_ok = (needed & vap->va_mode) == needed; + + /* group permissions */ + needed = 0; + if (action & VREAD) + needed |= S_IRGRP; + if (action & VWRITE) + needed |= S_IWGRP; + if (action & VEXEC) + needed |= S_IXGRP; + group_ok = (needed & vap->va_mode) == needed; + + /* world permissions */ + needed = 0; + if (action & VREAD) + needed |= S_IROTH; + if (action & VWRITE) + needed |= S_IWOTH; + if (action & VEXEC) + needed |= S_IXOTH; + world_ok = (needed & vap->va_mode) == needed; + + /* If granted/denied by all three, we're done */ + if (owner_ok && group_ok && world_ok) { + _SETWHERE("all"); + goto out; + } + if (!owner_ok && !group_ok && !world_ok) { + _SETWHERE("all"); + error = EACCES; + goto out; + } + + /* Check ownership (relatively cheap) */ + if ((on_dir && vauth_dir_owner(vcp)) || + (!on_dir && vauth_file_owner(vcp))) { + _SETWHERE("user"); + if (!owner_ok) + error = EACCES; + goto out; + } + + /* Not owner; if group and world both grant it we're done */ + if (group_ok && world_ok) { + _SETWHERE("group/world"); + goto out; + } + if (!group_ok && !world_ok) { + _SETWHERE("group/world"); + error = EACCES; + goto out; + } + + /* Check group membership (most expensive) */ + ismember = 0; + if (on_dir) { + error = vauth_dir_ingroup(vcp, &ismember); + } else { + error = vauth_file_ingroup(vcp, &ismember); + } + if (error) + goto out; + if (ismember) { + _SETWHERE("group"); + if (!group_ok) + error = EACCES; + goto out; + } + + /* Not owner, not in group, use world result */ + _SETWHERE("world"); + if (!world_ok) + error = EACCES; + + /* FALLTHROUGH */ + +out: + KAUTH_DEBUG("%p %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d", + vcp->vp, (error == 0) ? "ALLOWED" : "DENIED", where, + (action & VREAD) ? "r" : "-", + (action & VWRITE) ? "w" : "-", + (action & VEXEC) ? "x" : "-", + needed, + (vap->va_mode & S_IRUSR) ? "r" : "-", + (vap->va_mode & S_IWUSR) ? "w" : "-", + (vap->va_mode & S_IXUSR) ? "x" : "-", + (vap->va_mode & S_IRGRP) ? "r" : "-", + (vap->va_mode & S_IWGRP) ? "w" : "-", + (vap->va_mode & S_IXGRP) ? "x" : "-", + (vap->va_mode & S_IROTH) ? "r" : "-", + (vap->va_mode & S_IWOTH) ? "w" : "-", + (vap->va_mode & S_IXOTH) ? "x" : "-", + kauth_cred_getuid(vcp->ctx->vc_ucred), + on_dir ? 
vcp->dvap->va_uid : vcp->vap->va_uid, + on_dir ? vcp->dvap->va_gid : vcp->vap->va_gid); + return(error); +} + +/* + * Authorize the deletion of the node vp from the directory dvp. + * + * We assume that: + * - Neither the node nor the directory is immutable. + * - The user is not the superuser. + * + * Deletion is not permitted if the directory is sticky and the caller is not owner of the + * node or directory. + * + * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be + * deleted. If neither denies the permission, and the caller has Posix write access to the + * directory, then the node may be deleted. + */ +static int +vnode_authorize_delete(vauth_ctx vcp) +{ + struct vnode_attr *vap = vcp->vap; + struct vnode_attr *dvap = vcp->dvap; + kauth_cred_t cred = vcp->ctx->vc_ucred; + struct kauth_acl_eval eval; + int error, delete_denied, delete_child_denied, ismember; + + /* check the ACL on the directory */ + delete_child_denied = 0; + if (VATTR_IS_NOT(dvap, va_acl, NULL)) { + eval.ae_requested = KAUTH_VNODE_DELETE_CHILD; + eval.ae_acl = &dvap->va_acl->acl_ace[0]; + eval.ae_count = dvap->va_acl->acl_entrycount; + eval.ae_options = 0; + if (vauth_dir_owner(vcp)) + eval.ae_options |= KAUTH_AEVAL_IS_OWNER; + if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0) + return(error); + if (ismember) + eval.ae_options |= KAUTH_AEVAL_IN_GROUP; + eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; + eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; + eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; + eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; + + error = kauth_acl_evaluate(cred, &eval); + + if (error != 0) { + KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); + return(error); + } + if (eval.ae_result == KAUTH_RESULT_DENY) + delete_child_denied = 1; + if (eval.ae_result == KAUTH_RESULT_ALLOW) { + KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp); + return(0); + } + } + + /* check the ACL on the node */ + delete_denied = 0; + if (VATTR_IS_NOT(vap, va_acl, NULL)) { + eval.ae_requested = KAUTH_VNODE_DELETE; + eval.ae_acl = &vap->va_acl->acl_ace[0]; + eval.ae_count = vap->va_acl->acl_entrycount; + eval.ae_options = 0; + if (vauth_file_owner(vcp)) + eval.ae_options |= KAUTH_AEVAL_IS_OWNER; + if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) + return(error); + if (ismember) + eval.ae_options |= KAUTH_AEVAL_IN_GROUP; + eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; + eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; + eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; + eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; + + if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { + KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); + return(error); + } + if (eval.ae_result == KAUTH_RESULT_DENY) + delete_denied = 1; + if (eval.ae_result == KAUTH_RESULT_ALLOW) { + KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp); + return(0); + } + } + + /* if denied by ACL on directory or node, return denial */ + if (delete_denied || delete_child_denied) { + KAUTH_DEBUG("%p DENIED - denied by ACL", vcp->vp); + return(EACCES); + } + + /* enforce sticky bit behaviour */ + if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) { + KAUTH_DEBUG("%p DENIED - sticky bit rules (user %d file %d dir %d)", + vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid); + return(EACCES); + } + + /* check the directory */ + if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) { +
KAUTH_DEBUG("%p ALLOWED - granted by posix permisssions", vcp->vp); + return(error); + } + + /* not denied, must be OK */ + return(0); +} + + +/* + * Authorize an operation based on the node's attributes. + */ +static int +vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_rights_t preauth_rights) +{ + struct vnode_attr *vap = vcp->vap; + kauth_cred_t cred = vcp->ctx->vc_ucred; + struct kauth_acl_eval eval; + int error, ismember; + mode_t posix_action; + + /* + * If we are the file owner, we automatically have some rights. + * + * Do we need to expand this to support group ownership? + */ + if (vauth_file_owner(vcp)) + acl_rights &= ~(KAUTH_VNODE_WRITE_SECURITY); + + /* + * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can + * mask the latter. If TAKE_OWNERSHIP is requested the caller is about to + * change ownership to themselves, and WRITE_SECURITY is implicitly + * granted to the owner. We need to do this because at this point + * WRITE_SECURITY may not be granted as the caller is not currently + * the owner. + */ + if ((acl_rights & KAUTH_VNODE_TAKE_OWNERSHIP) && + (acl_rights & KAUTH_VNODE_WRITE_SECURITY)) + acl_rights &= ~KAUTH_VNODE_WRITE_SECURITY; + + if (acl_rights == 0) { + KAUTH_DEBUG("%p ALLOWED - implicit or no rights required", vcp->vp); + return(0); + } + + /* if we have an ACL, evaluate it */ + if (VATTR_IS_NOT(vap, va_acl, NULL)) { + eval.ae_requested = acl_rights; + eval.ae_acl = &vap->va_acl->acl_ace[0]; + eval.ae_count = vap->va_acl->acl_entrycount; + eval.ae_options = 0; + if (vauth_file_owner(vcp)) + eval.ae_options |= KAUTH_AEVAL_IS_OWNER; + if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) + return(error); + if (ismember) + eval.ae_options |= KAUTH_AEVAL_IN_GROUP; + eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; + eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS; + eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; + eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; + + if ((error = kauth_acl_evaluate(cred, &eval)) != 0) { + KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); + return(error); + } + + if (eval.ae_result == KAUTH_RESULT_DENY) { + KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp); + return(EACCES); /* deny, deny, counter-allege */ + } + if (eval.ae_result == KAUTH_RESULT_ALLOW) { + KAUTH_DEBUG("%p ALLOWED - all rights granted by ACL", vcp->vp); + return(0); + } + /* fall through and evaluate residual rights */ + } else { + /* no ACL, everything is residual */ + eval.ae_residual = acl_rights; + } + + /* + * Grant residual rights that have been pre-authorized. + */ + eval.ae_residual &= ~preauth_rights; + + /* + * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied. + */ + if (vauth_file_owner(vcp)) + eval.ae_residual &= ~KAUTH_VNODE_WRITE_ATTRIBUTES; + + if (eval.ae_residual == 0) { + KAUTH_DEBUG("%p ALLOWED - rights already authorized", vcp->vp); + return(0); + } + + /* + * Bail if we have residual rights that can't be granted by posix permissions, + * or aren't presumed granted at this point. 
+ * + * XXX these can be collapsed for performance + */ + if (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) { + KAUTH_DEBUG("%p DENIED - CHANGE_OWNER not permitted", vcp->vp); + return(EACCES); + } + if (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) { + KAUTH_DEBUG("%p DENIED - WRITE_SECURITY not permitted", vcp->vp); + return(EACCES); + } + +#if DIAGNOSTIC + if (eval.ae_residual & KAUTH_VNODE_DELETE) + panic("vnode_authorize: can't be checking delete permission here"); +#endif + + /* + * Compute the fallback posix permissions that will satisfy the remaining + * rights. + */ + posix_action = 0; + if (eval.ae_residual & (KAUTH_VNODE_READ_DATA | + KAUTH_VNODE_LIST_DIRECTORY | + KAUTH_VNODE_READ_EXTATTRIBUTES)) + posix_action |= VREAD; + if (eval.ae_residual & (KAUTH_VNODE_WRITE_DATA | + KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY | + KAUTH_VNODE_DELETE_CHILD | + KAUTH_VNODE_WRITE_ATTRIBUTES | + KAUTH_VNODE_WRITE_EXTATTRIBUTES)) + posix_action |= VWRITE; + if (eval.ae_residual & (KAUTH_VNODE_EXECUTE | + KAUTH_VNODE_SEARCH)) + posix_action |= VEXEC; + + if (posix_action != 0) { + return(vnode_authorize_posix(vcp, posix_action, 0 /* !on_dir */)); + } else { + KAUTH_DEBUG("%p ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping", + vcp->vp, + (eval.ae_residual & KAUTH_VNODE_READ_DATA) + ? vnode_isdir(vcp->vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", + (eval.ae_residual & KAUTH_VNODE_WRITE_DATA) + ? vnode_isdir(vcp->vp) ? " ADD_FILE" : " WRITE_DATA" : "", + (eval.ae_residual & KAUTH_VNODE_EXECUTE) + ? vnode_isdir(vcp->vp) ? " SEARCH" : " EXECUTE" : "", + (eval.ae_residual & KAUTH_VNODE_DELETE) + ? " DELETE" : "", + (eval.ae_residual & KAUTH_VNODE_APPEND_DATA) + ? vnode_isdir(vcp->vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", + (eval.ae_residual & KAUTH_VNODE_DELETE_CHILD) + ? " DELETE_CHILD" : "", + (eval.ae_residual & KAUTH_VNODE_READ_ATTRIBUTES) + ? " READ_ATTRIBUTES" : "", + (eval.ae_residual & KAUTH_VNODE_WRITE_ATTRIBUTES) + ? " WRITE_ATTRIBUTES" : "", + (eval.ae_residual & KAUTH_VNODE_READ_EXTATTRIBUTES) + ? " READ_EXTATTRIBUTES" : "", + (eval.ae_residual & KAUTH_VNODE_WRITE_EXTATTRIBUTES) + ? " WRITE_EXTATTRIBUTES" : "", + (eval.ae_residual & KAUTH_VNODE_READ_SECURITY) + ? " READ_SECURITY" : "", + (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) + ? " WRITE_SECURITY" : "", + (eval.ae_residual & KAUTH_VNODE_CHECKIMMUTABLE) + ? " CHECKIMMUTABLE" : "", + (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) + ? " CHANGE_OWNER" : ""); + } + + /* + * Lack of required Posix permissions implies no reason to deny access. + */ + return(0); +} + +/* + * Check for file immutability. + */ +static int +vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore) +{ + mount_t mp; + int error; + int append; + + /* + * Perform immutability checks for operations that change data. + * + * Sockets, fifos and devices require special handling. + */ + switch(vp->v_type) { + case VSOCK: + case VFIFO: + case VBLK: + case VCHR: + /* + * Writing to these nodes does not change the filesystem data, + * so forget that it's being tried. 
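 + * (A write to a FIFO or a device node, for example, only moves data through + * the kernel or the driver; the on-disk filesystem object is untouched.)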
+ */ + rights &= ~KAUTH_VNODE_WRITE_DATA; + break; + default: + break; + } + + error = 0; + if (rights & KAUTH_VNODE_WRITE_RIGHTS) { + + /* check per-filesystem options if possible */ + mp = vnode_mount(vp); + if (mp != NULL) { + + /* check for no-EA filesystems */ + if ((rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) && + (vfs_flags(mp) & MNT_NOUSERXATTR)) { + KAUTH_DEBUG("%p DENIED - filesystem disallowed extended attributes", vp); + error = EACCES; /* User attributes disabled */ + goto out; + } + } + + /* check for file immutability */ + append = 0; + if (vp->v_type == VDIR) { + if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights) + append = 1; + } else { + if ((rights & KAUTH_VNODE_APPEND_DATA) == rights) + append = 1; + } + if ((error = vnode_immutable(vap, append, ignore)) != 0) { + KAUTH_DEBUG("%p DENIED - file is immutable", vp); + goto out; + } + } +out: + return(error); +} + +/* + * Handle authorization actions for filesystems that advertise that the server will + * be enforcing. + */ +static int +vnode_authorize_opaque(vnode_t vp, int *resultp, kauth_action_t action, vfs_context_t ctx) +{ + int error; + + /* + * If the vp is a device node, socket or FIFO it actually represents a local + * endpoint, so we need to handle it locally. + */ + switch(vp->v_type) { + case VBLK: + case VCHR: + case VSOCK: + case VFIFO: + return(0); + default: + break; + } + + /* + * In the advisory request case, if the filesystem doesn't think it's reliable + * we will attempt to formulate a result ourselves based on VNOP_GETATTR data. + */ + if ((action & KAUTH_VNODE_ACCESS) && !vfs_authopaqueaccess(vnode_mount(vp))) + return(0); + + /* + * Let the filesystem have a say in the matter. It's OK for it to not implement + * VNOP_ACCESS, as most will authorise inline with the actual request. + */ + if ((error = VNOP_ACCESS(vp, action, ctx)) != ENOTSUP) { + *resultp = error; + KAUTH_DEBUG("%p DENIED - opaque filesystem VNOP_ACCESS denied access", vp); + return(1); + } + + /* + * Typically opaque filesystems do authorisation in-line, but exec is a special case. In + * order to be reasonably sure that exec will be permitted, we try a bit harder here. + */ + if ((action & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp)) { + /* try a VNOP_OPEN for readonly access */ + if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { + *resultp = error; + KAUTH_DEBUG("%p DENIED - EXECUTE denied because file could not be opened readonly", vp); + return(1); + } + VNOP_CLOSE(vp, FREAD, ctx); + } + + /* + * We don't have any reason to believe that the request has to be denied at this point, + * so go ahead and allow it.
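 + * (Returning 1 tells the caller the decision is final; returning 0 above + * instead sends the caller on to the local VNOP_GETATTR-based checks.)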
+ */ + *resultp = 0; + KAUTH_DEBUG("%p ALLOWED - bypassing access check for non-local filesystem", vp); + return(1); +} + +static int +vnode_authorize_callback(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action, + uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) +{ + struct _vnode_authorize_context auth_context; + vauth_ctx vcp; + vfs_context_t ctx; + vnode_t vp, dvp; + kauth_cred_t cred; + kauth_ace_rights_t rights; + struct vnode_attr va, dva; + int result; + int *errorp; + int noimmutable; + + vcp = &auth_context; + ctx = vcp->ctx = (vfs_context_t)arg0; + vp = vcp->vp = (vnode_t)arg1; + dvp = vcp->dvp = (vnode_t)arg2; + errorp = (int *)arg3; + /* note that we authorize against the context, not the passed cred (the same thing anyway) */ + cred = ctx->vc_ucred; + + VATTR_INIT(&va); + vcp->vap = &va; + VATTR_INIT(&dva); + vcp->dvap = &dva; + + vcp->flags = vcp->flags_valid = 0; + +#if DIAGNOSTIC + if ((ctx == NULL) || (vp == NULL) || (cred == NULL)) + panic("vnode_authorize: bad arguments (context %p vp %p cred %p)", ctx, vp, cred); +#endif + + KAUTH_DEBUG("%p AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)", + vp, vfs_context_proc(ctx)->p_comm, + (action & KAUTH_VNODE_ACCESS) ? "access" : "auth", + (action & KAUTH_VNODE_READ_DATA) ? vnode_isdir(vp) ? " LIST_DIRECTORY" : " READ_DATA" : "", + (action & KAUTH_VNODE_WRITE_DATA) ? vnode_isdir(vp) ? " ADD_FILE" : " WRITE_DATA" : "", + (action & KAUTH_VNODE_EXECUTE) ? vnode_isdir(vp) ? " SEARCH" : " EXECUTE" : "", + (action & KAUTH_VNODE_DELETE) ? " DELETE" : "", + (action & KAUTH_VNODE_APPEND_DATA) ? vnode_isdir(vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "", + (action & KAUTH_VNODE_DELETE_CHILD) ? " DELETE_CHILD" : "", + (action & KAUTH_VNODE_READ_ATTRIBUTES) ? " READ_ATTRIBUTES" : "", + (action & KAUTH_VNODE_WRITE_ATTRIBUTES) ? " WRITE_ATTRIBUTES" : "", + (action & KAUTH_VNODE_READ_EXTATTRIBUTES) ? " READ_EXTATTRIBUTES" : "", + (action & KAUTH_VNODE_WRITE_EXTATTRIBUTES) ? " WRITE_EXTATTRIBUTES" : "", + (action & KAUTH_VNODE_READ_SECURITY) ? " READ_SECURITY" : "", + (action & KAUTH_VNODE_WRITE_SECURITY) ? " WRITE_SECURITY" : "", + (action & KAUTH_VNODE_CHANGE_OWNER) ? " CHANGE_OWNER" : "", + (action & KAUTH_VNODE_NOIMMUTABLE) ? " (noimmutable)" : "", + vnode_isdir(vp) ? "directory" : "file", + vp->v_name ? vp->v_name : "", action, vp, dvp); + + /* + * Extract the control bits from the action, everything else is + * requested rights. + */ + noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0; + rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE); + + if (rights & KAUTH_VNODE_DELETE) { +#if DIAGNOSTIC + if (dvp == NULL) + panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory"); +#endif + } else { + dvp = NULL; + } + + /* + * Check for read-only filesystems. + */ + if ((rights & KAUTH_VNODE_WRITE_RIGHTS) && + (vp->v_mount->mnt_flag & MNT_RDONLY) && + ((vp->v_type == VREG) || (vp->v_type == VDIR) || + (vp->v_type == VLNK) || (vp->v_type == VCPLX) || + (rights & KAUTH_VNODE_DELETE) || (rights & KAUTH_VNODE_DELETE_CHILD))) { + result = EROFS; + goto out; + } + + /* + * Check for noexec filesystems. + */ + if ((rights & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp) && (vp->v_mount->mnt_flag & MNT_NOEXEC)) { + result = EACCES; + goto out; + } + + /* + * Handle cases related to filesystems with non-local enforcement. + * This call can return 0, in which case we will fall through to perform a + * check based on VNOP_GETATTR data. 
Otherwise it returns 1 and sets + * an appropriate result, at which point we can return immediately. + */ + if (vfs_authopaque(vp->v_mount) && vnode_authorize_opaque(vp, &result, action, ctx)) + goto out; + + /* + * Get vnode attributes and extended security information for the vnode + * and directory if required. + */ + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_flags); + VATTR_WANTED(&va, va_acl); + if ((result = vnode_getattr(vp, &va, ctx)) != 0) { + KAUTH_DEBUG("%p ERROR - failed to get vnode attributes - %d", vp, result); + goto out; + } + if (dvp) { + VATTR_WANTED(&dva, va_mode); + VATTR_WANTED(&dva, va_uid); + VATTR_WANTED(&dva, va_gid); + VATTR_WANTED(&dva, va_flags); + VATTR_WANTED(&dva, va_acl); + if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) { + KAUTH_DEBUG("%p ERROR - failed to get directory vnode attributes - %d", vp, result); + goto out; + } + } + + /* + * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes + * *_EXTATTRIBUTES. + */ + if (S_ISXATTR(va.va_mode)) { + if (rights & KAUTH_VNODE_READ_DATA) { + rights &= ~KAUTH_VNODE_READ_DATA; + rights |= KAUTH_VNODE_READ_EXTATTRIBUTES; + } + if (rights & KAUTH_VNODE_WRITE_DATA) { + rights &= ~KAUTH_VNODE_WRITE_DATA; + rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; + } + } + + /* + * Check for immutability. + * + * In the deletion case, parent directory immutability vetoes specific + * file rights. + */ + if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0) + goto out; + if ((rights & KAUTH_VNODE_DELETE) && + ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0)) + goto out; + + /* + * Clear rights that have been authorized by reaching this point, bail if nothing left to + * check. + */ + rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE); + if (rights == 0) + goto out; + + /* + * If we're not the superuser, authorize based on file properties. + */ + if (!vfs_context_issuser(ctx)) { + /* process delete rights */ + if ((rights & KAUTH_VNODE_DELETE) && + ((result = vnode_authorize_delete(vcp)) != 0)) + goto out; + + /* process remaining rights */ + if ((rights & ~KAUTH_VNODE_DELETE) && + ((result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE)) != 0)) + goto out; + } else { + + /* + * Execute is only granted to root if one of the x bits is set. This check only + * makes sense if the posix mode bits are actually supported. + */ + if ((rights & KAUTH_VNODE_EXECUTE) && + (vp->v_type == VREG) && + VATTR_IS_SUPPORTED(&va, va_mode) && + !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { + result = EPERM; + KAUTH_DEBUG("%p DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode); + goto out; + } + + KAUTH_DEBUG("%p ALLOWED - caller is superuser", vp); + } + +out: + if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) + kauth_acl_free(va.va_acl); + if (VATTR_IS_SUPPORTED(&dva, va_acl) && (dva.va_acl != NULL)) + kauth_acl_free(dva.va_acl); + if (result) { + *errorp = result; + KAUTH_DEBUG("%p DENIED - auth denied", vp); + return(KAUTH_RESULT_DENY); + } + + /* + * Note that this implies that we will allow requests for no rights, as well as + * for rights that we do not recognise. There should be none of these. + */ + KAUTH_DEBUG("%p ALLOWED - auth granted", vp); + return(KAUTH_RESULT_ALLOW); +} + +/* + * Check that the attribute information in vattr can be legally applied to + * a new file by the context. 
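 + * + * A hedged caller sketch (illustrative only, not from the original source; + * vn_create() above performs this sequence for real, and the variable names + * here are assumed): + * + *	struct vnode_attr va; + * + *	VATTR_INIT(&va); + *	VATTR_SET(&va, va_type, VREG); + *	VATTR_SET(&va, va_mode, 0644); + *	if ((error = vnode_authattr_new(dvp, &va, 0, ctx)) == 0) + *		error = VNOP_CREATE(dvp, &vp, cnp, &va, ctx); + *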
+ */ +int +vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx) +{ + int error; + int is_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode; + kauth_cred_t cred; + guid_t changer; + mount_t dmp; + + error = 0; + defaulted_owner = defaulted_group = defaulted_mode = 0; + + /* + * Require that the filesystem support extended security to apply any. + */ + if (!vfs_extendedsecurity(dvp->v_mount) && + (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) { + error = EINVAL; + goto out; + } + + /* + * Default some fields. + */ + dmp = dvp->v_mount; + + /* + * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that + * owner takes ownership of all new files. + */ + if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsowner != KAUTH_UID_NONE)) { + VATTR_SET(vap, va_uid, dmp->mnt_fsowner); + defaulted_owner = 1; + } else { + if (!VATTR_IS_ACTIVE(vap, va_uid)) { + /* default owner is current user */ + VATTR_SET(vap, va_uid, kauth_cred_getuid(vfs_context_ucred(ctx))); + defaulted_owner = 1; + } + } + + /* + * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit group is set, that + * group takes ownership of all new files. + */ + if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsgroup != KAUTH_GID_NONE)) { + VATTR_SET(vap, va_gid, dmp->mnt_fsgroup); + defaulted_group = 1; + } else { + if (!VATTR_IS_ACTIVE(vap, va_gid)) { + /* default group comes from parent object, fallback to current user */ + struct vnode_attr dva; + VATTR_INIT(&dva); + VATTR_WANTED(&dva, va_gid); + if ((error = vnode_getattr(dvp, &dva, ctx)) != 0) + goto out; + if (VATTR_IS_SUPPORTED(&dva, va_gid)) { + VATTR_SET(vap, va_gid, dva.va_gid); + } else { + VATTR_SET(vap, va_gid, kauth_cred_getgid(vfs_context_ucred(ctx))); + } + defaulted_group = 1; + } + } + + if (!VATTR_IS_ACTIVE(vap, va_flags)) + VATTR_SET(vap, va_flags, 0); + + /* default mode is everything, masked with current umask */ + if (!VATTR_IS_ACTIVE(vap, va_mode)) { + VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask); + KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap->va_mode, vfs_context_proc(ctx)->p_fd->fd_cmask); + defaulted_mode = 1; + } + /* set timestamps to now */ + if (!VATTR_IS_ACTIVE(vap, va_create_time)) { + nanotime(&vap->va_create_time); + VATTR_SET_ACTIVE(vap, va_create_time); + } + + /* + * Check for attempts to set nonsensical fields. + */ + if (vap->va_active & ~VNODE_ATTR_NEWOBJ) { + error = EINVAL; + KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx", + vap->va_active & ~VNODE_ATTR_NEWOBJ); + goto out; + } + + /* + * Quickly check for the applicability of any enforcement here. + * Tests below maintain the integrity of the local security model. + */ + if (vfs_authopaque(vnode_mount(dvp))) + goto out; + + /* + * We need to know if the caller is the superuser, or if the work is + * otherwise already authorised.
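 + * (The 'noauth' path is taken when doing work for the kernel itself; + * vn_create() passes its VN_CREATE_NOAUTH flag through as this argument.)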
+ */ + cred = vfs_context_ucred(ctx); + if (noauth) { + /* doing work for the kernel */ + is_suser = 1; + } else { + is_suser = vfs_context_issuser(ctx); + } + + + if (VATTR_IS_ACTIVE(vap, va_flags)) { + if (is_suser) { + if ((vap->va_flags & (UF_SETTABLE | SF_SETTABLE)) != vap->va_flags) { + error = EPERM; + KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); + goto out; + } + } else { + if ((vap->va_flags & UF_SETTABLE) != vap->va_flags) { + error = EPERM; + KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); + goto out; + } + } + } + + /* if not superuser, validate legality of new-item attributes */ + if (!is_suser) { + if (!defaulted_mode && VATTR_IS_ACTIVE(vap, va_mode)) { + /* setgid? */ + if (vap->va_mode & S_ISGID) { + if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { + KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", vap->va_gid); + error = EPERM; + goto out; + } + } + + /* setuid? */ + if ((vap->va_mode & S_ISUID) && (vap->va_uid != kauth_cred_getuid(cred))) { + KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); + error = EPERM; + goto out; + } + } + if (!defaulted_owner && (vap->va_uid != kauth_cred_getuid(cred))) { + KAUTH_DEBUG(" DENIED - cannot create new item owned by %d", vap->va_uid); + error = EPERM; + goto out; + } + if (!defaulted_group) { + if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { + KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" DENIED - cannot create new item with group %d - not a member", vap->va_gid); + error = EPERM; + goto out; + } + } + + /* initialising owner/group UUID */ + if (VATTR_IS_ACTIVE(vap, va_uuuid)) { + if ((error = kauth_cred_getguid(cred, &changer)) != 0) { + KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); + /* XXX ENOENT here - no GUID - should perhaps become EPERM */ + goto out; + } + if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { + KAUTH_DEBUG(" ERROR - cannot create item with supplied owner UUID - not us"); + error = EPERM; + goto out; + } + } + if (VATTR_IS_ACTIVE(vap, va_guuid)) { + if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { + KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); + error = EPERM; + goto out; + } + } + } +out: + return(error); +} + +/* + * Check that the attribute information in vap can be legally written by the context. + * + * Call this when you're not sure about the vnode_attr; either its contents have come + * from an unknown source, or when they are variable. + * + * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that + * must be authorized to be permitted to write the vattr. + */ +int +vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_context_t ctx) +{ + struct vnode_attr ova; + kauth_action_t required_action; + int error, is_suser, ismember, chowner, chgroup; + guid_t changer; + gid_t group; + uid_t owner; + mode_t newmode; + kauth_cred_t cred; + uint32_t fdelta; + + VATTR_INIT(&ova); + required_action = 0; + error = 0; + + /* + * Quickly check for enforcement applicability. 
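 + * (On an opaque mount the server enforces attribute changes itself, so we + * skip the local model below and report that no actions are required.)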
+ */ + if (vfs_authopaque(vnode_mount(vp))) + goto out; + + /* + * Check for attempts to set nonsensical fields. + */ + if (vap->va_active & VNODE_ATTR_RDONLY) { + KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)"); + error = EINVAL; + goto out; + } + + /* + * We need to know if the caller is the superuser. + */ + cred = vfs_context_ucred(ctx); + is_suser = kauth_cred_issuser(cred); + + /* + * If any of the following are changing, we need information from the old file: + * va_uid + * va_gid + * va_mode + * va_uuuid + * va_guuid + */ + if (VATTR_IS_ACTIVE(vap, va_uid) || + VATTR_IS_ACTIVE(vap, va_gid) || + VATTR_IS_ACTIVE(vap, va_mode) || + VATTR_IS_ACTIVE(vap, va_uuuid) || + VATTR_IS_ACTIVE(vap, va_guuid)) { + VATTR_WANTED(&ova, va_mode); + VATTR_WANTED(&ova, va_uid); + VATTR_WANTED(&ova, va_gid); + VATTR_WANTED(&ova, va_uuuid); + VATTR_WANTED(&ova, va_guuid); + KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes"); + } + + /* + * If timestamps are being changed, we need to know who the file is owned + * by. + */ + if (VATTR_IS_ACTIVE(vap, va_create_time) || + VATTR_IS_ACTIVE(vap, va_change_time) || + VATTR_IS_ACTIVE(vap, va_modify_time) || + VATTR_IS_ACTIVE(vap, va_access_time) || + VATTR_IS_ACTIVE(vap, va_backup_time)) { + + VATTR_WANTED(&ova, va_uid); +#if 0 /* enable this when we support UUIDs as official owners */ + VATTR_WANTED(&ova, va_uuuid); +#endif + KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID"); + } + + /* + * If flags are being changed, we need the old flags. + */ + if (VATTR_IS_ACTIVE(vap, va_flags)) { + KAUTH_DEBUG("ATTR - flags changing, fetching old flags"); + VATTR_WANTED(&ova, va_flags); + } + + /* + * If the size is being set, make sure it's not a directory. + */ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + /* size is meaningless on a directory, don't permit this */ + if (vnode_isdir(vp)) { + KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory"); + error = EISDIR; + goto out; + } + } + + /* + * Get old data. + */ + KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova.va_active); + if ((error = vnode_getattr(vp, &ova, ctx)) != 0) { + KAUTH_DEBUG(" ERROR - got %d trying to get attributes", error); + goto out; + } + + /* + * Size changes require write access to the file data. + */ + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + /* if we can't get the size, or it's different, we need write access */ + KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA"); + required_action |= KAUTH_VNODE_WRITE_DATA; + } + + /* + * Changing timestamps? + * + * Note that we are only called to authorize user-requested time changes; + * side-effect time changes are not authorized. Authorisation is only + * required for existing files. + * + * Non-owners are not permitted to change the time on an existing + * file to anything other than the current time. + */ + if (VATTR_IS_ACTIVE(vap, va_create_time) || + VATTR_IS_ACTIVE(vap, va_change_time) || + VATTR_IS_ACTIVE(vap, va_modify_time) || + VATTR_IS_ACTIVE(vap, va_access_time) || + VATTR_IS_ACTIVE(vap, va_backup_time)) { + /* + * The owner and root may set any timestamps they like, + * provided that the file is not immutable. The owner still needs + * WRITE_ATTRIBUTES (implied by ownership but still deniable). + */ + if (is_suser || vauth_node_owner(&ova, cred)) { + KAUTH_DEBUG("ATTR - root or owner changing timestamps"); + required_action |= KAUTH_VNODE_CHECKIMMUTABLE | KAUTH_VNODE_WRITE_ATTRIBUTES; + } else { + /* just setting the current time? 
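 + * (i.e. utimes(2) with a NULL times argument, which the syscall layer is + * expected to flag by setting VA_UTIMES_NULL)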
*/ + if (vap->va_vaflags & VA_UTIMES_NULL) { + KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES"); + required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; + } else { + KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted"); + error = EACCES; + goto out; + } + } + } + + /* + * Changing file mode? + */ + if (VATTR_IS_ACTIVE(vap, va_mode) && VATTR_IS_SUPPORTED(&ova, va_mode) && (ova.va_mode != vap->va_mode)) { + KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova.va_mode, vap->va_mode); + + /* + * Mode changes always have the same basic auth requirements. + */ + if (is_suser) { + KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check"); + required_action |= KAUTH_VNODE_CHECKIMMUTABLE; + } else { + /* need WRITE_SECURITY */ + KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY"); + required_action |= KAUTH_VNODE_WRITE_SECURITY; + } + + /* + * Can't set the setgid bit if you're not in the group and not root. Have to have + * existing group information in the case we're not setting it right now. + */ + if (vap->va_mode & S_ISGID) { + required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ + if (!is_suser) { + if (VATTR_IS_ACTIVE(vap, va_gid)) { + group = vap->va_gid; + } else if (VATTR_IS_SUPPORTED(&ova, va_gid)) { + group = ova.va_gid; + } else { + KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available"); + error = EINVAL; + goto out; + } + /* + * This might be too restrictive; WRITE_SECURITY might be implied by + * membership in this case, rather than being an additional requirement. + */ + if ((error = kauth_cred_ismember_gid(cred, group, &ismember)) != 0) { + KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" DENIED - can't set SGID bit, not a member of %d", group); + error = EPERM; + goto out; + } + } + } + + /* + * Can't set the setuid bit unless you're root or the file's owner. + */ + if (vap->va_mode & S_ISUID) { + required_action |= KAUTH_VNODE_CHECKIMMUTABLE; /* always required */ + if (!is_suser) { + if (VATTR_IS_ACTIVE(vap, va_uid)) { + owner = vap->va_uid; + } else if (VATTR_IS_SUPPORTED(&ova, va_uid)) { + owner = ova.va_uid; + } else { + KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available"); + error = EINVAL; + goto out; + } + if (owner != kauth_cred_getuid(cred)) { + /* + * We could allow this if WRITE_SECURITY is permitted, perhaps. + */ + KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit"); + error = EPERM; + goto out; + } + } + } + } + + /* + * Validate/mask flags changes. This checks that only the flags in + * the UF_SETTABLE mask are being set, and preserves the flags in + * the SF_SETTABLE case. + * + * Since flags changes may be made in conjunction with other changes, + * we will ask the auth code to ignore immutability in the case that + * the SF_* flags are not set and we are only manipulating the file flags. + * + */ + if (VATTR_IS_ACTIVE(vap, va_flags)) { + /* compute changing flags bits */ + if (VATTR_IS_SUPPORTED(&ova, va_flags)) { + fdelta = vap->va_flags ^ ova.va_flags; + } else { + fdelta = vap->va_flags; + } + + if (fdelta != 0) { + KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY"); + required_action |= KAUTH_VNODE_WRITE_SECURITY; + + /* check that changing bits are legal */ + if (is_suser) { + /* + * The immutability check will prevent us from clearing the SF_* + * flags unless the system securelevel permits it, so just check + * for legal flags here. 
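 + * (For example, at securelevel > 0 the superuser may still set SF_IMMUTABLE + * here, but a later attempt to clear it would be caught by the immutability + * check rather than by this flags test.)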
+ */ + if (fdelta & ~(UF_SETTABLE | SF_SETTABLE)) { + error = EPERM; + KAUTH_DEBUG(" DENIED - superuser attempt to set illegal flag(s)"); + goto out; + } + } else { + if (fdelta & ~UF_SETTABLE) { + error = EPERM; + KAUTH_DEBUG(" DENIED - user attempt to set illegal flag(s)"); + goto out; + } + } + /* + * If the caller has the ability to manipulate file flags, + * security is not reduced by ignoring them for this operation. + * + * A more complete test here would consider the 'after' states of the flags + * to determine whether it would permit the operation, but this becomes + * very complex. + * + * Ignoring immutability is conditional on securelevel; this does not bypass + * the SF_* flags if securelevel > 0. + */ + required_action |= KAUTH_VNODE_NOIMMUTABLE; + } + } + + /* + * Validate ownership information. + */ + chowner = 0; + chgroup = 0; + + /* + * uid changing + * Note that if the filesystem didn't give us a UID, we expect that it doesn't + * support them in general, and will ignore it if/when we try to set it. + * We might want to clear the uid out of vap completely here. + */ + if (VATTR_IS_ACTIVE(vap, va_uid) && VATTR_IS_SUPPORTED(&ova, va_uid) && (vap->va_uid != ova.va_uid)) { + if (!is_suser && (kauth_cred_getuid(cred) != vap->va_uid)) { + KAUTH_DEBUG(" DENIED - non-superuser cannot change ownership to a third party"); + error = EPERM; + goto out; + } + chowner = 1; + } + + /* + * gid changing + * Note that if the filesystem didn't give us a GID, we expect that it doesn't + * support them in general, and will ignore it if/when we try to set it. + * We might want to clear the gid out of vap completely here. + */ + if (VATTR_IS_ACTIVE(vap, va_gid) && VATTR_IS_SUPPORTED(&ova, va_gid) && (vap->va_gid != ova.va_gid)) { + if (!is_suser) { + if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) { + KAUTH_DEBUG(" ERROR - got %d checking for membership in %d", error, vap->va_gid); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" DENIED - group change from %d to %d but not a member of target group", + ova.va_gid, vap->va_gid); + error = EPERM; + goto out; + } + } + chgroup = 1; + } + + /* + * Owner UUID being set or changed. + */ + if (VATTR_IS_ACTIVE(vap, va_uuuid)) { + /* if the owner UUID is not actually changing ... */ + if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) + goto no_uuuid_change; + + /* + * The owner UUID cannot be set by a non-superuser to anything other than + * their own. + */ + if (!is_suser) { + if ((error = kauth_cred_getguid(cred, &changer)) != 0) { + KAUTH_DEBUG(" ERROR - got %d trying to get caller UUID", error); + /* XXX ENOENT here - no UUID - should perhaps become EPERM */ + goto out; + } + if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { + KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us"); + error = EPERM; + goto out; + } + } + chowner = 1; + } +no_uuuid_change: + /* + * Group UUID being set or changed. + */ + if (VATTR_IS_ACTIVE(vap, va_guuid)) { + /* if the group UUID is not actually changing ... */ + if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) + goto no_guuid_change; + + /* + * The group UUID cannot be set by a non-superuser to anything other than + * one of which they are a member.
+ */ + if (!is_suser) { + if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { + KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); + goto out; + } + if (!ismember) { + KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); + error = EPERM; + goto out; + } + } + chgroup = 1; + } +no_guuid_change: + + /* + * Compute authorisation for group/ownership changes. + */ + if (chowner || chgroup) { + if (is_suser) { + KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check"); + required_action |= KAUTH_VNODE_CHECKIMMUTABLE; + } else { + if (chowner) { + KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP"); + required_action |= KAUTH_VNODE_TAKE_OWNERSHIP; + } + if (chgroup && !chowner) { + KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY"); + required_action |= KAUTH_VNODE_WRITE_SECURITY; + } + + /* clear set-uid and set-gid bits as required by Posix */ + if (VATTR_IS_ACTIVE(vap, va_mode)) { + newmode = vap->va_mode; + } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) { + newmode = ova.va_mode; + } else { + KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits"); + newmode = 0; + } + if (newmode & (S_ISUID | S_ISGID)) { + VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID)); + KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode); + } + } + } + + /* + * Authorise changes in the ACL. + */ + if (VATTR_IS_ACTIVE(vap, va_acl)) { + + /* no existing ACL */ + if (!VATTR_IS_ACTIVE(&ova, va_acl) || (ova.va_acl == NULL)) { + + /* adding an ACL */ + if (vap->va_acl != NULL) { + required_action |= KAUTH_VNODE_WRITE_SECURITY; + KAUTH_DEBUG("CHMOD - adding ACL"); + } + + /* removing an existing ACL */ + } else if (vap->va_acl == NULL) { + required_action |= KAUTH_VNODE_WRITE_SECURITY; + KAUTH_DEBUG("CHMOD - removing ACL"); + + /* updating an existing ACL */ + } else { + if (vap->va_acl->acl_entrycount != ova.va_acl->acl_entrycount) { + /* entry count changed, must be different */ + required_action |= KAUTH_VNODE_WRITE_SECURITY; + KAUTH_DEBUG("CHMOD - adding/removing ACL entries"); + } else if (vap->va_acl->acl_entrycount > 0) { + /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */ + if (memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0], + sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount) != 0) { + required_action |= KAUTH_VNODE_WRITE_SECURITY; + KAUTH_DEBUG("CHMOD - changing ACL entries"); + } + } + } + } + + /* + * Other attributes that require authorisation. + */ + if (VATTR_IS_ACTIVE(vap, va_encoding)) + required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES; + +out: + if (VATTR_IS_SUPPORTED(&ova, va_acl) && (ova.va_acl != NULL)) + kauth_acl_free(ova.va_acl); + if (error == 0) + *actionp = required_action; + return(error); +} + + +void +vfs_setlocklocal(mount_t mp) +{ + vnode_t vp; + + mount_lock(mp); + mp->mnt_kern_flag |= MNTK_LOCK_LOCAL; + + /* + * We do not expect anyone to be using any vnodes at the + * time this routine is called.
So no need for vnode locking + */ + TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { + vp->v_flag |= VLOCKLOCAL; + } + TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) { + vp->v_flag |= VLOCKLOCAL; + } + TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) { + vp->v_flag |= VLOCKLOCAL; + } + mount_unlock(mp); +} + + +#ifdef JOE_DEBUG + +record_vp(vnode_t vp, int count) { + struct uthread *ut; + int i; + + if ((vp->v_flag & VSYSTEM)) + return; + + ut = get_bsdthread_info(current_thread()); + ut->uu_iocount += count; + + if (ut->uu_vpindex < 32) { + for (i = 0; i < ut->uu_vpindex; i++) { + if (ut->uu_vps[i] == vp) + return; + } + ut->uu_vps[ut->uu_vpindex] = vp; + ut->uu_vpindex++; + } +} +#endif diff --git a/bsd/vfs/vfs_support.c b/bsd/vfs/vfs_support.c index 3ab24eb6f..0bf329efe 100644 --- a/bsd/vfs/vfs_support.c +++ b/bsd/vfs/vfs_support.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,7 +30,7 @@ * supposed to. * * nop_* routines always return 0 [success] - * err_* routines always return EOPNOTSUPP + * err_* routines always return ENOTSUP * * This file could be auto-generated from vnode_if.src. but that needs * support for freeing cnp. @@ -43,643 +43,539 @@ */ #include +#include -struct vop_create_args /* { +struct vnop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_vattr *a_vap; + vfs_context_t a_context; } */; int -nop_create(struct vop_create_args *ap) +nop_create(struct vnop_create_args *ap) { #if DIAGNOSTIC if ((ap->a_cnp->cn_flags & HASBUF) == 0) panic("nop_create: no name"); #endif - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); return (0); } int -err_create(struct vop_create_args *ap) +err_create(struct vnop_create_args *ap) { (void)nop_create(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_whiteout_args /* { +struct vnop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; + vfs_context_t a_context; } */; int -nop_whiteout(struct vop_whiteout_args *ap) +nop_whiteout(struct vnop_whiteout_args *ap) { return (0); } int -err_whiteout(struct vop_whiteout_args *ap) +err_whiteout(struct vnop_whiteout_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_mknod_args /* { +struct vnop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_vattr *a_vap; + vfs_context_t a_context; } */; int -nop_mknod(struct vop_mknod_args *ap) +nop_mknod(struct vnop_mknod_args *ap) { #if DIAGNOSTIC if ((ap->a_cnp->cn_flags & HASBUF) == 0) panic("nop_mknod: no name"); #endif - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); return (0); } int -err_mknod(struct vop_mknod_args *ap) +err_mknod(struct vnop_mknod_args *ap) { (void)nop_mknod(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } - -struct vop_mkcomplex_args /* { - struct vnode *a_dvp, - struct vnode **a_vpp, - struct componentname *a_cnp, - struct vattr *a_vap, - u_long a_type) -} */; - -int -nop_mkcomplex(struct vop_mkcomplex_args *ap) -{ -#if DIAGNOSTIC - if ((ap->a_cnp->cn_flags & HASBUF) == 0) - panic("nop_mkcomplex: no name"); -#endif - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); - return (0); -} - -int -err_mkcomplex(struct vop_mkcomplex_args *ap) -{ - (void)nop_mkcomplex(ap); - return (EOPNOTSUPP); -} - - -struct vop_open_args /* { +struct vnop_open_args /* { 
struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_open(struct vop_open_args *ap) +nop_open(struct vnop_open_args *ap) { return (0); } int -err_open(struct vop_open_args *ap) +err_open(struct vnop_open_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_close_args /* { +struct vnop_close_args /* { struct vnode *a_vp; int a_fflag; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_close(struct vop_close_args *ap) +nop_close(struct vnop_close_args *ap) { return (0); } int -err_close(struct vop_close_args *ap) +err_close(struct vnop_close_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_access_args /* { +struct vnop_access_args /* { struct vnode *a_vp; int a_mode; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_access(struct vop_access_args *ap) +nop_access(struct vnop_access_args *ap) { return (0); } int -err_access(struct vop_access_args *ap) +err_access(struct vnop_access_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_getattr_args /* { +struct vnop_getattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_vattr *a_vap; + vfs_context_t a_context; } */; int -nop_getattr(struct vop_getattr_args *ap) +nop_getattr(struct vnop_getattr_args *ap) { return (0); } int -err_getattr(struct vop_getattr_args *ap) +err_getattr(struct vnop_getattr_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_setattr_args /* { +struct vnop_setattr_args /* { struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - struct proc *a_p; + struct vnode_vattr *a_vap; + vfs_context_t a_context; } */; int -nop_setattr(struct vop_setattr_args *ap) +nop_setattr(struct vnop_setattr_args *ap) { return (0); } int -err_setattr(struct vop_setattr_args *ap) +err_setattr(struct vnop_setattr_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_getattrlist_args /* { +struct vnop_getattrlist_args /* { struct vnode *a_vp; struct attrlist *a_alist; struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context a_context; } */; int -nop_getattrlist(struct vop_getattrlist_args *ap) +nop_getattrlist(struct vnop_getattrlist_args *ap) { return (0); } int -err_getattrlist(struct vop_getattrlist_args *ap) +err_getattrlist(struct vnop_getattrlist_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_setattrlist_args /* { +struct vnop_setattrlist_args /* { struct vnode *a_vp; struct attrlist *a_alist; struct uio *a_uio; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; } */; int -nop_setattrlist(struct vop_setattrlist_args *ap) +nop_setattrlist(struct vnop_setattrlist_args *ap) { return (0); } int -err_setattrlist(struct vop_setattrlist_args *ap) +err_setattrlist(struct vnop_setattrlist_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_read_args /* { +struct vnop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t a_context; } */; int -nop_read(struct vop_read_args *ap) +nop_read(struct vnop_read_args *ap) { return (0); } int -err_read(struct vop_read_args *ap) +err_read(struct vnop_read_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_write_args /* { +struct vnop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; - struct ucred *a_cred; + vfs_context_t 
a_context; } */; int -nop_write(struct vop_write_args *ap) +nop_write(struct vnop_write_args *ap) { return (0); } int -err_write(struct vop_write_args *ap) +err_write(struct vnop_write_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_lease_args /* { - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; -} */; - -int -nop_lease(struct vop_lease_args *ap) -{ - return (0); -} - -int -err_lease(struct vop_lease_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_ioctl_args /* { +struct vnop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; - struct ucred *a_cred; + kauth_cred_t a_cred; struct proc *a_p; } */; int -nop_ioctl(struct vop_ioctl_args *ap) +nop_ioctl(__unused struct vnop_ioctl_args *ap) { return (0); } int -err_ioctl(struct vop_ioctl_args *ap) +err_ioctl(struct vnop_ioctl_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_select_args /* { +struct vnop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; - struct ucred *a_cred; + kauth_cred_t a_cred; void *a_wql; struct proc *a_p; } */; int -nop_select(struct vop_select_args *ap) +nop_select(__unused struct vnop_select_args *ap) { return (0); } int -err_select(struct vop_select_args *ap) +err_select(struct vnop_select_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_exchange_args /* { +struct vnop_exchange_args /* { struct vnode *a_fvp; struct vnode *a_tvp; - struct ucred *a_cred; - struct proc *a_p; + int a_options; + vfs_context_t a_context; } */; int -nop_exchange(struct vop_exchange_args *ap) +nop_exchange(struct vnop_exchange_args *ap) { return (0); } int -err_exchange(struct vop_exchange_args *ap) +err_exchange(struct vnop_exchange_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_revoke_args /* { +struct vnop_revoke_args /* { struct vnode *a_vp; int a_flags; + vfs_context_t a_context; } */; int -nop_revoke(struct vop_revoke_args *ap) +nop_revoke(struct vnop_revoke_args *ap) { - return (vop_revoke(ap)); + return vn_revoke(ap->a_vp, ap->a_flags, ap->a_context); } int -err_revoke(struct vop_revoke_args *ap) +err_revoke(struct vnop_revoke_args *ap) { (void)nop_revoke(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_mmap_args /* { +struct vnop_mmap_args /* { struct vnode *a_vp; int a_fflags; - struct ucred *a_cred; + kauth_cred_t a_cred; struct proc *a_p; } */; int -nop_mmap(struct vop_mmap_args *ap) +nop_mmap(__unused struct vnop_mmap_args *ap) { return (0); } int -err_mmap(struct vop_mmap_args *ap) +err_mmap(struct vnop_mmap_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_fsync_args /* { +struct vnop_fsync_args /* { struct vnode *a_vp; - struct ucred *a_cred; int a_waitfor; - struct proc *a_p; -} */; - -int -nop_fsync(struct vop_fsync_args *ap) -{ - return (0); -} - -int -err_fsync(struct vop_fsync_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_seek_args /* { - struct vnode *a_vp; - off_t a_oldoff; - off_t a_newoff; - struct ucred *a_cred; + vfs_context_t a_context; } */; int -nop_seek(struct vop_seek_args *ap) +nop_fsync(struct vnop_fsync_args *ap) { return (0); } int -err_seek(struct vop_seek_args *ap) +err_fsync(struct vnop_fsync_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_remove_args /* { +struct vnop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + int a_flags; + vfs_context_t a_context; } */; int -nop_remove(struct vop_remove_args *ap) +nop_remove(struct vnop_remove_args 
*ap) { - if (ap->a_dvp == ap->a_vp) - vrele(ap->a_vp); - else - vput(ap->a_vp); - vput(ap->a_dvp); return (0); } int -err_remove(struct vop_remove_args *ap) +err_remove(struct vnop_remove_args *ap) { (void)nop_remove(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_link_args /* { +struct vnop_link_args /* { struct vnode *a_vp; struct vnode *a_tdvp; struct componentname *a_cnp; + vfs_context_t a_context; } */; int -nop_link(struct vop_link_args *ap) +nop_link(struct vnop_link_args *ap) { -#if DIAGNOSTIC - if ((ap->a_cnp->cn_flags & HASBUF) == 0) - panic("nop_link: no name"); -#endif - VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); - vput(ap->a_tdvp); return (0); } int -err_link(struct vop_link_args *ap) +err_link(struct vnop_link_args *ap) { (void)nop_link(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_rename_args /* { +struct vnop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; + vfs_context_t a_context; } */; int -nop_rename(struct vop_rename_args *ap) +nop_rename(struct vnop_rename_args *ap) { -#if DIAGNOSTIC - if ((ap->a_tcnp->cn_flags & HASBUF) == 0 || - (ap->a_fcnp->cn_flags & HASBUF) == 0) - panic("nop_rename: no name"); -#endif - VOP_ABORTOP(ap->a_tdvp, ap->a_tcnp); - if (ap->a_tdvp == ap->a_tvp) - vrele(ap->a_tdvp); - else - vput(ap->a_tdvp); - if (ap->a_tvp) - vput(ap->a_tvp); - VOP_ABORTOP(ap->a_fdvp, ap->a_fcnp); - vrele(ap->a_fdvp); - vrele(ap->a_fvp); return (0); } int -err_rename(struct vop_rename_args *ap) +err_rename(struct vnop_rename_args *ap) { (void)nop_rename(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_mkdir_args /* { +struct vnop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_vattr *a_vap; + vfs_context_t a_context; } */; int -nop_mkdir(struct vop_mkdir_args *ap) +nop_mkdir(struct vnop_mkdir_args *ap) { -#if DIAGNOSTIC - if ((ap->a_cnp->cn_flags & HASBUF) == 0) - panic("nop_mkdir: no name"); -#endif - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); return (0); } int -err_mkdir(struct vop_mkdir_args *ap) +err_mkdir(struct vnop_mkdir_args *ap) { - (void)nop_mkdir(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_rmdir_args /* { +struct vnop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; + vfs_context_t a_context; } */; int -nop_rmdir(struct vop_rmdir_args *ap) +nop_rmdir(struct vnop_rmdir_args *ap) { - vput(ap->a_dvp); - vput(ap->a_vp); return (0); } int -err_rmdir(struct vop_rmdir_args *ap) +err_rmdir(struct vnop_rmdir_args *ap) { (void)nop_rmdir(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_symlink_args /* { +struct vnop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; - struct vattr *a_vap; + struct vnode_vattr *a_vap; char *a_target; + vfs_context_t a_context; } */; int -nop_symlink(struct vop_symlink_args *ap) +nop_symlink(struct vnop_symlink_args *ap) { #if DIAGNOSTIC if ((ap->a_cnp->cn_flags & HASBUF) == 0) panic("nop_symlink: no name"); #endif - VOP_ABORTOP(ap->a_dvp, ap->a_cnp); - vput(ap->a_dvp); return (0); } int -err_symlink(struct vop_symlink_args *ap) +err_symlink(struct vnop_symlink_args *ap) { (void)nop_symlink(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_readdir_args /* { - struct vnode *a_vp; +struct vnop_readdir_args /* { + vnode_t a_vp; struct uio *a_uio; - struct ucred *a_cred; + int a_flags; int 
*a_eofflag; - int *a_ncookies; - u_long **a_cookies; + int *a_numdirent; + vfs_context_t a_context; } */; int -nop_readdir(struct vop_readdir_args *ap) +nop_readdir(struct vnop_readdir_args *ap) { return (0); } int -err_readdir(struct vop_readdir_args *ap) +err_readdir(struct vnop_readdir_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_readdirattr_args /* { +struct vnop_readdirattr_args /* { struct vnode *a_vp; struct attrlist *a_alist; struct uio *a_uio; @@ -688,12 +584,11 @@ struct vop_readdirattr_args /* { int *a_newstate; int *a_eofflag; u_long *a_actualcount; - u_long **a_cookies; - struct ucred *a_cred; + vfs_context_t a_context; } */; int -nop_readdirattr(struct vop_readdirattr_args *ap) +nop_readdirattr(struct vnop_readdirattr_args *ap) { *(ap->a_actualcount) = 0; *(ap->a_eofflag) = 0; @@ -701,509 +596,230 @@ nop_readdirattr(struct vop_readdirattr_args *ap) } int -err_readdirattr(struct vop_readdirattr_args *ap) +err_readdirattr(struct vnop_readdirattr_args *ap) { (void)nop_readdirattr(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_readlink_args /* { +struct vnop_readlink_args /* { struct vnode *vp; struct uio *uio; - struct ucred *cred; -} */; - -int -nop_readlink(struct vop_readlink_args *ap) -{ - return (0); -} - -int -err_readlink(struct vop_readlink_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_abortop_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; + vfs_context_t a_context; } */; int -nop_abortop(struct vop_abortop_args *ap) +nop_readlink(struct vnop_readlink_args *ap) { - if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) { - char *tmp = ap->a_cnp->cn_pnbuf; - ap->a_cnp->cn_pnbuf = NULL; - ap->a_cnp->cn_flags &= ~HASBUF; - FREE_ZONE(tmp, ap->a_cnp->cn_pnlen, M_NAMEI); - } - return (0); } int -err_abortop(struct vop_abortop_args *ap) +err_readlink(struct vnop_readlink_args *ap) { - (void)nop_abortop(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_inactive_args /* { +struct vnop_inactive_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_inactive(struct vop_inactive_args *ap) +nop_inactive(struct vnop_inactive_args *ap) { - VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } int -err_inactive(struct vop_inactive_args *ap) +err_inactive(struct vnop_inactive_args *ap) { (void)nop_inactive(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_reclaim_args /* { +struct vnop_reclaim_args /* { struct vnode *a_vp; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_reclaim(struct vop_reclaim_args *ap) +nop_reclaim(struct vnop_reclaim_args *ap) { return (0); } int -err_reclaim(struct vop_reclaim_args *ap) +err_reclaim(struct vnop_reclaim_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_lock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -} */; - -int -nop_lock(struct vop_lock_args *ap) -{ - return (vop_nolock(ap)); -} - -int -err_lock(struct vop_lock_args *ap) -{ - (void)nop_lock(ap); - return (EOPNOTSUPP); -} - - -struct vop_unlock_args /* { - struct vnode *a_vp; - int a_flags; - struct proc *a_p; -} */; - -int -nop_unlock(struct vop_unlock_args *ap) -{ - return (vop_nounlock(ap)); -} - -int -err_unlock(struct vop_unlock_args *ap) -{ - (void)nop_unlock(ap); - return (EOPNOTSUPP); -} - - -struct vop_bmap_args /* { - struct vnode *vp; - daddr_t bn; - struct vnode **vpp; - daddr_t *bnp; - int *runp; -} */; - -int -nop_bmap(struct vop_bmap_args *ap) -{ - return (0); -} - -int -err_bmap(struct vop_bmap_args *ap) 
-{ - return (EOPNOTSUPP); -} - - -struct vop_strategy_args /* { +struct vnop_strategy_args /* { struct buf *a_bp; } */; int -nop_strategy(struct vop_strategy_args *ap) -{ - return (0); -} - -int -err_strategy(struct vop_strategy_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_print_args /* { - struct vnode *a_vp; -} */; - -int -nop_print(struct vop_print_args *ap) +nop_strategy(struct vnop_strategy_args *ap) { return (0); } int -err_print(struct vop_print_args *ap) +err_strategy(struct vnop_strategy_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_islocked_args /* { - struct vnode *a_vp; -} */; - -int -nop_islocked(struct vop_islocked_args *ap) -{ - return (vop_noislocked(ap)); -} - -int -err_islocked(struct vop_islocked_args *ap) -{ - (void)nop_islocked(ap); - return (EOPNOTSUPP); -} - - -struct vop_pathconf_args /* { +struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; register_t *a_retval; + vfs_context_t a_context; } */; int -nop_pathconf(struct vop_pathconf_args *ap) +nop_pathconf(struct vnop_pathconf_args *ap) { return (0); } int -err_pathconf(struct vop_pathconf_args *ap) +err_pathconf(struct vnop_pathconf_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_advlock_args /* { +struct vnop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; + vfs_context_t a_context; } */; int -nop_advlock(struct vop_advlock_args *ap) -{ - return (0); -} - -int -err_advlock(struct vop_advlock_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_blkatoff_args /* { - struct vnode *a_vp; - off_t a_offset; - char **a_res; - struct buf **a_bpp; -} */; - -int -nop_blkatoff(struct vop_blkatoff_args *ap) -{ - *ap->a_bpp = NULL; - return (0); -} - -int -err_blkatoff(struct vop_blkatoff_args *ap) -{ - (void)nop_blkatoff(ap); - return (EOPNOTSUPP); -} - - -struct vop_valloc_args /* { - struct vnode *a_pvp; - int a_mode; - struct ucred *a_cred; - struct vnode **a_vpp; -} */; - -int -nop_valloc(struct vop_valloc_args *ap) -{ - *ap->a_vpp = NULL; - return (0); -} - -int -err_valloc(struct vop_valloc_args *ap) -{ - (void)nop_valloc(ap); - return (EOPNOTSUPP); -} - - -struct vop_reallocblks_args /* { - struct vnode *a_vp; - struct cluster_save *a_buflist; -} */; - -int -nop_reallocblks(struct vop_reallocblks_args *ap) -{ - return (0); -} - -int -err_reallocblks(struct vop_reallocblks_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_vfree_args /* { - struct vnode *a_pvp; - ino_t a_ino; - int a_mode; -} */; - -int -nop_vfree(struct vop_vfree_args *ap) +nop_advlock(struct vnop_advlock_args *ap) { return (0); } int -err_vfree(struct vop_vfree_args *ap) +err_advlock(struct vnop_advlock_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_truncate_args /* { - struct vnode *a_vp; - off_t a_length; - int a_flags; - struct ucred *a_cred; - struct proc *a_p; -} */; - -int -nop_truncate(struct vop_truncate_args *ap) -{ - return (0); -} - -int -err_truncate(struct vop_truncate_args *ap) -{ - return (EOPNOTSUPP); -} - -struct vop_allocate_args /* { +struct vnop_allocate_args /* { struct vnode *a_vp; off_t a_length; u_int32_t a_flags; off_t *a_bytesallocated; off_t a_offset; - struct ucred *a_cred; - struct proc *a_p; + vfs_context_t a_context; } */; int -nop_allocate(struct vop_allocate_args *ap) +nop_allocate(struct vnop_allocate_args *ap) { *(ap->a_bytesallocated) = 0; return (0); } int -err_allocate(struct vop_allocate_args *ap) +err_allocate(struct vnop_allocate_args *ap) { (void)nop_allocate(ap); - return 
(EOPNOTSUPP); -} - - -struct vop_update_args /* { - struct vnode *a_vp; - struct timeval *a_access; - struct timeval *a_modify; - int a_waitfor; -} */; - -int -nop_update(struct vop_update_args *ap) -{ - return (0); -} - -int -err_update(struct vop_update_args *ap) -{ - return (EOPNOTSUPP); + return (ENOTSUP); } - -struct vop_pgrd_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; -} */; - -int -nop_pgrd(struct vop_pgrd_args *ap) -{ - return (0); -} - -int -err_pgrd(struct vop_pgrd_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_pgwr_args /* { - struct vnode *a_vp; - struct uio *a_uio; - struct ucred *a_cred; - vm_offset_t a_offset; -} */; - -int -nop_pgwr(struct vop_pgwr_args *ap) -{ - return (0); -} - -int -err_pgwr(struct vop_pgwr_args *ap) -{ - return (EOPNOTSUPP); -} - - -struct vop_bwrite_args /* { +struct vnop_bwrite_args /* { struct buf *a_bp; } */; int -nop_bwrite(struct vop_bwrite_args *ap) +nop_bwrite(struct vnop_bwrite_args *ap) { - return (bwrite(ap->a_bp)); + return ((int)buf_bwrite(ap->a_bp)); } int -err_bwrite(struct vop_bwrite_args *ap) +err_bwrite(struct vnop_bwrite_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_pagein_args /* { +struct vnop_pagein_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_foffset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */; int -nop_pagein(struct vop_pagein_args *ap) +nop_pagein(struct vnop_pagein_args *ap) { - ubc_upl_abort(ap->a_pl, UPL_ABORT_ERROR); - return (0); + if ( !(ap->a_flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + return (EINVAL); } int -err_pagein(struct vop_pagein_args *ap) +err_pagein(struct vnop_pagein_args *ap) { - ubc_upl_abort(ap->a_pl, UPL_ABORT_ERROR); - return (EOPNOTSUPP); + if ( !(ap->a_flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + return (ENOTSUP); } -struct vop_pageout_args /* { +struct vnop_pageout_args /* { struct vnode *a_vp, upl_t a_pl, vm_offset_t a_pl_offset, off_t a_foffset, size_t a_size, - struct ucred *a_cred, int a_flags + vfs_context_t a_context; } */; int -nop_pageout(struct vop_pageout_args *ap) +nop_pageout(struct vnop_pageout_args *ap) { - ubc_upl_abort(ap->a_pl, UPL_ABORT_ERROR); - return (0); + if ( !(ap->a_flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + return (EINVAL); } int -err_pageout(struct vop_pageout_args *ap) +err_pageout(struct vnop_pageout_args *ap) { - ubc_upl_abort(ap->a_pl, UPL_ABORT_ERROR); - return (EOPNOTSUPP); + if ( !(ap->a_flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(ap->a_pl, ap->a_pl_offset, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + return (ENOTSUP); } -struct vop_devblocksize_args /* { +struct vnop_devblocksize_args /* { struct vnode *a_vp; register_t *a_retval; } */; int -nop_devblocksize(struct vop_devblocksize_args *ap) +nop_devblocksize(struct vnop_devblocksize_args *ap) { /* XXX default value because the call sites do not check error */ *ap->a_retval = 512; @@ -1211,14 +827,14 @@ nop_devblocksize(struct vop_devblocksize_args *ap) } int -err_devblocksize(struct vop_devblocksize_args *ap) +err_devblocksize(struct vnop_devblocksize_args *ap) { (void)nop_devblocksize(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_searchfs /* { +struct vnop_searchfs /* { struct vnode *a_vp; void 
*a_searchparams1; void *a_searchparams2; @@ -1231,23 +847,24 @@ struct vop_searchfs /* { u_long a_options; struct uio *a_uio; struct searchstate *a_searchstate; + vfs_context_t a_context; } */; int -nop_searchfs(struct vop_searchfs_args *ap) +nop_searchfs(struct vnop_searchfs_args *ap) { *(ap->a_nummatches) = 0; return (0); } int -err_searchfs(struct vop_searchfs_args *ap) +err_searchfs(struct vnop_searchfs_args *ap) { (void)nop_searchfs(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_copyfile_args /*{ +struct vnop_copyfile_args /*{ struct vnodeop_desc *a_desc; struct vnode *a_fvp; struct vnode *a_tdvp; @@ -1257,83 +874,77 @@ struct vop_copyfile_args /*{ }*/; int -nop_copyfile(struct vop_copyfile_args *ap) +nop_copyfile(struct vnop_copyfile_args *ap) { - if (ap->a_tdvp == ap->a_tvp) - vrele(ap->a_tdvp); - else - vput(ap->a_tdvp); - if (ap->a_tvp) - vput(ap->a_tvp); - vrele(ap->a_fvp); return (0); } int -err_copyfile(struct vop_copyfile_args *ap) +err_copyfile(struct vnop_copyfile_args *ap) { (void)nop_copyfile(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_blktooff_args /* { +struct vnop_blktooff_args /* { struct vnode *a_vp; - daddr_t a_lblkno; + daddr64_t a_lblkno; off_t *a_offset; } */; int -nop_blktooff(struct vop_blktooff_args *ap) +nop_blktooff(struct vnop_blktooff_args *ap) { *ap->a_offset = (off_t)-1; /* failure */ return (0); } int -err_blktooff(struct vop_blktooff_args *ap) +err_blktooff(struct vnop_blktooff_args *ap) { (void)nop_blktooff(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_offtoblk_args /* { +struct vnop_offtoblk_args /* { struct vnode *a_vp; off_t a_offset; - daddr_t *a_lblkno; + daddr64_t *a_lblkno; } */; int -nop_offtoblk(struct vop_offtoblk_args *ap) +nop_offtoblk(struct vnop_offtoblk_args *ap) { - *ap->a_lblkno = (daddr_t)-1; /* failure */ + *ap->a_lblkno = (daddr64_t)-1; /* failure */ return (0); } int -err_offtoblk(struct vop_offtoblk_args *ap) +err_offtoblk(struct vnop_offtoblk_args *ap) { (void)nop_offtoblk(ap); - return (EOPNOTSUPP); + return (ENOTSUP); } -struct vop_cmap_args /* { +struct vnop_blockmap_args /* { struct vnode *a_vp; off_t a_foffset; size_t a_size; - daddr_t *a_bpn; + daddr64_t *a_bpn; size_t *a_run; void *a_poff; + int a_flags; } */; -int nop_cmap(struct vop_cmap_args *ap) +int nop_blockmap(struct vnop_blockmap_args *ap) { return (0); } -int err_cmap(struct vop_cmap_args *ap) +int err_blockmap(struct vnop_blockmap_args *ap) { - return (EOPNOTSUPP); + return (ENOTSUP); } diff --git a/bsd/vfs/vfs_support.h b/bsd/vfs/vfs_support.h index 7eac9f21e..9e49a68a1 100644 --- a/bsd/vfs/vfs_support.h +++ b/bsd/vfs/vfs_support.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -37,196 +37,150 @@ #ifndef _VFS_VFS_SUPPORT_H_ #define _VFS_VFS_SUPPORT_H_ +#include #include #include -#include #include #include #include #include -#include #include #include #include #include +#include #include #include #include -extern int nop_create(struct vop_create_args *ap); -extern int err_create(struct vop_create_args *ap); +__BEGIN_DECLS +extern int nop_create(struct vnop_create_args *ap); +extern int err_create(struct vnop_create_args *ap); -extern int nop_whiteout(struct vop_whiteout_args *ap); -extern int err_whiteout(struct vop_whiteout_args *ap); +extern int nop_whiteout(struct vnop_whiteout_args *ap); +extern int err_whiteout(struct vnop_whiteout_args *ap); -extern int nop_mknod(struct vop_mknod_args *ap); -extern int err_mknod(struct vop_mknod_args *ap); +extern int nop_mknod(struct vnop_mknod_args *ap); +extern int err_mknod(struct vnop_mknod_args *ap); -extern int nop_mkcomplex(struct vop_mkcomplex_args *ap); -extern int err_mkcomplex(struct vop_mkcomplex_args *ap); +extern int nop_open(struct vnop_open_args *ap); +extern int err_open(struct vnop_open_args *ap); -extern int nop_open(struct vop_open_args *ap); -extern int err_open(struct vop_open_args *ap); +extern int nop_close(struct vnop_close_args *ap); +extern int err_close(struct vnop_close_args *ap); -extern int nop_close(struct vop_close_args *ap); -extern int err_close(struct vop_close_args *ap); +extern int nop_access(struct vnop_access_args *ap); +extern int err_access(struct vnop_access_args *ap); -extern int nop_access(struct vop_access_args *ap); -extern int err_access(struct vop_access_args *ap); +extern int nop_getattr(struct vnop_getattr_args *ap); +extern int err_getattr(struct vnop_getattr_args *ap); -extern int nop_getattr(struct vop_getattr_args *ap); -extern int err_getattr(struct vop_getattr_args *ap); +extern int nop_setattr(struct vnop_setattr_args *ap); +extern int err_setattr(struct vnop_setattr_args *ap); -extern int nop_setattr(struct vop_setattr_args *ap); -extern int err_setattr(struct vop_setattr_args *ap); +extern int nop_getattrlist(struct vnop_getattrlist_args *ap); +extern int err_getattrlist(struct vnop_getattrlist_args *ap); -extern int nop_getattrlist(struct vop_getattrlist_args *ap); -extern int err_getattrlist(struct vop_getattrlist_args *ap); +extern int nop_setattrlist(struct vnop_setattrlist_args *ap); +extern int err_setattrlist(struct vnop_setattrlist_args *ap); -extern int nop_setattrlist(struct vop_setattrlist_args *ap); -extern int err_setattrlist(struct vop_setattrlist_args *ap); +extern int nop_read(struct vnop_read_args *ap); +extern int err_read(struct vnop_read_args *ap); -extern int nop_read(struct vop_read_args *ap); -extern int err_read(struct vop_read_args *ap); +extern int nop_write(struct vnop_write_args *ap); +extern int err_write(struct vnop_write_args *ap); -extern int nop_write(struct vop_write_args *ap); -extern int err_write(struct vop_write_args *ap); +extern int nop_ioctl(struct vnop_ioctl_args *ap); +extern int err_ioctl(struct vnop_ioctl_args *ap); -extern int nop_lease(struct vop_lease_args *ap); -extern int err_lease(struct vop_lease_args *ap); +extern int nop_select(struct vnop_select_args *ap); +extern int err_select(struct vnop_select_args *ap); -extern int nop_ioctl(struct vop_ioctl_args *ap); -extern int err_ioctl(struct vop_ioctl_args *ap); +extern int nop_exchange(struct vnop_exchange_args *ap); +extern int err_exchange(struct vnop_exchange_args *ap); -extern int nop_select(struct vop_select_args *ap); 
-extern int err_select(struct vop_select_args *ap); +extern int nop_revoke(struct vnop_revoke_args *ap); +extern int err_revoke(struct vnop_revoke_args *ap); -extern int nop_exchange(struct vop_exchange_args *ap); -extern int err_exchange(struct vop_exchange_args *ap); +extern int nop_mmap(struct vnop_mmap_args *ap); +extern int err_mmap(struct vnop_mmap_args *ap); -extern int nop_revoke(struct vop_revoke_args *ap); -extern int err_revoke(struct vop_revoke_args *ap); +extern int nop_fsync(struct vnop_fsync_args *ap); +extern int err_fsync(struct vnop_fsync_args *ap); -extern int nop_mmap(struct vop_mmap_args *ap); -extern int err_mmap(struct vop_mmap_args *ap); +extern int nop_remove(struct vnop_remove_args *ap); +extern int err_remove(struct vnop_remove_args *ap); -extern int nop_fsync(struct vop_fsync_args *ap); -extern int err_fsync(struct vop_fsync_args *ap); +extern int nop_link(struct vnop_link_args *ap); +extern int err_link(struct vnop_link_args *ap); -extern int nop_seek(struct vop_seek_args *ap); -extern int err_seek(struct vop_seek_args *ap); +extern int nop_rename(struct vnop_rename_args *ap); +extern int err_rename(struct vnop_rename_args *ap); -extern int nop_remove(struct vop_remove_args *ap); -extern int err_remove(struct vop_remove_args *ap); +extern int nop_mkdir(struct vnop_mkdir_args *ap); +extern int err_mkdir(struct vnop_mkdir_args *ap); -extern int nop_link(struct vop_link_args *ap); -extern int err_link(struct vop_link_args *ap); +extern int nop_rmdir(struct vnop_rmdir_args *ap); +extern int err_rmdir(struct vnop_rmdir_args *ap); -extern int nop_rename(struct vop_rename_args *ap); -extern int err_rename(struct vop_rename_args *ap); +extern int nop_symlink(struct vnop_symlink_args *ap); +extern int err_symlink(struct vnop_symlink_args *ap); -extern int nop_mkdir(struct vop_mkdir_args *ap); -extern int err_mkdir(struct vop_mkdir_args *ap); +extern int nop_readdir(struct vnop_readdir_args *ap); +extern int err_readdir(struct vnop_readdir_args *ap); -extern int nop_rmdir(struct vop_rmdir_args *ap); -extern int err_rmdir(struct vop_rmdir_args *ap); +extern int nop_readdirattr(struct vnop_readdirattr_args *ap); +extern int err_readdirattr(struct vnop_readdirattr_args *ap); -extern int nop_symlink(struct vop_symlink_args *ap); -extern int err_symlink(struct vop_symlink_args *ap); +extern int nop_readlink(struct vnop_readlink_args *ap); +extern int err_readlink(struct vnop_readlink_args *ap); -extern int nop_readdir(struct vop_readdir_args *ap); -extern int err_readdir(struct vop_readdir_args *ap); +extern int nop_inactive(struct vnop_inactive_args *ap); +extern int err_inactive(struct vnop_inactive_args *ap); -extern int nop_readdirattr(struct vop_readdirattr_args *ap); -extern int err_readdirattr(struct vop_readdirattr_args *ap); +extern int nop_reclaim(struct vnop_reclaim_args *ap); +extern int err_reclaim(struct vnop_reclaim_args *ap); -extern int nop_readlink(struct vop_readlink_args *ap); -extern int err_readlink(struct vop_readlink_args *ap); -extern int nop_abortop(struct vop_abortop_args *ap); -extern int err_abortop(struct vop_abortop_args *ap); +extern int nop_strategy(struct vnop_strategy_args *ap); +extern int err_strategy(struct vnop_strategy_args *ap); -extern int nop_inactive(struct vop_inactive_args *ap); -extern int err_inactive(struct vop_inactive_args *ap); +extern int nop_pathconf(struct vnop_pathconf_args *ap); +extern int err_pathconf(struct vnop_pathconf_args *ap); -extern int nop_reclaim(struct vop_reclaim_args *ap); -extern int err_reclaim(struct 
vop_reclaim_args *ap); +extern int nop_advlock(struct vnop_advlock_args *ap); +extern int err_advlock(struct vnop_advlock_args *ap); -extern int nop_lock(struct vop_lock_args *ap); -extern int err_lock(struct vop_lock_args *ap); -extern int nop_unlock(struct vop_unlock_args *ap); -extern int err_unlock(struct vop_unlock_args *ap); +extern int nop_allocate(struct vnop_allocate_args *ap); +extern int err_allocate(struct vnop_allocate_args *ap); -extern int nop_bmap(struct vop_bmap_args *ap); -extern int err_bmap(struct vop_bmap_args *ap); +extern int nop_bwrite(struct vnop_bwrite_args *ap); +extern int err_bwrite(struct vnop_bwrite_args *ap); -extern int nop_strategy(struct vop_strategy_args *ap); -extern int err_strategy(struct vop_strategy_args *ap); +extern int nop_pagein(struct vnop_pagein_args *ap); +extern int err_pagein(struct vnop_pagein_args *ap); -extern int nop_print(struct vop_print_args *ap); -extern int err_print(struct vop_print_args *ap); +extern int nop_pageout(struct vnop_pageout_args *ap); +extern int err_pageout(struct vnop_pageout_args *ap); -extern int nop_islocked(struct vop_islocked_args *ap); -extern int err_islocked(struct vop_islocked_args *ap); +extern int nop_devblocksize(struct vnop_devblocksize_args *ap); +extern int err_devblocksize(struct vnop_devblocksize_args *ap); -extern int nop_pathconf(struct vop_pathconf_args *ap); -extern int err_pathconf(struct vop_pathconf_args *ap); +extern int nop_searchfs(struct vnop_searchfs_args *ap); +extern int err_searchfs(struct vnop_searchfs_args *ap); -extern int nop_advlock(struct vop_advlock_args *ap); -extern int err_advlock(struct vop_advlock_args *ap); +extern int nop_copyfile(struct vnop_copyfile_args *ap); +extern int err_copyfile(struct vnop_copyfile_args *ap); -extern int nop_blkatoff(struct vop_blkatoff_args *ap); -extern int err_blkatoff(struct vop_blkatoff_args *ap); +extern int nop_blktooff(struct vnop_blktooff_args *ap); +extern int err_blktooff(struct vnop_blktooff_args *ap); -extern int nop_valloc(struct vop_valloc_args *ap); -extern int err_valloc(struct vop_valloc_args *ap); +extern int nop_offtoblk(struct vnop_offtoblk_args *ap); +extern int err_offtoblk(struct vnop_offtoblk_args *ap); -extern int nop_reallocblks(struct vop_reallocblks_args *ap); -extern int err_reallocblks(struct vop_reallocblks_args *ap); +extern int nop_blockmap(struct vnop_blockmap_args *ap); +extern int err_blockmap(struct vnop_blockmap_args *ap); +__END_DECLS -extern int nop_vfree(struct vop_vfree_args *ap); -extern int err_vfree(struct vop_vfree_args *ap); - -extern int nop_truncate(struct vop_truncate_args *ap); -extern int err_truncate(struct vop_truncate_args *ap); - -extern int nop_allocate(struct vop_allocate_args *ap); -extern int err_allocate(struct vop_allocate_args *ap); - -extern int nop_update(struct vop_update_args *ap); -extern int err_update(struct vop_update_args *ap); - -extern int nop_pgrd(struct vop_pgrd_args *ap); -extern int err_pgrd(struct vop_pgrd_args *ap); - -extern int nop_pgwr(struct vop_pgwr_args *ap); -extern int err_pgwr(struct vop_pgwr_args *ap); - -extern int nop_bwrite(struct vop_bwrite_args *ap); -extern int err_bwrite(struct vop_bwrite_args *ap); - -extern int nop_pagein(struct vop_pagein_args *ap); -extern int err_pagein(struct vop_pagein_args *ap); - -extern int nop_pageout(struct vop_pageout_args *ap); -extern int err_pageout(struct vop_pageout_args *ap); - -extern int nop_devblocksize(struct vop_devblocksize_args *ap); -extern int err_devblocksize(struct vop_devblocksize_args *ap); - -extern 
int nop_searchfs(struct vop_searchfs_args *ap); -extern int err_searchfs(struct vop_searchfs_args *ap); - -extern int nop_copyfile(struct vop_copyfile_args *ap); -extern int err_copyfile(struct vop_copyfile_args *ap); - -extern int nop_blktooff(struct vop_blktooff_args *ap); -extern int err_blktooff(struct vop_blktooff_args *ap); - -extern int nop_offtoblk(struct vop_offtoblk_args *ap); -extern int err_offtoblk(struct vop_offtoblk_args *ap); - -extern int nop_cmap(struct vop_cmap_args *ap); -extern int err_cmap(struct vop_cmap_args *ap); #endif /* _VFS_VFS_SUPPORT_H_ */ diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index a44f963af..5675ce21f 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1995-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,28 +64,41 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include +#include +#include + +#include #include +#include -struct lock__bsd__ exchangelock; /* * The currently logged-in user, for ownership of files/directories whose on-disk @@ -93,14 +106,61 @@ struct lock__bsd__ exchangelock; */ uid_t console_user; -static int change_dir __P((struct nameidata *ndp, struct proc *p)); -static void checkdirs __P((struct vnode *olddp)); -static void enablequotas __P((struct proc *p, struct mount *mp)); -void notify_filemod_watchers(struct vnode *vp, struct proc *p); +static int change_dir(struct nameidata *ndp, vfs_context_t ctx); +static void checkdirs(struct vnode *olddp, vfs_context_t ctx); +void enablequotas(struct mount *mp, vfs_context_t ctx); +static int getfsstat_callback(mount_t mp, void * arg); +static int getutimes(user_addr_t usrtvp, struct timespec *tsp); +static int setutimes(vfs_context_t ctx, struct vnode *vp, const struct timespec *ts, int nullflag); +static int sync_callback(mount_t, void *); +static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, + boolean_t partial_copy); + +__private_extern__ int sync_internal(void); + +#ifdef __APPLE_API_OBSOLETE +struct fstatv_args { + int fd; /* file descriptor of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; +struct lstatv_args { + const char *path; /* pathname of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; +struct mkcomplex_args { + const char *path; /* pathname of the file to be created */ + mode_t mode; /* access mode for the newly created file */ + u_long type; /* format of the complex file */ +}; +struct statv_args { + const char *path; /* pathname of the target file */ + struct vstat *vsb; /* vstat structure for returned info */ +}; + +int fstatv(struct proc *p, struct fstatv_args *uap, register_t *retval); +int lstatv(struct proc *p, struct lstatv_args *uap, register_t *retval); +int mkcomplex(struct proc *p, struct mkcomplex_args *uap, register_t *retval); +int statv(struct proc *p, struct statv_args *uap, register_t *retval); + +#endif /* __APPLE_API_OBSOLETE */ + +#if UNION +extern int (**union_vnodeop_p)(void *); +extern struct vnode *union_dircache(struct vnode*, struct proc*); +#endif /* UNION */ 
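The forward declarations above feed the new callback-based mount iteration: rather than walking the mount list by hand under a simple lock, callers now pass vfs_iterate() a function that is invoked once per mount and that returns VFS_RETURNED to keep the iteration going. A minimal sketch of the pattern, using a hypothetical myfs_count_rw callback (vfs_iterate, mount_t, LK_NOWAIT and VFS_RETURNED are the kernel's own; the callback name and counting logic are illustrative only):

    /*
     * Hypothetical callback: count read-write mounts.  vfs_iterate()
     * invokes it once per mounted filesystem, passing 'arg' through.
     */
    static int
    myfs_count_rw(mount_t mp, void *arg)
    {
            int *countp = (int *)arg;

            if ((mp->mnt_flag & MNT_RDONLY) == 0)
                    (*countp)++;
            return (VFS_RETURNED);          /* keep iterating */
    }

    /* caller side, inside some function: */
    int count = 0;
    vfs_iterate(LK_NOWAIT, myfs_count_rw, &count);

sync_callback() later in this file follows the same shape, issuing VFS_SYNC() on each writable mount.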
/* counts number of mount and unmount operations */ unsigned int vfs_nummntops=0; +extern struct fileops vnops; + +extern void mount_list_add(mount_t mp); +extern void mount_list_remove(mount_t mp); +extern int mount_refdrain(mount_t mp); +extern int vcount(struct vnode *vp); + + /* * Virtual File System System Calls */ @@ -108,36 +168,40 @@ unsigned int vfs_nummntops=0; /* * Mount a file system. */ -struct mount_args { - char *type; - char *path; - int flags; - caddr_t data; -}; /* ARGSUSED */ int -mount(p, uap, retval) - struct proc *p; - register struct mount_args *uap; - register_t *retval; +mount(struct proc *p, register struct mount_args *uap, __unused register_t *retval) { struct vnode *vp; + struct vnode *devvp = NULLVP; + struct vnode *device_vnode = NULLVP; struct mount *mp; - struct vfsconf *vfsp; - int error, flag, err2; - struct vattr va; - u_long fstypenum; + struct vfstable *vfsp; + int error, flag = 0; + struct vnode_attr va; + struct vfs_context context; struct nameidata nd; + struct nameidata nd1; char fstypename[MFSNAMELEN]; size_t dummy=0; + user_addr_t devpath = USER_ADDR_NULL; + user_addr_t fsmountargs = uap->data; + int ronly = 0; + int mntalloc = 0; + mode_t accessmode; + boolean_t is_64bit; AUDIT_ARG(fflags, uap->flags); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + is_64bit = proc_is64bit(p); + /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); @@ -149,46 +213,44 @@ mount(p, uap, retval) if (uap->flags & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { - vput(vp); - return (EINVAL); + error = EINVAL; + goto out1; } mp = vp->v_mount; - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(vp); - return (EBUSY); + /* unmount in progress return error */ + mount_lock(mp); + if (mp->mnt_lflag & MNT_LUNMOUNT) { + mount_unlock(mp); + error = EBUSY; + goto out1; } + mount_unlock(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ if ((uap->flags & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { - vfs_unbusy(mp, p); - vput(vp); - return (EOPNOTSUPP); /* Needs translation */ + lck_rw_done(&mp->mnt_rwlock); + error = ENOTSUP; + goto out1; } /* * Only root, or the user that did the original mount is * permitted to update it. */ - if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && - (error = suser(p->p_ucred, &p->p_acflag))) { - vfs_unbusy(mp, p); - vput(vp); - return (error); + if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(context.vc_ucred) && + (error = suser(context.vc_ucred, &p->p_acflag))) { + lck_rw_done(&mp->mnt_rwlock); + goto out1; } /* - * Do not allow NFS export by non-root users. FOr non-root - * users, silently enforce MNT_NOSUID and MNT_NODEV, and - * MNT_NOEXEC if mount point is already MNT_NOEXEC. + * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, + * and MNT_NOEXEC if mount point is already MNT_NOEXEC. 
*/ - if (p->p_ucred->cr_uid != 0) { - if (uap->flags & MNT_EXPORTED) { - vfs_unbusy(mp, p); - vput(vp); - return (EPERM); - } + if (suser(context.vc_ucred, NULL)) { uap->flags |= MNT_NOSUID | MNT_NODEV; if (mp->mnt_flag & MNT_NOEXEC) uap->flags |= MNT_NOEXEC; @@ -198,81 +260,58 @@ mount(p, uap, retval) mp->mnt_flag |= uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); - VOP_UNLOCK(vp, 0, p); - + vfsp = mp->mnt_vtable; goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ - if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || - (va.va_uid != p->p_ucred->cr_uid && - (error = suser(p->p_ucred, &p->p_acflag)))) { - vput(vp); - return (error); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if ((error = vnode_getattr(vp, &va, &context)) || + (va.va_uid != kauth_cred_getuid(context.vc_ucred) && + (error = suser(context.vc_ucred, &p->p_acflag)))) { + goto out1; } /* - * Do not allow NFS export by non-root users. FOr non-root - * users, silently enforce MNT_NOSUID and MNT_NODEV, and + * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and * MNT_NOEXEC if mount point is already MNT_NOEXEC. */ - if (p->p_ucred->cr_uid != 0) { - if (uap->flags & MNT_EXPORTED) { - vput(vp); - return (EPERM); - } + if (suser(context.vc_ucred, NULL)) { uap->flags |= MNT_NOSUID | MNT_NODEV; if (vp->v_mount->mnt_flag & MNT_NOEXEC) uap->flags |= MNT_NOEXEC; } - if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) { - vput(vp); - return (error); - } + if ( (error = VNOP_FSYNC(vp, MNT_WAIT, &context)) ) + goto out1; + + if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) ) + goto out1; + if (vp->v_type != VDIR) { - vput(vp); - return (ENOTDIR); - } -#if COMPAT_43 - /* - * Historically filesystem types were identified by number. If we - * get an integer for the filesystem type instead of a string, we - * check to see if it matches one of the historic filesystem types. - */ - fstypenum = (u_long)uap->type; - if (fstypenum < maxvfsconf) { - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (vfsp->vfc_typenum == fstypenum) - break; - if (vfsp == NULL) { - vput(vp); - return (ENODEV); - } - strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); - } else -#endif /* COMPAT_43 */ - if (error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy)) { - vput(vp); - return (error); + error = ENOTDIR; + goto out1; } + if ( (error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy)) ) + goto out1; + /* XXXAUDIT: Should we capture the type on the error path as well? */ AUDIT_ARG(text, fstypename); + mount_list_lock(); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; + mount_list_unlock(); if (vfsp == NULL) { - vput(vp); - return (ENODEV); + error = ENODEV; + goto out1; } - simple_lock(&vp->v_interlock); if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) { - simple_unlock(&vp->v_interlock); - vput(vp); - return (EBUSY); + error = EBUSY; + goto out1; } SET(vp->v_flag, VMOUNT); - simple_unlock(&vp->v_interlock); /* * Allocate and initialize the filesystem. 
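The rewritten mount() path relies throughout on one pattern: the old (ucred, proc) argument pairs are replaced by a vfs_context built once at syscall entry and threaded through namei(), vnode_getattr(), vnode_authorize(), VNOP_FSYNC() and VFS_MOUNT(). A minimal sketch of the ownership check seen in the hunk above, factored into a hypothetical helper (the helper name is illustrative; the calls are exactly the ones the hunk itself uses):

    /*
     * Hypothetical helper: may this process mount over vp?
     * Non-root callers must own the covered directory.
     */
    static int
    myfs_check_mount_owner(vnode_t vp, struct proc *p)
    {
            struct vfs_context context;
            struct vnode_attr va;
            int error;

            context.vc_proc = p;
            context.vc_ucred = kauth_cred_get();

            VATTR_INIT(&va);
            VATTR_WANTED(&va, va_uid);      /* only the owner is needed */
            if ((error = vnode_getattr(vp, &va, &context)))
                    return (error);
            if (va.va_uid == kauth_cred_getuid(context.vc_ucred))
                    return (0);
            return (suser(context.vc_ucred, &p->p_acflag));
    }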
@@ -280,23 +319,35 @@ mount(p, uap, retval) MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); + mntalloc = 1; /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; - - lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); - (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt; + mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; + mp->mnt_devblocksize = DEV_BSIZE; + + TAILQ_INIT(&mp->mnt_vnodelist); + TAILQ_INIT(&mp->mnt_workerqueue); + TAILQ_INIT(&mp->mnt_newvnodes); + mount_lock_init(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); mp->mnt_op = vfsp->vfc_vfsops; - mp->mnt_vfc = vfsp; + mp->mnt_vtable = vfsp; + mount_list_lock(); vfsp->vfc_refcount++; - mp->mnt_stat.f_type = vfsp->vfc_typenum; + mount_list_unlock(); + //mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; - strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); + strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN); mp->mnt_vnodecovered = vp; - mp->mnt_stat.f_owner = p->p_ucred->cr_uid; - VOP_UNLOCK(vp, 0, p); + mp->mnt_vfsstat.f_owner = kauth_cred_getuid(context.vc_ucred); + /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */ + vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE); + update: /* * Set the mount level flags. @@ -310,14 +361,107 @@ update: MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED); mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | - MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED); + MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE | MNT_AUTOMOUNTED | + MNT_DEFWRITE); + + if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) { + if (is_64bit) { + if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) ) + goto out1; + fsmountargs += sizeof(devpath); + } else { + char *tmp; + if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) ) + goto out1; + /* munge into LP64 addr */ + devpath = CAST_USER_ADDR_T(tmp); + fsmountargs += sizeof(tmp); + } + + /* if it is not update and device name needs to be parsed */ + if ((devpath)) { + NDINIT(&nd1, LOOKUP, FOLLOW, UIO_USERSPACE, devpath, &context); + if ( (error = namei(&nd1)) ) + goto out1; + + strncpy(mp->mnt_vfsstat.f_mntfromname, nd1.ni_cnd.cn_pnbuf, MAXPATHLEN); + devvp = nd1.ni_vp; + + nameidone(&nd1); + + if (devvp->v_type != VBLK) { + error = ENOTBLK; + goto out2; + } + if (major(devvp->v_rdev) >= nblkdev) { + error = ENXIO; + goto out2; + } + /* + * If mount by non-root, then verify that user has necessary + * permissions on the device. + */ + if (suser(context.vc_ucred, NULL) != 0) { + accessmode = KAUTH_VNODE_READ_DATA; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + accessmode |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(devvp, NULL, accessmode, &context)) != 0) + goto out2; + } + } + if (devpath && ((uap->flags & MNT_UPDATE) == 0)) { + if ( (error = vnode_ref(devvp)) ) + goto out2; + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. 
+ */ + if ( (error = vfs_mountedon(devvp)) ) + goto out3; + + if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) { + error = EBUSY; + goto out3; + } + if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, &context)) ) { + error = ENOTBLK; + goto out3; + } + if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) ) + goto out3; + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, &context)) ) + goto out3; + + mp->mnt_devvp = devvp; + device_vnode = devvp; + } else { + if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. + */ + device_vnode = mp->mnt_devvp; + if (device_vnode && suser(context.vc_ucred, NULL)) { + if ((error = vnode_authorize(device_vnode, NULL, + KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, &context)) != 0) + goto out2; + } + } + device_vnode = NULLVP; + } + } + + /* * Mount the filesystem. */ - error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p); + error = VFS_MOUNT(mp, device_vnode, fsmountargs, &context); if (uap->flags & MNT_UPDATE) { - vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ @@ -325,73 +469,102 @@ update: mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) mp->mnt_flag = flag; - vfs_unbusy(mp, p); + vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL); + lck_rw_done(&mp->mnt_rwlock); if (!error) - enablequotas(p, mp); - return (error); + enablequotas(mp,&context); + goto out2; } - - /* get the vnode lock */ - err2 = vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, p); - /* * Put the new filesystem on the mount list after root. */ - cache_purge(vp); - if (!error && !err2) { - simple_lock(&vp->v_interlock); + if (!error) { CLR(vp->v_flag, VMOUNT); - vp->v_mountedhere =mp; - simple_unlock(&vp->v_interlock); - simple_lock(&mountlist_slock); - CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - simple_unlock(&mountlist_slock); - vfs_event_signal(NULL, VQ_MOUNT, NULL); - checkdirs(vp); - VOP_UNLOCK(vp, 0, p); - vfs_unbusy(mp, p); - if (error = VFS_START(mp, 0, p)) - vrele(vp); + + vnode_lock(vp); + vp->v_mountedhere = mp; + vnode_unlock(vp); + + vnode_ref(vp); + + vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL); + checkdirs(vp, &context); + lck_rw_done(&mp->mnt_rwlock); + mount_list_add(mp); + /* + * there is no cleanup code here so I have made it void + * we need to revisit this + */ + (void)VFS_START(mp, 0, &context); /* increment the operations count */ - if (!error) { - vfs_nummntops++; - enablequotas(p, mp); - } + OSAddAtomic(1, (SInt32 *)&vfs_nummntops); + enablequotas(mp,&context); + + if (device_vnode) { + device_vnode->v_specflags |= SI_MOUNTEDON; + + /* + * cache the IO attributes for the underlying physical media... + * an error return indicates the underlying driver doesn't + * support all the queries necessary... however, reasonable + * defaults will have been set, so no reason to bail or care + */ + vfs_init_io_attributes(device_vnode, mp); + } } else { - simple_lock(&vp->v_interlock); CLR(vp->v_flag, VMOUNT); - simple_unlock(&vp->v_interlock); - mp->mnt_vfc->vfc_refcount--; + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - vfs_unbusy(mp, p); + if (device_vnode ) { + VNOP_CLOSE(device_vnode, ronly ? 
FREAD : FREAD|FWRITE, &context); + vnode_rele(device_vnode); + } + lck_rw_done(&mp->mnt_rwlock); + mount_lock_destroy(mp); FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); - if (err2) - vrele(vp); - else - vput(vp); } - return (error); + nameidone(&nd); + + /* + * drop I/O count on covered 'vp' and + * on the device vp if there was one + */ + if (devpath && devvp) + vnode_put(devvp); + vnode_put(vp); + + return(error); + +out3: + vnode_rele(devvp); +out2: + if (devpath && devvp) + vnode_put(devvp); +out1: + if (mntalloc) + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + vnode_put(vp); + nameidone(&nd); + + return(error); + } -static void -enablequotas(p, mp) - struct proc *p; - struct mount *mp; +void +enablequotas(struct mount *mp, vfs_context_t context) { - struct vnode *vp; struct nameidata qnd; int type; char qfpath[MAXPATHLEN]; - char *qfname = QUOTAFILENAME; - char *qfopsname = QUOTAOPSNAME; - char *qfextension[] = INITQFNAMES; - + const char *qfname = QUOTAFILENAME; + const char *qfopsname = QUOTAOPSNAME; + const char *qfextension[] = INITQFNAMES; - if ((strcmp(mp->mnt_stat.f_fstypename, "hfs") != 0 ) - && (strcmp( mp->mnt_stat.f_fstypename, "ufs") != 0)) + if ((strcmp(mp->mnt_vfsstat.f_fstypename, "hfs") != 0 ) + && (strcmp( mp->mnt_vfsstat.f_fstypename, "ufs") != 0)) return; /* @@ -399,21 +572,15 @@ enablequotas(p, mp) * We ignore errors as this should not interfere with final mount */ for (type=0; type < MAXQUOTAS; type++) { - sprintf(qfpath, "%s/%s.%s", mp->mnt_stat.f_mntonname, qfopsname, qfextension[type]); - NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, qfpath, p); - if (namei(&qnd) != 0) - continue; /* option file to trigger quotas is not present */ - vp = qnd.ni_vp; - sprintf(qfpath, "%s/%s.%s", mp->mnt_stat.f_mntonname, qfname, qfextension[type]); - if (vp->v_tag == VT_HFS) { - vrele(vp); - (void)hfs_quotaon(p, mp, type, qfpath, UIO_SYSSPACE); - } else if (vp->v_tag == VT_UFS) { - vrele(vp); - (void)quotaon(p, mp, type, qfpath, UIO_SYSSPACE); - } else { - vrele(vp); - } + sprintf(qfpath, "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]); + NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), context); + if (namei(&qnd) != 0) + continue; /* option file to trigger quotas is not present */ + vnode_put(qnd.ni_vp); + nameidone(&qnd); + sprintf(qfpath, "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]); + + (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, context); } return; } @@ -424,40 +591,67 @@ enablequotas(p, mp) * mounted. If so, replace them with the new mount point. 
*/ static void -checkdirs(olddp) +checkdirs(olddp, context) struct vnode *olddp; + vfs_context_t context; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; struct vnode *tvp; + struct vnode *fdp_cvp; + struct vnode *fdp_rvp; + int cdir_changed = 0; + int rdir_changed = 0; + boolean_t funnel_state; if (olddp->v_usecount == 1) return; - if (VFS_ROOT(olddp->v_mountedhere, &newdp)) + if (VFS_ROOT(olddp->v_mountedhere, &newdp, context)) panic("mount: lost mount"); + funnel_state = thread_funnel_set(kernel_flock, TRUE); + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + proc_fdlock(p); fdp = p->p_fd; - if (fdp->fd_cdir == olddp) { - VREF(newdp); + if (fdp == (struct filedesc *)0) { + proc_fdunlock(p); + continue; + } + fdp_cvp = fdp->fd_cdir; + fdp_rvp = fdp->fd_rdir; + proc_fdunlock(p); + + if (fdp_cvp == olddp) { + vnode_ref(newdp); tvp = fdp->fd_cdir; - fdp->fd_cdir = newdp; - vrele(tvp); + fdp_cvp = newdp; + cdir_changed = 1; + vnode_rele(tvp); } - if (fdp->fd_rdir == olddp) { - VREF(newdp); + if (fdp_rvp == olddp) { + vnode_ref(newdp); tvp = fdp->fd_rdir; - fdp->fd_rdir = newdp; - vrele(tvp); + fdp_rvp = newdp; + rdir_changed = 1; + vnode_rele(tvp); + } + if (cdir_changed || rdir_changed) { + proc_fdlock(p); + fdp->fd_cdir = fdp_cvp; + fdp->fd_rdir = fdp_rvp; + proc_fdunlock(p); } } if (rootvnode == olddp) { - VREF(newdp); + vnode_ref(newdp); tvp = rootvnode; rootvnode = newdp; - vrele(tvp); + vnode_rele(tvp); } - vput(newdp); + thread_funnel_set(kernel_flock, funnel_state); + + vnode_put(newdp); } /* @@ -466,43 +660,43 @@ checkdirs(olddp) * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ -struct unmount_args { - char *path; - int flags; -}; /* ARGSUSED */ int -unmount(p, uap, retval) - struct proc *p; - register struct unmount_args *uap; - register_t *retval; +unmount(struct proc *p, register struct unmount_args *uap, __unused register_t *retval) { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; mp = vp->v_mount; + nameidone(&nd); /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { - vput(vp); + vnode_put(vp); return (EINVAL); } - vput(vp); + vnode_put(vp); return (safedounmount(mp, uap->flags, p)); } /* * Do the actual file system unmount, prevent some common foot shooting. + * + * XXX Should take a "vfs_context_t" instead of a "struct proc *" */ int safedounmount(mp, flags, p) @@ -516,8 +710,8 @@ safedounmount(mp, flags, p) * Only root, or the user that did the original mount is * permitted to unmount this filesystem. 
*/ - if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && - (error = suser(p->p_ucred, &p->p_acflag))) + if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) && + (error = suser(kauth_cred_get(), &p->p_acflag))) return (error); /* @@ -538,18 +732,27 @@ dounmount(mp, flags, p) int flags; struct proc *p; { - struct vnode *coveredvp; + struct vnode *coveredvp = (vnode_t)0; int error; + int needwakeup = 0; + struct vfs_context context; + int forcedunmount = 0; + int lflags = 0; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - simple_lock(&mountlist_slock); + if (flags & MNT_FORCE) + forcedunmount = 1; + mount_lock(mp); /* XXX post jaguar fix LK_DRAIN - then clean this up */ - if ((flags & MNT_FORCE)) + if ((flags & MNT_FORCE)) { mp->mnt_kern_flag |= MNTK_FRCUNMOUNT; - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - simple_unlock(&mountlist_slock); - mp->mnt_kern_flag |= MNTK_MWAIT; - if ((error = tsleep((void *)mp, PRIBIO, "dounmount", 0))) - return (error); + mp->mnt_lflag |= MNT_LFORCE; + } + if (mp->mnt_lflag & MNT_LUNMOUNT) { + mp->mnt_lflag |= MNT_LWAIT; + msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", 0 ); /* * The prior unmount attempt has probably succeeded. * Do not dereference mp here - returning EBUSY is safest. @@ -557,54 +760,149 @@ dounmount(mp, flags, p) return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; - error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, - &mountlist_slock, p); - if (error) { + mp->mnt_lflag |= MNT_LUNMOUNT; + mp->mnt_flag &=~ MNT_ASYNC; + mount_unlock(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); + fsevent_unmount(mp); /* has to come first! */ + error = 0; + if (forcedunmount == 0) { + ubc_umount(mp); /* release cached vnodes */ + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + error = VFS_SYNC(mp, MNT_WAIT, &context); + if (error) { + mount_lock(mp); + mp->mnt_kern_flag &= ~MNTK_UNMOUNT; + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; + goto out; + } + } + } + + if (forcedunmount) + lflags |= FORCECLOSE; + error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags); + if ((forcedunmount == 0) && error) { + mount_lock(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; goto out; } - mp->mnt_flag &=~ MNT_ASYNC; - ubc_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - if (((mp->mnt_flag & MNT_RDONLY) || - (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || - (flags & MNT_FORCE)) - error = VFS_UNMOUNT(mp, flags, p); - simple_lock(&mountlist_slock); + + /* make sure there are no one in the mount iterations or lookup */ + mount_iterdrain(mp); + + error = VFS_UNMOUNT(mp, flags, &context); if (error) { + mount_iterreset(mp); + mount_lock(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, - &mountlist_slock, p); + mp->mnt_lflag &= ~MNT_LUNMOUNT; + mp->mnt_lflag &= ~MNT_LFORCE; goto out; } /* increment the operations count */ if (!error) - vfs_nummntops++; - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + OSAddAtomic(1, (SInt32 *)&vfs_nummntops); + + if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) { + mp->mnt_devvp->v_specflags &= ~SI_MOUNTEDON; + VNOP_CLOSE(mp->mnt_devvp, mp->mnt_flag & MNT_RDONLY ? 
FREAD : FREAD|FWRITE, + &context); + vnode_rele(mp->mnt_devvp); + } + lck_rw_done(&mp->mnt_rwlock); + mount_list_remove(mp); + lck_rw_lock_exclusive(&mp->mnt_rwlock); + + /* mark the mount point hook in the vp but not drop the ref yet */ if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { - coveredvp->v_mountedhere = (struct mount *)0; - simple_unlock(&mountlist_slock); - vrele(coveredvp); - simple_lock(&mountlist_slock); + vnode_getwithref(coveredvp); + vnode_lock(coveredvp); + coveredvp->v_mountedhere = (struct mount *)0; + vnode_unlock(coveredvp); + vnode_put(coveredvp); } - mp->mnt_vfc->vfc_refcount--; - if (mp->mnt_vnodelist.lh_first != NULL) { - panic("unmount: dangling vnode"); + + mount_list_lock(); + mp->mnt_vtable->vfc_refcount--; + mount_list_unlock(); + + cache_purgevfs(mp); /* remove cache entries for this file sys */ + vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL); + mount_lock(mp); + mp->mnt_lflag |= MNT_LDEAD; + + if (mp->mnt_lflag & MNT_LWAIT) { + /* + * do the wakeup here + * in case we block in mount_refdrain + * which will drop the mount lock + * and allow anyone blocked in vfs_busy + * to wakeup and see the LDEAD state + */ + mp->mnt_lflag &= ~MNT_LWAIT; + wakeup((caddr_t)mp); } - lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); - vfs_event_signal(NULL, VQ_UNMOUNT, NULL); + mount_refdrain(mp); out: - if (mp->mnt_kern_flag & MNTK_MWAIT) + if (mp->mnt_lflag & MNT_LWAIT) { + mp->mnt_lflag &= ~MNT_LWAIT; + needwakeup = 1; + } + mount_unlock(mp); + lck_rw_done(&mp->mnt_rwlock); + + if (needwakeup) wakeup((caddr_t)mp); if (!error) { - if (mp->mnt_kern_flag & MNTK_IO_XINFO) - FREE(mp->mnt_xinfo_ptr, M_TEMP); - FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + if ((coveredvp != NULLVP)) { + vnode_getwithref(coveredvp); + vnode_rele(coveredvp); + vnode_lock(coveredvp); + if(mp->mnt_crossref == 0) { + vnode_unlock(coveredvp); + mount_lock_destroy(mp); + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } else { + coveredvp->v_lflag |= VL_MOUNTDEAD; + vnode_unlock(coveredvp); + } + vnode_put(coveredvp); + } else if (mp->mnt_flag & MNT_ROOTFS) { + mount_lock_destroy(mp); + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } else + panic("dounmount: no coveredvp"); } return (error); } +void +mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) +{ + vnode_lock(dp); + mp->mnt_crossref--; + if (mp->mnt_crossref < 0) + panic("mount cross refs -ve"); + if (((dp->v_lflag & VL_MOUNTDEAD) == VL_MOUNTDEAD) && (mp->mnt_crossref == 0)) { + dp->v_lflag &= ~VL_MOUNTDEAD; + if (need_put) + vnode_put_locked(dp); + vnode_unlock(dp); + mount_lock_destroy(mp); + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + return; + } + if (need_put) + vnode_put_locked(dp); + vnode_unlock(dp); +} + + /* * Sync each mounted filesystem. 
*/ @@ -613,44 +911,39 @@ int syncprt = 0; struct ctldebug debug0 = { "syncprt", &syncprt }; #endif -struct sync_args { - int dummy; -}; int print_vmpage_stat=0; -/* ARGSUSED */ -int -sync(p, uap, retval) - struct proc *p; - struct sync_args *uap; - register_t *retval; +static int +sync_callback(mount_t mp, __unused void * arg) { - register struct mount *mp, *nmp; + struct proc * p = current_proc(); int asyncflag; + struct vfs_context context; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - if ((mp->mnt_flag & MNT_RDONLY) == 0) { + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; - VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); + VFS_SYNC(mp, MNT_NOWAIT, &context); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - } - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); } - simple_unlock(&mountlist_slock); + return(VFS_RETURNED); +} + +extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean; +extern unsigned int dp_pgins, dp_pgouts; + +/* ARGSUSED */ +int +sync(__unused struct proc *p, __unused struct sync_args *uap, __unused register_t *retval) +{ + + vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); { - extern void vm_countdirtypages(void); - extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean; - extern unsigned int dp_pgins, dp_pgouts; if(print_vmpage_stat) { vm_countdirtypages(); printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein, @@ -667,164 +960,267 @@ sync(p, uap, retval) /* * Change filesystem quotas. */ -struct quotactl_args { - char *path; - int cmd; - int uid; - caddr_t arg; -}; /* ARGSUSED */ int -quotactl(p, uap, retval) - struct proc *p; - register struct quotactl_args *uap; - register_t *retval; +quotactl(struct proc *p, register struct quotactl_args *uap, __unused register_t *retval) { register struct mount *mp; - int error; + int error, quota_cmd, quota_status; + caddr_t datap; + size_t fnamelen; struct nameidata nd; + struct vfs_context context; + struct dqblk my_dqblk; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); AUDIT_ARG(uid, uap->uid, 0, 0, 0); AUDIT_ARG(cmd, uap->cmd); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); mp = nd.ni_vp->v_mount; - vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, - uap->arg, p)); + vnode_put(nd.ni_vp); + nameidone(&nd); + + /* copyin any data we will need for downstream code */ + quota_cmd = uap->cmd >> SUBCMDSHIFT; + + switch (quota_cmd) { + case Q_QUOTAON: + /* uap->arg specifies a file from which to take the quotas */ + fnamelen = MAXPATHLEN; + datap = kalloc(MAXPATHLEN); + error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen); + break; + case Q_GETQUOTA: + /* uap->arg is a pointer to a dqblk structure. */ + datap = (caddr_t) &my_dqblk; + break; + case Q_SETQUOTA: + case Q_SETUSE: + /* uap->arg is a pointer to a dqblk structure. 
*/ + datap = (caddr_t) &my_dqblk; + if (proc_is64bit(p)) { + struct user_dqblk my_dqblk64; + error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64)); + if (error == 0) { + munge_dqblk(&my_dqblk, &my_dqblk64, FALSE); + } + } + else { + error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk)); + } + break; + case Q_QUOTASTAT: + /* uap->arg is a pointer to an integer */ + datap = (caddr_t) &quota_status; + break; + default: + datap = NULL; + break; + } /* switch */ + + if (error == 0) { + error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, &context); + } + + switch (quota_cmd) { + case Q_QUOTAON: + if (datap != NULL) + kfree(datap, MAXPATHLEN); + break; + case Q_GETQUOTA: + /* uap->arg is a pointer to a dqblk structure we need to copy out to */ + if (error == 0) { + if (proc_is64bit(p)) { + struct user_dqblk my_dqblk64; + munge_dqblk(&my_dqblk, &my_dqblk64, TRUE); + error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64)); + } + else { + error = copyout(datap, uap->arg, sizeof (struct dqblk)); + } + } + break; + case Q_QUOTASTAT: + /* uap->arg is a pointer to an integer */ + if (error == 0) { + error = copyout(datap, uap->arg, sizeof(quota_status)); + } + break; + default: + break; + } /* switch */ + + return (error); } /* * Get filesystem statistics. */ -struct statfs_args { - char *path; - struct statfs *buf; -}; /* ARGSUSED */ int -statfs(p, uap, retval) - struct proc *p; - register struct statfs_args *uap; - register_t *retval; +statfs(struct proc *p, register struct statfs_args *uap, __unused register_t *retval) { - register struct mount *mp; - register struct statfs *sp; + struct mount *mp; + struct vfsstatfs *sp; int error; struct nameidata nd; + struct vfs_context context; + vnode_t vp; - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); - mp = nd.ni_vp->v_mount; - sp = &mp->mnt_stat; - vrele(nd.ni_vp); - if (error = VFS_STATFS(mp, sp, p)) + vp = nd.ni_vp; + mp = vp->v_mount; + sp = &mp->mnt_vfsstat; + nameidone(&nd); + + error = vfs_update_vfsstat(mp, &context); + vnode_put(vp); + if (error != 0) return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, - sizeof(*sp)-sizeof(sp->f_reserved3)-sizeof(sp->f_reserved4))); + + error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); + return (error); } /* * Get filesystem statistics.
*/ -struct fstatfs_args { - int fd; - struct statfs *buf; -}; /* ARGSUSED */ int -fstatfs(p, uap, retval) - struct proc *p; - register struct fstatfs_args *uap; - register_t *retval; +fstatfs(struct proc *p, register struct fstatfs_args *uap, __unused register_t *retval) { - struct file *fp; + struct vnode *vp; struct mount *mp; - register struct statfs *sp; + struct vfsstatfs *sp; int error; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); AUDIT_ARG(fd, uap->fd); - if (error = getvnode(p, uap->fd, &fp)) + if ( (error = file_vnode(uap->fd, &vp)) ) return (error); - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); - mp = ((struct vnode *)fp->f_data)->v_mount; - if (!mp) + mp = vp->v_mount; + if (!mp) { + file_drop(uap->fd); return (EBADF); - sp = &mp->mnt_stat; - if (error = VFS_STATFS(mp, sp, p)) + } + sp = &mp->mnt_vfsstat; + if ((error = vfs_update_vfsstat(mp, &context)) != 0) { + file_drop(uap->fd); return (error); - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, - sizeof(*sp)-sizeof(sp->f_reserved3)-sizeof(sp->f_reserved4))); + } + file_drop(uap->fd); + + error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); + + return (error); } -/* - * Get statistics on all filesystems. - */ -struct getfsstat_args { - struct statfs *buf; - long bufsize; - int flags; + +struct getfsstat_struct { + user_addr_t sfsp; + int count; + int maxcount; + int flags; + int error; }; -int -getfsstat(p, uap, retval) - struct proc *p; - register struct getfsstat_args *uap; - register_t *retval; -{ - register struct mount *mp, *nmp; - register struct statfs *sp; - caddr_t sfsp; - long count, maxcount, error; - maxcount = uap->bufsize / sizeof(struct statfs); - sfsp = (caddr_t)uap->buf; - count = 0; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; + +static int +getfsstat_callback(mount_t mp, void * arg) +{ + + struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg; + struct vfsstatfs *sp; + struct proc * p = current_proc(); + int error, my_size; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + if (fstp->sfsp && fstp->count < fstp->maxcount) { + sp = &mp->mnt_vfsstat; + /* + * If MNT_NOWAIT is specified, do not refresh the + * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + */ + if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) && + (error = vfs_update_vfsstat(mp, &context))) { + KAUTH_DEBUG("vfs_update_vfsstat returned %d", error); + return(VFS_RETURNED); } - if (sfsp && count < maxcount) { - sp = &mp->mnt_stat; - /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
- */ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) { - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - continue; - } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp))) { - vfs_unbusy(mp, p); - return (error); - } - sfsp += sizeof(*sp); + + /* + * Need to handle LP64 version of struct statfs + */ + error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(p), FALSE); + if (error) { + fstp->error = error; + return(VFS_RETURNED_DONE); } - count++; - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); - if (sfsp && count > maxcount) - *retval = maxcount; + fstp->sfsp += my_size; + } + fstp->count++; + return(VFS_RETURNED); +} + +/* + * Get statistics on all filesystems. + */ +int +getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval) +{ + user_addr_t sfsp; + int count, maxcount; + struct getfsstat_struct fst; + + if (IS_64BIT_PROCESS(p)) { + maxcount = uap->bufsize / sizeof(struct user_statfs); + } + else { + maxcount = uap->bufsize / sizeof(struct statfs); + } + sfsp = uap->buf; + count = 0; + + fst.sfsp = sfsp; + fst.flags = uap->flags; + fst.count = 0; + fst.error = 0; + fst.maxcount = maxcount; + + + vfs_iterate(0, getfsstat_callback, &fst); + + if (fst.error ) { + KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error); + return(fst.error); + } + + if (fst.sfsp && fst.count > fst.maxcount) + *retval = fst.maxcount; else - *retval = count; + *retval = fst.count; return (0); } @@ -834,166 +1230,138 @@ ogetfsstat(p, uap, retval) register struct getfsstat_args *uap; register_t *retval; { - register struct mount *mp, *nmp; - register struct statfs *sp; - caddr_t sfsp; - long count, maxcount, error; - - maxcount = uap->bufsize / (sizeof(struct statfs) - sizeof(sp->f_reserved4)); - sfsp = (caddr_t)uap->buf; - count = 0; - simple_lock(&mountlist_slock); - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { - nmp = mp->mnt_list.cqe_next; - continue; - } - if (sfsp && count < maxcount) { - sp = &mp->mnt_stat; - /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. - */ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && - (error = VFS_STATFS(mp, sp, p))) { - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - continue; - } - sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - error = copyout((caddr_t)sp, sfsp, - sizeof(*sp) - sizeof(sp->f_reserved3) - - sizeof(sp->f_reserved4)); - if (error) { - vfs_unbusy(mp, p); - return (error); - } - sfsp += sizeof(*sp) - sizeof(sp->f_reserved4); - } - count++; - simple_lock(&mountlist_slock); - nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp, p); - } - simple_unlock(&mountlist_slock); - if (sfsp && count > maxcount) - *retval = maxcount; - else - *retval = count; - return (0); + return (ENOTSUP); } #endif /* * Change current working directory to a given file descriptor. 
*/ -struct fchdir_args { - int fd; -}; /* ARGSUSED */ int -fchdir(p, uap, retval) - struct proc *p; - struct fchdir_args *uap; - register_t *retval; +fchdir(struct proc *p, struct fchdir_args *uap, __unused register_t *retval) { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp, *tvp; struct mount *mp; - struct file *fp; int error; + struct vfs_context context; - if (error = getvnode(p, uap->fd, &fp)) - return (error); - vp = (struct vnode *)fp->f_data; - VREF(vp); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + if ( (error = file_vnode(uap->fd, &vp)) ) + return(error); + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); if (vp->v_type != VDIR) error = ENOTDIR; else - error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &context); while (!error && (mp = vp->v_mountedhere) != NULL) { - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(vp); - return (EACCES); + if (vfs_busy(mp, LK_NOWAIT)) { + error = EACCES; + goto out; } - error = VFS_ROOT(mp, &tdp); - vfs_unbusy(mp, p); + error = VFS_ROOT(mp, &tdp, &context); + vfs_unbusy(mp); if (error) break; - vput(vp); + vnode_put(vp); vp = tdp; } - if (error) { - vput(vp); - return (error); - } - VOP_UNLOCK(vp, 0, p); + if (error) + goto out; + if ( (error = vnode_ref(vp)) ) + goto out; + vnode_put(vp); + + proc_fdlock(p); tvp = fdp->fd_cdir; fdp->fd_cdir = vp; - vrele(tvp); + proc_fdunlock(p); + + if (tvp) + vnode_rele(tvp); + file_drop(uap->fd); + return (0); +out: + vnode_put(vp); + file_drop(uap->fd); + + return(error); } /* * Change current working directory (``.''). */ -struct chdir_args { - char *path; -}; /* ARGSUSED */ int -chdir(p, uap, retval) - struct proc *p; - struct chdir_args *uap; - register_t *retval; +chdir(struct proc *p, struct chdir_args *uap, __unused register_t *retval) { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; struct vnode *tvp; + struct vfs_context context; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = change_dir(&nd, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); + error = change_dir(&nd, &context); if (error) return (error); + if ( (error = vnode_ref(nd.ni_vp)) ) { + vnode_put(nd.ni_vp); + return (error); + } + /* + * drop the iocount we picked up in change_dir + */ + vnode_put(nd.ni_vp); + + proc_fdlock(p); tvp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; - vrele(tvp); + proc_fdunlock(p); + + if (tvp) + vnode_rele(tvp); + return (0); } /* * Change notion of root (``/'') directory. 
*/ -struct chroot_args { - char *path; -}; /* ARGSUSED */ int -chroot(p, uap, retval) - struct proc *p; - struct chroot_args *uap; - register_t *retval; +chroot(struct proc *p, struct chroot_args *uap, __unused register_t *retval) { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; boolean_t shared_regions_active; struct vnode *tvp; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = change_dir(&nd, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); + error = change_dir(&nd, &context); if (error) return (error); @@ -1002,16 +1370,27 @@ chroot(p, uap, retval) } else { shared_regions_active = TRUE; } - - if(error = clone_system_shared_regions(shared_regions_active, nd.ni_vp)) { - vrele(nd.ni_vp); + if ((error = clone_system_shared_regions(shared_regions_active, + TRUE, /* chain_regions */ + (int)nd.ni_vp))) { + vnode_put(nd.ni_vp); + return (error); + } + if ( (error = vnode_ref(nd.ni_vp)) ) { + vnode_put(nd.ni_vp); return (error); } + vnode_put(nd.ni_vp); + proc_fdlock(p); tvp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; + fdp->fd_flags |= FD_CHROOT; + proc_fdunlock(p); + if (tvp != NULL) - vrele(tvp); + vnode_rele(tvp); + return (0); } @@ -1019,24 +1398,22 @@ chroot(p, uap, retval) * Common routine for chroot and chdir. */ static int -change_dir(ndp, p) - register struct nameidata *ndp; - struct proc *p; +change_dir(struct nameidata *ndp, vfs_context_t ctx) { struct vnode *vp; int error; - if (error = namei(ndp)) + if ((error = namei(ndp))) return (error); + nameidone(ndp); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else - error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx); if (error) - vput(vp); - else - VOP_UNLOCK(vp, 0, p); + vnode_put(vp); + return (error); } @@ -1044,64 +1421,60 @@ change_dir(ndp, p) * Check permissions, allocate an open file structure, * and call the device open routine if any. 
*/ -struct open_args { - char *path; - int flags; - int mode; -}; -int -open(p, uap, retval) - struct proc *p; - register struct open_args *uap; - register_t *retval; + +#warning XXX implement uid, gid +static int +open1(vfs_context_t ctx, user_addr_t upath, int uflags, struct vnode_attr *vap, register_t *retval) { + struct proc *p = vfs_context_proc(ctx); register struct filedesc *fdp = p->p_fd; - register struct file *fp; + register struct fileproc *fp; register struct vnode *vp; - int flags, cmode, oflags; - struct file *nfp; + int flags, oflags; + struct fileproc *nfp; int type, indx, error; struct flock lf; struct nameidata nd; - extern struct fileops vnops; - - oflags = uap->flags; - flags = FFLAGS(uap->flags); - - AUDIT_ARG(fflags, oflags); - AUDIT_ARG(mode, uap->mode); - cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + oflags = uflags; if ((oflags & O_ACCMODE) == O_ACCMODE) return(EINVAL); - if (error = falloc(p, &nfp, &indx)) + flags = FFLAGS(uflags); + + AUDIT_ARG(fflags, oflags); + AUDIT_ARG(mode, vap->va_mode); + + if ( (error = falloc(p, &nfp, &indx)) ) { return (error); + } fp = nfp; - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, upath, ctx); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ - if (error = vn_open_modflags(&nd, &flags, cmode)) { - ffree(fp); - if ((error == ENODEV || error == ENXIO) && - p->p_dupfd >= 0 && /* XXX from fdopen */ - (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { - *retval = indx; - return (0); + + if ((error = vn_open_auth(&nd, &flags, vap))) { + if ((error == ENODEV || error == ENXIO) && (p->p_dupfd >= 0)) { /* XXX from fdopen */ + if ((error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + fp_drop(p, indx, 0, 0); + *retval = indx; + return (0); + } } if (error == ERESTART) - error = EINTR; - fdrelse(p, indx); + error = EINTR; + fp_free(p, indx, fp); + return (error); } p->p_dupfd = 0; vp = nd.ni_vp; - fp->f_flag = flags & FMASK; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - VOP_UNLOCK(vp, 0, p); + fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY); + fp->f_fglob->fg_type = DTYPE_VNODE; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -1113,450 +1486,642 @@ open(p, uap, retval) type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) goto bad; - fp->f_flag |= FHASLOCK; + fp->f_fglob->fg_flag |= FHASLOCK; } - if (flags & O_TRUNC) { - struct vattr vat; - struct vattr *vap = &vat; + /* try to truncate by setting the size attribute */ + if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0)) + goto bad; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - (void)vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ - VATTR_NULL(vap); - vap->va_size = 0; - /* try to truncate by setting the size attribute */ - error = VOP_SETATTR(vp, vap, p->p_ucred, p); - VOP_UNLOCK(vp, 0, p); /* XXX */ - if (error) - goto bad; - } + vnode_put(vp); + proc_fdlock(p); *fdflags(p, indx) &= ~UF_RESERVED; + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + *retval = indx; + return (0); bad: - vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdrelse(p, indx); + vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p); + vnode_put(vp); + 
fp_free(p, indx, fp); + return (error); + } -#if COMPAT_43 -/* - * Create a file. - */ -struct ocreat_args { - char *path; - int mode; -}; int -ocreat(p, uap, retval) - struct proc *p; - register struct ocreat_args *uap; - register_t *retval; +open_extended(struct proc *p, struct open_extended_args *uap, register_t *retval) +{ + struct vfs_context context; + register struct filedesc *fdp = p->p_fd; + int ciferror; + kauth_filesec_t xsecdst; + struct vnode_attr va; + int cmode; + + xsecdst = NULL; + if ((uap->xsecurity != USER_ADDR_NULL) && + ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) + return ciferror; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + if (xsecdst != NULL) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + + ciferror = open1(&context, uap->path, uap->flags, &va, retval); + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + + return ciferror; +} + +int +open(struct proc *p, struct open_args *uap, register_t *retval) { - struct open_args nuap; + struct vfs_context context; + register struct filedesc *fdp = p->p_fd; + struct vnode_attr va; + int cmode; - nuap.path = uap->path; - nuap.mode = uap->mode; - nuap.flags = O_WRONLY | O_CREAT | O_TRUNC; - return (open(p, &nuap, retval)); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + /* Mask off all but regular access permissions */ + cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); + + return(open1(&context, uap->path, uap->flags, &va, retval)); } -#endif /* COMPAT_43 */ + /* * Create a special file. 
*/ -struct mknod_args { - char *path; - int mode; - int dev; -}; -/* ARGSUSED */ +static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap); + int -mknod(p, uap, retval) - struct proc *p; - register struct mknod_args *uap; - register_t *retval; +mknod(struct proc *p, register struct mknod_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct vattr vattr; - int cmode, error; - int whiteout; + struct vnode_attr va; + struct vfs_context context; + int error; + int whiteout = 0; struct nameidata nd; + vnode_t vp, dvp; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + VATTR_SET(&va, va_rdev, uap->dev); + + /* If it's a mknod() of a FIFO, call mkfifo1() instead */ + if ((uap->mode & S_IFMT) == S_IFIFO) + return(mkfifo1(&context, uap->path, &va)); AUDIT_ARG(mode, uap->mode); AUDIT_ARG(dev, uap->dev); - cmode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - if (error = suser(p->p_ucred, &p->p_acflag)) + + if ((error = suser(context.vc_ucred, &p->p_acflag))) return (error); - bwillwrite(); - NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp != NULL) + + if (vp != NULL) { error = EEXIST; - else { - VATTR_NULL(&vattr); - vattr.va_mode = cmode; - vattr.va_rdev = uap->dev; - whiteout = 0; - - switch (uap->mode & S_IFMT) { - case S_IFMT: /* used by badsect to flag bad sectors */ - vattr.va_type = VBAD; - break; - case S_IFCHR: - vattr.va_type = VCHR; - break; - case S_IFBLK: - vattr.va_type = VBLK; - break; - case S_IFWHT: - whiteout = 1; - break; - default: - error = EINVAL; - break; - } + goto out; } - if (!error) { - char *nameptr; - nameptr = add_name(nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, 0); - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (whiteout) { - error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); - if (error) - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - } else { - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, - &nd.ni_cnd, &vattr); - } - if (error == 0 && nd.ni_vp) { - if (VNAME(nd.ni_vp) == NULL) { - VNAME(nd.ni_vp) = nameptr; - nameptr = NULL; - } - if (VPARENT(nd.ni_vp) == NULL) { - if (vget(nd.ni_dvp, 0, p) == 0) { - VPARENT(nd.ni_vp) = nd.ni_dvp; - } - } - } - if (nameptr) { - remove_name(nameptr); - nameptr = NULL; - } + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context)) != 0) + goto out; + + switch (uap->mode & S_IFMT) { + case S_IFMT: /* used by badsect to flag bad sectors */ + VATTR_SET(&va, va_type, VBAD); + break; + case S_IFCHR: + VATTR_SET(&va, va_type, VCHR); + break; + case S_IFBLK: + VATTR_SET(&va, va_type, VBLK); + break; + case S_IFWHT: + whiteout = 1; + break; + default: + error = EINVAL; + goto out; + } + if (whiteout) { + error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE, &context); } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp) - vrele(vp); + error = vn_create(dvp, &vp, &nd.ni_cnd, &va, 0, &context); + } + if (error) + goto out; + + if (vp) { + int update_flags = 0; + + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + 
vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); + + add_fsevent(FSE_CREATE_FILE, &context, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); } + +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + return (error); } /* * Create a named pipe. */ -struct mkfifo_args { - char *path; - int mode; -}; -/* ARGSUSED */ -int -mkfifo(p, uap, retval) - struct proc *p; - register struct mkfifo_args *uap; - register_t *retval; +static int +mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) { - struct vattr vattr; + vnode_t vp, dvp; int error; struct nameidata nd; - char *nameptr=NULL; - -#if !FIFO - return (EOPNOTSUPP); -#else - bwillwrite(); - NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, upath, ctx); error = namei(&nd); if (error) return (error); - if (nd.ni_vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - return (EEXIST); - } - - nameptr = add_name(nd.ni_cnd.cn_nameptr, - nd.ni_cnd.cn_namelen, - nd.ni_cnd.cn_hash, 0); - VATTR_NULL(&vattr); - vattr.va_type = VFIFO; - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - - if (error == 0 && nd.ni_vp && nd.ni_vp->v_type == VFIFO) { - int vpid = nd.ni_vp->v_id; - if (vget(nd.ni_vp, 0, p) == 0) { - if (vpid == nd.ni_vp->v_id && nd.ni_vp->v_type == VFIFO) { - VNAME(nd.ni_vp) = nameptr; - nameptr = NULL; - - if (VPARENT(nd.ni_vp) == NULL) { - if (vget(nd.ni_dvp, 0, p) == 0) { - VPARENT(nd.ni_vp) = nd.ni_dvp; - } - } - } - } - } - if (nameptr) { - remove_name(nameptr); - } + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + /* check that this is a new file and authorize addition */ + if (vp != NULL) { + error = EEXIST; + goto out; + } + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto out; + + VATTR_SET(vap, va_type, VFIFO); + + error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx); +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + return error; -#endif /* FIFO */ +} + +int +mkfifo_extended(struct proc *p, struct mkfifo_extended_args *uap, __unused register_t *retval) +{ + int ciferror; + kauth_filesec_t xsecdst; + struct vfs_context context; + struct vnode_attr va; + + xsecdst = KAUTH_FILESEC_NONE; + if (uap->xsecurity != USER_ADDR_NULL) { + if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return ciferror; + } + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + if (xsecdst != KAUTH_FILESEC_NONE) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + + ciferror = mkfifo1(&context, uap->path, &va); + + if (xsecdst != KAUTH_FILESEC_NONE) + kauth_filesec_free(xsecdst); + return ciferror; +} + +/* ARGSUSED */ +int +mkfifo(struct proc *p, register struct mkfifo_args *uap, __unused register_t *retval) +{ + struct 
vfs_context context; + struct vnode_attr va; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask); + + return(mkfifo1(&context, uap->path, &va)); } /* * Make a hard file link. */ -struct link_args { - char *path; - char *link; -}; /* ARGSUSED */ int -link(p, uap, retval) - struct proc *p; - register struct link_args *uap; - register_t *retval; +link(struct proc *p, register struct link_args *uap, __unused register_t *retval) { - register struct vnode *vp; + vnode_t vp, dvp, lvp; struct nameidata nd; + struct vfs_context context; int error; + fse_info finfo; + int need_event, has_listeners; - bwillwrite(); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + vp = dvp = lvp = NULLVP; + + /* look up the object we are linking to */ + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; - if (vp->v_type == VDIR) + + nameidone(&nd); + + /* we're not allowed to link to directories */ + if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ - else { - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2; - nd.ni_dirp = uap->link; - error = namei(&nd); - if (error == 0) { - if (nd.ni_vp != NULL) - error = EEXIST; - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, - LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); + goto out; + } + + /* or to anything that kauth doesn't want us to (eg. immutable items) */ + if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, &context)) != 0) + goto out; + + /* lookup the target node */ + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2; + nd.ni_dirp = uap->link; + error = namei(&nd); + if (error != 0) + goto out; + dvp = nd.ni_dvp; + lvp = nd.ni_vp; + /* target node must not exist */ + if (lvp != NULLVP) { + error = EEXIST; + goto out2; + } + /* cannot link across mountpoints */ + if (vnode_mount(vp) != vnode_mount(dvp)) { + error = EXDEV; + goto out2; + } + + /* authorize creation of the target node */ + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context)) != 0) + goto out2; + + /* and finally make the link */ + error = VNOP_LINK(vp, dvp, &nd.ni_cnd, &context); + if (error) + goto out2; + + need_event = need_fsevent(FSE_CREATE_FILE, dvp); + has_listeners = kauth_authorize_fileop_has_listeners(); + + if (need_event || has_listeners) { + char *target_path = NULL; + char *link_to_path = NULL; + int len, link_name_len; + + /* build the path to the new link file */ + target_path = get_pathbuff(); + len = MAXPATHLEN; + vn_getpath(dvp, target_path, &len); + target_path[len-1] = '/'; + strcpy(&target_path[len], nd.ni_cnd.cn_nameptr); + len += nd.ni_cnd.cn_namelen; + + if (has_listeners) { + /* build the path to the file we are linking to */ + link_to_path = get_pathbuff(); + link_name_len = MAXPATHLEN; + vn_getpath(vp, link_to_path, &link_name_len); + + /* call out to allow 3rd party notification of the link. * Ignore result of kauth_authorize_fileop call.
+ */ + kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_LINK, + (uintptr_t)link_to_path, (uintptr_t)target_path); + if (link_to_path != NULL) + release_pathbuff(link_to_path); + } + if (need_event) { + /* construct fsevent */ + if (get_fse_info(vp, &finfo, &context) == 0) { + // build the path to the destination of the link + add_fsevent(FSE_CREATE_FILE, &context, + FSE_ARG_STRING, len, target_path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); } } + release_pathbuff(target_path); } - vrele(vp); - return (error); -} +out2: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); +out: + if (lvp) + vnode_put(lvp); + if (dvp) + vnode_put(dvp); + vnode_put(vp); + return (error); +} /* * Make a symbolic link. + * + * We could add support for ACLs here too... */ -struct symlink_args { - char *path; - char *link; -}; /* ARGSUSED */ int -symlink(p, uap, retval) - struct proc *p; - register struct symlink_args *uap; - register_t *retval; +symlink(struct proc *p, register struct symlink_args *uap, __unused register_t *retval) { - struct vattr vattr; - char *path, *nameptr; + struct vnode_attr va; + char *path; int error; struct nameidata nd; + struct vfs_context context; + vnode_t vp, dvp; size_t dummy=0; - u_long vpid; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (error = copyinstr(uap->path, path, MAXPATHLEN, &dummy)) + error = copyinstr(uap->path, path, MAXPATHLEN, &dummy); + if (error) goto out; AUDIT_ARG(text, path); /* This is the link string */ - bwillwrite(); - NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->link, p); + + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->link, &context); error = namei(&nd); if (error) goto out; - if (nd.ni_vp) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); - error = EEXIST; - goto out; - } - VATTR_NULL(&vattr); - vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - - nameptr = add_name(nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, 0); + dvp = nd.ni_dvp; + vp = nd.ni_vp; - error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + if (vp == NULL) { + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VLNK); + VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask); + + /* authorize */ + error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, &context); + /* get default ownership, etc. */ + if (error == 0) + error = vnode_authattr_new(dvp, &va, 0, &context); + if (error == 0) + error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, &context); + + /* do fallback attribute handling */ + if (error == 0) + error = vnode_setattr_fallback(vp, &va, &context); + + if (error == 0) { + int update_flags = 0; - // have to do this little dance because nd.ni_vp is not locked - // on return from the VOP_SYMLINK() call. 
- // - if (error == 0 && nd.ni_vp && nd.ni_vp->v_type == VLNK) { - vpid = nd.ni_vp->v_id; - if (vget(nd.ni_vp, 0, p) == 0) { - if (vpid == nd.ni_vp->v_id && nd.ni_vp->v_type == VLNK) { - VNAME(nd.ni_vp) = nameptr; - nameptr = NULL; + if (vp == NULL) { + nd.ni_cnd.cn_nameiop = LOOKUP; + nd.ni_cnd.cn_flags = 0; + error = namei(&nd); + vp = nd.ni_vp; - if (VPARENT(nd.ni_vp) == NULL && vget(nd.ni_dvp, 0, p) == 0) { - VPARENT(nd.ni_vp) = nd.ni_dvp; - } + if (vp == NULL) + goto skipit; + } + +#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */ + /* call out to allow 3rd party notification of the symlink. * Ignore result of kauth_authorize_fileop call. + */ + if (kauth_authorize_fileop_has_listeners() && + namei(&nd) == 0) { + char *new_link_path = NULL; + int len; + + /* build the path to the new link file */ + new_link_path = get_pathbuff(); + len = MAXPATHLEN; + vn_getpath(dvp, new_link_path, &len); + new_link_path[len - 1] = '/'; + strcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr); + + kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_SYMLINK, + (uintptr_t)path, (uintptr_t)new_link_path); + if (new_link_path != NULL) + release_pathbuff(new_link_path); + } +#endif + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); + + add_fsevent(FSE_CREATE_FILE, &context, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); } - vrele(nd.ni_vp); - } - } - if (nameptr) { // only true if we didn't add it to the vnode - remove_name(nameptr); - } + } else + error = EEXIST; + +skipit: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); out: FREE_ZONE(path, MAXPATHLEN, M_NAMEI); + return (error); } /* * Delete a whiteout from the filesystem.
*/ -struct undelete_args { - char *path; -}; /* ARGSUSED */ +#warning XXX authorization not implemented for whiteouts int -undelete(p, uap, retval) - struct proc *p; - register struct undelete_args *uap; - register_t *retval; +undelete(struct proc *p, register struct undelete_args *uap, __unused register_t *retval) { int error; struct nameidata nd; + struct vfs_context context; + vnode_t vp, dvp; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - bwillwrite(); - NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT|AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); + dvp = nd.ni_dvp; + vp = nd.ni_vp; - if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - return (EEXIST); - } + if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) { + error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, &context); + } else + error = EEXIST; + + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); return (error); } /* * Delete a name from the filesystem. */ -struct unlink_args { - char *path; -}; /* ARGSUSED */ static int -_unlink(p, uap, retval, nodelbusy) - struct proc *p; - struct unlink_args *uap; - register_t *retval; - int nodelbusy; +_unlink(struct proc *p, struct unlink_args *uap, __unused register_t *retval, int nodelbusy) { - register struct vnode *vp; + vnode_t vp, dvp; int error; struct nameidata nd; + struct vfs_context context; + struct componentname *cnp; + int flags = 0; - bwillwrite(); - NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); - /* with Carbon semantics, busy files cannot be deleted */ + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); + cnp = &nd.ni_cnd; + + /* With Carbon delete semantics, busy files cannot be deleted */ if (nodelbusy) - nd.ni_cnd.cn_flags |= NODELETEBUSY; + flags |= VNODE_REMOVE_NODELETEBUSY; + error = namei(&nd); if (error) return (error); - + dvp = nd.ni_dvp; vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) + if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ - else { + } else { /* * The root of a mounted filesystem cannot be deleted. - * - * XXX: can this only be a VDIR case?
*/ - if (vp->v_flag & VROOT) + if (vp->v_flag & VROOT) { error = EBUSY; + } } - + /* authorize the delete operation */ + if (!error) + error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, &context); + if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + char *path = NULL; + int len; + fse_info finfo; + + if (need_fsevent(FSE_DELETE, dvp)) { + path = get_pathbuff(); + len = MAXPATHLEN; + vn_getpath(vp, path, &len); + get_fse_info(vp, &finfo, &context); + } + error = VNOP_REMOVE(dvp, vp, &nd.ni_cnd, flags, &context); + + if ( !error && path != NULL) { + add_fsevent(FSE_DELETE, &context, + FSE_ARG_STRING, len, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } + if (path != NULL) + release_pathbuff(path); } + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + vnode_put(dvp); + vnode_put(vp); return (error); } @@ -1578,53 +2143,51 @@ unlink(p, uap, retval) int delete(p, uap, retval) struct proc *p; - struct unlink_args *uap; + struct delete_args *uap; register_t *retval; { - return _unlink(p, uap, retval, 1); + return _unlink(p, (struct unlink_args *)uap, retval, 1); } /* * Reposition read/write file offset. */ -struct lseek_args { - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t offset; - int whence; -}; int lseek(p, uap, retval) struct proc *p; register struct lseek_args *uap; - register_t *retval; + off_t *retval; { - struct ucred *cred = p->p_ucred; - struct file *fp; + struct fileproc *fp; struct vnode *vp; - struct vattr vattr; - off_t offset = uap->offset; + struct vfs_context context; + off_t offset = uap->offset, file_size; int error; - if (error = fdgetf(p, uap->fd, &fp)) + if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) { + if (error == ENOTSUP) + return (ESPIPE); return (error); - if (fref(fp) == -1) - return (EBADF); - if (fp->f_type != DTYPE_VNODE) { - frele(fp); - return (ESPIPE); } - vp = (struct vnode *)fp->f_data; + if (vnode_isfifo(vp)) { + file_drop(uap->fd); + return(ESPIPE); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + switch (uap->whence) { case L_INCR: - offset += fp->f_offset; + offset += fp->f_fglob->fg_offset; break; case L_XTND: - if (error = VOP_GETATTR(vp, &vattr, cred, p)) + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + if ((error = vnode_size(vp, &file_size, &context)) != 0) break; - offset += vattr.va_size; + offset += file_size; break; case L_SET: break; @@ -1646,327 +2209,425 @@ lseek(p, uap, retval) error = EINVAL; } else { /* Success */ - fp->f_offset = offset; - *(off_t *)retval = fp->f_offset; + fp->f_fglob->fg_offset = offset; + *retval = fp->f_fglob->fg_offset; } } } - frele(fp); + (void)vnode_put(vp); + file_drop(uap->fd); return (error); } -#if COMPAT_43 + /* - * Reposition read/write file offset. + * Check access permissions. 
*/ -struct olseek_args { - int fd; - long offset; - int whence; -}; -int -olseek(p, uap, retval) - struct proc *p; - register struct olseek_args *uap; - register_t *retval; +static int +access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) { - struct lseek_args /* { - syscallarg(int) fd; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; -#endif - syscallarg(off_t) offset; - syscallarg(int) whence; - } */ nuap; - off_t qret; + kauth_action_t action; int error; - nuap.fd = uap->fd; - nuap.offset = uap->offset; - nuap.whence = uap->whence; - error = lseek(p, &nuap, &qret); - *(long *)retval = qret; - return (error); + /* + * If just the regular access bits, convert them to something + * that vnode_authorize will understand. + */ + if (!(uflags & _ACCESS_EXTENDED_MASK)) { + action = 0; + if (uflags & R_OK) + action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */ + if (uflags & W_OK) { + if (vnode_isdir(vp)) { + action |= KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY; + /* might want delete rights here too */ + } else { + action |= KAUTH_VNODE_WRITE_DATA; + } + } + if (uflags & X_OK) { + if (vnode_isdir(vp)) { + action |= KAUTH_VNODE_SEARCH; + } else { + action |= KAUTH_VNODE_EXECUTE; + } + } + } else { + /* take advantage of definition of uflags */ + action = uflags >> 8; + } + + /* action == 0 means only check for existence */ + if (action != 0) { + error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx); + } else { + error = 0; + } + + return(error); } -#endif /* COMPAT_43 */ -/* - * Check access permissions. - */ -struct access_args { - char *path; - int flags; -}; + + +/* XXX need to support the check-as uid argument */ int -access(p, uap, retval) - struct proc *p; - register struct access_args *uap; - register_t *retval; +access_extended(__unused struct proc *p, struct access_extended_args *uap, __unused register_t *retval) { - register struct ucred *cred = p->p_ucred; - register struct vnode *vp; - int error, flags, t_gid, t_uid; + struct accessx_descriptor *input; + errno_t *result; + int error, limit, nent, i, j, wantdelete; + struct vfs_context context; struct nameidata nd; - - t_uid = cred->cr_uid; - t_gid = cred->cr_groups[0]; - cred->cr_uid = p->p_cred->p_ruid; - cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); + int niopts; + vnode_t vp, dvp; + + input = NULL; + result = NULL; + error = 0; + vp = NULL; + dvp = NULL; + context.vc_ucred = NULL; + + /* check input size and fetch descriptor array into allocated storage */ + if (uap->size > ACCESSX_MAX_TABLESIZE) + return(ENOMEM); + if (uap->size < sizeof(struct accessx_descriptor)) + return(EINVAL); + MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK); + if (input == NULL) { + error = ENOMEM; + goto out; + } + error = copyin(uap->entries, input, uap->size); if (error) - goto out1; - vp = nd.ni_vp; + goto out; - /* Flags == 0 means only check for existence. */ - if (uap->flags) { - flags = 0; - if (uap->flags & R_OK) - flags |= VREAD; - if (uap->flags & W_OK) - flags |= VWRITE; - if (uap->flags & X_OK) - flags |= VEXEC; - if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) - error = VOP_ACCESS(vp, flags, cred, p); - } - vput(vp); -out1: - cred->cr_uid = t_uid; - cred->cr_groups[0] = t_gid; - return (error); + /* + * Access is defined as checking against the process' + * real identity, even if operations are checking the + * effective identity. 
So we need to tweak the credential + * in the context. + */ + context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); + context.vc_proc = current_proc(); + + /* + * Find out how many entries we have, so we can allocate the result array. + */ + limit = uap->size / sizeof(struct accessx_descriptor); + nent = limit; + wantdelete = 0; + for (i = 0; i < nent; i++) { + /* + * Take the offset to the name string for this entry and convert to an + * input array index, which would be one off the end of the array if this + * was the lowest-addressed name string. + */ + j = input[i].ad_name_offset / sizeof(struct accessx_descriptor); + /* bad input */ + if (j > limit) { + error = EINVAL; + goto out; + } + /* implicit reference to previous name, not a real offset */ + if (j == 0) { + /* first entry must have a name string */ + if (i == 0) { + error = EINVAL; + goto out; + } + continue; + } + if (j < nent) + nent = j; + } + if (nent > ACCESSX_MAX_DESCRIPTORS) { + error = ENOMEM; + goto out; + } + MALLOC(result, errno_t *, nent * sizeof(errno_t), M_TEMP, M_WAITOK); + if (result == NULL) { + error = ENOMEM; + goto out; + } + + /* + * Do the work. + */ + error = 0; + for (i = 0; i < nent; i++) { + /* + * Looking up a new name? + */ + if (input[i].ad_name_offset != 0) { + /* discard old vnodes */ + if (vp) { + vnode_put(vp); + vp = NULL; + } + if (dvp) { + vnode_put(dvp); + dvp = NULL; + } + + /* scan forwards to see if we need the parent this time */ + wantdelete = input[i].ad_flags & _DELETE_OK; + for (j = i + 1; (j < nent) && (input[j].ad_name_offset == 0); j++) + if (input[j].ad_flags & _DELETE_OK) + wantdelete = 1; + + niopts = FOLLOW | AUDITVNPATH1; + /* need parent for vnode_authorize for deletion test */ + if (wantdelete) + niopts |= WANTPARENT; + + /* do the lookup */ + NDINIT(&nd, LOOKUP, niopts, UIO_SYSSPACE, CAST_USER_ADDR_T((const char *)input + input[i].ad_name_offset), &context); + error = namei(&nd); + if (!error) { + vp = nd.ni_vp; + if (wantdelete) + dvp = nd.ni_dvp; + } + nameidone(&nd); + } + + /* + * Handle lookup errors. + */ + switch(error) { + case ENOENT: + case EACCES: + case EPERM: + case ENOTDIR: + result[i] = error; + break; + case 0: + /* run this access check */ + result[i] = access1(vp, dvp, input[i].ad_flags, &context); + break; + default: + /* fatal lookup error */ + + goto out; + } + } + + /* copy out results */ + error = copyout(result, uap->results, nent * sizeof(errno_t)); + +out: + if (input) + FREE(input, M_TEMP); + if (result) + FREE(result, M_TEMP); + if (vp) + vnode_put(vp); + if (dvp) + vnode_put(dvp); + if (context.vc_ucred) + kauth_cred_rele(context.vc_ucred); + return(error); } -#if COMPAT_43 -/* - * Get file status; this version follows links. - */ -struct ostat_args { - char *path; - struct ostat *ub; -}; -/* ARGSUSED */ int -ostat(p, uap, retval) - struct proc *p; - register struct ostat_args *uap; - register_t *retval; +access(__unused struct proc *p, register struct access_args *uap, __unused register_t *retval) { - struct stat sb; - struct ostat osb; int error; struct nameidata nd; + int niopts; + struct vfs_context context; + + /* + * Access is defined as checking against the process' + * real identity, even if operations are checking the + * effective identity. So we need to tweak the credential + * in the context. 
+ */ + context.vc_ucred = kauth_cred_copy_real(kauth_cred_get()); + context.vc_proc = current_proc(); + + niopts = FOLLOW | AUDITVNPATH1; + /* need parent for vnode_authorize for deletion test */ + if (uap->flags & _DELETE_OK) + niopts |= WANTPARENT; + NDINIT(&nd, LOOKUP, niopts, UIO_USERSPACE, uap->path, &context); + error = namei(&nd); + if (error) + goto out; + + error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context); + + vnode_put(nd.ni_vp); + if (uap->flags & _DELETE_OK) + vnode_put(nd.ni_dvp); + nameidone(&nd); + +out: + kauth_cred_rele(context.vc_ucred); + return(error); +} + + +static int +stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) +{ + struct stat sb; + struct user_stat user_sb; + caddr_t sbp; + int error, my_size; + kauth_filesec_t fsec; + size_t xsecurity_bufsize; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - if (error = namei(&nd)) + error = namei(ndp); + if (error) return (error); - error = vn_stat(nd.ni_vp, &sb, p); - vput(nd.ni_vp); + fsec = KAUTH_FILESEC_NONE; + error = vn_stat(ndp->ni_vp, &sb, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), ctx); + vnode_put(ndp->ni_vp); + nameidone(ndp); + if (error) return (error); - cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + /* Zap spare fields */ + sb.st_lspare = 0; + sb.st_qspare[0] = 0LL; + sb.st_qspare[1] = 0LL; + if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { + munge_stat(&sb, &user_sb); + my_size = sizeof(user_sb); + sbp = (caddr_t)&user_sb; + } + else { + my_size = sizeof(sb); + sbp = (caddr_t)&sb; + } + if ((error = copyout(sbp, ub, my_size)) != 0) + goto out; + + /* caller wants extended security information? */ + if (xsecurity != USER_ADDR_NULL) { + + /* did we get any? */ + if (fsec == KAUTH_FILESEC_NONE) { + if (susize(xsecurity_size, 0) != 0) { + error = EFAULT; + goto out; + } + } else { + /* find the user buffer size */ + xsecurity_bufsize = fusize(xsecurity_size); + + /* copy out the actual data size */ + if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) { + error = EFAULT; + goto out; + } + + /* if the caller supplied enough room, copy out to it */ + if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) + error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec)); + } + } +out: + if (fsec != KAUTH_FILESEC_NONE) + kauth_filesec_free(fsec); return (error); } /* - * Get file status; this version does not follow links. + * Get file status; this version follows links. */ -struct olstat_args { - char *path; - struct ostat *ub; -}; -/* ARGSUSED */ -int -olstat(p, uap, retval) - struct proc *p; - register struct olstat_args *uap; - register_t *retval; +static int +stat1(struct proc *p, user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) { - struct vnode *vp, *dvp; - struct stat sb, sb1; - struct ostat osb; - int error; struct nameidata nd; + struct vfs_context context; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT | AUDITVNPATH1, - UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) - return (error); - /* - * For symbolic links, always return the attributes of its - * containing directory, except for mode, size, and links. 
- */ - vp = nd.ni_vp; - dvp = nd.ni_dvp; - if (vp->v_type != VLNK) { - if (dvp == vp) - vrele(dvp); - else - vput(dvp); - error = vn_stat(vp, &sb, p); - vput(vp); - if (error) - return (error); - } else { - error = vn_stat(dvp, &sb, p); - vput(dvp); - if (error) { - vput(vp); - return (error); - } - error = vn_stat(vp, &sb1, p); - vput(vp); - if (error) - return (error); - sb.st_mode &= ~S_IFDIR; - sb.st_mode |= S_IFLNK; - sb.st_nlink = sb1.st_nlink; - sb.st_size = sb1.st_size; - sb.st_blocks = sb1.st_blocks; - } - cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); - return (error); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, &context); + return(stat2(&context, &nd, ub, xsecurity, xsecurity_size)); } -/* - * Convert from an old to a new stat structure. - */ -void -cvtstat(st, ost) - struct stat *st; - struct ostat *ost; +int +stat_extended(struct proc *p, struct stat_extended_args *uap, __unused register_t *retval) { + return (stat1(p, uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size)); +} - ost->st_dev = st->st_dev; - ost->st_ino = st->st_ino; - ost->st_mode = st->st_mode; - ost->st_nlink = st->st_nlink; - ost->st_uid = st->st_uid; - ost->st_gid = st->st_gid; - ost->st_rdev = st->st_rdev; - if (st->st_size < (quad_t)1 << 32) - ost->st_size = st->st_size; - else - ost->st_size = -2; - ost->st_atime = st->st_atime; - ost->st_mtime = st->st_mtime; - ost->st_ctime = st->st_ctime; - ost->st_blksize = st->st_blksize; - ost->st_blocks = st->st_blocks; - ost->st_flags = st->st_flags; - ost->st_gen = st->st_gen; +int +stat(struct proc *p, struct stat_args *uap, __unused register_t *retval) +{ + return(stat1(p, uap->path, uap->ub, 0, 0)); } -#endif /* COMPAT_43 */ /* - * The stat buffer spare fields are uninitialized - * so don't include them in the copyout. + * Get file status; this version does not follow links. */ -#define STATBUFSIZE \ - (sizeof(struct stat) - sizeof(int32_t) - 2 * sizeof(int64_t)) +static int +lstat1(struct proc *p, user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) +{ + struct nameidata nd; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, &context); + + return(stat2(&context, &nd, ub, xsecurity, xsecurity_size)); +} + +int +lstat_extended(struct proc *p, struct lstat_extended_args *uap, __unused register_t *retval) +{ + return (lstat1(p, uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size)); +} + +int +lstat(struct proc *p, struct lstat_args *uap, __unused register_t *retval) +{ + return(lstat1(p, uap->path, uap->ub, 0, 0)); +} + /* - * Get file status; this version follows links. + * Get configurable pathname variables. 
*/ -struct stat_args { - char *path; - struct stat *ub; -}; /* ARGSUSED */ int -stat(p, uap, retval) +pathconf(p, uap, retval) struct proc *p; - register struct stat_args *uap; + register struct pathconf_args *uap; register_t *retval; { - struct stat sb; int error; struct nameidata nd; + struct vfs_context context; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | SHAREDLEAF | AUDITVNPATH1, - UIO_USERSPACE, uap->path, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); - error = vn_stat(nd.ni_vp, &sb, p); - vput(nd.ni_vp); - if (error) - return (error); - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, STATBUFSIZE); - return (error); -} - -/* - * Get file status; this version does not follow links. - */ -struct lstat_args { - char *path; - struct stat *ub; -}; -/* ARGSUSED */ -int -lstat(p, uap, retval) - struct proc *p; - register struct lstat_args *uap; - register_t *retval; -{ - int error; - struct vnode *vp; - struct stat sb; - struct nameidata nd; - - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) - return (error); - vp = nd.ni_vp; - error = vn_stat(vp, &sb, p); - vput(vp); - if (error) - return (error); - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, STATBUFSIZE); - return (error); -} -/* - * Get configurable pathname variables. - */ -struct pathconf_args { - char *path; - int name; -}; -/* ARGSUSED */ -int -pathconf(p, uap, retval) - struct proc *p; - register struct pathconf_args *uap; - register_t *retval; -{ - int error; - struct nameidata nd; + error = vn_pathconf(nd.ni_vp, uap->name, retval, &context); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) - return (error); - error = VOP_PATHCONF(nd.ni_vp, uap->name, retval); - vput(nd.ni_vp); + vnode_put(nd.ni_vp); + nameidone(&nd); return (error); } /* * Return target name of a symbolic link. */ -struct readlink_args { - char *path; - char *buf; - int count; -}; /* ARGSUSED */ int readlink(p, uap, retval) @@ -1975,212 +2636,341 @@ readlink(p, uap, retval) register_t *retval; { register struct vnode *vp; - struct iovec aiov; - struct uio auio; + uio_t auio; + int spacetype = proc_is64bit(p) ? 
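The pathconf() rewrite above keeps the user-visible contract intact; only the kernel path changed (vn_pathconf() with an explicit vfs_context instead of VOP_PATHCONF()). A minimal user-space check, ordinary POSIX usage rather than anything new in the patch:

    #include <unistd.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* exercises the pathconf() syscall path rewritten above;
         * note errno is left unchanged when a limit is indefinite */
        long name_max = pathconf("/", _PC_NAME_MAX);

        if (name_max == -1)
            perror("pathconf");
        else
            printf("_PC_NAME_MAX on / is %ld\n", name_max);
        return (0);
    }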
UIO_USERSPACE64 : UIO_USERSPACE32; int error; struct nameidata nd; + struct vfs_context context; + char uio_buf[ UIO_SIZEOF(1) ]; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); + NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; + + nameidone(&nd); + + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->buf, uap->count); if (vp->v_type != VLNK) error = EINVAL; else { - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - error = VOP_READLINK(vp, &auio, p->p_ucred); - } - vput(vp); - *retval = uap->count - auio.uio_resid; + error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &context); + if (error == 0) + error = VNOP_READLINK(vp, auio, &context); + } + vnode_put(vp); + // LP64todo - fix this + *retval = uap->count - (int)uio_resid(auio); return (error); } +/* + * Change file flags. + */ +static int +chflags1(vnode_t vp, int flags, vfs_context_t ctx) +{ + struct vnode_attr va; + kauth_action_t action; + int error; + + VATTR_INIT(&va); + VATTR_SET(&va, va_flags, flags); + + /* request authorisation, disregard immutability */ + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + /* + * Request that the auth layer disregard those file flags it's allowed to when + * authorizing this operation; we need to do this in order to be able to + * clear immutable flags. + */ + if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); + +out: + vnode_put(vp); + return(error); +} + /* * Change flags of a file given a path name. */ -struct chflags_args { - char *path; - int flags; -}; /* ARGSUSED */ int -chflags(p, uap, retval) - struct proc *p; - register struct chflags_args *uap; - register_t *retval; +chflags(struct proc *p, register struct chflags_args *uap, __unused register_t *retval) { register struct vnode *vp; - struct vattr vattr; + struct vfs_context context; int error; struct nameidata nd; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + AUDIT_ARG(fflags, uap->flags); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - vput(vp); - return (error); + nameidone(&nd); + + error = chflags1(vp, uap->flags, &context); + + return(error); } /* * Change flags of a file given a file descriptor. 
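chflags1() above is the first appearance of the pattern this patch applies to every attribute-changing syscall: build a vnode_attr, let vnode_authattr() compute which kauth rights the change requires, authorize those rights, then apply the change with vnode_setattr(). A generic sketch of that three-step discipline (hypothetical helper name; the 'extra' argument stands in for bits such as KAUTH_VNODE_NOIMMUTABLE that chflags1() ORs in so immutable flags can be cleared):

    /*
     * Sketch: the preflight/authorize/apply pattern introduced above.
     */
    static int
    setattr_with_auth_sketch(vnode_t vp, struct vnode_attr *vap,
        kauth_action_t extra, vfs_context_t ctx)
    {
        kauth_action_t action;
        int error;

        /* preflight: which rights does this attribute change need? */
        if ((error = vnode_authattr(vp, vap, &action, ctx)) != 0)
            return (error);
        /* authorize only if something actually needs authorizing */
        if (action != 0 &&
            (error = vnode_authorize(vp, NULL, action | extra, ctx)) != 0)
            return (error);
        /* apply */
        return (vnode_setattr(vp, vap, ctx));
    }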
*/ -struct fchflags_args { - int fd; - int flags; -}; /* ARGSUSED */ int -fchflags(p, uap, retval) - struct proc *p; - register struct fchflags_args *uap; - register_t *retval; +fchflags(struct proc *p, register struct fchflags_args *uap, __unused register_t *retval) { - struct vattr vattr; + struct vfs_context context; struct vnode *vp; - struct file *fp; int error; AUDIT_ARG(fd, uap->fd); AUDIT_ARG(fflags, uap->flags); - if (error = getvnode(p, uap->fd, &fp)) + if ( (error = file_vnode(uap->fd, &vp)) ) return (error); - vp = (struct vnode *)fp->f_data; - - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = vnode_getwithref(vp))) { + file_drop(uap->fd); + return(error); + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); - VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp, 0, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + error = chflags1(vp, uap->flags, &context); + + file_drop(uap->fd); + return (error); +} + +/* + * Change security information on a filesystem object. + */ +static int +chmod2(vfs_context_t ctx, struct vnode *vp, struct vnode_attr *vap) +{ + kauth_action_t action; + int error; + + AUDIT_ARG(mode, (mode_t)vap->va_mode); +#warning XXX audit new args + + /* make sure that the caller is allowed to set this security information */ + if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) || + ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { + if (error == EACCES) + error = EPERM; + return(error); + } + + error = vnode_setattr(vp, vap, ctx); + return (error); } + /* * Change mode of a file given path name. */ -struct chmod_args { - char *path; - int mode; -}; -/* ARGSUSED */ +static int +chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) +{ + struct nameidata nd; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); + if ((error = namei(&nd))) + return (error); + error = chmod2(ctx, nd.ni_vp, vap); + vnode_put(nd.ni_vp); + nameidone(&nd); + return(error); +} + int -chmod(p, uap, retval) - struct proc *p; - register struct chmod_args *uap; - register_t *retval; +chmod_extended(struct proc *p, struct chmod_extended_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct vattr vattr; + struct vfs_context context; int error; - struct nameidata nd; + struct vnode_attr va; + kauth_filesec_t xsecdst; + + VATTR_INIT(&va); + if (uap->mode != -1) + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + + xsecdst = NULL; + switch(uap->xsecurity) { + /* explicit remove request */ + case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */ + VATTR_SET(&va, va_acl, NULL); + break; + /* not being set */ + case USER_ADDR_NULL: + break; + default: + if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return(error); + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount); + } + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - AUDIT_ARG(mode, (mode_t)uap->mode); + error = chmod1(&context, uap->path, &va); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) - return (error); - vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - 
VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + return(error); +} - vput(vp); - return (error); +int +chmod(struct proc *p, register struct chmod_args *uap, __unused register_t *retval) +{ + struct vfs_context context; + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + return(chmod1(&context, uap->path, &va)); } /* * Change mode of a file given a file descriptor. */ -struct fchmod_args { - int fd; - int mode; -}; -/* ARGSUSED */ -int -fchmod(p, uap, retval) - struct proc *p; - register struct fchmod_args *uap; - register_t *retval; +static int +fchmod1(struct proc *p, int fd, struct vnode_attr *vap) { - struct vattr vattr; struct vnode *vp; - struct file *fp; int error; + struct vfs_context context; - AUDIT_ARG(fd, uap->fd); - AUDIT_ARG(mode, (mode_t)uap->mode); - if (error = getvnode(p, uap->fd, &fp)) - return (error); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + AUDIT_ARG(fd, fd); + if ((error = file_vnode(fd, &vp)) != 0) + return (error); + if ((error = vnode_getwithref(vp)) != 0) { + file_drop(fd); + return(error); + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); - VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; - AUDIT_ARG(mode, (mode_t)vattr.va_mode); - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - - VOP_UNLOCK(vp, 0, p); + error = chmod2(&context, vp, vap); + (void)vnode_put(vp); + file_drop(fd); return (error); } +int +fchmod_extended(struct proc *p, struct fchmod_extended_args *uap, __unused register_t *retval) +{ + int error; + struct vnode_attr va; + kauth_filesec_t xsecdst; + + VATTR_INIT(&va); + if (uap->mode != -1) + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + if (uap->uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, uap->gid); + + xsecdst = NULL; + switch(uap->xsecurity) { + case USER_ADDR_NULL: + VATTR_SET(&va, va_acl, NULL); + break; + case CAST_USER_ADDR_T(-1): + break; + default: + if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return(error); + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + } + + error = fchmod1(p, uap->fd, &va); + + + switch(uap->xsecurity) { + case USER_ADDR_NULL: + case CAST_USER_ADDR_T(-1): + break; + default: + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + } + return(error); +} + +int +fchmod(struct proc *p, register struct fchmod_args *uap, __unused register_t *retval) +{ + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); + + return(fchmod1(p, uap->fd, &va)); +} + + /* * Set ownership given a path name. */ -struct chown_args { - char *path; - int uid; - int gid; -}; /* ARGSUSED */ -int -chown(p, uap, retval) - struct proc *p; - register struct chown_args *uap; - register_t *retval; +static int +chown1(vfs_context_t ctx, register struct chown_args *uap, __unused register_t *retval, int follow) { register struct vnode *vp; - struct vattr vattr; + struct vnode_attr va; int error; struct nameidata nd; + kauth_action_t action; AUDIT_ARG(owner, uap->uid, uap->gid); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, LOOKUP, (follow ? 
FOLLOW : 0) | AUDITVNPATH1, + UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; + nameidone(&nd); + /* * XXX A TEMPORARY HACK FOR NOW: Try to track console_user * by looking for chown() calls on /dev/console from a console process. @@ -2190,73 +2980,130 @@ chown(p, uap, retval) (minor(vp->v_specinfo->si_rdev) == 0)) { console_user = uap->uid; }; + VATTR_INIT(&va); + if (uap->uid != VNOVAL) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != VNOVAL) + VATTR_SET(&va, va_gid, uap->gid); + + /* preflight and authorize attribute changes */ + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); + +out: + /* + * EACCES is only allowed from namei(); permissions failure should + * return EPERM, so we need to translate the error code. + */ + if (error == EACCES) + error = EPERM; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - - vput(vp); + vnode_put(vp); return (error); } +int +chown(struct proc *p, register struct chown_args *uap, register_t *retval) +{ + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + return chown1(&context, uap, retval, 1); +} + +int +lchown(struct proc *p, register struct lchown_args *uap, register_t *retval) +{ + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + /* Argument list identical, but machine generated; cast for chown1() */ + return chown1(&context, (struct chown_args *)uap, retval, 0); +} + /* * Set ownership given a file descriptor. 
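Note that chmod_extended() and fchmod_extended() above decode their xsecurity argument with different sentinels: the path-based call treats (void *)1 as "remove the ACL" and NULL as "leave it alone", while the fd-based call treats NULL as remove and -1 as leave alone. A sketch of the fd-side decoding pulled out into a standalone helper (hypothetical name, same calls as the patch):

    /*
     * Sketch: decode xsecurity as fchmod_extended() does.  NULL removes
     * any ACL, -1 leaves security unchanged, anything else is a
     * user-space kauth_filesec to copy in and attach to the vnode_attr.
     */
    static int
    decode_xsecurity_sketch(user_addr_t xsecurity, struct vnode_attr *vap,
        kauth_filesec_t *xsecdstp)
    {
        int error;

        *xsecdstp = NULL;
        switch (xsecurity) {
        case USER_ADDR_NULL:                      /* explicit remove */
            VATTR_SET(vap, va_acl, NULL);
            return (0);
        case CAST_USER_ADDR_T(-1):                /* not being set */
            return (0);
        default:
            if ((error = kauth_copyinfilesec(xsecurity, xsecdstp)) != 0)
                return (error);
            VATTR_SET(vap, va_acl, &(*xsecdstp)->fsec_acl);
            return (0);
        }
    }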
*/ -struct fchown_args { - int fd; - int uid; - int gid; -}; /* ARGSUSED */ int -fchown(p, uap, retval) - struct proc *p; - register struct fchown_args *uap; - register_t *retval; +fchown(struct proc *p, register struct fchown_args *uap, __unused register_t *retval) { - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; struct vnode *vp; - struct file *fp; int error; + kauth_action_t action; AUDIT_ARG(owner, uap->uid, uap->gid); AUDIT_ARG(fd, uap->fd); - if (error = getvnode(p, uap->fd, &fp)) + if ( (error = file_vnode(uap->fd, &vp)) ) return (error); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } AUDIT_ARG(vnpath, vp, ARG_VNODE1); - VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + VATTR_INIT(&va); + if (uap->uid != VNOVAL) + VATTR_SET(&va, va_uid, uap->uid); + if (uap->gid != VNOVAL) + VATTR_SET(&va, va_gid, uap->gid); + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + /* preflight and authorize attribute changes */ + if ((error = vnode_authattr(vp, &va, &action, &context)) != 0) + goto out; + if (action && ((error = vnode_authorize(vp, NULL, action, &context)) != 0)) { + if (error == EACCES) + error = EPERM; + goto out; + } + error = vnode_setattr(vp, &va, &context); - VOP_UNLOCK(vp, 0, p); +out: + (void)vnode_put(vp); + file_drop(uap->fd); return (error); } static int getutimes(usrtvp, tsp) - const struct timeval *usrtvp; + user_addr_t usrtvp; struct timespec *tsp; { - struct timeval tv[2]; + struct user_timeval tv[2]; int error; - if (usrtvp == NULL) { - microtime(&tv[0]); - TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + if (usrtvp == USER_ADDR_NULL) { + struct timeval old_tv; + /* XXX Y2038 bug because of microtime argument */ + microtime(&old_tv); + TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]); tsp[1] = tsp[0]; } else { - if ((error = copyin((void *)usrtvp, (void *)tv, sizeof (tv))) != 0) + if (IS_64BIT_PROCESS(current_proc())) { + error = copyin(usrtvp, (void *)tv, sizeof(tv)); + } else { + struct timeval old_tv[2]; + error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv)); + tv[0].tv_sec = old_tv[0].tv_sec; + tv[0].tv_usec = old_tv[0].tv_usec; + tv[1].tv_sec = old_tv[1].tv_sec; + tv[1].tv_usec = old_tv[1].tv_usec; + } + if (error) return (error); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); @@ -2265,30 +3112,28 @@ getutimes(usrtvp, tsp) } static int -setutimes(p, vp, ts, nullflag) - struct proc *p; - struct vnode *vp; - const struct timespec *ts; - int nullflag; +setutimes(vfs_context_t ctx, struct vnode *vp, const struct timespec *ts, + int nullflag) { int error; - struct vattr vattr; - - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) - goto out; + struct vnode_attr va; + kauth_action_t action; AUDIT_ARG(vnpath, vp, ARG_VNODE1); - VATTR_NULL(&vattr); - vattr.va_atime = ts[0]; - vattr.va_mtime = ts[1]; + VATTR_INIT(&va); + VATTR_SET(&va, va_access_time, ts[0]); + VATTR_SET(&va, va_modify_time, ts[1]); if (nullflag) - vattr.va_vaflags |= VA_UTIMES_NULL; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); + va.va_vaflags |= VA_UTIMES_NULL; + + if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) + goto out; + /* since we may not need to auth anything, check here */ + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, 
ctx)) != 0)) + goto out; + error = vnode_setattr(vp, &va, ctx); - VOP_UNLOCK(vp, 0, p); out: return error; } @@ -2296,121 +3141,119 @@ out: /* * Set the access and modification times of a file. */ -struct utimes_args { - char *path; - struct timeval *tptr; -}; /* ARGSUSED */ int -utimes(p, uap, retval) - struct proc *p; - register struct utimes_args *uap; - register_t *retval; +utimes(struct proc *p, register struct utimes_args *uap, __unused register_t *retval) { struct timespec ts[2]; - struct timeval *usrtvp; + user_addr_t usrtvp; int error; struct nameidata nd; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* AUDIT: Needed to change the order of operations to do the * name lookup first because auditing wants the path. */ - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); + nameidone(&nd); + /* + * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch + * the current time instead. + */ usrtvp = uap->tptr; - if ((error = getutimes(usrtvp, ts)) != 0) { - vrele(nd.ni_vp); - return (error); - } - error = setutimes(p, nd.ni_vp, ts, usrtvp == NULL); - vrele(nd.ni_vp); + if ((error = getutimes(usrtvp, ts)) != 0) + goto out; + + error = setutimes(&context, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL); + +out: + vnode_put(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ -struct futimes_args { - int fd; - struct timeval *tptr; -}; /* ARGSUSED */ int -futimes(p, uap, retval) - struct proc *p; - register struct futimes_args *uap; - register_t *retval; +futimes(struct proc *p, register struct futimes_args *uap, __unused register_t *retval) { struct timespec ts[2]; - struct file *fp; - struct timeval *usrtvp; + struct vnode *vp; + user_addr_t usrtvp; int error; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); AUDIT_ARG(fd, uap->fd); usrtvp = uap->tptr; if ((error = getutimes(usrtvp, ts)) != 0) return (error); - if ((error = getvnode(p, uap->fd, &fp)) != 0) + if ((error = file_vnode(uap->fd, &vp)) != 0) return (error); + if((error = vnode_getwithref(vp))) { + file_drop(uap->fd); + return(error); + } - return setutimes(p, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + error = setutimes(&context, vp, ts, usrtvp == 0); + vnode_put(vp); + file_drop(uap->fd); + return(error); } /* * Truncate a file given its path name. 
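getutimes() above is what gives utimes(path, NULL) its meaning: a null tptr stamps both the access and modification times with "now" (via microtime(), hence the Y2038 remark in the code). From user space this is plain POSIX:

    #include <sys/time.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* touch both timestamps of /tmp/example to the current time;
         * the kernel takes the usrtvp == USER_ADDR_NULL branch in
         * getutimes() above */
        if (utimes("/tmp/example", NULL) == -1)
            perror("utimes");
        return (0);
    }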
*/ -struct truncate_args { - char *path; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t length; -}; /* ARGSUSED */ int -truncate(p, uap, retval) - struct proc *p; - register struct truncate_args *uap; - register_t *retval; +truncate(struct proc *p, register struct truncate_args *uap, __unused register_t *retval) { register struct vnode *vp; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; int error; struct nameidata nd; + kauth_action_t action; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); if (uap->length < 0) return(EINVAL); - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); - if (error = namei(&nd)) + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); + if ((error = namei(&nd))) return (error); vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) - error = EISDIR; - else if ((error = vn_writechk(vp)) == 0 && - (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { - VATTR_NULL(&vattr); - vattr.va_size = uap->length; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - } - vput(vp); + + nameidone(&nd); + + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, uap->length); + if ((error = vnode_authattr(vp, &va, &action, &context)) != 0) + goto out; + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, &context)) != 0)) + goto out; + error = vnode_setattr(vp, &va, &context); +out: + vnode_put(vp); return (error); } /* * Truncate a file given a file descriptor. */ -struct ftruncate_args { - int fd; -#ifdef DOUBLE_ALIGN_PARAMS - int pad; -#endif - off_t length; -}; /* ARGSUSED */ int ftruncate(p, uap, retval) @@ -2418,149 +3261,103 @@ ftruncate(p, uap, retval) register struct ftruncate_args *uap; register_t *retval; { - struct vattr vattr; + struct vfs_context context; + struct vnode_attr va; struct vnode *vp; - struct file *fp; - int error; + struct fileproc *fp; + int error ; + int fd = uap->fd; + context.vc_proc = current_proc(); + context.vc_ucred = kauth_cred_get(); + AUDIT_ARG(fd, uap->fd); if (uap->length < 0) return(EINVAL); - if (error = fdgetf(p, uap->fd, &fp)) - return (error); + if ( (error = fp_lookup(p,fd,&fp,0)) ) { + return(error); + } - if (fp->f_type == DTYPE_PSXSHM) { - return(pshm_truncate(p, fp, uap->fd, uap->length, retval)); + if (fp->f_fglob->fg_type == DTYPE_PSXSHM) { + error = pshm_truncate(p, fp, uap->fd, uap->length, retval); + goto out; + } + if (fp->f_fglob->fg_type != DTYPE_VNODE) { + error = EINVAL; + goto out; } - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + vp = (struct vnode *)fp->f_fglob->fg_data; - if ((fp->f_flag & FWRITE) == 0) - return (EINVAL); - vp = (struct vnode *)fp->f_data; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) - error = EISDIR; - else if ((error = vn_writechk(vp)) == 0) { - VATTR_NULL(&vattr); - vattr.va_size = uap->length; - error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); + if ((fp->f_fglob->fg_flag & FWRITE) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EINVAL; + goto out; } - VOP_UNLOCK(vp, 0, p); - return (error); -} - -#if COMPAT_43 -/* - * Truncate a file given its path name. 
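By this point the patch has repeated the same file-descriptor discipline in fchflags(), fchmod1(), fchown(), futimes() and ftruncate(): resolve the fd with file_vnode() (or fp_lookup()), take an iocount with vnode_getwithref(), operate, then release the iocount and the fileproc reference in that order. Factored into a hypothetical helper purely for clarity (the real code inlines it each time):

    /*
     * Sketch: the fd-to-vnode reference discipline used throughout.
     */
    static int
    with_fd_vnode_sketch(int fd, int (*op)(vnode_t, vfs_context_t),
        vfs_context_t ctx)
    {
        vnode_t vp;
        int error;

        if ((error = file_vnode(fd, &vp)) != 0)   /* fd -> vnode, holds fp */
            return (error);
        if ((error = vnode_getwithref(vp)) != 0) {
            file_drop(fd);                        /* balance file_vnode() */
            return (error);
        }
        error = op(vp, ctx);                      /* do the real work */
        (void)vnode_put(vp);                      /* drop the iocount */
        file_drop(fd);                            /* then the fp reference */
        return (error);
    }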
- */ -struct otruncate_args { - char *path; - long length; -}; -/* ARGSUSED */ -int -otruncate(p, uap, retval) - struct proc *p; - register struct otruncate_args *uap; - register_t *retval; -{ - struct truncate_args /* { - syscallarg(char *) path; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; -#endif - syscallarg(off_t) length; - } */ nuap; - nuap.path = uap->path; - nuap.length = uap->length; - return (truncate(p, &nuap, retval)); -} + if ((error = vnode_getwithref(vp)) != 0) { + goto out; + } -/* - * Truncate a file given a file descriptor. - */ -struct oftruncate_args { - int fd; - long length; -}; -/* ARGSUSED */ -int -oftruncate(p, uap, retval) - struct proc *p; - register struct oftruncate_args *uap; - register_t *retval; -{ - struct ftruncate_args /* { - syscallarg(int) fd; -#ifdef DOUBLE_ALIGN_PARAMS - syscallarg(int) pad; -#endif - syscallarg(off_t) length; - } */ nuap; + AUDIT_ARG(vnpath, vp, ARG_VNODE1); - nuap.fd = uap->fd; - nuap.length = uap->length; - return (ftruncate(p, &nuap, retval)); + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, uap->length); + error = vnode_setattr(vp, &va, &context); + (void)vnode_put(vp); +out: + file_drop(fd); + return (error); } -#endif /* COMPAT_43 */ + /* * Sync an open file. */ -struct fsync_args { - int fd; -}; /* ARGSUSED */ int -fsync(p, uap, retval) - struct proc *p; - struct fsync_args *uap; - register_t *retval; +fsync(struct proc *p, struct fsync_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct file *fp; + struct vnode *vp; + struct fileproc *fp; + struct vfs_context context; int error; - if (error = getvnode(p, uap->fd, &fp)) + if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) return (error); - if (fref(fp) == -1) - return (EBADF); - vp = (struct vnode *)fp->f_data; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); - VOP_UNLOCK(vp, 0, p); - frele(fp); + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + context.vc_proc = p; + context.vc_ucred = fp->f_fglob->fg_cred; + + error = VNOP_FSYNC(vp, MNT_WAIT, &context); + + (void)vnode_put(vp); + file_drop(uap->fd); return (error); } /* * Duplicate files. Source must be a file, target must be a file or * must not exist. + * + * XXX Copyfile authorisation checking is woefully inadequate, and will not + * perform inheritance correctly. */ - -struct copyfile_args { - char *from; - char *to; - int mode; - int flags; -}; /* ARGSUSED */ int -copyfile(p, uap, retval) - struct proc *p; - register struct copyfile_args *uap; - register_t *retval; +copyfile(struct proc *p, register struct copyfile_args *uap, __unused register_t *retval) { - register struct vnode *tvp, *fvp, *tdvp; - register struct ucred *cred = p->p_ucred; + vnode_t tvp, fvp, tdvp, sdvp; struct nameidata fromnd, tond; int error; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); /* Check that the flags are valid. 
*/ @@ -2569,32 +3366,31 @@ copyfile(p, uap, retval) } NDINIT(&fromnd, LOOKUP, SAVESTART | AUDITVNPATH1, - UIO_USERSPACE, uap->from, p); - if (error = namei(&fromnd)) + UIO_USERSPACE, uap->from, &context); + if ((error = namei(&fromnd))) return (error); fvp = fromnd.ni_vp; - NDINIT(&tond, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2, - UIO_USERSPACE, uap->to, p); - if (error = namei(&tond)) { - vrele(fvp); + NDINIT(&tond, CREATE, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2, + UIO_USERSPACE, uap->to, &context); + if ((error = namei(&tond))) { goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; + if (tvp != NULL) { if (!(uap->flags & CPF_OVERWRITE)) { error = EEXIST; goto out; } } - if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) { error = EISDIR; goto out; } - if (error = VOP_ACCESS(tdvp, VWRITE, cred, p)) + if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, &context)) != 0) goto out; if (fvp == tdvp) @@ -2606,121 +3402,151 @@ copyfile(p, uap, retval) */ if (fvp == tvp) error = -1; + if (!error) + error = VNOP_COPYFILE(fvp,tdvp,tvp,&tond.ni_cnd,uap->mode,uap->flags,&context); out: - if (!error) { - error = VOP_COPYFILE(fvp,tdvp,tvp,&tond.ni_cnd,uap->mode,uap->flags); - } else { - VOP_ABORTOP(tdvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - vrele(fvp); - } - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); + sdvp = tond.ni_startdir; + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + vnode_put(sdvp); out1: + vnode_put(fvp); + if (fromnd.ni_startdir) - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); + vnode_put(fromnd.ni_startdir); + nameidone(&fromnd); + if (error == -1) return (0); return (error); } + /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. 
*/ -struct rename_args { - char *from; - char *to; -}; /* ARGSUSED */ int -rename(p, uap, retval) - struct proc *p; - register struct rename_args *uap; - register_t *retval; +rename(proc_t p, register struct rename_args *uap, __unused register_t *retval) { - register struct vnode *tvp, *fvp, *tdvp; + vnode_t tvp, tdvp; + vnode_t fvp, fdvp; struct nameidata fromnd, tond; + struct vfs_context context; int error; int mntrename; - int casesense,casepres; - char *nameptr=NULL, *oname; - struct vnode *oparent; + char *oname, *from_name, *to_name; + int from_len, to_len; + int holding_mntlock; + mount_t locked_mp = NULL; + vnode_t oparent; + fse_info from_finfo, to_finfo; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + holding_mntlock = 0; +retry: + fvp = tvp = NULL; + fdvp = tdvp = NULL; mntrename = FALSE; - bwillwrite(); - NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNPATH1, - UIO_USERSPACE, uap->from, p); - error = namei(&fromnd); - if (error) - return (error); - fvp = fromnd.ni_vp; + NDINIT(&fromnd, DELETE, WANTPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->from, &context); + + if ( (error = namei(&fromnd)) ) + goto out1; + fdvp = fromnd.ni_dvp; + fvp = fromnd.ni_vp; - NDINIT(&tond, RENAME, - LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2, - UIO_USERSPACE, uap->to, p); - if (fromnd.ni_vp->v_type == VDIR) + NDINIT(&tond, RENAME, WANTPARENT | AUDITVNPATH2, UIO_USERSPACE, uap->to, &context); + if (fvp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; - if (error = namei(&tond)) { - /* Translate error code for rename("dir1", "dir2/."). */ - if (error == EISDIR && fvp->v_type == VDIR) - error = EINVAL; - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out2; + + if ( (error = namei(&tond)) ) { + /* + * Translate error code for rename("dir1", "dir2/."). + */ + if (error == EISDIR && fvp->v_type == VDIR) + error = EINVAL; + goto out1; } tdvp = tond.ni_dvp; - tvp = tond.ni_vp; + tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; - goto out; + goto out1; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; - goto out; + goto out1; } } - if (fvp == tdvp) + if (fvp == tdvp) { error = EINVAL; + goto out1; + } + /* - * If source is the same as the destination (that is the - * same inode number) then there is nothing to do... EXCEPT if the - * underlying file system supports case insensitivity and is case - * preserving. Then a special case is made, i.e. foo -> Foo. + * Authorization. * - * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE - * and _PC_CASE_PRESERVING can have this exception, and they need to - * handle the special case of getting the same vnode as target and - * source. NOTE: Then the target is unlocked going into VOP_RENAME, - * so not to cause locking problems. There is a single reference on tvp. + * If tvp is a directory and not the same as fdvp, or tdvp is not the same as fdvp, + * the node is moving between directories and we need rights to remove from the + * old and add to the new. * - * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE - * that correct behaviour then is just to remove the source (link) + * If tvp already exists and is not a directory, we need to be allowed to delete it. + * + * Note that we do not inherit when renaming. XXX this needs to be revisited to + * implement the deferred-inherit bit. 
*/ - if (fvp == tvp && fromnd.ni_dvp == tdvp) { - if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && - !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, - fromnd.ni_cnd.cn_namelen)) { - error = -1; /* Default "unix" behavior */ - } else { /* probe for file system specifics */ - if (VOP_PATHCONF(tdvp, _PC_CASE_SENSITIVE, &casesense)) - casesense = 1; - if (VOP_PATHCONF(tdvp, _PC_CASE_PRESERVING, &casepres)) - casepres = 1; - if (!casesense && casepres) - vput(tvp); /* Unlock target and drop ref */ + { + int moving = 0; + + error = 0; + if ((tvp != NULL) && vnode_isdir(tvp)) { + if (tvp != fdvp) + moving = 1; + } else if (tdvp != fdvp) { + moving = 1; + } + /* + * must have delete rights to remove the old name even in the simple case of + * fdvp == tdvp + */ + if ((error = vnode_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, &context)) != 0) + goto auth_exit; + if (moving) { + /* moving into tdvp or tvp, must have rights to add */ + if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp, + NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + &context)) != 0) + goto auth_exit; + } else { + /* node staying in same directory, must be allowed to add new name */ + if ((error = vnode_authorize(fdvp, NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, &context)) != 0) + goto auth_exit; } + /* overwriting tvp */ + if ((tvp != NULL) && !vnode_isdir(tvp) && + ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, &context)) != 0)) + goto auth_exit; + + /* XXX more checks? */ + +auth_exit: + /* authorization denied */ + if (error != 0) + goto out1; } - /* * Allow the renaming of mount points. * - target must not exist @@ -2728,534 +3554,608 @@ rename(p, uap, retval) * - union mounts cannot be renamed * - "/" cannot be renamed */ - if (!error && - (fvp->v_flag & VROOT) && + if ((fvp->v_flag & VROOT) && (fvp->v_type == VDIR) && (tvp == NULL) && (fvp->v_mountedhere == NULL) && - (fromnd.ni_dvp == tond.ni_dvp) && + (fdvp == tdvp) && ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) && (fvp->v_mount->mnt_vnodecovered != NULLVP)) { + struct vnode *coveredvp; /* switch fvp to the covered vnode */ - fromnd.ni_vp = fvp->v_mount->mnt_vnodecovered; - vrele(fvp); - fvp = fromnd.ni_vp; - VREF(fvp); + coveredvp = fvp->v_mount->mnt_vnodecovered; + if ( (vnode_getwithref(coveredvp)) ) { + error = ENOENT; + goto out1; + } + vnode_put(fvp); + + fvp = coveredvp; mntrename = TRUE; } -out: - if (!error) { - VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) - VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (tvp) - VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; + goto out1; + } + /* + * Avoid renaming "." and "..". 
+ */ + if (fvp->v_type == VDIR && + ((fdvp == fvp) || + (fromnd.ni_cnd.cn_namelen == 1 && fromnd.ni_cnd.cn_nameptr[0] == '.') || + ((fromnd.ni_cnd.cn_flags | tond.ni_cnd.cn_flags) & ISDOTDOT)) ) { + error = EINVAL; + goto out1; + } + /* + * The following edge case is caught here: + * (to cannot be a descendent of from) + * + * o fdvp + * / + * / + * o fvp + * \ + * \ + * o tdvp + * / + * / + * o tvp + */ + if (tdvp->v_parent == fvp) { + error = EINVAL; + goto out1; + } - // XXXdbg - so that the fs won't block when it vrele()'s - // these nodes before returning - if (fromnd.ni_dvp != tdvp) { - vget(tdvp, 0, p); + /* + * If source is the same as the destination (that is the + * same inode number) then there is nothing to do... + * EXCEPT if the underlying file system supports case + * insensitivity and is case preserving. In this case + * the file system needs to handle the special case of + * getting the same vnode as target (fvp) and source (tvp). + * + * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE + * and _PC_CASE_PRESERVING can have this exception, and they need to + * handle the special case of getting the same vnode as target and + * source. NOTE: Then the target is unlocked going into vnop_rename, + * so not to cause locking problems. There is a single reference on tvp. + * + * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE + * that correct behaviour then is just to remove the source (link) + */ + if (fvp == tvp && fdvp == tdvp) { + if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && + !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, + fromnd.ni_cnd.cn_namelen)) { + goto out1; } - - // save these off so we can later verify that fvp is the same - oname = VNAME(fvp); - oparent = VPARENT(fvp); + } - nameptr = add_name(tond.ni_cnd.cn_nameptr, - tond.ni_cnd.cn_namelen, - tond.ni_cnd.cn_hash, 0); + if (holding_mntlock && fvp->v_mount != locked_mp) { + /* + * we're holding a reference and lock + * on locked_mp, but it no longer matches + * what we want to do... so drop our hold + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp != fdvp && fvp->v_type == VDIR) { + /* + * serialize renames that re-shape + * the tree... if holding_mntlock is + * set, then we're ready to go... + * otherwise we + * first need to drop the iocounts + * we picked up, second take the + * lock to serialize the access, + * then finally start the lookup + * process over with the lock held + */ + if (!holding_mntlock) { + /* + * need to grab a reference on + * the mount point before we + * drop all the iocounts... 
once + * the iocounts are gone, the mount + * could follow + */ + locked_mp = fvp->v_mount; + mount_ref(locked_mp, 0); + /* + * nameidone has to happen before we vnode_put(tvp) + * since it may need to release the fs_nodelock on the tvp + */ + nameidone(&tond); - error = VOP_RENAME(fromnd.ni_dvp, fvp, &fromnd.ni_cnd, - tond.ni_dvp, tvp, &tond.ni_cnd); - if (error) { - remove_name(nameptr); - nameptr = NULL; - if (fromnd.ni_dvp != tdvp) { - vrele(tdvp); - } + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fvp + */ + nameidone(&fromnd); - goto out1; + vnode_put(fvp); + vnode_put(fdvp); + + mount_lock_renames(locked_mp); + holding_mntlock = 1; + + goto retry; } - - /* - * update filesystem's mount point data + } else { + /* + * when we dropped the iocounts to take + * the lock, we allowed the identity of + * the various vnodes to change... if they did, + * we may no longer be dealing with a rename + * that reshapes the tree... once we're holding + * the iocounts, the vnodes can't change type + * so we're free to drop the lock at this point + * and continue on */ - if (mntrename) { - char *cp, *pathend, *mpname; - char * tobuf; - struct mount *mp; - int maxlen; - size_t len = 0; - - VREF(fvp); - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); - mp = fvp->v_mountedhere; - - if (vfs_busy(mp, LK_NOWAIT, 0, p)) { - vput(fvp); - error = EBUSY; - goto out1; - } - VOP_UNLOCK(fvp, 0, p); - - MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len); - if (!error) { - /* find current mount point prefix */ - pathend = &mp->mnt_stat.f_mntonname[0]; - for (cp = pathend; *cp != '\0'; ++cp) { - if (*cp == '/') - pathend = cp + 1; - } - /* find last component of target name */ - for (mpname = cp = tobuf; *cp != '\0'; ++cp) { - if (*cp == '/') - mpname = cp + 1; - } - /* append name to prefix */ - maxlen = MNAMELEN - (pathend - mp->mnt_stat.f_mntonname); - bzero(pathend, maxlen); - strncpy(pathend, mpname, maxlen - 1); - } - FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI); - - vrele(fvp); - vfs_unbusy(mp, p); + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; } + } + // save these off so we can later verify that fvp is the same + oname = fvp->v_name; + oparent = fvp->v_parent; + if (need_fsevent(FSE_RENAME, fvp)) { + get_fse_info(fvp, &from_finfo, &context); - // fix up name & parent pointers. note that we first - // check that fvp has the same name/parent pointers it - // had before the rename call and then we lock fvp so - // that it won't go away on us when we hit blocking - // points like remove_name() or vrele() where fvp could - // get recycled. 
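The retry logic above deserves unpacking: a rename that reshapes the directory tree must hold the per-mount rename lock, but that lock cannot be taken while namei() iocounts are outstanding, so the code pins the mount, drops everything, takes the lock, and restarts both lookups from scratch. An editor's sketch of the control flow, with the lookups and authorization folded into a hypothetical helper (they are inline in the real rename()):

    static int lookup_and_authorize(vnode_t *fdvp, vnode_t *fvp,
        vnode_t *tdvp, vnode_t *tvp);             /* hypothetical */

    static int
    rename_serialize_sketch(void)
    {
        vnode_t fdvp, fvp, tdvp, tvp;
        mount_t locked_mp = NULL;
        int holding_mntlock = 0;
        int error;

    retry:
        if ((error = lookup_and_authorize(&fdvp, &fvp, &tdvp, &tvp)) != 0)
            return (error);
        if (holding_mntlock && fvp->v_mount != locked_mp) {
            /* identities changed while unlocked; lock is for the wrong mount */
            mount_unlock_renames(locked_mp);
            mount_drop(locked_mp, 0);
            holding_mntlock = 0;
        }
        if (tdvp != fdvp && fvp->v_type == VDIR && !holding_mntlock) {
            locked_mp = fvp->v_mount;
            mount_ref(locked_mp, 0);      /* pin the mount before iocounts go */
            /* ... vnode_put() every vnode, nameidone() both nameidatas ... */
            mount_lock_renames(locked_mp);
            holding_mntlock = 1;
            goto retry;                   /* re-lookup under the rename lock */
        }
        /* ... VNOP_RENAME(), then unlock and drop if holding_mntlock ... */
        return (0);
    }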
- if (oname == VNAME(fvp) && oparent == VPARENT(fvp) && vget(fvp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { - if (VNAME(fvp)) { - char *tmp = VNAME(fvp); - VNAME(fvp) = NULL; - remove_name(tmp); - } + if (tvp) { + get_fse_info(tvp, &to_finfo, &context); + } + from_name = get_pathbuff(); + from_len = MAXPATHLEN; + vn_getpath(fvp, from_name, &from_len); - VNAME(fvp) = nameptr; - nameptr = NULL; - - if (fromnd.ni_dvp != tdvp) { - struct vnode *tmpvp; - - tmpvp = VPARENT(fvp); - VPARENT(fvp) = NULL; - vrele(tmpvp); + to_name = get_pathbuff(); + to_len = MAXPATHLEN; + + if (tvp && tvp->v_type != VDIR) { + vn_getpath(tvp, to_name, &to_len); + } else { + vn_getpath(tdvp, to_name, &to_len); + // if the path is not just "/", then append a "/" + if (to_len > 2) { + to_name[to_len-1] = '/'; + } else { + to_len--; + } + strcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr); + to_len += tond.ni_cnd.cn_namelen + 1; + to_name[to_len] = '\0'; + } + } else { + from_name = NULL; + to_name = NULL; + } + error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd, + tdvp, tvp, &tond.ni_cnd, + &context); - VPARENT(fvp) = tdvp; + if (holding_mntlock) { + /* + * we can drop our serialization + * lock now + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (error) { + if (to_name != NULL) + release_pathbuff(to_name); + if (from_name != NULL) + release_pathbuff(from_name); + from_name = to_name = NULL; - // note: we don't vrele() tdvp because we want to keep - // the reference until fvp gets recycled - } - - vput(fvp); - + goto out1; + } + + /* call out to allow 3rd party notification of rename. + * Ignore result of kauth_authorize_fileop call. + */ + kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_RENAME, + (uintptr_t)from_name, (uintptr_t)to_name); + + if (from_name != NULL && to_name != NULL) { + if (tvp) { + add_fsevent(FSE_RENAME, &context, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_FINFO, &to_finfo, + FSE_ARG_DONE); } else { - // if fvp isn't kosher anymore and we locked tdvp, - // release tdvp - if (fromnd.ni_dvp != tdvp) { - vrele(tdvp); - } - remove_name(nameptr); - nameptr = NULL; + add_fsevent(FSE_RENAME, &context, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_DONE); + } + } + if (to_name != NULL) + release_pathbuff(to_name); + if (from_name != NULL) + release_pathbuff(from_name); + from_name = to_name = NULL; + + /* + * update filesystem's mount point data + */ + if (mntrename) { + char *cp, *pathend, *mpname; + char * tobuf; + struct mount *mp; + int maxlen; + size_t len = 0; + + mp = fvp->v_mountedhere; + + if (vfs_busy(mp, LK_NOWAIT)) { + error = EBUSY; + goto out1; } + MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - } else { - VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); - if (tvp) - vput(tvp); - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len); + if (!error) { + /* find current mount point prefix */ + pathend = &mp->mnt_vfsstat.f_mntonname[0]; + for (cp = pathend; *cp != '\0'; ++cp) { + if (*cp == '/') + pathend = cp + 1; + } + /* find last component of target name */ + for (mpname = cp = tobuf; *cp != '\0'; ++cp) { + if (*cp == '/') + mpname = cp + 1; + } + /* append name to prefix */ + maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname); + bzero(pathend, 
maxlen); + strncpy(pathend, mpname, maxlen - 1); + } + FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI); + + vfs_unbusy(mp); + } + /* + * fix up name & parent pointers. note that we first + * check that fvp has the same name/parent pointers it + * had before the rename call... this is a 'weak' check + * at best... + */ + if (oname == fvp->v_name && oparent == fvp->v_parent) { + int update_flags; + + update_flags = VNODE_UPDATE_NAME; + + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + + vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags); } out1: - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); -out2: - if (fromnd.ni_startdir) - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); - if (error == -1) - return (0); + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + } + if (tdvp) { + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&tond); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + } + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromnd); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + } return (error); } /* * Make a directory file. */ -struct mkdir_args { - char *path; - int mode; -}; /* ARGSUSED */ -int -mkdir(p, uap, retval) - struct proc *p; - register struct mkdir_args *uap; - register_t *retval; +static int +mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) { - register struct vnode *vp; - struct vattr vattr; + vnode_t vp, dvp; int error; + int update_flags = 0; struct nameidata nd; - char *nameptr; - AUDIT_ARG(mode, (mode_t)uap->mode); - bwillwrite(); - NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + AUDIT_ARG(mode, vap->va_mode); + NDINIT(&nd, CREATE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, path, ctx); nd.ni_cnd.cn_flags |= WILLBEDIR; error = namei(&nd); if (error) return (error); + dvp = nd.ni_dvp; vp = nd.ni_vp; - if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); - return (EEXIST); - } - VATTR_NULL(&vattr); - vattr.va_type = VDIR; - vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - nameptr = add_name(nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, 0); + if (vp != NULL) { + error = EEXIST; + goto out; + } + + /* authorize addition of a directory to the parent */ + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx)) != 0) + goto out; + + VATTR_SET(vap, va_type, VDIR); + + /* make the directory */ + if ((error = vn_create(dvp, &vp, &nd.ni_cnd, vap, 0, ctx)) != 0) + goto out; + + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); - error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (!error) { - VNAME(nd.ni_vp) = nameptr; - if (VPARENT(nd.ni_vp) == NULL && vget(nd.ni_dvp, 0, p) == 0) { - VPARENT(nd.ni_vp) = nd.ni_dvp; - } + add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, 
FSE_ARG_DONE); + +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + if (vp) + vnode_put(vp); + vnode_put(dvp); - vput(nd.ni_vp); - } return (error); } -/* - * Remove a directory file. - */ -struct rmdir_args { - char *path; -}; -/* ARGSUSED */ + int -rmdir(p, uap, retval) - struct proc *p; - struct rmdir_args *uap; - register_t *retval; +mkdir_extended(struct proc *p, register struct mkdir_extended_args *uap, __unused register_t *retval) { - register struct vnode *vp; - int error; - struct nameidata nd; - - bwillwrite(); - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) - return (error); - vp = nd.ni_vp; - if (vp->v_type != VDIR) { - error = ENOTDIR; - goto out; - } - /* - * No rmdir "." please. - */ - if (nd.ni_dvp == vp) { - error = EINVAL; - goto out; - } - /* - * The root of a mounted filesystem cannot be deleted. - */ - if (vp->v_flag & VROOT) - error = EBUSY; -out: - if (!error) { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); - } - return (error); + struct vfs_context context; + int ciferror; + kauth_filesec_t xsecdst; + struct vnode_attr va; + + xsecdst = NULL; + if ((uap->xsecurity != USER_ADDR_NULL) && + ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) + return ciferror; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); + if (xsecdst != NULL) + VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); + + ciferror = mkdir1(&context, uap->path, &va); + if (xsecdst != NULL) + kauth_filesec_free(xsecdst); + return ciferror; } -#if COMPAT_43 -/* - * Read a block of directory entries in a file system independent format. 
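mkdir1() above ends with the same name/parent hookup that rename() performs via vnode_update_identity(): filesystems are not obliged to populate v_name and v_parent at create time, so the VFS layer patches in whatever is missing. The pattern in isolation (hypothetical helper, same fields and calls as the patch):

    /*
     * Sketch: hook up a freshly created vnode's name and parent.
     */
    static void
    fixup_identity_sketch(vnode_t vp, vnode_t dvp, struct componentname *cnp)
    {
        int update_flags = 0;

        if (vp->v_name == NULL)
            update_flags |= VNODE_UPDATE_NAME;
        if (vp->v_parent == NULLVP)
            update_flags |= VNODE_UPDATE_PARENT;
        if (update_flags)
            vnode_update_identity(vp, dvp, cnp->cn_nameptr,
                cnp->cn_namelen, cnp->cn_hash, update_flags);
    }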
- */ -struct ogetdirentries_args { - int fd; - char *buf; - u_int count; - long *basep; -}; int -ogetdirentries(p, uap, retval) - struct proc *p; - register struct ogetdirentries_args *uap; - register_t *retval; +mkdir(struct proc *p, register struct mkdir_args *uap, __unused register_t *retval) { - register struct vnode *vp; - struct file *fp; - struct uio auio, kuio; - struct iovec aiov, kiov; - struct dirent *dp, *edp; - caddr_t dirbuf; - int error, eofflag, readcnt; - long loff; + struct vfs_context context; + struct vnode_attr va; - AUDIT_ARG(fd, uap->fd); - if (error = getvnode(p, uap->fd, &fp)) - return (error); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); - if ((fp->f_flag & FREAD) == 0) - return (EBADF); - vp = (struct vnode *)fp->f_data; -unionread: - if (vp->v_type != VDIR) - return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - loff = auio.uio_offset = fp->f_offset; -# if (BYTE_ORDER != LITTLE_ENDIAN) - if (vp->v_mount->mnt_maxsymlinklen <= 0) { - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, - (int *)0, (u_long **)0); - fp->f_offset = auio.uio_offset; - } else -# endif - { - kuio = auio; - kuio.uio_iov = &kiov; - kuio.uio_segflg = UIO_SYSSPACE; - kiov.iov_len = uap->count; - MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); - kiov.iov_base = dirbuf; - error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, - (int *)0, (u_long **)0); - fp->f_offset = kuio.uio_offset; - if (error == 0) { - readcnt = uap->count - kuio.uio_resid; - edp = (struct dirent *)&dirbuf[readcnt]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - /* - * The expected low byte of - * dp->d_namlen is our dp->d_type. - * The high MBZ byte of dp->d_namlen - * is our dp->d_namlen. - */ - dp->d_type = dp->d_namlen; - dp->d_namlen = 0; -# else - /* - * The dp->d_type is the high byte - * of the expected dp->d_namlen, - * so must be zero'ed. - */ - dp->d_type = 0; -# endif - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; - } - } - if (dp >= edp) - error = uiomove(dirbuf, readcnt, &auio); - } - FREE(dirbuf, M_TEMP); - } - VOP_UNLOCK(vp, 0, p); - if (error) - return (error); + return(mkdir1(&context, uap->path, &va)); +} -#if UNION +/* + * Remove a directory file. 
+ */ +/* ARGSUSED */ +int +rmdir(struct proc *p, struct rmdir_args *uap, __unused register_t *retval) { - extern int (**union_vnodeop_p)(void *); - extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); - - if ((uap->count == auio.uio_resid) && - (vp->v_op == union_vnodeop_p)) { - struct vnode *lvp; - - lvp = union_dircache(vp, p); - if (lvp != NULLVP) { - struct vattr va; - - /* - * If the directory is opaque, - * then don't show lower entries - */ - error = VOP_GETATTR(vp, &va, fp->f_cred, p); - if (va.va_flags & OPAQUE) { - vput(lvp); - lvp = NULL; - } - } + vnode_t vp, dvp; + int error; + struct nameidata nd; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, DELETE, LOCKPARENT | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); + error = namei(&nd); + if (error) + return (error); + dvp = nd.ni_dvp; + vp = nd.ni_vp; + + if (vp->v_type != VDIR) { + /* + * rmdir only deals with directories + */ + error = ENOTDIR; + } else if (dvp == vp) { + /* + * No rmdir "." please. + */ + error = EINVAL; + } else if (vp->v_flag & VROOT) { + /* + * The root of a mounted filesystem cannot be deleted. + */ + error = EBUSY; + } else { + error = vnode_authorize(vp, nd.ni_dvp, KAUTH_VNODE_DELETE, &context); + } + if (!error) { + char *path = NULL; + int len; + fse_info finfo; - if (lvp != NULLVP) { - error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); - if (error) { - vput(lvp); - return (error); - } - VOP_UNLOCK(lvp, 0, p); - fp->f_data = (caddr_t) lvp; - fp->f_offset = 0; - error = VOP_CLOSE(vp, FREAD, fp->f_cred, p); - vrele(vp); - if (error) - return (error); - vp = lvp; - goto unionread; + if (need_fsevent(FSE_DELETE, dvp)) { + path = get_pathbuff(); + len = MAXPATHLEN; + vn_getpath(vp, path, &len); + get_fse_info(vp, &finfo, &context); } - } -} -#endif /* UNION */ + error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, &context); - if ((uap->count == auio.uio_resid) && - (vp->v_flag & VROOT) && - (vp->v_mount->mnt_flag & MNT_UNION)) { - struct vnode *tvp = vp; - vp = vp->v_mount->mnt_vnodecovered; - VREF(vp); - fp->f_data = (caddr_t) vp; - fp->f_offset = 0; - vrele(tvp); - goto unionread; + if (!error && path != NULL) { + add_fsevent(FSE_DELETE, &context, + FSE_ARG_STRING, len, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } + if (path != NULL) + release_pathbuff(path); } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, - sizeof(long)); - *retval = uap->count - auio.uio_resid; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&nd); + + vnode_put(dvp); + vnode_put(vp); + return (error); } -#endif /* COMPAT_43 */ + /* * Read a block of directory entries in a file system independent format. */ -struct getdirentries_args { - int fd; - char *buf; - u_int count; - long *basep; -}; int getdirentries(p, uap, retval) struct proc *p; register struct getdirentries_args *uap; register_t *retval; { - register struct vnode *vp; - struct file *fp; - struct uio auio; - struct iovec aiov; + struct vnode *vp; + struct vfs_context context; + struct fileproc *fp; + uio_t auio; + int spacetype = proc_is64bit(p) ? 
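rmdir() above illustrates an ordering constraint worth calling out: the fsevents path and file info must be captured with vn_getpath()/get_fse_info() before VNOP_RMDIR() runs, because afterwards the vnode's name may no longer be resolvable. The same capture-then-delete shape as a standalone sketch (helper name is hypothetical; the calls are the ones used above):

    /*
     * Sketch: resolve the path while the node still exists, delete,
     * then post the event only on success.
     */
    static int
    rmdir_with_fsevent_sketch(vnode_t dvp, vnode_t vp,
        struct componentname *cnp, vfs_context_t ctx)
    {
        char *path = NULL;
        int len = MAXPATHLEN, error;
        fse_info finfo;

        if (need_fsevent(FSE_DELETE, dvp)) {
            path = get_pathbuff();
            vn_getpath(vp, path, &len);   /* resolve before the delete */
            get_fse_info(vp, &finfo, ctx);
        }
        error = VNOP_RMDIR(dvp, vp, cnp, ctx);
        if (error == 0 && path != NULL)
            add_fsevent(FSE_DELETE, ctx, FSE_ARG_STRING, len, path,
                FSE_ARG_FINFO, &finfo, FSE_ARG_DONE);
        if (path != NULL)
            release_pathbuff(path);
        return (error);
    }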
UIO_USERSPACE64 : UIO_USERSPACE32; long loff; int error, eofflag; + int fd = uap->fd; + char uio_buf[ UIO_SIZEOF(1) ]; AUDIT_ARG(fd, uap->fd); - error = getvnode(p, uap->fd, &fp); + error = fp_getfvp(p, fd, &fp, &vp); if (error) return (error); - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EBADF; + goto out; + } + if ( (error = vnode_getwithref(vp)) ) { + goto out; + } + + AUDIT_ARG(vnpath, vp, ARG_VNODE1); - if ((fp->f_flag & FREAD) == 0) - return (EBADF); - vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) - return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->count; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - loff = auio.uio_offset = fp->f_offset; - error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, - (int *)0, (u_long **)0); - fp->f_offset = auio.uio_offset; - VOP_UNLOCK(vp, 0, p); - if (error) - return (error); + if (vp->v_type != VDIR) { + (void)vnode_put(vp); + error = EINVAL; + goto out; + } + context.vc_proc = p; + context.vc_ucred = fp->f_fglob->fg_cred; + + loff = fp->f_fglob->fg_offset; + auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->buf, uap->count); + + error = VNOP_READDIR(vp, auio, 0, &eofflag, (int *)NULL, &context); + fp->f_fglob->fg_offset = uio_offset(auio); + if (error) { + (void)vnode_put(vp); + goto out; + } #if UNION { - extern int (**union_vnodeop_p)(void *); - extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); - - if ((uap->count == auio.uio_resid) && + if ((uap->count == uio_resid(auio)) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { - struct vattr va; - + struct vnode_attr va; /* * If the directory is opaque, * then don't show lower entries */ - error = VOP_GETATTR(vp, &va, fp->f_cred, p); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + error = vnode_getattr(vp, &va, &context); if (va.va_flags & OPAQUE) { - vput(lvp); + vnode_put(lvp); lvp = NULL; } } if (lvp != NULLVP) { - error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + error = VNOP_OPEN(lvp, FREAD, &context); if (error) { - vput(lvp); - return (error); + vnode_put(lvp); + goto out; } - VOP_UNLOCK(lvp, 0, p); - fp->f_data = (caddr_t) lvp; - fp->f_offset = 0; - error = VOP_CLOSE(vp, FREAD, fp->f_cred, p); - vrele(vp); + vnode_ref(lvp); + fp->f_fglob->fg_data = (caddr_t) lvp; + fp->f_fglob->fg_offset = 0; + error = VNOP_CLOSE(vp, FREAD, &context); + vnode_rele(vp); + vnode_put(vp); if (error) - return (error); + goto out; vp = lvp; goto unionread; } @@ -3263,99 +4163,114 @@ unionread: } #endif /* UNION */ - if ((uap->count == auio.uio_resid) && + if (((user_ssize_t)uap->count == uio_resid(auio)) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; - VREF(vp); - fp->f_data = (caddr_t) vp; - fp->f_offset = 0; - vrele(tvp); + vnode_getwithref(vp); + vnode_ref(vp); + fp->f_fglob->fg_data = (caddr_t) vp; + fp->f_fglob->fg_offset = 0; + vnode_rele(tvp); + vnode_put(tvp); goto unionread; } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, - sizeof(long)); - *retval = uap->count - auio.uio_resid; + vnode_put(vp); + error = copyout((caddr_t)&loff, uap->basep, sizeof(long)); + // LP64todo - 
fix this + *retval = uap->count - uio_resid(auio); +out: + file_drop(fd); return (error); } /* * Set the mode mask for creation of filesystem nodes. */ -struct umask_args { - int newmask; -}; -int -umask(p, uap, retval) - struct proc *p; - struct umask_args *uap; - register_t *retval; +#warning XXX implement xsecurity + +#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */ +static int +umask1(struct proc *p, int newmask, __unused kauth_filesec_t fsec, register_t *retval) { register struct filedesc *fdp; - AUDIT_ARG(mask, uap->newmask); + AUDIT_ARG(mask, newmask); fdp = p->p_fd; *retval = fdp->fd_cmask; - fdp->fd_cmask = uap->newmask & ALLPERMS; + fdp->fd_cmask = newmask & ALLPERMS; return (0); } + +int +umask_extended(struct proc *p, struct umask_extended_args *uap, register_t *retval) +{ + int ciferror; + kauth_filesec_t xsecdst; + + xsecdst = KAUTH_FILESEC_NONE; + if (uap->xsecurity != USER_ADDR_NULL) { + if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) + return ciferror; + } else { + xsecdst = KAUTH_FILESEC_NONE; + } + + ciferror = umask1(p, uap->newmask, xsecdst, retval); + + if (xsecdst != KAUTH_FILESEC_NONE) + kauth_filesec_free(xsecdst); + return ciferror; +} + +int +umask(struct proc *p, struct umask_args *uap, register_t *retval) +{ + return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval)); +} + /* * Void all references to file by ripping underlying filesystem * away from vnode. */ -struct revoke_args { - char *path; -}; /* ARGSUSED */ int -revoke(p, uap, retval) - struct proc *p; - register struct revoke_args *uap; - register_t *retval; +revoke(struct proc *p, register struct revoke_args *uap, __unused register_t *retval) { register struct vnode *vp; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; - if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) + + nameidone(&nd); + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if ((error = vnode_getattr(vp, &va, &context))) goto out; - if (p->p_ucred->cr_uid != vattr.va_uid && - (error = suser(p->p_ucred, &p->p_acflag))) + if (kauth_cred_getuid(context.vc_ucred) != va.va_uid && + (error = suser(context.vc_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) - VOP_REVOKE(vp, REVOKEALL); + VNOP_REVOKE(vp, REVOKEALL, &context); out: - vrele(vp); + vnode_put(vp); return (error); } -/* - * Convert a user file descriptor to a kernel file entry. - */ -int -getvnode(p, fd, fpp) - struct proc *p; - int fd; - struct file **fpp; -{ - struct file *fp; - int error; - - if (error = fdgetf(p, fd, &fp)) - return (error); - if (fp->f_type != DTYPE_VNODE) - return (EINVAL); - *fpp = fp; - return (0); -} /* * HFS/HFS PlUS SPECIFIC SYSTEM CALLS @@ -3372,105 +4287,48 @@ getvnode(p, fd, fpp) /* * Make a complex file. 
A complex file is one with multiple forks (data streams) */ -struct mkcomplex_args { - const char *path; /* pathname of the file to be created */ - mode_t mode; /* access mode for the newly created file */ - u_long type; /* format of the complex file */ -}; /* ARGSUSED */ int -mkcomplex(p,uap,retval) - struct proc *p; - register struct mkcomplex_args *uap; - register_t *retval; +mkcomplex(__unused struct proc *p, __unused struct mkcomplex_args *uap, __unused register_t *retval) { - struct vnode *vp; - struct vattr vattr; - int error; - struct nameidata nd; - - /* mkcomplex wants the directory vnode locked so do that here */ - - NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_USERSPACE, (char *)uap->path, p); - if (error = namei(&nd)) - return (error); - - /* Set the attributes as specified by the user */ - - VATTR_NULL(&vattr); - vattr.va_mode = (uap->mode & ACCESSPERMS); - error = VOP_MKCOMPLEX(nd.ni_dvp, &vp, &nd.ni_cnd, &vattr, uap->type); - - /* The mkcomplex call promises to release the parent vnode pointer - * even an an error case so don't do it here unless the operation - * is not supported. In that case, there isn't anyone to unlock the parent - * The vnode pointer to the file will also be released. - */ - - if (error) - { - if (error == EOPNOTSUPP) - vput(nd.ni_dvp); - return (error); - } - - return (0); - -} /* end of mkcomplex system call */ + return (ENOTSUP); +} /* * Extended stat call which returns volumeid and vnodeid as well as other info */ -struct statv_args { - const char *path; /* pathname of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -statv(p,uap,retval) - struct proc *p; - register struct statv_args *uap; - register_t *retval; - +statv(__unused struct proc *p, + __unused struct statv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of statv system call */ /* * Extended lstat call which returns volumeid and vnodeid as well as other info */ -struct lstatv_args { - const char *path; /* pathname of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -lstatv(p,uap,retval) - struct proc *p; - register struct lstatv_args *uap; - register_t *retval; - +lstatv(__unused struct proc *p, + __unused struct lstatv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of lstatv system call */ /* * Extended fstat call which returns volumeid and vnodeid as well as other info */ -struct fstatv_args { - int fd; /* file descriptor of the target file */ - struct vstat *vsb; /* vstat structure for returned info */ -}; /* ARGSUSED */ int -fstatv(p,uap,retval) - struct proc *p; - register struct fstatv_args *uap; - register_t *retval; - +fstatv(__unused struct proc *p, + __unused struct fstatv_args *uap, + __unused register_t *retval) { - return (EOPNOTSUPP); /* We'll just return an error for now */ + return (ENOTSUP); /* We'll just return an error for now */ } /* end of fstatv system call */ @@ -3480,161 +4338,6 @@ fstatv(p,uap,retval) #endif /* __APPLE_API_OBSOLETE */ - -/* -* Obtain attribute information about a file system object -*/ - -struct getattrlist_args { - const char *path; /* pathname of the target object */ - struct attrlist * alist; /* Attributes desired by the user */ - void * attributeBuffer; /* buffer to hold returned 
attributes */ - size_t bufferSize; /* size of the return buffer */ - unsigned long options; /* options (follow/don't follow) */ -}; -/* ARGSUSED */ -int -getattrlist (p,uap,retval) - struct proc *p; - register struct getattrlist_args *uap; - register_t *retval; - -{ - int error; - struct nameidata nd; - struct iovec aiov; - struct uio auio; - struct attrlist attributelist; - u_long nameiflags; - - /* Get the attributes desire and do our parameter checking */ - - if (error = copyin((caddr_t)uap->alist, (caddr_t) &attributelist, - sizeof (attributelist))) - { - return(error); - } - - if (attributelist.bitmapcount != ATTR_BIT_MAP_COUNT -#if 0 - || attributelist.commonattr & ~ATTR_CMN_VALIDMASK || - attributelist.volattr & ~ATTR_VOL_VALIDMASK || - attributelist.dirattr & ~ATTR_DIR_VALIDMASK || - attributelist.fileattr & ~ATTR_FILE_VALIDMASK || - attributelist.forkattr & ~ATTR_FORK_VALIDMASK -#endif - ) - { - return (EINVAL); - } - - /* Get the vnode for the file we are getting info on. */ - nameiflags = LOCKLEAF | SHAREDLEAF; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, - (char *)uap->path, p); - - error = namei(&nd); - if (error) - return (error); - - /* Set up the UIO structure for use by the vfs routine */ - - aiov.iov_base = uap->attributeBuffer; - aiov.iov_len = uap->bufferSize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->bufferSize; - - - error = VOP_GETATTRLIST(nd.ni_vp, &attributelist, &auio, p->p_ucred, p); - - /* Unlock and release the vnode which will have been locked by namei */ - - vput(nd.ni_vp); - - /* return the effort if we got one, otherwise return success */ - - if (error) - { - return (error); - } - - return(0); - -} /* end of getattrlist system call */ - - - -/* - * Set attribute information about a file system object - */ - -struct setattrlist_args { - const char *path; /* pathname of the target object */ - struct attrlist * alist; /* Attributes being set by the user */ - void * attributeBuffer; /* buffer with attribute values to be set */ - size_t bufferSize; /* size of the return buffer */ - unsigned long options; /* options (follow/don't follow) */ -}; -/* ARGSUSED */ -int -setattrlist (p,uap,retval) - struct proc *p; - register struct setattrlist_args *uap; - register_t *retval; - -{ - int error; - struct nameidata nd; - struct iovec aiov; - struct uio auio; - struct attrlist alist; - u_long nameiflags; - - /* Get the attributes desired and do our parameter checking */ - - if ((error = copyin((caddr_t)uap->alist, (caddr_t) &alist, - sizeof (alist)))) { - return (error); - } - - if (alist.bitmapcount != ATTR_BIT_MAP_COUNT) - return (EINVAL); - - /* Get the vnode for the file whose attributes are being set. 
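Both getattrlist(2) and setattrlist(2) remain user-visible system calls even as these in-file implementations are deleted. For orientation, a minimal sketch of a getattrlist(2) caller; the reply layout and the ATTR_BIT_MAP_COUNT check mirror the parameter checking above, and the path and error handling are purely illustrative:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/attr.h>

/* reply buffer: the total length always comes first, then the attributes */
struct objtype_reply {
        u_int32_t       length;
        fsobj_type_t    objtype;        /* ATTR_CMN_OBJTYPE */
};

int
main(int argc, char *argv[])
{
        struct attrlist al;
        struct objtype_reply reply;
        const char *path = (argc > 1) ? argv[1] : "/";

        memset(&al, 0, sizeof(al));
        al.bitmapcount = ATTR_BIT_MAP_COUNT;    /* same check the kernel makes */
        al.commonattr = ATTR_CMN_OBJTYPE;

        if (getattrlist(path, &al, &reply, sizeof(reply), 0) == -1) {
                perror("getattrlist");
                return (1);
        }
        /* 1 is VREG and 2 is VDIR in the classic vtype enumeration */
        printf("%s: objtype %u\n", path, (unsigned int)reply.objtype);
        return (0);
}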
*/ - nameiflags = LOCKLEAF; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, - (char *)uap->path, p); - error = namei(&nd); - if (error) - return (error); - - /* Set up the UIO structure for use by the vfs routine */ - aiov.iov_base = uap->attributeBuffer; - aiov.iov_len = uap->bufferSize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = 0; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->bufferSize; - - error = VOP_SETATTRLIST(nd.ni_vp, &alist, &auio, p->p_ucred, p); - - vput(nd.ni_vp); - - return (error); - -} /* end of setattrlist system call */ - - /* * Obtain attribute information on objects in a directory while enumerating * the directory. This call does not yet support union mounted directories. @@ -3642,83 +4345,96 @@ setattrlist (p,uap,retval) * 1.union mounted directories. */ -struct getdirentriesattr_args { - int fd; /* file descriptor */ - struct attrlist *alist; /* bit map of requested attributes */ - void *buffer; /* buffer to hold returned attribute info */ - size_t buffersize; /* size of the return buffer */ - u_long *count; /* the count of entries requested/returned */ - u_long *basep; /* the offset of where we are leaving off in buffer */ - u_long *newstate; /* a flag to inform of changes in directory */ - u_long options; /* maybe unused for now */ -}; /* ARGSUSED */ int -getdirentriesattr (p,uap,retval) - struct proc *p; - register struct getdirentriesattr_args *uap; - register_t *retval; - +getdirentriesattr (struct proc *p, struct getdirentriesattr_args *uap, register_t *retval) { - register struct vnode *vp; - struct file *fp; - struct uio auio; - struct iovec aiov; - u_long actualcount; - u_long newstate; - int error, eofflag; - long loff; - struct attrlist attributelist; + struct vnode *vp; + struct fileproc *fp; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + uint64_t actualcount; + u_long tmpcount; + u_long newstate; + int error, eofflag; + u_long loff; + struct attrlist attributelist; + struct vfs_context context; + int fd = uap->fd; + char uio_buf[ UIO_SIZEOF(1) ]; + kauth_action_t action; + + AUDIT_ARG(fd, fd); + + /* Get the attributes into kernel space */ + if ((error = copyin(uap->alist, (caddr_t) &attributelist, sizeof (attributelist)))) + return(error); + actualcount = fuulong(uap->count); + if (actualcount == -1ULL) + return(-1); - AUDIT_ARG(fd, uap->fd); + if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) + return (error); + + if ((fp->f_fglob->fg_flag & FREAD) == 0) { + AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); + error = EBADF; + goto out; + } + if ( (error = vnode_getwithref(vp)) ) + goto out; - /* Get the attributes into kernel space */ - if (error = copyin((caddr_t)uap->alist, (caddr_t) &attributelist, sizeof (attributelist))) - return(error); - if (error = copyin((caddr_t)uap->count, (caddr_t) &actualcount, sizeof (u_long))) - return(error); + AUDIT_ARG(vnpath, vp, ARG_VNODE1); - if (error = getvnode(p, uap->fd, &fp)) - return (error); + if (vp->v_type != VDIR) { + (void)vnode_put(vp); + error = EINVAL; + goto out; + } - AUDIT_ARG(vnpath, (struct vnode *)fp->f_data, ARG_VNODE1); + /* set up the uio structure which will contain the users return buffer */ + loff = fp->f_fglob->fg_offset; + auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->buffer, uap->buffersize); + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + tmpcount = (u_long) actualcount; - if ((fp->f_flag & FREAD) == 0) - return(EBADF); - vp = (struct vnode *)fp->f_data; + /* + * If the only item requested is file names, we can let that past with + * just LIST_DIRECTORY. If they want any other attributes, that means + * they need SEARCH as well. 
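The same decision, distilled into a small user-space helper so it can be tested in isolation; the bit values below are stand-ins for the real constants in the kernel's <sys/attr.h> and <sys/kauth.h>:

#include <stdio.h>
#include <stdint.h>

#define ATTR_CMN_NAME                   0x00000001      /* stand-in values; */
#define KAUTH_VNODE_LIST_DIRECTORY      0x00000002      /* the real ones    */
#define KAUTH_VNODE_SEARCH              0x00000004      /* live in headers  */

static uint32_t
readdirattr_action(uint32_t commonattr, uint32_t fileattr, uint32_t dirattr)
{
        uint32_t action = KAUTH_VNODE_LIST_DIRECTORY;

        /* anything beyond plain names also requires SEARCH */
        if ((commonattr & ~ATTR_CMN_NAME) || fileattr || dirattr)
                action |= KAUTH_VNODE_SEARCH;
        return (action);
}

int
main(void)
{
        printf("names only -> 0x%x\n", readdirattr_action(ATTR_CMN_NAME, 0, 0));
        /* 0x8 is an arbitrary extra common-attribute bit for the demo */
        printf("more attrs -> 0x%x\n", readdirattr_action(ATTR_CMN_NAME | 0x8, 0, 0));
        return (0);
}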
+ */ + action = KAUTH_VNODE_LIST_DIRECTORY; + if ((attributelist.commonattr & ~ATTR_CMN_NAME) || + attributelist.fileattr || attributelist.dirattr) + action |= KAUTH_VNODE_SEARCH; + + if ((error = vnode_authorize(vp, NULL, action, &context)) == 0) + error = VNOP_READDIRATTR(vp, &attributelist, auio, + tmpcount, uap->options, &newstate, &eofflag, + &tmpcount, &context); + (void)vnode_put(vp); + actualcount = tmpcount; - if (vp->v_type != VDIR) - return(EINVAL); + if (error) + goto out; + fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */ - /* set up the uio structure which will contain the users return buffer */ - aiov.iov_base = uap->buffer; - aiov.iov_len = uap->buffersize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = uap->buffersize; - - loff = auio.uio_offset = fp->f_offset; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_READDIRATTR (vp, &attributelist, &auio, - actualcount, uap->options, &newstate, &eofflag, - &actualcount, ((u_long **)0), p->p_ucred); - - VOP_UNLOCK(vp, 0, p); - if (error) return (error); - fp->f_offset = auio.uio_offset; /* should be multiple of dirent, not variable */ - - if (error = copyout((caddr_t) &actualcount, (caddr_t) uap->count, sizeof(u_long))) - return (error); - if (error = copyout((caddr_t) &newstate, (caddr_t) uap->newstate, sizeof(u_long))) - return (error); - if (error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long))) - return (error); + if ((error = suulong(uap->count, actualcount)) != 0) + goto out; + if ((error = suulong(uap->newstate, (uint64_t)newstate)) != 0) + goto out; + if ((error = suulong(uap->basep, (uint64_t)loff)) != 0) + goto out; *retval = eofflag; /* similar to getdirentries */ - return (0); /* return error earlier, an retval of 0 or 1 now */ + error = 0; + out: + file_drop(fd); + return (error); /* return error earlier, an retval of 0 or 1 now */ } /* end of getdirentryattr system call */ @@ -3726,93 +4442,132 @@ getdirentriesattr (p,uap,retval) * Exchange data between two files */ -struct exchangedata_args { - const char *path1; /* pathname of the first swapee */ - const char *path2; /* pathname of the second swapee */ - unsigned long options; /* options */ -}; /* ARGSUSED */ int -exchangedata (p,uap,retval) - struct proc *p; - register struct exchangedata_args *uap; - register_t *retval; - +exchangedata (struct proc *p, register struct exchangedata_args *uap, __unused register_t *retval) { struct nameidata fnd, snd; + struct vfs_context context; struct vnode *fvp, *svp; - int error; + int error; u_long nameiflags; + char *fpath = NULL; + char *spath = NULL; + int flen, slen; + fse_info f_finfo, s_finfo; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - /* Global lock, to prevent race condition, only one exchange at a time */ - lockmgr(&exchangelock, LK_EXCLUSIVE , (struct slock *)0, p); - - NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, - (char *) uap->path1, p); + NDINIT(&fnd, LOOKUP, nameiflags | AUDITVNPATH1, + UIO_USERSPACE, uap->path1, &context); - error = namei(&fnd); - if (error) - goto out2; + error = namei(&fnd); + if (error) + goto out2; - fvp = fnd.ni_vp; + nameidone(&fnd); + fvp = fnd.ni_vp; - NDINIT(&snd, LOOKUP, nameiflags | AUDITVNPATH2, UIO_USERSPACE, - (char *)uap->path2, p); + NDINIT(&snd, LOOKUP, nameiflags | AUDITVNPATH2, + UIO_USERSPACE, 
uap->path2, &context); - error = namei(&snd); - if (error) { - vrele(fvp); + error = namei(&snd); + if (error) { + vnode_put(fvp); goto out2; - } - + } + nameidone(&snd); svp = snd.ni_vp; - /* if the files are the same, return an inval error */ + /* + * if the files are the same, return an inval error + */ if (svp == fvp) { - vrele(fvp); - vrele(svp); - error = EINVAL; - goto out2; - } - - vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); - vn_lock(svp, LK_EXCLUSIVE | LK_RETRY, p); - - error = VOP_ACCESS(fvp, VWRITE, p->p_ucred, p); - if (error) goto out; + error = EINVAL; + goto out; + } - error = VOP_ACCESS(svp, VWRITE, p->p_ucred, p); - if (error) goto out; + /* + * if the files are on different volumes, return an error + */ + if (svp->v_mount != fvp->v_mount) { + error = EXDEV; + goto out; + } + if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, &context)) != 0) || + ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, &context)) != 0)) + goto out; + if (need_fsevent(FSE_EXCHANGE, fvp) || kauth_authorize_fileop_has_listeners()) { + fpath = get_pathbuff(); + spath = get_pathbuff(); + flen = MAXPATHLEN; + slen = MAXPATHLEN; + if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') { + printf("exchange: vn_getpath(fvp=0x%x) failed <<%s>>\n", + fvp, fpath); + } + if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') { + printf("exchange: vn_getpath(svp=0x%x) failed <<%s>>\n", + svp, spath); + } + get_fse_info(fvp, &f_finfo, &context); + get_fse_info(svp, &s_finfo, &context); + } /* Ok, make the call */ - error = VOP_EXCHANGE (fvp, svp, p->p_ucred, p); - - if (error == 0 && VPARENT(fvp) != VPARENT(svp)) { - struct vnode *tmp; + error = VNOP_EXCHANGE(fvp, svp, 0, &context); - tmp = VPARENT(fvp); - VPARENT(fvp) = VPARENT(svp); - VPARENT(svp) = tmp; + if (error == 0) { + char *tmpname; + + if (fpath != NULL && spath != NULL) { + /* call out to allow 3rd party notification of exchangedata. + * Ignore result of kauth_authorize_fileop call. 
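Third parties observe this operation through the fileop listener; ordinary callers drive the whole path with exchangedata(2). A minimal caller, assuming a volume (HFS+ style) that implements VNOP_EXCHANGE; files on different volumes fail with EXDEV per the check above:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
        if (argc != 3) {
                fprintf(stderr, "usage: %s file1 file2\n", argv[0]);
                return (1);
        }
        if (exchangedata(argv[1], argv[2], 0) == -1) {
                fprintf(stderr, "exchangedata: %s\n", strerror(errno));
                return (1);
        }
        printf("exchanged data of %s and %s\n", argv[1], argv[2]);
        return (0);
}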
+ */ + kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_EXCHANGE, + (uintptr_t)fpath, (uintptr_t)spath); + } + name_cache_lock(); + + tmpname = fvp->v_name; + fvp->v_name = svp->v_name; + svp->v_name = tmpname; + + if (fvp->v_parent != svp->v_parent) { + struct vnode *tmp; + + tmp = fvp->v_parent; + fvp->v_parent = svp->v_parent; + svp->v_parent = tmp; + } + name_cache_unlock(); + + if (fpath != NULL && spath != NULL) { + add_fsevent(FSE_EXCHANGE, &context, + FSE_ARG_STRING, flen, fpath, + FSE_ARG_FINFO, &f_finfo, + FSE_ARG_STRING, slen, spath, + FSE_ARG_FINFO, &s_finfo, + FSE_ARG_DONE); + } } + if (spath != NULL) + release_pathbuff(spath); + if (fpath != NULL) + release_pathbuff(fpath); out: - vput (svp); - vput (fvp); - + vnode_put(svp); + vnode_put(fvp); out2: - lockmgr(&exchangelock, LK_RELEASE, (struct slock *)0, p); - - if (error) { return (error); - } - - return (0); +} -} /* end of exchangedata system call */ #ifdef __APPLE_API_OBSOLETE @@ -3822,31 +4577,22 @@ out2: /* * Check users access to a file -*/ - -struct checkuseraccess_args { - const char *path; /* pathname of the target file */ - uid_t userid; /* user for whom we are checking access */ - gid_t *groups; /* Group that we are checking for */ - int ngroups; /* Number of groups being checked */ - int accessrequired; /* needed access to the file */ - unsigned long options; /* options */ -}; +*/ /* ARGSUSED */ +#warning "checkuseraccess copies a cred in from user space but" +#warning "user space has no way of knowing what one looks like" +#warning "this code should use the access_extended spoof-as functionality" int -checkuseraccess (p,uap,retval) - struct proc *p; - register struct checkuseraccess_args *uap; - register_t *retval; - +checkuseraccess (struct proc *p, register struct checkuseraccess_args *uap, __unused register_t *retval) { register struct vnode *vp; int error; struct nameidata nd; - struct ucred cred; + struct ucred cred; /* XXX ILLEGAL */ int flags; /*what will actually get passed to access*/ u_long nameiflags; + struct vfs_context context; /* Make sure that the number of groups is correct before we do anything */ @@ -3855,7 +4601,7 @@ checkuseraccess (p,uap,retval) /* Verify that the caller is root */ - if (error = suser(p->p_ucred, &p->p_acflag)) + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); /* Fill in the credential structure */ @@ -3863,17 +4609,21 @@ checkuseraccess (p,uap,retval) cred.cr_ref = 0; cred.cr_uid = uap->userid; cred.cr_ngroups = uap->ngroups; - if (error = copyin((caddr_t) uap->groups, (caddr_t) &(cred.cr_groups), (sizeof(gid_t))*uap->ngroups)) + if ((error = copyin(CAST_USER_ADDR_T(uap->groups), (caddr_t) &(cred.cr_groups), (sizeof(gid_t))*uap->ngroups))) return (error); - /* Get our hands on the file */ + context.vc_proc = p; + context.vc_ucred = &cred; - nameiflags = LOCKLEAF; + /* Get our hands on the file */ + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, (char *)uap->path, p); + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, + UIO_USERSPACE, CAST_USER_ADDR_T(uap->path), &context); - if (error = namei(&nd)) + if ((error = namei(&nd))) return (error); + nameidone(&nd); vp = nd.ni_vp; /* Flags == 0 means only check for existence. 
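This is the same convention as access(2), where a zero mask (F_OK) asks only whether the object exists; the R_OK/W_OK/X_OK bits then map onto KAUTH_VNODE_READ_DATA and friends just below. A quick user-space illustration:

#include <stdio.h>
#include <unistd.h>

int
main(int argc, char *argv[])
{
        const char *path = (argc > 1) ? argv[1] : "/etc/passwd";

        if (access(path, F_OK) == 0)            /* flags == 0: existence only */
                printf("%s exists\n", path);
        if (access(path, R_OK | W_OK) == 0)     /* several rights at once */
                printf("%s is readable and writable\n", path);
        return (0);
}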
*/ @@ -3882,15 +4632,15 @@ checkuseraccess (p,uap,retval) if (uap->accessrequired) { if (uap->accessrequired & R_OK) - flags |= VREAD; + flags |= KAUTH_VNODE_READ_DATA; if (uap->accessrequired & W_OK) - flags |= VWRITE; + flags |= KAUTH_VNODE_WRITE_DATA; if (uap->accessrequired & X_OK) - flags |= VEXEC; + flags |= KAUTH_VNODE_EXECUTE; } - error = VOP_ACCESS(vp, flags, &cred, p); + error = vnode_authorize(vp, NULL, flags, &context); - vput(vp); + vnode_put(vp); if (error) return (error); @@ -3907,41 +4657,50 @@ checkuseraccess (p,uap,retval) -struct searchfs_args { - const char *path; - struct fssearchblock *searchblock; - u_long *nummatches; - u_long scriptcode; - u_long options; - struct searchstate *state; - }; /* ARGSUSED */ int -searchfs (p,uap,retval) - struct proc *p; - register struct searchfs_args *uap; - register_t *retval; - +searchfs (struct proc *p, register struct searchfs_args *uap, __unused register_t *retval) { register struct vnode *vp; int error=0; int fserror = 0; struct nameidata nd; - struct fssearchblock searchblock; + struct user_fssearchblock searchblock; struct searchstate *state; struct attrlist *returnattrs; void *searchparams1,*searchparams2; - struct iovec aiov; - struct uio auio; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; u_long nummatches; int mallocsize; u_long nameiflags; - + struct vfs_context context; + char uio_buf[ UIO_SIZEOF(1) ]; - /* Start by copying in fsearchblock paramater list */ + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - if (error = copyin((caddr_t) uap->searchblock, (caddr_t) &searchblock,sizeof(struct fssearchblock))) + /* Start by copying in fsearchblock paramater list */ + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); + } + else { + struct fssearchblock tmp_searchblock; + error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock)); + // munge into 64-bit version + searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs); + searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); + searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; + searchblock.maxmatches = tmp_searchblock.maxmatches; + searchblock.timelimit = tmp_searchblock.timelimit; + searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1); + searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1; + searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2); + searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2; + searchblock.searchattrs = tmp_searchblock.searchattrs; + } + if (error) return(error); /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. @@ -3949,13 +4708,13 @@ searchfs (p,uap,retval) if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) return(EINVAL); - + /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */ /* It all has to do into local memory and it's not that big so we might as well put it all together. */ /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ /* block. 
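The carving described here, sketched in user space with hypothetical stand-in sizes: one allocation, four regions addressed off the base pointer, one free on the way out:

#include <stdio.h>
#include <stdlib.h>

struct attrlist_stub { char opaque[20]; };      /* stand-ins for the real   */
struct searchstate_stub { char opaque[56]; };   /* kernel structure layouts */

int
main(void)
{
        size_t len1 = 64, len2 = 32;            /* sizeofsearchparams1/2 */
        size_t total = len1 + len2 +
            sizeof(struct attrlist_stub) + sizeof(struct searchstate_stub);
        char *searchparams1 = malloc(total);    /* base of the whole block */

        if (searchparams1 == NULL)
                return (1);
        char *searchparams2 = searchparams1 + len1;
        struct attrlist_stub *returnattrs =
            (struct attrlist_stub *)(searchparams2 + len2);
        struct searchstate_stub *state =
            (struct searchstate_stub *)(returnattrs + 1);

        printf("block %p: params2 %p attrs %p state %p\n",
            (void *)searchparams1, (void *)searchparams2,
            (void *)returnattrs, (void *)state);
        free(searchparams1);                    /* one free releases it all */
        return (0);
}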
*/ - mallocsize = searchblock.sizeofsearchparams1+searchblock.sizeofsearchparams2 + + mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 + sizeof(struct attrlist) + sizeof(struct searchstate); MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK); @@ -3968,38 +4727,34 @@ searchfs (p,uap,retval) /* Now copy in the stuff given our local variables. */ - if (error = copyin((caddr_t) searchblock.searchparams1, searchparams1,searchblock.sizeofsearchparams1)) + if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) goto freeandexit; - if (error = copyin((caddr_t) searchblock.searchparams2, searchparams2,searchblock.sizeofsearchparams2)) + if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) goto freeandexit; - if (error = copyin((caddr_t) searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))) + if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) goto freeandexit; - if (error = copyin((caddr_t) uap->state, (caddr_t) state, sizeof(struct searchstate))) + if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) goto freeandexit; /* set up the uio structure which will contain the users return buffer */ - aiov.iov_base = searchblock.returnbuffer; - aiov.iov_len = searchblock.returnbuffersize; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = searchblock.returnbuffersize; + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); - nameiflags = LOCKLEAF; + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, - (char *)uap->path, p); + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, + UIO_USERSPACE, uap->path, &context); error = namei(&nd); if (error) goto freeandexit; + nameidone(&nd); vp = nd.ni_vp; @@ -4020,7 +4775,7 @@ searchfs (p,uap,retval) from copying out any results... */ - fserror = VOP_SEARCHFS(vp, + fserror = VNOP_SEARCHFS(vp, searchparams1, searchparams2, &searchblock.searchattrs, @@ -4030,20 +4785,21 @@ searchfs (p,uap,retval) &nummatches, uap->scriptcode, uap->options, - &auio, - state); + auio, + state, + &context); saveandexit: - vput(vp); + vnode_put(vp); /* Now copy out the stuff that needs copying out. That means the number of matches, the search state. Everything was already put into he return buffer by the vop call. 
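The switch from copyout() to suulong() here matters because the width of a user-visible u_long depends on the caller's ABI: four bytes for a 32-bit process, eight for a 64-bit one. A user-space model of that store, with memcpy standing in for copyout():

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* model of suulong(): write 4 or 8 bytes depending on the caller's ABI */
static size_t
store_user_ulong(void *uaddr, uint64_t value, int is64bit)
{
        if (is64bit) {
                memcpy(uaddr, &value, sizeof(uint64_t));
                return (sizeof(uint64_t));
        }
        uint32_t v32 = (uint32_t)value;         /* truncated for ILP32 caller */
        memcpy(uaddr, &v32, sizeof(uint32_t));
        return (sizeof(uint32_t));
}

int
main(void)
{
        unsigned char ubuf[8] = { 0 };

        printf("64-bit ABI: %zu bytes\n", store_user_ulong(ubuf, 42, 1));
        printf("32-bit ABI: %zu bytes\n", store_user_ulong(ubuf, 42, 0));
        return (0);
}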
*/ - if (error = copyout((caddr_t) state, (caddr_t) uap->state, sizeof(struct searchstate))) + if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) goto freeandexit; - if (error = copyout((caddr_t) &nummatches, (caddr_t) uap->nummatches, sizeof(u_long))) + if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) goto freeandexit; error = fserror; @@ -4061,21 +4817,12 @@ freeandexit: /* * Make a filesystem-specific control call: */ -struct fsctl_args { - const char *path; /* pathname of the target object */ - u_long cmd; /* cmd (also encodes size/direction of arguments a la ioctl) */ - caddr_t data; /* pointer to argument buffer */ - u_long options; /* options for fsctl processing */ -}; /* ARGSUSED */ int -fsctl (p,uap,retval) - struct proc *p; - struct fsctl_args *uap; - register_t *retval; - +fsctl (struct proc *p, struct fsctl_args *uap, __unused register_t *retval) { int error; + boolean_t is64bit; struct nameidata nd; u_long nameiflags; u_long cmd = uap->cmd; @@ -4083,24 +4830,35 @@ fsctl (p,uap,retval) #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; caddr_t data, memp; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); size = IOCPARM_LEN(cmd); if (size > IOCPARM_MAX) return (EINVAL); + is64bit = proc_is64bit(p); + memp = NULL; if (size > sizeof (stkbuf)) { if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM; data = memp; } else { - data = stkbuf; + data = &stkbuf[0]; }; if (cmd & IOC_IN) { if (size) { - error = copyin(uap->data, data, (u_int)size); + error = copyin(uap->data, data, size); if (error) goto FSCtl_Exit; } else { - *(caddr_t *)data = uap->data; + if (is64bit) { + *(user_addr_t *)data = uap->data; + } + else { + *(uint32_t *)data = (uint32_t)uap->data; + } }; } else if ((cmd & IOC_OUT) && size) { /* @@ -4108,25 +4866,33 @@ fsctl (p,uap,retval) * gets back something deterministic. */ bzero(data, size); - } else if (cmd & IOC_VOID) - *(caddr_t *)data = uap->data; + } else if (cmd & IOC_VOID) { + if (is64bit) { + *(user_addr_t *)data = uap->data; + } + else { + *(uint32_t *)data = (uint32_t)uap->data; + } + } /* Get the vnode for the file we are getting info on: */ - nameiflags = LOCKLEAF; + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, (char *)uap->path, p); - if (error = namei(&nd)) goto FSCtl_Exit; - + NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, &context); + if ((error = namei(&nd))) goto FSCtl_Exit; + /* Invoke the filesystem-specific code */ - error = VOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, p->p_ucred, p); + error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, &context); - vput(nd.ni_vp); + vnode_put(nd.ni_vp); + nameidone(&nd); /* * Copy any data to user, size was * already set and checked above. 
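The fsctl command word reuses ioctl(2)'s encoding: direction bits plus an embedded parameter length. A small decoder using the standard <sys/ioccom.h> macros shows what the IOC_IN/IOC_OUT tests above are keying on:

#include <stdio.h>
#include <sys/ioccom.h>

struct fsctl_arg { int a; int b; };

static void
describe(unsigned long cmd)
{
        printf("cmd 0x%08lx: param len %lu%s%s%s\n",
            cmd, (unsigned long)IOCPARM_LEN(cmd),
            (cmd & IOC_IN) ? " copyin" : "",
            (cmd & IOC_OUT) ? " copyout" : "",
            (cmd & IOC_VOID) ? " void" : "");
}

int
main(void)
{
        describe(_IOW('f', 1, int));                    /* IOC_IN  */
        describe(_IOR('f', 2, long));                   /* IOC_OUT */
        describe(_IOWR('f', 3, struct fsctl_arg));      /* both    */
        return (0);
}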
*/ - if (error == 0 && (cmd & IOC_OUT) && size) error = copyout(data, uap->data, (u_int)size); + if (error == 0 && (cmd & IOC_OUT) && size) + error = copyout(data, uap->data, size); FSCtl_Exit: if (memp) kfree(memp, size); @@ -4141,337 +4907,552 @@ FSCtl_Exit: __private_extern__ int sync_internal(void) { - boolean_t funnel_state; int error; struct sync_args data; int retval[2]; - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = sync(current_proc(), &data, &retval); + error = sync(current_proc(), &data, &retval[0]); - thread_funnel_set(kernel_flock, funnel_state); return (error); } /* end of sync_internal call */ +/* + * Retrieve the data of an extended attribute. + */ +int +getxattr(struct proc *p, struct getxattr_args *uap, user_ssize_t *retval) +{ + struct vnode *vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + size_t namelen; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; -// XXXdbg fmod watching calls -#define NUM_CHANGE_NODES 256 -static int changed_init=0; -static volatile int fmod_watch_enabled = 0; -static pid_t fmod_watch_owner; -static simple_lock_data_t changed_nodes_lock; // guard access -static volatile struct vnode *changed_nodes[NUM_CHANGE_NODES]; -static volatile pid_t changed_nodes_pid[NUM_CHANGE_NODES]; -static volatile int changed_rd_index=0, changed_wr_index=0; -static volatile int notifier_sleeping=0; - + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); -void -notify_filemod_watchers(struct vnode *vp, struct proc *p) -{ - int ret; - - // only want notification on regular files. - if (fmod_watch_enabled == 0 || (vp->v_type != VREG && vp->v_type != VDIR)) { - return; - } + if (uap->options & XATTR_NOSECURITY) + return (EINVAL); - // grab a reference so it doesn't go away - if (vget(vp, 0, p) != 0) { - return; - } + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, &context); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); - retry: - simple_lock(&changed_nodes_lock); + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + goto out; + } + if (xattr_protected(attrname)) { + error = EPERM; + goto out; + } + if (uap->value && uap->size > 0) { + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + } - // If the table is full, block until it clears up - if (((changed_wr_index+1) % NUM_CHANGE_NODES) == changed_rd_index) { - simple_unlock(&changed_nodes_lock); + error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, &context); +out: + vnode_put(vp); - notifier_sleeping++; - // wait up to 10 seconds for the queue to drain - ret = tsleep((caddr_t)&changed_wr_index, PINOD, "changed_nodes_full", 10*hz); - if (ret != 0 || fmod_watch_enabled == 0) { - notifier_sleeping--; - printf("notify_filemod: err %d from tsleep/enabled %d. 
bailing out (vp 0x%x).\n", - ret, fmod_watch_enabled, vp); - vrele(vp); - return; + if (auio) { + *retval = uap->size - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; } - notifier_sleeping--; - goto retry; - } + return (error); +} - // insert our new guy - if (changed_nodes[changed_wr_index] != NULL) { - panic("notify_fmod_watchers: index %d is 0x%x, not null!\n", - changed_wr_index, changed_nodes[changed_wr_index]); - } - changed_nodes[changed_wr_index] = vp; - changed_nodes_pid[changed_wr_index] = current_proc()->p_pid; - changed_wr_index = (changed_wr_index + 1) % NUM_CHANGE_NODES; +/* + * Retrieve the data of an extended attribute. + */ +int +fgetxattr(struct proc *p, struct fgetxattr_args *uap, user_ssize_t *retval) +{ + struct vnode *vp; + char attrname[XATTR_MAXNAMELEN+1]; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + size_t namelen; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; - simple_unlock(&changed_nodes_lock); + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY)) + return (EINVAL); - wakeup((caddr_t)&changed_rd_index); -} + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + goto out; + } + if (xattr_protected(attrname)) { + error = EPERM; + goto out; + } + if (uap->value && uap->size > 0) { + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + } + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, &context); +out: + (void)vnode_put(vp); + file_drop(uap->fd); -struct fmod_watch_args { - int *new_fd; - char *pathbuf; - int len; - pid_t pid; -}; + if (auio) { + *retval = uap->size - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); +} +/* + * Set the data of an extended attribute. + */ int -fmod_watch(struct proc *p, struct fmod_watch_args *uap, register_t *retval) +setxattr(struct proc *p, struct setxattr_args *uap, int *retval) { - int fd, didhold = 0; - struct filedesc *fdp; - struct file *fp; - struct vnode *vp; - int flags; - int type, indx, error, need_wakeup=0; - struct flock lf; - struct nameidata nd; - extern struct fileops vnops; - pid_t pid; - - if (fmod_watch_enabled == 0) { - *retval = -1; - return EINVAL; - } + struct vnode *vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t namelen; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; - p = current_proc(); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - if (changed_init == 0) { - changed_init = 1; - simple_lock_init(&changed_nodes_lock); - } + if (uap->options & XATTR_NOSECURITY) + return (EINVAL); - if (changed_rd_index == changed_wr_index) { - // there's nothing to do, go to sleep - error = tsleep((caddr_t)&changed_rd_index, PUSER|PCATCH, "changed_nodes_empty", 0); - if (error != 0) { - // XXXdbg - what if after we unblock the changed_nodes - // table is full? We should wakeup() the writer. 
- *retval = -1; - return error; + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + if (uap->value == 0 || uap->size == 0) { + return (EINVAL); } - } - simple_lock(&changed_nodes_lock); + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, &context); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); - vp = (struct vnode *)changed_nodes[changed_rd_index]; - pid = changed_nodes_pid[changed_rd_index]; - - changed_nodes[changed_rd_index] = NULL; - changed_rd_index = (changed_rd_index + 1) % NUM_CHANGE_NODES; - - if (vp == NULL) { - printf("watch_file_changes: Someone put a null vnode in my table! (%d %d)\n", - changed_rd_index, changed_wr_index); - error = EINVAL; - goto err0; - } + auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); - simple_unlock(&changed_nodes_lock); - - // if the writers are blocked, wake them up as we just freed up - // some space for them. - if (notifier_sleeping > 0) { - wakeup((caddr_t)&changed_wr_index); - } + error = vn_setxattr(vp, attrname, auio, uap->options, &context); + vnode_put(vp); + *retval = 0; + return (error); +} - if (vp->v_type != VREG && vp->v_type != VDIR) { - error = EBADF; - goto err1; - } +/* + * Set the data of an extended attribute. + */ +int +fsetxattr(struct proc *p, struct fsetxattr_args *uap, int *retval) +{ + struct vnode *vp; + char attrname[XATTR_MAXNAMELEN+1]; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t namelen; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; - if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) { - printf("fmod_watch: vn_lock returned %d\n", error); - goto err1; - } + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY)) + return (EINVAL); - // first copy out the name - if (uap->pathbuf) { - char *buff; - int len=MAXPATHLEN; - - MALLOC(buff, char *, len, M_TEMP, M_WAITOK); - error = vn_getpath(vp, buff, &len); - if (error == 0) { - if (len < uap->len) - error = copyout(buff, (void *)uap->pathbuf, len); - else - error = ENOSPC; + if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { + return (error); } - FREE(buff, M_TEMP); - if (error) { - goto err1; + if (xattr_protected(attrname)) + return(EPERM); + if (uap->value == 0 || uap->size == 0) { + return (EINVAL); } - } - - // now copy out the pid of the person that changed the file - if (uap->pid) { - if ((error = copyout((caddr_t)&pid, (void *)uap->pid, sizeof(pid_t))) != 0) { - printf("fmod_watch: failed to copy out the pid (%d)\n", pid); - goto err1; + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); } - } - - // now create a file descriptor for this vnode - fdp = p->p_fd; - flags = FREAD; - if (error = falloc(p, &fp, &indx)) { - printf("fmod_watch: failed to allocate an fd...\n"); - goto err2; - } - - if ((error = copyout((caddr_t)&indx, (void *)uap->new_fd, sizeof(int))) != 0) { - printf("fmod_watch: failed to copy out the new fd (%d)\n", indx); - goto err3; - } - - fp->f_flag = flags & FMASK; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - - if (UBCINFOEXISTS(vp) && ((didhold = ubc_hold(vp)) == 0)) { - goto err3; - } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } 
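The uio built next lives in caller-provided stack storage: UIO_SIZEOF(1) sizes room for a uio with one iovec, uio_createwithbuffer() initializes it in place, and nothing needs to be freed afterwards. A user-space model of that in-place pattern:

#include <stdio.h>
#include <stdint.h>

/* model of uio_createwithbuffer(): descriptor lives in caller storage */
struct miniuio {
        uint64_t        offset;
        size_t          resid;
        void            *base;
};

#define MINIUIO_SIZEOF  (sizeof(struct miniuio))

static struct miniuio *
miniuio_createwithbuffer(void *storage, size_t storsize,
    uint64_t offset, void *base, size_t len)
{
        struct miniuio *u = storage;

        if (storsize < MINIUIO_SIZEOF)
                return (NULL);
        u->offset = offset;
        u->resid = len;
        u->base = base;
        return (u);
}

int
main(void)
{
        char data[64];
        char uio_buf[MINIUIO_SIZEOF];   /* mirrors uio_buf[UIO_SIZEOF(1)] */
        struct miniuio *u = miniuio_createwithbuffer(uio_buf,
            sizeof(uio_buf), 0, data, sizeof(data));

        printf("resid before I/O: %zu\n", u ? u->resid : 0);
        return (0);     /* no teardown: the descriptor was never allocated */
}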
+ auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->value, uap->size); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + error = vn_setxattr(vp, attrname, auio, uap->options, &context); + vnode_put(vp); + file_drop(uap->fd); + *retval = 0; + return (error); +} - error = VOP_OPEN(vp, flags, p->p_ucred, p); - if (error) { - goto err4; - } +/* + * Remove an extended attribute. + */ +#warning "code duplication" +int +removexattr(struct proc *p, struct removexattr_args *uap, int *retval) +{ + struct vnode *vp; + struct nameidata nd; + char attrname[XATTR_MAXNAMELEN+1]; + int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + struct vfs_context context; + size_t namelen; + u_long nameiflags; + int error; - VOP_UNLOCK(vp, 0, p); - - *fdflags(p, indx) &= ~UF_RESERVED; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - // note: we explicitly don't vrele() here because it - // happens when the fd is closed. + if (uap->options & XATTR_NOSECURITY) + return (EINVAL); - return error; + error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); + if (error != 0) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, &context); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); - err4: - if (didhold) { - ubc_rele(vp); - } - err3: - ffree(fp); - fdrelse(p, indx); - err2: - VOP_UNLOCK(vp, 0, p); - err1: - vrele(vp); // undoes the vref() in notify_filemod_watchers() - - err0: - *retval = -1; - return error; + error = vn_removexattr(vp, attrname, uap->options, &context); + vnode_put(vp); + *retval = 0; + return (error); } -static int -enable_fmod_watching(register_t *retval) +/* + * Remove an extended attribute. + */ +#warning "code duplication" +int +fremovexattr(struct proc *p, struct fremovexattr_args *uap, int *retval) { - *retval = -1; + struct vnode *vp; + char attrname[XATTR_MAXNAMELEN+1]; + struct vfs_context context; + size_t namelen; + int error; - if (!is_suser()) { - return EPERM; - } - - // XXXdbg for now we only allow one watcher at a time. - if (fmod_watch_enabled) { - return EBUSY; - } - - fmod_watch_enabled++; - fmod_watch_owner = current_proc()->p_pid; + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY)) + return (EINVAL); + + error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); + if (error != 0) { + return (error); + } + if (xattr_protected(attrname)) + return(EPERM); + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - *retval = 0; - return 0; + error = vn_removexattr(vp, attrname, uap->options, &context); + vnode_put(vp); + file_drop(uap->fd); + *retval = 0; + return (error); } -static int -disable_fmod_watching(register_t *retval) +/* + * Retrieve the list of extended attribute names. + */ +#warning "code duplication" +int +listxattr(struct proc *p, struct listxattr_args *uap, user_ssize_t *retval) { - if (!is_suser()) { - return EPERM; - } - - if (fmod_watch_enabled < 1) { - printf("fmod_watching: too many disables! (%d)\n", fmod_watch_enabled); - return EINVAL; - } + struct vnode *vp; + struct nameidata nd; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + u_long nameiflags; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; - fmod_watch_enabled--; - - // if we're the last guy, clear out any remaining vnodes - // in the table so they don't remain referenced. - // - if (fmod_watch_enabled == 0) { - int i; - for(i=changed_rd_index; i != changed_wr_index; ) { - if (changed_nodes[i] == NULL) { - panic("disable_fmod_watch: index %d is NULL!\n", i); - } - vrele((struct vnode *)changed_nodes[i]); - changed_nodes[i] = NULL; - i = (i + 1) % NUM_CHANGE_NODES; - } - changed_wr_index = changed_rd_index = 0; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - fmod_watch_owner = 0; - } + if (uap->options & XATTR_NOSECURITY) + return (EINVAL); - // wake up anyone that may be waiting for the - // queue to clear out. - // - while(notifier_sleeping) { - wakeup((caddr_t)&changed_wr_index); + nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, spacetype, uap->path, &context); + if ((error = namei(&nd))) { + return (error); + } + vp = nd.ni_vp; + nameidone(&nd); + if (uap->namebuf != 0 && uap->bufsize > 0) { + // LP64todo - fix this! + auio = uio_createwithbuffer(1, 0, spacetype, + UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->namebuf, uap->bufsize); + } - // yield the cpu so the notifiers can run - tsleep((caddr_t)&fmod_watch_enabled, PINOD, "disable_fmod_watch", 1); - } + error = vn_listxattr(vp, auio, &attrsize, uap->options, &context); - *retval = 0; - return 0; + vnode_put(vp); + if (auio) { + *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); } - -struct fmod_watch_enable_args { - int on_or_off; -}; - +/* + * Retrieve the list of extended attribute names. + */ +#warning "code duplication" int -fmod_watch_enable(struct proc *p, struct fmod_watch_enable_args *uap, register_t *retval) +flistxattr(struct proc *p, struct flistxattr_args *uap, user_ssize_t *retval) { - int ret; - - if (uap->on_or_off != 0) { - ret = enable_fmod_watching(retval); - } else { - ret = disable_fmod_watching(retval); - } + struct vnode *vp; + struct vfs_context context; + uio_t auio = NULL; + int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + size_t attrsize = 0; + int error; + char uio_buf[ UIO_SIZEOF(1) ]; + + if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY)) + return (EINVAL); + + if ( (error = file_vnode(uap->fd, &vp)) ) { + return (error); + } + if ( (error = vnode_getwithref(vp)) ) { + file_drop(uap->fd); + return(error); + } + if (uap->namebuf != 0 && uap->bufsize > 0) { + // LP64todo - fix this! + auio = uio_createwithbuffer(1, 0, spacetype, + UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, uap->namebuf, uap->bufsize); + } + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + error = vn_listxattr(vp, auio, &attrsize, uap->options, &context); - return ret; + vnode_put(vp); + file_drop(uap->fd); + if (auio) { + *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); + } else { + *retval = (user_ssize_t)attrsize; + } + return (error); } -void -clean_up_fmod_watch(struct proc *p) +/* + * Common routine to handle various flavors of statfs data heading out + * to user space. 
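The trickiest part of the 32-bit path below is reporting volumes whose block counts no longer fit a 32-bit long. The shift-and-clip arithmetic, distilled into a user-space demo in which int32_t models the 32-bit long:

#include <stdio.h>
#include <stdint.h>

static void
shrink_to_32(uint64_t blocks, uint64_t bsize, int32_t *oblocks, int32_t *obsize)
{
        int shift;

        /* shift the count down until it fits, without overflowing bsize */
        for (shift = 0; shift < 32; shift++) {
                if ((blocks >> shift) <= INT32_MAX)
                        break;
                if ((bsize << (shift + 1)) > INT32_MAX)
                        break;
        }
        *oblocks = (int32_t)(((blocks >> shift) > INT32_MAX) ?
            INT32_MAX : (blocks >> shift));
        *obsize = (int32_t)(bsize << shift);    /* inflate the blocksize */
}

int
main(void)
{
        int32_t b, bs;

        /* a ~48 TiB volume with 4 KiB blocks: too many blocks for a long */
        shrink_to_32(6ULL << 31, 4096, &b, &bs);
        printf("reported: %d blocks of %d bytes\n", (int)b, (int)bs);
        return (0);
}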
+ */ +static int +munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp, int *sizep, boolean_t is_64_bit, + boolean_t partial_copy) { - if (fmod_watch_enabled && fmod_watch_owner == p->p_pid) { - register_t *retval; + int error; + int my_size, copy_size; + + if (is_64_bit) { + struct user_statfs sfs; + my_size = copy_size = sizeof(sfs); + bzero(&sfs, my_size); + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_reserved1 = (short)sfsp->f_fssubtype; + sfs.f_bsize = (user_long_t)sfsp->f_bsize; + sfs.f_iosize = (user_long_t)sfsp->f_iosize; + sfs.f_blocks = (user_long_t)sfsp->f_blocks; + sfs.f_bfree = (user_long_t)sfsp->f_bfree; + sfs.f_bavail = (user_long_t)sfsp->f_bavail; + sfs.f_files = (user_long_t)sfsp->f_files; + sfs.f_ffree = (user_long_t)sfsp->f_ffree; + sfs.f_fsid = sfsp->f_fsid; + sfs.f_owner = sfsp->f_owner; + strncpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN-1); + strncpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN-1); + strncpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN-1); + + if (partial_copy) { + copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4)); + } + error = copyout((caddr_t)&sfs, bufp, copy_size); + } + else { + struct statfs sfs; + my_size = copy_size = sizeof(sfs); + bzero(&sfs, my_size); + + sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + sfs.f_type = mp->mnt_vtable->vfc_typenum; + sfs.f_reserved1 = (short)sfsp->f_fssubtype; + + /* + * It's possible for there to be more than 2^^31 blocks in the filesystem, so we + * have to fudge the numbers here in that case. We inflate the blocksize in order + * to reflect the filesystem size as best we can. + */ + if ((sfsp->f_blocks > LONG_MAX) + /* Hack for 4061702 . I think the real fix is for Carbon to + * look for some volume capability and not depend on hidden + * semantics agreed between a FS and carbon. + * f_blocks, f_bfree, and f_bavail set to -1 is the trigger + * for Carbon to set bNoVolumeSizes volume attribute. + * Without this the webdavfs files cannot be copied onto + * disk as they look huge. This change should not affect + * XSAN as they should not setting these to -1.. + */ + && (sfsp->f_blocks != 0xffffffffffffffff) + && (sfsp->f_bfree != 0xffffffffffffffff) + && (sfsp->f_bavail != 0xffffffffffffffff)) { + int shift; + + /* + * Work out how far we have to shift the block count down to make it fit. + * Note that it's possible to have to shift so far that the resulting + * blocksize would be unreportably large. At that point, we will clip + * any values that don't fit. + * + * For safety's sake, we also ensure that f_iosize is never reported as + * being smaller than f_bsize. + */ + for (shift = 0; shift < 32; shift++) { + if ((sfsp->f_blocks >> shift) <= LONG_MAX) + break; + if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX) + break; + } +#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? 
LONG_MAX : ((x) >> (s))) + sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift); + sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift); + sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift); +#undef __SHIFT_OR_CLIP + sfs.f_bsize = (long)(sfsp->f_bsize << shift); + sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize); + } else { + /* filesystem is small enough to be reported honestly */ + sfs.f_bsize = (long)sfsp->f_bsize; + sfs.f_iosize = (long)sfsp->f_iosize; + sfs.f_blocks = (long)sfsp->f_blocks; + sfs.f_bfree = (long)sfsp->f_bfree; + sfs.f_bavail = (long)sfsp->f_bavail; + } + sfs.f_files = (long)sfsp->f_files; + sfs.f_ffree = (long)sfsp->f_ffree; + sfs.f_fsid = sfsp->f_fsid; + sfs.f_owner = sfsp->f_owner; + strncpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN-1); + strncpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN-1); + strncpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN-1); + + if (partial_copy) { + copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4)); + } + error = copyout((caddr_t)&sfs, bufp, copy_size); + } - disable_fmod_watching(&retval); - } + if (sizep != NULL) { + *sizep = my_size; + } + return(error); +} + +/* + * copy stat structure into user_stat structure. + */ +void munge_stat(struct stat *sbp, struct user_stat *usbp) +{ + usbp->st_dev = sbp->st_dev; + usbp->st_ino = sbp->st_ino; + usbp->st_mode = sbp->st_mode; + usbp->st_nlink = sbp->st_nlink; + usbp->st_uid = sbp->st_uid; + usbp->st_gid = sbp->st_gid; + usbp->st_rdev = sbp->st_rdev; +#ifndef _POSIX_SOURCE + usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec; + usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec; + usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec; + usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec; + usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec; + usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec; +#else + usbp->st_atime = sbp->st_atime; + usbp->st_atimensec = sbp->st_atimensec; + usbp->st_mtime = sbp->st_mtime; + usbp->st_mtimensec = sbp->st_mtimensec; + usbp->st_ctime = sbp->st_ctime; + usbp->st_ctimensec = sbp->st_ctimensec; +#endif + usbp->st_size = sbp->st_size; + usbp->st_blocks = sbp->st_blocks; + usbp->st_blksize = sbp->st_blksize; + usbp->st_flags = sbp->st_flags; + usbp->st_gen = sbp->st_gen; + usbp->st_lspare = sbp->st_lspare; + usbp->st_qspare[0] = sbp->st_qspare[0]; + usbp->st_qspare[1] = sbp->st_qspare[1]; } diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c index 7c2f193f4..01a49889e 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -454,6 +454,91 @@ toolong: } +/* + * utf8_validatestr - Check for a valid UTF-8 string. 
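Representative inputs for the routine below, exercised through a hypothetical harness that links against it: well-formed sequences pass, while overlong encodings (which decode below the range floor), bare continuation bytes, and UTF-16 surrogate code points all take the invalid path:

#include <stdio.h>
#include <stddef.h>

extern int utf8_validatestr(const unsigned char *utf8p, size_t utf8len);

static void
check(const char *label, const char *s, size_t len)
{
        printf("%-20s -> %s\n", label,
            utf8_validatestr((const unsigned char *)s, len) ? "EINVAL" : "ok");
}

int
main(void)
{
        check("plain ascii", "hello", 5);
        check("2-byte U+00E9", "\xc3\xa9", 2);
        check("overlong NUL", "\xc0\x80", 2);   /* decodes below 0x80 */
        check("bare continuation", "\x80", 1);
        check("surrogate U+D800", "\xed\xa0\x80", 3);
        return (0);
}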
+ */ +int +utf8_validatestr(const u_int8_t* utf8p, size_t utf8len) +{ + unsigned int byte; + u_int32_t ch; + unsigned int ucs_ch; + size_t extrabytes; + + while (utf8len-- > 0 && (byte = *utf8p++) != '\0') { + if (byte < 0x80) + continue; /* plain ascii */ + + extrabytes = utf_extrabytes[byte >> 3]; + + if (utf8len < extrabytes) + goto invalid; + utf8len -= extrabytes; + + switch (extrabytes) { + case 1: + ch = byte; ch <<= 6; /* 1st byte */ + byte = *utf8p++; /* 2nd byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; + ch -= 0x00003080UL; + if (ch < 0x0080) + goto invalid; + break; + case 2: + ch = byte; ch <<= 6; /* 1st byte */ + byte = *utf8p++; /* 2nd byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; ch <<= 6; + byte = *utf8p++; /* 3rd byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; + ch -= 0x000E2080UL; + if (ch < 0x0800) + goto invalid; + if (ch >= 0xD800) { + if (ch <= 0xDFFF) + goto invalid; + if (ch == 0xFFFE || ch == 0xFFFF) + goto invalid; + } + break; + case 3: + ch = byte; ch <<= 6; /* 1st byte */ + byte = *utf8p++; /* 2nd byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; ch <<= 6; + byte = *utf8p++; /* 3rd byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; ch <<= 6; + byte = *utf8p++; /* 4th byte */ + if ((byte >> 6) != 2) + goto invalid; + ch += byte; + ch -= 0x03C82080UL + SP_HALF_BASE; + ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST; + if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) + goto invalid; + ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST; + if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST) + goto invalid; + break; + default: + goto invalid; + } + + } + return (0); +invalid: + return (EINVAL); +} + + /* * Unicode 3.2 decomposition code (derived from Core Foundation) */ diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 6e77308f8..3eb3f1522 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -62,107 +62,161 @@ */ #include +#include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include +#include #include -static int vn_closefile __P((struct file *fp, struct proc *p)); -static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, - struct proc *p)); -static int vn_read __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int vn_write __P((struct file *fp, struct uio *uio, - struct ucred *cred, int flags, struct proc *p)); -static int vn_select __P(( struct file *fp, int which, void * wql, - struct proc *p)); -static int vn_kqfilt_add __P((struct file *fp, struct knote *kn, struct proc *p)); -static int vn_kqfilt_remove __P((struct vnode *vp, uintptr_t ident, struct proc *p)); + + +static int vn_closefile(struct fileglob *fp, struct proc *p); +static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, struct proc *p); +static int vn_read(struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int vn_write(struct fileproc *fp, struct uio *uio, + kauth_cred_t cred, int flags, struct proc *p); +static int vn_select( struct fileproc *fp, int which, void * wql, struct proc *p); +static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, struct proc *p); +#if 0 +static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, struct proc *p); +#endif struct fileops vnops = - { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add }; + { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, 0 }; /* * Common code for vnode open operations. - * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. + * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine. + * + * XXX the profusion of interfaces here is probably a bad thing. 
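However many layers there are, user space drives them all with a single open(2); the exclusive-create case, for instance, exercises the O_CREAT/O_EXCL handling below (the path here is purely illustrative):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
        int fd = open("/tmp/vn_open_demo", O_CREAT | O_EXCL | O_WRONLY, 0644);

        if (fd == -1 && errno == EEXIST) {
                printf("exists already: O_EXCL forbids reuse\n");
                return (1);
        }
        if (fd == -1) {
                fprintf(stderr, "open: %s\n", strerror(errno));
                return (1);
        }
        printf("created exclusively\n");
        close(fd);
        unlink("/tmp/vn_open_demo");
        return (0);
}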
*/ int -vn_open(ndp, fmode, cmode) - register struct nameidata *ndp; - int fmode, cmode; +vn_open(struct nameidata *ndp, int fmode, int cmode) { - return vn_open_modflags(ndp,&fmode,cmode); + return(vn_open_modflags(ndp, &fmode, cmode)); } -__private_extern__ int -vn_open_modflags(ndp, fmodep, cmode) - register struct nameidata *ndp; - int *fmodep; - int cmode; +int +vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) { - register struct vnode *vp; - register struct proc *p = ndp->ni_cnd.cn_proc; - register struct ucred *cred = p->p_ucred; - struct vattr vat; - struct vattr *vap = &vat; + struct vnode_attr va; + + VATTR_INIT(&va); + VATTR_SET(&va, va_mode, cmode); + + return(vn_open_auth(ndp, fmodep, &va)); +} + +int +vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) +{ + struct vnode *vp; + struct vnode *dvp; + vfs_context_t ctx = ndp->ni_cnd.cn_context; int error; - int didhold = 0; - char *nameptr; - int fmode = *fmodep; + int fmode; + kauth_action_t action; +again: + vp = NULL; + dvp = NULL; + fmode = *fmodep; if (fmode & O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | AUDITVNPATH1; + if ((fmode & O_EXCL) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; - bwillwrite(); - if (error = namei(ndp)) - return (error); - if (ndp->ni_vp == NULL) { - nameptr = add_name(ndp->ni_cnd.cn_nameptr, - ndp->ni_cnd.cn_namelen, - ndp->ni_cnd.cn_hash, 0); - - VATTR_NULL(vap); - vap->va_type = VREG; - vap->va_mode = cmode; + if ( (error = namei(ndp)) ) + goto out; + dvp = ndp->ni_dvp; + vp = ndp->ni_vp; + + /* not found, create */ + if (vp == NULL) { + /* must have attributes for a new file */ + if (vap == NULL) { + error = EINVAL; + goto badcreate; + } + + /* authorize before creating */ + if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) + goto badcreate; + + VATTR_SET(vap, va_type, VREG); if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; - VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); - if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, - &ndp->ni_cnd, vap)) { - remove_name(nameptr); - return (error); - } - fmode &= ~O_TRUNC; - vp = ndp->ni_vp; + + if ((error = vn_create(dvp, &ndp->ni_vp, &ndp->ni_cnd, vap, 0, ctx)) != 0) + goto badcreate; - VNAME(vp) = nameptr; - if (vget(ndp->ni_dvp, 0, p) == 0) { - VPARENT(vp) = ndp->ni_dvp; + vp = ndp->ni_vp; + + if (vp) { + int update_flags = 0; + + // Make sure the name & parent pointers are hooked up + if (vp->v_name == NULL) + update_flags |= VNODE_UPDATE_NAME; + if (vp->v_parent == NULLVP) + update_flags |= VNODE_UPDATE_PARENT; + + if (update_flags) + vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags); + + if (need_fsevent(FSE_CREATE_FILE, vp)) { + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } + } + /* + * nameidone has to happen before we vnode_put(dvp) + * and clear the ni_dvp field, since it may need + * to release the fs_nodelock on the dvp + */ +badcreate: + nameidone(ndp); + ndp->ni_dvp = NULL; + vnode_put(dvp); + + if (error) { + /* + * Check for a creation race. 
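
vn_open_modflags() above is now just an adapter: it packs the old-style create mode into a struct vnode_attr and forwards to vn_open_auth(). A minimal sketch of the same initialize-then-set pattern, assuming only the VATTR_INIT/VATTR_SET macros introduced by this patch; the helper name is illustrative:

    /*
     * Sketch: build creation attributes the way vn_open_modflags() does.
     * Fields never touched by VATTR_SET() stay inactive, which the
     * lower layers treat as "no preference".
     */
    static int
    open_with_mode_example(struct nameidata *ndp, int *fmodep, int cmode)
    {
        struct vnode_attr va;

        VATTR_INIT(&va);                /* mark every field inactive */
        VATTR_SET(&va, va_mode, cmode); /* activate just va_mode */

        return (vn_open_auth(ndp, fmodep, &va));
    }
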
+ */ + if ((error == EEXIST) && !(fmode & O_EXCL)) { + goto again; + } + goto bad; } + fmode &= ~O_TRUNC; } else { - VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); - if (ndp->ni_dvp == ndp->ni_vp) - vrele(ndp->ni_dvp); - else - vput(ndp->ni_dvp); + nameidone(ndp); ndp->ni_dvp = NULL; - vp = ndp->ni_vp; + vnode_put(dvp); + if (fmode & O_EXCL) { error = EEXIST; goto bad; @@ -172,12 +226,14 @@ vn_open_modflags(ndp, fmodep, cmode) } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | AUDITVNPATH1; - if (error = namei(ndp)) - return (error); + if ( (error = namei(ndp)) ) + goto out; vp = ndp->ni_vp; + nameidone(ndp); + ndp->ni_dvp = NULL; } - if (vp->v_type == VSOCK) { - error = EOPNOTSUPP; + if (vp->v_type == VSOCK && vp->v_tag != VT_FDESC) { + error = EOPNOTSUPP; /* Operation not supported on socket */ goto bad; } @@ -186,151 +242,267 @@ vn_open_modflags(ndp, fmodep, cmode) panic("vn_open: ubc_info_init"); #endif /* DIAGNOSTIC */ - if (UBCINFOEXISTS(vp) && ((didhold = ubc_hold(vp)) == 0)) { - error = ENOENT; - goto bad; - } - + /* authorize open of an existing file */ if ((fmode & O_CREAT) == 0) { - if (fmode & FREAD && fmode & (FWRITE | O_TRUNC)) { - int err = 0; - if (vp->v_type == VDIR) - err = EISDIR; - else - err = vn_writechk(vp); - if (err && !(error = VOP_ACCESS(vp, VREAD, cred, p))) - error = err; - if (error || (error = VOP_ACCESS(vp, VREAD|VWRITE, - cred, p))) - goto bad; - } else if (fmode & FREAD) { - if ((error = VOP_ACCESS(vp, VREAD, cred, p))) - goto bad; - } else if (fmode & (FWRITE | O_TRUNC)) { - if (vp->v_type == VDIR) { - error = EISDIR; - goto bad; - } - if ((error = vn_writechk(vp)) || - (error = VOP_ACCESS(vp, VWRITE, cred, p))) - goto bad; + + /* disallow write operations on directories */ + if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { + error = EISDIR; + goto bad; } + + /* compute action to be authorized */ + action = 0; + if (fmode & FREAD) + action |= KAUTH_VNODE_READ_DATA; + if (fmode & (FWRITE | O_TRUNC)) + action |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) + goto bad; + } - if (error = VOP_OPEN(vp, fmode, cred, p)) { + if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) { goto bad; } + if ( (error = vnode_ref_ext(vp, fmode)) ) + goto bad; + + /* call out to allow 3rd party notification of open. + * Ignore result of kauth_authorize_fileop call. + */ + kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, + (uintptr_t)vp, 0); - if (fmode & FWRITE) - if (++vp->v_writecount <= 0) - panic("vn_open: v_writecount"); *fmodep = fmode; return (0); bad: - VOP_UNLOCK(vp, 0, p); - if (didhold) - ubc_rele(vp); - vrele(vp); ndp->ni_vp = NULL; + if (vp) { + vnode_put(vp); + /* + * Check for a race against unlink. We had a vnode + * but according to vnode_authorize or VNOP_OPEN it + * no longer exists. + */ + if ((error == ENOENT) && (*fmodep & O_CREAT)) { + goto again; + } + } +out: return (error); } /* - * Check for write permissions on the specified vnode. - * Prototype text segments cannot be written. + * Authorize an action against a vnode. This has been the canonical way to + * ensure that the credential/process/etc. referenced by a vfs_context + * is granted the rights called out in 'mode' against the vnode 'vp'. + * + * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult + * to add support for more rights. As such, this interface will be deprecated + * and callers will use vnode_authorize instead. 
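
The authorization step above folds the open mode down to a single kauth action mask before one vnode_authorize() call. A hedged restatement of just that mapping (the helper is illustrative, not part of the patch):

    static kauth_action_t
    open_mode_to_action_example(int fmode)
    {
        kauth_action_t action = 0;

        if (fmode & FREAD)
            action |= KAUTH_VNODE_READ_DATA;
        /* O_TRUNC rewrites the file, so it needs write rights too */
        if (fmode & (FWRITE | O_TRUNC))
            action |= KAUTH_VNODE_WRITE_DATA;
        return (action);
    }
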
*/ +#warning vn_access is deprecated int -vn_writechk(vp) - register struct vnode *vp; +vn_access(vnode_t vp, int mode, vfs_context_t context) { - - /* - * If there's shared text associated with - * the vnode, try to free it up once. If - * we fail, we can't allow writing. - */ -#if 0 - /* XXXXX Not sure we need this */ - if (vp->v_flag & VTEXT) - return (ETXTBSY); -#endif /* 0 */ - return (0); + kauth_action_t action; + + action = 0; + if (mode & VREAD) + action |= KAUTH_VNODE_READ_DATA; + if (mode & VWRITE) + action |= KAUTH_VNODE_WRITE_DATA; + if (mode & VEXEC) + action |= KAUTH_VNODE_EXECUTE; + + return(vnode_authorize(vp, NULL, action, context)); } /* * Vnode close call */ int -vn_close(vp, flags, cred, p) - register struct vnode *vp; - int flags; - struct ucred *cred; - struct proc *p; +vn_close(struct vnode *vp, int flags, kauth_cred_t cred, struct proc *p) { + struct vfs_context context; int error; - if (flags & FWRITE) { - - vp->v_writecount--; - - { - extern void notify_filemod_watchers(struct vnode *vp, struct proc *p); + context.vc_proc = p; + context.vc_ucred = cred; - notify_filemod_watchers(vp, p); + if (flags & FWASWRITTEN) { + if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { + add_fsevent(FSE_CONTENT_MODIFIED, &context, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); } } - error = VOP_CLOSE(vp, flags, cred, p); - ubc_rele(vp); - vrele(vp); + error = VNOP_CLOSE(vp, flags, &context); + (void)vnode_rele_ext(vp, flags, 0); + return (error); } +static int +vn_read_swapfile( + struct vnode *vp, + uio_t uio) +{ + static char *swap_read_zero_page = NULL; + int error; + off_t swap_count, this_count; + off_t file_end, read_end; + off_t prev_resid; + + /* + * Reading from a swap file will get you all zeroes. + */ + error = 0; + swap_count = uio_resid(uio); + + file_end = ubc_getsize(vp); + read_end = uio->uio_offset + uio_resid(uio); + if (uio->uio_offset >= file_end) { + /* uio starts after end of file: nothing to read */ + swap_count = 0; + } else if (read_end > file_end) { + /* uio extends beyond end of file: stop before that */ + swap_count -= (read_end - file_end); + } + + while (swap_count > 0) { + if (swap_read_zero_page == NULL) { + char *my_zero_page; + int funnel_state; + + /* + * Take kernel funnel so that only one thread + * sets up "swap_read_zero_page". + */ + funnel_state = thread_funnel_set(kernel_flock, TRUE); + + if (swap_read_zero_page == NULL) { + MALLOC(my_zero_page, char *, PAGE_SIZE, + M_TEMP, M_WAITOK); + memset(my_zero_page, '?', PAGE_SIZE); + /* + * Adding a newline character here + * and there prevents "less(1)", for + * example, from getting too confused + * about a file with one really really + * long line. + */ + my_zero_page[PAGE_SIZE-1] = '\n'; + if (swap_read_zero_page == NULL) { + swap_read_zero_page = my_zero_page; + } else { + FREE(my_zero_page, M_TEMP); + } + } else { + /* + * Someone else raced us here and won; + * just use their page. + */ + } + thread_funnel_set(kernel_flock, funnel_state); + } + + this_count = swap_count; + if (this_count > PAGE_SIZE) { + this_count = PAGE_SIZE; + } + + prev_resid = uio_resid(uio); + error = uiomove((caddr_t) swap_read_zero_page, + this_count, + uio); + if (error) { + break; + } + swap_count -= (prev_resid - uio_resid(uio)); + } + + return error; +} /* * Package up an I/O request on a vnode into a uio and do it. 
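
vn_read_swapfile() above never copies out more zeroes than the swap file actually holds: the requested residual is clamped against the file size from ubc_getsize(). A small sketch of that clamp with one worked case (illustrative helper):

    static off_t
    swap_read_clamp_example(off_t file_end, off_t offset, off_t resid)
    {
        off_t count = resid;
        off_t read_end = offset + resid;

        if (offset >= file_end)
            count = 0;                      /* starts at or past EOF */
        else if (read_end > file_end)
            count -= (read_end - file_end); /* trim to EOF */

        /* e.g. file_end 10000, offset 8192, resid 4096 -> 1808 */
        return (count);
    }
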
*/ int -vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) - enum uio_rw rw; - struct vnode *vp; - caddr_t base; - int len; - off_t offset; - enum uio_seg segflg; - int ioflg; - struct ucred *cred; - int *aresid; - struct proc *p; +vn_rdwr( + enum uio_rw rw, + struct vnode *vp, + caddr_t base, + int len, + off_t offset, + enum uio_seg segflg, + int ioflg, + kauth_cred_t cred, + int *aresid, + struct proc *p) { - struct uio auio; - struct iovec aiov; + return vn_rdwr_64(rw, + vp, + (uint64_t)(uintptr_t)base, + (int64_t)len, + offset, + segflg, + ioflg, + cred, + aresid, + p); +} + + +int +vn_rdwr_64( + enum uio_rw rw, + struct vnode *vp, + uint64_t base, + int64_t len, + off_t offset, + enum uio_seg segflg, + int ioflg, + kauth_cred_t cred, + int *aresid, + struct proc *p) +{ + uio_t auio; + int spacetype; + struct vfs_context context; int error=0; + char uio_buf[ UIO_SIZEOF(1) ]; + + context.vc_proc = p; + context.vc_ucred = cred; - /* FIXME XXX */ - if ((ioflg & IO_NODELOCKED) == 0) - (void)vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - aiov.iov_base = base; - aiov.iov_len = len; - auio.uio_resid = len; - auio.uio_offset = offset; - auio.uio_segflg = segflg; - auio.uio_rw = rw; - auio.uio_procp = p; - - if (rw == UIO_READ) - error = VOP_READ(vp, &auio, ioflg, cred); - else - error = VOP_WRITE(vp, &auio, ioflg, cred); + if (UIO_SEG_IS_USER_SPACE(segflg)) { + spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + } + else { + spacetype = UIO_SYSSPACE; + } + auio = uio_createwithbuffer(1, offset, spacetype, rw, + &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, base, len); + + if (rw == UIO_READ) { + if (vp->v_flag & VSWAP) { + error = vn_read_swapfile(vp, auio); + } else { + error = VNOP_READ(vp, auio, ioflg, &context); + } + } else { + error = VNOP_WRITE(vp, auio, ioflg, &context); + } if (aresid) - *aresid = auio.uio_resid; + // LP64todo - fix this + *aresid = uio_resid(auio); else - if (auio.uio_resid && error == 0) + if (uio_resid(auio) && error == 0) error = EIO; - if ((ioflg & IO_NODELOCKED) == 0) - VOP_UNLOCK(vp, 0, p); return (error); } @@ -338,81 +510,39 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) * File table vnode read routine. 
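
vn_rdwr_64() above shows the new pattern for a one-shot kernel I/O: a single-iovec uio built inside a stack buffer sized with UIO_SIZEOF(), so nothing is heap-allocated. A minimal sketch for a kernel-space read, assuming the uio KPI exactly as this patch uses it:

    static int
    kernel_read_example(vnode_t vp, caddr_t buf, int len, off_t offset,
        vfs_context_t ctx)
    {
        char uio_buf[UIO_SIZEOF(1)];    /* room for a 1-iovec uio */
        uio_t auio;

        auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, UIO_READ,
            &uio_buf[0], sizeof(uio_buf));
        uio_addiov(auio, CAST_USER_ADDR_T(buf), len);

        return (VNOP_READ(vp, auio, 0, ctx));
    }
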
*/ static int -vn_read(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +vn_read(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, + int flags, struct proc *p) { struct vnode *vp; int error, ioflag; off_t count; + struct vfs_context context; - if (p != uio->uio_procp) - panic("vn_read: uio_procp does not match p"); + context.vc_proc = p; + context.vc_ucred = cred; - vp = (struct vnode *)fp->f_data; + vp = (struct vnode *)fp->f_fglob->fg_data; + if ( (error = vnode_getwithref(vp)) ) { + return(error); + } ioflag = 0; - if (fp->f_flag & FNONBLOCK) + if (fp->f_fglob->fg_flag & FNONBLOCK) ioflag |= IO_NDELAY; - VOP_LEASE(vp, p, cred, LEASE_READ); - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) - return (error); + if ((flags & FOF_OFFSET) == 0) - uio->uio_offset = fp->f_offset; - count = uio->uio_resid; - - if(UBCINFOEXISTS(vp)) { - memory_object_t pager; - struct iovec *iov; - off_t file_off; - kern_return_t kr = KERN_SUCCESS; - kern_return_t ret = KERN_SUCCESS; - int count; - - pager = (memory_object_t)ubc_getpager(vp); - file_off = uio->uio_offset; - iov = uio->uio_iov; - count = uio->uio_iovcnt; - while(count) { - kr = vm_conflict_check(current_map(), - (vm_offset_t)iov->iov_base, iov->iov_len, - pager, file_off); - if(kr == KERN_ALREADY_WAITING) { - if((count != uio->uio_iovcnt) && - (ret != KERN_ALREADY_WAITING)) { - error = EINVAL; - goto done; - } - ret = KERN_ALREADY_WAITING; - } else if (kr != KERN_SUCCESS) { - error = EINVAL; - goto done; - } - if(kr != ret) { - error = EINVAL; - goto done; - } - file_off += iov->iov_len; - iov++; - count--; - } - if(ret == KERN_ALREADY_WAITING) { - uio->uio_resid = 0; - if ((flags & FOF_OFFSET) == 0) - fp->f_offset += - count - uio->uio_resid; - error = 0; - goto done; - } + uio->uio_offset = fp->f_fglob->fg_offset; + count = uio_resid(uio); + + if (vp->v_flag & VSWAP) { + /* special case for swap files */ + error = vn_read_swapfile(vp, uio); + } else { + error = VNOP_READ(vp, uio, ioflag, &context); } - error = VOP_READ(vp, uio, ioflag, cred); if ((flags & FOF_OFFSET) == 0) - fp->f_offset += count - uio->uio_resid; -done: - VOP_UNLOCK(vp, 0, p); + fp->f_fglob->fg_offset += count - uio_resid(uio); + + (void)vnode_put(vp); return (error); } @@ -421,91 +551,49 @@ done: * File table vnode write routine. 
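
vn_read() above replaces the old vn_lock/VOP_UNLOCK pairing with an iocount: vnode_getwithref() fails once the vnode has been reclaimed, and vnode_put() drops the reference. The bracket in isolation (illustrative helper):

    static int
    vnode_iocount_bracket_example(vnode_t vp)
    {
        int error;

        if ((error = vnode_getwithref(vp)))
            return (error);   /* vnode is dead or draining */

        /* ... VNOP_READ/VNOP_WRITE are safe in here ... */

        (void)vnode_put(vp);  /* drop the iocount taken above */
        return (0);
    }
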
*/ static int -vn_write(fp, uio, cred, flags, p) - struct file *fp; - struct uio *uio; - struct ucred *cred; - int flags; - struct proc *p; +vn_write(struct fileproc *fp, struct uio *uio, kauth_cred_t cred, + int flags, struct proc *p) { struct vnode *vp; int error, ioflag; off_t count; - - if (p != uio->uio_procp) - panic("vn_write: uio_procp does not match p"); - - vp = (struct vnode *)fp->f_data; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = cred; + count = 0; + vp = (struct vnode *)fp->f_fglob->fg_data; + if ( (error = vnode_getwithref(vp)) ) { + return(error); + } ioflag = IO_UNIT; - if (vp->v_type == VREG) - bwillwrite(); - if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) + if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND)) ioflag |= IO_APPEND; - if (fp->f_flag & FNONBLOCK) + if (fp->f_fglob->fg_flag & FNONBLOCK) ioflag |= IO_NDELAY; - if ((fp->f_flag & O_FSYNC) || + if ((fp->f_fglob->fg_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; - VOP_LEASE(vp, p, cred, LEASE_WRITE); - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) - return (error); + if ((flags & FOF_OFFSET) == 0) { - uio->uio_offset = fp->f_offset; - count = uio->uio_resid; + uio->uio_offset = fp->f_fglob->fg_offset; + count = uio_resid(uio); } - - if(UBCINFOEXISTS(vp)) { - memory_object_t pager; - struct iovec *iov; - off_t file_off; - kern_return_t kr = KERN_SUCCESS; - kern_return_t ret = KERN_SUCCESS; - int count; - - pager = (memory_object_t)ubc_getpager(vp); - file_off = uio->uio_offset; - iov = uio->uio_iov; - count = uio->uio_iovcnt; - while(count) { - kr = vm_conflict_check(current_map(), - (vm_offset_t)iov->iov_base, - iov->iov_len, pager, file_off); - if(kr == KERN_ALREADY_WAITING) { - if((count != uio->uio_iovcnt) && - (ret != KERN_ALREADY_WAITING)) { - error = EINVAL; - goto done; - } - ret = KERN_ALREADY_WAITING; - } else if (kr != KERN_SUCCESS) { - error = EINVAL; - goto done; - } - if(kr != ret) { - error = EINVAL; - goto done; - } - file_off += iov->iov_len; - iov++; - count--; - } - if(ret == KERN_ALREADY_WAITING) { - uio->uio_resid = 0; - if ((flags & FOF_OFFSET) == 0) - fp->f_offset += - count - uio->uio_resid; - error = 0; - goto done; - } + if (p && (vp->v_type == VREG) && + (((uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || + (uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { + psignal(p, SIGXFSZ); + vnode_put(vp); + return (EFBIG); } - error = VOP_WRITE(vp, uio, ioflag, cred); + + error = VNOP_WRITE(vp, uio, ioflag, &context); if ((flags & FOF_OFFSET) == 0) { if (ioflag & IO_APPEND) - fp->f_offset = uio->uio_offset; + fp->f_fglob->fg_offset = uio->uio_offset; else - fp->f_offset += count - uio->uio_resid; + fp->f_fglob->fg_offset += count - uio_resid(uio); } /* @@ -514,9 +602,7 @@ vn_write(fp, uio, cred, flags, p) if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { ubc_setcred(vp, p); } - -done: - VOP_UNLOCK(vp, 0, p); + (void)vnode_put(vp); return (error); } @@ -524,26 +610,45 @@ done: * File table vnode stat routine. 
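
The RLIMIT_FSIZE test in vn_write() above is deliberately written as two comparisons: the first catches the ordinary oversize write, the second catches the case where offset + resid wraps around. A hedged sketch of the same predicate, with types simplified to off_t:

    static int
    write_exceeds_limit_example(off_t offset, off_t resid, off_t limit)
    {
        if ((offset + resid) > limit)   /* ordinary case */
            return (1);
        if (resid > (limit - offset))   /* survives offset+resid overflow */
            return (1);
        return (0);
    }
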
*/ int -vn_stat(vp, sb, p) - struct vnode *vp; - register struct stat *sb; - struct proc *p; +vn_stat_noauth(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx) { - struct vattr vattr; - register struct vattr *vap; + struct vnode_attr va; int error; u_short mode; - - vap = &vattr; - error = VOP_GETATTR(vp, vap, p->p_ucred, p); + kauth_filesec_t fsec; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_type); + VATTR_WANTED(&va, va_nlink); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_rdev); + VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_access_time); + VATTR_WANTED(&va, va_modify_time); + VATTR_WANTED(&va, va_change_time); + VATTR_WANTED(&va, va_flags); + VATTR_WANTED(&va, va_gen); + VATTR_WANTED(&va, va_iosize); + /* lower layers will synthesise va_total_alloc from va_data_size if required */ + VATTR_WANTED(&va, va_total_alloc); + if (xsec != NULL) { + VATTR_WANTED(&va, va_uuuid); + VATTR_WANTED(&va, va_guuid); + VATTR_WANTED(&va, va_acl); + } + error = vnode_getattr(vp, &va, ctx); if (error) - return (error); + goto out; /* * Copy from vattr table */ - sb->st_dev = vap->va_fsid; - sb->st_ino = vap->va_fileid; - mode = vap->va_mode; + sb->st_dev = va.va_fsid; + sb->st_ino = (ino_t)va.va_fileid; + mode = va.va_mode; switch (vp->v_type) { case VREG: mode |= S_IFREG; @@ -567,92 +672,175 @@ vn_stat(vp, sb, p) mode |= S_IFIFO; break; default: - return (EBADF); + error = EBADF; + goto out; }; sb->st_mode = mode; - sb->st_nlink = vap->va_nlink; - sb->st_uid = vap->va_uid; - sb->st_gid = vap->va_gid; - sb->st_rdev = vap->va_rdev; - sb->st_size = vap->va_size; - sb->st_atimespec = vap->va_atime; - sb->st_mtimespec = vap->va_mtime; - sb->st_ctimespec = vap->va_ctime; - sb->st_blksize = vap->va_blocksize; - sb->st_flags = vap->va_flags; + sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? 
(u_int16_t)va.va_nlink : 1;
+    sb->st_uid = va.va_uid;
+    sb->st_gid = va.va_gid;
+    sb->st_rdev = va.va_rdev;
+    sb->st_size = va.va_data_size;
+    sb->st_atimespec = va.va_access_time;
+    sb->st_mtimespec = va.va_modify_time;
+    sb->st_ctimespec = va.va_change_time;
+    sb->st_blksize = va.va_iosize;
+    sb->st_flags = va.va_flags;
+    sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
+
+    /* if we're interested in extended security data and we got an ACL */
+    if (xsec != NULL) {
+        if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
+            !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
+            !VATTR_IS_SUPPORTED(&va, va_guuid)) {
+            *xsec = KAUTH_FILESEC_NONE;
+        } else {
+
+            if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
+                fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
+            } else {
+                fsec = kauth_filesec_alloc(0);
+            }
+            if (fsec == NULL) {
+                error = ENOMEM;
+                goto out;
+            }
+            fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
+            if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
+                fsec->fsec_owner = va.va_uuuid;
+            } else {
+                fsec->fsec_owner = kauth_null_guid;
+            }
+            if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
+                fsec->fsec_group = va.va_guuid;
+            } else {
+                fsec->fsec_group = kauth_null_guid;
+            }
+            if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
+                bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
+            } else {
+                fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
+            }
+            *xsec = fsec;
+        }
+    }
+
     /* Do not give the generation number out to unprivileged users */
-    if (vap->va_gen && suser(p->p_ucred, &p->p_acflag))
+    if (va.va_gen && !vfs_context_issuser(ctx))
         sb->st_gen = 0;
     else
-        sb->st_gen = vap->va_gen;
-    sb->st_blocks = vap->va_bytes / S_BLKSIZE;
-    return (0);
+        sb->st_gen = va.va_gen;
+
+    error = 0;
+out:
+    if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
+        kauth_acl_free(va.va_acl);
+    return (error);
+}
+
+int
+vn_stat(struct vnode *vp, struct stat *sb, kauth_filesec_t *xsec, vfs_context_t ctx)
+{
+    int error;
+
+    /* authorize */
+    if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
+        return(error);
+
+    /* actual stat */
+    return(vn_stat_noauth(vp, sb, xsec, ctx));
 }
+
 /*
  * File table vnode ioctl routine.
  */
 static int
 vn_ioctl(fp, com, data, p)
-    struct file *fp;
+    struct fileproc *fp;
     u_long com;
     caddr_t data;
     struct proc *p;
 {
-    register struct vnode *vp = ((struct vnode *)fp->f_data);
-    struct vattr vattr;
+    register struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data);
+    struct vfs_context context;
+    off_t file_size;
     int error;
     struct vnode *ttyvp;
+    int funnel_state;
+    if ( (error = vnode_getwithref(vp)) ) {
+        return(error);
+    }
+    context.vc_proc = p;
+    context.vc_ucred = p->p_ucred;    /* XXX kauth_cred_get() ??? */
+
     switch (vp->v_type) {
     case VREG:
     case VDIR:
         if (com == FIONREAD) {
-            if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
-                return (error);
-            *(int *)data = vattr.va_size - fp->f_offset;
-            return (0);
+            if ((error = vnode_size(vp, &file_size, &context)) != 0)
+                goto out;
+            *(int *)data = file_size - fp->f_fglob->fg_offset;
+            goto out;
+        }
+        if (com == FIONBIO || com == FIOASYNC) {    /* XXX */
+            goto out;
         }
-        if (com == FIONBIO || com == FIOASYNC)    /* XXX */
-            return (0);    /* XXX */
         /* fall into ...
*/ default: - return (ENOTTY); + error = ENOTTY; + goto out; case VFIFO: case VCHR: case VBLK: - /* Should not be able to set block size from user space */ - if(com == DKIOCSETBLOCKSIZE) - return (EPERM); - - if (com == FIODTYPE) { - if (vp->v_type == VBLK) { - if (major(vp->v_rdev) >= nblkdev) - return (ENXIO); - *(int *)data = bdevsw[major(vp->v_rdev)].d_type; - } else if (vp->v_type == VCHR) { - if (major(vp->v_rdev) >= nchrdev) - return (ENXIO); - *(int *)data = cdevsw[major(vp->v_rdev)].d_type; - } else { - return (ENOTTY); - } - return (0); - } - error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); - if (error == 0 && com == TIOCSCTTY) { - VREF(vp); - ttyvp = p->p_session->s_ttyvp; - p->p_session->s_ttyvp = vp; - if (ttyvp) - vrele(ttyvp); - } - return (error); + /* Should not be able to set block size from user space */ + if (com == DKIOCSETBLOCKSIZE) { + error = EPERM; + goto out; + } + + if (com == FIODTYPE) { + if (vp->v_type == VBLK) { + if (major(vp->v_rdev) >= nblkdev) { + error = ENXIO; + goto out; + } + *(int *)data = bdevsw[major(vp->v_rdev)].d_type; + + } else if (vp->v_type == VCHR) { + if (major(vp->v_rdev) >= nchrdev) { + error = ENXIO; + goto out; + } + *(int *)data = cdevsw[major(vp->v_rdev)].d_type; + } else { + error = ENOTTY; + goto out; + } + goto out; + } + error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, &context); + + if (error == 0 && com == TIOCSCTTY) { + vnode_ref(vp); + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + ttyvp = p->p_session->s_ttyvp; + p->p_session->s_ttyvp = vp; + thread_funnel_set(kernel_flock, funnel_state); + + if (ttyvp) + vnode_rele(ttyvp); + } } +out: + (void)vnode_put(vp); + return(error); } /* @@ -660,14 +848,25 @@ vn_ioctl(fp, com, data, p) */ static int vn_select(fp, which, wql, p) - struct file *fp; + struct fileproc *fp; int which; void * wql; struct proc *p; { + int error; + struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data; + struct vfs_context context; + + if ( (error = vnode_getwithref(vp)) == 0 ) { + context.vc_proc = p; + context.vc_ucred = fp->f_fglob->fg_cred; + + error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, &context); - return(VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag, - fp->f_cred, wql, p)); + (void)vnode_put(vp); + } + return(error); + } /* @@ -675,73 +874,96 @@ vn_select(fp, which, wql, p) * acquire requested lock. */ int -vn_lock(vp, flags, p) - struct vnode *vp; - int flags; - struct proc *p; +vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p) { - int error; - - do { - if ((flags & LK_INTERLOCK) == 0) - simple_lock(&vp->v_interlock); - if (vp->v_flag & VXLOCK) { - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - simple_unlock(&vp->v_interlock); - (void)tsleep((caddr_t)vp, PINOD, "vn_lock", 0); - } - error = ENOENT; - } else { - error = VOP_LOCK(vp, flags | LK_INTERLOCK, p); - if (error == 0) - return (error); - } - flags &= ~LK_INTERLOCK; - } while (flags & LK_RETRY); - return (error); + return (0); } /* * File table vnode close routine. 
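
FIODTYPE handling above indexes bdevsw/cdevsw by the device major number, so the major is bounds-checked before the table lookup. The check distilled into an illustrative helper:

    static int
    fiodtype_example(vnode_t vp, int *typep)
    {
        if (vp->v_type == VBLK) {
            if (major(vp->v_rdev) >= nblkdev)
                return (ENXIO);   /* stale or bogus device */
            *typep = bdevsw[major(vp->v_rdev)].d_type;
            return (0);
        }
        if (vp->v_type == VCHR) {
            if (major(vp->v_rdev) >= nchrdev)
                return (ENXIO);
            *typep = cdevsw[major(vp->v_rdev)].d_type;
            return (0);
        }
        return (ENOTTY);          /* only block/char devices have a type */
    }
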
*/ static int -vn_closefile(fp, p) - struct file *fp; +vn_closefile(fg, p) + struct fileglob *fg; struct proc *p; { + struct vnode *vp = (struct vnode *)fg->fg_data; + int error; + + if ( (error = vnode_getwithref(vp)) == 0 ) { + error = vn_close(vp, fg->fg_flag, fg->fg_cred, p); + + (void)vnode_put(vp); + } + return(error); +} + +int +vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx) +{ + int error = 0; + + switch(name) { + case _PC_EXTENDED_SECURITY_NP: + *retval = vfs_extendedsecurity(vnode_mount(vp)); + break; + case _PC_AUTH_OPAQUE_NP: + *retval = vfs_authopaque(vnode_mount(vp)); + break; + default: + error = VNOP_PATHCONF(vp, name, retval, ctx); + break; + } - return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, - fp->f_cred, p)); + return (error); } static int vn_kqfilt_add(fp, kn, p) - struct file *fp; + struct fileproc *fp; struct knote *kn; struct proc *p; { - struct vnode *vp = (struct vnode *)fp->f_data; + struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data; + struct vfs_context context; int error; + int funnel_state; - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) return (error); - error = VOP_KQFILT_ADD(vp, kn, p); - (void)VOP_UNLOCK(vp, 0, p); + if ( (error = vnode_getwithref(vp)) == 0 ) { + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? */ + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + error = VNOP_KQFILT_ADD(vp, kn, &context); + thread_funnel_set(kernel_flock, funnel_state); + + (void)vnode_put(vp); + } return (error); } +#if 0 +/* No one calls this yet. */ static int vn_kqfilt_remove(vp, ident, p) struct vnode *vp; uintptr_t ident; struct proc *p; { + struct vfs_context context; int error; + int funnel_state; - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (error) return (error); - error = VOP_KQFILT_REMOVE(vp, ident, p); - (void)VOP_UNLOCK(vp, 0, p); + if ( (error = vnode_getwithref(vp)) == 0 ) { + context.vc_proc = p; + context.vc_ucred = p->p_ucred; /* XXX kauth_cred_get() ??? */ + + funnel_state = thread_funnel_set(kernel_flock, TRUE); + error = VNOP_KQFILT_REMOVE(vp, ident, &context); + thread_funnel_set(kernel_flock, funnel_state); + + (void)vnode_put(vp); + } return (error); } +#endif diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c new file mode 100644 index 000000000..7ecca5261 --- /dev/null +++ b/bsd/vfs/vfs_xattr.c @@ -0,0 +1,2007 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+/*
+ * Default xattr support routines.
+ */
+static int default_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size,
+    int options, vfs_context_t context);
+
+static int default_setxattr(vnode_t vp, const char *name, uio_t uio,
+    int options, vfs_context_t context);
+
+static int default_removexattr(vnode_t vp, const char *name, int options, vfs_context_t context);
+
+static int default_listxattr(vnode_t vp, uio_t uio, size_t *size, int options,
+    vfs_context_t context);
+
+
+
+/*
+ * Retrieve the data of an extended attribute.
+ */
+int
+vn_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size,
+    int options, vfs_context_t context)
+{
+    int error;
+
+    if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) {
+        return (EPERM);
+    }
+    if ((error = xattr_validatename(name))) {
+        return (error);
+    }
+    if (!(options & XATTR_NOSECURITY) && (error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_EXTATTRIBUTES, context)))
+        goto out;
+
+    /* The offset can only be non-zero for resource forks. */
+    if (uio != NULL && uio_offset(uio) != 0 &&
+        bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) != 0) {
+        error = EINVAL;
+        goto out;
+    }
+
+    error = VNOP_GETXATTR(vp, name, uio, size, options, context);
+    if (error == ENOTSUP) {
+        /*
+         * A filesystem may keep some EAs natively and return ENOTSUP for others.
+         * SMB returns ENOTSUP for finderinfo and resource forks.
+         */
+        error = default_getxattr(vp, name, uio, size, options, context);
+    }
+out:
+    return (error);
+}
+
+/*
+ * Set the data of an extended attribute.
+ */
+int
+vn_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t context)
+{
+    int error;
+
+    if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) {
+        return (EPERM);
+    }
+    if ((options & (XATTR_REPLACE|XATTR_CREATE)) == (XATTR_REPLACE|XATTR_CREATE)) {
+        return (EINVAL);
+    }
+    if ((error = xattr_validatename(name))) {
+        return (error);
+    }
+    if (!(options & XATTR_NOSECURITY) && (error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_EXTATTRIBUTES, context)))
+        goto out;
+
+    /* The offset can only be non-zero for resource forks. */
+    if (uio_offset(uio) != 0 &&
+        bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) != 0 ) {
+        error = EINVAL;
+        goto out;
+    }
+
+    error = VNOP_SETXATTR(vp, name, uio, options, context);
+#ifdef DUAL_EAS
+    /*
+     * An EJUSTRETURN is from a filesystem which keeps this xattr
+     * natively as well as in a dot-underscore file. In this case the
+     * EJUSTRETURN means the filesystem has done nothing, but identifies the
+     * EA as one which may be represented natively and/or in a DU, and
+     * since XATTR_CREATE or XATTR_REPLACE was specified, only up here
+     * in vn_setxattr can we do the getxattrs needed to ascertain whether
+     * the XATTR_{CREATE,REPLACE} should yield an error.
+     */
+    if (error == EJUSTRETURN) {
+        int native = 0, dufile = 0;
+        size_t sz;    /* not used */
+
+        native = VNOP_GETXATTR(vp, name, NULL, &sz, 0, context) ? 0 : 1;
+        dufile = default_getxattr(vp, name, NULL, &sz, 0, context) ? 0 : 1;
+        if (options & XATTR_CREATE && (native || dufile)) {
+            error = EEXIST;
+            goto out;
+        }
+        if (options & XATTR_REPLACE && !(native || dufile)) {
+            error = ENOATTR;
+            goto out;
+        }
+        /*
+         * Having determined no CREATE/REPLACE error should result, we
+         * zero those bits, so both backing stores get written to.
+         */
+        options &= ~(XATTR_CREATE | XATTR_REPLACE);
+        error = VNOP_SETXATTR(vp, name, uio, options, context);
+        /* the mainline path here is to have error==ENOTSUP ... */
+    }
+#endif /* DUAL_EAS */
+    if (error == ENOTSUP) {
+        /*
+         * A filesystem may keep some EAs natively and return ENOTSUP for others.
+         * SMB returns ENOTSUP for finderinfo and resource forks.
+         */
+        error = default_setxattr(vp, name, uio, options, context);
+    }
+out:
+    return (error);
+}
+
+/*
+ * Remove an extended attribute.
+ */
+int
+vn_removexattr(vnode_t vp, const char * name, int options, vfs_context_t context)
+{
+    int error;
+
+    if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) {
+        return (EPERM);
+    }
+    if ((error = xattr_validatename(name))) {
+        return (error);
+    }
+    if (!(options & XATTR_NOSECURITY) && (error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_EXTATTRIBUTES, context)))
+        goto out;
+    error = VNOP_REMOVEXATTR(vp, name, options, context);
+    if (error == ENOTSUP) {
+        /*
+         * A filesystem may keep some EAs natively and return ENOTSUP for others.
+         * SMB returns ENOTSUP for finderinfo and resource forks.
+         */
+        error = default_removexattr(vp, name, options, context);
+#ifdef DUAL_EAS
+    } else if (error == EJUSTRETURN) {
+        /*
+         * EJUSTRETURN is from a filesystem which keeps this xattr natively as well
+         * as in a dot-underscore file. EJUSTRETURN means the filesystem did remove
+         * a native xattr, so failure to find it in a DU file during
+         * default_removexattr should not be considered an error.
+         */
+        error = default_removexattr(vp, name, options, context);
+        if (error == ENOATTR)
+            error = 0;
+#endif /* DUAL_EAS */
+    }
+out:
+    return (error);
+}
+
+/*
+ * Retrieve the list of extended attribute names.
+ */
+int
+vn_listxattr(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t context)
+{
+    int error;
+
+    if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) {
+        return (EPERM);
+    }
+    if (!(options & XATTR_NOSECURITY) && (error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_EXTATTRIBUTES, context)))
+        goto out;
+
+    error = VNOP_LISTXATTR(vp, uio, size, options, context);
+    if (error == ENOTSUP) {
+        /*
+         * A filesystem may keep some but not all EAs natively, in which case
+         * the native EA names will have been uiomove-d out (or *size updated)
+         * and the default_listxattr here will finish the job. Note SMB takes
+         * advantage of this for its finder-info and resource forks.
+         */
+        error = default_listxattr(vp, uio, size, options, context);
+    }
+out:
+    return (error);
+}
+
+int
+xattr_validatename(const char *name)
+{
+    int namelen;
+
+    if (name == NULL || name[0] == '\0') {
+        return (EINVAL);
+    }
+    namelen = strlen(name);
+    if (namelen > XATTR_MAXNAMELEN) {
+        return (ENAMETOOLONG);
+    }
+    if (utf8_validatestr(name, namelen) != 0) {
+        return (EINVAL);
+    }
+    return (0);
+}
+
+
+/*
+ * Determine whether an EA is a protected system attribute.
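
xattr_validatename() above is the gate every xattr entry point calls first: non-empty, at most XATTR_MAXNAMELEN bytes, and well-formed UTF-8. A few hedged cases, assuming the utf_extrabytes table earlier in this file rejects a lone UTF-8 continuation byte (helper is illustrative):

    static void
    validatename_cases_example(void)
    {
        int a = xattr_validatename("com.example.test"); /* 0: accepted */
        int b = xattr_validatename("");                 /* EINVAL: empty */
        int c = xattr_validatename("\x80");             /* EINVAL: bad UTF-8 */

        (void)a; (void)b; (void)c;
    }
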
+ */
+int
+xattr_protected(const char *attrname)
+{
+    return(!strncmp(attrname, "com.apple.system.", 17));
+}
+
+
+/*
+ * Default Implementation (Non-native EA)
+ */
+
+
+/*
+   Typical "._" AppleDouble Header File layout:
+  ------------------------------------------------------------
+         MAGIC          0x00051607
+         VERSION        0x00020000
+         FILLER         0
+         COUNT          2
+     .-- AD ENTRY[0]    Finder Info Entry  (must be first)
+  .--+-- AD ENTRY[1]    Resource Fork Entry  (must be last)
+  |  '-> FINDER INFO
+  |      /////////////  Fixed Size Data (32 bytes)
+  |      EXT ATTR HDR
+  |      /////////////
+  |      ATTR ENTRY[0] --.
+  |      ATTR ENTRY[1] --+--.
+  |      ATTR ENTRY[2] --+--+--.
+  |         ...          |  |  |
+  |      ATTR ENTRY[N] --+--+--+--.
+  |      ATTR DATA 0   <-'  |  |  |
+  |      ////////////       |  |  |
+  |      ATTR DATA 1   <----'  |  |
+  |      /////////////         |  |
+  |      ATTR DATA 2   <-------'  |
+  |      /////////////            |
+  |         ...                   |
+  |      ATTR DATA N   <----------'
+  |      /////////////
+  |                      Attribute Free Space
+  |
+  '----> RESOURCE FORK
+         /////////////   Variable Sized Data
+         /////////////
+         /////////////
+         /////////////
+         /////////////
+         /////////////
+            ...
+         /////////////
+
+  ------------------------------------------------------------
+
+   NOTE: The EXT ATTR HDR, ATTR ENTRY's and ATTR DATA's are
+   stored as part of the Finder Info.  The length in the Finder
+   Info AppleDouble entry includes the length of the extended
+   attribute header, attribute entries, and attribute data.
+*/
+
+
+/*
+ * On Disk Data Structures
+ *
+ * Note: Motorola 68K alignment and big-endian.
+ *
+ * See RFC 1740 for additional information about the AppleDouble file format.
+ *
+ */
+
+#define ADH_MAGIC     0x00051607
+#define ADH_VERSION   0x00020000
+#define ADH_MACOSX    "Mac OS X        "
+
+/*
+ * AppleDouble Entry ID's
+ */
+#define AD_DATA          1   /* Data fork */
+#define AD_RESOURCE      2   /* Resource fork */
+#define AD_REALNAME      3   /* File's name on home file system */
+#define AD_COMMENT       4   /* Standard Mac comment */
+#define AD_ICONBW        5   /* Mac black & white icon */
+#define AD_ICONCOLOR     6   /* Mac color icon */
+#define AD_UNUSED        7   /* Not used */
+#define AD_FILEDATES     8   /* File dates; create, modify, etc */
+#define AD_FINDERINFO    9   /* Mac Finder info & extended info */
+#define AD_MACINFO      10   /* Mac file info, attributes, etc */
+#define AD_PRODOSINFO   11   /* Pro-DOS file info, attrib., etc */
+#define AD_MSDOSINFO    12   /* MS-DOS file info, attributes, etc */
+#define AD_AFPNAME      13   /* Short name on AFP server */
+#define AD_AFPINFO      14   /* AFP file info, attrib., etc */
+#define AD_AFPDIRID     15   /* AFP directory ID */
+#define AD_ATTRIBUTES   AD_FINDERINFO
+
+
+#define ATTR_FILE_PREFIX   "._"
+#define ATTR_HDR_MAGIC     0x41545452   /* 'ATTR' */
+
+#define ATTR_BUF_SIZE      4096   /* default size of the attr file and how much we'll grow by */
+
+/* Implementation Limits */
+#define ATTR_MAX_SIZE      (128*1024)   /* 128K maximum attribute data size */
+#define ATTR_MAX_HDR_SIZE  65536
+/*
+ * Note: ATTR_MAX_HDR_SIZE is the largest attribute header
+ * size supported (including the attribute entries). All of
+ * the attribute entries must reside within this limit. If
+ * any of the attribute data crosses the ATTR_MAX_HDR_SIZE
+ * boundary, then all of the attribute data I/O is performed
+ * separately from the attribute header I/O.
+ */
+
+
+#pragma options align=mac68k
+
+#define FINDERINFOSIZE 32
+
+typedef struct apple_double_entry {
+    u_int32_t type;     /* entry type: see list, 0 invalid */
+    u_int32_t offset;   /* entry data offset from the beginning of the file. */
+    u_int32_t length;   /* entry data length in bytes.
*/ +} apple_double_entry_t; + + +typedef struct apple_double_header { + u_int32_t magic; /* == ADH_MAGIC */ + u_int32_t version; /* format version: 2 = 0x00020000 */ + u_int32_t filler[4]; + u_int16_t numEntries; /* number of entries which follow */ + apple_double_entry_t entries[2]; /* 'finfo' & 'rsrc' always exist */ + u_int8_t finfo[FINDERINFOSIZE]; /* Must start with Finder Info (32 bytes) */ + u_int8_t pad[2]; /* get better alignment inside attr_header */ +} apple_double_header_t; + +#define ADHDRSIZE (4+4+16+2) + +/* Entries are aligned on 4 byte boundaries */ +typedef struct attr_entry { + u_int32_t offset; /* file offset to data */ + u_int32_t length; /* size of attribute data */ + u_int16_t flags; + u_int8_t namelen; + u_int8_t name[1]; /* NULL-terminated UTF-8 name (up to 128 bytes max) */ +} attr_entry_t; + + +/* Header + entries must fit into 64K */ +typedef struct attr_header { + apple_double_header_t appledouble; + u_int32_t magic; /* == ATTR_HDR_MAGIC */ + u_int32_t debug_tag; /* for debugging == file id of owning file */ + u_int32_t total_size; /* total size of attribute header + entries + data */ + u_int32_t data_start; /* file offset to attribute data area */ + u_int32_t data_length; /* length of attribute data area */ + u_int32_t reserved[3]; + u_int16_t flags; + u_int16_t num_attrs; +} attr_header_t; + + +/* Empty Resource Fork Header */ +typedef struct rsrcfork_header { + u_int32_t fh_DataOffset; + u_int32_t fh_MapOffset; + u_int32_t fh_DataLength; + u_int32_t fh_MapLength; + u_int8_t systemData[112]; + u_int8_t appData[128]; + u_int32_t mh_DataOffset; + u_int32_t mh_MapOffset; + u_int32_t mh_DataLength; + u_int32_t mh_MapLength; + u_int32_t mh_Next; + u_int16_t mh_RefNum; + u_int8_t mh_Attr; + u_int8_t mh_InMemoryAttr; + u_int16_t mh_Types; + u_int16_t mh_Names; + u_int16_t typeCount; +} rsrcfork_header_t; + +#define RF_FIRST_RESOURCE 256 +#define RF_NULL_MAP_LENGTH 30 +#define RF_EMPTY_TAG "This resource fork intentionally left blank " + +#pragma options align=reset + +/* Runtime information about the attribute file. 
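
ADHDRSIZE above counts only the fixed prefix of apple_double_header_t that precedes the entry array, which is worth spelling out (the typedef trick below is an illustrative compile-time check, not part of the patch):

    /*
     * magic (4) + version (4) + filler[4] (4*4 = 16) + numEntries (2) = 26,
     * i.e. ADHDRSIZE == (4+4+16+2); the entries[], finfo[] and pad[]
     * fields that follow are deliberately excluded.
     */
    typedef char adhdrsize_is_26[(ADHDRSIZE == 26) ? 1 : -1];
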
*/ +typedef struct attr_info { + vfs_context_t context; + vnode_t filevp; + size_t filesize; + size_t iosize; + u_int8_t *rawdata; + size_t rawsize; /* raw size of AppleDouble file */ + apple_double_header_t *filehdr; + apple_double_entry_t *finderinfo; + apple_double_entry_t *rsrcfork; + attr_header_t *attrhdr; + attr_entry_t *attr_entry; + u_int8_t readonly; + u_int8_t emptyfinderinfo; +} attr_info_t; + + +#define ATTR_SETTING 1 + +#define ATTR_ALIGN 3L /* Use four-byte alignment */ + +#define ATTR_ENTRY_LENGTH(namelen) \ + ((sizeof(attr_entry_t) - 1 + (namelen) + ATTR_ALIGN) & (~ATTR_ALIGN)) + +#define ATTR_NEXT(ae) \ + (attr_entry_t *)((u_int8_t *)(ae) + ATTR_ENTRY_LENGTH((ae)->namelen)) + +#define ATTR_VALID(ae, ai) \ + ((u_int8_t *)ATTR_NEXT(ae) <= ((ai).rawdata + (ai).rawsize)) + + +#define SWAP16(x) NXSwapBigShortToHost((x)) +#define SWAP32(x) NXSwapBigIntToHost((x)) +#define SWAP64(x) NXSwapBigLongLongToHost((x)) + + +static u_int32_t emptyfinfo[8] = {0}; + + +/* + * Local support routines + */ +static void close_xattrfile(vnode_t xvp, int fileflags, vfs_context_t context); + +static int open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context); + +static int create_xattrfile(vnode_t xvp, u_int32_t fileid, vfs_context_t context); + +static int remove_xattrfile(vnode_t xvp, vfs_context_t context); + +static int get_xattrinfo(vnode_t xvp, int setting, attr_info_t *ainfop, vfs_context_t context); + +static void rel_xattrinfo(attr_info_t *ainfop); + +static int write_xattrinfo(attr_info_t *ainfop); + +static void init_empty_resource_fork(rsrcfork_header_t * rsrcforkhdr); + +static int lock_xattrfile(vnode_t xvp, short locktype, vfs_context_t context); + +static int unlock_xattrfile(vnode_t xvp, vfs_context_t context); + + +#if BYTE_ORDER == LITTLE_ENDIAN + static void swap_adhdr(apple_double_header_t *adh); + static void swap_attrhdr(attr_header_t *ah); + +#else +#define swap_adhdr(x) +#define swap_attrhdr(x) +#endif + +static int validate_attrhdr(attr_header_t *ah, size_t bufsize); +static int shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t context); +static int shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t context); + + +/* + * Retrieve the data of an extended attribute. + */ +static int +default_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size, + __unused int options, vfs_context_t context) +{ + vnode_t xvp = NULL; + attr_info_t ainfo; + attr_header_t *header; + attr_entry_t *entry; + u_int8_t *attrdata; + size_t datalen; + int namelen; + int isrsrcfork; + int fileflags; + int i; + int error; + + fileflags = FREAD; + if (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { + isrsrcfork = 1; + /* + * Open the file locked (shared) since the Carbon + * File Manager may have the Apple Double file open + * and could be changing the resource fork. + */ + fileflags |= O_SHLOCK; + } else { + isrsrcfork = 0; + } + + if ((error = open_xattrfile(vp, fileflags, &xvp, context))) { + return (error); + } + if ((error = get_xattrinfo(xvp, 0, &ainfo, context))) { + close_xattrfile(xvp, fileflags, context); + return (error); + } + + /* Get the Finder Info. 
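
ATTR_ENTRY_LENGTH() above rounds each entry, including its inline NUL-terminated name, up to a 4-byte boundary, and ATTR_NEXT() steps exactly that far. One worked case, assuming sizeof(attr_entry_t) is 12 under the mac68k alignment pragma (4 + 4 + 2 + 1 + the 1-byte name placeholder):

    /*
     * name "foo" -> namelen 4 (includes the NUL):
     *   ATTR_ENTRY_LENGTH(4) == (12 - 1 + 4 + 3) & ~3 == 18 & ~3 == 16 bytes
     */
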
*/
+    if (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
+
+        if (ainfo.finderinfo == NULL || ainfo.emptyfinderinfo) {
+            error = ENOATTR;
+        } else if (uio == NULL) {
+            *size = FINDERINFOSIZE;
+            error = 0;
+        } else if (uio_offset(uio) != 0) {
+            error = EINVAL;
+        } else if (uio_resid(uio) < FINDERINFOSIZE) {
+            error = ERANGE;
+        } else {
+            attrdata = (u_int8_t*)ainfo.filehdr + ainfo.finderinfo->offset;
+            error = uiomove((caddr_t)attrdata, FINDERINFOSIZE, uio);
+        }
+        goto out;
+    }
+
+    /* Read the Resource Fork. */
+    if (isrsrcfork) {
+        if (!vnode_isreg(vp)) {
+            error = EPERM;
+        } else if (ainfo.rsrcfork == NULL) {
+            error = ENOATTR;
+        } else if (uio == NULL) {
+            *size = (size_t)ainfo.rsrcfork->length;
+        } else {
+            uio_setoffset(uio, uio_offset(uio) + ainfo.rsrcfork->offset);
+            error = VNOP_READ(xvp, uio, 0, context);
+            if (error == 0)
+                uio_setoffset(uio, uio_offset(uio) - ainfo.rsrcfork->offset);
+        }
+        goto out;
+    }
+
+    if (ainfo.attrhdr == NULL || ainfo.attr_entry == NULL) {
+        error = ENOATTR;
+        goto out;
+    }
+    if (uio_offset(uio) != 0) {
+        error = EINVAL;
+        goto out;
+    }
+    error = ENOATTR;
+    namelen = strlen(name) + 1;
+    header = ainfo.attrhdr;
+    entry = ainfo.attr_entry;
+    /*
+     * Search for attribute name in the header.
+     */
+    for (i = 0; i < header->num_attrs && ATTR_VALID(entry, ainfo); i++) {
+        if (strncmp(entry->name, name, namelen) == 0) {
+            datalen = (size_t)entry->length;
+            if (uio == NULL) {
+                *size = datalen;
+                error = 0;
+                break;
+            }
+            if (uio_resid(uio) < datalen) {
+                error = ERANGE;
+                break;
+            }
+            if (entry->offset + datalen < ATTR_MAX_HDR_SIZE) {
+                attrdata = ((u_int8_t *)header + entry->offset);
+                error = uiomove((caddr_t)attrdata, datalen, uio);
+            } else {
+                uio_setoffset(uio, entry->offset);
+                error = VNOP_READ(xvp, uio, 0, context);
+                uio_setoffset(uio, 0);
+            }
+            break;
+        }
+        entry = ATTR_NEXT(entry);
+    }
+out:
+    rel_xattrinfo(&ainfo);
+    close_xattrfile(xvp, fileflags, context);
+
+    return (error);
+}
+
+/*
+ * Set the data of an extended attribute.
+ */
+static int
+default_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t context)
+{
+    vnode_t xvp = NULL;
+    attr_info_t ainfo;
+    attr_header_t *header;
+    attr_entry_t *entry;
+    attr_entry_t *lastentry;
+    u_int8_t *attrdata;
+    size_t datalen;
+    size_t entrylen;
+    size_t datafreespace;
+    int namelen;
+    int found = 0;
+    int i;
+    int splitdata;
+    int fileflags;
+    int error;
+
+    datalen = uio_resid(uio);
+    namelen = strlen(name) + 1;
+    entrylen = ATTR_ENTRY_LENGTH(namelen);
+
+    if (datalen > ATTR_MAX_SIZE) {
+        return (E2BIG);  /* EINVAL instead ? */
+    }
+start:
+    /*
+     * Open the file locked since setting an attribute
+     * can change the layout of the Apple Double file.
+     */
+    fileflags = FREAD | FWRITE | O_EXLOCK;
+    if ((error = open_xattrfile(vp, O_CREAT | fileflags, &xvp, context))) {
+        return (error);
+    }
+    if ((error = get_xattrinfo(xvp, ATTR_SETTING, &ainfo, context))) {
+        close_xattrfile(xvp, fileflags, context);
+        return (error);
+    }
+
+    /* Set the Finder Info. */
+    if (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) {
+        if (ainfo.finderinfo && !ainfo.emptyfinderinfo) {
+            /* attr exists and "create" was specified? */
+            if (options & XATTR_CREATE) {
+                error = EEXIST;
+                goto out;
+            }
+        } else {
+            /* attr doesn't exist and "replace" was specified? */
+            if (options & XATTR_REPLACE) {
+                error = ENOATTR;
+                goto out;
+            }
+        }
+        if (uio_offset(uio) != 0 || datalen != FINDERINFOSIZE) {
+            error = EINVAL;
+            goto out;
+        }
+        if (ainfo.finderinfo) {
+            attrdata = (u_int8_t *)ainfo.filehdr + ainfo.finderinfo->offset;
+            error = uiomove((caddr_t)attrdata, datalen, uio);
+            if (error)
+                goto out;
+            ainfo.iosize = sizeof(attr_header_t);
+            error = write_xattrinfo(&ainfo);
+            goto out;
+        }
+        error = ENOATTR;
+        goto out;
+    }
+
+    /* Write the Resource Fork. */
+    if (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) {
+        u_int32_t endoffset;
+
+        if (!vnode_isreg(vp)) {
+            error = EPERM;
+            goto out;
+        }
+        if (ainfo.rsrcfork && ainfo.rsrcfork->length) {
+            /* attr exists and "create" was specified? */
+            if (options & XATTR_CREATE) {
+                error = EEXIST;
+                goto out;
+            }
+        } else {
+            /* attr doesn't exist and "replace" was specified? */
+            if (options & XATTR_REPLACE) {
+                error = ENOATTR;
+                goto out;
+            }
+        }
+        endoffset = uio_resid(uio) + uio_offset(uio); /* new size */
+        uio_setoffset(uio, uio_offset(uio) + ainfo.rsrcfork->offset);
+        error = VNOP_WRITE(xvp, uio, 0, context);
+        if (error)
+            goto out;
+        uio_setoffset(uio, uio_offset(uio) - ainfo.rsrcfork->offset);
+        if (endoffset > ainfo.rsrcfork->length) {
+            ainfo.rsrcfork->length = endoffset;
+            ainfo.iosize = sizeof(attr_header_t);
+            error = write_xattrinfo(&ainfo);
+            goto out;
+        }
+        goto out;
+    }
+
+    if (ainfo.attrhdr == NULL) {
+        error = ENOATTR;
+        goto out;
+    }
+    header = ainfo.attrhdr;
+    entry = ainfo.attr_entry;
+
+    /* Check if data area crosses the maximum header size. */
+    if ((header->data_start + header->data_length + entrylen + datalen) > ATTR_MAX_HDR_SIZE)
+        splitdata = 1;  /* do data I/O separately */
+    else
+        splitdata = 0;
+
+    /*
+     * See if attribute already exists.
+     */
+    for (i = 0; i < header->num_attrs && ATTR_VALID(entry, ainfo); i++) {
+        if (strncmp(entry->name, name, namelen) == 0) {
+            found = 1;
+            break;
+        }
+        entry = ATTR_NEXT(entry);
+    }
+
+    if (found) {
+        if (options & XATTR_CREATE) {
+            error = EEXIST;
+            goto out;
+        }
+        if (datalen == entry->length) {
+            if (splitdata) {
+                uio_setoffset(uio, entry->offset);
+                error = VNOP_WRITE(xvp, uio, 0, context);
+                uio_setoffset(uio, 0);
+                if (error) {
+                    printf("setxattr: VNOP_WRITE error %d\n", error);
+                }
+            } else {
+                attrdata = (u_int8_t *)header + entry->offset;
+                error = uiomove((caddr_t)attrdata, datalen, uio);
+                if (error)
+                    goto out;
+                ainfo.iosize = ainfo.attrhdr->data_start + ainfo.attrhdr->data_length;
+                error = write_xattrinfo(&ainfo);
+                if (error) {
+                    printf("setxattr: write_xattrinfo error %d\n", error);
+                }
+            }
+            goto out;
+        } else {
+            /*
+             * Brute force approach - just remove old entry and set new entry.
+             */
+            found = 0;
+            rel_xattrinfo(&ainfo);
+            close_xattrfile(xvp, fileflags, context);
+            error = default_removexattr(vp, name, options, context);
+            if (error) {
+                goto out;
+            }
+            goto start; /* start over */
+        }
+
+    }
+
+    if (options & XATTR_REPLACE) {
+        error = ENOATTR;  /* nothing there to replace */
+        goto out;
+    }
+    /* Check if header size limit has been reached. */
+    if ((header->data_start + entrylen) > ATTR_MAX_HDR_SIZE) {
+        error = ENOSPC;
+        goto out;
+    }
+
+    datafreespace = header->total_size - (header->data_start + header->data_length);
+
+    /* Check if we need more space. */
+    if ((datalen + entrylen) > datafreespace) {
+        size_t growsize;
+
+        growsize = roundup((datalen + entrylen) - datafreespace, ATTR_BUF_SIZE);
+
+        /* Clip roundup size when we can still fit in ATTR_MAX_HDR_SIZE.
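
The grow path above (its clip test continues just after this note) rounds the shortfall up to whole ATTR_BUF_SIZE chunks, then clips so an unsplit header region never crosses ATTR_MAX_HDR_SIZE. The computation in isolation (illustrative helper):

    static size_t
    growsize_example(size_t need, size_t freespace, u_int32_t total_size,
        int splitdata)
    {
        size_t growsize;

        growsize = roundup(need - freespace, ATTR_BUF_SIZE);
        /* e.g. 700 bytes short of free space -> grow by 4096 */

        if (!splitdata && (total_size + growsize) > ATTR_MAX_HDR_SIZE)
            growsize = ATTR_MAX_HDR_SIZE - total_size;
        return (growsize);
    }
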
*/ + if (!splitdata && (header->total_size + growsize) > ATTR_MAX_HDR_SIZE) { + growsize = ATTR_MAX_HDR_SIZE - header->total_size; + } + + ainfo.filesize += growsize; + error = vnode_setsize(xvp, ainfo.filesize, 0, context); + if (error) { + printf("setxattr: VNOP_TRUNCATE error %d\n", error); + } + if (error) + goto out; + + /* + * Move the resource fork out of the way. + */ + if (ainfo.rsrcfork) { + if (ainfo.rsrcfork->length != 0) { + shift_data_down(xvp, + ainfo.rsrcfork->offset, + ainfo.rsrcfork->length, + growsize, context); + } + ainfo.rsrcfork->offset += growsize; + } + ainfo.finderinfo->length += growsize; + header->total_size += growsize; + } + + /* Make space for a new entry. */ + if (splitdata) { + shift_data_down(xvp, + header->data_start, + header->data_length, + entrylen, context); + } else { + bcopy((u_int8_t *)header + header->data_start, + (u_int8_t *)header + header->data_start + entrylen, + header->data_length); + } + header->data_start += entrylen; + + /* Fix up entry data offsets. */ + lastentry = entry; + for (entry = ainfo.attr_entry; entry != lastentry && ATTR_VALID(entry, ainfo); entry = ATTR_NEXT(entry)) { + entry->offset += entrylen; + } + + /* + * If the attribute data area is entirely within + * the header buffer, then just update the buffer, + * otherwise we'll write it separately to the file. + */ + if (splitdata) { + off_t offset; + + /* Write new attribute data after the end of existing data. */ + offset = header->data_start + header->data_length; + uio_setoffset(uio, offset); + error = VNOP_WRITE(xvp, uio, 0, context); + uio_setoffset(uio, 0); + if (error) { + printf("setxattr: VNOP_WRITE error %d\n", error); + goto out; + } + } else { + attrdata = (u_int8_t *)header + header->data_start + header->data_length; + + error = uiomove((caddr_t)attrdata, datalen, uio); + if (error) { + printf("setxattr: uiomove error %d\n", error); + goto out; + } + } + + /* Create the attribute entry. */ + lastentry->length = datalen; + lastentry->offset = header->data_start + header->data_length; + lastentry->namelen = namelen; + lastentry->flags = 0; + bcopy(name, &lastentry->name[0], namelen); + + /* Update the attributes header. */ + header->num_attrs++; + header->data_length += datalen; + + if (splitdata) { + /* Only write the entries, since the data was written separately. */ + ainfo.iosize = ainfo.attrhdr->data_start; + } else { + /* The entry and data are both in the header; write them together. */ + ainfo.iosize = ainfo.attrhdr->data_start + ainfo.attrhdr->data_length; + } + error = write_xattrinfo(&ainfo); + if (error) { + printf("setxattr: write_xattrinfo error %d\n", error); + } + +out: + rel_xattrinfo(&ainfo); + close_xattrfile(xvp, fileflags, context); + + /* Touch the change time if we changed an attribute. */ + if (error == 0) { + struct vnode_attr va; + + /* Re-write the mtime to cause a ctime change. */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_modify_time); + if (vnode_getattr(vp, &va, context) == 0) { + VATTR_INIT(&va); + VATTR_SET(&va, va_modify_time, va.va_modify_time); + (void) vnode_setattr(vp, &va, context); + } + } + return (error); +} + + +/* + * Remove an extended attribute. 
+ */ +static int +default_removexattr(vnode_t vp, const char *name, __unused int options, vfs_context_t context) +{ + vnode_t xvp = NULL; + attr_info_t ainfo; + attr_header_t *header; + attr_entry_t *entry; + attr_entry_t *oldslot; + u_int8_t *attrdata; + u_int32_t dataoff; + size_t datalen; + size_t entrylen; + int namelen; + int found = 0, lastone = 0; + int i; + int splitdata; + int attrcount = 0; + int isrsrcfork; + int fileflags; + int error; + + fileflags = FREAD | FWRITE; + if (bcmp(name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { + isrsrcfork = 1; + /* + * Open the file locked (exclusive) since the Carbon + * File Manager may have the Apple Double file open + * and could be changing the resource fork. + */ + fileflags |= O_EXLOCK; + } else { + isrsrcfork = 0; + } + + if ((error = open_xattrfile(vp, fileflags, &xvp, context))) { + return (error); + } + if ((error = get_xattrinfo(xvp, 0, &ainfo, context))) { + close_xattrfile(xvp, fileflags, context); + return (error); + } + if (ainfo.attrhdr) + attrcount += ainfo.attrhdr->num_attrs; + if (ainfo.rsrcfork) + ++attrcount; + if (ainfo.finderinfo && !ainfo.emptyfinderinfo) + ++attrcount; + + /* Clear the Finder Info. */ + if (bcmp(name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) { + if (ainfo.finderinfo == NULL || ainfo.emptyfinderinfo) { + error = ENOATTR; + goto out; + } + /* On removal of last attribute the ._ file is removed. */ + if (--attrcount == 0) + goto out; + attrdata = (u_int8_t *)ainfo.filehdr + ainfo.finderinfo->offset; + bzero((caddr_t)attrdata, FINDERINFOSIZE); + ainfo.iosize = sizeof(attr_header_t); + error = write_xattrinfo(&ainfo); + goto out; + } + + /* Clear the Resource Fork. */ + if (isrsrcfork) { + if (!vnode_isreg(vp)) { + error = EPERM; + goto out; + } + if (ainfo.rsrcfork == NULL || ainfo.rsrcfork->length == 0) { + error = ENOATTR; + goto out; + } + /* On removal of last attribute the ._ file is removed. */ + if (--attrcount == 0) + goto out; + /* + * XXX + * If the resource fork isn't the last AppleDouble + * entry then the space needs to be reclaimed by + * shifting the entries after the resource fork. + */ + if ((ainfo.rsrcfork->offset + ainfo.rsrcfork->length) == ainfo.filesize) { + ainfo.filesize -= ainfo.rsrcfork->length; + error = vnode_setsize(xvp, ainfo.filesize, 0, context); + } + if (error == 0) { + ainfo.rsrcfork->length = 0; + ainfo.iosize = sizeof(attr_header_t); + error = write_xattrinfo(&ainfo); + } + goto out; + } + + if (ainfo.attrhdr == NULL) { + error = ENOATTR; + goto out; + } + namelen = strlen(name) + 1; + header = ainfo.attrhdr; + entry = ainfo.attr_entry; + + /* + * See if this attribute exists. + */ + for (i = 0; i < header->num_attrs && ATTR_VALID(entry, ainfo); i++) { + if (strncmp(entry->name, name, namelen) == 0) { + found = 1; + if ((i+1) == header->num_attrs) + lastone = 1; + break; + } + entry = ATTR_NEXT(entry); + } + if (!found) { + error = ENOATTR; + goto out; + } + /* On removal of last attribute the ._ file is removed. */ + if (--attrcount == 0) + goto out; + + datalen = entry->length; + dataoff = entry->offset; + entrylen = ATTR_ENTRY_LENGTH(namelen); + if ((header->data_start + header->data_length) > ATTR_MAX_HDR_SIZE) + splitdata = 1; + else + splitdata = 0; + + /* Remove the attribute entry. */ + if (!lastone) { + bcopy((u_int8_t *)entry + entrylen, (u_int8_t *)entry, + ((size_t)header + header->data_start) - ((size_t)entry + entrylen)); + } + + /* Adjust the attribute data. 
*/ + if (splitdata) { + shift_data_up(xvp, + header->data_start, + dataoff - header->data_start, + entrylen, + context); + if (!lastone) { + shift_data_up(xvp, + dataoff + datalen, + (header->data_start + header->data_length) - (dataoff + datalen), + datalen + entrylen, + context); + } + /* XXX write zeros to freed space ? */ + ainfo.iosize = ainfo.attrhdr->data_start - entrylen; + } else { + + + bcopy((u_int8_t *)header + header->data_start, + (u_int8_t *)header + header->data_start - entrylen, + dataoff - header->data_start); + if (!lastone) { + bcopy((u_int8_t *)header + dataoff + datalen, + (u_int8_t *)header + dataoff - entrylen, + (header->data_start + header->data_length) - (dataoff + datalen)); + } + bzero (((u_int8_t *)header + header->data_start + header->data_length) - (datalen + entrylen), (datalen + entrylen)); + ainfo.iosize = ainfo.attrhdr->data_start + ainfo.attrhdr->data_length; + } + + /* Adjust the header values and entry offsets. */ + header->num_attrs--; + header->data_start -= entrylen; + header->data_length -= datalen; + + oldslot = entry; + entry = ainfo.attr_entry; + for (i = 0; i < header->num_attrs && ATTR_VALID(entry, ainfo); i++) { + entry->offset -= entrylen; + if (entry >= oldslot) + entry->offset -= datalen; + entry = ATTR_NEXT(entry); + } + error = write_xattrinfo(&ainfo); + if (error) { + printf("removexattr: write_xattrinfo error %d\n", error); + } +out: + rel_xattrinfo(&ainfo); + + /* When there are no more attributes remove the ._ file. */ + if (attrcount == 0) { + if (fileflags & O_EXLOCK) + (void) unlock_xattrfile(xvp, context); + VNOP_CLOSE(xvp, fileflags, context); + vnode_rele(xvp); + error = remove_xattrfile(xvp, context); + vnode_put(xvp); + } else { + close_xattrfile(xvp, fileflags, context); + } + /* Touch the change time if we changed an attribute. */ + if (error == 0) { + struct vnode_attr va; + + /* Re-write the mtime to cause a ctime change. */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_modify_time); + if (vnode_getattr(vp, &va, context) == 0) { + VATTR_INIT(&va); + VATTR_SET(&va, va_modify_time, va.va_modify_time); + (void) vnode_setattr(vp, &va, context); + } + } + return (error); + +} + + +/* + * Retrieve the list of extended attribute names. + */ +static int +default_listxattr(vnode_t vp, uio_t uio, size_t *size, __unused int options, vfs_context_t context) +{ + vnode_t xvp = NULL; + attr_info_t ainfo; + attr_entry_t *entry; + int i, count; + int error; + + /* + * We do not zero "*size" here as we don't want to stomp a size set when + * VNOP_LISTXATTR processed any native EAs. That size is initially zeroed by the + * system call layer, up in listxattr or flistxattr. + */ + + if ((error = open_xattrfile(vp, FREAD, &xvp, context))) { + if (error == ENOATTR) + error = 0; + return (error); + } + if ((error = get_xattrinfo(xvp, 0, &ainfo, context))) { + close_xattrfile(xvp, FREAD, context); + return (error); + } + + /* Check for Finder Info. */ + if (ainfo.finderinfo && !ainfo.emptyfinderinfo) { + if (uio == NULL) { + *size += sizeof(XATTR_FINDERINFO_NAME); + } else if (uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) { + error = ERANGE; + goto out; + } else { + error = uiomove((caddr_t)XATTR_FINDERINFO_NAME, + sizeof(XATTR_FINDERINFO_NAME), uio); + if (error) { + error = ERANGE; + goto out; + } + } + } + + /* Check for Resource Fork. 
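+	 * The resource fork shows up in the list as
+	 * XATTR_RESOURCEFORK_NAME, but only for regular files.  As
+	 * with the Finder Info check above, a NULL uio means the
+	 * caller only wants the total size, and a buffer that is
+	 * too small yields ERANGE.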
*/ + if (vnode_isreg(vp) && ainfo.rsrcfork) { + if (uio == NULL) { + *size += sizeof(XATTR_RESOURCEFORK_NAME); + } else if (uio_resid(uio) < sizeof(XATTR_RESOURCEFORK_NAME)) { + error = ERANGE; + goto out; + } else { + error = uiomove((caddr_t)XATTR_RESOURCEFORK_NAME, + sizeof(XATTR_RESOURCEFORK_NAME), uio); + if (error) { + error = ERANGE; + goto out; + } + } + } + + /* Check for attributes. */ + if (ainfo.attrhdr) { + count = ainfo.attrhdr->num_attrs; + for (i = 0, entry = ainfo.attr_entry; i < count && ATTR_VALID(entry, ainfo); i++) { + if (xattr_protected(entry->name) || + xattr_validatename(entry->name) != 0) { + entry = ATTR_NEXT(entry); + continue; + } + if (uio == NULL) { + *size += entry->namelen; + entry = ATTR_NEXT(entry); + continue; + } + if (uio_resid(uio) < entry->namelen) { + error = ERANGE; + break; + } + error = uiomove((caddr_t) entry->name, entry->namelen, uio); + if (error) { + if (error != EFAULT) + error = ERANGE; + break; + } + entry = ATTR_NEXT(entry); + } + } +out: + rel_xattrinfo(&ainfo); + close_xattrfile(xvp, FREAD, context); + + return (error); +} + +static int +open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context) +{ + vnode_t xvp = NULLVP; + vnode_t dvp = NULLVP; + struct vnode_attr va; + struct nameidata nd; + char smallname[64]; + char *filename = NULL; + char *basename = NULL; + size_t len; + errno_t error; + int opened = 0; + int referenced = 0; + + if (vnode_isvroot(vp) && vnode_isdir(vp)) { + /* + * For the root directory use "._." to hold the attributes. + */ + filename = &smallname[0]; + sprintf(filename, "%s%s", ATTR_FILE_PREFIX, "."); + dvp = vp; /* the "._." file resides in the root dir */ + goto lookup; + } + if ( (dvp = vnode_getparent(vp)) == NULLVP) { + error = ENOATTR; + goto out; + } + if ( (basename = vnode_getname(vp)) == NULL) { + error = ENOATTR; + goto out; + } + + /* "._" Attribute files cannot have attributes */ + if (vp->v_type == VREG && strlen(basename) > 2 && + basename[0] == '.' && basename[1] == '_') { + error = EPERM; + goto out; + } + filename = &smallname[0]; + len = snprintf(filename, sizeof(smallname), "%s%s", ATTR_FILE_PREFIX, basename); + if (len >= sizeof(smallname)) { + len++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, len, M_TEMP, M_WAITOK); + len = snprintf(filename, len, "%s%s", ATTR_FILE_PREFIX, basename); + } + /* + * Note that the lookup here does not authorize. Since we are looking + * up in the same directory that we already have the file vnode in, + * we must have been given the file vnode legitimately. Read/write + * access has already been authorized in layers above for calls from + * userspace, and the authorization code using this path to read + * file security from the EA must always get access + */ +lookup: + NDINIT(&nd, LOOKUP, LOCKLEAF | NOFOLLOW | USEDVP | DONOTAUTH, UIO_SYSSPACE, + CAST_USER_ADDR_T(filename), context); + nd.ni_dvp = dvp; + + if (fileflags & O_CREAT) { + nd.ni_cnd.cn_nameiop = CREATE; + nd.ni_cnd.cn_flags |= LOCKPARENT; + + if ( (error = namei(&nd))) { + nd.ni_dvp = NULLVP; + error = ENOATTR; + goto out; + } + if ( (xvp = nd.ni_vp) == NULLVP) { + uid_t uid; + gid_t gid; + mode_t umode; + + /* + * Pick up uid/gid/mode from target file. 
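+			 * The "._" file should appear to belong to the
+			 * owner of the data file, so clone its uid, gid
+			 * and the read/write permission bits.  If getattr
+			 * cannot supply them, fall back to rw-r--r-- and
+			 * leave the ids unset (KAUTH_UID_NONE /
+			 * KAUTH_GID_NONE), i.e. simply not part of the
+			 * create attributes.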
+ */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_mode); + if (VNOP_GETATTR(vp, &va, context) == 0 && + VATTR_IS_SUPPORTED(&va, va_uid) && + VATTR_IS_SUPPORTED(&va, va_gid) && + VATTR_IS_SUPPORTED(&va, va_mode)) { + uid = va.va_uid; + gid = va.va_gid; + umode = va.va_mode & (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); + } else /* fallback values */ { + uid = KAUTH_UID_NONE; + gid = KAUTH_GID_NONE; + umode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; + } + + VATTR_INIT(&va); + VATTR_SET(&va, va_type, VREG); + VATTR_SET(&va, va_mode, umode); + if (uid != KAUTH_UID_NONE) + VATTR_SET(&va, va_uid, uid); + if (gid != KAUTH_GID_NONE) + VATTR_SET(&va, va_gid, gid); + + error = vn_create(dvp, &nd.ni_vp, &nd.ni_cnd, &va, + VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT, + context); + if (error == 0) + xvp = nd.ni_vp; + } + nameidone(&nd); + vnode_put(dvp); /* drop iocount from LOCKPARENT request above */ + + if (error) + goto out; + } else { + if ((error = namei(&nd))) { + nd.ni_dvp = NULLVP; + error = ENOATTR; + goto out; + } + xvp = nd.ni_vp; + nameidone(&nd); + } + nd.ni_dvp = NULLVP; + + if (xvp->v_type != VREG) { + error = ENOATTR; + goto out; + } + /* + * Owners must match. + */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if (VNOP_GETATTR(vp, &va, context) == 0 && VATTR_IS_SUPPORTED(&va, va_uid)) { + uid_t owner = va.va_uid; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + if (VNOP_GETATTR(xvp, &va, context) == 0 && (owner != va.va_uid)) { + error = ENOATTR; /* don't use this "._" file */ + goto out; + } + } + + if ( (error = VNOP_OPEN(xvp, fileflags, context))) { + error = ENOATTR; + goto out; + } + opened = 1; + + if ((error = vnode_ref(xvp))) { + goto out; + } + referenced = 1; + + /* If create was requested, make sure file header exists. */ + if (fileflags & O_CREAT) { + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_nlink); + if ( (error = vnode_getattr(xvp, &va, context)) != 0) { + error = EPERM; + goto out; + } + + /* If the file is empty then add a default header. */ + if (va.va_data_size == 0) { + /* Don't adopt hard-linked "._" files. */ + if (VATTR_IS_SUPPORTED(&va, va_nlink) && va.va_nlink > 1) { + error = EPERM; + goto out; + } + if ( (error = create_xattrfile(xvp, (u_int32_t)va.va_fileid, context))) + goto out; + } + } + /* Apply file locking if requested. */ + if (fileflags & (O_EXLOCK | O_SHLOCK)) { + short locktype; + + locktype = (fileflags & O_EXLOCK) ? 
F_WRLCK : F_RDLCK; + error = lock_xattrfile(xvp, locktype, context); + } +out: + if (dvp && (dvp != vp)) { + vnode_put(dvp); + } + if (basename) { + vnode_putname(basename); + } + if (filename && filename != &smallname[0]) { + FREE(filename, M_TEMP); + } + if (error) { + if (xvp != NULLVP) { + if (opened) { + (void) VNOP_CLOSE(xvp, fileflags, context); + } + if (referenced) { + (void) vnode_rele(xvp); + } + (void) vnode_put(xvp); + xvp = NULLVP; + } + if ((error == ENOATTR) && (fileflags & O_CREAT)) { + error = EPERM; + } + } + *xvpp = xvp; /* return a referenced vnode */ + return (error); +} + +static void +close_xattrfile(vnode_t xvp, int fileflags, vfs_context_t context) +{ +// if (fileflags & FWRITE) +// (void) VNOP_FSYNC(xvp, MNT_WAIT, context); + + if (fileflags & (O_EXLOCK | O_SHLOCK)) + (void) unlock_xattrfile(xvp, context); + + (void) VNOP_CLOSE(xvp, fileflags, context); + (void) vnode_rele(xvp); + (void) vnode_put(xvp); +} + +static int +remove_xattrfile(vnode_t xvp, vfs_context_t context) +{ + vnode_t dvp; + struct nameidata nd; + char *path; + int pathlen; + int error = 0; + + path = get_pathbuff(); + pathlen = MAXPATHLEN; + vn_getpath(xvp, path, &pathlen); + + NDINIT(&nd, DELETE, LOCKPARENT | NOFOLLOW | DONOTAUTH, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), context); + error = namei(&nd); + release_pathbuff(path); + if (error) { + return (error); + } + dvp = nd.ni_dvp; + xvp = nd.ni_vp; + + error = VNOP_REMOVE(dvp, xvp, &nd.ni_cnd, 0, context); + nameidone(&nd); + vnode_put(dvp); + vnode_put(xvp); + + return (error); +} + +static int +get_xattrinfo(vnode_t xvp, int setting, attr_info_t *ainfop, vfs_context_t context) +{ + uio_t auio = NULL; + void * buffer = NULL; + apple_double_header_t *filehdr; + attr_header_t *attrhdr; + struct vnode_attr va; + size_t iosize; + int i; + int error; + + bzero(ainfop, sizeof(attr_info_t)); + ainfop->filevp = xvp; + ainfop->context = context; + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_fileid); + if ((error = vnode_getattr(xvp, &va, context))) { + goto bail; + } + ainfop->filesize = va.va_data_size; + + /* When setting attributes, allow room for the header to grow. */ + if (setting) + iosize = ATTR_MAX_HDR_SIZE; + else + iosize = MIN(ATTR_MAX_HDR_SIZE, ainfop->filesize); + + if (iosize == 0) { + error = ENOATTR; + goto bail; + } + ainfop->iosize = iosize; + MALLOC(buffer, void *, iosize, M_TEMP, M_WAITOK); + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + uio_addiov(auio, (uintptr_t)buffer, iosize); + + /* Read the file header. */ + error = VNOP_READ(xvp, auio, 0, context); + if (error) { + goto bail; + } + ainfop->rawsize = iosize - uio_resid(auio); + ainfop->rawdata = (u_int8_t *)buffer; + + filehdr = (apple_double_header_t *)buffer; + + /* Check for Apple Double file. */ + if (SWAP32(filehdr->magic) != ADH_MAGIC || + SWAP32(filehdr->version) != ADH_VERSION || + SWAP16(filehdr->numEntries) < 1 || + SWAP16(filehdr->numEntries) > 15) { + error = ENOATTR; + goto bail; + } + if (ADHDRSIZE + (SWAP16(filehdr->numEntries) * sizeof(apple_double_entry_t)) > ainfop->rawsize) { + error = EINVAL; + goto bail; + } + + swap_adhdr(filehdr); + ainfop->filehdr = filehdr; /* valid AppleDouble header */ + /* rel_xattrinfo is responsible for freeing the header buffer */ + buffer = NULL; + + /* Check the AppleDouble entries. 
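+	 * Two entry types matter here: AD_FINDERINFO, which anchors
+	 * the extended attribute header, and AD_RESOURCE, the
+	 * resource fork.  A vanilla AppleDouble file (a 32-byte
+	 * Finder Info entry plus a resource fork) is converted in
+	 * place to the extended layout, but only when we are opening
+	 * it to set an attribute.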
*/ + for (i = 0; i < filehdr->numEntries; ++i) { + if (filehdr->entries[i].type == AD_FINDERINFO && + filehdr->entries[i].length > 0) { + ainfop->finderinfo = &filehdr->entries[i]; + attrhdr = (attr_header_t *)filehdr; + + if (bcmp((u_int8_t*)ainfop->filehdr + ainfop->finderinfo->offset, + emptyfinfo, sizeof(emptyfinfo)) == 0) { + ainfop->emptyfinderinfo = 1; + } + + if (i != 0) { + continue; + } + /* See if we need to convert this AppleDouble file. */ + if (filehdr->entries[0].length == FINDERINFOSIZE) { + size_t delta; + size_t writesize; + + if (!setting || + filehdr->entries[1].type != AD_RESOURCE || + filehdr->numEntries > 2) { + continue; /* not expected layout */ + } + delta = ATTR_BUF_SIZE - (filehdr->entries[0].offset + FINDERINFOSIZE); + if (filehdr->entries[1].length) { + /* Make some room. */ + shift_data_down(xvp, + filehdr->entries[1].offset, + filehdr->entries[1].length, + delta, context); + writesize = sizeof(attr_header_t); + } else { + rsrcfork_header_t *rsrcforkhdr; + + vnode_setsize(xvp, filehdr->entries[1].offset + delta, 0, context); + + /* Steal some space for an empty RF header. */ + delta -= sizeof(rsrcfork_header_t); + + bzero(&attrhdr->appledouble.pad[0], delta); + rsrcforkhdr = (rsrcfork_header_t *)((char *)filehdr + filehdr->entries[1].offset + delta); + + /* Fill in Empty Resource Fork Header. */ + init_empty_resource_fork(rsrcforkhdr); + + filehdr->entries[1].length = sizeof(rsrcfork_header_t); + writesize = ATTR_BUF_SIZE; + } + filehdr->entries[0].length += delta; + filehdr->entries[1].offset += delta; + + /* Fill in Attribute Header. */ + attrhdr->magic = ATTR_HDR_MAGIC; + attrhdr->debug_tag = (u_int32_t)va.va_fileid; + attrhdr->total_size = filehdr->entries[1].offset; + attrhdr->data_start = sizeof(attr_header_t); + attrhdr->data_length = 0; + attrhdr->reserved[0] = 0; + attrhdr->reserved[1] = 0; + attrhdr->reserved[2] = 0; + attrhdr->flags = 0; + attrhdr->num_attrs = 0; + + /* Push out new header */ + uio_reset(auio, 0, UIO_SYSSPACE32, UIO_WRITE); + uio_addiov(auio, (uintptr_t)filehdr, writesize); + + swap_adhdr(filehdr); + swap_attrhdr(attrhdr); + error = VNOP_WRITE(xvp, auio, 0, context); + swap_adhdr(filehdr); + /* The attribute header gets swapped below. 
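+			 * (On little-endian hosts the AppleDouble header
+			 * was swapped back to host order right after the
+			 * write, but the attribute header is still
+			 * big-endian until the swap_attrhdr() that follows
+			 * the magic check.)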
*/ + } + if (SWAP32 (attrhdr->magic) != ATTR_HDR_MAGIC || + validate_attrhdr(attrhdr, ainfop->rawsize) != 0) { + printf("get_xattrinfo: invalid attribute header\n"); + continue; + } + swap_attrhdr(attrhdr); + ainfop->attrhdr = attrhdr; /* valid attribute header */ + ainfop->attr_entry = (attr_entry_t *)&attrhdr[1]; + continue; + } + if (filehdr->entries[i].type == AD_RESOURCE && + (filehdr->entries[i].length > sizeof(rsrcfork_header_t) || setting)) { + ainfop->rsrcfork = &filehdr->entries[i]; + if (i != (filehdr->numEntries - 1)) { + printf("get_xattrinfo: resource fork not last entry\n"); + ainfop->readonly = 1; + } + continue; + } + } + error = 0; +bail: + if (auio != NULL) + uio_free(auio); + if (buffer != NULL) + FREE(buffer, M_TEMP); + return (error); +} + + +static int +create_xattrfile(vnode_t xvp, u_int32_t fileid, vfs_context_t context) +{ + attr_header_t *xah; + rsrcfork_header_t *rsrcforkhdr; + void * buffer; + uio_t auio; + int rsrcforksize; + int error; + + MALLOC(buffer, void *, ATTR_BUF_SIZE, M_TEMP, M_WAITOK); + bzero(buffer, ATTR_BUF_SIZE); + + xah = (attr_header_t *)buffer; + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + uio_addiov(auio, (uintptr_t)buffer, ATTR_BUF_SIZE); + rsrcforksize = sizeof(rsrcfork_header_t); + rsrcforkhdr = (rsrcfork_header_t *) ((char *)buffer + ATTR_BUF_SIZE - rsrcforksize); + + /* Fill in Apple Double Header. */ + xah->appledouble.magic = SWAP32 (ADH_MAGIC); + xah->appledouble.version = SWAP32 (ADH_VERSION); + xah->appledouble.numEntries = SWAP16 (2); + xah->appledouble.entries[0].type = SWAP32 (AD_FINDERINFO); + xah->appledouble.entries[0].offset = SWAP32 (offsetof(apple_double_header_t, finfo)); + xah->appledouble.entries[0].length = SWAP32 (ATTR_BUF_SIZE - offsetof(apple_double_header_t, finfo) - rsrcforksize); + xah->appledouble.entries[1].type = SWAP32 (AD_RESOURCE); + xah->appledouble.entries[1].offset = SWAP32 (ATTR_BUF_SIZE - rsrcforksize); + xah->appledouble.entries[1].length = SWAP32 (rsrcforksize); + bcopy(ADH_MACOSX, xah->appledouble.filler, sizeof(xah->appledouble.filler)); + + /* Fill in Attribute Header. */ + xah->magic = SWAP32 (ATTR_HDR_MAGIC); + xah->debug_tag = SWAP32 (fileid); + xah->total_size = SWAP32 (ATTR_BUF_SIZE - rsrcforksize); + xah->data_start = SWAP32 (sizeof(attr_header_t)); + + /* Fill in Empty Resource Fork Header. */ + init_empty_resource_fork(rsrcforkhdr); + + /* Push it out. 
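+	 * The buffer was assembled with every multi-byte field already
+	 * converted to big-endian via the SWAP macros at assignment,
+	 * so it can be written out as-is, with no separate swap pass.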
*/ + error = VNOP_WRITE(xvp, auio, 0, context); + + uio_free(auio); + FREE(buffer, M_TEMP); + + return (error); +} + +static void +init_empty_resource_fork(rsrcfork_header_t * rsrcforkhdr) +{ + bzero(rsrcforkhdr, sizeof(rsrcfork_header_t)); + rsrcforkhdr->fh_DataOffset = SWAP32 (RF_FIRST_RESOURCE); + rsrcforkhdr->fh_MapOffset = SWAP32 (RF_FIRST_RESOURCE); + rsrcforkhdr->fh_MapLength = SWAP32 (RF_NULL_MAP_LENGTH); + rsrcforkhdr->mh_DataOffset = SWAP32 (RF_FIRST_RESOURCE); + rsrcforkhdr->mh_MapOffset = SWAP32 (RF_FIRST_RESOURCE); + rsrcforkhdr->mh_MapLength = SWAP32 (RF_NULL_MAP_LENGTH); + rsrcforkhdr->mh_Types = SWAP16 (RF_NULL_MAP_LENGTH - 2 ); + rsrcforkhdr->mh_Names = SWAP16 (RF_NULL_MAP_LENGTH); + rsrcforkhdr->typeCount = SWAP16 (-1); + bcopy(RF_EMPTY_TAG, rsrcforkhdr->systemData, sizeof(RF_EMPTY_TAG)); +} + +static void +rel_xattrinfo(attr_info_t *ainfop) +{ + FREE(ainfop->filehdr, M_TEMP); + bzero(ainfop, sizeof(attr_info_t)); +} + +static int +write_xattrinfo(attr_info_t *ainfop) +{ + uio_t auio; + int error; + + auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + uio_addiov(auio, (uintptr_t)ainfop->filehdr, ainfop->iosize); + + swap_adhdr(ainfop->filehdr); + swap_attrhdr(ainfop->attrhdr); + + error = VNOP_WRITE(ainfop->filevp, auio, 0, ainfop->context); + + swap_adhdr(ainfop->filehdr); + swap_attrhdr(ainfop->attrhdr); + return (error); +} + +#if BYTE_ORDER == LITTLE_ENDIAN +/* + * Endian swap apple double header + */ +static void +swap_adhdr(apple_double_header_t *adh) +{ + int count; + int i; + + count = (adh->magic == ADH_MAGIC) ? adh->numEntries : SWAP16(adh->numEntries); + + adh->magic = SWAP32 (adh->magic); + adh->version = SWAP32 (adh->version); + adh->numEntries = SWAP16 (adh->numEntries); + + for (i = 0; i < count; i++) { + adh->entries[i].type = SWAP32 (adh->entries[i].type); + adh->entries[i].offset = SWAP32 (adh->entries[i].offset); + adh->entries[i].length = SWAP32 (adh->entries[i].length); + } +} + +/* + * Endian swap extended attributes header + */ +static void +swap_attrhdr(attr_header_t *ah) +{ + attr_entry_t *ae; + int count; + int i; + + count = (ah->magic == ATTR_HDR_MAGIC) ? ah->num_attrs : SWAP16(ah->num_attrs); + + ah->magic = SWAP32 (ah->magic); + ah->debug_tag = SWAP32 (ah->debug_tag); + ah->total_size = SWAP32 (ah->total_size); + ah->data_start = SWAP32 (ah->data_start); + ah->data_length = SWAP32 (ah->data_length); + ah->flags = SWAP16 (ah->flags); + ah->num_attrs = SWAP16 (ah->num_attrs); + + ae = (attr_entry_t *)(&ah[1]); + for (i = 0; i < count; i++, ae = ATTR_NEXT(ae)) { + ae->offset = SWAP32 (ae->offset); + ae->length = SWAP32 (ae->length); + ae->flags = SWAP16 (ae->flags); + } +} +#endif + +/* + * Validate attributes header contents + */ +static int +validate_attrhdr(attr_header_t *ah, size_t bufsize) +{ + attr_entry_t *ae; + u_int8_t *bufend; + int count; + int i; + + if (ah == NULL) + return (EINVAL); + + bufend = (u_int8_t *)ah + bufsize; + count = (ah->magic == ATTR_HDR_MAGIC) ? ah->num_attrs : SWAP16(ah->num_attrs); + + ae = (attr_entry_t *)(&ah[1]); + for (i = 0; i < count && (u_int8_t *)ae < bufend; i++, ae = ATTR_NEXT(ae)) { + } + return (i < count ? EINVAL : 0); +} + +// +// "start" & "end" are byte offsets in the file. +// "to" is the byte offset we want to move the +// data to. "to" should be > "start". +// +// we do the copy backwards to avoid problems if +// there's an overlap. 
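+// (The implementation below takes the destination as a delta rather
+// than a "to" offset: bytes [start, start+len) move to
+// [start+delta, start+len+delta).)
+//
+// For example, shifting len=10 bytes at start=100 down by delta=4
+// with a hypothetically small chunk of 4 would copy [106,110) to
+// [110,114), then [102,106) to [106,110), then the 2-byte remainder
+// [100,102) to [104,106) -- no source range is read after a copy
+// has overwritten it.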
+//
+static int
+shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t context)
+{
+	int ret, iolen;
+	size_t chunk, orig_chunk;
+	char *buff;
+	off_t pos;
+	ucred_t ucred = vfs_context_ucred(context);
+	proc_t p = vfs_context_proc(context);
+
+	if (delta == 0 || len == 0) {
+		return 0;
+	}
+
+	chunk = 4096;
+	if (len < chunk) {
+		chunk = len;
+	}
+	orig_chunk = chunk;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) {
+		return ENOMEM;
+	}
+
+	for (pos = start + len - chunk; pos >= start; pos -= chunk) {
+		ret = vn_rdwr(UIO_READ, xvp, buff, chunk, pos, UIO_SYSSPACE, IO_NODELOCKED, ucred, &iolen, p);
+		if (iolen != 0) {
+			printf("xattr:shift_data: error reading data @ %lld (read %d of %d) (error %d)\n",
+				pos, (int)(chunk - iolen), (int)chunk, ret);
+			break;
+		}
+
+		ret = vn_rdwr(UIO_WRITE, xvp, buff, chunk, pos + delta, UIO_SYSSPACE, IO_NODELOCKED, ucred, &iolen, p);
+		if (iolen != 0) {
+			printf("xattr:shift_data: error writing data @ %lld (wrote %d of %d) (error %d)\n",
+				pos + delta, (int)(chunk - iolen), (int)chunk, ret);
+			break;
+		}
+
+		if ((pos - chunk) < start) {
+			chunk = pos - start;
+
+			if (chunk == 0) {   // we're all done
+				break;
+			}
+		}
+	}
+	kmem_free(kernel_map, (vm_offset_t)buff, orig_chunk);
+
+	return 0;
+}
+
+
+static int
+shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t context)
+{
+	int ret, iolen;
+	size_t chunk, orig_chunk;
+	char *buff;
+	off_t pos;
+	off_t end;
+	ucred_t ucred = vfs_context_ucred(context);
+	proc_t p = vfs_context_proc(context);
+
+	if (delta == 0 || len == 0) {
+		return 0;
+	}
+
+	chunk = 4096;
+	if (len < chunk) {
+		chunk = len;
+	}
+	orig_chunk = chunk;
+	end = start + len;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&buff, chunk)) {
+		return ENOMEM;
+	}
+
+	for (pos = start; pos < end; pos += chunk) {
+		ret = vn_rdwr(UIO_READ, xvp, buff, chunk, pos, UIO_SYSSPACE, IO_NODELOCKED, ucred, &iolen, p);
+		if (iolen != 0) {
+			printf("xattr:shift_data: error reading data @ %lld (read %d of %d) (error %d)\n",
+				pos, (int)(chunk - iolen), (int)chunk, ret);
+			break;
+		}
+
+		ret = vn_rdwr(UIO_WRITE, xvp, buff, chunk, pos - delta, UIO_SYSSPACE, IO_NODELOCKED, ucred, &iolen, p);
+		if (iolen != 0) {
+			printf("xattr:shift_data: error writing data @ %lld (wrote %d of %d) (error %d)\n",
+				pos - delta, (int)(chunk - iolen), (int)chunk, ret);
+			break;
+		}
+
+		if ((pos + chunk) > end) {
+			chunk = end - pos;
+
+			if (chunk == 0) {   // we're all done
+				break;
+			}
+		}
+	}
+	kmem_free(kernel_map, (vm_offset_t)buff, orig_chunk);
+
+	return 0;
+}
+
+static int
+lock_xattrfile(vnode_t xvp, short locktype, vfs_context_t context)
+{
+	struct flock lf;
+
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	lf.l_type = locktype;	/* F_WRLCK or F_RDLCK */
+	/* Note: id is just a kernel address that's not a proc */
+	return VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK, context);
+}
+
+static int
+unlock_xattrfile(vnode_t xvp, vfs_context_t context)
+{
+	struct flock lf;
+
+	lf.l_whence = SEEK_SET;
+	lf.l_start = 0;
+	lf.l_len = 0;
+	lf.l_type = F_UNLCK;
+	/* Note: id is just a kernel address that's not a proc */
+	return VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_UNLCK, &lf, F_FLOCK, context);
+}
+
diff --git a/bsd/vfs/vnode_if.c b/bsd/vfs/vnode_if.c
index 557bcf66a..b77c7c3d4 100644
--- a/bsd/vfs/vnode_if.c
+++ b/bsd/vfs/vnode_if.c
@@ -65,11 +65,11 @@
 #include 
-#include 
+#include 
 #include 
-#include 
+#include 
 
-struct vnodeop_desc vop_default_desc = {
+struct vnodeop_desc vnop_default_desc = {
 	0,
 	"default",
 	0,
@@ -82,952 +82,787 @@ struct vnodeop_desc vop_default_desc = {
 };
 
-int vop_lookup_vp_offsets[] = {
-
VOPARG_OFFSETOF(struct vop_lookup_args,a_dvp), +int vnop_lookup_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_lookup_args,a_dvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_lookup_desc = { +struct vnodeop_desc vnop_lookup_desc = { 0, - "vop_lookup", + "vnop_lookup", 0, - vop_lookup_vp_offsets, - VOPARG_OFFSETOF(struct vop_lookup_args, a_vpp), + vnop_lookup_vp_offsets, + VOPARG_OFFSETOF(struct vnop_lookup_args, a_vpp), VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_lookup_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_lookup_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_lookup_args, a_context), NULL, }; -int vop_cachedlookup_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_cachedlookup_args,a_dvp), +int vnop_create_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_create_args,a_dvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_cachedlookup_desc = { +struct vnodeop_desc vnop_create_desc = { 0, - "vop_cachedlookup", - 0, - vop_cachedlookup_vp_offsets, - VOPARG_OFFSETOF(struct vop_cachedlookup_args, a_vpp), - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_cachedlookup_args, a_cnp), - NULL, -}; - -int vop_create_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_create_args,a_dvp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_create_desc = { - 0, - "vop_create", + "vnop_create", 0 | VDESC_VP0_WILLRELE, - vop_create_vp_offsets, - VOPARG_OFFSETOF(struct vop_create_args, a_vpp), + vnop_create_vp_offsets, + VOPARG_OFFSETOF(struct vnop_create_args, a_vpp), VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_create_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_create_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_create_args, a_context), NULL, }; -int vop_whiteout_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_whiteout_args,a_dvp), +int vnop_whiteout_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_whiteout_args,a_dvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_whiteout_desc = { +struct vnodeop_desc vnop_whiteout_desc = { 0, - "vop_whiteout", + "vnop_whiteout", 0 | VDESC_VP0_WILLRELE, - vop_whiteout_vp_offsets, + vnop_whiteout_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_whiteout_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_whiteout_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_whiteout_args, a_context), NULL, }; -int vop_mknod_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mknod_args,a_dvp), - VDESC_NO_OFFSET +int vnop_mknod_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_mknod_args,a_dvp), + VDESC_NO_OFFSET }; -struct vnodeop_desc vop_mknod_desc = { - 0, - "vop_mknod", - 0 | VDESC_VP0_WILLRELE | VDESC_VPP_WILLRELE, - vop_mknod_vp_offsets, - VOPARG_OFFSETOF(struct vop_mknod_args, a_vpp), - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_mknod_args, a_cnp), - NULL, +struct vnodeop_desc vnop_mknod_desc = { + 0, + "vnop_mknod", + 0 | VDESC_VP0_WILLRELE | VDESC_VPP_WILLRELE, + vnop_mknod_vp_offsets, + VOPARG_OFFSETOF(struct vnop_mknod_args, a_vpp), + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_mknod_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_mknod_args, a_context), + NULL, }; -int vop_mkcomplex_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mkcomplex_args,a_dvp), +int vnop_open_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_open_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_mkcomplex_desc = { +struct vnodeop_desc vnop_open_desc = { + 0, + "vnop_open", 0, - "vop_mkcomplex", - 0 | VDESC_VP0_WILLRELE | VDESC_VPP_WILLRELE, - vop_mkcomplex_vp_offsets, - VOPARG_OFFSETOF(struct 
vop_mkcomplex_args, a_vpp),
+	vnop_open_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_mkcomplex_args, a_cnp),
-	NULL,
-};
-
-int vop_open_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_open_args,a_vp),
-	VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_open_desc = {
-	0,
-	"vop_open",
-	0,
-	vop_open_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_open_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_open_args, a_p),
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_open_args, a_context),
 	NULL,
 };
 
-int vop_close_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_close_args,a_vp),
+int vnop_close_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_close_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_close_desc = {
+struct vnodeop_desc vnop_close_desc = {
 	0,
-	"vop_close",
+	"vnop_close",
 	0,
-	vop_close_vp_offsets,
+	vnop_close_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_close_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_close_args, a_p),
 	VDESC_NO_OFFSET,
-	NULL,
-};
-
-int vop_access_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_access_args,a_vp),
-	VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_access_desc = {
-	0,
-	"vop_access",
-	0,
-	vop_access_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_access_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_access_args, a_p),
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_close_args, a_context),
 	NULL,
 };
 
-int vop_getattr_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_getattr_args,a_vp),
+int vnop_access_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_access_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_getattr_desc = {
+struct vnodeop_desc vnop_access_desc = {
 	0,
-	"vop_getattr",
+	"vnop_access",
 	0,
-	vop_getattr_vp_offsets,
+	vnop_access_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_getattr_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_getattr_args, a_p),
 	VDESC_NO_OFFSET,
-	NULL,
-};
-
-int vop_setattr_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_setattr_args,a_vp),
-	VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_setattr_desc = {
-	0,
-	"vop_setattr",
-	0,
-	vop_setattr_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_setattr_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_setattr_args, a_p),
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_access_args, a_context),
 	NULL,
 };
 
-int vop_getattrlist_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_getattrlist_args,a_vp),
+int vnop_getattr_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_getattr_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_getattrlist_desc = {
+struct vnodeop_desc vnop_getattr_desc = {
 	0,
-	"vop_getattrlist",
+	"vnop_getattr",
 	0,
-	vop_getattrlist_vp_offsets,
+	vnop_getattr_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_getattrlist_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_getattrlist_args, a_p),
 	VDESC_NO_OFFSET,
-	NULL,
-};
-
-int vop_setattrlist_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_setattrlist_args,a_vp),
-	VDESC_NO_OFFSET
-};
-struct vnodeop_desc vop_setattrlist_desc = {
-	0,
-	"vop_setattrlist",
-	0,
-	vop_setattrlist_vp_offsets,
 	VDESC_NO_OFFSET,
-	VOPARG_OFFSETOF(struct vop_setattrlist_args, a_cred),
-	VOPARG_OFFSETOF(struct vop_setattrlist_args, a_p),
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_getattr_args, a_context),
 	NULL,
 };
 
-int vop_read_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_read_args,a_vp),
+int vnop_setattr_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_setattr_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_read_desc = {
+struct vnodeop_desc vnop_setattr_desc = {
0, - "vop_read", + "vnop_setattr", 0, - vop_read_vp_offsets, + vnop_setattr_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_read_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_setattr_args, a_context), NULL, }; -int vop_write_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_write_args,a_vp), +int vnop_getattrlist_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_getattrlist_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_write_desc = { +struct vnodeop_desc vnop_getattrlist_desc = { 0, - "vop_write", + "vnop_getattrlist", 0, - vop_write_vp_offsets, + vnop_getattrlist_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_write_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_getattrlist_args, a_context), NULL, }; -int vop_lease_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_lease_args,a_vp), +int vnop_setattrlist_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_setattrlist_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_lease_desc = { +struct vnodeop_desc vnop_setattrlist_desc = { 0, - "vop_lease", + "vnop_setattrlist", 0, - vop_lease_vp_offsets, + vnop_setattrlist_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_lease_args, a_cred), - VOPARG_OFFSETOF(struct vop_lease_args, a_p), VDESC_NO_OFFSET, - NULL, -}; - -int vop_ioctl_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_ioctl_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_ioctl_desc = { - 0, - "vop_ioctl", - 0, - vop_ioctl_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_ioctl_args, a_cred), - VOPARG_OFFSETOF(struct vop_ioctl_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_setattrlist_args, a_context), NULL, }; -int vop_select_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_select_args,a_vp), +int vnop_read_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_read_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_select_desc = { +struct vnodeop_desc vnop_read_desc = { 0, - "vop_select", + "vnop_read", 0, - vop_select_vp_offsets, + vnop_read_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_select_args, a_cred), - VOPARG_OFFSETOF(struct vop_select_args, a_p), VDESC_NO_OFFSET, - NULL, -}; - -int vop_exchange_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_exchange_args,a_fvp), - VOPARG_OFFSETOF(struct vop_exchange_args,a_tvp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_exchange_desc = { - 0, - "vop_exchange", - 0, - vop_exchange_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_exchange_args, a_cred), - VOPARG_OFFSETOF(struct vop_exchange_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_read_args, a_context), NULL, }; -int vop_kqfilt_add_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_kqfilt_add_args,a_vp), +int vnop_write_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_write_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_kqfilt_add_desc = { +struct vnodeop_desc vnop_write_desc = { 0, - "vop_kqfilt_add", + "vnop_write", 0, - vop_kqfilt_add_vp_offsets, + vnop_write_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_kqfilt_add_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_write_args, a_context), NULL, }; -int vop_kqfilt_remove_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_kqfilt_remove_args,a_vp), +int vnop_ioctl_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_ioctl_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_kqfilt_remove_desc = { +struct vnodeop_desc vnop_ioctl_desc = { 0, - 
"vop_kqfilt_remove", + "vnop_ioctl", 0, - vop_kqfilt_remove_vp_offsets, + vnop_ioctl_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_kqfilt_remove_args, a_p), VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_ioctl_args, a_context), NULL, }; -int vop_revoke_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_revoke_args,a_vp), +int vnop_select_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_select_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_revoke_desc = { +struct vnodeop_desc vnop_select_desc = { 0, - "vop_revoke", + "vnop_select", 0, - vop_revoke_vp_offsets, + vnop_select_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_select_args, a_context), VDESC_NO_OFFSET, NULL, }; -int vop_mmap_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mmap_args,a_vp), +int vnop_exchange_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_exchange_args,a_fvp), + VOPARG_OFFSETOF(struct vnop_exchange_args,a_tvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_mmap_desc = { +struct vnodeop_desc vnop_exchange_desc = { 0, - "vop_mmap", + "vnop_exchange", 0, - vop_mmap_vp_offsets, + vnop_exchange_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_mmap_args, a_cred), - VOPARG_OFFSETOF(struct vop_mmap_args, a_p), VDESC_NO_OFFSET, - NULL, -}; - -int vop_fsync_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_fsync_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_fsync_desc = { - 0, - "vop_fsync", - 0, - vop_fsync_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_fsync_args, a_cred), - VOPARG_OFFSETOF(struct vop_fsync_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_exchange_args, a_context), NULL, }; -int vop_seek_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_seek_args,a_vp), +int vnop_kqfilt_add_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_kqfilt_add_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_seek_desc = { +struct vnodeop_desc vnop_kqfilt_add_desc = { 0, - "vop_seek", + "vnop_kqfilt_add", 0, - vop_seek_vp_offsets, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_seek_args, a_cred), - VDESC_NO_OFFSET, + vnop_kqfilt_add_vp_offsets, VDESC_NO_OFFSET, - NULL, -}; - -int vop_remove_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_remove_args,a_dvp), - VOPARG_OFFSETOF(struct vop_remove_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_remove_desc = { - 0, - "vop_remove", - 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE, - vop_remove_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_remove_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_kqfilt_add_args, a_context), NULL, }; -int vop_link_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_link_args,a_vp), - VOPARG_OFFSETOF(struct vop_link_args,a_tdvp), +int vnop_kqfilt_remove_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_kqfilt_remove_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_link_desc = { +struct vnodeop_desc vnop_kqfilt_remove_desc = { 0, - "vop_link", - 0 | VDESC_VP1_WILLRELE, - vop_link_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_link_args, a_cnp), - NULL, -}; - -int vop_rename_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_rename_args,a_fdvp), - VOPARG_OFFSETOF(struct vop_rename_args,a_fvp), - VOPARG_OFFSETOF(struct vop_rename_args,a_tdvp), - VOPARG_OFFSETOF(struct vop_rename_args,a_tvp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_rename_desc = { + "vnop_kqfilt_remove", 0, - "vop_rename", - 0 | VDESC_VP0_WILLRELE | 
VDESC_VP1_WILLRELE | VDESC_VP2_WILLRELE | VDESC_VP3_WILLRELE, - vop_rename_vp_offsets, + vnop_kqfilt_remove_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_rename_args, a_fcnp), - NULL, -}; - -int vop_mkdir_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_mkdir_args,a_dvp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_mkdir_desc = { - 0, - "vop_mkdir", - 0 | VDESC_VP0_WILLRELE, - vop_mkdir_vp_offsets, - VOPARG_OFFSETOF(struct vop_mkdir_args, a_vpp), - VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_mkdir_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_kqfilt_remove_args, a_context), NULL, }; -int vop_rmdir_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_rmdir_args,a_dvp), - VOPARG_OFFSETOF(struct vop_rmdir_args,a_vp), +int vnop_revoke_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_revoke_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_rmdir_desc = { +struct vnodeop_desc vnop_revoke_desc = { 0, - "vop_rmdir", - 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE, - vop_rmdir_vp_offsets, + "vnop_revoke", + 0, + vnop_revoke_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_rmdir_args, a_cnp), - NULL, -}; - -int vop_symlink_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_symlink_args,a_dvp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_symlink_desc = { - 0, - "vop_symlink", - 0 | VDESC_VP0_WILLRELE | VDESC_VPP_WILLRELE, - vop_symlink_vp_offsets, - VOPARG_OFFSETOF(struct vop_symlink_args, a_vpp), VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_symlink_args, a_cnp), NULL, }; -int vop_readdir_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_readdir_args,a_vp), + +int vnop_mmap_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_mmap_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_readdir_desc = { +struct vnodeop_desc vnop_mmap_desc = { 0, - "vop_readdir", + "vnop_mmap", 0, - vop_readdir_vp_offsets, + vnop_mmap_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_readdir_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, NULL, }; -int vop_readdirattr_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_readdirattr_args,a_vp), + +int vnop_mnomap_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_mnomap_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_readdirattr_desc = { +struct vnodeop_desc vnop_mnomap_desc = { 0, - "vop_readdirattr", + "vnop_mnomap", 0, - vop_readdirattr_vp_offsets, + vnop_mnomap_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_readdirattr_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, NULL, }; -int vop_readlink_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_readlink_args,a_vp), + +int vnop_fsync_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_fsync_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_readlink_desc = { +struct vnodeop_desc vnop_fsync_desc = { 0, - "vop_readlink", + "vnop_fsync", 0, - vop_readlink_vp_offsets, + vnop_fsync_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_readlink_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_fsync_args, a_context), NULL, }; -int vop_abortop_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_abortop_args,a_dvp), +int vnop_remove_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_remove_args,a_dvp), + VOPARG_OFFSETOF(struct vnop_remove_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_abortop_desc = { +struct vnodeop_desc vnop_remove_desc = { 0, - "vop_abortop", - 0, - vop_abortop_vp_offsets, + "vnop_remove", + 0 | 
VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE, + vnop_remove_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_abortop_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_remove_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_remove_args, a_context), NULL, }; -int vop_inactive_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_inactive_args,a_vp), +int vnop_link_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_link_args,a_vp), + VOPARG_OFFSETOF(struct vnop_link_args,a_tdvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_inactive_desc = { +struct vnodeop_desc vnop_link_desc = { 0, - "vop_inactive", - 0, - vop_inactive_vp_offsets, + "vnop_link", + 0 | VDESC_VP1_WILLRELE, + vnop_link_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_inactive_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_link_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_link_args, a_context), NULL, }; -int vop_reclaim_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_reclaim_args,a_vp), +int vnop_rename_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_rename_args,a_fdvp), + VOPARG_OFFSETOF(struct vnop_rename_args,a_fvp), + VOPARG_OFFSETOF(struct vnop_rename_args,a_tdvp), + VOPARG_OFFSETOF(struct vnop_rename_args,a_tvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_reclaim_desc = { - 0, - "vop_reclaim", +struct vnodeop_desc vnop_rename_desc = { 0, - vop_reclaim_vp_offsets, + "vnop_rename", + 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE | VDESC_VP2_WILLRELE | VDESC_VP3_WILLRELE, + vnop_rename_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_reclaim_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_rename_args, a_fcnp), + VOPARG_OFFSETOF(struct vnop_rename_args, a_context), NULL, }; -int vop_lock_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_lock_args,a_vp), - VDESC_NO_OFFSET +int vnop_mkdir_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_mkdir_args,a_dvp), + VDESC_NO_OFFSET }; -struct vnodeop_desc vop_lock_desc = { - 0, - "vop_lock", - 0, - vop_lock_vp_offsets, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_lock_args, a_p), - VDESC_NO_OFFSET, - NULL, +struct vnodeop_desc vnop_mkdir_desc = { + 0, + "vnop_mkdir", + 0 | VDESC_VP0_WILLRELE, + vnop_mkdir_vp_offsets, + VOPARG_OFFSETOF(struct vnop_mkdir_args, a_vpp), + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_mkdir_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_mkdir_args, a_context), + NULL, }; -int vop_unlock_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_unlock_args,a_vp), +int vnop_rmdir_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_rmdir_args,a_dvp), + VOPARG_OFFSETOF(struct vnop_rmdir_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_unlock_desc = { - 0, - "vop_unlock", +struct vnodeop_desc vnop_rmdir_desc = { 0, - vop_unlock_vp_offsets, + "vnop_rmdir", + 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE, + vnop_rmdir_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_unlock_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_rmdir_args, a_cnp), + VOPARG_OFFSETOF(struct vnop_rmdir_args, a_context), NULL, }; -int vop_bmap_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_bmap_args,a_vp), - VDESC_NO_OFFSET +int vnop_symlink_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_symlink_args,a_dvp), + VDESC_NO_OFFSET }; -struct vnodeop_desc vop_bmap_desc = { - 0, - "vop_bmap", - 0, - vop_bmap_vp_offsets, - VOPARG_OFFSETOF(struct vop_bmap_args, a_vpp), - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - VDESC_NO_OFFSET, - NULL, +struct vnodeop_desc 
vnop_symlink_desc = {
+	0,
+	"vnop_symlink",
+	0 | VDESC_VP0_WILLRELE | VDESC_VPP_WILLRELE,
+	vnop_symlink_vp_offsets,
+	VOPARG_OFFSETOF(struct vnop_symlink_args, a_vpp),
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_symlink_args, a_cnp),
+	VOPARG_OFFSETOF(struct vnop_symlink_args, a_context),
+	NULL,
 };
 
-int vop_print_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_print_args,a_vp),
+int vnop_readdir_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_readdir_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_print_desc = {
+struct vnodeop_desc vnop_readdir_desc = {
 	0,
-	"vop_print",
+	"vnop_readdir",
 	0,
-	vop_print_vp_offsets,
+	vnop_readdir_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_readdir_args, a_context),
 	NULL,
 };
 
-int vop_islocked_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_islocked_args,a_vp),
+int vnop_readdirattr_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_readdirattr_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_islocked_desc = {
+struct vnodeop_desc vnop_readdirattr_desc = {
 	0,
-	"vop_islocked",
+	"vnop_readdirattr",
 	0,
-	vop_islocked_vp_offsets,
+	vnop_readdirattr_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_readdirattr_args, a_context),
 	NULL,
 };
 
-int vop_pathconf_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_pathconf_args,a_vp),
+int vnop_readlink_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_readlink_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_pathconf_desc = {
+struct vnodeop_desc vnop_readlink_desc = {
 	0,
-	"vop_pathconf",
+	"vnop_readlink",
 	0,
-	vop_pathconf_vp_offsets,
+	vnop_readlink_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_readlink_args, a_context),
 	NULL,
 };
 
-int vop_advlock_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_advlock_args,a_vp),
+int vnop_inactive_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_inactive_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_advlock_desc = {
+struct vnodeop_desc vnop_inactive_desc = {
 	0,
-	"vop_advlock",
+	"vnop_inactive",
 	0,
-	vop_advlock_vp_offsets,
+	vnop_inactive_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_inactive_args, a_context),
 	NULL,
 };
 
-int vop_blkatoff_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_blkatoff_args,a_vp),
+int vnop_reclaim_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_reclaim_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_blkatoff_desc = {
+struct vnodeop_desc vnop_reclaim_desc = {
 	0,
-	"vop_blkatoff",
+	"vnop_reclaim",
 	0,
-	vop_blkatoff_vp_offsets,
+	vnop_reclaim_vp_offsets,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_reclaim_args, a_context),
 	NULL,
 };
 
-int vop_valloc_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct vop_valloc_args,a_pvp),
+int vnop_pathconf_vp_offsets[] = {
+	VOPARG_OFFSETOF(struct vnop_pathconf_args,a_vp),
 	VDESC_NO_OFFSET
 };
-struct vnodeop_desc vop_valloc_desc = {
+struct vnodeop_desc vnop_pathconf_desc = {
 	0,
-	"vop_valloc",
+	"vnop_pathconf",
 	0,
-	vop_valloc_vp_offsets,
-	VOPARG_OFFSETOF(struct vop_valloc_args, a_vpp),
-	VOPARG_OFFSETOF(struct vop_valloc_args, a_cred),
+	vnop_pathconf_vp_offsets,
+	VDESC_NO_OFFSET,
+	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
 	VDESC_NO_OFFSET,
+	VOPARG_OFFSETOF(struct vnop_pathconf_args, a_context),
 	NULL,
 };
 
-int vop_reallocblks_vp_offsets[] = {
-	VOPARG_OFFSETOF(struct
vop_reallocblks_args,a_vp), +int vnop_advlock_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_advlock_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_reallocblks_desc = { +struct vnodeop_desc vnop_advlock_desc = { 0, - "vop_reallocblks", + "vnop_advlock", 0, - vop_reallocblks_vp_offsets, + vnop_advlock_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_advlock_args, a_context), NULL, }; -int vop_vfree_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_vfree_args,a_pvp), +int vnop_allocate_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_allocate_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_vfree_desc = { +struct vnodeop_desc vnop_allocate_desc = { 0, - "vop_vfree", + "vnop_allocate", 0, - vop_vfree_vp_offsets, + vnop_allocate_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_allocate_args, a_context), NULL, }; -int vop_truncate_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_truncate_args,a_vp), +int vnop_pagein_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_pagein_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_truncate_desc = { +struct vnodeop_desc vnop_pagein_desc = { 0, - "vop_truncate", + "vnop_pagein", 0, - vop_truncate_vp_offsets, + vnop_pagein_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_truncate_args, a_cred), - VOPARG_OFFSETOF(struct vop_truncate_args, a_p), VDESC_NO_OFFSET, - NULL, -}; - -int vop_allocate_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_allocate_args,a_vp), - VDESC_NO_OFFSET -}; -struct vnodeop_desc vop_allocate_desc = { - 0, - "vop_allocate", - 0, - vop_allocate_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_allocate_args, a_cred), - VOPARG_OFFSETOF(struct vop_allocate_args, a_p), VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_pagein_args, a_context), NULL, }; -int vop_update_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_update_args,a_vp), +int vnop_pageout_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_pageout_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_update_desc = { +struct vnodeop_desc vnop_pageout_desc = { 0, - "vop_update", + "vnop_pageout", 0, - vop_update_vp_offsets, + vnop_pageout_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_pageout_args, a_context), NULL, }; -int vop_pgrd_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_pgrd_args,a_vp), +int vnop_devblocksize_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_devblocksize_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_pgrd_desc = { +struct vnodeop_desc vnop_devblocksize_desc = { 0, - "vop_pgrd", + "vnop_devblocksize", 0, - vop_pgrd_vp_offsets, + vnop_devblocksize_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_pgrd_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, NULL, }; -int vop_pgwr_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_pgwr_args,a_vp), +int vnop_searchfs_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_searchfs_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_pgwr_desc = { +struct vnodeop_desc vnop_searchfs_desc = { 0, - "vop_pgwr", + "vnop_searchfs", 0, - vop_pgwr_vp_offsets, + vnop_searchfs_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_pgwr_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, NULL, }; -int vop_pagein_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_pagein_args,a_vp), +int vnop_copyfile_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_copyfile_args,a_fvp), + VOPARG_OFFSETOF(struct 
vnop_copyfile_args,a_tdvp), + VOPARG_OFFSETOF(struct vnop_copyfile_args,a_tvp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_pagein_desc = { +struct vnodeop_desc vnop_copyfile_desc = { 0, - "vop_pagein", - 0, - vop_pagein_vp_offsets, + "vnop_copyfile", + 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE | VDESC_VP2_WILLRELE, + vnop_copyfile_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_pagein_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_copyfile_args, a_tcnp), NULL, }; -int vop_pageout_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_pageout_args,a_vp), +int vop_getxattr_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_getxattr_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_pageout_desc = { +struct vnodeop_desc vnop_getxattr_desc = { 0, - "vop_pageout", + "vnop_getxattr", 0, - vop_pageout_vp_offsets, + vop_getxattr_vp_offsets, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_pageout_args, a_cred), VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_getxattr_args, a_context), NULL, }; -int vop_devblocksize_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_devblocksize_args,a_vp), +int vop_setxattr_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_setxattr_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_devblocksize_desc = { +struct vnodeop_desc vnop_setxattr_desc = { 0, - "vop_devblocksize", + "vnop_setxattr", 0, - vop_devblocksize_vp_offsets, + vop_setxattr_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_setxattr_args, a_context), NULL, }; -int vop_searchfs_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_searchfs_args,a_vp), +int vop_removexattr_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_removexattr_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_searchfs_desc = { +struct vnodeop_desc vnop_removexattr_desc = { 0, - "vop_searchfs", + "vnop_removexattr", 0, - vop_searchfs_vp_offsets, + vop_removexattr_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_removexattr_args, a_context), NULL, }; -int vop_copyfile_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_copyfile_args,a_fvp), - VOPARG_OFFSETOF(struct vop_copyfile_args,a_tdvp), - VOPARG_OFFSETOF(struct vop_copyfile_args,a_tvp), +int vop_listxattr_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_listxattr_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_copyfile_desc = { +struct vnodeop_desc vnop_listxattr_desc = { 0, - "vop_copyfile", - 0 | VDESC_VP0_WILLRELE | VDESC_VP1_WILLRELE | VDESC_VP2_WILLRELE, - vop_copyfile_vp_offsets, + "vnop_listxattr", + 0, + vop_listxattr_vp_offsets, + VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, - VOPARG_OFFSETOF(struct vop_copyfile_args, a_tcnp), + VOPARG_OFFSETOF(struct vnop_listxattr_args, a_context), NULL, }; -int vop_blktooff_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_blktooff_args,a_vp), +int vnop_blktooff_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_blktooff_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_blktooff_desc = { +struct vnodeop_desc vnop_blktooff_desc = { 0, - "vop_blktooff", + "vnop_blktooff", 0, - vop_blktooff_vp_offsets, + vnop_blktooff_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, @@ -1035,15 +870,15 @@ struct vnodeop_desc vop_blktooff_desc = { NULL, }; -int vop_offtoblk_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_offtoblk_args,a_vp), +int vnop_offtoblk_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_offtoblk_args,a_vp), 
VDESC_NO_OFFSET }; -struct vnodeop_desc vop_offtoblk_desc = { +struct vnodeop_desc vnop_offtoblk_desc = { 0, - "vop_offtoblk", + "vnop_offtoblk", 0, - vop_offtoblk_vp_offsets, + vnop_offtoblk_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, @@ -1051,15 +886,15 @@ struct vnodeop_desc vop_offtoblk_desc = { NULL, }; -int vop_cmap_vp_offsets[] = { - VOPARG_OFFSETOF(struct vop_cmap_args,a_vp), +int vnop_blockmap_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_blockmap_args,a_vp), VDESC_NO_OFFSET }; -struct vnodeop_desc vop_cmap_desc = { +struct vnodeop_desc vnop_blockmap_desc = { 0, - "vop_cmap", + "vnop_blockmap", 0, - vop_cmap_vp_offsets, + vnop_blockmap_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, @@ -1069,14 +904,14 @@ struct vnodeop_desc vop_cmap_desc = { /* Special cases: */ -int vop_strategy_vp_offsets[] = { +int vnop_strategy_vp_offsets[] = { VDESC_NO_OFFSET }; -struct vnodeop_desc vop_strategy_desc = { +struct vnodeop_desc vnop_strategy_desc = { 0, - "vop_strategy", + "vnop_strategy", 0, - vop_strategy_vp_offsets, + vnop_strategy_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, @@ -1084,14 +919,14 @@ struct vnodeop_desc vop_strategy_desc = { NULL, }; -int vop_bwrite_vp_offsets[] = { +int vnop_bwrite_vp_offsets[] = { VDESC_NO_OFFSET }; -struct vnodeop_desc vop_bwrite_desc = { +struct vnodeop_desc vnop_bwrite_desc = { 0, - "vop_bwrite", + "vnop_bwrite", 0, - vop_bwrite_vp_offsets, + vnop_bwrite_vp_offsets, VDESC_NO_OFFSET, VDESC_NO_OFFSET, VDESC_NO_OFFSET, @@ -1102,71 +937,58 @@ struct vnodeop_desc vop_bwrite_desc = { /* End of special cases. */ struct vnodeop_desc *vfs_op_descs[] = { - &vop_default_desc, /* MUST BE FIRST */ - &vop_strategy_desc, /* XXX: SPECIAL CASE */ - &vop_bwrite_desc, /* XXX: SPECIAL CASE */ - - &vop_lookup_desc, - &vop_cachedlookup_desc, - &vop_create_desc, - &vop_whiteout_desc, - &vop_mknod_desc, - &vop_mkcomplex_desc, - &vop_open_desc, - &vop_close_desc, - &vop_access_desc, - &vop_getattr_desc, - &vop_setattr_desc, - &vop_getattrlist_desc, - &vop_setattrlist_desc, - &vop_read_desc, - &vop_write_desc, - &vop_lease_desc, - &vop_ioctl_desc, - &vop_select_desc, - &vop_exchange_desc, - &vop_kqfilt_add_desc, - &vop_kqfilt_remove_desc, - &vop_revoke_desc, - &vop_mmap_desc, - &vop_fsync_desc, - &vop_seek_desc, - &vop_remove_desc, - &vop_link_desc, - &vop_rename_desc, - &vop_mkdir_desc, - &vop_rmdir_desc, - &vop_symlink_desc, - &vop_readdir_desc, - &vop_readdirattr_desc, - &vop_readlink_desc, - &vop_abortop_desc, - &vop_inactive_desc, - &vop_reclaim_desc, - &vop_lock_desc, - &vop_unlock_desc, - &vop_bmap_desc, - &vop_print_desc, - &vop_islocked_desc, - &vop_pathconf_desc, - &vop_advlock_desc, - &vop_blkatoff_desc, - &vop_valloc_desc, - &vop_reallocblks_desc, - &vop_vfree_desc, - &vop_truncate_desc, - &vop_allocate_desc, - &vop_update_desc, - &vop_pgrd_desc, - &vop_pgwr_desc, - &vop_pagein_desc, - &vop_pageout_desc, - &vop_devblocksize_desc, - &vop_searchfs_desc, - &vop_copyfile_desc, - &vop_blktooff_desc, - &vop_offtoblk_desc, - &vop_cmap_desc, + &vnop_default_desc, /* MUST BE FIRST */ + &vnop_strategy_desc, /* XXX: SPECIAL CASE */ + &vnop_bwrite_desc, /* XXX: SPECIAL CASE */ + + &vnop_lookup_desc, + &vnop_create_desc, + &vnop_mknod_desc, + &vnop_whiteout_desc, + &vnop_open_desc, + &vnop_close_desc, + &vnop_access_desc, + &vnop_getattr_desc, + &vnop_setattr_desc, + &vnop_getattrlist_desc, + &vnop_setattrlist_desc, + &vnop_read_desc, + &vnop_write_desc, + &vnop_ioctl_desc, + &vnop_select_desc, + &vnop_exchange_desc, + 
&vnop_kqfilt_add_desc, + &vnop_kqfilt_remove_desc, + &vnop_revoke_desc, + &vnop_mmap_desc, + &vnop_mnomap_desc, + &vnop_fsync_desc, + &vnop_remove_desc, + &vnop_link_desc, + &vnop_rename_desc, + &vnop_mkdir_desc, + &vnop_rmdir_desc, + &vnop_symlink_desc, + &vnop_readdir_desc, + &vnop_readdirattr_desc, + &vnop_readlink_desc, + &vnop_inactive_desc, + &vnop_reclaim_desc, + &vnop_pathconf_desc, + &vnop_advlock_desc, + &vnop_allocate_desc, + &vnop_pagein_desc, + &vnop_pageout_desc, + &vnop_devblocksize_desc, + &vnop_searchfs_desc, + &vnop_copyfile_desc, + &vnop_getxattr_desc, + &vnop_setxattr_desc, + &vnop_removexattr_desc, + &vnop_listxattr_desc, + &vnop_blktooff_desc, + &vnop_offtoblk_desc, + &vnop_blockmap_desc, NULL }; diff --git a/bsd/vfs/vnode_if.sh b/bsd/vfs/vnode_if.sh index 84b383645..610af8127 100644 --- a/bsd/vfs/vnode_if.sh +++ b/bsd/vfs/vnode_if.sh @@ -174,6 +174,7 @@ echo ' #define _SYS_VNODE_IF_H_ #include +#include #ifdef __APPLE_API_UNSTABLE extern struct vnodeop_desc vop_default_desc; @@ -206,26 +207,23 @@ function doit() { printf("static __inline int _%s(", toupper(name)); for (i=0; i #include #include #include -#include +#include +#include #include #include -#include +#include #include -#include +#include +#include + +#include +#include #include #include #include -#include -#include +#include +#include +#include + +#include #include +#include #include #include +#include + #include -#include +#include +#include +#include #include +#include + +extern thread_t current_act(void); /* * temporary support for delayed instantiation @@ -66,8 +81,6 @@ struct bs_map bs_port_table[MAX_BACKING_STORE] = { /* ###################################################### */ -#include - /* * Routine: macx_backing_store_recovery * Function: @@ -77,19 +90,20 @@ struct bs_map bs_port_table[MAX_BACKING_STORE] = { */ int macx_backing_store_recovery( - int pid) + struct macx_backing_store_recovery_args *args) { + int pid = args->pid; int error; struct proc *p = current_proc(); boolean_t funnel_state; funnel_state = thread_funnel_set(kernel_flock, TRUE); - if ((error = suser(p->p_ucred, &p->p_acflag))) + if ((error = suser(kauth_cred_get(), 0))) goto backing_store_recovery_return; /* for now restrict backing_store_recovery */ /* usage to only present task */ - if(pid != p->p_pid) { + if(pid != proc_selfpid()) { error = EINVAL; goto backing_store_recovery_return; } @@ -110,14 +124,14 @@ backing_store_recovery_return: int macx_backing_store_suspend( - boolean_t suspend) + struct macx_backing_store_suspend_args *args) { + boolean_t suspend = args->suspend; int error; - struct proc *p = current_proc(); boolean_t funnel_state; funnel_state = thread_funnel_set(kernel_flock, TRUE); - if ((error = suser(p->p_ucred, &p->p_acflag))) + if ((error = suser(kauth_cred_get(), 0))) goto backing_store_suspend_return; vm_backing_store_disable(suspend); @@ -134,31 +148,31 @@ backing_store_suspend_return: */ int macx_swapon( - char *filename, - int flags, - long size, - long priority) + struct macx_swapon_args *args) { - struct vnode *vp = 0; + int size = args->size; + vnode_t vp = (vnode_t)NULL; struct nameidata nd, *ndp; struct proc *p = current_proc(); - pager_file_t pf; register int error; kern_return_t kr; mach_port_t backing_store; memory_object_default_t default_pager; int i; boolean_t funnel_state; + off_t file_size; + struct vfs_context context; - struct vattr vattr; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); - AUDIT_ARG(value, priority); + AUDIT_ARG(value, 
args->priority); funnel_state = thread_funnel_set(kernel_flock, TRUE); ndp = &nd; - if ((error = suser(p->p_ucred, &p->p_acflag))) + if ((error = suser(kauth_cred_get(), 0))) goto swapon_bailout; if(default_pager_init_flag == 0) { @@ -169,34 +183,28 @@ macx_swapon( /* * Get a vnode for the paging area. */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - filename, p); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32), + CAST_USER_ADDR_T(args->filename), &context); if ((error = namei(ndp))) goto swapon_bailout; + nameidone(ndp); vp = ndp->ni_vp; if (vp->v_type != VREG) { error = EINVAL; - VOP_UNLOCK(vp, 0, p); goto swapon_bailout; } UBCINFOCHECK("macx_swapon", vp); - if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) { - VOP_UNLOCK(vp, 0, p); + /* get file size */ + if ((error = vnode_size(vp, &file_size, &context)) != 0) goto swapon_bailout; - } - if (vattr.va_size < (u_quad_t)size) { - vattr_null(&vattr); - vattr.va_size = (u_quad_t)size; - error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - if (error) { - VOP_UNLOCK(vp, 0, p); - goto swapon_bailout; - } - } + /* resize to desired size if it's too small */ + if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, &context)) != 0)) + goto swapon_bailout; /* add new backing store to list */ i = 0; @@ -207,7 +215,6 @@ macx_swapon( } if(i == MAX_BACKING_STORE) { error = ENOMEM; - VOP_UNLOCK(vp, 0, p); goto swapon_bailout; } @@ -222,7 +229,6 @@ macx_swapon( kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); if(kr != KERN_SUCCESS) { error = EAGAIN; - VOP_UNLOCK(vp, 0, p); bs_port_table[i].vp = 0; goto swapon_bailout; } @@ -235,7 +241,6 @@ macx_swapon( if(kr != KERN_SUCCESS) { error = ENOMEM; - VOP_UNLOCK(vp, 0, p); bs_port_table[i].vp = 0; goto swapon_bailout; } @@ -248,9 +253,8 @@ macx_swapon( * b: because allow paging will be done modulo page size */ - VOP_UNLOCK(vp, 0, p); - kr = default_pager_add_file(backing_store, vp, PAGE_SIZE, - ((int)vattr.va_size)/PAGE_SIZE); + kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp, + PAGE_SIZE, (int)(file_size/PAGE_SIZE)); if(kr != KERN_SUCCESS) { bs_port_table[i].vp = 0; if(kr == KERN_INVALID_ARGUMENT) @@ -261,8 +265,6 @@ macx_swapon( } bs_port_table[i].bs = (void *)backing_store; error = 0; - if (!ubc_hold(vp)) - panic("macx_swapon: hold"); /* Mark this vnode as being used for swapfile */ SET(vp->v_flag, VSWAP); @@ -270,17 +272,14 @@ macx_swapon( ubc_setcred(vp, p); /* - * take an extra reference on the vnode to keep + * take a long term reference on the vnode to keep * vnreclaim() away from this vnode. 
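The comment above describes the key change in this function: the old scheme of VREF() plus holding the namei() reference is replaced by the iocount/usecount split of the new vnode KPI. namei() now returns the vnode with a short-term iocount, dropped with vnode_put(), while vnode_ref() takes the long-term usecount that macx_swapoff() later releases with vnode_rele(). A minimal sketch of the idiom, using only calls that already appear in this patch (the helper name is hypothetical):

    static int
    keep_vnode_long_term(struct nameidata *ndp, vnode_t *vpp)
    {
            int error;

            if ((error = namei(ndp)))       /* returns with an iocount held */
                    return (error);
            nameidone(ndp);
            *vpp = ndp->ni_vp;

            vnode_ref(*vpp);                /* long-term usecount: keeps vnreclaim()
                                             * away from the vnode */
            vnode_put(*vpp);                /* drop namei()'s short-term iocount;
                                             * vnode_rele() later undoes vnode_ref() */
            return (0);
    }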
*/ - VREF(vp); - - /* Hold on to the namei reference to the paging file vnode */ - vp = 0; + vnode_ref(vp); swapon_bailout: if (vp) { - vrele(vp); + vnode_put(vp); } (void) thread_funnel_set(kernel_flock, FALSE); AUDIT_MACH_SYSCALL_EXIT(error); @@ -294,9 +293,9 @@ swapon_bailout: */ int macx_swapoff( - char *filename, - int flags) + struct macx_swapoff_args *args) { + __unused int flags = args->flags; kern_return_t kr; mach_port_t backing_store; @@ -306,59 +305,59 @@ macx_swapoff( int i; int error; boolean_t funnel_state; + struct vfs_context context; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF); + funnel_state = thread_funnel_set(kernel_flock, TRUE); backing_store = NULL; ndp = &nd; - if ((error = suser(p->p_ucred, &p->p_acflag))) + if ((error = suser(kauth_cred_get(), 0))) goto swapoff_bailout; /* * Get the vnode for the paging area. */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_USERSPACE, - filename, p); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32), + CAST_USER_ADDR_T(args->filename), &context); if ((error = namei(ndp))) goto swapoff_bailout; + nameidone(ndp); vp = ndp->ni_vp; if (vp->v_type != VREG) { error = EINVAL; - VOP_UNLOCK(vp, 0, p); goto swapoff_bailout; } for(i = 0; i < MAX_BACKING_STORE; i++) { if(bs_port_table[i].vp == vp) { - backing_store; break; } } if (i == MAX_BACKING_STORE) { error = EINVAL; - VOP_UNLOCK(vp, 0, p); goto swapoff_bailout; } backing_store = (mach_port_t)bs_port_table[i].bs; - VOP_UNLOCK(vp, 0, p); kr = default_pager_backing_store_delete(backing_store); switch (kr) { case KERN_SUCCESS: error = 0; bs_port_table[i].vp = 0; - ubc_rele(vp); /* This vnode is no longer used for swapfile */ CLR(vp->v_flag, VSWAP); - /* get rid of macx_swapon() namei() reference */ - vrele(vp); + /* get rid of macx_swapon() "long term" reference */ + vnode_rele(vp); - /* get rid of macx_swapon() "extra" reference */ - vrele(vp); break; case KERN_FAILURE: error = EAGAIN; @@ -371,9 +370,78 @@ macx_swapoff( swapoff_bailout: /* get rid of macx_swapoff() namei() reference */ if (vp) - vrele(vp); + vnode_put(vp); (void) thread_funnel_set(kernel_flock, FALSE); AUDIT_MACH_SYSCALL_EXIT(error); return(error); } + +/* + * Routine: macx_swapinfo + * Function: + * Syscall interface to get general swap statistics + */ +int +macx_swapinfo( + memory_object_size_t *total_p, + memory_object_size_t *avail_p, + vm_size_t *pagesize_p, + boolean_t *encrypted_p) +{ + int error; + memory_object_default_t default_pager; + default_pager_info_64_t dpi64; + kern_return_t kr; + + error = 0; + + /* + * Get a handle on the default pager. + */ + default_pager = MEMORY_OBJECT_DEFAULT_NULL; + kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); + if (kr != KERN_SUCCESS) { + error = EAGAIN; /* XXX why EAGAIN ? */ + goto done; + } + if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) { + /* + * The default pager has not initialized yet, + * so it can't be using any swap space at all. + */ + *total_p = 0; + *avail_p = 0; + *pagesize_p = 0; + *encrypted_p = FALSE; + goto done; + } + + /* + * Get swap usage data from default pager. + */ + kr = default_pager_info_64(default_pager, &dpi64); + if (kr != KERN_SUCCESS) { + error = ENOTSUP; + goto done; + } + + /* + * Provide default pager info to caller. 
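macx_swapinfo() reports sizes in bytes plus the pager's page size, leaving any derived statistics to the caller. An illustrative (hypothetical) caller, shown only to make the units of the four out-parameters concrete:

    memory_object_size_t total, avail, used;
    vm_size_t psize;
    boolean_t encrypted;

    if (macx_swapinfo(&total, &avail, &psize, &encrypted) == 0) {
            used = total - avail;
            printf("swap: %llu of %llu bytes in use (page size %u)%s\n",
                (unsigned long long)used, (unsigned long long)total,
                (unsigned int)psize, encrypted ? ", encrypted" : "");
    }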
+ */ + *total_p = dpi64.dpi_total_space; + *avail_p = dpi64.dpi_free_space; + *pagesize_p = dpi64.dpi_page_size; + if (dpi64.dpi_flags & DPI_ENCRYPTED) { + *encrypted_p = TRUE; + } else { + *encrypted_p = FALSE; + } + +done: + if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) { + /* release our handle on default pager */ + memory_object_default_deallocate(default_pager); + } + return error; +} diff --git a/bsd/vm/vm_pager.h b/bsd/vm/vm_pager.h index 6a12dde49..633318cda 100644 --- a/bsd/vm/vm_pager.h +++ b/bsd/vm/vm_pager.h @@ -53,11 +53,11 @@ typedef struct pager_struct *vm_pager_t; #ifdef KERNEL typedef int pager_return_t; -vm_pager_t vm_pager_allocate(); -void vm_pager_deallocate(); -pager_return_t vm_pager_get(); -pager_return_t vm_pager_put(); -boolean_t vm_pager_has_page(); +extern vm_pager_t vm_pager_allocate(void); +extern void vm_pager_deallocate(void); +extern pager_return_t vm_pager_get(void); +extern pager_return_t vm_pager_put(void); +extern boolean_t vm_pager_has_page(void); #endif /* KERNEL */ #endif /* _VM_PAGER_ */ diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index a322679ae..4dfe84ebb 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -36,25 +36,31 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include #include #include -#include +#include +#include #include #include -#include -#include +#include #include #include #include -#include +#include +#include #include +#include +#include #include #include @@ -68,27 +74,29 @@ #include #include +#include -extern zone_t lsf_zone; -useracc(addr, len, prot) - caddr_t addr; - u_int len; - int prot; +int +useracc( + user_addr_t addr, + user_size_t len, + int prot) { return (vm_map_check_protection( current_map(), - trunc_page_32((unsigned int)addr), round_page_32((unsigned int)(addr+len)), + vm_map_trunc_page(addr), vm_map_round_page(addr+len), prot == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE)); } -vslock(addr, len) - caddr_t addr; - int len; +int +vslock( + user_addr_t addr, + user_size_t len) { -kern_return_t kret; - kret = vm_map_wire(current_map(), trunc_page_32((unsigned int)addr), - round_page_32((unsigned int)(addr+len)), + kern_return_t kret; + kret = vm_map_wire(current_map(), vm_map_trunc_page(addr), + vm_map_round_page(addr+len), VM_PROT_READ | VM_PROT_WRITE ,FALSE); switch (kret) { @@ -104,22 +112,25 @@ kern_return_t kret; } } -vsunlock(addr, len, dirtied) - caddr_t addr; - int len; - int dirtied; +int +vsunlock( + user_addr_t addr, + user_size_t len, + __unused int dirtied) { - pmap_t pmap; #if FIXME /* [ */ + pmap_t pmap; vm_page_t pg; + vm_map_offset_t vaddr; + ppnum_t paddr; #endif /* FIXME ] */ - vm_offset_t vaddr, paddr; kern_return_t kret; #if FIXME /* [ */ if (dirtied) { pmap = get_task_pmap(current_task()); - for (vaddr = trunc_page((unsigned int)(addr)); vaddr < round_page((unsigned int)(addr+len)); + for (vaddr = vm_map_trunc_page(addr); + vaddr < vm_map_round_page(addr+len); vaddr += PAGE_SIZE) { paddr = pmap_extract(pmap, vaddr); pg = PHYS_TO_VM_PAGE(paddr); @@ -130,8 +141,8 @@ vsunlock(addr, len, dirtied) #ifdef lint dirtied++; #endif /* lint */ - kret = vm_map_unwire(current_map(), trunc_page_32((unsigned int)(addr)), - round_page_32((unsigned int)(addr+len)), FALSE); + kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr), + vm_map_round_page(addr+len), FALSE); switch (kret) { case KERN_SUCCESS: return (0); @@ -145,11 +156,10 @@ vsunlock(addr, len, dirtied) } } -#if defined(sun) || BALANCE || defined(m88k) -#else /*defined(sun) || BALANCE || defined(m88k)*/ -subyte(addr, byte) - void * addr; - int byte; +int +subyte( + user_addr_t addr, + int byte) { char character; @@ -157,18 +167,18 @@ subyte(addr, byte) return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); } -suibyte(addr, byte) - void * addr; - int byte; +int +suibyte( + user_addr_t addr, + int byte) { char character; character = (char)byte; - return (copyout((void *) &(character), addr, sizeof(char)) == 0 ? 0 : -1); + return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); } -int fubyte(addr) - void * addr; +int fubyte(user_addr_t addr) { unsigned char byte; @@ -177,8 +187,7 @@ int fubyte(addr) return(byte); } -int fuibyte(addr) - void * addr; +int fuibyte(user_addr_t addr) { unsigned char byte; @@ -187,15 +196,15 @@ int fuibyte(addr) return(byte); } -suword(addr, word) - void * addr; - long word; +int +suword( + user_addr_t addr, + long word) { return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); } -long fuword(addr) - void * addr; +long fuword(user_addr_t addr) { long word; @@ -206,15 +215,15 @@ long fuword(addr) /* suiword and fuiword are the same as suword and fuword, respectively */ -suiword(addr, word) - void * addr; - long word; +int +suiword( + user_addr_t addr, + long word) { return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); } -long fuiword(addr) - void * addr; +long fuiword(user_addr_t addr) { long word; @@ -222,23 +231,76 @@ long fuiword(addr) return(-1); return(word); } -#endif /* defined(sun) || BALANCE || defined(m88k) || defined(i386) */ + +/* + * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the + * fetching and setting of process-sized size_t and pointer values. + */ +int +sulong(user_addr_t addr, int64_t word) +{ + + if (IS_64BIT_PROCESS(current_proc())) { + return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 
0 : -1); + } else { + return(suiword(addr, (long)word)); + } +} + +int64_t +fulong(user_addr_t addr) +{ + int64_t longword; + + if (IS_64BIT_PROCESS(current_proc())) { + if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) + return(-1); + return(longword); + } else { + return((int64_t)fuiword(addr)); + } +} int -swapon() +suulong(user_addr_t addr, uint64_t uword) +{ + + if (IS_64BIT_PROCESS(current_proc())) { + return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); + } else { + return(suiword(addr, (u_long)uword)); + } +} + +uint64_t +fuulong(user_addr_t addr) { - return(EOPNOTSUPP); + uint64_t ulongword; + + if (IS_64BIT_PROCESS(current_proc())) { + if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) + return(-1ULL); + return(ulongword); + } else { + return((uint64_t)fuiword(addr)); + } +} + +int +swapon(__unused struct proc *procp, __unused struct swapon_args *uap, __unused int *retval) +{ + return(ENOTSUP); } kern_return_t -pid_for_task(t, x) - mach_port_t t; - int *x; +pid_for_task( + struct pid_for_task_args *args) { + mach_port_name_t t = args->t; + user_addr_t pid_addr = args->pid; struct proc * p; task_t t1; - extern task_t port_name_to_task(mach_port_t t); int pid = -1; kern_return_t err = KERN_SUCCESS; boolean_t funnel_state; @@ -255,7 +317,7 @@ pid_for_task(t, x) } else { p = get_bsdtask_info(t1); if (p) { - pid = p->p_pid; + pid = proc_pid(p); err = KERN_SUCCESS; } else { err = KERN_FAILURE; @@ -264,7 +326,7 @@ pid_for_task(t, x) task_deallocate(t1); pftout: AUDIT_ARG(pid, pid); - (void) copyout((char *) &pid, (char *) x, sizeof(*x)); + (void) copyout((char *) &pid, pid_addr, sizeof(int)); thread_funnel_set(kernel_flock, funnel_state); AUDIT_MACH_SYSCALL_EXIT(err); return(err); @@ -278,18 +340,21 @@ pftout: * * Only permitted to privileged processes, or processes * with the same user ID. + * + * XXX This should be a BSD system call, not a Mach trap!!! */ kern_return_t -task_for_pid(target_tport, pid, t) - mach_port_t target_tport; - int pid; - mach_port_t *t; +task_for_pid( + struct task_for_pid_args *args) { + mach_port_name_t target_tport = args->target_tport; + int pid = args->pid; + user_addr_t task_addr = args->t; + struct uthread *uthread; struct proc *p; struct proc *p1; task_t t1; - mach_port_t tret; - extern task_t port_name_to_task(mach_port_t tp); + mach_port_name_t tret; void * sright; int error = 0; boolean_t funnel_state; @@ -300,45 +365,59 @@ task_for_pid(target_tport, pid, t) t1 = port_name_to_task(target_tport); if (t1 == TASK_NULL) { - (void ) copyout((char *)&t1, (char *)t, sizeof(mach_port_t)); + (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); return(KERN_FAILURE); } funnel_state = thread_funnel_set(kernel_flock, TRUE); - restart: - p1 = get_bsdtask_info(t1); + p1 = get_bsdtask_info(t1); /* XXX current proc */ + + /* + * Delayed binding of thread credential to process credential, if we + * are not running with an explicitly set thread credential. 
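In other words, a thread tracks the process credential lazily: it keeps its cached kauth_cred_t until the cache is seen to be stale, and only a thread marked UT_SETUID pins an explicit credential. The refresh step below, distilled into a standalone sketch (helper name hypothetical, logic as in the code that follows):

    static void
    uthread_refresh_cred(struct uthread *ut, struct proc *p)
    {
            kauth_cred_t old;

            if (ut->uu_ucred == p->p_ucred ||       /* cache still current */
                (ut->uu_flag & UT_SETUID))          /* explicitly pinned */
                    return;

            proc_lock(p);
            old = ut->uu_ucred;
            ut->uu_ucred = p->p_ucred;
            kauth_cred_ref(ut->uu_ucred);           /* +1 for the thread's cache */
            proc_unlock(p);

            if (old != NOCRED)
                    kauth_cred_rele(old);           /* drop the stale reference */
    }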
+ */ + uthread = get_bsdthread_info(current_thread()); + if (uthread->uu_ucred != p1->p_ucred && + (uthread->uu_flag & UT_SETUID) == 0) { + kauth_cred_t old = uthread->uu_ucred; + proc_lock(p1); + uthread->uu_ucred = p1->p_ucred; + kauth_cred_ref(uthread->uu_ucred); + proc_unlock(p1); + if (old != NOCRED) + kauth_cred_rele(old); + } + p = pfind(pid); AUDIT_ARG(process, p); + if ( (p != (struct proc *) 0) && (p1 != (struct proc *) 0) - && (((p->p_ucred->cr_uid == p1->p_ucred->cr_uid) && - ((p->p_cred->p_ruid == p1->p_cred->p_ruid))) - || !(suser(p1->p_ucred, &p1->p_acflag))) + && (((kauth_cred_getuid(p->p_ucred) == kauth_cred_getuid(kauth_cred_get())) && + ((p->p_ucred->cr_ruid == kauth_cred_get()->cr_ruid))) + || !(suser(kauth_cred_get(), 0))) && (p->p_stat != SZOMB) ) { if (p->task != TASK_NULL) { - if (!task_reference_try(p->task)) { - mutex_pause(); /* temp loss of funnel */ - goto restart; - } + task_reference(p->task); sright = (void *)convert_task_to_port(p->task); - tret = (void *) - ipc_port_copyout_send(sright, - get_task_ipcspace(current_task())); + tret = ipc_port_copyout_send( + sright, + get_task_ipcspace(current_task())); } else tret = MACH_PORT_NULL; AUDIT_ARG(mach_port2, tret); - (void ) copyout((char *)&tret, (char *) t, sizeof(mach_port_t)); + (void ) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); task_deallocate(t1); error = KERN_SUCCESS; goto tfpout; } task_deallocate(t1); tret = MACH_PORT_NULL; - (void) copyout((char *) &tret, (char *) t, sizeof(mach_port_t)); + (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); error = KERN_FAILURE; tfpout: thread_funnel_set(kernel_flock, funnel_state); @@ -347,23 +426,483 @@ tfpout: } -struct load_shared_file_args { - char *filename; - caddr_t mfa; - u_long mfs; - caddr_t *ba; - int map_cnt; - sf_mapping_t *mappings; - int *flags; -}; +/* + * shared_region_make_private_np: + * + * This system call is for "dyld" only. + * + * It creates a private copy of the current process's "shared region" for + * split libraries. "dyld" uses this when the shared region is full or + * it needs to load a split library that conflicts with an already loaded one + * that this process doesn't need. "dyld" specifies a set of address ranges + * that it wants to keep in the now-private "shared region". These cover + * the set of split libraries that the process needs so far. The kernel needs + * to deallocate the rest of the shared region, so that it's available for + * more libraries for this process. 
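From dyld's point of view the call is a single request: pass in the address ranges worth keeping and the kernel strips everything else out of the now-private region. A hypothetical user-space sketch; the wrapper name, argument order, and field names are inferred from the uap fields used below (rangeCount, ranges), not taken from a published header:

    struct shared_region_range_np {         /* assumed layout */
            mach_vm_address_t       srr_address;
            mach_vm_size_t          srr_size;
    };

    /* keep the two ranges still in use; everything else is torn out */
    struct shared_region_range_np keep[] = {
            { 0x90000000ULL, 0x200000ULL }, /* TEXT of a split library */
            { 0xa0000000ULL, 0x100000ULL }, /* its DATA */
    };

    if (shared_region_make_private_np(2, keep) != 0) {
            /* fall back: map new libraries outside the shared region */
    }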
+ */ +int +shared_region_make_private_np( + struct proc *p, + struct shared_region_make_private_np_args *uap, + __unused int *retvalp) +{ + int error; + kern_return_t kr; + boolean_t using_shared_regions; + user_addr_t user_ranges; + unsigned int range_count; + struct shared_region_range_np *ranges; + shared_region_mapping_t shared_region; + struct shared_region_task_mappings task_mapping_info; + shared_region_mapping_t next; + + ranges = NULL; + + range_count = uap->rangeCount; + user_ranges = uap->ranges; + + /* allocate kernel space for the "ranges" */ + if (range_count != 0) { + kr = kmem_alloc(kernel_map, + (vm_offset_t *) &ranges, + (vm_size_t) (range_count * sizeof (ranges[0]))); + if (kr != KERN_SUCCESS) { + error = ENOMEM; + goto done; + } + + /* copy "ranges" from user-space */ + error = copyin(user_ranges, + ranges, + (range_count * sizeof (ranges[0]))); + if (error) { + goto done; + } + } + + if (p->p_flag & P_NOSHLIB) { + /* no split library has been mapped for this process so far */ + using_shared_regions = FALSE; + } else { + /* this process has already mapped some split libraries */ + using_shared_regions = TRUE; + } + + /* + * Get a private copy of the current shared region. + * Do not chain it to the system-wide shared region, as we'll want + * to map other split libraries in place of the old ones. We want + * to completely detach from the system-wide shared region and go our + * own way after this point, not sharing anything with other processes. + */ + error = clone_system_shared_regions(using_shared_regions, + FALSE, /* chain_regions */ + ENV_DEFAULT_ROOT); + if (error) { + goto done; + } + + /* get info on the newly allocated shared region */ + vm_get_shared_region(current_task(), &shared_region); + task_mapping_info.self = (vm_offset_t) shared_region; + shared_region_mapping_info(shared_region, + &(task_mapping_info.text_region), + &(task_mapping_info.text_size), + &(task_mapping_info.data_region), + &(task_mapping_info.data_size), + &(task_mapping_info.region_mappings), + &(task_mapping_info.client_base), + &(task_mapping_info.alternate_base), + &(task_mapping_info.alternate_next), + &(task_mapping_info.fs_base), + &(task_mapping_info.system), + &(task_mapping_info.flags), + &next); + + /* + * We now have our private copy of the shared region, as it was before + * the call to clone_system_shared_regions(). We now need to clean it + * up and keep only the memory areas described by the "ranges" array. + */ + kr = shared_region_cleanup(range_count, ranges, &task_mapping_info); + switch (kr) { + case KERN_SUCCESS: + error = 0; + break; + default: + error = EINVAL; + goto done; + } + +done: + if (ranges != NULL) { + kmem_free(kernel_map, + (vm_offset_t) ranges, + range_count * sizeof (ranges[0])); + ranges = NULL; + } + + return error; +} + + +/* + * shared_region_map_file_np: + * + * This system call is for "dyld" only. + * + * "dyld" wants to map parts of a split library in the shared region. + * We get a file descriptor on the split library to be mapped and a set + * of mapping instructions, describing which parts of the file to map in + * which areas of the shared segment and with what protection. + * The "shared region" is split into 2 areas: + * 0x90000000 - 0xa0000000 : read-only area (for TEXT and LINKEDIT sections), + * 0xa0000000 - 0xb0000000 : writable area (for DATA sections).
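The per-mapping classification performed below reduces to one masked comparison against those two windows. The same test in standalone form, a sketch built from the constants this code already uses:

    static boolean_t
    addr_in_shared_windows(mach_vm_offset_t addr)
    {
            mach_vm_offset_t segment = addr & GLOBAL_SHARED_SEGMENT_MASK;

            return (segment == GLOBAL_SHARED_TEXT_SEGMENT ||   /* read-only window */
                    segment == GLOBAL_SHARED_DATA_SEGMENT);    /* writable window */
    }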
+ * + */ +int +shared_region_map_file_np( + struct proc *p, + struct shared_region_map_file_np_args *uap, + __unused int *retvalp) +{ + int error; + kern_return_t kr; + int fd; + unsigned int mapping_count; + user_addr_t user_mappings; /* 64-bit */ + user_addr_t user_slide_p; /* 64-bit */ + struct shared_file_mapping_np *mappings; + struct fileproc *fp; + mach_vm_offset_t slide; + struct vnode *vp; + struct vfs_context context; + memory_object_control_t file_control; + memory_object_size_t file_size; + shared_region_mapping_t shared_region; + struct shared_region_task_mappings task_mapping_info; + shared_region_mapping_t next; + shared_region_mapping_t default_shared_region; + boolean_t using_default_region; + unsigned int j; + vm_prot_t max_prot; + mach_vm_offset_t base_offset, end_offset; + mach_vm_offset_t original_base_offset; + boolean_t mappings_in_segment; +#define SFM_MAX_STACK 6 + struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; + + mappings = NULL; + mapping_count = 0; + fp = NULL; + vp = NULL; + + /* get file descriptor for split library from arguments */ + fd = uap->fd; + + /* get file structure from file descriptor */ + error = fp_lookup(p, fd, &fp, 0); + if (error) { + goto done; + } + + /* make sure we're attempting to map a vnode */ + if (fp->f_fglob->fg_type != DTYPE_VNODE) { + error = EINVAL; + goto done; + } + + /* we need at least read permission on the file */ + if (! (fp->f_fglob->fg_flag & FREAD)) { + error = EPERM; + goto done; + } + + /* get vnode from file structure */ + error = vnode_getwithref((vnode_t)fp->f_fglob->fg_data); + if (error) { + goto done; + } + vp = (struct vnode *) fp->f_fglob->fg_data; + + /* make sure the vnode is a regular file */ + if (vp->v_type != VREG) { + error = EINVAL; + goto done; + } + + /* get vnode size */ + { + off_t fs; + + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + if ((error = vnode_size(vp, &fs, &context)) != 0) + goto done; + file_size = fs; + } + + /* + * Get the list of mappings the caller wants us to establish. + */ + mapping_count = uap->mappingCount; /* the number of mappings */ + if (mapping_count == 0) { + error = 0; /* no mappings: we're done ! */ + goto done; + } else if (mapping_count <= SFM_MAX_STACK) { + mappings = &stack_mappings[0]; + } else { + kr = kmem_alloc(kernel_map, + (vm_offset_t *) &mappings, + (vm_size_t) (mapping_count * + sizeof (mappings[0]))); + if (kr != KERN_SUCCESS) { + error = ENOMEM; + goto done; + } + } + + user_mappings = uap->mappings; /* the mappings, in user space */ + error = copyin(user_mappings, + mappings, + (mapping_count * sizeof (mappings[0]))); + if (error != 0) { + goto done; + } + + /* + * If the caller provides a "slide" pointer, it means they're OK + * with us moving the mappings around to make them fit. + */ + user_slide_p = uap->slide_p; + + /* + * Make each mapping address relative to the beginning of the + * shared region. Check that all mappings are in the shared region. + * Compute the maximum set of protections required to tell the + * buffer cache how we mapped the file (see call to ubc_map() below). + */ + max_prot = VM_PROT_NONE; + base_offset = -1LL; + end_offset = 0; + mappings_in_segment = TRUE; + for (j = 0; j < mapping_count; j++) { + mach_vm_offset_t segment; + segment = (mappings[j].sfm_address & + GLOBAL_SHARED_SEGMENT_MASK); + if (segment != GLOBAL_SHARED_TEXT_SEGMENT && + segment != GLOBAL_SHARED_DATA_SEGMENT) { + /* this mapping is not in the shared region... */ + if (user_slide_p == NULL) { + /* ... 
and we can't slide it in: fail */ + error = EINVAL; + goto done; + } + if (j == 0) { + /* expect all mappings to be outside */ + mappings_in_segment = FALSE; + } else if (mappings_in_segment != FALSE) { + /* other mappings were not outside: fail */ + error = EINVAL; + goto done; + } + /* we'll try to slide that mapping in the segments */ + } else { + if (j == 0) { + /* expect all mappings to be inside */ + mappings_in_segment = TRUE; + } else if (mappings_in_segment != TRUE) { + /* other mappings were not inside: fail */ + error = EINVAL; + goto done; + } + /* get a relative offset inside the shared segments */ + mappings[j].sfm_address -= GLOBAL_SHARED_TEXT_SEGMENT; + } + if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) + < base_offset) { + base_offset = (mappings[j].sfm_address & + SHARED_TEXT_REGION_MASK); + } + if ((mappings[j].sfm_address & SHARED_TEXT_REGION_MASK) + + mappings[j].sfm_size > end_offset) { + end_offset = + (mappings[j].sfm_address & + SHARED_TEXT_REGION_MASK) + + mappings[j].sfm_size; + } + max_prot |= mappings[j].sfm_max_prot; + } + /* Make all mappings relative to the base_offset */ + base_offset = vm_map_trunc_page(base_offset); + end_offset = vm_map_round_page(end_offset); + for (j = 0; j < mapping_count; j++) { + mappings[j].sfm_address -= base_offset; + } + original_base_offset = base_offset; + if (mappings_in_segment == FALSE) { + /* + * We're trying to map a library that was not pre-bound to + * be in the shared segments. We want to try to slide it + * back into the shared segments but as far back as possible, + * so that it doesn't clash with pre-bound libraries. Set + * the base_offset to the end of the region, so that it can't + * possibly fit there and will have to be slid. + */ + base_offset = SHARED_TEXT_REGION_SIZE - end_offset; + } + + /* get the file's memory object handle */ + UBCINFOCHECK("shared_region_map_file_np", vp); + file_control = ubc_getobject(vp, UBC_HOLDOBJECT); + if (file_control == MEMORY_OBJECT_CONTROL_NULL) { + error = EINVAL; + goto done; + } + + /* + * Get info about the current process's shared region. + * This might change if we decide we need to clone the shared region. + */ + vm_get_shared_region(current_task(), &shared_region); + task_mapping_info.self = (vm_offset_t) shared_region; + shared_region_mapping_info(shared_region, + &(task_mapping_info.text_region), + &(task_mapping_info.text_size), + &(task_mapping_info.data_region), + &(task_mapping_info.data_size), + &(task_mapping_info.region_mappings), + &(task_mapping_info.client_base), + &(task_mapping_info.alternate_base), + &(task_mapping_info.alternate_next), + &(task_mapping_info.fs_base), + &(task_mapping_info.system), + &(task_mapping_info.flags), + &next); + + /* + * Are we using the system's current shared region + * for this environment? + */ + default_shared_region = + lookup_default_shared_region(ENV_DEFAULT_ROOT, + task_mapping_info.system); + if (shared_region == default_shared_region) { + using_default_region = TRUE; + } else { + using_default_region = FALSE; + } + shared_region_mapping_dealloc(default_shared_region); + + if (vp->v_mount != rootvnode->v_mount && + using_default_region) { + /* + * The split library is not on the root filesystem. We don't + * want to pollute the system-wide ("default") shared region + * with it. + * Reject the mapping. The caller (dyld) should "privatize" + * (via shared_region_make_private()) the shared region and + * try to establish the mapping privately for this process.
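The EXDEV returned below is therefore a signal rather than a hard failure. The intended dyld-side recovery, sketched with hypothetical user-space wrappers (argument order assumed from the uap layouts in this file):

    int64_t slide;
    int err;

    err = shared_region_map_file_np(fd, count, mappings, &slide);
    if (err == EXDEV) {
            /* library lives off the root volume: privatize first so the
             * mapping no longer touches the system-wide region */
            if (shared_region_make_private_np(keep_count, keep_ranges) == 0)
                    err = shared_region_map_file_np(fd, count, mappings, &slide);
    }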
+ */ + error = EXDEV; + goto done; + } + + + /* + * Map the split library. + */ + kr = map_shared_file(mapping_count, + mappings, + file_control, + file_size, + &task_mapping_info, + base_offset, + (user_slide_p) ? &slide : NULL); + + switch (kr) { + case KERN_SUCCESS: + /* + * The mapping was successful. Let the buffer cache know + * that we've mapped that file with these protections. This + * prevents the vnode from getting recycled while it's mapped. + */ + (void) ubc_map(vp, max_prot); + error = 0; + break; + case KERN_INVALID_ADDRESS: + error = EFAULT; + goto done; + case KERN_PROTECTION_FAILURE: + error = EPERM; + goto done; + case KERN_NO_SPACE: + error = ENOMEM; + goto done; + case KERN_FAILURE: + case KERN_INVALID_ARGUMENT: + default: + error = EINVAL; + goto done; + } + + if (p->p_flag & P_NOSHLIB) { + /* signal that this process is now using split libraries */ + p->p_flag &= ~P_NOSHLIB; + } + + if (user_slide_p) { + /* + * The caller provided a pointer to a "slide" offset. Let + * them know by how much we slid the mappings. + */ + if (mappings_in_segment == FALSE) { + /* + * We faked the base_offset earlier, so undo that + * and take into account the real base_offset. + */ + slide += SHARED_TEXT_REGION_SIZE - end_offset; + slide -= original_base_offset; + /* + * The mappings were slid into the shared segments + * and "slide" is relative to the beginning of the + * shared segments. Adjust it to be absolute. + */ + slide += GLOBAL_SHARED_TEXT_SEGMENT; + } + error = copyout(&slide, + user_slide_p, + sizeof (int64_t)); + } + +done: + if (vp != NULL) { + /* + * release the vnode... + * ubc_map() still holds it for us in the non-error case + */ + (void) vnode_put(vp); + vp = NULL; + } + if (fp != NULL) { + /* release the file descriptor */ + fp_drop(p, fd, fp, 0); + fp = NULL; + } + if (mappings != NULL && + mappings != &stack_mappings[0]) { + kmem_free(kernel_map, + (vm_offset_t) mappings, + mapping_count * sizeof (mappings[0])); + } + mappings = NULL; -int ws_disabled = 1; + return error; +} int -load_shared_file( - struct proc *p, - struct load_shared_file_args *uap, - register *retval) +load_shared_file(struct proc *p, struct load_shared_file_args *uap, + __unused int *retval) { caddr_t mapped_file_addr=uap->mfa; u_long mapped_file_size=uap->mfs; @@ -378,7 +917,8 @@ load_shared_file( register int error; kern_return_t kr; - struct vattr vattr; + struct vfs_context context; + off_t file_size; memory_object_control_t file_control; sf_mapping_t *map_list; caddr_t local_base; @@ -393,16 +933,19 @@ load_shared_file( struct shared_region_task_mappings task_mapping_info; shared_region_mapping_t next; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + ndp = &nd; - AUDIT_ARG(addr, base_address); + AUDIT_ARG(addr, CAST_USER_ADDR_T(base_address)); /* Retrieve the base address */ - if (error = copyin(base_address, &local_base, sizeof (caddr_t))) { - goto lsf_bailout; - } - if (error = copyin(flags, &local_flags, sizeof (int))) { - goto lsf_bailout; - } + if ( (error = copyin(CAST_USER_ADDR_T(base_address), &local_base, sizeof (caddr_t))) ) { + goto lsf_bailout; + } + if ( (error = copyin(CAST_USER_ADDR_T(flags), &local_flags, sizeof (int))) ) { + goto lsf_bailout; + } if(local_flags & QUERY_IS_SYSTEM_REGION) { shared_region_mapping_t default_shared_region; @@ -433,7 +976,7 @@ load_shared_file( } shared_region_mapping_dealloc(default_shared_region); error = 0; - error = copyout(&local_flags, flags, sizeof (int)); + error = copyout(&local_flags, CAST_USER_ADDR_T(flags), sizeof 
(int)); goto lsf_bailout; } caller_flags = local_flags; @@ -452,28 +995,28 @@ load_shared_file( goto lsf_bailout; } - if (error = - copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) { + if ( (error = copyin(CAST_USER_ADDR_T(mappings), map_list, (map_cnt*sizeof(sf_mapping_t)))) ) { goto lsf_bailout_free; } - if (error = copyinstr(filename, - filename_str, MAXPATHLEN, (size_t *)&dummy)) { + if ( (error = copyinstr(CAST_USER_ADDR_T(filename), filename_str, + MAXPATHLEN, (size_t *)&dummy)) ) { goto lsf_bailout_free; } /* * Get a vnode for the target file */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE, - filename_str, p); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, UIO_SYSSPACE32, + CAST_USER_ADDR_T(filename_str), &context); if ((error = namei(ndp))) { goto lsf_bailout_free; } - vp = ndp->ni_vp; + nameidone(ndp); + if (vp->v_type != VREG) { error = EINVAL; goto lsf_bailout_free_vput; @@ -481,10 +1024,8 @@ load_shared_file( UBCINFOCHECK("load_shared_file", vp); - if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) { + if ((error = vnode_size(vp, &file_size, &context)) != 0) goto lsf_bailout_free_vput; - } - file_control = ubc_getobject(vp, UBC_HOLDOBJECT); if (file_control == MEMORY_OBJECT_CONTROL_NULL) { @@ -493,7 +1034,7 @@ load_shared_file( } #ifdef notdef - if(vattr.va_size != mapped_file_size) { + if(file_size != mapped_file_size) { error = EINVAL; goto lsf_bailout_free_vput; } @@ -505,7 +1046,9 @@ load_shared_file( /* load alternate regions if the caller has requested. */ /* Note: the new regions are "clean slates" */ if (local_flags & NEW_LOCAL_SHARED_REGIONS) { - error = clone_system_shared_regions(FALSE, ENV_DEFAULT_ROOT); + error = clone_system_shared_regions(FALSE, + TRUE, /* chain_regions */ + ENV_DEFAULT_ROOT); if (error) { goto lsf_bailout_free_vput; } @@ -546,13 +1089,12 @@ load_shared_file( /* We don't want to run out of shared memory */ /* map entries by starting too many private versions */ /* of the shared library structures */ - int error; - if(p->p_flag & P_NOSHLIB) { - error = clone_system_shared_regions(FALSE, ENV_DEFAULT_ROOT); - } else { - error = clone_system_shared_regions(TRUE, ENV_DEFAULT_ROOT); - } - if (error) { + int error2; + + error2 = clone_system_shared_regions(!(p->p_flag & P_NOSHLIB), + TRUE, /* chain_regions */ + ENV_DEFAULT_ROOT); + if (error2) { goto lsf_bailout_free_vput; } local_flags = local_flags & ~NEW_LOCAL_SHARED_REGIONS; @@ -611,7 +1153,7 @@ load_shared_file( error = EINVAL; break; case KERN_INVALID_ADDRESS: - error = EACCES; + error = EFAULT; break; case KERN_PROTECTION_FAILURE: /* save EAUTH for authentication in this */ @@ -637,14 +1179,14 @@ load_shared_file( } else { if(default_regions) local_flags |= SYSTEM_REGION_BACKED; - if(!(error = copyout(&local_flags, flags, sizeof (int)))) { + if(!(error = copyout(&local_flags, CAST_USER_ADDR_T(flags), sizeof (int)))) { error = copyout(&local_base, - base_address, sizeof (caddr_t)); + CAST_USER_ADDR_T(base_address), sizeof (caddr_t)); } } lsf_bailout_free_vput: - vput(vp); + vnode_put(vp); lsf_bailout_free: kmem_free(kernel_map, (vm_offset_t)filename_str, @@ -656,33 +1198,24 @@ lsf_bailout: return error; } -struct reset_shared_file_args { - caddr_t *ba; - int map_cnt; - sf_mapping_t *mappings; -}; - int -reset_shared_file( - struct proc *p, - struct reset_shared_file_args *uap, - register *retval) +reset_shared_file(__unused struct proc *p, struct reset_shared_file_args *uap, + __unused register int *retval) { - caddr_t *base_address=uap->ba; - int 
map_cnt=uap->map_cnt; - sf_mapping_t *mappings=uap->mappings; + caddr_t *base_address=uap->ba; + int map_cnt=uap->map_cnt; + sf_mapping_t *mappings=uap->mappings; register int error; - kern_return_t kr; - sf_mapping_t *map_list; - caddr_t local_base; - vm_offset_t map_address; - int i; - kern_return_t kret; + sf_mapping_t *map_list; + caddr_t local_base; + vm_offset_t map_address; + int i; + kern_return_t kret; - AUDIT_ARG(addr, base_address); + AUDIT_ARG(addr, CAST_DOWN(user_addr_t, base_address)); /* Retrieve the base address */ - if (error = copyin(base_address, &local_base, sizeof (caddr_t))) { + if ( (error = copyin(CAST_USER_ADDR_T(base_address), &local_base, sizeof (caddr_t))) ) { goto rsf_bailout; } @@ -699,8 +1232,8 @@ reset_shared_file( goto rsf_bailout; } - if (error = - copyin(mappings, map_list, (map_cnt*sizeof(sf_mapping_t)))) { + if ( (error = + copyin(CAST_USER_ADDR_T(mappings), map_list, (map_cnt*sizeof(sf_mapping_t)))) ) { kmem_free(kernel_map, (vm_offset_t)map_list, (vm_size_t)(map_cnt*sizeof(sf_mapping_t))); @@ -715,7 +1248,8 @@ reset_shared_file( map_address, map_list[i].size); vm_map(current_map(), &map_address, - map_list[i].size, 0, SHARED_LIB_ALIAS, + map_list[i].size, 0, + SHARED_LIB_ALIAS | VM_FLAGS_FIXED, shared_data_region_handle, ((unsigned int)local_base & SHARED_DATA_REGION_MASK) + @@ -733,19 +1267,11 @@ rsf_bailout: return error; } -struct new_system_shared_regions_args { - int dummy; -}; - int -new_system_shared_regions( - struct proc *p, - struct new_system_shared_regions_args *uap, - register *retval) +new_system_shared_regions(__unused struct proc *p, + __unused struct new_system_shared_regions_args *uap, + register int *retval) { - shared_region_mapping_t regions; - shared_region_mapping_t new_regions; - if(!(is_suser())) { *retval = EINVAL; return EINVAL; @@ -761,7 +1287,10 @@ new_system_shared_regions( int -clone_system_shared_regions(shared_regions_active, base_vnode) +clone_system_shared_regions( + int shared_regions_active, + int chain_regions, + int base_vnode) { shared_region_mapping_t new_shared_region; shared_region_mapping_t next; @@ -769,8 +1298,6 @@ clone_system_shared_regions(shared_regions_active, base_vnode) struct shared_region_task_mappings old_info; struct shared_region_task_mappings new_info; - struct proc *p; - vm_get_shared_region(current_task(), &old_shared_region); old_info.self = (vm_offset_t)old_shared_region; shared_region_mapping_info(old_shared_region, @@ -827,8 +1354,25 @@ clone_system_shared_regions(shared_regions_active, base_vnode) shared_region_mapping_dealloc(new_shared_region); return(EINVAL); } - shared_region_object_chain_attach( - new_shared_region, old_shared_region); + if (chain_regions) { + /* + * We want a "shadowed" clone, a private superset of the old + * shared region. The info about the old mappings is still + * valid for us. + */ + shared_region_object_chain_attach( + new_shared_region, old_shared_region); + } else { + /* + * We want a completely detached clone with no link to + * the old shared region. We'll be removing some mappings + * in our private, cloned, shared region, so the old mappings + * will become irrelevant to us. Since we have a private + * "shared region" now, it isn't going to be shared with + * anyone else and we won't need to maintain mappings info. 
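Both behaviors now go through the same entry point, selected by the new chain_regions argument. The two call shapes, as they appear elsewhere in this patch:

    /* chained clone: a private superset that still consults the old
     * region's mappings (load_shared_file paths) */
    error = clone_system_shared_regions(shared_regions_active,
                                        TRUE,   /* chain_regions */
                                        ENV_DEFAULT_ROOT);

    /* detached clone: fully private, about to be edited
     * (shared_region_make_private_np) */
    error = clone_system_shared_regions(using_shared_regions,
                                        FALSE,  /* chain_regions */
                                        ENV_DEFAULT_ROOT);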
+ */ + shared_region_object_chain_detached(new_shared_region); + } } if (vm_map_region_replace(current_map(), old_info.text_region, new_info.text_region, old_info.client_base, @@ -850,15 +1394,13 @@ clone_system_shared_regions(shared_regions_active, base_vnode) /* consume the reference which wasn't accounted for in object */ /* chain attach */ - if(!shared_regions_active) + if (!shared_regions_active || !chain_regions) shared_region_mapping_dealloc(old_shared_region); return(0); } -extern vm_map_t bsd_pageable_map; - /* header for the profile name file. The profiled app info is held */ /* in the data file and pointed to by elements in the name file */ @@ -895,10 +1437,23 @@ struct global_profile_cache { struct global_profile profiles[3]; }; +/* forward declarations */ +int bsd_open_page_cache_files(unsigned int user, + struct global_profile **profile); +void bsd_close_page_cache_files(struct global_profile *profile); +int bsd_search_page_cache_data_base( + struct vnode *vp, + struct profile_names_header *database, + char *app_name, + unsigned int mod_date, + unsigned int inode, + off_t *profile, + unsigned int *profile_size); + struct global_profile_cache global_user_profile_cache = - {3, 0, NULL, NULL, NULL, 0, 0, 0, - NULL, NULL, NULL, 0, 0, 0, - NULL, NULL, NULL, 0, 0, 0 }; + {3, 0, {{NULL, NULL, 0, 0, 0, 0}, + {NULL, NULL, 0, 0, 0, 0}, + {NULL, NULL, 0, 0, 0, 0}} }; /* BSD_OPEN_PAGE_CACHE_FILES: */ /* Caller provides a user id. This id was used in */ @@ -914,10 +1469,10 @@ bsd_open_page_cache_files( unsigned int user, struct global_profile **profile) { - char *cache_path = "/var/vm/app_profile/"; + const char *cache_path = "/var/vm/app_profile/"; struct proc *p; int error; - int resid; + vm_size_t resid; off_t resid_off; unsigned int lru; vm_size_t size; @@ -933,19 +1488,21 @@ bsd_open_page_cache_files( char *profile_names_string; char *substring; - struct vattr vattr; + off_t file_size; + struct vfs_context context; - struct profile_names_header *profile_header; kern_return_t ret; struct nameidata nd_names; struct nameidata nd_data; - int i; p = current_proc(); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + restart: for(i = 0; ibusy = 1; (*profile)->age = global_user_profile_cache.age; + + /* + * entries in cache are held with a valid + * usecount... 
take an iocount which will + * be dropped in "bsd_close_page_cache_files" + * which is called after the read or writes to + * these files are done + */ + if ( (vnode_getwithref((*profile)->data_vp)) ) { + + vnode_rele((*profile)->data_vp); + vnode_rele((*profile)->names_vp); + + (*profile)->data_vp = NULL; + (*profile)->busy = 0; + wakeup(*profile); + + goto restart; + } + if ( (vnode_getwithref((*profile)->names_vp)) ) { + + vnode_put((*profile)->data_vp); + vnode_rele((*profile)->data_vp); + vnode_rele((*profile)->names_vp); + + (*profile)->data_vp = NULL; + (*profile)->busy = 0; + wakeup(*profile); + + goto restart; + } global_user_profile_cache.age+=1; return 0; } @@ -1051,10 +1639,10 @@ restart: kmem_free(kernel_map, (*profile)->buf_ptr, 4 * PAGE_SIZE); if ((*profile)->names_vp) { - vrele((*profile)->names_vp); + vnode_rele((*profile)->names_vp); (*profile)->names_vp = NULL; } - vrele(data_vp); + vnode_rele(data_vp); } /* Try to open the appropriate users profile files */ @@ -1102,10 +1690,11 @@ restart: } NDINIT(&nd_names, LOOKUP, FOLLOW | LOCKLEAF, - UIO_SYSSPACE, profile_names_string, p); + UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context); NDINIT(&nd_data, LOOKUP, FOLLOW | LOCKLEAF, - UIO_SYSSPACE, profile_data_string, p); - if (error = vn_open(&nd_data, FREAD | FWRITE, 0)) { + UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context); + + if ( (error = vn_open(&nd_data, FREAD | FWRITE, 0)) ) { #ifdef notdef printf("bsd_open_page_cache_files: CacheData file not found %s\n", profile_data_string); @@ -1119,18 +1708,19 @@ restart: wakeup(*profile); return error; } - data_vp = nd_data.ni_vp; - VOP_UNLOCK(data_vp, 0, p); - if (error = vn_open(&nd_names, FREAD | FWRITE, 0)) { + if ( (error = vn_open(&nd_names, FREAD | FWRITE, 0)) ) { printf("bsd_open_page_cache_files: NamesData file not found %s\n", profile_data_string); kmem_free(kernel_map, (vm_offset_t)names_buf, 4 * PAGE_SIZE); kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); - vrele(data_vp); + + vnode_rele(data_vp); + vnode_put(data_vp); + (*profile)->data_vp = NULL; (*profile)->busy = 0; wakeup(*profile); @@ -1138,21 +1728,25 @@ restart: } names_vp = nd_names.ni_vp; - if(error = VOP_GETATTR(names_vp, &vattr, p->p_ucred, p)) { + if ((error = vnode_size(names_vp, &file_size, &context)) != 0) { printf("bsd_open_page_cache_files: Can't stat name file %s\n", profile_names_string); kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); kmem_free(kernel_map, (vm_offset_t)names_buf, 4 * PAGE_SIZE); - vput(names_vp); - vrele(data_vp); + + vnode_rele(names_vp); + vnode_put(names_vp); + vnode_rele(data_vp); + vnode_put(data_vp); + (*profile)->data_vp = NULL; (*profile)->busy = 0; wakeup(*profile); return error; } - size = vattr.va_size; + size = file_size; if(size > 4 * PAGE_SIZE) size = 4 * PAGE_SIZE; buf_ptr = names_buf; @@ -1161,7 +1755,7 @@ restart: while(size) { error = vn_rdwr(UIO_READ, names_vp, (caddr_t)buf_ptr, size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(), &resid, p); if((error) || (size == resid)) { if(!error) { error = EINVAL; @@ -1170,8 +1764,12 @@ restart: (vm_offset_t)profile_data_string, PATH_MAX); kmem_free(kernel_map, (vm_offset_t)names_buf, 4 * PAGE_SIZE); - vput(names_vp); - vrele(data_vp); + + vnode_rele(names_vp); + vnode_put(names_vp); + vnode_rele(data_vp); + vnode_put(data_vp); + (*profile)->data_vp = NULL; (*profile)->busy = 0; wakeup(*profile); @@ -1181,12 +1779,16 @@ restart: resid_off 
+= size-resid; size = resid; } - - VOP_UNLOCK(names_vp, 0, p); kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); + (*profile)->names_vp = names_vp; (*profile)->data_vp = data_vp; (*profile)->buf_ptr = names_buf; + + /* + * at this point, both the names_vp and the data_vp have + * a valid usecount and an iocount held + */ return 0; } @@ -1195,6 +1797,9 @@ void bsd_close_page_cache_files( struct global_profile *profile) { + vnode_put(profile->data_vp); + vnode_put(profile->names_vp); + profile->busy = 0; wakeup(profile); } @@ -1207,28 +1812,26 @@ bsd_read_page_cache_file( char *app_name, struct vnode *app_vp, vm_offset_t *buffer, - vm_offset_t *buf_size) + vm_offset_t *bufsize) { - boolean_t funnel_state; + boolean_t funnel_state; struct proc *p; int error; - int resid; - vm_size_t size; + unsigned int resid; off_t profile; unsigned int profile_size; vm_offset_t names_buf; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; kern_return_t ret; struct vnode *names_vp; struct vnode *data_vp; - struct vnode *vp1; - struct vnode *vp2; struct global_profile *uid_files; @@ -1253,79 +1856,52 @@ bsd_read_page_cache_file( data_vp = uid_files->data_vp; names_buf = uid_files->buf_ptr; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); - /* - * Get locks on both files, get the vnode with the lowest address first - */ - - if((unsigned int)names_vp < (unsigned int)data_vp) { - vp1 = names_vp; - vp2 = data_vp; - } else { - vp1 = data_vp; - vp2 = names_vp; - } - error = vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_read_page_cache_file: Can't lock profile names %x\n", user); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; - } - error = vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_read_page_cache_file: Can't lock profile data %x\n", user); - VOP_UNLOCK(vp1, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; - } - - if(error = VOP_GETATTR(app_vp, &vattr, p->p_ucred, p)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); + VATTR_INIT(&va); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_modify_time); + + if ((error = vnode_getattr(app_vp, &va, &context))) { printf("bsd_read_cache_file: Can't stat app file %s\n", app_name); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state);
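With bsd_close_page_cache_files() now dropping the iocounts that bsd_open_page_cache_files() acquires, every consumer of the profile cache follows the same bracketed pattern (a sketch; user is the uid argument already in scope in these routines):

    struct global_profile *uid_files;

    if (bsd_open_page_cache_files(user, &uid_files) == 0) {
            /*
             * names_vp and data_vp each hold a usecount plus an iocount
             * here, so vn_rdwr() against them is safe
             */
            /* ... read or write profile data ... */
            bsd_close_page_cache_files(uid_files);  /* vnode_put()s both */
    }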
return ENOMEM; } - *buf_size = profile_size; + *bufsize = profile_size; while(profile_size) { error = vn_rdwr(UIO_READ, data_vp, (caddr_t) *buffer, profile_size, - profile, UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); + profile, UIO_SYSSPACE32, IO_NODELOCKED, + kauth_cred_get(), &resid, p); if((error) || (profile_size == resid)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files(uid_files); kmem_free(kernel_map, (vm_offset_t)*buffer, profile_size); thread_funnel_set(kernel_flock, funnel_state); @@ -1334,14 +1910,10 @@ bsd_read_page_cache_file( profile += profile_size - resid; profile_size = resid; } - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); return 0; } else { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); return EINVAL; @@ -1369,8 +1941,8 @@ bsd_search_page_cache_data_base( off_t file_off = 0; unsigned int size; off_t resid_off; - int resid; - vm_offset_t local_buf = NULL; + unsigned int resid; + vm_offset_t local_buf = 0; int error; kern_return_t ret; @@ -1413,9 +1985,8 @@ bsd_search_page_cache_data_base( if(strncmp(element[i].name, app_name, 12) == 0) { *profile = element[i].addr; *profile_size = element[i].size; - if(local_buf != NULL) { - kmem_free(kernel_map, - (vm_offset_t)local_buf, 4 * PAGE_SIZE); + if(local_buf != 0) { + kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE); } return 0; } @@ -1423,9 +1994,8 @@ bsd_search_page_cache_data_base( } if(extended_list == 0) break; - if(local_buf == NULL) { - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&local_buf, 4 * PAGE_SIZE); + if(local_buf == 0) { + ret = kmem_alloc(kernel_map, &local_buf, 4 * PAGE_SIZE); if(ret != KERN_SUCCESS) { return ENOMEM; } @@ -1444,13 +2014,11 @@ bsd_search_page_cache_data_base( while(size) { error = vn_rdwr(UIO_READ, vp, CAST_DOWN(caddr_t, (local_buf + resid_off)), - size, file_off + resid_off, UIO_SYSSPACE, - IO_NODELOCKED, p->p_ucred, &resid, p); + size, file_off + resid_off, UIO_SYSSPACE32, + IO_NODELOCKED, kauth_cred_get(), &resid, p); if((error) || (size == resid)) { - if(local_buf != NULL) { - kmem_free(kernel_map, - (vm_offset_t)local_buf, - 4 * PAGE_SIZE); + if(local_buf != 0) { + kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE); } return EINVAL; } @@ -1458,9 +2026,8 @@ bsd_search_page_cache_data_base( size = resid; } } - if(local_buf != NULL) { - kmem_free(kernel_map, - (vm_offset_t)local_buf, 4 * PAGE_SIZE); + if(local_buf != 0) { + kmem_free(kernel_map, local_buf, 4 * PAGE_SIZE); } return 0; } @@ -1475,34 +2042,26 @@ bsd_write_page_cache_file( int fid) { struct proc *p; - struct nameidata nd; - struct vnode *vp = 0; - int resid; + int resid; off_t resid_off; - int error; + int error; boolean_t funnel_state; - struct vattr vattr; - struct vattr data_vattr; - - off_t profile; - unsigned int profile_size; + off_t file_size; + struct vfs_context context; + off_t profile; + unsigned int profile_size; vm_offset_t names_buf; struct vnode *names_vp; struct vnode *data_vp; - struct vnode *vp1; - struct vnode *vp2; - struct profile_names_header *profile_header; off_t name_offset; - struct global_profile *uid_files; funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = bsd_open_page_cache_files(user, &uid_files); if(error) { thread_funnel_set(kernel_flock, funnel_state); @@ -1515,39 +2074,12 @@ bsd_write_page_cache_file( data_vp = uid_files->data_vp; names_buf 
= uid_files->buf_ptr; - /* - * Get locks on both files, get the vnode with the lowest address first - */ - - if((unsigned int)names_vp < (unsigned int)data_vp) { - vp1 = names_vp; - vp2 = data_vp; - } else { - vp1 = data_vp; - vp2 = names_vp; - } - - error = vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_write_page_cache_file: Can't lock profile names %x\n", user); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; - } - error = vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY, p); - if(error) { - printf("bsd_write_page_cache_file: Can't lock profile data %x\n", user); - VOP_UNLOCK(vp1, 0, p); - bsd_close_page_cache_files(uid_files); - thread_funnel_set(kernel_flock, funnel_state); - return error; - } - /* Stat data file for size */ - if(error = VOP_GETATTR(data_vp, &data_vattr, p->p_ucred, p)) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + + if ((error = vnode_size(data_vp, &file_size, &context)) != 0) { printf("bsd_write_page_cache_file: Can't stat profile data %s\n", file_name); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); @@ -1580,7 +2112,7 @@ bsd_write_page_cache_file( /* write new entry */ name = (struct profile_element *) (names_buf + (vm_offset_t)name_offset); - name->addr = data_vattr.va_size; + name->addr = file_size; name->size = size; name->mod_date = mod; name->inode = fid; @@ -1589,7 +2121,7 @@ bsd_write_page_cache_file( unsigned int ele_size; struct profile_element name; /* write new entry */ - name.addr = data_vattr.va_size; + name.addr = file_size; name.size = size; name.mod_date = mod; name.inode = fid; @@ -1603,12 +2135,10 @@ bsd_write_page_cache_file( error = vn_rdwr(UIO_WRITE, names_vp, (caddr_t)buf_ptr, ele_size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, + kauth_cred_get(), &resid, p); if(error) { printf("bsd_write_page_cache_file: Can't write name_element %x\n", user); - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files( uid_files); thread_funnel_set( @@ -1639,11 +2169,9 @@ bsd_write_page_cache_file( error = vn_rdwr(UIO_WRITE, names_vp, (caddr_t)buf_ptr, header_size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, + kauth_cred_get(), &resid, p); if(error) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); printf("bsd_write_page_cache_file: Can't write header %x\n", user); bsd_close_page_cache_files( uid_files); @@ -1656,15 +2184,13 @@ bsd_write_page_cache_file( header_size = resid; } /* write profile to data file */ - resid_off = data_vattr.va_size; + resid_off = file_size; while(size) { error = vn_rdwr(UIO_WRITE, data_vp, (caddr_t)buffer, size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, + kauth_cred_get(), &resid, p); if(error) { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); printf("bsd_write_page_cache_file: Can't write header %x\n", user); bsd_close_page_cache_files( uid_files); @@ -1676,21 +2202,15 @@ bsd_write_page_cache_file( resid_off += size-resid; size = resid; } - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); return 0; } /* Someone else wrote a twin profile before us */ - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); 
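All of the vn_rdwr() loops above share one shape: the call may make partial progress, reporting the untransferred remainder through resid, and the caller advances and retries. The generic form as a sketch, with vp, buf (caddr_t), len, off, and p assumed set up and an iocount held on vp; the no-progress guard mirrors the read loops earlier in this file:

    int error;

    while (len) {
            error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, off,
                UIO_SYSSPACE32, IO_NODELOCKED, kauth_cred_get(), &resid, p);
            if (error || resid == len)      /* hard error or no forward progress */
                    break;
            buf += len - resid;             /* skip what was already transferred */
            off += len - resid;
            len  = resid;                   /* and retry the remainder */
    }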
bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); return 0; } else { - VOP_UNLOCK(names_vp, 0, p); - VOP_UNLOCK(data_vp, 0, p); bsd_close_page_cache_files(uid_files); thread_funnel_set(kernel_flock, funnel_state); return EINVAL; @@ -1701,12 +2221,11 @@ bsd_write_page_cache_file( int prepare_profile_database(int user) { - char *cache_path = "/var/vm/app_profile/"; + const char *cache_path = "/var/vm/app_profile/"; struct proc *p; int error; int resid; off_t resid_off; - unsigned int lru; vm_size_t size; struct vnode *names_vp; @@ -1720,7 +2239,8 @@ prepare_profile_database(int user) char *profile_names_string; char *substring; - struct vattr vattr; + struct vnode_attr va; + struct vfs_context context; struct profile_names_header *profile_header; kern_return_t ret; @@ -1728,10 +2248,11 @@ prepare_profile_database(int user) struct nameidata nd_names; struct nameidata nd_data; - int i; - p = current_proc(); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); + ret = kmem_alloc(kernel_map, (vm_offset_t *)&profile_data_string, PATH_MAX); @@ -1765,37 +2286,37 @@ prepare_profile_database(int user) } NDINIT(&nd_names, LOOKUP, FOLLOW, - UIO_SYSSPACE, profile_names_string, p); + UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_names_string), &context); NDINIT(&nd_data, LOOKUP, FOLLOW, - UIO_SYSSPACE, profile_data_string, p); + UIO_SYSSPACE32, CAST_USER_ADDR_T(profile_data_string), &context); - if (error = vn_open(&nd_data, - O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) { + if ( (error = vn_open(&nd_data, + O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) { kmem_free(kernel_map, (vm_offset_t)names_buf, 4 * PAGE_SIZE); kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); + return 0; } - data_vp = nd_data.ni_vp; - VOP_UNLOCK(data_vp, 0, p); - if (error = vn_open(&nd_names, - O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) { + if ( (error = vn_open(&nd_names, + O_CREAT | O_EXCL | FWRITE, S_IRUSR|S_IWUSR)) ) { printf("prepare_profile_database: Can't create CacheNames %s\n", profile_data_string); kmem_free(kernel_map, (vm_offset_t)names_buf, 4 * PAGE_SIZE); kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); - vrele(data_vp); + + vnode_rele(data_vp); + vnode_put(data_vp); + return error; } - names_vp = nd_names.ni_vp; - /* Write Header for new names file */ profile_header = (struct profile_names_header *)names_buf; @@ -1816,8 +2337,8 @@ prepare_profile_database(int user) while(size) { error = vn_rdwr(UIO_WRITE, names_vp, (caddr_t)buf_ptr, size, resid_off, - UIO_SYSSPACE, IO_NODELOCKED, - p->p_ucred, &resid, p); + UIO_SYSSPACE32, IO_NODELOCKED, + kauth_cred_get(), &resid, p); if(error) { printf("prepare_profile_database: Can't write header %s\n", profile_names_string); kmem_free(kernel_map, @@ -1825,43 +2346,39 @@ prepare_profile_database(int user) kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); - vput(names_vp); - vrele(data_vp); + + vnode_rele(names_vp); + vnode_put(names_vp); + vnode_rele(data_vp); + vnode_put(data_vp); + return error; } buf_ptr += size-resid; resid_off += size-resid; size = resid; } + VATTR_INIT(&va); + VATTR_SET(&va, va_uid, user); - VATTR_NULL(&vattr); - vattr.va_uid = user; - error = VOP_SETATTR(names_vp, &vattr, p->p_cred->pc_ucred, p); + error = vnode_setattr(names_vp, &va, &context); if(error) { printf("prepare_profile_database: " "Can't set user %s\n", profile_names_string); } - vput(names_vp); + vnode_rele(names_vp); + vnode_put(names_vp); - error = vn_lock(data_vp, LK_EXCLUSIVE | LK_RETRY, p); - 
if(error) { - vrele(data_vp); - printf("prepare_profile_database: cannot lock data file %s\n", - profile_data_string); - kmem_free(kernel_map, - (vm_offset_t)profile_data_string, PATH_MAX); - kmem_free(kernel_map, - (vm_offset_t)names_buf, 4 * PAGE_SIZE); - } - VATTR_NULL(&vattr); - vattr.va_uid = user; - error = VOP_SETATTR(data_vp, &vattr, p->p_cred->pc_ucred, p); + VATTR_INIT(&va); + VATTR_SET(&va, va_uid, user); + error = vnode_setattr(data_vp, &va, &context); if(error) { printf("prepare_profile_database: " "Can't set user %s\n", profile_data_string); } - - vput(data_vp); + vnode_rele(data_vp); + vnode_put(data_vp); + kmem_free(kernel_map, (vm_offset_t)profile_data_string, PATH_MAX); kmem_free(kernel_map, diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index f7d6e0a83..a1f4079dd 100644 --- a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,13 +36,15 @@ #include #include #include +#include #include +#include #include #include -#include +#include #include -#include -#include +#include /* needs internal due to fhandle_t */ +#include #include #include @@ -59,6 +61,13 @@ #include #include +#include + +#include +#include +#include + +#include unsigned int vp_pagein=0; unsigned int vp_pgodirty=0; @@ -69,12 +78,8 @@ unsigned int dp_pgins=0; /* Default pager pageins */ vm_object_offset_t vnode_pager_get_filesize(struct vnode *vp) { - if (UBCINVALID(vp)) { - return (vm_object_offset_t) 0; - } return (vm_object_offset_t) ubc_getsize(vp); - } pager_return_t @@ -86,98 +91,136 @@ vnode_pageout(struct vnode *vp, int flags, int *errorp) { - int result = PAGER_SUCCESS; struct proc *p = current_proc(); + int result = PAGER_SUCCESS; int error = 0; - int blkno=0, s; - int cnt, isize; + int error_ret = 0; + daddr64_t blkno; + int isize; int pg_index; + int base_index; int offset; - struct buf *bp; - boolean_t funnel_state; upl_page_info_t *pl; - upl_t vpupl = NULL; + struct vfs_context context; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); isize = (int)size; if (isize <= 0) { - result = error = PAGER_ERROR; + result = PAGER_ERROR; + error_ret = EINVAL; goto out; } UBCINFOCHECK("vnode_pageout", vp); if (UBCINVALID(vp)) { - result = error = PAGER_ERROR; + result = PAGER_ERROR; + error_ret = EINVAL; if (upl && !(flags & UPL_NOCOMMIT)) ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY); goto out; } - if (upl) { + if ( !(flags & UPL_VNODE_PAGER)) { /* - * This is a pageout from the Default pager, - * just go ahead and call VOP_PAGEOUT + * This is a pageout from the default pager, + * just go ahead and call vnop_pageout since + * it has already sorted out the dirty ranges */ dp_pgouts++; KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, size, 1, 0, 0, 0); - if (error = VOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset, - (size_t)size, p->p_ucred, flags)) - result = error = PAGER_ERROR; + if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset, + (size_t)size, flags, &context)) ) + result = PAGER_ERROR; KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, size, 1, 0, 0, 0); goto out; } - ubc_create_upl(vp, f_offset, isize, &vpupl, &pl, UPL_FOR_PAGEOUT | UPL_COPYOUT_FROM | UPL_SET_LITE); - - if (vpupl == (upl_t) 0) { - result = error = PAGER_ABSENT; - goto out; - } /* - * if 
we get here, we've created the upl and - * are responsible for commiting/aborting it - * regardless of what the caller has passed in + * we come here for pageouts to 'real' files and + * for msyncs... the upl may not contain any + * dirty pages... it's our responsibility to sort + * through it and find the 'runs' of dirty pages + * to call VNOP_PAGEOUT on... */ - flags &= ~UPL_NOCOMMIT; + pl = ubc_upl_pageinfo(upl); if (ubc_getsize(vp) == 0) { - for (offset = 0; isize; isize -= PAGE_SIZE, - offset += PAGE_SIZE) { - blkno = ubc_offtoblk(vp, (off_t)f_offset); - f_offset += PAGE_SIZE; - if ((bp = incore(vp, blkno)) && - ISSET(bp->b_flags, B_BUSY)) { - ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, - UPL_ABORT_FREE_ON_EMPTY); - result = error = PAGER_ERROR; - continue; - } else if (bp) { - bremfree(bp); - SET(bp->b_flags, B_BUSY | B_INVAL); - brelse(bp); + /* + * if the file has been effectively deleted, then + * we need to go through the UPL and invalidate any + * buffer headers we might have that reference any + * of its pages + */ + for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) { +#if NFSCLIENT + if (vp->v_tag == VT_NFS) + /* check with nfs if page is OK to drop */ + error = nfs_buf_page_inval(vp, (off_t)f_offset); + else +#endif + { + blkno = ubc_offtoblk(vp, (off_t)f_offset); + error = buf_invalblkno(vp, blkno, 0); + } + if (error) { + if ( !(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY); + if (error_ret == 0) + error_ret = error; + result = PAGER_ERROR; + + } else if ( !(flags & UPL_NOCOMMIT)) { + ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY); } - ubc_upl_commit_range(vpupl, offset, PAGE_SIZE, - UPL_COMMIT_FREE_ON_EMPTY); + f_offset += PAGE_SIZE; } goto out; } - pg_index = 0; - offset = 0; + /* + * Ignore any non-present pages at the end of the + * UPL so that we aren't looking at a upl that + * may already have been freed by the preceding + * aborts/completions. + */ + base_index = upl_offset / PAGE_SIZE; + + for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == base_index) { + /* + * no pages were returned, so release + * our hold on the upl and leave + */ + if ( !(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + goto out; + } + } + isize = (pg_index + 1) * PAGE_SIZE; + + offset = upl_offset; + pg_index = base_index; while (isize) { int xsize; int num_of_pages; - if ( !upl_valid_page(pl, pg_index)) { - ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, - UPL_ABORT_FREE_ON_EMPTY); + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL + * just skip over them + */ offset += PAGE_SIZE; isize -= PAGE_SIZE; pg_index++; @@ -192,45 +235,31 @@ vnode_pageout(struct vnode *vp, * We also get here from vm_object_terminate() * So all you need to do in these * cases is to invalidate incore buffer if it is there - * Note we must not sleep here if B_BUSY - that is + * Note we must not sleep here if the buffer is busy - that is * a lock inversion which causes deadlock.
*/ - blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset)); - s = splbio(); vp_pgoclean++; - if (vp->v_tag == VT_NFS) { + +#if NFSCLIENT + if (vp->v_tag == VT_NFS) /* check with nfs if page is OK to drop */ error = nfs_buf_page_inval(vp, (off_t)(f_offset + offset)); - splx(s); - if (error) { - ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, - UPL_ABORT_FREE_ON_EMPTY); - result = error = PAGER_ERROR; - offset += PAGE_SIZE; - isize -= PAGE_SIZE; - pg_index++; - continue; - } - } else if ((bp = incore(vp, blkno)) && - ISSET(bp->b_flags, B_BUSY | B_NEEDCOMMIT)) { - splx(s); - ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, - UPL_ABORT_FREE_ON_EMPTY); - result = error = PAGER_ERROR; - offset += PAGE_SIZE; - isize -= PAGE_SIZE; - pg_index++; - continue; - } else if (bp) { - bremfree(bp); - SET(bp->b_flags, B_BUSY | B_INVAL ); - splx(s); - brelse(bp); - } else - splx(s); - - ubc_upl_commit_range(vpupl, offset, PAGE_SIZE, - UPL_COMMIT_FREE_ON_EMPTY); + else +#endif + { + blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset)); + error = buf_invalblkno(vp, blkno, 0); + } + if (error) { + if ( !(flags & UPL_NOCOMMIT)) + ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY); + if (error_ret == 0) + error_ret = error; + result = PAGER_ERROR; + + } else if ( !(flags & UPL_NOCOMMIT)) { + ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY); + } offset += PAGE_SIZE; isize -= PAGE_SIZE; pg_index++; @@ -243,8 +272,6 @@ vnode_pageout(struct vnode *vp, xsize = isize - PAGE_SIZE; while (xsize) { - if ( !upl_valid_page(pl, pg_index + num_of_pages)) - break; if ( !upl_dirty_page(pl, pg_index + num_of_pages)) break; num_of_pages++; @@ -253,13 +280,15 @@ vnode_pageout(struct vnode *vp, xsize = num_of_pages * PAGE_SIZE; KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, - xsize, 0, 0, 0, 0); + xsize, (int)(f_offset + offset), 0, 0, 0); - if (error = VOP_PAGEOUT(vp, vpupl, (vm_offset_t)offset, + if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset, (off_t)(f_offset + offset), xsize, - p->p_ucred, flags)) - result = error = PAGER_ERROR; - + flags, &context)) ) { + if (error_ret == 0) + error_ret = error; + result = PAGER_ERROR; + } KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, xsize, 0, 0, 0, 0); @@ -269,14 +298,14 @@ vnode_pageout(struct vnode *vp, } out: if (errorp) - *errorp = result; - - thread_funnel_set(kernel_flock, funnel_state); + *errorp = error_ret; - return (error); + return (result); } +void IOSleep(int); + pager_return_t vnode_pagein( struct vnode *vp, @@ -288,20 +317,17 @@ vnode_pagein( int *errorp) { struct proc *p = current_proc(); + struct uthread *ut; upl_page_info_t *pl; int result = PAGER_SUCCESS; int error = 0; - int xfer_size; int pages_in_upl; int start_pg; int last_pg; int first_pg; int xsize; int abort_needed = 1; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); UBCINFOCHECK("vnode_pagein", vp); @@ -414,14 +440,16 @@ vnode_pagein( } if (last_pg > start_pg) { int xoff; + struct vfs_context context; + context.vc_proc = p; + context.vc_ucred = kauth_cred_get(); xsize = (last_pg - start_pg) * PAGE_SIZE; xoff = start_pg * PAGE_SIZE; - if (error = VOP_PAGEIN(vp, upl, (vm_offset_t) xoff, + if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff, (off_t)f_offset + xoff, - xsize, p->p_ucred, - flags)) { + xsize, flags, &context)) ) { result = PAGER_ERROR; error = PAGER_ERROR; @@ -434,28 +462,36 @@ vnode_pagein( out: if (errorp) *errorp = result; - thread_funnel_set(kernel_flock, funnel_state); + 
ut = get_bsdthread_info(current_thread()); + + if (ut->uu_lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(ut->uu_lowpri_delay); + ut->uu_lowpri_delay = 0; + } return (error); } void -vnode_pager_shutdown() +vnode_pager_shutdown(void) { int i; - extern struct bs_map bs_port_table[]; - struct vnode *vp; + vnode_t vp; for(i = 0; i < MAX_BACKING_STORE; i++) { - vp = (struct vnode *)(bs_port_table[i]).vp; + vp = (vnode_t)(bs_port_table[i]).vp; if (vp) { (bs_port_table[i]).vp = 0; - ubc_rele(vp); - /* get rid of macx_swapon() namei() reference */ - vrele(vp); - /* get rid of macx_swapon() "extra" reference */ - vrele(vp); + /* get rid of macx_swapon() reference */ + vnode_rele(vp); } } } diff --git a/bsd/vm/vnode_pager.h b/bsd/vm/vnode_pager.h index 45b21e9fb..7aea26deb 100644 --- a/bsd/vm/vnode_pager.h +++ b/bsd/vm/vnode_pager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -34,13 +34,12 @@ #include #ifdef KERNEL - #include +#include +#include #include -vm_pager_t vnode_pager_setup(); -boolean_t vnode_has_page(); -boolean_t vnode_pager_active(); +vm_pager_t vnode_pager_setup(struct vnode *, memory_object_t); /* * Vstructs are the internal (to us) description of a unit of backing store. @@ -89,6 +88,7 @@ struct bs_map { struct vnode *vp; void *bs; }; +extern struct bs_map bs_port_table[]; @@ -123,11 +123,15 @@ typedef struct vstruct { #define VNODE_PAGER_NULL ((vnode_pager_t) 0) +pager_return_t vnode_pagein(struct vnode *, upl_t, + upl_offset_t, vm_object_offset_t, + upl_size_t, int, int *); +pager_return_t vnode_pageout(struct vnode *, upl_t, + upl_offset_t, vm_object_offset_t, + upl_size_t, int, int *); -pager_return_t pager_vnode_pagein(); -pager_return_t pager_vnode_pageout(); -pager_return_t vnode_pagein(); -pager_return_t vnode_pageout(); +extern vm_object_offset_t vnode_pager_get_filesize( + struct vnode *vp); #endif /* KERNEL */ diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index 19deae2d6..0188659d7 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -1,325 +1,94 @@ -_BF_encrypt -_BF_set_key -_BestBlockSizeFit -_CURSIG -_ConvertUnicodeToUTF8Mangled -_DebugStr -_DisposePtr -_FastRelString -_FastUnicodeCompare -_GetEmbeddedFileID -_GetLogicalBlockSize -_GetTimeUTC -_LocalToUTC -_MAXNBUF -_MCFail -_MD5Final -_MD5Init -_MD5Pad -_MD5Transform -_MD5Update -_MDFail -_MPFail -_MacToVFSError -_NewPtr -_NewPtrSysClear -_PreliminarySetup -_RandomULong -_SHA1Final -_SHA1Init -_SHA1Transform -_SHA1Update -_SHA256_Data -_SHA256_End -_SHA256_Final -_SHA256_Init -_SHA256_Transform -_SHA256_Update -_SHA384_Data -_SHA384_End -_SHA384_Final -_SHA384_Init -_SHA384_Update -_SHA512_Data -_SHA512_End -_SHA512_Final -_SHA512_Init -_SHA512_Last -_SHA512_Transform -_SHA512_Update -_UTCToLocal +_VNOP_BWRITE +_VNOP_FSYNC +_VNOP_IOCTL +_VNOP_READ +_VNOP_STRATEGY +_VNOP_WRITE __FREE __FREE_ZONE __MALLOC __MALLOC_ZONE -___sysctl -__dist_code -__length_code -__printf -__tr_align -__tr_flush_block -__tr_init -__tr_stored_block -__tr_tally -_accept -_access -_acct -_acct_process -_acctchkfreq -_acctp -_acctresume -_acctsuspend -_acctwatch -_acctwatch_funnel -_add_name -_add_pcbuffer -_add_profil 
-_add_to_time_wait -_addlog -_addupc_task -_adjtime -_adler32 -_advisory_read -_age_is_stale -_ah4_calccksum -_ah4_input -_ah4_output -_ah6_calccksum -_ah6_ctlinput -_ah6_input -_ah6_output -_ah_algorithm_lookup -_ah_hdrlen -_ah_hdrsiz -_alert -_alert_done -_allocbuf -_allproc -_app_profile -_apple_hwcksum_rx -_apple_hwcksum_tx -_around -_arp_ifinit -_arp_rtrequest -_arpintr -_arpintrq -_arpresolve -_arpwhohas -_at_ether_input -_attrcalcsize -_averunnable -_b_to_q -_badport_bandlim -_bawrite +_bpfattach _bcd2bin_data -_bdevsw _bdevsw_add _bdevsw_isfree _bdevsw_remove -_bdevvp -_bdwrite -_bflushq -_bin2bcd_data -_bind -_biodone -_biowait -_blaundrycnt -_block_procsigmask -_boot -_boothowto -_bootp -_boottime -_both -_bpf_filter -_bpf_init -_bpf_mtap -_bpf_tap -_bpf_tap_callback -_bpf_validate -_bpfattach -_bpfclose -_bpfdetach -_bpfioctl -_bpfopen -_bpfpoll -_bpfread -_bpfwrite -_branch_tracing_enabled -_bread -_breada -_breadn -_brelse -_bremfree -_bs_port_table -_bsd_ast -_bsd_autoconf -_bsd_bufferinit -_bsd_close_page_cache_files -_bsd_hardclock -_bsd_hardclockinit -_bsd_init -_bsd_open_page_cache_files -_bsd_osrelease -_bsd_ostype -_bsd_pageable_map -_bsd_read_page_cache_file -_bsd_search_page_cache_data_base -_bsd_startupearly -_bsd_uprofil -_bsd_version -_bsd_version_major -_bsd_version_minor -_bsd_version_variant -_bsd_write_page_cache_file -_bsdinit_task -_buf -_bufferhdr_map -_bufhash -_bufhashlist_slock -_bufhashtbl -_bufqlim -_bufqscanwait -_bufqueues -_bufstats -_busyprt -_bwillwrite -_bwrite -_byte_swap_cgin -_byte_swap_cgout -_byte_swap_csum -_byte_swap_dir_block_in -_byte_swap_dir_block_out -_byte_swap_dir_out -_byte_swap_direct -_byte_swap_dirtemplate_in -_byte_swap_inode_in -_byte_swap_inode_out -_byte_swap_ints -_byte_swap_longlongs -_byte_swap_minidir_in -_byte_swap_sbin -_byte_swap_sbout -_byte_swap_shorts +_bsd_timeout +_bsd_untimeout +_buf_alloc +_buf_bawrite +_buf_bdwrite +_buf_biodone +_buf_biowait +_buf_blkno +_buf_bread +_buf_breadn +_buf_brelse +_buf_bwrite +_buf_callback +_buf_clear +_buf_clearflags +_buf_count +_buf_dataptr +_buf_device +_buf_dirtyend +_buf_dirtyoff +_buf_drvdata +_buf_error +_buf_flags +_buf_flushdirtyblks +_buf_free +_buf_fsprivate +_buf_fromcache +_buf_getblk +_buf_geteblk +_buf_invalblkno +_buf_invalidateblks +_buf_iterate +_buf_lblkno +_buf_map +_buf_markaged +_buf_markdelayed +_buf_markeintr +_buf_markinvalid +_buf_meta_bread +_buf_meta_breadn +_buf_proc +_buf_rcred +_buf_reset +_buf_resid +_buf_setblkno +_buf_setcallback +_buf_setcount +_buf_setdataptr +_buf_setdevice +_buf_setdirtyend +_buf_setdirtyoff +_buf_setdrvdata +_buf_seterror +_buf_setflags +_buf_setfsprivate +_buf_setlblkno +_buf_setresid +_buf_setsize +_buf_setupl +_buf_setvnode +_buf_size +_buf_strategy +_buf_unmap +_buf_upl +_buf_uploffset +_buf_valid +_buf_vnode +_buf_wcred _cache_enter _cache_lookup _cache_purge -_cache_purgevfs -_cached_sock_alloc -_cached_sock_count -_cached_sock_free -_calcru -_callout -_cansignal -_cast128_decrypt_round12 -_cast128_decrypt_round16 -_cast128_encrypt_round12 -_cast128_encrypt_round16 -_catq -_cd9660_access -_cd9660_blkatoff -_cd9660_blktooff -_cd9660_bmap -_cd9660_cdxaop_entries -_cd9660_cdxaop_opv_desc -_cd9660_cdxaop_p -_cd9660_close -_cd9660_cmap -_cd9660_defattr -_cd9660_deftstamp -_cd9660_enotsupp -_cd9660_fhtovp -_cd9660_fifoop_entries -_cd9660_fifoop_opv_desc -_cd9660_fifoop_p -_cd9660_getattr -_cd9660_getattrlist -_cd9660_ihashget -_cd9660_ihashins -_cd9660_ihashrem -_cd9660_inactive -_cd9660_init -_cd9660_ioctl -_cd9660_islocked 
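The buf_* names added above are the opaque buffer-cache KPI: struct buf is no longer dereferenced directly, and reads and releases go through buf_bread(), buf_dataptr(), buf_brelse(), and friends. A minimal sketch, assuming vp carries a valid reference and blkno/blksize describe an existing block; the helper is illustrative only:

    #include <sys/types.h>
    #include <sys/buf.h>
    #include <sys/vnode.h>
    #include <sys/kauth.h>

    /* Illustrative only: read one block through the opaque buf_t accessors. */
    static errno_t
    read_block_example(vnode_t vp, daddr64_t blkno, int blksize)
    {
            buf_t   bp = NULL;
            char    *data;
            errno_t error;

            /* buf_bread() replaces bread(); note the kauth credential argument */
            error = buf_bread(vp, blkno, blksize, kauth_cred_get(), &bp);
            if (error) {
                    if (bp != NULL)
                            buf_brelse(bp);
                    return (error);
            }
            /* field access goes through accessors instead of bp->b_data */
            data = (char *)buf_dataptr(bp);
            (void)data;             /* ... inspect the block here ... */

            buf_brelse(bp);
            return (0);
    }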
-_cd9660_lock -_cd9660_lookup -_cd9660_mmap -_cd9660_mount -_cd9660_mountroot -_cd9660_offtoblk -_cd9660_open -_cd9660_pagein -_cd9660_pathconf -_cd9660_print -_cd9660_quotactl -_cd9660_read -_cd9660_readdir -_cd9660_readlink -_cd9660_reclaim -_cd9660_remove -_cd9660_rmdir -_cd9660_root -_cd9660_rrip_analyze -_cd9660_rrip_getname -_cd9660_rrip_getsymname -_cd9660_rrip_offset -_cd9660_seek -_cd9660_select -_cd9660_specop_entries -_cd9660_specop_opv_desc -_cd9660_specop_p -_cd9660_start -_cd9660_statfs -_cd9660_strategy -_cd9660_sync -_cd9660_sysctl -_cd9660_tstamp_conv17 -_cd9660_tstamp_conv7 -_cd9660_unlock -_cd9660_unmount -_cd9660_vfsops -_cd9660_vget -_cd9660_vget_internal -_cd9660_vnodeop_entries -_cd9660_vnodeop_opv_desc -_cd9660_vnodeop_p -_cd9660_vptofh -_cd9660_xa_read -_cdevsw +_cache_purge_negatives _cdevsw_add _cdevsw_add_with_bdev _cdevsw_isfree _cdevsw_remove -_cfree -_cfreecount -_cfreelist -_chdir -_check_cpu_subtype -_check_exec_access -_check_routeselfref -_checkalias -_checkuseraccess -_chflags -_chgproccnt -_chkdq -_chkdqchg -_chkiq -_chkiqchg -_chkvnlock -_chmod -_chown -_chroot -_chrtoblk -_chrtoblk_set -_cinit -_cjk_encoding -_cjk_lastunique -_clalloc -_clear_procsiglist -_clfree -_clone_system_shared_regions -_close -_closef -_clrbits _cluster_bp _cluster_copy_ubc_data _cluster_copy_upl_data @@ -327,278 +96,31 @@ _cluster_pagein _cluster_pageout _cluster_push _cluster_read -_cluster_release _cluster_write -_cmask -_cnodehash -_cnodehashtbl -_collectth_state -_comp_add_data -_comp_end -_comp_get_ratio -_comp_init -_compute_averunnable -_concat_domain -_connect -_cons -_cons_cinput -_console_user -_constty -_copyfile -_copyright +_cluster_zero _copystr -_copywithin -_coredump -_count_busy_buffers -_count_lock_queue -_crcmp -_crcopy -_crdup -_create_unix_stack -_cred0 -_crfree -_crget -_ctl_attach -_ctl_connect -_ctl_ctloutput _ctl_deregister -_ctl_disconnect _ctl_enqueuedata _ctl_enqueuembuf -_ctl_find -_ctl_head -_ctl_ioctl -_ctl_post_msg +_ctl_getenqueuespace _ctl_register -_ctl_send -_ctl_usrreqs -_ctlsw -_cttyioctl -_cttyopen -_cttyread -_cttyselect -_cttywrite -_cur_tw_slot _current_proc -_current_proc_EXTERNAL -_cvtstat -_dead_badop -_dead_blktooff -_dead_bmap -_dead_cmap -_dead_ebadf -_dead_ioctl -_dead_lock -_dead_lookup -_dead_nullop -_dead_offtoblk -_dead_open -_dead_print -_dead_read -_dead_select -_dead_strategy -_dead_vnodeop_entries -_dead_vnodeop_opv_desc -_dead_vnodeop_p -_dead_write -_def_tbuffer_size -_default_pager_init_flag -_deflate -_deflateCopy -_deflateEnd -_deflateInit2_ -_deflateInit_ -_deflateParams -_deflateReset -_deflateSetDictionary -_deflate_copyright -_defrouter_addreq -_defrouter_delreq -_defrouter_lookup -_defrouter_select -_defrtrlist_del -_delack_bitmask -_delete -_delete_each_prefix -_des_SPtrans -_des_check_key -_des_check_key_parity -_des_decrypt3 -_des_ecb3_encrypt -_des_ecb_encrypt -_des_encrypt1 -_des_encrypt2 -_des_encrypt3 -_des_fixup_key_parity -_des_is_weak_key -_des_key_sched -_des_options -_des_set_key -_des_set_key_checked -_des_set_key_unchecked -_des_set_odd_parity -_desireddquot _desiredvnodes -_dest6_input -_dev_add_entry -_dev_add_name -_dev_add_node -_dev_dup_entry -_dev_dup_plane -_dev_finddir -_dev_findname -_dev_free_hier -_dev_free_name -_dev_root -_devcls -_devfs_checkpath -_devfs_dn_free -_devfs_dntovn -_devfs_free_plane -_devfs_kernel_mount -_devfs_lock -_devfs_make_link _devfs_make_node -_devfs_mknod -_devfs_mount -_devfs_propogate _devfs_remove -_devfs_sinit -_devfs_spec_vnodeop_opv_desc 
-_devfs_spec_vnodeop_p -_devfs_stats -_devfs_update -_devfs_vfsops -_devfs_vnodeop_opv_desc -_devfs_vnodeop_p -_devin -_devio -_devioc -_devnode_free -_devopn -_devout -_devwait -_dhcpol_add -_dhcpol_concat -_dhcpol_count -_dhcpol_element -_dhcpol_find -_dhcpol_free -_dhcpol_get -_dhcpol_init -_dhcpol_parse_buffer -_dhcpol_parse_packet -_dhcpol_parse_vendor -_dirchk -_disableConsoleOutput -_disable_branch_tracing -_disable_funnel -_div_init -_div_input -_div_usrreqs -_divert_packet -_dlil_attach_interface_filter -_dlil_attach_protocol -_dlil_attach_protocol_filter -_dlil_dereg_if_modules -_dlil_dereg_proto_module -_dlil_detach_filter -_dlil_detach_protocol -_dlil_event -_dlil_expand_mcl -_dlil_find_dltag -_dlil_if_acquire -_dlil_if_attach -_dlil_if_detach -_dlil_if_release -_dlil_init -_dlil_initialized -_dlil_inject_if_input -_dlil_inject_if_output -_dlil_inject_pr_input -_dlil_inject_pr_output -_dlil_input -_dlil_input_lock -_dlil_input_packet -_dlil_input_thread_continue -_dlil_input_thread_wakeup -_dlil_ioctl -_dlil_output -_dlil_plumb_protocol -_dlil_post_msg -_dlil_reg_if_modules -_dlil_reg_proto_module -_dlil_stats -_dlil_unplumb_protocol -_dlttoproto -_dmmax -_dmmin -_dmtext -_doasyncfree -_doclusterread -_doclusterwrite -_doingcache -_domaininit -_domainname -_domainnamelen -_domains -_donice -_doreallocblks -_dosetrlimit -_dounmount -_dp_pgins -_dp_pgouts -_dqdirtylist -_dqfileclose -_dqfileopen -_dqflush -_dqfreelist -_dqget -_dqhash -_dqhashtbl -_dqinit -_dqreclaim -_dqref -_dqrele -_dqsync -_dqsync_orphans -_dump_string_table -_dumpdev -_dumplo -_dup -_dup2 -_dup_sockaddr -_dupfdopen -_dylink_test -_embutl -_enable_branch_tracing -_enable_funnel -_encap4_input -_encap6_input -_encap_attach -_encap_attach_func -_encap_detach -_encap_getarg -_encap_init -_encaptab -_encode_comp_t _enodev _enodev_strat _enoioctl _enosys -_enterpgrp _enxio _eopnotsupp -_err_abortop _err_access _err_advlock _err_allocate -_err_blkatoff _err_blktooff -_err_bmap +_err_blockmap _err_bwrite _err_close -_err_cmap _err_copyfile _err_create _err_devblocksize @@ -608,11 +130,7 @@ _err_getattr _err_getattrlist _err_inactive _err_ioctl -_err_islocked -_err_lease _err_link -_err_lock -_err_mkcomplex _err_mkdir _err_mknod _err_mmap @@ -621,1535 +139,272 @@ _err_open _err_pagein _err_pageout _err_pathconf -_err_pgrd -_err_pgwr -_err_print _err_read _err_readdir _err_readdirattr _err_readlink -_err_reallocblks _err_reclaim _err_remove _err_rename _err_revoke _err_rmdir _err_searchfs -_err_seek _err_select _err_setattr _err_setattrlist _err_strategy _err_symlink -_err_truncate -_err_unlock -_err_update -_err_valloc -_err_vfree _err_whiteout _err_write -_errsys -_esp4_input -_esp4_output -_esp6_ctlinput -_esp6_input -_esp6_output -_esp_algorithm_lookup -_esp_auth -_esp_hdrsiz -_esp_max_ivlen -_esp_rijndael_blockdecrypt -_esp_rijndael_blockencrypt -_esp_rijndael_schedlen -_esp_rijndael_schedule -_esp_schedule -_esp_udp_encap_port -_ether_addmulti -_ether_attach_at -_ether_attach_inet -_ether_attach_inet6 -_ether_delmulti +_ether_add_proto +_ether_del_proto +_ether_check_multi _ether_demux -_ether_detach_at -_ether_detach_inet -_ether_detach_inet6 -_ether_family_init _ether_frameout -_ether_ifattach -_ether_ifmod_ioctl -_ether_inet6_prmod_ioctl -_ether_inet_prmod_ioctl -_ether_input -_ether_ipmulticast_max -_ether_ipmulticast_min -_ether_pre_output -_ether_prmod_ioctl -_ether_resolvemulti -_ether_sprintf -_event_usrreqs -_eventsw -_evprocdeque -_evprocenque -_evsofree -_exchangedata -_exchangelock -_execsigs -_execv 
-_execve -_execve_semaphore -_exit -_exit1 -_falloc -_fatfile_getarch -_fatfile_getarch_affinity -_fchdir -_fchflags -_fchmod -_fchown -_fcntl -_fcount -_fdalloc -_fdavail -_fdesc_allocvp -_fdesc_badop -_fdesc_getattr -_fdesc_inactive -_fdesc_init -_fdesc_ioctl -_fdesc_lookup -_fdesc_mount -_fdesc_open -_fdesc_pathconf -_fdesc_print -_fdesc_read -_fdesc_readdir -_fdesc_readlink -_fdesc_reclaim -_fdesc_root -_fdesc_select -_fdesc_setattr -_fdesc_start -_fdesc_statfs -_fdesc_sync -_fdesc_unmount -_fdesc_vfree -_fdesc_vfsops -_fdesc_vnodeop_entries -_fdesc_vnodeop_opv_desc -_fdesc_vnodeop_p -_fdesc_write -_fdexpand -_fdgetf -_fdhash -_fdhashtbl -_fdopen -_fdrelse -_ffree -_ffs -_ffs_alloc -_ffs_balloc -_ffs_blkalloc -_ffs_blkatoff -_ffs_blkfree -_ffs_blkpref -_ffs_blktooff -_ffs_clrblock -_ffs_clusteracct -_ffs_fhtovp -_ffs_fifoop_entries -_ffs_fifoop_opv_desc -_ffs_fifoop_p -_ffs_flushfiles -_ffs_fragacct -_ffs_fsync -_ffs_init -_ffs_isblock -_ffs_mount -_ffs_mountfs -_ffs_mountroot -_ffs_offtoblk -_ffs_oldfscompat -_ffs_pagein -_ffs_pageout -_ffs_read -_ffs_reallocblks -_ffs_realloccg -_ffs_reclaim -_ffs_reload -_ffs_sbupdate -_ffs_setblock -_ffs_specop_entries -_ffs_specop_opv_desc -_ffs_specop_p -_ffs_statfs -_ffs_sync -_ffs_sysctl -_ffs_truncate -_ffs_unmount -_ffs_update -_ffs_valloc -_ffs_vfree -_ffs_vget -_ffs_vnodeop_entries -_ffs_vnodeop_opv_desc -_ffs_vnodeop_p -_ffs_vptofh -_ffs_write -_fhopen -_fifo_advlock -_fifo_bmap -_fifo_close -_fifo_ebadf -_fifo_inactive -_fifo_ioctl +_ether_ioctl _fifo_lookup -_fifo_nfsv2nodeop_opv_desc -_fifo_nfsv2nodeop_p _fifo_open -_fifo_pathconf -_fifo_print -_fifo_printinfo _fifo_read -_fifo_select -_fifo_vnodeop_entries -_fifo_vnodeop_opv_desc -_fifo_vnodeop_p _fifo_write -_filedesc0 -_filehead -_find_nke -_finishdup -_firstc -_firstsect -_firstseg -_firstsegfromheader -_fixjobc -_flock -_fmod_watch -_fmod_watch_enable -_fork -_fpathconf -_fr_checkp -_frag6_doing_reass -_frag6_drain -_frag6_init -_frag6_input -_frag6_nfragpackets -_frag6_slowtimo -_fragtbl -_fragtbl124 -_fragtbl8 -_freevnodes -_fref -_frele -_fs_filtops -_fsctl -_fstat -_fstatfs -_fstatv -_fsync -_ftruncate +_fifo_ioctl +_fifo_select +_fifo_inactive +_fifo_close +_fifo_pathconf +_fifo_advlock +_file_drop +_file_flags +_file_socket +_file_vnode _fubyte _fuibyte _fuiword _futimes _fuword -_fw_enable -_gCompareTable -_gLatinCaseFold -_gLowerCaseTable -_gTimeZone -_getProcName -_get_aiotask -_get_bsduthreadarg -_get_bsduthreadrval -_get_inpcb_str_size -_get_kernel_symfile -_get_new_filter_id -_get_procrustime -_get_signalthread -_get_tcp_str_size -_getattrlist -_getblk -_getc -_getdirentries -_getdirentriesattr -_getdtablesize -_geteblk -_getegid -_geteuid -_getfakefvmseg -_getfh -_getfsstat -_getgid -_getgroups -_getinoquota -_getitimer -_getlastaddr -_getlogin -_getnewvnode -_getpeername -_getpgid -_getpgrp -_getpid -_getppid -_getpriority -_getquota -_getrlimit -_getrusage -_getsectbyname -_getsectbynamefromheader -_getsectdatafromheader -_getsegbyname -_getsegbynamefromheader -_getsegdatafromheader -_getsid -_getsock -_getsockaddr -_getsockname -_getsockopt -_gettimeofday -_getuid -_getvnode -_gif_attach_inet -_gif_attach_inet6 -_gif_attach_proto_family -_gif_delete_tunnel -_gif_demux -_gif_detach_inet -_gif_detach_inet6 -_gif_detach_proto_family -_gif_encapcheck4 -_gif_encapcheck6 -_gif_input -_gif_ioctl -_gif_pre_output -_gif_reg_if_mods -_gif_shutdown -_gifattach -_gifs -_global_state_pid -_global_user_profile_cache -_grade_cpu_subtype _groupmember -_gsignal 
-_hard_throttle_on_root _hashinit -_hex2ascii_data -_hfc_tag _hfs_addconverter -_hfs_allocate -_hfs_blktooff -_hfs_bmap -_hfs_bwrite -_hfs_catname -_hfs_chash_slock -_hfs_chkdq -_hfs_chkdqchg -_hfs_chkiq -_hfs_chkiqchg -_hfs_clearlock -_hfs_cmap -_hfs_converterinit -_hfs_encoding_list -_hfs_encoding_list_slock -_hfs_encodingbias -_hfs_extname -_hfs_fifoop_entries -_hfs_fifoop_opv_desc -_hfs_fifoop_p -_hfs_findoverlap -_hfs_generate_volume_notifications -_hfs_getblock -_hfs_getconverter -_hfs_getinoquota -_hfs_getlock -_hfs_getquota -_hfs_ioctl -_hfs_islatinbias -_hfs_offtoblk -_hfs_owner_rights -_hfs_pagein -_hfs_pageout -_hfs_pickencoding -_hfs_privdirname -_hfs_qsync -_hfs_quotactl -_hfs_quotaoff -_hfs_quotaon -_hfs_quotastat -_hfs_read -_hfs_relconverter _hfs_remconverter -_hfs_select -_hfs_setlock -_hfs_setquota -_hfs_setuse -_hfs_specop_entries -_hfs_specop_opv_desc -_hfs_specop_p -_hfs_split -_hfs_strategy -_hfs_swap_BTNode -_hfs_swap_HFSBTInternalNode -_hfs_swap_HFSPlusBTInternalNode -_hfs_swap_HFSPlusForkData -_hfs_to_utf8 -_hfs_truncate -_hfs_vbmname -_hfs_vfsops -_hfs_vnodeop_entries -_hfs_vnodeop_opv_desc -_hfs_vnodeop_p -_hfs_wakelock -_hfs_write -_hfsfifo_kqfilt_add -_hfsfifo_kqfilt_remove -_hfsmaxlockdepth -_holdrele -_hostid -_hostname -_hostnamelen -_hz -_hzto -_icmp6_ctloutput -_icmp6_error -_icmp6_fasttimo -_icmp6_ifstat -_icmp6_ifstatmax -_icmp6_init -_icmp6_input -_icmp6_mtudisc_update -_icmp6_nodeinfo -_icmp6_rediraccept -_icmp6_redirect_input -_icmp6_redirect_output -_icmp6_redirtimeout -_icmp6_reflect -_icmp6errppslim -_icmp6stat -_icmp_error -_icmp_input -_if_addmulti -_if_allmulti -_if_attach -_if_delmulti -_if_delmultiaddr -_if_down -_if_down_all -_if_index -_if_name -_if_route -_if_rtproto_del -_if_unroute -_if_up -_if_withname -_ifa_ifwithaddr -_ifa_ifwithdstaddr -_ifa_ifwithnet -_ifa_ifwithroute -_ifafree -_ifaof_ifpforaddr -_ifaref -_ifbyfamily -_ifindex2ifnet -_ifioctl -_ifma_lostlist -_ifmaof_ifpforaddr -_ifmedia_add -_ifmedia_init -_ifmedia_ioctl -_ifmedia_list_add -_ifmedia_removeall -_ifmedia_set -_ifnet -_ifnet_addrs -_ifpromisc -_ifptodlt -_ifqmaxlen +_ifaddr_address +_ifaddr_address_family +_ifaddr_dstaddress +_ifaddr_findbestforaddr +_ifaddr_ifnet +_ifaddr_netmask +_ifaddr_reference +_ifaddr_release +_ifaddr_withaddr +_ifaddr_withdstaddr +_ifaddr_withnet +_ifaddr_withroute +_iflt_attach +_iflt_detach +_ifmaddr_address +_ifmaddr_ifnet +_ifmaddr_lladdress +_ifmaddr_reference +_ifmaddr_release +_ifnet_add_multicast +_ifnet_addrlen +_ifnet_allocate +_ifnet_attach +_ifnet_attach_protocol +_ifnet_baudrate +_ifnet_detach +_ifnet_detach_protocol +_ifnet_eflags +_ifnet_event +_ifnet_family +_ifnet_find_by_name +_ifnet_flags +_ifnet_free_address_list +_ifnet_free_multicast_list +_ifnet_get_address_list +_ifnet_get_address_list_family +_ifnet_get_link_mib_data +_ifnet_get_link_mib_data_length +_ifnet_get_multicast_list +_ifnet_hdrlen +_ifnet_index +_ifnet_input +_ifnet_ioctl +_ifnet_lastchange +_ifnet_list_free +_ifnet_list_get +_ifnet_lladdr +_ifnet_lladdr_copy_bytes +_ifnet_llbroadcast_copy_bytes +_ifnet_metric +_ifnet_mtu +_ifnet_name +_ifnet_offload +_ifnet_output +_ifnet_output_raw +_ifnet_reference +_ifnet_release +_ifnet_remove_multicast +_ifnet_set_addrlen +_ifnet_set_baudrate +_ifnet_set_eflags +_ifnet_set_flags +_ifnet_set_hdrlen +_ifnet_set_link_mib_data +_ifnet_set_lladdr +_ifnet_set_metric +_ifnet_set_mtu +_ifnet_set_offload +_ifnet_set_promiscuous +_ifnet_set_stat +_ifnet_softc +_ifnet_stat +_ifnet_stat_increment +_ifnet_stat_increment_in 
+_ifnet_stat_increment_out +_ifnet_touch_lastchange +_ifnet_type +_ifnet_unit _iftovt_tab -_ifunit -_igmp_fasttimo -_igmp_init -_igmp_input -_igmp_joingroup -_igmp_leavegroup -_igmp_slowtimo -_ihash -_ihashtbl -_in6_addmulti -_in6_addr2scopeid -_in6_addrscope -_in6_are_prefix_equal -_in6_cksum -_in6_clearscope -_in6_control -_in6_delmulti -_in6_dinit -_in6_embedscope -_in6_get_tmpifid -_in6_gif_input -_in6_gif_output -_in6_gif_protosw -_in6_if_up -_in6_ifaddr -_in6_ifattach -_in6_ifawithifp -_in6_ifawithscope -_in6_ifdetach -_in6_ifindex2scopeid -_in6_ifstat -_in6_ifstatmax -_in6_init2done -_in6_init_prefix_ltimes -_in6_inithead -_in6_is_addr_deprecated -_in6_len2mask -_in6_localaddr -_in6_losing -_in6_mapped_peeraddr -_in6_mapped_sockaddr -_in6_mask2len -_in6_matchlen -_in6_maxmtu -_in6_multihead -_in6_nigroup -_in6_nigroup_attach -_in6_nigroup_detach -_in6_pcbbind -_in6_pcbconnect -_in6_pcbdetach -_in6_pcbdisconnect -_in6_pcbladdr -_in6_pcblookup_hash -_in6_pcblookup_local -_in6_pcbnotify -_in6_pcbpurgeif0 -_in6_pcbsetport -_in6_post_msg -_in6_prefix_add_ifid -_in6_prefix_ioctl -_in6_prefix_remove_ifid -_in6_prefixlen2mask -_in6_proto_count -_in6_purgeaddr -_in6_purgeif -_in6_purgeprefix -_in6_recoverscope -_in6_rr_timer -_in6_rr_timer_funneled -_in6_rtchange -_in6_selecthlim -_in6_selectsrc -_in6_setmaxmtu -_in6_setpeeraddr -_in6_setsockaddr -_in6_sin6_2_sin -_in6_sin6_2_sin_in_sock -_in6_sin_2_v4mapsin6 -_in6_sin_2_v4mapsin6_in_sock -_in6_sockaddr -_in6_tmpaddrtimer -_in6_tmpaddrtimer_funneled -_in6_tmpifadd -_in6_update_ifa -_in6_v4mapsin6_sockaddr -_in6addr_any -_in6addr_linklocal_allnodes -_in6addr_linklocal_allrouters -_in6addr_loopback -_in6addr_nodelocal_allnodes -_in6if_do_dad -_in6ifa_ifpforlinklocal -_in6ifa_ifpwithaddr -_in6mask0 -_in6mask128 -_in6mask32 -_in6mask64 -_in6mask96 -_in_addmulti -_in_addword -_in_broadcast -_in_canforward -_in_cksum -_in_cksum_skip -_in_control -_in_delayed_cksum -_in_delmulti -_in_dinit -_in_gif_input -_in_gif_output -_in_gif_protosw -_in_ifaddrhead -_in_ifadown -_in_ifscrub -_in_inithead -_in_localaddr -_in_losing -_in_multihead -_in_pcb_get_owner -_in_pcb_grab_port -_in_pcb_letgo_port -_in_pcb_nat_init -_in_pcb_new_share_client -_in_pcb_rem_share_client -_in_pcballoc -_in_pcbbind -_in_pcbconnect -_in_pcbdetach -_in_pcbdisconnect -_in_pcbinshash -_in_pcbladdr -_in_pcblookup_hash -_in_pcblookup_local -_in_pcbnotifyall -_in_pcbpurgeif0 -_in_pcbrehash -_in_pcbremlists -_in_proto_count -_in_pseudo -_in_rtchange -_in_rtqdrain -_in_setpeeraddr -_in_setsockaddr -_in_stf_input -_in_stf_protosw -_inactivevnodes -_incore -_inet6_ether_input -_inet6_ether_pre_output -_inet6ctlerrmap -_inet6domain -_inet6sw +_inet_arp_handle_input:_arp_ip_handle_input +_inet_arp_init_ifaddr:_arp_ifinit +_inet_arp_lookup:_arp_lookup_ip _inet_aton -_inet_ether_input -_inet_ether_pre_output -_inet_ntoa -_inetctlerrmap -_inetdomain -_inetsw -_inferior -_inflate -_inflateEnd -_inflateInit2_ -_inflateInit_ -_inflateReset -_inflateSetDictionary -_inflateSync -_inflateSyncPoint -_inflate_blocks -_inflate_blocks_free -_inflate_blocks_new -_inflate_blocks_reset -_inflate_blocks_sync_point -_inflate_codes -_inflate_codes_free -_inflate_codes_new -_inflate_copyright -_inflate_fast -_inflate_flush -_inflate_mask -_inflate_set_dictionary -_inflate_trees_bits -_inflate_trees_dynamic -_inflate_trees_fixed -_init_args -_init_attempts -_init_domain -_init_ip6pktopts -_init_sin6 -_initialized -_initproc -_inittodr -_inside -_insmntque -_int6intrq_present -_invalhash -_iobufqueue -_ioctl 
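The ifnet_* block being added above is the opaque interface KPI that replaces direct struct ifnet access. A minimal sketch, assuming an interface named en0 is attached; the interface name and counter values are illustrative:

    #include <sys/types.h>
    #include <net/kpi_interface.h>

    /* Illustrative only: find en0 and account one inbound packet against it. */
    static errno_t
    account_rx_example(u_int32_t bytes)
    {
            ifnet_t ifp;
            errno_t error;

            /* the lookup returns a reference that must be dropped */
            error = ifnet_find_by_name("en0", &ifp);
            if (error)
                    return (error);

            /* one packet, 'bytes' octets, no errors */
            error = ifnet_stat_increment_in(ifp, 1, bytes, 0);

            ifnet_release(ifp);
            return (error);
    }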
-_ip4_ah_cleartos -_ip4_ah_net_deflev -_ip4_ah_offsetmask -_ip4_ah_trans_deflev -_ip4_def_policy -_ip4_esp_net_deflev -_ip4_esp_randpad -_ip4_esp_trans_deflev -_ip4_ipsec_dfbit -_ip4_ipsec_ecn -_ip6_accept_rtadv -_ip6_addaux -_ip6_ah_net_deflev -_ip6_ah_trans_deflev -_ip6_auto_flowlabel -_ip6_auto_linklocal -_ip6_clearpktopts -_ip6_copypktopts -_ip6_ctloutput -_ip6_dad_count -_ip6_def_policy -_ip6_defhlim -_ip6_defmcasthlim -_ip6_delaux -_ip6_desync_factor -_ip6_ecn_egress -_ip6_ecn_ingress -_ip6_esp_net_deflev -_ip6_esp_randpad -_ip6_esp_trans_deflev -_ip6_findaux -_ip6_flow_seq -_ip6_forward -_ip6_forward_rt -_ip6_forward_srcrt -_ip6_forwarding -_ip6_freemoptions -_ip6_freepcbopts -_ip6_fw_chk_ptr -_ip6_fw_ctl_ptr -_ip6_fw_enable -_ip6_get_prevhdr -_ip6_getdstifaddr -_ip6_gif_hlim -_ip6_hdrnestlimit -_ip6_id -_ip6_init -_ip6_input -_ip6_ipsec_ecn -_ip6_keepfaith -_ip6_lasthdr -_ip6_log_interval -_ip6_log_time -_ip6_maxfragpackets -_ip6_mforward -_ip6_mloopback -_ip6_mrouter -_ip6_mrouter_done -_ip6_mrouter_get -_ip6_mrouter_set -_ip6_mrouter_ver -_ip6_mrtproto -_ip6_nexthdr -_ip6_optlen -_ip6_ours_check_algorithm -_ip6_output -_ip6_process_hopopts -_ip6_protox -_ip6_rr_prune -_ip6_savecontrol -_ip6_sendredirects -_ip6_setpktoptions -_ip6_sourcecheck -_ip6_sourcecheck_interval -_ip6_sprintf -_ip6_temp_preferred_lifetime -_ip6_temp_regen_advance -_ip6_temp_valid_lifetime -_ip6_unknown_opt -_ip6_use_deprecated -_ip6_use_tempaddr -_ip6_v6only -_ip6intr -_ip6intrq -_ip6q -_ip6stat -_ip_ctloutput -_ip_defttl -_ip_divert_cookie -_ip_drain -_ip_ecn_egress -_ip_ecn_ingress -_ip_freemoptions -_ip_fw_chk_ptr -_ip_fw_ctl_ptr -_ip_fw_fwd_addr -_ip_gif_ttl -_ip_id -_ip_init -_ip_input -_ip_linklocal_in_allowbadttl -_ip_linklocal_stat -_ip_mcast_src -_ip_mforward -_ip_mrouter -_ip_mrouter_done -_ip_mrouter_get -_ip_mrouter_set -_ip_optcopy -_ip_output -_ip_pkt_to_mbuf -_ip_protox -_ip_rsvp_done -_ip_rsvp_force_done -_ip_rsvp_init -_ip_rsvp_vif_done -_ip_rsvp_vif_init -_ip_rsvpd -_ip_savecontrol -_ip_slowtimo -_ip_srcroute -_ip_stripoptions -_ipcomp4_input -_ipcomp4_output -_ipcomp6_input -_ipcomp6_output -_ipcomp_algorithm_lookup -_ipcperm -_ipflow_create -_ipflow_fastforward -_ipflow_slowtimo -_ipforwarding -_ipintr -_ipintrq -_ipintrq_present -_ipip_input -_ipport_firstauto -_ipport_hifirstauto -_ipport_hilastauto -_ipport_lastauto -_ipport_lowfirstauto -_ipport_lowlastauto -_ipsec4_delete_pcbpolicy -_ipsec4_get_policy -_ipsec4_getpolicybyaddr -_ipsec4_getpolicybysock -_ipsec4_hdrsiz -_ipsec4_in_reject -_ipsec4_in_reject_so -_ipsec4_logpacketstr -_ipsec4_output -_ipsec4_set_policy -_ipsec4_tunnel_validate -_ipsec6_delete_pcbpolicy -_ipsec6_get_policy -_ipsec6_getpolicybyaddr -_ipsec6_getpolicybysock -_ipsec6_hdrsiz -_ipsec6_in_reject -_ipsec6_in_reject_so -_ipsec6_logpacketstr -_ipsec6_output_trans -_ipsec6_output_tunnel -_ipsec6_set_policy -_ipsec6_tunnel_validate -_ipsec6stat -_ipsec_addhist -_ipsec_bypass -_ipsec_chkreplay -_ipsec_clearhist -_ipsec_copy_policy -_ipsec_copypkt -_ipsec_debug -_ipsec_delaux -_ipsec_dumpmbuf -_ipsec_get_reqlevel -_ipsec_gethist -_ipsec_getsocket -_ipsec_hdrsiz_tcp -_ipsec_init_policy -_ipsec_logsastr -_ipsec_setsocket -_ipsec_updatereplay -_ipsecstat -_ipstat -_iptime +_ipf_addv4 +_ipf_addv6 +_ipf_inject_input +_ipf_inject_output +_ipf_remove _is_file_clean _is_suser _is_suser1 _isdisk -_isinferior -_iskmemdev -_isn_ctx -_isn_last_reseed -_isn_secret -_iso_nchstats -_isodirino -_isofncmp -_isofntrans -_isohash -_isohashtbl -_isonullname -_issetugid -_issignal 
-_issingleuser -_itimerdecr -_itimerfix -_journal_active -_journal_close -_journal_create -_journal_end_transaction -_journal_flush -_journal_kill_block -_journal_modify_block_abort -_journal_modify_block_end -_journal_modify_block_start -_journal_open -_journal_start_transaction -_kd_buffer -_kd_buflast -_kd_bufptr -_kd_bufsize -_kd_buftomem -_kd_entropy_buffer -_kd_entropy_bufsize -_kd_entropy_buftomem -_kd_entropy_count -_kd_entropy_indx -_kd_mapcount -_kd_mapptr -_kd_mapsize -_kd_maptomem -_kd_prev_timebase -_kd_readlast -_kd_trace_lock -_kdbg_bootstrap -_kdbg_clear -_kdbg_control -_kdbg_control_chud -_kdbg_getentropy -_kdbg_getreg -_kdbg_mapinit -_kdbg_read -_kdbg_readmap -_kdbg_reinit -_kdbg_resolve_map -_kdbg_setpid -_kdbg_setpidex -_kdbg_setreg -_kdbg_setrtcdec -_kdbg_trace_data -_kdbg_trace_string -_kdebug_chudhook +_kauth_acl_alloc +_kauth_acl_free +_kauth_authorize_action +_kauth_cred_create +_kauth_cred_find +_kauth_cred_get +_kauth_cred_getgid +_kauth_cred_getguid +_kauth_cred_getntsid +_kauth_cred_getuid +_kauth_cred_get_with_ref +_kauth_cred_gid2ntsid +_kauth_cred_guid2gid +_kauth_cred_guid2ntsid +_kauth_cred_guid2uid +_kauth_cred_gid2guid +_kauth_cred_ismember_gid +_kauth_cred_ismember_guid +_kauth_cred_ntsid2gid +_kauth_cred_ntsid2guid +_kauth_cred_ntsid2uid +_kauth_cred_proc_ref +_kauth_cred_ref +_kauth_cred_rele +_kauth_cred_uid2guid +_kauth_cred_uid2ntsid +_kauth_deregister_scope +_kauth_getgid +_kauth_getruid +_kauth_getuid +_kauth_filesec_alloc +_kauth_filesec_free +_kauth_listen_scope +_kauth_null_guid +_kauth_register_scope +_kauth_unlisten_scope _kdebug_enable -_kdebug_flags -_kdebug_nolog -_kdebug_ops -_kdebug_trace -_kdlog_beg -_kdlog_end -_kdlog_value1 -_kdlog_value2 -_kdlog_value3 -_kdlog_value4 -_kern_control_init -_kern_event_init -_kern_sysctl -_kernacc _kernel_debug _kernel_debug1 _kernel_flock -_kernel_sysctl _kernproc -_kev_attach -_kev_control -_kev_detach -_kev_post_msg -_kevent -_key_allocsa -_key_allocsp -_key_cb -_key_checkrequest -_key_checktunnelsanity -_key_debug_level -_key_dst -_key_freereg -_key_freesav -_key_freeso -_key_freesp -_key_gettunnel -_key_init -_key_ismyaddr -_key_msg2sp -_key_newsp -_key_output -_key_parse -_key_random -_key_randomfill -_key_sa_recordxfer -_key_sa_routechange -_key_sa_stir_iv -_key_sendup -_key_sendup_mbuf -_key_sp2msg -_key_spdacquire -_key_src -_key_timehandler -_key_timehandler_funnel -_key_usrreqs -_keydb_delsecashead -_keydb_delsecpolicy -_keydb_delsecreg -_keydb_delsecreplay -_keydb_freesecasvar -_keydb_newsecashead -_keydb_newsecasvar -_keydb_newsecpolicy -_keydb_newsecreg -_keydb_newsecreplay -_keydb_refsecasvar -_keydomain -_keystat -_keysw -_kill -_killpg1 -_kinfo_vdebug -_klist_init -_klogwakeup -_km_tty -_kmclose -_kmem_mb_alloc -_kmeminit -_kmemstats -_kmgetc -_kmgetc_silent -_kminit -_kmioctl -_kmopen -_kmputc -_kmread -_kmwrite -_kmzones +_kev_msg_post +_kev_vendor_code_find _knote _knote_attach _knote_detach -_krpc_call -_krpc_portmap -_ktrace -_ktrcsw -_ktrgenio -_ktrnamei -_ktrpsig -_ktrsyscall -_ktrsysret -_kvprintf -_lbolt _ldisc_deregister _ldisc_register -_lease_check -_lease_updatetime -_leavepgrp -_legal_vif_num -_lf_clearlock -_lf_findoverlap -_lf_getblock -_lf_getlock -_lf_setlock -_lf_split -_lf_wakelock _lightning_bolt -_limcopy -_limit0 -_linesw -_link -_listen -_llinfo_nd6 -_lo_attach_inet -_lo_attach_inet6 -_lo_demux -_lo_framer -_lo_input -_lo_reg_if_mods -_lo_set_bpf_tap -_lo_shutdown -_load_ipfw -_load_machfile -_local_proto_count -_localdomain -_lockinit -_lockmgr 
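The kauth_* exports above are the credential KPI that the rest of this patch adopts wherever p->p_ucred and the removed crget()/crfree() family used to appear. A minimal sketch of a group-membership check against the calling thread's credential; the gid value 80 is illustrative:

    #include <sys/types.h>
    #include <sys/kauth.h>

    /* Illustrative only: is the caller's credential a member of group 80? */
    static int
    caller_in_group_example(void)
    {
            kauth_cred_t    cred = kauth_cred_get();        /* borrowed reference */
            int             is_member = 0;

            /* the return value is an errno; the answer lands in is_member */
            if (kauth_cred_ismember_gid(cred, 80, &is_member) != 0)
                    return (0);     /* treat a failed lookup as "no" */

            return (is_member);
    }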
-_lockmgr_printinfo -_lockstatus -_log_in_vain -_log_init -_log_lock -_log_open -_log_putc -_logclose -_logioctl -_logopen -_logpri -_logread -_logselect -_logsoftc -_logwakeup -_loif -_lookup -_loopattach -_lru_is_stale -_lseek -_lstat -_lstatv -_m_adj -_m_aux_add -_m_aux_delete -_m_aux_find -_m_cat -_m_clalloc -_m_cltom -_m_copy_pkthdr -_m_copyback -_m_copydata -_m_copym -_m_copym_with_hdrs -_m_devget -_m_dtom -_m_dup -_m_expand -_m_free -_m_freem -_m_freem_list -_m_get -_m_getclr -_m_gethdr -_m_getpacket -_m_getpackethdrs -_m_getpackets -_m_leadingspace -_m_mcheck -_m_mchtype -_m_mclalloc -_m_mclfree -_m_mclget -_m_mclhasreference -_m_mclref -_m_mclunref -_m_mtocl -_m_mtod -_m_prepend -_m_prepend_2 -_m_pulldown -_m_pullup -_m_reclaim -_m_retry -_m_retryhdr -_m_split -_m_trailingspace -_m_want -_mac_roman_to_unicode -_mac_roman_to_utf8 -_machdep_sysctl_list -_machine_exception -_macx_backing_store_recovery -_macx_backing_store_suspend -_macx_swapoff -_macx_swapon -_madvise -_map_fd -_map_fd_funneled -_max_datalen -_max_hdr -_max_linkhdr -_max_protohdr -_maxdmap -_maxfiles -_maxfilesperproc -_maxlockdepth -_maxproc -_maxprocperuid -_maxsmap -_maxsockets -_maxvfsconf -_maxvfsslots -_mb_map -_mbinit -_mbstat -_mbuf_slock -_mbutl -_mcl_paddr -_mcl_to_paddr -_mclfree -_mclrefcnt -_mdev -_mdevBMajor -_mdevCMajor -_mdevadd -_mdevinit -_mdevlookup -_memmove -_memname -_meta_bread -_meta_breadn -_meta_is_stale -_meta_zones -_mf6ctable -_mfree -_mfreelater +_mbuf_adj +_mbuf_allocpacket +_mbuf_align_32 +_mbuf_aux_add +_mbuf_aux_delete +_mbuf_aux_find +_mbuf_clear_csum_performed +_mbuf_clear_csum_requested +_mbuf_clear_vlan_tag +_mbuf_copy_pkthdr +_mbuf_copyback +_mbuf_copydata +_mbuf_copym +_mbuf_data +_mbuf_data_to_physical +_mbuf_datastart +_mbuf_dup +_mbuf_flags +_mbuf_free +_mbuf_freem +_mbuf_freem_list +_mbuf_get +_mbuf_getcluster +_mbuf_get_csum_performed +_mbuf_get_csum_requested +_mbuf_get_vlan_tag +_mbuf_gethdr +_mbuf_getpacket +_mbuf_inbound_modified +_mbuf_leadingspace +_mbuf_len +_mbuf_maxlen +_mbuf_mclget +_mbuf_mclhasreference +_mbuf_next +_mbuf_nextpkt +_mbuf_outbound_finalize +_mbuf_pkthdr_header +_mbuf_pkthdr_len +_mbuf_pkthdr_rcvif +_mbuf_pkthdr_setheader +_mbuf_pkthdr_setlen +_mbuf_pkthdr_setrcvif +_mbuf_prepend +_mbuf_pulldown +_mbuf_pullup +_mbuf_set_csum_performed +_mbuf_set_csum_requested +_mbuf_set_vlan_tag +_mbuf_setdata +_mbuf_setflags +_mbuf_setflags_mask +_mbuf_setlen +_mbuf_setnext +_mbuf_setnextpkt +_mbuf_settype +_mbuf_split +_mbuf_stats +_mbuf_tag_allocate +_mbuf_tag_find +_mbuf_tag_id_find +_mbuf_tag_free +_mbuf_trailingspace +_mbuf_type _microtime _microuptime -_mincore -_minherit _minphys -_mkcomplex -_mkdir -_mkfifo -_mknod -_mld6_fasttimeo -_mld6_init -_mld6_input -_mld6_start_listening -_mld6_stop_listening -_mlock -_mlockall -_mmFree -_mmGetPtr -_mmInit -_mmMalloc -_mmReturnPtr -_mmap -_mmread -_mmrw -_mmwrite -_mntid_slock -_mntvnode_slock -_modetodirtype -_modwatch -_mount -_mountlist -_mountlist_slock -_mountroot -_mountroot_post_hook -_mprotect -_mremap -_mrt6_ioctl -_mrt6stat -_mrt_ioctl -_msgbufp -_msgctl -_msgget -_msgrcv -_msgsnd -_msgsys -_msync -_multicast_register_if -_munlock -_munlockall -_munmap -_munmapfd -_mynum_flavors -_n6expire -_name_cmp -_namei +_msleep _nanotime _nanouptime -_nbdwrite -_nblkdev -_nbuf -_nbufh -_nbufhigh -_nbuflow -_nbuftarget -_ncallout -_nchash -_nchashtbl -_nchinit -_nchrdev -_nchstats -_ncl -_nclruhead -_nd6_cache_lladdr -_nd6_dad_duplicated -_nd6_dad_start -_nd6_dad_stop -_nd6_dad_stoptimer -_nd6_debug 
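The mbuf_* run above replaces the removed m_* calls (m_gethdr, m_copyback, m_freem, and the rest of the deleted list) with accessor-based equivalents on an opaque mbuf_t. A minimal sketch that builds a packet-header mbuf around a caller-supplied payload; the helper and its payload are illustrative:

    #include <sys/types.h>
    #include <sys/kpi_mbuf.h>

    /* Illustrative only: allocate a packet mbuf and copy a payload into it. */
    static errno_t
    make_packet_example(const void *payload, size_t len, mbuf_t *out)
    {
            mbuf_t  m = NULL;
            errno_t error;

            error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &m);
            if (error)
                    return (error);

            /* mbuf_copyback() extends the chain as needed to hold len bytes */
            error = mbuf_copyback(m, 0, len, payload, MBUF_WAITOK);
            if (error) {
                    mbuf_freem(m);
                    return (error);
            }
            *out = m;
            return (0);
    }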
-_nd6_defifindex -_nd6_delay -_nd6_free -_nd6_gctimer -_nd6_ifattach -_nd6_ifptomac -_nd6_init -_nd6_ioctl -_nd6_is_addr_neighbor -_nd6_lookup -_nd6_maxndopt -_nd6_maxnudhint -_nd6_mmaxtries -_nd6_na_input -_nd6_na_output -_nd6_need_cache -_nd6_ns_input -_nd6_ns_output -_nd6_nud_hint -_nd6_option -_nd6_option_init -_nd6_options -_nd6_output -_nd6_prefix_lookup -_nd6_prefix_offlink -_nd6_prefix_onlink -_nd6_prelist_add -_nd6_prune -_nd6_purge -_nd6_ra_input -_nd6_recalc_reachtm_interval -_nd6_rs_input -_nd6_rtrequest -_nd6_setdefaultiface -_nd6_setmtu -_nd6_storelladdr -_nd6_timer -_nd6_timer_funneled -_nd6_umaxtries -_nd6_useloopback -_nd_defrouter -_nd_ifinfo -_nd_prefix -_ndflush -_ndqb -_ndrv_abort -_ndrv_attach -_ndrv_bind -_ndrv_connect -_ndrv_control -_ndrv_ctlinput -_ndrv_ctloutput -_ndrv_delspec -_ndrv_detach -_ndrv_disconnect -_ndrv_do_detach -_ndrv_do_disconnect -_ndrv_dominit -_ndrv_drain -_ndrv_find_tag -_ndrv_flushq -_ndrv_get_ifp -_ndrv_handle_ifp_detach -_ndrv_init -_ndrv_input -_ndrv_output -_ndrv_peeraddr -_ndrv_read_event -_ndrv_recvspace -_ndrv_send -_ndrv_sendspace -_ndrv_sense -_ndrv_setspec -_ndrv_shutdown -_ndrv_sockaddr -_ndrv_sysctl -_ndrv_to_dlil_demux -_ndrv_usrreqs -_ndrvdomain -_ndrvl -_ndrvsw -_net_add_domain -_net_add_proto -_net_del_domain -_net_del_proto -_net_sysctl -_netaddr_match -_netboot_iaddr -_netboot_mountroot -_netboot_root -_netboot_rootpath -_netboot_setup -_netisr -_network_flock -_new_sysctl -_new_system_shared_regions -_newsysctl_list -_nextc -_nextgennumber -_nextsect -_nextseg -_nextsegfromheader -_nextvnodeid -_nf_list -_nfiles -_nfs_adv -_nfs_async -_nfs_asyncio -_nfs_bioread -_nfs_boot_getfh -_nfs_boot_init -_nfs_buf_get -_nfs_buf_incore -_nfs_buf_iodone -_nfs_buf_iowait -_nfs_buf_page_inval -_nfs_buf_release -_nfs_buf_remfree -_nfs_buf_upl_check -_nfs_buf_upl_setup -_nfs_buf_write -_nfs_buf_write_delayed -_nfs_bufq -_nfs_clearcommit -_nfs_cltpsock -_nfs_commit -_nfs_connect -_nfs_defect -_nfs_disconnect -_nfs_doio -_nfs_dolock -_nfs_false -_nfs_flushcommits -_nfs_fsinfo -_nfs_getattrcache -_nfs_getauth -_nfs_getcookie -_nfs_getnickauth -_nfs_getreq -_nfs_hash -_nfs_inactive -_nfs_init -_nfs_invaldir -_nfs_ioddelwri -_nfs_iodmount -_nfs_iodwant -_nfs_islocked -_nfs_ispublicfh -_nfs_loadattrcache -_nfs_lock -_nfs_mount_type -_nfs_mountroot -_nfs_namei -_nfs_nbdwrite -_nfs_nbinit -_nfs_nget -_nfs_nhinit -_nfs_node_hash_lock -_nfs_numasync -_nfs_prog -_nfs_readdirplusrpc -_nfs_readdirrpc -_nfs_readlinkrpc -_nfs_readrpc -_nfs_reclaim -_nfs_removeit -_nfs_rephead -_nfs_reply -_nfs_reqq -_nfs_request -_nfs_savenickauth -_nfs_send -_nfs_sigintr -_nfs_slplock -_nfs_slpunlock -_nfs_sndlock -_nfs_sndunlock -_nfs_ticks -_nfs_timer -_nfs_timer_funnel -_nfs_true -_nfs_udpsock -_nfs_unlock -_nfs_vfsops -_nfs_vinvalbuf -_nfs_write -_nfs_writerpc -_nfs_xdrneg1 -_nfs_xidwrap -_nfsadvlock_longest -_nfsadvlocks -_nfsadvlocks_time -_nfsbufcnt -_nfsbufdelwri -_nfsbufdelwricnt -_nfsbuffree -_nfsbuffreecnt -_nfsbufhash -_nfsbufhashlock -_nfsbufhashtbl -_nfsbufmax -_nfsbufmin -_nfsclnt -_nfsd_head -_nfsd_head_flag -_nfsd_waiting -_nfslockdans -_nfslockdfd -_nfslockdfp -_nfslockdwait -_nfslockdwaiting -_nfsm_adj -_nfsm_disct -_nfsm_mbuftouio -_nfsm_reqh -_nfsm_rpchead -_nfsm_srvfattr -_nfsm_srvpostopattr -_nfsm_srvwcc -_nfsm_strtmbuf -_nfsm_uiotombuf -_nfsneedbuffer -_nfsnodehash -_nfsnodehashtbl -_nfsrtt -_nfsrtton -_nfsrv3_access -_nfsrv3_procs -_nfsrv_cleancache -_nfsrv_commit -_nfsrv_create -_nfsrv_dorec -_nfsrv_errmap -_nfsrv_fhtovp -_nfsrv_fsinfo 
-_nfsrv_getattr -_nfsrv_getcache -_nfsrv_init -_nfsrv_initcache -_nfsrv_link -_nfsrv_lookup -_nfsrv_mkdir -_nfsrv_mknod -_nfsrv_noop -_nfsrv_null -_nfsrv_object_create -_nfsrv_pathconf -_nfsrv_rcv -_nfsrv_read -_nfsrv_readdir -_nfsrv_readdirplus -_nfsrv_readlink -_nfsrv_remove -_nfsrv_rename -_nfsrv_rmdir -_nfsrv_setattr -_nfsrv_setcred -_nfsrv_slpderef -_nfsrv_statfs -_nfsrv_symlink -_nfsrv_updatecache -_nfsrv_wakenfsd -_nfsrv_write -_nfsrv_writegather -_nfsrvhash -_nfsrvhashtbl -_nfsrvlruhead -_nfsrvw_procrastinate -_nfsrvw_procrastinate_v3 -_nfsrvw_sort -_nfsstats -_nfssvc -_nfssvc_sockhead -_nfssvc_sockhead_flag -_nfsv2_procid -_nfsv2_type -_nfsv2_vnodeop_opv_desc -_nfsv2_vnodeop_p -_nfsv3_procid -_nfsv3_type -_ngif -_niobuf -_nkdbufs -_nke_insert -_nlinesw -_nmbclusters -_nobdev -_nocdev -_nop_abortop +_nd6_lookup_ipv6 +_net_init_add _nop_access _nop_advlock _nop_allocate -_nop_blkatoff _nop_blktooff -_nop_bmap +_nop_blockmap _nop_bwrite _nop_close -_nop_cmap _nop_copyfile _nop_create _nop_devblocksize @@ -2159,11 +414,7 @@ _nop_getattr _nop_getattrlist _nop_inactive _nop_ioctl -_nop_islocked -_nop_lease _nop_link -_nop_lock -_nop_mkcomplex _nop_mkdir _nop_mknod _nop_mmap @@ -2172,583 +423,108 @@ _nop_open _nop_pagein _nop_pageout _nop_pathconf -_nop_pgrd -_nop_pgwr -_nop_print _nop_read _nop_readdir _nop_readdirattr _nop_readlink -_nop_reallocblks _nop_reclaim _nop_remove _nop_rename _nop_revoke _nop_rmdir _nop_searchfs -_nop_seek _nop_select _nop_setattr _nop_setattrlist _nop_strategy _nop_symlink -_nop_truncate -_nop_unlock -_nop_update -_nop_valloc -_nop_vfree _nop_whiteout _nop_write -_nosys -_notify_filemod_watchers -_npcbufs -_nport -_nprocs -_nqfhhash -_nqfhhashtbl -_nqnfs_callback -_nqnfs_clientd -_nqnfs_clientlease -_nqnfs_getlease -_nqnfs_lease_check -_nqnfs_piggy -_nqnfs_prog -_nqnfs_serverd -_nqnfsrv_getlease -_nqnfsrv_vacated -_nqnfsstarttime -_nqsrv_clockskew -_nqsrv_getlease -_nqsrv_maxlease -_nqsrv_writeslack -_nqtimerhead -_nselcoll -_nswap -_nswapmap -_nswdev -_nsysent _nulldev _nullop -_nullsys -_numcache -_numdquot -_numnfsrvcache -_numused_vfsslots -_numvnodes -_nv3tov_type -_oaccept -_obreak -_ocreat -_ofstat -_oftruncate -_ogetdirentries -_ogetdomainname -_ogetdtablesize -_ogethostid -_ogethostname -_ogetpagesize -_ogetpeername -_ogetrlimit -_ogetsockname -_okillpg -_old_if_attach -_olseek -_olstat -_open -_orecv -_orecvfrom -_orecvmsg -_osend -_osendmsg -_osetdomainname -_osethostid -_osethostname -_osetregid -_osetreuid -_osetrlimit -_osigblock -_osigsetmask -_osigstack -_osigvec -_osmmap -_ostat -_otruncate -_ovadvise -_ovbcopy -_owait -_owait3 -_packattrblk -_packcommonattr -_packdirattr -_packfileattr -_packvolattr -_parse_bsd_args -_pathconf -_pc_buffer -_pc_buflast -_pc_bufptr -_pc_bufsize -_pc_buftomem -_pc_sample_pid -_pc_trace_frameworks -_pcb_synch -_pcsample_beg -_pcsample_comm -_pcsample_enable -_pcsample_end -_pcsample_flags -_pcsamples_bootstrap -_pcsamples_clear -_pcsamples_control -_pcsamples_ops -_pcsamples_read -_pcsamples_reinit -_pfctlinput -_pfctlinput2 -_pffasttimo -_pffinddomain -_pffindproto -_pffindtype -_pfind -_pfkeystat -_pfslowtimo -_pfxlist_onlink_check -_pgdelete -_pgfind -_pgrp0 -_pgrphash -_pgrphashtbl -_pgsignal _physio -_pid_for_task -_pidhash -_pidhashtbl -_pim6_input -_pipe -_pmtu_expire -_pmtu_probe _postevent -_postsig -_pread -_prelist_remove -_prelist_update -_prepare_profile_database -_prf -_print_vmpage_stat -_priority_IO_timestamp_for_root -_prngAllowReseed -_prngDestroy -_prngForceReseed -_prngInitialize -_prngInput 
-_prngOutput -_prngProcessSeedBuffer -_prngStretch -_proc0 -_proc_exit +_proc_find +_proc_forcequota +_proc_is64bit _proc_is_classic +_proc_isinferior +_proc_issignal _proc_name -_proc_prepareexit -_proc_reparent -_procdup -_process_terminate_self -_procinit -_profil -_prtactive -_pru_abort_notsupp -_pru_accept_notsupp -_pru_attach_notsupp -_pru_bind_notsupp -_pru_connect2_notsupp -_pru_connect_notsupp -_pru_control_notsupp -_pru_detach_notsupp -_pru_disconnect_notsupp -_pru_listen_notsupp -_pru_peeraddr_notsupp -_pru_rcvd_notsupp -_pru_rcvoob_notsupp -_pru_send_notsupp -_pru_sense_null -_pru_shutdown_notsupp -_pru_sockaddr_notsupp -_pru_sopoll_notsupp -_pru_soreceive -_pru_soreceive_notsupp -_pru_sosend -_pru_sosend_notsupp -_pseudo_inits -_psignal -_psignal_lock -_psignal_sigprof -_psignal_uthread -_psignal_vfork -_psignal_vtalarm -_psignal_xcpu -_pstats0 -_pt_setrunnable -_pthread_sigmask -_ptrace -_pty_init -_putc -_pwrite -_q_to_b -_qsync -_quotactl -_quotaoff -_quotaon -_quotastat +_proc_noremotehang +_proc_pid +_proc_ppid +_proc_rele +_proc_self +_proc_selfname +_proc_selfpid +_proc_selfppid +_proc_tbe +_proc_signal +_proc_suser +_proc_ucred +_proto_inject +_proto_input +_proto_register_plumber +_proto_unregister_plumber _random -_random_close -_random_init -_random_ioctl -_random_open -_random_read -_random_write -_raw_attach -_raw_ctlinput -_raw_detach -_raw_disconnect -_raw_init -_raw_input -_raw_usrreqs -_rawcb_list -_rawread -_rawwrite -_rc4_crypt -_rc4_init -_read _read_random -_readlink -_readv -_realitexpire -_reassignbuf -_reboot -_receive_packet -_recvfrom -_recvmsg -_register_sockfilter -_relookup -_remove_name -_rename -_resetpriority -_resize_namecache -_revoke -_rijndaelDecrypt -_rijndaelEncrypt -_rijndaelKeyEncToDec -_rijndaelKeySched -_rijndael_blockDecrypt -_rijndael_blockEncrypt -_rijndael_cipherInit -_rijndael_makeKey -_rijndael_padDecrypt -_rijndael_padEncrypt -_rip6_ctlinput -_rip6_ctloutput -_rip6_input -_rip6_output -_rip6_recvspace -_rip6_sendspace -_rip6_usrreqs -_rip6stat -_rip_ctlinput -_rip_ctloutput -_rip_init -_rip_input -_rip_output -_rip_recvspace -_rip_sendspace -_rip_usrreqs -_ripcb -_ripcbinfo _rl_add _rl_init _rl_remove _rl_scan -_rmdir -_rn_addmask -_rn_addroute -_rn_delete -_rn_init -_rn_inithead -_rn_lookup -_rn_match -_rn_refines -_rootdev -_rootdevice -_rootfs _rootvnode -_rootvp -_route6_input -_route_cb -_route_init -_routedomain -_rpc_auth_kerb -_rpc_auth_unix -_rpc_autherr -_rpc_call -_rpc_mismatch -_rpc_msgaccepted -_rpc_msgdenied -_rpc_reply -_rpc_vers -_rr_prefix -_rsvp_input -_rsvp_on -_rt6_flush -_rt_ifmsg -_rt_missmsg -_rt_newaddrmsg -_rt_newmaddrmsg -_rt_setgate -_rt_tables -_rtalloc -_rtalloc1 -_rtalloc_ign -_rtfree -_rtinit -_rtioctl -_rtredirect -_rtref -_rtrequest -_rtsetifa -_rtunref -_ruadd -_run_netisr -_rwuio -_sa6_any -_safedounmount -_savacctp -_sb_lock -_sb_max -_sb_notify -_sballoc -_sbappend -_sbappendaddr -_sbappendcontrol -_sbappendrecord -_sbcompress -_sbcreatecontrol -_sbdrop -_sbdroprecord -_sbflush -_sbfree -_sbinsertoob -_sblock -_sbrelease -_sbreserve -_sbrk -_sbspace -_sbtoxsockbuf -_sbunlock -_sbwait -_scanc -_scope6_addr2default -_scope6_get -_scope6_get_default -_scope6_ids -_scope6_ifattach -_scope6_set -_scope6_setdefault -_searchfs -_securelevel -_selcontinue -_select -_selprocess _selrecord _selthreadclear _seltrue _selwait _selwakeup -_sem -_sem_close -_sem_destroy -_sem_getvalue -_sem_init -_sem_open -_sem_post -_sem_trywait -_sem_unlink -_sem_wait -_sema -_semconfig -_semctl -_semexit -_semget 
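The proc_* additions above cover the common struct proc queries (pid, name, parentage) without exposing the structure itself; proc_find() returns a referenced proc_t that must be dropped with proc_rele(). A minimal sketch, assuming the target pid exists; the lookup helper is illustrative:

    #include <sys/proc.h>
    #include <sys/errno.h>

    /* Illustrative only: fetch the command name of a pid via the opaque KPI. */
    static int
    name_of_pid_example(int pid, char *buf, int buflen)
    {
            proc_t  p = proc_find(pid);     /* takes a reference, or returns NULL */

            if (p == NULL)
                    return (ESRCH);

            proc_name(proc_pid(p), buf, buflen);
            proc_rele(p);                   /* drop the proc_find() reference */
            return (0);
    }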
-_seminfo -_seminit -_semop -_semsys -_semu -_sendmsg -_sendsig -_sendto -_session0 -_sessrele -_set_blocksize -_set_bsduthreadargs -_set_cast128_subkey _set_fsblocksize -_set_procsigmask -_set_security_token -_setattrlist -_setconf -_setegid -_seteuid -_setgid -_setgroups -_setitimer -_setlogin -_setpgid -_setpriority -_setprivexec -_setquota -_setrlimit -_setsid -_setsigvec -_setsockopt -_setthetime -_settimeofday -_setuid -_setuse -_sfilter_init -_sfilter_term -_sha1_init -_sha1_loop -_sha1_pad -_sha1_result -_shadow_map_create -_shadow_map_free -_shadow_map_read -_shadow_map_shadow_size -_shadow_map_write -_shm_open -_shm_unlink -_shmat -_shmctl -_shmdt -_shmexit -_shmfork -_shmget -_shminfo -_shminit -_shmsegs -_shmsys -_shutdown -_sig_filtops -_sig_lock_to_exit -_sig_try_locked -_sigaction -_sigacts0 -_sigaltstack -_sigcontinue -_sigexit_locked -_siginit -_signal_lock -_signal_setast -_signal_unlock -_sigpending -_sigprocmask -_sigprop -_sigreturn -_sigsuspend -_sigwait -_skpc -_sleep +_sflt_attach +_sflt_detach +_sflt_register +_sflt_unregister _snprintf -_so_cache_hw -_so_cache_init_done -_so_cache_max_freed -_so_cache_time -_so_cache_timeouts -_so_cache_timer -_so_cache_zone -_so_gencnt -_soabort -_soaccept -_soalloc -_sobind -_socantrcvmore -_socantsendmore -_sockargs -_socket -_socket_cache_head -_socket_cache_tail -_socket_debug -_socket_zone -_socketinit -_socketops -_socketpair -_soclose -_soconnect -_soconnect2 -_socreate -_sodealloc -_sodelayed_copy -_sodisconnect -_sodropablereq -_sofree -_sogetopt -_sohasoutofband -_soisconnected -_soisconnecting -_soisdisconnected -_soisdisconnecting -_solisten -_sonewconn -_soo_close -_soo_ioctl -_soo_kqfilter -_soo_read -_soo_select -_soo_stat -_soo_write -_soopt_getm -_soopt_mcopyin -_soopt_mcopyout -_sooptcopyin -_sooptcopyout -_sopoll -_soreadable -_soreceive -_soreserve -_sorflush -_sorwakeup -_sosend -_sosendallatonce -_sosetopt -_soshutdown -_sotoxsocket -_sowakeup -_sowriteable -_sowwakeup -_spec_advlock -_spec_badop -_spec_blktooff -_spec_bmap -_spec_close -_spec_cmap -_spec_devblocksize -_spec_ebadf -_spec_fsync -_spec_ioctl +_sock_accept +_sock_bind +_sock_close +_sock_connect +_sock_connectwait +_sock_getpeername +_sock_getsockname +_sock_getsockopt +_sock_gettype +_sock_inject_data_in +_sock_inject_data_out +_sock_ioctl +_sock_isconnected +_sock_isnonblocking +_sock_listen +_sock_nointerrupt +_sock_receive +_sock_receivembuf +_sock_send +_sock_sendmbuf +_sock_setpriv +_sock_setsockopt +_sock_shutdown +_sock_socket +_sockopt_copyin +_sockopt_copyout +_sockopt_direction +_sockopt_level +_sockopt_name +_sockopt_valsize _spec_lookup -_spec_nfsv2nodeop_opv_desc -_spec_nfsv2nodeop_p -_spec_offtoblk _spec_open -_spec_pathconf -_spec_print _spec_read +_spec_write +_spec_ioctl _spec_select +_spec_fsync _spec_strategy -_spec_vnodeop_entries -_spec_vnodeop_opv_desc -_spec_vnodeop_p -_spec_write -_spechash_slock -_speclisth +_spec_close +_spec_pathconf _spl0 _splbio _splclock @@ -2756,7 +532,6 @@ _splhigh _splimp _spllo _spln -_splnet _sploff _splon _splpower @@ -2765,404 +540,21 @@ _splsoftclock _spltty _splvm _splx -_srv -_ss_fltsz -_ss_fltsz_local -_sstk -_startprofclock -_stat -_statfs -_statv -_stf_attach_inet6 -_stf_detach_inet6 -_stf_ioctl -_stf_pre_output -_stf_reg_if_mods -_stf_shutdown -_stfattach -_stop -_stopprofclock _subyte _suibyte _suiword _suser _suword -_swapmap -_swapon -_swdevt -_symlink -_sync -_synthfs_access -_synthfs_adddirentry -_synthfs_cached_lookup -_synthfs_chflags -_synthfs_chmod -_synthfs_chown 
-_synthfs_create -_synthfs_fhtovp -_synthfs_getattr -_synthfs_inactive -_synthfs_init -_synthfs_islocked -_synthfs_lock -_synthfs_lookup -_synthfs_mkdir -_synthfs_mmap -_synthfs_mount -_synthfs_mount_fs -_synthfs_move_rename_entry -_synthfs_new_directory -_synthfs_new_symlink -_synthfs_open -_synthfs_pathconf -_synthfs_quotactl -_synthfs_readdir -_synthfs_readlink -_synthfs_reclaim -_synthfs_remove -_synthfs_remove_directory -_synthfs_remove_entry -_synthfs_remove_symlink -_synthfs_rename -_synthfs_rmdir -_synthfs_root -_synthfs_select -_synthfs_setattr -_synthfs_setupuio -_synthfs_start -_synthfs_statfs -_synthfs_symlink -_synthfs_sync -_synthfs_sysctl -_synthfs_unlock -_synthfs_unmount -_synthfs_update -_synthfs_vfsops -_synthfs_vget -_synthfs_vnodeop_entries -_synthfs_vnodeop_opv_desc -_synthfs_vnodeop_p -_synthfs_vptofh -_syscallnames _sysctl__children -_sysctl__debug -_sysctl__debug_bpf_bufsize -_sysctl__debug_bpf_maxbufsize _sysctl__debug_children -_sysctl__hw -_sysctl__hw_activecpu -_sysctl__hw_busfrequency -_sysctl__hw_busfrequency_compat -_sysctl__hw_busfrequency_max -_sysctl__hw_busfrequency_min -_sysctl__hw_byteorder -_sysctl__hw_cachelinesize -_sysctl__hw_cachelinesize_compat -_sysctl__hw_children -_sysctl__hw_cpufrequency -_sysctl__hw_cpufrequency_compat -_sysctl__hw_cpufrequency_max -_sysctl__hw_cpufrequency_min -_sysctl__hw_cpusubtype -_sysctl__hw_cputype -_sysctl__hw_epoch -_sysctl__hw_l1dcachesize -_sysctl__hw_l1dcachesize_compat -_sysctl__hw_l1icachesize -_sysctl__hw_l1icachesize_compat -_sysctl__hw_l2cachesize -_sysctl__hw_l2cachesize_compat -_sysctl__hw_l2settings -_sysctl__hw_l3cachesize -_sysctl__hw_l3cachesize_compat -_sysctl__hw_l3settings -_sysctl__hw_machine -_sysctl__hw_memsize -_sysctl__hw_model -_sysctl__hw_ncpu -_sysctl__hw_optional -_sysctl__hw_optional_children -_sysctl__hw_optional_floatingpoint -_sysctl__hw_pagesize -_sysctl__hw_pagesize_compat -_sysctl__hw_physmem -_sysctl__hw_tbfrequency -_sysctl__hw_tbfrequency_compat -_sysctl__hw_usermem -_sysctl__hw_vectorunit -_sysctl__kern _sysctl__kern_children -_sysctl__kern_dummy -_sysctl__kern_ipc -_sysctl__kern_ipc_children -_sysctl__kern_ipc_maxsockbuf -_sysctl__kern_ipc_maxsockets -_sysctl__kern_ipc_nmbclusters -_sysctl__kern_ipc_sockbuf_waste_factor -_sysctl__kern_ipc_somaxconn -_sysctl__kern_ipc_sorecvmincopy -_sysctl__kern_ipc_sosendminchain -_sysctl__kern_maxfilesperproc -_sysctl__kern_maxprocperuid -_sysctl__kern_sysv -_sysctl__kern_sysv_children -_sysctl__kern_sysv_shmall -_sysctl__kern_sysv_shmmax -_sysctl__kern_sysv_shmmin -_sysctl__kern_sysv_shmmni -_sysctl__kern_sysv_shmseg -_sysctl__machdep _sysctl__machdep_children -_sysctl__net _sysctl__net_children -_sysctl__net_inet -_sysctl__net_inet6 -_sysctl__net_inet6_children -_sysctl__net_inet6_icmp6 -_sysctl__net_inet6_icmp6_children -_sysctl__net_inet6_icmp6_errppslimit -_sysctl__net_inet6_icmp6_nd6_debug -_sysctl__net_inet6_icmp6_nd6_delay -_sysctl__net_inet6_icmp6_nd6_maxnudhint -_sysctl__net_inet6_icmp6_nd6_mmaxtries -_sysctl__net_inet6_icmp6_nd6_prune -_sysctl__net_inet6_icmp6_nd6_umaxtries -_sysctl__net_inet6_icmp6_nd6_useloopback -_sysctl__net_inet6_icmp6_nodeinfo -_sysctl__net_inet6_icmp6_rediraccept -_sysctl__net_inet6_icmp6_redirtimeout -_sysctl__net_inet6_icmp6_stats -_sysctl__net_inet6_ip6 -_sysctl__net_inet6_ip6_accept_rtadv -_sysctl__net_inet6_ip6_auto_flowlabel -_sysctl__net_inet6_ip6_auto_linklocal -_sysctl__net_inet6_ip6_children -_sysctl__net_inet6_ip6_dad_count -_sysctl__net_inet6_ip6_defmcasthlim -_sysctl__net_inet6_ip6_forwarding 
-_sysctl__net_inet6_ip6_gifhlim -_sysctl__net_inet6_ip6_hdrnestlimit -_sysctl__net_inet6_ip6_hlim -_sysctl__net_inet6_ip6_kame_version -_sysctl__net_inet6_ip6_keepfaith -_sysctl__net_inet6_ip6_log_interval -_sysctl__net_inet6_ip6_maxfragpackets -_sysctl__net_inet6_ip6_redirect -_sysctl__net_inet6_ip6_rip6stats -_sysctl__net_inet6_ip6_rr_prune -_sysctl__net_inet6_ip6_rtexpire -_sysctl__net_inet6_ip6_rtmaxcache -_sysctl__net_inet6_ip6_rtminexpire -_sysctl__net_inet6_ip6_stats -_sysctl__net_inet6_ip6_temppltime -_sysctl__net_inet6_ip6_tempvltime -_sysctl__net_inet6_ip6_use_deprecated -_sysctl__net_inet6_ip6_use_tempaddr -_sysctl__net_inet6_ip6_v6only -_sysctl__net_inet6_ipsec6 -_sysctl__net_inet6_ipsec6_ah_net_deflev -_sysctl__net_inet6_ipsec6_ah_trans_deflev -_sysctl__net_inet6_ipsec6_children -_sysctl__net_inet6_ipsec6_debug -_sysctl__net_inet6_ipsec6_def_policy -_sysctl__net_inet6_ipsec6_ecn -_sysctl__net_inet6_ipsec6_esp_net_deflev -_sysctl__net_inet6_ipsec6_esp_randpad -_sysctl__net_inet6_ipsec6_esp_trans_deflev -_sysctl__net_inet6_ipsec6_stats -_sysctl__net_inet6_tcp6 -_sysctl__net_inet6_tcp6_children -_sysctl__net_inet6_udp6 -_sysctl__net_inet6_udp6_children -_sysctl__net_inet_children -_sysctl__net_inet_div -_sysctl__net_inet_div_children -_sysctl__net_inet_icmp -_sysctl__net_inet_icmp_bmcastecho -_sysctl__net_inet_icmp_children -_sysctl__net_inet_icmp_drop_redirect -_sysctl__net_inet_icmp_icmplim -_sysctl__net_inet_icmp_log_redirect -_sysctl__net_inet_icmp_maskrepl -_sysctl__net_inet_icmp_stats -_sysctl__net_inet_igmp -_sysctl__net_inet_igmp_children -_sysctl__net_inet_igmp_stats -_sysctl__net_inet_ip -_sysctl__net_inet_ip_accept_sourceroute -_sysctl__net_inet_ip_check_interface -_sysctl__net_inet_ip_check_route_selfref -_sysctl__net_inet_ip_children -_sysctl__net_inet_ip_fastforwarding -_sysctl__net_inet_ip_forwarding -_sysctl__net_inet_ip_gifttl -_sysctl__net_inet_ip_intr_queue_drops -_sysctl__net_inet_ip_intr_queue_maxlen -_sysctl__net_inet_ip_keepfaith -_sysctl__net_inet_ip_linklocal -_sysctl__net_inet_ip_linklocal_children -_sysctl__net_inet_ip_linklocal_in -_sysctl__net_inet_ip_linklocal_in_allowbadttl -_sysctl__net_inet_ip_linklocal_in_children -_sysctl__net_inet_ip_linklocal_stat -_sysctl__net_inet_ip_maxfragpackets -_sysctl__net_inet_ip_portrange -_sysctl__net_inet_ip_portrange_children -_sysctl__net_inet_ip_portrange_first -_sysctl__net_inet_ip_portrange_hifirst -_sysctl__net_inet_ip_portrange_hilast -_sysctl__net_inet_ip_portrange_last -_sysctl__net_inet_ip_portrange_lowfirst -_sysctl__net_inet_ip_portrange_lowlast -_sysctl__net_inet_ip_redirect -_sysctl__net_inet_ip_rtexpire -_sysctl__net_inet_ip_rtmaxcache -_sysctl__net_inet_ip_rtminexpire -_sysctl__net_inet_ip_sourceroute -_sysctl__net_inet_ip_stats -_sysctl__net_inet_ip_subnets_are_local -_sysctl__net_inet_ip_ttl -_sysctl__net_inet_ip_use_route_genid -_sysctl__net_inet_ipsec -_sysctl__net_inet_ipsec_ah_cleartos -_sysctl__net_inet_ipsec_ah_net_deflev -_sysctl__net_inet_ipsec_ah_offsetmask -_sysctl__net_inet_ipsec_ah_trans_deflev -_sysctl__net_inet_ipsec_bypass -_sysctl__net_inet_ipsec_children -_sysctl__net_inet_ipsec_debug -_sysctl__net_inet_ipsec_def_policy -_sysctl__net_inet_ipsec_dfbit -_sysctl__net_inet_ipsec_ecn -_sysctl__net_inet_ipsec_esp_net_deflev -_sysctl__net_inet_ipsec_esp_port -_sysctl__net_inet_ipsec_esp_randpad -_sysctl__net_inet_ipsec_esp_trans_deflev -_sysctl__net_inet_ipsec_stats -_sysctl__net_inet_raw -_sysctl__net_inet_raw_children -_sysctl__net_inet_raw_maxdgram -_sysctl__net_inet_raw_pcblist 
-_sysctl__net_inet_raw_recvspace -_sysctl__net_inet_tcp -_sysctl__net_inet_tcp_always_keepalive -_sysctl__net_inet_tcp_blackhole -_sysctl__net_inet_tcp_children -_sysctl__net_inet_tcp_delacktime -_sysctl__net_inet_tcp_delayed_ack -_sysctl__net_inet_tcp_do_tcpdrain -_sysctl__net_inet_tcp_drop_synfin -_sysctl__net_inet_tcp_icmp_may_rst -_sysctl__net_inet_tcp_isn_reseed_interval -_sysctl__net_inet_tcp_keepidle -_sysctl__net_inet_tcp_keepinit -_sysctl__net_inet_tcp_keepintvl -_sysctl__net_inet_tcp_local_slowstart_flightsize -_sysctl__net_inet_tcp_log_in_vain -_sysctl__net_inet_tcp_msl -_sysctl__net_inet_tcp_mssdflt -_sysctl__net_inet_tcp_newreno -_sysctl__net_inet_tcp_path_mtu_discovery -_sysctl__net_inet_tcp_pcbcount -_sysctl__net_inet_tcp_pcblist -_sysctl__net_inet_tcp_recvspace -_sysctl__net_inet_tcp_rfc1323 -_sysctl__net_inet_tcp_rfc1644 -_sysctl__net_inet_tcp_sendspace -_sysctl__net_inet_tcp_slowlink_wsize -_sysctl__net_inet_tcp_slowstart_flightsize -_sysctl__net_inet_tcp_sockthreshold -_sysctl__net_inet_tcp_stats -_sysctl__net_inet_tcp_strict_rfc1948 -_sysctl__net_inet_tcp_tcbhashsize -_sysctl__net_inet_tcp_tcp_lq_overflow -_sysctl__net_inet_tcp_v6mssdflt -_sysctl__net_inet_udp -_sysctl__net_inet_udp_blackhole -_sysctl__net_inet_udp_checksum -_sysctl__net_inet_udp_children -_sysctl__net_inet_udp_log_in_vain -_sysctl__net_inet_udp_maxdgram -_sysctl__net_inet_udp_pcblist -_sysctl__net_inet_udp_recvspace -_sysctl__net_inet_udp_stats -_sysctl__net_key -_sysctl__net_key_ah_keymin -_sysctl__net_key_blockacq_count -_sysctl__net_key_blockacq_lifetime -_sysctl__net_key_children -_sysctl__net_key_debug -_sysctl__net_key_esp_auth -_sysctl__net_key_esp_keymin -_sysctl__net_key_int_random -_sysctl__net_key_larval_lifetime -_sysctl__net_key_natt_keepalive_interval -_sysctl__net_key_prefered_oldsa -_sysctl__net_key_spi_maxval -_sysctl__net_key_spi_minval -_sysctl__net_key_spi_trycnt -_sysctl__net_link -_sysctl__net_link_children -_sysctl__net_link_ether -_sysctl__net_link_ether_children -_sysctl__net_link_ether_inet -_sysctl__net_link_ether_inet_apple_hwcksum_rx -_sysctl__net_link_ether_inet_apple_hwcksum_tx -_sysctl__net_link_ether_inet_children -_sysctl__net_link_ether_inet_host_down_time -_sysctl__net_link_ether_inet_log_arp_wrong_iface -_sysctl__net_link_ether_inet_max_age -_sysctl__net_link_ether_inet_maxtries -_sysctl__net_link_ether_inet_proxyall -_sysctl__net_link_ether_inet_prune_intvl -_sysctl__net_link_ether_inet_useloopback -_sysctl__net_link_generic -_sysctl__net_link_generic_children -_sysctl__net_local -_sysctl__net_local_children -_sysctl__net_local_dgram -_sysctl__net_local_dgram_children -_sysctl__net_local_dgram_maxdgram -_sysctl__net_local_dgram_pcblist -_sysctl__net_local_dgram_recvspace -_sysctl__net_local_inflight -_sysctl__net_local_stream -_sysctl__net_local_stream_children -_sysctl__net_local_stream_pcblist -_sysctl__net_local_stream_recvspace -_sysctl__net_local_stream_sendspace -_sysctl__net_routetable -_sysctl__net_routetable_children -_sysctl__sysctl _sysctl__sysctl_children -_sysctl__sysctl_debug -_sysctl__sysctl_name -_sysctl__sysctl_name2oid -_sysctl__sysctl_name_children -_sysctl__sysctl_next -_sysctl__sysctl_next_children -_sysctl__sysctl_oidfmt -_sysctl__sysctl_oidfmt_children -_sysctl__user -_sysctl__user_children -_sysctl__vfs _sysctl__vfs_children _sysctl__vfs_generic _sysctl__vfs_generic_children -_sysctl__vfs_generic_ctlbyfsid -_sysctl__vfs_generic_ctlbyfsid_children -_sysctl__vfs_generic_nfs -_sysctl__vfs_generic_nfs_children -_sysctl__vfs_generic_nfs_client 
-_sysctl__vfs_generic_nfs_client_children -_sysctl__vfs_generic_nfs_client_initialdowndelay -_sysctl__vfs_generic_nfs_client_nextdowndelay -_sysctl__vfs_generic_vfsidlist -_sysctl__vm _sysctl__vm_children -_sysctl_clockrate -_sysctl_doproc -_sysctl_file _sysctl_handle_int _sysctl_handle_int2quad _sysctl_handle_long @@ -3171,7 +563,6 @@ _sysctl_handle_quad _sysctl_handle_string _sysctl_int _sysctl_mib_init -_sysctl_procargs _sysctl_quad _sysctl_rdint _sysctl_rdquad @@ -3181,611 +572,238 @@ _sysctl_register_all _sysctl_register_fixed _sysctl_register_oid _sysctl_register_set -_sysctl_set _sysctl_string _sysctl_struct _sysctl_unregister_oid _sysctl_unregister_set -_sysctl_vnode -_sysctlbyname -_sysent -_systemdomain -_systemdomain_init -_tablefull -_task_for_pid -_tbeproc -_tcb -_tcbinfo -_tcp6_ctlinput -_tcp6_input -_tcp6_usrreqs -_tcp_backoff -_tcp_canceltimers -_tcp_ccgen -_tcp_close -_tcp_ctlinput -_tcp_ctloutput -_tcp_delack_enabled -_tcp_delacktime -_tcp_do_newreno -_tcp_drain -_tcp_drop -_tcp_drop_syn_sent -_tcp_fasttimo -_tcp_fillheaders -_tcp_freeq -_tcp_gettaocache -_tcp_init -_tcp_input -_tcp_keepidle -_tcp_keepinit -_tcp_keepintvl -_tcp_lq_overflow -_tcp_maketemplate -_tcp_maxidle -_tcp_maxpersistidle -_tcp_msl -_tcp_mss -_tcp_mssdflt -_tcp_mssopt -_tcp_mtudisc -_tcp_new_isn -_tcp_newtcpcb -_tcp_now -_tcp_output -_tcp_quench -_tcp_recvspace -_tcp_respond -_tcp_rtlookup -_tcp_rtlookup6 -_tcp_sendspace -_tcp_setpersist -_tcp_slowtimo -_tcp_syn_backoff -_tcp_timers -_tcp_usrreqs -_tcp_v6mssdflt -_tcpstat -_temp_msgbuf -_termioschars -_thread_flavor_array -_thread_funnel_get -_thread_funnel_merge -_thread_funnel_set -_thread_funnel_switch -_threadsignal -_tick -_time -_time_wait_slots -_time_zone_slock_init -_timeout +_thread_issignal _timevaladd _timevalfix _timevalsub -_tk_cancc -_tk_nin -_tk_nout -_tk_rawcc -_to_bsd_time -_to_hfs_time -_tprintf -_tprintf_close -_tprintf_open -_tputchar -_trashMemory -_truncate -_tsleep -_tsleep0 -_tsleep1 -_ttioctl -_ttread -_ttrstrt -_ttselect -_ttsetwater -_ttspeedtab -_ttstart -_ttwakeup -_ttwrite -_ttwwakeup -_tty_pgsignal -_ttyblock -_ttychars -_ttycheckoutq -_ttyclose -_ttyflush -_ttyfree -_ttyinfo -_ttyinput -_ttylclose -_ttymalloc -_ttymodem -_ttyopen -_ttyprintf -_ttyselect -_ttysleep -_ttywait _tvtoabstime -_tvtohz -_tz -_tz_slock -_uap _ubc_blktooff -_ubc_clean -_ubc_clearflags _ubc_create_upl _ubc_getcred _ubc_getobject _ubc_getsize -_ubc_hold _ubc_info_deallocate _ubc_info_init _ubc_info_zone -_ubc_invalidate _ubc_isinuse -_ubc_issetflags _ubc_offtoblk _ubc_page_op -_ubc_pushdirty -_ubc_pushdirty_range _ubc_range_op -_ubc_rele -_ubc_release -_ubc_release_named _ubc_setcred -_ubc_setflags -_ubc_setpager _ubc_setsize -_ubc_uncache +_ubc_sync_range _ubc_upl_abort _ubc_upl_abort_range _ubc_upl_commit _ubc_upl_commit_range _ubc_upl_map -_ubc_upl_pageinfo _ubc_upl_unmap -_ucsfncmp -_ucsfntrans -_udb -_udbinfo -_udp6_ctlinput -_udp6_input -_udp6_output -_udp6_recvspace -_udp6_sendspace -_udp6_usrreqs -_udp_ctlinput -_udp_in6 -_udp_init -_udp_input -_udp_ip6 -_udp_notify -_udp_recvspace -_udp_sendspace -_udp_shutdown -_udp_usrreqs -_udpstat -_ufs_access -_ufs_advlock -_ufs_bmap -_ufs_bmaparray -_ufs_check_export -_ufs_checkpath -_ufs_close -_ufs_cmap -_ufs_create -_ufs_dirbad -_ufs_dirbadentry -_ufs_dirempty -_ufs_direnter -_ufs_direnter2 -_ufs_dirremove -_ufs_dirrewrite -_ufs_getattr -_ufs_getlbns -_ufs_ihash_slock -_ufs_ihashget -_ufs_ihashinit -_ufs_ihashins -_ufs_ihashlookup -_ufs_ihashrem -_ufs_inactive -_ufs_init -_ufs_ioctl 
-_ufs_islocked -_ufs_kqfilt_add -_ufs_link -_ufs_lock -_ufs_lookup -_ufs_makeinode -_ufs_mkdir -_ufs_mknod -_ufs_mmap -_ufs_open -_ufs_pathconf -_ufs_print -_ufs_quotactl -_ufs_readdir -_ufs_readlink -_ufs_reclaim -_ufs_remove -_ufs_rename -_ufs_rmdir -_ufs_root -_ufs_seek -_ufs_select -_ufs_setattr -_ufs_start -_ufs_strategy -_ufs_symlink -_ufs_unlock -_ufs_vfsops -_ufs_vinit -_ufs_whiteout -_ufsfifo_close -_ufsfifo_kqfilt_add -_ufsfifo_read -_ufsfifo_write -_ufsspec_close -_ufsspec_read -_ufsspec_write -_uihash -_uihashtbl +_ubc_upl_pageinfo +_upl_page_present +_upl_dirty_page +_upl_valid_page +_uio_addiov +_uio_create +_uio_curriovbase +_uio_curriovlen +_uio_duplicate +_uio_free +_uio_getiov +_uio_iovcnt +_uio_isuserspace +_uio_offset +_uio_reset +_uio_resid +_uio_rw +_uio_setoffset +_uio_setresid +_uio_setrw +_uio_update _uiomove _uiomove64 -_uipc_usrreqs -_umask -_unblock_procsigmask -_undelete -_unicode_to_hfs -_union_abortop -_union_access -_union_advlock -_union_allocvp -_union_blktooff -_union_bmap -_union_close -_union_cmap -_union_copyfile -_union_copyup -_union_create -_union_dircache -_union_dowhiteout -_union_freevp -_union_fsync -_union_getattr -_union_inactive -_union_init -_union_ioctl -_union_islocked -_union_lease -_union_link -_union_lock -_union_lookup -_union_mkdir -_union_mknod -_union_mkshadow -_union_mkwhiteout -_union_mmap -_union_mount -_union_newlower -_union_newsize -_union_newupper -_union_offtoblk -_union_open -_union_pagein -_union_pageout -_union_pathconf -_union_print -_union_read -_union_readdir -_union_readlink -_union_reclaim -_union_remove -_union_removed_upper -_union_rename -_union_revoke -_union_rmdir -_union_root -_union_seek -_union_select -_union_setattr -_union_start -_union_statfs -_union_strategy -_union_symlink -_union_unlock -_union_unmount -_union_updatevp -_union_vfsops -_union_vn_close -_union_vn_create -_union_vnodeop_entries -_union_vnodeop_opv_desc -_union_vnodeop_p -_union_whiteout -_union_write -_unix_syscall -_unix_syscall_return -_unlink -_unmount -_unp_connect2 -_unp_dispose -_unp_externalize -_unp_init -_unp_zone -_unputc -_unregister_sockfilter -_untimeout -_upl_get_internal_page_list -_uprintf -_ureadc _useracc -_userland_sysctl _utf8_decodestr -_utf8_encodelen _utf8_encodestr -_utf8_to_hfs -_utf8_to_mac_roman -_utf_extrabytes -_utimes -_utrace -_v_putc -_va_null -_vagevp -_vattr_null +_utf8_validatestr _vcount -_vfinddev _vflush -_vfork -_vfork_exit -_vfork_return +_vfs_64bitready +_vfs_addname +_vfs_authopaque +_vfs_authopaqueaccess _vfs_busy +_vfs_clearauthopaque +_vfs_clearauthopaqueaccess +_vfs_clearextendedsecurity +_vfs_clearflags +_vfs_context_create +_vfs_context_is64bit +_vfs_context_issignal +_vfs_context_pid +_vfs_context_proc +_vfs_context_rele +_vfs_context_suser +_vfs_context_ucred +_vfs_devblocksize _vfs_event_init _vfs_event_signal -_vfs_export -_vfs_export_lookup +_vfs_flags +_vfs_fsadd +_vfs_fsprivate +_vfs_fsremove _vfs_getnewfsid _vfs_getvfs +_vfs_ioattr _vfs_init_io_attributes -_vfs_io_attributes +_vfs_isforce +_vfs_isrdonly +_vfs_isrdwr +_vfs_isreload +_vfs_issynchronous +_vfs_isupdate +_vfs_iswriteupgrade +_vfs_iterate +_vfs_maxsymlen _vfs_mountedon _vfs_mountroot +_vfs_name _vfs_nummntops _vfs_op_descs _vfs_op_init _vfs_opv_descs _vfs_opv_init _vfs_opv_numops -_vfs_rootmountalloc +_vfs_removename +_vfs_setauthopaque +_vfs_setauthopaqueaccess +_vfs_setextendedsecurity +_vfs_setflags +_vfs_setfsprivate +_vfs_setioattr +_vfs_setmaxsymlen +_vfs_statfs _vfs_sysctl +_vfs_typenum _vfs_unbusy -_vfsconf 
-_vfsconf_add -_vfsconf_del -_vfsinit -_vget -_vgone -_vgonel -_vhold -_vinvalbuf -_vm_initial_limit_core -_vm_initial_limit_data -_vm_initial_limit_stack -_vm_sysctl +_vn_access _vn_bwrite -_vn_close _vn_default_error -_vn_lock -_vn_mkdir -_vn_open +_vn_getpath _vn_rdwr -_vn_stat -_vn_symlink -_vn_table -_vn_writechk -_vndevice_init -_vndevice_root_image -_vnode_free_list -_vnode_free_list_slock -_vnode_inactive_list -_vnode_objects_reclaimed -_vnode_pagein -_vnode_pageout -_vnode_pager_get_filesize -_vnode_reclaim_tried +_vn_revoke +_vnode_addfsref +_vnode_authorize +_vnode_authattr +_vnode_authattr_new +_vnode_clearfsnode +_vnode_clearmountedon +_vnode_clearnocache +_vnode_clearnoreadahead +_vnode_close +_vnode_create +_vnode_fsnode +_vnode_get +_vnode_getattr +_vnode_getwithref +_vnode_getwithvid +_vnode_hascleanblks +_vnode_hasdirtyblks +_vnode_isblk +_vnode_ischr +_vnode_isdir +_vnode_islnk +_vnode_isfifo +_vnode_isinuse +_vnode_ismount +_vnode_ismountedon +_vnode_isnocache +_vnode_isnoreadahead +_vnode_isreg +_vnode_issystem +_vnode_isvroot +_vnode_iterate +_vnode_mount +_vnode_mountedhere +_vnode_lookup +_vnode_open +_vnode_put +_vnode_recycle +_vnode_ref +_vnode_rele +_vnode_removefsref +_vnode_setattr +_vnode_setmountedon +_vnode_setnocache +_vnode_setnoreadahead +_vnode_settag +_vnode_specrdev +_vnode_startwrite +_vnode_vfs64bitready +_vnode_vfsisrdonly +_vnode_vfsmaxsymlen +_vnode_vfsname +_vnode_vfstypenum +_vnode_vid +_vnode_vtype +_vnode_waitforwrites +_vnode_writedone _vnodetarget -_vnops -_volfs_access -_volfs_fhtovp -_volfs_getattr -_volfs_init -_volfs_islocked -_volfs_load -_volfs_lock -_volfs_lookup -_volfs_mount -_volfs_pathconf -_volfs_quotactl -_volfs_readdir -_volfs_reclaim -_volfs_rmdir -_volfs_root -_volfs_select -_volfs_start -_volfs_statfs -_volfs_sync -_volfs_sysctl -_volfs_unlock -_volfs_unmount -_volfs_vfsops -_volfs_vget -_volfs_vnodeop_entries -_volfs_vnodeop_opv_desc -_volfs_vnodeop_p -_volfs_vptofh -_vop_abortop_desc -_vop_abortop_vp_offsets -_vop_access_desc -_vop_access_vp_offsets -_vop_advlock_desc -_vop_advlock_vp_offsets -_vop_allocate_desc -_vop_allocate_vp_offsets -_vop_blkatoff_desc -_vop_blkatoff_vp_offsets -_vop_blktooff_desc -_vop_blktooff_vp_offsets -_vop_bmap_desc -_vop_bmap_vp_offsets -_vop_bwrite_desc -_vop_bwrite_vp_offsets -_vop_cachedlookup_desc -_vop_cachedlookup_vp_offsets -_vop_close_desc -_vop_close_vp_offsets -_vop_cmap_desc -_vop_cmap_vp_offsets -_vop_copyfile_desc -_vop_copyfile_vp_offsets -_vop_create_desc -_vop_create_vp_offsets -_vop_default_desc -_vop_devblocksize_desc -_vop_devblocksize_vp_offsets -_vop_exchange_desc -_vop_exchange_vp_offsets -_vop_fsync_desc -_vop_fsync_vp_offsets -_vop_getattr_desc -_vop_getattr_vp_offsets -_vop_getattrlist_desc -_vop_getattrlist_vp_offsets -_vop_inactive_desc -_vop_inactive_vp_offsets -_vop_ioctl_desc -_vop_ioctl_vp_offsets -_vop_islocked_desc -_vop_islocked_vp_offsets -_vop_kqfilt_add_desc -_vop_kqfilt_add_vp_offsets -_vop_kqfilt_remove_desc -_vop_kqfilt_remove_vp_offsets -_vop_lease_desc -_vop_lease_vp_offsets -_vop_link_desc -_vop_link_vp_offsets -_vop_lock_desc -_vop_lock_vp_offsets -_vop_lookup_desc -_vop_lookup_vp_offsets -_vop_mkcomplex_desc -_vop_mkcomplex_vp_offsets -_vop_mkdir_desc -_vop_mkdir_vp_offsets -_vop_mknod_desc -_vop_mknod_vp_offsets -_vop_mmap_desc -_vop_mmap_vp_offsets -_vop_noislocked -_vop_nolock -_vop_nounlock -_vop_offtoblk_desc -_vop_offtoblk_vp_offsets -_vop_open_desc -_vop_open_vp_offsets -_vop_pagein_desc -_vop_pagein_vp_offsets -_vop_pageout_desc 
-_vop_pageout_vp_offsets -_vop_pathconf_desc -_vop_pathconf_vp_offsets -_vop_pgrd_desc -_vop_pgrd_vp_offsets -_vop_pgwr_desc -_vop_pgwr_vp_offsets -_vop_print_desc -_vop_print_vp_offsets -_vop_read_desc -_vop_read_vp_offsets -_vop_readdir_desc -_vop_readdir_vp_offsets -_vop_readdirattr_desc -_vop_readdirattr_vp_offsets -_vop_readlink_desc -_vop_readlink_vp_offsets -_vop_reallocblks_desc -_vop_reallocblks_vp_offsets -_vop_reclaim_desc -_vop_reclaim_vp_offsets -_vop_remove_desc -_vop_remove_vp_offsets -_vop_rename_desc -_vop_rename_vp_offsets -_vop_revoke -_vop_revoke_desc -_vop_revoke_vp_offsets -_vop_rmdir_desc -_vop_rmdir_vp_offsets -_vop_searchfs_desc -_vop_searchfs_vp_offsets -_vop_seek_desc -_vop_seek_vp_offsets -_vop_select_desc -_vop_select_vp_offsets -_vop_setattr_desc -_vop_setattr_vp_offsets -_vop_setattrlist_desc -_vop_setattrlist_vp_offsets -_vop_strategy_desc -_vop_strategy_vp_offsets -_vop_symlink_desc -_vop_symlink_vp_offsets -_vop_truncate_desc -_vop_truncate_vp_offsets -_vop_unlock_desc -_vop_unlock_vp_offsets -_vop_update_desc -_vop_update_vp_offsets -_vop_valloc_desc -_vop_valloc_vp_offsets -_vop_vfree_desc -_vop_vfree_vp_offsets -_vop_whiteout_desc -_vop_whiteout_vp_offsets -_vop_write_desc -_vop_write_vp_offsets -_vp_pagein -_vp_pgoclean -_vp_pgodirty -_vprint -_vput -_vpwakeup -_vrecycle -_vref -_vrele +_vnop_access_desc +_vnop_advlock_desc +_vnop_allocate_desc +_vnop_blktooff_desc +_vnop_blockmap_desc +_vnop_bwrite_desc +_vnop_close_desc +_vnop_copyfile_desc +_vnop_create_desc +_vnop_default_desc +_vnop_exchange_desc +_vnop_fsync_desc +_vnop_getattr_desc +_vnop_getattrlist_desc +_vnop_getxattr_desc +_vnop_inactive_desc +_vnop_ioctl_desc +_vnop_link_desc +_vnop_listxattr_desc +_vnop_lookup_desc +_vnop_mkdir_desc +_vnop_mknod_desc +_vnop_mmap_desc +_vnop_mnomap_desc +_vnop_offtoblk_desc +_vnop_open_desc +_vnop_pagein_desc +_vnop_pageout_desc +_vnop_pathconf_desc +_vnop_read_desc +_vnop_readdir_desc +_vnop_readdirattr_desc +_vnop_readlink_desc +_vnop_reclaim_desc +_vnop_remove_desc +_vnop_removexattr_desc +_vnop_rename_desc +_vnop_revoke_desc +_vnop_rmdir_desc +_vnop_searchfs_desc +_vnop_select_desc +_vnop_setattr_desc +_vnop_setattrlist_desc +_vnop_setxattr_desc +_vnop_strategy_desc +_vnop_symlink_desc +_vnop_whiteout_desc +_vnop_write_desc _vslock -_vsnprintf -_vsprintf _vsunlock _vttoif_tab -_vwakeup -_wait1 -_wait1continue -_wait4 -_waitevent -_waittime _wakeup _wakeup_one -_walk_allvnodes -_walk_vnodes_debug -_watchevent -_write -_writev -_ws_disabled -_zError -_z_errmsg -_zeroin6_addr -_zeroin_addr -_zlibVersion -_zombproc - +_xattr_protected +_xattr_validatename diff --git a/config/BSDKernel.ppc.exports b/config/BSDKernel.ppc.exports index 195e5f977..e69de29bb 100644 --- a/config/BSDKernel.ppc.exports +++ b/config/BSDKernel.ppc.exports @@ -1,489 +0,0 @@ -_AARPwakeup -_ASPgetmsg -_ASPputmsg -_ATPgetreq -_ATPgetrsp -_ATPsndreq -_ATPsndrsp -_ATgetmsg -_ATputmsg -_ATsocket -_AURPaccess -_AURPcleanup -_AURPcmdx -_AURPfreemsg -_AURPgetmsg -_AURPgetri -_AURPinit -_AURPiocack -_AURPiocnak -_AURPpurgeri -_AURPrcvOpenReq -_AURPrcvOpenRsp -_AURPrcvRDReq -_AURPrcvRIAck -_AURPrcvRIReq -_AURPrcvRIRsp -_AURPrcvRIUpd -_AURPrcvTickle -_AURPrcvTickleAck -_AURPrcvZReq -_AURPrcvZRsp -_AURPrtupdate -_AURPsend -_AURPsetri -_AURPshutdown -_AURPsndGDZL -_AURPsndGZN -_AURPsndOpenReq -_AURPsndOpenReq_funnel -_AURPsndRDReq -_AURPsndRIAck -_AURPsndRIReq -_AURPsndRIReq_funnel -_AURPsndRIRsp_funnel -_AURPsndRIUpd -_AURPsndRIUpd_funnel -_AURPsndTickle -_AURPsndZReq -_AURPsndZRsp -_AURPupdate 
-_AURPupdateri -_AbortIO -_AdspBad -_CalcRecvWdw -_CalcSendQFree -_CheckAttn -_CheckOkToClose -_CheckReadQueue -_CheckRecvSeq -_CheckSend -_CleanupGlobals -_CompleteQueue -_DDP_chksum_on -_DDP_slfsnd_on -_DoClose -_DoTimerElem -_ErrorRTMPoverflow -_ErrorZIPoverflow -_FillSendQueue -_FindSender -_InitGlobals -_InsertTimerElem -_NextCID -_NotifyUser -_RT_maxentry -_RT_table -_RT_table_freelist -_RT_table_start -_RXAttention -_RXData -_RXFReset -_RXFResetAck -_RemoveCCB -_RemoveTimerElem -_RouterError -_RouterMix -_RxClose -_SndMsgUp -_TimerQueueTick -_TimerStop -_TimerTick -_TimerTick_funnel -_TrashSession -_UrgentUser -_ZIPwakeup -_ZT_maxentry -_ZT_table -__ATPgetreq -__ATPgetrsp -__ATPsndreq -__ATPsndrsp -__ATclose -__ATgetmsg -__ATioctl -__ATkqfilter -__ATputmsg -__ATread -__ATrw -__ATselect -__ATsocket -__ATwrite -_aarp_chk_addr -_aarp_init1 -_aarp_init2 -_aarp_rcv_pkt -_aarp_sched_probe -_aarp_send_data -_aarp_table -_abs -_add_ddp_handler -_adspAllocateCCB -_adspAssignSocket -_adspAttention -_adspCLDeny -_adspCLListen -_adspClose -_adspDeassignSocket -_adspGlobal -_adspInit -_adspInited -_adspMode -_adspNewCID -_adspOpen -_adspOptions -_adspPacket -_adspRead -_adspReadAttention -_adspReadHandler -_adspRelease -_adspReset -_adspStatus -_adspWrite -_adspWriteHandler -_adsp_close -_adsp_dequeue_ccb -_adsp_input -_adsp_inputC -_adsp_inputQ -_adsp_open -_adsp_pidM -_adsp_readable -_adsp_rput -_adsp_sendddp -_adsp_window -_adsp_wput -_adsp_writeable -_adspall_lock -_adspgen_lock -_adspioc_ack -_adsptmr_lock -_append_copy -_appletalk_hack_start -_appletalk_inited -_arpinp_lock -_asp_ack_reply -_asp_clock -_asp_clock_funnel -_asp_close -_asp_init -_asp_inpC -_asp_nak_reply -_asp_open -_asp_pack_bdsp -_asp_readable -_asp_scbQ -_asp_wput -_aspall_lock -_asptmo_lock -_at_control -_at_ddp_brt -_at_ddp_stats -_at_ifQueueHd -_at_insert -_at_interfaces -_at_ioctl -_at_memzone_init -_at_pcballoc -_at_pcbbind -_at_pcbdetach -_at_reg_mcast -_at_state -_at_unreg_mcast -_atalk_closeref -_atalk_enablew -_atalk_flush -_atalk_getref -_atalk_gettrace -_atalk_load -_atalk_notify -_atalk_notify_sel -_atalk_openref -_atalk_peek -_atalk_post_msg -_atalk_putnext -_atalk_settrace -_atalk_to_ip -_atalk_unload -_atalkdomain -_atalkintr -_atalkintrq -_atalksw -_atp_bind -_atp_build_release -_atp_cancel_req -_atp_close -_atp_delete_free_clusters -_atp_dequeue_atp -_atp_drop_req -_atp_free -_atp_free_cluster_list -_atp_free_cluster_timeout_set -_atp_free_list -_atp_init -_atp_inited -_atp_input -_atp_inputQ -_atp_iocack -_atp_iocnak -_atp_link -_atp_lomask -_atp_mask -_atp_need_rel -_atp_open -_atp_pidM -_atp_rcb_alloc -_atp_rcb_data -_atp_rcb_free -_atp_rcb_free_list -_atp_rcb_timer -_atp_reply -_atp_req_ind -_atp_req_timeout -_atp_resource_m -_atp_retry_req -_atp_rput -_atp_rsp_ind -_atp_send -_atp_send_replies -_atp_send_req -_atp_send_rsp -_atp_state_data -_atp_tid -_atp_timout -_atp_trans_abort -_atp_trans_alloc -_atp_trans_free -_atp_trans_free_list -_atp_treq_event -_atp_trp_clock -_atp_trp_clock_funnel -_atp_unlink -_atp_untimout -_atp_used_list -_atp_wput -_atp_x_done -_atp_x_done_funnel -_atpall_lock -_atpcb_zone -_atpgen_lock -_atptmo_lock -_attachData -_aurp_close -_aurp_global -_aurp_gref -_aurp_ifID -_aurp_open -_aurp_state -_aurp_wakeup -_aurp_wput -_aurpd_start -_aurpgen_lock -_calcRecvQ -_calcSendQ -_ccb_used_list -_completepb -_cons_getc -_cons_putc -_consclose -_consioctl -_consopen -_consread -_consselect -_conswrite -_copy_pkt -_dbgBits -_ddp_AURPfuncx -_ddp_AURPsendx -_ddp_add_if -_ddp_adjmsg 
-_ddp_age_router -_ddp_bit_reverse -_ddp_brt_init -_ddp_brt_shutdown -_ddp_brt_sweep -_ddp_brt_sweep_funnel -_ddp_brt_sweep_timer -_ddp_checksum -_ddp_compress_msg -_ddp_ctloutput -_ddp_glean -_ddp_growmsg -_ddp_handler -_ddp_head -_ddp_init -_ddp_input -_ddp_notify_nbp -_ddp_output -_ddp_pru_abort -_ddp_pru_attach -_ddp_pru_bind -_ddp_pru_connect -_ddp_pru_control -_ddp_pru_detach -_ddp_pru_disconnect -_ddp_pru_peeraddr -_ddp_pru_send -_ddp_pru_shutdown -_ddp_pru_sockaddr -_ddp_putmsg -_ddp_recvspace -_ddp_rem_if -_ddp_router_output -_ddp_sendspace -_ddp_shutdown -_ddp_slowtimo -_ddp_socket_inuse -_ddp_start -_ddp_usrreqs -_ddpall_lock -_ddpinp_lock -_dst_addr_cnt -_elap_dataput -_elap_offline -_elap_online3 -_elap_wput -_ep_input -_errstr -_et_zeroaddr -_etalk_multicast_addr -_find_ifID -_forUs -_gbuf_freel -_gbuf_linkpkt -_gbuf_strip -_getIfUsage -_getLocalZone -_getNbpTable -_getNbpTableSize -_getPhysAddrSize -_getRTRLocalZone -_getRtmpTable -_getRtmpTableSize -_getSPLocalZone -_getZipTable -_getZipTableSize -_getchar -_gets -_gref_alloc -_gref_close -_gref_wput -_ifID_home -_ifID_table -_init_ddp_handler -_ioc_ack -_lap_online -_m_clattach -_m_lgbuf_alloc -_m_lgbuf_free -_name_registry -_nbp_add_multicast -_nbp_delete_entry -_nbp_fillin_nve -_nbp_find_nve -_nbp_input -_nbp_mh_reg -_nbp_new_nve_entry -_nbp_shutdown -_nbp_strhash -_net_access -_net_access_cnt -_net_export -_net_port -_no_of_nets_tried -_no_of_nodes_tried -_nve_lock -_ot_ddp_check_socket -_pat_output -_pktsDropped -_pktsHome -_pktsIn -_pktsOut -_ppc_gettimeofday -_prep_ZIP_reply_packet -_probe_cb -_qAddToEnd -_qfind_m -_rcv_connection_id -_reboot_how -_refall_lock -_regDefaultZone -_releaseData -_routerStart -_router_added -_router_killed -_routershutdown -_routing_needed -_rt_bdelete -_rt_binsert -_rt_blookup -_rt_delete -_rt_getNextRoute -_rt_insert -_rt_show -_rt_sortedshow -_rt_table_init -_rtmp_dropper -_rtmp_init -_rtmp_input -_rtmp_prep_new_packet -_rtmp_purge -_rtmp_r_find_bridge -_rtmp_router_input -_rtmp_router_start -_rtmp_send_port -_rtmp_send_port_funnel -_rtmp_shutdown -_rtmp_timeout -_scb_free_list -_scb_resource_m -_scb_used_list -_setLocalZones -_sethzonehash -_sip_input -_snmpFlags -_snmpStats -_sys_ATPgetreq -_sys_ATPgetrsp -_sys_ATPsndreq -_sys_ATPsndrsp -_sys_ATgetmsg -_sys_ATputmsg -_sys_ATsocket -_sysctl__net_appletalk -_sysctl__net_appletalk_children -_sysctl__net_appletalk_ddpstats -_sysctl__net_appletalk_debug -_sysctl__net_appletalk_routermix -_trackrouter -_trackrouter_rem_if -_trp_tmo_rcb -_ttalk_multicast_addr -_update_tmo -_upshift8 -_uwritec -_xpatcnt -_xsum_assym -_zip_control -_zip_handle_getmyzone -_zip_prep_query_packet -_zip_reply_received -_zip_reply_to_getlocalzones -_zip_reply_to_getzonelist -_zip_router_input -_zip_sched_getnetinfo -_zip_send_queries -_zip_type_packet -_zonename_equal -_zt_add_zone -_zt_add_zonename -_zt_clr_zmap -_zt_compute_hash -_zt_ent_zcount -_zt_ent_zindex -_zt_find_zname -_zt_getNextZone -_zt_get_zmcast -_zt_remove_zones -_zt_set_zmap -_zt_upper_zname diff --git a/config/IOKit.exports b/config/IOKit.exports index 19c59a9e3..4729bf597 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -26,7 +26,14 @@ _IOKitResetTime _IOLibInit _IOLockAlloc _IOLockFree +_IOLockGetMachLock _IOLockInitWithState +_IOLockLock:_lck_mtx_lock +_IOLockSleep +_IOLockSleepDeadline +_IOLockTryLock:_lck_mtx_try_lock +_IOLockUnlock:_lck_mtx_unlock +_IOLockWakeup _IOLog _IOMalloc _IOMallocAligned @@ -54,8 +61,13 @@ _IOPanic _IOPrintPlane _IORWLockAlloc _IORWLockFree 
+_IORWLockGetMachLock +_IORWLockRead:_lck_rw_lock_shared +_IORWLockUnlock:_lck_rw_done +_IORWLockWrite:_lck_rw_lock_exclusive _IORecursiveLockAlloc _IORecursiveLockFree +_IORecursiveLockGetMachLock _IORecursiveLockHaveLock _IORecursiveLockLock _IORecursiveLockSleep @@ -65,7 +77,11 @@ _IORecursiveLockWakeup _IOSetProcessorCacheMode _IOSimpleLockAlloc _IOSimpleLockFree +_IOSimpleLockGetMachLock _IOSimpleLockInit +_IOSimpleLockLock:_lck_spin_lock +_IOSimpleLockTryLock:_lck_spin_try_lock +_IOSimpleLockUnlock:_lck_spin_unlock _IOSizeToAlignment _IOSleep _IOSpinUnlock @@ -81,19 +97,18 @@ _PEGetPlatformEpoch _PEHaltRestart _PESavePanicInfo _PESetGMTTimeOfDay +_PE_call_timebase_callback _PE_cpu_halt _PE_cpu_machine_init _PE_cpu_machine_quiesce _PE_cpu_signal _PE_cpu_start -_PE_call_timebase_callback _PE_enter_debugger _PE_halt_restart _PE_parse_boot_arg _PE_poll_input _StartIOKit __Z10tellClientP8OSObjectPv -__Z16IOCPUSleepKernelv __Z16IODTFindSlotNameP15IORegistryEntrym __Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E __Z17IODTGetCellCountsP15IORegistryEntryPmS1_ @@ -149,7 +164,6 @@ __ZN10IOWorkLoop10wakeupGateEPvb __ZN10IOWorkLoop12tryCloseGateEv __ZN10IOWorkLoop13_maintRequestEPvS0_S0_S0_ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource -__ZN10IOWorkLoop16launchThreadMainEPv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv __ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev @@ -159,7 +173,6 @@ __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev -__ZN10IOWorkLoop22threadMainContinuationEv __ZN10IOWorkLoop4freeEv __ZN10IOWorkLoop4initEv __ZN10IOWorkLoop8openGateEv @@ -317,6 +330,7 @@ __ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem __ZN12IOUserClient24registerNotificationPortEP8ipc_portmm __ZN12IOUserClient25getExternalMethodForIndexEm __ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor __ZN12IOUserClient30getExternalAsyncMethodForIndexEm __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem __ZN12IOUserClient4freeEv @@ -517,7 +531,6 @@ __ZN14IOPMrootDomain26handleSleepTimerExpirationEv __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv __ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain27registerPMSettingControllerEPFiiiPvES0_ -__ZN14IOPMrootDomain29registerPlatformPowerProfilesEP7OSArray __ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev @@ -574,18 +587,6 @@ __ZN15IOPMPowerSourceC2EPK11OSMetaClass __ZN15IOPMPowerSourceC2Ev __ZN15IOPMPowerSourceD0Ev __ZN15IOPMPowerSourceD2Ev -__ZN15IOPanicPlatform10gMetaClassE -__ZN15IOPanicPlatform10superClassE -__ZN15IOPanicPlatform5startEP9IOService -__ZN15IOPanicPlatform9MetaClassC1Ev -__ZN15IOPanicPlatform9MetaClassC2Ev -__ZN15IOPanicPlatform9metaClassE -__ZN15IOPanicPlatformC1EPK11OSMetaClass -__ZN15IOPanicPlatformC1Ev -__ZN15IOPanicPlatformC2EPK11OSMetaClass -__ZN15IOPanicPlatformC2Ev -__ZN15IOPanicPlatformD0Ev -__ZN15IOPanicPlatformD2Ev __ZN15IORegistryEntry10gMetaClassE __ZN15IORegistryEntry10initializeEv __ZN15IORegistryEntry10superClassE @@ -612,9 +613,9 @@ __ZN15IORegistryEntry15getRegistryRootEv __ZN15IORegistryEntry16detachFromParentEPS_PK15IORegistryPlane __ZN15IORegistryEntry16setPropertyTableEP12OSDictionary 
__ZN15IORegistryEntry17matchPathLocationEPKcPK15IORegistryPlane +__ZN15IORegistryEntry17runPropertyActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ __ZN15IORegistryEntry18getGenerationCountEv __ZN15IORegistryEntry21getChildFromComponentEPPKcPK15IORegistryPlane -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry5Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev @@ -670,22 +671,6 @@ __ZN15IORegistryPlaneC2EPK11OSMetaClass __ZN15IORegistryPlaneC2Ev __ZN15IORegistryPlaneD0Ev __ZN15IORegistryPlaneD2Ev -__ZN15IOWatchDogTimer10gMetaClassE -__ZN15IOWatchDogTimer10superClassE -__ZN15IOWatchDogTimer13setPropertiesEP8OSObject -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev -__ZN15IOWatchDogTimer4stopEP9IOService -__ZN15IOWatchDogTimer5startEP9IOService -__ZN15IOWatchDogTimer9MetaClassC1Ev -__ZN15IOWatchDogTimer9MetaClassC2Ev -__ZN15IOWatchDogTimer9metaClassE -__ZN15IOWatchDogTimerC1EPK11OSMetaClass -__ZN15IOWatchDogTimerC2EPK11OSMetaClass -__ZN15IOWatchDogTimerD0Ev -__ZN15IOWatchDogTimerD2Ev __ZN15_IOConfigThread10gMetaClassE __ZN15_IOConfigThread10superClassE __ZN15_IOConfigThread12configThreadEv @@ -713,22 +698,6 @@ __ZN16IOKitDiagnosticsC2EPK11OSMetaClass __ZN16IOKitDiagnosticsC2Ev __ZN16IOKitDiagnosticsD0Ev __ZN16IOKitDiagnosticsD2Ev -__ZN16IOPMPagingPlexus10gMetaClassE -__ZN16IOPMPagingPlexus10superClassE -__ZN16IOPMPagingPlexus12findProviderEP9IOService -__ZN16IOPMPagingPlexus15processChildrenEv -__ZN16IOPMPagingPlexus15processSiblingsEP9IOService -__ZN16IOPMPagingPlexus17setAggressivenessEmm -__ZN16IOPMPagingPlexus5startEP9IOService -__ZN16IOPMPagingPlexus9MetaClassC1Ev -__ZN16IOPMPagingPlexus9MetaClassC2Ev -__ZN16IOPMPagingPlexus9metaClassE -__ZN16IOPMPagingPlexusC1EPK11OSMetaClass -__ZN16IOPMPagingPlexusC1Ev -__ZN16IOPMPagingPlexusC2EPK11OSMetaClass -__ZN16IOPMPagingPlexusC2Ev -__ZN16IOPMPagingPlexusD0Ev -__ZN16IOPMPagingPlexusD2Ev __ZN16IOPMinformeeList10gMetaClassE __ZN16IOPMinformeeList10initializeEv __ZN16IOPMinformeeList10nextInListEP12IOPMinformee @@ -748,81 +717,6 @@ __ZN16IOPMinformeeListC2EPK11OSMetaClass __ZN16IOPMinformeeListC2Ev __ZN16IOPMinformeeListD0Ev __ZN16IOPMinformeeListD2Ev -__ZN16IOPlatformDevice10gMetaClassE -__ZN16IOPlatformDevice10superClassE -__ZN16IOPlatformDevice12getResourcesEv -__ZN16IOPlatformDevice13matchLocationEP9IOService -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev -__ZN16IOPlatformDevice9MetaClassC1Ev -__ZN16IOPlatformDevice9MetaClassC2Ev -__ZN16IOPlatformDevice9metaClassE -__ZN16IOPlatformDeviceC1EPK11OSMetaClass -__ZN16IOPlatformDeviceC1Ev -__ZN16IOPlatformDeviceC2EPK11OSMetaClass -__ZN16IOPlatformDeviceC2Ev -__ZN16IOPlatformDeviceD0Ev -__ZN16IOPlatformDeviceD2Ev -__ZN16IOPlatformExpert10gMetaClassE -__ZN16IOPlatformExpert10superClassE -__ZN16IOPlatformExpert11haltRestartEj -__ZN16IOPlatformExpert11sleepKernelEv -__ZN16IOPlatformExpert12CheckSubTreeEP7OSArrayP9IOServiceS3_P12OSDictionary -__ZN16IOPlatformExpert12getModelNameEPci -__ZN16IOPlatformExpert12hasPMFeatureEm -__ZN16IOPlatformExpert13savePanicInfoEPhm -__ZN16IOPlatformExpert14getBootROMTypeEv -__ZN16IOPlatformExpert14getChipSetTypeEv 
-__ZN16IOPlatformExpert14getConsoleInfoEP8PE_Video -__ZN16IOPlatformExpert14getMachineNameEPci -__ZN16IOPlatformExpert14getMachineTypeEv -__ZN16IOPlatformExpert14setBootROMTypeEl -__ZN16IOPlatformExpert14setChipSetTypeEl -__ZN16IOPlatformExpert14setConsoleInfoEP8PE_Videoj -__ZN16IOPlatformExpert14setMachineTypeEl -__ZN16IOPlatformExpert15getGMTTimeOfDayEv -__ZN16IOPlatformExpert15getNubResourcesEP9IOService -__ZN16IOPlatformExpert15setGMTTimeOfDayEl -__ZN16IOPlatformExpert16PMRegisterDeviceEP9IOServiceS1_ -__ZN16IOPlatformExpert16atInterruptLevelEv -__ZN16IOPlatformExpert16hasPrivPMFeatureEm -__ZN16IOPlatformExpert20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_ -__ZN16IOPlatformExpert21RegisterServiceInTreeEP9IOServiceP12OSDictionaryS3_S1_ -__ZN16IOPlatformExpert21numBatteriesSupportedEv -__ZN16IOPlatformExpert21platformAdjustServiceEP9IOService -__ZN16IOPlatformExpert23registerNVRAMControllerEP17IONVRAMController -__ZN16IOPlatformExpert25PMInstantiatePowerDomainsEv -__ZN16IOPlatformExpert25getPhysicalRangeAllocatorEv -__ZN16IOPlatformExpert25lookUpInterruptControllerEP8OSSymbol -__ZN16IOPlatformExpert25setCPUInterruptPropertiesEP9IOService -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert2Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert3Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert4Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert5Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert6Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert7Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert8Ev -__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert9Ev -__ZN16IOPlatformExpert27_RESERVEDIOPlatformExpert10Ev -__ZN16IOPlatformExpert27_RESERVEDIOPlatformExpert11Ev -__ZN16IOPlatformExpert27registerInterruptControllerEP8OSSymbolP21IOInterruptController -__ZN16IOPlatformExpert30createSystemSerialNumberStringEP6OSData -__ZN16IOPlatformExpert5PMLogEPKcmmm -__ZN16IOPlatformExpert5startEP9IOService -__ZN16IOPlatformExpert6attachEP9IOService -__ZN16IOPlatformExpert9MetaClassC1Ev -__ZN16IOPlatformExpert9MetaClassC2Ev -__ZN16IOPlatformExpert9configureEP9IOService -__ZN16IOPlatformExpert9createNubEP12OSDictionary -__ZN16IOPlatformExpert9metaClassE -__ZN16IOPlatformExpertC1EPK11OSMetaClass -__ZN16IOPlatformExpertC1Ev -__ZN16IOPlatformExpertC2EPK11OSMetaClass -__ZN16IOPlatformExpertC2Ev -__ZN16IOPlatformExpertD0Ev -__ZN16IOPlatformExpertD2Ev __ZN16IORangeAllocator10deallocateEmm __ZN16IORangeAllocator10gMetaClassE __ZN16IORangeAllocator10superClassE @@ -860,6 +754,26 @@ __ZN17IOBigMemoryCursorC2EPK11OSMetaClass __ZN17IOBigMemoryCursorC2Ev __ZN17IOBigMemoryCursorD0Ev __ZN17IOBigMemoryCursorD2Ev +__ZN17IOPolledInterface10gMetaClassE +__ZN17IOPolledInterface15checkAllForWorkEv +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev 
+__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev +__ZN17IOPolledInterfaceC2EPK11OSMetaClass +__ZN17IOPolledInterfaceD2Ev __ZN17IOPowerConnection10gMetaClassE __ZN17IOPowerConnection10superClassE __ZN17IOPowerConnection14getAwaitingAckEv @@ -885,42 +799,6 @@ __ZN17IOPowerConnectionC2EPK11OSMetaClass __ZN17IOPowerConnectionC2Ev __ZN17IOPowerConnectionD0Ev __ZN17IOPowerConnectionD2Ev -__ZN18IODTPlatformExpert10createNubsEP9IOServiceP10OSIterator -__ZN18IODTPlatformExpert10gMetaClassE -__ZN18IODTPlatformExpert10superClassE -__ZN18IODTPlatformExpert10writeXPRAMEmPhm -__ZN18IODTPlatformExpert11haltRestartEj -__ZN18IODTPlatformExpert12getModelNameEPci -__ZN18IODTPlatformExpert13savePanicInfoEPhm -__ZN18IODTPlatformExpert14getMachineNameEPci -__ZN18IODTPlatformExpert15getNubResourcesEP9IOService -__ZN18IODTPlatformExpert15processTopLevelEP15IORegistryEntry -__ZN18IODTPlatformExpert17readNVRAMPropertyEP15IORegistryEntryPPK8OSSymbolPP6OSData -__ZN18IODTPlatformExpert18getNVRAMPartitionsEv -__ZN18IODTPlatformExpert18readNVRAMPartitionEPK8OSSymbolmPhm -__ZN18IODTPlatformExpert18writeNVRAMPropertyEP15IORegistryEntryPK8OSSymbolP6OSData -__ZN18IODTPlatformExpert19writeNVRAMPartitionEPK8OSSymbolmPhm -__ZN18IODTPlatformExpert23registerNVRAMControllerEP17IONVRAMController -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert0Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert1Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert2Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert3Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert4Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert5Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert6Ev -__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert7Ev -__ZN18IODTPlatformExpert30createSystemSerialNumberStringEP6OSData -__ZN18IODTPlatformExpert5probeEP9IOServicePl -__ZN18IODTPlatformExpert9MetaClassC1Ev -__ZN18IODTPlatformExpert9MetaClassC2Ev -__ZN18IODTPlatformExpert9configureEP9IOService -__ZN18IODTPlatformExpert9createNubEP15IORegistryEntry -__ZN18IODTPlatformExpert9metaClassE -__ZN18IODTPlatformExpert9readXPRAMEmPhm -__ZN18IODTPlatformExpertC1EPK11OSMetaClass -__ZN18IODTPlatformExpertC2EPK11OSMetaClass -__ZN18IODTPlatformExpertD0Ev -__ZN18IODTPlatformExpertD2Ev __ZN18IOMemoryDescriptor10addMappingEP11IOMemoryMap __ZN18IOMemoryDescriptor10gMetaClassE __ZN18IOMemoryDescriptor10initializeEv @@ -933,16 +811,16 @@ __ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm __ZN18IOMemoryDescriptor11withAddressEPvm11IODirection __ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task __ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor12setPurgeableEmPm __ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection __ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap __ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper __ZN18IOMemoryDescriptor16getSourceSegmentEmPm +__ZN18IOMemoryDescriptor16performOperationEmmm __ZN18IOMemoryDescriptor18getPhysicalAddressEv __ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor3Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor4Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor5Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev @@ 
-954,6 +832,7 @@ __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev +__ZN18IOMemoryDescriptor30withPersistentMemoryDescriptorEPS_ __ZN18IOMemoryDescriptor3mapEP4taskjmmm __ZN18IOMemoryDescriptor3mapEm __ZN18IOMemoryDescriptor4freeEv @@ -1025,6 +904,7 @@ __ZN18IOTimerEventSource10superClassE __ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide __ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec __ZN18IOTimerEventSource10wakeAtTimeEmm +__ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop __ZN18IOTimerEventSource12checkForWorkEv __ZN18IOTimerEventSource12setTimeoutMSEm __ZN18IOTimerEventSource12setTimeoutUSEm @@ -1197,15 +1077,18 @@ __ZN21IOSubMemoryDescriptor10superClassE __ZN21IOSubMemoryDescriptor10writeBytesEmPKvm __ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm __ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection +__ZN21IOSubMemoryDescriptor12setPurgeableEmPm __ZN21IOSubMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb __ZN21IOSubMemoryDescriptor15initWithAddressEPvm11IODirection __ZN21IOSubMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm +__ZN21IOSubMemoryDescriptor16performOperationEmmm __ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm __ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN21IOSubMemoryDescriptor4freeEv +__ZN21IOSubMemoryDescriptor5doMapEP6vm_mapPjmmm __ZN21IOSubMemoryDescriptor7prepareE11IODirection __ZN21IOSubMemoryDescriptor8completeE11IODirection __ZN21IOSubMemoryDescriptor8redirectEP4taskb @@ -1247,23 +1130,6 @@ __ZN22IOInterruptEventSourceC2EPK11OSMetaClass __ZN22IOInterruptEventSourceC2Ev __ZN22IOInterruptEventSourceD0Ev __ZN22IOInterruptEventSourceD2Ev -__ZN22IOPlatformExpertDevice10gMetaClassE -__ZN22IOPlatformExpertDevice10superClassE -__ZN22IOPlatformExpertDevice12initWithArgsEPvS0_S0_S0_ -__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice0Ev -__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice1Ev -__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice2Ev -__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice3Ev -__ZN22IOPlatformExpertDevice4freeEv -__ZN22IOPlatformExpertDevice9MetaClassC1Ev -__ZN22IOPlatformExpertDevice9MetaClassC2Ev -__ZN22IOPlatformExpertDevice9metaClassE -__ZN22IOPlatformExpertDeviceC1EPK11OSMetaClass -__ZN22IOPlatformExpertDeviceC1Ev -__ZN22IOPlatformExpertDeviceC2EPK11OSMetaClass -__ZN22IOPlatformExpertDeviceC2Ev -__ZN22IOPlatformExpertDeviceD0Ev -__ZN22IOPlatformExpertDeviceD2Ev __ZN22_IOOpenServiceIterator10gMetaClassE __ZN22_IOOpenServiceIterator10superClassE __ZN22_IOOpenServiceIterator13getNextObjectEv @@ -1350,33 +1216,6 @@ __ZN24IOBufferMemoryDescriptorC2EPK11OSMetaClass __ZN24IOBufferMemoryDescriptorC2Ev __ZN24IOBufferMemoryDescriptorD0Ev __ZN24IOBufferMemoryDescriptorD2Ev -__ZN24IOCPUInterruptController10gMetaClassE -__ZN24IOCPUInterruptController10superClassE -__ZN24IOCPUInterruptController14causeInterruptEP9IOServicei -__ZN24IOCPUInterruptController15enableInterruptEP9IOServicei -__ZN24IOCPUInterruptController15handleInterruptEPvP9IOServicei -__ZN24IOCPUInterruptController16disableInterruptEP9IOServicei 
-__ZN24IOCPUInterruptController16getInterruptTypeEP9IOServiceiPi -__ZN24IOCPUInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_ -__ZN24IOCPUInterruptController18enableCPUInterruptEP5IOCPU -__ZN24IOCPUInterruptController25setCPUInterruptPropertiesEP9IOService -__ZN24IOCPUInterruptController26initCPUInterruptControllerEi -__ZN24IOCPUInterruptController30registerCPUInterruptControllerEv -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController0Ev -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController1Ev -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController2Ev -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController3Ev -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController4Ev -__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController5Ev -__ZN24IOCPUInterruptController9MetaClassC1Ev -__ZN24IOCPUInterruptController9MetaClassC2Ev -__ZN24IOCPUInterruptController9metaClassE -__ZN24IOCPUInterruptControllerC1EPK11OSMetaClass -__ZN24IOCPUInterruptControllerC1Ev -__ZN24IOCPUInterruptControllerC2EPK11OSMetaClass -__ZN24IOCPUInterruptControllerC2Ev -__ZN24IOCPUInterruptControllerD0Ev -__ZN24IOCPUInterruptControllerD2Ev __ZN25IOGeneralMemoryDescriptor10gMetaClassE __ZN25IOGeneralMemoryDescriptor10superClassE __ZN25IOGeneralMemoryDescriptor11setPositionEm @@ -1406,7 +1245,6 @@ __ZN25IOGeneralMemoryDescriptorC2EPK11OSMetaClass __ZN25IOGeneralMemoryDescriptorC2Ev __ZN25IOGeneralMemoryDescriptorD0Ev __ZN25IOGeneralMemoryDescriptorD2Ev -__ZNK25IOGeneralMemoryDescriptor12getBackingIDEv __ZN25IOServiceUserNotification10gMetaClassE __ZN25IOServiceUserNotification10superClassE __ZN25IOServiceUserNotification13getNextObjectEv @@ -1504,35 +1342,6 @@ __ZN32IOServiceMessageUserNotificationC2EPK11OSMetaClass __ZN32IOServiceMessageUserNotificationC2Ev __ZN32IOServiceMessageUserNotificationD0Ev __ZN32IOServiceMessageUserNotificationD2Ev -__ZN5IOCPU10gMetaClassE -__ZN5IOCPU10superClassE -__ZN5IOCPU11getCPUGroupEv -__ZN5IOCPU11getCPUStateEv -__ZN5IOCPU11setCPUStateEm -__ZN5IOCPU12getCPUNumberEv -__ZN5IOCPU12setCPUNumberEm -__ZN5IOCPU13setPropertiesEP8OSObject -__ZN5IOCPU15_RESERVEDIOCPU0Ev -__ZN5IOCPU15_RESERVEDIOCPU1Ev -__ZN5IOCPU15_RESERVEDIOCPU2Ev -__ZN5IOCPU15_RESERVEDIOCPU3Ev -__ZN5IOCPU15_RESERVEDIOCPU4Ev -__ZN5IOCPU15_RESERVEDIOCPU5Ev -__ZN5IOCPU15_RESERVEDIOCPU6Ev -__ZN5IOCPU15_RESERVEDIOCPU7Ev -__ZN5IOCPU15getCPUGroupSizeEv -__ZN5IOCPU16getMachProcessorEv -__ZN5IOCPU17enableCPUTimeBaseEb -__ZN5IOCPU5startEP9IOService -__ZN5IOCPU8initCPUsEv -__ZN5IOCPU9MetaClassC1Ev -__ZN5IOCPU9MetaClassC2Ev -__ZN5IOCPU9metaClassE -__ZN5IOCPU9signalCPUEPS_ -__ZN5IOCPUC1EPK11OSMetaClass -__ZN5IOCPUC2EPK11OSMetaClass -__ZN5IOCPUD0Ev -__ZN5IOCPUD2Ev __ZN8IOMapper10allocTableEm __ZN8IOMapper10gMetaClassE __ZN8IOMapper10iovmInsertEjmP13upl_page_infom @@ -1618,46 +1427,6 @@ __ZN9IOCommandC1EPK11OSMetaClass __ZN9IOCommandC2EPK11OSMetaClass __ZN9IOCommandD0Ev __ZN9IOCommandD2Ev -__ZN9IODTNVRAM10gMetaClassE -__ZN9IODTNVRAM10superClassE -__ZN9IODTNVRAM10writeXPRAMEmPhm -__ZN9IODTNVRAM11setPropertyEPK8OSSymbolP8OSObject -__ZN9IODTNVRAM13savePanicInfoEPhm -__ZN9IODTNVRAM13setPropertiesEP8OSObject -__ZN9IODTNVRAM15initOFVariablesEv -__ZN9IODTNVRAM15syncOFVariablesEv -__ZN9IODTNVRAM16escapeDataToDataEP6OSData -__ZN9IODTNVRAM16updateOWBootArgsEPK8OSSymbolP8OSObject -__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_ -__ZN9IODTNVRAM17readNVRAMPropertyEP15IORegistryEntryPPK8OSSymbolPP6OSData -__ZN9IODTNVRAM18generateOWChecksumEPh 
-__ZN9IODTNVRAM18getNVRAMPartitionsEv -__ZN9IODTNVRAM18readNVRAMPartitionEPK8OSSymbolmPhm -__ZN9IODTNVRAM18validateOWChecksumEPh -__ZN9IODTNVRAM18writeNVRAMPropertyEP15IORegistryEntryPK8OSSymbolP6OSData -__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject -__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject -__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm -__ZN9IODTNVRAM19unescapeBytesToDataEPhm -__ZN9IODTNVRAM19writeNVRAMPartitionEPK8OSSymbolmPhm -__ZN9IODTNVRAM22readNVRAMPropertyType0EP15IORegistryEntryPPK8OSSymbolPP6OSData -__ZN9IODTNVRAM22readNVRAMPropertyType1EP15IORegistryEntryPPK8OSSymbolPP6OSData -__ZN9IODTNVRAM23registerNVRAMControllerEP17IONVRAMController -__ZN9IODTNVRAM23writeNVRAMPropertyType0EP15IORegistryEntryPK8OSSymbolP6OSData -__ZN9IODTNVRAM23writeNVRAMPropertyType1EP15IORegistryEntryPK8OSSymbolP6OSData -__ZN9IODTNVRAM26calculatePartitionChecksumEPh -__ZN9IODTNVRAM4initEP15IORegistryEntryPK15IORegistryPlane -__ZN9IODTNVRAM4syncEv -__ZN9IODTNVRAM9MetaClassC1Ev -__ZN9IODTNVRAM9MetaClassC2Ev -__ZN9IODTNVRAM9metaClassE -__ZN9IODTNVRAM9readXPRAMEmPhm -__ZN9IODTNVRAMC1EPK11OSMetaClass -__ZN9IODTNVRAMC1Ev -__ZN9IODTNVRAMC2EPK11OSMetaClass -__ZN9IODTNVRAMC2Ev -__ZN9IODTNVRAMD0Ev -__ZN9IODTNVRAMD2Ev __ZN9IOService10actionStopEPS_S0_ __ZN9IOService10adjustBusyEl __ZN9IOService10ask_parentEm @@ -1785,7 +1554,7 @@ __ZN9IOService18matchPropertyTableEP12OSDictionary __ZN9IOService18matchPropertyTableEP12OSDictionaryPl __ZN9IOService18setIdleTimerPeriodEm __ZN9IOService18settleTimerExpiredEv -__ZN9IOService19_RESERVEDIOService3Ev +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService19_RESERVEDIOService4Ev __ZN9IOService19_RESERVEDIOService5Ev __ZN9IOService19_RESERVEDIOService6Ev @@ -2010,8 +1779,6 @@ __ZNK15IOConditionLock16getInterruptibleEv __ZNK15IOConditionLock9MetaClass5allocEv __ZNK15IOPMPowerSource12getMetaClassEv __ZNK15IOPMPowerSource9MetaClass5allocEv -__ZNK15IOPanicPlatform12getMetaClassEv -__ZNK15IOPanicPlatform9MetaClass5allocEv __ZNK15IORegistryEntry11compareNameEP8OSStringPS1_ __ZNK15IORegistryEntry11getLocationEPK15IORegistryPlane __ZNK15IORegistryEntry11getPropertyEPK8OSString @@ -2057,23 +1824,13 @@ __ZNK15IORegistryEntry9breakLinkEPS_jPK15IORegistryPlane __ZNK15IORegistryPlane12getMetaClassEv __ZNK15IORegistryPlane9MetaClass5allocEv __ZNK15IORegistryPlane9serializeEP11OSSerialize -__ZNK15IOWatchDogTimer12getMetaClassEv -__ZNK15IOWatchDogTimer9MetaClass5allocEv __ZNK15_IOConfigThread12getMetaClassEv __ZNK15_IOConfigThread9MetaClass5allocEv __ZNK16IOKitDiagnostics12getMetaClassEv __ZNK16IOKitDiagnostics9MetaClass5allocEv __ZNK16IOKitDiagnostics9serializeEP11OSSerialize -__ZNK16IOPMPagingPlexus12getMetaClassEv -__ZNK16IOPMPagingPlexus9MetaClass5allocEv __ZNK16IOPMinformeeList12getMetaClassEv __ZNK16IOPMinformeeList9MetaClass5allocEv -__ZNK16IOPlatformDevice11compareNameEP8OSStringPS1_ -__ZNK16IOPlatformDevice12getMetaClassEv -__ZNK16IOPlatformDevice9MetaClass5allocEv -__ZNK16IOPlatformExpert12getMetaClassEv -__ZNK16IOPlatformExpert14compareNubNameEPK9IOServiceP8OSStringPS4_ -__ZNK16IOPlatformExpert9MetaClass5allocEv __ZNK16IORangeAllocator12getMetaClassEv __ZNK16IORangeAllocator9MetaClass5allocEv __ZNK16IORangeAllocator9serializeEP11OSSerialize @@ -2081,9 +1838,6 @@ __ZNK17IOBigMemoryCursor12getMetaClassEv __ZNK17IOBigMemoryCursor9MetaClass5allocEv __ZNK17IOPowerConnection12getMetaClassEv __ZNK17IOPowerConnection9MetaClass5allocEv -__ZNK18IODTPlatformExpert12getMetaClassEv 
-__ZNK18IODTPlatformExpert14compareNubNameEPK9IOServiceP8OSStringPS4_ -__ZNK18IODTPlatformExpert9MetaClass5allocEv __ZNK18IOMemoryDescriptor12getDirectionEv __ZNK18IOMemoryDescriptor12getMetaClassEv __ZNK18IOMemoryDescriptor9MetaClass5allocEv @@ -2118,10 +1872,6 @@ __ZNK22IOInterruptEventSource11getProviderEv __ZNK22IOInterruptEventSource12getMetaClassEv __ZNK22IOInterruptEventSource14getAutoDisableEv __ZNK22IOInterruptEventSource9MetaClass5allocEv -__ZNK22IOPlatformExpertDevice11compareNameEP8OSStringPS1_ -__ZNK22IOPlatformExpertDevice11getWorkLoopEv -__ZNK22IOPlatformExpertDevice12getMetaClassEv -__ZNK22IOPlatformExpertDevice9MetaClass5allocEv __ZNK22_IOOpenServiceIterator12getMetaClassEv __ZNK22_IOOpenServiceIterator9MetaClass5allocEv __ZNK23IOMultiMemoryDescriptor12getMetaClassEv @@ -2129,8 +1879,6 @@ __ZNK23IOMultiMemoryDescriptor9MetaClass5allocEv __ZNK24IOBufferMemoryDescriptor11getCapacityEv __ZNK24IOBufferMemoryDescriptor12getMetaClassEv __ZNK24IOBufferMemoryDescriptor9MetaClass5allocEv -__ZNK24IOCPUInterruptController12getMetaClassEv -__ZNK24IOCPUInterruptController9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor12getMetaClassEv __ZNK25IOGeneralMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor9serializeEP11OSSerialize @@ -2145,8 +1893,6 @@ __ZNK28IOFilterInterruptEventSource15getFilterActionEv __ZNK28IOFilterInterruptEventSource9MetaClass5allocEv __ZNK32IOServiceMessageUserNotification12getMetaClassEv __ZNK32IOServiceMessageUserNotification9MetaClass5allocEv -__ZNK5IOCPU12getMetaClassEv -__ZNK5IOCPU9MetaClass5allocEv __ZNK8IOMapper12getMetaClassEv __ZNK8IOMapper9MetaClass5allocEv __ZNK8IOPMpriv12getMetaClassEv @@ -2159,13 +1905,6 @@ __ZNK8IOSyncer12getMetaClassEv __ZNK8IOSyncer9MetaClass5allocEv __ZNK9IOCommand12getMetaClassEv __ZNK9IOCommand9MetaClass5allocEv -__ZNK9IODTNVRAM11getPropertyEPK8OSSymbol -__ZNK9IODTNVRAM11getPropertyEPKc -__ZNK9IODTNVRAM12getMetaClassEv -__ZNK9IODTNVRAM17getOFVariablePermEPK8OSSymbol -__ZNK9IODTNVRAM17getOFVariableTypeEPK8OSSymbol -__ZNK9IODTNVRAM19serializePropertiesEP11OSSerialize -__ZNK9IODTNVRAM9MetaClass5allocEv __ZNK9IOService10isInactiveEv __ZNK9IOService11getProviderEv __ZNK9IOService11getWorkLoopEv @@ -2200,20 +1939,15 @@ __ZTV14IOMemoryCursor __ZTV14IOPMrootDomain __ZTV15IOConditionLock __ZTV15IOPMPowerSource -__ZTV15IOPanicPlatform __ZTV15IORegistryEntry __ZTV15IORegistryPlane -__ZTV15IOWatchDogTimer __ZTV15_IOConfigThread __ZTV16IOKitDiagnostics -__ZTV16IOPMPagingPlexus __ZTV16IOPMinformeeList -__ZTV16IOPlatformDevice -__ZTV16IOPlatformExpert __ZTV16IORangeAllocator __ZTV17IOBigMemoryCursor +__ZTV17IOPolledInterface __ZTV17IOPowerConnection -__ZTV18IODTPlatformExpert __ZTV18IOMemoryDescriptor __ZTV18IOPMchangeNoteList __ZTV18IORegistryIterator @@ -2228,24 +1962,20 @@ __ZTV21IOInterruptController __ZTV21IONaturalMemoryCursor __ZTV21IOSubMemoryDescriptor __ZTV22IOInterruptEventSource -__ZTV22IOPlatformExpertDevice __ZTV22_IOOpenServiceIterator __ZTV23IOMultiMemoryDescriptor __ZTV24IOBufferMemoryDescriptor -__ZTV24IOCPUInterruptController __ZTV25IOGeneralMemoryDescriptor __ZTV25IOServiceUserNotification __ZTV26_IOServiceInterestNotifier __ZTV27IOSharedInterruptController __ZTV28IOFilterInterruptEventSource __ZTV32IOServiceMessageUserNotification -__ZTV5IOCPU __ZTV8IOMapper __ZTV8IOPMpriv __ZTV8IOPMprot __ZTV8IOSyncer __ZTV9IOCommand -__ZTV9IODTNVRAM __ZTV9IOService __ZTVN10IOMachPort9MetaClassE __ZTVN10IONotifier9MetaClassE @@ -2267,20 +1997,14 @@ __ZTVN14IOMemoryCursor9MetaClassE __ZTVN14IOPMrootDomain9MetaClassE 
__ZTVN15IOConditionLock9MetaClassE __ZTVN15IOPMPowerSource9MetaClassE -__ZTVN15IOPanicPlatform9MetaClassE __ZTVN15IORegistryEntry9MetaClassE __ZTVN15IORegistryPlane9MetaClassE -__ZTVN15IOWatchDogTimer9MetaClassE __ZTVN15_IOConfigThread9MetaClassE __ZTVN16IOKitDiagnostics9MetaClassE -__ZTVN16IOPMPagingPlexus9MetaClassE __ZTVN16IOPMinformeeList9MetaClassE -__ZTVN16IOPlatformDevice9MetaClassE -__ZTVN16IOPlatformExpert9MetaClassE __ZTVN16IORangeAllocator9MetaClassE __ZTVN17IOBigMemoryCursor9MetaClassE __ZTVN17IOPowerConnection9MetaClassE -__ZTVN18IODTPlatformExpert9MetaClassE __ZTVN18IOMemoryDescriptor9MetaClassE __ZTVN18IOPMchangeNoteList9MetaClassE __ZTVN18IORegistryIterator9MetaClassE @@ -2295,24 +2019,20 @@ __ZTVN21IOInterruptController9MetaClassE __ZTVN21IONaturalMemoryCursor9MetaClassE __ZTVN21IOSubMemoryDescriptor9MetaClassE __ZTVN22IOInterruptEventSource9MetaClassE -__ZTVN22IOPlatformExpertDevice9MetaClassE __ZTVN22_IOOpenServiceIterator9MetaClassE __ZTVN23IOMultiMemoryDescriptor9MetaClassE __ZTVN24IOBufferMemoryDescriptor9MetaClassE -__ZTVN24IOCPUInterruptController9MetaClassE __ZTVN25IOGeneralMemoryDescriptor9MetaClassE __ZTVN25IOServiceUserNotification9MetaClassE __ZTVN26_IOServiceInterestNotifier9MetaClassE __ZTVN27IOSharedInterruptController9MetaClassE __ZTVN28IOFilterInterruptEventSource9MetaClassE __ZTVN32IOServiceMessageUserNotification9MetaClassE -__ZTVN5IOCPU9MetaClassE __ZTVN8IOMapper9MetaClassE __ZTVN8IOPMpriv9MetaClassE __ZTVN8IOPMprot9MetaClassE __ZTVN8IOSyncer9MetaClassE __ZTVN9IOCommand9MetaClassE -__ZTVN9IODTNVRAM9MetaClassE __ZTVN9IOService9MetaClassE __giDebugLogDataInternal __giDebugLogInternal @@ -2389,3 +2109,6 @@ _gIOTerminatedNotification _gIOUserClientClassKey _gOFVariables _gPlatformInterruptControllerName +_registerPrioritySleepWakeInterest +_registerSleepWakeInterest +_vetoSleepWakeNotification diff --git a/config/IOKit.ppc.exports b/config/IOKit.ppc.exports index fcfff58c6..e69de29bb 100644 --- a/config/IOKit.ppc.exports +++ b/config/IOKit.ppc.exports @@ -1,184 +0,0 @@ -_CallTVector -__Z11IODBDMAStopPV23IODBDMAChannelRegisters -__Z12IODBDMAFlushPV23IODBDMAChannelRegisters -__Z12IODBDMAPausePV23IODBDMAChannelRegisters -__Z12IODBDMAResetPV23IODBDMAChannelRegisters -__Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor -__Z14RootRegisteredP8OSObjectPvP9IOService -__Z15IODBDMAContinuePV23IODBDMAChannelRegisters -__Z32IOFreePhysicallyContiguousMemoryPjj -__Z36IOAllocatePhysicallyContiguousMemoryjjPjPm -__ZN10AppleMacIO10deleteListEv -__ZN10AppleMacIO10gMetaClassE -__ZN10AppleMacIO10processNubEP9IOService -__ZN10AppleMacIO10superClassE -__ZN10AppleMacIO11excludeListEv -__ZN10AppleMacIO12publishBelowEP15IORegistryEntry -__ZN10AppleMacIO15getNubResourcesEP9IOService -__ZN10AppleMacIO20_RESERVEDAppleMacIO0Ev -__ZN10AppleMacIO20_RESERVEDAppleMacIO1Ev -__ZN10AppleMacIO20_RESERVEDAppleMacIO2Ev -__ZN10AppleMacIO20_RESERVEDAppleMacIO3Ev -__ZN10AppleMacIO5startEP9IOService -__ZN10AppleMacIO8selfTestEv -__ZN10AppleMacIO9MetaClassC1Ev -__ZN10AppleMacIO9MetaClassC2Ev -__ZN10AppleMacIO9createNubEP15IORegistryEntry -__ZN10AppleMacIO9metaClassE -__ZN10AppleMacIOC1EPK11OSMetaClass -__ZN10AppleMacIOC2EPK11OSMetaClass -__ZN10AppleMacIOD0Ev -__ZN10AppleMacIOD2Ev -__ZN10AppleNVRAM10gMetaClassE -__ZN10AppleNVRAM10superClassE -__ZN10AppleNVRAM4readEmPhm -__ZN10AppleNVRAM5startEP9IOService -__ZN10AppleNVRAM5writeEmPhm -__ZN10AppleNVRAM9MetaClassC1Ev -__ZN10AppleNVRAM9MetaClassC2Ev -__ZN10AppleNVRAM9metaClassE -__ZN10AppleNVRAMC1EPK11OSMetaClass -__ZN10AppleNVRAMC1Ev 
-__ZN10AppleNVRAMC2EPK11OSMetaClass -__ZN10AppleNVRAMC2Ev -__ZN10AppleNVRAMD0Ev -__ZN10AppleNVRAMD2Ev -__ZN16AppleMacIODevice10gMetaClassE -__ZN16AppleMacIODevice10superClassE -__ZN16AppleMacIODevice12getResourcesEv -__ZN16AppleMacIODevice13matchLocationEP9IOService -__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice0Ev -__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice1Ev -__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice2Ev -__ZN16AppleMacIODevice26_RESERVEDAppleMacIODevice3Ev -__ZN16AppleMacIODevice9MetaClassC1Ev -__ZN16AppleMacIODevice9MetaClassC2Ev -__ZN16AppleMacIODevice9metaClassE -__ZN16AppleMacIODeviceC1EPK11OSMetaClass -__ZN16AppleMacIODeviceC1Ev -__ZN16AppleMacIODeviceC2EPK11OSMetaClass -__ZN16AppleMacIODeviceC2Ev -__ZN16AppleMacIODeviceD0Ev -__ZN16AppleMacIODeviceD2Ev -__ZN17IONVRAMController10gMetaClassE -__ZN17IONVRAMController10superClassE -__ZN17IONVRAMController4syncEv -__ZN17IONVRAMController5startEP9IOService -__ZN17IONVRAMController9MetaClassC1Ev -__ZN17IONVRAMController9MetaClassC2Ev -__ZN17IONVRAMController9metaClassE -__ZN17IONVRAMControllerC1EPK11OSMetaClass -__ZN17IONVRAMControllerC2EPK11OSMetaClass -__ZN17IONVRAMControllerD0Ev -__ZN17IONVRAMControllerD2Ev -__ZN19ApplePlatformExpert10deleteListEv -__ZN19ApplePlatformExpert10gMetaClassE -__ZN19ApplePlatformExpert10superClassE -__ZN19ApplePlatformExpert11excludeListEv -__ZN19ApplePlatformExpert14getMachineNameEPci -__ZN19ApplePlatformExpert15getGMTTimeOfDayEv -__ZN19ApplePlatformExpert15setGMTTimeOfDayEl -__ZN19ApplePlatformExpert23registerNVRAMControllerEP17IONVRAMController -__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert0Ev -__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert1Ev -__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert2Ev -__ZN19ApplePlatformExpert29_RESERVEDApplePlatformExpert3Ev -__ZN19ApplePlatformExpert5startEP9IOService -__ZN19ApplePlatformExpert9MetaClassC1Ev -__ZN19ApplePlatformExpert9MetaClassC2Ev -__ZN19ApplePlatformExpert9configureEP9IOService -__ZN19ApplePlatformExpert9metaClassE -__ZN19ApplePlatformExpertC1EPK11OSMetaClass -__ZN19ApplePlatformExpertC2EPK11OSMetaClass -__ZN19ApplePlatformExpertD0Ev -__ZN19ApplePlatformExpertD2Ev -__ZN19IODBDMAMemoryCursor10gMetaClassE -__ZN19IODBDMAMemoryCursor10superClassE -__ZN19IODBDMAMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN19IODBDMAMemoryCursor17withSpecificationEmmm -__ZN19IODBDMAMemoryCursor21initWithSpecificationEmmm -__ZN19IODBDMAMemoryCursor9MetaClassC1Ev -__ZN19IODBDMAMemoryCursor9MetaClassC2Ev -__ZN19IODBDMAMemoryCursor9metaClassE -__ZN19IODBDMAMemoryCursorC1EPK11OSMetaClass -__ZN19IODBDMAMemoryCursorC1Ev -__ZN19IODBDMAMemoryCursorC2EPK11OSMetaClass -__ZN19IODBDMAMemoryCursorC2Ev -__ZN19IODBDMAMemoryCursorD0Ev -__ZN19IODBDMAMemoryCursorD2Ev -__ZN8AppleCPU10gMetaClassE -__ZN8AppleCPU10getCPUNameEv -__ZN8AppleCPU10quiesceCPUEv -__ZN8AppleCPU10superClassE -__ZN8AppleCPU5startEP9IOService -__ZN8AppleCPU7haltCPUEv -__ZN8AppleCPU7initCPUEb -__ZN8AppleCPU8startCPUEjj -__ZN8AppleCPU9MetaClassC1Ev -__ZN8AppleCPU9MetaClassC2Ev -__ZN8AppleCPU9metaClassE -__ZN8AppleCPUC1EPK11OSMetaClass -__ZN8AppleCPUC1Ev -__ZN8AppleCPUC2EPK11OSMetaClass -__ZN8AppleCPUC2Ev -__ZN8AppleCPUD0Ev -__ZN8AppleCPUD2Ev -__ZN8AppleNMI10gMetaClassE -__ZN8AppleNMI10superClassE -__ZN8AppleNMI15handleInterruptEPvP9IOServicei -__ZN8AppleNMI18_RESERVEDAppleNMI0Ev -__ZN8AppleNMI18_RESERVEDAppleNMI1Ev -__ZN8AppleNMI18_RESERVEDAppleNMI2Ev -__ZN8AppleNMI18_RESERVEDAppleNMI3Ev -__ZN8AppleNMI22powerStateWillChangeToEmmP9IOService 
-__ZN8AppleNMI5startEP9IOService -__ZN8AppleNMI7initNMIEP21IOInterruptControllerP6OSData -__ZN8AppleNMI9MetaClassC1Ev -__ZN8AppleNMI9MetaClassC2Ev -__ZN8AppleNMI9metaClassE -__ZN8AppleNMIC1EPK11OSMetaClass -__ZN8AppleNMIC1Ev -__ZN8AppleNMIC2EPK11OSMetaClass -__ZN8AppleNMIC2Ev -__ZN8AppleNMID0Ev -__ZN8AppleNMID2Ev -__ZNK10AppleMacIO12getMetaClassEv -__ZNK10AppleMacIO14compareNubNameEPK9IOServiceP8OSStringPS4_ -__ZNK10AppleMacIO9MetaClass5allocEv -__ZNK10AppleNVRAM12getMetaClassEv -__ZNK10AppleNVRAM9MetaClass5allocEv -__ZNK16AppleMacIODevice11compareNameEP8OSStringPS1_ -__ZNK16AppleMacIODevice12getMetaClassEv -__ZNK16AppleMacIODevice9MetaClass5allocEv -__ZNK17IONVRAMController12getMetaClassEv -__ZNK17IONVRAMController9MetaClass5allocEv -__ZNK19ApplePlatformExpert12getMetaClassEv -__ZNK19ApplePlatformExpert9MetaClass5allocEv -__ZNK19IODBDMAMemoryCursor12getMetaClassEv -__ZNK19IODBDMAMemoryCursor9MetaClass5allocEv -__ZNK8AppleCPU12getMetaClassEv -__ZNK8AppleCPU9MetaClass5allocEv -__ZNK8AppleNMI12getMetaClassEv -__ZNK8AppleNMI9MetaClass5allocEv -__ZTV10AppleMacIO -__ZTV10AppleNVRAM -__ZTV16AppleMacIODevice -__ZTV17IONVRAMController -__ZTV19ApplePlatformExpert -__ZTV19IODBDMAMemoryCursor -__ZTV8AppleCPU -__ZTV8AppleNMI -__ZTVN10AppleMacIO9MetaClassE -__ZTVN10AppleNVRAM9MetaClassE -__ZTVN16AppleMacIODevice9MetaClassE -__ZTVN17IONVRAMController9MetaClassE -__ZTVN19ApplePlatformExpert9MetaClassE -__ZTVN19IODBDMAMemoryCursor9MetaClassE -__ZTVN8AppleCPU9MetaClassE -__ZTVN8AppleNMI9MetaClassE -__eSynchronizeIO -_gGetDefaultBusSpeedsKey -_PE_Determine_Clock_Speeds -_PE_read_write_time_of_day -_PE_write_IIC - diff --git a/config/IPFirewall.kext/Info.plist b/config/IPFirewall.kext/Info.plist new file mode 100644 index 000000000..7f415cdbd --- /dev/null +++ b/config/IPFirewall.kext/Info.plist @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>CFBundleDevelopmentRegion</key> + <string>English</string> + <key>CFBundleGetInfoString</key> + <string>IP network firewall</string> + <key>CFBundleIdentifier</key> + <string>com.apple.nke.IPFirewall</string> + <key>CFBundleInfoDictionaryVersion</key> + <string>6.0</string> + <key>CFBundleName</key> + <string>IP Firewall</string> + <key>CFBundlePackageType</key> + <string>KEXT</string> + <key>CFBundleShortVersionString</key> + <string>2.0</string> + <key>CFBundleSignature</key> + <string>ipfw</string> + <key>CFBundleVersion</key> + <string>2.0</string> + <key>OSKernelResource</key> + <true/> +</dict> +</plist> diff --git a/config/Libkern.exports b/config/Libkern.exports index 263d70407..eeb7bfb60 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -1,6 +1,8 @@ +_Assert _OSAddAtomic _OSAddAtomic16 _OSAddAtomic8 +_OSBacktrace _OSBitAndAtomic _OSBitAndAtomic16 _OSBitAndAtomic8 @@ -16,9 +18,16 @@ _OSDecrementAtomic16 _OSDecrementAtomic8 _OSDequeueAtomic _OSEnqueueAtomic +_OSFree _OSIncrementAtomic _OSIncrementAtomic16 _OSIncrementAtomic8 +_OSMalloc +_OSMalloc_Tagalloc +_OSMalloc_Tagfree +_OSMalloc_noblock +_OSMalloc_nowait +_OSReportWithBacktrace _OSRuntimeFinalizeCPP _OSRuntimeInitializeCPP _OSRuntimeUnloadCPP @@ -107,9 +116,10 @@ __ZN11OSSerializeC2Ev __ZN11OSSerializeD0Ev __ZN11OSSerializeD2Ev __ZN12OSCollection10gMetaClassE +__ZN12OSCollection10setOptionsEjjPv __ZN12OSCollection10superClassE -__ZN12OSCollection22_RESERVEDOSCollection0Ev -__ZN12OSCollection22_RESERVEDOSCollection1Ev +__ZN12OSCollection11haveUpdatedEv +__ZN12OSCollection14copyCollectionEP12OSDictionary __ZN12OSCollection22_RESERVEDOSCollection2Ev __ZN12OSCollection22_RESERVEDOSCollection3Ev __ZN12OSCollection22_RESERVEDOSCollection4Ev @@ -125,6 +135,7 @@ __ZN12OSCollectionC2EPK11OSMetaClass __ZN12OSCollectionD0Ev __ZN12OSCollectionD2Ev __ZN12OSDictionary10gMetaClassE +__ZN12OSDictionary10setOptionsEjjPv __ZN12OSDictionary10superClassE
__ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSStringjj __ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSSymboljj @@ -132,6 +143,7 @@ __ZN12OSDictionary12removeObjectEPK8OSString __ZN12OSDictionary12removeObjectEPK8OSSymbol __ZN12OSDictionary12removeObjectEPKc __ZN12OSDictionary12withCapacityEj +__ZN12OSDictionary14copyCollectionEPS_ __ZN12OSDictionary14ensureCapacityEj __ZN12OSDictionary14withDictionaryEPKS_j __ZN12OSDictionary15flushCollectionEv @@ -163,11 +175,13 @@ __ZN12OSDictionaryC2Ev __ZN12OSDictionaryD0Ev __ZN12OSDictionaryD2Ev __ZN12OSOrderedSet10gMetaClassE +__ZN12OSOrderedSet10setOptionsEjjPv __ZN12OSOrderedSet10superClassE __ZN12OSOrderedSet11orderObjectEPK15OSMetaClassBase __ZN12OSOrderedSet12removeObjectEPK15OSMetaClassBase __ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ __ZN12OSOrderedSet13setLastObjectEPK15OSMetaClassBase +__ZN12OSOrderedSet14copyCollectionEP12OSDictionary __ZN12OSOrderedSet14ensureCapacityEj __ZN12OSOrderedSet14getOrderingRefEv __ZN12OSOrderedSet14setFirstObjectEPK15OSMetaClassBase @@ -252,12 +266,14 @@ __ZN20OSCollectionIteratorC2Ev __ZN20OSCollectionIteratorD0Ev __ZN20OSCollectionIteratorD2Ev __ZN5OSSet10gMetaClassE +__ZN5OSSet10setOptionsEjjPv __ZN5OSSet10superClassE __ZN5OSSet11initWithSetEPKS_j __ZN5OSSet11withObjectsEPPK8OSObjectjj __ZN5OSSet12removeObjectEPK15OSMetaClassBase __ZN5OSSet12withCapacityEj __ZN5OSSet13initWithArrayEPK7OSArrayj +__ZN5OSSet14copyCollectionEP12OSDictionary __ZN5OSSet14ensureCapacityEj __ZN5OSSet15_RESERVEDOSSet0Ev __ZN5OSSet15_RESERVEDOSSet1Ev @@ -322,12 +338,14 @@ __ZN6OSDataC2Ev __ZN6OSDataD0Ev __ZN6OSDataD2Ev __ZN7OSArray10gMetaClassE +__ZN7OSArray10setOptionsEjjPv __ZN7OSArray10superClassE __ZN7OSArray11withObjectsEPPK8OSObjectjj __ZN7OSArray12removeObjectEj __ZN7OSArray12withCapacityEj __ZN7OSArray13initWithArrayEPKS_j __ZN7OSArray13replaceObjectEjPK15OSMetaClassBase +__ZN7OSArray14copyCollectionEP12OSDictionary __ZN7OSArray14ensureCapacityEj __ZN7OSArray15flushCollectionEv __ZN7OSArray15initWithObjectsEPPK8OSObjectjj @@ -714,8 +732,17 @@ _bcopy_phys _bzero _bzero_phys _copyin +_copyinstr _copyout +_copyoutstr +_crc32 _debug_ivars_size +_ffs +_flush_dcache +_flush_dcache64 +_inet_ntop +_invalidate_icache +_invalidate_icache64 _itoa _kOSBooleanFalse _kOSBooleanTrue @@ -724,17 +751,60 @@ _kern_os_malloc _kern_os_malloc_size _kern_os_realloc _kprintf +_lck_attr_alloc_init +_lck_attr_free +_lck_attr_setdebug +_lck_attr_setdefault +_lck_grp_alloc_init +_lck_grp_attr_alloc_init +_lck_grp_attr_free +_lck_grp_attr_setdefault +_lck_grp_attr_setstat +_lck_grp_free +_lck_mtx_alloc_init +_lck_mtx_destroy +_lck_mtx_free +_lck_mtx_init +_lck_mtx_lock +_lck_mtx_unlock +_lck_rw_alloc_init +_lck_rw_destroy +_lck_rw_free +_lck_rw_init +_lck_rw_lock +_lck_rw_lock_exclusive +_lck_rw_lock_shared +_lck_rw_unlock +_lck_rw_unlock_exclusive +_lck_rw_unlock_shared +_lck_spin_alloc_init +_lck_spin_destroy +_lck_spin_free +_lck_spin_init +_lck_spin_lock +_lck_spin_unlock _memcmp _memcpy +_memmove _memset +_ml_at_interrupt_context +_ml_set_interrupts_enabled +_osrelease +_ostype +_page_mask +_page_shift +_page_size _panic _printf _sprintf +_sscanf +_strcasecmp _strcat _strchr _strcmp _strcpy _strlen +_strncasecmp _strncat _strncmp _strncpy @@ -743,4 +813,25 @@ _strtol _strtoq _strtoul _strtouq - +_sysctlbyname +_uuid_clear +_uuid_compare +_uuid_copy +_uuid_generate +_uuid_generate_random +_uuid_generate_time +_uuid_is_null +_uuid_parse +_uuid_unparse +_uuid_unparse_lower +_uuid_unparse_upper +_version +_version_major 
+_version_minor +_version_prerelease_level +_version_revision +_version_stage +_version_variant +_vsnprintf +_vsprintf +_vsscanf diff --git a/config/Libkern.ppc.exports b/config/Libkern.ppc.exports index e69de29bb..9b1bdcf7a 100644 --- a/config/Libkern.ppc.exports +++ b/config/Libkern.ppc.exports @@ -0,0 +1,2 @@ +_bcopy_nc +_bzero_nc diff --git a/config/Mach.exports b/config/Mach.exports index 502660a46..48e542931 100644 --- a/config/Mach.exports +++ b/config/Mach.exports @@ -1,1522 +1,43 @@ -_Assert -_Debugger -_IODefaultCacheBits -_IOGetTime -_IOMapPages -_IOUnmapPages -_KERNEL_AUDIT_TOKEN -_KERNEL_SECURITY_TOKEN -_KUNCExecute -_KUNCGetNotificationID -_KUNCUserNotificationCancel -_KUNCUserNotificationDisplayAlert -_KUNCUserNotificationDisplayFromBundle -_KUNCUserNotificationDisplayNotice -_NDR_record -_Switch_context -_TRAP_TYPES -_UNDAlertCompletedWithResult_rpc -_UNDCancelNotification_rpc -_UNDDisplayAlertFromBundle_rpc -_UNDDisplayAlertSimple_rpc -_UNDDisplayCustomFromBundle_rpc -_UNDDisplayCustomFromDictionary_rpc -_UNDDisplayNoticeFromBundle_rpc -_UNDDisplayNoticeSimple_rpc -_UNDExecute_rpc -_UNDNotificationCreated_rpc -_UNDReply_deallocate -_UNDReply_server -_UNDReply_server_routine -_UNDReply_subsystem -___doprnt -__cpu_capabilities -__disable_preemption -__doprnt -__doprnt_truncates -__enable_preemption -__enable_preemption_no_check -__longjmp -__mk_sp_thread_begin -__mk_sp_thread_depress_abort -__mk_sp_thread_depress_abstime -__mk_sp_thread_depress_ms -__mk_sp_thread_dispatch -__mk_sp_thread_done -__mk_sp_thread_perhaps_yield -__mk_sp_thread_switch -__mk_sp_thread_switch_continue -__mk_sp_thread_unblock -__mutex_lock -__mutex_try -__setjmp -__start -__vm_external_state_get -__vm_map_clip_end -__vm_map_clip_start -__vm_map_entry_create -__vm_map_entry_dispose _absolutetime_to_nanoseconds -_act_abort -_act_attach -_act_deallocate -_act_detach -_act_execute_returnhandlers -_act_free_swapin -_act_get_state -_act_get_state_locked -_act_lock_thread -_act_machine_sv_free -_act_reference -_act_set_apc -_act_set_astbsd -_act_set_state -_act_set_state_locked -_act_thread_catt -_act_thread_cfree -_act_thread_csave -_act_ulock_release_all -_act_unlock_thread -_active_debugger -_adjust_vm_object_cache -_adr -_all_zones_lock -_allow_clustered_pageouts _assert_wait -_assert_wait_possible -_assert_wait_prim +_assert_wait_deadline _assert_wait_timeout -_assert_wait_timeout_event -_ast_check -_ast_init -_ast_taken -_astbsd_on -_atoi -_atoi_term -_avail_remaining -_avenrun -_backing_store_add -_backing_store_alloc -_backing_store_list -_backing_store_lookup -_backing_store_release_trigger_disable -_bcopy -_bcopy_phys -_be_tracing -_bs_commit -_bs_get_global_clsize -_bs_global_info -_bs_initialize -_bs_low -_bs_more_space -_bs_no_paging_space -_bs_set_default_clsize -_bsd_exception -_bsd_init_task -_bzero -_bzero_phys -_c_incoming_interrupts -_c_mach_msg_trap_switch_fast -_c_mmot_combined_S_R -_c_mmot_kernel_send -_c_swapin_thread_block -_c_syscalls_mach -_c_syscalls_unix -_c_thr_exc_raise -_c_thr_exc_raise_state -_c_thr_exc_raise_state_id -_c_thread_invoke_csw -_c_thread_invoke_hits -_c_thread_invoke_misses -_c_thread_invoke_same -_c_thread_invoke_same_cont -_c_tsk_exc_raise -_c_tsk_exc_raise_state -_c_tsk_exc_raise_state_id -_c_vm_page_grab_fictitious -_c_vm_page_more_fictitious -_c_vm_page_release_fictitious -_c_weird_pset_ref_exit -_calend_config -_calend_getattr -_calend_gettime -_calend_init -_calend_ops -_call_continuation -_call_thread_block -_call_thread_unblock -_catch_exc_subsystem 
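The Libkern hunk above carries the substantive API change in this file: alongside utility additions (crc32, the uuid_* family, extra string routines, and the version_* symbols), it exports a tagged kernel allocator (_OSMalloc, _OSMalloc_Tagalloc, _OSFree) and the new lck_* lock family that kexts are expected to use in place of the Mach internals culled below. A minimal sketch of how the two fit together, assuming the stock <libkern/OSMalloc.h> and <kern/locks.h> declarations from this release; the my_* and com.example names are illustrative, not part of the patch:

    #include <mach/kern_return.h>
    #include <libkern/OSMalloc.h>
    #include <kern/locks.h>

    static OSMallocTag  my_tag;   /* attributes allocations to this kext */
    static lck_grp_t   *my_grp;   /* lock group, the unit of lock accounting */
    static lck_mtx_t   *my_mtx;

    static kern_return_t
    my_init(void)
    {
        void *buf;

        my_tag = OSMalloc_Tagalloc("com.example.driver", OSMT_DEFAULT);
        my_grp = lck_grp_alloc_init("com.example.driver", LCK_GRP_ATTR_NULL);
        my_mtx = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);

        buf = OSMalloc(1024, my_tag);   /* may block; see _noblock/_nowait */
        if (buf == NULL)
            return KERN_RESOURCE_SHORTAGE;

        lck_mtx_lock(my_mtx);
        /* ... touch state shared with other threads ... */
        lck_mtx_unlock(my_mtx);

        OSFree(buf, 1024, my_tag);      /* caller supplies the size */
        return KERN_SUCCESS;
    }

    static void
    my_fini(void)
    {
        lck_mtx_free(my_mtx, my_grp);   /* free the lock before its group */
        lck_grp_free(my_grp);
        OSMalloc_Tagfree(my_tag);
    }

Allocations and locks are owned by the tag and group they were created against, which is why the teardown in my_fini mirrors the setup order.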
-_cause_ast_check -_check_actforsig -_clear_wait _clock_absolutetime_interval_to_deadline -_clock_adjtime -_clock_adjust_calendar -_clock_alarm -_clock_alarm_intr -_clock_alarm_reply -_clock_config -_clock_count -_clock_deadline_for_periodic_event -_clock_get_attributes +_clock_delay_until _clock_get_calendar_microtime _clock_get_calendar_nanotime -_clock_get_calendar_value _clock_get_system_microtime _clock_get_system_nanotime -_clock_get_system_value -_clock_get_time _clock_get_uptime -_clock_init -_clock_initialize_calendar _clock_interval_to_absolutetime_interval _clock_interval_to_deadline -_clock_list -_clock_priv_server -_clock_priv_server_routine -_clock_priv_subsystem -_clock_server -_clock_server_routine -_clock_service_create -_clock_set_attributes -_clock_set_calendar_adjtime -_clock_set_calendar_microtime -_clock_set_time -_clock_set_timer_deadline -_clock_set_timer_func -_clock_sleep_internal -_clock_sleep_trap -_clock_subsystem _clock_timebase_info -_clock_timebase_init -_clock_wakeup_calendar -_clr_be_bit -_clrbit -_cluster_transfer_minimum -_clustered_reads -_clustered_writes -_clusters_available -_clusters_committed -_clusters_committed_peak -_cngetc -_cnmaygetc -_cnputc -_com_mapping_resource -_com_region_handle -_com_region_map -_com_region_size -_commpage_populate -_compute_mach_factor -_compute_my_priority -_compute_priority -_consdebug_putc -_consider_machine_adjust -_consider_machine_collect -_consider_task_collect -_consider_zone_gc -_conslog_putc -_convert_act_to_port -_convert_clock_ctrl_to_port -_convert_clock_to_port -_convert_host_to_port -_convert_ledger_to_port -_convert_lock_set_to_port -_convert_memory_object_to_port -_convert_mig_object_to_port -_convert_mo_control_to_port -_convert_port_entry_to_map -_convert_port_entry_to_object -_convert_port_to_UNDReply -_convert_port_to_act -_convert_port_to_clock -_convert_port_to_clock_ctrl -_convert_port_to_host -_convert_port_to_host_priv -_convert_port_to_host_security -_convert_port_to_ledger -_convert_port_to_lock_set -_convert_port_to_locked_task -_convert_port_to_map -_convert_port_to_memory_object -_convert_port_to_mig_object -_convert_port_to_mo_control -_convert_port_to_processor -_convert_port_to_pset -_convert_port_to_pset_name -_convert_port_to_semaphore -_convert_port_to_space -_convert_port_to_task -_convert_port_to_upl -_convert_processor_to_port -_convert_pset_name_to_port -_convert_pset_to_port -_convert_semaphore_to_port -_convert_task_to_port -_convert_upl_to_port -_copyin -_copyin_shared_file -_copyinmap -_copyinmsg -_copyinstr -_copyout -_copyoutmap -_copyoutmsg -_copyoutstr -_copypv -_coredumpok -_cpm_allocate -_cpu_control -_cpu_down -_cpu_info -_cpu_info_count -_cpu_init -_cpu_launch_first_thread -_cpu_machine_init -_cpu_number -_cpu_register -_cpu_signal_handler -_cpu_sleep -_cpu_start -_cpu_up -_csw_check -_cthread_stack_size -_current_act -_current_debugger -_current_map _current_task _current_thread -_current_thread_aborted -_current_timer -_d_to_i -_db_thread_read_times -_db_timer_grab -_dbugprintf -_ddb_regs -_debug_buf -_debug_buf_ptr -_debug_buf_size -_debug_log_init -_debug_mode -_debug_putc -_default_environment_shared_regions -_default_pager -_default_pager_add_file -_default_pager_async_lock -_default_pager_backing_store_create -_default_pager_backing_store_delete -_default_pager_backing_store_info -_default_pager_backing_store_monitor -_default_pager_clsize -_default_pager_default_set -_default_pager_external_count -_default_pager_external_set -_default_pager_info 
-_default_pager_info_verbose -_default_pager_initialize -_default_pager_internal_count -_default_pager_internal_set -_default_pager_memory_object_create -_default_pager_memory_object_default_subsystem -_default_pager_object -_default_pager_object_create -_default_pager_object_pages -_default_pager_object_server -_default_pager_object_server_routine -_default_pager_object_subsystem -_default_pager_objects -_default_pager_space_alert -_default_pager_triggers -_default_preemption_rate -_default_pset -_delay -_device_object_create -_device_pager_bootstrap -_device_pager_data_initialize -_device_pager_data_request -_device_pager_data_return -_device_pager_data_unlock -_device_pager_deallocate -_device_pager_init -_device_pager_lookup -_device_pager_populate_object -_device_pager_reference -_device_pager_setup -_device_pager_synchronize -_device_pager_terminate -_device_pager_unmap -_device_pager_workaround -_device_pager_zone -_device_service_create -_disableDebugOuput -_disable_bluebox -_dispatch_counts -_dp_memory_object_data_initialize -_dp_memory_object_data_request -_dp_memory_object_data_return -_dp_memory_object_data_unlock -_dp_memory_object_deallocate -_dp_memory_object_init -_dp_memory_object_reference -_dp_memory_object_subsystem -_dp_memory_object_synchronize -_dp_memory_object_terminate -_dp_memory_object_unmap -_dp_pages_free -_dp_parse_argument -_dpt_array -_dpt_lock -_draw_panic_dialog -_dynamic_pager_control_port -_edata -_eml_init -_eml_task_deallocate -_eml_task_reference -_enable_bluebox -_enable_hotpath -_end -_etap_get_info -_etap_interrupt_probe -_etap_machcall_probe1 -_etap_machcall_probe2 -_etap_mon_reconfig -_etap_new_probe -_etap_probe -_etap_trace_event -_etap_trace_thread -_etext -_exc_server -_exc_server_routine -_exception -_exception_deliver -_exception_raise -_exception_raise_state -_exception_raise_state_identity -_ffsbit -_fillPage -_first_avail -_first_free_check -_first_free_is_valid -_first_k_zone -_first_zone -_flush_dcache -_flush_dcache64 -_funnel_alloc -_funnel_free -_funnel_lock -_funnel_unlock -_gIOKitPortCount -_gc_buffer_lock -_gc_vt100state -_get_bsdtask_info -_get_bsdthread_info -_get_dp_control_port -_get_firstthread -_get_map_end -_get_map_max -_get_map_min -_get_map_nentries -_get_map_pmap -_get_map_start -_get_read_buffer -_get_set_state -_get_signalact -_get_state_handler -_get_task_ipcspace -_get_task_map -_get_task_numacts -_get_task_pmap -_get_task_userstop -_get_thread_userstop -_get_thread_waitresult -_get_threadtask -_get_user_regs -_get_useraddr -_get_vmmap_entries -_get_vmmap_size -_get_vmsubmap_entries -_getact_thread -_getmachheaders -_getsectcmdsymtabfromheader -_getshuttle_thread -_getsymtab -_global_stats -_halt_all_cpus -_halt_cpu -_halt_in_debugger -_hertz_tick -_host_default_memory_manager -_host_get_UNDServer -_host_get_boot_info -_host_get_clock_control -_host_get_clock_service -_host_get_exception_ports -_host_get_io_master -_host_get_special_port -_host_info -_host_ipc_hash_info -_host_kernel_version -_host_load_symbol_table -_host_notify_calendar_change -_host_notify_init -_host_notify_port_destroy -_host_page_size -_host_priv_self -_host_priv_server -_host_priv_server_routine -_host_priv_statistics -_host_priv_subsystem -_host_processor_info -_host_processor_set_priv -_host_processor_sets -_host_processors -_host_reboot -_host_request_notification -_host_security_create_task_token -_host_security_self -_host_security_server -_host_security_server_routine -_host_security_set_task_token -_host_security_subsystem 
-_host_self -_host_self_trap -_host_set_UNDServer -_host_set_exception_ports -_host_set_special_port -_host_stack_usage -_host_statistics -_host_swap_exception_ports -_host_virtual_physical_table_info -_host_zone_info -_hw_atomic_add -_hw_atomic_and -_hw_atomic_or -_hw_atomic_sub -_hw_compare_and_store -_hw_lock_held -_hw_lock_init -_hw_lock_lock -_hw_lock_to -_hw_lock_try -_hw_lock_unlock -_idle_thread -_idle_thread_continue -_init_ast_check -_init_task_failure_data -_init_timers -_initialize_screen -_install_special_handler -_install_special_handler_locked -_interlock_unlock -_intstack -_invalidate_icache -_invalidate_icache64 -_io_map -_io_map_spec -_io_throttle_zero_fill -_iokit_alloc_object_port -_iokit_destroy_object_port -_iokit_lookup_connect_port -_iokit_lookup_connect_ref -_iokit_lookup_connect_ref_current_task -_iokit_lookup_object_port -_iokit_make_connect_port -_iokit_make_object_port -_iokit_make_send_right -_iokit_notify -_iokit_release_port -_iokit_retain_port -_iokit_server -_iokit_server_routine -_iokit_switch_object_port -_ipc_bootstrap -_ipc_clock_enable -_ipc_clock_init -_ipc_entry_alloc -_ipc_entry_alloc_name -_ipc_entry_dealloc -_ipc_entry_get -_ipc_entry_grow_table -_ipc_entry_lookup -_ipc_entry_tree_collision -_ipc_hash_delete -_ipc_hash_global_delete -_ipc_hash_global_insert -_ipc_hash_global_lookup -_ipc_hash_global_mask -_ipc_hash_global_size -_ipc_hash_global_table -_ipc_hash_init -_ipc_hash_insert -_ipc_hash_local_delete -_ipc_hash_local_insert -_ipc_hash_local_lookup -_ipc_hash_lookup -_ipc_host_init -_ipc_init -_ipc_kernel_copy_map -_ipc_kernel_copy_map_size -_ipc_kernel_map -_ipc_kernel_map_size -_ipc_kmsg_alloc -_ipc_kmsg_cache -_ipc_kmsg_cache_avail -_ipc_kmsg_clean -_ipc_kmsg_clean_body -_ipc_kmsg_clean_partial -_ipc_kmsg_clear_prealloc -_ipc_kmsg_copyin -_ipc_kmsg_copyin_body -_ipc_kmsg_copyin_from_kernel -_ipc_kmsg_copyin_header -_ipc_kmsg_copyin_scatter -_ipc_kmsg_copyout -_ipc_kmsg_copyout_body -_ipc_kmsg_copyout_dest -_ipc_kmsg_copyout_header -_ipc_kmsg_copyout_object -_ipc_kmsg_copyout_pseudo -_ipc_kmsg_copyout_to_kernel -_ipc_kmsg_dequeue -_ipc_kmsg_destroy -_ipc_kmsg_destroy_dest -_ipc_kmsg_enqueue -_ipc_kmsg_free -_ipc_kmsg_free_scatter -_ipc_kmsg_get -_ipc_kmsg_get_from_kernel -_ipc_kmsg_init -_ipc_kmsg_max_vm_space -_ipc_kmsg_put -_ipc_kmsg_put_to_kernel -_ipc_kmsg_queue_next -_ipc_kmsg_rmqueue -_ipc_kmsg_send -_ipc_kmsg_set_prealloc -_ipc_kobject_destroy -_ipc_kobject_notify -_ipc_kobject_server -_ipc_kobject_set -_ipc_kobject_set_atomically -_ipc_mqueue_add -_ipc_mqueue_changed -_ipc_mqueue_copyin -_ipc_mqueue_destroy -_ipc_mqueue_full -_ipc_mqueue_init -_ipc_mqueue_member -_ipc_mqueue_post -_ipc_mqueue_rcv -_ipc_mqueue_receive -_ipc_mqueue_receive_continue -_ipc_mqueue_receive_results -_ipc_mqueue_release_msgcount -_ipc_mqueue_remove -_ipc_mqueue_remove_all -_ipc_mqueue_remove_from_all -_ipc_mqueue_select -_ipc_mqueue_send -_ipc_mqueue_set_qlimit -_ipc_mqueue_set_seqno -_ipc_notify_dead_name -_ipc_notify_no_senders -_ipc_notify_port_deleted -_ipc_notify_port_destroyed -_ipc_notify_send_once -_ipc_object_alloc -_ipc_object_alloc_dead -_ipc_object_alloc_dead_name -_ipc_object_alloc_name -_ipc_object_copyin -_ipc_object_copyin_from_kernel -_ipc_object_copyin_type -_ipc_object_copyout -_ipc_object_copyout_dest -_ipc_object_copyout_name -_ipc_object_destroy -_ipc_object_reference -_ipc_object_release -_ipc_object_rename -_ipc_object_translate -_ipc_object_translate_two -_ipc_object_zones -_ipc_port_alloc -_ipc_port_alloc_name 
-_ipc_port_alloc_special -_ipc_port_check_circularity -_ipc_port_clear_receiver -_ipc_port_copy_send -_ipc_port_copyout_send -_ipc_port_dealloc_special -_ipc_port_destroy -_ipc_port_dncancel -_ipc_port_dngrow -_ipc_port_dnnotify -_ipc_port_dnrequest -_ipc_port_init -_ipc_port_lookup_notify -_ipc_port_make_send -_ipc_port_make_send_locked -_ipc_port_make_sonce -_ipc_port_max -_ipc_port_multiple_lock_data -_ipc_port_nsrequest -_ipc_port_pdrequest -_ipc_port_release -_ipc_port_release_receive -_ipc_port_release_send -_ipc_port_release_sonce -_ipc_port_timestamp -_ipc_port_timestamp_data -_ipc_port_timestamp_lock_data -_ipc_processor_disable -_ipc_processor_enable -_ipc_processor_init -_ipc_processor_terminate -_ipc_pset_add -_ipc_pset_alloc -_ipc_pset_alloc_name -_ipc_pset_destroy -_ipc_pset_disable -_ipc_pset_enable -_ipc_pset_init -_ipc_pset_max -_ipc_pset_member -_ipc_pset_remove -_ipc_pset_remove_from_all -_ipc_pset_terminate -_ipc_right_check -_ipc_right_clean -_ipc_right_copyin -_ipc_right_copyin_check -_ipc_right_copyin_two -_ipc_right_copyin_undo -_ipc_right_copyout -_ipc_right_dealloc -_ipc_right_delta -_ipc_right_destroy -_ipc_right_dncancel -_ipc_right_dnrequest -_ipc_right_info -_ipc_right_inuse -_ipc_right_lookup_two_write -_ipc_right_lookup_write -_ipc_right_rename -_ipc_right_reverse -_ipc_space_clean -_ipc_space_create -_ipc_space_create_special -_ipc_space_destroy -_ipc_space_kernel -_ipc_space_max -_ipc_space_reference -_ipc_space_release -_ipc_space_reply -_ipc_space_zone -_ipc_splay_traverse_finish -_ipc_splay_traverse_next -_ipc_splay_traverse_start -_ipc_splay_tree_bounds -_ipc_splay_tree_delete -_ipc_splay_tree_init -_ipc_splay_tree_insert -_ipc_splay_tree_join -_ipc_splay_tree_lookup -_ipc_splay_tree_pick -_ipc_splay_tree_split -_ipc_table_alloc -_ipc_table_dnrequests -_ipc_table_dnrequests_size -_ipc_table_entries -_ipc_table_entries_size -_ipc_table_fill -_ipc_table_free -_ipc_table_init -_ipc_table_realloc -_ipc_task_disable -_ipc_task_enable -_ipc_task_init -_ipc_task_terminate -_ipc_thr_act_disable -_ipc_thr_act_init -_ipc_thr_act_terminate -_ipc_thread_init -_ipc_thread_terminate -_ipc_tree_entry_max -_ipc_tree_entry_zone -_is_64signalregset -_is_iokit_subsystem -_is_kerneltask -_is_thread_active -_is_thread_idle -_is_thread_running -_iso_font -_itoa -_k_zone -_k_zone_max -_kalloc -_kalloc_canblock -_kalloc_fake_zone_info -_kalloc_init -_kalloc_large_inuse -_kalloc_large_max -_kalloc_large_total -_kalloc_map -_kalloc_map_size -_kalloc_max -_kalloc_max_prerounded -_kalloc_noblock -_kalloc_zone -_kdb_printf -_kdp -_kdp_call -_kdp_call_kdb -_kdp_exception -_kdp_exception_ack -_kdp_flag -_kdp_get_interface -_kdp_get_ip_address -_kdp_get_mac_addr -_kdp_getc -_kdp_intr_disbl -_kdp_intr_enbl -_kdp_machine_hostinfo -_kdp_machine_read_regs -_kdp_machine_write_regs -_kdp_ml_get_breakinsn -_kdp_packet -_kdp_panic -_kdp_raise_exception -_kdp_reboot -_kdp_register_send_receive -_kdp_remove_all_breakpoints -_kdp_reset -_kdp_set_interface -_kdp_set_ip_and_mac_addresses -_kdp_sync_cache -_kdp_unregister_send_receive -_kdp_us_spin -_kdp_vm_read -_kdp_vm_write -_kentry_count -_kentry_data -_kentry_data_size -_kern_invalid -_kern_invalid_debug -_kernel_map -_kernel_memory_allocate -_kernel_object_iopl_request -_kernel_pageable_map -_kernel_pmap -_kernel_pmap_store -_kernel_set_special_port _kernel_task -_kernel_task_create -_kernel_thread -_kernel_thread_create -_kernel_thread_with_priority -_kernel_timer -_kernel_upl_abort -_kernel_upl_abort_range -_kernel_upl_commit 
-_kernel_upl_commit_range -_kernel_upl_map -_kernel_upl_unmap -_kernel_vm_map_reference -_kfree -_kget -_kmem_alloc -_kmem_alloc_aligned -_kmem_alloc_contig -_kmem_alloc_pageable -_kmem_alloc_pages -_kmem_alloc_wired -_kmem_free -_kmem_init -_kmem_io_object_deallocate -_kmem_io_object_trunc -_kmem_realloc -_kmem_remap_pages -_kmem_suballoc -_kmod -_kmod_cmd_queue -_kmod_control -_kmod_create -_kmod_create_fake -_kmod_create_internal -_kmod_default_start -_kmod_default_stop -_kmod_destroy -_kmod_destroy_internal -_kmod_dump -_kmod_finalize_cpp -_kmod_get_info -_kmod_init -_kmod_initialize_cpp -_kmod_load_extension -_kmod_load_extension_with_dependencies -_kmod_lock -_kmod_lookupbyid -_kmod_lookupbyid_locked -_kmod_lookupbyname -_kmod_lookupbyname_locked -_kmod_queue_cmd -_kmod_queue_lock -_kmod_release -_kmod_retain -_kmod_send_generic -_kmod_start_or_stop -_krealloc -_kvtophys -_last_page_zf -_last_zone -_ledger_copy -_ledger_create -_ledger_enter -_ledger_init -_ledger_read -_ledger_server -_ledger_server_routine -_ledger_subsystem -_ledger_terminate -_ledger_transfer -_local_log2 -_lock_acquire -_lock_alloc -_lock_done -_lock_free -_lock_handoff -_lock_handoff_accept -_lock_init -_lock_make_stable -_lock_make_unstable -_lock_read -_lock_read_to_write -_lock_release -_lock_release_internal -_lock_set_create -_lock_set_dereference -_lock_set_destroy -_lock_set_event -_lock_set_handoff -_lock_set_init -_lock_set_reference -_lock_set_server -_lock_set_server_routine -_lock_set_subsystem -_lock_try -_lock_wait_time -_lock_write -_lock_write_to_read -_log -_logPanicDataToScreen -_lookup_default_shared_region -_lsf_mapping_pool_gauge -_lsf_remove_regions_mappings -_lsf_zone +_kernel_thread_start +_lck_mtx_sleep +_lck_mtx_sleep_deadline +_lck_rw_sleep +_lck_rw_sleep_deadline +_lck_spin_sleep +_lck_spin_sleep_deadline _mach_absolute_time -_mach_assert -_mach_destroy_memory_entry -_mach_factor -_mach_host_server -_mach_host_server_routine -_mach_host_subsystem -_mach_make_memory_entry -_mach_make_memory_entry_64 -_mach_memory_object_memory_entry -_mach_memory_object_memory_entry_64 -_mach_msg_overwrite -_mach_msg_overwrite_trap -_mach_msg_receive -_mach_msg_receive_continue -_mach_msg_receive_results -_mach_msg_rpc_from_kernel -_mach_msg_send _mach_msg_send_from_kernel -_mach_msg_trap -_mach_notify_dead_name -_mach_notify_no_senders -_mach_notify_port_deleted -_mach_notify_port_destroyed -_mach_notify_send_once -_mach_port_allocate -_mach_port_allocate_full -_mach_port_allocate_name -_mach_port_allocate_qos -_mach_port_deallocate -_mach_port_destroy -_mach_port_dnrequest_info -_mach_port_extract_member -_mach_port_extract_right -_mach_port_get_attributes -_mach_port_get_refs -_mach_port_get_set_status -_mach_port_get_srights -_mach_port_gst_helper -_mach_port_insert_member -_mach_port_insert_right -_mach_port_kernel_object -_mach_port_mod_refs -_mach_port_move_member -_mach_port_names -_mach_port_names_helper -_mach_port_rename -_mach_port_request_notification -_mach_port_server -_mach_port_server_routine -_mach_port_set_attributes -_mach_port_set_mscount -_mach_port_set_seqno -_mach_port_space_info -_mach_port_subsystem -_mach_port_type -_mach_ports_lookup -_mach_ports_register -_mach_reply_port -_mach_thread_self -_mach_timebase_info -_mach_trap_count -_mach_trap_table -_mach_vm_region_info -_mach_vm_region_info_64 -_mach_wait_until -_machine_boot_info -_machine_idle -_machine_info -_machine_init -_machine_load_context -_machine_signal_idle -_machine_slot -_machine_stack_attach 
-_machine_stack_detach -_machine_stack_handoff -_machine_startup -_machine_switch_act -_machine_switch_context -_machine_thread_create -_machine_thread_destroy -_machine_thread_dup -_machine_thread_get_state -_machine_thread_init -_machine_thread_set_current -_machine_thread_set_state -_machine_thread_terminate_self -_machine_wake_thread -_macx_triggers -_map_data -_map_data_size -_mapping_set_mod -_master_cpu -_master_device_port -_master_processor -_max_doubled_size -_max_mem -_max_pages_trigger_port -_max_poll_computation -_max_poll_quanta -_max_rt_quantum -_max_unsafe_computation -_max_unsafe_quanta -_maximum_pages_free -_mem_size -_memcpy -_memory_manager_default -_memory_manager_default_cluster -_memory_manager_default_lock -_memory_object_change_attributes -_memory_object_control_deallocate -_memory_object_control_disable -_memory_object_control_reference -_memory_object_control_server -_memory_object_control_server_routine -_memory_object_control_subsystem -_memory_object_create -_memory_object_create_named -_memory_object_data_initialize -_memory_object_data_request -_memory_object_data_return -_memory_object_data_unlock -_memory_object_deactivate_pages -_memory_object_deallocate -_memory_object_default_deallocate -_memory_object_default_reference -_memory_object_default_server -_memory_object_default_server_routine -_memory_object_destroy -_memory_object_get_attributes -_memory_object_init -_memory_object_iopl_request -_memory_object_lock_page -_memory_object_lock_request -_memory_object_name_server -_memory_object_name_server_routine -_memory_object_name_subsystem -_memory_object_page_op -_memory_object_range_op -_memory_object_recover_named -_memory_object_reference -_memory_object_release_name -_memory_object_server -_memory_object_server_routine -_memory_object_super_upl_request -_memory_object_synchronize -_memory_object_synchronize_completed -_memory_object_terminate -_memory_object_unmap -_memory_object_upl_request -_memset -_mig_buckets -_mig_dealloc_reply_port -_mig_e -_mig_get_reply_port -_mig_init -_mig_object_deallocate -_mig_object_destroy -_mig_object_init -_mig_object_no_senders -_mig_object_reference -_mig_put_reply_port -_mig_reply_size -_mig_strncpy -_mig_table_max_displ -_mig_user_allocate -_mig_user_deallocate -_min_pages_trigger_port -_min_rt_quantum -_min_std_quantum -_minimum_pages_remaining -_mk_timebase_info -_mk_timer_arm -_mk_timer_cancel -_mk_timer_create -_mk_timer_destroy -_mk_timer_init -_mk_timer_port_destroy -_ml_at_interrupt_context -_ml_cause_interrupt -_ml_cpu_get_info -_ml_get_interrupts_enabled -_ml_get_max_cpus -_ml_get_timebase -_ml_init_interrupt -_ml_init_max_cpus -_ml_install_interrupt_handler -_ml_io_map -_ml_phys_read -_ml_phys_read_64 -_ml_phys_read_byte -_ml_phys_read_byte_64 -_ml_phys_read_double -_ml_phys_read_double_64 -_ml_phys_read_half -_ml_phys_read_half_64 -_ml_phys_read_word -_ml_phys_read_word_64 -_ml_phys_write -_ml_phys_write_64 -_ml_phys_write_byte -_ml_phys_write_byte_64 -_ml_phys_write_double -_ml_phys_write_double_64 -_ml_phys_write_half -_ml_phys_write_half_64 -_ml_phys_write_word -_ml_phys_write_word_64 -_ml_probe_read -_ml_probe_read_64 -_ml_processor_register -_ml_set_interrupts_enabled -_ml_static_malloc -_ml_static_mfree -_ml_static_ptovirt -_ml_thread_policy -_ml_vtophys -_msg_ool_size_small -_msg_receive_error -_mutex_alloc -_mutex_free -_mutex_init -_mutex_lock -_mutex_lock_acquire -_mutex_lock_wait -_mutex_pause -_mutex_preblock -_mutex_preblock_wait -_mutex_try -_mutex_unlock -_mutex_unlock_wakeup -_my_name 
_nanoseconds_to_absolutetime -_need_ast -_nestedpanic -_new_addr_hash -_new_obj_hash -_newtest -_no_dispatch_count -_noresume_on_disconnect -_norma_mk -_not_implemented -_null_port -_num_zones -_osfmk_osrelease -_osfmk_ostype -_osfmk_version -_osfmk_version_major -_osfmk_version_minor -_osfmk_version_variant -_page_mask -_page_shift -_page_size -_paging_segment_count -_paging_segment_max -_paging_segments -_paging_segments_lock -_panic -_panicDebugging -_panicDialogDesired -_panic_init -_panic_is_inited -_panic_lock -_panic_ui_initialize -_paniccpu -_panicstr -_panicwait -_pc_trace_buf -_pc_trace_cnt -_physical_transfer_cluster_count -_pmap_bootstrap -_pmap_change_wiring -_pmap_clear_modify -_pmap_clear_reference -_pmap_collect -_pmap_copy_page -_pmap_copy_part_page -_pmap_create -_pmap_destroy -_pmap_enter -_pmap_extract -_pmap_find_phys -_pmap_free_pages -_pmap_init -_pmap_initialized -_pmap_is_modified -_pmap_is_referenced -_pmap_map -_pmap_modify_pages -_pmap_next_page -_pmap_page_protect -_pmap_pageable -_pmap_protect -_pmap_reference -_pmap_remove -_pmap_remove_some_phys -_pmap_startup -_pmap_steal_memory -_pmap_sync_caches_phys -_pmap_verify_free -_pmap_virtual_space -_pmap_zero_page -_pmap_zero_part_page -_pmap_zone -_port_name_to_act -_port_name_to_clock -_port_name_to_semaphore -_port_name_to_task -_print_saved_state -_printf_init -_printf_lock -_processor_array -_processor_assign -_processor_control -_processor_doshutdown -_processor_exit -_processor_get_assignment -_processor_info -_processor_info_count -_processor_init -_processor_offline -_processor_ptr -_processor_server -_processor_server_routine -_processor_set_base -_processor_set_create -_processor_set_default -_processor_set_destroy -_processor_set_info -_processor_set_limit -_processor_set_max_priority -_processor_set_policy_control -_processor_set_policy_disable -_processor_set_policy_enable -_processor_set_server -_processor_set_server_routine -_processor_set_stack_usage -_processor_set_statistics -_processor_set_subsystem -_processor_set_tasks -_processor_set_things -_processor_set_threads -_processor_shutdown -_processor_start -_processor_subsystem -_prof_queue -_profile_kernel_services -_ps_allocate_cluster -_ps_clmap -_ps_clunmap -_ps_dealloc_vsmap -_ps_deallocate_cluster -_ps_delete -_ps_enter -_ps_map_extend -_ps_read_device -_ps_read_file -_ps_select_array -_ps_select_segment -_ps_vs_write_complete -_ps_vstruct_allocated_pages -_ps_vstruct_allocated_size -_ps_vstruct_create -_ps_vstruct_dealloc -_ps_vstruct_transfer_from_segment -_ps_write_device -_ps_write_file -_pset_add_processor -_pset_add_task -_pset_add_thread -_pset_deallocate -_pset_init -_pset_quanta_setup -_pset_reference -_pset_remove_processor -_pset_remove_task -_pset_remove_thread -_pset_sys_bootstrap -_pvs_cluster_read -_pvs_object_data_provided -_real_ncpus -_realhost -_reattach_wait -_ref_act_port_locked -_ref_pset_port_locked -_refresh_screen -_refunnel_hint -_refunnel_hint_enabled -_remove_all_shared_regions -_remove_default_shared_region -_retrieve_act_self_fast -_retrieve_task_self_fast -_return_on_panic -_root_paged_ledger -_root_wired_ledger -_rtclock_intr -_rtclock_reset -_run_queue_remove -_safe_gets -_sane_size -_save_waits -_sched_init -_sched_poll_yield_shift -_sched_safe_duration -_sched_tick -_sched_tick_init -_sched_tick_thread -_sched_tick_thread_continue -_sched_timebase_init -_sectDATAB -_sectLINKB -_sectPRELINKB -_sectSizeDATA -_sectSizeLINK -_sectSizePRELINK -_sectSizeTEXT -_sectTEXTB -_semaphore_convert_wait_result 
+_preemption_enabled _semaphore_create _semaphore_dereference _semaphore_destroy -_semaphore_init -_semaphore_max _semaphore_reference -_semaphore_server -_semaphore_server_routine _semaphore_signal _semaphore_signal_all -_semaphore_signal_all_trap -_semaphore_signal_internal -_semaphore_signal_thread -_semaphore_signal_thread_trap -_semaphore_signal_trap -_semaphore_subsystem _semaphore_timedwait -_semaphore_timedwait_continue -_semaphore_timedwait_signal -_semaphore_timedwait_signal_trap -_semaphore_timedwait_trap _semaphore_wait -_semaphore_wait_continue -_semaphore_wait_internal -_semaphore_wait_signal -_semaphore_wait_signal_trap -_semaphore_wait_trap -_semaphore_zone -_set_be_bit -_set_bsdtask_info -_set_dp_control_port -_set_priority -_set_sched_pri -_set_state_handler -_setbit -_setup_main -_sfma_handle -_shared_com_boot_time_init -_shared_data_region_handle -_shared_file_available_hash_ele -_shared_file_boot_time_init -_shared_file_create_system_region -_shared_file_data_region -_shared_file_mapping_array -_shared_file_text_region -_shared_region_mapping_create -_shared_region_mapping_dealloc -_shared_region_mapping_info -_shared_region_mapping_ref -_shared_region_mapping_set_alt_next -_shared_region_object_chain_attach -_shared_text_region_handle -_slave_machine_init -_slave_main -_space_deallocate -_special_handler -_special_handler_continue -_split_funnel_off -_sprintf -_sprintf_lock -_sscanf -_stack_alloc -_stack_alloc_bndry -_stack_alloc_hits -_stack_alloc_hiwater -_stack_alloc_misses -_stack_alloc_total -_stack_alloc_try -_stack_cache_hits -_stack_collect -_stack_fake_zone_info -_stack_free -_stack_free_count -_stack_free_limit -_stack_free_max -_stack_free_stack -_stack_privilege -_stack_statistics -_start_cpu_thread -_start_def_pager -_start_kernel_threads -_startup_miss -_state_count -_std_quantum -_std_quantum_us -_strcat -_strcmp -_strcpy -_strncmp -_strncpy -_strprefix -_swap_act_map -_swap_task_map -_swapin_init -_swapin_lock -_swapin_queue -_swapin_thread -_swapin_thread_continue -_switch_act -_switch_act_swapins -_switch_debugger -_switch_to_serial_console -_switch_to_shutdown_context -_swtch -_swtch_continue -_swtch_pri -_swtch_pri_continue -_sysclk_config -_sysclk_getattr -_sysclk_gettime -_sysclk_init -_sysclk_ops -_sysclk_setalarm -_systemLogDiags -_task_act_iterate_wth_args -_task_assign -_task_assign_default -_task_backing_store_privileged -_task_collect_allowed -_task_collect_last_tick -_task_collect_max_rate -_task_collect_scan -_task_create -_task_create_internal _task_deallocate -_task_get_assignment -_task_get_emulation_vector -_task_get_exception_ports -_task_get_special_port -_task_halt -_task_hold -_task_hold_locked -_task_importance -_task_info -_task_init -_task_is_classic -_task_policy -_task_policy_get -_task_policy_set _task_reference -_task_reference_try -_task_release -_task_release_locked -_task_resume -_task_sample -_task_self_trap -_task_server -_task_server_routine -_task_set_emulation -_task_set_emulation_vector -_task_set_emulation_vector_internal -_task_set_exception_ports -_task_set_info -_task_set_ledger -_task_set_policy -_task_set_port_space -_task_set_ras_pc -_task_set_special_port -_task_subsystem -_task_suspend -_task_swap_exception_ports -_task_swappable -_task_synchronizer_destroy_all -_task_terminate -_task_terminate_internal -_task_threads -_task_wait_locked -_task_wire -_task_working_set_create -_task_zone -_test_tws -_testbit -_thread_abort -_thread_abort_safely -_thread_act_server -_thread_act_server_routine 
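Note what survives the Mach cull at this point: the semaphore object calls (_semaphore_create, _semaphore_signal, _semaphore_signal_all, _semaphore_wait, _semaphore_timedwait, _semaphore_destroy) stay exported while their trap and MIG server plumbing goes private, and _kernel_task remains on the list as the owning task. A sketch of the surviving interface, assuming the usual <mach/semaphore.h> and <kern/task.h> declarations (the header paths are from memory, not stated by the patch):

    #include <mach/semaphore.h>    /* semaphore_create/wait/signal/destroy */
    #include <mach/sync_policy.h>  /* SYNC_POLICY_FIFO */
    #include <kern/task.h>         /* kernel_task */

    static semaphore_t my_sem;

    static kern_return_t
    my_sem_init(void)
    {
        /* counting semaphore, initial value 0: consumers block until signaled */
        return semaphore_create(kernel_task, &my_sem, SYNC_POLICY_FIFO, 0);
    }

    static void
    my_produce(void)
    {
        /* ... enqueue one unit of work, then wake one waiter ... */
        (void) semaphore_signal(my_sem);
    }

    static void
    my_consume(void)
    {
        if (semaphore_wait(my_sem) == KERN_SUCCESS) {
            /* ... dequeue and handle one unit of work ... */
        }
    }

    static void
    my_sem_fini(void)
    {
        (void) semaphore_destroy(kernel_task, my_sem);
    }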
-_thread_act_subsystem -_thread_apc_clear -_thread_apc_set -_thread_assign -_thread_assign_default -_thread_bind _thread_block -_thread_block_reason -_thread_bootstrap -_thread_bootstrap_return +_thread_block_parameter _thread_call_allocate _thread_call_cancel _thread_call_enter @@ -1524,549 +45,8 @@ _thread_call_enter1 _thread_call_enter1_delayed _thread_call_enter_delayed _thread_call_free -_thread_call_func -_thread_call_func_cancel -_thread_call_func_delayed -_thread_call_initialize -_thread_call_is_delayed -_thread_call_setup -_thread_cancel_timer -_thread_change_psets -_thread_continue -_thread_create -_thread_create_running _thread_deallocate -_thread_depress_abort -_thread_depress_expire -_thread_dispatch -_thread_doreap -_thread_doswapin -_thread_dup -_thread_entrypoint -_thread_exception_return -_thread_get_assignment -_thread_get_cont_arg -_thread_get_exception_ports -_thread_get_special_port -_thread_get_state -_thread_getstatus -_thread_go_locked -_thread_hold -_thread_info -_thread_info_shuttle -_thread_init -_thread_invoke -_thread_lock_act -_thread_policy -_thread_policy_get _thread_policy_set -_thread_quantum_expire -_thread_read_times -_thread_reaper_enqueue -_thread_reaper_init _thread_reference -_thread_release -_thread_resume -_thread_run -_thread_sample -_thread_scan_enabled -_thread_select -_thread_self -_thread_self_trap -_thread_set_child -_thread_set_cont_arg -_thread_set_exception_ports -_thread_set_parent -_thread_set_policy -_thread_set_special_port -_thread_set_state -_thread_set_timer -_thread_set_timer_deadline -_thread_setrun -_thread_setstatus -_thread_should_abort -_thread_should_halt -_thread_sleep_funnel -_thread_sleep_lock_write -_thread_sleep_mutex -_thread_sleep_mutex_deadline -_thread_sleep_usimple_lock -_thread_stop -_thread_suspend -_thread_swap_exception_ports -_thread_swapin -_thread_switch -_thread_syscall_return -_thread_task_priority _thread_terminate -_thread_terminate_internal -_thread_terminate_self -_thread_termination_continue -_thread_timer_expire -_thread_timer_setup -_thread_timer_terminate -_thread_unlock_act -_thread_unstop -_thread_userstack -_thread_wait -_thread_wakeup _thread_wakeup_prim -_thread_wire -_timer_call_cancel -_timer_call_enter -_timer_call_enter1 -_timer_call_initialize -_timer_call_is_delayed -_timer_call_setup -_timer_call_shutdown -_timer_delta -_timer_grab -_timer_init -_timer_normalize -_timer_read -_trailer_template -_trap_type -_trigger_name_to_port -_tws_build_cluster -_tws_create_startup_list -_tws_expand_working_set -_tws_handle_startup_file -_tws_hash_clear -_tws_hash_create -_tws_hash_destroy -_tws_hash_line_clear -_tws_hash_ws_flush -_tws_insert -_tws_internal_lookup -_tws_internal_startup_send -_tws_line_signal -_tws_lookup -_tws_read_startup_file -_tws_send_startup_info -_tws_startup_list_lookup -_tws_test_for_community -_tws_traverse_address_hash_list -_tws_traverse_object_hash_list -_tws_write_startup_file -_udp_ttl -_update_default_shared_region -_update_priority -_upl_abort -_upl_abort_range -_upl_clear_dirty -_upl_commit -_upl_commit_range -_upl_deallocate -_upl_dirty_page -_upl_get_internal_pagelist_offset -_upl_offset_to_pagelist -_upl_page_present -_upl_phys_page -_upl_server -_upl_server_routine -_upl_set_dirty -_upl_subsystem -_upl_valid_page -_user_warned -_usimple_lock -_usimple_lock_init -_usimple_lock_try -_usimple_unlock -_vc_display_icon -_vc_progress_initialize -_vc_progress_lock -_vcattach -_vcputc -_verbose -_video_scroll_down -_video_scroll_up -_vinfo -_virtual_space_end 
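The same trimming pattern shows in the hunk above: the thread_call object calls (_thread_call_allocate, _thread_call_enter and its delayed variants, _thread_call_cancel, _thread_call_free) remain the supported deferred-work primitive, while the timer_call and thread_set_timer internals disappear; _clock_interval_to_deadline, kept earlier in this file, supplies the deadline. A sketch of a delayed callout, assuming the usual <kern/thread_call.h> and <kern/clock.h> declarations; my_timeout and the 500 ms interval are invented for illustration:

    #include <stdint.h>
    #include <kern/thread_call.h>
    #include <kern/clock.h>        /* clock_interval_to_deadline */

    static thread_call_t my_call;

    static void
    my_timeout(thread_call_param_t p0, thread_call_param_t p1)
    {
        (void)p0; (void)p1;
        /* runs later, on a kernel thread-call thread */
    }

    static void
    my_arm(void)
    {
        uint64_t deadline;

        if (my_call == NULL)
            my_call = thread_call_allocate(my_timeout, NULL /* param0 */);

        /* deadline is in absolute-time units: fire 500 ms from now */
        clock_interval_to_deadline(500, 1000 * 1000 /* ns per ms */, &deadline);
        (void) thread_call_enter_delayed(my_call, deadline);
    }

    static void
    my_disarm(void)
    {
        if (my_call != NULL) {
            (void) thread_call_cancel(my_call);  /* TRUE if still pending */
            thread_call_free(my_call);
            my_call = NULL;
        }
    }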
-_virtual_space_start -_vm_accellerate_zf_pageout_trigger -_vm_allocate -_vm_allocate_cpm -_vm_allow_clustered_pagein -_vm_backing_store_disable -_vm_backing_store_low -_vm_behavior_set -_vm_conflict_check -_vm_copy -_vm_countdirtypages -_vm_deallocate -_vm_default_ahead -_vm_default_behind -_vm_external_copy -_vm_external_create -_vm_external_destroy -_vm_external_map_size -_vm_external_module_initialize -_vm_external_state_clr -_vm_external_state_set -_vm_external_within -_vm_fault -_vm_fault_cleanup -_vm_fault_copy -_vm_fault_copy_cleanup -_vm_fault_copy_dst_cleanup -_vm_fault_debug -_vm_fault_init -_vm_fault_list_request -_vm_fault_page -_vm_fault_unwire -_vm_fault_wire -_vm_fault_wire_fast -_vm_free_page_pause -_vm_get_shared_region -_vm_inherit -_vm_last_addr -_vm_machine_attribute -_vm_map -_vm_map_64 -_vm_map_aggressive_enter -_vm_map_aggressive_enter_max -_vm_map_behavior_set -_vm_map_check_protection -_vm_map_copy_copy -_vm_map_copy_discard -_vm_map_copy_overwrite -_vm_map_copy_overwrite_aligned -_vm_map_copy_overwrite_nested -_vm_map_copy_overwrite_unaligned -_vm_map_copy_zone -_vm_map_copyin_common -_vm_map_copyin_kernel_buffer -_vm_map_copyin_object -_vm_map_copyout -_vm_map_copyout_kernel_buffer -_vm_map_create -_vm_map_deallocate -_vm_map_delete -_vm_map_destroy -_vm_map_enter -_vm_map_entry_delete -_vm_map_entry_insert -_vm_map_entry_zone -_vm_map_find_space -_vm_map_fork -_vm_map_fork_copy -_vm_map_fork_share -_vm_map_get_phys_page -_vm_map_get_upl -_vm_map_inherit -_vm_map_init -_vm_map_kentry_zone -_vm_map_lookup_entry -_vm_map_lookup_locked -_vm_map_machine_attribute -_vm_map_overwrite_submap_recurse -_vm_map_page_query -_vm_map_pmap_enter -_vm_map_pmap_enter_enable -_vm_map_pmap_enter_print -_vm_map_protect -_vm_map_range_check -_vm_map_read_user -_vm_map_reference -_vm_map_region_replace -_vm_map_remove -_vm_map_server -_vm_map_server_routine -_vm_map_simplify -_vm_map_steal_memory -_vm_map_submap -_vm_map_submap_pmap_clean -_vm_map_subsystem -_vm_map_switch -_vm_map_unwire -_vm_map_unwire_nested -_vm_map_verify -_vm_map_wire -_vm_map_wire_nested -_vm_map_write_user -_vm_map_zone -_vm_mapped_pages_info -_vm_mem_bootstrap -_vm_mem_init -_vm_msync -_vm_object_absent_max -_vm_object_destroy -_vm_object_enter -_vm_object_hash_entry_free -_vm_object_iopl_request -_vm_object_page_map -_vm_object_page_remove_iterate -_vm_object_page_remove_lookup -_vm_object_pager_create -_vm_object_populate_with_private -_vm_object_shadow_check -_vm_object_sync -_vm_object_terminate_remove_all -_vm_object_update -_vm_page_activate -_vm_page_active_count -_vm_page_alloc -_vm_page_alloc_lock -_vm_page_bootstrap -_vm_page_bucket_count -_vm_page_bucket_hash -_vm_page_bucket_lock -_vm_page_buckets -_vm_page_convert -_vm_page_copy -_vm_page_create -_vm_page_deactivate -_vm_page_deactivate_behind -_vm_page_deactivate_hint -_vm_page_fictitious_addr -_vm_page_fictitious_count -_vm_page_free -_vm_page_free_count -_vm_page_free_count_init -_vm_page_free_count_minimum -_vm_page_free_list -_vm_page_free_min -_vm_page_free_reserve -_vm_page_free_reserved -_vm_page_free_target -_vm_page_free_verify -_vm_page_free_wanted -_vm_page_gobble -_vm_page_gobble_count -_vm_page_gobble_count_warning -_vm_page_grab -_vm_page_grab_count -_vm_page_grab_fictitious -_vm_page_hash_mask -_vm_page_hash_shift -_vm_page_inactive_count -_vm_page_inactive_target -_vm_page_init -_vm_page_insert -_vm_page_laundry_count -_vm_page_laundry_max -_vm_page_laundry_min -_vm_page_limbo_count -_vm_page_limbo_real_count -_vm_page_lookup 
-_vm_page_mask -_vm_page_module_init -_vm_page_more_fictitious -_vm_page_pages -_vm_page_part_copy -_vm_page_part_zero_fill -_vm_page_pin_count -_vm_page_preppin_lock -_vm_page_queue_active -_vm_page_queue_fictitious -_vm_page_queue_free -_vm_page_queue_free_lock -_vm_page_queue_inactive -_vm_page_queue_limbo -_vm_page_queue_lock -_vm_page_queue_zf -_vm_page_release -_vm_page_release_fictitious -_vm_page_remove -_vm_page_rename -_vm_page_replace -_vm_page_shift -_vm_page_template -_vm_page_ticket -_vm_page_ticket_roll -_vm_page_unwire -_vm_page_wait -_vm_page_wire -_vm_page_wire_count -_vm_page_wire_count_warning -_vm_page_zero_fill -_vm_page_zero_fill_lock -_vm_page_zone -_vm_pageclean_copy -_vm_pageclean_setup -_vm_pagein_cluster_unused -_vm_pagein_cluster_used -_vm_pageout -_vm_pageout_active -_vm_pageout_burst_max -_vm_pageout_burst_min -_vm_pageout_burst_wait -_vm_pageout_clean_active_pages -_vm_pageout_cluster -_vm_pageout_cluster_page -_vm_pageout_continue -_vm_pageout_dirty_no_pager -_vm_pageout_emergency_availability_request -_vm_pageout_empty_wait -_vm_pageout_in_place -_vm_pageout_inactive -_vm_pageout_inactive_absent -_vm_pageout_inactive_avoid -_vm_pageout_inactive_busy -_vm_pageout_inactive_clean -_vm_pageout_inactive_dirty -_vm_pageout_inactive_forced -_vm_pageout_inactive_nolock -_vm_pageout_inactive_throttled -_vm_pageout_inactive_used -_vm_pageout_initialize_page -_vm_pageout_object_allocate -_vm_pageout_object_terminate -_vm_pageout_out_of_line -_vm_pageout_pause_count -_vm_pageout_pause_max -_vm_pageout_reserved_internal -_vm_pageout_reserved_really -_vm_pageout_scan -_vm_pageout_scan_active_emm_throttle -_vm_pageout_scan_active_emm_throttle_failure -_vm_pageout_scan_active_emm_throttle_success -_vm_pageout_scan_continue -_vm_pageout_scan_event_counter -_vm_pageout_scan_inactive_emm_throttle -_vm_pageout_scan_inactive_emm_throttle_failure -_vm_pageout_scan_inactive_emm_throttle_success -_vm_pageout_setup -_vm_pageout_throttle -_vm_pool_low -_vm_protect -_vm_read -_vm_read_list -_vm_read_overwrite -_vm_region -_vm_region_64 -_vm_region_clone -_vm_region_count_obj_refs -_vm_region_look_for_page -_vm_region_object_create -_vm_region_recurse -_vm_region_recurse_64 -_vm_region_top_walk -_vm_region_walk -_vm_remap -_vm_remap_extract -_vm_remap_range_allocate -_vm_set_page_size -_vm_set_shared_region -_vm_stat -_vm_stat_discard -_vm_stat_discard_cleared_reply -_vm_stat_discard_cleared_too_late -_vm_stat_discard_cleared_unset -_vm_stat_discard_failure -_vm_stat_discard_sent -_vm_stat_discard_throttle -_vm_submap_object -_vm_upl_map -_vm_upl_unmap -_vm_wire -_vm_write -_vm_zf_count -_vm_zf_iterator -_vm_zf_iterator_count -_vnode_object_create -_vnode_pager_bootstrap -_vnode_pager_cluster_read -_vnode_pager_cluster_write -_vnode_pager_data_initialize -_vnode_pager_data_request -_vnode_pager_data_return -_vnode_pager_data_unlock -_vnode_pager_deallocate -_vnode_pager_get_object_size -_vnode_pager_init -_vnode_pager_lookup -_vnode_pager_reference -_vnode_pager_release_from_cache -_vnode_pager_setup -_vnode_pager_synchronize -_vnode_pager_terminate -_vnode_pager_unmap -_vnode_pager_workaround -_vnode_pager_zone -_vs_alloc_async -_vs_alloc_async_count -_vs_alloc_async_failed -_vs_async_free_list -_vs_cl_write_complete -_vs_cluster_transfer -_vs_cluster_write -_vs_do_async_write -_vs_free_async -_vs_get_map_entry -_vs_object_create -_vstruct_def_clshift -_vstruct_list -_vstruct_zone -_wait_queue_alloc -_wait_queue_assert_wait -_wait_queue_assert_wait64 -_wait_queue_free 
-_wait_queue_init -_wait_queue_link -_wait_queue_link_noalloc -_wait_queue_link_size -_wait_queue_member -_wait_queue_pull_thread_locked -_wait_queue_set_alloc -_wait_queue_set_free -_wait_queue_set_init -_wait_queue_set_size -_wait_queue_set_unlink_all -_wait_queue_set_unlink_all_nofree -_wait_queue_sub_clearrefs -_wait_queue_sub_init -_wait_queue_unlink -_wait_queue_unlink_all -_wait_queue_unlink_one -_wait_queue_unlinkall_nofree -_wait_queue_wakeup64_all -_wait_queue_wakeup64_one -_wait_queue_wakeup64_thread -_wait_queue_wakeup_all -_wait_queue_wakeup_one -_wait_queue_wakeup_thread -_wait_queues -_wait_queues_init -_wait_shift -_wait_subqueue_unlink_all -_wncpu -_zalloc -_zalloc_async -_zalloc_canblock -_zalloc_end_of_space -_zalloc_next_space -_zalloc_noblock -_zalloc_wasted_space -_zcram -_zdata -_zdata_size -_zfill -_zfree -_zget -_zget_space -_zget_space_lock -_zinit -_zone_bootstrap -_zone_change -_zone_check -_zone_free_count -_zone_gc -_zone_gc_allowed -_zone_gc_forced -_zone_gc_last_tick -_zone_gc_lock -_zone_gc_max_rate -_zone_init -_zone_map -_zone_map_max_address -_zone_map_min_address -_zone_page_alloc -_zone_page_collectable -_zone_page_init -_zone_page_keep -_zone_page_table -_zone_pages -_zone_steal_memory -_zone_zone -_zprealloc diff --git a/config/Mach.ppc.exports b/config/Mach.ppc.exports index a1546def8..e69de29bb 100644 --- a/config/Mach.ppc.exports +++ b/config/Mach.ppc.exports @@ -1,582 +0,0 @@ -Choke -ClearRealCall -CreateFakeDECCall -CreateFakeIOCall -CreateShutdownCTXCall -CutTrace -DoPreemptCall -LoadDBATsCall -LoadIBATsCall -NullCall -StoreRealCall -SwitchContextCall -_AlignAssist -_AlignAssist64 -_AltivecAssist -_Call_Debugger -_Call_DebuggerC -_Call_continuation -_ChokeSys -_ClearReal -_ClearRealLL -_CreateFakeDEC -_CreateFakeDECLL -_CreateFakeIO -_CreateFakeIOLL -_CreateShutdownCTX -_CreateShutdownCTXLL -_DebugWork -_DoChokeLL -_DoPreemptLL -_EmulExit -_Emulate -_Emulate64 -_ExceptionVectorsEnd -_ExceptionVectorsStart -_FCReturn -_FWtable -_FirmwareCall -_FixedStackEnd -_FixedStackStart -_FloatInit -_GratefulDebInit -_GratefulDebWork -_LLTraceSet -_LoadDBATs -_LoadIBATs -_MapUserAddressSpace -_MapUserAddressSpaceInit -_NMIss -_NullLL -_PFSExit -_PPCcalls -_QNaNbarbarian -_ReadReal -_ReleaseUserAddressSpace -_ResetHandler -_RuptCtrs -_StoreReal -_StoreRealLL -_SwitchContextLL -_SysChoked -__start_cpu -_aaFPopTable -_atomic_switch_syscall -_atomic_switch_trap -_backchain -_backpocket -_bbSetRupt -_bb_disable_bluebox -_bb_enable_bluebox -_bb_settaskenv -_bcopy_64 -_bcopy_970 -_bcopy_g3 -_bcopy_g4 -_bcopy_nc -_bcopy_physvir -_bigcopy_970 -_boot_args_buf -_bzero_128 -_bzero_32 -_bzero_nc -_cacheDisable -_cacheInit -_cbfpend -_cbfr -_chandler -_checkBogus -_checkNMI -_clock_delay_until -_clock_gettimeofday -_cnputcusr -_cntlzw -_commPagePtr -_commpage_flush_dcache -_commpage_flush_icache -_commpage_set_timestamp -_commpage_stuff -_commpage_time_dcba -_condStop -_cons_ops -_cons_ops_index -_consider_mapping_adjust -_console_chan_default -_console_is_serial -_console_unit -_copyin_multiple -_copyout_multiple -_cpu_doshutdown -_cpu_signal -_cpu_sync_timebase -_cpus_holding_bkpts -_current_free_region -_cursor_pmap -_db_breakpoints_inserted -_db_im_stepping -_db_recover -_db_run_mode -_dbfloats -_dbgCkpt -_dbgCkptLL -_dbgDisp -_dbgDispLL -_dbgRegsLL -_dbgTrace -_dbspecrs -_dbvecs -_debcnputc -_debsave0 -_debstack -_debstack_top_ss -_debstackptr -_debugNoop -_debugbackpocket -_debugger_active -_debugger_cpu -_debugger_debug -_debugger_holdoff -_debugger_is_slave 
-_debugger_lock -_debugger_pending -_debugger_sync -_delay_for_interval -_dgVideo -_dgWork -_diagCall -_diagTrap -_disable_bluebox_internal -_doexception -_dump_backtrace -_dump_savearea -_enter_funnel_section -_env_buf -_exception_end -_exception_entry -_exception_exit -_exit_funnel_section -_extPatch32 -_extPatchMCK -_failNames -_fastexit -_fctx_test -_find_user_fpu -_find_user_regs -_find_user_vec -_first_free_virt -_forcenap -_fpu_save -_fpu_switch -_free_mappings -_free_pmap_count -_free_pmap_list -_free_pmap_lock -_free_pmap_max -_fwEmMck -_fwSCCinit -_fwSCOM -_get_got -_get_msr_exportmask -_get_msr_nbits -_get_msr_rbits -_get_preemption_level -_get_simple_lock_count -_getrpc -_gettimeofday_32 -_gettimeofday_64 -_handleDSeg -_handleISeg -_handlePF -_hash_table_base -_hash_table_size -_hid0get64 -_hw_add_map -_hw_blow_seg -_hw_cpu_sync -_hw_cpu_wcng -_hw_dequeue_atomic -_hw_find_map -_hw_find_space -_hw_hash_init -_hw_lock_bit -_hw_lock_mbits -_hw_map_seg -_hw_perfmon_lock -_hw_protect -_hw_purge_map -_hw_purge_phys -_hw_purge_space -_hw_queue_atomic -_hw_queue_atomic_list -_hw_rem_map -_hw_set_user_space -_hw_set_user_space_dis -_hw_setup_trans -_hw_start_trans -_hw_test_rc -_hw_unlock_bit -_hw_walk_phys -_hwulckPatch_eieio -_hwulckPatch_isync -_hwulckbPatch_eieio -_hwulckbPatch_isync -_iNullLL -_ignore_zero_fault -_ihandler -_ihandler_ret -_incrVSID -_initialize_serial -_interrupt -_interrupt_disable -_interrupt_enable -_intstack_top_ss -_invalidateSegs -_invalidate_dcache -_invalidate_dcache64 -_invxcption -_isync_mfdec -_kdb_trap -_kdp_backtrace -_kdp_copy_phys -_kdp_dabr -_kdp_noisy -_kdp_pmap -_kdp_print_backtrace -_kdp_print_registers -_kdp_sr_dump -_kdp_trans_off -_kdp_trap -_kdp_trap_codes -_kdp_vtophys -_kernel_args_buf -_kernel_pmap_phys -_killprint -_killresv -_lastTrace -_lock_debugger -_lowGlo -_mach_absolute_time_32 -_mach_absolute_time_64 -_machine_act_terminate -_machine_clock_assist -_machine_conf -_machine_idle_ppc -_machine_idle_ret -_mapCtl -_mapInsert -_mapLog -_mapRemove -_mapSearch -_mapSearchFull -_mapSetLists -_mapSetUp -_mapSkipListVerify -_mapSkipListVerifyC -_mapalc1 -_mapalc2 -_mapdebug -_mapping_adjust -_mapping_adjust_call -_mapping_alloc -_mapping_clr_mod -_mapping_clr_ref -_mapping_drop_busy -_mapping_fake_zone_info -_mapping_find -_mapping_free -_mapping_free_init -_mapping_free_prime -_mapping_init -_mapping_make -_mapping_map -_mapping_p2v -_mapping_phys_lookup -_mapping_phys_unused -_mapping_prealloc -_mapping_protect -_mapping_protect_phys -_mapping_relpre -_mapping_remove -_mapping_set_ref -_mapping_tst_mod -_mapping_tst_ref -_mapping_verify -_mappingdeb0 -_mappingdeb1 -_max_cpus_initialized -_mem_actual -_mfdar -_mflr -_mfmmcr0 -_mfmmcr1 -_mfmmcr2 -_mfmsr -_mfpmc1 -_mfpmc2 -_mfpmc3 -_mfpmc4 -_mfpvr -_mfrtcl -_mfrtcu -_mfsda -_mfsia -_mfsrin -_mftb -_mftbu -_ml_enable_cache_level -_ml_enable_nap -_ml_ppc_sleep -_ml_probe_read_mck -_ml_probe_read_mck_64 -_ml_read_temp -_ml_restore -_ml_sense_nmi -_ml_set_physical -_ml_set_physical_disabled -_ml_set_physical_get_ffs -_ml_set_processor_speed -_ml_set_processor_voltage -_ml_set_translation_off -_ml_thrm_init -_ml_thrm_set -_ml_throttle -_ml_mem_backoff -_mtdar -_mtdec -_mtmmcr0 -_mtmmcr1 -_mtmmcr2 -_mtmsr -_mtpmc1 -_mtpmc2 -_mtpmc3 -_mtpmc4 -_mtsdr1 -_mtsrin -_mulckPatch_eieio -_mulckPatch_isync -_mutex_unlock_rwcmb -_packAsc -_patch_table -_pbtcnt -_pbtcpu -_pbtlock -_per_proc_info -_perfIntHook -_perfTrapHook -_perfmon_acquire_facility -_perfmon_clear_counters -_perfmon_control -_perfmon_disable 
-_perfmon_enable -_perfmon_handle_pmi -_perfmon_init -_perfmon_read_counters -_perfmon_release_facility -_perfmon_set_event -_perfmon_set_event_func -_perfmon_set_tbsel -_perfmon_set_threshold -_perfmon_start_counters -_perfmon_stop_counters -_perfmon_write_counters -_phys_copy -_phys_table -_phystokv -_pmapTrans -_pmap_activate -_pmap_add_physical_memory -_pmap_attribute -_pmap_attribute_cache_sync -_pmap_boot_map -_pmap_canExecute -_pmap_deactivate -_pmap_find_physentry -_pmap_map_block -_pmap_map_block_rc -_pmap_mem_regions -_pmap_mem_regions_count -_pmap_nest -_pmap_switch -_pmap_unnest -_powermac_scc_get_datum -_powermac_scc_set_datum -_ppcNull -_ppcNullinst -_ppc_checkthreadstate -_ppc_init -_ppc_init_cpu -_ppc_max_adrsp -_ppc_max_pmaps -_ppc_usimple_lock -_ppc_usimple_lock_init -_ppc_usimple_lock_try -_ppc_usimple_unlock_rwcmb -_ppc_usimple_unlock_rwmb -_ppc_vm_cpu_init -_ppc_vm_init -_ppcscret -_pper_proc_info -_print_backtrace -_pthread_getspecific_sprg3 -_pthread_getspecific_uftrap -_pthread_self_sprg3 -_pthread_self_uftrap -_resetPOR -_resethandler_target -_retFromVM -_rtclock_decrementer_min -_save_adjust -_save_alloc -_save_cpv -_save_fake_zone_info -_save_get -_save_get_init -_save_get_phys_32 -_save_get_phys_64 -_save_queue -_save_recover -_save_release -_save_ret -_save_ret_phys -_save_ret_wMSR -_save_trim_free -_saveanchor -_savearea_init -_scc_funnel_initted -_scc_getc -_scc_param -_scc_parm_done -_scc_probe -_scc_putc -_scc_softc -_scc_std -_scc_stomp -_scc_tty -_scc_uses_modem_control -_sconowner -_sectKLDB -_sectSizeKLD -_serial_initted -_serial_keyboard_init -_serial_keyboard_poll -_serial_keyboard_start -_serialmode -_setPmon -_set_machine_current_act -_shadow_BAT -_shandler -_sharedPage -_sharedPmap -_spinlock_32_lock_mp -_spinlock_32_lock_up -_spinlock_32_try_mp -_spinlock_32_try_up -_spinlock_32_unlock_mp -_spinlock_32_unlock_up -_spinlock_64_lock_mp -_spinlock_64_lock_up -_spinlock_64_try_mp -_spinlock_64_try_up -_spinlock_64_unlock_mp -_spinlock_64_unlock_up -_spinlock_relinquish -_stFloat -_stSpecrs -_stVectors -_static_memory_end -_sulckPatch_eieio -_sulckPatch_isync -_switchIntoVM -_switchSegs -_switch_in -_switch_to_old_console -_switch_to_video_console -_syncClkSpot -_sync_cache -_sync_cache64 -_sync_cache_virtual -_sync_ppage -_syscall_error -_syscall_notify_interrupt -_syscall_trace -_syscall_trace_end -_taproot_addr -_taproot_size -_testPerfTrap -_thandler -_thread_adjuserstack -_thread_enable_fpe -_thread_setentrypoint -_thread_setuserstack -_tlbie -_toss_live_fpu -_toss_live_vec -_trap -_trcWork -_tstbit -_unlock_debugger -_vcgetc -_vec_save -_vec_switch -_vm_max_address -_vm_max_physical -_vmm_dispatch -_vmm_dispatch_table -_vmm_execute_vm -_vmm_exit -_vmm_fam_exc -_vmm_fam_pf -_vmm_fam_reserved -_vmm_force_exit -_vmm_get_XA -_vmm_get_adsp -_vmm_get_entry -_vmm_get_features -_vmm_get_features_sel -_vmm_get_float_state -_vmm_get_page_dirty_flag -_vmm_get_page_dirty_flag32 -_vmm_get_page_mapping -_vmm_get_page_mapping32 -_vmm_get_timer -_vmm_get_vector_state -_vmm_get_version -_vmm_get_version_sel -_vmm_init_context -_vmm_init_context_sel -_vmm_interrupt -_vmm_map_execute -_vmm_map_execute32 -_vmm_map_list -_vmm_map_list32 -_vmm_map_list64 -_vmm_map_page -_vmm_map_page32 -_vmm_max_addr -_vmm_protect_execute -_vmm_protect_execute32 -_vmm_protect_page -_vmm_protect_page32 -_vmm_set_XA -_vmm_set_timer -_vmm_stop_vm -_vmm_tear_down_all -_vmm_tear_down_context -_vmm_timer_pop -_vmm_ufp -_vmm_unmap_all_pages -_vmm_unmap_list -_vmm_unmap_page 
-_vmm_unmap_page32 -_xLoadDBATsLL -_xLoadIBATsLL -dbgCkptCall -dbgDispCall -dbgRegsCall -debstash -fwdisplock -hexTab -hexfont -iNullCall - diff --git a/config/Makefile b/config/Makefile index 36d0759dd..a0de5228d 100644 --- a/config/Makefile +++ b/config/Makefile @@ -27,6 +27,7 @@ INST_SUBDIRS = INSTALL_DATA_LIST= \ + IPFirewall.kext/Info.plist \ System.kext/Info.plist \ System.kext/PlugIns/Libkern.kext/Info.plist \ System.kext/PlugIns/Mach.kext/Info.plist \ @@ -36,13 +37,13 @@ INSTALL_DATA_LIST= \ System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \ System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \ System.kext/PlugIns/IOSystemManagement.kext/Info.plist \ + System.kext/PlugIns/Unsupported.kext/Info.plist \ \ System.kext/PlugIns/System6.0.kext/Info.plist \ System.kext/PlugIns/Libkern6.0.kext/Info.plist \ System.kext/PlugIns/Mach6.0.kext/Info.plist \ System.kext/PlugIns/BSDKernel6.0.kext/Info.plist \ System.kext/PlugIns/IOKit6.0.kext/Info.plist \ - INSTALL_DATA_DIR= \ /System/Library/Extensions/ @@ -52,13 +53,15 @@ INSTMAN_SUBDIRS = # KEXT_CREATE_SYMBOL_SET = /usr/local/bin/kextsymboltool +NEWVERS = $(SRCROOT)/config/newvers.pl SYMBOL_COMPONENT_LIST = \ System6.0 \ BSDKernel \ IOKit \ Libkern \ - Mach + Mach \ + Unsupported SYMBOL_SET_BUILD = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(set).symbolset) SYMBOL_SET_FAT = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJROOT)/$(set).symbolset) @@ -81,13 +84,38 @@ $(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset : $(foreach arch, $(INSTALL_ARCHS), $( build_symbol_sets: $(SYMBOL_SET_BUILD) - -install_symbol_sets: $(SYMBOL_SET_FAT) + $(KEXT_CREATE_SYMBOL_SET) \ + -arch $(ARCH_CONFIG_LC) \ + -import $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/allsymbols \ + -export $(SRCROOT)/$(COMPONENT)/Libkern.exports \ + -export $(SRCROOT)/$(COMPONENT)/Libkern.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/Mach.exports \ + -export $(SRCROOT)/$(COMPONENT)/Mach.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/IOKit.exports \ + -export $(SRCROOT)/$(COMPONENT)/IOKit.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/BSDKernel.exports \ + -export $(SRCROOT)/$(COMPONENT)/BSDKernel.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/Unsupported.exports \ + -export $(SRCROOT)/$(COMPONENT)/Unsupported.$(ARCH_CONFIG_LC).exports \ + -output /dev/null; + +install_symbol_sets: $(SYMBOL_SET_FAT) $(SRCROOT)/config/MasterVersion install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0; install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel; install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/IOKit; install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Libkern; install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Mach; + install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported; + $(NEWVERS) $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/AppleNMI.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \ + 
$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOSystemManagement.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist do_build_all: build_symbol_sets diff --git a/config/MasterVersion b/config/MasterVersion new file mode 100644 index 000000000..b87eb66eb --- /dev/null +++ b/config/MasterVersion @@ -0,0 +1,19 @@ +8.0.0 + +# The first line of this file contains the master version number for the kernel. +# All other instances of the kernel version in xnu are derived from this file. +# +# The format of the version number must conform to the version resource format +# as described in TN1132: http://developer.apple.com/technotes/tn/tn1132.html +# +# In particular, the string is formatted as: J[.N[.R[S[L]]]], where: +# J represents the kernel major version number (integer) +# N represents the kernel minor version number (integer) +# R represents the kernel revision number (integer) +# S represents the kernel build stage (one of "d", "a", "b", or "r") +# L represents the kernel pre-release level (integer) +# +# The correct way to make use of the kernel version within kernel code or a +# kext is to include libkern/version.h. version.h contains defines that can +# be used for build-time version logic and prototypes for variables that can +# be used for run-time version logic. diff --git a/config/System.kext/Info.plist b/config/System.kext/Info.plist index 46d15f4be..ac4e5bae0 100644 --- a/config/System.kext/Info.plist +++ b/config/System.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - System Resource Pseudoextension, Apple Computer Inc, 7.9.0 + System Resource Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kernel CFBundleInfoDictionaryVersion @@ -15,13 +15,13 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/AppleNMI.kext/Info.plist b/config/System.kext/PlugIns/AppleNMI.kext/Info.plist index ab3046737..742ae9d1b 100644 --- a/config/System.kext/PlugIns/AppleNMI.kext/Info.plist +++ b/config/System.kext/PlugIns/AppleNMI.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - AppleNMI Pseudoextension, Apple Computer Inc, 7.9.0 + AppleNMI Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.driver.AppleNMI CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ????
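The build-time/run-time version logic described in the MasterVersion comment above can be sketched roughly as follows. This is a minimal illustration, not part of the patch: it assumes the VERSION_MAJOR/VERSION_MINOR/VERSION_REVISION defines and the version_major/version variables that the generated libkern/version.h is described as providing (the running kernel does export these variables; see the _bsd_version_major:_version_major aliases in the System6.0.exports hunk below). log_kernel_version is a hypothetical helper name.

    #include <libkern/version.h>   /* generated from config/MasterVersion by newvers.pl */
    #include <sys/systm.h>         /* kernel printf */

    /* Hypothetical helper: compare the version a kext was built
     * against with the version of the kernel it is running on. */
    static void
    log_kernel_version(void)
    {
    #if VERSION_MAJOR >= 8
        /* Build-time logic: resolved when the kext is compiled. */
        printf("built against xnu %d.%d.%d\n",
            VERSION_MAJOR, VERSION_MINOR, VERSION_REVISION);
    #endif
        /* Run-time logic: variables exported by the running kernel. */
        if (version_major >= 8) {
            printf("running on kernel version \"%s\"\n", version);
        }
    }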
CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist b/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist index ca1388a35..ffd2dae41 100644 --- a/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist +++ b/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - Apple Platform Family Pseudoextension, Apple Computer Inc, 7.9.0 + Apple Platform Family Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.iokit.ApplePlatformFamily CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion 1.0 OSBundleRequired diff --git a/config/System.kext/PlugIns/BSDKernel.kext/Info.plist b/config/System.kext/PlugIns/BSDKernel.kext/Info.plist index 2fa1a6302..fcd967afa 100644 --- a/config/System.kext/PlugIns/BSDKernel.kext/Info.plist +++ b/config/System.kext/PlugIns/BSDKernel.kext/Info.plist @@ -7,7 +7,7 @@ CFBundleExecutable BSDKernel CFBundleGetInfoString - BSD Kernel Pseudoextension, Apple Computer Inc, 7.9.0 + BSD Kernel Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kpi.bsd CFBundleInfoDictionaryVersion @@ -17,13 +17,13 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion - 7.0 + 8.0.0b1 OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist b/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist index dd1a2c3a0..55d34bbf3 100644 --- a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - BSD Kernel Pseudoextension, Apple Computer Inc, 6.9.9 + BSD Kernel Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.bsd CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 6.9.9 + 7.9.9 CFBundleSignature ???? CFBundleVersion - 6.9.9 + 7.9.9 OSBundleCompatibleVersion 1.1 OSBundleRequired diff --git a/config/System.kext/PlugIns/IOKit.kext/Info.plist b/config/System.kext/PlugIns/IOKit.kext/Info.plist index fa44ad120..5ca172b1c 100644 --- a/config/System.kext/PlugIns/IOKit.kext/Info.plist +++ b/config/System.kext/PlugIns/IOKit.kext/Info.plist @@ -7,7 +7,7 @@ CFBundleExecutable IOKit CFBundleGetInfoString - I/O Kit Pseudoextension, Apple Computer Inc, 7.9.0 + I/O Kit Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kpi.iokit CFBundleInfoDictionaryVersion @@ -17,11 +17,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? 
CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion 7.0 OSBundleRequired diff --git a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist b/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist index 8c1f742b4..8dcf9743c 100644 --- a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - I/O Kit Pseudoextension, Apple Computer Inc, 6.9.9 + I/O Kit Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.iokit CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 6.9.9 + 7.9.9 CFBundleSignature ???? CFBundleVersion - 6.9.9 + 7.9.9 OSBundleCompatibleVersion 1.0.0b1 OSBundleRequired diff --git a/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist b/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist index aebc5af29..998d83151 100644 --- a/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist +++ b/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - AppleNMI Pseudoextension, Apple Computer Inc, 7.9.0 + AppleNMI Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.iokit.IONVRAMFamily CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion 1.1 OSBundleRequired diff --git a/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist b/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist index e689041ad..5f9024432 100644 --- a/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist +++ b/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - System Management Pseudoextension, Apple Computer Inc, 7.9.0 + System Management Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.iokit.IOSystemManagementFamily CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion 1.0.0b1 OSBundleRequired diff --git a/config/System.kext/PlugIns/Libkern.kext/Info.plist b/config/System.kext/PlugIns/Libkern.kext/Info.plist index acb0a4472..a04f4c87f 100644 --- a/config/System.kext/PlugIns/Libkern.kext/Info.plist +++ b/config/System.kext/PlugIns/Libkern.kext/Info.plist @@ -7,7 +7,7 @@ CFBundleExecutable Libkern CFBundleGetInfoString - Libkern Pseudoextension, Apple Computer Inc, 7.9.0 + Libkern Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kpi.libkern CFBundleInfoDictionaryVersion @@ -17,13 +17,13 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? 
CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion - 7.0 + 8.0.0d0 OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist b/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist index 03a831442..c9dfaa185 100644 --- a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - Libkern Pseudoextension, Apple Computer Inc, 6.9.9 + Libkern Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.libkern CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 6.9.9 + 7.9.9 CFBundleSignature ???? CFBundleVersion - 6.9.9 + 7.9.9 OSBundleCompatibleVersion 1.0.0b1 OSBundleRequired diff --git a/config/System.kext/PlugIns/Mach.kext/Info.plist b/config/System.kext/PlugIns/Mach.kext/Info.plist index 79f9055e5..9f4a6e288 100644 --- a/config/System.kext/PlugIns/Mach.kext/Info.plist +++ b/config/System.kext/PlugIns/Mach.kext/Info.plist @@ -7,7 +7,7 @@ CFBundleExecutable Mach CFBundleGetInfoString - Mach Kernel Pseudoextension, Apple Computer Inc, 7.9.0 + Mach Kernel Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kpi.mach CFBundleInfoDictionaryVersion @@ -17,13 +17,13 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 7.9.0 + ###KERNEL_VERSION_SHORT### CFBundleSignature ???? CFBundleVersion - 7.9.0 + ###KERNEL_VERSION_LONG### OSBundleCompatibleVersion - 7.0 + 8.0.0d0 OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist b/config/System.kext/PlugIns/Mach6.0.kext/Info.plist index 0e607e92c..69244d218 100644 --- a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/Mach6.0.kext/Info.plist @@ -5,7 +5,7 @@ CFBundleDevelopmentRegion English CFBundleGetInfoString - Mach Kernel Pseudoextension, Apple Computer Inc, 6.9.9 + Mach Kernel Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.mach CFBundleInfoDictionaryVersion @@ -15,11 +15,11 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 6.9.9 + 7.9.9 CFBundleSignature ???? CFBundleVersion - 6.9.9 + 7.9.9 OSBundleCompatibleVersion 1.0.0b1 OSBundleRequired diff --git a/config/System.kext/PlugIns/System6.0.kext/Info.plist b/config/System.kext/PlugIns/System6.0.kext/Info.plist index fd7e2f91e..36f98b1e9 100644 --- a/config/System.kext/PlugIns/System6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/System6.0.kext/Info.plist @@ -7,7 +7,7 @@ CFBundleExecutable kernel.6.0 CFBundleGetInfoString - System Resource Pseudoextension, Apple Computer Inc, 6.9.9 + System Resource Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.6.0 CFBundleInfoDictionaryVersion @@ -17,13 +17,13 @@ CFBundlePackageType KEXT CFBundleShortVersionString - 6.9.9 + 7.9.9 CFBundleSignature ???? 
CFBundleVersion - 6.9.9 + 7.9.9 OSBundleCompatibleVersion - 6.9.9 + 7.9.9 OSBundleRequired Root OSKernelResource diff --git a/config/System.kext/PlugIns/Unsupported.kext/Info.plist b/config/System.kext/PlugIns/Unsupported.kext/Info.plist new file mode 100644 index 000000000..5f8979335 --- /dev/null +++ b/config/System.kext/PlugIns/Unsupported.kext/Info.plist @@ -0,0 +1,32 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + Unsupported + CFBundleGetInfoString + Unsupported Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### + CFBundleIdentifier + com.apple.kpi.unsupported + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + Unsupported Pseudoextension + CFBundlePackageType + KEXT + CFBundleShortVersionString + ###KERNEL_VERSION_SHORT### + CFBundleSignature + ???? + CFBundleVersion + ###KERNEL_VERSION_LONG### + OSBundleCompatibleVersion + 8.0.0b1 + OSBundleRequired + Root + OSKernelResource + + + diff --git a/config/System6.0.exports b/config/System6.0.exports index 0230d2186..1815d9dfb 100644 --- a/config/System6.0.exports +++ b/config/System6.0.exports @@ -1,44 +1,3 @@ -_AgeCatalogIterator -_AllocateNode -_Assert -_BF_decrypt -_BF_encrypt -_BF_set_key -_BTClosePath -_BTDeleteRecord -_BTFlushPath -_BTGetInformation -_BTGetLastSync -_BTInsertRecord -_BTInvalidateHint -_BTIterateRecord -_BTIterateRecords -_BTOpenPath -_BTReloadData -_BTReplaceRecord -_BTScanInitialize -_BTScanNextRecord -_BTScanTerminate -_BTSearchRecord -_BTSetLastSync -_BTUpdateRecord -_BestBlockSizeFit -_BuildCatalogKey -_BuildCatalogKeyUTF8 -_CURSIG -_CalcKeyRecordSize -_CalcMapBits -_CheckExtents -_CheckInsertParams -_CheckNode -_ClearNode -_CompareCatalogKeys -_CompareExtendedCatalogKeys -_ConvertUnicodeToUTF8Mangled -_CopyBigCatalogNodeInfo -_CopyCatalogName -_CopyCatalogNodeInfo -_CopyExtentInfo _DTCreateEntryIterator _DTCreatePropertyIterator _DTDisposeEntryIterator @@ -54,34 +13,7 @@ _DTIterateProperties _DTLookupEntry _DTRestartEntryIteration _DTRestartPropertyIteration -_DebugStr _Debugger -_DeleteExtents -_DeleteOffset -_DeleteRecord -_DeleteTree -_DisposePtr -_ExchangeFileIDs -_ExtendBTree -_FastRelString -_FastUnicodeCompare -_FindIteratorPosition -_FlushCatalog -_FreeNode -_GetCatalogIterator -_GetChildNodeNum -_GetDirEntrySize -_GetEmbeddedFileID -_GetLogicalBlockSize -_GetMapNode -_GetNewNode -_GetNode -_GetNodeDataSize -_GetNodeFreeSize -_GetOffsetAddress -_GetRecordByIndex -_GetRecordSize -_GetTimeUTC _IOAlignmentToSize _IOBSDNameMatching _IOBSDRegistryEntryForDeviceTree @@ -91,7 +23,6 @@ _IOCDMatching _IOCreateThread _IODTFreeLoaderInfo _IODTGetLoaderInfo -_IODefaultCacheBits _IODelay _IODiskMatching _IOExitThread @@ -111,13 +42,19 @@ _IOKitResetTime _IOLibInit _IOLockAlloc _IOLockFree +_IOLockGetMachLock _IOLockInitWithState +_IOLockLock:_lck_mtx_lock +_IOLockSleep +_IOLockSleepDeadline +_IOLockTryLock:_lck_mtx_try_lock +_IOLockUnlock:_lck_mtx_unlock +_IOLockWakeup _IOLog _IOMalloc _IOMallocAligned _IOMallocContiguous _IOMallocPageable -_IOMapPages _IOMappedRead16 _IOMappedRead32 _IOMappedRead64 @@ -140,8 +77,13 @@ _IOPanic _IOPrintPlane _IORWLockAlloc _IORWLockFree +_IORWLockGetMachLock +_IORWLockRead:_lck_rw_lock_shared +_IORWLockUnlock:_lck_rw_done +_IORWLockWrite:_lck_rw_lock_exclusive _IORecursiveLockAlloc _IORecursiveLockFree +_IORecursiveLockGetMachLock _IORecursiveLockHaveLock _IORecursiveLockLock _IORecursiveLockSleep @@ -151,47 +93,25 @@ _IORecursiveLockWakeup _IOSetProcessorCacheMode _IOSimpleLockAlloc _IOSimpleLockFree +_IOSimpleLockGetMachLock 
_IOSimpleLockInit +_IOSimpleLockLock:_lck_spin_lock +_IOSimpleLockTryLock:_lck_spin_try_lock +_IOSimpleLockUnlock:_lck_spin_unlock _IOSizeToAlignment _IOSleep _IOSpinUnlock _IOSystemShutdownNotification _IOTrySpinLock -_IOUnmapPages _IOZeroTvalspec -_InitCatalogCache -_InsertKeyRecord -_InsertOffset -_InsertRecord -_InsertTree -_InvalidateCatalogCache -_IsItAHint -_KERNEL_SECURITY_TOKEN _KUNCExecute _KUNCGetNotificationID -_KUNCUserNotificationCancel _KUNCUserNotificationDisplayAlert _KUNCUserNotificationDisplayFromBundle _KUNCUserNotificationDisplayNotice -_LocalToUTC -_LocateCatalogNode -_LocateCatalogNodeByKey -_LocateCatalogRecord -_LockTimeOut -_MAXNBUF -_MCFail _MD5Final _MD5Init -_MD5Pad -_MD5Transform _MD5Update -_MDFail -_MPFail -_MacToVFSError -_MoveExtents -_NDR_record -_NewPtr -_NewPtrSysClear _OSAddAtomic _OSAddAtomic16 _OSAddAtomic8 @@ -256,69 +176,7 @@ _PE_poll_input _PE_putc _PE_register_timebase_callback _PE_state -_PositionIterator -_PreliminarySetup -_RandomULong -_ReleaseCatalogIterator -_ReleaseNode -_ReplaceBTreeRecord -_S -_SHA1Final -_SHA1Init -_SHA1Transform -_SHA1Update -_SHA256_Data -_SHA256_End -_SHA256_Final -_SHA256_Init -_SHA256_Transform -_SHA256_Update -_SHA384_Data -_SHA384_End -_SHA384_Final -_SHA384_Init -_SHA384_Update -_SHA512_Data -_SHA512_End -_SHA512_Final -_SHA512_Init -_SHA512_Last -_SHA512_Transform -_SHA512_Update -_SearchBTreeRecord -_SearchNode -_SearchTree _StartIOKit -_Switch_context -_TRAP_TYPES -_TrashCatalogIterator -_TrashNode -_TrySimpleReplace -_UNDAlertCompletedWithResult_rpc -_UNDCancelNotification_rpc -_UNDDisplayAlertFromBundle_rpc -_UNDDisplayAlertSimple_rpc -_UNDDisplayCustomFromBundle_rpc -_UNDDisplayCustomFromDictionary_rpc -_UNDDisplayNoticeFromBundle_rpc -_UNDDisplayNoticeSimple_rpc -_UNDExecute_rpc -_UNDNotificationCreated_rpc -_UNDReply_deallocate -_UNDReply_server -_UNDReply_server_routine -_UNDReply_subsystem -_UTCToLocal -_UpdateBtreeIterator -_UpdateCatalogIterator -_UpdateCatalogName -_UpdateHeader -_UpdateNode -_VerifyHeader -__FREE -__FREE_ZONE -__MALLOC -__MALLOC_ZONE __Z10tellClientP8OSObjectPv __Z13OSUnserializePKcPP8OSString __Z13readExtensionP12OSDictionaryPKc @@ -392,7 +250,6 @@ __ZN10IOWorkLoop10wakeupGateEPvb __ZN10IOWorkLoop12tryCloseGateEv __ZN10IOWorkLoop13_maintRequestEPvS0_S0_S0_ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource -__ZN10IOWorkLoop16launchThreadMainEPv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv __ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev @@ -402,7 +259,6 @@ __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev -__ZN10IOWorkLoop22threadMainContinuationEv __ZN10IOWorkLoop4freeEv __ZN10IOWorkLoop4initEv __ZN10IOWorkLoop8openGateEv @@ -651,9 +507,10 @@ __ZN12KLDBootstrapC2Ev __ZN12KLDBootstrapD1Ev __ZN12KLDBootstrapD2Ev __ZN12OSCollection10gMetaClassE +__ZN12OSCollection10setOptionsEjjPv __ZN12OSCollection10superClassE -__ZN12OSCollection22_RESERVEDOSCollection0Ev -__ZN12OSCollection22_RESERVEDOSCollection1Ev +__ZN12OSCollection11haveUpdatedEv +__ZN12OSCollection14copyCollectionEP12OSDictionary __ZN12OSCollection22_RESERVEDOSCollection2Ev __ZN12OSCollection22_RESERVEDOSCollection3Ev __ZN12OSCollection22_RESERVEDOSCollection4Ev @@ -669,6 +526,7 @@ __ZN12OSCollectionC2EPK11OSMetaClass __ZN12OSCollectionD0Ev __ZN12OSCollectionD2Ev __ZN12OSDictionary10gMetaClassE +__ZN12OSDictionary10setOptionsEjjPv __ZN12OSDictionary10superClassE 
__ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSStringjj __ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSSymboljj @@ -676,6 +534,7 @@ __ZN12OSDictionary12removeObjectEPK8OSString __ZN12OSDictionary12removeObjectEPK8OSSymbol __ZN12OSDictionary12removeObjectEPKc __ZN12OSDictionary12withCapacityEj +__ZN12OSDictionary14copyCollectionEPS_ __ZN12OSDictionary14ensureCapacityEj __ZN12OSDictionary14withDictionaryEPKS_j __ZN12OSDictionary15flushCollectionEv @@ -707,11 +566,13 @@ __ZN12OSDictionaryC2Ev __ZN12OSDictionaryD0Ev __ZN12OSDictionaryD2Ev __ZN12OSOrderedSet10gMetaClassE +__ZN12OSOrderedSet10setOptionsEjjPv __ZN12OSOrderedSet10superClassE __ZN12OSOrderedSet11orderObjectEPK15OSMetaClassBase __ZN12OSOrderedSet12removeObjectEPK15OSMetaClassBase __ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ __ZN12OSOrderedSet13setLastObjectEPK15OSMetaClassBase +__ZN12OSOrderedSet14copyCollectionEP12OSDictionary __ZN12OSOrderedSet14ensureCapacityEj __ZN12OSOrderedSet14getOrderingRefEv __ZN12OSOrderedSet14setFirstObjectEPK15OSMetaClassBase @@ -955,7 +816,6 @@ __ZN14IOPMrootDomain26handleSleepTimerExpirationEv __ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv __ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain27registerPMSettingControllerEPFiiiPvES0_ -__ZN14IOPMrootDomain29registerPlatformPowerProfilesEP7OSArray __ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev @@ -1050,9 +910,9 @@ __ZN15IORegistryEntry15getRegistryRootEv __ZN15IORegistryEntry16detachFromParentEPS_PK15IORegistryPlane __ZN15IORegistryEntry16setPropertyTableEP12OSDictionary __ZN15IORegistryEntry17matchPathLocationEPKcPK15IORegistryPlane +__ZN15IORegistryEntry17runPropertyActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ __ZN15IORegistryEntry18getGenerationCountEv __ZN15IORegistryEntry21getChildFromComponentEPPKcPK15IORegistryPlane -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry5Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev @@ -1164,22 +1024,6 @@ __ZN16IOKitDiagnosticsC2EPK11OSMetaClass __ZN16IOKitDiagnosticsC2Ev __ZN16IOKitDiagnosticsD0Ev __ZN16IOKitDiagnosticsD2Ev -__ZN16IOPMPagingPlexus10gMetaClassE -__ZN16IOPMPagingPlexus10superClassE -__ZN16IOPMPagingPlexus12findProviderEP9IOService -__ZN16IOPMPagingPlexus15processChildrenEv -__ZN16IOPMPagingPlexus15processSiblingsEP9IOService -__ZN16IOPMPagingPlexus17setAggressivenessEmm -__ZN16IOPMPagingPlexus5startEP9IOService -__ZN16IOPMPagingPlexus9MetaClassC1Ev -__ZN16IOPMPagingPlexus9MetaClassC2Ev -__ZN16IOPMPagingPlexus9metaClassE -__ZN16IOPMPagingPlexusC1EPK11OSMetaClass -__ZN16IOPMPagingPlexusC1Ev -__ZN16IOPMPagingPlexusC2EPK11OSMetaClass -__ZN16IOPMPagingPlexusC2Ev -__ZN16IOPMPagingPlexusD0Ev -__ZN16IOPMPagingPlexusD2Ev __ZN16IOPMinformeeList10gMetaClassE __ZN16IOPMinformeeList10initializeEv __ZN16IOPMinformeeList10nextInListEP12IOPMinformee @@ -1311,6 +1155,26 @@ __ZN17IOBigMemoryCursorC2EPK11OSMetaClass __ZN17IOBigMemoryCursorC2Ev __ZN17IOBigMemoryCursorD0Ev __ZN17IOBigMemoryCursorD2Ev +__ZN17IOPolledInterface10gMetaClassE +__ZN17IOPolledInterface15checkAllForWorkEv +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev 
+__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev +__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev +__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev +__ZN17IOPolledInterfaceC2EPK11OSMetaClass +__ZN17IOPolledInterfaceD2Ev __ZN17IOPowerConnection10gMetaClassE __ZN17IOPowerConnection10superClassE __ZN17IOPowerConnection14getAwaitingAckEv @@ -1384,16 +1248,16 @@ __ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm __ZN18IOMemoryDescriptor11withAddressEPvm11IODirection __ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task __ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor12setPurgeableEmPm __ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection __ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap __ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper __ZN18IOMemoryDescriptor16getSourceSegmentEmPm +__ZN18IOMemoryDescriptor16performOperationEmmm __ZN18IOMemoryDescriptor18getPhysicalAddressEv __ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor3Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor4Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor5Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev @@ -1405,6 +1269,7 @@ __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev +__ZN18IOMemoryDescriptor30withPersistentMemoryDescriptorEPS_ __ZN18IOMemoryDescriptor3mapEP4taskjmmm __ZN18IOMemoryDescriptor3mapEm __ZN18IOMemoryDescriptor4freeEv @@ -1476,6 +1341,7 @@ __ZN18IOTimerEventSource10superClassE __ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide __ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec __ZN18IOTimerEventSource10wakeAtTimeEmm +__ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop __ZN18IOTimerEventSource12checkForWorkEv __ZN18IOTimerEventSource12setTimeoutMSEm __ZN18IOTimerEventSource12setTimeoutUSEm @@ -1665,15 +1531,18 @@ __ZN21IOSubMemoryDescriptor10superClassE __ZN21IOSubMemoryDescriptor10writeBytesEmPKvm __ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm __ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection +__ZN21IOSubMemoryDescriptor12setPurgeableEmPm __ZN21IOSubMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb __ZN21IOSubMemoryDescriptor15initWithAddressEPvm11IODirection __ZN21IOSubMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm +__ZN21IOSubMemoryDescriptor16performOperationEmmm __ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm __ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm __ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb 
__ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN21IOSubMemoryDescriptor4freeEv +__ZN21IOSubMemoryDescriptor5doMapEP6vm_mapPjmmm __ZN21IOSubMemoryDescriptor7prepareE11IODirection __ZN21IOSubMemoryDescriptor8completeE11IODirection __ZN21IOSubMemoryDescriptor8redirectEP4taskb @@ -1874,7 +1743,6 @@ __ZN25IOGeneralMemoryDescriptorC2EPK11OSMetaClass __ZN25IOGeneralMemoryDescriptorC2Ev __ZN25IOGeneralMemoryDescriptorD0Ev __ZN25IOGeneralMemoryDescriptorD2Ev -__ZNK25IOGeneralMemoryDescriptor12getBackingIDEv __ZN25IOServiceUserNotification10gMetaClassE __ZN25IOServiceUserNotification10superClassE __ZN25IOServiceUserNotification13getNextObjectEv @@ -1977,6 +1845,7 @@ __ZN5IOCPU10superClassE __ZN5IOCPU11getCPUGroupEv __ZN5IOCPU11getCPUStateEv __ZN5IOCPU11setCPUStateEm +__ZN5IOCPU11setPropertyEPK8OSSymbolP8OSObject __ZN5IOCPU12getCPUNumberEv __ZN5IOCPU12setCPUNumberEm __ZN5IOCPU13setPropertiesEP8OSObject @@ -2002,12 +1871,14 @@ __ZN5IOCPUC2EPK11OSMetaClass __ZN5IOCPUD0Ev __ZN5IOCPUD2Ev __ZN5OSSet10gMetaClassE +__ZN5OSSet10setOptionsEjjPv __ZN5OSSet10superClassE __ZN5OSSet11initWithSetEPKS_j __ZN5OSSet11withObjectsEPPK8OSObjectjj __ZN5OSSet12removeObjectEPK15OSMetaClassBase __ZN5OSSet12withCapacityEj __ZN5OSSet13initWithArrayEPK7OSArrayj +__ZN5OSSet14copyCollectionEP12OSDictionary __ZN5OSSet14ensureCapacityEj __ZN5OSSet15_RESERVEDOSSet0Ev __ZN5OSSet15_RESERVEDOSSet1Ev @@ -2072,6 +1943,7 @@ __ZN6OSDataC2Ev __ZN6OSDataD0Ev __ZN6OSDataD2Ev __ZN7OSArray10gMetaClassE +__ZN7OSArray10setOptionsEjjPv __ZN7OSArray10superClassE __ZN7OSArray11withObjectsEPPK8OSObjectjj __ZN7OSArray12removeObjectEj @@ -2079,6 +1951,7 @@ __ZN7OSArray12withCapacityEj __ZN7OSArray13initWithArrayEPKS_j __ZN7OSArray13replaceObjectEjPK15OSMetaClassBase __ZN7OSArray14ensureCapacityEj +__ZN7OSArray14copyCollectionEP12OSDictionary __ZN7OSArray15flushCollectionEv __ZN7OSArray15initWithObjectsEPPK8OSObjectjj __ZN7OSArray16initWithCapacityEj @@ -2346,7 +2219,7 @@ __ZN9IODTNVRAM18writeNVRAMPropertyEP15IORegistryEntryPK8OSSymbolP6OSData __ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject __ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject __ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm -__ZN9IODTNVRAM19unescapeBytesToDataEPhm +__ZN9IODTNVRAM19unescapeBytesToDataEPKhm __ZN9IODTNVRAM19writeNVRAMPartitionEPK8OSSymbolmPhm __ZN9IODTNVRAM22readNVRAMPropertyType0EP15IORegistryEntryPPK8OSSymbolPP6OSData __ZN9IODTNVRAM22readNVRAMPropertyType1EP15IORegistryEntryPPK8OSSymbolPP6OSData @@ -2493,7 +2366,7 @@ __ZN9IOService18matchPropertyTableEP12OSDictionary __ZN9IOService18matchPropertyTableEP12OSDictionaryPl __ZN9IOService18setIdleTimerPeriodEm __ZN9IOService18settleTimerExpiredEv -__ZN9IOService19_RESERVEDIOService3Ev +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService19_RESERVEDIOService4Ev __ZN9IOService19_RESERVEDIOService5Ev __ZN9IOService19_RESERVEDIOService6Ev @@ -2861,8 +2734,6 @@ __ZNK15_IOConfigThread9MetaClass5allocEv __ZNK16IOKitDiagnostics12getMetaClassEv __ZNK16IOKitDiagnostics9MetaClass5allocEv __ZNK16IOKitDiagnostics9serializeEP11OSSerialize -__ZNK16IOPMPagingPlexus12getMetaClassEv -__ZNK16IOPMPagingPlexus9MetaClass5allocEv __ZNK16IOPMinformeeList12getMetaClassEv __ZNK16IOPMinformeeList9MetaClass5allocEv __ZNK16IOPlatformDevice11compareNameEP8OSStringPS1_ @@ -2944,7 +2815,9 @@ __ZNK28IOFilterInterruptEventSource15getFilterActionEv __ZNK28IOFilterInterruptEventSource9MetaClass5allocEv __ZNK32IOServiceMessageUserNotification12getMetaClassEv 
__ZNK32IOServiceMessageUserNotification9MetaClass5allocEv +__ZNK5IOCPU11getPropertyEPK8OSSymbol __ZNK5IOCPU12getMetaClassEv +__ZNK5IOCPU19serializePropertiesEP11OSSerialize __ZNK5IOCPU9MetaClass5allocEv __ZNK5OSSet11getCapacityEv __ZNK5OSSet12getAnyObjectEv @@ -3103,12 +2976,12 @@ __ZTV15OSMetaClassBase __ZTV15OSMetaClassMeta __ZTV15_IOConfigThread __ZTV16IOKitDiagnostics -__ZTV16IOPMPagingPlexus __ZTV16IOPMinformeeList __ZTV16IOPlatformDevice __ZTV16IOPlatformExpert __ZTV16IORangeAllocator __ZTV17IOBigMemoryCursor +__ZTV17IOPolledInterface __ZTV17IOPowerConnection __ZTV18IODTPlatformExpert __ZTV18IOMemoryDescriptor @@ -3185,7 +3058,6 @@ __ZTVN15IORegistryPlane9MetaClassE __ZTVN15IOWatchDogTimer9MetaClassE __ZTVN15_IOConfigThread9MetaClassE __ZTVN16IOKitDiagnostics9MetaClassE -__ZTVN16IOPMPagingPlexus9MetaClassE __ZTVN16IOPMinformeeList9MetaClassE __ZTVN16IOPlatformDevice9MetaClassE __ZTVN16IOPlatformExpert9MetaClassE @@ -3238,786 +3110,79 @@ __ZTVN9OSBoolean9MetaClassE __ZdlPv __Znwm ___cxa_pure_virtual -___disable_threadsignal -___doprnt -___pthread_kill -___sysctl -__cpu_capabilities -__disable_preemption -__dist_code __doprnt -__doprnt_truncates -__enable_preemption -__enable_preemption_no_check __giDebugLogDataInternal __giDebugLogInternal __giDebugReserved1 __giDebugReserved2 -__length_code -__longjmp __mh_execute_header -__mk_sp_thread_begin -__mk_sp_thread_depress_abort -__mk_sp_thread_depress_abstime -__mk_sp_thread_depress_ms -__mk_sp_thread_dispatch -__mk_sp_thread_done -__mk_sp_thread_perhaps_yield -__mk_sp_thread_switch -__mk_sp_thread_switch_continue -__mk_sp_thread_unblock -__mutex_lock -__mutex_try __printf -__setjmp __start -__tr_align -__tr_flush_block -__tr_init -__tr_stored_block -__tr_tally -__vm_external_state_get -__vm_map_clip_end -__vm_map_clip_start -__vm_map_entry_create -__vm_map_entry_dispose _absolutetime_to_nanoseconds -_accept -_access -_acct -_acct_process -_acctchkfreq -_acctp -_acctresume -_acctsuspend -_acctwatch -_acctwatch_funnel _acknowledgeSleepWakeNotification -_act_abort -_act_attach -_act_deallocate -_act_detach -_act_execute_returnhandlers -_act_free_swapin -_act_get_state -_act_get_state_locked -_act_lock_thread -_act_machine_sv_free -_act_reference -_act_set_apc -_act_set_astbsd -_act_set_state -_act_set_state_locked -_act_thread_catt -_act_thread_cfree -_act_thread_csave -_act_ulock_release_all -_act_unlock_thread -_active_debugger _add_from_mkext_function -_add_name -_add_pcbuffer -_add_profil -_add_to_time_wait -_addlog -_addupc_task -_adjtime -_adjust_vm_object_cache -_adler32 -_adr -_advisory_read -_age_is_stale -_ah4_calccksum -_ah4_input -_ah4_output -_ah6_calccksum -_ah6_ctlinput -_ah6_input -_ah6_output -_ah_algorithm_lookup -_ah_hdrlen -_ah_hdrsiz -_aio_cancel -_aio_error -_aio_fsync -_aio_max_requests -_aio_max_requests_per_process -_aio_read -_aio_return -_aio_suspend -_aio_worker_threads -_aio_write -_alert -_alert_done -_all_zones_lock -_allocbuf -_allow_clustered_pageouts -_allproc -_app_profile _appleClut8 -_apple_hwcksum_rx -_apple_hwcksum_tx _argstrcpy -_around -_arp_ifinit -_arp_rtrequest -_arpintr -_arpintrq -_arpresolve -_arpwhohas _assert_wait -_assert_wait_possible -_assert_wait_prim _assert_wait_timeout -_assert_wait_timeout_event -_ast_check -_ast_init -_ast_taken -_astbsd_on -_at_ether_input _atoi -_atoi_term -_attrcalcsize -_avail_remaining -_avenrun -_averunnable -_b_to_q -_backing_store_add -_backing_store_alloc -_backing_store_list -_backing_store_lookup -_backing_store_release_trigger_disable _badport_bandlim 
-_bawrite _bcd2bin_data _bcmp _bcopy -_bcopy_phys -_bdevsw _bdevsw_add _bdevsw_isfree _bdevsw_remove -_bdevvp -_bdwrite -_be_tracing -_bflushq -_bin2bcd_data -_bind -_biodone -_biowait -_blaundrycnt -_block_procsigmask _boot -_boothowto -_bootp -_boottime -_both -_bpf_filter -_bpf_init -_bpf_mtap -_bpf_tap -_bpf_tap_callback -_bpf_validate -_bpfattach -_bpfclose -_bpfdetach -_bpfioctl -_bpfopen -_bpfpoll -_bpfread -_bpfwrite -_branch_tracing_enabled -_bread -_breada -_breadn -_brelse -_bremfree -_bs_commit -_bs_get_global_clsize -_bs_global_info -_bs_initialize -_bs_low -_bs_more_space -_bs_no_paging_space -_bs_port_table -_bs_set_default_clsize -_bsd_ast -_bsd_autoconf -_bsd_bufferinit -_bsd_close_page_cache_files -_bsd_exception -_bsd_hardclock -_bsd_hardclockinit -_bsd_init -_bsd_init_task -_bsd_open_page_cache_files -_bsd_osrelease -_bsd_ostype -_bsd_pageable_map -_bsd_read_page_cache_file -_bsd_search_page_cache_data_base -_bsd_startupearly -_bsd_uprofil -_bsd_utaskbootstrap -_bsd_version -_bsd_version_major -_bsd_version_minor -_bsd_version_variant -_bsd_write_page_cache_file -_bsdinit_task -_buf -_bufferhdr_map -_bufhash -_bufhashlist_slock -_bufhashtbl -_bufqlim -_bufqscanwait -_bufqueues -_bufstats -_busyprt -_bwillwrite -_bwrite -_byte_swap_cgin -_byte_swap_cgout -_byte_swap_csum -_byte_swap_dir_block_in -_byte_swap_dir_block_out -_byte_swap_dir_out -_byte_swap_direct -_byte_swap_dirtemplate_in -_byte_swap_inode_in -_byte_swap_inode_out -_byte_swap_ints -_byte_swap_longlongs -_byte_swap_minidir_in -_byte_swap_sbin -_byte_swap_sbout -_byte_swap_shorts +_bsd_osrelease:_osrelease +_bsd_ostype:_ostype +_bsd_version:_version +_bsd_version_major:_version_major +_bsd_version_minor:_version_minor +_bsd_version_variant:_version_variant _bzero -_bzero_phys -_c_incoming_interrupts -_c_mach_msg_trap_switch_fast -_c_mmot_combined_S_R -_c_mmot_kernel_send -_c_swapin_thread_block -_c_syscalls_mach -_c_syscalls_unix -_c_thr_exc_raise -_c_thr_exc_raise_state -_c_thr_exc_raise_state_id -_c_thread_invoke_csw -_c_thread_invoke_hits -_c_thread_invoke_misses -_c_thread_invoke_same -_c_thread_invoke_same_cont -_c_tsk_exc_raise -_c_tsk_exc_raise_state -_c_tsk_exc_raise_state_id -_c_vm_page_grab_fictitious -_c_vm_page_more_fictitious -_c_vm_page_release_fictitious -_c_weird_pset_ref_exit -_cache_enter -_cache_lookup -_cache_purge -_cache_purgevfs -_cached_sock_alloc -_cached_sock_count -_cached_sock_free -_calcru -_calend_config -_calend_getattr -_calend_gettime -_calend_init -_calend_ops -_call_continuation -_call_thread_block -_call_thread_unblock -_callout -_cansignal -_cast128_decrypt_round12 -_cast128_decrypt_round16 -_cast128_encrypt_round12 -_cast128_encrypt_round16 -_catch_exc_subsystem _catch_exception_raise _catch_exception_raise_state _catch_exception_raise_state_identity -_catq -_cause_ast_check -_cd9660_access -_cd9660_blkatoff -_cd9660_blktooff -_cd9660_bmap -_cd9660_cdxaop_entries -_cd9660_cdxaop_opv_desc -_cd9660_cdxaop_p -_cd9660_close -_cd9660_cmap -_cd9660_defattr -_cd9660_deftstamp -_cd9660_enotsupp -_cd9660_fhtovp -_cd9660_fifoop_entries -_cd9660_fifoop_opv_desc -_cd9660_fifoop_p -_cd9660_getattr -_cd9660_getattrlist -_cd9660_ihashget -_cd9660_ihashins -_cd9660_ihashrem -_cd9660_inactive -_cd9660_init -_cd9660_ioctl -_cd9660_islocked -_cd9660_lock -_cd9660_lookup -_cd9660_mmap -_cd9660_mount -_cd9660_mountroot -_cd9660_offtoblk -_cd9660_open -_cd9660_pagein -_cd9660_pathconf -_cd9660_print -_cd9660_quotactl -_cd9660_read -_cd9660_readdir -_cd9660_readlink -_cd9660_reclaim 
-_cd9660_remove -_cd9660_rmdir -_cd9660_root -_cd9660_rrip_analyze -_cd9660_rrip_getname -_cd9660_rrip_getsymname -_cd9660_rrip_offset -_cd9660_seek -_cd9660_select -_cd9660_specop_entries -_cd9660_specop_opv_desc -_cd9660_specop_p -_cd9660_start -_cd9660_statfs -_cd9660_strategy -_cd9660_sync -_cd9660_sysctl -_cd9660_tstamp_conv17 -_cd9660_tstamp_conv7 -_cd9660_unlock -_cd9660_unmount -_cd9660_vfsops -_cd9660_vget -_cd9660_vget_internal -_cd9660_vnodeop_entries -_cd9660_vnodeop_opv_desc -_cd9660_vnodeop_p -_cd9660_vptofh -_cd9660_xa_read -_cdevsw _cdevsw_add _cdevsw_add_with_bdev _cdevsw_isfree _cdevsw_remove -_cfree -_cfreecount -_cfreelist -_chdir -_check_actforsig -_check_cpu_subtype -_check_exec_access -_check_routeselfref -_checkalias -_checkuseraccess -_chflags -_chgproccnt -_chkdq -_chkdqchg -_chkiq -_chkiqchg -_chkvnlock -_chmod -_chown -_chroot -_chrtoblk -_chrtoblk_set -_cinit -_cjk_encoding -_cjk_lastunique -_clalloc -_classichandler -_classichandler_fileid -_classichandler_fsid -_clear_procsiglist -_clear_wait -_clfree _clock_absolutetime_interval_to_deadline -_clock_adjtime -_clock_adjust_calendar -_clock_alarm -_clock_alarm_intr -_clock_alarm_reply -_clock_config -_clock_count -_clock_deadline_for_periodic_event -_clock_get_attributes +_clock_delay_until _clock_get_calendar_microtime _clock_get_calendar_nanotime _clock_get_calendar_value _clock_get_system_microtime _clock_get_system_nanotime _clock_get_system_value -_clock_get_time _clock_get_uptime -_clock_init -_clock_initialize_calendar _clock_interval_to_absolutetime_interval _clock_interval_to_deadline -_clock_list -_clock_priv_server -_clock_priv_server_routine -_clock_priv_subsystem -_clock_server -_clock_server_routine -_clock_service_create -_clock_set_attributes -_clock_set_calendar_adjtime -_clock_set_calendar_microtime -_clock_set_time -_clock_set_timer_deadline -_clock_set_timer_func -_clock_sleep_internal -_clock_sleep_trap -_clock_subsystem _clock_timebase_info -_clock_wakeup_calendar -_clone_system_shared_regions -_cloneproc -_close -_closef -_clr_be_bit -_clrbit -_clrbits -_cluster_bp -_cluster_copy_ubc_data -_cluster_copy_upl_data -_cluster_pagein -_cluster_pageout -_cluster_push -_cluster_read -_cluster_release -_cluster_transfer_minimum -_cluster_write -_clustered_reads -_clustered_writes -_clusters_available -_clusters_committed -_clusters_committed_peak -_cmask -_cngetc -_cnmaygetc -_cnodehash -_cnodehashtbl _cnputc -_collectth_state -_com_mapping_resource -_com_region_handle -_com_region_map -_com_region_size -_commpage_populate -_comp_add_data -_comp_end -_comp_get_ratio -_comp_init -_compute_averunnable -_compute_mach_factor -_compute_my_priority -_compute_priority -_concat_domain -_connect -_cons -_cons_cinput -_consdebug_putc -_consider_machine_adjust -_consider_machine_collect -_consider_task_collect -_consider_zone_gc _conslog_putc _console_user -_constty -_convert_act_to_port -_convert_clock_ctrl_to_port -_convert_clock_to_port -_convert_host_to_port -_convert_ledger_to_port -_convert_lock_set_to_port -_convert_memory_object_to_port -_convert_mig_object_to_port -_convert_mo_control_to_port _convert_port_entry_to_map _convert_port_entry_to_object -_convert_port_to_UNDReply -_convert_port_to_act -_convert_port_to_clock -_convert_port_to_clock_ctrl -_convert_port_to_host -_convert_port_to_host_priv -_convert_port_to_host_security -_convert_port_to_ledger -_convert_port_to_lock_set -_convert_port_to_locked_task -_convert_port_to_map -_convert_port_to_memory_object -_convert_port_to_mig_object 
-_convert_port_to_mo_control -_convert_port_to_processor -_convert_port_to_pset -_convert_port_to_pset_name -_convert_port_to_semaphore -_convert_port_to_space -_convert_port_to_task -_convert_port_to_upl -_convert_processor_to_port -_convert_pset_name_to_port -_convert_pset_to_port -_convert_semaphore_to_port -_convert_task_to_port -_convert_upl_to_port -_copyfile -_copyin -_copyin_shared_file -_copyinmap -_copyinmsg -_copyinstr -_copyout -_copyoutmap -_copyoutmsg -_copyoutstr -_copypv -_copyright -_copystr -_copywithin -_coredump -_coredumpok -_count_busy_buffers -_count_lock_queue -_cpm_allocate -_cpu_control -_cpu_down -_cpu_info -_cpu_info_count -_cpu_init -_cpu_launch_first_thread -_cpu_machine_init -_cpu_number -_cpu_register -_cpu_signal_handler -_cpu_sleep -_cpu_start -_cpu_up -_crcmp -_crcopy -_crdup -_create_unix_stack -_cred0 -_crfree -_crget -_csw_check -_cthread_stack_size -_ctl_attach -_ctl_connect -_ctl_ctloutput -_ctl_deregister -_ctl_disconnect -_ctl_enqueuedata -_ctl_enqueuembuf -_ctl_find -_ctl_head -_ctl_ioctl -_ctl_post_msg -_ctl_register -_ctl_send -_ctl_usrreqs -_ctlsw -_cttyioctl -_cttyopen -_cttyread -_cttyselect -_cttywrite -_cur_tw_slot _current_act -_current_debugger -_current_map _current_proc _current_proc_EXTERNAL _current_task _current_thread -_current_thread_aborted -_current_timer -_cvtstat -_d_to_i _db_dumpiojunk _db_piokjunk -_db_thread_read_times -_db_timer_grab -_dbugprintf -_ddb_regs -_dead_badop -_dead_blktooff -_dead_bmap -_dead_cmap -_dead_ebadf -_dead_ioctl -_dead_lock -_dead_lookup -_dead_nullop -_dead_offtoblk -_dead_open -_dead_print -_dead_read -_dead_select -_dead_strategy -_dead_vnodeop_entries -_dead_vnodeop_opv_desc -_dead_vnodeop_p -_dead_write -_debug_buf -_debug_buf_ptr -_debug_buf_size _debug_container_malloc_size _debug_iomalloc_size _debug_ivars_size -_debug_log_init _debug_malloc_size -_debug_mode -_debug_putc -_def_tbuffer_size -_default_environment_shared_regions -_default_pager -_default_pager_add_file -_default_pager_async_lock -_default_pager_backing_store_create -_default_pager_backing_store_delete -_default_pager_backing_store_info -_default_pager_clsize -_default_pager_default_set -_default_pager_external_count -_default_pager_external_set -_default_pager_info -_default_pager_info_verbose -_default_pager_init_flag -_default_pager_initialize -_default_pager_internal_count -_default_pager_internal_set -_default_pager_memory_object_create -_default_pager_memory_object_default_subsystem -_default_pager_object -_default_pager_object_create -_default_pager_object_pages -_default_pager_object_server -_default_pager_object_server_routine -_default_pager_object_subsystem -_default_pager_objects -_default_pager_space_alert -_default_pager_triggers -_default_preemption_rate -_default_pset -_deflate -_deflateCopy -_deflateEnd -_deflateInit2_ -_deflateInit_ -_deflateParams -_deflateReset -_deflateSetDictionary -_deflate_copyright -_defrouter_addreq -_defrouter_delreq -_defrouter_lookup -_defrouter_select -_defrtrlist_del -_delack_bitmask _delay -_delete -_delete_each_prefix -_des_SPtrans -_des_check_key -_des_check_key_parity -_des_decrypt3 -_des_ecb3_encrypt -_des_ecb_encrypt -_des_encrypt1 -_des_encrypt2 -_des_encrypt3 -_des_fixup_key_parity -_des_is_weak_key -_des_key_sched -_des_options -_des_set_key -_des_set_key_checked -_des_set_key_unchecked -_des_set_odd_parity -_desireddquot -_desiredvnodes -_dest6_input -_dev_add_entry -_dev_add_name -_dev_add_node -_dev_dup_entry -_dev_dup_plane -_dev_finddir -_dev_findname -_dev_free_hier 
-_dev_free_name -_dev_root -_devcls -_devfs_checkpath -_devfs_dn_free -_devfs_dntovn -_devfs_free_plane -_devfs_kernel_mount -_devfs_lock _devfs_make_link _devfs_make_node -_devfs_mknod -_devfs_mount -_devfs_propogate _devfs_remove -_devfs_sinit -_devfs_spec_vnodeop_opv_desc -_devfs_spec_vnodeop_p -_devfs_stats -_devfs_update -_devfs_vfsops -_devfs_vnodeop_opv_desc -_devfs_vnodeop_p _device_close _device_data_action -_device_object_create -_device_pager_bootstrap -_device_pager_data_initialize -_device_pager_data_request -_device_pager_data_return -_device_pager_data_unlock -_device_pager_deallocate -_device_pager_init -_device_pager_lookup -_device_pager_populate_object -_device_pager_reference -_device_pager_setup -_device_pager_synchronize -_device_pager_terminate -_device_pager_unmap -_device_pager_workaround -_device_pager_zone -_device_service_create -_devin -_devio -_devioc _devnode_free -_devopn -_devout -_devwait _dgraph_add_dependency _dgraph_add_dependent _dgraph_establish_load_order
@@ -4026,453 +3191,16 @@
_dgraph_find_root _dgraph_free _dgraph_init _dgraph_log -_dhcpol_add -_dhcpol_concat -_dhcpol_count -_dhcpol_element -_dhcpol_find -_dhcpol_free -_dhcpol_get -_dhcpol_init -_dhcpol_parse_buffer -_dhcpol_parse_packet -_dhcpol_parse_vendor -_di_root_image -_dirchk -_disableConsoleOutput -_disableDebugOuput _disableSerialOuput -_disable_bluebox -_disable_branch_tracing -_disable_funnel -_dispatch_counts -_div_init -_div_input -_div_usrreqs -_divert_packet -_dlil_attach_interface_filter -_dlil_attach_protocol -_dlil_attach_protocol_filter -_dlil_dereg_if_modules -_dlil_dereg_proto_module -_dlil_detach_filter -_dlil_detach_protocol -_dlil_event -_dlil_expand_mcl -_dlil_find_dltag -_dlil_if_acquire -_dlil_if_attach -_dlil_if_detach -_dlil_if_release -_dlil_init -_dlil_initialized -_dlil_inject_if_input -_dlil_inject_if_output -_dlil_inject_pr_input -_dlil_inject_pr_output -_dlil_input -_dlil_input_lock -_dlil_input_packet -_dlil_input_thread_continue -_dlil_input_thread_wakeup -_dlil_ioctl -_dlil_output -_dlil_plumb_protocol -_dlil_post_msg -_dlil_reg_if_modules -_dlil_reg_proto_module -_dlil_stats -_dlil_unplumb_protocol -_dlttoproto -_dmmax -_dmmin -_dmtext -_do_bsdexception -_doasyncfree -_doclusterread -_doclusterwrite -_doingcache -_domaininit -_domainname -_domainnamelen -_domains -_donice -_doreallocblks -_dosetrlimit -_dounmount -_dp_memory_object_data_initialize -_dp_memory_object_data_request -_dp_memory_object_data_return -_dp_memory_object_data_unlock -_dp_memory_object_deallocate -_dp_memory_object_init -_dp_memory_object_reference -_dp_memory_object_subsystem -_dp_memory_object_synchronize -_dp_memory_object_terminate -_dp_memory_object_unmap -_dp_pages_free -_dp_parse_argument -_dp_pgins -_dp_pgouts -_dpt_array -_dpt_lock -_dqdirtylist -_dqfileclose -_dqfileopen -_dqflush -_dqfreelist -_dqget -_dqhash -_dqhashtbl -_dqinit -_dqreclaim -_dqref -_dqrele -_dqsync -_dqsync_orphans -_draw_panic_dialog -_dump_string_table -_dumpdev -_dumplo -_dup -_dup2 -_dup_sockaddr -_dupfdopen -_dylink_test -_dynamic_pager_control_port -_edata -_embutl -_eml_init -_eml_task_deallocate -_eml_task_reference -_enable_bluebox -_enable_branch_tracing -_enable_funnel -_enable_hotpath -_encap4_input -_encap6_input -_encap_attach -_encap_attach_func -_encap_detach -_encap_getarg -_encap_init -_encaptab -_encode_comp_t -_end -_enodev -_enodev_strat -_enoioctl -_enosys -_enterpgrp -_enxio -_eopnotsupp -_err_abortop -_err_access -_err_advlock -_err_allocate -_err_blkatoff -_err_blktooff -_err_bmap
-_err_bwrite -_err_close -_err_cmap -_err_copyfile -_err_create -_err_devblocksize -_err_exchange -_err_fsync -_err_getattr -_err_getattrlist -_err_inactive -_err_ioctl -_err_islocked -_err_lease -_err_link -_err_lock -_err_mkcomplex -_err_mkdir -_err_mknod -_err_mmap -_err_offtoblk -_err_open -_err_pagein -_err_pageout -_err_pathconf -_err_pgrd -_err_pgwr -_err_print -_err_read -_err_readdir -_err_readdirattr -_err_readlink -_err_reallocblks -_err_reclaim -_err_remove -_err_rename -_err_revoke -_err_rmdir -_err_searchfs -_err_seek -_err_select -_err_setattr -_err_setattrlist -_err_strategy -_err_symlink -_err_truncate -_err_unlock -_err_update -_err_valloc -_err_vfree -_err_whiteout -_err_write -_errsys -_esp4_input -_esp4_output -_esp6_ctlinput -_esp6_input -_esp6_output -_esp_algorithm_lookup -_esp_auth -_esp_hdrsiz -_esp_max_ivlen -_esp_rijndael_blockdecrypt -_esp_rijndael_blockencrypt -_esp_rijndael_schedlen -_esp_rijndael_schedule -_esp_schedule -_esp_udp_encap_port -_etap_get_info -_etap_interrupt_probe -_etap_machcall_probe1 -_etap_machcall_probe2 -_etap_mon_reconfig -_etap_new_probe -_etap_probe -_etap_trace_event -_etap_trace_thread -_etext -_ether_addmulti -_ether_attach_at -_ether_attach_inet -_ether_attach_inet6 -_ether_delmulti -_ether_demux -_ether_detach_at -_ether_detach_inet -_ether_detach_inet6 -_ether_family_init -_ether_frameout -_ether_ifattach -_ether_ifmod_ioctl -_ether_inet6_prmod_ioctl -_ether_inet_prmod_ioctl -_ether_input -_ether_ipmulticast_max -_ether_ipmulticast_min -_ether_pre_output -_ether_prmod_ioctl -_ether_resolvemulti -_ether_sprintf +_ether_check_multi _ev_try_lock _ev_unlock -_event_usrreqs -_eventsw -_evprocdeque -_evprocenque -_evsofree -_exc_server -_exc_server_routine -_exception -_exception_deliver -_exception_raise -_exception_raise_state -_exception_raise_state_identity -_exchangedata -_exchangelock -_execsigs -_execv -_execve -_execve_semaphore -_exit -_exit1 -_falloc _fatfile_getarch _fatfile_getarch_affinity -_fchdir -_fchflags -_fchmod -_fchown -_fcntl -_fcount -_fdalloc -_fdavail -_fdcopy -_fdesc_allocvp -_fdesc_badop -_fdesc_getattr -_fdesc_inactive -_fdesc_init -_fdesc_ioctl -_fdesc_lookup -_fdesc_mount -_fdesc_open -_fdesc_pathconf -_fdesc_print -_fdesc_read -_fdesc_readdir -_fdesc_readlink -_fdesc_reclaim -_fdesc_root -_fdesc_select -_fdesc_setattr -_fdesc_start -_fdesc_statfs -_fdesc_sync -_fdesc_unmount -_fdesc_vfree -_fdesc_vfsops -_fdesc_vnodeop_entries -_fdesc_vnodeop_opv_desc -_fdesc_vnodeop_p -_fdesc_write -_fdexec -_fdexpand -_fdfree -_fdgetf -_fdhash -_fdhashtbl -_fdopen -_fdrelse -_ffree -_ffs -_ffs_alloc -_ffs_balloc -_ffs_blkalloc -_ffs_blkatoff -_ffs_blkfree -_ffs_blkpref -_ffs_blktooff -_ffs_clrblock -_ffs_clusteracct -_ffs_fhtovp -_ffs_fifoop_entries -_ffs_fifoop_opv_desc -_ffs_fifoop_p -_ffs_flushfiles -_ffs_fragacct -_ffs_fsync -_ffs_init -_ffs_isblock -_ffs_mount -_ffs_mountfs -_ffs_mountroot -_ffs_offtoblk -_ffs_oldfscompat -_ffs_pagein -_ffs_pageout -_ffs_read -_ffs_reallocblks -_ffs_realloccg -_ffs_reclaim -_ffs_reload -_ffs_sbupdate -_ffs_setblock -_ffs_specop_entries -_ffs_specop_opv_desc -_ffs_specop_p -_ffs_statfs -_ffs_sync -_ffs_sysctl -_ffs_truncate -_ffs_unmount -_ffs_update -_ffs_valloc -_ffs_vfree -_ffs_vget -_ffs_vnodeop_entries -_ffs_vnodeop_opv_desc -_ffs_vnodeop_p -_ffs_vptofh -_ffs_write -_ffsbit -_fhopen -_fifo_advlock -_fifo_bmap -_fifo_close -_fifo_ebadf -_fifo_inactive -_fifo_ioctl -_fifo_lookup -_fifo_nfsv2nodeop_opv_desc -_fifo_nfsv2nodeop_p -_fifo_open -_fifo_pathconf -_fifo_print 
-_fifo_printinfo -_fifo_read -_fifo_select -_fifo_vnodeop_entries -_fifo_vnodeop_opv_desc -_fifo_vnodeop_p -_fifo_write -_filedesc0 -_filehead -_fillPage _fill_backward_load_order _find_entry -_find_nke -_finishdup -_first_avail -_first_free_check -_first_free_is_valid -_first_k_zone -_first_zone -_firstc -_firstsect -_firstseg -_firstsegfromheader -_fixjobc -_flock _flush_dcache _flush_dcache64 -_fmod_watch -_fork -_forkproc -_fpathconf -_fr_checkp -_frag6_doing_reass -_frag6_drain -_frag6_init -_frag6_input -_frag6_nfragpackets -_frag6_slowtimo -_fragtbl -_fragtbl124 -_fragtbl8 -_freebitcount -_freevnodes -_fref -_frele -_fsctl -_fstat -_fstatfs -_fstatv -_fsync -_ftruncate -_fubyte -_fuibyte -_fuiword -_funnel_alloc -_funnel_free -_funnel_lock -_funnel_unlock -_futimes -_fuword -_fvm_seg -_fw_enable -_gCatalogCacheGlobals -_gCompareTable _gGearPict _gIOAppPowerStateInterest _gIOBusyInterest
@@ -4510,7 +3238,6 @@
_gIOKernelConfigTables _gIOKernelKmods _gIOKitDebug _gIOKitDebugKey -_gIOKitPortCount _gIOLocationKey _gIOLocationMatchKey _gIOMatchCategoryKey
@@ -4536,957 +3263,35 @@
_gIOServiceKey _gIOServicePlane _gIOTerminatedNotification _gIOUserClientClassKey -_gLatinCaseFold -_gLowerCaseTable _gOFVariables _gPEClockFrequencyInfo +_gPESerialBaud _gPlatformInterruptControllerName -_gTimeZone -_gatherstats -_gc_buffer_lock -_gc_vt100state -_getProcName -_get_aiotask _get_bsdtask_info -_get_bsdthread_info _get_bsduthreadarg _get_bsduthreadrval -_get_dp_control_port -_get_firstthread _get_inpcb_str_size _get_kernel_symfile -_get_map_end -_get_map_max -_get_map_min -_get_map_nentries -_get_map_pmap -_get_map_start -_get_new_filter_id _get_procrustime -_get_read_buffer -_get_set_state -_get_signalact -_get_signalthread -_get_state_handler -_get_task_ipcspace _get_task_map -_get_task_numacts -_get_task_pmap -_get_task_userstop -_get_tcp_str_size -_get_thread_userstop -_get_thread_waitresult -_get_threadtask -_get_user_regs -_get_useraddr -_get_vmmap_entries -_get_vmmap_size -_get_vmsubmap_entries -_getact_thread -_getattrlist -_getblk -_getc -_getdirentries -_getdirentriesattr -_getdtablesize -_geteblk -_getegid -_geteuid -_getfakefvmseg -_getfh -_getfsstat -_getgid -_getgroups -_getinoquota -_getitimer -_getlastaddr -_getlogin -_getmachheaders -_getnewvnode -_getpeername -_getpgid -_getpgrp -_getpid -_getppid -_getpriority -_getquota -_getrlimit -_getrusage -_getsectbyname -_getsectbynamefromheader -_getsectcmdsymtabfromheader -_getsectdatafromheader -_getsegbyname -_getsegbynamefromheader -_getsegdatafromheader -_getshuttle_thread -_getsid -_getsock -_getsockaddr -_getsockname -_getsockopt -_getsymtab -_gettimeofday -_getuid _getval -_getvnode -_gif_attach_inet -_gif_attach_inet6 -_gif_attach_proto_family -_gif_delete_tunnel -_gif_demux -_gif_detach_inet -_gif_detach_inet6 -_gif_detach_proto_family -_gif_encapcheck4 -_gif_encapcheck6 -_gif_input -_gif_ioctl -_gif_pre_output -_gif_reg_if_mods -_gif_shutdown -_gifattach -_gifs -_global_state_pid -_global_stats -_global_user_profile_cache -_grade_cpu_subtype -_groupmember -_gsignal -_halt_all_cpus -_halt_cpu -_halt_in_debugger -_hard_throttle_on_root -_hashinit -_hertz_tick -_hex2ascii_data -_hfc_tag _hfs_addconverter -_hfs_allocate -_hfs_blktooff -_hfs_bmap -_hfs_bwrite -_hfs_catname -_hfs_chash_slock -_hfs_chkdq -_hfs_chkdqchg -_hfs_chkiq -_hfs_chkiqchg -_hfs_clearlock -_hfs_cmap -_hfs_converterinit -_hfs_encoding_list -_hfs_encoding_list_slock -_hfs_encodingbias -_hfs_extname -_hfs_fifoop_entries -_hfs_fifoop_opv_desc -_hfs_fifoop_p
-_hfs_findoverlap -_hfs_getblock -_hfs_getconverter -_hfs_getinoquota -_hfs_getlock -_hfs_getquota -_hfs_ioctl -_hfs_offtoblk -_hfs_owner_rights -_hfs_pagein -_hfs_pageout -_hfs_pickencoding -_hfs_privdirname -_hfs_qsync -_hfs_quotactl -_hfs_quotaoff -_hfs_quotaon -_hfs_quotastat -_hfs_read -_hfs_relconverter _hfs_remconverter -_hfs_select -_hfs_setlock -_hfs_setquota -_hfs_setuse -_hfs_specop_entries -_hfs_specop_opv_desc -_hfs_specop_p -_hfs_split -_hfs_strategy -_hfs_swap_BTNode -_hfs_swap_HFSBTInternalNode -_hfs_swap_HFSPlusBTInternalNode -_hfs_swap_HFSPlusForkData -_hfs_to_utf8 -_hfs_truncate -_hfs_vbmname -_hfs_vfsops -_hfs_vnodeop_entries -_hfs_vnodeop_opv_desc -_hfs_vnodeop_p -_hfs_wakelock -_hfs_write -_hfsmaxlockdepth -_holdrele -_host_default_memory_manager -_host_get_UNDServer -_host_get_boot_info -_host_get_clock_control -_host_get_clock_service -_host_get_exception_ports -_host_get_io_master -_host_get_special_port -_host_info -_host_ipc_hash_info -_host_kernel_version -_host_load_symbol_table -_host_notify_calendar_change -_host_notify_init -_host_notify_port_destroy -_host_page_size -_host_priv_self -_host_priv_server -_host_priv_server_routine -_host_priv_statistics -_host_priv_subsystem -_host_processor_info -_host_processor_set_priv -_host_processor_sets -_host_processors -_host_reboot -_host_request_notification -_host_security_create_task_token -_host_security_self -_host_security_server -_host_security_server_routine -_host_security_set_task_token -_host_security_subsystem -_host_self -_host_self_trap -_host_set_UNDServer -_host_set_exception_ports -_host_set_special_port -_host_stack_usage -_host_statistics -_host_swap_exception_ports -_host_virtual_physical_table_info -_host_zone_info -_hostid -_hostname -_hostnamelen -_hw_atomic_add -_hw_atomic_and -_hw_atomic_or -_hw_atomic_sub -_hw_compare_and_store -_hw_lock_held -_hw_lock_init -_hw_lock_lock -_hw_lock_to -_hw_lock_try -_hw_lock_unlock -_hz -_hzto -_icmp6_ctloutput -_icmp6_error -_icmp6_fasttimo -_icmp6_ifstat -_icmp6_ifstatmax -_icmp6_init -_icmp6_input -_icmp6_mtudisc_update -_icmp6_nodeinfo -_icmp6_rediraccept -_icmp6_redirect_input -_icmp6_redirect_output -_icmp6_redirtimeout -_icmp6_reflect -_icmp6errppslim -_icmp6stat -_icmp_error -_icmp_input -_idle_thread -_idle_thread_continue -_if_addmulti -_if_allmulti -_if_attach -_if_delmulti -_if_delmultiaddr -_if_down -_if_down_all -_if_index -_if_name -_if_route -_if_rtproto_del -_if_unroute -_if_up -_if_withname -_ifa_ifwithaddr -_ifa_ifwithdstaddr -_ifa_ifwithnet -_ifa_ifwithroute -_ifafree -_ifaof_ifpforaddr -_ifaref -_ifbyfamily -_ifindex2ifnet -_ifioctl -_ifma_lostlist -_ifmaof_ifpforaddr -_ifmedia_add -_ifmedia_init -_ifmedia_ioctl -_ifmedia_list_add -_ifmedia_removeall -_ifmedia_set -_ifnet -_ifnet_addrs -_ifpromisc -_ifptodlt -_ifqmaxlen -_iftovt_tab -_ifunit -_igmp_fasttimo -_igmp_init -_igmp_input -_igmp_joingroup -_igmp_leavegroup -_igmp_slowtimo -_ihash -_ihashtbl -_in6_addmulti -_in6_addr2scopeid -_in6_addrscope -_in6_are_prefix_equal -_in6_cksum -_in6_clearscope -_in6_control -_in6_delmulti -_in6_dinit -_in6_embedscope -_in6_get_tmpifid -_in6_gif_input -_in6_gif_output -_in6_gif_protosw -_in6_if_up -_in6_ifaddr -_in6_ifattach -_in6_ifawithifp -_in6_ifawithscope -_in6_ifdetach -_in6_ifindex2scopeid -_in6_ifstat -_in6_ifstatmax -_in6_init2done -_in6_init_prefix_ltimes -_in6_inithead -_in6_is_addr_deprecated -_in6_len2mask -_in6_localaddr -_in6_losing -_in6_mapped_peeraddr -_in6_mapped_sockaddr -_in6_mask2len -_in6_matchlen -_in6_maxmtu -_in6_multihead 
-_in6_nigroup -_in6_nigroup_attach -_in6_nigroup_detach -_in6_pcbbind -_in6_pcbconnect -_in6_pcbdetach -_in6_pcbdisconnect -_in6_pcbladdr -_in6_pcblookup_hash -_in6_pcblookup_local -_in6_pcbnotify -_in6_pcbpurgeif0 -_in6_pcbsetport -_in6_post_msg -_in6_prefix_add_ifid -_in6_prefix_ioctl -_in6_prefix_remove_ifid -_in6_prefixlen2mask -_in6_proto_count -_in6_purgeaddr -_in6_purgeif -_in6_purgeprefix -_in6_recoverscope -_in6_rr_timer -_in6_rr_timer_funneled -_in6_rtchange -_in6_selecthlim -_in6_selectsrc -_in6_setmaxmtu -_in6_setpeeraddr -_in6_setsockaddr -_in6_sin6_2_sin -_in6_sin6_2_sin_in_sock -_in6_sin_2_v4mapsin6 -_in6_sin_2_v4mapsin6_in_sock -_in6_sockaddr -_in6_tmpaddrtimer -_in6_tmpaddrtimer_funneled -_in6_tmpifadd -_in6_update_ifa -_in6_v4mapsin6_sockaddr -_in6addr_any -_in6addr_linklocal_allnodes -_in6addr_linklocal_allrouters -_in6addr_loopback -_in6addr_nodelocal_allnodes -_in6if_do_dad -_in6ifa_ifpforlinklocal -_in6ifa_ifpwithaddr -_in6mask0 -_in6mask128 -_in6mask32 -_in6mask64 -_in6mask96 -_in_addmulti -_in_addword -_in_broadcast -_in_canforward -_in_cksum -_in_cksum_skip -_in_control -_in_delayed_cksum -_in_delmulti -_in_dinit -_in_gif_input -_in_gif_output -_in_gif_protosw -_in_ifaddrhead -_in_ifadown -_in_ifscrub -_in_inithead -_in_localaddr -_in_losing -_in_multihead -_in_pcb_get_owner -_in_pcb_grab_port -_in_pcb_letgo_port -_in_pcb_nat_init -_in_pcb_new_share_client -_in_pcb_rem_share_client -_in_pcballoc -_in_pcbbind -_in_pcbconnect -_in_pcbdetach -_in_pcbdisconnect -_in_pcbinshash -_in_pcbladdr -_in_pcblookup_hash -_in_pcblookup_local -_in_pcbnotifyall -_in_pcbpurgeif0 -_in_pcbrehash -_in_pcbremlists -_in_proto_count -_in_pseudo -_in_rtchange -_in_rtqdrain -_in_setpeeraddr -_in_setsockaddr -_in_stf_input -_in_stf_protosw -_inactivevnodes -_incore -_inet6_ether_input -_inet6_ether_pre_output -_inet6ctlerrmap -_inet6domain -_inet6sw -_inet_aton -_inet_ether_input -_inet_ether_pre_output -_inet_ntoa -_inetctlerrmap -_inetdomain -_inetsw -_inferior -_inflate -_inflateEnd -_inflateInit2_ -_inflateInit_ -_inflateReset -_inflateSetDictionary -_inflateSync -_inflateSyncPoint -_inflate_blocks -_inflate_blocks_free -_inflate_blocks_new -_inflate_blocks_reset -_inflate_blocks_sync_point -_inflate_codes -_inflate_codes_free -_inflate_codes_new -_inflate_copyright -_inflate_fast -_inflate_flush -_inflate_mask -_inflate_set_dictionary -_inflate_trees_bits -_inflate_trees_dynamic -_inflate_trees_fixed -_init_args -_init_ast_check -_init_attempts -_init_domain -_init_exec_args -_init_ip6pktopts -_init_process -_init_program_name -_init_sin6 -_init_task_failure_data -_init_timers -_initialize_screen -_initialized -_initproc -_inittodr -_inside -_insmntque -_install_special_handler -_install_special_handler_locked -_int6intrq_present -_interlock_unlock -_intstack -_invalhash _invalidate_icache _invalidate_icache64 -_io_map -_io_map_spec -_io_throttle_zero_fill -_iobufqueue -_ioctl _iokit_add_reference -_iokit_alloc_object_port -_iokit_builder +_iokit_builder:_osbuilder _iokit_client_died -_iokit_destroy_object_port -_iokit_lookup_connect_port -_iokit_lookup_connect_ref -_iokit_lookup_connect_ref_current_task -_iokit_lookup_object_port -_iokit_make_connect_port -_iokit_make_object_port -_iokit_make_send_right -_iokit_notify -_iokit_osrelease -_iokit_ostype +_iokit_osrelease:_osrelease +_iokit_ostype:_ostype _iokit_port_for_object -_iokit_release_port _iokit_remove_reference -_iokit_retain_port -_iokit_server -_iokit_server_routine _iokit_user_client_trap -_iokit_version -_iokit_version_major 
-_iokit_version_minor -_iokit_version_variant -_ip4_ah_cleartos -_ip4_ah_net_deflev -_ip4_ah_offsetmask -_ip4_ah_trans_deflev -_ip4_def_policy -_ip4_esp_net_deflev -_ip4_esp_randpad -_ip4_esp_trans_deflev -_ip4_ipsec_dfbit -_ip4_ipsec_ecn -_ip6_accept_rtadv -_ip6_addaux -_ip6_ah_net_deflev -_ip6_ah_trans_deflev -_ip6_auto_flowlabel -_ip6_auto_linklocal -_ip6_clearpktopts -_ip6_copypktopts -_ip6_ctloutput -_ip6_dad_count -_ip6_def_policy -_ip6_defhlim -_ip6_defmcasthlim -_ip6_delaux -_ip6_desync_factor -_ip6_ecn_egress -_ip6_ecn_ingress -_ip6_esp_net_deflev -_ip6_esp_randpad -_ip6_esp_trans_deflev -_ip6_findaux -_ip6_flow_seq -_ip6_forward -_ip6_forward_rt -_ip6_forward_srcrt -_ip6_forwarding -_ip6_freemoptions -_ip6_freepcbopts -_ip6_fw_chk_ptr -_ip6_fw_ctl_ptr -_ip6_fw_enable -_ip6_get_prevhdr -_ip6_getdstifaddr -_ip6_gif_hlim -_ip6_hdrnestlimit -_ip6_id -_ip6_init -_ip6_input -_ip6_ipsec_ecn -_ip6_keepfaith -_ip6_lasthdr -_ip6_log_interval -_ip6_log_time -_ip6_maxfragpackets -_ip6_mforward -_ip6_mloopback -_ip6_mrouter -_ip6_mrouter_done -_ip6_mrouter_get -_ip6_mrouter_set -_ip6_mrouter_ver -_ip6_mrtproto -_ip6_nexthdr -_ip6_optlen -_ip6_ours_check_algorithm -_ip6_output -_ip6_process_hopopts -_ip6_protox -_ip6_rr_prune -_ip6_savecontrol -_ip6_sendredirects -_ip6_setpktoptions -_ip6_sourcecheck -_ip6_sourcecheck_interval -_ip6_sprintf -_ip6_temp_preferred_lifetime -_ip6_temp_regen_advance -_ip6_temp_valid_lifetime -_ip6_unknown_opt -_ip6_use_deprecated -_ip6_use_tempaddr -_ip6_v6only -_ip6intr -_ip6intrq -_ip6q -_ip6stat -_ip_ctloutput -_ip_defttl -_ip_divert_cookie -_ip_drain -_ip_ecn_egress -_ip_ecn_ingress -_ip_freemoptions -_ip_fw_chk_ptr -_ip_fw_ctl_ptr -_ip_fw_fwd_addr -_ip_gif_ttl -_ip_id -_ip_init -_ip_input -_ip_linklocal_in_allowbadttl -_ip_linklocal_stat -_ip_mcast_src -_ip_mforward -_ip_mrouter -_ip_mrouter_done -_ip_mrouter_get -_ip_mrouter_set -_ip_optcopy -_ip_output -_ip_pkt_to_mbuf -_ip_protox -_ip_rsvp_done -_ip_rsvp_force_done -_ip_rsvp_init -_ip_rsvp_vif_done -_ip_rsvp_vif_init -_ip_rsvpd -_ip_savecontrol -_ip_slowtimo -_ip_srcroute -_ip_stripoptions -_ipc_bootstrap -_ipc_clock_enable -_ipc_clock_init -_ipc_entry_alloc -_ipc_entry_alloc_name -_ipc_entry_dealloc -_ipc_entry_get -_ipc_entry_grow_table -_ipc_entry_lookup -_ipc_entry_tree_collision -_ipc_hash_delete -_ipc_hash_global_delete -_ipc_hash_global_insert -_ipc_hash_global_lookup -_ipc_hash_global_mask -_ipc_hash_global_size -_ipc_hash_global_table -_ipc_hash_init -_ipc_hash_insert -_ipc_hash_local_delete -_ipc_hash_local_insert -_ipc_hash_local_lookup -_ipc_hash_lookup -_ipc_host_init -_ipc_init -_ipc_kernel_copy_map -_ipc_kernel_copy_map_size -_ipc_kernel_map -_ipc_kernel_map_size -_ipc_kmsg_alloc -_ipc_kmsg_cache -_ipc_kmsg_cache_avail -_ipc_kmsg_clean -_ipc_kmsg_clean_body -_ipc_kmsg_clean_partial -_ipc_kmsg_clear_prealloc -_ipc_kmsg_copyin -_ipc_kmsg_copyin_body -_ipc_kmsg_copyin_from_kernel -_ipc_kmsg_copyin_header -_ipc_kmsg_copyin_scatter -_ipc_kmsg_copyout -_ipc_kmsg_copyout_body -_ipc_kmsg_copyout_dest -_ipc_kmsg_copyout_header -_ipc_kmsg_copyout_object -_ipc_kmsg_copyout_pseudo -_ipc_kmsg_copyout_to_kernel -_ipc_kmsg_dequeue -_ipc_kmsg_destroy -_ipc_kmsg_destroy_dest -_ipc_kmsg_enqueue -_ipc_kmsg_free -_ipc_kmsg_free_scatter -_ipc_kmsg_get -_ipc_kmsg_get_from_kernel -_ipc_kmsg_init -_ipc_kmsg_max_vm_space -_ipc_kmsg_put -_ipc_kmsg_put_to_kernel -_ipc_kmsg_queue_next -_ipc_kmsg_rmqueue -_ipc_kmsg_send -_ipc_kmsg_set_prealloc -_ipc_kobject_destroy -_ipc_kobject_notify -_ipc_kobject_server 
-_ipc_kobject_set -_ipc_kobject_set_atomically -_ipc_mqueue_add -_ipc_mqueue_changed -_ipc_mqueue_copyin -_ipc_mqueue_destroy -_ipc_mqueue_full -_ipc_mqueue_init -_ipc_mqueue_member -_ipc_mqueue_post -_ipc_mqueue_rcv -_ipc_mqueue_receive -_ipc_mqueue_receive_continue -_ipc_mqueue_receive_results -_ipc_mqueue_release_msgcount -_ipc_mqueue_remove -_ipc_mqueue_remove_all -_ipc_mqueue_remove_from_all -_ipc_mqueue_select -_ipc_mqueue_send -_ipc_mqueue_set_qlimit -_ipc_mqueue_set_seqno -_ipc_notify_dead_name -_ipc_notify_no_senders -_ipc_notify_port_deleted -_ipc_notify_port_destroyed -_ipc_notify_send_once -_ipc_object_alloc -_ipc_object_alloc_dead -_ipc_object_alloc_dead_name -_ipc_object_alloc_name -_ipc_object_copyin -_ipc_object_copyin_from_kernel -_ipc_object_copyin_type -_ipc_object_copyout -_ipc_object_copyout_dest -_ipc_object_copyout_name -_ipc_object_destroy -_ipc_object_reference -_ipc_object_release -_ipc_object_rename -_ipc_object_translate -_ipc_object_translate_two -_ipc_object_zones -_ipc_port_alloc -_ipc_port_alloc_name -_ipc_port_alloc_special -_ipc_port_check_circularity -_ipc_port_clear_receiver -_ipc_port_copy_send -_ipc_port_copyout_send -_ipc_port_dealloc_special -_ipc_port_destroy -_ipc_port_dncancel -_ipc_port_dngrow -_ipc_port_dnnotify -_ipc_port_dnrequest -_ipc_port_init -_ipc_port_lookup_notify -_ipc_port_make_send -_ipc_port_make_send_locked -_ipc_port_make_sonce -_ipc_port_max -_ipc_port_multiple_lock_data -_ipc_port_nsrequest -_ipc_port_pdrequest -_ipc_port_release -_ipc_port_release_receive +_iokit_version:_version +_iokit_version_major:_version_major +_iokit_version_minor:_version_minor +_iokit_version_variant:_version_variant _ipc_port_release_send -_ipc_port_release_sonce -_ipc_port_timestamp -_ipc_port_timestamp_data -_ipc_port_timestamp_lock_data -_ipc_processor_disable -_ipc_processor_enable -_ipc_processor_init -_ipc_processor_terminate -_ipc_pset_add -_ipc_pset_alloc -_ipc_pset_alloc_name -_ipc_pset_destroy -_ipc_pset_disable -_ipc_pset_enable -_ipc_pset_init -_ipc_pset_max -_ipc_pset_member -_ipc_pset_remove -_ipc_pset_remove_from_all -_ipc_pset_terminate -_ipc_right_check -_ipc_right_clean -_ipc_right_copyin -_ipc_right_copyin_check -_ipc_right_copyin_two -_ipc_right_copyin_undo -_ipc_right_copyout -_ipc_right_dealloc -_ipc_right_delta -_ipc_right_destroy -_ipc_right_dncancel -_ipc_right_dnrequest -_ipc_right_info -_ipc_right_inuse -_ipc_right_lookup_two_write -_ipc_right_lookup_write -_ipc_right_rename -_ipc_right_reverse -_ipc_space_clean -_ipc_space_create -_ipc_space_create_special -_ipc_space_destroy -_ipc_space_kernel -_ipc_space_max -_ipc_space_reference -_ipc_space_release -_ipc_space_reply -_ipc_space_zone -_ipc_splay_traverse_finish -_ipc_splay_traverse_next -_ipc_splay_traverse_start -_ipc_splay_tree_bounds -_ipc_splay_tree_delete -_ipc_splay_tree_init -_ipc_splay_tree_insert -_ipc_splay_tree_join -_ipc_splay_tree_lookup -_ipc_splay_tree_pick -_ipc_splay_tree_split -_ipc_table_alloc -_ipc_table_dnrequests -_ipc_table_dnrequests_size -_ipc_table_entries -_ipc_table_entries_size -_ipc_table_fill -_ipc_table_free -_ipc_table_init -_ipc_table_realloc -_ipc_task_disable -_ipc_task_enable -_ipc_task_init -_ipc_task_terminate -_ipc_thr_act_disable -_ipc_thr_act_init -_ipc_thr_act_terminate -_ipc_thread_init -_ipc_thread_terminate -_ipc_tree_entry_max -_ipc_tree_entry_zone -_ipcomp4_input -_ipcomp4_output -_ipcomp6_input -_ipcomp6_output -_ipcomp_algorithm_lookup -_ipcperm -_ipflow_create -_ipflow_fastforward -_ipflow_slowtimo -_ipforwarding -_ipintr 
-_ipintrq -_ipintrq_present -_ipip_input -_ipport_firstauto -_ipport_hifirstauto -_ipport_hilastauto -_ipport_lastauto -_ipport_lowfirstauto -_ipport_lowlastauto -_ipsec4_delete_pcbpolicy -_ipsec4_get_policy -_ipsec4_getpolicybyaddr -_ipsec4_getpolicybysock -_ipsec4_hdrsiz -_ipsec4_in_reject -_ipsec4_in_reject_so -_ipsec4_logpacketstr -_ipsec4_output -_ipsec4_set_policy -_ipsec4_tunnel_validate -_ipsec6_delete_pcbpolicy -_ipsec6_get_policy -_ipsec6_getpolicybyaddr -_ipsec6_getpolicybysock -_ipsec6_hdrsiz -_ipsec6_in_reject -_ipsec6_in_reject_so -_ipsec6_logpacketstr -_ipsec6_output_trans -_ipsec6_output_tunnel -_ipsec6_set_policy -_ipsec6_tunnel_validate -_ipsec6stat -_ipsec_addhist -_ipsec_bypass -_ipsec_chkreplay -_ipsec_clearhist -_ipsec_copy_policy -_ipsec_copypkt -_ipsec_debug -_ipsec_delaux -_ipsec_dumpmbuf -_ipsec_get_reqlevel -_ipsec_gethist -_ipsec_getsocket -_ipsec_hdrsiz_tcp -_ipsec_init_policy -_ipsec_logsastr -_ipsec_setsocket -_ipsec_updatereplay -_ipsecstat -_ipstat -_iptime -_is_file_clean _is_io_async_method_scalarI_scalarO _is_io_async_method_scalarI_structureI _is_io_async_method_scalarI_structureO
@@ -5548,232 +3353,28 @@
_is_io_service_match_property_table_ool _is_io_service_open _is_io_service_request_probe _is_io_service_wait_quiet -_is_iokit_subsystem -_is_kerneltask _is_suser _is_suser1 -_is_thread_active -_is_thread_idle -_is_thread_running _isargsep -_isdisk -_isinferior -_iskmemdev -_isn_ctx -_isn_last_reseed -_isn_secret -_iso_font -_iso_nchstats -_isodirino -_isofncmp -_isofntrans -_isohash -_isohashtbl -_isonullname -_issetugid -_issignal -_issingleuser -_itimerdecr -_itimerfix -_itoa -_journal_active -_journal_close -_journal_create -_journal_end_transaction -_journal_flush -_journal_kill_block -_journal_modify_block_abort -_journal_modify_block_end -_journal_modify_block_start -_journal_open -_journal_start_transaction _kOSBooleanFalse _kOSBooleanTrue -_k_zone -_k_zone_max _kalloc -_kalloc_canblock -_kalloc_fake_zone_info -_kalloc_init -_kalloc_large_inuse -_kalloc_large_max -_kalloc_large_total -_kalloc_map -_kalloc_map_size -_kalloc_max -_kalloc_max_prerounded -_kalloc_noblock -_kalloc_zone -_kd_buffer -_kd_buflast -_kd_bufptr -_kd_bufsize -_kd_buftomem -_kd_entropy_buffer -_kd_entropy_bufsize -_kd_entropy_buftomem -_kd_entropy_count -_kd_entropy_indx -_kd_mapcount -_kd_mapptr -_kd_mapsize -_kd_maptomem -_kd_prev_timebase -_kd_readlast -_kd_trace_lock -_kdb_printf -_kdbg_bootstrap -_kdbg_clear -_kdbg_control -_kdbg_control_chud -_kdbg_getentropy -_kdbg_getreg -_kdbg_mapinit -_kdbg_read -_kdbg_readmap -_kdbg_reinit -_kdbg_resolve_map -_kdbg_setpid -_kdbg_setpidex -_kdbg_setreg -_kdbg_setrtcdec -_kdbg_trace_data -_kdbg_trace_string -_kdebug_chudhook _kdebug_enable -_kdebug_flags -_kdebug_nolog -_kdebug_ops -_kdebug_trace -_kdlog_beg -_kdlog_end -_kdlog_value1 -_kdlog_value2 -_kdlog_value3 -_kdlog_value4 -_kdp -_kdp_call -_kdp_call_kdb -_kdp_exception -_kdp_exception_ack -_kdp_flag -_kdp_get_interface -_kdp_get_ip_address -_kdp_get_mac_addr -_kdp_getc -_kdp_intr_disbl -_kdp_intr_enbl -_kdp_machine_hostinfo -_kdp_machine_read_regs -_kdp_machine_write_regs -_kdp_ml_get_breakinsn -_kdp_packet -_kdp_panic -_kdp_raise_exception -_kdp_reboot _kdp_register_send_receive -_kdp_remove_all_breakpoints -_kdp_reset _kdp_set_interface -_kdp_set_ip_and_mac_addresses -_kdp_sync_cache _kdp_unregister_send_receive -_kdp_us_spin -_kdp_vm_read -_kdp_vm_write -_kentry_count -_kentry_data -_kentry_data_size -_kern_control_init -_kern_event_init -_kern_invalid -_kern_invalid_debug
_kern_os_free _kern_os_malloc _kern_os_malloc_size _kern_os_realloc -_kern_sysctl -_kernacc _kernelLinkerPresent _kernel_debug _kernel_debug1 -_kernel_flock _kernel_map -_kernel_memory_allocate -_kernel_object_iopl_request -_kernel_pageable_map _kernel_pmap -_kernel_pmap_store -_kernel_sysctl _kernel_task -_kernel_task_create _kernel_thread -_kernel_thread_create -_kernel_thread_with_priority -_kernel_timer -_kernel_upl_abort -_kernel_upl_abort_range -_kernel_upl_commit -_kernel_upl_commit_range -_kernel_upl_map -_kernel_upl_unmap -_kernel_vm_map_reference -_kernproc -_kev_attach -_kev_control -_kev_detach -_kev_post_msg -_kevent -_key_allocsa -_key_allocsp -_key_cb -_key_checkrequest -_key_checktunnelsanity -_key_debug_level -_key_dst -_key_freereg -_key_freesav -_key_freeso -_key_freesp -_key_gettunnel -_key_init -_key_ismyaddr -_key_msg2sp -_key_newsp -_key_output -_key_parse -_key_random -_key_randomfill -_key_sa_recordxfer -_key_sa_routechange -_key_sa_stir_iv -_key_sendup -_key_sendup_mbuf -_key_sp2msg -_key_spdacquire -_key_src -_key_timehandler -_key_timehandler_funnel -_key_usrreqs -_keydb_delsecashead -_keydb_delsecpolicy -_keydb_delsecreg -_keydb_delsecreplay -_keydb_freesecasvar -_keydb_newsecashead -_keydb_newsecasvar -_keydb_newsecpolicy -_keydb_newsecreg -_keydb_newsecreplay -_keydb_refsecasvar -_keydomain -_keystat -_keysw _kfree -_kget -_kill -_killpg1 -_kinfo_vdebug _kld_file_cleanup_all_resources _kld_file_getaddr _kld_file_lookupsymbol
@@ -5781,1782 +3382,132 @@
_kld_file_map _kld_file_merge_OSObjects _kld_file_patch_OSObjects _kld_file_prepare_for_link -_klist_init -_klogwakeup -_km_tty -_kmclose _kmem_alloc -_kmem_alloc_aligned -_kmem_alloc_contig -_kmem_alloc_pageable -_kmem_alloc_pages -_kmem_alloc_wired _kmem_free -_kmem_init -_kmem_io_object_deallocate -_kmem_io_object_trunc -_kmem_mb_alloc -_kmem_realloc -_kmem_remap_pages -_kmem_suballoc -_kmeminit -_kmemstats -_kmgetc -_kmgetc_silent -_kminit -_kmioctl _kmod -_kmod_cmd_queue -_kmod_control -_kmod_create _kmod_create_fake -_kmod_create_internal -_kmod_default_start -_kmod_default_stop -_kmod_destroy -_kmod_destroy_internal -_kmod_dump -_kmod_finalize_cpp -_kmod_get_info -_kmod_init -_kmod_initialize_cpp -_kmod_load_extension -_kmod_load_extension_with_dependencies _kmod_load_from_cache _kmod_load_function _kmod_load_request _kmod_lock -_kmod_lookupbyid -_kmod_lookupbyid_locked _kmod_lookupbyname -_kmod_lookupbyname_locked -_kmod_queue_cmd -_kmod_queue_lock -_kmod_release -_kmod_retain -_kmod_send_generic -_kmod_start_or_stop _kmod_unload_cache -_kmopen -_kmputc -_kmread -_kmwrite -_kmzones -_knote -_knote_attach -_knote_detach -_knote_fdclose -_knote_init -_knote_remove _kprintf -_kqueue -_kqueue_from_portset_np -_kqueue_portset_np -_kqueue_register -_kqueue_stat -_krealloc -_krpc_call -_krpc_portmap -_ktrace -_ktrcsw -_ktrgenio -_ktrnamei -_ktrpsig -_ktrsyscall -_ktrsysret -_kvprintf -_kvtophys -_last_page_zf -_last_zone -_lbolt -_ldisc_deregister -_ldisc_register -_lease_check -_lease_updatetime -_leavepgrp -_ledger_copy -_ledger_create -_ledger_enter -_ledger_init -_ledger_read -_ledger_server -_ledger_server_routine -_ledger_subsystem -_ledger_terminate -_ledger_transfer -_legal_vif_num -_lf_clearlock -_lf_findoverlap -_lf_getblock -_lf_getlock -_lf_setlock -_lf_split -_lf_wakelock -_libkern_builder -_libkern_osrelease -_libkern_ostype -_libkern_version -_libkern_version_major -_libkern_version_minor -_libkern_version_variant -_libsa_builder -_libsa_osrelease -_libsa_ostype -_libsa_version
-_libsa_version_major -_libsa_version_minor -_libsa_version_variant -_lightning_bolt -_limcopy -_limit0 -_linesw -_link -_lio_listio -_listen -_llinfo_nd6 -_lo_attach_inet -_lo_attach_inet6 -_lo_demux -_lo_framer -_lo_input -_lo_reg_if_mods -_lo_set_bpf_tap -_lo_shutdown -_load_init_program -_load_ipfw +_libkern_builder:_osbuilder +_libkern_osrelease:_osrelease +_libkern_ostype:_ostype +_libkern_version:_version +_libkern_version_major:_version_major +_libkern_version_minor:_version_minor +_libkern_version_variant:_version_variant +_libsa_builder:_osbuilder +_libsa_osrelease:_osrelease +_libsa_ostype:_ostype +_libsa_version:_version +_libsa_version_major:_version_major +_libsa_version_minor:_version_minor +_libsa_version_variant:_version_variant _load_kernel_extension -_load_machfile -_load_shared_file -_local_log2 -_local_proto_count -_localdomain -_lock_acquire -_lock_alloc -_lock_done -_lock_free -_lock_handoff -_lock_handoff_accept -_lock_init -_lock_make_stable -_lock_make_unstable -_lock_read -_lock_read_to_write -_lock_release -_lock_release_internal -_lock_set_create -_lock_set_dereference -_lock_set_destroy -_lock_set_event -_lock_set_handoff -_lock_set_init -_lock_set_reference -_lock_set_server -_lock_set_server_routine -_lock_set_subsystem -_lock_try -_lock_wait_time -_lock_write -_lock_write_to_read -_lockinit -_lockmgr -_lockmgr_printinfo -_lockstatus -_log -_logPanicDataToScreen -_log_in_vain -_log_init +_lock_alloc:_lock_alloc_EXT +_lock_done:_lock_done_EXT +_lock_free:_lock_free_EXT +_lock_init:_lock_init_EXT +_lock_read:_lock_read_EXT +_lock_read_to_write:_lock_read_to_write_EXT +_lock_write:_lock_write_EXT +_lock_write_to_read:_lock_write_to_read_EXT _log_level -_log_lock -_log_open -_log_putc -_logclose -_logioctl -_logopen -_logpri -_logread -_logselect -_logsoftc -_logwakeup -_loif -_lookup -_lookup_default_shared_region -_loopattach -_lru_is_stale -_lseek -_lsf_mapping_pool_gauge -_lsf_remove_regions_mappings -_lsf_zone -_lstat -_lstatv -_m_adj -_m_aux_add -_m_aux_delete -_m_aux_find -_m_cat -_m_clalloc -_m_cltom -_m_copy_pkthdr -_m_copyback -_m_copydata -_m_copym -_m_copym_with_hdrs -_m_devget -_m_dtom -_m_dup -_m_expand -_m_free -_m_freem -_m_freem_list -_m_get -_m_getclr -_m_gethdr -_m_getpacket -_m_getpackethdrs -_m_getpackets -_m_leadingspace -_m_mcheck -_m_mchtype -_m_mclalloc _m_mclfree -_m_mclget -_m_mclhasreference -_m_mclref -_m_mclunref -_m_mtocl _m_mtod -_m_prepend -_m_prepend_2 -_m_pulldown -_m_pullup -_m_reclaim -_m_retry -_m_retryhdr -_m_split -_m_trailingspace -_m_want -_mac_roman_to_unicode -_mac_roman_to_utf8 _mach_absolute_time -_mach_assert -_mach_destroy_memory_entry -_mach_factor -_mach_host_server -_mach_host_server_routine -_mach_host_subsystem -_mach_make_memory_entry _mach_make_memory_entry_64 -_mach_memory_object_memory_entry -_mach_memory_object_memory_entry_64 -_mach_msg_overwrite -_mach_msg_overwrite_trap -_mach_msg_receive -_mach_msg_receive_continue -_mach_msg_receive_results -_mach_msg_rpc_from_kernel -_mach_msg_send _mach_msg_send_from_kernel -_mach_msg_trap -_mach_port_allocate -_mach_port_allocate_full -_mach_port_allocate_name -_mach_port_allocate_qos -_mach_port_deallocate -_mach_port_destroy -_mach_port_dnrequest_info -_mach_port_extract_member -_mach_port_extract_right -_mach_port_get_attributes -_mach_port_get_refs -_mach_port_get_set_status -_mach_port_get_srights -_mach_port_gst_helper -_mach_port_insert_member -_mach_port_insert_right -_mach_port_kernel_object -_mach_port_mod_refs -_mach_port_move_member -_mach_port_names 
-_mach_port_names_helper -_mach_port_rename -_mach_port_request_notification -_mach_port_server -_mach_port_server_routine -_mach_port_set_attributes -_mach_port_set_mscount -_mach_port_set_seqno -_mach_port_space_info -_mach_port_subsystem -_mach_port_type -_mach_ports_lookup -_mach_ports_register -_mach_reply_port -_mach_thread_self -_mach_timebase_info -_mach_trap_count -_mach_trap_table -_mach_vm_region_info -_mach_vm_region_info_64 -_mach_wait_until -_machdep_sysctl_list -_machine_boot_info -_machine_exception _machine_idle -_machine_info -_machine_init -_machine_load_context -_machine_signal_idle -_machine_slot -_machine_stack_attach -_machine_stack_detach -_machine_stack_handoff -_machine_startup -_machine_switch_act -_machine_switch_context -_machine_thread_create -_machine_thread_destroy -_machine_thread_dup -_machine_thread_get_state -_machine_thread_init -_machine_thread_set_current -_machine_thread_set_state -_machine_thread_terminate_self -_machine_wake_thread -_macx_backing_store_recovery -_macx_backing_store_suspend -_macx_swapoff -_macx_swapon -_macx_triggers -_madvise -_map_data -_map_data_size -_map_fd -_map_fd_funneled -_mapping_set_mod -_master_cpu -_master_device_port -_master_processor -_max_datalen -_max_doubled_size -_max_hdr -_max_linkhdr _max_mem -_max_pages_trigger_port -_max_poll_computation -_max_poll_quanta -_max_protohdr -_max_rt_quantum -_max_unsafe_computation -_max_unsafe_quanta -_maxdmap -_maxfiles -_maxfilesperproc -_maximum_pages_free -_maxlockdepth -_maxproc -_maxprocperuid -_maxsmap -_maxsockets -_maxvfsconf -_maxvfsslots -_mb_map -_mbinit _mbstat -_mbuf_slock -_mbutl -_mcl_paddr _mcl_to_paddr -_mclfree -_mclrefcnt -_md_prepare_for_shutdown -_mdev -_mdevBMajor -_mdevCMajor -_mdevadd -_mdevinit -_mdevlookup _mem_size _memcmp _memcpy _memmove -_memname -_memory_manager_default -_memory_manager_default_cluster -_memory_manager_default_lock -_memory_object_change_attributes -_memory_object_control_deallocate -_memory_object_control_disable -_memory_object_control_reference -_memory_object_control_server -_memory_object_control_server_routine -_memory_object_control_subsystem -_memory_object_create -_memory_object_create_named -_memory_object_data_initialize -_memory_object_data_request -_memory_object_data_return -_memory_object_data_unlock -_memory_object_deactivate_pages -_memory_object_deallocate -_memory_object_default_deallocate -_memory_object_default_reference -_memory_object_default_server -_memory_object_default_server_routine -_memory_object_destroy -_memory_object_get_attributes -_memory_object_init -_memory_object_iopl_request -_memory_object_lock_page -_memory_object_lock_request -_memory_object_name_server -_memory_object_name_server_routine -_memory_object_name_subsystem _memory_object_page_op -_memory_object_range_op -_memory_object_recover_named -_memory_object_reference -_memory_object_release_name -_memory_object_server -_memory_object_server_routine -_memory_object_super_upl_request -_memory_object_synchronize -_memory_object_synchronize_completed -_memory_object_terminate -_memory_object_unmap -_memory_object_upl_request _memset -_meta_bread -_meta_breadn -_meta_is_stale -_meta_zones -_mf6ctable -_mfree -_mfreelater _microtime _microuptime -_mig_buckets -_mig_dealloc_reply_port -_mig_e -_mig_get_reply_port -_mig_init -_mig_object_deallocate -_mig_object_destroy -_mig_object_init -_mig_object_no_senders -_mig_object_reference -_mig_put_reply_port -_mig_reply_size -_mig_strncpy -_mig_table_max_displ -_mig_user_allocate 
-_mig_user_deallocate -_min_pages_trigger_port -_min_rt_quantum -_min_std_quantum -_mincore -_minherit -_minimum_pages_remaining -_minphys -_mk_timebase_info -_mk_timer_arm -_mk_timer_cancel -_mk_timer_create -_mk_timer_destroy -_mk_timer_init -_mk_timer_port_destroy -_mkcomplex -_mkdir -_mkfifo -_mknod _ml_at_interrupt_context -_ml_cause_interrupt _ml_cpu_get_info -_ml_get_interrupts_enabled -_ml_get_max_cpus -_ml_get_timebase -_ml_init_interrupt -_ml_init_max_cpus -_ml_install_interrupt_handler _ml_io_map _ml_phys_read -_ml_phys_read_64 -_ml_phys_read_byte -_ml_phys_read_byte_64 -_ml_phys_read_double -_ml_phys_read_double_64 -_ml_phys_read_half -_ml_phys_read_half_64 -_ml_phys_read_word -_ml_phys_read_word_64 _ml_phys_write -_ml_phys_write_64 -_ml_phys_write_byte -_ml_phys_write_byte_64 -_ml_phys_write_double -_ml_phys_write_double_64 -_ml_phys_write_half -_ml_phys_write_half_64 -_ml_phys_write_word -_ml_phys_write_word_64 _ml_probe_read -_ml_probe_read_64 _ml_processor_register _ml_set_interrupts_enabled -_ml_static_malloc -_ml_static_mfree -_ml_static_ptovirt _ml_thread_policy -_ml_vtophys -_mld6_fasttimeo -_mld6_init -_mld6_input -_mld6_start_listening -_mld6_stop_listening -_mlock -_mlockall -_mmFree -_mmGetPtr -_mmInit -_mmMalloc -_mmReturnPtr -_mmap -_mmread -_mmrw -_mmwrite -_mntid_slock -_mntvnode_slock -_modetodirtype -_modwatch -_mount -_mountlist -_mountlist_slock -_mountroot -_mountroot_post_hook -_mprotect -_mremap -_mrt6_ioctl -_mrt6stat -_mrt_ioctl -_msg_ool_size_small -_msg_receive_error -_msgbufp -_msgctl -_msgget -_msgrcv -_msgsnd -_msgsys -_msync -_multicast_register_if -_munlock -_munlockall -_munmap -_munmapfd -_mutex_alloc -_mutex_free -_mutex_init -_mutex_lock -_mutex_lock_acquire -_mutex_lock_wait -_mutex_pause -_mutex_preblock -_mutex_preblock_wait -_mutex_try -_mutex_unlock -_mutex_unlock_wakeup -_my_name -_mynum_flavors -_n6expire -_name_cmp -_namei +_mutex_alloc:_mutex_alloc_EXT +_mutex_free:_mutex_free_EXT +_mutex_init:_mutex_init_EXT +_mutex_lock:_mutex_lock_EXT +_mutex_try:_mutex_try_EXT +_mutex_unlock:_mutex_unlock_EXT _nanoseconds_to_absolutetime _nanotime _nanouptime -_nbdwrite -_nblkdev -_nbuf -_nbufh -_nbufhigh -_nbuflow -_nbuftarget -_ncallout -_nchash -_nchashtbl -_nchinit -_nchrdev -_nchstats -_ncl -_nclruhead -_nd6_cache_lladdr -_nd6_dad_duplicated -_nd6_dad_start -_nd6_dad_stop -_nd6_dad_stoptimer -_nd6_debug -_nd6_defifindex -_nd6_delay -_nd6_free -_nd6_gctimer -_nd6_ifattach -_nd6_ifptomac -_nd6_init -_nd6_ioctl -_nd6_is_addr_neighbor -_nd6_lookup -_nd6_maxndopt -_nd6_maxnudhint -_nd6_mmaxtries -_nd6_na_input -_nd6_na_output -_nd6_need_cache -_nd6_ns_input -_nd6_ns_output -_nd6_nud_hint -_nd6_option -_nd6_option_init -_nd6_options -_nd6_output -_nd6_prefix_lookup -_nd6_prefix_offlink -_nd6_prefix_onlink -_nd6_prelist_add -_nd6_prune -_nd6_purge -_nd6_ra_input -_nd6_recalc_reachtm_interval -_nd6_rs_input -_nd6_rtrequest -_nd6_setdefaultiface -_nd6_setmtu -_nd6_storelladdr -_nd6_timer -_nd6_timer_funneled -_nd6_umaxtries -_nd6_useloopback -_nd_defrouter -_nd_ifinfo -_nd_prefix -_ndflush -_ndqb -_ndrv_abort -_ndrv_attach -_ndrv_bind -_ndrv_connect -_ndrv_control -_ndrv_ctlinput -_ndrv_ctloutput -_ndrv_delspec -_ndrv_detach -_ndrv_disconnect -_ndrv_do_detach -_ndrv_do_disconnect -_ndrv_dominit -_ndrv_drain -_ndrv_find_tag -_ndrv_flushq -_ndrv_get_ifp -_ndrv_handle_ifp_detach -_ndrv_init -_ndrv_input -_ndrv_output -_ndrv_peeraddr -_ndrv_read_event -_ndrv_recvspace -_ndrv_send -_ndrv_sendspace -_ndrv_sense -_ndrv_setspec -_ndrv_shutdown 
-_ndrv_sockaddr -_ndrv_sysctl -_ndrv_to_dlil_demux -_ndrv_usrreqs -_ndrvdomain -_ndrvl -_ndrvsw -_need_ast -_nestedpanic -_net_add_domain -_net_add_proto -_net_del_domain -_net_del_proto -_net_sysctl -_netaddr_match -_netboot_iaddr -_netboot_mountroot -_netboot_root -_netboot_rootpath -_netboot_setup -_netisr -_network_flock -_new_addr_hash -_new_obj_hash -_new_sysctl -_new_system_shared_regions -_newsysctl_list -_newtest -_nextc -_nextgennumber -_nextsect -_nextseg -_nextsegfromheader -_nextvnodeid -_nf_list -_nfiles -_nfs_adv -_nfs_async -_nfs_asyncio -_nfs_bioread -_nfs_boot_getfh -_nfs_boot_init -_nfs_bufq -_nfs_clearcommit -_nfs_cltpsock -_nfs_connect -_nfs_defect -_nfs_disconnect -_nfs_doio -_nfs_dolock -_nfs_false -_nfs_flushcommits -_nfs_fsinfo -_nfs_getattrcache -_nfs_getauth -_nfs_getcookie -_nfs_getnickauth -_nfs_getreq -_nfs_hash -_nfs_inactive -_nfs_init -_nfs_invaldir -_nfs_iodmount -_nfs_iodwant -_nfs_islocked -_nfs_ispublicfh -_nfs_loadattrcache -_nfs_lock -_nfs_mount_type -_nfs_mountroot -_nfs_namei -_nfs_nget -_nfs_nhinit -_nfs_node_hash_lock -_nfs_numasync -_nfs_prog -_nfs_readdirplusrpc -_nfs_readdirrpc -_nfs_readlinkrpc -_nfs_readrpc -_nfs_reclaim -_nfs_removeit -_nfs_rephead -_nfs_reply -_nfs_reqq -_nfs_request -_nfs_savenickauth -_nfs_send -_nfs_sigintr -_nfs_slplock -_nfs_slpunlock -_nfs_sndlock -_nfs_sndunlock -_nfs_ticks -_nfs_timer -_nfs_timer_funnel -_nfs_true -_nfs_udpsock -_nfs_unlock -_nfs_vfsops -_nfs_vinvalbuf -_nfs_write -_nfs_writerpc -_nfs_xdrneg1 -_nfs_xidwrap -_nfsadvlock_longest -_nfsadvlocks -_nfsadvlocks_time -_nfsclnt -_nfsd_head -_nfsd_head_flag -_nfsd_waiting -_nfslockdans -_nfslockdfd -_nfslockdfp -_nfslockdwait -_nfslockdwaiting -_nfsm_adj -_nfsm_disct -_nfsm_mbuftouio -_nfsm_reqh -_nfsm_rpchead -_nfsm_srvfattr -_nfsm_srvpostopattr -_nfsm_srvwcc -_nfsm_strtmbuf -_nfsm_uiotombuf -_nfsnodehash -_nfsnodehashtbl -_nfsrtt -_nfsrtton -_nfsrv3_access -_nfsrv3_procs -_nfsrv_cleancache -_nfsrv_commit -_nfsrv_create -_nfsrv_dorec -_nfsrv_errmap -_nfsrv_fhtovp -_nfsrv_fsinfo -_nfsrv_getattr -_nfsrv_getcache -_nfsrv_init -_nfsrv_initcache -_nfsrv_link -_nfsrv_lookup -_nfsrv_mkdir -_nfsrv_mknod -_nfsrv_noop -_nfsrv_null -_nfsrv_object_create -_nfsrv_pathconf -_nfsrv_rcv -_nfsrv_read -_nfsrv_readdir -_nfsrv_readdirplus -_nfsrv_readlink -_nfsrv_remove -_nfsrv_rename -_nfsrv_rmdir -_nfsrv_setattr -_nfsrv_setcred -_nfsrv_slpderef -_nfsrv_statfs -_nfsrv_symlink -_nfsrv_updatecache -_nfsrv_wakenfsd -_nfsrv_write -_nfsrv_writegather -_nfsrvhash -_nfsrvhashtbl -_nfsrvlruhead -_nfsrvw_procrastinate -_nfsrvw_procrastinate_v3 -_nfsrvw_sort -_nfsstats -_nfssvc -_nfssvc_sockhead -_nfssvc_sockhead_flag -_nfsv2_procid -_nfsv2_type -_nfsv2_vnodeop_opv_desc -_nfsv2_vnodeop_p -_nfsv3_procid -_nfsv3_type -_ngif -_niobuf -_nkdbufs -_nke_insert -_nlinesw -_nmbclusters -_no_dispatch_count -_nobdev -_nocdev -_nop_abortop -_nop_access -_nop_advlock -_nop_allocate -_nop_blkatoff -_nop_blktooff -_nop_bmap -_nop_bwrite -_nop_close -_nop_cmap -_nop_copyfile -_nop_create -_nop_devblocksize -_nop_exchange -_nop_fsync -_nop_getattr -_nop_getattrlist -_nop_inactive -_nop_ioctl -_nop_islocked -_nop_lease -_nop_link -_nop_lock -_nop_mkcomplex -_nop_mkdir -_nop_mknod -_nop_mmap -_nop_offtoblk -_nop_open -_nop_pagein -_nop_pageout -_nop_pathconf -_nop_pgrd -_nop_pgwr -_nop_print -_nop_read -_nop_readdir -_nop_readdirattr -_nop_readlink -_nop_reallocblks -_nop_reclaim -_nop_remove -_nop_rename -_nop_revoke -_nop_rmdir -_nop_searchfs -_nop_seek -_nop_select -_nop_setattr -_nop_setattrlist 
-_nop_strategy -_nop_symlink -_nop_truncate -_nop_unlock -_nop_update -_nop_valloc -_nop_vfree -_nop_whiteout -_nop_write -_noresume_on_disconnect -_norma_mk -_nosys -_not_implemented -_notify_filemod_watchers -_npcbufs -_nport -_nprocs -_nqfhhash -_nqfhhashtbl -_nqnfs_callback -_nqnfs_clientd -_nqnfs_clientlease -_nqnfs_getlease -_nqnfs_lease_check -_nqnfs_piggy -_nqnfs_prog -_nqnfs_serverd -_nqnfsrv_getlease -_nqnfsrv_vacated -_nqnfsstarttime -_nqsrv_clockskew -_nqsrv_getlease -_nqsrv_maxlease -_nqsrv_writeslack -_nqtimerhead -_nr_hashmask -_nr_hashtbl -_nrdeletes -_nrinserts -_nselcoll -_nswap -_nswapmap -_nswdev -_nsysent -_null_port -_nulldev -_nullop -_nullsys -_num_zones -_numcache -_numdquot -_numnfsrvcache -_numused_vfsslots -_numvnodes -_nv3tov_type -_oaccept -_obreak -_ocreat -_ofstat -_oftruncate -_ogetdirentries -_ogetdomainname -_ogetdtablesize -_ogethostid -_ogethostname -_ogetpagesize -_ogetpeername -_ogetrlimit -_ogetsockname -_okillpg -_old_if_attach -_olseek -_olstat -_open -_orecv -_orecvfrom -_orecvmsg -_osend -_osendmsg -_osetdomainname -_osethostid -_osethostname -_osetregid -_osetreuid -_osetrlimit -_osfmk_osrelease -_osfmk_ostype -_osfmk_version -_osfmk_version_major -_osfmk_version_minor -_osfmk_version_variant -_osigblock -_osigsetmask -_osigstack -_osigvec -_osmmap +_osfmk_osrelease:_osrelease +_osfmk_ostype:_ostype +_osfmk_version:_version +_osfmk_version_major:_version_major +_osfmk_version_minor:_version_minor +_osfmk_version_variant:_version_variant _osrelease -_ostat _ostype -_otruncate -_ovadvise -_ovbcopy -_owait -_owait3 -_packattrblk -_packcommonattr -_packdirattr -_packfileattr -_packvolattr _page_mask _page_shift _page_size -_paging_segment_count -_paging_segment_max -_paging_segments -_paging_segments_lock _panic -_panicDebugging -_panicDialogDesired -_panic_init -_panic_is_inited -_panic_lock -_panic_ui_initialize -_paniccpu -_panicstr -_panicwait -_parse_bsd_args -_pathconf -_pc_buffer -_pc_buflast -_pc_bufptr -_pc_bufsize -_pc_buftomem -_pc_sample_pid -_pc_trace_buf -_pc_trace_cnt -_pc_trace_frameworks -_pcb_synch -_pcsample_beg -_pcsample_comm -_pcsample_enable -_pcsample_end -_pcsample_flags -_pcsamples_bootstrap -_pcsamples_clear -_pcsamples_control -_pcsamples_ops -_pcsamples_read -_pcsamples_reinit _pe_identify_machine _pe_init_debug -_pexpert_osrelease -_pexpert_ostype -_pexpert_version -_pexpert_version_major -_pexpert_version_minor -_pexpert_version_variant -_pfctlinput -_pfctlinput2 -_pffasttimo -_pffinddomain -_pffindproto -_pffindtype -_pfind -_pfkeystat -_pfslowtimo -_pfxlist_onlink_check -_pgdelete -_pgfind -_pgrp0 -_pgrphash -_pgrphashtbl -_pgsignal -_physical_transfer_cluster_count -_physio -_pid_for_task -_pidhash -_pidhashtbl -_pim6_input -_pipe -_pmap_bootstrap -_pmap_change_wiring -_pmap_clear_modify -_pmap_clear_reference -_pmap_collect -_pmap_copy_page -_pmap_copy_part_page -_pmap_create -_pmap_destroy -_pmap_enter +_pexpert_osrelease:_osrelease +_pexpert_ostype:_ostype +_pexpert_version:_version +_pexpert_version_major:_version_major +_pexpert_version_minor:_version_minor +_pexpert_version_variant:_version_variant _pmap_extract _pmap_find_phys -_pmap_free_pages -_pmap_init -_pmap_initialized -_pmap_is_modified -_pmap_is_referenced -_pmap_map -_pmap_modify_pages -_pmap_next_page -_pmap_page_protect -_pmap_pageable -_pmap_protect -_pmap_reference -_pmap_remove -_pmap_remove_some_phys -_pmap_startup -_pmap_steal_memory -_pmap_sync_caches_phys -_pmap_verify_free -_pmap_virtual_space -_pmap_zero_page -_pmap_zero_part_page 
-_pmap_zone -_pmtu_expire -_pmtu_probe -_port_name_to_act -_port_name_to_clock -_port_name_to_semaphore -_port_name_to_task -_postevent -_postsig -_pread -_prelist_remove -_prelist_update -_prepare_profile_database -_prf -_print_saved_state _print_vmpage_stat _printf -_printf_init -_printf_lock -_priority_IO_timestamp_for_root -_prngAllowReseed -_prngDestroy -_prngForceReseed -_prngInitialize -_prngInput -_prngOutput -_prngProcessSeedBuffer -_prngStretch -_proc0 -_proc_exit -_proc_is_classic -_proc_name -_proc_prepareexit -_proc_reparent -_procdup -_process_terminate_self -_processor_array -_processor_assign -_processor_control -_processor_doshutdown _processor_exit -_processor_get_assignment _processor_info -_processor_info_count -_processor_init -_processor_offline -_processor_ptr -_processor_server -_processor_server_routine -_processor_set_base -_processor_set_create -_processor_set_default -_processor_set_destroy -_processor_set_info -_processor_set_limit -_processor_set_max_priority -_processor_set_policy_control -_processor_set_policy_disable -_processor_set_policy_enable -_processor_set_server -_processor_set_server_routine -_processor_set_stack_usage -_processor_set_statistics -_processor_set_subsystem -_processor_set_tasks -_processor_set_things -_processor_set_threads -_processor_shutdown _processor_start -_processor_subsystem -_procinit -_prof_queue -_profil -_profile_kernel_services -_prtactive -_pru_abort_notsupp -_pru_accept_notsupp -_pru_attach_notsupp -_pru_bind_notsupp -_pru_connect2_notsupp -_pru_connect_notsupp -_pru_control_notsupp -_pru_detach_notsupp -_pru_disconnect_notsupp -_pru_listen_notsupp -_pru_peeraddr_notsupp -_pru_rcvd_notsupp -_pru_rcvoob_notsupp -_pru_send_notsupp -_pru_sense_null -_pru_shutdown_notsupp -_pru_sockaddr_notsupp -_pru_sopoll_notsupp -_pru_soreceive -_pru_soreceive_notsupp -_pru_sosend -_pru_sosend_notsupp -_ps_allocate_cluster -_ps_clmap -_ps_clunmap -_ps_dealloc_vsmap -_ps_deallocate_cluster -_ps_delete -_ps_enter -_ps_map_extend -_ps_read_device -_ps_read_file -_ps_select_array -_ps_select_segment -_ps_vs_write_complete -_ps_vstruct_allocated_pages -_ps_vstruct_allocated_size -_ps_vstruct_create -_ps_vstruct_dealloc -_ps_vstruct_transfer_from_segment -_ps_write_device -_ps_write_file -_psem_access -_psem_cache_init -_psem_cache_purge -_psem_delete -_psemhash -_psemhashtbl -_psemnument -_psemops -_psemstats -_pset_add_processor -_pset_add_task -_pset_add_thread -_pset_deallocate -_pset_init -_pset_quanta_setup -_pset_reference -_pset_remove_processor -_pset_remove_task -_pset_remove_thread -_pset_sys_bootstrap -_pseudo_inits -_pshm_access -_pshm_cache_add -_pshm_cache_delete -_pshm_cache_init -_pshm_cache_purge -_pshm_cache_search -_pshm_close -_pshm_mmap -_pshm_stat -_pshm_truncate -_pshmhash -_pshmhashtbl -_pshmnument -_pshmops -_pshmstats -_psignal -_psignal_lock -_psignal_sigprof -_psignal_uthread -_psignal_vfork -_psignal_vtalarm -_psignal_xcpu -_pstats0 -_pt_setrunnable -_pthread_sigmask -_ptrace -_pty_init -_putc -_pvs_cluster_read -_pvs_object_data_provided -_pwrite -_q_to_b -_qsync -_quotactl -_quotaoff -_quotaon -_quotastat _random -_random_close -_random_init -_random_ioctl -_random_open -_random_read -_random_write -_raw_attach -_raw_ctlinput -_raw_detach -_raw_disconnect -_raw_init -_raw_input -_raw_usrreqs -_rawcb_list -_rawread -_rawwrite _rc4_crypt _rc4_init -_read _read_random -_readlink -_readv -_real_ncpus -_realhost -_realitexpire -_reassignbuf -_reattach_wait -_reboot -_receive_packet _record_startup_extensions_function 
-_recvfrom -_recvmsg -_ref_act_port_locked -_ref_pset_port_locked -_refresh_screen -_refunnel_hint -_refunnel_hint_enabled _registerPrioritySleepWakeInterest _registerSleepWakeInterest -_register_sockfilter -_relookup _rem3_remangle_name -_remove_all_shared_regions -_remove_default_shared_region -_remove_name _remove_startup_extension_function -_rename -_reset_shared_file -_resetpriority -_resize_namecache -_retrieve_act_self_fast -_retrieve_task_self_fast -_return_on_panic -_revoke -_rijndaelDecrypt -_rijndaelEncrypt -_rijndaelKeyEncToDec -_rijndaelKeySched -_rijndael_blockDecrypt -_rijndael_blockEncrypt -_rijndael_cipherInit -_rijndael_makeKey -_rijndael_padDecrypt -_rijndael_padEncrypt -_rip6_ctlinput -_rip6_ctloutput -_rip6_input -_rip6_output -_rip6_recvspace -_rip6_sendspace -_rip6_usrreqs -_rip6stat -_rip_ctlinput -_rip_ctloutput -_rip_init -_rip_input -_rip_output -_rip_recvspace -_rip_sendspace -_rip_usrreqs -_ripcb -_ripcbinfo -_rl_add -_rl_init -_rl_remove -_rl_scan -_rmdir -_rn_addmask -_rn_addroute -_rn_delete -_rn_init -_rn_inithead -_rn_lookup -_rn_match -_rn_refines _rootDomainRestart _rootDomainShutdown -_root_paged_ledger -_root_wired_ledger -_rootdev -_rootdevice -_rootfs -_rootvnode -_rootvp -_route6_input -_route_cb -_route_init -_routedomain -_rpc_auth_kerb -_rpc_auth_unix -_rpc_autherr -_rpc_call -_rpc_mismatch -_rpc_msgaccepted -_rpc_msgdenied -_rpc_reply -_rpc_vers -_rr_prefix -_rsvp_input -_rsvp_on -_rt6_flush -_rt_ifmsg -_rt_missmsg -_rt_newaddrmsg -_rt_newmaddrmsg -_rt_setgate -_rt_tables -_rtalloc -_rtalloc1 -_rtalloc_ign -_rtclock_intr -_rtclock_reset -_rtfree -_rtinit -_rtioctl -_rtredirect -_rtref -_rtrequest -_rtsetifa -_rtunref -_ruadd -_run_netisr -_run_queue_remove -_rwuio -_sa6_any -_safe_gets -_safedounmount -_sane_size -_savacctp -_save_waits -_sb_lock -_sb_max -_sb_notify -_sballoc -_sbappend -_sbappendaddr -_sbappendcontrol -_sbappendrecord -_sbcompress -_sbcreatecontrol -_sbdrop -_sbdroprecord -_sbflush -_sbfree -_sbinsertoob -_sblock -_sbrelease -_sbreserve -_sbrk -_sbspace -_sbtoxsockbuf -_sbunlock -_sbwait -_scanc -_sched_init -_sched_poll_yield_shift -_sched_safe_duration -_sched_tick -_sched_tick_init -_sched_tick_thread -_sched_tick_thread_continue -_scope6_addr2default -_scope6_get -_scope6_get_default -_scope6_ids -_scope6_ifattach -_scope6_set -_scope6_setdefault -_searchfs -_sectDATAB -_sectLINKB -_sectPRELINKB -_sectSizeDATA -_sectSizeLINK -_sectSizePRELINK -_sectSizeTEXT -_sectTEXTB -_securelevel -_selcontinue -_select -_selprocess -_selrecord -_selthreadclear -_seltrue -_selwait -_selwakeup -_sem -_sem_close -_sem_destroy -_sem_getvalue -_sem_init -_sem_open -_sem_post -_sem_trywait -_sem_unlink -_sem_wait -_sema -_semaphore_convert_wait_result _semaphore_create _semaphore_dereference _semaphore_destroy -_semaphore_init -_semaphore_max _semaphore_reference -_semaphore_server -_semaphore_server_routine _semaphore_signal _semaphore_signal_all -_semaphore_signal_all_trap -_semaphore_signal_internal -_semaphore_signal_thread -_semaphore_signal_thread_trap -_semaphore_signal_trap -_semaphore_subsystem _semaphore_timedwait -_semaphore_timedwait_continue -_semaphore_timedwait_signal -_semaphore_timedwait_signal_trap -_semaphore_timedwait_trap _semaphore_wait -_semaphore_wait_continue -_semaphore_wait_internal -_semaphore_wait_signal -_semaphore_wait_signal_trap -_semaphore_wait_trap -_semaphore_zone -_semconfig -_semctl -_semexit -_semget -_seminfo -_seminit -_semop -_semsys -_semu -_sendmsg -_sendsig -_sendto _serial_putc -_session0 
-_sessrele -_set_be_bit -_set_blocksize -_set_bsdtask_info -_set_bsduthreadargs -_set_cast128_subkey -_set_dp_control_port -_set_fsblocksize -_set_priority -_set_procsigmask -_set_sched_pri -_set_security_token -_set_state_handler -_setattrlist -_setbit -_setconf -_setegid -_seteuid -_setgid -_setgroups -_setitimer -_setlogin -_setpgid -_setpriority -_setprivexec -_setquota -_setrlimit -_setsid -_setsigvec -_setsockopt -_setthetime -_settimeofday -_setuid -_setup_main -_setuse -_sfilter_init -_sfilter_term -_sfma_handle _sha1_init _sha1_loop -_sha1_pad _sha1_result -_shadow_map_create -_shadow_map_free -_shadow_map_read -_shadow_map_shadow_size -_shadow_map_write -_shared_com_boot_time_init -_shared_data_region_handle -_shared_file_available_hash_ele -_shared_file_boot_time_init -_shared_file_create_system_region -_shared_file_data_region -_shared_file_mapping_array -_shared_file_text_region -_shared_region_mapping_create -_shared_region_mapping_dealloc -_shared_region_mapping_info -_shared_region_mapping_ref -_shared_region_mapping_set_alt_next -_shared_region_object_chain_attach -_shared_text_region_handle -_shm_open -_shm_unlink -_shmat -_shmctl -_shmdt -_shmexit -_shmfork -_shmget -_shminfo -_shminit -_shmsegs -_shmsys -_shutdown -_sig_filtops -_sig_lock_to_exit -_sig_try_locked -_sigaction -_sigacts0 -_sigaltstack -_sigcontinue -_sigexit_locked -_siginit -_signal_lock -_signal_setast -_signal_unlock -_sigpending -_sigprocmask -_sigprop -_sigreturn -_sigsuspend -_sigwait -_skpc -_slave_machine_init -_slave_main -_sleep _snprintf -_so_cache_hw -_so_cache_init_done -_so_cache_max_freed -_so_cache_time -_so_cache_timeouts -_so_cache_timer -_so_cache_zone -_so_gencnt -_soabort -_soaccept -_soalloc -_sobind -_socantrcvmore -_socantsendmore -_sockargs -_socket -_socket_cache_head -_socket_cache_tail -_socket_debug -_socket_zone -_socketinit -_socketops -_socketpair -_soclose -_soconnect -_soconnect2 -_socreate -_sodealloc -_sodelayed_copy -_sodisconnect -_sodropablereq -_sofree -_sogetopt -_sohasoutofband -_soisconnected -_soisconnecting -_soisdisconnected -_soisdisconnecting -_solisten -_sonewconn -_soo_close -_soo_ioctl -_soo_kqfilter -_soo_read -_soo_select -_soo_stat -_soo_write -_soopt_getm -_soopt_mcopyin -_soopt_mcopyout -_sooptcopyin -_sooptcopyout -_sopoll -_soreadable -_soreceive -_soreserve -_sorflush -_sorwakeup -_sosend -_sosendallatonce -_sosetopt -_soshutdown -_sotoxsocket -_sowakeup -_sowriteable -_sowwakeup -_space_deallocate -_spec_advlock -_spec_badop -_spec_blktooff -_spec_bmap -_spec_close -_spec_cmap -_spec_devblocksize -_spec_ebadf -_spec_fsync -_spec_ioctl -_spec_lookup -_spec_nfsv2nodeop_opv_desc -_spec_nfsv2nodeop_p -_spec_offtoblk -_spec_open -_spec_pathconf -_spec_print -_spec_read -_spec_select -_spec_strategy -_spec_vnodeop_entries -_spec_vnodeop_opv_desc -_spec_vnodeop_p -_spec_write -_spechash_slock -_special_handler -_special_handler_continue -_speclisth _spl0 _splbio _splclock _splhigh _splimp -_split_funnel_off _spllo _spln _splnet
@@ -7569,49 +3520,8 @@
_spltty _splvm _splx _sprintf -_sprintf_lock -_srv -_ss_fltsz -_ss_fltsz_local _sscanf -_sstk -_stack_alloc -_stack_alloc_bndry -_stack_alloc_hits -_stack_alloc_hiwater -_stack_alloc_misses -_stack_alloc_total -_stack_alloc_try -_stack_cache_hits -_stack_collect -_stack_fake_zone_info -_stack_free -_stack_free_count -_stack_free_limit -_stack_free_max -_stack_free_stack _stack_privilege -_stack_statistics -_start_cpu_thread -_start_def_pager -_start_kernel_threads -_startprofclock -_startup_miss -_stat
-_state_count -_statfs -_statv -_std_quantum -_std_quantum_us -_stf_attach_inet6 -_stf_detach_inet6 -_stf_ioctl -_stf_pre_output -_stf_reg_if_mods -_stf_shutdown -_stfattach -_stop -_stopprofclock _strcat _strchr _strcmp @@ -7621,7 +3531,6 @@ _strlen _strncat _strncmp _strncpy -_strprefix _strtol _strtoq _strtoul @@ -7631,511 +3540,13 @@ _suibyte _suiword _suser _suword -_swap_act_map -_swap_task_map -_swapin_init -_swapin_lock -_swapin_queue -_swapin_thread -_swapin_thread_continue -_swapmap -_swapon -_swdevt -_switch_act -_switch_act_swapins -_switch_debugger -_switch_to_serial_console -_switch_to_shutdown_context -_swtch -_swtch_continue -_swtch_pri -_swtch_pri_continue -_symlink -_sync -_synthfs_access -_synthfs_adddirentry -_synthfs_cached_lookup -_synthfs_chflags -_synthfs_chmod -_synthfs_chown -_synthfs_create -_synthfs_fhtovp -_synthfs_getattr -_synthfs_inactive -_synthfs_init -_synthfs_islocked -_synthfs_lock -_synthfs_lookup -_synthfs_mkdir -_synthfs_mmap -_synthfs_mount -_synthfs_mount_fs -_synthfs_move_rename_entry -_synthfs_new_directory -_synthfs_new_symlink -_synthfs_open -_synthfs_pathconf -_synthfs_quotactl -_synthfs_readdir -_synthfs_readlink -_synthfs_reclaim -_synthfs_remove -_synthfs_remove_directory -_synthfs_remove_entry -_synthfs_remove_symlink -_synthfs_rename -_synthfs_rmdir -_synthfs_root -_synthfs_select -_synthfs_setattr -_synthfs_setupuio -_synthfs_start -_synthfs_statfs -_synthfs_symlink -_synthfs_sync -_synthfs_sysctl -_synthfs_unlock -_synthfs_unmount -_synthfs_update -_synthfs_vfsops -_synthfs_vget -_synthfs_vnodeop_entries -_synthfs_vnodeop_opv_desc -_synthfs_vnodeop_p -_synthfs_vptofh -_syscallnames -_sysclk_config -_sysclk_getattr -_sysclk_gettime -_sysclk_init -_sysclk_ops -_sysclk_setalarm -_sysctl__children -_sysctl__debug -_sysctl__debug_bpf_bufsize -_sysctl__debug_bpf_maxbufsize -_sysctl__debug_children -_sysctl__hw -_sysctl__hw_children -_sysctl__kern -_sysctl__kern_children -_sysctl__kern_dummy -_sysctl__kern_ipc -_sysctl__kern_ipc_children -_sysctl__kern_ipc_maxsockbuf -_sysctl__kern_ipc_maxsockets -_sysctl__kern_ipc_nmbclusters -_sysctl__kern_ipc_sockbuf_waste_factor -_sysctl__kern_ipc_somaxconn -_sysctl__kern_ipc_sorecvmincopy -_sysctl__kern_ipc_sosendminchain -_sysctl__kern_maxfilesperproc -_sysctl__kern_maxprocperuid -_sysctl__kern_sysv -_sysctl__kern_sysv_children -_sysctl__kern_sysv_shmall -_sysctl__kern_sysv_shmmax -_sysctl__kern_sysv_shmmin -_sysctl__kern_sysv_shmmni -_sysctl__kern_sysv_shmseg -_sysctl__machdep -_sysctl__machdep_children -_sysctl__net -_sysctl__net_children -_sysctl__net_inet -_sysctl__net_inet6 -_sysctl__net_inet6_children -_sysctl__net_inet6_icmp6 -_sysctl__net_inet6_icmp6_children -_sysctl__net_inet6_icmp6_errppslimit -_sysctl__net_inet6_icmp6_nd6_debug -_sysctl__net_inet6_icmp6_nd6_delay -_sysctl__net_inet6_icmp6_nd6_maxnudhint -_sysctl__net_inet6_icmp6_nd6_mmaxtries -_sysctl__net_inet6_icmp6_nd6_prune -_sysctl__net_inet6_icmp6_nd6_umaxtries -_sysctl__net_inet6_icmp6_nd6_useloopback -_sysctl__net_inet6_icmp6_nodeinfo -_sysctl__net_inet6_icmp6_rediraccept -_sysctl__net_inet6_icmp6_redirtimeout -_sysctl__net_inet6_icmp6_stats -_sysctl__net_inet6_ip6 -_sysctl__net_inet6_ip6_accept_rtadv -_sysctl__net_inet6_ip6_auto_flowlabel -_sysctl__net_inet6_ip6_auto_linklocal -_sysctl__net_inet6_ip6_children -_sysctl__net_inet6_ip6_dad_count -_sysctl__net_inet6_ip6_defmcasthlim -_sysctl__net_inet6_ip6_forwarding -_sysctl__net_inet6_ip6_gifhlim -_sysctl__net_inet6_ip6_hdrnestlimit -_sysctl__net_inet6_ip6_hlim 
-_sysctl__net_inet6_ip6_kame_version -_sysctl__net_inet6_ip6_keepfaith -_sysctl__net_inet6_ip6_log_interval -_sysctl__net_inet6_ip6_maxfragpackets -_sysctl__net_inet6_ip6_redirect -_sysctl__net_inet6_ip6_rip6stats -_sysctl__net_inet6_ip6_rr_prune -_sysctl__net_inet6_ip6_rtexpire -_sysctl__net_inet6_ip6_rtmaxcache -_sysctl__net_inet6_ip6_rtminexpire -_sysctl__net_inet6_ip6_stats -_sysctl__net_inet6_ip6_temppltime -_sysctl__net_inet6_ip6_tempvltime -_sysctl__net_inet6_ip6_use_deprecated -_sysctl__net_inet6_ip6_use_tempaddr -_sysctl__net_inet6_ip6_v6only -_sysctl__net_inet6_ipsec6 -_sysctl__net_inet6_ipsec6_ah_net_deflev -_sysctl__net_inet6_ipsec6_ah_trans_deflev -_sysctl__net_inet6_ipsec6_children -_sysctl__net_inet6_ipsec6_debug -_sysctl__net_inet6_ipsec6_def_policy -_sysctl__net_inet6_ipsec6_ecn -_sysctl__net_inet6_ipsec6_esp_net_deflev -_sysctl__net_inet6_ipsec6_esp_randpad -_sysctl__net_inet6_ipsec6_esp_trans_deflev -_sysctl__net_inet6_ipsec6_stats -_sysctl__net_inet6_tcp6 -_sysctl__net_inet6_tcp6_children -_sysctl__net_inet6_udp6 -_sysctl__net_inet6_udp6_children -_sysctl__net_inet_children -_sysctl__net_inet_div -_sysctl__net_inet_div_children -_sysctl__net_inet_icmp -_sysctl__net_inet_icmp_bmcastecho -_sysctl__net_inet_icmp_children -_sysctl__net_inet_icmp_drop_redirect -_sysctl__net_inet_icmp_icmplim -_sysctl__net_inet_icmp_log_redirect -_sysctl__net_inet_icmp_maskrepl -_sysctl__net_inet_icmp_stats -_sysctl__net_inet_igmp -_sysctl__net_inet_igmp_children -_sysctl__net_inet_igmp_stats -_sysctl__net_inet_ip -_sysctl__net_inet_ip_accept_sourceroute -_sysctl__net_inet_ip_check_interface -_sysctl__net_inet_ip_check_route_selfref -_sysctl__net_inet_ip_children -_sysctl__net_inet_ip_fastforwarding -_sysctl__net_inet_ip_forwarding -_sysctl__net_inet_ip_gifttl -_sysctl__net_inet_ip_intr_queue_drops -_sysctl__net_inet_ip_intr_queue_maxlen -_sysctl__net_inet_ip_keepfaith -_sysctl__net_inet_ip_linklocal -_sysctl__net_inet_ip_linklocal_children -_sysctl__net_inet_ip_linklocal_in -_sysctl__net_inet_ip_linklocal_in_allowbadttl -_sysctl__net_inet_ip_linklocal_in_children -_sysctl__net_inet_ip_linklocal_stat -_sysctl__net_inet_ip_maxfragpackets -_sysctl__net_inet_ip_portrange -_sysctl__net_inet_ip_portrange_children -_sysctl__net_inet_ip_portrange_first -_sysctl__net_inet_ip_portrange_hifirst -_sysctl__net_inet_ip_portrange_hilast -_sysctl__net_inet_ip_portrange_last -_sysctl__net_inet_ip_portrange_lowfirst -_sysctl__net_inet_ip_portrange_lowlast -_sysctl__net_inet_ip_redirect -_sysctl__net_inet_ip_rtexpire -_sysctl__net_inet_ip_rtmaxcache -_sysctl__net_inet_ip_rtminexpire -_sysctl__net_inet_ip_sourceroute -_sysctl__net_inet_ip_stats -_sysctl__net_inet_ip_subnets_are_local -_sysctl__net_inet_ip_ttl -_sysctl__net_inet_ipsec -_sysctl__net_inet_ipsec_ah_cleartos -_sysctl__net_inet_ipsec_ah_net_deflev -_sysctl__net_inet_ipsec_ah_offsetmask -_sysctl__net_inet_ipsec_ah_trans_deflev -_sysctl__net_inet_ipsec_bypass -_sysctl__net_inet_ipsec_children -_sysctl__net_inet_ipsec_debug -_sysctl__net_inet_ipsec_def_policy -_sysctl__net_inet_ipsec_dfbit -_sysctl__net_inet_ipsec_ecn -_sysctl__net_inet_ipsec_esp_net_deflev -_sysctl__net_inet_ipsec_esp_port -_sysctl__net_inet_ipsec_esp_randpad -_sysctl__net_inet_ipsec_esp_trans_deflev -_sysctl__net_inet_ipsec_stats -_sysctl__net_inet_raw -_sysctl__net_inet_raw_children -_sysctl__net_inet_raw_maxdgram -_sysctl__net_inet_raw_pcblist -_sysctl__net_inet_raw_recvspace -_sysctl__net_inet_tcp -_sysctl__net_inet_tcp_always_keepalive -_sysctl__net_inet_tcp_blackhole 
-_sysctl__net_inet_tcp_children -_sysctl__net_inet_tcp_delacktime -_sysctl__net_inet_tcp_delayed_ack -_sysctl__net_inet_tcp_do_tcpdrain -_sysctl__net_inet_tcp_drop_synfin -_sysctl__net_inet_tcp_icmp_may_rst -_sysctl__net_inet_tcp_isn_reseed_interval -_sysctl__net_inet_tcp_keepidle -_sysctl__net_inet_tcp_keepinit -_sysctl__net_inet_tcp_keepintvl -_sysctl__net_inet_tcp_local_slowstart_flightsize -_sysctl__net_inet_tcp_log_in_vain -_sysctl__net_inet_tcp_msl -_sysctl__net_inet_tcp_mssdflt -_sysctl__net_inet_tcp_newreno -_sysctl__net_inet_tcp_path_mtu_discovery -_sysctl__net_inet_tcp_pcbcount -_sysctl__net_inet_tcp_pcblist -_sysctl__net_inet_tcp_recvspace -_sysctl__net_inet_tcp_rfc1323 -_sysctl__net_inet_tcp_rfc1644 -_sysctl__net_inet_tcp_sendspace -_sysctl__net_inet_tcp_slowlink_wsize -_sysctl__net_inet_tcp_slowstart_flightsize -_sysctl__net_inet_tcp_stats -_sysctl__net_inet_tcp_strict_rfc1948 -_sysctl__net_inet_tcp_tcbhashsize -_sysctl__net_inet_tcp_tcp_lq_overflow -_sysctl__net_inet_tcp_v6mssdflt -_sysctl__net_inet_udp -_sysctl__net_inet_udp_blackhole -_sysctl__net_inet_udp_checksum -_sysctl__net_inet_udp_children -_sysctl__net_inet_udp_log_in_vain -_sysctl__net_inet_udp_maxdgram -_sysctl__net_inet_udp_pcblist -_sysctl__net_inet_udp_recvspace -_sysctl__net_inet_udp_stats -_sysctl__net_key -_sysctl__net_key_ah_keymin -_sysctl__net_key_blockacq_count -_sysctl__net_key_blockacq_lifetime -_sysctl__net_key_children -_sysctl__net_key_debug -_sysctl__net_key_esp_auth -_sysctl__net_key_esp_keymin -_sysctl__net_key_int_random -_sysctl__net_key_larval_lifetime -_sysctl__net_key_prefered_oldsa -_sysctl__net_key_spi_maxval -_sysctl__net_key_spi_minval -_sysctl__net_key_spi_trycnt -_sysctl__net_link -_sysctl__net_link_children -_sysctl__net_link_ether -_sysctl__net_link_ether_children -_sysctl__net_link_ether_inet -_sysctl__net_link_ether_inet_apple_hwcksum_rx -_sysctl__net_link_ether_inet_apple_hwcksum_tx -_sysctl__net_link_ether_inet_children -_sysctl__net_link_ether_inet_host_down_time -_sysctl__net_link_ether_inet_log_arp_wrong_iface -_sysctl__net_link_ether_inet_max_age -_sysctl__net_link_ether_inet_maxtries -_sysctl__net_link_ether_inet_proxyall -_sysctl__net_link_ether_inet_prune_intvl -_sysctl__net_link_ether_inet_useloopback -_sysctl__net_link_generic -_sysctl__net_link_generic_children -_sysctl__net_local -_sysctl__net_local_children -_sysctl__net_local_dgram -_sysctl__net_local_dgram_children -_sysctl__net_local_dgram_maxdgram -_sysctl__net_local_dgram_pcblist -_sysctl__net_local_dgram_recvspace -_sysctl__net_local_inflight -_sysctl__net_local_stream -_sysctl__net_local_stream_children -_sysctl__net_local_stream_pcblist -_sysctl__net_local_stream_recvspace -_sysctl__net_local_stream_sendspace -_sysctl__net_routetable -_sysctl__net_routetable_children -_sysctl__sysctl -_sysctl__sysctl_children -_sysctl__sysctl_debug -_sysctl__sysctl_name -_sysctl__sysctl_name2oid -_sysctl__sysctl_name_children -_sysctl__sysctl_next -_sysctl__sysctl_next_children -_sysctl__sysctl_oidfmt -_sysctl__sysctl_oidfmt_children -_sysctl__user -_sysctl__user_children -_sysctl__vfs -_sysctl__vfs_children -_sysctl__vfs_generic -_sysctl__vfs_generic_children -_sysctl__vfs_generic_ctlbyfsid -_sysctl__vfs_generic_ctlbyfsid_children -_sysctl__vfs_generic_vfsidlist -_sysctl__vm -_sysctl__vm_children -_sysctl_clockrate -_sysctl_doproc -_sysctl_file -_sysctl_handle_int -_sysctl_handle_long -_sysctl_handle_opaque -_sysctl_handle_string -_sysctl_int -_sysctl_procargs -_sysctl_quad -_sysctl_rdint -_sysctl_rdquad -_sysctl_rdstring 
-_sysctl_rdstruct -_sysctl_register_all -_sysctl_register_fixed -_sysctl_register_oid -_sysctl_register_set -_sysctl_set -_sysctl_string -_sysctl_struct -_sysctl_unregister_oid -_sysctl_unregister_set -_sysctl_vnode _sysctlbyname -_sysent -_systemLogDiags -_systemdomain -_systemdomain_init -_tablefull -_task_act_iterate_wth_args -_task_assign -_task_assign_default -_task_backing_store_privileged -_task_collect_allowed -_task_collect_last_tick -_task_collect_max_rate -_task_collect_scan _task_create -_task_create_internal _task_deallocate -_task_for_pid -_task_get_assignment -_task_get_emulation_vector -_task_get_exception_ports -_task_get_special_port -_task_halt -_task_hold -_task_hold_locked -_task_importance -_task_info -_task_init -_task_is_classic -_task_policy -_task_policy_get -_task_policy_set _task_reference -_task_reference_try -_task_release -_task_release_locked _task_resume -_task_sample -_task_self_trap -_task_server -_task_server_routine -_task_set_emulation -_task_set_emulation_vector -_task_set_emulation_vector_internal -_task_set_exception_ports -_task_set_info -_task_set_ledger -_task_set_policy -_task_set_port_space -_task_set_ras_pc -_task_set_special_port -_task_subsystem _task_suspend -_task_swap_exception_ports -_task_swappable -_task_synchronizer_destroy_all -_task_terminate -_task_terminate_internal -_task_threads -_task_wait_locked -_task_wire -_task_working_set_create -_task_zone -_tbeproc -_tcb -_tcbinfo -_tcp6_ctlinput -_tcp6_input -_tcp6_usrreqs -_tcp_backoff -_tcp_canceltimers -_tcp_ccgen -_tcp_close -_tcp_ctlinput -_tcp_ctloutput -_tcp_delack_enabled -_tcp_delacktime -_tcp_do_newreno -_tcp_drain -_tcp_drop -_tcp_drop_syn_sent -_tcp_fasttimo -_tcp_fillheaders -_tcp_freeq -_tcp_gettaocache -_tcp_init -_tcp_input -_tcp_keepidle -_tcp_keepinit -_tcp_keepintvl -_tcp_lq_overflow -_tcp_maketemplate -_tcp_maxidle -_tcp_maxpersistidle -_tcp_msl -_tcp_mss -_tcp_mssdflt -_tcp_mssopt -_tcp_mtudisc -_tcp_new_isn -_tcp_newtcpcb -_tcp_now -_tcp_output -_tcp_quench -_tcp_recvspace -_tcp_respond -_tcp_rtlookup -_tcp_rtlookup6 -_tcp_sendspace -_tcp_setpersist -_tcp_slowtimo -_tcp_syn_backoff -_tcp_timers -_tcp_usrreqs -_tcp_v6mssdflt -_tcpstat -_temp_msgbuf -_termioschars -_test_tws -_testbit -_thread_abort -_thread_abort_safely -_thread_act_server -_thread_act_server_routine -_thread_act_subsystem -_thread_apc_clear -_thread_apc_set -_thread_assign -_thread_assign_default -_thread_bind _thread_block -_thread_block_reason -_thread_bootstrap -_thread_bootstrap_return _thread_call_allocate _thread_call_cancel _thread_call_enter @@ -8146,1098 +3557,59 @@ _thread_call_free _thread_call_func _thread_call_func_cancel _thread_call_func_delayed -_thread_call_initialize _thread_call_is_delayed -_thread_call_setup _thread_cancel_timer -_thread_change_psets -_thread_continue -_thread_create -_thread_create_running _thread_deallocate -_thread_depress_abort -_thread_depress_expire -_thread_dispatch -_thread_doreap -_thread_doswapin -_thread_dup -_thread_entrypoint -_thread_exception_return _thread_flavor_array -_thread_funnel_get -_thread_funnel_merge _thread_funnel_set -_thread_funnel_switch -_thread_get_assignment -_thread_get_cont_arg -_thread_get_exception_ports -_thread_get_special_port -_thread_get_state -_thread_getstatus -_thread_go_locked -_thread_hold -_thread_info -_thread_info_shuttle -_thread_init -_thread_invoke -_thread_lock_act -_thread_policy -_thread_policy_get _thread_policy_set -_thread_quantum_expire -_thread_read_times -_thread_reaper_enqueue -_thread_reaper_init 
_thread_reference -_thread_release -_thread_resume -_thread_run -_thread_sample -_thread_scan_enabled -_thread_select -_thread_self -_thread_self_trap -_thread_set_child -_thread_set_cont_arg -_thread_set_exception_ports -_thread_set_parent -_thread_set_policy -_thread_set_special_port -_thread_set_state _thread_set_timer _thread_set_timer_deadline -_thread_setrun -_thread_setstatus -_thread_should_abort -_thread_should_halt -_thread_sleep_funnel -_thread_sleep_lock_write -_thread_sleep_mutex -_thread_sleep_mutex_deadline -_thread_sleep_usimple_lock -_thread_stop -_thread_suspend -_thread_swap_exception_ports -_thread_swapin -_thread_switch -_thread_syscall_return -_thread_task_priority +_thread_sleep_lock_write:_thread_sleep_lock_write_EXT +_thread_sleep_mutex:_thread_sleep_mutex_EXT +_thread_sleep_mutex_deadline:_thread_sleep_mutex_deadline_EXT +_thread_sleep_usimple_lock:_thread_sleep_usimple_lock_EXT _thread_terminate -_thread_terminate_internal -_thread_terminate_self -_thread_termination_continue -_thread_timer_expire -_thread_timer_setup -_thread_timer_terminate -_thread_unlock_act -_thread_unstop -_thread_userstack -_thread_wait -_thread_wakeup _thread_wakeup_prim -_thread_wire -_threadsignal -_tick -_time -_time_wait_slots -_time_zone_slock_init -_timeout -_timer_call_cancel -_timer_call_enter -_timer_call_enter1 -_timer_call_initialize -_timer_call_is_delayed -_timer_call_setup -_timer_call_shutdown -_timer_delta -_timer_grab -_timer_init -_timer_normalize -_timer_read -_timevaladd -_timevalfix -_timevalsub -_tk_cancc -_tk_nin -_tk_nout -_tk_rawcc -_to_bsd_time -_to_hfs_time -_tprintf -_tprintf_close -_tprintf_open -_tputchar -_trailer_template -_trap_type -_trashMemory -_trigger_name_to_port -_truncate -_tsleep -_tsleep0 -_tsleep1 -_ttioctl -_ttread -_ttrstrt -_ttselect -_ttsetwater -_ttspeedtab -_ttstart -_ttwakeup -_ttwrite -_ttwwakeup -_tty_pgsignal -_ttyblock -_ttychars -_ttycheckoutq -_ttyclose -_ttyflush -_ttyfree -_ttyinfo -_ttyinput -_ttylclose -_ttymalloc -_ttymodem -_ttyopen -_ttyprintf -_ttyselect -_ttysleep -_ttywait -_tvtoabstime -_tvtohz -_tws_build_cluster -_tws_create_startup_list -_tws_expand_working_set -_tws_handle_startup_file -_tws_hash_clear -_tws_hash_create -_tws_hash_destroy -_tws_hash_line_clear -_tws_hash_ws_flush -_tws_insert -_tws_internal_lookup -_tws_internal_startup_send -_tws_line_signal -_tws_lookup -_tws_read_startup_file -_tws_send_startup_info -_tws_startup_list_lookup -_tws_test_for_community -_tws_traverse_address_hash_list -_tws_traverse_object_hash_list -_tws_write_startup_file -_tz -_tz_slock -_uap -_ubc_blktooff -_ubc_clean -_ubc_clearflags -_ubc_create_upl -_ubc_getcred -_ubc_getobject -_ubc_getsize -_ubc_hold -_ubc_info_deallocate -_ubc_info_init -_ubc_info_zone -_ubc_invalidate -_ubc_isinuse -_ubc_issetflags -_ubc_offtoblk -_ubc_page_op -_ubc_pushdirty -_ubc_pushdirty_range -_ubc_range_op -_ubc_rele -_ubc_release -_ubc_release_named -_ubc_setcred -_ubc_setflags -_ubc_setpager -_ubc_setsize -_ubc_uncache -_ubc_upl_abort -_ubc_upl_abort_range -_ubc_upl_commit -_ubc_upl_commit_range -_ubc_upl_map -_ubc_upl_pageinfo -_ubc_upl_unmap -_ucsfncmp -_ucsfntrans -_udb -_udbinfo -_udp6_ctlinput -_udp6_input -_udp6_output -_udp6_recvspace -_udp6_sendspace -_udp6_usrreqs -_udp_ctlinput -_udp_in6 -_udp_init -_udp_input -_udp_ip6 -_udp_notify -_udp_recvspace -_udp_sendspace -_udp_shutdown -_udp_ttl -_udp_usrreqs -_udpstat -_ufs_access -_ufs_advlock -_ufs_bmap -_ufs_bmaparray -_ufs_check_export -_ufs_checkpath -_ufs_close -_ufs_cmap -_ufs_create 
-_ufs_dirbad -_ufs_dirbadentry -_ufs_dirempty -_ufs_direnter -_ufs_direnter2 -_ufs_dirremove -_ufs_dirrewrite -_ufs_getattr -_ufs_getlbns -_ufs_ihash_slock -_ufs_ihashget -_ufs_ihashinit -_ufs_ihashins -_ufs_ihashlookup -_ufs_ihashrem -_ufs_inactive -_ufs_init -_ufs_ioctl -_ufs_islocked -_ufs_kqfilt_add -_ufs_link -_ufs_lock -_ufs_lookup -_ufs_makeinode -_ufs_mkdir -_ufs_mknod -_ufs_mmap -_ufs_open -_ufs_pathconf -_ufs_print -_ufs_quotactl -_ufs_readdir -_ufs_readlink -_ufs_reclaim -_ufs_remove -_ufs_rename -_ufs_rmdir -_ufs_root -_ufs_seek -_ufs_select -_ufs_setattr -_ufs_start -_ufs_strategy -_ufs_symlink -_ufs_unlock -_ufs_vfsops -_ufs_vinit -_ufs_whiteout -_ufsfifo_close -_ufsfifo_kqfilt_add -_ufsfifo_read -_ufsfifo_write -_ufsspec_close -_ufsspec_read -_ufsspec_write -_uihash -_uihashtbl -_uiomove -_uiomove64 -_uipc_usrreqs -_umask -_unblock_procsigmask -_undelete -_unicode_to_hfs -_union_abortop -_union_access -_union_advlock -_union_allocvp -_union_blktooff -_union_bmap -_union_close -_union_cmap -_union_copyfile -_union_copyup -_union_create -_union_dircache -_union_dowhiteout -_union_freevp -_union_fsync -_union_getattr -_union_inactive -_union_init -_union_ioctl -_union_islocked -_union_lease -_union_link -_union_lock -_union_lookup -_union_mkdir -_union_mknod -_union_mkshadow -_union_mkwhiteout -_union_mmap -_union_mount -_union_newlower -_union_newsize -_union_newupper -_union_offtoblk -_union_open -_union_pagein -_union_pageout -_union_pathconf -_union_print -_union_read -_union_readdir -_union_readlink -_union_reclaim -_union_remove -_union_removed_upper -_union_rename -_union_revoke -_union_rmdir -_union_root -_union_seek -_union_select -_union_setattr -_union_start -_union_statfs -_union_strategy -_union_symlink -_union_unlock -_union_unmount -_union_updatevp -_union_vfsops -_union_vn_close -_union_vn_create -_union_vnodeop_entries -_union_vnodeop_opv_desc -_union_vnodeop_p -_union_whiteout -_union_write -_unix_syscall -_unix_syscall_return -_unlink -_unmount -_unp_connect2 -_unp_dispose -_unp_externalize -_unp_init -_unp_zone -_unputc -_unregister_sockfilter -_untimeout -_update_default_shared_region -_update_priority -_upl_abort -_upl_abort_range -_upl_clear_dirty -_upl_commit -_upl_commit_range -_upl_deallocate -_upl_dirty_page _upl_get_internal_page_list -_upl_get_internal_pagelist_offset -_upl_offset_to_pagelist -_upl_page_present -_upl_phys_page -_upl_server -_upl_server_routine -_upl_set_dirty -_upl_subsystem _upl_valid_page -_uprintf -_ureadc -_user_warned -_useracc -_userland_sysctl -_usimple_lock -_usimple_lock_init -_usimple_lock_try -_usimple_unlock -_utf8_decodestr -_utf8_encodelen -_utf8_encodestr -_utf8_to_hfs -_utf8_to_mac_roman -_utf_extrabytes -_uthread_alloc -_uthread_free -_uthread_zone -_uthread_zone_init -_uthread_zone_inited -_utimes -_utrace -_ux_exception_port -_ux_handler_init -_v_putc -_va_null -_vagevp -_vattr_null -_vc_display_icon -_vc_progress_initialize -_vc_progress_lock -_vcattach -_vcount -_vcputc -_verbose +_usimple_lock:_usimple_lock_EXT +_usimple_lock_init:_usimple_lock_init_EXT +_usimple_lock_try:_usimple_lock_try_EXT +_usimple_unlock:_usimple_unlock_EXT +_uuid_clear +_uuid_compare +_uuid_copy +_uuid_generate +_uuid_generate_random +_uuid_generate_time +_uuid_is_null +_uuid_parse +_uuid_unparse +_uuid_unparse_lower +_uuid_unparse_upper _version _version_major _version_minor _version_variant _vetoSleepWakeNotification -_vfinddev -_vflush -_vfork -_vfork_exit -_vfork_return -_vfs_busy -_vfs_event_signal -_vfs_export -_vfs_export_lookup 
-_vfs_getnewfsid -_vfs_getvfs -_vfs_init_io_attributes -_vfs_io_attributes -_vfs_mountedon -_vfs_mountroot -_vfs_nummntops -_vfs_op_descs -_vfs_op_init -_vfs_opv_descs -_vfs_opv_init -_vfs_opv_numops -_vfs_rootmountalloc -_vfs_sysctl -_vfs_unbusy -_vfsconf -_vfsconf_add -_vfsconf_del -_vfsinit -_vget -_vgone -_vgonel -_vhold -_video_scroll_down -_video_scroll_up -_vinfo -_vinvalbuf -_virtual_space_end -_virtual_space_start -_vm_accellerate_zf_pageout_trigger _vm_allocate -_vm_allocate_cpm -_vm_allow_clustered_pagein -_vm_backing_store_disable -_vm_backing_store_low -_vm_behavior_set -_vm_conflict_check -_vm_copy -_vm_countdirtypages _vm_deallocate -_vm_default_ahead -_vm_default_behind -_vm_external_copy -_vm_external_create -_vm_external_destroy -_vm_external_map_size -_vm_external_module_initialize -_vm_external_state_clr -_vm_external_state_set -_vm_external_within -_vm_fault -_vm_fault_cleanup -_vm_fault_copy -_vm_fault_copy_cleanup -_vm_fault_copy_dst_cleanup -_vm_fault_debug -_vm_fault_init -_vm_fault_list_request -_vm_fault_page -_vm_fault_unwire -_vm_fault_wire -_vm_fault_wire_fast -_vm_free_page_pause -_vm_get_shared_region -_vm_inherit _vm_initial_limit_core _vm_initial_limit_data _vm_initial_limit_stack -_vm_last_addr -_vm_machine_attribute _vm_map -_vm_map_64 -_vm_map_aggressive_enter -_vm_map_aggressive_enter_max -_vm_map_behavior_set -_vm_map_check_protection -_vm_map_copy_copy -_vm_map_copy_discard -_vm_map_copy_overwrite -_vm_map_copy_overwrite_aligned -_vm_map_copy_overwrite_nested -_vm_map_copy_overwrite_unaligned -_vm_map_copy_zone -_vm_map_copyin_common -_vm_map_copyin_kernel_buffer -_vm_map_copyin_object -_vm_map_copyout -_vm_map_copyout_kernel_buffer -_vm_map_create _vm_map_deallocate -_vm_map_delete -_vm_map_destroy -_vm_map_enter -_vm_map_entry_delete -_vm_map_entry_insert -_vm_map_entry_zone -_vm_map_find_space -_vm_map_fork -_vm_map_fork_copy -_vm_map_fork_share -_vm_map_get_phys_page -_vm_map_get_upl -_vm_map_inherit -_vm_map_init -_vm_map_kentry_zone -_vm_map_lookup_entry -_vm_map_lookup_locked -_vm_map_machine_attribute -_vm_map_overwrite_submap_recurse -_vm_map_page_query -_vm_map_pmap_enter -_vm_map_pmap_enter_enable -_vm_map_pmap_enter_print -_vm_map_protect -_vm_map_range_check -_vm_map_read_user -_vm_map_reference -_vm_map_region_replace -_vm_map_remove -_vm_map_server -_vm_map_server_routine -_vm_map_simplify -_vm_map_steal_memory -_vm_map_submap -_vm_map_submap_pmap_clean -_vm_map_subsystem -_vm_map_switch _vm_map_unwire -_vm_map_unwire_nested -_vm_map_verify _vm_map_wire -_vm_map_wire_nested -_vm_map_write_user -_vm_map_zone -_vm_mapped_pages_info -_vm_mem_bootstrap -_vm_mem_init -_vm_msync -_vm_object_absent_max -_vm_object_destroy -_vm_object_enter -_vm_object_hash_entry_free -_vm_object_iopl_request -_vm_object_page_map -_vm_object_page_remove_iterate -_vm_object_page_remove_lookup -_vm_object_pager_create -_vm_object_populate_with_private -_vm_object_shadow_check -_vm_object_sync -_vm_object_terminate_remove_all -_vm_object_update -_vm_page_activate -_vm_page_active_count -_vm_page_alloc -_vm_page_alloc_lock -_vm_page_bootstrap -_vm_page_bucket_count -_vm_page_bucket_lock -_vm_page_buckets -_vm_page_convert -_vm_page_copy -_vm_page_create -_vm_page_deactivate -_vm_page_deactivate_behind -_vm_page_deactivate_hint -_vm_page_fictitious_addr -_vm_page_fictitious_count -_vm_page_free _vm_page_free_count -_vm_page_free_count_init -_vm_page_free_count_minimum -_vm_page_free_list -_vm_page_free_min -_vm_page_free_reserve -_vm_page_free_reserved 
-_vm_page_free_target -_vm_page_free_verify -_vm_page_free_wanted -_vm_page_gobble -_vm_page_gobble_count -_vm_page_gobble_count_warning -_vm_page_grab -_vm_page_grab_count -_vm_page_grab_fictitious -_vm_page_hash_mask -_vm_page_hash_shift -_vm_page_inactive_count -_vm_page_inactive_target -_vm_page_init -_vm_page_insert -_vm_page_laundry_count -_vm_page_laundry_max -_vm_page_laundry_min -_vm_page_limbo_count -_vm_page_limbo_real_count -_vm_page_lookup -_vm_page_mask -_vm_page_module_init -_vm_page_more_fictitious -_vm_page_pages -_vm_page_part_copy -_vm_page_part_zero_fill -_vm_page_pin_count -_vm_page_preppin_lock -_vm_page_queue_active -_vm_page_queue_fictitious -_vm_page_queue_free -_vm_page_queue_free_lock -_vm_page_queue_inactive -_vm_page_queue_limbo -_vm_page_queue_lock -_vm_page_queue_zf -_vm_page_release -_vm_page_release_fictitious -_vm_page_remove -_vm_page_rename -_vm_page_replace -_vm_page_shift -_vm_page_template -_vm_page_ticket -_vm_page_ticket_roll -_vm_page_unwire -_vm_page_wait -_vm_page_wire -_vm_page_wire_count -_vm_page_wire_count_warning -_vm_page_zero_fill -_vm_page_zero_fill_lock -_vm_page_zone -_vm_pageclean_copy -_vm_pageclean_setup -_vm_pagein_cluster_unused -_vm_pagein_cluster_used -_vm_pageout -_vm_pageout_active -_vm_pageout_burst_max -_vm_pageout_burst_min -_vm_pageout_burst_wait -_vm_pageout_clean_active_pages -_vm_pageout_cluster -_vm_pageout_cluster_page -_vm_pageout_continue -_vm_pageout_dirty_no_pager -_vm_pageout_emergency_availability_request -_vm_pageout_empty_wait -_vm_pageout_in_place -_vm_pageout_inactive -_vm_pageout_inactive_absent -_vm_pageout_inactive_avoid -_vm_pageout_inactive_busy -_vm_pageout_inactive_clean -_vm_pageout_inactive_dirty -_vm_pageout_inactive_forced -_vm_pageout_inactive_nolock -_vm_pageout_inactive_throttled -_vm_pageout_inactive_used -_vm_pageout_initialize_page -_vm_pageout_object_allocate -_vm_pageout_object_terminate -_vm_pageout_out_of_line -_vm_pageout_pause_count -_vm_pageout_pause_max -_vm_pageout_reserved_internal -_vm_pageout_reserved_really -_vm_pageout_scan -_vm_pageout_scan_active_emm_throttle -_vm_pageout_scan_active_emm_throttle_failure -_vm_pageout_scan_active_emm_throttle_success -_vm_pageout_scan_continue -_vm_pageout_scan_event_counter -_vm_pageout_scan_inactive_emm_throttle -_vm_pageout_scan_inactive_emm_throttle_failure -_vm_pageout_scan_inactive_emm_throttle_success -_vm_pageout_setup -_vm_pageout_throttle -_vm_pool_low _vm_protect -_vm_read -_vm_read_list -_vm_read_overwrite _vm_region -_vm_region_64 -_vm_region_clone -_vm_region_count_obj_refs -_vm_region_look_for_page _vm_region_object_create -_vm_region_recurse -_vm_region_recurse_64 -_vm_region_top_walk -_vm_region_walk -_vm_remap -_vm_remap_extract -_vm_remap_range_allocate -_vm_set_page_size -_vm_set_shared_region -_vm_stat -_vm_stat_discard -_vm_stat_discard_cleared_reply -_vm_stat_discard_cleared_too_late -_vm_stat_discard_cleared_unset -_vm_stat_discard_failure -_vm_stat_discard_sent -_vm_stat_discard_throttle -_vm_submap_object -_vm_sysctl -_vm_upl_map -_vm_upl_unmap -_vm_wire -_vm_write -_vm_zf_count -_vm_zf_iterator -_vm_zf_iterator_count -_vn_bwrite -_vn_close -_vn_default_error -_vn_lock -_vn_mkdir -_vn_open -_vn_rdwr -_vn_stat -_vn_symlink -_vn_table -_vn_writechk -_vndevice_init -_vndevice_root_image -_vnode_free_list -_vnode_free_list_slock -_vnode_inactive_list -_vnode_object_create -_vnode_objects_reclaimed -_vnode_pagein -_vnode_pageout -_vnode_pager_bootstrap -_vnode_pager_cluster_read -_vnode_pager_cluster_write 
-_vnode_pager_data_initialize -_vnode_pager_data_request -_vnode_pager_data_return -_vnode_pager_data_unlock -_vnode_pager_deallocate -_vnode_pager_get_filesize -_vnode_pager_get_object_size -_vnode_pager_init -_vnode_pager_lookup -_vnode_pager_reference -_vnode_pager_release_from_cache -_vnode_pager_setup -_vnode_pager_shutdown -_vnode_pager_synchronize -_vnode_pager_terminate -_vnode_pager_unmap -_vnode_pager_workaround -_vnode_pager_zone -_vnode_reclaim_tried -_vnodetarget -_vnops -_volfs_access -_volfs_fhtovp -_volfs_getattr -_volfs_init -_volfs_islocked -_volfs_load -_volfs_lock -_volfs_lookup -_volfs_mount -_volfs_pathconf -_volfs_quotactl -_volfs_readdir -_volfs_reclaim -_volfs_rmdir -_volfs_root -_volfs_select -_volfs_start -_volfs_statfs -_volfs_sync -_volfs_sysctl -_volfs_unlock -_volfs_unmount -_volfs_vfsops -_volfs_vget -_volfs_vnodeop_entries -_volfs_vnodeop_opv_desc -_volfs_vnodeop_p -_volfs_vptofh -_vop_abortop_desc -_vop_abortop_vp_offsets -_vop_access_desc -_vop_access_vp_offsets -_vop_advlock_desc -_vop_advlock_vp_offsets -_vop_allocate_desc -_vop_allocate_vp_offsets -_vop_blkatoff_desc -_vop_blkatoff_vp_offsets -_vop_blktooff_desc -_vop_blktooff_vp_offsets -_vop_bmap_desc -_vop_bmap_vp_offsets -_vop_bwrite_desc -_vop_bwrite_vp_offsets -_vop_cachedlookup_desc -_vop_cachedlookup_vp_offsets -_vop_close_desc -_vop_close_vp_offsets -_vop_cmap_desc -_vop_cmap_vp_offsets -_vop_copyfile_desc -_vop_copyfile_vp_offsets -_vop_create_desc -_vop_create_vp_offsets -_vop_default_desc -_vop_devblocksize_desc -_vop_devblocksize_vp_offsets -_vop_exchange_desc -_vop_exchange_vp_offsets -_vop_fsync_desc -_vop_fsync_vp_offsets -_vop_getattr_desc -_vop_getattr_vp_offsets -_vop_getattrlist_desc -_vop_getattrlist_vp_offsets -_vop_inactive_desc -_vop_inactive_vp_offsets -_vop_ioctl_desc -_vop_ioctl_vp_offsets -_vop_islocked_desc -_vop_islocked_vp_offsets -_vop_kqfilt_add_desc -_vop_kqfilt_add_vp_offsets -_vop_kqfilt_remove_desc -_vop_kqfilt_remove_vp_offsets -_vop_lease_desc -_vop_lease_vp_offsets -_vop_link_desc -_vop_link_vp_offsets -_vop_lock_desc -_vop_lock_vp_offsets -_vop_lookup_desc -_vop_lookup_vp_offsets -_vop_mkcomplex_desc -_vop_mkcomplex_vp_offsets -_vop_mkdir_desc -_vop_mkdir_vp_offsets -_vop_mknod_desc -_vop_mknod_vp_offsets -_vop_mmap_desc -_vop_mmap_vp_offsets -_vop_noislocked -_vop_nolock -_vop_nounlock -_vop_offtoblk_desc -_vop_offtoblk_vp_offsets -_vop_open_desc -_vop_open_vp_offsets -_vop_pagein_desc -_vop_pagein_vp_offsets -_vop_pageout_desc -_vop_pageout_vp_offsets -_vop_pathconf_desc -_vop_pathconf_vp_offsets -_vop_pgrd_desc -_vop_pgrd_vp_offsets -_vop_pgwr_desc -_vop_pgwr_vp_offsets -_vop_print_desc -_vop_print_vp_offsets -_vop_read_desc -_vop_read_vp_offsets -_vop_readdir_desc -_vop_readdir_vp_offsets -_vop_readdirattr_desc -_vop_readdirattr_vp_offsets -_vop_readlink_desc -_vop_readlink_vp_offsets -_vop_reallocblks_desc -_vop_reallocblks_vp_offsets -_vop_reclaim_desc -_vop_reclaim_vp_offsets -_vop_remove_desc -_vop_remove_vp_offsets -_vop_rename_desc -_vop_rename_vp_offsets -_vop_revoke -_vop_revoke_desc -_vop_revoke_vp_offsets -_vop_rmdir_desc -_vop_rmdir_vp_offsets -_vop_searchfs_desc -_vop_searchfs_vp_offsets -_vop_seek_desc -_vop_seek_vp_offsets -_vop_select_desc -_vop_select_vp_offsets -_vop_setattr_desc -_vop_setattr_vp_offsets -_vop_setattrlist_desc -_vop_setattrlist_vp_offsets -_vop_strategy_desc -_vop_strategy_vp_offsets -_vop_symlink_desc -_vop_symlink_vp_offsets -_vop_truncate_desc -_vop_truncate_vp_offsets -_vop_unlock_desc -_vop_unlock_vp_offsets 
-_vop_update_desc -_vop_update_vp_offsets -_vop_valloc_desc -_vop_valloc_vp_offsets -_vop_vfree_desc -_vop_vfree_vp_offsets -_vop_whiteout_desc -_vop_whiteout_vp_offsets -_vop_write_desc -_vop_write_vp_offsets -_vp_pagein -_vp_pgoclean -_vp_pgodirty -_vprint -_vproc_exit -_vput -_vpwakeup -_vrecycle -_vref -_vrele -_vs_alloc_async -_vs_alloc_async_count -_vs_alloc_async_failed -_vs_async_free_list -_vs_cl_write_complete -_vs_cluster_transfer -_vs_cluster_write -_vs_do_async_write -_vs_free_async -_vs_get_map_entry -_vs_object_create -_vslock _vsnprintf _vsprintf -_vstruct_def_clshift -_vstruct_list -_vstruct_zone -_vsunlock -_vttoif_tab -_vwakeup -_wait1 -_wait1continue -_wait4 -_wait_queue_alloc -_wait_queue_assert_wait -_wait_queue_assert_wait64 -_wait_queue_free -_wait_queue_init -_wait_queue_link -_wait_queue_link_noalloc -_wait_queue_link_size -_wait_queue_member -_wait_queue_pull_thread_locked -_wait_queue_set_alloc -_wait_queue_set_free -_wait_queue_set_init -_wait_queue_set_size -_wait_queue_set_unlink_all -_wait_queue_set_unlink_all_nofree -_wait_queue_sub_clearrefs -_wait_queue_sub_init -_wait_queue_unlink -_wait_queue_unlink_all -_wait_queue_unlink_one -_wait_queue_unlinkall_nofree -_wait_queue_wakeup64_all -_wait_queue_wakeup64_one -_wait_queue_wakeup64_thread -_wait_queue_wakeup_all -_wait_queue_wakeup_one -_wait_queue_wakeup_thread -_wait_queues -_wait_queues_init -_wait_shift -_wait_subqueue_unlink_all -_waitevent -_waittime -_wakeup -_wakeup_one -_walk_allvnodes -_walk_vnodes_debug -_watchevent -_wncpu -_write -_writev -_ws_disabled -_zError -_z_errmsg +_vsscanf _zalloc -_zalloc_async -_zalloc_canblock -_zalloc_end_of_space -_zalloc_next_space -_zalloc_noblock -_zalloc_wasted_space -_zcram -_zdata -_zdata_size -_zeroin6_addr -_zeroin_addr -_zfill _zfree -_zget -_zget_space -_zget_space_lock _zinit -_zlibVersion -_zombproc -_zone_bootstrap -_zone_change -_zone_check -_zone_free_count -_zone_gc -_zone_gc_allowed -_zone_gc_forced -_zone_gc_last_tick -_zone_gc_lock -_zone_gc_max_rate -_zone_init -_zone_map -_zone_map_max_address -_zone_map_min_address -_zone_page_alloc -_zone_page_collectable -_zone_page_init -_zone_page_keep -_zone_page_table -_zone_pages -_zone_steal_memory -_zone_zone -_zprealloc diff --git a/config/System6.0.i386.exports b/config/System6.0.i386.exports index 154f6ec5a..5b8690450 100644 --- a/config/System6.0.i386.exports +++ b/config/System6.0.i386.exports @@ -3,10 +3,6 @@ _Load_context _PE_incoming_interrupt _PE_install_interrupt_handler _PE_interrupt_handler -_PE_platform_interrupt_initialize -_RtcAlrm -_RtcDelt -_RtcTime _Thread_continue __ZN15AppleIntelClock10gMetaClassE __ZN15AppleIntelClock10superClassE @@ -34,7 +30,6 @@ __fnstsw __fprestore __fpsave __fstcw -__kick_buffer_ __mp_disable_preemption __mp_enable_preemption __mp_enable_preemption_no_check @@ -43,17 +38,14 @@ _a_dbl_fault _a_fpu_over _a_inv_tss _acc_type +_acpi_install_wake_handler +_acpi_sleep_kernel _act_machine_return -_act_machine_switch_pcb -_active_kloaded -_active_stacks _all_intrs _alltraps _avail_end -_avail_next _avail_start _bbc_config -_bbc_getattr _bbc_gettime _bbc_settime _bcopy16 @@ -62,33 +54,19 @@ _bit_lock _bit_lock_try _bit_unlock _blkclr -_bmapmap -_bmapmapr -_bmapvideo _boot_args_start -_buffer_map _check_io_fault _clear_kdb_intr _cli_count -_clknum -_clks_per_int -_clks_per_int_99 -_cnclose -_cnioctl -_cnopen -_cnread -_cnselect -_cnvmem -_cnwrite _collect_ref _collect_unref _copyp2p -_cpu_data +_copypv +_cpu_idle_handler _cpu_interrupt +_cpu_number _cpu_shutdown 
_cpu_to_lapic -_cpu_update_list -_cpu_update_needed _cpu_vendors _cpudata_desc_pattern _cpuid_cpu_display @@ -99,14 +77,12 @@ _cpuid_features _cpuid_get_feature_names _cpuid_get_info _cpuid_info -_cpuid_intel_get_model_name _cpus_active _cpus_idle _createdt _dectohexdec _dev_indirect_count _dev_indirect_list -_display_syscall _div_scale _dr0 _dr1 @@ -118,11 +94,9 @@ _dump_act _dump_regs _eintstack _emulate_io -_extmem _fakePPCBootArgs _fakePPCDeviceTree _fc_get -_first_addr _fix_desc _flush_tlb _fp_free @@ -153,10 +127,7 @@ _get_cr4 _get_ldt _get_pc _get_tr -_hardclock _hexdectodec -_hole_end -_hole_start _htonl _htons _i386_astintr @@ -177,12 +148,8 @@ _insb _insl _inst_fetch _insw -_int_stack_high -_int_stack_top _intel_read_fault _intel_startCPU -_interrupt_stack -_interrupt_stack_alloc _inuse_ptepages_count _iopb_destroy _iopb_init @@ -197,23 +164,10 @@ _kdp_i386_trap _kdp_setstate _kdreboot _kernel_preempt_check -_kernel_stack _kernel_trap -_kgdb_stack_store -_kpde _ktss -_lapic_cpu_map -_lapic_dump _lapic_end_of_interrupt -_lapic_esr_clear -_lapic_esr_read -_lapic_id -_lapic_id_initdata -_lapic_init -_lapic_interrupt -_lapic_start -_lapic_test -_lapic_to_cpu +_lapic_smm_restore _last_addr _ldt _ldt_desc_pattern @@ -229,31 +183,27 @@ _mach_rpc _machdep_call_count _machdep_call_table _machdep_syscall -_machine_kernel_stack_init +_master_cpu _master_is_up +_master_processor _master_up _minsecurity +_ml_get_max_cpus _mp_boot_pde -_mp_desc_init -_mp_desc_table -_mp_gdt -_mp_idt _mp_kdp_enter _mp_kdp_exit _mp_kdp_lock _mp_kdp_ncpus _mp_kdp_trap -_mp_ktss -_mp_ldt +_mtrr_range_add +_mtrr_range_remove _mul_scale -_new_clknum _nptr _ntohl _ntohs _outsb _outsl _outsw -_pagemove _panic_trap _phys_attribute_clear _phys_attribute_set @@ -269,7 +219,6 @@ _pmap_debug _pmap_expand _pmap_map_bd _pmap_movepage -_pmap_object _pmap_phys_attributes _pmap_pte _pmap_remove_range @@ -277,17 +226,14 @@ _pmap_set_modify _pmap_system_lock _pmap_update_interrupt _pmap_valid_page -_preemptable _printdt _process_pmap_updates _pstart -_ptes_per_vm_page _pv_free_list _pv_free_list_lock _pv_head_table _pv_list_zone _pv_lock_table -_real_pmap _real_to_prot _recover_table _recover_table_end @@ -296,13 +242,9 @@ _reset_mem_on_reboot _retry_table _retry_table_end _return_to_iret +_rtc_clock_stepped +_rtc_clock_stepping _rtc_cyc_per_sec -_rtc_intr_count -_rtc_intr_freq -_rtc_intr_hertz -_rtc_print_lost_tick -_rtc_quant_scale -_rtc_setvals _rtcget _rtclock _rtcput @@ -321,7 +263,6 @@ _signal_cpus _slave_boot_base _slave_boot_end _slave_boot_init -_slave_clock _slave_pstart _slave_start _smp_init @@ -329,26 +270,6 @@ _smp_initialized _start_lock _startprog _sti_count -_syscall -_syscall_failed -_syscall_int80 -_sysclk_gettime_internal -_sysclk_gettime_interrupts_disabled -_sysclk_setattr -_sysctl__machdep_cpu -_sysctl__machdep_cpu_brand -_sysctl__machdep_cpu_brand_string -_sysctl__machdep_cpu_children -_sysctl__machdep_cpu_extfamily -_sysctl__machdep_cpu_extmodel -_sysctl__machdep_cpu_family -_sysctl__machdep_cpu_feature_bits -_sysctl__machdep_cpu_features -_sysctl__machdep_cpu_model -_sysctl__machdep_cpu_signature -_sysctl__machdep_cpu_stepping -_sysctl__machdep_cpu_value -_sysctl__machdep_cpu_vendor _t_bounds _t_debug _t_fpu_err @@ -387,12 +308,11 @@ _tc_scroll_down _tc_scroll_up _tc_show_cursor _tc_update_color +_thread_bind _thread_compose_cthread_desc _thread_fast_set_cthread_self _thread_get_cthread_self _thread_set_cthread_self -_thread_swapin_mach_alloc -_time_per_clk _trap_mach25_syscall _trap_machdep_syscall 
_trap_unix_syscall diff --git a/config/System6.0.ppc.exports b/config/System6.0.ppc.exports index cc18cea96..9aeba05d6 100644 --- a/config/System6.0.ppc.exports +++ b/config/System6.0.ppc.exports @@ -1,181 +1,11 @@ -Choke -ClearRealCall -CreateFakeDECCall -CreateFakeIOCall -CreateShutdownCTXCall -CutTrace -DoPreemptCall -LoadDBATsCall -LoadIBATsCall -NullCall -StoreRealCall -SwitchContextCall -_AARPwakeup -_ASPgetmsg -_ASPputmsg -_ATPgetreq -_ATPgetrsp -_ATPsndreq -_ATPsndrsp -_ATgetmsg -_ATputmsg -_ATsocket -_AURPaccess -_AURPcleanup -_AURPcmdx -_AURPfreemsg -_AURPgetmsg -_AURPgetri -_AURPinit -_AURPiocack -_AURPiocnak -_AURPpurgeri -_AURPrcvOpenReq -_AURPrcvOpenRsp -_AURPrcvRDReq -_AURPrcvRIAck -_AURPrcvRIReq -_AURPrcvRIRsp -_AURPrcvRIUpd -_AURPrcvTickle -_AURPrcvTickleAck -_AURPrcvZReq -_AURPrcvZRsp -_AURPrtupdate -_AURPsend -_AURPsetri -_AURPshutdown -_AURPsndGDZL -_AURPsndGZN -_AURPsndOpenReq -_AURPsndOpenReq_funnel -_AURPsndRDReq -_AURPsndRIAck -_AURPsndRIReq -_AURPsndRIReq_funnel -_AURPsndRIRsp_funnel -_AURPsndRIUpd -_AURPsndRIUpd_funnel -_AURPsndTickle -_AURPsndZReq -_AURPsndZRsp -_AURPupdate -_AURPupdateri -_AbortIO -_AdspBad -_AlignAssist -_AlignAssist64 -_AltivecAssist -_CalcRecvWdw -_CalcSendQFree _CallTVector -_Call_Debugger -_Call_DebuggerC -_Call_continuation -_CheckAttn -_CheckOkToClose -_CheckReadQueue -_CheckRecvSeq -_CheckSend -_ChokeSys -_CleanupGlobals -_ClearReal -_ClearRealLL -_CompleteQueue -_CreateFakeDEC -_CreateFakeDECLL -_CreateFakeIO -_CreateFakeIOLL -_CreateShutdownCTX -_CreateShutdownCTXLL -_DDP_chksum_on -_DDP_slfsnd_on -_DebugWork -_DoChokeLL -_DoClose -_DoPreemptLL -_DoTimerElem -_EmulExit -_Emulate -_Emulate64 -_ErrorRTMPoverflow -_ErrorZIPoverflow -_ExceptionVectorsEnd -_ExceptionVectorsStart -_FCReturn -_FWtable -_FillSendQueue -_FindSender -_FirmwareCall -_FixedStackEnd -_FixedStackStart -_FloatInit -_GratefulDebInit -_GratefulDebWork -_InitGlobals -_InsertTimerElem -_LLTraceSet -_LoadDBATs -_LoadIBATs -_MapUserAddressSpace -_MapUserAddressSpaceInit -_NMIss -_NextCID -_NotifyUser -_NullLL _PE_Determine_Clock_Speeds _PE_find_scc _PE_init_taproot _PE_read_write_time_of_day _PE_write_IIC -_PFSExit _PPCcalls -_QNaNbarbarian -_RT_maxentry -_RT_table -_RT_table_freelist -_RT_table_start -_RXAttention -_RXData -_RXFReset -_RXFResetAck -_ReadReal -_ReleaseUserAddressSpace -_RemoveCCB -_RemoveTimerElem _ResetHandler -_RouterError -_RouterMix -_RuptCtrs -_RxClose -_SndMsgUp -_StoreReal -_StoreRealLL -_SwitchContextLL -_SysChoked -_TimerQueueTick -_TimerStop -_TimerTick -_TimerTick_funnel -_TrashSession -_UrgentUser -_ZIPwakeup -_ZT_maxentry -_ZT_table -__ATPgetreq -__ATPgetrsp -__ATPsndreq -__ATPsndrsp -__ATclose -__ATgetmsg -__ATioctl -__ATkqfilter -__ATputmsg -__ATread -__ATrw -__ATselect -__ATsocket -__ATwrite __Z11IODBDMAStopPV23IODBDMAChannelRegisters __Z12IODBDMAFlushPV23IODBDMAChannelRegisters __Z12IODBDMAPausePV23IODBDMAChannelRegisters @@ -354,904 +184,27 @@ __ZTVN19IODBDMAMemoryCursor9MetaClassE __ZTVN8AppleCPU9MetaClassE __ZTVN8AppleNMI9MetaClassE __eSynchronizeIO -__start_cpu -_aaFPopTable -_aarp_chk_addr -_aarp_init1 -_aarp_init2 -_aarp_rcv_pkt -_aarp_sched_probe -_aarp_send_data -_aarp_table _abs -_add_ddp_handler -_adspAllocateCCB -_adspAssignSocket -_adspAttention -_adspCLDeny -_adspCLListen -_adspClose -_adspDeassignSocket -_adspGlobal -_adspInit -_adspInited -_adspMode -_adspNewCID -_adspOpen -_adspOptions -_adspPacket -_adspRead -_adspReadAttention -_adspReadHandler -_adspRelease -_adspReset -_adspStatus -_adspWrite -_adspWriteHandler 
-_adsp_close -_adsp_dequeue_ccb -_adsp_input -_adsp_inputC -_adsp_inputQ -_adsp_open -_adsp_pidM -_adsp_readable -_adsp_rput -_adsp_sendddp -_adsp_window -_adsp_wput -_adsp_writeable -_adspall_lock -_adspgen_lock -_adspioc_ack -_adsptmr_lock -_append_copy -_appletalk_hack_start -_appletalk_inited -_arpinp_lock -_asp_ack_reply -_asp_clock -_asp_clock_funnel -_asp_close -_asp_init -_asp_inpC -_asp_nak_reply -_asp_open -_asp_pack_bdsp -_asp_readable -_asp_scbQ -_asp_wput -_aspall_lock -_asptmo_lock -_at_control -_at_ddp_brt -_at_ddp_stats -_at_ifQueueHd -_at_insert -_at_interfaces -_at_ioctl -_at_memzone_init -_at_pcballoc -_at_pcbbind -_at_pcbdetach -_at_reg_mcast -_at_state -_at_unreg_mcast -_atalk_closeref -_atalk_enablew -_atalk_flush -_atalk_getref -_atalk_gettrace -_atalk_load -_atalk_notify -_atalk_notify_sel -_atalk_openref -_atalk_peek -_atalk_post_msg -_atalk_putnext -_atalk_settrace -_atalk_to_ip -_atalk_unload -_atalkdomain -_atalkintr -_atalkintrq -_atalksw -_atomic_switch_syscall -_atomic_switch_trap -_atp_bind -_atp_build_release -_atp_cancel_req -_atp_close -_atp_delete_free_clusters -_atp_dequeue_atp -_atp_drop_req -_atp_free -_atp_free_cluster_list -_atp_free_cluster_timeout_set -_atp_free_list -_atp_init -_atp_inited -_atp_input -_atp_inputQ -_atp_iocack -_atp_iocnak -_atp_link -_atp_lomask -_atp_mask -_atp_need_rel -_atp_open -_atp_pidM -_atp_rcb_alloc -_atp_rcb_data -_atp_rcb_free -_atp_rcb_free_list -_atp_rcb_timer -_atp_reply -_atp_req_ind -_atp_req_timeout -_atp_resource_m -_atp_retry_req -_atp_rput -_atp_rsp_ind -_atp_send -_atp_send_replies -_atp_send_req -_atp_send_rsp -_atp_state_data -_atp_tid -_atp_timout -_atp_trans_abort -_atp_trans_alloc -_atp_trans_free -_atp_trans_free_list -_atp_treq_event -_atp_trp_clock -_atp_trp_clock_funnel -_atp_unlink -_atp_untimout -_atp_used_list -_atp_wput -_atp_x_done -_atp_x_done_funnel -_atpall_lock -_atpcb_zone -_atpgen_lock -_atptmo_lock -_attachData -_aurp_close -_aurp_global -_aurp_gref -_aurp_ifID -_aurp_open -_aurp_state -_aurp_wakeup -_aurp_wput -_aurpd_start -_aurpgen_lock -_backchain -_backpocket -_bbSetRupt -_bb_disable_bluebox -_bb_enable_bluebox -_bb_settaskenv -_bcopy_64 -_bcopy_970 -_bcopy_g3 -_bcopy_g4 _bcopy_nc -_bcopy_physvir -_boot_args_buf -_bzero_128 -_bzero_32 _bzero_nc _cacheDisable _cacheInit -_calcRecvQ -_calcSendQ -_cbfpend -_cbfr -_ccb_used_list -_chandler -_checkBogus -_checkNMI -_clock_delay_until -_clock_gettimeofday -_cnputcusr -_cntlzw -_commPagePtr -_commpage_flush_dcache -_commpage_flush_icache -_commpage_set_timestamp -_commpage_time_dcba -_completepb -_condStop -_cons_getc -_cons_ops -_cons_ops_index -_cons_putc -_consclose -_consider_mapping_adjust -_consioctl -_console_chan_default -_console_is_serial -_console_unit -_consopen -_consread -_consselect -_conswrite -_copy_pkt -_copyin_multiple -_copyout_multiple -_cpu_doshutdown -_cpu_signal -_cpu_sync_timebase -_cpus_holding_bkpts -_current_free_region -_cursor_pmap -_db_breakpoints_inserted -_db_im_stepping -_db_recover -_db_run_mode -_dbfloats -_dbgBits -_dbgCkpt -_dbgCkptLL -_dbgDisp -_dbgDispLL -_dbgRegsLL -_dbgTrace -_dbspecrs -_dbvecs -_ddp_AURPfuncx -_ddp_AURPsendx -_ddp_add_if -_ddp_adjmsg -_ddp_age_router -_ddp_bit_reverse -_ddp_brt_init -_ddp_brt_shutdown -_ddp_brt_sweep -_ddp_brt_sweep_funnel -_ddp_brt_sweep_timer -_ddp_checksum -_ddp_compress_msg -_ddp_ctloutput -_ddp_glean -_ddp_growmsg -_ddp_handler -_ddp_head -_ddp_init -_ddp_input -_ddp_notify_nbp -_ddp_output -_ddp_pru_abort -_ddp_pru_attach -_ddp_pru_bind -_ddp_pru_connect 
-_ddp_pru_control -_ddp_pru_detach -_ddp_pru_disconnect -_ddp_pru_peeraddr -_ddp_pru_send -_ddp_pru_shutdown -_ddp_pru_sockaddr -_ddp_putmsg -_ddp_recvspace -_ddp_rem_if -_ddp_router_output -_ddp_sendspace -_ddp_shutdown -_ddp_slowtimo -_ddp_socket_inuse -_ddp_start -_ddp_usrreqs -_ddpall_lock -_ddpinp_lock -_debcnputc -_debsave0 -_debstack -_debstack_top_ss -_debstackptr -_debugNoop -_debugbackpocket -_debugger_active -_debugger_cpu -_debugger_debug -_debugger_holdoff -_debugger_is_slave -_debugger_lock -_debugger_pending -_debugger_sync _delay_for_interval -_dgVideo -_dgWork -_diagCall -_diagTrap -_disable_bluebox_internal -_doexception -_dst_addr_cnt -_dump_backtrace -_dump_savearea -_elap_dataput -_elap_offline -_elap_online3 -_elap_wput -_enter_funnel_section -_env_buf -_ep_input -_errstr -_et_zeroaddr -_etalk_multicast_addr -_exception_end -_exception_entry -_exception_exit -_exit_funnel_section -_extPatch32 -_extPatchMCK -_failNames -_fastexit -_fctx_test -_find_ifID -_find_user_fpu -_find_user_regs -_find_user_vec -_first_free_virt -_forUs -_fpu_save -_fpu_switch -_free_mappings -_free_pmap_count -_free_pmap_list -_free_pmap_lock -_free_pmap_max -_fwSCCinit _gGetDefaultBusSpeedsKey -_gbuf_alloc_wait -_gbuf_freel -_gbuf_linkb -_gbuf_linkpkt -_gbuf_msgsize -_gbuf_strip -_getAarp -_getAarpTableSize -_getIfUsage -_getLocalZone -_getNbpTable -_getNbpTableSize -_getPhysAddrSize -_getRTRLocalZone -_getRtmpTable -_getRtmpTableSize -_getSPLocalZone -_getZipTable -_getZipTableSize -_get_got _get_io_base_addr -_get_msr_exportmask -_get_msr_nbits -_get_msr_rbits _get_preemption_level -_get_simple_lock_count -_getchar -_getrpc -_gets -_gettimeofday_32 -_gettimeofday_64 -_gref_alloc -_gref_close -_gref_wput -_handleDSeg -_handleISeg -_handlePF -_hash_table_base -_hash_table_size -_hid0get64 -_hw_add_map -_hw_blow_seg -_hw_cpu_sync -_hw_cpu_wcng -_hw_dequeue_atomic -_hw_find_map -_hw_find_space -_hw_hash_init -_hw_lock_bit -_hw_lock_mbits -_hw_map_seg -_hw_perfmon_lock -_hw_protect -_hw_purge_map -_hw_purge_phys -_hw_purge_space -_hw_queue_atomic -_hw_queue_atomic_list -_hw_rem_map -_hw_set_user_space -_hw_set_user_space_dis -_hw_setup_trans -_hw_start_trans -_hw_test_rc -_hw_unlock_bit -_hw_walk_phys -_hwulckPatch_eieio -_hwulckPatch_isync -_hwulckbPatch_eieio -_hwulckbPatch_isync -_iNullLL -_ifID_home -_ifID_table _ignore_zero_fault -_ihandler -_ihandler_ret -_incrVSID -_init_ddp_handler -_initialize_serial -_interrupt -_interrupt_disable -_interrupt_enable -_intstack_top_ss -_invalidateSegs -_invalidate_dcache -_invalidate_dcache64 -_invxcption -_ioc_ack -_isync_mfdec -_kdb_trap -_kdp_backtrace -_kdp_copy_phys -_kdp_dabr -_kdp_noisy -_kdp_pmap -_kdp_print_backtrace -_kdp_print_registers -_kdp_sr_dump -_kdp_trans_off -_kdp_trap -_kdp_trap_codes -_kdp_vtophys -_kernel_args_buf -_kernel_pmap_phys _killprint _kprintf_lock -_lap_online -_lastTrace -_lock_debugger -_lowGlo -_m_clattach -_m_lgbuf_alloc -_m_lgbuf_free -_mach_absolute_time_32 -_mach_absolute_time_64 -_machine_act_terminate -_machine_clock_assist -_machine_conf -_machine_idle_ppc -_machine_idle_ret -_mapCtl -_mapInsert -_mapLog -_mapRemove -_mapSearch -_mapSearchFull -_mapSetLists -_mapSetUp -_mapSkipListVerify -_mapSkipListVerifyC -_mapalc1 -_mapalc2 -_mapdebug -_mapping_adjust -_mapping_adjust_call -_mapping_alloc -_mapping_clr_mod -_mapping_clr_ref -_mapping_drop_busy -_mapping_fake_zone_info -_mapping_find -_mapping_free -_mapping_free_init -_mapping_free_prime -_mapping_init -_mapping_make -_mapping_map -_mapping_p2v 
-_mapping_phys_lookup -_mapping_phys_unused _mapping_prealloc -_mapping_protect -_mapping_protect_phys _mapping_relpre -_mapping_remove -_mapping_set_ref -_mapping_tst_mod -_mapping_tst_ref -_mapping_verify -_mappingdeb0 -_mappingdeb1 -_max_cpus_initialized -_mem_actual -_mfdar -_mflr -_mfmmcr0 -_mfmmcr1 -_mfmmcr2 -_mfmsr -_mfpmc1 -_mfpmc2 -_mfpmc3 -_mfpmc4 -_mfpvr -_mfrtcl -_mfrtcu -_mfsda -_mfsia -_mfsrin -_mftb -_mftbu _ml_enable_cache_level _ml_enable_nap _ml_ppc_sleep -_ml_probe_read_mck -_ml_probe_read_mck_64 -_ml_read_temp -_ml_restore -_ml_sense_nmi -_ml_set_physical -_ml_set_physical_disabled -_ml_set_physical_get_ffs _ml_set_processor_speed _ml_set_processor_voltage -_ml_set_translation_off -_ml_thrm_init -_ml_thrm_set _ml_throttle _ml_mem_backoff -_mtdar -_mtdec -_mtmmcr0 -_mtmmcr1 -_mtmmcr2 -_mtmsr -_mtpmc1 -_mtpmc2 -_mtpmc3 -_mtpmc4 -_mtsdr1 -_mtsrin -_mulckPatch_eieio -_mulckPatch_isync -_mutex_unlock_rwcmb -_name_registry -_nbp_add_multicast -_nbp_delete_entry -_nbp_fillin_nve -_nbp_find_nve -_nbp_input -_nbp_mh_reg -_nbp_new_nve_entry -_nbp_shutdown -_nbp_strhash -_net_access -_net_access_cnt -_net_export -_net_port -_no_of_nets_tried -_no_of_nodes_tried -_nve_lock -_ot_ddp_check_socket -_pat_output -_patch_table -_pbtcnt -_pbtcpu -_pbtlock _pe_do_clock_test _pe_run_clock_test -_per_proc_info -_perfIntHook -_perfTrapHook -_perfmon_acquire_facility -_perfmon_clear_counters -_perfmon_control -_perfmon_disable -_perfmon_enable -_perfmon_handle_pmi -_perfmon_init -_perfmon_read_counters -_perfmon_release_facility -_perfmon_set_event -_perfmon_set_event_func -_perfmon_set_tbsel -_perfmon_set_threshold -_perfmon_start_counters -_perfmon_stop_counters -_perfmon_write_counters -_phys_copy -_phys_table -_phystokv -_pktsDropped -_pktsHome -_pktsIn -_pktsOut -_pmapTrans -_pmap_activate -_pmap_add_physical_memory -_pmap_attribute -_pmap_attribute_cache_sync -_pmap_boot_map -_pmap_canExecute -_pmap_deactivate -_pmap_find_physentry -_pmap_map_block -_pmap_map_block_rc -_pmap_mem_regions -_pmap_mem_regions_count -_pmap_nest -_pmap_switch -_pmap_unnest -_powermac_scc_get_datum -_powermac_scc_set_datum -_ppcNull -_ppcNullinst -_ppc_checkthreadstate -_ppc_gettimeofday -_ppc_init -_ppc_init_cpu -_ppc_max_adrsp -_ppc_max_pmaps -_ppc_usimple_lock -_ppc_usimple_lock_init -_ppc_usimple_lock_try -_ppc_usimple_unlock_rwcmb -_ppc_usimple_unlock_rwmb -_ppc_vm_cpu_init -_ppc_vm_init -_ppcscret -_pper_proc_info -_prep_ZIP_reply_packet -_print_backtrace -_probe_cb -_pthread_getspecific_sprg3 -_pthread_getspecific_uftrap -_pthread_self_sprg3 -_pthread_self_uftrap -_qAddToEnd -_qfind_m -_rcv_connection_id -_reboot_how -_refall_lock -_regDefaultZone -_releaseData -_resetPOR -_resethandler_target -_retFromVM -_routerStart -_router_added -_router_killed -_routershutdown -_routing_needed -_rt_bdelete -_rt_binsert -_rt_blookup -_rt_delete -_rt_getNextRoute -_rt_insert -_rt_show -_rt_sortedshow -_rt_table_init -_rtclock_decrementer_min -_rtmp_dropper -_rtmp_init -_rtmp_input -_rtmp_prep_new_packet -_rtmp_purge -_rtmp_r_find_bridge -_rtmp_router_input -_rtmp_router_start -_rtmp_send_port -_rtmp_send_port_funnel -_rtmp_shutdown -_rtmp_timeout -_save_adjust -_save_alloc -_save_cpv -_save_fake_zone_info -_save_get -_save_get_init -_save_get_phys_32 -_save_get_phys_64 -_save_queue -_save_recover -_save_release -_save_ret -_save_ret_phys -_save_ret_wMSR -_save_trim_free -_saveanchor -_savearea_init -_scb_free_list -_scb_resource_m -_scb_used_list _scc -_scc_funnel_initted -_scc_getc -_scc_param -_scc_parm_done 
-_scc_probe -_scc_putc -_scc_softc -_scc_std -_scc_stomp -_scc_tty -_scc_uses_modem_control -_sconowner -_sectKLDB -_sectSizeKLD -_serial_initted -_serial_keyboard_init -_serial_keyboard_poll -_serial_keyboard_start -_serialmode -_setLocalZones -_setPmon -_set_machine_current_act -_sethzonehash -_shadow_BAT -_shandler -_sharedPage -_sharedPmap -_sip_input -_snmpFlags -_snmpStats -_spinlock_32_lock_mp -_spinlock_32_lock_up -_spinlock_32_try_mp -_spinlock_32_try_up -_spinlock_32_unlock_mp -_spinlock_32_unlock_up -_spinlock_64_lock_mp -_spinlock_64_lock_up -_spinlock_64_try_mp -_spinlock_64_try_up -_spinlock_64_unlock_mp -_spinlock_64_unlock_up -_stFloat -_stSpecrs -_stVectors -_static_memory_end -_sulckPatch_eieio -_sulckPatch_isync -_switchIntoVM -_switchSegs -_switch_in -_switch_to_old_console -_switch_to_video_console -_syncClkSpot -_sync_cache -_sync_cache64 -_sync_cache_virtual -_sync_ppage -_sys_ATPgetreq -_sys_ATPgetrsp -_sys_ATPsndreq -_sys_ATPsndrsp -_sys_ATgetmsg -_sys_ATputmsg -_sys_ATsocket -_syscall_error -_syscall_notify_interrupt -_syscall_trace -_syscall_trace_end -_sysctl__net_appletalk -_sysctl__net_appletalk_children -_sysctl__net_appletalk_ddpstats -_sysctl__net_appletalk_debug -_sysctl__net_appletalk_routermix -_taproot_addr -_taproot_size -_testPerfTrap -_thandler -_thread_adjuserstack -_thread_enable_fpe -_thread_setentrypoint -_thread_setuserstack -_tlbie -_toss_live_fpu -_toss_live_vec -_trackrouter -_trackrouter_rem_if -_trap -_trcWork -_trp_tmo_rcb -_tstbit -_ttalk_multicast_addr -_unlock_debugger -_update_tmo -_upshift8 -_uwritec -_vcgetc -_vec_save -_vec_switch -_vm_max_address -_vm_max_physical -_vmm_dispatch -_vmm_dispatch_table -_vmm_execute_vm -_vmm_exit -_vmm_fam_exc -_vmm_fam_pf -_vmm_fam_reserved -_vmm_force_exit -_vmm_get_XA -_vmm_get_adsp -_vmm_get_entry -_vmm_get_features -_vmm_get_features_sel -_vmm_get_float_state -_vmm_get_page_dirty_flag -_vmm_get_page_dirty_flag32 -_vmm_get_page_mapping -_vmm_get_page_mapping32 -_vmm_get_timer -_vmm_get_vector_state -_vmm_get_version -_vmm_get_version_sel -_vmm_init_context -_vmm_init_context_sel -_vmm_interrupt -_vmm_map_execute -_vmm_map_execute32 -_vmm_map_list -_vmm_map_list32 -_vmm_map_list64 -_vmm_map_page -_vmm_map_page32 -_vmm_max_addr -_vmm_protect_execute -_vmm_protect_execute32 -_vmm_protect_page -_vmm_protect_page32 -_vmm_set_XA -_vmm_set_timer -_vmm_stop_vm -_vmm_tear_down_all -_vmm_tear_down_context -_vmm_timer_pop -_vmm_ufp -_vmm_unmap_all_pages -_vmm_unmap_list -_vmm_unmap_page -_vmm_unmap_page32 -_xLoadDBATsLL -_xLoadIBATsLL -_xpatcnt -_xsum_assym -_zip_control -_zip_handle_getmyzone -_zip_prep_query_packet -_zip_reply_received -_zip_reply_to_getlocalzones -_zip_reply_to_getzonelist -_zip_router_input -_zip_sched_getnetinfo -_zip_send_queries -_zip_type_packet -_zonename_equal -_zt_add_zone -_zt_add_zonename -_zt_clr_zmap -_zt_compute_hash -_zt_ent_zcount -_zt_ent_zindex -_zt_find_zname -_zt_getNextZone -_zt_get_zmcast -_zt_remove_zones -_zt_set_zmap -_zt_upper_zname -dbgCkptCall -dbgDispCall -dbgRegsCall -debstash -fwdisplock -hexTab -hexfont -iNullCall diff --git a/config/Unsupported.exports b/config/Unsupported.exports new file mode 100644 index 000000000..816e4e058 --- /dev/null +++ b/config/Unsupported.exports @@ -0,0 +1,268 @@ +_Debugger +_FastUnicodeCompare +_KUNCExecute +_KUNCGetNotificationID +_KUNCUserNotificationDisplayAlert +_KUNCUserNotificationDisplayFromBundle +_KUNCUserNotificationDisplayNotice +_MD5Final +_MD5Init +_MD5Update +__ZN16IOPlatformDevice10gMetaClassE 
+__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev +__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev +__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev +__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev +__ZN16IOPlatformDeviceC2EPK11OSMetaClass +__ZN16IOPlatformDevice9metaClassE +__ZN16IOPlatformDeviceD2Ev +__ZN18IODTPlatformExpert9metaClassE +__ZTV16IOPlatformDevice +__doprnt +_aes_decrypt_cbc +_aes_decrypt_key128 +_aes_encrypt_cbc +_aes_encrypt_key128 +_appleClut8 +_b_to_q +_bdevsw +_boot +_bpf_mtap +_cdevsw +_clalloc +_clfree +_clock_get_system_value +_cons_cinput +_conslog_putc +_console_user +_convert_port_entry_to_map +_convert_port_entry_to_object +_current_act +_delay +_delay_for_interval +_des_ecb_encrypt +_des_set_key +_dlil_attach_protocol +_dlil_dereg_if_modules +_dlil_dereg_proto_module +_dlil_detach_protocol +_dlil_if_acquire +_dlil_if_attach +_dlil_if_detach +_dlil_if_release +_dlil_input +_dlil_output +_dlil_reg_if_modules +_dlil_reg_proto_module +_domains +_gIODTSharedInterrupts +_gPESerialBaud +_get_aiotask +_get_bsdtask_info +_get_task_map +_getsectdatafromheader +_hfs_getconverter +_hfs_pickencoding +_hfs_relconverter +_hz +_ifbyfamily +_ifunit +_in6_cksum +_in_broadcast +_in_cksum +_in_ifaddrhead +_in_pcb_get_owner +_in_pcb_grab_port +_in_pcb_letgo_port +_in_pcb_new_share_client +_in_pcb_rem_share_client +_inet_domain_mutex +_inflate +_inflateEnd +_inflateInit_ +_ip_mutex +_ip_output +_ip_protox +_ipc_port_release_send +_ipflow_fastforward +_kalloc +_kauth_guid_equal +_kdp_register_send_receive +_kdp_set_interface +_kdp_unregister_send_receive +_kernel_map +_kernel_pmap +_kernel_thread +_kev_post_msg +_kfree +_kmem_alloc +_kmem_free +_kmod +_kmod_create_fake +_kmod_create_fake_with_address +_kmod_destroy_fake +_kmod_lock +_kmod_lookupbyname +_kmputc +_lbolt +_lck_mtx_assert +_lck_mtx_try_lock +_lck_rw_done +_linesw +_lo_ifp +_log +_logwakeup +_loif +_m_adj +_m_cat +_m_copydata +_m_copym +_m_free +_m_freem +_m_get +_m_gethdr +_m_getpacket +_m_getpackets +_m_mclget +_m_mtod +_m_prepend_2 +_m_pullup +_m_split +_m_trailingspace +_mach_make_memory_entry_64 +_max_mem +_mcl_to_paddr +_mem_size +_memory_object_page_op +_ml_io_map +_ml_phys_read +_ml_phys_write +_ml_probe_read +_ml_processor_register +_ml_thread_policy +_mountroot_post_hook +_msleep1 +_nd6_storelladdr +_net_add_domain +_net_add_proto +_net_del_domain +_net_del_proto +_netboot_root +_ovbcopy +_pffinddomain +_pffindproto +_pmap_find_phys +_prf +_processor_exit +_processor_info +_processor_start +_pru_abort_notsupp +_pru_accept_notsupp +_pru_bind_notsupp +_pru_connect2_notsupp +_pru_connect_notsupp +_pru_disconnect_notsupp +_pru_listen_notsupp +_pru_peeraddr_notsupp +_pru_rcvd_notsupp +_pru_rcvoob_notsupp +_pru_send_notsupp +_pru_sense_null +_pru_shutdown_notsupp +_pru_sockaddr_notsupp +_pru_sopoll_notsupp +_putc +_q_to_b +_rc4_crypt +_rc4_init +_rootdev +_rootvp +_rt_mtx +_rt_setgate +_rtalloc1_locked +_rtfree +_rtrequest_locked +_rtunref +_sbappendaddr +_sbappendrecord +_sbflush +_sbspace +_securelevel +_sha1_init +_sha1_loop +_sha1_result +_sleep +_soabort +_sobind +_socantrcvmore +_socantsendmore +_sock_release +_sock_retain +_soclose +_soconnect +_socreate +_sodisconnect +_sofree +_soisconnected +_soisconnecting +_soisdisconnected +_soisdisconnecting +_sonewconn +_sooptcopyin +_sooptcopyout +_sopoll +_soreceive +_soreserve +_sorwakeup +_sosend +_sosetopt +_stack_privilege +_task_resume +_task_suspend +_tcbinfo +_termioschars +_thread_call_func +_thread_call_func_cancel +_thread_call_func_delayed 
+_thread_call_is_delayed +_thread_cancel_timer +_thread_funnel_set +_thread_set_timer +_thread_set_timer_deadline +_timeout +_tk_nin +_tk_rawcc +_tsleep +_ttioctl +_ttsetwater +_ttspeedtab +_ttwakeup +_ttwwakeup +_ttyclose +_ttyflush +_ttyinput +_ttymodem +_ttyselect +_udbinfo +_uio_iovsaddr +_uio_spacetype +_unputc +_untimeout +_utf8_encodelen +_vfs_update_vfsstat +_vm_allocate +_vm_deallocate +_vm_map +_vm_map_deallocate +_vm_map_unwire +_vm_map_wire +_vm_protect +_vm_region +_vm_region_object_create +_vnode_getparent +_vnode_getname +_vnode_putname +_vnode_tag +_vnode_update_identity +_vnop_kqfilt_add_desc +_vnop_kqfilt_remove_desc diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports new file mode 100644 index 000000000..72a1f46cf --- /dev/null +++ b/config/Unsupported.i386.exports @@ -0,0 +1,8 @@ +_copypv +_cpu_number +_master_cpu +_master_processor +_ml_get_max_cpus +_mtrr_range_add +_mtrr_range_remove +_thread_bind diff --git a/config/Unsupported.ppc.exports b/config/Unsupported.ppc.exports new file mode 100644 index 000000000..da87c45ee --- /dev/null +++ b/config/Unsupported.ppc.exports @@ -0,0 +1,25 @@ +_ASPgetmsg +_ASPputmsg +_CallTVector +_PPCcalls +_PE_write_IIC +__ZN19IODBDMAMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +_asp_open +_at_ioctl +_get_preemption_level +_gbuf_alloc_wait +_gref_alloc +_gref_close +_gref_wput +_ignore_zero_fault +_killprint +_mapping_prealloc +_mapping_relpre +_ml_enable_cache_level +_ml_enable_nap +_ml_ppc_sleep +_ml_set_processor_speed +_ml_set_processor_voltage +_ml_throttle +_temp_patch_ptrace +_temp_unpatch_ptrace diff --git a/config/newvers.pl b/config/newvers.pl new file mode 100755 index 000000000..661737ee4 --- /dev/null +++ b/config/newvers.pl @@ -0,0 +1,110 @@ +#!/usr/bin/perl +# +# This tool is used to stamp kernel version information into files at kernel +# build time. Each argument provided on the command line is the path to a file +# that needs to be updated with the current version information. The file +# xnu/config/MasterVersion is read to determine the version number to use. +# Each file is read, and all occurrences of the following strings are replaced +# in-place like so: +# ###KERNEL_VERSION_LONG### 1.2.3b4 +# ###KERNEL_VERSION_SHORT### 1.2.3 +# ###KERNEL_VERSION_MAJOR### 1 +# ###KERNEL_VERSION_MINOR### 2 +# ###KERNEL_VERSION_VARIANT### 3b4 +# ###KERNEL_VERSION_REVISION### 3 +# ###KERNEL_VERSION_STAGE### VERSION_STAGE_BETA (see libkern/version.h) +# ###KERNEL_VERSION_PRERELEASE_LEVEL### 4 +# ###KERNEL_BUILDER### root +# ###KERNEL_BUILD_OBJROOT### xnu/xnu-690.obj~2/RELEASE_PPC +# ###KERNEL_BUILD_DATE### Sun Oct 24 05:33:28 PDT 2004 + +sub ReadFile { + my ($fileName) = @_; + my $data; + local $/ = undef; # Read complete files + + if (open(IN, "<$fileName")) { + $data = <IN>; + close IN; + return $data; + } + die "newvers: Can't read file \"$fileName\"\n"; +} + +sub WriteFile { + my ($fileName, $data) = @_; + + open (OUT, ">$fileName") or die "newvers: Can't write file \"$fileName\"\n"; + print OUT $data; + close(OUT); +} + +my $versfile = "MasterVersion"; +$versfile = "$ENV{'SRCROOT'}/config/$versfile" if ($ENV{'SRCROOT'}); +my $BUILD_OBJROOT=$ENV{'OBJROOT'} . "/" . $ENV{'KERNEL_CONFIG'} . '_' . 
$ENV{'ARCH_CONFIG'}; +my $BUILD_DATE = `date`; +$BUILD_DATE =~ s/[\n\t]//g; +my $BUILDER=`whoami`; +$BUILDER =~ s/[\n\t]//g; +$BUILD_OBJROOT =~ s|.*(xnu.*)|$1|; + +my $rawvers = &ReadFile($versfile); +#$rawvers =~ s/\s//g; +($rawvers) = split "\n", $rawvers; +my ($VERSION_MAJOR, $VERSION_MINOR, $VERSION_VARIANT) = split /\./, $rawvers; +die "newvers: Invalid MasterVersion \"$rawvers\"!!! " if (!$VERSION_MAJOR); +$VERSION_MINOR = "0" unless ($VERSION_MINOR); +$VERSION_VARIANT = "0" unless ($VERSION_VARIANT); +$VERSION_VARIANT =~ tr/A-Z/a-z/; +$VERSION_VARIANT =~ m/(\d+)((?:d|a|b|r|fc)?)(\d*)/; +my $VERSION_REVISION = $1; +my $stage = $2; +my $VERSION_PRERELEASE_LEVEL = $3; +$VERSION_REVISION ="0" unless ($VERSION_REVISION); +$stage = "r" if (!$stage || ($stage eq "fc")); +$VERSION_PRERELEASE_LEVEL = "0" unless ($VERSION_PRERELEASE_LEVEL); + +my $VERSION_STAGE; +$VERSION_STAGE = 'VERSION_STAGE_DEV' if ($stage eq 'd'); +$VERSION_STAGE = 'VERSION_STAGE_ALPHA' if ($stage eq 'a'); +$VERSION_STAGE = 'VERSION_STAGE_BETA' if ($stage eq 'b'); +$VERSION_STAGE = 'VERSION_STAGE_RELEASE' if ($stage eq 'r'); + +my $VERSION_SHORT = "$VERSION_MAJOR.$VERSION_MINOR.$VERSION_REVISION"; +my $VERSION_LONG = $VERSION_SHORT; +$VERSION_LONG .= "$stage$VERSION_PRERELEASE_LEVEL" if (($stage ne "r") || ($VERSION_PRERELEASE_LEVEL != 0)); + +my $file; +foreach $file (@ARGV) { + print "newvers.pl: Stamping version \"$VERSION_LONG\" into \"$file\" ..."; + my $data = &ReadFile($file); + my $count=0; + $count += $data =~ s/###KERNEL_VERSION_LONG###/$VERSION_LONG/g; + $count += $data =~ s/###KERNEL_VERSION_SHORT###/$VERSION_SHORT/g; + $count += $data =~ s/###KERNEL_VERSION_MAJOR###/$VERSION_MAJOR/g; + $count += $data =~ s/###KERNEL_VERSION_MINOR###/$VERSION_MINOR/g; + $count += $data =~ s/###KERNEL_VERSION_VARIANT###/$VERSION_VARIANT/g; + $count += $data =~ s/###KERNEL_VERSION_REVISION###/$VERSION_REVISION/g; + $count += $data =~ s/###KERNEL_VERSION_STAGE###/$VERSION_STAGE/g; + $count += $data =~ s/###KERNEL_VERSION_PRERELEASE_LEVEL###/$VERSION_PRERELEASE_LEVEL/g; + $count += $data =~ s/###KERNEL_BUILDER###/$BUILDER/g; + $count += $data =~ s/###KERNEL_BUILD_OBJROOT###/$BUILD_OBJROOT/g; + $count += $data =~ s/###KERNEL_BUILD_DATE###/$BUILD_DATE/g; + print " $count replacements\n"; + &WriteFile($file, $data); +} + +if (0==scalar @ARGV) { + print "newvers.pl: read version \"$rawvers\" from \"$versfile\"\n"; + print "newvers.pl: ###KERNEL_VERSION_LONG### = $VERSION_LONG\n"; + print "newvers.pl: ###KERNEL_VERSION_SHORT### = $VERSION_SHORT\n"; + print "newvers.pl: ###KERNEL_VERSION_MAJOR### = $VERSION_MAJOR\n"; + print "newvers.pl: ###KERNEL_VERSION_MINOR### = $VERSION_MINOR\n"; + print "newvers.pl: ###KERNEL_VERSION_VARIANT### = $VERSION_VARIANT\n"; + print "newvers.pl: ###KERNEL_VERSION_REVISION### = $VERSION_REVISION\n"; + print "newvers.pl: ###KERNEL_VERSION_STAGE### = $VERSION_STAGE\n"; + print "newvers.pl: ###KERNEL_VERSION_PRERELEASE_LEVEL### = $VERSION_PRERELEASE_LEVEL\n"; + print "newvers.pl: ###KERNEL_BUILDER### = $BUILDER\n"; + print "newvers.pl: ###KERNEL_BUILD_OBJROOT### = $BUILD_OBJROOT\n"; + print "newvers.pl: ###KERNEL_BUILD_DATE### = $BUILD_DATE\n"; +} diff --git a/config/version.c b/config/version.c new file mode 100644 index 000000000..0811a8a29 --- /dev/null +++ b/config/version.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* version.c + * This file is a C source template for version.c, which is generated + * on every make of xnu. This template is processed by the script + * xnu/config/newvers.pl based on the version information in the file + * xnu/config/MasterVersion. + */ + +#include <libkern/version.h> + +const char version[] = OSTYPE " Kernel Version ###KERNEL_VERSION_LONG###: ###KERNEL_BUILD_DATE###; ###KERNEL_BUILDER###:###KERNEL_BUILD_OBJROOT###"; +const int version_major = VERSION_MAJOR; +const int version_minor = VERSION_MINOR; +const int version_revision = VERSION_REVISION; +const int version_stage = VERSION_STAGE; +const int version_prerelease_level = VERSION_PRERELEASE_LEVEL; +const char version_variant[] = VERSION_VARIANT; +const char osbuilder[] = "###KERNEL_BUILDER###"; +const char osrelease[] = OSRELEASE; +const char ostype[] = OSTYPE; + diff --git a/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h b/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h index 30a0ba5f6..b3ce6e92b 100644 --- a/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h +++ b/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h @@ -25,8 +25,6 @@ #include -#define kIRQ_Clock 0 - class AppleIntelClock : public IOService { OSDeclareDefaultStructors(AppleIntelClock); diff --git a/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp b/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp index 3de76178e..c84abc4ce 100644 --- a/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp +++ b/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp @@ -29,19 +29,16 @@ #define super IOService OSDefineMetaClassAndStructors(AppleIntelClock, IOService); -extern "C" { -extern void hardclock(void); -}; - bool AppleIntelClock::start(IOService *provider) { if (!super::start(provider)) return false; - provider->registerInterrupt(kIRQ_Clock, 0, (IOInterruptAction) hardclock); - provider->enableInterrupt(kIRQ_Clock); - + /* + * The clock is already provided by the kernel, so all we need + * to do here is publish its availability for any IOKit client to use. + */ publishResource("IORTC", this); return true; } diff --git a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp index 4d63bd406..74680f724 100644 --- a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp +++ b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp @@ -20,7 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998-9 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2003 Apple Computer, Inc. All rights reserved. 
* * DRI: Josh de Cesare * @@ -52,7 +52,7 @@ OSMetaClassDefineReservedUnused(AppleNMI, 3); bool AppleNMI::start(IOService *provider) { - if (!super::init()) return false; + if (!super::start(provider)) return false; enable_debugger = FALSE; mask_NMI = FALSE; @@ -89,18 +89,6 @@ bool RootRegistered( OSObject * us, void *, IOService * yourDevice ) IOReturn AppleNMI::initNMI(IOInterruptController *parentController, OSData *parentSource) { - // Allocate the IOInterruptSource so this can act like a nub. - _interruptSources = (IOInterruptSource *)IOMalloc(sizeof(IOInterruptSource)); - if (_interruptSources == 0) return kIOReturnNoMemory; - _numInterruptSources = 1; - - // Set up the IOInterruptSource to point at this. - _interruptSources[0].interruptController = parentController; - _interruptSources[0].vectorData = parentSource; - - // call start using itself as its provider. - if (!start(this)) return kIOReturnError; - return kIOReturnSuccess; } @@ -129,8 +117,6 @@ IOReturn AppleNMI::powerStateWillChangeTo ( IOPMPowerFlags theFlags, unsigned lo { if ( ! (theFlags & IOPMPowerOn) ) { - IOLog("AppleNMI mask NMI\n"); - // Mask NMI and change from edge to level whilst sleeping (copied directly from OS9 code) nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; nmiIntSource = ml_phys_read(nmiIntSourceAddr); @@ -143,8 +129,6 @@ IOReturn AppleNMI::powerStateWillChangeTo ( IOPMPowerFlags theFlags, unsigned lo } else { - IOLog("AppleNMI unmask NMI\n"); - // Unmask NMI and change back to edge (copied directly from OS9 code) nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; nmiIntSource = ml_phys_read(nmiIntSourceAddr); @@ -156,6 +140,6 @@ IOReturn AppleNMI::powerStateWillChangeTo ( IOPMPowerFlags theFlags, unsigned lo eieio(); } } - + return IOPMAckImplied; } diff --git a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp b/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp index 274cafce8..f76e6e8e9 100644 --- a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp +++ b/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,7 +40,7 @@ OSDefineMetaClassAndStructors(AppleCPU, IOCPU); bool AppleCPU::start(IOService *provider) { kern_return_t result; - ml_processor_info_t processor_info; + ml_processor_info_t this_processor_info; if (!super::start(provider)) return false; @@ -52,16 +52,18 @@ bool AppleCPU::start(IOService *provider) cpuIC->registerCPUInterruptController(); - processor_info.cpu_id = (cpu_id_t)this; - processor_info.boot_cpu = true; - processor_info.start_paddr = 0; - processor_info.supports_nap = false; - processor_info.l2cr_value = 0; - processor_info.time_base_enable = 0; + this_processor_info.cpu_id = (cpu_id_t)this; + this_processor_info.boot_cpu = true; + this_processor_info.start_paddr = 0; + this_processor_info.supports_nap = false; + this_processor_info.l2cr_value = 0; + this_processor_info.time_base_enable = 0; // Register this CPU with mach. 
- result = ml_processor_register(&processor_info, &machProcessor, - &ipi_handler); + result = ml_processor_register( + &this_processor_info, + &machProcessor, + &ipi_handler); if (result == KERN_FAILURE) return false; setCPUState(kIOCPUStateUninitalized); diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h index fcb6d8156..afe2c1794 100644 --- a/iokit/IOKit/IOBufferMemoryDescriptor.h +++ b/iokit/IOKit/IOBufferMemoryDescriptor.h @@ -27,12 +27,17 @@ enum { kIOMemoryPhysicallyContiguous = 0x00000010, kIOMemoryPageable = 0x00000020, + kIOMemoryPurgeable = 0x00000040, kIOMemorySharingTypeMask = 0x000f0000, kIOMemoryUnshared = 0x00000000, kIOMemoryKernelUserShared = 0x00010000 }; #define _IOBUFFERMEMORYDESCRIPTOR_INTASKWITHOPTIONS_ 1 +/*! + @class IOBufferMemoryDescriptor + @abstract Provides a simple memory descriptor that allocates its own buffer memory. +*/ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor { @@ -135,16 +140,17 @@ public: vm_offset_t alignment = 1); /*! @function inTaskWithOptions - @abstract Create a memory buffer with memory descriptor for that buffer. Added in Mac OS X 10.2. - @discussion This method allocates a memory buffer with a given size and alignment in the task's address space specified, and returns a memory descriptor instance representing the memory. It is recommended memory allocated for I/O or sharing via mapping be created via IOBufferMemoryDescriptor. Options passed with the request specify the kind of memory to be allocated - pageablity and sharing are specified with option bits. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Creates a memory buffer with memory descriptor for that buffer. + @discussion Added in Mac OS X 10.2, this method allocates a memory buffer with a given size and alignment in the specified task's address space, and returns a memory descriptor instance representing the memory. It is recommended that memory allocated for I/O or sharing via mapping be created via IOBufferMemoryDescriptor. Options passed with the request specify the kind of memory to be allocated - pageability and sharing are specified with option bits. This function may block and so should not be called from interrupt level or while a simple lock is held. @param inTask The task the buffer will be allocated in. - @param options Options for the allocation: - kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute. - kIOMemoryPageable - pass to request memory be non-wired - the default for kernel allocated memory is wired. + @param options Options for the allocation:
+ kIOMemoryPhysicallyContiguous - pass to request memory be physically contiguous. This option is heavily discouraged. The request may fail if memory is fragmented, may cause large amounts of paging activity, and may take a very long time to execute.
+ kIOMemoryPageable - pass to request memory be non-wired - the default for kernel allocated memory is wired.
+ kIOMemoryPurgeable - pass to request memory that may later have its purgeable state set with IOMemoryDescriptor::setPurgeable. Only supported for kIOMemoryPageable allocations.
kIOMemoryKernelUserShared - pass to request memory that will be mapped into both the kernel and client applications. @param capacity The number of bytes to allocate. @param alignment The minimum required alignment of the buffer in bytes - 1 is the default for no required alignment. For example, pass 256 to get memory allocated at an address with bits 0-7 zero. - @result An instance of class IOBufferMemoryDescriptor. To be released by the caller, which will free the memory desriptor and associated buffer. */ + @result Returns an instance of class IOBufferMemoryDescriptor to be released by the caller, which will free the memory desriptor and associated buffer. */ static IOBufferMemoryDescriptor * inTaskWithOptions( task_t inTask, diff --git a/iokit/IOKit/IOCPU.h b/iokit/IOKit/IOCPU.h index c78ea6ac0..d16a3e5c1 100644 --- a/iokit/IOKit/IOCPU.h +++ b/iokit/IOKit/IOCPU.h @@ -73,6 +73,9 @@ public: static void initCPUs(void); virtual bool start(IOService *provider); + virtual OSObject *getProperty(const OSSymbol *aKey) const; + virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject); + virtual bool serializeProperties(OSSerialize *serialize) const; virtual IOReturn setProperties(OSObject *properties); virtual void initCPU(bool boot) = 0; virtual void quiesceCPU(void) = 0; diff --git a/iokit/IOKit/IOCatalogue.h b/iokit/IOKit/IOCatalogue.h index 129be6ec5..4f1bfa07c 100644 --- a/iokit/IOKit/IOCatalogue.h +++ b/iokit/IOKit/IOCatalogue.h @@ -239,6 +239,8 @@ public: */ virtual kern_return_t removeKernelLinker(void); + static void disableExternalLinker(void); + private: /*! diff --git a/iokit/IOKit/IOCommand.h b/iokit/IOKit/IOCommand.h index f6e58d2e0..860c44f03 100644 --- a/iokit/IOKit/IOCommand.h +++ b/iokit/IOKit/IOCommand.h @@ -50,8 +50,6 @@ #include #include -class IOCommandPool; - /*! * @class IOCommand * @abstract @@ -64,12 +62,11 @@ class IOCommandPool; class IOCommand : public OSObject { - OSDeclareAbstractStructors(IOCommand) + OSDeclareDefaultStructors(IOCommand) -protected: +public: virtual bool init(void); -public: /*! @var fCommandChain This variable is used by the current 'owner' to queue the command. During the life cycle of a command it moves through a series of queues. This is the queue pointer for it. Only valid while 'ownership' is clear. For instance a IOCommandPool uses this pointer to maintain its list of free commands. May be manipulated using the kern/queue.h macros */ queue_chain_t fCommandChain; /* used to queue commands */ diff --git a/iokit/IOKit/IOCommandPool.h b/iokit/IOKit/IOCommandPool.h index 7703d3d6a..811664751 100644 --- a/iokit/IOKit/IOCommandPool.h +++ b/iokit/IOKit/IOCommandPool.h @@ -55,9 +55,7 @@ /*! * @class IOCommandPool - * @abstract - * The IOCommandPool class is used to manipulate a pool of commands which - * inherit from IOCommand. + * @abstract Manipulates a pool of commands which inherit from IOCommand. * @discussion * The IOCommandPool class is used to manipulate a pool of commands which * inherit from IOCommand. It includes a factory method to create a pool @@ -90,9 +88,8 @@ protected: ExpansionData *reserved; /*! - * @defined kIOCommandPoolDefaultSize - * @abstract - * kIOCommandPoolDefaultSize is the default size of any command pool. + * @const kIOCommandPoolDefaultSize + * @abstract The default size of any command pool. * @discussion * kIOCommandPoolDefaultSize is the default size of any command pool. * The default size was determined to be the smallest size for which @@ -112,12 +109,12 @@ public: /*! 
* @function initWithWorkLoop - * @abstract Primary initialiser for an IOCommandPool Object - * @discussion Primary initialiser for an IOCommandPool. + * @abstract Primary initializer for an IOCommandPool object. + * @discussion Primary initializer for an IOCommandPool. * Should probably use IOCommandPool::withWorkLoop() as it is easier to use. * @param inWorkLoop - * The workloop that this command pool should synchronise with. - * @result true if command pool was sucessfully initialised. + * The workloop that this command pool should synchronize with. + * @result Returns true if command pool was successfully initialized. */ virtual bool initWithWorkLoop(IOWorkLoop *workLoop); @@ -128,7 +125,7 @@ public: * The withWorkLoop method is what is known as a factory method. It creates * a new instance of an IOCommandPool and returns a pointer to that object. * @param inWorkLoop - * The workloop that this command pool should synchronise with. + * The workloop that this command pool should synchronize with. * @result * Returns a pointer to an instance of IOCommandPool if successful, * otherwise NULL. @@ -138,7 +135,7 @@ public: /*! * @function init - * @abstract Should never be used, obsolete See initWithWorkLoop + * @abstract Should never be used, obsolete. See initWithWorkLoop. */ virtual bool init(IOService *inOwner, IOWorkLoop *inWorkLoop, @@ -146,7 +143,7 @@ public: /*! * @function withWorkLoop - * @abstract Should never be used, obsolete See IOCommandPool::withWorkLoop + * @abstract Should never be used, obsolete. See IOCommandPool::withWorkLoop. */ static IOCommandPool *commandPool(IOService *inOwner, IOWorkLoop *inWorkLoop, @@ -155,12 +152,10 @@ public: /*! * @function getCommand - * @discussion - * The getCommand method is used to get a pointer to an object of type IOCommand - * from the pool. + * @discussion The getCommand method is used to get a pointer to an object of type IOCommand from the pool. * @param blockForCommand * If the caller would like to have its thread slept until a command is - * available, it should pass true, else false + * available, it should pass true, else false. * @result * If the caller passes true in blockForCommand, getCommand guarantees that * the result will be a pointer to an IOCommand object from the pool. If @@ -187,17 +182,16 @@ protected: * @function gatedGetCommand * @discussion * The gatedGetCommand method is used to serialize the extraction of a - * command of from the pool behind a command gate. - * runAction-ed by getCommand. + * command from the pool behind a command gate, runAction-ed by getCommand. * @param vCommand * A pointer to a pointer to an IOCommand object where the returned - * command will be stored + * command will be stored. * @param vBlock * A bool that indicates whether to block the request until a command * becomes available. * @result * Returns kIOReturnNoResources if no command is available and the client - * doesn't with to block until one does become available. + * doesn't wish to block until one does become available. * kIOReturnSuccess if the vCommand argument is valid. */ virtual IOReturn gatedGetCommand(IOCommand **command, bool blockForCommand); @@ -206,8 +200,7 @@ protected: * @function gatedReturnCommand * @discussion * The gatedReturnCommand method is used to serialize the return of a - * command of to the pool behind a command gate. - * runAction-ed by returnCommand. + * command to the pool behind a command gate, runAction-ed by returnCommand. 
* @param vCommand * A pointer to the IOCommand object to be returned to the pool. * @result diff --git a/iokit/IOKit/IODeviceMemory.h b/iokit/IOKit/IODeviceMemory.h index 7c72d0524..94b50cb00 100644 --- a/iokit/IOKit/IODeviceMemory.h +++ b/iokit/IOKit/IODeviceMemory.h @@ -31,9 +31,10 @@ #include -/*! @class IODeviceMemory : public IOMemoryDescriptor +/*! @class IODeviceMemory @abstract An IOMemoryDescriptor used for device physical memory ranges. - @discussion The IODeviceMemory class is a simple subclass of IOMemoryDescriptor that uses its methods to describe a single range of physical memory on a device. IODeviceMemory objects are usually looked up with IOService or IOPCIDevice accessors, and are created by memory mapped bus families. IODeviceMemory implements only some factory methods in addition to the methods of IOMemoryDescriptor. */ + @discussion The IODeviceMemory class is a simple subclass of IOMemoryDescriptor that uses its methods to describe a single range of physical memory on a device. IODeviceMemory objects are usually looked up with IOService or IOPCIDevice accessors, and are created by memory-mapped bus families. IODeviceMemory implements only some factory methods in addition to the methods of IOMemoryDescriptor. +*/ class IODeviceMemory : public IOMemoryDescriptor { @@ -54,10 +55,10 @@ public: /*! @function arrayFromList @abstract Constructs an OSArray of IODeviceMemory instances, each describing one physical range, and a tag value. - @discussion This method creates IODeviceMemory instances for each physical range passed in a IODeviceMemory::InitElement array. Each element consists of a physical address, length and tag value for the IODeviceMemory. The instances are returned as a created OSArray. + @discussion This method creates IODeviceMemory instances for each physical range passed in an IODeviceMemory::InitElement array. Each element consists of a physical address, length and tag value for the IODeviceMemory. The instances are returned as a created OSArray. @param list An array of IODeviceMemory::InitElement structures. @param count The number of elements in the list. - @result A created OSArray of IODeviceMemory objects, to be released by the caller, or zero on failure. */ + @result Returns a created OSArray of IODeviceMemory objects, to be released by the caller, or zero on failure. */ static OSArray * arrayFromList( InitElement list[], @@ -65,22 +66,22 @@ public: /*! @function withRange @abstract Constructs an IODeviceMemory instance, describing one physical range. - @discussion This method creates IODeviceMemory instance for one physical range passed as a physical address and length. It just calls IOMemoryDescriptor::withPhysicalAddress. + @discussion This method creates an IODeviceMemory instance for one physical range passed as a physical address and length. It just calls IOMemoryDescriptor::withPhysicalAddress. @param address The physical address of the first byte in the memory. @param withLength The length of memory. - @result The created IODeviceMemory on success, to be released by the caller, or zero on failure. */ + @result Returns the created IODeviceMemory on success, to be released by the caller, or zero on failure. */ static IODeviceMemory * withRange( IOPhysicalAddress start, IOPhysicalLength length ); -/*! @function withRange +/*! @function withSubRange @abstract Constructs an IODeviceMemory instance, describing a subset of an existing IODeviceMemory range. 
- @discussion This method creates IODeviceMemory instance for a subset of an existing IODeviceMemory range, passed as a physical address offset and length. It just calls IOMemoryDescriptor::withSubRange. + @discussion This method creates an IODeviceMemory instance for a subset of an existing IODeviceMemory range, passed as a physical address offset and length. It just calls IOMemoryDescriptor::withSubRange. @param of The parent IODeviceMemory of which a subrange is to be used for the new descriptor, which will be retained by the subrange IODeviceMemory. @param offset A byte offset into the parent's memory. @param length The length of the subrange. - @result The created IODeviceMemory on success, to be released by the caller, or zero on failure. */ + @result Returns the created IODeviceMemory on success, to be released by the caller, or zero on failure. */ static IODeviceMemory * withSubRange( IODeviceMemory * of, diff --git a/iokit/IOKit/IODeviceTreeSupport.h b/iokit/IOKit/IODeviceTreeSupport.h index 0c2de3a02..46e3da7b6 100644 --- a/iokit/IOKit/IODeviceTreeSupport.h +++ b/iokit/IOKit/IODeviceTreeSupport.h @@ -104,6 +104,11 @@ const OSSymbol * IODTInterruptControllerName( bool IODTMapInterrupts( IORegistryEntry * regEntry ); +enum { + kIODTInterruptShared = 0x00000001 +}; +IOReturn IODTGetInterruptOptions( IORegistryEntry * regEntry, int source, IOOptionBits * options ); + #ifdef __cplusplus extern "C" { #endif diff --git a/iokit/IOKit/IOEventSource.h b/iokit/IOKit/IOEventSource.h index 6136c93ae..f9868a69f 100644 --- a/iokit/IOKit/IOEventSource.h +++ b/iokit/IOKit/IOEventSource.h @@ -68,7 +68,7 @@ attempting to move it. All subclasses of the IOEventSource are expected to implement the checkForWork() member function.

- checkForWork() is the key method in this class. It is called by some work-loop when convienient and is expected to evaluate it's internal state and determine if an event has occured since the last call. In the case of an event having occurred then the instance defined target(owner)/action will be called. The action is stored as an ordinary C function pointer but the first parameter is always the owner. This means that a C++ member function can be used as an action function though this depends on the ABI. + checkForWork() is the key method in this class. It is called by some work-loop when convenient and is expected to evaluate its internal state and determine if an event has occurred since the last call. If an event has occurred, the instance-defined target(owner)/action will be called. The action is stored as an ordinary C function pointer but the first parameter is always the owner. This means that a C++ member function can be used as an action function, though this depends on the ABI.
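As an illustration only (not part of this header), a minimal checkForWork() override might look like the following sketch; MyEventSource and its dataReady flag are hypothetical, and a matching OSDefineMetaClassAndStructors would be needed in the implementation file:

class MyEventSource : public IOEventSource
{
    OSDeclareDefaultStructors(MyEventSource)

protected:
    volatile bool dataReady;   // set elsewhere, e.g. by a primary interrupt path

    // Called by the owning work-loop; decide whether an event has occurred
    // since the last call and, if so, invoke the instance's owner/action.
    virtual bool checkForWork()
    {
        if (!dataReady)
            return false;      // no event since the last call

        dataReady = false;
        if (action)
            (*action)(owner);  // the first parameter is always the owner
        return false;          // return true only if more work is still pending
    }
};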

Although the eventChainNext variable contains a reference to the next event source in the chain this reference is not retained. The list 'owner' i.e. the client that creates the event, not the work-loop, is expected to retain the source. */ diff --git a/iokit/IOKit/IOFilterInterruptEventSource.h b/iokit/IOKit/IOFilterInterruptEventSource.h index 0c554524c..0dad3b6a4 100644 --- a/iokit/IOKit/IOFilterInterruptEventSource.h +++ b/iokit/IOKit/IOFilterInterruptEventSource.h @@ -39,7 +39,9 @@ class IOService;

As the routine is called in the primary interrupt context great care must be taken in the writing of this routine. In general none of the generic IOKit environment is safe to call in this context. We intend this routine to be used by hardware that can interrogate its registers without destroying state. Primarily this variant of event sources will be used by drivers that share interrupts. The filter routine will determine if the interrupt is a real interrupt or a ghost and thus optimise the work thread context switch away.
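For instance, a driver sharing an interrupt line might supply a filter routine along these lines (a sketch; MyDriver and its isMyInterrupt() register test are hypothetical):

// Runs in primary interrupt context: interrogate the hardware only, take no
// other IOKit actions, and report whether this device raised the interrupt.
static bool myFilter(OSObject *owner, IOFilterInterruptEventSource *src)
{
    MyDriver *me = (MyDriver *) owner;
    return me->isMyInterrupt();   // e.g. read a status register without destroying state
}

Such a filter is passed, together with the normal action, to IOFilterInterruptEventSource::filterInterruptEventSource(); returning true schedules the action on the work loop, while returning false dismisses the interrupt as belonging to another device on the shared line.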

- CAUTION: Called in primary interrupt context, if you need to disable interrupt to guard you registers against an unexpected call then it is better to use a straight IOInterruptEventSource and its secondary interrupt delivery mechanism. +If you are implementing 'SoftDMA' (or pseudo-DMA), you may not want the I/O Kit to automatically start your interrupt handler routine on your work loop when your filter routine returns true. In this case, you may choose to have your filter routine schedule the work on the work loop itself and then return false. If you do this, the interrupt will not be disabled in hardware and you could receive additional primary interrupts before your work loop–level service routine completes. Because this scheme has implications for synchronization between your filter routine and your interrupt service routine, you should avoid doing this unless your driver requires SoftDMA. +
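A SoftDMA-style filter following that scheme might read as below (a sketch; drainFifo() and transferComplete() stand in for driver-specific code):

static bool softDMAFilter(OSObject *owner, IOFilterInterruptEventSource *src)
{
    MyDriver *me = (MyDriver *) owner;

    me->drainFifo();              // service the hardware directly in the filter

    if (me->transferComplete())
        src->signalInterrupt();   // explicitly schedule the work-loop action

    return false;                 // never auto-schedule; the interrupt stays enabled
}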

+CAUTION: Called in primary interrupt context, if you need to disable interrupts to guard your registers against an unexpected call then it is better to use a straight IOInterruptEventSource and its secondary interrupt delivery mechanism. */ class IOFilterInterruptEventSource : public IOInterruptEventSource { @@ -118,7 +120,7 @@ successfully. */ /*! @function signalInterrupt @abstract Cause the work loop to schedule the action. - @discussion Cause the work loop to schedule the interrupt action even if the filter routine returns 'false'. Note well the interrupting condition MUST be cleared from the hardware otherwise an infinite process interrupt loop will occur. Use this function when 'SoftDMA' is desired. See $link IOFilterInterruptSource::Filter */ + @discussion Cause the work loop to schedule the interrupt action even if the filter routine returns 'false'. Note well the interrupting condition MUST be cleared from the hardware otherwise an infinite process interrupt loop will occur. Use this function when SoftDMA is desired. See $link IOFilterInterruptSource::Filter */ virtual void signalInterrupt(); /*! @function getFilterAction diff --git a/iokit/IOKit/IOInterruptEventSource.h b/iokit/IOKit/IOInterruptEventSource.h index 9fff5fecb..7a4930d5c 100644 --- a/iokit/IOKit/IOInterruptEventSource.h +++ b/iokit/IOKit/IOInterruptEventSource.h @@ -40,7 +40,7 @@ class IOService; @abstract Event source for interrupt delivery to work-loop based drivers. @discussion The IOInterruptEventSource is a generic object that delivers calls to interrupt routines in its client in a guaranteed single-threaded manner. IOInterruptEventSource is part of the IOKit $link IOWorkLoop infrastructure, where the semantic is that one and only one action method is executing within a work-loop's event chain.

+When the action method is called, the client member function will receive 2 arguments, (IOEventSource *) sender and (int) count; see $link IOInterruptEventSource::Action. The sender will be a reference to the interrupt event source that fired, and the count will be computed from the difference between the $link producerCount and $link consumerCount. This number may not be reliable, as no attempt is made to adjust for wrap-around problems, but it is provided for general information and statistics gathering.

In general a client will use the factory member function to create and initialize the event source and then add it to their work-loop. It is the work loop's responsibility to maintain the new event source in its event chain. See $link IOWorkLoop.
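Typical setup in a driver's start() method might look like this sketch (MyDriver, its myWorkLoop and intSource members, and the use of interrupt index 0 are assumptions, not part of this header):

// Runs on the work loop, single-threaded with respect to other event sources.
static void interruptOccurred(OSObject *owner, IOInterruptEventSource *src, int count)
{
    // count is the producerCount/consumerCount difference described above
}

bool MyDriver::start(IOService *provider)
{
    if (!super::start(provider))
        return false;

    myWorkLoop = IOWorkLoop::workLoop();
    intSource  = IOInterruptEventSource::interruptEventSource(
                     this, interruptOccurred, provider, 0);

    if (!myWorkLoop || !intSource ||
        (myWorkLoop->addEventSource(intSource) != kIOReturnSuccess))
        return false;

    intSource->enable();
    return true;
}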

diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index 548aec91c..80aa6c4d8 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -70,6 +70,8 @@ enum { kIOLogServiceTree = 0x00001000ULL, kIOLogDTree = 0x00002000ULL, kIOLogMemory = 0x00004000ULL, + // available = 0x00008000ULL, + kOSLogRegistryMods = 0x00010000ULL, // Log attempts to modify registry collections // debug aids - change behaviour kIONoFreeObjects = 0x00100000ULL, @@ -77,7 +79,6 @@ enum { }; extern SInt64 gIOKitDebug; -extern char iokit_version[]; #ifdef __cplusplus extern "C" { diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h index f4f6d21d2..f92c4263d 100644 --- a/iokit/IOKit/IOKitKeys.h +++ b/iokit/IOKit/IOKitKeys.h @@ -124,4 +124,13 @@ // property of root that describes the machine's serial number as a string #define kIOPlatformSerialNumberKey "IOPlatformSerialNumber" // (OSString) +// IODTNVRAM property keys +#define kIONVRAMDeletePropertyKey "IONVRAM-DELETE-PROPERTY" +#define kIODTNVRAMPanicInfoKey "aapl,panic-info" + +// keys for complex boot information +#define kIOBootDeviceKey "IOBootDevice" // dict | array of dicts +#define kIOBootDevicePathKey "IOBootDevicePath" // arch-neutral OSString +#define kIOBootDeviceSizeKey "IOBootDeviceSize" // OSNumber of bytes + #endif /* ! _IOKIT_IOKITKEYS_H */ diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index 54b0ac605..0db1a4db5 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -41,5 +41,6 @@ // IOResources property #define kIOConsoleUsersSeedKey "IOConsoleUsersSeed" /* value is OSNumber */ +#define kIOKernelHasSafeSleep 1 #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOLib.h b/iokit/IOKit/IOLib.h index 6537e22c3..a792dc154 100644 --- a/iokit/IOKit/IOLib.h +++ b/iokit/IOKit/IOLib.h @@ -259,7 +259,7 @@ IOThread IOCreateThread(IOThreadFunc function, void *argument); @abstract Terminate exceution of current thread. @discussion This function destroys the currently running thread, and does not return. */ -volatile void IOExitThread(); +volatile void IOExitThread(void); /*! @function IOSleep @abstract Sleep the calling thread for a number of milliseconds. @@ -284,7 +284,13 @@ void IODelay(unsigned microseconds); void IOLog(const char *format, ...) __attribute__((format(printf, 1, 2))); +#ifndef _FN_KPRINTF +#define _FN_KPRINTF void kprintf(const char *format, ...); +#endif +#ifndef _FN_KPRINTF_DECLARED +#define _FN_KPRINTF_DECLARED +#endif /* * Convert a integer constant (typically a #define or enum) to a string diff --git a/iokit/IOKit/IOLocks.h b/iokit/IOKit/IOLocks.h index 10f5ea40d..734230164 100644 --- a/iokit/IOKit/IOLocks.h +++ b/iokit/IOKit/IOLocks.h @@ -41,91 +41,114 @@ extern "C" { #endif -#include -#include -#include +#include #include +extern lck_grp_t *IOLockGroup; + /* * Mutex lock operations */ -typedef mutex_t IOLock; +#ifdef XNU_KERNEL_PRIVATE +typedef lck_mtx_t IOLock; +#else +typedef struct _IOLock IOLock; +#endif /* XNU_KERNEL_PRIVATE */ + /*! @function IOLockAlloc - @abstract Allocates and initializes an osfmk mutex. - @discussion Allocates an osfmk mutex in general purpose memory, and initilizes it. Mutexes are general purpose blocking mutual exclusion locks, supplied by osfmk/kern/lock.h. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Allocates and initializes a mutex. + @discussion Allocates a mutex in general purpose memory, and initilizes it. 
Mutexes are general purpose blocking mutual exclusion locks, supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. @result Pointer to the allocated lock, or zero on failure. */ IOLock * IOLockAlloc( void ); /*! @function IOLockFree - @abstract Frees an osfmk mutex. + @abstract Frees a mutex. @discussion Frees a lock allocated with IOLockAlloc. Any blocked waiters will not be woken. @param lock Pointer to the allocated lock. */ void IOLockFree( IOLock * lock); +/*! @function IOLockGetMachLock + @abstract Accessor to a Mach mutex. + @discussion Accessor to the Mach mutex. + @param lock Pointer to the allocated lock. */ + +lck_mtx_t * IOLockGetMachLock( IOLock * lock); + /*! @function IOLockLock - @abstract Lock an osfmk mutex. - @discussion Lock the mutex. If the lock is held by any thread, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a simple lock is held. Locking the mutex recursively from one thread will result in deadlock. + @abstract Lock a mutex. + @discussion Lock the mutex. If the lock is held by any thread, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the mutex recursively from one thread will result in deadlock. @param lock Pointer to the allocated lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IOLockLock( IOLock * lock) { - mutex_lock(lock); + lck_mtx_lock(lock); } +#else +void IOLockLock( IOLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +void IOLockLock( IOLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOLockTryLock - @abstract Attempt to lock an osfmk mutex. + @abstract Attempt to lock a mutex. @discussion Lock the mutex if it is currently unlocked, and return true. If the lock is held by any thread, return false. @param lock Pointer to the allocated lock. @result True if the mutex was unlocked and is now locked by the caller, otherwise false. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ boolean_t IOLockTryLock( IOLock * lock) { - return(mutex_try(lock)); + return(lck_mtx_try_lock(lock)); } +#else +boolean_t IOLockTryLock( IOLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +boolean_t IOLockTryLock( IOLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOLockUnlock - @abstract Unlock an osfmk mutex. -@discussion Unlock the mutex and wake any blocked waiters. Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Unlock a mutex. +@discussion Unlock the mutex and wake any blocked waiters. Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the allocated lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IOLockUnlock( IOLock * lock) { - mutex_unlock(lock); + lck_mtx_unlock(lock); } +#else +void IOLockUnlock( IOLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +void IOLockUnlock( IOLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOLockSleep @abstract Sleep with mutex unlock and relock -@discussion Prepare to sleep,unlock the mutex, and re-acquire it on wakeup.Results are undefined if the caller has not locked the mutex. 
This function may block and so should not be called from interrupt level or while a simple lock is held. +@discussion Prepare to sleep,unlock the mutex, and re-acquire it on wakeup.Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the locked lock. @param event The event to sleep on. @param interType How can the sleep be interrupted. @result The wait-result value indicating how the thread was awakened.*/ -static __inline__ -int IOLockSleep( IOLock * lock, void *event, UInt32 interType) -{ - return thread_sleep_mutex((event_t) event, lock, (int) interType); -} +int IOLockSleep( IOLock * lock, void *event, UInt32 interType); -static __inline__ int IOLockSleepDeadline( IOLock * lock, void *event, - AbsoluteTime deadline, UInt32 interType) -{ - return thread_sleep_mutex_deadline((event_t) event, lock, - __OSAbsoluteTime(deadline), (int) interType); -} + AbsoluteTime deadline, UInt32 interType); -static __inline__ -void IOLockWakeup(IOLock * lock, void *event, bool oneThread) -{ - thread_wakeup_prim((event_t) event, oneThread, THREAD_AWAKENED); -} +void IOLockWakeup(IOLock * lock, void *event, bool oneThread); #ifdef __APPLE_API_OBSOLETE @@ -153,7 +176,7 @@ typedef struct _IORecursiveLock IORecursiveLock; /*! @function IORecursiveLockAlloc @abstract Allocates and initializes an recursive lock. - @discussion Allocates a recursive lock in general purpose memory, and initilizes it. Recursive locks function identically to osfmk mutexes but allow one thread to lock more than once, with balanced unlocks. + @discussion Allocates a recursive lock in general purpose memory, and initilizes it. Recursive locks function identically to mutexes but allow one thread to lock more than once, with balanced unlocks. @result Pointer to the allocated lock, or zero on failure. */ IORecursiveLock * IORecursiveLockAlloc( void ); @@ -165,9 +188,16 @@ IORecursiveLock * IORecursiveLockAlloc( void ); void IORecursiveLockFree( IORecursiveLock * lock); +/*! @function IORecursiveLockGetMachLock + @abstract Accessor to a Mach mutex. + @discussion Accessor to the Mach mutex. + @param lock Pointer to the allocated lock. */ + +lck_mtx_t * IORecursiveLockGetMachLock( IORecursiveLock * lock); + /*! @function IORecursiveLockLock @abstract Lock a recursive lock. - @discussion Lock the recursive lock. If the lock is held by another thread, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a simple lock is held. The lock may be taken recursively by the same thread, with a balanced number of calls to IORecursiveLockUnlock. + @discussion Lock the recursive lock. If the lock is held by another thread, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. The lock may be taken recursively by the same thread, with a balanced number of calls to IORecursiveLockUnlock. @param lock Pointer to the allocated lock. */ void IORecursiveLockLock( IORecursiveLock * lock); @@ -182,7 +212,7 @@ boolean_t IORecursiveLockTryLock( IORecursiveLock * lock); /*! @function IORecursiveLockUnlock @abstract Unlock a recursive lock. -@discussion Undo one call to IORecursiveLockLock, if the lock is now unlocked wake any blocked waiters. Results are undefined if the caller does not balance calls to IORecursiveLockLock with IORecursiveLockUnlock. 
This function may block and so should not be called from interrupt level or while a simple lock is held. +@discussion Undo one call to IORecursiveLockLock, if the lock is now unlocked wake any blocked waiters. Results are undefined if the caller does not balance calls to IORecursiveLockLock with IORecursiveLockUnlock. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the allocated lock. */ void IORecursiveLockUnlock( IORecursiveLock * lock); @@ -204,54 +234,89 @@ extern void IORecursiveLockWakeup( IORecursiveLock *_lock, * Complex (read/write) lock operations */ -typedef lock_t IORWLock; +#ifdef XNU_KERNEL_PRIVATE +typedef lck_rw_t IORWLock; +#else +typedef struct _IORWLock IORWLock; +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IORWLockAlloc - @abstract Allocates and initializes an osfmk general (read/write) lock. -@discussion Allocates an initializes an osfmk lock_t in general purpose memory, and initilizes it. Read/write locks provide for multiple readers, one exclusive writer, and are supplied by osfmk/kern/lock.h. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Allocates and initializes a read/write lock. +@discussion Allocates and initializes a read/write lock in general purpose memory, and initilizes it. Read/write locks provide for multiple readers, one exclusive writer, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. @result Pointer to the allocated lock, or zero on failure. */ IORWLock * IORWLockAlloc( void ); /*! @function IORWLockFree - @abstract Frees an osfmk general (read/write) lock. + @abstract Frees a read/write lock. @discussion Frees a lock allocated with IORWLockAlloc. Any blocked waiters will not be woken. @param lock Pointer to the allocated lock. */ void IORWLockFree( IORWLock * lock); +/*! @function IORWLockGetMachLock + @abstract Accessor to a Mach read/write lock. + @discussion Accessor to the Mach read/write lock. + @param lock Pointer to the allocated lock. */ + +lck_rw_t * IORWLockGetMachLock( IORWLock * lock); + /*! @function IORWLockRead - @abstract Lock an osfmk lock for read. -@discussion Lock the lock for read, allowing multiple readers when there are no writers. If the lock is held for write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a simple lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. + @abstract Lock a read/write lock for read. +@discussion Lock the lock for read, allowing multiple readers when there are no writers. If the lock is held for write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. @param lock Pointer to the allocated lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IORWLockRead( IORWLock * lock) { - lock_read( lock); + lck_rw_lock_shared( lock); } +#else +void IORWLockRead( IORWLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +void IORWLockRead( IORWLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IORWLockWrite - @abstract Lock an osfmk lock for write. - @discussion Lock the lock for write, allowing one writer exlusive access. 
If the lock is held for read or write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a simple lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. + @abstract Lock a read/write lock for write. + @discussion Lock the lock for write, allowing one writer exlusive access. If the lock is held for read or write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. @param lock Pointer to the allocated lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IORWLockWrite( IORWLock * lock) { - lock_write( lock); + lck_rw_lock_exclusive( lock); } +#else +void IORWLockWrite( IORWLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +void IORWLockWrite( IORWLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IORWLockUnlock - @abstract Unlock an osfmk lock. - @discussion Undo one call to IORWLockRead or IORWLockWrite. Results are undefined if the caller has not locked the lock. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Unlock a read/write lock. + @discussion Undo one call to IORWLockRead or IORWLockWrite. Results are undefined if the caller has not locked the lock. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the allocated lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IORWLockUnlock( IORWLock * lock) { - lock_done( lock); + lck_rw_done( lock); } +#else +void IORWLockUnlock( IORWLock * lock); +#endif /* !IOLOCKS_CPP */ +#else +void IORWLockUnlock( IORWLock * lock); +#endif /* XNU_KERNEL_PRIVATE */ #ifdef __APPLE_API_OBSOLETE @@ -268,80 +333,115 @@ static __inline__ void IORWUnlock( IORWLock * lock) { IORWLockUnlock(lock); } * Simple locks. Cannot block while holding a simple lock. */ -typedef simple_lock_data_t IOSimpleLock; +#ifdef KERNEL_PRIVATE +typedef lck_spin_t IOSimpleLock; +#else +typedef struct _IOSimpleLock IOSimpleLock; +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOSimpleLockAlloc - @abstract Allocates and initializes an osfmk simple (spin) lock. - @discussion Allocates an initializes an osfmk simple lock in general purpose memory, and initilizes it. Simple locks provide non-blocking mutual exclusion for synchronization between thread context and interrupt context, or for multiprocessor synchronization, and are supplied by osfmk/kern/simple_lock.h. This function may block and so should not be called from interrupt level or while a simple lock is held. + @abstract Allocates and initializes a spin lock. + @discussion Allocates an initializes a spin lock in general purpose memory, and initilizes it. Spin locks provide non-blocking mutual exclusion for synchronization between thread context and interrupt context, or for multiprocessor synchronization, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. @result Pointer to the allocated lock, or zero on failure. */ IOSimpleLock * IOSimpleLockAlloc( void ); /*! @function IOSimpleLockFree - @abstract Frees an osfmk simple (spin) lock. + @abstract Frees a spin lock. @discussion Frees a lock allocated with IOSimpleLockAlloc. @param lock Pointer to the lock. 
*/ void IOSimpleLockFree( IOSimpleLock * lock ); +/*! @function IOSimpleLockGetMachLock + @abstract Accessor to a Mach spin lock. + @discussion Accessor to the Mach spin lock. + @param lock Pointer to the allocated lock. */ + +lck_spin_t * IOSimpleLockGetMachLock( IOSimpleLock * lock); + /*! @function IOSimpleLockInit - @abstract Initialize an osfmk simple (spin) lock. - @discussion Initialize an embedded osfmk simple lock, to the unlocked state. + @abstract Initialize a spin lock. + @discussion Initialize an embedded spin lock, to the unlocked state. @param lock Pointer to the lock. */ void IOSimpleLockInit( IOSimpleLock * lock ); /*! @function IOSimpleLockLock - @abstract Lock an osfmk simple lock. -@discussion Lock the simple lock. If the lock is held, spin waiting for its unlock. Simple locks disable preemption, cannot be held across any blocking operation, and should be held for very short periods. When used to synchronize between interrupt context and thread context they should be locked with interrupts disabled - IOSimpleLockLockDisableInterrupt() will do both. Locking the lock recursively from one thread will result in deadlock. + @abstract Lock a spin lock. +@discussion Lock the spin lock. If the lock is held, spin waiting for its unlock. Spin locks disable preemption, cannot be held across any blocking operation, and should be held for very short periods. When used to synchronize between interrupt context and thread context they should be locked with interrupts disabled - IOSimpleLockLockDisableInterrupt() will do both. Locking the lock recursively from one thread will result in deadlock. @param lock Pointer to the lock. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IOSimpleLockLock( IOSimpleLock * lock ) { - usimple_lock( lock ); + lck_spin_lock( lock ); } +#else +void IOSimpleLockLock( IOSimpleLock * lock ); +#endif /* !IOLOCKS_CPP */ +#else +void IOSimpleLockLock( IOSimpleLock * lock ); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOSimpleLockTryLock - @abstract Attempt to lock an osfmk simple lock. -@discussion Lock the simple lock if it is currently unlocked, and return true. If the lock is held, return false. Successful calls to IOSimpleLockTryLock should be balanced with calls to IOSimpleLockUnlock. + @abstract Attempt to lock a spin lock. +@discussion Lock the spin lock if it is currently unlocked, and return true. If the lock is held, return false. Successful calls to IOSimpleLockTryLock should be balanced with calls to IOSimpleLockUnlock. @param lock Pointer to the lock. @result True if the lock was unlocked and is now locked by the caller, otherwise false. */ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ) { - return( usimple_lock_try( lock ) ); + return( lck_spin_try_lock( lock ) ); } +#else +boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ); +#endif /* !IOLOCKS_CPP */ +#else +boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ); +#endif /* XNU_KERNEL_PRIVATE */ /*! @function IOSimpleLockUnlock - @abstract Unlock an osfmk simple lock. + @abstract Unlock a spin lock. @discussion Unlock the lock, and restore preemption. Results are undefined if the caller has not locked the lock. @param lock Pointer to the lock. 
*/ +#ifdef XNU_KERNEL_PRIVATE +#ifndef IOLOCKS_CPP static __inline__ void IOSimpleLockUnlock( IOSimpleLock * lock ) { - usimple_unlock( lock ); + lck_spin_unlock( lock ); } +#else +void IOSimpleLockUnlock( IOSimpleLock * lock ); +#endif /* !IOLOCKS_CPP */ +#else +void IOSimpleLockUnlock( IOSimpleLock * lock ); +#endif /* XNU_KERNEL_PRIVATE */ typedef long int IOInterruptState; /*! @function IOSimpleLockLockDisableInterrupt - @abstract Lock an osfmk simple lock. - @discussion Lock the simple lock. If the lock is held, spin waiting for its unlock. Simple locks disable preemption, cannot be held across any blocking operation, and should be held for very short periods. When used to synchronize between interrupt context and thread context they should be locked with interrupts disabled - IOSimpleLockLockDisableInterrupt() will do both. Locking the lock recursively from one thread will result in deadlock. + @abstract Lock a spin lock. + @discussion Lock the spin lock. If the lock is held, spin waiting for its unlock. Simple locks disable preemption, cannot be held across any blocking operation, and should be held for very short periods. When used to synchronize between interrupt context and thread context they should be locked with interrupts disabled - IOSimpleLockLockDisableInterrupt() will do both. Locking the lock recursively from one thread will result in deadlock. @param lock Pointer to the lock. */ static __inline__ IOInterruptState IOSimpleLockLockDisableInterrupt( IOSimpleLock * lock ) { IOInterruptState state = ml_set_interrupts_enabled( false ); - usimple_lock( lock ); + IOSimpleLockLock( lock ); return( state ); } /*! @function IOSimpleLockUnlockEnableInterrupt - @abstract Unlock an osfmk simple lock, and restore interrupt state. + @abstract Unlock a spin lock, and restore interrupt state. @discussion Unlock the lock, and restore preemption and interrupts to the state as they were when the lock was taken. Results are undefined if the caller has not locked the lock. @param lock Pointer to the lock. @param state The interrupt state returned by IOSimpleLockLockDisableInterrupt() */ @@ -350,7 +450,7 @@ static __inline__ void IOSimpleLockUnlockEnableInterrupt( IOSimpleLock * lock, IOInterruptState state ) { - usimple_unlock( lock ); + IOSimpleLockUnlock( lock ); ml_set_interrupts_enabled( state ); } diff --git a/iokit/IOKit/IOMemoryCursor.h b/iokit/IOKit/IOMemoryCursor.h index 9f866e1b4..bb4fdb3a6 100644 --- a/iokit/IOKit/IOMemoryCursor.h +++ b/iokit/IOKit/IOMemoryCursor.h @@ -30,19 +30,19 @@ class IOMemoryDescriptor; /**************************** class IOMemoryCursor ***************************/ /*! - @class IOMemoryCursor : public OSObject + @class IOMemoryCursor @abstract A mechanism to convert memory references to physical addresses. @discussion The IOMemoryCursor declares the super class that all specific memory cursors must inherit from, but a memory cursor can be created without a specific format subclass by just providing a segment function to the initializers. This class does the difficult stuff of dividing a memory descriptor into a physical scatter/gather list appropriate for the target hardware.

- A driver is expected to create a memory cursor and configure it to the limitations of it's DMA hardware; for instance the memory cursor used by the firewire SBP2 protocol has a maximum physical segment size of 2^16 - 1 but the actual transfer size is unlimited. Thus it would create a cursor with a maxSegmentSize of 65535 and a maxTransfer size of UINT_MAX. It would also provide a SegmentFunction that can output a pagelist entry. + A driver is expected to create a memory cursor and configure it to the limitations of its DMA hardware; for instance the memory cursor used by the FireWire SBP-2 protocol has a maximum physical segment size of 2^16 - 1 but the actual transfer size is unlimited. Thus it would create a cursor with a maxSegmentSize of 65535 and a maxTransfer size of UINT_MAX. It would also provide a SegmentFunction that can output a pagelist entry.
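A sketch of that configuration, assuming a hypothetical driver-supplied SegmentFunction outputPageListEntry, a hypothetical output type PageListEntry, and an already prepared descriptor md:

    // Sketch: 65535-byte maximum segments, unlimited total transfer size.
    IOMemoryCursor * cursor = IOMemoryCursor::withSpecification(
                                  &outputPageListEntry,  // SegmentFunction
                                  65535,                 // maxSegmentSize
                                  UINT_MAX );            // maxTransferSize

    // Generate up to 32 segments, starting at offset 0 of the descriptor.
    PageListEntry table[32];   // hypothetical hardware pagelist format
    IOByteCount   xferSize;
    UInt32 count = cursor->genPhysicalSegments( md, 0, table, 32, 0, &xferSize );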

-Below is the simplest example of a SegmentFunction:- -void IONaturalMemoryCursor::outputSegment(PhysicalSegment segment, - void * outSegments, - UInt32 outSegmentIndex) -{ - ((PhysicalSegment *) outSegments)[outSegmentIndex] = segment; +Below is the simplest example of a SegmentFunction:
+void IONaturalMemoryCursor::outputSegment(PhysicalSegment segment,
+ void * outSegments,
+ UInt32 outSegmentIndex)
+{
+ ((PhysicalSegment *) outSegments)[outSegmentIndex] = segment;
} */ @@ -62,7 +62,8 @@ public: }; /*! @defined IOPhysicalSegment - @discussion Backward compatibilty define for the old non-class scoped type definition. See $link IOMemoryCursor::PhysicalSegment */ + @discussion Backward compatibility define for the old non-class scoped type definition. See IOMemoryCursor::PhysicalSegment +*/ #define IOPhysicalSegment IOMemoryCursor::PhysicalSegment /*! @@ -77,7 +78,7 @@ public: UInt32 segmentIndex); /*! @defined OutputSegmentFunc - @discussion Backward compatibilty define for the old non-class scoped type definition. See $link IOMemoryCursor::SegmentFunction */ + @discussion Backward compatibility define for the old non-class scoped type definition. See IOMemoryCursor::SegmentFunction */ #define OutputSegmentFunc IOMemoryCursor::SegmentFunction protected: @@ -97,12 +98,13 @@ protected: public: /*! @function withSpecification - @abstract Factory function to create and initialise an IOMemoryCursor in one operation, see $link IOMemoryCursor::initWithSpecification. + @abstract Creates and initializes an IOMemoryCursor in one operation. + @discussion Factory function to create and initialize an IOMemoryCursor in one operation. For more information, see IOMemoryCursor::initWithSpecification. @param outSegFunc SegmentFunction to call to output one physical segment. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result A new memory cursor if successfully created and initialised, 0 otherwise. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. */ static IOMemoryCursor * withSpecification(SegmentFunction outSegFunc, @@ -111,12 +113,12 @@ public: IOPhysicalLength alignment = 1); /*! @function initWithSpecification - @abstract Primary initialiser for the IOMemoryCursor class. + @abstract Primary initializer for the IOMemoryCursor class. @param outSegFunc SegmentFunction to call to output one physical segment. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result true if the inherited classes and this instance initialise + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns true if the inherited classes and this instance initialize successfully. */ virtual bool initWithSpecification(SegmentFunction outSegFunc, @@ -125,14 +127,14 @@ successfully. IOPhysicalLength alignment = 1); /*! @function genPhysicalSegments - @abstract Generate a physical scatter/gather list given a memory descriptor. + @abstract Generates a physical scatter/gather list given a memory descriptor. 
@discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. @param fromPosition Starting location of the I/O within a memory descriptor. @param segments Void pointer to base of output physical scatter/gather list. Always passed directly onto the SegmentFunction without interpretation by the cursor. @param maxSegments Maximum number of segments that can be written to segments array. @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. - @param transferSize Pointer to a IOByteCount variable that can contain the total size of the transfer being described. Default to 0 indicating that no transfer size need be returned. + @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ virtual UInt32 genPhysicalSegments( @@ -148,8 +150,8 @@ successfully. /*! - @class IONaturalMemoryCursor : public IOMemoryCursor - @abstract A $link IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the natural byte orientation for the cpu. + @class IONaturalMemoryCursor + @abstract An IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the natural byte orientation for the CPU. @discussion The IONaturalMemoryCursor would be used when it is too difficult to safely describe a SegmentFunction that is more appropriate for your hardware. This cursor just outputs an array of PhysicalSegments. */ class IONaturalMemoryCursor : public IOMemoryCursor @@ -157,8 +159,8 @@ class IONaturalMemoryCursor : public IOMemoryCursor OSDeclareDefaultStructors(IONaturalMemoryCursor) public: -/*! @funtion outputSegment - @abstract Output the given segment into the output segments array in natural byte order. +/*! @function outputSegment + @abstract Outputs the given segment into the output segments array in natural byte order. @param segment The physical address and length that is next to be output. @param segments Base of the output vector of DMA address length pairs. @param segmentIndex Index to output 'segment' in the 'segments' array. @@ -168,15 +170,17 @@ public: UInt32 segmentIndex); /*! @defined naturalOutputSegment - @discussion Backward compatibilty define for the old global function definition. See $link IONaturalMemoryCursor::outputSegment */ + @discussion Backward compatibility define for the old global function definition. See IONaturalMemoryCursor::outputSegment. +*/ #define naturalOutputSegment IONaturalMemoryCursor::outputSegment /*! @function withSpecification - @abstract Factory function to create and initialise an IONaturalMemoryCursor in one operation, see $link IONaturalMemoryCursor::initWithSpecification. + @abstract Creates and initializes an IONaturalMemoryCursor in one operation. + @discussion Factory function to create and initialize an IONaturalMemoryCursor in one operation. For more information, see IONaturalMemoryCursor::initWithSpecification. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. 
- @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result A new memory cursor if successfully created and initialised, 0 otherwise. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. */ static IONaturalMemoryCursor * withSpecification(IOPhysicalLength maxSegmentSize, @@ -184,12 +188,11 @@ public: IOPhysicalLength alignment = 1); /*! @function initWithSpecification - @abstract Primary initialiser for the IONaturalMemoryCursor class. + @abstract Primary initializer for the IONaturalMemoryCursor class. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result true if the inherited classes and this instance initialise -successfully. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns true if the inherited classes and this instance initialize successfully. */ virtual bool initWithSpecification(IOPhysicalLength maxSegmentSize, IOPhysicalLength maxTransferSize, @@ -197,14 +200,14 @@ successfully. /*! @function getPhysicalSegments - @abstract Generate a cpu natural physical scatter/gather list given a memory descriptor. - @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps $link IOMemoryCursor::genPhysicalSegments. + @abstract Generates a CPU natural physical scatter/gather list given a memory descriptor. + @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps IOMemoryCursor::genPhysicalSegments. @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. @param fromPosition Starting location of the I/O within a memory descriptor. - @param segments Pointer to an array of $link IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. + @param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. - @param transferSize Pointer to a IOByteCount variable that can contain the total size of the transfer being described. Default to 0 indicating that no transfer size need be returned. + @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. 
*/ virtual UInt32 getPhysicalSegments(IOMemoryDescriptor *descriptor, @@ -222,8 +225,8 @@ successfully. /************************** class IOBigMemoryCursor **************************/ /*! - @class IOBigMemoryCursor : public IOMemoryCursor - @abstract A $link IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the big endian byte order. + @class IOBigMemoryCursor + @abstract An IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the big endian byte order. @discussion The IOBigMemoryCursor would be used when the DMA hardware requires a big endian address and length pair. This cursor outputs an array of PhysicalSegments that are encoded in big-endian format. */ class IOBigMemoryCursor : public IOMemoryCursor @@ -231,8 +234,8 @@ class IOBigMemoryCursor : public IOMemoryCursor OSDeclareDefaultStructors(IOBigMemoryCursor) public: -/*! @funtion outputSegment - @abstract Output the given segment into the output segments array in big endian byte order. +/*! @function outputSegment + @abstract Outputs the given segment into the output segments array in big endian byte order. @param segment The physical address and length that is next to be output. @param segments Base of the output vector of DMA address length pairs. @param segmentIndex Index to output 'segment' in the 'segments' array. @@ -242,15 +245,17 @@ public: UInt32 segmentIndex); /*! @defined bigOutputSegment - @discussion Backward compatibilty define for the old global function definition. See $link IOBigMemoryCursor::outputSegment */ + @discussion Backward compatibility define for the old global function definition. See IOBigMemoryCursor::outputSegment +*/ #define bigOutputSegment IOBigMemoryCursor::outputSegment /*! @function withSpecification - @abstract Factory function to create and initialise an IOBigMemoryCursor in one operation, see $link IOBigMemoryCursor::initWithSpecification. + @abstract Creates and initializes an IOBigMemoryCursor in one operation. + @discussion Factory function to create and initialize an IOBigMemoryCursor in one operation. See also IOBigMemoryCursor::initWithSpecification. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result A new memory cursor if successfully created and initialised, 0 otherwise. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. */ static IOBigMemoryCursor * withSpecification(IOPhysicalLength maxSegmentSize, @@ -258,11 +263,11 @@ public: IOPhysicalLength alignment = 1); /*! @function initWithSpecification - @abstract Primary initialiser for the IOBigMemoryCursor class. + @abstract Primary initializer for the IOBigMemoryCursor class. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. 
- @result true if the inherited classes and this instance initialise + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns true if the inherited classes and this instance initialize successfully. */ virtual bool initWithSpecification(IOPhysicalLength maxSegmentSize, @@ -271,14 +276,14 @@ successfully. /*! @function getPhysicalSegments - @abstract Generate a big endian physical scatter/gather list given a memory descriptor. - @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps $link IOMemoryCursor::genPhysicalSegments. + @abstract Generates a big endian physical scatter/gather list given a memory descriptor. + @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps IOMemoryCursor::genPhysicalSegments. @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. @param fromPosition Starting location of the I/O within a memory descriptor. - @param segments Pointer to an array of $link IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. + @param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. - @param transferSize Pointer to a IOByteCount variable that can contain the total size of the transfer being described. Default to 0 indicating that no transfer size need be returned. + @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ virtual UInt32 getPhysicalSegments(IOMemoryDescriptor * descriptor, @@ -296,8 +301,8 @@ successfully. /************************* class IOLittleMemoryCursor ************************/ /*! - @class IOLittleMemoryCursor : public IOMemoryCursor - @abstract A $link IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the little endian byte order. + @class IOLittleMemoryCursor + @abstract An IOMemoryCursor subclass that outputs a vector of PhysicalSegments in the little endian byte order. @discussion The IOLittleMemoryCursor would be used when the DMA hardware requires a little endian address and length pair. This cursor outputs an array of PhysicalSegments that are encoded in little endian format. */ class IOLittleMemoryCursor : public IOMemoryCursor @@ -305,8 +310,8 @@ class IOLittleMemoryCursor : public IOMemoryCursor OSDeclareDefaultStructors(IOLittleMemoryCursor) public: -/*! @funtion outputSegment - @abstract Output the given segment into the output segments array in little endian byte order. +/*! @function outputSegment + @abstract Outputs the given segment into the output segments array in little endian byte order. @param segment The physical address and length that is next to be output. 
@param segments Base of the output vector of DMA address length pairs. @param segmentIndex Index to output 'segment' in the 'segments' array. @@ -316,15 +321,16 @@ public: UInt32 segmentIndex); /*! @defined littleOutputSegment - @discussion Backward compatibilty define for the old global function definition. See $link IOLittleMemoryCursor::outputSegment */ + @discussion Backward compatibility define for the old global function definition. See also IOLittleMemoryCursor::outputSegment. */ #define littleOutputSegment IOLittleMemoryCursor::outputSegment /*! @function withSpecification - @abstract Factory function to create and initialise an IOLittleMemoryCursor in one operation, see $link IOLittleMemoryCursor::initWithSpecification. + @abstract Creates and initializes an IOLittleMemoryCursor in one operation. + @discussion Factory function to create and initialize an IOLittleMemoryCursor in one operation. See also IOLittleMemoryCursor::initWithSpecification. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result A new memory cursor if successfully created and initialised, 0 otherwise. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. */ static IOLittleMemoryCursor * withSpecification(IOPhysicalLength maxSegmentSize, @@ -332,12 +338,11 @@ public: IOPhysicalLength alignment = 1); /*! @function initWithSpecification - @abstract Primary initialiser for the IOLittleMemoryCursor class. + @abstract Primary initializer for the IOLittleMemoryCursor class. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result true if the inherited classes and this instance initialise -successfully. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns true if the inherited classes and this instance initialize successfully. */ virtual bool initWithSpecification(IOPhysicalLength maxSegmentSize, IOPhysicalLength maxTransferSize, @@ -345,14 +350,14 @@ successfully. /*! @function getPhysicalSegments - @abstract Generate a little endian physical scatter/gather list given a memory descriptor. - @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps $link IOMemoryCursor::genPhysicalSegments. + @abstract Generates a little endian physical scatter/gather list given a memory descriptor. + @discussion Generates a list of physical segments from the given memory descriptor, relative to the current position of the descriptor. Wraps IOMemoryCursor::genPhysicalSegments. @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. 
@param fromPosition Starting location of the I/O within a memory descriptor. - @param segments Pointer to an array of $link IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. + @param segments Pointer to an array of IOMemoryCursor::PhysicalSegments for the output physical scatter/gather list. @param maxSegments Maximum number of segments that can be written to segments array. @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. - @param transferSize Pointer to a IOByteCount variable that can contain the total size of the transfer being described. Default to 0 indicating that no transfer size need be returned. + @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. @result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ virtual UInt32 getPhysicalSegments(IOMemoryDescriptor * descriptor, @@ -374,8 +379,8 @@ successfully. struct IODBDMADescriptor; /*! - @class IODBDMAMemoryCursor : public IOMemoryCursor - @abstract A $link IOMemoryCursor subclass that outputs a vector of DBDMA descriptors where the address and length are filled in. + @class IODBDMAMemoryCursor + @abstract An IOMemoryCursor subclass that outputs a vector of DBDMA descriptors where the address and length are filled in. @discussion The IODBDMAMemoryCursor would be used when the DBDMA hardware is available for the device that will use an instance of this cursor. */ class IODBDMAMemoryCursor : public IOMemoryCursor @@ -383,8 +388,8 @@ class IODBDMAMemoryCursor : public IOMemoryCursor OSDeclareDefaultStructors(IODBDMAMemoryCursor) public: -/*! @funtion outputSegment - @abstract Output the given segment into the output segments array in address and length fields of an DBDMA descriptor. +/*! @function outputSegment + @abstract Outputs the given segment into the output segments array in the address and length fields of a DBDMA descriptor. @param segment The physical address and length that is next to be output. @param segments Base of the output vector of DMA address length pairs. @param segmentIndex Index to output 'segment' in the 'segments' array. @@ -394,15 +399,16 @@ public: UInt32 segmentIndex); /*! @defined dbdmaOutputSegment - @discussion Backward compatibilty define for the old global function definition. See $link IODBDMAMemoryCursor::outputSegment */ + @discussion Backward compatibility define for the old global function definition. See IODBDMAMemoryCursor::outputSegment. */ #define dbdmaOutputSegment IODBDMAMemoryCursor::outputSegment /*! @function withSpecification - @abstract Factory function to create and initialise an IODBDMAMemoryCursor in one operation, see $link IODBDMAMemoryCursor::initWithSpecification. + @abstract Creates and initializes an IODBDMAMemoryCursor in one operation. + @discussion Factory function to create and initialize an IODBDMAMemoryCursor in one operation. See also IODBDMAMemoryCursor::initWithSpecification. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment.
- @result A new memory cursor if successfully created and initialised, 0 otherwise. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns a new memory cursor if successfully created and initialized, 0 otherwise. */ static IODBDMAMemoryCursor * withSpecification(IOPhysicalLength maxSegmentSize, @@ -410,12 +416,11 @@ public: IOPhysicalLength alignment = 1); /*! @function initWithSpecification - @abstract Primary initialiser for the IODBDMAMemoryCursor class. + @abstract Primary initializer for the IODBDMAMemoryCursor class. @param maxSegmentSize Maximum allowable size for one segment. Defaults to 0. - @param maxTransferSize Maximum size of an entire transfer. Default to 0 indicating no maximum. - @param alignment Alligment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. - @result true if the inherited classes and this instance initialise -successfully. + @param maxTransferSize Maximum size of an entire transfer. Defaults to 0 indicating no maximum. + @param alignment Alignment restrictions on output physical addresses. Not currently implemented. Defaults to single byte alignment. + @result Returns true if the inherited classes and this instance initialize successfully. */ virtual bool initWithSpecification(IOPhysicalLength maxSegmentSize, IOPhysicalLength maxTransferSize, @@ -423,14 +428,14 @@ successfully. /*! @function getPhysicalSegments - @abstract Generate a DBDMA physical scatter/gather list given a memory descriptor. - @discussion Generates a list of DBDMA descriptors where the address and length fields are filled in appropriately. But the client is expected to fill in the rest of teh DBDMA descriptor as is appropriate for their particular hardware. Wraps $link IOMemoryCursor::genPhysicalSegments. + @abstract Generates a DBDMA physical scatter/gather list given a memory descriptor. + @discussion Generates a list of DBDMA descriptors where the address and length fields are filled in appropriately. But the client is expected to fill in the rest of the DBDMA descriptor as is appropriate for their particular hardware. Wraps IOMemoryCursor::genPhysicalSegments. @param descriptor IOMemoryDescriptor that describes the data associated with an I/O request. @param fromPosition Starting location of the I/O within a memory descriptor. @param segments Pointer to an array of DBDMA descriptors for the output physical scatter/gather list. Be warned no room is left for a preamble in the output array. 'segments' should point to the first memory description slot in a DBDMA command. - @param maxSegments Maximum number of segments that can be written to the dbdma descriptor table. + @param maxSegments Maximum number of segments that can be written to the DBDMA descriptor table. @param maxTransferSize Maximum transfer size is limited to that many bytes, otherwise it defaults to the maximum transfer size specified when the memory cursor was initialized. - @param transferSize Pointer to a IOByteCount variable that can contain the total size of the transfer being described. Default to 0 indicating that no transfer size need be returned. + @param transferSize Pointer to an IOByteCount variable that can contain the total size of the transfer being described. Defaults to 0 indicating that no transfer size need be returned. 
@result If the descriptor is exhausted of memory, a zero is returned, otherwise the number of segments that were filled in is returned. */ virtual UInt32 getPhysicalSegments(IOMemoryDescriptor * descriptor, diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index 322a2760b..c370718fb 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -53,7 +53,7 @@ enum IODirection }; /* - * IOOptionBits used in the second withRanges variant + * IOOptionBits used in the withOptions variant */ enum { kIOMemoryDirectionMask = 0x00000007, @@ -62,6 +62,8 @@ enum { kIOMemoryTypeVirtual = 0x00000010, kIOMemoryTypePhysical = 0x00000020, kIOMemoryTypeUPL = 0x00000030, + kIOMemoryTypePersistentMD = 0x00000040, // Persistent Memory Descriptor + kIOMemoryTypeUIO = 0x00000050, kIOMemoryTypeMask = 0x000000f0, kIOMemoryAsReference = 0x00000100, @@ -73,6 +75,19 @@ enum { #define kIOMapperNone ((IOMapper *) -1) #define kIOMapperSystem ((IOMapper *) 0) +enum +{ + kIOMemoryPurgeableKeepCurrent = 1, + kIOMemoryPurgeableNonVolatile = 2, + kIOMemoryPurgeableVolatile = 3, + kIOMemoryPurgeableEmpty = 4 +}; +enum +{ + kIOMemoryIncoherentIOFlush = 1, + kIOMemoryIncoherentIOStore = 2, +}; + /*! @class IOMemoryDescriptor : public OSObject @abstract An abstract base class defining common methods for describing physical or virtual memory. @discussion The IOMemoryDescriptor object represents a buffer or range of memory, specified as one or more physical or virtual address ranges. It contains methods to return the memory's physically contiguous segments (fragments), for use with the IOMemoryCursor, and methods to map the memory into any address space with caching and placed mapping options. */ @@ -110,13 +125,6 @@ protected: public: -/*! @function getBackingID - @abstract Get an unique identifier for the virtual memory systems backing memory object. - @discussion For memory descriptors that are directly mapped by real memory, IOGeneralMemoryDescriptors that are also persistent (kIOMemoryPersistent) return the id of the backing vm object. This returned value can be tested to see if 2 memory persistent memory descriptors share the same backing. The call itself is fairly heavy weight and can only be applied to persistent memory descriptors so it is not generally useful. This function is NOT virtual at the moment. We may choose to make it virtual in the future however. - @result 0 on non-persistent or non IOGeneralMemoryDescriptors, unique id if not. */ - // See implementation at end of file - inline void * getBackingID() const; - virtual IOPhysicalAddress getSourceSegment( IOByteCount offset, IOByteCount * length ); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 0); @@ -133,14 +141,45 @@ public: IOMapper * mapper = 0); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 1); - virtual addr64_t IOMemoryDescriptor::getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); + virtual addr64_t getPhysicalSegment64( IOByteCount offset, + IOByteCount * length ); OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 2); + +/*! @function setPurgeable + @abstract Control the purgeable status of a memory descriptor's memory. + @discussion Buffers may be allocated with the ability to have their purgeable status changed - IOBufferMemoryDescriptor with the kIOMemoryPurgeable option, VM_FLAGS_PURGEABLE may be passed to vm_allocate() in user space to allocate such buffers. The purgeable status of such a buffer may be controlled with setPurgeable().
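A sketch of the resulting usage, with an arbitrary buffer size and error handling elided:

    // Sketch: a purgeable buffer whose pages the VM system may reclaim
    // while the buffer is marked volatile.
    IOBufferMemoryDescriptor * buf = IOBufferMemoryDescriptor::withOptions(
                  kIODirectionOutIn | kIOMemoryPurgeable, 65536, page_size );
    IOOptionBits oldState;

    buf->setPurgeable( kIOMemoryPurgeableVolatile, &oldState );    // contents may go away
    // ... later, before touching the contents again:
    buf->setPurgeable( kIOMemoryPurgeableNonVolatile, &oldState );
    if (kIOMemoryPurgeableEmpty == oldState) {
        // Pages were reclaimed while volatile; contents must be regenerated.
    }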
The process of making a purgeable memory descriptor non-volatile and determining its previous state is atomic - if a purgeable memory descriptor is made nonvolatile and the old state is returned as kIOMemoryPurgeableVolatile, then the memory's previous contents are completely intact and will remain so until the memory is made volatile again. If the old state is returned as kIOMemoryPurgeableEmpty then the memory was reclaimed while it was in a volatile state and its previous contents have been lost. + @param newState - the desired new purgeable state of the memory:
+ kIOMemoryPurgeableKeepCurrent - make no changes to the memory's purgeable state.
+ kIOMemoryPurgeableVolatile - make the memory volatile - the memory may be reclaimed by the VM system without saving its contents to backing store.
+ kIOMemoryPurgeableNonVolatile - make the memory nonvolatile - the memory is treated like usual allocations and must be saved to backing store if paged.
+ kIOMemoryPurgeableEmpty - make the memory volatile, and discard any pages allocated to it. + @param oldState - if non-NULL, the previous purgeable state of the memory is returned here:
+ kIOMemoryPurgeableNonVolatile - the memory was nonvolatile.
+ kIOMemoryPurgeableVolatile - the memory was volatile but its content has not been discarded by the VM system.
+ kIOMemoryPurgeableEmpty - the memory was volatile and has been discarded by the VM system.
+ @result An IOReturn code. */ + + virtual IOReturn setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 3); + +/*! @function performOperation + @abstract Perform an operation on the memory descriptor's memory. + @discussion This method performs some operation on a range of the memory descriptor's memory. When a memory descriptor's memory is not mapped, it should be more efficient to use this method than mapping the memory to perform the operation virtually. + @param options The operation to perform on the memory:
+ kIOMemoryIncoherentIOFlush - pass this option to store to memory and flush any data in the processor cache for the memory range, with synchronization to ensure the data has passed through all levels of processor cache. It may not be supported on all architectures. This type of flush may be used for non-coherent I/O such as AGP - it is NOT required for PCI coherent operations. The memory descriptor must have been previously prepared.
+ kIOMemoryIncoherentIOStore - pass this option to store to memory any data in the processor cache for the memory range, with synchronization to ensure the data has passed through all levels of processor cache. It may not be supported on all architectures. This type of flush may be used for non-coherent I/O such as AGP - it is NOT required for PCI coherent operations. The memory descriptor must have been previously prepared. + @param offset A byte offset into the memory descriptor's memory. + @param length The length of the data range. + @result An IOReturn code. */ + + virtual IOReturn performOperation( IOOptionBits options, + IOByteCount offset, IOByteCount length ); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 4); + private: - OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 3); - OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 4); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 5); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 6); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 7); @@ -274,6 +313,14 @@ public: IOByteCount length, IODirection withDirection); +/*! @function withPersistentMemoryDescriptor + @abstract Copy constructor that generates a new memory descriptor if the backing memory for the same task's virtual address and length has changed. + @discussion If the original memory descriptor's address and length is still backed by the same real memory, i.e. the user hasn't deallocated and reallocated memory at the same address, then the original memory descriptor is returned with an additional reference. Otherwise we build a totally new memory descriptor with the same characteristics as the previous one but with a new view of the vm. Note it is not legal to call this function with anything except an IOGeneralMemoryDescriptor that was created with the kIOMemoryPersistent option. + @param originalMD The memory descriptor to be duplicated. + @result Either the original memory descriptor with an additional retain or a new memory descriptor, 0 for a bad original memory descriptor or some other resource shortage. */ + static IOMemoryDescriptor * + withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD); + /*! @function initWithAddress @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one virtual range of the kernel task. @discussion This method initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the kernel map. An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. @@ -447,6 +494,7 @@ public: kIOMapInhibitCache, kIOMapWriteThruCache, kIOMapCopybackCache to set the appropriate caching.
kIOMapReadOnly to allow only read only accesses to the memory - writes will cause an access fault.
kIOMapReference will only succeed if the mapping already exists, and the IOMemoryMap object is just an extra reference, ie. no new mapping will be created.
+ kIOMapUnique allows a special kind of mapping to be created that may be used with the IOMemoryMap::redirect() API. These mappings will not be shared as is the default - there will always be a unique mapping created for the caller, not an existing mapping with an extra reference.
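A sketch of how kIOMapUnique combines with the IOMemoryMap::redirect() method documented later in this header; userTask, md, and mdNew are hypothetical:

    // Sketch: a unique (unshared) mapping whose backing memory is later
    // swapped atomically underneath the client.
    IOMemoryMap * map = md->map( userTask, 0, kIOMapAnywhere | kIOMapUnique );

    map->redirect( NULL, 0 );    // block the client; accesses wait in vm_fault()
    // ... snapshot or copy md's contents while access is blocked ...
    map->redirect( mdNew, 0 );   // restore access, now backed by mdNew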
@param offset Is a beginning offset into the IOMemoryDescriptor's memory where the mapping starts. Zero is the default to map all the memory. @param length Is the length of the mapping requested for a subset of the IOMemoryDescriptor. Zero is the default to map all the memory. @result A reference to an IOMemoryMap object representing the mapping, which can supply the virtual address of the mapping and other information. The mapping may be shared with multiple callers - multiple maps are avoided if a compatible one exists. The IOMemoryMap object returned should be released only when the caller has finished accessing the mapping, as freeing the object destroys the mapping. The IOMemoryMap instance also retains the IOMemoryDescriptor it maps while it exists. */ @@ -593,6 +641,18 @@ public: virtual IOReturn unmap() = 0; virtual void taskDied() = 0; + +/*! @function redirect + @abstract Replace the memory mapped in a process with new backing memory. + @discussion An IOMemoryMap created with the kIOMapUnique option to IOMemoryDescriptor::map() can be remapped to a new IOMemoryDescriptor backing object. If the new IOMemoryDescriptor is specified as NULL, client access to the memory map is blocked until a new backing object has been set. By blocking access and copying data, the caller can create atomic copies of the memory while the client is potentially reading or writing the memory. + @param newBackingMemory The IOMemoryDescriptor that represents the physical memory that is to be now mapped in the virtual range the IOMemoryMap represents. If newBackingMemory is NULL, any access to the mapping will hang (in vm_fault()) until access has been restored by a new call to redirect() with a non-NULL newBackingMemory argument. + @param options Mapping options are defined in IOTypes.h, and are documented in IOMemoryDescriptor::map(). + @param offset As with IOMemoryDescriptor::map(), a beginning offset into the IOMemoryDescriptor's memory where the mapping starts. Zero is the default. + @result An IOReturn code. */ + + virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, + IOOptionBits options, + IOByteCount offset = 0) = 0; }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -614,11 +674,14 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor { OSDeclareDefaultStructors(IOGeneralMemoryDescriptor); -protected: - union { +public: + union Ranges { IOVirtualRange * v; IOPhysicalRange * p; - } _ranges; /* list of address ranges */ + void *uio; + }; +protected: + Ranges _ranges; unsigned _rangesCount; /* number of address ranges in list */ bool _rangesIsAllocated; /* is list allocated by us? */ @@ -641,8 +704,10 @@ protected: private: - // Internal API may be made virtual at some time in the future. + // Internal APIs may be made virtual at some time in the future. IOReturn wireVirtual(IODirection forDirection); + void *createNamedEntry(); + /* DEPRECATED */ IOByteCount _position; /* absolute position over all ranges */ /* DEPRECATED */ virtual void setPosition(IOByteCount position); @@ -672,11 +737,6 @@ public: * IOMemoryDescriptor required methods */ -/*! @function getBackingID - @abstract Returns the vm systems unique id for the memory backing this IOGeneralMemoryDescriptor. See IOMemoryDescriptor::getBackingID for details. - @result 0 on non-persistent or non IOGeneralMemoryDescriptors, unique id if not.
*/ - void * getBackingID() const; - // Master initializer virtual bool initWithOptions(void * buffers, UInt32 count, @@ -736,6 +796,10 @@ public: IOVirtualAddress logical, IOByteCount length ); virtual bool serialize(OSSerialize *s) const; + + // Factory method for cloning a persistent IOMD, see IOMemoryDescriptor + static IOMemoryDescriptor * + withPersistentMemoryDescriptor(IOGeneralMemoryDescriptor *originalMD); }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -824,6 +888,11 @@ public: virtual bool serialize(OSSerialize *s) const; + virtual IOReturn setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ); + virtual IOReturn performOperation( IOOptionBits options, + IOByteCount offset, IOByteCount length ); + protected: virtual IOMemoryMap * makeMapping( IOMemoryDescriptor * owner, @@ -832,20 +901,15 @@ protected: IOOptionBits options, IOByteCount offset, IOByteCount length ); + + virtual IOReturn doMap( + vm_map_t addressMap, + IOVirtualAddress * atAddress, + IOOptionBits options, + IOByteCount sourceOffset = 0, + IOByteCount length = 0 ); }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// Implementation of inline functions -void * IOMemoryDescriptor::getBackingID() const -{ - const IOGeneralMemoryDescriptor *genMD = (const IOGeneralMemoryDescriptor *) - OSDynamicCast(IOGeneralMemoryDescriptor, this); - - if (genMD) - return genMD->getBackingID(); - else - return 0; -} - #endif /* !_IOMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOMessage.h b/iokit/IOKit/IOMessage.h index f90a1b7c9..40655f323 100644 --- a/iokit/IOKit/IOMessage.h +++ b/iokit/IOKit/IOMessage.h @@ -28,8 +28,15 @@ typedef UInt32 IOMessage; -#define iokit_common_msg(message) (UInt32)(sys_iokit|sub_iokit_common|message) -#define iokit_family_msg(sub,message) (UInt32)(sys_iokit|sub|message) +#define iokit_common_msg(message) (UInt32)(sys_iokit|sub_iokit_common|message) +#define iokit_family_msg(sub,message) (UInt32)(sys_iokit|sub|message) + +/*! @defined iokit_vendor_specific_msg + @discussion iokit_vendor_specific_msg passes messages in the sub_iokit_vendor_specific + subsystem. It can be used to generate messages that are used for private + communication between vendor specific code via the IOService::message() etc. APIs.
+*/ +#define iokit_vendor_specific_msg(message) (UInt32)(sys_iokit|sub_iokit_vendor_specific|message) #define kIOMessageServiceIsTerminated iokit_common_msg(0x010) #define kIOMessageServiceIsSuspended iokit_common_msg(0x020) diff --git a/iokit/IOKit/IONVRAM.h b/iokit/IOKit/IONVRAM.h index 1e09641a7..6114b0112 100644 --- a/iokit/IOKit/IONVRAM.h +++ b/iokit/IOKit/IONVRAM.h @@ -23,6 +23,7 @@ #ifndef _IOKIT_IONVRAM_H #define _IOKIT_IONVRAM_H +#include #include #include #include @@ -33,8 +34,6 @@ #define kIODTNVRAMPanicInfoPartitonName "APL,OSXPanic" #define kIODTNVRAMFreePartitionName "wwwwwwwwwwww" -#define kIODTNVRAMPanicInfoKey "aapl,panic-info" - enum { kIODTNVRAMImageSize = 0x2000, kIODTNVRAMXPRAMSize = 0x0100, @@ -83,7 +82,9 @@ private: virtual UInt8 calculatePartitionChecksum(UInt8 *partitionHeader); virtual IOReturn initOFVariables(void); +public: virtual IOReturn syncOFVariables(void); +private: virtual UInt32 getOFVariableType(const OSSymbol *propSymbol) const; virtual UInt32 getOFVariablePerm(const OSSymbol *propSymbol) const; virtual bool getOWVariableInfo(UInt32 variableNumber, const OSSymbol **propSymbol, @@ -107,7 +108,7 @@ private: const OSSymbol *name, OSData * value); - virtual OSData *unescapeBytesToData(UInt8 *bytes, UInt32 length); + virtual OSData *unescapeBytesToData(const UInt8 *bytes, UInt32 length); virtual OSData *escapeDataToData(OSData * value); virtual IOReturn readNVRAMPropertyType1(IORegistryEntry *entry, @@ -124,10 +125,11 @@ public: virtual void sync(void); - virtual bool serializeProperties(OSSerialize * serialize) const; + virtual bool serializeProperties(OSSerialize *s) const; virtual OSObject *getProperty(const OSSymbol *aKey) const; virtual OSObject *getProperty(const char *aKey) const; virtual bool setProperty(const OSSymbol *aKey, OSObject *anObject); + virtual void removeProperty(const OSSymbol *aKey); virtual IOReturn setProperties(OSObject *properties); virtual IOReturn readXPRAM(IOByteCount offset, UInt8 *buffer, diff --git a/iokit/IOKit/IOPolledInterface.h b/iokit/IOKit/IOPolledInterface.h new file mode 100644 index 000000000..6be5edd0f --- /dev/null +++ b/iokit/IOKit/IOPolledInterface.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#define kIOPolledInterfaceSupportKey "IOPolledInterface" + +enum +{ + kIOPolledPreflightState = 1, + kIOPolledBeforeSleepState = 2, + kIOPolledAfterSleepState = 3, + kIOPolledPostflightState = 4 +}; + +enum +{ + kIOPolledWrite = 1, + kIOPolledRead = 2 +}; + +typedef void (*IOPolledCompletionAction)( void * target, + void * parameter, + IOReturn status, + uint64_t actualByteCount); +struct IOPolledCompletion +{ + void * target; + IOPolledCompletionAction action; + void * parameter; +}; + +class IOPolledInterface : public OSObject +{ + OSDeclareAbstractStructors(IOPolledInterface); + +protected: + struct ExpansionData { }; + ExpansionData * reserved; + +public: + virtual IOReturn probe(IOService * target) = 0; + + virtual IOReturn open( IOOptionBits state, IOMemoryDescriptor * buffer) = 0; + virtual IOReturn close(IOOptionBits state) = 0; + + virtual IOReturn startIO(uint32_t operation, + uint32_t bufferOffset, + uint64_t deviceOffset, + uint64_t length, + IOPolledCompletion completion) = 0; + + virtual IOReturn checkForWork(void) = 0; + + static IOReturn checkAllForWork(void); + + OSMetaClassDeclareReservedUnused(IOPolledInterface, 0); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 1); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 2); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 3); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 4); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 5); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 6); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 7); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 8); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 9); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 10); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 11); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 12); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 13); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 14); + OSMetaClassDeclareReservedUnused(IOPolledInterface, 15); +}; + diff --git a/iokit/IOKit/IORangeAllocator.h b/iokit/IOKit/IORangeAllocator.h index 761084e84..f222cacaf 100644 --- a/iokit/IOKit/IORangeAllocator.h +++ b/iokit/IOKit/IORangeAllocator.h @@ -36,9 +36,10 @@ typedef UInt32 IORangeScalar; -/*! @class IORangeAllocator : public OSObject +/*! @class IORangeAllocator @abstract A utility class to manage allocations from a range. - @discussion The IORangeAllocator class provides functions for allocating ranges, at a fixed or any offset, and freeing them back to a free list. It is useful for describing ranges of memory or address space without requiring storage in the memory - information describing the free elements is kept elsewhere. Ranges are described by a start offset and a size. IORangeAllocator is optionally protected against multithreaded access. */ + @discussion The IORangeAllocator class provides functions for allocating ranges, at a fixed or any offset, and freeing them back to a free list. It is useful for describing ranges of memory or address space without requiring storage in the memory - information describing the free elements is kept elsewhere. Ranges are described by a start offset and a size. IORangeAllocator is optionally protected against multithreaded access. +*/ class IORangeAllocator : public OSObject { @@ -66,11 +67,11 @@ public: /*! @function init @abstract Standard initializer for IORangeAllocator. 
@discussion This method initializes an IORangeAllocator and optionally sets the free list to contain one fragment, from zero to an endOfRange parameter. The capacity in terms of free fragments and locking options are set for the instance. - @param endOfRange If the free list is to contain an initial fragment, set endOfRange to the last offset in the range, ie. size - 1, to create a free fragment for the range zero to endOfRange inclusive. If zero is passed the free list will be initialized empty, and can be populated with calls to the deallocate method. + @param endOfRange If the free list is to contain an initial fragment, set endOfRange to the last offset in the range, ie. size - 1, to create a free fragment for the range zero to endOfRange inclusive. If zero is passed, the free list will be initialized empty, and can be populated with calls to the deallocate method. @param defaultAlignment If this parameter is non-zero it specifies a required alignment for all allocations, for example pass 256 to align allocations on 256 byte boundaries. Zero or one specify unaligned allocations. - @param capacity Sets the initial size of the free list in number of non-contiguous fragments. This value is also used for the capacityIncrement. + @param capacity Sets the initial size of the free list in number of noncontiguous fragments. This value is also used for the capacityIncrement. @param options Pass kLocking if the instance can be used by multiple threads. - @result Returns true if the instance is successfully initialize, false on failure. */ + @result Returns true if the instance is successfully initialized, false on failure. */ virtual bool init( IORangeScalar endOfRange, IORangeScalar defaultAlignment, @@ -96,58 +97,65 @@ public: /*! @function getFragmentCount @abstract Accessor to return the number of free fragments in the range. @discussion This method returns a count of free fragments. Each fragment describes a non-contiguous free range - deallocations will merge contiguous fragments together. - @result The count of free fragments. */ + @result Returns the count of free fragments. +*/ virtual UInt32 getFragmentCount( void ); /*! @function getFragmentCapacity @abstract Accessor to return the capacity of the free fragment list. @discussion This method returns the current capacity of the free fragment list. - @result The current capacity of free fragment list. */ + @result Returns the current capacity of the free fragment list. +*/ virtual UInt32 getFragmentCapacity( void ); /*! @function setFragmentCapacityIncrement @abstract Sets the count of fragments the free list will increase by when full. - @discussion This method sets the number of extra fragments the free list will expand to when full. It defaults to the initial capacity. - @param count The number of fragments to increment the capacity by when the free list is full. */ + @discussion This method sets the number of extra fragments the free list will expand to when full. It defaults to the initial capacity. + @param count The number of fragments to increment the capacity by when the free list is full. +*/ virtual void setFragmentCapacityIncrement( UInt32 count ); /*! @function getFreeCount @abstract Totals the sizes of the free fragments. @discussion This method returns the total of the sizes of the fragments on the free list. - @result The total of the free fragments sizes. */ + @result Returns the total of the free fragments' sizes. +*/ virtual IORangeScalar getFreeCount( void ); /*!
@function allocate - @abstract Allocate from the free list, at any offset. + @abstract Allocates from the free list, at any offset. @discussion This method allocates a range from the free list. The alignment will default to the alignment set when the allocator was created or may be set here. @param size The size of the range requested. @param result The beginning of the range allocated is returned here on success. @param alignment If zero is passed, defaults to the allocator's alignment, otherwise pass an alignment required for the allocation, for example 4096 to page align. - @result True if the allocation was successful, else false. */ + @result Returns true if the allocation was successful, else false. +*/ virtual bool allocate( IORangeScalar size, IORangeScalar * result, IORangeScalar alignment = 0 ); /*! @function allocateRange - @abstract Allocate from the free list, at a set offset. + @abstract Allocates from the free list, at a set offset. @discussion This method allocates a range from the free list, given a set offset passed in. @param start The beginning of the range requested. @param size The size of the range requested. - @result True if the allocation was successful, else false. */ + @result Returns true if the allocation was successful, else false. +*/ virtual bool allocateRange( IORangeScalar start, IORangeScalar size ); /*! @function deallocate - @abstract Deallocate a range to the free list. + @abstract Deallocates a range to the free list. @discussion This method deallocates a range to the free list, given the start offset and length passed in. @param start The beginning of the range requested. - @param size The size of the range requested. */ + @param size The size of the range requested. +*/ virtual void deallocate( IORangeScalar start, IORangeScalar size ); diff --git a/iokit/IOKit/IORegistryEntry.h b/iokit/IOKit/IORegistryEntry.h index 315945052..ffb9d6844 100644 --- a/iokit/IOKit/IORegistryEntry.h +++ b/iokit/IOKit/IORegistryEntry.h @@ -91,6 +91,8 @@ public: IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; + OSMetaClassDeclareReservedUsed(IORegistryEntry, 0); + /*! @function copyProperty @abstract Synchronized method to obtain a property from a registry entry or one of its parents (or children) in the hierarchy. Available in Mac OS X 10.1 or later. @discussion This method will search for a property, starting first with this registry entry's property table, then iterating recursively through either the parent registry entries or the child registry entries of this entry. Once the first occurrence is found, it will look up and return the value of the property, using the OSDictionary::getObject semantics. The iteration keeps track of entries that have been recursed into previously to avoid loops. This method is synchronized with other IORegistryEntry accesses to the property table(s). @@ -104,6 +106,7 @@ public: IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; + OSMetaClassDeclareReservedUsed(IORegistryEntry, 1); /*! @function copyProperty @abstract Synchronized method to obtain a property from a registry entry or one of its parents (or children) in the hierarchy. Available in Mac OS X 10.1 or later. @@ -118,6 +121,7 @@ public: IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; + OSMetaClassDeclareReservedUsed(IORegistryEntry, 2); /*! @function copyParentEntry @abstract Returns a registry entry's first parent entry in a plane.
Available in Mac OS X 10.1 or later. @@ -126,6 +130,7 @@ public: @result Returns the first parent of the registry entry, or zero if the entry is not attached into the registry in that plane. A reference on the entry is returned to the caller, which should be released. */ virtual IORegistryEntry * copyParentEntry( const IORegistryPlane * plane ) const; + OSMetaClassDeclareReservedUsed(IORegistryEntry, 3); /*! @function copyChildEntry @abstract Returns a registry entry's first child entry in a plane. Available in Mac OS X 10.1 or later. @@ -134,16 +139,44 @@ public: @result Returns the first child of the registry entry, or zero if the entry is not attached into the registry in that plane. A reference on the entry is returned to the caller, which should be released. */ virtual IORegistryEntry * copyChildEntry( const IORegistryPlane * plane ) const; + OSMetaClassDeclareReservedUsed(IORegistryEntry, 4); -private: + /* method available in Mac OS X 10.4 or later */ +/*! + @typedef Action + @discussion Type and arguments of the callout C function that is used when +a runPropertyAction is executed by a client. Cast to this type when you want a C++ +member function to be used. Note the arg1 - arg3 parameters are passed straight through to the action callout. + @param target + Target of the function, can be used as a refcon. Note if a C++ function +was specified, this parameter is implicitly the first parameter in the target +member function's parameter list. + @param arg0 Argument to action from run operation. + @param arg1 Argument to action from run operation. + @param arg2 Argument to action from run operation. + @param arg3 Argument to action from run operation. +*/ + typedef IOReturn (*Action)(OSObject *target, + void *arg0, void *arg1, + void *arg2, void *arg3); + +/*! @function runPropertyAction + @abstract Single thread a call to an action with respect to the property lock. + @discussion Client function that causes the given action to be called in a manner that synchronizes with the registry iterators and serializers. This function can be used to synchronously manipulate the property table of this nub. + @param action Pointer to the function to be executed. + @param arg0 Parameter for action parameter, defaults to 0. + @param arg1 Parameter for action parameter, defaults to 0. + @param arg2 Parameter for action parameter, defaults to 0. + @param arg3 Parameter for action parameter, defaults to 0. + @result Returns the value of the Action callout. +*/ + virtual IOReturn runPropertyAction(Action action, OSObject *target, + void *arg0 = 0, void *arg1 = 0, + void *arg2 = 0, void *arg3 = 0); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 5); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 0); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 1); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 2); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 3); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 4); +private: - OSMetaClassDeclareReservedUnused(IORegistryEntry, 5); OSMetaClassDeclareReservedUnused(IORegistryEntry, 6); OSMetaClassDeclareReservedUnused(IORegistryEntry, 7); OSMetaClassDeclareReservedUnused(IORegistryEntry, 8); @@ -505,11 +538,11 @@ public: /*! @function inPlane @abstract Determines whether a registry entry is attached in a plane. - @discussion This method determines if the entry is attached in a plane to any other entry. - @param plane The plane object. - @result If the entry has a parent in the plane, true is returned, otherwise false is returned.
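A sketch of the new runPropertyAction in use; the MyStatistics key and variable names are invented, and only the Action typedef and the runPropertyAction signature come from this header:

    // The callout runs synchronized with the registry's property locking.
    static IOReturn setStatsAction(OSObject *target,
                                   void *arg0, void *, void *, void *)
    {
        IORegistryEntry * entry = (IORegistryEntry *) target;
        entry->setProperty("MyStatistics", (OSObject *) arg0);   // key is illustrative
        return kIOReturnSuccess;
    }

    // Caller side: single threads the property table update.
    IOReturn ret = entry->runPropertyAction(&setStatsAction, entry, statsObject);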
*/ + @discussion This method determines if the entry is attached in a plane to any other entry. It can also be used to determine if the entry is a member of any plane. + @param plane The plane object; 0 indicates any plane. + @result Returns true if the entry has a parent in the given plane, or, when plane is 0, if the entry has a parent in any plane; otherwise returns false. */ - virtual bool inPlane( const IORegistryPlane * plane ) const; + virtual bool inPlane( const IORegistryPlane * plane = 0 ) const; /*! @function getDepth @abstract Counts the maximum number of entries between an entry and the registry root, in a plane. diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index 8feb011e8..012585340 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -39,18 +39,21 @@ extern "C" { typedef kern_return_t IOReturn; #ifndef sys_iokit -#define sys_iokit err_system(0x38) +#define sys_iokit err_system(0x38) #endif /* sys_iokit */ -#define sub_iokit_common err_sub(0) -#define sub_iokit_usb err_sub(1) -#define sub_iokit_firewire err_sub(2) -#define sub_iokit_block_storage err_sub(4) -#define sub_iokit_graphics err_sub(5) -#define sub_iokit_bluetooth err_sub(8) -#define sub_iokit_pmu err_sub(9) -#define sub_iokit_reserved err_sub(-1) -#define iokit_common_err(return) (sys_iokit|sub_iokit_common|return) -#define iokit_family_err(sub,return) (sys_iokit|sub|return) +#define sub_iokit_common err_sub(0) +#define sub_iokit_usb err_sub(1) +#define sub_iokit_firewire err_sub(2) +#define sub_iokit_block_storage err_sub(4) +#define sub_iokit_graphics err_sub(5) +#define sub_iokit_bluetooth err_sub(8) +#define sub_iokit_pmu err_sub(9) +#define sub_iokit_vendor_specific err_sub(-2) +#define sub_iokit_reserved err_sub(-1) + +#define iokit_common_err(return) (sys_iokit|sub_iokit_common|return) +#define iokit_family_err(sub,return) (sys_iokit|sub|return) +#define iokit_vendor_specific_err(return) (sys_iokit|sub_iokit_vendor_specific|return) #define kIOReturnSuccess KERN_SUCCESS // OK #define kIOReturnError iokit_common_err(0x2bc) // general error diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index b86ea1b4d..71c47184e 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -314,12 +314,24 @@ public: virtual bool didTerminate( IOService * provider, IOOptionBits options, bool * defer ); + /* method available in Mac OS X 10.4 or later */ +/*! @function nextIdleTimeout + @abstract Allows subclasses to customize idle power management behavior. + @discussion Returns the next time that the device should idle into its next lower power state. Subclasses may override for custom idle behavior. + @param currentTime The current time. + @param lastActivity The time of last activity on this device. + @param powerState The device's current power state. + @result Returns the next time the device should idle off (in seconds, relative to the current time).
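The new sub_iokit_vendor_specific subsystem gives driver vendors a sanctioned space for their own IOReturn codes, composed with the iokit_vendor_specific_err macro added above; a short sketch in which the kMyTimeoutErr name is invented:

    // Encodes to (sys_iokit | sub_iokit_vendor_specific | 0x1) and
    // cannot collide with the Apple-defined kIOReturn* values.
    #define kMyTimeoutErr  iokit_vendor_specific_err(0x1)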
*/ + + virtual SInt32 nextIdleTimeout(AbsoluteTime currentTime, + AbsoluteTime lastActivity, unsigned int powerState); + private: OSMetaClassDeclareReservedUsed(IOService, 0); OSMetaClassDeclareReservedUsed(IOService, 1); OSMetaClassDeclareReservedUsed(IOService, 2); + OSMetaClassDeclareReservedUsed(IOService, 3); - OSMetaClassDeclareReservedUnused(IOService, 3); OSMetaClassDeclareReservedUnused(IOService, 4); OSMetaClassDeclareReservedUnused(IOService, 5); OSMetaClassDeclareReservedUnused(IOService, 6); diff --git a/iokit/IOKit/IOServicePM.h b/iokit/IOKit/IOServicePM.h index 965201f2a..55e34fc96 100644 --- a/iokit/IOKit/IOServicePM.h +++ b/iokit/IOKit/IOServicePM.h @@ -50,7 +50,7 @@ notification from an interested driver or the controlling driver its ack timer i /*! -@class IOPMpriv : public OSObject +@class IOPMpriv @abstract Private power management instance variables for IOService objects. */ class IOPMpriv : public OSObject @@ -61,116 +61,173 @@ class IOPMpriv : public OSObject public: -/*! @field we_are_root TRUE if this device is the root power domain */ +/*! @var we_are_root + TRUE if this device is the root power domain. +*/ bool we_are_root; - /*! @field interestedDrivers list of interested drivers */ + /*! @var interestedDrivers + List of interested drivers. + */ IOPMinformeeList * interestedDrivers; - /*! @field children list of power domain children */ + /*! @var children + List of power domain children. + */ IOPMinformeeList * children; - /*! @field changeList list of pending power state changes */ + /*! @var changeList + List of pending power state changes. + */ IOPMchangeNoteList * changeList; - /*! @field driver_timer timeout on waiting for controlling driver to acknowledgeSetPowerState */ + /*! @var driver_timer + Timeout on waiting for controlling driver to acknowledgeSetPowerState. + */ IOReturn driver_timer; - /*! @field ackTimer */ + /*! @var ackTimer */ thread_call_t ackTimer; - /*! @field settleTimer */ + /*! @var settleTimer */ thread_call_t settleTimer; - /*! @field machine_state state number of state machine processing current change note */ + /*! @var machine_state + State number of state machine processing current change note. + */ unsigned long machine_state; - /*! @field settle_time settle timer after changing power state */ + /*! @var settle_time + Settle timer after changing power state. + */ unsigned long settle_time; - /*! @field head_note ordinal of change note currently being processed */ + /*! @var head_note + Ordinal of change note currently being processed. + */ long head_note; - /*! @field head_note_flags copy of flags field in change note currently being processed*/ + /*! @var head_note_flags + Copy of flags field in change note currently being processed. + */ unsigned long head_note_flags; - /*! @field head_note_state copy of newStateNumberfield in change note currently being processed */ + /*! @var head_note_state + Copy of the newStateNumber field in change note currently being processed. + */ unsigned long head_note_state; - /*! @field head_note_outputFlags outputPowerCharacter field from change note currently being processed */ + /*! @var head_note_outputFlags + OutputPowerCharacter field from change note currently being processed. + */ unsigned long head_note_outputFlags; - /*! @field head_note_domainState power domain flags from parent... (only on parent change) */ + /*! @var head_note_domainState + Power domain flags from parent... (only on parent change). + */ unsigned long head_note_domainState; - /*!
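A sketch of a subclass overriding the new nextIdleTimeout hook; MyDriver, the super define, and kMyTopState are assumptions, while the signature comes from the declaration above:

    // Stretch the idle timeout while in the highest power state.
    SInt32 MyDriver::nextIdleTimeout(AbsoluteTime currentTime,
                                     AbsoluteTime lastActivity,
                                     unsigned int powerState)
    {
        SInt32 next = super::nextIdleTimeout(currentTime, lastActivity, powerState);
        if (powerState == kMyTopState)   // hypothetical power state ordinal
            next *= 3;                   // seconds, relative to currentTime
        return next;                     // when the device should idle off
    }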
@field head_note_parent pointer to initiating parent... (only on parent change) */ + /*! @var head_note_parent + Pointer to initiating parent... (only on parent change). + */ IOPowerConnection * head_note_parent; - /*! @field head_note_capabilityFlags copy of capabilityFlags field in change note currently being processed */ + /*! @var head_note_capabilityFlags + Copy of capabilityFlags field in change note currently being processed. + */ unsigned long head_note_capabilityFlags; - /*! @field head_note_pendingAcks number of acks we are waiting for during notification */ + /*! @var head_note_pendingAcks + Number of acks we are waiting for during notification. + */ unsigned long head_note_pendingAcks; - /*! @field our_lock used to control access to head_note_pendingAcks and driver_timer */ + /*! @var our_lock + Used to control access to head_note_pendingAcks and driver_timer. + */ IOLock * our_lock; - /*! @field flags_lock used to control access to response flags array */ + /*! @var flags_lock + Used to control access to response flags array. + */ IOLock * flags_lock; - /*! @field queue_lock used to control access to change note queue */ + /*! @var queue_lock + Used to control access to change note queue. + */ IOLock * queue_lock; - /*! @field initial_change true forces first state to be broadcast even if it isn't a change */ + /*! @var initial_change + True forces first state to be broadcast even if it isn't a change. + */ bool initial_change; - /*! @field need_to_become_usable someone called makeUsable before we had a controlling driver */ + /*! @var need_to_become_usable + Someone called makeUsable before we had a controlling driver. + */ bool need_to_become_usable; - /*! @field device_overrides state changes are made based only on subclass's desire */ + /*! @var device_overrides + State changes are made based only on subclass's desire. + */ bool device_overrides; - /*! @field clampOn domain is clamped on till first child registers */ + /*! @var clampOn + Domain is clamped on till first child registers. + */ bool clampOn; - /*! @field owner points to object which made this struct. Used for debug output only */ + /*! @var owner + Points to object which made this struct. Used for debug output only. + */ IOService * owner; - /*! @field activityLock used to protect activity flag */ + /*! @var activityLock + Used to protect activity flag. + */ IOLock * activityLock; - /*! @field timerEventSrc an idle timer */ + /*! @var timerEventSrc + An idle timer. + */ IOTimerEventSource * timerEventSrc; - /*! @field idle_timer_period its period in seconds */ + /*! @var idle_timer_period + Timer's period in seconds. + */ unsigned long idle_timer_period; - /*! @field clampTimerEventSrc timer for clamping power on */ + /*! @var clampTimerEventSrc + Timer for clamping power on. + */ IOTimerEventSource * clampTimerEventSrc; - /*! @field device_active true: there has been device activity since last idle timer expiration */ + /*! @var device_active + True: there has been device activity since last idle timer expiration. + */ bool device_active; - /*! @field device_active_timestamp time in ticks of last activity */ + /*! @var device_active_timestamp + Time in ticks of last activity. + */ AbsoluteTime device_active_timestamp; - /*! @field driverDesire -This is the power state desired by our controlling driver. It is initialized to myCurrentState and is changed -when the controlling driver calls changePowerStateTo. A change in driverDesire may cause a change in ourDesiredPowerState. + /*! 
@var driverDesire + This is the power state desired by our controlling driver. It is initialized to myCurrentState and is changed + when the controlling driver calls changePowerStateTo. A change in driverDesire may cause a change in ourDesiredPowerState. */ unsigned long driverDesire; - /*! @field deviceDesire -This is the power state desired by a subclassed device object. It is initialized to myCurrentState and is changed -when the subclassed object calls changePowerStateToPriv. A change in deviceDesire may cause a change in ourDesiredPowerState. + /*! @var deviceDesire + This is the power state desired by a subclassed device object. It is initialized to myCurrentState and is changed when the subclassed object calls changePowerStateToPriv. A change in deviceDesire may cause a change in ourDesiredPowerState. */ unsigned long deviceDesire; - /*! @field ourDesiredPowerState + /*! @var ourDesiredPowerState This is the power state we desire currently. If equal to myCurrentState, we're happy. Otherwise, we're waiting for the parent to raise the power domain to at least this level. @@ -197,7 +254,7 @@ a subclassed object asks for lower power for some reason via changePowerStateToP unsigned long ourDesiredPowerState; - /*! @field previousRequest + /*! @var previousRequest This is a reminder of what our parent thinks our need is. Whenever it changes, we call requestDomainState in the parent to keep it current. It is usually equal to ourDesiredPowerState except while a power change is in progress. @@ -205,19 +262,19 @@ except while a power change is in progress. unsigned long previousRequest; - /*! @field askingFor -Not used. + /*! @var askingFor + Not used. */ unsigned long askingFor; - /*! @field imminentState -Usually the same as myCurrentState, except right after calling powerStateWillChangeTo. + /*! @var imminentState + Usually the same as myCurrentState, except right after calling powerStateWillChangeTo. */ unsigned long imminentState; /*! @function serialize -Serialize private instance variables for debug output (IORegistryDumper). + Serialize private instance variables for debug output (IORegistryDumper). */ virtual bool serialize(OSSerialize *s) const; @@ -227,7 +284,7 @@ Serialize private instance variables for debug output (IORegistryDumper). /*! -@class IOPMprot : public OSObject +@class IOPMprot @abstract Protected power management instance variables for IOService objects. */ class IOPMprot : public OSObject //management @@ -238,72 +295,115 @@ class IOPMprot : public OSObject //management public: - /*! @field ourName from getName(), used in logging */ + /*! @var ourName + From getName(), used in logging. + */ const char * ourName; - /*! @field thePlatform from getPlatform, used in logging and registering */ + /*! @var thePlatform + From getPlatform, used in logging and registering. + */ IOPlatformExpert * thePlatform; - /*! @field theNumberOfPowerStates the number of states in the array */ + /*! @var theNumberOfPowerStates + The number of states in the array. + */ unsigned long theNumberOfPowerStates; // the number of states in the array - /*! @field thePowerStates the array */ + /*! @var thePowerStates + The array. + */ IOPMPowerState thePowerStates[IOPMMaxPowerStates]; - /*! @field theControllingDriver points to the controlling driver */ + /*! @var theControllingDriver + Points to the controlling driver. + */ IOService * theControllingDriver; - /*! @field aggressiveness current value of power management aggressiveness */ + /*!
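To make the desire bookkeeping above concrete: a controlling driver records its desire through changePowerStateTo, and a subclassed device object through changePowerStateToPriv, exactly as the two comments describe; the power state ordinal in this sketch is hypothetical:

    changePowerStateTo(kMyOnState);       // controlling driver, feeds driverDesire
    changePowerStateToPriv(kMyOnState);   // subclassed object, feeds deviceDesire

Either call may in turn move ourDesiredPowerState, as described in the comments that follow.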
@var aggressiveness + Current value of power management aggressiveness. + */ unsigned long aggressiveness; - /*! @field current_aggressiveness_values array of aggressiveness values */ + /*! @var current_aggressiveness_values + Array of aggressiveness values. + */ unsigned long current_aggressiveness_values [kMaxType+1]; - /*! @field current_aggressiveness_validity true for values that are currently valid */ + /*! @var current_aggressiveness_valid + True for values that are currently valid. + */ bool current_aggressiveness_valid [kMaxType+1]; - /*! @field myCurrentState the ordinal of our current power state */ + /*! @var myCurrentState + The ordinal of our current power state. + */ unsigned long myCurrentState; - /*! @field parentsKnowState true if all our parents know the state of their power domain */ + /*! @var parentsKnowState + True if all our parents know the state of their power domain. + */ bool parentsKnowState; - /*! @field parentsCurrentPowerFlags logical OR of power flags for the current state of each power domainparent */ + /*! @var parentsCurrentPowerFlags + Logical OR of power flags for the current state of each power domain parent. + */ IOPMPowerFlags parentsCurrentPowerFlags; - /*! @field maxCapability ordinal of highest state we can achieve in current power domain state */ + /*! @var maxCapability + Ordinal of highest state we can achieve in current power domain state. + */ unsigned long maxCapability; - /*! @field PMworkloop points to the single power management workloop */ + /*! @var PMworkloop + Points to the single power management workloop. + */ IOWorkLoop * PMworkloop; - /*! @field commandQueue used to serialize idle-power-down and busy-power-up */ + /*! @var commandQueue + Used to serialize idle-power-down and busy-power-up. + */ IOCommandQueue * commandQueue; - /*! @field PMcommandGate used to serialize timer expirations and incoming acknowledgements */ + /*! @var PMcommandGate + Used to serialize timer expirations and incoming acknowledgements. + */ IOCommandGate * PMcommandGate; - /*! @field myCharacterFlags logical OR of all output power character flags in the array */ + /*! @var myCharacterFlags + Logical OR of all output power character flags in the array. + */ IOPMPowerFlags myCharacterFlags; - /*! @field serialNumber used to uniquely identify power management notification to apps and clients */ + /*! @var serialNumber + Used to uniquely identify power management notification to apps and clients. + */ UInt16 serialNumber; - /*! @field responseFlags points to an OSArray which manages responses from notified apps and clients */ + /*! @var responseFlags + Points to an OSArray which manages responses from notified apps and clients. + */ OSArray* responseFlags; - /*! @field doNotPowerDown keeps track of any negative responses from notified apps and clients */ + /*! @var doNotPowerDown + Keeps track of any negative responses from notified apps and clients. + */ bool doNotPowerDown; - /*! @field childLock used to serialize scanning the children */ + /*! @var childLock + Used to serialize scanning the children. + */ IOLock * childLock; - /*! @field parentLock used to serialize scanning the parents */ + /*! @var parentLock + Used to serialize scanning the parents. + */ IOLock * parentLock; - /*! @field outofbandparameter used to communicate desired function to tellClientsWithResponse(). - This is used because it avoids changing the signatures of the affected - virtual methods. */ + /*!
@var outofbandparameter + Used to communicate desired function to tellClientsWithResponse(). + This is used because it avoids changing the signatures of the affected virtual methods. + */ int outofbandparameter; /*! @function serialize diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index cf07474f8..d6c42e12b 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -147,10 +147,6 @@ IOTimeStamp(unsigned int csc, /* DBG_IOKIT/DBG_IOMDESC codes */ /* DBG_IOKIT/DBG_IOPOWER codes */ -#define IOPOWER_ROOT 1 /* 0x05100004 */ -#define IOPOWER_WAKE 2 /* 0x05100008 */ -#define IOPOWER_STATE 3 /* 0x0510000c */ -#define IOPOWER_ACK 4 /* 0x05100010 */ -#define IOPOWER_CLIENT 5 /* 0x05100014 */ +// See IOKit/pwr_mgt/IOPMlog.h for the power management codes #endif /* ! IOKIT_IOTIMESTAMP_H */ diff --git a/iokit/IOKit/IOTimerEventSource.h b/iokit/IOKit/IOTimerEventSource.h index fd7335821..80fb27309 100644 --- a/iokit/IOKit/IOTimerEventSource.h +++ b/iokit/IOKit/IOTimerEventSource.h @@ -62,9 +62,13 @@ protected: AbsoluteTime abstime; /*! @struct ExpansionData - @discussion This structure will be used to expand the capablilties of the IOWorkLoop in the future. + @discussion This structure is private to the IOTimerEventSource implementation. */ - struct ExpansionData { }; + struct ExpansionData + { + SInt32 calloutGeneration; + IOWorkLoop * workLoop; + }; /*! @var reserved Reserved for future use. (Internal use only) */ @@ -89,6 +93,8 @@ protected: @abstract Implemented because it is mandatory in $link IOEventSource, but IOTimerEventSources don't actually use this work-loop mechanism. */ virtual bool checkForWork(); + virtual void setWorkLoop(IOWorkLoop *workLoop); + public: /*! @typedef Action @@ -203,6 +209,9 @@ public: @discussion Clear down any outstanding calls. By the time this function completes it is guaranteed that the action will not be called again. */ virtual void cancelTimeout(); +private: + static void timeoutAndRelease(void *self, void *wl); + private: OSMetaClassDeclareReservedUnused(IOTimerEventSource, 0); OSMetaClassDeclareReservedUnused(IOTimerEventSource, 1); diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index 23399b939..417fe2020 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -81,6 +81,7 @@ typedef UInt32 IOCacheMode; typedef UInt32 IOByteCount; + /* LP64todo - these will need to expand to mach_vm_address_t */ typedef vm_address_t IOVirtualAddress; typedef IOVirtualAddress IOLogicalAddress; @@ -151,11 +152,12 @@ typedef mach_port_t io_object_t; #include typedef io_object_t io_connect_t; +typedef io_object_t io_enumerator_t; typedef io_object_t io_iterator_t; typedef io_object_t io_registry_entry_t; typedef io_object_t io_service_t; -typedef io_object_t io_enumerator_t; +#define IO_OBJECT_NULL ((io_object_t) 0) #endif /* MACH_KERNEL */ @@ -189,7 +191,8 @@ enum { kIOMapReadOnly = 0x00001000, kIOMapStatic = 0x01000000, - kIOMapReference = 0x02000000 + kIOMapReference = 0x02000000, + kIOMapUnique = 0x04000000 }; /*! @enum Scale Factors @@ -197,13 +200,15 @@ @constant kNanosecondScale Scale factor for nanosecond based times. @constant kMicrosecondScale Scale factor for microsecond based times. @constant kMillisecondScale Scale factor for millisecond based times. + @constant kTickScale Scale factor for the standard (100Hz) tick.
@constant kSecondScale Scale factor for second based times. */ enum { kNanosecondScale = 1, kMicrosecondScale = 1000, kMillisecondScale = 1000 * 1000, - kSecondScale = 1000 * 1000 * 1000 + kSecondScale = 1000 * 1000 * 1000, + kTickScale = (kSecondScale / 100) }; /* compatibility types */ diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index e6a891c50..d5c1d23d2 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -79,6 +79,12 @@ enum { #define kIOClientPrivilegeAdministrator "root" #define kIOClientPrivilegeLocalUser "local" +/*! + @class IOUserClient + @abstract Provides a basis for communication between client applications and I/O Kit objects. +*/ + + class IOUserClient : public IOService { OSDeclareAbstractStructors(IOUserClient) @@ -86,11 +92,12 @@ class IOUserClient : public IOService protected: /*! @struct ExpansionData @discussion This structure will be used to expand the capabilities of this class in the future. - */ +*/ struct ExpansionData { }; /*! @var reserved - Reserved for future use. (Internal use only) */ + Reserved for future use. (Internal use only) +*/ ExpansionData * reserved; public: @@ -174,9 +181,9 @@ public: /*! @function exportObjectToClient Make an arbitrary OSObject available to the client task. - @param task The task - @param obj The object we want to export to the client - @param clientObj returned value is the client's port name. + @param task The task. + @param obj The object we want to export to the client. + @param clientObj Returned value is the client's port name. */ virtual IOReturn exportObjectToClient(task_t task, OSObject *obj, io_object_t *clientObj); diff --git a/iokit/IOKit/IOWorkLoop.h b/iokit/IOKit/IOWorkLoop.h index f3c6e1a1e..f71331864 100644 --- a/iokit/IOKit/IOWorkLoop.h +++ b/iokit/IOKit/IOWorkLoop.h @@ -39,14 +39,16 @@ HISTORY #include class IOEventSource; +class IOTimerEventSource; class IOCommandGate; -/*! @class IOWorkLoop : public OSObject - @discussion An IOWorkLoop is a thread of control that is intended to be used to provide single threaded access to hardware. This class has no knowledge of the nature and type of the events that it marshals and forwards. When an device driver sucessfully starts, See $link IOService::start it is expected to create the event sources it will need to receive events from. Then a work loop is initialised and the events are added to the work loop for monitoring. In general this set up will be automated by the family superclass of the specific device. +/*! @class IOWorkLoop + @discussion An IOWorkLoop is a thread of control that is intended to be used to provide single threaded access to hardware. This class has no knowledge of the nature and type of the events that it marshals and forwards. When a device driver successfully starts (see IOService::start), it is expected to create the event sources it will need to receive events. Then a work loop is initialized and the events are added to the work loop for monitoring. In general this set up will be automated by the family superclass of the specific device.
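Like the existing constants, kTickScale expresses one unit of the named period in nanoseconds, so one 100 Hz tick is 10,000,000 ns. A sketch using the kernel's clock_interval_to_deadline routine, assuming its conventional (interval, scale_factor, result) form:

    // Deadline five scheduler ticks (50 ms) from now.
    uint64_t deadline;
    clock_interval_to_deadline(5, kTickScale, &deadline);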

- The thread main method walks the event source linked list and messages each one requesting a work check. At this point each event source is expected to notify their registered owner that the event has occured. After each event has been walked and they indicate that another loop isn't required by the 'more' flag being false the thread will go to sleep on a signaling semaphore. + The thread main method walks the event source linked list and messages each one requesting a work check. At this point each event source is expected to notify its registered owner that the event has occurred. After each event has been walked and each indicates that another loop isn't required (by setting the 'more' flag to false) the thread will go to sleep on a signaling semaphore.

- When an event source is registered with a work loop it is informed of the semaphore to use to wake up the loop.*/ + When an event source is registered with a work loop it is informed of the semaphore to use to wake up the loop. +*/ class IOWorkLoop : public OSObject { OSDeclareDefaultStructors(IOWorkLoop) @@ -60,7 +62,7 @@ member function to be used. Note the arg1 - arg3 parameters are passed straight through from the runCommand to the action callout. @param target Target of the function, can be used as a refcon. Note if a C++ function -was specified this parameter is implicitly the first paramter in the target +was specified, this parameter is implicitly the first parameter in the target member function's parameter list. @param arg0 Argument to action from run operation. @param arg1 Argument to action from run operation. @@ -72,38 +74,43 @@ member function's parameter list. void *arg2, void *arg3); private: -/*! @function launchThreadMain - @abstract Static function that setup thread state and calls the continuation function, $link threadMainContinuation */ - static void launchThreadMain(void *self); - /*! @function threadMainContinuation - @abstract Static function that calls the $link threadMain function. */ - static void threadMainContinuation(); + @abstract Static function that calls the threadMain function. +*/ + static void threadMainContinuation(IOWorkLoop *self); protected: /*! @typedef maintCommandEnum - @discussion Enumeration of commands that $link _maintCommand can deal with. - @enum + @discussion Enumeration of commands that _maintCommand can deal with. @constant mAddEvent Used to tag an Add event source command. - @constant mRemoveEvent Used to tag a Remove event source command. */ + @constant mRemoveEvent Used to tag a Remove event source command. +*/ typedef enum { mAddEvent, mRemoveEvent } maintCommandEnum; /*! @var gateLock - Mutual exlusion lock that used by close and open Gate functions. */ + Mutual exclusion lock that is used by close and open Gate functions. +*/ IORecursiveLock *gateLock; -/*! @var eventChain Pointer to first Event Source in linked list. */ +/*! @var eventChain + Pointer to first event source in linked list. +*/ IOEventSource *eventChain; -/*! @var controlG Internal control gate to maintain event system. */ +/*! @var controlG + Internal control gate to maintain event system. +*/ IOCommandGate *controlG; /*! @var workSpinLock - The spin lock that is used to guard the 'workToDo' variable. */ + The spin lock that is used to guard the 'workToDo' variable. +*/ IOSimpleLock *workToDoLock; -/*! @var workThread Work loop thread. */ +/*! @var workThread + Work loop thread. +*/ IOThread workThread; /*! @var workToDo @@ -112,96 +119,114 @@ protected: volatile bool workToDo; /*! @var loopRestart - If event chain has been changed and the system has to be rechecked from start this flag is set. (Internal use only) */ + Set if an event chain has been changed and the system has to be rechecked from start. (Internal use only) +*/ bool loopRestart; /*! @struct ExpansionData @discussion This structure will be used to expand the capabilities of the IOWorkLoop in the future. - */ +*/ struct ExpansionData { }; /*! @var reserved - Reserved for future use. (Internal use only) */ + Reserved for future use. (Internal use only) +*/ ExpansionData *reserved; /*! @function _maintRequest - @abstract Synchrounous implementation of $link addEventSource & $link removeEventSource functions. */ + @abstract Synchronous implementation of addEventSource and removeEventSource functions.
+ @discussion This function implements the commands as defined in the maintCommandEnum. It can be subclassed but it isn't an external API in the usual sense. A subclass implementation of _maintRequest would be called synchronously with respect to the work loop and it should be implemented in the usual way that an ioctl would be. + @result Returns kIOReturnUnsupported if the command given is not implemented, kIOReturnSuccess otherwise. +*/ virtual IOReturn _maintRequest(void *command, void *data, void *, void *); /*! @function free - @discussion Mandatory free of the object independent of the current retain count. If the work loop is running this method will not return until the thread has succefully terminated. Each event source in the chain will be released and the working semaphore will be destroyed. + @discussion Mandatory free of the object independent of the current retain count. If the work loop is running, this method will not return until the thread has successfully terminated. Each event source in the chain will be released and the working semaphore will be destroyed.

- If the client has some outstanding requests on an event they will never be informed of completion. If an external thread is blocked on any of the event sources they will be awoken with a KERN_INTERUPTED status. */ + If the client has some outstanding requests on an event they will never be informed of completion. If an external thread is blocked on any of the event sources they will be awakened with a KERN_INTERRUPTED status. +*/ virtual void free(); /*! @function threadMain - @discussion Work loop threads main function. This function consists of 3 loops: the outermost loop is the semaphore clear and wait loop, the middle loop terminates when there is no more work and the inside loop walks the event list calling the $link checkForWork method in each event source. If an event source has more work to do then it can set the more flag and the middle loop will repeat. When no more work is outstanding the outermost will sleep until and event is signaled or the least wakeupTime whichever occurs first. If the event source does not require the semaphore wait to time out it must set the provided wakeupTime parameter to zero. */ + @discussion Work loop threads main function. This function consists of 3 loops: the outermost loop is the semaphore clear and wait loop, the middle loop terminates when there is no more work, and the inside loop walks the event list calling the checkForWork method in each event source. If an event source has more work to do, it can set the more flag and the middle loop will repeat. When no more work is outstanding the outermost loop will sleep until an event is signaled or the least wakeupTime, whichever occurs first. If the event source does not require the semaphore wait to time out, it must set the provided wakeupTime parameter to zero. +*/ virtual void threadMain(); public: /*! @function workLoop - @abstract Factory member function to constuct and intialise a work loop. - @result workLoop instance if constructed successfully, 0 otherwise. */ + @abstract Factory member function to construct and initialize a work loop. + @result Returns a workLoop instance if constructed successfully, 0 otherwise. +*/ static IOWorkLoop *workLoop(); /*! @function init - @description - Initialises an instance of the workloop. This method creates and initialses the signaling semaphore and forks the thread that will continue executing. - @result true if initialised successfully, false otherwise. */ + @discussion Initializes an instance of the work loop. This method creates and initializes the signaling semaphore and forks the thread that will continue executing. + @result Returns true if initialized successfully, false otherwise. +*/ virtual bool init(); /*! @function getThread - @abstract Get'ter for $link workThread. - @result Returns workThread */ + @abstract Gets the workThread. + @result Returns workThread. +*/ virtual IOThread getThread() const; /*! @function onThread @abstract Is the current execution context on the work thread? - @result Returns true if IOThreadSelf() == workThread. */ + @result Returns true if IOThreadSelf() == workThread. +*/ virtual bool onThread() const; /*! @function inGate @abstract Is the current execution context holding the work-loop's gate? - @result Returns true if IOThreadSelf() is gate holder. */ + @result Returns true if IOThreadSelf() is gate holder. +*/ virtual bool inGate() const; /*! @function addEventSource - @discussion Add an event source to be monitored by the work loop.
This function does not return until the work loop has acknowledged the arrival of the new event source. When a new event has been added the threadMain will always restart it's loop and check all outstanding events. The event source is retained by the work loop - @param newEvent Pointer to $link IOEventSource subclass to add. - @result Always returns kIOReturnSuccess. */ + @discussion Add an event source to be monitored by the work loop. This function does not return until the work loop has acknowledged the arrival of the new event source. When a new event has been added the threadMain will always restart its loop and check all outstanding events. The event source is retained by the work loop. + @param newEvent Pointer to IOEventSource subclass to add. + @result Always returns kIOReturnSuccess. +*/ virtual IOReturn addEventSource(IOEventSource *newEvent); /*! @function removeEventSource - @discussion Remove an event source from the work loop. This function does not return until the work loop has acknowledged the removal of the event source. When an event has been removed the threadMain will always restart it's loop and check all outstanding events. The event source will be released before return. - @param toRemove Pointer to $link IOEventSource subclass to remove. - @result kIOReturnSuccess if successful, kIOReturnBadArgument if toRemove couldn't be found. */ + @discussion Remove an event source from the work loop. This function does not return until the work loop has acknowledged the removal of the event source. When an event has been removed the threadMain will always restart its loop and check all outstanding events. The event source will be released before return. + @param toRemove Pointer to IOEventSource subclass to remove. + @result Returns kIOReturnSuccess if successful, kIOReturnBadArgument if toRemove couldn't be found. +*/ virtual IOReturn removeEventSource(IOEventSource *toRemove); /*! @function enableAllEventSources - @abstract Call enable() in all event sources - @discussion For all event sources in $link eventChain call enable() function. See $link IOEventSource::enable() */ + @abstract Calls enable() in all event sources. + @discussion For all event sources in eventChain, call the enable() function. See IOEventSource::enable(). +*/ virtual void enableAllEventSources() const; /*! @function disableAllEventSources - @abstract Call disable() in all event sources - @discussion For all event sources in $link eventChain call disable() function. See $link IOEventSource::disable() */ + @abstract Calls disable() in all event sources. + @discussion For all event sources in eventChain, call the disable() function. See IOEventSource::disable(). +*/ virtual void disableAllEventSources() const; /*! @function enableAllInterrupts - @abstract Call enable() in all interrupt event sources - @discussion For all event sources, ES, for which IODynamicCast(IOInterruptEventSource, ES) is valid, in $link eventChain call enable() function. See $link IOEventSource::enable() */ + @abstract Calls enable() in all interrupt event sources. + @discussion For all event sources (ES) in eventChain for which IODynamicCast(IOInterruptEventSource, ES) is valid, call the enable() function. See IOEventSource::enable(). +*/ virtual void enableAllInterrupts() const; /*! @function disableAllInterrupts - @abstract Call disable() in all interrupt event sources - @discussion For all event sources, ES, for which IODynamicCast(IOInterruptEventSource, ES) is valid, in $link eventChain call disable() function.
See $link IOEventSource::disable() */ + @abstract Calls disable() in all interrupt event sources. + @discussion For all event sources (ES) in eventChain for which IODynamicCast(IOInterruptEventSource, ES) is valid, call the disable() function. See IOEventSource::disable(). +*/ virtual void disableAllInterrupts() const; protected: // Internal APIs used by event sources to control the thread friend class IOEventSource; + friend class IOTimerEventSource; virtual void signalWorkAvailable(); virtual void openGate(); virtual void closeGate(); @@ -215,17 +240,14 @@ public: /*! @function runAction @abstract Single thread a call to an action with the work-loop. @discussion Client function that causes the given action to be called in -a single threaded manner. Beware the work-loop's gate is recursive and runAction - can cause direct or indirect re-entrancy. When the executing on a -client's thread runAction will sleep until the work-loop's gate opens for -execution of client actions, the action is single threaded against all other -work-loop event sources. +a single threaded manner. Beware: the work-loop's gate is recursive and runAction can cause direct or indirect re-entrancy. When executing on a client's thread, runAction will sleep until the work-loop's gate opens for +execution of client actions; the action is single threaded against all other work-loop event sources. @param action Pointer to function to be executed in work-loop context. @param arg0 Parameter for action parameter, defaults to 0. @param arg1 Parameter for action parameter, defaults to 0. @param arg2 Parameter for action parameter, defaults to 0. @param arg3 Parameter for action parameter, defaults to 0. - @result return value of the Action callout.
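runAction follows the same Action-callout pattern as IORegistryEntry::runPropertyAction earlier in this patch; a sketch in which MyDriver and configureHardware are invented names:

    static IOReturn configureAction(OSObject *owner,
                                    void *, void *, void *, void *)
    {
        // Runs with the work-loop gate closed, single threaded against
        // every event source on this work loop.
        return ((MyDriver *) owner)->configureHardware();
    }

    IOReturn ret = getWorkLoop()->runAction(&configureAction, this);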
*/ virtual IOReturn runAction(Action action, OSObject *target, void *arg0 = 0, void *arg1 = 0, diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile index f9a6e8586..fdcf6ccb7 100644 --- a/iokit/IOKit/Makefile +++ b/iokit/IOKit/Makefile @@ -28,7 +28,9 @@ EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -NOT_EXPORT_HEADERS = IOKitKeysPrivate.h +NOT_EXPORT_HEADERS = + +NOT_KF_MI_HEADERS = $(NOT_EXPORT_HEADERS) IOKitKeysPrivate.h IOCPU.h IOPolledInterface.h NOT_LOCAL_HEADERS = @@ -46,5 +48,7 @@ EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) EXPORT_MI_DIR = IOKit +INSTALL_KF_MI_LIST = $(filter-out $(NOT_KF_MI_HEADERS), $(ALL_HEADERS)) + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/iokit/IOKit/OSMessageNotification.h b/iokit/IOKit/OSMessageNotification.h index 74bcb1f64..75f3f95ec 100644 --- a/iokit/IOKit/OSMessageNotification.h +++ b/iokit/IOKit/OSMessageNotification.h @@ -80,7 +80,12 @@ struct OSNotificationHeader { vm_size_t size; /* content size */ natural_t type; OSAsyncReference reference; + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + unsigned char content[]; +#else unsigned char content[0]; +#endif }; struct IOServiceInterestContent { @@ -90,7 +95,11 @@ struct IOServiceInterestContent { struct IOAsyncCompletionContent { IOReturn result; +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + void * args[]; +#else void * args[0]; +#endif }; #ifndef __cplusplus diff --git a/iokit/IOKit/i386/IOSharedLockImp.h b/iokit/IOKit/i386/IOSharedLockImp.h index e02faf799..421f8d9eb 100644 --- a/iokit/IOKit/i386/IOSharedLockImp.h +++ b/iokit/IOKit/i386/IOSharedLockImp.h @@ -49,16 +49,6 @@ #include -// 'Till we're building in kernel -.macro DISABLE_PREEMPTION -#ifdef KERNEL -#endif -.endmacro -.macro ENABLE_PREEMPTION -#ifdef KERNEL -#endif -.endmacro - /* * void * ev_lock(p) @@ -95,7 +85,6 @@ LEAF(_ev_unlock, 0) LEAF(_IOSpinUnlock, 0) movl 4(%esp), %ecx movl $0, (%ecx) - ENABLE_PREEMPTION() ret END(_ev_unlock) @@ -111,7 +100,6 @@ END(_ev_unlock) LEAF(_ev_try_lock, 0) LEAF(_IOTrySpinLock, 0) - DISABLE_PREEMPTION() movl 4(%esp), %ecx xorl %eax, %eax lock @@ -120,7 +108,6 @@ LEAF(_IOTrySpinLock, 0) movl $1, %eax /* yes */ ret 1: - ENABLE_PREEMPTION() xorl %eax, %eax /* no */ END(_ev_try_lock) diff --git a/iokit/IOKit/i386/Makefile b/iokit/IOKit/i386/Makefile index 05dac0f86..3f51d0a48 100644 --- a/iokit/IOKit/i386/Makefile +++ b/iokit/IOKit/i386/Makefile @@ -28,7 +28,7 @@ INSTALL_MD_LIST = ${HEADER_LIST} INSTALL_MD_LCL_LIST = "" INSTALL_MD_DIR = $(MD_DIR) -EXPORT_MD_LIST = ${HEADER_LIST} +EXPORT_MD_LIST = EXPORT_MD_DIR = IOKit/$(MD_DIR) include $(MakeInc_rule) diff --git a/iokit/IOKit/ppc/IOSharedLockImp.h b/iokit/IOKit/ppc/IOSharedLockImp.h index 53c3a98cd..3644a328c 100644 --- a/iokit/IOKit/ppc/IOSharedLockImp.h +++ b/iokit/IOKit/ppc/IOSharedLockImp.h @@ -54,33 +54,6 @@ #include #endif -.macro DISABLE_PREEMPTION -#ifdef KERNEL - stwu r1,-(FM_SIZE)(r1) - mflr r0 - stw r3,FM_ARG0(r1) - stw r0,(FM_SIZE+FM_LR_SAVE)(r1) - bl EXT(_disable_preemption) - lwz r3,FM_ARG0(r1) - lwz r1,0(r1) - lwz r0,FM_LR_SAVE(r1) - mtlr r0 -#endif -.endmacro -.macro ENABLE_PREEMPTION -#ifdef KERNEL - stwu r1,-(FM_SIZE)(r1) - mflr r0 - stw r3,FM_ARG0(r1) - stw r0,(FM_SIZE+FM_LR_SAVE)(r1) - bl EXT(_enable_preemption) - lwz r3,FM_ARG0(r1) - lwz r1,0(r1) - lwz r0,FM_LR_SAVE(r1) - mtlr r0 -#endif -.endmacro - /* * void * ev_lock(p) @@ -149,7 +122,6 @@ LEAF(_ev_unlock) sync li a7,0 stw 
a7,0(a0) - ENABLE_PREEMPTION() blr END(_ev_unlock) @@ -157,7 +129,6 @@ LEAF(_IOSpinUnlock) sync li a7,0 stw a7,0(a0) - ENABLE_PREEMPTION() blr END(_IOSpinUnlock) @@ -170,9 +141,6 @@ END(_IOSpinUnlock) */ LEAF(_ev_try_lock) - - DISABLE_PREEMPTION() - li a6,1 // lock value lwz a7,0(a0) // Get lock word @@ -192,16 +160,12 @@ LEAF(_ev_try_lock) stwcx. a7,a7,r1 // Kill reservation 6: - ENABLE_PREEMPTION() li a0,0 // return FALSE blr END(_ev_try_lock) LEAF(_IOTrySpinLock) - - DISABLE_PREEMPTION() - li a6,1 // lock value lwz a7,0(a0) // Get lock word @@ -221,7 +185,6 @@ LEAF(_IOTrySpinLock) stwcx. a7,a7,r1 // Kill reservation 6: - ENABLE_PREEMPTION() li a0,0 // return FALSE blr diff --git a/iokit/IOKit/ppc/Makefile b/iokit/IOKit/ppc/Makefile index 23ca4f7bf..321a0a5fd 100644 --- a/iokit/IOKit/ppc/Makefile +++ b/iokit/IOKit/ppc/Makefile @@ -11,7 +11,7 @@ include $(MakeInc_cmd) include $(MakeInc_def) MD_DIR = ppc -NOT_EXPORT_HEADERS = +NOT_EXPORT_HEADERS = IOSharedLockImp.h INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index d91ece714..5f8dd4760 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -269,9 +269,11 @@ enum { kPMMinutesToSleep, kPMEthernetWakeOnLANSettings, kPMSetProcessorSpeed, - kPMPowerSource + kPMPowerSource, + kPMMotionSensor, + kPMLastAggressivenessType }; -#define kMaxType kPMEthernetWakeOnLANSettings +#define kMaxType (kPMLastAggressivenessType-1) // SetAggressiveness values for the kPMPowerSource aggressiveness type enum { diff --git a/iokit/IOKit/pwr_mgt/IOPMlog.h b/iokit/IOKit/pwr_mgt/IOPMlog.h index 401197aeb..1ad911a5a 100644 --- a/iokit/IOKit/pwr_mgt/IOPMlog.h +++ b/iokit/IOKit/pwr_mgt/IOPMlog.h @@ -19,50 +19,105 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#define PMlogSetParent 1 -#define PMlogAddChild 2 -#define PMlogRemoveChild 3 -#define PMlogControllingDriver 4 -#define PMlogControllingDriverErr1 5 /* bad power state array version */ -#define PMlogControllingDriverErr2 6 /* too many power states */ -#define PMlogControllingDriverErr3 7 /* not a real IOPMDriver */ -#define PMlogControllingDriverErr4 8 /* power state change in progress */ -#define PMlogInterestedDriver 9 -#define PMlogAcknowledgeErr1 10 /* unknown entity called acknowledgePowerChange */ -#define PMlogChildAcknowledge 11 -#define PMlogDriverAcknowledge 12 /* interested driver acknowledges */ -#define PMlogAcknowledgeErr2 13 /* object has already acked */ -#define PMlogAcknowledgeErr3 14 /* not expecting any acks */ -#define PMlogAcknowledgeErr4 15 /* not expecting acknowledgeSetPowerState */ -#define PMlogDriverAcknowledgeSet 16 /* controlling driver acknowledges */ -#define PMlogWillChange 17 -#define PMlogDidChange 18 -#define PMlogRequestDomain 19 -#define PMlogMakeUsable 20 -#define PMlogChangeStateTo 21 -#define PMlogChangeStateToPriv 22 -#define PMlogSetAggressiveness 23 -#define PMlogCriticalTemp 24 -#define PMlogOverrideOn 25 -#define PMlogOverrideOff 26 -#define PMlogEnqueueErr 27 /* change queue overflow */ -#define PMlogCollapseQueue 28 -#define PMlogChangeDone 29 -#define PMlogCtrlDriverTardy 30 /* controlling driver didn't acknowledge */ -#define PMlogIntDriverTardy 31 /* interested driver didn't acknowledge */ -#define PMlogStartAckTimer 32 -#define PMlogStartParentChange 33 -#define PMlogAmendParentChange 34 -#define PMlogStartDeviceChange 35 -#define PMlogRequestDenied 36 /* parent denied domain state change request */ -#define PMlogControllingDriverErr5 37 /* zero power states or we already have a driver 
with more power states */ -#define PMlogProgramHardware 38 -#define PMlogInformDriverPreChange 39 -#define PMlogInformDriverPostChange 40 -#define PMlogRemoveDriver 41 -#define PMsetIdleTimerPeriod 42 -#define PMlogSystemWake 43 -#define PMlogAcknowledgeErr5 44 -#define PMlogClientAcknowledge 45 -#define PMlogClientTardy 46 /* application or kernel client didn't acknowledge */ -#define PMlogClientCancel 47 +enum PMLogEnum { + kPMLogSetParent = 1, // 1 0x05100004 + kPMLogAddChild, // 2 0x05100008 + kPMLogRemoveChild, // 3 0x0510000c + kPMLogControllingDriver, // 4 0x05100010 + kPMLogControllingDriverErr1, // 5 0x05100014 - bad power state array version + kPMLogControllingDriverErr2, // 6 0x05100018 - too many power states + kPMLogControllingDriverErr3, // 7 0x0510001c - not a real IOPMDriver + kPMLogControllingDriverErr4, // 8 0x05100020 - power state change in progress + kPMLogInterestedDriver, // 9 0x05100024 + kPMLogAcknowledgeErr1, // 10 0x05100028 - unknown entity called acknowledgePowerChange + kPMLogChildAcknowledge, // 11 0x0510002c + kPMLogDriverAcknowledge, // 12 0x05100030 - interested driver acknowledges + kPMLogAcknowledgeErr2, // 13 0x05100034 - object has already acked + kPMLogAcknowledgeErr3, // 14 0x05100038 - not expecting any acks + kPMLogAcknowledgeErr4, // 15 0x0510003c - not expecting acknowledgeSetPowerState + kPMLogDriverAcknowledgeSet, // 16 0x05100040 - controlling driver acknowledges + kPMLogWillChange, // 17 0x05100044 + kPMLogDidChange, // 18 0x05100048 + kPMLogRequestDomain, // 19 0x0510004c + kPMLogMakeUsable, // 20 0x05100050 + kPMLogChangeStateTo, // 21 0x05100054 + kPMLogChangeStateToPriv, // 22 0x05100058 + kPMLogSetAggressiveness, // 23 0x0510005c + kPMLogCriticalTemp, // 24 0x05100060 + kPMLogOverrideOn, // 25 0x05100064 + kPMLogOverrideOff, // 26 0x05100068 + kPMLogEnqueueErr, // 27 0x0510006c - change queue overflow + kPMLogCollapseQueue, // 28 0x05100070 + kPMLogChangeDone, // 29 0x05100074 + kPMLogCtrlDriverTardy, // 30 0x05100078 - controlling driver didn't acknowledge + kPMLogIntDriverTardy, // 31 0x0510007c - interested driver didn't acknowledge + kPMLogStartAckTimer, // 32 0x05100080 + kPMLogStartParentChange, // 33 0x05100084 + kPMLogAmendParentChange, // 34 0x05100088 + kPMLogStartDeviceChange, // 35 0x0510008c + kPMLogRequestDenied, // 36 0x05100090 - parent denied domain state change request + kPMLogControllingDriverErr5, // 37 0x05100094 - zero power states or we already have a driver with more power states + kPMLogProgramHardware, // 38 0x05100098 + kPMLogInformDriverPreChange, // 39 0x0510009c + kPMLogInformDriverPostChange, // 40 0x051000a0 + kPMLogRemoveDriver, // 41 0x051000a4 + kPMLogSetIdleTimerPeriod, // 42 0x051000a8 + kPMLogSystemWake, // 43 0x051000ac + kPMLogAcknowledgeErr5, // 44 0x051000b0 + kPMLogClientAcknowledge, // 45 0x051000b4 + kPMLogClientTardy, // 46 0x051000b8 - application didn't acknowledge + kPMLogClientCancel, // 47 0x051000bc + kPMLogClientNotify, // 48 0x051000c0 - client sent a notification + kPMLogAppNotify, // 49 0x051000c4 - application sent a notification + kIOPMlogLastEvent +}; + +// Deprecated Power Management Logging Constants +#define PMlogSetParent kPMLogSetParent +#define PMlogAddChild kPMLogAddChild +#define PMlogRemoveChild kPMLogRemoveChild +#define PMlogControllingDriver kPMLogControllingDriver +#define PMlogControllingDriverErr1 kPMLogControllingDriverErr1 +#define PMlogControllingDriverErr2 kPMLogControllingDriverErr2 +#define PMlogControllingDriverErr3 kPMLogControllingDriverErr3 +#define 
PMlogControllingDriverErr4 kPMLogControllingDriverErr4 +#define PMlogInterestedDriver kPMLogInterestedDriver +#define PMlogAcknowledgeErr1 kPMLogAcknowledgeErr1 +#define PMlogChildAcknowledge kPMLogChildAcknowledge +#define PMlogDriverAcknowledge kPMLogDriverAcknowledge +#define PMlogAcknowledgeErr2 kPMLogAcknowledgeErr2 +#define PMlogAcknowledgeErr3 kPMLogAcknowledgeErr3 +#define PMlogAcknowledgeErr4 kPMLogAcknowledgeErr4 +#define PMlogDriverAcknowledgeSet kPMLogDriverAcknowledgeSet +#define PMlogWillChange kPMLogWillChange +#define PMlogDidChange kPMLogDidChange +#define PMlogRequestDomain kPMLogRequestDomain +#define PMlogMakeUsable kPMLogMakeUsable +#define PMlogChangeStateTo kPMLogChangeStateTo +#define PMlogChangeStateToPriv kPMLogChangeStateToPriv +#define PMlogSetAggressiveness kPMLogSetAggressiveness +#define PMlogCriticalTemp kPMLogCriticalTemp +#define PMlogOverrideOn kPMLogOverrideOn +#define PMlogOverrideOff kPMLogOverrideOff +#define PMlogEnqueueErr kPMLogEnqueueErr +#define PMlogCollapseQueue kPMLogCollapseQueue +#define PMlogChangeDone kPMLogChangeDone +#define PMlogCtrlDriverTardy kPMLogCtrlDriverTardy +#define PMlogIntDriverTardy kPMLogIntDriverTardy +#define PMlogStartAckTimer kPMLogStartAckTimer +#define PMlogStartParentChange kPMLogStartParentChange +#define PMlogAmendParentChange kPMLogAmendParentChange +#define PMlogStartDeviceChange kPMLogStartDeviceChange +#define PMlogRequestDenied kPMLogRequestDenied +#define PMlogControllingDriverErr5 kPMLogControllingDriverErr5 +#define PMlogProgramHardware kPMLogProgramHardware +#define PMlogInformDriverPreChange kPMLogInformDriverPreChange +#define PMlogInformDriverPostChange kPMLogInformDriverPostChange +#define PMlogRemoveDriver kPMLogRemoveDriver +#define PMsetIdleTimerPeriod kPMLogSetIdleTimerPeriod +#define PMlogSystemWake kPMLogSystemWake +#define PMlogAcknowledgeErr5 kPMLogAcknowledgeErr5 +#define PMlogClientAcknowledge kPMLogClientAcknowledge +#define PMlogClientTardy kPMLogClientTardy +#define PMlogClientCancel kPMLogClientCancel + diff --git a/iokit/IOKit/pwr_mgt/Makefile b/iokit/IOKit/pwr_mgt/Makefile index 64ae04404..34e8b138c 100644 --- a/iokit/IOKit/pwr_mgt/Makefile +++ b/iokit/IOKit/pwr_mgt/Makefile @@ -5,7 +5,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(KPINCDIR)/IOKit +export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -17,7 +17,8 @@ NOT_EXPORT_HEADERS = \ IOPMinformeeList.h \ IOPMlog.h \ IOPMpmChild.h \ - IOPMPrivate.h + IOPMPagingPlexus.h \ + IOPMPrivate.h INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = @@ -30,11 +31,13 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = IOPMLibDefs.h IOPM.h -INSTALL_MI_LCL_LIST = IOPMPrivate.h +INSTALL_MI_LCL_LIST = "" INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) EXPORT_MI_DIR = IOKit/$(MI_DIR) +INSTALL_KF_MI_LCL_LIST = $(EXPORT_MI_LIST) IOPMPrivate.h IOPMPagingPlexus.h + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index c2962a4ab..8984051fe 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -37,14 +37,16 @@ enum { kPCICantSleep = 0x00000004 }; -// Constants for use as arguments to registerPMSettingsController +// Constants for use as arguments to the settings callback PMU/SMU 
defines +// with registerPMSettingsController enum { kIOPMAutoWakeSetting = 1, kIOPMAutoPowerOnSetting, kIOPMWakeOnRingSetting, kIOPMAutoRestartOnPowerLossSetting, kIOPMWakeOnLidSetting, - kIOPMWakeOnACChangeSetting + kIOPMWakeOnACChangeSetting, + kIOPMTimeZoneSetting }; typedef int IOPMSystemSettingType; @@ -97,7 +99,6 @@ public: virtual IOReturn changePowerStateToPriv ( unsigned long ordinal ); IOReturn registerPMSettingController(IOPMSettingControllerCallback, void *); - IOReturn registerPlatformPowerProfiles(OSArray *); private: @@ -133,6 +134,7 @@ private: void adjustPowerState( void ); void restoreUserSpinDownTimeout ( void ); + IOLock *featuresDictLock; // guards supportedFeatures IOPMPowerStateQueue *pmPowerStateQueue; unsigned int user_spindown; // User's selected disk spindown value diff --git a/iokit/IOKit/system.h b/iokit/IOKit/system.h index f1eca8e41..f6427aa27 100644 --- a/iokit/IOKit/system.h +++ b/iokit/IOKit/system.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,69 +22,47 @@ #ifndef __IOKIT_SYSTEM_H #define __IOKIT_SYSTEM_H +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + #include __BEGIN_DECLS #include #include -#include -#include +#include #include +#include +#include -#if KERNEL_PRIVATE #include /* Must be before other includes of kern/assert.h */ -#include + +#include #include -#include -#include -#include -#include +#include #include -#include #include -#include -#include +#include #include #include -#endif /* KERNEL_PRIVATE */ +#ifndef MACH_KERNEL_PRIVATE +#include +#endif -extern int bcmp(const void *, const void *, size_t); -extern void bcopy(const void *, void *, size_t); -extern void bzero(void *, size_t); +#ifdef KERNEL_PRIVATE +#include +#include +#include +#include +#endif /* KERNEL_PRIVATE */ -extern int memcmp(const void *, const void *, size_t); extern void _doprnt( const char *format, va_list *arg, void (*putc)(char), int radix ); -extern int sscanf(const char *input, const char *fmt, ...); -extern int sprintf(char *s, const char *format, ...); -extern long strtol(const char *, char **, int); -extern unsigned long strtoul(const char *, char **, int); -extern long long strtoq(const char *, char **, int); -extern unsigned long long strtouq(const char *, char **, int); - -extern -#ifdef __GNUC__ -volatile -#endif -void panic(const char * msg, ...); - -/* - */ - -/* - * Really need a set of interfaces from osfmk/pexpert components to do - * all that is required to prepare an I/O from a cache management point - * of view. 
- * osfmk/ppc/cache.s - */ -extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys); -extern void flush_dcache(vm_offset_t addr, unsigned count, int phys); -extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys); -extern void flush_dcache64(addr64_t addr, unsigned count, int phys); __END_DECLS diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 8de94f029..337c16895 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -25,6 +25,8 @@ #include #include +#include "IOKitKernelInternal.h" + __BEGIN_DECLS void ipc_port_release_send(ipc_port_t port); #include @@ -86,8 +88,9 @@ bool IOBufferMemoryDescriptor::initWithOptions( vm_offset_t alignment, task_t inTask) { - vm_map_t map = 0; - IOOptionBits iomdOptions = kIOMemoryAsReference | kIOMemoryTypeVirtual; + kern_return_t kr; + vm_map_t vmmap = 0; + IOOptionBits iomdOptions = kIOMemoryAsReference | kIOMemoryTypeVirtual; if (!capacity) return false; @@ -111,34 +114,77 @@ bool IOBufferMemoryDescriptor::initWithOptions( if (options & kIOMemoryPageable) { iomdOptions |= kIOMemoryBufferPageable; - if (inTask == kernel_task) + + ipc_port_t sharedMem; + vm_size_t size = round_page_32(capacity); + + // must create the entry before any pages are allocated + + // set flags for entry + object create + vm_prot_t memEntryCacheMode = VM_PROT_READ | VM_PROT_WRITE + | MAP_MEM_NAMED_CREATE; + + if (options & kIOMemoryPurgeable) + memEntryCacheMode |= MAP_MEM_PURGABLE; + + // set memory entry cache mode + switch (options & kIOMapCacheMask) + { + case kIOMapInhibitCache: + SET_MAP_MEM(MAP_MEM_IO, memEntryCacheMode); + break; + + case kIOMapWriteThruCache: + SET_MAP_MEM(MAP_MEM_WTHRU, memEntryCacheMode); + break; + + case kIOMapWriteCombineCache: + SET_MAP_MEM(MAP_MEM_WCOMB, memEntryCacheMode); + break; + + case kIOMapCopybackCache: + SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode); + break; + + case kIOMapDefaultCache: + default: + SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode); + break; + } + + kr = mach_make_memory_entry( vmmap, + &size, 0, + memEntryCacheMode, &sharedMem, + NULL ); + + if( (KERN_SUCCESS == kr) && (size != round_page_32(capacity))) { + ipc_port_release_send( sharedMem ); + kr = kIOReturnVMError; + } + if( KERN_SUCCESS != kr) + return( false ); + + _memEntry = (void *) sharedMem; +#if IOALLOCDEBUG + debug_iomallocpageable_size += size; +#endif + if ((NULL == inTask) && (options & kIOMemoryPageable)) + inTask = kernel_task; + else if (inTask == kernel_task) { - /* Allocate some kernel address space. */ - _buffer = IOMallocPageable(capacity, alignment); - if (_buffer) - map = IOPageableMapForAddress((vm_address_t) _buffer); + vmmap = kernel_map; } else { - kern_return_t kr; if( !reserved) { reserved = IONew( ExpansionData, 1 ); if( !reserved) return( false ); } - map = get_task_map(inTask); - vm_map_reference(map); - reserved->map = map; - kr = vm_allocate( map, (vm_address_t *) &_buffer, round_page_32(capacity), - VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_IOKIT) ); - if( KERN_SUCCESS != kr) - return( false ); - - // we have to make sure that these pages don't get copied on fork. 
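// (The VM_INHERIT_NONE call below kept a client fork() from marking these
// pages copy-on-write behind the descriptor's back; the named-entry path
// above -- mach_make_memory_entry() with MAP_MEM_NAMED_CREATE, issued before
// any pages exist -- presumably covers the same case, since the descriptor
// and any later mappings then share a single VM object.)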
- kr = vm_inherit( map, (vm_address_t) _buffer, round_page_32(capacity), VM_INHERIT_NONE); - if( KERN_SUCCESS != kr) - return( false ); + vmmap = get_task_map(inTask); + vm_map_reference(vmmap); + reserved->map = vmmap; } } else @@ -155,10 +201,10 @@ bool IOBufferMemoryDescriptor::initWithOptions( _buffer = IOMallocAligned(capacity, alignment); else _buffer = IOMalloc(capacity); - } - if (!_buffer) - return false; + if (!_buffer) + return false; + } _singleRange.v.address = (vm_address_t) _buffer; _singleRange.v.length = capacity; @@ -167,53 +213,20 @@ bool IOBufferMemoryDescriptor::initWithOptions( inTask, iomdOptions, /* System mapper */ 0)) return false; - if (options & kIOMemoryPageable) { + if (options & kIOMemoryPageable) + { kern_return_t kr; - ipc_port_t sharedMem = (ipc_port_t) _memEntry; - vm_size_t size = round_page_32(_ranges.v[0].length); - // must create the entry before any pages are allocated - if( 0 == sharedMem) { - - // set memory entry cache - vm_prot_t memEntryCacheMode = VM_PROT_READ | VM_PROT_WRITE; - switch (options & kIOMapCacheMask) - { - case kIOMapInhibitCache: - SET_MAP_MEM(MAP_MEM_IO, memEntryCacheMode); - break; - - case kIOMapWriteThruCache: - SET_MAP_MEM(MAP_MEM_WTHRU, memEntryCacheMode); - break; - - case kIOMapWriteCombineCache: - SET_MAP_MEM(MAP_MEM_WCOMB, memEntryCacheMode); - break; - - case kIOMapCopybackCache: - SET_MAP_MEM(MAP_MEM_COPYBACK, memEntryCacheMode); - break; - - case kIOMapDefaultCache: - default: - SET_MAP_MEM(MAP_MEM_NOOP, memEntryCacheMode); - break; - } - - kr = mach_make_memory_entry( map, - &size, _ranges.v[0].address, - memEntryCacheMode, &sharedMem, - NULL ); - - if( (KERN_SUCCESS == kr) && (size != round_page_32(_ranges.v[0].length))) { - ipc_port_release_send( sharedMem ); - kr = kIOReturnVMError; - } - if( KERN_SUCCESS != kr) - sharedMem = 0; - _memEntry = (void *) sharedMem; - } + if (vmmap) + { + kr = doMap(vmmap, (IOVirtualAddress *) &_buffer, kIOMapAnywhere, 0, round_page_32(capacity)); + if (KERN_SUCCESS != kr) + { + _buffer = 0; + return( false ); + } + _singleRange.v.address = (vm_address_t) _buffer; + } } setLength(capacity); @@ -335,39 +348,43 @@ void IOBufferMemoryDescriptor::free() IOOptionBits options = _options; vm_size_t size = _capacity; void * buffer = _buffer; - vm_map_t map = 0; + vm_map_t vmmap = 0; vm_offset_t alignment = _alignment; if (reserved) { - map = reserved->map; + vmmap = reserved->map; IODelete( reserved, ExpansionData, 1 ); } /* super::free may unwire - deallocate buffer afterwards */ super::free(); - if (buffer) + if (options & kIOMemoryPageable) { - if (options & kIOMemoryPageable) - { - if (map) - vm_deallocate(map, (vm_address_t) buffer, round_page_32(size)); - else - IOFreePageable(buffer, size); - } - else - { - if (options & kIOMemoryPhysicallyContiguous) - IOFreeContiguous(buffer, size); - else if (alignment > 1) - IOFreeAligned(buffer, size); +#if IOALLOCDEBUG + if (!buffer || vmmap) + debug_iomallocpageable_size -= round_page_32(size); +#endif + if (buffer) + { + if (vmmap) + vm_deallocate(vmmap, (vm_address_t) buffer, round_page_32(size)); else - IOFree(buffer, size); + IOFreePageable(buffer, size); } } - if (map) - vm_map_deallocate(map); + else if (buffer) + { + if (options & kIOMemoryPhysicallyContiguous) + IOFreeContiguous(buffer, size); + else if (alignment > 1) + IOFreeAligned(buffer, size); + else + IOFree(buffer, size); + } + if (vmmap) + vm_map_deallocate(vmmap); } /* diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index 07676f269..b3cd11f53 100644 --- 
a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -154,17 +154,19 @@ bool IOCPU::start(IOService *provider) gIOCPUs->setObject(this); // Correct the bus, cpu and timebase frequencies in the device tree. - if (gPEClockFrequencyInfo.bus_frequency_hz < 0x100000000ULL) - busFrequency = OSData::withBytesNoCopy((void *)((char *)&gPEClockFrequencyInfo.bus_frequency_hz + 4), 4); - else - busFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.bus_clock_rate_hz, 8); + if (gPEClockFrequencyInfo.bus_frequency_hz < 0x100000000ULL) { + busFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.bus_clock_rate_hz, 4); + } else { + busFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.bus_frequency_hz, 8); + } provider->setProperty("bus-frequency", busFrequency); busFrequency->release(); - if (gPEClockFrequencyInfo.cpu_frequency_hz < 0x100000000ULL) - cpuFrequency = OSData::withBytesNoCopy((void *)((char *)&gPEClockFrequencyInfo.cpu_frequency_hz + 4), 4); - else - cpuFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.cpu_clock_rate_hz, 8); + if (gPEClockFrequencyInfo.cpu_frequency_hz < 0x100000000ULL) { + cpuFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.cpu_clock_rate_hz, 4); + } else { + cpuFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.cpu_frequency_hz, 8); + } provider->setProperty("clock-frequency", cpuFrequency); cpuFrequency->release(); @@ -172,7 +174,7 @@ bool IOCPU::start(IOService *provider) provider->setProperty("timebase-frequency", timebaseFrequency); timebaseFrequency->release(); - setProperty("IOCPUID", (UInt32)this, 32); + super::setProperty("IOCPUID", (UInt32)this, 32); setCPUNumber(0); setCPUState(kIOCPUStateUnregistered); @@ -180,36 +182,66 @@ bool IOCPU::start(IOService *provider) return true; } -IOReturn IOCPU::setProperties(OSObject *properties) +OSObject *IOCPU::getProperty(const OSSymbol *aKey) const { - OSDictionary *dict = OSDynamicCast(OSDictionary, properties); - OSString *stateStr; - IOReturn result; + if (aKey == gIOCPUStateKey) return gIOCPUStateNames[_cpuState]; - if (dict == 0) return kIOReturnUnsupported; + return super::getProperty(aKey); +} + +bool IOCPU::setProperty(const OSSymbol *aKey, OSObject *anObject) +{ + OSString *stateStr; - stateStr = OSDynamicCast(OSString, dict->getObject(gIOCPUStateKey)); - if (stateStr != 0) { - result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); - if (result != kIOReturnSuccess) return result; + if (aKey == gIOCPUStateKey) { + stateStr = OSDynamicCast(OSString, anObject); + if (stateStr == 0) return false; - if (_cpuNumber == 0) return kIOReturnUnsupported; + if (_cpuNumber == 0) return false; if (stateStr->isEqualTo("running")) { if (_cpuState == kIOCPUStateStopped) { processor_start(machProcessor); } else if (_cpuState != kIOCPUStateRunning) { - return kIOReturnUnsupported; + return false; } } else if (stateStr->isEqualTo("stopped")) { if (_cpuState == kIOCPUStateRunning) { haltCPU(); } else if (_cpuState != kIOCPUStateStopped) { - return kIOReturnUnsupported; + return false; } - } else return kIOReturnUnsupported; + } else return false; + + return true; + } + + return super::setProperty(aKey, anObject); +} + +bool IOCPU::serializeProperties(OSSerialize *serialize) const +{ + super::setProperty(gIOCPUStateKey, gIOCPUStateNames[_cpuState]); + + return super::serializeProperties(serialize); +} + +IOReturn IOCPU::setProperties(OSObject *properties) +{ + OSDictionary *dict = 
OSDynamicCast(OSDictionary, properties); + OSString *stateStr; + IOReturn result; + + if (dict == 0) return kIOReturnUnsupported; + + stateStr = OSDynamicCast(OSString, dict->getObject(gIOCPUStateKey)); + if (stateStr != 0) { + result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); + if (result != kIOReturnSuccess) return result; + + if (setProperty(gIOCPUStateKey, stateStr)) return kIOReturnSuccess; - return kIOReturnSuccess; + return kIOReturnUnsupported; } return kIOReturnUnsupported; @@ -231,7 +263,7 @@ UInt32 IOCPU::getCPUNumber(void) void IOCPU::setCPUNumber(UInt32 cpuNumber) { _cpuNumber = cpuNumber; - setProperty("IOCPUNumber", _cpuNumber, 32); + super::setProperty("IOCPUNumber", _cpuNumber, 32); } UInt32 IOCPU::getCPUState(void) @@ -241,9 +273,8 @@ UInt32 IOCPU::getCPUState(void) void IOCPU::setCPUState(UInt32 cpuState) { - if ((cpuState >= 0) && (cpuState < kIOCPUStateCount)) { + if (cpuState < kIOCPUStateCount) { _cpuState = cpuState; - setProperty(gIOCPUStateKey, gIOCPUStateNames[cpuState]); } } diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp index 10c17ea27..9f0552193 100644 --- a/iokit/Kernel/IOCatalogue.cpp +++ b/iokit/Kernel/IOCatalogue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -46,8 +46,8 @@ extern "C" { extern "C" { int IODTGetLoaderInfo( char *key, void **infoAddr, int *infoSize ); extern void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize ); -extern void OSRuntimeUnloadCPPForSegment( - struct segment_command * segment); +/* operates on 32 bit segments */ +extern void OSRuntimeUnloadCPPForSegment(struct segment_command * segment); }; @@ -202,7 +202,7 @@ kern_return_t start_prelink_module(UInt32 moduleIndex) if (depInfo) { kr = kmod_retain(KMOD_PACK_IDS(id, depInfo->id)); - kfree((vm_offset_t) depInfo, sizeof(kmod_info_t)); + kfree(depInfo, sizeof(kmod_info_t)); } else IOLog("%s: NO DEP %s\n", kmod_info->name, str->getCStringNoCopy()); } @@ -275,7 +275,7 @@ extern "C" Boolean kmod_load_request(const char * moduleName, Boolean make_reque // Is the module already loaded? ret = (0 != (kmod_info = kmod_lookupbyname_locked((char *)moduleName))); if (ret) { - kfree((vm_offset_t) kmod_info, sizeof(kmod_info_t)); + kfree(kmod_info, sizeof(kmod_info_t)); break; } sym = OSSymbol::withCString(moduleName); @@ -295,6 +295,7 @@ extern "C" Boolean kmod_load_request(const char * moduleName, Boolean make_reque IOLog("IOCatalogue: %s cannot be loaded " "(kmod load function not set).\n", moduleName); + ret = true; break; } @@ -453,7 +454,6 @@ void IOCatalogue::initialize( void ) // Initialize the IOCatalog object. 
bool IOCatalogue::init(OSArray * initArray) { - IORegistryEntry * entry; OSDictionary * dict; if ( !super::init() ) @@ -485,10 +485,6 @@ bool IOCatalogue::init(OSArray * initArray) thread_call_func_delayed( ping, this, deadline ); #endif - entry = IORegistryEntry::getRegistryRoot(); - if ( entry ) - entry->setProperty(kIOCatalogueKey, this); - return true; } @@ -841,7 +837,7 @@ IOReturn IOCatalogue::unloadModule( OSString * moduleName ) const if ( k_info->stop && !((ret = k_info->stop(k_info, 0)) == kIOReturnSuccess) ) { - kfree((vm_offset_t) k_info, sizeof(kmod_info_t)); + kfree(k_info, sizeof(kmod_info_t)); return ret; } @@ -850,18 +846,16 @@ IOReturn IOCatalogue::unloadModule( OSString * moduleName ) const } if (k_info) { - kfree((vm_offset_t) k_info, sizeof(kmod_info_t)); + kfree(k_info, sizeof(kmod_info_t)); } return ret; } -static IOReturn _terminateDrivers( OSArray * array, OSDictionary * matching ) +static IOReturn _terminateDrivers( OSDictionary * matching ) { - OSCollectionIterator * tables; OSDictionary * dict; OSIterator * iter; - OSArray * arrayCopy; IOService * service; IOReturn ret; @@ -900,9 +894,17 @@ static IOReturn _terminateDrivers( OSArray * array, OSDictionary * matching ) } while( !service && !iter->isValid()); iter->release(); + return ret; +} + +static IOReturn _removeDrivers( OSArray * array, OSDictionary * matching ) +{ + OSCollectionIterator * tables; + OSDictionary * dict; + OSArray * arrayCopy; + IOReturn ret = kIOReturnSuccess; + // remove configs from catalog. - if ( ret != kIOReturnSuccess ) - return ret; arrayCopy = OSArray::withCapacity(100); if ( !arrayCopy ) @@ -938,9 +940,10 @@ IOReturn IOCatalogue::terminateDrivers( OSDictionary * matching ) { IOReturn ret; - ret = kIOReturnSuccess; + ret = _terminateDrivers(matching); IOLockLock( lock ); - ret = _terminateDrivers(array, matching); + if (kIOReturnSuccess == ret) + ret = _removeDrivers(array, matching); kernelTables->reset(); IOLockUnlock( lock ); @@ -960,9 +963,10 @@ IOReturn IOCatalogue::terminateDriversForModule( dict->setObject(gIOModuleIdentifierKey, moduleName); + ret = _terminateDrivers(dict); IOLockLock( lock ); - - ret = _terminateDrivers(array, dict); + if (kIOReturnSuccess == ret) + ret = _removeDrivers(array, dict); kernelTables->reset(); // Unload the module itself. @@ -1039,18 +1043,10 @@ void IOCatalogue::reset(void) bool IOCatalogue::serialize(OSSerialize * s) const { - bool ret; - if ( !s ) return false; - IOLockLock( lock ); - - ret = array->serialize(s); - - IOLockUnlock( lock ); - - return ret; + return super::serialize(s); } bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const @@ -1122,6 +1118,8 @@ bool IOCatalogue::recordStartupExtensions(void) { /********************************************************************* +* This function operates on sections retrieved from the currently running +* 32 bit mach kernel. *********************************************************************/ bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) { @@ -1169,6 +1167,8 @@ bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) * This function clears out all references to the in-kernel linker, * frees the list of startup extensions in extensionDict, and * deallocates the kernel's __KLD segment to reclaim that memory. +* +* The segments it operates on are strictly 32 bit segments. 
*********************************************************************/ kern_return_t IOCatalogue::removeKernelLinker(void) { kern_return_t result = KERN_SUCCESS; @@ -1261,3 +1261,20 @@ finish: return result; } + +/********************************************************************* +* This function stops the catalogue from making kextd requests during +* shutdown. +*********************************************************************/ +void IOCatalogue::disableExternalLinker(void) { + IOLockLock(gIOKLDLock); + /* If kmod_load_extension (the kextd requester function) is in use, + * disable new module requests. + */ + if (kmod_load_function == &kmod_load_extension) { + kmod_load_function = NULL; + } + + IOLockUnlock(gIOKLDLock); +} + diff --git a/iokit/Kernel/IOCommand.cpp b/iokit/Kernel/IOCommand.cpp index f9dbc1d33..8d0c557d2 100644 --- a/iokit/Kernel/IOCommand.cpp +++ b/iokit/Kernel/IOCommand.cpp @@ -33,7 +33,7 @@ #include #define super OSObject -OSDefineMetaClassAndAbstractStructors(IOCommand, OSObject); +OSDefineMetaClassAndStructors(IOCommand, OSObject); //-------------------------------------------------------------------------- diff --git a/iokit/Kernel/IOCommandQueue.cpp b/iokit/Kernel/IOCommandQueue.cpp index e5439a542..328c96190 100644 --- a/iokit/Kernel/IOCommandQueue.cpp +++ b/iokit/Kernel/IOCommandQueue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -111,7 +111,7 @@ Returns nil. void IOCommandQueue::free() { if (queue) - kfree((vm_offset_t)queue, size * sizeof(commandEntryT)); + kfree(queue, size * sizeof(commandEntryT)); if (producerSema) semaphore_destroy(kernel_task, producerSema); if (producerLock) diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index c18268a3c..fd695d3bc 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -19,14 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * 23 Nov 98 sdouglas, created from IODeviceTreeBus.m, & MacOS exp mgr. - * 05 Apr 99 sdouglas, add interrupt mapping. 
- * - */ #include #include @@ -75,27 +67,30 @@ const OSSymbol * gIODTInterruptCellKey; const OSSymbol * gIODTInterruptParentKey; const OSSymbol * gIODTNWInterruptMappingKey; +OSDictionary * gIODTSharedInterrupts; static IORegistryEntry * MakeReferenceTable( DTEntry dtEntry, bool copy ); static void AddPHandle( IORegistryEntry * regEntry ); static void FreePhysicalMemory( vm_offset_t * range ); +static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary * allInts ); IORegistryEntry * IODeviceTreeAlloc( void * dtTop ) { - IORegistryEntry *parent; - IORegistryEntry *child; - IORegistryIterator *regIter; + IORegistryEntry * parent; + IORegistryEntry * child; + IORegistryIterator * regIter; DTEntryIterator iter; - DTEntry dtChild; - DTEntry mapEntry; - OSArray *stack; - OSData *prop; - OSObject *obj; - vm_offset_t *dtMap; - int propSize; - bool intMap; - bool freeDT; + DTEntry dtChild; + DTEntry mapEntry; + OSArray * stack; + OSData * prop; + OSObject * obj; + OSDictionary * allInts; + vm_offset_t * dtMap; + int propSize; + bool intMap; + bool freeDT; gIODTPlane = IORegistryEntry::makePlane( kIODeviceTreePlane ); @@ -196,13 +191,16 @@ IODeviceTreeAlloc( void * dtTop ) } // adjust tree + + gIODTSharedInterrupts = OSDictionary::withCapacity(4); + allInts = OSDictionary::withCapacity(4); intMap = false; regIter = IORegistryIterator::iterateOver( gIODTPlane, kIORegistryIterateRecursively ); - assert( regIter ); - if( regIter) { + assert( regIter && allInts && gIODTSharedInterrupts ); + if( regIter && allInts && gIODTSharedInterrupts ) { while( (child = regIter->getNextObject())) { - IODTMapInterrupts( child ); + IODTMapInterruptsSharing( child, allInts ); if( !intMap && child->getProperty( gIODTInterruptParentKey)) intMap = true; @@ -226,6 +224,30 @@ IODeviceTreeAlloc( void * dtTop ) regIter->release(); } +#if IODTSUPPORTDEBUG + parent->setProperty("allInts", allInts); + parent->setProperty("sharedInts", gIODTSharedInterrupts); + + regIter = IORegistryIterator::iterateOver( gIODTPlane, + kIORegistryIterateRecursively ); + if (regIter) { + while( (child = regIter->getNextObject())) { + OSArray * + array = OSDynamicCast(OSArray, child->getProperty( gIOInterruptSpecifiersKey )); + for( UInt32 i = 0; array && (i < array->getCount()); i++) + { + IOOptionBits options; + IOReturn ret = IODTGetInterruptOptions( child, i, &options ); + if( (ret != kIOReturnSuccess) || options) + IOLog("%s[%ld] %ld (%x)\n", child->getName(), i, options, ret); + } + } + regIter->release(); + } +#endif + + allInts->release(); + if( intMap) // set a key in the root to indicate we found NW interrupt mapping parent->setProperty( gIODTNWInterruptMappingKey, @@ -565,19 +587,57 @@ UInt32 IODTMapOneInterrupt( IORegistryEntry * regEntry, UInt32 * intSpec, return( ok ? 
original_icells : 0 ); } -bool IODTMapInterrupts( IORegistryEntry * regEntry ) +IOReturn IODTGetInterruptOptions( IORegistryEntry * regEntry, int source, IOOptionBits * options ) { - IORegistryEntry *parent; - OSData *local; - OSData *local2; - UInt32 *localBits; - UInt32 *localEnd; - OSData *map; - OSArray *mapped; - const OSSymbol *controller; - OSArray *controllers; - UInt32 skip = 1; - bool ok, nw; + OSArray * controllers; + OSArray * specifiers; + OSArray * shared; + OSObject * spec; + OSObject * oneSpec; + + *options = 0; + + controllers = OSDynamicCast(OSArray, regEntry->getProperty(gIOInterruptControllersKey)); + specifiers = OSDynamicCast(OSArray, regEntry->getProperty(gIOInterruptSpecifiersKey)); + + if( !controllers || !specifiers) + return (kIOReturnNoInterrupt); + + shared = (OSArray *) gIODTSharedInterrupts->getObject( + (const OSSymbol *) controllers->getObject(source) ); + if (!shared) + return (kIOReturnSuccess); + + spec = specifiers->getObject(source); + if (!spec) + return (kIOReturnNoInterrupt); + + for (unsigned int i = 0; + (oneSpec = shared->getObject(i)) + && (!oneSpec->isEqualTo(spec)); + i++ ) {} + + if (oneSpec) + *options = kIODTInterruptShared; + + return (kIOReturnSuccess); +} + +static bool IODTMapInterruptsSharing( IORegistryEntry * regEntry, OSDictionary * allInts ) +{ + IORegistryEntry * parent; + OSData * local; + OSData * local2; + UInt32 * localBits; + UInt32 * localEnd; + OSData * map; + OSObject * oneMap; + OSArray * mapped; + OSArray * controllerInts; + const OSSymbol * controller; + OSArray * controllers; + UInt32 skip = 1; + bool ok, nw; nw = (0 == (local = OSDynamicCast( OSData, regEntry->getProperty( gIODTAAPLInterruptsKey)))); @@ -618,8 +678,47 @@ bool IODTMapInterrupts( IORegistryEntry * regEntry ) localBits += skip; mapped->setObject( map ); + controllers->setObject( controller ); + + if (allInts) + { + controllerInts = (OSArray *) allInts->getObject( controller ); + if (controllerInts) + { + for (unsigned int i = 0; (oneMap = controllerInts->getObject(i)); i++) + { + if (map->isEqualTo(oneMap)) + { + controllerInts = (OSArray *) gIODTSharedInterrupts->getObject( controller ); + if (controllerInts) + controllerInts->setObject(map); + else + { + controllerInts = OSArray::withObjects( (const OSObject **) &map, 1, 4 ); + if (controllerInts) + { + gIODTSharedInterrupts->setObject( controller, controllerInts ); + controllerInts->release(); + } + } + break; + } + } + if (!oneMap) + controllerInts->setObject(map); + } + else + { + controllerInts = OSArray::withObjects( (const OSObject **) &map, 1, 16 ); + if (controllerInts) + { + allInts->setObject( controller, controllerInts ); + controllerInts->release(); + } + } + } + map->release(); - controllers->setObject( (OSObject *) controller ); controller->release(); } while( localBits < localEnd); @@ -640,6 +739,11 @@ bool IODTMapInterrupts( IORegistryEntry * regEntry ) return( ok ); } +bool IODTMapInterrupts( IORegistryEntry * regEntry ) +{ + return( IODTMapInterruptsSharing( regEntry, 0 )); +} + /* */ @@ -1079,3 +1183,8 @@ OSData * IODTFindSlotName( IORegistryEntry * regEntry, UInt32 deviceNumber ) return( ret ); } + +extern "C" IOReturn IONDRVLibrariesInitialize( IOService * provider ) +{ + return( kIOReturnUnsupported ); +} diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index 438d93d87..29c5fd14a 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include 
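// A minimal, editorial sketch of the driver-side view of the sharing support
// added in IODeviceTreeSupport.cpp above and honored by registerInterrupt()
// below. "AttachInterrupt0" and "myInterruptHandler" are hypothetical names;
// IODTGetInterruptOptions() and kIODTInterruptShared are the interfaces this
// change introduces.
static void myInterruptHandler(OSObject * target, void * refCon,
                               IOService * nub, int source)
{
    /* service the hardware for this source here */
}

static IOReturn AttachInterrupt0(OSObject * owner, IOService * nub)
{
    IOOptionBits intOptions = 0;

    // Ask the device tree layer whether the specifier for source 0 was seen
    // under more than one nub; if so, registration is routed through an
    // IOSharedInterruptController even for the very first client.
    if ((kIOReturnSuccess == IODTGetInterruptOptions(nub, 0, &intOptions))
        && (intOptions & kIODTInterruptShared))
        IOLog("source 0 is a shared interrupt\n");

    return nub->registerInterrupt(0, owner, myInterruptHandler, 0);
}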
#include @@ -66,6 +67,8 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, OSData *vectorData; IOService *originalNub; int originalSource; + IOOptionBits options; + bool canBeShared, shouldBeShared, wasAlreadyRegistered; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; @@ -75,14 +78,22 @@ // Get the lock for this vector. IOTakeLock(vector->interruptLock); - // If this vector is already in use, and can be shared, + // Check if the interrupt source can/should be shared. + canBeShared = vectorCanBeShared(vectorNumber, vector); + IODTGetInterruptOptions(nub, source, &options); + shouldBeShared = canBeShared && (options & kIODTInterruptShared); + wasAlreadyRegistered = vector->interruptRegistered; + + // If the vector is registered and cannot be shared, return an error. + if (wasAlreadyRegistered && !canBeShared) { + IOUnlock(vector->interruptLock); + return kIOReturnNoResources; + } + + // If this vector is already in use, and can be shared (implied), + // or it is not registered and should be shared, // register as a shared interrupt. - if (vector->interruptRegistered) { - if (!vectorCanBeShared(vectorNumber, vector)) { - IOUnlock(vector->interruptLock); - return kIOReturnNoResources; - } - + if (wasAlreadyRegistered || shouldBeShared) { // If this vector is not already shared, break it out. if (vector->sharedController == 0) { // Make the IOShareInterruptController instance @@ -92,54 +103,57 @@ return kIOReturnNoMemory; } - // Save the nub and source for the original consumer. - originalNub = vector->nub; - originalSource = vector->source; - - // Physically disable the interrupt, but mark it as being enables in the hardware. - // The interruptDisabledSoft now indicates the driver's request for enablement. - disableVectorHard(vectorNumber, vector); - vector->interruptDisabledHard = 0; + if (wasAlreadyRegistered) { + // Save the nub and source for the original consumer. + originalNub = vector->nub; + originalSource = vector->source; + + // Physically disable the interrupt, but mark it as being enabled in the hardware. + // The interruptDisabledSoft now indicates the driver's request for enablement. + disableVectorHard(vectorNumber, vector); + vector->interruptDisabledHard = 0; + } // Initialize the new shared interrupt controller. - error = vector->sharedController->initInterruptController(this, - vectorData); + error = vector->sharedController->initInterruptController(this, vectorData); // If the IOSharedInterruptController could not be initalized, - // put the original consumor's interrupt back to normal and + // if needed, put the original consumer's interrupt back to normal and // get rid of whats left of the shared controller. if (error != kIOReturnSuccess) { - enableInterrupt(originalNub, originalSource); + if (wasAlreadyRegistered) enableInterrupt(originalNub, originalSource); vector->sharedController->release(); vector->sharedController = 0; IOUnlock(vector->interruptLock); return error; } - // Try to register the original consumer on the shared controller.
- error = vector->sharedController->registerInterrupt(originalNub, - originalSource, - vector->target, - vector->handler, - vector->refCon); - // If the original consumer could not be moved to the shared controller, - // put the original consumor's interrupt back to normal and - // get rid of whats left of the shared controller. - if (error != kIOReturnSuccess) { - // Save the driver's interrupt enablement state. - wasDisabledSoft = vector->interruptDisabledSoft; - - // Make the interrupt really hard disabled. - vector->interruptDisabledSoft = 1; - vector->interruptDisabledHard = 1; - - // Enable the original consumer's interrupt if needed. - if (!wasDisabledSoft) originalNub->enableInterrupt(originalSource); - enableInterrupt(originalNub, originalSource); - - vector->sharedController->release(); - vector->sharedController = 0; - IOUnlock(vector->interruptLock); - return error; + // If there was an original consumer, try to register it on the shared controller. + if (wasAlreadyRegistered) { + error = vector->sharedController->registerInterrupt(originalNub, + originalSource, + vector->target, + vector->handler, + vector->refCon); + // If the original consumer could not be moved to the shared controller, + // put the original consumer's interrupt back to normal and + // get rid of what's left of the shared controller. + if (error != kIOReturnSuccess) { + // Save the driver's interrupt enablement state. + wasDisabledSoft = vector->interruptDisabledSoft; + + // Make the interrupt really hard disabled. + vector->interruptDisabledSoft = 1; + vector->interruptDisabledHard = 1; + + // Enable the original consumer's interrupt if needed. + if (!wasDisabledSoft) originalNub->enableInterrupt(originalSource); + enableInterrupt(originalNub, originalSource); + + vector->sharedController->release(); + vector->sharedController = 0; + IOUnlock(vector->interruptLock); + return error; + } } // Fill in vector with the shared controller's info. @@ -149,12 +163,18 @@ vector->target = vector->sharedController; vector->refCon = 0; - // Save the driver's interrupt enablement state. - wasDisabledSoft = vector->interruptDisabledSoft; + // If the interrupt was already registered, + // save the driver's interrupt enablement state. + if (wasAlreadyRegistered) wasDisabledSoft = vector->interruptDisabledSoft; + else wasDisabledSoft = true; + + // Do any specific initialization for this vector if it has not yet been used. + if (!wasAlreadyRegistered) initVector(vectorNumber, vector); // Make the interrupt really hard disabled. vector->interruptDisabledSoft = 1; vector->interruptDisabledHard = 1; + vector->interruptRegistered = 1; // Enable the original consumer's interrupt if needed. if (!wasDisabledSoft) originalNub->enableInterrupt(originalSource); @@ -388,6 +408,8 @@ OSMetaClassDefineReservedUnused(IOSharedInterruptController, 3); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#define kIOSharedInterruptControllerDefaultVectors (128) + IOReturn IOSharedInterruptController::initInterruptController(IOInterruptController *parentController, OSData *parentSource) { int cnt, interruptType; @@ -416,7 +438,7 @@ } // Allocate the memory for the vectors - numVectors = 32; // For now a constant number. + numVectors = kIOSharedInterruptControllerDefaultVectors; // For now a constant number.
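// kIOSharedInterruptControllerDefaultVectors fixes the size of the vector
// table allocated just below; once the table is built, numVectors is reset
// to 0 and reused as a high-water mark of slots actually handed out (see the
// "++vectorsRegistered > numVectors" update in registerInterrupt()), so a
// client beyond the table size fails with kIOReturnNoResources.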
vectors = (IOInterruptVector *)IOMalloc(numVectors * sizeof(IOInterruptVector)); if (vectors == NULL) { IOFree(_interruptSources, sizeof(IOInterruptSource)); @@ -440,6 +462,7 @@ IOReturn IOSharedInterruptController::initInterruptController(IOInterruptControl } } + numVectors = 0; // reset the high water mark for used vectors vectorsRegistered = 0; vectorsEnabled = 0; controllerDisabled = 1; @@ -462,9 +485,9 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, interruptSources = nub->_interruptSources; // Find a free vector. - vectorNumber = numVectors; - while (vectorsRegistered != numVectors) { - for (vectorNumber = 0; vectorNumber < numVectors; vectorNumber++) { + vectorNumber = kIOSharedInterruptControllerDefaultVectors; + while (vectorsRegistered != kIOSharedInterruptControllerDefaultVectors) { + for (vectorNumber = 0; vectorNumber < kIOSharedInterruptControllerDefaultVectors; vectorNumber++) { vector = &vectors[vectorNumber]; // Get the lock for this vector. @@ -477,11 +500,11 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, IOUnlock(vector->interruptLock); } - if (vectorNumber != numVectors) break; + if (vectorNumber != kIOSharedInterruptControllerDefaultVectors) break; } // Could not find a free one, so give up. - if (vectorNumber == numVectors) { + if (vectorNumber == kIOSharedInterruptControllerDefaultVectors) { return kIOReturnNoResources; } @@ -502,12 +525,13 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, vector->target = target; vector->refCon = refCon; - // Get the vector ready. It start soft disabled. + // Get the vector ready. It starts off soft disabled. vector->interruptDisabledSoft = 1; vector->interruptRegistered = 1; interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); - vectorsRegistered++; + // Move the high water mark if needed + if (++vectorsRegistered > numVectors) numVectors = vectorsRegistered; IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); IOUnlock(vector->interruptLock); @@ -521,7 +545,7 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, long vectorNumber; IOInterruptVector *vector; OSData *vectorData; - IOInterruptState interruptState;; + IOInterruptState interruptState; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; @@ -537,7 +561,7 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, return kIOReturnSuccess; } - // Soft disable the source. + // Soft disable the source and the controller too. disableInterrupt(nub, source); // Clear all the storage for the vector except for interruptLock. @@ -556,6 +580,13 @@ IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); IOUnlock(vector->interruptLock); + + // Re-enable the controller if all vectors are enabled. 
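// (Unregistering first soft-disables the departing source, which may also
// mask the provider's vector and mark the controller disabled; if every
// vector still registered afterwards is enabled, the provider's interrupt
// can safely be unmasked again, which is what the check below restores.)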
+ if (vectorsEnabled == vectorsRegistered) { + controllerDisabled = 0; + provider->enableInterrupt(0); + } + return kIOReturnSuccess; } diff --git a/iokit/Kernel/IOInterruptEventSource.cpp b/iokit/Kernel/IOInterruptEventSource.cpp index 72ff61937..bda259692 100644 --- a/iokit/Kernel/IOInterruptEventSource.cpp +++ b/iokit/Kernel/IOInterruptEventSource.cpp @@ -97,12 +97,12 @@ bool IOInterruptEventSource::init(OSObject *inOwner, autoDisable = (intType == kIOInterruptTypeLevel); if (autoDisable) { - intHandler = (IOInterruptAction) - &IOInterruptEventSource::disableInterruptOccurred; + intHandler = OSMemberFunctionCast(IOInterruptAction, + this, &IOInterruptEventSource::disableInterruptOccurred); } else - intHandler = (IOInterruptAction) - &IOInterruptEventSource::normalInterruptOccurred; + intHandler = OSMemberFunctionCast(IOInterruptAction, + this, &IOInterruptEventSource::normalInterruptOccurred); res = (kIOReturnSuccess == inProvider->registerInterrupt (inIntIndex, this, intHandler)); diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp index fba115d55..06c6e1030 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -26,6 +26,8 @@ * */ +#include + #include #include #include @@ -35,19 +37,25 @@ #include #include -extern "C" { - -SInt64 gIOKitDebug #ifdef IOKITDEBUG - = IOKITDEBUG +#define DEBUG_INIT_VALUE IOKITDEBUG +#else +#define DEBUG_INIT_VALUE 0 #endif -; + +SInt64 gIOKitDebug = DEBUG_INIT_VALUE; +SYSCTL_QUAD(_debug, OID_AUTO, iokit, CTLFLAG_RW, &gIOKitDebug, "boot_arg io"); + int debug_malloc_size; int debug_iomalloc_size; +vm_size_t debug_iomallocpageable_size; int debug_container_malloc_size; // int debug_ivars_size; // in OSObject.cpp +extern "C" { + + void IOPrintPlane( const IORegistryPlane * plane ) { IORegistryEntry * next; @@ -202,6 +210,7 @@ bool IOKitDiagnostics::serialize(OSSerialize *s) const updateOffset( dict, debug_ivars_size, "Instance allocation" ); updateOffset( dict, debug_container_malloc_size, "Container allocation" ); updateOffset( dict, debug_iomalloc_size, "IOMalloc allocation" ); + updateOffset( dict, debug_iomallocpageable_size, "Pageable allocation" ); OSMetaClass::serializeClassDictionary(dict); diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h new file mode 100644 index 000000000..9322aa0b5 --- /dev/null +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + + +#ifndef _IOKIT_KERNELINTERNAL_H +#define _IOKIT_KERNELINTERNAL_H + +#include + +__BEGIN_DECLS + +#include +#include +#include + +typedef kern_return_t (*IOIteratePageableMapsCallback)(vm_map_t map, void * ref); + +void IOLibInit(void); +kern_return_t IOIteratePageableMaps(vm_size_t size, + IOIteratePageableMapsCallback callback, void * ref); +vm_map_t IOPageableMapForAddress( vm_address_t address ); +SInt32 OSKernelStackRemaining( void ); + +extern vm_size_t debug_iomallocpageable_size; + +// osfmk/device/iokit_rpc.c +// LP64todo - these need to expand +extern kern_return_t IOMapPages( vm_map_t map, vm_offset_t va, vm_offset_t pa, + vm_size_t length, unsigned int mapFlags); +extern kern_return_t IOUnmapPages(vm_map_t map, vm_offset_t va, vm_size_t length); + +/* Physical to physical copy (ints must be disabled) */ +extern void bcopy_phys(addr64_t from, addr64_t to, int size); + +__END_DECLS + +#endif /* ! _IOKIT_KERNELINTERNAL_H */ diff --git a/iokit/Kernel/IOLib.c b/iokit/Kernel/IOLib.c index 3b6e3e957..0a75d9af3 100644 --- a/iokit/Kernel/IOLib.c +++ b/iokit/Kernel/IOLib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,15 +36,20 @@ #include #include +#include #include #include +#include "IOKitKernelInternal.h" + mach_timespec_t IOZeroTvalspec = { 0, 0 }; extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +lck_grp_t *IOLockGroup; + /* * Global variables for use by iLogger * These symbols are for use only by Apple diagnostic code. @@ -61,17 +66,14 @@ void *_giDebugReserved2 = NULL; * Static variables for this module. 
*/ -static IOThreadFunc threadArgFcn; -static void * threadArgArg; -static lock_t * threadArgLock; - static queue_head_t gIOMallocContiguousEntries; -static mutex_t * gIOMallocContiguousEntriesLock; +static lck_mtx_t * gIOMallocContiguousEntriesLock; enum { kIOMaxPageableMaps = 16 }; enum { kIOPageableMapSize = 96 * 1024 * 1024 }; enum { kIOPageableMaxMapSize = 96 * 1024 * 1024 }; +/* LP64todo - these need to expand */ typedef struct { vm_map_t map; vm_offset_t address; @@ -82,7 +84,7 @@ static struct { UInt32 count; UInt32 hint; IOMapData maps[ kIOMaxPageableMaps ]; - mutex_t * lock; + lck_mtx_t * lock; } gIOKitPageableSpace; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -96,24 +98,24 @@ void IOLibInit(void) if(libInitialized) return; - threadArgLock = lock_alloc( true, NULL, NULL ); - gIOKitPageableSpace.maps[0].address = 0; ret = kmem_suballoc(kernel_map, &gIOKitPageableSpace.maps[0].address, kIOPageableMapSize, TRUE, - TRUE, + VM_FLAGS_ANYWHERE, &gIOKitPageableSpace.maps[0].map); if (ret != KERN_SUCCESS) panic("failed to allocate iokit pageable map\n"); - gIOKitPageableSpace.lock = mutex_alloc( 0 ); + IOLockGroup = lck_grp_alloc_init("IOKit", LCK_GRP_ATTR_NULL); + + gIOKitPageableSpace.lock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); gIOKitPageableSpace.maps[0].end = gIOKitPageableSpace.maps[0].address + kIOPageableMapSize; gIOKitPageableSpace.hint = 0; gIOKitPageableSpace.count = 1; - gIOMallocContiguousEntriesLock = mutex_alloc( 0 ); + gIOMallocContiguousEntriesLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); queue_init( &gIOMallocContiguousEntries ); libInitialized = true; @@ -121,44 +123,24 @@ void IOLibInit(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -/* - * We pass an argument to a new thread by saving fcn and arg in some - * locked variables and starting the thread at ioThreadStart(). This - * function retrives fcn and arg and makes the appropriate call. 
- * - */ - -static void ioThreadStart( void ) -{ - IOThreadFunc fcn; - void * arg; - - fcn = threadArgFcn; - arg = threadArgArg; - lock_done( threadArgLock); - - (*fcn)(arg); - - IOExitThread(); -} - IOThread IOCreateThread(IOThreadFunc fcn, void *arg) { - IOThread thread; + kern_return_t result; + thread_t thread; - lock_write( threadArgLock); - threadArgFcn = fcn; - threadArgArg = arg; + result = kernel_thread_start((thread_continue_t)fcn, arg, &thread); + if (result != KERN_SUCCESS) + return (NULL); - thread = kernel_thread( kernel_task, ioThreadStart); + thread_deallocate(thread); - return(thread); + return (thread); } -volatile void IOExitThread() +volatile void IOExitThread(void) { - (void) thread_terminate(current_act()); + (void) thread_terminate(current_thread()); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -179,7 +161,7 @@ void * IOMalloc(vm_size_t size) void IOFree(void * address, vm_size_t size) { if (address) { - kfree((vm_offset_t)address, size); + kfree(address, size); #if IOALLOCDEBUG debug_iomalloc_size -= size; #endif @@ -270,9 +252,9 @@ void IOFreeAligned(void * address, vm_size_t size) - sizeof(vm_address_t) )); if (adjustedSize >= page_size) - kmem_free( kernel_map, (vm_address_t) allocationAddress, adjustedSize); + kmem_free( kernel_map, allocationAddress, adjustedSize); else - kfree((vm_offset_t) allocationAddress, adjustedSize); + kfree((void *)allocationAddress, adjustedSize); } #if IOALLOCDEBUG @@ -371,10 +353,10 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, } entry->virtual = (void *) address; entry->ioBase = base; - mutex_lock(gIOMallocContiguousEntriesLock); + lck_mtx_lock(gIOMallocContiguousEntriesLock); queue_enter( &gIOMallocContiguousEntries, entry, _IOMallocContiguousEntry *, link ); - mutex_unlock(gIOMallocContiguousEntriesLock); + lck_mtx_unlock(gIOMallocContiguousEntriesLock); *physicalAddress = (IOPhysicalAddress)((base << PAGE_SHIFT) | (address & PAGE_MASK)); for (offset = 0; offset < ((size + PAGE_MASK) >> PAGE_SHIFT); offset++, pagenum++) @@ -412,7 +394,7 @@ void IOFreeContiguous(void * address, vm_size_t size) assert(size); - mutex_lock(gIOMallocContiguousEntriesLock); + lck_mtx_lock(gIOMallocContiguousEntriesLock); queue_iterate( &gIOMallocContiguousEntries, entry, _IOMallocContiguousEntry *, link ) { @@ -423,7 +405,7 @@ void IOFreeContiguous(void * address, vm_size_t size) break; } } - mutex_unlock(gIOMallocContiguousEntriesLock); + lck_mtx_unlock(gIOMallocContiguousEntriesLock); if (base) { @@ -442,7 +424,7 @@ void IOFreeContiguous(void * address, vm_size_t size) allocationAddress = *((vm_address_t *)( (vm_address_t) address - sizeof(vm_address_t) )); - kfree((vm_offset_t) allocationAddress, adjustedSize); + kfree((void *)allocationAddress, adjustedSize); } #if IOALLOCDEBUG @@ -452,8 +434,6 @@ void IOFreeContiguous(void * address, vm_size_t size) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -typedef kern_return_t (*IOIteratePageableMapsCallback)(vm_map_t map, void * ref); - kern_return_t IOIteratePageableMaps(vm_size_t size, IOIteratePageableMapsCallback callback, void * ref) { @@ -484,11 +464,11 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, if( KERN_SUCCESS == kr) break; - mutex_lock( gIOKitPageableSpace.lock ); + lck_mtx_lock( gIOKitPageableSpace.lock ); index = gIOKitPageableSpace.count; if( index >= (kIOMaxPageableMaps - 1)) { - mutex_unlock( gIOKitPageableSpace.lock ); + lck_mtx_unlock( gIOKitPageableSpace.lock ); break; } @@ -502,10 +482,10 @@ 
kern_return_t IOIteratePageableMaps(vm_size_t size, &min, segSize, TRUE, - TRUE, + VM_FLAGS_ANYWHERE, &map); if( KERN_SUCCESS != kr) { - mutex_unlock( gIOKitPageableSpace.lock ); + lck_mtx_unlock( gIOKitPageableSpace.lock ); break; } @@ -515,7 +495,7 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, gIOKitPageableSpace.hint = index; gIOKitPageableSpace.count = index + 1; - mutex_unlock( gIOKitPageableSpace.lock ); + lck_mtx_unlock( gIOKitPageableSpace.lock ); } while( true ); @@ -555,7 +535,7 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment) #if IOALLOCDEBUG if( ref.address) - debug_iomalloc_size += round_page_32(size); + debug_iomallocpageable_size += round_page_32(size); #endif return( (void *) ref.address ); @@ -588,16 +568,12 @@ void IOFreePageable(void * address, vm_size_t size) kmem_free( map, (vm_offset_t) address, size); #if IOALLOCDEBUG - debug_iomalloc_size -= round_page_32(size); + debug_iomallocpageable_size -= round_page_32(size); #endif } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -extern kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa, - vm_size_t length, unsigned int options); -extern kern_return_t IOUnmapPages(vm_map_t map, vm_offset_t va, vm_size_t length); - IOReturn IOSetProcessorCacheMode( task_t task, IOVirtualAddress address, IOByteCount length, IOOptionBits cacheMode ) { @@ -659,13 +635,7 @@ SInt32 OSKernelStackRemaining( void ) void IOSleep(unsigned milliseconds) { - wait_result_t wait_result; - - wait_result = assert_wait_timeout(milliseconds, THREAD_UNINT); - assert(wait_result == THREAD_WAITING); - - wait_result = thread_block(THREAD_CONTINUE_NULL); - assert(wait_result == THREAD_TIMED_OUT); + delay_for_interval(milliseconds, kMillisecondScale); } /* @@ -673,9 +643,7 @@ void IOSleep(unsigned milliseconds) */ void IODelay(unsigned microseconds) { - extern void delay(int usec); - - delay(microseconds); + delay_for_interval(microseconds, kMicrosecondScale); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -684,7 +652,7 @@ void IOLog(const char *format, ...) 
{ va_list ap; extern void conslog_putc(char); - extern void logwakeup(); + extern void logwakeup(void); va_start(ap, format); _doprnt(format, &ap, conslog_putc, 16); @@ -750,8 +718,3 @@ unsigned int IOAlignmentToSize(IOAlignment align) } return size; } - -IOReturn IONDRVLibrariesInitialize( void ) -{ - return( kIOReturnUnsupported ); -} diff --git a/iokit/Kernel/IOLocks.cpp b/iokit/Kernel/IOLocks.cpp index 43884bfb3..064a89de0 100644 --- a/iokit/Kernel/IOLocks.cpp +++ b/iokit/Kernel/IOLocks.cpp @@ -27,6 +27,8 @@ */ +#define IOLOCKS_CPP 1 + #include #include @@ -34,27 +36,49 @@ #include extern "C" { -#include -#include +#include + +void IOLockInitWithState( IOLock * lock, IOLockState state) +{ + if( state == kIOLockStateLocked) + lck_mtx_lock( lock); +} IOLock * IOLockAlloc( void ) { - return( mutex_alloc(ETAP_IO_AHA) ); + return( lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL) ); } void IOLockFree( IOLock * lock) { - mutex_free( lock ); + lck_mtx_free( lock, IOLockGroup); } -void IOLockInitWithState( IOLock * lock, IOLockState state) +lck_mtx_t * IOLockGetMachLock( IOLock * lock) { - if( state == kIOLockStateLocked) - IOLockLock( lock); + return( (lck_mtx_t *)lock); } +int IOLockSleep( IOLock * lock, void *event, UInt32 interType) +{ + return (int) lck_mtx_sleep(lock, LCK_SLEEP_DEFAULT, (event_t) event, (wait_interrupt_t) interType); +} + +int IOLockSleepDeadline( IOLock * lock, void *event, + AbsoluteTime deadline, UInt32 interType) +{ + return (int) lck_mtx_sleep_deadline(lock, LCK_SLEEP_DEFAULT, (event_t) event, + (wait_interrupt_t) interType, __OSAbsoluteTime(deadline)); +} + +void IOLockWakeup(IOLock * lock, void *event, bool oneThread) +{ + thread_wakeup_prim((event_t) event, oneThread, THREAD_AWAKENED); +} + + struct _IORecursiveLock { - mutex_t * mutex; + lck_mtx_t *mutex; thread_t thread; UInt32 count; }; @@ -67,7 +91,7 @@ IORecursiveLock * IORecursiveLockAlloc( void ) if( !lock) return( 0 ); - lock->mutex = mutex_alloc(ETAP_IO_AHA); + lock->mutex = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); if( lock->mutex) { lock->thread = 0; lock->count = 0; @@ -83,10 +107,15 @@ void IORecursiveLockFree( IORecursiveLock * _lock ) { _IORecursiveLock * lock = (_IORecursiveLock *)_lock; - mutex_free( lock->mutex ); + lck_mtx_free( lock->mutex , IOLockGroup); IODelete( lock, _IORecursiveLock, 1); } +lck_mtx_t * IORecursiveLockGetMachLock( IORecursiveLock * lock) +{ + return( lock->mutex); +} + void IORecursiveLockLock( IORecursiveLock * _lock) { _IORecursiveLock * lock = (_IORecursiveLock *)_lock; @@ -94,7 +123,7 @@ void IORecursiveLockLock( IORecursiveLock * _lock) if( lock->thread == IOThreadSelf()) lock->count++; else { - mutex_lock( lock->mutex ); + lck_mtx_lock( lock->mutex ); assert( lock->thread == 0 ); assert( lock->count == 0 ); lock->thread = IOThreadSelf(); @@ -110,7 +139,7 @@ boolean_t IORecursiveLockTryLock( IORecursiveLock * _lock) lock->count++; return( true ); } else { - if( mutex_try( lock->mutex )) { + if( lck_mtx_try_lock( lock->mutex )) { assert( lock->thread == 0 ); assert( lock->count == 0 ); lock->thread = IOThreadSelf(); @@ -129,7 +158,7 @@ void IORecursiveLockUnlock( IORecursiveLock * _lock) if( 0 == (--lock->count)) { lock->thread = 0; - mutex_unlock( lock->mutex ); + lck_mtx_unlock( lock->mutex ); } } @@ -151,7 +180,7 @@ int IORecursiveLockSleep(IORecursiveLock *_lock, void *event, UInt32 interType) lock->count = 0; lock->thread = 0; - res = thread_sleep_mutex((event_t) event, lock->mutex, (int) interType); + res = lck_mtx_sleep(lock->mutex, LCK_SLEEP_DEFAULT, 
(event_t) event, (wait_interrupt_t) interType); // Must re-establish the recursive lock no matter why we woke up // otherwise we would potentially leave the return path corrupted. @@ -173,16 +202,17 @@ void IORecursiveLockWakeup(IORecursiveLock *, void *event, bool oneThread) IORWLock * IORWLockAlloc( void ) { - IORWLock * lock; - - lock = lock_alloc( true, ETAP_IO_AHA, ETAP_IO_AHA); - - return( lock); + return( lck_rw_alloc_init(IOLockGroup, LCK_ATTR_NULL) ); } void IORWLockFree( IORWLock * lock) { - lock_free( lock ); + lck_rw_free( lock, IOLockGroup); +} + +lck_rw_t * IORWLockGetMachLock( IORWLock * lock) +{ + return( (lck_rw_t *)lock); } @@ -192,23 +222,22 @@ void IORWLockFree( IORWLock * lock) IOSimpleLock * IOSimpleLockAlloc( void ) { - IOSimpleLock * lock; - - lock = (IOSimpleLock *) IOMalloc( sizeof(IOSimpleLock)); - if( lock) - IOSimpleLockInit( lock ); - - return( lock ); + return( lck_spin_alloc_init( IOLockGroup, LCK_ATTR_NULL) ); } void IOSimpleLockInit( IOSimpleLock * lock) { - simple_lock_init( (simple_lock_t) lock, ETAP_IO_AHA ); + lck_spin_init( lock, IOLockGroup, LCK_ATTR_NULL); } void IOSimpleLockFree( IOSimpleLock * lock ) { - IOFree( lock, sizeof(IOSimpleLock)); + lck_spin_free( lock, IOLockGroup); +} + +lck_spin_t * IOSimpleLockGetMachLock( IOSimpleLock * lock) +{ + return( (lck_spin_t *)lock); } } /* extern "C" */ diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index 66f56480d..c0f915c0f 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -212,8 +212,10 @@ void IOMapper::FreeARTTable(OSData *artHandle, IOByteCount size) ARTTableData *dataP = getARTDataP(artHandle); int numupls = ((artHandle->getLength() - sizeof(*dataP)) / sizeof(upl_t)); - for (int i = 0; i < numupls; i++) - kernel_upl_abort(dataP->u[i], 0); + for (int i = 0; i < numupls; i++) { + upl_abort(dataP->u[i], 0); + upl_deallocate(dataP->u[i]); + } if (dataP->v) { size = round_page_32(size); diff --git a/iokit/Kernel/IOMemoryCursor.cpp b/iokit/Kernel/IOMemoryCursor.cpp index bfd0833a0..ccc953957 100644 --- a/iokit/Kernel/IOMemoryCursor.cpp +++ b/iokit/Kernel/IOMemoryCursor.cpp @@ -116,11 +116,11 @@ IOMemoryCursor::genPhysicalSegments(IOMemoryDescriptor *inDescriptor, * If we finished cleanly return number of segments found * and update the position in the descriptor. */ - PhysicalSegment curSeg = { 0 }; + PhysicalSegment curSeg = { 0, 0 }; UInt curSegIndex = 0; UInt curTransferSize = 0; IOByteCount inDescriptorLength = inDescriptor->getLength(); - PhysicalSegment seg = { 0 }; + PhysicalSegment seg = { 0, 0 }; while ((seg.location) || (fromPosition < inDescriptorLength)) { diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 9ed812170..389d75a1e 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -37,21 +37,29 @@ #include +#include "IOKitKernelInternal.h" + #include #include #include #include #include -#include + +#include __BEGIN_DECLS #include +#include +#include #include #include #ifndef i386 +#include +#include struct phys_entry *pmap_find_physentry(ppnum_t pa); #endif + extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); void ipc_port_release_send(ipc_port_t port); @@ -83,26 +91,8 @@ memory_object_iopl_request( unsigned int *page_list_count, int *flags); -/* - * Page fault handling based on vm_map (or entries therein) - */ -extern kern_return_t vm_fault( - vm_map_t map, - vm_offset_t vaddr, - vm_prot_t fault_type, - boolean_t change_wiring, - int interruptible, - pmap_t caller_pmap, - vm_offset_t caller_pmap_addr); - unsigned int IOTranslateCacheBits(struct phys_entry *pp); -vm_map_t IOPageableMapForAddress( vm_address_t address ); - -typedef kern_return_t (*IOIteratePageableMapsCallback)(vm_map_t map, void * ref); - -kern_return_t IOIteratePageableMaps(vm_size_t size, - IOIteratePageableMapsCallback callback, void * ref); __END_DECLS #define kIOMaximumMappedIOByteCount (512*1024*1024) @@ -130,6 +120,115 @@ static IORecursiveLock * gIOMemoryLock; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +class _IOMemoryMap : public IOMemoryMap +{ + OSDeclareDefaultStructors(_IOMemoryMap) +public: + IOMemoryDescriptor * memory; + IOMemoryMap * superMap; + IOByteCount offset; + IOByteCount length; + IOVirtualAddress logical; + task_t addressTask; + vm_map_t addressMap; + IOOptionBits options; + upl_t redirUPL; + ipc_port_t redirEntry; + IOMemoryDescriptor * owner; + +protected: + virtual void taggedRelease(const void *tag = 0) const; + virtual void free(); + +public: + + // IOMemoryMap methods + virtual IOVirtualAddress getVirtualAddress(); + virtual IOByteCount getLength(); + virtual task_t getAddressTask(); + virtual IOMemoryDescriptor * getMemoryDescriptor(); + virtual IOOptionBits getMapOptions(); + + virtual IOReturn unmap(); + virtual void taskDied(); + + virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, + IOOptionBits options, + IOByteCount offset = 0); + + virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, + IOByteCount * length); + + // for IOMemoryDescriptor use + _IOMemoryMap * copyCompatible( + IOMemoryDescriptor * owner, + task_t intoTask, + IOVirtualAddress toAddress, + IOOptionBits options, + IOByteCount offset, + IOByteCount length ); + + bool initCompatible( + IOMemoryDescriptor * memory, + IOMemoryMap * superMap, + IOByteCount offset, + IOByteCount length ); + + bool initWithDescriptor( + IOMemoryDescriptor * memory, + task_t intoTask, + IOVirtualAddress toAddress, + IOOptionBits options, + IOByteCount offset, + IOByteCount length ); + + IOReturn redirect( + task_t intoTask, bool redirect ); +}; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// Some data structures and accessor macros used by the initWithOptions +// Function + +enum ioPLBlockFlags { + kIOPLOnDevice = 0x00000001, + kIOPLExternUPL = 0x00000002, +}; + +struct typePersMDData +{ + const IOGeneralMemoryDescriptor *fMD; + ipc_port_t fMemEntry; +}; + +struct ioPLBlock { + upl_t fIOPL; + vm_address_t fIOMDOffset; // The offset of this iopl in descriptor + vm_offset_t fPageInfo; // Pointer to page list or index into it + ppnum_t fMappedBase; // Page number of first page in this iopl + unsigned int fPageOffset; // Offset within first page of iopl + unsigned int fFlags; // Flags +}; 
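// ioGMDData (below) is allocated as a single variable-length block: the
// fixed header, then fPageCnt upl_page_info_t entries, then the ioPLBlock
// run. The two flexible arrays share the struct's tail, and getIOPLList()
// finds where the blocks really start, past the page list. A hedged sketch
// of the arithmetic the macros below encode, for hypothetical counts of
// p pages and u iopls:
//
//   bytes  = sizeof(ioGMDData)
//          + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock);
//   blocks = (ioPLBlock *) &dataP->fPageList[dataP->fPageCnt];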
+ +struct ioGMDData { + IOMapper *fMapper; + unsigned int fPageCnt; + upl_page_info_t fPageList[]; + ioPLBlock fBlocks[]; +}; + +#define getDataP(osd) ((ioGMDData *) (osd)->getBytesNoCopy()) +#define getIOPLList(d) ((ioPLBlock *) &(d->fPageList[d->fPageCnt])) +#define getNumIOPL(osd, d) \ + (((osd)->getLength() - ((char *) getIOPLList(d) - (char *) d)) / sizeof(ioPLBlock)) +#define getPageList(d) (&(d->fPageList[0])) +#define computeDataSize(p, u) \ + (sizeof(ioGMDData) + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock)) + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + #define next_page(a) ( trunc_page_32(a) + PAGE_SIZE ) @@ -155,8 +254,12 @@ kern_return_t device_data_action( LOCK; memDesc = ref->memory; if( memDesc) + { + memDesc->retain(); kr = memDesc->handleFault( device_pager, 0, 0, offset, size, kIOMapDefaultCache /*?*/); + memDesc->release(); + } else kr = KERN_ABORTED; UNLOCK; @@ -179,7 +282,26 @@ kern_return_t device_close( return( kIOReturnSuccess ); } +}; // end extern "C" +// Note this inline function uses C++ reference arguments to return values +// This means that pointers are not passed and NULLs don't have to be +// checked for as a NULL reference is illegal. +static inline void +getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables + UInt32 type, IOGeneralMemoryDescriptor::Ranges r, UInt32 ind) +{ + assert(kIOMemoryTypePhysical == type || kIOMemoryTypeUIO == type + || kIOMemoryTypeVirtual == type); + if (kIOMemoryTypeUIO == type) { + user_size_t us; + uio_getiov((uio_t) r.uio, ind, &addr, &us); len = us; + } + else { + IOVirtualRange cur = r.v[ind]; + addr = cur.address; + len = cur.length; + } } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -325,6 +447,87 @@ IOMemoryDescriptor::withSubRange(IOMemoryDescriptor * of, return self; } +IOMemoryDescriptor * IOMemoryDescriptor:: + withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD) +{ + IOGeneralMemoryDescriptor *origGenMD = + OSDynamicCast(IOGeneralMemoryDescriptor, originalMD); + + if (origGenMD) + return IOGeneralMemoryDescriptor:: + withPersistentMemoryDescriptor(origGenMD); + else + return 0; +} + +IOMemoryDescriptor * IOGeneralMemoryDescriptor:: + withPersistentMemoryDescriptor(IOGeneralMemoryDescriptor *originalMD) +{ + ipc_port_t sharedMem = (ipc_port_t) originalMD->createNamedEntry(); + + if (!sharedMem) + return 0; + + if (sharedMem == originalMD->_memEntry) { + originalMD->retain(); // Add a new reference to ourselves + ipc_port_release_send(sharedMem); // Remove extra send right + return originalMD; + } + + IOGeneralMemoryDescriptor * self = new IOGeneralMemoryDescriptor; + typePersMDData initData = { originalMD, sharedMem }; + + if (self + && !self->initWithOptions(&initData, 1, 0, 0, kIOMemoryTypePersistentMD, 0)) { + self->release(); + self = 0; + } + return self; +} + +void *IOGeneralMemoryDescriptor::createNamedEntry() +{ + kern_return_t error; + ipc_port_t sharedMem; + + IOOptionBits type = _flags & kIOMemoryTypeMask; + + user_addr_t range0Addr; + IOByteCount range0Len; + getAddrLenForInd(range0Addr, range0Len, type, _ranges, 0); + range0Addr = trunc_page_64(range0Addr); + + vm_size_t size = ptoa_32(_pages); + vm_address_t kernelPage = (vm_address_t) range0Addr; + + vm_map_t theMap = ((_task == kernel_task) + && (kIOMemoryBufferPageable & _flags)) + ? 
IOPageableMapForAddress(kernelPage) + : get_task_map(_task); + + memory_object_size_t actualSize = size; + vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE; + if (_memEntry) + prot |= MAP_MEM_NAMED_REUSE; + + error = mach_make_memory_entry_64(theMap, + &actualSize, range0Addr, prot, &sharedMem, (ipc_port_t) _memEntry); + + if (KERN_SUCCESS == error) { + if (actualSize == size) { + return sharedMem; + } else { +#if IOASSERT + IOLog("IOGMD::mach_make_memory_entry_64 (%08llx) size (%08lx:%08x)\n", + (UInt64)range0Addr, (UInt32)actualSize, size); +#endif + ipc_port_release_send( sharedMem ); + } + } + + return MACH_PORT_NULL; +} + /* * initWithAddress: * @@ -402,16 +605,15 @@ IOGeneralMemoryDescriptor::initWithRanges( if (task) { mdOpts |= kIOMemoryTypeVirtual; + + // Auto-prepare if this is a kernel memory descriptor as very few + // clients bother to prepare() kernel memory. + // But it was not enforced so what are you going to do? if (task == kernel_task) mdOpts |= kIOMemoryAutoPrepare; } else mdOpts |= kIOMemoryTypePhysical; - - // @@@ gvdl: Need to remove this - // Auto-prepare if this is a kernel memory descriptor as very few - // clients bother to prepare() kernel memory. - // But it has been enforced so what are you going to do? return initWithOptions(ranges, count, 0, task, mdOpts, /* mapper */ 0); } @@ -420,8 +622,8 @@ IOGeneralMemoryDescriptor::initWithRanges( * initWithOptions: * * IOMemoryDescriptor. The buffer is made up of several virtual address ranges, - * from a given task or several physical ranges or finally an UPL from the ubc - * system. + * from a given task, several physical ranges, an UPL from the ubc + * system or a uio (may be 64bit) from the BSD subsystem. * * Passing the ranges as a reference will avoid an extra allocation. * @@ -430,36 +632,6 @@ IOGeneralMemoryDescriptor::initWithRanges( * I/O Kit classes, although it is supported here. */ -enum ioPLBlockFlags { - kIOPLOnDevice = 0x00000001, - kIOPLExternUPL = 0x00000002, -}; - -struct ioPLBlock { - upl_t fIOPL; - vm_address_t fIOMDOffset; // The offset of this iopl in descriptor - vm_offset_t fPageInfo; // Pointer to page list or index into it - ppnum_t fMappedBase; // Page number of first page in this iopl - unsigned int fPageOffset; // Offset within first page of iopl - unsigned int fFlags; // Flags -}; - -struct ioGMDData { - IOMapper *fMapper; - unsigned int fPageCnt; - upl_page_info_t fPageList[0]; // @@@ gvdl need to get rid of this - // should be able to use upl directly - ioPLBlock fBlocks[0]; -}; - -#define getDataP(osd) ((ioGMDData *) (osd)->getBytesNoCopy()) -#define getIOPLList(d) ((ioPLBlock *) &(d->fPageList[d->fPageCnt])) -#define getNumIOPL(d,len) \ - ((len - ((char *) getIOPLList(d) - (char *) d)) / sizeof(ioPLBlock)) -#define getPageList(d) (&(d->fPageList[0])) -#define computeDataSize(p, u) \ - (sizeof(ioGMDData) + p * sizeof(upl_page_info_t) + u * sizeof(ioPLBlock)) - bool IOGeneralMemoryDescriptor::initWithOptions(void * buffers, UInt32 count, @@ -468,8 +640,36 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, IOOptionBits options, IOMapper * mapper) { + IOOptionBits type = options & kIOMemoryTypeMask; + + // Grab the original MD's configuation data to initialse the + // arguments to this function. + if (kIOMemoryTypePersistentMD == type) { + + typePersMDData *initData = (typePersMDData *) buffers; + const IOGeneralMemoryDescriptor *orig = initData->fMD; + ioGMDData *dataP = getDataP(orig->_memoryEntries); + + // Only accept persistent memory descriptors with valid dataP data. 
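// The assert below enforces what the factory methods above established:
// a persistent descriptor is always built over a single range, and its
// _memoryEntries data must already be populated so the original task
// and mapper can be recovered from it.  Caller-side, this whole path is
// reached through the factory alone -- a sketch, assuming md was
// created with kIOMemoryPersistent:
//
//     IOMemoryDescriptor *copy =
//         IOMemoryDescriptor::withPersistentMemoryDescriptor(md);
//     // copy may be md itself (retained) when MAP_MEM_NAMED_REUSE
//     // handed back the existing named entry; release it as usual.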
+ assert(orig->_rangesCount == 1); + if ( !(orig->_flags & kIOMemoryPersistent) || !dataP) + return false; + + _memEntry = initData->fMemEntry; // Grab the new named entry + options = orig->_flags | kIOMemoryAsReference; + _singleRange = orig->_singleRange; // Initialise our range + buffers = &_singleRange; + count = 1; - switch (options & kIOMemoryTypeMask) { + // Now grab the original task and whatever mapper was previously used + task = orig->_task; + mapper = dataP->fMapper; + + // We are ready to go through the original initialisation now + } + + switch (type) { + case kIOMemoryTypeUIO: case kIOMemoryTypeVirtual: assert(task); if (!task) @@ -479,11 +679,11 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, case kIOMemoryTypePhysical: // Neither Physical nor UPL should have a task mapper = kIOMapperNone; + case kIOMemoryTypeUPL: assert(!task); break; default: -panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing return false; /* bad argument */ } @@ -508,6 +708,8 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing unmapFromKernel(); if (_ranges.v && _rangesIsAllocated) IODelete(_ranges.v, IOVirtualRange, _rangesCount); + if (_memEntry) + { ipc_port_release_send((ipc_port_t) _memEntry); _memEntry = 0; } } else { if (!super::init()) @@ -523,6 +725,8 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing gIOSystemMapper = mapper = IOMapper::gSystem; } + // Remove the dynamic internal use flags from the initial setting + options &= ~(kIOMemoryPreparedReadOnly); _flags = options; _task = task; @@ -533,7 +737,7 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing _cachedPhysicalAddress = 0; _cachedVirtualAddress = 0; - if ( (options & kIOMemoryTypeMask) == kIOMemoryTypeUPL) { + if (kIOMemoryTypeUPL == type) { ioGMDData *dataP; unsigned int dataSize = computeDataSize(/* pages */ 0, /* upls */ 1); @@ -564,8 +768,6 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing iopl.fFlags = pageList->device | kIOPLExternUPL; iopl.fIOMDOffset = 0; if (!pageList->device) { - // @@@ gvdl: Ask JoeS are the pages contiguious with the list? - // or there a chance that we may be inserting 0 phys_addrs? // Pre-compute the offset into the UPL's page list pageList = &pageList[atop_32(offset)]; offset &= PAGE_MASK; @@ -583,50 +785,60 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing _memoryEntries->appendBytes(&iopl, sizeof(iopl)); } - else { /* kIOMemoryTypeVirtual | kIOMemoryTypePhysical */ - IOVirtualRange *ranges = (IOVirtualRange *) buffers; - - /* - * Initialize the memory descriptor. - */ - - _length = 0; - _pages = 0; - for (unsigned ind = 0; ind < count; ind++) { - IOVirtualRange cur = ranges[ind]; - - _length += cur.length; - _pages += atop_32(cur.address + cur.length + PAGE_MASK) - - atop_32(cur.address); - } - - _ranges.v = 0; - _rangesIsAllocated = !(options & kIOMemoryAsReference); - _rangesCount = count; + else { + // kIOMemoryTypeVirtual | kIOMemoryTypeUIO | kIOMemoryTypePhysical + + // Initialize the memory descriptor + if (options & kIOMemoryAsReference) { + _rangesIsAllocated = false; + + // Hack assignment to get the buffer arg into _ranges. + // I'd prefer to do _ranges = (Ranges) buffers, but that doesn't + // work, C++ sigh. + // This also initialises the uio & physical ranges. 
+ _ranges.v = (IOVirtualRange *) buffers; + } + else { + assert(kIOMemoryTypeUIO != type); - if (options & kIOMemoryAsReference) - _ranges.v = ranges; - else { - _ranges.v = IONew(IOVirtualRange, count); - if (!_ranges.v) + _rangesIsAllocated = true; + _ranges.v = IONew(IOVirtualRange, count); + if (!_ranges.v) return false; - bcopy(/* from */ ranges, _ranges.v, - count * sizeof(IOVirtualRange)); - } + bcopy(buffers, _ranges.v, count * sizeof(IOVirtualRange)); + } + + // Find starting address within the vector of ranges + Ranges vec = _ranges; + UInt32 length = 0; + UInt32 pages = 0; + for (unsigned ind = 0; ind < count; ind++) { + user_addr_t addr; + UInt32 len; + + // addr & len are returned by this function + getAddrLenForInd(addr, len, type, vec, ind); + pages += (atop_64(addr + len + PAGE_MASK) - atop_64(addr)); + len += length; + assert(len > length); // Check for 32 bit wrap around + length = len; + } + _length = length; + _pages = pages; + _rangesCount = count; // Auto-prepare memory at creation time. // Implied completion when descriptor is free-ed - if ( (options & kIOMemoryTypeMask) == kIOMemoryTypePhysical) - _wireCount++; // Physical MDs are start out wired - else { /* kIOMemoryTypeVirtual */ + if (kIOMemoryTypePhysical == type) + _wireCount++; // Physical MDs are, by definition, wired + else { /* kIOMemoryTypeVirtual | kIOMemoryTypeUIO */ ioGMDData *dataP; - unsigned int dataSize = - computeDataSize(_pages, /* upls */ _rangesCount * 2); + unsigned dataSize = computeDataSize(_pages, /* upls */ count * 2); if (!_memoryEntries) { _memoryEntries = OSData::withCapacity(dataSize); if (!_memoryEntries) - return false; + return false; } else if (!_memoryEntries->initWithCapacity(dataSize)) return false; @@ -636,38 +848,8 @@ panic("IOGMD::iWO(): bad type"); // @@@ gvdl: for testing dataP->fMapper = mapper; dataP->fPageCnt = _pages; - if (kIOMemoryPersistent & _flags) - { - kern_return_t error; - ipc_port_t sharedMem; - - vm_size_t size = _pages << PAGE_SHIFT; - vm_address_t startPage; - - startPage = trunc_page_32(_ranges.v[0].address); - - vm_map_t theMap = ((_task == kernel_task) && (kIOMemoryBufferPageable & _flags)) - ? 
IOPageableMapForAddress(startPage) - : get_task_map(_task); - - vm_size_t actualSize = size; - error = mach_make_memory_entry( theMap, - &actualSize, startPage, - VM_PROT_READ | VM_PROT_WRITE, &sharedMem, - NULL ); - - if (KERN_SUCCESS == error) { - if (actualSize == round_page_32(size)) { - _memEntry = (void *) sharedMem; - } else { -#if IOASSERT - IOLog("mach_make_memory_entry_64 (%08x) size (%08lx:%08x)\n", - startPage, (UInt32)actualSize, size); -#endif - ipc_port_release_send( sharedMem ); - } - } - } + if ( (kIOMemoryPersistent & _flags) && !_memEntry) + _memEntry = createNamedEntry(); if ((_flags & kIOMemoryAutoPrepare) && prepare() != kIOReturnSuccess) @@ -889,11 +1071,10 @@ IOPhysicalAddress IOGeneralMemoryDescriptor::getPhysicalSegment length += cur.length; } - // @@@ gvdl: should assert(address); + // @@@ gvdl: should be assert(address); // but can't as NVidia GeForce creates a bogus physical mem - { - assert(address || /*nvidia*/(!_ranges.p[0].address && 1 == _rangesCount)); - } + assert(address + || /* nvidia */ (!_ranges.p[0].address && 1 == _rangesCount)); assert(length); } else do { @@ -910,7 +1091,7 @@ IOPhysicalAddress IOGeneralMemoryDescriptor::getPhysicalSegment ioGMDData * dataP = getDataP(_memoryEntries); const ioPLBlock *ioplList = getIOPLList(dataP); - UInt ind, numIOPLs = getNumIOPL(dataP, _memoryEntries->getLength()); + UInt ind, numIOPLs = getNumIOPL(_memoryEntries, dataP); upl_page_info_t *pageList = getPageList(dataP); assert(numIOPLs > 0); @@ -1030,87 +1211,50 @@ addr64_t IOMemoryDescriptor::getPhysicalSegment64 return phys64; } -// Note this function is NOT a virtual function -void * IOGeneralMemoryDescriptor::getBackingID() const -{ - if (!_memEntry) // Not created as a persistent memory descriptor - return 0; - - vm_size_t size = _pages << PAGE_SHIFT; - vm_size_t seenSize = 0; - vm_address_t basePage = trunc_page_32(_ranges.v[0].address); - void *retObjID = 0; - - vm_map_t theMap = - ((_task == kernel_task) && (kIOMemoryBufferPageable & _flags)) - ? IOPageableMapForAddress(basePage) - : get_task_map(_task); - - - for (;;) { - vm_region_object_info_data_64_t objInfo; - vm_address_t actualPage = basePage; - vm_size_t actualSize; - mach_msg_type_number_t objInfoSize; - kern_return_t error; - - objInfoSize = VM_REGION_OBJECT_INFO_COUNT_64; - error = vm_region_64(theMap, - &actualPage, - &actualSize, - VM_REGION_OBJECT_INFO_64, - (vm_region_info_t) &objInfo, - &objInfoSize, - 0); - - if (KERN_SUCCESS != error || actualSize == 0 || actualPage > basePage - || (retObjID && retObjID != (void *) objInfo.object_id)) - return 0; - - actualPage += actualSize; // Calculate the end address - seenSize += actualPage - basePage; // Size of overlap - basePage = actualPage; // Start here for next loop - if (seenSize >= size) - return (void *) objInfo.object_id; - - if (!retObjID) - retObjID = (void *) objInfo.object_id; - } -} - - -IOPhysicalAddress IOGeneralMemoryDescriptor::getSourceSegment - (IOByteCount offset, IOByteCount *lengthOfSegment) +IOPhysicalAddress IOGeneralMemoryDescriptor:: +getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) { IOPhysicalAddress address = 0; IOPhysicalLength length = 0; + IOOptionBits type = _flags & kIOMemoryTypeMask; assert(offset <= _length); - if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypeUPL) + if ( type == kIOMemoryTypeUPL) return super::getSourceSegment( offset, lengthOfSegment ); - - if ( offset < _length ) // (within bounds?) + else if ( offset < _length ) // (within bounds?) 
{ unsigned rangesIndex = 0; - - for ( ; offset >= _ranges.v[rangesIndex].length; rangesIndex++ ) - { - offset -= _ranges.v[rangesIndex].length; // (make offset relative) - } - - address = _ranges.v[rangesIndex].address + offset; - length = _ranges.v[rangesIndex].length - offset; - - for ( ++rangesIndex; rangesIndex < _rangesCount; rangesIndex++ ) - { - if ( address + length != _ranges.v[rangesIndex].address ) break; - - length += _ranges.v[rangesIndex].length; // (coalesce ranges) - } - - assert(address); - if ( address == 0 ) length = 0; + Ranges vec = _ranges; + user_addr_t addr; + + // Find starting address within the vector of ranges + for (;;) { + getAddrLenForInd(addr, length, type, vec, rangesIndex); + if (offset < length) + break; + offset -= length; // (make offset relative) + rangesIndex++; + } + + // Now that we have the starting range, + // lets find the last contiguous range + addr += offset; + length -= offset; + + for ( ++rangesIndex; rangesIndex < _rangesCount; rangesIndex++ ) { + user_addr_t newAddr; + IOPhysicalLength newLen; + + getAddrLenForInd(newAddr, newLen, type, vec, rangesIndex); + if (addr + length != newAddr) + break; + length += newLen; + } + if (addr) + address = (IOPhysicalAddress) addr; // Truncate address to 32bit + else + length = 0; } if ( lengthOfSegment ) *lengthOfSegment = length; @@ -1131,6 +1275,124 @@ IOPhysicalAddress IOGeneralMemoryDescriptor::getSourceSegment /* DEPRECATED */ } /* DEPRECATED */ /* USE INSTEAD: map(), readBytes(), writeBytes() */ + + +IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ) +{ + IOReturn err = kIOReturnSuccess; + vm_purgable_t control; + int state; + + do + { + if (!_memEntry) + { + err = kIOReturnNotReady; + break; + } + + control = VM_PURGABLE_SET_STATE; + switch (newState) + { + case kIOMemoryPurgeableKeepCurrent: + control = VM_PURGABLE_GET_STATE; + break; + + case kIOMemoryPurgeableNonVolatile: + state = VM_PURGABLE_NONVOLATILE; + break; + case kIOMemoryPurgeableVolatile: + state = VM_PURGABLE_VOLATILE; + break; + case kIOMemoryPurgeableEmpty: + state = VM_PURGABLE_EMPTY; + break; + default: + err = kIOReturnBadArgument; + break; + } + + if (kIOReturnSuccess != err) + break; + + err = mach_memory_entry_purgable_control((ipc_port_t) _memEntry, control, &state); + + if (oldState) + { + if (kIOReturnSuccess == err) + { + switch (state) + { + case VM_PURGABLE_NONVOLATILE: + state = kIOMemoryPurgeableNonVolatile; + break; + case VM_PURGABLE_VOLATILE: + state = kIOMemoryPurgeableVolatile; + break; + case VM_PURGABLE_EMPTY: + state = kIOMemoryPurgeableEmpty; + break; + default: + state = kIOMemoryPurgeableNonVolatile; + err = kIOReturnNotReady; + break; + } + *oldState = state; + } + } + } + while (false); + + return (err); +} + +extern "C" void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count); +extern "C" void dcache_incoherent_io_store64(addr64_t pa, unsigned int count); + +IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, + IOByteCount offset, IOByteCount length ) +{ + IOByteCount remaining; + void (*func)(addr64_t pa, unsigned int count) = 0; + + switch (options) + { + case kIOMemoryIncoherentIOFlush: + func = &dcache_incoherent_io_flush64; + break; + case kIOMemoryIncoherentIOStore: + func = &dcache_incoherent_io_store64; + break; + } + + if (!func) + return (kIOReturnUnsupported); + + remaining = length = min(length, getLength() - offset); + while (remaining) + // (process another target segment?) 
+ { + addr64_t dstAddr64; + IOByteCount dstLen; + + dstAddr64 = getPhysicalSegment64(offset, &dstLen); + if (!dstAddr64) + break; + + // Clip segment length to remaining + if (dstLen > remaining) + dstLen = remaining; + + (*func)(dstAddr64, dstLen); + + offset += dstLen; + remaining -= dstLen; + } + + return (remaining ? kIOReturnUnderrun : kIOReturnSuccess); +} + #ifdef __ppc__ extern vm_offset_t static_memory_end; #define io_kernel_static_end static_memory_end @@ -1141,14 +1403,12 @@ extern vm_offset_t first_avail; static kern_return_t io_get_kernel_static_upl( - vm_map_t map, + vm_map_t /* map */, vm_address_t offset, vm_size_t *upl_size, upl_t *upl, upl_page_info_array_t page_list, - unsigned int *count, - int *flags, - int force_data_sync) + unsigned int *count) { unsigned int pageCount, page; ppnum_t phys; @@ -1177,6 +1437,7 @@ io_get_kernel_static_upl( IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) { + IOOptionBits type = _flags & kIOMemoryTypeMask; IOReturn error = kIOReturnNoMemory; ioGMDData *dataP; ppnum_t mapBase = 0; @@ -1184,6 +1445,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) ipc_port_t sharedMem = (ipc_port_t) _memEntry; assert(!_wireCount); + assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type); if (_pages >= gIOMaximumMappedIOPageCount) return kIOReturnNoResources; @@ -1217,36 +1479,47 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) } uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE; - // - // Check user read/write access to the data buffer. - // - unsigned int pageIndex = 0; - IOByteCount mdOffset = 0; + // Find the appropriate vm_map for the given task vm_map_t curMap; if (_task == kernel_task && (kIOMemoryBufferPageable & _flags)) curMap = 0; else { curMap = get_task_map(_task); } + // Iterate over the vector of virtual ranges + Ranges vec = _ranges; + unsigned int pageIndex = 0; + IOByteCount mdOffset = 0; for (UInt range = 0; range < _rangesCount; range++) { ioPLBlock iopl; - IOVirtualRange curRange = _ranges.v[range]; - vm_address_t startPage; + user_addr_t startPage; IOByteCount numBytes; - startPage = trunc_page_32(curRange.address); - iopl.fPageOffset = (short) curRange.address & PAGE_MASK; + // Get the startPage address and length of vec[range] + getAddrLenForInd(startPage, numBytes, type, vec, range); + iopl.fPageOffset = (short) startPage & PAGE_MASK; + numBytes += iopl.fPageOffset; + startPage = trunc_page_64(startPage); + if (mapper) iopl.fMappedBase = mapBase + pageIndex; else iopl.fMappedBase = 0; - numBytes = iopl.fPageOffset + curRange.length; + // Iterate over the current range, creating UPLs while (numBytes) { dataP = getDataP(_memoryEntries); - vm_map_t theMap = - (curMap)? 
curMap - : IOPageableMapForAddress(startPage); + vm_address_t kernelStart = (vm_address_t) startPage; + vm_map_t theMap; + if (curMap) + theMap = curMap; + else if (!sharedMem) { + assert(_task == kernel_task); + theMap = IOPageableMapForAddress(kernelStart); + } + else + theMap = NULL; + upl_page_info_array_t pageInfo = getPageList(dataP); int ioplFlags = uplFlags; upl_page_list_ptr_t baseInfo = &pageInfo[pageIndex]; @@ -1254,36 +1527,32 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) vm_size_t ioplSize = round_page_32(numBytes); unsigned int numPageInfo = atop_32(ioplSize); - if ((theMap == kernel_map) && (startPage < io_kernel_static_end)) - { + if (theMap == kernel_map && kernelStart < io_kernel_static_end) { error = io_get_kernel_static_upl(theMap, - startPage, - &ioplSize, - &iopl.fIOPL, - baseInfo, - &numPageInfo, - &ioplFlags, - false); - - } else if (sharedMem && (kIOMemoryPersistent & _flags)) { - + kernelStart, + &ioplSize, + &iopl.fIOPL, + baseInfo, + &numPageInfo); + } + else if (sharedMem) { error = memory_object_iopl_request(sharedMem, - ptoa_32(pageIndex), - &ioplSize, - &iopl.fIOPL, - baseInfo, - &numPageInfo, - &ioplFlags); - - } else { - error = vm_map_get_upl(theMap, - startPage, - &ioplSize, - &iopl.fIOPL, - baseInfo, - &numPageInfo, - &ioplFlags, - false); + ptoa_32(pageIndex), + &ioplSize, + &iopl.fIOPL, + baseInfo, + &numPageInfo, + &ioplFlags); + } + else { + assert(theMap); + error = vm_map_create_upl(theMap, + startPage, + &ioplSize, + &iopl.fIOPL, + baseInfo, + &numPageInfo, + &ioplFlags); } assert(ioplSize); @@ -1314,14 +1583,17 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if ((_flags & kIOMemoryAutoPrepare) && iopl.fIOPL) { - kernel_upl_commit(iopl.fIOPL, 0, 0); + upl_commit(iopl.fIOPL, 0, 0); + upl_deallocate(iopl.fIOPL); iopl.fIOPL = 0; } if (!_memoryEntries->appendBytes(&iopl, sizeof(iopl))) { // Clean up partial created and unsaved iopl - if (iopl.fIOPL) - kernel_upl_abort(iopl.fIOPL, 0); + if (iopl.fIOPL) { + upl_abort(iopl.fIOPL, 0); + upl_deallocate(iopl.fIOPL); + } goto abortExit; } @@ -1348,14 +1620,17 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) abortExit: { dataP = getDataP(_memoryEntries); - UInt done = getNumIOPL(dataP, _memoryEntries->getLength()); + UInt done = getNumIOPL(_memoryEntries, dataP); ioPLBlock *ioplList = getIOPLList(dataP); for (UInt range = 0; range < done; range++) { - if (ioplList[range].fIOPL) - kernel_upl_abort(ioplList[range].fIOPL, 0); + if (ioplList[range].fIOPL) { + upl_abort(ioplList[range].fIOPL, 0); + upl_deallocate(ioplList[range].fIOPL); + } } + (void) _memoryEntries->initWithBytes(dataP, sizeof(ioGMDData)); // == setLength() if (mapper && mapBase) mapper->iovmFree(mapBase, _pages); @@ -1375,9 +1650,11 @@ abortExit: */ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection) { - IOReturn error = kIOReturnSuccess; + IOReturn error = kIOReturnSuccess; + IOOptionBits type = _flags & kIOMemoryTypeMask; - if (!_wireCount && (_flags & kIOMemoryTypeMask) == kIOMemoryTypeVirtual) { + if (!_wireCount + && (kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type) ) { error = wireVirtual(forDirection); if (error) return error; @@ -1406,23 +1683,27 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) _wireCount--; if (!_wireCount) { - if ((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) { + IOOptionBits type = _flags & kIOMemoryTypeMask; + + if (kIOMemoryTypePhysical == type) { /* 
kIOMemoryTypePhysical */ // DO NOTHING } else { ioGMDData * dataP = getDataP(_memoryEntries); ioPLBlock *ioplList = getIOPLList(dataP); - UInt count = getNumIOPL(dataP, _memoryEntries->getLength()); + UInt count = getNumIOPL(_memoryEntries, dataP); if (dataP->fMapper && _pages && ioplList[0].fMappedBase) dataP->fMapper->iovmFree(ioplList[0].fMappedBase, _pages); // Only complete iopls that we created which are for TypeVirtual - if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypeVirtual) { + if (kIOMemoryTypeVirtual == type || kIOMemoryTypeUIO == type) { for (UInt ind = 0; ind < count; ind++) - if (ioplList[ind].fIOPL) - kernel_upl_commit(ioplList[ind].fIOPL, 0, 0); + if (ioplList[ind].fIOPL) { + upl_commit(ioplList[ind].fIOPL, 0, 0); + upl_deallocate(ioplList[ind].fIOPL); + } } (void) _memoryEntries->initWithBytes(dataP, sizeof(ioGMDData)); // == setLength() @@ -1441,37 +1722,51 @@ IOReturn IOGeneralMemoryDescriptor::doMap( kern_return_t kr; ipc_port_t sharedMem = (ipc_port_t) _memEntry; + IOOptionBits type = _flags & kIOMemoryTypeMask; + Ranges vec = _ranges; + + user_addr_t range0Addr = 0; + IOByteCount range0Len = 0; + + if (vec.v) + getAddrLenForInd(range0Addr, range0Len, type, vec, 0); + // mapping source == dest? (could be much better) - if( _task && (addressMap == get_task_map(_task)) && (options & kIOMapAnywhere) - && (1 == _rangesCount) && (0 == sourceOffset) - && (length <= _ranges.v[0].length) ) { - *atAddress = _ranges.v[0].address; + if( _task + && (addressMap == get_task_map(_task)) && (options & kIOMapAnywhere) + && (1 == _rangesCount) && (0 == sourceOffset) + && range0Addr && (length <= range0Len) ) { + if (sizeof(user_addr_t) > 4 && ((UInt64) range0Addr) >> 32) + return kIOReturnOverrun; // Doesn't fit in 32bit return field + else { + *atAddress = range0Addr; return( kIOReturnSuccess ); + } } if( 0 == sharedMem) { - vm_size_t size = _pages << PAGE_SHIFT; + vm_size_t size = ptoa_32(_pages); if( _task) { #ifndef i386 - vm_size_t actualSize = size; - kr = mach_make_memory_entry( get_task_map(_task), - &actualSize, _ranges.v[0].address, + memory_object_size_t actualSize = size; + kr = mach_make_memory_entry_64(get_task_map(_task), + &actualSize, range0Addr, VM_PROT_READ | VM_PROT_WRITE, &sharedMem, NULL ); if( (KERN_SUCCESS == kr) && (actualSize != round_page_32(size))) { #if IOASSERT - IOLog("mach_make_memory_entry_64 (%08x) size (%08lx:%08x)\n", - _ranges.v[0].address, (UInt32)actualSize, size); + IOLog("mach_make_memory_entry_64 (%08llx) size (%08lx:%08x)\n", + range0Addr, (UInt32) actualSize, size); #endif kr = kIOReturnVMError; ipc_port_release_send( sharedMem ); } if( KERN_SUCCESS != kr) -#endif /* i386 */ +#endif /* !i386 */ sharedMem = MACH_PORT_NULL; } else do { @@ -1548,6 +1843,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap( _memEntry = (void *) sharedMem; } + #ifndef i386 if( 0 == sharedMem) kr = kIOReturnVMError; @@ -1565,93 +1861,28 @@ IOReturn IOGeneralMemoryDescriptor::doUnmap( IOByteCount length ) { // could be much better - if( _task && (addressMap == get_task_map(_task)) && (1 == _rangesCount) - && (logical == _ranges.v[0].address) - && (length <= _ranges.v[0].length) ) + if( _task && (addressMap == get_task_map(_task)) && (1 == _rangesCount)) { + + IOOptionBits type = _flags & kIOMemoryTypeMask; + user_addr_t range0Addr; + IOByteCount range0Len; + + getAddrLenForInd(range0Addr, range0Len, type, _ranges, 0); + if (logical == range0Addr && length <= range0Len) return( kIOReturnSuccess ); + } return( super::doUnmap( addressMap, logical, length )); } /* * * * * * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -extern "C" { -// osfmk/device/iokit_rpc.c -extern kern_return_t IOMapPages( vm_map_t map, vm_offset_t va, vm_offset_t pa, - vm_size_t length, unsigned int mapFlags); -extern kern_return_t IOUnmapPages(vm_map_t map, vm_offset_t va, vm_size_t length); -}; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - OSDefineMetaClassAndAbstractStructors( IOMemoryMap, OSObject ) /* inline function implementation */ IOPhysicalAddress IOMemoryMap::getPhysicalAddress() { return( getPhysicalSegment( 0, 0 )); } -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -class _IOMemoryMap : public IOMemoryMap -{ - OSDeclareDefaultStructors(_IOMemoryMap) - - IOMemoryDescriptor * memory; - IOMemoryMap * superMap; - IOByteCount offset; - IOByteCount length; - IOVirtualAddress logical; - task_t addressTask; - vm_map_t addressMap; - IOOptionBits options; - -protected: - virtual void taggedRelease(const void *tag = 0) const; - virtual void free(); - -public: - - // IOMemoryMap methods - virtual IOVirtualAddress getVirtualAddress(); - virtual IOByteCount getLength(); - virtual task_t getAddressTask(); - virtual IOMemoryDescriptor * getMemoryDescriptor(); - virtual IOOptionBits getMapOptions(); - - virtual IOReturn unmap(); - virtual void taskDied(); - - virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - IOByteCount * length); - - // for IOMemoryDescriptor use - _IOMemoryMap * copyCompatible( - IOMemoryDescriptor * owner, - task_t intoTask, - IOVirtualAddress toAddress, - IOOptionBits options, - IOByteCount offset, - IOByteCount length ); - - bool initCompatible( - IOMemoryDescriptor * memory, - IOMemoryMap * superMap, - IOByteCount offset, - IOByteCount length ); - - bool initWithDescriptor( - IOMemoryDescriptor * memory, - task_t intoTask, - IOVirtualAddress toAddress, - IOOptionBits options, - IOByteCount offset, - IOByteCount length ); - - IOReturn redirect( - task_t intoTask, bool redirect ); -}; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #undef super #define super IOMemoryMap @@ -1698,21 +1929,29 @@ bool _IOMemoryMap::initWithDescriptor( IOByteCount _offset, IOByteCount _length ) { - bool ok; + bool ok; + bool redir = ((kIOMapUnique|kIOMapReference) == ((kIOMapUnique|kIOMapReference) & _options)); - if( (!_memory) || (!intoTask) || !super::init()) + if ((!_memory) || (!intoTask)) return( false); if( (_offset + _length) > _memory->getLength()) return( false); - addressMap = get_task_map(intoTask); - if( !addressMap) - return( false); - vm_map_reference(addressMap); + if (!redir) + { + if (!super::init()) + return(false); + addressMap = get_task_map(intoTask); + if( !addressMap) + return( false); + vm_map_reference(addressMap); + addressTask = intoTask; + logical = toAddress; + options = _options; + } _memory->retain(); - memory = _memory; offset = _offset; if( _length) @@ -1720,25 +1959,34 @@ bool _IOMemoryMap::initWithDescriptor( else length = _memory->getLength(); - addressTask = intoTask; - logical = toAddress; - options = _options; - if( options & kIOMapStatic) ok = true; else - ok = (kIOReturnSuccess == memory->doMap( addressMap, &logical, - options, offset, length )); - if( !ok) { - logical = 0; - memory->release(); - memory = 0; - vm_map_deallocate(addressMap); - addressMap = 0; + ok = (kIOReturnSuccess == _memory->doMap( addressMap, &toAddress, + _options, offset, length )); + if (ok || redir) + { + if (memory) + memory->release(); + memory 
= _memory; + logical = toAddress; + } + else + { + _memory->release(); + if (!redir) + { + logical = 0; + memory = 0; + vm_map_deallocate(addressMap); + addressMap = 0; + } } + return( ok ); } +/* LP64todo - these need to expand */ struct IOMemoryDescriptorMapAllocRef { ipc_port_t sharedMem; @@ -1852,28 +2100,75 @@ IOReturn IOMemoryDescriptor::doMap( if( 0 == length) length = getLength(); - sourceAddr = getSourceSegment( sourceOffset, NULL ); - assert( sourceAddr ); - pageOffset = sourceAddr - trunc_page_32( sourceAddr ); + sourceAddr = getSourceSegment( sourceOffset, NULL ); + pageOffset = sourceAddr - trunc_page_32( sourceAddr ); - ref.size = round_page_32( length + pageOffset ); + ref.size = round_page_32( length + pageOffset ); - logical = *atAddress; - if( options & kIOMapAnywhere) - // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE - ref.mapped = 0; - else { - ref.mapped = trunc_page_32( logical ); - if( (logical - ref.mapped) != pageOffset) { - err = kIOReturnVMError; - continue; - } - } + if ((kIOMapReference|kIOMapUnique) == ((kIOMapReference|kIOMapUnique) & options)) + { + upl_t redirUPL2; + vm_size_t size; + int flags; - if( ref.sharedMem && (addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) - err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); - else - err = IOMemoryDescriptorMapAlloc( addressMap, &ref ); + _IOMemoryMap * mapping = (_IOMemoryMap *) *atAddress; + ref.mapped = mapping->getVirtualAddress(); + + if (!_memEntry) + { + err = kIOReturnNotReadable; + continue; + } + + size = length; + flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL + | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; + + if (KERN_SUCCESS != memory_object_iopl_request((ipc_port_t) _memEntry, 0, &size, &redirUPL2, + NULL, NULL, + &flags)) + redirUPL2 = NULL; + + err = upl_transpose(redirUPL2, mapping->redirUPL); + if (kIOReturnSuccess != err) + { + IOLog("upl_transpose(%x)\n", err); + err = kIOReturnSuccess; + } + + if (redirUPL2) + { + upl_commit(redirUPL2, NULL, 0); + upl_deallocate(redirUPL2); + redirUPL2 = 0; + } + { + // swap the memEntries since they now refer to different vm_objects + void * me = _memEntry; + _memEntry = mapping->memory->_memEntry; + mapping->memory->_memEntry = me; + } + } + else + { + + logical = *atAddress; + if( options & kIOMapAnywhere) + // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE + ref.mapped = 0; + else { + ref.mapped = trunc_page_32( logical ); + if( (logical - ref.mapped) != pageOffset) { + err = kIOReturnVMError; + continue; + } + } + + if( ref.sharedMem && (addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) + err = IOIteratePageableMaps( ref.size, &IOMemoryDescriptorMapAlloc, &ref ); + else + err = IOMemoryDescriptorMapAlloc( addressMap, &ref ); + } if( err != KERN_SUCCESS) continue; @@ -2015,7 +2310,11 @@ IOReturn IOMemoryDescriptor::handleFault( /* handle. 
This is required for machine architecture independence.*/ if(!(kIOMemoryRedirected & _flags)) { - vm_fault(addressMap, address, 3, FALSE, FALSE, NULL, 0); + vm_fault(addressMap, + (vm_map_offset_t)address, + VM_PROT_READ|VM_PROT_WRITE, + FALSE, THREAD_UNINT, NULL, + (vm_map_offset_t)0); } /* *** Temporary Workaround *** */ @@ -2061,27 +2360,30 @@ IOReturn IOMemoryDescriptor::doUnmap( return( err ); } -IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool redirect ) +IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) { - IOReturn err; + IOReturn err = kIOReturnSuccess; _IOMemoryMap * mapping = 0; OSIterator * iter; LOCK; + if( doRedirect) + _flags |= kIOMemoryRedirected; + else + _flags &= ~kIOMemoryRedirected; + do { if( (iter = OSCollectionIterator::withCollection( _mappings))) { - while( (mapping = (_IOMemoryMap *) iter->getNextObject())) - mapping->redirect( safeTask, redirect ); + while( (mapping = (_IOMemoryMap *) iter->getNextObject())) + mapping->redirect( safeTask, doRedirect ); - iter->release(); - } + iter->release(); + } } while( false ); - if( redirect) - _flags |= kIOMemoryRedirected; - else { - _flags &= ~kIOMemoryRedirected; + if (!doRedirect) + { WAKEUP; } @@ -2090,33 +2392,35 @@ IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool redirect ) // temporary binary compatibility IOSubMemoryDescriptor * subMem; if( (subMem = OSDynamicCast( IOSubMemoryDescriptor, this))) - err = subMem->redirect( safeTask, redirect ); + err = subMem->redirect( safeTask, doRedirect ); else - err = kIOReturnSuccess; + err = kIOReturnSuccess; return( err ); } -IOReturn IOSubMemoryDescriptor::redirect( task_t safeTask, bool redirect ) +IOReturn IOSubMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) { - return( _parent->redirect( safeTask, redirect )); + return( _parent->redirect( safeTask, doRedirect )); } -IOReturn _IOMemoryMap::redirect( task_t safeTask, bool redirect ) +IOReturn _IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) { IOReturn err = kIOReturnSuccess; if( superMap) { -// err = ((_IOMemoryMap *)superMap)->redirect( safeTask, redirect ); +// err = ((_IOMemoryMap *)superMap)->redirect( safeTask, doRedirect ); } else { LOCK; if( logical && addressMap - && (get_task_map( safeTask) != addressMap) - && (0 == (options & kIOMapStatic))) { - - IOUnmapPages( addressMap, logical, length ); - if( !redirect) { + && (!safeTask || (get_task_map(safeTask) != addressMap)) + && (0 == (options & kIOMapStatic))) + { + IOUnmapPages( addressMap, logical, length ); + if(!doRedirect && safeTask + && ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical)) + { err = vm_deallocate( addressMap, logical, length ); err = memory->doMap( addressMap, &logical, (options & ~kIOMapAnywhere) /*| kIOMapReserve*/, @@ -2124,12 +2428,17 @@ IOReturn _IOMemoryMap::redirect( task_t safeTask, bool redirect ) } else err = kIOReturnSuccess; #ifdef DEBUG - IOLog("IOMemoryMap::redirect(%d, %p) %x:%lx from %p\n", redirect, this, logical, length, addressMap); + IOLog("IOMemoryMap::redirect(%d, %p) %x:%lx from %p\n", doRedirect, this, logical, length, addressMap); #endif } UNLOCK; } + if (((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + && safeTask + && (doRedirect != (0 != (memory->_flags & kIOMemoryRedirected)))) + memory->redirect(safeTask, doRedirect); + return( err ); } @@ -2190,9 +2499,21 @@ void _IOMemoryMap::free() memory->release(); } + if (owner && (owner != memory)) + { + LOCK; + owner->removeMapping(this); + UNLOCK; + } + if( superMap) 
superMap->release(); + if (redirUPL) { + upl_commit(redirUPL, NULL, 0); + upl_deallocate(redirUPL); + } + super::free(); } @@ -2236,6 +2557,8 @@ _IOMemoryMap * _IOMemoryMap::copyCompatible( if( (!task) || (!addressMap) || (addressMap != get_task_map(task))) return( 0 ); + if( options & kIOMapUnique) + return( 0 ); if( (options ^ _options) & kIOMapReadOnly) return( 0 ); if( (kIOMapDefaultCache != (_options & kIOMapCacheMask)) @@ -2270,12 +2593,12 @@ _IOMemoryMap * _IOMemoryMap::copyCompatible( } IOPhysicalAddress _IOMemoryMap::getPhysicalSegment( IOByteCount _offset, - IOPhysicalLength * length) + IOPhysicalLength * _length) { IOPhysicalAddress address; LOCK; - address = memory->getPhysicalSegment( offset + _offset, length ); + address = memory->getPhysicalSegment( offset + _offset, _length ); UNLOCK; return( address ); @@ -2310,24 +2633,24 @@ IOMemoryMap * IOMemoryDescriptor::setMapping( IOVirtualAddress mapAddress, IOOptionBits options ) { - _IOMemoryMap * map; + _IOMemoryMap * newMap; - map = new _IOMemoryMap; + newMap = new _IOMemoryMap; LOCK; - if( map - && !map->initWithDescriptor( this, intoTask, mapAddress, + if( newMap + && !newMap->initWithDescriptor( this, intoTask, mapAddress, options | kIOMapStatic, 0, getLength() )) { - map->release(); - map = 0; + newMap->release(); + newMap = 0; } - addMapping( map); + addMapping( newMap); UNLOCK; - return( map); + return( newMap); } IOMemoryMap * IOMemoryDescriptor::map( @@ -2352,6 +2675,69 @@ IOMemoryMap * IOMemoryDescriptor::map( return( makeMapping( this, intoTask, toAddress, options, offset, length )); } +IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, + IOOptionBits options, + IOByteCount offset) +{ + IOReturn err = kIOReturnSuccess; + IOMemoryDescriptor * physMem = 0; + + LOCK; + + if (logical && addressMap) do + { + if ((memory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + { + physMem = memory; + physMem->retain(); + } + + if (!redirUPL) + { + vm_size_t size = length; + int flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL + | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; + if (KERN_SUCCESS != memory_object_iopl_request((ipc_port_t) memory->_memEntry, 0, &size, &redirUPL, + NULL, NULL, + &flags)) + redirUPL = 0; + + if (physMem) + { + IOUnmapPages( addressMap, logical, length ); + physMem->redirect(0, true); + } + } + + if (newBackingMemory) + { + if (newBackingMemory != memory) + { + if (this != newBackingMemory->makeMapping(newBackingMemory, addressTask, (IOVirtualAddress) this, + options | kIOMapUnique | kIOMapReference, + offset, length)) + err = kIOReturnError; + } + if (redirUPL) + { + upl_commit(redirUPL, NULL, 0); + upl_deallocate(redirUPL); + redirUPL = 0; + } + if (physMem) + physMem->redirect(0, false); + } + } + while (false); + + UNLOCK; + + if (physMem) + physMem->release(); + + return (err); +} + IOMemoryMap * IOMemoryDescriptor::makeMapping( IOMemoryDescriptor * owner, task_t intoTask, @@ -2360,37 +2746,102 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( IOByteCount offset, IOByteCount length ) { + IOMemoryDescriptor * mapDesc = 0; _IOMemoryMap * mapping = 0; OSIterator * iter; LOCK; - do { - // look for an existing mapping - if( (iter = OSCollectionIterator::withCollection( _mappings))) { + do + { + if (kIOMapUnique & options) + { + IOPhysicalAddress phys; + IOByteCount physLen; - while( (mapping = (_IOMemoryMap *) iter->getNextObject())) { + if (owner != this) + continue; - if( (mapping = mapping->copyCompatible( - owner, intoTask, toAddress, - options | kIOMapReference, - offset, 
length ))) - break; - } - iter->release(); - if( mapping) - continue; - } + if ((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) + { + phys = getPhysicalSegment(offset, &physLen); + if (!phys || (physLen < length)) + continue; + + mapDesc = IOMemoryDescriptor::withPhysicalAddress( + phys, length, _direction); + if (!mapDesc) + continue; + offset = 0; + } + else + { + mapDesc = this; + mapDesc->retain(); + } + + if (kIOMapReference & options) + { + mapping = (_IOMemoryMap *) toAddress; + mapping->retain(); + +#if 1 + uint32_t pageOffset1 = mapDesc->getSourceSegment( offset, NULL ); + pageOffset1 -= trunc_page_32( pageOffset1 ); + + uint32_t pageOffset2 = mapping->getVirtualAddress(); + pageOffset2 -= trunc_page_32( pageOffset2 ); + + if (pageOffset1 != pageOffset2) + IOLog("::redirect can't map offset %x to addr %x\n", + pageOffset1, mapping->getVirtualAddress()); +#endif + + + if (!mapping->initWithDescriptor( mapDesc, intoTask, toAddress, options, + offset, length )) + { +#ifdef DEBUG + IOLog("Didn't redirect map %08lx : %08lx\n", offset, length ); +#endif + } + + if (mapping->owner) + mapping->owner->removeMapping(mapping); + continue; + } + } + else + { + // look for an existing mapping + if( (iter = OSCollectionIterator::withCollection( _mappings))) { + + while( (mapping = (_IOMemoryMap *) iter->getNextObject())) { + + if( (mapping = mapping->copyCompatible( + owner, intoTask, toAddress, + options | kIOMapReference, + offset, length ))) + break; + } + iter->release(); + } - if( mapping || (options & kIOMapReference)) - continue; + if (mapping) + mapping->retain(); + if( mapping || (options & kIOMapReference)) + continue; + + mapDesc = owner; + mapDesc->retain(); + } owner = this; mapping = new _IOMemoryMap; if( mapping - && !mapping->initWithDescriptor( owner, intoTask, toAddress, options, + && !mapping->initWithDescriptor( mapDesc, intoTask, toAddress, options, offset, length )) { #ifdef DEBUG IOLog("Didn't make map %08lx : %08lx\n", offset, length ); @@ -2399,12 +2850,23 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( mapping = 0; } + if (mapping) + mapping->retain(); + } while( false ); - owner->addMapping( mapping); + if (mapping) + { + mapping->owner = owner; + owner->addMapping( mapping); + mapping->release(); + } UNLOCK; + if (mapDesc) + mapDesc->release(); + return( mapping); } @@ -2505,6 +2967,19 @@ IOPhysicalAddress IOSubMemoryDescriptor::getPhysicalSegment( IOByteCount offset, return( address ); } + +IOReturn IOSubMemoryDescriptor::doMap( + vm_map_t addressMap, + IOVirtualAddress * atAddress, + IOOptionBits options, + IOByteCount sourceOffset, + IOByteCount length ) +{ + if( sourceOffset >= _length) + return( kIOReturnOverrun ); + return (_parent->doMap(addressMap, atAddress, options, sourceOffset + _start, length)); +} + IOPhysicalAddress IOSubMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length ) { @@ -2569,6 +3044,36 @@ IOByteCount IOSubMemoryDescriptor::writeBytes(IOByteCount offset, return( byteCount ); } +IOReturn IOSubMemoryDescriptor::setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ) +{ + IOReturn err; + + LOCK; + err = _parent->setPurgeable( newState, oldState ); + UNLOCK; + + return( err ); +} + +IOReturn IOSubMemoryDescriptor::performOperation( IOOptionBits options, + IOByteCount offset, IOByteCount length ) +{ + IOReturn err; + + assert(offset <= _length); + + if( offset >= _length) + return( kIOReturnOverrun ); + + LOCK; + err = _parent->performOperation( options, _start + offset, + min(length, _length - offset) ); 
+ UNLOCK; + + return( err ); +} + IOReturn IOSubMemoryDescriptor::prepare( IODirection forDirection) { @@ -2601,9 +3106,10 @@ IOMemoryMap * IOSubMemoryDescriptor::makeMapping( IOByteCount offset, IOByteCount length ) { - IOMemoryMap * mapping; + IOMemoryMap * mapping = 0; - mapping = (IOMemoryMap *) _parent->makeMapping( + if (!(kIOMapUnique & options)) + mapping = (IOMemoryMap *) _parent->makeMapping( _parent, intoTask, toAddress - (_start + offset), options | kIOMapReference, @@ -2676,10 +3182,15 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const { OSSymbol const *keys[2]; OSObject *values[2]; - IOVirtualRange *vcopy; + struct SerData { + user_addr_t address; + user_size_t length; + } *vcopy; unsigned int index, nRanges; bool result; + IOOptionBits type = _flags & kIOMemoryTypeMask; + if (s == NULL) return false; if (s->previouslySerialized(this)) return true; @@ -2687,7 +3198,7 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const if (!s->addXMLStartTag(this, "array")) return false; nRanges = _rangesCount; - vcopy = (IOVirtualRange *) IOMalloc(sizeof(IOVirtualRange) * nRanges); + vcopy = (SerData *) IOMalloc(sizeof(SerData) * nRanges); if (vcopy == 0) return false; keys[0] = OSSymbol::withCString("address"); @@ -2702,8 +3213,12 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const // while the lock is held. LOCK; if (nRanges == _rangesCount) { + Ranges vec = _ranges; for (index = 0; index < nRanges; index++) { - vcopy[index] = _ranges.v[index]; + user_addr_t addr; IOByteCount len; + getAddrLenForInd(addr, len, type, vec, index); + vcopy[index].address = addr; + vcopy[index].length = len; } } else { // The descriptor changed out from under us. Give up. @@ -2715,12 +3230,15 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const for (index = 0; index < nRanges; index++) { - values[0] = OSNumber::withNumber(_ranges.v[index].address, sizeof(_ranges.v[index].address) * 8); + user_addr_t addr = vcopy[index].address; + IOByteCount len = (IOByteCount) vcopy[index].length; + values[0] = + OSNumber::withNumber(addr, (((UInt64) addr) >> 32)? 
64 : 32); if (values[0] == 0) { result = false; goto bail; } - values[1] = OSNumber::withNumber(_ranges.v[index].length, sizeof(_ranges.v[index].length) * 8); + values[1] = OSNumber::withNumber(len, sizeof(len) * 8); if (values[1] == 0) { result = false; goto bail; @@ -2808,8 +3326,8 @@ bool IOSubMemoryDescriptor::serialize(OSSerialize * s) const OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 0); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 1); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 2); -OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 3); -OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 4); +OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 3); +OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 4); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 5); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 6); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 7); diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp index b77705efe..315aefe11 100644 --- a/iokit/Kernel/IONVRAM.cpp +++ b/iokit/Kernel/IONVRAM.cpp @@ -28,6 +28,10 @@ #define super IOService +#define kIONVRAMPrivilege kIOClientPrivilegeAdministrator +//#define kIONVRAMPrivilege kIOClientPrivilegeLocalUser + + OSDefineMetaClassAndStructors(IODTNVRAM, IOService); bool IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane) @@ -209,7 +213,7 @@ void IODTNVRAM::sync(void) _nvramImageDirty = false; } -bool IODTNVRAM::serializeProperties(OSSerialize *serialize) const +bool IODTNVRAM::serializeProperties(OSSerialize *s) const { bool result; UInt32 variablePerm; @@ -220,7 +224,7 @@ bool IODTNVRAM::serializeProperties(OSSerialize *serialize) const if (_ofDict == 0) return false; // Verify permissions. - result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); + result = IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege); if (result != kIOReturnSuccess) { tmpDict = OSDictionary::withCapacity(1); if (tmpDict == 0) return false; @@ -242,7 +246,7 @@ bool IODTNVRAM::serializeProperties(OSSerialize *serialize) const dict = _ofDict; } - result = dict->serialize(serialize); + result = dict->serialize(s); if (tmpDict != 0) tmpDict->release(); if (iter != 0) iter->release(); @@ -258,7 +262,7 @@ OSObject *IODTNVRAM::getProperty(const OSSymbol *aKey) const if (_ofDict == 0) return 0; // Verify permissions. - result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); + result = IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege); if (result != kIOReturnSuccess) { variablePerm = getOFVariablePerm(aKey); if (variablePerm == kOFVariablePermRootOnly) return 0; @@ -291,7 +295,7 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) if (_ofDict == 0) return false; // Verify permissions. - result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); + result = IOUserClient::clientHasPrivilege(current_task(), kIONVRAMPrivilege); if (result != kIOReturnSuccess) { propPerm = getOFVariablePerm(aKey); if (propPerm != kOFVariablePermUserWrite) return false; @@ -347,11 +351,41 @@ bool IODTNVRAM::setProperty(const OSSymbol *aKey, OSObject *anObject) return result; } +void IODTNVRAM::removeProperty(const OSSymbol *aKey) +{ + bool result; + UInt32 propPerm; + + if (_ofDict == 0) return; + + // Verify permissions. 
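// Same gate as setProperty(): without administrator rights (the
// kIONVRAMPrivilege macro above is currently defined to
// kIOClientPrivilegeAdministrator) a caller may only remove variables
// whose Open Firmware permission is kOFVariablePermUserWrite.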
+ result = IOUserClient::clientHasPrivilege(current_task(), kIOClientPrivilegeAdministrator); + if (result != kIOReturnSuccess) { + propPerm = getOFVariablePerm(aKey); + if (propPerm != kOFVariablePermUserWrite) return; + } + + // Don't allow removal of properties on old world machines. + if (getPlatform()->getBootROMType() == 0) return; + + // Don't allow change of 'aapl,panic-info'. + if (aKey->isEqualTo(kIODTNVRAMPanicInfoKey)) return; + + // If the object exists, remove it from the dictionary. + result = _ofDict->getObject(aKey) != 0; + if (result) { + _ofDict->removeObject(aKey); + + _ofImageDirty = true; + } +} + IOReturn IODTNVRAM::setProperties(OSObject *properties) { bool result = true; OSObject *object; const OSSymbol *key; + const OSString *tmpStr; OSDictionary *dict; OSCollectionIterator *iter; @@ -368,7 +402,19 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties) object = dict->getObject(key); if (object == 0) continue; - result = setProperty(key, object); + if (key->isEqualTo(kIONVRAMDeletePropertyKey)) { + tmpStr = OSDynamicCast(OSString, object); + if (tmpStr != 0) { + key = OSSymbol::withString(tmpStr); + removeProperty(key); + key->release(); + result = true; + } else { + result = false; + } + } else { + result = setProperty(key, object); + } } iter->release(); @@ -382,7 +428,7 @@ IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer, { if (_xpramImage == 0) return kIOReturnUnsupported; - if ((buffer == 0) || (length <= 0) || (offset < 0) || + if ((buffer == 0) || (length == 0) || (offset + length > kIODTNVRAMXPRAMSize)) return kIOReturnBadArgument; @@ -396,7 +442,7 @@ IOReturn IODTNVRAM::writeXPRAM(IOByteCount offset, UInt8 *buffer, { if (_xpramImage == 0) return kIOReturnUnsupported; - if ((buffer == 0) || (length <= 0) || (offset < 0) || + if ((buffer == 0) || (length == 0) || (offset + length > kIODTNVRAMXPRAMSize)) return kIOReturnBadArgument; @@ -458,7 +504,7 @@ IOReturn IODTNVRAM::readNVRAMPartition(const OSSymbol *partitionID, partitionOffset = partitionOffsetNumber->unsigned32BitValue(); partitionLength = partitionLengthNumber->unsigned32BitValue(); - if ((buffer == 0) || (length <= 0) || (offset < 0) || + if ((buffer == 0) || (length == 0) || (offset + length > partitionLength)) return kIOReturnBadArgument; @@ -485,7 +531,7 @@ IOReturn IODTNVRAM::writeNVRAMPartition(const OSSymbol *partitionID, partitionOffset = partitionOffsetNumber->unsigned32BitValue(); partitionLength = partitionLengthNumber->unsigned32BitValue(); - if ((buffer == 0) || (length <= 0) || (offset < 0) || + if ((buffer == 0) || (length == 0) || (offset + length > partitionLength)) return kIOReturnBadArgument; @@ -682,7 +728,8 @@ IOReturn IODTNVRAM::syncOFVariables(void) bool ok; UInt32 cnt, length, maxLength; UInt32 curOffset, tmpOffset, tmpType, tmpDataLength; - UInt8 *buffer, *tmpBuffer, *tmpData; + UInt8 *buffer, *tmpBuffer; + const UInt8 *tmpData; const OSSymbol *tmpSymbol; OSObject *tmpObject; OSBoolean *tmpBoolean; @@ -766,7 +813,7 @@ IOReturn IODTNVRAM::syncOFVariables(void) case kOFVariableTypeString : tmpString = OSDynamicCast(OSString, tmpObject); - tmpData = (UInt8 *) tmpString->getCStringNoCopy(); + tmpData = (const UInt8 *)tmpString->getCStringNoCopy(); tmpDataLength = tmpString->getLength(); if ((curOffset - tmpDataLength) < sizeof(OWVariablesHeader)) { @@ -806,10 +853,10 @@ IOReturn IODTNVRAM::syncOFVariables(void) } struct OFVariable { - char *variableName; - UInt32 variableType; - UInt32 variablePerm; - SInt32 variableOffset; + const char *variableName; + UInt32 
variableType; + UInt32 variablePerm; + SInt32 variableOffset; }; typedef struct OFVariable OFVariable; @@ -864,6 +911,7 @@ OFVariable gOFVariables[] = { {"aapl,pci", kOFVariableTypeData, kOFVariablePermRootOnly, -1}, {"security-mode", kOFVariableTypeString, kOFVariablePermUserRead, -1}, {"security-password", kOFVariableTypeData, kOFVariablePermRootOnly, -1}, + {"boot-image", kOFVariableTypeData, kOFVariablePermUserWrite, -1}, {0, kOFVariableTypeData, kOFVariablePermUserRead, -1} }; @@ -990,7 +1038,7 @@ bool IODTNVRAM::convertPropToObject(UInt8 *propName, UInt32 propNameLength, bool IODTNVRAM::convertObjectToProp(UInt8 *buffer, UInt32 *length, const OSSymbol *propSymbol, OSObject *propObject) { - UInt8 *propName; + const UInt8 *propName; UInt32 propNameLength, propDataLength; UInt32 propType, tmpValue; OSBoolean *tmpBoolean = 0; @@ -998,7 +1046,7 @@ bool IODTNVRAM::convertObjectToProp(UInt8 *buffer, UInt32 *length, OSString *tmpString = 0; OSData *tmpData = 0; - propName = (UInt8 *)propSymbol->getCStringNoCopy(); + propName = (const UInt8 *)propSymbol->getCStringNoCopy(); propNameLength = propSymbol->getLength(); propType = getOFVariableType(propSymbol); @@ -1103,11 +1151,12 @@ bool IODTNVRAM::validateOWChecksum(UInt8 *buffer) void IODTNVRAM::updateOWBootArgs(const OSSymbol *key, OSObject *value) { - bool wasBootArgs, bootr = false; - UInt32 cnt; - OSString *tmpString, *bootCommand, *bootArgs = 0; - UInt8 *bootCommandData, *bootArgsData, *tmpData; - UInt32 bootCommandDataLength, bootArgsDataLength, tmpDataLength; + bool wasBootArgs, bootr = false; + UInt32 cnt; + OSString *tmpString, *bootCommand, *bootArgs = 0; + const UInt8 *bootCommandData, *bootArgsData; + UInt8 *tmpData; + UInt32 bootCommandDataLength, bootArgsDataLength, tmpDataLength; tmpString = OSDynamicCast(OSString, value); if (tmpString == 0) return; @@ -1122,7 +1171,7 @@ void IODTNVRAM::updateOWBootArgs(const OSSymbol *key, OSObject *value) if (bootCommand == 0) return; } else return; - bootCommandData = (UInt8 *)bootCommand->getCStringNoCopy(); + bootCommandData = (const UInt8 *)bootCommand->getCStringNoCopy(); bootCommandDataLength = bootCommand->getLength(); if (bootCommandData == 0) return; @@ -1142,7 +1191,7 @@ void IODTNVRAM::updateOWBootArgs(const OSSymbol *key, OSObject *value) } if (wasBootArgs) { - bootArgsData = (UInt8 *)bootArgs->getCStringNoCopy(); + bootArgsData = (const UInt8 *)bootArgs->getCStringNoCopy(); bootArgsDataLength = bootArgs->getLength(); if (bootArgsData == 0) return; @@ -1308,7 +1357,7 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType0(IORegistryEntry *entry, return err; } -OSData *IODTNVRAM::unescapeBytesToData(UInt8 *bytes, UInt32 length) +OSData *IODTNVRAM::unescapeBytesToData(const UInt8 *bytes, UInt32 length) { OSData *data = 0; UInt32 totalLength = 0; @@ -1355,29 +1404,29 @@ OSData *IODTNVRAM::unescapeBytesToData(UInt8 *bytes, UInt32 length) OSData * IODTNVRAM::escapeDataToData(OSData * value) { - OSData * result; - UInt8 * start; - UInt8 * end; - UInt8 * where; - UInt8 byte; - bool ok = true; + OSData * result; + const UInt8 * startPtr; + const UInt8 * endPtr; + const UInt8 * wherePtr; + UInt8 byte; + bool ok = true; - where = (UInt8 *) value->getBytesNoCopy(); - end = where + value->getLength(); + wherePtr = (const UInt8 *) value->getBytesNoCopy(); + endPtr = wherePtr + value->getLength(); - result = OSData::withCapacity(end - where); + result = OSData::withCapacity(endPtr - wherePtr); if (!result) return result; - while (where < end) { - start = where; - byte = *where++; + while (wherePtr < 
endPtr) { + startPtr = wherePtr; + byte = *wherePtr++; if ((byte == 0x00) || (byte == 0xFF)) { for (; - ((where - start) < 0x80) && (where < end) && (byte == *where); - where++) {} + ((wherePtr - startPtr) < 0x80) && (wherePtr < endPtr) && (byte == *wherePtr); + wherePtr++) {} ok &= result->appendByte(0xff, 1); - byte = (byte & 0x80) | (where - start); + byte = (byte & 0x80) | (wherePtr - startPtr); } ok &= result->appendByte(byte, 1); } @@ -1391,56 +1440,77 @@ OSData * IODTNVRAM::escapeDataToData(OSData * value) return result; } +static bool IsApplePropertyName(const char * propName) +{ + char c; + while ((c = *propName++)) { + if ((c >= 'A') && (c <= 'Z')) + break; + } + + return (c == 0); +} + IOReturn IODTNVRAM::readNVRAMPropertyType1(IORegistryEntry *entry, const OSSymbol **name, OSData **value) { - IOReturn err = kIOReturnNoResources; - OSData *data; - UInt8 *start; - UInt8 *end; - UInt8 *where; - UInt8 *nvPath = 0; - UInt8 *nvName = 0; - UInt8 byte; + IOReturn err = kIOReturnNoResources; + OSData *data; + const UInt8 *startPtr; + const UInt8 *endPtr; + const UInt8 *wherePtr; + const UInt8 *nvPath = 0; + const char *nvName = 0; + const char *resultName = 0; + const UInt8 *resultValue = 0; + UInt32 resultValueLen = 0; + UInt8 byte; if (_ofDict == 0) return err; data = OSDynamicCast(OSData, _ofDict->getObject(_registryPropertiesKey)); if (data == 0) return err; - start = (UInt8 *) data->getBytesNoCopy(); - end = start + data->getLength(); + startPtr = (const UInt8 *) data->getBytesNoCopy(); + endPtr = startPtr + data->getLength(); - where = start; - while (where < end) { - byte = *(where++); + wherePtr = startPtr; + while (wherePtr < endPtr) { + byte = *(wherePtr++); if (byte) continue; if (nvPath == 0) - nvPath = start; + nvPath = startPtr; else if (nvName == 0) - nvName = start; + nvName = (const char *) startPtr; else { IORegistryEntry * compareEntry = IORegistryEntry::fromPath((const char *) nvPath, gIODTPlane); - if (entry == compareEntry) { - if (compareEntry) - compareEntry->release(); - *name = OSSymbol::withCString((const char *) nvName); - *value = unescapeBytesToData(start, where - start - 1); - if ((*name != 0) && (*value != 0)) - err = kIOReturnSuccess; - else - err = kIOReturnNoMemory; - break; - } if (compareEntry) compareEntry->release(); - nvPath = nvName = 0; + if (entry == compareEntry) { + bool appleProp = IsApplePropertyName(nvName); + if (!appleProp || !resultName) { + resultName = nvName; + resultValue = startPtr; + resultValueLen = wherePtr - startPtr - 1; + } + if (!appleProp) + break; + } + nvPath = 0; + nvName = 0; } - start = where; + startPtr = wherePtr; + } + if (resultName) { + *name = OSSymbol::withCString(resultName); + *value = unescapeBytesToData(resultValue, resultValueLen); + if ((*name != 0) && (*value != 0)) + err = kIOReturnSuccess; + else + err = kIOReturnNoMemory; } - return err; } @@ -1448,55 +1518,60 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry, const OSSymbol *propName, OSData *value) { - OSData *oldData; - OSData *data = 0; - UInt8 *start; - UInt8 *propStart; - UInt8 *end; - UInt8 *where; - UInt8 *nvPath = 0; - UInt8 *nvName = 0; + OSData *oldData; + OSData *data = 0; + const UInt8 *startPtr; + const UInt8 *propStart; + const UInt8 *endPtr; + const UInt8 *wherePtr; + const UInt8 *nvPath = 0; + const char *nvName = 0; const char * comp; const char * name; - UInt8 byte; - bool ok = true; + UInt8 byte; + bool ok = true; + bool settingAppleProp; if (_ofDict == 0) return kIOReturnNoResources; + settingAppleProp = 
IsApplePropertyName(propName->getCStringNoCopy()); + // copy over existing properties for other entries oldData = OSDynamicCast(OSData, _ofDict->getObject(_registryPropertiesKey)); if (oldData) { - start = (UInt8 *) oldData->getBytesNoCopy(); - end = start + oldData->getLength(); + startPtr = (const UInt8 *) oldData->getBytesNoCopy(); + endPtr = startPtr + oldData->getLength(); - propStart = start; - where = start; - while (where < end) { - byte = *(where++); + propStart = startPtr; + wherePtr = startPtr; + while (wherePtr < endPtr) { + byte = *(wherePtr++); if (byte) continue; if (nvPath == 0) - nvPath = start; + nvPath = startPtr; else if (nvName == 0) - nvName = start; + nvName = (const char *) startPtr; else { IORegistryEntry * compareEntry = IORegistryEntry::fromPath((const char *) nvPath, gIODTPlane); - if (entry == compareEntry) { - if (compareEntry) - compareEntry->release(); - // delete old property (nvPath -> where) - data = OSData::withBytes(propStart, nvPath - propStart); - if (data) - ok &= data->appendBytes(where, end - where); - break; - } if (compareEntry) compareEntry->release(); - nvPath = nvName = 0; + if (entry == compareEntry) { + if ((settingAppleProp && propName->isEqualTo(nvName)) + || (!settingAppleProp && !IsApplePropertyName(nvName))) { + // delete old property (nvPath -> wherePtr) + data = OSData::withBytes(propStart, nvPath - propStart); + if (data) + ok &= data->appendBytes(wherePtr, endPtr - wherePtr); + break; + } + } + nvPath = 0; + nvName = 0; } - start = where; + startPtr = wherePtr; } } @@ -1511,51 +1586,52 @@ IOReturn IODTNVRAM::writeNVRAMPropertyType1(IORegistryEntry *entry, return kIOReturnNoMemory; } - // get entries in path - OSArray *array = OSArray::withCapacity(5); - if (!array) { - data->release(); - return kIOReturnNoMemory; - } - do - array->setObject(entry); - while ((entry = entry->getParentEntry(gIODTPlane))); - - // append path - for (int i = array->getCount() - 3; - (entry = (IORegistryEntry *) array->getObject(i)); - i--) { - - name = entry->getName(gIODTPlane); - comp = entry->getLocation(gIODTPlane); - if( comp && (0 == strcmp("pci", name)) - && (0 == strcmp("80000000", comp))) { - // yosemite hack - comp = "/pci@80000000"; - } else { - if (comp) - ok &= data->appendBytes("/@", 2); - else { - if (!name) - continue; - ok &= data->appendByte('/', 1); - comp = name; - } - } - ok &= data->appendBytes(comp, strlen(comp)); - } - ok &= data->appendByte(0, 1); - array->release(); - - // append prop name - ok &= data->appendBytes(propName->getCStringNoCopy(), propName->getLength() + 1); - - // append escaped data - oldData = escapeDataToData(value); - ok &= (oldData != 0); - if (ok) - ok &= data->appendBytes(oldData); - + if (value && value->getLength()) { + // get entries in path + OSArray *array = OSArray::withCapacity(5); + if (!array) { + data->release(); + return kIOReturnNoMemory; + } + do + array->setObject(entry); + while ((entry = entry->getParentEntry(gIODTPlane))); + + // append path + for (int i = array->getCount() - 3; + (entry = (IORegistryEntry *) array->getObject(i)); + i--) { + + name = entry->getName(gIODTPlane); + comp = entry->getLocation(gIODTPlane); + if( comp && (0 == strcmp("pci", name)) + && (0 == strcmp("80000000", comp))) { + // yosemite hack + comp = "/pci@80000000"; + } else { + if (comp) + ok &= data->appendBytes("/@", 2); + else { + if (!name) + continue; + ok &= data->appendByte('/', 1); + comp = name; + } + } + ok &= data->appendBytes(comp, strlen(comp)); + } + ok &= data->appendByte(0, 1); + array->release(); + 
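/*
 * Illustration: escapeDataToData() above run-length-escapes the filler
 * bytes 0x00 and 0xFF so they can safely delimit records in the NVRAM
 * image. A run is emitted as a 0xFF marker followed by a tag byte whose
 * high bit carries the repeated value and whose low bits carry the run
 * length. Below is a minimal userland C++ sketch of the same idea,
 * simplified to runs of at most 0x7F; it is not the kernel implementation.
 */
#include <cstdint>
#include <vector>

static std::vector<uint8_t> escapeBytes(const std::vector<uint8_t> &in)
{
    std::vector<uint8_t> out;
    for (size_t i = 0; i < in.size(); ) {
        uint8_t byte = in[i];
        if (byte == 0x00 || byte == 0xFF) {
            size_t run = 1;                               // measure the run, capped at 0x7F
            while (run < 0x7F && i + run < in.size() && in[i + run] == byte)
                run++;
            out.push_back(0xFF);                          // escape marker
            out.push_back((byte & 0x80) | (uint8_t)run);  // value bit | run length
            i += run;
        } else {
            out.push_back(byte);                          // ordinary bytes pass through
            i++;
        }
    }
    return out;
}
/* Decoding reverses the transform: on 0xFF, read the next tag byte and
   expand (tag & 0x7F) copies of ((tag & 0x80) ? 0xFF : 0x00). */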
+ // append prop name + ok &= data->appendBytes(propName->getCStringNoCopy(), propName->getLength() + 1); + + // append escaped data + oldData = escapeDataToData(value); + ok &= (oldData != 0); + if (ok) + ok &= data->appendBytes(oldData); + } if (ok) { ok = _ofDict->setObject(_registryPropertiesKey, data); if (ok) diff --git a/iokit/Kernel/IOPMPagingPlexus.cpp b/iokit/Kernel/IOPMPagingPlexus.cpp deleted file mode 100644 index 187e0a1b7..000000000 --- a/iokit/Kernel/IOPMPagingPlexus.cpp +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * 9 May 01 suurballe. - */ - -#include -#include -#include -#include -#include - -extern char rootdevice[]; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#define super IOService -OSDefineMetaClassAndStructors(IOPMPagingPlexus,IOService) - - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -// stub driver has two power states, off and on - -enum { kIOPlexusPowerStateCount = 2 }; - -static const IOPMPowerState powerStates[ kIOPlexusPowerStateCount ] = { - { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 1, 0, IOPMPagingAvailable, IOPMPowerOn, 0, 0, 0, 0, 0, 0, 0, 0 } -}; - -//* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// initialize -// -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool IOPMPagingPlexus::start ( IOService * provider ) -{ - super::start(provider); - - ourLock = IOLockAlloc(); - systemBooting = true; - - PMinit(); // initialize superclass variables - - registerPowerDriver(this,(IOPMPowerState *)powerStates,kIOPlexusPowerStateCount); - - return true; -} - - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// setAggressiveness -// -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -IOReturn IOPMPagingPlexus::setAggressiveness ( unsigned long type, unsigned long ) -{ - OSDictionary * dict; - OSIterator * iter; - OSObject * next; - IOService * candidate = 0; - IOService * pagingProvider; - - if( type != kPMMinutesToSleep) - return IOPMNoErr; - - IOLockLock(ourLock); - if ( systemBooting ) { - systemBooting = false; - IOLockUnlock(ourLock); - dict = IOBSDNameMatching(rootdevice); - if ( dict ) { - iter = getMatchingServices(dict); - if ( iter ) { - while ( (next = iter->getNextObject()) ) { - if ( (candidate = OSDynamicCast(IOService,next)) ) { - break; - } - } - iter->release(); - } - } - if ( candidate ) { - pagingProvider = findProvider(candidate); - if ( 
pagingProvider ) { - processSiblings(pagingProvider); - pagingProvider->addPowerChild(this); - getPMRootDomain()->removePowerChild(((IOPowerConnection *)getParentEntry(gIOPowerPlane))); - processChildren(); - } - } - } - else { - IOLockUnlock(ourLock); - } - return IOPMNoErr; -} - - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// findProvider -// -// Climb upward in the power tree from the node pointed to by the parameter. -// Return a pointer to the first power-managed entity encountered. -// This is the provider of paging services (the root device disk driver). -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -IOService * IOPMPagingPlexus::findProvider ( IOService * mediaObject ) -{ - IORegistryEntry * node = mediaObject; - - if ( mediaObject == NULL ) { - return NULL; - } - - while ( node ) { - if ( node->inPlane(gIOPowerPlane) ) { - return (IOService *)node; - } - node = node->getParentEntry(gIOServicePlane); - } - return NULL; -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// processSiblings -// -// Climb upward in the power tree from the node pointed to by the parameter. -// "Other" children of each ancestor (not the nodes in our upward path) are -// made children of this plexus, so they get paging services from here. -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -void IOPMPagingPlexus::processSiblings ( IOService * aNode ) -{ - OSIterator * parentIterator; - IORegistryEntry * nextNub; - IORegistryEntry * nextParent; - OSIterator * siblingIterator; - IORegistryEntry * nextSibling; - - parentIterator = aNode->getParentIterator(gIOPowerPlane); // iterate parents of this node - - if ( parentIterator ) { - while ( true ) { - if ( ! (nextNub = (IORegistryEntry *)(parentIterator->getNextObject())) ) { - parentIterator->release(); - break; - } - if ( OSDynamicCast(IOPowerConnection,nextNub) ) { - nextParent = nextNub->getParentEntry(gIOPowerPlane); - if ( nextParent == getPMRootDomain() ) { - continue; // plexus already has root's children - } - if ( nextParent == this ) { - parentIterator->release(); - removePowerChild((IOPowerConnection *)nextNub); - break; - } - siblingIterator = nextParent->getChildIterator(gIOPowerPlane); - // iterate children of this parent - if ( siblingIterator ) { - while ( (nextSibling = (IORegistryEntry *)(siblingIterator->getNextObject())) ) { - if ( OSDynamicCast(IOPowerConnection,nextSibling) ) { - nextSibling = nextSibling->getChildEntry(gIOPowerPlane); - if ( nextSibling != aNode ) { // non-ancestor of driver gets - addPowerChild((IOService *)nextSibling); // plexus as parent - } - } - } - siblingIterator->release(); - } - processSiblings((IOService *)nextParent); // do the same thing to this parent - } - } - } -} - - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -// processChildren -// -// Now invent the need for paging services: alter our children's arrays -// to show that they need paging. 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -void IOPMPagingPlexus::processChildren ( void ) -{ - OSIterator * childIterator; - IOPowerConnection * nextChildNub; - IORegistryEntry * nextChild; - IOService * child; - unsigned int i; - - childIterator = getChildIterator(gIOPowerPlane); - - if ( childIterator ) { - while ( (nextChild = (IORegistryEntry *)(childIterator->getNextObject())) ) { - if ( (nextChildNub = OSDynamicCast(IOPowerConnection,nextChild)) ) { - child = (IOService *)nextChild->getChildEntry(gIOPowerPlane); - if ( child->pm_vars->theControllingDriver ) { - for ( i = 1; i < child->pm_vars->theNumberOfPowerStates; i++ ) { - child->pm_vars->thePowerStates[i].inputPowerRequirement |= IOPMPagingAvailable; - } - } - if ( child->pm_vars->myCurrentState ) { - nextChildNub->setDesiredDomainState(kIOPlexusPowerStateCount-1); - } - } - } - childIterator->release(); - } -} diff --git a/iokit/Kernel/IOPMchangeNoteList.cpp b/iokit/Kernel/IOPMchangeNoteList.cpp index e5c23bf97..427ff5085 100644 --- a/iokit/Kernel/IOPMchangeNoteList.cpp +++ b/iokit/Kernel/IOPMchangeNoteList.cpp @@ -107,7 +107,7 @@ IOReturn IOPMchangeNoteList::releaseHeadChangeNote ( void ) { IOPowerConnection *tmp; - if(tmp = changeNote[firstInList].parent) { + if((tmp = changeNote[firstInList].parent)) { changeNote[firstInList].parent = 0; tmp->release(); } @@ -131,7 +131,7 @@ IOReturn IOPMchangeNoteList::releaseTailChangeNote ( void ) { IOPowerConnection *tmp; - if(tmp = changeNote[firstInList].parent) { + if((tmp = changeNote[firstInList].parent)) { changeNote[firstInList].parent = 0; tmp->release(); } diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 506cd40f9..b811ffbf8 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -1,4 +1,4 @@ - /* +/* * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ @@ -27,25 +27,51 @@ #include #include #include +#include #include #include "RootDomainUserClient.h" #include "IOKit/pwr_mgt/IOPowerConnection.h" #include "IOPMPowerStateQueue.h" +#include -extern "C" void kprintf(const char *, ...); +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -extern const IORegistryPlane * gIOPowerPlane; -// debug trace function -static inline void -ioSPMTrace(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +#include + +OSDefineMetaClassAndAbstractStructors(IOPolledInterface, OSObject); + +OSMetaClassDefineReservedUnused(IOPolledInterface, 0); +OSMetaClassDefineReservedUnused(IOPolledInterface, 1); +OSMetaClassDefineReservedUnused(IOPolledInterface, 2); +OSMetaClassDefineReservedUnused(IOPolledInterface, 3); +OSMetaClassDefineReservedUnused(IOPolledInterface, 4); +OSMetaClassDefineReservedUnused(IOPolledInterface, 5); +OSMetaClassDefineReservedUnused(IOPolledInterface, 6); +OSMetaClassDefineReservedUnused(IOPolledInterface, 7); +OSMetaClassDefineReservedUnused(IOPolledInterface, 8); +OSMetaClassDefineReservedUnused(IOPolledInterface, 9); +OSMetaClassDefineReservedUnused(IOPolledInterface, 10); +OSMetaClassDefineReservedUnused(IOPolledInterface, 11); +OSMetaClassDefineReservedUnused(IOPolledInterface, 12); +OSMetaClassDefineReservedUnused(IOPolledInterface, 13); +OSMetaClassDefineReservedUnused(IOPolledInterface, 14); +OSMetaClassDefineReservedUnused(IOPolledInterface, 15); + +IOReturn +IOPolledInterface::checkAllForWork(void) { - if (gIOKitDebug & kIOLogTracePower) - IOTimeStampConstant(IODBG_POWER(csc), a, b, c, d); + return (kIOReturnSuccess); } + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + +extern "C" void kprintf(const char *, ...); + +extern const IORegistryPlane * gIOPowerPlane; + IOReturn broadcast_aggressiveness ( OSObject *, void *, void *, void *, void * ); static void sleepTimerExpired(thread_call_param_t); static void wakeupClamshellTimerExpired ( thread_call_param_t us); @@ -87,12 +113,12 @@ OSDefineMetaClassAndStructors(IOPMrootDomain,IOService) extern "C" { - IONotifier * registerSleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref = 0) + IONotifier * registerSleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref) { return gRootDomain->registerInterest( gIOGeneralInterest, handler, self, ref ); } - IONotifier * registerPrioritySleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref = 0) + IONotifier * registerPrioritySleepWakeInterest(IOServiceInterestHandler handler, void * self, void * ref) { return gRootDomain->registerInterest( gIOPriorityPowerStateInterest, handler, self, ref ); } @@ -119,6 +145,7 @@ extern "C" void IOSystemShutdownNotification ( void ) { + IOCatalogue::disableExternalLinker(); for ( int i = 0; i < 100; i++ ) { if ( OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) ) break; @@ -217,6 +244,7 @@ bool IOPMrootDomain::start ( IOService * nub ) PMinit(); setProperty("IOSleepSupported",""); + allowSleep = true; sleepIsSupported = true; systemBooting = true; @@ -232,11 +260,13 @@ bool IOPMrootDomain::start ( IOService * nub ) tmpDict = OSDictionary::withCapacity(1); setProperty(kRootDomainSupportedFeatures, tmpDict); tmpDict->release(); - + pm_vars->PMworkloop = IOWorkLoop::workLoop(); pmPowerStateQueue = IOPMPowerStateQueue::PMPowerStateQueue(this); pm_vars->PMworkloop->addEventSource(pmPowerStateQueue); + featuresDictLock 
= IOLockAlloc(); + extraSleepTimer = thread_call_allocate((thread_call_func_t)sleepTimerExpired, (thread_call_param_t) this); clamshellWakeupIgnore = thread_call_allocate((thread_call_func_t)wakeupClamshellTimerExpired, (thread_call_param_t) this); diskSyncCalloutEntry = thread_call_allocate(&disk_sync_callout, (thread_call_param_t) this); @@ -269,9 +299,22 @@ bool IOPMrootDomain::start ( IOService * nub ) &batteryLocationPublished, this, this); const OSSymbol *ucClassName = OSSymbol::withCStringNoCopy("RootDomainUserClient"); - setProperty(gIOUserClientClassKey, (OSMetaClassBase *) ucClassName); + setProperty(gIOUserClientClassKey, (OSObject *) ucClassName); ucClassName->release(); + IORegistryEntry *temp_entry = NULL; + if( (temp_entry = IORegistryEntry::fromPath("mac-io/battery", gIODTPlane)) || + (temp_entry = IORegistryEntry::fromPath("mac-io/via-pmu/battery", gIODTPlane))) + { + // If this machine has a battery, publish the fact that the backlight + // supports dimming. + // Notice similar call in IOPMrootDomain::batteryLocationPublished() to + // detect batteries on SMU machines. + publishFeature("DisplayDims"); + temp_entry->release(); + } + + registerService(); // let clients find us return true; @@ -300,15 +343,17 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); OSBoolean *b; OSNumber *n; - OSString *boot_complete_string = OSString::withCString("System Boot Complete"); - OSString *power_button_string = OSString::withCString("DisablePowerButtonSleep"); - OSString *stall_halt_string = OSString::withCString("StallSystemAtHalt"); - OSString *auto_wake_string = OSString::withCString("wake"); - OSString *auto_power_string = OSString::withCString("poweron"); - OSString *wakeonring_string = OSString::withCString("WakeOnRing"); - OSString *fileserver_string = OSString::withCString("AutoRestartOnPowerLoss"); - OSString *wakeonlid_string = OSString::withCString("WakeOnLid"); - OSString *wakeonac_string = OSString::withCString("WakeOnACChange"); + OSString *str; + const OSSymbol *boot_complete_string = OSSymbol::withCString("System Boot Complete"); + const OSSymbol *power_button_string = OSSymbol::withCString("DisablePowerButtonSleep"); + const OSSymbol *stall_halt_string = OSSymbol::withCString("StallSystemAtHalt"); + const OSSymbol *auto_wake_string = OSSymbol::withCString("wake"); + const OSSymbol *auto_power_string = OSSymbol::withCString("poweron"); + const OSSymbol *wakeonring_string = OSSymbol::withCString("WakeOnRing"); + const OSSymbol *fileserver_string = OSSymbol::withCString("AutoRestartOnPowerLoss"); + const OSSymbol *wakeonlid_string = OSSymbol::withCString("WakeOnLid"); + const OSSymbol *wakeonac_string = OSSymbol::withCString("WakeOnACChange"); + const OSSymbol *timezone_string = OSSymbol::withCString("TimeZoneOffsetSeconds"); if(!dict) { @@ -335,7 +380,8 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) { setProperty(stall_halt_string, b); } - + + // Relay AutoWake setting to its controller if( auto_wake_string && (n = OSDynamicCast(OSNumber, dict->getObject(auto_wake_string))) ) @@ -384,6 +430,14 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) if(kIOReturnSuccess != return_value) goto exit; } + // Relay timezone offset in seconds to SMU + if( timezone_string + && (n = OSDynamicCast(OSNumber, dict->getObject(timezone_string))) ) + { + return_value = setPMSetting(kIOPMTimeZoneSetting, n); + if(kIOReturnSuccess != return_value) goto exit; + } + exit: 
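/*
 * Illustration: IOPMrootDomain::setProperties() above is a dictionary-
 * driven relay; each recognized key is cast to its expected type and
 * forwarded, e.g. the new TimeZoneOffsetSeconds number goes to
 * setPMSetting(kIOPMTimeZoneSetting, n) for the SMU. A hypothetical
 * kernel-side caller would hand it a dictionary like the one below;
 * the 32400-second (UTC+9) value is invented for the example.
 */
OSDictionary *settings = OSDictionary::withCapacity(1);
OSNumber *offset = OSNumber::withNumber(32400ULL, 32);
if (settings && offset) {
    settings->setObject("TimeZoneOffsetSeconds", offset);
    gRootDomain->setProperties(settings);   // relayed on to the SMU
}
if (offset) offset->release();
if (settings) settings->release();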
if(boot_complete_string) boot_complete_string->release(); @@ -395,6 +449,7 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) if(fileserver_string) fileserver_string->release(); if(wakeonlid_string) wakeonlid_string->release(); if(wakeonac_string) wakeonac_string->release(); + if(timezone_string) timezone_string->release(); return return_value; } @@ -546,6 +601,7 @@ IOReturn IOPMrootDomain::sleepSystem ( void ) //kprintf("sleep demand received\n"); if ( !systemBooting && allowSleep && sleepIsSupported ) { patriarch->sleepSystem(); + return kIOReturnSuccess; } if ( !systemBooting && allowSleep && !sleepIsSupported ) { @@ -602,14 +658,14 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) { // re-enable this timer for next sleep idleSleepPending = false; + IOLog("System Sleep\n"); pm_vars->thePlatform->sleepKernel(); // The CPU(s) are off at this point. When they're awakened by CPU interrupt, - // code will resume exeuction here. - + // code will resume execution here. + // Now we're waking... - ioSPMTrace(IOPOWER_WAKE, * (int *) this); // stay awake for at least 30 seconds clock_interval_to_deadline(30, kSecondScale, &deadline); @@ -730,11 +786,22 @@ void IOPMrootDomain::wakeFromDoze( void ) // ********************************************************************************** void IOPMrootDomain::publishFeature( const char * feature ) { - OSDictionary *features = (OSDictionary *)getProperty(kRootDomainSupportedFeatures); - - features->setObject(feature, kOSBooleanTrue); + if(featuresDictLock) IOLockLock(featuresDictLock); + OSDictionary *features = + (OSDictionary *) getProperty(kRootDomainSupportedFeatures); + + if ( features && OSDynamicCast(OSDictionary, features)) + features = OSDictionary::withDictionary(features); + else + features = OSDictionary::withCapacity(1); + + features->setObject(feature, kOSBooleanTrue); + setProperty(kRootDomainSupportedFeatures, features); + features->release(); + if(featuresDictLock) IOLockUnlock(featuresDictLock); } + void IOPMrootDomain::unIdleDevice( IOService *theDevice, unsigned long theState ) { if(pmPowerStateQueue) @@ -743,14 +810,14 @@ void IOPMrootDomain::unIdleDevice( IOService *theDevice, unsigned long theState void IOPMrootDomain::announcePowerSourceChange( void ) { - IORegistryEntry *_batteryRegEntry = getProperty("BatteryEntry"); + IORegistryEntry *_batteryRegEntry = (IORegistryEntry *) getProperty("BatteryEntry"); // (if possible) re-publish power source state under IOPMrootDomain // (only done if the battery controller publishes an IOResource defining battery location) if(_batteryRegEntry) { OSArray *batt_info; - batt_info = _batteryRegEntry->getProperty(kIOBatteryInfoKey); + batt_info = (OSArray *) _batteryRegEntry->getProperty(kIOBatteryInfoKey); if(batt_info) setProperty(kIOBatteryInfoKey, batt_info); } @@ -771,13 +838,6 @@ IOReturn IOPMrootDomain::registerPMSettingController return kIOReturnSuccess; } -IOReturn IOPMrootDomain::registerPlatformPowerProfiles - (OSArray *system_profiles) -{ - if(!system_profiles) return kIOReturnBadArgument; - if(getProperty("SystemPowerProfiles")) return kIOReturnExclusiveAccess; - setProperty("SystemPowerProfiles", system_profiles); -} //********************************************************************************* // receivePowerNotification @@ -1113,15 +1173,11 @@ void IOPMrootDomain::restoreUserSpinDownTimeout ( void ) IOReturn IOPMrootDomain::changePowerStateTo ( unsigned long ordinal ) { - ioSPMTrace(IOPOWER_ROOT, * (int *) this, (int) true, (int) ordinal); - 
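/*
 * Illustration: the IOPlatformExpert::PMLog() change further below keeps
 * tracing branch-free by computing abs(event) with the Hacker's Delight
 * identity it cites: for two's-complement x, sgn = x >> 31 is 0 when
 * x >= 0 and -1 (all ones) otherwise, so sgn ^ (x + sgn) negates exactly
 * the negative case. A standalone sketch with worked values:
 */
#include <cassert>
#include <cstdint>

static uint32_t absEvent(int32_t x)
{
    int32_t sgn = x >> 31;               // arithmetic shift: 0 or -1
    return (uint32_t)(sgn ^ (x + sgn));  // x < 0: ~(x - 1) == -x
}

int main()
{
    assert(absEvent(5)  == 5);           // sgn = 0:  0 ^ (5 + 0) == 5
    assert(absEvent(-5) == 5);           // sgn = -1: -1 ^ (-6)   == 5
    assert(absEvent(0)  == 0);
    return 0;
}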
return super::changePowerStateTo(ordinal); } IOReturn IOPMrootDomain::changePowerStateToPriv ( unsigned long ordinal ) { - ioSPMTrace(IOPOWER_ROOT, * (int *) this, (int) false, (int) ordinal); - return super::changePowerStateToPriv(ordinal); } @@ -1309,15 +1365,20 @@ bool IOPMrootDomain::batteryLocationPublished( void * target, void * root_domain IOService * resourceService ) { IORegistryEntry *battery_location; - char battery_reg_path[255]; - int path_len = 255; - battery_location = resourceService->getProperty("battery"); + battery_location = (IORegistryEntry *) resourceService->getProperty("battery"); if (!battery_location || !OSDynamicCast(IORegistryEntry, battery_location)) return (true); ((IOPMrootDomain *)root_domain)->setProperty("BatteryEntry", battery_location); + // rdar://2936060 + // All laptops have dimmable LCD displays + // All laptops have batteries + // So if this machine has a battery, publish the fact that the backlight + // supports dimming. + ((IOPMrootDomain *)root_domain)->publishFeature("DisplayDims"); + ((IOPMrootDomain *)root_domain)->announcePowerSourceChange(); return (true); } diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 782cee0a1..a5b07dd86 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -34,12 +34,12 @@ #include #include #include +#include #include #include - extern "C" { #include #include @@ -368,11 +368,44 @@ bool IOPlatformExpert::platformAdjustService(IOService */*service*/) // //********************************************************************************* -void IOPlatformExpert::PMLog(const char * who,unsigned long event,unsigned long param1, unsigned long param2) +void IOPlatformExpert:: +PMLog(const char *who, unsigned long event, + unsigned long param1, unsigned long param2) { - if( gIOKitDebug & kIOLogPower) { - kprintf("%s %02d %08x %08x\n",who,event,param1,param2); -// IOLog("%s %02d %08x %08x\n",who,event,param1,param2); + UInt32 debugFlags = gIOKitDebug; + + if (debugFlags & kIOLogPower) { + + uint32_t nows, nowus; + clock_get_system_microtime(&nows, &nowus); + nowus += (nows % 1000) * 1000000; + + kprintf("pm%u %x %.30s %d %x %x\n", + nowus, (unsigned) current_thread(), who, // Identity + (int) event, param1, param2); // Args + + if (debugFlags & kIOLogTracePower) { + static const UInt32 sStartStopBitField[] = + { 0x00000000, 0x00000040 }; // Only Program Hardware so far + + // Arcane formula from Hacker's Delight by Warren + // abs(x) = ((int) x >> 31) ^ (x + ((int) x >> 31)) + UInt32 sgnevent = ((long) event >> 31); + UInt32 absevent = sgnevent ^ (event + sgnevent); + UInt32 code = IODBG_POWER(absevent); + + UInt32 bit = 1 << (absevent & 0x1f); + if (absevent < sizeof(sStartStopBitField) * 8 + && (sStartStopBitField[absevent >> 5] & bit) ) { + // Or in the START or END bits, Start = 1 & END = 2 + // If sgnevent == 0 then START - 0 => START + // else if sgnevent == -1 then START - -1 => END + code |= DBG_FUNC_START - sgnevent; + } + + // Record the timestamp, wish I had a this pointer + IOTimeStampConstant(code, (UInt32) who, event, param1, param2); + } } } @@ -908,7 +941,7 @@ bool IODTPlatformExpert::createNubs( IOService * parent, OSIterator * iter ) return( ok ); } -void IODTPlatformExpert::processTopLevel( IORegistryEntry * root ) +void IODTPlatformExpert::processTopLevel( IORegistryEntry * rootEntry ) { OSIterator * kids; IORegistryEntry * next; @@ -916,7 +949,7 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * root ) IORegistryEntry * 
options; // infanticide - kids = IODTFindMatchingEntries( root, 0, deleteList() ); + kids = IODTFindMatchingEntries( rootEntry, 0, deleteList() ); if( kids) { while( (next = (IORegistryEntry *)kids->getNextObject())) { next->detachAll( gIODTPlane); @@ -925,7 +958,7 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * root ) } // Publish an IODTNVRAM class on /options. - options = root->childFromPath("options", gIODTPlane); + options = rootEntry->childFromPath("options", gIODTPlane); if (options) { dtNVRAM = new IODTNVRAM; if (dtNVRAM) { @@ -940,12 +973,12 @@ void IODTPlatformExpert::processTopLevel( IORegistryEntry * root ) } // Publish the cpus. - cpus = root->childFromPath( "cpus", gIODTPlane); + cpus = rootEntry->childFromPath( "cpus", gIODTPlane); if ( cpus) createNubs( this, IODTFindMatchingEntries( cpus, kIODTExclusive, 0)); // publish top level, minus excludeList - createNubs( this, IODTFindMatchingEntries( root, kIODTExclusive, excludeList())); + createNubs( this, IODTFindMatchingEntries( rootEntry, kIODTExclusive, excludeList())); } IOReturn IODTPlatformExpert::getNubResources( IOService * nub ) @@ -1097,14 +1130,14 @@ IOByteCount IODTPlatformExpert::savePanicInfo(UInt8 *buffer, IOByteCount length) OSString* IODTPlatformExpert::createSystemSerialNumberString(OSData* myProperty) { UInt8* serialNumber; unsigned int serialNumberSize; - short pos = 0; + unsigned short pos = 0; char* temp; char SerialNo[30]; if (myProperty != NULL) { serialNumberSize = myProperty->getLength(); serialNumber = (UInt8*)(myProperty->getBytesNoCopy()); - temp = serialNumber; + temp = (char*)serialNumber; if (serialNumberSize > 0) { // check to see if this is a CTO serial number... while (pos < serialNumberSize && temp[pos] != '-') pos++; diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index d7fc62d91..78dbf3527 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -44,6 +44,12 @@ OSDefineMetaClassAndStructors(IORegistryEntry, OSObject) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#define kIORegPlaneParentSuffix "ParentLinks" +#define kIORegPlaneChildSuffix "ChildLinks" +#define kIORegPlaneNameSuffix "Name" +#define kIORegPlaneLocationSuffix "Location" +#define kIORegPlaneParentSuffixLen (sizeof(kIORegPlaneParentSuffix) - 1) + static IORegistryEntry * gRegistryRoot; static OSDictionary * gIORegistryPlanes; @@ -103,7 +109,7 @@ static SInt32 gIORegistryGenerationCount; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ struct s_lock_t { - decl_simple_lock_data(,interlock) /* "hardware" interlock field */ + lck_spin_t interlock; /* "hardware" interlock field */ volatile unsigned int read_count:16, /* No. of accepted readers */ want_upgrade:1, /* Read-to-write upgrade waiting */ @@ -134,7 +140,7 @@ s_lock_init( { (void) memset((void *) l, 0, sizeof(s_lock_t)); - simple_lock_init(&l->interlock, 0); + lck_spin_init(&l->interlock, IOLockGroup, LCK_ATTR_NULL); l->want_write = FALSE; l->want_upgrade = FALSE; l->read_count = 0; @@ -147,7 +153,7 @@ s_lock_write( { register int i; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); /* * Try to acquire the want_write bit. @@ -156,17 +162,16 @@ s_lock_write( i = lock_wait_time[l->can_sleep ? 
1 : 0]; if (i != 0) { - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); while (--i != 0 && l->want_write) continue; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); } if (l->can_sleep && l->want_write) { l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); + lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, + (event_t) l, THREAD_UNINT); /* interlock relocked */ } } @@ -178,23 +183,22 @@ s_lock_write( i = lock_wait_time[l->can_sleep ? 1 : 0]; if (i != 0) { - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); while (--i != 0 && (l->read_count != 0 || l->want_upgrade)) continue; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); } if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); + lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, + (event_t) l, THREAD_UNINT); /* interlock relocked */ } } - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); } static void @@ -203,7 +207,7 @@ s_lock_done( { boolean_t do_wakeup = FALSE; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); if (l->read_count != 0) { l->read_count -= 1; @@ -229,7 +233,7 @@ s_lock_done( do_wakeup = TRUE; } - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); if (do_wakeup) thread_wakeup((event_t) l); @@ -241,32 +245,31 @@ s_lock_read( { register int i; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); while ( l->want_upgrade || ((0 == l->read_count) && l->want_write )) { i = lock_wait_time[l->can_sleep ? 1 : 0]; if (i != 0) { - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); while (--i != 0 && (l->want_upgrade || ((0 == l->read_count) && l->want_write ))) continue; - simple_lock(&l->interlock); + lck_spin_lock(&l->interlock); } if (l->can_sleep && (l->want_upgrade || ((0 == l->read_count) && l->want_write ))) { l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); + lck_spin_sleep( &l->interlock, LCK_SLEEP_DEFAULT, + (event_t) l, THREAD_UNINT); /* interlock relocked */ } } l->read_count += 1; - simple_unlock(&l->interlock); + lck_spin_unlock(&l->interlock); } @@ -327,16 +330,16 @@ const IORegistryPlane * IORegistryEntry::makePlane( const char * name ) nameKey = OSSymbol::withCString( key); - strcpy( end, "ParentLinks" ); + strcpy( end, kIORegPlaneParentSuffix ); parentKey = OSSymbol::withCString( key); - strcpy( end, "ChildLinks" ); + strcpy( end, kIORegPlaneChildSuffix ); childKey = OSSymbol::withCString( key); - strcpy( end, "Name" ); + strcpy( end, kIORegPlaneNameSuffix ); pathNameKey = OSSymbol::withCString( key); - strcpy( end, "Location" ); + strcpy( end, kIORegPlaneLocationSuffix ); pathLocationKey = OSSymbol::withCString( key); plane = new IORegistryPlane; @@ -585,14 +588,14 @@ IORegistryEntry::copyProperty( type * aKey, \ bool IORegistryEntry::serializeProperties( OSSerialize * s ) const { - bool ok; - // setProperty( getRetainCount(), 32, "__retain" ); PLOCK; - ok = getPropertyTable()->serialize( s ); + OSCollection *snapshotProperties = getPropertyTable()->copyCollection(); PUNLOCK; + bool ok = snapshotProperties->serialize( s ); + snapshotProperties->release(); return( ok ); } @@ -638,6 +641,49 @@ IORegistryEntry::getProperty( const OSSymbol * aKey) const return( obj ); } +void +IORegistryEntry::removeProperty( const OSSymbol * aKey) +{ + PLOCK; + 
getPropertyTable()->removeObject( aKey ); + PUNLOCK; +} + +bool +IORegistryEntry::setProperty( const OSSymbol * aKey, OSObject * anObject) +{ + bool ret = false; + + // If we are inserting a collection class and the current entry + // is attached into the registry (inPlane()) then mark the collection + // as immutable. + OSCollection *coll = OSDynamicCast(OSCollection, anObject); + bool makeImmutable = (coll && inPlane()); + + PLOCK; + if( makeImmutable ) + coll->setOptions( OSCollection::kMASK, OSCollection::kImmutable ); + + ret = getPropertyTable()->setObject( aKey, anObject ); + PUNLOCK; + + return ret; +} + +IOReturn IORegistryEntry:: +runPropertyAction(Action inAction, OSObject *target, + void *arg0, void *arg1, void *arg2, void *arg3) +{ + IOReturn res; + + // closeGate is recursive so don't worry if we already hold the lock. + PLOCK; + res = (*inAction)(target, arg0, arg1, arg2, arg3); + PUNLOCK; + + return res; +} + OSObject * IORegistryEntry::getProperty( const OSString * aKey) const { @@ -658,13 +704,6 @@ IORegistryEntry::getProperty( const char * aKey) const return( obj ); } -void -IORegistryEntry::removeProperty( const OSSymbol * aKey) -{ - PLOCK; - getPropertyTable()->removeObject( aKey ); - PUNLOCK; -} void IORegistryEntry::removeProperty( const OSString * aKey) @@ -682,17 +721,6 @@ IORegistryEntry::removeProperty( const char * aKey) tmpKey->release(); } -bool -IORegistryEntry::setProperty( const OSSymbol * aKey, OSObject * anObject) -{ - bool ret = false; - PLOCK; - ret = getPropertyTable()->setObject( aKey, anObject ); - PUNLOCK; - - return ret; -} - bool IORegistryEntry::setProperty( const OSString * aKey, OSObject * anObject) { @@ -1658,7 +1686,34 @@ bool IORegistryEntry::inPlane( const IORegistryPlane * plane ) const RLOCK; - ret = (0 != getParentSetReference( plane )); + if( plane) + ret = (0 != getParentSetReference( plane )); + else { + + // Check to see if this is in any plane. If it is in a plane + // then the registryTable will contain a key with the ParentLinks + // suffix. When we iterate over the keys looking for that suffix + ret = false; + + OSCollectionIterator *iter = + OSCollectionIterator::withCollection( registryTable()); + if( iter) { + const OSSymbol *key; + + while( (key = (OSSymbol *) iter->getNextObject()) ) { + const char *keysuffix; + + // Get a pointer to this keys suffix + keysuffix = key->getCStringNoCopy() + + key->getLength() - kIORegPlaneParentSuffixLen; + if( !strcmp(keysuffix, kIORegPlaneParentSuffix) ) { + ret = true; + break; + } + } + iter->release(); + } + } UNLOCK; @@ -1684,10 +1739,33 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, else needParent = true; -// ret &= parent->makeLink( this, kChildSetIndex, plane ); - UNLOCK; + PLOCK; + + // Mark any collections in the property list as immutable + OSDictionary *ptable = getPropertyTable(); + OSCollectionIterator *iter = + OSCollectionIterator::withCollection( ptable ); + if( iter) { + const OSSymbol *key; + + while( (key = (OSSymbol *) iter->getNextObject( ))) { + // Is object for key a collection? 
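/*
 * Illustration: two related patterns appear in the IORegistryEntry
 * changes above. Collections reachable from the registry are marked
 * kImmutable once an entry is attached, and serializeProperties() now
 * snapshots the property table under the lock and serializes the copy
 * after dropping it. A generic userland sketch of that snapshot-then-work
 * pattern, using std::mutex in place of the registry locks:
 */
#include <map>
#include <mutex>
#include <string>

class PropertyTable {
    std::mutex lock;
    std::map<std::string, std::string> table;
public:
    void set(const std::string &key, const std::string &value) {
        std::lock_guard<std::mutex> g(lock);
        table[key] = value;
    }
    std::string serialize() {
        std::map<std::string, std::string> snapshot;
        {
            std::lock_guard<std::mutex> g(lock);  // short critical section
            snapshot = table;                     // cheap copy under the lock
        }
        std::string out;                          // slow formatting, lock dropped
        for (const auto &kv : snapshot)
            out += kv.first + "=" + kv.second + "\n";
        return out;
    }
};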
+ OSCollection *coll = + OSDynamicCast( OSCollection, ptable->getObject( key )); + + if( coll) { + // Yup so mark it as immutable + coll->setOptions( OSCollection::kMASK, + OSCollection::kImmutable ); + } + } + iter->release(); + } + + PUNLOCK; + if( needParent) ret &= parent->attachToChild( this, plane ); @@ -2075,8 +2153,8 @@ OSMetaClassDefineReservedUsed(IORegistryEntry, 1); OSMetaClassDefineReservedUsed(IORegistryEntry, 2); OSMetaClassDefineReservedUsed(IORegistryEntry, 3); OSMetaClassDefineReservedUsed(IORegistryEntry, 4); +OSMetaClassDefineReservedUsed(IORegistryEntry, 5); -OSMetaClassDefineReservedUnused(IORegistryEntry, 5); OSMetaClassDefineReservedUnused(IORegistryEntry, 6); OSMetaClassDefineReservedUnused(IORegistryEntry, 7); OSMetaClassDefineReservedUnused(IORegistryEntry, 8); diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index 1ea83079b..781fad86c 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -158,6 +159,19 @@ static OSData * gIOConsoleUsersSeedValue; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#define queue_element(entry, element, type, field) do { \ + vm_address_t __ele = (vm_address_t) (entry); \ + __ele -= -4 + ((size_t)(&((type) 4)->field)); \ + (element) = (type) __ele; \ + } while(0) + +#define iterqueue(que, elt) \ + for (queue_entry_t elt = queue_first(que); \ + !queue_end(que, elt); \ + elt = queue_next(elt)) + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + struct ArbitrationLockQueueElement { queue_chain_t link; IOThread thread; @@ -512,7 +526,7 @@ void IOService::startMatching( IOOptionBits options ) lockForArbitration(); IOLockLock( gIOServiceBusyLock ); - waitAgain = (prevBusy != (__state[1] & kIOServiceBusyStateMask)); + waitAgain = (prevBusy < (__state[1] & kIOServiceBusyStateMask)); if( waitAgain) __state[1] |= kIOServiceSyncPubState | kIOServiceBusyWaiterState; else @@ -534,7 +548,8 @@ void IOService::startMatching( IOOptionBits options ) IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables ) { OSDictionary * table; - OSIterator * iter; + OSSet * set; + OSSet * allSet = 0; IOService * service; #if IOMATCHDEBUG SInt32 count = 0; @@ -545,18 +560,23 @@ IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables ) while( (table = (OSDictionary *) newTables->getFirstObject())) { LOCKWRITENOTIFY(); - iter = (OSIterator *) getExistingServices( table, - kIOServiceRegisteredState ); + set = (OSSet *) getExistingServices( table, + kIOServiceRegisteredState, + kIOServiceExistingSet); UNLOCKNOTIFY(); - if( iter) { - while( (service = (IOService *) iter->getNextObject())) { - service->startMatching(kIOServiceAsynchronous); + if( set) { + #if IOMATCHDEBUG - count++; + count += set->getCount(); #endif + if (allSet) { + allSet->merge((const OSSet *) set); + set->release(); } - iter->release(); + else + allSet = set; } + #if IOMATCHDEBUG if( getDebugFlags( table ) & kIOLogMatch) LOG("Matching service count = %ld\n", count); @@ -564,6 +584,14 @@ IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables ) newTables->removeObject(table); } + if (allSet) { + while( (service = (IOService *) allSet->getAnyObject())) { + service->startMatching(kIOServiceAsynchronous); + allSet->removeObject(service); + } + allSet->release(); + } + newTables->release(); return( kIOReturnSuccess ); @@ -1254,28 +1282,36 @@ void IOService::applyToInterested( const OSSymbol * 
typeOfInterest, OSObjectApplierFunction applier, void * context ) { - OSArray * array; - unsigned int index; - OSObject * next; - OSArray * copyArray; + OSArray * copyArray = 0; applyToClients( (IOServiceApplierFunction) applier, context ); LOCKREADNOTIFY(); - array = OSDynamicCast( OSArray, getProperty( typeOfInterest )); - if( array) { - copyArray = OSArray::withArray( array ); - UNLOCKNOTIFY(); - if( copyArray) { - for( index = 0; - (next = copyArray->getObject( index )); - index++) { - (*applier)(next, context); - } - copyArray->release(); - } - } else - UNLOCKNOTIFY(); + + IOCommand *notifyList = + OSDynamicCast( IOCommand, getProperty( typeOfInterest )); + + if( notifyList) { + copyArray = OSArray::withCapacity(1); + + // iterate over queue, entry is set to each element in the list + iterqueue(¬ifyList->fCommandChain, entry) { + _IOServiceInterestNotifier * notify; + + queue_element(entry, notify, _IOServiceInterestNotifier *, chain); + copyArray->setObject(notify); + } + } + UNLOCKNOTIFY(); + + if( copyArray) { + unsigned int index; + OSObject * next; + + for( index = 0; (next = copyArray->getObject( index )); index++) + (*applier)(next, context); + copyArray->release(); + } } struct MessageClientsContext { @@ -1325,7 +1361,6 @@ IONotifier * IOService::registerInterest( const OSSymbol * typeOfInterest, IOServiceInterestHandler handler, void * target, void * ref ) { _IOServiceInterestNotifier * notify = 0; - OSArray * set; if( (typeOfInterest != gIOGeneralInterest) && (typeOfInterest != gIOBusyInterest) @@ -1352,16 +1387,23 @@ IONotifier * IOService::registerInterest( const OSSymbol * typeOfInterest, ////// queue LOCKWRITENOTIFY(); - if( 0 == (set = (OSArray *) getProperty( typeOfInterest ))) { - set = OSArray::withCapacity( 1 ); - if( set) { - setProperty( typeOfInterest, set ); - set->release(); - } - } - notify->whence = set; - if( set) - set->setObject( notify ); + + // Get the head of the notifier linked list + IOCommand *notifyList = (IOCommand *) getProperty( typeOfInterest ); + if (!notifyList || !OSDynamicCast(IOCommand, notifyList)) { + notifyList = OSTypeAlloc(IOCommand); + if (notifyList) { + notifyList->init(); + setProperty( typeOfInterest, notifyList); + notifyList->release(); + } + } + + if (notifyList) { + enqueue(¬ifyList->fCommandChain, ¬ify->chain); + notify->retain(); // ref'ed while in list + } + UNLOCKNOTIFY(); } } @@ -1370,30 +1412,30 @@ IONotifier * IOService::registerInterest( const OSSymbol * typeOfInterest, return( notify ); } -static void cleanInterestArray( OSObject * object ) +static void cleanInterestList( OSObject * head ) { - OSArray * array; - unsigned int index; - _IOServiceInterestNotifier * next; - - if( (array = OSDynamicCast( OSArray, object))) { - LOCKWRITENOTIFY(); - for( index = 0; - (next = (_IOServiceInterestNotifier *) - array->getObject( index )); - index++) { - next->whence = 0; - } - UNLOCKNOTIFY(); + IOCommand *notifyHead = OSDynamicCast(IOCommand, head); + if (!notifyHead) + return; + + LOCKWRITENOTIFY(); + while ( queue_entry_t entry = dequeue(¬ifyHead->fCommandChain) ) { + queue_next(entry) = queue_prev(entry) = 0; + + _IOServiceInterestNotifier * notify; + + queue_element(entry, notify, _IOServiceInterestNotifier *, chain); + notify->release(); } + UNLOCKNOTIFY(); } void IOService::unregisterAllInterest( void ) { - cleanInterestArray( getProperty( gIOGeneralInterest )); - cleanInterestArray( getProperty( gIOBusyInterest )); - cleanInterestArray( getProperty( gIOAppPowerStateInterest )); - cleanInterestArray( getProperty( 
gIOPriorityPowerStateInterest )); + cleanInterestList( getProperty( gIOGeneralInterest )); + cleanInterestList( getProperty( gIOBusyInterest )); + cleanInterestList( getProperty( gIOAppPowerStateInterest )); + cleanInterestList( getProperty( gIOPriorityPowerStateInterest )); } /* @@ -1435,10 +1477,10 @@ void _IOServiceInterestNotifier::remove() { LOCKWRITENOTIFY(); - if( whence) { - whence->removeObject(whence->getNextIndexOfObject( - (OSObject *) this, 0 )); - whence = 0; + if( queue_next( &chain )) { + remqueue( 0, &chain); + queue_next( &chain) = queue_prev( &chain) = 0; + release(); } state &= ~kIOServiceNotifyEnable; @@ -1563,7 +1605,6 @@ bool IOService::terminatePhase1( IOOptionBits options ) victim->deliverNotification( gIOTerminatedNotification, 0, 0xffffffff ); IOUserClient::destroyUserReferences( victim ); - victim->unregisterAllInterest(); iter = victim->getClientIterator(); if( iter) { @@ -1598,7 +1639,7 @@ bool IOService::terminatePhase1( IOOptionBits options ) void IOService::scheduleTerminatePhase2( IOOptionBits options ) { AbsoluteTime deadline; - int waitResult; + int waitResult = THREAD_AWAKENED; bool wait, haveDeadline = false; options |= kIOServiceRequired; @@ -1636,8 +1677,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) deadline, THREAD_UNINT ); if( waitResult == THREAD_TIMED_OUT) { TLOG("%s::terminate(kIOServiceSynchronous) timeout", getName()); - } else - thread_cancel_timer(); + } } } while(gIOTerminateWork || (wait && (waitResult != THREAD_TIMED_OUT))); @@ -2548,11 +2588,15 @@ bool IOService::startCandidate( IOService * service ) ok = service->attach( this ); - if( ok) { - // stall for any nub resources - checkResources(); - // stall for any driver resources - service->checkResources(); + if( ok) + { + if (this != gIOResources) + { + // stall for any nub resources + checkResources(); + // stall for any driver resources + service->checkResources(); + } AbsoluteTime startTime; AbsoluteTime endTime; @@ -2612,22 +2656,22 @@ void IOService::publishResource( const OSSymbol * key, OSObject * value ) bool IOService::addNeededResource( const char * key ) { - OSObject * resources; + OSObject * resourcesProp; OSSet * set; OSString * newKey; bool ret; - resources = getProperty( gIOResourceMatchKey ); + resourcesProp = getProperty( gIOResourceMatchKey ); newKey = OSString::withCString( key ); - if( (0 == resources) || (0 == newKey)) + if( (0 == resourcesProp) || (0 == newKey)) return( false); - set = OSDynamicCast( OSSet, resources ); + set = OSDynamicCast( OSSet, resourcesProp ); if( !set) { set = OSSet::withCapacity( 1 ); if( set) - set->setObject( resources ); + set->setObject( resourcesProp ); } else set->retain(); @@ -2674,32 +2718,32 @@ bool IOService::checkResource( OSObject * matching ) bool IOService::checkResources( void ) { - OSObject * resources; + OSObject * resourcesProp; OSSet * set; OSIterator * iter; bool ok; - resources = getProperty( gIOResourceMatchKey ); - if( 0 == resources) + resourcesProp = getProperty( gIOResourceMatchKey ); + if( 0 == resourcesProp) return( true ); - if( (set = OSDynamicCast( OSSet, resources ))) { + if( (set = OSDynamicCast( OSSet, resourcesProp ))) { iter = OSCollectionIterator::withCollection( set ); ok = (0 != iter); - while( ok && (resources = iter->getNextObject()) ) - ok = checkResource( resources ); + while( ok && (resourcesProp = iter->getNextObject()) ) + ok = checkResource( resourcesProp ); if( iter) iter->release(); } else - ok = checkResource( resources ); + ok = checkResource( resourcesProp ); return( 
ok ); } -_IOConfigThread * _IOConfigThread::configThread( void ) +void _IOConfigThread::configThread( void ) { _IOConfigThread * inst; @@ -2708,18 +2752,17 @@ _IOConfigThread * _IOConfigThread::configThread( void ) continue; if( !inst->init()) continue; - if( !(inst->thread = IOCreateThread - ( (IOThreadFunc) &_IOConfigThread::main, inst ))) + if( !(IOCreateThread((IOThreadFunc) &_IOConfigThread::main, inst ))) continue; - return( inst ); + return; } while( false); if( inst) inst->release(); - return( 0 ); + return; } void _IOConfigThread::free( void ) @@ -2892,7 +2935,6 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, if( wait) { __state[1] |= kIOServiceBusyWaiterState; unlockForArbitration(); - assert_wait( (event_t) this, THREAD_UNINT ); if( timeout ) { if( computeDeadline ) { AbsoluteTime nsinterval; @@ -2905,16 +2947,16 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, abstime, &abstime ); computeDeadline = false; } - thread_set_timer_deadline( abstime ); + + assert_wait_deadline((event_t)this, THREAD_UNINT, __OSAbsoluteTime(abstime)); } + else + assert_wait((event_t)this, THREAD_UNINT ); } else unlockForArbitration(); IOLockUnlock( gIOServiceBusyLock ); - if( wait) { + if( wait) waitResult = thread_block(THREAD_CONTINUE_NULL); - if( timeout && (waitResult != THREAD_TIMED_OUT)) - thread_cancel_timer(); - } } while( wait && (waitResult != THREAD_TIMED_OUT)); @@ -3077,7 +3119,6 @@ void _IOServiceJob::pingConfig( _IOServiceJob * job ) semaphore_signal( gJobsSemaphore ); } - // internal - call with gNotificationLock OSObject * IOService::getExistingServices( OSDictionary * matching, IOOptionBits inState, IOOptionBits options ) @@ -3085,36 +3126,57 @@ OSObject * IOService::getExistingServices( OSDictionary * matching, OSObject * current = 0; OSIterator * iter; IOService * service; + OSObject * obj; if( !matching) return( 0 ); - iter = IORegistryIterator::iterateOver( gIOServicePlane, - kIORegistryIterateRecursively ); - if( iter) { - do { - iter->reset(); - while( (service = (IOService *) iter->getNextObject())) { - if( (inState == (service->__state[0] & inState)) - && (0 == (service->__state[0] & kIOServiceInactiveState)) - && service->passiveMatch( matching )) { - - if( options & kIONotifyOnce) { - current = service; - break; - } - if( current) - ((OSSet *)current)->setObject( service ); - else - current = OSSet::withObjects( - (const OSObject **) &service, 1, 1 ); - } - } - } while( !service && !iter->isValid()); - iter->release(); + if(true + && (obj = matching->getObject(gIOProviderClassKey)) + && gIOResourcesKey + && gIOResourcesKey->isEqualTo(obj) + && (service = gIOResources)) + { + if( (inState == (service->__state[0] & inState)) + && (0 == (service->__state[0] & kIOServiceInactiveState)) + && service->passiveMatch( matching )) + { + if( options & kIONotifyOnce) + current = service; + else + current = OSSet::withObjects( + (const OSObject **) &service, 1, 1 ); + } + } + else + { + iter = IORegistryIterator::iterateOver( gIOServicePlane, + kIORegistryIterateRecursively ); + if( iter) { + do { + iter->reset(); + while( (service = (IOService *) iter->getNextObject())) { + if( (inState == (service->__state[0] & inState)) + && (0 == (service->__state[0] & kIOServiceInactiveState)) + && service->passiveMatch( matching )) { + + if( options & kIONotifyOnce) { + current = service; + break; + } + if( current) + ((OSSet *)current)->setObject( service ); + else + current = OSSet::withObjects( + (const OSObject **) &service, 1, 1 ); + } + } + } while( !service && 
!iter->isValid()); + iter->release(); + } } - if( current && (0 == (options & kIONotifyOnce))) { + if( current && (0 == (options & (kIONotifyOnce | kIOServiceExistingSet)))) { iter = OSCollectionIterator::withCollection( (OSSet *)current ); current->release(); current = iter; @@ -4098,7 +4160,7 @@ int IOService::errnoFromReturn( IOReturn rtn ) case kIOReturnBadArgument: return(EINVAL); case kIOReturnUnsupported: - return(EOPNOTSUPP); + return(ENOTSUP); case kIOReturnBusy: return(EBUSY); case kIOReturnNoPower: @@ -4393,8 +4455,8 @@ IOReturn IOService::causeInterrupt(int source) OSMetaClassDefineReservedUsed(IOService, 0); OSMetaClassDefineReservedUsed(IOService, 1); OSMetaClassDefineReservedUsed(IOService, 2); +OSMetaClassDefineReservedUsed(IOService, 3); -OSMetaClassDefineReservedUnused(IOService, 3); OSMetaClassDefineReservedUnused(IOService, 4); OSMetaClassDefineReservedUnused(IOService, 5); OSMetaClassDefineReservedUnused(IOService, 6); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 9b0756500..af7e11dd0 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -19,60 +19,36 @@ * * @APPLE_LICENSE_HEADER_END@ */ - -#include -#include + +#include + #include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include "IOKit/pwr_mgt/IOPMinformeeList.h" -#include "IOKit/pwr_mgt/IOPMchangeNoteList.h" -#include "IOKit/pwr_mgt/IOPMlog.h" -#include "IOKit/pwr_mgt/IOPowerConnection.h" -#include -#define super IORegistryEntry +#include +#include +#include +#include +#include +#include -// Some debug functions -static inline void -ioSPMTrace(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) -{ - if (gIOKitDebug & kIOLogTracePower) - IOTimeStampConstant(IODBG_POWER(csc), a, b, c, d); -} +// Required for notification instrumentation +#include "IOServicePrivate.h" -static inline void -ioSPMTraceStart(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) -{ - if (gIOKitDebug & kIOLogTracePower) - IOTimeStampConstant(IODBG_POWER(csc)|DBG_FUNC_START, a, b, c, d); -} - -static inline void -ioSPMTraceEnd(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) -{ - if (gIOKitDebug & kIOLogTracePower) - IOTimeStampConstant(IODBG_POWER(csc)|DBG_FUNC_END, a, b, c, d); -} +#define super IORegistryEntry +#define OUR_PMLog(t, a, b) \ + do { pm_vars->thePlatform->PMLog(pm_vars->ourName, t, a, b); } while(0) static void ack_timer_expired(thread_call_param_t); static void settle_timer_expired(thread_call_param_t); static void PM_idle_timer_expired(OSObject *, IOTimerEventSource *); -static void c_PM_Clamp_Timer_Expired (OSObject * client,IOTimerEventSource *); void tellAppWithResponse ( OSObject * object, void * context); void tellClientWithResponse ( OSObject * object, void * context); void tellClient ( OSObject * object, void * context); @@ -360,6 +336,8 @@ void IOService::PMfree ( void ) if ( pm_vars ) { if ( pm_vars->PMcommandGate ) { + if(pm_vars->PMworkloop) + pm_vars->PMworkloop->removeEventSource(pm_vars->PMcommandGate); pm_vars->PMcommandGate->release(); pm_vars->PMcommandGate = NULL; } @@ -419,9 +397,12 @@ void IOService::PMstop ( void ) // detach IOConnections detachAbove( gIOPowerPlane ); - // no more power state changes - pm_vars->parentsKnowState = false; - + if ( pm_vars ) + { + // no more power state changes + 
pm_vars->parentsKnowState = false; + } + // detach children iter = getChildIterator(gIOPowerPlane); @@ -1033,33 +1014,28 @@ IOReturn IOService::acknowledgePowerChange ( IOService * whichObject ) IOReturn IOService::acknowledgeSetPowerState ( void ) { - if (! acquire_lock() ) - { + if (!acquire_lock()) return IOPMNoErr; - } - ioSPMTrace(IOPOWER_ACK, * (int *) this); - - if ( priv->driver_timer == -1 ) - { + IOReturn timer = priv->driver_timer; + if ( timer == -1 ) { // driver is acking instead of using return code + OUR_PMLog(kPMLogDriverAcknowledgeSet, (UInt32) this, timer); priv->driver_timer = 0; + } + else if ( timer > 0 ) { // are we expecting this? + // yes, stop the timer + stop_ack_timer(); + priv->driver_timer = 0; + OUR_PMLog(kPMLogDriverAcknowledgeSet, (UInt32) this, timer); + IOUnlock(priv->our_lock); + driver_acked(); + return IOPMNoErr; } else { - // are we expecting this? - if ( priv->driver_timer > 0 ) - { - // yes, stop the timer - stop_ack_timer(); - priv->driver_timer = 0; - IOUnlock(priv->our_lock); - pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogDriverAcknowledgeSet,0,0); - driver_acked(); - return IOPMNoErr; - } else { - // not expecting this - pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogAcknowledgeErr4,0,0); - } + // not expecting this + OUR_PMLog(kPMLogAcknowledgeErr4, (UInt32) this, 0); } + IOUnlock(priv->our_lock); return IOPMNoErr; } @@ -1333,7 +1309,7 @@ IOReturn IOService::requestPowerDomainState ( IOPMPowerFlags desiredState, IOPow case IOPMNextLowerState: i = pm_vars->myCurrentState - 1; - while ( i >= 0 ) + while ( (int) i >= 0 ) { if ( ( pm_vars->thePowerStates[i].outputPowerCharacter & theDesiredState) == (theDesiredState & pm_vars->myCharacterFlags) ) { @@ -1341,7 +1317,7 @@ IOReturn IOService::requestPowerDomainState ( IOPMPowerFlags desiredState, IOPow } i--; } - if ( i < 0 ) + if ( (int) i < 0 ) { return IOPMNoSuchState; } @@ -1349,7 +1325,7 @@ IOReturn IOService::requestPowerDomainState ( IOPMPowerFlags desiredState, IOPow case IOPMHighestState: i = pm_vars->theNumberOfPowerStates; - while ( i >= 0 ) + while ( (int) i >= 0 ) { i--; if ( ( pm_vars->thePowerStates[i].outputPowerCharacter & theDesiredState) == (theDesiredState & pm_vars->myCharacterFlags) ) @@ -1357,7 +1333,7 @@ IOReturn IOService::requestPowerDomainState ( IOPMPowerFlags desiredState, IOPow break; } } - if ( i < 0 ) + if ( (int) i < 0 ) { return IOPMNoSuchState; } @@ -1683,7 +1659,7 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) return true; } IOUnlock(priv->activityLock); - + // Transfer execution to the PM workloop if( (pmRootDomain = getPMRootDomain()) ) pmRootDomain->unIdleDevice(this, stateNumber); @@ -1773,41 +1749,73 @@ IOReturn IOService::setIdleTimerPeriod ( unsigned long period ) return IOPMNoErr; } +//****************************************************************************** +// nextIdleTimeout +// +// Returns how many "seconds from now" the device should idle into its +// next lowest power state. +//****************************************************************************** +SInt32 IOService::nextIdleTimeout( + AbsoluteTime currentTime, + AbsoluteTime lastActivity, + unsigned int powerState) +{ + AbsoluteTime delta; + UInt64 delta_ns; + SInt32 delta_secs; + SInt32 delay_secs; -//********************************************************************************* + // Calculate time difference using funky macro from clock.h. + delta = currentTime; + SUB_ABSOLUTETIME(&delta, &lastActivity); + + // Figure it in seconds. 
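// A worked example of the surrounding nextIdleTimeout() arithmetic, with
// assumed values: for idle_timer_period = 30 s and a last activity 12 s
// ago, delta_secs = 12, so the timer should be re-armed for 30 - 12 = 18 s;
// if delta_secs ever reaches or exceeds the period, the code falls back to
// a full 30 s rather than computing a negative delay.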
+ absolutetime_to_nanoseconds(delta, &delta_ns); + delta_secs = (SInt32)(delta_ns / NSEC_PER_SEC); + + // Be paranoid about delta somehow exceeding timer period. + if (delta_secs < (int) priv->idle_timer_period ) + delay_secs = (int) priv->idle_timer_period - delta_secs; + else + delay_secs = (int) priv->idle_timer_period; + + return (SInt32)delay_secs; +} + +//****************************************************************************** // start_PM_idle_timer // // The parameter is a pointer to us. Use it to call our timeout method. -//********************************************************************************* +//****************************************************************************** void IOService::start_PM_idle_timer ( void ) { + static const int maxTimeout = 100000; + static const int minTimeout = 1; AbsoluteTime uptime; - AbsoluteTime delta; - UInt64 delta_ns; - UInt64 delta_secs; - UInt64 delay_secs; + SInt32 idle_in = 0; IOLockLock(priv->activityLock); clock_get_uptime(&uptime); + + // Subclasses may modify idle sleep algorithm + idle_in = nextIdleTimeout(uptime, + priv->device_active_timestamp, + pm_vars->myCurrentState); - // Calculate time difference using funky macro from clock.h. - delta = uptime; - SUB_ABSOLUTETIME(&delta, &(priv->device_active_timestamp)); - - // Figure it in seconds. - absolutetime_to_nanoseconds(delta, &delta_ns); - delta_secs = delta_ns / NSEC_PER_SEC; - - // Be paranoid about delta somehow exceeding timer period. - if (delta_secs < priv->idle_timer_period ) + // Check for out-of range responses + if(idle_in > maxTimeout) { - delay_secs = priv->idle_timer_period - delta_secs; - } else { - delay_secs = priv->idle_timer_period; + // use standard implementation + idle_in = IOService::nextIdleTimeout(uptime, + priv->device_active_timestamp, + pm_vars->myCurrentState); + } else if(idle_in < minTimeout) { + // fire immediately + idle_in = 0; } - priv->timerEventSrc->setTimeout(delay_secs, NSEC_PER_SEC); + priv->timerEventSrc->setTimeout(idle_in, NSEC_PER_SEC); IOLockUnlock(priv->activityLock); return; @@ -1885,6 +1893,10 @@ void IOService::command_received ( void *statePtr , void *, void * , void * ) (priv->imminentState < stateNumber) ) { changePowerStateToPriv(stateNumber); + + // After we raise our state, re-schedule the idle timer. + if(priv->timerEventSrc) + start_PM_idle_timer(); } } @@ -1941,8 +1953,8 @@ IOReturn IOService::setAggressiveness ( unsigned long type, unsigned long newLev IOReturn IOService::getAggressiveness ( unsigned long type, unsigned long * currentLevel ) { -// if ( type > kMaxType ) -// return kIOReturnBadArgument; + if ( type > kMaxType ) + return kIOReturnBadArgument; if ( !pm_vars->current_aggressiveness_valid[type] ) return kIOReturnInvalid; @@ -3850,7 +3862,7 @@ IOReturn IOService::ask_parent ( unsigned long requestedState ) //********************************************************************************* IOReturn IOService::instruct_driver ( unsigned long newState ) { - IOReturn return_code; + IOReturn delay; // can our driver switch to the desired state? 
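// A minimal sketch, assuming a hypothetical driver subclass, of overriding
// the nextIdleTimeout() hook added earlier in this file (per the
// "Subclasses may modify idle sleep algorithm" note in
// start_PM_idle_timer(); answers above 100000 s fall back to the default
// computation, and answers below 1 s fire immediately):
//
//     SInt32 MyDriver::nextIdleTimeout(AbsoluteTime currentTime,
//                                      AbsoluteTime lastActivity,
//                                      unsigned int powerState)
//     {
//         // Idle out of high power states quickly, low ones slowly.
//         SInt32 base = IOService::nextIdleTimeout(currentTime,
//                                                  lastActivity,
//                                                  powerState);
//         return (powerState > 1) ? base : 4 * base;
//     }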
if ( pm_vars->thePowerStates[newState].capabilityFlags & IOPMNotAttainable ) @@ -3860,15 +3872,14 @@ IOReturn IOService::instruct_driver ( unsigned long newState ) } priv->driver_timer = -1; - pm_vars->thePlatform->PMLog(pm_vars->ourName,PMlogProgramHardware,newState,0); // yes, instruct it - ioSPMTraceStart(IOPOWER_STATE, * (int *) this, (int) newState); - return_code = pm_vars->theControllingDriver->setPowerState( newState,this ); - ioSPMTraceEnd(IOPOWER_STATE, * (int *) this, (int) newState, (int) return_code); + OUR_PMLog( kPMLogProgramHardware, (UInt32) this, newState); + delay = pm_vars->theControllingDriver->setPowerState( newState,this ); + OUR_PMLog((UInt32) -kPMLogProgramHardware, (UInt32) this, (UInt32) delay); // it finished - if ( return_code == IOPMAckImplied ) + if ( delay == IOPMAckImplied ) { priv->driver_timer = 0; return IOPMAckImplied; @@ -3881,13 +3892,13 @@ IOReturn IOService::instruct_driver ( unsigned long newState ) } // somebody goofed - if ( return_code < 0 ) + if ( delay < 0 ) { return IOPMAckImplied; } // it didn't finish - priv->driver_timer = (return_code / ( ACK_TIMER_PERIOD / ns_per_us )) + 1; + priv->driver_timer = (delay / ( ACK_TIMER_PERIOD / ns_per_us )) + 1; return IOPMWillAckLater; } @@ -4085,23 +4096,31 @@ bool IOService::tellClientsWithResponse ( int messageType ) void tellAppWithResponse ( OSObject * object, void * context) { struct context *theContext = (struct context *)context; - UInt32 refcon; OSBoolean *aBool; - + IOPMprot *pm_vars = theContext->us->pm_vars; + if( OSDynamicCast( IOService, object) ) { + // Automatically 'ack' in kernel clients IOLockLock(theContext->flags_lock); aBool = OSBoolean::withBoolean(true); theContext->responseFlags->setObject(theContext->counter,aBool); aBool->release(); IOLockUnlock(theContext->flags_lock); + + const char *who = ((IOService *) object)->getName(); + pm_vars->thePlatform->PMLog(who, + kPMLogClientAcknowledge, theContext->msgType, * (UInt32 *) object); } else { - refcon = ((theContext->serialNumber & 0xFFFF)<<16) + (theContext->counter & 0xFFFF); + UInt32 refcon = ((theContext->serialNumber & 0xFFFF)<<16) + + (theContext->counter & 0xFFFF); IOLockLock(theContext->flags_lock); aBool = OSBoolean::withBoolean(false); theContext->responseFlags->setObject(theContext->counter,aBool); aBool->release(); IOLockUnlock(theContext->flags_lock); + + OUR_PMLog(kPMLogAppNotify, theContext->msgType, refcon); theContext->us->messageClient(theContext->msgType,object,(void *)refcon); if ( theContext->maxTimeRequested < k30seconds ) { @@ -4111,7 +4130,6 @@ void tellAppWithResponse ( OSObject * object, void * context) theContext->counter += 1; } - //********************************************************************************* // tellClientWithResponse // @@ -4138,6 +4156,19 @@ void tellClientWithResponse ( OSObject * object, void * context) aBool->release(); IOLockUnlock(theContext->flags_lock); + IOPMprot *pm_vars = theContext->us->pm_vars; + if (gIOKitDebug & kIOLogPower) { + OUR_PMLog(kPMLogClientNotify, refcon, (UInt32) theContext->msgType); + if (OSDynamicCast(IOService, object)) { + const char *who = ((IOService *) object)->getName(); + pm_vars->thePlatform->PMLog(who, + kPMLogClientNotify, * (UInt32 *) object, (UInt32) object); + } else if (OSDynamicCast(_IOServiceInterestNotifier, object)) { + _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object; + OUR_PMLog(kPMLogClientNotify, (UInt32) n->handler, 0); + } + } + notify.powerRef = (void *)refcon; notify.returnValue = 0; notify.stateNumber = 
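/* A worked example of the refcon packing above, with assumed values: for
   serialNumber = 0x0042 and counter = 7, refcon = (0x0042 << 16) + 7 =
   0x00420007; the high half identifies the notification pass and the low
   half indexes this client's slot in responseFlags, so a later ack can be
   matched to both. */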
theContext->stateNumber; @@ -4154,6 +4185,7 @@ void tellClientWithResponse ( OSObject * object, void * context) theContext->responseFlags->replaceObject(theContext->counter,aBool); aBool->release(); IOLockUnlock(theContext->flags_lock); + OUR_PMLog(kPMLogClientAcknowledge, refcon, (UInt32) object); } else { IOLockLock(theContext->flags_lock); @@ -4173,6 +4205,7 @@ void tellClientWithResponse ( OSObject * object, void * context) IOLockUnlock(theContext->flags_lock); } } else { + OUR_PMLog(kPMLogClientAcknowledge, refcon, 0); // not a client of ours IOLockLock(theContext->flags_lock); // so we won't be waiting for response @@ -4488,34 +4521,20 @@ IOReturn IOService::allowCancelCommon ( void ) } +#if 0 //********************************************************************************* -// clampPowerOn +// c_PM_clamp_Timer_Expired (C Func) // -// Set to highest available power state for a minimum of duration milliseconds +// Called when our clamp timer expires...we will call the object method. //********************************************************************************* -#define kFiveMinutesInNanoSeconds (300 * NSEC_PER_SEC) - -void IOService::clampPowerOn (unsigned long duration) +static void c_PM_Clamp_Timer_Expired (OSObject * client, IOTimerEventSource *) { -/* - changePowerStateToPriv (pm_vars->theNumberOfPowerStates-1); - - if ( priv->clampTimerEventSrc == NULL ) { - priv->clampTimerEventSrc = IOTimerEventSource::timerEventSource(this, - c_PM_Clamp_Timer_Expired); - - IOWorkLoop * workLoop = getPMworkloop (); - - if ( !priv->clampTimerEventSrc || !workLoop || - ( workLoop->addEventSource( priv->clampTimerEventSrc) != kIOReturnSuccess) ) { - - } - } - - priv->clampTimerEventSrc->setTimeout(300*USEC_PER_SEC, USEC_PER_SEC); -*/ + if (client) + ((IOService *)client)->PM_Clamp_Timer_Expired (); } +#endif + //********************************************************************************* // PM_Clamp_Timer_Expired @@ -4525,6 +4544,7 @@ void IOService::clampPowerOn (unsigned long duration) void IOService::PM_Clamp_Timer_Expired (void) { +#if 0 if ( ! initialized ) { // we're unloading @@ -4532,20 +4552,37 @@ void IOService::PM_Clamp_Timer_Expired (void) } changePowerStateToPriv (0); +#endif } -//********************************************************************************* -// c_PM_clamp_Timer_Expired (C Func) +//****************************************************************************** +// clampPowerOn // -// Called when our clamp timer expires...we will call the object method. 
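// A minimal sketch, assuming a hypothetical driver, of the setPowerState()
// acknowledgement protocol that instruct_driver() and
// acknowledgeSetPowerState() implement between them earlier in this file:
//
//     IOReturn MyDriver::setPowerState(unsigned long state, IOService *dev)
//     {
//         if (switchNow(state))
//             return IOPMAckImplied;   // synchronous: no ack expected
//         beginAsyncSwitch(state);     // will complete later
//         return 500 * 1000;           // worst-case settle time, microseconds
//     }
//
// When a positive time is returned, priv->driver_timer is armed from it and
// the driver must call acknowledgeSetPowerState() once the switch finishes;
// acking with no timer pending logs kPMLogAcknowledgeErr4.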
-//********************************************************************************* +// Set to highest available power state for a minimum of duration milliseconds +//****************************************************************************** + +#define kFiveMinutesInNanoSeconds (300 * NSEC_PER_SEC) -void c_PM_Clamp_Timer_Expired (OSObject * client, IOTimerEventSource *) +void IOService::clampPowerOn (unsigned long duration) { - if (client) - ((IOService *)client)->PM_Clamp_Timer_Expired (); -} +#if 0 + changePowerStateToPriv (pm_vars->theNumberOfPowerStates-1); + if ( priv->clampTimerEventSrc == NULL ) { + priv->clampTimerEventSrc = IOTimerEventSource::timerEventSource(this, + c_PM_Clamp_Timer_Expired); + + IOWorkLoop * workLoop = getPMworkloop (); + + if ( !priv->clampTimerEventSrc || !workLoop || + ( workLoop->addEventSource( priv->clampTimerEventSrc) != kIOReturnSuccess) ) { + + } + } + + priv->clampTimerEventSrc->setTimeout(300*USEC_PER_SEC, USEC_PER_SEC); +#endif +} //********************************************************************************* // setPowerState @@ -4714,10 +4751,16 @@ bool IOPMprot::serialize(OSSerialize *s) const OSString * theOSString; char * buffer; char * ptr; + int buf_size; int i; bool rtn_code; - buffer = ptr = IONew(char, 2000); + // estimate how many bytes we need to present all power states + buf_size = 150 // beginning and end of string + + (275 * (int)theNumberOfPowerStates) // size per state + + 100; // extra room just for kicks + + buffer = ptr = IONew(char, buf_size); if(!buffer) return false; @@ -4729,7 +4772,7 @@ bool IOPMprot::serialize(OSSerialize *s) const if ( theNumberOfPowerStates != 0 ) { for ( i = 0; i < (int)theNumberOfPowerStates; i++ ) { - ptr += sprintf(ptr,"power state %d = { ",i); + ptr += sprintf(ptr, "power state %d = { ",i); ptr += sprintf(ptr,"capabilityFlags %08x, ",(unsigned int)thePowerStates[i].capabilityFlags); ptr += sprintf(ptr,"outputPowerCharacter %08x, ",(unsigned int)thePowerStates[i].outputPowerCharacter); ptr += sprintf(ptr,"inputPowerRequirement %08x, ",(unsigned int)thePowerStates[i].inputPowerRequirement); @@ -4752,7 +4795,7 @@ bool IOPMprot::serialize(OSSerialize *s) const theOSString = OSString::withCString(buffer); rtn_code = theOSString->serialize(s); theOSString->release(); - IODelete(buffer, char, 2000); + IODelete(buffer, char, buf_size); return rtn_code; } diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h index ad61f2485..9798d7eba 100644 --- a/iokit/Kernel/IOServicePrivate.h +++ b/iokit/Kernel/IOServicePrivate.h @@ -32,7 +32,8 @@ // options for getExistingServices() enum { - kIONotifyOnce = 0x00000001 + kIONotifyOnce = 0x00000001, + kIOServiceExistingSet = 0x00000002 }; // masks for __state[1] @@ -98,7 +99,7 @@ class _IOServiceInterestNotifier : public IONotifier OSDeclareDefaultStructors(_IOServiceInterestNotifier) public: - OSArray * whence; + queue_chain_t chain; IOServiceInterestHandler handler; void * target; @@ -120,11 +121,9 @@ class _IOConfigThread : public OSObject OSDeclareDefaultStructors(_IOConfigThread) public: - IOThread thread; - virtual void free(); - static _IOConfigThread * configThread( void ); + static void configThread( void ); static void main( _IOConfigThread * self ); }; diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index ab928aefb..010c14c59 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -27,6 +27,7 @@ */ #include +#include #include #include #include @@ -39,10 +40,11 @@ #include 
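// A worked example of the IOPMprot::serialize() sizing above in
// IOServicePM.cpp, with assumed state counts: a 4-state driver gets
// buf_size = 150 + 275*4 + 100 = 1350 bytes and a 20-state device gets
// 5750; the old fixed IONew(char, 2000) buffer would have been outgrown
// somewhere past 6 states at roughly 275 bytes per state.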
+#include "IOKitKernelInternal.h" + extern "C" { extern void OSlibkernInit (void); -extern void IOLibInit(void); #include @@ -77,8 +79,6 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) OSCollectionIterator * kmodIter; // must release OSString * kmodName; // don't release - IOLog( iokit_version ); - if( PE_parse_boot_arg( "io", &debugFlags )) gIOKitDebug = debugFlags; @@ -126,8 +126,13 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) "an invalid version.\n", kmodName->getCStringNoCopy()); } - if (KERN_SUCCESS != kmod_create_fake(kmodName->getCStringNoCopy(), - kmodVersion->getCStringNoCopy())) { + + // empty version strings get replaced with current kernel version + const char *vers = (strlen(kmodVersion->getCStringNoCopy()) + ? kmodVersion->getCStringNoCopy() + : osrelease); + + if (KERN_SUCCESS != kmod_create_fake(kmodName->getCStringNoCopy(), vers)) { panic("Failure declaring in-kernel kmod \"%s\".\n", kmodName->getCStringNoCopy()); } @@ -145,7 +150,7 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) IOUserClient::initialize(); IOMemoryDescriptor::initialize(); - obj = OSString::withCString( iokit_version ); + obj = OSString::withCString( version ); assert( obj ); if( obj ) { root->setProperty( kIOKitBuildVersionKey, obj ); diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp index fc6f7d091..b81757f38 100644 --- a/iokit/Kernel/IOTimerEventSource.cpp +++ b/iokit/Kernel/IOTimerEventSource.cpp @@ -57,6 +57,11 @@ OSMetaClassDefineReservedUnused(IOTimerEventSource, 5); OSMetaClassDefineReservedUnused(IOTimerEventSource, 6); OSMetaClassDefineReservedUnused(IOTimerEventSource, 7); +// +// reserved != 0 means IOTimerEventSource::timeoutAndRelease is being used, +// not a subclassed implementation. +// + bool IOTimerEventSource::checkForWork() { return false; } // Timeout handler function. 
This function is called by the kernel when @@ -66,22 +71,59 @@ void IOTimerEventSource::timeout(void *self) { IOTimerEventSource *me = (IOTimerEventSource *) self; - if (me->enabled) { - Action doit = (Action) me->action; + if (me->enabled && me->action) + { + IOWorkLoop * + wl = me->workLoop; + if (wl) + { + Action doit; + wl->closeGate(); + doit = (Action) me->action; + if (doit && me->enabled && AbsoluteTime_to_scalar(&me->abstime)) + { + IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), + (unsigned int) doit, (unsigned int) me->owner); + (*doit)(me->owner, me); + } + wl->openGate(); + } + } +} - if (doit) { - IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), - (unsigned int) doit, (unsigned int) me->owner); - me->closeGate(); - (*doit)(me->owner, me); - me->openGate(); +void IOTimerEventSource::timeoutAndRelease(void * self, void * count) +{ + IOTimerEventSource *me = (IOTimerEventSource *) self; + + if (me->enabled && me->action) + { + IOWorkLoop * + wl = me->reserved->workLoop; + if (wl) + { + Action doit; + wl->closeGate(); + doit = (Action) me->action; + if (doit && (me->reserved->calloutGeneration == (SInt32) count)) + { + IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), + (unsigned int) doit, (unsigned int) me->owner); + (*doit)(me->owner, me); + } + wl->openGate(); } } + + me->reserved->workLoop->release(); + me->release(); } void IOTimerEventSource::setTimeoutFunc() { - calloutEntry = (void *) thread_call_allocate((thread_call_func_t) timeout, + // reserved != 0 means IOTimerEventSource::timeoutAndRelease is being used, + // not a subclassed implementation + reserved = IONew(ExpansionData, 1); + calloutEntry = (void *) thread_call_allocate((thread_call_func_t) &IOTimerEventSource::timeoutAndRelease, (thread_call_param_t) this); } @@ -117,13 +159,23 @@ void IOTimerEventSource::free() thread_call_free((thread_call_t) calloutEntry); } + if (reserved) + IODelete(reserved, ExpansionData, 1); + super::free(); } void IOTimerEventSource::cancelTimeout() { - thread_call_cancel((thread_call_t) calloutEntry); + if (reserved) + reserved->calloutGeneration++; + bool active = thread_call_cancel((thread_call_t) calloutEntry); AbsoluteTime_to_scalar(&abstime) = 0; + if (active && reserved) + { + release(); + workLoop->release(); + } } void IOTimerEventSource::enable() @@ -135,13 +187,20 @@ void IOTimerEventSource::enable() void IOTimerEventSource::disable() { - thread_call_cancel((thread_call_t) calloutEntry); + if (reserved) + reserved->calloutGeneration++; + bool active = thread_call_cancel((thread_call_t) calloutEntry); super::disable(); + if (active && reserved) + { + release(); + workLoop->release(); + } } IOReturn IOTimerEventSource::setTimeoutTicks(UInt32 ticks) { - return setTimeout(ticks, NSEC_PER_SEC/hz); + return setTimeout(ticks, kTickScale); } IOReturn IOTimerEventSource::setTimeoutMS(UInt32 ms) @@ -187,7 +246,7 @@ IOReturn IOTimerEventSource::setTimeout(AbsoluteTime interval) IOReturn IOTimerEventSource::wakeAtTimeTicks(UInt32 ticks) { - return wakeAtTime(ticks, NSEC_PER_SEC/hz); + return wakeAtTime(ticks, kTickScale); } IOReturn IOTimerEventSource::wakeAtTimeMS(UInt32 ms) @@ -200,35 +259,58 @@ IOReturn IOTimerEventSource::wakeAtTimeUS(UInt32 us) return wakeAtTime(us, kMicrosecondScale); } -IOReturn IOTimerEventSource::wakeAtTime(UInt32 abstime, UInt32 scale_factor) +IOReturn IOTimerEventSource::wakeAtTime(UInt32 inAbstime, UInt32 scale_factor) { AbsoluteTime end; - clock_interval_to_absolutetime_interval(abstime, scale_factor, &end); + 
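/* The retain/generation protocol threaded through this file's changes,
   summarized (an informal sketch of the invariant, not literal code):

     arm (wakeAtTime):
         retain(); workLoop->retain();
         reserved->calloutGeneration++;
         thread_call_enter1_delayed(callout, (void *) generation, deadline);
     fire (timeoutAndRelease):
         run the action only if the generation still matches the value the
         callout was armed with, then drop both references;
     cancel/disable:
         bump the generation, and if thread_call_cancel() reclaimed a
         pending callout, drop the two references it would have dropped.

   The generation check turns a callout that was already in flight when
   cancelTimeout() ran into a no-op instead of a race with the caller. */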
clock_interval_to_absolutetime_interval(inAbstime, scale_factor, &end); return wakeAtTime(end); } -IOReturn IOTimerEventSource::wakeAtTime(mach_timespec_t abstime) +IOReturn IOTimerEventSource::wakeAtTime(mach_timespec_t inAbstime) { AbsoluteTime end, nsecs; clock_interval_to_absolutetime_interval - (abstime.tv_nsec, kNanosecondScale, &nsecs); + (inAbstime.tv_nsec, kNanosecondScale, &nsecs); clock_interval_to_absolutetime_interval - (abstime.tv_sec, kSecondScale, &end); + (inAbstime.tv_sec, kSecondScale, &end); ADD_ABSOLUTETIME(&end, &nsecs); return wakeAtTime(end); } +void IOTimerEventSource::setWorkLoop(IOWorkLoop *inWorkLoop) +{ + super::setWorkLoop(inWorkLoop); + if ( enabled && AbsoluteTime_to_scalar(&abstime) && workLoop ) + wakeAtTime(abstime); +} + IOReturn IOTimerEventSource::wakeAtTime(AbsoluteTime inAbstime) { if (!action) return kIOReturnNoResources; abstime = inAbstime; - if ( enabled && AbsoluteTime_to_scalar(&abstime) ) - thread_call_enter_delayed((thread_call_t) calloutEntry, abstime); + if ( enabled && AbsoluteTime_to_scalar(&abstime) && workLoop ) + { + if (reserved) + { + retain(); + workLoop->retain(); + reserved->workLoop = workLoop; + reserved->calloutGeneration++; + if (thread_call_enter1_delayed((thread_call_t) calloutEntry, + (void *) reserved->calloutGeneration, abstime)) + { + release(); + workLoop->release(); + } + } + else + thread_call_enter_delayed((thread_call_t) calloutEntry, abstime); + } return kIOReturnSuccess; } diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index bb74fff3e..4a3df066f 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -55,6 +55,8 @@ extern kern_return_t iokit_destroy_object_port( ipc_port_t port ); extern mach_port_name_t iokit_make_send_right( task_t task, io_object_t obj, ipc_kobject_type_t type ); +extern kern_return_t iokit_mod_send_right( task_t task, mach_port_name_t name, mach_port_delta_t delta ); + extern io_object_t iokit_lookup_connect_ref(io_object_t clientRef, ipc_space_t task); extern io_object_t iokit_lookup_connect_ref_current_task(io_object_t clientRef); @@ -66,6 +68,7 @@ extern void iokit_release_port( ipc_port_t port ); extern kern_return_t iokit_switch_object_port( ipc_port_t port, io_object_t obj, ipc_kobject_type_t type ); +#include #include } /* extern "C" */ @@ -1015,13 +1018,100 @@ kern_return_t is_io_object_get_class( io_object_t object, io_name_t className ) { + const OSMetaClass* my_obj = NULL; + if( !object) return( kIOReturnBadArgument ); - - strcpy( className, object->getMetaClass()->getClassName()); + + my_obj = object->getMetaClass(); + if (!my_obj) { + return (kIOReturnNotFound); + } + + strcpy( className, my_obj->getClassName()); return( kIOReturnSuccess ); } +/* Routine io_object_get_superclass */ +kern_return_t is_io_object_get_superclass( + mach_port_t master_port, + io_name_t obj_name, + io_name_t class_name) +{ + const OSMetaClass* my_obj = NULL; + const OSMetaClass* superclass = NULL; + const OSSymbol *my_name = NULL; + const char *my_cstr = NULL; + + if (!obj_name || !class_name) + return (kIOReturnBadArgument); + + if( master_port != master_device_port) + return( kIOReturnNotPrivileged); + + my_name = OSSymbol::withCString(obj_name); + + if (my_name) { + my_obj = OSMetaClass::getMetaClassWithName(my_name); + my_name->release(); + } + if (my_obj) { + superclass = my_obj->getSuperClass(); + } + + if (!superclass) { + return( kIOReturnNotFound ); + } + + my_cstr = superclass->getClassName(); + + if (my_cstr) { + strncpy(class_name, my_cstr, sizeof(io_name_t)-1); + return( kIOReturnSuccess ); + } + return (kIOReturnNotFound); +} + +/* Routine io_object_get_bundle_identifier */ +kern_return_t is_io_object_get_bundle_identifier( + mach_port_t master_port, + io_name_t obj_name, + io_name_t bundle_name) +{ + const OSMetaClass* my_obj = NULL; + const OSSymbol *my_name = NULL; + const OSSymbol *identifier = NULL; + const char *my_cstr = NULL; + + if (!obj_name || !bundle_name) + return (kIOReturnBadArgument); + + if( master_port != master_device_port) + return( kIOReturnNotPrivileged); + + my_name = OSSymbol::withCString(obj_name); + + if (my_name) { + my_obj = OSMetaClass::getMetaClassWithName(my_name); + my_name->release(); + } + + if (my_obj) { + identifier = my_obj->getKmodName(); + } + if (!identifier) { + return( kIOReturnNotFound ); + } + + my_cstr = identifier->getCStringNoCopy(); + if (my_cstr) { + strncpy(bundle_name, identifier->getCStringNoCopy(), sizeof(io_name_t)-1); + return( kIOReturnSuccess ); + } + + return (kIOReturnBadArgument); +} + /* Routine io_object_conforms_to */ kern_return_t is_io_object_conforms_to( io_object_t object, @@ -1124,8 +1214,10 @@ kern_return_t is_io_service_match_property_table_ool( { kern_return_t kr; vm_offset_t data; + vm_map_offset_t map_data; - kr = vm_map_copyout( kernel_map, &data, (vm_map_copy_t) matching ); + kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) matching ); + data = CAST_DOWN(vm_offset_t, map_data); if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds @@ -1174,8 +1266,10 @@ kern_return_t is_io_service_get_matching_services_ool( { 
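/* The recurring 64-bit-safe pattern behind this file's vm_map_copyout()
   changes, isolated for clarity (same calls as the surrounding hunks):
   the out-of-line matching data is copied out to a vm_map_offset_t, then
   narrowed explicitly for the existing 32-bit consumers:

       vm_map_offset_t map_data;
       vm_offset_t     data;

       kr   = vm_map_copyout(kernel_map, &map_data, (vm_map_copy_t) matching);
       data = CAST_DOWN(vm_offset_t, map_data);
       // must return success after vm_map_copyout() succeeds
*/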
kern_return_t kr; vm_offset_t data; + vm_map_offset_t map_data; - kr = vm_map_copyout( kernel_map, &data, (vm_map_copy_t) matching ); + kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) matching ); + data = CAST_DOWN(vm_offset_t, map_data); if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds @@ -1275,8 +1369,10 @@ kern_return_t is_io_service_add_notification_ool( { kern_return_t kr; vm_offset_t data; + vm_map_offset_t map_data; - kr = vm_map_copyout( kernel_map, &data, (vm_map_copy_t) matching ); + kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) matching ); + data = CAST_DOWN(vm_offset_t, map_data); if( KERN_SUCCESS == kr) { // must return success after vm_map_copyout() succeeds @@ -1558,7 +1654,7 @@ static kern_return_t copyoutkdata( void * data, vm_size_t len, kern_return_t err; vm_map_copy_t copy; - err = vm_map_copyin( kernel_map, (vm_offset_t) data, len, + err = vm_map_copyin( kernel_map, CAST_USER_ADDR_T(data), len, false /* src_destroy */, ©); assert( err == KERN_SUCCESS ); @@ -1752,10 +1848,12 @@ kern_return_t is_io_registry_entry_set_properties kern_return_t err; IOReturn res; vm_offset_t data; + vm_map_offset_t map_data; CHECK( IORegistryEntry, registry_entry, entry ); - err = vm_map_copyout( kernel_map, &data, (vm_map_copy_t) properties ); + err = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) properties ); + data = CAST_DOWN(vm_offset_t, map_data); if( KERN_SUCCESS == err) { @@ -2014,14 +2112,28 @@ kern_return_t is_io_connect_unmap_memory( map = memory->map( task, mapAddr, options ); memory->release(); - if( map) { + if( map) + { IOLockLock( gIOObjectPortLock); if( client->mappings) client->mappings->removeObject( map); IOLockUnlock( gIOObjectPortLock); - IOMachPort::releasePortForObject( map, IKOT_IOKIT_OBJECT ); - map->release(); - } else + + mach_port_name_t name = 0; + if (task != current_task()) + name = IOMachPort::makeSendRightForTask( task, map, IKOT_IOKIT_OBJECT ); + if (name) + { + map->unmap(); + err = iokit_mod_send_right( task, name, -2 ); + err = kIOReturnSuccess; + } + else + IOMachPort::releasePortForObject( map, IKOT_IOKIT_OBJECT ); + if (task == current_task()) + map->release(); + } + else err = kIOReturnBadArgument; } @@ -2698,8 +2810,12 @@ kern_return_t is_io_catalog_send_data( if(flag != kIOCatalogRemoveKernelLinker && ( !inData || !inDataCount) ) return kIOReturnBadArgument; - if (data) { - kr = vm_map_copyout( kernel_map, &data, (vm_map_copy_t)inData); + if (inData) { + vm_map_offset_t map_data; + + kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t)inData); + data = CAST_DOWN(vm_offset_t, map_data); + if( kr != KERN_SUCCESS) return kr; @@ -2869,10 +2985,11 @@ kern_return_t is_io_catalog_get_data( vm_size_t size; size = s->getLength(); - kr = vm_allocate(kernel_map, &data, size, true); + kr = vm_allocate(kernel_map, &data, size, VM_FLAGS_ANYWHERE); if ( kr == kIOReturnSuccess ) { bcopy(s->text(), (void *)data, size); - kr = vm_map_copyin(kernel_map, data, size, true, ©); + kr = vm_map_copyin(kernel_map, (vm_map_address_t)data, + (vm_map_size_t)size, true, ©); *outData = (char *)copy; *outDataCount = size; } @@ -2938,19 +3055,17 @@ kern_return_t is_io_catalog_reset( return kIOReturnSuccess; } -kern_return_t iokit_user_client_trap(io_object_t userClientRef, UInt32 index, - void *p1, void *p2, void *p3, - void *p4, void *p5, void *p6) +kern_return_t iokit_user_client_trap(struct iokit_user_client_trap_args *args) { kern_return_t result = kIOReturnBadArgument; IOUserClient *userClient; if 
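/* The trap now takes one argument block instead of eight scalar arguments;
   a sketch of its layout as inferred from the uses below and from the old
   signature (not the authoritative declaration):

       struct iokit_user_client_trap_args {
           void   *userClientRef;
           UInt32  index;
           void   *p1, *p2, *p3, *p4, *p5, *p6;
       };
*/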
((userClient = OSDynamicCast(IOUserClient, - iokit_lookup_connect_ref_current_task(userClientRef)))) { + iokit_lookup_connect_ref_current_task((OSObject *)(args->userClientRef))))) { IOExternalTrap *trap; IOService *target = NULL; - trap = userClient->getTargetAndTrapForIndex(&target, index); + trap = userClient->getTargetAndTrapForIndex(&target, args->index); if (trap && target) { IOTrap func; @@ -2958,7 +3073,7 @@ kern_return_t iokit_user_client_trap(io_object_t userClientRef, UInt32 index, func = trap->func; if (func) { - result = (target->*func)(p1, p2, p3, p4, p5, p6); + result = (target->*func)(args->p1, args->p2, args->p3, args->p4, args->p5, args->p6); } } diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 70fd5adcf..4108eaac9 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -57,12 +57,6 @@ static inline bool ISSETP(void *addr, unsigned int flag) #define fFlags loopRestart -void IOWorkLoop::launchThreadMain(void *self) -{ - thread_set_cont_arg((int) self); - threadMainContinuation(); -} - bool IOWorkLoop::init() { // The super init and gateLock allocation MUST be done first @@ -90,7 +84,7 @@ bool IOWorkLoop::init() if (addEventSource(controlG) != kIOReturnSuccess) return false; - workThread = IOCreateThread(launchThreadMain, (void *) this); + workThread = IOCreateThread((thread_continue_t)threadMainContinuation, this); if (!workThread) return false; @@ -246,12 +240,9 @@ do { \ #endif /* KDEBUG */ -void IOWorkLoop::threadMainContinuation() +void IOWorkLoop::threadMainContinuation(IOWorkLoop *self) { - IOWorkLoop* self; - self = (IOWorkLoop *) thread_get_cont_arg(); - - self->threadMain(); + self->threadMain(); } void IOWorkLoop::threadMain() @@ -294,8 +285,7 @@ void IOWorkLoop::threadMain() assert_wait((void *) &workToDo, false); IOSimpleLockUnlockEnableInterrupt(workToDoLock, is); - thread_set_cont_arg((int) this); - thread_block(&threadMainContinuation); + thread_block_parameter((thread_continue_t)threadMainContinuation, this); /* NOTREACHED */ } diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index d28948a31..923831637 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -38,6 +38,17 @@ OSDefineMetaClassAndStructors(RootDomainUserClient, IOUserClient) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +bool RootDomainUserClient::initWithTask(task_t owningTask, void *security_id, UInt32) +{ + if (!super::init()) + return false; + + fOwningTask = owningTask; + task_reference (fOwningTask); + return true; +} + + bool RootDomainUserClient::start( IOService * provider ) { assert(OSDynamicCast(IOPMrootDomain, provider)); @@ -49,10 +60,63 @@ bool RootDomainUserClient::start( IOService * provider ) return true; } +IOReturn RootDomainUserClient::secureSleepSystem( int *return_code ) +{ + int local_priv = 0; + int admin_priv = 0; + IOReturn ret = kIOReturnNotPrivileged; + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeLocalUser); + local_priv = (kIOReturnSuccess == ret); + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); + admin_priv = (kIOReturnSuccess == ret); + + if((local_priv || admin_priv) && fOwner) { + *return_code = fOwner->sleepSystem(); + return kIOReturnSuccess; + } else { + *return_code = kIOReturnNotPrivileged; + return kIOReturnSuccess; + } + +} + +IOReturn RootDomainUserClient::secureSetAggressiveness( + unsigned long type, + unsigned long newLevel, + int *return_code ) +{ 
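// The privilege gate shared by the secure* wrappers above, restated as a
// sketch (the real checks are in the surrounding hunks): the request is
// honored if the owning task belongs to the locally logged-in user or to
// an administrator, and the per-call verdict travels in *return_code while
// the method itself returns kIOReturnSuccess so the verdict reaches user
// space:
//
//     bool allowed =
//         (kIOReturnSuccess == clientHasPrivilege(fOwningTask,
//                                  kIOClientPrivilegeLocalUser))
//      || (kIOReturnSuccess == clientHasPrivilege(fOwningTask,
//                                  kIOClientPrivilegeAdministrator));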
+ int local_priv = 0; + int admin_priv = 0; + IOReturn ret = kIOReturnNotPrivileged; + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeLocalUser); + local_priv = (kIOReturnSuccess == ret); + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); + admin_priv = (kIOReturnSuccess == ret); + + if((local_priv || admin_priv) && fOwner) { + *return_code = fOwner->setAggressiveness(type, newLevel); + return kIOReturnSuccess; + } else { + *return_code = kIOReturnNotPrivileged; + return kIOReturnSuccess; + } + +} + IOReturn RootDomainUserClient::clientClose( void ) { detach(fOwner); + + if(fOwningTask) { + task_deallocate(fOwningTask); + fOwningTask = 0; + } + return kIOReturnSuccess; } @@ -61,13 +125,13 @@ RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 i { static IOExternalMethod sMethods[] = { { // kPMSetAggressiveness, 0 - 0, (IOMethod)&IOPMrootDomain::setAggressiveness, kIOUCScalarIScalarO, 2, 0 + 1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1 }, { // kPMGetAggressiveness, 1 0, (IOMethod)&IOPMrootDomain::getAggressiveness, kIOUCScalarIScalarO, 1, 1 }, { // kPMSleepSystem, 2 - 0, (IOMethod)&IOPMrootDomain::sleepSystem, kIOUCScalarIScalarO, 0, 0 + 1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1 }, { // kPMAllowPowerChange, 3 0, (IOMethod)&IOPMrootDomain::allowPowerChange, kIOUCScalarIScalarO, 1, 0 @@ -85,7 +149,7 @@ RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 i 1, (IOMethod) &RootDomainUserClient::setPreventative, kIOUCScalarIScalarO, 2, 0 }, }; - + if(index >= kNumPMMethods) return NULL; else { diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index 3d6762299..b4172ea26 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -42,6 +42,10 @@ class RootDomainUserClient : public IOUserClient private: IOPMrootDomain * fOwner; + task_t fOwningTask; + + IOReturn secureSleepSystem( int *return_code ); + IOReturn secureSetAggressiveness( unsigned long type, unsigned long newLevel, int *return_code ); public: @@ -51,6 +55,8 @@ public: virtual bool start( IOService * provider ); + virtual bool initWithTask(task_t owningTask, void *security_id, UInt32); + void setPreventative(UInt32 on_off, UInt32 types_of_sleep); }; diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp index 17727686a..9814f8ac9 100644 --- a/iokit/KernelConfigTables.cpp +++ b/iokit/KernelConfigTables.cpp @@ -23,23 +23,25 @@ /* This list is used in IOStartIOKit.cpp to declare fake kmod_info * structs for kext dependencies that are built into the kernel. + * Empty version strings get replaced with osrelease at runtime. 
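 * A sketch of that substitution as implemented by StartIOKit() in this
 * same patch (osrelease is the running kernel's version string):
 *
 *     const char *vers = (strlen(kmodVersion->getCStringNoCopy())
 *                         ? kmodVersion->getCStringNoCopy()
 *                         : osrelease);
 *     kmod_create_fake(kmodName->getCStringNoCopy(), vers);
 *
 * so the built-in kmod entries always report the version of the kernel
 * they shipped in.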
*/ const char * gIOKernelKmods = "{" -" 'com.apple.kernel' = '7.9.0';" -" 'com.apple.kpi.bsd' = '7.9.0';" -" 'com.apple.kpi.iokit' = '7.9.0';" -" 'com.apple.kpi.libkern' = '7.9.0';" -" 'com.apple.kpi.mach' = '7.9.0';" -" 'com.apple.iokit.IONVRAMFamily' = '7.9.0';" -" 'com.apple.driver.AppleNMI' = '7.9.0';" -" 'com.apple.iokit.IOSystemManagementFamily' = '7.9.0';" -" 'com.apple.iokit.ApplePlatformFamily' = '7.9.0';" -" 'com.apple.kernel.6.0' = '6.9.9';" -" 'com.apple.kernel.bsd' = '6.9.9';" -" 'com.apple.kernel.iokit' = '6.9.9';" -" 'com.apple.kernel.libkern' = '6.9.9';" -" 'com.apple.kernel.mach' = '6.9.9';" + "'com.apple.kernel' = '';" + "'com.apple.kpi.bsd' = '';" + "'com.apple.kpi.iokit' = '';" + "'com.apple.kpi.libkern' = '';" + "'com.apple.kpi.mach' = '';" + "'com.apple.kpi.unsupported' = '';" + "'com.apple.iokit.IONVRAMFamily' = '';" + "'com.apple.driver.AppleNMI' = '';" + "'com.apple.iokit.IOSystemManagementFamily' = '';" + "'com.apple.iokit.ApplePlatformFamily' = '';" + "'com.apple.kernel.6.0' = '7.9.9';" + "'com.apple.kernel.bsd' = '7.9.9';" + "'com.apple.kernel.iokit' = '7.9.9';" + "'com.apple.kernel.libkern' = '7.9.9';" + "'com.apple.kernel.mach' = '7.9.9';" "}"; diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp index 571ea90f1..d93930dd0 100644 --- a/iokit/bsddev/IOKitBSDInit.cpp +++ b/iokit/bsddev/IOKitBSDInit.cpp @@ -26,8 +26,6 @@ #include #include -#include - extern "C" { #include @@ -75,6 +73,12 @@ OSDictionary * IOBSDNameMatching( const char * name ) return( 0 ); } +OSDictionary * IOUUIDMatching( void ) +{ + return IOService::resourceMatching( "boot-uuid-media" ); +} + + OSDictionary * IOCDMatching( void ) { OSDictionary * dict; @@ -255,7 +259,6 @@ OSDictionary * IODiskMatching( const char * path, char * buf, int maxLen ) long partition = -1; long lun = -1; char c; - const char * partitionSep = NULL; // scan the tail of the path for "@unit:partition" do { @@ -269,24 +272,12 @@ OSDictionary * IODiskMatching( const char * path, char * buf, int maxLen ) if( *(--look) == c) { if( c == ':') { partition = strtol( look + 1, 0, 0 ); - partitionSep = look; c = '@'; } else if( c == '@') { - int diff = -1; - - unit = strtol( look + 1, 0, 16 ); - - diff = (int)partitionSep - (int)look; - if ( diff > 0 ) { - - for ( ; diff > 0; diff-- ) - { - if( look[diff] == ',' ) - { - lun = strtol ( &look[diff + 1], 0, 16 ); - break; - } - } + unit = strtol( look + 1, &comp, 16 ); + + if( *comp == ',') { + lun = strtol( comp + 1, 0, 16 ); } c = '/'; @@ -343,11 +334,53 @@ OSDictionary * IODiskMatching( const char * path, char * buf, int maxLen ) OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen ) { + OSDictionary * matching; + OSString * str; + char * comp; + int len; + /* need to look up path, get device type, call matching help based on device type */ - return( IODiskMatching( path, buf, maxLen )); + matching = IODiskMatching( path, buf, maxLen ); + if( matching) + return( matching ); + + do { + + len = strlen( kIODeviceTreePlane ":" ); + maxLen -= len; + if( maxLen < 0) + continue; + + strcpy( buf, kIODeviceTreePlane ":" ); + comp = buf + len; + + len = strlen( path ); + maxLen -= len; + if( maxLen < 0) + continue; + strncpy( comp, path, len ); + comp[ len ] = 0; + + matching = OSDictionary::withCapacity( 1 ); + if( !matching) + continue; + + str = OSString::withCString( buf ); + if( !str) + continue; + matching->setObject( kIOPathMatchKey, str ); + str->release(); + + return( matching ); + + } while( false ); + if( matching) + 
matching->release(); + + return( 0 ); } IOService * IOFindMatchingChild( IOService * service ) @@ -403,6 +436,7 @@ kern_return_t IOFindBSDRoot( char * rootName, UInt32 flags = 0; int minor, major; bool findHFSChild = false; + char * mediaProperty = 0; char * rdBootVar; enum { kMaxPathBuf = 512, kMaxBootVar = 128 }; char * str; @@ -410,10 +444,11 @@ kern_return_t IOFindBSDRoot( char * rootName, int len; bool forceNet = false; bool debugInfoPrintedOnce = false; + const char * uuidStr = NULL; static int mountAttempts = 0; - int xchar, dchar; + int xchar, dchar; if( mountAttempts++) @@ -429,19 +464,39 @@ kern_return_t IOFindBSDRoot( char * rootName, rdBootVar[0] = 0; do { - if( (regEntry = IORegistryEntry::fromPath( "/chosen", gIODTPlane ))) { - data = (OSData *) regEntry->getProperty( "rootpath" ); - regEntry->release(); - if( data) continue; + if( (regEntry = IORegistryEntry::fromPath( "/chosen", gIODTPlane ))) { + data = (OSData *) regEntry->getProperty( "boot-uuid" ); + if( data) { + uuidStr = (const char*)data->getBytesNoCopy(); + OSString *uuidString = OSString::withCString( uuidStr ); + + // match the boot-args boot-uuid processing below + if( uuidString) { + IOLog("rooting via boot-uuid from /chosen: %s\n", uuidStr); + IOService::publishResource( "boot-uuid", uuidString ); + uuidString->release(); + matching = IOUUIDMatching(); + mediaProperty = "boot-uuid-media"; + regEntry->release(); + continue; + } else { + uuidStr = NULL; } + } + + // else try for an OF Path + data = (OSData *) regEntry->getProperty( "rootpath" ); + regEntry->release(); + if( data) continue; + } if( (regEntry = IORegistryEntry::fromPath( "/options", gIODTPlane ))) { - data = (OSData *) regEntry->getProperty( "boot-file" ); - regEntry->release(); - if( data) continue; - } + data = (OSData *) regEntry->getProperty( "boot-file" ); + regEntry->release(); + if( data) continue; + } } while( false ); - if( data) + if( data && !uuidStr) look = (const char *) data->getBytesNoCopy(); if( rdBootVar[0] == '*') { @@ -527,6 +582,26 @@ kern_return_t IOFindBSDRoot( char * rootName, } else if ( strncmp( look, "cdrom", strlen( "cdrom" )) == 0 ) { matching = IOCDMatching(); findHFSChild = true; + } else if ( strncmp( look, "uuid", strlen( "uuid" )) == 0 ) { + char *uuid; + OSString *uuidString; + + uuid = (char *)IOMalloc( kMaxBootVar ); + + if ( uuid ) { + if (!PE_parse_boot_arg( "boot-uuid", uuid )) { + panic( "rd=uuid but no boot-uuid= specified" ); + } + uuidString = OSString::withCString( uuid ); + if ( uuidString ) { + IOService::publishResource( "boot-uuid", uuidString ); + uuidString->release(); + IOLog( "\nWaiting for boot volume with UUID %s\n", uuid ); + matching = IOUUIDMatching(); + mediaProperty = "boot-uuid-media"; + } + IOFree( uuid, kMaxBootVar ); + } } else { matching = IOBSDNameMatching( look ); } @@ -593,6 +668,8 @@ kern_return_t IOFindBSDRoot( char * rootName, // look for a subservice with an Apple_HFS child IOService * subservice = IOFindMatchingChild( service ); if ( subservice ) service = subservice; + } else if ( service && mediaProperty ) { + service = service->getProperty(mediaProperty); } major = 0; diff --git a/iokit/conf/Makefile.i386 b/iokit/conf/Makefile.i386 index 2f6232c14..1f8be4145 100644 --- a/iokit/conf/Makefile.i386 +++ b/iokit/conf/Makefile.i386 @@ -2,6 +2,33 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# Enable -Werror for i386 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, 
$(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR= \ + ioconf.o \ + UNDRequest.o \ + IOLib.o \ + IOStringFuncs.o \ + IOCPU.cpo \ + IOCommandPool.cpo \ + IOCommandQueue.cpo \ + IOKitBSDInit.cpo \ + IOInterruptController.cpo \ + IOInterruptEventSource.cpo \ + IOPMPowerStateQueue.cpo \ + IOPMchangeNoteList.cpo \ + IOPMrootDomain.cpo \ + IOServicePM.cpo \ + IOWorkLoop.cpo \ + RootDomainUserClient.cpo + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index bf9b2d38b..b24d19114 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -31,6 +31,10 @@ CFLAGS+= -DKERNEL -DDRIVER_PRIVATE \ -DIOMATCHDEBUG=1 -DIOALLOCDEBUG=1 \ #-DIOKITDEBUG=-1 +CWARNFLAGS += -Wno-unused-parameter -Wno-redundant-decls -Wno-nested-externs -Wno-write-strings +MWARNFLAGS += -Wno-unused-parameter -Wno-redundant-decls -Wno-nested-externs -Wno-write-strings +CXXWARNFLAGS += -Wno-unused-parameter -Wno-redundant-decls -Wno-write-strings -Wno-cast-qual -Wno-shadow + CFLAGS_RELEASE += -DIOASSERT=0 CFLAGS_DEBUG += -DIOASSERT=1 @@ -47,14 +51,6 @@ COMP_SUBDIRS = # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright - # # These macros are filled in by the config program depending on the # current configuration. The MACHDEP macro is replaced by the @@ -90,12 +86,9 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) @echo "creating $(COMPONENT).o" - $(RM) $(RMFLAGS) vers.c - $(COMPOBJROOT)/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - ${KCC} $(CFLAGS) $(INCLUDES) -c vers.c + @echo [ updating $(COMPONENT).o ${IOKIT_KERNEL_CONFIG} ] - $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} do_depend: do_all ${MD} -u Makedep -f -d `ls *.d` diff --git a/iokit/conf/files b/iokit/conf/files index 29fc19011..f4aa60ae1 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -5,6 +5,7 @@ OPTIONS/kdebug optional kdebug # libIOKit + iokit/Kernel/IOLib.c optional iokitcpp iokit/Kernel/IOLocks.cpp optional iokitcpp iokit/Kernel/IOConditionLock.cpp optional iokitcpp @@ -25,7 +26,6 @@ iokit/Kernel/IOPMPowerStateQueue.cpp optional iokitcpp iokit/Kernel/IOCatalogue.cpp optional iokitcpp iokit/Kernel/IOPMPowerSource.cpp optional iokitcpp iokit/Kernel/IOPMPowerSourceList.cpp optional iokitcpp -iokit/Kernel/IOPMPagingPlexus.cpp optional iokitcpp iokit/Kernel/IOWorkLoop.cpp optional iokitcpp iokit/Kernel/IOEventSource.cpp optional iokitcpp diff --git a/iokit/conf/tools/Makefile b/iokit/conf/tools/Makefile index 9df86ce8c..4f9ccd553 100644 --- a/iokit/conf/tools/Makefile +++ b/iokit/conf/tools/Makefile @@ -7,13 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = \ - doconf \ - newvers +SETUP_SUBDIRS = doconf -COMP_SUBDIRS = \ - doconf \ - newvers +COMP_SUBDIRS = doconf INST_SUBDIRS = \ diff --git a/iokit/conf/tools/newvers/Makefile b/iokit/conf/tools/newvers/Makefile deleted file mode 100644 index 73603c753..000000000 --- a/iokit/conf/tools/newvers/Makefile +++ /dev/null
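# The i386 -Werror staging in the Makefile.i386 fragment above, reduced to
# its core (object names hypothetical): every object not on the exception
# list gets -Werror through a target-specific variable, so files can be
# cleaned up and removed from OBJS_NO_WERROR one at a time.
#
#     OBJS           = a.o b.o c.cpo
#     OBJS_NO_WERROR = b.o
#     OBJS_WERROR    = $(filter-out $(OBJS_NO_WERROR),$(OBJS))
#     $(OBJS_WERROR): WERROR = -Werror
#     # CFLAGS already appends $(WERROR), so only these targets see it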
@@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/) -PROGRAM= $(DSTDIR)newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/iokit/conf/tools/newvers/newvers.csh b/iokit/conf/tools/newvers/newvers.csh deleted file mode 100644 index 381446bb2..000000000 --- a/iokit/conf/tools/newvers/newvers.csh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. -# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"IOKit Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"IOKit\";" ; - /bin/echo "char ${COMPONENT}_builder[] = \"$w\";" ; -) > vers.c -if [ -s vers.suffix -o ! -f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/iokit/conf/version.major b/iokit/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/iokit/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/iokit/conf/version.minor b/iokit/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/iokit/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/iokit/conf/version.variant b/iokit/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/iokit/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/iokit/include/mach/mach.h b/iokit/include/mach/mach.h index 21fc91eb4..2289426e2 100644 --- a/iokit/include/mach/mach.h +++ b/iokit/include/mach/mach.h @@ -19,7 +19,7 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#warning include is going away, please don't use it anymore. 
+#warning include is going away, please do not use it anymore. #include diff --git a/iokit/mach-o/mach_header.h b/iokit/mach-o/mach_header.h index 4d14d1380..9dc84d842 100644 --- a/iokit/mach-o/mach_header.h +++ b/iokit/mach-o/mach_header.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,7 +22,14 @@ /* * File: kern/mach_header.h * - * Definitions for accessing mach-o headers. + * Definitions for accessing mach-o headers. This header wraps the + * routines defined in osfmk/mach-o/mach_header.c; this is made clear + * by the existence of the getsectcmdsymtabfromheader() prototype. + * + * NOTE: The functions prototyped by this header only operate against + * 32 bit mach headers. Many of these functions imply the + * currently running kernel, and cannot be used against mach + * headers other than that of the currently running kernel. * * HISTORY * 29-Jan-92 Mike DeMoney (mike@next.com) @@ -46,17 +53,17 @@ struct segment_command *nextseg(struct segment_command *sgp); struct segment_command *nextsegfromheader( struct mach_header *header, struct segment_command *seg); -struct segment_command *getsegbyname(char *seg_name); +struct segment_command *getsegbyname(const char *seg_name); struct segment_command *getsegbynamefromheader( struct mach_header *header, - char *seg_name); -void *getsegdatafromheader(struct mach_header *, char *, int *); -struct section *getsectbyname(char *seg_name, char *sect_name); + const char *seg_name); +void *getsegdatafromheader(struct mach_header *, const char *, int *); +struct section *getsectbyname(const char *seg_name, const char *sect_name); struct section *getsectbynamefromheader( struct mach_header *header, char *seg_name, char *sect_name); -void *getsectdatafromheader(struct mach_header *, char *, char *, int *); +void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); struct section *firstsect(struct segment_command *sgp); struct section *nextsect(struct segment_command *sgp, struct section *sp); struct fvmlib_command *fvmlib(void); diff --git a/kgmacros b/kgmacros index 935ec81b3..2356ffe9e 100644 --- a/kgmacros +++ b/kgmacros @@ -6,8 +6,26 @@ # # All the convenience variables used by these macros begin with $kgm_ +define showversion +#Display version string, a pointer to which is pinned at 0x501C in the kernel's +#low memory globals + p (char *) *0x501c +end + +document showversion +Syntax: showversion +| Read the kernel version string from a fixed address in low +| memory. Useful if you don't know which kernel is on the other end, +| and need to find the appropriate symbols. Beware that if you've +| loaded a symbol file, but aren't connected to a remote target, +| the version string from the symbol file will be displayed instead. +| This macro expects to be connected to the remote kernel to function +| correctly. +end + set $kgm_dummy = &proc0 set $kgm_dummy = &kmod +set $kgm_mtype = ((struct mach_header)_mh_execute_header).cputype echo Loading Kernel GDB Macros package.
Type "help kgm" for more info.\n @@ -24,6 +42,7 @@ document kgm | (gdb) attach | | The following macros are available in this package: +| showversion Displays a string describing the remote kernel version | | showalltasks Display a summary listing of all tasks | showallthreads Display info about all threads in the system @@ -35,6 +54,7 @@ document kgm | showallipc Display a summary listing of all the ipc spaces | showallrights Display a summary listing of all the ipc rights | showallkmods Display a summary listing of all the kernel modules +| showallclasses Display info about all OSObject subclasses in the system | | showtask Display info about the specified task | showtaskthreads Display info about the threads in the task @@ -61,15 +81,32 @@ document kgm | | dumpcallqueue Dump out all the entries given a queue head | +| showallmtx Display info about mutexes usage +| showallrwlck Display info about reader/writer locks usage +| | zprint Display info about the memory zones +| showioalloc Display info about iokit allocations | paniclog Display the panic log info | | switchtoact Switch to different context specified by activation | switchtoctx Switch to different context +| showuserstack Display numeric backtrace of the user stack for an +| activation +| +| switchtouserthread Switch to the user context of the specified thread +| resetstacks Return to the original kernel context +| | resetctx Reset context | resume_on Resume when detaching from gdb | resume_off Don't resume when detaching from gdb | +| sendcore Configure kernel to send a coredump to the specified IP +| disablecore Configure the kernel to disable coredump transmission +| switchtocorethread Corefile version of "switchtoact" +| resetcorectx Corefile version of "resetctx" +| +| kdp-reboot Restart remote target +| | Type "help " for more specific help on a particular macro. | Type "show user " to see what the macro is really doing. 
end @@ -169,11 +206,9 @@ end define showactint - printf " 0x%08x ", $arg0 - set $kgm_actp = *(struct thread *)$arg0 - if $kgm_actp.thread - set $kgm_thread = *$kgm_actp.thread - printf "0x%08x ", $kgm_actp.thread + printf " 0x%08x ", $arg0 + set $kgm_thread = *(struct thread *)$arg0 + printf "0x%08x ", $arg0 printf "%3d ", $kgm_thread.sched_pri set $kgm_state = $kgm_thread.state if $kgm_state & 0x80 @@ -200,7 +235,12 @@ define showactint if $kgm_state & 0x01 printf "W\t" printf "0x%08x ", $kgm_thread.wait_queue - output /a $kgm_thread.wait_event + + if ((unsigned)$kgm_thread.wait_event > (unsigned)sectPRELINKB) + showkmodaddr $kgm_thread.wait_event + else + output /a (unsigned) $kgm_thread.wait_event + end end if $arg1 != 0 if ($kgm_thread.kernel_stack != 0) @@ -208,44 +248,49 @@ define showactint printf "\n\t\treserved_stack=0x%08x", $kgm_thread.reserved_stack end printf "\n\t\tkernel_stack=0x%08x", $kgm_thread.kernel_stack - if (machine_slot[0].cpu_type == 18) - set $mysp = $kgm_actp->mact.pcb->save_r1 + if ($kgm_mtype == 18) + set $mysp = $kgm_thread.machine.pcb->save_r1 else - set $kgm_statep = (struct i386_kernel_state *)($kgm_thread->kernel_stack + 0x4000 - sizeof(stru\ -ct i386_kernel_state)) + set $kgm_statep = (struct i386_kernel_state *) \ + ($kgm_thread->kernel_stack + 0x4000 \ + - sizeof(struct i386_kernel_state)) set $mysp = $kgm_statep->k_ebp end set $prevsp = 0 printf "\n\t\tstacktop=0x%08x", $mysp - while ($mysp != 0) && (($mysp & 0xf) == 0) && ($mysp < 0xb0000000) && ($mysp > $prevsp) - printf "\n\t\t0x%08x ", $mysp - if (machine_slot[0].cpu_type == 18) - set $kgm_return = *($mysp + 8) - else - set $kgm_return = *($mysp + 4) - end - if ($kgm_return > sectPRELINKB) - showkmodaddr $kgm_return - else - if (machine_slot[0].cpu_type == 18) - output /a * ($mysp + 8) - else - output /a * ($mysp + 4) - end - end - set $prevsp = $mysp - set $mysp = * $mysp + if ($kgm_mtype == 18) + set $stkmask = 0xf + set $stklimit = 0xb0000000 + else + set $stkmask = 0x3 + set $stklimit = 0xfc000000 + end + while ($mysp != 0) && (($mysp & $stkmask) == 0) \ + && ($mysp < $stklimit) \ + && ((unsigned)$mysp > (unsigned)$prevsp) + printf "\n\t\t0x%08x ", $mysp + if ($kgm_mtype == 18) + set $kgm_return = *($mysp + 8) + else + set $kgm_return = *($mysp + 4) + end + if ((unsigned) $kgm_return > (unsigned) sectPRELINKB) + showkmodaddr $kgm_return + else + output /a (unsigned) $kgm_return + end + set $prevsp = $mysp + set $mysp = * $mysp end printf "\n\t\tstackbottom=0x%08x", $prevsp else printf "\n\t\t\tcontinuation=" - output /a $kgm_thread.continuation + output /a (unsigned) $kgm_thread.continuation end printf "\n" else printf "\n" end - end end define showact @@ -294,19 +339,17 @@ document showallthreads end define showcurrentthreads -set $kgm_ncpus = machine_info.max_cpus -set $kgm_i = 0 - while $kgm_i < $kgm_ncpus - set $kgm_prp = processor_ptr[$kgm_i] - if ($kgm_prp != 0) && (($kgm_prp)->active_thread != 0) - set $kgm_actp = (($kgm_prp)->active_thread)->top_act +set $kgm_prp = processor_list + while $kgm_prp != 0 + if ($kgm_prp)->active_thread != 0 + set $kgm_actp = ($kgm_prp)->active_thread showtaskheader showtaskint ($kgm_actp)->task showactheader showactint $kgm_actp 0 printf "\n" end - set $kgm_i = $kgm_i + 1 + set $kgm_prp = ($kgm_prp)->processor_list end end document showcurrentthreads @@ -339,19 +382,17 @@ document showallstacks end define showcurrentstacks -set $kgm_ncpus = machine_info.max_cpus -set $kgm_i = 0 - while $kgm_i < $kgm_ncpus - set $kgm_prp = processor_ptr[$kgm_i] - if 
($kgm_prp != 0) && (($kgm_prp)->active_thread != 0) - set $kgm_actp = (($kgm_prp)->active_thread)->top_act +set $kgm_prp = processor_list + while $kgm_prp != 0 + if ($kgm_prp)->active_thread != 0 + set $kgm_actp = ($kgm_prp)->active_thread showtaskheader showtaskint ($kgm_actp)->task showactheader showactint $kgm_actp 1 printf "\n" end - set $kgm_i = $kgm_i + 1 + set $kgm_prp = ($kgm_prp)->processor_list end end document showcurrentstacks @@ -377,7 +418,7 @@ define showwaitqwaiters showwaiterheader end set $kgm_w_shuttle = (struct thread *)$kgm_w_wqe - showactint $kgm_w_shuttle->top_act 0 + showactint $kgm_w_shuttle 0 end set $kgm_w_wqe = (struct wait_queue_element *)$kgm_w_wqe->wqe_links.next end @@ -522,8 +563,8 @@ define showmapheader end define showvmeheader - printf " entry start " - printf "prot #page object offset\n" + printf " entry start " + printf " prot #page object offset\n" end define showvmint @@ -533,7 +574,11 @@ define showvmint printf "0x%08x ", $kgm_map.pmap printf "0x%08x ", $kgm_map.size printf "%3d ", $kgm_map.hdr.nentries - printf "%5d ", $kgm_map.pmap->stats.resident_count + if $kgm_map.pmap + printf "%5d ", $kgm_map.pmap->stats.resident_count + else + printf " " + end printf "0x%08x ", $kgm_map.hint printf "0x%08x\n", $kgm_map.first_free if $arg1 != 0 @@ -542,8 +587,8 @@ define showvmint set $kgm_vmep = $kgm_map.hdr.links.next while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) set $kgm_vme = *$kgm_vmep - printf " 0x%08x ", $kgm_vmep - printf "0x%08x ", $kgm_vme.links.start + printf " 0x%08x ", $kgm_vmep + printf "0x%016llx ", $kgm_vme.links.start printf "%1x", $kgm_vme.protection printf "%1x", $kgm_vme.max_protection if $kgm_vme.inheritance == 0x0 @@ -569,7 +614,7 @@ define showvmint end printf "%5d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 printf "0x%08x ", $kgm_vme.object.vm_object - printf "0x%08x\n", $kgm_vme.offset + printf "0x%016llx\n", $kgm_vme.offset set $kgm_vmep = $kgm_vme.links.next end end @@ -784,13 +829,13 @@ end define showallipc set $kgm_head_taskp = &default_pset.tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp + set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_cur_taskp != $kgm_head_taskp showtaskheader showipcheader - showtaskint $kgm_taskp - showipcint $kgm_taskp->itk_space 0 - set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) + showtaskint $kgm_cur_taskp + showipcint $kgm_cur_taskp->itk_space 0 + set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->pset_tasks.next) end end document showallipc @@ -802,13 +847,13 @@ end define showallrights set $kgm_head_taskp = &default_pset.tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp + set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_cur_taskp != $kgm_head_taskp showtaskheader showipcheader - showtaskint $kgm_taskp - showipcint $kgm_taskp->itk_space 1 - set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) + showtaskint $kgm_cur_taskp + showipcint $kgm_cur_taskp->itk_space 1 + set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->pset_tasks.next) end end document showallrights @@ -1132,20 +1177,20 @@ define showportdestproc if ($kgm_spacep != $kgm_destspacep) set $kgm_destprocp = (struct proc *)0 set $kgm_head_taskp = &default_pset.tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while (($kgm_destprocp == 0) && ($kgm_taskp != $kgm_head_taskp)) - set $kgm_destspacep = $kgm_taskp->itk_space + set 
$kgm_desttaskp = (struct task *)($kgm_head_taskp->next) + while (($kgm_destprocp == 0) && ($kgm_desttaskp != $kgm_head_taskp)) + set $kgm_destspacep = $kgm_desttaskp->itk_space if ($kgm_destspacep == $kgm_spacep) - set $kgm_destprocp = (struct proc *)$kgm_taskp->bsd_info + set $kgm_destprocp = (struct proc *)$kgm_desttaskp->bsd_info else - set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) + set $kgm_desttaskp = (struct task *)($kgm_desttaskp->pset_tasks.next) end end end if $kgm_destprocp != 0 printf "%s(%d)\n", $kgm_destprocp->p_comm, $kgm_destprocp->p_pid else - printf "task 0x%08x\n", $kgm_taskp + printf "task 0x%08x\n", $kgm_desttaskp end end @@ -1316,25 +1361,88 @@ document zprint | (gdb) zprint end +define showmtxgrp +set $kgm_mtxgrp = (lck_grp_t *)$arg0 + +if ($kgm_mtxgrp->lck_grp_mtxcnt) +printf "0x%08x ", $kgm_mtxgrp +printf "%8d ",$kgm_mtxgrp->lck_grp_mtxcnt +printf "%12u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt +printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt +printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt +printf "%s ",&$kgm_mtxgrp->lck_grp_name +printf "\n" +end +end + + +define showallmtx +printf "LCK GROUP CNT UTIL MISS WAIT NAME\n" +set $kgm_mtxgrp_ptr = (lck_grp_t *)&lck_grp_queue +set $kgm_mtxgrp_ptr = (lck_grp_t *)$kgm_mtxgrp_ptr->lck_grp_link.next +while ($kgm_mtxgrp_ptr != (lck_grp_t *)&lck_grp_queue) + showmtxgrp $kgm_mtxgrp_ptr + set $kgm_mtxgrp_ptr = (lck_grp_t *)$kgm_mtxgrp_ptr->lck_grp_link.next +end +printf "\n" +end +document showallmtx +| Routine to print a summary listing of all mutexes +| The following is the syntax: +| (gdb) showallmtx +end + +define showrwlckgrp +set $kgm_rwlckgrp = (lck_grp_t *)$arg0 + +if ($kgm_rwlckgrp->lck_grp_rwcnt) +printf "0x%08x ", $kgm_rwlckgrp +printf "%8d ",$kgm_rwlckgrp->lck_grp_rwcnt +printf "%12u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt +printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt +printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt +printf "%s ",&$kgm_rwlckgrp->lck_grp_name +printf "\n" +end +end + + +define showallrwlck +printf "LCK GROUP CNT UTIL MISS WAIT NAME\n" +set $kgm_rwlckgrp_ptr = (lck_grp_t *)&lck_grp_queue +set $kgm_rwlckgrp_ptr = (lck_grp_t *)$kgm_rwlckgrp_ptr->lck_grp_link.next +while ($kgm_rwlckgrp_ptr != (lck_grp_t *)&lck_grp_queue) + showrwlckgrp $kgm_rwlckgrp_ptr + set $kgm_rwlckgrp_ptr = (lck_grp_t *)$kgm_rwlckgrp_ptr->lck_grp_link.next +end +printf "\n" +end +document showallrwlck +| Routine to print a summary listing of all read/writer locks +| The following is the syntax: +| (gdb) showallrwlck +end + set $kdp_act_counter = 0 define switchtoact - if (machine_slot[0].cpu_type == 18) + if ($kgm_mtype == 18) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end set $kdp_act_counter = $kdp_act_counter + 1 set $newact = (struct thread *) $arg0 - if (($newact.thread)->kernel_stack == 0) - echo This activation does not have a stack.\n - echo continuation: - output/a $newact.thread.continuation - echo \n + if ($newact->kernel_stack == 0) + echo This activation does not have a stack.\n + echo continuation: + output/a (unsigned) $newact.continuation + echo \n + else + set (struct savearea *) kdp.saved_state=$newact->machine->pcb + flush + set $pc=$newact->machine->pcb.save_srr0 + update end - set (struct savearea *) kdp.saved_state=$newact->mact->pcb - flush - set $pc=$newact->mact->pcb.save_srr0 - update 
else echo switchtoact not implemented for this architecture.\n end @@ -1350,7 +1458,7 @@ Syntax: switchtoact
end define switchtoctx - if (machine_slot[0].cpu_type == 18) + if ($kgm_mtype == 18) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end @@ -1373,7 +1481,7 @@ Syntax: switchtoctx
end define resetctx - if (machine_slot[0].cpu_type == 18) + if ($kgm_mtype == 18) set (struct savearea *)kdp.saved_state=$kdpstate flush set $pc=((struct savearea *) kdp.saved_state)->save_srr0 @@ -1467,3 +1575,417 @@ end document showallacts | See help showallthreads. end + + +define resetstacks + _kgm_flush_loop + set kdp_pmap = 0 + _kgm_flush_loop + resetctx + _kgm_flush_loop + _kgm_update_loop + resetctx + _kgm_update_loop +end + +document resetstacks +| Syntax: resetstacks +| Internal kgmacro routine used by the "showuserstack" macro +| to reset the target pmap to the kernel pmap. +end + +#Barely effective hacks to work around bugs in the "flush" and "update" +#gdb commands in Tiger (up to 219); these aren't necessary with Panther +#gdb, but do no harm. +define _kgm_flush_loop + set $kgm_flush_loop_ctr = 0 + while ($kgm_flush_loop_ctr < 30) + flush + set $kgm_flush_loop_ctr = $kgm_flush_loop_ctr + 1 + end +end + +define _kgm_update_loop + set $kgm_update_loop_ctr = 0 + while ($kgm_update_loop_ctr < 30) + update + set $kgm_update_loop_ctr = $kgm_update_loop_ctr + 1 + end +end + +define showuserstack + if ($kgm_mtype == 18) + if ($kdp_act_counter == 0) + set $kdpstate = (struct savearea *) kdp.saved_state + end + set $kdp_act_counter = $kdp_act_counter + 1 + set $newact = (struct thread *) $arg0 + _kgm_flush_loop + set $checkpc = $newact->machine->upcb.save_srr0 + if ($checkpc == 0) + echo This activation does not appear to have + echo \20 a valid user context.\n + else + set (struct savearea *) kdp.saved_state=$newact->machine->upcb + set $pc = $checkpc +#flush and update seem to be executed lazily by gdb on Tiger, hence the +#repeated invocations - see 3743135 + _kgm_flush_loop +# This works because the new pmap is used only for reads + set kdp_pmap = $newact->task->map->pmap + _kgm_flush_loop + _kgm_update_loop + bt + resetstacks + _kgm_flush_loop + _kgm_update_loop + resetstacks + _kgm_flush_loop + _kgm_update_loop + end + else + echo showuserstack not implemented for this architecture.\n + end +end + +document showuserstack +Syntax: showuserstack
+|This command displays a numeric backtrace for the user space stack of +|the given thread activation. It may, of course, fail to display a +|complete backtrace if portions of the user stack are not mapped in. +|Symbolic backtraces can be obtained either by running gdb on the +|user space binary, or a tool such as "symbolicate". +|Note that while this command works on Panther's gdb, an issue +|with Tiger gdb (3743135) appears to hamper the evaluation of this +|macro in some cases. +end + +#Stopgap until gdb can generate the HOSTREBOOT packet +define kdp-reboot + set flag_kdp_trigger_reboot = 1 + continue +end + +document kdp-reboot +Syntax: kdp-reboot +|Reboot the remote target machine; not guaranteed to succeed. Requires symbols +|until gdb support for the HOSTREBOOT packet is implemented. +end + +define sendcore + set kdp_trigger_core_dump = 1 + set kdp_flag |= 0x40 + set panicd_ip_str = "$arg0" + set panicd_specified = 1 + set disableDebugOuput = 0 + set disableConsoleOutput = 0 + set logPanicDataToScreen = 1 + set reattach_wait = 1 + resume_off +end + +document sendcore +Syntax: sendcore +|Configure the kernel to transmit a kernel coredump to a server (kdumpd) +|at the specified IP address. This is useful when the remote target has +|not been previously configured to transmit coredumps, and you wish to +|preserve kernel state for later examination. NOTE: You must issue a "continue" +|command after using this macro to trigger the kernel coredump. The kernel +|will resume waiting in the debugger after completion of the coredump. You +|may disable coredumps by executing the "disablecore" macro. +end + +define disablecore + set kdp_trigger_core_dump = 0 + set kdp_flag |= 0x40 + set kdp_flag &= ~0x10 + set panicd_specified = 0 +end + +document disablecore +Syntax: disablecore +|Reconfigures the kernel so that it no longer transmits kernel coredumps. This +|complements the "sendcore" macro, but it may be used if the kernel has been +|configured to transmit coredumps through boot-args as well. +end + +#Use of this macro requires the gdb submission from 3401283 +define switchtocorethread + if ($kgm_mtype == 18) + if ($kdp_act_counter == 0) + set $kdpstate = (struct savearea *) kdp.saved_state + end + set $kdp_act_counter = $kdp_act_counter + 1 + set $newact = (struct thread *) $arg0 + if ($newact->kernel_stack == 0) + echo This thread does not have a stack.\n + echo continuation: + output/a (unsigned) $newact.continuation + echo \n + else + loadcontext $newact->machine->pcb +# flushstack will be introduced in a gdb version > gdb-357 + flushstack + set $pc = $newact->machine->pcb.save_srr0 + end + else + echo switchtocorethread not implemented for this architecture.\n + end +end + +document switchtocorethread +Syntax: switchtocorethread
+| The corefile equivalent of "switchtoact". When debugging a kernel coredump +| file, this command can be used to examine the execution context and stack +| trace for a given thread activation. For example, to view the backtrace +| for a thread, issue "switchtocorethread
", followed by "bt". +| Before resuming execution, issue a "resetcorectx" command, to +| return to the original execution context. Note that this command +| requires gdb support, as documented in Radar 3401283. +end + +define loadcontext + set $pc = $arg0.save_srr0 + set $r1 = $arg0.save_r1 + set $lr = $arg0.save_lr + + set $r2 = $arg0.save_r2 + set $r3 = $arg0.save_r3 + set $r4 = $arg0.save_r4 + set $r5 = $arg0.save_r5 + set $r6 = $arg0.save_r6 + set $r7 = $arg0.save_r7 + set $r8 = $arg0.save_r8 + set $r9 = $arg0.save_r9 + set $r10 = $arg0.save_r10 + set $r11 = $arg0.save_r11 + set $r12 = $arg0.save_r12 + set $r13 = $arg0.save_r13 + set $r14 = $arg0.save_r14 + set $r15 = $arg0.save_r15 + set $r16 = $arg0.save_r16 + set $r17 = $arg0.save_r17 + set $r18 = $arg0.save_r18 + set $r19 = $arg0.save_r19 + set $r20 = $arg0.save_r20 + set $r21 = $arg0.save_r21 + set $r22 = $arg0.save_r22 + set $r23 = $arg0.save_r23 + set $r24 = $arg0.save_r24 + set $r25 = $arg0.save_r25 + set $r26 = $arg0.save_r26 + set $r27 = $arg0.save_r27 + set $r28 = $arg0.save_r28 + set $r29 = $arg0.save_r29 + set $r30 = $arg0.save_r30 + set $r31 = $arg0.save_r31 + + set $cr = $arg0.save_cr + set $ctr = $arg0.save_ctr +end + +define resetcorectx + set $kgm_corecontext = (struct savearea *) kdp.saved_state + loadcontext $kgm_corecontext +# Maintaining this act counter wouldn't be necessary if we just initialized +# $kdpstate at the beginning of the macro.. + set $kdp_act_counter = 0 +end + +document resetcorectx +Syntax: resetcorectx +| The corefile equivalent of "resetctx". Returns to the original +| execution context (that of the active thread at the time of the NMI or +| panic). This command should be issued if you wish to resume +| execution after using the "switchtocorethread" command. +end + +#Helper function for "showallgdbstacks" + +define showgdbthread + printf " 0x%08x ", $arg0 + set $kgm_thread = *(struct thread *)$arg0 + printf "0x%08x ", $arg0 + printf "%3d ", $kgm_thread.sched_pri + set $kgm_state = $kgm_thread.state + if $kgm_state & 0x80 + printf "I" + end + if $kgm_state & 0x40 + printf "P" + end + if $kgm_state & 0x20 + printf "A" + end + if $kgm_state & 0x10 + printf "H" + end + if $kgm_state & 0x08 + printf "U" + end + if $kgm_state & 0x04 + printf "R" + end + if $kgm_state & 0x02 + printf "S" + end + if $kgm_state & 0x01 + printf "W\t" + printf "0x%08x ", $kgm_thread.wait_queue + output /a (unsigned) $kgm_thread.wait_event + end + if $arg1 != 0 + if ($kgm_thread.kernel_stack != 0) + if ($kgm_thread.reserved_stack != 0) + printf "\n\t\treserved_stack=0x%08x", $kgm_thread.reserved_stack + end + printf "\n\t\tkernel_stack=0x%08x", $kgm_thread.kernel_stack + if ($kgm_mtype == 18) + set $mysp = $kgm_thread.machine.pcb->save_r1 + else + set $kgm_statep = (struct i386_kernel_state *) \ + ($kgm_thread->kernel_stack + 0x4000 \ + - sizeof(struct i386_kernel_state)) + set $mysp = $kgm_statep->k_ebp + end + set $prevsp = 0 + printf "\n\t\tstacktop=0x%08x", $mysp + switchtoact $arg0 + bt + else + printf "\n\t\t\tcontinuation=" + output /a (unsigned) $kgm_thread.continuation + end + printf "\n" + else + printf "\n" + end +end + +#Use of this macro is currently (8/04) blocked by the fact that gdb +#stops evaluating macros when encountering an error, such as a failure +#to read memory from a certain location. Until this issue (described in +#3758949) is addressed, evaluation of this macro may stop upon +#encountering such an error. 
+ +define showallgdbstacks + set $kgm_head_taskp = &default_pset.tasks + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_taskp != $kgm_head_taskp + showtaskheader + showtaskint $kgm_taskp + set $kgm_head_actp = &($kgm_taskp->threads) + set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) + while $kgm_actp != $kgm_head_actp + showactheader + showgdbthread $kgm_actp 1 + set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) + end + printf "\n" + set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) + end + resetctx +end + +document showallgdbstacks +Syntax: showallgdbstacks +| An alternative to "showallstacks". Iterates through the task list and +| displays a gdb generated backtrace for each kernel thread. It is +| advantageous in that it is much faster than "showallstacks", and +| decodes function call arguments and displays source level traces, but +| it has the drawback that it doesn't determine if frames belong to +| functions from kernel extensions, as with "showallstacks". +| This command may terminate prematurely because of a gdb bug +| (Radar 3758949), which stops macro evaluation on memory read +| errors. +end + +define switchtouserthread + if ($kgm_mtype == 18) + if ($kdp_act_counter == 0) + set $kdpstate = (struct savearea *) kdp.saved_state + end + set $kdp_act_counter = $kdp_act_counter + 1 + set $newact = (struct thread *) $arg0 + _kgm_flush_loop + set $checkpc = $newact->machine->upcb.save_srr0 + if ($checkpc == 0) + echo This activation does not appear to have + echo \20 a valid user context.\n + else + set (struct savearea *) kdp.saved_state=$newact->machine->upcb + set $pc = $checkpc +#flush and update seem to be executed lazily by gdb on Tiger, hence the +#repeated invocations - see 3743135 + _kgm_flush_loop +# This works because the new pmap is used only for reads + set kdp_pmap = $newact->task->map->pmap + _kgm_flush_loop + _kgm_update_loop + end + else + echo switchtouserthread not implemented for this architecture.\n + end +end + +document switchtouserthread +Syntax: switchtouserthread
+| Analogous to switchtoact, but switches to the user context of a +| specified thread address. Similar to the "showuserstack" +| command, but this command does not return gdb to the kernel context +| immediately. This is to assist with the following (rather risky) +| manoeuvre - upon switching to the user context and virtual address +| space, the user may choose to call remove-symbol-file on the +| mach_kernel symbol file, and then add-symbol-file on the user space +| binary's symfile. gdb can then generate symbolic backtraces +| for the user space thread. To return to the +| kernel context and virtual address space, the process must be +| reversed, i.e. call remove-symbol-file on the user space symbols, and +| then add-symbol-file on the appropriate mach_kernel, and issue the +| "resetstacks" command. Note that gdb may not react kindly to all these +| symbol file switches. The same restrictions that apply to "showuserstack" +| apply here - pages that have been paged out cannot be read while in the +| debugger context, so backtraces may terminate early. +| If the virtual addresses in the stack trace do not conflict with those +| of symbols in the kernel's address space, it may be sufficient to +| just do an add-symbol-file on the user space binary's symbol file. +| Note that while this command works on Panther's gdb, an issue +| with Tiger gdb (3743135) appears to hamper the evaluation of this +| macro in some cases. +end + +define showmetaclass + set cp-abi gnu-v2 + set $kgm_metaclassp = (OSMetaClass *)$arg0 + printf "%-5d", $kgm_metaclassp->instanceCount + printf "x %5d bytes", $kgm_metaclassp->classSize + printf " %s\n", $kgm_metaclassp->className->string +end + +define showallclasses + set cp-abi gnu-v2 + set $kgm_classidx = 0 + while $kgm_classidx < sAllClassesDict->count + set $kgm_meta = (OSMetaClass *) sAllClassesDict->dictionary[$kgm_classidx].value + showmetaclass $kgm_meta + set $kgm_classidx = $kgm_classidx + 1 + end +end +document showallclasses +| Show the instance counts and ivar size of all OSObject subclasses. See ioclasscount man page for details. +| The following is the syntax: +| (gdb) showallclasses +end + +define showioalloc + printf " Instance allocation = 0x%08lx = %4ld K\n", (int) debug_ivars_size, ((int) debug_ivars_size) / 1024 + printf "Container allocation = 0x%08lx = %4ld K\n", (int) debug_container_malloc_size, ((int) debug_container_malloc_size) / 1024 + printf " IOMalloc allocation = 0x%08lx = %4ld K\n", (int) debug_iomalloc_size, ((int) debug_iomalloc_size) / 1024 + printf " Pageable allocation = 0x%08lx = %4ld K\n", (vm_size_t) debug_iomallocpageable_size, ((vm_size_t) debug_iomallocpageable_size) / 1024 +end + +document showioalloc +| Show some accounting of memory allocated by IOKit allocators. See ioalloccount man page for details. 
+| The following is the syntax: +| (gdb) showioalloc +end diff --git a/libkern/Makefile b/libkern/Makefile index 7788f760d..c9ed0dba2 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -4,17 +4,27 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir -export COMP_LDFLAGS_COMPONENT_PPC = -i_OSCompareAndSwap:_hw_compare_and_store +export COMP_LDFLAGS_COMPONENT_PPC = -i_OSCompareAndSwap:_hw_compare_and_store \ + -i_OSDequeueAtomic:_hw_dequeue_atomic \ + -i_OSEnqueueAtomic:_hw_queue_atomic include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = libkern +INSTINC_SUBDIRS = \ + libkern \ + uuid + INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} + INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS = libkern +EXPINC_SUBDIRS = \ + libkern \ + uuid + EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} + EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} SETUP_SUBDIRS = conf diff --git a/libkern/c++/OSArray.cpp b/libkern/c++/OSArray.cpp index 63ef3ba74..6531060aa 100644 --- a/libkern/c++/OSArray.cpp +++ b/libkern/c++/OSArray.cpp @@ -24,6 +24,7 @@ #include +#include #include #include @@ -48,6 +49,9 @@ extern "C" { #define ACCUMSIZE(s) #endif +#define EXT_CAST(obj) \ + reinterpret_cast(const_cast(obj)) + bool OSArray::initWithCapacity(unsigned int inCapacity) { int size; @@ -67,23 +71,23 @@ bool OSArray::initWithCapacity(unsigned int inCapacity) bzero(array, size); ACCUMSIZE(size); - return this; + return true; } bool OSArray::initWithObjects(const OSObject *objects[], unsigned int theCount, unsigned int theCapacity) { - unsigned int capacity; + unsigned int initCapacity; if (!theCapacity) - capacity = theCount; + initCapacity = theCount; else if (theCount > theCapacity) return false; else - capacity = theCapacity; + initCapacity = theCapacity; - if (!objects || !initWithCapacity(capacity)) + if (!objects || !initWithCapacity(initCapacity)) return false; for ( unsigned int i = 0; i < theCount; i++ ) { @@ -150,10 +154,13 @@ OSArray *OSArray::withArray(const OSArray *array, void OSArray::free() { + // Clear immutability - assumes the container is doing the right thing + (void) super::setOptions(0, kImmutable); + flushCollection(); if (array) { - kfree((vm_offset_t)array, sizeof(const OSMetaClassBase *) * capacity); + kfree(array, sizeof(const OSMetaClassBase *) * capacity); ACCUMSIZE( -(sizeof(const OSMetaClassBase *) * capacity) ); } @@ -192,7 +199,7 @@ unsigned int OSArray::ensureCapacity(unsigned int newCapacity) bcopy(array, newArray, oldSize); bzero(&newArray[capacity], newSize - oldSize); - kfree((vm_offset_t)array, oldSize); + kfree(array, oldSize); array = newArray; capacity = newCapacity; } @@ -330,7 +337,7 @@ OSObject *OSArray::getObject(unsigned int index) const if (index >= count) return 0; else - return (OSObject *) array[index]; + return (OSObject *) (const_cast(array[index])); } OSObject *OSArray::getLastObject() const @@ -338,7 +345,7 @@ OSObject *OSArray::getLastObject() const if (count == 0) return 0; else - return (OSObject *) array[count - 1]; + return ( OSObject *) (const_cast(array[count - 1])); } unsigned int OSArray::getNextIndexOfObject(const OSMetaClassBase * anObject, @@ -370,7 +377,7 @@ bool OSArray::getNextObjectForIterator(void *inIterator, OSObject **ret) const unsigned int index = (*iteratorP)++; if (index < count) { - *ret = (OSObject *) array[index]; + *ret = (OSObject *)(const_cast (array[index])); return true; } else { @@ -391,3 +398,74 @@ bool OSArray::serialize(OSSerialize *s) const return s->addXMLEndTag("array"); } + 
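From the caller's side, the collection changes below add two things: an options word (setOptions) that can mark a collection immutable, and a deep copy (copyCollection) that uses a scratch dictionary to handle reference cycles. A minimal kext-side sketch, assuming the immutability flag is publicly exposed as OSCollection::kImmutable (the patch only shows it used as kImmutable inside member functions):

    #include <libkern/c++/OSArray.h>
    #include <libkern/c++/OSString.h>

    static OSCollection *copyFrozenSettings(void)
    {
        OSArray *settings = OSArray::withCapacity(2);
        if (!settings)
            return 0;

        OSString *value = OSString::withCString("setting-a");
        if (value) {
            settings->setObject(value);
            value->release();       // the array holds its own reference
        }

        // Recursively mark the collection (and any child collections)
        // immutable; mutating it afterwards trips the haveUpdated() check
        // in the OSCollection.cpp hunk below, which logs a backtrace when
        // kOSLogRegistryMods is set in gIOKitDebug.
        settings->setOptions(OSCollection::kImmutable, OSCollection::kImmutable);

        // Deep copy: passing 0 makes copyCollection() allocate its own
        // cycle dictionary, so self-referential collections are copied once.
        OSCollection *frozenCopy = settings->copyCollection(0);
        settings->release();
        return frozenCopy;          // caller releases
    }

The cycle dictionary maps each original collection to its copy, so a sub-collection reachable by more than one path is copied exactly once and stays shared in the result.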
+unsigned OSArray::setOptions(unsigned options, unsigned mask, void *) +{ + unsigned old = super::setOptions(options, mask); + if ((old ^ options) & mask) { + + // Value changed need to recurse over all of the child collections + for ( unsigned i = 0; i < count; i++ ) { + OSCollection *coll = OSDynamicCast(OSCollection, array[i]); + if (coll) + coll->setOptions(options, mask); + } + } + + return old; +} + +OSCollection * OSArray::copyCollection(OSDictionary *cycleDict) +{ + bool allocDict = !cycleDict; + OSCollection *ret = 0; + OSArray *newArray = 0; + + if (allocDict) { + cycleDict = OSDictionary::withCapacity(16); + if (!cycleDict) + return 0; + } + + do { + // Check for a cycle + ret = super::copyCollection(cycleDict); + if (ret) + continue; + + newArray = OSArray::withArray(this); + if (!newArray) + continue; + + // Insert object into cycle Dictionary + cycleDict->setObject((const OSSymbol *) this, newArray); + + for (unsigned int i = 0; i < count; i++) { + OSCollection *coll = + OSDynamicCast(OSCollection, EXT_CAST(newArray->array[i])); + + if (coll) { + OSCollection *newColl = coll->copyCollection(cycleDict); + if (!newColl) + goto abortCopy; + + newArray->replaceObject(i, newColl); + newColl->release(); + }; + }; + + ret = newArray; + newArray = 0; + + } while (false); + +abortCopy: + if (newArray) + newArray->release(); + + if (allocDict) + cycleDict->release(); + + return ret; +} + diff --git a/libkern/c++/OSCollection.cpp b/libkern/c++/OSCollection.cpp index 6a958424a..ff54175ee 100644 --- a/libkern/c++/OSCollection.cpp +++ b/libkern/c++/OSCollection.cpp @@ -21,14 +21,20 @@ */ /* IOArray.h created by rsulack on Thu 11-Sep-1997 */ +#include + #include -#include +#include + +#include #define super OSObject OSDefineMetaClassAndAbstractStructors(OSCollection, OSObject) -OSMetaClassDefineReservedUnused(OSCollection, 0); -OSMetaClassDefineReservedUnused(OSCollection, 1); + + +OSMetaClassDefineReservedUsed(OSCollection, 0); +OSMetaClassDefineReservedUsed(OSCollection, 1); OSMetaClassDefineReservedUnused(OSCollection, 2); OSMetaClassDefineReservedUnused(OSCollection, 3); OSMetaClassDefineReservedUnused(OSCollection, 4); @@ -45,3 +51,40 @@ bool OSCollection::init() return true; } + +void OSCollection::haveUpdated() +{ + if ( (gIOKitDebug & kOSLogRegistryMods) && (fOptions & kImmutable) ) + OSReportWithBacktrace("Trying to change a collection in the registry"); + + updateStamp++; +} + +unsigned OSCollection::setOptions(unsigned options, unsigned mask, void *) +{ + unsigned old = fOptions; + + if (mask) + fOptions = (old & ~mask) | (options & mask); + + return old; +} + +OSCollection * OSCollection::copyCollection(OSDictionary *cycleDict) +{ + if (cycleDict) { + OSObject *obj = cycleDict->getObject((const OSSymbol *) this); + if (obj) + obj->retain(); + + return reinterpret_cast(obj); + } + else { + // If we are here it means that there is a collection subclass that + // hasn't overridden the copyCollection method. In which case just + // return a reference to ourselves. 
+ // Hopefully this collection will not be inserted into the registry + retain(); + return this; + } +} diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index f21fab7d9..4796f039d 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -52,6 +52,9 @@ extern "C" { #define ACCUMSIZE(s) #endif +#define EXT_CAST(obj) \ + reinterpret_cast(const_cast(obj)) + bool OSDictionary::initWithCapacity(unsigned int inCapacity) { if (!super::init()) @@ -228,6 +231,7 @@ OSDictionary *OSDictionary::withDictionary(const OSDictionary *dict, void OSDictionary::free() { + (void) super::setOptions(0, kImmutable); flushCollection(); if (dictionary) { kfree((vm_offset_t)dictionary, capacity * sizeof(dictEntry)); @@ -304,11 +308,11 @@ setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) if (aKey == dictionary[i].key) { const OSMetaClassBase *oldObject = dictionary[i].value; + haveUpdated(); + anObject->taggedRetain(OSTypeID(OSCollection)); dictionary[i].value = anObject; - haveUpdated(); - oldObject->taggedRelease(OSTypeID(OSCollection)); return true; } @@ -318,14 +322,14 @@ setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject) if (count >= capacity && count >= ensureCapacity(count+1)) return 0; + haveUpdated(); + aKey->taggedRetain(OSTypeID(OSCollection)); anObject->taggedRetain(OSTypeID(OSCollection)); dictionary[count].key = aKey; dictionary[count].value = anObject; count++; - haveUpdated(); - return true; } @@ -568,3 +572,77 @@ bool OSDictionary::serialize(OSSerialize *s) const return s->addXMLEndTag("dict"); } + +unsigned OSDictionary::setOptions(unsigned options, unsigned mask, void *) +{ + unsigned old = super::setOptions(options, mask); + if ((old ^ options) & mask) { + + // Value changed need to recurse over all of the child collections + for ( unsigned i = 0; i < count; i++ ) { + OSCollection *v = OSDynamicCast(OSCollection, dictionary[i].value); + if (v) + v->setOptions(options, mask); + } + } + + return old; +} + +OSCollection * OSDictionary::copyCollection(OSDictionary *cycleDict) +{ + bool allocDict = !cycleDict; + OSCollection *ret = 0; + OSDictionary *newDict = 0; + + if (allocDict) { + cycleDict = OSDictionary::withCapacity(16); + if (!cycleDict) + return 0; + } + + do { + // Check for a cycle + ret = super::copyCollection(cycleDict); + if (ret) + continue; + + newDict = OSDictionary::withDictionary(this); + if (!newDict) + continue; + + // Insert object into cycle Dictionary + cycleDict->setObject((const OSSymbol *) this, newDict); + + for (unsigned int i = 0; i < count; i++) { + const OSMetaClassBase *obj = dictionary[i].value; + OSCollection *coll = OSDynamicCast(OSCollection, EXT_CAST(obj)); + + if (coll) { + OSCollection *newColl = coll->copyCollection(cycleDict); + if (!newColl) + goto abortCopy; + + newDict->dictionary[i].value = newColl; + + coll->taggedRelease(OSTypeID(OSCollection)); + newColl->taggedRetain(OSTypeID(OSCollection)); + newColl->release(); + }; + } + + ret = newDict; + newDict = 0; + + } while (false); + +abortCopy: + if (newDict) + newDict->release(); + + if (allocDict) + cycleDict->release(); + + return ret; +} + diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp index 7ec48c113..4e3a92ab5 100644 --- a/libkern/c++/OSMetaClass.cpp +++ b/libkern/c++/OSMetaClass.cpp @@ -22,7 +22,6 @@ /* OSMetaClass.cpp created by gvdl on Fri 1998-11-17 */ #include -#include #include @@ -41,8 +40,8 @@ __BEGIN_DECLS +#include #include -#include #include #include #include @@ -69,7 +68,7 @@ static enum { 
static const int kClassCapacityIncrement = 40; static const int kKModCapacityIncrement = 10; -static OSDictionary *sAllClassesDict, *sKModClassesDict; +static OSDictionary *sAllClassesDict, *sKModClassesDict, *sSortedByClassesDict; static mutex_t *loadLock; static struct StalledData { @@ -286,8 +285,10 @@ OSMetaClass::~OSMetaClass() do { OSCollectionIterator *iter; - if (sAllClassesDict) + if (sAllClassesDict) { sAllClassesDict->removeObject(className); + className->release(); + } iter = OSCollectionIterator::withCollection(sKModClassesDict); if (!iter) @@ -319,7 +320,6 @@ OSMetaClass::~OSMetaClass() memmove(&sStalled->classes[i], &sStalled->classes[i+1], (sStalled->count - i) * sizeof(OSMetaClass *)); } - return; } } @@ -345,7 +345,7 @@ unsigned int OSMetaClass::getClassSize() const void *OSMetaClass::preModLoad(const char *kmodName) { if (!loadLock) { - loadLock = mutex_alloc(ETAP_IO_AHA); + loadLock = mutex_alloc(0); mutex_lock(loadLock); } else @@ -381,6 +381,7 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) { OSReturn result = kOSReturnSuccess; OSSet *kmodSet = 0; + OSSymbol *myname = 0; if (!sStalled || loadHandle != sStalled) { logError(kOSMetaClassInternal); @@ -397,7 +398,8 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) case kMakingDictionaries: sKModClassesDict = OSDictionary::withCapacity(kKModCapacityIncrement); sAllClassesDict = OSDictionary::withCapacity(kClassCapacityIncrement); - if (!sAllClassesDict || !sKModClassesDict) { + sSortedByClassesDict = OSDictionary::withCapacity(kClassCapacityIncrement); + if (!sAllClassesDict || !sKModClassesDict || !sSortedByClassesDict) { result = kOSMetaClassNoDicts; break; } @@ -406,6 +408,7 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) case kCompletedBootstrap: { unsigned int i; + myname = OSSymbol::withCStringNoCopy(sStalled->kmodName); if (!sStalled->count) break; // Nothing to do so just get out @@ -429,19 +432,20 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) break; } - if (!sKModClassesDict->setObject(sStalled->kmodName, kmodSet)) { + if (!sKModClassesDict->setObject(myname, kmodSet)) { result = kOSMetaClassNoInsKModSet; break; } // Second pass symbolling strings and inserting classes in dictionary - for (unsigned int i = 0; i < sStalled->count; i++) { + for (i = 0; i < sStalled->count; i++) { OSMetaClass *me = sStalled->classes[i]; me->className = OSSymbol::withCStringNoCopy((const char *) me->className); sAllClassesDict->setObject(me->className, me); kmodSet->setObject(me); + sSortedByClassesDict->setObject((const OSSymbol *)me, myname); } sBootstrapState = kCompletedBootstrap; break; @@ -455,6 +459,9 @@ OSReturn OSMetaClass::postModLoad(void *loadHandle) if (kmodSet) kmodSet->release(); + if (myname) + myname->release(); + if (sStalled) { ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) + sizeof(*sStalled))); @@ -491,7 +498,7 @@ bool OSMetaClass::modHasInstance(const char *kmodName) bool result = false; if (!loadLock) { - loadLock = mutex_alloc(ETAP_IO_AHA); + loadLock = mutex_alloc(0); mutex_lock(loadLock); } else @@ -771,6 +778,11 @@ const OSMetaClass *OSMetaClass::getSuperClass() const return superClassLink; } +const OSSymbol *OSMetaClass::getKmodName() const +{ + return sSortedByClassesDict->getObject((const OSSymbol *)this); +} + unsigned int OSMetaClass::getInstanceCount() const { return instanceCount; diff --git a/libkern/c++/OSNumber.cpp b/libkern/c++/OSNumber.cpp index 9a4934d97..99b309c7d 100644 --- a/libkern/c++/OSNumber.cpp +++ b/libkern/c++/OSNumber.cpp @@ -24,7 +24,7 @@ #include 
__BEGIN_DECLS -extern int sscanf(const char *input, const char *fmt, ...); +extern unsigned long strtoul(const char *, char **, int); __END_DECLS #include @@ -60,18 +60,7 @@ bool OSNumber::init(unsigned long long inValue, unsigned int numberOfBits) bool OSNumber::init(const char *value, unsigned int numberOfBits) { - unsigned long long thisOffset; - -#ifdef q_works - sscanf(value, "%qi", thisOffset); -#else - unsigned int smallOffset; - - sscanf(value, "%i", &smallOffset); - thisOffset = smallOffset; -#endif - - return init(thisOffset, numberOfBits); + return init((unsigned long long)strtoul(value, NULL, 0), numberOfBits); } void OSNumber::free() { super::free(); } diff --git a/libkern/c++/OSObjectAsm.s b/libkern/c++/OSObjectAsm.s index 7b2096558..c2cf06dd8 100644 --- a/libkern/c++/OSObjectAsm.s +++ b/libkern/c++/OSObjectAsm.s @@ -26,7 +26,7 @@ ; This function was generated by disassembling the 'OSObject::free(void)' ; function of the Panther7B7 kernel in gdb. ; -; Then add the 'li r4,3' flag taken fropm the Puma kernel OSObject::free' +; Then add the 'li r4,3' flag taken fropm the Puma kernel 'OSObject::free' ; .text diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp index ff6e9c9e9..593ebc577 100644 --- a/libkern/c++/OSOrderedSet.cpp +++ b/libkern/c++/OSOrderedSet.cpp @@ -20,6 +20,7 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include #include #include @@ -49,6 +50,8 @@ struct _Element { // unsigned int pri; }; +#define EXT_CAST(obj) \ + reinterpret_cast(const_cast(obj)) bool OSOrderedSet:: initWithCapacity(unsigned int inCapacity, @@ -92,6 +95,7 @@ withCapacity(unsigned int capacity, void OSOrderedSet::free() { + (void) super::setOptions(0, kImmutable); flushCollection(); if (array) { @@ -219,15 +223,14 @@ void OSOrderedSet::removeObject(const OSMetaClassBase *anObject) if( deleted) array[i-1] = array[i]; else if( (array[i].obj == anObject)) { - array[i].obj->taggedRelease(OSTypeID(OSCollection)); deleted = true; + haveUpdated(); // Pity we can't flush the log + array[i].obj->taggedRelease(OSTypeID(OSCollection)); } } - if( deleted) { - count--; - haveUpdated(); - } + if (deleted) + count--; } bool OSOrderedSet::containsObject(const OSMetaClassBase *anObject) const @@ -340,3 +343,78 @@ getNextObjectForIterator(void *inIterator, OSObject **ret) const return (*ret != 0); } + +unsigned OSOrderedSet::setOptions(unsigned options, unsigned mask, void *) +{ + unsigned old = super::setOptions(options, mask); + if ((old ^ options) & mask) { + + // Value changed need to recurse over all of the child collections + for ( unsigned i = 0; i < count; i++ ) { + OSCollection *coll = OSDynamicCast(OSCollection, array[i].obj); + if (coll) + coll->setOptions(options, mask); + } + } + + return old; +} + +OSCollection * OSOrderedSet::copyCollection(OSDictionary *cycleDict) +{ + bool allocDict = !cycleDict; + OSCollection *ret = 0; + OSOrderedSet *newSet = 0; + + if (allocDict) { + cycleDict = OSDictionary::withCapacity(16); + if (!cycleDict) + return 0; + } + + do { + // Check for a cycle + ret = super::copyCollection(cycleDict); + if (ret) + continue; + + // Duplicate the set with no contents + newSet = OSOrderedSet::withCapacity(capacity, ordering, orderingRef); + if (!newSet) + continue; + + // Insert object into cycle Dictionary + cycleDict->setObject((const OSSymbol *) this, newSet); + + newSet->capacityIncrement = capacityIncrement; + + // Now copy over the contents to the new duplicate + for (unsigned int i = 0; i < count; i++) { + OSObject *obj = EXT_CAST(array[i].obj); + OSCollection 
*coll = OSDynamicCast(OSCollection, obj); + if (coll) { + OSCollection *newColl = coll->copyCollection(cycleDict); + if (newColl) { + obj = newColl; // Rely on cycleDict ref for a bit + newColl->release(); + } + else + goto abortCopy; + }; + newSet->setLastObject(obj); + }; + + ret = newSet; + newSet = 0; + + } while (false); + +abortCopy: + if (newSet) + newSet->release(); + + if (allocDict) + cycleDict->release(); + + return ret; +} diff --git a/libkern/c++/OSRuntime.cpp b/libkern/c++/OSRuntime.cpp index 7cf2472a0..3f24b8ee2 100644 --- a/libkern/c++/OSRuntime.cpp +++ b/libkern/c++/OSRuntime.cpp @@ -67,7 +67,7 @@ void *kern_os_malloc( #endif mem->mlen = memsize; - (void) memset(mem->dat, 0, size); + bzero( mem->dat, size); return (mem->dat); } @@ -89,7 +89,7 @@ void kern_os_free( #if 0 memset((vm_offset_t)hdr, 0xbb, hdr->mlen); #else - kfree((vm_offset_t)hdr, hdr->mlen); + kfree(hdr, hdr->mlen); #endif } @@ -130,7 +130,7 @@ void *kern_os_realloc( (void) memset(&nmem->dat[osize], 0, nsize - osize); (void) memcpy(nmem->dat, ohdr->dat, (nsize > osize) ? osize : nsize); - kfree((vm_offset_t)ohdr, ohdr->mlen); + kfree(ohdr, ohdr->mlen); return (nmem->dat); } @@ -155,7 +155,11 @@ void __pure_virtual( void ) { panic(__FUNCTION__); } typedef void (*structor_t)(void); -void OSRuntimeUnloadCPPForSegment(struct segment_command * segment) { +// Given a pointer to a 32 bit mach object segment, iterate the segment to +// obtain a 32 bit destructor section for C++ objects, and call each of the +// destructors there. +void +OSRuntimeUnloadCPPForSegment(struct segment_command * segment) { struct section * section; @@ -179,6 +183,7 @@ void OSRuntimeUnloadCPPForSegment(struct segment_command * segment) { return; } +// This function will only operate on 32 bit kmods void OSRuntimeUnloadCPP(kmod_info_t *ki, void *) { if (ki && ki->address) { @@ -221,6 +226,7 @@ kern_return_t OSRuntimeFinalizeCPP(kmod_info_t *ki, void *) } // Functions used by the extenTools/kmod library project +// This function will only operate on 32 bit kmods kern_return_t OSRuntimeInitializeCPP(kmod_info_t *ki, void *) { struct mach_header *header; @@ -328,8 +334,6 @@ void * operator new( size_t size) void * result; result = (void *) kern_os_malloc( size); - if( result) - bzero( result, size); return( result); } diff --git a/libkern/c++/OSSet.cpp b/libkern/c++/OSSet.cpp index ef0c06691..c09e49350 100644 --- a/libkern/c++/OSSet.cpp +++ b/libkern/c++/OSSet.cpp @@ -21,9 +21,10 @@ */ /* IOSet.m created by rsulack on Thu 11-Jun-1998 */ -#include +#include #include #include +#include #define super OSCollection @@ -37,6 +38,9 @@ OSMetaClassDefineReservedUnused(OSSet, 5); OSMetaClassDefineReservedUnused(OSSet, 6); OSMetaClassDefineReservedUnused(OSSet, 7); +#define EXT_CAST(obj) \ + reinterpret_cast(const_cast(obj)) + bool OSSet::initWithCapacity(unsigned int inCapacity) { if ( !super::init() ) @@ -145,6 +149,7 @@ OSSet *OSSet::withSet(const OSSet *set, void OSSet::free() { + (void) members->super::setOptions(0, kImmutable); if (members) members->release(); @@ -206,7 +211,7 @@ bool OSSet::merge(const OSArray *array) bool OSSet::merge(const OSSet *set) { - return setObject(set->members); + return merge(set->members); } void OSSet::removeObject(const OSMetaClassBase *anObject) @@ -321,3 +326,71 @@ bool OSSet::serialize(OSSerialize *s) const return s->addXMLEndTag("set"); } + +unsigned OSSet::setOptions(unsigned options, unsigned mask, void *) +{ + unsigned old = super::setOptions(options, mask); + if ((old ^ options) & mask) + 
members->setOptions(options, mask); + + return old; +} + +OSCollection * OSSet::copyCollection(OSDictionary *cycleDict) +{ + bool allocDict = !cycleDict; + OSCollection *ret = 0; + OSSet *newSet = 0; + + if (allocDict) { + cycleDict = OSDictionary::withCapacity(16); + if (!cycleDict) + return 0; + } + + do { + // Check for a cycle + ret = super::copyCollection(cycleDict); + if (ret) + continue; // Found it + + newSet = OSSet::withCapacity(members->capacity); + if (!newSet) + continue; // Couldn't create new set abort + + // Insert object into cycle Dictionary + cycleDict->setObject((const OSSymbol *) this, newSet); + + OSArray *newMembers = newSet->members; + newMembers->capacityIncrement = members->capacityIncrement; + + // Now copy over the contents into the new duplicate + for (unsigned int i = 0; i < members->count; i++) { + OSObject *obj = EXT_CAST(members->array[i]); + OSCollection *coll = OSDynamicCast(OSCollection, obj); + if (coll) { + OSCollection *newColl = coll->copyCollection(cycleDict); + if (newColl) { + obj = newColl; // Rely on cycleDict ref for a bit + newColl->release(); + } + else + goto abortCopy; + }; + newMembers->setObject(obj); + }; + + ret = newSet; + newSet = 0; + + } while(false); + +abortCopy: + if (newSet) + newSet->release(); + + if (allocDict) + cycleDict->release(); + + return ret; +} diff --git a/libkern/c++/OSUnserialize.cpp b/libkern/c++/OSUnserialize.cpp index bec931246..db1237eb1 100644 --- a/libkern/c++/OSUnserialize.cpp +++ b/libkern/c++/OSUnserialize.cpp @@ -1562,7 +1562,7 @@ OSUnserialize(const char *buffer, OSString **errorString) OSObject *object; if (!lock) { - lock = mutex_alloc(ETAP_IO_AHA); + lock = mutex_alloc(0); mutex_lock(lock); } else { mutex_lock(lock); diff --git a/libkern/c++/OSUnserializeXML.cpp b/libkern/c++/OSUnserializeXML.cpp index 9c4f66314..d77055edc 100644 --- a/libkern/c++/OSUnserializeXML.cpp +++ b/libkern/c++/OSUnserializeXML.cpp @@ -1407,10 +1407,9 @@ getNumber(parser_state_t *state) { unsigned long long n = 0; int base = 10; + bool negate = false; int c = currentChar(); - if (!isDigit (c)) return 0; - if (c == '0') { c = nextChar(); if (c == 'x') { @@ -1419,10 +1418,17 @@ getNumber(parser_state_t *state) } } if (base == 10) { + if (c == '-') { + negate = true; + c = nextChar(); + } while(isDigit(c)) { n = (n * base + c - '0'); c = nextChar(); } + if (negate) { + n = (unsigned long long)((long long)n * (long long)-1); + } } else { while(isHexDigit(c)) { if (isDigit(c)) { diff --git a/libkern/c++/OSUnserializeXML.y b/libkern/c++/OSUnserializeXML.y index bb36c159c..f8f1be578 100644 --- a/libkern/c++/OSUnserializeXML.y +++ b/libkern/c++/OSUnserializeXML.y @@ -450,10 +450,9 @@ getNumber(parser_state_t *state) { unsigned long long n = 0; int base = 10; + bool negate = false; int c = currentChar(); - if (!isDigit (c)) return 0; - if (c == '0') { c = nextChar(); if (c == 'x') { @@ -462,10 +461,17 @@ getNumber(parser_state_t *state) } } if (base == 10) { + if (c == '-') { + negate = true; + c = nextChar(); + } while(isDigit(c)) { n = (n * base + c - '0'); c = nextChar(); } + if (negate) { + n = (unsigned long long)((long long)n * (long long)-1); + } } else { while(isHexDigit(c)) { if (isDigit(c)) { diff --git a/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj b/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj index 9403d5361..431decfcd 100644 --- a/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj +++ 
b/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj @@ -3,12 +3,15 @@ archiveVersion = 1; classes = { }; - objectVersion = 38; + objectVersion = 39; objects = { 05D29F900382361902CA299A = { + fileEncoding = 30; isa = PBXFileReference; + lastKnownFileType = sourcecode.cpp.cpp; path = test2_main.cpp; refType = 4; + sourceTree = ""; }; 05D29F910382361902CA299A = { fileRef = 05D29F900382361902CA299A; @@ -30,11 +33,7 @@ buildActionMask = 2147483647; files = ( ); - generatedFileNames = ( - ); isa = PBXShellScriptBuildPhase; - neededFileNames = ( - ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; @@ -44,7 +43,12 @@ ); buildSettings = { COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; }; isa = PBXBuildStyle; name = Development; @@ -54,6 +58,8 @@ ); buildSettings = { COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; }; isa = PBXBuildStyle; name = Deployment; @@ -62,11 +68,7 @@ buildActionMask = 2147483647; files = ( ); - generatedFileNames = ( - ); isa = PBXShellScriptBuildPhase; - neededFileNames = ( - ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; @@ -82,10 +84,13 @@ //083 //084 089C1669FE841209C02AAC07 = { + buildSettings = { + }; buildStyles = ( 06AA1262FFB20DD611CA28AA, 06AA1263FFB20DD611CA28AA, ); + hasScannedForEncodings = 1; isa = PBXProject; mainGroup = 089C166AFE841209C02AAC07; projectDirPath = ""; @@ -101,6 +106,7 @@ isa = PBXGroup; name = test2; refType = 4; + sourceTree = ""; }; 089C1673FE841209C02AAC07 = { buildPhases = ( @@ -165,7 +171,6 @@ "; - shouldUseHeadermap = 1; }; 089C1674FE841209C02AAC07 = { buildActionMask = 2147483647; @@ -214,9 +219,12 @@ //0A3 //0A4 0A5A7D55FFB780D811CA28AA = { + explicitFileType = wrapper.cfbundle; + fallbackIsa = PBXFileReference; isa = PBXBundleReference; path = test2.kext; refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; }; //0A0 //0A1 @@ -235,6 +243,7 @@ isa = PBXGroup; name = Products; refType = 4; + sourceTree = ""; }; //190 //191 @@ -254,6 +263,7 @@ name = Source; path = ""; refType = 4; + sourceTree = ""; }; }; rootObject = 089C1669FE841209C02AAC07; diff --git a/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp b/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp index c4c735719..4ed533cbc 100644 --- a/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp +++ b/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp @@ -33,66 +33,70 @@ __END_DECLS #include #include -char *testBuffer = " - - - - - - key true - key false - - key d0 - key d1 AQ== - key d2 ASM= - key d3 ASNF - key d4 0123 4567 89abcdef - key d5 ASNFZw== - - key i0 - key i1 123456789 - key i2 0x12345678 - - key s0 - key s1 string 1 - key s2 string 2 - key <&> <&> - - key c0 - - - key a0 - - - key a1 - array string 1 - array string 2 - - - key t0 - - key t1 - set string 1 - set string 2 - - - key r1 - key r2 - key r3 - key r4 - key r5 - key r6 - - key e1 - key e2 - key e3 - key e4 - key e5 - key e6 - - - -"; +char *testBuffer = +" \n" +" \n" +" \n" +" \n" +" \n" +" \n" + +" key true \n" +" key false \n" + +" key 
d0 \n" +" key d1 AQ== \n" +" key d2 ASM= \n" +" key d3 ASNF \n" +" key d4 ASNFZw== \n" + +" key i0 \n" +" key i1 123456789 \n" +" key i2 -123456789 \n" +" key i3 0x12345678 \n" + +" key s0 \n" +" key s1 string 1 \n" +" key s2 string 2 \n" +" key mr © mac roman copyright © \n" +" key uft8 \xc2\xa9 utf-8 copyright \xc2\xa9 \n" +" key <&> <&> \n" + +" key D0 \n" +" \n" + +" key a0 \n" +" \n" + +" key a1 \n" +" array string 1 \n" +" array string 2 \n" +" \n" + +" key r1 \n" +" key r2 \n" +" key r3 \n" +" key r4 \n" +" key r5 \n" + +" key e1 \n" +" key e2 \n" +" key e4 \n" +" key e5 \n" +" key e6 \n" + +" key S0 \n" +" \n" +" key S1 \n" +" set string 1 \n" +" set string 2 \n" +" \n" +" key r6 \n" +" key e3 \n" + +" \n" +" \n" +; /* this causes the parser to return an empty string? it doesn't look like yyerror gets called diff --git a/libkern/conf/Makefile.i386 b/libkern/conf/Makefile.i386 index b89fdd145..fa98396d8 100644 --- a/libkern/conf/Makefile.i386 +++ b/libkern/conf/Makefile.i386 @@ -2,6 +2,34 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# Enable -Werror for i386 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly in libkern/c++: +OBJS_NO_WERROR= \ + ioconf.o \ + OSRuntimeSupport.o \ + OSMetaClass.cpo \ + OSArray.cpo \ + OSBoolean.cpo \ + OSCollectionIterator.cpo \ + OSCollection.cpo \ + OSData.cpo \ + OSDictionary.cpo \ + OSNumber.cpo \ + OSObject.cpo \ + OSOrderedSet.cpo \ + OSRuntime.cpo \ + OSSerialize.cpo \ + OSString.cpo \ + OSSymbol.cpo \ + OSUnserialize.cpo \ + OSUnserializeXML.cpo + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror ###################################################################### #END Machine dependent Makefile fragment for i386 diff --git a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template index e56bdbb70..fc5e9da90 100644 --- a/libkern/conf/Makefile.template +++ b/libkern/conf/Makefile.template @@ -42,14 +42,6 @@ COMP_SUBDIRS = # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright - # # Theses macros are filled in by the config program depending on the # current configuration. 
The MACHDEP macro is replaced by the @@ -85,12 +77,8 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) @echo "creating $(COMPONENT).o" - $(RM) $(RMFLAGS) vers.c - $(COMPOBJROOT)/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - ${KCC} $(CFLAGS) $(INCLUDES) -c vers.c @echo [ updating $(COMPONENT).o ${LIBKERN_KERNEL_CONFIG} ] - $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} do_all: $(COMPONENT).o diff --git a/libkern/conf/files b/libkern/conf/files index ba3b178b9..8c32688f5 100644 --- a/libkern/conf/files +++ b/libkern/conf/files @@ -7,6 +7,7 @@ OPTIONS/gprof optional gprof # libkern libkern/gen/OSAtomicOperations.c standard +libkern/gen/OSDebug.cpp standard libkern/c++/OSMetaClass.cpp optional libkerncpp libkern/c++/OSObject.cpp optional libkerncpp @@ -29,3 +30,6 @@ libkern/c++/OSSymbol.cpp optional libkerncpp libkern/c++/OSUnserialize.cpp optional libkerncpp libkern/c++/OSUnserializeXML.cpp optional libkerncpp +libkern/stdio/scanf.c standard + +libkern/uuid/uuid.c standard diff --git a/libkern/conf/tools/Makefile b/libkern/conf/tools/Makefile index 9df86ce8c..4f9ccd553 100644 --- a/libkern/conf/tools/Makefile +++ b/libkern/conf/tools/Makefile @@ -7,13 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = \ - doconf \ - newvers +SETUP_SUBDIRS = doconf -COMP_SUBDIRS = \ - doconf \ - newvers +COMP_SUBDIRS = doconf INST_SUBDIRS = \ diff --git a/libkern/conf/tools/newvers/Makefile b/libkern/conf/tools/newvers/Makefile deleted file mode 100644 index 73603c753..000000000 --- a/libkern/conf/tools/newvers/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/) -PROGRAM= $(DSTDIR)newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/libkern/conf/tools/newvers/newvers.csh b/libkern/conf/tools/newvers/newvers.csh deleted file mode 100644 index b462d3387..000000000 --- a/libkern/conf/tools/newvers/newvers.csh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. 
-# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"Common Services Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"Common Services\";" ; - /bin/echo "char ${COMPONENT}_builder[] = \"$w\";" ; -) > vers.c -if [ -s vers.suffix -o ! -f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/libkern/conf/version.major b/libkern/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/libkern/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/libkern/conf/version.minor b/libkern/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/libkern/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/libkern/conf/version.variant b/libkern/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/libkern/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c index c6e86aaca..4a1e7beb0 100644 --- a/libkern/gen/OSAtomicOperations.c +++ b/libkern/gen/OSAtomicOperations.c @@ -39,6 +39,11 @@ enum { * which I wrote for NuKernel in a previous life with a different last name...) * * native Boolean CompareAndSwap(UInt32 oldValue, UInt32 newValue, UInt32 * oldValuePtr); + * + * We've since implemented a few more of these -- OSAddAtomic, OSDequeueAtomic, + * OSEnqueueAtomic etc -- in assembler, either for speed or correctness. See also the + * commpage atomic operations, and the platform specific versions. + * Like standards, there are a lot of atomic ops to choose from! */ #ifndef __ppc__ @@ -66,6 +71,37 @@ SInt32 OSDecrementAtomic(SInt32 * value) return OSAddAtomic(-1, value); } +void * OSDequeueAtomic(void ** inList, SInt32 inOffset) +{ + void * oldListHead; + void * newListHead; + + do { + oldListHead = *inList; + if (oldListHead == NULL) { + break; + } + + newListHead = *(void **) (((char *) oldListHead) + inOffset); + } while (! OSCompareAndSwap((UInt32)oldListHead, + (UInt32)newListHead, (UInt32 *)inList)); + + return oldListHead; +} + +void OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset) +{ + void * oldListHead; + void * newListHead = inNewLink; + void ** newLinkNextPtr = (void **) (((char *) inNewLink) + inOffset); + + do { + oldListHead = *inList; + *newLinkNextPtr = oldListHead; + } while (! OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, + (UInt32 *)inList)); +} + #endif /* !__ppc__ */ static UInt32 OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, UInt32 * value) @@ -145,37 +181,6 @@ Boolean OSTestAndClear(UInt32 bit, UInt8 * startAddress) return OSTestAndSetClear(bit, false, startAddress); } -void * OSDequeueAtomic(void ** inList, SInt32 inOffset) -{ - void * oldListHead; - void * newListHead; - - do { - oldListHead = *inList; - if (oldListHead == NULL) { - break; - } - - newListHead = *(void **) (((char *) oldListHead) + inOffset); - } while (! 
OSCompareAndSwap((UInt32)oldListHead, - (UInt32)newListHead, (UInt32 *)inList)); - - return oldListHead; -} - -void OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset) -{ - void * oldListHead; - void * newListHead = inNewLink; - void ** newLinkNextPtr = (void **) (((char *) inNewLink) + inOffset); - - do { - oldListHead = *inList; - *newLinkNextPtr = oldListHead; - } while (! OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, - (UInt32 *)inList)); -} - /* * silly unaligned versions */ diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp new file mode 100644 index 000000000..64a752091 --- /dev/null +++ b/libkern/gen/OSDebug.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +// NOTE: This file is only c++ so I can get static initialisers going +#include + +#include + +#include +#include +#include +#include + +#include // From bsd's libkern directory + +__BEGIN_DECLS +// From osfmk/kern/thread.h but considered to be private +extern vm_offset_t min_valid_stack_address(void); +extern vm_offset_t max_valid_stack_address(void); + +// From osfmk/kmod.c +extern void kmod_dump_log(vm_offset_t *addr, unsigned int cnt); +__END_DECLS + +static mutex_t *sOSReportLock = mutex_alloc(0); + +/* Report a message with a 4 entry backtrace - very slow */ +void +OSReportWithBacktrace(const char *str, ...)
+{ + char buf[128]; + void *bt[9]; + const unsigned cnt = sizeof(bt) / sizeof(bt[0]); + va_list listp; + + // Ignore our own and our caller's stack frames, skipping frames 0 & 1 + (void) OSBacktrace(bt, cnt); + + va_start(listp, str); + vsnprintf(buf, sizeof(buf), str, listp); + va_end(listp); + + mutex_lock(sOSReportLock); + { + printf("%s\nBacktrace %p %p %p %p %p %p %p\n", + buf, bt[2], bt[3], bt[4], bt[5], bt[6], bt[7], bt[8]); + kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2); + } + mutex_unlock(sOSReportLock); +} + +static vm_offset_t minstackaddr = min_valid_stack_address(); +static vm_offset_t maxstackaddr = max_valid_stack_address(); + +unsigned OSBacktrace(void **bt, unsigned maxAddrs) +{ + unsigned frame; + +#if __ppc__ + vm_offset_t stackptr, stackptr_prev; + const vm_offset_t * const mem = (vm_offset_t *) 0; + unsigned i = 0; + + __asm__ volatile("mflr %0" : "=r" (stackptr)); + bt[i++] = (void *) stackptr; + + __asm__ volatile("mr %0,r1" : "=r" (stackptr)); + for ( ; i < maxAddrs; i++) { + // Validate we have a reasonable stackptr + if ( !(minstackaddr <= stackptr && stackptr < maxstackaddr) + || (stackptr & 3)) + break; + + stackptr_prev = stackptr; + stackptr = mem[stackptr_prev >> 2]; + if ((stackptr_prev ^ stackptr) > 8 * 1024) // Sanity check + break; + + vm_offset_t addr = mem[(stackptr >> 2) + 2]; + if ((addr & 3) || (addr < 0x8000)) // More sanity checks + break; + bt[i] = (void *) addr; + } + frame = i; + + for ( ; i < maxAddrs; i++) + bt[i] = (void *) 0; +#elif 0 && __i386__ // Note that this should be ported for i386 + // This function is not safe, we should get this code ported appropriately + if (maxAddrs > 16) { + for (frame = 16; frame < maxAddrs; frame++) + bt[frame] = __builtin_return_address(frame); + maxAddrs = 16; + } + + switch(maxAddrs) { + case 15+1: bt[15] = __builtin_return_address(15); + case 14+1: bt[14] = __builtin_return_address(14); + case 13+1: bt[13] = __builtin_return_address(13); + case 12+1: bt[12] = __builtin_return_address(12); + case 11+1: bt[11] = __builtin_return_address(11); + case 10+1: bt[10] = __builtin_return_address(10); + case 9+1: bt[ 9] = __builtin_return_address( 9); + case 8+1: bt[ 8] = __builtin_return_address( 8); + case 7+1: bt[ 7] = __builtin_return_address( 7); + case 6+1: bt[ 6] = __builtin_return_address( 6); + case 5+1: bt[ 5] = __builtin_return_address( 5); + case 4+1: bt[ 4] = __builtin_return_address( 4); + case 3+1: bt[ 3] = __builtin_return_address( 3); + case 2+1: bt[ 2] = __builtin_return_address( 2); + case 1+1: bt[ 1] = __builtin_return_address( 1); + case 0+1: bt[ 0] = __builtin_return_address( 0); + case 0: default: break; + } + + frame = maxAddrs; +#else + // This function is not safe, we should get this code ported appropriately + if (maxAddrs > 16) { + for (frame = 16; frame < maxAddrs; frame++) + bt[frame] = 0; + maxAddrs = 16; + } + + switch (maxAddrs) { + case 15+1: bt[15] = __builtin_return_address(15); + case 14+1: bt[14] = __builtin_return_address(14); + case 13+1: bt[13] = __builtin_return_address(13); + case 12+1: bt[12] = __builtin_return_address(12); + case 11+1: bt[11] = __builtin_return_address(11); + case 10+1: bt[10] = __builtin_return_address(10); + case 9+1: bt[ 9] = __builtin_return_address( 9); + case 8+1: bt[ 8] = __builtin_return_address( 8); + case 7+1: bt[ 7] = __builtin_return_address( 7); + case 6+1: bt[ 6] = __builtin_return_address( 6); + case 5+1: bt[ 5] = __builtin_return_address( 5); + case 4+1: bt[ 4] = __builtin_return_address( 4); + case 3+1: bt[ 3] = __builtin_return_address(
3); + case 2+1: bt[ 2] = __builtin_return_address( 2); + case 1+1: bt[ 1] = __builtin_return_address( 1); + case 0+1: bt[ 0] = __builtin_return_address( 0); + case 0: + default : + break; + } + + frame = maxAddrs; +#endif + + return frame; +} diff --git a/libkern/i386/OSAtomic.s b/libkern/i386/OSAtomic.s index e7f13bf55..68b10523e 100644 --- a/libkern/i386/OSAtomic.s +++ b/libkern/i386/OSAtomic.s @@ -27,22 +27,11 @@ .globl _OSCompareAndSwap _OSCompareAndSwap: - #; this is _lame_, the project will not currently accept asm code that - #; requires anything beyond a 386, but that chip: - #; - does not support MP - #; - does not support the cmpxchgl instruction - #; - does not support the lock meta-instruction - #; so what is a poor guy to do? comment it out... - pushl %edi - pushl %esi - movl 0+8+4(%esp),%eax #; oldValue - movl 4+8+4(%esp),%edi #; newValue - movl 8+8+4(%esp),%esi #; ptr + movl 4(%esp), %eax #; oldValue + movl 8(%esp), %edx #; newValue + movl 12(%esp), %ecx #; ptr lock - cmpxchgl %edi,0(%esi) #; CAS (eax is an implicit operand) - sete %al #; did CAS succeed? (TZ=1) - andl $0x000000ff,%eax #; clear out the high bytes (has to be an easier way...) - popl %esi - popl %edi + cmpxchgl %edx, 0(%ecx) #; CAS (eax is an implicit operand) + sete %al #; did CAS succeed? (TZ=1) + movzbl %al, %eax #; clear out the high bytes ret - diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index bd2b5547d..cf3af2a63 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -25,17 +25,30 @@ DATAFILES = \ OSAtomic.h \ OSBase.h \ OSByteOrder.h \ + OSDebug.h \ + OSMalloc.h \ OSReturn.h \ - OSTypes.h + OSTypes.h \ + locks.h \ + sysctl.h -INSTALL_MI_LIST = OSByteOrder.h OSReturn.h OSTypes.h +INSTALL_MI_LIST = OSByteOrder.h OSDebug.h OSReturn.h OSTypes.h INSTALL_MI_DIR = libkern EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_GEN_LIST = \ + version.h + EXPORT_MI_DIR = libkern +NEWVERS = $(SRCROOT)/config/newvers.pl + +version.h: version.h.template $(SRCROOT)/config/MasterVersion + @echo "Generating libkern/$@ from $<"; + install $(DATA_INSTALL_FLAGS) $< $@ + $(NEWVERS) $@; include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index 300bac7df..2b839fe77 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -49,7 +49,7 @@ extern Boolean OSCompareAndSwap( UInt32 oldValue, UInt32 newValue, UInt32 * addr /*! @function OSAddAtomic @abstract 32-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic function adds the specified amount to the value at the specified address and returns the result. + @discussion The OSAddAtomic function adds the specified amount to the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param amount The amount to add. @@ -60,7 +60,7 @@ extern SInt32 OSAddAtomic(SInt32 amount, SInt32 * address); /*! @function OSAddAtomic16 @abstract 16-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the result. 
+ @discussion The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param amount The amount to add. @@ -71,7 +71,7 @@ extern SInt16 OSAddAtomic16(SInt32 amount, SInt16 * address); /*! @function OSAddAtomic8 @abstract 8-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the result. + @discussion The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param amount The amount to add. @@ -82,7 +82,7 @@ extern SInt8 OSAddAtomic8(SInt32 amount, SInt8 * address); /*! @function OSIncrementAtomic @abstract 32-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic function increments the value at the specified address by one and returns the value as it was before the change. + @discussion The OSIncrementAtomic function increments the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The 4-byte aligned address of the value to update atomically. @@ -92,7 +92,7 @@ extern SInt32 OSIncrementAtomic(SInt32 * address); /*! @function OSIncrementAtomic16 @abstract 16-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic16 function increments the value at the specified address by one and returns the value as it was before the change. + @discussion The OSIncrementAtomic16 function increments the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The 2-byte aligned address of the value to update atomically. @@ -102,7 +102,7 @@ extern SInt16 OSIncrementAtomic16(SInt16 * address); /*! @function OSIncrementAtomic8 @abstract 8-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic8 function increments the value at the specified address by one and returns the value as it was before the change. + @discussion The OSIncrementAtomic8 function increments the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The address of the value to update atomically. @@ -112,7 +112,7 @@ extern SInt8 OSIncrementAtomic8(SInt8 * address); /*! @function OSDecrementAtomic @abstract 32-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic function decrements the value at the specified address by one and returns the value as it was before the change. + @discussion The OSDecrementAtomic function decrements the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The 4-byte aligned address of the value to update atomically. @@ -122,7 +122,7 @@ extern SInt32 OSDecrementAtomic(SInt32 * address); /*! @function OSDecrementAtomic16 @abstract 16-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the value as it was before the change. + @discussion The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The 2-byte aligned address of the value to update atomically. @@ -132,7 +132,7 @@ extern SInt16 OSDecrementAtomic16(SInt16 * address); /*! @function OSDecrementAtomic8 @abstract 8-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the value as it was before the change. + @discussion The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param address The address of the value to update atomically. @@ -142,7 +142,7 @@ extern SInt8 OSDecrementAtomic8(SInt8 * address); /*! @function OSBitAndAtomic @abstract 32-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically and with the value. @@ -153,7 +153,7 @@ extern UInt32 OSBitAndAtomic(UInt32 mask, UInt32 * address); /*! @function OSBitAndAtomic16 @abstract 16-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic16 function logically ands the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitAndAtomic16 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically and with the value. @@ -164,7 +164,7 @@ extern UInt16 OSBitAndAtomic16(UInt32 mask, UInt16 * address); /*! @function OSBitAndAtomic8 @abstract 8-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic8 function logically ands the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitAndAtomic8 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically and with the value. @@ -175,7 +175,7 @@ extern UInt8 OSBitAndAtomic8(UInt32 mask, UInt8 * address); /*! @function OSBitOrAtomic @abstract 32-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitOrAtomic function logically ors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitOrAtomic function logically ors the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically or with the value. @@ -186,7 +186,7 @@ extern UInt32 OSBitOrAtomic(UInt32 mask, UInt32 * address); /*! @function OSBitOrAtomic16 @abstract 16-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitOrAtomic16 function logically ors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitOrAtomic16 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically or with the value. @@ -199,7 +199,7 @@ extern UInt16 OSBitOrAtomic16(UInt32 mask, UInt16 * address); @abstract 8-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSBitOrAtomic8 function logically ors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitOrAtomic8 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. @param mask The mask to logically or with the value. @param address The address of the value to update atomically. @result The value before the bitwise operation. */ @@ -210,7 +210,7 @@ extern UInt8 OSBitOrAtomic8(UInt32 mask, UInt8 * address); @abstract 32-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSBitXorAtomic function logically xors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitXorAtomic function logically xors the bits of the specified mask into the value at the specified address and returns the original value. @param mask The mask to logically xor with the value. @param address The 4-byte aligned address of the value to update atomically. @result The value before the bitwise operation. */ @@ -219,7 +219,7 @@ extern UInt32 OSBitXorAtomic(UInt32 mask, UInt32 * address); /*! @function OSBitXorAtomic16 @abstract 16-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitXorAtomic16 function logically xors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitXorAtomic16 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. @param mask The mask to logically xor with the value. @@ -232,7 +232,7 @@ extern UInt16 OSBitXorAtomic16(UInt32 mask, UInt16 * address); @abstract 8-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
- @discussion The OSBitXorAtomic8 function logically xors the bits of the specified mask into the value at the specified address and returns the result. + @discussion The OSBitXorAtomic8 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. @param mask The mask to logically xor with the value. @param address The address of the value to update atomically. @result The value before the bitwise operation. */ diff --git a/libkern/libkern/OSBase.h b/libkern/libkern/OSBase.h index 0938bc7bc..2746f19af 100644 --- a/libkern/libkern/OSBase.h +++ b/libkern/libkern/OSBase.h @@ -37,6 +37,8 @@ __BEGIN_DECLS +#ifdef KERNEL_PRIVATE + OS_INLINE uint64_t __OSAbsoluteTime( @@ -53,7 +55,31 @@ __OSAbsoluteTimePtr( return ((uint64_t *)abstime); } +#define AbsoluteTime_to_scalar(x) (*(uint64_t *)(x)) + +/* t1 < = > t2 */ +#define CMP_ABSOLUTETIME(t1, t2) \ + (AbsoluteTime_to_scalar(t1) > \ + AbsoluteTime_to_scalar(t2)? (int)+1 : \ + (AbsoluteTime_to_scalar(t1) < \ + AbsoluteTime_to_scalar(t2)? (int)-1 : 0)) + +/* t1 += t2 */ +#define ADD_ABSOLUTETIME(t1, t2) \ + (AbsoluteTime_to_scalar(t1) += \ + AbsoluteTime_to_scalar(t2)) + +/* t1 -= t2 */ +#define SUB_ABSOLUTETIME(t1, t2) \ + (AbsoluteTime_to_scalar(t1) -= \ + AbsoluteTime_to_scalar(t2)) + +#define ADD_ABSOLUTETIME_TICKS(t1, ticks) \ + (AbsoluteTime_to_scalar(t1) += \ + (int32_t)(ticks)) + +#endif /* KERNEL_PRIVATE */ + __END_DECLS #endif /* _OS_OSBASE_H */ - diff --git a/libkern/libkern/OSByteOrder.h b/libkern/libkern/OSByteOrder.h index 86c5de6e3..c64a3aa05 100644 --- a/libkern/libkern/OSByteOrder.h +++ b/libkern/libkern/OSByteOrder.h @@ -104,17 +104,17 @@ OSHostByteOrder(void) { OS_INLINE uint16_t OSReadBigInt16( - volatile void * base, + const volatile void * base, uintptr_t offset ) { - return *(volatile uint16_t *)((int8_t *)base + offset); + return *(volatile uint16_t *)((uintptr_t)base + offset); } OS_INLINE uint32_t OSReadBigInt32( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -124,7 +124,7 @@ OSReadBigInt32( OS_INLINE uint64_t OSReadBigInt64( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -334,7 +334,7 @@ OSSwapBigToHostInt64( OS_INLINE uint16_t OSReadBigInt16( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -344,7 +344,7 @@ OSReadBigInt16( OS_INLINE uint32_t OSReadBigInt32( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -354,7 +354,7 @@ OSReadBigInt32( OS_INLINE uint64_t OSReadBigInt64( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -401,7 +401,7 @@ OSWriteBigInt64( OS_INLINE uint16_t OSReadLittleInt16( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -411,7 +411,7 @@ OSReadLittleInt16( OS_INLINE uint32_t OSReadLittleInt32( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -421,7 +421,7 @@ OSReadLittleInt32( OS_INLINE uint64_t OSReadLittleInt64( - volatile void * base, + const volatile void * base, uintptr_t offset ) { diff --git a/bsd/machine/trap.h b/libkern/libkern/OSDebug.h similarity index 71% rename from bsd/machine/trap.h rename to libkern/libkern/OSDebug.h index 6dd0fe1c7..4435dcdc3 100644 --- a/bsd/machine/trap.h +++ b/libkern/libkern/OSDebug.h @@ -20,19 +20,23 @@ * @APPLE_LICENSE_HEADER_END@ */ /* - * Copyright 1995 NeXT Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * + * HISTORY + * */ -#ifndef _BSD_MACHINE_TRAP_H_ -#define _BSD_MACHINE_TRAP_H_ +#ifndef _OS_OSDEBBUG_H +#define _OS_OSDEBBUG_H + +#include + +__BEGIN_DECLS -#if defined (__ppc__) -#include "ppc/trap.h" -#elif defined (__i386__) -#include "i386/trap.h" -#else -#error architecture not supported -#endif +/* Report a message with a 4 entry backtrace - very slow */ +extern void OSReportWithBacktrace(const char *str, ...); +extern unsigned OSBacktrace(void **bt, unsigned maxAddrs); +__END_DECLS -#endif /* _BSD_MACHINE_TRAP_H_ */ +#endif /* !_OS_OSDEBBUG_H */ diff --git a/libkern/libkern/OSMalloc.h b/libkern/libkern/OSMalloc.h new file mode 100644 index 000000000..0390a7703 --- /dev/null +++ b/libkern/libkern/OSMalloc.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef LIBKERN_OSMALLOC_h +#define LIBKERN_OSMALLOC_h + +#include + +__BEGIN_DECLS + +#include +#ifdef MACH_KERNEL_PRIVATE +#include +#endif + +#ifdef MACH_KERNEL_PRIVATE + +#define OSMT_MAX_NAME 64 + +typedef struct _OSMallocTag_ { + queue_chain_t OSMT_link; + uint32_t OSMT_refcnt; + uint32_t OSMT_state; + uint32_t OSMT_attr; + char OSMT_name[OSMT_MAX_NAME]; +} *OSMallocTag; + +#define OSMT_VALID_MASK 0xFFFF0000 +#define OSMT_VALID 0xDEAB0000 +#define OSMT_RELEASED 0x00000001 + +#define OSMT_ATTR_PAGEABLE 0x01 +#else +typedef struct __OSMallocTag__ *OSMallocTag, *OSMallocTag_t; +#endif + +#define OSMT_DEFAULT 0x00 +#define OSMT_PAGEABLE 0x01 + +extern OSMallocTag OSMalloc_Tagalloc(const char * str, uint32_t flags); + +extern void OSMalloc_Tagfree(OSMallocTag tag); + +extern void * OSMalloc(uint32_t size, OSMallocTag tag); + +extern void * OSMalloc_nowait(uint32_t size, OSMallocTag tag); + +extern void * OSMalloc_noblock(uint32_t size, OSMallocTag tag); + +extern void OSFree(void * addr, uint32_t size, OSMallocTag tag); + +__END_DECLS + +#endif /* LIBKERN_OSMALLOC_h */ diff --git a/libkern/libkern/c++/OSArray.h b/libkern/libkern/c++/OSArray.h index 03630ed7a..f70cdba68 100644 --- a/libkern/libkern/c++/OSArray.h +++ b/libkern/libkern/c++/OSArray.h @@ -206,14 +206,14 @@ public: /*! @function isEqualTo - @abstract A member function which tests the equality of two OSArray objects. + @abstract A member function which tests the equality of the values of two OSArray objects. @param anArray The array object being compared against the receiver. @result Returns true if the two arrays are equivalent or false otherwise. */ virtual bool isEqualTo(const OSArray *anArray) const; /*! @function isEqualTo - @abstract A member function which compares the equality of the receiving array to an arbitrary object. 
+ @abstract A member function which compares the equality of the values of the receiving array to an arbitrary object. @param anObject The object to be compared against the receiver. @result Returns true if the two objects are equivalent, that is they are either the same object or they are both arrays containing the same or equivalent objects, or false otherwise. */ @@ -235,7 +235,7 @@ public: /*! @function getNextIndexOfObject - @abstract A member function which returns the next array index of an object, at or beyond the supplied index. + @abstract A member function which searches the array for the next instance of a specific object, at or beyond the supplied index. @result Returns the next index of the object in the array or (-1) if none is found. */ virtual unsigned int getNextIndexOfObject(const OSMetaClassBase * anObject, @@ -249,6 +249,24 @@ public: */ virtual bool serialize(OSSerialize *s) const; + /*! + @function setOptions + @abstract This function is used to recursively set option bits in this array and all child collections. + @param options Set the (options & mask) bits. + @param mask The mask of bits which need to be set, 0 to get the current value. + @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. + */ + virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); + + /*! + @function copyCollection + @abstract Do a deep copy of this array and its collections. + @discussion This function copies this array and its included collections recursively. Objects that don't derive from OSCollection are NOT copied, that is objects like OSString and OSData. + @param cycleDict Is a dictionary of all of the collections that have been copied so far; to start the copy at the top level just leave this field 0. + @result The newly copied collection or 0 if insufficient memory + */ + OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSMetaClassDeclareReservedUnused(OSArray, 0); OSMetaClassDeclareReservedUnused(OSArray, 1); OSMetaClassDeclareReservedUnused(OSArray, 2); diff --git a/libkern/libkern/c++/OSBoolean.h b/libkern/libkern/c++/OSBoolean.h index 3934cfeae..58b3e36e6 100644 --- a/libkern/libkern/c++/OSBoolean.h +++ b/libkern/libkern/c++/OSBoolean.h @@ -40,8 +40,9 @@ protected: bool value; /*D @function taggedRelease - @abstract Override tagged release mechanism. - @param when Unused. */ + @abstract Overrides tagged release mechanism. + @param when Unused. + */ virtual void taggedRelease(const void *tag, const int when) const; public: @@ -103,7 +104,7 @@ public: /*! @function serialize - @abstract A member function which archives the receiver. + @abstract A member function that archives the receiver. @param s The OSSerialize object. @result Returns true if serialization was successful, false if not. */ virtual bool serialize(OSSerialize *s) const; @@ -120,14 +121,14 @@ public: }; /*! - @defined kOSBooleanTrue + @const kOSBooleanTrue @abstract The OSBoolean constant for "true". @discussion The OSBoolean constant for "true". The object does not need to be retained or released. Comparisons of the form (booleanObject == kOSBooleanTrue) are acceptable and would be equivalent to (booleanObject->getValue() == true). */ extern OSBoolean * const & kOSBooleanTrue; /*! - @defined kOSBooleanFalse + @const kOSBooleanFalse @abstract The OSBoolean constant for "false". @discussion The OSBoolean constant for "false". The object does not need to be retained or released.
Comparisons of the form (booleanObject == kOSBooleanFalse) are acceptable and would be equivalent to (booleanObject->getValue() == false). */ diff --git a/libkern/libkern/c++/OSCollection.h b/libkern/libkern/c++/OSCollection.h index 1b3a22600..ef8f1da90 100644 --- a/libkern/libkern/c++/OSCollection.h +++ b/libkern/libkern/c++/OSCollection.h @@ -26,6 +26,8 @@ #include +class OSDictionary; + /*! @class OSCollection @abstract Abstract super class for all collections. @@ -36,17 +38,19 @@ class OSCollection : public OSObject { friend class OSCollectionIterator; - OSDeclareAbstractStructors(OSCollection) + OSDeclareAbstractStructors(OSCollection); + struct ExpansionData { }; + protected: unsigned int updateStamp; - struct ExpansionData { }; - - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; +private: + /* Reserved for future use. (Internal use only) */ + // ExpansionData *reserved; + unsigned int fOptions; +protected: // Member functions used by the OSCollectionIterator class. /* @function iteratorSize @@ -84,11 +88,16 @@ protected: virtual bool init(); public: + enum { + kImmutable = 0x00000001, + kMASK = (unsigned) -1 + }; + /* @function haveUpdated @abstract A member function to track all updates to the collection. */ - void haveUpdated() { updateStamp++; }; + void haveUpdated(); /* @function getCount @@ -130,8 +139,84 @@ public: */ virtual void flushCollection() = 0; - OSMetaClassDeclareReservedUnused(OSCollection, 0); - OSMetaClassDeclareReservedUnused(OSCollection, 1); + /*! + @function setOptions + @abstract This function is used to recursively set option bits in this collection and all child collections. + @discussion setOptions is a recursive function but the OSCollection class itself does not know the structure of the particular collection. This means that all derived classes are expected to override this method and recurse if the old value of the option was NOT set, which is why the old value is returned. As this function is a reserved function override it is very multi purpose. It can be used both to get and to set the options. + @param options Set the (options & mask) bits. + @param mask The mask of bits which need to be set, 0 to get the current value. + @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. + */ + OSMetaClassDeclareReservedUsed(OSCollection, 0); + virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); + + /*! + @function copyCollection + @abstract Do a deep copy of a collection tree. + @discussion This function copies this collection and all of the contained collections recursively. Objects that don't derive from OSCollection are NOT copied, that is objects like OSString and OSData. To override OSCollection::copyCollection some code is required to be implemented in the derived class; below is the skeleton pseudo code to copy a collection, with MyCollection standing in for the derived class. + +OSCollection * MyCollection::copyCollection(OSDictionary *cycleDict) +{ + bool allocDict = !cycleDict; + OSCollection *ret = 0; + MyCollection *newMyColl = 0; + + if (allocDict) + cycleDict = OSDictionary::withCapacity(16); + if (!cycleDict) + return 0; + + do { + // Check to see if we already have a copy of the new dictionary + ret = super::copyCollection(cycleDict); + if (ret) + continue; + + // Your code goes here to copy your collection, + // see OSArray & OSDictionary for examples.
+ newMyColl = MyCollection::with(this); + if (!newMyColl) + continue; + + // Insert object into cycle Dictionary + cycleDict->setObject((const OSSymbol *) this, newMyColl); + + // Duplicate any collections in us + for (unsigned int i = 0; i < count; i++) { + OSObject *obj = getObject(i); + OSCollection *coll = OSDynamicCast(OSCollection, obj); + + if (coll) { + OSCollection *newColl = coll->copyCollection(cycleDict); + if (!newColl) + goto abortCopy; + + newMyColl->replaceObject(i, newColl); + newColl->release(); + }; + }; + + ret = newMyColl; + newMyColl = 0; + + } while (false); + +abortCopy: + if (newMyColl) + newMyColl->release(); + + if (allocDict) + cycleDict->release(); + + return ret; +} + + @param cycleDict Is a dictionary of all of the collections that have been copied so far; to start the copy at the top level just leave this field 0. + @result The newly copied collection or 0 if insufficient memory + */ + virtual OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSMetaClassDeclareReservedUsed(OSCollection, 1); + OSMetaClassDeclareReservedUnused(OSCollection, 2); OSMetaClassDeclareReservedUnused(OSCollection, 3); OSMetaClassDeclareReservedUnused(OSCollection, 4); diff --git a/libkern/libkern/c++/OSDictionary.h b/libkern/libkern/c++/OSDictionary.h index f42c4b1d4..c09501cc4 100644 --- a/libkern/libkern/c++/OSDictionary.h +++ b/libkern/libkern/c++/OSDictionary.h @@ -308,6 +308,24 @@ public: */ virtual bool serialize(OSSerialize *s) const; + /*! + @function setOptions + @abstract This function is used to recursively set option bits in this dictionary and all child collections. + @param options Set the (options & mask) bits. + @param mask The mask of bits which need to be set, 0 to get the current value. + @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. + */ + virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); + + /*! + @function copyCollection + @abstract Do a deep copy of this dictionary and its collections. + @discussion This function copies this dictionary and all included collections recursively. Objects that don't derive from OSCollection are NOT copied, that is objects like OSString and OSData. + @param cycleDict Is a dictionary of all of the collections that have been copied so far; to start the copy at the top level just leave this field 0. + @result The newly copied collection or 0 if insufficient memory + */ + OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSMetaClassDeclareReservedUnused(OSDictionary, 0); OSMetaClassDeclareReservedUnused(OSDictionary, 1); diff --git a/libkern/libkern/c++/OSLib.h b/libkern/libkern/c++/OSLib.h index d3b597dca..57d5a720c 100644 --- a/libkern/libkern/c++/OSLib.h +++ b/libkern/libkern/c++/OSLib.h @@ -41,7 +41,9 @@ __BEGIN_DECLS #include #include +#ifdef KERNEL_PRIVATE #include +#endif __END_DECLS diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index 3cbf9cf19..02982c302 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -25,6 +25,7 @@ #include #include +#include class OSMetaClass; class OSObject; @@ -84,6 +85,57 @@ public: OSMetaClassBase::checkTypeInst(inst, typeinst) + +// Arcane evil code interprets a C++ pointer to function as specified in the +// -fapple-kext ABI, i.e. the gcc-2.95 generated code. IT DOES NOT ALLOW +// the conversion of functions that are from MULTIPLY inherited classes.
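To make the conversion concrete, here is a minimal usage sketch for the OSMemberFunctionCast macro defined just below. MyDriver, its member function, and the action typedef are hypothetical names used only for illustration; under the -fapple-kext ABI the object itself becomes the first argument of the resulting C function pointer.

    #include <libkern/c++/OSMetaClass.h>
    #include <libkern/c++/OSObject.h>

    // Hypothetical OSObject subclass, shown only to illustrate the cast.
    class MyDriver : public OSObject {
        OSDeclareDefaultStructors(MyDriver)
    public:
        void handleTimeout(void *arg);
    };

    // C-style action type: the owning object is passed as the first argument.
    typedef void (*TimeoutAction)(MyDriver *owner, void *arg);

    void invokeViaCast(MyDriver *driver)
    {
        TimeoutAction action = OSMemberFunctionCast(TimeoutAction, driver,
                                                    &MyDriver::handleTimeout);
        action(driver, 0); // same effect as driver->handleTimeout(0)
    }

As the comment above warns, the underlying helper panics at runtime if the member function comes from a multiply inherited class.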
+ +typedef void (*_ptf_t)(void); + +static inline _ptf_t +_ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void)) +{ + union { + void (OSMetaClassBase::*fIn)(void); + struct { // Pointer to member function 2.95 + unsigned short fToff; + short fVInd; + union { + _ptf_t fPFN; + short fVOff; + } u; + } fptmf2; + } map; + + map.fIn = func; + if (map.fptmf2.fToff) + panic("Multiple inheritance is not supported"); + else if (map.fptmf2.fVInd < 0) { + // Not virtual, i.e. plain member func + return map.fptmf2.u.fPFN; + } else { + union { + const OSMetaClassBase *fObj; + _ptf_t **vtablep; + } u; + u.fObj = self; + + // Virtual member function so dereference vtable + return (*u.vtablep)[map.fptmf2.fVInd - 1]; + } +} + +/*! @function OSMemberFunctionCast + @abstract Convert a pointer to a member function to a c-style pointer to function. No warnings are generated. + @param type The type of pointer function desired. + @param self The this pointer of the object whose function you wish to cache. + @param func The pointer to member function itself, something like &Base::func. + @result A pointer to function of the given type. This function will panic if an attempt is made to call it with a multiply inherited class. +*/ + +#define OSMemberFunctionCast(cptrtype, self, func) \ + (cptrtype) OSMetaClassBase:: \ + _ptmf2ptf(self, (void (OSMetaClassBase::*)(void)) func) + protected: OSMetaClassBase(); virtual ~OSMetaClassBase(); @@ -246,6 +298,8 @@ private: @abstract Given an error code log an error string using printf */ static void logError(OSReturn result); +public: + /*! @function getMetaClassWithName @abstract Lookup a meta-class in the runtime type information system @param name Name of the desired class's meta-class. @@ -317,12 +371,6 @@ public: @result If success full return a handle to be used in later calls 0 otherwise. */ static void *preModLoad(const char *kmodName); -/*! @function failModLoad - @abstract Record an error during the loading of an kernel module. - @discussion As constructor's can't return errors nor can they through exceptions in embedded-c++ an indirect error mechanism is necessary. Check mod load returns a bool to indicate the current error state of the runtime type information system. During object construction a call to failModLoad will cause an error code to be recorded. Once an error has been set the continuing construction will be ignored until the end of the pre/post load. - @param error Code of the error. */ - static void failModLoad(OSReturn error); - /*! @function checkModLoad @abstract Check if the current load attempt is still OK. @param loadHandle Handle returned when a successful call to preModLoad is made. @@ -427,6 +475,11 @@ public: @abstract 'Get'ter for the super class. @result Pointer to superclass, chain ends with 0 for OSObject. */ const OSMetaClass *getSuperClass() const; + +/*! @function getKmodName + @abstract 'Get'ter for the name of the kmod. + @result OSSymbol representing the kmod name. */ + const OSSymbol *getKmodName() const; /*! @function getClassName @abstract 'Get'ter for class name. diff --git a/libkern/libkern/c++/OSNumber.h b/libkern/libkern/c++/OSNumber.h index aa32d0fcb..c72a91c98 100644 --- a/libkern/libkern/c++/OSNumber.h +++ b/libkern/libkern/c++/OSNumber.h @@ -58,6 +58,7 @@ public: /*! @function withNumber @abstract A static constructor function to create and initialize an instance of OSNumber with a given value represented as a simple c-string. 
+ @discussion This function does not work on IOKit versions prior to 8.0 (prior to 10.4). For IOKit version 8.0 and later, it works but is limited to parsing unsigned 32 bit quantities. The format of the c-string may be decimal, hexadecimal ("0x" prefix), binary ("0b" prefix), or octal ("0" prefix). @param value A c-string representing a numeric value. @param numberOfBits The number of bits required to represent the value. @result Returns an instance of OSNumber or 0 if an error occurred. diff --git a/libkern/libkern/c++/OSOrderedSet.h b/libkern/libkern/c++/OSOrderedSet.h index 4357aee37..b9e211939 100644 --- a/libkern/libkern/c++/OSOrderedSet.h +++ b/libkern/libkern/c++/OSOrderedSet.h @@ -247,6 +247,24 @@ public: virtual bool isEqualTo(const OSMetaClassBase *anObject) const; + /*! + @function setOptions + @abstract This function is used to recursively set option bits in this set and all child collections. + @param options Set the (options & mask) bits. + @param mask The mask of bits which need to be set, 0 to get the current value. + @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. + */ + virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); + + /*! + @function copyCollection + @abstract Do a deep copy of this ordered set and its collections. + @discussion This function copies this set and all included collections recursively. Objects that don't derive from OSCollection are NOT copied, that is objects like OSString and OSData. + @param cycleDict Is a dictionary of all of the collections that have been copied so far; to start the copy at the top level just leave this field 0. + @result The newly copied collection or 0 if insufficient memory + */ + OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSMetaClassDeclareReservedUnused(OSOrderedSet, 0); OSMetaClassDeclareReservedUnused(OSOrderedSet, 1); OSMetaClassDeclareReservedUnused(OSOrderedSet, 2); diff --git a/libkern/libkern/c++/OSSet.h b/libkern/libkern/c++/OSSet.h index ee2467122..e01401e7d 100644 --- a/libkern/libkern/c++/OSSet.h +++ b/libkern/libkern/c++/OSSet.h @@ -251,6 +251,23 @@ public: */ virtual bool serialize(OSSerialize *s) const; + /*! + @function setOptions + @abstract This function is used to recursively set option bits in this set and all child collections. + @param options Set the (options & mask) bits. + @param mask The mask of bits which need to be set, 0 to get the current value. + @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. + */ + virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); + + /*! + @function copyCollection + @abstract Do a deep copy of this set. + @discussion This function copies this set and all included collections recursively. Objects that don't derive from OSCollection are NOT copied, that is objects like OSString and OSData. + @param cycleDict Is a dictionary of all of the collections that have been copied so far; to start the copy at the top level just leave this field 0.
+ @result The newly copied collection or 0 if insufficient memory + */ + OSCollection *copyCollection(OSDictionary *cycleDict = 0); OSMetaClassDeclareReservedUnused(OSSet, 0); OSMetaClassDeclareReservedUnused(OSSet, 1); diff --git a/libkern/libkern/i386/OSByteOrder.h b/libkern/libkern/i386/OSByteOrder.h index ea307d340..eb6576624 100644 --- a/libkern/libkern/i386/OSByteOrder.h +++ b/libkern/libkern/i386/OSByteOrder.h @@ -75,43 +75,43 @@ _OSSwapInt64( OS_INLINE uint16_t OSReadSwapInt16( - volatile void * base, + const volatile void * base, uintptr_t offset ) { uint16_t result; - result = *(uint16_t *)((uintptr_t)base + offset); + result = *(volatile uint16_t *)((uintptr_t)base + offset); return _OSSwapInt16(result); } OS_INLINE uint32_t OSReadSwapInt32( - volatile void * base, + const volatile void * base, uintptr_t offset ) { uint32_t result; - result = *(uint32_t *)((uintptr_t)base + offset); + result = *(volatile uint32_t *)((uintptr_t)base + offset); return _OSSwapInt32(result); } OS_INLINE uint64_t OSReadSwapInt64( - volatile void * base, + const volatile void * base, uintptr_t offset ) { - uint32_t * inp; + const volatile uint32_t * inp; union ullc { uint64_t ull; uint32_t ul[2]; } outv; - inp = (uint32_t *)((uintptr_t)base + offset); + inp = (const volatile uint32_t *)((uintptr_t)base + offset); outv.ul[0] = inp[1]; outv.ul[1] = inp[0]; outv.ul[0] = _OSSwapInt32(outv.ul[0]); @@ -129,7 +129,7 @@ OSWriteSwapInt16( uint16_t data ) { - *(uint16_t *)((uintptr_t)base + offset) = _OSSwapInt16(data); + *(volatile uint16_t *)((uintptr_t)base + offset) = _OSSwapInt16(data); } OS_INLINE @@ -140,7 +140,7 @@ OSWriteSwapInt32( uint32_t data ) { - *(uint32_t *)((uintptr_t)base + offset) = _OSSwapInt32(data); + *(volatile uint32_t *)((uintptr_t)base + offset) = _OSSwapInt32(data); } OS_INLINE @@ -151,7 +151,7 @@ OSWriteSwapInt64( uint64_t data ) { - *(uint64_t *)((uintptr_t)base + offset) = _OSSwapInt64(data); + *(volatile uint64_t *)((uintptr_t)base + offset) = _OSSwapInt64(data); } #endif /* ! _OS_OSBYTEORDERI386_H */ diff --git a/bsd/i386/user.h b/libkern/libkern/locks.h similarity index 79% rename from bsd/i386/user.h rename to libkern/libkern/locks.h index edc737230..7e012f8f7 100644 --- a/bsd/i386/user.h +++ b/libkern/libkern/locks.h @@ -1,14 +1,14 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ - * + * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. - * + * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -16,14 +16,19 @@ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * Copyright (C) 1989, NeXT, Inc. - * - * next/user.h * - * We can use the default definition of u, so this file is empty.
+ * @APPLE_LICENSE_HEADER_END@ */ +#ifndef LIBKERN_LOCKS_H +#define LIBKERN_LOCKS_H + +#include + +__BEGIN_DECLS + +#include + +__END_DECLS + +#endif /* LIBKERN_LOCKS_H */ diff --git a/libkern/libkern/ppc/OSByteOrder.h b/libkern/libkern/ppc/OSByteOrder.h index 3bda80d1b..3fa0081bc 100644 --- a/libkern/libkern/ppc/OSByteOrder.h +++ b/libkern/libkern/ppc/OSByteOrder.h @@ -41,7 +41,7 @@ OS_INLINE uint16_t OSReadSwapInt16( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -56,7 +56,7 @@ OSReadSwapInt16( OS_INLINE uint32_t OSReadSwapInt32( - volatile void * base, + const volatile void * base, uintptr_t offset ) { @@ -71,17 +71,17 @@ OSReadSwapInt32( OS_INLINE uint64_t OSReadSwapInt64( - volatile void * base, + const volatile void * base, uintptr_t offset ) { - uint64_t * inp; + const volatile uint64_t * inp; union ullc { uint64_t ull; uint32_t ul[2]; } outv; - inp = (uint64_t *)base; + inp = (const volatile uint64_t *)base; outv.ul[0] = OSReadSwapInt32(inp, offset + 4); outv.ul[1] = OSReadSwapInt32(inp, offset); return outv.ull; @@ -125,14 +125,14 @@ OSWriteSwapInt64( uint64_t data ) { - uint64_t * outp; - union ullc { + volatile uint64_t * outp; + volatile union ullc { uint64_t ull; uint32_t ul[2]; } *inp; - outp = (uint64_t *)base; - inp = (union ullc *)&data; + outp = (volatile uint64_t *)base; + inp = (volatile union ullc *)&data; OSWriteSwapInt32(outp, offset, inp->ul[1]); OSWriteSwapInt32(outp, offset + 4, inp->ul[0]); } diff --git a/libkern/libkern/sysctl.h b/libkern/libkern/sysctl.h new file mode 100644 index 000000000..c798bf3af --- /dev/null +++ b/libkern/libkern/sysctl.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef LIBKERN_SYSCTL_H +#define LIBKERN_SYSCTL_H + +#include + +__BEGIN_DECLS + +#include + +/* + * These are the supported HW selectors for sysctlbyname. Parameters that are byte counts or frequencies are 64 bit numbers. + * All other parameters are 32 bit numbers. + * + * hw.memsize - The number of bytes of physical memory in the system. + * + * hw.ncpu - The maximum number of processors that could be available this boot. + * Use this value for sizing of static per processor arrays; i.e. processor load statistics. + * + * hw.activecpu - The number of processors currently available for executing threads. + * Use this number to determine the number of threads to create in SMP aware applications. + * This number can change when power management modes are changed. + * + * hw.physicalcpu - The number of physical processors available in the current power management mode.
+ * hw.physicalcpu_max - The maximum number of physical processors that could be available this boot + * + * hw.logicalcpu - The number of logical processors available in the current power management mode. + * hw.logicalcpu_max - The maximum number of logical processors that could be available this boot + * + * hw.tbfrequency - This gives the time base frequency used by the OS and is the basis of all timing services. + * In general it is better to use mach's or higher level timing services, but this value + * is needed to convert the PPC Time Base registers to real time. + * + * hw.cpufrequency - These values provide the current, min and max cpu frequency. The min and max are for + * hw.cpufrequency_max - all power management modes. The current frequency is the max frequency in the current mode. + * hw.cpufrequency_min - All frequencies are in Hz. + * + * hw.busfrequency - These values provide the current, min and max bus frequency. The min and max are for + * hw.busfrequency_max - all power management modes. The current frequency is the max frequency in the current mode. + * hw.busfrequency_min - All frequencies are in Hz. + * + * hw.cputype - These values provide the mach-o cpu type and subtype. A complete list is in <mach/machine.h>. + * hw.cpusubtype - These values should be used to determine what processor family the running cpu is from so that + * the best binary can be chosen, or the best dynamic code generated. They should not be used + * to determine if a given processor feature is available. + * hw.cputhreadtype - This value will be present if the processor supports threads. Like hw.cpusubtype this selector + * should not be used to infer features, and only used to name the processor's thread architecture. + * The values are defined in <mach/machine.h>. + * + * hw.byteorder - Gives the byte order of the processor. 4321 for big endian, 1234 for little. + * + * hw.pagesize - Gives the size in bytes of the pages used by the processor and VM system. + * + * hw.cachelinesize - Gives the size in bytes of the processor's cache lines. + * This value should be used to control the strides of loops that use cache control instructions + * like dcbz, dcbt or dcbst. + * + * hw.l1dcachesize - These values provide the size in bytes of the L1, L2 and L3 caches. If a cache is not present + * hw.l1icachesize - then the selector will return an error. + * hw.l2cachesize - + * hw.l3cachesize - + * + * + * These are the selectors for optional processor features. Selectors that return errors are not supported on the system. + * Supported features will return 1 if they are recommended or 0 if they are supported but are not expected to help performance. + * Future versions of these selectors may return larger values as necessary so it is best to test for non-zero.
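A brief sketch of the convention just described (an error from a selector means the feature is unsupported; otherwise test the returned value for non-zero rather than for 1), using the sysctlbyname declaration at the end of this header. The selector queried is only an example; the individual hw.optional selectors are listed below.

    #include <libkern/sysctl.h>

    // Probe an optional CPU feature by name.
    static bool cpu_has_altivec(void)
    {
        int val = 0;
        size_t len = sizeof(val);

        if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) != 0)
            return false; // selector returned an error: feature unsupported
        return val != 0;  // non-zero means supported (1 means recommended)
    }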
+ * + * hw.optional.floatingpoint - Floating Point Instructions + * hw.optional.altivec - AltiVec Instructions + * hw.optional.graphicsops - Graphics Operations + * hw.optional.64bitops - 64-bit Instructions + * hw.optional.fsqrt - HW Floating Point Square Root Instruction + * hw.optional.stfiwx - Store Floating Point as Integer Word Indexed Instructions + * hw.optional.dcba - Data Cache Block Allocate Instruction + * hw.optional.datastreams - Data Streams Instructions + * hw.optional.dcbtstreams - Data Cache Block Touch Streams Instruction Form + * + */ + +/* + * Sysctl handling + */ +int sysctlbyname(const char *, void *, size_t *, void *, size_t); + +__END_DECLS + +#endif /* LIBKERN_SYSCTL_H */ diff --git a/libkern/libkern/version.h.template b/libkern/libkern/version.h.template new file mode 100644 index 000000000..7f2569d4c --- /dev/null +++ b/libkern/libkern/version.h.template @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifndef LIBKERN_VERSION_H
+#define LIBKERN_VERSION_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Kernel versions conform to kext version strings, as described in:
+ * http://developer.apple.com/technotes/tn/tn1132.html
+ */
+
+/* VERSION_MAJOR, version_major is an integer that represents the major version
+ * of the kernel
+ */
+#define VERSION_MAJOR ###KERNEL_VERSION_MAJOR###
+extern const int version_major;
+
+/* VERSION_MINOR, version_minor is an integer that represents the minor version
+ * of the kernel
+ */
+#define VERSION_MINOR ###KERNEL_VERSION_MINOR###
+extern const int version_minor;
+
+/* VERSION_VARIANT, version_variant is a string that contains the revision,
+ * stage, and prerelease level of the kernel
+ */
+#define VERSION_VARIANT "###KERNEL_VERSION_VARIANT###"
+extern const char version_variant[];
+
+/* VERSION_REVISION, version_revision is an integer that represents the revision
+ * of the kernel
+ */
+#define VERSION_REVISION ###KERNEL_VERSION_REVISION###
+extern const int version_revision;
+
+/* VERSION_STAGE, version_stage, is an integer set to one of the following: */
+#define VERSION_STAGE_DEV	0x20
+#define VERSION_STAGE_ALPHA	0x40
+#define VERSION_STAGE_BETA	0x60
+#define VERSION_STAGE_RELEASE	0x80
+#define VERSION_STAGE ###KERNEL_VERSION_STAGE###
+extern const int version_stage;
+
+/* VERSION_PRERELEASE_LEVEL, version_prerelease_level, is an integer sequence
+ * number to distinguish between pre-release builds
+ */
+#define VERSION_PRERELEASE_LEVEL ###KERNEL_VERSION_PRERELEASE_LEVEL###
+extern const int version_prerelease_level;
+
+/* OSTYPE, ostype, is a string as returned by uname -s */
+#define OSTYPE "Darwin"
+extern const char ostype[];
+
+/* OSRELEASE, osrelease, is a string as returned by uname -r */
+#define OSRELEASE "###KERNEL_VERSION_LONG###"
+extern const char osrelease[];
+
+/* osbuilder is a string identifying the builder of this kernel */
+extern const char osbuilder[];
+
+/* version is a string of the following form, as returned by uname -v:
+ *   "Darwin Kernel Version <osrelease>: <build date>; <osbuilder>:<build objpath>"
+ */
+
+extern const char version[];
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* LIBKERN_VERSION_H */
diff --git a/libkern/mach-o/loader.h b/libkern/mach-o/loader.h
index 277b2b1e2..59450ff88 100644
--- a/libkern/mach-o/loader.h
+++ b/libkern/mach-o/loader.h
@@ -45,22 +45,38 @@
 #include
 
 /*
- * The mach header appears at the very beginning of the object file.
+ * XXX historically, we have not included this header. Continue to not do so.
+ *
+ * #include
+ */
+
+/*
+ * The mach header appears at the very beginning of the object file; it
+ * is the same for both 32-bit and 64-bit architectures.
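+ *
+ * As an illustrative sketch (assuming addr points to the start of a mapped
+ * image), the two layouts can be told apart by the magic field, using the
+ * constants defined below:
+ *
+ *	uint32_t magic = *(uint32_t *)addr;
+ *
+ *	if (magic == MH_MAGIC || magic == MH_CIGAM)
+ *		;	/* 32-bit header (MH_CIGAM: byte-swapped) */
+ *	else if (magic == MH_MAGIC_64 || magic == MH_CIGAM_64)
+ *		;	/* 64-bit header (MH_CIGAM_64: byte-swapped) */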
*/ struct mach_header { - unsigned long magic; /* mach magic number identifier */ + uint32_t magic; /* mach magic number identifier */ cpu_type_t cputype; /* cpu specifier */ cpu_subtype_t cpusubtype; /* machine specifier */ - unsigned long filetype; /* type of file */ - unsigned long ncmds; /* number of load commands */ - unsigned long sizeofcmds; /* the size of all the load commands */ - unsigned long flags; /* flags */ + uint32_t filetype; /* type of file */ + uint32_t ncmds; /* number of load commands */ + uint32_t sizeofcmds; /* the size of all the load commands */ + uint32_t flags; /* flags */ }; -/* Constant for the magic field of the mach_header */ +/* Constant for the magic field of the mach_header (32-bit architectures) */ #define MH_MAGIC 0xfeedface /* the mach magic number */ #define MH_CIGAM NXSwapInt(MH_MAGIC) +/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ +#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ +#define MH_CIGAM_64 NXSwapInt(MH_MAGIC_64) + +/* Constants for the cmd field of new load commands, the type */ +#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */ +#define LC_ROUTINES_64 0x1a /* 64-bit image routines */ + + /* * The layout of the file depends on the filetype. For all but the MH_OBJECT * file type the segments are padded out and aligned on a segment alignment @@ -117,7 +133,9 @@ struct mach_header { * of the particular load command structure plus anything that follows it that * is a part of the load command (i.e. section structures, strings, etc.). To * advance to the next load command the cmdsize can be added to the offset or - * pointer of the current load command. The cmdsize MUST be a multiple of + * pointer of the current load command. The cmdsize for 32-bit architectures + * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple + * of 8 bytes (these are forever the maximum alignment of any load commands). * sizeof(long) (this is forever the maximum alignment of any load commands). * The padded bytes must be zero. All tables in the object file must also * follow these rules so the file can be memory mapped. Otherwise the pointers @@ -173,7 +191,7 @@ union lc_str { * section structures directly follow the segment command and their size is * reflected in cmdsize. */ -struct segment_command { +struct segment_command { /* for 32-bit architectures */ unsigned long cmd; /* LC_SEGMENT */ unsigned long cmdsize; /* includes sizeof section structs */ char segname[16]; /* segment name */ @@ -187,6 +205,27 @@ struct segment_command { unsigned long flags; /* flags */ }; +/* + * The 64-bit segment load command indicates that a part of this file is to be + * mapped into a 64-bit task's address space. If the 64-bit segment has + * sections then section_64 structures directly follow the 64-bit segment + * command and their size is reflected in cmdsize. 
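+ *
+ * As an illustrative sketch (assuming mh points to the 32-bit mach_header of a
+ * well-formed image), these commands are found by walking the load command
+ * list, advancing by cmdsize at each step as described earlier:
+ *
+ *	struct load_command *lc = (struct load_command *)(mh + 1);
+ *	uint32_t i;
+ *
+ *	for (i = 0; i < mh->ncmds; i++) {
+ *		if (lc->cmd == LC_SEGMENT_64)
+ *			;	/* found a 64-bit segment command */
+ *		lc = (struct load_command *)((char *)lc + lc->cmdsize);
+ *	}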
+ */
+struct segment_command_64 { /* for 64-bit architectures */
+	uint32_t	cmd;		/* LC_SEGMENT_64 */
+	uint32_t	cmdsize;	/* includes sizeof section_64 structs */
+	char		segname[16];	/* segment name */
+	uint64_t	vmaddr;		/* memory address of this segment */
+	uint64_t	vmsize;		/* memory size of this segment */
+	uint64_t	fileoff;	/* file offset of this segment */
+	uint64_t	filesize;	/* amount to map from the file */
+	vm_prot_t	maxprot;	/* maximum VM protection */
+	vm_prot_t	initprot;	/* initial VM protection */
+	uint32_t	nsects;		/* number of sections in segment */
+	uint32_t	flags;		/* flags */
+};
+
+
 /* Constants for the flags field of the segment_command */
 #define	SG_HIGHVM	0x1	/* the file contents for this segment is for
 				   the high part of the VM space, the low part
@@ -206,7 +245,9 @@ struct segment_command {
 * and load commands of the object file before its first section. The zero
 * fill sections are always last in their segment (in all formats). This
 * allows the zeroed segment padding to be mapped into memory where zero fill
- * sections might be.
+ * sections might be. The gigabyte zero fill sections, those with the section
+ * type S_GB_ZEROFILL, can only be in a segment with sections of this type.
+ * These segments are then placed after all other segments.
 *
 * The MH_OBJECT format has all of its sections in one segment for
 * compactness. There is no padding to a specified segment boundary and the
@@ -223,7 +264,7 @@ struct segment_command {
 * fields of the section structure for mach object files is described in the
 * header file .
 */
-struct section {
+struct section { /* for 32-bit architectures */
	char		sectname[16];	/* name of this section */
	char		segname[16];	/* segment this section goes in */
	unsigned long	addr;		/* memory address of this section */
	unsigned long	size;		/* size in bytes of this section */
	unsigned long	offset;		/* file offset of this section */
	unsigned long	align;		/* section alignment (power of 2) */
	unsigned long	reloff;		/* file offset of relocation entries */
	unsigned long	nreloc;		/* number of relocation entries */
	unsigned long	flags;		/* flags (section type and attributes)*/
	unsigned long	reserved1;	/* reserved */
	unsigned long	reserved2;	/* reserved */
 };
 
+struct section_64 { /* for 64-bit architectures */
+	char		sectname[16];	/* name of this section */
+	char		segname[16];	/* segment this section goes in */
+	uint64_t	addr;		/* memory address of this section */
+	uint64_t	size;		/* size in bytes of this section */
+	uint32_t	offset;		/* file offset of this section */
+	uint32_t	align;		/* section alignment (power of 2) */
+	uint32_t	reloff;		/* file offset of relocation entries */
+	uint32_t	nreloc;		/* number of relocation entries */
+	uint32_t	flags;		/* flags (section type and attributes)*/
+	uint32_t	reserved1;	/* reserved (for offset or index) */
+	uint32_t	reserved2;	/* reserved (for count or sizeof) */
+	uint32_t	reserved3;	/* reserved */
+};
+
+
 /*
 * The flags field of a section structure is separated into two parts a section
 * type and section attributes.
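 *
 * As an illustrative sketch (assuming sp points to a struct section), the two
 * parts can be separated with the SECTION_TYPE and SECTION_ATTRIBUTES masks
 * defined below in this header:
 *
 *	unsigned long type  = sp->flags & SECTION_TYPE;		/* e.g. S_ZEROFILL */
 *	unsigned long attrs = sp->flags & SECTION_ATTRIBUTES;
 *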
 * The section types are mutually exclusive (it
@@ -666,6 +723,34 @@ struct dylib_module {
		objc_module_info_size;	/* the (__OBJC,__module_info) section */
 };
 
+/* a 64-bit module table entry */
+struct dylib_module_64 {
+	uint32_t module_name;	/* the module name (index into string table) */
+
+	uint32_t iextdefsym;	/* index into externally defined symbols */
+	uint32_t nextdefsym;	/* number of externally defined symbols */
+	uint32_t irefsym;	/* index into reference symbol table */
+	uint32_t nrefsym;	/* number of reference symbol table entries */
+	uint32_t ilocalsym;	/* index into symbols for local symbols */
+	uint32_t nlocalsym;	/* number of local symbols */
+
+	uint32_t iextrel;	/* index into external relocation entries */
+	uint32_t nextrel;	/* number of external relocation entries */
+
+	uint32_t iinit_iterm;	/* low 16 bits are the index into the init
+				   section, high 16 bits are the index into
+				   the term section */
+	uint32_t ninit_nterm;	/* low 16 bits are the number of init section
+				   entries, high 16 bits are the number of
+				   term section entries */
+
+	uint32_t			/* for this module size of the */
+		objc_module_info_size;	/* (__OBJC,__module_info) section */
+	uint64_t			/* for this module address of the start of */
+		objc_module_info_addr;	/* the (__OBJC,__module_info) section */
+};
+
+
 /*
 * The entries in the reference symbol table are used when loading the module
 * (both by the static and dynamic link editors) and if the module is unloaded
diff --git a/libkern/mach-o/mach_header.h b/libkern/mach-o/mach_header.h
index 4d14d1380..31daf7349 100644
--- a/libkern/mach-o/mach_header.h
+++ b/libkern/mach-o/mach_header.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -22,7 +22,14 @@
 /*
 * File: kern/mach_header.h
 *
- * Definitions for accessing mach-o headers.
+ * Definitions for accessing mach-o headers. This header wraps the
+ * routines defined in osfmk/mach-o/mach_header.c; this is made clear
+ * by the existence of the getsectcmdsymtabfromheader() prototype.
+ *
+ * NOTE: The functions prototyped by this header only operate against
+ *	 32 bit mach headers. Many of these functions imply the
+ *	 currently running kernel, and cannot be used against mach
+ *	 headers other than that of the currently running kernel.
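+ *
+ * As an illustrative sketch (a hypothetical in-kernel caller), the running
+ * kernel's own __TEXT,__text section could be located with the getsectbyname()
+ * routine prototyped below:
+ *
+ *	struct section *sp = getsectbyname("__TEXT", "__text");
+ *
+ *	if (sp != NULL)
+ *		;	/* sp->addr and sp->size describe the section */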
* * HISTORY * 29-Jan-92 Mike DeMoney (mike@next.com) @@ -46,17 +53,17 @@ struct segment_command *nextseg(struct segment_command *sgp); struct segment_command *nextsegfromheader( struct mach_header *header, struct segment_command *seg); -struct segment_command *getsegbyname(char *seg_name); +struct segment_command *getsegbyname(const char *seg_name); struct segment_command *getsegbynamefromheader( struct mach_header *header, - char *seg_name); -void *getsegdatafromheader(struct mach_header *, char *, int *); -struct section *getsectbyname(char *seg_name, char *sect_name); + const char *seg_name); +void *getsegdatafromheader(struct mach_header *, const char *, int *); +struct section *getsectbyname(const char *seg_name, const char *sect_name); struct section *getsectbynamefromheader( struct mach_header *header, - char *seg_name, - char *sect_name); -void *getsectdatafromheader(struct mach_header *, char *, char *, int *); + const char *seg_name, + const char *sect_name); +void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); struct section *firstsect(struct segment_command *sgp); struct section *nextsect(struct segment_command *sgp, struct section *sp); struct fvmlib_command *fvmlib(void); diff --git a/libkern/ppc/OSAtomic.s b/libkern/ppc/OSAtomic.s index b0d4afb6f..f1834d998 100644 --- a/libkern/ppc/OSAtomic.s +++ b/libkern/ppc/OSAtomic.s @@ -50,8 +50,14 @@ $0: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; /* -int OSCompareAndSwap( UInt32 oldVal, UInt32 newVal, UInt32 * addr ) -This is now an alias to hw_compare_and_store, see xnu/libkern/Makefile +int OSCompareAndSwap( UInt32 oldVal, UInt32 newVal, UInt32 * addr ) + This is now an alias to hw_compare_and_store, see xnu/libkern/Makefile. + +void * OSDequeueAtomic(void ** inList, SInt32 inOffset) + This is also aliased, to hw_dequeue_atomic. + +void OSEnqueueAtomic(void ** inList, void * inNewLink, SInt32 inOffset) + This is aliased to hw_queue_atomic. */ /* diff --git a/libkern/stdio/scanf.c b/libkern/stdio/scanf.c new file mode 100644 index 000000000..99c5edbaa --- /dev/null +++ b/libkern/stdio/scanf.c @@ -0,0 +1,660 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#if 0 /* XXX coming soon */ +#include +#else +static inline int +isspace(char c) +{ + return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); +} +#endif +#include +#include +#include +#include + +#define BUF 32 /* Maximum length of numeric string. */ + +/* + * Flags used during conversion. + */ +#define LONG 0x01 /* l: long or double */ +#define SHORT 0x04 /* h: short */ +#define SUPPRESS 0x08 /* *: suppress assignment */ +#define POINTER 0x10 /* p: void * (as hex) */ +#define NOSKIP 0x20 /* [ or c: do not skip blanks */ +#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ +#define SHORTSHORT 0x4000 /* hh: char */ +#define UNSIGNED 0x8000 /* %[oupxX] conversions */ + +/* + * The following are used in numeric conversions only: + * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; + * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. + */ +#define SIGNOK 0x40 /* +/- is (still) legal */ +#define NDIGITS 0x80 /* no digits detected */ + +#define DPTOK 0x100 /* (float) decimal point is still legal */ +#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ + +#define PFXOK 0x100 /* 0x prefix is (still) legal */ +#define NZDIGITS 0x200 /* no zero digits detected */ + +/* + * Conversion types. + */ +#define CT_CHAR 0 /* %c conversion */ +#define CT_CCL 1 /* %[...] conversion */ +#define CT_STRING 2 /* %s conversion */ +#define CT_INT 3 /* %[dioupxX] conversion */ + +static const u_char *__sccl(char *, const u_char *); + +int +sscanf(const char *ibuf, const char *fmt, ...) 
+{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = vsscanf(ibuf, fmt, ap); + va_end(ap); + return(ret); +} + +int +vsscanf(const char *inp, char const *fmt0, va_list ap) +{ + int inr; + const u_char *fmt = (const u_char *)fmt0; + int c; /* character from format, or conversion */ + size_t width; /* field width, or 0 */ + char *p; /* points into all kinds of strings */ + int n; /* handy integer */ + int flags; /* flags as defined above */ + char *p0; /* saves original value of p when necessary */ + int nassigned; /* number of fields assigned */ + int nconversions; /* number of conversions */ + int nread; /* number of characters consumed from fp */ + int base; /* base argument to conversion function */ + char ccltab[256]; /* character class table for %[...] */ + char buf[BUF]; /* buffer for numeric conversions */ + + /* `basefix' is used to avoid `if' tests in the integer scanner */ + static short basefix[17] = + { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + + inr = strlen(inp); + + nassigned = 0; + nconversions = 0; + nread = 0; + base = 0; /* XXX just to keep gcc happy */ + for (;;) { + c = *fmt++; + if (c == 0) + return (nassigned); + if (isspace(c)) { + while (inr > 0 && isspace(*inp)) + nread++, inr--, inp++; + continue; + } + if (c != '%') + goto literal; + width = 0; + flags = 0; + /* + * switch on the format. continue if done; + * break once format type is derived. + */ +again: c = *fmt++; + switch (c) { + case '%': +literal: + if (inr <= 0) + goto input_failure; + if (*inp != c) + goto match_failure; + inr--, inp++; + nread++; + continue; + + case '*': + flags |= SUPPRESS; + goto again; + case 'l': + if (flags & LONG) { + flags &= ~LONG; + flags |= LONGLONG; + } else + flags |= LONG; + goto again; + case 'q': + flags |= LONGLONG; /* not quite */ + goto again; + case 'h': + if (flags & SHORT) { + flags &= ~SHORT; + flags |= SHORTSHORT; + } else + flags |= SHORT; + goto again; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + width = width * 10 + c - '0'; + goto again; + + /* + * Conversions. + */ + case 'd': + c = CT_INT; + base = 10; + break; + + case 'i': + c = CT_INT; + base = 0; + break; + + case 'o': + c = CT_INT; + flags |= UNSIGNED; + base = 8; + break; + + case 'u': + c = CT_INT; + flags |= UNSIGNED; + base = 10; + break; + + case 'X': + case 'x': + flags |= PFXOK; /* enable 0x prefixing */ + c = CT_INT; + flags |= UNSIGNED; + base = 16; + break; + + case 's': + c = CT_STRING; + break; + + case '[': + fmt = __sccl(ccltab, fmt); + flags |= NOSKIP; + c = CT_CCL; + break; + + case 'c': + flags |= NOSKIP; + c = CT_CHAR; + break; + + case 'p': /* pointer format is like hex */ + flags |= POINTER | PFXOK; + c = CT_INT; + flags |= UNSIGNED; + base = 16; + break; + + case 'n': + nconversions++; + if (flags & SUPPRESS) /* ??? */ + continue; + if (flags & SHORTSHORT) + *va_arg(ap, char *) = nread; + else if (flags & SHORT) + *va_arg(ap, short *) = nread; + else if (flags & LONG) + *va_arg(ap, long *) = nread; + else if (flags & LONGLONG) + *va_arg(ap, long long *) = nread; + else + *va_arg(ap, int *) = nread; + continue; + } + + /* + * We have a conversion that requires input. + */ + if (inr <= 0) + goto input_failure; + + /* + * Consume leading white space, except for formats + * that suppress this. 
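+		 * (Illustrative note: "%d" therefore skips any leading
+		 * blanks in the input, while "%c" and "%[" set NOSKIP
+		 * above and consume the input as-is.)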
+ */ + if ((flags & NOSKIP) == 0) { + while (isspace(*inp)) { + nread++; + if (--inr > 0) + inp++; + else + goto input_failure; + } + /* + * Note that there is at least one character in + * the buffer, so conversions that do not set NOSKIP + * can no longer result in an input failure. + */ + } + + /* + * Do the conversion. + */ + switch (c) { + + case CT_CHAR: + /* scan arbitrary characters (sets NOSKIP) */ + if (width == 0) + width = 1; + if (flags & SUPPRESS) { + size_t sum = 0; + for (;;) { + if ((n = inr) < (int)width) { + sum += n; + width -= n; + inp += n; + if (sum == 0) + goto input_failure; + break; + } else { + sum += width; + inr -= width; + inp += width; + break; + } + } + nread += sum; + } else { + bcopy(inp, va_arg(ap, char *), width); + inr -= width; + inp += width; + nread += width; + nassigned++; + } + nconversions++; + break; + + case CT_CCL: + /* scan a (nonempty) character class (sets NOSKIP) */ + if (width == 0) + width = (size_t)~0; /* `infinity' */ + /* take only those things in the class */ + if (flags & SUPPRESS) { + n = 0; + while (ccltab[(unsigned char)*inp]) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (n == 0) + goto input_failure; + break; + } + } + if (n == 0) + goto match_failure; + } else { + p0 = p = va_arg(ap, char *); + while (ccltab[(unsigned char)*inp]) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) { + if (p == p0) + goto input_failure; + break; + } + } + n = p - p0; + if (n == 0) + goto match_failure; + *p = 0; + nassigned++; + } + nread += n; + nconversions++; + break; + + case CT_STRING: + /* like CCL, but zero-length string OK, & no NOSKIP */ + if (width == 0) + width = (size_t)~0; + if (flags & SUPPRESS) { + n = 0; + while (!isspace(*inp)) { + n++, inr--, inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + nread += n; + } else { + p0 = p = va_arg(ap, char *); + while (!isspace(*inp)) { + inr--; + *p++ = *inp++; + if (--width == 0) + break; + if (inr <= 0) + break; + } + *p = 0; + nread += p - p0; + nassigned++; + } + nconversions++; + continue; + + case CT_INT: + /* scan an integer as if by the conversion function */ +#ifdef hardway + if (width == 0 || width > sizeof(buf) - 1) + width = sizeof(buf) - 1; +#else + /* size_t is unsigned, hence this optimisation */ + if (--width > sizeof(buf) - 2) + width = sizeof(buf) - 2; + width++; +#endif + flags |= SIGNOK | NDIGITS | NZDIGITS; + for (p = buf; width; width--) { + c = *inp; + /* + * Switch on the character; `goto ok' + * if we accept it as a part of number. + */ + switch (c) { + + /* + * The digit 0 is always legal, but is + * special. For %i conversions, if no + * digits (zero or nonzero) have been + * scanned (only signs), we will have + * base==0. In that case, we should set + * it to 8 and enable 0x prefixing. + * Also, if we have not scanned zero digits + * before this, do not turn off prefixing + * (someone else will turn it off if we + * have scanned any nonzero digits). 
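+			 * (Illustrative example: with %i, scanning "0x1f"
+			 * ends up in base 16 via the 0x prefix, "017" in
+			 * base 8, and "17" in base 10.)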
+ */ + case '0': + if (base == 0) { + base = 8; + flags |= PFXOK; + } + if (flags & NZDIGITS) + flags &= ~(SIGNOK|NZDIGITS|NDIGITS); + else + flags &= ~(SIGNOK|PFXOK|NDIGITS); + goto ok; + + /* 1 through 7 always legal */ + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + base = basefix[base]; + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* digits 8 and 9 ok iff decimal or hex */ + case '8': case '9': + base = basefix[base]; + if (base <= 8) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* letters ok iff hex */ + case 'A': case 'B': case 'C': + case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': + case 'd': case 'e': case 'f': + /* no need to fix base here */ + if (base <= 10) + break; /* not legal here */ + flags &= ~(SIGNOK | PFXOK | NDIGITS); + goto ok; + + /* sign ok only as first character */ + case '+': case '-': + if (flags & SIGNOK) { + flags &= ~SIGNOK; + goto ok; + } + break; + + /* x ok iff flag still set & 2nd char */ + case 'x': case 'X': + if (flags & PFXOK && p == buf + 1) { + base = 16; /* if %i */ + flags &= ~PFXOK; + goto ok; + } + break; + } + + /* + * If we got here, c is not a legal character + * for a number. Stop accumulating digits. + */ + break; + ok: + /* + * c is legal: store it and look at the next. + */ + *p++ = c; + if (--inr > 0) + inp++; + else + break; /* end of input */ + } + /* + * If we had only a sign, it is no good; push + * back the sign. If the number ends in `x', + * it was [sign] '0' 'x', so push back the x + * and treat it as [sign] '0'. + */ + if (flags & NDIGITS) { + if (p > buf) { + inp--; + inr++; + } + goto match_failure; + } + c = ((u_char *)p)[-1]; + if (c == 'x' || c == 'X') { + --p; + inp--; + inr++; + } + if ((flags & SUPPRESS) == 0) { + u_quad_t res; + + *p = 0; + if ((flags & UNSIGNED) == 0) + res = strtoq(buf, (char **)NULL, base); + else + res = strtouq(buf, (char **)NULL, base); + if (flags & POINTER) + *va_arg(ap, void **) = + (void *)(uintptr_t)res; + else if (flags & SHORTSHORT) + *va_arg(ap, char *) = res; + else if (flags & SHORT) + *va_arg(ap, short *) = res; + else if (flags & LONG) + *va_arg(ap, long *) = res; + else if (flags & LONGLONG) + *va_arg(ap, long long *) = res; + else + *va_arg(ap, int *) = res; + nassigned++; + } + nread += p - buf; + nconversions++; + break; + + } + } +input_failure: + return (nconversions != 0 ? nassigned : -1); +match_failure: + return (nassigned); +} + +/* + * Fill in the given table from the scanset at the given format + * (just after `['). Return a pointer to the character past the + * closing `]'. The table has a 1 wherever characters should be + * considered part of the scanset. + */ +static const u_char * +__sccl(char *tab, const u_char *fmt) +{ + int c, n, v; + + /* first `clear' the whole table */ + c = *fmt++; /* first char hat => negated scanset */ + if (c == '^') { + v = 1; /* default => accept */ + c = *fmt++; /* get new first char */ + } else + v = 0; /* default => reject */ + + /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ + (void) memset(tab, v, 256); + + if (c == 0) + return (fmt - 1);/* format ended before closing ] */ + + /* + * Now set the entries corresponding to the actual scanset + * to the opposite of the above. + * + * The first character may be ']' (or '-') without being special; + * the last character may be '-'. 
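+	 *
+	 * (Illustrative example: the scanset built for "%[a-c]" accepts
+	 * only 'a', 'b' and 'c', so sscanf("abc123", "%[a-c]", buf)
+	 * stores "abc"; "%[^]x]" accepts anything except ']' and 'x'.)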
+	 */
+	v = 1 - v;
+	for (;;) {
+		tab[c] = v;		/* take character c */
+doswitch:
+		n = *fmt++;		/* and examine the next */
+		switch (n) {
+
+		case 0:			/* format ended too soon */
+			return (fmt - 1);
+
+		case '-':
+			/*
+			 * A scanset of the form
+			 *	[01+-]
+			 * is defined as `the digit 0, the digit 1,
+			 * the character +, the character -', but
+			 * the effect of a scanset such as
+			 *	[a-zA-Z0-9]
+			 * is implementation defined.  The V7 Unix
+			 * scanf treats `a-z' as `the letters a through
+			 * z', but treats `a-a' as `the letter a, the
+			 * character -, and the letter a'.
+			 *
+			 * For compatibility, the `-' is not considered
+			 * to define a range if the character following
+			 * it is either a close bracket (required by ANSI)
+			 * or is not numerically greater than the character
+			 * we just stored in the table (c).
+			 */
+			n = *fmt;
+			if (n == ']' || n < c) {
+				c = '-';
+				break;	/* resume the for(;;) */
+			}
+			fmt++;
+			/* fill in the range */
+			do {
+				tab[++c] = v;
+			} while (c < n);
+			c = n;
+			/*
+			 * Alas, the V7 Unix scanf also treats formats
+			 * such as [a-c-e] as `the letters a through e'.
+			 * This too is permitted by the standard....
+			 */
+			goto doswitch;
+			break;
+
+		case ']':		/* end of scanset */
+			return (fmt);
+
+		default:		/* just another character */
+			c = n;
+			break;
+		}
+	}
+	/* NOTREACHED */
+}
diff --git a/libkern/uuid/Makefile b/libkern/uuid/Makefile
new file mode 100644
index 000000000..03789630f
--- /dev/null
+++ b/libkern/uuid/Makefile
@@ -0,0 +1,37 @@
+export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
+export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+INSTINC_SUBDIRS = \
+
+INSTINC_SUBDIRS_PPC = \
+
+INSTINC_SUBDIRS_I386 = \
+
+EXPINC_SUBDIRS = \
+
+EXPINC_SUBDIRS_PPC = \
+
+EXPINC_SUBDIRS_I386 = \
+
+# uuid.h is now installed by bsd/uuid/Makefile
+DATAFILES = \
+
+INSTALL_MI_LIST = \
+
+INSTALL_MI_DIR = \
+
+EXPORT_MI_LIST = ${DATAFILES}
+
+EXPORT_MI_DIR =
+
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
+
+
diff --git a/libkern/uuid/uuid.c b/libkern/uuid/uuid.c
new file mode 100644
index 000000000..8b6573281
--- /dev/null
+++ b/libkern/uuid/uuid.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, and the entire permission notice in its entirety,
+ *    including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+UUID_DEFINE(UUID_NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+static void
+read_node(uint8_t *node)
+{
+	struct ifnet *ifp;
+	struct ifaddr *ifa;
+	struct sockaddr_dl *sdl;
+
+	ifnet_head_lock_shared();
+	TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+			sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+			if (sdl && sdl->sdl_family == AF_LINK && sdl->sdl_type == IFT_ETHER) {
+				memcpy(node, LLADDR(sdl), 6);
+				ifnet_head_done();
+				return;
+			}
+		}
+	}
+	ifnet_head_done();
+
+	read_random(node, 6);
+	node[0] |= 0x01;
+}
+
+static uint64_t
+read_time(void)
+{
+	struct timespec tv;
+
+	nanotime(&tv);
+
+	return (tv.tv_sec * 10000000ULL) + (tv.tv_nsec / 100ULL) + 0x01B21DD213814000ULL;
+}
+
+void
+uuid_clear(uuid_t uu)
+{
+	memset(uu, 0, sizeof(uuid_t));
+}
+
+int
+uuid_compare(const uuid_t uu1, const uuid_t uu2)
+{
+	return memcmp(uu1, uu2, sizeof(uuid_t));
+}
+
+void
+uuid_copy(uuid_t dst, const uuid_t src)
+{
+	memcpy(dst, src, sizeof(uuid_t));
+}
+
+void
+uuid_generate_random(uuid_t out)
+{
+	read_random(out, sizeof(uuid_t));
+
+	out[6] = (out[6] & 0x0F) | 0x40;
+	out[8] = (out[8] & 0x3F) | 0x80;
+}
+
+void
+uuid_generate_time(uuid_t out)
+{
+	uint64_t time;
+
+	read_node(&out[10]);
+	read_random(&out[8], 2);
+
+	time = read_time();
+	out[0] = (uint8_t)(time >> 24);
+	out[1] = (uint8_t)(time >> 16);
+	out[2] = (uint8_t)(time >> 8);
+	out[3] = (uint8_t)time;
+	out[4] = (uint8_t)(time >> 40);
+	out[5] = (uint8_t)(time >> 32);
+	out[6] = (uint8_t)(time >> 56);
+	out[7] = (uint8_t)(time >> 48);
+
+	out[6] = (out[6] & 0x0F) | 0x10;
+	out[8] = (out[8] & 0x3F) | 0x80;
+}
+
+void
+uuid_generate(uuid_t out)
+{
+	uuid_generate_random(out);
+}
+
+int
+uuid_is_null(const uuid_t uu)
+{
+	return !memcmp(uu, UUID_NULL, sizeof(uuid_t));
+}
+
+int
+uuid_parse(const char *in, uuid_t uu)
+{
+	int n = 0;
+
+	sscanf(in,
+	    "%2hhx%2hhx%2hhx%2hhx-"
+	    "%2hhx%2hhx-"
+	    "%2hhx%2hhx-"
+	    "%2hhx%2hhx-"
+	    "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx%n",
+	    &uu[0], &uu[1], &uu[2], &uu[3],
+	    &uu[4], &uu[5],
+	    &uu[6], &uu[7],
+	    &uu[8], &uu[9],
+	    &uu[10], &uu[11], &uu[12], &uu[13], &uu[14], &uu[15], &n);
+
+	return (n != 36 || in[n] != '\0' ?
-1 : 0); +} + +void +uuid_unparse_lower(const uuid_t uu, char *out) +{ + sprintf(out, + "%02x%02x%02x%02x-" + "%02x%02x-" + "%02x%02x-" + "%02x%02x-" + "%02x%02x%02x%02x%02x%02x", + uu[0], uu[1], uu[2], uu[3], + uu[4], uu[5], + uu[6], uu[7], + uu[8], uu[9], + uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); +} + +void +uuid_unparse_upper(const uuid_t uu, char *out) +{ + sprintf(out, + "%02X%02X%02X%02X-" + "%02X%02X-" + "%02X%02X-" + "%02X%02X-" + "%02X%02X%02X%02X%02X%02X", + uu[0], uu[1], uu[2], uu[3], + uu[4], uu[5], + uu[6], uu[7], + uu[8], uu[9], + uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); +} + +void +uuid_unparse(const uuid_t uu, char *out) +{ + uuid_unparse_upper(uu, out); +} diff --git a/libsa/catalogue.cpp b/libsa/catalogue.cpp index 4effe8364..554fa5a3a 100644 --- a/libsa/catalogue.cpp +++ b/libsa/catalogue.cpp @@ -42,17 +42,17 @@ extern "C" { extern "C" { extern void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize ); -extern kern_return_t host_info(host_t host, - host_flavor_t flavor, - host_info_t info, - mach_msg_type_number_t *count); -extern int check_cpu_subtype(cpu_subtype_t cpu_subtype); -extern struct section * -getsectbyname( - char *segname, - char *sectname); -extern struct segment_command * -getsegbyname(char *seg_name); +// extern kern_return_t host_info(host_t host, +// host_flavor_t flavor, +// host_info_t info, +// mach_msg_type_number_t *count); +extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); +// Return the address of the named Mach-O segment from the currently +// executing 32 bit kernel, or NULL. +extern struct segment_command *getsegbyname(char *seg_name); +// Return the address of the named section from the named Mach-O segment +// from the currently executing 32 bit kernel, or NULL. 
+extern struct section *getsectbyname(char *segname, char *sectname); }; #define LOG_DELAY() @@ -705,8 +705,17 @@ OSDictionary * readExtension(OSDictionary * propertyDict, driverInfo = (MemoryMapFileInfo *) bootxDriverDataObject->getBytesNoCopy(0, sizeof(MemoryMapFileInfo)); +#if defined (__ppc__) dataBuffer = (BootxDriverInfo *)ml_static_ptovirt( - driverInfo->paddr); + driverInfo->paddr); +#elif defined (__i386__) + dataBuffer = (BootxDriverInfo *)driverInfo->paddr; + dataBuffer->plistAddr = ml_static_ptovirt(dataBuffer->plistAddr); + if (dataBuffer->moduleAddr) + dataBuffer->moduleAddr = ml_static_ptovirt(dataBuffer->moduleAddr); +#else +#error unsupported architecture +#endif if (!dataBuffer) { IOLog("Error: No data buffer " "for device tree entry \"%s\".\n", memory_map_name); @@ -938,8 +947,14 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, OSData * moduleInfo = 0; // must release MkextEntryInfo module_info; - mkext_data = (mkext_header *)mkext_file_info->paddr; +#if defined (__ppc__) + mkext_data = (mkext_header *)mkext_file_info->paddr; +#elif defined (__i386__) + mkext_data = (mkext_header *)ml_static_ptovirt(mkext_file_info->paddr); +#else +#error unsupported architecture +#endif if (OSSwapBigToHostInt32(mkext_data->magic) != MKEXT_MAGIC || OSSwapBigToHostInt32(mkext_data->signature) != MKEXT_SIGN) { IOLog("Error: Extension archive has invalid magic or signature.\n"); @@ -994,7 +1009,8 @@ bool extractExtensionsFromArchive(MemoryMapFileInfo * mkext_file_info, result = false; goto finish; } - if (!check_cpu_subtype(OSSwapBigToHostInt32(mkext_data->cpusubtype))) { + if (!grade_binary(OSSwapBigToHostInt32(mkext_data->cputype), + OSSwapBigToHostInt32(mkext_data->cpusubtype))) { IOLog("Error: Extension archive doesn't contain software " "for this computer's CPU subtype.\n"); LOG_DELAY(); @@ -1290,7 +1306,7 @@ bool addPersonalities(OSDictionary * extensions) { if (thisDriverPersonalities) { OSCollectionIterator * pIterator; - OSString * key; + OSString * locakKey; pIterator = OSCollectionIterator::withCollection( thisDriverPersonalities); if (!pIterator) { @@ -1299,12 +1315,12 @@ bool addPersonalities(OSDictionary * extensions) { LOG_DELAY(); continue; } - while ( (key = OSDynamicCast(OSString, + while ( (locakKey = OSDynamicCast(OSString, pIterator->getNextObject())) ) { OSDictionary * personality = OSDynamicCast( OSDictionary, - thisDriverPersonalities->getObject(key)); + thisDriverPersonalities->getObject(locakKey)); if (personality) { allDriverPersonalities->setObject(personality); } diff --git a/libsa/conf/Makefile.i386 b/libsa/conf/Makefile.i386 index b89fdd145..56a4eff64 100644 --- a/libsa/conf/Makefile.i386 +++ b/libsa/conf/Makefile.i386 @@ -2,6 +2,33 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# Enable -Werror for i386 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly in libsa: +OBJS_NO_WERROR= \ + ioconf.o \ + UNDRequest.o \ + bootstrap.cpo \ + bsearch.o \ + c++rem3.o \ + catalogue.cpo \ + dgraph.o \ + kext.cpo \ + kld_patch.o \ + load.o \ + mach.o \ + malloc.o \ + misc.o \ + sort.o \ + strrchr.o \ + strstr.o \ + vers_rsrc.o + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror ###################################################################### #END Machine dependent Makefile fragment for i386 diff --git a/libsa/conf/Makefile.template 
b/libsa/conf/Makefile.template
index a8d3252d6..0419939c7 100644
--- a/libsa/conf/Makefile.template
+++ b/libsa/conf/Makefile.template
@@ -43,14 +43,6 @@ COMP_SUBDIRS =
 #
 .PRECIOUS: Makefile
 
-VERSION_FILES= \
-    $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \
-    $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \
-    $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant
-
-COPYRIGHT_FILES = \
-    $(SOURCE_DIR)/$(COMPONENT)/conf/copyright
-
 #
 # These macros are filled in by the config program depending on the
 # current configuration. The MACHDEP macro is replaced by the
@@ -86,12 +78,8 @@ LDOBJS = $(OBJS)
 
 $(COMPONENT).o: $(LDOBJS)
	@echo "creating $(COMPONENT).o"
-	$(RM) $(RMFLAGS) vers.c
-	$(COMPOBJROOT)/newvers \
-	`$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES}
-	${KCC} $(CFLAGS) $(INCLUDES) -c vers.c
	@echo [ updating $(COMPONENT).o ${LIBSA_KERNEL_CONFIG} ]
-	$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o /usr/local/lib/libkld.a
+	$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} /usr/local/lib/libkld.a
	$(SEG_HACK) __KLD $(COMPONENT).o -o $(COMPONENT)_kld.o
	mv $(COMPONENT)_kld.o $(COMPONENT).o
diff --git a/libsa/conf/tools/Makefile b/libsa/conf/tools/Makefile
index 9df86ce8c..4f9ccd553 100644
--- a/libsa/conf/tools/Makefile
+++ b/libsa/conf/tools/Makefile
@@ -7,13 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS = \
-	doconf \
-	newvers
+SETUP_SUBDIRS = doconf
 
-COMP_SUBDIRS = \
-	doconf \
-	newvers
+COMP_SUBDIRS = doconf
 
 INST_SUBDIRS = \
 
diff --git a/libsa/conf/tools/newvers/Makefile b/libsa/conf/tools/newvers/Makefile
deleted file mode 100644
index 73603c753..000000000
--- a/libsa/conf/tools/newvers/Makefile
+++ /dev/null
@@ -1,49 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-COMP_SUBDIRS = \
-
-INST_SUBDIRS = \
-
-
-#
-# Who and where
-#
-BINDIR=
-DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/)
-PROGRAM= $(DSTDIR)newvers
-
-#
-# How to install it
-#
-IFLAGS= -c -m 555
-
-$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)";
-	-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS
-	sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \
-		< $< >$(notdir $(PROGRAM)).VERS;
-	install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM);
-	-$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS;
-
-do_build_setup: $(PROGRAM)
-
-do_build_all:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-setup_build_install:
-	@echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-do_build_install:
-	@echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
-
-
diff --git a/libsa/conf/tools/newvers/newvers.csh b/libsa/conf/tools/newvers/newvers.csh
deleted file mode 100644
index b462d3387..000000000
--- a/libsa/conf/tools/newvers/newvers.csh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/sh
-
-#
-# Mach Operating System
-# Copyright (c) 1990 Carnegie-Mellon University
-# Copyright (c) 1989 Carnegie-Mellon University
-# All rights reserved.  The CMU software License Agreement specifies
-# the terms and conditions for use and redistribution.
-# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"Common Services Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"Common Services\";" ; - /bin/echo "char ${COMPONENT}_builder[] = \"$w\";" ; -) > vers.c -if [ -s vers.suffix -o ! -f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/libsa/conf/version.major b/libsa/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/libsa/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/libsa/conf/version.minor b/libsa/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/libsa/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/libsa/conf/version.variant b/libsa/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/libsa/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/libsa/dgraph.c b/libsa/dgraph.c index 50abd38cb..199a9138e 100644 --- a/libsa/dgraph.c +++ b/libsa/dgraph.c @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + #ifdef KERNEL #include #else @@ -614,8 +636,14 @@ dgraph_entry_t * dgraph_add_dependent( // /hacks new_entry->is_symbol_set = (2 & is_kernel_component); - new_entry->opaques = !strncmp(new_entry->expected_kmod_name, - "com.apple.kpi", strlen("com.apple.kpi")); + + new_entry->opaques = 0; + if (!strncmp(new_entry->expected_kmod_name, + "com.apple.kpi", strlen("com.apple.kpi"))) + new_entry->opaques |= kOpaqueLink; + if (!strcmp(new_entry->expected_kmod_name, + "com.apple.kernel")) + new_entry->opaques |= kOpaqueLink | kRawKernelLink; // hacks/ dgraph->has_symbol_sets |= new_entry->is_symbol_set; diff --git a/libsa/dgraph.h b/libsa/dgraph.h index 17e26b963..47e124098 100644 --- a/libsa/dgraph.h +++ b/libsa/dgraph.h @@ -91,6 +91,8 @@ typedef enum { } dgraph_error_t; +enum { kOpaqueLink = 0x01, kRawKernelLink = 0x02 }; + dgraph_error_t dgraph_init(dgraph_t * dgraph); #ifndef KERNEL diff --git a/libsa/kext.cpp b/libsa/kext.cpp index 8f785d9d0..70defb292 100644 --- a/libsa/kext.cpp +++ b/libsa/kext.cpp @@ -335,9 +335,11 @@ finish: static bool figureDependenciesForKext(OSDictionary * kextPlist, OSDictionary * dependencies, - OSString * trueParent) + OSString * trueParent, + Boolean skipKernelDependencies) { bool result = true; + bool hasDirectKernelDependency = false; OSString * kextName = 0; // don't release OSDictionary * libraries = 0; // don't release OSCollectionIterator * keyIterator = 0; // must release @@ -379,10 +381,27 @@ figureDependenciesForKext(OSDictionary * kextPlist, result = false; goto finish; } else { - dependencies->setObject(libraryName, - trueParent ? trueParent : kextName); + char is_kernel_component; + + if (!kextIsDependency(libraryName->getCStringNoCopy(), &is_kernel_component)) + is_kernel_component = false; + + if (!skipKernelDependencies || !is_kernel_component) { + dependencies->setObject(libraryName, + trueParent ? trueParent : kextName); + } + if (!hasDirectKernelDependency && is_kernel_component) { + hasDirectKernelDependency = true; + } } } + if (!hasDirectKernelDependency) { + /* a kext without any kernel dependency is assumed dependent on 6.0 */ + dependencies->setObject("com.apple.kernel.libkern", + trueParent ? trueParent : kextName); + IOLog("Extension \"%s\" has no kernel dependency.\n", + kextName->getCStringNoCopy()); + } finish: if (keyIterator) keyIterator->release(); @@ -504,7 +523,7 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) goto finish; } - if (!figureDependenciesForKext(kextPlist, workingDependencies, NULL)) { + if (!figureDependenciesForKext(kextPlist, workingDependencies, NULL, false)) { IOLog("can't determine immediate dependencies for extension %s\n", kmod_name); result = false; @@ -570,7 +589,7 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) * binaryless, dependency. */ if (!figureDependenciesForKext(kextPlist, pendingDependencies, - dependentName)) { + dependentName, true)) { IOLog("can't determine immediate dependencies for extension %s\n", library_name); @@ -634,7 +653,7 @@ bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) /* Now put the library's dependencies onto the pending set. 
*/ if (!figureDependenciesForKext(kextPlist, pendingDependencies, - NULL)) { + NULL, false)) { IOLog("can't determine immediate dependencies for extension %s\n", library_name); diff --git a/libsa/kld_patch.c b/libsa/kld_patch.c index 558e694bf..d73f1b2d0 100644 --- a/libsa/kld_patch.c +++ b/libsa/kld_patch.c @@ -38,7 +38,7 @@ #if KERNEL #include -#include +//#include #include @@ -55,12 +55,11 @@ enum { false = 0, true = 1 }; #define vm_page_size page_size -extern load_return_t fatfile_getarch( - void * vp, // normally a (struct vnode *) - vm_offset_t data_ptr, - struct fat_arch * archret); +extern void kld_error_vprintf(const char *format, va_list ap); __private_extern__ char *strstr(const char *in, const char *str); +extern struct mach_header _mh_execute_header; +extern struct segment_command *getsegbyname(char *seg_name); // 32 bit only #else /* !KERNEL */ @@ -167,7 +166,7 @@ typedef struct Data { } Data, *DataRef; struct sectionRecord { - const struct section *fSection; + const struct section *fSection; // 32 bit mach object section DataRef fRelocCache; }; @@ -241,8 +240,6 @@ findSymbolByName(struct fileRecord *file, const char *symname); static void errprintf(const char *fmt, ...) { - extern void kld_error_vprintf(const char *format, va_list ap); - va_list ap; va_start(ap, fmt); @@ -381,6 +378,10 @@ symbolname(const struct fileRecord *file, const struct nlist *sym) unsigned int index; index = sym - file->fSymbolBase; + + if (index && !sym->n_un.n_strx) + return file->fStringBase + sym->n_value; + if (index < file->fSymtab->nsyms) return symNameByIndex(file, index); @@ -605,6 +606,7 @@ kld_set_architecture(const NXArchInfo * arch) sPreferArchInfo = arch; } +// This function can only operate on 32 bit mach-o files Boolean kld_macho_swap(struct mach_header * mh) { @@ -648,6 +650,7 @@ kld_macho_swap(struct mach_header * mh) return (true); } +// This function can only operate on 32 bit mach-o files void kld_macho_unswap(struct mach_header * mh, Boolean didSwap, int symbols) { @@ -702,6 +705,8 @@ kld_macho_unswap(struct mach_header * mh, Boolean didSwap, int symbols) #endif /* !KERNEL */ +// Note: This functions is only called from kld_file_map() +// This function can only operate on 32 bit mach-o files static Boolean findBestArch(struct fileRecord *file, const char *pathName) { unsigned long magic; @@ -797,6 +802,7 @@ static Boolean findBestArch(struct fileRecord *file, const char *pathName) return true; } +// This function can only operate on segments from 32 bit mach-o files static Boolean parseSegments(struct fileRecord *file, struct segment_command *seg) { @@ -902,6 +908,8 @@ tryRemangleAgain: return true; } +// This function can only operate on symbol table files from 32 bit +// mach-o files static Boolean parseSymtab(struct fileRecord *file, const char *pathName) { const struct nlist *sym; @@ -996,7 +1004,7 @@ static Boolean parseSymtab(struct fileRecord *file, const char *pathName) errprintf("%s: Undefined in symbol set: %s\n", pathName, symname); patchsym->n_type = N_ABS; patchsym->n_desc = 0; - patchsym->n_value = 0; + patchsym->n_value = patchsym->n_un.n_strx; patchsym->n_un.n_strx = 0; } @@ -1129,7 +1137,7 @@ findSymbolByAddress(const struct fileRecord *file, void *entry) } static const struct nlist * -findSymbolByAddressInAllFiles(const struct fileRecord * fromFile, +findSymbolByAddressInAllFiles(__unused const struct fileRecord * fromFile, void *entry, const struct fileRecord **resultFile) { int i, nfiles = 0; @@ -1588,7 +1596,7 @@ static Boolean mergeOSObjectsForFile(const 
struct fileRecord *file) ("Unable to allocate memory metaclass list\n", file->fPath)); } else { /* perform a duplicate check */ - int i, j, cnt1, cnt2; + int k, j, cnt1, cnt2; struct metaClassRecord **list1, **list2; list1 = (struct metaClassRecord **) DataGetPtr(file->fClassList); @@ -1596,11 +1604,11 @@ static Boolean mergeOSObjectsForFile(const struct fileRecord *file) list2 = (struct metaClassRecord **) DataGetPtr(sMergeMetaClasses); cnt2 = DataGetLength(sMergeMetaClasses) / sizeof(*list2); - for (i = 0; i < cnt1; i++) { + for (k = 0; k < cnt1; k++) { for (j = 0; j < cnt2; j++) { - if (!strcmp(list1[i]->fClassName, list2[j]->fClassName)) { + if (!strcmp(list1[k]->fClassName, list2[j]->fClassName)) { errprintf("duplicate class %s in %s & %s\n", - list1[i]->fClassName, + list1[k]->fClassName, file->fPath, list2[j]->fFile->fPath); } } @@ -2218,6 +2226,8 @@ static Boolean growImage(struct fileRecord *file, vm_size_t delta) #endif /* KERNEL */ } +// Note: This function is only called from kld_file_prepare_for_link() +// This function can only operate on 32 bit mach-o files static Boolean prepareFileForLink(struct fileRecord *file) { @@ -2256,7 +2266,7 @@ DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: // We will need to repair the reloc list for (j = 0; j < nreloc; j++, rec++) { void **entry; - struct nlist *sym; + struct nlist *repairSym; // Repair Damage to object image entry = (void **) (sectionBase + rec->fRInfo->r_address); @@ -2264,12 +2274,12 @@ DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: // Check if the symbol that this relocation entry points // to is marked as erasable - sym = (struct nlist *) rec->fSymbol; - if (sym && sym->n_type == (N_EXT | N_UNDF) - && sym->n_sect == (unsigned char) -1) { + repairSym = (struct nlist *) rec->fSymbol; + if (repairSym && repairSym->n_type == (N_EXT | N_UNDF) + && repairSym->n_sect == (unsigned char) -1) { // It is in use so we better clear the mark - sym->n_un.n_strx = -sym->n_un.n_strx; - sym->n_sect = NO_SECT; + repairSym->n_un.n_strx = -repairSym->n_un.n_strx; + repairSym->n_sect = NO_SECT; } } @@ -2355,6 +2365,7 @@ DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: } // Don't need the new strings any more + if (file->fNewStringBlocks){ last = DataGetLength(file->fNewStringBlocks) / sizeof(DataRef); stringBlocks = (DataRef *) DataGetPtr(file->fNewStringBlocks); @@ -2363,6 +2374,7 @@ DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: last =0; stringBlocks=0; } + for (i = 0; i < last; i++) DataRelease(stringBlocks[i]); @@ -2416,6 +2428,7 @@ DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: return true; } +// This function can only operate on 32 bit mach-o files Boolean #if KERNEL kld_file_map(const char *pathName, @@ -2451,7 +2464,7 @@ kld_file_map(const char *pathName) } *machO; const struct load_command *cmd; boolean_t lookVMRange; - int i; + unsigned long i; if (!findBestArch(&file, pathName)) break; @@ -2544,9 +2557,6 @@ kld_file_map(const char *pathName) // Automatically load the kernel's link edit segment if we are // attempting to load a driver. 
if (!sKernelFile) { - extern struct mach_header _mh_execute_header; - extern struct segment_command *getsegbyname(char *seg_name); - struct segment_command *sg; size_t kernelSize; Boolean ret; @@ -2683,7 +2693,7 @@ Boolean kld_file_patch_OSObjects(const char *pathName) return true; } -Boolean kld_file_prepare_for_link() +Boolean kld_file_prepare_for_link(void) { if (sMergedFiles) { unsigned long i, nmerged = 0; @@ -2707,7 +2717,7 @@ Boolean kld_file_prepare_for_link() return true; } -void kld_file_cleanup_all_resources() +void kld_file_cleanup_all_resources(void) { unsigned long i, nfiles; diff --git a/libsa/kld_patch.h b/libsa/kld_patch.h index b0e6058b3..773784031 100644 --- a/libsa/kld_patch.h +++ b/libsa/kld_patch.h @@ -37,20 +37,20 @@ extern Boolean kld_file_map(const char *pathName, #else extern Boolean kld_file_map(const char *pathName); -extern void * - kld_file_lookupsymbol(const char *pathName, const char *symbolname); - Boolean kld_file_debug_dump(const char *pathName, const char *outName); #endif /* KERNEL */ +extern void * + kld_file_lookupsymbol(const char *pathName, const char *symbolname); + extern void *kld_file_getaddr(const char *pathName, long *size); extern Boolean kld_file_merge_OSObjects(const char *pathName); extern Boolean kld_file_patch_OSObjects(const char *pathName); -extern Boolean kld_file_prepare_for_link(); +extern Boolean kld_file_prepare_for_link(void); -extern void kld_file_cleanup_all_resources(); +extern void kld_file_cleanup_all_resources(void); __END_DECLS diff --git a/libsa/kmod.cpp b/libsa/kmod.cpp index 97675dddb..2847e9f15 100644 --- a/libsa/kmod.cpp +++ b/libsa/kmod.cpp @@ -588,7 +588,7 @@ unsigned long alloc_for_kmod( headers_pad = round_headers_size - headers_size; k_result = vm_allocate(kernel_map, (vm_offset_t *)&buffer, - round_size, TRUE); + round_size, VM_FLAGS_ANYWHERE); if (k_result != KERN_SUCCESS) { IOLog("alloc_for_kmod(): Can't allocate memory.\n"); LOG_DELAY(); diff --git a/libsa/libsa/Makefile b/libsa/libsa/Makefile index 7ae937a41..f495a088a 100644 --- a/libsa/libsa/Makefile +++ b/libsa/libsa/Makefile @@ -7,9 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS = mach +INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} ppc +INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} i386 EXPINC_SUBDIRS = mach EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} ppc @@ -23,6 +23,7 @@ EXPORT_MI_LIST = mkext.h setjmp.h stdlib.h unistd.h EXPORT_MI_DIR = libsa +INSTALL_KF_MI_LIST = include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libsa/libsa/i386/Makefile b/libsa/libsa/i386/Makefile index f28a193d0..053aa5abd 100644 --- a/libsa/libsa/i386/Makefile +++ b/libsa/libsa/i386/Makefile @@ -26,6 +26,8 @@ EXPORT_MD_LIST = setjmp.h EXPORT_MD_DIR = libsa/i386 +INSTALL_KF_MD_LIST = + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libsa/libsa/mach/Makefile b/libsa/libsa/mach/Makefile index 53babf14c..099adeccd 100644 --- a/libsa/libsa/mach/Makefile +++ b/libsa/libsa/mach/Makefile @@ -26,6 +26,8 @@ EXPORT_MI_LIST = mach.h EXPORT_MI_DIR = libsa/mach +INSTALL_KF_MI_LIST = + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libsa/libsa/ppc/Makefile b/libsa/libsa/ppc/Makefile index 6f4a9d96a..6fcdf5278 100644 --- a/libsa/libsa/ppc/Makefile +++ b/libsa/libsa/ppc/Makefile @@ -26,6 +26,8 @@ EXPORT_MD_LIST = setjmp.h EXPORT_MD_DIR = libsa/ppc +INSTALL_KF_MD_LIST = + include $(MakeInc_rule) 
include $(MakeInc_dir) diff --git a/libsa/libsa/stdlib.h b/libsa/libsa/stdlib.h index 6b2c48a07..df136e5d7 100644 --- a/libsa/libsa/stdlib.h +++ b/libsa/libsa/stdlib.h @@ -2,10 +2,11 @@ #define _LIBSA_STDLIB_H_ #include +#include -#ifndef _BSD_SIZE_T_DEFINED_ -#define _BSD_SIZE_T_DEFINED_ -typedef __SIZE_TYPE__ size_t; +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; #endif #ifndef NULL diff --git a/libsa/load.c b/libsa/load.c index b9c703d99..2d64905b4 100644 --- a/libsa/load.c +++ b/libsa/load.c @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + /*************** * HEADERS ***************/ @@ -18,6 +40,7 @@ #include #include +#include #include #include #include @@ -28,6 +51,8 @@ #include #include #include +#include +#include #include #include "vers_rsrc.h" @@ -92,8 +117,7 @@ kmod_start_or_stop( extern kern_return_t kmod_retain(kmod_t id); extern kern_return_t kmod_release(kmod_t id); -extern void flush_dcache(vm_offset_t addr, unsigned cnt, int phys); -extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys); +extern struct mach_header _mh_execute_header; #endif /* KERNEL */ @@ -486,7 +510,8 @@ finish: } #endif /* not KERNEL */ /******************************************************************************* -* +* This function can only operate on 32 bit mach object file symbol table +* entries. 
*******************************************************************************/ static kload_error __kload_keep_symbols(dgraph_entry_t * entry) @@ -545,7 +570,7 @@ kload_error __kload_keep_symbols(dgraph_entry_t * entry) hdr = (struct mach_header *) mem; cmd->hdr.ncmds = 2; - cmd->hdr.sizeofcmds = sizeof(struct load_cmds); + cmd->hdr.sizeofcmds = sizeof(struct load_cmds) - sizeof(struct mach_header); cmd->hdr.flags &= ~MH_INCRLINK; cmd->symcmd.stroff -= (symcmd->symoff - sizeof(struct load_cmds)); @@ -566,7 +591,16 @@ kload_error __kload_keep_symbols(dgraph_entry_t * entry) sym = (struct nlist *) (cmd + 1); for (idx = 0; idx < symcmd->nsyms; idx++, sym++) { - if ( (sym->n_type & N_TYPE) == N_SECT) { + if ( (sym->n_type & N_STAB) != 0) + { + sym->n_type = N_ABS; + sym->n_desc = 0; + sym->n_value = sym->n_un.n_strx; + sym->n_un.n_strx = 0; + sym->n_sect = NO_SECT; + } + else if ( (sym->n_type & N_TYPE) == N_SECT) + { sym->n_sect = NO_SECT; sym->n_type = (sym->n_type & ~N_TYPE) | N_ABS; } @@ -585,6 +619,9 @@ kload_error __kload_keep_symbols(dgraph_entry_t * entry) } +/******************************************************************************* +* This function can only operate on 32 bit mach object files +*******************************************************************************/ static kload_error __kload_make_opaque_basefile(dgraph_t * dgraph, struct mach_header * hdr) { @@ -592,7 +629,7 @@ kload_error __kload_make_opaque_basefile(dgraph_t * dgraph, struct mach_header * struct segment_command * data_seg; struct segment_command * text_seg; struct section * sec; - int j; + unsigned int j; vm_offset_t offset; unsigned long idx, ncmds; vm_size_t size; @@ -777,7 +814,6 @@ kload_error __kload_load_modules(dgraph_t * dgraph #else /* KERNEL */ const char * kernel_file = "(kernel)"; - extern struct mach_header _mh_execute_header; kernel_base_addr = (char *) &_mh_execute_header; #endif /* not KERNEL */ @@ -808,14 +844,14 @@ kload_error __kload_load_modules(dgraph_t * dgraph } cleanup_kld_loader = true; - bool opaque_now = false; + char opaque_now = false; for (i = 0; i < dgraph->length; i++) { dgraph_entry_t * current_entry = dgraph->load_order[i]; opaque_now |= current_entry->opaque_link; - if (opaque_now) + if (kOpaqueLink & opaque_now) { unsigned int k, j; @@ -829,9 +865,17 @@ kload_error __kload_load_modules(dgraph_t * dgraph if (dgraph->have_loaded_symbols) { kld_unload_all(1); - kld_result = kld_load_basefile_from_memory(kernel_file, - (char *) dgraph->opaque_base_image, - dgraph->opaque_base_length); + if (kRawKernelLink & current_entry->opaque_link) { +#ifndef KERNEL + kld_result = kld_load_basefile_from_memory(kernel_file, + (char *) kernel_base_addr, kernel_size); +#endif + } else { + kld_result = kld_load_basefile_from_memory(kernel_file, + (char *) dgraph->opaque_base_image, + dgraph->opaque_base_length); + dgraph->have_loaded_symbols = false; + } if (!kld_result) { kload_log_error("can't link base image %s" KNL, kernel_file); result = kload_error_link_load; @@ -839,23 +883,37 @@ kload_error __kload_load_modules(dgraph_t * dgraph } } - dgraph->have_loaded_symbols = false; - - for (j = 0; j < dgraph->length; j++) + for (j = 0; j < i; j++) { - for (k = 0; - (k < current_entry->num_dependencies) - && (current_entry->dependencies[k] != dgraph->load_order[j]); - k++) {} - if (k == current_entry->num_dependencies) - continue; + dgraph_entry_t * image_dep = dgraph->load_order[j]; + + if (current_entry->opaque_link) + { + for (k = 0; + (k < current_entry->num_dependencies) + && 
(current_entry->dependencies[k] != image_dep); + k++) {} + + if (k == current_entry->num_dependencies) + continue; + } + + if (!current_entry->opaque_link && image_dep->opaques) + { + // kpi not on direct dependency list + continue; + } + if (kRawKernelLink & image_dep->opaques) + { + // raw kernel already in base image + continue; + } - dgraph_entry_t * image_dep = current_entry->dependencies[k]; if (!image_dep->symbols) { kload_log_error("internal error; no dependent symbols" KNL); - result = kload_error_link_load; + result = kload_error_link_load; goto finish; } else @@ -904,7 +962,9 @@ kload_error __kload_load_modules(dgraph_t * dgraph if (dgraph->has_opaque_links && (current_entry != dgraph->root)) { - result = __kload_keep_symbols(current_entry); + if (!(kRawKernelLink & current_entry->opaques)) { + result = __kload_keep_symbols(current_entry); + } if (result != kload_error_none) { kload_log_error("__kload_keep_symbols() failed for module %s" KNL, current_entry->name); @@ -1020,9 +1080,10 @@ finish: static kload_error __kload_load_module(dgraph_t * dgraph, dgraph_entry_t * entry, - int is_root -#ifndef KERNEL - , +#ifdef KERNEL + __unused int is_root +#else /* not KERNEL */ + int is_root, const char * symbol_file, const char * symbol_dir, int do_load, @@ -1303,7 +1364,7 @@ kload_error __kload_load_module(dgraph_t * dgraph, if (do_load && entry->do_load) { mach_result = vm_allocate(mach_task_self(), &vm_buffer, - entry->kernel_alloc_size, TRUE); + entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); if (mach_result != KERN_SUCCESS) { kload_log_error("unable to vm_allocate() copy buffer" KNL); entry->need_cleanup = 1; @@ -1493,7 +1554,7 @@ kload_error kload_map_dgraph( #endif /* not KERNEL */ { kload_error result = kload_error_none; - int i; + unsigned int i; if (log_level >= kload_log_level_load_details) { #ifndef KERNEL @@ -2034,7 +2095,7 @@ finish: #ifdef KERNEL // Do this ONLY if in the kernel! if (current_kmod) { - kfree((unsigned int)current_kmod, sizeof(kmod_info_t)); + kfree(current_kmod, sizeof(kmod_info_t)); } #endif /* KERNEL */ return result; @@ -2366,7 +2427,8 @@ finish: *******************************************************************************/ /******************************************************************************* -* +* This function can only operate on 32 bit mach object file symbol table +* graphs represented by G_current_load_entry. *******************************************************************************/ static unsigned long __kload_linkedit_address( @@ -2443,11 +2505,11 @@ unsigned long __kload_linkedit_address( #ifndef KERNEL mach_result = vm_allocate(G_kernel_port, &G_current_load_entry->kernel_alloc_address, - G_current_load_entry->kernel_alloc_size, TRUE); + G_current_load_entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); #else mach_result = vm_allocate(kernel_map, &G_current_load_entry->kernel_alloc_address, - G_current_load_entry->kernel_alloc_size, TRUE); + G_current_load_entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); #endif /* not KERNEL */ } diff --git a/libsa/mach_loader.h b/libsa/mach_loader.h index 415f03e93..eed913974 100644 --- a/libsa/mach_loader.h +++ b/libsa/mach_loader.h @@ -26,6 +26,9 @@ * * Mach object file loader API. * + * NOTE: This header is only used by the kld code for loading 32 bit + * kernel modules into a 32 bit mach_kernel. + * * HISTORY * 24-Aug-92 Doug Mitchell at NeXT * Created. 
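The repeated TRUE-to-VM_FLAGS_ANYWHERE substitutions in this and the surrounding hunks are more than a rename: the final vm_allocate() parameter is a flags word, and the named flag states the actual intent (place the region at any convenient address rather than exactly at the one passed in) instead of relying on TRUE happening to share the flag's value. A hedged user-space sketch of the calling pattern, assuming the Mach APIs declared through <mach/mach.h>:

#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
    vm_address_t addr = 0;   /* input address is ignored with VM_FLAGS_ANYWHERE */
    kern_return_t kr;

    kr = vm_allocate(mach_task_self(), &addr, 0x4000, VM_FLAGS_ANYWHERE);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "vm_allocate failed: %d\n", kr);
        return 1;
    }
    printf("allocated 16KB at %p\n", (void *)addr);
    vm_deallocate(mach_task_self(), addr, 0x4000);
    return 0;
}

The same reasoning applies to the kfree() call above losing its (unsigned int) cast: passing the pointer itself keeps the code correct on any pointer width.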
diff --git a/libsa/malloc.c b/libsa/malloc.c index 72768fe24..300583b59 100644 --- a/libsa/malloc.c +++ b/libsa/malloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -21,10 +21,14 @@ */ #include +#include + +#include #include #include #include #include + #include #include "libsa/malloc.h" @@ -41,8 +45,8 @@ typedef struct malloc_block { struct malloc_block *malFwd; struct malloc_block *malBwd; + void *malActl; unsigned int malSize; - unsigned int malActl; } malloc_block; static malloc_block malAnchor = {&malAnchor, &malAnchor, 0, 0}; @@ -68,7 +72,7 @@ void * malloc(size_t size) { rmem = (nmem + 15) & -16; /* Round to 16 byte boundary */ amem = (malloc_block *)rmem; /* Point to the block */ - amem->malActl = (unsigned int)nmem; /* Set the actual address */ + amem->malActl = nmem; /* Set the actual address */ amem->malSize = nsize; /* Size */ mutex_lock(malloc_lock); @@ -123,7 +127,7 @@ void free(void * address) { __private_extern__ void malloc_init(void) { - malloc_lock = mutex_alloc(ETAP_IO_AHA); + malloc_lock = mutex_alloc(0); malInited = 1; } @@ -142,13 +146,13 @@ void malloc_reset(void) { mutex_lock(malloc_lock); - amem = malAnchor.malFwd; /* Get the first one */ + amem = malAnchor.malFwd; /* Get the first one */ - while(amem != &malAnchor) { /* Go until we hit the anchor */ + while(amem != &malAnchor) { /* Go until we hit the anchor */ - bmem = amem->malFwd; /* Next one */ - kfree(amem->malActl, amem->malSize); /* Toss it */ - amem = bmem; /* Skip to it */ + bmem = amem->malFwd; /* Next one */ + kfree(amem->malActl, amem->malSize); /* Toss it */ + amem = bmem; /* Skip to it */ } diff --git a/libsa/mkext.c b/libsa/mkext.c index 7f28031ed..9747f5382 100644 --- a/libsa/mkext.c +++ b/libsa/mkext.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -28,34 +28,42 @@ #include #endif /* KERNEL */ +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5000 +// NMAX (was 5521) the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); __private_extern__ u_int32_t -adler32(u_int8_t *buffer, int32_t length) +adler32(uint8_t *buf, int32_t len) { - int32_t cnt; - u_int32_t result, lowHalf, highHalf; - - lowHalf = 1; - highHalf = 0; - - for (cnt = 0; cnt < length; cnt++) { - if ((cnt % 5000) == 0) { - lowHalf %= 65521L; - highHalf %= 65521L; + unsigned long s1 = 1; // adler & 0xffff; + unsigned long s2 = 0; // (adler >> 16) & 0xffff; + int k; + + while (len > 0) { + k = len < NMAX ? 
len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; } - - lowHalf += buffer[cnt]; - highHalf += lowHalf; + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; } - - lowHalf %= 65521L; - highHalf %= 65521L; - - result = (highHalf << 16) | lowHalf; - - return result; + return (s2 << 16) | s1; } + /************************************************************** LZSS.C -- A Data Compression Program *************************************************************** diff --git a/libsa/ppc/setjmp.s b/libsa/ppc/setjmp.s index 8350b13d4..7177d23c7 100644 --- a/libsa/ppc/setjmp.s +++ b/libsa/ppc/setjmp.s @@ -84,8 +84,8 @@ ENTRY(setjmp,TAG_NO_FRAME_USED) stw r0, 88(ARG0) /* Fixed point exception register */ #if FLOATING_POINT_SUPPORT /* TODO NMGS probably not needed for kern */ - mffs r0 - stw r0, 92(ARG0) /* Floating point status register */ + mffs f0 /* get FPSCR in low 32 bits of f0 */ + stfiwx f0, 92(ARG0) /* Floating point status register */ stfd f14, 96(ARG0) /* Floating point context - 8 byte aligned */ stfd f15, 104(ARG0) @@ -156,8 +156,8 @@ ENTRY(longjmp, TAG_NO_FRAME_USED) /* TODO NMGS - need correct tag */ mtxer r0 #ifdef FLOATING_POINT_SUPPORT - lwz r0, 92(ARG0) /* Floating point status register */ - mtfs r0 + lfd f0, 92-4(ARG0) /* get Floating point status register in low 32 bits of f0 */ + mtfsf 0xFF,f0 /* restore FPSCR */ lfd f14, 96(ARG0) /* Floating point context - 8 byte aligned */ lfd f15, 104(ARG0) diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def index 7a705ed5d..73842031c 100644 --- a/makedefs/MakeInc.def +++ b/makedefs/MakeInc.def @@ -16,14 +16,6 @@ export INCR_EXPORTHDRS = FALSE endif endif -ifndef INCR_INSTALLHDRS -ifeq ($(shell test -d $$DSTROOT/$INCDIR;echo $$?),0) -export INCR_INSTALLHDRS = TRUE -else -export INCR_INSTALLHDRS = FALSE -endif -endif - # # Component List # @@ -90,7 +82,7 @@ export INSTALL_ARCH_DEFAULT = PPC # # Standard defines list # -export DEFINES = -DAPPLE -DNeXT -DKERNEL_PRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT) +export DEFINES = -DAPPLE -DNeXT -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT) # # Compiler command @@ -99,6 +91,34 @@ KCC = /usr/bin/cc KC++ = /usr/bin/c++ CC = $(KCC) +# +# Compiler warning flags +# + +CWARNFLAGS_STD = \ + -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ + -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ + -Wshadow -Wcast-align -Wbad-function-cast -Wchar-subscripts -Winline \ + -Wnested-externs -Wredundant-decls + +export CWARNFLAGS ?= $(CWARNFLAGS_STD) + +MWARNFLAGS_STD = \ + -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ + -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ + -Wshadow -Wcast-align -Wbad-function-cast -Wchar-subscripts -Winline \ + -Wnested-externs -Wredundant-decls + +export MWARNFLAGS ?= $(MWARNFLAGS_STD) + +CXXWARNFLAGS_STD = \ + -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ + -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ + -Wshadow -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls \ + -fpermissive + +export CXXWARNFLAGS ?= $(CXXWARNFLAGS_STD) + # # Setup for parallel sub-makes when doing an RC build @@ -118,23 +138,23 @@ endif export CFLAGS_GEN = -static -g -nostdinc -nostdlib -no-cpp-precomp \ -fno-builtin -finline -fno-keep-inline-functions -msoft-float \ - -fsigned-bitfields -Wpointer-arith $(OTHER_CFLAGS) + -fsigned-bitfields 
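The adler32() rewrite a little earlier in this hunk swaps a per-byte modulo for the classic zlib formulation: accumulate both 16-bit sums in wider variables and defer the expensive % BASE reduction until just before overflow could occur (NMAX bounds how many bytes are safe between reductions), with the DO16 macros unrolling the inner loop. A self-contained reference version of the same algorithm, without the unrolling, useful for checking the macro version against:

#include <stddef.h>
#include <stdint.h>

#define BASE 65521UL   /* largest prime below 65536 */
#define NMAX 5000      /* bytes safely summable between reductions */

/* Plain Adler-32: s1 is the running byte sum, s2 the running sum of s1;
 * the result packs s2 in the high 16 bits and s1 in the low 16. */
uint32_t adler32_ref(const uint8_t *buf, size_t len)
{
    unsigned long s1 = 1, s2 = 0;

    while (len > 0) {
        size_t k = len < NMAX ? len : NMAX;
        len -= k;
        while (k--) {
            s1 += *buf++;
            s2 += s1;
        }
        s1 %= BASE;    /* deferred reduction, once per NMAX-sized chunk */
        s2 %= BASE;
    }
    return (uint32_t)((s2 << 16) | s1);
}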
$(OTHER_CFLAGS) export CFLAGS_RELEASE = export CFLAGS_DEBUG = export CFLAGS_PROFILE = -pg export CFLAGS_PPC = -arch ppc -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED -export CFLAGS_I386 = -arch i386 -Di386 -DI386 -D__I386__ \ - -march=i686 -mpreferred-stack-boundary=2 -falign-functions=4 -mcpu=pentium4 +export CFLAGS_I386 = -arch i386 -Di386 -DI386 -D__I386__ -DPAGE_SIZE_FIXED \ + -march=i686 -mpreferred-stack-boundary=2 -falign-functions=4 -mcpu=pentium4 -force_cpusubtype_ALL export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns export CFLAGS_DEBUGPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns export CFLAGS_PROFILEPPC = -O2 -mcpu=750 -mmultiple -fschedule-insns -export CFLAGS_RELEASEI386 = -O2 -export CFLAGS_DEBUGI386 = -O2 -export CFLAGS_PROFILEI386 = -O2 +export CFLAGS_RELEASEI386 = -Os +export CFLAGS_DEBUGI386 = -Os +export CFLAGS_PROFILEI386 = -Os export CFLAGS = $(CFLAGS_GEN) \ $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) \ @@ -194,15 +214,33 @@ export LDFLAGS_COMPONENT = $(LDFLAGS_COMPONENT_GEN) \ $($(addsuffix $(ARCH_CONFIG),LDFLAGS_COMPONENT_)) \ $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_COMPONENT_)) -export LDFLAGS_KERNEL_GEN = -static -force_cpusubtype_ALL -segalign 0x1000 +export LDFLAGS_KERNEL_GEN = \ + -static \ + -force_cpusubtype_ALL \ + -e __start \ + -segalign 0x1000 \ + -sectalign __TEXT __text 0x1000 \ + -sectalign __DATA __common 0x1000 \ + -sectalign __DATA __bss 0x1000 \ + -sectcreate __PRELINK __text /dev/null \ + -sectcreate __PRELINK __symtab /dev/null \ + -sectcreate __PRELINK __info /dev/null export LDFLAGS_KERNEL_RELEASE = # -noseglinkedit export LDFLAGS_KERNEL_DEBUG = export LDFLAGS_KERNEL_PROFILE = -export LDFLAGS_KERNEL_PPC = -arch ppc -segaddr __VECTORS 0x0 -segaddr __TEXT 0x7000 -e __start -sectalign __TEXT __text 0x1000 -sectalign __DATA __common 0x1000 -sectalign __DATA __bss 0x1000 -sectcreate __PRELINK __text /dev/null -sectcreate __PRELINK __symtab /dev/null -sectcreate __PRELINK __info /dev/null -export LDFLAGS_KERNEL_I386 = -arch i386 -segaddr __TEXT 0x100000 -e _pstart -sectcreate __PRELINK __text /dev/null -sectcreate __PRELINK __symtab /dev/null -sectcreate __PRELINK __info /dev/null +export LDFLAGS_KERNEL_PPC = \ + -arch ppc \ + -segaddr __VECTORS 0x0 \ + -segaddr __HIB 0x7000 \ + -segaddr __TEXT 0xe000 + +export LDFLAGS_KERNEL_I386 = \ + -arch i386 \ + -segaddr __HIB 0xC0100000 \ + -segaddr __TEXT 0xC0111000 export LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ $($(addsuffix $(ARCH_CONFIG),LDFLAGS_KERNEL_)) \ @@ -219,7 +257,7 @@ export LD_KERNEL_LIBS = -lcc_kext # export INCFLAGS_IMPORT = $(patsubst %, -I$(OBJROOT)/EXPORT_HDRS/%, $(COMPONENT_IMPORT_LIST)) export INCFLAGS_EXTERN = -I$(OBJROOT)/EXTERN_HDRS -I$(SRCROOT)/EXTERNAL_HEADERS -I$(SRCROOT)/EXTERNAL_HEADERS/bsd -export INCFLAGS_GEN = -I$(SRCROOT)/$(COMPONENT) +export INCFLAGS_GEN = -I$(SRCROOT)/$(COMPONENT) -I$(OBJROOT)/EXPORT_HDRS/$(COMPONENT) export INCFLAGS_POSIX = -I$(OBJROOT)/EXPORT_HDRS/bsd export INCFLAGS_LOCAL = -I. 
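The DEFINES expansion above (adding -DKERNEL, -DXNU_KERNEL_PRIVATE and -DPRIVATE) is designed to pair with the unifdef variable sets introduced in the next hunk: one header carries every visibility tier, and each install flavor strips the branches its consumers must not see. A hedged sketch of the header-side convention this enables; the declarations themselves are invented for illustration:

/* One header, four install flavors, carved up by unifdef at install time. */
extern int public_api(void);            /* survives every flavor            */
#ifdef PRIVATE
extern int private_spi(void);           /* PrivateHeaders flavors only      */
#endif
#ifdef KERNEL
extern int kernel_api(void);            /* Kernel.framework Headers         */
#ifdef KERNEL_PRIVATE
extern int kernel_private_api(void);    /* Kernel.framework PrivateHeaders  */
#endif
#endif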
@@ -265,6 +303,14 @@ KINCVERS = A KINCFRAME = $(FRAMEDIR)/Kernel.framework KINCDIR = $(KINCFRAME)/Versions/$(KINCVERS)/Headers KPINCDIR = $(KINCFRAME)/Versions/$(KINCVERS)/PrivateHeaders +KRESDIR = $(KINCFRAME)/Versions/$(KINCVERS)/Resources + +XNU_PRIVATE_UNIFDEF = -UMACH_KERNEL_PRIVATE -UBSD_KERNEL_PRIVATE -UIOKIT_KERNEL_PRIVATE -ULIBKERN_KERNEL_PRIVATE -ULIBSA_KERNEL_PRIVATE -UPEXPERT_KERNEL_PRIVATE -UXNU_KERNEL_PRIVATE + +SPINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -DPRIVATE +SINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE +KPINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL +KINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL # # Compononent Header file destinations diff --git a/makedefs/MakeInc.dir b/makedefs/MakeInc.dir index a810d4517..fd5b753b6 100644 --- a/makedefs/MakeInc.dir +++ b/makedefs/MakeInc.dir @@ -4,25 +4,21 @@ installhdrs: exporthdrs installhdrs_mi installhdrs_md @echo "[ $(SRCROOT) ] make installhdrs installing Kernel.framework"; \ kincpath=$(DSTROOT)/$(KINCDIR); \ + krespath=$(DSTROOT)/$(KRESDIR); \ kframepath=$(DSTROOT)/$(KINCFRAME); \ - $(MKDIR) $$kincpath; \ - chmod -R +w $$kincpath; \ - for i in $(COMPONENT_LIST); do \ - if [ -d $(OBJROOT)/EXPORT_HDRS/$$i ]; then ( \ - cd $(OBJROOT)/EXPORT_HDRS/$$i; \ - pax -ruw -s '/.*CVS.*//' . $$kincpath || true ; \ - ) fi \ - done; \ + [ -d $$krespath ] || $(MKDIR) $$krespath; \ + [ -d $$kincpath ] || $(MKDIR) $$kincpath; \ cd $(SRCROOT)/EXTERNAL_HEADERS; \ - pax -ruw -s '/.*CVS.*//' . $$kincpath || true; \ + install $(FILE_INSTALL_FLAGS) Info.plist $$krespath; \ + $(SRCROOT)/config/newvers.pl $${krespath}/Info.plist; \ cd $$kframepath/Versions; \ [ -L Current ] || $(LN) $(KINCVERS) Current; \ cd $$kframepath; \ [ -L Headers ] || $(LN) Versions/Current/Headers Headers; \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR); \ + [ -L Resources ] || $(LN) Versions/Current/Resources Resources; \ + [ -d $(DSTROOT)/$(KPINCDIR) ] || $(MKDIR) $(DSTROOT)/$(KPINCDIR); \ cd $$kframepath; [ -L PrivateHeaders ] || \ - $(LN) Versions/Current/PrivateHeaders PrivateHeaders; \ - find $$kframepath -type f | xargs -s 32000 chmod a-w + $(LN) Versions/Current/PrivateHeaders PrivateHeaders; # # Install header files order @@ -38,7 +34,7 @@ installhdrs_mi: kernel_config=$(INSTALL_TYPE); \ arch_config=$(INSTALL_ARCH_DEFAULT); \ installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${installinc_dir}; \ + [ -d $${installinc_dir} ] ||$(MKDIR) $${installinc_dir}; \ ${MAKE} -C $${installinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -56,6 +52,7 @@ installhdrs_md: kernel_config=$(INSTALL_TYPE); \ for arch_config in $(INSTALL_ARCHS); \ do \ + [ -d ${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path} ] || \ $(MKDIR) ${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ ${MAKE} -C ${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path} \ KERNEL_CONFIG=$${kernel_config} \ @@ -75,7 +72,7 @@ build_installhdrs_mi:: @echo "[ $(SOURCE) ] make build_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ for installinc_subdir in $(INSTINC_SUBDIRS); \ do \ - $(MKDIR) $${installinc_subdir}; \ + [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ ${MAKE} -C $${installinc_subdir} \ MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ SOURCE=$(SOURCE)$${installinc_subdir}/ \ @@ -93,7 +90,7 @@ build_installhdrs_md:: @echo "[ $(SOURCE) ] make installhdrs_md $(KERNEL_CONFIG) 
$(ARCH_CONFIG) $(TARGET)"; \ for installinc_subdir in $($(addprefix INSTINC_SUBDIRS_, $(ARCH_CONFIG))); \ do \ - $(MKDIR) $${installinc_subdir}; \ + [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ ${MAKE} -C $${installinc_subdir} \ MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ SOURCE=$(SOURCE)$${installinc_subdir}/ \ @@ -123,7 +120,7 @@ exporthdrs_mi: kernel_config=$(INSTALL_TYPE); \ arch_config=$(INSTALL_ARCH_DEFAULT); \ exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${exportinc_dir}; \ + [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ ${MAKE} -C $${exportinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -142,7 +139,7 @@ exporthdrs_md: for arch_config in $(ARCH_CONFIGS); \ do \ exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${exportinc_dir}; \ + [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ ${MAKE} -C $${exportinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -162,7 +159,7 @@ build_exporthdrs_mi: _TMP_EXPINC_SUBDIRS="$(EXPINC_SUBDIRS)"; \ for exportinc_subdir in $${_TMP_EXPINC_SUBDIRS}; \ do \ - $(MKDIR) $${exportinc_subdir}; \ + [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ ${MAKE} -C $${exportinc_subdir} \ MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ SOURCE=$(SOURCE)$${exportinc_subdir}/ \ @@ -181,7 +178,7 @@ build_exporthdrs_md: _TMP_exportinc_subdir="$($(addprefix EXPINC_SUBDIRS_, $(ARCH_CONFIG)))"; \ for exportinc_subdir in $${_TMP_exportinc_subdir}; \ do \ - $(MKDIR) $${exportinc_subdir}; \ + [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ ${MAKE} -C $${exportinc_subdir} \ MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ SOURCE=$(SOURCE)$${exportinc_subdir}/ \ @@ -201,7 +198,7 @@ setup: for arch_config in $(ARCH_CONFIGS); \ do \ setup_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${setup_subdir}; \ + [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ ${MAKE} -C $${setup_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -219,7 +216,7 @@ build_setup: _TMP_setup_subdir="$(SETUP_SUBDIRS) $($(addprefix SETUP_SUBDIRS_, $(ARCH_CONFIG)))"; \ for setup_subdir in $${_TMP_setup_subdir}; \ do \ - $(MKDIR) $${setup_subdir}; \ + [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ ${MAKE} -C $${setup_subdir} \ MAKEFILES=${SOURCE}/$${setup_subdir}/Makefile \ SOURCE=${SOURCE}/$${setup_subdir}/ \ @@ -244,7 +241,7 @@ endif for arch_config in $(ARCH_CONFIGS); \ do \ build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${build_subdir}; \ + [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ ${MAKE} -C $${build_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -265,7 +262,7 @@ build_all: _TMP_comp_subdir="$(COMP_SUBDIRS) $($(addprefix COMP_SUBDIRS_, $(ARCH_CONFIG)))"; \ for comp_subdir in $${_TMP_comp_subdir}; \ do \ - $(MKDIR) $${comp_subdir}; \ + [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ ${MAKE} -C $${comp_subdir} \ MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ SOURCE=${SOURCE}$${comp_subdir}/ \ @@ -276,7 +273,7 @@ build_all: _TMP_comp_subdir="$(CONFIG_SUBDIRS) $($(addprefix CONFIG_SUBDIRS_, $(ARCH_CONFIG)))"; \ for comp_subdir in $${_TMP_comp_subdir}; \ do \ - $(MKDIR) $${comp_subdir}; \ + [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ ${MAKE} -C $${comp_subdir} \ MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ 
SOURCE=${SOURCE}$${comp_subdir}/ \ @@ -295,7 +292,7 @@ mach_kernel: for arch_config in $(ARCH_CONFIGS); \ do \ build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}; \ - $(MKDIR) $${build_subdir}; \ + [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ ${MAKE} -C $${build_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -334,7 +331,7 @@ install: installhdrs all installman for arch_config in $(INSTALL_ARCHS); \ do \ install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ - $(MKDIR) $${install_subdir}; \ + [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ ${MAKE} -C $${install_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -358,7 +355,7 @@ build_install: kernel_config=$(KERNEL_CONFIG); \ for install_subdir in $(INST_SUBDIRS); \ do \ - $(MKDIR) $${install_subdir}; \ + [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ ${MAKE} -C $${install_subdir} \ KERNEL_CONFIG=$${kernel_config} \ MAKEFILES=${SOURCE}/$${install_subdir}/Makefile \ @@ -423,7 +420,7 @@ tags: cscope.files installman: @echo "[ $(SRCROOT) ] Installing man pages"; \ manpath=$(DSTROOT)/$(MANDIR); \ - $(MKDIR) $$manpath; \ + [ -d $$manpath ] || $(MKDIR) $$manpath; \ ${MAKE} MAKEFILES=${SOURCE}/Makefile \ SOURCE=${SOURCE}/ \ TARGET=${DSTROOT}/ \ diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule index 86d80e70c..bfd896d29 100644 --- a/makedefs/MakeInc.rule +++ b/makedefs/MakeInc.rule @@ -2,10 +2,6 @@ # Generic Install rules # -# -# gvdl: Let's optionally have a different list of local installs and -# regular installs. -# ifndef INSTALL_MI_LCL_LIST INSTALL_MI_LCL_LIST = $(INSTALL_MI_LIST) endif @@ -22,184 +18,137 @@ ifndef INSTALL_MD_LCL_GEN_LIST INSTALL_MD_LCL_GEN_LIST = $(INSTALL_MD_GEN_LIST) endif -ifeq ($(INCR_INSTALLHDRS), TRUE) +ifndef INSTALL_KF_MI_LCL_LIST + INSTALL_KF_MI_LCL_LIST = $(EXPORT_MI_LIST) +endif -# -# These are the machine independent headers that end up in /usr/include -# Eventually they're copied to System framework's headers directory -# -INSTALL_MI_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LIST)) -INSTALL_MI_GEN_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST)) +ifndef INSTALL_KF_MI_LCL_GEN_LIST + INSTALL_KF_MI_LCL_GEN_LIST = $(EXPORT_MI_GEN_LIST) +endif -$(INSTALL_MI_INC_FILES) $(INSTALL_MI_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - $(MKDIR) ./incdir; \ - echo garbage > ./incdir/$${filename_strip}; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ - $< > ./incdir/$${filename} || \ - $(DECOMMENT) ./incdir/$${filename} r > \ - ./incdir/$${filename_strip}; \ - if [ -s ./incdir/$${filename_strip} ]; \ - then ( \ - install $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; \ - $(RM) -rf ./incdir; +ifndef INSTALL_KF_MD_LCL_LIST + INSTALL_KF_MD_LCL_LIST = $(EXPORT_MD_LIST) +endif -# -# These are usually machine independent System framework private headers -# Unless LCLDIR is specified as something else -# -INSTALL_MI_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_LIST)) -INSTALL_MI_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_LIST)) +ifndef INSTALL_KF_MD_LCL_GEN_LIST + 
INSTALL_KF_MD_LCL_GEN_LIST = $(EXPORT_MD_GEN_LIST) +endif -$(INSTALL_MI_LCL_FILES) $(INSTALL_MI_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ - $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); +ifndef INSTALL_KF_MI_LIST + INSTALL_KF_MI_LIST = $(EXPORT_MI_LIST) +endif -# -# These are the Kernel framework's machine independent private headers -# They should be specified in INSTALL_MI_LCL_KERN_LIST and INSTALL_MI_LCL_GEN_KERN_LIST -# -INSTALL_MI_LCL_KERN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_KERN_LIST)) -INSTALL_MI_GEN_LCL_KERN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_KERN_LIST)) +ifndef INSTALL_KF_MI_GEN_LIST + INSTALL_KF_MI_GEN_LIST = $(EXPORT_MI_GEN_LIST) +endif -$(INSTALL_MI_LCL_KERN_FILES) $(INSTALL_MI_GEN_LCL_KERN_FILES): $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR); \ - $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); +ifndef INSTALL_KF_MD_LIST + INSTALL_KF_MD_LIST = $(EXPORT_MD_LIST) +endif -# -# These are the machine dependent headers that end up in /usr/include -# Eventually they're copied to System framework's headers directory -# -INSTALL_MD_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LIST)) -INSTALL_MD_GEN_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_GEN_LIST)) +ifndef INSTALL_KF_MD_GEN_LIST + INSTALL_KF_MD_GEN_LIST = $(EXPORT_MD_GEN_LIST) +endif + +INSTALL_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST)) -$(INSTALL_MD_INC_FILES) $(INSTALL_MD_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/% : % +$(INSTALL_MI_GEN_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ + [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ filename=`$(BASENAME) $<`; \ filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - $(MKDIR) ./incdir; \ - echo garbage > ./incdir/$${filename_strip}; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ - $< > ./incdir/$${filename} || \ - $(DECOMMENT) ./incdir/$${filename} r > \ - ./incdir/$${filename_strip}; \ - if [ -s ./incdir/$${filename_strip} ]; \ + [ -d ./incmidir ] || $(MKDIR) ./incmidir; \ + echo garbage > ./incmidir/$${filename_strip}; \ + $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ + $< > ./incmidir/$${filename} || \ + $(DECOMMENT) ./incmidir/$${filename} r > \ + ./incmidir/$${filename_strip}; \ + if [ -s ./incmidir/$${filename_strip} ]; \ then ( \ - install $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\ + install $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\ ); \ else \ echo Header file $< not exported; \ - fi; \ - $(RM) -rf ./incdir; - -# -# These are usually machine dependent System framework private headers -# Unless LCLDIR is specified as something else -# -INSTALL_MD_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_LIST)) -INSTALL_MD_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_LIST)) - -$(INSTALL_MD_LCL_FILES) $(INSTALL_MD_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ - 
$(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); - -# -# These are the Kernel framework's machine dependent private headers -# They should be specified in INSTALL_MD_LCL_KERN_LIST and INSTALL_MD_LCL_GEN_KERN_LIST -# -INSTALL_MD_LCL_KERN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_KERN_LIST)) -INSTALL_MD_GEN_LCL_KERN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_KERN_LIST)) - -$(INSTALL_MD_LCL_KERN_FILES) $(INSTALL_MD_GEN_LCL_KERN_FILES): $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR); \ - $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); - -setup_installhdrs_mi: - @echo "[ $(SOURCE) ] make setup_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_installhdrs_mi: $(INSTALL_MI_INC_FILES) $(INSTALL_MI_GEN_INC_FILES) $(INSTALL_MI_LCL_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL_MI_LCL_KERN_FILES) $(INSTALL_MI_GEN_LCL_KERN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_installhdrs_md: - @echo "[ $(SOURCE) ] make setup_installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" + fi; -do_installhdrs_md: $(INSTALL_MD_INC_FILES) $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_LCL_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INSTALL_MD_LCL_KERN_FILES) $(INSTALL_MD_GEN_LCL_KERN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" +INSTALL_KF_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_GEN_LIST)) -else - -INSTALL_MI_INC_FILES = $(addprefix $(SOURCE), $(INSTALL_MI_LIST)) -INSTALL_MI_GEN_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST)) - -$(INSTALL_MI_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ +$(INSTALL_KF_MI_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/% : % + @true echo Installing $< in $(midir $@); \ + [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ filename=`$(BASENAME) $<`; \ filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - $(MKDIR) ./incdir; \ - echo garbage > ./incdir/$${filename_strip}; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ - $< > ./incdir/$${filename} || \ - $(DECOMMENT) ./incdir/$${filename} r > \ - ./incdir/$${filename_strip}; \ - if [ -s ./incdir/$${filename_strip} ]; \ + [ -d ./kincmidir ] || $(MKDIR) ./kincmidir; \ + echo garbage > ./kincmidir/$${filename_strip}; \ + $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ + $< > ./kincmidir/$${filename} || \ + $(DECOMMENT) ./kincmidir/$${filename} r > \ + ./kincmidir/$${filename_strip}; \ + if [ -s ./kincmidir/$${filename_strip} ]; \ then ( \ - install $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\ + install $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\ ); \ else \ echo Header file $< not exported; \ - fi; \ - $(RM) -rf ./incdir; + fi; -INSTALL_MI_LCL_FILES = $(addprefix $(SOURCE), $(INSTALL_MI_LCL_LIST)) INSTALL_MI_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_LIST)) $(INSTALL_MI_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ + [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR) ] ||$(MKDIR) 
$(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ + filename=`$(BASENAME) $<`; \ + filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); + [ -d ./pincmidir ] || $(MKDIR) ./pincmidir; \ + echo garbage > ./pincmidir/$${filename_strip}; \ + $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ + $< > ./pincmidir/$${filename} || \ + $(DECOMMENT) ./pincmidir/$${filename} r > \ + ./pincmidir/$${filename_strip}; \ + if [ -s ./pincmidir/$${filename_strip} ]; \ + then ( \ + install $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\ + ); \ + else \ + echo Header file $< not exported; \ + fi; -INSTALL_MI_LCL_KERN_FILES = $(addprefix $(SOURCE), $(INSTALL_MI_LCL_KERN_LIST)) -INSTALL_MI_GEN_LCL_KERN_FILES = $(addprefix $(KPINCDIR)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_KERN_GEN_LIST)) +INSTALL_KF_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_LCL_GEN_LIST)) -$(INSTALL_MI_GEN_LCL_KERN_FILES): $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR)/% : % +$(INSTALL_KF_MI_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR); \ + [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ + filename=`$(BASENAME) $<`; \ + filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); + [ -d ./kpincmidir ] || $(MKDIR) ./kpincmidir; \ + echo garbage > ./kpincmidir/$${filename_strip}; \ + $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ + $< > ./kpincmidir/$${filename} || \ + $(DECOMMENT) ./kpincmidir/$${filename} r > \ + ./kpincmidir/$${filename_strip}; \ + if [ -s ./kpincmidir/$${filename_strip} ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\ + ); \ + else \ + echo Header file $< not exported; \ + fi; -INSTALL_MD_INC_FILES = $(addprefix $(SOURCE), $(INSTALL_MD_LIST)) INSTALL_MD_GEN_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_GEN_LIST)) $(INSTALL_MD_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ + [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ filename=`$(BASENAME) $<`; \ filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - $(MKDIR) ./incdir; \ + [ -d ./incdir ] || $(MKDIR) ./incdir; \ echo garbage > ./incdir/$${filename_strip}; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ + $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ $< > ./incdir/$${filename} || \ $(DECOMMENT) ./incdir/$${filename} r > \ ./incdir/$${filename_strip}; \ @@ -209,89 +158,188 @@ $(INSTALL_MD_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/% : % ); \ else \ echo Header file $< not exported; \ - fi; \ - $(RM) -rf /incdir; + fi; + +INSTALL_KF_MD_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_GEN_LIST)) + +$(INSTALL_KF_MD_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/% : % + @true echo Installing $< in $(dir $@); \ + [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ + filename=`$(BASENAME) $<`; \ + filename_strip=$(addsuffix .strip,$${filename}); \ + $(RM) $(RMFLAGS) $@; \ + [ -d ./kincdir ] || $(MKDIR) ./kincdir; \ + echo garbage > ./kincdir/$${filename_strip}; \ + $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ + $< > ./kincdir/$${filename} || \ + $(DECOMMENT) 
./kincdir/$${filename} r > \ + ./kincdir/$${filename_strip}; \ + if [ -s ./kincdir/$${filename_strip} ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\ + ); \ + else \ + echo Header file $< not exported; \ + fi; INSTALL_MD_LCL_FILES = $(addprefix $(SOURCE), $(INSTALL_MD_LCL_LIST)) INSTALL_MD_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_LIST)) $(INSTALL_MD_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ + [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ + filename=`$(BASENAME) $<`; \ + filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); + [ -d ./pincdir ] || $(MKDIR) ./pincdir; \ + echo garbage > ./pincdir/$${filename_strip}; \ + $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ + $< > ./pincdir/$${filename} || \ + $(DECOMMENT) ./pincdir/$${filename} r > \ + ./pincdir/$${filename_strip}; \ + if [ -s ./pincdir/$${filename_strip} ]; \ + then ( \ + install $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\ + ); \ + else \ + echo Header file $< not exported; \ + fi; -INSTALL_MD_LCL_KERN_FILES = $(addprefix $(SOURCE), $(INSTALL_MD_LCL_KERN_LIST)) -INSTALL_MD_GEN_LCL_KERN_FILES = $(addprefix $(KPINCDIR)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_KERN_GEN_LIST)) +INSTALL_KF_MD_LCL_FILES = $(addprefix $(SOURCE), $(INSTALL_KF_MD_LCL_LIST)) +INSTALL_KF_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_LCL_GEN_LIST)) -$(INSTALL_MD_GEN_LCL_KERN_FILES): $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR)/% : % +$(INSTALL_KF_MD_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/% : % @true echo Installing $< in $(dir $@); \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR); \ + [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ + filename=`$(BASENAME) $<`; \ + filename_strip=$(addsuffix .strip,$${filename}); \ $(RM) $(RMFLAGS) $@; \ - install $(INSTALL_FLAGS) $< $(dir $@); + [ -d ./kpincdir ] || $(MKDIR) ./kpincdir; \ + echo garbage > ./kpincdir/$${filename_strip}; \ + $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ + $< > ./kpincdir/$${filename} || \ + $(DECOMMENT) ./kpincdir/$${filename} r > \ + ./kpincdir/$${filename_strip}; \ + if [ -s ./kpincdir/$${filename_strip} ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\ + ); \ + else \ + echo Header file $< not exported; \ + fi; setup_installhdrs_mi: @echo "[ $(SOURCE) ] make setup_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" -do_installhdrs_mi: $(INSTALL_MI_GEN_INC_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL_MI_GEN_LCL_KERN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - @if [ -n "$(strip $(INSTALL_MI_LIST))" ]; then \ +do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL_KF_MI_GEN_FILES) $(INSTALL_KF_MI_LCL_GEN_FILES) + @true echo "[ $(SOURCE) ] make do_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ + $(MKDIR) ./incmidir ./pincmidir ./kincmidir ./kpincmidir; \ + if [ -n "$(strip $(INSTALL_MI_LIST))" ]; then \ if [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR) ]; then \ (cd $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_MI_LIST) ); \ else \ $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ fi; \ - $(MKDIR) ./incdir; \ for j in $(INSTALL_MI_LIST); \ do \ 
- echo garbage > ./incdir/$$j.strip; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ - $(SOURCE)/$$j > ./incdir/$$j || \ - $(DECOMMENT) ./incdir/$$j r > \ - ./incdir/$$j.strip; \ - if [ -s ./incdir/$$j.strip ]; \ + echo garbage > ./incmidir/$$j.strip; \ + $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./incmidir/$$j || \ + $(DECOMMENT) ./incmidir/$$j r > \ + ./incmidir/$$j.strip; \ + if [ -s ./incmidir/$$j.strip ]; \ then ( \ - install $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ + install $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ ); \ else \ echo Header file $$j not exported; \ fi; \ done; \ - $(RM) -rf ./incdir; \ - fi - @if [ -n "$(strip $(INSTALL_MI_LCL_LIST))" ]; then \ + fi; \ + if [ -n "$(strip $(INSTALL_MI_LCL_LIST))" ]; then \ if [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR) ]; then \ (cd $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_MI_LCL_LIST) ); \ - else \ + else \ $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ fi; \ - install $(INSTALL_FLAGS) $(INSTALL_MI_LCL_FILES) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ - fi - @if [ -n "$(strip $(INSTALL_MI_LCL_KERN_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR) ]; then \ - (cd $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_MI_LCL_KERN_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR); \ + for j in $(INSTALL_MI_LCL_LIST); \ + do \ + echo garbage > ./pincmidir/$$j.strip; \ + $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./pincmidir/$$j || \ + $(DECOMMENT) ./pincmidir/$$j r > \ + ./pincmidir/$$j.strip; \ + if [ -s ./pincmidir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + if [ -n "$(strip $(INSTALL_KF_MI_LIST))" ]; then \ + if [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) ]; then \ + (cd $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MI_LIST) ); \ + else \ + $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ fi; \ - install $(INSTALL_FLAGS) $(INSTALL_MI_LCL_KERN_FILES) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MI_DIR); \ - fi + for j in $(INSTALL_KF_MI_LIST); \ + do \ + echo garbage > ./kincmidir/$$j.strip; \ + $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./kincmidir/$$j || \ + $(DECOMMENT) ./kincmidir/$$j r > \ + ./kincmidir/$$j.strip; \ + if [ -s ./kincmidir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + if [ -n "$(strip $(INSTALL_KF_MI_LCL_LIST))" ]; then \ + if [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR) ]; then \ + (cd $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MI_LCL_LIST) ); \ + else \ + $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ + fi; \ + for j in $(INSTALL_KF_MI_LCL_LIST); \ + do \ + echo garbage > ./kpincmidir/$$j.strip; \ + $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./kpincmidir/$$j || \ + $(DECOMMENT) ./kpincmidir/$$j r > \ + ./kpincmidir/$$j.strip; \ + if [ -s ./kpincmidir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + $(RM) -rf ./incmidir ./pincmidir ./kincmidir ./kpincmidir; setup_installhdrs_md: @echo "[ $(SOURCE) ] make setup_installhdrs_md $(KERNEL_CONFIG) 
$(ARCH_CONFIG) $(TARGET)" -do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INSTALL_MI_GEN_LCL_KERN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - @if [ -n "$(strip $(INSTALL_MD_LIST))" ]; then \ +do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INSTALL_KF_MD_GEN_FILES) $(INSTALL_KF_MD_LCL_GEN_FILES) + @true echo "[ $(SOURCE) ] make do_installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ + $(MKDIR) ./incdir ./pincdir ./kincdir ./kpincdir; \ + if [ -n "$(strip $(INSTALL_MD_LIST))" ]; then \ if [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR) ]; then \ (cd $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_MD_LIST) ); \ else \ $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ fi; \ - $(MKDIR) ./incdir; \ for j in $(INSTALL_MD_LIST); \ do \ echo garbage > ./incdir/$$j.strip; \ - $(UNIFDEF) -UKERNEL_PRIVATE -UDRIVER_PRIVATE \ + $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ $(SOURCE)/$$j > ./incdir/$$j || \ $(DECOMMENT) ./incdir/$$j r > \ ./incdir/$$j.strip; \ @@ -303,27 +351,75 @@ do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INS echo Header file $$j not exported; \ fi; \ done; \ - $(RM) -rf ./incdir; \ - fi - @if [ -n "$(strip $(INSTALL_MD_LCL_LIST))" ]; then \ + fi; \ + if [ -n "$(strip $(INSTALL_MD_LCL_LIST))" ]; then \ if [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR) ]; then \ (cd $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_MD_LCL_LIST) ); \ else \ $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ fi; \ - install $(INSTALL_FLAGS) $(INSTALL_MD_LCL_FILES) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ - fi - @if [ -n "$(strip $(INSTALL_MD_LCL_KERN_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR) ]; then \ - (cd $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_MD_LCL_KERN_LIST) ); \ + for j in $(INSTALL_MD_LCL_LIST); \ + do \ + echo garbage > ./pincdir/$$j.strip; \ + $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./pincdir/$$j || \ + $(DECOMMENT) ./pincdir/$$j r > \ + ./pincdir/$$j.strip; \ + if [ -s ./pincdir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + if [ -n "$(strip $(INSTALL_KF_MD_LIST))" ]; then \ + if [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR) ]; then \ + (cd $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MD_LIST) ); \ else \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR); \ + $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ fi; \ - install $(INSTALL_FLAGS) $(INSTALL_MD_LCL_KERN_FILES) $(DSTROOT)/$(KPINCDIR)/$(INSTALL_MD_DIR); \ - fi - + for j in $(INSTALL_KF_MD_LIST); \ + do \ + echo garbage > ./kincdir/$$j.strip; \ + $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./kincdir/$$j || \ + $(DECOMMENT) ./kincdir/$$j r > \ + ./kincdir/$$j.strip; \ + if [ -s ./kincdir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + if [ -n "$(strip $(INSTALL_KF_MD_LCL_LIST))" ]; then \ + if [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR) ]; then \ + (cd $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MD_LCL_LIST) ); \ + else \ + $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ + fi; \ + for j in $(INSTALL_KF_MD_LCL_LIST); \ + do \ + echo garbage > 
./kpincdir/$$j.strip; \ + $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ + $(SOURCE)/$$j > ./kpincdir/$$j || \ + $(DECOMMENT) ./kpincdir/$$j r > \ + ./kpincdir/$$j.strip; \ + if [ -s ./kpincdir/$$j.strip ]; \ + then ( \ + install $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ + ); \ + else \ + echo Header file $$j not exported; \ + fi; \ + done; \ + fi; \ + $(RM) -rf ./incdir ./pincdir ./kincdir ./kpincdir; -endif # # Generic Export rules @@ -335,7 +431,7 @@ EXPORT_MI_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $( $(EXPORT_MI_INC_FILES) $(EXPORT_MI_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : % @true echo Exporting $< in $(dir $@); \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ + [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ ${CP} -p $< $(dir $@); \ @@ -344,7 +440,7 @@ EXPORT_MD_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $( $(EXPORT_MD_INC_FILES) $(EXPORT_MD_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : % @true echo Exporting $< in $(dir $@); \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ + [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ ${CP} -p $< $(dir $@); \ setup_exporthdrs_mi: @@ -366,7 +462,7 @@ EXPORT_MI_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $( $(EXPORT_MI_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : % @true echo Exporting $< in $(dir $@); \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ + [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ ${CP} -p $< $(dir $@); \ @@ -375,7 +471,7 @@ EXPORT_MD_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $( $(EXPORT_MD_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : % @true echo Exporting $< in $(dir $@); \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ + [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ ${CP} -p $< $(dir $@); \ setup_exporthdrs_mi: @@ -443,7 +539,7 @@ $(COMP_COBJ_FILES): $(TARGET)$(COMP_OBJ_DIR)%.o : %.c # # Compilation rules to generate .o from .c for normal files # -C_RULE_1A=${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} +C_RULE_1A=${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${CWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} C_RULE_1B=$*.c C_RULE_2= C_RULE_3= @@ -461,7 +557,7 @@ C_RULE_4_D=${C_RULE_4} # # Compilation rules to generate .o from .m # -M_RULE_1A=${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} +M_RULE_1A=${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${MWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} M_RULE_1B=$*.m M_RULE_2= M_RULE_3= @@ -472,7 +568,7 @@ M_RULE_4= # The config tool slickly changes the last source filename char to 'o' # for the object filename. 
# -P_RULE_1A=${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} +P_RULE_1A=${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${CXXWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} P_RULE_1B=$( $(@:.cpo=.d~) && mv $(@:.cpo=.d~) $(@:.cpo=.d) P_RULE_3= @@ -505,12 +601,10 @@ endif # do_build_mach_kernel: $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/kgmacros @echo "[ building mach_kernel ]"; - $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/kernel_newvers \ - "`${CAT} $(SRCROOT)/osfmk/conf/kernelversion.major`" \ - "`${CAT} $(SRCROOT)/osfmk/conf/kernelversion.minor`" \ - "`${CAT} $(SRCROOT)/osfmk/conf/kernelversion.variant`"; \ - ${KCC} $(CFLAGS) $(INCLUDES) -c kernel_vers.c; \ - $(LD) $(LDFLAGS_KERNEL) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell echo -n $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) kernel_vers.o -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS); \ + @install $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c; + @$(SRCROOT)/config/newvers.pl $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c; + ${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.c -o $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.o + $(LD) $(LDFLAGS_KERNEL) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell echo -n $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/version.o -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS); \ $(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel; $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/kgmacros: $(SRCROOT)/kgmacros @@ -568,7 +662,7 @@ INSTALL_DATA_FILES = $(addprefix $(DSTROOT)$(INSTALL_DATA_DIR), $(INSTALL_DATA_L $(INSTALL_DATA_FILES): $(DSTROOT)$(INSTALL_DATA_DIR)% : $(SOURCE)/% @echo Installing $< in $@; - @$(MKDIR) $(dir $@); \ + @[ -d $(dir $@) ] ||$(MKDIR) $(dir $@); \ $(RM) $(RMFLAGS) $@; \ install $(DATA_INSTALL_FLAGS) $< $(dir $@); @@ -587,13 +681,25 @@ do_installman: $(INSTALL_MAN_FILES) if [ -d $$man_dir ]; then \ cur_dir=`pwd`; \ cd $$man_dir; \ - $(RM) $(RMFLAGS) $(INSTALL_MAN_LIST); \ + $(RM) $(RMFLAGS) $(INSTALL_MAN_LIST) $(INSTALL_MAN_LINKS); \ cd $$cur_dir; \ else \ $(MKDIR) $$man_dir; \ fi; \ echo Installing $(INSTALL_MAN_LIST) in $$man_dir; \ install $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir; \ + if [ -n "$(strip $(INSTALL_MAN_LINKS))" ]; then \ + set `echo ${INSTALL_MAN_LINKS}`; \ + while : ; do \ + case $$# in \ + 0) break;; \ + 1) echo "warn: empty INSTALL_MAN_LINKS: $$1"; break;; \ + esac; \ + link_src=$$1; shift; link_dst=$$1; shift; \ + echo "hard linking $${link_src} to $${link_dst}"; \ + ln -f $${man_dir}/$${link_src} $${man_dir}/$${link_dst} ; \ + done; \ + fi; \ fi $(INSTALL_MAN_FILES): $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/% : % diff --git a/osfmk/Makefile b/osfmk/Makefile index 92ea3b008..77661426e 100644 --- a/osfmk/Makefile +++ b/osfmk/Makefile @@ -9,30 +9,29 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ mach \ - machine \ - default_pager \ device \ + default_pager \ mach_debug \ - profiling \ - UserNotification + kern \ 
+ ipc \ + machine \ + UserNotification \ + vm \ + libsa INSTINC_SUBDIRS_PPC = \ mach \ - ppc \ - profiling + ppc INSTINC_SUBDIRS_I386 = \ mach \ - i386 \ - profiling + i386 EXPINC_SUBDIRS = \ mach \ device \ default_pager \ mach_debug \ - profiling \ - ddb \ kern \ kdp \ ipc \ @@ -43,13 +42,11 @@ EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ mach \ - ppc \ - profiling + ppc EXPINC_SUBDIRS_I386 = \ mach \ - i386 \ - profiling + i386 SETUP_SUBDIRS = \ conf diff --git a/osfmk/UserNotification/KUNCUserNotifications.c b/osfmk/UserNotification/KUNCUserNotifications.c index aec492ca7..21ef970a0 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.c +++ b/osfmk/UserNotification/KUNCUserNotifications.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,12 +23,15 @@ #include #include #include -#include +#include +#include +#include #include -#include #include +#include + #include #include #include @@ -56,6 +59,11 @@ struct UNDReply { #define UNDReply_lock_try(reply) mutex_lock_try(&(reply)->lock) #define UNDReply_unlock(reply) mutex_unlock(&(reply)->lock) +/* forward declarations */ +void UNDReply_deallocate( + UNDReplyRef reply); + + void UNDReply_deallocate( UNDReplyRef reply) @@ -70,7 +78,7 @@ UNDReply_deallocate( UNDReply_unlock(reply); ipc_port_dealloc_kernel(port); - kfree((vm_offset_t)reply, sizeof(struct UNDReply)); + kfree(reply, sizeof(struct UNDReply)); return; } @@ -108,7 +116,7 @@ UNDAlertCompletedWithResult_rpc ( CFStringRef xmlError = NULL; CFDictionaryRef dict = NULL; #else - void *dict = (void *)keyRef; + const void *dict = (const void *)keyRef; #endif if (reply == UND_REPLY_NULL || !reply->inprogress) @@ -182,10 +190,10 @@ KUNCGetNotificationID() if (reply != UND_REPLY_NULL) { reply->self_port = ipc_port_alloc_kernel(); if (reply->self_port == IP_NULL) { - kfree((vm_offset_t)reply, sizeof(struct UNDReply)); + kfree(reply, sizeof(struct UNDReply)); reply = UND_REPLY_NULL; } else { - mutex_init(&reply->lock, ETAP_IO_UNDREPLY); + mutex_init(&reply->lock, 0); reply->userLandNotificationKey = -1; reply->inprogress = FALSE; ipc_kobject_set(reply->self_port, @@ -229,7 +237,7 @@ kern_return_t KUNCUserNotificationCancel( } reply->inprogress = FALSE; - if (ulkey = reply->userLandNotificationKey) { + if ((ulkey = reply->userLandNotificationKey) != 0) { UNDServerRef UNDServer; reply->userLandNotificationKey = 0; @@ -251,7 +259,7 @@ kern_return_t KUNCUserNotificationCancel( kern_return_t KUNCUserNotificationDisplayNotice( - int timeout, + int noticeTimeout, unsigned flags, char *iconPath, char *soundPath, @@ -266,7 +274,7 @@ KUNCUserNotificationDisplayNotice( if (IP_VALID(UNDServer)) { kern_return_t kr; kr = UNDDisplayNoticeSimple_rpc(UNDServer, - timeout, + noticeTimeout, flags, iconPath, soundPath, @@ -282,7 +290,7 @@ KUNCUserNotificationDisplayNotice( kern_return_t KUNCUserNotificationDisplayAlert( - int timeout, + int alertTimeout, unsigned flags, char *iconPath, char *soundPath, @@ -300,7 +308,7 @@ KUNCUserNotificationDisplayAlert( if (IP_VALID(UNDServer)) { kern_return_t kr; kr = UNDDisplayAlertSimple_rpc(UNDServer, - timeout, + alertTimeout, flags, iconPath, soundPath, @@ -326,7 +334,7 @@ KUNCUserNotificationDisplayFromBundle( char *messageKey, char *tokenString, KUNCUserNotificationCallBack callback, - int contextKey) + __unused int contextKey) { UNDReplyRef reply = (UNDReplyRef)id; UNDServerRef UNDServer; @@ -339,7 +347,7 @@ 
KUNCUserNotificationDisplayFromBundle( UNDReply_unlock(reply); return KERN_INVALID_ARGUMENT; } - reply->inprogress == TRUE; + reply->inprogress = TRUE; reply->callback = callback; reply_port = ipc_port_make_send(reply->self_port); UNDReply_unlock(reply); diff --git a/osfmk/UserNotification/KUNCUserNotifications.h b/osfmk/UserNotification/KUNCUserNotifications.h index 0351f1d87..6b1f816b8 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.h +++ b/osfmk/UserNotification/KUNCUserNotifications.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -37,7 +37,7 @@ __BEGIN_DECLS */ kern_return_t KUNCUserNotificationDisplayNotice( - int timeout, + int noticeTimeout, unsigned flags, char *iconPath, char *soundPath, @@ -52,7 +52,7 @@ KUNCUserNotificationDisplayNotice( */ kern_return_t KUNCUserNotificationDisplayAlert( - int timeout, + int alertTimeout, unsigned flags, char *iconPath, char *soundPath, @@ -94,32 +94,60 @@ KUNCExecute( * * Key Type * Header string (header displayed on dialog) + * corresponds to kCFUserNotificationAlertHeaderKey + * * Icon URL string (url of the icon to display) + * corresponds to kCFUserNotificationIconURLKey + * * Sound URL string (url of the sound to play on display) + * corresponds to kCFUserNotificationSoundURLKey + * * Localization URL string (url of bundle to retrieve localization * info from, using Localizable.strings files) + * corresponds to kCFUserNotificationLocalizationURLKey + * * Message string (text of the message, can contain %@'s * which are filled from tokenString passed in) + * corresponds to kCFUserNotificationAlertMessageKey + * * OK Button Title string (title of the "main" button) - * Alternate Button Title string (title of the "alternate" button - - * usually cancel) + * corresponds to kCFUserNotificationDefaultButtonTitleKey + * + * Alternate Button Title string (title of the "alternate" button, usually cancel) + * corresponds to kCFUserNotificationAlternateButtonTitleKey + * * Other Button Title string (title of the "other" button) + * corresponds to kCFUserNotificationOtherButtonTitleKey + * * Timeout string (numeric, int - seconds until the dialog * goes away on it's own) - * Alert Level string (Stop, Notice, Alert, + * + * Alert Level string (Stop, Notice, Alert) + * * Blocking Message string (numeric, 1 or 0 - if 1, the dialog will * have no buttons) + * * Text Field Strings array of strings (each becomes a text field) + * corresponds to kCFUserNotificationTextFieldTitlesKey + * * Password Fields array of strings (numeric - each indicates a * pwd field) + * * Popup Button Strings array of strings (each entry becomes a popup * button string) + * * Radio Button Strings array of strings (each becomes a radio button) + * * Check Box Strings array of strings (each becomes a check box) + * corresponds to kCFUserNotificationCheckBoxTitlesKey + * * Selected Radio string (numeric - which radio is selected) + * * Checked Boxes array of strings (numeric - each indicates a * checked field) + * * Selected Popup string (numeric - which popup entry is selected) + * */ /* @@ -175,14 +203,14 @@ enum { */ typedef void (*KUNCUserNotificationCallBack)( - int contextKey, - int responseFlags, - void *xmlData); + int contextKey, + int responseFlags, + void *xmlData); /* * Get a notification ID */ -KUNCUserNotificationID KUNCGetNotificationID(); +KUNCUserNotificationID KUNCGetNotificationID(void); /* This function 
currently requires a bundle path, which kexts cannot currently get. In the future, the CFBundleIdentiofier of the kext will be pass in in place of the bundlePath. */ diff --git a/osfmk/UserNotification/UNDReply.defs b/osfmk/UserNotification/UNDReply.defs index 87bb8b6ac..58d613224 100644 --- a/osfmk/UserNotification/UNDReply.defs +++ b/osfmk/UserNotification/UNDReply.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * diff --git a/osfmk/UserNotification/UNDRequest.defs b/osfmk/UserNotification/UNDRequest.defs index e73bd0391..959faa9f3 100644 --- a/osfmk/UserNotification/UNDRequest.defs +++ b/osfmk/UserNotification/UNDRequest.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -83,7 +83,7 @@ simpleroutine UNDCancelNotification_rpc( */ simpleroutine UNDDisplayNoticeSimple_rpc( server: UNDServerRef; - in timeout: int; + in rpctimeout: int; in flags: unsigned; in iconPath:UNDLabel; in soundPath:UNDLabel; @@ -100,7 +100,7 @@ simpleroutine UNDDisplayNoticeSimple_rpc( */ routine UNDDisplayAlertSimple_rpc( server: UNDServerRef; - in timeout: int; + in rpctimeout: int; in flags: unsigned; in iconPath:UNDLabel; in soundPath:UNDLabel; diff --git a/osfmk/UserNotification/UNDTypes.h b/osfmk/UserNotification/UNDTypes.h index 32581eec0..02838bf34 100644 --- a/osfmk/UserNotification/UNDTypes.h +++ b/osfmk/UserNotification/UNDTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -49,7 +49,10 @@ typedef const char * xmlData_t; */ typedef struct UNDReply *UNDReplyRef; +#include +__BEGIN_DECLS extern UNDReplyRef convert_port_to_UNDReply(mach_port_t); +__END_DECLS #else /* !MACH_KERNEL_PRIVATE */ diff --git a/osfmk/conf/MASTER b/osfmk/conf/MASTER index ba9c83411..feca58dec 100644 --- a/osfmk/conf/MASTER +++ b/osfmk/conf/MASTER @@ -123,36 +123,8 @@ options KDEBUG # kernel tracing # # MACH_COUNTERS enables code that handles various counters in the system. # options MACH_COUNTERS # # -# -# ETAP The Event Trace Analysis Package enables user-level tasks to monitor -# and analyze kernel events. ETAP supports three modes of tracing: -# -# 1. General event tracing: ETAP_EVENT_MONITOR -# 2. Monitored lock tracing: ETAP_LOCK_MONITOR -# 3. Cumulative lock tracing: ETAP_LOCK_ACCUMULATE -# -# Each of these trace modes are mutually exclusive. -# -# CONFIGURING ETAP: To enable the trace package, the ETAP switch -# along with *ONE* ETAP trace mode is selected. The selected ETAP -# mode determines the level of instrumentation built into the kernel. -# Mode 1 configures event probes through-out the system. Modes 2 & 3 -# add instumentation to the kernel lock mechanisms. -# -# ETAP (and all its trace modes) is mutually exclusive with the -# MACH_LDEBUG option. It is assumed that general lock debugging is -# completed before gathering event information. -# -# ETAP functionality is normally only enabled for event profiling and -# performance studies. Event tracing should not be enabled for release -# configurations, as the code size and performance impact of these -# options are significant. 
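[Annotation] The KUNCUserNotifications hunks above rename the timeout parameters to noticeTimeout, alertTimeout, and (in the MIG .defs) rpctimeout, presumably to stop them shadowing the BSD timeout() routine once stricter warnings are enabled; they also turn the no-op comparison reply->inprogress == TRUE into a real assignment and move kfree() callers to its new void * prototype. A minimal caller-side sketch of the renamed notice entry point; only the first four parameters are visible in the hunks, so the trailing header/message/button arguments are taken from the shipped header and should be treated as assumptions here:

```c
/*
 * Sketch only: posts a one-shot notice from a kext. The last three
 * arguments (header, message, default button title) follow the shipped
 * KUNCUserNotifications.h and are assumptions here.
 */
#include <UserNotification/KUNCUserNotifications.h>

static void
example_post_notice(void)
{
	kern_return_t kr;

	kr = KUNCUserNotificationDisplayNotice(
		0,		/* noticeTimeout: renamed from 'timeout' */
		0,		/* flags */
		NULL,		/* iconPath */
		NULL,		/* soundPath */
		NULL,		/* localizationPath */
		"Example",	/* alert header */
		"A notice posted from kernel space.",
		"OK");		/* default button title */
	if (kr != KERN_SUCCESS)
		printf("DisplayNotice failed: %d\n", kr);
}
```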
-# -# -#options ETAP # ETAP enable -#options ETAP_EVENT_MONITOR # Monitor events -#options ETAP_LOCK_MONITOR # Monitor lock behavior -#options ETAP_LOCK_ACCUMULATE # Collect cumulative lock data + +options UPL_DEBUG # # ########################################################## # diff --git a/osfmk/conf/MASTER.i386 b/osfmk/conf/MASTER.i386 index 7ce7d8f0a..1dc361d30 100644 --- a/osfmk/conf/MASTER.i386 +++ b/osfmk/conf/MASTER.i386 @@ -20,7 +20,6 @@ machine "i386" # cpu "i386" # -pseudo-device cpus 4 pseudo-device com 2 pseudo-device vc 1 @@ -39,7 +38,6 @@ options FP_EMUL # floating point emulation # options PC_SUPPORT # virtual PC support # options PROFILE # kernel profiling # options UXPR # user-level XPR package # -options STAT_TIME # time stats config mach_kernel swap generic # options GPROF # kgmon profiling # @@ -52,4 +50,5 @@ options MACH_PE # # #options DDB # Inline debugger # options MACH_KDP # KDP # +#options PAE diff --git a/osfmk/conf/MASTER.ppc b/osfmk/conf/MASTER.ppc index 7511c465e..850c57ddd 100644 --- a/osfmk/conf/MASTER.ppc +++ b/osfmk/conf/MASTER.ppc @@ -18,16 +18,6 @@ ###################################################################### # ############################################################################## -# -# Statistics and timing options. -# -# STAT_TIME indicates that this machine uses a statistical timer for gathering -# usage statistics, and has no higher resolution timer to measure actual -# intervals. -# -options STAT_TIME -#options MACH_MACHINE_ROUTINES - # # MACH_PROF enables code for mach profiling. # @@ -42,11 +32,10 @@ options PROFILE # kernel profiling # machine "ppc" cpu "ppc" -pseudo-device cpus 2 pseudo-device scc 1 pseudo-device vc 1 - +options MACHINE_TIMER_ROUTINES # Disabled by default, since mklinux does not need this # unless running multiserver - the atalk stack at time of diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile index 131bd8c56..38fc57eba 100644 --- a/osfmk/conf/Makefile +++ b/osfmk/conf/Makefile @@ -3,6 +3,240 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir +# +# VM should be warning free +# +export device_vm.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export device_vm.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export memory_object.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export memory_object.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export task_working_set.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export task_working_set.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_debug.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_debug.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_external.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_external.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_fault.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_fault.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_init.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_init.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_kern.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_kern.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_map.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_map.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_object.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_object.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_pageout.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_pageout.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_resident.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_resident.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_shared_memory_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export 
vm_shared_memory_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export bsd_vm.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export bsd_vm.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export default_pager.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export default_pager.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export dp_backing_store.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export dp_backing_store.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export dp_memory_object.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export dp_memory_object.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export default_pager_alerts_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export default_pager_alerts_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export default_pager_alerts_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export default_pager_alerts_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export memory_object_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export memory_object_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export memory_object_control_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export memory_object_control_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export memory_object_default_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export memory_object_default_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export memory_object_name_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export memory_object_name_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export upl_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export upl_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export vm_map_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export vm_map_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) + +# +# ipc should be warning free +# +export ipc_entry.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_entry.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_hash.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_hash.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_init.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_init.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_kmsg.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_kmsg.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_mqueue.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_mqueue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_notify.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_notify.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_object.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_object.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_port.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_port.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_pset.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_pset.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_right.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_right.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_space.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_space.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_splay.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_splay.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_table.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_table.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_debug.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_debug.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_msg.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_msg.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_port.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_port.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mig_log.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mig_log.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_clock.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export 
ipc_clock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_host.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_host.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_kobject.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_kobject.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_mig.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_mig.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_sync.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_sync.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ipc_tt.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ipc_tt.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export sync_lock.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export sync_lock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export sync_sema.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export sync_sema.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_port_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_port_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export lock_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export lock_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export semaphore_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export semaphore_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) + +# +# kern should be warning free (almost) +# +# export debug.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export debug.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# export printf.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export printf.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# export xpr.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export xpr.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# export mk_sp.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export mk_sp.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# export syscall_emulation.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export syscall_emulation.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# export bsd_kern.o_CFLAGS_RM=$(CWARNFLAGS_STD) +# export bsd_kern.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +# +export ast.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ast.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export clock.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export clock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export counters.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export counters.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export exception.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export exception.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export host.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export host.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export host_notify.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export host_notify.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export kalloc.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export kalloc.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ledger.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ledger.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export locks.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export locks.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_clock.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_clock.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_factor.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_factor.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export machine.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export machine.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mk_timer.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mk_timer.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export profile.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export profile.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export priority.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export priority.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export processor.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export processor.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export processor_data.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export processor_data.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) 
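[Annotation] One practical consequence of these per-object exports: every file compiled with -Werror $(CWARNFLAGS_STD) now treats unused-parameter diagnostics as hard errors. Later hunks in this patch (text_console.c, KUNCUserNotifications.c) therefore keep their function signatures intact and annotate the dead parameters with __unused, which Darwin's sys/cdefs.h maps to __attribute__((unused)). A minimal sketch of the pattern:

```c
#include <sys/cdefs.h>

/*
 * 'top' and 'bottom' stay in the signature for source compatibility,
 * but are marked __unused so a -Werror build does not reject the file.
 */
static void
scroll_stub(int lines, __unused int top, __unused int bottom)
{
	(void)lines;	/* ... scroll 'lines' rows; region args ignored ... */
}
```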
+export queue.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export queue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export sched_prim.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export sched_prim.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export sscanf.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export sscanf.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export stack.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export stack.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export startup.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export startup.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export syscall_subr.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export syscall_subr.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export syscall_sw.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export syscall_sw.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export task.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export task.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export task_policy.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export task_policy.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export task_swap.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export task_swap.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread_act.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread_act.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread_policy.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread_policy.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread_swap.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread_swap.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export timer.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export timer.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export timer_call.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export timer_call.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export wait_queue.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export wait_queue.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export zalloc.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export zalloc.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export clock_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export clock_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export clock_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export clock_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export clock_reply_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export clock_reply_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export exc_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export exc_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export exc_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export exc_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export host_priv_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export host_priv_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export host_security_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export host_security_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export ledger_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export ledger_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_host_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_host_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export mach_notify_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export mach_notify_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export processor_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export processor_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export processor_set_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export processor_set_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export prof_user.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export prof_user.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export task_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export 
task_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) +export thread_act_server.o_CFLAGS_RM=$(CWARNFLAGS_STD) +export thread_act_server.o_CFLAGS_ADD=-Werror $(CWARNFLAGS_STD) include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386 index 763f8a3e8..ae239e230 100644 --- a/osfmk/conf/Makefile.i386 +++ b/osfmk/conf/Makefile.i386 @@ -5,6 +5,42 @@ CFLAGS+= -DAT386=1 SFLAGS+= -DAT386=1 +# Enable -Werror for i386 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR= \ + ioconf.o \ + UNDRequest.o \ + KUNCUserNotifications.o \ + panic_dialog.o \ + panic_image.o \ + rendered_numbers.o \ + video_console.o \ + iokit_rpc.o \ + subrs.o \ + kdp.o \ + kdp_udp.o \ + bsd_kern.o \ + debug.o \ + kmod.o \ + mk_sp.o \ + printf.o \ + syscall_emulation.o \ + mach_header.o + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + + + +# Files that must go in the __HIB segment: +HIB_FILES= \ + gdt.o \ + idt.o + ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/osfmk/conf/Makefile.ppc b/osfmk/conf/Makefile.ppc index eca596885..60251be9b 100644 --- a/osfmk/conf/Makefile.ppc +++ b/osfmk/conf/Makefile.ppc @@ -11,13 +11,16 @@ makedis: $(SRCROOT)/osfmk/ddb/makedis.c ppc_disasm.o_CFLAGS_ADD = -Dperror=db_printf -Dexit=db_error -Dmalloc=db_disasm_malloc -ppc_disasm : $(SRCROOT)/osfmk/ppc/ppc_disasm.i makedis +ppc_disasm.c ppc_disasm.h : $(SRCROOT)/osfmk/ppc/ppc_disasm.i makedis ./makedis -w -h ./ppc_disasm.h $(SOURCE_DIR)/osfmk/ppc/ppc_disasm.i > ./ppc_disasm.c -ppc_disasm.c ppc_disasm.h : ppc_disasm db_disasm.o : ppc_disasm.h +# Files that must go in the __HIB segment: +HIB_FILES= \ + + ###################################################################### #END Machine dependent Makefile fragment for ppc ###################################################################### diff --git a/osfmk/conf/Makefile.template b/osfmk/conf/Makefile.template index 7b918321e..aab83c2ec 100644 --- a/osfmk/conf/Makefile.template +++ b/osfmk/conf/Makefile.template @@ -47,15 +47,6 @@ COMP_SUBDIRS_I386 = \ # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright.osf \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright.cmu - # # Theses macros are filled in by the config program depending on the # current configuration. 
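[Annotation] The new HIB_FILES lists (gdt.o and idt.o for i386, empty for ppc) feed the post-compile loop in the Makefile.template hunk just below: each named object is rewritten with $(SEG_HACK) so its contents land in the __HIB segment, presumably so the descriptor tables are resident early in the wake-from-hibernation path. For a single symbol the compile-time equivalent would be a Mach-O section attribute; a hypothetical illustration, not what this build actually does:

```c
/* Hypothetical: steer one variable into the __HIB segment at compile
 * time instead of post-processing the whole object with seg_hack. */
static char hib_scratch[64] __attribute__((section("__HIB,__data")));
```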
The MACHDEP macro is replaced by the @@ -91,12 +82,13 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) assym.s @echo "[ creating $(COMPONENT).o ]" - $(RM) $(RMFLAGS) vers.c - $(COMPOBJROOT)/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - ${KCC} $(CFLAGS) $(INCLUDES) -c vers.c + for hib_file in ${HIB_FILES}; \ + do \ + $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \ + mv $${hib_file}__ $${hib_file} ; \ + done; @echo [ updating $(COMPONENT).o ${OSFMK_KERNEL_CONFIG} ] - $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} do_all: $(COMPONENT).o diff --git a/osfmk/conf/files b/osfmk/conf/files index cfdab7c5e..7514d7843 100644 --- a/osfmk/conf/files +++ b/osfmk/conf/files @@ -1,3 +1,4 @@ + # # @OSF_COPYRIGHT@ # @@ -26,21 +27,12 @@ # the rights to redistribute these changes. # -# -# N.B. "kern/lock.c" is listed as "optional cpus" so that config will -# create a "cpus.h" file. -# - OPTIONS/dli optional dli -OPTIONS/etap optional etap -OPTIONS/etap_lock_accumulate optional etap_lock_accumulate -OPTIONS/etap_lock_monitor optional etap_lock_monitor -OPTIONS/etap_event_monitor optional etap_event_monitor -OPTIONS/fast_idle optional fast_idle OPTIONS/kdebug optional kdebug OPTIONS/mach_assert optional mach_assert OPTIONS/mach_debug optional mach_debug OPTIONS/mach_machine_routines.h optional mach_machine_routines +OPTIONS/machine_timer_routines optional machine_timer_routines # OPTIONS/norma_vm optional norma_vm OPTIONS/norma_task optional norma_task @@ -56,7 +48,6 @@ OPTIONS/mach_kgdb optional mach_kgdb OPTIONS/mach_kdp optional mach_kdp OPTIONS/mach_kprof optional mach_kprof OPTIONS/mach_ldebug optional mach_ldebug -OPTIONS/mach_lock_mon optional mach_lock_mon OPTIONS/mach_mp_debug optional mach_mp_debug OPTIONS/mach_pagemap optional mach_pagemap OPTIONS/mach_prof optional mach_prof @@ -70,7 +61,6 @@ OPTIONS/mach_tr optional mach_tr OPTIONS/mach_vm_debug optional mach_vm_debug OPTIONS/mach_page_hash_stats optional mach_page_hash_stats OPTIONS/mig_debug optional mig_debug -OPTIONS/simple_clock optional simple_clock OPTIONS/stat_time optional stat_time OPTIONS/time_stamp optional time_stamp OPTIONS/xpr_debug optional xpr_debug @@ -142,8 +132,6 @@ osfmk/kern/clock.c standard osfmk/kern/counters.c standard osfmk/kern/debug.c standard osfmk/kern/exception.c standard -osfmk/kern/etap.c standard -osfmk/kern/etap_pool.c optional etap osfmk/kern/host.c standard osfmk/kern/host_notify.c standard osfmk/kern/ipc_clock.c standard @@ -154,10 +142,8 @@ osfmk/kern/ipc_sync.c standard osfmk/kern/ipc_tt.c standard osfmk/kern/kalloc.c standard osfmk/kern/ledger.c standard -osfmk/kern/lock.c optional cpus -osfmk/kern/lock_mon.c optional mach_lock_mon +osfmk/kern/locks.c standard osfmk/kern/mach_clock.c standard -osfmk/kern/mach_factor.c standard osfmk/kern/machine.c standard osfmk/kern/mk_sp.c standard osfmk/kern/mk_timer.c standard @@ -165,9 +151,11 @@ osfmk/kern/profile.c standard osfmk/kern/printf.c standard osfmk/kern/priority.c standard osfmk/kern/processor.c standard +osfmk/kern/processor_data.c standard osfmk/kern/queue.c standard +osfmk/kern/sched_average.c standard osfmk/kern/sched_prim.c standard -osfmk/kern/sscanf.c standard +osfmk/kern/stack.c standard osfmk/kern/startup.c standard osfmk/kern/sync_lock.c standard osfmk/kern/sync_sema.c standard @@ -181,7 +169,6 @@ osfmk/kern/thread.c standard osfmk/kern/thread_act.c standard osfmk/kern/thread_call.c standard osfmk/kern/thread_policy.c standard -osfmk/kern/thread_swap.c standard 
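[Annotation] The files hunk above is where the lock rework lands in the build: kern/lock.c (optional on cpus) and lock_mon.c drop out in favor of kern/locks.c, with machine-level halves added as locks_i386.c and locks_ppc.c in the per-architecture hunks that follow. The KUNCUserNotifications hunk earlier already showed the caller-side fallout, with the ETAP event argument of mutex_init() collapsing to 0. Outside osfmk the rework surfaces as the lock-group API; a short usage sketch, assuming the lck_grp/lck_mtx entry points of kern/locks.h:

```c
#include <kern/locks.h>

static lck_grp_t	*example_grp;
static lck_mtx_t	*example_mtx;

static void
example_locks_init(void)
{
	/* every lock now belongs to a named group for accounting */
	example_grp = lck_grp_alloc_init("com.example.driver",
	    LCK_GRP_ATTR_NULL);
	example_mtx = lck_mtx_alloc_init(example_grp, LCK_ATTR_NULL);
}

static void
example_critical_section(void)
{
	lck_mtx_lock(example_mtx);
	/* ... state protected by example_mtx ... */
	lck_mtx_unlock(example_mtx);
}
```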
osfmk/kern/timer.c standard osfmk/kern/timer_call.c standard osfmk/kern/wait_queue.c standard @@ -200,6 +187,7 @@ osfmk/kern/bsd_kern.c optional mach_bsd ./mach/mach_host_server.c standard ./mach/mach_notify_user.c standard ./mach/mach_port_server.c standard +./mach/mach_vm_server.c standard ./mach/memory_object_server.c standard ./mach/memory_object_control_server.c standard ./mach/memory_object_default_server.c standard diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386 index 8b4b58d37..abd788656 100644 --- a/osfmk/conf/files.i386 +++ b/osfmk/conf/files.i386 @@ -16,8 +16,6 @@ OPTIONS/dynamic_num_nodes optional dynamic_num_nodes OPTIONS/vtoc_compat optional vtoc_compat OPTIONS/fddi optional fddi - - osfmk/i386/hi_res_clock_map.c optional hi_res_clock osfmk/i386/pmap.c standard @@ -35,14 +33,16 @@ osfmk/i386/bcopy.s standard osfmk/i386/bzero.s standard osfmk/i386/cpu.c standard osfmk/i386/cpuid.c standard +osfmk/i386/cpu_threads.c standard osfmk/i386/db_disasm.c optional mach_kdb osfmk/i386/db_interface.c optional mach_kdb osfmk/i386/db_trace.c optional mach_kdb osfmk/i386/fpu.c standard osfmk/i386/gcc.s standard osfmk/i386/gdt.c standard -osfmk/i386/hardclock.c standard osfmk/i386/i386_lock.s standard +osfmk/i386/i386_init.c standard +osfmk/i386/i386_vm_init.c standard osfmk/i386/idt.s standard osfmk/i386/io_emulate.c standard osfmk/i386/io_map.c standard @@ -50,6 +50,7 @@ osfmk/i386/iopb.c standard osfmk/i386/ktss.c standard osfmk/i386/ldt.c standard osfmk/i386/loose_ends.c standard +osfmk/i386/locks_i386.c standard osfmk/i386/locore.s standard osfmk/i386/start.s standard osfmk/i386/cswitch.s standard @@ -58,15 +59,15 @@ osfmk/i386/machine_routines_asm.s standard osfmk/i386/mcount.s optional profile osfmk/i386/mp_desc.c standard osfmk/i386/ntoh.s standard +osfmk/i386/perfmon.c standard osfmk/i386/pcb.c standard osfmk/i386/phys.c standard osfmk/i386/rtclock.c standard osfmk/i386/trap.c standard osfmk/i386/user_ldt.c standard -osfmk/i386/i386_init.c standard -osfmk/i386/i386_vm_init.c standard osfmk/i386/commpage/commpage.c standard +osfmk/i386/commpage/atomic.s standard osfmk/i386/commpage/commpage_mach_absolute_time.s standard osfmk/i386/commpage/spinlocks.s standard osfmk/i386/commpage/pthreads.s standard @@ -74,7 +75,6 @@ osfmk/i386/commpage/cacheflush.s standard osfmk/i386/commpage/commpage_gettimeofday.s standard osfmk/i386/commpage/bcopy_scalar.s standard osfmk/i386/commpage/bzero_scalar.s standard -osfmk/i386/commpage/commpage_sigs.s standard osfmk/i386/AT386/autoconf.c standard osfmk/i386/AT386/bbclock.c standard @@ -86,6 +86,11 @@ osfmk/i386/AT386/physmem.c optional physmem device-driver osfmk/i386/mp.c standard osfmk/i386/mp_slave_boot.s standard +osfmk/i386/acpi.c standard +osfmk/i386/acpi_wakeup.s standard + +osfmk/i386/mtrr.c standard + osfmk/console/i386/serial_console.c optional com device-driver osfmk/console/i386/kdasm.s optional vc device-driver @@ -97,18 +102,20 @@ osfmk/console/i386/video_scroll.c optional vc device-driver osfmk/kern/etap_map.c optional etap device-driver -osfmk/profiling/i386/profile-md.c optional gprof -osfmk/profiling/i386/profile-asm.s optional gprof -osfmk/profiling/profile-kgmon.c optional gprof +#osfmk/profiling/i386/profile-md.c optional gprof +#osfmk/profiling/i386/profile-asm.s optional gprof +#osfmk/profiling/profile-kgmon.c optional gprof #osfmk/profiling/profile-mk.c optional gprof osfmk/kdp/ml/i386/kdp_machdep.c optional mach_kdp osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp + # DUMMIES TO FORCE GENERATION OF .h FILES 
osfmk/OPTIONS/ln optional ln osfmk/OPTIONS/eisa optional eisa osfmk/OPTIONS/himem optional himem osfmk/OPTIONS/ec optional ec osfmk/OPTIONS/hi_res_clock optional hi_res_clock + diff --git a/osfmk/conf/files.ppc b/osfmk/conf/files.ppc index 96fb09479..d985923de 100644 --- a/osfmk/conf/files.ppc +++ b/osfmk/conf/files.ppc @@ -30,6 +30,7 @@ osfmk/ppc/cpu.c standard osfmk/ppc/ppc_init.c standard osfmk/ppc/ppc_vm_init.c standard osfmk/ppc/model_dep.c standard +osfmk/ppc/locks_ppc.c standard osfmk/ppc/pmap.c standard osfmk/ppc/mappings.c standard osfmk/ppc/savearea.c standard @@ -51,7 +52,6 @@ osfmk/ppc/bzero.s standard osfmk/ppc/bcopy.s standard osfmk/ppc/atomic_switch.s standard osfmk/ppc/PseudoKernel.c standard -osfmk/ppc/misc.c standard osfmk/ppc/interrupt.c standard osfmk/ppc/machine_routines.c standard osfmk/ppc/machine_routines_asm.s standard @@ -86,6 +86,11 @@ osfmk/ppc/commpage/mach_absolute_time.s standard osfmk/ppc/commpage/pthread.s standard osfmk/ppc/commpage/spinlocks.s standard osfmk/ppc/commpage/bigcopy_970.s standard +osfmk/ppc/commpage/atomic.s standard +osfmk/ppc/commpage/memset_64.s standard +osfmk/ppc/commpage/memset_g3.s standard +osfmk/ppc/commpage/memset_g4.s standard +osfmk/ppc/commpage/memset_g5.s standard osfmk/ppc/chud/chud_osfmk_callback.c standard osfmk/ppc/chud/chud_cpu.c standard @@ -105,7 +110,7 @@ osfmk/console/panic_dialog.c optional vc device-driver osfmk/console/video_console.c optional vc device-driver osfmk/console/ppc/video_scroll.s optional vc device-driver + # DUMMIES TO FORCE GENERATION OF .h FILES -OPTIONS/hi_res_clock optional hi_res_clock OPTIONS/bm optional bm OPTIONS/debug optional debug diff --git a/osfmk/conf/kernelversion.major b/osfmk/conf/kernelversion.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/osfmk/conf/kernelversion.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/osfmk/conf/kernelversion.minor b/osfmk/conf/kernelversion.minor deleted file mode 100644 index ec635144f..000000000 --- a/osfmk/conf/kernelversion.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/osfmk/conf/kernelversion.variant b/osfmk/conf/kernelversion.variant deleted file mode 100644 index 573541ac9..000000000 --- a/osfmk/conf/kernelversion.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/osfmk/conf/tools/Makefile b/osfmk/conf/tools/Makefile index fdae6a573..4f9ccd553 100644 --- a/osfmk/conf/tools/Makefile +++ b/osfmk/conf/tools/Makefile @@ -7,15 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = \ - doconf \ - kernel_newvers \ - newvers - -COMP_SUBDIRS = \ - doconf \ - kernel_newvers \ - newvers +SETUP_SUBDIRS = doconf + +COMP_SUBDIRS = doconf INST_SUBDIRS = \ diff --git a/osfmk/conf/tools/kernel_newvers/Makefile b/osfmk/conf/tools/kernel_newvers/Makefile deleted file mode 100644 index 7c749c5bf..000000000 --- a/osfmk/conf/tools/kernel_newvers/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/) -PROGRAM= $(DSTDIR)kernel_newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) 
$(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/osfmk/conf/tools/kernel_newvers/kernel_newvers.csh b/osfmk/conf/tools/kernel_newvers/kernel_newvers.csh deleted file mode 100644 index 19859a46c..000000000 --- a/osfmk/conf/tools/kernel_newvers/kernel_newvers.csh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. -# - -# -# kernel_newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -version="${major}.${minor}" -if [ -n "$variant" ]; then version="${version}.${variant}"; fi - -objdir="${OBJROOT}/${KERNEL_CONFIG}_${ARCH_CONFIG}" - time=`date` - who=`whoami` - -if [ -z "${objdir}" ] || [ -z "${time}" ]; then exit 1; fi - -CONFIG=`expr "${objdir}" : '.*/\([^/]*\)$'` -objdir=`expr "${objdir}" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int version_major = ${major};" ; - /bin/echo "int version_minor = ${minor};" ; - /bin/echo "char version_variant[] = \"${variant}\";" ; - /bin/echo "char version[] = \"Darwin Kernel Version ${version}:\\n${time}; ${who}:${objdir}\\n\\n\";" ; - /bin/echo "char osrelease[] = \"${version}\";" ; - /bin/echo "char ostype[] = \"Darwin\";" ; -) > kernel_vers.c - -if [ -s vers.suffix -o ! 
-f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/osfmk/conf/tools/newvers/Makefile b/osfmk/conf/tools/newvers/Makefile deleted file mode 100644 index 73603c753..000000000 --- a/osfmk/conf/tools/newvers/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/) -PROGRAM= $(DSTDIR)newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/osfmk/conf/tools/newvers/newvers.csh b/osfmk/conf/tools/newvers/newvers.csh deleted file mode 100644 index 802c7ed2a..000000000 --- a/osfmk/conf/tools/newvers/newvers.csh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. -# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"Mach Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"Mach\";" ; -) > vers.c -if [ -s vers.suffix -o ! 
-f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/osfmk/conf/version.major b/osfmk/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/osfmk/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/osfmk/conf/version.minor b/osfmk/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/osfmk/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/osfmk/conf/version.variant b/osfmk/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/osfmk/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c index e60c3e615..6808589ae 100644 --- a/osfmk/console/i386/serial_console.c +++ b/osfmk/console/i386/serial_console.c @@ -21,27 +21,183 @@ */ #include +#include +#include #include +#include +#include +#include +#include + +static struct { + char *buffer; + int len; + int used; + char *write_ptr; + char *read_ptr; + decl_simple_lock_data(,read_lock); + decl_simple_lock_data(,write_lock); +} console_ring; + +typedef struct console_buf { + char *buf_base; + char *buf_end; + char *buf_ptr; +#define CPU_BUFFER_LEN (256 - 3*(sizeof(char*))) + char buf[CPU_BUFFER_LEN]; +} console_buf_t; void -cnputc(char c) +console_init(void) +{ + int ret; + + console_ring.len = PAGE_SIZE; + ret = kmem_alloc(kernel_map, (vm_offset_t *) &console_ring.buffer, + console_ring.len); + if (ret != KERN_SUCCESS) + panic("console_ring_init() " + "failed to allocate ring buffer, error %d\n", ret); + console_ring.used = 0; + console_ring.read_ptr = console_ring.buffer; + console_ring.write_ptr = console_ring.buffer; + simple_lock_init(&console_ring.read_lock, 0); + simple_lock_init(&console_ring.write_lock, 0); + +} + +void * +console_cpu_alloc(__unused boolean_t boot_processor) +{ + int ret; + console_buf_t *cbp; + + ret = kmem_alloc(kernel_map, (vm_offset_t *) &cbp, + sizeof(console_buf_t)); + if (ret != KERN_SUCCESS) { + printf("console_cpu_alloc() " + "failed to allocate cpu buffer, error=%d\n", ret); + return NULL; + } + + cbp->buf_base = (char *) &cbp->buf; + cbp->buf_ptr = cbp->buf_base; + cbp->buf_end = cbp->buf_base + CPU_BUFFER_LEN; + + return (void *) cbp; +} + +void +console_cpu_free(void *buf) +{ + if (buf != NULL) + kfree((void *) buf, sizeof(console_buf_t)); +} + +static boolean_t +console_ring_put(char ch) +{ + if (console_ring.used < console_ring.len) { + console_ring.used++;; + *console_ring.write_ptr++ = ch; + if (console_ring.write_ptr - console_ring.buffer + == console_ring.len) + console_ring.write_ptr = console_ring.buffer; + return TRUE; + } else { + return FALSE; + } +} + +static int +console_ring_get(void) +{ + char ch = 0; + + if (console_ring.used > 0) { + console_ring.used--; + ch = *console_ring.read_ptr++; + if (console_ring.read_ptr - console_ring.buffer + == console_ring.len) + console_ring.read_ptr = console_ring.buffer; + } + return (int) ch; +} + +static inline void +cpu_buffer_put(console_buf_t *cbp, char ch) +{ + if (cbp->buf_ptr < cbp->buf_end) + *(cbp->buf_ptr++) = ch; +} + +static inline void +_cnputc(char c) { - boolean_t nolock = mp_kdp_trap || !ml_get_interrupts_enabled(); - - /* - * Note: this lock prevents other cpus interferring with the - * output is this one character to the console (screen). It - * does not prevent multiple printfs being interleaved - that's - * the responsibility of the caller. 
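[Annotation] The rewritten serial console drops the old global putc lock in favor of a two-stage scheme: the replacement cnputc() just below accumulates characters in a per-CPU console_buf_t, flushes that buffer into the shared ring under write_lock when it sees a newline, and whichever CPU then wins read_lock drains the ring to the display, so at most one CPU touches the screen while the others keep queueing. A user-space model of the ring arithmetic above, with the locks elided:

```c
/*
 * User-space model of the console ring: producers append under
 * write_lock; the CPU that wins read_lock drains to the display.
 * Only the index math is modeled here.
 */
#include <stdio.h>

#define RING_LEN 16

static char ring[RING_LEN];
static int used, rd, wr;

static int
ring_put(char ch)		/* kernel caller holds write_lock */
{
	if (used == RING_LEN)
		return 0;	/* full: kernel caller spins with cpu_pause() */
	used++;
	ring[wr] = ch;
	wr = (wr + 1) % RING_LEN;
	return 1;
}

static int
ring_get(void)			/* kernel drain also holds write_lock */
{
	char ch = 0;

	if (used > 0) {
		used--;
		ch = ring[rd];
		rd = (rd + 1) % RING_LEN;
	}
	return ch;		/* 0 means empty, ending the drain loop */
}

int
main(void)
{
	const char *s = "hello\n";
	int c;

	while (*s)
		ring_put(*s++);
	while ((c = ring_get()) != 0)
		putchar(c);	/* stands in for _cnputc() */
	return 0;
}
```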
Without this lock, - * an unreadable black-on-black or white-on-white display may result. - * We avoid taking this lock, however, if we're in the debugger or - * at interrupt level. - */ - if (!nolock) - simple_lock(&mp_putc_lock); vcputc(0, 0, c); if (c == '\n') vcputc(0, 0,'\r'); - if (!nolock) - simple_unlock(&mp_putc_lock); +} + +void +cnputcusr(char c) +{ + simple_lock(&console_ring.read_lock); + _cnputc(c); + simple_unlock(&console_ring.read_lock); +} + +void +cnputc(char c) +{ + console_buf_t *cbp; + + if (!(real_ncpus > 1)) { + _cnputc(c); + return; + } + + mp_disable_preemption(); + /* add to stack buf */ + cbp = (console_buf_t *) current_cpu_datap()->cpu_console_buf; + if (c != '\n') { + cpu_buffer_put(cbp, c); + } else { + boolean_t state; + char *cp; + + /* Here at end of printf -- time to try to output */ + + /* copy this buffer into the shared ring buffer */ + state = ml_set_interrupts_enabled(FALSE); + simple_lock(&console_ring.write_lock); + for (cp = cbp->buf_base; cp < cbp->buf_ptr; cp++) { + while (!console_ring_put(*cp)) + /* spin if share buffer full */ + cpu_pause(); + } + (void) console_ring_put('\n'); + simple_unlock(&console_ring.write_lock); + ml_set_interrupts_enabled(state); + cbp->buf_ptr = cbp->buf_base; + + /* + * Try to get the read lock on the ring buffer to empty it. + * If this fails someone else is already emptying... + */ + if (simple_lock_try(&console_ring.read_lock)) { + for (;;) { + char ch; + + simple_lock(&console_ring.write_lock); + ch = console_ring_get(); + simple_unlock(&console_ring.write_lock); + if (ch == 0) + break; + _cnputc(ch); + } + simple_unlock(&console_ring.read_lock); + } + } + mp_enable_preemption(); } diff --git a/osfmk/console/i386/text_console.c b/osfmk/console/i386/text_console.c index 6e1258648..a5b7cad90 100644 --- a/osfmk/console/i386/text_console.c +++ b/osfmk/console/i386/text_console.c @@ -28,6 +28,7 @@ #include #include +#include "text_console.h" /* * Macros and typedefs. @@ -47,6 +48,7 @@ typedef short csrpos_t; /* cursor position, ONE_SPACE bytes per char */ /* * Commands sent to graphics adapter. */ +#define VGA_C_START 0x0a /* cursor start position, on/off bit */ #define VGA_C_LOW 0x0f /* return low byte of cursor addr */ #define VGA_C_HIGH 0x0e /* high byte */ @@ -56,6 +58,12 @@ typedef short csrpos_t; /* cursor position, ONE_SPACE bytes per char */ #define VGA_ATTR_NORMAL 0x07 #define VGA_ATTR_REVERSE 0x70 +/* + * Cursor Start Register bit fields. + */ +#define VGA_CURSOR_CS 0x1F +#define VGA_CURSOR_ON 0x20 + /* * Convert from XY coordinate to a location in display memory. */ @@ -64,13 +72,14 @@ typedef short csrpos_t; /* cursor position, ONE_SPACE bytes per char */ /* * Globals. 
*/ -static short vga_idx_reg = 0; /* location of VGA index register */ -static short vga_io_reg = 0; /* location of VGA data register */ -static short vga_cols = 80; /* number of columns */ -static short vga_rows = 25; /* number of rows */ -static char vga_attr = 0; /* current character attribute */ -static char vga_attr_rev = 0; /* current reverse attribute */ -static char * vram_start = 0; /* VM start of VGA frame buffer */ +static short vga_idx_reg = 0; /* location of VGA index register */ +static short vga_io_reg = 0; /* location of VGA data register */ +static short vga_cols = 80; /* number of columns */ +static short vga_rows = 25; /* number of rows */ +static char vga_attr = 0; /* current character attribute */ +static char vga_attr_rev = 0; /* current reverse attribute */ +static char vga_cursor_start = 0; /* cached cursor start scan line */ +static char * vram_start = 0; /* VM start of VGA frame buffer */ /* * Functions in kdasm.s. @@ -139,6 +148,19 @@ set_cursor_position( csrpos_t newpos ) outb(vga_io_reg, (unsigned char)(curpos & 0xff)); } +/* + * set_cursor_enable + * + * Allow the cursor to be turned on or off. + */ +static void +set_cursor_enable( boolean_t enable ) +{ + outb(vga_idx_reg, VGA_C_START); + outb(vga_io_reg, vga_cursor_start | + (enable == TRUE ? VGA_CURSOR_ON : 0)); +} + /* * display_char * @@ -169,7 +191,12 @@ vga_init(int cols, int rows, unsigned char * addr) vga_attr = VGA_ATTR_NORMAL; vga_attr_rev = VGA_ATTR_REVERSE; - set_cursor_position(0); + /* cache cursor start position */ + outb(vga_idx_reg, VGA_C_START); + vga_cursor_start = inb(vga_io_reg) & VGA_CURSOR_CS; + + /* defaults to a hidden hw cursor */ + set_cursor_enable( FALSE ); } /* @@ -178,7 +205,7 @@ vga_init(int cols, int rows, unsigned char * addr) * Scroll the screen up 'n' character lines. */ void -tc_scroll_up( int lines, int top, int bottom ) +tc_scroll_up( int lines, __unused int top, __unused int bottom ) { csrpos_t to; csrpos_t from; @@ -202,7 +229,7 @@ tc_scroll_up( int lines, int top, int bottom ) * Scrolls the screen down 'n' character lines. */ void -tc_scroll_down( int lines, int top, int bottom ) +tc_scroll_down( int lines, __unused int top, __unused int bottom ) { csrpos_t to; csrpos_t from; @@ -284,6 +311,7 @@ void tc_show_cursor( int x, int y ) { set_cursor_position( XY_TO_CSRPOS(x, y) ); + set_cursor_enable( TRUE ); } /* @@ -292,9 +320,9 @@ tc_show_cursor( int x, int y ) * Hide the hardware cursor. */ void -tc_hide_cursor( int x, int y ) +tc_hide_cursor( __unused int x, __unused int y ) { - return; + set_cursor_enable( FALSE ); } /* @@ -304,7 +332,8 @@ tc_hide_cursor( int x, int y ) * relative to the current cursor position. */ void -tc_clear_screen(int x, int y, int top, int bottom, int operation) +tc_clear_screen(int x, int y, __unused int top, __unused int bottom, + int operation) { csrpos_t start; int count; @@ -335,7 +364,8 @@ tc_clear_screen(int x, int y, int top, int bottom, int operation) * and attributes. */ void -tc_paint_char( int x, int y, unsigned char ch, int attrs, unsigned char ch_previous, int attrs_previous ) +tc_paint_char(int x, int y, unsigned char ch, int attrs, + __unused unsigned char ch_previous, __unused int attrs_previous) { char my_attr = vga_attr; @@ -350,7 +380,7 @@ tc_paint_char( int x, int y, unsigned char ch, int attrs, unsigned char ch_previ * Enable / disable the console. 
*/ void -tc_enable(boolean_t enable) +tc_enable(__unused boolean_t enable) { } diff --git a/osfmk/console/i386/video_scroll.c b/osfmk/console/i386/video_scroll.c index f54bba222..41538c872 100644 --- a/osfmk/console/i386/video_scroll.c +++ b/osfmk/console/i386/video_scroll.c @@ -20,16 +20,21 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include +#include + +extern void bcopy(const void *, void *, size_t); + void video_scroll_up(unsigned long start, unsigned long end, unsigned long dest) { - bcopy(start, dest, (end - start) << 2); + bcopy((void *) start, (void *) dest, (end - start) << 2); } void video_scroll_down(unsigned long start, /* HIGH addr */ unsigned long end, /* LOW addr */ unsigned long dest) /* HIGH addr */ { - bcopy(end, dest, (start - end) << 2); + bcopy((void *) end, (void *) dest, (start - end) << 2); } diff --git a/osfmk/console/panic_dialog.c b/osfmk/console/panic_dialog.c index 1cf728780..1e7a26f93 100644 --- a/osfmk/console/panic_dialog.c +++ b/osfmk/console/panic_dialog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -21,356 +21,518 @@ */ #include - #include #include #include +#include +#include +#include -#include "panic_image.c" -#include "rendered_numbers.c" extern struct vc_info vinfo; extern boolean_t panicDialogDesired; -/* panic image clut */ -static const unsigned char *clut = NULL; -extern void panic_ui_initialize(const unsigned char * system_clut); - -/* We use this standard MacOS clut as a fallback */ -static const unsigned char appleClut8[ 256 * 3 ] = { -// 00 - 0xFF,0xFF,0xFF, 0xFF,0xFF,0xCC, 0xFF,0xFF,0x99, 0xFF,0xFF,0x66, - 0xFF,0xFF,0x33, 0xFF,0xFF,0x00, 0xFF,0xCC,0xFF, 0xFF,0xCC,0xCC, - 0xFF,0xCC,0x99, 0xFF,0xCC,0x66, 0xFF,0xCC,0x33, 0xFF,0xCC,0x00, - 0xFF,0x99,0xFF, 0xFF,0x99,0xCC, 0xFF,0x99,0x99, 0xFF,0x99,0x66, -// 10 - 0xFF,0x99,0x33, 0xFF,0x99,0x00, 0xFF,0x66,0xFF, 0xFF,0x66,0xCC, - 0xFF,0x66,0x99, 0xFF,0x66,0x66, 0xFF,0x66,0x33, 0xFF,0x66,0x00, - 0xFF,0x33,0xFF, 0xFF,0x33,0xCC, 0xFF,0x33,0x99, 0xFF,0x33,0x66, - 0xFF,0x33,0x33, 0xFF,0x33,0x00, 0xFF,0x00,0xFF, 0xFF,0x00,0xCC, -// 20 - 0xFF,0x00,0x99, 0xFF,0x00,0x66, 0xFF,0x00,0x33, 0xFF,0x00,0x00, - 0xCC,0xFF,0xFF, 0xCC,0xFF,0xCC, 0xCC,0xFF,0x99, 0xCC,0xFF,0x66, - 0xCC,0xFF,0x33, 0xCC,0xFF,0x00, 0xCC,0xCC,0xFF, 0xCC,0xCC,0xCC, - 0xCC,0xCC,0x99, 0xCC,0xCC,0x66, 0xCC,0xCC,0x33, 0xCC,0xCC,0x00, -// 30 - 0xCC,0x99,0xFF, 0xCC,0x99,0xCC, 0xCC,0x99,0x99, 0xCC,0x99,0x66, - 0xCC,0x99,0x33, 0xCC,0x99,0x00, 0xCC,0x66,0xFF, 0xCC,0x66,0xCC, - 0xCC,0x66,0x99, 0xCC,0x66,0x66, 0xCC,0x66,0x33, 0xCC,0x66,0x00, - 0xCC,0x33,0xFF, 0xCC,0x33,0xCC, 0xCC,0x33,0x99, 0xCC,0x33,0x66, -// 40 - 0xCC,0x33,0x33, 0xCC,0x33,0x00, 0xCC,0x00,0xFF, 0xCC,0x00,0xCC, - 0xCC,0x00,0x99, 0xCC,0x00,0x66, 0xCC,0x00,0x33, 0xCC,0x00,0x00, - 0x99,0xFF,0xFF, 0x99,0xFF,0xCC, 0x99,0xFF,0x99, 0x99,0xFF,0x66, - 0x99,0xFF,0x33, 0x99,0xFF,0x00, 0x99,0xCC,0xFF, 0x99,0xCC,0xCC, -// 50 - 0x99,0xCC,0x99, 0x99,0xCC,0x66, 0x99,0xCC,0x33, 0x99,0xCC,0x00, - 0x99,0x99,0xFF, 0x99,0x99,0xCC, 0x99,0x99,0x99, 0x99,0x99,0x66, - 0x99,0x99,0x33, 0x99,0x99,0x00, 0x99,0x66,0xFF, 0x99,0x66,0xCC, - 0x99,0x66,0x99, 0x99,0x66,0x66, 0x99,0x66,0x33, 0x99,0x66,0x00, -// 60 - 0x99,0x33,0xFF, 0x99,0x33,0xCC, 0x99,0x33,0x99, 0x99,0x33,0x66, - 0x99,0x33,0x33, 0x99,0x33,0x00, 0x99,0x00,0xFF, 0x99,0x00,0xCC, - 0x99,0x00,0x99, 0x99,0x00,0x66, 0x99,0x00,0x33, 0x99,0x00,0x00, - 0x66,0xFF,0xFF, 0x66,0xFF,0xCC, 0x66,0xFF,0x99, 0x66,0xFF,0x66, -// 70 - 
0x66,0xFF,0x33, 0x66,0xFF,0x00, 0x66,0xCC,0xFF, 0x66,0xCC,0xCC, - 0x66,0xCC,0x99, 0x66,0xCC,0x66, 0x66,0xCC,0x33, 0x66,0xCC,0x00, - 0x66,0x99,0xFF, 0x66,0x99,0xCC, 0x66,0x99,0x99, 0x66,0x99,0x66, - 0x66,0x99,0x33, 0x66,0x99,0x00, 0x66,0x66,0xFF, 0x66,0x66,0xCC, -// 80 - 0x66,0x66,0x99, 0x66,0x66,0x66, 0x66,0x66,0x33, 0x66,0x66,0x00, - 0x66,0x33,0xFF, 0x66,0x33,0xCC, 0x66,0x33,0x99, 0x66,0x33,0x66, - 0x66,0x33,0x33, 0x66,0x33,0x00, 0x66,0x00,0xFF, 0x66,0x00,0xCC, - 0x66,0x00,0x99, 0x66,0x00,0x66, 0x66,0x00,0x33, 0x66,0x00,0x00, -// 90 - 0x33,0xFF,0xFF, 0x33,0xFF,0xCC, 0x33,0xFF,0x99, 0x33,0xFF,0x66, - 0x33,0xFF,0x33, 0x33,0xFF,0x00, 0x33,0xCC,0xFF, 0x33,0xCC,0xCC, - 0x33,0xCC,0x99, 0x33,0xCC,0x66, 0x33,0xCC,0x33, 0x33,0xCC,0x00, - 0x33,0x99,0xFF, 0x33,0x99,0xCC, 0x33,0x99,0x99, 0x33,0x99,0x66, -// a0 - 0x33,0x99,0x33, 0x33,0x99,0x00, 0x33,0x66,0xFF, 0x33,0x66,0xCC, - 0x33,0x66,0x99, 0x33,0x66,0x66, 0x33,0x66,0x33, 0x33,0x66,0x00, - 0x33,0x33,0xFF, 0x33,0x33,0xCC, 0x33,0x33,0x99, 0x33,0x33,0x66, - 0x33,0x33,0x33, 0x33,0x33,0x00, 0x33,0x00,0xFF, 0x33,0x00,0xCC, -// b0 - 0x33,0x00,0x99, 0x33,0x00,0x66, 0x33,0x00,0x33, 0x33,0x00,0x00, - 0x00,0xFF,0xFF, 0x00,0xFF,0xCC, 0x00,0xFF,0x99, 0x00,0xFF,0x66, - 0x00,0xFF,0x33, 0x00,0xFF,0x00, 0x00,0xCC,0xFF, 0x00,0xCC,0xCC, - 0x00,0xCC,0x99, 0x00,0xCC,0x66, 0x00,0xCC,0x33, 0x00,0xCC,0x00, -// c0 - 0x00,0x99,0xFF, 0x00,0x99,0xCC, 0x00,0x99,0x99, 0x00,0x99,0x66, - 0x00,0x99,0x33, 0x00,0x99,0x00, 0x00,0x66,0xFF, 0x00,0x66,0xCC, - 0x00,0x66,0x99, 0x00,0x66,0x66, 0x00,0x66,0x33, 0x00,0x66,0x00, - 0x00,0x33,0xFF, 0x00,0x33,0xCC, 0x00,0x33,0x99, 0x00,0x33,0x66, -// d0 - 0x00,0x33,0x33, 0x00,0x33,0x00, 0x00,0x00,0xFF, 0x00,0x00,0xCC, - 0x00,0x00,0x99, 0x00,0x00,0x66, 0x00,0x00,0x33, 0xEE,0x00,0x00, - 0xDD,0x00,0x00, 0xBB,0x00,0x00, 0xAA,0x00,0x00, 0x88,0x00,0x00, - 0x77,0x00,0x00, 0x55,0x00,0x00, 0x44,0x00,0x00, 0x22,0x00,0x00, -// e0 - 0x11,0x00,0x00, 0x00,0xEE,0x00, 0x00,0xDD,0x00, 0x00,0xBB,0x00, - 0x00,0xAA,0x00, 0x00,0x88,0x00, 0x00,0x77,0x00, 0x00,0x55,0x00, - 0x00,0x44,0x00, 0x00,0x22,0x00, 0x00,0x11,0x00, 0x00,0x00,0xEE, - 0x00,0x00,0xDD, 0x00,0x00,0xBB, 0x00,0x00,0xAA, 0x00,0x00,0x88, -// f0 - 0x00,0x00,0x77, 0x00,0x00,0x55, 0x00,0x00,0x44, 0x00,0x00,0x22, - 0x00,0x00,0x11, 0xEE,0xEE,0xEE, 0xDD,0xDD,0xDD, 0xBB,0xBB,0xBB, - 0xAA,0xAA,0xAA, 0x88,0x88,0x88, 0x77,0x77,0x77, 0x55,0x55,0x55, - 0x44,0x44,0x44, 0x22,0x22,0x22, 0x11,0x11,0x11, 0x00,0x00,0x00 -}; - - -/* panic dialog and info saving */ -static int mac_addr_digit_x; -static int mac_addr_digit_y; +#include "panic_image.c" + +void panic_ui_initialize(const unsigned char * system_clut); +int panic_dialog_set_image( const unsigned char * ptr, unsigned int size ); +void panic_dialog_get_image( unsigned char ** ptr, unsigned int * size ); +void draw_panic_dialog( void ); +void panic_dialog_test( void ); + +static int panic_dialog_verify( const struct panicimage * data, unsigned int size ); +static int pixels_needed_to_blit_digit( int digit ); static void blit_digit( int digit ); +static char * strnstr(const char * s, const char * find, size_t slen); +static void dim_screen(void); +static void panic_blit_rect(unsigned int x, unsigned int y, unsigned int width, unsigned int height, + int transparent, unsigned char * dataPtr ); + +static int panic_info_x; +static int panic_info_y; + +static const unsigned char * active_clut = NULL; /* This is a copy of the active clut */ + static boolean_t panicDialogDrawn = FALSE; -static void -panic_blit_rect( unsigned int x, unsigned int y, - unsigned int width, 
unsigned int height, - int transparent, unsigned char * dataPtr ); - -static void -panic_blit_rect_8( unsigned int x, unsigned int y, - unsigned int width, unsigned int height, - int transparent, unsigned char * dataPtr ); - -static void -panic_blit_rect_16( unsigned int x, unsigned int y, - unsigned int width, unsigned int height, - int transparent, unsigned char * dataPtr ); - -static void -panic_blit_rect_32( unsigned int x, unsigned int y, - unsigned int width, unsigned int height, - int transparent, unsigned char * dataPtr ); +static const struct panicimage * panic_dialog = NULL; /* the active panic dialog */ +static const unsigned char * panic_dialog_data = NULL; /* where the image data starts */ +static const unsigned char * panic_dialog_clut = NULL; /* where the clut used for the image starts */ -static void -dim_screen(void); +static unsigned char * curr_image_ptr = NULL; /* If NULL, the default panic dialog is active */ +static unsigned int curr_image_size = 0; -static void -dim_screen_16(void); +#define FONT_WIDTH 8 +#define FONT_HEIGHT 16 +static unsigned short rendered_font[FONT_HEIGHT][FONT_WIDTH]; -static void -dim_screen_32(void); +static char versionbuf[20]; /* ####.###~###\0 */ -static int -decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * value ); +#define isdigit(d) ((d) >= '0' && (d) <= '9') -void +#define CLUT_ENTRIES 256 +#define CLUT_SIZE (CLUT_ENTRIES * 3) + + +/* + * This routine sets up the default panic dialog + */ + +extern unsigned char iso_font[]; +extern const char version[]; +extern unsigned int panic_caller; + +void panic_ui_initialize(const unsigned char * system_clut) { - clut = system_clut; + char vstr[20]; + + + panic_dialog_set_image( NULL, 0 ); + + active_clut = system_clut; + + strcpy(vstr, "custom"); + + /* Convert xnu-####.###.obj~### into ####.###~### */ + + if (version) { + char * versionpos = strnstr(version, "xnu-", 20); + + if (versionpos) { + int len, i; + + vstr[0] = '\0'; + + for (i=0,len=4;len<20;len++) { + if (isdigit(versionpos[len]) || versionpos[len] == '.') { /* extract ####.###. */ + vstr[i++] = versionpos[len]; + continue; + } + break; + } + + if ( versionpos[len-1] == '.' 
) /* remove trailing period if present */ + i--; + + for (;len<20;len++) { /* skip to next digit if present */ + if ( !isdigit(versionpos[len]) ) + continue; + break; + } + + if ( versionpos[len-1] == '~' ) { /* extract ~### if present */ + vstr[i++] = versionpos[len-1]; + for (;len<20;len++) { /* extract ### */ + if ( isdigit(versionpos[len]) ) { + vstr[i++] = versionpos[len]; + continue; + } + break; + } + } + + vstr[i] = '\0'; + } + } + + strcpy(versionbuf, vstr); +} + + + +void +panic_dialog_test( void ) +{ + boolean_t o_panicDialogDrawn = panicDialogDrawn; + boolean_t o_panicDialogDesired = panicDialogDesired; + unsigned int o_logPanicDataToScreen = logPanicDataToScreen; + unsigned int o_panic_caller = panic_caller; + unsigned int o_panicDebugging = panicDebugging; + + + panicDebugging = TRUE; + panic_caller = (unsigned int) __builtin_return_address(0); + logPanicDataToScreen = FALSE; + panicDialogDesired = TRUE; + panicDialogDrawn = FALSE; + + draw_panic_dialog(); + + panicDebugging = o_panicDebugging; + panic_caller = o_panic_caller; + logPanicDataToScreen = o_logPanicDataToScreen; + panicDialogDesired = o_panicDialogDesired; + panicDialogDrawn = o_panicDialogDrawn; } + void draw_panic_dialog( void ) { - int pd_x,pd_y, iconx, icony, tx_line, tx_col; - int line_width = 1; - int f1, f2, d1, d2, d3, rem; - char *pair = "ff"; - int count = 0; - char digit; - int nibble; - char colon = ':'; - char dot = '.'; - struct ether_addr kdp_mac_addr = kdp_get_mac_addr(); - unsigned int ip_addr = (unsigned int) ntohl(kdp_get_ip_address()); - - if (!panicDialogDrawn && panicDialogDesired) - { - if ( !logPanicDataToScreen ) - { + if (!panicDialogDrawn && panicDialogDesired) { + if ( !logPanicDataToScreen ) { + int pd_x, pd_y; + int count, nibble, indx; + struct ether_addr kdp_mac_addr; + unsigned int panic_dialog_count, ip_addr; + char panic_num_chars[13+8+1], mac_addr_chars[17+1], ip_addr_chars[15+1]; + struct { + int pixels; + char * chars; + } panic_dialog_info[3]; + /* dim the screen 50% before putting up panic dialog */ dim_screen(); /* set up to draw background box */ - pd_x = (vinfo.v_width/2) - panic_dialog.pd_width/2; - pd_y = (vinfo.v_height/2) - panic_dialog.pd_height/2; + /* by locating where the upper left corner is placed */ + + pd_x = (vinfo.v_width/2) - panic_dialog->pd_width/2; + pd_y = (vinfo.v_height/2) - panic_dialog->pd_height/2; - /* draw image */ - panic_blit_rect( pd_x, pd_y, panic_dialog.pd_width, panic_dialog.pd_height, 0, (unsigned char*) panic_dialog.image_pixel_data); + /* draw panic dialog at pd_x/pd_y */ + panic_blit_rect( pd_x, pd_y, panic_dialog->pd_width, panic_dialog->pd_height, + 0, (unsigned char*) panic_dialog_data); - /* do not display the mac and ip addresses if the machine isn't attachable. */ - /* there's no sense in possibly confusing people. 
 */
-		if (panicDebugging)
-		{
+			panic_dialog_count = 0;		/* number of info items to display at the bottom of the dialog */
+
+			if (panicDebugging) {
+				int x1, x2;
+
+				/*
+				 * PANIC CALLER
+				 *
+				 * don't display the panic caller if it is 0
+				 *
+				 */
+
+				if ( panic_caller != 0 ) {
+					/* Calculate the pixels needed to generate the panic number */
+					panic_dialog_info[panic_dialog_count].pixels = 0;
+
+					for ( indx=1, count=0; count < 13; count++ ) {
+						if ( versionbuf[count] == '\0' )
+							break;
+
+						panic_num_chars[indx++] = versionbuf[count];
+						panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( versionbuf[count] );
+					}
+
+					panic_num_chars[indx++] = ':';
+					panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( ':' );
+
+					for ( count=8; count != 0; count-- ) {
+						nibble = (panic_caller >> ((count-1)<<2)) &0xF;
+						panic_num_chars[indx++] = nibble;
+						panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( nibble );
+					}
+
+					panic_num_chars[0] = indx;
+					panic_dialog_info[panic_dialog_count].chars = panic_num_chars;
+					panic_dialog_count++;
+				}
 
-			/* offset for mac address text */
-			mac_addr_digit_x = (vinfo.v_width/2) - 130;	/* use 62 if no ip */
-			mac_addr_digit_y = (vinfo.v_height/2) + panic_dialog.pd_height/2 - 20;
+				/*
+				 * MAC ADDRESS
+				 *
+				 * if the mac address is not available, then use ff:ff:ff:ff:ff:ff
+				 *
+				 */
+
+				kdp_mac_addr = kdp_get_mac_addr();
 
-			if(kdp_mac_addr.ether_addr_octet[0] || kdp_mac_addr.ether_addr_octet[1]|| kdp_mac_addr.ether_addr_octet[2]
-				|| kdp_mac_addr.ether_addr_octet[3] || kdp_mac_addr.ether_addr_octet[4] || kdp_mac_addr.ether_addr_octet[5])
-			{
-				/* blit the digits for mac address */
+				/* If no mac_addr has been set, then force to -1 */
+				if( ! (kdp_mac_addr.ether_addr_octet[0] || kdp_mac_addr.ether_addr_octet[1] || kdp_mac_addr.ether_addr_octet[2]
+					|| kdp_mac_addr.ether_addr_octet[3] || kdp_mac_addr.ether_addr_octet[4] || kdp_mac_addr.ether_addr_octet[5])) {
 				for (count = 0; count < 6; count++ )
-				{
-					nibble = (kdp_mac_addr.ether_addr_octet[count] & 0xf0) >> 4;
-					digit = nibble < 10 ? nibble + '0':nibble - 10 + 'a';
-					blit_digit(digit);
-
-					nibble = kdp_mac_addr.ether_addr_octet[count] & 0xf;
-					digit = nibble < 10 ?
nibble + '0':nibble - 10 + 'a'; - blit_digit(digit); - if( count < 5 ) - blit_digit( colon ); - } + kdp_mac_addr.ether_addr_octet[count] = -1; } - else /* blit the ff's */ - { - for( count = 0; count < 6; count++ ) - { - digit = pair[0]; - blit_digit(digit); - digit = pair[1]; - blit_digit(digit); - if( count < 5 ) - blit_digit( colon ); + + panic_dialog_info[panic_dialog_count].pixels = 0; + + for (indx=1, count=0; count < 6; count++ ) { + nibble = (kdp_mac_addr.ether_addr_octet[count] & 0xf0) >> 4; + mac_addr_chars[indx++] = nibble; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( nibble ); + + nibble = kdp_mac_addr.ether_addr_octet[count] & 0xf; + mac_addr_chars[indx++] = nibble; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( nibble ); + + if( count < 5 ) { + mac_addr_chars[indx++] = ':'; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( ':' ); } } - /* now print the ip address */ - mac_addr_digit_x = (vinfo.v_width/2) + 10; - if(ip_addr != 0) - { - /* blit the digits for ip address */ - for (count = 0; count < 4; count++ ) - { + + mac_addr_chars[0] = indx; + panic_dialog_info[panic_dialog_count].chars = mac_addr_chars; + panic_dialog_count++; + + /* + * IP ADDRESS + * + * do not display the ip addresses if the machine isn't attachable. + * there's no sense in possibly confusing people. + */ + + if ( (ip_addr = (unsigned int) ntohl(kdp_get_ip_address())) != 0 ) { + int d1, d2, d3; + + panic_dialog_info[panic_dialog_count].pixels = 0; + + for ( indx=1, count=0; count < 4; count++ ) { nibble = (ip_addr & 0xff000000 ) >> 24; - d3 = (nibble % 0xa) + '0'; - nibble = nibble/0xa; - d2 = (nibble % 0xa) + '0'; - nibble = nibble /0xa; - d1 = (nibble % 0xa) + '0'; + d3 = (nibble % 10) ; nibble = nibble / 10; + d2 = (nibble % 10) ; nibble = nibble / 10; + d1 = (nibble % 10) ; - if( d1 != '0' ) blit_digit(d1); - blit_digit(d2); - blit_digit(d3); - if( count < 3 ) - blit_digit(dot); + if( d1 != 0 ) { + ip_addr_chars[indx++] = d1; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( d1 ); + } + + ip_addr_chars[indx++] = d2; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( d2 ); + + ip_addr_chars[indx++] = d3; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( d3 ); + + if ( count < 3 ) { + ip_addr_chars[indx++] = '.'; + panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( '.' 
);
+					}
 
 				d1= d2 = d3 = 0;
 				ip_addr = ip_addr << 8;
 			}
+
+				ip_addr_chars[0] = indx;
+				panic_dialog_info[panic_dialog_count].chars = ip_addr_chars;
+				panic_dialog_count++;
 			}
-		}
-	}
-	}
+
+
+			/* vertical alignment for information to be displayed */
+			panic_info_y = (vinfo.v_height/2) + panic_dialog->pd_height/2 - (panic_dialog->pd_info_height);
+
+			/* blit out all the information we gathered */
+
+			switch ( panic_dialog_count ) {
+				case 1 :	/* one item is centered */
+					panic_info_x = (vinfo.v_width/2) - (panic_dialog_info[0].pixels/2);
+					for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++)
+						blit_digit(panic_dialog_info[0].chars[indx]);
+
+					break;
+
+				case 2 :	/* left centered and right centered */
+					x1 = ((panic_dialog->pd_width/2) - panic_dialog_info[0].pixels)/2;
+					panic_info_x = ((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1;
+
+					for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++)
+						blit_digit(panic_dialog_info[0].chars[indx]);
+
+					x2 = ((panic_dialog->pd_width/2) - panic_dialog_info[1].pixels)/2;
+					panic_info_x = (vinfo.v_width/2) + x2;
+
+					for (indx=1; indx < panic_dialog_info[1].chars[0]; indx++)
+						blit_digit(panic_dialog_info[1].chars[indx]);
+
+					break;
+
+				case 3 :	/* left centered, middle and right centered */
+					x1 = ((panic_dialog->pd_width/2) - panic_dialog_info[0].pixels - (panic_dialog_info[1].pixels/2))/2;
+					panic_info_x = ((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1;
+
+					for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++)
+						blit_digit(panic_dialog_info[0].chars[indx]);
+
+					panic_info_x = (vinfo.v_width/2) - (panic_dialog_info[1].pixels/2);
+
+					for (indx=1; indx < panic_dialog_info[1].chars[0]; indx++)
+						blit_digit(panic_dialog_info[1].chars[indx]);
+
+					x2 = ((panic_dialog->pd_width/2) - panic_dialog_info[2].pixels - (panic_dialog_info[1].pixels/2))/2;
+					panic_info_x = (vinfo.v_width/2) + x2 + (panic_dialog_info[1].pixels/2);
+
+					for (indx=1; indx < panic_dialog_info[2].chars[0]; indx++)
+						blit_digit(panic_dialog_info[2].chars[indx]);
+
+					break;
+
+				default :	/* nothing */
+					break;
+
+			}	/* switch */
+		}	/* if panicDebugging */
+	}	/* if ! logPanicDataToScreen */
+	}	/* if ! panicDialogDrawn && panicDialogDesired */
+
 	panicDialogDrawn = TRUE;
 	panicDialogDesired = FALSE;
+}
+
+
+/*
+ * This routine installs a new panic dialog
+ * If ptr is NULL, then the default "built-in" panic dialog will be installed.
+ * note: it is the caller that must take care of deallocating the memory used for the previous panic dialog
+ */
+
+int
+panic_dialog_set_image( const unsigned char * ptr, unsigned int size )
+{
+	int error;
+	unsigned int newsize;
+	const struct panicimage * newimage;
+
+	/* if ptr is NULL, restore panic image to built-in default */
+	if ( ptr == NULL ) {
+		newimage = &panic_dialog_default;
+		newsize = sizeof(struct panicimage) + newimage->pd_dataSize;
+	}
+	else {
+		newimage = (struct panicimage *) ptr;
+		newsize = size;
+	}
+
+	if ( (error = panic_dialog_verify( newimage, newsize )) )
+		return (error);
+
+	panic_dialog = newimage;
+	panic_dialog_data = &panic_dialog->data[0];
+	panic_dialog_clut = &panic_dialog->data[panic_dialog->pd_dataSize-CLUT_SIZE];
+
+	curr_image_ptr = (unsigned char *) ptr;
+	curr_image_size = size;
+
+	return (0);
+}
+
+
+/*
+ * This routine returns the current address of the panic dialog
+ * If the default panic dialog is active, then *ptr will be NULL
+ */
+
+void
+panic_dialog_get_image( unsigned char ** ptr, unsigned int * size )
+{
+	*ptr = curr_image_ptr;
+	*size = curr_image_size;
+}
+
+
+/*
+ * This routine verifies the panic dialog image is valid.
+ */
+
+static int
+panic_dialog_verify( const struct panicimage * newimage, unsigned int size )
+{
+	unsigned int sum, i;
+
+	if ( size < (sizeof(struct panicimage) + newimage->pd_dataSize) )
+		return EINVAL;
+
+	if ( newimage->pd_tag != 'RNMp' )
+		return EINVAL;
+
+	size = newimage->pd_dataSize-CLUT_SIZE;
+	for (sum=0,i=0; i<size; i++) {
+		sum += newimage->data[i];
+		sum <<= sum&1;
+	}
+	if ( sum != newimage->pd_sum )
+		return EINVAL;
+
+	return 0;
+}
+
+
+/*
+ * Service Routines for managing the panic dialog
+ */
+
+
+static const struct rendered_num * find_rendered_digit( int digit );
+static void panic_blit_rect_8( unsigned int x, unsigned int y, unsigned int width, unsigned int height,
+			int transparent, unsigned char * dataPtr );
+static void panic_blit_rect_16( unsigned int x, unsigned int y, unsigned int width, unsigned int height,
+			int transparent, unsigned char * dataPtr );
+static void panic_blit_rect_32( unsigned int x, unsigned int y, unsigned int width, unsigned int height,
+			int transparent, unsigned char * dataPtr );
+static int decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * depth, unsigned char ** value );
+
+
+/* Utilities to convert 8 bit/gray */
+static unsigned int make24bitcolor( unsigned int index, const unsigned char * clut );
+static unsigned char findIndexMatch( unsigned char index );
+static unsigned char color24togray8( unsigned int color24 );
+static unsigned char findbestgray( unsigned int color24 );
+static int isActiveClutOK( void );
+
+static int
+pixels_needed_to_blit_digit( int digit )
+{
+	return FONT_WIDTH;
+}
+
+
+static const struct rendered_num *
+find_rendered_digit( int digit )
+{
+	//extern unsigned char iso_font[];
+	const struct rendered_num *digitPtr;
+
+	if ( digit < 16 ) {
+		if ( digit < 10 )
+			digit += 0x30;
+		else
+			digit += 0x37;
+	}
+
+	digitPtr = (const struct rendered_num *) &iso_font[digit * 16];
+	return digitPtr;
+}
+
+
 static void
 blit_digit( int digit )
 {
-	switch( digit )
-	{
-		case '0': {
-			panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_0.num_w, num_0.num_h, 255, (unsigned char*) num_0.num_pixel_data);
-			mac_addr_digit_x = mac_addr_digit_x + num_0.num_w - 1;
-			break;
-		}
-		case '1': {
-			panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_1.num_w, num_1.num_h, 255, (unsigned char*) num_1.num_pixel_data);
-			mac_addr_digit_x = mac_addr_digit_x
+ num_1.num_w ; - break; - } - case '2': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_2.num_w, num_2.num_h, 255, (unsigned char*) num_2.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_2.num_w ; - break; - } - case '3': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_3.num_w, num_3.num_h, 255, (unsigned char*) num_3.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_3.num_w ; - break; - } - case '4': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_4.num_w, num_4.num_h, 255, (unsigned char*) num_4.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_4.num_w ; - break; - } - case '5': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_5.num_w, num_5.num_h, 255, (unsigned char*) num_5.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_5.num_w ; - break; - } - case '6': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_6.num_w, num_6.num_h, 255, (unsigned char*) num_6.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_6.num_w ; - break; - } - case '7': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_7.num_w, num_7.num_h, 255, (unsigned char*) num_7.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_7.num_w ; - break; - } - case '8': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_8.num_w, num_8.num_h, 255, (unsigned char*) num_8.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_8.num_w ; - break; - } - case '9': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_9.num_w, num_9.num_h, 255, (unsigned char*) num_9.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_9.num_w ; - break; - } - case 'a': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_a.num_w, num_a.num_h, 255, (unsigned char*) num_a.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_a.num_w ; - break; - } - case 'b': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_b.num_w, num_b.num_h, 255, (unsigned char*) num_b.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_b.num_w ; - break; - } - case 'c': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_c.num_w, num_c.num_h, 255, (unsigned char*) num_c.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_c.num_w ; - break; - } - case 'd': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_d.num_w, num_d.num_h, 255, (unsigned char*) num_d.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_d.num_w ; - break; - } - case 'e': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_e.num_w, num_e.num_h, 255, (unsigned char*) num_e.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_e.num_w ; - break; - } - case 'f': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_f.num_w, num_f.num_h, 255, (unsigned char*) num_f.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_f.num_w ; - break; + unsigned char * raw_data = (unsigned char *) find_rendered_digit( digit ); + unsigned width = FONT_WIDTH, height = FONT_HEIGHT; + int row; + + for (row=0; row=0; j--) { + + if ( bits & 0x80 ) + rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[0]; + else + rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[1]; + bits <<= 1; } - case ':': { - panic_blit_rect( mac_addr_digit_x, mac_addr_digit_y, num_colon.num_w, num_colon.num_h, 255, (unsigned char*) num_colon.num_pixel_data); - mac_addr_digit_x = mac_addr_digit_x + num_colon.num_w; - break; - } - case '.': { - panic_blit_rect( mac_addr_digit_x, 
mac_addr_digit_y + (num_colon.num_h/2), num_colon.num_w, num_colon.num_h/2, 255, (unsigned char*) num_colon.num_pixel_data);
-			mac_addr_digit_x = mac_addr_digit_x + num_colon.num_w;
-			break;
-		}
-		default:
-			break;
-	}
+	unsigned char * raw_data = (unsigned char *) find_rendered_digit( digit );
+	unsigned width = FONT_WIDTH, height = FONT_HEIGHT;
+	int row;
+
+	for (row=0; row<FONT_HEIGHT; row++) {
+		int j;
+		unsigned char bits = raw_data[row];
+
+		for (j=FONT_WIDTH-1; j>=0; j--) {
+
+			if ( bits & 0x80 )
+				rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[0];
+			else
+				rendered_font[row][j] = 0x0100 | panic_dialog->pd_info_color[1];
+			bits <<= 1;
+		}
+	}
+
+	panic_blit_rect( panic_info_x, panic_info_y , width, height, 255, (unsigned char *) rendered_font);
+	panic_info_x += width;
 }
+
 static void
 panic_blit_rect( unsigned int x, unsigned int y,
-		unsigned int width, unsigned int height,
-		int transparent, unsigned char * dataPtr )
+		 unsigned int width, unsigned int height,
+		 int transparent, unsigned char * dataPtr )
 {
 	if(!vinfo.v_depth)
 		return;
@@ -388,125 +550,152 @@ panic_blit_rect( unsigned int x, unsigned int y,
 	}
 }
 
-/* panic_blit_rect_8 uses the built in clut for drawing.
+/*
+ * panic_blit_rect_8 decodes the RLE encoded image data on the fly, looks up the
+ * color by indexing into the clut, or attempts to find the best index.
+ */
 
-*/
 static void
 panic_blit_rect_8( unsigned int x, unsigned int y,
-		unsigned int width, unsigned int height,
-		int transparent, unsigned char * dataPtr )
+		   unsigned int width, unsigned int height,
+		   int transparent, unsigned char * dataPtr )
 {
 	volatile unsigned char * dst;
-	int line, col;
-	unsigned int data, quantity, value;
+	unsigned int line, col, i;
+	static int clutOK = -1;
+	unsigned int data, quantity, depth;
+	unsigned char * value;
+
+	if ( clutOK == -1 )
+		clutOK = isActiveClutOK();
+
 	dst = (volatile unsigned char *) (vinfo.v_baseaddr +
-					(y * vinfo.v_rowbytes) +
-					x);
+					  (y * vinfo.v_rowbytes) +
+					  x);
 
 	quantity = 0;
+	i = 0;
 
 	for( line = 0; line < height; line++) {
 		for( col = 0; col < width; col++) {
+
 			if (quantity == 0) {
-				dataPtr += decode_rle(dataPtr, &quantity, &value);
+				dataPtr += decode_rle(dataPtr, &quantity, &depth, &value);
+				i = 0;
 			}
 
-			data = value;
+			if ( clutOK )
+				data = value[i++];
+			else
+				data = findIndexMatch( value[i++] );
+
 			*(dst + col) = data;
-			quantity--;
+
+			if ( i == depth ) {
+				i = 0;
+				quantity--;
+			}
 		}
 
 		dst = (volatile unsigned char *) (((int)dst) + vinfo.v_rowbytes);
 	}
 }
 
-/* panic_blit_rect_16 draws using a clut.
-
-   panic_blit_rect_16 decodes the RLE encoded image data on the fly, looks up the
-   color by indexing into the clut, uses the top 5 bits to fill in each of the three
-   pixel values (RGB) and writes each pixel to the screen.
-*/
+/*
+ * panic_blit_rect_16 decodes the RLE encoded image data on the fly, looks up the
+ * color by indexing into the clut, uses the top 5 bits to fill in each of the three
+ * pixel values (RGB) and writes each pixel to the screen.
+ */ + static void panic_blit_rect_16( unsigned int x, unsigned int y, - unsigned int width, unsigned int height, - int transparent, unsigned char * dataPtr ) + unsigned int width, unsigned int height, + int transparent, unsigned char * dataPtr ) { - volatile unsigned short * dst; - int line, col; - unsigned int quantity, index, value, data; - /* If our clut has disappeared, use the standard MacOS 8-bit clut */ - if(!clut) { - clut = appleClut8; - } + volatile unsigned short * dst; + unsigned int line, col, i; + unsigned int quantity, index, data, depth; + unsigned char * value; - dst = (volatile unsigned short *) (vinfo.v_baseaddr + - (y * vinfo.v_rowbytes) + - (x * 2)); + dst = (volatile unsigned short *) (vinfo.v_baseaddr + + (y * vinfo.v_rowbytes) + + (x * 2)); - quantity = 0; + quantity = 0; + i = 0; - for( line = 0; line < height; line++) { - for( col = 0; col < width; col++) { + for( line = 0; line < height; line++) { + for( col = 0; col < width; col++) { - if (quantity == 0) { - dataPtr += decode_rle(dataPtr, &quantity, &value); - index = value * 3; - } + if (quantity == 0) { + dataPtr += decode_rle(dataPtr, &quantity, &depth, &value); + i = 0; + } - data = ( (unsigned short) (0xf8 & (clut[index + 0])) << 7) - | ( (unsigned short) (0xf8 & (clut[index + 1])) << 2) - | ( (unsigned short) (0xf8 & (clut[index + 2])) >> 3); + index = value[i++] * 3; + + data = ( (unsigned short) (0xf8 & (panic_dialog_clut[index + 0])) << 7) + | ( (unsigned short) (0xf8 & (panic_dialog_clut[index + 1])) << 2) + | ( (unsigned short) (0xf8 & (panic_dialog_clut[index + 2])) >> 3); - *(dst + col) = data; - quantity--; - } + *(dst + col) = data; - dst = (volatile unsigned short *) (((int)dst) + vinfo.v_rowbytes); - } + if ( i == depth ) { + i = 0; + quantity--; + } + } + dst = (volatile unsigned short *) (((int)dst) + vinfo.v_rowbytes); + } } - /* - panic_blit_rect_32 decodes the RLE encoded image data on the fly, and fills - in each of the three pixel values from the clut (RGB) for each pixel and - writes it to the screen. +/* + * panic_blit_rect_32 decodes the RLE encoded image data on the fly, and fills + * in each of the three pixel values from the clut (RGB) for each pixel and + * writes it to the screen. 
*/
+
 static void
 panic_blit_rect_32( unsigned int x, unsigned int y,
-		unsigned int width, unsigned int height,
-		int transparent, unsigned char * dataPtr )
+		    unsigned int width, unsigned int height,
+		    int transparent, unsigned char * dataPtr )
 {
-	volatile unsigned int * dst;
-	int line, col;
-	unsigned int value, quantity, index, data;
-
+	volatile unsigned int * dst;
+	unsigned int line, col, i;
+	unsigned int quantity, index, data, depth;
+	unsigned char * value;
 
-	/* If our clut has disappeared, use the standard MacOS 8-bit clut */
-	if(!clut) {
-		clut = appleClut8;
-	}
 
 	dst = (volatile unsigned int *) (vinfo.v_baseaddr +
-					(y * vinfo.v_rowbytes) +
-					(x * 4));
+					 (y * vinfo.v_rowbytes) +
+					 (x * 4));
 
 	quantity = 0;
+	i = 0;
 
 	for( line = 0; line < height; line++) {
 		for( col = 0; col < width; col++) {
+
 			if (quantity == 0) {
-				dataPtr += decode_rle(dataPtr, &quantity, &value);
-				index = value * 3;
+				dataPtr += decode_rle(dataPtr, &quantity, &depth, &value);
+				i = 0;
 			}
+
+			index = value[i++] * 3;
 
-			data = ( (unsigned int) clut[index + 0] << 16)
-				| ( (unsigned int) clut[index + 1] << 8)
-				| ( (unsigned int) clut[index + 2]);
+			data = ( (unsigned int) panic_dialog_clut[index + 0] << 16)
+			     | ( (unsigned int) panic_dialog_clut[index + 1] << 8)
+			     | ( (unsigned int) panic_dialog_clut[index + 2]);
 
 			*(dst + col) = data;
-			quantity--;
+
+			if ( i == depth ) {
+				i = 0;
+				quantity--;
+			}
 		}
 
 		dst = (volatile unsigned int *) (((int)dst) + vinfo.v_rowbytes);
@@ -514,115 +703,223 @@ panic_blit_rect_8( unsigned int x, unsigned int y,
 	}
 }
 
 /*
-	decode_rle decodes a single quantity/value pair of a "modified-RLE" encoded
-	image.  The encoding works as follows:
-
-	The quantity and value will be described by either two or three bytes.  If the
-	most significant bit of the first byte is a 0, then the next seven bits are
-	the quantity (run-length) and the following 8 bits are the value (index into
-	a clut, in this case).  If the msb of the first byte is a 1, then the next 15 bits
-	are the quantity and the following 8 are the value.  Visually, the two possible
-	encodings are: (q = quantity, v = value)
-
-	Byte 1                              Byte 2                      Byte 3
-	case 1: [ 0 q6 q5 q4 q3 q2 q1 q0 ] [ v7 v6 v5 v4 v3 v2 v1 v0 ] [                         ]
-	case 2: [ 1 q14 q13 q12 a11 q10 q9 q8 ] [ q7 q6 q5 q4 q3 q2 q1 q0 ] [ v7 v6 v5 v4 v3 v2 v1 v0 ]
+	decode_rle decodes a single quantity/value run of a "modified-RLE" encoded
+	image.  The encoding works as follows:
+
+	The run is described in the first byte.  If the MSB is zero, then the next seven bits
+	are the quantity of bytes that follow that make up the run of value bytes.  (see case 0)
+
+	If the MSB is set, bits 0-3 are the quantity's least significant 4 bits.  If bit 4 is set,
+	then the quantity is further described in the next byte, where an additional 7 bits (4-10)
+	worth of quantity will be found.  If the MSB of this byte is set, then an additional
+	7 bits (11-17) worth of quantity will be found in the next byte.  This repeats until the MSB of
+	a quantity byte is zero, thus ending the run of quantity bytes.
+
+	Bits 5 and 6 of the first byte describe the number of bytes in the value run following the
+	quantity run.  These bits describe value runs of 1 to 4 bytes, and the quantity describes
+	the number of value runs.  (see cases 1-4)
+
+	encodings are: (q = quantity, v = value, c = quantity continues)
+
+	case 0: [ 0 q6-q0 ] [ v7-v0 ] ...
[ v7-v0 ] + case 1: [ 1 0 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] + case 2: [ 1 0 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] + case 3: [ 1 1 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] + case 4: [ 1 1 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] */ + static int -decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * value ) +decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * depth, unsigned char ** value ) { - unsigned char byte1 = *dataPtr++; - unsigned char byte2 = *dataPtr++; - int num_slots = 0; - - /* if the most-significant bit is 0, then the first byte is quanity, the second is value */ - if ((byte1 >> 7) == 0) { - *quantity = (unsigned int) byte1; - *value = (unsigned int) byte2; - num_slots = 2; + unsigned int mask; + int i, runlen, runsize; + + i = 0; + mask = dataPtr[i] & 0xF0; + + if ( mask & 0x80 ) { + runsize = ((mask & 0x60) >> 5) + 1; + runlen = dataPtr[i++] & 0x0F; + + if ( mask & 0x10 ) { + int shift = 4; + + do { + mask = dataPtr[i] & 0x80; + runlen |= ((dataPtr[i++] & 0x7F) << shift); + shift+=7; + } while (mask); + } } else { - /* clear the leading 1 */ - byte1 ^= 0x80; - - /* the first two bytes are the quantity, the third is value */ - *quantity = (unsigned int) byte1 << 8 | byte2; - *value = *dataPtr++; - num_slots = 3; + runlen = 1; + runsize = dataPtr[i++]; } - - return num_slots; + + *depth = runsize; + *quantity = runlen; + *value = &dataPtr[i]; + + return i+runsize; } + static void dim_screen(void) { + unsigned long *p, *endp, *row; + int col, rowline, rowlongs; + register unsigned long mask; + if(!vinfo.v_depth) return; - switch( vinfo.v_depth) { - case 16: - dim_screen_16(); - break; - case 32: - dim_screen_32(); - break; - } -} - -static void -dim_screen_16(void) -{ - unsigned long *p, *endp, *row; - int col; - int rowline, rowlongs; - unsigned long value, tmp; + if ( vinfo.v_depth == 32 ) + mask = 0x007F7F7F; + else if ( vinfo.v_depth == 16 ) + mask = 0x3DEF3DEF; + else + return; rowline = vinfo.v_rowscanbytes / 4; rowlongs = vinfo.v_rowbytes / 4; p = (unsigned long*) vinfo.v_baseaddr; - endp = (unsigned long*) vinfo.v_baseaddr; - - endp += rowlongs * vinfo.v_height; + endp = p + (rowlongs * vinfo.v_height); for (row = p ; row < endp ; row += rowlongs) { - for (col = 0; col < rowline; col++) { - value = *(row+col); - tmp = ((value & 0x7C007C00) >> 1) & 0x3C003C00; - tmp |= ((value & 0x03E003E0) >> 1) & 0x01E001E0; - tmp |= ((value & 0x001F001F) >> 1) & 0x000F000F; - *(row+col) = tmp; //half (dimmed)? 
-		}
+		for (p = &row[0], col = 0; col < rowline; col++) {
+			*p++ = (*p >> 1) & mask;
+		}
 	}
+}
+
+
+/* From user mode Libc - this ought to be in a library */
+static char *
+strnstr(const char * s, const char * find, size_t slen)
+{
+	char c, sc;
+	size_t len;
+
+	if ((c = *find++) != '\0') {
+		len = strlen(find);
+		do {
+			do {
+				if ((sc = *s++) == '\0' || slen-- < 1)
+					return (NULL);
+			} while (sc != c);
+			if (len > slen)
+				return (NULL);
+		} while (strncmp(s, find, len) != 0);
+		s--;
+	}
+	return ((char *)s);
+}
+
+/*
+ * these routines are for converting a color into grayscale
+ * in 8-bit mode, if the active clut is different than the
+ * clut used to create the panic dialog, then we must convert to gray
+ */
+
+static unsigned int
+make24bitcolor( unsigned int index, const unsigned char * clut )
+{
+	unsigned int color24 = 0;
+	int i = index * 3;
+
+	color24 |= clut[i+0] << 16;
+	color24 |= clut[i+1] << 8;
+	color24 |= clut[i+2];
+
+	return color24;
+}
+
+
+static unsigned char
+findbestgray( unsigned int color24 )
+{
+	unsigned int c24, rel, bestindex=-1, bestgray = -1;
+	unsigned char gray8, c8;
+	int i;
+#define abs(v) ((v) > 0)?(v):-(v)
+
+	gray8 = color24togray8( color24 );	/* convert the original color into grayscale */
+
+	for (i=0; i<CLUT_ENTRIES; i++) {
+		c24 = make24bitcolor( i, active_clut );		/* get the active clut's color for this index */
+
+		if ( (((c24>>16)&0xff) != ((c24>>8)&0xff)) || ((c24>>8)&0xff) != (c24 & 0xff) )
+			continue;		/* only match against grays */
+
+		c8 = c24 & 0xFF;		/* isolate the gray */
+
+		/* find the gray with the smallest difference */
+		rel = abs( gray8 - c8 );
+		if ( rel < bestgray ) {
+			bestgray = rel;
+			bestindex = i;
+		}
+	}
+
+	/* Did we fail to find any grays ? */
+	if ( bestindex == -1 ) {
+		/* someday we should look for the best color match */
+		/* but for now just return the gray as the index */
+		/* at least there might be something readable on the display */
+
+		bestindex = gray8;
+	}
+
+	return bestindex;
+#undef abs
 }
 
-static void
-dim_screen_32(void)
+
+static unsigned char
+color24togray8( unsigned int color24 )
+{
+	float R, G, B;
+	float Gray;
+	unsigned char gray8;
+
+	R = (color24 & 0xFF0000) >> 16 ;
+	G = (color24 & 0xFF00) >> 8 ;
+	B = (color24 & 0xFF);
+
+	Gray = (R*.30) + (G*.59) + (B*.11);
+	gray8 = (unsigned char) ( Gray + .5);
+	return gray8;
+}
+
+
+static unsigned char
+findIndexMatch( unsigned char index )
 {
-	unsigned long *p, *endp, *row;
-	int col;
-	int rowline, rowlongs;
-	unsigned long value, tmp;
+	static unsigned int last_in_index = -1;
+	static unsigned char last_index;
+	unsigned int sc;
 
-	rowline = vinfo.v_rowscanbytes / 4;
-	rowlongs = vinfo.v_rowbytes / 4;
+	if ( index == last_in_index )
+		return last_index;
 
-	p = (unsigned long*) vinfo.v_baseaddr;
-	endp = (unsigned long*) vinfo.v_baseaddr;
+	last_in_index = index;
+	sc = make24bitcolor( index, panic_dialog_clut );
+	last_index = findbestgray( sc );	/* find the nearest matching gray in the active clut */
 
-	endp += rowlongs * vinfo.v_height;
+	return last_index;
+}
 
-	for (row = p ; row < endp ; row += rowlongs) {
-		for (col = 0; col < rowline; col++) {
-			value = *(row+col);
-			tmp =  ((value & 0x00FF0000) >> 1) & 0x007F0000;
-			tmp |= ((value & 0x0000FF00) >> 1) & 0x00007F00;
-			tmp |= (value & 0x000000FF) >> 1;
-			*(row+col) = tmp; //half (dimmed)?
-		}
+static int
+isActiveClutOK( void )
+{
+	int i;
+	int r = 1;	/* assume OK */
+
+	for (i=0; i<CLUT_ENTRIES; i++) {
+		if ( make24bitcolor(i, panic_dialog_clut) == make24bitcolor(i, active_clut) )
+			continue;
+		r = 0;		/* the active clut does not match the image's clut */
+		break;
+	}
+
+	return r;
+}
diff --git a/osfmk/console/panic_ui/README b/osfmk/console/panic_ui/README
new file mode 100644
--- /dev/null
+++ b/osfmk/console/panic_ui/README
+To generate a panic image to be built into the kernel, execute:
+
+genimage -i <image file> -n <name> -fg <24-bit color> -bg <24-bit color>
+** options other than -i are optional.
+
+To generate a kernel loadable panic image file, execute:
+
+qtif2kraw -i <qtif file> -o <output file> -n <name> -fg <24-bit color> -bg <24-bit color>
+** options other than -i and -o are optional.
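+
+Purely as an illustration (this sketch is not part of the kernel or of the tools; the helper
+name rle_decode_one and the two-byte test stream are made up here), the modified-RLE encoding
+that decode_rle() in osfmk/console/panic_dialog.c consumes can be decoded in user space like this:
+
+	#include <stdio.h>
+
+	/* Decode one quantity/value run; mirrors decode_rle() in
+	   osfmk/console/panic_dialog.c.  Returns the number of bytes
+	   consumed; *quantity is the repeat count, *depth the number of
+	   value bytes, *value points at the value bytes themselves. */
+	static int
+	rle_decode_one(const unsigned char *dataPtr, unsigned int *quantity,
+	               unsigned int *depth, const unsigned char **value)
+	{
+		unsigned int mask = dataPtr[0] & 0xF0;
+		int i = 0, runlen, runsize;
+
+		if (mask & 0x80) {				/* cases 1-4 */
+			runsize = ((mask & 0x60) >> 5) + 1;	/* bits 5/6: 1..4 value bytes */
+			runlen = dataPtr[i++] & 0x0F;		/* bits 0-3: low quantity bits */
+			if (mask & 0x10) {			/* bit 4: quantity continues */
+				int shift = 4;
+				do {
+					mask = dataPtr[i] & 0x80;
+					runlen |= (dataPtr[i++] & 0x7F) << shift;
+					shift += 7;
+				} while (mask);
+			}
+		} else {					/* case 0: literal run */
+			runlen = 1;
+			runsize = dataPtr[i++];
+		}
+		*depth = runsize;
+		*quantity = runlen;
+		*value = &dataPtr[i];
+		return i + runsize;
+	}
+
+	int main(void)
+	{
+		/* 0x85 = 1 00 0 0101: repeat a single value byte (0x2b) 5 times */
+		const unsigned char stream[] = { 0x85, 0x2b };
+		unsigned int q, d;
+		const unsigned char *v;
+		int used = rle_decode_one(stream, &q, &d, &v);
+
+		printf("%d bytes consumed: %u x %u-byte run, value 0x%02x\n",
+		       used, q, d, (unsigned) v[0]);
+		return 0;
+	}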
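+
+Likewise, panic_dialog_verify() only accepts an image whose 'RNMp' header carries a checksum
+matching the pixel data (pd_dataSize minus the trailing 768-byte CLUT), so a tool that writes a
+loadable image must store the same sum in the header.  A minimal sketch of that computation
+(the function name panicimage_sum is hypothetical):
+
+	/* Running sum over the RLE pixel data, shifted left one bit
+	   whenever the running sum is odd; the result must equal the
+	   pd_sum field checked by panic_dialog_verify(). */
+	static unsigned int
+	panicimage_sum(const unsigned char *data, unsigned int len)
+	{
+		unsigned int sum = 0, i;
+
+		for (i = 0; i < len; i++) {
+			sum += data[i];
+			sum <<= sum & 1;
+		}
+		return sum;
+	}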
+
+
+
+===== Other Info
+
+The reason an 8-bit image was chosen is that it is easy to convert to 24 or 16 bit colors.
+The system does not typically run in 8-bit mode.  If the system is in 8-bit mode, then we have
+to check to see if the active CLUT is the same as the one that the image was created with.  If the
+CLUTs are different, the image is converted to grayscale and the nearest matching gray in the active
+CLUT is used.
+
+
diff --git a/osfmk/console/panic_ui/appleclut8.h b/osfmk/console/panic_ui/appleclut8.h
new file mode 100644
index 000000000..0e499f5d5
--- /dev/null
+++ b/osfmk/console/panic_ui/appleclut8.h
@@ -0,0 +1,51 @@
+// This bootClut was generated from appleClut8.act
+unsigned int appleClut8[256] = {
+// 00
+	0xFFFFFF, 0xFFFFCC, 0xFFFF99, 0xFFFF66, 0xFFFF33, 0xFFFF00, 0xFFCCFF, 0xFFCCCC,
+	0xFFCC99, 0xFFCC66, 0xFFCC33, 0xFFCC00, 0xFF99FF, 0xFF99CC, 0xFF9999, 0xFF9966,
+// 10
+	0xFF9933, 0xFF9900, 0xFF66FF, 0xFF66CC, 0xFF6699, 0xFF6666, 0xFF6633, 0xFF6600,
+	0xFF33FF, 0xFF33CC, 0xFF3399, 0xFF3366, 0xFF3333, 0xFF3300, 0xFF00FF, 0xFF00CC,
+// 20
+	0xFF0099, 0xFF0066, 0xFF0033, 0xFF0000, 0xCCFFFF, 0xCCFFCC, 0xCCFF99, 0xCCFF66,
+	0xCCFF33, 0xCCFF00, 0xCCCCFF, 0xCCCCCC, 0xCCCC99, 0xCCCC66, 0xCCCC33, 0xCCCC00,
+// 30
+	0xCC99FF, 0xCC99CC, 0xCC9999, 0xCC9966, 0xCC9933, 0xCC9900, 0xCC66FF, 0xCC66CC,
+	0xCC6699, 0xCC6666, 0xCC6633, 0xCC6600, 0xCC33FF, 0xCC33CC, 0xCC3399, 0xCC3366,
+// 40
+	0xCC3333, 0xCC3300, 0xCC00FF, 0xCC00CC, 0xCC0099, 0xCC0066, 0xCC0033, 0xCC0000,
+	0x99FFFF, 0x99FFCC, 0x99FF99, 0x99FF66, 0x99FF33, 0x99FF00, 0x99CCFF, 0x99CCCC,
+// 50
+	0x99CC99, 0x99CC66, 0x99CC33, 0x99CC00, 0x9999FF, 0x9999CC, 0x999999, 0x999966,
+	0x999933, 0x999900, 0x9966FF, 0x9966CC, 0x996699, 0x996666, 0x996633, 0x996600,
+// 60
+	0x9933FF, 0x9933CC, 0x993399, 0x993366, 0x993333, 0x993300, 0x9900FF, 0x9900CC,
+	0x990099, 0x990066, 0x990033, 0x990000, 0x66FFFF, 0x66FFCC, 0x66FF99, 0x66FF66,
+// 70
+	0x66FF33, 0x66FF00, 0x66CCFF, 0x66CCCC, 0x66CC99, 0x66CC66, 0x66CC33, 0x66CC00,
+	0x6699FF, 0x6699CC, 0x669999, 0x669966, 0x669933, 0x669900, 0x6666FF, 0x6666CC,
+// 80
+	0x666699, 0x666666, 0x666633, 0x666600, 0x6633FF, 0x6633CC, 0x663399, 0x663366,
+	0x663333, 0x663300, 0x6600FF, 0x6600CC, 0x660099, 0x660066, 0x660033, 0x660000,
+// 90
+	0x33FFFF, 0x33FFCC, 0x33FF99, 0x33FF66, 0x33FF33, 0x33FF00, 0x33CCFF, 0x33CCCC,
+	0x33CC99, 0x33CC66, 0x33CC33, 0x33CC00, 0x3399FF, 0x3399CC, 0x339999, 0x339966,
+// A0
+	0x339933, 0x339900, 0x3366FF, 0x3366CC, 0x336699, 0x336666, 0x336633, 0x336600,
+	0x3333FF, 0x3333CC, 0x333399, 0x333366, 0x333333, 0x333300, 0x3300FF, 0x3300CC,
+// B0
+	0x330099, 0x330066, 0x330033, 0x330000, 0x00FFFF, 0x00FFCC, 0x00FF99, 0x00FF66,
+	0x00FF33, 0x00FF00, 0x00CCFF, 0x00CCCC, 0x00CC99, 0x00CC66, 0x00CC33, 0x00CC00,
+// C0
+	0x0099FF, 0x0099CC, 0x009999, 0x009966, 0x009933, 0x009900, 0x0066FF, 0x0066CC,
+	0x006699, 0x006666, 0x006633, 0x006600, 0x0033FF, 0x0033CC, 0x003399, 0x003366,
+// D0
+	0x003333, 0x003300, 0x0000FF, 0x0000CC, 0x000099, 0x000066, 0x000033, 0xEE0000,
+	0xDD0000, 0xBB0000, 0xAA0000, 0x880000, 0x770000, 0x550000, 0x440000, 0x220000,
+// E0
+	0x110000, 0x00EE00, 0x00DD00, 0x00BB00, 0x00AA00, 0x008800, 0x007700, 0x005500,
+	0x004400, 0x002200, 0x001100, 0x0000EE, 0x0000DD, 0x0000BB, 0x0000AA, 0x000088,
+// F0
+	0x000077, 0x000055, 0x000044, 0x000022, 0x000011, 0xEEEEEE, 0xDDDDDD, 0xBBBBBB,
+	0xAAAAAA, 0x888888, 0x777777, 0x555555, 0x444444, 0x222222, 0x111111, 0x000000
+};
diff --git a/osfmk/console/panic_ui/generated_files/panic_image.c
b/osfmk/console/panic_ui/generated_files/panic_image.c new file mode 100644 index 000000000..8933773fe --- /dev/null +++ b/osfmk/console/panic_ui/generated_files/panic_image.c @@ -0,0 +1,1953 @@ +/* generated c file */ + +static const struct { + unsigned int pd_width; + unsigned int pd_height; + unsigned int bytes_per_pixel; /* 1: CLUT, 3:RGB, 4:RGBA */ + unsigned char image_pixel_data[0x880a]; +} panic_dialog = { + 472, 255, 1, +0xae,0x87,0xfd, 0x01,0x6c, 0x01,0x55, 0x80,0xbb,0xfd, 0x01,0x55, 0x01,0x6c, 0x06,0xfd, +0x01,0x6c, 0x01,0x55, 0x0b,0xfd, 0x01,0x41, 0x01,0x83, 0x24,0xfd, 0x01,0x83, 0x01,0x41, 0x80,0xa6,0xfd, +0x02,0x2b, 0x04,0xfd, 0x01,0x2b, 0x01,0x19, 0x30,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0xa9,0xfd, +0x01,0x55, 0x01,0x00, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, +0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, 0x01,0x41, 0x24,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, +0x04,0x00, 0x01,0x07, 0x80,0x8f,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x83, 0x30,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x6c, 0x22,0xfd, +0x01,0x6c, 0x01,0x55, 0x10,0xfd, 0x01,0x41, 0x01,0x83, 0x52,0xfd, 0x01,0x55, 0x01,0x6c, 0x17,0xfd, +0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, +0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, 0x01,0x41, 0x1f,0xfd, 0x01,0x83, 0x01,0x41, 0x03,0xfd, +0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x41, 0x01,0x07, 0x01,0x00, 0x01,0x2b, +0x80,0x8f,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x19, 0x31,0xfd, 0x01,0x2b, +0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x22,0xfd, 0x01,0x2b, 0x01,0x00, 0x10,0xfd, 0x01,0x00, +0x01,0x41, 0x52,0xfd, 0x01,0x00, 0x01,0x2b, 0x17,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, +0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x0b,0xfd, 0x01,0x00, +0x01,0x41, 0x1f,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x12,0xfd, 0x01,0x00, +0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0x8f,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, +0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, +0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x55, 0x05,0x00, 0x05,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x04,0x00, +0x01,0x83, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0xfd, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2e, +0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x04,0x00, +0x04,0xfd, 0x01,0x19, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x19, 0x03,0x00, +0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x02,0xfd, +0x01,0x19, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0x2b, +0x01,0x83, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x03,0x00, 0x01,0x55, 0x02,0xfd, +0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x09,0xfd, +0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 
0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x55, 0x05,0x00, 0x07,0xfd, 0x01,0x2b, 0x02,0x00, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x01,0xfd, 0x02,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, 0x06,0xfd, 0x04,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x03,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x00, +0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x83, 0x01,0xfd, +0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, +0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x71,0xfd, +0x01,0x19, 0x02,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x83, 0x01,0x00, +0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x02,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x55, +0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0x83, +0x06,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, +0x02,0x00, 0x01,0x41, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, +0x02,0xfd, 0x02,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x41, 0x07,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, +0x01,0x07, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0x83, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, +0x01,0x00, 0x01,0x83, 0x01,0x19, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, +0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x06,0xfd, +0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x01,0x55, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x83, +0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x6c, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 
+0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, +0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x74,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, +0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0x41, 0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, +0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, +0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, +0x01,0x07, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x03,0xfd, 0x01,0x2b, +0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, +0x01,0x41, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x02,0x2b, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x04,0xfd, +0x02,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, +0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0x41, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, +0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, +0x01,0x00, 0x01,0x07, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x02,0x2b, 0x01,0xfd, +0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, +0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x04,0x00, 0x01,0x2e, +0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0x19, +0x01,0x00, 0x01,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x01,0x83, +0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x75,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, 0x01,0xfd, 0x06,0x00, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, +0x06,0x00, 0x02,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, +0x01,0x6c, 0x04,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, +0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 
0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, +0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, +0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, +0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0x2b, 0x01,0x19, +0x01,0x00, 0x01,0x83, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, +0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x06,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, +0x01,0x00, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x2b, 0x01,0x2e, +0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x75,0xfd, +0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, +0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x01,0x2b, +0x01,0x00, 0x05,0xfd, 0x01,0x07, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, +0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x08,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x07, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, +0x02,0x2b, 0x01,0x07, 0x02,0xfd, 0x02,0x2b, 0x03,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x6c, +0x01,0x00, 0x01,0x6c, 0x04,0xfd, 0x02,0x2b, 0x01,0x87, 0x01,0x82, 0x01,0x7d, 0x02,0x2b, 0x01,0x74, +0x01,0x26, 0x01,0x00, 0x01,0x6e, 0x01,0x6d, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, +0x01,0x2b, 0x01,0x6f, 0x01,0x71, 0x01,0x00, 0x01,0x2c, 0x01,0x7a, 0x01,0x7d, 0x01,0x58, 0x01,0x00, +0x01,0x5f, 0x01,0x8b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, +0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, +0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x07, 0x01,0x00, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x02,0x2b, 0x01,0x00, +0x01,0x55, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 
+ [... several thousand additional 0xNN,0xNN byte pairs added by the patch are omitted here: together they form a run-length-encoded pixel table (a run count followed by a pixel value; run counts of 0x80 and above occupy two bytes, with the high bit set on the first) ...]
0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, +0x01,0x19, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, 0x01,0x41, +0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x7a, 0x01,0x36, 0x01,0x00, +0x01,0x47, 0x05,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x47, 0x01,0x59, 0x02,0x00, 0x01,0xfb, 0x01,0x59, +0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, +0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, +0x01,0x55, 0x01,0x00, 0x01,0x36, 0x05,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, +0x02,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x05,0xfb, 0x01,0x2b, +0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x52, 0x01,0x00, +0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x47, 0x01,0xfd, +0x01,0x6c, 0x02,0xfd, 0x02,0x55, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x47, 0x01,0xfb, +0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x59, 0x01,0x2b, 0x01,0x00, +0x12,0xfb, 0x01,0x7a, 0x80,0xcb,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, +0x04,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x04,0xfd, +0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x01,0x28, 0x01,0x22, 0x01,0x00, +0x01,0x2b, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x12, 0x01,0x00, 0x01,0xfb, 0x01,0x03, +0x01,0x00, 0x03,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x03, +0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x12, 0x02,0x22, +0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0xf7, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, +0x01,0x2b, 0x01,0x00, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, +0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x01,0x5d, 
0x07,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x00, +0x01,0x03, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, +0x01,0x00, 0x12,0xfb, 0x01,0x73, 0x80,0xc8,0xfd, 0x01,0x41, 0x04,0x00, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x06,0x00, 0x03,0xfd, 0x01,0x2e, 0x01,0x00, +0x01,0x2e, 0x07,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x22, +0x01,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x39, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, +0x01,0x22, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x22, +0x05,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x06,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, +0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, +0x04,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x22, +0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x03,0xfd, 0x01,0x39, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, +0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x13,0xfb, 0x80,0xc7,0xfd, +0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x03,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x01,0x07, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x19, 0x0a,0xfd, 0x01,0x83, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x4d, +0x09,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0xfb, 0x01,0x03, 0x01,0x00, +0x03,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x01,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x40, 0x02,0xfd, 0x02,0x2b, 0x03,0xfd, 0x02,0x2b, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x87, 0x01,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, +0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x09,0xfb, 0x01,0x2b, +0x01,0x00, 0x03,0xfb, 0x02,0x00, 0x02,0xfb, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x01,0xfb, +0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x04,0xfb, +0x01,0x22, 0x01,0x00, 0x01,0x06, 0x01,0xfd, 
0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x40, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, +0x03,0xfb, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, +0x13,0xfb, 0x01,0x8b, 0x80,0xc6,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x83, 0x01,0x2b, +0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x6c, 0x02,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x19, +0x02,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x01,0xfd, +0x01,0x55, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x47, +0x01,0x36, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x01,0x47, 0x01,0x00, 0x01,0x2b, +0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0x47, 0x02,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x07, +0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x47, 0x01,0x00, +0x01,0x12, 0x01,0xfb, 0x01,0x03, 0x02,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x47, +0x01,0x36, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, 0x03,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0xfd, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, +0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0x59, 0x01,0x2b, +0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x47, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x16, 0x01,0xfd, 0x01,0x00, 0x01,0x07, +0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x02,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, +0x01,0x2b, 0x0f,0xfb, 0x01,0x8b, 0x80,0xc7,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2e, 0x02,0x00, +0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x2b, 0x02,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x04,0xfd, 0x01,0x41, 0x04,0x00, 0x01,0xfd, 0x01,0x2b, +0x05,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x19, 0x02,0x00, +0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x12, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, +0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x36, 0x03,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x2b, 0x02,0x00, +0x01,0x36, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x09, 0x02,0x00, 0x02,0xfd, 0x01,0x19, 0x03,0x00, +0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x06,0xfb, +0x01,0x03, 
0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x01,0x2b, 0x03,0x00, 0x01,0x12, +0x05,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, +0x01,0x12, 0x02,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x47, 0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, 0x01,0x00, 0x01,0x55, +0x01,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x03,0x00, +0x01,0x36, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x0f,0xfb, 0x01,0x87, 0x80,0xce,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, +0x01,0x00, 0x10,0xfd, 0x01,0x00, 0x01,0x41, 0x33,0xfd, 0x01,0x82, 0x22,0xfb, 0x01,0x82, 0x0e,0xfd, +0x01,0x8b, 0x25,0xfb, 0x0c,0xfd, 0x23,0xfb, 0x01,0x8b, 0x0e,0xfd, 0x01,0x82, 0x23,0xfb, 0x01,0x82, +0x80,0xce,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x0f,0xfd, 0x01,0x19, +0x01,0x00, 0x34,0xfd, 0x01,0x7d, 0x22,0xfb, 0x01,0x8b, 0x0e,0xfd, 0x26,0xfb, 0x0c,0xfd, 0x24,0xfb, +0x0e,0xfd, 0x01,0x8b, 0x23,0xfb, 0x01,0x7d, 0x80,0xce,0xfd, 0x01,0x41, 0x01,0x19, 0x06,0xfd, +0x01,0x41, 0x01,0x19, 0x0f,0xfd, 0x01,0x2e, 0x01,0x41, 0x34,0xfd, 0x01,0x7a, 0x22,0xfb, 0x01,0x8b, +0x0e,0xfd, 0x26,0xfb, 0x0c,0xfd, 0x24,0xfb, 0x0e,0xfd, 0x01,0x8b, 0x23,0xfb, 0x01,0x7a, 0x81,0x1d,0xfd, +0x01,0x77, 0x22,0xfb, 0x0e,0xfd, 0x01,0x89, 0x26,0xfb, 0x0c,0xfd, 0x24,0xfb, 0x01,0x89, 0x0e,0xfd, +0x23,0xfb, 0x01,0x77, 0x81,0x1d,0xfd, 0x01,0x74, 0x21,0xfb, 0x01,0x71, 0x0e,0xfd, 0x01,0x7d, +0x26,0xfb, 0x0c,0xfd, 0x24,0xfb, 0x01,0x7d, 0x0e,0xfd, 0x01,0x71, 0x22,0xfb, 0x01,0x74, 0x81,0x1d,0xfd, +0x01,0x71, 0x21,0xfb, 0x01,0x7d, 0x0e,0xfd, 0x01,0x73, 0x26,0xfb, 0x0c,0xfd, 0x24,0xfb, 0x01,0x73, +0x0e,0xfd, 0x01,0x7d, 0x22,0xfb, 0x01,0x71, 0x81,0x1d,0xfd, 0x01,0x6f, 0x21,0xfb, 0x01,0x84, +0x0e,0xfd, 0x27,0xfb, 0x0c,0xfd, 0x25,0xfb, 0x0e,0xfd, 0x01,0x84, 0x22,0xfb, 0x01,0x6f, 0x81,0x1d,0xfd, +0x01,0x6e, 0x21,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x27,0xfb, 0x0c,0xfd, 0x25,0xfb, 0x01,0x8b, +0x0d,0xfd, 0x01,0x8b, 0x22,0xfb, 0x01,0x6e, 0x81,0x1d,0xfd, 0x01,0x6d, 0x21,0xfb, 0x01,0x8b, +0x0d,0xfd, 0x01,0x8b, 0x27,0xfb, 0x01,0x82, 0x0a,0xfd, 0x01,0x82, 0x25,0xfb, 0x01,0x8b, 0x0d,0xfd, +0x01,0x8b, 0x22,0xfb, 0x01,0x6d, 0x81,0x1d,0xfd, 0x22,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, +0x28,0xfb, 0x0a,0xfd, 0x26,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x23,0xfb, 0x81,0x1d,0xfd, +0x22,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x28,0xfb, 0x01,0x75, 0x08,0xfd, 0x01,0x75, 0x26,0xfb, +0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x23,0xfb, 0x81,0x1d,0xfd, 0x22,0xfb, 0x01,0x8b, 0x0d,0xfd, +0x01,0x8b, 0x2a,0xfb, 0x01,0x82, 0x04,0xfd, 0x01,0x82, 0x28,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, +0x23,0xfb, 0x81,0x1d,0xfd, 0x22,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x89, 0x58,0xfb, 0x01,0x89, +0x0d,0xfd, 0x01,0x8b, 0x23,0xfb, 0x81,0x1d,0xfd, 0x22,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x89, +0x58,0xfb, 0x01,0x89, 0x0d,0xfd, 0x01,0x8b, 0x23,0xfb, 0x81,0x1d,0xfd, 0x01,0x6d, 0x21,0xfb, +0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x58,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x22,0xfb, 0x01,0x6d, +0x81,0x1d,0xfd, 0x01,0x6e, 0x21,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x58,0xfb, 0x01,0x8b, +0x0d,0xfd, 0x01,0x8b, 0x22,0xfb, 0x01,0x6e, 0x81,0x1d,0xfd, 0x01,0x6f, 0x21,0xfb, 0x01,0x8b, +0x0d,0xfd, 0x01,0x8b, 0x58,0xfb, 
0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x22,0xfb, 0x01,0x6f, 0x81,0x1d,0xfd, +0x01,0x71, 0x21,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x58,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, +0x22,0xfb, 0x01,0x71, 0x81,0x1d,0xfd, 0x01,0x74, 0x21,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, +0x58,0xfb, 0x01,0x8b, 0x0d,0xfd, 0x01,0x8b, 0x22,0xfb, 0x01,0x74, 0x81,0x16,0xfd, 0x01,0x41, +0x01,0x83, 0x05,0xfd, 0x01,0x77, 0x21,0xfb, 0x01,0x84, 0x0e,0xfd, 0x58,0xfb, 0x0e,0xfd, 0x01,0x84, +0x22,0xfb, 0x01,0x77, 0x02,0xfd, 0x01,0x55, 0x01,0x6c, 0x35,0xfd, 0x01,0x41, 0x01,0x83, 0x80,0x8c,0xfd, +0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x02,0xfd, 0x01,0x2b, 0x01,0x19, 0x1a,0xfd, +0x02,0x6c, 0x01,0xfd, 0x01,0x41, 0x24,0xfd, 0x01,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x05,0xfd, 0x01,0x7a, 0x1a,0xfb, 0x01,0x22, 0x03,0x00, 0x01,0x2b, 0x01,0x47, 0x01,0xfb, 0x01,0x7d, +0x0e,0xfd, 0x01,0x73, 0x56,0xfb, 0x01,0x73, 0x0e,0xfd, 0x01,0x7d, 0x14,0xfb, 0x01,0x03, 0x01,0x2b, +0x04,0xfb, 0x01,0x2b, 0x01,0x03, 0x06,0xfb, 0x01,0x7a, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x1a,0xfd, +0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x02,0xfd, 0x01,0x2b, 0x01,0x19, 0x12,0xfd, +0x01,0x00, 0x01,0x41, 0x80,0x8c,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x19, 0x01,0x2b, +0x02,0xfd, 0x01,0x19, 0x01,0x41, 0x1a,0xfd, 0x02,0x19, 0x01,0xfd, 0x01,0x00, 0x24,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x7d, 0x19,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0x2b, 0x01,0x22, 0x01,0x12, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x71, 0x0e,0xfd, 0x01,0x7d, +0x15,0xfb, 0x01,0x36, 0x01,0x47, 0x35,0xfb, 0x01,0x59, 0x01,0x22, 0x08,0xfb, 0x01,0x7d, 0x07,0xfd, +0x01,0x55, 0x01,0x6c, 0x05,0xfd, 0x01,0x71, 0x14,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, +0x01,0x2b, 0x06,0xfb, 0x01,0x7d, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x83, +0x16,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x19, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x41, +0x12,0xfd, 0x01,0x00, 0x01,0x41, 0x80,0x8b,0xfd, 0x01,0x41, 0x01,0x00, 0x4a,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x82, 0x18,0xfb, 0x01,0x36, 0x01,0x00, +0x01,0x36, 0x07,0xfb, 0x0e,0xfd, 0x01,0x89, 0x15,0xfb, 0x01,0x00, 0x01,0x2b, 0x35,0xfb, 0x01,0x22, +0x01,0x00, 0x08,0xfb, 0x01,0x89, 0x07,0xfd, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x15,0xfb, 0x01,0x2b, +0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x82, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x15,0xfd, 0x01,0x41, 0x01,0x00, 0x1a,0xfd, 0x01,0x00, 0x01,0x41, +0x80,0x8b,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x55, 0x03,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x19, +0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, +0x01,0x55, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2e, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x19, +0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x87, 0x01,0x00, +0x01,0x2b, 0x02,0x00, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0x03, 0x02,0x00, 0x01,0x03, 0x06,0xfb, 0x01,0x2b, 0x01,0x00, 0x08,0xfb, 0x01,0x13, 0x03,0x00, +0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0x00, +0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 
+0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x36, +0x03,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x05,0xfb, 0x01,0x22, 0x01,0x00, +0x01,0x36, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x03, 0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x12, 0x03,0x00, 0x01,0x36, 0x01,0xfb, +0x04,0x00, 0x01,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0xfc, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, +0x01,0x00, 0x02,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x36, 0x02,0xfb, +0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x36, 0x0a,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x0e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, +0x04,0x00, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, +0x01,0x55, 0x05,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x55, 0x03,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x76,0xfd, 0x01,0x19, 0x02,0x00, 0x01,0x41, +0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, +0x01,0x00, 0x06,0xfd, 0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, +0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x02,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x02,0x83, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, 0x01,0x41, +0x01,0x83, 0x01,0x00, 0x01,0x07, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, 0x01,0x41, +0x01,0x83, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x8b, 0x02,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x59, 0x01,0x2b, +0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, 0x02,0x00, +0x01,0x3e, 0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, +0x02,0x00, 0x01,0x22, 0x06,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x47, 0x01,0x36, 0x01,0x00, 0x01,0x36, +0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x02,0x59, 0x01,0xfb, +0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x47, 0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0xf7, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x11, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x2b, +0x01,0xfb, 0x02,0x00, 0x0a,0xfb, 0x01,0x2b, 0x06,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x22, 0x01,0x47, +0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x02,0x00, +0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x06,0xfd, 
0x01,0x19, 0x02,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, +0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x06,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x6c, +0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x02,0x00, 0x01,0x83, 0x01,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x77,0xfd, 0x01,0x6c, 0x02,0x00, 0x01,0x19, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, +0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x2e, 0x04,0xfd, 0x02,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x8b, +0x01,0x00, 0x01,0x12, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, 0x01,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x00, +0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x8b, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, 0x03,0x22, 0x01,0x00, +0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x59, 0x02,0xfb, +0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x02,0x00, 0x01,0x36, 0x03,0xfb, 0x01,0x22, +0x01,0x00, 0x07,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x03,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x33, 0x02,0x22, 0x01,0x00, 0x01,0x36, +0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x0a,0xfb, 0x01,0x2b, 0x01,0x00, +0x04,0x22, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x55, +0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x6c, 0x02,0x00, +0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, 0x01,0x00, +0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, +0x01,0x2b, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x79,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x01,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x6c, 0x01,0x2b, +0x02,0x00, 0x01,0x83, 0x01,0xfd, 0x06,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x05,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x2b, 
+0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x06,0xfb, 0x02,0x00, 0x07,0xfb, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x00, 0x01,0x22, +0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, +0x02,0xfb, 0x01,0x2b, 0x05,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x06,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x47, 0x03,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x59, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x0a,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, +0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x09,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x01,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x6c, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x7a,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x0a,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, +0x01,0x6c, 0x02,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, +0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x06,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x47, +0x06,0xfb, 0x02,0x00, 0x03,0xfd, 0x02,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, +0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x47, +0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, +0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, +0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x02,0x00, 0x06,0xfb, 0x01,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x08,0xfb, 0x01,0x47, 0x02,0x00, 0x01,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x70, 0x05,0xfb, 0x01,0x00, +0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x0a,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, +0x01,0x2b, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x28, 0x01,0x00, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x06,0xfd, +0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 
0x01,0x00, 0x0a,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x19, 0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x75,0xfd, 0x01,0x55, 0x01,0x19, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, +0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x41, +0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x02,0x83, 0x01,0x00, 0x01,0x2b, +0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x22, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, +0x01,0x00, 0x07,0xfb, 0x02,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x12, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, +0x01,0x00, 0x01,0x3e, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x02,0x00, 0x01,0x47, +0x01,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x02,0x00, 0x01,0x2b, 0x02,0xfb, +0x02,0x00, 0x02,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, 0x01,0x47, 0x01,0xfb, +0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, +0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0x59, +0x02,0x00, 0x01,0x2b, 0x06,0xfb, 0x01,0x12, 0x02,0x59, 0x02,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, +0x01,0x36, 0x02,0xfb, 0x02,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x04,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, +0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x03, +0x06,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x36, 0x01,0x00, +0x01,0x36, 0x01,0x59, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, +0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x55, +0x01,0x19, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x6c, 0x05,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x02,0x00, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, 0x05,0xfd, +0x02,0x00, 0x01,0xfd, 0x01,0x07, 0x02,0x00, 0x75,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x55, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x04,0x00, +0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 
+0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x22, 0x04,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x07,0xfb, 0x01,0x59, 0x01,0x2b, 0x04,0x00, +0x01,0x12, 0x02,0xfb, 0x01,0x03, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0x00, +0x01,0x59, 0x02,0xfb, 0x01,0x03, 0x02,0x00, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x36, +0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, +0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x22, 0x04,0x00, +0x02,0xfb, 0x01,0x03, 0x02,0x00, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x06,0xfb, 0x04,0x00, 0x01,0x59, +0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0xfb, 0x01,0x36, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x01,0x00, +0x01,0x07, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x5d, +0x04,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, +0x02,0x00, 0x06,0xfb, 0x01,0x2b, 0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, +0x02,0x00, 0x01,0x1d, 0x02,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x02,0x00, +0x02,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2e, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x07, 0x06,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x02,0x00, 0x02,0x2b, 0x01,0x00, 0x80,0xcd,0xfd, 0x01,0x88, +0x23,0xfb, 0x0f,0xfd, 0x01,0x87, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x4c,0xfb, 0x01,0x87, 0x0f,0xfd, +0x24,0xfb, 0x01,0x88, 0x81,0x1f,0xfd, 0x01,0x8b, 0x23,0xfb, 0x0f,0xfd, 0x01,0x8b, 0x02,0xfb, +0x01,0x00, 0x01,0x22, 0x4c,0xfb, 0x01,0x8b, 0x0f,0xfd, 0x24,0xfb, 0x01,0x8b, 0x81,0x20,0xfd, +0x01,0x70, 0x22,0xfb, 0x01,0x8b, 0x0f,0xfd, 0x01,0x80, 0x01,0xfb, 0x01,0x2b, 0x01,0x47, 0x4b,0xfb, +0x01,0x80, 0x0f,0xfd, 0x01,0x8b, 0x23,0xfb, 0x01,0x70, 0x81,0x21,0xfd, 0x01,0x79, 0x22,0xfb, +0x01,0x7d, 0x10,0xfd, 0x4e,0xfb, 0x10,0xfd, 0x01,0x7d, 0x23,0xfb, 0x01,0x79, 0x81,0x21,0xfd, +0x01,0x82, 0x23,0xfb, 0x01,0x8b, 0x0f,0xfd, 0x01,0x89, 0x4c,0xfb, 0x01,0x89, 0x0f,0xfd, 0x01,0x8b, +0x24,0xfb, 0x01,0x82, 0x80,0xcd,0xfd, 0x01,0x83, 0x01,0x41, 0x31,0xfd, 0x01,0x83, 0x01,0x41, +0x0e,0xfd, 0x01,0x41, 0x01,0x83, 0x0f,0xfd, 0x01,0x8b, 0x23,0xfb, 0x01,0x7d, 0x10,0xfd, 0x1e,0xfb, +0x01,0x22, 0x01,0x59, 0x1a,0xfb, 0x01,0x36, 0x01,0x47, 0x10,0xfb, 0x10,0xfd, 0x01,0x7d, 0x08,0xfb, +0x01,0x36, 0x01,0x47, 0x16,0xfb, 0x01,0x22, 0x01,0x59, 0x02,0xfb, 0x01,0x8b, 0x18,0xfd, 0x01,0x41, +0x01,0x83, 0x16,0xfd, 0x01,0x83, 0x01,0x41, 0x16,0xfd, 0x01,0x6c, 0x01,0x55, 0x80,0x83,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x01,0x2b, 0x0d,0xfd, 0x01,0x19, 0x04,0x00, 0x01,0x2b, +0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x17,0xfd, 0x01,0x41, 0x01,0x00, 0x0e,0xfd, 0x01,0x00, 0x01,0x41, +0x10,0xfd, 0x01,0x6f, 0x1f,0xfb, 0x01,0x36, 0x01,0x00, 0x02,0xfb, 0x08,0xfd, 0x01,0x19, 0x01,0x2b, +0x06,0xfd, 0x01,0x8b, 0x0f,0xfb, 0x01,0x03, 0x02,0x00, 0x01,0x2b, 0x01,0x59, 0x09,0xfb, 0x01,0x00, +0x01,0x22, 0x1a,0xfb, 0x01,0x00, 0x01,0x2b, 0x0f,0xfb, 0x01,0x8b, 0x10,0xfd, 0x09,0xfb, 
0x01,0x00, +0x01,0x2b, 0x08,0xfb, 0x01,0x36, 0x02,0x59, 0x01,0x36, 0x0a,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, +0x01,0x6f, 0x19,0xfd, 0x01,0x00, 0x01,0x41, 0x16,0xfd, 0x01,0x41, 0x01,0x00, 0x08,0xfd, 0x01,0x83, +0x01,0x55, 0x01,0xfd, 0x01,0x55, 0x01,0x83, 0x09,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0x83,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x19, 0x0d,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x41, +0x01,0x55, 0x01,0xfd, 0x01,0x41, 0x01,0x19, 0x17,0xfd, 0x01,0x41, 0x01,0x00, 0x0e,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x41, 0x03,0xfd, 0x01,0x6c, 0x01,0x55, 0x08,0xfd, 0x01,0x7a, +0x08,0xfb, 0x01,0x36, 0x01,0x47, 0x15,0xfb, 0x01,0x47, 0x01,0x2b, 0x02,0xfb, 0x01,0x8b, 0x07,0xfd, +0x01,0x41, 0x01,0x19, 0x07,0xfd, 0x01,0x7e, 0x0d,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x12, 0x01,0x22, +0x01,0x2b, 0x01,0x36, 0x09,0xfb, 0x01,0x00, 0x01,0x22, 0x1a,0xfb, 0x01,0x00, 0x01,0x2b, 0x0e,0xfb, +0x01,0x7e, 0x10,0xfd, 0x01,0x8b, 0x09,0xfb, 0x01,0x00, 0x01,0x2b, 0x08,0xfb, 0x01,0x2b, 0x02,0x36, +0x01,0x2b, 0x0a,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x7a, 0x03,0xfd, 0x01,0x6c, 0x01,0x55, +0x14,0xfd, 0x01,0x00, 0x01,0x41, 0x16,0xfd, 0x01,0x41, 0x01,0x00, 0x08,0xfd, 0x01,0x55, 0x01,0x2b, +0x01,0xfd, 0x01,0x2b, 0x01,0x55, 0x09,0xfd, 0x01,0x2b, 0x01,0x00, 0x80,0x83,0xfd, 0x01,0x41, +0x01,0x00, 0x11,0xfd, 0x01,0x2b, 0x01,0x00, 0x1e,0xfd, 0x01,0x41, 0x01,0x00, 0x0e,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x08,0xfd, 0x01,0x84, +0x08,0xfb, 0x01,0x00, 0x01,0x2b, 0x19,0xfb, 0x01,0x73, 0x10,0xfd, 0x01,0x8b, 0x0d,0xfb, 0x01,0x00, +0x01,0x2b, 0x0d,0xfb, 0x01,0x00, 0x01,0x22, 0x1a,0xfb, 0x01,0x00, 0x01,0x2b, 0x0e,0xfb, 0x01,0x8b, +0x10,0xfd, 0x01,0x73, 0x09,0xfb, 0x01,0x00, 0x01,0x2b, 0x16,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, +0x01,0x84, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x14,0xfd, 0x01,0x00, 0x01,0x41, 0x16,0xfd, 0x01,0x41, +0x01,0x00, 0x16,0xfd, 0x01,0x2b, 0x01,0x00, 0x7f,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x06,0xfd, 0x01,0x2b, +0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x55, +0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x04,0x00, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x2e, 0x03,0x00, 0x01,0x2b, 0x01,0x8b, 0x01,0xfb, 0x01,0x36, 0x03,0x00, 0x01,0x03, 0x01,0xfb, +0x01,0x2b, 0x03,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x36, 0x03,0x00, 0x01,0x36, 0x07,0xfb, 0x01,0x2b, +0x02,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x17, +0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x05,0x00, 0x01,0x8b, +0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x06,0xfb, 0x02,0x00, 0x01,0x36, 0x05,0xfb, 0x01,0x03, +0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, +0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, +0x01,0x36, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x03, 0x04,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x36, +0x03,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x54, 0x06,0xfd, 0x01,0x83, +0x05,0x00, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x03, 0x04,0x00, 0x01,0x2b, +0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x03, 0x01,0x00, 
0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, +0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x01,0xfb, 0x01,0xfa, 0x01,0x00, 0x01,0x55, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, +0x03,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, +0x01,0x19, 0x03,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0x00, 0x05,0xfd, 0x01,0x83, +0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x2b, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x02,0x00, 0x01,0x19, 0x6c,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, +0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x6c, +0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x6c, +0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x02,0x55, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x54, 0x01,0x75, 0x02,0x00, 0x02,0xfb, 0x01,0x47, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, +0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x06,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0x59, 0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x01,0xfd, 0x01,0x87, 0x01,0x2b, 0x01,0x00, +0x02,0x59, 0x01,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, +0x01,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x47, +0x01,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, +0x01,0x22, 0x02,0x00, 0x01,0x47, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, +0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, +0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x2b, 0x01,0x87, 0x02,0x00, 0x06,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x59, +0x01,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, +0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, +0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x47, 0x02,0xfb, +0x01,0x00, 0x01,0x22, 0x01,0x4d, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0x6c, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x06,0xfd, 0x01,0x00, +0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 
+0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, +0x01,0x83, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x02,0x2b, 0x03,0xfd, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x2b, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x6b,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, +0x02,0x41, 0x01,0x00, 0x01,0x07, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0x80, 0x02,0x00, 0x01,0x12, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, +0x03,0x22, 0x01,0x00, 0x01,0x36, 0x05,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, 0x01,0xfb, +0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x8b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, 0x07,0xfb, 0x01,0x2b, 0x02,0x00, +0x01,0x59, 0x01,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x12, +0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, +0x01,0x22, 0x01,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x02,0x00, 0x03,0xfb, +0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x03,0x22, 0x01,0x00, 0x01,0x36, 0x01,0xfb, +0x01,0x00, 0x01,0x2b, 0x01,0x87, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, +0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x02,0x00, +0x01,0xfb, 0x02,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x59, +0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, +0x01,0x47, 0x05,0xfb, 0x01,0x00, 0x01,0x12, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, +0x07,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x00, 0x01,0x19, +0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0x41, 0x01,0x07, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0x00, +0x01,0x83, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6b,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, +0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, +0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 
0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, 0x04,0x00, 0x01,0x41, 0x01,0x8b, +0x01,0x59, 0x01,0x2b, 0x02,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, +0x05,0x00, 0x01,0x36, 0x05,0xfb, 0x05,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, +0x01,0x2b, 0x01,0x00, 0x01,0x71, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, +0x08,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0xfb, 0x06,0x00, 0x02,0xfb, 0x03,0x00, 0x01,0x59, 0x03,0xfb, +0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x03,0xfb, +0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x2b, 0x05,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x05,0x00, 0x01,0x2b, +0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, +0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, +0x06,0xfb, 0x03,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x41, +0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x01,0xfd, 0x06,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x07, +0x03,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, +0x01,0x00, 0x6b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x09,0xfd, 0x01,0x2b, 0x01,0x00, +0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x04,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x75, +0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x00, +0x0a,0xfb, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0xfb, 0x01,0x8b, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0xf7, 0x01,0x84, +0x0c,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, 0x02,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0x00, +0x01,0x59, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x12, 0x01,0x00, 0x02,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, +0x01,0x2b, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, +0x01,0x8b, 0x01,0x00, 0x01,0x2b, 0x05,0xfb, 0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x00, 0x01,0x2b, 
+0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x05,0xfb, 0x01,0x00, 0x01,0x28, 0x02,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, +0x01,0xfd, 0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x06,0xfd, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, +0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6c,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x02,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, +0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x41, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x03,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x02,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x07, +0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x2e, +0x01,0xfd, 0x01,0x32, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x02,0xfb, +0x01,0x59, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, 0x01,0x47, 0x05,0xfb, 0x01,0x2b, 0x01,0x00, +0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x3e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, +0x01,0x41, 0x01,0x2b, 0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x41, 0x01,0x82, 0x01,0x59, +0x01,0x22, 0x05,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, +0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x59, +0x02,0x00, 0x02,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x01,0x22, 0x02,0x00, 0x02,0xfb, 0x01,0x22, +0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x47, 0x01,0x59, +0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x36, +0x01,0x47, 0x01,0x82, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x02,0x00, +0x01,0x41, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0xfb, +0x01,0x59, 0x01,0x22, 0x01,0xfb, 0x01,0x03, 0x01,0x00, 0x01,0x47, 0x01,0x59, 0x01,0x2b, 0x01,0x00, +0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x02,0x00, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, +0x02,0xfb, 0x02,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x00, 0x01,0x32, 0x01,0x83, +0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, +0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0x6c, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x2e, +0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x02,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, 
0x01,0x55, 0x01,0x00, 0x01,0x55, 0x01,0x6c, 0x02,0x00, +0x01,0x41, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x02,0x83, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, +0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x6c,0xfd, +0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x05,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, +0x01,0x55, 0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x07, +0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0xfd, 0x01,0x83, 0x02,0x00, +0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x02,0x00, 0x02,0x2b, 0x01,0x00, 0x01,0x55, 0x01,0x2b, 0x03,0x00, +0x01,0x36, 0x02,0xfb, 0x01,0x36, 0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0x47, +0x06,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x01,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x07, 0x03,0x00, 0x01,0x2b, 0x05,0xfb, +0x01,0x12, 0x03,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, +0x01,0xfb, 0x01,0x59, 0x02,0x00, 0x01,0xfb, 0x01,0x59, 0x03,0x00, 0x01,0x12, 0x01,0x00, 0x02,0xfb, +0x01,0x22, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, +0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x47, 0x04,0x00, 0x01,0x56, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, +0x01,0x7c, 0x02,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, +0x01,0x00, 0x01,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x36, 0x02,0x00, 0x02,0x2b, +0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x39, 0x01,0xfd, 0x01,0x83, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x03,0x00, +0x01,0x55, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x41, +0x04,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, +0x02,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x07, +0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x80,0xc8,0xfd, 0x01,0x79, 0x25,0xfb, 0x01,0x82, +0x0a,0xfd, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x87, 0x38,0xfb, 0x01,0x87, 0x12,0xfd, 0x01,0x00, +0x01,0x2b, 0x26,0xfb, 0x01,0x79, 0x81,0x29,0xfd, 0x01,0x86, 0x26,0xfb, 0x01,0x8b, 0x05,0xfd, +0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x6c, 0x08,0xfd, 0x01,0x87, 0x36,0xfb, 0x01,0x87, 0x0f,0xfd, +0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x47, 0x26,0xfb, 0x01,0x86, 0x81,0x2a,0xfd, 0x01,0x72, +0x25,0xfb, 0x01,0x7c, 0x05,0xfd, 0x01,0x6c, 0x01,0x07, 0x01,0x2b, 0x01,0x07, 0x01,0x83, 0x0a,0xfd, +0x01,0x8b, 0x34,0xfb, 0x01,0x8b, 0x10,0xfd, 0x01,0x6c, 0x01,0x07, 0x01,0x2b, 0x01,0x07, 0x01,0x66, +0x26,0xfb, 
0x01,0x72, 0x81,0x2b,0xfd, 0x01,0x80, 0x26,0xfb, 0x01,0x87, 0x14,0xfd, 0x01,0x8b, +0x01,0x7e, 0x30,0xfb, 0x01,0x7e, 0x01,0x8b, 0x14,0xfd, 0x01,0x87, 0x27,0xfb, 0x01,0x80, 0x81,0x2b,0xfd, +0x01,0x8b, 0x01,0x6d, 0x26,0xfb, 0x01,0x8b, 0x15,0xfd, 0x01,0x8b, 0x2e,0xfb, 0x01,0x8b, 0x15,0xfd, +0x01,0x8b, 0x27,0xfb, 0x01,0x6d, 0x01,0x8b, 0x80,0xe8,0xfd, 0x01,0x83, 0x01,0x41, 0x42,0xfd, +0x01,0xfc, 0x27,0xfb, 0x01,0x8b, 0x16,0xfd, 0x01,0x89, 0x2a,0xfb, 0x01,0x89, 0x16,0xfd, 0x01,0x8b, +0x28,0xfb, 0x01,0xfc, 0x80,0xcf,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, +0x02,0xfd, 0x01,0x2b, 0x01,0x19, 0x11,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x01,0x2b, +0x0d,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x03,0xfd, 0x01,0x55, 0x01,0x2b, 0x29,0xfd, 0x01,0x8b, +0x0b,0xfb, 0x07,0x00, 0x01,0x2b, 0x15,0xfb, 0x01,0x8b, 0x17,0xfd, 0x01,0x80, 0x26,0xfb, 0x01,0x80, +0x17,0xfd, 0x01,0x8b, 0x29,0xfb, 0x01,0x8b, 0x80,0xcf,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x41, +0x01,0x19, 0x01,0x2b, 0x02,0xfd, 0x01,0x19, 0x01,0x41, 0x11,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, +0x01,0x41, 0x01,0x19, 0x0d,0xfd, 0x01,0x41, 0x02,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x18,0xfd, +0x01,0x55, 0x01,0x6c, 0x0f,0xfd, 0x01,0x6b, 0x01,0x3f, 0x0a,0xfb, 0x03,0x22, 0x02,0x00, 0x02,0x22, +0x01,0x36, 0x0f,0xfb, 0x01,0x22, 0x01,0x59, 0x05,0xfb, 0x01,0x8b, 0x17,0xfd, 0x01,0x8b, 0x01,0x87, +0x22,0xfb, 0x01,0x87, 0x01,0x8b, 0x17,0xfd, 0x01,0x8b, 0x29,0xfb, 0x01,0x7b, 0x80,0xcf,0xfd, +0x01,0x41, 0x01,0x00, 0x19,0xfd, 0x01,0x41, 0x01,0x00, 0x11,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2e, +0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x18,0xfd, 0x01,0x00, 0x01,0x2b, 0x0f,0xfd, 0x01,0x2b, 0x01,0x00, +0x0d,0xfb, 0x01,0x00, 0x01,0x2b, 0x12,0xfb, 0x01,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x6e, 0x01,0x8b, +0x19,0xfd, 0x01,0x89, 0x1e,0xfb, 0x01,0x89, 0x19,0xfd, 0x01,0x8b, 0x01,0x6e, 0x29,0xfb, 0x01,0x8b, +0x80,0xcf,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x01,0x55, 0x03,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, 0x01,0x19, 0x06,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x03,0x00, +0x01,0x19, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, +0x03,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x04,0x00, 0x01,0x55, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x41, 0x03,0x00, 0x01,0x2b, 0x0b,0xfb, 0x01,0x00, +0x01,0x2b, 0x04,0xfb, 0x01,0x2b, 0x03,0x00, 0x01,0x03, 0x02,0xfb, 0x01,0x12, 0x03,0x00, 0x01,0x36, +0x01,0xfb, 0x04,0x00, 0x02,0xfb, 0x01,0x03, 0x03,0x00, 0x01,0x7a, 0x1b,0xfd, 0x01,0x8b, 0x18,0xfb, +0x01,0x8b, 0x1b,0xfd, 0x01,0x8b, 0x2a,0xfb, 0x01,0x7c, 0x80,0xd1,0xfd, 0x01,0x19, 0x02,0x00, +0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x01,0xfd, +0x01,0x07, 0x01,0x00, 0x06,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, +0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2e, 0x01,0x83, 0x01,0xfd, 0x02,0x00, 0x02,0xfd, 
+0x02,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x8b, 0x0c,0xfb, 0x01,0x00, 0x01,0x2b, +0x04,0xfb, 0x01,0x22, 0x01,0x47, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, +0x01,0xfb, 0x02,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x01,0x36, +0x01,0xfb, 0x02,0x00, 0x01,0x8b, 0x1c,0xfd, 0x01,0x8b, 0x01,0x89, 0x01,0x7c, 0x10,0xfb, 0x01,0x7c, +0x01,0x89, 0x01,0x8b, 0x1c,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x8b, 0x80,0xd2,0xfd, 0x01,0x6c, +0x02,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0x41, +0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, +0x01,0x07, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0x41, +0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x02,0x41, 0x01,0x00, 0x01,0x07, 0x01,0xfd, +0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, +0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x7d, 0x01,0xfb, 0x01,0x36, 0x04,0x22, 0x01,0x36, +0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x07,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x01,0x36, +0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x02,0x00, 0x02,0x22, 0x01,0x2b, 0x01,0x00, 0x01,0xfb, +0x01,0x8b, 0x20,0xfd, 0x04,0x8b, 0x04,0x89, 0x04,0x8b, 0x20,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x7d, +0x80,0xd5,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, 0x01,0x19, 0x05,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x05,0x00, +0x01,0x19, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x07, 0x04,0x00, +0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x8b, 0x01,0x70, +0x01,0x2b, 0x04,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x22, 0x04,0x00, +0x02,0xfb, 0x01,0x47, 0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x06,0x00, +0x02,0xfb, 0x01,0x8b, 0x4a,0xfd, 0x01,0x8b, 0x2b,0xfb, 0x01,0x70, 0x01,0x8b, 0x80,0xd6,0xfd, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x09,0xfd, +0x01,0x55, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x09,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x03,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, +0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x02,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x81, 0x0a,0xfb, 0x01,0x00, 0x01,0x2b, +0x03,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x04,0xfb, 0x01,0x47, +0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x02,0xfb, 0x02,0x00, 0x07,0xfb, 0x01,0x87, 0x48,0xfd, +0x01,0x87, 0x2c,0xfb, 0x01,0x81, 0x80,0xd2,0xfd, 0x01,0x55, 0x01,0x19, 
0x01,0x6c, 0x01,0xfd, +0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x01,0x00, +0x01,0x2b, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0xfd, +0x01,0x6c, 0x01,0x55, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x2e, 0x02,0x00, 0x02,0xfd, +0x02,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, +0x01,0x41, 0x02,0x00, 0x02,0xfd, 0x01,0x41, 0x01,0x6c, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, +0x02,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0x6c, 0x02,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0x2b, 0x03,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x8b, 0x01,0x75, 0x09,0xfb, +0x01,0x00, 0x01,0x2b, 0x03,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x36, 0x01,0x59, 0x01,0x2b, 0x01,0x00, +0x01,0x59, 0x01,0xfb, 0x01,0x12, 0x02,0x59, 0x02,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, +0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, 0x01,0x12, 0x02,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0x7a, +0x01,0x8b, 0x44,0xfd, 0x01,0x8b, 0x01,0x7a, 0x2c,0xfb, 0x01,0x75, 0x01,0x8b, 0x80,0xd2,0xfd, +0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x6c, +0x04,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x55, 0x03,0x00, 0x01,0x07, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, +0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x05,0xfd, 0x01,0x41, 0x01,0x00, +0x04,0xfd, 0x02,0x00, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x03,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, +0x03,0x00, 0x01,0x2e, 0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x55, +0x02,0x00, 0x01,0x41, 0x01,0x83, 0x03,0x00, 0x01,0x2e, 0x02,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, +0x03,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x88, 0x09,0xfb, 0x01,0x00, 0x01,0x2b, +0x04,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x12, 0x02,0x00, 0x01,0xfb, 0x04,0x00, 0x01,0x59, 0x02,0xfb, +0x01,0x03, 0x02,0x00, 0x02,0xfb, 0x01,0x22, 0x04,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x2b, 0x02,0xfb, +0x01,0x82, 0x42,0xfd, 0x01,0x82, 0x2e,0xfb, 0x01,0x88, 0x81,0x38,0xfd, 0x01,0xfc, 0x2d,0xfb, +0x01,0x7c, 0x40,0xfd, 0x01,0x7c, 0x2e,0xfb, 0x01,0xfc, 0x81,0x39,0xfd, 0x01,0x8b, 0x01,0x72, +0x2e,0xfb, 0x01,0x85, 0x3c,0xfd, 0x01,0x85, 0x2f,0xfb, 0x01,0x72, 0x01,0x8b, 0x81,0x3a,0xfd, +0x01,0x85, 0x2f,0xfb, 0x01,0x6e, 0x01,0x8b, 0x38,0xfd, 0x01,0x8b, 0x01,0x6e, 0x30,0xfb, 0x01,0x85, +0x81,0x3c,0xfd, 0x01,0xfc, 0x31,0xfb, 0x01,0x8b, 0x34,0xfd, 0x01,0x8b, 0x32,0xfb, 0x01,0xfc, +0x81,0x3d,0xfd, 0x01,0x8b, 0x01,0x74, 0x31,0xfb, 0x01,0x73, 0x01,0x8b, 0x30,0xfd, 0x01,0x8b, +0x01,0x73, 0x32,0xfb, 0x01,0x74, 0x01,0x8b, 0x81,0x3e,0xfd, 0x01,0x88, 0x34,0xfb, 0x01,0x7c, +0x01,0x8b, 0x2a,0xfd, 0x01,0x8b, 0x01,0x7c, 0x35,0xfb, 0x01,0x88, 0x81,0x40,0xfd, 0x01,0x81, +0x35,0xfb, 0x01,0x7d, 0x01,0x8b, 0x26,0xfd, 0x01,0x8b, 0x01,0x7d, 0x36,0xfb, 0x01,0x81, 0x81,0x42,0xfd, +0x01,0x7b, 0x38,0xfb, 0x01,0x87, 0x20,0xfd, 0x01,0x87, 0x39,0xfb, 0x01,0x7b, 0x81,0x43,0xfd, +0x01,0x8b, 0x01,0x75, 0x3a,0xfb, 0x01,0x77, 0x01,0x80, 0x02,0x8b, 0x14,0xfd, 0x02,0x8b, 0x01,0x80, +0x01,0x77, 0x3b,0xfb, 0x01,0x75, 0x01,0x8b, 0x81,0x44,0xfd, 0x01,0x8b, 0x01,0x70, 0x3e,0xfb, +0x01,0x71, 0x01,0x7d, 0x01,0x84, 0x0c,0x8b, 0x01,0x84, 0x01,0x7d, 0x01,0x71, 0x3f,0xfb, 0x01,0x70, +0x01,0x8b, 0x81,0x46,0xfd, 0x01,0x86, 0x80,0x8f,0xfb, 0x01,0x86, 0x81,0x48,0xfd, +0x01,0x82, 0x80,0x8d,0xfb, 0x01,0x82, 0x81,0x4a,0xfd, 0x01,0x7f, 0x80,0x8b,0xfb, +0x01,0x7f, 
0x81,0x4c,0xfd, 0x01,0x7d, 0x80,0x89,0xfb, 0x01,0x7d, 0x81,0x4e,0xfd, +0x01,0x7c, 0x80,0x87,0xfb, 0x01,0x7c, 0x81,0x4f,0xfd, 0x01,0x8b, 0x01,0x7b, 0x80,0x85,0xfb, +0x01,0x7b, 0x01,0x8b, 0x81,0x50,0xfd, 0x01,0x8b, 0x01,0x7b, 0x80,0x83,0xfb, 0x01,0x7b, +0x01,0x8b, 0x81,0x52,0xfd, 0x01,0x8b, 0x01,0x7c, 0x80,0x81,0xfb, 0x01,0x7c, 0x01,0x8b, +0x81,0x55,0xfd, 0x01,0x7d, 0x7f,0xfb, 0x01,0x7d, 0x81,0x58,0xfd, 0x01,0x7f, 0x7d,0xfb, +0x01,0x7f, 0x81,0x08,0xfd, 0x02,0x2b, 0x01,0x6c, 0x1f,0xfd, 0x01,0x07, 0x01,0x55, 0x0b,0xfd, +0x01,0x41, 0x01,0x55, 0x06,0xfd, 0x01,0x83, 0x0b,0x41, 0x01,0x83, 0x03,0xfd, 0x01,0x19, 0x01,0x55, +0x09,0xfd, 0x01,0x82, 0x02,0xfb, 0x01,0x22, 0x01,0x2b, 0x01,0x03, 0x01,0xfb, 0x01,0x47, 0x01,0x12, +0x0a,0xfb, 0x01,0x2b, 0x16,0xfb, 0x01,0x00, 0x01,0x03, 0x07,0xfb, 0x01,0x59, 0x0b,0x22, 0x01,0x59, +0x04,0xfb, 0x01,0x22, 0x01,0x59, 0x04,0xfb, 0x01,0x59, 0x01,0x2b, 0x05,0xfb, 0x02,0x22, 0x0a,0xfb, +0x02,0x47, 0x0e,0xfb, 0x01,0x03, 0x01,0x47, 0x0c,0xfb, 0x01,0x2b, 0x01,0x82, 0x3e,0xfd, 0x01,0x41, +0x02,0xfd, 0x01,0x2e, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x07, 0x01,0x55, 0x18,0xfd, 0x01,0x41, +0x01,0x55, 0x80,0x8e,0xfd, 0x01,0x83, 0x0c,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x41, +0x01,0x19, 0x01,0x6c, 0x01,0x2b, 0x1e,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x83, 0x0b,0xfd, 0x01,0x00, +0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x03,0xfd, 0x01,0x6c, 0x05,0x2b, 0x01,0x00, 0x05,0x2b, 0x01,0x6c, +0x03,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x04,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, +0x03,0x00, 0x01,0x22, 0x02,0xfb, 0x01,0x22, 0x01,0x2b, 0x0a,0xfb, 0x01,0x00, 0x08,0xfb, 0x01,0x2b, +0x05,0x00, 0x01,0x36, 0x07,0xfb, 0x01,0x47, 0x02,0x00, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x36, +0x02,0xfb, 0x01,0x47, 0x03,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0x00, 0x03,0x2b, 0x01,0x47, 0x04,0xfb, +0x01,0x00, 0x04,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x12, 0x04,0xfb, 0x02,0x2b, 0x02,0xfb, +0x01,0x59, 0x01,0x22, 0x01,0x47, 0x05,0xfb, 0x01,0x2b, 0x01,0x12, 0x01,0xfb, 0x01,0x03, 0x01,0x2b, +0x01,0x22, 0x0a,0xfb, 0x01,0x00, 0x01,0x22, 0x0b,0xfb, 0x01,0x70, 0x01,0x00, 0x15,0xfd, 0x01,0x83, +0x01,0x41, 0x02,0xfd, 0x01,0x6c, 0x01,0x55, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x55, 0x08,0x41, 0x01,0x83, 0x14,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2b, +0x01,0x41, 0x04,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x83, 0x08,0xfd, 0x01,0x83, 0x0f,0xfd, 0x01,0x00, +0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x7e,0xfd, 0x09,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, +0x0a,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x02,0x2b, 0x01,0x6c, 0x1e,0xfd, 0x01,0x2b, +0x06,0x00, 0x03,0xfd, 0x01,0x55, 0x08,0x00, 0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, +0x04,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x03,0x2b, 0x01,0x00, +0x02,0x2b, 0x01,0xfb, 0x01,0x22, 0x01,0x2b, 0x04,0xfb, 0x01,0x03, 0x03,0x2b, 0x06,0x00, 0x01,0x2b, +0x04,0xfb, 0x01,0x36, 0x01,0x22, 0x01,0x36, 0x01,0xfb, 0x02,0x00, 0x0a,0xfb, 0x01,0x47, 0x01,0x00, +0x01,0x59, 0x01,0x00, 0x01,0x59, 0x03,0xfb, 0x01,0x47, 0x02,0x22, 0x01,0x00, 0x01,0x2b, 0x01,0x22, +0x01,0x00, 0x01,0x2b, 0x02,0x22, 0x01,0x59, 0x04,0xfb, 0x01,0x59, 0x01,0x00, 0x04,0xfb, 0x01,0x59, +0x01,0x2b, 0x01,0x03, 0x01,0x22, 0x01,0xfb, 0x01,0x22, 0x07,0x00, 0x01,0x2b, 0x01,0x47, 0x05,0xfb, +0x01,0x00, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x03, 0x04,0xfb, 0x01,0x36, +0x01,0x00, 0x03,0x2b, 0x05,0x00, 0x03,0xfb, 0x01,0x03, 0x03,0x2b, 0x06,0x00, 0x01,0x2b, 0x11,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0xfd, 
0x01,0x41, 0x01,0x00, 0x01,0x07, 0x01,0x55, 0x01,0x07, 0x01,0x55, +0x02,0xfd, 0x01,0x00, 0x07,0x2b, 0x01,0x00, 0x01,0x2e, 0x14,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x2e, +0x06,0xfd, 0x01,0x2b, 0x06,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x09,0xfd, 0x01,0x55, +0x08,0x00, 0x01,0x55, 0x80,0x84,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x83, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x1f,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x07, +0x01,0x2b, 0x06,0xfd, 0x01,0x00, 0x01,0x6c, 0x09,0xfd, 0x09,0x00, 0x05,0xfd, 0x01,0x2b, 0x01,0x41, +0x05,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x83, 0x02,0x41, 0x01,0x07, 0x01,0x2b, 0x01,0x2e, +0x01,0x22, 0x01,0x2b, 0x04,0x00, 0x02,0xfb, 0x05,0x22, 0x01,0x47, 0x01,0x00, 0x03,0xfb, 0x01,0x59, +0x07,0xfb, 0x01,0x00, 0x01,0x2b, 0x09,0xfb, 0x01,0x00, 0x03,0xfb, 0x02,0x2b, 0x04,0xfb, 0x01,0x2b, +0x01,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x02,0xfb, +0x01,0x59, 0x01,0x22, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x03, 0x01,0xfb, 0x01,0x59, 0x01,0x00, +0x01,0x22, 0x06,0xfb, 0x01,0x2b, 0x01,0x22, 0x02,0xfb, 0x01,0x2b, 0x07,0xfb, 0x01,0x00, 0x01,0x2b, +0x01,0x03, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x05,0xfb, 0x01,0x36, 0x03,0x22, 0x01,0x00, 0x01,0x2b, +0x01,0x47, 0x05,0xfb, 0x03,0x22, 0x01,0x2e, 0x01,0x3a, 0x01,0x6c, 0x01,0x00, 0x03,0xfd, 0x01,0x83, +0x11,0xfd, 0x01,0x2e, 0x01,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0x07, 0x01,0x19, 0x03,0xfd, +0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, 0x01,0x83, 0x0f,0xfd, 0x01,0x2b, 0x09,0x00, 0x01,0x2b, +0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x07, 0x01,0x2b, 0x05,0xfd, 0x01,0x2b, +0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x05,0xfd, 0x01,0x00, 0x01,0x6c, 0x80,0x89,0xfd, +0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x03,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x83, 0x01,0x2e, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x1f,0xfd, +0x01,0x55, 0x01,0x00, 0x02,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x02,0x2b, 0x01,0x07, +0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x02,0xfd, 0x01,0x6c, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x41, 0x04,0x00, 0x01,0x2b, +0x02,0xfd, 0x06,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x2b, 0x01,0xfb, 0x01,0x00, 0x05,0xfb, 0x01,0x03, +0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x09,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x03, 0x08,0xfb, 0x01,0x2b, +0x01,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x47, 0x01,0x00, 0x01,0x47, 0x01,0x22, 0x03,0xfb, 0x01,0x2b, +0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, +0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x01,0x2b, +0x01,0x00, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x06,0xfb, 0x04,0x2b, 0x01,0x00, 0x07,0xfb, 0x02,0x00, +0x04,0xfb, 0x01,0x00, 0x01,0x36, 0x04,0xfb, 0x01,0x47, 0x03,0xfb, 0x01,0x00, 0x03,0x22, 0x01,0x03, +0x05,0xfb, 0x01,0x81, 0x01,0x15, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x15,0xfd, 0x01,0x2b, 0x01,0x07, +0x03,0xfd, 0x01,0x00, 0x01,0x6c, 0x05,0xfd, 0x01,0x00, 0x01,0x41, 0x05,0xfd, 0x01,0x6c, 0x01,0x00, +0x15,0xfd, 0x01,0x00, 0x08,0xfd, 0x01,0x55, 0x01,0x00, 0x02,0x55, 0x03,0xfd, 0x01,0x00, 0x01,0x55, +0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x02,0x2b, 0x01,0x07, +0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x80,0x84,0xfd, 0x01,0x41, 0x01,0x00, 
0x0b,0xfd, +0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x2b, 0x01,0x2e, 0x09,0xfd, 0x01,0x2b, +0x05,0x00, 0x05,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0x55, +0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, +0x01,0x19, 0x01,0x00, 0x01,0x2e, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x09,0x00, 0x02,0xfd, +0x01,0x83, 0x02,0x41, 0x01,0x2b, 0x01,0x07, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x06,0xfd, 0x01,0x00, +0x02,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x88, 0x02,0x2b, 0x01,0xfb, 0x01,0x00, 0x04,0xfb, +0x01,0x22, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x08,0xfb, 0x01,0x59, 0x05,0x00, 0x01,0x03, +0x04,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x22, 0x01,0x59, +0x01,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x2b, 0x09,0x00, 0x01,0x22, 0x04,0xfb, 0x01,0x00, 0x01,0x47, +0x02,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x00, 0x01,0x03, 0x04,0xfb, 0x01,0x2b, 0x02,0x00, 0x01,0x22, +0x03,0x00, 0x01,0x03, 0x05,0xfb, 0x01,0x00, 0x01,0x12, 0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x04,0xfb, +0x06,0x00, 0x02,0x2b, 0x01,0x22, 0x03,0xfb, 0x01,0x74, 0x01,0x88, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, +0x01,0x41, 0x01,0x00, 0x15,0xfd, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x01,0x07, 0x05,0xfd, +0x01,0x2b, 0x01,0x55, 0x05,0xfd, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, +0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x83, 0x01,0x00, 0x04,0xfd, +0x01,0x41, 0x01,0x00, 0x02,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x0c,0xfd, +0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, 0x01,0x19, 0x01,0x00, 0x01,0x2e, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x80,0x83,0xfd, 0x01,0x41, 0x01,0x00, 0x0a,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x10,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, +0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x6c, 0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, +0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x01,0x2b, +0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, +0x01,0x2e, 0x01,0x2b, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x83, 0x02,0xfd, 0x01,0x00, +0x01,0x41, 0x01,0x07, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0xfd, 0x01,0x2b, 0x01,0x2f, 0x01,0xfb, +0x01,0x00, 0x04,0xfb, 0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x36, 0x06,0xfb, +0x01,0x36, 0x02,0x00, 0x01,0x03, 0x01,0x59, 0x01,0xfb, 0x01,0x47, 0x02,0x00, 0x03,0xfb, 0x02,0x2b, +0x01,0xfb, 0x01,0x00, 0x01,0xfb, 0x02,0x2b, 0x02,0xfb, 0x01,0x03, 0x01,0x2b, 0x05,0xfb, 0x01,0x47, +0x01,0x00, 0x01,0x59, 0x08,0xfb, 0x01,0x59, 0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x47, +0x01,0x00, 0x03,0xfb, 0x01,0x00, 0x02,0x2b, 0x01,0x22, 0x01,0x59, 0x01,0x00, 0x01,0xfb, 0x01,0x59, +0x01,0x00, 0x01,0x36, 0x04,0xfb, 0x01,0x00, 0x01,0x59, 0x04,0xfb, 0x01,0x00, 0x01,0x22, 0x08,0xfb, +0x01,0x2b, 0x01,0x22, 0x05,0xfb, 0x01,0xfc, 0x01,0x8b, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x55, 0x13,0xfd, 0x01,0x6c, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x01,0x00, +0x0c,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x19, 0x09,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x55, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x04,0xfd, 0x01,0x6c, 0x03,0xfd, +0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x0b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x41, +0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 
0x01,0x07, 0x01,0x00, 0x01,0x2b, 0x01,0x55, 0x80,0x85,0xfd, +0x01,0x41, 0x01,0x00, 0x09,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x05,0xfd, 0x01,0x41, 0x01,0x00, +0x10,0xfd, 0x01,0x2e, 0x01,0x2b, 0x18,0xfd, 0x01,0x00, 0x01,0x2b, 0x05,0xfd, 0x01,0x55, 0x01,0x41, +0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x00, +0x03,0x41, 0x01,0x00, 0x03,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x83, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, +0x01,0x2b, 0x02,0x00, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, +0x04,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x69, 0x01,0x85, 0x01,0x00, 0x04,0xfb, 0x01,0x22, +0x01,0x00, 0x01,0x22, 0x02,0x00, 0x01,0x22, 0x05,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x22, 0x06,0xfb, +0x01,0x00, 0x01,0x36, 0x02,0xfb, 0x01,0x00, 0x01,0x47, 0x01,0xfb, 0x01,0x00, 0x01,0x03, 0x01,0x00, +0x03,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0xfb, 0x01,0x03, 0x0b,0x00, 0x01,0x36, 0x02,0xfb, 0x02,0x2b, +0x03,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x59, 0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, +0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x12, 0x02,0xfb, 0x02,0x2b, 0x04,0xfb, 0x01,0x00, 0x04,0xfb, +0x01,0x59, 0x01,0x00, 0x06,0xfb, 0x01,0x36, 0x02,0x2b, 0x01,0x00, 0x01,0x03, 0x03,0xfb, 0x01,0x72, +0x01,0x85, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x13,0xfd, +0x02,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x07, 0x01,0x00, 0x12,0xfd, 0x01,0x2b, +0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x2b, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, +0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x07, 0x01,0x00, 0x80,0x87,0xfd, 0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x6c, +0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x06,0xfd, 0x01,0x41, 0x01,0x00, 0x10,0xfd, 0x01,0x2b, 0x01,0x07, +0x17,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x05,0xfd, +0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x07,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x03,0x2b, +0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x03,0x2b, 0x01,0x00, +0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0xfc, 0x04,0xfb, 0x01,0x22, +0x01,0x2b, 0x01,0x22, 0x01,0x00, 0x01,0x59, 0x06,0xfb, 0x01,0x47, 0x01,0xfb, 0x03,0x00, 0x01,0x36, +0x02,0xfb, 0x01,0x00, 0x01,0x36, 0x01,0xfb, 0x01,0x22, 0x01,0x00, 0x02,0xfb, 0x02,0x00, 0x03,0xfb, +0x01,0x00, 0x01,0xfb, 0x01,0x00, 0x01,0x12, 0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x12, 0x02,0xfb, +0x01,0x36, 0x01,0x00, 0x06,0xfb, 0x01,0x00, 0x01,0x47, 0x02,0xfb, 0x01,0x22, 0x01,0x00, 0x06,0xfb, +0x01,0x2b, 0x01,0x22, 0x01,0xfb, 0x01,0x2b, 0x02,0x00, 0x03,0xfb, 0x01,0x00, 0x01,0x03, 0x09,0xfb, +0x01,0x2b, 0x01,0x00, 0x05,0xfb, 0x02,0x00, 0x01,0x12, 0x01,0x22, 0x03,0x00, 0x01,0x22, 0x01,0xfc, +0x01,0x8b, 0x05,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x05,0xfd, 0x01,0x2b, +0x02,0x00, 0x01,0x83, 0x09,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x05,0xfd, 0x01,0x41, 0x01,0x00, +0x01,0x83, 0x08,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x6c, 0x11,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, +0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x02,0x2b, 0x0a,0xfd, 0x01,0x6c, +0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, +0x80,0x80,0xfd, 0x09,0x00, 0x03,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, +0x01,0x83, 0x07,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x2e, 
+0x09,0xfd, 0x01,0x00, 0x01,0x41, 0x15,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x41, +0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x03,0x00, 0x01,0x2b, 0x02,0xfd, 0x02,0x41, +0x01,0x07, 0x01,0x2b, 0x01,0x41, 0x01,0x2e, 0x01,0x19, 0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, +0x01,0xfd, 0x01,0x88, 0x01,0x75, 0x04,0xfb, 0x01,0x00, 0x01,0x2b, 0x08,0xfb, 0x01,0x22, 0x01,0x2b, +0x01,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0xfb, 0x01,0x59, 0x01,0x36, +0x01,0x47, 0x02,0x00, 0x04,0xfb, 0x01,0x00, 0x01,0xfb, 0x01,0x22, 0x01,0x36, 0x02,0xfb, 0x01,0x36, +0x01,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x05,0xfb, 0x01,0x03, 0x01,0x00, 0x02,0x59, +0x01,0xfb, 0x02,0x2b, 0x06,0xfb, 0x02,0x2b, 0x01,0x59, 0x02,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, +0x09,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x59, 0x05,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x2b, 0x01,0x22, +0x01,0x3c, 0x02,0x00, 0x01,0x83, 0x07,0xfd, 0x02,0x2b, 0x05,0xfd, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, +0x01,0x19, 0x01,0x07, 0x08,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2e, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x83, 0x05,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x11,0xfd, 0x01,0x83, 0x01,0x00, +0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x04,0xfd, 0x01,0x6c, +0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, 0x01,0x83, +0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x80,0x87,0xfd, 0x01,0x55, 0x01,0x2b, +0x04,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x0b,0xfd, 0x01,0x2e, 0x05,0x00, 0x01,0x2b, 0x01,0x19, +0x04,0xfd, 0x08,0x00, 0x01,0x2b, 0x11,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x0b,0xfd, +0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0x00, 0x04,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, +0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x07, 0x06,0x41, 0x01,0x2e, 0x01,0x6c, +0x01,0x2b, 0x04,0x00, 0x01,0x07, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x02,0xfd, +0x01,0x8b, 0x01,0x81, 0x01,0x70, 0x01,0x12, 0x01,0x00, 0x01,0x2b, 0x0a,0xfb, 0x02,0x00, 0x01,0x2b, +0x03,0x00, 0x01,0x59, 0x02,0xfb, 0x01,0x47, 0x01,0x2b, 0x01,0x00, 0x01,0x22, 0x01,0x00, 0x03,0xfb, +0x01,0x22, 0x01,0x00, 0x05,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x02,0x00, +0x01,0x22, 0x02,0xfb, 0x01,0x59, 0x01,0x00, 0x01,0x47, 0x01,0x36, 0x03,0x00, 0x01,0x47, 0x06,0xfb, +0x01,0x59, 0x02,0x00, 0x01,0x03, 0x01,0xfb, 0x01,0x12, 0x02,0x00, 0x01,0x2b, 0x01,0x59, 0x06,0xfb, +0x01,0x22, 0x01,0x2b, 0x02,0x00, 0x01,0x59, 0x06,0xfb, 0x01,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, +0x01,0x6c, 0x01,0x8b, 0x01,0xfd, 0x01,0x2b, 0x01,0x83, 0x05,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x2b, +0x06,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x01,0x2b, 0x01,0x19, 0x08,0xfd, 0x01,0x83, 0x01,0x07, +0x08,0xfd, 0x01,0x83, 0x01,0x2b, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x01,0x83, 0x13,0xfd, +0x01,0x41, 0x04,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x03,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, +0x01,0x2e, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, 0x0c,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, +0x02,0x00, 0x80,0xbe,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x83, 0x0d,0xfd, 0x01,0x83, 0x03,0x41, +0x01,0x6c, 0x05,0xfd, 0x01,0x2b, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, +0x01,0x55, 0x02,0xfd, 0x01,0x83, 0x01,0x41, 0x06,0x2b, 0x01,0x41, 0x01,0x83, 0x01,0x41, 0x04,0xfd, +0x01,0x2b, 0x01,0x2e, 0x01,0x41, 0x02,0x00, 0x01,0x6c, 0x04,0xfd, 
0x01,0x8b, 0x01,0x01, 0x01,0x22, +0x0c,0xfb, 0x01,0x36, 0x01,0x22, 0x01,0x36, 0x05,0xfb, 0x01,0x59, 0x01,0x2b, 0x01,0x59, 0x01,0xfb, +0x01,0x2b, 0x04,0x00, 0x01,0x36, 0x03,0xfb, 0x01,0x22, 0x02,0x00, 0x01,0x2b, 0x01,0x12, 0x04,0xfb, +0x01,0x22, 0x02,0x00, 0x02,0xfb, 0x01,0x36, 0x03,0xfb, 0x01,0x22, 0x01,0x47, 0x0c,0xfb, 0x01,0x59, +0x01,0x22, 0x09,0xfb, 0x01,0x47, 0x01,0x2b, 0x01,0x47, 0x09,0xfb, 0x01,0x59, 0x01,0x22, 0x01,0x30, +0x01,0x8b, 0x0a,0xfd, 0x01,0x07, 0x01,0x41, 0x08,0xfd, 0x01,0x2e, 0x01,0x00, 0x01,0x2b, 0x1b,0xfd, +0x01,0x83, 0x1a,0xfd, 0x01,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x83, 0x1a,0xfd, 0x01,0x83, +0x03,0x41, 0x01,0x6c, 0x81,0x03,0xfd, 0x01,0x8b, 0x01,0x7c, 0x57,0xfb, 0x01,0x7c, 0x01,0x8b, +0x81,0x7f,0xfd, 0x01,0x8b, 0x01,0x7b, 0x53,0xfb, 0x01,0x7b, 0x01,0x8b, 0x81,0x83,0xfd, +0x01,0x8b, 0x01,0xfc, 0x01,0x6d, 0x4d,0xfb, 0x01,0x6d, 0x01,0xfc, 0x01,0x8b, 0x81,0x87,0xfd, +0x01,0x8b, 0x01,0x80, 0x01,0x72, 0x49,0xfb, 0x01,0x72, 0x01,0x80, 0x01,0x8b, 0x80,0xfd,0xfd, +0x01,0x19, 0x01,0xfd, 0x01,0x19, 0x02,0x55, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x83, 0x05,0xfd, +0x01,0x83, 0x01,0x19, 0x01,0x00, 0x03,0xfd, 0x01,0x2b, 0x05,0xfd, 0x01,0x55, 0x03,0x41, 0x01,0x55, +0x01,0xfd, 0x04,0x41, 0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x02,0xfd, 0x06,0x41, 0x01,0x55, 0x04,0xfd, +0x01,0x41, 0x01,0x83, 0x0a,0xfd, 0x01,0x41, 0x01,0x19, 0x04,0xfd, 0x01,0x2b, 0x06,0xfd, 0x01,0x6c, +0x05,0xfd, 0x01,0x6c, 0x01,0x55, 0x14,0xfd, 0x01,0x19, 0x01,0x55, 0x22,0xfd, 0x01,0x86, 0x01,0x79, +0x1b,0xfb, 0x01,0x22, 0x0e,0xfb, 0x01,0x22, 0x02,0xfb, 0x01,0x12, 0x01,0x22, 0x01,0x00, 0x06,0xfb, +0x01,0x2b, 0x01,0x36, 0x0d,0xfb, 0x01,0x79, 0x01,0x86, 0x09,0xfd, 0x01,0x41, 0x01,0x55, 0x08,0xfd, +0x01,0x2b, 0x02,0xfd, 0x06,0x41, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x83, 0x1c,0xfd, 0x01,0x83, +0x01,0x55, 0x08,0xfd, 0x01,0x19, 0x04,0xfd, 0x01,0x6c, 0x01,0x2b, 0x08,0xfd, 0x01,0x07, 0x80,0xa7,0xfd, +0x01,0x19, 0x02,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x83, +0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, +0x01,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, +0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x2b, 0x02,0x6c, +0x03,0x2b, 0x01,0x00, 0x03,0x2b, 0x03,0xfd, 0x01,0x2b, 0x01,0x19, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, +0x06,0xfd, 0x01,0x2b, 0x05,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x01,0x41, 0x17,0xfd, 0x01,0x00, +0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x8b, 0x01,0x00, 0x01,0x75, 0x18,0xfb, 0x01,0x00, +0x0e,0xfb, 0x01,0x00, 0x02,0xfb, 0x01,0x36, 0x01,0x00, 0x01,0x2b, 0x01,0x22, 0x04,0xfb, 0x01,0x59, +0x01,0x00, 0x01,0x59, 0x08,0xfb, 0x01,0x59, 0x01,0xfb, 0x01,0x75, 0x01,0x81, 0x01,0x8b, 0x0b,0xfd, +0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x83, 0x05,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, +0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x41, 0x13,0xfd, 0x01,0x6c, 0x07,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x55, 0x01,0x00, +0x01,0x2b, 0x01,0x07, 0x07,0xfd, 0x01,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x83, 0x01,0x55, 0x80,0x9d,0xfd, +0x01,0x83, 0x01,0x41, 0x04,0x2b, 0x01,0x55, 0x01,0x2e, 0x01,0x00, 0x03,0x41, 0x01,0x6c, 0x02,0xfd, +0x01,0x41, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x03,0xfd, +0x05,0x00, 0x01,0xfd, 0x05,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x6c, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 
0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x08,0xfd, +0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x2b, 0x03,0x41, 0x01,0x00, 0x03,0x41, 0x03,0xfd, +0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x55, +0x01,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x13,0xfd, 0x01,0x00, 0x04,0xfd, +0x01,0x41, 0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x8b, 0x01,0x80, 0x01,0x75, 0x15,0xfb, +0x01,0x00, 0x0e,0xfb, 0x01,0x00, 0x03,0xfb, 0x01,0x12, 0x06,0xfb, 0x01,0x2b, 0x06,0x00, 0x03,0xfb, +0x01,0x75, 0x01,0x00, 0x01,0x2b, 0x01,0x7f, 0x09,0xfd, 0x01,0x55, 0x08,0x00, 0x01,0x55, 0x02,0xfd, +0x01,0x6c, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x6c, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, +0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x09,0xfd, 0x01,0x19, 0x01,0x2b, 0x08,0x00, 0x01,0x19, +0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x19, +0x02,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0x6c, 0x07,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0x2b, +0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x55, 0x80,0x96,0xfd, +0x01,0x6c, 0x02,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x19, 0x03,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, +0x01,0x6c, 0x02,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, +0x01,0x2b, 0x01,0x55, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x02,0x41, +0x01,0x00, 0x02,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0a,0xfd, 0x01,0x19, 0x01,0x2b, 0x01,0x00, +0x01,0x83, 0x01,0x19, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, +0x01,0x6c, 0x03,0x41, 0x01,0x07, 0x01,0x00, 0x02,0x2b, 0x07,0xfd, 0x02,0x2b, 0x06,0xfd, 0x01,0x41, +0x02,0x2b, 0x03,0x00, 0x01,0x41, 0x01,0xfd, 0x02,0x2b, 0x12,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, +0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0xf7, 0x01,0x2b, 0x01,0x00, +0x01,0x09, 0x01,0x6f, 0x03,0xfb, 0x01,0x59, 0x01,0xfb, 0x01,0x2b, 0x01,0x59, 0x02,0xfb, 0x01,0x36, +0x01,0x47, 0x06,0xfb, 0x01,0x00, 0x09,0xfb, 0x01,0x2b, 0x09,0x00, 0x01,0x2b, 0x04,0xfb, 0x01,0x22, +0x01,0x00, 0x01,0x59, 0x03,0xfb, 0x01,0x2b, 0x01,0x00, 0x01,0x7a, 0x01,0x84, 0x01,0x8b, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x2e, 0x01,0x19, 0x05,0xfd, 0x01,0x00, 0x01,0x6c, +0x07,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x02,0x41, 0x01,0x00, +0x02,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0a,0xfd, 0x01,0x2e, 0x02,0x41, 0x01,0x83, 0x02,0xfd, +0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x19, 0x07,0xfd, 0x01,0x2b, +0x03,0x00, 0x02,0x2b, 0x08,0xfd, 0x06,0x00, 0x01,0x2b, 0x01,0x2e, 0x01,0x83, 0x03,0xfd, 0x01,0x41, +0x01,0x00, 0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x80,0x96,0xfd, 0x01,0x83, 0x01,0x00, +0x02,0x2b, 0x01,0x00, 0x01,0x55, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x2b, 0x01,0x41, 0x01,0xfd, +0x01,0x83, 0x01,0x41, 0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, +0x01,0xfd, 0x01,0x41, 0x01,0x2b, 0x02,0xfd, 0x04,0x00, 0x01,0x2b, 0x01,0xfd, 0x05,0x00, 0x03,0xfd, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, +0x0b,0xfd, 0x01,0x00, 0x01,0x07, 0x01,0x2e, 0x01,0x55, 0x05,0x41, 0x01,0x55, 0x03,0xfd, 0x01,0x00, +0x01,0xfd, 0x01,0x6c, 0x04,0x2b, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 
+0x07,0xfd, 0x01,0x07, 0x01,0x19, 0x02,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x01,0xfd, +0x01,0x00, 0x01,0x41, 0x11,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x02,0x2b, +0x04,0x00, 0x01,0x2b, 0x01,0x07, 0x01,0x41, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x8b, 0x01,0x82, +0x01,0x08, 0x01,0x00, 0x01,0xfb, 0x02,0x2b, 0x02,0xfb, 0x01,0x2b, 0x01,0x03, 0x06,0xfb, 0x01,0x00, +0x01,0x03, 0x01,0x59, 0x0c,0xfb, 0x01,0x00, 0x08,0xfb, 0x01,0x36, 0x01,0x00, 0x02,0x36, 0x01,0x70, +0x01,0x79, 0x01,0x82, 0x01,0x00, 0x01,0x50, 0x06,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x00, +0x01,0x19, 0x04,0xfd, 0x02,0x2b, 0x01,0x07, 0x01,0x2b, 0x01,0x83, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x02,0x2b, 0x01,0x00, 0x04,0xfd, 0x01,0x00, +0x0f,0xfd, 0x01,0x2b, 0x01,0x19, 0x08,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x41, 0x0a,0xfd, 0x01,0x2b, +0x01,0x41, 0x01,0xfd, 0x01,0x55, 0x01,0x41, 0x03,0x2b, 0x01,0x6c, 0x08,0xfd, 0x01,0x19, 0x01,0x2b, +0x05,0xfd, 0x01,0x6c, 0x01,0x00, 0x06,0xfd, 0x02,0x2b, 0x80,0x95,0xfd, 0x01,0x55, 0x01,0x00, +0x01,0x55, 0x01,0x19, 0x02,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0x07, 0x01,0xfd, 0x01,0x00, 0x01,0x83, +0x03,0xfd, 0x02,0x00, 0x01,0x6c, 0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x83, +0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x00, 0x02,0xfd, 0x05,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, +0x01,0x00, 0x01,0x83, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0xfd, +0x01,0x00, 0x0a,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x41, 0x01,0x00, 0x01,0x2b, 0x06,0x00, 0x03,0xfd, +0x01,0x00, 0x05,0xfd, 0x01,0x41, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x02,0x00, 0x01,0x2b, 0x02,0x00, +0x01,0x19, 0x06,0xfd, 0x01,0x2b, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x01,0xfd, 0x01,0x41, +0x01,0x00, 0x11,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x41, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, 0x01,0x41, +0x01,0x83, 0x01,0x00, 0x04,0xfd, 0x02,0x2b, 0x05,0xfd, 0x01,0x00, 0x01,0x79, 0x01,0x71, 0x01,0x00, +0x01,0x7a, 0x01,0x73, 0x01,0x00, 0x01,0x47, 0x06,0xfb, 0x03,0x00, 0x01,0x2b, 0x01,0x47, 0x07,0xfb, +0x01,0x00, 0x01,0x59, 0x01,0xfb, 0x01,0x00, 0x01,0xfb, 0x01,0x59, 0x01,0x00, 0x04,0xfb, 0x01,0x28, +0x01,0x00, 0x01,0x43, 0x01,0x49, 0x01,0x00, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x00, 0x0c,0xfd, +0x01,0x2e, 0x01,0x00, 0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x2e, 0x01,0x19, 0x01,0x00, 0x01,0x2e, +0x01,0x2b, 0x01,0x00, 0x01,0x41, 0x03,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0x55, 0x01,0x00, 0x02,0xfd, +0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x0e,0xfd, 0x02,0x2b, 0x08,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x83, 0x0b,0xfd, 0x01,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x03,0x41, +0x01,0x83, 0x09,0xfd, 0x01,0x00, 0x01,0x55, 0x05,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x83, 0x01,0x00, +0x01,0x83, 0x80,0x95,0xfd, 0x02,0x83, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x83, 0x01,0x00, +0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x02,0x2b, 0x02,0x00, 0x01,0x83, 0x01,0xfd, 0x01,0x00, +0x01,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x01,0x55, +0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 0x01,0x55, 0x01,0x83, 0x01,0x55, +0x01,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x03,0xfd, 0x01,0x41, 0x01,0x00, +0x05,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x19, 0x01,0x83, 0x01,0xfd, 
0x01,0x6c, 0x01,0x2b, 0x01,0x00, +0x05,0xfd, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x00, 0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x55, +0x10,0xfd, 0x01,0x2b, 0x04,0xfd, 0x01,0x19, 0x01,0x2b, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x83, +0x02,0x2b, 0x06,0xfd, 0x01,0x2b, 0x01,0x19, 0x01,0xfd, 0x01,0x41, 0x01,0x83, 0x01,0x41, 0x01,0x00, +0x02,0x8b, 0x01,0x87, 0x01,0x82, 0x01,0x7d, 0x01,0x7a, 0x01,0x77, 0x01,0x00, 0x01,0x71, 0x01,0x5c, +0x01,0x2b, 0x01,0x00, 0x01,0x2b, 0x01,0x59, 0x03,0xfb, 0x01,0x6d, 0x01,0x38, 0x01,0x00, 0x01,0x71, +0x01,0x74, 0x01,0x00, 0x01,0x7a, 0x01,0x7d, 0x01,0x00, 0x01,0x35, 0x02,0x8b, 0x02,0xfd, 0x01,0x6c, +0x03,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x55, 0x0b,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x55, 0x03,0xfd, +0x01,0x41, 0x01,0x00, 0x01,0x2e, 0x01,0xfd, 0x01,0x83, 0x01,0x07, 0x01,0x00, 0x01,0x2b, 0x01,0x55, +0x04,0xfd, 0x01,0x55, 0x02,0x00, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x04,0xfd, 0x01,0x00, 0x06,0xfd, 0x01,0x2b, 0x01,0x6c, 0x06,0xfd, 0x01,0x00, 0x01,0x6c, 0x08,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x19, 0x0a,0xfd, 0x01,0x6c, 0x01,0x00, 0x0c,0xfd, 0x01,0x6c, 0x01,0x2b, +0x03,0x00, 0x01,0x2b, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x83, 0x04,0xfd, 0x01,0x2b, +0x01,0x07, 0x80,0x94,0xfd, 0x01,0x55, 0x06,0x00, 0x01,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x2e, 0x01,0x2b, 0x01,0x6c, 0x01,0xfd, 0x01,0x41, 0x02,0x00, 0x01,0x41, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x01,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, 0x07,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, +0x01,0x83, 0x01,0x00, 0x03,0xfd, 0x01,0x07, 0x01,0x41, 0x01,0x2b, 0x01,0x07, 0x01,0x19, 0x01,0xfd, +0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x03,0xfd, 0x01,0x00, 0x01,0x19, 0x01,0x2b, +0x03,0xfd, 0x01,0x19, 0x01,0x2b, 0x04,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x41, 0x06,0xfd, 0x01,0x00, +0x01,0x55, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x2e, +0x01,0x6c, 0x15,0xfd, 0x01,0x2b, 0x01,0x07, 0x07,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x01,0x2b, +0x0c,0xfd, 0x01,0x00, 0x01,0x41, 0x07,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, +0x04,0xfd, 0x01,0x2b, 0x01,0x07, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x2e, 0x01,0x2b, 0x09,0xfd, +0x01,0x00, 0x01,0x2b, 0x0b,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x55, 0x01,0x41, +0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x07, 0x01,0x00, 0x05,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0x00, +0x02,0xfd, 0x07,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x01,0x00, 0x07,0xfd, 0x01,0x00, +0x0a,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x09,0xfd, 0x01,0x07, 0x01,0x2b, 0x02,0xfd, 0x01,0x2b, +0x01,0x83, 0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2e, 0x02,0xfd, 0x01,0x83, 0x01,0x2e, 0x01,0x00, +0x01,0x55, 0x04,0xfd, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x07, 0x01,0x19, 0x03,0xfd, 0x01,0x41, +0x01,0x00, 0x80,0x95,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0xfd, 0x01,0x19, 0x01,0x07, 0x02,0xfd, +0x01,0x2e, 0x01,0x00, 0x01,0x83, 0x02,0xfd, 0x01,0x07, 0x01,0x2e, 0x01,0x41, 0x01,0x2b, 0x04,0xfd, +0x01,0x83, 0x01,0x00, 0x01,0x55, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0x41, 0x01,0x00, +0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, +0x01,0x19, 0x04,0xfd, 0x01,0x00, 0x01,0x83, 0x04,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0x41, +0x02,0x2b, 0x02,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x00, 0x05,0xfd, 0x02,0x00, 0x01,0x83, 0x03,0xfd, +0x01,0x2b, 0x01,0x19, 0x05,0xfd, 
0x01,0x6c, 0x01,0xfd, 0x03,0x00, 0x01,0x55, 0x02,0xfd, 0x01,0x00, +0x01,0x55, 0x03,0xfd, 0x01,0x2b, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x01,0x2e, +0x12,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x83, 0x07,0xfd, 0x01,0x00, 0x0f,0xfd, 0x01,0x07, 0x01,0x2b, +0x08,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x83, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x02,0xfd, +0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x02,0x2b, 0x0a,0xfd, 0x01,0x6c, 0x01,0x2b, +0x01,0x00, 0x01,0x55, 0x07,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x00, 0x07,0xfd, +0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x02,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x19, 0x04,0xfd, 0x01,0x00, +0x01,0x83, 0x04,0xfd, 0x02,0x2b, 0x07,0xfd, 0x01,0x00, 0x01,0x07, 0x0b,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x83, 0x07,0xfd, 0x01,0x00, 0x01,0x55, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x08,0xfd, 0x01,0x00, +0x01,0x2e, 0x0b,0xfd, 0x01,0x07, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0x6c, 0x03,0xfd, 0x01,0x83, +0x01,0x00, 0x02,0xfd, 0x01,0x2b, 0x02,0x00, 0x01,0x83, 0x80,0x8f,0xfd, 0x01,0x6c, 0x01,0x2b, +0x02,0x00, 0x01,0x83, 0x02,0xfd, 0x02,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0xfd, 0x01,0x41, +0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x04,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, +0x03,0x2b, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, 0x07,0xfd, +0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x83, 0x03,0xfd, 0x01,0x00, +0x01,0x41, 0x02,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x2b, 0x01,0xfd, 0x01,0x00, 0x01,0xfd, 0x01,0x07, +0x01,0x41, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x19, 0x01,0x00, 0x07,0xfd, 0x01,0x41, +0x01,0x2b, 0x01,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0xfd, 0x01,0x07, 0x01,0x00, 0x03,0xfd, 0x01,0x6c, +0x01,0x00, 0x01,0xfd, 0x01,0x6c, 0x01,0xfd, 0x01,0x2e, 0x01,0x00, 0x05,0xfd, 0x01,0x6c, 0x01,0x00, +0x01,0x19, 0x10,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x01,0x41, 0x03,0xfd, +0x01,0x55, 0x01,0x2e, 0x08,0xfd, 0x01,0x2b, 0x01,0x00, 0x09,0xfd, 0x01,0x00, 0x08,0xfd, 0x01,0x83, +0x01,0x00, 0x01,0x2e, 0x03,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x83, 0x04,0xfd, +0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x07,0xfd, 0x01,0x83, 0x01,0x41, 0x01,0x19, 0x02,0x00, 0x01,0x2b, +0x01,0x83, 0x08,0xfd, 0x01,0x41, 0x01,0x00, 0x02,0xfd, 0x01,0x55, 0x07,0xfd, 0x01,0x00, 0x05,0xfd, +0x01,0x00, 0x07,0xfd, 0x01,0x00, 0x01,0x07, 0x02,0xfd, 0x01,0x83, 0x01,0x2b, 0x01,0x00, 0x01,0x83, +0x07,0xfd, 0x01,0x55, 0x01,0x00, 0x01,0x2b, 0x01,0x6c, 0x0a,0xfd, 0x01,0x07, 0x01,0x00, 0x01,0x6c, +0x05,0xfd, 0x01,0x55, 0x01,0x00, 0x02,0xfd, 0x01,0x07, 0x01,0x00, 0x04,0xfd, 0x01,0x83, 0x03,0xfd, +0x01,0x2b, 0x01,0x07, 0x05,0xfd, 0x01,0x6c, 0x05,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x2b, 0x01,0x00, +0x05,0xfd, 0x01,0x83, 0x01,0xfd, 0x01,0x55, 0x01,0x2b, 0x01,0xfd, 0x01,0x19, 0x01,0x07, 0x80,0x90,0xfd, +0x01,0x19, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x01,0x6c, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x02,0x2b, +0x01,0x83, 0x02,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x41, +0x05,0xfd, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x04,0x41, 0x02,0xfd, 0x01,0x00, 0x03,0xfd, 0x01,0x00, +0x05,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 0x01,0x55, +0x01,0x2b, 0x01,0x41, 0x01,0x2b, 0x01,0x83, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x01,0xfd, 0x01,0x00, +0x01,0xfd, 0x01,0x2b, 0x01,0x41, 0x02,0xfd, 0x01,0x2b, 0x01,0x07, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0x55, 0x08,0xfd, 0x01,0x00, 0x02,0x2b, 0x02,0x00, 0x01,0x2b, 0x01,0x83, 0x03,0xfd, 
0x01,0x00, +0x01,0x19, 0x01,0xfd, 0x03,0x00, 0x01,0x2e, 0x06,0xfd, 0x01,0x83, 0x01,0x00, 0x01,0x19, 0x0d,0xfd, +0x01,0x2b, 0x02,0x00, 0x01,0x6c, 0x09,0xfd, 0x01,0x2e, 0x06,0x00, 0x06,0xfd, 0x01,0x2e, 0x01,0x00, +0x01,0x2b, 0x0a,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x41, 0x04,0xfd, 0x01,0x00, 0x04,0xfd, 0x01,0x2e, +0x03,0xfd, 0x01,0x6c, 0x01,0x2b, 0x01,0x00, 0x01,0x2e, 0x09,0xfd, 0x01,0x00, 0x01,0x2b, 0x01,0x2e, +0x0c,0xfd, 0x01,0x2b, 0x01,0x00, 0x03,0x2b, 0x02,0x00, 0x04,0xfd, 0x01,0x00, 0x05,0xfd, 0x01,0x00, +0x07,0xfd, 0x01,0x83, 0x04,0x00, 0x01,0x2b, 0x01,0x83, 0x09,0xfd, 0x01,0x6c, 0x01,0x2b, 0x02,0x00, +0x0a,0xfd, 0x01,0x19, 0x01,0x00, 0x01,0x55, 0x04,0xfd, 0x01,0x00, 0x01,0x19, 0x02,0xfd, 0x01,0x83, +0x01,0x2b, 0x04,0x00, 0x01,0x2b, 0x03,0xfd, 0x01,0x6c, 0x02,0x00, 0x03,0x2b, 0x02,0x00, 0x06,0xfd, +0x01,0x55, 0x01,0x2b, 0x01,0x83, 0x07,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x83, 0x01,0x2b, 0x01,0x19, +0x80,0x8e,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x00, 0x01,0x41, +0x03,0xfd, 0x02,0x2b, 0x02,0xfd, 0x01,0x41, 0x01,0x2b, 0x01,0x19, 0x01,0x00, 0x01,0x19, 0x01,0x83, +0x06,0xfd, 0x01,0x00, 0x07,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x19, 0x01,0xfd, 0x01,0x6c, 0x01,0x00, +0x01,0x2b, 0x05,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x83, 0x06,0xfd, 0x01,0x6c, +0x01,0x41, 0x01,0x2b, 0x01,0x41, 0x01,0x00, 0x01,0x6c, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x2b, +0x07,0xfd, 0x01,0x41, 0x01,0x83, 0x0a,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x55, 0x06,0xfd, 0x01,0x55, +0x03,0xfd, 0x02,0x55, 0x08,0xfd, 0x01,0x55, 0x01,0x2b, 0x0d,0xfd, 0x01,0x55, 0x01,0x6c, 0x18,0xfd, +0x01,0x55, 0x01,0x6c, 0x0b,0xfd, 0x01,0x2b, 0x0e,0xfd, 0x01,0x2b, 0x08,0xfd, 0x01,0x83, 0x01,0x2b, +0x01,0x83, 0x1a,0xfd, 0x01,0x83, 0x03,0x41, 0x01,0x6c, 0x03,0xfd, 0x01,0x6c, 0x01,0x00, 0x01,0x2b, +0x05,0xfd, 0x01,0x00, 0x09,0xfd, 0x01,0x55, 0x01,0x41, 0x01,0x83, 0x0e,0xfd, 0x01,0x83, 0x0b,0xfd, +0x01,0x19, 0x01,0x83, 0x04,0xfd, 0x01,0x6c, 0x01,0x83, 0x0e,0xfd, 0x01,0x83, 0x03,0x41, 0x13,0xfd, +0x01,0x2e, 0x01,0x00, 0x01,0x2b, 0x80,0x95,0xfd, 0x01,0x83, 0x3c,0xfd, 0x01,0x83, 0x01,0x6c, +0xb6,0xd5,0xfd +}; diff --git a/osfmk/console/panic_ui/generated_files/rendered_numbers.c b/osfmk/console/panic_ui/generated_files/rendered_numbers.c new file mode 100644 index 000000000..c2d571ef9 --- /dev/null +++ b/osfmk/console/panic_ui/generated_files/rendered_numbers.c @@ -0,0 +1,376 @@ + /* generated c file */ + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x82]; +} num_0 = { +/* w */ 9, +/* h */ 11, +/* pixel_data */ +0x09,0xfd, +0x02,0xfd, 0x01,0x81, 0x01,0x2b, 0x02,0x00, 0x01,0x26, 0x02,0xfd, +0x02,0xfd, 0x01,0x2b, 0x01,0x01, 0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x69, 0x01,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x52, 0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x03,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x52, 0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, +0x02,0xfd, 0x01,0x2b, 0x01,0x01, 0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x01,0x69, 0x01,0xfd, +0x02,0xfd, 0x01,0x81, 0x01,0x2b, 0x02,0x00, 0x01,0x26, 0x02,0xfd, +0x09,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x54]; +} num_1 = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x2b, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, 
+0x01,0xfd, 0x01,0x26, 0x01,0x52, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0xf9, 0x02,0xfd, +0x01,0xfd, 0x01,0x2b, 0x04,0x00, 0x01,0xfd, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x5c]; +} num_2 = { +/* w */ 8, +/* h */ 11, +/* pixel_data */ +0x08,0xfd, +0x01,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x01, 0x02,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x81, 0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x12, 0x01,0xfd, +0x05,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, +0x04,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, +0x03,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x01, 0x03,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x04,0xfd, +0x01,0xfd, 0x01,0x01, 0x01,0x2b, 0x05,0xfd, +0x01,0xfd, 0x05,0x00, 0x01,0x01, 0x01,0xfd, +0x08,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x46]; +} num_3 = { +/* w */ 6, +/* h */ 11, +/* pixel_data */ +0x06,0xfd, +0x01,0xfd, 0x04,0x00, 0x01,0x52, +0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x02,0x00, +0x04,0xfd, 0x01,0x01, 0x01,0x00, +0x03,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x52, +0x01,0xfd, 0x01,0x52, 0x02,0x00, 0x01,0x2b, 0x01,0x81, +0x04,0xfd, 0x01,0x2b, 0x01,0x00, +0x04,0xfd, 0x01,0xf9, 0x01,0x00, +0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x01,0x2b, 0x01,0x00, +0x01,0xfd, 0x04,0x00, 0x01,0x52, +0x06,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x64]; +} num_4 = { +/* w */ 9, +/* h */ 11, +/* pixel_data */ +0x09,0xfd, +0x05,0xfd, 0x02,0x00, 0x02,0xfd, +0x04,0xfd, 0x01,0x01, 0x02,0x00, 0x02,0xfd, +0x03,0xfd, 0x01,0x52, 0x01,0x2b, 0x01,0x01, 0x01,0x00, 0x02,0xfd, +0x03,0xfd, 0x01,0x00, 0x01,0x81, 0x01,0x01, 0x01,0x00, 0x02,0xfd, +0x02,0xfd, 0x01,0x01, 0x01,0xf9, 0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, +0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x02,0xf9, 0x01,0x2b, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, +0x01,0xfd, 0x01,0x69, 0x03,0x01, 0x02,0x00, 0x01,0x01, 0x01,0xfd, +0x05,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, +0x05,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, +0x09,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x58]; +} num_5 = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x01,0xfd, 0x01,0xf9, 0x04,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x04,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x04,0xfd, +0x01,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x01, 0x01,0x69, 0x02,0xfd, +0x01,0xfd, 0x01,0x81, 0x01,0xf9, 0x01,0x12, 0x01,0x00, 0x01,0x12, 0x01,0xfd, +0x04,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0xfd, +0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0xfd, +0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x12, 0x03,0x00, 0x01,0x81, 0x01,0xfd, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x72]; +} num_6 = { +/* w */ 8, +/* h */ 11, +/* pixel_data */ +0x08,0xfd, +0x02,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x01, 0x01,0xfd, +0x01,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x52, 0x02,0xfd, 0x01,0x81, 0x01,0xfd, +0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x05,0xfd, +0x01,0xfd, 0x01,0x00, 0x02,0x01, 0x01,0x00, 0x01,0x2b, 0x01,0x69, 0x01,0xfd, +0x01,0xfd, 0x02,0x00, 0x01,0xf9, 0x01,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, 
+0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, +0x01,0xfd, 0x02,0x2b, 0x03,0xfd, 0x01,0x00, 0x01,0xf9, +0x01,0xfd, 0x01,0x69, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0xfd, +0x02,0xfd, 0x01,0x26, 0x03,0x00, 0x01,0x52, 0x01,0xfd, +0x08,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x4a]; +} num_7 = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x01,0xfd, 0x06,0x00, +0x05,0xfd, 0x01,0x2b, 0x01,0x01, +0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0xfd, +0x04,0xfd, 0x01,0x00, 0x01,0xf9, 0x01,0xfd, +0x03,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0xfd, +0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x69, 0x02,0xfd, +0x02,0xfd, 0x01,0x01, 0x01,0x00, 0x03,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x03,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x81, 0x03,0xfd, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x76]; +} num_8 = { +/* w */ 8, +/* h */ 11, +/* pixel_data */ +0x08,0xfd, +0x02,0xfd, 0x01,0x52, 0x03,0x00, 0x01,0x12, 0x01,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x26, 0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0xfd, +0x02,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0x01, 0x01,0x00, 0x01,0x52, 0x01,0xfd, +0x02,0xfd, 0x01,0x12, 0x03,0x00, 0x01,0x52, 0x01,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x02,0x81, 0x02,0x00, 0x01,0x81, +0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0xf9, +0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x81, +0x02,0xfd, 0x01,0x01, 0x03,0x00, 0x01,0xf9, 0x01,0xfd, +0x08,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x66]; +} num_9 = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x02,0xfd, 0x01,0x01, 0x02,0x00, 0x01,0x2b, 0x01,0xfd, +0x01,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0xfd, 0x01,0x00, 0x01,0x12, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x26, 0x01,0x00, +0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, 0x01,0xf9, 0x01,0x00, +0x01,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0x69, 0x01,0x81, 0x02,0x00, +0x02,0xfd, 0x01,0x26, 0x01,0x00, 0x01,0x2b, 0x01,0x26, 0x01,0x00, +0x05,0xfd, 0x01,0x01, 0x01,0x2b, +0x01,0xfd, 0x01,0x81, 0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x52, +0x01,0xfd, 0x01,0x69, 0x03,0x00, 0x01,0x26, 0x01,0xfd, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x76]; +} num_a = { +/* w */ 10, +/* h */ 11, +/* pixel_data */ +0x0a,0xfd, +0x04,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0x81, 0x03,0xfd, +0x04,0xfd, 0x02,0x00, 0x01,0x12, 0x03,0xfd, +0x03,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x2b, 0x01,0x00, 0x03,0xfd, +0x03,0xfd, 0x01,0x2b, 0x01,0x12, 0x01,0x69, 0x01,0x00, 0x01,0x52, 0x02,0xfd, +0x02,0xfd, 0x01,0x81, 0x01,0x00, 0x01,0x81, 0x01,0xfd, 0x01,0x00, 0x01,0x2b, 0x02,0xfd, +0x02,0xfd, 0x01,0x12, 0x01,0x00, 0x02,0x01, 0x02,0x00, 0x02,0xfd, +0x02,0xfd, 0x01,0x00, 0x01,0x12, 0x03,0xf9, 0x01,0x00, 0x01,0x26, 0x01,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x2b, 0x01,0x01, 0x04,0xfd, 0x01,0xf9, 0x01,0x00, 0x01,0x81, +0x0a,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x68]; +} num_b = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x01,0xfd, 0x04,0x00, 0x01,0x2b, 0x01,0x81, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x81, 
0x01,0x00, 0x01,0x01, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x00, 0x01,0x01, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0x81, +0x01,0xfd, 0x04,0x00, 0x01,0xf9, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x26, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x01, 0x01,0x00, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x02,0xfd, 0x01,0x00, 0x01,0x2b, +0x01,0xfd, 0x05,0x00, 0x01,0x81, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x5a]; +} num_c = { +/* w */ 9, +/* h */ 11, +/* pixel_data */ +0x09,0xfd, +0x03,0xfd, 0x01,0x01, 0x04,0x00, 0x01,0xf9, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x03,0xfd, 0x01,0x69, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x69, 0x05,0xfd, +0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, +0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, +0x01,0xfd, 0x01,0x01, 0x01,0x00, 0x06,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0x69, 0x05,0xfd, +0x02,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x02,0xfd, 0x01,0x81, 0x01,0x52, +0x03,0xfd, 0x01,0x01, 0x04,0x00, 0x01,0x52, +0x09,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x7c]; +} num_d = { +/* w */ 10, +/* h */ 11, +/* pixel_data */ +0x0a,0xfd, +0x01,0xfd, 0x05,0x00, 0x01,0x2b, 0x01,0x81, 0x02,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0x81, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x00, 0x01,0x01, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x01, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x2b, 0x01,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x04,0xfd, 0x01,0x00, 0x01,0x12, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0x01, 0x03,0xfd, 0x01,0x2b, 0x01,0x00, 0x02,0xfd, +0x01,0xfd, 0x05,0x00, 0x01,0x01, 0x01,0x81, 0x02,0xfd, +0x0a,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x48]; +} num_e = { +/* w */ 7, +/* h */ 11, +/* pixel_data */ +0x07,0xfd, +0x01,0xfd, 0x05,0x00, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x04,0x00, 0x01,0x12, 0x01,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x04,0xfd, +0x01,0xfd, 0x05,0x00, 0x01,0x52, +0x07,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x46]; +} num_f = { +/* w */ 6, +/* h */ 11, +/* pixel_data */ +0x06,0xfd, +0x01,0xfd, 0x05,0x00, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x04,0x00, 0x01,0x12, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x01,0xfd, 0x01,0x00, 0x01,0xf9, 0x03,0xfd, +0x06,0xfd +}; + + +static const struct { + unsigned int num_w; + unsigned int num_h; + unsigned char num_pixel_data[0x2e]; +} num_colon = { +/* w */ 4, +/* h */ 11, +/* pixel_data */ +0x04,0xfd, +0x04,0xfd, +0x04,0xfd, +0x01,0xfd, 0x01,0x69, 0x01,0x01, 0x01,0xfd, +0x01,0xfd, 0x01,0x52, 0x01,0x00, 0x01,0xfd, +0x04,0xfd, +0x04,0xfd, +0x04,0xfd, +0x01,0xfd, 0x01,0x81, 0x01,0xf9, 0x01,0xfd, +0x01,0xfd, 0x01,0xf9, 
0x01,0x00, 0x01,0xfd, +0x04,0xfd +}; + + diff --git a/osfmk/console/panic_ui/genimage.c b/osfmk/console/panic_ui/genimage.c new file mode 100644 index 000000000..2c1ecd110 --- /dev/null +++ b/osfmk/console/panic_ui/genimage.c @@ -0,0 +1,1621 @@ +/* converts a QT RAW image file into the C structure that the + * kernel panic ui system expects. + * + * to build: cc -o genimage genimage.c +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/types.h> + +int EncodeImage( + unsigned char * data, + int pixels, + unsigned char * fileArr ); +int decode_rle( + unsigned char * dataPtr, + unsigned int * quantity, + unsigned int * depth, + unsigned char ** value ); +int findIndexNearMatch( + unsigned int color24 ); +unsigned char findIndexMatch( + unsigned int color24 ); +int convert24toGrey( + unsigned char * data, + unsigned int size ); +int convert8toGrey( + unsigned char * data, + unsigned int size ); +int convert8bitIndexto24( + unsigned char * data, + int height, + int width, + unsigned char ** dout ); +int convert8bitIndexto8( + unsigned char * data, + int height, + int width, + unsigned char ** dout ); +int convert24to8bitIndex( + unsigned char * data, + int height, + int width, + unsigned char ** dout ); +unsigned int * CreateCLUTarry( + unsigned char * raw_clut ); +unsigned int * ReplaceCLUT( + char * iname ); +void GenerateCLUT( + char * oname ); +void WriteQTRawFile( + FILE * ostream, + unsigned char * data, + int height, + int width, + int depth, + unsigned int size ); +void CreateRawQTFont( + void ); +void CreateRawQTCLUT( + int type ); + +#define offsetof(type, field) ((size_t)(&((type *)0)->field)) + +struct panicimage { + unsigned int pd_sum; + unsigned int pd_dataSize; + unsigned int pd_tag; + unsigned short pd_width; + unsigned short pd_height; + unsigned char pd_depth; + unsigned char pd_info_height; + unsigned char pd_info_color[2]; + unsigned char data[]; +}; + + + + +void +usage( int type ) { +printf( +"\n" +"Usage:\n" +"\tgenimage -i <.qtif> [operands ...]\n\n" +"\tThe following operands are available\n\n" +"\t-h\t\tDisplay full help information\n" +"\t-i <file>\tUse file containing QuickTime uncompressed raw image as\n" +"\t\t\tthe panic dialog (8 or 24 bit)\n" +"\t-o <file>\tWrite the output as a compressed WHD RAW image suitable\n" +"\t\t\tfor loading into the kernel\n" +"\t-c <file>\tUse file containing 256 RGB values for 8-bit indexed \n" +"\t\t\tlookups, overrides built-in appleClut8\n" +"\t-fg <color>\tForeground color of font used for panic information in\n" +"\t\t\t24-bits, default 0xFFFFFF (100%% white)\n" +"\t-bg <color>\tBackground color of font used for panic information in\n" +"\t\t\t24-bits, default 0x222222 (13%% white, dark gray)\n" +"\t-n <num>\tNumber of lines that have been reserved to display the\n" +"\t\t\tpanic information, must be at least 20\n" +"\n\tThese are useful options for testing\n" +"\t-io <file>\tUse to override the default C source filename\n" +"\t-bw\t\tConvert the input image to shades of gray\n" +"\t-n24\t\tConvert an image from 8 bit to 24 bit mode before\n" +"\t\t\tprocessing\n" +"\t-n8\t\tDon't convert an image from 24 bit to 8 bit mode before \n" +"\t\t\tprocessing, default is to convert\n" +"\t-qt <file>\t(requires -i) Write QuickTime uncompressed raw .qtif\n" +"\t\t\tfile containing the input image in 8-bit format\n" +"\t-r\t\tCreate a QuickTime uncompressed image of the 8-bit\n" +"\t\t\tsystem CLUT named appleclut8.qtif \n" +"\t-f\t\tCreate a QuickTime uncompressed image of the 8x16\n" +"\t\t\tbit panic info font named font.qtif \n" +"\n\n" ); +if ( type > 0 ) +printf( +"\ +This 
utility is used to convert a panic dialog from .qtif format into\n\ +one that is suitable for the kernel to display. The .qtif image file\n\ +can be in either 24 or 8 bit mode, but must be in an uncompressed raw\n\ +format. 8 bit mode is preferred, as it requires no conversion to the\n\ +colors that are contained in the CLUT. If a color cannot be found in\n\ +the CLUT, it will be converted to the nearest gray. The default CLUT\n\ +is the same as the system CLUT. If needed, this can be overridden by\n\ +providing a new CLUT with the -c option.\n\ +\n\ +However, if you override the default CLUT, the panic UI may not appear\n\ +as you intended when the system is in 8 bit mode. Colors that are not\n\ +present in the active CLUT will be converted to the nearest gray.\n\ +\n\ +The panic dialog must have a number of lines reserved at the bottom for\n\ +displaying additional panic information. The minimum number of lines\n\ +is 20. The font used to display this information needs to have the\n\ +foreground and background colors defined. The defaults are full white\n\ +on dark gray. This can be changed by using the -fg and/or -bg options to\n\ +provide new 24 bit colors. These colors must be contained in the CLUT.\n\ +\n\ +There are two possible output results. The default is to create a C\n\ +source file named panic_image.c that contains the panic image in an 8 bit\n\ +modified RLE compressed format and the CLUT that was used to create the\n\ +image. The second possibility is to create a binary version of the same\n\ +information by using the -o option. This file can then be used to replace\n\ +the panic dialog that is currently active in the kernel by using\n\ +sysctl(KERN_PANIC_INFO).\n\ +\n\n"); +} + + +#include "appleclut8.h" +#include "../iso_font.c" + +struct QTHeader { + long idSize; /* total size of ImageDescription including extra data ( CLUTs and other per sequence data ) */ + long cType; /* 'raw '; what kind of codec compressed this data */ + long resvd1; /* reserved for Apple use */ + short resvd2; /* reserved for Apple use */ + short dataRefIndex; /* set to zero */ + short version; /* which version is this data */ + short revisionLevel; /* what version of that codec did this */ + long vendor; /* whose codec compressed this data */ + long temporalQuality; /* what was the temporal quality factor */ + long spatialQuality; /* what was the spatial quality factor */ + short width; /* how many pixels wide is this data */ + short height; /* how many pixels high is this data */ + long hRes; /* horizontal resolution */ + long vRes; /* vertical resolution */ + long dataSize; /* if known, the size of data for this image descriptor */ + short frameCount; /* number of frames this description applies to */ + char name[32]; /* name of codec ( in case not installed ) */ + short depth; /* what depth is this data (1-32) or ( 33-40 grayscale ) */ + short clutID; /* clut id or if 0 clut follows or -1 if no clut */ +} image_header; + +static unsigned int mismatchClut[256]; +static int nextmis = -1, neargrey = 0, cvt2grey = 0, exactmatch=0; +static int grey = 0, debug = 0, testfont = 0, testclut = 0; +static int convert = 8; // default is to convert image to 8 bit uncompressed .qtif +static unsigned char fg, bg; +unsigned int * panic_clut = NULL; +static char * clutin = NULL; + +union colors { + unsigned int c24; + unsigned char rgb[4]; + struct { + unsigned char dummy; + unsigned char red; + unsigned char green; + unsigned char blue; + } clut; +}; + +int +main( int argc, char *argv[] ) +{ + char *file = 
NULL; + char *out = NULL; + char *kraw = NULL; + char *qtraw = NULL; + char *clutout = NULL; + char *whdname = NULL; + FILE * stream, *out_stream; + unsigned char * data; + unsigned short width = 0, height = 0; + unsigned char depth = 0, lines = 20; + unsigned int i, pixels, sum, encodedSize, fg24= 0xFFFFFF, bg24=0x222222; + unsigned char *fileArr; + int chars_this_line, next, runindex; + + + // pull apart the arguments + for( next = 1; next < argc; next++ ) + { + if (strcmp(argv[next], "-i") == 0) // image file in raw QT uncompressed format (.qtif) + file = argv[++next]; + + else if (strcmp(argv[next], "-o") == 0) // output file for WHD image + kraw = argv[++next]; + else if (strcmp(argv[next], "-io") == 0) // output file for image + out = argv[++next]; + + else if (strcmp(argv[next], "-n") == 0) // number of reserved lines + lines = atoi(argv[++next]); + else if (strcmp(argv[next], "-fg") == 0) // foreground color in 24 bits + sscanf(argv[++next], "%i", &fg24); + else if (strcmp(argv[next], "-bg") == 0) // background color in 24 bits + sscanf(argv[++next], "%i", &bg24); + else if (strcmp(argv[next], "-c") == 0) // input file for clut + clutin = argv[++next]; + else if (strcmp(argv[next], "-h") == 0) // display more help + { usage(1); exit(1); } + + // useful testing options + else if (strcmp(argv[next], "-co") == 0) // output file for generating appleClut8.h array included in this file + clutout = argv[++next]; + else if (strcmp(argv[next], "-a8") == 0) // output file for testing system CLUT 8 in QT RAW (test) + testclut = 8; + else if (strcmp(argv[next], "-r") == 0) // output file for QT clut RAW (test) + testclut = 1; + else if (strcmp(argv[next], "-qt") == 0) // output file for QT RAW (test) + qtraw = argv[++next]; + else if (strcmp(argv[next], "-bw") == 0) // use only shades of grey (test) + grey = 1; + else if (strcmp(argv[next], "-n8") == 0) // don't convert to 8 by default (test) + convert = 0; + else if (strcmp(argv[next], "-n24") == 0) // convert 8 bit to 24 bit (test) + convert = 24; + else if (strcmp(argv[next], "-f") == 0) // test font (test) + testfont = 1; + else if (strcmp(argv[next], "-w") == 0) // read WHD raw file and output 8 bit qtif + whdname = argv[++next]; + + else if (strcmp(argv[next], "-debug") == 0) // verbose + debug++; + } + + if (!(file || clutout || testfont || testclut || whdname) ) { + usage(0); + exit(1); + } + + printf("\n"); + + panic_clut = appleClut8; + + if ( clutin ) + { + panic_clut = ReplaceCLUT( clutin ); + printf("Built-in CLUT has been replaced with %s...\n", clutin); + } else + { + if ( whdname ) + printf("Using CLUT from %s...\n", whdname); + else + printf("Using Built-in CLUT...\n"); + } + + if ( clutout ) + { + GenerateCLUT( clutout ); + printf("Created C source file of %s...\n", clutout); + } + + fg = findIndexNearMatch(fg24); + bg = findIndexNearMatch(bg24); + + if ( testclut ) + CreateRawQTCLUT(testclut); + + if ( testfont ) + CreateRawQTFont(); + + // Begin to process the image + + if( file == NULL) + { + if ( whdname == NULL ) + { + if ( debug) + printf("No image file was processed...\n\n"); + exit(0); + } + } + + + printf("Verifying image file...\n"); + if ( file != NULL ) + { + stream = fopen(file, "r"); + if (!stream) { + fprintf(stderr, "Err: could not open .qtif image file.\n\n"); + exit(1); + } + + { + long hdr_off; + long hdr_type; + + fread((void *) &hdr_off, sizeof(long), 1, stream); + fread((void *) &hdr_type, sizeof(long), 1, stream); + + if ( hdr_type != 'idat' ) goto errQTimage; + + fseek(stream, hdr_off, SEEK_SET); + 
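+ /* Explanatory note (inferred from this parsing code, not from a QuickTime
+  * spec): a .qtif container is a sequence of atoms, each introduced by a
+  * 4-byte length and a 4-byte type code. The 'idat' atom carries the raw
+  * pixel data; the fseek() above skips past it to the 'idsc' atom, whose
+  * ImageDescription payload is read into image_header below. */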
fread((void *) &hdr_off, sizeof(long), 1, stream); + fread((void *) &hdr_type, sizeof(long), 1, stream); + + if ( hdr_type != 'idsc' ) goto errQTimage; + + fread((void *) &image_header, sizeof(image_header), 1, stream); + if ( image_header.cType != 'raw ' ) goto errQTimage; + if (( image_header.depth != 8 ) && ( image_header.depth != 24 )) goto errQTimage; + + width = image_header.width; + height = image_header.height; + depth = image_header.depth; + + printf("Image info: width: %d height: %d depth: %d...\n", width, height, depth); + + if (!(width && height && depth)) { + fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n"); + exit(1); + } + } + + if ( !(data = (char *)malloc(image_header.dataSize))) { + fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... bailing.\n", image_header.dataSize); + exit(1); + } + + // Read the image data + fseek(stream, 8, SEEK_SET); + fread((void *) data, image_header.dataSize, 1, stream); + fclose( stream ); + + if ( kraw && image_header.depth == 24 ) + { + fprintf(stderr, "Err: The WHD raw file (%s) will not be created when input is in millions of colors\n", kraw); + kraw = NULL; + } + + pixels = image_header.dataSize; + + if ( image_header.depth == 24 ) + { + if ( grey == 1 ) + pixels = convert24toGrey( data, image_header.dataSize); + + if ( convert == 8 ) + { + printf("Converting image file to 8 bit...\n"); + pixels = convert24to8bitIndex( data, height, width, &data ); + image_header.dataSize = pixels; + depth = 1; + } else + depth = 3; + } else { + if ( grey == 1 ) + pixels = convert8toGrey( data, image_header.dataSize ); + + if ( convert == 24 ) + { + printf("Converting image file to 24 bit...\n"); + pixels = convert8bitIndexto24( data, height, width, &data ); + image_header.dataSize = pixels; + depth = 3; + } else + { + printf("Converting image file to 8 bit raw...\n"); + pixels = convert8bitIndexto8( data, height, width, &data ); + image_header.dataSize = pixels; + depth = 1; + } + } + + printf("Converted %d pixels%s...\n", pixels/depth, ((grey==1)?" to grayscale":"")); + if ( exactmatch > 0 ) + printf("Found %d color matches in CLUT...\n", exactmatch); + if ( cvt2grey > 0 ) + printf("Converted %d colors to gray...\n", cvt2grey); + if ( neargrey > 0 ) + printf("Adjusted %d grays to best match...\n", neargrey); + if ( nextmis > 0 ) + printf("Total of %d separate color mismatches...\n", nextmis); + } + else + { + unsigned int pixels_out; + struct panicimage image; + + stream = fopen(whdname, "r"); + if (!stream) { + fprintf(stderr, "Err: could not open WHD raw image file.\n\n"); + exit(1); + } + + fread(&image, sizeof(image), 1, stream); + + if ( image.pd_tag != 'RNMp' ) + goto errWHDimage; + + if ( image.pd_depth != 1 ) + goto errWHDimage; + + width = image.pd_width; + height = image.pd_height; + depth = image.pd_depth; + + printf("Image info: width: %d height: %d depth: %d...\n", image.pd_width, image.pd_height, image.pd_depth); + + if (!(width && height && depth)) { + fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n"); + exit(1); + } + + if ( !(fileArr = (char *)malloc(image.pd_dataSize))) { + fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... 
bailing.\n", image.pd_dataSize); + exit(1); + } + + /* read the data into a buffer */ + fread(fileArr, image.pd_dataSize, 1, stream); + fclose(stream); + + encodedSize = image.pd_dataSize - (256 * 3); + + for(sum=0,i=0; i= pixels ) + { + printf("Skipping encoding...\n"); + } + + for (sum=0,i=0; i= encodedSize) // this is the last element + break; + + if(chars_this_line >= 80) { + fprintf( out_stream, "\n"); + chars_this_line = 0; + } + } + + + if (debug) + { + printf("Encoded size = %d\n", encodedSize); + printf("Decoded size = %d\n", pixels); + } + + fprintf(out_stream, "\n\n"); + for ( i=0; i<256; i+=4) + { + union colors c; + + if ( (i % 16) == 0 ) fprintf(out_stream, "// %02X\n", i); + c.c24 = panic_clut[i+0]; + fprintf(out_stream, "\t0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); + c.c24 = panic_clut[i+1]; + fprintf(out_stream, "0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); + c.c24 = panic_clut[i+2]; + fprintf(out_stream, "0x%02X,0x%02X,0x%02X, ", c.clut.red, c.clut.green, c.clut.blue); + c.c24 = panic_clut[i+3]; + fprintf(out_stream, "0x%02X,0x%02X,0x%02X%s\n", c.clut.red, c.clut.green, c.clut.blue, ((i!=(256-4))?",":"")); + } + + fprintf(out_stream, "}\n"); + fprintf(out_stream, "};\n"); + + fclose( out_stream ); + +leaveOK: + printf("\n"); + return 0; + +errQTimage: + fprintf(stderr,"Err: Image must be in the QuickTime Raw Uncompressed Millions or 256 Colors format\n"); + exit(1); +errWHDimage: + fprintf(stderr,"Err: Image must be in the WHD Raw 256 Colors format\n"); + exit(1); +} + + + +#define RUN_MAX ((1<<20)-1) + +union RunData { + unsigned int i; + unsigned char c[4]; +}; + +unsigned int encode_rle( + unsigned char * fileArr, + unsigned int filePos, + unsigned int quantity, + union RunData * value, + int depth); + +int +compareruns( unsigned char * data, unsigned int * index, unsigned int max, union RunData * currP, int * depth ) +{ + unsigned int i = *index; + union RunData * nextP; + static int retc = 0; + + if ( currP == NULL || data == NULL ) + { + retc = 0; + goto Leave; + } + + if ( (*index+*depth) > max ) + { + *depth = 1; + retc = 0; + goto Leave; + } + + nextP = (union RunData *) &data[*index]; + + if ( retc == 1 ) + { + // check current data against current depth + switch ( *depth ) + { + case 1: + if ( nextP->c[0] == currP->c[0] ) + goto Leave; + break; + case 2: + if ( nextP->c[0] == currP->c[0] && + nextP->c[1] == currP->c[1] ) + goto Leave; + break; + case 3: + if ( nextP->c[0] == currP->c[0] && + nextP->c[1] == currP->c[1] && + nextP->c[2] == currP->c[2] ) + goto Leave; + break; + case 4: + if ( nextP->c[0] == currP->c[0] && + nextP->c[1] == currP->c[1] && + nextP->c[2] == currP->c[2] && + nextP->c[3] == currP->c[3] ) + goto Leave; + break; + } + + retc = 0; + goto Leave; + } + + // start of a new pattern match begine with depth = 1 + + if ( (*index+6) <= max ) + { + // We have at least 8 bytes left in the buffer starting from currP +#if 1 + nextP = (union RunData *) &data[*index+3]; + if ( nextP->c[0] == currP->c[0] && + nextP->c[1] == currP->c[1] && + nextP->c[2] == currP->c[2] && + nextP->c[3] == currP->c[3] ) + { + // check if they are all the same value + if ( currP->c[0] == currP->c[1] && + currP->c[1] == currP->c[2] && + currP->c[2] == currP->c[3] ) + { // if so, leave at depth = 1 + retc = 1; + *depth = 1; + goto Leave; + } + + if (debug>2) printf("Found 4 at %x\n", *index); + retc = 1; + *depth = 4; + *index += 3; + goto Leave; + } + + nextP = (union RunData *) &data[*index+2]; + if ( nextP->c[0] == currP->c[0] && + 
nextP->c[1] == currP->c[1] && + nextP->c[2] == currP->c[2] ) + { + // check if they are all the same value + if ( currP->c[0] == currP->c[1] && + currP->c[1] == currP->c[2] ) + { // if so, leave at depth = 1 + retc = 1; + *depth = 1; + goto Leave; + } + + if (debug>2) printf("Found 3 at %x\n", *index); + retc = 1; + *depth = 3; + *index += 2; + goto Leave; + } + + nextP = (union RunData *) &data[*index+1]; + if ( nextP->c[0] == currP->c[0] && + nextP->c[1] == currP->c[1] ) + { + // check if they are all the same value + if ( currP->c[0] == currP->c[1] ) + { // if so, leave at depth = 1 + retc = 1; + *depth = 1; + goto Leave; + } + + if (debug>2) printf("Found 2 at %x\n", *index); + retc = 1; + *depth = 2; + *index += 1; + goto Leave; + } + +#endif + nextP = (union RunData *) &data[*index]; + + } + + if ( nextP->c[0] == currP->c[0] ) + retc = 1; + else + retc = 0; + +Leave: + + if ( retc == 1 ) + *index += *depth; + + return retc; +} + +int +EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr ) +{ + union RunData * currP, * norunP ; + int i, match, depth; + unsigned int filePos, run, nomatchrun; + + currP = NULL; + norunP = NULL; + nomatchrun = 0; + filePos = 0; // position in the file we're writing out + run = 1; + depth = 1; + + currP = (union RunData *)&data[0]; // start a new run + for (i=1; i 2 ) + { + unsigned char * p = (unsigned char *)norunP; + + if( nomatchrun ) + { + while (nomatchrun) + { + int cnt; + + cnt = (nomatchrun > 127) ? 127 : nomatchrun; + fileArr[filePos++] = cnt; + nomatchrun -= cnt; + + while ( cnt-- ) + fileArr[filePos++] = *p++; + } + } + + filePos += encode_rle(fileArr, filePos, run, currP, depth); + + norunP = NULL; + } + else + { + nomatchrun+=run; + } + + currP = (union RunData *)&data[i]; // start a new run + + if( norunP == NULL ) + { + nomatchrun = 0; + norunP = currP; + } + + depth = 1; // switch back to a single byte depth + run = 1; // there is always at least one entry + i++; // point to next byte + } + } + + if( nomatchrun ) + { + unsigned char * p = (unsigned char *)norunP; + while (nomatchrun) + { + int cnt; + + cnt = (nomatchrun > 127) ? 127 : nomatchrun; + fileArr[filePos++] = cnt; + nomatchrun -= cnt; + + while ( cnt-- ) + fileArr[filePos++] = *p++; + } + } + + // write out any run that was in progress + if (run > 0) { + filePos += encode_rle(fileArr, filePos, run, currP, depth); + } + + return filePos; +} + +/* encode_rle applies a "modified-RLE" encoding to a given image. The encoding works as follows: + + The quantity is described in the first byte. If the MSB is zero, then the next seven bits + are the quantity. If the MSB is set, bits 0-3 of the quantity are in the least significant bits. + If bit 4 is set, then the quantity is further described in the next byte, where an additional + 7 bits (4-10) worth of quantity will be found. If the MSB of this byte is set, then an additional + 7 bits (11-17) worth of quantity will be found in the next byte. This repeats until the MSB of + a quantity byte is zero, thus ending the chain. + + The value is described in the first byte. If the MSB is zero, then the value is in the next byte. + If the MSB is set, then bits 5/6 describe the number of value bytes following the quantity bytes.
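+
+ A worked example of the quantity chain (illustrative values, derived from the
+ rules above): a run of 300 copies of the byte 0xAA at depth 1 encodes as
+ 0x9C 0x12 0xAA. The first byte 0x9C is 0x80 (MSB set, one value byte) |
+ 0x10 (quantity continues) | 0x0C (bits 0-3 of 300); the second byte 0x12
+ supplies bits 4-10 of the quantity (18 << 4 = 288, and 288 + 12 = 300);
+ the final byte is the repeated value itself.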
+ + encodings are: (q = quantity, v = value, c = quantity continues) + + Byte 1 Byte 2 Byte 3 Byte 4 Byte 5 Byte 6 Byte 7 Byte 8 + case 1: [ 0 q6-q0 ] [ v7-v0 ] + case 2: [ 1 0 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] + case 3: [ 1 0 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] + case 4: [ 1 1 0 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] + case 5: [ 1 1 1 c q3-q0 ] [ c q10-q4 ] [ c q17-q11 ] [ q24-q18 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] [ v7-v0 ] +*/ + +unsigned int +encode_length(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned int mask) +{ + unsigned char single_mask = 0x0F; + unsigned char double_mask = 0x7F; + unsigned int slots_used = 0; + + fileArr[filePos] = mask | (quantity & single_mask); // low bits (plus mask) + slots_used++; + + if (quantity >>= 4) + { + fileArr[filePos++] |= 0x10; // set length continuation bit + fileArr[filePos] = quantity & double_mask; + slots_used++; + + while (quantity >>= 7) + { + fileArr[filePos++] |= 0x80; // set length continuation bit + fileArr[filePos] = quantity & double_mask; + slots_used++; + } + } + + return slots_used; +} + + +unsigned int +encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, union RunData * value, int depth) +{ + unsigned char single_mask = 0x0F; + unsigned char double_mask = 0x7F; + unsigned char slots_used = 0; + + + switch ( depth ) + { + case 1: + slots_used += encode_length( fileArr, filePos, quantity, 0x80 ); + fileArr[filePos+slots_used++] = value->c[0]; + break; + + case 2: + slots_used += encode_length( fileArr, filePos, quantity, 0xA0 ); + fileArr[filePos+slots_used++] = value->c[0]; + fileArr[filePos+slots_used++] = value->c[1]; + break; + + case 3: + slots_used += encode_length( fileArr, filePos, quantity, 0xC0 ); + fileArr[filePos+slots_used++] = value->c[0]; + fileArr[filePos+slots_used++] = value->c[1]; + fileArr[filePos+slots_used++] = value->c[2]; + break; + + case 4: + slots_used += encode_length( fileArr, filePos, quantity, 0xE0 ); + fileArr[filePos+slots_used++] = value->c[0]; + fileArr[filePos+slots_used++] = value->c[1]; + fileArr[filePos+slots_used++] = value->c[2]; + fileArr[filePos+slots_used++] = value->c[3]; + break; + } + + return slots_used; +} + +int +decode_rle( unsigned char * dataPtr, unsigned int * quantity, unsigned int * depth, unsigned char ** value ) +{ + unsigned int mask; + int i, runlen, runsize; + + i = 0; + mask = dataPtr[i] & 0xF0; + + if ( mask & 0x80 ) + { + runsize = ((mask & 0x60) >> 5) + 1; + runlen = dataPtr[i++] & 0x0F; + + if ( mask & 0x10 ) + { + int shift = 4; + + do + { + mask = dataPtr[i] & 0x80; + runlen |= ((dataPtr[i++] & 0x7F) << shift); + shift+=7; + } while (mask); + } + } else + { + runlen = 1; + runsize = dataPtr[i++]; + } + + *depth = runsize; + *quantity = runlen; + *value = &dataPtr[i]; + + return i+runsize; +} + +int +findIndexNearMatch( unsigned int color24 ) +{ + union colors color8; + union colors clut8; + int isGrey = 0; + + color8.c24 = color24; + + if ( color8.clut.red == color8.clut.green && color8.clut.green == color8.clut.blue ) + isGrey = 1; + + if ( isGrey ) { + int i; + unsigned int bestIndex = 0, rel, bestMatch = -1; + + for (i=0; i<256; i++) + { + clut8.c24 = panic_clut[i]; + + if ( clut8.clut.red != clut8.clut.green || clut8.clut.green != clut8.clut.blue ) + continue; + + if ( clut8.clut.red > color8.clut.red) continue; + rel = abs(color8.clut.red - clut8.clut.red); + if ( rel < bestMatch ) { + bestMatch = rel; 
+ bestIndex = i; + } + } + + return bestIndex; + } + + // we must have a non-grey color + return -1; +} + +unsigned int +color24toGrey( unsigned int color24 ) +{ + float R, G, B; + float Grey; + union colors c; + unsigned char grey8; + unsigned int grey24; + + c.c24 = color24; + + R = (c.clut.red & 0xFF) ; + G = (c.clut.green & 0xFF) ; + B = (c.clut.blue & 0xFF) ; + + Grey = (R*.30) + (G*.59) + (B*.11); + grey8 = (unsigned char) ( Grey + .5); + grey24 = (grey8<<16) | (grey8<<8) | grey8; + return grey24; +} + +int +convert24toGrey( unsigned char * data, unsigned int size ) +{ + float R, G, B; + float Grey; + unsigned int grey8; + int i24; + + + for ( i24=0; i24 c.rgb[2] && c.rgb[1] > c.rgb[3] ) + prim = 1; + else if ( c.rgb[2] > c.rgb[1] && c.rgb[2] > c.rgb[3] ) + prim = 2; + else if ( c.rgb[3] > c.rgb[1] && c.rgb[3] > c.rgb[2] ) + prim = 3; + else if ( c.rgb[1] == c.rgb[2] && c.rgb[1] == c.rgb[3] ) + prim = 0; // gray + else if ( c.rgb[1] == c.rgb[2] ) + prim = 0x12; // red green + else if ( c.rgb[1] == c.rgb[3] ) + prim = 0x13; // red blue + else if ( c.rgb[2] == c.rgb[3] ) + prim = 0x23; // green blue + else + printf("cannot tell color %06x\n", color24); + + last_c = color24; + last_p = prim; + + if ( prim == 0 || prim > 3 ) + { + last_co = -1; + return last_co; + } + +#if 0 + for (i=0; i<256; i++) + { + + break; + } +#endif + + return -1; +} + + +unsigned char +findIndexMatch( unsigned int color24 ) +{ + int i; + unsigned char ri; + static unsigned char last = 0; + +retry: + if ( panic_clut[last] == color24 ) + { + exactmatch++; + return last; + } + + for (i=0; i<256; i++) + { + if ( panic_clut[i] == color24 ) { + last = i; + exactmatch++; + return last; + } + } + + if ( nextmis == -1 ) { + for (i=0; i<256; i++) mismatchClut[i] = -1; + nextmis = 0; + } + + i = findIndexNearMatch(color24); + + if ( i == -1 ) // found a color that is not grey + { + unsigned int colormatch = findColor24NearMatch( color24 ); + + if ( colormatch == -1 ) // cannot convert color + { + cvt2grey++; + if (debug>1) printf("color %06X not matched at all\n", color24); + color24 = color24toGrey(color24); + if (debug>1) printf("now grey %06X\n", color24); + } + else + color24 = colormatch; + + goto retry; + } + + if (debug>1) printf("color %06X now matched at %x\n", color24, i); + + ri = i; + + neargrey++; + + // keep track of missed repeats + for ( i=0; i= 256) ) + { + fprintf(stderr,"Err: Too many color mismatches detected with this CLUT\n"); + exit(1); + } + + return ri; +} + +/* + * Convert 24 bit mode to 8 bit. 
We do not do any alignment conversions + */ + +int +convert24to8bitIndex( unsigned char * data, int height, int width, unsigned char ** dout ) +{ + unsigned int row, col, i, i24, i8, size; + unsigned char index; + unsigned char * ddata; + union colors color24; + + size = height * width; + + ddata = (unsigned char *) calloc( size, 1); + + for (i24=0,i8=0,row=0; row=0; j--) + { + if ( bits & 0x80) + fonts[row][i][j] = fg; + else + fonts[row][i][j] = bg; + bits <<= 1; + } + } + } + + WriteQTRawFile( ostream, (unsigned char *) fonts, 16, 256*8, 1, 16*256*8 ); + fclose(ostream); +} diff --git a/osfmk/console/panic_ui/images/panic_dialog.tiff b/osfmk/console/panic_ui/images/panic_dialog.tiff new file mode 100644 index 0000000000000000000000000000000000000000..9abd78924c39a046350314aee4e3cc56cc6f1d6c GIT binary patch literal 136036 zcmeHw`*R(~b>>j81xtw?TV6YbY$b&twV~G6ydY@_6d8)rwPZs~M4OQaM{wdeAPG{i zE*=6dERqr=l8{6xH}Cg5cHZyzJ5Kx$?9W^NCHwR4R&BOwx3;_$2+RGxbNY1m%$>P+ zF5U#ENzC;0+b2>r@QCOndBo$lDsGRVDkTxcfof6f4mIvcHnEgs6V+4|2@wC z9{2~GABhj#>-@-d-~&eI(f1$tP?YAw&QChu?9v?g&kr8>mgL>8-2Zjp-+TYR9eB{C zzX$1$B)BI34!(O19QFPWA9$Q;gw>DS^A)oFz<*4V!H>Lon*Q(jN|FqnyEWs=y61zB zfJTzs|IYO!Irx6NH!jYfpB_H@{CsPE>G}DE;bZl~$9zJsH#6x2z zjy{syzWvp&{_{Wo(?5OXE4Od|!$17}-~Zj;{q5iW&ENd>U;ou#{pDZ&#a|@Jpa1!v z{n?-X>7V?`AOGlzx%tt^E(^gpTB(h(xr*N?zVG|YXU?5__St8i`QGn6d-l_x{?w<=oO$}` z@A;nZ{_fMKpL%M1{F9&j#3!D3;^QB0G#-EavByrGdi2qgC%@~vPMkP?{E>G-Qyhp@-KO*;}m)E7WR+-I6eK=TQ9vdG4bS+BO}w(N#FPUYKqTpP#-wGxzAh|M`z!Jvcmb@zI0d_wbp+XBM8Aetzbu zYm3v*Ui;L!3)fz_aQx!I$4=dU^06ttUk+WFS~zrRrrDZaTpFHUy71`86VHq=`pOB! z%uctahOf*v=ax=fdGw%UH~~L{M-L85lGY249(?Z1+2PjweDj5w*6`8#BlV+)KK8MX z4qsZFnw@@We({CjBlU+5g5=`GODCRr@{^7ro*q5;e5FA)loB4=k=>hSPM1e`f@;>r07%d^vSt_vX!(nAklIC}p0c`|UkfPs^v9mF8kC{-BA1z9x=YK4MFJ!oWS`(0H? zUplVTRjI2l4fK_`s$G@3`qDsOiL2UGsjDvy^p&`(U6s1}(m-E{tJ+nmt1k`omAI;1 zmAd-UKwpVBTDvg6W3xijbC?Qx33EQlBOgC~=3W0mYz^<1Yjk1Uz<+1JxBfBm&Nhd? 
zh$$h=ojf{%X_b-T>A4H@7iZ=!KRPmT?vsZekvXDofAZ5$oSQuRiQ$Fj%u;Lk?8L{P zKK;b-$f42E#==5#diaU?*@b1yERBvndG5*KvrnIX_8jJd4v&t0;!`8TBhlQ?6qCy= z4iYYno?V<@m|kpMeHv5`Aw&IQ>*5IJH?)`(UkJT8bD?$W{`*e8Fn#qDCW7ipY+-R` zu2nxfb$R-e^NX`n@Sb^UesSj7Jm!;{&rC0!9A$cBbJ_Frw`Mr8R==<~J=L0Dv^@8n zTxx;*YpOoS_b(f(GM~YFa0b<8 zAHK12MDtardS7k9`^prYX`lP$omC*7uR~S$Lsi)DrgZnSw0!=Yj(QiSFbe0wUENxu;!tzpU`XU3LnZ6JPmcABtA53Hh-hGZnyN${^hu-&{v_T;iXG2B$ zd;YX26qC6ls(6xD;^8`mrs3t-Bly_I9?DDo@UgK}a2S)I)O@CGM4BDs_o>(J)H{dy z&}Nx&epjCJ2Kr%yni=Uw^**ZjQD{Gqq7~c~RW~bVLESxc+f78)RyS2KH+85v-NM9EinZ>YR>!=DrlDi*$b3EjHI!-ozZJ9PAs zb4QP!c=*_f!w;pVKe{Tld$-u}!+o<=YhrF{XWr<0ppJEswV-RPUh8?&cbllcTl88F z{YH!_Lbvg~zQ-zXR@j0XT<{hz2(Di4Fb;I@fw#B@FDzWV8(#25Vn#3dr>i$SO&nYD zhIiw?#@W-DRH4x?3>@8%>hR(EQB>t}RxQ}xzKGv@PtRX?ArQJW)m)k$r2(qj5VUo5 zAsV&jo1S3Ra~=9QnW%`^^3xsN%IFl+298-NyHUvz@}!rsTgF&KD>ysG409-15lzCA zqhbv`b-!432mYIwa<9miv!{}$_;k;Ix##iQ|9bmh2kyP+nSsmiesJI&*`H>yU%Tf) z*;98BNwJe|KsKwrb{l@|Pz&z)_as9HenOsyLul3~*5m&nq3@T{NrhbCE>`F7l(_eW ze2-mE)E@m_3F8jGE0S`^KYyE0{onF@pImV{pt0i*fU3Wzvo=a9QZ#H`cyJ{ z=@Md^xZW!v2VBUPfAN=m$Wg=`NIr|JrFmFF4uufm=Ktb3Nfx;4D$fo4S4QW>+`he; zIMEFDHo{R>$bHY_`scLxZ3so7Rx*RXW-^^z zOb#VS5S~Jw8Q`KY$qO237bWl!j!8bxEjOFEahv45*o857ap}|Nzi*ba^@za_>~Lb#Cf% zD(rn1rjc*@O6&B}Q|Hb+eI9#zNbo%toJxG(^Ye?>8qJx@rm1gI;j!F8gOPLa;`F7d zP-&pjK&6371C<6U4OAMaG*D@v(mP-&pjK&6558fZ?GPrK(*PBfLJln{;8GJLK!%B0%; zG_@Les15bYN$t+rU@#%94vC;DX5#!QA(B#vHFD&}ev4$VUj=S+`bL6=H<6U8t1&C4 z607=gT-Kjz77|&mZDi?me=g)ip)iI?WMjBWQ_aP7urX*PyYsk%v6MIIySc8Ln##yy zX@gYd+D1rU##hM6R^cjia&bjV@076&&)b-0H5nXY-?GZy1kIM}F(zvXHn|e>S|VEG zmRBU<&ap)MBSZir%S$#B1g)>Tl#E_);8I(!%N3Mr*i6P?I4O8NX-aaGBT56k2Flg0 za#bjVcu=XaG%`AfJ31TI5u{d5$Qr|h2^Ikta5X+jJQ70XBOMZ%Sm(Q`2CmS7dkugN z;wTI? zs0r}8BqF2bWiH}uDJP&D3{iyK#02yNoynVsX(TbE2DlA8>mLcwUvn>pL@0##qzSb) z$bl3j3@fg}IJqRl!h{wsgotDKxH;|cI+S@ggA*0eWWkdC9U+ zb19oijU!JZN66FO2l|_+~Y3#kVFnw;KtD^6OnAdm5Jb- zO0)5j0mMYZyfcB8#8N^_iij9_!H*3QabAds6zL>Gwc1d!iX=?on9#ei$Z~w2CDLfJ zq=8zI)N8dm#i1MW#RxY$cM+|*x{U544_6(KbTkd#s7(R)LdgQYkb3*3Fe#t@WMB-0WI1vbL7g=3h)cdTr&oa4y}%hqKyZ*^JH+%*@cldQb$*RRb3!)}i4HtpuGz243es-Aa6 z1NK5p72dwSoCYlSsS{TwTfSaFn^NZPJk+%diwbYwC<7`Gtc~Zw4QOvv&_)W~6W%wk z7J$`X@YOf>gftZsqJj4H#s1*yl6vuO?~e=3AO)3eqNC?5NSGJg&5qN4XSNfk5e#@Y z=w{zZb;ny_IPEk{9FVeMN&V?_3aFm-gC-CJ6S^sM# zVhY0WH%Vz{znn^E*u{idn50e?HsTFfYGc`oaBaVmg9?K-hwIwfD@3-N;gr)68P_4a zZ>CvXh45B%1Q`yt;xUV>@SY9y1?AOxAKBZ}sYK>!uN0uVd0f}lR@yq3kjFXM=@hji z#KY-5PEw(nN4U2pwmk)BfjHMgp1~p~iWmZsZT>`_b*59NkwS!!vemw`es&B6_MC|JJXG%$|2LFJ#qGY@2@Ay-r)I8YWcE>OBel2Sxr`q*A8S$6G0 zQR&`IQ{l3gE0SuyE`n;2;5g`FxgJ~mlPXfxFeoG_M2I+%6e zw>*;6*4D-`-#ZClfGZw(uVMa|xYF-_s)i(Pu4%+i5+_K4lw-JS2KrpK2RSVYbUF>E zgRs*h&aoQAz%=-($V`Y;B?Jh#!uUo*G&8yRPI<7IWu_350+a8mqcEJ5@4(gBC(;&K z3XzHs-fJ@YFmod)HfcP`7#~@X!*wVbLgR%x8z*Xs-~i}|%y=?}_DW2F5}F?0uQ)lH z%?>V=L_%c3_yHzHn(^d;5!MmSFrN5{i;)CFu2MGn23`|7`=j>Oxgn&*KEWq$)Pm)t6zyq$F z=Z7LBJ#YY!ZcbltG+fyxfh!s<(C$bQLI^I>p;M+qhprj$D1?Rd;v0_<97-cj74BAE z7^^CVVo6H}a)@?Jc+~=)L<6bzDit=;qSD{Zt8fl2lX<)q1FpfIn87s_;aG=CQ0?_4 zyo$4~VV`nOq7+_Okccl?1`(Yc5OCOU`o@TE)No}pC0=}QGKIBQO(G%EUI!emZ0zb> z2I=4tBB@5f0?QYE9M=L(U{zlH$|$Vrk}?n1jGb6PmiDSz)OzoV*tHwqAbj895A~L| zR*r8(3HBPz;0lDoQYhiOp|59WZe)`{aBO@;0M;ytUYP-Fof=8=ExtV`X<4nUjidjy z_p9NWOpYf=F)}XQG7*cD2!hCpM93ic<7G=inqhnd^?+SD2T}lhBza59&l*mp%Bh{y ziy>OTSPvP%w5o)Bb9oDG#bzfKi>zG2`_=Vn%(ttKLp(Mu4qw?`ap!;s+%rT#&f*$J z2=7LMcf6v+l$kO7;&byekLhvPYUP0&s%nxOEijYM%3l`w~lhuJcb zl6GR=!-ZO8En$0&dj&1)F1Z%WFFCx*-lT^un|N(4o`1Sa^t{7jwo4l0dSKQku4~Kf z9hUPhEAGvO9)OjM9=8s;Wcsy*%7C|xRqbY37`7qZ&25zHE+_rkHo#S$e2UH)INLSMt#h zjGbv$HY-Y%KWJdnKX3x6&EmRN{oIL_xhemqew|2@i;M4UgZ+h66BMyFH{pM?KfULr 
zj!Vyr?<=gDQjFEPVSjrI=7i(I`@%3U@j_(;i1oQa|CU&A;MyLa?!@}sp#SCm zbe^9&E;~O(U!gC@^eX>GG=S9+Tj1i7w}2d0P*iSH8hC3fP}s`fuZ|zAd8*vTHPCMD zQCwSh;n;~Aiu{(N9+@e-HxUQddb+R5)&tXc_k*7Dd27}NKleK|<*T9g88kaNbdPm;j-m4nO;kq!WkHv;= z0cIho?TV-iqB$#>aR6T5xK0$Ps9k;=I+p)-bEXEeHlmAz`d7de?9jrg#F?5JYx68Qsc!!P;1YPKm0K`%mkV<2gkwj{W&U;wC@WY7 zS3J6NMdchaawQV_(`y@mmin+WV!5qSw+mut#9*=v-9drea{&kkMC06`A$%DE1>LEG+bE=2;K#hqY8fm&)g#!a7D4+CEPSJhAS$|m~8N5#_N zb`f&RgS|k-W2e_L5CQQSu_8IB!8M>lL2HDqZvgFf1TBm}i(?d@Wm9C0DX&Sratuit zZmdF1B>bq`rKw#B8=fQ1auiqWPnF%00oMpt?BhXrt!C@VWt+&_4T?jS`Dfyq2u?~! zf_f|xNivZvJf`-JBCBf%cBroDMlbD8dy@OZKm_`2wX{0`pm*p4=j^gh=|-A7{q}5T!zP-Z7wyN!wXpjfi z6vq&aK%uh$hyQ5!aEVGTpPb`x)#+s%g3m^fLRuZ&z#T7gRTuoe-&tkaE zo^B|-9}&e=h8Y$0s0Bj${;9w@1_;E=vt~DZ_z4kPebb ziNwh9oId84N<$8ZtLcWxcgneDC|#xuZsnT^5w0?{!(sK}lq?Lpb4-T*tR*?bv--51AMaVkSBpK75ibR6vLb}uCJmTo)jUumEAUn z#i6}g|9}QMiL<^~#|A*NmW*ljP+X(@(qBl6R4G9X4ueWKO6N-Cqvm3?4tzlsfk=Rx z!xilrPMa{Z-0`kW%*wiUj=1UNq;Z0X>Z@_M^Y>NqT3*pPM5`dDftMrg_wwfI_DU*O ztk)>wN*r$fzJRSPv8J|C_kMJn3u|UpDK0d^)!PH&`1O7YuNrJhb-fnc^O2+|xsWe?5W(yd|Cwymw z=AGQw!=PGDuFsXG~J94b?uCyFn$Hw1C zv!b}hU2%`YDEeC_-aoGLNw+kQ+x%3B>n6*+nnth?*IRkzwDWd-xP&>lg80o;lXbYR z%M5dxk^HBtd0Z5R^^a>IscnhCWQk1>n{|Iom?}k9^IO^C~b3kPatU zI1z)abnO9zz?kCBl4ZP}I$MKcU7*-wk>2G($!-$5jS_Tm1EV+{h)E|BHs(lRD`vnC`;iQ_ab$(klPH$xaw zs?9*PS!;@c6U=xcw>xl?@!R9X4#9pnm>G5T8PsA%Lu$w-e8B}%^AR3r-$97j8&#e1 zNltPLJZbHeGh~S=|AJ`-r&=Na@!Z#Drd<2@COS{1<|V1YoJtD}0XXHE<2sluab&n9 zSjy)^J+3$1k+sCtQ!`va74-wH72n-q3$z1t`CU;8PUgf$8BX%mXMf_Y`xt6oN`}cx zjw=_WzBwZ|EdolMNw&dBsl1>A(Z0u(h+^5Puv>|7b=CogD;d%dp(D65kFeaPPueSN zDMX2XgaR1hK?7@v>>QVr{zIfP+3#^>Tut&2SE0=dmc(R<6P#DsMyW%lp&$2GP%%)p z6dO}xKg>L|MG)V%nmv$5QfXOAg?@@BsNl`;08@ zwAg{P;}#G67%E+yl1-FOYCKgL?<*TC3LX0!+c{V(qrG!OR zl*%xULS$VS!1P9L?ME2BUCykTW=v%{07%Be`36-QIs>juET&K7kWe*O${ZdiBB1UV zR~`QnBE@y#StJ1hE|+xb+Ju-_-g_t@>W7Uvj=yQzHR2d8gCYqtJzo5ziUI2o?v%sz z!>A_W#|^93TtAE;>4(?aZUAiT@DYuZd7@D+il}LJic2ygx%UoW)9WsB^B}MzTU=n} zhX!sIsV)xhP~-JDHpd^UHYYj+U5``J->b%G*6Qq^tls>y+g*0m=@{SJD0V)R^LXVK zY>b=adPH*tWL?&dDpQa=9emY@SNE(2s$Y@iY%qN;8uAFb-201wYB~Q0zUtqx80a73#$$|*%S18pDnwXH&RFk+mBIJr2 zFmxhG>|HWz#MvfEES;~WrF8AwvawZ0}7p}9s6X=kdwkymc=21 ze#28o&XN!+x37MZh#)|7?$=pqSPwV>jPxq7@Z*C^q@U*+(3j5mxbtw+Q*5ymDl$slh05O!NrjrO)?XI<#6XoF|(BLl50?BdB z$f~5JDXx+b7cm;Ru2RB`2ZJe#tFtmNtu}AzQJ9Mr0URG^3=8RkYsWB}qTYcnWl%-) zQC!29Fo78l6_H?wH{lFYBB`3ga&T=XXM7V79MJ~j_Daj=TJku^vNGU8wciqzyS52% z!dE|id(|H=22%!Cre|S>E8F9}V%W6|j}uqcHe8rWSo4fJPx}r8F^6R{pV}8N?T&ER zpMV)gHt6%5;ShVEJq^5=iMHP_YcUdtm#do};HeT^&}4>JvwW`66dm5LFwZz#(f@LU z1nmbRL;)|`+M3%^>f1N+$VWht0C&7G^@Iwfl+T2In79t1i{U#Kr4e;mG39X`(YHlF zcc0)2Y{Y!Z5vLy)V}#0Z`4XlqxuQ?M%ytDhtyxPlZMN^;$*Yp*0I32?W)w!{@_!4}wqS(vtQ_Vz-` zjfb_pDrJ`8T&IoC*W^3VV;!yCgA8o6eY=~crs8{MDz;smGKz^AiGqu~nZ{RwD_KPs zmvV>u4Ntp!acq&34=CkXDbjQZ&*(fvbo0f9cS~n+ar$y||10S%cluLIFva(DS#K97jAP(wR8-OX(cdQ%VE9 zj~jk5m43ZM$4%|WnPN1G{3~&W_4OBv(uMF=I!E=C(m-zouB6hhF9pqZ^TnCErL(xq z^ihf0lc{fUftA0+;-<{fKLeu2Jyvaxq#7M(a0|}6=c|@DrB#-G>V2?7de~I%S5E)^ zaM}u6W--iMT9E1~rGeh(N^A_*=tpmvAEO!TdKp}2Q*u-fX$^Gm>lnK+T%&o|q^zJll_OY?(}1^>#j|8&ZYqmZ^SQ32oPSxU+ur2uW9;6797b4|l5=+x7dk z2a281edAjFe(i>9@6B|5E4&l4G9i5zQ_VMZ!?pMPaNi0qgJNfzzKf~mhr8k0+n&?6 z!aFf56Vi7vl|3gHS9i>3Z~N=U=Kae+nM#vpvc%a4u)n&aT+>;5Kb1b=v^S|ONP&}0 zYCIalt(VB7*u*CY^TM-Xq~TUWby+XL)o0i)wUW+V(oUDcti(eZJDu*u;}$$Rte1!u zU8b4DQx|n@Xdzff0b3P-lg@@}Shv>&xuKzTECcNAWLcupDK>PX8jU=FRe+iISbhe4 zkYyt-?73ZWT)QTi``XtfiCDrY4ml`CQRC~Pu04gTtio$MR;9<*8VX5FT}pmt+dK zGTHkCS9l<`Q-~@<^irTME>GqOr*h!cs#|95ziB2pclrYC*mVLEJHC`0=m!?<_)f)@ zuyF(3(E92s)TQSpA~(O6C{A$I=QbDX15i0U6hiyGYX$(&;qq?&aR*Rxiu(bu#D7GV zc*~M+KI5K#7AYE1fx_ 
z35TL&N1i68QcNNCwMhz@uP&UkhNbYpwSi2k4c2xlq+{W(>T3J1sAC-OgyTzDq;bT4 z$J(oHpp!Oj8|=6Zi-|3hdv42We|LB&8-#U@RG&D>1k$=SuD;%84Dp;KTn;LRb$o$R zPd4SoS9e@$jH@(6D-88Q=?jZP$;bSsEJL@!+J{eEhV6d=*X((&B4ob0pg|ilsy5^! zV)X7Bt~|p|PKnfds~xd%6yxeP&|yT#swhr{QAvV6_)KJ=dARhRMBh*(li=!9Vz?62 z7*}PBTVGv`5Y(7RoK^;lWasPJt84?&EnUi5Oel>j36p|t6e0g;uci=FhTxj7F6r{n zSeb>joChUZsV>_t+#m$FiC6ZUD2A&Z&Iz{39jdIahLm3~hqFUW-C7l@=wudA^zr}aYU`y;hb&*T^%QT^NYrKjj#{y zhO0X~G{%*^Ck{E~p|Pi+bK62CRDgr-Z3iBc^1>rF@TI>%X}J31Qe9%=swb1mpc{FS z!f=Hg7@K)QWNEI#m zV6(zdcti)b(HAHUSM4@KV&aPI%nvfmTN_FSwY_3w)>!`>W0erGWTm0=xH8+f0te~> z<0G$-2-gC2sfuL8%`#bz8-VK>ucFg^aq;KDsP1yF89{GB)qE>&nY)!KpbX)Ca~@1a z_N{tsmr!D+EM(bQT-%ufO0bk!50xQMJ%;nvx+pWN_v<`nYwO(g3=CzeT6Nx9Mrj?7 zc0OVkoK~`#x}@DeRF-=F6m**K)oiBfIirD_X@%uJWgBe%Byn!m-hnnXY$Sv*r;Az+D<)eR@Oy{fqjIj>sEWUeQmz9b z9~+X6O-@Gh{~%wyxT#Y<xx~M7N7n44lav*tNq!u*a53U_&YKAHkaIKAvMb!c1c?dy}AeOr~ zuK4z5^i9k?gKI1V{Jr?f!Bv7nKELK9=+QSB^JJDYWKZ_U)RYFntN55;JPCDMq{77pgVtM2A9Pa44WsZu5FibJS zaTcm3pJA-EIpZTIn{m>}8Bs-vnASlv;j~dL`DUSMh2zjsnJjXx$jqeZjya1YWX5EE zh?!G%SW+Z-k;%Y`zq7{A8v6i$&At*e2;7-3E$&}J|`8iPgE+vrDCZ@P*8h!|~ineVkB~d}1 z8F#`-VZ5MZnaUR_ymaJ;u<_10Zyrb|Z29WR&MJ$mbk2=7EugZ1Mesp79Rk6_j#&HQ z7Y$b-D8*^Cz$#5vCiA7El-@Y%hmi)IC+iI_xEZkIp~S@S7(%r z#puA5B0msSRE0=qYwD7#J^XsmcdxLO&3 zU*8F|1yE&$RUm~c4Zd{Ag95CR91=)7#ahhBxJpC}*I0Fm+A5-)e$crwzp0c&cNAB) zMpP1Y>4iVy%JVWo zh{ijTKZ-$)n47gdh4*Xf^N}9clY@=hhOc?T7~3l;7zZyuAa7JX4{d~Uh=d-V6(6aQGI=OWr2}z=E_h7=4fECr!tv9 zzObJCAZ5E$)=VO;ArT||%H^d@qJqc8C|l+2u7T?XI?M!TTbU4Tr{{_@S@IrV?SPep z%C`LL-9@QlBMr2BpEDr<_CsuqHe}Pi_FtlgQ z=#IDcN0!ec>o+ysM}-e6DFD7OltNU!Y@@5ft( znM3J4d@t`cTsFy%%rE5A$cX~3n6}7?a1{LSPov16{gKOSte9<}n=8U*eZ5C-T+jo9 zg?8`bhK#x5(oK{*>t3*-lXEPbBavAs_&HSyhbcqM6B1V@K%mW|aFS(+Q!bbx#f8%} zNUG*oQyuP#E0e)66YW(MyJ)vBCKUFk-S8;P6mjId`cq4V+#grW`H8(Ev;S3%p@JJ4 zp}tq8rQ(UWx@Tdx%Lf(K{-pbN?$_q%=$5 zVkSh2#Bi9GL!jZx>22wIkw~^31zg(;>~|GW*IxOo)AX*rGJrun!6FmoTtBcbY+*Yz8ru?VjA;YUGM`(a`#E_de1 zv(<;E#3(wjb)yf$cSzEL4V?fe&nheJ7$<}9?J0J}%-&eWAc*Ufv>&E9wjGZ47F>0k z>7?7MDq|_Lu+~PdZAsH<`$et+ykg(-mXx`-02lk9>p^Atimu6uO^eQiNWKME85Oxe zwN0{fmVOx7aEyq4p;F$jc&M$1J(JoGBQZbxh;&^3Fu=xY7S@_O4}5bbQJ$eA4+fcIa+eXRAb>y*0pGMB0{d0ccmnttu@=rbIp!%9yN|n3ctgd~7vIQBUX{VHK%RIbvJ%66R-M-%Y z#QEX^_Txm;79iCpiMAe3SghtAF)yyXPCJgd*H>~+|L#E~j^3}+{e9xH>EtDsiN~tn zR)4h+eAAFBfj;MLw-wYanTXyZz1;C+UfX7<`XT??9k0zjB)|rLj`V|9$+RnbNGtn* zm5t3Fl*Fo!WSce`?E@^fQiS%)eJFqXlD=GWzGEvz-`}+M1h5sXTVKgGIFY-RZMUC! 
z=dD-3zNNU^gQt~!tJz%^Y1cC)`vnO zOi+yA1CHZ3g~c3JtmQd5iHi}FTwLxPR=a_KTqiZ(2o{%{8y`2H-Um|8wdgnt&vG9g zY+|VE@;xlQ3=(B1m<}?*yY@)^d*|413^i*$EZ4Kj!+pHtArX9T;v_zt86j6zjGBHS zj_VY;d=W^DL}1zqVmU3wj_{Ht#`v@uHVcfC7We$%E<^(s$N)|V z^3HAS(&q)NfX#8->)V{bnLuECBCd=bX_0>ddk`5Oi5<(&DANJ`yX3gsQyk03;Y@)I zhb!>u(OwA?9CC7fsyJVhO^=d>p^cg#4tyN01A-WH3Cv^I^5NHs1B~%xlFL!o#`&!= z{&x*kaGk6TjOZd&W>;JjfQ^GKi6eZh4RCmf$_X+$A4SCxf)i|kS`(2^Pdv(`r93Vo zh@H|~rw%eCcJ$GKh+~&nY@HkEBnS(IrE+#*M$SrHA9Rg6oausv#?DD*t>nZNJ@jC$ zJ|cQStEC&K=A~!FT>*v%Ui`5!@&qQNm;Y#slgNyYeA3z?+ov^tMxZt@k8BMxyirR1 zM3cBgb=QeiC-cebAjLL$Q=JggIUFIAh?){R)*DFlXb7aXL(bvqcBsq7NHAW9`Z~3c zW(g;au`XzZz~QWe%psYmL-&V9k9Ht?eV`PDo9$KAKLEf42VQtCsdEyKbMDxy0y_U6 zAD={?YGPKRdcDQIKY!x-6u`B_1wc;iFJmuB8Q)dtC`qTz6r8qk^8=TD0r>n){& zP6|64Vz^4Fy5$Ba@Dtt6X>FBNhsn;e}(nd(a8h@fj~|RD|rp&a;UEyj3uERcLZ+}pB2P;#`1?Pc07Pm zw(LQ>()McW$;=ys2aS~{Ptb>sRqt!WnIDsrQ%V6Ew()p}D0$}!_Bc-y=U0xfl}eyi z6~UeK&@-?ZFLL=en8_b4$Pv&6n9ApbND0dZ4O}BFII%UA(_5^ARQ6qY@F3!V810C1 z`YhK|bu#D_g9tVVt@qHO>#Y+EsNr?(6|XTo;J8ew`?6wu1h}}S2q@)O4sg}Y&j1D8 zF;JW2#a^iFhOO`BYggKLcrhaY`PdIXzz%u@jg_u}UP6Zp$ehKR@r}dW7LCC*5K0TBv z9w?#15zNuB;0*q#hB`)}1k(#cq?O8#*%((0S!(!br?@(Dz;T;Apj~g@Sa=}7$~MMm z;4+SriV2Ksm-5>NQRt{Ad9pu`4HgGf2Y3pJeQJk4pj_)GGf5L~R&AVh>o?K#R4j75 z>JeU5>GYfIj7m9qtx8`+c~fGi^%mK#fdW$bdvx|Y5*V6o$AyQJZQTNaj-He5(g6F(dP>%M-I1xHU|Y7 zK6`{iWaJB7bQGvwMIQdF*r}932dUt@?qS=EcdtaK-gR2t3j%M8y}h!Z?*VAr@02|K zR6E|D*I73R-UFiDcXtmw1iL7HNxQMns?Hop{i3}JUmlbzhPnK8 zVh^d$X6o6PZakp_ru=XBMF!Zy3jF5PF2IGMktD(+ew}Li=<|dOxEj{-2UBRV2%YTU z>yXaLuYq8x^kZj>-=lZTl*tFW$R1B`;ZRV<@?x(_+*QJ4R#zv^;~%oMCM~U;gFsgO z^}wGmbTXRtzvAOe8t<-Bo=uFjlW;DOV&<<7Lwn}97}NiJR8yWrOs*~8Xh-BZ~Ifck|%4EA)mA-y$@(T!mVM z>qO)0wd6j27#KfbF~eYAkPWzcAap`QXCZ6Jwb$G1B&2SM2^kDo6YL@aKFf!C%I_el zn=pK(!wDQK`ZXNv>2b9wNSAZ(+|c7d5mY$8%!|oBOg!QILuj8;z)uHu@fUYukpRhH zU6+nfoxqio57y^Aci^;Vmn=oWWvq>h*6w)$Bm>*1@IE@+@o$m+&Ii}D;5uyU&O*cWPWr*(? z|KhP(f2V?IkrT=68MjJ3PJlb|DV-&4$E>jkK=qa*EGaCHF_kc~&ctx2#i$eb2MTJ& zL3>5G!w#8-RymO>iNzX1y3$Qbkh@5gx3Ly!uUacWUBIkk(r(|nxpM8LD zM}c)9g;`s{g)kbfg95Mg$pVrhC3RsC3nAFV@u^A3ln6YE5qo!WqH(1O-PgSaQ#$ba z0c|xxShSIm+T@HVOpSznm_oaVz=BjFDKZR(v6K{Q@$=sk91O44wF2QP%b{EyHfHzA z9oW!fRcC7&tn0T}3&zbB#IX=M7QG=;s4}lt5DOxZs(h}M9y+tv3G2Qi(JzK-3SmnI zTCCjYEQFfH?SVVH7VspWp~cbt#I>nI4)E*SE4;$1Uh#ygfR&0AMsS7f$vUki^1IK! 
z=|B!|Wm$TF4z1~4dR9OzJ-@x^oHWk~554R57-^-mV{woDm_iFhAZ@%)qJD%nL=6Ri zl8>UgE=8lzs+Hg>yO&_im3({PG6epJWR2YofB?|ieS&MYSAqpLRR+DXMRN>8Sts!G z6>mT^O^&EVbid#Nxj4SnxeoNz@-+{NoO*@f8Hs{Y)#~d?O4-w_bnr{Ig(fED-QDY#3(uCr%PFLHE6f5^19-W3MdzC zwSkYhVyGj|q{OGf!MSwM4qZl1*a)%7#U(%mBO1+@nq=h6ggD-0J*qxgS{E5m)c}7@ zeUT@fiBp^EN|{2th~Vj%2Y1y%)1Y%Pw5vvm%$urx7PtrQ8SeYzXtfZO{^(SxD-Bc{ zs5DS%pwd94fl33F1}Y6y8mKf-X`s?TrGZKVl?EyeR2rx>P-&pjK&6371C<6U4OAMa zG*D@v(m pP-&pjKnV>b17A9TQ`3{=!0p>f@}+x{chQgkU;EO4`4hy@{eL8yGw%QZ literal 0 HcmV?d00001 diff --git a/osfmk/console/panic_ui/images/panic_dialogWHD.raw b/osfmk/console/panic_ui/images/panic_dialogWHD.raw new file mode 100644 index 0000000000000000000000000000000000000000..ba7161d2bf532bf95279c6946e63b1b0c18feae8 GIT binary patch literal 120366 zcmeHwU2i4Fc3oc(FztsJ@Hp22AM(P7L_lBA#tls58is7b0T>%F5ICk00x|NCAb=4{ z8qL+z#RlB;nc2)Jy z{-ybU&F?O&s}iUZs1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3W zs1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1m3Ws1lejfs3>G zX&+t6vkPOXSBaCw96lE(b5cEgnzkhm?ZLL3w9U3T9xVNtSUpfhjfuwny-GB*5IY&E zcLTToVAUHh_@sg2oi(%CYDpnP5>3C18~S@{p~!iI-BbzO#-=F}(B=333ujhudtZ<3Q;;VL?#xI)uc>o|w!J4~}^_7B*% zEZCbsZ7H5$vTY6;Vcs^98t$0nj7hCOLIg01ykxUM&~oW18NEEgPrE$QFG#ly1C(6_ zFPjS`N86InK<@SSb|}pn@o#dU|Aqkj8=x7q}jP z!&2fZ1g!!N*%P1Ov0n)^FVdl^p&vlHvj%07GcaiSo^T}#tcPTw0@!Sh5_M4Chng>& z#YJ-jZuY&-N_w9^Lc*A5-Fo`!xRP_!Q_k3Ah3S|)<&A(yiI_wVZJ1hQ`QVWQZXB&L z5o0p079AoT#w&wpkl_MXcE;>}l)-5@ksvSpsEAgi5|yNww>fCrgJyvwOg6w3IasQk z6`|~&tT{oeXpY+Uh}EGx@H$)Cd;FCUye*^q7{k>@@sb4|96gaBdCE(sMa@&|1$5x_QKuRG=qs;hsZ!g>Si+|UA`pI>3czxZppMBo^;`QZ!YyL+^sj^W1 zNehDsazkg-z*6E9N&A&R7a*{tuK=QL&d}Ga!!_4XIgvk8SD%J@vHPPd(< zu&4ya4;ScI0f3&bRy&OtLjt73$Ep!SQkMn8)noAD5+pZz!h(B@FF2-9vitrqTw|=r z;a)J3YQ>=^q@<2TV5B4*#>7Zaj2kse$-WoaK$;IU(mgZMsaXIB=EA+6ZRkhO60Y6b zSI>bQvD?G%-Zii-ab>4_uDHFXeg5h{krVayj%(B+;~I5jp~jENn&C&t4FoiZgs#%> zLq5xol!#5ZhAc>cIpKmjo3FKZjpm~*E6oS<<1!i6xBKSxXTWfsqu%!2XRp8MhJ@<< zgGeAAh`qwQtCxMiM(cw{Otx5gjL+_6#%^(wv?QEszoJcqpyv~l({q)HyPKOxq z-q3yWQ?EPTh2eD7Ff~@m#}Q^P8nb!9i$FU&@k+vp(J#_p1iaQ3svKd;%yHG#8ga~ba`BD>=ru}H$=-)iUYK;lMFtvrVW%pLj~8quw3%84 zP8e&F4rbloe|gxn*Vm_*?>z@Fz!kT=wwV7VuJrq>YEI&FO^0ccxIq%6JjT^B(C67c z;`}U{n3KFnL;K7CVx=J%5YMC z1y{FE^ewWiL@z?Tw;Fwfxe*jEXgnDhA6by&de9tDV_??(v$hcqfR4zVHpkFcVhWU? 
zdVIb%%`>VUTrY`2G-3RViIL{Cd3MM)Vi``GG;wj*U}$_IHHeZ#5KM{4 zPmi2D_*umjVE3fx86NN=&hbN@$**{4pqM8Tyiq8)WMD5soU>o$5&}9cSb*Uz?vzs# z;2Br;g%E_&0|x-H_QZW(YZ!Xo|R+rJ<ktW= zy*-2%u#Lbz<#!Sae^`*JuPpnBZhj_k*l#9fL^m3^QcZ~$pPNh(`f5oOB7NO+T&e8l zJcBxTgecV?g9Vl^ew=GTlUdb+-x!5eT`9+K9oUHtWa+C((b~Ni@n9D|LHNAGAKEQ# ztsLJ-2=N#l;0lEDQYi6#VvlDxH&P`KTpAw{fHg~!D>GoN(@AsjJ)fTEw5+z*r|5s< z`5L%3=cf%)9G3lu<|C2#&DHLm_y@XY9>TB66SXm#)@5KC)<2Co99(Hcx>+5p<=>w7TJy)}vX^h)|*_OCozwGY0 zo*!6o-#y;|*v#lt_QBh@=hc9BhfVG6Hc;$1?b|!7*9T7ew>ySvJ95AMz@>j@)t{c_ zp97He?N$nZd1p2IkO@zkyV?7h2zUvK)8g zyCtk&XD+^;2jO_C3TzhSPJC1ABF4qP5?b=G61h#lS0$zDzpVsrH)P6BSMsq9jH_uc zZ(Eg`KTF`&FF2XhZE?M>uREnMZ{@$$+lf@JEPho7w+pEg6s0t8;s0(sxsRnTOCO8h zDy&W^mg2mzzuUo_a9Q|P7>*@=UX1{yJa6dVi3O)Ddkm`W!m4$rl;;ioUv4M!@ziD6 z<0-ZZ{j#K2{Wp;SR!8i>l_l?h99B?NcPj~ezY7$0`TpwsV9ir?H``F4vXuy zSy!j^n05|RpN{Un-3JQg;<` zBt%D6GUEWet#Lh@5K%XO8#j*(oA4x%mg0r*Z(t{5*4m$fRe z4yYVU(B?W+*h*IgahG$Ii|Dn~9Kzrdc!-r-Fm0C$a@>bwN6cmZaSG@NuBeuuE2?wI z$dziCPOm)yw3J7j5zB3rwp|cABleq@kR1fbJr{s*PZH+_4LOH^;^wnY8Yk+;kkXtZ zyY1Fs1Jqu{z?H3l;QddOV*>vc_uM0yaYeDoHQY3Ej9)aCV{Pzgo0nbe1hqyU`#XDQ z^jfWL7{X+#nE~Xv^4=B(XcvnFr40ox8_RGnzbKKX`(yJl&!8RA*VskKEf4Vkl_yTG zWgr5|Gh$V9P=h;x2!+-mwQmo4dk769&~l98y*5R*O!=C`f+3~h#wz4Q!e96E0p>^s%*v){^RejZd7Ivy=gTayE${aY<)p50+DsPu%wUsyF3Un+{ zfh$x?`&X&HK&fqDKFidbcLbIYsZVV_c4-cA{ZnJ!u%O?11?z`NB3yHbU3bkXBe;Rb zaDC1KnK$RpJL8y24C%Z?2_Pjiao8>W5T26?%w3!z8_RGnzc@jeaOuZb6K`P1gqJov zB8j)xki3ny3(L?ckvSKz9XO9r_9 zP4QO7GCM4OVwr3)p_+y)fCNm|NTBP6D|pDbmis$d47b_S9pvXDm}Dx((vj!h-ciLv zf8iY0(c=P@;O>6x`5)T{n|(a-QO%62Jt}zGsc6)dSVKK6^fyvnnH0#tm6vV^I#9@jw_CO zzYZ4MJTti+i_T&^)>njDCjxP z`eGZ~1I@NMw&r2D7Wt)x(nzZ%sKMc70gkov5_xO67;OVzP(>hxmT-kW!)3i-w)fg~ z#B8qXMylFhPFf}?sD53ByML>aug8iWAzFi60$&!=-{c*Y?LYT&m2xd2{<#bf|5m_O zOKga3)^!`%j>0;aRVoWDa834rGJd&D;Zaaae~l;xGUPGtVHOtao! 
z%qEtFRR5R+x{(t~C&W9B%s^dxdf6j1Ryq#o>!n$H^T`AQ`%@XKsCbl^OPS3x)Hka@N#b~_bP;%wik;>0SDwvjlW<2WxH_10T$h?*?lUre_iBub$*}El zog{T95tuBA3F43hW(E<|CZpksL6x&DwEyU9o}5>KQGj$b8-;shrR(+(0%MAwCChj_ zb+(0IJy7;or1xAX*(aeplwgY+7{%#8OgfQpFh_DUgB&P70nXx{j z7}UrDk9wm`m?S`|vsoe*!c@4>cq&_$jHl@gTE>#Jb4%(-Oh<2VIv0aZ!US97h#XV#g7I@OysWW7WDgUx*2B%sg z0P)<{7N*?z_$E3}rshx6!kkJA3<3B!F~@ZI>MEC#BxWU^cA)gqQpN!0gP~Cfo-Fm<4T!6M5@XD zge&7(Pb{lf|5xBs_<$`laGX?NM$gHgE*7iflJoRFf*bIN`yaQG~~V4Da?);R}abh>gLMj z@`_@oa|nfN)>p%I54B`zmP=%p%WsrR`nsoFxEBHit_MIu`dTy^X1gYq?1jj+;|kU0 z@)zMssz&Wx&sW_NS$M9eE^jHxVRy+KuG$Bb(jBJo&*ewTD!9#cu!-4>$H&sPh?{Q__9<%dXrVedB_ z2bt~G53_~ffom|guTyLb;@*hRss0mKgcymd8Iv!L0h}3K{b*dATni{eU!U0l^yrA~ z0Nr4RaA6VY@TrNXuf8hk7Gj)n&$Hp_A|))c5-P(u3embS2sjzJtsi0ZUAeGkqAt-7 zgFPD$j~i5J=*+k>u}q&BRVXy*bkqvCLiE&?WJ)z$pZ`}Y)^n*-Hx*)DJ@-&R(GLf6 z9Dmca8;u5ZK@m>lXgv6dD+a6wxUwE7TPknV&|)yY4qOqfe)zic0dNR|w@^;zDMq<0 zT1WBPBS_R89#3g0EDX4sV5y{*BlRP91?rCJQk2Q5Gc}0T{B|jQc;bDNNUXR>#Oq3= z1puvgWit8E$mY0jRx*C@0eZ#io^_?kOeF6IzY5~@I+TE3c9zRH)W>FY`qGQL z%VFM#uxY7umpsiMAxGFT8EO-Nn4WA?8ctTUSZO|_eTi4BlxWKHe3+}=hXf4Qq4bk`f~ufn>QE2>nl6g%d6mdkWb z$Rw`K;mWGttVNne&bP@|O4tL31*EKHT2AKmS0X2peb19QM}#vZGBMh$2o0;8UeK&& zb7E7(m~AyvqozxI_JI>s$QV)aQAy?dh5j%7(J*Yb&={IYF^u(3#zPvRHDyjp2)2V0%Yr`z4Id$>rKUZaI zc%S`lKBaAuvz&2tTs4yoaKmtjnzsIYBfOZ0NVw8NM_ilsHEo`Bw3`B zO_G27hNp~WN=U;Tt*b!OgK#uXBmLk?^q{fGt>I*A?oWWtcs z!&S@Tkiov;X(MMPgvjHopClp#XwLnSjfU-j6TnDc;_8JtuCTfOqjRJT@Uv*?C|F}E zu|o2N`?cqJ?=L)iA^8-9pf(x3$@EVrSYnr0-^A+Y9#})lA zM@W!OKH+?N@Py$N zx3qj-|6XrAoN?8#mq7Bf%T7eNf)pG|@^01j6%bi3k8A9;MS(DzlP$0ZvoLMr?Crq@ ziE2D-`f8NPsJm4gUuwzkqbEArxaK-gX}5N7Lrl}x!BlKFowA5287YFNyaVN%!If6g zm8HDr{=hS?sXDR9DFw{(TuwCurmy|7ewa>J#*~@L@G1WfWroS6GiC1I^mEj^o&+`@ zH~e`o{c=XfP3f1JN;IbYmt}_K^7E;5IsA)$j(XRVz-9qn_R=qZoh7@@S7ut5F5(8$ zM>AT_puLp^R{xB}O@pO>0mO!DDcTK5ExOF$3(nW)t4o~vBJ1C6KG>l@Y?}9$vwc6@ zx5Ca@4D)HfBK58(fz9Vi+zi*^rFYKD=)k%@2iH$}^{6*}39Rqy7<)5Zi-FVTB5qbp zzasUnCxOjEzag$KqnMYQi>SDUuy%IcEc6@VdTkSZ^EzR0v3)T+lfhFYv@NbPrzJL% z9j53{Ow1XcJw-TnR;QtOSMU3ZRB_#z=iqCJTsPZuwifs* zW^+QeE~eUZMsf9HJ~!K6Z#M5g2g+WWK9e)fMu7d*k8wPw8pSD#_K)Mc4-IoD3t!)(UQ96O!u#p4z{I;@u{7G3t4 z#N8Tk-Dn}$N&&kHz)5EZE!OR=LGIvSiDiJBot7mUooPcSn$h7itO6W7$Hp_@gDf|3 zdC%>obL}cP`q;N6iCQy>!jOaYm{Ptq;(AoLY9;Pw$Y!M|aGjjz(`sK~)h9VVF%iPv z65PDtJEogl1xM-hafQXGs7IekF1HR*dK7q0N2 z)-DhehU79KE-p{z38y;nYT?VQ(>KkO^S*z89lK6o633U41N*>Y9N%fU5;kt2J6J9j z5SN{sh}`^MqMUHG`;LqCfr}0gMWxf;wEzI4- zctGQ1Q*PSu<5CM;u|oqVlS*H);Qze&5%lpDE~((6`2!;ks%&)Oh$bA0(vCb!OreB8 z;%k$vKp}rp;N~EG|r1<2N38Zan+|jaQ4DmcCTm`NxU!b&;O}T^Ck4r6al_ExA zP-CMn77v>LkUnLZyA8HJeBv^1|I4@z&vR8J$HfH+#*opZp^u2s`(U{8NJE_xY3cV z8O_eu^;O$IY)hB1Rubyt%A6!08%4-J^fd$$!Vs?G;!>9fWn~t&avqczrFympxS$f? 
zrd-({RS8_}a89sI?$BgYGi3c$6NK{`_lB!MQR?d+`)A3Kq=kyJ9M>yG=Uo^%;m3{C!T>bFS5?A(~IOLRv#(oT$ z#}+D40S>me9eAAUfk$lM>wkeVa81XhdSc>gCzEQ>jl4(^xS}2ynWdDm!;{fFJM@ex;!RjIG+ zW0(>SBN5*Zy%txf#jqc?U2=T-ax|;)FpCRZnE~Ts#O+b7Fcju?N4Vk(l!2>tn>jIY zWm4n^8Rl&dntjt(jLc4!|Ck98OEwz1ge$ZCTi`%hV0@GWQs6ovE)$VP+$@vj_yF9_ zc$G|_#7{a8#$-48JE-V8h?-C3ow?zb$2o+b%y}>a;qL0Tn<2zOS;(?mT)RO5Ggum| zhZ+#5xB0xaHNqUq{fftI?ao~dV3^a?>UnDep{?A;`G{+9x*TR&llBHtL+ayG(0#(M zhMDU9Kmu?30vo*>D(v_q@o3h~fiX24e9ckUC$?Mt2A@0}W?8X+%n{>d$ULs=(ZY#} zlWx`W6Ex$R)O9tfQ6>3dON-2t_MM>|B7>jm8n+K~<+}ilb%nz?pED4UhNsrIZi}`<$pI+RwT0i4G%VB|& z-P{0zsB#`7YI*NhGB-GYfb+j6IDd{4pJOi9$6`@$HQn4!lWBcIKX|%m2tO6mKbvx| zd3M-l$!~+}%9)zE$z)vH!;`i@OXsnTD@~livav`aG zMv{3r*@T%&&OL$r|835!2Fi^*Zo86km2*&>Y2$p7;bUv-G+-@l}=@Ip%Q zg%Opw7N2xPDgK2TEqiVYr4JmPgp~L^nGg51HP%c z;h&MTq&x)97_$jQoDN4^|CTVa%5w4XsxYo*le5jW7Q```9Jp#ySx3=PeiRx-8WnZY zcYjIWlSFlJ3@Mr&c!Vo)b$reKx^LAsv#AZ12zU=y5@=4Y{IoRU8b(y2ReaL1Z8`}A zh3604+4m!vQo75}fyQ?!ft;zB;c7apd0rF}WdPOyXGk=X%7m*12VCPYT%6X%&MM)m zJGNC-fyBFhd0ikur%= zDDobD()T1$a{1|c@oFq*Inn;=M#-`oE4Z>ssNzd@q$a*FqJ}teHC(SdmrPczAJd47 zsUdi28N3bb-cV6?ojB0g@F;MNGBUrZ6X*n}vBDorFWq^DSmSr}J| zNZ?wE&QMz*%H@O3jrl{QBzmv7QXA2z96>w@e3(CitJz=q{J zabt&w{?ZQcCM*D0wol5>t+I~m^~b63^g+tEzEHHKk&bpzXr#VNsdj>AB>>L2!c>o* zfzMZ!@wUJfIwqn~W+Zi7d0r+6(F#cNzr-L%=4R8U{Cw?wJ~H9j6CIk69RTa{kp{@B zDUsekt{lGcnZS}|T{M%hb!rL?IWY*F+ITN;#T_kfUXzA)MP{@ zt`w&03{}reyDHp_l{8YBg&O8I{0yNoH$3oibB*wAxE|Hw1PFtBBg; zb1e&u{BE=kHE=`%FE$F33G{{a<=}&q-K?yGMAkwIMo9F34-!q>z9_O)?|KPbO~`N% zJk&A~?fP@YgDhjWU$4MQLUT3$YJF8|Y$1Ve^End&XcUk+r=ykd-j2v8ptS{ZtBmjekO@TNv7hW_0Irc6a{MHr09$(oS#aG- zR?IfgeL98Ba=D>5p6G$WLbv(2p* zG=4GXCws+U|En28g&UO6o~zPS+!0p~3|y(G+#8F_@`$_b;3r)e;z}VJuKwXiV2iz~ zoE^kGJh!s0!)V$4fZ4#l`|5}FIr^#B-mN9{egX}n8WazR#CA_qqckms36Zq}qOdOG zHv$9K*c(e+;TSB8;lcN>xxZA8)zlAjg4XSJ#MLdV8>QR9{Q$?Sl{Ed^Yfx)J)u9s8 zgx=ej)%~y3NU0@o2@@i#$8eaJb70`g>23ABNTh8?8Q1PP`&~oS^_BOrS5^AT00wOY zi%iUO+ravKhvm-W?tW77p-6pArIn_|Fd;IMxbn%`vnv*RW2vKDCs69^@=aD2;Tj)) z6tdP26H~d-nWx0o7M>ZS=*-reEeO9SNhdaR0-!Ogtn?TsgYfAoyW(JPEMpMFbxPI` z(;VC7qrHWzZ8JUhy{Z~Zg@v^}e7zIZ>Cz(C4BoWwGuh^Qb(zNI% zMDi_MH7fEzvrV#dRzHkvI7Y<2P^srDZrWwuGim)W67$24NXO+5dsNo2u(p04`0a!= zXAemMbKM(xLvhcYO*C2G`kiBC;9Ouh;(cl}h&Kf32HCoK_PQkVZ*NdW%JOX@Gtcwl z3HHKC7E+F1;!Tn+;=kHL=-rm1B`>Cqg>Tu`irI+*xlP{7 znaFl&*YYNc{rD$1BXLWdw$A=#U!wExHqJq9HMi6&_o-Q3w+3Yg8H#DAo^0pb{N`$W zp1-6j2U=6uI4tNyfUJqlneth;YyH=M}b zrS5Js@4R~fJXwklJ$Sl&vXXt^iaqS>PJO{~_}kVG7@KSk`&jvw9FEJBTj1o(a5yAp zd5{xfg5nS#a6H8+Ea9+XZOrK;E=HVlak)FJb^`(Zo?E^{EG`dsdKx~v?)C25;y8=M z@)jO!VmPwpdsupzYc!w`G8hCu*e&IsoU?y&aM7l)QO|;h`y|IhB6xp+llX9EgnrpD zTKhsA*C}%OB9J&Nz>F2dIxQxS@X``veA*0~1x`tedwy^gP|AP>s%jf8D4k<}2pvN9 za6(XWzOl>hpJN5=h~r-0i!+=F1jf(G#<-Fe`OmNik>N$P%lb1aWI+D|IWG5h&qV zHOBw0sKWKU-8-~Js?2VYQ3afFhVMb>qF85uj^O-JKXzWgzt&tN~^w9h5 z(V^r5t))AqCnFaD^EJb?-6^&h%;j?C!D&qEifpP~GWKx<$gZ4C;%QOePu zT@aU|?RvG2$b7TdXSJigy-tYf97ohiRqa)~lp9EFPy|xDC+E2O4s~se1mi~#->MY) zEa9XvwgqbtIGlB#Ih2Vq^gk#)^gw%kAQXd}`fB3e17N~|KRlPzo#JuM9eY(km;U49 zlh8@z;i2PnP|HNYl-&+vqz3b|f=2Pp&ylasyx+C^WYy4cT-5?r6Q}(BYL{~B$zYYj z{e}dtD%G~!00n-c+nu#m-5gMia%kV!V21}z?CwD)bcD}%nUaUPLKaZbcK;+~1&ATx z@1cmo1-t9)1T;d#?ujsZhYpR&|;E=0?@Y!xm6HR++C~ZKq^%>&YOO@i4x!!aC{D`{5opo~U12 zjfu34gISK2I8oR|LjlBJd&GX2cTnX9{V-^?e{8ry?EE^Y91geJJLi#jxD)Q|h3ymQ zeAHSZq7NVb)Ze0+0Q;a}iK2&=qBGY|9Y<~p?^b%%!ZtGApXoPZq_+K*Xw}c1E%9AJPgNoYqnmF^TbULLJpkW(Na){D1SJ>k`O`KnONG(;MH5K7bdgvLb z#xJ=18_bN47IXx30A}^`O7tpAg$AyL6ujD&%H>wdpqKqYZW4$%AjUePkv_|Ns#OME zFo>W+Si6S|y|->KpoZ7=6^}98;J8e+eXSTj1TL*i*TPUM**uluq=Yr9!TL=jCV0Lp^ zZiNgY@&T?%3sCmr1jiDjfs*Zfy*SCw*cd7f*o;Fe7|izJ?ZWYOSx8f;sE`H6!zV`v 
zJm6sYy|z*2AutA0R6yWzer(+_aOHOQV`Rn|wZdx;=eoOkQcdwpMsONA(QD}xeE8=X zL`MQg6&-M>B!|uS?$i4#`~^lHgUdq6h^ynDtKFAdPX;;v!}#n_rgET!4o8?nvETy! zqJ=u65JJd>A<|O4%$B%f$kO7YopMLW0mp6ffOgrvdHyWJN*!Y~a5=?E#RSInQtvj1 zLPvehll^&YupCg`<0&NZsU5yRx%QKp<^oSvQ_iF4H=%k~Epog_2rsO5_Dyz1^_(87 z>Wf(4UbXvjD`n85@Lt_4l7buCoIkWT21yB{LtHtrwm7kV`NU3gYcU^ZyT%@j>t-sM-h4 zLU;9IBU*D4ytw+Xd*B1uMe!!>W1of19H@QKUWG3Y>KDUYzMaHP@3Wb9Hl~j!Y``@B z?Y_bQJK2E0d$k6*JTy`w+~V!j(ifj6WWcMumOq<9r>n5a4!#cQPHzL*Qklli5&uZ9 zm?_f-y2u_+Z{bi-V|m%D8vj+8W_4}iynK;uFKB7y90ao3+XG);=ro#5Z}DX&i}zoZ z7ZW30lg|Y*%+jx88220(WBQ-BX3A5<`Sr^;T|u5hZX4gCn29Q+F?sCW;|$*s(aQ;5GohJ46Fl-A6F&WNPLPwR_8-^uCV-c7%;MO5Av39+ z2S#U5mOLjzqG7wFH#tQ_9+c5tjYGSNE5Ca~XAOw)FPk4lSVD5%iHvu8rNIT8BAIt~S62?t@e5h;11Q6;+8zYWPgska=DWbzl zMP8+0#`Fcw!F2UW5utm0V5iQFf&H48t&d2I}HaW$_#Z(jd8CJfooVv-3n zg|y5O=8u}c`fIen;(SLC7%_!TR-q^m-o6S#EC+nfqz6yT`nw2@d41XZDL1mSNVK1l?uaRL zvI@psQpnuLAR<-*7^j-pj_UYuERCov$s_sokp&UtDDVhKVb)f-5XQiDU+}6=7D$Ga z+QL2-LQuu=sY%qS5j=(wdv`fW+-O4fM@fP`86^3DwizKJI>@LsxeiC~w2T>|?nC8(OT6s7?Dve6A8-dsISgHBMrk z@yC8tzqqPfl2&1Sh)g!X!c{T;th(US8Kgu`OKMDdi_-q%g2=s}rSV`~VnG}Wp<~fE zGKCuR_6uUU3Z$yfwbDZu_ByeiIuiS0sHG5F8fdX`qq7jThT`cF@;W6fwb{FDf$uG5G@n{Dj%!rU5Z6PtBv4--AlISD&HgcIRJk| zvSoJzAON&=pWvGMO0dAD)xmzL(Hz6Du4j0C#S;*!$q}`xo)%mn7st0Y*MYuTU-Mv< zdyg>OBT*KrS$(~vo;@u}2cM*(TLKK0Q!_x3*)P(3mME!Z4z>% literal 0 HcmV?d00001 diff --git a/osfmk/console/panic_ui/images/rendered_numbers.tiff b/osfmk/console/panic_ui/images/rendered_numbers.tiff new file mode 100644 index 0000000000000000000000000000000000000000..6deae082e5db6e3fe364200e69e24d3c14aab4eb GIT binary patch literal 3218 zcmeHIS8o$R5S|SX5WIjz6TCz`ScpT&7=?go3B3ivh29A<|7goy6?9^|{I_2fmji|7csNgdZwG61|ef;?0!~6H|-g(}e zH?Lp6diC<60f!j~_pJ^zh+>2lwyayLb2Qojaa)`}VC{H*em!asB$W zYgeycxpMjPrAwZ7@#2LG=g*%zclPX=GoNrypFVZ!>sb$NSE?u%@@!~~`9H*vc;lc$As;lSEud13iZ|>YVb7s$;HEZU~ z88fC&pEhmk)G1RYPo6ljveLFEOsJ?BKYrY}^764`$BY?0deo?qBS(xFK781)Awvca zE-Nc79W-d*zySmL_wUy)k?7mEPoI*K-o1;9lYjXT1pz}Yko=5hdwltl&pO*8pFWVi zeVa_~-o1MDq)9`EcIlE#68-M{iSo^#=>PnK9_Oc?7sx{{ko=5hPdA$n{dlgwa(*;_ zqCbEC-_PIk{=fg+X@pbHKfzr4?0e|TFE(2f@cE^IHl&T`Usj!}N)}brr0P?3HL2R7 zk{&UN$6~z_v3Me0T-389(JP)PjuEw{uhbO(ITZf*6YyH84Btu>P!mzNfK3yhhp{U3 zd1zIez`l8*YL{ixK9Ex@)gj=UHKS#bBICbI$Hz)#|s zxB)6{7*|F>+6SOG7Gb|F6I6^MSbi=5{g*&i#4%n9@U@HFB#=L>#sFk?9Go*b#Tto$GfI}(1$2m!N&BO@02*~b(&X-d*&i+4S&jZ5+e zmR3+v#C2VMQ|tm|?hbJ2**hR{@d_NXE285#d{CC@b*2oyIw)6~Z(v4nMHkqZ-v^J> zxG?~pg)F|d1(1pN9RST}g5LrB9B}veEAAjzFB};p4;yLFYY}r+qmx-XkfLtXg5yGm ZyRk?A*nz*K*he@Bq!F@!?DG!~eFIZAZo2>g literal 0 HcmV?d00001 diff --git a/osfmk/console/panic_ui/images/rendered_numbersWHD.raw b/osfmk/console/panic_ui/images/rendered_numbersWHD.raw new file mode 100644 index 0000000000000000000000000000000000000000..35290cd565b35a74d0ad11be0fb3d0a16b84e8c9 GIT binary patch literal 1425 zcmc(f%Z-#5r#3>lF z$JBL2MANM+E~AUUihP@qknujGZV}ziEs`5+=S_xCUY@1!TRD(Vr?-_c);g~OkP?+s z(XL1edCl@pK)Hkim!O}r9QqlEV!*T1L()e#o`Jjt7m)+a)*+?sx3WZ)mdDk7Z2*3+ zIBVny00Ov$vvXM?hs|l`86JCaqRqgcYZV zSe$4N>l1)yM{+8Sn0q<*pv9R9^4f^vRAJ#fqt`BoA&{B=^Buqk11|Hv1GPn%M9yer zRMiq!XV>rEXlV~@HJRd=bMj3QBIElG(E8sUa9(`{qjtqM@^I8vas6roaj0FBUoa1e zVgYBq4@O#H0ALpW3i2|b%J&aIg68=@puYpyj#Z0G`o^@tYJT!V5^u7tb$98n#sc literal 0 HcmV?d00001 diff --git a/osfmk/console/panic_ui/qtif2kraw.c b/osfmk/console/panic_ui/qtif2kraw.c new file mode 100644 index 000000000..6627e7eaa --- /dev/null +++ b/osfmk/console/panic_ui/qtif2kraw.c @@ -0,0 +1,892 @@ +/* converts a QT RAW 8-bit image file into format the kernel panic ui expects. 
+/* converts a QT RAW 8-bit image file into the format the kernel panic ui expects.
+ *
+ * to build: cc -o qtif2kraw qtif2kraw.c
+*/
+
+/* [the original #include names were lost in extraction; the headers
+   below are plausible stand-ins covering the calls this file makes] */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr );
+int findIndexNearMatch( unsigned int color24 );
+unsigned int findColor24NearMatch( unsigned int color24 );
+unsigned char findIndexMatch( unsigned int color24 );
+int convert8toGrey( unsigned char * data, unsigned int size );
+int convert8bitIndexto8( unsigned char * data, int height, int width, unsigned char ** dout );
+unsigned int * CreateCLUTarry( unsigned char * raw_clut );
+unsigned int * ReplaceCLUT( char * iname );
+
+#define offsetof(type, field) ((size_t)(&((type *)0)->field))
+
+struct panicimage {
+	unsigned int pd_sum;
+	unsigned int pd_dataSize;
+	unsigned int pd_tag;
+	unsigned short pd_width;
+	unsigned short pd_height;
+	unsigned char pd_depth;
+	unsigned char pd_info_height;
+	unsigned char pd_info_color[2];
+	unsigned char data[];
+};
+
+
+void
+usage( int type ) {
+printf(
+"\n"
+"Usage:\n"
+"\tqtif2kraw -i <.qtif> -o <.kraw> [operands ...]\n\n"
+"\tThe following operands are available\n\n"
+"\t-h\t\tDisplay full help information\n"
+"\t-i <file>\tUse file containing QuickTime uncompressed raw image as\n"
+"\t\t\tthe panic dialog (8 bit only)\n"
+"\t-o <file>\tWrite the output as a compressed kernel RAW image suitable\n"
+"\t\t\tfor loading into the kernel\n"
+"\t-c <file>\tUse file containing 256 RGB values for 8-bit indexed\n"
+"\t\t\tlookups, overrides built-in appleClut8\n"
+"\t-fg <color>\tForeground color of font used for panic information in\n"
+"\t\t\t24-bits, default 0xFFFFFF (100%% white)\n"
+"\t-bg <color>\tBackground color of font used for panic information in\n"
+"\t\t\t24-bits, default 0x222222 (13%% white, dark gray)\n"
+"\t-n <lines>\tNumber of lines that have been reserved to display the\n"
+"\t\t\tpanic information, must be at least 20\n"
+"\n\n" );
+}
+
+
+#include "appleclut8.h"
+#include "../iso_font.c"
+
+struct QTHeader {
+	long idSize;		/* total size of ImageDescription including extra data ( CLUTs and other per sequence data ) */
+	long cType;		/* 'raw '; what kind of codec compressed this data */
+	long resvd1;		/* reserved for Apple use */
+	short resvd2;		/* reserved for Apple use */
+	short dataRefIndex;	/* set to zero */
+	short version;		/* which version is this data */
+	short revisionLevel;	/* what version of that codec did this */
+	long vendor;		/* whose codec compressed this data */
+	long temporalQuality;	/* what was the temporal quality factor */
+	long spatialQuality;	/* what was the spatial quality factor */
+	short width;		/* how many pixels wide is this data */
+	short height;		/* how many pixels high is this data */
+	long hRes;		/* horizontal resolution */
+	long vRes;		/* vertical resolution */
+	long dataSize;		/* if known, the size of data for this image descriptor */
+	short frameCount;	/* number of frames this description applies to */
+	char name[32];		/* name of codec ( in case not installed ) */
+	short depth;		/* what depth is this data (1-32) or ( 33-40 grayscale ) */
+	short clutID;		/* clut id or if 0 clut follows or -1 if no clut */
+} image_header;
+
+static unsigned int mismatchClut[256];
+static int nextmis = -1, neargrey = 0, cvt2grey = 0, exactmatch=0;
+static int grey = 0, debug = 0;
+static unsigned char fg, bg;
+unsigned int * panic_clut = NULL;
+static char * clutin = NULL;
+
+union colors {
+	unsigned int c24;
+	unsigned char rgb[4];
+	struct {
+		unsigned char dummy;
+		unsigned char red;
+		unsigned char green;
+		unsigned char blue;
+	} clut;
+};
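+
+/* A typical invocation (illustrative only; the file names here are
+ * hypothetical):
+ *
+ *	qtif2kraw -i panic_dialog.qtif -o panic_dialog.kraw -n 20
+ *
+ * This verifies the 8-bit QTIF, matches its pixels against the panic
+ * CLUT, RLE-encodes the result, and writes a .kraw image suitable for
+ * loading into the kernel. */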
+
+int
+main( int argc, char *argv[] )
+{
+	char *file = NULL;
+	char *kraw = NULL;
+	FILE * stream;
+	unsigned char * data;
+	unsigned short width = 0, height = 0;
+	unsigned char depth = 0, lines = 20;
+	unsigned int i, pixels, sum, encodedSize, fg24= 0xFFFFFF, bg24=0x222222;
+	unsigned char *fileArr;
+	int next;
+
+
+	// pull apart the arguments
+	for( next = 1; next < argc; next++ )
+	{
+		if (strcmp(argv[next], "-i") == 0) // image file in raw QT uncompressed format (.qtif)
+			file = argv[++next];
+
+		else if (strcmp(argv[next], "-o") == 0) // output file for WHD image
+			kraw = argv[++next];
+
+		else if (strcmp(argv[next], "-n") == 0) // number of reserved lines
+			lines = atoi(argv[++next]);
+		else if (strcmp(argv[next], "-fg") == 0) // foreground color in 24 bits
+			sscanf(argv[++next], "%i", &fg24);
+		else if (strcmp(argv[next], "-bg") == 0) // background color in 24 bits
+			sscanf(argv[++next], "%i", &bg24);
+		else if (strcmp(argv[next], "-c") == 0) // input file for clut
+			clutin = argv[++next];
+		else if (strcmp(argv[next], "-h") == 0) // display more help
+			{ usage(1); exit(1); }
+
+		else if (strcmp(argv[next], "-debug") == 0) // verbose
+			debug++;
+	}
+
+	if (!(file || kraw) ) {
+		usage(0);
+		exit(1);
+	}
+
+	printf("\n");
+
+	panic_clut = appleClut8;
+
+	if ( clutin )
+	{
+		panic_clut = ReplaceCLUT( clutin );
+		printf("Built-in CLUT has been replaced with %s...\n", clutin);
+	}
+
+	fg = findIndexNearMatch(fg24);
+	bg = findIndexNearMatch(bg24);
+
+	// Begin to process the image
+
+	if( file == NULL)
+	{
+		printf("No image file was processed...\n\n");
+		exit(0);
+	}
+
+
+	printf("Verifying image file...\n");
+	if ( file != NULL )
+	{
+		stream = fopen(file, "r");
+		if (!stream) {
+			fprintf(stderr, "Err: could not open .qtif image file.\n\n");
+			exit(1);
+		}
+
+		{
+			long hdr_off;
+			long hdr_type;
+			int rc;
+
+			if ( ! fread((void *) &hdr_off, sizeof(long), 1, stream) ) goto errQTimage;
+			if ( ! fread((void *) &hdr_type, sizeof(long), 1, stream) ) goto errQTimage;
+
+			if ( hdr_type != 'idat' ) goto errQTimage;
+
+			if ( fseek(stream, hdr_off, SEEK_SET) ) goto errQTimage;
+			if ( ! fread((void *) &hdr_off, sizeof(long), 1, stream) ) goto errQTimage;
+			if ( ! fread((void *) &hdr_type, sizeof(long), 1, stream) ) goto errQTimage;
+
+			if ( hdr_type != 'idsc' ) goto errQTimage;
+
+			rc = fread((void *) &image_header, sizeof(image_header), 1, stream);
+			if ( !rc && !feof(stream) ) goto errQTimage;
+			if ( image_header.cType != 'raw ' ) goto errQTimage;
+			if ( image_header.depth != 8 ) goto errQTimage;
+
+
+			width = image_header.width;
+			height = image_header.height;
+			depth = image_header.depth;
+
+			printf("Image info: width: %d height: %d depth: %d...\n", width, height, depth);
+
+			if (!(width && height && depth)) {
+				fprintf(stderr,"Err: Invalid image file header (width, height, or depth is 0)\n");
+				exit(1);
+			}
+		}
+
+		if ( !(data = (unsigned char *)malloc(image_header.dataSize))) {
+			fprintf(stderr,"Err: Couldn't malloc file data (%ld bytes)... bailing.\n", image_header.dataSize);
+			exit(1);
+		}
+
+		// Read the image data
+		if ( fseek(stream, 8, SEEK_SET) ) goto errQTimage;
+		if ( ! fread((void *) data, image_header.dataSize, 1, stream) ) goto errQTimage;
+		fclose( stream );
+
+		pixels = image_header.dataSize;
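+
+		/* two normalization passes follow: an optional conversion of
+		   the image to grayscale, then convert8bitIndexto8(), which
+		   strips the QTIF rows' word-alignment padding and remaps each
+		   pixel onto the panic CLUT, leaving a tightly packed
+		   width*height array of CLUT indices */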
+
+		if ( grey == 1 )
+			pixels = convert8toGrey( data, image_header.dataSize );
+
+		printf("Converting image file to 8 bit raw...\n");
+		pixels = convert8bitIndexto8( data, height, width, &data );
+		image_header.dataSize = pixels;
+		depth = 1;
+
+		printf("Converted %d pixels%s...\n", pixels/depth, ((grey==1)?" to grayscale":""));
+		if ( exactmatch > 0 )
+			printf("Found %d color matches in CLUT...\n", exactmatch);
+		if ( cvt2grey > 0 )
+			printf("Converted %d colors to gray...\n", cvt2grey);
+		if ( neargrey > 0 )
+			printf("Adjusted %d grays to best match...\n", neargrey);
+		if ( nextmis > 0 )
+			printf("Total of %d separate color mismatches...\n", nextmis);
+	}
+
+	printf("Encoding image file...\n");
+
+	if (!(fileArr = (unsigned char *) malloc(pixels))) {
+		fprintf(stderr,"Err: Couldn't malloc fileArr (%d pixels)... bailing.\n", pixels);
+		exit(1);
+	}
+
+	encodedSize = EncodeImage( data, pixels, fileArr );
+
+	if ( encodedSize >= pixels )
+	{
+		printf("Skipping encoding...\n");
+	}
+
+	/* [the checksum loop body was lost in extraction; reconstructed] */
+	for (sum=0,i=0; i<encodedSize; i++) {
+		sum += fileArr[i];
+		sum <<= sum&1;
+	}
+
+	/* [lost in extraction: filling in the struct panicimage header
+	   (pd_sum, pd_dataSize, pd_tag, dimensions, the reserved info
+	   lines and fg/bg colors) and writing it plus the encoded data to
+	   the -o <.kraw> file, followed by main()'s closing code and the
+	   errQTimage error exit] */
+}
+
+union RunData {
+	unsigned char c[4];
+};
+
+/* [CheckRun()'s signature and opening bounds check were lost in
+   extraction and are reconstructed here: it reports whether the 1- to
+   4-byte pattern at data[*index] continues the run at currP, advancing
+   *index past the matched bytes] */
+int
+CheckRun( unsigned char * data, int * index, int max, union RunData * currP, int * depth, int retc )
+{
+	union RunData * nextP;
+
+	if ( (*index + *depth) > max )
+	{
+		*depth = 1;
+		retc = 0;
+		goto Leave;
+	}
+
+	nextP = (union RunData *) &data[*index];
+
+	if ( retc == 1 )
+	{
+		// check current data against current depth
+		switch ( *depth )
+		{
+		case 1:
+			if ( nextP->c[0] == currP->c[0] )
+				goto Leave;
+			break;
+		case 2:
+			if ( nextP->c[0] == currP->c[0] &&
+			     nextP->c[1] == currP->c[1] )
+				goto Leave;
+			break;
+		case 3:
+			if ( nextP->c[0] == currP->c[0] &&
+			     nextP->c[1] == currP->c[1] &&
+			     nextP->c[2] == currP->c[2] )
+				goto Leave;
+			break;
+		case 4:
+			if ( nextP->c[0] == currP->c[0] &&
+			     nextP->c[1] == currP->c[1] &&
+			     nextP->c[2] == currP->c[2] &&
+			     nextP->c[3] == currP->c[3] )
+				goto Leave;
+			break;
+		}
+
+		retc = 0;
+		goto Leave;
+	}
+
+	// start of a new pattern match; begin with depth = 1
+
+	if ( (*index+6) <= max )
+	{
+		// We have at least 8 bytes left in the buffer starting from currP
+#if 1
+		nextP = (union RunData *) &data[*index+3];
+		if ( nextP->c[0] == currP->c[0] &&
+		     nextP->c[1] == currP->c[1] &&
+		     nextP->c[2] == currP->c[2] &&
+		     nextP->c[3] == currP->c[3] )
+		{
+			// check if they are all the same value
+			if ( currP->c[0] == currP->c[1] &&
+			     currP->c[1] == currP->c[2] &&
+			     currP->c[2] == currP->c[3] )
+			{ // if so, leave at depth = 1
+				retc = 1;
+				*depth = 1;
+				goto Leave;
+			}
+
+			if (debug>2) printf("Found 4 at %x\n", *index);
+			retc = 1;
+			*depth = 4;
+			*index += 3;
+			goto Leave;
+		}
+
+		nextP = (union RunData *) &data[*index+2];
+		if ( nextP->c[0] == currP->c[0] &&
+		     nextP->c[1] == currP->c[1] &&
+		     nextP->c[2] == currP->c[2] )
+		{
+			// check if they are all the same value
+			if ( currP->c[0] == currP->c[1] &&
+			     currP->c[1] == currP->c[2] )
+			{ // if so, leave at depth = 1
+				retc = 1;
+				*depth = 1;
+				goto Leave;
+			}
+
+			if (debug>2) printf("Found 3 at %x\n", *index);
+			retc = 1;
+			*depth = 3;
+			*index += 2;
+			goto Leave;
+		}
+
+		nextP = (union RunData *) &data[*index+1];
+		if ( nextP->c[0] == currP->c[0] &&
+		     nextP->c[1] == currP->c[1] )
+		{
+			// check if they are all the same value
+			if ( currP->c[0] == currP->c[1] )
+			{ // if so, leave at depth = 1
+				retc = 1;
+				*depth = 1;
+				goto Leave;
+			}
+
+			if (debug>2) printf("Found 2 at %x\n", *index);
+			retc = 1;
+			*depth = 2;
+			*index += 1;
+			goto Leave;
+		}
+
+#endif
+		nextP = (union RunData *) &data[*index];
+
+	}
+
+	if ( nextP->c[0] == currP->c[0] )
+		retc = 1;
+	else
+		retc = 0;
+
+Leave:
+
+	if ( retc == 1 )
+		*index += *depth;
+
+	return retc;
+}
+
+int
+EncodeImage( unsigned char * data, int pixels, unsigned char * fileArr )
+{
+	union RunData * currP, * norunP ;
+	int i, depth;
+	unsigned int filePos, run, nomatchrun;
+
+	currP = NULL;
+	norunP = NULL;
+	nomatchrun = 0;
+	filePos = 0; // position in the file we're writing out
+	run = 1;
+	depth = 1;
+
+	currP = (union RunData *)&data[0]; // start a new run
+	for (i=1; i < pixels;)
+	{
+		/* [the CheckRun() call was lost in extraction; reconstructed:
+		   retc is 1 once a run is already established] */
+		if ( CheckRun( data, &i, pixels, currP, &depth, (run > 1) ) )
+			run++;
+		else
+		{
+			if ( run > 2 ) {
+				unsigned char * p = (unsigned char *)norunP;
+
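+				/* a run long enough to encode ends the current
+				   stretch of literals: flush the accumulated
+				   no-match bytes first, as a 7-bit count (127
+				   max) followed by that many raw bytes, so a
+				   long stretch is split into chunks */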
+				if( nomatchrun ) {
+					while (nomatchrun) {
+						int cnt;
+
+						cnt = (nomatchrun > 127) ? 127 : nomatchrun;
+						fileArr[filePos++] = cnt;
+						nomatchrun -= cnt;
+
+						while ( cnt-- )
+							fileArr[filePos++] = *p++;
+					}
+				}
+
+				filePos += encode_rle(fileArr, filePos, run, currP, depth);
+
+				norunP = NULL;
+			} else {
+				nomatchrun+=run;
+			}
+
+			currP = (union RunData *)&data[i]; // start a new run
+
+			if( norunP == NULL ) {
+				nomatchrun = 0;
+				norunP = currP;
+			}
+
+			depth = 1;	// switch back to a single byte depth
+			run = 1;	// there is always at least one entry
+			i++;		// point to next byte
+		}
+	}
+
+	if( nomatchrun ) {
+		unsigned char * p = (unsigned char *)norunP;
+		while (nomatchrun) {
+			int cnt;
+
+			cnt = (nomatchrun > 127) ? 127 : nomatchrun;
+			fileArr[filePos++] = cnt;
+			nomatchrun -= cnt;
+
+			while ( cnt-- )
+				fileArr[filePos++] = *p++;
+		}
+	}
+
+	// write out any run that was in progress
+	if (run > 0) {
+		filePos += encode_rle(fileArr, filePos, run, currP, depth);
+	}
+
+	return filePos;
+}
+
+/* encode_rle applies a "modified-RLE" encoding to a given image. The encoding works as follows:
+
+	The quantity is described in the first byte. If the MSB is zero, then the next seven bits
+	are the quantity. If the MSB is set, bits 0-3 of the quantity are in the least significant bits.
+	If bit 4 is set, then the quantity is further described in the next byte, where an additional
+	7 bits (4-10) worth of quantity will be found. If the MSB of this byte is set, then an additional
+	7 bits (11-17) worth of quantity will be found in the next byte. This repeats until the MSB of
+	a quantity byte is zero, thus ending the chain.
+
+	The value is described in the first byte. If the MSB is zero, then the value is in the next byte.
+	If the MSB is set, then bits 5/6 describe the number of value bytes following the quantity bytes.
+
+	encodings are: (q = quantity, v = value, c = quantity continues)
+
+	Byte 1		Byte 2		Byte 3		Byte 4		Byte 5		Byte 6		Byte 7		Byte 8
+	case 1:	[ 0 q6-q0 ]	[ v7-v0 ]
+	case 2:	[ 1 0 0 c q3-q0 ]	[ c q10-q4 ]	[ c q17-q11 ]	[ q24-q18 ]	[ v7-v0 ]
+	case 3:	[ 1 0 1 c q3-q0 ]	[ c q10-q4 ]	[ c q17-q11 ]	[ q24-q18 ]	[ v7-v0 ]	[ v7-v0 ]
+	case 4:	[ 1 1 0 c q3-q0 ]	[ c q10-q4 ]	[ c q17-q11 ]	[ q24-q18 ]	[ v7-v0 ]	[ v7-v0 ]	[ v7-v0 ]
+	case 5:	[ 1 1 1 c q3-q0 ]	[ c q10-q4 ]	[ c q17-q11 ]	[ q24-q18 ]	[ v7-v0 ]	[ v7-v0 ]	[ v7-v0 ]	[ v7-v0 ]
+*/
+
+unsigned int
+encode_length(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned int mask)
+{
+	unsigned char single_mask = 0x0F;
+	unsigned char double_mask = 0x7F;
+	unsigned int slots_used = 0;
+
+	fileArr[filePos] = mask | (quantity & single_mask); // low bits (plus mask)
+	slots_used++;
+
+	if (quantity >>= 4) {
+		fileArr[filePos++] |= 0x10; // set length continuation bit
+		fileArr[filePos] = quantity & double_mask;
+		slots_used++;
+
+		while (quantity >>= 7) {
+			fileArr[filePos++] |= 0x80; // set length continuation bit
+			fileArr[filePos] = quantity & double_mask;
+			slots_used++;
+		}
+	}
+
+	return slots_used;
+}
+
+
+unsigned int
+encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, union RunData * value, int depth)
+{
+	unsigned char slots_used = 0;
+
+
+	switch ( depth ) {
+	case 1:
+		slots_used += encode_length( fileArr, filePos, quantity, 0x80 );
+		fileArr[filePos+slots_used++] = value->c[0];
+		break;
+
+	case 2:
+		slots_used += encode_length( fileArr, filePos, quantity, 0xA0 );
+		fileArr[filePos+slots_used++] = value->c[0];
+		fileArr[filePos+slots_used++] = value->c[1];
+		break;
+
+	case 3:
+		slots_used += encode_length( fileArr, filePos, quantity, 0xC0 );
+		fileArr[filePos+slots_used++] = value->c[0];
+		fileArr[filePos+slots_used++] = value->c[1];
+		fileArr[filePos+slots_used++] = value->c[2];
+		break;
+
+	case 4:
+		slots_used += encode_length( fileArr, filePos, quantity, 0xE0 );
+		fileArr[filePos+slots_used++] = value->c[0];
+		fileArr[filePos+slots_used++] = value->c[1];
+		fileArr[filePos+slots_used++] = value->c[2];
+		fileArr[filePos+slots_used++] = value->c[3];
+		break;
+	}
+
+	return slots_used;
+}
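+
+/* Worked example (illustrative, not part of the original tool): a run of
+ * 300 repeats of the two-byte pattern 0x55 0xAA has depth 2, so
+ * encode_rle() passes mask 0xA0. 300 = 0x12C: the low four bits (0xC)
+ * land in the first byte together with the 0x10 continuation bit, and
+ * the remaining seven bits (0x12) land in the second byte:
+ *
+ *	0xBC 0x12 0x55 0xAA
+ *
+ *	0xBC = 101 (two value bytes) | 1 (quantity continues) | 1100 (q3-q0)
+ *	0x12 = 0 (quantity ends) | 0010010 (q10-q4)
+ */
+
+/* A sketch of the matching decoder for the quantity chain, assuming the
+ * caller has already ruled out the one-byte case-1 form (MSB clear).
+ * Reads the quantity starting at data[*pos] and leaves *pos past the
+ * last length byte. Illustrative only; the kernel's own decoder is not
+ * part of this file. */
+static unsigned int
+decode_length( const unsigned char * data, unsigned int * pos )
+{
+	unsigned int quantity = data[*pos] & 0x0F;	/* q3-q0 */
+	int shift = 4;
+
+	if ( data[(*pos)++] & 0x10 ) {			/* first continuation bit */
+		do {
+			quantity |= (unsigned int)(data[*pos] & 0x7F) << shift;
+			shift += 7;
+		} while ( data[(*pos)++] & 0x80 );	/* chained continuation bits */
+	}
+
+	return quantity;
+}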
+
+
+int
+findIndexNearMatch( unsigned int color24 )
+{
+	union colors color8;
+	union colors clut8;
+	int isGrey = 0;
+
+	color8.c24 = color24;
+
+	if ( color8.clut.red == color8.clut.green && color8.clut.green == color8.clut.blue )
+		isGrey = 1;
+
+	if ( isGrey ) {
+		int i;
+		unsigned int bestIndex = 0, rel, bestMatch = -1;
+
+		for (i=0; i<256; i++) {
+			clut8.c24 = panic_clut[i];
+
+			if ( clut8.clut.red != clut8.clut.green || clut8.clut.green != clut8.clut.blue )
+				continue;
+
+			if ( clut8.clut.red > color8.clut.red) continue;
+			rel = abs(color8.clut.red - clut8.clut.red);
+			if ( rel < bestMatch ) {
+				bestMatch = rel;
+				bestIndex = i;
+			}
+		}
+
+		return bestIndex;
+	}
+
+	// we must have a non-grey color
+	return -1;
+}
+
+unsigned int
+color24toGrey( unsigned int color24 )
+{
+	float R, G, B;
+	float Grey;
+	union colors c;
+	unsigned char grey8;
+	unsigned int grey24;
+
+	c.c24 = color24;
+
+	R = (c.clut.red & 0xFF) ;
+	G = (c.clut.green & 0xFF) ;
+	B = (c.clut.blue & 0xFF) ;
+
+	Grey = (R*.30) + (G*.59) + (B*.11);
+	grey8 = (unsigned char) ( Grey + .5);
+	grey24 = (grey8<<16) | (grey8<<8) | grey8;
+	return grey24;
+}
+
+
+int
+convert8toGrey( unsigned char * data, unsigned int size )
+{
+	int i;
+	unsigned int c24;
+	union colors c;
+
+	/* [the conversion loop was lost in extraction; it walks all size
+	   pixels and rewrites each through color24toGrey()] */
+
+	return size;
+}
+
+/* [the opening of findColor24NearMatch() was lost in extraction; the
+   signature and cache check below are reconstructed from the prototype
+   and the surviving tail of the function] */
+unsigned int
+findColor24NearMatch( unsigned int color24 )
+{
+	int prim;
+	union colors c;
+	static unsigned int last_c = -1, last_p = -1, last_co = -1;
+
+	if ( last_c == color24 )
+		return last_co;
+
+	c.c24 = color24;
+
+	if ( c.rgb[1] > c.rgb[2] && c.rgb[1] > c.rgb[3] )
+		prim = 1;
+	else if ( c.rgb[2] > c.rgb[1] && c.rgb[2] > c.rgb[3] )
+		prim = 2;
+	else if ( c.rgb[3] > c.rgb[1] && c.rgb[3] > c.rgb[2] )
+		prim = 3;
+	else if ( c.rgb[1] == c.rgb[2] && c.rgb[1] == c.rgb[3] )
+		prim = 0;	// gray
+	else if ( c.rgb[1] == c.rgb[2] )
+		prim = 0x12;	// red green
+	else if ( c.rgb[1] == c.rgb[3] )
+		prim = 0x13;	// red blue
+	else if ( c.rgb[2] == c.rgb[3] )
+		prim = 0x23;	// green blue
+	else
+		printf("cannot tell color %06x\n", color24);
+
+	last_c = color24;
+	last_p = prim;
+
+	if ( prim == 0 || prim > 3 )
+	{
+		last_co = -1;
+		return last_co;
+	}
+
+	return -1;
+}
+
+
+unsigned char
+findIndexMatch( unsigned int color24 )
+{
+	int i;
+	unsigned char ri;
+	static unsigned char last = 0;
+
+retry:
+	if ( panic_clut[last] == color24 )
+	{
+		exactmatch++;
+		return last;
+	}
+
+	for (i=0; i<256; i++)
+	{
+		if ( panic_clut[i] == color24 ) {
+			last = i;
+			exactmatch++;
+			return last;
+		}
+	}
+
+	if ( nextmis == -1 ) {
+		for (i=0; i<256; i++) mismatchClut[i] = -1;
+		nextmis = 0;
+	}
+
+	i = findIndexNearMatch(color24);
+
+	if ( i == -1 ) // found a color that is not grey
+	{
+		unsigned int colormatch = findColor24NearMatch( color24 );
+
+		if ( colormatch == -1 ) // cannot convert color
+		{
+			cvt2grey++;
+			if (debug>1) printf("color %06X not matched at all\n", color24);
+			color24 = color24toGrey(color24);
+			if (debug>1) printf("now grey %06X\n", color24);
+		}
+		else
+			color24 = colormatch;
+
+		goto retry;
+	}
+
+	if (debug>1) printf("color %06X now matched at %x\n", color24, i);
+
+	ri = i;
+
+	neargrey++;
+
+	// keep track of missed repeats
+	for ( i=0; i < nextmis; i++ )
+		if ( mismatchClut[i] == color24 ) break;
+	if ( i == nextmis ) /* [reconstructed: remember first-time mismatches] */
+		mismatchClut[nextmis++] = color24;
+
+	if ( (nextmis >= 256) )
+	{
+		fprintf(stderr,"Err: Too many color mismatches detected with this CLUT\n");
+		exit(1);
+	}
+
+	return ri;
+}
+
+/*
+ * Convert 8-bit indexed mode to 8 bit; we have to strip off the alignment bytes.
+ */
+
+int
+convert8bitIndexto8( unsigned char * data, int height, int width, unsigned char ** dout )
+{
+	unsigned int row, col, i, i8, size, adj;
+	unsigned char index;
+	unsigned char * ddata;
+	union colors color24;
+
+	adj=(4-(width%4))%4;	// adjustment needed to strip off the word alignment padding
+	size = height * width;
+	ddata = (unsigned char *) calloc( size, 1);
+
+	/* [the loop body was lost in extraction and is reconstructed: it
+	   copies each row, remapping pixels with findIndexMatch() and
+	   skipping the adj padding bytes; the CreateCLUTarry() and
+	   ReplaceCLUT() helpers that closed out this file are gone too] */
+	for (i8=0,row=0; row < height; row++) {
+		for (col=0; col < width; col++) {
+			index = data[i8++];
+			color24.c24 = panic_clut[index];
+			ddata[row*width + col] = findIndexMatch( color24.c24 );
+		}
+		i8 += adj;	// skip the row's word-alignment padding
+	}
+
+	*dout = ddata;
+	return size;
+}
diff --git a/osfmk/console/panic_ui/setupdialog.c b/osfmk/console/panic_ui/setupdialog.c
new file mode 100644
--- /dev/null
+++ b/osfmk/console/panic_ui/setupdialog.c
+/* [the opening of setupdialog.c, including its header comment and the
+   names in its #includes, was lost in extraction; these headers cover
+   the calls used below] */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define RUN_MAX 32767
+
+void create_numbers_file( FILE *stream, char *outfile );
+unsigned int encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned char value);
+
+void
+usage(void) {
+	printf("\nusage: setupdialog -i <image file> -oi <image out> -n <numbers file> -on <numbers out>\n");
+
+	printf("\nYou can supply a panic image file, a numbers file, or both. Input files\n");
+	printf("must be in RAW format where each pixel is represented by an index into the\n");
+	printf("MacOS X system CLUT. The first %d bytes must be the width, height, and depth\n", 3 * sizeof(short));
+	printf("(in that order, %d bytes each).\n", sizeof(short));
+
+	printf("\nThe output files are generated C structures in the format the panic ui code\n");
+	printf("expects (default output files are panic_image.c and rendered_numbers.c).\n\n");
+}
+
+int
+main( int argc, char *argv[] )
+{
+	int next;
+	char *file = NULL, *ptr, *out = NULL, *numsfile = NULL, *numsout = NULL;
+	FILE * stream, *out_stream;
+	int * data;
+	short width = 0, height = 0, depth = 0;
+	char word[2];
+	char byte;
+	unsigned int i, pixels, filePos;
+	int err;
+	unsigned char *fileArr;
+	unsigned char nextP;
+	unsigned int count;
+	int currP;
+	int fd;
+	int pairs_this_line;
+
+
+	// pull apart the arguments
+	for( next = 1; next < argc; next++ )
+	{
+		if (strcmp(argv[next], "-i") == 0) // image file (RAW/PICT?)
+			file = argv[++next];
+		else if (strcmp(argv[next], "-n") == 0) // numbers/chars image file (RAW)
+			numsfile = argv[++next];
+		else if (strcmp(argv[next], "-oi") == 0) // output file for image
+			out = argv[++next];
+		else if (strcmp(argv[next], "-on") == 0) // output file for numbers
+			numsout = argv[++next];
+
+		/* perhaps we should just let the user specify the W/H rather than require the header */
+		/*
+		else if (strcmp(argv[next], "-w") == 0) // image width (pixels)
+			width = strtoul(argv[++next], &ptr, 0);
+		else if (strcmp(argv[next], "-h") == 0) // image height (pixels)
+			height = strtoul(argv[++next], &ptr, 0);
+		*/
+	}
+
+	if (!(numsfile || file)) {
+		usage();
+		exit(1);
+	}
+
+	if (!numsfile) {
+		printf("\nNo numbers file to process\n");
+	} else {
+		stream = fopen(numsfile, "r");
+		if (!stream) {
+			printf("bad nums infile.. bailing.\n");
+			exit(1);
+		}
+		create_numbers_file( stream, numsout );
+		fclose(stream);
+	}
+
+	if( file == NULL) {
+		printf("\nNo image file to process\n");
+		exit(1);
+	}
+
+	stream = fopen(file, "r");
+	if (!stream) {
+		printf("bad infile.. bailing.\n");
+		exit(1);
+	}
+
+	printf("\nReading image file...\n");
+
+	fread((void *) &width, sizeof(short), 1, stream);
+	printf("got width: %d\n", width);
+	fread((void *) &height, sizeof(short), 1, stream);
+	printf("got height: %d\n", height);
+	fread((void *) &depth, sizeof(short), 1, stream);
+	printf("got depth: %d\n", depth);
+
+	if (!(width && height && depth)) {
+		printf("Invalid image file header (width, height, or depth is 0)\n");
+		exit(1);
+	}
+
+	pixels = width * height;
+
+	if (!(fileArr = (unsigned char *) malloc(pixels))) {
+		printf("couldn't malloc fileArr (%d pixels)... bailing.\n", pixels);
+		exit(1);
+	}
+
+	currP = -1;
+	count = 0;
+	filePos = 0; // position in the file we're writing out
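+
+	/* encode_rle() emits one of two forms (illustrative values): a run
+	   of 5 pixels of CLUT index 0x0F fits in seven bits and becomes
+	   0x05 0x0F; a run of 1000 (0x3E8) pixels of index 0x07 takes the
+	   15-bit form 0x83 0xE8 0x07. Note that count is incremented before
+	   the comparison below, so when a new value ends a run, count-1 is
+	   the length of the run just finished and the current byte carries
+	   into the next run with count = 1. */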
+
+	for (i=0; i < pixels; i++) {
+		nextP = fgetc(stream);
+		count++;
+		if (nextP == currP) {
+			if (count >= RUN_MAX) {
+				filePos += encode_rle(fileArr, filePos, count, (unsigned char) currP);
+				count = 0;
+				currP = -1;
+			}
+		} else {
+			if (currP != -1) {
+				filePos += encode_rle(fileArr, filePos, count-1, (unsigned char) currP);
+			}
+			currP = nextP; // start a new run
+			count = 1;
+		}
+	}
+
+	// write out any run that was in progress
+	if (count > 0) {
+		filePos += encode_rle(fileArr, filePos, count, (unsigned char) currP);
+	}
+
+	fclose( stream );
+
+	// now, generate the c file
+
+	if ( out == NULL)
+		out = "panic_image.c";
+	out_stream = fopen(out, "w");
+
+	if(out_stream == NULL) {
+		printf("couldn't open out file.. bailing\n");
+		exit(1);
+	}
+
+	pairs_this_line = 0;
+
+	fprintf( out_stream, "/* generated c file */\n\n");
+	fprintf( out_stream, "static const struct {\n");
+	fprintf( out_stream, "  unsigned int pd_width;\n");
+	fprintf( out_stream, "  unsigned int pd_height;\n");
+	fprintf( out_stream, "  unsigned int bytes_per_pixel; /* 1: CLUT, 3:RGB, 4:RGBA */\n");
+	fprintf( out_stream, "  unsigned char image_pixel_data[%#4.2x];\n", (filePos));
+
+	fprintf( out_stream, "} panic_dialog = {\n");
+	fprintf( out_stream, "\t%d, ", width);		/* panic dialog x */
+	fprintf( out_stream, "%d, ", height);		/* panic dialog y */
+	fprintf( out_stream, "1,\n");			/* bytes per pixel */
+
+	for( i=0; i < filePos;) {
+		fprintf( out_stream, "0x%.2x,0x%.2x", fileArr[i], fileArr[i+1]);
+		i+=2;
+		pairs_this_line++;
+
+		// if the first byte had a leading 1, this is a 3-byte encoding
+		if ((fileArr[i-2] >> 7) == 1) {
+			fprintf( out_stream, ",0x%.2x", fileArr[i++]);
+			pairs_this_line++;
+		}
+
+		if (i >= filePos) // this is the last element
+			fprintf( out_stream, "\n};");
+		else fprintf( out_stream, ", ");
+
+		if(pairs_this_line > 8) {
+			fprintf( out_stream, "\n");
+			pairs_this_line = 0;
+		}
+	}
+
+
+	fclose( out_stream );
+
+	return 0;
+}
+
+
+/* Each number/char (0-f) has its own row in the pixmap array.
+   When done, these rows each contain an RLE character.
+   The image file is read row by row, so the individual characters
+   must be constructed in the same way. The numPos array tracks the
+   current position in each character's RLE array.
+ */
+void
+create_numbers_file( FILE *stream, char *outfile )
+{
+	int err;
+	short height, depth, totalwidth;
+	int numbers = 17;
+	int width[17] = {9,7,8,6,9,7,8,7,8,7,10,7,9,10,7,6,4};
+	int numPos[17] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+	int **pixmap;
+	int row, col, item, line=0, currWidth;
+	int nextP, currP;
+	int count, currNum;
+
+	FILE *out_stream;
+
+	printf("\nReading numbers file...\n");
+	fread((void *) &totalwidth, sizeof(short), 1, stream);
+	printf("got width: %d\n", totalwidth);
+	fread((void *) &height, sizeof(short), 1, stream);
+	printf("got height: %d\n", height);
+	fread((void *) &depth, sizeof(short), 1, stream);
+	printf("got depth: %d\n", depth);
+
+	if (!(totalwidth && height && depth)) {
+		printf("Invalid numbers file header (width, height, or depth is 0)\n");
+		return;
+	}
+
+	// allocate array to hold each number's RLE encoding (20 = 2xwidest width[i] value, 17 = num chars)
+	pixmap = (int **) malloc( 17 * sizeof(int *) );
+	for( item=0; item<17; item++)
+		pixmap[item] = (int *) malloc( 2*width[item]*height*sizeof(int) );
+
+	currP = -1;
+	count = 0;
+	currWidth = 0;
+	currNum = 0;
+
+	for( row=0; row < height; row++) {
+		for( item=0; item < numbers; item++) {
+			count = 0;
+			currP = -1; // start each character fresh
+			for( col=0; col < width[item]; col++) {
+				nextP = fgetc( stream );
+				if( nextP == currP) {
+					if( count == 127) { // probably never executed given the small widths
+						pixmap[item][numPos[item]] = count;
+						pixmap[item][numPos[item]+1] = currP;
+						numPos[item]+=2;
+						count = 0;
+						currP = -1;
+					} else count++; // add one to the current run
+				} else {
+					if( currP != -1) {
+						pixmap[item][numPos[item]] = count; // currP was the end of the run
+						pixmap[item][numPos[item]+1] = currP;
+						numPos[item]+=2;
+					}
+					currP = nextP; // start a new run
+					count = 1;
+				}
+			}
+			// write out any run that was in progress
+			if( count > 0) {
+				pixmap[item][numPos[item]] = count;
+				pixmap[item][numPos[item]+1] = currP;
+				numPos[item]+=2;
+			}
+		}
+	}
+
+	// now, generate the c file
+
+	if ( outfile == NULL)
+		outfile = "rendered_numbers.c";
+	out_stream = fopen(outfile, "w");
+
+	if(out_stream == NULL) {
+		printf("couldn't open numbers outfile.. bailing\n");
+		exit(1);
+	}
+
+	fprintf( out_stream, " /* generated c file */\n\n");
+
+	// iterate through all the numbers/chars
+	for( item=0; item < numbers; item++) {
+		/* [lost in extraction: the fprintf() calls that open this
+		   character's array in the generated file; the inner loop
+		   header and pair-printing fprintf() below are reconstructed
+		   from the surviving line-wrap logic] */
+		line = 0;
+		for( col=0; col < numPos[item]; ) {
+			fprintf( out_stream, "0x%.2x,0x%.2x, ", pixmap[item][col], pixmap[item][col+1]);
+			line++;
+			if (line >= width[item]) {
+				fprintf( out_stream, "\n");
+				line = 0;
+			}
+			col+=2;
+		}
+	}
+
+	fclose( out_stream );
+}
+
+
+/* encode_rle applies a "modified-RLE" encoding to a given image. The encoding works as follows:
+
+	The quantity and value will be described by either two or three bytes. If the
+	most significant bit of the first byte is a 0, then the next seven bits are
+	the quantity (run-length) and the following 8 bits are the value (index into
+	a clut, in this case). If the msb of the first byte is a 1, then the next 15 bits
+	are the quantity and the following 8 are the value. Visually, the two possible
+	encodings are: (q = quantity, v = value)
+
+	Byte 1				Byte 2				Byte 3
+	case 1: [ 0  q6 q5 q4 q3 q2 q1 q0 ]	[ v7 v6 v5 v4 v3 v2 v1 v0 ]	[                          ]
+	case 2: [ 1 q14 q13 q12 q11 q10 q9 q8 ]	[ q7 q6 q5 q4 q3 q2 q1 q0 ]	[ v7 v6 v5 v4 v3 v2 v1 v0 ]
+*/
+
+
+unsigned int
+encode_rle(unsigned char * fileArr, unsigned int filePos, unsigned int quantity, unsigned char value)
+{
+	unsigned char single_mask = 0x00;
+	unsigned char double_mask = 0x80;
+	unsigned char slots_used = 0;
+
+	if (quantity < 128) {
+		fileArr[filePos] = single_mask | quantity;
+		slots_used = 1;
+	} else {
+		fileArr[filePos] = double_mask | (quantity >> 8);	// high 7 bits (plus mask)
+		fileArr[filePos+1] = (unsigned char) quantity;		// low 8 bits
+		slots_used = 2;
+	}
+
+	fileArr[filePos+slots_used] = value;
+	slots_used++;
+
+	return slots_used;
+}
diff --git a/osfmk/console/panic_ui/systemCLUT.act b/osfmk/console/panic_ui/systemCLUT.act
new file mode 100644
index 0000000000000000000000000000000000000000..0ad32f3ae5e6e8ce6d2b14c6cabaa2d1c0e42ea8
GIT binary patch
literal 768
[base85-encoded binary data omitted]
literal 0
HcmV?d00001

diff --git a/osfmk/console/ppc/serial_console.c b/osfmk/console/ppc/serial_console.c
index bcfbb62c6..080ba0d8f 100644
--- a/osfmk/console/ppc/serial_console.c
+++ b/osfmk/console/ppc/serial_console.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -34,11 +34,15 @@ #include /* spl definitions */ #include #include +#include +#include #include #include #include #include #include +#include +#include #include /* @@ -85,8 +89,9 @@ struct ppcbfr { /* Controls multiple processor output */ unsigned int echo; /* Control character echoing */ char buffer[256]; /* Fairly big buffer */ }; -typedef struct ppcbfr ppcbfr; -ppcbfr cbfr[NCPUS]; /* Get one of these for each processor */ +typedef struct ppcbfr ppcbfr_t; + +ppcbfr_t cbfr_boot_cpu; /* Get one for boot cpu */ volatile unsigned int cbfpend; /* A buffer is pending output */ volatile unsigned int sconowner=-1; /* Mark who's actually writing */ @@ -97,35 +102,61 @@ unsigned int cons_ops_index = CONS_OPS; unsigned int killprint = 0; unsigned int debcnputc = 0; extern unsigned int mappingdeb0; -extern int debugger_holdoff[NCPUS]; extern int debugger_cpu; +void *console_per_proc_alloc(boolean_t boot_processor) +{ + ppcbfr_t *cbfr_cpu; + + if (boot_processor) + cbfr_cpu = &cbfr_boot_cpu; + else { + cbfr_cpu = (ppcbfr_t *)kalloc(sizeof(ppcbfr_t)); + if (cbfr_cpu == (ppcbfr_t *)NULL) + return (void *)NULL; + } + bzero((char *)cbfr_cpu, sizeof(ppcbfr_t)); + return (void *)cbfr_cpu; +} + +void console_per_proc_free(void *per_proc_cbfr) +{ + if (per_proc_cbfr == (void *)&cbfr_boot_cpu) + return; + else + kfree(per_proc_cbfr, sizeof(ppcbfr_t)); +} + + static void _cnputc(char c) { cons_ops[cons_ops_index].putc(console_unit, console_chan, c); } void cnputcusr(char c) { /* Echo input character directly */ - - unsigned int cpu; - - cpu = cpu_number(); - - hw_atomic_add(&debugger_holdoff[cpu], 1); /* Don't allow debugger entry just now (this is a HACK) */ + struct per_proc_info *procinfo; + spl_t s; + s=splhigh(); + procinfo = getPerProc(); + + hw_atomic_add(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */ + _cnputc( c); /* Echo the character */ if(c=='\n') _cnputc( '\r'); /* Add a return if we had a new line */ - - hw_atomic_sub(&debugger_holdoff[cpu], 1); /* Don't allow debugger entry just now (this is a HACK) */ + + hw_atomic_sub(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */ + splx(s); return; } void cnputc(char c) { - - unsigned int oldpend, i, cpu, ourbit, sccpu; - spl_t s; + unsigned int oldpend, i, cpu, ourbit, sccpu; + struct per_proc_info *procinfo; + ppcbfr_t *cbfr, *cbfr_cpu; + spl_t s; #if MP_SAFE_CONSOLE @@ -139,9 +170,12 @@ cnputc(char c) return; /* If printing is disabled, bail... */ } - cpu = cpu_number(); + s=splhigh(); /* Don't bother me */ + procinfo = getPerProc(); + cpu = procinfo->cpu_number; + cbfr = procinfo->pp_cbfr; - hw_atomic_add(&debugger_holdoff[cpu], 1); /* Don't allow debugger entry just now (this is a HACK) */ + hw_atomic_add(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */ ourbit = 1 << cpu; /* Make a mask for just us */ if(debugger_cpu != -1) { /* Are we in the debugger with empty buffers? */ @@ -155,46 +189,46 @@ cnputc(char c) _cnputc( '\r'); /* Yeah, just add a return */ sconowner=-1; /* Mark it idle */ - hw_atomic_sub(&debugger_holdoff[cpu], 1); /* Don't allow debugger entry just now (this is a HACK) */ + hw_atomic_sub(&(procinfo->debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */ + splx(s); return; /* Leave... 
*/ } - s=splhigh(); /* Don't bother me */ while(ourbit&cbfpend); /* We aren't "double buffered," so we'll just wait until the buffers are written */ isync(); /* Just in case we had to wait */ if(c) { /* If the character is not null */ - cbfr[cpu].buffer[cbfr[cpu].pos]=c; /* Fill in the buffer for our CPU */ - cbfr[cpu].pos++; /* Up the count */ - if(cbfr[cpu].pos > 253) { /* Is the buffer full? */ - cbfr[cpu].buffer[254]='\n'; /* Yeah, set the second to last as a LF */ - cbfr[cpu].buffer[255]='\r'; /* And the last to a CR */ - cbfr[cpu].pos=256; /* Push the buffer to the end */ + cbfr->buffer[cbfr->pos]=c; /* Fill in the buffer for our CPU */ + cbfr->pos++; /* Up the count */ + if(cbfr->pos > 253) { /* Is the buffer full? */ + cbfr->buffer[254]='\n'; /* Yeah, set the second to last as a LF */ + cbfr->buffer[255]='\r'; /* And the last to a CR */ + cbfr->pos=256; /* Push the buffer to the end */ c='\r'; /* Set character to a CR */ } } if(c == '\n') { /* Are we finishing a line? */ - cbfr[cpu].buffer[cbfr[cpu].pos]='\r'; /* And the last to a CR */ - cbfr[cpu].pos++; /* Up the count */ + cbfr->buffer[cbfr->pos]='\r'; /* And the last to a CR */ + cbfr->pos++; /* Up the count */ c='\r'; /* Set character to a CR */ } #if 1 - if(cbfr[cpu].echo == 1) { /* Did we hit an escape last time? */ + if(cbfr->echo == 1) { /* Did we hit an escape last time? */ if(c == 'K') { /* Is it a partial clear? */ - cbfr[cpu].echo = 2; /* Yes, enter echo mode */ + cbfr->echo = 2; /* Yes, enter echo mode */ } - else cbfr[cpu].echo = 0; /* Otherwise reset escape */ + else cbfr->echo = 0; /* Otherwise reset escape */ } - else if(cbfr[cpu].echo == 0) { /* Not in escape sequence, see if we should enter */ - cbfr[cpu].echo = 1; /* Set that we are in escape sequence */ + else if(cbfr->echo == 0) { /* Not in escape sequence, see if we should enter */ + cbfr->echo = 1; /* Set that we are in escape sequence */ } #endif - if((c == 0x00) || (c == '\r') || (cbfr[cpu].echo == 2)) { /* Try to push out all buffers if we see CR or null */ + if((c == 0x00) || (c == '\r') || (cbfr->echo == 2)) { /* Try to push out all buffers if we see CR or null */ while(1) { /* Loop until we see who's doing this */ oldpend=cbfpend; /* Get the currentest pending buffer flags */ @@ -203,19 +237,24 @@ cnputc(char c) } if(!hw_compare_and_store(-1, cpu, (unsigned int *)&sconowner)) { /* See if someone else has this, and take it if not */ - debugger_holdoff[cpu] = 0; /* Allow debugger entry (this is a HACK) */ + procinfo->debugger_holdoff = 0; /* Allow debugger entry (this is a HACK) */ splx(s); /* Let's take some 'rupts now */ return; /* We leave here, 'cause another processor is already writing the buffers */ } while(1) { /* Loop to dump out all of the finished buffers */ oldpend=cbfpend; /* Get the most current finished buffers */ - for(sccpu=0; sccpupp_cbfr == 0)) + continue; + + cbfr_cpu = PerProcTable[sccpu].ppe_vaddr->pp_cbfr; if(oldpend&(1<noprompt) { /* Don't prompt if there was not CR before */ _cnputc( '{'); /* Mark CPU number */ _cnputc( '0'+sccpu); /* Mark CPU number */ _cnputc( '.'); /* (TEST/DEBUG) */ @@ -225,19 +264,19 @@ cnputc(char c) } #endif - for(i=0; ipos; i++) { /* Do the whole buffer */ + _cnputc(cbfr_cpu->buffer[i]); /* Write it */ } - if(cbfr[sccpu].buffer[cbfr[sccpu].pos-1]!='\r') { /* Was the last character a return? */ - cbfr[sccpu].noprompt = 1; /* Remember not to prompt */ + if(cbfr_cpu->buffer[cbfr_cpu->pos-1]!='\r') { /* Was the last character a return? 
*/ + cbfr_cpu->noprompt = 1; /* Remember not to prompt */ } - else { /* Last was a return */ - cbfr[sccpu].noprompt = 0; /* Otherwise remember to prompt */ - cbfr[sccpu].echo = 0; /* And clear echo */ + else { /* Last was a return */ + cbfr_cpu->noprompt = 0; /* Otherwise remember to prompt */ + cbfr_cpu->echo = 0; /* And clear echo */ } - cbfr[sccpu].pos=0; /* Reset the buffer pointer */ + cbfr_cpu->pos=0; /* Reset the buffer pointer */ while(!hw_compare_and_store(cbfpend, cbfpend&~(1<debugger_holdoff), 1); /* Don't allow debugger entry just now (this is a HACK) */ splx(s); /* Let's take some 'rupts now */ #else /* MP_SAFE_CONSOLE */ @@ -313,7 +352,10 @@ switch_to_old_console(int old_console) int -vcgetc(int l, int u, boolean_t wait, boolean_t raw) +vcgetc(__unused int l, + __unused int u, + __unused boolean_t wait, + __unused boolean_t raw) { char c; diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index f3e840dcb..9e00806ff 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -85,11 +85,17 @@ #include #include + +#include +#include #include #include #include -#include +#include + +#include #include + #include #include "iso_font.c" @@ -161,6 +167,8 @@ static int gc_par[MAXPARS], gc_numpars, gc_hanging_cursor, gc_attr, gc_saveattr; static char gc_tab_stops[255]; static int gc_scrreg_top, gc_scrreg_bottom; +enum { kProgressAcquireDelay = 5 /* secs */ }; + enum vt100state_e { ESnormal, /* Nothing yet */ ESesc, /* Got ESC */ @@ -316,9 +324,9 @@ gc_enable( boolean_t enable ) simple_unlock( &gc_buffer_lock ); splx( s ); - kfree( (vm_offset_t)buffer_attributes, buffer_size ); - kfree( (vm_offset_t)buffer_characters, buffer_size ); - kfree( (vm_offset_t)buffer_colorcodes, buffer_size ); + kfree( buffer_attributes, buffer_size ); + kfree( buffer_characters, buffer_size ); + kfree( buffer_colorcodes, buffer_size ); } else { @@ -344,9 +352,9 @@ gc_enable( boolean_t enable ) buffer_characters == NULL || buffer_colorcodes == NULL ) { - if ( buffer_attributes ) kfree( (vm_offset_t)buffer_attributes, buffer_size ); - if ( buffer_characters ) kfree( (vm_offset_t)buffer_characters, buffer_size ); - if ( buffer_colorcodes ) kfree( (vm_offset_t)buffer_colorcodes, buffer_size ); + if ( buffer_attributes ) kfree( buffer_attributes, buffer_size ); + if ( buffer_characters ) kfree( buffer_characters, buffer_size ); + if ( buffer_colorcodes ) kfree( buffer_colorcodes, buffer_size ); buffer_columns = 0; buffer_rows = 0; @@ -424,7 +432,7 @@ gc_initialize(struct vc_info * info) if ( gc_initialized == FALSE ) { /* Init our lock */ - simple_lock_init(&gc_buffer_lock, ETAP_IO_TTY); + simple_lock_init(&gc_buffer_lock, 0); gc_initialized = TRUE; } @@ -1550,7 +1558,7 @@ vc_render_font(short newdepth) return; /* nothing to do */ } if (vc_rendered_font) { - kfree((vm_offset_t)vc_rendered_font, vc_rendered_font_size); + kfree(vc_rendered_font, vc_rendered_font_size); } vc_rendered_char_size = ISO_CHAR_HEIGHT * ((newdepth / 8) * ISO_CHAR_WIDTH); @@ -1705,15 +1713,17 @@ static boolean_t vc_progress_enable; static const unsigned char * vc_clut; static const unsigned char * vc_clut8; static unsigned char vc_revclut8[256]; -static unsigned int vc_progress_tick; +static uint32_t vc_progress_interval; +static uint64_t vc_progress_deadline; +static thread_call_data_t vc_progress_call; 
static boolean_t vc_needsave; -static vm_address_t vc_saveunder; +static void * vc_saveunder; static vm_size_t vc_saveunder_len; decl_simple_lock_data(,vc_progress_lock) static void vc_blit_rect( int x, int y, int width, int height, const unsigned char * dataPtr, const unsigned char * alphaPtr, - vm_address_t backBuffer, boolean_t save, boolean_t static_alpha ); + void * backBuffer, boolean_t save, boolean_t static_alpha ); static void vc_blit_rect_8( int x, int y, int width, int height, const unsigned char * dataPtr, const unsigned char * alphaPtr, unsigned char * backBuffer, boolean_t save, boolean_t static_alpha ); @@ -1725,14 +1735,14 @@ static void vc_blit_rect_32( int x, int y, int width, int height, unsigned int * backBuffer, boolean_t save, boolean_t static_alpha ); extern void vc_display_icon( vc_progress_element * desc, const unsigned char * data ); extern void vc_progress_initialize( vc_progress_element * desc, const unsigned char * data, const unsigned char * clut ); -static void vc_progress_set( boolean_t enable, unsigned int initial_tick ); -static void vc_progress_task( void * arg ); +static void vc_progress_set( boolean_t enable, uint32_t delay ); +static void vc_progress_task( void * arg0, void * arg ); static void vc_blit_rect( int x, int y, int width, int height, const unsigned char * dataPtr, const unsigned char * alphaPtr, - vm_address_t backBuffer, + void * backBuffer, boolean_t save, boolean_t static_alpha ) { if(!vinfo.v_depth) @@ -1927,7 +1937,7 @@ void vc_display_icon( vc_progress_element * desc, x += ((vinfo.v_width - width) / 2); y += ((vinfo.v_height - height) / 2); } - vc_blit_rect( x, y, width, height, data, NULL, (vm_address_t) NULL, FALSE, TRUE ); + vc_blit_rect( x, y, width, height, data, NULL, NULL, FALSE, TRUE ); } } @@ -1936,6 +1946,8 @@ vc_progress_initialize( vc_progress_element * desc, const unsigned char * data, const unsigned char * clut ) { + uint64_t abstime; + if( (!clut) || (!desc) || (!data)) return; vc_clut = clut; @@ -1948,16 +1960,20 @@ vc_progress_initialize( vc_progress_element * desc, + vc_progress->count * vc_progress->width * vc_progress->height; else vc_progress_alpha = NULL; - vc_progress_tick = vc_progress->time * hz / 1000; - simple_lock_init(&vc_progress_lock, ETAP_IO_TTY); + thread_call_setup(&vc_progress_call, vc_progress_task, NULL); + + clock_interval_to_absolutetime_interval(vc_progress->time, 1000 * 1000, &abstime); + vc_progress_interval = abstime; + + simple_lock_init(&vc_progress_lock, 0); } static void -vc_progress_set( boolean_t enable, unsigned int initial_tick ) +vc_progress_set( boolean_t enable, uint32_t delay ) { spl_t s; - vm_address_t saveBuf = 0; + void *saveBuf = 0; vm_size_t saveLen = 0; unsigned int count; unsigned int index; @@ -1988,7 +2004,7 @@ vc_progress_set( boolean_t enable, unsigned int initial_tick ) } } } - memset( (void *) saveBuf, 0x01, saveLen ); + memset( saveBuf, 0x01, saveLen ); break; case 16 : @@ -2022,8 +2038,10 @@ vc_progress_set( boolean_t enable, unsigned int initial_tick ) vc_saveunder_len = saveLen; saveBuf = 0; saveLen = 0; - timeout(vc_progress_task, (void *) 0, - initial_tick ); + + clock_interval_to_deadline(delay, 1000 * 1000 * 1000 /*second scale*/, &vc_progress_deadline); + thread_call_enter_delayed(&vc_progress_call, vc_progress_deadline); + } else { if( vc_saveunder) { saveBuf = vc_saveunder; @@ -2031,7 +2049,8 @@ vc_progress_set( boolean_t enable, unsigned int initial_tick ) vc_saveunder = 0; vc_saveunder_len = 0; } - untimeout( vc_progress_task, (void *) 0 ); + + 
thread_call_cancel(&vc_progress_call); } } @@ -2042,7 +2061,7 @@ vc_progress_set( boolean_t enable, unsigned int initial_tick ) kfree( saveBuf, saveLen ); } -static void vc_progress_task( void * arg ) +static void vc_progress_task( void * arg0, void * arg ) { spl_t s; int count = (int) arg; @@ -2069,12 +2088,12 @@ static void vc_progress_task( void * arg ) y += ((vinfo.v_height - height) / 2); } vc_blit_rect( x, y, width, height, - NULL, data, vc_saveunder, + NULL, data, vc_saveunder, vc_needsave, (0 == (4 & vc_progress->flags)) ); vc_needsave = FALSE; - timeout( vc_progress_task, (void *) count, - vc_progress_tick ); + clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline); + thread_call_enter1_delayed(&vc_progress_call, (void *)count, vc_progress_deadline); } simple_unlock(&vc_progress_lock); splx(s); @@ -2096,109 +2115,6 @@ static unsigned int lastVideoPhys = 0; static unsigned int lastVideoVirt = 0; static unsigned int lastVideoSize = 0; -#ifdef __i386__ -void -initialize_screen(Boot_Video * boot_vinfo, unsigned int op) -{ - if ( boot_vinfo ) - { - vinfo.v_name[0] = 0; - vinfo.v_width = boot_vinfo->v_width; - vinfo.v_height = boot_vinfo->v_height; - vinfo.v_depth = boot_vinfo->v_depth; - vinfo.v_rowbytes = boot_vinfo->v_rowBytes; - vinfo.v_physaddr = boot_vinfo->v_baseAddr; - vinfo.v_baseaddr = vinfo.v_physaddr; - vinfo.v_type = boot_vinfo->v_display; - - if ( (vinfo.v_type == TEXT_MODE) ) - { - // Text mode setup by the booter. - gc_ops.initialize = tc_initialize; - gc_ops.enable = tc_enable; - gc_ops.paint_char = tc_paint_char; - gc_ops.clear_screen = tc_clear_screen; - gc_ops.scroll_down = tc_scroll_down; - gc_ops.scroll_up = tc_scroll_up; - gc_ops.hide_cursor = tc_hide_cursor; - gc_ops.show_cursor = tc_show_cursor; - gc_ops.update_color = tc_update_color; - } - else - - { - // Graphics mode setup by the booter. 
- gc_ops.initialize = vc_initialize; - gc_ops.enable = vc_enable; - gc_ops.paint_char = vc_paint_char; - gc_ops.scroll_down = vc_scroll_down; - gc_ops.scroll_up = vc_scroll_up; - gc_ops.clear_screen = vc_clear_screen; - gc_ops.hide_cursor = vc_reverse_cursor; - gc_ops.show_cursor = vc_reverse_cursor; - gc_ops.update_color = vc_update_color; - } - - gc_initialize(&vinfo); - -#ifdef GRATEFULDEBUGGER - GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re-initialize GratefulDeb */ -#endif /* GRATEFULDEBUGGER */ - } - - switch ( op ) - { - case kPEGraphicsMode: - panicDialogDesired = TRUE; - gc_graphics_boot = TRUE; - break; - - case kPETextMode: - panicDialogDesired = FALSE; - gc_graphics_boot = FALSE; - break; - - case kPEAcquireScreen: - if ( gc_acquired ) break; - - vc_progress_set( gc_graphics_boot, 2 * hz ); - gc_enable( !gc_graphics_boot ); - gc_acquired = TRUE; - break; - - case kPEEnableScreen: - /* deprecated */ - break; - - case kPETextScreen: - panicDialogDesired = FALSE; - if ( gc_acquired == FALSE ) break; - if ( gc_graphics_boot == FALSE ) break; - - vc_progress_set( FALSE, 0 ); - gc_enable( TRUE ); - break; - - case kPEDisableScreen: - /* deprecated */ - /* skip break */ - - case kPEReleaseScreen: - gc_acquired = FALSE; - gc_enable( FALSE ); - vc_progress_set( FALSE, 0 ); - - vc_clut8 = NULL; -#ifdef GRATEFULDEBUGGER - GratefulDebInit(0); /* Stop grateful debugger */ -#endif /* GRATEFULDEBUGGER */ - break; - } -#ifdef GRATEFULDEBUGGER - if ( boot_vinfo ) GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re initialize GratefulDeb */ -#endif /* GRATEFULDEBUGGER */ -} -#else void initialize_screen(Boot_Video * boot_vinfo, unsigned int op) { @@ -2249,7 +2165,11 @@ initialize_screen(Boot_Video * boot_vinfo, unsigned int op) vinfo.v_physaddr = (fbppage << 12) | (boot_vinfo->v_baseAddr & PAGE_MASK); /* Get the physical address */ } +#ifdef __i386__ + vinfo.v_type = boot_vinfo->v_display; +#else vinfo.v_type = 0; +#endif fbsize = round_page_32(vinfo.v_height * vinfo.v_rowbytes); /* Remember size */ @@ -2329,7 +2249,7 @@ initialize_screen(Boot_Video * boot_vinfo, unsigned int op) case kPEAcquireScreen: if ( gc_acquired ) break; - vc_progress_set( gc_graphics_boot, 2 * hz ); + vc_progress_set( gc_graphics_boot, kProgressAcquireDelay ); gc_enable( !gc_graphics_boot ); gc_acquired = TRUE; break; @@ -2366,7 +2286,6 @@ initialize_screen(Boot_Video * boot_vinfo, unsigned int op) if ( boot_vinfo ) GratefulDebInit((bootBumbleC *)boot_vinfo); /* Re initialize GratefulDeb */ #endif /* GRATEFULDEBUGGER */ } -#endif void refresh_screen(void) diff --git a/osfmk/ddb/Makefile b/osfmk/ddb/Makefile index 266125550..b0689e4fb 100644 --- a/osfmk/ddb/Makefile +++ b/osfmk/ddb/Makefile @@ -12,8 +12,6 @@ MIG_DEFS = \ MIG_HDRS = \ DATAFILES = \ - nlist.h stab.h \ - ${MIG_DEFS} MIGINCLUDES = \ diff --git a/osfmk/ddb/db_aout.c b/osfmk/ddb/db_aout.c index 05d507670..08c781adc 100644 --- a/osfmk/ddb/db_aout.c +++ b/osfmk/ddb/db_aout.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,9 +56,11 @@ /* * Symbol table routines for a.out format files. 
*/ +#include #include #include #include /* data types */ +#include #include /* For strcpy(), strcmp() */ #include #include /* For db_printf() */ @@ -660,7 +662,7 @@ aout_db_search_symbol( db_strategy_t strategy, db_expr_t *diffp) /* in/out */ { - register unsigned long diff = *diffp; + db_expr_t diff = *diffp; register struct nlist *symp = 0; struct nlist *sp, *ep, *cp; boolean_t first_pass = FALSE; diff --git a/osfmk/ddb/db_break.c b/osfmk/ddb/db_break.c index 2f815cbc2..f4c841f17 100644 --- a/osfmk/ddb/db_break.c +++ b/osfmk/ddb/db_break.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -73,7 +73,7 @@ static int db_delete_thread_breakpoint( static db_thread_breakpoint_t db_find_thread_breakpoint( db_breakpoint_t bkpt, - thread_act_t thr_act); + thread_t thr_act); static void db_force_delete_breakpoint( db_breakpoint_t bkpt, @@ -192,11 +192,11 @@ db_delete_thread_breakpoint( static db_thread_breakpoint_t db_find_thread_breakpoint( db_breakpoint_t bkpt, - thread_act_t thr_act) + thread_t thr_act) { register db_thread_breakpoint_t tp; register task_t task = - (thr_act == THR_ACT_NULL) + (thr_act == THREAD_NULL) ? TASK_NULL : thr_act->task; for (tp = bkpt->threads; tp; tp = tp->tb_next) { @@ -286,7 +286,7 @@ db_check_breakpoint_valid(void) if ((tbp->tb_is_task && db_lookup_task((task_t)(tbp->tb_task_thd)) < 0) || (!tbp->tb_is_task && - db_lookup_act((thread_act_t)(tbp->tb_task_thd)) < 0)) { + db_lookup_act((thread_t)(tbp->tb_task_thd)) < 0)) { db_force_delete_breakpoint(bkpt, tbp->tb_task_thd, tbp->tb_is_task); } @@ -308,7 +308,7 @@ db_set_breakpoint( task_t task, db_addr_t addr, int count, - thread_act_t thr_act, + thread_t thr_act, boolean_t task_bpt) { register db_breakpoint_t bkpt; @@ -317,7 +317,7 @@ db_set_breakpoint( bkpt = db_find_breakpoint(task, addr); if (bkpt) { - if (thr_act == THR_ACT_NULL + if (thr_act == THREAD_NULL || db_find_thread_breakpoint(bkpt, thr_act)) { db_printf("Already set.\n"); return; @@ -325,12 +325,12 @@ db_set_breakpoint( } else { if (!DB_CHECK_ACCESS(addr, BKPT_SIZE, task)) { if (task) { - db_printf("Warning: non-resident page for breakpoint at %lX", - addr); + db_printf("Warning: non-resident page for breakpoint at %llX", + (unsigned long long)addr); db_printf(" in task %lX.\n", task); } else { - db_printf("Cannot set breakpoint at %lX in kernel space.\n", - addr); + db_printf("Cannot set breakpoint at %llX in kernel space.\n", + (unsigned long long)addr); return; } } @@ -340,7 +340,7 @@ db_set_breakpoint( return; } bkpt->task = task; - bkpt->flags = (task && thr_act == THR_ACT_NULL)? + bkpt->flags = (task && thr_act == THREAD_NULL)? (BKPT_USR_GLOBAL|BKPT_1ST_SET): 0; bkpt->address = addr; bkpt->threads = 0; @@ -436,7 +436,7 @@ db_set_breakpoints(void) register db_breakpoint_t bkpt; register task_t task; db_expr_t inst; - thread_act_t cur_act = current_act(); + thread_t cur_act = current_act(); task_t cur_task = (cur_act) ? cur_act->task : TASK_NULL; @@ -479,7 +479,7 @@ db_clear_breakpoints(void) register db_breakpoint_t bkpt, *bkptp; register task_t task; db_expr_t inst; - thread_act_t cur_act = current_act(); + thread_t cur_act = current_act(); task_t cur_task = (cur_act) ? 
cur_act->task: TASK_NULL; @@ -606,7 +606,7 @@ db_list_breakpoints(void) else db_printf("task%03d ", task_id); } else { - thread_act_t thd = (thread_act_t)(tp->tb_task_thd); + thread_t thd = (thread_t)(tp->tb_task_thd); task_id = db_lookup_task(thd->task); act_id = db_lookup_task_act(thd->task, thd); if (task_id < 0 || act_id < 0) @@ -661,7 +661,7 @@ void db_delete_cmd(void) { register int n; - thread_act_t thr_act; + thread_t thr_act; vm_offset_t task_thd; boolean_t user_global = FALSE; boolean_t task_bpt = FALSE; @@ -718,13 +718,13 @@ db_delete_cmd(void) user_space = TRUE; } if (!DB_VALID_ADDRESS(addr, user_space)) { - db_printf("Address %#X is not in %s space\n", addr, + db_printf("Address %#llX is not in %s space\n", (unsigned long long)addr, (user_space)? "user": "kernel"); db_error(0); } if (thd_bpt || task_bpt) { for (n = 0; db_get_next_act(&thr_act, n); n++) { - if (thr_act == THR_ACT_NULL) + if (thr_act == THREAD_NULL) db_error("No active thr_act\n"); if (task_bpt) { if (thr_act->task == TASK_NULL) @@ -736,7 +736,7 @@ db_delete_cmd(void) (db_addr_t)addr, task_thd); } } else { - db_delete_breakpoint(db_target_space(THR_ACT_NULL, user_space), + db_delete_breakpoint(db_target_space(THREAD_NULL, user_space), (db_addr_t)addr, 0); } } @@ -752,7 +752,7 @@ db_breakpoint_cmd( char * modif) { register int n; - thread_act_t thr_act; + thread_t thr_act; boolean_t user_global = db_option(modif, 'U'); boolean_t task_bpt = db_option(modif, 'T'); boolean_t user_space; @@ -776,7 +776,7 @@ db_breakpoint_cmd( if (user_space) db_error("Invalid user space address\n"); user_space = TRUE; - db_printf("%#X is in user space\n", addr); + db_printf("%#llX is in user space\n", (unsigned long long)addr); #ifdef ppc db_printf("kernel is from %#X to %#x\n", VM_MIN_KERNEL_ADDRESS, vm_last_addr); #else @@ -785,7 +785,7 @@ db_breakpoint_cmd( } if (db_option(modif, 't') || task_bpt) { for (n = 0; db_get_next_act(&thr_act, n); n++) { - if (thr_act == THR_ACT_NULL) + if (thr_act == THREAD_NULL) db_error("No active thr_act\n"); if (task_bpt && thr_act->task == TASK_NULL) db_error("No task\n"); @@ -794,13 +794,13 @@ db_breakpoint_cmd( db_error("Cannot set break point in inactive user space\n"); db_set_breakpoint(db_target_space(thr_act, user_space), (db_addr_t)addr, count, - (user_global)? THR_ACT_NULL: thr_act, + (user_global)? THREAD_NULL: thr_act, task_bpt); } } else { - db_set_breakpoint(db_target_space(THR_ACT_NULL, user_space), + db_set_breakpoint(db_target_space(THREAD_NULL, user_space), (db_addr_t)addr, - count, THR_ACT_NULL, FALSE); + count, THREAD_NULL, FALSE); } } diff --git a/osfmk/ddb/db_break.h b/osfmk/ddb/db_break.h index 2882408dd..1f3552348 100644 --- a/osfmk/ddb/db_break.h +++ b/osfmk/ddb/db_break.h @@ -198,7 +198,7 @@ void db_set_breakpoint( task_t task, db_addr_t addr, int count, - thread_act_t thr_act, + thread_t thr_act, boolean_t task_bpt); db_breakpoint_t db_find_breakpoint( diff --git a/osfmk/ddb/db_command.c b/osfmk/ddb/db_command.c index 4c8b4c56c..77832b4b5 100644 --- a/osfmk/ddb/db_command.c +++ b/osfmk/ddb/db_command.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,7 +57,6 @@ /* * Command dispatcher. 
*/ -#include #include #ifdef AT386 #include @@ -405,9 +404,6 @@ db_command_list( extern void db_system_stats(void); struct db_command db_show_all_cmds[] = { -#if USLOCK_DEBUG - { "slocks", (db_func) db_show_all_slocks, 0, 0 }, -#endif /* USLOCK_DEBUG */ { "acts", db_show_all_acts, 0, 0 }, { "spaces", db_show_all_spaces, 0, 0 }, { "tasks", db_show_all_acts, 0, 0 }, @@ -448,7 +444,6 @@ struct db_command db_show_cmds[] = { { "kmsg", (db_func) ipc_kmsg_print, 0, 0 }, { "msg", (db_func) ipc_msg_print, 0, 0 }, { "ipc_port", db_show_port_id, 0, 0 }, - { "lock", (db_func)db_show_one_lock, 0, 0 }, #if NORMA_VM { "xmm_obj", (db_func) xmm_obj_print, 0, 0 }, { "xmm_reply", (db_func) xmm_reply_print, 0, 0 }, @@ -459,16 +454,16 @@ struct db_command db_show_cmds[] = { { "space", db_show_one_space, 0, 0 }, { "system", (db_func) db_system_stats, 0, 0 }, { "zone", db_show_one_zone, 0, 0 }, - { "simple_lock", db_show_one_simple_lock, 0, 0 }, + { "lock", (db_func)db_show_one_lock, 0, 0 }, + { "mutex_lock", (db_func)db_show_one_mutex, 0, 0 }, + { "simple_lock", (db_func)db_show_one_simple_lock, 0, 0 }, { "thread_log", (db_func)db_show_thread_log, 0, 0 }, { "shuttle", db_show_shuttle, 0, 0 }, { (char *)0, } }; -#if NCPUS > 1 #define db_switch_cpu kdb_on extern void db_switch_cpu(int); -#endif /* NCPUS > 1 */ struct db_command db_command_table[] = { #if DB_MACHINE_COMMANDS @@ -508,9 +503,7 @@ struct db_command db_command_table[] = { { "macro", (db_func) db_def_macro_cmd, CS_OWN, 0 }, { "dmacro", (db_func) db_del_macro_cmd, CS_OWN, 0 }, { "show", 0, 0, db_show_cmds }, -#if NCPUS > 1 { "cpu", (db_func) db_switch_cpu, 0, 0 }, -#endif /* NCPUS > 1 */ { "reboot", (db_func) db_reboot, 0, 0 }, #if defined(__ppc__) { "lt", db_low_trace, CS_MORE|CS_SET_DOT, 0 }, @@ -598,8 +591,8 @@ db_command_loop(void) boolean_t db_exec_cmd_nest( - char *cmd, - int size) + const char *cmd, + int size) { struct db_lex_context lex_context; @@ -616,7 +609,7 @@ db_exec_cmd_nest( } void -db_error(char *s) +db_error(const char *s) { extern int db_macro_level; @@ -697,8 +690,8 @@ db_fncall(void) boolean_t db_option( - char *modif, - int option) + const char *modif, + int option) { register char *p; diff --git a/osfmk/ddb/db_command.h b/osfmk/ddb/db_command.h index 9626a5091..8c8443506 100644 --- a/osfmk/ddb/db_command.h +++ b/osfmk/ddb/db_command.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,79 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * Revision 1.1.1.1 1998/09/22 21:05:47 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.15.1 1997/03/27 18:46:27 barbou - * Add #include so that DB_MACHINE_COMMANDS - * can be defined. - * Move here from db_commands.c the prototype for - * db_machine_commands_install(), referenced by PARAGON/model_dep.c. - * [97/02/25 barbou] - * - * Revision 1.1.9.2 1994/09/23 01:18:19 ezf - * change marker to not FREE - * [1994/09/22 21:09:33 ezf] - * - * Revision 1.1.9.1 1994/06/11 21:11:39 bolinger - * Merge up to NMK17.2. - * [1994/06/11 20:03:50 bolinger] - * - * Revision 1.1.7.1 1994/04/11 09:34:47 bernadat - * Added db_command struct decalration. - * [94/03/17 bernadat] - * - * Revision 1.1.2.3 1993/07/27 18:26:57 elliston - * Add ANSI prototypes. CR #9523. 
- * [1993/07/27 18:11:08 elliston] - * - * Revision 1.1.2.2 1993/06/02 23:10:38 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:56:00 jeffc] - * - * Revision 1.1 1992/09/30 02:24:14 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/10/09 15:58:45 af - * Revision 2.5.2.1 91/10/05 13:05:30 jeffreyh - * Added db_exec_conditional_cmd(), and db_option(). - * Deleted db_skip_to_eol(). - * [91/08/29 tak] - * - * Revision 2.5.2.1 91/10/05 13:05:30 jeffreyh - * Added db_exec_conditional_cmd(), and db_option(). - * Deleted db_skip_to_eol(). - * [91/08/29 tak] - * - * Revision 2.5 91/07/09 23:15:46 danner - * Grabbed up to date copyright. - * [91/07/08 danner] - * - * Revision 2.2 91/04/10 16:02:32 mbj - * Grabbed 3.0 copyright/disclaimer since ddb comes from 3.0. - * [91/04/09 rvb] - * - * Revision 2.3 91/02/05 17:06:15 mrt - * Changed to new Mach copyright - * [91/01/31 16:17:28 mrt] - * - * Revision 2.2 90/08/27 21:50:19 dbg - * Replace db_last_address_examined with db_prev, db_next. - * [90/08/22 dbg] - * Created. - * [90/08/07 dbg] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -169,13 +96,13 @@ void db_command_loop(void); void db_machine_commands_install(struct db_command *ptr); boolean_t db_exec_cmd_nest( - char *cmd, - int size); + const char *cmd, + int size); -void db_error(char *s); +void db_error(const char *s); boolean_t db_option( - char *modif, - int option); + const char *modif, + int option); #endif /* !_DDB_DB_COMMAND_H_ */ diff --git a/osfmk/ddb/db_examine.c b/osfmk/ddb/db_examine.c index 04bf996fa..49404a8d1 100644 --- a/osfmk/ddb/db_examine.c +++ b/osfmk/ddb/db_examine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -75,7 +75,7 @@ char db_examine_format[TOK_STRING_SIZE] = "x"; int db_examine_count = 1; db_addr_t db_examine_prev_addr = 0; -thread_act_t db_examine_act = THR_ACT_NULL; +thread_t db_examine_act = THREAD_NULL; extern int db_max_width; @@ -103,7 +103,7 @@ db_examine_cmd( db_expr_t count, char * modif) { - thread_act_t thr_act; + thread_t thr_act; extern char db_last_modifier[]; if (modif[0] != '\0') @@ -121,7 +121,7 @@ db_examine_cmd( if (db_option(modif,'u')) thr_act = current_act(); else - thr_act = THR_ACT_NULL; + thr_act = THREAD_NULL; db_examine_act = thr_act; db_examine((db_addr_t) addr, db_examine_format, count, @@ -248,7 +248,7 @@ db_examine( db_find_task_sym_and_offset(addr,&name,&off,task); if (off == 0) db_printf("\r%s:\n", name); - db_printf("%#n: ", addr); + db_printf("%#lln: ", (unsigned long long)addr); for (sz = 0; sz < leader; sz++) db_putchar(' '); db_prev = addr; @@ -267,9 +267,9 @@ db_examine( db_find_task_sym_and_offset( value, &symName, &offset, task); db_printf("\n\t*%8llX(%8llX) = %s", - next_addr, value, symName ); + (unsigned long long)next_addr, (unsigned long long)value, symName ); if( offset ) { - db_printf("+%llX", offset ); + db_printf("+%llX", (unsigned long long)offset ); } next_addr += size; } @@ -285,7 +285,7 @@ db_examine( value = db_get_task_value(next_addr, sizeof (db_expr_t), TRUE,task); - db_printf("%-*llr", width, value); + db_printf("%-*llr", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -295,7 +295,7 @@ db_examine( } value = db_get_task_value(next_addr, sz, TRUE, task); - db_printf("%-*llR", width, value); + db_printf("%-*llR", width, (unsigned long long)value); next_addr += sz; } break; @@ -312,9 +312,9 @@ db_examine( sizeof (db_expr_t), FALSE,task); if ( c == 'X') - db_printf("%0*llX ", 2*size, value); + db_printf("%0*llX ", 2*size, (unsigned long long)value); else - db_printf("%-*llx", width, value); + db_printf("%-*llx", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -325,9 +325,9 @@ db_examine( value = db_get_task_value(next_addr, sz, FALSE, task); if ( c == 'X') - db_printf("%0*llX ", 2*size, value); + db_printf("%0*llX ", 2*size, (unsigned long long)value); else - db_printf("%-*llX", width, value); + db_printf("%-*llX", width, (unsigned long long)value); next_addr += sz; } break; @@ -342,7 +342,7 @@ db_examine( value = db_get_task_value(next_addr, sizeof (db_expr_t), TRUE, task); - db_printf("%-*llz", width, value); + db_printf("%-*llz", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -352,7 +352,7 @@ db_examine( } value = db_get_task_value(next_addr,sz, TRUE,task); - db_printf("%-*llZ", width, value); + db_printf("%-*llZ", width, (unsigned long long)value); next_addr += sz; } break; @@ -367,7 +367,7 @@ db_examine( value = db_get_task_value(next_addr, sizeof (db_expr_t), TRUE,task); - db_printf("%-*lld", width, value); + db_printf("%-*lld", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -377,7 +377,7 @@ db_examine( } value = db_get_task_value(next_addr, sz, TRUE, task); - db_printf("%-*llD", width, value); + db_printf("%-*llD", width, (unsigned long long)value); next_addr += sz; } break; @@ -393,7 +393,7 @@ db_examine( value = db_get_task_value(next_addr, sizeof (db_expr_t), FALSE,task); - db_printf("%-*llu", width, value); + db_printf("%-*llu", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -403,7 +403,7 @@ 
db_examine( } value = db_get_task_value(next_addr, sz, FALSE, task); - db_printf("%-*llU", width, value); + db_printf("%-*llU", width, (unsigned long long)value); next_addr += sz; } break; @@ -418,7 +418,7 @@ db_examine( value = db_get_task_value(next_addr, sizeof (db_expr_t), FALSE,task); - db_printf("%-*llo", width, value); + db_printf("%-*llo", width, (unsigned long long)value); next_addr += sizeof (db_expr_t); } if (sz > 0) { @@ -428,7 +428,7 @@ db_examine( } value = db_get_task_value(next_addr, sz, FALSE, task); - db_printf("%-*llo", width, value); + db_printf("%-*llo", width, (unsigned long long)value); next_addr += sz; } break; @@ -441,9 +441,9 @@ db_examine( if ((value >= ' ' && value <= '~') || value == '\n' || value == '\t') - db_printf("%llc", value); + db_printf("%llc", (unsigned long long)value); else - db_printf("\\%03llo", value); + db_printf("\\%03llo", (unsigned long long)value); } break; case 's': /* null-terminated string */ @@ -456,9 +456,9 @@ db_examine( if (value == 0) break; if (value >= ' ' && value <= '~') - db_printf("%llc", value); + db_printf("%llc", (unsigned long long)value); else - db_printf("\\%03llo", value); + db_printf("\\%03llo", (unsigned long long)value); } break; case 'i': /* instruction */ @@ -529,32 +529,32 @@ db_print_cmd(void) task); break; case 'r': - db_printf("%11llr", value); + db_printf("%11llr", (unsigned long long)value); break; case 'X': - db_printf("%016llX", value); + db_printf("%016llX", (unsigned long long)value); break; case 'x': - db_printf("%016llx", value); + db_printf("%016llx", (unsigned long long)value); break; case 'z': - db_printf("%16llz", value); + db_printf("%16llz", (unsigned long long)value); break; case 'd': - db_printf("%11lld", value); + db_printf("%11lld", (unsigned long long)value); break; case 'u': - db_printf("%11llu", value); + db_printf("%11llu", (unsigned long long)value); break; case 'o': - db_printf("%16llo", value); + db_printf("%16llo", (unsigned long long)value); break; case 'c': value = value & 0xFF; if (value >= ' ' && value <= '~') - db_printf("%llc", value); + db_printf("%llc", (unsigned long long)value); else - db_printf("\\%03llo", value); + db_printf("\\%03llo", (unsigned long long)value); break; default: db_printf("Unknown format %c\n", db_print_format); @@ -603,7 +603,7 @@ db_search_cmd(void) db_expr_t value; db_expr_t mask; db_addr_t count; - thread_act_t thr_act; + thread_t thr_act; boolean_t thread_flag = FALSE; register char *p; @@ -670,7 +670,7 @@ db_search_cmd(void) if (!db_get_next_act(&thr_act, 0)) return; } else - thr_act = THR_ACT_NULL; + thr_act = THREAD_NULL; db_search(addr, size, value, mask, count, db_act_to_task(thr_act)); } @@ -690,7 +690,7 @@ db_search( break; addr += size; } - db_printf("0x%x: ", addr); + db_printf("0x%llx: ", (unsigned long long)addr); db_next = addr; } @@ -717,7 +717,7 @@ db_xcdump( db_printf("%s:\n", name); off = -1; } - db_printf("%0*llX:%s", 2*sizeof(db_addr_t), addr, + db_printf("%0*llX:%s", 2*sizeof(db_addr_t),(unsigned long long) addr, (size != 1) ? " " : "" ); bcount = ((n > DB_XCDUMP_NC)? DB_XCDUMP_NC: n); if (trunc_page_32(addr) != trunc_page_32(addr+bcount-1)) { @@ -730,7 +730,7 @@ db_xcdump( if (i % 4 == 0) db_printf(" "); value = db_get_task_value(addr, size, FALSE, task); - db_printf("%0*llX ", size*2, value); + db_printf("%0*llX ", size*2, (unsigned long long)value); addr += size; db_find_task_sym_and_offset(addr, &name, &off, task); } @@ -741,7 +741,7 @@ db_xcdump( db_printf("%s*", (size != 1)? 
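db_xcdump, patched above to cast every value it prints, renders memory as hex words plus an ASCII column in which only bytes between ' ' and '~' are shown literally. A standalone sketch of the same layout; the 16-byte row width and plain memory reads stand in for ddb's DB_XCDUMP_NC and db_get_task_value:

/*
 * User-space sketch of the hex-plus-ASCII layout db_xcdump produces.
 */
#include <stdio.h>
#include <string.h>

#define ROW_BYTES 16

static void xcdump(const unsigned char *buf, size_t len)
{
    for (size_t off = 0; off < len; off += ROW_BYTES) {
        size_t n = (len - off < ROW_BYTES) ? len - off : ROW_BYTES;

        printf("%08zX:", off);
        for (size_t i = 0; i < n; i++) {
            if (i % 4 == 0)            /* extra gap every word, as above */
                printf(" ");
            printf("%02X ", buf[off + i]);
        }
        /* ASCII column: printable bytes as-is, the rest as '.' */
        printf("*");
        for (size_t i = 0; i < n; i++) {
            unsigned char c = buf[off + i];
            putchar((c >= ' ' && c <= '~') ? c : '.');
        }
        printf("*\n");
    }
}

int main(void)
{
    const char *s = "ddb hexdump sketch\n\t\x01\x02";
    xcdump((const unsigned char *)s, strlen(s));
    return 0;
}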
" ": ""); for (i = 0; i < bcount; i++) { value = data[i]; - db_printf("%llc", (value >= ' ' && value <= '~')? value: '.'); + db_printf("%llc", (value >= ' ' && value <= '~')? (unsigned long long)value: (unsigned long long)'.'); } db_printf("*\n"); } diff --git a/osfmk/ddb/db_expr.c b/osfmk/ddb/db_expr.c index 72d649989..46bf7fc09 100644 --- a/osfmk/ddb/db_expr.c +++ b/osfmk/ddb/db_expr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -97,7 +97,7 @@ db_term(db_expr_t *valuep) if (db_allow_unprefixed_hexa && db_radix == 16 && db_tok_string) { char *cp; - int value; + db_expr_t value; value = 0; valid_hexa = TRUE; @@ -118,7 +118,7 @@ db_term(db_expr_t *valuep) db_printf("Ambiguous constant %x used as a symbol\n", value); } else { - *valuep = (db_expr_t)value; + *valuep = value; } } } diff --git a/osfmk/ddb/db_ext_symtab.c b/osfmk/ddb/db_ext_symtab.c index 04da32c41..58cba851d 100644 --- a/osfmk/ddb/db_ext_symtab.c +++ b/osfmk/ddb/db_ext_symtab.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,7 +57,7 @@ #include #include #include -#include /* prototype */ +#include /* prototype */ #if MACH_KDB && MACH_DEBUG #include @@ -77,11 +77,11 @@ */ kern_return_t host_load_symbol_table( - host_priv_t host_priv, - task_t task, - char * name, - pointer_t symtab, - mach_msg_type_number_t symtab_count) + __unused host_priv_t host_priv, + __unused task_t task, + __unused char * name, + __unused pointer_t symtab, + __unused mach_msg_type_number_t symtab_count) { return KERN_FAILURE; } diff --git a/osfmk/ddb/db_macro.c b/osfmk/ddb/db_macro.c index 6ea64bc83..5365f9c6d 100644 --- a/osfmk/ddb/db_macro.c +++ b/osfmk/ddb/db_macro.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -201,12 +201,12 @@ db_arg_variable( if (flag == DB_VAR_SHOW) { value = db_macro_args[ap->hidden_level][ap->suffix[0]-1]; - db_printf("%#lln", value); + db_printf("%#lln", (unsigned long long)value); db_find_xtrn_task_sym_and_offset(value, &name, &offset, TASK_NULL); if (name != (char *)0 && offset <= db_maxoff && offset != value) { db_printf("\t%s", name); if (offset != 0) - db_printf("+%#r", offset); + db_printf("+%#llr", (unsigned long long)offset); } return(0); } diff --git a/osfmk/ddb/db_output.c b/osfmk/ddb/db_output.c index 2079a572d..9be9c4eaf 100644 --- a/osfmk/ddb/db_output.c +++ b/osfmk/ddb/db_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -281,7 +281,7 @@ db_end_line(void) */ void -db_printf(char *fmt, ...) +db_printf(const char *fmt, ...) { va_list listp; @@ -293,7 +293,7 @@ db_printf(char *fmt, ...) /* alternate name */ void -kdbprintf(char *fmt, ...) +kdbprintf(const char *fmt, ...) { va_list listp; @@ -308,7 +308,7 @@ int db_indent = 0; * Printing (to console) with indentation. */ void -iprintf(char *fmt, ...) +iprintf(const char *fmt, ...) 
{ va_list listp; register int i; diff --git a/osfmk/ddb/db_output.h b/osfmk/ddb/db_output.h index c1a398935..38a834fee 100644 --- a/osfmk/ddb/db_output.h +++ b/osfmk/ddb/db_output.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,71 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.12.2 1994/09/23 01:20:43 ezf - * change marker to not FREE - * [1994/09/22 21:10:36 ezf] - * - * Revision 1.2.12.1 1994/06/11 21:12:00 bolinger - * Merge up to NMK17.2. - * [1994/06/11 20:03:58 bolinger] - * - * Revision 1.2.10.2 1994/03/07 16:37:44 paire - * Added definition of indent. - * [94/02/17 paire] - * - * Revision 1.2.10.1 1994/02/08 10:58:14 bernadat - * Added db_reserve_output_position - * db_reset_more - * prototypes - * [94/02/07 bernadat] - * - * Revision 1.2.2.4 1993/08/11 22:12:12 elliston - * Add ANSI Prototypes. CR #9523. - * [1993/08/11 03:33:44 elliston] - * - * Revision 1.2.2.3 1993/07/27 18:27:52 elliston - * Add ANSI prototypes. CR #9523. - * [1993/07/27 18:12:35 elliston] - * - * Revision 1.2.2.2 1993/06/09 02:20:29 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:56:49 jeffc] - * - * Revision 1.2 1993/04/19 16:02:43 devrcs - * Changes from mk78: - * db_printf is void. - * [92/05/18 jfriedl] - * [93/02/03 bruel] - * - * Revision 1.1 1992/09/30 02:24:18 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 15:35:07 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:06:49 mrt - * Changed to new Mach copyright - * [91/01/31 16:18:48 mrt] - * - * Revision 2.2 90/08/27 21:51:32 dbg - * Created. - * [90/08/07 dbg] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -137,9 +72,9 @@ void db_force_whitespace(void); void db_putchar(char c); int db_print_position(void); void db_end_line(void); -void db_printf(char *fmt, ...); -void kdbprintf(char *fmt, ...); -void iprintf(char *fmt, ...); +void db_printf(const char *fmt, ...); +void kdbprintf(const char *fmt, ...); +void iprintf(const char *fmt, ...); boolean_t db_reserve_output_position(int len); void db_reset_more(void); void db_output_prompt(void); diff --git a/osfmk/ddb/db_print.c b/osfmk/ddb/db_print.c index 6a1696b4d..9bb1c9064 100644 --- a/osfmk/ddb/db_print.c +++ b/osfmk/ddb/db_print.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
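The db_output changes above const-qualify the format argument of db_printf, kdbprintf, and iprintf in both the definitions and the prototypes, so callers can pass string literals without discarding qualifiers. The wrappers themselves follow the standard va_list forwarding shape. A sketch, with vprintf standing in for ddb's internal output routine and db_indent mirroring the indentation counter iprintf consumes:

/*
 * Sketch of the const-correct variadic wrapper pattern adopted above.
 */
#include <stdarg.h>
#include <stdio.h>

int db_indent = 0;

/* 'const char *fmt' matches the new prototypes; the va_list is
 * forwarded untouched. */
static void db_printf(const char *fmt, ...)
{
    va_list listp;

    va_start(listp, fmt);
    vprintf(fmt, listp);
    va_end(listp);
}

/* Indented variant: emit db_indent spaces, then format as usual. */
static void iprintf(const char *fmt, ...)
{
    va_list listp;

    for (int i = 0; i < db_indent; i++)
        putchar(' ');
    va_start(listp, fmt);
    vprintf(fmt, listp);
    va_end(listp);
}

int main(void)
{
    db_printf("top level\n");
    db_indent = 4;
    iprintf("indented %d\n", db_indent);
    return 0;
}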
* * @APPLE_LICENSE_HEADER_START@ * @@ -63,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -90,11 +89,11 @@ */ char *db_act_stat( - register thread_act_t thr_act, + register thread_t thr_act, char *status); char *db_act_swap_stat( - register thread_act_t thr_act, + register thread_t thr_act, char *status); void db_print_task( @@ -113,12 +112,12 @@ void db_print_one_entry( ipc_space_t space); int db_port_iterate( - thread_act_t thr_act, + thread_t thr_act, boolean_t is_pset, boolean_t do_output); ipc_port_t db_lookup_port( - thread_act_t thr_act, + thread_t thr_act, int id); static void db_print_port_id( @@ -128,7 +127,7 @@ static void db_print_port_id( int n); void db_print_act( - thread_act_t thr_act, + thread_t thr_act, int act_id, int flag); @@ -162,15 +161,15 @@ db_show_regs( task_t task = TASK_NULL; aux_param.modif = modif; - aux_param.thr_act = THR_ACT_NULL; + aux_param.thr_act = THREAD_NULL; if (db_option(modif, 't')) { if (have_addr) { - if (!db_check_act_address_valid((thread_act_t)addr)) + if (!db_check_act_address_valid((thread_t)addr)) return; - aux_param.thr_act = (thread_act_t)addr; + aux_param.thr_act = (thread_t)addr; } else aux_param.thr_act = db_default_act; - if (aux_param.thr_act != THR_ACT_NULL) + if (aux_param.thr_act != THREAD_NULL) task = aux_param.thr_act->task; } for (regp = db_regs; regp < db_eregs; regp++) { @@ -187,13 +186,13 @@ db_show_regs( 12-strlen(regp->name)-((i<10)?1:2), ""); else db_printf("%-12s", regp->name); - db_printf("%#*llN", 2+2*sizeof(db_expr_t), value); + db_printf("%#*llN", 2+2*sizeof(db_expr_t), (unsigned long long)value); db_find_xtrn_task_sym_and_offset((db_addr_t)value, &name, &offset, task); if (name != 0 && offset <= db_maxoff && offset != value) { db_printf("\t%s", name); if (offset != 0) - db_printf("+%#r", offset); + db_printf("+%#llr", (unsigned long long)offset); } db_printf("\n"); } @@ -217,7 +216,7 @@ db_show_regs( char * db_act_stat( - register thread_act_t thr_act, + register thread_t thr_act, char *status) { register char *p = status; @@ -229,20 +228,13 @@ db_act_stat( *p++ = 'n', *p++ = 'g'; *p++ = ' '; - } else if (!thr_act->thread) { - *p++ = 'E', - *p++ = 'm', - *p++ = 'p', - *p++ = 't', - *p++ = 'y'; - *p++ = ' '; } else { - thread_t athread = thr_act->thread; + thread_t athread = thr_act; *p++ = (athread->state & TH_RUN) ? 'R' : '.'; *p++ = (athread->state & TH_WAIT) ? 'W' : '.'; *p++ = (athread->state & TH_SUSP) ? 'S' : '.'; - *p++ = (athread->state & TH_STACK_HANDOFF) ? 'O' : '.'; + *p++ = (!athread->kernel_stack) ? 'O' : '.'; *p++ = (athread->state & TH_UNINT) ? 'N' : '.'; /* show if the FPU has been used */ *p++ = db_act_fp_used(thr_act) ? 'F' : '.'; @@ -253,37 +245,10 @@ db_act_stat( char * db_act_swap_stat( - register thread_act_t thr_act, + register thread_t thr_act, char *status) { register char *p = status; - -#if THREAD_SWAPPER - switch (thr_act->swap_state & TH_SW_STATE) { - case TH_SW_UNSWAPPABLE: - *p++ = 'U'; - break; - case TH_SW_IN: - *p++ = 'I'; - break; - case TH_SW_GOING_OUT: - *p++ = 'G'; - break; - case TH_SW_WANT_IN: - *p++ = 'W'; - break; - case TH_SW_OUT: - *p++ = 'O'; - break; - case TH_SW_COMING_IN: - *p++ = 'C'; - break; - default: - *p++ = '?'; - break; - } - *p++ = (thr_act->swap_state & TH_SW_TASK_SWAPPING) ? 
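db_act_stat, shown above, encodes a thread's state as a fixed-width flag string: with activations gone, the 'Empty' case disappears and the 'O' (stack handed off) flag is now derived from a missing kernel stack rather than the retired TH_STACK_HANDOFF bit. A sketch of the encoding; the bit values and field names are assumptions modeled on the hunk:

/*
 * Sketch of the status-string encoding db_act_stat builds.
 */
#include <stdio.h>

#define TH_RUN   0x01
#define TH_WAIT  0x02
#define TH_SUSP  0x04
#define TH_UNINT 0x08

struct thread_sketch {
    unsigned state;
    void    *kernel_stack;   /* NULL when the stack was handed off */
    int      fp_used;
};

static char *db_act_stat(const struct thread_sketch *t, char *status)
{
    char *p = status;

    *p++ = (t->state & TH_RUN)   ? 'R' : '.';
    *p++ = (t->state & TH_WAIT)  ? 'W' : '.';
    *p++ = (t->state & TH_SUSP)  ? 'S' : '.';
    *p++ = (!t->kernel_stack)    ? 'O' : '.';   /* stack handed off */
    *p++ = (t->state & TH_UNINT) ? 'N' : '.';
    *p++ = t->fp_used            ? 'F' : '.';
    *p = '\0';
    return status;
}

int main(void)
{
    char buf[8];
    struct thread_sketch t = { TH_RUN, (void *)1, 1 };

    printf("%s\n", db_act_stat(&t, buf));   /* prints "R....F" */
    return 0;
}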
'T' : '.'; -#endif /* THREAD_SWAPPER */ *p++ = 0; return status; @@ -294,7 +259,7 @@ char *policy_list[] = { "TS", "RR", "??", "FF", void db_print_act( - thread_act_t thr_act, + thread_t thr_act, int act_id, int flag) { @@ -309,7 +274,7 @@ db_print_act( return; } - athread = thr_act->thread; + athread = thr_act; if (flag & OPTION_USER) { if (flag & OPTION_LONG) { @@ -322,7 +287,7 @@ db_print_act( policy = ((athread && (athread->sched_mode&TH_MODE_TIMESHARE))? 1: 2); db_printf("%s%3d%c %0*X %s %s %0*X %0*X %3d %3d/%s ", indent, act_id, - (thr_act == current_act())? '#': ':', + (thr_act == current_thread())? '#': ':', 2*sizeof(vm_offset_t), thr_act, db_act_stat(thr_act, status), db_act_swap_stat(thr_act, swap_status), @@ -345,7 +310,7 @@ db_print_act( } else db_printf(" "); db_printf("%3d%c(%0*X,%s)", act_id, - (thr_act == current_act())? '#': ':', + (thr_act == current_thread())? '#': ':', 2*sizeof(vm_offset_t), thr_act, db_act_stat(thr_act, status)); } @@ -362,9 +327,7 @@ db_print_act( (athread->state & TH_SUSP) ? 'S' : ' ', (athread->state & TH_UNINT)? 'N' : ' ', db_act_fp_used(thr_act) ? 'F' : ' '); - /* Obsolete TH_STACK_HANDOFF code, left for now; might enhance - * to print out safe_points instead */ - if (athread->state & TH_STACK_HANDOFF) { + if (!athread->kernel_stack) { if (athread->continuation) { db_printf("("); db_task_printsym((db_addr_t)athread->continuation, @@ -391,13 +354,13 @@ db_print_task( int task_id, int flag) { - thread_act_t thr_act; + thread_t thr_act; int act_id; char sstate; if (flag & OPTION_USER) { if (flag & OPTION_TASK_TITLE) { - db_printf(" ID: TASK MAP THD RES SUS PR SW %s", + db_printf(" ID: TASK MAP THD SUS PR SW %s", DB_TASK_NAME_TITLE); if ((flag & OPTION_LONG) == 0) db_printf(" ACTS"); @@ -428,10 +391,10 @@ db_print_task( sstate = 'I'; #endif /* TASK_SWAPPER */ /*** ??? 
fix me ***/ - db_printf("%3d: %0*X %0*X %3d %3d %3d %2d %c ", + db_printf("%3d: %0*X %0*X %3d %3d %2d %c ", task_id, 2*sizeof(vm_offset_t), task, 2*sizeof(vm_offset_t), task->map, - task->thread_count, task->res_thread_count, + task->thread_count, task->suspend_count, task->priority, sstate); @@ -443,7 +406,7 @@ db_print_task( } else if (task->thread_count <= 1) flag &= ~OPTION_INDENT; act_id = 0; - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { + queue_iterate(&task->threads, thr_act, thread_t, task_threads) { db_print_act(thr_act, act_id, flag); flag &= ~OPTION_THREAD_TITLE; act_id++; @@ -469,7 +432,7 @@ db_print_task( flag &= ~OPTION_INDENT; act_id = 0; queue_iterate(&task->threads, thr_act, - thread_act_t, task_threads) { + thread_t, task_threads) { db_print_act(thr_act, act_id++, flag); flag &= ~OPTION_THREAD_TITLE; } @@ -484,7 +447,7 @@ db_print_space( int flag) { ipc_space_t space; - thread_act_t act = (thread_act_t)queue_first(&task->threads); + thread_t act = (thread_t)queue_first(&task->threads); int count; count = 0; @@ -538,13 +501,13 @@ db_show_one_task_vm( db_expr_t count, char *modif) { - thread_act_t thread; + thread_t thread; task_t task; int task_id; if (have_addr == FALSE) { - if ((thread = db_default_act) == THR_ACT_NULL) { - if ((thread = current_act()) == THR_ACT_NULL) { + if ((thread = db_default_act) == THREAD_NULL) { + if ((thread = current_thread()) == THREAD_NULL) { db_printf("no thread.\n"); return; } @@ -638,7 +601,7 @@ db_show_one_space( task = (task_t) addr; if ((task_id = db_lookup_task(task)) < 0) { - db_printf("bad task address 0x%x\n", addr); + db_printf("bad task address 0x%llx\n", (unsigned long long)addr); db_error(0); /*NOTREACHED*/ } @@ -701,7 +664,7 @@ db_show_one_act( { int flag; int act_id; - thread_act_t thr_act; + thread_t thr_act; flag = OPTION_THREAD_TITLE; if (db_option(modif, 'u')) @@ -710,16 +673,16 @@ db_show_one_act( flag |= OPTION_LONG; if (!have_addr) { - thr_act = current_act(); - if (thr_act == THR_ACT_NULL) { + thr_act = current_thread(); + if (thr_act == THREAD_NULL) { db_error("No thr_act\n"); /*NOTREACHED*/ } } else - thr_act = (thread_act_t) addr; + thr_act = (thread_t) addr; if ((act_id = db_lookup_act(thr_act)) < 0) { - db_printf("bad thr_act address %#llX\n", addr); + db_printf("bad thr_act address %#llX\n", (unsigned long long)addr); db_error(0); /*NOTREACHED*/ } @@ -735,12 +698,12 @@ db_show_one_act( 2*sizeof(vm_offset_t), thr_act->task, act_id); db_print_act(thr_act, act_id, flag); } - if (db_option(modif, 'i') && thr_act->thread && - (thr_act->thread->state & TH_WAIT) && - thr_act->thread->kernel_stack == 0) { + if (db_option(modif, 'i') && + (thr_act->state & TH_WAIT) && + thr_act->kernel_stack == 0) { db_printf("Wait State: option 0x%x\n", - thr_act->thread->ith_option); + thr_act->ith_option); } } @@ -771,7 +734,7 @@ db_show_one_task( task = (task_t) addr; if ((task_id = db_lookup_task(task)) < 0) { - db_printf("bad task address 0x%llX\n", addr); + db_printf("bad task address 0x%llX\n", (unsigned long long)addr); db_error(0); /*NOTREACHED*/ } @@ -786,37 +749,21 @@ db_show_shuttle( db_expr_t count, char * modif) { - thread_t shuttle; - thread_act_t thr_act; + thread_t thread; if (have_addr) - shuttle = (thread_t) addr; + thread = (thread_t) addr; else { - thr_act = current_act(); - if (thr_act == THR_ACT_NULL) { - db_error("No thr_act\n"); - /*NOTREACHED*/ - } - shuttle = thr_act->thread; - if (shuttle == THREAD_NULL) { - db_error("No shuttle associated with current thr_act\n"); + thread = 
current_thread(); + if (thread == THREAD_NULL) { + db_error("No thread\n"); /*NOTREACHED*/ } } - db_printf("shuttle %x:\n", shuttle); - if (shuttle->top_act == THR_ACT_NULL) - db_printf(" no activations\n"); - else { - db_printf(" activations:"); - for (thr_act = shuttle->top_act; thr_act != THR_ACT_NULL; - thr_act = thr_act->lower) { - if (thr_act != shuttle->top_act) - printf(" from"); - printf(" $task%d.%d(%x)", db_lookup_task(thr_act->task), - db_lookup_act(thr_act), thr_act); - } - db_printf("\n"); - } + db_printf("thread %x:\n", thread); + printf(" $task%d.%d(%x)", db_lookup_task(thread->task), + db_lookup_act(thread), thread); + db_printf("\n"); } int @@ -880,7 +827,7 @@ db_print_one_entry( int db_port_iterate( - thread_act_t thr_act, + thread_t thr_act, boolean_t is_pset, boolean_t do_output) { @@ -920,13 +867,13 @@ db_port_iterate( ipc_port_t db_lookup_port( - thread_act_t thr_act, + thread_t thr_act, int id) { register ipc_space_t space; register ipc_entry_t entry; - if (thr_act == THR_ACT_NULL) + if (thr_act == THREAD_NULL) return(0); space = thr_act->task->itk_space; if (id < 0 || id >= space->is_table_size) @@ -958,16 +905,16 @@ db_show_port_id( db_expr_t count, char * modif) { - thread_act_t thr_act; + thread_t thr_act; if (!have_addr) { - thr_act = current_act(); - if (thr_act == THR_ACT_NULL) { + thr_act = current_thread(); + if (thr_act == THREAD_NULL) { db_error("No thr_act\n"); /*NOTREACHED*/ } } else - thr_act = (thread_act_t) addr; + thr_act = (thread_t) addr; if (db_lookup_act(thr_act) < 0) { db_printf("Bad thr_act address 0x%llX\n", addr); db_error(0); @@ -1008,7 +955,6 @@ db_show_runq( run_queue_t runq; boolean_t showedany = FALSE; -#if NCPUS > 1 /* This code has not been tested. */ queue_iterate(&pset->processors, proc, processor_t, processors) { runq = &proc->runq; if (runq->count > 0) { @@ -1017,10 +963,6 @@ db_show_runq( showedany = TRUE; } } -#endif /* NCPUS > 1 */ -#ifndef NCPUS -#error NCPUS undefined -#endif runq = &pset->runq; if (runq->count > 0) { db_printf("PROCESSOR SET %x\n", pset); @@ -1035,9 +977,8 @@ void db_show_one_runq( run_queue_t runq) { - int i, task_id, thr_act_id; + int i, task_id, thread_id; queue_t q; - thread_act_t thr_act; thread_t thread; task_t task; @@ -1046,11 +987,10 @@ db_show_one_runq( if (!queue_empty(q)) { db_printf("%3d:", i); queue_iterate(q, thread, thread_t, links) { - thr_act = thread->top_act; - task = thr_act->task; + task = thread->task; task_id = db_lookup_task(task); - thr_act_id = db_lookup_task_act(task, thr_act); - db_printf(" %d.%d", task_id, thr_act_id); + thread_id = db_lookup_task_act(task, thread); + db_printf(" %d.%d", task_id, thread_id); } db_printf("\n"); } diff --git a/osfmk/ddb/db_sym.c b/osfmk/ddb/db_sym.c index 21b597522..b6c01ca52 100644 --- a/osfmk/ddb/db_sym.c +++ b/osfmk/ddb/db_sym.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -650,7 +650,7 @@ db_search_task_symbol( db_addr_t *offp, /* better be unsigned */ task_t task) { - unsigned long diff, newdiff; + db_addr_t diff, newdiff; register int i; db_symtab_t *sp; db_sym_t ret = DB_SYM_NULL, sym; @@ -660,16 +660,16 @@ db_search_task_symbol( task = db_current_task(); map_for_val = (task == TASK_NULL)? 
VM_MAP_NULL: task->map; again: - newdiff = diff = ~0UL; + newdiff = diff = -1; db_last_symtab = 0; for (sp = &db_symtabs[0], i = 0; i < db_nsymtab; sp++, i++) { - if (((vm_map_t)sp->map_pointer == VM_MAP_NULL || - (vm_map_t)sp->map_pointer == map_for_val) && - (sp->maxsym == 0 || - ((unsigned long) val >= sp->minsym && - (unsigned long) val <= sp->maxsym))) { + if ((((vm_map_t)sp->map_pointer == VM_MAP_NULL) || + ((vm_map_t)sp->map_pointer == map_for_val)) && + ((sp->maxsym == 0) || + ((val >= (db_addr_t)sp->minsym) && + (val <= (db_addr_t)sp->maxsym)))) { sym = X_db_search_symbol(sp, val, strategy, (db_expr_t *)&newdiff); if (newdiff < diff) { @@ -704,7 +704,7 @@ db_search_task_symbol_and_line( task_t task, int *argsp) { - unsigned long diff, newdiff; + db_addr_t diff, newdiff; register int i; db_symtab_t *sp; db_sym_t ret = DB_SYM_NULL, sym; @@ -728,24 +728,25 @@ db_search_task_symbol_and_line( for (sp = &db_symtabs[0], i = 0; i < db_nsymtab; sp++, i++) { - if (((vm_map_t)sp->map_pointer == VM_MAP_NULL || - (vm_map_t)sp->map_pointer == map_for_val) && - (sp->maxsym == 0 || - ((unsigned long) val >= sp->minsym && - (unsigned long) val <= sp->maxsym))) { - sym = X_db_search_by_addr(sp, val, &filename, &func, - &linenum, (db_expr_t *)&newdiff, - &args); - if (sym && newdiff < diff) { - db_last_symtab = sp; - diff = newdiff; - ret = sym; - *filenamep = filename; - *linenump = linenum; - *argsp = args; - if (diff <= db_search_maxoff) - break; - } + if ((((vm_map_t)sp->map_pointer == VM_MAP_NULL) || + ((vm_map_t)sp->map_pointer == map_for_val)) && + ((sp->maxsym == 0) || + ((val >= (db_addr_t)sp->minsym) && + (val <= (db_addr_t)sp->maxsym)))) { + + sym = X_db_search_by_addr(sp, val, &filename, &func, + &linenum, (db_expr_t *)&newdiff, + &args); + if (sym && newdiff < diff) { + db_last_symtab = sp; + diff = newdiff; + ret = sym; + *filenamep = filename; + *linenump = linenum; + *argsp = args; + if (diff <= db_search_maxoff) + break; + } } } if (ret == DB_SYM_NULL && map_for_val != VM_MAP_NULL) { @@ -808,11 +809,11 @@ db_symbol_values( void db_task_printsym( - db_expr_t off, + db_addr_t off, db_strategy_t strategy, task_t task) { - db_addr_t d; + db_expr_t d; char *filename; char *name; db_expr_t value; @@ -820,19 +821,19 @@ db_task_printsym( db_sym_t cursym; if (off >= db_maxval || off < db_minval) { - db_printf("%#n", off); + db_printf("%#lln", (unsigned long long)off); return; } cursym = db_search_task_symbol(off, strategy, &d, task); db_symbol_values(0, cursym, &name, &value); if (name == 0 || d >= db_maxoff || value == 0) { - db_printf("%#n", off); + db_printf("%#lln",(unsigned long long) off); return; } db_printf("%s", name); if (d) - db_printf("+0x%x", d); + db_printf("+%llx", (unsigned long long)d); if (strategy == DB_STGY_PROC) { if (db_line_at_pc(cursym, &filename, &linenum, off)) { db_printf(" [%s", filename); @@ -908,7 +909,7 @@ db_task_getlinenum( db_strategy_t strategy = DB_STGY_PROC; if (off >= db_maxval || off < db_minval) { - db_printf("%#n", off); + db_printf("%#lln", (unsigned long long)off); return(-1); } cursym = db_search_task_symbol(off, strategy, &d, task); @@ -1299,7 +1300,6 @@ db_clone_symtabXXX( char * memp; vm_size_t size; long offset; - extern vm_offset_t kalloc(vm_size_t); extern void db_clone_offsetXXX(char *, long); if (db_nsymtab >= MAXNOSYMTABS) { @@ -1316,7 +1316,7 @@ db_clone_symtabXXX( } /* alloc new symbols */ size = (vm_size_t)(st_src->end - st_src->private); - memp = (char *)kalloc( round_page_32(size) ); + memp = (char *)kalloc( round_page(size) ); if 
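db_search_task_symbol, reworked above, scans every eligible symbol table for the symbol at or below the target address with the smallest offset, seeding the running minimum with (db_addr_t)-1 rather than ~0UL so the sentinel matches the width of db_addr_t. A user-space sketch of the nearest-symbol scan over a hypothetical table:

/*
 * Sketch of the nearest-symbol scan; the table layout is hypothetical.
 */
#include <stdio.h>

typedef unsigned long long db_addr_t;

struct sym { const char *name; db_addr_t addr; };

static const struct sym symtab[] = {
    { "db_printf",  0x1000 },
    { "db_examine", 0x1400 },
    { "db_error",   0x2000 },
};

static const struct sym *nearest(db_addr_t val, db_addr_t *offp)
{
    db_addr_t diff = (db_addr_t)-1;      /* "no match yet" sentinel */
    const struct sym *best = NULL;

    for (size_t i = 0; i < sizeof symtab / sizeof symtab[0]; i++) {
        if (symtab[i].addr > val)        /* only symbols at or below val */
            continue;
        db_addr_t newdiff = val - symtab[i].addr;
        if (newdiff < diff) {            /* keep the closest one */
            diff = newdiff;
            best = &symtab[i];
        }
    }
    *offp = diff;
    return best;
}

int main(void)
{
    db_addr_t off;
    const struct sym *s = nearest(0x1432, &off);

    if (s)
        printf("%s+0x%llX\n", s->name, (unsigned long long)off);
    return 0;
}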
(!memp) { db_printf("db_clone_symtab: no memory for symtab\n"); return; @@ -1402,7 +1402,7 @@ static db_sym_t no_search( db_strategy_t strategy, db_expr_t *diffp) { - db_printf("Bogus search for offset %#Xn", off); + db_printf("Bogus search for offset %#llXn", (unsigned long long)off); return DB_SYM_NULL; } @@ -1413,7 +1413,7 @@ static boolean_t no_line_at_pc( int *line, db_expr_t pc) { - db_printf("Bogus search for pc %#X\n", pc); + db_printf("Bogus search for pc %#llX\n", (unsigned long long)pc); return FALSE; } @@ -1436,7 +1436,7 @@ static db_sym_t no_search_by_addr( db_expr_t *diffp, int *args) { - db_printf("Bogus search for address %#X\n", off); + db_printf("Bogus search for address %#llX\n", (unsigned long long)off); return DB_SYM_NULL; } diff --git a/osfmk/ddb/db_sym.h b/osfmk/ddb/db_sym.h index e5d19a4d9..4252e069b 100644 --- a/osfmk/ddb/db_sym.h +++ b/osfmk/ddb/db_sym.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,146 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.20.6 1996/01/09 19:16:22 devrcs - * Add proto for db_task_getlinenum(). - * [1995/12/01 21:42:34 jfraser] - * - * Revision 1.2.20.5 1995/02/28 01:58:53 dwm - * Merged with changes from 1.2.20.4 - * [1995/02/28 01:53:54 dwm] - * - * mk6 CR1120 - Merge mk6pro_shared into cnmk_shared - * [1995/02/28 01:12:57 dwm] - * - * Revision 1.2.20.4 1995/02/23 21:43:48 alanl - * Prepend a "db_" to qsort and qsort_limit_search - * (collisions with the real qsort in stdlib.h) - * [95/02/14 travos] - * - * Expanded db_sym_switch structure to make ddb object format dependent; - * this allows us to remove all of the aout dependencies. - * [95/01/24 sjs] - * - * Revision 1.2.23.4 1994/12/22 20:36:20 bolinger - * Fix ri-osc CR881: Fixed glitch in use of symtab cloning hack. - * [1994/12/22 20:35:17 bolinger] - * - * Revision 1.2.23.3 1994/11/02 18:36:07 dwm - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup, prototypes - * fix X_db_search_by_addr macro to match prototype - * [1994/11/02 18:16:20 dwm] - * - * Revision 1.2.20.4 1995/02/23 21:43:48 alanl - * Prepend a "db_" to qsort and qsort_limit_search - * (collisions with the real qsort in stdlib.h) - * [95/02/14 travos] - * - * Expanded db_sym_switch structure to make ddb object format dependent; - * this allows us to remove all of the aout dependencies. - * [95/01/24 sjs] - * - * Revision 1.2.23.4 1994/12/22 20:36:20 bolinger - * Fix ri-osc CR881: Fixed glitch in use of symtab cloning hack. - * [1994/12/22 20:35:17 bolinger] - * - * Revision 1.2.23.3 1994/11/02 18:36:07 dwm - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup, prototypes - * fix X_db_search_by_addr macro to match prototype - * [1994/11/02 18:16:20 dwm] - * - * Revision 1.2.20.2 1994/09/23 01:21:51 ezf - * change marker to not FREE - * [1994/09/22 21:11:04 ezf] - * - * Revision 1.2.20.1 1994/06/11 21:12:25 bolinger - * Merge up to NMK17.2. 
- * [1994/06/11 20:04:14 bolinger] - * - * Revision 1.2.14.1 1994/02/08 10:58:56 bernadat - * Added db_sym_print_completion - * db_sym_parse_and_lookup_incomplete - * db_sym_parse_and_print_completion - * db_print_completion - * db_lookup_incomplete - * ddb_init - * prototypes - * - * Changed func type to db_sym_parse_and_lookup prototype - * - * Added definition of db_maxoff. - * [93/08/12 paire] - * [94/02/07 bernadat] - * - * Revision 1.2.18.1 1994/06/08 19:11:28 dswartz - * Preemption merge. - * [1994/06/08 19:10:27 dswartz] - * - * Revision 1.2.17.2 1994/06/01 21:34:50 klj - * Initial preemption code base merge - * - * Revision 1.2.4.3 1993/07/27 18:28:12 elliston - * Add ANSI prototypes. CR #9523. - * [1993/07/27 18:13:02 elliston] - * - * Revision 1.2.4.2 1993/06/09 02:20:56 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:57:18 jeffc] - * - * Revision 1.2 1993/04/19 16:03:18 devrcs - * Added 3 new fields in db_symtab_t for sorting. - * [barbou@gr.osf.org] - * [92/12/03 bernadat] - * - * Revision 1.1 1992/09/30 02:24:22 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/10/09 16:02:45 af - * Revision 2.5.1.1 91/10/05 13:07:39 jeffreyh - * Added macro definitions of db_find_task_sym_and_offset(), - * db_find_xtrn_task_sym_and_offset(), db_search_symbol(). - * [91/08/29 tak] - * - * Revision 2.5.1.1 91/10/05 13:07:39 jeffreyh - * Added macro definitions of db_find_task_sym_and_offset(), - * db_find_xtrn_task_sym_and_offset(), db_search_symbol(). - * [91/08/29 tak] - * - * Revision 2.5 91/07/31 17:31:49 dbg - * Add map pointer and storage for name to db_symtab_t. - * [91/07/30 16:45:08 dbg] - * - * Revision 2.4 91/05/14 15:36:08 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:07:12 mrt - * Changed to new Mach copyright - * [91/01/31 16:19:27 mrt] - * - * Revision 2.2 90/08/27 21:52:39 dbg - * Changed type of db_sym_t to char * - it's a better type for an - * opaque pointer. - * [90/08/22 dbg] - * - * Created. - * [90/08/19 af] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -381,7 +241,7 @@ extern void db_clone_symtabXXX(char *, char *, vm_offset_t); extern db_symtab_t *db_symtab_cloneeXXX(char *); -extern db_task_getlinenum( db_expr_t, task_t); +extern int db_task_getlinenum( db_expr_t, task_t); /* Some convenience macros. 
*/ diff --git a/osfmk/ddb/db_task_thread.c b/osfmk/ddb/db_task_thread.c index e38013746..23626b8a5 100644 --- a/osfmk/ddb/db_task_thread.c +++ b/osfmk/ddb/db_task_thread.c @@ -70,7 +70,7 @@ #define DB_MAX_PSETS 0x10000 /* max # of processor sets */ task_t db_default_task; /* default target task */ -thread_act_t db_default_act; /* default target thr_act */ +thread_t db_default_act; /* default target thr_act */ @@ -78,7 +78,7 @@ thread_act_t db_default_act; /* default target thr_act */ */ task_t db_lookup_task_id(register int task_id); -static thread_act_t db_lookup_act_id( +static thread_t db_lookup_act_id( task_t task, register int thread_id); @@ -115,15 +115,15 @@ db_lookup_task(task_t target_task) int db_lookup_task_act( task_t task, - thread_act_t target_act) + thread_t target_act) { - register thread_act_t thr_act; + register thread_t thr_act; register int act_id; act_id = 0; if (queue_first(&task->threads) == 0) return(-1); - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { + queue_iterate(&task->threads, thr_act, thread_t, task_threads) { if (target_act == thr_act) return(act_id); if (act_id++ >= DB_MAX_THREADID) @@ -137,7 +137,7 @@ db_lookup_task_act( * as the thread id. */ int -db_lookup_act(thread_act_t target_act) +db_lookup_act(thread_t target_act) { register int act_id; register task_t task; @@ -166,7 +166,7 @@ db_lookup_act(thread_act_t target_act) */ int force_act_lookup = 0; boolean_t -db_check_act_address_valid(thread_act_t thr_act) +db_check_act_address_valid(thread_t thr_act) { if (!force_act_lookup && db_lookup_act(thr_act) < 0) { db_printf("Bad thr_act address 0x%x\n", thr_act); @@ -202,23 +202,23 @@ db_lookup_task_id(register task_id) /* * convert (task_id, act_id) pair to thr_act address */ -static thread_act_t +static thread_t db_lookup_act_id( task_t task, register int act_id) { - register thread_act_t thr_act; + register thread_t thr_act; if (act_id > DB_MAX_THREADID) - return(THR_ACT_NULL); + return(THREAD_NULL); if (queue_first(&task->threads) == 0) - return(THR_ACT_NULL); - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { + return(THREAD_NULL); + queue_iterate(&task->threads, thr_act, thread_t, task_threads) { if (act_id-- <= 0) return(thr_act); } - return(THR_ACT_NULL); + return(THREAD_NULL); } /* @@ -227,15 +227,15 @@ db_lookup_act_id( */ boolean_t db_get_next_act( - thread_act_t *actp, + thread_t *actp, int position) { db_expr_t value; - thread_act_t thr_act; + thread_t thr_act; - *actp = THR_ACT_NULL; + *actp = THREAD_NULL; if (db_expression(&value)) { - thr_act = (thread_act_t) value; + thr_act = (thread_t) value; if (!db_check_act_address_valid(thr_act)) { db_flush_lex(); return(FALSE); @@ -256,7 +256,7 @@ void db_init_default_act(void) { if (db_lookup_act(db_default_act) < 0) { - db_default_act = THR_ACT_NULL; + db_default_act = THREAD_NULL; db_default_task = TASK_NULL; } else db_default_task = db_default_act->task; @@ -273,7 +273,7 @@ db_set_default_act( int flag, db_var_aux_param_t ap) /* unused */ { - thread_act_t thr_act; + thread_t thr_act; int task_id; int act_id; @@ -293,8 +293,8 @@ db_set_default_act( *valuep = (db_expr_t) db_default_act; return(0); } - thr_act = (thread_act_t) *valuep; - if (thr_act != THR_ACT_NULL && !db_check_act_address_valid(thr_act)) + thr_act = (thread_t) *valuep; + if (thr_act != THREAD_NULL && !db_check_act_address_valid(thr_act)) db_error(0); /* NOTREACHED */ db_default_act = thr_act; @@ -314,7 +314,7 @@ db_get_task_act( db_var_aux_param_t ap) { task_t task; - thread_act_t thr_act; 
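db_lookup_act_id, converted above to thread_t, resolves the numeric suffix of a $task%d.%d variable by walking the task's thread list, bailing out once DB_MAX_THREADID entries have been visited so a corrupted queue cannot hang the debugger. A sketch with a plain singly linked list standing in for queue_iterate:

/*
 * Sketch of the id-to-thread lookup db_lookup_act_id performs.
 */
#include <stdio.h>
#include <stddef.h>

#define DB_MAX_THREADID 0x10000

struct thread_sketch {
    int id;
    struct thread_sketch *next;
};

static struct thread_sketch *lookup_act_id(struct thread_sketch *head,
                                           int act_id)
{
    if (act_id > DB_MAX_THREADID)
        return NULL;                       /* THREAD_NULL in the hunk */
    for (struct thread_sketch *t = head; t != NULL; t = t->next) {
        if (act_id-- <= 0)
            return t;                      /* the act_id'th thread */
    }
    return NULL;
}

int main(void)
{
    struct thread_sketch c = { 2, NULL }, b = { 1, &c }, a = { 0, &b };
    struct thread_sketch *t = lookup_act_id(&a, 1);

    printf("found thread id %d\n", t ? t->id : -1);  /* prints 1 */
    return 0;
}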
+ thread_t thr_act; int task_id; if (flag == DB_VAR_SHOW) { @@ -338,7 +338,7 @@ db_get_task_act( *valuep = (db_expr_t) task; return(0); } - if ((thr_act = db_lookup_act_id(task, ap->suffix[1])) == THR_ACT_NULL){ + if ((thr_act = db_lookup_act_id(task, ap->suffix[1])) == THREAD_NULL){ db_printf("no such thr_act($task%d.%d)\n", ap->suffix[0], ap->suffix[1]); db_error(0); diff --git a/osfmk/ddb/db_task_thread.h b/osfmk/ddb/db_task_thread.h index bb3aab2a0..2c465c157 100644 --- a/osfmk/ddb/db_task_thread.h +++ b/osfmk/ddb/db_task_thread.h @@ -66,10 +66,10 @@ * space. */ #define db_current_task() \ - ((current_act())? current_act()->task: TASK_NULL) + ((current_thread())? current_thread()->task: TASK_NULL) #define db_current_space() \ - ((current_act())?\ - current_act()->task: TASK_NULL) + ((current_thread())?\ + current_thread()->task: TASK_NULL) #define db_target_space(thr_act, user_space) \ ((!(user_space) || ((thr_act)))?\ TASK_NULL: \ @@ -79,24 +79,24 @@ ((task) == TASK_NULL || (task) == db_current_space()) extern task_t db_default_task; /* default target task */ -extern thread_act_t db_default_act; /* default target thr_act */ +extern thread_t db_default_act; /* default target thr_act */ /* Prototypes for functions exported by this module. */ -int db_lookup_act(thread_act_t target_act); +int db_lookup_act(thread_t target_act); int db_lookup_task(task_t target_task); int db_lookup_task_act( task_t task, - thread_act_t target_act); + thread_t target_act); -boolean_t db_check_act_address_valid(thread_act_t thr_act); +boolean_t db_check_act_address_valid(thread_t thr_act); boolean_t db_get_next_act( - thread_act_t *actp, + thread_t *actp, int position); void db_init_default_act(void); diff --git a/osfmk/ddb/db_trap.c b/osfmk/ddb/db_trap.c index 2fcca75ba..59a007cdf 100644 --- a/osfmk/ddb/db_trap.c +++ b/osfmk/ddb/db_trap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -76,10 +76,6 @@ extern int db_inst_count; extern int db_load_count; extern int db_store_count; -#if PARAGON860 && NCPUS > 1 -extern int db_first_cpu; -#endif - void db_task_trap( int type, @@ -94,7 +90,7 @@ db_task_trap( task_t task_space; task = db_current_task(); - task_space = db_target_space(current_act(), user_space); + task_space = db_target_space(current_thread(), user_space); bkpt = IS_BREAKPOINT_TRAP(type, code); watchpt = IS_WATCHPOINT_TRAP(type, code); @@ -132,7 +128,7 @@ db_task_trap( #endif /* __ppc__ */ #endif /* defined(__alpha) */ } else - db_printf("Trouble printing location %#X.\n", db_dot); + db_printf("Trouble printing location %#llX.\n", (unsigned long long)db_dot); db_recover = prev; db_command_loop(); diff --git a/osfmk/ddb/db_variables.c b/osfmk/ddb/db_variables.c index b89091212..163e98d63 100644 --- a/osfmk/ddb/db_variables.c +++ b/osfmk/ddb/db_variables.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -159,7 +159,7 @@ db_cmp_variable_name( || (vp->high >= 0 && ap->suffix[0] > vp->high)))) return(FALSE); strcpy(ap->modif, (*np)? 
np+1: ""); - ap->thr_act = (db_option(ap->modif, 't')?db_default_act: THR_ACT_NULL); + ap->thr_act = (db_option(ap->modif, 't')?db_default_act: THREAD_NULL); ap->level = level; ap->hidden_level = -1; return(TRUE); @@ -249,7 +249,7 @@ db_read_write_variable( ap = &aux_param; ap->modif = ""; ap->level = 0; - ap->thr_act = THR_ACT_NULL; + ap->thr_act = THREAD_NULL; } if (rw_flag == DB_VAR_SET && vp->precious) db_read_write_variable(vp, &old_value, DB_VAR_GET, ap); @@ -262,7 +262,7 @@ db_read_write_variable( (*func)(vp, valuep, rw_flag, ap); if (rw_flag == DB_VAR_SET && vp->precious) db_printf("\t$%s:%s<%#x>\t%#8lln\t=\t%#8lln\n", vp->name, - ap->modif, ap->thr_act, old_value, *valuep); + ap->modif, ap->thr_act, (unsigned long long)old_value, (unsigned long long)*valuep); } void @@ -431,7 +431,7 @@ db_show_one_variable(void) strcpy(aux_param.modif, *p ? p + 1 : ""); aux_param.thr_act = (db_option(aux_param.modif, 't') ? - db_default_act : THR_ACT_NULL); + db_default_act : THREAD_NULL); } if (cur->hidden_level) @@ -504,14 +504,14 @@ db_show_one_variable(void) aux_param.suffix[0] = i; (*cur->fcn)(cur, (db_expr_t *)0, DB_VAR_SHOW, &aux_param); } else { - db_printf("%#lln", *(cur->valuep + i)); + db_printf("%#lln", (unsigned long long)*(cur->valuep + i)); db_find_xtrn_task_sym_and_offset(*(cur->valuep + i), &name, &offset, TASK_NULL); if (name != (char *)0 && offset <= db_maxoff && offset != *(cur->valuep + i)) { db_printf("\t%s", name); if (offset != 0) - db_printf("+%#r", offset); + db_printf("+%#llr", (unsigned long long)offset); } } db_putchar('\n'); @@ -594,7 +594,7 @@ db_show_variable(void) aux_param.modif = ""; aux_param.level = 1; - aux_param.thr_act = THR_ACT_NULL; + aux_param.thr_act = THREAD_NULL; for (cur = db_vars; cur < db_evars; cur++) { i = cur->low; @@ -650,14 +650,14 @@ db_show_variable(void) aux_param.suffix[0] = i; (*cur->fcn)(cur, (db_expr_t *)0, DB_VAR_SHOW, &aux_param); } else { - db_printf("%#lln", *(cur->valuep + i)); + db_printf("%#lln", (unsigned long long)*(cur->valuep + i)); db_find_xtrn_task_sym_and_offset(*(cur->valuep + i), &name, &offset, TASK_NULL); if (name != (char *)0 && offset <= db_maxoff && offset != *(cur->valuep + i)) { db_printf("\t%s", name); if (offset != 0) - db_printf("+%#r", offset); + db_printf("+%#llr", (unsigned long long)offset); } } db_putchar('\n'); diff --git a/osfmk/ddb/db_variables.h b/osfmk/ddb/db_variables.h index d4c62d174..2b13c746e 100644 --- a/osfmk/ddb/db_variables.h +++ b/osfmk/ddb/db_variables.h @@ -169,7 +169,7 @@ struct db_var_aux_param { short level; /* number of levels */ short hidden_level; /* hidden level */ short suffix[DB_VAR_LEVEL]; /* suffix */ - thread_act_t thr_act; /* target thr_act */ + thread_t thr_act; /* target thr_act */ }; typedef struct db_var_aux_param *db_var_aux_param_t; diff --git a/osfmk/ddb/db_watch.c b/osfmk/ddb/db_watch.c index 15271dca4..3a99ac5ab 100644 --- a/osfmk/ddb/db_watch.c +++ b/osfmk/ddb/db_watch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,86 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.12.2 1995/01/06 19:11:06 devrcs - * mk6 CR668 - 1.3b26 merge - * * Revision 1.1.3.5 1994/05/06 18:40:29 tmt - * Merged osc1.3dec/shared with osc1.3b19 - * Merge Alpha changes into osc1.312b source code. - * 64bit cleanup. - * * End1.3merge - * [1994/11/04 08:50:16 dwm] - * - * Revision 1.1.12.1 1994/09/23 01:22:53 ezf - * change marker to not FREE - * [1994/09/22 21:11:33 ezf] - * - * Revision 1.1.10.1 1994/01/05 19:28:22 bolinger - * Be sure to count kernel-loaded tasks as part of kernel address space - * in locating watchpoints. - * [1994/01/04 17:43:33 bolinger] - * - * Revision 1.1.3.3 1993/07/27 18:28:31 elliston - * Add ANSI prototypes. CR #9523. - * [1993/07/27 18:13:30 elliston] - * - * Revision 1.1.3.2 1993/06/02 23:13:14 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:57:54 jeffc] - * - * Revision 1.1 1992/09/30 02:01:33 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.7 91/10/09 16:04:32 af - * Revision 2.6.3.1 91/10/05 13:08:50 jeffreyh - * Added user space watch point support including non current task. - * Changed "map" field of db_watchpoint structure to "task" - * for a user to easily understand the target space. - * [91/08/29 tak] - * - * Revision 2.6.3.1 91/10/05 13:08:50 jeffreyh - * Added user space watch point support including non current task. - * Changed "map" field of db_watchpoint structure to "task" - * for a user to easily understand the target space. - * [91/08/29 tak] - * - * Revision 2.6 91/05/14 15:37:30 mrt - * Correcting copyright - * - * Revision 2.5 91/02/05 17:07:27 mrt - * Changed to new Mach copyright - * [91/01/31 16:20:02 mrt] - * - * Revision 2.4 91/01/08 15:09:24 rpd - * Use db_map_equal, db_map_current, db_map_addr. - * [90/11/10 rpd] - * - * Revision 2.3 90/11/05 14:26:39 rpd - * Initialize db_watchpoints_inserted to TRUE. - * [90/11/04 rpd] - * - * Revision 2.2 90/10/25 14:44:16 rwd - * Made db_watchpoint_cmd parse a size argument. - * [90/10/17 rpd] - * Generalized the watchpoint support. - * [90/10/16 rwd] - * Created. - * [90/10/16 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -394,8 +314,8 @@ db_set_watchpoints(void) for (watch = db_watchpoint_list; watch != 0; watch = watch->link) { map = (watch->task)? watch->task->map: kernel_map; pmap_protect(map->pmap, - trunc_page_32(watch->loaddr), - round_page_32(watch->hiaddr), + vm_map_trunc_page(watch->loaddr), + vm_map_round_page(watch->hiaddr), VM_PROT_READ); } db_watchpoints_inserted = TRUE; @@ -424,8 +344,8 @@ db_find_watchpoint( if (watch->task == task_space) { if ((watch->loaddr <= addr) && (addr < watch->hiaddr)) return (TRUE); - else if ((trunc_page_32(watch->loaddr) <= addr) && - (addr < round_page_32(watch->hiaddr))) + else if ((trunc_page(watch->loaddr) <= addr) && + (addr < round_page(watch->hiaddr))) found = watch; } } diff --git a/osfmk/ddb/db_write_cmd.c b/osfmk/ddb/db_write_cmd.c index fdbf14268..ad6a3c3e4 100644 --- a/osfmk/ddb/db_write_cmd.c +++ b/osfmk/ddb/db_write_cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
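The db_watch.c hunks above replace the hard-coded trunc_page_32/round_page_32 calls with the width-correct trunc_page/round_page when write-protecting watched pages, which matters because a watchpoint covers whole pages and a fault inside the rounded range may still miss the watched bytes themselves. A sketch of the containment test; the 4 KB page size is an assumption:

/*
 * Sketch of the page-granularity check behind db_find_watchpoint.
 */
#include <stdio.h>

typedef unsigned long long vm_offset_t;

#define PAGE_SIZE      4096ULL
#define PAGE_MASK      (PAGE_SIZE - 1)
#define trunc_page(x)  ((x) & ~PAGE_MASK)
#define round_page(x)  (((x) + PAGE_MASK) & ~PAGE_MASK)

/* A watchpoint protects whole pages, so a fault inside the rounded
 * range may land near, but not inside, the watched byte range. */
static int hits_watch(vm_offset_t lo, vm_offset_t hi, vm_offset_t addr)
{
    if (lo <= addr && addr < hi)
        return 1;                              /* exact hit */
    if (trunc_page(lo) <= addr && addr < round_page(hi))
        return 2;                              /* same page, not watched */
    return 0;
}

int main(void)
{
    vm_offset_t lo = 0x2010, hi = 0x2020;

    printf("%d %d %d\n",
           hits_watch(lo, hi, 0x2018),   /* 1: inside the range   */
           hits_watch(lo, hi, 0x2F00),   /* 2: same page region   */
           hits_watch(lo, hi, 0x4000));  /* 0: unrelated page     */
    return 0;
}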
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,86 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.10.1 1994/09/23 01:23:15 ezf - * change marker to not FREE - * [1994/09/22 21:11:42 ezf] - * - * Revision 1.2.8.3 1994/03/17 22:35:48 dwm - * The infamous name change: thread_activation + thread_shuttle = thread. - * [1994/03/17 21:26:02 dwm] - * - * Revision 1.2.8.2 1994/01/12 17:51:11 dwm - * Coloc: initial restructuring to follow Utah model. - * [1994/01/12 17:13:42 dwm] - * - * Revision 1.2.8.1 1994/01/05 19:28:25 bolinger - * Target current address space, not current "task", for writes. - * [1994/01/04 17:44:51 bolinger] - * - * Revision 1.2.2.3 1993/07/27 18:28:36 elliston - * Add ANSI prototypes. CR #9523. - * [1993/07/27 18:13:37 elliston] - * - * Revision 1.2.2.2 1993/06/09 02:21:11 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:58:03 jeffc] - * - * Revision 1.2 1993/04/19 16:03:43 devrcs - * Changes from mk78: - * Removed unused variable 'p' from db_write_cmd(). - * [92/05/16 jfriedl] - * Reorganized. w/u now works, instead of just w/tu. - * [92/04/18 danner] - * [93/02/02 bruel] - * - * Revision 1.1 1992/09/30 02:01:35 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/10/09 16:05:06 af - * Revision 2.5.3.1 91/10/05 13:09:25 jeffreyh - * Added user space write support including inactive task. - * [91/08/29 tak] - * - * Revision 2.5.3.1 91/10/05 13:09:25 jeffreyh - * Added user space write support including inactive task. - * [91/08/29 tak] - * - * Revision 2.5 91/05/14 15:38:04 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:07:35 mrt - * Changed to new Mach copyright - * [91/01/31 16:20:19 mrt] - * - * Revision 2.3 90/10/25 14:44:26 rwd - * Changed db_write_cmd to print unsigned. - * [90/10/19 rpd] - * - * Revision 2.2 90/08/27 21:53:54 dbg - * Set db_prev and db_next instead of explicitly advancing dot. - * [90/08/22 dbg] - * Reflected changes in db_printsym()'s calling seq. - * [90/08/20 af] - * Warn user if nothing was written. - * [90/08/07 dbg] - * Created. 
- * [90/07/25 dbg] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -165,7 +85,7 @@ db_write_cmd( register int size; boolean_t wrote_one = FALSE; boolean_t t_opt, u_opt; - thread_act_t thr_act; + thread_t thr_act; task_t task; addr = (db_addr_t) address; @@ -187,14 +107,14 @@ db_write_cmd( task = TASK_NULL; if (!DB_VALID_ADDRESS(addr, u_opt)) { - db_printf("Bad address 0x%x\n", addr); + db_printf("Bad address 0x%llx\n", (unsigned long long)addr); return; } while (db_expression(&new_value)) { old_value = db_get_task_value(addr, size, FALSE, task); db_task_printsym(addr, DB_STGY_ANY, task); - db_printf("\t\t%#8n\t=\t%#8n\n", old_value, new_value); + db_printf("\t\t%#8lln\t=\t%#8lln\n", (unsigned long long)old_value, (unsigned long long)new_value); db_put_task_value(addr, size, new_value, task); addr += size; diff --git a/osfmk/ddb/tr.c b/osfmk/ddb/tr.c index 7edac65d5..b1f7b75ea 100644 --- a/osfmk/ddb/tr.c +++ b/osfmk/ddb/tr.c @@ -88,7 +88,7 @@ tr_init(void) tr_indent[i]=0; #endif /* NCPUS > 1 */ - simple_lock_init(&trace_lock, ETAP_DIPC_TRACE); + simple_lock_init(&trace_lock, 0); } void diff --git a/osfmk/default_pager/Makefile b/osfmk/default_pager/Makefile index 367b888bf..b8e1ccbf7 100644 --- a/osfmk/default_pager/Makefile +++ b/osfmk/default_pager/Makefile @@ -16,6 +16,7 @@ MIG_DEFS = \ MIG_USHDRS = \ MIG_UUHDRS = \ + default_pager_object.h MIGINCLUDES = ${MIG_UUHDRS} ${MIG_USHDRS} diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c index b2f39767f..b2409b19b 100644 --- a/osfmk/default_pager/default_pager.c +++ b/osfmk/default_pager/default_pager.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,11 +55,16 @@ */ #include "default_pager_internal.h" +#include #include #include #include +#include +#include #include #include +#include +#include char my_name[] = "(default pager): "; @@ -98,9 +103,9 @@ MACH_PORT_FACE default_pager_external_set; /* Port set for external objects. */ /* Memory created by default_pager_object_create should mostly be resident. */ #define DEFAULT_PAGER_EXTERNAL_COUNT (2) -unsigned int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT; +int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT; /* Number of "internal" threads. */ -unsigned int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT; +int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT; /* Number of "external" threads. 
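default_pager_info_64, added above, reports backing-store capacity through ptoa_64 so the page-to-byte conversion happens in 64-bit arithmetic, and surfaces swap encryption through the DPI_ENCRYPTED flag bit. A sketch of the conversion; the page size, the flag value, and the struct layout here are assumptions for illustration:

/*
 * Sketch of the 64-bit capacity report shape of default_pager_info_64.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define ptoa_64(p) ((uint64_t)(p) << PAGE_SHIFT)

#define DPI_ENCRYPTED 0x1

struct dpi64_sketch {
    uint64_t dpi_total_space;
    uint64_t dpi_free_space;
    uint32_t dpi_page_size;
    uint32_t dpi_flags;
};

static void fill_info(struct dpi64_sketch *info,
                      uint32_t pages_total, uint32_t pages_free,
                      int encrypted)
{
    /* 64-bit math: a large swap area cannot overflow a 32-bit
     * byte count the way ptoa-style int arithmetic would. */
    info->dpi_total_space = ptoa_64(pages_total);
    info->dpi_free_space  = ptoa_64(pages_free);
    info->dpi_page_size   = 1u << PAGE_SHIFT;
    info->dpi_flags       = encrypted ? DPI_ENCRYPTED : 0;
}

int main(void)
{
    struct dpi64_sketch info;

    /* 2M pages of 4 KB swap is 8 GB, beyond a 32-bit byte count. */
    fill_info(&info, 2u * 1024 * 1024, 123456, 1);
    printf("total=%llu free=%llu flags=%#x\n",
           (unsigned long long)info.dpi_total_space,
           (unsigned long long)info.dpi_free_space,
           (unsigned)info.dpi_flags);
    return 0;
}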
*/ /* @@ -116,9 +121,9 @@ default_pager_thread_t *start_default_pager_thread(int, boolean_t); void default_pager(void); void default_pager_thread(void *); void default_pager_initialize(void); -void default_pager_set_policy(MACH_PORT_FACE); boolean_t dp_parse_argument(char *); /* forward; */ unsigned int d_to_i(char *); /* forward; */ +boolean_t strprefix(register const char *s1, register const char *s2); extern int vstruct_def_clshift; @@ -131,8 +136,7 @@ void default_pager(void) { int i, id; - static char here[] = "default_pager"; - mach_msg_options_t server_options; + __unused static char here[] = "default_pager"; default_pager_thread_t dpt; kern_return_t kr; @@ -164,7 +168,7 @@ default_pager(void) if (dpt_array[id] == NULL) Panic("alloc pager thread"); kr = vm_allocate(kernel_map, &((dpt_array[id])->dpt_buffer), - vm_page_size << vstruct_def_clshift, TRUE); + vm_page_size << vstruct_def_clshift, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) Panic("alloc thread buffer"); kr = vm_map_wire(kernel_map, (dpt_array[id])->dpt_buffer, @@ -233,7 +237,7 @@ d_to_i(char * arg) boolean_t dp_parse_argument(char *av) { char *rhs = av; - static char here[] = "dp_parse_argument"; + __unused static char here[] = "dp_parse_argument"; /* Check for '-v' flag */ @@ -264,9 +268,8 @@ boolean_t dp_parse_argument(char *av) } int -start_def_pager(char *bs_device) +start_def_pager( __unused char *bs_device ) { - int my_node; /* MACH_PORT_FACE master_device_port; */ @@ -275,8 +278,7 @@ start_def_pager(char *bs_device) MACH_PORT_FACE root_ledger_wired; MACH_PORT_FACE root_ledger_paged; */ - static char here[] = "main"; - int need_dp_init = 1; + __unused static char here[] = "main"; @@ -301,7 +303,11 @@ start_def_pager(char *bs_device) default_pager(); /* start the backing store monitor, it runs on a callout thread */ - thread_call_func(default_pager_backing_store_monitor, NULL, FALSE); + default_pager_backing_store_monitor_callout = + thread_call_allocate(default_pager_backing_store_monitor, NULL); + if (!default_pager_backing_store_monitor_callout) + panic("can't start backing store monitor thread"); + thread_call_enter(default_pager_backing_store_monitor_callout); } /* @@ -340,11 +346,35 @@ default_pager_info( } +kern_return_t +default_pager_info_64( + memory_object_default_t pager, + default_pager_info_64_t *infop) +{ + vm_size_t pages_total, pages_free; + + if (pager != default_pager_object) + return KERN_INVALID_ARGUMENT; + + bs_global_info(&pages_total, &pages_free); + + infop->dpi_total_space = ptoa_64(pages_total); + infop->dpi_free_space = ptoa_64(pages_free); + infop->dpi_page_size = vm_page_size; + infop->dpi_flags = 0; + if (dp_encryption_inited && dp_encryption == TRUE) { + infop->dpi_flags |= DPI_ENCRYPTED; + } + + return KERN_SUCCESS; +} + + void default_pager_initialize() { kern_return_t kr; - static char here[] = "default_pager_initialize"; + __unused static char here[] = "default_pager_initialize"; /* diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h index daa06f1a6..631c5b681 100644 --- a/osfmk/default_pager/default_pager_internal.h +++ b/osfmk/default_pager/default_pager_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -135,7 +135,7 @@ extern int debug_mask; #define DEBUG_BS_EXTERNAL 0x01000000 #define DEBUG_BS_INTERNAL 0x02000000 -#define DEBUG(level, args) \ +#define DP_DEBUG(level, args) \ do { \ if (debug_mask & (level)) \ dprintf(args); \ @@ -156,7 +156,7 @@ extern int debug_mask; #else /* DEFAULT_PAGER_DEBUG */ -#define DEBUG(level, args) +#define DP_DEBUG(level, args) #define ASSERT(clause) #endif /* DEFAULT_PAGER_DEBUG */ @@ -176,7 +176,6 @@ extern char *mach_error_string(kern_return_t); */ #ifdef MACH_KERNEL #define vm_page_size page_size -extern vm_size_t page_size; #else extern vm_object_size_t vm_page_size; #endif @@ -193,7 +192,7 @@ extern memory_object_default_t default_pager_object; #ifdef MACH_KERNEL extern mutex_t dpt_lock; /* Lock for the dpt array */ -extern unsigned int default_pager_internal_count; +extern int default_pager_internal_count; extern MACH_PORT_FACE default_pager_host_port; /* extern task_t default_pager_self; */ /* dont need or want */ extern MACH_PORT_FACE default_pager_internal_set; @@ -299,7 +298,7 @@ typedef struct backing_store *backing_store_t; #define BS_STAT(bs, clause) VSTATS_ACTION(&(bs)->bs_lock, (clause)) #ifdef MACH_KERNEL -#define BS_LOCK_INIT(bs) mutex_init(&(bs)->bs_lock, ETAP_DPAGE_BS) +#define BS_LOCK_INIT(bs) mutex_init(&(bs)->bs_lock, 0) #else #define BS_LOCK_INIT(bs) mutex_init(&(bs)->bs_lock) #endif @@ -318,7 +317,7 @@ extern struct backing_store_list_head backing_store_list; extern int backing_store_release_trigger_disable; #ifdef MACH_KERNEL -#define BSL_LOCK_INIT() mutex_init(&backing_store_list.bsl_lock, ETAP_DPAGE_BSL) +#define BSL_LOCK_INIT() mutex_init(&backing_store_list.bsl_lock, 0) #else #define BSL_LOCK_INIT() mutex_init(&backing_store_list.bsl_lock) #endif @@ -347,7 +346,7 @@ struct paging_segment { unsigned int ps_ncls; /* Number of clusters in segment */ unsigned int ps_clcount; /* Number of free clusters */ unsigned int ps_pgcount; /* Number of free pages */ - long ps_hint; /* Hint of where to look next. */ + unsigned long ps_hint; /* Hint of where to look next. */ /* bitmap */ #ifdef MACH_KERNEL @@ -373,7 +372,7 @@ typedef struct paging_segment *paging_segment_t; #define PAGING_SEGMENT_NULL ((paging_segment_t) 0) #ifdef MACH_KERNEL -#define PS_LOCK_INIT(ps) mutex_init(&(ps)->ps_lock, ETAP_DPAGE_SEGMENT) +#define PS_LOCK_INIT(ps) mutex_init(&(ps)->ps_lock, 0) #else #define PS_LOCK_INIT(ps) mutex_init(&(ps)->ps_lock) #endif @@ -403,7 +402,7 @@ extern int paging_segment_max; /* highest used paging segment index */ extern int ps_select_array[DEFAULT_PAGER_BACKING_STORE_MAXPRI+1]; #ifdef MACH_KERNEL -#define PSL_LOCK_INIT() mutex_init(&paging_segments_lock, ETAP_DPAGE_SEGLIST) +#define PSL_LOCK_INIT() mutex_init(&paging_segments_lock, 0) #else #define PSL_LOCK_INIT() mutex_init(&paging_segments_lock) #endif @@ -459,7 +458,7 @@ typedef struct vs_map *vs_map_t; * Exported macros for manipulating the vs_map structure -- * checking status, getting and setting bits. 
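The rename of the pager-local DEBUG macro to DP_DEBUG above avoids colliding with the generic DEBUG symbol defined elsewhere in the kernel. The double-parenthesis argument is what lets a fixed-arity, pre-C99 macro forward an arbitrary printf argument list. A runnable stand-in:

    #include <stdio.h>

    static int debug_mask = 0x02000000;
    #define DEBUG_BS_INTERNAL 0x02000000

    #define dprintf(args) printf args   /* stand-in for the kernel's dprintf */

    /* Double parens let a fixed-arity macro pass a whole argument list on. */
    #define DP_DEBUG(level, args)        \
        do {                             \
            if (debug_mask & (level))    \
                dprintf(args);           \
        } while (0)

    int main(void)
    {
        DP_DEBUG(DEBUG_BS_INTERNAL, ("segment #%d: %d free\n", 0, 42));
        return 0;
    }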
*/ -#define VSCLSIZE(vs) (1 << (vs)->vs_clshift) +#define VSCLSIZE(vs) (1UL << (vs)->vs_clshift) #define VSM_ISCLR(vsm) (((vsm).vsmap_entry == VSM_ENTRY_NULL) && \ ((vsm).vsmap_error == 0)) #define VSM_ISERR(vsm) ((vsm).vsmap_error) @@ -543,7 +542,7 @@ typedef struct vstruct_alias { } vstruct_alias_t; #ifdef MACH_KERNEL -#define DPT_LOCK_INIT(lock) mutex_init(&(lock), ETAP_DPAGE_VSTRUCT) +#define DPT_LOCK_INIT(lock) mutex_init(&(lock), 0) #define DPT_LOCK(lock) mutex_lock(&(lock)) #define DPT_UNLOCK(lock) mutex_unlock(&(lock)) #define DPT_SLEEP(lock, e, i) thread_sleep_mutex(&(lock), (event_t)(e), i) @@ -553,13 +552,13 @@ typedef struct vstruct_alias { #define VS_LOCK(vs) hw_lock_lock(&(vs)->vs_lock) #define VS_UNLOCK(vs) hw_lock_unlock(&(vs)->vs_lock) #define VS_MAP_LOCK_TYPE mutex_t -#define VS_MAP_LOCK_INIT(vs) mutex_init(&(vs)->vs_map_lock, ETAP_DPAGE_VSMAP) +#define VS_MAP_LOCK_INIT(vs) mutex_init(&(vs)->vs_map_lock, 0) #define VS_MAP_LOCK(vs) mutex_lock(&(vs)->vs_map_lock) #define VS_MAP_TRY_LOCK(vs) mutex_try(&(vs)->vs_map_lock) #define VS_MAP_UNLOCK(vs) mutex_unlock(&(vs)->vs_map_lock) #else #define VS_LOCK_TYPE struct mutex -#define VS_LOCK_INIT(vs) mutex_init(&(vs)->vs_lock, ETAP_DPAGE_VSTRUCT) +#define VS_LOCK_INIT(vs) mutex_init(&(vs)->vs_lock, 0) #define VS_TRY_LOCK(vs) mutex_try(&(vs)->vs_lock) #define VS_LOCK(vs) mutex_lock(&(vs)->vs_lock) #define VS_UNLOCK(vs) mutex_unlock(&(vs)->vs_lock) @@ -611,8 +610,8 @@ typedef struct vstruct { queue_chain_t vs_links; /* Link in pager-wide list */ - int vs_clshift; /* Bit shift: clusters->pages */ - int vs_size; /* Object size in clusters */ + unsigned int vs_clshift; /* Bit shift: clusters->pages */ + unsigned int vs_size; /* Object size in clusters */ #ifdef MACH_KERNEL mutex_t vs_map_lock; /* to protect map below */ #else @@ -640,6 +639,7 @@ __private_extern__ void vs_wait_for_readers(vstruct_t); __private_extern__ void vs_start_write(vstruct_t); __private_extern__ void vs_finish_write(vstruct_t); __private_extern__ void vs_wait_for_writers(vstruct_t); +__private_extern__ void vs_wait_for_sync_writers(vstruct_t); #else /* PARALLEL */ #define vs_lock(vs) #define vs_unlock(vs) @@ -705,7 +705,7 @@ __private_extern__ void vstruct_list_delete(vstruct_t vs); #ifdef MACH_KERNEL -#define VSL_LOCK_INIT() mutex_init(&vstruct_list.vsl_lock, ETAP_DPAGE_VSLIST) +#define VSL_LOCK_INIT() mutex_init(&vstruct_list.vsl_lock, 0) #else #define VSL_LOCK_INIT() mutex_init(&vstruct_list.vsl_lock) #endif @@ -791,8 +791,8 @@ extern kern_return_t pvs_cluster_read(vstruct_t, vm_size_t); extern kern_return_t vs_cluster_write(vstruct_t, upl_t, - vm_offset_t, - vm_size_t, + upl_offset_t, + upl_size_t, boolean_t, int); extern vm_offset_t ps_clmap(vstruct_t, @@ -809,6 +809,16 @@ extern boolean_t bs_set_default_clsize(unsigned int); extern boolean_t verbose; +extern thread_call_t default_pager_backing_store_monitor_callout; extern void default_pager_backing_store_monitor(thread_call_param_t, thread_call_param_t); +extern ipc_port_t max_pages_trigger_port; +extern unsigned int dp_pages_free; +extern unsigned int maximum_pages_free; + +/* Do we know yet if swap files need to be encrypted ? */ +extern boolean_t dp_encryption_inited; +/* Should we encrypt data before writing to swap ? 
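In the VSCLSIZE hunk above, "1 << vs_clshift" is evaluated as a signed 32-bit int; writing 1UL keeps the cluster size unsigned so later comparisons and arithmetic never involve the sign bit, and on LP64 builds the shift result is word-sized. The companion changes make vs_clshift and vs_size unsigned to match. A small demonstration (the overflow claim assumes an LP64 host):

    #include <stdio.h>

    int main(void)
    {
        unsigned int vs_clshift = 5;        /* 32 pages per cluster */
        unsigned int vm_page_shift = 12;    /* 4 KB pages */
        unsigned long nclusters = 40000;    /* ~5 GB of backing store */

        /* With an int cluster size, nclusters * clsize << page_shift
         * overflows 32 bits; starting from 1UL keeps the whole
         * expression in unsigned long arithmetic. */
        unsigned long bytes = nclusters * (1UL << vs_clshift) << vm_page_shift;

        printf("backing store: %lu bytes\n", bytes);
        return 0;
    }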
*/ +extern boolean_t dp_encryption; + #endif /* _DEFAULT_PAGER_INTERNAL_H_ */ diff --git a/osfmk/default_pager/default_pager_object.defs b/osfmk/default_pager/default_pager_object.defs index 77e4ae75e..8caa3d652 100644 --- a/osfmk/default_pager/default_pager_object.defs +++ b/osfmk/default_pager/default_pager_object.defs @@ -133,4 +133,8 @@ routine default_pager_triggers( in flags : int; in trigger_port : mach_port_t); +routine default_pager_info_64( + default_pager : mach_port_t; + out info : default_pager_info_64_t); + diff --git a/osfmk/default_pager/default_pager_types.defs b/osfmk/default_pager/default_pager_types.defs index f02e57ea5..5abe8f5c9 100644 --- a/osfmk/default_pager/default_pager_types.defs +++ b/osfmk/default_pager/default_pager_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,90 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * $Log: default_pager_types.defs,v $ - * Revision 1.4 2002/11/23 05:08:24 lindak - * Merged PR-3107160-3107168 into ZZ100 - * 3107160 Panther Kernel builds spew tons of compiler warnings about default - * argument - * 3107168 Kernel warnings about "extra tokens at end of #endif directive" - * Kernel - * - * Revision 1.3.1930.1 2002/11/21 22:11:29 sarcone - * - * Bug #:3107160,3107168 - * Submitted by: Chris Sarcone - * Reviewed by: Simon Douglas - * - * Fixed a bunch of compiler warnings about default arguments and - * extra tokens at end of #endif directives. - * - * Revision 1.3 2000/01/26 05:56:23 wsanchez - * Add APSL - * - * Revision 1.2 1998/12/01 00:24:42 wsanchez - * Merged in CDY_DP1 (chris: default pager) - * - * Revision 1.1.2.2 1998/11/25 21:32:17 youngwor - * fix errant comment format - * - * Revision 1.1.2.1 1998/11/24 22:39:58 youngwor - * Check-in of support for the in-kernel default pager - * - * Revision 1.1.1.1 1998/03/07 02:26:33 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.7.3 1995/01/11 19:30:40 devrcs - * mk6 CR668 - 1.3b26 merge - * [1994/11/10 15:34:32 bolinger] - * - * Insert OSC1_3 log. - * - * BEGIN OSC1_3 HISTORY - * - * Revision 1.2.2.6 1994/05/06 19:23:25 tmt - * Merge Alpha changes into osc1.3b19 source. - * [1994/03/29 18:21:06 rmiller] - * 64 bit - * [1994/01/27 14:31:30 picco] - * - * Revision 1.2.2.5 1994/04/01 18:42:58 jph - * CR10550 -- Add backing store info interfaces. - * [1994/04/01 18:40:30 jph] - * - * END OSC1_3 HISTORY - * [1994/11/02 20:48:01 bolinger] - * - * Revision 1.2.7.1 1994/09/23 06:57:07 ezf - * change marker to not FREE - * [1994/09/23 06:54:36 ezf] - * - * Revision 1.2.2.3 1993/08/05 17:57:50 gm - * CR9627: Removed deprecated default_pager_filename_t type. - * [1993/07/09 19:20:12 gm] - * - * Revision 1.2.2.2 1993/06/09 02:11:13 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:42:07 gm] - * - * Revision 1.2 1993/04/19 16:32:44 devrcs - * Untyped ipc merge: - * Introducing new MIG syntax for Untyped IPC (via compile option - * MACH_IPC_TYPED) - * [1993/03/18 09:37:27 rod] - * - * Moved from bootstrap. - * [1993/02/17 13:45:33 bruel] - * - * Fixed comments. - * [1993/02/11 09:26:06 bruel] - * - * Created for external default pager. 
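The new MIG routine declared above exposes 64-bit-capable swap statistics. A hypothetical user-level caller, assuming the MIG-generated stubs from default_pager_object.defs (the header path and stub signature below follow the usual MIG conventions and are assumptions, as is holding a send right to the default pager):

    #include <stdio.h>
    #include <mach/mach.h>
    #include <mach/default_pager_object.h>  /* assumed location of the stubs */

    kern_return_t
    print_swap_usage(mach_port_t default_pager)
    {
        default_pager_info_64_t info;
        kern_return_t kr;

        kr = default_pager_info_64(default_pager, &info);
        if (kr != KERN_SUCCESS)
            return kr;

        printf("swap: %llu bytes total, %llu free%s\n",
               (unsigned long long)info.dpi_total_space,
               (unsigned long long)info.dpi_free_space,
               (info.dpi_flags & DPI_ENCRYPTED) ? " (encrypted)" : "");
        return KERN_SUCCESS;
    }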
- * [1993/02/09 14:56:57 bruel] - * - * $EndLog$ - */ #ifndef _MACH_DEFAULT_PAGER_TYPES_DEFS_ #define _MACH_DEFAULT_PAGER_TYPES_DEFS_ @@ -114,6 +30,7 @@ type default_pager_info_t = struct[3] of natural_t; +type default_pager_info_64_t = struct[6] of natural_t; type default_pager_object_t = struct[2] of natural_t; type default_pager_object_array_t = array[] of default_pager_object_t; diff --git a/osfmk/default_pager/default_pager_types.h b/osfmk/default_pager/default_pager_types.h index 59d395666..86ad227f6 100644 --- a/osfmk/default_pager/default_pager_types.h +++ b/osfmk/default_pager/default_pager_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -37,19 +37,25 @@ typedef memory_object_default_t default_pager_t; -#ifdef MACH_KERNEL_PRIVATE - /* * Remember to update the mig type definitions * in default_pager_types.defs when adding/removing fields. */ typedef struct default_pager_info { - vm_size_t dpi_total_space; /* size of backing store */ - vm_size_t dpi_free_space; /* how much of it is unused */ - vm_size_t dpi_page_size; /* the pager's vm page size */ + vm_size_t dpi_total_space; /* size of backing store */ + vm_size_t dpi_free_space; /* how much of it is unused */ + vm_size_t dpi_page_size; /* the pager's vm page size */ } default_pager_info_t; +typedef struct default_pager_info_64 { + memory_object_size_t dpi_total_space; /* size of backing store */ + memory_object_size_t dpi_free_space; /* how much of it is unused */ + vm_size_t dpi_page_size; /* the pager's vm page size */ + int dpi_flags; +#define DPI_ENCRYPTED 0x1 /* swap files are encrypted */ +} default_pager_info_64_t; + typedef integer_t *backing_store_info_t; typedef int backing_store_flavor_t; typedef int *vnode_ptr_t; @@ -92,12 +98,12 @@ typedef struct default_pager_page { typedef default_pager_page_t *default_pager_page_array_t; -#endif /* MACH_KERNEL_PRIVATE */ - #define DEFAULT_PAGER_BACKING_STORE_MAXPRI 4 -#define HI_WAT_ALERT 1 -#define LO_WAT_ALERT 2 +#define HI_WAT_ALERT 0x01 +#define LO_WAT_ALERT 0x02 +#define SWAP_ENCRYPT_ON 0x04 +#define SWAP_ENCRYPT_OFF 0x08 #endif /* __APPLE_API_UNSTABLE */ diff --git a/osfmk/default_pager/diag.h b/osfmk/default_pager/diag.h index a6e571cfa..72f9c184d 100644 --- a/osfmk/default_pager/diag.h +++ b/osfmk/default_pager/diag.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -39,8 +39,6 @@ #define Panic(aargh) panic("%s[KERNEL]: %s", my_name, aargh) #endif -extern char my_name[]; - #define VSTATS_ACTION(l, stmt) \ do { VSTATS_LOCK(l); stmt; VSTATS_UNLOCK(l); } while (0) diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index 0c3ae6af8..f0ea16a7b 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -53,19 +53,31 @@ * Paging File Management. 
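Two details in the type changes above are easy to miss. First, the .defs side declares the new struct as six natural_t's, which must agree with the C layout (two 64-bit sizes, a page size, and a flags word). Second, the watermark constants change from the enumeration 1, 2 to the distinct bits 0x01..0x08, so the new encryption requests can travel through the same trigger interface. A quick check of the first point, with stand-in typedefs reflecting the 32-bit kernel assumption:

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t natural_t;             /* 32-bit kernel assumption */
    typedef uint64_t memory_object_size_t;
    typedef natural_t vm_size_t;

    typedef struct default_pager_info_64 {
        memory_object_size_t dpi_total_space;  /* 2 x natural_t */
        memory_object_size_t dpi_free_space;   /* 2 x natural_t */
        vm_size_t            dpi_page_size;    /* 1 x natural_t */
        int                  dpi_flags;        /* 1 x natural_t */
    } default_pager_info_64_t;

    int main(void)
    {
        /* "struct[6] of natural_t" in the .defs must match sizeof(). */
        assert(sizeof(default_pager_info_64_t) == 6 * sizeof(natural_t));
        return 0;
    }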
*/ +#include #include #include -#include "default_pager_internal.h" +#include +#include #include +#include + +#include #include #include + +#include +#include #include #include #include + #include #include -/* CDY CDY */ #include +#include +#include + +/* LP64todo - need large internal object support */ /* * ALLOC_STRIDE... the maximum number of bytes allocated from @@ -128,8 +140,7 @@ void vs_free_async(struct vs_async *vsa); /* forward */ #define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock) #define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock) -#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, \ - ETAP_IO_DEV_PAGEH) +#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, 0) #define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock) /* * Paging Space Hysteresis triggers and the target notification port @@ -143,7 +154,12 @@ ipc_port_t max_pages_trigger_port = NULL; boolean_t bs_low = FALSE; int backing_store_release_trigger_disable = 0; - + + +/* Have we decided if swap needs to be encrypted yet ? */ +boolean_t dp_encryption_inited = FALSE; +/* Should we encrypt swap ? */ +boolean_t dp_encryption = FALSE; /* @@ -172,12 +188,28 @@ int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 }; unsigned int dp_pages_free = 0; unsigned int cluster_transfer_minimum = 100; -kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int); /* forward */ -kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */ +/* forward declarations */ +kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int); /* forward */ +kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */ +default_pager_thread_t *get_read_buffer( void ); +kern_return_t ps_vstruct_transfer_from_segment( + vstruct_t vs, + paging_segment_t segment, + upl_t upl); +kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ +kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ +kern_return_t vs_cluster_transfer( + vstruct_t vs, + upl_offset_t offset, + upl_size_t cnt, + upl_t upl); +vs_map_t vs_get_map_entry( + vstruct_t vs, + vm_offset_t offset); default_pager_thread_t * -get_read_buffer() +get_read_buffer( void ) { int i; @@ -331,9 +363,9 @@ bs_global_info( */ pages_total += ps->ps_pgnum; pages_free += ps->ps_clcount << ps->ps_clshift; - DEBUG(DEBUG_BS_INTERNAL, - ("segment #%d: %d total, %d free\n", - i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("segment #%d: %d total, %d free\n", + i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift)); } *totalp = pages_total; *freep = pages_free; @@ -412,10 +444,10 @@ void backing_store_add(backing_store_t); /* forward */ void backing_store_add( - backing_store_t bs) + __unused backing_store_t bs) { - MACH_PORT_FACE port = bs->bs_port; - MACH_PORT_FACE pset = default_pager_default_set; +// MACH_PORT_FACE port = bs->bs_port; +// MACH_PORT_FACE pset = default_pager_default_set; kern_return_t kr = KERN_SUCCESS; if (kr != KERN_SUCCESS) @@ -512,7 +544,7 @@ default_pager_backing_store_create( { backing_store_t bs; MACH_PORT_FACE port; - kern_return_t kr; +// kern_return_t kr; struct vstruct_alias *alias_struct; if (pager != default_pager_object) @@ -523,9 +555,9 @@ 
default_pager_backing_store_create( ipc_port_make_send(port); assert (port != IP_NULL); - DEBUG(DEBUG_BS_EXTERNAL, - ("priority=%d clsize=%d bs_port=0x%x\n", - priority, clsize, (int) backing_store)); + DP_DEBUG(DEBUG_BS_EXTERNAL, + ("priority=%d clsize=%d bs_port=0x%x\n", + priority, clsize, (int) backing_store)); alias_struct = (struct vstruct_alias *) kalloc(sizeof (struct vstruct_alias)); @@ -536,7 +568,7 @@ default_pager_backing_store_create( } else { ipc_port_dealloc_kernel((MACH_PORT_FACE)(port)); - kfree((vm_offset_t)bs, sizeof (struct backing_store)); + kfree(bs, sizeof (struct backing_store)); return KERN_RESOURCE_SHORTAGE; } @@ -685,7 +717,7 @@ ps_delete( int count; upl_t upl; - transfer_object = vm_object_allocate(VM_SUPER_CLUSTER); + transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER); count = 0; error = vm_object_upl_request(transfer_object, (vm_object_offset_t)0, VM_SUPER_CLUSTER, @@ -695,7 +727,7 @@ ps_delete( if(error == KERN_SUCCESS) { error = ps_vstruct_transfer_from_segment( vs, ps, upl); - upl_commit(upl, NULL); + upl_commit(upl, NULL, 0); upl_deallocate(upl); } else { error = KERN_FAILURE; @@ -761,7 +793,7 @@ default_pager_backing_store_delete( paging_segment_t ps; int error; int interim_pages_removed = 0; - kern_return_t kr; +// kern_return_t kr; if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL) return KERN_INVALID_ARGUMENT; @@ -844,9 +876,8 @@ default_pager_backing_store_delete( paging_segments[i] = PAGING_SEGMENT_NULL; paging_segment_count--; PS_LOCK(ps); - kfree((vm_offset_t)ps->ps_bmap, - RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); } } } @@ -869,8 +900,8 @@ default_pager_backing_store_delete( * Disable lookups of this backing store. */ if((void *)bs->bs_port->alias != NULL) - kfree((vm_offset_t) bs->bs_port->alias, - sizeof (struct vstruct_alias)); + kfree((void *) bs->bs_port->alias, + sizeof (struct vstruct_alias)); ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port)); bs->bs_port = MACH_PORT_NULL; BS_UNLOCK(bs); @@ -886,7 +917,7 @@ default_pager_backing_store_delete( /* * Free the backing store structure. 
*/ - kfree((vm_offset_t)bs, sizeof *bs); + kfree(bs, sizeof *bs); return KERN_SUCCESS; } @@ -985,7 +1016,7 @@ default_pager_add_segment( PS_LOCK_INIT(ps); ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); if (!ps->ps_bmap) { - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -997,8 +1028,8 @@ default_pager_add_segment( ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { - kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -1013,10 +1044,10 @@ default_pager_add_segment( bs_more_space(ps->ps_clcount); - DEBUG(DEBUG_BS_INTERNAL, - ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", - device, offset, count, record_size, - ps->ps_record_shift, ps->ps_pgnum)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", + device, offset, count, record_size, + ps->ps_record_shift, ps->ps_pgnum)); return KERN_SUCCESS; } @@ -1072,7 +1103,7 @@ vs_alloc_async(void) { struct vs_async *vsa; MACH_PORT_FACE reply_port; - kern_return_t kr; +// kern_return_t kr; VS_ASYNC_LOCK(); if (vs_async_free_list == NULL) { @@ -1099,8 +1130,7 @@ vs_alloc_async(void) vs_alloc_async_failed++; ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); - kfree((vm_offset_t)vsa, - sizeof (struct vs_async)); + kfree(vsa, sizeof (struct vs_async)); vsa = NULL; } } @@ -1152,8 +1182,7 @@ vs_alloc_async(void) vs_alloc_async_failed++; ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); - kfree((vm_offset_t) vsa, - sizeof (struct vs_async)); + kfree(vsa, sizeof (struct vs_async)); vsa = NULL; } } @@ -1169,8 +1198,8 @@ vs_free_async( kern_return_t kr; reply_port = vsa->reply_port; - kfree((vm_offset_t) reply_port->alias, sizeof (struct vstuct_alias)); - kfree((vm_offset_t) vsa, sizeof (struct vs_async)); + kfree(reply_port->alias, sizeof (struct vstuct_alias)); + kfree(vsa, sizeof (struct vs_async)); ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); #if 0 VS_ASYNC_LOCK(); @@ -1188,7 +1217,7 @@ ps_vstruct_create( vm_size_t size) { vstruct_t vs; - int i; + unsigned int i; vs = (vstruct_t) zalloc(vstruct_zone); if (vs == VSTRUCT_NULL) { @@ -1211,11 +1240,11 @@ ps_vstruct_create( vs->vs_waiting_write = FALSE; vs->vs_waiting_async = FALSE; #else - mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO); - mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD); - mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE); - mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS); - mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC); + mutex_init(&vs->vs_waiting_seqno, 0); + mutex_init(&vs->vs_waiting_read, 0); + mutex_init(&vs->vs_waiting_write, 0); + mutex_init(&vs->vs_waiting_refs, 0); + mutex_init(&vs->vs_waiting_async, 0); #endif vs->vs_readers = 0; @@ -1241,14 +1270,14 @@ ps_vstruct_create( vs->vs_indirect = FALSE; } vs->vs_xfer_pending = FALSE; - DEBUG(DEBUG_VS_INTERNAL, - ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect)); /* * Check to see that we got the space. 
*/ if (!vs->vs_dmap) { - kfree((vm_offset_t)vs, sizeof *vs); + kfree(vs, sizeof *vs); return VSTRUCT_NULL; } @@ -1269,12 +1298,12 @@ ps_vstruct_create( return vs; } -paging_segment_t ps_select_segment(int, int *); /* forward */ +paging_segment_t ps_select_segment(unsigned int, int *); /* forward */ paging_segment_t ps_select_segment( - int shift, - int *psindex) + unsigned int shift, + int *psindex) { paging_segment_t ps; int i; @@ -1417,7 +1446,7 @@ ps_allocate_cluster( int *psindex, paging_segment_t use_ps) { - int byte_num; + unsigned int byte_num; int bit_num = 0; paging_segment_t ps; vm_offset_t cluster; @@ -1469,14 +1498,21 @@ ps_allocate_cluster( } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) == PAGING_SEGMENT_NULL) { -#if 0 - bs_no_paging_space(TRUE); -#endif -#if 0 - if (verbose) -#endif - dprintf(("no space in available paging segments; " - "swapon suggested\n")); + static uint32_t lastnotify = 0; + uint32_t now, nanoseconds_dummy; + + /* + * Emit a notification of the low-paging resource condition + * but don't issue it more than once every five seconds. This + * prevents us from overflowing logs with thousands of + * repetitions of the message. + */ + clock_get_system_nanotime(&now, &nanoseconds_dummy); + if (now > lastnotify + 5) { + dprintf(("no space in available paging segments\n")); + lastnotify = now; + } + /* the count got off maybe, reset to zero */ PSL_LOCK(); dp_pages_free = 0; @@ -1572,7 +1608,7 @@ ps_dealloc_vsmap( struct vs_map *vsmap, vm_size_t size) { - int i; + unsigned int i; for (i = 0; i < size; i++) if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) ps_deallocate_cluster(VSM_PS(vsmap[i]), @@ -1583,8 +1619,8 @@ void ps_vstruct_dealloc( vstruct_t vs) { - int i; - spl_t s; + unsigned int i; +// spl_t s; VS_MAP_LOCK(vs); @@ -1601,31 +1637,29 @@ ps_vstruct_dealloc( for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { if (vs->vs_imap[i] != NULL) { ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES); - kfree((vm_offset_t)vs->vs_imap[i], - CLMAP_THRESHOLD); + kfree(vs->vs_imap[i], CLMAP_THRESHOLD); } } - kfree((vm_offset_t)vs->vs_imap, - INDIRECT_CLMAP_SIZE(vs->vs_size)); + kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size)); } else { /* * Direct map. Free used clusters, then memory. */ ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size); - kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); + kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); } VS_MAP_UNLOCK(vs); bs_commit(- vs->vs_size); - zfree(vstruct_zone, (vm_offset_t)vs); + zfree(vstruct_zone, vs); } -int ps_map_extend(vstruct_t, int); /* forward */ +int ps_map_extend(vstruct_t, unsigned int); /* forward */ int ps_map_extend( vstruct_t vs, - int new_size) + unsigned int new_size) { struct vs_map **new_imap; struct vs_map *new_dmap = NULL; @@ -1687,7 +1721,7 @@ int ps_map_extend( /* Allocate an indirect page */ if ((new_imap[0] = (struct vs_map *) kalloc(CLMAP_THRESHOLD)) == NULL) { - kfree((vm_offset_t)new_imap, new_map_size); + kfree(new_imap, new_map_size); return -1; } new_dmap = new_imap[0]; @@ -1728,7 +1762,7 @@ int ps_map_extend( bs_commit(new_size - vs->vs_size); vs->vs_size = new_size; if (old_map) - kfree((vm_offset_t)old_map, old_map_size); + kfree(old_map, old_map_size); return 0; } @@ -1744,7 +1778,7 @@ ps_clmap( vm_offset_t cluster; /* The cluster of offset. */ vm_offset_t newcl; /* The new cluster allocated. 
*/ vm_offset_t newoff; - int i; + unsigned int i; struct vs_map *vsmap; VS_MAP_LOCK(vs); @@ -1842,7 +1876,7 @@ ps_clmap( */ newcl = ps_allocate_cluster(vs, &psindex, PAGING_SEGMENT_NULL); - if (newcl == -1) { + if (newcl == (vm_offset_t) -1) { VS_MAP_UNLOCK(vs); return (vm_offset_t) -1; } @@ -1930,13 +1964,13 @@ ps_clmap( } else VS_MAP_UNLOCK(vs); - DEBUG(DEBUG_VS_INTERNAL, - ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n", - newcl+newoff, (int) vs, (int) vsmap, flag)); - DEBUG(DEBUG_VS_INTERNAL, - (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n", - (int) clmap->cl_ps, clmap->cl_numpages, - (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n", + newcl+newoff, (int) vs, (int) vsmap, flag)); + DP_DEBUG(DEBUG_VS_INTERNAL, + (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n", + (int) clmap->cl_ps, clmap->cl_numpages, + (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map)); return (newcl + newoff); } @@ -1960,7 +1994,7 @@ ps_clunmap( */ while (length > 0) { vm_offset_t newoff; - int i; + unsigned int i; cluster = atop_32(offset) >> vs->vs_clshift; if (vs->vs_indirect) /* indirect map */ @@ -1982,7 +2016,7 @@ ps_clunmap( * paging segment cluster pages. * Optimize for entire cluster cleraing. */ - if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) { + if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) { /* * Not cluster aligned. */ @@ -2039,15 +2073,15 @@ void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, void vs_cl_write_complete( - vstruct_t vs, - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t addr, - vm_size_t size, - boolean_t async, - int error) + vstruct_t vs, + __unused paging_segment_t ps, + vm_offset_t offset, + __unused vm_offset_t addr, + vm_size_t size, + boolean_t async, + int error) { - kern_return_t kr; +// kern_return_t kr; if (error) { /* @@ -2106,11 +2140,7 @@ device_write_reply( if(vsa->vsa_error) { /* need to consider error condition. re-write data or */ /* throw it away here. 
*/ - vm_offset_t ioaddr; - if(vm_map_copyout(kernel_map, &ioaddr, - (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS) - panic("vs_cluster_write: unable to copy source list\n"); - vm_deallocate(kernel_map, ioaddr, vsa->vsa_size); + vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr); } ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset, vsa->vsa_size, vsa->vsa_error); @@ -2187,8 +2217,6 @@ device_open_reply( return KERN_SUCCESS; } -kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ - kern_return_t ps_read_device( paging_segment_t ps, @@ -2231,7 +2259,7 @@ ps_read_device( vsa->vsa_size = 0; vsa->vsa_ps = NULL; } - mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO); + mutex_init(&vsa->vsa_lock, 0); ip_lock(vsa->reply_port); vsa->reply_port->ip_sorights++; ip_reference(vsa->reply_port); @@ -2287,9 +2315,9 @@ ps_read_device( records_read = (bytes_read >> (vm_page_shift - ps->ps_record_shift)); dev_offset += records_read; - DEBUG(DEBUG_VS_INTERNAL, - ("calling vm_deallocate(addr=0x%X,size=0x%X)\n", - dev_buffer, bytes_read)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("calling vm_deallocate(addr=0x%X,size=0x%X)\n", + dev_buffer, bytes_read)); if (vm_deallocate(kernel_map, dev_buffer, bytes_read) != KERN_SUCCESS) Panic("dealloc buf"); @@ -2298,7 +2326,7 @@ ps_read_device( *residualp = size - total_read; if((dev_buffer != *bufferp) && (total_read != 0)) { vm_offset_t temp_buffer; - vm_allocate(kernel_map, &temp_buffer, total_read, TRUE); + vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE); memcpy((void *) temp_buffer, (void *) *bufferp, total_read); if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read, VM_MAP_COPYIN_OPT_SRC_DESTROY | @@ -2328,8 +2356,6 @@ ps_read_device( return KERN_SUCCESS; } -kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ - kern_return_t ps_write_device( paging_segment_t ps, @@ -2435,40 +2461,41 @@ ps_write_device( kern_return_t ps_read_device( - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t *bufferp, - unsigned int size, - unsigned int *residualp, - int flags) + __unused paging_segment_t ps, + __unused vm_offset_t offset, + __unused vm_offset_t *bufferp, + __unused unsigned int size, + __unused unsigned int *residualp, + __unused int flags) { panic("ps_read_device not supported"); } +kern_return_t ps_write_device( - paging_segment_t ps, - vm_offset_t offset, - vm_offset_t addr, - unsigned int size, - struct vs_async *vsa) + __unused paging_segment_t ps, + __unused vm_offset_t offset, + __unused vm_offset_t addr, + __unused unsigned int size, + __unused struct vs_async *vsa) { panic("ps_write_device not supported"); } #endif /* DEVICE_PAGING */ -void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t); /* forward */ +void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */ void pvs_object_data_provided( - vstruct_t vs, - upl_t upl, - vm_offset_t offset, - vm_size_t size) + __unused vstruct_t vs, + __unused upl_t upl, + __unused upl_offset_t offset, + upl_size_t size) { - DEBUG(DEBUG_VS_INTERNAL, - ("buffer=0x%x,offset=0x%x,size=0x%x\n", - upl, offset, size)); + DP_DEBUG(DEBUG_VS_INTERNAL, + ("buffer=0x%x,offset=0x%x,size=0x%x\n", + upl, offset, size)); ASSERT(size > 0); GSTAT(global_stats.gs_pages_in += atop_32(size)); @@ -2488,15 +2515,15 @@ pvs_cluster_read( { upl_t upl; kern_return_t error = KERN_SUCCESS; - int size; - unsigned int residual; + int size; + int 
residual; unsigned int request_flags; - int seg_index; - int pages_in_cl; + int seg_index; + int pages_in_cl; int cl_size; int cl_mask; - int cl_index; - int xfer_size; + int cl_index; + int xfer_size; vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; struct clmap clmap; @@ -2525,6 +2552,17 @@ pvs_cluster_read( #else request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT; #endif + + assert(dp_encryption_inited); + if (dp_encryption) { + /* + * ENCRYPTED SWAP: + * request that the UPL be prepared for + * decryption. + */ + request_flags |= UPL_ENCRYPT; + } + while (cnt && (error == KERN_SUCCESS)) { int ps_info_valid; int page_list_count; @@ -2712,7 +2750,7 @@ pvs_cluster_read( request_flags | UPL_SET_INTERNAL); error = ps_read_file(psp[beg_pseg], - upl, (vm_offset_t) 0, + upl, (upl_offset_t) 0, ps_offset[beg_pseg] + (beg_indx * vm_page_size), xfer_size, &residual, 0); @@ -2807,34 +2845,28 @@ kern_return_t vs_cluster_write( vstruct_t vs, upl_t internal_upl, - vm_offset_t offset, - vm_size_t cnt, + upl_offset_t offset, + upl_size_t cnt, boolean_t dp_internal, int flags) { - vm_offset_t size; - vm_offset_t transfer_size; + upl_size_t transfer_size; int error = 0; struct clmap clmap; vm_offset_t actual_offset; /* Offset within paging segment */ paging_segment_t ps; - vm_offset_t subx_size; vm_offset_t mobj_base_addr; vm_offset_t mobj_target_addr; - int mobj_size; - - struct vs_async *vsa; - vm_map_copy_t copy; upl_t upl; upl_page_info_t *pl; int page_index; int list_size; int pages_in_cl; - int cl_size; + unsigned int cl_size; int base_index; - int seg_size; + unsigned int seg_size; pages_in_cl = 1 << vs->vs_clshift; cl_size = pages_in_cl * vm_page_size; @@ -2842,12 +2874,12 @@ vs_cluster_write( if (!dp_internal) { int page_list_count; int request_flags; - int super_size; + unsigned int super_size; int first_dirty; int num_dirty; int num_of_pages; int seg_index; - vm_offset_t upl_offset; + upl_offset_t upl_offset; vm_offset_t seg_offset; vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; @@ -2867,6 +2899,26 @@ vs_cluster_write( UPL_NO_SYNC | UPL_SET_INTERNAL; } + if (!dp_encryption_inited) { + /* + * ENCRYPTED SWAP: + * Once we've started using swap, we + * can't change our mind on whether + * it needs to be encrypted or + * not. + */ + dp_encryption_inited = TRUE; + } + if (dp_encryption) { + /* + * ENCRYPTED SWAP: + * request that the UPL be prepared for + * encryption. 
+ */ + request_flags |= UPL_ENCRYPT; + flags |= UPL_PAGING_ENCRYPTED; + } + page_list_count = 0; memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)offset, @@ -3099,7 +3151,7 @@ ps_vstruct_allocated_size( { int num_pages; struct vs_map *vsmap; - int i, j, k; + unsigned int i, j, k; num_pages = 0; if (vs->vs_indirect) { @@ -3146,10 +3198,10 @@ ps_vstruct_allocated_pages( default_pager_page_t *pages, size_t pages_size) { - int num_pages; + unsigned int num_pages; struct vs_map *vsmap; vm_offset_t offset; - int i, j, k; + unsigned int i, j, k; num_pages = 0; offset = 0; @@ -3215,9 +3267,9 @@ ps_vstruct_transfer_from_segment( upl_t upl) { struct vs_map *vsmap; - struct vs_map old_vsmap; - struct vs_map new_vsmap; - int i, j, k; +// struct vs_map old_vsmap; +// struct vs_map new_vsmap; + unsigned int i, j; VS_LOCK(vs); /* block all work on this vstruct */ /* can't allow the normal multiple write */ @@ -3240,8 +3292,8 @@ ps_vstruct_transfer_from_segment( VS_UNLOCK(vs); vs_changed: if (vs->vs_indirect) { - int vsmap_size; - int clmap_off; + unsigned int vsmap_size; + int clmap_off; /* loop on indirect maps */ for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { vsmap = vs->vs_imap[i]; @@ -3371,17 +3423,18 @@ vs_cluster_transfer( paging_segment_t ps; struct clmap clmap; kern_return_t error = KERN_SUCCESS; - int size, size_wanted, i; + unsigned int size, size_wanted; + int i; unsigned int residual; - int unavail_size; - default_pager_thread_t *dpt; - boolean_t dealloc; - struct vs_map *vsmap_ptr; + unsigned int unavail_size; +// default_pager_thread_t *dpt; +// boolean_t dealloc; + struct vs_map *vsmap_ptr = NULL; struct vs_map read_vsmap; struct vs_map original_read_vsmap; struct vs_map write_vsmap; - upl_t sync_upl; - vm_offset_t ioaddr; +// upl_t sync_upl; +// vm_offset_t ioaddr; /* vs_cluster_transfer reads in the pages of a cluster and * then writes these pages back to new backing store. 
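Taken together, the encrypted-swap hunks above plumb one decision through both I/O paths: page-in asks the UPL machinery to prepare for decryption, and page-out latches the decision, then tags both the UPL request and the pageout flags so ps_write_file() (further below) knows to encrypt before issuing I/O. A consolidated, kernel-style sketch of the flow; not standalone:

    /* Page-in (pvs_cluster_read): swap contents may be ciphertext. */
    assert(dp_encryption_inited);
    if (dp_encryption)
        request_flags |= UPL_ENCRYPT;    /* prepare the UPL for decryption */

    /* Page-out (vs_cluster_write): latch the decision on first use ... */
    if (!dp_encryption_inited)
        dp_encryption_inited = TRUE;     /* can't change our mind later */
    /* ... then mark both the UPL request and the pageout flags. */
    if (dp_encryption) {
        request_flags |= UPL_ENCRYPT;
        flags |= UPL_PAGING_ENCRYPTED;   /* consumed by ps_write_file() */
    }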
The @@ -3490,7 +3543,7 @@ vs_cluster_transfer( */ } else { /* NEED TO ISSUE WITH SYNC & NO COMMIT */ - error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, + error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset, size, &residual, (UPL_IOSYNC | UPL_NOCOMMIT)); } @@ -3506,7 +3559,6 @@ vs_cluster_transfer( * */ if ((error == KERN_SUCCESS) && (residual == 0)) { - int page_list_count = 0; /* * Got everything we asked for, supply the data to @@ -3601,14 +3653,16 @@ vs_cluster_transfer( } kern_return_t -default_pager_add_file(MACH_PORT_FACE backing_store, - int *vp, +default_pager_add_file( + MACH_PORT_FACE backing_store, + vnode_ptr_t vp, int record_size, - long size) + vm_size_t size) { backing_store_t bs; paging_segment_t ps; int i; + unsigned int j; int error; if ((bs = backing_store_lookup(backing_store)) @@ -3658,20 +3712,20 @@ default_pager_add_file(MACH_PORT_FACE backing_store, PS_LOCK_INIT(ps); ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); if (!ps->ps_bmap) { - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } - for (i = 0; i < ps->ps_ncls; i++) { - clrbit(ps->ps_bmap, i); + for (j = 0; j < ps->ps_ncls; j++) { + clrbit(ps->ps_bmap, j); } ps->ps_going_away = FALSE; ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { - kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); - kfree((vm_offset_t)ps, sizeof *ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); BS_UNLOCK(bs); return KERN_RESOURCE_SHORTAGE; } @@ -3686,10 +3740,10 @@ default_pager_add_file(MACH_PORT_FACE backing_store, bs_more_space(ps->ps_clcount); - DEBUG(DEBUG_BS_INTERNAL, - ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", - device, offset, size, record_size, - ps->ps_record_shift, ps->ps_pgnum)); + DP_DEBUG(DEBUG_BS_INTERNAL, + ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", + device, offset, size, record_size, + ps->ps_record_shift, ps->ps_pgnum)); return KERN_SUCCESS; } @@ -3700,9 +3754,9 @@ kern_return_t ps_read_file( paging_segment_t ps, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_offset_t offset, - unsigned int size, + upl_size_t size, unsigned int *residualp, int flags) { @@ -3710,6 +3764,7 @@ ps_read_file( int error = 0; int result; + assert(dp_encryption_inited); clustered_reads[atop_32(size)]++; @@ -3739,7 +3794,7 @@ kern_return_t ps_write_file( paging_segment_t ps, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_offset_t offset, unsigned int size, int flags) @@ -3747,11 +3802,20 @@ ps_write_file( vm_object_offset_t f_offset; kern_return_t result; - int error = 0; + assert(dp_encryption_inited); clustered_writes[atop_32(size)]++; f_offset = (vm_object_offset_t)(ps->ps_offset + offset); + if (flags & UPL_PAGING_ENCRYPTED) { + /* + * ENCRYPTED SWAP: + * encrypt all the pages that we're going + * to pageout. 
+ */ + upl_encrypt(upl, upl_offset, size); + } + if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL)) result = KERN_FAILURE; @@ -3762,7 +3826,7 @@ ps_write_file( } kern_return_t -default_pager_triggers(MACH_PORT_FACE default_pager, +default_pager_triggers( __unused MACH_PORT_FACE default_pager, int hi_wat, int lo_wat, int flags, @@ -3772,7 +3836,27 @@ default_pager_triggers(MACH_PORT_FACE default_pager, kern_return_t kr; PSL_LOCK(); - if (flags == HI_WAT_ALERT) { + if (flags == SWAP_ENCRYPT_ON) { + /* ENCRYPTED SWAP: turn encryption on */ + release = trigger_port; + if (!dp_encryption_inited) { + dp_encryption_inited = TRUE; + dp_encryption = TRUE; + kr = KERN_SUCCESS; + } else { + kr = KERN_FAILURE; + } + } else if (flags == SWAP_ENCRYPT_OFF) { + /* ENCRYPTED SWAP: turn encryption off */ + release = trigger_port; + if (!dp_encryption_inited) { + dp_encryption_inited = TRUE; + dp_encryption = FALSE; + kr = KERN_SUCCESS; + } else { + kr = KERN_FAILURE; + } + } else if (flags == HI_WAT_ALERT) { release = min_pages_trigger_port; min_pages_trigger_port = trigger_port; minimum_pages_remaining = hi_wat/vm_page_size; @@ -3809,11 +3893,13 @@ default_pager_triggers(MACH_PORT_FACE default_pager, #define PF_LATENCY 10 /* number of intervals before release */ static int dp_pages_free_low_count = 0; +thread_call_t default_pager_backing_store_monitor_callout; void -default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t p2) +default_pager_backing_store_monitor(__unused thread_call_param_t p1, + __unused thread_call_param_t p2) { - unsigned long long average; +// unsigned long long average; ipc_port_t trigger; uint64_t deadline; @@ -3862,5 +3948,5 @@ default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t } clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline); - thread_call_func_delayed(default_pager_backing_store_monitor, NULL, deadline); + thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline); } diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c index 5eab19f52..c8ce4820e 100644 --- a/osfmk/default_pager/dp_memory_object.c +++ b/osfmk/default_pager/dp_memory_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,11 +54,20 @@ */ #include "default_pager_internal.h" +#include +#include +#include #include #include +#include +#include #include #include +#include +#include +/* forward declaration */ +vstruct_t vs_object_create(vm_size_t size); /* * List of all vstructs. 
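default_pager_triggers() above accepts SWAP_ENCRYPT_ON/OFF exactly once: after dp_encryption_inited is set, either request fails, because pages already written to swap exist in one format. A runnable analog of the one-shot decision:

    #include <stdbool.h>
    #include <stdio.h>

    #define SWAP_ENCRYPT_ON  0x04
    #define SWAP_ENCRYPT_OFF 0x08

    static bool dp_encryption_inited;
    static bool dp_encryption;

    /* Only the first request wins, as in the hunk above. */
    static int set_swap_encryption(int flags)
    {
        if (dp_encryption_inited)
            return -1;                      /* KERN_FAILURE in the kernel */
        dp_encryption_inited = true;
        dp_encryption = (flags == SWAP_ENCRYPT_ON);
        return 0;                           /* KERN_SUCCESS */
    }

    int main(void)
    {
        printf("%d\n", set_swap_encryption(SWAP_ENCRYPT_ON));   /* 0: accepted */
        printf("%d\n", set_swap_encryption(SWAP_ENCRYPT_OFF));  /* -1: refused */
        printf("encrypted=%d\n", dp_encryption);                /* stays 1 */
        return 0;
    }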
A specific vstruct is @@ -104,7 +113,6 @@ static unsigned int default_pager_total = 0; /* debugging */ static unsigned int default_pager_wait_seqno = 0; /* debugging */ static unsigned int default_pager_wait_read = 0; /* debugging */ static unsigned int default_pager_wait_write = 0; /* debugging */ -static unsigned int default_pager_wait_refs = 0; /* debugging */ __private_extern__ void vs_async_wait( @@ -347,7 +355,7 @@ kern_return_t dp_memory_object_init( memory_object_t mem_obj, memory_object_control_t control, - vm_size_t pager_page_size) + __unused vm_size_t pager_page_size) { vstruct_t vs; @@ -372,7 +380,7 @@ dp_memory_object_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t length, - vm_sync_t flags) + __unused vm_sync_t flags) { vstruct_t vs; @@ -387,7 +395,7 @@ dp_memory_object_synchronize( kern_return_t dp_memory_object_unmap( - memory_object_t mem_obj) + __unused memory_object_t mem_obj) { panic("dp_memory_object_unmap"); @@ -400,7 +408,6 @@ dp_memory_object_terminate( { memory_object_control_t control; vstruct_t vs; - kern_return_t kr; /* * control port is a receive right, not a send right. @@ -461,9 +468,6 @@ dp_memory_object_reference( VS_UNLOCK(vs); } -extern ipc_port_t max_pages_trigger_port; -extern int dp_pages_free; -extern int maximum_pages_free; void dp_memory_object_deallocate( memory_object_t mem_obj) @@ -558,7 +562,7 @@ dp_memory_object_data_request( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t length, - vm_prot_t protection_required) + __unused vm_prot_t protection_required) { vstruct_t vs; @@ -636,9 +640,9 @@ dp_memory_object_data_initialize( { vstruct_t vs; - DEBUG(DEBUG_MO_EXTERNAL, - ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n", - (int)mem_obj, (int)offset, (int)size)); + DP_DEBUG(DEBUG_MO_EXTERNAL, + ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n", + (int)mem_obj, (int)offset, (int)size)); GSTAT(global_stats.gs_pages_init += atop_32(size)); vs_lookup(mem_obj, vs); @@ -660,29 +664,33 @@ dp_memory_object_data_initialize( kern_return_t dp_memory_object_data_unlock( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t size, - vm_prot_t desired_access) + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t size, + __unused vm_prot_t desired_access) { Panic("dp_memory_object_data_unlock: illegal"); return KERN_FAILURE; } +/*ARGSUSED8*/ kern_return_t dp_memory_object_data_return( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, - boolean_t dirty, - boolean_t kernel_copy) + vm_size_t size, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags) { vstruct_t vs; - DEBUG(DEBUG_MO_EXTERNAL, - ("mem_obj=0x%x,offset=0x%x,size=0x%x\n", - (int)mem_obj, (int)offset, (int)size)); + DP_DEBUG(DEBUG_MO_EXTERNAL, + ("mem_obj=0x%x,offset=0x%x,size=0x%x\n", + (int)mem_obj, (int)offset, (int)size)); GSTAT(global_stats.gs_pageout_calls++); /* This routine is called by the pageout thread. 
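Many parameters in the converted MIG server routines above are annotated __unused rather than removed, since the message signatures are fixed by the interface definition. In this era of xnu the annotation likely expands to the GCC attribute (exact spelling in <sys/cdefs.h> is assumed):

    #define __unused __attribute__((unused))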
The pageout thread */ @@ -782,7 +790,7 @@ dp_memory_object_data_return( */ kern_return_t default_pager_memory_object_create( - memory_object_default_t dmm, + __unused memory_object_default_t dmm, vm_size_t new_size, memory_object_t *new_mem_obj) { @@ -819,16 +827,13 @@ default_pager_memory_object_create( */ kern_return_t default_pager_object_create( - default_pager_t pager, + default_pager_t default_pager, vm_size_t size, memory_object_t *mem_objp) { vstruct_t vs; - kern_return_t result; - struct vstruct_alias *alias_struct; - - if (pager != default_pager_object) + if (default_pager != default_pager_object) return KERN_INVALID_ARGUMENT; vs = vs_object_create(size); @@ -847,95 +852,59 @@ default_pager_object_create( kern_return_t default_pager_objects( - default_pager_t pager, + default_pager_t default_pager, default_pager_object_array_t *objectsp, mach_msg_type_number_t *ocountp, - memory_object_array_t *pagersp, + mach_port_array_t *portsp, mach_msg_type_number_t *pcountp) { vm_offset_t oaddr = 0; /* memory for objects */ vm_size_t osize = 0; /* current size */ default_pager_object_t * objects; - unsigned int opotential; + unsigned int opotential = 0; - vm_offset_t paddr = 0; /* memory for pagers */ + vm_map_copy_t pcopy = 0; /* copy handle for pagers */ vm_size_t psize = 0; /* current size */ memory_object_t * pagers; - unsigned int ppotential; + unsigned int ppotential = 0; unsigned int actual; unsigned int num_objects; kern_return_t kr; vstruct_t entry; -/* - if (pager != default_pager_default_port) - return KERN_INVALID_ARGUMENT; -*/ - - /* start with the inline memory */ - - kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects, - (vm_map_copy_t) *objectsp); - - if (kr != KERN_SUCCESS) - return kr; - osize = round_page_32(*ocountp * sizeof * objects); - kr = vm_map_wire(ipc_kernel_map, - trunc_page_32((vm_offset_t)objects), - round_page_32(((vm_offset_t)objects) + osize), - VM_PROT_READ|VM_PROT_WRITE, FALSE); - osize=0; - - *objectsp = objects; - /* we start with the inline space */ - - - num_objects = 0; - opotential = *ocountp; - - pagers = (memory_object_t *) *pagersp; - ppotential = *pcountp; - - VSL_LOCK(); + if (default_pager != default_pager_object) + return KERN_INVALID_ARGUMENT; /* * We will send no more than this many */ actual = vstruct_list.vsl_count; - VSL_UNLOCK(); - if (opotential < actual) { - vm_offset_t newaddr; - vm_size_t newsize; - - newsize = 2 * round_page_32(actual * sizeof * objects); - - kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE); - if (kr != KERN_SUCCESS) - goto nomemory; - - oaddr = newaddr; - osize = newsize; - opotential = osize / sizeof * objects; - objects = (default_pager_object_t *)oaddr; + /* + * Out out-of-line port arrays are simply kalloc'ed. + */ + psize = round_page(actual * sizeof * pagers); + ppotential = psize / sizeof * pagers; + pagers = (memory_object_t *)kalloc(psize); + if (0 == pagers) + return KERN_RESOURCE_SHORTAGE; + + /* + * returned out of line data must be allocated out + * the ipc_kernel_map, wired down, filled in, and + * then "copied in" as if it had been sent by a + * user process. 
+ */ + osize = round_page(actual * sizeof * objects); + opotential = osize / sizeof * objects; + kr = kmem_alloc(ipc_kernel_map, &oaddr, osize); + if (KERN_SUCCESS != kr) { + kfree(pagers, psize); + return KERN_RESOURCE_SHORTAGE; } + objects = (default_pager_object_t *)oaddr; - if (ppotential < actual) { - vm_offset_t newaddr; - vm_size_t newsize; - - newsize = 2 * round_page_32(actual * sizeof * pagers); - - kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE); - if (kr != KERN_SUCCESS) - goto nomemory; - - paddr = newaddr; - psize = newsize; - ppotential = psize / sizeof * pagers; - pagers = (memory_object_t *)paddr; - } /* * Now scan the list. @@ -946,8 +915,8 @@ default_pager_objects( num_objects = 0; queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) { - memory_object_t pager; - vm_size_t size; + memory_object_t pager; + vm_size_t size; if ((num_objects >= opotential) || (num_objects >= ppotential)) { @@ -981,7 +950,8 @@ default_pager_objects( VS_UNLOCK(entry); goto not_this_one; } - dp_memory_object_reference(vs_to_mem_obj(entry)); + pager = vs_to_mem_obj(entry); + dp_memory_object_reference(pager); VS_UNLOCK(entry); /* the arrays are wired, so no deadlock worries */ @@ -1003,121 +973,52 @@ default_pager_objects( VSL_UNLOCK(); - /* - * Deallocate and clear unused memory. - * (Returned memory will automagically become pageable.) - */ - - if (objects == *objectsp) { - - /* - * Our returned information fit inline. - * Nothing to deallocate. - */ - *ocountp = num_objects; - } else if (actual == 0) { - (void) vm_deallocate(kernel_map, oaddr, osize); - - /* return zero items inline */ - *ocountp = 0; - } else { - vm_offset_t used; - - used = round_page_32(actual * sizeof * objects); - - if (used != osize) - (void) vm_deallocate(kernel_map, - oaddr + used, osize - used); - - *objectsp = objects; - *ocountp = num_objects; + /* clear out any excess allocation */ + while (num_objects < opotential) { + objects[--opotential].dpo_object = (vm_offset_t) 0; + objects[opotential].dpo_size = 0; } - - if (pagers == (memory_object_t *)*pagersp) { - - /* - * Our returned information fit inline. - * Nothing to deallocate. 
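The rewritten default_pager_objects() above adopts the standard pattern for returning out-of-line data from an in-kernel MIG server: allocate wired memory in ipc_kernel_map, fill it in, unwire it, then vm_map_copyin() it so MIG can hand the caller a vm_map_copy_t as if a user process had sent it (the same conversion is applied to default_pager_object_pages() below). The skeleton, kernel-style and not standalone:

    vm_offset_t     addr;
    vm_size_t       size = round_page(count * sizeof (default_pager_object_t));
    vm_map_copy_t   copy;
    kern_return_t   kr;

    kr = kmem_alloc(ipc_kernel_map, &addr, size);   /* wired kernel memory */
    if (kr != KERN_SUCCESS)
        return KERN_RESOURCE_SHORTAGE;

    /* ... fill in the array at addr while it is wired ... */

    kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
                       vm_map_round_page(addr + size), FALSE);
    assert(kr == KERN_SUCCESS);
    kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
                       (vm_map_size_t)size, TRUE /* consume */, &copy);
    assert(kr == KERN_SUCCESS);
    /* MIG transmits 'copy' as the out-of-line reply. */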
- */ - - *pcountp = num_objects; - } else if (actual == 0) { - (void) vm_deallocate(kernel_map, paddr, psize); - - /* return zero items inline */ - *pcountp = 0; - } else { - vm_offset_t used; - - used = round_page_32(actual * sizeof * pagers); - - if (used != psize) - (void) vm_deallocate(kernel_map, - paddr + used, psize - used); - - *pagersp = (memory_object_array_t)pagers; - *pcountp = num_objects; - } - (void) vm_map_unwire(kernel_map, (vm_offset_t)objects, - *ocountp + (vm_offset_t)objects, FALSE); - (void) vm_map_copyin(kernel_map, (vm_offset_t)objects, - *ocountp, TRUE, (vm_map_copy_t *)objectsp); - - return KERN_SUCCESS; - - nomemory: - { - register int i; - for (i = 0; i < num_objects; i++) - if (pagers[i] != MEMORY_OBJECT_NULL) - memory_object_deallocate(pagers[i]); + while (num_objects < ppotential) { + pagers[--ppotential] = MEMORY_OBJECT_NULL; } - if (objects != *objectsp) - (void) vm_deallocate(kernel_map, oaddr, osize); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr), + vm_map_round_page(oaddr + osize), FALSE); + assert(KERN_SUCCESS == kr); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr, + (vm_map_size_t)osize, TRUE, &pcopy); + assert(KERN_SUCCESS == kr); - if (pagers != (memory_object_t *)*pagersp) - (void) vm_deallocate(kernel_map, paddr, psize); + *objectsp = (default_pager_object_array_t)objects; + *ocountp = num_objects; + *portsp = (mach_port_array_t)pcopy; + *pcountp = num_objects; - return KERN_RESOURCE_SHORTAGE; + return KERN_SUCCESS; } kern_return_t default_pager_object_pages( - default_pager_t pager, - memory_object_t object, + default_pager_t default_pager, + mach_port_t memory_object, default_pager_page_array_t *pagesp, mach_msg_type_number_t *countp) { - vm_offset_t addr; /* memory for page offsets */ + vm_offset_t addr = 0; /* memory for page offsets */ vm_size_t size = 0; /* current memory size */ - default_pager_page_t * pages; - unsigned int potential, actual; + vm_map_copy_t copy; + default_pager_page_t * pages = 0; + unsigned int potential; + unsigned int actual; kern_return_t kr; + memory_object_t object; - - if (pager != default_pager_object) + if (default_pager != default_pager_object) return KERN_INVALID_ARGUMENT; - kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages, - (vm_map_copy_t) *pagesp); - - if (kr != KERN_SUCCESS) - return kr; - - size = round_page_32(*countp * sizeof * pages); - kr = vm_map_wire(ipc_kernel_map, - trunc_page_32((vm_offset_t)pages), - round_page_32(((vm_offset_t)pages) + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); - size=0; - - *pagesp = pages; - /* we start with the inline space */ - - addr = (vm_offset_t)pages; - potential = *countp; + object = (memory_object_t) memory_object; + potential = 0; for (;;) { vstruct_t entry; @@ -1134,9 +1035,9 @@ default_pager_object_pages( VSL_UNLOCK(); /* did not find the object */ + if (0 != addr) + kmem_free(ipc_kernel_map, addr, size); - if (pages != *pagesp) - (void) vm_deallocate(kernel_map, addr, size); return KERN_INVALID_ARGUMENT; found_object: @@ -1147,7 +1048,7 @@ default_pager_object_pages( VS_UNLOCK(entry); - assert_wait_timeout( 1, THREAD_UNINT ); + assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC); wresult = thread_block(THREAD_CONTINUE_NULL); assert(wresult == THREAD_TIMED_OUT); continue; @@ -1161,50 +1062,33 @@ default_pager_object_pages( break; /* allocate more memory */ + if (0 != addr) + kmem_free(ipc_kernel_map, addr, size); + + size = round_page(actual * sizeof * pages); + kr = 
kmem_alloc(ipc_kernel_map, &addr, size); + if (KERN_SUCCESS != kr) + return KERN_RESOURCE_SHORTAGE; - if (pages != *pagesp) - (void) vm_deallocate(kernel_map, addr, size); - size = round_page_32(actual * sizeof * pages); - kr = vm_allocate(kernel_map, &addr, size, TRUE); - if (kr != KERN_SUCCESS) - return kr; pages = (default_pager_page_t *)addr; potential = size / sizeof * pages; } /* - * Deallocate and clear unused memory. - * (Returned memory will automagically become pageable.) + * Clear unused memory. */ - - if (pages == *pagesp) { - - /* - * Our returned information fit inline. - * Nothing to deallocate. - */ - - *countp = actual; - } else if (actual == 0) { - (void) vm_deallocate(kernel_map, addr, size); - - /* return zero items inline */ - *countp = 0; - } else { - vm_offset_t used; - - used = round_page_32(actual * sizeof * pages); - - if (used != size) - (void) vm_deallocate(kernel_map, - addr + used, size - used); - - *pagesp = pages; - *countp = actual; - } - (void) vm_map_unwire(kernel_map, (vm_offset_t)pages, - *countp + (vm_offset_t)pages, FALSE); - (void) vm_map_copyin(kernel_map, (vm_offset_t)pages, - *countp, TRUE, (vm_map_copy_t *)pagesp); + while (actual < potential) + pages[--potential].dpp_offset = 0; + + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), FALSE); + assert(KERN_SUCCESS == kr); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size, TRUE, ©); + assert(KERN_SUCCESS == kr); + + + *pagesp = (default_pager_page_array_t)copy; + *countp = actual; return KERN_SUCCESS; } diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index 04be46fbc..3830eaaff 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ -479,6 +479,17 @@ routine io_service_add_notification_ool( out notification : io_object_t ); +routine io_object_get_superclass( + master_port : mach_port_t; + in obj_name : io_name_t; + out class_name : io_name_t + ); + +routine io_object_get_bundle_identifier( + master_port : mach_port_t; + in obj_name : io_name_t; + out class_name : io_name_t + ); #endif diff --git a/osfmk/device/device_init.c b/osfmk/device/device_init.c index 3e267fbe3..55904f4e2 100644 --- a/osfmk/device/device_init.c +++ b/osfmk/device/device_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,10 +55,21 @@ * * Initialize device service as part of kernel task. */ + +#include +#include + +#include #include #include + +#include +#include +#include +#include #include #include + #include #include diff --git a/osfmk/device/device_port.h b/osfmk/device/device_port.h index 1f2bbba59..13a285a82 100644 --- a/osfmk/device/device_port.h +++ b/osfmk/device/device_port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * diff --git a/osfmk/device/device_types.h b/osfmk/device/device_types.h index 1e6c2e85f..a3245ee1c 100644 --- a/osfmk/device/device_types.h +++ b/osfmk/device/device_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,6 +61,8 @@ * Types for device interface. 
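subrs.c above gains kernel-local strcasecmp()/strncasecmp(), built on a private ASCII-only tolower() helper. Their semantics match the BSD libc versions, so a quick userspace check of the expected return values:

    #include <stdio.h>
    #include <strings.h>   /* strcasecmp/strncasecmp on POSIX hosts */

    int main(void)
    {
        printf("%d\n", strcasecmp("IOKit", "iokit"));          /* 0: match   */
        printf("%d\n", strncasecmp("IOKitDebug", "iokit", 5)); /* 0: prefix  */
        printf("%d\n", strcasecmp("apple", "apples") < 0);     /* 1: s1 ends */
        return 0;
    }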
*/ #include +#include +#include #include /* @@ -94,6 +96,8 @@ extern io_connect_t iokit_lookup_connect_port( ipc_port_t port ); extern ipc_port_t iokit_make_object_port( io_object_t obj ); extern ipc_port_t iokit_make_connect_port( io_connect_t obj ); +extern boolean_t iokit_notify( mach_msg_header_t *msg ); + #else #ifndef __IOKIT_PORTS_DEFINED__ diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index f4945d036..d6fb27e2e 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -39,12 +39,10 @@ #include #include -#include #include #include #include #include -#include #include #include #include @@ -62,6 +60,9 @@ #ifdef __ppc__ #include #endif +#ifdef __i386 +#include +#endif #include #define EXTERN @@ -73,8 +74,6 @@ extern void iokit_add_reference( io_object_t obj ); -extern void iokit_remove_reference( io_object_t obj ); - extern ipc_port_t iokit_port_for_object( io_object_t obj, ipc_kobject_type_t type ); @@ -319,6 +318,12 @@ iokit_make_send_right( task_t task, io_object_t obj, ipc_kobject_type_t type ) return( name ); } +EXTERN kern_return_t +iokit_mod_send_right( task_t task, mach_port_name_t name, mach_port_delta_t delta ) +{ + return (mach_port_mod_refs( task->itk_space, name, MACH_PORT_RIGHT_SEND, delta )); +} + /* * Handle the No-More_Senders notification generated from a device port destroy. * Since there are no longer any tasks which hold a send right to this device @@ -404,7 +409,7 @@ unsigned int IODefaultCacheBits(addr64_t pa) // If no physical, just hard code attributes flags = VM_WIMG_IO; #else - extern vm_offset_t avail_end; + extern pmap_paddr_t avail_end; if (pa < avail_end) flags = VM_WIMG_COPYBACK; diff --git a/osfmk/device/subrs.c b/osfmk/device/subrs.c index 1282fd398..d652244e9 100644 --- a/osfmk/device/subrs.c +++ b/osfmk/device/subrs.c @@ -166,6 +166,49 @@ strncmp( return 0; } + +// +// Lame implementation just for use by strcasecmp/strncasecmp +// +static int +tolower(unsigned char ch) +{ + if (ch >= 'A' && ch <= 'Z') + ch = 'a' + (ch - 'A'); + + return ch; +} + +int +strcasecmp(const char *s1, const char *s2) +{ + const unsigned char *us1 = (const u_char *)s1, + *us2 = (const u_char *)s2; + + while (tolower(*us1) == tolower(*us2++)) + if (*us1++ == '\0') + return (0); + return (tolower(*us1) - tolower(*--us2)); +} + +int +strncasecmp(const char *s1, const char *s2, size_t n) +{ + if (n != 0) { + const unsigned char *us1 = (const u_char *)s1, + *us2 = (const u_char *)s2; + + do { + if (tolower(*us1) != tolower(*us2++)) + return (tolower(*us1) - tolower(*--us2)); + if (*us1++ == '\0') + break; + } while (--n != 0); + } + return (0); +} + + /* * Abstract: * strcpy copies the contents of the string "from" including diff --git a/osfmk/i386/AT386/asm_startup.h b/osfmk/i386/AT386/asm_startup.h deleted file mode 100644 index faa760edb..000000000 --- a/osfmk/i386/AT386/asm_startup.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#ifndef __MACHO__ -/* - * Startup code for an i386 on an AT. - * Kernel is loaded starting at 1MB. - * Protected mode, paging disabled. - */ - - popl %eax - cmpl $-1,%eax /* new calling convention */ - je 0f - -/* - * Old calling convention - * - * %esp -> boottype (deprecated) - * size of extended memory (K) - * size of conventional memory (K) - * boothowto (deprecated) - * esym (if KDB set up) - */ -#define SYS_REBOOT_COMPAT 1 -#if SYS_REBOOT_COMPAT - movl %eax,PA(EXT(boottype)) -#endif - popl PA(EXT(extmem)) /* extended memory, in K */ - popl PA(EXT(cnvmem)) /* conventional memory, in K */ - popl %edx /* old boothowto */ -#if SYS_REBOOT_COMPAT -#define RB_SINGLE 0x2 -#define RB_HALT 0x8 -#define RB_ALTBOOT 0x40 - testb $(RB_SINGLE),%edx /* old RB_SINGLE flag ? */ - je 2f - incl PA(EXT(startup_single_user)) -2: testb $(RB_HALT),%edx /* old RB_HALT flag ? */ - je 2f - incl PA(EXT(halt_in_debugger)) -2: testb $(RB_ALTBOOT),%edx /* old RB_ALTBOOT flag ? */ - je 2f - incl PA(EXT(cons_is_com1)) -2: -#if NCPUS > 1 - shrl $0x8,%edx - movb %edx,PA(EXT(wncpu)) /* old want ncpus flag */ -#endif -#endif - - popl %eax /* get boot_string & esym */ -#if SYS_REBOOT_COMPAT - movl %eax, %esi - lea PA(EXT(boot_string_store)), %edi - movl PA(EXT(boot_string_sz)), %ecx - cld - rep - movsb -#endif - -/* - * Move symbol table out of the way of BSS. - * - * When kernel is loaded, at the start of BSS we have: - * _edata: - * .long kern_sym_size - * .long boot_image_size - * .long load_info_size - * sym_start: - * kernel symbols - * .align ALIGN - * boot_start: - * bootstrap image - * .align ALIGN - * load_info_start: - * bootstrap load information - * - * all of which must be moved somewhere else, since it - * is sitting in the kernel BSS. In addition, the bootstrap - * image must be moved to a machine page boundary, so that we get: - * - * _edata: - * BSS - * _end: <- kern_sym_start (VA) - * kernel symbols . 
(kern_sym_size) - * : <- boot_start (VA) - * bootstrap image - * <- load_info_start (VA) - * load information - * <- %ebx (PA) - * - */ - lea PA(EXT(edata))+4-1,%esi /* point to symbol size word */ - andl $~0x3,%esi - movl (%esi),%edx /* get symbol size */ - - lea PA(EXT(end))+NBPG-1(%edx),%edi - /* point after BSS, add symbol */ - /* size, and round up to */ - andl $-NBPG,%edi /* machine page boundary */ - - lea -KVTOPHYS(%edi),%eax /* save virtual address */ - movl %eax,PA(EXT(boot_start)) /* of start of bootstrap */ - movl 4(%esi),%ecx /* get size of bootstrap */ - movl %ecx,PA(EXT(boot_size)) /* save size of bootstrap */ - lea -KVTOPHYS(%edi,%ecx),%eax - movl %eax,PA(EXT(load_info_start)) - /* save virtual address */ - /* of start of loader info */ - movl 8(%esi),%eax /* get size of loader info */ - movl %eax,PA(EXT(load_info_size)) - /* save size of loader info */ - addl %eax,%ecx /* get total size to move */ - - leal 12(%esi,%edx),%esi /* point to start of boot image - source */ - - leal (%edi,%ecx),%ebx /* point to new location of */ - /* end of bootstrap - next */ - /* available physical address */ - - lea -4(%esi,%ecx),%esi /* point to end of src - 4 */ - lea -4(%edi,%ecx),%edi /* point to end of dst - 4 */ - shrl $2,%ecx /* move by longs */ - std /* move backwards */ - rep - movsl /* move bootstrap and loader_info */ - cld /* reset direction flag */ - - movl $EXT(end),PA(EXT(kern_sym_start)) - /* save virtual address */ - /* of start of symbols */ - movl %edx,PA(EXT(kern_sym_size)) /* save symbol table size */ - testl %edx,%edx /* any symbols? */ - jz 1f /* if so: */ - - /* %esi points to start of boot-4 */ - /* == end of symbol table (source) - 4 */ - leal PA(EXT(end))-4(%edx),%edi /* point to end of dst - 4 */ - movl %edx,%ecx /* copy size */ - shrl $2,%ecx /* move by longs */ - std /* move backwards */ - rep - movsl /* move symbols */ - cld /* reset direction flag */ - - jmp 1f - -/* - * New calling convention - * - * %esp -> -1 - * size of extended memory (K) - * size of conventional memory (K) - * kern_sym_start - * kern_sym_size - * kern_args_start - * kern_args_size - * boot_sym_start - * boot_sym_size - * boot_args_start - * boot_args_size - * boot_start - * boot_size - * boot_region_desc - * boot_region_count - * boot_thread_state_flavor - * boot_thread_state - * boot_thread_state_count - * env_start - * env_size - * top of loaded memory - */ - -#define MEM_BASE 0 - -#define BOOT_TO_VIRT (MEM_BASE-(KVTOPHYS)) - .globl EXT(boot_start) - -0: - popl PA(EXT(extmem)) /* extended memory, in K */ - popl PA(EXT(cnvmem)) /* conventional memory, in K */ - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(kern_sym_start)) - popl PA(EXT(kern_sym_size)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(kern_args_start)) - popl PA(EXT(kern_args_size)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(boot_sym_start)) - popl PA(EXT(boot_sym_size)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(boot_args_start)) - popl PA(EXT(boot_args_size)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(boot_start)) - popl PA(EXT(boot_size)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(boot_region_desc)) - popl PA(EXT(boot_region_count)) - popl PA(EXT(boot_thread_state_flavor)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl 
%eax,PA(EXT(boot_thread_state)) - popl PA(EXT(boot_thread_state_count)) - popl %eax - addl $BOOT_TO_VIRT,%eax /* convert to virtual address */ - movl %eax,PA(EXT(env_start)) - popl PA(EXT(env_size)) - popl %ebx /* mem top */ - addl $MEM_BASE,%ebx /* translate */ -1: -#else - movl %ebx,PA(EXT(boot_args_start)) /* Save KERNBOOTSTRUCT */ - cld - call PA(EXT(i386_preinit)) - movl %eax,%ebx -#endif diff --git a/osfmk/i386/AT386/bbclock.c b/osfmk/i386/AT386/bbclock.c index 3a0245e70..f52b3694b 100644 --- a/osfmk/i386/AT386/bbclock.c +++ b/osfmk/i386/AT386/bbclock.c @@ -51,6 +51,8 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include #include #include +#include +#include #include #include #include @@ -81,13 +83,12 @@ bbc_config(void) int BbcFlag; struct rtc_st rtclk; -#if NCPUS > 1 && AT386 mp_disable_preemption(); if (cpu_number() != master_cpu) { mp_enable_preemption(); return(1); } -#endif + /* * Setup device. */ @@ -104,9 +105,7 @@ bbc_config(void) printf("battery clock configured\n"); else printf("WARNING: Battery Clock Failure!\n"); -#if NCPUS > 1 && AT386 mp_enable_preemption(); -#endif return (BbcFlag); } @@ -124,7 +123,6 @@ bbc_gettime( spl_t s; thread_t thread; -#if NCPUS > 1 && AT386 if ((thread = current_thread()) != THREAD_NULL) { thread_bind(thread, master_processor); mp_disable_preemption(); @@ -135,7 +133,7 @@ bbc_gettime( mp_enable_preemption(); } } -#endif + s = LOCK_BBC(); rtcget(&rtclk); sec = hexdectodec(rtclk.rtc_sec); @@ -159,10 +157,8 @@ bbc_gettime( cur_time->tv_nsec = 0; UNLOCK_BBC(s); -#if NCPUS > 1 && AT386 if (thread != THREAD_NULL) thread_bind(thread, PROCESSOR_NULL); -#endif return (KERN_SUCCESS); } @@ -179,7 +175,6 @@ bbc_settime( spl_t s; thread_t thread; -#if NCPUS > 1 && AT386 if ((thread = current_thread()) != THREAD_NULL) { thread_bind(thread, master_processor); mp_disable_preemption(); @@ -190,7 +185,7 @@ bbc_settime( mp_enable_preemption(); } } -#endif + s = LOCK_BBC(); rtcget(&rtclk); diff = 0; @@ -214,39 +209,9 @@ bbc_settime( rtcput(&rtclk); UNLOCK_BBC(s); -#if NCPUS > 1 && AT386 if (thread != THREAD_NULL) thread_bind(current_thread(), PROCESSOR_NULL); -#endif - return (KERN_SUCCESS); -} -/* - * Get clock device attributes. 
- */ -kern_return_t -bbc_getattr( - clock_flavor_t flavor, - clock_attr_t attr, /* OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ -{ - if (*count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: /* >0 res */ - *(clock_res_t *) attr = NSEC_PER_SEC; - break; - - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - case CLOCK_ALARM_MINRES: - case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = 0; - break; - - default: - return (KERN_INVALID_VALUE); - } return (KERN_SUCCESS); } @@ -258,7 +223,7 @@ rtcget( struct rtc_st * regs) { outb(RTC_ADDR, RTC_D); - if (inb(RTC_DATA) & RTC_VRT == 0) + if ((inb(RTC_DATA) & RTC_VRT) == 0) return (-1); outb(RTC_ADDR, RTC_A); while (inb(RTC_DATA) & RTC_UIP) /* busy wait */ diff --git a/osfmk/i386/AT386/bbclock_entries.h b/osfmk/i386/AT386/bbclock_entries.h index 1fd8ca1d7..2cd9cc6c6 100644 --- a/osfmk/i386/AT386/bbclock_entries.h +++ b/osfmk/i386/AT386/bbclock_entries.h @@ -27,5 +27,6 @@ extern kern_return_t bbc_gettime( mach_timespec_t * curtime); extern kern_return_t bbc_settime( mach_timespec_t * curtime); +extern int bbc_config(void); #define NO_SETALRM (void (*) (mach_timespec_t * alarm_time))0 diff --git a/osfmk/i386/AT386/conf.c b/osfmk/i386/AT386/conf.c index 67632a323..8001faf2f 100644 --- a/osfmk/i386/AT386/conf.c +++ b/osfmk/i386/AT386/conf.c @@ -58,8 +58,6 @@ #include #include -#include - /* * Clock device subsystem configuration. The clock_list[] * table contains the clock structures for all clocks in @@ -74,9 +72,9 @@ extern struct clock_ops sysclk_ops, calend_ops; struct clock clock_list[] = { /* SYSTEM_CLOCK */ - { &sysclk_ops, 0, 0, 0 }, + { &sysclk_ops, 0, 0, {0} }, /* CALENDAR_CLOCK */ - { &calend_ops, 0, 0, 0 }, + { &calend_ops, 0, 0, {0} } }; int clock_count = sizeof(clock_list) / sizeof(clock_list[0]); diff --git a/osfmk/i386/AT386/himem.c b/osfmk/i386/AT386/himem.c index f9abdb912..c01c1f37e 100644 --- a/osfmk/i386/AT386/himem.c +++ b/osfmk/i386/AT386/himem.c @@ -138,6 +138,7 @@ #include #include #include +#include hil_t hil_head; decl_simple_lock_data(,hil_lock) @@ -151,7 +152,7 @@ void himem_init( void) { - simple_lock_init(&hil_lock, ETAP_VM_HIMEM); + simple_lock_init(&hil_lock, 0); } /* @@ -168,10 +169,11 @@ himem_reserve( vm_page_t low; hil_t hil; spl_t ipl; - extern vm_offset_t avail_end; + extern pmap_paddr_t avail_end; if (avail_end <= HIGH_MEM) return; + kprintf("looking for low mem pages\n"); hil = (hil_t)kalloc(npages*sizeof(struct himem_link)); if (hil == (hil_t)0) panic("himem_reserve: kalloc failed\n"); @@ -195,7 +197,7 @@ himem_reserve( i++; } } - + kprintf("freeing high pages back\n"); for (low = free_head; low; low = free_head) { free_head = (vm_page_t) low->pageq.next; VM_PAGE_FREE(low); @@ -248,7 +250,7 @@ himem_convert( h->high_addr = phys_addr; if (io_op == D_WRITE) { - bcopy((char *)phystokv(phys_addr), (char *)phystokv(h->low_page + offset), + bcopy_phys((addr64_t)phys_addr, (addr64_t)(h->low_page + offset), length); h->length = 0; } else { @@ -279,8 +281,8 @@ himem_revert( while(hil) { if (hil->length) { - bcopy((char *)phystokv(hil->low_page + hil->offset), - (char *)phystokv(hil->high_addr), + bcopy_phys((addr64_t)(hil->low_page + hil->offset), + (addr64_t)(hil->high_addr), hil->length); } hil->high_addr = 0; diff --git a/osfmk/i386/AT386/machdep.mk b/osfmk/i386/AT386/machdep.mk index 00c31d4df..5fe42b283 100644 --- a/osfmk/i386/AT386/machdep.mk +++ b/osfmk/i386/AT386/machdep.mk @@ -32,8 +32,7 @@ AT386_DATAFILES = disk.h kd.h -#AT386_DATAFILES = asm_startup.h \ -# atbus.h \
+#AT386_DATAFILES = atbus.h \ # blitreg.h \ # blituser.h \ # blitvar.h \ diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 741077057..588e59611 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -60,11 +60,9 @@ * Basic initialization for I386 - ISA bus machines. */ -#include #include #include #include -#include #include @@ -79,32 +77,37 @@ #include #include #include -#include #include +#include #include #include #include #include -#include #include +#include +#include #include #if MACH_KDB #include #endif /* MACH_KDB */ -#if NCPUS > 1 #include -#endif /* NCPUS */ - -#if NCPUS > 1 #include -#endif /* NCPUS > 1 */ #include +void enable_bluebox(void); +void disable_bluebox(void); + static void machine_conf(void); #include +extern int default_preemption_rate; +extern int max_unsafe_quanta; +extern int max_poll_quanta; +extern int idlehalt; +extern unsigned int panic_is_inited; + void machine_startup() { @@ -153,25 +156,20 @@ machine_startup() #endif /* MACH_KDB */ if (PE_parse_boot_arg("preempt", &boot_arg)) { - extern int default_preemption_rate; - default_preemption_rate = boot_arg; } if (PE_parse_boot_arg("unsafe", &boot_arg)) { - extern int max_unsafe_quanta; - max_unsafe_quanta = boot_arg; } if (PE_parse_boot_arg("poll", &boot_arg)) { - extern int max_poll_quanta; - max_poll_quanta = boot_arg; } if (PE_parse_boot_arg("yield", &boot_arg)) { - extern int sched_poll_yield_shift; - sched_poll_yield_shift = boot_arg; } + if (PE_parse_boot_arg("idlehalt", &boot_arg)) { + idlehalt = boot_arg; + } machine_conf(); @@ -182,17 +180,14 @@ machine_startup() /* * Start the system. */ - setup_main(); - - /* Should never return */ + kernel_bootstrap(); + /*NOTREACHED*/ } static void machine_conf(void) { - machine_info.max_cpus = NCPUS; - machine_info.avail_cpus = 1; machine_info.memory_size = mem_size; } @@ -202,10 +197,6 @@ machine_conf(void) void machine_init(void) { - int unit; - const char *p; - int n; - /* * Display CPU identification */ @@ -213,9 +204,7 @@ machine_init(void) cpuid_feature_display("CPU features", 0); -#if NCPUS > 1 smp_init(); -#endif /* * Set up to use floating point. @@ -226,6 +215,21 @@ machine_init(void) * Configure clock devices. */ clock_config(); + + /* + * Initialize MTRR from boot processor. + */ + mtrr_init(); + + /* + * Set up PAT for boot processor. + */ + pat_init(); + + /* + * Free lowmem pages + */ + x86_lowmem_free(); } /* @@ -249,8 +253,10 @@ halt_all_cpus(boolean_t reboot) /* * Tell the BIOS not to clear and test memory. 
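 * (Physical address 0x472 is the BIOS data area's warm-boot flag;
 * writing 0x1234 there asks the BIOS to skip its POST memory test
 * on the next reset. The store is currently compiled out below,
 * per the XXX fixme.)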
*/ +#if 0 /* XXX fixme */ if (!reset_mem_on_reboot) *(unsigned short *)phystokv(0x472) = 0x1234; +#endif printf("MACH Reboot\n"); PEHaltRestart( kPERestartCPU ); @@ -264,7 +270,6 @@ halt_all_cpus(boolean_t reboot) /*XXX*/ void fc_get(mach_timespec_t *ts); #include -#include extern kern_return_t sysclk_gettime( mach_timespec_t *cur_time); void fc_get(mach_timespec_t *ts) { @@ -275,7 +280,14 @@ void Debugger( const char *message) { + + if (!panic_is_inited) { + postcode(PANIC_HLT); + asm("hlt"); + } + printf("Debugger called: <%s>\n", message); + kprintf("Debugger called: <%s>\n", message); draw_panic_dialog(); @@ -283,34 +295,16 @@ Debugger( } void -display_syscall(int syscall) +enable_bluebox(void) { - printf("System call happened %d\n", syscall); } - -#if XPR_DEBUG && (NCPUS == 1) - -extern kern_return_t sysclk_gettime_interrupts_disabled( - mach_timespec_t *cur_time); - -int xpr_time(void) -{ - mach_timespec_t time; - - sysclk_gettime_interrupts_disabled(&time); - return(time.tv_sec*1000000 + time.tv_nsec/1000); -} -#endif /* XPR_DEBUG && (NCPUS == 1) */ - -enable_bluebox() -{ -} -disable_bluebox() +void +disable_bluebox(void) { } char * -machine_boot_info(char *buf, vm_size_t size) +machine_boot_info(char *buf, __unused vm_size_t size) { *buf ='\0'; return buf; diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile index 869a3f0cd..ae6a4000d 100644 --- a/osfmk/i386/Makefile +++ b/osfmk/i386/Makefile @@ -8,17 +8,23 @@ include $(MakeInc_cmd) include $(MakeInc_def) EXPORT_ONLY_FILES = \ - cpu_capabilities.h \ + apic.h \ cpu_number.h \ - hw_lock_types.h \ + cpu_capabilities.h \ + cpuid.h \ io_map_entries.h \ lock.h \ + locks.h \ machine_routines.h \ machine_cpu.h \ + mtrr.h \ mp.h \ + mp_desc.h \ mp_events.h \ - apic.h \ - cpuid.h + proc_reg.h \ + seg.h \ + simple_lock.h \ + tss.h INSTALL_MD_DIR = i386 diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c new file mode 100644 index 000000000..51623e41c --- /dev/null +++ b/osfmk/i386/acpi.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +extern void acpi_sleep_cpu(acpi_sleep_callback, void * refcon); +extern char acpi_wake_start[]; +extern char acpi_wake_end[]; + +extern int serial_init(void); +extern unsigned int disableSerialOuput; + +extern void set_kbd_leds(int leds); + +vm_offset_t +acpi_install_wake_handler(void) +{ + /* copy wake code to ACPI_WAKE_ADDR in low memory */ + bcopy_phys((addr64_t) kvtophys((vm_offset_t)acpi_wake_start), + (addr64_t) ACPI_WAKE_ADDR, + acpi_wake_end - acpi_wake_start); + + /* flush cache */ + wbinvd(); + + /* return physical address of the wakeup code */ + return ACPI_WAKE_ADDR; +} + +typedef struct acpi_sleep_callback_data { + acpi_sleep_callback func; + void *refcon; +} acpi_sleep_callback_data; + +static void +acpi_sleep_do_callback(void *refcon) +{ + acpi_sleep_callback_data *data = (acpi_sleep_callback_data *)refcon; + + + (data->func)(data->refcon); + + /* should never get here! */ +} + +void +acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) +{ + acpi_sleep_callback_data data; + + /* shutdown local APIC before passing control to BIOS */ + lapic_shutdown(); + + data.func = func; + data.refcon = refcon; + + /* + * Save master CPU state and sleep platform. + * Will not return until platform is woken up, + * or if sleep failed. + */ + acpi_sleep_cpu(acpi_sleep_do_callback, &data); + + /* reset UART if kprintf is enabled */ + if (FALSE == disableSerialOuput) + serial_init(); + + + /* restore MTRR settings */ + mtrr_update_cpu(); + + /* set up PAT following boot processor power up */ + pat_init(); + + /* re-enable and re-init local apic */ + if (lapic_probe()) + lapic_init(); + + /* let the realtime clock reset */ + rtc_sleep_wakeup(); + +} diff --git a/bsd/machine/proc.h b/osfmk/i386/acpi.h similarity index 66% rename from bsd/machine/proc.h rename to osfmk/i386/acpi.h index 47a46df41..bdb544058 100644 --- a/bsd/machine/proc.h +++ b/osfmk/i386/acpi.h @@ -19,20 +19,24 @@ * * @APPLE_LICENSE_HEADER_END@ */ + +#ifndef _I386_ACPI_H_ +#define _I386_ACPI_H_ + /* - * Copyright 1995 NeXT Computer, Inc. All rights reserved. + * ACPI (Advanced Configuration and Power Interface) support. */ -#ifndef _BSD_MACHINE_PROC_H_ -#define _BSD_MACHINE_PROC_H_ - -#if defined (__ppc__) -#include "ppc/proc.h" -#elif defined (__i386__) -#include "i386/proc.h" -#else -#error architecture not supported -#endif +/* + * Wake up code linear address + * FIXME: borrowed unused memory reserved by MP_BOOT + */ +#define ACPI_WAKE_ADDR 0x2000 +#ifndef ASSEMBLER +typedef void (*acpi_sleep_callback)(void * refcon); +extern vm_offset_t acpi_install_wake_handler(void); +extern void acpi_sleep_kernel(acpi_sleep_callback func, void * refcon); +#endif /* ASSEMBLER */ -#endif /* _BSD_MACHINE_PROC_H_ */ +#endif /* !_I386_ACPI_H_ */ diff --git a/osfmk/i386/acpi_wakeup.s b/osfmk/i386/acpi_wakeup.s new file mode 100644 index 000000000..931a72740 --- /dev/null +++ b/osfmk/i386/acpi_wakeup.s @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include + + .file "acpi_wakeup.s" + + .text + .align 12 /* Page align for single bcopy_phys() */ + +#define LJMP(segment, address) \ + .byte 0xea ;\ + .long address - EXT(acpi_wake_start) ;\ + .word segment + +#define PA(addr) ((addr)-KERNELBASE) + +/* + * acpi_wake_start + * + * The code from acpi_wake_start to acpi_wake_end is copied to + * memory below 1MB. The firmware waking vector is updated to + * point at acpi_wake_start in low memory before sleeping. + */ + +ENTRY(acpi_wake_start) + /* + * CPU woke up from sleep, and is back in real mode. + * Initialize it just enough to get back to protected mode. + */ + cli + + POSTCODE(ACPI_WAKE_START_ENTRY) + + /* set up DS to match CS */ + movw %cs, %ax + movw %ax, %ds + + /* + * Must initialize GDTR before entering protected mode. + * Use a temporary GDT that is 0 based, 4GB limit, code and data. + * Restoring the actual GDT will come later. + */ + addr16 + data16 + lgdt EXT(acpi_gdtr) - EXT(acpi_wake_start) + + /* set CR0.PE to enter protected mode */ + mov %cr0, %eax + data16 + or $(CR0_PE), %eax + mov %eax, %cr0 + + /* + * Make intra-segment jump to flush pipeline and reload CS register. + * If GDT is bogus, it will blow up here. + */ + data16 + LJMP(0x8, acpi_wake_prot + ACPI_WAKE_ADDR) + +acpi_wake_prot: + + /* protected mode, paging disabled */ + + /* setup the protected mode segment registers */ + mov $0x10, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw %ax, %fs + movw %ax, %gs + + /* jump back to the sleep function in the kernel */ + movl PA(saved_eip), %eax + jmp *%eax + +/* Segment Descriptor + * + * 31 24 19 16 7 0 + * ------------------------------------------------------------ + * | | |B| |A| | | |1|0|E|W|A| | + * | BASE 31..24 |G|/|0|V| LIMIT |P|DPL| TYPE | BASE 23:16 | + * | | |D| |L| 19..16| | |1|1|C|R|A| | + * ------------------------------------------------------------ + * | | | + * | BASE 15..0 | LIMIT 15..0 | + * | | | + * ------------------------------------------------------------ + */ +ENTRY(acpi_gdt) + .word 0, 0 /* 0x0 : null */ + .byte 0, 0, 0, 0 + + .word 0xffff, 0x0000 /* 0x8 : code */ + .byte 0, 0x9e, 0xcf, 0 + + .word 0xffff, 0x0000 /* 0x10 : data */ + .byte 0, 0x92, 0xcf, 0 + +ENTRY(acpi_gdtr) + .word 24 /* limit (8*3 segs) */ + .long EXT(acpi_gdt) - EXT(acpi_wake_start) + ACPI_WAKE_ADDR + +ENTRY(acpi_wake_end) + + +/* + * acpi_sleep_cpu(acpi_sleep_callback func, void * refcon) + * + * Save CPU state before platform sleep. Restore CPU state + * following wake up. 
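+ *
+ * Flow: the caller-supplied func(refcon) actually sleeps the
+ * platform and does not return on success. On wake, the firmware
+ * vector re-enters acpi_wake_start above in real mode, which gets
+ * back to protected mode and jumps through saved_eip to wake_prot,
+ * where the state saved below is restored. If func() does return,
+ * the sleep failed and we fall through to wake_restore with no
+ * context lost.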
+ */ + +ENTRY(acpi_sleep_cpu) + pushl %ebp + movl %esp, %ebp + + /* save flags */ + pushfl + + /* save general purpose registers */ + pushal + movl %esp, saved_esp + + /* save control registers */ + movl %cr0, %eax + movl %eax, saved_cr0 + movl %cr2, %eax + movl %eax, saved_cr2 + movl %cr3, %eax + movl %eax, saved_cr3 + movl %cr4, %eax + movl %eax, saved_cr4 + + /* save segment registers */ + movw %es, saved_es + movw %fs, saved_fs + movw %gs, saved_gs + movw %ss, saved_ss + + /* save descriptor table registers */ + sgdt saved_gdt + sldt saved_ldt + sidt saved_idt + str saved_tr + + /* + * When system wakes up, the real mode wake handler will revert to + * protected mode, then jump to the address stored at saved_eip. + */ + movl $(PA(wake_prot)), saved_eip + + /* + * Call ACPI function provided by the caller to sleep the platform. + * This call will not return on success. + */ + pushl B_ARG1 + movl B_ARG0, %edi + call *%edi + popl %edi + + /* sleep failed, no cpu context lost */ + jmp wake_restore + +wake_prot: + + /* protected mode, paging disabled */ + POSTCODE(ACPI_WAKE_PROT_ENTRY) + + /* restore kernel GDT */ + lgdt PA(saved_gdt) + + /* restore control registers */ + movl PA(saved_cr2), %eax + movl %eax, %cr2 + +#ifdef PAE + movl PA(EXT(IdlePDPT)), %eax + movl (%eax), %esi /* save orig */ + movl 24(%eax), %ebx + movl %ebx, (%eax) /* identity map low mem */ + movl %eax, %cr3 + + movl PA(saved_cr4), %eax + movl %eax, %cr4 +#else + movl PA(saved_cr4), %eax + movl %eax, %cr4 + + /* + * Temporarily use the page tables at IdlePTD + * to enable paging. Copy the KPTDI entry to + * entry 0 in the PTD to identity map the kernel. + */ + movl PA(EXT(IdlePTD)), %eax + movl %eax, %ebx + addl $(KPTDI << PTEINDX), %ebx /* bytes per PDE */ + movl (%ebx), %ebx /* IdlePTD[KPTDI] */ + movl (%eax), %esi /* save original IdlePTD[0] */ + movl %ebx, (%eax) /* update IdlePTD[0] */ + movl %eax, %cr3 /* CR3 = IdlePTD */ +#endif + + /* restore CR0, paging enabled */ + movl PA(saved_cr0), %eax + movl %eax, %cr0 + + /* switch to kernel code segment */ + ljmpl $(KERNEL_CS), $wake_paged + +wake_paged: + + /* protected mode, paging enabled */ + POSTCODE(ACPI_WAKE_PAGED_ENTRY) + + /* switch to kernel data segment */ + movw $(KERNEL_DS), %ax + movw %ax, %ds + + /* undo changes to IdlePTD */ +#ifdef PAE + movl EXT(IdlePDPT), %eax +#else + movl EXT(IdlePTD), %eax +#endif + addl $(KERNELBASE), %eax /* make virtual */ + movl %esi, (%eax) + + /* restore real PDE base */ + movl saved_cr3, %eax + movl %eax, %cr3 + + + /* restore local and interrupt descriptor tables */ + lldt saved_ldt + lidt saved_idt + + /* restore segment registers */ + movw saved_es, %es + movw saved_fs, %fs + movw saved_gs, %gs + movw saved_ss, %ss + + /* + * Restore task register. Before doing this, clear the busy flag + * in the TSS descriptor set by the CPU. 
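+ * (ltr raises #GP if the selected TSS descriptor is marked busy,
+ * and the CPU set that busy bit when TR was first loaded; writing
+ * the access byte, at offset 5 within the 8-byte descriptor, back
+ * to K_TSS makes the reload below legal.)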
+ */ + movl $saved_gdt, %eax + movl 2(%eax), %edx /* GDT base, skip limit word */ + movl $(KERNEL_TSS), %eax /* TSS segment selector */ + movb $(K_TSS), 5(%edx, %eax) /* clear busy flag */ + ltr saved_tr /* restore TR */ + +wake_restore: + + /* restore general purpose registers */ + movl saved_esp, %esp + popal + + /* restore flags */ + popfl + + leave + ret + + + .section __HIB, __text + .align 2 + + .globl EXT(acpi_wake_prot_entry) +ENTRY(acpi_wake_prot_entry) + /* protected mode, paging enabled */ + POSTCODE(ACPI_WAKE_PAGED_ENTRY) + + /* restore kernel GDT */ + lgdt PA(saved_gdt) + + POSTCODE(0x40) + /* restore control registers */ + movl saved_cr2, %eax + movl %eax, %cr2 + + POSTCODE(0x3E) + /* switch to kernel data segment */ + movw $(KERNEL_DS), %ax + movw %ax, %ds + + POSTCODE(0x3D) + /* restore real PDE base */ + movl saved_cr3, %eax + movl saved_cr4, %edx + movl %eax, %cr3 + movl %edx, %cr4 + + POSTCODE(0x3C) + /* restore local and interrupt descriptor tables */ + lldt saved_ldt + lidt saved_idt + + POSTCODE(0x3B) + /* restore segment registers */ + movw saved_es, %es + movw saved_fs, %fs + movw saved_gs, %gs + movw saved_ss, %ss + + POSTCODE(0x3A) + /* + * Restore task register. Before doing this, clear the busy flag + * in the TSS descriptor set by the CPU. + */ + movl $saved_gdt, %eax + movl 2(%eax), %edx /* GDT base, skip limit word */ + movl $(KERNEL_TSS), %eax /* TSS segment selector */ + movb $(K_TSS), 5(%edx, %eax) /* clear busy flag */ + ltr saved_tr /* restore TR */ + + /* restore general purpose registers */ + movl saved_esp, %esp + popal + + /* restore flags */ + popfl + + /* make sure interrupts are disabled */ + cli + + movl $2, %eax + + leave + ret + + + .data + .section __HIB, __data + .align 2 + + +/* + * CPU registers saved across sleep/wake. 
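+ * They live in the __HIB data section and are referenced physically
+ * via PA() from wake_prot while paging is still off, then by their
+ * virtual addresses once paging is back on.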
+ */ +saved_esp: .long 0 +saved_es: .word 0 +saved_fs: .word 0 +saved_gs: .word 0 +saved_ss: .word 0 +saved_cr0: .long 0 +saved_cr2: .long 0 +saved_cr3: .long 0 +saved_cr4: .long 0 +saved_gdt: .word 0 + .long 0 +saved_idt: .word 0 + .long 0 +saved_ldt: .word 0 +saved_tr: .word 0 +saved_eip: .long 0 + diff --git a/osfmk/i386/apic.h b/osfmk/i386/apic.h index 708985704..40e558ca9 100644 --- a/osfmk/i386/apic.h +++ b/osfmk/i386/apic.h @@ -23,6 +23,8 @@ * @OSF_COPYRIGHT@ * */ +#ifndef _I386_APIC_H_ +#define _I386_APIC_H_ #define LAPIC_START 0xFEE00000 #define LAPIC_SIZE 0x00000400 @@ -97,9 +99,38 @@ #define LAPIC_LVT_TM_LEVEL 0x08000 #define LAPIC_LVT_MASKED 0x10000 #define LAPIC_LVT_PERIODIC 0x20000 -#define LAPIC_INITIAL_COUNT_TIMER 0x00000380 -#define LAPIC_CURRENT_COUNT_TIMER 0x00000390 +#define LAPIC_TIMER_INITIAL_COUNT 0x00000380 +#define LAPIC_TIMER_CURRENT_COUNT 0x00000390 #define LAPIC_TIMER_DIVIDE_CONFIG 0x000003E0 +/* divisor encoded by bits 0,1,3 with bit 2 always 0: */ +#define LAPIC_TIMER_DIVIDE_MASK 0x0000000F +#define LAPIC_TIMER_DIVIDE_2 0x00000000 +#define LAPIC_TIMER_DIVIDE_4 0x00000001 +#define LAPIC_TIMER_DIVIDE_8 0x00000002 +#define LAPIC_TIMER_DIVIDE_16 0x00000003 +#define LAPIC_TIMER_DIVIDE_32 0x00000008 +#define LAPIC_TIMER_DIVIDE_64 0x00000009 +#define LAPIC_TIMER_DIVIDE_128 0x0000000A +#define LAPIC_TIMER_DIVIDE_1 0x0000000B + +#ifndef ASSEMBLER +#include +typedef enum { + periodic, + one_shot +} lapic_timer_mode_t; +typedef enum { + divide_by_1 = LAPIC_TIMER_DIVIDE_1, + divide_by_2 = LAPIC_TIMER_DIVIDE_2, + divide_by_4 = LAPIC_TIMER_DIVIDE_4, + divide_by_8 = LAPIC_TIMER_DIVIDE_8, + divide_by_16 = LAPIC_TIMER_DIVIDE_16, + divide_by_32 = LAPIC_TIMER_DIVIDE_32, + divide_by_64 = LAPIC_TIMER_DIVIDE_64, + divide_by_128 = LAPIC_TIMER_DIVIDE_128 +} lapic_timer_divide_t; +typedef uint32_t lapic_timer_count_t; +#endif /* ASSEMBLER */ #define IOAPIC_START 0xFEC00000 #define IOAPIC_SIZE 0x00000020 @@ -125,3 +156,6 @@ #define IOA_R_R_IP_PLRITY_LOW 0x02000 #define IOA_R_R_TM_LEVEL 0x08000 #define IOA_R_R_MASKED 0x10000 + +#endif /* _I386_APIC_H_ */ + diff --git a/osfmk/i386/asm.h b/osfmk/i386/asm.h index 1a67df5fe..fb714c558 100644 --- a/osfmk/i386/asm.h +++ b/osfmk/i386/asm.h @@ -67,6 +67,7 @@ #endif /* MACH_KERNEL || _KERNEL */ +#define S_PC 0(%esp) #define S_ARG0 4(%esp) #define S_ARG1 8(%esp) #define S_ARG2 12(%esp) @@ -75,6 +76,8 @@ #define FRAME pushl %ebp; movl %esp, %ebp #define EMARF leave +#define B_LINK 0(%ebp) +#define B_PC 4(%ebp) #define B_ARG0 8(%ebp) #define B_ARG1 12(%ebp) #define B_ARG2 16(%ebp) @@ -151,13 +154,7 @@ call *(%eax); #else /* !GPROF, !__SHARED__ */ -#define MCOUNT ; .data;\ - .align ALIGN;\ - LBc(x, 8) .long 0;\ - .text;\ - movl LBb(x,8),%edx;\ - call *EXT(_mcount_ptr); - +#define MCOUNT ; call mcount; #endif /* GPROF */ #ifdef __ELF__ diff --git a/osfmk/i386/ast_check.c b/osfmk/i386/ast_check.c index 15d3cd6bc..2999f58f2 100644 --- a/osfmk/i386/ast_check.c +++ b/osfmk/i386/ast_check.c @@ -50,6 +50,5 @@ /* */ -#include #include diff --git a/osfmk/i386/bcopy.s b/osfmk/i386/bcopy.s index b592689e8..fe9b9b883 100644 --- a/osfmk/i386/bcopy.s +++ b/osfmk/i386/bcopy.s @@ -109,3 +109,54 @@ ENTRY(bcopy16) popl %edi ret + + /* + * Based on NetBSD's bcopy.S from their libc. + * bcopy(src, dst, cnt) + * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 + */ +ENTRY(bcopy) + pushl %esi + pushl %edi + movl 12(%esp),%esi + movl 16(%esp),%edi + movl 20(%esp),%ecx + + movl %edi,%edx + subl %esi,%edx + cmpl %ecx,%edx /* overlapping && src < dst? 
*/ + movl %ecx,%edx + jb 1f + + shrl $2,%ecx /* copy by 32-bit words */ + cld /* nope, copy forwards */ + rep + movsl + movl %edx,%ecx + andl $3,%ecx /* any bytes left? */ + rep + movsb + popl %edi + popl %esi + ret + + +1: + addl %ecx,%edi /* copy backwards */ + addl %ecx,%esi + decl %edi + decl %esi + andl $3,%ecx /* any fractional bytes? */ + std + rep + movsb + movl %edx,%ecx /* copy remainder by 32-bit words */ + shrl $2,%ecx + subl $3,%esi + subl $3,%edi + rep + movsl + popl %edi + popl %esi + cld + ret diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index bdd39922a..759d96259 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,12 +20,12 @@ * @APPLE_LICENSE_HEADER_END@ */ #ifdef MACH_BSD -#include #include #include #include #include +#include #include #include @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -44,6 +43,8 @@ #include #include +#include +#include #include #include #include @@ -53,10 +54,17 @@ #include #include #include - +#include +#include +#include +#include +#include #include +#include #include -struct proc; +#include <../bsd/sys/sysent.h> + +extern struct proc *current_proc(void); kern_return_t thread_userstack( @@ -64,7 +72,7 @@ thread_userstack( int, thread_state_t, unsigned int, - vm_offset_t *, + mach_vm_offset_t *, int * ); @@ -74,13 +82,9 @@ thread_entrypoint( int, thread_state_t, unsigned int, - vm_offset_t * + mach_vm_offset_t * ); -struct i386_saved_state * -get_user_regs( - thread_act_t); - unsigned int get_msr_exportmask(void); unsigned int get_msr_nbits(void); @@ -90,6 +94,8 @@ unsigned int get_msr_rbits(void); kern_return_t thread_compose_cthread_desc(unsigned int addr, pcb_t pcb); +void IOSleep(int); + /* * thread_userstack: * @@ -98,11 +104,11 @@ thread_compose_cthread_desc(unsigned int addr, pcb_t pcb); */ kern_return_t thread_userstack( - thread_t thread, + __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, - vm_offset_t *user_stack, + user_addr_t *user_stack, int *customstack ) { @@ -118,6 +124,8 @@ thread_userstack( state25 = (i386_thread_state_t *) tstate; if (state25->esp) *user_stack = state25->esp; + else + *user_stack = USRSTACK; if (customstack && state25->esp) *customstack = 1; else @@ -135,6 +143,8 @@ thread_userstack( /* If a valid user stack is specified, use it. 
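 * Otherwise fall back to the default USRSTACK; customstack is
 * reported only when the caller supplied a stack of its own.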
*/ if (uesp) *user_stack = uesp; + else + *user_stack = USRSTACK; if (customstack && uesp) *customstack = 1; else @@ -149,11 +159,11 @@ kern_return_t thread_entrypoint( - thread_t thread, + __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, - vm_offset_t *entry_point + mach_vm_offset_t *entry_point ) { struct i386_saved_state *state; @@ -189,9 +199,9 @@ } struct i386_saved_state * -get_user_regs(thread_act_t th) +get_user_regs(thread_t th) { - if (th->mact.pcb) + if (th->machine.pcb) return(USER_REGS(th)); else { printf("[get_user_regs: thread does not have pcb]"); @@ -205,33 +215,31 @@ */ kern_return_t machine_thread_dup( - thread_act_t parent, - thread_act_t child + thread_t parent, + thread_t child ) { - struct i386_saved_state *parent_state, *child_state; - struct i386_machine_state *ims; struct i386_float_state floatregs; #ifdef XXX /* Save the FPU state */ - if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->mact.pcb) { + if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) { fp_state_save(parent); } #endif - if (child->mact.pcb == NULL || parent->mact.pcb == NULL) + if (child->machine.pcb == NULL || parent->machine.pcb == NULL) return (KERN_FAILURE); /* Copy over the i386_saved_state registers */ - child->mact.pcb->iss = parent->mact.pcb->iss; + child->machine.pcb->iss = parent->machine.pcb->iss; /* Check to see if parent is using floating point * and if so, copy the registers to the child * FIXME - make sure this works. */ - if (parent->mact.pcb->ims.ifps) { + if (parent->machine.pcb->ims.ifps) { if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS) fpu_set_state(child, &floatregs); } @@ -239,7 +247,15 @@ machine_thread_dup( /* FIXME - should a user specified LDT, TSS and V86 info * be duplicated as well?? - probably not. */ - + // duplicate any user LDT entry that was set; I think this is appropriate. +#ifdef MACH_BSD + if (parent->machine.pcb->uldt_selector != 0) { + child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector; + child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc; + } +#endif + + return (KERN_SUCCESS); } @@ -247,39 +263,25 @@ * FIXME - thread_set_child */ -void thread_set_child(thread_act_t child, int pid); +void thread_set_child(thread_t child, int pid); void -thread_set_child(thread_act_t child, int pid) +thread_set_child(thread_t child, int pid) { - child->mact.pcb->iss.eax = pid; - child->mact.pcb->iss.edx = 1; - child->mact.pcb->iss.efl &= ~EFL_CF; + child->machine.pcb->iss.eax = pid; + child->machine.pcb->iss.edx = 1; + child->machine.pcb->iss.efl &= ~EFL_CF; } -void thread_set_parent(thread_act_t parent, int pid); +void thread_set_parent(thread_t parent, int pid); void -thread_set_parent(thread_act_t parent, int pid) +thread_set_parent(thread_t parent, int pid) { - parent->mact.pcb->iss.eax = pid; - parent->mact.pcb->iss.edx = 0; - parent->mact.pcb->iss.efl &= ~EFL_CF; + parent->machine.pcb->iss.eax = pid; + parent->machine.pcb->iss.edx = 0; + parent->machine.pcb->iss.efl &= ~EFL_CF; } -/* - * Move pages from one kernel virtual address to another. - * Both addresses are assumed to reside in the Sysmap, - * and size must be a multiple of the page size.
- */ -void -pagemove( - register caddr_t from, - register caddr_t to, - int size) -{ - pmap_movepage((unsigned long)from, (unsigned long)to, (vm_size_t)size); -} - /* * System Call handling code */ @@ -287,43 +289,43 @@ pagemove( #define ERESTART -1 /* restart syscall */ #define EJUSTRETURN -2 /* don't modify regs, just return */ -struct sysent { /* system call table */ - unsigned short sy_narg; /* number of args */ - char sy_parallel; /* can execute in parallel */ - char sy_funnel; /* funnel type */ - unsigned long (*sy_call)(void *, void *, int *); /* implementing function */ -}; #define NO_FUNNEL 0 #define KERNEL_FUNNEL 1 -#define NETWORK_FUNNEL 2 extern funnel_t * kernel_flock; -extern funnel_t * network_flock; -extern struct sysent sysent[]; +extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *); +extern void * get_bsduthreadarg(thread_t); +extern int * get_bsduthreadrval(thread_t th); +extern int * get_bsduthreadlowpridelay(thread_t th); + +extern long fuword(vm_offset_t); + +extern void unix_syscall(struct i386_saved_state *); +extern void unix_syscall_return(int); -int set_bsduthreadargs (thread_act_t, struct i386_saved_state *, void *); +/* following implemented in bsd/dev/i386/unix_signal.c */ +int __pthread_cset(struct sysent *); -void * get_bsduthreadarg(thread_act_t); +void __pthread_creset(struct sysent *); -void unix_syscall(struct i386_saved_state *); void unix_syscall_return(int error) { - thread_act_t thread; + thread_t thread; volatile int *rval; struct i386_saved_state *regs; struct proc *p; - struct proc *current_proc(); unsigned short code; vm_offset_t params; struct sysent *callp; - extern int nsysent; + volatile int *lowpri_delay; - thread = current_act(); - rval = (int *)get_bsduthreadrval(thread); + thread = current_thread(); + rval = get_bsduthreadrval(thread); + lowpri_delay = get_bsduthreadlowpridelay(thread); p = current_proc(); regs = USER_REGS(thread); @@ -350,14 +352,27 @@ unix_syscall_return(int error) } } - ktrsysret(p, code, error, rval[0], callp->sy_funnel); + ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK)); - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); + __pthread_creset(callp); - if (callp->sy_funnel != NO_FUNNEL) + if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + if (*lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations... 
we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(*lowpri_delay); + *lowpri_delay = 0; + } + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, rval[0], rval[1], 0, 0); + thread_exception_return(); /* NOTREACHED */ } @@ -366,21 +381,24 @@ void unix_syscall(struct i386_saved_state *regs) { - thread_act_t thread; + thread_t thread; void *vt; unsigned short code; struct sysent *callp; - int nargs, error; - volatile int *rval; + int nargs; + int error; + int *rval; int funnel_type; vm_offset_t params; - extern int nsysent; struct proc *p; - struct proc *current_proc(); + volatile int *lowpri_delay; - thread = current_act(); + thread = current_thread(); p = current_proc(); - rval = (int *)get_bsduthreadrval(thread); + rval = get_bsduthreadrval(thread); + lowpri_delay = get_bsduthreadlowpridelay(thread); + + thread->task->syscalls_unix++; /* MP-safety ignored */ //printf("[scall : eax %x]", regs->eax); code = regs->eax; @@ -395,7 +413,7 @@ vt = get_bsduthreadarg(thread); if ((nargs = (callp->sy_narg * sizeof (int))) && - (error = copyin((char *) params, (char *)vt , nargs)) != 0) { + (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) { regs->eax = error; regs->efl |= EFL_CF; thread_exception_return(); @@ -405,13 +423,19 @@ rval[0] = 0; rval[1] = regs->edx; - funnel_type = callp->sy_funnel; + if ((error = __pthread_cset(callp))) { + /* cancelled system call; let it return with EINTR for handling */ + regs->eax = error; + regs->efl |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ + } + + funnel_type = (callp->sy_funnel & FUNNEL_MASK); if(funnel_type == KERNEL_FUNNEL) (void) thread_funnel_set(kernel_flock, TRUE); - else if (funnel_type == NETWORK_FUNNEL) - (void) thread_funnel_set(network_flock, TRUE); - set_bsduthreadargs(thread, regs, NULL); + (void) set_bsduthreadargs(thread, regs, NULL); if (callp->sy_narg > 8) panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg); @@ -424,7 +448,7 @@ *ip, *(ip+1), *(ip+2), *(ip+3), 0); } - error = (*(callp->sy_call))(p, (void *) vt, (int *) &rval[0]); + error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]); #if 0 /* May be needed with vfork changes */ @@ -446,12 +470,25 @@ ktrsysret(p, code, error, rval[0], funnel_type); - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); + __pthread_creset(callp); if(funnel_type != NO_FUNNEL) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + if (*lowpri_delay) { + /* + * task is marked as a low priority I/O type + * and the I/O we issued while in this system call + * collided with normal I/O operations...
we'll + * delay in order to mitigate the impact of this + * task on the normal operation of the system + */ + IOSleep(*lowpri_delay); + *lowpri_delay = 0; + } + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, rval[0], rval[1], 0, 0); + thread_exception_return(); /* NOTREACHED */ } @@ -462,13 +499,10 @@ machdep_syscall( struct i386_saved_state *regs) { int trapno, nargs; machdep_call_t *entry; - thread_t thread; - struct proc *p; - struct proc *current_proc(); trapno = regs->eax; if (trapno < 0 || trapno >= machdep_call_count) { - regs->eax = (unsigned int)kern_invalid(); + regs->eax = (unsigned int)kern_invalid(NULL); thread_exception_return(); /* NOTREACHED */ @@ -480,7 +514,7 @@ machdep_syscall( struct i386_saved_state *regs) if (nargs > 0) { int args[nargs]; - if (copyin((char *) regs->uesp + sizeof (int), + if (copyin((user_addr_t) regs->uesp + sizeof (int), (char *) args, nargs * sizeof (int))) { @@ -492,23 +526,23 @@ machdep_syscall( struct i386_saved_state *regs) switch (nargs) { case 1: - regs->eax = (*entry->routine)(args[0]); + regs->eax = (*entry->routine.args_1)(args[0]); break; case 2: - regs->eax = (*entry->routine)(args[0],args[1]); + regs->eax = (*entry->routine.args_2)(args[0],args[1]); break; case 3: - regs->eax = (*entry->routine)(args[0],args[1],args[2]); + regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]); break; case 4: - regs->eax = (*entry->routine)(args[0],args[1],args[2],args[3]); + regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]); break; default: panic("machdep_syscall(): too many args"); } } else - regs->eax = (*entry->routine)(); + regs->eax = (*entry->routine.args_0)(); if (current_thread()->funnel_lock) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); @@ -522,11 +556,9 @@ kern_return_t thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) { struct real_descriptor desc; - extern struct fake_descriptor *mp_ldt[]; - struct real_descriptor *ldtp; - int mycpu = cpu_number(); - ldtp = (struct real_descriptor *)mp_ldt[mycpu]; + mp_disable_preemption(); + desc.limit_low = 1; desc.limit_high = 0; desc.base_low = addr & 0xffff; @@ -535,14 +567,17 @@ thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) desc.access = ACC_P|ACC_PL_U|ACC_DATA_W; desc.granularity = SZ_32|SZ_G; pcb->cthread_desc = desc; - ldtp[sel_idx(USER_CTHREAD)] = desc; + *ldt_desc_p(USER_CTHREAD) = desc; + + mp_enable_preemption(); + return(KERN_SUCCESS); } kern_return_t -thread_set_cthread_self(int self) +thread_set_cthread_self(uint32_t self) { - current_act()->mact.pcb->cthread_self = (unsigned int)self; + current_thread()->machine.pcb->cthread_self = self; return (KERN_SUCCESS); } @@ -550,19 +585,86 @@ thread_set_cthread_self(int self) kern_return_t thread_get_cthread_self(void) { - return ((kern_return_t)current_act()->mact.pcb->cthread_self); + return ((kern_return_t)current_thread()->machine.pcb->cthread_self); } kern_return_t -thread_fast_set_cthread_self(int self) +thread_fast_set_cthread_self(uint32_t self) { pcb_t pcb; - pcb = (pcb_t)current_act()->mact.pcb; - thread_compose_cthread_desc((unsigned int)self, pcb); - pcb->cthread_self = (unsigned int)self; /* preserve old func too */ + pcb = (pcb_t)current_thread()->machine.pcb; + thread_compose_cthread_desc(self, pcb); + pcb->cthread_self = self; /* preserve old func too */ return (USER_CTHREAD); } +/* + * thread_set_user_ldt routine is the interface for the user level + * settable ldt entry feature. 
Allowing a user to create arbitrary + * ldt entries seems to be too large of a security hole, so instead + * this mechanism is in place to allow user level processes to have + * an ldt entry that can be used in conjunction with the FS register. + * + * Swapping occurs inside the pcb.c file along with initialization + * when a thread is created. The basic functioning theory is that the + * pcb->uldt_selector variable will contain either 0, meaning the + * process has not set up any entry, or the selector to be used in + * the FS register. pcb->uldt_desc contains the actual descriptor the + * user has set up, stored in machine usable ldt format. + * + * Currently one entry is shared by all threads (USER_SETTABLE), but + * this could be changed in the future by changing how this routine + * allocates the selector. There seems to be no real reason at this + * time to have this added feature, but in the future it might be + * needed. + * + * address is the linear address of the start of the data area; size + * is the size in bytes of the area; flags should always be set to 0 + * for now. In the future it could be used to set R/W permissions or + * other functions. Currently the segment is created as a data segment + * up to 1 megabyte in size with full read/write permissions only. + * + * This call returns the segment selector, or -1 if any error occurs. + */ +kern_return_t +thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags) +{ + pcb_t pcb; + struct fake_descriptor temp; + int mycpu; + + if (flags != 0) + return -1; // flags not supported + if (size > 0xFFFFF) + return -1; // size too big, 1 meg is the limit + + mp_disable_preemption(); + mycpu = cpu_number(); + + // create a "fake" descriptor so we can use fix_desc() + // to build a real one... + // 32 bit default operation size + // standard read/write perms for a data segment + pcb = (pcb_t)current_thread()->machine.pcb; + temp.offset = address; + temp.lim_or_seg = size; + temp.size_or_wdct = SZ_32; + temp.access = ACC_P|ACC_PL_U|ACC_DATA_W; + + // turn this into a real descriptor + fix_desc(&temp,1); + + // set up our data in the pcb + pcb->uldt_desc = *(struct real_descriptor*)&temp; + pcb->uldt_selector = USER_SETTABLE; // set the selector value + + // now set it up in the current table...
+ *ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor*)&temp; + + mp_enable_preemption(); + + return USER_SETTABLE; +} void mach25_syscall(struct i386_saved_state *regs) { @@ -585,6 +687,8 @@ mach_call_start(unsigned int call_number, unsigned int *args) int i, argc; unsigned int kdarg[3]; + current_thread()->task->syscalls_mach++; /* MP-safety ignored */ + /* Always prepare to trace mach system calls */ kdarg[0]=0; @@ -619,3 +723,125 @@ mach_call_end(unsigned int call_number, unsigned int retval) return retval; /* pass this back thru */ } +typedef kern_return_t (*mach_call_t)(void *); + +extern __attribute__((regparm(1))) kern_return_t +mach_call_munger(unsigned int call_number, + unsigned int arg1, + unsigned int arg2, + unsigned int arg3, + unsigned int arg4, + unsigned int arg5, + unsigned int arg6, + unsigned int arg7, + unsigned int arg8, + unsigned int arg9 +); + +struct mach_call_args { + unsigned int arg1; + unsigned int arg2; + unsigned int arg3; + unsigned int arg4; + unsigned int arg5; + unsigned int arg6; + unsigned int arg7; + unsigned int arg8; + unsigned int arg9; +}; +__private_extern__ +__attribute__((regparm(1))) kern_return_t +mach_call_munger(unsigned int call_number, + unsigned int arg1, + unsigned int arg2, + unsigned int arg3, + unsigned int arg4, + unsigned int arg5, + unsigned int arg6, + unsigned int arg7, + unsigned int arg8, + unsigned int arg9 +) +{ + int argc; + mach_call_t mach_call; + kern_return_t retval; + struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + current_thread()->task->syscalls_mach++; /* MP-safety ignored */ + call_number >>= 4; + + argc = mach_trap_table[call_number].mach_trap_arg_count; + switch (argc) { + case 9: args.arg9 = arg9; + case 8: args.arg8 = arg8; + case 7: args.arg7 = arg7; + case 6: args.arg6 = arg6; + case 5: args.arg5 = arg5; + case 4: args.arg4 = arg4; + case 3: args.arg3 = arg3; + case 2: args.arg2 = arg2; + case 1: args.arg1 = arg1; + } + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, + args.arg1, args.arg2, args.arg3, 0, 0); + + mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; + retval = mach_call(&args); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, + retval, 0, 0, 0, 0); + + return retval; +} + +/* + * thread_setuserstack: + * + * Sets the user stack pointer into the machine + * dependent thread state info. + */ +void +thread_setuserstack( + thread_t thread, + mach_vm_address_t user_stack) +{ + struct i386_saved_state *ss = get_user_regs(thread); + + ss->uesp = CAST_DOWN(unsigned int,user_stack); +} + +/* + * thread_adjuserstack: + * + * Returns the adjusted user stack pointer from the machine + * dependent thread state info. Used for small (<2G) deltas. + */ +uint64_t +thread_adjuserstack( + thread_t thread, + int adjust) +{ + struct i386_saved_state *ss = get_user_regs(thread); + + ss->uesp += adjust; + return CAST_USER_ADDR_T(ss->uesp); +} + +/* + * thread_setentrypoint: + * + * Sets the user PC into the machine + * dependent thread state info. + */ +void +thread_setentrypoint( + thread_t thread, + mach_vm_address_t entry) +{ + struct i386_saved_state *ss = get_user_regs(thread); + + ss->eip = CAST_DOWN(unsigned int,entry); +} + diff --git a/osfmk/i386/commpage/atomic.s b/osfmk/i386/commpage/atomic.s new file mode 100644 index 000000000..ed50f43d8 --- /dev/null +++ b/osfmk/i386/commpage/atomic.s @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +/* OSAtomic.h library native implementations. */ + + .text + .align 2, 0x90 + +// This is a regparm(3) subroutine used by: + +// bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); +// int32_t OSAtomicAnd32( int32_t mask, int32_t *value); +// int32_t OSAtomicOr32( int32_t mask, int32_t *value); +// int32_t OSAtomicXor32( int32_t mask, int32_t *value); + +// It assumes old -> %eax, new -> %edx, value -> %ecx +// on success: returns with ZF set +// on failure: returns with *value in %eax, ZF clear + +// The first word of the routine contains the address of the first instruction, +// so callers can pass parameters in registers by using the absolute: + +// call *_COMPARE_AND_SWAP32 + +// TODO: move the .long onto a separate page to reduce icache pollution (?) + +Lcompare_and_swap32_mp: +.long _COMM_PAGE_COMPARE_AND_SWAP32+4 + lock + cmpxchgl %edx, (%ecx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) + +Lcompare_and_swap32_up: +.long _COMM_PAGE_COMPARE_AND_SWAP32+4 + cmpxchgl %edx, (%ecx) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); + +// It assumes old -> %eax/%edx, new -> %ebx/%ecx, value -> %esi +// on success: returns with ZF set +// on failure: returns with *value in %eax/%edx, ZF clear + +Lcompare_and_swap64_mp: +.long _COMM_PAGE_COMPARE_AND_SWAP64+4 + lock + cmpxchg8b (%esi) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) + +Lcompare_and_swap64_up: +.long _COMM_PAGE_COMPARE_AND_SWAP64+4 + cmpxchg8b (%esi) + ret + + COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicTestAndSet( uint32_t n, void *value ); +// It assumes n -> %eax, value -> %edx + +// Returns: old value of bit in CF + +Lbit_test_and_set_mp: +.long _COMM_PAGE_BTS+4 + lock + bts %eax, (%edx) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) + +Lbit_test_and_set_up: +.long _COMM_PAGE_BTS+4 + bts %eax, (%edx) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) + +// This is a subroutine used by: +// bool OSAtomicTestAndClear( uint32_t n, void *value ); +// It assumes n -> %eax, value -> %edx + +// Returns: old value of bit in CF + +Lbit_test_and_clear_mp: +.long _COMM_PAGE_BTC+4 + lock + btc %eax, (%edx) + ret + + COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) + +Lbit_test_and_clear_up: +.long _COMM_PAGE_BTC+4 + btc %eax, (%edx) + ret 
+ + COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) + +// This is a subroutine used by: +// int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); +// It assumes amt -> %eax, value -> %edx + +// Returns: old value in %eax +// NB: OSAtomicAdd32 returns the new value, so clients will add amt to %eax + +Latomic_add32_mp: +.long _COMM_PAGE_ATOMIC_ADD32+4 + lock + xaddl %eax, (%edx) + ret + + COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP) + +Latomic_add32_up: +.long _COMM_PAGE_ATOMIC_ADD32+4 + xaddl %eax, (%edx) + ret + + COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index 8bf520d63..6a729039d 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -41,16 +41,21 @@ #include #include +#include #include #include #include #include #include -#include +#include +#include + + +extern vm_map_t com_region_map32; // the shared submap, set up in vm init -static uintptr_t next = 0; // next available byte in comm page -static int cur_routine = 0; // comm page address of "current" routine -static int matched; // true if we've found a match for "current" routine +static uintptr_t next = 0; // next available byte in comm page +static int cur_routine = 0; // comm page address of "current" routine +static int matched; // true if we've found a match for "current" routine int _cpu_capabilities = 0; // define the capability vector @@ -66,13 +71,13 @@ char *commPagePtr = NULL; // virtual address of comm page in kerne static void* commpage_allocate( void ) { - extern vm_map_t com_region_map; // the shared submap, set up in vm init vm_offset_t kernel_addr; // address of commpage in kernel map vm_offset_t zero = 0; vm_size_t size = _COMM_PAGE_AREA_LENGTH; + vm_map_entry_t entry; ipc_port_t handle; - if (com_region_map == NULL) + if (com_region_map32 == NULL) panic("commpage map is null"); if (vm_allocate(kernel_map,&kernel_addr,_COMM_PAGE_AREA_LENGTH,VM_FLAGS_ANYWHERE)) @@ -81,6 +86,18 @@ commpage_allocate( void ) if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+_COMM_PAGE_AREA_LENGTH,VM_PROT_DEFAULT,FALSE)) panic("cannot wire commpage"); + /* + * Now that the object is created and wired into the kernel map, mark it so that no delay + * copy-on-write will ever be performed on it as a result of mapping it into user-space. + * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and + * that would be a real disaster. + * + * JMM - What we really need is a way to create it like this in the first place. + */ + if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map) + panic("cannot find commpage entry"); + entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + if (mach_make_memory_entry( kernel_map, // target map &size, // size kernel_addr, // offset (address in kernel map) @@ -89,7 +106,7 @@ commpage_allocate( void ) NULL )) // parent_entry (what is this?) panic("cannot make entry for commpage"); - if (vm_map_64( com_region_map, // target map (shared submap) + if (vm_map_64( com_region_map32, // target map (shared submap) &zero, // address (map into 1st page in submap) _COMM_PAGE_AREA_LENGTH, // size 0, // mask @@ -109,7 +126,7 @@ commpage_allocate( void ) /* Get address (in kernel map) of a commpage field. 
*/ -static void* +static void* commpage_addr_of( int addr_at_runtime ) { @@ -148,7 +165,7 @@ commpage_init_cpu_capabilities( void ) switch (cpu_info.vector_unit) { case 5: - bits |= kHasPNI; + bits |= kHasSSE3; /* fall thru */ case 4: bits |= kHasSSE2; @@ -181,6 +198,8 @@ commpage_init_cpu_capabilities( void ) bits |= (cpus << kNumCPUsShift); + bits |= kFastThreadLocalStorage; // we use %gs for TLS + _cpu_capabilities = bits; // set kernel version for use by drivers etc } @@ -195,13 +214,23 @@ commpage_stuff( void *dest = commpage_addr_of(address); if ((uintptr_t)dest < next) - panic("commpage overlap"); + panic("commpage overlap at address 0x%x, 0x%x < 0x%x", address, dest, next); bcopy(source,dest,length); next = ((uintptr_t)dest + length); } + +static void +commpage_stuff2( + int address, + void *source, + int length ) +{ + commpage_stuff(address, source, length); +} + /* Copy a routine into comm page if it matches running machine. */ static void @@ -229,6 +258,67 @@ commpage_stuff_routine( } } + +#define COMMPAGE_DESC(name) commpage_ ## name +#define EXTERN_COMMPAGE_DESC(name) \ + extern commpage_descriptor COMMPAGE_DESC(name) + +EXTERN_COMMPAGE_DESC(compare_and_swap32_mp); +EXTERN_COMMPAGE_DESC(compare_and_swap32_up); +EXTERN_COMMPAGE_DESC(compare_and_swap64_mp); +EXTERN_COMMPAGE_DESC(compare_and_swap64_up); +EXTERN_COMMPAGE_DESC(atomic_add32_mp); +EXTERN_COMMPAGE_DESC(atomic_add32_up); +EXTERN_COMMPAGE_DESC(mach_absolute_time); +EXTERN_COMMPAGE_DESC(spin_lock_try_mp); +EXTERN_COMMPAGE_DESC(spin_lock_try_up); +EXTERN_COMMPAGE_DESC(spin_lock_mp); +EXTERN_COMMPAGE_DESC(spin_lock_up); +EXTERN_COMMPAGE_DESC(spin_unlock); +EXTERN_COMMPAGE_DESC(pthread_getspecific); +EXTERN_COMMPAGE_DESC(gettimeofday); +EXTERN_COMMPAGE_DESC(sys_flush_dcache); +EXTERN_COMMPAGE_DESC(sys_icache_invalidate); +EXTERN_COMMPAGE_DESC(pthread_self); +EXTERN_COMMPAGE_DESC(relinquish); +EXTERN_COMMPAGE_DESC(bit_test_and_set_mp); +EXTERN_COMMPAGE_DESC(bit_test_and_set_up); +EXTERN_COMMPAGE_DESC(bit_test_and_clear_mp); +EXTERN_COMMPAGE_DESC(bit_test_and_clear_up); +EXTERN_COMMPAGE_DESC(bzero_scalar); +EXTERN_COMMPAGE_DESC(bcopy_scalar); +EXTERN_COMMPAGE_DESC(nanotime); + +static commpage_descriptor *routines[] = { + &COMMPAGE_DESC(compare_and_swap32_mp), + &COMMPAGE_DESC(compare_and_swap32_up), + &COMMPAGE_DESC(compare_and_swap64_mp), + &COMMPAGE_DESC(compare_and_swap64_up), + &COMMPAGE_DESC(atomic_add32_mp), + &COMMPAGE_DESC(atomic_add32_up), + &COMMPAGE_DESC(mach_absolute_time), + &COMMPAGE_DESC(spin_lock_try_mp), + &COMMPAGE_DESC(spin_lock_try_up), + &COMMPAGE_DESC(spin_lock_mp), + &COMMPAGE_DESC(spin_lock_up), + &COMMPAGE_DESC(spin_unlock), + &COMMPAGE_DESC(pthread_getspecific), + &COMMPAGE_DESC(gettimeofday), + &COMMPAGE_DESC(sys_flush_dcache), + &COMMPAGE_DESC(sys_icache_invalidate), + &COMMPAGE_DESC(pthread_self), + &COMMPAGE_DESC(relinquish), + &COMMPAGE_DESC(bit_test_and_set_mp), + &COMMPAGE_DESC(bit_test_and_set_up), + &COMMPAGE_DESC(bit_test_and_clear_mp), + &COMMPAGE_DESC(bit_test_and_clear_up), + &COMMPAGE_DESC(bzero_scalar), + &COMMPAGE_DESC(bcopy_scalar), + &COMMPAGE_DESC(nanotime), + NULL +}; + + /* Fill in commpage: called once, during kernel initialization, from the * startup thread before user-mode code is running. 
* See the top of this file for a list of what you have to do to add @@ -238,45 +328,11 @@ commpage_stuff_routine( void commpage_populate( void ) { + short c2; + static double two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52 + static double ten6 = 1000000.0; // 10**6 commpage_descriptor **rd; short version = _COMM_PAGE_THIS_VERSION; - void *sig_addr; - - extern char commpage_sigs_begin[]; - extern char commpage_sigs_end[]; - - extern commpage_descriptor commpage_mach_absolute_time; - extern commpage_descriptor commpage_spin_lock_try_mp; - extern commpage_descriptor commpage_spin_lock_try_up; - extern commpage_descriptor commpage_spin_lock_mp; - extern commpage_descriptor commpage_spin_lock_up; - extern commpage_descriptor commpage_spin_unlock; - extern commpage_descriptor commpage_pthread_getspecific; - extern commpage_descriptor commpage_gettimeofday; - extern commpage_descriptor commpage_sys_flush_dcache; - extern commpage_descriptor commpage_sys_icache_invalidate; - extern commpage_descriptor commpage_pthread_self; - extern commpage_descriptor commpage_relinquish; - extern commpage_descriptor commpage_bzero_scalar; - extern commpage_descriptor commpage_bcopy_scalar; - - static commpage_descriptor *routines[] = { - &commpage_mach_absolute_time, - &commpage_spin_lock_try_mp, - &commpage_spin_lock_try_up, - &commpage_spin_lock_mp, - &commpage_spin_lock_up, - &commpage_spin_unlock, - &commpage_pthread_getspecific, - &commpage_gettimeofday, - &commpage_sys_flush_dcache, - &commpage_sys_icache_invalidate, - &commpage_pthread_self, - &commpage_relinquish, - &commpage_bzero_scalar, - &commpage_bcopy_scalar, - NULL - }; commPagePtr = (char *)commpage_allocate(); @@ -286,40 +342,62 @@ commpage_populate( void ) * ascending order, so we can check for overlap and panic if so. 
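*
* (A sketch of the matching rule implied by the mp/up descriptor pairs:
* commpage_stuff_routine() copies a routine only when
*
*	(_cpu_capabilities & rd->musthave) == rd->musthave &&
*	(_cpu_capabilities & rd->canthave) == 0
*
* so descriptors built with (0,kUP) are selected on multiprocessors and
* those built with (kUP,0) on uniprocessors; the field names here follow
* the COMMPAGE_DESCRIPTOR argument order and are illustrative.)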
*/ - commpage_stuff(_COMM_PAGE_VERSION,&version,sizeof(short)); + commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short)); commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities, sizeof(int)); + if (_cpu_capabilities & kCache32) + c2 = 32; + else if (_cpu_capabilities & kCache64) + c2 = 64; + else if (_cpu_capabilities & kCache128) + c2 = 128; + commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2); + + c2 = 32; + + commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8); + + commpage_stuff2(_COMM_PAGE_10_TO_6,&ten6,8); + for( rd = routines; *rd != NULL ; rd++ ) commpage_stuff_routine(*rd); if (!matched) panic("commpage no match on last routine"); - if (next > ((uintptr_t)commPagePtr + PAGE_SIZE)) - panic("commpage overflow"); - -#define STUFF_SIG(addr, func) \ - extern char commpage_sig_ ## func []; \ - sig_addr = (void *)( (uintptr_t)_COMM_PAGE_BASE_ADDRESS + \ - (uintptr_t)_COMM_PAGE_SIGS_OFFSET + 0x1000 + \ - (uintptr_t)&commpage_sig_ ## func - \ - (uintptr_t)&commpage_sigs_begin ); \ - commpage_stuff(addr + _COMM_PAGE_SIGS_OFFSET, &sig_addr, sizeof(void *)); - - STUFF_SIG(_COMM_PAGE_ABSOLUTE_TIME, mach_absolute_time); - STUFF_SIG(_COMM_PAGE_SPINLOCK_TRY, spin_lock_try); - STUFF_SIG(_COMM_PAGE_SPINLOCK_LOCK, spin_lock); - STUFF_SIG(_COMM_PAGE_SPINLOCK_UNLOCK, spin_unlock); - STUFF_SIG(_COMM_PAGE_PTHREAD_GETSPECIFIC, pthread_getspecific); - STUFF_SIG(_COMM_PAGE_GETTIMEOFDAY, gettimeofday); - STUFF_SIG(_COMM_PAGE_FLUSH_DCACHE, sys_dcache_flush); - STUFF_SIG(_COMM_PAGE_FLUSH_ICACHE, sys_icache_invalidate); - STUFF_SIG(_COMM_PAGE_PTHREAD_SELF, pthread_self); - STUFF_SIG(_COMM_PAGE_BZERO, bzero); - STUFF_SIG(_COMM_PAGE_BCOPY, bcopy); - STUFF_SIG(_COMM_PAGE_MEMCPY, memmove); - - commpage_stuff(_COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_SIGS_OFFSET + 0x1000, &commpage_sigs_begin, - (uintptr_t)&commpage_sigs_end - (uintptr_t)&commpage_sigs_begin); + if (next > (uintptr_t)_COMM_PAGE_END) + panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%08x", next, (uintptr_t)commPagePtr); + + + pmap_commpage_init((vm_offset_t) commPagePtr, _COMM_PAGE_BASE_ADDRESS, + _COMM_PAGE_AREA_LENGTH/INTEL_PGBYTES); } + +/* + * This macro prevents compiler instruction scheduling: + */ +#define NO_REORDERING asm volatile("" : : : "memory") + +void +commpage_set_nanotime(commpage_nanotime_t *newp) +{ + commpage_nanotime_t *cnp; + + /* Nop if commpage not set up yet */ + if (commPagePtr == NULL) + return; + + cnp = (commpage_nanotime_t *)commpage_addr_of(_COMM_PAGE_NANOTIME_INFO); + + /* + * Update in reverse order: + * check_tsc first - it's read and compared with base_tsc last. 
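+ * (A reader loads base_tsc first and check_tsc last; since this update
+ * writes check_tsc first and base_tsc last, any read that overlaps an
+ * update observes base_tsc != check_tsc and is retried.)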
+ */ + cnp->nt_check_tsc = newp->nt_base_tsc; NO_REORDERING; + cnp->nt_shift = newp->nt_shift; NO_REORDERING; + cnp->nt_scale = newp->nt_scale; NO_REORDERING; + cnp->nt_base_ns = newp->nt_base_ns; NO_REORDERING; + cnp->nt_base_tsc = newp->nt_base_tsc; +} + diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h index 1e368d84d..2a14d32eb 100644 --- a/osfmk/i386/commpage/commpage.h +++ b/osfmk/i386/commpage/commpage.h @@ -63,6 +63,15 @@ extern char *commPagePtr; // virt address of commpage in kernel map extern void commpage_set_timestamp(uint64_t tbr,uint32_t secs,uint32_t usecs,uint32_t ticks_per_sec); +typedef struct { + uint64_t nt_base_tsc; + uint64_t nt_base_ns; + uint32_t nt_scale; + uint32_t nt_shift; + uint64_t nt_check_tsc; +} commpage_nanotime_t; +extern void commpage_set_nanotime(commpage_nanotime_t *new_nanotime); + #endif /* __ASSEMBLER__ */ #endif /* _I386_COMMPAGE_H */ diff --git a/osfmk/i386/commpage/commpage_mach_absolute_time.s b/osfmk/i386/commpage/commpage_mach_absolute_time.s index 20e65ce00..3427958ed 100644 --- a/osfmk/i386/commpage/commpage_mach_absolute_time.s +++ b/osfmk/i386/commpage/commpage_mach_absolute_time.s @@ -23,12 +23,97 @@ #include #include #include +#include + +#include .text .align 2, 0x90 Lmach_absolute_time: + int $0x3 ret - COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,0,0) + COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,1,0) + + +Lnanotime: + + pushl %ebx + pushl %esi + pushl %edi + pushl %ebp + movl $(_COMM_PAGE_NANOTIME_INFO), %esi + + /* + * The nanotime info consists of: + * - base_tsc 64-bit timestamp register value + * - base_ns 64-bit corresponding nanosecond uptime value + * - scale 32-bit current scale multiplier + * - shift 32-bit current shift divider + * - check_tsc 64-bit timestamp check value + * + * This enables a timestamp register's value, tsc, to be converted + * into a nanosecond nanotime value, ns: + * + * ns = base_ns + ((tsc - base_tsc) * scale >> shift) + * + * The kernel updates this every tick or whenever a performance + * speed-step changes the scaling. To avoid locking, a duplicated + * sequence counting scheme is used. The base_tsc value is updated + * whenever the info starts to be changed, and check_tsc is updated + * to the same value at the end of the update. The regularity of + * update ensures that (tsc - base_tsc) is a 32-bit quantity. + * When a conversion is performed, we read base_tsc before we start + * and check_tsc at the end -- if there's a mis-match we repeat. + * It's sufficient to compare only the low-order 32-bits. + */ + +1: + // + // Read nanotime info and stash in registers. + // + movl NANOTIME_BASE_TSC(%esi), %ebx // ebx := lo(base_tsc) + movl NANOTIME_BASE_NS(%esi), %ebp + movl NANOTIME_BASE_NS+4(%esi), %edi // edi:ebp := base_ns + movl NANOTIME_SHIFT(%esi), %ecx // ecx := shift + // + // Read timestamp register (tsc) and calculate delta. + // + rdtsc // edx:eax := tsc + subl %ebx, %eax // eax := (tsc - base_tsc) + movl NANOTIME_SCALE(%esi), %edx // edx := scale + // + // Check for consistency and re-read if necessary.
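+	//
+	// (Equivalent C sketch of this read protocol, ignoring register
+	// allocation; rdtsc() stands for the raw timestamp read, and the
+	// field names are those of commpage_nanotime_t in commpage.h:
+	//
+	//	do {
+	//		base_tsc = p->nt_base_tsc;
+	//		base_ns  = p->nt_base_ns;
+	//		scale    = p->nt_scale;
+	//		shift    = p->nt_shift;
+	//		tsc      = rdtsc();
+	//	} while ((uint32_t)p->nt_check_tsc != (uint32_t)base_tsc);
+	//	ns = base_ns + (((uint64_t)(uint32_t)(tsc - base_tsc) * scale) >> shift);
+	// )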
+ // + cmpl NANOTIME_CHECK_TSC(%esi), %ebx + jne 1b + + // + // edx:eax := ((tsc - base_tsc) * scale) + // + mull %edx + + // + // eax := ((tsc - base_tsc) * scale >> shift) + // + shrdl %cl, %edx, %eax + andb $32, %cl + cmovnel %edx, %eax // %eax := %edx if shift == 32 + xorl %edx, %edx + + // + // Add base_ns: + // edx:eax = (base_ns + ((tsc - base_tsc) * scale >> shift)) + // + addl %ebp, %eax + adcl %edi, %edx + + popl %ebp + popl %edi + popl %esi + popl %ebx + ret + + COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,1,0) diff --git a/osfmk/i386/commpage/commpage_sigs.h b/osfmk/i386/commpage/commpage_sigs.h index 0251ee075..e69de29bb 100644 --- a/osfmk/i386/commpage/commpage_sigs.h +++ b/osfmk/i386/commpage/commpage_sigs.h @@ -1,57 +0,0 @@ -#define BSWAP_32(x) \ - ((x & 0x000000ff) << 24) | \ - ((x & 0x0000ff00) << 8) | \ - ((x & 0x00ff0000) >> 8) | \ - ((x & 0xff000000) >> 24) - -#define COMMPAGE_SIGS_BEGIN \ -.const_data ; \ -.align 2 ; \ -.private_extern _commpage_sigs_begin ; \ -_commpage_sigs_begin: - -#define COMMPAGE_SIGS_DONE \ -.private_extern _commpage_sigs_end ; \ -_commpage_sigs_end: ; \ - -#define COMMPAGE_SIG_START(x) \ -.private_extern _commpage_sig ## x ; \ -_commpage_sig ## x ## : ; \ - .long BSWAP_32(0x14400000) ; \ - .long BSWAP_32(0x00000001) ; \ - .asciz # x ; \ - .align 2 ; \ - .long BSWAP_32(0x14400000) - -#define COMMPAGE_SIG_END(x) \ - .long BSWAP_32(0x4e800020) ; \ - .long BSWAP_32(0x14400000) ; \ - .long BSWAP_32(0x00000000) ; \ - .asciz # x ; \ - .align 2 ; \ - .long BSWAP_32(0x14400000) - -#define ARG(n) \ - ((((n * 2) + 6) << 20) + 4) - -#define COMMPAGE_SIG_ARG(n) \ - .long BSWAP_32(0x14400001) ; \ - .long BSWAP_32(ARG(n)) ; \ - .long BSWAP_32(0x14400001) - -#define COMMPAGE_SIG_CALL(x, n) \ - .long BSWAP_32(0x14400002) ; \ - .long BSWAP_32(n) ; \ - .long BSWAP_32(0x00000000) ; \ - .asciz # x ; \ - .align 2 ; \ - .long BSWAP_32(0x14400002) - -#define COMMPAGE_SIG_CALL_VOID(x) \ - COMMPAGE_SIG_CALL(x, 0) - -#define COMMPAGE_SIG_CALL_RET0(x) \ - COMMPAGE_SIG_CALL(x, ARG(0)) - -#define COMMPAGE_SIG_CALL_RET1(x) \ - COMMPAGE_SIG_CALL(x, ARG(1)) diff --git a/osfmk/i386/commpage/commpage_sigs.s b/osfmk/i386/commpage/commpage_sigs.s index 573c5f7e6..e69de29bb 100644 --- a/osfmk/i386/commpage/commpage_sigs.s +++ b/osfmk/i386/commpage/commpage_sigs.s @@ -1,69 +0,0 @@ -#include "commpage_sigs.h" - -COMMPAGE_SIGS_BEGIN - -COMMPAGE_SIG_START(_mach_absolute_time) -COMMPAGE_SIG_CALL_RET0(_mach_absolute_time_high) -COMMPAGE_SIG_CALL_RET1(_mach_absolute_time_low) -COMMPAGE_SIG_END(_mach_absolute_time) - -COMMPAGE_SIG_START(_spin_lock_try) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_CALL_RET0(_spin_lock_try_wrapper) -COMMPAGE_SIG_END(_spin_lock_try) - -COMMPAGE_SIG_START(_spin_lock) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_CALL_VOID(_spin_lock) -COMMPAGE_SIG_END(_spin_lock) - -COMMPAGE_SIG_START(_spin_unlock) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_CALL_VOID(_spin_unlock) -COMMPAGE_SIG_END(_spin_unlock) - -COMMPAGE_SIG_START(_pthread_getspecific) -COMMPAGE_SIG_END(_pthread_getspecific) - -COMMPAGE_SIG_START(_gettimeofday) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_CALL_RET0(_gettimeofday_wrapper) -COMMPAGE_SIG_END(_gettimeofday) - -COMMPAGE_SIG_START(_sys_dcache_flush) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_ARG(1) -COMMPAGE_SIG_CALL_VOID(_sys_dcache_flush) -COMMPAGE_SIG_END(_sys_dcache_flush) - -COMMPAGE_SIG_START(_sys_icache_invalidate) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_ARG(1) -COMMPAGE_SIG_CALL_VOID(_sys_icache_invalidate_wrapper) -COMMPAGE_SIG_END(_sys_icache_invalidate) - 
-COMMPAGE_SIG_START(_pthread_self) -COMMPAGE_SIG_END(_pthread_self) - -COMMPAGE_SIG_START(_bzero) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_ARG(1) -COMMPAGE_SIG_CALL_VOID(_bzero) -COMMPAGE_SIG_END(_bzero) - -COMMPAGE_SIG_START(_bcopy) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_ARG(1) -COMMPAGE_SIG_ARG(2) -COMMPAGE_SIG_CALL_VOID(_bcopy) -COMMPAGE_SIG_END(_bcopy) - -COMMPAGE_SIG_START(_memmove) -COMMPAGE_SIG_ARG(0) -COMMPAGE_SIG_ARG(1) -COMMPAGE_SIG_ARG(2) -COMMPAGE_SIG_CALL_VOID(_memmove) -COMMPAGE_SIG_END(_memmove) - -COMMPAGE_SIGS_DONE - diff --git a/osfmk/i386/commpage/spinlocks.s b/osfmk/i386/commpage/spinlocks.s index 2c7a17e4c..e2cb71207 100644 --- a/osfmk/i386/commpage/spinlocks.s +++ b/osfmk/i386/commpage/spinlocks.s @@ -49,7 +49,8 @@ Lspin_lock_try_up: movl 4(%esp), %ecx xorl %eax, %eax - cmpxchgl %ecx, (%ecx) + orl $-1, %edx + cmpxchgl %edx, (%ecx) setz %dl movzbl %dl, %eax ret @@ -60,8 +61,9 @@ Lspin_lock_try_up: Lspin_lock_try_mp: movl 4(%esp), %ecx xorl %eax, %eax + orl $-1, %edx lock - cmpxchgl %ecx, (%ecx) + cmpxchgl %edx, (%ecx) setz %dl movzbl %dl, %eax ret @@ -75,7 +77,8 @@ Lspin_lock_up: movl 4(%esp), %ecx xorl %eax, %eax .set Lretry, . - Lspin_lock_up - cmpxchgl %ecx, (%ecx) + orl $-1, %edx + cmpxchgl %edx, (%ecx) UNLIKELY JNZ Lrelinquish_off - . + Lspin_lock_up - LEN ret @@ -87,8 +90,9 @@ Lspin_lock_mp: movl 4(%esp), %ecx xorl %eax, %eax 0: + orl $-1, %edx lock - cmpxchgl %ecx, (%ecx) + cmpxchgl %edx, (%ecx) UNLIKELY jnz 1f ret @@ -120,11 +124,11 @@ Lrelinquish: /* relinquish the processor */ pushl $1 /* 1 ms */ pushl $1 /* SWITCH_OPTION_DEPRESS */ pushl $0 /* THREAD_NULL */ + pushl $0 /* push dummy stack ret addr */ movl $-61, %eax /* syscall_thread_switch */ lcall $7, $0 - popl %eax /* set %eax to 0 again */ - popl %edx /* use %edx as scratch */ - popl %edx /* reg to fixup stack */ + addl $16, %esp /* adjust stack*/ + xorl %eax, %eax /* set %eax to 0 again */ JMP Lretry - Lrelinquish_off - . 
+ Lrelinquish - LEN COMMPAGE_DESCRIPTOR(relinquish,_COMM_PAGE_RELINQUISH,0,0) diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index 078da9c98..4578329c7 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -25,19 +25,20 @@ * cpu specific routines */ -#include +#include #include -#include -#include -#include +#include #include +#include #include #include -#include +#include +#include +#include +#include + -cpu_data_t cpu_data[NCPUS]; -int real_ncpus = 0; -int wncpu = NCPUS; +struct processor processor_master; /*ARGSUSED*/ kern_return_t @@ -46,15 +47,16 @@ cpu_control( processor_info_t info, unsigned int count) { - printf("cpu_control not implemented\n"); + printf("cpu_control(%d,0x%x,%d) not implemented\n", + slot_num, info, count); return (KERN_FAILURE); } /*ARGSUSED*/ kern_return_t cpu_info_count( - processor_flavor_t flavor, - unsigned int *count) + __unused processor_flavor_t flavor, + unsigned int *count) { *count = 0; return (KERN_FAILURE); @@ -68,80 +70,35 @@ cpu_info( processor_info_t info, unsigned int *count) { - printf("cpu_info not implemented\n"); + printf("cpu_info(%d,%d,0x%x,0x%x) not implemented\n", + flavor, slot_num, info, count); return (KERN_FAILURE); } void -cpu_sleep() +cpu_sleep(void) { - printf("cpu_sleep not implemented\n"); -} + cpu_data_t *proc_info = current_cpu_datap(); -void -cpu_init() -{ - int my_cpu = get_cpu_number(); + PE_cpu_machine_quiesce(proc_info->cpu_id); - machine_slot[my_cpu].is_cpu = TRUE; - machine_slot[my_cpu].running = TRUE; -#ifdef MACH_BSD - /* FIXME */ - machine_slot[my_cpu].cpu_type = CPU_TYPE_I386; - machine_slot[my_cpu].cpu_subtype = CPU_SUBTYPE_PENTPRO; -#else - machine_slot[my_cpu].cpu_type = cpuid_cputype(0); - machine_slot[my_cpu].cpu_subtype = CPU_SUBTYPE_AT386; -#endif - -#if NCPUS > 1 - mp_desc_init(my_cpu); -#endif /* NCPUS */ + cpu_thread_halt(); } -kern_return_t -cpu_register( - int *target_cpu) +void +cpu_init(void) { - int cpu; + cpu_data_t *cdp = current_cpu_datap(); - if (real_ncpus == 0) { - /* - * Special case for the boot processor, - * it has been pre-registered by cpu_init(); - */ - *target_cpu = 0; - real_ncpus++; - return KERN_SUCCESS; - } - - /* - * TODO: - * - Run cpu_register() in exclusion mode - */ - - *target_cpu = -1; - for(cpu=0; cpu < wncpu; cpu++) { - if(!machine_slot[cpu].is_cpu) { - machine_slot[cpu].is_cpu = TRUE; #ifdef MACH_BSD - /* FIXME */ - machine_slot[cpu].cpu_type = CPU_TYPE_I386; - machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_PENTPRO; + /* FIXME */ + cdp->cpu_type = CPU_TYPE_I386; + cdp->cpu_subtype = CPU_SUBTYPE_PENTPRO; #else - machine_slot[cpu].cpu_type = cpuid_cputype(0); - machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386; + cdp->cpu_type = cpuid_cputype(0); + cdp->cpu_subtype = CPU_SUBTYPE_AT386; #endif - *target_cpu = cpu; - break; - } - } - - if (*target_cpu != -1) { - real_ncpus++; - return KERN_SUCCESS; - } else - return KERN_FAILURE; + cdp->cpu_running = TRUE; } kern_return_t @@ -151,9 +108,7 @@ cpu_start( kern_return_t ret; if (cpu == cpu_number()) { - PE_cpu_machine_init(cpu_data[cpu].cpu_id, TRUE); - ml_init_interrupt(); - cpu_data[cpu].cpu_status = 1; + cpu_machine_init(); return KERN_SUCCESS; } else { /* @@ -165,6 +120,12 @@ cpu_start( } } +void +cpu_exit_wait( + __unused int cpu) +{ +} + void cpu_machine_init( void) @@ -172,8 +133,89 @@ cpu_machine_init( int cpu; cpu = get_cpu_number(); - PE_cpu_machine_init(cpu_data[cpu].cpu_id, TRUE); + PE_cpu_machine_init(cpu_datap(cpu)->cpu_id, TRUE); + ml_init_interrupt(); - cpu_data[cpu].cpu_status = 1; } +processor_t 
+cpu_processor_alloc(boolean_t is_boot_cpu) +{ + int ret; + processor_t proc; + + if (is_boot_cpu) + return &processor_master; + + ret = kmem_alloc(kernel_map, (vm_offset_t *) &proc, sizeof(*proc)); + if (ret != KERN_SUCCESS) + return NULL; + + bzero((void *) proc, sizeof(*proc)); + return proc; +} + +void +cpu_processor_free(processor_t proc) +{ + if (proc != NULL && proc != &processor_master) + kfree((void *) proc, sizeof(*proc)); +} + +processor_t +current_processor(void) +{ + return current_cpu_datap()->cpu_processor; +} + +processor_t +cpu_to_processor( + int cpu) +{ + return cpu_datap(cpu)->cpu_processor; +} + +ast_t * +ast_pending(void) +{ + return (&current_cpu_datap()->cpu_pending_ast); +} + +cpu_type_t +slot_type( + int slot_num) +{ + return (cpu_datap(slot_num)->cpu_type); +} + +cpu_subtype_t +slot_subtype( + int slot_num) +{ + return (cpu_datap(slot_num)->cpu_subtype); +} + +cpu_threadtype_t +slot_threadtype( + int slot_num) +{ + return (cpu_datap(slot_num)->cpu_threadtype); +} + +cpu_type_t +cpu_type(void) +{ + return (current_cpu_datap()->cpu_type); +} + +cpu_subtype_t +cpu_subtype(void) +{ + return (current_cpu_datap()->cpu_subtype); +} + +cpu_threadtype_t +cpu_threadtype(void) +{ + return (current_cpu_datap()->cpu_threadtype); +} diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index 7f0dfdbe0..08d4f0bea 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,23 +19,15 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef PRIVATE #ifndef _I386_CPU_CAPABILITIES_H #define _I386_CPU_CAPABILITIES_H -/* Sadly, some clients of this interface misspell __APPLE_API_PRIVATE. - * To avoid breaking them, we accept the incorrect _APPLE_API_PRIVATE. - */ -#ifdef _APPLE_API_PRIVATE -#ifndef __APPLE_API_PRIVATE -#define __APPLE_API_PRIVATE -#endif /* __APPLE_API_PRIVATE */ -#endif /* _APPLE_API_PRIVATE */ +#ifndef __ASSEMBLER__ +#include +#endif -#ifndef __APPLE_API_PRIVATE -#error cpu_capabilities.h is for Apple Internal use only -#else /* __APPLE_API_PRIVATE */ - /* * This is the authoritative way to determine from user mode what * implementation-specific processor features are available. @@ -48,10 +40,11 @@ #define kHasMMX 0x00000001 #define kHasSSE 0x00000002 #define kHasSSE2 0x00000004 -#define kHasPNI 0x00000008 // Prescott New Instructions +#define kHasSSE3 0x00000008 #define kCache32 0x00000010 // cache line size is 32 bytes #define kCache64 0x00000020 #define kCache128 0x00000040 +#define kFastThreadLocalStorage 0x00000080 // TLS ptr is kept in a user-mode-readable register #define kUP 0x00008000 // set if (kNumCPUs == 1) #define kNumCPUs 0x00FF0000 // number of CPUs (see _NumCPUs() below) @@ -59,8 +52,11 @@ #define kNumCPUsShift 16 // see _NumCPUs() below #ifndef __ASSEMBLER__ - -extern uint32_t _get_cpu_capabilities( void ); +#include + +__BEGIN_DECLS +extern int _get_cpu_capabilities( void ); +__END_DECLS inline static int _NumCPUs( void ) @@ -82,61 +78,75 @@ int _NumCPUs( void ) * the comm area is seven pages.
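*
* For example, a user-mode consumer of the capability bits defined above
* might do (plain C sketch):
*
*	int caps     = _get_cpu_capabilities();
*	int has_sse3 = (caps & kHasSSE3) != 0;
*	int ncpus    = (caps & kNumCPUs) >> kNumCPUsShift;	-- same as _NumCPUs()
*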
*/ -#define _COMM_PAGE_BASE_ADDRESS 0xBFFF9000 // VM_MAX_ADDRESS - 7 * 4096 -#define _COMM_PAGE_SIGS_OFFSET 0x4000 // offset to routine signatures -#define _COMM_PAGE_AREA_LENGTH ( 7*4096) // reserved length of entire comm area +#define _COMM_PAGE_AREA_LENGTH (19*4096) + // reserved length of entire comm area +#define _COMM_PAGE_BASE_ADDRESS (-20*4096) + // VM_MAX_ADDRESS-_COMM_PAGE_AREA_LENGTH +#define _COMM_PAGE_START_ADDRESS (-16*4096) + // VM_MAX_ADDRESS-_COMM_PAGE_AREA_LENGTH +#define _COMM_PAGE_SIGS_OFFSET 0x8000 + // offset to routine signatures /* data in the comm page */ -#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_BASE_ADDRESS+0x000) // first few bytes are a signature -#define _COMM_PAGE_VERSION (_COMM_PAGE_BASE_ADDRESS+0x01E) // 16-bit version# -#define _COMM_PAGE_THIS_VERSION 1 // this is version 1 of the commarea format +#define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) // first few bytes are a signature +#define _COMM_PAGE_VERSION (_COMM_PAGE_START_ADDRESS+0x01E) // 16-bit version# +#define _COMM_PAGE_THIS_VERSION 3 // version of the commarea format -#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_BASE_ADDRESS+0x020) // uint32_t _cpu_capabilities -#define _COMM_PAGE_NCPUS (_COMM_PAGE_BASE_ADDRESS+0x021) // uint8_t number of configured CPUs -#define _COMM_PAGE_VECTOR_FLAVOR (_COMM_PAGE_BASE_ADDRESS+0x024) // uint8_t SSE/SSE2/PNI -#define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_BASE_ADDRESS+0x026) // uint16_t cache line size - -#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_BASE_ADDRESS+0x030) // 16 unused bytes +#define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) // uint32_t _cpu_capabilities +#define _COMM_PAGE_NCPUS (_COMM_PAGE_START_ADDRESS+0x021) // uint8_t number of configured CPUs +#define _COMM_PAGE_VECTOR_FLAVOR (_COMM_PAGE_START_ADDRESS+0x024) // uint8_t SSE/SSE2/SSE3 +#define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_START_ADDRESS+0x026) // uint16_t cache line size -#define _COMM_PAGE_2_TO_52 (_COMM_PAGE_BASE_ADDRESS+0x040) // double float constant 2**52 -#define _COMM_PAGE_10_TO_6 (_COMM_PAGE_BASE_ADDRESS+0x048) // double float constant 10**6 +#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_START_ADDRESS+0x030) // 16 unused bytes -#define _COMM_PAGE_UNUSED2 (_COMM_PAGE_BASE_ADDRESS+0x050) // 16 unused bytes +#define _COMM_PAGE_2_TO_52 (_COMM_PAGE_START_ADDRESS+0x040) // double float constant 2**52 +#define _COMM_PAGE_10_TO_6 (_COMM_PAGE_START_ADDRESS+0x048) // double float constant 10**6 -#define _COMM_PAGE_TIMEBASE (_COMM_PAGE_BASE_ADDRESS+0x060) // used by gettimeofday() -#define _COMM_PAGE_TIMESTAMP (_COMM_PAGE_BASE_ADDRESS+0x068) // used by gettimeofday() -#define _COMM_PAGE_SEC_PER_TICK (_COMM_PAGE_BASE_ADDRESS+0x070) // used by gettimeofday() +#define _COMM_PAGE_UNUSED2 (_COMM_PAGE_START_ADDRESS+0x050) // 16 unused bytes -#define _COMM_PAGE_UNUSED3 (_COMM_PAGE_BASE_ADDRESS+0x080) // 384 unused bytes +#define _COMM_PAGE_TIMEBASE (_COMM_PAGE_START_ADDRESS+0x060) // used by gettimeofday() +#define _COMM_PAGE_TIMESTAMP (_COMM_PAGE_START_ADDRESS+0x068) // used by gettimeofday() +#define _COMM_PAGE_SEC_PER_TICK (_COMM_PAGE_START_ADDRESS+0x070) // used by gettimeofday() /* jump table (bla to this address, which may be a branch to the actual code somewhere else) */ /* When new jump table entries are added, corresponding symbols should be added below */ + +#define _COMM_PAGE_COMPARE_AND_SWAP32 (_COMM_PAGE_START_ADDRESS+0x080) // compare-and-swap word +#define _COMM_PAGE_COMPARE_AND_SWAP64 (_COMM_PAGE_START_ADDRESS+0x0c0) // compare-and-swap doubleword +#define 
_COMM_PAGE_ENQUEUE (_COMM_PAGE_START_ADDRESS+0x100) // enqueue +#define _COMM_PAGE_DEQUEUE (_COMM_PAGE_START_ADDRESS+0x140) // dequeue +#define _COMM_PAGE_MEMORY_BARRIER (_COMM_PAGE_START_ADDRESS+0x180) // memory barrier +#define _COMM_PAGE_ATOMIC_ADD32 (_COMM_PAGE_START_ADDRESS+0x1a0) // add atomic word +#define _COMM_PAGE_ATOMIC_ADD64 (_COMM_PAGE_START_ADDRESS+0x1c0) // add atomic doubleword + +#define _COMM_PAGE_NANOTIME_INFO (_COMM_PAGE_START_ADDRESS+0x1e0) // 32 bytes used by nanotime() -#define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_BASE_ADDRESS+0x200) // mach_absolute_time() -#define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_BASE_ADDRESS+0x220) // spinlock_try() -#define _COMM_PAGE_SPINLOCK_LOCK (_COMM_PAGE_BASE_ADDRESS+0x260) // spinlock_lock() -#define _COMM_PAGE_SPINLOCK_UNLOCK (_COMM_PAGE_BASE_ADDRESS+0x2a0) // spinlock_unlock() -#define _COMM_PAGE_PTHREAD_GETSPECIFIC (_COMM_PAGE_BASE_ADDRESS+0x2c0) // pthread_getspecific() -#define _COMM_PAGE_GETTIMEOFDAY (_COMM_PAGE_BASE_ADDRESS+0x2e0) // used by gettimeofday() -#define _COMM_PAGE_FLUSH_DCACHE (_COMM_PAGE_BASE_ADDRESS+0x4e0) // sys_dcache_flush() -#define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_BASE_ADDRESS+0x520) // sys_icache_invalidate() -#define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_BASE_ADDRESS+0x580) // pthread_self() -#define _COMM_PAGE_UNUSED4 (_COMM_PAGE_BASE_ADDRESS+0x5a0) // 32 unused bytes -#define _COMM_PAGE_RELINQUISH (_COMM_PAGE_BASE_ADDRESS+0x5c0) // used by spinlocks +#define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_START_ADDRESS+0x200) // mach_absolute_time() +#define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_START_ADDRESS+0x220) // spinlock_try() +#define _COMM_PAGE_SPINLOCK_LOCK (_COMM_PAGE_START_ADDRESS+0x260) // spinlock_lock() +#define _COMM_PAGE_SPINLOCK_UNLOCK (_COMM_PAGE_START_ADDRESS+0x2a0) // spinlock_unlock() +#define _COMM_PAGE_PTHREAD_GETSPECIFIC (_COMM_PAGE_START_ADDRESS+0x2c0) // pthread_getspecific() +#define _COMM_PAGE_GETTIMEOFDAY (_COMM_PAGE_START_ADDRESS+0x2e0) // used by gettimeofday() +#define _COMM_PAGE_FLUSH_DCACHE (_COMM_PAGE_START_ADDRESS+0x4e0) // sys_dcache_flush() +#define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_START_ADDRESS+0x520) // sys_icache_invalidate() +#define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_START_ADDRESS+0x580) // pthread_self() +#define _COMM_PAGE_UNUSED4 (_COMM_PAGE_START_ADDRESS+0x5a0) // 32 unused bytes +#define _COMM_PAGE_RELINQUISH (_COMM_PAGE_START_ADDRESS+0x5c0) // used by spinlocks -#define _COMM_PAGE_UNUSED5 (_COMM_PAGE_BASE_ADDRESS+0x5e0) // 32 unused bytes +#define _COMM_PAGE_BTS (_COMM_PAGE_START_ADDRESS+0x5e0) // bit test-and-set +#define _COMM_PAGE_BTC (_COMM_PAGE_START_ADDRESS+0x5f0) // bit test-and-clear -#define _COMM_PAGE_BZERO (_COMM_PAGE_BASE_ADDRESS+0x600) // bzero() -#define _COMM_PAGE_BCOPY (_COMM_PAGE_BASE_ADDRESS+0x780) // bcopy() -#define _COMM_PAGE_MEMCPY (_COMM_PAGE_BASE_ADDRESS+0x7a0) // memcpy() -#define _COMM_PAGE_MEMMOVE (_COMM_PAGE_BASE_ADDRESS+0x7a0) // memmove() +#define _COMM_PAGE_BZERO (_COMM_PAGE_START_ADDRESS+0x600) // bzero() +#define _COMM_PAGE_BCOPY (_COMM_PAGE_START_ADDRESS+0x780) // bcopy() +#define _COMM_PAGE_MEMCPY (_COMM_PAGE_START_ADDRESS+0x7a0) // memcpy() +#define _COMM_PAGE_MEMMOVE (_COMM_PAGE_START_ADDRESS+0x7a0) // memmove() -#define _COMM_PAGE_UNUSED6 (_COMM_PAGE_BASE_ADDRESS+0xF80) // 128 unused bytes +#define _COMM_PAGE_NANOTIME (_COMM_PAGE_START_ADDRESS+0xF80) // nanotime() -#define _COMM_PAGE_BIGCOPY (_COMM_PAGE_BASE_ADDRESS+0x1000)// very-long-operand copies +#define _COMM_PAGE_BIGCOPY (_COMM_PAGE_START_ADDRESS+0x1000)//
very-long-operand copies -#define _COMM_PAGE_END (_COMM_PAGE_BASE_ADDRESS+0x1600)// end of common page +#define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1600)// end of common page #ifdef __ASSEMBLER__ #ifdef __COMM_PAGE_SYMBOLS @@ -147,6 +157,13 @@ symbol_name: nop .text // Required to make a well behaved symbol file + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32, _COMM_PAGE_COMPARE_AND_SWAP32) + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64, _COMM_PAGE_COMPARE_AND_SWAP64) + CREATE_COMM_PAGE_SYMBOL(___atomic_enqueue, _COMM_PAGE_ENQUEUE) + CREATE_COMM_PAGE_SYMBOL(___atomic_dequeue, _COMM_PAGE_DEQUEUE) + CREATE_COMM_PAGE_SYMBOL(___memory_barrier, _COMM_PAGE_MEMORY_BARRIER) + CREATE_COMM_PAGE_SYMBOL(___atomic_add32, _COMM_PAGE_ATOMIC_ADD32) + CREATE_COMM_PAGE_SYMBOL(___atomic_add64, _COMM_PAGE_ATOMIC_ADD64) CREATE_COMM_PAGE_SYMBOL(___mach_absolute_time, _COMM_PAGE_ABSOLUTE_TIME) CREATE_COMM_PAGE_SYMBOL(___spin_lock_try, _COMM_PAGE_SPINLOCK_TRY) CREATE_COMM_PAGE_SYMBOL(___spin_lock, _COMM_PAGE_SPINLOCK_LOCK) @@ -157,11 +174,14 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___sys_icache_invalidate, _COMM_PAGE_FLUSH_ICACHE) CREATE_COMM_PAGE_SYMBOL(___pthread_self, _COMM_PAGE_PTHREAD_SELF) CREATE_COMM_PAGE_SYMBOL(___spin_lock_relinquish, _COMM_PAGE_RELINQUISH) + CREATE_COMM_PAGE_SYMBOL(___bit_test_and_set, _COMM_PAGE_BTS) + CREATE_COMM_PAGE_SYMBOL(___bit_test_and_clear, _COMM_PAGE_BTC) CREATE_COMM_PAGE_SYMBOL(___bzero, _COMM_PAGE_BZERO) CREATE_COMM_PAGE_SYMBOL(___bcopy, _COMM_PAGE_BCOPY) CREATE_COMM_PAGE_SYMBOL(___memcpy, _COMM_PAGE_MEMCPY) // CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) CREATE_COMM_PAGE_SYMBOL(___bigcopy, _COMM_PAGE_BIGCOPY) + CREATE_COMM_PAGE_SYMBOL(___nanotime, _COMM_PAGE_NANOTIME) CREATE_COMM_PAGE_SYMBOL(___end_comm_page, _COMM_PAGE_END) .data // Required to make a well behaved symbol file @@ -170,5 +190,5 @@ symbol_name: nop #endif /* __COMM_PAGE_SYMBOLS */ #endif /* __ASSEMBLER__ */ -#endif /* __APPLE_API_PRIVATE */ #endif /* _I386_CPU_CAPABILITIES_H */ +#endif /* PRIVATE */ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 005c063cb..7d455b4a1 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -27,38 +27,120 @@ #ifndef I386_CPU_DATA #define I386_CPU_DATA -#include #include #if defined(__GNUC__) #include #include +#include #include -typedef struct -{ - thread_act_t *active_thread; - int preemption_level; - int simple_lock_count; - int interrupt_level; - int cpu_number; /* Logical CPU number */ - int cpu_phys_number; /* Physical CPU Number */ - cpu_id_t cpu_id; /* Platform Expert handle */ - int cpu_status; /* Boot Status */ - int cpu_signals; /* IPI events */ - int mcount_off; /* mcount recursion flag */ + +/* + * Data structures referenced (anonymously) from per-cpu data: + */ +struct cpu_core; +struct cpu_cons_buffer; +struct mp_desc_table; + + +/* + * Data structures embedded in per-cpu data: + */ +typedef struct rtclock_timer { + uint64_t deadline; + boolean_t is_set; + boolean_t has_expired; +} rtclock_timer_t; + +typedef struct { + uint64_t rnt_tsc; /* timestamp */ + uint64_t rnt_nanos; /* nanoseconds */ + uint32_t rnt_scale; /* tsc -> nanosec multiplier */ + uint32_t rnt_shift; /* tsc -> nanosec shift/div */ + uint64_t rnt_step_tsc; /* tsc when scale applied */ + uint64_t rnt_step_nanos; /* ns when scale applied */ +} rtc_nanotime_t; + +typedef struct { + struct i386_tss *cdi_ktss; +#if MACH_KDB + struct i386_tss *cdi_dbtss; +#endif /* MACH_KDB */ + struct fake_descriptor *cdi_gdt; + struct fake_descriptor *cdi_idt; + struct fake_descriptor *cdi_ldt; +} cpu_desc_index_t; + + +/* + * Per-cpu data. + * + * Each processor has a per-cpu data area which is dereferenced through the + * current_cpu_datap() macro. For speed, the %gs segment is based here, and + * using this, inlines provide single-instruction access to frequently used + * members - such as get_cpu_number()/cpu_number(), and get_active_thread()/ + * current_thread(). + * + * Cpu data owned by another processor can be accessed using the + * cpu_datap(cpu_number) macro which uses the cpu_data_ptr[] array of per-cpu + * pointers. + */ +typedef struct cpu_data +{ + struct cpu_data *cpu_this; /* pointer to myself */ + thread_t cpu_active_thread; + thread_t cpu_active_kloaded; + vm_offset_t cpu_active_stack; + vm_offset_t cpu_kernel_stack; + vm_offset_t cpu_int_stack_top; + int cpu_preemption_level; + int cpu_simple_lock_count; + int cpu_interrupt_level; + int cpu_number; /* Logical CPU */ + int cpu_phys_number; /* Physical CPU */ + cpu_id_t cpu_id; /* Platform Expert */ + int cpu_signals; /* IPI events */ + int cpu_mcount_off; /* mcount recursion */ + ast_t cpu_pending_ast; + int cpu_type; + int cpu_subtype; + int cpu_threadtype; + int cpu_running; + struct cpu_core *cpu_core; /* cpu's parent core */ + uint64_t cpu_rtc_tick_deadline; + uint64_t cpu_rtc_intr_deadline; + rtclock_timer_t cpu_rtc_timer; + rtc_nanotime_t cpu_rtc_nanotime; + void *cpu_console_buf; + struct processor *cpu_processor; + struct cpu_pmap *cpu_pmap; + struct mp_desc_table *cpu_desc_tablep; + cpu_desc_index_t cpu_desc_index; + boolean_t cpu_iflag; +#ifdef MACH_KDB + /* XXX Untested: */ + int cpu_db_pass_thru; + vm_offset_t cpu_db_stacks; + struct i386_saved_state *cpu_kdb_saved_state; + spl_t cpu_kdb_saved_ipl; + int cpu_kdb_is_slave; + int cpu_kdb_active; +#endif /* MACH_KDB */ + int cpu_reserved1; } cpu_data_t; -extern cpu_data_t cpu_data[NCPUS]; +extern cpu_data_t *cpu_data_ptr[]; +extern cpu_data_t cpu_data_master; /* Macro to generate inline bodies to retrieve per-cpu data fields.
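*
* For instance, CPU_DATA_GET(cpu_number,int) below expands to a single
* %gs-relative load; a sketch of the generated inline body:
*
*	int ret;
*	__asm__ volatile ("movl %%gs:%P1,%0"
*		: "=r" (ret)
*		: "i" (offsetof(cpu_data_t,cpu_number)));
*	return ret;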
*/ #define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#define CPU_DATA_GET(field,type) \ +#define CPU_DATA_GET(member,type) \ type ret; \ __asm__ volatile ("movl %%gs:%P1,%0" \ : "=r" (ret) \ - : "i" (offsetof(cpu_data_t,field))); \ + : "i" (offsetof(cpu_data_t,member))); \ return ret; /* @@ -66,95 +148,110 @@ extern cpu_data_t cpu_data[NCPUS]; * inline versions of these routines. Everyone outside, must call * the real thing, */ -extern thread_act_t __inline__ get_active_thread(void) +static inline thread_t +get_active_thread(void) { - CPU_DATA_GET(active_thread,thread_act_t) + CPU_DATA_GET(cpu_active_thread,thread_t) } -#define current_act_fast() get_active_thread() -#define current_act() current_act_fast() -#define current_thread() current_act_fast()->thread +#define current_thread_fast() get_active_thread() +#define current_thread() current_thread_fast() -extern int __inline__ get_preemption_level(void) +static inline int +get_preemption_level(void) { - CPU_DATA_GET(preemption_level,int) + CPU_DATA_GET(cpu_preemption_level,int) } -extern int __inline__ get_simple_lock_count(void) +static inline int +get_simple_lock_count(void) { - CPU_DATA_GET(simple_lock_count,int) + CPU_DATA_GET(cpu_simple_lock_count,int) } -extern int __inline__ get_interrupt_level(void) +static inline int +get_interrupt_level(void) { - CPU_DATA_GET(interrupt_level,int) + CPU_DATA_GET(cpu_interrupt_level,int) } -extern int __inline__ get_cpu_number(void) +static inline int +get_cpu_number(void) { CPU_DATA_GET(cpu_number,int) } -extern int __inline__ get_cpu_phys_number(void) +static inline int +get_cpu_phys_number(void) { CPU_DATA_GET(cpu_phys_number,int) } - -extern void __inline__ disable_preemption(void) +static inline struct +cpu_core * get_cpu_core(void) { - register int idx = (int)&((cpu_data_t *)0)->preemption_level; - - __asm__ volatile (" incl %%gs:(%0)" : : "r" (idx)); + CPU_DATA_GET(cpu_core,struct cpu_core *) } -extern void __inline__ enable_preemption(void) +static inline void +disable_preemption(void) { - extern void kernel_preempt_check (void); - register int idx = (int)&((cpu_data_t *)0)->preemption_level; - register void (*kpc)(void)= kernel_preempt_check; + __asm__ volatile ("incl %%gs:%P0" + : + : "i" (offsetof(cpu_data_t, cpu_preemption_level))); +} +static inline void +enable_preemption(void) +{ assert(get_preemption_level() > 0); - __asm__ volatile ("decl %%gs:(%0); jne 1f; \ - call %1; 1:" + __asm__ volatile ("decl %%gs:%P0 \n\t" + "jne 1f \n\t" + "call _kernel_preempt_check \n\t" + "1:" : /* no outputs */ - : "r" (idx), "r" (kpc) - : "%eax", "%ecx", "%edx", "cc", "memory"); + : "i" (offsetof(cpu_data_t, cpu_preemption_level)) + : "eax", "ecx", "edx", "cc", "memory"); } -extern void __inline__ enable_preemption_no_check(void) +static inline void +enable_preemption_no_check(void) { - register int idx = (int)&((cpu_data_t *)0)->preemption_level; - assert(get_preemption_level() > 0); - __asm__ volatile ("decl %%gs:(%0)" + __asm__ volatile ("decl %%gs:%P0" : /* no outputs */ - : "r" (idx) + : "i" (offsetof(cpu_data_t, cpu_preemption_level)) : "cc", "memory"); } -extern void __inline__ mp_disable_preemption(void) +static inline void +mp_disable_preemption(void) { -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ } -extern void __inline__ mp_enable_preemption(void) +static inline void +mp_enable_preemption(void) { -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ } -extern void __inline__ mp_enable_preemption_no_check(void) +static inline void 
+mp_enable_preemption_no_check(void) { -#if NCPUS > 1 enable_preemption_no_check(); -#endif /* NCPUS > 1 */ } -#if 0 -#ifndef __OPTIMIZE__ -#undef extern -#endif -#endif +static inline cpu_data_t * +current_cpu_datap(void) +{ + CPU_DATA_GET(cpu_this, cpu_data_t *); +} + +static inline cpu_data_t * +cpu_datap(int cpu) +{ + assert(cpu_data_ptr[cpu]); + return cpu_data_ptr[cpu]; +} + +extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu); #else /* !defined(__GNUC__) */ diff --git a/osfmk/i386/cpu_number.h b/osfmk/i386/cpu_number.h index cecff513b..81feb8eda 100644 --- a/osfmk/i386/cpu_number.h +++ b/osfmk/i386/cpu_number.h @@ -55,20 +55,23 @@ * Machine-dependent definitions for cpu identification. * */ +#ifdef KERNEL_PRIVATE + #ifndef _I386_CPU_NUMBER_H_ #define _I386_CPU_NUMBER_H_ -#if MP_V1_1 +#ifdef I386_CPU_DATA /* Get the cpu number directly from the pre-processor data area */ -#include #define cpu_number() get_cpu_number() -#else /* MP_V1_1 */ +#else /* I386_CPU_DATA */ /* Use a function to do this less directly. */ extern int cpu_number(void); -#endif /* MP_V1_1 */ +#endif /* I386_CPU_DATA */ #endif /* _I386_CPU_NUMBER_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c new file mode 100644 index 000000000..52de5180b --- /dev/null +++ b/osfmk/i386/cpu_threads.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include + +/* + * Kernel parameter determining whether threads are halted unconditionally + * in the idle state. This is the default behavior. + * See machine_idle() for use. + */ +int idlehalt = 1; + +void +cpu_thread_init(void) +{ + int my_cpu = get_cpu_number(); + int my_core_base_cpu; + int ret; + cpu_core_t *my_core; + + /* Have we initialized already for this cpu? */ + if (cpu_core()) + return; + + if (cpuid_features() & CPUID_FEATURE_HTT) { + /* + * Get the cpu number of the base thread in the core. + */ + my_core_base_cpu = cpu_to_core_cpu(my_cpu); + current_cpu_datap()->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT; + } else { + my_core_base_cpu = my_cpu; + current_cpu_datap()->cpu_threadtype = CPU_THREADTYPE_NONE; + } + + /* + * Allocate the base cpu_core struct if none exists. + * Since we could be racing with other threads in the same core, + * this needs care without using locks. We allocate a new core + * structure and assign it atomically, freeing it if we lost the race. 
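+ * The publish step below is the usual allocate/compare-exchange/
+ * free-on-loss pattern, roughly:
+ *
+ *	new_core = allocate_and_zero();		-- names illustrative
+ *	if (atomic_cmpxchg(&slot, 0, new_core))
+ *		...we published new_core; bump the physical-cpu counts...
+ *	else
+ *		free(new_core);			-- lost; re-read slot, use winner's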
+ */ + my_core = (cpu_core_t *) cpu_to_core(my_core_base_cpu); + if (my_core == NULL) { + cpu_core_t *new_core; + + ret = kmem_alloc(kernel_map, + (void *) &new_core, sizeof(cpu_core_t)); + if (ret != KERN_SUCCESS) + panic("cpu_thread_init() kmem_alloc ret=%d\n", ret); + bzero((void *) new_core, sizeof(cpu_core_t)); + new_core->base_cpu = my_core_base_cpu; + if (atomic_cmpxchg((uint32_t *) &cpu_to_core(my_core_base_cpu), + 0, (uint32_t) new_core)) { + atomic_incl((long *) &machine_info.physical_cpu, 1); + atomic_incl((long *) &machine_info.physical_cpu_max, 1); + } else { + kmem_free(kernel_map, + (vm_offset_t)new_core, sizeof(cpu_core_t)); + } + my_core = (cpu_core_t *) cpu_to_core(my_core_base_cpu); + } + + cpu_to_core(my_cpu) = (struct cpu_core *) my_core; + + atomic_incl((long *) &my_core->active_threads, 1); + atomic_incl((long *) &my_core->num_threads, 1); + atomic_incl((long *) &machine_info.logical_cpu, 1); + atomic_incl((long *) &machine_info.logical_cpu_max, 1); + +} + +/* + * Called for a cpu to halt permanently + * (as opposed to halting and expecting an interrupt to awaken it). + */ +void +cpu_thread_halt(void) +{ + cpu_core_t *my_core = cpu_core(); + + /* Note: don't ever decrement the number of physical processors */ + atomic_decl((long *) &my_core->active_threads, 1); + atomic_decl((long *) &my_core->num_threads, 1); + atomic_decl((long *) &machine_info.logical_cpu, 1); + + cpu_halt(); +} diff --git a/osfmk/i386/cpu_threads.h b/osfmk/i386/cpu_threads.h new file mode 100644 index 000000000..ba98631be --- /dev/null +++ b/osfmk/i386/cpu_threads.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _I386_CPU_THREADS_H_ +#define _I386_CPU_THREADS_H_ + +#include +#include + +struct pmc; + +typedef struct { + int base_cpu; /* Number of the cpu first in core */ + int num_threads; /* Number of threads (logical cpus) */ + int active_threads; /* Number of non-halted threads */ + struct pmc *pmc; /* Pointer to perfmon data */ +} cpu_core_t; + +#define CPU_THREAD_MASK 0x00000001 +#define cpu_to_core_lapic(cpu) (cpu_to_lapic[cpu] & ~CPU_THREAD_MASK) +#define cpu_to_core_cpu(cpu) (lapic_to_cpu[cpu_to_core_lapic(cpu)]) +#define cpu_to_logical_cpu(cpu) (cpu_to_lapic[cpu] & CPU_THREAD_MASK) +#define cpu_is_core_cpu(cpu) (cpu_to_logical_cpu(cpu) == 0) + +#define cpu_to_core(cpu) (cpu_datap(cpu)->cpu_core) + +/* Fast access: */ +#define cpu_core() ((cpu_core_t *) get_cpu_core()) + +#define cpu_is_same_core(cpu1,cpu2) (cpu_to_core(cpu1) == cpu_to_core(cpu2)) + +extern void cpu_thread_init(void); +extern void cpu_thread_halt(void); + +extern int idlehalt; + +extern int ncore; +#endif /* _I386_CPU_THREADS_H_ */ diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index 3326a564f..e6dbc551c 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -23,6 +23,8 @@ * @OSF_COPYRIGHT@ */ +#include + #include "cpuid.h" #define min(a,b) ((a) < (b) ? (a) : (b)) @@ -44,14 +46,19 @@ uint32_t cpuid_feature; /* XXX obsolescent for compat */ * We only identify Intel CPUs here. Adding support * for others would be straightforward. */ +static void set_cpu_generic(i386_cpu_info_t *); static void set_cpu_intel(i386_cpu_info_t *); +static void set_cpu_amd(i386_cpu_info_t *); +static void set_cpu_nsc(i386_cpu_info_t *); static void set_cpu_unknown(i386_cpu_info_t *); struct { - char *vendor; - void (* func)(i386_cpu_info_t *); + const char *vendor; + void (* func)(i386_cpu_info_t *); } cpu_vendors[] = { {CPUID_VID_INTEL, set_cpu_intel}, + {CPUID_VID_AMD, set_cpu_amd}, + {CPUID_VID_NSC, set_cpu_nsc}, {0, set_cpu_unknown} }; @@ -81,78 +88,6 @@ cpuid_get_info(i386_cpu_info_t *info_p) } } -/* - * A useful model name string takes some decoding. - */ -char * -cpuid_intel_get_model_name( - uint8_t brand, - uint8_t family, - uint8_t model, - uint32_t signature) -{ - /* check for brand id */ - switch(brand) { - case 0: - /* brand ID not supported; use alternate method. */ - switch(family) { - case CPUID_FAMILY_486: - return "486"; - case CPUID_FAMILY_P5: - return "Pentium"; - case CPUID_FAMILY_PPRO: - switch(model) { - case CPUID_MODEL_P6: - return "Pentium Pro"; - case CPUID_MODEL_PII: - return "Pentium II"; - case CPUID_MODEL_P65: - case CPUID_MODEL_P66: - return "Celeron"; - case CPUID_MODEL_P67: - case CPUID_MODEL_P68: - case CPUID_MODEL_P6A: - case CPUID_MODEL_P6B: - return "Pentium III"; - default: - return "Unknown P6 Family"; - } - case CPUID_FAMILY_PENTIUM4: - return "Pentium 4"; - default: - return "Unknown Family"; - } - case 0x01: - return "Celeron"; - case 0x02: - case 0x04: - return "Pentium III"; - case 0x03: - if (signature == 0x6B1) - return "Celeron"; - else - return "Pentium III Xeon"; - case 0x06: - return "Mobile Pentium III"; - case 0x07: - return "Mobile Celeron"; - case 0x08: - if (signature >= 0xF20) - return "Genuine Intel"; - else - return "Pentium 4"; - case 0x09: - return "Pentium 4"; - case 0x0b: - return "Xeon"; - case 0x0e: - case 0x0f: - return "Mobile Pentium 4"; - default: - return "Unknown Pentium"; - } -} - /* * Cache descriptor table.
Each row has the form: * (descriptor_value, cache, size, linesize, @@ -163,11 +98,11 @@ static cpuid_cache_desc_t cpuid_cache_desc_tab[] = { CACHE_DESC(CPUID_CACHE_ITLB_4K, Lnone, 0, 0, \ "Instruction TLB, 4K, pages 4-way set associative, 64 entries"), CACHE_DESC(CPUID_CACHE_ITLB_4M, Lnone, 0, 0, \ - "Instruction TLB, 4M, pages 4-way set associative, 4 entries"), + "Instruction TLB, 4M, pages 4-way set associative, 2 entries"), CACHE_DESC(CPUID_CACHE_DTLB_4K, Lnone, 0, 0, \ "Data TLB, 4K pages, 4-way set associative, 64 entries"), CACHE_DESC(CPUID_CACHE_DTLB_4M, Lnone, 0, 0, \ - "Data TLB, 4M pages, 4-way set associative, 4 entries"), + "Data TLB, 4M pages, 4-way set associative, 8 entries"), CACHE_DESC(CPUID_CACHE_ITLB_64, Lnone, 0, 0, \ "Instruction TLB, 4K and 2M or 4M pages, 64 entries"), CACHE_DESC(CPUID_CACHE_ITLB_128, Lnone, 0, 0, \ @@ -180,6 +115,10 @@ CACHE_DESC(CPUID_CACHE_DTLB_128, Lnone, 0, 0, \ "Data TLB, 4K and 4M pages, 128 entries"), CACHE_DESC(CPUID_CACHE_DTLB_256, Lnone, 0, 0, \ "Data TLB, 4K and 4M pages, 256 entries"), +CACHE_DESC(CPUID_CACHE_ITLB_128_4, Lnone, 0, 0, \ + "Instruction TLB, 4K pages, 4-way set associative, 128 entries"), +CACHE_DESC(CPUID_CACHE_DTLB_128_4, Lnone, 0, 0, \ + "Data TLB, 4K pages, 4-way set associative, 128 entries"), CACHE_DESC(CPUID_CACHE_ICACHE_8K, L1I, 8*1024, 32, \ "Instruction L1 cache, 8K, 4-way set associative, 32byte line size"), CACHE_DESC(CPUID_CACHE_DCACHE_8K, L1D, 8*1024, 32, \ @@ -194,11 +133,17 @@ CACHE_DESC(CPUID_CACHE_DCACHE_16K_64, L1D, 16*1024, 64, \ "Data L1 cache, 16K, 4-way set associative, 64byte line size"), CACHE_DESC(CPUID_CACHE_DCACHE_32K_64, L1D, 32*1024, 64, \ "Data L1 cache, 32K, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_DCACHE_32K, L1D, 32*1024, 64, \ + "Data L1 cache, 32K, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_ICACHE_32K, L1I, 32*1024, 64, \ + "Instruction L1 cache, 32K, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_DCACHE_16K_8, L1D, 16*1024, 64, \ + "Data L1 cache, 16K, 8-way set associative, 64byte line size"), CACHE_DESC(CPUID_CACHE_TRACE_12K, L1I, 12*1024, 64, \ "Trace cache, 12K-uop, 8-way set associative"), -CACHE_DESC(CPUID_CACHE_TRACE_12K, L1I, 16*1024, 64, \ +CACHE_DESC(CPUID_CACHE_TRACE_16K, L1I, 16*1024, 64, \ "Trace cache, 16K-uop, 8-way set associative"), -CACHE_DESC(CPUID_CACHE_TRACE_12K, L1I, 32*1024, 64, \ +CACHE_DESC(CPUID_CACHE_TRACE_32K, L1I, 32*1024, 64, \ "Trace cache, 32K-uop, 8-way set associative"), CACHE_DESC(CPUID_CACHE_UCACHE_128K, L2U, 128*1024, 32, \ "Unified L2 cache, 128K, 4-way set associative, 32byte line size"), @@ -226,71 +171,133 @@ CACHE_DESC(CPUID_CACHE_UCACHE_1M_32, L2U, 1*1024*1024, 32, \ "Unified L2 cache, 1M, 8-way set associative, 32byte line size"), CACHE_DESC(CPUID_CACHE_UCACHE_2M_32, L2U, 2*1024*1024, 32, \ "Unified L2 cache, 2M, 8-way set associative, 32byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_1M_64_4, L2U, 1*1024*1024, 64, \ + "Unified L2 cache, 1M, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_2M_64, L2U, 2*1024*1024, 64, \ + "Unified L2 cache, 2M, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_512K_64_2,L2U, 512*1024, 64, \ + "Unified L2 cache, 512K, 2-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_512K_64_4,L2U, 512*1024, 64, \ + "Unified L2 cache, 512K, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_1M_64_8, L2U, 1*1024*1024, 64, \ + "Unified L2 cache, 1M, 8-way set
associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_128K_S4, L2U, 128*1024, 64, \ + "Unified L2 sectored cache, 128K, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_128K_S2, L2U, 128*1024, 64, \ + "Unified L2 sectored cache, 128K, 2-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_UCACHE_256K_S4, L2U, 256*1024, 64, \ + "Unified L2 sectored cache, 256K, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_L3CACHE_512K, L3U, 512*1024, 64, \ + "Unified L3 cache, 512K, 4-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_L3CACHE_1M, L3U, 1*1024*1024, 64, \ + "Unified L3 cache, 1M, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_L3CACHE_2M, L3U, 2*1024*1024, 64, \ + "Unified L3 cache, 2M, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_L3CACHE_4M, L3U, 4*1024*1024, 64, \ + "Unified L3 cache, 4M, 8-way set associative, 64byte line size"), +CACHE_DESC(CPUID_CACHE_PREFETCH_64, Lnone, 0, 0, \ + "64-Byte Prefetching"), +CACHE_DESC(CPUID_CACHE_PREFETCH_128, Lnone, 0, 0, \ + "128-Byte Prefetching"), +CACHE_DESC(CPUID_CACHE_NOCACHE, Lnone, 0, 0, \ + "No L2 cache or, if valid L2 cache, no L3 cache"), CACHE_DESC(CPUID_CACHE_NULL, Lnone, 0, 0, \ (char *)0), }; -static void -set_cpu_intel(i386_cpu_info_t *info_p) +static const char * get_intel_model_string( i386_cpu_info_t * info_p ) { - uint32_t cpuid_result[4]; - uint32_t max_extid; - char str[128], *p; - char *model; - int i; - int j; - - /* get extended cpuid results */ - do_cpuid(0x80000000, cpuid_result); - max_extid = cpuid_result[0]; - - /* check to see if we can get brand string */ - if (max_extid > 0x80000000) { - /* - * The brand string 48 bytes (max), guaranteed to - * be NUL terminated. - */ - do_cpuid(0x80000002, cpuid_result); - bcopy((char *)cpuid_result, &str[0], 16); - do_cpuid(0x80000003, cpuid_result); - bcopy((char *)cpuid_result, &str[16], 16); - do_cpuid(0x80000004, cpuid_result); - bcopy((char *)cpuid_result, &str[32], 16); - for (p = str; *p != '\0'; p++) { - if (*p != ' ') break; - } - strncpy(info_p->cpuid_brand_string, - p, sizeof(info_p->cpuid_brand_string)-1); - info_p->cpuid_brand_string[sizeof(info_p->cpuid_brand_string)-1] = '\0'; - } - - /* get processor signature and decode */ - do_cpuid(1, cpuid_result); - info_p->cpuid_signature = cpuid_result[0]; - info_p->cpuid_stepping = cpuid_result[0] & 0x0f; - info_p->cpuid_model = (cpuid_result[0] >> 4) & 0x0f; - info_p->cpuid_family = (cpuid_result[0] >> 8) & 0x0f; - info_p->cpuid_type = (cpuid_result[0] >> 12) & 0x03; - info_p->cpuid_extmodel = (cpuid_result[0] >> 16) & 0x0f; - info_p->cpuid_extfamily = (cpuid_result[0] >> 20) & 0xff; - info_p->cpuid_brand = cpuid_result[1] & 0xff; - info_p->cpuid_features = cpuid_result[3]; + /* check for brand id */ + switch(info_p->cpuid_brand) { + case CPUID_BRAND_UNSUPPORTED: + /* brand ID not supported; use alternate method. 
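+	 (The alternate path decodes cpuid_family and cpuid_model, which
+	 set_cpu_generic() presumably derives from the CPUID(1) signature the
+	 same way the deleted code did: stepping = bits 3:0, model = bits 7:4,
+	 family = bits 11:8.)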
*/ + switch(info_p->cpuid_family) { + case CPUID_FAMILY_486: + return "Intel 486"; + case CPUID_FAMILY_586: + return "Intel Pentium"; + case CPUID_FAMILY_686: + switch(info_p->cpuid_model) { + case CPUID_MODEL_P6: + return "Intel Pentium Pro"; + case CPUID_MODEL_PII: + return "Intel Pentium II"; + case CPUID_MODEL_P65: + case CPUID_MODEL_P66: + return "Intel Celeron"; + case CPUID_MODEL_P67: + case CPUID_MODEL_P68: + case CPUID_MODEL_P6A: + case CPUID_MODEL_P6B: + return "Intel Pentium III"; + case CPUID_MODEL_PM9: + case CPUID_MODEL_PMD: + return "Intel Pentium M"; + default: + return "Unknown Intel P6 Family"; + } + case CPUID_FAMILY_ITANIUM: + return "Intel Itanium"; + case CPUID_FAMILY_EXTENDED: + switch (info_p->cpuid_extfamily) { + case CPUID_EXTFAMILY_PENTIUM4: + return "Intel Pentium 4"; + case CPUID_EXTFAMILY_ITANIUM2: + return "Intel Itanium 2"; + } + default: + return "Unknown Intel Family"; + } + break; + case CPUID_BRAND_CELERON_1: + case CPUID_BRAND_CELERON_A: + case CPUID_BRAND_CELERON_14: + return "Intel Celeron"; + case CPUID_BRAND_PENTIUM_III_2: + case CPUID_BRAND_PENTIUM_III_4: + return "Pentium III"; + case CPUID_BRAND_PIII_XEON: + if (info_p->cpuid_signature == 0x6B1) + return "Intel Celeron"; + else + return "Intel Pentium III Xeon"; + case CPUID_BRAND_PENTIUM_III_M: + return "Mobile Intel Pentium III-M"; + case CPUID_BRAND_M_CELERON_7: + case CPUID_BRAND_M_CELERON_F: + case CPUID_BRAND_M_CELERON_13: + case CPUID_BRAND_M_CELERON_17: + return "Mobile Intel Celeron"; + case CPUID_BRAND_PENTIUM4_8: + case CPUID_BRAND_PENTIUM4_9: + return "Intel Pentium 4"; + case CPUID_BRAND_XEON: + return "Intel Xeon"; + case CPUID_BRAND_XEON_MP: + return "Intel Xeon MP"; + case CPUID_BRAND_PENTIUM4_M: + if (info_p->cpuid_signature == 0xF13) + return "Intel Xeon"; + else + return "Mobile Intel Pentium 4"; + case CPUID_BRAND_CELERON_M: + return "Intel Celeron M"; + case CPUID_BRAND_PENTIUM_M: + return "Intel Pentium M"; + case CPUID_BRAND_MOBILE_15: + case CPUID_BRAND_MOBILE_17: + return "Mobile Intel"; + } + + return "Unknown Intel"; +} - /* decode family/model/type */ - switch (info_p->cpuid_type) { - case CPUID_TYPE_OVERDRIVE: - strcat(info_p->model_string, "Overdrive "); - break; - case CPUID_TYPE_DUAL: - strcat(info_p->model_string, "Dual "); - break; - } - strcat(info_p->model_string, - cpuid_intel_get_model_name(info_p->cpuid_brand, - info_p->cpuid_family, - info_p->cpuid_model, - info_p->cpuid_signature)); - info_p->model_string[sizeof(info_p->model_string)-1] = '\0'; +static void set_intel_cache_info( i386_cpu_info_t * info_p ) +{ + uint32_t cpuid_result[4]; + uint32_t l1d_cache_linesize = 0; + unsigned int i; + unsigned int j; /* get processor cache descriptor info */ do_cpuid(2, cpuid_result); @@ -326,6 +333,8 @@ set_cpu_intel(i386_cpu_info_t *info_p) info_p->cache_size[descp->type] = descp->size; if (descp->type == L2U) info_p->cache_linesize = descp->linesize; + if (descp->type == L1D) + l1d_cache_linesize = descp->linesize; break; } } @@ -335,20 +344,205 @@ set_cpu_intel(i386_cpu_info_t *info_p) info_p->cache_size[L2U] = 256*1024; info_p->cache_linesize = 32; } + /* If we have no L2 cache, use the L1 data cache line size */ + if (info_p->cache_size[L2U] == 0) + info_p->cache_linesize = l1d_cache_linesize; +} + +static void set_cpu_intel( i386_cpu_info_t * info_p ) +{ + set_cpu_generic(info_p); + set_intel_cache_info(info_p); + info_p->cpuid_model_string = get_intel_model_string(info_p); +} + +static const char * get_amd_model_string( i386_cpu_info_t * info_p ) +{ + switch 
(info_p->cpuid_family) + { + case CPUID_FAMILY_486: + switch (info_p->cpuid_model) { + case CPUID_MODEL_AM486_DX: + case CPUID_MODEL_AM486_DX2: + case CPUID_MODEL_AM486_DX2WB: + case CPUID_MODEL_AM486_DX4: + case CPUID_MODEL_AM486_DX4WB: + return "Am486"; + case CPUID_MODEL_AM486_5X86: + case CPUID_MODEL_AM486_5X86WB: + return "Am5x86"; + } + break; + case CPUID_FAMILY_586: + switch (info_p->cpuid_model) { + case CPUID_MODEL_K5M0: + case CPUID_MODEL_K5M1: + case CPUID_MODEL_K5M2: + case CPUID_MODEL_K5M3: + return "AMD-K5"; + case CPUID_MODEL_K6M6: + case CPUID_MODEL_K6M7: + return "AMD-K6"; + case CPUID_MODEL_K6_2: + return "AMD-K6-2"; + case CPUID_MODEL_K6_III: + return "AMD-K6-III"; + } + break; + case CPUID_FAMILY_686: + switch (info_p->cpuid_model) { + case CPUID_MODEL_ATHLON_M1: + case CPUID_MODEL_ATHLON_M2: + case CPUID_MODEL_ATHLON_M4: + case CPUID_MODEL_ATHLON_M6: + case CPUID_MODEL_ATHLON_M8: + case CPUID_MODEL_ATHLON_M10: + return "AMD Athlon"; + case CPUID_MODEL_DURON_M3: + case CPUID_MODEL_DURON_M7: + return "AMD Duron"; + default: + return "Unknown AMD Athlon"; + } + case CPUID_FAMILY_EXTENDED: + switch (info_p->cpuid_model) { + case CPUID_MODEL_ATHLON64: + return "AMD Athlon 64"; + case CPUID_MODEL_OPTERON: + return "AMD Opteron"; + default: + return "Unknown AMD-64"; + } + } + return "Unknown AMD"; +} + +static void set_amd_cache_info( i386_cpu_info_t * info_p ) +{ + uint32_t cpuid_result[4]; + + /* It would make sense to fill in info_p->cache_info with complete information + * on the TLBs and data cache associativity, lines, etc, either by mapping + * to the Intel tags (if possible), or replacing cache_info with a generic + * mechanism. But right now, nothing makes use of that information (that I know + * of). + */ + + /* L1 Cache and TLB Information */ + do_cpuid(0x80000005, cpuid_result); + + /* EAX: TLB Information for 2-Mbyte and 4-MByte Pages */ + /* (ignore) */ + + /* EBX: TLB Information for 4-Kbyte Pages */ + /* (ignore) */ + + /* ECX: L1 Data Cache Information */ + info_p->cache_size[L1D] = ((cpuid_result[2] >> 24) & 0xFF) * 1024; + info_p->cache_linesize = (cpuid_result[2] & 0xFF); + + /* EDX: L1 Instruction Cache Information */ + info_p->cache_size[L1I] = ((cpuid_result[3] >> 24) & 0xFF) * 1024; + + /* L2 Cache Information */ + do_cpuid(0x80000006, cpuid_result); + + /* EAX: L2 TLB Information for 2-Mbyte and 4-Mbyte Pages */ + /* (ignore) */ + + /* EBX: L2 TLB Information for 4-Kbyte Pages */ + /* (ignore) */ + + /* ECX: L2 Cache Information */ + info_p->cache_size[L2U] = ((cpuid_result[2] >> 16) & 0xFFFF) * 1024; + if (info_p->cache_size[L2U] > 0) + info_p->cache_linesize = cpuid_result[2] & 0xFF; +} + +static void set_cpu_amd( i386_cpu_info_t * info_p ) +{ + set_cpu_generic(info_p); + set_amd_cache_info(info_p); + info_p->cpuid_model_string = get_amd_model_string(info_p); +} + +static void set_cpu_nsc( i386_cpu_info_t * info_p ) +{ + set_cpu_generic(info_p); + set_amd_cache_info(info_p); + + if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX1) + info_p->cpuid_model_string = "AMD Geode GX1"; + else if (info_p->cpuid_family == CPUID_FAMILY_586 && info_p->cpuid_model == CPUID_MODEL_GX2) + info_p->cpuid_model_string = "AMD Geode GX"; + else + info_p->cpuid_model_string = "Unknown National Semiconductor"; +} + +static void +set_cpu_generic(i386_cpu_info_t *info_p) +{ + uint32_t cpuid_result[4]; + uint32_t max_extid; + char str[128], *p; + + /* get extended cpuid results */ + do_cpuid(0x80000000, cpuid_result); + max_extid = 
cpuid_result[0]; + + /* check to see if we can get brand string */ + if (max_extid >= 0x80000004) { + /* + * The brand string is 48 bytes (max), guaranteed to + * be NUL terminated. + */ + do_cpuid(0x80000002, cpuid_result); + bcopy((char *)cpuid_result, &str[0], 16); + do_cpuid(0x80000003, cpuid_result); + bcopy((char *)cpuid_result, &str[16], 16); + do_cpuid(0x80000004, cpuid_result); + bcopy((char *)cpuid_result, &str[32], 16); + for (p = str; *p != '\0'; p++) { + if (*p != ' ') break; + } + strncpy(info_p->cpuid_brand_string, + p, sizeof(info_p->cpuid_brand_string)-1); + info_p->cpuid_brand_string[sizeof(info_p->cpuid_brand_string)-1] = '\0'; + + if (!strcmp(info_p->cpuid_brand_string, CPUID_STRING_UNKNOWN)) { + /* + * This string means we have a BIOS-programmable brand string, + * and the BIOS couldn't figure out what sort of CPU we have. + */ + info_p->cpuid_brand_string[0] = '\0'; + } + } + + /* get processor signature and decode */ + do_cpuid(1, cpuid_result); + info_p->cpuid_signature = cpuid_result[0]; + info_p->cpuid_stepping = cpuid_result[0] & 0x0f; + info_p->cpuid_model = (cpuid_result[0] >> 4) & 0x0f; + info_p->cpuid_family = (cpuid_result[0] >> 8) & 0x0f; + info_p->cpuid_type = (cpuid_result[0] >> 12) & 0x03; + info_p->cpuid_extmodel = (cpuid_result[0] >> 16) & 0x0f; + info_p->cpuid_extfamily = (cpuid_result[0] >> 20) & 0xff; + info_p->cpuid_brand = cpuid_result[1] & 0xff; + info_p->cpuid_features = cpuid_result[3]; return; } static void -set_cpu_unknown(i386_cpu_info_t *info_p) +set_cpu_unknown(__unused i386_cpu_info_t *info_p) { - strcat(info_p->model_string, "Unknown"); + info_p->cpuid_model_string = "Unknown"; } static struct { uint32_t mask; - char *name; + const char *name; } feature_names[] = { {CPUID_FEATURE_FPU, "FPU",}, {CPUID_FEATURE_VME, "VME",}, @@ -405,8 +599,8 @@ cpuid_get_feature_names(uint32_t feature, char *buf, unsigned buf_len) void cpuid_feature_display( - char *header, - int my_cpu) + const char *header, + __unused int my_cpu) { char buf[256]; @@ -416,13 +610,13 @@ cpuid_feature_display( void cpuid_cpu_display( - char *header, - int my_cpu) + const char *header, + __unused int my_cpu) { + if (cpuid_cpu_info.cpuid_brand_string[0] != '\0') { printf("%s: %s\n", header, - (cpuid_cpu_info.cpuid_brand_string[0] != '\0') ? - cpuid_cpu_info.cpuid_brand_string : - cpuid_cpu_info.model_string); + cpuid_cpu_info.cpuid_brand_string); + } } unsigned int @@ -434,6 +628,22 @@ cpuid_family(void) unsigned int cpuid_features(void) { + static int checked = 0; + char fpu_arg[16] = { 0 }; + if (!checked) { + /* check for boot-time fpu limitations */ + if (PE_parse_boot_arg("_fpu", &fpu_arg[0])) { + printf("limiting fpu features to: %s\n", fpu_arg); + if (!strncmp("387", fpu_arg, sizeof "387") || !strncmp("mmx", fpu_arg, sizeof "mmx")) { + printf("no sse or sse2\n"); + cpuid_cpu_info.cpuid_features &= ~(CPUID_FEATURE_SSE | CPUID_FEATURE_SSE2 | CPUID_FEATURE_FXSR); + } else if (!strncmp("sse", fpu_arg, sizeof "sse")) { + printf("no sse2\n"); + cpuid_cpu_info.cpuid_features &= ~(CPUID_FEATURE_SSE2); + } + } + checked = 1; + } return cpuid_cpu_info.cpuid_features; } diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index d7df13261..cf3512b0a 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
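
The brand-string sequence in set_cpu_generic() above is easy to exercise outside the kernel. A minimal user-space sketch follows; the do_cpuid() here is a hypothetical stand-in for the kernel helper, written with GCC inline asm, and must be built for x86:

#include <stdio.h>
#include <string.h>

/* Hypothetical user-space stand-in for the kernel's do_cpuid() helper. */
static void do_cpuid(unsigned int selector, unsigned int *data)
{
	__asm__ volatile("cpuid"
	    : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3])
	    : "a" (selector));
}

int main(void)
{
	unsigned int regs[4];
	char str[48 + 1], *p;

	do_cpuid(0x80000000, regs);
	if (regs[0] < 0x80000004)
		return 1;			/* no brand string available */

	/* Leaves 0x80000002..4 each return 16 bytes of the 48-byte string. */
	do_cpuid(0x80000002, regs); memcpy(&str[0], regs, 16);
	do_cpuid(0x80000003, regs); memcpy(&str[16], regs, 16);
	do_cpuid(0x80000004, regs); memcpy(&str[32], regs, 16);
	str[48] = '\0';

	for (p = str; *p == ' '; p++)	/* skip leading-space padding */
		continue;
	printf("brand string: %s\n", p);
	return 0;
}
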
* * @APPLE_LICENSE_HEADER_START@ * @@ -40,7 +40,14 @@ #define CPUID_VID_UMC "UMC UMC UMC " #define CPUID_VID_AMD "AuthenticAMD" #define CPUID_VID_CYRIX "CyrixInstead" -#define CPUID_VID_NEXTGEN "NexGenDriven" +#define CPUID_VID_NEXGEN "NexGenDriven" +#define CPUID_VID_CENTAUR "CentaurHauls" +#define CPUID_VID_RISE "RiseRiseRise" +#define CPUID_VID_SIS "SiS SiS SiS " +#define CPUID_VID_TRANSMETA "GenuineTMx86" +#define CPUID_VID_NSC "Geode by NSC" + +#define CPUID_STRING_UNKNOWN "Unknown CPU Typ" #define CPUID_FEATURE_FPU 0x00000001 /* Floating point unit on-chip */ #define CPUID_FEATURE_VME 0x00000002 /* Virtual Mode Extension */ @@ -77,13 +84,11 @@ #define CPUID_TYPE_RESERVED 0x3 /* Reserved */ #define CPUID_FAMILY_386 0x3 /* Intel 386 (not part of CPUID) */ -#define CPUID_FAMILY_486 0x4 /* Intel 486 */ -#define CPUID_FAMILY_P5 0x5 /* Intel Pentium */ -#define CPUID_FAMILY_PPRO 0x6 /* Intel Pentium Pro, II, III */ -#define CPUID_FAMILY_PENTIUM4 0xF /* Intel Pentium 4 */ #define CPUID_MODEL_I386_DX 0x0 /* Intel 386 (not part of CPUID) */ +#define CPUID_FAMILY_486 0x4 /* Intel 486 */ + #define CPUID_MODEL_I486_DX 0x0 /* Intel 486DX */ #define CPUID_MODEL_I486_DX_S 0x1 /* Intel 486DX-S */ #define CPUID_MODEL_I486_SX 0x2 /* Intel 486SX */ @@ -102,9 +107,12 @@ #define CPUID_MODEL_AM486_5X86 0xE /* AMD 5x86 */ #define CPUID_MODEL_AM486_5X86WB 0xF /* AMD 5x86WB */ +#define CPUID_MODEL_MEDIAGX 0x4 /* Cyrix MediaGX */ #define CPUID_MODEL_CYRIX5X86 0x9 /* CYRIX 5X86 */ -#define CPUID_MODEL_UMC5SD 0x1 /* UMC U5SD */ +#define CPUID_FAMILY_586 0x5 /* Intel Pentium, AMD K5/K6*/ + +#define CPUID_MODEL_UMC5D 0x1 /* UMC U5D */ #define CPUID_MODEL_UMC5S 0x2 /* UMC U5S */ #define CPUID_MODEL_UMC486_DX2 0x3 /* UMC U486_DX2 */ #define CPUID_MODEL_UMC486_SX2 0x5 /* UMC U486_SX2 */ @@ -114,14 +122,94 @@ #define CPUID_MODEL_P54 0x2 /* Intel P5 75/80/100/120/133/166 */ #define CPUID_MODEL_P24T 0x3 /* Intel P5 Overdrive 63/83 */ +#define CPUID_MODEL_K5M0 0x0 /* AMD-K5 Model 0 */ +#define CPUID_MODEL_K5M1 0x1 /* AMD-K5 Model 1 */ +#define CPUID_MODEL_K5M2 0x2 /* AMD-K5 Model 2 */ +#define CPUID_MODEL_K5M3 0x3 /* AMD-K5 Model 3 */ +#define CPUID_MODEL_K6M6 0x6 /* AMD-K6 Model 6 */ +#define CPUID_MODEL_K6M7 0x7 /* AMD-K6 Model 7 */ +#define CPUID_MODEL_K6_2 0x8 /* AMD-K6-2 Model 8 */ +#define CPUID_MODEL_K6_III 0x9 /* AMD-K6-III Model 9 */ + +#define CPUID_MODEL_CYRIX_M1 0x2 /* Cyrix M1 */ +#define CPUID_MODEL_MEDIAGX_MMX 0x4 /* Cyrix MediaGX MMX Enhanced */ + +#define CPUID_FAMILY_686 0x6 /* Intel Pentium Pro, II, III; AMD Athlon */ + #define CPUID_MODEL_P6 0x1 /* Intel P6 */ #define CPUID_MODEL_PII 0x3 /* Intel PII */ #define CPUID_MODEL_P65 0x5 /* Intel PII/Xeon/Celeron model 5 */ #define CPUID_MODEL_P66 0x6 /* Intel Celeron model 6 */ #define CPUID_MODEL_P67 0x7 /* Intel PIII/Xeon model 7 */ #define CPUID_MODEL_P68 0x8 /* Intel PIII/Xeon/Celeron model 8 */ +#define CPUID_MODEL_PM9 0x9 /* Intel Pentium M model 9 */ #define CPUID_MODEL_P6A 0xA /* Intel PIII Xeon model A */ #define CPUID_MODEL_P6B 0xB /* Intel PIII model B */ +#define CPUID_MODEL_PMD 0xD /* Intel Pentium M model D */ + +#define CPUID_MODEL_ATHLON_M1 0x1 /* AMD Athlon Model 1 */ +#define CPUID_MODEL_ATHLON_M2 0x2 /* AMD Athlon Model 2 */ +#define CPUID_MODEL_DURON_M3 0x3 /* AMD Duron Model 3 */ +#define CPUID_MODEL_ATHLON_M4 0x4 /* AMD Athlon Model 4 */ +#define CPUID_MODEL_ATHLON_M6 0x6 /* (Mobile) AMD Athlon/Duron MP/XP/4 Model 6 */ +#define CPUID_MODEL_DURON_M7 0x7 /* (Mobile) AMD Duron Model 7 */ +#define CPUID_MODEL_ATHLON_M8 0x8 /* (Mobile) 
Athlon XP/MP/XP-M Model 8 */ +#define CPUID_MODEL_ATHLON_M10 0xA /* (Mobile) AMD Athlon XP/MP/XP-M/XP-M(LV) Model 10 */ + +#define CPUID_MODEL_CYRIX_M2 0x0 /* Cyrix M2 */ +#define CPUID_MODEL_CYRIX_MII 0x2 /* VIA Cyrix MII (6x86MX) */ +#define CPUID_MODEL_VIA_CYRIX_M2 0x5 /* VIA C3 Cyrix M2 */ +#define CPUID_MODEL_WINCHIP_C5A 0x6 /* VIA C3 WinChip C5A */ +#define CPUID_MODEL_WINCHIP_C5BC 0x7 /* VIA C3 WinChip C5B/C5C */ +#define CPUID_MODEL_WINCHIP_C5N 0x8 /* VIA C3 WinChip C5N */ +#define CPUID_MODEL_WINCHIP_C5XLP 0x9 /* VIA C3 WinChip C5P */ + +#define CPUID_MODEL_NX586 0x0 /* NexGen Nx586 */ + +#define CPUID_MODEL_RISE_MP6_0 0x0 /* Rise mP6 */ +#define CPUID_MODEL_RISE_MP6_2 0x2 /* Rise mP6 */ + +#define CPUID_MODEL_SIS_55X 0x0 /* SIS 55x */ + +#define CPUID_MODEL_TM_CRUSOE 0x4 /* Transmeta Crusoe TM3x00 and TM5x00 */ + +#define CPUID_MODEL_CENTAUR_C6 0x4 /* Centaur C6 */ +#define CPUID_MODEL_CENTAUR_C2 0x8 /* Centaur C2 */ +#define CPUID_MODEL_CENTAUR_C3 0x9 /* Centaur C3 */ + +#define CPUID_MODEL_GX1 0x4 /* AMD Geode GX1 */ +#define CPUID_MODEL_GX2 0x5 /* AMD Geode GX */ + +#define CPUID_FAMILY_ITANIUM 0x7 /* Intel Itanium */ +#define CPUID_FAMILY_EXTENDED 0xF /* Intel Pentium 4, Itanium 2 */ + +#define CPUID_EXTFAMILY_PENTIUM4 0x0 /* Intel Pentium 4 */ +#define CPUID_EXTFAMILY_ITANIUM2 0x1 /* Intel Itanium 2 */ + +#define CPUID_MODEL_ATHLON64 0x4 /* AMD Athlon 64 Model 4 */ +#define CPUID_MODEL_OPTERON 0x5 /* AMD Opteron Model 5 */ + +#define CPUID_BRAND_UNSUPPORTED 0x00 +#define CPUID_BRAND_CELERON_1 0x01 /* Intel Celeron */ +#define CPUID_BRAND_PENTIUM_III_2 0x02 /* Intel Pentium III */ +#define CPUID_BRAND_PIII_XEON 0x03 /* Intel Pentium III Xeon / Celeron */ +#define CPUID_BRAND_PENTIUM_III_4 0x04 /* Intel Pentium III */ +#define CPUID_BRAND_PENTIUM_III_M 0x05 /* Mobile Intel Pentium III-M */ +#define CPUID_BRAND_M_CELERON_7 0x07 /* Mobile Intel Celeron */ +#define CPUID_BRAND_PENTIUM4_8 0x08 /* Intel Pentium 4 */ +#define CPUID_BRAND_PENTIUM4_9 0x09 /* Intel Pentium 4 */ +#define CPUID_BRAND_CELERON_A 0x0A /* Intel Celeron */ +#define CPUID_BRAND_XEON 0x0B /* Intel Xeon (MP) */ +#define CPUID_BRAND_XEON_MP 0x0C /* Intel Xeon MP */ +#define CPUID_BRAND_PENTIUM4_M 0x0E /* Mobile Intel Pentium 4-M / Xeon */ +#define CPUID_BRAND_M_CELERON_F 0x0F /* Mobile Intel Celeron */ +#define CPUID_BRAND_MOBILE_17 0x11 /* Mobile Genuine Intel */ +#define CPUID_BRAND_CELERON_M 0x12 /* Intel Celeron M */ +#define CPUID_BRAND_M_CELERON_13 0x13 /* Mobile Intel Celeron */ +#define CPUID_BRAND_CELERON_14 0x14 /* Intel Celeron */ +#define CPUID_BRAND_MOBILE_15 0x15 /* Mobile Genuine Intel */ +#define CPUID_BRAND_PENTIUM_M 0x16 /* Intel Pentium M */ +#define CPUID_BRAND_M_CELERON_17 0x17 /* Mobile Intel Celeron */ #define CPUID_CACHE_SIZE 16 /* Number of descriptor values */ @@ -133,7 +221,17 @@ #define CPUID_CACHE_ICACHE_8K 0x06 /* Instruction cache, 8K */ #define CPUID_CACHE_ICACHE_16K 0x08 /* Instruction cache, 16K */ #define CPUID_CACHE_DCACHE_8K 0x0A /* Data cache, 8K */ -#define CPUID_CACHE_DCACHE_16K 0x0C /* Data cache, 16K */ +#define CPUID_CACHE_DCACHE_16K 0x0C /* Data cache, 16K */ +#define CPUID_CACHE_L3CACHE_512K 0x22 /* 3rd-level cache, 512K */ +#define CPUID_CACHE_L3CACHE_1M 0x23 /* 3rd-level cache, 1M */ +#define CPUID_CACHE_L3CACHE_2M 0x25 /* 3rd-level cache, 2M */ +#define CPUID_CACHE_L3CACHE_4M 0x29 /* 3rd-level cache, 4M */ +#define CPUID_CACHE_DCACHE_32K 0x2C /* Data cache, 32K, 8-way */ +#define CPUID_CACHE_ICACHE_32K 0x30 /* Instruction cache, 32K, 8-way */ +#define
CPUID_CACHE_UCACHE_128K_S4 0x39 /* 2nd-level cache, 128K, 4-way, sectored */ +#define CPUID_CACHE_UCACHE_128K_S2 0x3B /* 2nd-level cache, 128K, 2-way, sectored */ +#define CPUID_CACHE_UCACHE_256K_S4 0x3C /* 2nd-level cache, 256K, 4-way, sectored */ +#define CPUID_CACHE_NOCACHE 0x40 /* No 2nd level or 3rd-level cache */ #define CPUID_CACHE_UCACHE_128K 0x41 /* 2nd-level cache, 128K */ #define CPUID_CACHE_UCACHE_256K 0x42 /* 2nd-level cache, 256K */ #define CPUID_CACHE_UCACHE_512K 0x43 /* 2nd-level cache, 512K */ @@ -145,20 +243,30 @@ #define CPUID_CACHE_DTLB_64 0x5B /* Data TLB, 64 entries */ #define CPUID_CACHE_DTLB_128 0x5C /* Data TLB, 128 entries */ #define CPUID_CACHE_DTLB_256 0x5D /* Data TLB, 256 entries */ +#define CPUID_CACHE_DCACHE_16K_8 0x60 /* Data cache, 16K, 64 byte line size, 8-way */ #define CPUID_CACHE_DCACHE_8K_64 0x66 /* Data cache, 8K, 64 byte line size */ #define CPUID_CACHE_DCACHE_16K_64 0x67 /* Data cache, 16K, 64 byte line size */ #define CPUID_CACHE_DCACHE_32K_64 0x68 /* Data cache, 32K, 64 byte line size */ #define CPUID_CACHE_TRACE_12K 0x70 /* Trace cache 12K-uop, 8-way */ #define CPUID_CACHE_TRACE_16K 0x71 /* Trace cache 16K-uop, 8-way */ #define CPUID_CACHE_TRACE_32K 0x72 /* Trace cache 32K-uop, 8-way */ +#define CPUID_CACHE_UCACHE_1M_64_4 0x78 /* 2nd-level, 1M, 4-way, 64 bytes */ #define CPUID_CACHE_UCACHE_128K_64 0x79 /* 2nd-level, 128K, 8-way, 64 bytes */ #define CPUID_CACHE_UCACHE_256K_64 0x7A /* 2nd-level, 256K, 8-way, 64 bytes */ #define CPUID_CACHE_UCACHE_512K_64 0x7B /* 2nd-level, 512K, 8-way, 64 bytes */ #define CPUID_CACHE_UCACHE_1M_64 0x7C /* 2nd-level, 1M, 8-way, 64 bytes */ +#define CPUID_CACHE_UCACHE_2M_64 0x7D /* 2nd-level, 2M, 8-way, 64 bytes */ +#define CPUID_CACHE_UCACHE_512K_64_2 0x7F /* 2nd-level, 512K, 2-way, 64 bytes */ #define CPUID_CACHE_UCACHE_256K_32 0x82 /* 2nd-level, 256K, 8-way, 32 bytes */ #define CPUID_CACHE_UCACHE_512K_32 0x83 /* 2nd-level, 512K, 8-way, 32 bytes */ #define CPUID_CACHE_UCACHE_1M_32 0x84 /* 2nd-level, 1M, 8-way, 32 bytes */ #define CPUID_CACHE_UCACHE_2M_32 0x85 /* 2nd-level, 2M, 8-way, 32 bytes */ +#define CPUID_CACHE_UCACHE_512K_64_4 0x86 /* 2nd-level, 512K, 4-way, 64 bytes */ +#define CPUID_CACHE_UCACHE_1M_64_8 0x87 /* 2nd-level, 1M, 8-way, 64 bytes */ +#define CPUID_CACHE_ITLB_128_4 0xB0 /* Instruction TLB, 4-way, 128 entries */ +#define CPUID_CACHE_DTLB_128_4 0xB3 /* Data TLB, 4-way, 128 entries */ +#define CPUID_CACHE_PREFETCH_64 0xF0 /* 64-Byte Prefetching */ +#define CPUID_CACHE_PREFETCH_128 0xF1 /* 128-Byte Prefetching */ #ifndef ASSEMBLER #include @@ -182,14 +290,14 @@ do_cpuid(uint32_t selector, uint32_t *data) * Cache ID descriptor structure. * Note: description string absent in kernel.
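
To make the descriptor values above concrete: CPUID leaf 2 packs these one-byte descriptors into EAX..EDX (the low byte of EAX gives the number of times the leaf must be executed, and a register's bytes are valid only when its bit 31 is clear), and set_intel_cache_info() looks each byte up in cpuid_cache_desc_tab. A cut-down sketch of that lookup, with a hypothetical two-entry table:

#include <stdio.h>

typedef enum { Lnone, L1I, L1D, L2U, L3U, LCACHE_MAX } cache_type_t;

typedef struct {
	unsigned char	value;		/* descriptor byte from CPUID leaf 2 */
	cache_type_t	type;
	unsigned int	size;
	unsigned int	linesize;
	const char	*description;
} cache_desc_t;

/* Hypothetical cut-down table; the kernel's cpuid_cache_desc_tab has dozens of rows. */
static const cache_desc_t tab[] = {
	{ 0x2C, L1D, 32*1024,  64, "Data L1 cache, 32K, 8-way, 64 byte lines" },
	{ 0x41, L2U, 128*1024, 32, "Unified L2 cache, 128K, 4-way, 32 byte lines" },
	{ 0x00, Lnone, 0, 0, NULL },	/* terminator, like CPUID_CACHE_NULL */
};

static void lookup(unsigned char descriptor)
{
	const cache_desc_t *d;

	/* linear scan, exactly as the kernel's table walk does */
	for (d = tab; d->description != NULL; d++)
		if (d->value == descriptor) {
			printf("0x%02x: %s\n", descriptor, d->description);
			return;
		}
	printf("0x%02x: unrecognized descriptor\n", descriptor);
}

int main(void)
{
	lookup(0x2C);
	lookup(0x41);
	lookup(0x99);
	return 0;
}
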
*/ -typedef enum { Lnone, L1I, L1D, L2U, LCACHE_MAX } cache_type_t ; +typedef enum { Lnone, L1I, L1D, L2U, L3U, LCACHE_MAX } cache_type_t ; typedef struct { unsigned char value; /* Descriptor value */ cache_type_t type; /* Cache type */ unsigned int size; /* Cache size */ unsigned int linesize; /* Cache line size */ #ifdef KERNEL - char *description; /* Cache description */ + const char *description; /* Cache description */ #endif /* KERNEL */ } cpuid_cache_desc_t; @@ -205,6 +313,7 @@ typedef struct { typedef struct { char cpuid_vendor[16]; char cpuid_brand_string[48]; + const char *cpuid_model_string; uint32_t cpuid_value; cpu_type_t cpuid_type; @@ -220,29 +329,36 @@ typedef struct { uint32_t cache_size[LCACHE_MAX]; uint32_t cache_linesize; - char model_string[64]; /* sanitized model string */ uint8_t cache_info[64]; /* list of cache descriptors */ } i386_cpu_info_t; +#ifdef __cplusplus +extern "C" { +#endif /* * External declarations */ extern cpu_type_t cpuid_cputype(int); -extern void cpuid_cpu_display(char *, int); -extern void cpuid_features_display(char *, int); +extern void cpuid_cpu_display(const char *, __unused int); +extern void cpuid_feature_display(const char *, __unused int); extern char * cpuid_get_feature_names(uint32_t, char *, unsigned); extern uint32_t cpuid_features(void); extern uint32_t cpuid_family(void); + +extern void cpuid_get_info(i386_cpu_info_t *info_p); +extern i386_cpu_info_t *cpuid_info(void); -extern char * cpuid_intel_get_model_name(uint8_t, uint8_t, - uint8_t, uint32_t); +/* XXX obsolescent: */ +extern uint32_t cpuid_feature; +extern void set_cpu_model(void); -extern i386_cpu_info_t *cpuid_info(void); +#ifdef __cplusplus +} +#endif -extern uint32_t cpuid_feature; /* XXX obsolescent */ #endif /* ASSEMBLER */ #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/i386/cswitch.s b/osfmk/i386/cswitch.s index 4de4aabd1..107bc26af 100644 --- a/osfmk/i386/cswitch.s +++ b/osfmk/i386/cswitch.s @@ -50,15 +50,12 @@ /* */ -#include #include #include #include #include -#if NCPUS > 1 - #ifdef SYMMETRY #include #endif @@ -69,13 +66,6 @@ #define CX(addr, reg) addr(,reg,4) -#else /* NCPUS == 1 */ - -#define CPU_NUMBER(reg) -#define CX(addr,reg) addr - -#endif /* NCPUS == 1 */ - /* * Context switch routines for i386. 
*/ @@ -85,17 +75,15 @@ Entry(Load_context) movl TH_KERNEL_STACK(%ecx),%ecx /* get kernel stack */ lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%edx /* point to stack top */ - CPU_NUMBER(%eax) - movl %ecx,CX(EXT(active_stacks),%eax) /* store stack address */ - movl %edx,CX(EXT(kernel_stack),%eax) /* store stack top */ + movl %ecx,%gs:CPU_ACTIVE_STACK /* store stack address */ + movl %edx,%gs:CPU_KERNEL_STACK /* store stack top */ + + movl %edx,%esp + movl %edx,%ebp - movl KSS_ESP(%ecx),%esp /* switch stacks */ - movl KSS_ESI(%ecx),%esi /* restore registers */ - movl KSS_EDI(%ecx),%edi - movl KSS_EBP(%ecx),%ebp - movl KSS_EBX(%ecx),%ebx xorl %eax,%eax /* return zero (no old thread) */ - jmp *KSS_EIP(%ecx) /* resume thread */ + pushl %eax + call EXT(thread_continue) /* * This really only has to save registers @@ -103,8 +91,7 @@ Entry(Load_context) */ Entry(Switch_context) - CPU_NUMBER(%edx) - movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack */ + movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) @@ -114,19 +101,17 @@ Entry(Switch_context) movl %esp,KSS_ESP(%ecx) /* save SP */ movl 0(%esp),%eax /* return old thread */ - movl 8(%esp),%esi /* get new thread */ - movl TH_TOP_ACT(%esi),%ebx /* get new_thread->top_act */ - movl $ CPD_ACTIVE_THREAD,%ecx - movl %ebx,%gs:(%ecx) /* new thread is active */ - movl TH_KERNEL_STACK(%esi),%ecx /* get its kernel stack */ + movl 8(%esp),%ebx /* get new thread */ + movl %ebx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ + movl TH_KERNEL_STACK(%ebx),%ecx /* get its kernel stack */ lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%ebx /* point to stack top */ - movl %ecx,CX(EXT(active_stacks),%edx) /* set current stack */ - movl %ebx,CX(EXT(kernel_stack),%edx) /* set stack top */ + movl %ecx,%gs:CPU_ACTIVE_STACK /* set current stack */ + movl %ebx,%gs:CPU_KERNEL_STACK /* set stack top */ - movl $0,CX(EXT(active_kloaded),%edx) + movl $0,%gs:CPU_ACTIVE_KLOADED movl KSS_ESP(%ecx),%esp /* switch stacks */ movl KSS_ESI(%ecx),%esi /* restore registers */ @@ -140,9 +125,8 @@ Entry(Thread_continue) xorl %ebp,%ebp /* zero frame pointer */ call *%ebx /* call real continuation */ -#if NCPUS > 1 /* - * void switch_to_shutdown_context(thread_t thread, + * void machine_processor_shutdown(thread_t thread, * void (*routine)(processor_t), * processor_t processor) * @@ -154,9 +138,8 @@ Entry(Thread_continue) * Assumes that the thread is a kernel thread (thus * has no FPU state) */ -Entry(switch_to_shutdown_context) - CPU_NUMBER(%edx) - movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack */ +Entry(machine_processor_shutdown) + movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) movl %edi,KSS_EDI(%ecx) @@ -169,18 +152,12 @@ Entry(switch_to_shutdown_context) movl 4(%esp),%ebx /* get routine to run next */ movl 8(%esp),%esi /* get its argument */ - movl CX(EXT(interrupt_stack),%edx),%ecx /* point to its intr stack */ - lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */ - - pushl %eax /* push thread */ - call EXT(thread_dispatch) /* reschedule thread */ - addl $4,%esp /* clean stack */ + movl %gs:CPU_INT_STACK_TOP,%esp /* switch to interrupt stack */ pushl %esi /* push argument */ call *%ebx /* call routine to run */ hlt /* (should never return) */ -#endif /* NCPUS > 1 */ .text diff --git a/osfmk/i386/db_interface.c b/osfmk/i386/db_interface.c index bd3a52369..4bb885b1b 100644 --- a/osfmk/i386/db_interface.c 
+++ b/osfmk/i386/db_interface.c @@ -53,7 +53,6 @@ /* * Interface to new debugger. */ -#include #include #include #include @@ -88,14 +87,13 @@ #include int db_active = 0; -int db_pass_thru[NCPUS]; struct i386_saved_state *i386_last_saved_statep; struct i386_saved_state i386_nested_saved_state; unsigned i386_last_kdb_sp; -vm_offset_t db_stacks[NCPUS]; - -extern thread_act_t db_default_act; +extern thread_t db_default_act; +extern pt_entry_t *DMAP1; +extern caddr_t DADDR1; #if MACH_MP_DEBUG extern int masked_state_cnt[]; @@ -155,8 +153,6 @@ extern void unlock_kdb(void); extern jmp_buf_t *db_recover; -spl_t saved_ipl[NCPUS]; /* just to know what IPL was before trap */ -struct i386_saved_state *saved_state[NCPUS]; /* * Translate the state saved in a task state segment into an @@ -173,11 +169,7 @@ db_tss_to_frame( int mycpu = cpu_number(); struct i386_tss *tss; -#if NCPUS == 1 - tss = &ktss; /* XXX */ -#else /* NCPUS > 1 */ - tss = mp_ktss[mycpu]; /* XXX */ -#endif /* NCPUS > 1 */ + tss = cpu_datap(mycpu)->cpu_desc_index.cdi_ktss; /* XXX */ /* * ddb will overwrite whatever's in esp, so put esp0 elsewhere, too. @@ -267,20 +259,16 @@ kdb_trap( kdbprinttrap(type, code, (int *)®s->eip, regs->uesp); } -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ - saved_ipl[cpu_number()] = s; - saved_state[cpu_number()] = regs; + current_cpu_datap()->cpu_kdb_saved_ipl = s; + current_cpu_datap()->cpu_kdb_saved_state = regs; i386_last_saved_statep = regs; i386_last_kdb_sp = (unsigned) &type; -#if NCPUS > 1 if (!kdb_enter(regs->eip)) goto kdb_exit; -#endif /* NCPUS > 1 */ /* Should switch to kdb's own stack here. */ @@ -332,25 +320,21 @@ kdb_trap( (db_get_task_value(regs->eip, BKPT_SIZE, FALSE, - db_target_space(current_act(), + db_target_space(current_thread(), trap_from_user)) == BKPT_INST)) regs->eip += BKPT_SIZE; -#if NCPUS > 1 kdb_exit: kdb_leave(); -#endif /* NCPUS > 1 */ - saved_state[cpu_number()] = 0; + current_cpu_datap()->cpu_kdb_saved_state = 0; #if MACH_MP_DEBUG - masked_state_cnt[cpu_number()] = 0; + current_cpu_datap()->cpu_masked_state_cnt = 0; #endif /* MACH_MP_DEBUG */ -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ splx(s); @@ -410,16 +394,12 @@ kdb_kentry( regs.fs = int_regs->fs; regs.gs = int_regs->gs; -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ - saved_state[cpu_number()] = ®s; + current_cpu_datap()->cpu_kdb_saved_state = ®s; -#if NCPUS > 1 if (!kdb_enter(regs.eip)) goto kdb_exit; -#endif /* NCPUS > 1 */ bcopy((char *)®s, (char *)&ddb_regs, sizeof (ddb_regs)); trap_from_user = IS_USER_TRAP(&ddb_regs, &etext); @@ -447,15 +427,11 @@ kdb_kentry( int_regs->fs = ddb_regs.fs & 0xffff; int_regs->gs = ddb_regs.gs & 0xffff; -#if NCPUS > 1 kdb_exit: kdb_leave(); -#endif /* NCPUS > 1 */ - saved_state[cpu_number()] = 0; + current_cpu_datap()->cpu_kdb_saved_state = 0; -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ splx(s); } @@ -499,7 +475,21 @@ db_user_to_kernel_address( } return(-1); } - *kaddr = (unsigned)ptetokv(*ptp) + (addr & (INTEL_PGBYTES-1)); + + src = (vm_offset_t)pte_to_pa(*ptp); + *(int *) DMAP1 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; +#if defined(I386_CPU) + if (cpu_class == CPUCLASS_386) { + invltlb(); + } else +#endif + { + invlpg((u_int)DADDR1); + } + + *kaddr = (unsigned)DADDR1 + (addr & PAGE_MASK); + return(0); } @@ -652,9 +642,9 @@ db_check_access( return(TRUE); task = kernel_task; } else if (task == TASK_NULL) { - if (current_act() == THR_ACT_NULL) + if (current_thread() == THREAD_NULL) 
return(FALSE); - task = current_act()->task; + task = current_thread()->task; } while (size > 0) { if (db_user_to_kernel_address(task, addr, &kern_addr, 0) < 0) @@ -680,9 +670,9 @@ db_phys_eq( if ((addr1 & (INTEL_PGBYTES-1)) != (addr2 & (INTEL_PGBYTES-1))) return(FALSE); if (task1 == TASK_NULL) { - if (current_act() == THR_ACT_NULL) + if (current_thread() == THREAD_NULL) return(FALSE); - task1 = current_act()->task; + task1 = current_thread()->task; } if (db_user_to_kernel_address(task1, addr1, &kern_addr1, 0) < 0 || db_user_to_kernel_address(task2, addr2, &kern_addr2, 0) < 0) @@ -764,21 +754,6 @@ db_task_name( db_printf(" "); } -#if NCPUS == 1 - -void -db_machdep_init(void) -{ - db_stacks[0] = (vm_offset_t)(db_stack_store + - INTSTACK_SIZE - sizeof (natural_t)); - dbtss.esp0 = (int)(db_task_stack_store + - INTSTACK_SIZE - sizeof (natural_t)); - dbtss.esp = dbtss.esp0; - dbtss.eip = (int)&db_task_start; -} - -#else /* NCPUS > 1 */ - /* * Code used to synchronize kdb among all cpus, one active at a time, switch * from on to another using kdb_on! #cpu or cpu #cpu @@ -792,8 +767,6 @@ decl_simple_lock_data(, kdb_lock) /* kdb lock */ int kdb_cpu = -1; /* current cpu running kdb */ int kdb_debug = 0; -int kdb_is_slave[NCPUS]; -int kdb_active[NCPUS]; volatile unsigned int cpus_holding_bkpts; /* counter for number of cpus holding breakpoints (ie: cpus that did not insert back breakpoints) */ @@ -804,8 +777,8 @@ db_machdep_init(void) { int c; - db_simple_lock_init(&kdb_lock, ETAP_MISC_KDB); - for (c = 0; c < NCPUS; ++c) { + db_simple_lock_init(&kdb_lock, 0); + for (c = 0; c < real_ncpus; ++c) { db_stacks[c] = (vm_offset_t) (db_stack_store + (INTSTACK_SIZE * (c + 1)) - sizeof (natural_t)); if (c == master_cpu) { @@ -832,30 +805,28 @@ db_machdep_init(void) int kdb_enter(int pc) { int my_cpu; int retval; -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ my_cpu = cpu_number(); - if (db_pass_thru[my_cpu]) { + if (current_cpu_datap()->cpu_db_pass_thru) { retval = 0; goto kdb_exit; } - kdb_active[my_cpu]++; + current_cpu_datap()->cpu_kdb_active++; lock_kdb(); if (kdb_debug) db_printf("kdb_enter: cpu %d, is_slave %d, kdb_cpu %d, run mode %d pc %x (%x) holds %d\n", - my_cpu, kdb_is_slave[my_cpu], kdb_cpu, + my_cpu, current_cpu_datap()->cpu_kdb_is_slave, kdb_cpu, db_run_mode, pc, *(int *)pc, cpus_holding_bkpts); if (db_breakpoints_inserted) cpus_holding_bkpts++; - if (kdb_cpu == -1 && !kdb_is_slave[my_cpu]) { + if (kdb_cpu == -1 && !current_cpu_datap()->cpu_kdb_is_slave) { kdb_cpu = my_cpu; remote_kdb(); /* stop other cpus */ retval = 1; @@ -865,9 +836,7 @@ kdb_enter(int pc) retval = 0; kdb_exit: -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ return (retval); } @@ -878,9 +847,7 @@ kdb_leave(void) int my_cpu; boolean_t wait = FALSE; -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ my_cpu = cpu_number(); @@ -890,8 +857,8 @@ kdb_leave(void) } if (db_breakpoints_inserted) cpus_holding_bkpts--; - if (kdb_is_slave[my_cpu]) - kdb_is_slave[my_cpu]--; + if (current_cpu_datap()->cpu_kdb_is_slave) + current_cpu_datap()->cpu_kdb_is_slave--; if (kdb_debug) db_printf("kdb_leave: cpu %d, kdb_cpu %d, run_mode %d pc %x (%x) holds %d\n", my_cpu, kdb_cpu, db_run_mode, @@ -899,11 +866,9 @@ kdb_leave(void) cpus_holding_bkpts); clear_kdb_intr(); unlock_kdb(); - kdb_active[my_cpu]--; + current_cpu_datap()->cpu_kdb_active--; -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ if (wait) { while(cpus_holding_bkpts); @@ -917,9 +882,7 @@ lock_kdb(void) register i; extern void kdb_console(void);
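
The kdb_enter()/kdb_leave() arbitration above boils down to: the first CPU to take kdb_lock while kdb_cpu is still -1 owns the debugger and IPIs the rest via remote_kdb(); later entrants park as slaves. A toy user-space model of that race, using a pthread mutex as a stand-in for kdb_lock (illustrative only, not the kernel's code):

#include <stdio.h>
#include <pthread.h>

/* Toy model of kdb_enter() arbitration: the first thread ("CPU") to take
 * the lock while no owner exists becomes kdb_cpu; later entrants are slaves. */
static pthread_mutex_t kdb_lock = PTHREAD_MUTEX_INITIALIZER;
static int kdb_cpu = -1;

static void *enter(void *arg)
{
	int my_cpu = (int)(long)arg;
	int owner = 0;

	pthread_mutex_lock(&kdb_lock);
	if (kdb_cpu == -1) {
		kdb_cpu = my_cpu;	/* this CPU now drives the debugger */
		owner = 1;		/* the real code would IPI the others here */
	}
	pthread_mutex_unlock(&kdb_lock);
	printf("cpu %d: %s\n", my_cpu, owner ? "owns kdb" : "parked as slave");
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, enter, (void *)i);
	for (i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
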
register i; extern void kdb_console(void); -#if NCPUS > 1 disable_preemption(); -#endif /* NCPUS > 1 */ my_cpu = cpu_number(); @@ -935,9 +898,7 @@ lock_kdb(void) } } -#if NCPUS > 1 enable_preemption(); -#endif /* NCPUS > 1 */ } #if TIME_STAMP @@ -1005,7 +966,7 @@ kdb_on( int cpu) { KDB_SAVE_CTXT(); - if (cpu < 0 || cpu >= NCPUS || !kdb_active[cpu]) + if (cpu < 0 || cpu >= real_ncpus || !cpu_datap(cpu)->cpu_kdb_active) return; db_set_breakpoints(); db_set_watchpoints(); @@ -1021,8 +982,6 @@ kdb_on( } } -#endif /* NCPUS > 1 */ - void db_reboot( db_expr_t addr, boolean_t have_addr, diff --git a/osfmk/i386/db_machdep.h b/osfmk/i386/db_machdep.h index 13c990439..147140acd 100644 --- a/osfmk/i386/db_machdep.h +++ b/osfmk/i386/db_machdep.h @@ -166,7 +166,7 @@ extern void db_task_name( /* macro for checking if a thread has used floating-point */ -#define db_act_fp_used(act) (act && act->mact.pcb->ims.ifps) +#define db_act_fp_used(act) (act && act->machine.pcb->ims.ifps) extern void db_tss_to_frame( int tss_sel, diff --git a/osfmk/i386/db_trace.c b/osfmk/i386/db_trace.c index 41349103f..b5ef0bc7d 100644 --- a/osfmk/i386/db_trace.c +++ b/osfmk/i386/db_trace.c @@ -81,22 +81,14 @@ struct i386_kernel_state ddb_null_kregs; extern vm_offset_t vm_min_inks_addr; /* set by db_clone_symtabXXX */ #define INKSERVER(va) (((vm_offset_t)(va)) >= vm_min_inks_addr) -#if NCPUS > 1 extern vm_offset_t interrupt_stack[]; #define ININTSTACK(va) \ (((vm_offset_t)(va)) >= interrupt_stack[cpu_number()] &&\ (((vm_offset_t)(va)) < interrupt_stack[cpu_number()] + \ INTSTACK_SIZE)) -#else /* NCPUS > 1 */ -extern char intstack[]; -#define ININTSTACK(va) \ - (((vm_offset_t)(va)) >= (vm_offset_t)intstack && \ - (((vm_offset_t)(va)) < ((vm_offset_t)&intstack) + \ - INTSTACK_SIZE)) -#endif /* NCPUS > 1 */ #define INKERNELSTACK(va, th) \ - (th == THR_ACT_NULL || \ + (th == THREAD_NULL || \ (((vm_offset_t)(va)) >= th->thread->kernel_stack && \ (((vm_offset_t)(va)) < th->thread->kernel_stack + \ KERNEL_STACK_SIZE)) || \ @@ -151,7 +143,7 @@ extern void db_nextframe( struct i386_frame **fp, db_addr_t *ip, int frame_type, - thread_act_t thr_act); + thread_t thr_act); extern int _setjmp( jmp_buf_t * jb); @@ -202,23 +194,23 @@ db_i386_reg_value( extern char etext; int *dp = 0; db_expr_t null_reg = 0; - register thread_act_t thr_act = ap->thr_act; + register thread_t thr_act = ap->thr_act; extern unsigned int_stack_high; int cpu; if (db_option(ap->modif, 'u')) { - if (thr_act == THR_ACT_NULL) { - if ((thr_act = current_act()) == THR_ACT_NULL) + if (thr_act == THREAD_NULL) { + if ((thr_act = current_thread()) == THREAD_NULL) db_error("no user registers\n"); } - if (thr_act == current_act()) { + if (thr_act == current_thread()) { if (IS_USER_TRAP(&ddb_regs, &etext)) dp = vp->valuep; else if (ddb_regs.ebp < int_stack_high) db_error("cannot get/set user registers in nested interrupt\n"); } } else { - if (thr_act == THR_ACT_NULL || thr_act == current_act()) { + if (thr_act == THREAD_NULL || thr_act == current_thread()) { dp = vp->valuep; } else { if (thr_act->thread && @@ -226,9 +218,9 @@ db_i386_reg_value( thr_act->thread->kernel_stack) { int cpu; - for (cpu = 0; cpu < NCPUS; cpu++) { - if (machine_slot[cpu].running == TRUE && - cpu_data[cpu].active_thread == thr_act->thread && saved_state[cpu]) { + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu_datap(cpu)->cpu_running == TRUE && + cpu_datap(cpu)->cpu_active_thread == thr_act->thread && saved_state[cpu]) { dp = (int *) (((int)saved_state[cpu]) + (((int) vp->valuep) - (int) &ddb_regs)); @@ 
-255,9 +247,9 @@ db_i386_reg_value( int cpu; if (!db_option(ap->modif, 'u')) { - for (cpu = 0; cpu < NCPUS; cpu++) { - if (machine_slot[cpu].running == TRUE && - cpu_data[cpu].active_thread == thr_act->thread && saved_state[cpu]) { + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu_datap(cpu)->cpu_running == TRUE && + cpu_datap(cpu)->cpu_active_thread == thr_act->thread && saved_state[cpu]) { dp = (int *) (((int)saved_state[cpu]) + (((int) vp->valuep) - (int) &ddb_regs)); @@ -266,9 +258,9 @@ db_i386_reg_value( } } if (dp == 0) { - if (!thr_act || thr_act->mact.pcb == 0) + if (!thr_act || thr_act->machine.pcb == 0) db_error("no pcb\n"); - dp = (int *)((int)(&thr_act->mact.pcb->iss) + + dp = (int *)((int)(&thr_act->machine.pcb->iss) + ((int)vp->valuep - (int)&ddb_regs)); } } @@ -372,7 +364,7 @@ db_nextframe( struct i386_frame **fp, /* in/out */ db_addr_t *ip, /* out */ int frame_type, /* in */ - thread_act_t thr_act) /* in */ + thread_t thr_act) /* in */ { extern char * trap_type[]; extern int TRAP_TYPES; @@ -380,7 +372,7 @@ db_nextframe( struct i386_saved_state *saved_regs; struct interrupt_frame *ifp; struct i386_interrupt_state *isp; - task_t task = (thr_act != THR_ACT_NULL)? thr_act->task: TASK_NULL; + task_t task = (thr_act != THREAD_NULL)? thr_act->task: TASK_NULL; switch(frame_type) { case TRAP: @@ -418,9 +410,9 @@ db_nextframe( db_printf(" <<<<<\n"); break; case SYSCALL: - if (thr_act != THR_ACT_NULL && thr_act->mact.pcb) { - *ip = (db_addr_t) thr_act->mact.pcb->iss.eip; - *fp = (struct i386_frame *) thr_act->mact.pcb->iss.ebp; + if (thr_act != THREAD_NULL && thr_act->machine.pcb) { + *ip = (db_addr_t) thr_act->machine.pcb->iss.eip; + *fp = (struct i386_frame *) thr_act->machine.pcb->iss.ebp; break; } /* falling down for unknown case */ @@ -453,7 +445,7 @@ db_stack_trace_cmd( char *filename; int linenum; task_t task; - thread_act_t th, top_act; + thread_t th, top_act; int user_frame; int frame_count; jmp_buf_t *prev; @@ -487,21 +479,21 @@ db_stack_trace_cmd( addr = (db_expr_t) queue_first(act_list); } else if (trace_thread) { if (have_addr) { - if (!db_check_act_address_valid((thread_act_t)addr)) { + if (!db_check_act_address_valid((thread_t)addr)) { if (db_lookup_task((task_t)addr) == -1) return; act_list = &(((task_t)addr)->thr_acts); addr = (db_expr_t) queue_first(act_list); } else { - act_list = &(((thread_act_t)addr)->task->thr_acts); - thcount = db_lookup_task_act(((thread_act_t)addr)->task, - (thread_act_t)addr); + act_list = &(((thread_t)addr)->task->thr_acts); + thcount = db_lookup_task_act(((thread_t)addr)->task, + (thread_t)addr); } } else { th = db_default_act; - if (th == THR_ACT_NULL) - th = current_act(); - if (th == THR_ACT_NULL) { + if (th == THREAD_NULL) + th = current_thread(); + if (th == THREAD_NULL) { db_printf("no active thr_act\n"); return; } @@ -516,7 +508,7 @@ db_stack_trace_cmd( count = 65535; next_thread: - top_act = THR_ACT_NULL; + top_act = THREAD_NULL; user_frame = 0; frame_count = count; @@ -524,18 +516,18 @@ db_stack_trace_cmd( if (!have_addr && !trace_thread) { frame = (struct i386_frame *)ddb_regs.ebp; callpc = (db_addr_t)ddb_regs.eip; - th = current_act(); - task = (th != THR_ACT_NULL)? th->task: TASK_NULL; + th = current_thread(); + task = (th != THREAD_NULL)? 
th->task: TASK_NULL; } else if (trace_thread) { if (have_addr) { - th = (thread_act_t) addr; + th = (thread_t) addr; if (!db_check_act_address_valid(th)) return; } else { th = db_default_act; - if (th == THR_ACT_NULL) - th = current_act(); - if (th == THR_ACT_NULL) { + if (th == THREAD_NULL) + th = current_thread(); + if (th == THREAD_NULL) { db_printf("no active thread\n"); return; } @@ -548,17 +540,17 @@ db_stack_trace_cmd( user_frame = 0; task = th->task; - if (th == current_act()) { + if (th == current_thread()) { frame = (struct i386_frame *)ddb_regs.ebp; callpc = (db_addr_t)ddb_regs.eip; } else { - if (th->mact.pcb == 0) { + if (th->machine.pcb == 0) { db_printf("thread has no pcb\n"); return; } if (!th->thread) { register struct i386_saved_state *iss = - &th->mact.pcb->iss; + &th->machine.pcb->iss; db_printf("thread has no shuttle\n"); #if 0 @@ -571,7 +563,7 @@ db_stack_trace_cmd( else if ((th->thread->state & TH_STACK_HANDOFF) || th->thread->kernel_stack == 0) { register struct i386_saved_state *iss = - &th->mact.pcb->iss; + &th->machine.pcb->iss; db_printf("Continuation "); db_task_printsym((db_expr_t)th->thread->continuation, @@ -582,21 +574,21 @@ db_stack_trace_cmd( } else { int cpu; - for (cpu = 0; cpu < NCPUS; cpu++) { - if (machine_slot[cpu].running == TRUE && - cpu_data[cpu].active_thread == th->thread && + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu_datap(cpu)->cpu_running == TRUE && + cpu_datap(cpu)->cpu_active_thread == th->thread && saved_state[cpu]) { break; } } - if (top_act != THR_ACT_NULL) { + if (top_act != THREAD_NULL) { /* * Trying to get the backtrace of an activation * which is not the top_most one in the RPC chain: * use the activation's pcb. */ register struct i386_saved_state *iss = - &th->mact.pcb->iss; + &th->machine.pcb->iss; frame = (struct i386_frame *) (iss->ebp); callpc = (db_addr_t) (iss->eip); } else { @@ -635,8 +627,8 @@ db_stack_trace_cmd( } } else { frame = (struct i386_frame *)addr; - th = (db_default_act)? db_default_act: current_act(); - task = (th != THR_ACT_NULL)? th->task: TASK_NULL; + th = (db_default_act)? db_default_act: current_thread(); + task = (th != THREAD_NULL)? th->task: TASK_NULL; callpc = (db_addr_t)db_get_task_value((int)&frame->f_retaddr, 4, FALSE, @@ -764,11 +756,11 @@ db_stack_trace_cmd( next_frame: lastcallpc = callpc; db_nextframe(&lastframe, &frame, &callpc, frame_type, - (user_frame) ? th : THR_ACT_NULL); + (user_frame) ? th : THREAD_NULL); if (frame == 0) { - if (th->lower != THR_ACT_NULL) { - if (top_act == THR_ACT_NULL) + if (th->lower != THREAD_NULL) { + if (top_act == THREAD_NULL) top_act = th; th = th->lower; db_printf(">>>>> next activation 0x%x ($task%d.%d) <<<<<\n", @@ -803,9 +795,9 @@ db_stack_trace_cmd( thread_done: if (trace_all_threads) { - if (top_act != THR_ACT_NULL) + if (top_act != THREAD_NULL) th = top_act; - th = (thread_act_t) queue_next(&th->thr_acts); + th = (thread_t) queue_next(&th->thr_acts); if (! queue_end(act_list, (queue_entry_t) th)) { db_printf("\n"); addr = (db_expr_t) th; diff --git a/osfmk/i386/endian.h b/osfmk/i386/endian.h index 55de05e4d..730c5c7b6 100644 --- a/osfmk/i386/endian.h +++ b/osfmk/i386/endian.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -40,15 +40,11 @@ /* * Macros for network/external number representation conversion. 
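
For reference alongside the inline-asm byteswappers in the endian.h hunk that follows: the xchgb trick swaps the two bytes of a 16-bit value, and bswap reverses all four bytes of a 32-bit value. A portable C sketch of the same semantics (the kernel of course keeps the asm versions shown in the diff):

#include <stdio.h>

/* Portable equivalent of ntohs: swap the two bytes of a 16-bit value,
 * what "xchgb %h1,%b1" does in one instruction. */
static unsigned short ntohs_sketch(unsigned short w)
{
	return (unsigned short)((w << 8) | (w >> 8));
}

/* Portable equivalent of ntohl: reverse all four bytes of a 32-bit value,
 * what the "bswap" instruction does. (unsigned long is 32 bits on i386.) */
static unsigned long ntohl_sketch(unsigned long v)
{
	return ((v & 0x000000ffUL) << 24) | ((v & 0x0000ff00UL) << 8) |
	       ((v & 0x00ff0000UL) >> 8)  | ((v & 0xff000000UL) >> 24);
}

int main(void)
{
	printf("%#x\n",  ntohs_sketch(0x1234));		/* prints 0x3412 */
	printf("%#lx\n", ntohl_sketch(0x12345678UL));	/* prints 0x78563412 */
	return 0;
}
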
- */ - -unsigned short ntohs(unsigned short), htons(unsigned short); -unsigned long ntohl(unsigned long), htonl(unsigned long); - -/* * Use GNUC support to inline the byteswappers. */ +#if !defined(ntohs) +unsigned short ntohs(unsigned short); extern __inline__ unsigned short ntohs(unsigned short w_int) @@ -57,9 +53,15 @@ ntohs(unsigned short w_int) __asm__ volatile("xchgb %h1,%b1" : "=q" (w) : "0" (w)); return (w); /* zero-extend for compat */ } +#endif +#if !defined(htons) +unsigned short htons(unsigned short); #define htons ntohs +#endif +#if !defined(ntohl) +unsigned long ntohl(unsigned long); extern __inline__ unsigned long ntohl(register unsigned long value) @@ -68,8 +70,12 @@ ntohl(register unsigned long value) __asm__ volatile("bswap %0" : "=r" (l) : "0" (l)); return l; } +#endif +#if !defined(htonl) +unsigned long htonl(unsigned long); #define htonl ntohl +#endif #define NTOHL(x) (x) = ntohl((unsigned long)x) #define NTOHS(x) (x) = ntohs((unsigned short)x) diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 9366cdcfa..30de466be 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -50,7 +50,6 @@ /* */ -#include #include #include @@ -58,6 +57,7 @@ #include #include +#include #include #include #include @@ -88,36 +88,20 @@ extern int curr_ipl; int fp_kind = FP_387; /* 80387 present */ zone_t ifps_zone; /* zone for FPU save area */ -#if NCPUS == 1 -volatile thread_act_t fp_act = THR_ACT_NULL; - /* thread whose state is in FPU */ - /* always THR_ACT_NULL if emulating FPU */ -volatile thread_act_t fp_intr_act = THR_ACT_NULL; - - #define clear_fpu() \ { \ set_ts(); \ - fp_act = THR_ACT_NULL; \ } -#else /* NCPUS > 1 */ -#define clear_fpu() \ - { \ - set_ts(); \ - } - -#endif - #define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) /* Forward */ extern void fpinit(void); extern void fp_save( - thread_act_t thr_act); + thread_t thr_act); extern void fp_load( - thread_act_t thr_act); + thread_t thr_act); /* * Look for FPU and initialize it. @@ -133,7 +117,7 @@ init_fpu(void) * then trying to read the correct bit patterns from * the control and status registers. */ - set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */ + set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */ fninit(); status = fnstsw(); @@ -188,23 +172,11 @@ fpu_module_init(void) * Called only when thread terminating - no locking necessary. */ void -fp_free(fps) +fpu_free(fps) struct i386_fpsave_state *fps; { ASSERT_IPL(SPL0); -#if NCPUS == 1 - if ((fp_act != THR_ACT_NULL) && (fp_act->mact.pcb->ims.ifps == fps)) { - /* - * Make sure we don't get FPU interrupts later for - * this thread - */ - fwait(); - - /* Mark it free and disable access */ - clear_fpu(); - } -#endif /* NCPUS == 1 */ - zfree(ifps_zone, (vm_offset_t) fps); + zfree(ifps_zone, fps); } /* @@ -218,7 +190,7 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_set_fxstate( - thread_act_t thr_act, + thread_t thr_act, struct i386_float_state *state) { register pcb_t pcb; @@ -234,22 +206,9 @@ ASSERT_IPL(SPL0); return fpu_set_state(thr_act, state); } - assert(thr_act != THR_ACT_NULL); - pcb = thr_act->mact.pcb; + assert(thr_act != THREAD_NULL); + pcb = thr_act->machine.pcb; -#if NCPUS == 1 - - /* - * If this thread`s state is in the FPU, - * discard it; we are replacing the entire - * FPU state. 
- */ - if (fp_act == thr_act) { - fwait(); /* wait for possible interrupt */ - clear_fpu(); /* no state in FPU */ - } -#endif - if (state->initialized == 0) { /* * new FPU state is 'invalid'. @@ -261,7 +220,7 @@ ASSERT_IPL(SPL0); simple_unlock(&pcb->lock); if (ifps != 0) { - zfree(ifps_zone, (vm_offset_t) ifps); + zfree(ifps_zone, ifps); } } else { @@ -293,7 +252,7 @@ ASSERT_IPL(SPL0); ifps->fp_save_flavor = FP_FXSR; simple_unlock(&pcb->lock); if (new_ifps != 0) - zfree(ifps_zone, (vm_offset_t) ifps); + zfree(ifps_zone, ifps); } return KERN_SUCCESS; @@ -307,18 +266,21 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_get_fxstate( - thread_act_t thr_act, + thread_t thr_act, register struct i386_float_state *state) { register pcb_t pcb; register struct i386_fpsave_state *ifps; ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) + if (fp_kind == FP_NO) { return KERN_FAILURE; + } else if (fp_kind == FP_387) { + return fpu_get_state(thr_act, state); + } - assert(thr_act != THR_ACT_NULL); - pcb = thr_act->mact.pcb; + assert(thr_act != THREAD_NULL); + pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); ifps = pcb->ims.ifps; @@ -333,11 +295,7 @@ ASSERT_IPL(SPL0); /* Make sure we`ve got the latest fp state info */ /* If the live fpu state belongs to our target */ -#if NCPUS == 1 - if (thr_act == fp_act) -#else - if (thr_act == current_act()) -#endif + if (thr_act == current_thread()) { clear_ts(); fp_save(thr_act); @@ -362,7 +320,7 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_set_state( - thread_act_t thr_act, + thread_t thr_act, struct i386_float_state *state) { register pcb_t pcb; @@ -373,21 +331,8 @@ ASSERT_IPL(SPL0); if (fp_kind == FP_NO) return KERN_FAILURE; - assert(thr_act != THR_ACT_NULL); - pcb = thr_act->mact.pcb; - -#if NCPUS == 1 - - /* - * If this thread`s state is in the FPU, - * discard it; we are replacing the entire - * FPU state. - */ - if (fp_act == thr_act) { - fwait(); /* wait for possible interrupt */ - clear_fpu(); /* no state in FPU */ - } -#endif + assert(thr_act != THREAD_NULL); + pcb = thr_act->machine.pcb; if (state->initialized == 0) { /* @@ -400,7 +345,7 @@ ASSERT_IPL(SPL0); simple_unlock(&pcb->lock); if (ifps != 0) { - zfree(ifps_zone, (vm_offset_t) ifps); + zfree(ifps_zone, ifps); } } else { @@ -448,7 +393,7 @@ ASSERT_IPL(SPL0); ifps->fp_save_flavor = FP_387; simple_unlock(&pcb->lock); if (new_ifps != 0) - zfree(ifps_zone, (vm_offset_t) ifps); + zfree(ifps_zone, ifps); } return KERN_SUCCESS; @@ -462,7 +407,7 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_get_state( - thread_act_t thr_act, + thread_t thr_act, register struct i386_float_state *state) { register pcb_t pcb; @@ -472,8 +417,8 @@ ASSERT_IPL(SPL0); if (fp_kind == FP_NO) return KERN_FAILURE; - assert(thr_act != THR_ACT_NULL); - pcb = thr_act->mact.pcb; + assert(thr_act != THREAD_NULL); + pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); ifps = pcb->ims.ifps; @@ -488,11 +433,7 @@ ASSERT_IPL(SPL0); /* Make sure we`ve got the latest fp state info */ /* If the live fpu state belongs to our target */ -#if NCPUS == 1 - if (thr_act == fp_act) -#else - if (thr_act == current_act()) -#endif + if (thr_act == current_thread()) { clear_ts(); fp_save(thr_act); @@ -575,36 +516,11 @@ fpnoextflt(void) */ ASSERT_IPL(SPL0); clear_ts(); -#if NCPUS == 1 - - /* - * If this thread`s state is in the FPU, we are done. - */ - if (fp_act == current_act()) - return; - - /* Make sure we don't do fpsave() in fp_intr while doing fpsave() - * here if the current fpu instruction generates an error. 
- */ - fwait(); - /* - * If another thread`s state is in the FPU, save it. - */ - if (fp_act != THR_ACT_NULL) { - fp_save(fp_act); - } - - /* - * Give this thread the FPU. - */ - fp_act = current_act(); - -#endif /* NCPUS == 1 */ /* * Load this thread`s state into the FPU. */ - fp_load(current_act()); + fp_load(current_thread()); } /* @@ -615,26 +531,15 @@ ASSERT_IPL(SPL0); void fpextovrflt(void) { - register thread_act_t thr_act = current_act(); + register thread_t thr_act = current_thread(); register pcb_t pcb; register struct i386_fpsave_state *ifps; -#if NCPUS == 1 - - /* - * Is exception for the currently running thread? - */ - if (fp_act != thr_act) { - /* Uh oh... */ - panic("fpextovrflt"); - } -#endif - /* * This is a non-recoverable error. * Invalidate the thread`s FPU state. */ - pcb = thr_act->mact.pcb; + pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); ifps = pcb->ims.ifps; pcb->ims.ifps = 0; @@ -652,7 +557,7 @@ fpextovrflt(void) clear_fpu(); if (ifps) - zfree(ifps_zone, (vm_offset_t) ifps); + zfree(ifps_zone, ifps); /* * Raise exception. @@ -668,43 +573,13 @@ fpextovrflt(void) void fpexterrflt(void) { - register thread_act_t thr_act = current_act(); + register thread_t thr_act = current_thread(); ASSERT_IPL(SPL0); -#if NCPUS == 1 - /* - * Since FPU errors only occur on ESC or WAIT instructions, - * the current thread should own the FPU. If it didn`t, - * we should have gotten the task-switched interrupt first. - */ - if (fp_act != THR_ACT_NULL) { - panic("fpexterrflt"); - return; - } - - /* - * Check if we got a context switch between the interrupt and the AST - * This can happen if the interrupt arrived after the FPU AST was - * checked. In this case, raise the exception in fp_load when this - * thread next time uses the FPU. Remember exception condition in - * fp_valid (extended boolean 2). - */ - if (fp_intr_act != thr_act) { - if (fp_intr_act == THR_ACT_NULL) { - panic("fpexterrflt: fp_intr_act == THR_ACT_NULL"); - return; - } - fp_intr_act->mact.pcb->ims.ifps->fp_valid = 2; - fp_intr_act = THR_ACT_NULL; - return; - } - fp_intr_act = THR_ACT_NULL; -#else /* NCPUS == 1 */ /* * Save the FPU state and turn off the FPU. */ fp_save(thr_act); -#endif /* NCPUS == 1 */ /* * Raise FPU exception. 
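
The fp_valid handshake that the fp_save()/fp_load() hunks nearby rely on works like this: at context-switch time the live FPU image is dumped to memory only if fp_valid is clear, and the next device-not-available trap reloads it and clears fp_valid again. A toy model of that protocol (names mirror the kernel's, but this is only a sketch; the real code uses fnsave/frstor or fxsave/fxrstor and the CR0.TS bit):

#include <stdio.h>

/* Toy model of the lazy-save protocol left after the NCPUS==1 cases are
 * removed: fp_valid == 1 means the memory copy is current, fp_valid == 0
 * means the state lives in the FPU registers. */
struct fpsave_state {
	int	fp_valid;
	double	regs;		/* stand-in for the real register image */
};

static void fp_save(struct fpsave_state *ifps, double live_fpu)
{
	if (ifps != NULL && !ifps->fp_valid) {
		ifps->regs = live_fpu;	/* fnsave/fxsave in the real code */
		ifps->fp_valid = 1;
		/* the real code also sets CR0.TS so the next FP op traps */
	}
}

static double fp_load(struct fpsave_state *ifps)
{
	ifps->fp_valid = 0;	/* live again; the memory copy is now stale */
	return ifps->regs;	/* frstor/fxrstor in the real code */
}

int main(void)
{
	struct fpsave_state t = { 0, 0.0 };

	fp_save(&t, 3.14);				/* context switch away */
	printf("restored %.2f\n", fp_load(&t));		/* DNA trap on next use */
	return 0;
}
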
@@ -713,7 +588,7 @@ ASSERT_IPL(SPL0); */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status); + thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); /*NOTREACHED*/ } @@ -727,9 +602,9 @@ ASSERT_IPL(SPL0); */ void fp_save( - thread_act_t thr_act) + thread_t thr_act) { - register pcb_t pcb = thr_act->mact.pcb; + register pcb_t pcb = thr_act->machine.pcb; register struct i386_fpsave_state *ifps = pcb->ims.ifps; if (ifps != 0 && !ifps->fp_valid) { /* registers are in FPU */ @@ -751,9 +626,9 @@ fp_save( void fp_load( - thread_act_t thr_act) + thread_t thr_act) { - register pcb_t pcb = thr_act->mact.pcb; + register pcb_t pcb = thr_act->machine.pcb; register struct i386_fpsave_state *ifps; ASSERT_IPL(SPL0); @@ -782,7 +657,7 @@ ASSERT_IPL(SPL0); */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status); + thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); /*NOTREACHED*/ #endif } else { @@ -802,7 +677,7 @@ ASSERT_IPL(SPL0); void fp_state_alloc(void) { - pcb_t pcb = current_act()->mact.pcb; + pcb_t pcb = current_thread()->machine.pcb; struct i386_fpsave_state *ifps; ifps = (struct i386_fpsave_state *)zalloc(ifps_zone); @@ -825,24 +700,16 @@ fp_state_alloc(void) /* - * fpflush(thread_act_t) + * fpflush(thread_t) * Flush the current act's state, if needed * (used by thread_terminate_self to ensure fp faults * aren't satisfied by overly general trap code in the * context of the reaper thread) */ void -fpflush(thread_act_t thr_act) +fpflush(__unused thread_t thr_act) { -#if NCPUS == 1 - if (fp_act && thr_act == fp_act) { - clear_ts(); - fwait(); - clear_fpu(); - } -#else /* not needed on MP x86s; fp not lazily evaluated */ -#endif } @@ -855,7 +722,7 @@ void fpintr(void) { spl_t s; - thread_act_t thr_act = current_act(); + thread_t thr_act = current_thread(); ASSERT_IPL(SPL1); /* @@ -866,34 +733,6 @@ ASSERT_IPL(SPL1); /* * Save the FPU context to the thread using it. */ -#if NCPUS == 1 - if (fp_act == THR_ACT_NULL) { - printf("fpintr: FPU not belonging to anyone!\n"); - clear_ts(); - fninit(); - clear_fpu(); - return; - } - - if (fp_act != thr_act) { - /* - * FPU exception is for a different thread. - * When that thread again uses the FPU an exception will be - * raised in fp_load. Remember the condition in fp_valid (== 2). - */ - clear_ts(); - fp_save(fp_act); - fp_act->mact.pcb->ims.ifps->fp_valid = 2; - fninit(); - clear_fpu(); - /* leave fp_intr_act THR_ACT_NULL */ - return; - } - if (fp_intr_act != THR_ACT_NULL) - panic("fp_intr: already caught intr"); - fp_intr_act = thr_act; -#endif /* NCPUS == 1 */ - clear_ts(); fp_save(thr_act); fninit(); diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index ae3aa0e88..618e2e3ce 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -59,7 +59,6 @@ * floating-point processor. */ -#include #include #include #include @@ -114,11 +113,10 @@ extern __inline__ unsigned short fnstsw(void) * NOTE: in order to provide backwards-compatible support in the kernel, when saving SSE2 state we also save the * FP state in its old location.
Otherwise fpu_get_state() and fpu_set_state() will stop working */ -#if NCPUS > 1 #define fpu_save_context(thread) \ { \ register struct i386_fpsave_state *ifps; \ - ifps = (thread)->top_act->mact.pcb->ims.ifps; \ + ifps = (thread)->machine.pcb->ims.ifps; \ if (ifps != 0 && !ifps->fp_valid) { \ /* registers are in FPU - save to memory */ \ ifps->fp_valid = TRUE; \ @@ -132,38 +130,31 @@ extern __inline__ unsigned short fnstsw(void) set_ts(); \ } -#else /* NCPUS == 1 */ -#define fpu_save_context(thread) \ - { \ - set_ts(); \ - } - -#endif /* NCPUS == 1 */ extern int fp_kind; extern void init_fpu(void); extern void fpu_module_init(void); -extern void fp_free( +extern void fpu_free( struct i386_fpsave_state * fps); extern kern_return_t fpu_set_state( - thread_act_t thr_act, + thread_t thr_act, struct i386_float_state * st); extern kern_return_t fpu_get_state( - thread_act_t thr_act, + thread_t thr_act, struct i386_float_state * st); -/* extern kern_return_t fpu_set_fxstate( - thread_act_t thr_act, +extern kern_return_t fpu_set_fxstate( + thread_t thr_act, struct i386_float_state * st); extern kern_return_t fpu_get_fxstate( - thread_act_t thr_act, - struct i386_float_state * st); */ + thread_t thr_act, + struct i386_float_state * st); extern void fpnoextflt(void); extern void fpextovrflt(void); extern void fpexterrflt(void); extern void fp_state_alloc(void); extern void fpintr(void); -extern void fpflush(thread_act_t); +extern void fpflush(thread_t); #endif /* _I386_FPU_H_ */ diff --git a/osfmk/i386/gdt.c b/osfmk/i386/gdt.c index 8d9f57b57..df29cf3a9 100644 --- a/osfmk/i386/gdt.c +++ b/osfmk/i386/gdt.c @@ -54,11 +54,11 @@ /* * Global descriptor table. */ -#include -#include +#include #include #include -#include +#include +#include #ifdef MACH_BSD extern int trap_unix_syscall(void), trap_mach25_syscall(void), @@ -67,22 +67,22 @@ extern int trap_unix_syscall(void), trap_mach25_syscall(void), struct fake_descriptor gdt[GDTSZ] = { /* 0x000 */ { 0, 0, 0, 0 }, /* always NULL */ -/* 0x008 */ { LINEAR_KERNEL_ADDRESS + VM_MIN_ADDRESS, - (VM_MAX_KERNEL_ADDRESS-1-VM_MIN_KERNEL_ADDRESS)>>12, +/* 0x008 */ { 0, + 0xfffff, SZ_32|SZ_G, ACC_P|ACC_PL_K|ACC_CODE_R }, /* kernel code */ -/* 0x010 */ { LINEAR_KERNEL_ADDRESS + VM_MIN_ADDRESS, - (VM_MAX_KERNEL_ADDRESS-1-VM_MIN_KERNEL_ADDRESS)>>12, +/* 0x010 */ { 0, + 0xfffff, SZ_32|SZ_G, ACC_P|ACC_PL_K|ACC_DATA_W }, /* kernel data */ -/* 0x018 */ { LINEAR_KERNEL_ADDRESS + (unsigned int)ldt, +/* 0x018 */ { (unsigned int)ldt, LDTSZ*sizeof(struct fake_descriptor)-1, 0, ACC_P|ACC_PL_K|ACC_LDT }, /* local descriptor table */ -/* 0x020 */ { LINEAR_KERNEL_ADDRESS + (unsigned int)&ktss, +/* 0x020 */ { (unsigned int)&ktss, sizeof(struct i386_tss)-1, 0, ACC_P|ACC_PL_K|ACC_TSS @@ -109,13 +109,13 @@ struct fake_descriptor gdt[GDTSZ] = { /* 0x038 */ { 0, 0, 0, 0 }, #endif /* 0x040 */ { 0, 0, 0, 0 }, -/* 0x048 */ { LINEAR_KERNEL_ADDRESS + (unsigned int)&cpu_data[0], - sizeof(cpu_data)-1, +/* 0x048 */ { (unsigned int)&cpu_data_master, + sizeof(cpu_data_t)-1, SZ_32, ACC_P|ACC_PL_K|ACC_DATA_W }, /* per-CPU current thread address */ #if MACH_KDB -/* 0x050 */ { LINEAR_KERNEL_ADDRESS + (unsigned int)&dbtss, +/* 0x050 */ { (unsigned int)&dbtss, sizeof(struct i386_tss)-1, 0, ACC_P|ACC_PL_K|ACC_TSS diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index c29dde6a7..64dd377bc 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -49,7 +49,6 @@ */ #include -#include #include #include #include @@ -61,25 +60,23 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include #include #include +#include +#include #include -#if NCPUS > 1 #include -#endif - -extern void kernel_preempt_check(void); -cpu_data_t cpu_data[NCPUS]; +#include +#include /* * genassym.c is used to produce an @@ -129,44 +126,36 @@ main( #endif /* MACH_LDEBUG */ /* Mutex structure */ - DECLARE("MUTEX_LOCKED", offsetof(mutex_t *, locked)); - DECLARE("MUTEX_WAITERS",offsetof(mutex_t *, waiters)); - DECLARE("MUTEX_PROMOTED_PRI",offsetof(mutex_t *, promoted_pri)); + DECLARE("MUTEX_LOCKED", offsetof(mutex_t *, lck_mtx.lck_mtx_locked)); + DECLARE("MUTEX_WAITERS",offsetof(mutex_t *, lck_mtx.lck_mtx_waiters)); + DECLARE("MUTEX_PROMOTED_PRI",offsetof(mutex_t *, lck_mtx.lck_mtx_pri)); #if MACH_LDEBUG DECLARE("MUTEX_TYPE", offsetof(mutex_t *, type)); DECLARE("MUTEX_PC", offsetof(mutex_t *, pc)); DECLARE("MUTEX_THREAD", offsetof(mutex_t *, thread)); DECLARE("MUTEX_TAG", MUTEX_TAG); #endif /* MACH_LDEBUG */ - -#if MACH_LDEBUG - DECLARE("TH_MUTEX_COUNT", offsetof(thread_t, mutex_count)); -#endif /* MACH_LDEBUG */ + DECLARE("MUTEX_IND", LCK_MTX_TAG_INDIRECT); + DECLARE("MUTEX_ITAG", offsetof(lck_mtx_t *, lck_mtx_tag)); + DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); + DECLARE("TH_RECOVER", offsetof(thread_t, recover)); DECLARE("TH_CONTINUATION", offsetof(thread_t, continuation)); - DECLARE("TH_TOP_ACT", offsetof(thread_t, top_act)); DECLARE("TH_KERNEL_STACK", offsetof(thread_t, kernel_stack)); - DECLARE("TASK_EMUL", offsetof(task_t, eml_dispatch)); DECLARE("TASK_MACH_EXC_PORT", offsetof(task_t, exc_actions[EXC_MACH_SYSCALL].port)); /* These fields are being added on demand */ DECLARE("ACT_MACH_EXC_PORT", - offsetof(thread_act_t, exc_actions[EXC_MACH_SYSCALL].port)); + offsetof(thread_t, exc_actions[EXC_MACH_SYSCALL].port)); - DECLARE("ACT_THREAD", offsetof(thread_act_t, thread)); - DECLARE("ACT_TASK", offsetof(thread_act_t, task)); - DECLARE("ACT_PCB", offsetof(thread_act_t, mact.pcb)); - DECLARE("ACT_LOWER", offsetof(thread_act_t, lower)); - DECLARE("ACT_MAP", offsetof(thread_act_t, map)); + DECLARE("ACT_TASK", offsetof(thread_t, task)); + DECLARE("ACT_PCB", offsetof(thread_t, machine.pcb)); + DECLARE("ACT_MAP", offsetof(thread_t, map)); DECLARE("MAP_PMAP", offsetof(vm_map_t, pmap)); - DECLARE("DISP_MIN", offsetof(eml_dispatch_t, disp_min)); - DECLARE("DISP_COUNT", offsetof(eml_dispatch_t, disp_count)); - DECLARE("DISP_VECTOR", offsetof(eml_dispatch_t, disp_vector[0])); - #define IKS ((size_t) (STACK_IKS(0))) DECLARE("KSS_EBX", IKS + offsetof(struct i386_kernel_state *, k_ebx)); @@ -209,20 +198,30 @@ main( DECLARE("I_EFL", offsetof(struct i386_interrupt_state *, efl)); DECLARE("NBPG", I386_PGBYTES); + DECLARE("PAGE_SIZE", I386_PGBYTES); + DECLARE("PAGE_MASK", I386_PGBYTES-1); + DECLARE("PAGE_SHIFT", 12); + DECLARE("NKPT", NKPT); + DECLARE("KPTDI", KPTDI); DECLARE("VM_MIN_ADDRESS", VM_MIN_ADDRESS); DECLARE("VM_MAX_ADDRESS", VM_MAX_ADDRESS); DECLARE("KERNELBASE", VM_MIN_KERNEL_ADDRESS); DECLARE("LINEAR_KERNELBASE", LINEAR_KERNEL_ADDRESS); DECLARE("KERNEL_STACK_SIZE", KERNEL_STACK_SIZE); + DECLARE("COMM_PAGE_BASE_ADDR", _COMM_PAGE_BASE_ADDRESS); + DECLARE("PDESHIFT", PDESHIFT); - DECLARE("PTESHIFT", PTESHIFT); DECLARE("PTEMASK", PTEMASK); - + DECLARE("PTEINDX", PTEINDX); DECLARE("PTE_PFN", INTEL_PTE_PFN); DECLARE("PTE_V", 
INTEL_PTE_VALID); DECLARE("PTE_W", INTEL_PTE_WRITE); + DECLARE("PTE_PS", INTEL_PTE_PS); + DECLARE("PTE_U", INTEL_PTE_USER); DECLARE("PTE_INVALID", ~INTEL_PTE_VALID); + DECLARE("CR4_PAE", CR4_PAE); + DECLARE("NPGPTD", NPGPTD); DECLARE("IDTSZ", IDTSZ); DECLARE("GDTSZ", GDTSZ); @@ -237,29 +236,49 @@ main( #if MACH_KDB DECLARE("DEBUG_TSS", DEBUG_TSS); #endif /* MACH_KDB */ - - DECLARE("CPU_DATA", CPU_DATA); - DECLARE("CPD_ACTIVE_THREAD", - offsetof(cpu_data_t *, active_thread)); + DECLARE("CPU_DATA_GS", CPU_DATA_GS); + + DECLARE("CPU_THIS", + offsetof(cpu_data_t *, cpu_this)); + DECLARE("CPU_ACTIVE_THREAD", + offsetof(cpu_data_t *, cpu_active_thread)); + DECLARE("CPU_ACTIVE_KLOADED", + offsetof(cpu_data_t *, cpu_active_kloaded)); + DECLARE("CPU_ACTIVE_STACK", + offsetof(cpu_data_t *, cpu_active_stack)); + DECLARE("CPU_KERNEL_STACK", + offsetof(cpu_data_t *, cpu_kernel_stack)); + DECLARE("CPU_INT_STACK_TOP", + offsetof(cpu_data_t *, cpu_int_stack_top)); #if MACH_RT - DECLARE("CPD_PREEMPTION_LEVEL", - offsetof(cpu_data_t *, preemption_level)); + DECLARE("CPU_PREEMPTION_LEVEL", + offsetof(cpu_data_t *, cpu_preemption_level)); #endif /* MACH_RT */ - DECLARE("CPD_INTERRUPT_LEVEL", - offsetof(cpu_data_t *, interrupt_level)); - DECLARE("CPD_SIMPLE_LOCK_COUNT", - offsetof(cpu_data_t *,simple_lock_count)); - DECLARE("CPD_CPU_NUMBER", + DECLARE("CPU_INTERRUPT_LEVEL", + offsetof(cpu_data_t *, cpu_interrupt_level)); + DECLARE("CPU_SIMPLE_LOCK_COUNT", + offsetof(cpu_data_t *,cpu_simple_lock_count)); + DECLARE("CPU_NUMBER_GS", offsetof(cpu_data_t *,cpu_number)); - DECLARE("CPD_CPU_PHYS_NUMBER", - offsetof(cpu_data_t *,cpu_phys_number)); - DECLARE("CPD_CPU_STATUS", - offsetof(cpu_data_t *,cpu_status)); - DECLARE("CPD_MCOUNT_OFF", - offsetof(cpu_data_t *,mcount_off)); - - DECLARE("PTES_PER_PAGE", NPTES); + DECLARE("CPU_RUNNING", + offsetof(cpu_data_t *,cpu_running)); + DECLARE("CPU_MCOUNT_OFF", + offsetof(cpu_data_t *,cpu_mcount_off)); + DECLARE("CPU_PENDING_AST", + offsetof(cpu_data_t *,cpu_pending_ast)); + DECLARE("CPU_DESC_TABLEP", + offsetof(cpu_data_t *,cpu_desc_tablep)); + DECLARE("CPU_PROCESSOR", + offsetof(cpu_data_t *,cpu_processor)); + DECLARE("CPU_RTC_NANOTIME", + offsetof(cpu_data_t *,cpu_rtc_nanotime)); + DECLARE("INTEL_PTE_KERNEL", INTEL_PTE_VALID|INTEL_PTE_WRITE); + DECLARE("PTDPTDI", PTDPTDI); + DECLARE("PDESHIFT", PDESHIFT); + DECLARE("PDESIZE", PDESIZE); + DECLARE("PTESIZE", PTESIZE); + DECLARE("APTDPTDI", APTDPTDI); DECLARE("KERNELBASEPDE", (LINEAR_KERNEL_ADDRESS >> PDESHIFT) * @@ -283,25 +302,53 @@ main( DECLARE("USL_INTERLOCK", offsetof(usimple_lock_t, interlock)); DECLARE("INTSTACK_SIZE", INTSTACK_SIZE); -#if NCPUS > 1 DECLARE("MP_GDT", offsetof(struct mp_desc_table *, gdt[0])); DECLARE("MP_IDT", offsetof(struct mp_desc_table *, idt[0])); -#endif /* NCPUS > 1 */ -#if !STAT_TIME - DECLARE("LOW_BITS", offsetof(struct timer *, low_bits)); - DECLARE("HIGH_BITS", offsetof(struct timer *, high_bits)); - DECLARE("HIGH_BITS_CHECK", offsetof(struct timer *, high_bits_check)); - DECLARE("TIMER_HIGH_UNIT", TIMER_HIGH_UNIT); - DECLARE("TH_SYS_TIMER", offsetof(struct timer *, system_timer)); - DECLARE("TH_USER_TIMER", offsetof(struct timer *, user_timer)); + DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); + DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); + DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); + DECLARE("KADDR", offsetof(struct KernelBootArgs *, kaddr)); + DECLARE("KSIZE", offsetof(struct KernelBootArgs *, ksize)); + + DECLARE("NANOTIME_BASE_TSC", 
+ offsetof(commpage_nanotime_t*, nt_base_tsc)); + DECLARE("NANOTIME_BASE_NS", + offsetof(commpage_nanotime_t*, nt_base_ns)); + DECLARE("NANOTIME_SCALE", + offsetof(commpage_nanotime_t*, nt_scale)); + DECLARE("NANOTIME_SHIFT", + offsetof(commpage_nanotime_t*, nt_shift)); + DECLARE("NANOTIME_CHECK_TSC", + offsetof(commpage_nanotime_t*, nt_check_tsc)); + + DECLARE("RTN_TSC", + offsetof(rtc_nanotime_t *, rnt_tsc)); + DECLARE("RTN_NANOS", + offsetof(rtc_nanotime_t *, rnt_nanos)); + DECLARE("RTN_SCALE", + offsetof(rtc_nanotime_t *, rnt_scale)); + DECLARE("RTN_SHIFT", + offsetof(rtc_nanotime_t *, rnt_shift)); + + /* values from kern/timer.h */ + DECLARE("TIMER_LOW", + offsetof(struct timer *, low_bits)); + DECLARE("TIMER_HIGH", + offsetof(struct timer *, high_bits)); + DECLARE("TIMER_HIGHCHK", + offsetof(struct timer *, high_bits_check)); +#if !STAT_TIME + DECLARE("TIMER_TSTAMP", + offsetof(struct timer *, tstamp)); + + DECLARE("CURRENT_TIMER", + offsetof(struct processor *, processor_data.current_timer)); #endif + DECLARE("SYSTEM_TIMER", + offsetof(struct thread *, system_timer)); + DECLARE("USER_TIMER", + offsetof(struct thread *, user_timer)); return (0); } - -/* Dummy to keep linker quiet */ -void -kernel_preempt_check(void) -{ -} diff --git a/osfmk/i386/hardclock.c b/osfmk/i386/hardclock.c deleted file mode 100644 index f12cc1bd4..000000000 --- a/osfmk/i386/hardclock.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Clock interrupt. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#if MACH_MP_DEBUG -#include /* for HZ */ -#endif /* MACH_MP_DEBUG */ - -extern char return_to_iret[]; - -#if TIME_STAMP && NCPUS > 1 -extern unsigned time_stamp; -unsigned old_time_stamp, time_stamp_cum, nstamps; - -/* - * If H/W provides a counter, record number of ticks and cumulated - * time stamps to know timestamps rate. - * This should go away when ALARMCLOCKS installed - */ -#define time_stamp_stat() \ - if (my_cpu == 0) \ - if (!old_time_stamp) { \ - old_time_stamp = time_stamp; \ - nstamps = 0; \ - } else { \ - nstamps++; \ - time_stamp_cum = (time_stamp - old_time_stamp); \ - } -#else /* TIME_STAMP && AT386 && NCPUS > 1 */ -#define time_stamp_stat() -#endif /* TIME_STAMP && AT386 && NCPUS > 1 */ - -#if MACH_KPROF -int masked_pc[NCPUS]; -int missed_clock[NCPUS]; -int detect_lost_tick = 0; -#endif /* MACH_KPROF */ - -#if MACH_MP_DEBUG -int masked_state_cnt[NCPUS]; -int masked_state_max = 10*HZ; -#endif /* MACH_MP_DEBUG */ - -/* - * In the interest of a fast clock interrupt service path, - * this routine should be folded into assembly language with - * a direct interrupt vector on the i386. The "pit" interrupt - * should always call the rtclock_intr() routine on the master - * processor. The return value of the rtclock_intr() routine - * indicates whether HZ rate clock processing should be - * performed. (On the Sequent, all slave processors will - * run at HZ rate). For now, we'll leave this routine in C - * (with TIME_STAMP, MACH_MP_DEBUG and MACH_KPROF code this - * routine is way too large for assembler anyway). - */ - -#ifdef PARANOID_KDB -int paranoid_debugger = TRUE; -int paranoid_count = 1000; -int paranoid_current = 0; -int paranoid_cpu = 0; -#endif /* PARANOID_KDB */ - -void -hardclock(struct i386_interrupt_state *regs) /* saved registers */ -{ - int mycpu; - register unsigned pc; - register boolean_t usermode; - - mp_disable_preemption(); - mycpu = cpu_number(); - -#ifdef PARANOID_KDB - if (paranoid_cpu == mycpu && - paranoid_current++ >= paranoid_count) { - paranoid_current = 0; - if (paranoid_debugger) - Debugger("hardclock"); - } -#endif /* PARANOID_KDB */ - -#if MACH_KPROF - /* - * If we were masked against the clock skip call - * to rtclock_intr(). When MACH_KPROF is set, the - * clock frequency of the master-cpu is confined - * to the HZ rate. - */ - if (SPL_CMP_GE((old_ipl & 0xFF), SPL7)) { - usermode = (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0); - pc = (unsigned)regs->eip; - assert(!usermode); - if (missed_clock[mycpu]++ && detect_lost_tick > 1) - Debugger("Mach_KPROF"); - masked_pc[mycpu] = pc; - } else -#endif /* MACH_KPROF */ - /* - * The master processor executes the rtclock_intr() routine - * on every clock tick. The rtclock_intr() routine returns - * a zero value on a HZ tick boundary. - */ - if (mycpu == master_cpu) { - if (rtclock_intr(regs) != 0) { - mp_enable_preemption(); - return; - } - } else { - usermode = (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0); - pc = (unsigned)regs->eip; - hertz_tick(usermode, pc); - } - - /* - * The following code is executed at HZ rate by all processors - * in the system. This implies that the clock rate on slave - * processors must be HZ rate. 
- */ - - time_stamp_stat(); - -#if NCPUS >1 - /* - * Instead of having the master processor interrupt - * all active processors, each processor in turn interrupts - * the next active one. This avoids all slave processors - * accessing the same R/W data simultaneously. - */ - slave_clock(); -#endif /* NCPUS >1 && AT386 */ - - mp_enable_preemption(); -} - -#if MACH_KPROF -void -delayed_clock(void) -{ - int i; - int my_cpu; - - mp_disable_preemption(); - my_cpu = cpu_number(); - - if (missed_clock[my_cpu] > 1 && detect_lost_tick) - printf("hardclock: missed %d clock interrupt(s) at %x\n", - missed_clock[my_cpu]-1, masked_pc[my_cpu]); - if (my_cpu == master_cpu) { - i = rtclock_intr(); - assert(i == 0); - } - hertz_tick(0, masked_pc[my_cpu]); - missed_clock[my_cpu] = 0; - - mp_enable_preemption(); -} -#endif /* MACH_KPROF */ diff --git a/osfmk/i386/hardclock_entries.h b/osfmk/i386/hardclock_entries.h deleted file mode 100644 index 80001989d..000000000 --- a/osfmk/i386/hardclock_entries.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:36 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:37 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.7.1 1994/09/23 01:54:13 ezf - * change marker to not FREE - * [1994/09/22 21:22:49 ezf] - * - * Revision 1.1.2.3 1993/09/17 21:35:16 robert - * change marker to OSF_FREE_COPYRIGHT - * [1993/09/17 21:28:26 robert] - * - * Revision 1.1.2.2 1993/08/09 19:39:51 dswartz - * Add ANSI prototypes - CR#9523 - * [1993/08/06 17:44:52 dswartz] - * - * $EndLog$ - */ - -extern void hardclock(struct i386_interrupt_state *regs); -extern void delayed_clock(void); diff --git a/osfmk/i386/hw_lock_types.h b/osfmk/i386/hw_lock_types.h index 88c28809c..4bf586e78 100644 --- a/osfmk/i386/hw_lock_types.h +++ b/osfmk/i386/hw_lock_types.h @@ -83,10 +83,11 @@ * dependent optimizations for the locking constructs defined * later in kern/lock.h.. */ -typedef volatile int hw_lock_data_t; -typedef hw_lock_data_t *hw_lock_t; -#define hw_lock_addr(hwl) (&(hwl)) - +struct hslock { + int lock_data; +}; +typedef struct hslock hw_lock_data_t, *hw_lock_t; +#define hw_lock_addr(hwl) (&((hwl).lock_data)) #endif /* _I386_HW_LOCK_TYPES_H_ */ diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index c0343c525..78d4cef00 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -48,11 +48,9 @@ * the rights to redistribute these changes. 
*/ -#include #include #include #include -#include #include @@ -61,14 +59,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -81,14 +77,14 @@ #include #include #include -#include #include +#include +#include #if MACH_KDB #include #endif /* MACH_KDB */ #include #ifdef __MACHO__ -#include #include static KernelBootArgs_t *kernelBootArgs; @@ -100,32 +96,45 @@ vm_offset_t boot_args_start = 0; /* pointer to kernel arguments, set in start.s #include vm_offset_t edata, etext, end; +/* operations only against currently loaded 32 bit mach kernel */ +extern struct segment_command *getsegbyname(const char *); +extern struct section *firstsect(struct segment_command *); +extern struct section *nextsect(struct segment_command *, struct section *); + /* * Called first for a mach-o kernel before paging is set up. * Returns the first available physical address in memory. */ -unsigned long -i386_preinit() +void +i386_preinit(void) { struct segment_command *sgp; struct section *sp; + struct KernelBootArgs *pp; + int i; - sgp = (struct segment_command *) getsegbyname("__DATA"); + sgp = getsegbyname("__DATA"); if (sgp) { - sp = (struct section *) firstsect(sgp); + sp = firstsect(sgp); if (sp) { do { - if (sp->flags & S_ZEROFILL) + if ((sp->flags & S_ZEROFILL)) bzero((char *) sp->addr, sp->size); - } while (sp = (struct section *)nextsect(sgp, sp)); + } while ((sp = nextsect(sgp, sp))); } } - kernelBootArgs = (KernelBootArgs_t *) boot_args_start; - end = round_page( kernelBootArgs->kaddr + kernelBootArgs->ksize ); - - return end; + kernelBootArgs = (KernelBootArgs_t *) + ml_static_ptovirt(boot_args_start); + pp = (struct KernelBootArgs *) kernelBootArgs; + pp->configEnd = (char *) + ml_static_ptovirt((vm_offset_t) pp->configEnd); + for (i = 0; i < pp->numBootDrivers; i++) { + pp->driverConfig[i].address = (unsigned) + ml_static_ptovirt(pp->driverConfig[i].address); + } + return; } #endif @@ -140,30 +149,35 @@ void i386_init(void) { unsigned int maxmem; + unsigned int cpus; + + postcode(I386_INIT_ENTRY); + master_cpu = 0; + cpu_data_alloc(TRUE); cpu_init(); + postcode(CPU_INIT_D); /* * Setup some processor related structures to satisfy funnels. * Must be done before using unparallelized device drivers. */ - processor_ptr[0] = &processor_array[0]; - master_cpu = 0; - master_processor = cpu_to_processor(master_cpu); + processor_bootstrap(); PE_init_platform(FALSE, kernelBootArgs); + postcode(PE_INIT_PLATFORM_D); /* * Set up initial thread so current_thread() works early on */ thread_bootstrap(); + postcode(THREAD_BOOTSTRAP_D); printf_init(); /* Init this in case we need debugger */ panic_init(); /* Init this in case we need debugger */ /* setup debugging output if one has been chosen */ PE_init_kprintf(FALSE); - kprintf("kprintf initialized\n"); /* setup console output */ PE_init_printf(FALSE); @@ -171,7 +185,6 @@ i386_init(void) kprintf("version_variant = %s\n", version_variant); kprintf("version = %s\n", version); - /* * VM initialization, after this we're using page tables... * The maximum number of cpus must be set beforehand. 
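The reworked i386_preinit() above walks the kernel's __DATA segment and hand-clears every zero-fill section before paging is set up. Below is a minimal C sketch of that walk, assuming only the accessors the patch itself declares (getsegbyname, firstsect, nextsect) and the struct section layout from <mach-o/loader.h>; the for-loop is equivalent to the patch's do/while with its assignment-in-condition:

#include <mach-o/loader.h>   /* struct segment_command, struct section, S_ZEROFILL */
#include <strings.h>         /* bzero */

/* Accessors the patch declares against the currently loaded 32-bit kernel. */
extern struct segment_command *getsegbyname(const char *);
extern struct section *firstsect(struct segment_command *);
extern struct section *nextsect(struct segment_command *, struct section *);

static void
zerofill_data_sections(void)
{
	struct segment_command *sgp = getsegbyname("__DATA");
	struct section *sp;

	if (sgp == NULL)
		return;
	/* clear each S_ZEROFILL section of __DATA, as i386_preinit() does */
	for (sp = firstsect(sgp); sp != NULL; sp = nextsect(sgp, sp))
		if (sp->flags & S_ZEROFILL)
			bzero((char *)sp->addr, sp->size);
}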
@@ -181,11 +194,10 @@ i386_init(void) else maxmem = maxmem * (1024 * 1024); - if (PE_parse_boot_arg("cpus", &wncpu)) { - if (!((wncpu > 0) && (wncpu < NCPUS))) - wncpu = NCPUS; - } else - wncpu = NCPUS; + if (PE_parse_boot_arg("cpus", &cpus)) { + if ((0 < cpus) && (cpus < max_ncpus)) + max_ncpus = cpus; + } i386_vm_init(maxmem, kernelBootArgs); @@ -193,7 +205,7 @@ i386_init(void) /* create the console for verbose or pretty mode */ PE_create_console(); - + machine_startup(); } diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s index adfbbef81..d8d36ebbd 100644 --- a/osfmk/i386/i386_lock.s +++ b/osfmk/i386/i386_lock.s @@ -29,48 +29,88 @@ * the terms and conditions for use and redistribution. */ -#include #include #include #include #include -#include #include "assym.s" +#define PAUSE rep; nop + /* * When performance isn't the only concern, it's * nice to build stack frames... */ -#define BUILD_STACK_FRAMES ((MACH_LDEBUG || ETAP_LOCK_TRACE) && MACH_KDB) +#define BUILD_STACK_FRAMES (GPROF || \ + ((MACH_LDEBUG || ETAP_LOCK_TRACE) && MACH_KDB)) #if BUILD_STACK_FRAMES -#define L_PC 4(%ebp) -#define L_ARG0 8(%ebp) -#define L_ARG1 12(%ebp) +/* STack-frame-relative: */ +#define L_PC B_PC +#define L_ARG0 B_ARG0 +#define L_ARG1 B_ARG1 + +#define LEAF_ENTRY(name) \ + Entry(name); \ + FRAME; \ + MCOUNT + +#define LEAF_ENTRY2(n1,n2) \ + Entry(n1); \ + Entry(n2); \ + FRAME; \ + MCOUNT + +#define LEAF_RET \ + EMARF; \ + ret -#define SWT_HI -4(%ebp) -#define SWT_LO -8(%ebp) -#define MISSED -12(%ebp) +#else /* BUILD_STACK_FRAMES */ -#else /* BUILD_STACK_FRAMES */ +/* Stack-pointer-relative: */ +#define L_PC S_PC +#define L_ARG0 S_ARG0 +#define L_ARG1 S_ARG1 + +#define LEAF_ENTRY(name) \ + Entry(name) + +#define LEAF_ENTRY2(n1,n2) \ + Entry(n1); \ + Entry(n2) + +#define LEAF_RET \ + ret -#undef FRAME -#undef EMARF -#define FRAME -#define EMARF -#define L_PC (%esp) -#define L_ARG0 4(%esp) -#define L_ARG1 8(%esp) +#endif /* BUILD_STACK_FRAMES */ -#endif /* BUILD_STACK_FRAMES */ + +/* Non-leaf routines always have a stack frame: */ + +#define NONLEAF_ENTRY(name) \ + Entry(name); \ + FRAME; \ + MCOUNT + +#define NONLEAF_ENTRY2(n1,n2) \ + Entry(n1); \ + Entry(n2); \ + FRAME; \ + MCOUNT + +#define NONLEAF_RET \ + EMARF; \ + ret #define M_ILK (%edx) #define M_LOCKED MUTEX_LOCKED(%edx) #define M_WAITERS MUTEX_WAITERS(%edx) #define M_PROMOTED_PRI MUTEX_PROMOTED_PRI(%edx) +#define M_ITAG MUTEX_ITAG(%edx) +#define M_PTR MUTEX_PTR(%edx) #if MACH_LDEBUG #define M_TYPE MUTEX_TYPE(%edx) #define M_PC MUTEX_PC(%edx) @@ -78,12 +118,7 @@ #endif /* MACH_LDEBUG */ #include -#if (NCPUS > 1) #define CX(addr,reg) addr(,reg,4) -#else -#define CPU_NUMBER(reg) -#define CX(addr,reg) addr -#endif /* (NCPUS > 1) */ #if MACH_LDEBUG /* @@ -128,10 +163,9 @@ * (since a mutex lock may context switch, holding a simplelock * is not a good thing). */ -#if 0 /*MACH_RT - 11/12/99 - lion@apple.com disable check for now*/ +#if MACH_RT #define CHECK_PREEMPTION_LEVEL() \ - movl $ CPD_PREEMPTION_LEVEL,%eax ; \ - cmpl $0,%gs:(%eax) ; \ + cmpl $0,%gs:CPU_PREEMPTION_LEVEL ; \ je 1f ; \ pushl $2f ; \ call EXT(panic) ; \ @@ -145,8 +179,7 @@ #endif /* MACH_RT */ #define CHECK_NO_SIMPLELOCKS() \ - movl $ CPD_SIMPLE_LOCK_COUNT,%eax ; \ - cmpl $0,%gs:(%eax) ; \ + cmpl $0,%gs:CPU_SIMPLE_LOCK_COUNT ; \ je 1f ; \ pushl $2f ; \ call EXT(panic) ; \ @@ -160,8 +193,7 @@ * Verifies return to the correct thread in "unlock" situations. 
*/ #define CHECK_THREAD(thd) \ - movl $ CPD_ACTIVE_THREAD,%eax ; \ - movl %gs:(%eax),%ecx ; \ + movl %gs:CPU_ACTIVE_THREAD,%ecx ; \ testl %ecx,%ecx ; \ je 1f ; \ cmpl %ecx,thd ; \ @@ -175,8 +207,7 @@ 1: #define CHECK_MYLOCK(thd) \ - movl $ CPD_ACTIVE_THREAD,%eax ; \ - movl %gs:(%eax),%ecx ; \ + movl %gs:CPU_ACTIVE_THREAD,%ecx ; \ testl %ecx,%ecx ; \ je 1f ; \ cmpl %ecx,thd ; \ @@ -216,13 +247,10 @@ * * Initialize a hardware lock. */ -ENTRY(hw_lock_init) - FRAME +LEAF_ENTRY(hw_lock_init) movl L_ARG0,%edx /* fetch lock pointer */ - xorl %eax,%eax - movl %eax,0(%edx) /* clear the lock */ - EMARF - ret + movl $0,0(%edx) /* clear the lock */ + LEAF_RET /* * void hw_lock_lock(hw_lock_t) @@ -230,27 +258,22 @@ ENTRY(hw_lock_init) * Acquire lock, spinning until it becomes available. * MACH_RT: also return with preemption disabled. */ -ENTRY(hw_lock_lock) - FRAME +LEAF_ENTRY(hw_lock_lock) movl L_ARG0,%edx /* fetch lock pointer */ -1: DISABLE_PREEMPTION(%eax) - movl $1,%ecx - xchgl 0(%edx),%ecx /* try to acquire the HW lock */ - testl %ecx,%ecx /* success? */ + movl L_PC,%ecx +1: DISABLE_PREEMPTION + movl 0(%edx), %eax + testl %eax,%eax /* lock locked? */ + jne 3f /* branch if so */ + lock; cmpxchgl %ecx,0(%edx) /* try to acquire the HW lock */ jne 3f movl $1,%eax /* In case this was a timeout call */ - EMARF /* if yes, then nothing left to do */ - ret - -3: ENABLE_PREEMPTION(%eax) /* no reason we can't be preemptable now */ + LEAF_RET /* if yes, then nothing left to do */ - movl $1,%ecx -2: - rep; nop /* pause for hyper-threading */ - testl %ecx,0(%edx) /* spin checking lock value in cache */ - jne 2b /* non-zero means locked, keep spinning */ - jmp 1b /* zero means unlocked, try to grab it */ +3: ENABLE_PREEMPTION /* no reason we can't be preemptable */ + PAUSE /* pause for hyper-threading */ + jmp 1b /* try again */ /* * unsigned int hw_lock_to(hw_lock_t, unsigned int) @@ -258,22 +281,22 @@ ENTRY(hw_lock_lock) * Acquire lock, spinning until it becomes available or timeout. * MACH_RT: also return with preemption disabled. */ -ENTRY(hw_lock_to) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ +LEAF_ENTRY(hw_lock_to) 1: + movl L_ARG0,%edx /* fetch lock pointer */ + movl L_PC,%ecx /* * Attempt to grab the lock immediately * - fastpath without timeout nonsense. */ - DISABLE_PREEMPTION(%eax) + DISABLE_PREEMPTION + movl 0(%edx), %eax + testl %eax,%eax /* lock locked? */ + jne 2f /* branch if so */ + lock; cmpxchgl %ecx,0(%edx) /* try to acquire the HW lock */ + jne 2f /* branch on failure */ movl $1,%eax - xchgl 0(%edx),%eax /* try to acquire the HW lock */ - testl %eax,%eax /* success? */ - jne 2f /* no */ - movl $1,%eax /* yes, return true */ - EMARF - ret + LEAF_RET 2: #define INNER_LOOP_COUNT 1000 @@ -293,16 +316,16 @@ ENTRY(hw_lock_to) mov %edx,%ecx mov %eax,%ebx /* %ecx:%ebx is the timeout expiry */ 3: - ENABLE_PREEMPTION(%eax) /* no reason not to be preempted now */ + ENABLE_PREEMPTION /* no reason not to be preempted now */ 4: /* * The inner-loop spin to look for the lock being freed. */ - movl $1,%eax mov $(INNER_LOOP_COUNT),%edx 5: - rep; nop /* pause for hyper-threading */ - testl %eax,0(%edi) /* spin checking lock value in cache */ + PAUSE /* pause for hyper-threading */ + movl 0(%edi),%eax /* spin checking lock value in cache */ + testl %eax,%eax je 6f /* zero => unlocked, try to grab it */ decl %edx /* decrement inner loop count */ jnz 5b /* time to check for timeout? 
*/ @@ -314,28 +337,25 @@ ENTRY(hw_lock_to) cmpl %ecx,%edx /* compare high-order 32-bits */ jb 4b /* continue spinning if less, or */ cmpl %ebx,%eax /* compare low-order 32-bits */ - jb 5b /* continue is less, else bail */ + jb 5b /* continue if less, else bail */ xor %eax,%eax /* with 0 return value */ pop %ebx pop %edi - EMARF - ret + LEAF_RET 6: /* * Here to try to grab the lock that now appears to be free * after contention. */ - DISABLE_PREEMPTION(%eax) - movl $1,%eax - xchgl 0(%edi),%eax /* try to acquire the HW lock */ - testl %eax,%eax /* success? */ + movl 8+L_PC,%edx /* calling pc (8+ for pushed regs) */ + DISABLE_PREEMPTION + lock; cmpxchgl %edx,0(%edi) /* try to acquire the HW lock */ jne 3b /* no - spin again */ movl $1,%eax /* yes */ pop %ebx pop %edi - EMARF - ret + LEAF_RET /* * void hw_lock_unlock(hw_lock_t) @@ -343,492 +363,407 @@ ENTRY(hw_lock_to) * Unconditionally release lock. * MACH_RT: release preemption level. */ -ENTRY(hw_lock_unlock) - FRAME +LEAF_ENTRY(hw_lock_unlock) movl L_ARG0,%edx /* fetch lock pointer */ - xorl %eax,%eax - xchgl 0(%edx),%eax /* clear the lock... a mov instruction */ - /* ...might be cheaper and less paranoid */ - ENABLE_PREEMPTION(%eax) - EMARF - ret + movl $0,0(%edx) /* clear the lock */ + ENABLE_PREEMPTION + LEAF_RET /* * unsigned int hw_lock_try(hw_lock_t) * MACH_RT: returns with preemption disabled on success. */ -ENTRY(hw_lock_try) - FRAME +LEAF_ENTRY(hw_lock_try) movl L_ARG0,%edx /* fetch lock pointer */ - DISABLE_PREEMPTION(%eax) - movl $1,%ecx - xchgl 0(%edx),%ecx /* try to acquire the HW lock */ - testl %ecx,%ecx /* success? */ - jne 1f /* if yes, let the caller know */ + movl L_PC,%ecx + DISABLE_PREEMPTION + movl 0(%edx),%eax + testl %eax,%eax + jne 1f + lock; cmpxchgl %ecx,0(%edx) /* try to acquire the HW lock */ + jne 1f movl $1,%eax /* success */ - EMARF - ret + LEAF_RET -1: ENABLE_PREEMPTION(%eax) /* failure: release preemption... */ +1: ENABLE_PREEMPTION /* failure: release preemption... */ xorl %eax,%eax /* ...and return failure */ - EMARF - ret + LEAF_RET /* * unsigned int hw_lock_held(hw_lock_t) * MACH_RT: doesn't change preemption state. * N.B. Racy, of course. 
*/ -ENTRY(hw_lock_held) - FRAME +LEAF_ENTRY(hw_lock_held) movl L_ARG0,%edx /* fetch lock pointer */ + movl 0(%edx),%eax /* check lock value */ + testl %eax,%eax movl $1,%ecx - testl %ecx,0(%edx) /* check lock value */ - jne 1f /* non-zero means locked */ - xorl %eax,%eax /* tell caller: lock wasn't locked */ - EMARF - ret - -1: movl $1,%eax /* tell caller: lock was locked */ - EMARF - ret - + cmovne %ecx,%eax /* 0 => unlocked, 1 => locked */ + LEAF_RET +LEAF_ENTRY(mutex_init) + movl L_ARG0,%edx /* fetch lock pointer */ + xorl %eax,%eax + movl %eax,M_ILK /* clear interlock */ + movl %eax,M_LOCKED /* clear locked flag */ + movw %ax,M_WAITERS /* init waiter count */ + movw %ax,M_PROMOTED_PRI -#if 0 +#if MACH_LDEBUG + movl $ MUTEX_TAG,M_TYPE /* set lock type */ + movl %eax,M_PC /* init caller pc */ + movl %eax,M_THREAD /* and owning thread */ +#endif + LEAF_RET -ENTRY(_usimple_lock_init) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ - xorl %eax,%eax - movl %eax,USL_INTERLOCK(%edx) /* unlock the HW lock */ - EMARF - ret +NONLEAF_ENTRY2(mutex_lock,_mutex_lock) -ENTRY(_simple_lock) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ + movl B_ARG0,%edx /* fetch lock pointer */ - CHECK_SIMPLE_LOCK_TYPE() + CHECK_MUTEX_TYPE() + CHECK_NO_SIMPLELOCKS() + CHECK_PREEMPTION_LEVEL() - DISABLE_PREEMPTION(%eax) + pushf /* save interrupt state */ + cli /* disable interrupts */ -sl_get_hw: - movl $1,%ecx - xchgl USL_INTERLOCK(%edx),%ecx/* try to acquire the HW lock */ - testl %ecx,%ecx /* did we succeed? */ +ml_retry: + movl B_PC,%ecx -#if MACH_LDEBUG - je 5f - CHECK_MYLOCK(S_THREAD) - jmp sl_get_hw -5: -#else /* MACH_LDEBUG */ - jne sl_get_hw /* no, try again */ -#endif /* MACH_LDEBUG */ +ml_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp ml_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne ml_get_hw /* branch on failure to retry */ + + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex locked? */ + jne ml_fail /* yes, we lose */ + movl %gs:CPU_ACTIVE_THREAD,%ecx + movl %ecx,M_LOCKED #if MACH_LDEBUG - movl L_PC,%ecx - movl %ecx,S_PC - movl $ CPD_ACTIVE_THREAD,%eax - movl %gs:(%eax),%ecx - movl %ecx,S_THREAD - incl CX(EXT(simple_lock_count),%eax) -#if 0 - METER_SIMPLE_LOCK_LOCK(%edx) + movl %ecx,M_THREAD + movl B_PC,%ecx + movl %ecx,M_PC #endif -#if NCPUS == 1 - pushf - pushl %edx - cli - call EXT(lock_stack_push) - popl %edx - popfl -#endif /* NCPUS == 1 */ -#endif /* MACH_LDEBUG */ - - EMARF - ret -ENTRY(_simple_lock_try) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_acquire) + addl $4,%esp + popl %edx /* restore mutex address */ - CHECK_SIMPLE_LOCK_TYPE() + xorl %eax,%eax + movl %eax,M_ILK - DISABLE_PREEMPTION(%eax) + popf /* restore interrupt state */ - movl $1,%ecx - xchgl USL_INTERLOCK(%edx),%ecx/* try to acquire the HW lock */ - testl %ecx,%ecx /* did we succeed? 
*/ - jne 1f /* no, return failure */ + NONLEAF_RET -#if MACH_LDEBUG - movl L_PC,%ecx - movl %ecx,S_PC - movl $ CPD_ACTIVE_THREAD,%eax - movl %gs:(%eax),%ecx - movl %ecx,S_THREAD - incl CX(EXT(simple_lock_count),%eax) -#if 0 - METER_SIMPLE_LOCK_LOCK(%edx) -#endif -#if NCPUS == 1 - pushf - pushl %edx - cli - call EXT(lock_stack_push) - popl %edx - popfl -#endif /* NCPUS == 1 */ -#endif /* MACH_LDEBUG */ +ml_fail: +ml_block: + CHECK_MYLOCK(M_THREAD) + pushl M_LOCKED + pushl %edx /* push mutex address */ + call EXT(lck_mtx_lock_wait) /* wait for the lock */ + addl $8,%esp + movl B_ARG0,%edx /* refetch mutex address */ + jmp ml_retry /* and try again */ - movl $1,%eax /* return success */ +NONLEAF_ENTRY2(mutex_try,_mutex_try) - EMARF - ret + movl B_ARG0,%edx /* fetch lock pointer */ -1: - ENABLE_PREEMPTION(%eax) + CHECK_MUTEX_TYPE() + CHECK_NO_SIMPLELOCKS() - xorl %eax,%eax /* and return failure */ + movl B_PC,%ecx - EMARF - ret + pushf /* save interrupt state */ + cli /* disable interrupts */ -ENTRY(_simple_unlock) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ +mt_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp mt_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne mt_get_hw /* branch on failure to retry */ - CHECK_SIMPLE_LOCK_TYPE() - CHECK_THREAD(S_THREAD) + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex locked? */ + jne mt_fail /* yes, we lose */ + movl %gs:CPU_ACTIVE_THREAD,%ecx + movl %ecx,M_LOCKED #if MACH_LDEBUG - xorl %eax,%eax - movl %eax,S_THREAD /* disown thread */ - MP_DISABLE_PREEMPTION(%eax) - CPU_NUMBER(%eax) - decl CX(EXT(simple_lock_count),%eax) - MP_ENABLE_PREEMPTION(%eax) -#if 0 - METER_SIMPLE_LOCK_UNLOCK(%edx) + movl %ecx,M_THREAD + movl B_PC,%ecx + movl %ecx,M_PC #endif -#if NCPUS == 1 - pushf - pushl %edx - cli - call EXT(lock_stack_pop) - popl %edx - popfl -#endif /* NCPUS == 1 */ -#endif /* MACH_LDEBUG */ - xorl %ecx,%ecx - xchgl USL_INTERLOCK(%edx),%ecx /* unlock the HW lock */ + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_acquire) + addl $4,%esp + popl %edx /* restore mutex address */ - ENABLE_PREEMPTION(%eax) + xorl %eax,%eax + movl %eax,M_ILK - EMARF - ret + popf /* restore interrupt state */ -#endif /* 0 */ + movl $1,%eax + NONLEAF_RET -ENTRY(mutex_init) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ +mt_fail: xorl %eax,%eax - movl %eax,M_ILK /* clear interlock */ - movl %eax,M_LOCKED /* clear locked flag */ - movw %ax,M_WAITERS /* init waiter count */ - movw %ax,M_PROMOTED_PRI + movl %eax,M_ILK -#if MACH_LDEBUG - movl $ MUTEX_TAG,M_TYPE /* set lock type */ - movl %eax,M_PC /* init caller pc */ - movl %eax,M_THREAD /* and owning thread */ -#endif -#if ETAP_LOCK_TRACE - movl L_ARG1,%ecx /* fetch event type */ - pushl %ecx /* push event type */ - pushl %edx /* push mutex address */ - call EXT(etap_mutex_init) /* init ETAP data */ - addl $8,%esp -#endif /* ETAP_LOCK_TRACE */ + popf /* restore interrupt state */ - EMARF - ret + xorl %eax,%eax -ENTRY2(mutex_lock,_mutex_lock) - FRAME + NONLEAF_RET -#if ETAP_LOCK_TRACE - subl $12,%esp /* make room for locals */ - movl $0,SWT_HI /* set wait time to zero (HI) */ - movl $0,SWT_LO /* set wait time to zero (LO) */ - movl $0,MISSED /* clear local miss marker */ -#endif /* ETAP_LOCK_TRACE */ - - movl L_ARG0,%edx /* fetch lock pointer */ +NONLEAF_ENTRY(mutex_unlock) + movl B_ARG0,%edx /* fetch lock pointer */ 
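Both the mutex_lock and mutex_try paths above now take the mutex interlock the same way: probe M_ILK until it reads free, then claim it with lock; cmpxchgl, pausing between probes so as not to starve the other hyper-thread. A C sketch of that handshake, with GCC's __sync_bool_compare_and_swap standing in for the hand-rolled cmpxchg (an assumption for the sketch; the kernel uses the inline assembly shown here, not these helpers):

#include <stdbool.h>

static inline void
cpu_pause(void)
{
	__asm__ __volatile__("rep; nop");	/* PAUSE: yield to the other hyper-thread */
}

/*
 * Claim the mutex interlock: spin politely while it is owned, then
 * take it with an atomic compare-and-exchange.  The value stored is
 * the caller's PC, as the assembly stores %ecx (loaded from B_PC).
 */
static void
interlock_acquire(volatile unsigned int *ilk, unsigned int claimant)
{
	while (*ilk != 0 ||			/* read first, spin in cache */
	       !__sync_bool_compare_and_swap(ilk, 0, claimant))
		cpu_pause();
}

static void
interlock_release(volatile unsigned int *ilk)
{
	*ilk = 0;	/* a plain store suffices on x86, as mutex_unlock does */
}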
CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() + CHECK_THREAD(M_THREAD) -ml_retry: - DISABLE_PREEMPTION(%eax) + movl B_PC,%ecx -ml_get_hw: - movl $1,%ecx - xchgl %ecx,M_ILK - testl %ecx,%ecx /* did we succeed? */ - jne ml_get_hw /* no, try again */ + pushf /* save interrupt state */ + cli /* disable interrupts */ - movl $1,%ecx - xchgl %ecx,M_LOCKED /* try to set locked flag */ - testl %ecx,%ecx /* is the mutex locked? */ - jne ml_fail /* yes, we lose */ +mu_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp mu_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne mu_get_hw /* branch on failure to retry */ - pushl %edx - call EXT(mutex_lock_acquire) - addl $4,%esp - movl L_ARG0,%edx + cmpw $0,M_WAITERS /* are there any waiters? */ + jne mu_wakeup /* yes, more work to do */ + +mu_doit: #if MACH_LDEBUG - movl L_PC,%ecx - movl %ecx,M_PC - movl $ CPD_ACTIVE_THREAD,%eax - movl %gs:(%eax),%ecx - movl %ecx,M_THREAD - testl %ecx,%ecx - je 3f - incl TH_MUTEX_COUNT(%ecx) -3: + movl $0,M_THREAD /* disown thread */ #endif xorl %ecx,%ecx - xchgl %ecx,M_ILK - - ENABLE_PREEMPTION(%eax) + movl %ecx,M_LOCKED /* unlock the mutex */ -#if ETAP_LOCK_TRACE - movl L_PC,%eax /* fetch pc */ - pushl SWT_LO /* push wait time (low) */ - pushl SWT_HI /* push wait time (high) */ - pushl %eax /* push pc */ - pushl %edx /* push mutex address */ - call EXT(etap_mutex_hold) /* collect hold timestamp */ - addl $16+12,%esp /* clean up stack, adjusting for locals */ -#endif /* ETAP_LOCK_TRACE */ + movl %ecx,M_ILK - EMARF - ret + popf /* restore interrupt state */ -ml_fail: -#if ETAP_LOCK_TRACE - cmp $0,MISSED /* did we already take a wait timestamp? */ - jne ml_block /* yup. carry-on */ - pushl %edx /* push mutex address */ - call EXT(etap_mutex_miss) /* get wait timestamp */ - movl %eax,SWT_HI /* set wait time (high word) */ - movl %edx,SWT_LO /* set wait time (low word) */ - popl %edx /* clean up stack */ - movl $1,MISSED /* mark wait timestamp as taken */ -#endif /* ETAP_LOCK_TRACE */ + NONLEAF_RET -ml_block: - CHECK_MYLOCK(M_THREAD) - xorl %eax,%eax - pushl %eax /* no promotion here yet */ +mu_wakeup: + pushl M_LOCKED pushl %edx /* push mutex address */ - call EXT(mutex_lock_wait) /* wait for the lock */ + call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ addl $8,%esp - movl L_ARG0,%edx /* refetch lock pointer */ - jmp ml_retry /* and try again */ - -ENTRY2(mutex_try,_mutex_try) - FRAME + movl B_ARG0,%edx /* restore lock pointer */ + jmp mu_doit -#if ETAP_LOCK_TRACE - subl $8,%esp /* make room for locals */ - movl $0,SWT_HI /* set wait time to zero (HI) */ - movl $0,SWT_LO /* set wait time to zero (LO) */ -#endif /* ETAP_LOCK_TRACE */ +/* + * lck_mtx_lock() + * lck_mtx_try_lock() + * lck_mutex_unlock() + * + * These are variants of mutex_lock(), mutex_try() and mutex_unlock() without + * DEBUG checks (which require fields not present in lck_mtx_t's). + */ +NONLEAF_ENTRY(lck_mtx_lock) - movl L_ARG0,%edx /* fetch lock pointer */ + movl B_ARG0,%edx /* fetch lock pointer */ + cmpl $(MUTEX_IND),M_ITAG /* is this indirect? 
*/ + cmove M_PTR,%edx /* yes - take indirection */ - CHECK_MUTEX_TYPE() CHECK_NO_SIMPLELOCKS() + CHECK_PREEMPTION_LEVEL() - DISABLE_PREEMPTION(%eax) + pushf /* save interrupt state */ + cli /* disable interrupts */ -mt_get_hw: - movl $1,%ecx - xchgl %ecx,M_ILK - testl %ecx,%ecx - jne mt_get_hw +lml_retry: + movl B_PC,%ecx - movl $1,%ecx - xchgl %ecx,M_LOCKED - testl %ecx,%ecx - jne mt_fail +lml_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp lml_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne lml_get_hw /* branch on failure to retry */ + + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex locked? */ + jne lml_fail /* yes, we lose */ + movl %gs:CPU_ACTIVE_THREAD,%ecx + movl %ecx,M_LOCKED + pushl %edx /* save mutex address */ pushl %edx - call EXT(mutex_lock_acquire) + call EXT(lck_mtx_lock_acquire) addl $4,%esp - movl L_ARG0,%edx + popl %edx /* restore mutex address */ -#if MACH_LDEBUG - movl L_PC,%ecx - movl %ecx,M_PC - movl $ CPD_ACTIVE_THREAD,%ecx - movl %gs:(%ecx),%ecx - movl %ecx,M_THREAD - testl %ecx,%ecx - je 1f - incl TH_MUTEX_COUNT(%ecx) -1: -#endif + xorl %eax,%eax + movl %eax,M_ILK - xorl %ecx,%ecx - xchgl %ecx,M_ILK + popf /* restore interrupt state */ - ENABLE_PREEMPTION(%eax) + NONLEAF_RET -#if ETAP_LOCK_TRACE - movl L_PC,%eax /* fetch pc */ - pushl SWT_LO /* push wait time (low) */ - pushl SWT_HI /* push wait time (high) */ - pushl %eax /* push pc */ +lml_fail: + CHECK_MYLOCK(M_THREAD) + pushl %edx /* save mutex address */ + pushl M_LOCKED pushl %edx /* push mutex address */ - call EXT(etap_mutex_hold) /* get start hold timestamp */ - addl $16,%esp /* clean up stack, adjusting for locals */ -#endif /* ETAP_LOCK_TRACE */ + call EXT(lck_mtx_lock_wait) /* wait for the lock */ + addl $8,%esp + popl %edx /* restore mutex address */ + jmp lml_retry /* and try again */ - movl $1,%eax +NONLEAF_ENTRY(lck_mtx_try_lock) -#if MACH_LDEBUG || ETAP_LOCK_TRACE -#if ETAP_LOCK_TRACE - addl $8,%esp /* pop stack claimed on entry */ -#endif -#endif + movl B_ARG0,%edx /* fetch lock pointer */ + cmpl $(MUTEX_IND),M_ITAG /* is this indirect? */ + cmove M_PTR,%edx /* yes - take indirection */ - EMARF - ret + CHECK_NO_SIMPLELOCKS() + CHECK_PREEMPTION_LEVEL() -mt_fail: - xorl %ecx,%ecx - xchgl %ecx,M_ILK + movl B_PC,%ecx - ENABLE_PREEMPTION(%eax) + pushf /* save interrupt state */ + cli /* disable interrupts */ -#if ETAP_LOCK_TRACE - movl L_PC,%eax /* fetch pc */ - pushl SWT_LO /* push wait time (low) */ - pushl SWT_HI /* push wait time (high) */ - pushl %eax /* push pc */ - pushl %edx /* push mutex address */ - call EXT(etap_mutex_hold) /* get start hold timestamp */ - addl $16,%esp /* clean up stack, adjusting for locals */ -#endif /* ETAP_LOCK_TRACE */ +lmt_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp lmt_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne lmt_get_hw /* branch on failure to retry */ + + movl M_LOCKED,%ecx /* get lock owner */ + testl %ecx,%ecx /* is the mutex locked? 
*/ + jne lmt_fail /* yes, we lose */ + movl %gs:CPU_ACTIVE_THREAD,%ecx + movl %ecx,M_LOCKED + + pushl %edx /* save mutex address */ + pushl %edx + call EXT(lck_mtx_lock_acquire) + addl $4,%esp + popl %edx /* restore mutex address */ xorl %eax,%eax + movl %eax,M_ILK -#if MACH_LDEBUG || ETAP_LOCK_TRACE -#if ETAP_LOCK_TRACE - addl $8,%esp /* pop stack claimed on entry */ -#endif -#endif + popf /* restore interrupt state */ - EMARF - ret + movl $1,%eax /* return success */ + NONLEAF_RET -ENTRY(mutex_unlock) - FRAME - movl L_ARG0,%edx /* fetch lock pointer */ +lmt_fail: + xorl %eax,%eax + movl %eax,M_ILK -#if ETAP_LOCK_TRACE - pushl %edx /* push mutex address */ - call EXT(etap_mutex_unlock) /* collect ETAP data */ - popl %edx /* restore mutex address */ -#endif /* ETAP_LOCK_TRACE */ + popf /* restore interrupt state */ - CHECK_MUTEX_TYPE() - CHECK_THREAD(M_THREAD) + xorl %eax,%eax /* return failure */ + NONLEAF_RET - DISABLE_PREEMPTION(%eax) +NONLEAF_ENTRY(lck_mtx_unlock) -mu_get_hw: - movl $1,%ecx - xchgl %ecx,M_ILK - testl %ecx,%ecx /* did we succeed? */ - jne mu_get_hw /* no, try again */ + movl B_ARG0,%edx /* fetch lock pointer */ + cmpl $(MUTEX_IND),M_ITAG /* is this indirect? */ + cmove M_PTR,%edx /* yes - take indirection */ - cmpw $0,M_WAITERS /* are there any waiters? */ - jne mu_wakeup /* yes, more work to do */ + movl B_PC,%ecx -mu_doit: -#if MACH_LDEBUG - xorl %eax,%eax - movl %eax,M_THREAD /* disown thread */ - movl $ CPD_ACTIVE_THREAD,%eax - movl %gs:(%eax),%ecx - testl %ecx,%ecx - je 0f - decl TH_MUTEX_COUNT(%ecx) -0: -#endif + pushf /* save interrupt state */ + cli /* disable interrupts */ - xorl %ecx,%ecx - xchgl %ecx,M_LOCKED /* unlock the mutex */ +lmu_get_hw: + movl M_ILK,%eax /* read interlock */ + testl %eax,%eax /* unlocked? */ + je 1f /* yes - attempt to lock it */ + PAUSE /* no - pause */ + jmp lmu_get_hw /* try again */ +1: + lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ + jne lmu_get_hw /* branch on failure to retry */ + cmpw $0,M_WAITERS /* are there any waiters? 
*/ + jne lmu_wakeup /* yes, more work to do */ + +lmu_doit: xorl %ecx,%ecx - xchgl %ecx,M_ILK + movl %ecx,M_LOCKED /* unlock the mutex */ - ENABLE_PREEMPTION(%eax) + movl %ecx,M_ILK - EMARF - ret + popf /* restore interrupt state */ -mu_wakeup: - xorl %eax,%eax - pushl %eax /* no promotion here yet */ + NONLEAF_RET + +lmu_wakeup: + pushl %edx /* save mutex address */ + pushl M_LOCKED pushl %edx /* push mutex address */ - call EXT(mutex_unlock_wakeup)/* yes, wake a thread */ + call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ addl $8,%esp - movl L_ARG0,%edx /* refetch lock pointer */ - jmp mu_doit - -ENTRY(interlock_unlock) - FRAME - movl L_ARG0,%edx - - xorl %ecx,%ecx - xchgl %ecx,M_ILK + popl %edx /* restore mutex pointer */ + jmp lmu_doit - ENABLE_PREEMPTION(%eax) +LEAF_ENTRY(lck_mtx_ilk_unlock) + movl L_ARG0,%edx /* no indirection here */ - EMARF - ret + xorl %eax,%eax + movl %eax,M_ILK + LEAF_RET -ENTRY(_disable_preemption) +LEAF_ENTRY(_disable_preemption) #if MACH_RT - _DISABLE_PREEMPTION(%eax) + _DISABLE_PREEMPTION #endif /* MACH_RT */ - ret + LEAF_RET -ENTRY(_enable_preemption) +LEAF_ENTRY(_enable_preemption) #if MACH_RT #if MACH_ASSERT - movl $ CPD_PREEMPTION_LEVEL,%eax - cmpl $0,%gs:(%eax) + cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f - pushl %gs:(%eax) + pushl %gs:CPU_PREEMPTION_LEVEL pushl $2f call EXT(panic) hlt @@ -837,15 +772,14 @@ ENTRY(_enable_preemption) .text 1: #endif /* MACH_ASSERT */ - _ENABLE_PREEMPTION(%eax) + _ENABLE_PREEMPTION #endif /* MACH_RT */ - ret + LEAF_RET -ENTRY(_enable_preemption_no_check) +LEAF_ENTRY(_enable_preemption_no_check) #if MACH_RT #if MACH_ASSERT - movl $ CPD_PREEMPTION_LEVEL,%eax - cmpl $0,%gs:(%eax) + cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f pushl $2f call EXT(panic) @@ -855,24 +789,23 @@ ENTRY(_enable_preemption_no_check) .text 1: #endif /* MACH_ASSERT */ - _ENABLE_PREEMPTION_NO_CHECK(%eax) + _ENABLE_PREEMPTION_NO_CHECK #endif /* MACH_RT */ - ret + LEAF_RET -ENTRY(_mp_disable_preemption) -#if MACH_RT && NCPUS > 1 - _DISABLE_PREEMPTION(%eax) -#endif /* MACH_RT && NCPUS > 1*/ - ret +LEAF_ENTRY(_mp_disable_preemption) +#if MACH_RT + _DISABLE_PREEMPTION +#endif /* MACH_RT */ + LEAF_RET -ENTRY(_mp_enable_preemption) -#if MACH_RT && NCPUS > 1 +LEAF_ENTRY(_mp_enable_preemption) +#if MACH_RT #if MACH_ASSERT - movl $ CPD_PREEMPTION_LEVEL,%eax - cmpl $0,%gs:(%eax) + cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f - pushl %gs:(%eax) + pushl %gs:CPU_PREEMPTION_LEVEL pushl $2f call EXT(panic) hlt @@ -881,15 +814,14 @@ ENTRY(_mp_enable_preemption) .text 1: #endif /* MACH_ASSERT */ - _ENABLE_PREEMPTION(%eax) -#endif /* MACH_RT && NCPUS > 1 */ - ret + _ENABLE_PREEMPTION +#endif /* MACH_RT */ + LEAF_RET -ENTRY(_mp_enable_preemption_no_check) -#if MACH_RT && NCPUS > 1 +LEAF_ENTRY(_mp_enable_preemption_no_check) +#if MACH_RT #if MACH_ASSERT - movl $ CPD_PREEMPTION_LEVEL,%eax - cmpl $0,%gs:(%eax) + cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f pushl $2f call EXT(panic) @@ -899,48 +831,48 @@ ENTRY(_mp_enable_preemption_no_check) .text 1: #endif /* MACH_ASSERT */ - _ENABLE_PREEMPTION_NO_CHECK(%eax) -#endif /* MACH_RT && NCPUS > 1 */ - ret + _ENABLE_PREEMPTION_NO_CHECK +#endif /* MACH_RT */ + LEAF_RET -ENTRY(i_bit_set) - movl S_ARG0,%edx - movl S_ARG1,%eax +LEAF_ENTRY(i_bit_set) + movl L_ARG0,%edx + movl L_ARG1,%eax lock bts %dl,(%eax) - ret + LEAF_RET -ENTRY(i_bit_clear) - movl S_ARG0,%edx - movl S_ARG1,%eax +LEAF_ENTRY(i_bit_clear) + movl L_ARG0,%edx + movl L_ARG1,%eax lock btr %dl,(%eax) - ret + LEAF_RET -ENTRY(bit_lock) - movl S_ARG0,%ecx - movl S_ARG1,%eax 
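The bit_lock, bit_lock_try and bit_unlock routines being converted to LEAF_ENTRY here are thin wrappers around the atomic lock bts / lock btr instructions. A C sketch of the same bit-spin protocol: the _c names are hypothetical, the __sync builtins stand in for the locked instructions, and, unlike bts/btr, the sketch assumes the bit index falls within the word passed in:

/* Spin lock on a single bit.  __sync_fetch_and_or plays the role of
 * "lock bts" (set the bit, return its old value) and
 * __sync_fetch_and_and the role of "lock btr" (clear it). */
static void
bit_lock_c(int bit, volatile unsigned int *word)
{
	unsigned int mask = 1u << bit;

	while (__sync_fetch_and_or(word, mask) & mask)
		;	/* bit was already set: another CPU holds the lock */
}

static int
bit_lock_try_c(int bit, volatile unsigned int *word)
{
	unsigned int mask = 1u << bit;

	return !(__sync_fetch_and_or(word, mask) & mask);	/* 1 on success */
}

static void
bit_unlock_c(int bit, volatile unsigned int *word)
{
	__sync_fetch_and_and(word, ~(1u << bit));
}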
+LEAF_ENTRY(bit_lock) + movl L_ARG0,%ecx + movl L_ARG1,%eax 1: lock bts %ecx,(%eax) jb 1b - ret + LEAF_RET -ENTRY(bit_lock_try) - movl S_ARG0,%ecx - movl S_ARG1,%eax +LEAF_ENTRY(bit_lock_try) + movl L_ARG0,%ecx + movl L_ARG1,%eax lock bts %ecx,(%eax) jb bit_lock_failed - ret /* %eax better not be null ! */ + LEAF_RET /* %eax better not be null ! */ bit_lock_failed: xorl %eax,%eax - ret + LEAF_RET -ENTRY(bit_unlock) - movl S_ARG0,%ecx - movl S_ARG1,%eax +LEAF_ENTRY(bit_unlock) + movl L_ARG0,%ecx + movl L_ARG1,%eax lock btr %ecx,(%eax) - ret + LEAF_RET diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index 4af68483c..d66ffd4ef 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -48,11 +48,9 @@ * the rights to redistribute these changes. */ -#include #include #include #include -#include #include @@ -61,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -76,28 +73,23 @@ #include #include #include +#include #ifdef __MACHO__ -#include #include #endif vm_size_t mem_size = 0; -vm_offset_t first_addr = 0; /* set by start.s - keep out of bss */ vm_offset_t first_avail = 0;/* first after page tables */ vm_offset_t last_addr; uint64_t max_mem; -uint64_t sane_size; +uint64_t sane_size = 0; /* we are going to use the booter memory + table info to construct this */ -vm_offset_t avail_start, avail_end; +pmap_paddr_t avail_start, avail_end; vm_offset_t virtual_avail, virtual_end; -vm_offset_t hole_start, hole_end; -vm_offset_t avail_next; -unsigned int avail_remaining; - -/* parameters passed from bootstrap loader */ -int cnvmem = 0; /* must be in .data section */ -int extmem = 0; +pmap_paddr_t avail_remaining; +vm_offset_t static_memory_end = 0; #ifndef __MACHO__ extern char edata, end; @@ -107,13 +99,19 @@ extern char edata, end; #include vm_offset_t edata, etext, end; +/* + * _mh_execute_header is the mach_header for the currently executing + * 32 bit kernel + */ extern struct mach_header _mh_execute_header; void *sectTEXTB; int sectSizeTEXT; void *sectDATAB; int sectSizeDATA; void *sectOBJCB; int sectSizeOBJC; void *sectLINKB; int sectSizeLINK; void *sectPRELINKB; int sectSizePRELINK; +void *sectHIBB; int sectSizeHIB; +extern void *getsegdatafromheader(struct mach_header *, const char *, int *); #endif /* @@ -122,8 +120,11 @@ void *sectPRELINKB; int sectSizePRELINK; void i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) { - int i,j; /* Standard index vars. */ - vm_size_t bios_hole_size; + pmap_memory_region_t *pmptr; + MemoryRange *mptr; + ppnum_t fap; + unsigned int i; + ppnum_t maxpg = (maxmem >> I386_PGSHIFT); #ifdef __MACHO__ /* Now retrieve addresses for end, edata, and etext @@ -138,6 +139,8 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) &_mh_execute_header, "__OBJC", §SizeOBJC); sectLINKB = (void *) getsegdatafromheader( &_mh_execute_header, "__LINKEDIT", §SizeLINK); + sectHIBB = (void *)getsegdatafromheader( + &_mh_execute_header, "__HIB", §SizeHIB); sectPRELINKB = (void *) getsegdatafromheader( &_mh_execute_header, "__PRELINK", §SizePRELINK); @@ -152,10 +155,6 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) bzero((char *)&edata,(unsigned)(&end - &edata)); #endif - /* Now copy over various boot args bits.. */ - cnvmem = args->convmem; - extmem = args->extmem; - /* * Initialize the pic prior to any possible call to an spl. 
*/ @@ -163,75 +162,211 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) set_cpu_model(); vm_set_page_size(); - /* - * Initialize the Event Trace Analysis Package - * Static Phase: 1 of 2 - */ - etap_init_phase1(); - /* * Compute the memory size. */ -#if NCPUS > 1 - /* First two pages are used to boot the other cpus. */ - /* TODO - reclaim pages after all cpus have booted */ - - first_addr = MP_FIRST_ADDR; + avail_remaining = 0; + avail_end = 0; + pmptr = pmap_memory_regions; + pmap_memory_region_count = pmap_memory_region_current = 0; + fap = (ppnum_t) i386_btop(first_avail); + mptr = args->memoryMap; + +#ifdef PAE +#define FOURGIG 0x0000000100000000ULL + for (i=0; i < args->memoryMapCount; i++,mptr++) { + ppnum_t base, top; + + base = (ppnum_t) (mptr->base >> I386_PGSHIFT); + top = (ppnum_t) ((mptr->base + mptr->length) >> I386_PGSHIFT) - 1; + + if (maxmem) { + if (base >= maxpg) break; + top = (top > maxpg)? maxpg : top; + } + + if (kMemoryRangeUsable != mptr->type) continue; + sane_size += (uint64_t)(mptr->length); +#ifdef DEVICES_HANDLE_64BIT_IO /* XXX enable else clause when I/O to high memory works */ + if (top < fap) { + /* entire range below first_avail */ + continue; + } else if (mptr->base >= FOURGIG) { + /* entire range above 4GB (pre PAE) */ + continue; + } else if ( (base < fap) && + (top > fap)) { + /* spans first_avail */ + /* put mem below first avail in table but + mark already allocated */ + pmptr->base = base; + pmptr->alloc = pmptr->end = (fap - 1); + pmptr->type = mptr->type; + /* we bump these here inline so the accounting below works + correctly */ + pmptr++; + pmap_memory_region_count++; + pmptr->alloc = pmptr->base = fap; + pmptr->type = mptr->type; + pmptr->end = top; + } else if ( (mptr->base < FOURGIG) && + ((mptr->base+mptr->length) > FOURGIG) ) { + /* spans across 4GB (pre PAE) */ + pmptr->alloc = pmptr->base = base; + pmptr->type = mptr->type; + pmptr->end = (FOURGIG >> I386_PGSHIFT) - 1; + } else { + /* entire range useable */ + pmptr->alloc = pmptr->base = base; + pmptr->type = mptr->type; + pmptr->end = top; + } #else - first_addr = 0x1000; + if (top < fap) { + /* entire range below first_avail */ + continue; + } else if ( (base < fap) && + (top > fap)) { + /* spans first_avail */ + pmptr->alloc = pmptr->base = fap; + pmptr->type = mptr->type; + pmptr->end = top; + } else { + /* entire range useable */ + pmptr->alloc = pmptr->base = base; + pmptr->type = mptr->type; + pmptr->end = top; + } +#endif + if (i386_ptob(pmptr->end) > avail_end ) { + avail_end = i386_ptob(pmptr->end); + } + avail_remaining += (pmptr->end - pmptr->base); + pmap_memory_region_count++; + pmptr++; + } +#else /* non PAE follows */ +#define FOURGIG 0x0000000100000000ULL + for (i=0; i < args->memoryMapCount; i++,mptr++) { + ppnum_t base, top; + + base = (ppnum_t) (mptr->base >> I386_PGSHIFT); + top = (ppnum_t) ((mptr->base + mptr->length) >> I386_PGSHIFT) - 1; + + if (maxmem) { + if (base >= maxpg) break; + top = (top > maxpg)? 
maxpg : top; + } + + if (kMemoryRangeUsable != mptr->type) continue; + + // save other regions + if (kMemoryRangeNVS == mptr->type) { + pmptr->base = base; + pmptr->end = ((mptr->base + mptr->length + I386_PGBYTES - 1) >> I386_PGSHIFT) - 1; + pmptr->alloc = pmptr->end; + pmptr->type = mptr->type; + kprintf("NVS region: 0x%x ->0x%x\n", pmptr->base, pmptr->end); + } else if (kMemoryRangeUsable != mptr->type) { + continue; + } else { + // Usable memory region + sane_size += (uint64_t)(mptr->length); + if (top < fap) { + /* entire range below first_avail */ + /* salvage some low memory pages */ + /* we use some very low memory at startup */ + /* mark as already allocated here */ + pmptr->base = 0x18; /* PAE and HIB use below this */ + pmptr->alloc = pmptr->end = top; /* mark as already mapped */ + pmptr->type = mptr->type; + } else if (mptr->base >= FOURGIG) { + /* entire range above 4GB (pre PAE) */ + continue; + } else if ( (base < fap) && + (top > fap)) { + /* spans first_avail */ + /* put mem below first avail in table but + mark already allocated */ + pmptr->base = base; + pmptr->alloc = pmptr->end = (fap - 1); + pmptr->type = mptr->type; + /* we bump these here inline so the accounting below works + correctly */ + pmptr++; + pmap_memory_region_count++; + pmptr->alloc = pmptr->base = fap; + pmptr->type = mptr->type; + pmptr->end = top; + } else if ( (mptr->base < FOURGIG) && + ((mptr->base+mptr->length) > FOURGIG) ) { + /* spans across 4GB (pre PAE) */ + pmptr->alloc = pmptr->base = base; + pmptr->type = mptr->type; + pmptr->end = (FOURGIG >> I386_PGSHIFT) - 1; + } else { + /* entire range useable */ + pmptr->alloc = pmptr->base = base; + pmptr->type = mptr->type; + pmptr->end = top; + } + + if (i386_ptob(pmptr->end) > avail_end ) { + avail_end = i386_ptob(pmptr->end); + } + + avail_remaining += (pmptr->end - pmptr->base); + pmap_memory_region_count++; + pmptr++; + } + } #endif - /* BIOS leaves data in low memory */ - last_addr = 1024*1024 + extmem*1024; - - /* extended memory starts at 1MB */ - - bios_hole_size = 1024*1024 - trunc_page((vm_offset_t)(1024 * cnvmem)); - - /* - * Initialize for pmap_free_pages and pmap_next_page. - * These guys should be page-aligned. - */ - - hole_start = trunc_page((vm_offset_t)(1024 * cnvmem)); - hole_end = round_page((vm_offset_t)first_avail); - - /* - * compute mem_size - */ +#ifdef PRINT_PMAP_MEMORY_TABLE + { + unsigned int j; + pmap_memory_region_t *p = pmap_memory_regions; + for (j=0;jbase, p->alloc, p->end); + } + } +#endif - /* - * We're currently limited to 512 MB max physical memory. 
- */ -#define M (1024*1024) -#define MAXMEM (512*M) - if ((maxmem == 0) && (last_addr - bios_hole_size > MAXMEM)) { - printf("Physical memory %d MB, "\ - "maximum usable memory limited to %d MB\n", - (last_addr - bios_hole_size)/M, MAXMEM/M); - maxmem = MAXMEM; - } + avail_start = first_avail; - if (maxmem != 0) { - if (maxmem < (last_addr) - bios_hole_size) - last_addr = maxmem + bios_hole_size; + if (maxmem) { /* if user set maxmem try to use it */ + uint64_t tmp = (uint64_t)maxmem; + /* can't set below first_avail or above actual memory */ + if ( (maxmem > first_avail) && (tmp < sane_size) ) { + sane_size = tmp; + avail_end = maxmem; + } } + // round up to a megabyte - mostly accounting for the + // low mem madness + sane_size += ( 0x100000ULL - 1); + sane_size &= ~0xFFFFFULL; + +#ifndef PAE + if (sane_size < FOURGIG) + mem_size = (unsigned long) sane_size; + else + mem_size = (unsigned long) (FOURGIG >> 1); +#else + mem_size = (unsigned long) sane_size; +#endif - first_addr = round_page(first_addr); - last_addr = trunc_page(last_addr); - mem_size = last_addr - bios_hole_size; - - max_mem = (uint64_t)mem_size; - sane_size = max_mem; + max_mem = sane_size; - avail_start = first_addr; - avail_end = last_addr; - avail_next = avail_start; + /* now make sane size sane */ +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MEG (1024*1024) + sane_size = MIN(sane_size, 256*MEG); -#if NCPUS > 1 - interrupt_stack_alloc(); -#endif /* NCPUS > 1 */ + kprintf("Physical memory %d MB\n", + mem_size/MEG); /* * Initialize kernel physical map. @@ -239,8 +374,7 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) */ pmap_bootstrap(0); - avail_remaining = atop((avail_end - avail_start) - - (hole_end - hole_start)); + } unsigned int @@ -253,24 +387,36 @@ boolean_t pmap_next_page( ppnum_t *pn) { - if (avail_next == avail_end) - return FALSE; - /* skip the hole */ + while (pmap_memory_region_current < pmap_memory_region_count) { + if (pmap_memory_regions[pmap_memory_region_current].alloc == + pmap_memory_regions[pmap_memory_region_current].end) { + pmap_memory_region_current++; + continue; + } + *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; + avail_remaining--; - if (avail_next == hole_start) - avail_next = hole_end; - - *pn = (ppnum_t)i386_btop(avail_next); - avail_next += PAGE_SIZE; - avail_remaining--; - - return TRUE; + return TRUE; + } + return FALSE; } boolean_t pmap_valid_page( - vm_offset_t x) + ppnum_t pn) { - return ((avail_start <= x) && (x < avail_end)); + unsigned int i; + pmap_memory_region_t *pmptr = pmap_memory_regions; + + assert(pn); + for (i=0; i<pmap_memory_region_count; i++, pmptr++) { + if ( (pn >= pmptr->base) && (pn <= pmptr->end) ) { + if (pmptr->type == kMemoryRangeUsable) + return TRUE; + else + return FALSE; + } + } + return FALSE; } diff --git a/osfmk/i386/io_emulate.c b/osfmk/i386/io_emulate.c index 0c955e130..f9ed656d5 100644 --- a/osfmk/i386/io_emulate.c +++ b/osfmk/i386/io_emulate.c @@ -51,7 +51,6 @@ */ #include -#include #include #include #include @@ -68,13 +67,13 @@ #include #include +#if 1 int emulate_io( - struct i386_saved_state *regs, - int opcode, - int io_port) + __unused struct i386_saved_state *regs, + __unused int opcode, + __unused int io_port) { -#if 1 /* At the moment, we are not allowing I/O emulation * * FIXME - this should probably change due to @@ -82,7 +81,14 @@ emulate_io( */ return EM_IO_ERROR; +} #else +int +emulate_io( + struct i386_saved_state *regs, + int opcode, + int io_port) +{ thread_t thread = current_thread(); at386_io_lock_state(); @@ -133,8 +139,8 @@ emulate_io( * Make the thread 
use its IO_TSS to get the IO permissions; * it may not have had one before this. */ - act_machine_switch_pcb(thread->top_act); + act_machine_switch_pcb(thread); return EM_IO_RETRY; -#endif } +#endif diff --git a/osfmk/i386/io_map.c b/osfmk/i386/io_map.c index e650c0884..2a9ae6ddf 100644 --- a/osfmk/i386/io_map.c +++ b/osfmk/i386/io_map.c @@ -75,12 +75,15 @@ io_map(phys_addr, size) */ start = virtual_avail; virtual_avail += round_page(size); + + (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size), + VM_PROT_READ|VM_PROT_WRITE); } else { (void) kmem_alloc_pageable(kernel_map, &start, round_page(size)); + (void) pmap_map(start, phys_addr, phys_addr + round_page(size), + VM_PROT_READ|VM_PROT_WRITE); } - (void) pmap_map_bd(start, phys_addr, phys_addr + round_page(size), - VM_PROT_READ|VM_PROT_WRITE); return (start); } diff --git a/osfmk/i386/io_map_entries.h b/osfmk/i386/io_map_entries.h index 2335e4db3..ebde6d42a 100644 --- a/osfmk/i386/io_map_entries.h +++ b/osfmk/i386/io_map_entries.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,17 +22,24 @@ /* * @OSF_COPYRIGHT@ */ +#ifdef KERNEL_PRIVATE #ifndef _I386_IO_MAP_ENTRIES #define _I386_IO_MAP_ENTRIES +#include #include #ifdef __APPLE_API_PRIVATE +__BEGIN_DECLS extern vm_offset_t io_map( vm_offset_t phys_addr, vm_size_t size); +extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size); +__END_DECLS #endif /* __APPLE_API_PRIVATE */ #endif /* _I386_IO_MAP_ENTRIES */ +#endif /* KERNEL_PRIVATE */ + diff --git a/osfmk/i386/iopb.c b/osfmk/i386/iopb.c index ec00e33f2..ecf1a8c68 100644 --- a/osfmk/i386/iopb.c +++ b/osfmk/i386/iopb.c @@ -81,7 +81,7 @@ iopb_init(void) void -iopb_destroy(iopb_tss_t io_tss) +iopb_destroy(__unused iopb_tss_t io_tss) { } @@ -145,7 +145,7 @@ void iopb_init(void) { queue_init(&device_to_io_port_list); - simple_lock_init(&iopb_lock, ETAP_IO_IOPB); + simple_lock_init(&iopb_lock, 0); } /* @@ -298,7 +298,7 @@ io_tss_init( io_bitmap_init(io_tss->bitmap); io_tss->barrier = ~0; queue_init(&io_tss->io_port_list); - addr += LINEAR_KERNEL_ADDRESS; + addr |= LINEAR_KERNEL_ADDRESS; io_tss->iopb_desc[0] = ((size-1) & 0xffff) | ((addr & 0xffff) << 16); io_tss->iopb_desc[1] = ((addr & 0x00ff0000) >> 16) @@ -368,7 +368,7 @@ i386_io_port_add( || device == DEVICE_NULL) return KERN_INVALID_ARGUMENT; - pcb = thread->top_act->mact.pcb; + pcb = thread->machine.pcb; new_io_tss = 0; iu = (io_use_t) kalloc(sizeof(struct io_use)); @@ -467,7 +467,7 @@ i386_io_port_remove( || device == DEVICE_NULL) return KERN_INVALID_ARGUMENT; - pcb = thread->top_act->mact.pcb; + pcb = thread->machine.pcb; simple_lock(&iopb_lock); @@ -539,7 +539,7 @@ i386_io_port_list(thread, list, list_count) if (thread == THREAD_NULL) return KERN_INVALID_ARGUMENT; - pcb = thread->top_act->mact.pcb; + pcb = thread->machine.pcb; alloc_count = 16; /* a guess */ @@ -633,7 +633,7 @@ iopb_check_mapping( io_port_t io_port; io_use_t iu; - pcb = thread->top_act->mact.pcb; + pcb = thread->machine.pcb; simple_lock(&iopb_lock); diff --git a/osfmk/i386/iopb_entries.h b/osfmk/i386/iopb_entries.h index 8b50a7eb2..c6b638d2f 100644 --- a/osfmk/i386/iopb_entries.h +++ b/osfmk/i386/iopb_entries.h @@ -37,8 +37,8 @@ extern kern_return_t i386_io_port_list( thread_t thread, device_t ** list, unsigned int * list_count); +#endif extern void iopb_init(void); extern iopb_tss_t iopb_create(void); extern void iopb_destroy( iopb_tss_t iopb); 
-#endif diff --git a/osfmk/i386/ipl.h b/osfmk/i386/ipl.h index db59db5d1..1eded56e4 100644 --- a/osfmk/i386/ipl.h +++ b/osfmk/i386/ipl.h @@ -104,13 +104,3 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define SPL_CMP_GE(a, b) ((unsigned)(a) >= (unsigned)(b)) #define SPL_CMP_LE(a, b) ((unsigned)(a) <= (unsigned)(b)) -#ifdef MACH_KERNEL -#ifndef ASSEMBLER - -#include - -extern i386_intr_t ivect[]; -extern int iunit[]; -extern unsigned char intpri[]; -#endif /* ASSEMBLER */ -#endif /* MACH_KERNEL */ diff --git a/osfmk/i386/ldt.c b/osfmk/i386/ldt.c index 461acfce2..6c5f9840a 100644 --- a/osfmk/i386/ldt.c +++ b/osfmk/i386/ldt.c @@ -55,6 +55,7 @@ * same LDT. */ #include +#include #include #include @@ -73,18 +74,31 @@ struct fake_descriptor ldt[LDTSZ] = { ACC_P|ACC_PL_U|ACC_CALL_GATE }, /* call gate for mach rpc */ /*017*/ { 0, - (VM_MAX_ADDRESS-VM_MIN_ADDRESS-1)>>12, - SZ_32|SZ_G, + 0xfffff, + SZ_32|SZ_G, ACC_P|ACC_PL_U|ACC_CODE_R }, /* user code segment */ /*01F*/ { 0, - (VM_MAX_ADDRESS-VM_MIN_ADDRESS-1)>>12, + 0xfffff, SZ_32|SZ_G, ACC_P|ACC_PL_U|ACC_DATA_W }, /* user data segment */ /*027*/ { 0, - (VM_MAX_ADDRESS-VM_MIN_ADDRESS-1)>>12, + 0xfffff, SZ_32|SZ_G, ACC_P|ACC_PL_U|ACC_DATA_W }, /* user cthread segment */ +// Storage space for user ldt entries we will make room for 10 entries initially +// as we will probably never need many more than that +/*02F*/ { 0, 0, 0, 0}, +/*037*/ { 0, 0, 0, 0}, +/*03F*/ { 0, 0, 0, 0}, +/*047*/ { 0, 0, 0, 0}, +/*04F*/ { 0, 0, 0, 0}, +/*057*/ { 0, 0, 0, 0}, +/*05F*/ { 0, 0, 0, 0}, +/*067*/ { 0, 0, 0, 0}, +/*06F*/ { 0, 0, 0, 0}, +/*077*/ { 0, 0, 0, 0}, + }; diff --git a/osfmk/i386/lock.h b/osfmk/i386/lock.h index d503a07ba..8f31c69bb 100644 --- a/osfmk/i386/lock.h +++ b/osfmk/i386/lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,6 +58,7 @@ /* * Machine-dependent simple locks for the i386. */ +#ifdef KERNEL_PRIVATE #ifndef _I386_LOCK_H_ #define _I386_LOCK_H_ @@ -71,10 +72,32 @@ #include #include #include +#include #include #include -#include + +typedef struct { + lck_mtx_t lck_mtx; /* inlined lck_mtx, need to be first */ +#if MACH_LDEBUG + int type; +#define MUTEX_TAG 0x4d4d + vm_offset_t pc; + vm_offset_t thread; +#endif /* MACH_LDEBUG */ +} mutex_t; + +typedef struct { + decl_simple_lock_data(,interlock) /* "hardware" interlock field */ + volatile unsigned int + read_count:16, /* No. of accepted readers */ + want_upgrade:1, /* Read-to-write upgrade waiting */ + want_write:1, /* Writer is waiting, or locked for write */ + waiting:1, /* Someone is sleeping on lock */ + can_sleep:1; /* Can attempts to lock go to sleep? 
*/ +} lock_t; + +extern unsigned int LockTimeOut; /* Number of hardware ticks of a lock timeout */ #if defined(__GNUC__) @@ -118,34 +141,34 @@ : \ "r" (bit), "m" (*(volatile int *)(l))); -extern __inline__ unsigned long i_bit_isset(unsigned int testbit, volatile unsigned long *word) +static inline unsigned long i_bit_isset(unsigned int test, volatile unsigned long *word) { int bit; __asm__ volatile("btl %2,%1\n\tsbbl %0,%0" : "=r" (bit) - : "m" (word), "ir" (testbit)); + : "m" (word), "ir" (test)); return bit; } -extern __inline__ char xchgb(volatile char * cp, char new); +static inline char xchgb(volatile char * cp, char new); -extern __inline__ void atomic_incl(long * p, long delta); -extern __inline__ void atomic_incs(short * p, short delta); -extern __inline__ void atomic_incb(char * p, char delta); +static inline void atomic_incl(long * p, long delta); +static inline void atomic_incs(short * p, short delta); +static inline void atomic_incb(char * p, char delta); -extern __inline__ void atomic_decl(long * p, long delta); -extern __inline__ void atomic_decs(short * p, short delta); -extern __inline__ void atomic_decb(char * p, char delta); +static inline void atomic_decl(long * p, long delta); +static inline void atomic_decs(short * p, short delta); +static inline void atomic_decb(char * p, char delta); -extern __inline__ long atomic_getl(long * p); -extern __inline__ short atomic_gets(short * p); -extern __inline__ char atomic_getb(char * p); +static inline long atomic_getl(long * p); +static inline short atomic_gets(short * p); +static inline char atomic_getb(char * p); -extern __inline__ void atomic_setl(long * p, long value); -extern __inline__ void atomic_sets(short * p, short value); -extern __inline__ void atomic_setb(char * p, char value); +static inline void atomic_setl(long * p, long value); +static inline void atomic_sets(short * p, short value); +static inline void atomic_setb(char * p, char value); -extern __inline__ char xchgb(volatile char * cp, char new) +static inline char xchgb(volatile char * cp, char new) { register char old = new; @@ -155,104 +178,141 @@ extern __inline__ char xchgb(volatile char * cp, char new) return (old); } -extern __inline__ void atomic_incl(long * p, long delta) +/* + * Compare and exchange: + * - returns failure (0) if the location did not contain the old value, + * - returns success (1) if the location was set to the new value. 
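+ *
+ * Illustrative use, an editor's sketch rather than part of this
+ * change (`initialized' is an assumed uint32_t flag):
+ *	if (atomic_cmpxchg(&initialized, 0, 1))
+ *		... we won the race; do the one-time setup ...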
+ */ +static inline uint32_t +atomic_cmpxchg(uint32_t *p, uint32_t old, uint32_t new) +{ + uint32_t res = old; + + asm volatile( + "lock; cmpxchgl %1,%2; \n\t" + " setz %%al; \n\t" + " movzbl %%al,%0" + : "+a" (res) /* %0: old value to compare, returns success */ + : "r" (new), /* %1: new value to set */ + "m" (*(p)) /* %2: memory address */ + : "memory"); + return (res); +} + +static inline uint64_t +atomic_load64(uint64_t *quadp) +{ + uint64_t ret; + + asm volatile( + " lock; cmpxchg8b %1" + : "=A" (ret) + : "m" (*quadp), "a" (0), "d" (0), "b" (0), "c" (0)); + return (ret); +} + +static inline uint64_t +atomic_loadstore64(uint64_t *quadp, uint64_t new) +{ + uint64_t ret; + + ret = *quadp; + asm volatile( + "1: \n\t" + " lock; cmpxchg8b %1 \n\t" + " jnz 1b" + : "+A" (ret) + : "m" (*quadp), + "b" ((uint32_t)new), "c" ((uint32_t)(new >> 32))); + return (ret); +} + +static inline void atomic_incl(long * p, long delta) { -#if NEED_ATOMIC __asm__ volatile (" lock \n \ addl %0,%1" : \ : \ "r" (delta), "m" (*(volatile long *)p)); -#else /* NEED_ATOMIC */ - *p += delta; -#endif /* NEED_ATOMIC */ } -extern __inline__ void atomic_incs(short * p, short delta) +static inline void atomic_incs(short * p, short delta) { -#if NEED_ATOMIC __asm__ volatile (" lock \n \ addw %0,%1" : \ : \ "q" (delta), "m" (*(volatile short *)p)); -#else /* NEED_ATOMIC */ - *p += delta; -#endif /* NEED_ATOMIC */ } -extern __inline__ void atomic_incb(char * p, char delta) +static inline void atomic_incb(char * p, char delta) { -#if NEED_ATOMIC __asm__ volatile (" lock \n \ addb %0,%1" : \ : \ "q" (delta), "m" (*(volatile char *)p)); -#else /* NEED_ATOMIC */ - *p += delta; -#endif /* NEED_ATOMIC */ } -extern __inline__ void atomic_decl(long * p, long delta) +static inline void atomic_decl(long * p, long delta) { -#if NCPUS > 1 __asm__ volatile (" lock \n \ subl %0,%1" : \ : \ "r" (delta), "m" (*(volatile long *)p)); -#else /* NCPUS > 1 */ - *p -= delta; -#endif /* NCPUS > 1 */ } -extern __inline__ void atomic_decs(short * p, short delta) +static inline int atomic_decl_and_test(long * p, long delta) +{ + uint8_t ret; + asm volatile ( + " lock \n\t" + " subl %1,%2 \n\t" + " sete %0" + : "=qm" (ret) + : "r" (delta), "m" (*(volatile long *)p)); + return ret; +} + +static inline void atomic_decs(short * p, short delta) { -#if NEED_ATOMIC __asm__ volatile (" lock \n \ subw %0,%1" : \ : \ "q" (delta), "m" (*(volatile short *)p)); -#else /* NEED_ATOMIC */ - *p -= delta; -#endif /* NEED_ATOMIC */ } -extern __inline__ void atomic_decb(char * p, char delta) +static inline void atomic_decb(char * p, char delta) { -#if NEED_ATOMIC __asm__ volatile (" lock \n \ subb %0,%1" : \ : \ "q" (delta), "m" (*(volatile char *)p)); -#else /* NEED_ATOMIC */ - *p -= delta; -#endif /* NEED_ATOMIC */ } -extern __inline__ long atomic_getl(long * p) +static inline long atomic_getl(long * p) { return (*p); } -extern __inline__ short atomic_gets(short * p) +static inline short atomic_gets(short * p) { return (*p); } -extern __inline__ char atomic_getb(char * p) +static inline char atomic_getb(char * p) { return (*p); } -extern __inline__ void atomic_setl(long * p, long value) +static inline void atomic_setl(long * p, long value) { *p = value; } -extern __inline__ void atomic_sets(short * p, short value) +static inline void atomic_sets(short * p, short value) { *p = value; } -extern __inline__ void atomic_setb(char * p, char value) +static inline void atomic_setb(char * p, char value) { *p = value; } @@ -290,3 +350,5 @@ extern void kernel_preempt_check 
(void); #endif /* __APLE_API_PRIVATE */ #endif /* _I386_LOCK_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/locks.h b/osfmk/i386/locks.h new file mode 100644 index 000000000..40e3340e3 --- /dev/null +++ b/osfmk/i386/locks.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _I386_LOCKS_H_ +#define _I386_LOCKS_H_ + +#include +#include + +#ifdef MACH_KERNEL_PRIVATE + +#include + +extern unsigned int LcksOpts; + +#define enaLkDeb 0x00000001 /* Request debug in default attribute */ +#define enaLkStat 0x00000002 /* Request statistic in default attribute */ + +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct { + unsigned int lck_spin_data[10]; /* XXX - usimple_lock_data_t */ +} lck_spin_t; + +#define LCK_SPIN_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[10]; +} lck_spin_t; +#else +typedef struct __lck_spin_t__ lck_spin_t; +#endif +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct _lck_mtx_ { + union { + struct { + unsigned int lck_mtxd_ilk; + unsigned int lck_mtxd_locked; + unsigned short lck_mtxd_waiters; + unsigned short lck_mtxd_pri; + } lck_mtxd; + struct { + unsigned int lck_mtxi_tag; + struct _lck_mtx_ext_ *lck_mtxi_ptr; + unsigned int lck_mtxi_pad8; + } lck_mtxi; + } lck_mtx_sw; +} lck_mtx_t; + +#define lck_mtx_ilk lck_mtx_sw.lck_mtxd.lck_mtxd_ilk +#define lck_mtx_locked lck_mtx_sw.lck_mtxd.lck_mtxd_locked +#define lck_mtx_waiters lck_mtx_sw.lck_mtxd.lck_mtxd_waiters +#define lck_mtx_pri lck_mtx_sw.lck_mtxd.lck_mtxd_pri + +#define lck_mtx_tag lck_mtx_sw.lck_mtxi.lck_mtxi_tag +#define lck_mtx_ptr lck_mtx_sw.lck_mtxi.lck_mtxi_ptr + +#define LCK_MTX_TAG_INDIRECT 0x00001007 /* lock marked as Indirect */ +#define LCK_MTX_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ + +typedef struct { + unsigned int type; + vm_offset_t pc; + vm_offset_t thread; +} lck_mtx_deb_t; + +#define MUTEX_TAG 0x4d4d + +typedef struct { + unsigned int lck_mtx_stat_data; +} lck_mtx_stat_t; + +typedef struct _lck_mtx_ext_ { + lck_mtx_t lck_mtx; + struct _lck_grp_ *lck_mtx_grp; + unsigned int lck_mtx_attr; + lck_mtx_deb_t lck_mtx_deb; + lck_mtx_stat_t lck_mtx_stat; +} lck_mtx_ext_t; + +#define LCK_MTX_ATTR_DEBUG 0x1 +#define LCK_MTX_ATTR_DEBUGb 31 +#define LCK_MTX_ATTR_STAT 0x2 +#define LCK_MTX_ATTR_STATb 30 + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[3]; +} lck_mtx_t; +#else +typedef struct __lck_mtx_t__ lck_mtx_t; +#endif +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct { + hw_lock_data_t interlock; + volatile unsigned int + read_count:16, /* No. 
of accepted readers */ + want_upgrade:1, /* Read-to-write upgrade waiting */ + want_write:1, /* Writer is waiting, or locked for write */ + waiting:1, /* Someone is sleeping on lock */ + can_sleep:1; /* Can attempts to lock go to sleep? */ + unsigned int lck_rw_tag; +} lck_rw_t; + +#define LCK_RW_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[3]; +} lck_rw_t; +#else +typedef struct __lck_rw_t__ lck_rw_t; +#endif +#endif + +#endif /* _I386_LOCKS_H_ */ diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c new file mode 100644 index 000000000..3e9b62fd7 --- /dev/null +++ b/osfmk/i386/locks_i386.c @@ -0,0 +1,1870 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +/* + * File: kern/lock.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives implementation + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if MACH_KDB +#include +#include +#include +#include +#endif /* MACH_KDB */ + +#ifdef __ppc__ +#include +#endif + +#include + +#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100 +#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101 +#define LCK_RW_LCK_SHARED_CODE 0x102 +#define LCK_RW_LCK_SH_TO_EX_CODE 0x103 +#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104 +#define LCK_RW_LCK_EX_TO_SH_CODE 0x105 + + +#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) + +unsigned int LcksOpts=0; +unsigned int lock_wait_time[2] = { (unsigned int)-1, 100 } ; + +/* Forwards */ + +#if MACH_KDB +void db_print_simple_lock( + simple_lock_t addr); + +void db_print_mutex( + mutex_t * addr); +#endif /* MACH_KDB */ + + +#if USLOCK_DEBUG +/* + * Perform simple lock checks. + */ +int uslock_check = 1; +int max_lock_loops = 100000000; +decl_simple_lock_data(extern , printf_lock) +decl_simple_lock_data(extern , panic_lock) +#if MACH_KDB +decl_simple_lock_data(extern , kdb_lock) +#endif /* MACH_KDB */ +#endif /* USLOCK_DEBUG */ + + +/* + * We often want to know the addresses of the callers + * of the various lock routines. However, this information + * is only used for debugging and statistics. + */ +typedef void *pc_t; +#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS) +#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS) +#if ANY_LOCK_DEBUG +#define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l))) +#define DECL_PC(pc) pc_t pc; +#else /* ANY_LOCK_DEBUG */ +#define DECL_PC(pc) +#ifdef lint +/* + * Eliminate lint complaints about unused local pc variables. + */ +#define OBTAIN_PC(pc,l) ++pc +#else /* lint */ +#define OBTAIN_PC(pc,l) +#endif /* lint */ +#endif /* USLOCK_DEBUG */ + + +/* + * Portable lock package implementation of usimple_locks. 
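+ *
+ * Typical caller pattern, an editor's sketch (`l' here is an assumed
+ * usimple_lock_data_t declared by the caller):
+ *	usimple_lock_init(&l, 0);
+ *	usimple_lock(&l);	... returns with preemption disabled ...
+ *	usimple_unlock(&l);	... preemption re-enabled ...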
+ */ + +#if USLOCK_DEBUG +#define USLDBG(stmt) stmt +void usld_lock_init(usimple_lock_t, unsigned short); +void usld_lock_pre(usimple_lock_t, pc_t); +void usld_lock_post(usimple_lock_t, pc_t); +void usld_unlock(usimple_lock_t, pc_t); +void usld_lock_try_pre(usimple_lock_t, pc_t); +void usld_lock_try_post(usimple_lock_t, pc_t); +int usld_lock_common_checks(usimple_lock_t, char *); +#else /* USLOCK_DEBUG */ +#define USLDBG(stmt) +#endif /* USLOCK_DEBUG */ + +/* + * Routine: lck_spin_alloc_init + */ +lck_spin_t * +lck_spin_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr) +{ + lck_spin_t *lck; + + if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0) + lck_spin_init(lck, grp, attr); + + return(lck); +} + +/* + * Routine: lck_spin_free + */ +void +lck_spin_free( + lck_spin_t *lck, + lck_grp_t *grp) +{ + lck_spin_destroy(lck, grp); + kfree(lck, sizeof(lck_spin_t)); +} + +/* + * Routine: lck_spin_init + */ +void +lck_spin_init( + lck_spin_t *lck, + lck_grp_t *grp, + __unused lck_attr_t *attr) +{ + usimple_lock_init((usimple_lock_t) lck, 0); + lck_grp_reference(grp); + lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN); +} + +/* + * Routine: lck_spin_destroy + */ +void +lck_spin_destroy( + lck_spin_t *lck, + lck_grp_t *grp) +{ + if (lck->lck_spin_data[0] == LCK_SPIN_TAG_DESTROYED) + return; + lck->lck_spin_data[0] = LCK_SPIN_TAG_DESTROYED; + lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN); + lck_grp_deallocate(grp); + return; +} + +/* + * Routine: lck_spin_lock + */ +void +lck_spin_lock( + lck_spin_t *lck) +{ + usimple_lock((usimple_lock_t) lck); +} + +/* + * Routine: lck_spin_unlock + */ +void +lck_spin_unlock( + lck_spin_t *lck) +{ + usimple_unlock((usimple_lock_t) lck); +} + + +/* + * Routine: lck_spin_try_lock + */ +boolean_t +lck_spin_try_lock( + lck_spin_t *lck) +{ + usimple_lock_try((usimple_lock_t) lck); +} + +/* + * Initialize a usimple_lock. + * + * No change in preemption state. + */ +void +usimple_lock_init( + usimple_lock_t l, + __unused unsigned short tag) +{ +#ifndef MACHINE_SIMPLE_LOCK + USLDBG(usld_lock_init(l, tag)); + hw_lock_init(&l->interlock); +#else + simple_lock_init((simple_lock_t)l,tag); +#endif +} + + +/* + * Acquire a usimple_lock. + * + * Returns with preemption disabled. Note + * that the hw_lock routines are responsible for + * maintaining preemption state. + */ +void +usimple_lock( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + pc_t pc = NULL; + + OBTAIN_PC(pc, l); + USLDBG(usld_lock_pre(l, pc)); + + if(!hw_lock_to(&l->interlock, LockTimeOut)) /* Try to get the lock with a timeout */ + panic("simple lock deadlock detection - l=%08X, cpu=%d, ret=%08X", l, cpu_number(), pc); + + USLDBG(usld_lock_post(l, pc)); +#else + simple_lock((simple_lock_t)l); +#endif +} + + +/* + * Release a usimple_lock. + * + * Returns with preemption enabled. Note + * that the hw_lock routines are responsible for + * maintaining preemption state. + */ +void +usimple_unlock( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + DECL_PC(pc); + + OBTAIN_PC(pc, l); + USLDBG(usld_unlock(l, pc)); + hw_lock_unlock(&l->interlock); +#else + simple_unlock_rwmb((simple_lock_t)l); +#endif +} + + +/* + * Conditionally acquire a usimple_lock. + * + * On success, returns with preemption disabled. + * On failure, returns with preemption in the same state + * as when first invoked. Note that the hw_lock routines + * are responsible for maintaining preemption state. 
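+ *
+ * Editor's sketch of the conditional-acquire pairing:
+ *	if (usimple_lock_try(&l)) {
+ *		... critical section ...
+ *		usimple_unlock(&l);
+ *	}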
+ * + * XXX No stats are gathered on a miss; I preserved this + * behavior from the original assembly-language code, but + * doesn't it make sense to log misses? XXX + */ +unsigned int +usimple_lock_try( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + DECL_PC(pc); + unsigned int success; + + OBTAIN_PC(pc, l); + USLDBG(usld_lock_try_pre(l, pc)); + if ((success = hw_lock_try(&l->interlock))) { + USLDBG(usld_lock_try_post(l, pc)); + } + return success; +#else + return(simple_lock_try((simple_lock_t)l)); +#endif +} + +#if USLOCK_DEBUG +/* + * States of a usimple_lock. The default when initializing + * a usimple_lock is setting it up for debug checking. + */ +#define USLOCK_CHECKED 0x0001 /* lock is being checked */ +#define USLOCK_TAKEN 0x0002 /* lock has been taken */ +#define USLOCK_INIT 0xBAA0 /* lock has been initialized */ +#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED) +#define USLOCK_CHECKING(l) (uslock_check && \ + ((l)->debug.state & USLOCK_CHECKED)) + +/* + * Trace activities of a particularly interesting lock. + */ +void usl_trace(usimple_lock_t, int, pc_t, const char *); + + +/* + * Initialize the debugging information contained + * in a usimple_lock. + */ +void +usld_lock_init( + usimple_lock_t l, + __unused unsigned short tag) +{ + if (l == USIMPLE_LOCK_NULL) + panic("lock initialization: null lock pointer"); + l->lock_type = USLOCK_TAG; + l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0; + l->debug.lock_cpu = l->debug.unlock_cpu = 0; + l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC; + l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD; + l->debug.duration[0] = l->debug.duration[1] = 0; + l->debug.unlock_cpu = l->debug.unlock_cpu = 0; + l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC; + l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD; +} + + +/* + * These checks apply to all usimple_locks, not just + * those with USLOCK_CHECKED turned on. + */ +int +usld_lock_common_checks( + usimple_lock_t l, + char *caller) +{ + if (l == USIMPLE_LOCK_NULL) + panic("%s: null lock pointer", caller); + if (l->lock_type != USLOCK_TAG) + panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l); + if (!(l->debug.state & USLOCK_INIT)) + panic("%s: 0x%x is not an initialized lock", + caller, (integer_t) l); + return USLOCK_CHECKING(l); +} + + +/* + * Debug checks on a usimple_lock just before attempting + * to acquire it. + */ +/* ARGSUSED */ +void +usld_lock_pre( + usimple_lock_t l, + pc_t pc) +{ + char caller[] = "usimple_lock"; + + + if (!usld_lock_common_checks(l, caller)) + return; + +/* + * Note that we have a weird case where we are getting a lock when we are] + * in the process of putting the system to sleep. We are running with no + * current threads, therefore we can't tell if we are trying to retake a lock + * we have or someone on the other processor has it. Therefore we just + * ignore this test if the locking thread is 0. + */ + + if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread && + l->debug.lock_thread == (void *) current_thread()) { + printf("%s: lock 0x%x already locked (at 0x%x) by", + caller, (integer_t) l, l->debug.lock_pc); + printf(" current thread 0x%x (new attempt at pc 0x%x)\n", + l->debug.lock_thread, pc); + panic(caller); + } + mp_disable_preemption(); + usl_trace(l, cpu_number(), pc, caller); + mp_enable_preemption(); +} + + +/* + * Debug checks on a usimple_lock just after acquiring it. + * + * Pre-emption has been disabled at this point, + * so we are safe in using cpu_number. 
+ */ +void +usld_lock_post( + usimple_lock_t l, + pc_t pc) +{ + register int mycpu; + char caller[] = "successful usimple_lock"; + + + if (!usld_lock_common_checks(l, caller)) + return; + + if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) + panic("%s: lock 0x%x became uninitialized", + caller, (integer_t) l); + if ((l->debug.state & USLOCK_TAKEN)) + panic("%s: lock 0x%x became TAKEN by someone else", + caller, (integer_t) l); + + mycpu = cpu_number(); + l->debug.lock_thread = (void *)current_thread(); + l->debug.state |= USLOCK_TAKEN; + l->debug.lock_pc = pc; + l->debug.lock_cpu = mycpu; + + usl_trace(l, mycpu, pc, caller); +} + + +/* + * Debug checks on a usimple_lock just before + * releasing it. Note that the caller has not + * yet released the hardware lock. + * + * Preemption is still disabled, so there's + * no problem using cpu_number. + */ +void +usld_unlock( + usimple_lock_t l, + pc_t pc) +{ + register int mycpu; + char caller[] = "usimple_unlock"; + + + if (!usld_lock_common_checks(l, caller)) + return; + + mycpu = cpu_number(); + + if (!(l->debug.state & USLOCK_TAKEN)) + panic("%s: lock 0x%x hasn't been taken", + caller, (integer_t) l); + if (l->debug.lock_thread != (void *) current_thread()) + panic("%s: unlocking lock 0x%x, owned by thread 0x%x", + caller, (integer_t) l, l->debug.lock_thread); + if (l->debug.lock_cpu != mycpu) { + printf("%s: unlocking lock 0x%x on cpu 0x%x", + caller, (integer_t) l, mycpu); + printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu); + panic(caller); + } + usl_trace(l, mycpu, pc, caller); + + l->debug.unlock_thread = l->debug.lock_thread; + l->debug.lock_thread = INVALID_PC; + l->debug.state &= ~USLOCK_TAKEN; + l->debug.unlock_pc = pc; + l->debug.unlock_cpu = mycpu; +} + + +/* + * Debug checks on a usimple_lock just before + * attempting to acquire it. + * + * Preemption isn't guaranteed to be disabled. + */ +void +usld_lock_try_pre( + usimple_lock_t l, + pc_t pc) +{ + char caller[] = "usimple_lock_try"; + + if (!usld_lock_common_checks(l, caller)) + return; + mp_disable_preemption(); + usl_trace(l, cpu_number(), pc, caller); + mp_enable_preemption(); +} + + +/* + * Debug checks on a usimple_lock just after + * successfully attempting to acquire it. + * + * Preemption has been disabled by the + * lock acquisition attempt, so it's safe + * to use cpu_number. + */ +void +usld_lock_try_post( + usimple_lock_t l, + pc_t pc) +{ + register int mycpu; + char caller[] = "successful usimple_lock_try"; + + if (!usld_lock_common_checks(l, caller)) + return; + + if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) + panic("%s: lock 0x%x became uninitialized", + caller, (integer_t) l); + if ((l->debug.state & USLOCK_TAKEN)) + panic("%s: lock 0x%x became TAKEN by someone else", + caller, (integer_t) l); + + mycpu = cpu_number(); + l->debug.lock_thread = (void *) current_thread(); + l->debug.state |= USLOCK_TAKEN; + l->debug.lock_pc = pc; + l->debug.lock_cpu = mycpu; + + usl_trace(l, mycpu, pc, caller); +} + + +/* + * For very special cases, set traced_lock to point to a + * specific lock of interest. The result is a series of + * XPRs showing lock operations on that lock. The lock_seq + * value is used to show the order of those operations. 
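+ *
+ * For example (editor's note, `suspect_lock' being hypothetical),
+ * setting
+ *	traced_lock = &suspect_lock;
+ * from the debugger makes every subsequent operation on that lock
+ * emit an XPR record stamped with lock_seq.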
+ */ +usimple_lock_t traced_lock; +unsigned int lock_seq; + +void +usl_trace( + usimple_lock_t l, + int mycpu, + pc_t pc, + const char * op_name) +{ + if (traced_lock == l) { + XPR(XPR_SLOCK, + "seq %d, cpu %d, %s @ %x\n", + (integer_t) lock_seq, (integer_t) mycpu, + (integer_t) op_name, (integer_t) pc, 0); + lock_seq++; + } +} + + +#endif /* USLOCK_DEBUG */ + +/* + * Routine: lock_alloc + * Function: + * Allocate a lock for external users who cannot + * hard-code the structure definition into their + * objects. + * For now just use kalloc, but a zone is probably + * warranted. + */ +lock_t * +lock_alloc( + boolean_t can_sleep, + unsigned short tag, + unsigned short tag1) +{ + lock_t *l; + + if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0) + lock_init(l, can_sleep, tag, tag1); + return(l); +} + +/* + * Routine: lock_free + * Function: + * Free a lock allocated for external users. + * For now just use kfree, but a zone is probably + * warranted. + */ +void +lock_free( + lock_t *l) +{ + kfree(l, sizeof(lock_t)); +} + + +/* + * Routine: lock_init + * Function: + * Initialize a lock; required before use. + * Note that clients declare the "struct lock" + * variables and then initialize them, rather + * than getting a new one from this module. + */ +void +lock_init( + lock_t *l, + boolean_t can_sleep, + __unused unsigned short tag, + unsigned short tag1) +{ + (void) memset((void *) l, 0, sizeof(lock_t)); + + simple_lock_init(&l->interlock, tag1); + l->want_write = FALSE; + l->want_upgrade = FALSE; + l->read_count = 0; + l->can_sleep = can_sleep; +} + + +/* + * Sleep locks. These use the same data structure and algorithm + * as the spin locks, but the process sleeps while it is waiting + * for the lock. These work on uniprocessor systems. + */ + +#define DECREMENTER_TIMEOUT 1000000 + +void +lock_write( + register lock_t * l) +{ + register int i; + boolean_t lock_miss = FALSE; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + + simple_lock(&l->interlock); + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + + /* + * Try to acquire the want_write bit. + */ + while (l->want_write) { + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[l->can_sleep ? 1 : 0]; + if (i != 0) { + simple_unlock(&l->interlock); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - want_write"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && l->want_write) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->want_write) { + l->waiting = TRUE; + thread_sleep_simple_lock((event_t) l, + simple_lock_addr(l->interlock), + THREAD_UNINT); + /* interlock relocked */ + } + } + l->want_write = TRUE; + + /* Wait for readers (and upgrades) to finish */ + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while ((l->read_count != 0) || l->want_upgrade) { + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[l->can_sleep ? 
1 : 0]; + if (i != 0) { + simple_unlock(&l->interlock); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - wait for readers"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && (l->read_count != 0 || + l->want_upgrade)) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep_simple_lock((event_t) l, + simple_lock_addr(l->interlock), + THREAD_UNINT); + /* interlock relocked */ + } + } + + simple_unlock(&l->interlock); +} + +void +lock_done( + register lock_t * l) +{ + boolean_t do_wakeup = FALSE; + + + simple_lock(&l->interlock); + + if (l->read_count != 0) { + l->read_count--; + } + else + if (l->want_upgrade) { + l->want_upgrade = FALSE; + } + else { + l->want_write = FALSE; + } + + /* + * There is no reason to wakeup a waiting thread + * if the read-count is non-zero. Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + do_wakeup = TRUE; + } + + simple_unlock(&l->interlock); + + if (do_wakeup) + thread_wakeup((event_t) l); +} + +void +lock_read( + register lock_t * l) +{ + register int i; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + + simple_lock(&l->interlock); + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while (l->want_write || l->want_upgrade) { + i = lock_wait_time[l->can_sleep ? 1 : 0]; + + if (i != 0) { + simple_unlock(&l->interlock); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - wait no writers"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && (l->want_write || l->want_upgrade)) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->want_write || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep_simple_lock((event_t) l, + simple_lock_addr(l->interlock), + THREAD_UNINT); + /* interlock relocked */ + } + } + + l->read_count++; + + simple_unlock(&l->interlock); +} + + +/* + * Routine: lock_read_to_write + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. + */ + +boolean_t +lock_read_to_write( + register lock_t * l) +{ + register int i; + boolean_t do_wakeup = FALSE; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + + simple_lock(&l->interlock); + + l->read_count--; + + if (l->want_upgrade) { + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. + */ + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + do_wakeup = TRUE; + } + + simple_unlock(&l->interlock); + + if (do_wakeup) + thread_wakeup((event_t) l); + return (TRUE); + } + + l->want_upgrade = TRUE; + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while (l->read_count != 0) { + i = lock_wait_time[l->can_sleep ? 
1 : 0]; + + if (i != 0) { + simple_unlock(&l->interlock); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - read_count"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && l->read_count != 0) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->read_count != 0) { + l->waiting = TRUE; + thread_sleep_simple_lock((event_t) l, + simple_lock_addr(l->interlock), + THREAD_UNINT); + /* interlock relocked */ + } + } + + simple_unlock(&l->interlock); + + return (FALSE); +} + +void +lock_write_to_read( + register lock_t * l) +{ + boolean_t do_wakeup = FALSE; + + simple_lock(&l->interlock); + + l->read_count++; + if (l->want_upgrade) + l->want_upgrade = FALSE; + else + l->want_write = FALSE; + + if (l->waiting) { + l->waiting = FALSE; + do_wakeup = TRUE; + } + + simple_unlock(&l->interlock); + + if (do_wakeup) + thread_wakeup((event_t) l); +} + + +#if 0 /* Unused */ +/* + * Routine: lock_try_write + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lock_try_write( + register lock_t * l) +{ + pc_t pc; + + simple_lock(&l->interlock); + + if (l->want_write || l->want_upgrade || l->read_count) { + /* + * Can't get lock. + */ + simple_unlock(&l->interlock); + return(FALSE); + } + + /* + * Have lock. + */ + + l->want_write = TRUE; + + simple_unlock(&l->interlock); + + return(TRUE); +} + +/* + * Routine: lock_try_read + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lock_try_read( + register lock_t * l) +{ + pc_t pc; + + simple_lock(&l->interlock); + + if (l->want_write || l->want_upgrade) { + simple_unlock(&l->interlock); + return(FALSE); + } + + l->read_count++; + + simple_unlock(&l->interlock); + + return(TRUE); +} +#endif /* Unused */ + + +/* + * Routine: lck_rw_alloc_init + */ +lck_rw_t * +lck_rw_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr) { + lck_rw_t *lck; + + if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) + lck_rw_init(lck, grp, attr); + + return(lck); +} + +/* + * Routine: lck_rw_free + */ +void +lck_rw_free( + lck_rw_t *lck, + lck_grp_t *grp) { + lck_rw_destroy(lck, grp); + kfree(lck, sizeof(lck_rw_t)); +} + +/* + * Routine: lck_rw_init + */ +void +lck_rw_init( + lck_rw_t *lck, + lck_grp_t *grp, + __unused lck_attr_t *attr) { + + hw_lock_init(&lck->interlock); + lck->want_write = FALSE; + lck->want_upgrade = FALSE; + lck->read_count = 0; + lck->can_sleep = TRUE; + lck->lck_rw_tag = 0; + + lck_grp_reference(grp); + lck_grp_lckcnt_incr(grp, LCK_TYPE_RW); +} + +/* + * Routine: lck_rw_destroy + */ +void +lck_rw_destroy( + lck_rw_t *lck, + lck_grp_t *grp) { + if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) + return; + lck->lck_rw_tag = LCK_RW_TAG_DESTROYED; + lck_grp_lckcnt_decr(grp, LCK_TYPE_RW); + lck_grp_deallocate(grp); + return; +} + +/* + * Sleep locks. These use the same data structure and algorithm + * as the spin locks, but the process sleeps while it is waiting + * for the lock. These work on uniprocessor systems. + */ + +#define DECREMENTER_TIMEOUT 1000000 + + +/* + * We need to disable interrupts while holding the mutex interlock + * to prevent an IPI intervening. + * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock(). 
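+ *
+ * Usage pattern, mirroring the callers below:
+ *	istate = lck_interlock_lock(lck);	... interrupts off, interlock held ...
+ *	... examine or update the lck_rw_t fields ...
+ *	lck_interlock_unlock(lck, istate);	... interrupt state restored ...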
+ */ +static boolean_t +lck_interlock_lock(lck_rw_t *lck) +{ + boolean_t istate; + + istate = ml_set_interrupts_enabled(FALSE); + hw_lock_lock(&lck->interlock); + + return istate; +} + +static void +lck_interlock_unlock(lck_rw_t *lck, boolean_t istate) +{ + hw_lock_unlock(&lck->interlock); + ml_set_interrupts_enabled(istate); +} + +/* + * Routine: lck_rw_lock_exclusive + */ +void +lck_rw_lock_exclusive( + lck_rw_t *lck) +{ + int i; + boolean_t lock_miss = FALSE; + wait_result_t res; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + boolean_t istate; + + istate = lck_interlock_lock(lck); + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + + /* + * Try to acquire the want_write bit. + */ + while (lck->want_write) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); + + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[lck->can_sleep ? 1 : 0]; + if (i != 0) { + lck_interlock_unlock(lck, istate); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - want_write"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && lck->want_write) + continue; + istate = lck_interlock_lock(lck); + } + + if (lck->can_sleep && lck->want_write) { + lck->waiting = TRUE; + res = assert_wait((event_t) lck, THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_interlock_unlock(lck, istate); + res = thread_block(THREAD_CONTINUE_NULL); + istate = lck_interlock_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0); + } + lck->want_write = TRUE; + + /* Wait for readers (and upgrades) to finish */ + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while ((lck->read_count != 0) || lck->want_upgrade) { + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[lck->can_sleep ? 1 : 0]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START, + (int)lck, lck->read_count, lck->want_upgrade, i, 0); + + if (i != 0) { + lck_interlock_unlock(lck, istate); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - wait for readers"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && (lck->read_count != 0 || + lck->want_upgrade)) + continue; + istate = lck_interlock_lock(lck); + } + + if (lck->can_sleep && (lck->read_count != 0 || lck->want_upgrade)) { + lck->waiting = TRUE; + res = assert_wait((event_t) lck, THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_interlock_unlock(lck, istate); + res = thread_block(THREAD_CONTINUE_NULL); + istate = lck_interlock_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END, + (int)lck, lck->read_count, lck->want_upgrade, res, 0); + } + + lck_interlock_unlock(lck, istate); +} + + +/* + * Routine: lck_rw_done + */ +lck_rw_type_t +lck_rw_done( + lck_rw_t *lck) +{ + boolean_t do_wakeup = FALSE; + lck_rw_type_t lck_rw_type; + boolean_t istate; + + + istate = lck_interlock_lock(lck); + + if (lck->read_count != 0) { + lck_rw_type = LCK_RW_TYPE_SHARED; + lck->read_count--; + } + else { + lck_rw_type = LCK_RW_TYPE_EXCLUSIVE; + if (lck->want_upgrade) + lck->want_upgrade = FALSE; + else + lck->want_write = FALSE; + } + + /* + * There is no reason to wakeup a waiting thread + * if the read-count is non-zero. 
Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (lck->waiting && (lck->read_count == 0)) { + lck->waiting = FALSE; + do_wakeup = TRUE; + } + + lck_interlock_unlock(lck, istate); + + if (do_wakeup) + thread_wakeup((event_t) lck); + return(lck_rw_type); +} + + + + +/* + * Routine: lck_rw_unlock + */ +void +lck_rw_unlock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type) +{ + if (lck_rw_type == LCK_RW_TYPE_SHARED) + lck_rw_unlock_shared(lck); + else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) + lck_rw_unlock_exclusive(lck); + else + panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type); +} + + +/* + * Routine: lck_rw_unlock_shared + */ +void +lck_rw_unlock_shared( + lck_rw_t *lck) +{ + lck_rw_type_t ret; + + ret = lck_rw_done(lck); + + if (ret != LCK_RW_TYPE_SHARED) + panic("lck_rw_unlock(): lock held in mode: %d\n", ret); +} + + +/* + * Routine: lck_rw_unlock_exclusive + */ +void +lck_rw_unlock_exclusive( + lck_rw_t *lck) +{ + lck_rw_type_t ret; + + ret = lck_rw_done(lck); + + if (ret != LCK_RW_TYPE_EXCLUSIVE) + panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret); +} + + +/* + * Routine: lck_rw_lock + */ +void +lck_rw_lock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type) +{ + if (lck_rw_type == LCK_RW_TYPE_SHARED) + lck_rw_lock_shared(lck); + else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) + lck_rw_lock_exclusive(lck); + else + panic("lck_rw_lock(): Invalid RW lock type: %x\n", lck_rw_type); +} + + +/* + * Routine: lck_rw_lock_shared + */ +void +lck_rw_lock_shared( + lck_rw_t *lck) +{ + int i; + wait_result_t res; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + boolean_t istate; + + istate = lck_interlock_lock(lck); + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while (lck->want_write || lck->want_upgrade) { + i = lock_wait_time[lck->can_sleep ? 1 : 0]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, + (int)lck, lck->want_write, lck->want_upgrade, i, 0); + + if (i != 0) { + lck_interlock_unlock(lck, istate); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - wait no writers"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && (lck->want_write || lck->want_upgrade)) + continue; + istate = lck_interlock_lock(lck); + } + + if (lck->can_sleep && (lck->want_write || lck->want_upgrade)) { + lck->waiting = TRUE; + res = assert_wait((event_t) lck, THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_interlock_unlock(lck, istate); + res = thread_block(THREAD_CONTINUE_NULL); + istate = lck_interlock_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END, + (int)lck, lck->want_write, lck->want_upgrade, res, 0); + } + + lck->read_count++; + + lck_interlock_unlock(lck, istate); +} + + +/* + * Routine: lck_rw_lock_shared_to_exclusive + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. 
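+ *
+ * A caller must therefore be prepared to start over, e.g. (editor's
+ * illustrative sketch):
+ *	if (lck_rw_lock_shared_to_exclusive(lck)) {
+ *		... shared hold was lost; reacquire from scratch ...
+ *		lck_rw_lock_exclusive(lck);
+ *	}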
+ */ + +boolean_t +lck_rw_lock_shared_to_exclusive( + lck_rw_t *lck) +{ + int i; + boolean_t do_wakeup = FALSE; + wait_result_t res; +#if MACH_LDEBUG + int decrementer; +#endif /* MACH_LDEBUG */ + boolean_t istate; + + istate = lck_interlock_lock(lck); + + lck->read_count--; + + if (lck->want_upgrade) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START, + (int)lck, lck->read_count, lck->want_upgrade, 0, 0); + + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. + */ + if (lck->waiting && (lck->read_count == 0)) { + lck->waiting = FALSE; + do_wakeup = TRUE; + } + + lck_interlock_unlock(lck, istate); + + if (do_wakeup) + thread_wakeup((event_t) lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END, + (int)lck, lck->read_count, lck->want_upgrade, 0, 0); + + return (TRUE); + } + + lck->want_upgrade = TRUE; + +#if MACH_LDEBUG + decrementer = DECREMENTER_TIMEOUT; +#endif /* MACH_LDEBUG */ + while (lck->read_count != 0) { + i = lock_wait_time[lck->can_sleep ? 1 : 0]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START, + (int)lck, lck->read_count, i, 0, 0); + + if (i != 0) { + lck_interlock_unlock(lck, istate); +#if MACH_LDEBUG + if (!--decrementer) + Debugger("timeout - read_count"); +#endif /* MACH_LDEBUG */ + while (--i != 0 && lck->read_count != 0) + continue; + istate = lck_interlock_lock(lck); + } + + if (lck->can_sleep && lck->read_count != 0) { + lck->waiting = TRUE; + res = assert_wait((event_t) lck, THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_interlock_unlock(lck, istate); + res = thread_block(THREAD_CONTINUE_NULL); + istate = lck_interlock_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END, + (int)lck, lck->read_count, 0, 0, 0); + } + + lck_interlock_unlock(lck, istate); + + return (FALSE); +} + +/* + * Routine: lck_rw_lock_exclusive_to_shared + */ +void +lck_rw_lock_exclusive_to_shared( + lck_rw_t *lck) +{ + boolean_t do_wakeup = FALSE; + boolean_t istate; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START, + (int)lck, lck->want_write, lck->want_upgrade, 0, 0); + + istate = lck_interlock_lock(lck); + + lck->read_count++; + if (lck->want_upgrade) + lck->want_upgrade = FALSE; + else + lck->want_write = FALSE; + + if (lck->waiting) { + lck->waiting = FALSE; + do_wakeup = TRUE; + } + + lck_interlock_unlock(lck, istate); + + if (do_wakeup) + thread_wakeup((event_t) lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END, + (int)lck, lck->want_write, lck->want_upgrade, lck->read_count, 0); + +} + + +/* + * Routine: lck_rw_try_lock + */ +boolean_t +lck_rw_try_lock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type) +{ + if (lck_rw_type == LCK_RW_TYPE_SHARED) + return(lck_rw_try_lock_shared(lck)); + else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) + return(lck_rw_try_lock_exclusive(lck)); + else + panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type); + return(FALSE); +} + +/* + * Routine: lck_rw_try_lock_exclusive + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lck_rw_try_lock_exclusive( + lck_rw_t *lck) +{ + boolean_t istate; + + istate = lck_interlock_lock(lck); + + if (lck->want_write || lck->want_upgrade || lck->read_count) { + /* + * Can't get lock. 
+ */ + lck_interlock_unlock(lck, istate); + return(FALSE); + } + + /* + * Have lock. + */ + + lck->want_write = TRUE; + + lck_interlock_unlock(lck, istate); + + return(TRUE); +} + +/* + * Routine: lck_rw_try_lock_shared + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lck_rw_try_lock_shared( + lck_rw_t *lck) +{ + boolean_t istate; + + istate = lck_interlock_lock(lck); + + if (lck->want_write || lck->want_upgrade) { + lck_interlock_unlock(lck, istate); + return(FALSE); + } + + lck->read_count++; + + lck_interlock_unlock(lck, istate); + + return(TRUE); +} + +/* + * Routine: lck_mtx_alloc_init + */ +lck_mtx_t * +lck_mtx_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr) +{ + lck_mtx_t *lck; + + if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) + lck_mtx_init(lck, grp, attr); + + return(lck); +} + +/* + * Routine: lck_mtx_free + */ +void +lck_mtx_free( + lck_mtx_t *lck, + lck_grp_t *grp) +{ + lck_mtx_destroy(lck, grp); + kfree(lck, sizeof(lck_mtx_t)); +} + +/* + * Routine: lck_mtx_ext_init + */ +static void +lck_mtx_ext_init( + lck_mtx_ext_t *lck, + lck_grp_t *grp, + lck_attr_t *attr) +{ + lck->lck_mtx.lck_mtx_ilk = 0; + lck->lck_mtx.lck_mtx_locked = 0; + lck->lck_mtx.lck_mtx_waiters = 0; + lck->lck_mtx.lck_mtx_pri = 0; + lck->lck_mtx_attr = 0; + + if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) { + lck->lck_mtx_deb.pc = 0; + lck->lck_mtx_deb.thread = 0; + lck->lck_mtx_deb.type = MUTEX_TAG; + lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG; + } + + lck->lck_mtx_grp = grp; +} + +/* + * Routine: lck_mtx_init + */ +void +lck_mtx_init( + lck_mtx_t *lck, + lck_grp_t *grp, + lck_attr_t *attr) +{ + lck_mtx_ext_t *lck_ext; + + if ((attr != LCK_ATTR_NULL) && ((attr->lck_attr_val) & LCK_ATTR_DEBUG)) { + if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) { + lck_mtx_ext_init(lck_ext, grp, attr); + lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; + lck->lck_mtx_ptr = lck_ext; + } + } else { + lck->lck_mtx_ilk = 0; + lck->lck_mtx_locked = 0; + lck->lck_mtx_waiters = 0; + lck->lck_mtx_pri = 0; + } + lck_grp_reference(grp); + lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); +} + +/* + * Routine: lck_mtx_destroy + */ +void +lck_mtx_destroy( + lck_mtx_t *lck, + lck_grp_t *grp) +{ + boolean_t lck_is_indirect; + + if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) + return; + lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT); + lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED; + if (lck_is_indirect) + kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t)); + lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX); + lck_grp_deallocate(grp); + return; +} + +/* + * Routine: lck_mtx_assert + */ +void +lck_mtx_assert( + __unused lck_mtx_t *lck, + __unused unsigned int type) +{ +} + +#if MACH_KDB + +void db_show_one_lock(lock_t *); + +void +db_show_one_lock( + lock_t *lock) +{ + db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ", + lock->read_count, + lock->want_upgrade ? "" : "!", + lock->want_write ? "" : "!"); + db_printf("%swaiting, %scan_sleep\n", + lock->waiting ? "" : "!", lock->can_sleep ? "" : "!"); + db_printf("Interlock:\n"); + db_show_one_simple_lock((db_expr_t)simple_lock_addr(lock->interlock), + TRUE, (db_expr_t)0, (char *)0); +} + +#endif /* MACH_KDB */ + +/* + * The C portion of the mutex package. These routines are only invoked + * if the optimized assembler routines can't do the work. + */ + +/* + * Routine: lock_alloc + * Function: + * Allocate a mutex for external users who cannot + * hard-code the structure definition into their + * objects. 
+ * For now just use kalloc, but a zone is probably + * warranted. + */ +mutex_t * +mutex_alloc( + unsigned short tag) +{ + mutex_t *m; + + if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0) + mutex_init(m, tag); + return(m); +} + +/* + * Routine: mutex_free + * Function: + * Free a mutex allocated for external users. + * For now just use kfree, but a zone is probably + * warranted. + */ +void +mutex_free( + mutex_t *m) +{ + kfree(m, sizeof(mutex_t)); +} + +/* + * Routine: _mutex_assert + */ +void +_mutex_assert ( + mutex_t *mutex, + unsigned int what) +{ + + thread_t thread = current_thread(); + thread_t holder; + + if (panicstr != NULL) + return; + + holder = (thread_t) mutex->lck_mtx.lck_mtx_locked; + + switch (what) { + case MA_OWNED: + if (thread != holder) + panic("mutex %x not owned\n", mutex); + break; + + case MA_NOTOWNED: + if (thread == holder) + panic("mutex %x owned\n", mutex); + break; + } + +} + +#if MACH_KDB +/* + * Routines to print out simple_locks and mutexes in a nicely-formatted + * fashion. + */ + +char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; +char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; + +void +db_show_one_simple_lock ( + db_expr_t addr, + boolean_t have_addr, + db_expr_t count, + char * modif) +{ + simple_lock_t saddr = (simple_lock_t)addr; + + if (saddr == (simple_lock_t)0 || !have_addr) { + db_error ("No simple_lock\n"); + } +#if USLOCK_DEBUG + else if (saddr->lock_type != USLOCK_TAG) + db_error ("Not a simple_lock\n"); +#endif /* USLOCK_DEBUG */ + + db_printf ("%s\n", simple_lock_labels); + db_print_simple_lock (saddr); +} + +void +db_print_simple_lock ( + simple_lock_t addr) +{ + + db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock)); +#if USLOCK_DEBUG + db_printf (" %08x", addr->debug.lock_thread); + db_printf (" %08x ", addr->debug.duration[1]); + db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY); +#endif /* USLOCK_DEBUG */ + db_printf ("\n"); +} + +void +db_show_one_mutex ( + db_expr_t addr, + boolean_t have_addr, + db_expr_t count, + char * modif) +{ + mutex_t * maddr = (mutex_t *)addr; + + if (maddr == (mutex_t *)0 || !have_addr) + db_error ("No mutex\n"); +#if MACH_LDEBUG + else if (maddr->type != MUTEX_TAG) + db_error ("Not a mutex\n"); +#endif /* MACH_LDEBUG */ + + db_printf ("%s\n", mutex_labels); + db_print_mutex (maddr); +} + +void +db_print_mutex ( + mutex_t * addr) +{ + db_printf ("%08x %6d %7d", + addr, *addr, addr->lck_mtx.lck_mtx_waiters); +#if MACH_LDEBUG + db_printf (" %08x ", addr->thread); + db_printsym (addr->pc, DB_STGY_ANY); +#endif /* MACH_LDEBUG */ + db_printf ("\n"); +} + +#endif /* MACH_KDB */ diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s index 596c8e8bd..a4ba462cc 100644 --- a/osfmk/i386/locore.s +++ b/osfmk/i386/locore.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -48,9 +48,6 @@ * the rights to redistribute these changes. */ -#include -#include -#include #include #include #include @@ -72,6 +69,25 @@ #define PREEMPT_DEBUG_LOG 0 + +/* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). +*/ + .globl _PTmap,_PTD,_PTDpde + .set _PTmap,(PTDPTDI << PDESHIFT) + .set _PTD,_PTmap + (PTDPTDI * NBPG) + .set _PTDpde,_PTD + (PTDPTDI * PDESIZE) + +/* + * APTmap, APTD is the alternate recursive pagemap. + * It's used when modifying another process's page tables. 
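
A short usage sketch for the _mutex_assert() shown earlier in this hunk: MA_OWNED panics unless current_thread() is the recorded holder, MA_NOTOWNED panics if it is, and both become no-ops once panicstr is set, per the early return above.

static void
must_hold_model(mutex_t *m)
{
        _mutex_assert(m, MA_OWNED);     /* panics: "mutex %x not owned" */
        /* ...manipulate state protected by m... */
}
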
+ */ + .globl _APTmap,_APTD,_APTDpde + .set _APTmap,(APTDPTDI << PDESHIFT) + .set _APTD,_APTmap + (APTDPTDI * NBPG) + .set _APTDpde,_PTD + (APTDPTDI * PDESIZE) + #if __MACHO__ /* Under Mach-O, etext is a variable which contains * the last text address @@ -84,16 +100,8 @@ #define ETEXT_ADDR $ EXT(etext) #endif -#if NCPUS > 1 - #define CX(addr,reg) addr(,reg,4) -#else -#define CPU_NUMBER(reg) -#define CX(addr,reg) addr - -#endif /* NCPUS > 1 */ - .text locore_start: @@ -161,6 +169,23 @@ LEXT(retry_table_end) ;\ /* * Timing routines. */ +Entry(timer_update) + movl 4(%esp),%ecx + movl 8(%esp),%eax + movl 12(%esp),%edx + movl %eax,TIMER_HIGHCHK(%ecx) + movl %edx,TIMER_LOW(%ecx) + movl %eax,TIMER_HIGH(%ecx) + ret + +Entry(timer_grab) + movl 4(%esp),%ecx +0: movl TIMER_HIGH(%ecx),%edx + movl TIMER_LOW(%ecx),%eax + cmpl TIMER_HIGHCHK(%ecx),%edx + jne 0b + ret + #if STAT_TIME #define TIME_TRAP_UENTRY @@ -168,154 +193,102 @@ LEXT(retry_table_end) ;\ #define TIME_INT_ENTRY #define TIME_INT_EXIT -#else /* microsecond timing */ +#else +/* + * Nanosecond timing. + */ + +/* + * Low 32-bits of nanotime returned in %eax. + * Computed from tsc using conversion scale/shift from per-cpu data. + * Uses %ecx and %edx. + */ +#define NANOTIME32 \ + pushl %esi /* save %esi */ ;\ + movl %gs:CPU_THIS,%esi /* per-cpu data ptr */ ;\ + addl $(CPU_RTC_NANOTIME),%esi /* esi -> per-cpu nanotime*/ ;\ + rdtsc /* edx:eax = tsc */ ;\ + subl RTN_TSC(%esi),%eax /* eax = (tsc - base_tsc) */ ;\ + mull RTN_SCALE(%esi) /* eax *= scale */ ;\ + movl RTN_SHIFT(%esi),%ecx /* ecx = shift */ ;\ + shrdl %cl,%edx,%eax /* edx:eax >> shift */ ;\ + andb $32,%cl /* shift == 32? */ ;\ + cmovnel %edx,%eax /* %eax = %edx if so */ ;\ + addl RTN_NANOS(%esi),%eax /* add base ns */ ;\ + popl %esi /* - * Microsecond timing. - * Assumes a free-running microsecond counter. - * no TIMER_MAX check needed. + * Add 32-bit ns delta in register dreg to timer pointed to by register treg. */ +#define TIMER_UPDATE(treg,dreg) \ + addl TIMER_LOW(treg),dreg /* add delta low bits */ ;\ + adcl $0,TIMER_HIGHCHK(treg) /* add carry check bits */ ;\ + movl dreg,TIMER_LOW(treg) /* store updated low bit */ ;\ + movl TIMER_HIGHCHK(treg),dreg /* copy high check bits */ ;\ + movl dreg,TIMER_HIGH(treg) /* to high bita */ /* - * There is only one current time-stamp per CPU, since only - * the time-stamp in the current timer is used. - * To save time, we allocate the current time-stamps here. + * Add time delta to old timer and start new. */ - .comm EXT(current_tstamp), 4*NCPUS +#define TIMER_EVENT(old,new) \ + pushl %eax /* must be invariant */ ;\ + cli /* block interrupts */ ;\ + NANOTIME32 /* eax low bits nanosecs */ ;\ + movl %gs:CPU_PROCESSOR,%ecx /* get current processor */ ;\ + movl CURRENT_TIMER(%ecx),%ecx /* get current timer */ ;\ + movl %eax,%edx /* save timestamp in %edx */ ;\ + subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ;\ + TIMER_UPDATE(%ecx,%eax) /* update timer struct */ ;\ + addl $(new##_TIMER-old##_TIMER),%ecx /* point to new timer */ ;\ + movl %edx,TIMER_TSTAMP(%ecx) /* set timestamp */ ;\ + movl %gs:CPU_PROCESSOR,%edx /* get current processor */ ;\ + movl %ecx,CURRENT_TIMER(%edx) /* set current timer */ ;\ + sti /* interrupts on */ ;\ + popl %eax /* must be invariant */ /* * Update time on user trap entry. - * 11 instructions (including cli on entry) - * Assumes CPU number in %edx. - * Uses %ebx, %ecx. 
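
A C model of what NANOTIME32 computes may help; the parameter names follow the RTN_* per-cpu offsets above and are assumptions about that layout. The shrdl/cmovnel pair implements a 64-bit right shift by 0..32, which a plain uint64_t shift expresses directly:

#include <stdint.h>

/* low 32 bits of: base_ns + (((tsc_low - base_tsc) * scale) >> shift) */
static uint32_t
nanotime32_model(uint32_t tsc_low, uint32_t base_tsc,
                 uint32_t scale, uint32_t shift, uint32_t base_ns)
{
        uint64_t scaled = (uint64_t)(tsc_low - base_tsc) * scale;

        return base_ns + (uint32_t)(scaled >> shift);
}
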
- */ -#define TIME_TRAP_UENTRY \ - cli /* block interrupts */ ;\ - movl VA_ETC,%ebx /* get timer value */ ;\ - movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\ - movl %ebx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\ - subl %ecx,%ebx /* elapsed = new-old */ ;\ - movl CX(EXT(current_timer),%edx),%ecx /* get current timer */;\ - addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\ - jns 0f /* if overflow, */ ;\ - call timer_normalize /* normalize timer */ ;\ -0: addl $(TH_SYS_TIMER-TH_USER_TIMER),%ecx ;\ - /* switch to sys timer */;\ - movl %ecx,CX(EXT(current_timer),%edx) /* make it current */ ;\ - sti /* allow interrupts */ + * Uses %ecx,%edx. + */ +#define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) /* * update time on user trap exit. - * 10 instructions. - * Assumes CPU number in %edx. - * Uses %ebx, %ecx. - */ -#define TIME_TRAP_UEXIT \ - cli /* block interrupts */ ;\ - movl VA_ETC,%ebx /* get timer */ ;\ - movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\ - movl %ebx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\ - subl %ecx,%ebx /* elapsed = new-old */ ;\ - movl CX(EXT(current_timer),%edx),%ecx /* get current timer */;\ - addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\ - jns 0f /* if overflow, */ ;\ - call timer_normalize /* normalize timer */ ;\ -0: addl $(TH_USER_TIMER-TH_SYS_TIMER),%ecx ;\ - /* switch to user timer */;\ - movl %ecx,CX(EXT(current_timer),%edx) /* make it current */ + * Uses %ecx,%edx. + */ +#define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) /* * update time on interrupt entry. - * 9 instructions. - * Assumes CPU number in %edx. - * Leaves old timer in %ebx. - * Uses %ecx. + * Uses %eax,%ecx,%edx. */ #define TIME_INT_ENTRY \ - movl VA_ETC,%ecx /* get timer */ ;\ - movl CX(EXT(current_tstamp),%edx),%ebx /* get old time stamp */;\ - movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\ - subl %ebx,%ecx /* elapsed = new-old */ ;\ - movl CX(EXT(current_timer),%edx),%ebx /* get current timer */;\ - addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\ - leal CX(0,%edx),%ecx /* timer is 16 bytes */ ;\ - lea CX(EXT(kernel_timer),%edx),%ecx /* get interrupt timer*/;\ - movl %ecx,CX(EXT(current_timer),%edx) /* set timer */ + NANOTIME32 /* eax low bits nanosecs */ ;\ + movl %gs:CPU_PROCESSOR,%ecx /* get current processor */ ;\ + movl CURRENT_TIMER(%ecx),%ecx /* get current timer */ ;\ + movl %eax,%edx /* save timestamp in %edx */ ;\ + subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ;\ + TIMER_UPDATE(%ecx,%eax) /* update timer struct */ ;\ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ ;\ + addl $(SYSTEM_TIMER),%ecx /* point to sys timer */ ;\ + movl %edx,TIMER_TSTAMP(%ecx) /* set timestamp */ /* * update time on interrupt exit. - * 11 instructions - * Assumes CPU number in %edx, old timer in %ebx. - * Uses %eax, %ecx. + * Uses %eax, %ecx, %edx. */ #define TIME_INT_EXIT \ - movl VA_ETC,%eax /* get timer */ ;\ - movl CX(EXT(current_tstamp),%edx),%ecx /* get old time stamp */;\ - movl %eax,CX(EXT(current_tstamp),%edx) /* set new time stamp */;\ - subl %ecx,%eax /* elapsed = new-old */ ;\ - movl CX(EXT(current_timer),%edx),%ecx /* get current timer */;\ - addl %eax,LOW_BITS(%ecx) /* add to low bits */ ;\ - jns 0f /* if overflow, */ ;\ - call timer_normalize /* normalize timer */ ;\ -0: testb $0x80,LOW_BITS+3(%ebx) /* old timer overflow? 
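
The TIMER_UPDATE macro used by the entry/exit macros above, together with timer_grab earlier in this file, forms a lock-free 64-bit counter for 32-bit hardware: the high word is stored twice (high and highchk), the writer bumps the check copy first, and a reader retries until both copies agree. A C model (the real code additionally relies on blocked interrupts and word-sized stores, which this sketch does not reproduce):

#include <stdint.h>

struct timer_model {
        uint32_t high_chk;      /* TIMER_HIGHCHK */
        uint32_t low;           /* TIMER_LOW     */
        uint32_t high;          /* TIMER_HIGH    */
};

/* writer side: mirrors TIMER_UPDATE */
static void
timer_update_model(struct timer_model *t, uint32_t delta_ns)
{
        uint32_t lo = t->low + delta_ns;

        if (lo < t->low)
                t->high_chk++;          /* propagate the carry first */
        t->low  = lo;
        t->high = t->high_chk;          /* then publish the matching copy */
}

/* reader side: mirrors timer_grab; retries if an update raced us */
static uint64_t
timer_grab_model(const struct timer_model *t)
{
        uint32_t hi, lo;

        do {
                hi = t->high;
                lo = t->low;
        } while (hi != t->high_chk);

        return ((uint64_t)hi << 32) | lo;
}
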
*/;\ - jz 0f /* if overflow, */ ;\ - movl %ebx,%ecx /* get old timer */ ;\ - call timer_normalize /* normalize timer */ ;\ -0: movl %ebx,CX(EXT(current_timer),%edx) /* set timer */ - - -/* - * Normalize timer in ecx. - * Preserves edx; clobbers eax. - */ - .align ALIGN -timer_high_unit: - .long TIMER_HIGH_UNIT /* div has no immediate opnd */ - -timer_normalize: - pushl %edx /* save registersz */ - pushl %eax - xorl %edx,%edx /* clear divisor high */ - movl LOW_BITS(%ecx),%eax /* get divisor low */ - divl timer_high_unit,%eax /* quotient in eax */ - /* remainder in edx */ - addl %eax,HIGH_BITS_CHECK(%ecx) /* add high_inc to check */ - movl %edx,LOW_BITS(%ecx) /* remainder to low_bits */ - addl %eax,HIGH_BITS(%ecx) /* add high_inc to high bits */ - popl %eax /* restore register */ - popl %edx - ret - -/* - * Switch to a new timer. - */ -Entry(timer_switch) - CPU_NUMBER(%edx) /* get this CPU */ - movl VA_ETC,%ecx /* get timer */ - movl CX(EXT(current_tstamp),%edx),%eax /* get old time stamp */ - movl %ecx,CX(EXT(current_tstamp),%edx) /* set new time stamp */ - subl %ecx,%eax /* elapsed = new - old */ - movl CX(EXT(current_timer),%edx),%ecx /* get current timer */ - addl %eax,LOW_BITS(%ecx) /* add to low bits */ - jns 0f /* if overflow, */ - call timer_normalize /* normalize timer */ -0: - movl S_ARG0,%ecx /* get new timer */ - movl %ecx,CX(EXT(current_timer),%edx) /* set timer */ - ret + NANOTIME32 /* eax low bits nanosecs */ ;\ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ ;\ + addl $(SYSTEM_TIMER),%ecx /* point to sys timer */ ;\ + movl %eax,%edx /* save timestamp in %edx */ ;\ + subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ;\ + TIMER_UPDATE(%ecx,%eax) /* update timer struct */ ;\ + movl %gs:CPU_PROCESSOR,%ecx /* get current processor */ ;\ + movl CURRENT_TIMER(%ecx),%ecx /* interrupted timer */ ;\ + movl %edx,TIMER_TSTAMP(%ecx) /* set timestamp */ -/* - * Initialize the first timer for a CPU. - */ -Entry(start_timer) - CPU_NUMBER(%edx) /* get this CPU */ - movl VA_ETC,%ecx /* get timer */ - movl %ecx,CX(EXT(current_tstamp),%edx) /* set initial time stamp */ - movl S_ARG0,%ecx /* get timer */ - movl %ecx,CX(EXT(current_timer),%edx) /* set initial timer */ - ret - -#endif /* accurate timing */ +#endif /* STAT_TIME */ /* * Encapsulate the transfer of exception stack frames between a PCB @@ -484,13 +457,9 @@ Entry(db_task_start) movl %eax,R_ERR(%esp) movl %ebx,R_TRAPNO(%esp) pushl %edx -#if NCPUS > 1 CPU_NUMBER(%edx) movl CX(EXT(mp_dbtss),%edx),%edx movl TSS_LINK(%edx),%eax -#else - movl EXT(dbtss)+TSS_LINK,%eax -#endif pushl %eax /* pass along selector of previous TSS */ call EXT(db_tss_to_frame) popl %eax /* get rid of TSS selector */ @@ -703,8 +672,7 @@ trap_set_segs: jnz trap_from_user /* user mode trap if so */ testb $3,R_CS(%esp) /* user mode trap? */ jnz trap_from_user - CPU_NUMBER(%edx) - cmpl $0,CX(EXT(active_kloaded),%edx) + cmpl $0,%gs:CPU_ACTIVE_KLOADED je trap_from_kernel /* if clear, truly in kernel */ #ifdef FIXME cmpl ETEXT_ADDR,R_EIP(%esp) /* pc within kernel? */ @@ -718,25 +686,23 @@ trap_from_kloaded: * up a simulated "uesp" manually, since there's none in the * frame. 
*/ - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs CAH(atstart) - CPU_NUMBER(%edx) - movl CX(EXT(active_kloaded),%edx),%ebx - movl CX(EXT(kernel_stack),%edx),%eax + movl %gs:CPU_ACTIVE_KLOADED,%ebx + movl %gs:CPU_KERNEL_STACK,%eax xchgl %esp,%eax FRAME_STACK_TO_PCB(%ebx,%eax) CAH(atend) jmp EXT(take_trap) trap_from_user: - mov $ CPU_DATA,%ax + mov $ CPU_DATA_GS,%ax mov %ax,%gs - CPU_NUMBER(%edx) TIME_TRAP_UENTRY - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx xchgl %ebx,%esp /* switch to kernel stack */ /* user regs pointer already set */ LEXT(take_trap) @@ -751,11 +717,11 @@ LEXT(take_trap) */ LEXT(return_from_trap) - CPU_NUMBER(%edx) - cmpl $0,CX(EXT(need_ast),%edx) + movl %gs:CPU_PENDING_AST,%edx + cmpl $0,%edx je EXT(return_to_user) /* if we need an AST: */ - movl CX(EXT(kernel_stack),%edx),%esp + movl %gs:CPU_KERNEL_STACK,%esp /* switch to kernel stack */ pushl $0 /* push preemption flag */ call EXT(i386_astintr) /* take the AST */ @@ -771,16 +737,13 @@ LEXT(return_from_trap) */ LEXT(return_to_user) TIME_TRAP_UEXIT - CPU_NUMBER(%eax) - cmpl $0,CX(EXT(active_kloaded),%eax) + cmpl $0,%gs:CPU_ACTIVE_KLOADED jnz EXT(return_xfer_stack) - movl $ CPD_ACTIVE_THREAD,%ebx - movl %gs:(%ebx),%ebx /* get active thread */ + movl %gs:CPU_ACTIVE_THREAD, %ebx /* get active thread */ #if MACH_RT #if MACH_ASSERT - movl $ CPD_PREEMPTION_LEVEL,%ebx - cmpl $0,%gs:(%ebx) + cmpl $0,%gs:CPU_PREEMPTION_LEVEL je EXT(return_from_kernel) int $3 #endif /* MACH_ASSERT */ @@ -814,9 +777,8 @@ LEXT(return_xfer_stack) * to do so for us. */ CAH(rxsstart) - CPU_NUMBER(%eax) - movl CX(EXT(kernel_stack),%eax),%esp - movl CX(EXT(active_kloaded),%eax),%eax + movl %gs:CPU_KERNEL_STACK,%esp + movl %gs:CPU_ACTIVE_KLOADED,%eax FRAME_PCB_TO_STACK(%eax) movl %eax,%esp CAH(rxsend) @@ -837,14 +799,12 @@ LEXT(return_xfer_stack) * stack pointer. */ LEXT(return_kernel_loading) - CPU_NUMBER(%eax) - movl CX(EXT(kernel_stack),%eax),%esp - movl $ CPD_ACTIVE_THREAD,%ebx - movl %gs:(%ebx),%ebx /* get active thread */ + movl %gs:CPU_KERNEL_STACK,%esp + movl %gs:CPU_ACTIVE_THREAD, %ebx /* get active thread */ movl %ebx,%edx /* save for later */ FRAME_PCB_TO_STACK(%ebx) movl %ebx,%esp /* start running on new stack */ - movl $0,CX(EXT(active_kloaded),%eax) /* set cached indicator */ + movl $0,%gs:CPU_ACTIVE_KLOADED /* set cached indicator */ jmp EXT(return_from_kernel) /* @@ -852,7 +812,7 @@ LEXT(return_kernel_loading) */ trap_from_kernel: #if MACH_KDB || MACH_KGDB - mov $ CPU_DATA,%ax + mov $ CPU_DATA_GS,%ax mov %ax,%gs movl %esp,%ebx /* save current stack */ @@ -888,11 +848,9 @@ trap_from_kernel: #if MACH_KDB cmpl $0,EXT(db_active) /* could trap be from ddb? 
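
The mechanical change repeated throughout these trap-path hunks is a data-layout one: per-CPU state moves out of parallel arrays indexed by CPU_NUMBER into a single structure reachable through the %gs segment base, so one %gs-relative load replaces a cpu-number fetch plus an indexed CX() access. A sketch of the shape (field names mirror the CPU_* offsets; the real cpu_data definition lives elsewhere in this patch):

struct cpu_data_model {                 /* illustrative subset */
        void         *cpu_active_thread;
        unsigned int  cpu_kernel_stack;
        unsigned int  cpu_active_kloaded;
        unsigned int  cpu_pending_ast;
        unsigned int  cpu_preemption_level;
        unsigned int  cpu_interrupt_level;
};
/* old: kernel_stack[cpu_number()]            CX(EXT(kernel_stack),%edx) */
/* new: one segment-relative field load       movl %gs:CPU_KERNEL_STACK  */
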
*/ je 3f /* no */ -#if NCPUS > 1 CPU_NUMBER(%edx) /* see if this CPU is in ddb */ cmpl $0,CX(EXT(kdb_active),%edx) je 3f /* no */ -#endif /* NCPUS > 1 */ pushl %esp call EXT(db_trap_from_asm) addl $0x4,%esp @@ -923,14 +881,13 @@ trap_from_kernel: 4: #endif /* MACH_KDB */ - CPU_NUMBER(%edx) /* get CPU number */ - cmpl CX(EXT(kernel_stack),%edx),%esp + cmpl %gs:CPU_KERNEL_STACK,%esp /* if not already on kernel stack, */ ja 5f /* check some more */ - cmpl CX(EXT(active_stacks),%edx),%esp + cmpl %gs:CPU_ACTIVE_STACK,%esp ja 6f /* on kernel stack: no switch */ 5: - movl CX(EXT(kernel_stack),%edx),%esp + movl %gs:CPU_KERNEL_STACK,%esp 6: pushl %ebx /* save old stack */ pushl %ebx /* pass as parameter */ @@ -974,16 +931,12 @@ trap_from_kernel: #endif /* MACH_KDB || MACH_KGDB */ #if MACH_RT - CPU_NUMBER(%edx) - - movl CX(EXT(need_ast),%edx),%eax /* get pending asts */ + movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ testl $ AST_URGENT,%eax /* any urgent preemption? */ je EXT(return_from_kernel) /* no, nothing to do */ - cmpl $0,EXT(preemptable) /* kernel-mode, preemption enabled? */ - je EXT(return_from_kernel) /* no, skip it */ cmpl $ T_PREEMPT,48(%esp) /* preempt request? */ jne EXT(return_from_kernel) /* no, nothing to do */ - movl CX(EXT(kernel_stack),%edx),%eax + movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx andl $(-KERNEL_STACK_SIZE),%ecx @@ -1025,12 +978,20 @@ LEXT(thread_bootstrap_return) Entry(call_continuation) movl S_ARG0,%eax /* get continuation */ - movl %esp,%ecx /* get kernel stack */ - or $(KERNEL_STACK_SIZE-1),%ecx - addl $(-3-IKS_SIZE),%ecx - movl %ecx,%esp /* pop the stack */ + movl S_ARG1,%edx /* continuation param */ + movl S_ARG2,%ecx /* wait result */ + movl %esp,%ebp /* get kernel stack */ + or $(KERNEL_STACK_SIZE-1),%ebp + addl $(-3-IKS_SIZE),%ebp + movl %ebp,%esp /* pop the stack */ xorl %ebp,%ebp /* zero frame pointer */ - jmp *%eax /* goto continuation */ + pushl %ecx + pushl %edx + call *%eax /* call continuation */ + addl $8,%esp + movl %gs:CPU_ACTIVE_THREAD,%eax + pushl %eax + call EXT(thread_terminate) #if 0 #define LOG_INTERRUPT(info,msg) \ @@ -1067,9 +1028,6 @@ Entry(all_intrs) pushl %edx cld /* clear direction flag */ - cmpl %ss:EXT(int_stack_high),%esp /* on an interrupt stack? 
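
The xorl/andl pair in the urgent-AST preemption check above is a same-stack test: kernel stacks are KERNEL_STACK_SIZE-aligned, so two pointers lie on the same stack exactly when they agree in every bit above the stack size. As C:

#include <stdint.h>

static int
on_same_kernel_stack_model(uintptr_t stack, uintptr_t sp)
{
        return ((stack ^ sp) & ~((uintptr_t)KERNEL_STACK_SIZE - 1)) == 0;
}
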
*/ - jb int_from_intstack /* if not: */ - pushl %ds /* save segment registers */ pushl %es pushl %fs @@ -1077,59 +1035,50 @@ Entry(all_intrs) mov %ss,%dx /* switch to kernel segments */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs - CPU_NUMBER(%edx) - - movl CX(EXT(int_stack_top),%edx),%ecx + /* + * test whether already on interrupt stack + */ + movl %gs:CPU_INT_STACK_TOP,%ecx + cmpl %esp,%ecx + jb 1f + leal -INTSTACK_SIZE(%ecx),%edx + cmpl %esp,%edx + jb int_from_intstack +1: movl %esp,%edx /* & i386_interrupt_state */ xchgl %ecx,%esp /* switch to interrupt stack */ -#if STAT_TIME pushl %ecx /* save pointer to old stack */ -#else - pushl %ebx /* save %ebx - out of the way */ - /* so stack looks the same */ - pushl %ecx /* save pointer to old stack */ - TIME_INT_ENTRY /* do timing */ -#endif - pushl %edx /* pass &i386_interrupt_state to pe_incoming_interrupt */ + pushl %eax /* push trap number */ + TIME_INT_ENTRY /* do timing */ + #if MACH_RT - movl $ CPD_PREEMPTION_LEVEL,%edx - incl %gs:(%edx) + incl %gs:CPU_PREEMPTION_LEVEL #endif /* MACH_RT */ + incl %gs:CPU_INTERRUPT_LEVEL - movl $ CPD_INTERRUPT_LEVEL,%edx - incl %gs:(%edx) - - pushl %eax /* Push trap number */ call EXT(PE_incoming_interrupt) /* call generic interrupt routine */ addl $8,%esp /* Pop trap number and eip */ .globl EXT(return_to_iret) LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ - movl $ CPD_INTERRUPT_LEVEL,%edx - decl %gs:(%edx) + decl %gs:CPU_INTERRUPT_LEVEL #if MACH_RT - movl $ CPD_PREEMPTION_LEVEL,%edx - decl %gs:(%edx) + decl %gs:CPU_PREEMPTION_LEVEL #endif /* MACH_RT */ -#if STAT_TIME -#else TIME_INT_EXIT /* do timing */ - movl 4(%esp),%ebx /* restore the extra reg we saved */ -#endif popl %esp /* switch back to old stack */ - CPU_NUMBER(%edx) - movl CX(EXT(need_ast),%edx),%eax + movl %gs:CPU_PENDING_AST,%eax testl %eax,%eax /* any pending asts? */ je 1f /* no, nothing to do */ testl $(EFL_VM),I_EFL(%esp) /* if in V86 */ @@ -1142,16 +1091,13 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ #endif #if MACH_RT - cmpl $0,EXT(preemptable) /* kernel-mode, preemption enabled? */ - je 1f /* no, skip it */ - movl $ CPD_PREEMPTION_LEVEL,%ecx - cmpl $0,%gs:(%ecx) /* preemption masked? */ + cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption masked? */ jne 1f /* yes, skip it */ testl $ AST_URGENT,%eax /* any urgent requests? */ je 1f /* no, skip it */ cmpl $ EXT(locore_end),I_EIP(%esp) /* are we in locore code? 
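
The new "already on interrupt stack?" test at the top of all_intrs replaces the old int_stack_high global with a per-cpu range check: the interrupt stack occupies the half-open range (top - INTSTACK_SIZE, top]. A C model of the two compares:

#include <stdint.h>

static int
on_interrupt_stack_model(uintptr_t top, uintptr_t sp)
{
        return sp <= top && sp > top - INTSTACK_SIZE;
}
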
*/ jb 1f /* yes, skip it */ - movl CX(EXT(kernel_stack),%edx),%eax + movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx andl $(-KERNEL_STACK_SIZE),%ecx @@ -1193,32 +1139,27 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ int_from_intstack: #if MACH_RT - movl $ CPD_PREEMPTION_LEVEL,%edx - incl %gs:(%edx) + incl %gs:CPU_PREEMPTION_LEVEL #endif /* MACH_RT */ - movl $ CPD_INTERRUPT_LEVEL,%edx - incl %gs:(%edx) + incl %gs:CPU_INTERRUPT_LEVEL - subl $16, %esp /* dummy ds, es, fs, gs */ - movl %esp, %edx /* &i386_interrupt_state */ + movl %esp, %edx /* i386_interrupt_state */ pushl %edx /* pass &i386_interrupt_state to PE_incoming_interrupt /* pushl %eax /* Push trap number */ call EXT(PE_incoming_interrupt) - addl $20,%esp /* pop i386_interrupt_state, dummy gs,fs,es,ds */ + addl $20,%esp /* pop i386_interrupt_state, gs,fs,es,ds */ LEXT(return_to_iret_i) /* ( label for kdb_kintr) */ addl $4,%esp /* pop trap number */ - movl $ CPD_INTERRUPT_LEVEL,%edx - decl %gs:(%edx) + decl %gs:CPU_INTERRUPT_LEVEL #if MACH_RT - movl $ CPD_PREEMPTION_LEVEL,%edx - decl %gs:(%edx) + decl %gs:CPU_PREEMPTION_LEVEL #endif /* MACH_RT */ pop %edx /* must have been on kernel segs */ @@ -1259,7 +1200,7 @@ ast_from_interrupt: mov %ss,%dx /* switch to kernel segments */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -1280,8 +1221,8 @@ ast_from_interrupt: * Transfer the current stack frame by hand into the PCB. */ CAH(afistart) - movl CX(EXT(active_kloaded),%edx),%eax - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_ACTIVE_KLOADED,%eax + movl %gs:CPU_KERNEL_STACK,%ebx xchgl %ebx,%esp FRAME_STACK_TO_PCB(%eax,%ebx) CAH(afiend) @@ -1290,7 +1231,7 @@ ast_from_interrupt: 0: TIME_TRAP_UENTRY - movl CX(EXT(kernel_stack),%edx),%eax + movl %gs:CPU_KERNEL_STACK,%eax /* switch to kernel stack */ xchgl %eax,%esp 3: @@ -1378,12 +1319,7 @@ Entry(kdb_kintr) */ kdb_from_iret: /* save regs in known locations */ -#if STAT_TIME pushl %ebx /* caller`s %ebx is in reg */ -#else - movl 4(%esp),%eax /* get caller`s %ebx */ - pushl %eax /* push on stack */ -#endif pushl %ebp pushl %esi pushl %edi @@ -1405,12 +1341,7 @@ kdb_from_iret: popl %edi popl %esi popl %ebp -#if STAT_TIME popl %ebx -#else - popl %eax - movl %eax,4(%esp) -#endif jmp EXT(return_to_iret) /* normal interrupt return */ kdb_from_iret_i: /* on interrupt stack */ @@ -1469,7 +1400,7 @@ Entry(mach_rpc) mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -1483,7 +1414,6 @@ Entry(mach_rpc) movl %edx,R_CS(%esp) /* fix cs */ movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - CPU_NUMBER(%edx) TIME_TRAP_UENTRY negl %eax /* get system call number */ @@ -1498,22 +1428,20 @@ Entry(mach_rpc) * up a simulated "uesp" manually, since there's none in the * frame. */ - cmpl $0,CX(EXT(active_kloaded),%edx) + cmpl $0,%gs:CPU_ACTIVE_KLOADED jz 2f CAH(mrstart) - movl CX(EXT(active_kloaded),%edx),%ebx - movl CX(EXT(kernel_stack),%edx),%edx + movl %gs:CPU_ACTIVE_KLOADED,%ebx + movl %gs:CPU_KERNEL_STACK,%edx xchgl %edx,%esp FRAME_STACK_TO_PCB(%ebx,%edx) CAH(mrend) - CPU_NUMBER(%edx) jmp 3f 2: - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. 
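
The reworked call_continuation above changes the contract for continuations: instead of jumping to the continuation with no arguments, it now calls it with its parameter and the wait result, and a continuation that returns falls into thread_terminate(). A C-level reading, assuming the kernel's existing thread_continue_t type, void (*)(void *, wait_result_t):

static void
call_continuation_model(thread_continue_t continuation,
                        void *parameter, wait_result_t wresult)
{
        /* (the assembly first rewinds %esp to the top of the kernel stack) */
        (*continuation)(parameter, wresult);

        /* a continuation must not return; if it does, the thread dies */
        (void) thread_terminate(current_thread());
}
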
*/ @@ -1538,8 +1466,7 @@ Entry(mach_rpc) movl R_UESP(%ebx),%esi /* get user stack pointer */ lea 4(%esi,%ecx,4),%esi /* skip user return address, */ /* and point past last argument */ - /* edx holds cpu number from above */ - movl CX(EXT(active_kloaded),%edx),%edx + movl %gs:CPU_ACTIVE_KLOADED,%edx /* point to current thread */ orl %edx,%edx /* if ! kernel-loaded, check addr */ jz 4f /* else */ @@ -1562,25 +1489,12 @@ Entry(mach_rpc) /* * Register use on entry: - * eax contains syscall number - * ebx contains user regs pointer + * eax contains syscall number << 4 + * mach_call_munger is declared regparm(1), so the first arg is %eax */ 2: - pushl %ebx /* arg ptr */ - pushl %eax /* call # - preserved across */ - call EXT(mach_call_start) - addl $ 8, %esp - movl %eax, %ebx /* need later */ - - CAH(call_call) - call *EXT(mach_trap_table)+4(%eax) - /* call procedure */ - - pushl %eax /* retval */ - pushl %ebx /* call # */ - call EXT(mach_call_end) - addl $ 8, %esp + call EXT(mach_call_munger) movl %esp,%ecx /* get kernel stack */ or $(KERNEL_STACK_SIZE-1),%ecx @@ -1615,7 +1529,7 @@ Entry(syscall_int80) mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs jmp syscall_entry_3 @@ -1650,7 +1564,7 @@ syscall_entry_2: mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -1665,7 +1579,6 @@ syscall_entry_2: movl %ebx,R_EFLAGS(%esp) /* fix eflags */ syscall_entry_3: - CPU_NUMBER(%edx) /* * Check here for syscall from kernel-loaded task -- * We didn't enter here "through" PCB (i.e., using ring 0 stack), @@ -1674,59 +1587,33 @@ syscall_entry_3: * up a simulated "uesp" manually, since there's none in the * frame. */ - cmpl $0,CX(EXT(active_kloaded),%edx) + cmpl $0,%gs:CPU_ACTIVE_KLOADED jz 0f CAH(scstart) - movl CX(EXT(active_kloaded),%edx),%ebx - movl CX(EXT(kernel_stack),%edx),%edx + movl %gs:CPU_ACTIVE_KLOADED,%ebx + movl %gs:CPU_KERNEL_STACK,%edx xchgl %edx,%esp FRAME_STACK_TO_PCB(%ebx,%edx) CAH(scend) TIME_TRAP_UENTRY - CPU_NUMBER(%edx) jmp 1f 0: TIME_TRAP_UENTRY - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. */ /* user regs pointer already set */ -/* - * Check for MACH or emulated system call - * Register use (from here till we begin processing call): - * eax contains system call number - * ebx points to user regs - */ -1: - movl $ CPD_ACTIVE_THREAD,%edx - movl %gs:(%edx),%edx /* get active thread */ - movl ACT_TASK(%edx),%edx /* point to task */ - movl TASK_EMUL(%edx),%edx /* get emulation vector */ - orl %edx,%edx /* if none, */ - je syscall_native /* do native system call */ - movl %eax,%ecx /* copy system call number */ - subl DISP_MIN(%edx),%ecx /* get displacement into syscall */ - /* vector table */ - jl syscall_native /* too low - native system call */ - cmpl DISP_COUNT(%edx),%ecx /* check range */ - jnl syscall_native /* too high - native system call */ - movl DISP_VECTOR(%edx,%ecx,4),%edx - /* get the emulation vector */ - orl %edx,%edx /* emulated system call if not zero */ - jnz syscall_emul - /* * Native system call. 
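
The "declared regparm(1)" comment above refers to gcc's register calling convention: with regparm(1), the first argument travels in %eax, which is exactly where the trap path already keeps the scaled call number, so the old push/call/pop sequence collapses into a bare call. An illustrative prototype only (the function name and argument here are hypothetical; the kernel's real mach_call_munger declaration lives in the C sources this patch touches elsewhere):

extern kern_return_t my_eax_handler(unsigned int scaled_call)
        __attribute__((regparm(1)));    /* first argument arrives in %eax */
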
* Register use on entry: * eax contains syscall number * ebx points to user regs */ -syscall_native: +1: negl %eax /* get system call number */ jl mach_call_range /* out of range if it was positive */ @@ -1738,13 +1625,6 @@ syscall_native: /* get procedure */ cmpl $ EXT(kern_invalid),%edx /* if not "kern_invalid" */ jne do_native_call /* go on with Mach syscall */ - - movl $ CPD_ACTIVE_THREAD,%edx - movl %gs:(%edx),%edx /* get active thread */ - movl ACT_TASK(%edx),%edx /* point to task */ - movl TASK_EMUL(%edx),%edx /* get emulation vector */ - orl %edx,%edx /* if it exists, */ - jne do_native_call /* do native system call */ shrl $4,%eax /* restore syscall number */ jmp mach_call_range /* try it as a "server" syscall */ @@ -1760,8 +1640,7 @@ do_native_call: movl R_UESP(%ebx),%esi /* get user stack pointer */ lea 4(%esi,%ecx,4),%esi /* skip user return address, */ /* and point past last argument */ - CPU_NUMBER(%edx) - movl CX(EXT(active_kloaded),%edx),%edx + movl %gs:CPU_ACTIVE_KLOADED,%edx /* point to current thread */ orl %edx,%edx /* if kernel-loaded, skip addr check */ jz 0f /* else */ @@ -1809,18 +1688,10 @@ mach_call_call: make_syscall: - pushl %ebx /* arg ptr */ - pushl %eax /* call # - preserved across */ - call EXT(mach_call_start) - addl $ 8, %esp - movl %eax, %ebx /* need later */ - - call *EXT(mach_trap_table)+4(%eax) /* call procedure */ - - pushl %eax /* retval */ - pushl %ebx /* call # */ - call EXT(mach_call_end) - addl $ 8, %esp +/* + * mach_call_munger is declared regparm(1) so the first arg is %eax + */ + call EXT(mach_call_munger) skip_syscall: @@ -1854,20 +1725,13 @@ mach_call_addr: * eax contains syscall number */ mach_call_range: - movl $ CPD_ACTIVE_THREAD,%edx - movl %gs:(%edx),%edx /* get active thread */ - movl ACT_TASK(%edx),%edx /* point to task */ - movl TASK_EMUL(%edx),%edx /* get emulation vector */ - orl %edx,%edx /* if emulator, */ - jne EXT(syscall_failed) /* handle as illegal instruction */ - /* else generate syscall exception: */ push %eax movl %esp,%edx push $1 /* code_cnt = 1 */ push %edx /* exception_type_t (see i/f docky) */ push $ EXC_SYSCALL CAH(call_range) - call EXT(exception) + call EXT(exception_triage) /* no return */ .globl EXT(syscall_failed) @@ -1875,8 +1739,7 @@ LEXT(syscall_failed) movl %esp,%ecx /* get kernel stack */ or $(KERNEL_STACK_SIZE-1),%ecx movl -3-IKS_SIZE(%ecx),%esp /* switch back to PCB stack */ - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. */ @@ -1888,64 +1751,6 @@ LEXT(syscall_failed) CAH(failed) jmp EXT(take_trap) /* treat as a trap */ -/* - * User space emulation of system calls. - * edx - user address to handle syscall - * - * User stack will become: - * uesp-> eflags - * eip - * Register use on entry: - * ebx contains user regs pointer - * edx contains emulator vector address - */ -syscall_emul: - movl R_UESP(%ebx),%edi /* get user stack pointer */ - CPU_NUMBER(%eax) - movl CX(EXT(active_kloaded),%eax),%eax - orl %eax,%eax /* if thread not kernel-loaded, */ - jz 0f /* do address checks */ - subl $8,%edi - mov %ds,%ax /* kernel data segment access */ - jmp 1f /* otherwise, skip them */ -0: - cmpl $(VM_MAX_ADDRESS),%edi /* in user space? */ - ja syscall_addr /* address error if not */ - subl $8,%edi /* push space for new arguments */ - cmpl $(VM_MIN_ADDRESS),%edi /* still in user space? 
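
The "manual indexing" above encodes the trap table geometry: Mach traps arrive as negative numbers, negl makes them positive, and shll $4 turns the number into a byte offset, so each mach_trap_table entry must be 16 bytes with the handler pointer at offset 4 (the +4(%eax) addressing). A stand-in C model of that layout on this 32-bit kernel; the real mach_trap_t fields are defined elsewhere:

typedef struct {
        int            arg_count;         /* offset 0 */
        kern_return_t (*function)(void);  /* offset 4, what the call uses */
        unsigned char  pad[8];            /* pads the entry to 16 bytes  */
} mach_trap_model_t;

extern mach_trap_model_t mach_trap_table[];

static kern_return_t (*trap_handler_model(int trap_num))(void)
{
        return mach_trap_table[-trap_num].function;  /* trap_num < 0 */
}
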
*/ - jb syscall_addr /* error if not */ - movl $ USER_DS,%ax /* user data segment access */ -1: - mov %ax,%fs - movl R_EFLAGS(%ebx),%eax /* move flags */ - RECOVERY_SECTION - RECOVER(syscall_addr) - movl %eax,%fs:0(%edi) /* to user stack */ - movl R_EIP(%ebx),%eax /* move eip */ - RECOVERY_SECTION - RECOVER(syscall_addr) - movl %eax,%fs:4(%edi) /* to user stack */ - movl %edi,R_UESP(%ebx) /* set new user stack pointer */ - movl %edx,R_EIP(%ebx) /* change return address to trap */ - movl %ebx,%esp /* back to PCB stack */ - CAH(emul) - jmp EXT(return_from_trap) /* return to user */ - - -/* - * Address error - address is in %edi. - * Register use on entry: - * ebx contains user regs pointer - */ -syscall_addr: - movl %edi,R_CR2(%ebx) /* set fault address */ - movl $(T_PAGE_FAULT),R_TRAPNO(%ebx) - /* set page-fault trap */ - movl $(T_PF_USER),R_ERR(%ebx) - /* set error code - read user space */ - CAH(addr) - jmp EXT(take_trap) /* treat as a trap */ - /* */ /* * Utility routines. @@ -1969,8 +1774,7 @@ ENTRY(copyin) lea 0(%esi,%edx),%eax /* get user end address + 1 */ - movl $ CPD_ACTIVE_THREAD,%ecx - movl %gs:(%ecx),%ecx /* get active thread */ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get active thread */ movl ACT_MAP(%ecx),%ecx /* get act->map */ movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ cmpl EXT(kernel_pmap), %ecx @@ -2023,8 +1827,7 @@ Entry(copyinstr) lea 0(%esi,%edx),%eax /* get user end address + 1 */ - movl $ CPD_ACTIVE_THREAD,%ecx - movl %gs:(%ecx),%ecx /* get active thread */ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get active thread */ movl ACT_MAP(%ecx),%ecx /* get act->map */ movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ cmpl EXT(kernel_pmap), %ecx @@ -2088,8 +1891,7 @@ ENTRY(copyout) leal 0(%edi,%edx),%eax /* get user end address + 1 */ - movl $ CPD_ACTIVE_THREAD,%ecx - movl %gs:(%ecx),%ecx /* get active thread */ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get active thread */ movl ACT_MAP(%ecx),%ecx /* get act->map */ movl MAP_PMAP(%ecx),%ecx /* get map->pmap */ cmpl EXT(kernel_pmap), %ecx @@ -2238,12 +2040,8 @@ ENTRY(_fprestore) * Set cr3 */ ENTRY(set_cr3) -#if NCPUS > 1 CPU_NUMBER(%eax) orl 4(%esp), %eax -#else /* NCPUS > 1 && AT386 */ - movl 4(%esp),%eax /* get new cr3 value */ -#endif /* NCPUS > 1 && AT386 */ /* * Don't set PDBR to a new value (hence invalidating the * "paging cache") if the new value matches the current one. @@ -2260,9 +2058,7 @@ ENTRY(set_cr3) */ ENTRY(get_cr3) movl %cr3,%eax -#if NCPUS > 1 andl $(~0x7), %eax /* remove cpu number */ -#endif /* NCPUS > 1 && AT386 */ ret /* @@ -2670,6 +2466,24 @@ kdp_vm_read_fail: ret #endif +/* + * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi) + */ +ENTRY(rdmsr_carefully) + movl S_ARG0, %ecx + RECOVERY_SECTION + RECOVER(rdmsr_fail) + rdmsr + movl S_ARG1, %ecx + movl %eax, (%ecx) + movl S_ARG2, %ecx + movl %edx, (%ecx) + movl $0, %eax + ret + +rdmsr_fail: + movl $1, %eax + ret /* * Done with recovery and retry tables. 
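
A usage sketch for rdmsr_carefully above: it returns 0 and fills in the two halves on success, or 1 if the rdmsr faulted and the RECOVER entry redirected execution to rdmsr_fail. This makes it safe to probe MSRs that may not exist on a given CPU:

extern int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi);

static boolean_t
read_msr_model(uint32_t msr, uint64_t *value)
{
        uint32_t lo, hi;

        if (rdmsr_carefully(msr, &lo, &hi) != 0)
                return FALSE;   /* MSR not implemented; #GP was recovered */

        *value = ((uint64_t)hi << 32) | lo;
        return TRUE;
}
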
@@ -2744,14 +2558,6 @@ ENTRY(dr3) ret .data - -DATA(preemptable) /* Not on an MP (makes cpu_number() usage unsafe) */ -#if MACH_RT && (NCPUS == 1) - .long 0 /* FIXME -- Currently disabled */ -#else - .long 0 /* FIX ME -- Currently disabled */ -#endif /* MACH_RT && (NCPUS == 1) */ - dr_msk: .long ~0x000f0003 .long ~0x00f0000c @@ -2881,8 +2687,6 @@ ENTRY(etap_time_sub) #endif /* ETAP */ -#if NCPUS > 1 - ENTRY(minsecurity) pushl %ebp movl %esp,%ebp @@ -2893,8 +2697,6 @@ ENTRY(minsecurity) ENTRY(jail) jmp EXT(jail) -#endif /* NCPUS > 1 */ - /* * unsigned int * div_scale(unsigned int dividend, @@ -2989,7 +2791,7 @@ trap_unix_2: mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -3003,14 +2805,12 @@ trap_unix_2: movl %edx,R_CS(%esp) /* fix cs */ movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - CPU_NUMBER(%edx) TIME_TRAP_UENTRY negl %eax /* get system call number */ shll $4,%eax /* manual indexing */ - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. */ @@ -3048,7 +2848,7 @@ Entry(trap_machdep_syscall) mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -3062,14 +2862,12 @@ Entry(trap_machdep_syscall) movl %edx,R_CS(%esp) /* fix cs */ movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - CPU_NUMBER(%edx) TIME_TRAP_UENTRY negl %eax /* get system call number */ shll $4,%eax /* manual indexing */ - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. */ @@ -3103,7 +2901,7 @@ Entry(trap_mach25_syscall) mov %ss,%dx /* switch to kernel data segment */ mov %dx,%ds mov %dx,%es - mov $ CPU_DATA,%dx + mov $ CPU_DATA_GS,%dx mov %dx,%gs /* @@ -3117,14 +2915,12 @@ Entry(trap_mach25_syscall) movl %edx,R_CS(%esp) /* fix cs */ movl %ebx,R_EFLAGS(%esp) /* fix eflags */ - CPU_NUMBER(%edx) TIME_TRAP_UENTRY negl %eax /* get system call number */ shll $4,%eax /* manual indexing */ - CPU_NUMBER(%edx) - movl CX(EXT(kernel_stack),%edx),%ebx + movl %gs:CPU_KERNEL_STACK,%ebx /* get current kernel stack */ xchgl %ebx,%esp /* switch stacks - %ebx points to */ /* user registers. */ diff --git a/osfmk/i386/loose_ends.c b/osfmk/i386/loose_ends.c index 9883b6c28..4ce9fd757 100644 --- a/osfmk/i386/loose_ends.c +++ b/osfmk/i386/loose_ends.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -57,22 +57,61 @@ #include #include #include -#include #include #include +#include +#include +#include +#include +#include +#include +#include + +/* XXX - should be gone from here */ +extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys); +extern void flush_dcache64(addr64_t addr, unsigned count, int phys); +extern boolean_t phys_page_exists(ppnum_t); +extern pt_entry_t *pmap_mapgetpte(vm_map_t, vm_offset_t); +extern void bcopy_no_overwrite(const char *from, char *to,vm_size_t bytes); +extern void pmap_set_reference(ppnum_t pn); +extern void mapping_set_mod(ppnum_t pa); +extern void mapping_set_ref(ppnum_t pn); +extern void switch_to_serial_console(void); +extern kern_return_t copyp2p(vm_offset_t source, + vm_offset_t dest, + unsigned int size, + unsigned int flush_action); +extern void fillPage(ppnum_t pa, unsigned int fill); +extern void ovbcopy(const char *from, + char *to, + vm_size_t nbytes); +void machine_callstack(natural_t *buf, vm_size_t callstack_max); + #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL) #define low32(x) ((unsigned int)((x) & 0x00000000FFFFFFFFLL)) - /* - * Should be rewritten in asm anyway. - */ - void -bzero_phys(addr64_t p, uint32_t len) +bzero_phys( + addr64_t src64, + vm_size_t bytes) { - bzero((char *)phystokv(low32(p)), len); + vm_offset_t src = low32(src64); + pt_entry_t save2; + mp_disable_preemption(); + if (*(pt_entry_t *) CM2) + panic("bzero_phys: CMAP busy"); + + *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + save2=*(pt_entry_t *)CM2; + invlpg((u_int)CA2); + + bzero((void *)((unsigned int)CA2 | (src & INTEL_OFFMASK)), bytes); + if (save2 != *(pt_entry_t *)CM2) panic("bzero_phys CMAP changed"); + *(pt_entry_t *) CM2 = 0; + mp_enable_preemption(); } /* @@ -85,9 +124,12 @@ bzero_phys(addr64_t p, uint32_t len) * if flush_action == 3, flush both source and dest */ -extern void flush_dcache(vm_offset_t addr, unsigned count, int phys); - -kern_return_t copyp2p(vm_offset_t source, vm_offset_t dest, unsigned int size, unsigned int flush_action) { +kern_return_t +copyp2p(vm_offset_t source, + vm_offset_t dest, + unsigned int size, + unsigned int flush_action) +{ switch(flush_action) { case 1: @@ -120,49 +162,41 @@ kern_return_t copyp2p(vm_offset_t source, vm_offset_t dest, unsigned int size, u return KERN_SUCCESS; } - - -/* - * Copies data from a physical page to a virtual page. This is used to - * move data from the kernel to user state. - * - */ -#if 0 -kern_return_t -copyp2v(char *from, char *to, unsigned int size) { - - return(copyout(phystokv(from), to, size)); -} -#endif - -/* - * Copies data from a virtual page to a physical page. This is used to - * move data from the user address space into the kernel. - * - */ -#if 0 -kern_return_t -copyv2p(char *from, char *to, unsigned int size) { - - return(copyin(from, phystokv(to), size)); -} -#endif - /* * bcopy_phys - like bcopy but copies from/to physical addresses. 
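
The rewritten bzero_phys (and bcopy_phys below) follow one pattern worth naming: CMx is a reserved PTE slot and CAx the virtual page it maps, forming a temporary window onto arbitrary physical memory. Claim the window, point it at the target frame, invlpg the stale translation, do the work through CAx, then release. A hedged sketch of the pattern (the op callback is illustrative; the real functions inline their work):

static void
phys_window_model(pmap_paddr_t pa, vm_size_t len,
                  void (*op)(void *va, vm_size_t len))
{
        pt_entry_t entered;

        mp_disable_preemption();          /* the window is per-processor */
        if (*(pt_entry_t *)CM2)
                panic("phys window busy");

        *(pt_entry_t *)CM2 = INTEL_PTE_VALID | INTEL_PTE_RW |
            (pa & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD;
        entered = *(pt_entry_t *)CM2;
        invlpg((u_int)CA2);               /* drop any stale TLB entry */

        (*op)((void *)((u_int)CA2 | (pa & INTEL_OFFMASK)), len);

        if (entered != *(pt_entry_t *)CM2)
                panic("phys window changed");   /* detect a racing user */
        *(pt_entry_t *)CM2 = 0;           /* release the window */
        mp_enable_preemption();
}
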
- * this is trivial since all phys mem is mapped into - * kernel virtual space */ void -bcopy_phys(addr64_t from, addr64_t to, vm_size_t bytes) +bcopy_phys( + addr64_t src64, + addr64_t dst64, + vm_size_t bytes) { - /* this will die horribly if we ever run off the end of a page */ - if ( value_64bit(from) || value_64bit(to)) panic("bcopy_phys: 64 bit value"); - bcopy((char *)phystokv(low32(from)), - (char *)phystokv(low32(to)), bytes); -} + vm_offset_t src = low32(src64); + vm_offset_t dst = low32(dst64); + pt_entry_t save1,save2; + /* ensure we stay within a page */ + if ( (((src & (NBPG-1)) + bytes) > NBPG) || + (((dst & (NBPG-1)) + bytes) > NBPG) ) panic("bcopy_phys"); + mp_disable_preemption(); + if (*(pt_entry_t *) CM1 || *(pt_entry_t *) CM2) + panic("bcopy_phys: CMAP busy"); + + *(pt_entry_t *) CM1 = INTEL_PTE_VALID | (src & PG_FRAME) | INTEL_PTE_REF; + *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (dst & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + save1 = *(pt_entry_t *)CM1;save2 = *(pt_entry_t *)CM2; + invlpg((u_int)CA1); + invlpg((u_int)CA2); + + bcopy((void *) ((uintptr_t)CA1 | (src & INTEL_OFFMASK)), + (void *) ((uintptr_t)CA2 | (dst & INTEL_OFFMASK)), bytes); + if ( (save1 != *(pt_entry_t *)CM1) || (save2 != *(pt_entry_t *)CM2)) panic("bcopy_phys CMAP changed"); + *(pt_entry_t *) CM1 = 0; + *(pt_entry_t *) CM2 = 0; + mp_enable_preemption(); +} /* * ovbcopy - like bcopy, but recognizes overlapping ranges and handles @@ -189,20 +223,265 @@ ovbcopy( } } -void -bcopy( - const char *from, - char *to, - vm_size_t bytes) /* num bytes to copy */ + +/* + * Read data from a physical address. Memory should not be cache inhibited. + */ + + +static unsigned int +ml_phys_read_data( vm_offset_t paddr, int size ) +{ + unsigned int result; + pt_entry_t save; + mp_disable_preemption(); + if (*(pt_entry_t *) CM3) + panic("ml_phys_read_data: CMAP busy"); + + *(pt_entry_t *) CM3 = INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF; + save = *(pt_entry_t *)CM3; + invlpg((u_int)CA3); + + + switch (size) { + unsigned char s1; + unsigned short s2; + case 1: + s1 = *(unsigned char *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + result = s1; + break; + case 2: + s2 = *(unsigned short *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + result = s2; + break; + case 4: + default: + result = *(unsigned int *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + break; + } + + if (save != *(pt_entry_t *)CM3) panic("ml_phys_read_data CMAP changed"); + *(pt_entry_t *) CM3 = 0; + mp_enable_preemption(); + return result; +} + +static unsigned long long +ml_phys_read_long_long( vm_offset_t paddr ) +{ + unsigned long long result; + pt_entry_t save; + mp_disable_preemption(); + if (*(pt_entry_t *) CM3) + panic("ml_phys_read_data: CMAP busy"); + + *(pt_entry_t *) CM3 = INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF; + save = *(pt_entry_t *)CM3; + invlpg((u_int)CA3); + + result = *(unsigned long long *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)); + + if (save != *(pt_entry_t *)CM3) panic("ml_phys_read_data CMAP changed"); + *(pt_entry_t *) CM3 = 0; + mp_enable_preemption(); + return result; +} + +unsigned int ml_phys_read( vm_offset_t paddr) +{ + return ml_phys_read_data(paddr, 4); +} + +unsigned int ml_phys_read_word(vm_offset_t paddr) { + return ml_phys_read_data(paddr, 4); +} + +unsigned int ml_phys_read_64(addr64_t paddr64) +{ + return ml_phys_read_data(low32(paddr64), 4); +} + +unsigned int ml_phys_read_word_64(addr64_t paddr64) +{ + return ml_phys_read_data(low32(paddr64), 4); +} + +unsigned 
int ml_phys_read_half(vm_offset_t paddr) +{ + return ml_phys_read_data(paddr, 2); +} + +unsigned int ml_phys_read_half_64(addr64_t paddr64) +{ + return ml_phys_read_data(low32(paddr64), 2); +} + +unsigned int ml_phys_read_byte(vm_offset_t paddr) +{ + return ml_phys_read_data(paddr, 1); +} + +unsigned int ml_phys_read_byte_64(addr64_t paddr64) +{ + return ml_phys_read_data(low32(paddr64), 1); +} + +unsigned long long ml_phys_read_double(vm_offset_t paddr) +{ + return ml_phys_read_long_long(paddr); +} + +unsigned long long ml_phys_read_double_64(addr64_t paddr) +{ + return ml_phys_read_long_long(low32(paddr)); +} + + +/* + * Write data to a physical address. Memory should not be cache inhibited. + */ + +static void +ml_phys_write_data( vm_offset_t paddr, unsigned long data, int size ) +{ + pt_entry_t save; + mp_disable_preemption(); + if (*(pt_entry_t *) CM3) + panic("ml_phys_write_data: CMAP busy"); + + *(pt_entry_t *) CM3 = INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + save = *(pt_entry_t *)CM3; + invlpg((u_int)CA3); + + switch (size) { + case 1: + *(unsigned char *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = (unsigned char)data; + break; + case 2: + *(unsigned short *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = (unsigned short)data; + break; + case 4: + default: + *(unsigned int *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = data; + break; + } + + if (save != *(pt_entry_t *)CM3) panic("ml_phys_write_data CMAP changed"); + *(pt_entry_t *) CM3 = 0; + mp_enable_preemption(); +} + +static void +ml_phys_write_long_long( vm_offset_t paddr, unsigned long long data ) +{ + pt_entry_t save; + mp_disable_preemption(); + if (*(pt_entry_t *) CM3) + panic("ml_phys_write_data: CMAP busy"); + + *(pt_entry_t *) CM3 = INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + save = *(pt_entry_t *)CM3; + invlpg((u_int)CA3); + + *(unsigned long long *)((unsigned int)CA3 | (paddr & INTEL_OFFMASK)) = data; + + if (save != *(pt_entry_t *)CM3) panic("ml_phys_write_data CMAP changed"); + *(pt_entry_t *) CM3 = 0; + mp_enable_preemption(); +} + +void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data(paddr, data, 1); +} + +void ml_phys_write_byte_64(addr64_t paddr, unsigned int data) +{ + ml_phys_write_data(low32(paddr), data, 1); +} + +void ml_phys_write_half(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data(paddr, data, 2); +} + +void ml_phys_write_half_64(addr64_t paddr, unsigned int data) +{ + ml_phys_write_data(low32(paddr), data, 2); +} + +void ml_phys_write(vm_offset_t paddr, unsigned int data) { - ovbcopy(from, to, bytes); + ml_phys_write_data(paddr, data, 4); } +void ml_phys_write_64(addr64_t paddr, unsigned int data) +{ + ml_phys_write_data(low32(paddr), data, 4); +} + +void ml_phys_write_word(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data(paddr, data, 4); +} + +void ml_phys_write_word_64(addr64_t paddr, unsigned int data) +{ + ml_phys_write_data(low32(paddr), data, 4); +} + + +void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) +{ + ml_phys_write_long_long(paddr, data); +} + +void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) +{ + ml_phys_write_long_long(low32(paddr), data); +} + + +/* PCI config cycle probing + * + * + * Read the memory location at physical address paddr. + * This is a part of a device probe, so there is a good chance we will + * have a machine check here. So we have to be able to handle that. 
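
A usage sketch for the ml_phys_* accessor family defined above. The addresses are illustrative and, per the header comments, must refer to ordinary cacheable memory, not cache-inhibited device ranges:

static void
phys_access_model(vm_offset_t pa, addr64_t pa64)
{
        unsigned int       h = ml_phys_read_half(pa);         /* 2 bytes */
        unsigned long long d = ml_phys_read_double_64(pa64);  /* 8 bytes */

        ml_phys_write_half(pa, h);          /* narrow write-back */
        ml_phys_write_double_64(pa64, d);   /* wide write-back   */
}
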
+ * We assume that machine checks are enabled both in MSR and HIDs + */ + +boolean_t +ml_probe_read(vm_offset_t paddr, unsigned int *val) +{ + *val = ml_phys_read(paddr); + return TRUE; +} + +/* + * Read the memory location at physical address paddr. + * This is a part of a device probe, so there is a good chance we will + * have a machine check here. So we have to be able to handle that. + * We assume that machine checks are enabled both in MSR and HIDs + */ +boolean_t +ml_probe_read_64(addr64_t paddr, unsigned int *val) +{ + *val = ml_phys_read_64(paddr); + return TRUE; +} + + int bcmp( - const char *a, - const char *b, - vm_size_t len) + const void *pa, + const void *pb, + size_t len) { + const char *a = (const char *)pa; + const char *b = (const char *)pb; + if (len == 0) return 0; @@ -216,12 +495,17 @@ int bcmp( int memcmp(s1, s2, n) - register char *s1, *s2; - register n; + const void *s1, *s2; + size_t n; { - while (--n >= 0) - if (*s1++ != *s2++) - return (*--s1 - *--s2); + if (n != 0) { + const unsigned char *p1 = s1, *p2 = s2; + + do { + if (*p1++ != *p2++) + return (*--p1 - *--p2); + } while (--n != 0); + } return (0); } @@ -328,8 +612,8 @@ hw_compare_and_store( * levels of return pc information. */ void machine_callstack( - natural_t *buf, - vm_size_t callstack_max) + __unused natural_t *buf, + __unused vm_size_t callstack_max) { } @@ -340,89 +624,224 @@ void machine_callstack( void fillPage(ppnum_t pa, unsigned int fill) { - unsigned int *addr = (unsigned int *)phystokv(i386_ptob(pa)); + pmap_paddr_t src; int i; - int cnt = NBPG/sizeof(unsigned int); - - for (i = 0; i < cnt ; i++ ) + int cnt = PAGE_SIZE/sizeof(unsigned int); + unsigned int *addr; + mp_disable_preemption(); + if (*(pt_entry_t *) CM2) + panic("fillPage: CMAP busy"); + src = (pmap_paddr_t)i386_ptob(pa); + *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + invlpg((u_int)CA2); + + for (i = 0, addr = (unsigned int *)CA2; i < cnt ; i++ ) *addr++ = fill; + + *(pt_entry_t *) CM2 = 0; + mp_enable_preemption(); +} + +static inline void __sfence(void) +{ + __asm__ volatile("sfence"); +} +static inline void __mfence(void) +{ + __asm__ volatile("mfence"); +} +static inline void __wbinvd(void) +{ + __asm__ volatile("wbinvd"); +} +static inline void __clflush(void *ptr) +{ + __asm__ volatile(".byte 0x0F; .byte 0xae; .byte 0x38" : : "a" (ptr)); +} + +void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) +{ + if (cpuid_features() & CPUID_FEATURE_CLFSH) + { + uint32_t linesize = cpuid_info()->cache_linesize; + addr64_t addr; + uint32_t offset, chunk; + boolean_t istate; + + istate = ml_set_interrupts_enabled(FALSE); + + if (*(pt_entry_t *) CM2) + panic("cache_flush_page_phys: CMAP busy"); + + offset = pa & (linesize - 1); + count += offset; + addr = pa - offset; + offset = addr & ((addr64_t) (page_size - 1)); + chunk = page_size - offset; + + do + { + if (chunk > count) + chunk = count; + + *(pt_entry_t *) CM2 = i386_ptob(atop_64(addr)) | INTEL_PTE_VALID; + invlpg((u_int)CA2); + + for (; offset < chunk; offset += linesize) + __clflush((void *)(((u_int)CA2) + offset)); + + count -= chunk; + addr += chunk; + chunk = page_size; + offset = 0; + } + while (count); + + *(pt_entry_t *) CM2 = 0; + + (void) ml_set_interrupts_enabled(istate); + } + else + __wbinvd(); + __sfence(); } -#define cppvPHYS (cppvPsnk|cppvPsrc) +void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count) +{ + return(dcache_incoherent_io_store64(pa,count)); +} -kern_return_t 
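
The alignment arithmetic at the top of dcache_incoherent_io_store64 aligns the start address down to a cache line and widens the count to match, so every line touching the range gets flushed. A C model of that sweep, with the page-window remapping omitted for clarity (__clflush is the inline defined above):

#include <stdint.h>

static void
clflush_range_model(uintptr_t va, uint32_t count, uint32_t linesize)
{
        uint32_t offset = va & (linesize - 1);

        count += offset;        /* widen to cover the partial first line */
        va    -= offset;
        for (offset = 0; offset < count; offset += linesize)
                __clflush((void *)(va + offset));
}
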
copypv(addr64_t source, addr64_t sink, unsigned int size, int which) +void +flush_dcache64(__unused addr64_t addr, + __unused unsigned count, + __unused int phys) { - char *src32, *dst32; +} - if (value_64bit(source) | value_64bit(sink)) panic("copypv: 64 bit value"); +void +invalidate_icache64(__unused addr64_t addr, + __unused unsigned count, + __unused int phys) +{ +} - src32 = (char *)low32(source); - dst32 = (char *)low32(sink); +kern_return_t copypv(addr64_t src64, + addr64_t snk64, + unsigned int size, + int which) +{ + + vm_map_t map; + kern_return_t ret; + vm_offset_t source, sink; + vm_offset_t vaddr; + vm_offset_t paddr; + spl_t s; + unsigned int lop, csize; + int needtran, bothphys; + vm_prot_t prot; + pt_entry_t *ptep; + + map = (which & cppvKmap) ? kernel_map : current_map_fast(); - if (which & cppvFsrc) flush_dcache(source, size, 1); /* If requested, flush source before move */ - if (which & cppvFsnk) flush_dcache(sink, size, 1); /* If requested, flush sink before move */ + source = low32(src64); + sink = low32(snk64); - switch (which & cppvPHYS) { + if((which & (cppvPsrc | cppvPsnk)) == 0 ) { /* Make sure that only one is virtual */ + panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */ + } + + bothphys = 1; /* Assume both are physical */ + + if(!(which & cppvPsnk)) { /* Is there a virtual page here? */ + vaddr = sink; /* Sink side is virtual */ + bothphys = 0; /* Show both aren't physical */ + prot = VM_PROT_READ | VM_PROT_WRITE; /* Sink always must be read/write */ + } else /* if(!(which & cppvPsrc)) */ { /* Source side is virtual */ + vaddr = source; /* Source side is virtual */ + bothphys = 0; /* Show both aren't physical */ + prot = VM_PROT_READ; /* Virtual source is always read only */ + } - case cppvPHYS: - /* - * both destination and source are physical - */ - bcopy_phys(source, sink, (vm_size_t)size); - break; + needtran = 1; /* Show we need to map the virtual the first time */ + s = splhigh(); /* Don't bother me */ + + while(size) { + + if(!bothphys && (needtran || !(vaddr & 4095LL))) { /* If first time or we stepped onto a new page, we need to translate */ + needtran = 0; + while(1) { + ptep = pmap_mapgetpte(map, vaddr); + if((0 == ptep) || ((*ptep & INTEL_PTE_VALID) == 0)) { + splx(s); /* Restore the interrupt level */ + ret = vm_fault(map, vm_map_trunc_page(vaddr), prot, FALSE, THREAD_UNINT, NULL, 0); /* Didn't find it, try to fault it in... */ + + if(ret != KERN_SUCCESS)return KERN_FAILURE; /* Didn't find any, return no good... */ + + s = splhigh(); /* Don't bother me */ + continue; /* Go try for the map again... */ + + } + + /* Note that we have to have the destination writable. So, if we already have it, or we are mapping the source, + we can just leave. + */ + if((which & cppvPsnk) || (*ptep & INTEL_PTE_WRITE)) break; /* We got it mapped R/W or the source is not virtual, leave... */ + splx(s); /* Restore the interrupt level */ + + ret = vm_fault(map, vm_map_trunc_page(vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); /* check for a COW area */ + if (ret != KERN_SUCCESS) return KERN_FAILURE; /* We couldn't get it R/W, leave in disgrace... 
*/ + s = splhigh(); /* Don't bother me */ + } + + paddr = pte_to_pa(*ptep) | (vaddr & 4095); + + if(which & cppvPsrc) sink = paddr; /* If source is physical, then the sink is virtual */ + else source = paddr; /* Otherwise the source is */ + } + + lop = (unsigned int)(4096LL - (sink & 4095LL)); /* Assume sink smallest */ + if(lop > (unsigned int)(4096LL - (source & 4095LL))) lop = (unsigned int)(4096LL - (source & 4095LL)); /* No, source is smaller */ + + csize = size; /* Assume we can copy it all */ + if(lop < size) csize = lop; /* Nope, we can't do it all */ + + if(which & cppvFsrc) flush_dcache64((addr64_t)source, csize, 1); /* If requested, flush source before move */ + if(which & cppvFsnk) flush_dcache64((addr64_t)sink, csize, 1); /* If requested, flush sink before move */ + + bcopy_phys((addr64_t)source, (addr64_t)sink, csize); /* Do a physical copy, virtually */ + + if(which & cppvFsrc) flush_dcache64((addr64_t)source, csize, 1); /* If requested, flush source after move */ + if(which & cppvFsnk) flush_dcache64((addr64_t)sink, csize, 1); /* If requested, flush sink after move */ - case cppvPsnk: - /* - * destination is physical, source is virtual - */ - if (which & cppvKmap) - /* - * source is kernel virtual - */ - bcopy(src32, (char *)phystokv(dst32), size); - else - /* - * source is user virtual - */ - copyin(src32, (char *)phystokv(dst32), size); - break; - - case cppvPsrc: - /* - * source is physical, destination is virtual - */ - if (which & cppvKmap) - /* - * destination is kernel virtual - */ - bcopy((char *)phystokv(src32), dst32, size); - else - /* - * destination is user virtual - */ - copyout((char *)phystokv(src32), dst32, size); - break; - - default: - panic("copypv: both virtual"); - } - if (which & cppvFsrc) flush_dcache(source, size, 1); /* If requested, flush source before move */ - if (which & cppvFsnk) flush_dcache(sink, size, 1); /* If requested, flush sink before move */ +/* + * Note that for certain ram disk flavors, we may be copying outside of known memory. + * Therefore, before we try to mark it modifed, we check if it exists. + */ - return KERN_SUCCESS; -} + if( !(which & cppvNoModSnk)) { + if (phys_page_exists((ppnum_t)sink >> 12)) + mapping_set_mod((ppnum_t)(sink >> 12)); /* Make sure we know that it is modified */ + } + if( !(which & cppvNoRefSrc)) { + if (phys_page_exists((ppnum_t)source >> 12)) + mapping_set_ref((ppnum_t)(source >> 12)); /* Make sure we know that it is modified */ + } -void flush_dcache64(addr64_t addr, unsigned count, int phys) -{ -} + size = size - csize; /* Calculate what is left */ + vaddr = vaddr + csize; /* Move to next sink address */ + source = source + csize; /* Bump source to next physical address */ + sink = sink + csize; /* Bump sink to next physical address */ + } + + splx(s); /* Open up for interrupts */ -void invalidate_icache64(addr64_t addr, unsigned cnt, int phys) -{ + return KERN_SUCCESS; } - void switch_to_serial_console(void) { } @@ -435,10 +854,46 @@ mapping_set_mod(ppnum_t pn) pmap_set_modify(pn); } -boolean_t -mutex_preblock( - mutex_t *mutex, - thread_t thread) +void +mapping_set_ref(ppnum_t pn) +{ + pmap_set_reference(pn); +} + +void +cache_flush_page_phys(ppnum_t pa) { - return (FALSE); + boolean_t istate; + int i; + unsigned int *cacheline_addr; + int cacheline_size = cpuid_info()->cache_linesize; + int cachelines_in_page = PAGE_SIZE/cacheline_size; + + /* + * If there's no clflush instruction, we're sadly forced to use wbinvd. 
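
A caller sketch for the new copypv above. Exactly one side may be virtual; cppvPsrc marks the source as physical, cppvKmap says the virtual side lives in kernel_map rather than the current task's map, and cppvFsnk asks for the sink to be flushed around the move. The buffer names are illustrative:

static kern_return_t
copy_phys_to_kva_model(addr64_t src_pa, void *dst_va, unsigned int len)
{
        return copypv(src_pa, (addr64_t)(uintptr_t)dst_va, len,
                      cppvPsrc | cppvKmap | cppvFsnk);
}
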
+ */ + if (!(cpuid_features() & CPUID_FEATURE_CLFSH)) { + asm volatile("wbinvd" : : : "memory"); + return; + } + + istate = ml_set_interrupts_enabled(FALSE); + + if (*(pt_entry_t *) CM2) + panic("cache_flush_page_phys: CMAP busy"); + + *(pt_entry_t *) CM2 = i386_ptob(pa) | INTEL_PTE_VALID; + invlpg((u_int)CA2); + + for (i = 0, cacheline_addr = (unsigned int *)CA2; + i < cachelines_in_page; + i++, cacheline_addr += cacheline_size) { + asm volatile("clflush %0" : : "m" (cacheline_addr)); + } + + *(pt_entry_t *) CM2 = 0; + + (void) ml_set_interrupts_enabled(istate); + } + diff --git a/osfmk/i386/mach_param.h b/osfmk/i386/mach_param.h deleted file mode 100644 index 7b2a7e7ce..000000000 --- a/osfmk/i386/mach_param.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Machine-dependent parameters for i386. - */ - -#define HZ (100) - /* clock tick each 10 ms. 
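cache_flush_page_phys() above maps the target page at a reserved window (CA2) and evicts it line by line with clflush, falling back to wbinvd when CPUID does not advertise CLFSH. A hedged user-space analogue of the line walk, using GCC-style inline assembly and an ordinary buffer in place of the kernel's mapping window:

#include <stddef.h>

/*
 * Flush every cache line backing a page-sized buffer with clflush.
 * Sketch only: assumes an x86 compiler with GCC inline asm and a CPU
 * that advertises CLFSH; the kernel walks its CA2 window instead.
 */
static void
flush_page_lines(void *page, size_t page_size, size_t line_size)
{
	char *p = (char *)page;
	char *end = p + page_size;

	for (; p < end; p += line_size)			/* advance in bytes */
		__asm__ volatile("clflush %0" : : "m" (*p));
	__asm__ volatile("mfence" ::: "memory");	/* clflush is only ordered by fences */
}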
*/ diff --git a/osfmk/i386/machdep_call.c b/osfmk/i386/machdep_call.c index ddf379d3f..4e44c0559 100644 --- a/osfmk/i386/machdep_call.c +++ b/osfmk/i386/machdep_call.c @@ -34,60 +34,27 @@ #include -extern kern_return_t kern_invalid(); -extern kern_return_t thread_get_cthread_self(); -extern kern_return_t thread_set_cthread_self(); -extern kern_return_t thread_fast_set_cthread_self(); +extern kern_return_t kern_invalid(void); +#ifdef FIXME extern kern_return_t PCcreate(), PCldt(), PCresume(); extern kern_return_t PCcopyBIOSData(), PCmapBIOSRom(); extern kern_return_t PCsizeBIOSExtData(), PCcopyBIOSExtData(); +#endif machdep_call_t machdep_call_table[] = { - { - thread_get_cthread_self, - 0 - }, - { - thread_set_cthread_self, - 1 - }, - { - kern_invalid, /* old th_create() */ - 0 - }, - { - thread_fast_set_cthread_self, - 1 - }, + MACHDEP_CALL_ROUTINE(thread_get_cthread_self,0), + MACHDEP_CALL_ROUTINE(thread_set_cthread_self,1), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(thread_fast_set_cthread_self,1), + MACHDEP_CALL_ROUTINE(thread_set_user_ldt,3), #ifdef FIXME - { - PCcreate, - 3 - }, - { - PCldt, - 3 - }, - { - PCresume, - 0 - }, - { - PCcopyBIOSData, - 1 - }, - { - PCsizeBIOSExtData, - 0 - }, - { - PCcopyBIOSExtData, - 1 - }, - { - PCmapBIOSRom, - 3 - }, + MACHDEP_CALL_ROUTINE(PCcreate,3), + MACHDEP_CALL_ROUTINE(PCldt,3), + MACHDEP_CALL_ROUTINE(PCresume,0), + MACHDEP_CALL_ROUTINE(PCcopyBIOSData,1), + MACHDEP_CALL_ROUTINE(PCsizeBIOSExtData,0), + MACHDEP_CALL_ROUTINE(PCcopyBIOSExtData,1), + MACHDEP_CALL_ROUTINE(PCmapBIOSRom,3), #endif }; diff --git a/osfmk/i386/machdep_call.h b/osfmk/i386/machdep_call.h index beeb51304..6476900ea 100644 --- a/osfmk/i386/machdep_call.h +++ b/osfmk/i386/machdep_call.h @@ -30,7 +30,17 @@ * Created. */ -typedef kern_return_t (*machdep_call_routine_t)(); +typedef union { + kern_return_t (*args_0)(void); + kern_return_t (*args_1)(uint32_t); + kern_return_t (*args_2)(uint32_t,uint32_t); + kern_return_t (*args_3)(uint32_t,uint32_t,uint32_t); + kern_return_t (*args_4)(uint32_t, uint32_t,uint32_t,uint32_t); + kern_return_t (*args_var)(uint32_t,...); +} machdep_call_routine_t; + +#define MACHDEP_CALL_ROUTINE(func,args) \ + { { .args_ ## args = func }, args } typedef struct { machdep_call_routine_t routine; @@ -39,3 +49,11 @@ typedef struct { extern machdep_call_t machdep_call_table[]; extern int machdep_call_count; + +extern kern_return_t thread_get_cthread_self(void); +extern kern_return_t thread_set_cthread_self(uint32_t); +extern kern_return_t thread_fast_set_cthread_self(uint32_t); +extern kern_return_t thread_set_user_ldt(uint32_t,uint32_t,uint32_t); + +extern void mach25_syscall(struct i386_saved_state *); +extern void machdep_syscall(struct i386_saved_state *); diff --git a/osfmk/i386/machine_cpu.h b/osfmk/i386/machine_cpu.h index 291719b36..85950f68d 100644 --- a/osfmk/i386/machine_cpu.h +++ b/osfmk/i386/machine_cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
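The machdep_call.h changes above replace an untyped function-pointer table with a union keyed by arity, and MACHDEP_CALL_ROUTINE uses a C99 designated initializer to select the matching member, so each entry is type-checked against its argument count. The same pattern as a standalone program (the routines and names here are hypothetical, not the kernel's):

#include <stdint.h>
#include <stdio.h>

typedef union {
	int (*args_0)(void);
	int (*args_1)(uint32_t);
	int (*args_2)(uint32_t, uint32_t);
} call_routine_t;

typedef struct {
	call_routine_t	routine;
	int		nargs;
} call_t;

/* Token-pasting picks the union member that matches the arity. */
#define CALL_ROUTINE(func, args)	{ { .args_ ## args = (func) }, (args) }

static int ping(void)           { return 0; }
static int set_flag(uint32_t v) { printf("flag=%u\n", v); return 0; }

static call_t call_table[] = {
	CALL_ROUTINE(ping, 0),
	CALL_ROUTINE(set_flag, 1),
};

int
main(void)
{
	call_t *c = &call_table[1];	/* dispatch by recorded arity */
	return (c->nargs == 1) ? c->routine.args_1(7) : c->routine.args_0();
}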
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,28 +26,28 @@ #include #include #include +#include +__BEGIN_DECLS void cpu_machine_init( void); -kern_return_t cpu_register( - int *); - -kern_return_t cpu_start( - int); - -void cpu_doshutdown( - void); - -void cpu_sleep( - void); - struct i386_interrupt_state; void cpu_signal_handler( struct i386_interrupt_state *regs); +kern_return_t cpu_register( + int *slot_nump); +__END_DECLS + +static inline void cpu_halt(void) +{ + asm volatile( "cli; hlt" ); +} + static inline void cpu_pause(void) { asm volatile( "rep; nop" ); } + #endif /* _I386_MACHINE_CPU_H_ */ diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 6bd198fe7..fa5002a29 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -24,11 +24,23 @@ #include #include #include +#include #include -#include +#include +#include +#include #include #include #include +#include +#include +#include +#include + +#define MIN(a,b) ((a)<(b)? (a) : (b)) + +extern void initialize_screen(Boot_Video *, unsigned int); +extern void wakeup(void *); static int max_cpus_initialized = 0; @@ -47,7 +59,7 @@ vm_offset_t ml_io_map( /* boot memory allocation */ vm_offset_t ml_static_malloc( - vm_size_t size) + __unused vm_size_t size) { return((vm_offset_t)NULL); } @@ -56,15 +68,36 @@ vm_offset_t ml_static_ptovirt( vm_offset_t paddr) { - return phystokv(paddr); + return (vm_offset_t)((unsigned) paddr | LINEAR_KERNEL_ADDRESS); } + +/* + * Routine: ml_static_mfree + * Function: + */ void ml_static_mfree( - vm_offset_t vaddr, - vm_size_t size) + vm_offset_t vaddr, + vm_size_t size) { - return; + vm_offset_t vaddr_cur; + ppnum_t ppn; + + if (vaddr < VM_MIN_KERNEL_ADDRESS) return; + + assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */ + + for (vaddr_cur = vaddr; + vaddr_cur < round_page_32(vaddr+size); + vaddr_cur += PAGE_SIZE) { + ppn = pmap_find_phys(kernel_pmap, (addr64_t)vaddr_cur); + if (ppn != (vm_offset_t)NULL) { + pmap_remove(kernel_pmap, (addr64_t)vaddr_cur, (addr64_t)(vaddr_cur+PAGE_SIZE)); + vm_page_create(ppn,(ppn+1)); + vm_page_wire_count--; + } + } } /* virtual to physical on wired pages */ @@ -158,55 +191,102 @@ void ml_install_interrupt_handler( initialize_screen(0, kPEAcquireScreen); } +static void +cpu_idle(void) +{ + __asm__ volatile("sti; hlt": : :"memory"); +} +void (*cpu_idle_handler)(void) = cpu_idle; + void machine_idle(void) { - DBGLOG(cpu_handle, cpu_number(), MP_IDLE); - __asm__ volatile("sti; hlt": : :"memory"); - __asm__ volatile("cli"); - DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); + cpu_core_t *my_core = cpu_core(); + int others_active; + + /* + * We halt this cpu thread + * unless kernel param idlehalt is false and no other thread + * in the same core is active - if so, don't halt so that this + * core doesn't go into a low-power mode. 
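ml_static_mfree() above walks a page-aligned region, asks the pmap whether each page has a physical backing, and releases only the pages that do. A toy model of that probe-then-release loop, with an array standing in for pmap_find_phys()/pmap_remove() (everything here is simulated):

#include <stdint.h>
#include <stdio.h>

#define NPAGES	8

/* Toy page table standing in for pmap_find_phys(); 0 means "no backing". */
static uint32_t fake_pmap[NPAGES] = { 11, 12, 0, 14, 0, 16, 17, 18 };

static void
static_region_free(unsigned first, unsigned npages)
{
	for (unsigned i = first; i < first + npages; i++) {
		uint32_t pn = fake_pmap[i];
		if (pn != 0) {			/* release only mapped pages */
			fake_pmap[i] = 0;	/* pmap_remove() analogue */
			printf("released phys page %u\n", pn);
		}
	}
}

int
main(void)
{
	static_region_free(0, NPAGES);
	return 0;
}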
+ */ + others_active = !atomic_decl_and_test( + (long *) &my_core->active_threads, 1); + if (idlehalt || others_active) { + DBGLOG(cpu_handle, cpu_number(), MP_IDLE); + cpu_idle_handler(); + DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); + } else { + __asm__ volatile("sti"); + } + atomic_incl((long *) &my_core->active_threads, 1); } void machine_signal_idle( processor_t processor) { - cpu_interrupt(processor->slot_num); + cpu_interrupt(PROCESSOR_DATA(processor, slot_num)); } kern_return_t ml_processor_register( cpu_id_t cpu_id, uint32_t lapic_id, - processor_t *processor, + processor_t *processor_out, ipi_handler_t *ipi_handler, boolean_t boot_cpu) { - kern_return_t ret; int target_cpu; + cpu_data_t *this_cpu_datap; - if (cpu_register(&target_cpu) != KERN_SUCCESS) + this_cpu_datap = cpu_data_alloc(boot_cpu); + if (this_cpu_datap == NULL) { return KERN_FAILURE; - + } + target_cpu = this_cpu_datap->cpu_number; assert((boot_cpu && (target_cpu == 0)) || (!boot_cpu && (target_cpu != 0))); lapic_cpu_map(lapic_id, target_cpu); - cpu_data[target_cpu].cpu_id = cpu_id; - cpu_data[target_cpu].cpu_phys_number = lapic_id; - *processor = cpu_to_processor(target_cpu); + + this_cpu_datap->cpu_id = cpu_id; + this_cpu_datap->cpu_phys_number = lapic_id; + + this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu); + if (this_cpu_datap->cpu_console_buf == NULL) + goto failed; + + if (!boot_cpu) { + this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu); + if (this_cpu_datap->cpu_pmap == NULL) + goto failed; + + this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu); + if (this_cpu_datap->cpu_processor == NULL) + goto failed; + processor_init(this_cpu_datap->cpu_processor, target_cpu); + } + + *processor_out = this_cpu_datap->cpu_processor; *ipi_handler = NULL; return KERN_SUCCESS; + +failed: + cpu_processor_free(this_cpu_datap->cpu_processor); + pmap_cpu_free(this_cpu_datap->cpu_pmap); + console_cpu_free(this_cpu_datap->cpu_console_buf); + return KERN_FAILURE; } void -ml_cpu_get_info(ml_cpu_info_t *cpu_info) +ml_cpu_get_info(ml_cpu_info_t *cpu_infop) { boolean_t os_supports_sse; i386_cpu_info_t *cpuid_infop; - if (cpu_info == NULL) + if (cpu_infop == NULL) return; /* @@ -215,27 +295,36 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_info) */ os_supports_sse = get_cr4() & CR4_XMM; if ((cpuid_features() & CPUID_FEATURE_SSE2) && os_supports_sse) - cpu_info->vector_unit = 4; + cpu_infop->vector_unit = 4; else if ((cpuid_features() & CPUID_FEATURE_SSE) && os_supports_sse) - cpu_info->vector_unit = 3; + cpu_infop->vector_unit = 3; else if (cpuid_features() & CPUID_FEATURE_MMX) - cpu_info->vector_unit = 2; + cpu_infop->vector_unit = 2; else - cpu_info->vector_unit = 0; + cpu_infop->vector_unit = 0; cpuid_infop = cpuid_info(); - cpu_info->cache_line_size = cpuid_infop->cache_linesize; + cpu_infop->cache_line_size = cpuid_infop->cache_linesize; - cpu_info->l1_icache_size = cpuid_infop->cache_size[L1I]; - cpu_info->l1_dcache_size = cpuid_infop->cache_size[L1D]; + cpu_infop->l1_icache_size = cpuid_infop->cache_size[L1I]; + cpu_infop->l1_dcache_size = cpuid_infop->cache_size[L1D]; - cpu_info->l2_settings = 1; - cpu_info->l2_cache_size = cpuid_infop->cache_size[L2U]; + if (cpuid_infop->cache_size[L2U] > 0) { + cpu_infop->l2_settings = 1; + cpu_infop->l2_cache_size = cpuid_infop->cache_size[L2U]; + } else { + cpu_infop->l2_settings = 0; + cpu_infop->l2_cache_size = 0xFFFFFFFF; + } - /* XXX No L3 */ - cpu_info->l3_settings = 0; - cpu_info->l3_cache_size = 0xFFFFFFFF; + if (cpuid_infop->cache_size[L3U] > 0) { + 
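machine_idle() above decrements a per-core count of active threads and halts unless the idlehalt boot-arg is set or another thread in the core is still active, which keeps a shared core from dropping into a deep low-power state while work remains. A sketch of the decrement-and-test bookkeeping using GCC atomic builtins in place of the kernel's atomic_decl_and_test():

#include <stdbool.h>
#include <stdio.h>

static long active_threads = 2;		/* per-core counter, toy value */

/* True when the caller was the last active thread in the core. */
static bool
core_enter_idle(void)
{
	return __atomic_sub_fetch(&active_threads, 1, __ATOMIC_ACQ_REL) == 0;
}

static void
core_exit_idle(void)
{
	__atomic_add_fetch(&active_threads, 1, __ATOMIC_ACQ_REL);
}

static void
machine_idle_sketch(bool idlehalt)
{
	bool last_active = core_enter_idle();

	if (idlehalt || !last_active)
		puts("halt");	/* sti; hlt in the real code */
	else
		puts("spin");	/* keep the core out of low-power states */
	core_exit_idle();
}

int
main(void)
{
	machine_idle_sketch(true);
	return 0;
}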
cpu_infop->l3_settings = 1; + cpu_infop->l3_cache_size = cpuid_infop->cache_size[L3U]; + } else { + cpu_infop->l3_settings = 0; + cpu_infop->l3_cache_size = 0xFFFFFFFF; + } } void @@ -245,8 +334,15 @@ ml_init_max_cpus(unsigned long max_cpus) current_state = ml_set_interrupts_enabled(FALSE); if (max_cpus_initialized != MAX_CPUS_SET) { - if (max_cpus > 0 && max_cpus < NCPUS) - machine_info.max_cpus = max_cpus; + if (max_cpus > 0 && max_cpus <= MAX_CPUS) { + /* + * Note: max_cpus is the number of enabled processors + * that ACPI found; max_ncpus is the maximum number + * that the kernel supports or that the "cpus=" + * boot-arg has set. Here we take the minimum. + */ + machine_info.max_cpus = MIN(max_cpus, max_ncpus); + } if (max_cpus_initialized == MAX_CPUS_WAIT) wakeup((event_t)&max_cpus_initialized); max_cpus_initialized = MAX_CPUS_SET; @@ -269,39 +365,64 @@ ml_get_max_cpus(void) return(machine_info.max_cpus); } +/* + * This is called from the machine-independent routine cpu_up() + * to perform machine-dependent info updates. Defer to cpu_thread_init(). + */ +void +ml_cpu_up(void) +{ + return; +} + +/* + * This is called from the machine-independent routine cpu_down() + * to perform machine-dependent info updates. + */ +void +ml_cpu_down(void) +{ + return; +} + /* Stubs for pc tracing mechanism */ int *pc_trace_buf; int pc_trace_cnt = 0; int -set_be_bit() +set_be_bit(void) { return(0); } int -clr_be_bit() +clr_be_bit(void) { return(0); } int -be_tracing() +be_tracing(void) { return(0); } -#undef current_act -thread_act_t +/* + * The following are required for parts of the kernel + * that cannot resolve these functions as inlines: + */ +extern thread_t current_act(void); +thread_t current_act(void) -{ - return(current_act_fast()); -} +{ + return(current_thread_fast()); +} #undef current_thread +extern thread_t current_thread(void); thread_t current_thread(void) { - return(current_act_fast()); + return(current_thread_fast()); } diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index f6bcf37c4..aac0dd848 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -31,8 +31,11 @@ #include #include +#include #include +__BEGIN_DECLS + /* Interrupt handling */ /* Initialize Interrupts */ @@ -70,6 +73,7 @@ struct ml_processor_info { typedef struct ml_processor_info ml_processor_info_t; + /* Register a processor */ kern_return_t ml_processor_register( cpu_id_t cpu_id, @@ -178,7 +182,7 @@ struct ml_cpu_info { typedef struct ml_cpu_info ml_cpu_info_t; /* Get processor info */ -void ml_cpu_get_info(ml_cpu_info_t *cpu_info); +void ml_cpu_get_info(ml_cpu_info_t *ml_cpu_info); #endif /* __APPLE_API_UNSTABLE */ @@ -202,14 +206,6 @@ void bzero_phys( addr64_t phys_address, uint32_t length); -#ifdef MACH_KERNEL_PRIVATE - -void machine_idle(void); - -void machine_signal_idle( - processor_t processor); -#endif /* MACH_KERNEL_PRIVATE */ - void ml_thread_policy( thread_t thread, unsigned policy_id, @@ -228,6 +224,15 @@ void ml_init_max_cpus( int ml_get_max_cpus( void); +extern void ml_cpu_up(void); +extern void ml_cpu_down(void); + +extern int set_be_bit(void); +extern int clr_be_bit(void); +extern int be_tracing(void); + #endif /* __APPLE_API_PRIVATE */ +__END_DECLS + #endif /* _I386_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s index eba45c21c..f18d06c9f 100644 --- a/osfmk/i386/machine_routines_asm.s +++ b/osfmk/i386/machine_routines_asm.s @@ -40,253 +40,3 @@ ENTRY(ml_get_timebase) ret - -/* PCI config cycle probing - * - * boolean_t ml_probe_read(vm_offset_t paddr, unsigned int *val) - * - * Read the memory location at physical address paddr. - * This is a part of a device probe, so there is a good chance we will - * have a machine check here. So we have to be able to handle that. - * We assume that machine checks are enabled both in MSR and HIDs - */ -ENTRY(ml_probe_read) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl 0(%ecx), %ecx - movl %ecx, 0(%eax) - movl $1, %eax - - ret - - -/* PCI config cycle probing - 64-bit - * - * boolean_t ml_probe_read_64(addr64_t paddr, unsigned int *val) - * - * Read the memory location at physical address paddr. - * This is a part of a device probe, so there is a good chance we will - * have a machine check here. So we have to be able to handle that. - * We assume that machine checks are enabled both in MSR and HIDs - */ -ENTRY(ml_probe_read_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl 0(%ecx), %ecx - movl %ecx, 0(%eax) - movl $1, %eax - - ret - - -/* Read physical address byte - * - * unsigned int ml_phys_read_byte(vm_offset_t paddr) - * unsigned int ml_phys_read_byte_64(addr64_t paddr) - * - * Read the byte at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_read_byte_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - xor %eax, %eax - movb 0(%ecx), %eax - - ret - -ENTRY(ml_phys_read_byte) - - movl S_ARG0, %ecx - xor %eax, %eax - movb 0(%ecx), %eax - - ret - - -/* Read physical address half word - * - * unsigned int ml_phys_read_half(vm_offset_t paddr) - * unsigned int ml_phys_read_half_64(addr64_t paddr) - * - * Read the half word at physical address paddr. Memory should not be cache inhibited. 
- */ -ENTRY(ml_phys_read_half_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - xor %eax, %eax - movw 0(%ecx), %eax - - ret - -ENTRY(ml_phys_read_half) - - movl S_ARG0, %ecx - xor %eax, %eax - movw 0(%ecx), %eax - - ret - - -/* Read physical address word - * - * unsigned int ml_phys_read(vm_offset_t paddr) - * unsigned int ml_phys_read_64(addr64_t paddr) - * unsigned int ml_phys_read_word(vm_offset_t paddr) - * unsigned int ml_phys_read_word_64(addr64_t paddr) - * - * Read the word at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_read_64) -ENTRY(ml_phys_read_word_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl 0(%ecx), %eax - - ret - -ENTRY(ml_phys_read) -ENTRY(ml_phys_read_word) - - movl S_ARG0, %ecx - movl 0(%ecx), %eax - - ret - - -/* Read physical address double - * - * unsigned long long ml_phys_read_double(vm_offset_t paddr) - * unsigned long long ml_phys_read_double_64(addr64_t paddr) - * - * Read the double word at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_read_double_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl 0(%ecx), %eax - movl 4(%ecx), %edx - - ret - -ENTRY(ml_phys_read_double) - - movl S_ARG0, %ecx - movl 0(%ecx), %eax - movl 4(%ecx), %edx - - ret - - -/* Write physical address byte - * - * void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_byte_64(addr64_t paddr, unsigned int data) - * - * Write the byte at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_write_byte_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movb %eax, 0(%ecx) - - ret - -ENTRY(ml_phys_write_byte) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movb %eax, 0(%ecx) - - ret - - -/* Write physical address half word - * - * void ml_phys_write_half(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_half_64(addr64_t paddr, unsigned int data) - * - * Write the byte at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_write_half_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movw %eax, 0(%ecx) - - ret - -ENTRY(ml_phys_write_half) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movw %eax, 0(%ecx) - - ret - - -/* Write physical address word - * - * void ml_phys_write(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_64(addr64_t paddr, unsigned int data) - * void ml_phys_write_word(vm_offset_t paddr, unsigned int data) - * void ml_phys_write_word_64(addr64_t paddr, unsigned int data) - * - * Write the word at physical address paddr. Memory should not be cache inhibited. - */ -ENTRY(ml_phys_write_64) -ENTRY(ml_phys_write_word_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - - ret - -ENTRY(ml_phys_write) -ENTRY(ml_phys_write_word) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - - ret - - -/* Write physical address double word - * - * void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) - * void ml_phys_write_double_64(addr64_t paddr, unsigned long long data) - * - * Write the double word at physical address paddr. Memory should not be cache inhibited. 
- */ -ENTRY(ml_phys_write_double_64) - - /* Only use lower 32 bits of address for now */ - movl S_ARG0, %ecx - movl S_ARG2, %eax - movl %eax, 0(%ecx) - movl S_ARG3, %eax - movl %eax, 4(%ecx) - - ret - -ENTRY(ml_phys_write_double) - - movl S_ARG0, %ecx - movl S_ARG1, %eax - movl %eax, 0(%ecx) - movl S_ARG2, %eax - movl %eax, 4(%ecx) - - ret diff --git a/osfmk/i386/machparam.h b/osfmk/i386/machparam.h index a692dc68f..908137f32 100644 --- a/osfmk/i386/machparam.h +++ b/osfmk/i386/machparam.h @@ -56,11 +56,3 @@ * SPLs are true functions on i386, defined elsewhere. */ -/* - * XXX Temporary workaround to null out the call to compute_my_priority() - * from thread_quantum_expire() -- which for x86 may occur on the wrong cpu - * and this can lead to run queue corruption. - * Making this slimey re-definition here avoids the need for ifdefs in - * machine-independent code. - */ -#define compute_my_priority(x) diff --git a/osfmk/i386/mcount.s b/osfmk/i386/mcount.s index 763ffc7d8..12f1ddb68 100644 --- a/osfmk/i386/mcount.s +++ b/osfmk/i386/mcount.s @@ -27,23 +27,15 @@ Entry(mcount) pushl %ebp // setup mcount's frame movl %esp,%ebp + pushl %eax // save %eax pushf // save interrupt state cli // disable interrupts - // - // Check that %gs, with segment pointing at the per-cpu data area, - // has been set up. C routines (mp_desc_init() in particular) may - // be called very early before this happens. - // - mov %gs,%ax - test %ax,%ax - jz 1f - // // Check that this cpu is ready. // This delays the start of mcounting until a cpu is really prepared. // - movl %gs:CPD_CPU_STATUS,%eax + movl %gs:CPU_RUNNING,%eax testl %eax,%eax jz 1f @@ -51,10 +43,11 @@ Entry(mcount) // Test for recursion as indicated by a per-cpu flag. // Skip if nested, otherwise set the flag and call the C mount(). 
// - movl %gs:CPD_MCOUNT_OFF,%eax + movl %gs:CPU_MCOUNT_OFF,%eax testl %eax,%eax // test for recursion jnz 1f - incl %gs:CPD_MCOUNT_OFF // set recursion flag + + incl %gs:CPU_MCOUNT_OFF // set recursion flag movl (%ebp),%eax // frame pointer of mcount's caller movl 4(%eax),%eax // mcount's caller's return address @@ -63,9 +56,10 @@ Entry(mcount) call _mcount // call the C mcount addl $8,%esp // pop args - decl %gs:CPD_MCOUNT_OFF // turn off recursion flag + decl %gs:CPU_MCOUNT_OFF // turn off recursion flag 1: popf // restore interrupt state + popl %eax movl %ebp,%esp // tear down mcount's frame popl %ebp ret diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 4921093ba..dfa24f8a2 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -25,16 +25,21 @@ #include +extern void i386_preinit(void); +extern void i386_init(void); +extern void i386_vm_init(unsigned int, struct KernelBootArgs *); + +extern void machine_startup(void); + extern void get_root_device(void); extern void picinit(void); -extern void slave_clock(void); extern void interrupt_processor( int cpu); extern void mp_probe_cpus(void); extern void remote_kdb(void); extern void clear_kdb_intr(void); extern void draw_panic_dialog(void); -extern void set_cpu_model(void); +extern void cpu_init(void); extern void cpu_shutdown(void); extern void fix_desc( void * desc, @@ -55,8 +60,6 @@ extern void blkclr( extern void kdb_kintr(void); extern void kdb_console(void); -extern unsigned long ntohl(unsigned long); - extern unsigned int div_scale( unsigned int dividend, unsigned int divisor, @@ -70,3 +73,30 @@ extern unsigned int mul_scale( /* Move arbitrarily-aligned data from one physical address to another */ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes); +/* Flush all cachelines for a page. */ +extern void cache_flush_page_phys(ppnum_t pa); + +/* Flushing for incoherent I/O */ +extern void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count); +extern void dcache_incoherent_io_store64(addr64_t pa, unsigned int count); + + +extern processor_t cpu_processor_alloc(boolean_t is_boot_cpu); +extern void cpu_processor_free(processor_t proc); + +extern void sysclk_gettime_interrupts_disabled( + mach_timespec_t *cur_time); + + +extern void rtclock_intr(struct i386_interrupt_state *regs); + +extern void rtc_sleep_wakeup(void); + +extern void rtc_clock_stepping( + uint32_t new_frequency, + uint32_t old_frequency); +extern void rtc_clock_stepped( + uint32_t new_frequency, + uint32_t old_frequency); + +extern void x86_lowmem_free(void); diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 6da284edf..1edc74a25 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,11 +23,27 @@ * @OSF_COPYRIGHT@ */ -#include #include #include #include #include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include #include #include @@ -40,14 +56,11 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #if MP_DEBUG #define PAUSE delay(1000000) @@ -57,12 +70,38 @@ #define PAUSE #endif /* MP_DEBUG */ +/* + * By default, use high vectors to leave vector space for systems + * with multiple I/O APIC's. 
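The mcount changes above gate profiling on the per-cpu CPU_RUNNING flag and a per-cpu CPU_MCOUNT_OFF recursion flag, so the C-level _mcount() can itself be compiled with profiling without recursing. The same guard in portable C, with a thread-local flag standing in for the %gs-relative per-cpu slot (an assumption of this sketch):

#include <stdio.h>

static _Thread_local int mcount_off;		/* CPU_MCOUNT_OFF analogue */
static _Thread_local int cpu_running = 1;	/* CPU_RUNNING analogue */

static void
mcount_hook(void *frompc, void *selfpc)
{
	if (!cpu_running)	/* too early: per-cpu state not ready */
		return;
	if (mcount_off)		/* already inside the profiler */
		return;
	mcount_off = 1;
	printf("arc %p -> %p\n", frompc, selfpc);	/* record the call arc */
	mcount_off = 0;
}

int
main(void)
{
	mcount_hook((void *)0x1000, (void *)0x2000);
	return 0;
}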
However some systems that boot with + * local APIC disabled will hang in SMM when vectors greater than + * 0x5F are used. Those systems are not expected to have I/O APIC + * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect. + */ +#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0 +#define LAPIC_REDUCED_INTERRUPT_BASE 0x50 +/* + * Specific lapic interrupts are relative to this base: + */ +#define LAPIC_PERFCNT_INTERRUPT 0xB +#define LAPIC_TIMER_INTERRUPT 0xC +#define LAPIC_SPURIOUS_INTERRUPT 0xD +#define LAPIC_INTERPROCESSOR_INTERRUPT 0xE +#define LAPIC_ERROR_INTERRUPT 0xF + /* Initialize lapic_id so cpu_number() works on non SMP systems */ unsigned long lapic_id_initdata = 0; unsigned long lapic_id = (unsigned long)&lapic_id_initdata; -vm_offset_t lapic_start; +vm_offset_t lapic_start; + +static i386_intr_func_t lapic_timer_func; +static i386_intr_func_t lapic_pmi_func; + +/* TRUE if local APIC was enabled by the OS not by the BIOS */ +static boolean_t lapic_os_enabled = FALSE; + +/* Base vector for local APIC interrupt sources */ +int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE; -void lapic_init(void); void slave_boot_init(void); static void mp_kdp_wait(void); @@ -71,7 +110,8 @@ static void mp_rendezvous_action(void); boolean_t smp_initialized = FALSE; decl_simple_lock_data(,mp_kdp_lock); -decl_simple_lock_data(,mp_putc_lock); + +decl_mutex_data(static, mp_cpu_boot_lock); /* Variables needed for MP rendezvous. */ static void (*mp_rv_setup_func)(void *arg); @@ -79,28 +119,28 @@ static void (*mp_rv_action_func)(void *arg); static void (*mp_rv_teardown_func)(void *arg); static void *mp_rv_func_arg; static int mp_rv_ncpus; -static volatile long mp_rv_waiters[2]; +static long mp_rv_waiters[2]; decl_simple_lock_data(,mp_rv_lock); -int lapic_to_cpu[LAPIC_ID_MAX+1]; -int cpu_to_lapic[NCPUS]; +int lapic_to_cpu[MAX_CPUS]; +int cpu_to_lapic[MAX_CPUS]; static void lapic_cpu_map_init(void) { int i; - for (i = 0; i < NCPUS; i++) - cpu_to_lapic[i] = -1; - for (i = 0; i <= LAPIC_ID_MAX; i++) + for (i = 0; i < MAX_CPUS; i++) { lapic_to_cpu[i] = -1; + cpu_to_lapic[i] = -1; + } } void -lapic_cpu_map(int apic_id, int cpu_number) +lapic_cpu_map(int apic_id, int cpu) { - cpu_to_lapic[cpu_number] = apic_id; - lapic_to_cpu[apic_id] = cpu_number; + cpu_to_lapic[cpu] = apic_id; + lapic_to_cpu[apic_id] = cpu; } #ifdef MP_DEBUG @@ -109,19 +149,24 @@ lapic_cpu_map_dump(void) { int i; - for (i = 0; i < NCPUS; i++) { + for (i = 0; i < MAX_CPUS; i++) { if (cpu_to_lapic[i] == -1) continue; kprintf("cpu_to_lapic[%d]: %d\n", i, cpu_to_lapic[i]); } - for (i = 0; i <= LAPIC_ID_MAX; i++) { + for (i = 0; i < MAX_CPUS; i++) { if (lapic_to_cpu[i] == -1) continue; kprintf("lapic_to_cpu[%d]: %d\n", i, lapic_to_cpu[i]); } } +#define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump() +#define LAPIC_DUMP() lapic_dump() +#else +#define LAPIC_CPU_MAP_DUMP() +#define LAPIC_DUMP() #endif /* MP_DEBUG */ #define LAPIC_REG(reg) \ @@ -129,10 +174,36 @@ lapic_cpu_map_dump(void) #define LAPIC_REG_OFFSET(reg,off) \ (*((volatile int *)(lapic_start + LAPIC_##reg + (off)))) +#define LAPIC_VECTOR(src) \ + (lapic_interrupt_base + LAPIC_##src##_INTERRUPT) + +#define LAPIC_ISR_IS_SET(base,src) \ + (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \ + (1 <<((base + LAPIC_##src##_INTERRUPT)%32))) + +#if GPROF +/* + * Initialize dummy structs for profiling. These aren't used but + * allows hertz_tick() to be built with GPROF defined. 
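The LAPIC_VECTOR and LAPIC_ISR_IS_SET macros above derive every local APIC vector from a relocatable base and locate a vector's in-service bit among the eight 32-bit ISR words spaced 0x10 bytes apart. The index arithmetic, checked as a plain C program:

#include <stdio.h>

#define INTERRUPT_BASE	0xD0	/* LAPIC_DEFAULT_INTERRUPT_BASE above */
#define TIMER_OFFSET	0xC	/* LAPIC_TIMER_INTERRUPT above */

int
main(void)
{
	int vector = INTERRUPT_BASE + TIMER_OFFSET;	/* 0xDC */
	int word   = (vector / 32) * 0x10;		/* byte offset of ISR word */
	int bit    = vector % 32;			/* bit within that word */

	printf("vector 0x%02X -> ISR word +0x%02X, bit %d\n", vector, word, bit);
	return 0;
}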
+ */ +struct profile_vars _profile_vars; +struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars }; +#define GPROF_INIT() \ +{ \ + int i; \ + \ + /* Hack to initialize pointers to unused profiling structs */ \ + for (i = 1; i < MAX_CPUS; i++) \ + _profile_vars_cpus[i] = &_profile_vars; \ +} +#else +#define GPROF_INIT() +#endif /* GPROF */ + +extern void master_up(void); void smp_init(void) - { int result; vm_map_entry_t entry; @@ -140,49 +211,62 @@ smp_init(void) uint32_t hi; boolean_t is_boot_processor; boolean_t is_lapic_enabled; + vm_offset_t lapic_base; + + simple_lock_init(&mp_kdp_lock, 0); + simple_lock_init(&mp_rv_lock, 0); + mutex_init(&mp_cpu_boot_lock, 0); + console_init(); /* Local APIC? */ - if ((cpuid_features() & CPUID_FEATURE_APIC) == 0) + if (!lapic_probe()) return; - simple_lock_init(&mp_kdp_lock, ETAP_MISC_PRINTF); - simple_lock_init(&mp_rv_lock, ETAP_MISC_PRINTF); - simple_lock_init(&mp_putc_lock, ETAP_MISC_PRINTF); - /* Examine the local APIC state */ rdmsr(MSR_IA32_APIC_BASE, lo, hi); is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0; is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0; - DBG("MSR_IA32_APIC_BASE 0x%x:0x%x %s %s\n", hi, lo, + lapic_base = (lo & MSR_IA32_APIC_BASE_BASE); + kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base, is_lapic_enabled ? "enabled" : "disabled", is_boot_processor ? "BSP" : "AP"); - assert(is_boot_processor); - assert(is_lapic_enabled); + if (!is_boot_processor || !is_lapic_enabled) + panic("Unexpected local APIC state\n"); /* Establish a map to the local apic */ lapic_start = vm_map_min(kernel_map); result = vm_map_find_space(kernel_map, &lapic_start, round_page(LAPIC_SIZE), 0, &entry); if (result != KERN_SUCCESS) { - printf("smp_init: vm_map_find_entry FAILED (err=%d). " - "Only supporting ONE cpu.\n", result); - return; + panic("smp_init: vm_map_find_entry FAILED (err=%d)", result); } vm_map_unlock(kernel_map); pmap_enter(pmap_kernel(), lapic_start, - (ppnum_t) i386_btop(i386_trunc_page(LAPIC_START)), + (ppnum_t) i386_btop(lapic_base), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); lapic_id = (unsigned long)(lapic_start + LAPIC_ID); + if ((LAPIC_REG(VERSION)&LAPIC_VERSION_MASK) != 0x14) { + printf("Local APIC version not 0x14 as expected\n"); + } + /* Set up the lapic_id <-> cpu_number map and add this boot processor */ lapic_cpu_map_init(); lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0); lapic_init(); + cpu_thread_init(); + + if (pmc_init() != KERN_SUCCESS) + printf("Performance counters not available\n"); + + GPROF_INIT(); + DBGLOG_CPU_INIT(master_cpu); + slave_boot_init(); master_up(); @@ -192,7 +276,7 @@ smp_init(void) } -int +static int lapic_esr_read(void) { /* write-read register */ @@ -200,14 +284,14 @@ lapic_esr_read(void) return LAPIC_REG(ERROR_STATUS); } -void +static void lapic_esr_clear(void) { LAPIC_REG(ERROR_STATUS) = 0; LAPIC_REG(ERROR_STATUS) = 0; } -static char *DM[8] = { +static const char *DM[8] = { "Fixed", "Lowest Priority", "Invalid", @@ -221,7 +305,6 @@ void lapic_dump(void) { int i; - char buf[128]; #define BOOL(a) ((a)?' 
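smp_init() above decodes IA32_APIC_BASE to learn whether this CPU is the bootstrap processor, whether the local APIC is enabled, and where it is mapped. The decode of the low MSR word as a standalone program (bit 8 is BSP, bit 11 is the global enable, bits 12 and up are the base, per the IA-32 architecture; the sample value is illustrative):

#include <stdint.h>
#include <stdio.h>

#define APIC_BASE_BSP		(1u << 8)	/* bootstrap processor */
#define APIC_BASE_ENABLE	(1u << 11)	/* APIC global enable */
#define APIC_BASE_BASE		0xFFFFF000u	/* 4 KB-aligned physical base */

int
main(void)
{
	uint32_t lo = 0xFEE00900u;	/* example value, typical for a BSP */

	printf("base=0x%08X %s %s\n",
	    lo & APIC_BASE_BASE,
	    (lo & APIC_BASE_ENABLE) ? "enabled" : "disabled",
	    (lo & APIC_BASE_BSP) ? "BSP" : "AP");
	return 0;
}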
':'!') @@ -245,11 +328,12 @@ lapic_dump(void) (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", BOOL(LAPIC_REG(LVT_TIMER)&LAPIC_LVT_MASKED), (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot"); - kprintf("LVT_PERFCNT: Vector 0x%02x [%s][%s][%s] %s %cmasked\n", + kprintf(" Initial Count: 0x%08x \n", LAPIC_REG(TIMER_INITIAL_COUNT)); + kprintf(" Current Count: 0x%08x \n", LAPIC_REG(TIMER_CURRENT_COUNT)); + kprintf(" Divide Config: 0x%08x \n", LAPIC_REG(TIMER_DIVIDE_CONFIG)); + kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n", LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_VECTOR_MASK, DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK], - (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ", - (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High", (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle", BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED)); kprintf("LVT_LINT0: Vector 0x%02x [%s][%s][%s] %s %cmasked\n", @@ -289,13 +373,88 @@ lapic_dump(void) kprintf("\n"); } +boolean_t +lapic_probe(void) +{ + uint32_t lo; + uint32_t hi; + + if (cpuid_features() & CPUID_FEATURE_APIC) + return TRUE; + + if (cpuid_family() == 6 || cpuid_family() == 15) { + /* + * Mobile Pentiums: + * There may be a local APIC which wasn't enabled by BIOS. + * So we try to enable it explicitly. + */ + rdmsr(MSR_IA32_APIC_BASE, lo, hi); + lo &= ~MSR_IA32_APIC_BASE_BASE; + lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START; + lo |= MSR_IA32_APIC_BASE_ENABLE; + wrmsr(MSR_IA32_APIC_BASE, lo, hi); + + /* + * Re-initialize cpu features info and re-check. + */ + set_cpu_model(); + if (cpuid_features() & CPUID_FEATURE_APIC) { + printf("Local APIC discovered and enabled\n"); + lapic_os_enabled = TRUE; + lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE; + return TRUE; + } + } + + return FALSE; +} + void -lapic_init(void) +lapic_shutdown(void) { - int value; + uint32_t lo; + uint32_t hi; + uint32_t value; + + /* Shutdown if local APIC was enabled by OS */ + if (lapic_os_enabled == FALSE) + return; mp_disable_preemption(); + /* ExtINT: masked */ + if (get_cpu_number() == master_cpu) { + value = LAPIC_REG(LVT_LINT0); + value |= LAPIC_LVT_MASKED; + LAPIC_REG(LVT_LINT0) = value; + } + + /* Timer: masked */ + LAPIC_REG(LVT_TIMER) |= LAPIC_LVT_MASKED; + + /* Perfmon: masked */ + LAPIC_REG(LVT_PERFCNT) |= LAPIC_LVT_MASKED; + + /* Error: masked */ + LAPIC_REG(LVT_ERROR) |= LAPIC_LVT_MASKED; + + /* APIC software disabled */ + LAPIC_REG(SVR) &= ~LAPIC_SVR_ENABLE; + + /* Bypass the APIC completely and update cpu features */ + rdmsr(MSR_IA32_APIC_BASE, lo, hi); + lo &= ~MSR_IA32_APIC_BASE_ENABLE; + wrmsr(MSR_IA32_APIC_BASE, lo, hi); + set_cpu_model(); + + mp_enable_preemption(); +} + +void +lapic_init(void) +{ + int value; + /* Set flat delivery model, logical processor id */ LAPIC_REG(DFR) = LAPIC_DFR_FLAT; LAPIC_REG(LDR) = (get_cpu_number()) << LAPIC_LDR_SHIFT; @@ -303,45 +462,165 @@ lapic_init(void) /* Accept all */ LAPIC_REG(TPR) = 0; - LAPIC_REG(SVR) = SPURIOUS_INTERRUPT | LAPIC_SVR_ENABLE; + LAPIC_REG(SVR) = LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE; /* ExtINT */ if (get_cpu_number() == master_cpu) { value = LAPIC_REG(LVT_LINT0); + value &= ~LAPIC_LVT_MASKED; value |= LAPIC_LVT_DM_EXTINT; LAPIC_REG(LVT_LINT0) = value; } + /* Timer: unmasked, one-shot */ + LAPIC_REG(LVT_TIMER) = LAPIC_VECTOR(TIMER); + + /* Perfmon: unmasked */ + LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); + lapic_esr_clear(); - LAPIC_REG(LVT_ERROR) = APIC_ERROR_INTERRUPT; + LAPIC_REG(LVT_ERROR) = 
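lapic_shutdown() above quiesces each local vector table entry by setting its mask bit before disabling the APIC through the MSR. The read-modify-write idiom against a memory-mapped LVT register, as a small helper (bit 16 is the architectural LVT mask bit; the register pointer is assumed to be an existing mapping):

#include <stdint.h>

#define LVT_MASKED	(1u << 16)	/* architectural LVT mask bit */

/* Mask one LVT entry; reg must point at a mapped LAPIC register. */
static void
lvt_mask(volatile uint32_t *reg)
{
	uint32_t v = *reg;	/* keep the vector and mode bits */
	*reg = v | LVT_MASKED;	/* single 32-bit store back */
}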
LAPIC_VECTOR(ERROR); - mp_enable_preemption(); } +void +lapic_set_timer_func(i386_intr_func_t func) +{ + lapic_timer_func = func; +} void -lapic_end_of_interrupt(void) +lapic_set_timer( + boolean_t interrupt, + lapic_timer_mode_t mode, + lapic_timer_divide_t divisor, + lapic_timer_count_t initial_count) +{ + boolean_t state; + uint32_t timer_vector; + + state = ml_set_interrupts_enabled(FALSE); + timer_vector = LAPIC_REG(LVT_TIMER); + timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);; + timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED; + timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0; + LAPIC_REG(LVT_TIMER) = timer_vector; + LAPIC_REG(TIMER_DIVIDE_CONFIG) = divisor; + LAPIC_REG(TIMER_INITIAL_COUNT) = initial_count; + ml_set_interrupts_enabled(state); +} + +void +lapic_get_timer( + lapic_timer_mode_t *mode, + lapic_timer_divide_t *divisor, + lapic_timer_count_t *initial_count, + lapic_timer_count_t *current_count) +{ + boolean_t state; + + state = ml_set_interrupts_enabled(FALSE); + if (mode) + *mode = (LAPIC_REG(LVT_TIMER) & LAPIC_LVT_PERIODIC) ? + periodic : one_shot; + if (divisor) + *divisor = LAPIC_REG(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK; + if (initial_count) + *initial_count = LAPIC_REG(TIMER_INITIAL_COUNT); + if (current_count) + *current_count = LAPIC_REG(TIMER_CURRENT_COUNT); + ml_set_interrupts_enabled(state); +} + +void +lapic_set_pmi_func(i386_intr_func_t func) +{ + lapic_pmi_func = func; +} + +static inline void +_lapic_end_of_interrupt(void) { LAPIC_REG(EOI) = 0; } void +lapic_end_of_interrupt(void) +{ + _lapic_end_of_interrupt(); +} + +int lapic_interrupt(int interrupt, void *state) { + interrupt -= lapic_interrupt_base; + if (interrupt < 0) + return 0; switch(interrupt) { - case APIC_ERROR_INTERRUPT: + case LAPIC_PERFCNT_INTERRUPT: + if (lapic_pmi_func != NULL) + (*lapic_pmi_func)( + (struct i386_interrupt_state *) state); + /* Clear interrupt masked */ + LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT); + _lapic_end_of_interrupt(); + return 1; + case LAPIC_TIMER_INTERRUPT: + _lapic_end_of_interrupt(); + if (lapic_timer_func != NULL) + (*lapic_timer_func)( + (struct i386_interrupt_state *) state); + return 1; + case LAPIC_ERROR_INTERRUPT: + lapic_dump(); panic("Local APIC error\n"); - break; - case SPURIOUS_INTERRUPT: + _lapic_end_of_interrupt(); + return 1; + case LAPIC_SPURIOUS_INTERRUPT: kprintf("SPIV\n"); - break; - case INTERPROCESS_INTERRUPT: + /* No EOI required here */ + return 1; + case LAPIC_INTERPROCESSOR_INTERRUPT: cpu_signal_handler((struct i386_interrupt_state *) state); - break; + _lapic_end_of_interrupt(); + return 1; } - lapic_end_of_interrupt(); + return 0; +} + +void +lapic_smm_restore(void) +{ + boolean_t state; + + if (lapic_os_enabled == FALSE) + return; + + state = ml_set_interrupts_enabled(FALSE); + + if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) { + /* + * Bogus SMI handler enables interrupts but does not know about + * local APIC interrupt sources. When APIC timer counts down to + * zero while in SMM, local APIC will end up waiting for an EOI + * but no interrupt was delivered to the OS. + */ + _lapic_end_of_interrupt(); + + /* + * timer is one-shot, trigger another quick countdown to trigger + * another timer interrupt. 
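lapic_set_timer() above programs the timer with three stores: the LVT entry carries the vector, mask, and periodic bits, the divide register scales the input clock, and writing the initial count arms the countdown. A sketch over an assumed set of already-mapped registers (the struct and names are this sketch's own):

#include <stdbool.h>
#include <stdint.h>

#define LVT_MASKED	(1u << 16)
#define LVT_PERIODIC	(1u << 17)

struct timer_regs {			/* assumed, already-mapped registers */
	volatile uint32_t *lvt_timer;
	volatile uint32_t *divide_config;
	volatile uint32_t *initial_count;
};

static void
timer_program(struct timer_regs *r, bool interrupt, bool periodic,
    uint32_t divisor, uint32_t count)
{
	uint32_t lvt = *r->lvt_timer & ~(LVT_MASKED | LVT_PERIODIC);

	if (!interrupt)
		lvt |= LVT_MASKED;
	if (periodic)
		lvt |= LVT_PERIODIC;
	*r->lvt_timer = lvt;
	*r->divide_config = divisor;
	*r->initial_count = count;	/* this store arms the timer */
}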
+ */ + if (LAPIC_REG(TIMER_CURRENT_COUNT) == 0) { + LAPIC_REG(TIMER_INITIAL_COUNT) = 1; + } + + kprintf("lapic_smm_restore\n"); + } + + ml_set_interrupts_enabled(state); } kern_return_t @@ -350,36 +629,58 @@ intel_startCPU( { int i = 1000; - int lapic_id = cpu_to_lapic[slot_num]; + int lapic = cpu_to_lapic[slot_num]; - if (slot_num == get_cpu_number()) - return KERN_SUCCESS; + assert(lapic != -1); + + DBGLOG_CPU_INIT(slot_num); - assert(lapic_id != -1); + DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic); + DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) IdlePTD); - DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic_id); + /* Initialize (or re-initialize) the descriptor tables for this cpu. */ + mp_desc_init(cpu_datap(slot_num), FALSE); + + /* Serialize use of the slave boot stack. */ + mutex_lock(&mp_cpu_boot_lock); mp_disable_preemption(); + if (slot_num == get_cpu_number()) { + mp_enable_preemption(); + mutex_unlock(&mp_cpu_boot_lock); + return KERN_SUCCESS; + } - LAPIC_REG(ICRD) = lapic_id << LAPIC_ICRD_DEST_SHIFT; + LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT; LAPIC_REG(ICR) = LAPIC_ICR_DM_INIT; delay(10000); - LAPIC_REG(ICRD) = lapic_id << LAPIC_ICRD_DEST_SHIFT; + LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT; LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12); delay(200); + LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT; + LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12); + delay(200); + +#ifdef POSTCODE_DELAY + /* Wait much longer if postcodes are displayed for a delay period. */ + i *= 10000; +#endif while(i-- > 0) { - delay(10000); - if (machine_slot[slot_num].running) + if (cpu_datap(slot_num)->cpu_running) break; + delay(10000); } mp_enable_preemption(); + mutex_unlock(&mp_cpu_boot_lock); - if (!machine_slot[slot_num].running) { + if (!cpu_datap(slot_num)->cpu_running) { DBG("Failed to start CPU %02d\n", slot_num); - printf("Failed to start CPU %02d\n", slot_num); + printf("Failed to start CPU %02d, rebooting...\n", slot_num); + delay(1000000); + cpu_shutdown(); return KERN_SUCCESS; } else { DBG("Started CPU %02d\n", slot_num); @@ -388,86 +689,61 @@ intel_startCPU( } } +extern char slave_boot_base[]; +extern char slave_boot_end[]; +extern void pstart(void); + void slave_boot_init(void) { - extern char slave_boot_base[]; - extern char slave_boot_end[]; - extern void pstart(void); - - DBG("slave_base=%p slave_end=%p MP_BOOT P=%p V=%p\n", - slave_boot_base, slave_boot_end, MP_BOOT, phystokv(MP_BOOT)); + DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n", + slave_boot_base, + kvtophys((vm_offset_t) slave_boot_base), + MP_BOOT, + slave_boot_end-slave_boot_base); /* * Copy the boot entry code to the real-mode vector area MP_BOOT. * This is in page 1 which has been reserved for this purpose by * machine_startup() from the boot processor. * The slave boot code is responsible for switching to protected - * mode and then jumping to the common startup, pstart(). + * mode and then jumping to the common startup, _start(). */ - bcopy(slave_boot_base, - (char *)phystokv(MP_BOOT), - slave_boot_end-slave_boot_base); + bcopy_phys((addr64_t) kvtophys((vm_offset_t) slave_boot_base), + (addr64_t) MP_BOOT, + slave_boot_end-slave_boot_base); /* * Zero a stack area above the boot code. 
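intel_startCPU() above wakes an application processor with the conventional INIT/STARTUP sequence: one INIT IPI followed by two STARTUP IPIs whose vector field is the 4 KB page number of the real-mode boot code (MP_BOOT >> 12), each separated by a delay. The shape of that sequence, with the ICR write and the delay left as assumed helpers (the delivery-mode encodings are the architectural INIT and STARTUP values):

#include <stdint.h>

#define ICR_INIT	0x00000500u	/* delivery mode INIT */
#define ICR_STARTUP	0x00000600u	/* delivery mode STARTUP */

extern void apic_icr_send(uint32_t dest_lapic_id, uint32_t icr);  /* assumed */
extern void spin_delay_us(unsigned usec);                         /* assumed */

static void
start_ap(uint32_t lapic_id, uint32_t boot_phys)
{
	uint32_t sipi = ICR_STARTUP | (boot_phys >> 12);  /* 4 KB page number */

	apic_icr_send(lapic_id, ICR_INIT);
	spin_delay_us(10000);
	apic_icr_send(lapic_id, sipi);	/* first STARTUP */
	spin_delay_us(200);
	apic_icr_send(lapic_id, sipi);	/* second STARTUP, as above */
	spin_delay_us(200);
}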
*/ - bzero((char *)(phystokv(MP_BOOTSTACK+MP_BOOT)-0x400), 0x400); + DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK+MP_BOOT-0x400, 0x400); + bzero_phys((addr64_t)MP_BOOTSTACK+MP_BOOT-0x400, 0x400); /* * Set the location at the base of the stack to point to the * common startup entry. */ - *((vm_offset_t *) phystokv(MP_MACH_START+MP_BOOT)) = - kvtophys((vm_offset_t)&pstart); + DBG("writing 0x%x at phys 0x%x\n", + kvtophys((vm_offset_t) &pstart), MP_MACH_START+MP_BOOT); + ml_phys_write_word(MP_MACH_START+MP_BOOT, + kvtophys((vm_offset_t) &pstart)); /* Flush caches */ __asm__("wbinvd"); } #if MP_DEBUG -cpu_signal_event_log_t cpu_signal[NCPUS] = { 0, 0, 0 }; -cpu_signal_event_log_t cpu_handle[NCPUS] = { 0, 0, 0 }; +cpu_signal_event_log_t *cpu_signal[MAX_CPUS]; +cpu_signal_event_log_t *cpu_handle[MAX_CPUS]; MP_EVENT_NAME_DECL(); -void -cpu_signal_dump_last(int cpu) -{ - cpu_signal_event_log_t *logp = &cpu_signal[cpu]; - int last; - cpu_signal_event_t *eventp; - - last = (logp->next_entry == 0) ? - LOG_NENTRIES - 1 : logp->next_entry - 1; - - eventp = &logp->entry[last]; - - kprintf("cpu%d: tsc=%lld cpu_signal(%d,%s)\n", - cpu, eventp->time, eventp->cpu, mp_event_name[eventp->event]); -} - -void -cpu_handle_dump_last(int cpu) -{ - cpu_signal_event_log_t *logp = &cpu_handle[cpu]; - int last; - cpu_signal_event_t *eventp; - - last = (logp->next_entry == 0) ? - LOG_NENTRIES - 1 : logp->next_entry - 1; - - eventp = &logp->entry[last]; - - kprintf("cpu%d: tsc=%lld cpu_signal_handle%s\n", - cpu, eventp->time, mp_event_name[eventp->event]); -} #endif /* MP_DEBUG */ void -cpu_signal_handler(struct i386_interrupt_state *regs) +cpu_signal_handler(__unused struct i386_interrupt_state *regs) { - register my_cpu; + int my_cpu; volatile int *my_word; #if MACH_KDB && MACH_ASSERT int i=100; @@ -476,7 +752,7 @@ cpu_signal_handler(struct i386_interrupt_state *regs) mp_disable_preemption(); my_cpu = cpu_number(); - my_word = &cpu_data[my_cpu].cpu_signals; + my_word = ¤t_cpu_datap()->cpu_signals; do { #if MACH_KDB && MACH_ASSERT @@ -490,11 +766,7 @@ cpu_signal_handler(struct i386_interrupt_state *regs) mp_kdp_wait(); } else #endif /* MACH_KDP */ - if (i_bit(MP_CLOCK, my_word)) { - DBGLOG(cpu_handle,my_cpu,MP_CLOCK); - i_bit_clear(MP_CLOCK, my_word); - hardclock(regs); - } else if (i_bit(MP_TLB_FLUSH, my_word)) { + if (i_bit(MP_TLB_FLUSH, my_word)) { DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH); i_bit_clear(MP_TLB_FLUSH, my_word); pmap_update_interrupt(); @@ -521,6 +793,9 @@ cpu_signal_handler(struct i386_interrupt_state *regs) } +#ifdef MP_DEBUG +extern int max_lock_loops; +#endif /* MP_DEBUG */ void cpu_interrupt(int cpu) { @@ -529,48 +804,35 @@ cpu_interrupt(int cpu) if (smp_initialized) { /* Wait for previous interrupt to be delivered... 
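cpu_signal_handler() above drains a per-cpu word of pending event bits, clearing each bit before acting on it; the kernel uses locked bit operations since remote CPUs set bits concurrently. A single-threaded toy of the drain loop (the event numbering is this sketch's own):

#include <stdio.h>

enum { EV_TLB_FLUSH, EV_AST, EV_KDP, EV_MAX };

static void
drain_signals(volatile unsigned *word)
{
	while (*word != 0) {
		for (int ev = 0; ev < EV_MAX; ev++) {
			if (*word & (1u << ev)) {
				*word &= ~(1u << ev);	/* clear, then handle */
				printf("handling event %d\n", ev);
			}
		}
	}
}

int
main(void)
{
	volatile unsigned w = (1u << EV_TLB_FLUSH) | (1u << EV_KDP);
	drain_signals(&w);
	return 0;
}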
*/ - while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) +#ifdef MP_DEBUG + int pending_busy_count = 0; + while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { + if (++pending_busy_count > max_lock_loops) + panic("cpus_interrupt() deadlock\n"); +#else + while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) { +#endif /* MP_DEBUG */ cpu_pause(); + } state = ml_set_interrupts_enabled(FALSE); LAPIC_REG(ICRD) = cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT; LAPIC_REG(ICR) = - INTERPROCESS_INTERRUPT | LAPIC_ICR_DM_FIXED; + LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED; (void) ml_set_interrupts_enabled(state); } } -void -slave_clock(void) -{ - int cpu; - - /* - * Clock interrupts are chained from the boot processor - * to the next logical processor that is running and from - * there on to any further running processor etc. - */ - mp_disable_preemption(); - for (cpu=cpu_number()+1; cpucpu_signals; + uint64_t tsc_timeout; - if (!cpu_data[cpu].cpu_status) + if (!cpu_datap(cpu)->cpu_running) return; DBGLOG(cpu_signal, cpu, event); @@ -579,8 +841,8 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) cpu_interrupt(cpu); if (mode == SYNC) { again: - timeout = rdtsc64() + (1000*1000*1000); - while (i_bit(event, signals) && rdtsc64() < timeout) { + tsc_timeout = rdtsc64() + (1000*1000*1000); + while (i_bit(event, signals) && rdtsc64() < tsc_timeout) { cpu_pause(); } if (i_bit(event, signals)) { @@ -594,11 +856,11 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) void i386_signal_cpus(mp_event_t event, mp_sync_t mode) { - int cpu; - int my_cpu = cpu_number(); + unsigned int cpu; + unsigned int my_cpu = cpu_number(); - for (cpu = 0; cpu < NCPUS; cpu++) { - if (cpu == my_cpu || !machine_slot[cpu].running) + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; i386_signal_cpu(cpu, event, mode); } @@ -607,11 +869,11 @@ i386_signal_cpus(mp_event_t event, mp_sync_t mode) int i386_active_cpus(void) { - int cpu; - int ncpus = 0; + unsigned int cpu; + unsigned int ncpus = 0; - for (cpu = 0; cpu < NCPUS; cpu++) { - if (machine_slot[cpu].running) + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu_datap(cpu)->cpu_running) ncpus++; } return(ncpus); @@ -640,14 +902,14 @@ mp_rendezvous_action(void) mp_rv_setup_func(mp_rv_func_arg); /* spin on entry rendezvous */ atomic_incl(&mp_rv_waiters[0], 1); - while (mp_rv_waiters[0] < mp_rv_ncpus) + while (*((volatile long *) &mp_rv_waiters[0]) < mp_rv_ncpus) cpu_pause(); /* action function */ if (mp_rv_action_func != NULL) mp_rv_action_func(mp_rv_func_arg); /* spin on exit rendezvous */ atomic_incl(&mp_rv_waiters[1], 1); - while (mp_rv_waiters[1] < mp_rv_ncpus) + while (*((volatile long *) &mp_rv_waiters[1]) < mp_rv_ncpus) cpu_pause(); /* teardown function */ if (mp_rv_teardown_func != NULL) @@ -700,15 +962,16 @@ mp_rendezvous(void (*setup_func)(void *), #if MACH_KDP volatile boolean_t mp_kdp_trap = FALSE; long mp_kdp_ncpus; +boolean_t mp_kdp_state; + void mp_kdp_enter(void) { - int cpu; - int ncpus; - int my_cpu = cpu_number(); - boolean_t state; - uint64_t timeout; + unsigned int cpu; + unsigned int ncpus; + unsigned int my_cpu = cpu_number(); + uint64_t tsc_timeout; DBG("mp_kdp_enter()\n"); @@ -717,7 +980,7 @@ mp_kdp_enter(void) * In case of races, only one cpu is allowed to enter kdp after * stopping others. 
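mp_rendezvous_action() above meets the other CPUs at two counters, one at entry and one at exit, and the casts to volatile keep the compiler from caching the counter load inside the spin. One such barrier phase in C11 atomics (a fragment; cpu_pause() becomes an empty spin here):

#include <stdatomic.h>

static atomic_long waiters;	/* mp_rv_waiters[phase] analogue */
static long ncpus = 4;		/* set by the initiating CPU */

static void
barrier_phase(void)
{
	atomic_fetch_add(&waiters, 1);
	while (atomic_load(&waiters) < ncpus)
		;	/* cpu_pause() in the kernel: rep; nop */
}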
*/ - state = ml_set_interrupts_enabled(FALSE); + mp_kdp_state = ml_set_interrupts_enabled(FALSE); simple_lock(&mp_kdp_lock); while (mp_kdp_trap) { simple_unlock(&mp_kdp_lock); @@ -728,12 +991,11 @@ mp_kdp_enter(void) mp_kdp_ncpus = 1; /* self */ mp_kdp_trap = TRUE; simple_unlock(&mp_kdp_lock); - (void) ml_set_interrupts_enabled(state); /* Deliver a nudge to other cpus, counting how many */ DBG("mp_kdp_enter() signaling other processors\n"); - for (ncpus = 1, cpu = 0; cpu < NCPUS; cpu++) { - if (cpu == my_cpu || !machine_slot[cpu].running) + for (ncpus = 1, cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; ncpus++; i386_signal_cpu(cpu, MP_KDP, ASYNC); @@ -741,18 +1003,22 @@ mp_kdp_enter(void) /* Wait other processors to spin. */ DBG("mp_kdp_enter() waiting for (%d) processors to suspend\n", ncpus); - timeout = rdtsc64() + (1000*1000*1000); - while (*((volatile long *) &mp_kdp_ncpus) != ncpus - && rdtsc64() < timeout) { + tsc_timeout = rdtsc64() + (1000*1000*1000); + while (*((volatile unsigned int *) &mp_kdp_ncpus) != ncpus + && rdtsc64() < tsc_timeout) { cpu_pause(); } DBG("mp_kdp_enter() %d processors done %s\n", mp_kdp_ncpus, (mp_kdp_ncpus == ncpus) ? "OK" : "timed out"); + postcode(MP_KDP_ENTER); } static void mp_kdp_wait(void) { + boolean_t state; + + state = ml_set_interrupts_enabled(TRUE); DBG("mp_kdp_wait()\n"); atomic_incl(&mp_kdp_ncpus, 1); while (mp_kdp_trap) { @@ -760,6 +1026,7 @@ mp_kdp_wait(void) } atomic_decl(&mp_kdp_ncpus, 1); DBG("mp_kdp_wait() done\n"); + (void) ml_set_interrupts_enabled(state); } void @@ -775,23 +1042,15 @@ mp_kdp_exit(void) cpu_pause(); } DBG("mp_kdp_exit() done\n"); + (void) ml_set_interrupts_enabled(mp_kdp_state); + postcode(0); } #endif /* MACH_KDP */ -void -lapic_test(void) -{ - int cpu = 1; - - lapic_dump(); - i_bit_set(0, &cpu_data[cpu].cpu_signals); - cpu_interrupt(1); -} - /*ARGSUSED*/ void init_ast_check( - processor_t processor) + __unused processor_t processor) { } @@ -799,7 +1058,7 @@ void cause_ast_check( processor_t processor) { - int cpu = processor->slot_num; + int cpu = PROCESSOR_DATA(processor, slot_num); if (cpu != cpu_number()) { i386_signal_cpu(cpu, MP_AST, ASYNC); @@ -813,12 +1072,12 @@ cause_ast_check( void remote_kdb(void) { - int my_cpu = cpu_number(); - int cpu; + unsigned int my_cpu = cpu_number(); + unsigned int cpu; mp_disable_preemption(); - for (cpu = 0; cpu < NCPUS; cpu++) { - if (cpu == my_cpu || !machine_slot[cpu].running) + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) continue; i386_signal_cpu(cpu, MP_KDB, SYNC); } @@ -833,37 +1092,58 @@ void clear_kdb_intr(void) { mp_disable_preemption(); - i_bit_clear(MP_KDB, &cpu_data[cpu_number()].cpu_signals); + i_bit_clear(MP_KDB, ¤t_cpu_datap()->cpu_signals); mp_enable_preemption(); } +/* + * i386_init_slave() is called from pstart. + * We're in the cpu's interrupt stack with interrupts disabled. 
+ */ void -slave_machine_init(void) +i386_init_slave(void) { - int my_cpu; + postcode(I386_INIT_SLAVE); /* Ensure that caching and write-through are enabled */ set_cr0(get_cr0() & ~(CR0_NW|CR0_CD)); - mp_disable_preemption(); - my_cpu = get_cpu_number(); - - DBG("slave_machine_init() CPU%d: phys (%d) active.\n", - my_cpu, get_cpu_phys_number()); + DBG("i386_init_slave() CPU%d: phys (%d) active.\n", + get_cpu_number(), get_cpu_phys_number()); lapic_init(); + LAPIC_DUMP(); + LAPIC_CPU_MAP_DUMP(); + + mtrr_update_cpu(); + + pat_init(); + + cpu_init(); + + slave_main(); + + panic("i386_init_slave() returned from slave_main()"); +} + +void +slave_machine_init(void) +{ + /* + * Here in process context. + */ + DBG("slave_machine_init() CPU%d\n", get_cpu_number()); + init_fpu(); - cpu_machine_init(); + cpu_thread_init(); - mp_enable_preemption(); + pmc_init(); -#ifdef MP_DEBUG - lapic_dump(); - lapic_cpu_map_dump(); -#endif /* MP_DEBUG */ + cpu_machine_init(); + clock_init(); } #undef cpu_number() diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index 16819d22d..6863b946f 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -50,41 +50,72 @@ /* */ +#ifdef KERNEL_PRIVATE #ifndef _I386AT_MP_H_ #define _I386AT_MP_H_ -#if !defined(NCPUS) -#include -#endif /* !defined(NCPUS) */ - -#if NCPUS > 1 - #ifndef DEBUG #include #endif -#if DEBUG -#define MP_DEBUG 1 -#endif +//#define MP_DEBUG 1 #include #include -#define SPURIOUS_INTERRUPT 0xDD -#define INTERPROCESS_INTERRUPT 0xDE -#define APIC_ERROR_INTERRUPT 0xDF +#define LAPIC_ID_MAX (LAPIC_ID_MASK) -#define LAPIC_ID_MAX (LAPIC_ID_MASK) +#define MAX_CPUS (LAPIC_ID_MAX + 1) #ifndef ASSEMBLER +#include +#include +#include + +__BEGIN_DECLS + +extern kern_return_t intel_startCPU(int slot_num); +extern void i386_init_slave(void); +extern void smp_init(void); + +extern void cpu_interrupt(int cpu); + +extern void lapic_init(void); +extern void lapic_shutdown(void); +extern void lapic_smm_restore(void); +extern boolean_t lapic_probe(void); extern void lapic_dump(void); -extern void lapic_interrupt(int interrupt, void *state); +extern int lapic_interrupt(int interrupt, void *state); +extern void lapic_end_of_interrupt(void); extern int lapic_to_cpu[]; extern int cpu_to_lapic[]; +extern int lapic_interrupt_base; extern void lapic_cpu_map(int lapic, int cpu_num); + +extern void lapic_set_timer( + boolean_t interrupt, + lapic_timer_mode_t mode, + lapic_timer_divide_t divisor, + lapic_timer_count_t initial_count); + +extern void lapic_get_timer( + lapic_timer_mode_t *mode, + lapic_timer_divide_t *divisor, + lapic_timer_count_t *initial_count, + lapic_timer_count_t *current_count); + +typedef void (*i386_intr_func_t)(void *); +extern void lapic_set_timer_func(i386_intr_func_t func); +extern void lapic_set_pmi_func(i386_intr_func_t func); + +__END_DECLS + #endif /* ASSEMBLER */ #define CPU_NUMBER(r) \ + movl %gs:CPU_NUMBER_GS,r + +#define CPU_NUMBER_FROM_LAPIC(r) \ movl EXT(lapic_id),r; \ movl 0(r),r; \ shrl $(LAPIC_ID_SHIFT),r; \ @@ -92,16 +123,20 @@ extern void lapic_cpu_map(int lapic, int cpu_num); movl EXT(lapic_to_cpu)(,r,4),r -#define MP_IPL SPL6 /* software interrupt level */ - /* word describing the reason for the interrupt, one per cpu */ #ifndef ASSEMBLER #include -extern int real_ncpus; /* real number of cpus */ -extern int wncpu; /* wanted number of cpus */ + +extern unsigned 
int real_ncpus; /* real number of cpus */ +extern unsigned int max_ncpus; /* max number of cpus */ decl_simple_lock_data(extern,kdb_lock) /* kdb lock */ -decl_simple_lock_data(extern,mp_putc_lock) + +__BEGIN_DECLS + +extern void console_init(void); +extern void *console_cpu_alloc(boolean_t boot_cpu); +extern void console_cpu_free(void *console_buf); extern int kdb_cpu; /* current cpu running kdb */ extern int kdb_debug; @@ -109,8 +144,8 @@ extern int kdb_is_slave[]; extern int kdb_active[]; extern volatile boolean_t mp_kdp_trap; -extern void mp_trap_enter(); -extern void mp_trap_exit(); +extern void mp_kdp_enter(void); +extern void mp_kdp_exit(void); /* * All cpu rendezvous: @@ -120,6 +155,8 @@ extern void mp_rendezvous(void (*setup_func)(void *), void (*teardown_func)(void *), void *arg); +__END_DECLS + #if MP_DEBUG typedef struct { uint64_t time; @@ -134,14 +171,14 @@ typedef struct { cpu_signal_event_t entry[LOG_NENTRIES]; } cpu_signal_event_log_t; -extern cpu_signal_event_log_t cpu_signal[NCPUS]; -extern cpu_signal_event_log_t cpu_handle[NCPUS]; +extern cpu_signal_event_log_t *cpu_signal[]; +extern cpu_signal_event_log_t *cpu_handle[]; #define DBGLOG(log,_cpu,_event) { \ - cpu_signal_event_log_t *logp = &log[cpu_number()]; \ + boolean_t spl = ml_set_interrupts_enabled(FALSE); \ + cpu_signal_event_log_t *logp = log[cpu_number()]; \ int next = logp->next_entry; \ cpu_signal_event_t *eventp = &logp->entry[next]; \ - boolean_t spl = ml_set_interrupts_enabled(FALSE); \ \ logp->count[_event]++; \ \ @@ -155,8 +192,27 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; \ (void) ml_set_interrupts_enabled(spl); \ } + +#define DBGLOG_CPU_INIT(cpu) { \ + cpu_signal_event_log_t **sig_logpp = &cpu_signal[cpu]; \ + cpu_signal_event_log_t **hdl_logpp = &cpu_handle[cpu]; \ + \ + if (*sig_logpp == NULL && \ + kmem_alloc(kernel_map, \ + (vm_offset_t *) sig_logpp, \ + sizeof(cpu_signal_event_log_t)) != KERN_SUCCESS)\ + panic("DBGLOG_CPU_INIT cpu_signal allocation failed\n");\ + bzero(*sig_logpp, sizeof(cpu_signal_event_log_t)); \ + if (*hdl_logpp == NULL && \ + kmem_alloc(kernel_map, \ + (vm_offset_t *) hdl_logpp, \ + sizeof(cpu_signal_event_log_t)) != KERN_SUCCESS)\ + panic("DBGLOG_CPU_INIT cpu_handle allocation failed\n");\ + bzero(*sig_logpp, sizeof(cpu_signal_event_log_t)); \ +} #else /* MP_DEBUG */ #define DBGLOG(log,_cpu,_event) +#define DBGLOG_CPU_INIT(cpu) #endif /* MP_DEBUG */ #endif /* ASSEMBLER */ @@ -187,23 +243,12 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; #define MP_DEV_OP_TIMEO 2 /* If lock busy, register a pending timeout */ #define MP_DEV_OP_CALLB 3 /* If lock busy, register a pending callback */ -#else /* NCPUS > 1 */ -#define at386_io_lock_state() -#define at386_io_lock(op) (TRUE) -#define at386_io_unlock() -#define mp_trap_enter() -#define mp_trap_exit() -#include -#endif /* NCPUS > 1 */ - #if MACH_RT -#define _DISABLE_PREEMPTION(r) \ - movl $ CPD_PREEMPTION_LEVEL,r ; \ - incl %gs:(r) +#define _DISABLE_PREEMPTION \ + incl %gs:CPU_PREEMPTION_LEVEL -#define _ENABLE_PREEMPTION(r) \ - movl $ CPD_PREEMPTION_LEVEL,r ; \ - decl %gs:(r) ; \ +#define _ENABLE_PREEMPTION \ + decl %gs:CPU_PREEMPTION_LEVEL ; \ jne 9f ; \ pushl %eax ; \ pushl %ecx ; \ @@ -214,12 +259,11 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %eax ; \ 9: -#define _ENABLE_PREEMPTION_NO_CHECK(r) \ - movl $ CPD_PREEMPTION_LEVEL,r ; \ - decl %gs:(r) +#define _ENABLE_PREEMPTION_NO_CHECK \ + decl %gs:CPU_PREEMPTION_LEVEL #if MACH_ASSERT -#define DISABLE_PREEMPTION(r) \ +#define DISABLE_PREEMPTION \ pushl %eax; \ 
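DBGLOG_CPU_INIT above allocates each per-cpu signal log lazily on first use and zero-fills it before the DBGLOG macro writes entries. The allocate-once idiom in plain C, with calloc standing in for kmem_alloc plus bzero (the struct layout here is abbreviated):

#include <stdlib.h>

#define LOG_NENTRIES	128

typedef struct {
	int	next_entry;
	int	entry[LOG_NENTRIES];	/* event records elided in this sketch */
} event_log_t;

static event_log_t *cpu_log[8];		/* one slot per cpu */

static event_log_t *
log_for_cpu(int cpu)
{
	if (cpu_log[cpu] == NULL) {
		cpu_log[cpu] = calloc(1, sizeof(event_log_t));	/* zeroed */
		if (cpu_log[cpu] == NULL)
			abort();	/* the kernel panics instead */
	}
	return cpu_log[cpu];
}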
pushl %ecx; \ pushl %edx; \ @@ -227,7 +271,7 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#define ENABLE_PREEMPTION(r) \ +#define ENABLE_PREEMPTION \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -235,7 +279,7 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#define ENABLE_PREEMPTION_NO_CHECK(r) \ +#define ENABLE_PREEMPTION_NO_CHECK \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -243,8 +287,7 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#if NCPUS > 1 -#define MP_DISABLE_PREEMPTION(r) \ +#define MP_DISABLE_PREEMPTION \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -252,7 +295,7 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#define MP_ENABLE_PREEMPTION(r) \ +#define MP_ENABLE_PREEMPTION \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -260,7 +303,7 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#define MP_ENABLE_PREEMPTION_NO_CHECK(r) \ +#define MP_ENABLE_PREEMPTION_NO_CHECK \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -268,33 +311,24 @@ extern cpu_signal_event_log_t cpu_handle[NCPUS]; popl %edx; \ popl %ecx; \ popl %eax -#else /* NCPUS > 1 */ -#define MP_DISABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION_NO_CHECK(r) -#endif /* NCPUS > 1 */ #else /* MACH_ASSERT */ -#define DISABLE_PREEMPTION(r) _DISABLE_PREEMPTION(r) -#define ENABLE_PREEMPTION(r) _ENABLE_PREEMPTION(r) -#define ENABLE_PREEMPTION_NO_CHECK(r) _ENABLE_PREEMPTION_NO_CHECK(r) -#if NCPUS > 1 -#define MP_DISABLE_PREEMPTION(r) _DISABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION(r) _ENABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION_NO_CHECK(r) _ENABLE_PREEMPTION_NO_CHECK(r) -#else /* NCPUS > 1 */ -#define MP_DISABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION_NO_CHECK(r) -#endif /* NCPUS > 1 */ +#define DISABLE_PREEMPTION _DISABLE_PREEMPTION +#define ENABLE_PREEMPTION _ENABLE_PREEMPTION +#define ENABLE_PREEMPTION_NO_CHECK _ENABLE_PREEMPTION_NO_CHECK +#define MP_DISABLE_PREEMPTION _DISABLE_PREEMPTION +#define MP_ENABLE_PREEMPTION _ENABLE_PREEMPTION +#define MP_ENABLE_PREEMPTION_NO_CHECK _ENABLE_PREEMPTION_NO_CHECK #endif /* MACH_ASSERT */ #else /* MACH_RT */ -#define DISABLE_PREEMPTION(r) -#define ENABLE_PREEMPTION(r) -#define ENABLE_PREEMPTION_NO_CHECK(r) -#define MP_DISABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION(r) -#define MP_ENABLE_PREEMPTION_NO_CHECK(r) +#define DISABLE_PREEMPTION +#define ENABLE_PREEMPTION +#define ENABLE_PREEMPTION_NO_CHECK +#define MP_DISABLE_PREEMPTION +#define MP_ENABLE_PREEMPTION +#define MP_ENABLE_PREEMPTION_NO_CHECK #endif /* MACH_RT */ #endif /* _I386AT_MP_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c index 01d114aba..03cc903d7 100644 --- a/osfmk/i386/mp_desc.c +++ b/osfmk/i386/mp_desc.c @@ -51,11 +51,9 @@ /* */ -#include - -#if NCPUS > 1 #include +#include #include #include #include @@ -64,6 +62,7 @@ #include #include #include +#include #include @@ -75,17 +74,6 @@ * than any thread`s kernel stack. */ -/* - * Addresses of bottom and top of interrupt stacks. - */ -vm_offset_t interrupt_stack[NCPUS]; -vm_offset_t int_stack_top[NCPUS]; - -/* - * Barrier address. - */ -vm_offset_t int_stack_high; - /* * First cpu`s interrupt stack. */ @@ -93,10 +81,16 @@ extern char intstack[]; /* bottom */ extern char eintstack[]; /* top */ /* - * We allocate interrupt stacks from physical memory. 
+ * Per-cpu data area pointers. + * The master cpu (cpu 0) has its data area statically allocated; + * others are allocated dynamically and this array is updated at runtime. */ -extern -vm_offset_t avail_start; +cpu_data_t cpu_data_master; +cpu_data_t *cpu_data_ptr[MAX_CPUS] = { [0] &cpu_data_master }; + +decl_simple_lock_data(,cpu_lock); /* protects real_ncpus */ +unsigned int real_ncpus = 1; +unsigned int max_ncpus = MAX_CPUS; /* * Multiprocessor i386/i486 systems use a separate copy of the @@ -107,30 +101,6 @@ vm_offset_t avail_start; * and since using a TSS marks it busy. */ -/* - * Allocated descriptor tables. - */ -struct mp_desc_table *mp_desc_table[NCPUS] = { 0 }; - -/* - * Pointer to TSS for access in load_context. - */ -struct i386_tss *mp_ktss[NCPUS] = { 0 }; - -#if MACH_KDB -/* - * Pointer to TSS for debugger use. - */ -struct i386_tss *mp_dbtss[NCPUS] = { 0 }; -#endif /* MACH_KDB */ - -/* - * Pointer to GDT to reset the KTSS busy bit. - */ -struct fake_descriptor *mp_gdt[NCPUS] = { 0 }; -struct fake_descriptor *mp_idt[NCPUS] = { 0 }; -struct fake_descriptor *mp_ldt[NCPUS] = { 0 }; - /* * Allocate and initialize the per-processor descriptor tables. */ @@ -155,32 +125,33 @@ struct fake_descriptor cpudata_desc_pattern = { ACC_P|ACC_PL_K|ACC_DATA_W }; -struct mp_desc_table * +void mp_desc_init( - int mycpu) + cpu_data_t *cdp, + boolean_t is_boot_cpu) { - register struct mp_desc_table *mpt; + struct mp_desc_table *mpt = cdp->cpu_desc_tablep; + cpu_desc_index_t *cdt = &cdp->cpu_desc_index; - if (mycpu == master_cpu) { + if (is_boot_cpu) { /* * Master CPU uses the tables built at boot time. * Just set the TSS and GDT pointers. */ - mp_ktss[mycpu] = &ktss; + cdt->cdi_ktss = &ktss; #if MACH_KDB - mp_dbtss[mycpu] = &dbtss; + cdt->cdi_dbtss = &dbtss; #endif /* MACH_KDB */ - mp_gdt[mycpu] = gdt; - mp_idt[mycpu] = idt; - mp_ldt[mycpu] = ldt; - return 0; - } - else { - mpt = mp_desc_table[mycpu]; - mp_ktss[mycpu] = &mpt->ktss; - mp_gdt[mycpu] = mpt->gdt; - mp_idt[mycpu] = mpt->idt; - mp_ldt[mycpu] = mpt->ldt; + cdt->cdi_gdt = gdt; + cdt->cdi_idt = idt; + cdt->cdi_ldt = ldt; + + } else { + + cdt->cdi_ktss = &mpt->ktss; + cdt->cdi_gdt = mpt->gdt; + cdt->cdi_idt = mpt->idt; + cdt->cdi_ldt = mpt->ldt; /* * Copy the tables @@ -196,15 +167,9 @@ mp_desc_init( sizeof(ldt)); bzero((char *)&mpt->ktss, sizeof(struct i386_tss)); -#if 0 - bzero((char *)&cpu_data[mycpu], - sizeof(cpu_data_t)); -#endif - /* I am myself */ - cpu_data[mycpu].cpu_number = mycpu; #if MACH_KDB - mp_dbtss[mycpu] = &mpt->dbtss; + cdt->cdi_dbtss = &mpt->dbtss; bcopy((char *)&dbtss, (char *)&mpt->dbtss, sizeof(struct i386_tss)); @@ -215,106 +180,136 @@ mp_desc_init( * this LDT and this TSS.
*/ mpt->gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - mpt->gdt[sel_idx(KERNEL_LDT)].offset = - LINEAR_KERNEL_ADDRESS + (unsigned int) mpt->ldt; + mpt->gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) mpt->ldt; fix_desc(&mpt->gdt[sel_idx(KERNEL_LDT)], 1); mpt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - mpt->gdt[sel_idx(KERNEL_TSS)].offset = - LINEAR_KERNEL_ADDRESS + (unsigned int) &mpt->ktss; + mpt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) &mpt->ktss; fix_desc(&mpt->gdt[sel_idx(KERNEL_TSS)], 1); - mpt->gdt[sel_idx(CPU_DATA)] = cpudata_desc_pattern; - mpt->gdt[sel_idx(CPU_DATA)].offset = - LINEAR_KERNEL_ADDRESS + (unsigned int) &cpu_data[mycpu]; - fix_desc(&mpt->gdt[sel_idx(CPU_DATA)], 1); + mpt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; + mpt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; + fix_desc(&mpt->gdt[sel_idx(CPU_DATA_GS)], 1); #if MACH_KDB mpt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - mpt->gdt[sel_idx(DEBUG_TSS)].offset = - LINEAR_KERNEL_ADDRESS + (unsigned int) &mpt->dbtss; + mpt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) &mpt->dbtss; fix_desc(&mpt->gdt[sel_idx(DEBUG_TSS)], 1); mpt->dbtss.esp0 = (int)(db_task_stack_store + - (INTSTACK_SIZE * (mycpu + 1)) - sizeof (natural_t)); + (INTSTACK_SIZE * (cdp->cpu_number + 1)) - sizeof (natural_t)); mpt->dbtss.esp = mpt->dbtss.esp0; mpt->dbtss.eip = (int)&db_task_start; #endif /* MACH_KDB */ mpt->ktss.ss0 = KERNEL_DS; mpt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ - - return mpt; } } -/* - * Called after all CPUs have been found, but before the VM system - * is running. The machine array must show which CPUs exist. - */ -void -interrupt_stack_alloc(void) +cpu_data_t * +cpu_data_alloc(boolean_t is_boot_cpu) { - register int i; - int cpu_count; - vm_offset_t stack_start; - struct mp_desc_table *mpt; + int ret; + cpu_data_t *cdp; + + if (is_boot_cpu) { + assert(real_ncpus == 1); + simple_lock_init(&cpu_lock, 0); + cdp = &cpu_data_master; + if (cdp->cpu_processor == NULL) { + cdp->cpu_processor = cpu_processor_alloc(TRUE); + cdp->cpu_pmap = pmap_cpu_alloc(TRUE); + cdp->cpu_this = cdp; + cdp->cpu_int_stack_top = (vm_offset_t) eintstack; + mp_desc_init(cdp, TRUE); + } + return cdp; + } - /* - * Number of CPUs possible. - */ - cpu_count = wncpu; + /* Check count before making allocations */ + if (real_ncpus >= max_ncpus) + return NULL; /* - * Allocate an interrupt stack for each CPU except for - * the master CPU (which uses the bootstrap stack) + * Allocate per-cpu data: */ - stack_start = phystokv(avail_start); - avail_start = round_page(avail_start + INTSTACK_SIZE*(cpu_count-1)); - bzero((char *)stack_start, INTSTACK_SIZE*(cpu_count-1)); + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &cdp, sizeof(cpu_data_t)); + if (ret != KERN_SUCCESS) { + printf("cpu_data_alloc() failed, ret=%d\n", ret); + goto abort; + } + bzero((void*) cdp, sizeof(cpu_data_t)); + cdp->cpu_this = cdp; /* - * Set up pointers to the top of the interrupt stack.
+ * Allocate interrupt stack: */ - for (i = 0; i < cpu_count; i++) { - if (i == master_cpu) { - interrupt_stack[i] = (vm_offset_t) intstack; - int_stack_top[i] = (vm_offset_t) eintstack; - } - else { - interrupt_stack[i] = stack_start; - int_stack_top[i] = stack_start + INTSTACK_SIZE; - - stack_start += INTSTACK_SIZE; - } + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &cdp->cpu_int_stack_top, + INTSTACK_SIZE); + if (ret != KERN_SUCCESS) { + printf("cpu_data_alloc() int stack failed, ret=%d\n", ret); + goto abort; } + bzero((void*) cdp->cpu_int_stack_top, INTSTACK_SIZE); + cdp->cpu_int_stack_top += INTSTACK_SIZE; /* - * Allocate descriptor tables for each CPU except for - * the master CPU (which already has them initialized) + * Allocate descriptor table: */ + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &cdp->cpu_desc_tablep, + sizeof(struct mp_desc_table)); + if (ret != KERN_SUCCESS) { + printf("cpu_data_alloc() desc_table failed, ret=%d\n", ret); + goto abort; + } - mpt = (struct mp_desc_table *) phystokv(avail_start); - avail_start = round_page((vm_offset_t)avail_start + - sizeof(struct mp_desc_table)*(cpu_count-1)); - for (i = 0; i < cpu_count; i++) - if (i != master_cpu) - mp_desc_table[i] = mpt++; - + simple_lock(&cpu_lock); + if (real_ncpus >= max_ncpus) { + simple_unlock(&cpu_lock); + goto abort; + } + cpu_data_ptr[real_ncpus] = cdp; + cdp->cpu_number = real_ncpus; + real_ncpus++; + simple_unlock(&cpu_lock); + + kprintf("cpu_data_alloc(%d) 0x%x desc_table: 0x%x " + "int_stack: 0x%x-0x%x\n", + cdp->cpu_number, cdp, cdp->cpu_desc_tablep, + cdp->cpu_int_stack_top - INTSTACK_SIZE, cdp->cpu_int_stack_top); + + return cdp; + +abort: + if (cdp) { + if (cdp->cpu_desc_tablep) + kfree((void *) cdp->cpu_desc_tablep, + sizeof(*cdp->cpu_desc_tablep)); + if (cdp->cpu_int_stack_top) + kfree((void *) (cdp->cpu_int_stack_top - INTSTACK_SIZE), + INTSTACK_SIZE); + kfree((void *) cdp, sizeof(*cdp)); + } + return NULL; +} - /* - * Set up the barrier address. All thread stacks MUST - * be above this address. - */ - /* - * intstack is at higher addess than stack_start for AT mps - * so int_stack_high must point at eintstack. - * XXX - * But what happens if a kernel stack gets allocated below - * 1 Meg ? Probably never happens, there is only 640 K available - * There. - */ - int_stack_high = (vm_offset_t) eintstack; +boolean_t +valid_user_segment_selectors(uint16_t cs, + uint16_t ss, + uint16_t ds, + uint16_t es, + uint16_t fs, + uint16_t gs) +{ + return valid_user_code_selector(cs) && + valid_user_stack_selector(ss) && + valid_user_data_selector(ds) && + valid_user_data_selector(es) && + valid_user_data_selector(fs) && + valid_user_data_selector(gs); } -#endif /* NCPUS > 1 */ diff --git a/osfmk/i386/mp_desc.h b/osfmk/i386/mp_desc.h index fd85e29ad..3a89fad07 100644 --- a/osfmk/i386/mp_desc.h +++ b/osfmk/i386/mp_desc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,8 +54,10 @@ #ifndef _I386_MP_DESC_H_ #define _I386_MP_DESC_H_ -#include #include +#include + +__BEGIN_DECLS /* * Multiprocessor i386/i486 systems use a separate copy of the @@ -83,33 +85,85 @@ struct mp_desc_table { struct i386_tss dbtss; }; -/* - * They are pointed to by a per-processor array. 
- */ -extern struct mp_desc_table *mp_desc_table[NCPUS]; +#define current_gdt() (current_cpu_datap()->cpu_desc_index.cdi_gdt) +#define current_idt() (current_cpu_datap()->cpu_desc_index.cdi_idt) +#define current_ldt() (current_cpu_datap()->cpu_desc_index.cdi_ldt) +#define current_ktss() (current_cpu_datap()->cpu_desc_index.cdi_ktss) +#define current_dbtss() (current_cpu_datap()->cpu_desc_index.cdi_dbtss) -/* - * The kernel TSS gets its own pointer. - */ -extern struct i386_tss *mp_ktss[NCPUS]; -#if MACH_KDB -extern struct i386_tss *mp_dbtss[NCPUS]; -#endif /* MACH_KDB */ +#define gdt_desc_p(sel) \ + ((struct real_descriptor *)&current_gdt()[sel_idx(sel)]) +#define ldt_desc_p(sel) \ + ((struct real_descriptor *)&current_ldt()[sel_idx(sel)]) -/* - * So does the GDT and IDT. - */ -extern struct fake_descriptor *mp_gdt[NCPUS]; -extern struct fake_descriptor *mp_idt[NCPUS]; -extern struct fake_descriptor *mp_ldt[NCPUS]; +extern void mp_desc_init(cpu_data_t *cdp, boolean_t is_boot_cpu); +static inline boolean_t +valid_user_data_selector(uint16_t selector) +{ + sel_t sel = selector_to_sel(selector); + + if (selector == 0) + return (TRUE); -/* - * Each CPU calls this routine to set up its descriptor tables. - */ + if (sel.ti == SEL_LDT) + return (TRUE); + else if (sel.index < GDTSZ) { + if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U) + return (TRUE); + } + + return (FALSE); +} + +static inline boolean_t +valid_user_code_selector(uint16_t selector) +{ + sel_t sel = selector_to_sel(selector); + + if (selector == 0) + return (FALSE); + + if (sel.ti == SEL_LDT) { + if (sel.rpl == USER_PRIV) + return (TRUE); + } + else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) { + if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U) + return (TRUE); + } + + return (FALSE); +} + +static inline boolean_t +valid_user_stack_selector(uint16_t selector) +{ + sel_t sel = selector_to_sel(selector); + + if (selector == 0) + return (FALSE); + + if (sel.ti == SEL_LDT) { + if (sel.rpl == USER_PRIV) + return (TRUE); + } + else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) { + if ((gdt_desc_p(selector)->access & ACC_PL_U) == ACC_PL_U) + return (TRUE); + } + + return (FALSE); +} + +extern boolean_t +valid_user_segment_selectors(uint16_t cs, + uint16_t ss, + uint16_t ds, + uint16_t es, + uint16_t fs, + uint16_t gs); -extern struct mp_desc_table * mp_desc_init( - int cpu); -extern void interrupt_stack_alloc(void); +__END_DECLS #endif /* _I386_MP_DESC_H_ */ diff --git a/osfmk/i386/mp_events.h b/osfmk/i386/mp_events.h index f6bdef5e5..62da36894 100644 --- a/osfmk/i386/mp_events.h +++ b/osfmk/i386/mp_events.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,13 +26,13 @@ #ifndef ASSEMBLER +#include + typedef enum { MP_TLB_FLUSH = 0, - MP_CLOCK, MP_KDP, MP_KDB, MP_AST, - MP_SOFTCLOCK, MP_RENDEZVOUS, MP_IDLE, MP_UNIDLE, @@ -40,13 +40,11 @@ typedef enum { } mp_event_t; #define MP_EVENT_NAME_DECL() \ -char *mp_event_name[] = { \ +const char *mp_event_name[] = { \ "MP_TLB_FLUSH", \ - "MP_CLOCK", \ "MP_KDP", \ "MP_KDB", \ "MP_AST", \ - "MP_SOFTCLOCK", \ "MP_RENDEZVOUS", \ "MP_IDLE", \ "MP_UNIDLE", \ @@ -55,9 +53,14 @@ char *mp_event_name[] = { \ typedef enum { SYNC, ASYNC } mp_sync_t; +__BEGIN_DECLS + extern void i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode); extern void i386_signal_cpus(mp_event_t event, mp_sync_t mode); extern int i386_active_cpus(void); + +__END_DECLS + #endif #endif diff --git a/osfmk/i386/mp_slave_boot.h b/osfmk/i386/mp_slave_boot.h index 8f4bbf184..06949fff1 100644 --- a/osfmk/i386/mp_slave_boot.h +++ b/osfmk/i386/mp_slave_boot.h @@ -98,7 +98,7 @@ #define MP_BOOT 0x1000 /* address where slave boots load */ #define MP_BOOTSEG 0x100 -#define MP_GDT 0x1100 /* temporary gdt address for boot */ +#define MP_BOOTGDT 0x1100 /* temporary gdt address for boot */ #define MP_BOOTSTACK 0x800 /* stack for boot */ #define MP_MACH_START MP_BOOTSTACK /* contains address where to jump after boot */ diff --git a/osfmk/i386/mp_slave_boot.s b/osfmk/i386/mp_slave_boot.s index a508084c0..bef238b67 100644 --- a/osfmk/i386/mp_slave_boot.s +++ b/osfmk/i386/mp_slave_boot.s @@ -49,9 +49,9 @@ * the rights to redistribute these changes. */ - #include #include +#include #define CR0_PE_ON 0x1 #define CR0_PE_OFF 0xfffffffe @@ -59,6 +59,7 @@ .file "slave_boot.s" .text + .align 12 // Page align for single bcopy_phys() #define LJMP(segment,address) \ .byte 0xea ;\ @@ -81,6 +82,8 @@ Entry(slave_pstart) mov %cs, %ax mov %ax, %ds + POSTCODE(SLAVE_PSTART_ENTRY); + /* set up %ss and %esp */ data16 mov $(MP_BOOTSEG), %eax @@ -107,11 +110,14 @@ Entry(real_to_prot) /* guarantee that interrupt is disabled when in prot mode */ cli + POSTCODE(REAL_TO_PROT_ENTRY); + /* load the gdtr */ addr16 data16 LGDT(EXT(gdtr)) + /* set the PE bit of CR0 */ mov %cr0, %eax @@ -125,6 +131,7 @@ Entry(real_to_prot) LJMP(0x08, xprot) xprot: + /* we are in USE32 mode now */ /* set up the protective mode segment registers : DS, SS, ES */ mov $0x10, %eax @@ -132,6 +139,8 @@ xprot: movw %ax, %ss movw %ax, %es + POSTCODE(REAL_TO_PROT_EXIT); + ret /* @@ -141,22 +150,26 @@ xprot: Entry(startprog) push %ebp - mov %esp, %ebp + movl %esp, %ebp - mov 0x8(%ebp), %ecx /* entry offset */ - mov $0x28, %ebx /* segment */ + POSTCODE(STARTPROG_ENTRY); + + movl 0x8(%ebp), %ecx /* entry offset */ + movl $0x28, %ebx /* segment */ push %ebx push %ecx /* set up %ds and %es */ - mov $0x20, %ebx + movl $0x20, %ebx movw %bx, %ds movw %bx, %es + POSTCODE(STARTPROG_EXIT); + lret - . = MP_GDT-MP_BOOT /* GDT location */ + . 
= MP_BOOTGDT-MP_BOOT /* GDT location */ Entry(Gdt) /* Segment Descriptor @@ -176,23 +189,23 @@ Entry(Gdt) .byte 0,0,0,0 .word 0xffff,MP_BOOT /* 0x8 : boot code */ - .byte 0,0x9e,0x40,0 + .byte 0,0x9e,0xcf,0 .word 0xffff,MP_BOOT /* 0x10 : boot data */ - .byte 0,0x92,0x40,0 + .byte 0,0x92,0xcf,0 .word 0xffff,MP_BOOT /* 0x18 : boot code, 16 bits */ .byte 0,0x9e,0x0,0 .word 0xffff,0 /* 0x20 : init data */ - .byte 0,0x92,0xcf,0 + .byte 0,0x93,0xcf,0 .word 0xffff,0 /* 0x28 : init code */ - .byte 0,0x9e,0xcf,0 + .byte 0,0x9f,0xcf,0 Entry(gdtr) .short 48 /* limit (8*6 segs) */ - .short MP_GDT /* base low */ + .short MP_BOOTGDT /* base low */ .short 0 /* base high */ Entry(slave_boot_end) diff --git a/osfmk/i386/mtrr.c b/osfmk/i386/mtrr.c new file mode 100644 index 000000000..fe718c39d --- /dev/null +++ b/osfmk/i386/mtrr.c @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct mtrr_var_range { + uint64_t base; /* in IA32_MTRR_PHYSBASE format */ + uint64_t mask; /* in IA32_MTRR_PHYSMASK format */ + uint32_t refcnt; /* var ranges reference count */ +}; + +struct mtrr_fix_range { + uint64_t types; /* fixed-range type octet */ +}; + +typedef struct mtrr_var_range mtrr_var_range_t; +typedef struct mtrr_fix_range mtrr_fix_range_t; + +static struct { + uint64_t MTRRcap; + uint64_t MTRRdefType; + mtrr_var_range_t * var_range; + unsigned int var_count; + mtrr_fix_range_t fix_range[11]; +} mtrr_state; + +static boolean_t mtrr_initialized = FALSE; + +decl_simple_lock_data(static, mtrr_lock); +#define MTRR_LOCK() simple_lock(&mtrr_lock); +#define MTRR_UNLOCK() simple_unlock(&mtrr_lock); + +#if MTRR_DEBUG +#define DBG(x...) kprintf(x) +#else +#define DBG(x...) +#endif + +/* Private functions */ +static void mtrr_get_var_ranges(mtrr_var_range_t * range, int count); +static void mtrr_set_var_ranges(const mtrr_var_range_t * range, int count); +static void mtrr_get_fix_ranges(mtrr_fix_range_t * range); +static void mtrr_set_fix_ranges(const mtrr_fix_range_t * range); +static void mtrr_update_setup(void * param); +static void mtrr_update_teardown(void * param); +static void mtrr_update_action(void * param); +static void var_range_encode(mtrr_var_range_t * range, addr64_t address, + uint64_t length, uint32_t type, int valid); +static int var_range_overlap(mtrr_var_range_t * range, addr64_t address, + uint64_t length, uint32_t type); + +#define CACHE_CONTROL_MTRR (NULL) +#define CACHE_CONTROL_PAT ((void *)1) + +/* + * MTRR MSR bit fields. 
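+ *
+ * For orientation (a sketch of the Intel-documented layout, not code
+ * from this change): IA32_MTRR_DEF_TYPE keeps the default memory type
+ * in bits 7:0, the fixed-range enable (FE) in bit 10, and the global
+ * MTRR enable (E) in bit 11.  A firmware value of 0xc06 would decode
+ * as:
+ *
+ *	(0xc06 & IA32_MTRR_DEF_TYPE_E)  != 0	MTRRs enabled
+ *	(0xc06 & IA32_MTRR_DEF_TYPE_FE) != 0	fixed ranges enabled
+ *	(0xc06 & IA32_MTRR_DEF_TYPE_MT) == 6	default type write-back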
+ */ +#define IA32_MTRR_DEF_TYPE_MT 0x000000ff +#define IA32_MTRR_DEF_TYPE_FE 0x00000400 +#define IA32_MTRR_DEF_TYPE_E 0x00000800 + +#define IA32_MTRRCAP_VCNT 0x000000ff +#define IA32_MTRRCAP_FIX 0x00000100 +#define IA32_MTRRCAP_WC 0x00000400 + +/* 0 < bits <= 64 */ +#define PHYS_BITS_TO_MASK(bits) \ + ((((1ULL << (bits-1)) - 1) << 1) | 1) + +/* + * Default mask for 36 physical address bits, this can + * change depending on the cpu model. + */ +static uint64_t mtrr_phys_mask = PHYS_BITS_TO_MASK(36); + +#define IA32_MTRR_PHYMASK_VALID 0x0000000000000800ULL +#define IA32_MTRR_PHYSBASE_MASK (mtrr_phys_mask & ~0xFFF) +#define IA32_MTRR_PHYSBASE_TYPE 0x00000000000000FFULL + +/* + * Variable-range mask to/from length conversions. + */ +#define MASK_TO_LEN(mask) \ + ((~((mask) & IA32_MTRR_PHYSBASE_MASK) & mtrr_phys_mask) + 1) + +#define LEN_TO_MASK(len) \ + (~((len) - 1) & IA32_MTRR_PHYSBASE_MASK) + +#define LSB(x) ((x) & (~((x) - 1))) + +/* + * Fetch variable-range MTRR register pairs. + */ +static void +mtrr_get_var_ranges(mtrr_var_range_t * range, int count) +{ + int i; + + for (i = 0; i < count; i++) { + range[i].base = rdmsr64(MSR_IA32_MTRR_PHYSBASE(i)); + range[i].mask = rdmsr64(MSR_IA32_MTRR_PHYSMASK(i)); + + /* bump ref count for firmware configured ranges */ + if (range[i].mask & IA32_MTRR_PHYMASK_VALID) + range[i].refcnt = 1; + else + range[i].refcnt = 0; + } +} + +/* + * Update variable-range MTRR register pairs. + */ +static void +mtrr_set_var_ranges(const mtrr_var_range_t * range, int count) +{ + int i; + + for (i = 0; i < count; i++) { + wrmsr64(MSR_IA32_MTRR_PHYSBASE(i), range[i].base); + wrmsr64(MSR_IA32_MTRR_PHYSMASK(i), range[i].mask); + } +} + +/* + * Fetch all fixed-range MTRR's. Note MSR offsets are not consecutive. + */ +static void +mtrr_get_fix_ranges(mtrr_fix_range_t * range) +{ + int i; + + /* assume 11 fix range registers */ + range[0].types = rdmsr64(MSR_IA32_MTRR_FIX64K_00000); + range[1].types = rdmsr64(MSR_IA32_MTRR_FIX16K_80000); + range[2].types = rdmsr64(MSR_IA32_MTRR_FIX16K_A0000); + for (i = 0; i < 8; i++) + range[3 + i].types = rdmsr64(MSR_IA32_MTRR_FIX4K_C0000 + i); +} + +/* + * Update all fixed-range MTRR's. 
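+ *
+ * Each 64-bit fixed-range MSR packs eight one-byte memory types; the
+ * eleven registers together cover the first 1MB of physical memory:
+ * FIX64K_00000 maps 0x00000-0x7ffff in 64K chunks, FIX16K_80000 and
+ * FIX16K_A0000 map 0x80000-0xbffff in 16K chunks, and the eight
+ * FIX4K_C0000..FIX4K_F8000 registers map 0xc0000-0xfffff in 4K chunks.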
+ */ +static void +mtrr_set_fix_ranges(const struct mtrr_fix_range * range) +{ + int i; + + /* assume 11 fix range registers */ + wrmsr64(MSR_IA32_MTRR_FIX64K_00000, range[0].types); + wrmsr64(MSR_IA32_MTRR_FIX16K_80000, range[1].types); + wrmsr64(MSR_IA32_MTRR_FIX16K_A0000, range[2].types); + for (i = 0; i < 8; i++) + wrmsr64(MSR_IA32_MTRR_FIX4K_C0000 + i, range[3 + i].types); +} + +#if MTRR_DEBUG +static void +mtrr_msr_dump(void) +{ + int i; + int count = rdmsr64(MSR_IA32_MTRRCAP) & IA32_MTRRCAP_VCNT; + + DBG("VAR -- BASE -------------- MASK -------------- SIZE\n"); + for (i = 0; i < count; i++) { + DBG(" %02x 0x%016llx 0x%016llx 0x%llx\n", i, + rdmsr64(MSR_IA32_MTRR_PHYSBASE(i)), + rdmsr64(MSR_IA32_MTRR_PHYSMASK(i)), + MASK_TO_LEN(rdmsr64(MSR_IA32_MTRR_PHYSMASK(i)))); + } + DBG("\n"); + + DBG("FIX64K_00000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX64K_00000)); + DBG("FIX16K_80000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX16K_80000)); + DBG("FIX16K_A0000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX16K_A0000)); + DBG(" FIX4K_C0000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_C0000)); + DBG(" FIX4K_C8000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_C8000)); + DBG(" FIX4K_D0000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_D0000)); + DBG(" FIX4K_D8000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_D8000)); + DBG(" FIX4K_E0000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_E0000)); + DBG(" FIX4K_E8000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_E8000)); + DBG(" FIX4K_F0000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_F0000)); + DBG(" FIX4K_F8000: 0x%016llx\n", rdmsr64(MSR_IA32_MTRR_FIX4K_F8000)); + + DBG("\nMTRRcap = 0x%llx MTRRdefType = 0x%llx\n", + rdmsr64(MSR_IA32_MTRRCAP), rdmsr64(MSR_IA32_MTRR_DEF_TYPE)); +} +#endif /* MTRR_DEBUG */ + +/* + * Called by the boot processor (BP) early during boot to initialize MTRR + * support. The MTRR state on the BP is saved, any additional processors + * will have the same settings applied to ensure MTRR consistency. + */ +void +mtrr_init(void) +{ + i386_cpu_info_t * infop = cpuid_info(); + + /* no reason to init more than once */ + if (mtrr_initialized == TRUE) + return; + + /* check for presence of MTRR feature on the processor */ + if ((cpuid_features() & CPUID_FEATURE_MTRR) == 0) + return; /* no MTRR feature */ + + /* cpu vendor/model specific handling */ + if (!strncmp(infop->cpuid_vendor, CPUID_VID_AMD, sizeof(CPUID_VID_AMD))) + { + /* Check for AMD Athlon 64 and Opteron */ + if (cpuid_family() == 0xF) + { + uint32_t cpuid_result[4]; + + /* check if cpu support Address Sizes function */ + do_cpuid(0x80000000, cpuid_result); + if (cpuid_result[0] >= 0x80000008) + { + int bits; + + do_cpuid(0x80000008, cpuid_result); + DBG("MTRR: AMD 8000_0008 EAX = %08x\n", + cpuid_result[0]); + + /* + * Function 8000_0008 (Address Sizes) EAX + * Bits 7-0 : phys address size + * Bits 15-8 : virt address size + */ + bits = cpuid_result[0] & 0xFF; + if ((bits < 36) || (bits > 64)) + { + printf("MTRR: bad address size\n"); + return; /* bogus size */ + } + + mtrr_phys_mask = PHYS_BITS_TO_MASK(bits); + } + } + } + + /* use a lock to serialize MTRR changes */ + bzero((void *)&mtrr_state, sizeof(mtrr_state)); + simple_lock_init(&mtrr_lock, 0); + + mtrr_state.MTRRcap = rdmsr64(MSR_IA32_MTRRCAP); + mtrr_state.MTRRdefType = rdmsr64(MSR_IA32_MTRR_DEF_TYPE); + mtrr_state.var_count = mtrr_state.MTRRcap & IA32_MTRRCAP_VCNT; + + /* allocate storage for variable ranges (can block?) 
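+	 * kalloc() can block, which is assumed safe here: mtrr_init()
+	 * runs on the boot processor during early startup, before any
+	 * slave processors are brought up.  With a typical VCNT of 8
+	 * this is a small allocation (8 * sizeof(mtrr_var_range_t)).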
*/ + if (mtrr_state.var_count) { + mtrr_state.var_range = (mtrr_var_range_t *) + kalloc(sizeof(mtrr_var_range_t) * + mtrr_state.var_count); + if (mtrr_state.var_range == NULL) + mtrr_state.var_count = 0; + } + + /* fetch the initial firmware configured variable ranges */ + if (mtrr_state.var_count) + mtrr_get_var_ranges(mtrr_state.var_range, + mtrr_state.var_count); + + /* fetch the initial firmware configured fixed ranges */ + if (mtrr_state.MTRRcap & IA32_MTRRCAP_FIX) + mtrr_get_fix_ranges(mtrr_state.fix_range); + + mtrr_initialized = TRUE; + +#if MTRR_DEBUG + mtrr_msr_dump(); /* dump firmware settings */ +#endif +} + +/* + * Performs the Intel recommended procedure for changing the MTRR + * in a MP system. Leverage rendezvous mechanism for the required + * barrier synchronization among all processors. This function is + * called from the rendezvous IPI handler, and mtrr_update_cpu(). + */ +static void +mtrr_update_action(void * cache_control_type) +{ + uint32_t cr0, cr4; + uint32_t tmp; + + cr0 = get_cr0(); + cr4 = get_cr4(); + + /* enter no-fill cache mode */ + tmp = cr0 | CR0_CD; + tmp &= ~CR0_NW; + set_cr0(tmp); + + /* flush caches */ + wbinvd(); + + /* clear the PGE flag in CR4 */ + if (cr4 & CR4_PGE) + set_cr4(cr4 & ~CR4_PGE); + + /* flush TLBs */ + flush_tlb(); + + if (CACHE_CONTROL_PAT == cache_control_type) { + /* Change PA6 attribute field to WC */ + uint64_t pat = rdmsr64(MSR_IA32_CR_PAT); + DBG("CPU%d PAT: was 0x%016llx\n", get_cpu_number(), pat); + pat &= ~(0x0FULL << 48); + pat |= (0x01ULL << 48); + wrmsr64(MSR_IA32_CR_PAT, pat); + DBG("CPU%d PAT: is 0x%016llx\n", + get_cpu_number(), rdmsr64(MSR_IA32_CR_PAT)); + } + else { + /* disable all MTRR ranges */ + wrmsr64(MSR_IA32_MTRR_DEF_TYPE, + mtrr_state.MTRRdefType & ~IA32_MTRR_DEF_TYPE_E); + + /* apply MTRR settings */ + if (mtrr_state.var_count) + mtrr_set_var_ranges(mtrr_state.var_range, + mtrr_state.var_count); + + if (mtrr_state.MTRRcap & IA32_MTRRCAP_FIX) + mtrr_set_fix_ranges(mtrr_state.fix_range); + + /* enable all MTRR range registers (what if E was not set?) */ + wrmsr64(MSR_IA32_MTRR_DEF_TYPE, + mtrr_state.MTRRdefType | IA32_MTRR_DEF_TYPE_E); + } + + /* flush all caches and TLBs a second time */ + wbinvd(); + flush_tlb(); + + /* restore normal cache mode */ + set_cr0(cr0); + + /* restore PGE flag */ + if (cr4 & CR4_PGE) + set_cr4(cr4); + + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +static void +mtrr_update_setup(__unused void * param_not_used) +{ + /* disable interrupts before the first barrier */ + current_cpu_datap()->cpu_iflag = ml_set_interrupts_enabled(FALSE); + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +static void +mtrr_update_teardown(__unused void * param_not_used) +{ + /* restore interrupt flag following MTRR changes */ + ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag); + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); +} + +/* + * Update MTRR settings on all processors. + */ +kern_return_t +mtrr_update_all_cpus(void) +{ + if (mtrr_initialized == FALSE) + return KERN_NOT_SUPPORTED; + + MTRR_LOCK(); + mp_rendezvous(mtrr_update_setup, + mtrr_update_action, + mtrr_update_teardown, NULL); + MTRR_UNLOCK(); + + return KERN_SUCCESS; +} + +/* + * Update a single CPU with the current MTRR settings. Can be called + * during slave processor initialization to mirror the MTRR settings + * discovered on the boot processor by mtrr_init(). 
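+ *
+ * A sketch of the intended bring-up order (the call sites live
+ * outside this file):
+ *
+ *	mtrr_init();		boot processor, early in startup
+ *	...
+ *	mtrr_update_cpu();	each slave, from its own init path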
+ */ +kern_return_t +mtrr_update_cpu(void) +{ + if (mtrr_initialized == FALSE) + return KERN_NOT_SUPPORTED; + + MTRR_LOCK(); + mtrr_update_setup(NULL); + mtrr_update_action(NULL); + mtrr_update_teardown(NULL); + MTRR_UNLOCK(); + + return KERN_SUCCESS; +} + +/* + * Add a MTRR range to associate the physical memory range specified + * with a given memory caching type. + */ +kern_return_t +mtrr_range_add(addr64_t address, uint64_t length, uint32_t type) +{ + mtrr_var_range_t * vr; + mtrr_var_range_t * free_range; + kern_return_t ret = KERN_NO_SPACE; + int overlap; + unsigned int i; + + DBG("mtrr_range_add base = 0x%llx, size = 0x%llx, type = %d\n", + address, length, type); + + if (mtrr_initialized == FALSE) { + return KERN_NOT_SUPPORTED; + } + + /* check memory type (GPF exception for undefined types) */ + if ((type != MTRR_TYPE_UNCACHEABLE) && + (type != MTRR_TYPE_WRITECOMBINE) && + (type != MTRR_TYPE_WRITETHROUGH) && + (type != MTRR_TYPE_WRITEPROTECT) && + (type != MTRR_TYPE_WRITEBACK)) { + return KERN_INVALID_ARGUMENT; + } + + /* check WC support if requested */ + if ((type == MTRR_TYPE_WRITECOMBINE) && + (mtrr_state.MTRRcap & IA32_MTRRCAP_WC) == 0) { + return KERN_NOT_SUPPORTED; + } + + /* leave the fix range area below 1MB alone */ + if (address < 0x100000 || mtrr_state.var_count == 0) { + return KERN_NOT_SUPPORTED; + } + + /* + * Length must be a power of 2 given by 2^n, where n >= 12. + * Base address alignment must be larger than or equal to length. + */ + if ((length < 0x1000) || + (LSB(length) != length) || + (address && (length > LSB(address)))) { + return KERN_INVALID_ARGUMENT; + } + + MTRR_LOCK(); + + /* + * Check for overlap and locate a free range. + */ + for (i = 0, free_range = NULL; i < mtrr_state.var_count; i++) + { + vr = &mtrr_state.var_range[i]; + + if (vr->refcnt == 0) { + /* free range candidate if no overlaps are found */ + free_range = vr; + continue; + } + + overlap = var_range_overlap(vr, address, length, type); + if (overlap > 0) { + /* + * identical overlap permitted, increment ref count. + * no hardware update required. + */ + free_range = vr; + break; + } + if (overlap < 0) { + /* unsupported overlapping of memory types */ + free_range = NULL; + break; + } + } + + if (free_range) { + if (free_range->refcnt++ == 0) { + var_range_encode(free_range, address, length, type, 1); + mp_rendezvous(mtrr_update_setup, + mtrr_update_action, + mtrr_update_teardown, NULL); + } + ret = KERN_SUCCESS; + } + +#if MTRR_DEBUG + mtrr_msr_dump(); +#endif + + MTRR_UNLOCK(); + + return ret; +} + +/* + * Remove a previously added MTRR range. The same arguments used for adding + * the memory range must be supplied again. 
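+ *
+ * Identical ranges are reference counted by mtrr_range_add(), so the
+ * hardware register is only released when the last matching remove
+ * arrives; callers are expected to balance each add with one remove.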
+ */ +kern_return_t +mtrr_range_remove(addr64_t address, uint64_t length, uint32_t type) +{ + mtrr_var_range_t * vr; + int result = KERN_FAILURE; + int cpu_update = 0; + unsigned int i; + + DBG("mtrr_range_remove base = 0x%llx, size = 0x%llx, type = %d\n", + address, length, type); + + if (mtrr_initialized == FALSE) { + return KERN_NOT_SUPPORTED; + } + + MTRR_LOCK(); + + for (i = 0; i < mtrr_state.var_count; i++) { + vr = &mtrr_state.var_range[i]; + + if (vr->refcnt && + var_range_overlap(vr, address, length, type) > 0) { + /* found specified variable range */ + if (--mtrr_state.var_range[i].refcnt == 0) { + var_range_encode(vr, address, length, type, 0); + cpu_update = 1; + } + result = KERN_SUCCESS; + break; + } + } + + if (cpu_update) { + mp_rendezvous(mtrr_update_setup, + mtrr_update_action, + mtrr_update_teardown, NULL); + result = KERN_SUCCESS; + } + +#if MTRR_DEBUG + mtrr_msr_dump(); +#endif + + MTRR_UNLOCK(); + + return result; +} + +/* + * Variable range helper routines + */ +static void +var_range_encode(mtrr_var_range_t * range, addr64_t address, + uint64_t length, uint32_t type, int valid) +{ + range->base = (address & IA32_MTRR_PHYSBASE_MASK) | + (type & IA32_MTRR_PHYSBASE_TYPE); + + range->mask = LEN_TO_MASK(length) | + (valid ? IA32_MTRR_PHYMASK_VALID : 0); +} + +static int +var_range_overlap(mtrr_var_range_t * range, addr64_t address, + uint64_t length, uint32_t type) +{ + uint64_t v_address, v_length; + uint32_t v_type; + int result = 0; /* no overlap, or overlap ok */ + + v_address = range->base & IA32_MTRR_PHYSBASE_MASK; + v_type = range->base & IA32_MTRR_PHYSBASE_TYPE; + v_length = MASK_TO_LEN(range->mask); + + /* detect range overlap */ + if ((v_address >= address && v_address < (address + length)) || + (address >= v_address && address < (v_address + v_length))) { + + if (v_address == address && v_length == length && v_type == type) + result = 1; /* identical overlap ok */ + else if ( v_type == MTRR_TYPE_UNCACHEABLE && + type == MTRR_TYPE_UNCACHEABLE ) { + /* UC ranges can overlap */ + } + else if ((v_type == MTRR_TYPE_UNCACHEABLE && + type == MTRR_TYPE_WRITEBACK) || + (v_type == MTRR_TYPE_WRITEBACK && + type == MTRR_TYPE_UNCACHEABLE)) { + /* UC/WB can overlap - effective type becomes UC */ + } + else { + /* anything else may cause undefined behavior */ + result = -1; + } + } + + return result; +} + +/* + * Initialize PAT (Page Attribute Table) + */ +void +pat_init(void) +{ + if (cpuid_features() & CPUID_FEATURE_PAT) + { + boolean_t istate = ml_set_interrupts_enabled(FALSE); + mtrr_update_action(CACHE_CONTROL_PAT); + ml_set_interrupts_enabled(istate); + } +} diff --git a/bsd/ppc/cpu.h b/osfmk/i386/mtrr.h similarity index 51% rename from bsd/ppc/cpu.h rename to osfmk/i386/mtrr.h index 5aa43232f..7222200b8 100644 --- a/bsd/ppc/cpu.h +++ b/osfmk/i386/mtrr.h @@ -1,6 +1,5 @@ /* - * Copyright (c) 1993 NeXT Computer, Inc. All rights reserved. - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,19 +19,46 @@ * * @APPLE_LICENSE_HEADER_END@ */ + +#ifndef _I386_MTRR_H_ +#define _I386_MTRR_H_ + /* - * HISTORY - * + * Memory type range register (MTRR) support. 
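+ *
+ * A minimal usage sketch (the frame-buffer address below is made up
+ * for illustration):
+ *
+ *	if (mtrr_range_add(0xf0000000ULL, 0x00800000ULL,
+ *			   MTRR_TYPE_WRITECOMBINE) == KERN_SUCCESS) {
+ *		... map and use the aperture write-combined ...
+ *		mtrr_range_remove(0xf0000000ULL, 0x00800000ULL,
+ *				  MTRR_TYPE_WRITECOMBINE);
+ *	}
+ *
+ * The length must be a power of two of at least 4K, and the base must
+ * be aligned to at least the length (see mtrr_range_add()).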
*/ - -#ifndef _BSD_PPC_CPU_H_ -#define _BSD_PPC_CPU_H_ +#include #include +#include + +#ifdef __APPLE_API_PRIVATE + +enum { + MTRR_TYPE_UNCACHEABLE = 0, + MTRR_TYPE_WRITECOMBINE = 1, + MTRR_TYPE_WRITETHROUGH = 4, + MTRR_TYPE_WRITEPROTECT = 5, + MTRR_TYPE_WRITEBACK = 6 +}; + +__BEGIN_DECLS + +extern void mtrr_init(void); +extern kern_return_t mtrr_update_cpu(void); +extern kern_return_t mtrr_update_all_cpus(void); + +extern kern_return_t mtrr_range_add( addr64_t phys_addr, + uint64_t length, + uint32_t mem_type); + +extern kern_return_t mtrr_range_remove( addr64_t phys_addr, + uint64_t length, + uint32_t mem_type); + +extern void pat_init(void); -#ifdef __APPLE_API_OBSOLETE -#define cpu_number() (0) -#endif /* __APPLE_API_OBSOLETE */ +__END_DECLS -#endif /* _BSD_PPC_CPU_H_ */ +#endif /* __APPLE_API_PRIVATE */ +#endif /* !_I386_MTRR_H_ */ diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index 764da53e3..ca1a170f4 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -48,7 +48,6 @@ * the rights to redistribute these changes. */ -#include #include #include #include @@ -59,19 +58,27 @@ #include #include +#include +#include + #include +#include #include +#include +#include +#include #include #include -#include -#include #include #include #include #include +#include #include #include +#include #include +#include #include #include @@ -81,15 +88,15 @@ #include #include #include +#include +#include -vm_offset_t active_stacks[NCPUS]; -vm_offset_t kernel_stack[NCPUS]; -thread_act_t active_kloaded[NCPUS]; /* * Maps state flavor to number of words in the state: */ -unsigned int state_count[] = { +__private_extern__ +unsigned int _MachineStateCount[] = { /* FLAVOR_LIST */ 0, i386_NEW_THREAD_STATE_COUNT, i386_FLOAT_STATE_COUNT, @@ -103,14 +110,18 @@ unsigned int state_count[] = { /* Forward */ -void act_machine_throughcall(thread_act_t thr_act); +void act_machine_throughcall(thread_t thr_act); +user_addr_t get_useraddr(void); +void act_machine_return(int); +void act_machine_sv_free(thread_t, int); + extern thread_t Switch_context( - thread_t old, - void (*cont)(void), - thread_t new); + thread_t old, + thread_continue_t cont, + thread_t new); extern void Thread_continue(void); extern void Load_context( - thread_t thread); + thread_t thread); /* * consider_machine_collect: @@ -118,80 +129,32 @@ extern void Load_context( * Try to collect machine-dependent pages */ void -consider_machine_collect() -{ -} - -void -consider_machine_adjust() +consider_machine_collect(void) { } - -/* - * machine_kernel_stack_init: - * - * Initialize a kernel stack which has already been - * attached to its thread_activation. - */ - void -machine_kernel_stack_init( - thread_t thread, - void (*start_pos)(thread_t)) +consider_machine_adjust(void) { - thread_act_t thr_act = thread->top_act; - vm_offset_t stack; - - assert(thr_act); - stack = thread->kernel_stack; - assert(stack); - - /* - * We want to run at start_pos, giving it as an argument - * the return value from Load_context/Switch_context. - * Thread_continue takes care of the mismatch between - * the argument-passing/return-value conventions. - * This function will not return normally, - * so we don`t have to worry about a return address. 
- */ - STACK_IKS(stack)->k_eip = (int) Thread_continue; - STACK_IKS(stack)->k_ebx = (int) start_pos; - STACK_IKS(stack)->k_esp = (int) STACK_IEL(stack); - - /* - * Point top of kernel stack to user`s registers. - */ - STACK_IEL(stack)->saved_state = &thr_act->mact.pcb->iss; } -#if NCPUS > 1 -#define curr_gdt(mycpu) (mp_gdt[mycpu]) -#define curr_ldt(mycpu) (mp_ldt[mycpu]) -#define curr_ktss(mycpu) (mp_ktss[mycpu]) -#else -#define curr_gdt(mycpu) (gdt) -#define curr_ldt(mycpu) (ldt) -#define curr_ktss(mycpu) (&ktss) -#endif - -#define gdt_desc_p(mycpu,sel) \ - ((struct real_descriptor *)&curr_gdt(mycpu)[sel_idx(sel)]) +// DEBUG +int DEBUG_kldt = 0; +int DEBUG_uldt = 0; -void -act_machine_switch_pcb( thread_act_t new_act ) +static void +act_machine_switch_pcb( thread_t new ) { - pcb_t pcb = new_act->mact.pcb; + pcb_t pcb = new->machine.pcb; int mycpu; register iopb_tss_t tss = pcb->ims.io_tss; vm_offset_t pcb_stack_top; - register user_ldt_t ldt = pcb->ims.ldt; + register user_ldt_t uldt = pcb->ims.ldt; - assert(new_act->thread != NULL); - assert(new_act->thread->kernel_stack != 0); - STACK_IEL(new_act->thread->kernel_stack)->saved_state = - &new_act->mact.pcb->iss; + assert(new->kernel_stack != 0); + STACK_IEL(new->kernel_stack)->saved_state = + &new->machine.pcb->iss; /* * Save a pointer to the top of the "kernel" stack - @@ -214,39 +177,46 @@ act_machine_switch_pcb( thread_act_t new_act ) * No per-thread IO permissions. * Use standard kernel TSS. */ - if (!(gdt_desc_p(mycpu,KERNEL_TSS)->access & ACC_TSS_BUSY)) + if (!(gdt_desc_p(KERNEL_TSS)->access & ACC_TSS_BUSY)) set_tr(KERNEL_TSS); - curr_ktss(mycpu)->esp0 = pcb_stack_top; + current_ktss()->esp0 = pcb_stack_top; } else { /* * Set the IO permissions. Use this thread`s TSS. */ - *gdt_desc_p(mycpu,USER_TSS) + *gdt_desc_p(USER_TSS) = *(struct real_descriptor *)tss->iopb_desc; tss->tss.esp0 = pcb_stack_top; set_tr(USER_TSS); - gdt_desc_p(mycpu,KERNEL_TSS)->access &= ~ ACC_TSS_BUSY; + gdt_desc_p(KERNEL_TSS)->access &= ~ ACC_TSS_BUSY; } /* - * Set the thread`s LDT. + * Set the thread`s LDT or LDT entry. */ - if (ldt == 0) { + if (uldt == 0) { struct real_descriptor *ldtp; /* * Use system LDT. */ - ldtp = (struct real_descriptor *)curr_ldt(mycpu); + // Set up the tasks specific ldt entries if extant + ldtp = (struct real_descriptor *)current_ldt(); ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; + if (pcb->uldt_selector != 0) + ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; set_ldt(KERNEL_LDT); } else { /* - * Thread has its own LDT. + * Thread has its own LDT. // THIS SHOULD BE REMOVED!!!! */ - *gdt_desc_p(mycpu,USER_LDT) = ldt->desc; + *gdt_desc_p(USER_LDT) = uldt->desc; set_ldt(USER_LDT); + /*debug*/ + if ((DEBUG_uldt++ % 0x7fff) == 0) + printf("KERNEL----> setting user ldt"); + } mp_enable_preemption(); @@ -264,41 +234,8 @@ void machine_load_context( thread_t new) { - act_machine_switch_pcb(new->top_act); - Load_context(new); -} - -/* - * Number of times we needed to swap an activation back in before - * switching to it. - */ -int switch_act_swapins = 0; - -/* - * machine_switch_act - * - * Machine-dependent details of activation switching. Called with - * RPC locks held and preemption disabled. - */ -void -machine_switch_act( - thread_t thread, - thread_act_t old, - thread_act_t new) -{ - int cpu = cpu_number(); - - /* - * Switch the vm, ast and pcb context. - * Save FP registers if in use and set TS (task switch) bit. 
- */ - fpu_save_context(thread); - - active_stacks[cpu] = thread->kernel_stack; - ast_context(new, cpu); - - PMAP_SWITCH_CONTEXT(old, new, cpu); act_machine_switch_pcb(new); + Load_context(new); } /* @@ -308,17 +245,13 @@ machine_switch_act( */ thread_t machine_switch_context( - thread_t old, - void (*continuation)(void), - thread_t new) + thread_t old, + thread_continue_t continuation, + thread_t new) { - register thread_act_t old_act = old->top_act, - new_act = new->top_act; - #if MACH_RT - assert(active_stacks[cpu_number()] == old_act->thread->kernel_stack); + assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack); #endif - check_simple_locks(); /* * Save FP registers if in use. @@ -332,13 +265,13 @@ machine_switch_context( { int mycpu = cpu_number(); - PMAP_SWITCH_CONTEXT(old_act, new_act, mycpu) + PMAP_SWITCH_CONTEXT(old, new, mycpu) } /* * Load the rest of the user state for the new thread */ - act_machine_switch_pcb(new_act); + act_machine_switch_pcb(new); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, (int)old, (int)new, old->sched_pri, new->sched_pri, 0); old->continuation = NULL; @@ -351,21 +284,34 @@ machine_switch_context( * user level savearea(s) too, else don't */ void -act_machine_sv_free(thread_act_t act, int flag) +act_machine_sv_free(__unused thread_t act, __unused int flag) { } + +/* + * This is where registers that are not normally specified by the mach-o + * file on an execve would be nullified, perhaps to avoid a covert channel. + */ +kern_return_t +machine_thread_state_initialize( + thread_t thread) +{ +#pragma unused (thread) + + return KERN_SUCCESS; +} + + /* * act_machine_set_state: * - * Set the status of the specified thread. Called with "appropriate" - * thread-related locks held (see act_lock_thread()), so - * thr_act->thread is guaranteed not to change. + * Set the status of the specified thread. */ kern_return_t machine_thread_set_state( - thread_act_t thr_act, + thread_t thr_act, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count) @@ -401,6 +347,16 @@ machine_thread_set_state( state = (struct i386_saved_state *) tstate; + /* Check segment selectors are safe */ + if (!kernel_act && + !valid_user_segment_selectors(state->cs, + state->ss, + state->ds, + state->es, + state->fs, + state->gs)) + return KERN_INVALID_ARGUMENT; + saved_state = USER_REGS(thr_act); /* @@ -443,11 +399,11 @@ machine_thread_set_state( saved_state->fs = 0; saved_state->gs = 0; - if (thr_act->mact.pcb->ims.v86s.int_table) { + if (thr_act->machine.pcb->ims.v86s.int_table) { /* * Hardware assist on. 
*/ - thr_act->mact.pcb->ims.v86s.flags = + thr_act->machine.pcb->ims.v86s.flags = state->efl & (EFL_TF | EFL_IF); } } @@ -461,7 +417,7 @@ machine_thread_set_state( saved_state->ds = KERNEL_DS; saved_state->es = KERNEL_DS; saved_state->fs = KERNEL_DS; - saved_state->gs = CPU_DATA; + saved_state->gs = CPU_DATA_GS; } else { /* @@ -490,6 +446,8 @@ machine_thread_set_state( return(KERN_INVALID_ARGUMENT); } + state = (struct i386_new_thread_state *) tstate; + if (flavor == i386_REGS_SEGS_STATE) { /* * Code and stack selectors must not be null, @@ -504,13 +462,15 @@ machine_thread_set_state( state->gs &= 0xffff; if (!kernel_act && - (state->cs == 0 || (state->cs & SEL_PL) != SEL_PL_U - || state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U)) + !valid_user_segment_selectors(state->cs, + state->ss, + state->ds, + state->es, + state->fs, + state->gs)) return KERN_INVALID_ARGUMENT; } - state = (struct i386_new_thread_state *) tstate; - saved_state = USER_REGS(thr_act); /* @@ -553,11 +513,11 @@ machine_thread_set_state( saved_state->fs = 0; saved_state->gs = 0; - if (thr_act->mact.pcb->ims.v86s.int_table) { + if (thr_act->machine.pcb->ims.v86s.int_table) { /* * Hardware assist on. */ - thr_act->mact.pcb->ims.v86s.flags = + thr_act->machine.pcb->ims.v86s.flags = state->efl & (EFL_TF | EFL_IF); } } @@ -571,7 +531,7 @@ machine_thread_set_state( saved_state->ds = KERNEL_DS; saved_state->es = KERNEL_DS; saved_state->fs = KERNEL_DS; - saved_state->gs = CPU_DATA; + saved_state->gs = CPU_DATA_GS; } else { /* @@ -591,7 +551,6 @@ machine_thread_set_state( } case i386_FLOAT_STATE: { - struct i386_float_state *state = (struct i386_float_state*)tstate; if (count < i386_old_FLOAT_STATE_COUNT) return(KERN_INVALID_ARGUMENT); if (count < i386_FLOAT_STATE_COUNT) @@ -603,9 +562,6 @@ machine_thread_set_state( * Temporary - replace by i386_io_map */ case i386_ISA_PORT_MAP_STATE: { - register struct i386_isa_port_map_state *state; - register iopb_tss_t tss; - if (count < i386_ISA_PORT_MAP_STATE_COUNT) return(KERN_INVALID_ARGUMENT); @@ -631,10 +587,10 @@ machine_thread_set_state( > VM_MAX_ADDRESS) return KERN_INVALID_ARGUMENT; - thr_act->mact.pcb->ims.v86s.int_table = int_table; - thr_act->mact.pcb->ims.v86s.int_count = int_count; + thr_act->machine.pcb->ims.v86s.int_table = int_table; + thr_act->machine.pcb->ims.v86s.int_count = int_count; - thr_act->mact.pcb->ims.v86s.flags = + thr_act->machine.pcb->ims.v86s.flags = USER_REGS(thr_act)->efl & (EFL_TF | EFL_IF); break; } @@ -682,7 +638,7 @@ machine_thread_set_state( kern_return_t machine_thread_get_state( - thread_act_t thr_act, + thread_t thr_act, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count) @@ -714,11 +670,11 @@ machine_thread_get_state( state->fs = saved_state->v86_segs.v86_fs & 0xffff; state->gs = saved_state->v86_segs.v86_gs & 0xffff; - if (thr_act->mact.pcb->ims.v86s.int_table) { + if (thr_act->machine.pcb->ims.v86s.int_table) { /* * Hardware assist on */ - if ((thr_act->mact.pcb->ims.v86s.flags & + if ((thr_act->machine.pcb->ims.v86s.flags & (EFL_IF|V86_IF_PENDING)) == 0) state->efl &= ~EFL_IF; } @@ -773,11 +729,11 @@ machine_thread_get_state( state->fs = saved_state->v86_segs.v86_fs & 0xffff; state->gs = saved_state->v86_segs.v86_gs & 0xffff; - if (thr_act->mact.pcb->ims.v86s.int_table) { + if (thr_act->machine.pcb->ims.v86s.int_table) { /* * Hardware assist on */ - if ((thr_act->mact.pcb->ims.v86s.flags & + if ((thr_act->machine.pcb->ims.v86s.flags & (EFL_IF|V86_IF_PENDING)) == 0) state->efl &= ~EFL_IF; } @@ -822,8 +778,6 @@ 
machine_thread_get_state( break; case i386_FLOAT_STATE: { - struct i386_float_state *state = (struct i386_float_state*)tstate; - if (*count < i386_old_FLOAT_STATE_COUNT) return(KERN_INVALID_ARGUMENT); if (*count< i386_FLOAT_STATE_COUNT) { @@ -846,10 +800,10 @@ machine_thread_get_state( return(KERN_INVALID_ARGUMENT); state = (struct i386_isa_port_map_state *) tstate; - tss = thr_act->mact.pcb->ims.io_tss; + tss = thr_act->machine.pcb->ims.io_tss; if (tss == 0) { - int i; + unsigned int i; /* * The thread has no ktss, so no IO permissions. @@ -879,8 +833,8 @@ machine_thread_get_state( return KERN_INVALID_ARGUMENT; state = (struct i386_v86_assist_state *) tstate; - state->int_table = thr_act->mact.pcb->ims.v86s.int_table; - state->int_count = thr_act->mact.pcb->ims.v86s.int_count; + state->int_table = thr_act->machine.pcb->ims.v86s.int_table; + state->int_count = thr_act->machine.pcb->ims.v86s.int_count; *count = i386_V86_ASSIST_STATE_COUNT; break; @@ -925,13 +879,13 @@ machine_thread_get_state( kern_return_t machine_thread_create( thread_t thread, - task_t task) + __unused task_t task) { - pcb_t pcb = &thread->mact.xxx_pcb; + pcb_t pcb = &thread->machine.xxx_pcb; - thread->mact.pcb = pcb; + thread->machine.pcb = pcb; - simple_lock_init(&pcb->lock, ETAP_MISC_PCB); + simple_lock_init(&pcb->lock, 0); /* * Guarantee that the bootstrapped thread will be in user @@ -945,23 +899,17 @@ machine_thread_create( pcb->iss.gs = USER_DS; pcb->iss.efl = EFL_USER_SET; { - extern struct fake_descriptor ldt[]; struct real_descriptor *ldtp; ldtp = (struct real_descriptor *)ldt; pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_selector = 0; } /* - * Allocate a kernel stack per shuttle + * Allocate a kernel stack per thread. */ - thread->kernel_stack = (int)stack_alloc(thread, thread_continue); - thread->state &= ~TH_STACK_HANDOFF; - assert(thread->kernel_stack != 0); - - /* - * Point top of kernel stack to user`s registers. - */ - STACK_IEL(thread->kernel_stack)->saved_state = &pcb->iss; + stack_alloc(thread); return(KERN_SUCCESS); } @@ -973,17 +921,17 @@ void machine_thread_destroy( thread_t thread) { - register pcb_t pcb = thread->mact.pcb; + register pcb_t pcb = thread->machine.pcb; assert(pcb); - + if (pcb->ims.io_tss != 0) iopb_destroy(pcb->ims.io_tss); if (pcb->ims.ifps != 0) - fp_free(pcb->ims.ifps); + fpu_free(pcb->ims.ifps); if (pcb->ims.ldt != 0) user_ldt_free(pcb->ims.ldt); - thread->mact.pcb = (pcb_t)0; + thread->machine.pcb = (pcb_t)0; } /* @@ -991,15 +939,12 @@ machine_thread_destroy( * when starting up a new processor */ void -machine_thread_set_current( thread_t thread ) +machine_set_current_thread( thread_t thread ) { - register int my_cpu; - mp_disable_preemption(); - my_cpu = cpu_number(); - cpu_data[my_cpu].active_thread = thread->top_act; - active_kloaded[my_cpu] = THR_ACT_NULL; + current_cpu_datap()->cpu_active_thread = thread; + current_cpu_datap()->cpu_active_kloaded = THREAD_NULL; mp_enable_preemption(); } @@ -1012,8 +957,6 @@ machine_thread_terminate_self(void) void act_machine_return(int code) { - thread_act_t thr_act = current_act(); - /* * This code is called with nothing locked. * It also returns with nothing locked, if it returns. @@ -1024,17 +967,12 @@ act_machine_return(int code) * activation) is terminated. */ assert( code == KERN_TERMINATED ); - assert( thr_act ); - - /* This is the only activation attached to the shuttle... 
*/ - /* terminate the entire thread (shuttle plus activation) */ - assert(thr_act->thread->top_act == thr_act); thread_terminate_self(); /*NOTREACHED*/ - panic("act_machine_return: TALKING ZOMBIE! (1)"); + panic("act_machine_return(%d): TALKING ZOMBIE! (1)", code); } @@ -1051,11 +989,12 @@ machine_thread_init(void) /* * Some routines for debugging activation code */ -static void dump_handlers(thread_act_t); -void dump_regs(thread_act_t); +static void dump_handlers(thread_t); +void dump_regs(thread_t); +int dump_act(thread_t thr_act); static void -dump_handlers(thread_act_t thr_act) +dump_handlers(thread_t thr_act) { ReturnHandler *rhp = thr_act->handlers; int counter = 0; @@ -1079,9 +1018,9 @@ dump_handlers(thread_act_t thr_act) } void -dump_regs(thread_act_t thr_act) +dump_regs(thread_t thr_act) { - if (thr_act->mact.pcb) { + if (thr_act->machine.pcb) { register struct i386_saved_state *ssp = USER_REGS(thr_act); /* Print out user register state */ printf("\tRegs:\tedi=%x esi=%x ebp=%x ebx=%x edx=%x\n", @@ -1093,24 +1032,22 @@ dump_regs(thread_act_t thr_act) } int -dump_act(thread_act_t thr_act) +dump_act(thread_t thr_act) { if (!thr_act) return(0); - printf("thr_act(0x%x)(%d): thread=%x(%d) task=%x(%d)\n", + printf("thread(0x%x)(%d): task=%x(%d)\n", thr_act, thr_act->ref_count, - thr_act->thread, thr_act->thread ? thr_act->thread->ref_count:0, thr_act->task, thr_act->task ? thr_act->task->ref_count : 0); printf("\tsusp=%d user_stop=%d active=%x ast=%x\n", thr_act->suspend_count, thr_act->user_stop_count, thr_act->active, thr_act->ast); - printf("\thi=%x lo=%x\n", thr_act->higher, thr_act->lower); - printf("\tpcb=%x\n", thr_act->mact.pcb); + printf("\tpcb=%x\n", thr_act->machine.pcb); - if (thr_act->thread && thr_act->thread->kernel_stack) { - vm_offset_t stack = thr_act->thread->kernel_stack; + if (thr_act->kernel_stack) { + vm_offset_t stack = thr_act->kernel_stack; printf("\tk_stk %x eip %x ebx %x esp %x iss %x\n", stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, @@ -1121,26 +1058,20 @@ dump_act(thread_act_t thr_act) dump_regs(thr_act); return((int)thr_act); } -unsigned int -get_useraddr() + +user_addr_t +get_useraddr(void) { - thread_act_t thr_act = current_act(); + thread_t thr_act = current_thread(); - if (thr_act->mact.pcb) - return(thr_act->mact.pcb->iss.eip); + if (thr_act->machine.pcb) + return(thr_act->machine.pcb->iss.eip); else return(0); } -void -thread_swapin_mach_alloc(thread_t thread) -{ - - /* 386 does not have saveareas */ - -} /* * detach and return a kernel stack from a thread */ @@ -1165,26 +1096,25 @@ machine_stack_detach(thread_t thread) */ void -machine_stack_attach(thread_t thread, - vm_offset_t stack, - void (*start_pos)(thread_t)) +machine_stack_attach( + thread_t thread, + vm_offset_t stack) { struct i386_kernel_state *statep; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH), thread, thread->priority, - thread->sched_pri, continuation, - 0); + thread->sched_pri, 0, 0); assert(stack); statep = STACK_IKS(stack); thread->kernel_stack = stack; statep->k_eip = (unsigned long) Thread_continue; - statep->k_ebx = (unsigned long) start_pos; + statep->k_ebx = (unsigned long) thread_continue; statep->k_esp = (unsigned long) STACK_IEL(stack); - STACK_IEL(stack)->saved_state = &thread->mact.pcb->iss; + STACK_IEL(stack)->saved_state = &thread->machine.pcb->iss; return; } @@ -1197,28 +1127,26 @@ void machine_stack_handoff(thread_t old, thread_t new) { vm_offset_t stack; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF), thread,
thread->priority, - thread->sched_pri, continuation, - 0); + thread->sched_pri, 0, 0); - assert(new->top_act); - assert(old->top_act); + assert(new); + assert(old); stack = machine_stack_detach(old); - machine_stack_attach(new, stack, 0); + machine_stack_attach(new, stack); - PMAP_SWITCH_CONTEXT(old->top_act->task, new->top_act->task, cpu_number()); + PMAP_SWITCH_CONTEXT(old->task, new->task, cpu_number()); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF) | DBG_FUNC_NONE, (int)old, (int)new, old->sched_pri, new->sched_pri, 0); - machine_thread_set_current(new); + machine_set_current_thread(new); - active_stacks[cpu_number()] = new->kernel_stack; + current_cpu_datap()->cpu_active_stack = new->kernel_stack; return; } @@ -1241,21 +1169,21 @@ int val; return((void *)0); val = i386_SAVED_STATE_COUNT; - kret = machine_thread_get_state(current_act(), + kret = machine_thread_get_state(current_thread(), i386_SAVED_STATE, (thread_state_t) &ic->ss, &val); if (kret != KERN_SUCCESS) { - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); + kfree(ic,sizeof(struct i386_act_context)); return((void *)0); } val = i386_FLOAT_STATE_COUNT; - kret = machine_thread_get_state(current_act(), + kret = machine_thread_get_state(current_thread(), i386_FLOAT_STATE, (thread_state_t) &ic->fs, &val); if (kret != KERN_SUCCESS) { - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); + kfree(ic,sizeof(struct i386_act_context)); return((void *)0); } return(ic); @@ -1265,32 +1193,31 @@ act_thread_catt(void *ctx) { struct i386_act_context *ic; kern_return_t kret; -int val; ic = (struct i386_act_context *)ctx; if (ic == (struct i386_act_context *)NULL) return; - kret = machine_thread_set_state(current_act(), + kret = machine_thread_set_state(current_thread(), i386_SAVED_STATE, (thread_state_t) &ic->ss, i386_SAVED_STATE_COUNT); if (kret != KERN_SUCCESS) goto out; - kret = machine_thread_set_state(current_act(), + kret = machine_thread_set_state(current_thread(), i386_FLOAT_STATE, (thread_state_t) &ic->fs, i386_FLOAT_STATE_COUNT); if (kret != KERN_SUCCESS) goto out; out: - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); + kfree(ic,sizeof(struct i386_act_context)); } void act_thread_cfree(void *ctx) { - kfree((vm_offset_t)ctx,sizeof(struct i386_act_context)); + kfree(ctx,sizeof(struct i386_act_context)); } diff --git a/osfmk/i386/perfmon.c b/osfmk/i386/perfmon.c new file mode 100644 index 000000000..c23f7d831 --- /dev/null +++ b/osfmk/i386/perfmon.c @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(x...) kprintf(x) +#else +#define DBG(x...) +#endif + +/* + * Table of ESCRs and addresses associated with performance counters/CCCRs. + * See Intel SDM Vol 3, Table 15-4 (section 15.9): + */ +static uint16_t pmc_escr_addr_table[18][8] = { + [MSR_BPU_COUNTER0] { + [MSR_BSU_ESCR0] 0x3a0, + [MSR_FSB_ESCR0] 0x3a2, + [MSR_MOB_ESCR0] 0x3aa, + [MSR_PMH_ESCR0] 0x3ac, + [MSR_BPU_ESCR0] 0x3b2, + [MSR_IS_ESCR0] 0x3b4, + [MSR_ITLB_ESCR0] 0x3b6, + [MSR_IX_ESCR0] 0x3c8, + }, + [MSR_BPU_COUNTER1] { + [MSR_BSU_ESCR0] 0x3a0, + [MSR_FSB_ESCR0] 0x3a2, + [MSR_MOB_ESCR0] 0x3aa, + [MSR_PMH_ESCR0] 0x3ac, + [MSR_BPU_ESCR0] 0x3b2, + [MSR_IS_ESCR0] 0x3b4, + [MSR_ITLB_ESCR0] 0x3b6, + [MSR_IX_ESCR0] 0x3c8, + }, + [MSR_BPU_COUNTER2] { + [MSR_BSU_ESCR1] 0x3a1, + [MSR_FSB_ESCR1] 0x3a3, + [MSR_MOB_ESCR1] 0x3ab, + [MSR_PMH_ESCR1] 0x3ad, + [MSR_BPU_ESCR1] 0x3b3, + [MSR_IS_ESCR1] 0x3b5, + [MSR_ITLB_ESCR1] 0x3b7, + [MSR_IX_ESCR1] 0x3c9, + }, + [MSR_BPU_COUNTER3] { + [MSR_BSU_ESCR1] 0x3a1, + [MSR_FSB_ESCR1] 0x3a3, + [MSR_MOB_ESCR1] 0x3ab, + [MSR_PMH_ESCR1] 0x3ad, + [MSR_BPU_ESCR1] 0x3b3, + [MSR_IS_ESCR1] 0x3b5, + [MSR_ITLB_ESCR1] 0x3b7, + [MSR_IX_ESCR1] 0x3c9, + }, + [MSR_MS_COUNTER0] { + [MSR_MS_ESCR1] 0x3c1, + [MSR_TBPU_ESCR1] 0x3c3, + [MSR_TC_ESCR1] 0x3c5, + }, + [MSR_MS_COUNTER1] { + [MSR_MS_ESCR1] 0x3c1, + [MSR_TBPU_ESCR1] 0x3c3, + [MSR_TC_ESCR1] 0x3c5, + }, + [MSR_MS_COUNTER2] { + [MSR_MS_ESCR1] 0x3c1, + [MSR_TBPU_ESCR1] 0x3c3, + [MSR_TC_ESCR1] 0x3c5, + }, + [MSR_MS_COUNTER3] { + [MSR_MS_ESCR1] 0x3c1, + [MSR_TBPU_ESCR1] 0x3c3, + [MSR_TC_ESCR1] 0x3c5, + }, + [MSR_FLAME_COUNTER0] { + [MSR_FIRM_ESCR0] 0x3a4, + [MSR_FLAME_ESCR0] 0x3a6, + [MSR_DAC_ESCR0] 0x3a8, + [MSR_SAT_ESCR0] 0x3ae, + [MSR_U2L_ESCR0] 0x3b0, + }, + [MSR_FLAME_COUNTER1] { + [MSR_FIRM_ESCR0] 0x3a4, + [MSR_FLAME_ESCR0] 0x3a6, + [MSR_DAC_ESCR0] 0x3a8, + [MSR_SAT_ESCR0] 0x3ae, + [MSR_U2L_ESCR0] 0x3b0, + }, + [MSR_FLAME_COUNTER2] { + [MSR_FIRM_ESCR1] 0x3a5, + [MSR_FLAME_ESCR1] 0x3a7, + [MSR_DAC_ESCR1] 0x3a9, + [MSR_SAT_ESCR1] 0x3af, + [MSR_U2L_ESCR1] 0x3b1, + }, + [MSR_FLAME_COUNTER3] { + [MSR_FIRM_ESCR1] 0x3a5, + [MSR_FLAME_ESCR1] 0x3a7, + [MSR_DAC_ESCR1] 0x3a9, + [MSR_SAT_ESCR1] 0x3af, + [MSR_U2L_ESCR1] 0x3b1, + }, + [MSR_IQ_COUNTER0] { + [MSR_CRU_ESCR0] 0x3b8, + [MSR_CRU_ESCR2] 0x3cc, + [MSR_CRU_ESCR4] 0x3e0, + [MSR_IQ_ESCR0] 0x3ba, + [MSR_RAT_ESCR0] 0x3bc, + [MSR_SSU_ESCR0] 0x3be, + [MSR_AFL_ESCR0] 0x3ca, + }, + [MSR_IQ_COUNTER1] { + [MSR_CRU_ESCR0] 0x3b8, + [MSR_CRU_ESCR2] 0x3cc, + [MSR_CRU_ESCR4] 0x3e0, + [MSR_IQ_ESCR0] 0x3ba, + [MSR_RAT_ESCR0] 0x3bc, + [MSR_SSU_ESCR0] 0x3be, + [MSR_AFL_ESCR0] 0x3ca, + }, + [MSR_IQ_COUNTER2] { + [MSR_CRU_ESCR1] 0x3b9, + [MSR_CRU_ESCR3] 0x3cd, + [MSR_CRU_ESCR5] 0x3e1, + [MSR_IQ_ESCR1] 0x3bb, + [MSR_RAT_ESCR1] 0x3bd, + [MSR_AFL_ESCR1] 0x3cb, + }, + [MSR_IQ_COUNTER3] { + [MSR_CRU_ESCR1] 0x3b9, + [MSR_CRU_ESCR3] 0x3cd, + [MSR_CRU_ESCR5] 0x3e1, + [MSR_IQ_ESCR1] 0x3bb, + [MSR_RAT_ESCR1] 0x3bd, + [MSR_AFL_ESCR1] 0x3cb, + }, + [MSR_IQ_COUNTER4] { + [MSR_CRU_ESCR0] 0x3b8, + [MSR_CRU_ESCR2] 0x3cc, + [MSR_CRU_ESCR4] 0x3e0, + [MSR_IQ_ESCR0] 0x3ba, + [MSR_RAT_ESCR0] 0x3bc, + [MSR_SSU_ESCR0] 0x3be, + [MSR_AFL_ESCR0] 0x3ca, + }, + [MSR_IQ_COUNTER5] { + [MSR_CRU_ESCR1] 0x3b9, + [MSR_CRU_ESCR3] 0x3cd, + [MSR_CRU_ESCR5] 0x3e1, + [MSR_IQ_ESCR1] 0x3bb, + [MSR_RAT_ESCR1] 0x3bd, + [MSR_AFL_ESCR1] 0x3cb, + }, +}; +#define PMC_ESCR_ADDR(id,esid) pmc_escr_addr_table[id][esid] + 
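The table just defined is a sparse two-dimensional map from a (performance counter id, 3-bit ESCR select) pair to the MSR address of the matching ESCR. It uses the old GNU designated-initializer form ([index] { ... }, with no =), and every slot left unnamed defaults to zero, so PMC_ESCR_ADDR() yields 0 for counter/ESCR pairings the hardware does not provide and the accessors later in this file can treat 0 as an invalid argument. A minimal standalone sketch of the same pattern in standard C99, with a made-up two-row map rather than the real MSR layout:

#include <stdio.h>
#include <stdint.h>

/* Sparse (counter id, ESCR select) -> MSR address map; entries not
 * named in the initializer are implicitly zero, meaning "no ESCR". */
static const uint16_t demo_escr_addr[2][8] = {
    [0] = { [7] = 0x3a0, [6] = 0x3a2 },
    [1] = { [7] = 0x3a0, [6] = 0x3a2 },
};

int main(void)
{
    for (unsigned esid = 0; esid < 8; esid++) {
        uint16_t addr = demo_escr_addr[0][esid];
        if (addr != 0)      /* zero rows out unsupported pairings */
            printf("counter 0, select %u -> MSR 0x%x\n",
                   esid, (unsigned)addr);
    }
    return 0;
}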
+typedef struct { + pmc_id_t id_max; /* Maximum counter id */ + pmc_machine_t machine_type; /* P6 or P4/Xeon */ + uint32_t msr_counter_base; /* First counter MSR */ + uint32_t msr_control_base; /* First control MSR */ + boolean_t reserved[18]; /* Max-sized arrays... */ + pmc_ovf_func_t *ovf_func[18]; +#ifdef DEBUG + pmc_cccr_t cccr_shadow[18]; /* Last cccr values set */ + pmc_counter_t counter_shadow[18]; /* Last counter values set */ + uint32_t ovfs_unexpected[18]; /* Count of unexpected intrs */ +#endif +} pmc_table_t; + +static pmc_machine_t +_pmc_machine_type(void) +{ + i386_cpu_info_t *infop = cpuid_info(); + + if (strncmp(infop->cpuid_vendor, CPUID_VID_INTEL, sizeof(CPUID_VID_INTEL)) != 0) + return pmc_none; + + if (!pmc_is_available()) + return pmc_none; + + switch (infop->cpuid_family) { + case 0x6: + return pmc_P6; + case 0xf: + return pmc_P4_Xeon; + default: + return pmc_unknown; + } +} + +static void +pmc_p4_intr(void *state) +{ + pmc_table_t *pmc_table = (pmc_table_t *) cpu_core()->pmc; + uint32_t cccr_addr; + pmc_cccr_t cccr; + pmc_id_t id; + int my_logical_cpu = cpu_to_logical_cpu(cpu_number()); + + /* + * Scan through table for reserved counters with overflow and + * with a registered overflow function. + */ + for (id = 0; id <= pmc_table->id_max; id++) { + if (!pmc_table->reserved[id]) + continue; + cccr_addr = pmc_table->msr_control_base + id; + cccr.u_u64 = rdmsr64(cccr_addr); +#ifdef DEBUG + pmc_table->cccr_shadow[id] = cccr; + *((uint64_t *) &pmc_table->counter_shadow[id]) = + rdmsr64(pmc_table->msr_counter_base + id); +#endif + if (cccr.u_htt.ovf == 0) + continue; + if ((cccr.u_htt.ovf_pmi_t0 == 1 && my_logical_cpu == 0) || + (cccr.u_htt.ovf_pmi_t1 == 1 && my_logical_cpu == 1)) { + if (pmc_table->ovf_func[id]) { + (*pmc_table->ovf_func[id])(id, state); + /* func expected to clear overflow */ + continue; + } + } + /* Clear overflow for unexpected interrupt */ +#ifdef DEBUG + pmc_table->ovfs_unexpected[id]++; +#endif + } +} + +static void +pmc_p6_intr(void *state) +{ + pmc_table_t *pmc_table = (pmc_table_t *) cpu_core()->pmc; + pmc_id_t id; + + /* + * Can't determine which counter has overflow + * so call all registered functions. 
+ */ + for (id = 0; id <= pmc_table->id_max; id++) + if (pmc_table->reserved[id] && pmc_table->ovf_func[id]) + (*pmc_table->ovf_func[id])(id, state); +} + +int +pmc_init(void) +{ + int ret; + cpu_core_t *my_core; + pmc_table_t *pmc_table; + pmc_machine_t pmc_type; + + my_core = cpu_core(); + assert(my_core); + + pmc_type = _pmc_machine_type(); + if (pmc_type == pmc_none) { + return KERN_FAILURE; + } + + pmc_table = (pmc_table_t *) my_core->pmc; + if (pmc_table == NULL) { + ret = kmem_alloc(kernel_map, + (void *) &pmc_table, sizeof(pmc_table_t)); + if (ret != KERN_SUCCESS) + panic("pmc_init() kmem_alloc returned %d\n", ret); + bzero((void *)pmc_table, sizeof(pmc_table_t)); + + pmc_table->machine_type = pmc_type; + switch (pmc_type) { + case pmc_P4_Xeon: + pmc_table->id_max = 17; + pmc_table->msr_counter_base = MSR_COUNTER_ADDR(0); + pmc_table->msr_control_base = MSR_CCCR_ADDR(0); + lapic_set_pmi_func(&pmc_p4_intr); + break; + case pmc_P6: + pmc_table->id_max = 1; + pmc_table->msr_counter_base = MSR_P6_COUNTER_ADDR(0); + pmc_table->msr_control_base = MSR_P6_PES_ADDR(0); + lapic_set_pmi_func(&pmc_p6_intr); + break; + default: + break; + } + if (!atomic_cmpxchg((uint32_t *) &my_core->pmc, + 0, (uint32_t) pmc_table)) { + kmem_free(kernel_map, + (vm_offset_t) pmc_table, sizeof(pmc_table_t)); + } + } + DBG("pmc_init() done for cpu %d my_core->pmc=0x%x type=%d\n", + cpu_number(), my_core->pmc, pmc_type); + + return KERN_SUCCESS; +} + +static inline pmc_table_t * +pmc_table_valid(pmc_id_t id) +{ + cpu_core_t *my_core = cpu_core(); + pmc_table_t *pmc_table; + + assert(my_core); + + pmc_table = (pmc_table_t *) my_core->pmc; + return (pmc_table == NULL || + id > pmc_table->id_max || + !pmc_table->reserved[id]) ? NULL : pmc_table; +} + +int +pmc_machine_type(pmc_machine_t *type) +{ + cpu_core_t *my_core = cpu_core(); + pmc_table_t *pmc_table; + + assert(my_core); + + pmc_table = (pmc_table_t *) my_core->pmc; + if (pmc_table == NULL) + return KERN_FAILURE; + + *type = pmc_table->machine_type; + + return KERN_SUCCESS; +} + +int +pmc_reserve(pmc_id_t id) +{ + cpu_core_t *my_core = cpu_core(); + pmc_table_t *pmc_table; + + assert(my_core); + + pmc_table = (pmc_table_t *) my_core->pmc; + if (pmc_table == NULL) + return KERN_FAILURE; + if (id > pmc_table->id_max) + return KERN_INVALID_ARGUMENT; + if (pmc_table->reserved[id]) + return KERN_FAILURE; + + pmc_table->reserved[id] = TRUE; + + return KERN_SUCCESS; +} + +boolean_t +pmc_is_reserved(pmc_id_t id) +{ + return pmc_table_valid(id) != NULL; +} + +int +pmc_free(pmc_id_t id) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + pmc_cccr_write(id, 0x0ULL); + pmc_table->reserved[id] = FALSE; + pmc_table->ovf_func[id] = NULL; + + return KERN_SUCCESS; +} + +int +pmc_counter_read(pmc_id_t id, pmc_counter_t *val) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + *(uint64_t *)val = rdmsr64(pmc_table->msr_counter_base + id); + + return KERN_SUCCESS; +} + +int +pmc_counter_write(pmc_id_t id, pmc_counter_t *val) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + wrmsr64(pmc_table->msr_counter_base + id, *(uint64_t *)val); + + return KERN_SUCCESS; +} + +int +pmc_cccr_read(pmc_id_t id, pmc_cccr_t *cccr) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + if (pmc_table->machine_type != pmc_P4_Xeon) + return KERN_FAILURE; + + 
*(uint64_t *)cccr = rdmsr64(pmc_table->msr_control_base + id); + + return KERN_SUCCESS; +} + +int +pmc_cccr_write(pmc_id_t id, pmc_cccr_t *cccr) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + if (pmc_table->machine_type != pmc_P4_Xeon) + return KERN_FAILURE; + + wrmsr64(pmc_table->msr_control_base + id, *(uint64_t *)cccr); + + return KERN_SUCCESS; +} + +int +pmc_evtsel_read(pmc_id_t id, pmc_evtsel_t *evtsel) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + if (pmc_table->machine_type != pmc_P6) + return KERN_FAILURE; + + *(uint64_t *)evtsel = rdmsr64(pmc_table->msr_control_base + id); + + return KERN_SUCCESS; +} + +int +pmc_evtsel_write(pmc_id_t id, pmc_evtsel_t *evtsel) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + if (pmc_table->machine_type != pmc_P4_Xeon) + return KERN_FAILURE; + + wrmsr64(pmc_table->msr_control_base + id, *(uint64_t *)evtsel); + + return KERN_SUCCESS; +} + +int +pmc_escr_read(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr) +{ + uint32_t addr; + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + if (pmc_table->machine_type != pmc_P4_Xeon) + return KERN_FAILURE; + + if (esid > PMC_ESID_MAX) + return KERN_INVALID_ARGUMENT; + + addr = PMC_ESCR_ADDR(id, esid); + if (addr == 0) + return KERN_INVALID_ARGUMENT; + + *(uint64_t *)escr = rdmsr64(addr); + + return KERN_SUCCESS; +} + +int +pmc_escr_write(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr) +{ + uint32_t addr; + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_FAILURE; + + if (pmc_table->machine_type != pmc_P4_Xeon) + return KERN_FAILURE; + + if (esid > PMC_ESID_MAX) + return KERN_INVALID_ARGUMENT; + + addr = PMC_ESCR_ADDR(id, esid); + if (addr == 0) + return KERN_INVALID_ARGUMENT; + + wrmsr64(addr, *(uint64_t *)escr); + + return KERN_SUCCESS; +} + +int +pmc_set_ovf_func(pmc_id_t id, pmc_ovf_func_t func) +{ + pmc_table_t *pmc_table = pmc_table_valid(id); + + if (pmc_table == NULL) + return KERN_INVALID_ARGUMENT; + + pmc_table->ovf_func[id] = func; + + return KERN_SUCCESS; +} diff --git a/osfmk/i386/perfmon.h b/osfmk/i386/perfmon.h new file mode 100644 index 000000000..c8eae2a3f --- /dev/null +++ b/osfmk/i386/perfmon.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _I386_PERFMON_H_ +#define _I386_PERFMON_H_ + +#include + +/* + * Handy macros for bit/bitfield definition and manipulations: + */ +#define bit(n) (1ULL << (n)) +#define field(n,m) ((bit((m)+1)-1) & ~(bit(n)-1)) +#define field_nbit(fld) (ffs(fld)-1) +#define field_select(fld,x) ((x) & (fld)) +#define field_clear(fld,x) ((x) & ~(fld)) +#define field_unshift(fld,x) ((x) >> field_nbit(fld)) +#define field_shift(fld,x) ((x) << field_nbit(fld)) +#define field_get(fld,x) (field_unshift(fld,field_select(fld,x))) +#define field_set(fld,x,val) (field_clear(fld,x) | field_shift(fld,val)) + +#define PERFMON_AVAILABLE bit(7) +#define BTS_UNAVAILABLE bit(11) + +static inline boolean_t +pmc_is_available(void) +{ + uint32_t lo; + uint32_t hi; + int ret; + + ret = rdmsr_carefully(MSR_IA32_MISC_ENABLE, &lo, &hi); + + return (ret == 0) && ((lo & PERFMON_AVAILABLE) != 0); +} + +/* + * Counter layout: + */ +#define PMC_COUNTER_COUNTER field(0,39) +#define PMC_COUNTER_RESERVED field(40,64) +#define PMC_COUNTER_MAX ((uint64_t) PMC_COUNTER_COUNTER) +typedef struct { + uint64_t counter : 40; + uint64_t reserved : 24; +} pmc_counter_t; +#define PMC_COUNTER_ZERO { 0, 0 } + + +/* + * There are 2 basic flavors of PMCsL: P6 and P4/Xeon: + */ +typedef enum { + pmc_none, + pmc_P6, + pmc_P4_Xeon, + pmc_unknown +} pmc_machine_t; + +/* + * P6 MSRs... + */ +#define MSR_P6_COUNTER_ADDR(n) (0x0c1 + (n)) +#define MSR_P6_PES_ADDR(n) (0x186 + (n)) + +typedef struct { + uint64_t event_select : 8; + uint64_t umask : 8; + uint64_t usr : 1; + uint64_t os : 1; + uint64_t e : 1; + uint64_t pc : 1; + uint64_t apic_int : 1; + uint64_t reserved1 : 1; + uint64_t en : 1; + uint64_t inv : 1; + uint64_t cmask : 8; +} pmc_evtsel_t; +#define PMC_EVTSEL_ZERO ((pmc_evtsel_t){ 0,0,0,0,0,0,0,0,0,0,0 }) + +#define MSR_P6_PERFCTR0 0 +#define MSR_P6_PERFCTR1 1 + +/* + * P4/Xeon MSRs... 
+ */ +#define MSR_COUNTER_ADDR(n) (0x300 + (n)) +#define MSR_CCCR_ADDR(n) (0x360 + (n)) + +typedef enum { + MSR_BPU_COUNTER0 = 0, + MSR_BPU_COUNTER1 = 1, + #define MSR_BSU_ESCR0 7 + #define MSR_FSB_ESCR0 6 + #define MSR_MOB_ESCR0 2 + #define MSR_PMH_ESCR0 4 + #define MSR_BPU_ESCR0 0 + #define MSR_IS_ESCR0 1 + #define MSR_ITLB_ESCR0 3 + #define MSR_IX_ESCR0 5 + MSR_BPU_COUNTER2 = 2, + MSR_BPU_COUNTER3 = 3, + #define MSR_BSU_ESCR1 7 + #define MSR_FSB_ESCR1 6 + #define MSR_MOB_ESCR1 2 + #define MSR_PMH_ESCR1 4 + #define MSR_BPU_ESCR1 0 + #define MSR_IS_ESCR1 1 + #define MSR_ITLB_ESCR1 3 + #define MSR_IX_ESCR1 5 + MSR_MS_COUNTER0 = 4, + MSR_MS_COUNTER1 = 5, + #define MSR_MS_ESCR0 0 + #define MSR_TBPU_ESCR0 2 + #define MSR_TC_ESCR0 1 + MSR_MS_COUNTER2 = 6, + MSR_MS_COUNTER3 = 7, + #define MSR_MS_ESCR1 0 + #define MSR_TBPU_ESCR1 2 + #define MSR_TC_ESCR1 1 + MSR_FLAME_COUNTER0 = 8, + MSR_FLAME_COUNTER1 = 9, + #define MSR_FIRM_ESCR0 1 + #define MSR_FLAME_ESCR0 0 + #define MSR_DAC_ESCR0 5 + #define MSR_SAT_ESCR0 2 + #define MSR_U2L_ESCR0 3 + MSR_FLAME_COUNTER2 = 10, + MSR_FLAME_COUNTER3 = 11, + #define MSR_FIRM_ESCR1 1 + #define MSR_FLAME_ESCR1 0 + #define MSR_DAC_ESCR1 5 + #define MSR_SAT_ESCR1 2 + #define MSR_U2L_ESCR1 3 + MSR_IQ_COUNTER0 = 12, + MSR_IQ_COUNTER1 = 13, + MSR_IQ_COUNTER4 = 16, + #define MSR_CRU_ESCR0 4 + #define MSR_CRU_ESCR2 5 + #define MSR_CRU_ESCR4 6 + #define MSR_IQ_ESCR0 0 + #define MSR_RAT_ESCR0 2 + #define MSR_SSU_ESCR0 3 + #define MSR_AFL_ESCR0 1 + MSR_IQ_COUNTER2 = 14, + MSR_IQ_COUNTER3 = 15, + MSR_IQ_COUNTER5 = 17, + #define MSR_CRU_ESCR1 4 + #define MSR_CRU_ESCR3 5 + #define MSR_CRU_ESCR5 6 + #define MSR_IQ_ESCR1 0 + #define MSR_RAT_ESCR1 2 + #define MSR_AFL_ESCR1 1 +} pmc_id_t; + +typedef int pmc_escr_id_t; +#define PMC_ESID_MAX 7 + +/* + * ESCR MSR layout: + */ +#define PMC_ECSR_NOHTT_RESERVED field(0,1) +#define PMC_ECSR_T0_USR bit(0) +#define PMC_ECSR_T0_OS bit(1) +#define PMC_ECSR_T1_USR bit(2) +#define PMC_ECSR_T1_OS bit(3) +#define PMC_ECSR_USR bit(2) +#define PMC_ECSR_OS bit(3) +#define PMC_ECSR_TAG_ENABLE bit(4) +#define PMC_ECSR_TAG_VALUE field(5,8) +#define PMC_ECSR_EVENT_MASK field(9,24) +#define PMC_ECSR_EVENT_SELECT field(25,30) +#define PMC_ECSR_RESERVED2 field(30,64) +typedef struct { + uint64_t reserved1 : 2; + uint64_t usr : 1; + uint64_t os : 1; + uint64_t tag_enable : 1; + uint64_t tag_value : 4; + uint64_t event_mask : 16; + uint64_t event_select : 6; + uint64_t reserved2 : 33; +} pmc_escr_nohtt_t; +typedef struct { + uint64_t t0_usr : 1; + uint64_t t0_os : 1; + uint64_t t1_usr : 1; + uint64_t t1_os : 1; + uint64_t tag_enable : 1; + uint64_t tag_value : 4; + uint64_t event_mask : 16; + uint64_t event_select : 6; + uint64_t reserved2 : 33; +} pmc_escr_htt_t; +typedef union { + pmc_escr_nohtt_t u_nohtt; + pmc_escr_htt_t u_htt; + uint64_t u_u64; +} pmc_escr_t; +#define PMC_ESCR_ZERO { .u_u64 = 0ULL } + +/* + * CCCR MSR layout: + */ +#define PMC_CCCR_RESERVED1 field(1,11) +#define PMC_CCCR_ENABLE bit(12) +#define PMC_CCCR_ECSR_SELECT field(13,15) +#define PMC_CCCR_RESERVED2 field(16,17) +#define PMC_CCCR_HTT_ACTIVE field(16,17) +#define PMC_CCCR_COMPARE bit(18) +#define PMC_CCCR_COMPLEMENT bit(19) +#define PMC_CCCR_THRESHOLD field(20,23) +#define PMC_CCCR_EDGE bit(24) +#define PMC_CCCR_FORCE_OVF bit(25) +#define PMC_CCCR_OVF_PMI bit(26) +#define PMC_CCCR_NOHTT_RESERVED2 field(27,29) +#define PMC_CCCR_OVF_PMI_T0 bit(26) +#define PMC_CCCR_OVF_PMI_T1 bit(27) +#define PMC_CCCR_HTT_RESERVED2 field(28,29) +#define PMC_CCCR_CASCADE bit(30) +#define PMC_CCCR_OVF 
bit(31) +typedef struct { + uint64_t reserved1 : 12; + uint64_t enable : 1; + uint64_t escr_select : 3; + uint64_t reserved2 : 2; + uint64_t compare : 1; + uint64_t complement : 1; + uint64_t threshold : 4; + uint64_t edge : 1; + uint64_t force_ovf : 1; + uint64_t ovf_pmi : 1; + uint64_t reserved3 : 3; + uint64_t cascade : 1; + uint64_t ovf : 1; + uint64_t reserved4 : 32; +} pmc_cccr_nohtt_t; +typedef struct { + uint64_t reserved1 : 12; + uint64_t enable : 1; + uint64_t escr_select : 3; + uint64_t active_thread : 2; + uint64_t compare : 1; + uint64_t complement : 1; + uint64_t threshold : 4; + uint64_t edge : 1; + uint64_t force_OVF : 1; + uint64_t ovf_pmi_t0 : 1; + uint64_t ovf_pmi_t1 : 1; + uint64_t reserved3 : 2; + uint64_t cascade : 1; + uint64_t ovf : 1; + uint64_t reserved4 : 32; +} pmc_cccr_htt_t; +typedef union { + pmc_cccr_nohtt_t u_nohtt; + pmc_cccr_htt_t u_htt; + uint64_t u_u64; +} pmc_cccr_t; +#define PMC_CCCR_ZERO { .u_u64 = 0ULL } + +typedef void (pmc_ovf_func_t)(pmc_id_t id, void *state); + +/* + * In-kernel PMC access primitives: + */ +/* Generic: */ +extern int pmc_init(void); +extern int pmc_machine_type(pmc_machine_t *type); +extern boolean_t pmc_is_reserved(pmc_id_t id); +extern int pmc_reserve(pmc_id_t id); +extern int pmc_free(pmc_id_t id); +extern int pmc_counter_read(pmc_id_t id, pmc_counter_t *val); +extern int pmc_counter_write(pmc_id_t id, pmc_counter_t *val); + +/* P6-specific: */ +extern int pmc_evtsel_read(pmc_id_t id, pmc_evtsel_t *evtsel); +extern int pmc_evtsel_write(pmc_id_t id, pmc_evtsel_t *evtsel); + +/* P4/Xeon-specific: */ +extern int pmc_cccr_read(pmc_id_t id, pmc_cccr_t *cccr); +extern int pmc_cccr_write(pmc_id_t id, pmc_cccr_t *cccr); +extern int pmc_escr_read(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); +extern int pmc_escr_write(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); +extern int pmc_set_ovf_func(pmc_id_t id, pmc_ovf_func_t *func); + +#endif /* _I386_PERFMON_H_ */ diff --git a/osfmk/i386/phys.c b/osfmk/i386/phys.c index fe2374e3e..3676a4f95 100644 --- a/osfmk/i386/phys.c +++ b/osfmk/i386/phys.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -47,13 +47,41 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ -#include +#include +#include +#include + +#include + +#include +#include #include -#include -#include -#include + +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* * pmap_zero_page zeros the specified (machine independent) page. @@ -62,10 +90,8 @@ void pmap_zero_page( ppnum_t pn) { - vm_offset_t p; assert(pn != vm_page_fictitious_addr); - p = (vm_offset_t)i386_ptob(pn); - bzero((char *)phystokv(p), PAGE_SIZE); + bzero_phys((addr64_t)i386_ptob(pn), PAGE_SIZE); } /* @@ -80,26 +106,7 @@ pmap_zero_part_page( { assert(pn != vm_page_fictitious_addr); assert(offset + len <= PAGE_SIZE); - bzero((char *)phystokv(i386_ptob(pn)) + offset, len); -} - -/* - * pmap_copy_page copies the specified (machine independent) pages. 
- */ -void -pmap_copy_page( - ppnum_t psrc, - ppnum_t pdst) - -{ - vm_offset_t src,dst; - - assert(psrc != vm_page_fictitious_addr); - assert(pdst != vm_page_fictitious_addr); - src = (vm_offset_t)i386_ptob(psrc); - dst = (vm_offset_t)i386_ptob(pdst); - - memcpy((void *)phystokv(dst), (void *)phystokv(src), PAGE_SIZE); + bzero_phys((addr64_t)(i386_ptob(pn) + offset), len); } /* @@ -114,16 +121,15 @@ pmap_copy_part_page( vm_size_t len) { vm_offset_t src, dst; - assert(psrc != vm_page_fictitious_addr); assert(pdst != vm_page_fictitious_addr); src = (vm_offset_t)i386_ptob(psrc); dst = (vm_offset_t)i386_ptob(pdst); assert(((dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); assert(((src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); - - memcpy((void *)(phystokv(dst) + dst_offset), - (void *)(phystokv(src) + src_offset), len); + bcopy_phys((addr64_t)src + (src_offset & INTEL_OFFMASK), + (addr64_t)dst + (dst_offset & INTEL_OFFMASK), + len); } /* @@ -137,14 +143,21 @@ pmap_copy_part_lpage( vm_offset_t dst_offset, vm_size_t len) { - vm_offset_t dst; + pt_entry_t *ptep; + thread_t thr_act = current_thread(); - assert(src != vm_page_fictitious_addr); assert(pdst != vm_page_fictitious_addr); - dst = (vm_offset_t)i386_ptob(pdst); - assert(((dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); - - memcpy((void *)(phystokv(dst) + dst_offset), (void *)src, len); + ptep = pmap_pte(thr_act->map->pmap, i386_ptob(pdst)); + if (0 == ptep) + panic("pmap_copy_part_lpage ptep"); + assert(((pdst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); + if (*(pt_entry_t *) CM2) + panic("pmap_copy_part_lpage"); + *(int *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (*ptep & PG_FRAME) | + INTEL_PTE_REF | INTEL_PTE_MOD; + invlpg((unsigned int) CA2); + memcpy((void *) (CA2 + (dst_offset & INTEL_OFFMASK)), (void *) src, len); + *(pt_entry_t *) CM2 = 0; } /* @@ -158,14 +171,21 @@ pmap_copy_part_rpage( vm_offset_t dst, vm_size_t len) { - vm_offset_t src; + pt_entry_t *ptep; + thread_t thr_act = current_thread(); assert(psrc != vm_page_fictitious_addr); - assert(dst != vm_page_fictitious_addr); - src = (vm_offset_t)i386_ptob(psrc); - assert(((src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); - - memcpy((void *)dst, (void *)(phystokv(src) + src_offset), len); + ptep = pmap_pte(thr_act->map->pmap, i386_ptob(psrc)); + if (0 == ptep) + panic("pmap_copy_part_rpage ptep"); + assert(((psrc & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); + if (*(pt_entry_t *) CM2) + panic("pmap_copy_part_rpage"); + *(pt_entry_t *) CM2 = INTEL_PTE_VALID | INTEL_PTE_RW | (*ptep & PG_FRAME) | + INTEL_PTE_REF; + invlpg((unsigned int) CA2); + memcpy((void *) dst, (void *) (CA2 + (src_offset & INTEL_OFFMASK)), len); + *(pt_entry_t *) CM2 = 0; } /* @@ -177,9 +197,15 @@ vm_offset_t kvtophys( vm_offset_t addr) { - pt_entry_t *pte; - - if ((pte = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) - return 0; - return i386_trunc_page(*pte) | (addr & INTEL_OFFMASK); + pt_entry_t *ptep; + pmap_paddr_t pa; + + if ((ptep = pmap_pte(kernel_pmap, addr)) == PT_ENTRY_NULL) { + pa = 0; + } else { + pa = pte_to_pa(*ptep) | (addr & INTEL_OFFMASK); + } + if (0 == pa) + kprintf("kvtophys ret 0!\n"); + return (pa); } diff --git a/osfmk/i386/pio.h b/osfmk/i386/pio.h index 309714aaf..c8b3b4a19 100644 --- a/osfmk/i386/pio.h +++ b/osfmk/i386/pio.h @@ -51,7 +51,6 @@ */ #ifndef I386_PIO_H #define I386_PIO_H -#include #include typedef unsigned short i386_ioport_t; diff --git a/osfmk/i386/pit.h b/osfmk/i386/pit.h index ab14da5c1..1bfb05bf9 100644 --- a/osfmk/i386/pit.h +++ b/osfmk/i386/pit.h 
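The pit.h hunk that follows corrects CLKNUM, the i8254 input clock, from 1193167 to 1193182 Hz. The exact constant matters because the timer divisor is derived from it by integer division, so a wrong clock value skews every tick interval slightly. A small sketch of that divisor arithmetic, with a 100 Hz tick rate assumed purely as an example (not the kernel's configured HZ):

#include <stdio.h>

#define CLKNUM 1193182   /* i8254 input clock in Hz, per the fix below */

int main(void)
{
    int hz = 100;                        /* assumed tick rate */
    unsigned divisor = CLKNUM / hz;      /* reload count programmed into the PIT */
    double actual_hz = (double)CLKNUM / divisor;  /* rate actually delivered */

    printf("divisor=%u actual=%.4f Hz\n", divisor, actual_hz);
    return 0;
}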
@@ -100,7 +100,7 @@ WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * Clock speed for the timer in hz divided by the constant HZ * (defined in param.h) */ -#define CLKNUM 1193167 +#define CLKNUM 1193182 /* formerly 1193167 */ #if EXL /* added micro-timer support. --- csy */ diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index e619388da..e43dac464 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -82,8 +82,6 @@ * and to when physical maps must be made correct. */ -#include - #include #include #include @@ -96,6 +94,7 @@ #include #include +#include #include #include @@ -113,8 +112,10 @@ #include #include +#include #include #include +#include #if MACH_KDB #include @@ -125,14 +126,14 @@ #include -#if NCPUS > 1 -#include -#endif +#include + +#include /* * Forward declarations for internal functions. */ -void pmap_expand( +void pmap_expand( pmap_t map, vm_offset_t v); @@ -142,30 +143,42 @@ extern void pmap_remove_range( pt_entry_t *spte, pt_entry_t *epte); -void phys_attribute_clear( - vm_offset_t phys, +void phys_attribute_clear( + ppnum_t phys, int bits); -boolean_t phys_attribute_test( - vm_offset_t phys, +boolean_t phys_attribute_test( + ppnum_t phys, int bits); -void pmap_set_modify(ppnum_t pn); - -void phys_attribute_set( - vm_offset_t phys, +void phys_attribute_set( + ppnum_t phys, int bits); +void pmap_growkernel( + vm_offset_t addr); + +void pmap_set_reference( + ppnum_t pn); + +void pmap_movepage( + unsigned long from, + unsigned long to, + vm_size_t size); + +pt_entry_t * pmap_mapgetpte( + vm_map_t map, + vm_offset_t v); + +boolean_t phys_page_exists( + ppnum_t pn); #ifndef set_dirbase -void set_dirbase(vm_offset_t dirbase); +void set_dirbase(vm_offset_t dirbase); #endif /* set_dirbase */ -#define PA_TO_PTE(pa) (pa_to_pte((pa) - VM_MIN_KERNEL_ADDRESS)) #define iswired(pte) ((pte) & INTEL_PTE_WIRED) -pmap_t real_pmap[NCPUS]; - #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry); #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry); @@ -199,11 +212,21 @@ pv_entry_t pv_head_table; /* array of entries, one per page */ */ pv_entry_t pv_free_list; /* free list at SPLVM */ decl_simple_lock_data(,pv_free_list_lock) +int pv_free_count = 0; +#define PV_LOW_WATER_MARK 5000 +#define PV_ALLOC_CHUNK 2000 +thread_call_t mapping_adjust_call; +static thread_call_data_t mapping_adjust_call_data; +int mappingrecurse = 0; #define PV_ALLOC(pv_e) { \ simple_lock(&pv_free_list_lock); \ if ((pv_e = pv_free_list) != 0) { \ pv_free_list = pv_e->next; \ + pv_free_count--; \ + if (pv_free_count < PV_LOW_WATER_MARK) \ + if (hw_compare_and_store(0,1,&mappingrecurse)) \ + thread_call_enter(mapping_adjust_call); \ } \ simple_unlock(&pv_free_list_lock); \ } @@ -212,11 +235,17 @@ decl_simple_lock_data(,pv_free_list_lock) simple_lock(&pv_free_list_lock); \ pv_e->next = pv_free_list; \ pv_free_list = pv_e; \ + pv_free_count++; \ simple_unlock(&pv_free_list_lock); \ } zone_t pv_list_zone; /* zone of pv_entry structures */ +#ifdef PAE +static zone_t pdpt_zone; +#endif + + /* * Each entry in the pv_head_table is locked by a bit in the * pv_lock_table. The lock bits are accessed by the physical @@ -231,16 +260,25 @@ char *pv_lock_table; /* pointer to array of bits */ * for. Initialized to zero so that pmap operations done before * pmap_init won't touch any non-existent structures. 
*/ -vm_offset_t vm_first_phys = (vm_offset_t) 0; -vm_offset_t vm_last_phys = (vm_offset_t) 0; +pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0; +pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0; boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */ +pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0; + +#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1 +#if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */ +static struct vm_object kptobj_object_store; +static vm_object_t kptobj; +#endif + + /* * Index into pv_head table, its lock bits, and the modify/reference * bits starting at vm_first_phys. */ -#define pa_index(pa) (atop(pa - vm_first_phys)) +#define pa_index(pa) (i386_btop(pa - vm_first_phys)) #define pai_to_pvh(pai) (&pv_head_table[pai]) #define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table) @@ -265,12 +303,6 @@ char *pmap_phys_attributes; */ #define PDE_MAPPED_SIZE (pdetova(1)) -/* - * We allocate page table pages directly from the VM system - * through this object. It maps physical memory. - */ -vm_object_t pmap_object = VM_OBJECT_NULL; - /* * Locking and TLB invalidation */ @@ -308,7 +340,6 @@ vm_object_t pmap_object = VM_OBJECT_NULL; * kernel_pmap can only be held at splhigh. */ -#if NCPUS > 1 /* * We raise the interrupt level to splvm, to block interprocessor * interrupts during pmap operations. We must take the CPU out of @@ -366,15 +397,19 @@ lock_t pmap_system_lock; #if USLOCK_DEBUG extern int max_lock_loops; -#define LOOP_VAR int loop_count = 0 +extern int disableSerialOuput; +#define LOOP_VAR \ + unsigned int loop_count; \ + loop_count = disableSerialOuput ? max_lock_loops \ + : max_lock_loops*100 #define LOOP_CHECK(msg, pmap) \ - if (loop_count++ > max_lock_loops) { \ + if (--loop_count == 0) { \ mp_disable_preemption(); \ - kprintf("%s: cpu %d pmap %x, cpus_active %d\n", \ + kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \ msg, cpu_number(), pmap, cpus_active); \ Debugger("deadlock detection"); \ mp_enable_preemption(); \ - loop_count = 0; \ + loop_count = max_lock_loops; \ } #else /* USLOCK_DEBUG */ #define LOOP_VAR @@ -413,54 +448,17 @@ extern int max_lock_loops; mp_enable_preemption(); \ } -#else /* NCPUS > 1 */ - -#if MACH_RT -#define SPLVM(spl) { (spl) = splhigh(); } -#define SPLX(spl) splx (spl) -#else /* MACH_RT */ -#define SPLVM(spl) -#define SPLX(spl) -#endif /* MACH_RT */ - -#define PMAP_READ_LOCK(pmap, spl) SPLVM(spl) -#define PMAP_WRITE_LOCK(spl) SPLVM(spl) -#define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl) -#define PMAP_WRITE_UNLOCK(spl) SPLX(spl) -#define PMAP_WRITE_TO_READ_LOCK(pmap) - -#if MACH_RT -#define LOCK_PVH(index) disable_preemption() -#define UNLOCK_PVH(index) enable_preemption() -#else /* MACH_RT */ -#define LOCK_PVH(index) -#define UNLOCK_PVH(index) -#endif /* MACH_RT */ - -#define PMAP_FLUSH_TLBS() flush_tlb() -#define PMAP_RELOAD_TLBS() set_cr3(kernel_pmap->pdirbase) -#define PMAP_INVALIDATE_PAGE(map, saddr, eaddr) { \ - if (map == kernel_pmap) \ - invlpg((vm_offset_t) saddr); \ - else \ - flush_tlb(); \ -} - -#endif /* NCPUS > 1 */ - #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ #define INVALIDATE_TLB(m, s, e) { \ flush_tlb(); \ } -#if NCPUS > 1 /* * Structures to keep track of pending TLB invalidations */ cpu_set cpus_active; cpu_set cpus_idle; -volatile boolean_t cpu_update_needed[NCPUS]; #define UPDATE_LIST_SIZE 4 @@ -483,30 +481,31 @@ struct pmap_update_list { } ; typedef struct pmap_update_list *pmap_update_list_t; -struct pmap_update_list cpu_update_list[NCPUS]; - extern void signal_cpus( cpu_set 
use_list, pmap_t pmap, vm_offset_t start, vm_offset_t end); -#endif /* NCPUS > 1 */ +pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; /* * Other useful macros. */ -#define current_pmap() (vm_map_pmap(current_act()->map)) +#define current_pmap() (vm_map_pmap(current_thread()->map)) #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0) struct pmap kernel_pmap_store; pmap_t kernel_pmap; +#ifdef PMAP_QUEUE +decl_simple_lock_data(,free_pmap_lock) +#endif + struct zone *pmap_zone; /* zone of pmap structures */ int pmap_debug = 0; /* flag for debugging prints */ -int ptes_per_vm_page; /* number of hardware ptes needed - to map one VM page. */ + unsigned int inuse_ptepages_count = 0; /* debugging */ /* @@ -523,10 +522,11 @@ extern vm_offset_t hole_start, hole_end; extern char end; -/* - * Page directory for kernel. - */ -pt_entry_t *kpde = 0; /* set by start.s - keep out of bss */ +static int nkpt; + +pt_entry_t *DMAP1, *DMAP2; +caddr_t DADDR1; +caddr_t DADDR2; #if DEBUG_ALIAS #define PMAP_ALIAS_MAX 32 @@ -542,32 +542,40 @@ extern vm_offset_t get_rpc(); #endif /* DEBUG_ALIAS */ -/* - * Given an offset and a map, compute the address of the - * pte. If the address is invalid with respect to the map - * then PT_ENTRY_NULL is returned (and the map may need to grow). - * - * This is only used in machine-dependent code. - */ +#define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])) +#define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT]) -pt_entry_t * -pmap_pte( - register pmap_t pmap, - register vm_offset_t addr) +static __inline int +pmap_is_current(pmap_t pmap) { - register pt_entry_t *ptp; - register pt_entry_t pte; - - pte = pmap->dirbase[pdenum(pmap, addr)]; - if ((pte & INTEL_PTE_VALID) == 0) - return(PT_ENTRY_NULL); - ptp = (pt_entry_t *)ptetokv(pte); - return(&ptp[ptenum(addr)]); - + return (pmap == kernel_pmap || + (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } -#define pmap_pde(pmap, addr) (&(pmap)->dirbase[pdenum(pmap, addr)]) +/* + * return address of mapped pte for vaddr va in pmap pmap. 
+ */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *pde; + pd_entry_t newpf; + + pde = pmap_pde(pmap, va); + if (*pde != 0) { + if (pmap_is_current(pmap)) + return( vtopte(va)); + newpf = *pde & PG_FRAME; + if (((*CM4) & PG_FRAME) != newpf) { + *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID; + invlpg((u_int)CA4); + } + return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1)); + } + return(0); +} + #define DEBUG_PTE_PAGE 0 #if DEBUG_PTE_PAGE @@ -591,7 +599,7 @@ ptep_check( if (pte->wired) ctw++; } - pte += ptes_per_vm_page; + pte++; } if (ctu != ptep->use_count || ctw != ptep->wired_count) { @@ -612,17 +620,18 @@ ptep_check( vm_offset_t pmap_map( register vm_offset_t virt, - register vm_offset_t start, - register vm_offset_t end, + register vm_offset_t start_addr, + register vm_offset_t end_addr, register vm_prot_t prot) { register int ps; ps = PAGE_SIZE; - while (start < end) { - pmap_enter(kernel_pmap, virt, (ppnum_t)i386_btop(start), prot, 0, FALSE); + while (start_addr < end_addr) { + pmap_enter(kernel_pmap, + virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE); virt += ps; - start += ps; + start_addr += ps; } return(virt); } @@ -637,14 +646,14 @@ pmap_map( vm_offset_t pmap_map_bd( register vm_offset_t virt, - register vm_offset_t start, - register vm_offset_t end, + register vm_offset_t start_addr, + register vm_offset_t end_addr, vm_prot_t prot) { register pt_entry_t template; register pt_entry_t *pte; - template = pa_to_pte(start) + template = pa_to_pte(start_addr) | INTEL_PTE_NCACHE | INTEL_PTE_REF | INTEL_PTE_MOD @@ -653,24 +662,28 @@ pmap_map_bd( if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; - while (start < end) { + /* XXX move pmap_pte out of loop, once one pte mapped, all are */ + while (start_addr < end_addr) { pte = pmap_pte(kernel_pmap, virt); - if (pte == PT_ENTRY_NULL) + if (pte == PT_ENTRY_NULL) { panic("pmap_map_bd: Invalid kernel address\n"); + } WRITE_PTE_FAST(pte, template) pte_increment_pa(template); virt += PAGE_SIZE; - start += PAGE_SIZE; + start_addr += PAGE_SIZE; } flush_tlb(); return(virt); } -extern int cnvmem; extern char *first_avail; extern vm_offset_t virtual_avail, virtual_end; -extern vm_offset_t avail_start, avail_end, avail_next; +extern pmap_paddr_t avail_start, avail_end; +extern vm_offset_t etext; +extern void *sectHIBB; +extern int sectSizeHIB; /* * Bootstrap the system enough to run with virtual memory. @@ -693,20 +706,15 @@ extern vm_offset_t avail_start, avail_end, avail_next; void pmap_bootstrap( - vm_offset_t load_start) + __unused vm_offset_t load_start) { - vm_offset_t va, tva, paddr; - ppnum_t pn; - pt_entry_t template; - pt_entry_t *pde, *pte, *ptend; - vm_size_t morevm; /* VM space for kernel map */ + vm_offset_t va; + pt_entry_t *pte; + int i; + int wpkernel, boot_arg; - vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address known to VM */ - - /* - * Set ptes_per_vm_page for general use. 
- */ - ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES; + vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address + * known to VM */ /* * The kernel's pmap is statically allocated so we don't @@ -715,165 +723,113 @@ pmap_bootstrap( */ kernel_pmap = &kernel_pmap_store; +#ifdef PMAP_QUEUE + kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */ + kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */ +#endif + kernel_pmap->ref_count = 1; + kernel_pmap->pm_obj = (vm_object_t) NULL; + kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE); + kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD; +#ifdef PAE + kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); + kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT; +#endif -#if NCPUS > 1 - lock_init(&pmap_system_lock, - FALSE, /* NOT a sleep lock */ - ETAP_VM_PMAP_SYS, - ETAP_VM_PMAP_SYS_I); -#endif /* NCPUS > 1 */ + va = (vm_offset_t)kernel_pmap->dirbase; + /* setup self referential mapping(s) */ + for (i = 0; i< NPGPTD; i++ ) { + pmap_paddr_t pa; + pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) = + (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | + INTEL_PTE_MOD | INTEL_PTE_WIRED ; +#ifdef PAE + kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID; +#endif + } - simple_lock_init(&kernel_pmap->lock, ETAP_VM_PMAP_KERNEL); - simple_lock_init(&pv_free_list_lock, ETAP_VM_PMAP_FREE); + nkpt = NKPT; - kernel_pmap->ref_count = 1; + virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; + virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); /* - * The kernel page directory has been allocated; - * its virtual address is in kpde. - * - * Enough kernel page table pages have been allocated - * to map low system memory, kernel text, kernel data/bss, - * kdb's symbols, and the page directory and page tables. - * - * No other physical memory has been allocated. + * Reserve some special page table entries/VA space for temporary + * mapping of pages. */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n); + + va = virtual_avail; + pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); /* - * Start mapping virtual memory to physical memory, 1-1, - * at end of mapped memory. + * CMAP1/CMAP2 are used for zeroing and copying pages. + * CMAP3 is used for ml_phys_read/write. 
*/ + SYSMAP(caddr_t, CM1, CA1, 1) + * (pt_entry_t *) CM1 = 0; + SYSMAP(caddr_t, CM2, CA2, 1) + * (pt_entry_t *) CM2 = 0; + SYSMAP(caddr_t, CM3, CA3, 1) + * (pt_entry_t *) CM3 = 0; - virtual_avail = phystokv(avail_start); - virtual_end = phystokv(avail_end); + /* used by pmap_pte */ + SYSMAP(caddr_t, CM4, CA4, 1) + * (pt_entry_t *) CM4 = 0; - pde = kpde; - pde += pdenum(kernel_pmap, virtual_avail); + /* DMAP user for debugger */ + SYSMAP(caddr_t, DMAP1, DADDR1, 1); + SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */ - if (pte_to_pa(*pde) == 0) { - /* This pte has not been allocated */ - pte = 0; ptend = 0; - } - else { - pte = (pt_entry_t *)ptetokv(*pde); - /* first pte of page */ - ptend = pte+NPTES; /* last pte of page */ - pte += ptenum(virtual_avail); /* point to pte that - maps first avail VA */ - pde++; /* point pde to first empty slot */ - } - template = pa_to_pte(avail_start) - | INTEL_PTE_VALID - | INTEL_PTE_WRITE; - - for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) { - if (pte >= ptend) { - pte = (pt_entry_t *)phystokv(virtual_avail); - ptend = pte + NPTES; - virtual_avail = (vm_offset_t)ptend; - if (virtual_avail == hole_start) - virtual_avail = hole_end; - *pde = PA_TO_PTE((vm_offset_t) pte) - | INTEL_PTE_VALID - | INTEL_PTE_WRITE; - pde++; - } - WRITE_PTE_FAST(pte, template) - pte++; - pte_increment_pa(template); - } - - avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS; - avail_next = avail_start; - - /* - * Figure out maximum kernel address. - * Kernel virtual space is: - * - at least three times physical memory - * - at least VM_MIN_KERNEL_ADDRESS - * - limited by VM_MAX_KERNEL_ADDRESS - */ - - morevm = 3*avail_end; - if (virtual_end + morevm > VM_MAX_KERNEL_ADDRESS) - morevm = VM_MAX_KERNEL_ADDRESS - virtual_end + 1; + lock_init(&pmap_system_lock, + FALSE, /* NOT a sleep lock */ + 0, 0); -/* - * startup requires additional virtual memory (for tables, buffers, - * etc.). The kd driver may also require some of that memory to - * access the graphics board. - * - */ - *(int *)&template = 0; + virtual_avail = va; - /* - * Leave room for kernel-loaded servers, which have been linked at - * addresses from VM_MIN_KERNEL_LOADED_ADDRESS to - * VM_MAX_KERNEL_LOADED_ADDRESS. - */ - if (virtual_end + morevm < VM_MAX_KERNEL_LOADED_ADDRESS + 1) - morevm = VM_MAX_KERNEL_LOADED_ADDRESS + 1 - virtual_end; - - virtual_end += morevm; - for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) { - if (pte >= ptend) { - pmap_next_page(&pn); - paddr = i386_ptob(pn); - pte = (pt_entry_t *)phystokv(paddr); - ptend = pte + NPTES; - *pde = PA_TO_PTE((vm_offset_t) pte) - | INTEL_PTE_VALID - | INTEL_PTE_WRITE; - pde++; - } - WRITE_PTE_FAST(pte, template) - pte++; + wpkernel = 1; + if (PE_parse_boot_arg("debug", &boot_arg)) { + if (boot_arg & DB_PRT) wpkernel = 0; + if (boot_arg & DB_NMI) wpkernel = 0; } - virtual_avail = va; - - /* Push the virtual avail address above hole_end */ - if (virtual_avail < hole_end) - virtual_avail = hole_end; + /* remap kernel text readonly if not debugging or kprintfing */ + if (wpkernel) + { + vm_offset_t myva; + pt_entry_t *ptep; + + for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { + if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB)) + continue; + ptep = pmap_pte(kernel_pmap, myva); + if (ptep) + *ptep &= ~INTEL_PTE_RW; + } + flush_tlb(); + } - /* - * c.f. 
comment above - * - */ - virtual_end = va + morevm; - while (pte < ptend) - *pte++ = 0; + simple_lock_init(&kernel_pmap->lock, 0); + simple_lock_init(&pv_free_list_lock, 0); - /* - * invalidate user virtual addresses - */ - memset((char *)kpde, + /* invalidate user virtual addresses */ + memset((char *)kernel_pmap->dirbase, 0, - pdenum(kernel_pmap,VM_MIN_KERNEL_ADDRESS)*sizeof(pt_entry_t)); - kernel_pmap->dirbase = kpde; - printf("Kernel virtual space from 0x%x to 0x%x.\n", - VM_MIN_KERNEL_ADDRESS, virtual_end); + (KPTDI) * sizeof(pd_entry_t)); - avail_start = avail_next; - printf("Available physical space from 0x%x to 0x%x\n", + kprintf("Kernel virtual space from 0x%x to 0x%x.\n", + VADDR(KPTDI,0), virtual_end); +#ifdef PAE + kprintf("Available physical space from 0x%llx to 0x%llx\n", avail_start, avail_end); - - kernel_pmap->pdirbase = kvtophys((vm_offset_t)kernel_pmap->dirbase); - - if (cpuid_features() & CPUID_FEATURE_PAT) - { - uint64_t pat; - uint32_t msr; - - msr = 0x277; - asm volatile("rdmsr" : "=A" (pat) : "c" (msr)); - - pat &= ~(0xfULL << 48); - pat |= 0x01ULL << 48; - - asm volatile("wrmsr" :: "A" (pat), "c" (msr)); - } + printf("PAE enabled\n"); +#else + kprintf("Available physical space from 0x%x to 0x%x\n", + avail_start, avail_end); +#endif } void @@ -896,14 +852,17 @@ pmap_init(void) register long npages; vm_offset_t addr; register vm_size_t s; - int i; + vm_offset_t vaddr; + ppnum_t ppn; /* * Allocate memory for the pv_head_table and its lock bits, * the modify bit array, and the pte_page table. */ - npages = atop(avail_end - avail_start); + /* zero bias all these arrays now instead of off avail_start + so we cover all memory */ + npages = i386_btop(avail_end); s = (vm_size_t) (sizeof(struct pv_entry) * npages + pv_lock_table_size(npages) + npages); @@ -933,18 +892,11 @@ pmap_init(void) pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ s = (vm_size_t) sizeof(struct pv_entry); pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */ - -#if NCPUS > 1 - /* - * Set up the pmap request lists - */ - for (i = 0; i < NCPUS; i++) { - pmap_update_list_t up = &cpu_update_list[i]; - - simple_lock_init(&up->lock, ETAP_VM_PMAP_UPDATE); - up->count = 0; - } -#endif /* NCPUS > 1 */ +#ifdef PAE + // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD); + s = 63; + pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ +#endif /* * Only now, when all of the data structures are allocated, @@ -953,8 +905,31 @@ pmap_init(void) * data structures and blow up. */ - vm_first_phys = avail_start; + /* zero bias this now so we cover all memory */ + vm_first_phys = 0; vm_last_phys = avail_end; + +#if GROW_KERNEL_FUNCTION_IMPLEMENTED + kptobj = &kptobj_object_store; + _vm_object_allocate((vm_object_size_t)NKPDE, kptobj); + kernel_pmap->pm_obj = kptobj; +#endif + + /* create pv entries for kernel pages mapped by low level + startup code. these have to exist so we can pmap_remove() + e.g. 
kext pages from the middle of our addr space */ + + vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS; + for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) { + pv_entry_t pv_e; + + pv_e = pai_to_pvh(ppn); + pv_e->va = vaddr; + vaddr += PAGE_SIZE; + pv_e->pmap = kernel_pmap; + pv_e->next = PV_ENTRY_NULL; + } + pmap_initialized = TRUE; /* @@ -962,11 +937,24 @@ pmap_init(void) */ pmap_cache_list = PMAP_NULL; pmap_cache_count = 0; - simple_lock_init(&pmap_cache_lock, ETAP_VM_PMAP_CACHE); + simple_lock_init(&pmap_cache_lock, 0); +#ifdef PMAP_QUEUE + simple_lock_init(&free_pmap_lock, 0); +#endif + } +void +x86_lowmem_free(void) +{ + /* free lowmem pages back to the vm system. we had to defer doing this + until the vm system was fully up. + the actual pages that are released are determined by which + pages the memory sizing code puts into the region table */ -#define pmap_valid_page(x) ((avail_start <= x) && (x < avail_end)) + ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS, + (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base)); +} #define valid_page(x) (pmap_initialized && pmap_valid_page(x)) @@ -975,18 +963,18 @@ boolean_t pmap_verify_free( ppnum_t pn) { - vm_offset_t phys; + pmap_paddr_t phys; pv_entry_t pv_h; int pai; spl_t spl; boolean_t result; assert(pn != vm_page_fictitious_addr); - phys = (vm_offset_t)i386_ptob(pn); + phys = (pmap_paddr_t)i386_ptob(pn); if (!pmap_initialized) return(TRUE); - if (!pmap_valid_page(phys)) + if (!pmap_valid_page(pn)) return(FALSE); PMAP_WRITE_LOCK(spl); @@ -1016,8 +1004,13 @@ pmap_t pmap_create( vm_size_t size) { - register pmap_t p; - register pmap_statistics_t stats; + register pmap_t p; +#ifdef PMAP_QUEUE + register pmap_t pro; + spl_t s; +#endif + register int i; + register vm_offset_t va; /* * A software use-only map doesn't even need a map. @@ -1027,82 +1020,60 @@ pmap_create( return(PMAP_NULL); } - /* - * Try to get cached pmap, if this fails, - * allocate a pmap struct from the pmap_zone. Then allocate - * the page descriptor table from the pd_zone. - */ - - simple_lock(&pmap_cache_lock); - while ((p = pmap_cache_list) == PMAP_NULL) { - - vm_offset_t dirbases; - register int i; - - simple_unlock(&pmap_cache_lock); - -#if NCPUS > 1 - /* - * XXX NEEDS MP DOING ALLOC logic so that if multiple processors - * XXX get here, only one allocates a chunk of pmaps. 
- * (for now we'll just let it go - safe but wasteful) - */ + p = (pmap_t) zalloc(pmap_zone); + if (PMAP_NULL == p) + panic("pmap_create zalloc"); + if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) + panic("pmap_create kmem_alloc_wired"); +#ifdef PAE + p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); + if ((vm_offset_t)NULL == p->pm_hold) { + panic("pdpt zalloc"); + } + p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); + p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */ +#endif + if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG)))) + panic("pmap_create vm_object_allocate"); + memcpy(p->dirbase, + (void *)((unsigned int)IdlePTD | KERNBASE), + NBPTD); + va = (vm_offset_t)p->dirbase; + p->pdirbase = (pd_entry_t *)(kvtophys(va)); + simple_lock_init(&p->lock, 0); + + /* setup self referential mapping(s) */ + for (i = 0; i< NPGPTD; i++ ) { + pmap_paddr_t pa; + pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + * (pd_entry_t *) (p->dirbase + PTDPTDI + i) = + (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | + INTEL_PTE_MOD | INTEL_PTE_WIRED ; +#ifdef PAE + p->pm_pdpt[i] = pa | INTEL_PTE_VALID; #endif - - /* - * Allocate a chunck of pmaps. Single kmem_alloc_wired - * operation reduces kernel map fragmentation. - */ - - if (kmem_alloc_wired(kernel_map, &dirbases, - pmap_alloc_chunk * INTEL_PGBYTES) - != KERN_SUCCESS) - panic("pmap_create.1"); - - for (i = pmap_alloc_chunk; i > 0 ; i--) { - p = (pmap_t) zalloc(pmap_zone); - if (p == PMAP_NULL) - panic("pmap_create.2"); - - /* - * Initialize pmap. Don't bother with - * ref count as cache list is threaded - * through it. It'll be set on cache removal. - */ - p->dirbase = (pt_entry_t *) dirbases; - dirbases += INTEL_PGBYTES; - memcpy(p->dirbase, kpde, INTEL_PGBYTES); - p->pdirbase = kvtophys((vm_offset_t)p->dirbase); - - simple_lock_init(&p->lock, ETAP_VM_PMAP); - p->cpus_using = 0; - - /* - * Initialize statistics. - */ - stats = &p->stats; - stats->resident_count = 0; - stats->wired_count = 0; - - /* - * Insert into cache - */ - simple_lock(&pmap_cache_lock); - p->ref_count = (int) pmap_cache_list; - pmap_cache_list = p; - pmap_cache_count++; - simple_unlock(&pmap_cache_lock); - } - simple_lock(&pmap_cache_lock); } + p->cpus_using = 0; p->stats.resident_count = 0; p->stats.wired_count = 0; - - pmap_cache_list = (pmap_t) p->ref_count; p->ref_count = 1; - pmap_cache_count--; - simple_unlock(&pmap_cache_lock); + +#ifdef PMAP_QUEUE + /* insert new pmap at head of queue hanging off kernel_pmap */ + SPLVM(s); + simple_lock(&free_pmap_lock); + p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next; + kernel_pmap->pmap_link.next = (queue_t)p; + + pro = (pmap_t) p->pmap_link.next; + p->pmap_link.prev = (queue_t)pro->pmap_link.prev; + pro->pmap_link.prev = (queue_t)p; + + + simple_unlock(&free_pmap_lock); + SPLX(s); +#endif return(p); } @@ -1118,10 +1089,12 @@ pmap_destroy( register pmap_t p) { register pt_entry_t *pdep; - register vm_offset_t pa; register int c; spl_t s; register vm_page_t m; +#ifdef PMAP_QUEUE + register pmap_t pre,pro; +#endif if (p == PMAP_NULL) return; @@ -1141,18 +1114,19 @@ pmap_destroy( * pmap that is being destroyed! 
Make sure we are * physically on the right pmap: */ - -#if NCPUS > 1 /* force pmap/cr3 update */ PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_KERNEL_ADDRESS); -#endif /* NCPUS > 1 */ - if (real_pmap[my_cpu] == p) { + if (PMAP_REAL(my_cpu) == p) { PMAP_CPU_CLR(p, my_cpu); - real_pmap[my_cpu] = kernel_pmap; - set_cr3(kernel_pmap->pdirbase); + PMAP_REAL(my_cpu) = kernel_pmap; +#ifdef PAE + set_cr3((unsigned int)kernel_pmap->pm_ppdpt); +#else + set_cr3((unsigned int)kernel_pmap->pdirbase); +#endif } mp_enable_preemption(); } @@ -1163,62 +1137,59 @@ pmap_destroy( return; /* still in use */ } +#ifdef PMAP_QUEUE + /* remove from pmap queue */ + SPLVM(s); + simple_lock(&free_pmap_lock); + + pre = (pmap_t)p->pmap_link.prev; + pre->pmap_link.next = (queue_t)p->pmap_link.next; + pro = (pmap_t)p->pmap_link.next; + pro->pmap_link.prev = (queue_t)p->pmap_link.prev; + + simple_unlock(&free_pmap_lock); + SPLX(s); +#endif + /* * Free the memory maps, then the * pmap structure. */ - pdep = p->dirbase; - while (pdep < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)]) { + + pdep = (pt_entry_t *)p->dirbase; + + while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) { + int ind; if (*pdep & INTEL_PTE_VALID) { - pa = pte_to_pa(*pdep); - vm_object_lock(pmap_object); - m = vm_page_lookup(pmap_object, pa); - if (m == VM_PAGE_NULL) + ind = pdep - (pt_entry_t *)&p->dirbase[0]; + vm_object_lock(p->pm_obj); + m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind); + if (m == VM_PAGE_NULL) { panic("pmap_destroy: pte page not in object"); + } vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; - vm_object_unlock(pmap_object); + vm_object_unlock(p->pm_obj); vm_page_unlock_queues(); /* * Clear pdes, this might be headed for the cache. */ - c = ptes_per_vm_page; - do { - *pdep = 0; - pdep++; - } while (--c > 0); + *pdep++ = 0; } else { - pdep += ptes_per_vm_page; + *pdep++ = 0; } } - /* - * XXX These asserts fail on system shutdown. - * - assert(p->stats.resident_count == 0); - assert(p->stats.wired_count == 0); - * - */ - - /* - * Add to cache if not already full - */ - simple_lock(&pmap_cache_lock); - if (pmap_cache_count <= pmap_cache_max) { - p->ref_count = (int) pmap_cache_list; - pmap_cache_list = p; - pmap_cache_count++; - simple_unlock(&pmap_cache_lock); - } - else { - simple_unlock(&pmap_cache_lock); - kmem_free(kernel_map, (vm_offset_t)p->dirbase, INTEL_PGBYTES); - zfree(pmap_zone, (vm_offset_t) p); - } + vm_object_deallocate(p->pm_obj); + kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); +#ifdef PAE + zfree(pdpt_zone, (void *)p->pm_hold); +#endif + zfree(pmap_zone, p); } /* @@ -1263,7 +1234,7 @@ pmap_remove_range( register pt_entry_t *cpte; int num_removed, num_unwired; int pai; - vm_offset_t pa; + pmap_paddr_t pa; #if DEBUG_PTE_PAGE if (pmap != kernel_pmap) @@ -1273,7 +1244,7 @@ pmap_remove_range( num_unwired = 0; for (cpte = spte; cpte < epte; - cpte += ptes_per_vm_page, va += PAGE_SIZE) { + cpte++, va += PAGE_SIZE) { pa = pte_to_pa(*cpte); if (pa == 0) @@ -1283,18 +1254,15 @@ pmap_remove_range( if (iswired(*cpte)) num_unwired++; - if (!valid_page(pa)) { + if (!valid_page(i386_btop(pa))) { /* * Outside range of managed physical memory. * Just remove the mappings. */ - register int i = ptes_per_vm_page; register pt_entry_t *lpte = cpte; - do { - *lpte = 0; - lpte++; - } while (--i > 0); + + *lpte = 0; continue; } @@ -1305,17 +1273,13 @@ pmap_remove_range( * Get the modify and reference bits. 
*/ { - register int i; register pt_entry_t *lpte; - i = ptes_per_vm_page; lpte = cpte; - do { pmap_phys_attributes[pai] |= *lpte & (PHYS_MODIFIED|PHYS_REFERENCED); *lpte = 0; - lpte++; - } while (--i > 0); + } /* @@ -1374,15 +1338,14 @@ pmap_remove_range( */ void pmap_remove_some_phys( - pmap_t map, - ppnum_t pn) + __unused pmap_t map, + __unused ppnum_t pn) { /* Implement to support working set code */ } - /* * Remove the given range of addresses * from the specified map. @@ -1403,6 +1366,7 @@ pmap_remove( register pt_entry_t *spte, *epte; vm_offset_t l; vm_offset_t s, e; + vm_offset_t orig_s; if (map == PMAP_NULL) return; @@ -1413,14 +1377,9 @@ pmap_remove( panic("pmap_remove addr overflow"); } - s = (vm_offset_t)low32(s64); + orig_s = s = (vm_offset_t)low32(s64); e = (vm_offset_t)low32(e64); - /* - * Invalidate the translation buffer first - */ - PMAP_UPDATE_TLBS(map, s, e); - pde = pmap_pde(map, s); while (s < e) { @@ -1428,7 +1387,7 @@ pmap_remove( if (l > e) l = e; if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)ptetokv(*pde); + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); spte = &spte[ptenum(s)]; epte = &spte[intel_btop(l-s)]; pmap_remove_range(map, s, spte, epte); @@ -1437,6 +1396,8 @@ pmap_remove( pde++; } + PMAP_UPDATE_TLBS(map, orig_s, e); + PMAP_READ_UNLOCK(map, spl); } @@ -1459,11 +1420,11 @@ pmap_page_protect( register pmap_t pmap; spl_t spl; boolean_t remove; - vm_offset_t phys; + pmap_paddr_t phys; assert(pn != vm_page_fictitious_addr); - phys = (vm_offset_t)i386_ptob(pn); - if (!valid_page(phys)) { + phys = (pmap_paddr_t)i386_ptob(pn); + if (!valid_page(pn)) { /* * Not a managed page. */ @@ -1504,6 +1465,7 @@ pmap_page_protect( prev = pv_e = pv_h; do { + register vm_offset_t va; pmap = pv_e->pmap; /* * Lock the pmap to block pmap_extract and similar routines. @@ -1511,7 +1473,6 @@ pmap_page_protect( simple_lock(&pmap->lock); { - register vm_offset_t va; va = pv_e->va; pte = pmap_pte(pmap, va); @@ -1522,10 +1483,6 @@ pmap_page_protect( /* assert(*pte & INTEL_PTE_VALID); XXX */ /* assert(pte_to_phys(*pte) == phys); */ - /* - * Invalidate TLBs for all CPUs using this mapping. - */ - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } /* @@ -1537,13 +1494,10 @@ pmap_page_protect( * Remove the mapping, collecting any modify bits. */ { - register int i = ptes_per_vm_page; - - do { pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); *pte++ = 0; - } while (--i > 0); + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } assert(pmap->stats.resident_count >= 1); @@ -1570,13 +1524,10 @@ pmap_page_protect( /* * Write-protect. */ - register int i = ptes_per_vm_page; - do { *pte &= ~INTEL_PTE_WRITE; pte++; - } while (--i > 0); - + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); /* * Advance prev. */ @@ -1602,6 +1553,22 @@ pmap_page_protect( PMAP_WRITE_UNLOCK(spl); } +/* + * Routine: + * pmap_disconnect + * + * Function: + * Disconnect all mappings for this page and return reference and change status + * in generic format. + * + */ +unsigned int pmap_disconnect( + ppnum_t pa) +{ + pmap_page_protect(pa, 0); /* disconnect the page */ + return (pmap_get_refmod(pa)); /* return ref/chg status */ +} + /* * Set the physical protection on the * specified range of this map as requested. 
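One pattern recurs throughout the pmap_remove_range, pmap_page_protect, and pmap_disconnect hunks above: because a VM page now maps to exactly one PTE, the old ptes_per_vm_page loops collapse to a single store, and the hardware reference/modify bits are OR-ed into pmap_phys_attributes before the entry is cleared, with the TLB flush (PMAP_UPDATE_TLBS) moved to after the write. A minimal sketch of that capture-then-clear step, using the Intel accessed/dirty bit positions directly; the direct mask-and-OR in the patch works because the PHYS_* attribute constants are defined as those same PTE bits:

#include <stdio.h>
#include <stdint.h>

#define PTE_REF 0x20u    /* Intel "accessed" bit (bit 5) */
#define PTE_MOD 0x40u    /* Intel "dirty" bit (bit 6) */

static uint8_t phys_attributes[4];   /* one attribute byte per physical page */

/* Harvest ref/mod state into the attribute array, then kill the PTE.
 * Real code must follow this with a TLB invalidation. */
static void capture_and_clear(uint32_t *pte, int pai)
{
    phys_attributes[pai] |= (uint8_t)(*pte & (PTE_REF | PTE_MOD));
    *pte = 0;
}

int main(void)
{
    uint32_t pte = 0x1000u | PTE_REF | PTE_MOD;  /* mapped, used, dirty */

    capture_and_clear(&pte, 0);
    printf("attrs=0x%x pte=0x%x\n",
           (unsigned)phys_attributes[0], (unsigned)pte);
    return 0;
}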
@@ -1618,6 +1585,7 @@ pmap_protect( register pt_entry_t *spte, *epte; vm_offset_t l; spl_t spl; + vm_offset_t orig_s = s; if (map == PMAP_NULL) @@ -1638,35 +1606,16 @@ pmap_protect( return; } - /* - * If write-protecting in the kernel pmap, - * remove the mappings; the i386 ignores - * the write-permission bit in kernel mode. - * - * XXX should be #if'd for i386 - */ - - if (cpuid_family() == CPUID_FAMILY_386) - if (map == kernel_pmap) { - pmap_remove(map, (addr64_t)s, (addr64_t)e); - return; - } - SPLVM(spl); simple_lock(&map->lock); - /* - * Invalidate the translation buffer first - */ - PMAP_UPDATE_TLBS(map, s, e); - pde = pmap_pde(map, s); while (s < e) { l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); if (l > e) l = e; if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)ptetokv(*pde); + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); spte = &spte[ptenum(s)]; epte = &spte[intel_btop(l-s)]; @@ -1680,6 +1629,8 @@ pmap_protect( pde++; } + PMAP_UPDATE_TLBS(map, orig_s, e); + simple_unlock(&map->lock); SPLX(spl); } @@ -1709,15 +1660,15 @@ pmap_enter( { register pt_entry_t *pte; register pv_entry_t pv_h; - register int i, pai; + register int pai; pv_entry_t pv_e; pt_entry_t template; spl_t spl; - vm_offset_t old_pa; - vm_offset_t pa = (vm_offset_t)i386_ptob(pn); + pmap_paddr_t old_pa; + pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n", - current_thread()->top_act, + current_thread(), current_thread(), pmap, v, pn); @@ -1727,32 +1678,6 @@ pmap_enter( if (pmap == PMAP_NULL) return; - if (cpuid_family() == CPUID_FAMILY_386) - if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0 - && !wired /* hack for io_wire */ ) { - /* - * Because the 386 ignores write protection in kernel mode, - * we cannot enter a read-only kernel mapping, and must - * remove an existing mapping if changing it. - * - * XXX should be #if'd for i386 - */ - PMAP_READ_LOCK(pmap, spl); - - pte = pmap_pte(pmap, v); - if (pte != PT_ENTRY_NULL && pte_to_pa(*pte) != 0) { - /* - * Invalidate the translation buffer, - * then remove the mapping. - */ - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); - pmap_remove_range(pmap, v, pte, - pte + ptes_per_vm_page); - } - PMAP_READ_UNLOCK(pmap, spl); - return; - } - /* * Must allocate a new pvlist entry while we're unlocked; * zalloc may cause pageout (which will lock the pmap system). @@ -1761,7 +1686,7 @@ pmap_enter( * the allocated entry later (if we no longer need it). */ pv_e = PV_ENTRY_NULL; -Retry: + PMAP_READ_LOCK(pmap, spl); /* @@ -1814,15 +1739,10 @@ Retry: } } - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); - i = ptes_per_vm_page; - do { if (*pte & INTEL_PTE_MOD) template |= INTEL_PTE_MOD; WRITE_PTE(pte, template) - pte++; - pte_increment_pa(template); - } while (--i > 0); + pte++; goto Done; } @@ -1845,9 +1765,8 @@ Retry: * managed, step 2) is skipped. */ - if (old_pa != (vm_offset_t) 0) { + if (old_pa != (pmap_paddr_t) 0) { - PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); #if DEBUG_PTE_PAGE if (pmap != kernel_pmap) @@ -1860,7 +1779,7 @@ Retry: * to overwrite the old one. 
*/ - if (valid_page(old_pa)) { + if (valid_page(i386_btop(old_pa))) { pai = pa_index(old_pa); LOCK_PVH(pai); @@ -1871,20 +1790,10 @@ Retry: assert(pmap->stats.wired_count >= 1); pmap->stats.wired_count--; } - i = ptes_per_vm_page; - do { + pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); WRITE_PTE(pte, 0) - pte++; - pte_increment_pa(template); - } while (--i > 0); - - /* - * Put pte back to beginning of page since it'll be - * used later to enter the new page. - */ - pte -= ptes_per_vm_page; /* * Remove the mapping from the pvlist for @@ -1933,7 +1842,7 @@ Retry: * at Step 3) will enter new mapping (overwriting old * one). Do removal part of accounting. */ - old_pa = (vm_offset_t) 0; + old_pa = (pmap_paddr_t) 0; assert(pmap->stats.resident_count >= 1); pmap->stats.resident_count--; if (iswired(*pte)) { @@ -1941,9 +1850,10 @@ Retry: pmap->stats.wired_count--; } } + } - if (valid_page(pa)) { + if (valid_page(i386_btop(pa))) { /* * Step 2) Enter the mapping in the PV list for this @@ -2011,9 +1921,10 @@ RetryPvList: * Invalidate the translation buffer, * then remove the mapping. */ - PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE); pmap_remove_range(pmap, e->va, opte, - opte + ptes_per_vm_page); + opte + 1); + PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE); + /* * We could have remove the head entry, * so there could be no more entries @@ -2082,14 +1993,7 @@ RetryPvList: if (pv_e == PV_ENTRY_NULL) { PV_ALLOC(pv_e); if (pv_e == PV_ENTRY_NULL) { - UNLOCK_PVH(pai); - PMAP_READ_UNLOCK(pmap, spl); - - /* - * Refill from zone. - */ - pv_e = (pv_entry_t) zalloc(pv_list_zone); - goto Retry; + panic("pmap no pv_e's"); } } pv_e->va = v; @@ -2130,13 +2034,12 @@ RetryPvList: template |= INTEL_PTE_WIRED; pmap->stats.wired_count++; } - i = ptes_per_vm_page; - do { - WRITE_PTE(pte, template) - pte++; - pte_increment_pa(template); - } while (--i > 0); + + WRITE_PTE(pte, template) + Done: + PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); + if (pv_e != PV_ENTRY_NULL) { PV_FREE(pv_e); } @@ -2158,7 +2061,6 @@ pmap_change_wiring( boolean_t wired) { register pt_entry_t *pte; - register int i; spl_t spl; #if 1 @@ -2176,10 +2078,7 @@ pmap_change_wiring( * wiring down mapping */ map->stats.wired_count++; - i = ptes_per_vm_page; - do { - *pte++ |= INTEL_PTE_WIRED; - } while (--i > 0); + *pte++ |= INTEL_PTE_WIRED; } else if (!wired && iswired(*pte)) { /* @@ -2187,10 +2086,7 @@ pmap_change_wiring( */ assert(map->stats.wired_count >= 1); map->stats.wired_count--; - i = ptes_per_vm_page; - do { - *pte++ &= ~INTEL_PTE_WIRED; - } while (--i > 0); + *pte++ &= ~INTEL_PTE_WIRED; } PMAP_READ_UNLOCK(map, spl); @@ -2201,20 +2097,23 @@ pmap_change_wiring( } -ppnum_t +ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) { - pt_entry_t *ptp; - vm_offset_t a32; - ppnum_t ppn; - - if (value_64bit(va)) panic("pmap_find_phys 64 bit value"); - a32 = (vm_offset_t)low32(va); - ptp = pmap_pte(pmap, a32); - if (PT_ENTRY_NULL == ptp) - return 0; - ppn = (ppnum_t)i386_btop(pte_to_pa(*ptp)); - return ppn; + pt_entry_t *ptp; + vm_offset_t a32; + ppnum_t ppn; + + if (value_64bit(va)) + panic("pmap_find_phys 64 bit value"); + a32 = (vm_offset_t) low32(va); + ptp = pmap_pte(pmap, a32); + if (PT_ENTRY_NULL == ptp) { + ppn = 0; + } else { + ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp)); + } + return ppn; } /* @@ -2222,6 +2121,9 @@ pmap_find_phys(pmap_t pmap, addr64_t va) * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
+ * Change to shim for backwards compatibility but will not + * work for 64 bit systems. Some old drivers that we cannot + * change need this. */ vm_offset_t @@ -2229,23 +2131,18 @@ pmap_extract( register pmap_t pmap, vm_offset_t va) { - register pt_entry_t *pte; - register vm_offset_t pa; - spl_t spl; - - SPLVM(spl); - simple_lock(&pmap->lock); - if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL) - pa = (vm_offset_t) 0; - else if (!(*pte & INTEL_PTE_VALID)) - pa = (vm_offset_t) 0; - else - pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK); - simple_unlock(&pmap->lock); - SPLX(spl); - return(pa); + ppnum_t ppn; + vm_offset_t vaddr; + + vaddr = (vm_offset_t)0; + ppn = pmap_find_phys(pmap, (addr64_t)va); + if (ppn) { + vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK); + } + return (vaddr); } + /* * Routine: pmap_expand * @@ -2268,21 +2165,15 @@ pmap_expand( { pt_entry_t *pdp; register vm_page_t m; - register vm_offset_t pa; + register pmap_paddr_t pa; register int i; spl_t spl; ppnum_t pn; - if (map == kernel_pmap) - panic("pmap_expand"); - - /* - * We cannot allocate the pmap_object in pmap_init, - * because it is called before the zone package is up. - * Allocate it now if it is missing. - */ - if (pmap_object == VM_OBJECT_NULL) - pmap_object = vm_object_allocate(avail_end); + if (map == kernel_pmap) { + pmap_growkernel(v); + return; + } /* * Allocate a VM page for the level 2 page table entries. @@ -2291,23 +2182,24 @@ pmap_expand( VM_PAGE_WAIT(); /* - * Map the page to its physical address so that it + * put the page into the pmap's obj list so it * can be found later. */ pn = m->phys_page; pa = i386_ptob(pn); - vm_object_lock(pmap_object); - vm_page_insert(m, pmap_object, (vm_object_offset_t)pa); + i = pdenum(map, v); + vm_object_lock(map->pm_obj); + vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); vm_page_lock_queues(); vm_page_wire(m); inuse_ptepages_count++; - vm_object_unlock(pmap_object); + vm_object_unlock(map->pm_obj); vm_page_unlock_queues(); /* * Zero the page. */ - memset((void *)phystokv(pa), 0, PAGE_SIZE); + pmap_zero_page(pn); PMAP_READ_LOCK(map, spl); /* @@ -2315,12 +2207,12 @@ pmap_expand( */ if (pmap_pte(map, v) != PT_ENTRY_NULL) { PMAP_READ_UNLOCK(map, spl); - vm_object_lock(pmap_object); + vm_object_lock(map->pm_obj); vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; vm_page_unlock_queues(); - vm_object_unlock(pmap_object); + vm_object_unlock(map->pm_obj); return; } @@ -2330,16 +2222,11 @@ pmap_expand( * set several page directory entries. */ - i = ptes_per_vm_page; - pdp = &map->dirbase[pdenum(map, v) & ~(i-1)]; - do { + pdp = &map->dirbase[pdenum(map, v)]; *pdp = pa_to_pte(pa) | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_WRITE; - pdp++; - pa += INTEL_PGBYTES; - } while (--i > 0); PMAP_READ_UNLOCK(map, spl); return; @@ -2368,21 +2255,29 @@ pmap_copy( #endif/* 0 */ /* - * pmap_sync_caches_phys(ppnum_t pa) + * pmap_sync_page_data_phys(ppnum_t pa) * * Invalidates all of the instruction cache on a physical page and * pushes any dirty data from the data cache for the same physical page + * Not required in i386. */ - -void pmap_sync_caches_phys(ppnum_t pa) +void +pmap_sync_page_data_phys(__unused ppnum_t pa) { -// if (!(cpuid_features() & CPUID_FEATURE_SS)) - { - __asm__ volatile("wbinvd"); - } return; } +/* + * pmap_sync_page_attributes_phys(ppnum_t pa) + * + * Write back and invalidate all cachelines on a physical page. 
+ */ +void +pmap_sync_page_attributes_phys(ppnum_t pa) +{ + cache_flush_page_phys(pa); +} + int collect_ref; int collect_unref; @@ -2403,7 +2298,6 @@ pmap_collect( { register pt_entry_t *pdp, *ptp; pt_entry_t *eptp; - vm_offset_t pa; int wired; spl_t spl; @@ -2417,21 +2311,19 @@ pmap_collect( * Garbage collect map. */ PMAP_READ_LOCK(p, spl); - PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS); - for (pdp = p->dirbase; - pdp < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)]; - pdp += ptes_per_vm_page) + for (pdp = (pt_entry_t *)p->dirbase; + pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; + pdp++) { - if (*pdp & INTEL_PTE_VALID) + if (*pdp & INTEL_PTE_VALID) { if(*pdp & INTEL_PTE_REF) { *pdp &= ~INTEL_PTE_REF; collect_ref++; } else { collect_unref++; - pa = pte_to_pa(*pdp); - ptp = (pt_entry_t *)phystokv(pa); - eptp = ptp + NPTES*ptes_per_vm_page; + ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase)); + eptp = ptp + NPTEPG; /* * If the pte page has any wired mappings, we cannot @@ -2452,21 +2344,15 @@ pmap_collect( * Remove the virtual addresses mapped by this pte page. */ pmap_remove_range(p, - pdetova(pdp - p->dirbase), + pdetova(pdp - (pt_entry_t *)p->dirbase), ptp, eptp); /* * Invalidate the page directory pointer. */ - { - register int i = ptes_per_vm_page; - register pt_entry_t *pdep = pdp; - do { - *pdep++ = 0; - } while (--i > 0); - } - + *pdp = 0x0; + PMAP_READ_UNLOCK(p, spl); /* @@ -2475,21 +2361,23 @@ pmap_collect( { register vm_page_t m; - vm_object_lock(pmap_object); - m = vm_page_lookup(pmap_object, pa); + vm_object_lock(p->pm_obj); + m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0])); if (m == VM_PAGE_NULL) panic("pmap_collect: pte page not in object"); vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; vm_page_unlock_queues(); - vm_object_unlock(pmap_object); + vm_object_unlock(p->pm_obj); } PMAP_READ_LOCK(p, spl); } - } + } + } } + PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS); PMAP_READ_UNLOCK(p, spl); return; @@ -2508,49 +2396,16 @@ pmap_kernel(void) } #endif/* 0 */ -/* - * pmap_zero_page zeros the specified (machine independent) page. - * See machine/phys.c or machine/phys.s for implementation. - */ -#if 0 void -pmap_zero_page( - register vm_offset_t phys) +pmap_copy_page(src, dst) + ppnum_t src; + ppnum_t dst; { - register int i; - - assert(phys != vm_page_fictitious_addr); - i = PAGE_SIZE / INTEL_PGBYTES; - phys = intel_pfn(phys); - - while (i--) - zero_phys(phys++); + bcopy_phys((addr64_t)i386_ptob(src), + (addr64_t)i386_ptob(dst), + PAGE_SIZE); } -#endif/* 0 */ - -/* - * pmap_copy_page copies the specified (machine independent) page. - * See machine/phys.c or machine/phys.s for implementation. 
- */ -#if 0 -void -pmap_copy_page( - vm_offset_t src, - vm_offset_t dst) -{ - int i; - assert(src != vm_page_fictitious_addr); - assert(dst != vm_page_fictitious_addr); - i = PAGE_SIZE / INTEL_PGBYTES; - - while (i--) { - copy_phys(intel_pfn(src), intel_pfn(dst)); - src += INTEL_PGBYTES; - dst += INTEL_PGBYTES; - } -} -#endif/* 0 */ /* * Routine: pmap_pageable @@ -2568,13 +2423,13 @@ pmap_copy_page( */ void pmap_pageable( - pmap_t pmap, - vm_offset_t start, - vm_offset_t end, - boolean_t pageable) + __unused pmap_t pmap, + __unused vm_offset_t start_addr, + __unused vm_offset_t end_addr, + __unused boolean_t pageable) { #ifdef lint - pmap++; start++; end++; pageable++; + pmap++; start_addr++; end_addr++; pageable++; #endif /* lint */ } @@ -2583,7 +2438,7 @@ pmap_pageable( */ void phys_attribute_clear( - vm_offset_t phys, + ppnum_t pn, int bits) { pv_entry_t pv_h; @@ -2592,9 +2447,10 @@ phys_attribute_clear( int pai; register pmap_t pmap; spl_t spl; + pmap_paddr_t phys; - assert(phys != vm_page_fictitious_addr); - if (!valid_page(phys)) { + assert(pn != vm_page_fictitious_addr); + if (!valid_page(pn)) { /* * Not a managed page. */ @@ -2607,7 +2463,7 @@ phys_attribute_clear( */ PMAP_WRITE_LOCK(spl); - + phys = i386_ptob(pn); pai = pa_index(phys); pv_h = pai_to_pvh(pai); @@ -2642,22 +2498,15 @@ phys_attribute_clear( /* assert(pte_to_phys(*pte) == phys); */ #endif - /* - * Invalidate TLBs for all CPUs using this mapping. - */ - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); - } - /* * Clear modify or reference bits. */ - { - register int i = ptes_per_vm_page; - do { + *pte++ &= ~bits; - } while (--i > 0); + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } simple_unlock(&pmap->lock); + } } @@ -2671,7 +2520,7 @@ phys_attribute_clear( */ boolean_t phys_attribute_test( - vm_offset_t phys, + ppnum_t pn, int bits) { pv_entry_t pv_h; @@ -2680,9 +2529,10 @@ phys_attribute_test( int pai; register pmap_t pmap; spl_t spl; + pmap_paddr_t phys; - assert(phys != vm_page_fictitious_addr); - if (!valid_page(phys)) { + assert(pn != vm_page_fictitious_addr); + if (!valid_page(pn)) { /* * Not a managed page. */ @@ -2695,7 +2545,7 @@ phys_attribute_test( */ PMAP_WRITE_LOCK(spl); - + phys = i386_ptob(pn); pai = pa_index(phys); pv_h = pai_to_pvh(pai); @@ -2740,15 +2590,11 @@ phys_attribute_test( * Check modify or reference bits. */ { - register int i = ptes_per_vm_page; - - do { if (*pte++ & bits) { simple_unlock(&pmap->lock); PMAP_WRITE_UNLOCK(spl); return (TRUE); } - } while (--i > 0); } simple_unlock(&pmap->lock); } @@ -2762,13 +2608,14 @@ phys_attribute_test( */ void phys_attribute_set( - vm_offset_t phys, + ppnum_t pn, int bits) { int spl; + pmap_paddr_t phys; - assert(phys != vm_page_fictitious_addr); - if (!valid_page(phys)) { + assert(pn != vm_page_fictitious_addr); + if (!valid_page(pn)) { /* * Not a managed page. */ @@ -2780,7 +2627,7 @@ phys_attribute_set( * the phys attributes array. Don't need to bother with * ptes because the test routine looks here first. 
*/ - + phys = i386_ptob(pn); PMAP_WRITE_LOCK(spl); pmap_phys_attributes[pa_index(phys)] |= bits; PMAP_WRITE_UNLOCK(spl); @@ -2793,8 +2640,7 @@ phys_attribute_set( void pmap_set_modify( ppnum_t pn) { - vm_offset_t phys = (vm_offset_t)i386_ptob(pn); - phys_attribute_set(phys, PHYS_MODIFIED); + phys_attribute_set(pn, PHYS_MODIFIED); } /* @@ -2805,8 +2651,7 @@ void pmap_clear_modify( ppnum_t pn) { - vm_offset_t phys = (vm_offset_t)i386_ptob(pn); - phys_attribute_clear(phys, PHYS_MODIFIED); + phys_attribute_clear(pn, PHYS_MODIFIED); } /* @@ -2820,8 +2665,7 @@ boolean_t pmap_is_modified( ppnum_t pn) { - vm_offset_t phys = (vm_offset_t)i386_ptob(pn); - return (phys_attribute_test(phys, PHYS_MODIFIED)); + return (phys_attribute_test(pn, PHYS_MODIFIED)); } /* @@ -2834,8 +2678,13 @@ void pmap_clear_reference( ppnum_t pn) { - vm_offset_t phys = (vm_offset_t)i386_ptob(pn); - phys_attribute_clear(phys, PHYS_REFERENCED); + phys_attribute_clear(pn, PHYS_REFERENCED); +} + +void +pmap_set_reference(ppnum_t pn) +{ + phys_attribute_set(pn, PHYS_REFERENCED); } /* @@ -2849,8 +2698,34 @@ boolean_t pmap_is_referenced( ppnum_t pn) { - vm_offset_t phys = (vm_offset_t)i386_ptob(pn); - return (phys_attribute_test(phys, PHYS_REFERENCED)); + return (phys_attribute_test(pn, PHYS_REFERENCED)); +} + +/* + * pmap_get_refmod(phys) + * returns the referenced and modified bits of the specified + * physical page. + */ +unsigned int +pmap_get_refmod(ppnum_t pa) +{ + return ( ((phys_attribute_test(pa, PHYS_MODIFIED))? VM_MEM_MODIFIED : 0) + | ((phys_attribute_test(pa, PHYS_REFERENCED))? VM_MEM_REFERENCED : 0)); +} + +/* + * pmap_clear_refmod(phys, mask) + * clears the referenced and modified bits as specified by the mask + * of the specified physical page. + */ +void +pmap_clear_refmod(ppnum_t pa, unsigned int mask) +{ + unsigned int x86Mask; + + x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0) + | ((mask & VM_MEM_REFERENCED)? 
PHYS_REFERENCED : 0)); + phys_attribute_clear(pa, x86Mask); } /* @@ -2870,24 +2745,20 @@ pmap_modify_pages( register pt_entry_t *pde; register pt_entry_t *spte, *epte; vm_offset_t l; + vm_offset_t orig_s = s; if (map == PMAP_NULL) return; PMAP_READ_LOCK(map, spl); - /* - * Invalidate the translation buffer first - */ - PMAP_UPDATE_TLBS(map, s, e); - pde = pmap_pde(map, s); while (s && s < e) { l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); if (l > e) l = e; if (*pde & INTEL_PTE_VALID) { - spte = (pt_entry_t *)ptetokv(*pde); + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); if (l) { spte = &spte[ptenum(s)]; epte = &spte[intel_btop(l-s)]; @@ -2905,22 +2776,26 @@ pmap_modify_pages( s = l; pde++; } + PMAP_UPDATE_TLBS(map, orig_s, e); PMAP_READ_UNLOCK(map, spl); } void -invalidate_icache(vm_offset_t addr, unsigned cnt, int phys) +invalidate_icache(__unused vm_offset_t addr, + __unused unsigned cnt, + __unused int phys) { return; } void -flush_dcache(vm_offset_t addr, unsigned count, int phys) +flush_dcache(__unused vm_offset_t addr, + __unused unsigned count, + __unused int phys) { return; } -#if NCPUS > 1 /* * TLB Coherence Code (TLB "shootdown" code) * @@ -2988,8 +2863,8 @@ void signal_cpus( cpu_set use_list, pmap_t pmap, - vm_offset_t start, - vm_offset_t end) + vm_offset_t start_addr, + vm_offset_t end_addr) { register int which_cpu, j; register pmap_update_list_t update_list_p; @@ -2997,7 +2872,7 @@ signal_cpus( while ((which_cpu = ffs((unsigned long)use_list)) != 0) { which_cpu -= 1; /* convert to 0 origin */ - update_list_p = &cpu_update_list[which_cpu]; + update_list_p = cpu_update_list(which_cpu); simple_lock(&update_list_p->lock); j = update_list_p->count; @@ -3012,16 +2887,16 @@ signal_cpus( } else { update_list_p->item[j].pmap = pmap; - update_list_p->item[j].start = start; - update_list_p->item[j].end = end; + update_list_p->item[j].start = start_addr; + update_list_p->item[j].end = end_addr; update_list_p->count = j+1; } - cpu_update_needed[which_cpu] = TRUE; + cpu_update_needed(which_cpu) = TRUE; simple_unlock(&update_list_p->lock); /* if its the kernel pmap, ignore cpus_idle */ if (((cpus_idle & (1 << which_cpu)) == 0) || - (pmap == kernel_pmap) || real_pmap[which_cpu] == pmap) + (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap) { i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC); } @@ -3040,7 +2915,7 @@ process_pmap_updates( mp_disable_preemption(); my_cpu = cpu_number(); - update_list_p = &cpu_update_list[my_cpu]; + update_list_p = cpu_update_list(my_cpu); simple_lock(&update_list_p->lock); for (j = 0; j < update_list_p->count; j++) { @@ -3050,8 +2925,12 @@ process_pmap_updates( if (pmap->ref_count <= 0) { PMAP_CPU_CLR(pmap, my_cpu); - real_pmap[my_cpu] = kernel_pmap; - set_cr3(kernel_pmap->pdirbase); + PMAP_REAL(my_cpu) = kernel_pmap; +#ifdef PAE + set_cr3((unsigned int)kernel_pmap->pm_ppdpt); +#else + set_cr3((unsigned int)kernel_pmap->pdirbase); +#endif } else INVALIDATE_TLB(pmap, update_list_p->item[j].start, @@ -3059,7 +2938,7 @@ process_pmap_updates( } } update_list_p->count = 0; - cpu_update_needed[my_cpu] = FALSE; + cpu_update_needed(my_cpu) = FALSE; simple_unlock(&update_list_p->lock); mp_enable_preemption(); } @@ -3087,7 +2966,7 @@ pmap_update_interrupt(void) */ s = splhigh(); - my_pmap = real_pmap[my_cpu]; + my_pmap = PMAP_REAL(my_cpu); if (!(my_pmap && pmap_in_use(my_pmap, my_cpu))) my_pmap = kernel_pmap; @@ -3105,8 +2984,8 @@ pmap_update_interrupt(void) * Wait for any pmap updates in progress, on either user * or kernel pmap. 
*/ - while (*(volatile hw_lock_t)&my_pmap->lock.interlock || - *(volatile hw_lock_t)&kernel_pmap->lock.interlock) { + while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) || + *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) { LOOP_CHECK("pmap_update_interrupt", my_pmap); cpu_pause(); } @@ -3115,21 +2994,20 @@ pmap_update_interrupt(void) i_bit_set(my_cpu, &cpus_active); - } while (cpu_update_needed[my_cpu]); + } while (cpu_update_needed(my_cpu)); splx(s); mp_enable_preemption(); } -#endif /* NCPUS > 1 */ #if MACH_KDB /* show phys page mappings and attributes */ -extern void db_show_page(vm_offset_t pa); +extern void db_show_page(pmap_paddr_t pa); void -db_show_page(vm_offset_t pa) +db_show_page(pmap_paddr_t pa) { pv_entry_t pv_h; int pai; @@ -3192,14 +3070,14 @@ db_show_vaddrs( pdecnt = ptecnt = 0; pdep = &dirbase[0]; - for (y = 0; y < NPDES; y++, pdep++) { + for (y = 0; y < NPDEPG; y++, pdep++) { if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) { continue; } pdecnt++; ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK); db_printf("dir[%4d]: 0x%x\n", y, *pdep); - for (x = 0; x < NPTES; x++, ptep++) { + for (x = 0; x < NPTEPG; x++, ptep++) { if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) { continue; } @@ -3223,9 +3101,9 @@ db_show_vaddrs( int pmap_list_resident_pages( - register pmap_t pmap, - register vm_offset_t *listp, - register int space) + __unused pmap_t pmap, + __unused vm_offset_t *listp, + __unused int space) { return 0; } @@ -3279,42 +3157,260 @@ pmap_movepage(unsigned long from, unsigned long to, vm_size_t size) PMAP_READ_UNLOCK(kernel_pmap, spl); } +#endif /* MACH_BSD */ -kern_return_t bmapvideo(vm_offset_t *info); -kern_return_t bmapvideo(vm_offset_t *info) { +/* temporary workaround */ +boolean_t +coredumpok(vm_map_t map, vm_offset_t va) +{ + pt_entry_t *ptep; - extern struct vc_info vinfo; -#ifdef NOTIMPLEMENTED - (void)copyout((char *)&vinfo, (char *)info, sizeof(struct vc_info)); /* Copy out the video info */ -#endif - return KERN_SUCCESS; + ptep = pmap_pte(map->pmap, va); + if (0 == ptep) + return FALSE; + return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)); } -kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr); -kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr) { - -#ifdef NOTIMPLEMENTED - pmap_map_block(current_act()->task->map->pmap, va, pa, size, prot, attr); /* Map it in */ +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ +#if GROW_KERNEL_FUNCTION_IMPLEMENTED + struct pmap *pmap; + int s; + vm_offset_t ptppaddr; + ppnum_t ppn; + vm_page_t nkpg; + pd_entry_t newpdir = 0; + + /* + * Serialize. + * Losers return to try again until the winner completes the work. 
+ */ + if (kptobj == 0) panic("growkernel 0"); + if (!vm_object_lock_try(kptobj)) { + return; + } + + vm_page_lock_queues(); + + s = splhigh(); + + /* + * If this is the first time thru, locate the end of the + * kernel page table entries and set nkpt to the current + * number of kernel page table pages + */ + + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + + while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + nkpt++; + } + } + + /* + * Now allocate and map the required number of page tables + */ + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + while (kernel_vm_end < addr) { + if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + continue; /* someone already filled this one */ + } + + nkpg = vm_page_alloc(kptobj, nkpt); + if (!nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + + nkpt++; + vm_page_wire(nkpg); + ppn = nkpg->phys_page; + pmap_zero_page(ppn); + ptppaddr = i386_ptob(ppn); + newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID | + INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD); + pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir; + + simple_lock(&free_pmap_lock); + for (pmap = (struct pmap *)kernel_pmap->pmap_link.next; + pmap != kernel_pmap ; + pmap = (struct pmap *)pmap->pmap_link.next ) { + *pmap_pde(pmap, kernel_vm_end) = newpdir; + } + simple_unlock(&free_pmap_lock); + } + splx(s); + vm_page_unlock_queues(); + vm_object_unlock(kptobj); #endif - return KERN_SUCCESS; } -kern_return_t bmapmapr(vm_offset_t va); -kern_return_t bmapmapr(vm_offset_t va) { - -#ifdef NOTIMPLEMENTED - mapping_remove(current_act()->task->map->pmap, va); /* Remove map */ -#endif - return KERN_SUCCESS; +pt_entry_t * +pmap_mapgetpte(vm_map_t map, vm_offset_t v) +{ + return pmap_pte(map->pmap, v); } -#endif -/* temporary workaround */ boolean_t -coredumpok(vm_map_t map, vm_offset_t va) +phys_page_exists( + ppnum_t pn) { - pt_entry_t *ptep; - ptep = pmap_pte(map->pmap, va); - if (0 == ptep) return FALSE; - return ((*ptep & (INTEL_PTE_NCACHE|INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE|INTEL_PTE_WIRED)); + pmap_paddr_t phys; + + assert(pn != vm_page_fictitious_addr); + + if (!pmap_initialized) + return (TRUE); + phys = (pmap_paddr_t) i386_ptob(pn); + if (!pmap_valid_page(pn)) + return (FALSE); + + return TRUE; +} + +void +mapping_free_prime() +{ + int i; + pv_entry_t pv_e; + + for (i = 0; i < (5 * PV_ALLOC_CHUNK); i++) { + pv_e = (pv_entry_t) zalloc(pv_list_zone); + PV_FREE(pv_e); + } +} + +void +mapping_adjust() +{ + pv_entry_t pv_e; + int i; + int spl; + + if (mapping_adjust_call == NULL) { + thread_call_setup(&mapping_adjust_call_data, + (thread_call_func_t) mapping_adjust, + (thread_call_param_t) NULL); + mapping_adjust_call = &mapping_adjust_call_data; + } + /* XXX rethink best way to do locking here */ + if (pv_free_count < PV_LOW_WATER_MARK) { + for (i = 0; i < PV_ALLOC_CHUNK; i++) { + pv_e = (pv_entry_t) zalloc(pv_list_zone); + SPLVM(spl); + PV_FREE(pv_e); + SPLX(spl); + } + } + mappingrecurse = 0; +} + +void +pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) +{ + int i; + pt_entry_t *opte, *npte; + pt_entry_t pte; + + for (i = 0; i < cnt; i++) { + opte = pmap_pte(kernel_pmap, kernel_commpage); + if (0 == opte) panic("kernel_commpage"); + npte = pmap_pte(kernel_pmap, user_commpage); + if (0 == npte) panic("user_commpage"); + pte = *opte | 
INTEL_PTE_USER|INTEL_PTE_GLOBAL; + pte &= ~INTEL_PTE_WRITE; // ensure read only + WRITE_PTE_FAST(npte, pte); + kernel_commpage += INTEL_PGBYTES; + user_commpage += INTEL_PGBYTES; + } +} + +static cpu_pmap_t cpu_pmap_master; +static struct pmap_update_list cpu_update_list_master; + +struct cpu_pmap * +pmap_cpu_alloc(boolean_t is_boot_cpu) +{ + int ret; + int i; + cpu_pmap_t *cp; + pmap_update_list_t up; + vm_offset_t address; + vm_map_entry_t entry; + + if (is_boot_cpu) { + cp = &cpu_pmap_master; + up = &cpu_update_list_master; + } else { + /* + * The per-cpu pmap data structure itself. + */ + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &cp, sizeof(cpu_pmap_t)); + if (ret != KERN_SUCCESS) { + printf("pmap_cpu_alloc() failed ret=%d\n", ret); + return NULL; + } + bzero((void *)cp, sizeof(cpu_pmap_t)); + + /* + * The tlb flush update list. + */ + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &up, sizeof(*up)); + if (ret != KERN_SUCCESS) { + printf("pmap_cpu_alloc() failed ret=%d\n", ret); + pmap_cpu_free(cp); + return NULL; + } + + /* + * The temporary windows used for copy/zero - see loose_ends.c + */ + for (i = 0; i < PMAP_NWINDOWS; i++) { + ret = vm_map_find_space(kernel_map, + &address, PAGE_SIZE, 0, &entry); + if (ret != KERN_SUCCESS) { + printf("pmap_cpu_alloc() " + "vm_map_find_space ret=%d\n", ret); + pmap_cpu_free(cp); + return NULL; + } + vm_map_unlock(kernel_map); + + cp->mapwindow[i].prv_CADDR = (caddr_t) address; + cp->mapwindow[i].prv_CMAP = vtopte(address); + * (int *) cp->mapwindow[i].prv_CMAP = 0; + + kprintf("pmap_cpu_alloc() " + "window=%d CADDR=0x%x CMAP=0x%x\n", + i, address, vtopte(address)); + } + } + + /* + * Set up the pmap request list + */ + cp->update_list = up; + simple_lock_init(&up->lock, 0); + up->count = 0; + + return cp; +} + +void +pmap_cpu_free(struct cpu_pmap *cp) +{ + if (cp != NULL && cp != &cpu_pmap_master) { + if (cp->update_list != NULL) + kfree((void *) cp->update_list, + sizeof(*cp->update_list)); + kfree((void *) cp, sizeof(cpu_pmap_t)); + } } diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 6f58c9b52..7faa4124c 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -72,8 +72,12 @@ #include #include #include -#include +#include #include +#define PMAP_QUEUE 1 +#ifdef PMAP_QUEUE +#include +#endif /* * Define the generic in terms of the specific @@ -93,17 +97,47 @@ * i386/i486/i860 Page Table Entry */ -typedef unsigned int pt_entry_t; +#ifdef PAE +typedef uint64_t pdpt_entry_t; +typedef uint64_t pt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pmap_paddr_t; +#else +typedef uint32_t pt_entry_t; +typedef uint32_t pd_entry_t; +typedef uint32_t pmap_paddr_t; +#endif + #define PT_ENTRY_NULL ((pt_entry_t *) 0) +#define PD_ENTRY_NULL ((pt_entry_t *) 0) #endif /* ASSEMBLER */ -#define INTEL_OFFMASK 0xfff /* offset within page */ -#define PDESHIFT 22 /* page descriptor shift */ -#define PDEMASK 0x3ff /* mask for page descriptor index */ -#define PTESHIFT 12 /* page table shift */ -#define PTEMASK 0x3ff /* mask for page table index */ +#ifdef PAE +#define NPGPTD 4 +#define PDESHIFT 21 +#define PTEMASK 0x1ff +#define PTEINDX 3 +#else +#define NPGPTD 1 +#define PDESHIFT 22 +#define PTEMASK 0x3ff +#define PTEINDX 2 +#endif +#define PTESHIFT 12 + +#define PDESIZE sizeof(pd_entry_t) /* for assembly files */ +#define PTESIZE sizeof(pt_entry_t) /* for assembly files */ + +#define INTEL_OFFMASK (I386_PGBYTES - 1) +#define PG_FRAME (~((pmap_paddr_t)PAGE_MASK)) +#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define NBPTD (NPGPTD << PAGE_SHIFT) +#define NPDEPTD (NBPTD / (sizeof (pd_entry_t))) +#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NBPDE (1 << PDESHIFT) +#define PDEMASK (NBPDE - 1) #define VM_WIMG_COPYBACK VM_MEM_COHERENT #define VM_WIMG_DEFAULT VM_MEM_COHERENT @@ -115,17 +149,55 @@ typedef unsigned int pt_entry_t; #define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) /* - * Convert kernel virtual address to linear address + * Size of Kernel address space. This is the number of page table pages + * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. + * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). */ +#ifndef KVA_PAGES +#define KVA_PAGES 256 +#endif -#define kvtolinear(a) ((a)+LINEAR_KERNEL_ADDRESS) +/* + * Pte related macros + */ +#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<> PDESHIFT) & PDEMASK) +#define pdenum(pmap, a) (((a) >> PDESHIFT) & PDEMASK) + /* * Convert page descriptor index to user virtual address @@ -137,9 +209,6 @@ typedef unsigned int pt_entry_t; */ #define ptenum(a) (((a) >> PTESHIFT) & PTEMASK) -#define NPTES (intel_ptob(1)/sizeof(pt_entry_t)) -#define NPDES (intel_ptob(1)/sizeof(pt_entry_t)) - /* * Hardware pte bit definitions (to be used directly on the ptes * without using the bit fields). 
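/*
 * [Illustrative sketch -- not part of this patch.]  The PAE and non-PAE
 * constants above imply different table geometries: 8-byte ptes halve the
 * entries per 4K table page, so each pde maps 2MB instead of 4MB, and the
 * four PAE directory pages are indexed as one contiguous array
 * (NPDEPTD = 2048).  This stand-alone program recomputes those figures,
 * assuming the 4K page size used throughout this file.
 */
#include <stdio.h>

static void
show(const char *tag, unsigned npgptd, unsigned ptesize, unsigned pdeshift)
{
	unsigned nptepg = 4096 / ptesize;             /* ptes per 4K table page */
	unsigned long nbpde = 1UL << pdeshift;        /* bytes mapped per pde */
	unsigned npdeptd = (npgptd * 4096) / ptesize; /* pdes across dir pages */

	printf("%s: NPTEPG=%u NBPDE=%luMB NPDEPTD=%u\n",
	    tag, nptepg, nbpde >> 20, npdeptd);
}

int
main(void)
{
	show("PAE   ", 4, 8, 21);   /* 512 entries, 2MB per pde, 2048 pdes */
	show("no-PAE", 1, 4, 22);   /* 1024 entries, 4MB per pde, 1024 pdes */
	return 0;
}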
@@ -147,17 +216,20 @@ typedef unsigned int pt_entry_t; #define INTEL_PTE_VALID 0x00000001 #define INTEL_PTE_WRITE 0x00000002 +#define INTEL_PTE_RW 0x00000002 #define INTEL_PTE_USER 0x00000004 #define INTEL_PTE_WTHRU 0x00000008 #define INTEL_PTE_NCACHE 0x00000010 #define INTEL_PTE_REF 0x00000020 #define INTEL_PTE_MOD 0x00000040 +#define INTEL_PTE_PS 0x00000080 +#define INTEL_PTE_GLOBAL 0x00000100 #define INTEL_PTE_WIRED 0x00000200 -#define INTEL_PTE_PFN 0xfffff000 +#define INTEL_PTE_PFN /*0xFFFFF000*/ (~0xFFF) #define INTEL_PTE_PTA 0x00000080 -#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) -#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) +#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */ +#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */ #define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) #define PMAP_DEFAULT_CACHE 0 @@ -167,24 +239,105 @@ typedef unsigned int pt_entry_t; #define PMAP_NO_GUARD_CACHE 8 +#ifndef ASSEMBLER + +#include + /* - * Convert page table entry to kernel virtual address + * Address of current and alternate address space page table maps + * and directories. */ -#define ptetokv(a) (phystokv(pte_to_pa(a))) -#ifndef ASSEMBLER +extern pt_entry_t PTmap[], APTmap[], Upte; +extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde; + +extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif + +/* + * virtual address to page table entry and + * to physical address. Likewise for alternate address space. + * Note: these work recursively, thus vtopte of a pte will give + * the corresponding pde that in turn maps it. + */ +#define vtopte(va) (PTmap + i386_btop(va)) + + typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ /* changed by other processors */ +struct md_page { + int pv_list_count; + TAILQ_HEAD(,pv_entry) pv_list; +}; + +#include + +/* + * For each vm_page_t, there is a list of all currently + * valid virtual mappings of that page. An entry is + * a pv_entry_t; the list is the pv_table. + */ struct pmap { - pt_entry_t *dirbase; /* page directory pointer register */ - vm_offset_t pdirbase; /* phys. address of dirbase */ +#ifdef PMAP_QUEUE + queue_head_t pmap_link; /* unordered queue of in use pmaps */ +#endif + pd_entry_t *dirbase; /* page directory pointer register */ + pd_entry_t *pdirbase; /* phys. address of dirbase */ + vm_object_t pm_obj; /* object to hold pte's */ int ref_count; /* reference count */ decl_simple_lock_data(,lock) /* lock on map */ struct pmap_statistics stats; /* map statistics */ cpu_set cpus_using; /* bitmap of cpus using pmap */ +#ifdef PAE + vm_offset_t pm_hold; /* true pdpt zalloc addr */ + pdpt_entry_t *pm_pdpt; /* KVA of pg dir ptr table */ + vm_offset_t pm_ppdpt; /* phy addr pdpt + should really be 32/64 bit */ +#endif }; +#define PMAP_NWINDOWS 4 +typedef struct { + pt_entry_t *prv_CMAP; + caddr_t prv_CADDR; +} mapwindow_t; + +typedef struct cpu_pmap { + mapwindow_t mapwindow[PMAP_NWINDOWS]; + struct pmap *real_pmap; + struct pmap_update_list *update_list; + volatile boolean_t update_needed; +} cpu_pmap_t; + +/* + * Should be rewritten in asm anyway. 
+ */ +#define CM1 (current_cpu_datap()->cpu_pmap->mapwindow[0].prv_CMAP) +#define CM2 (current_cpu_datap()->cpu_pmap->mapwindow[1].prv_CMAP) +#define CM3 (current_cpu_datap()->cpu_pmap->mapwindow[2].prv_CMAP) +#define CM4 (current_cpu_datap()->cpu_pmap->mapwindow[3].prv_CMAP) +#define CA1 (current_cpu_datap()->cpu_pmap->mapwindow[0].prv_CADDR) +#define CA2 (current_cpu_datap()->cpu_pmap->mapwindow[1].prv_CADDR) +#define CA3 (current_cpu_datap()->cpu_pmap->mapwindow[2].prv_CADDR) +#define CA4 (current_cpu_datap()->cpu_pmap->mapwindow[3].prv_CADDR) + +typedef struct pmap_memory_regions { + ppnum_t base; + ppnum_t end; + ppnum_t alloc; + uint32_t type; +} pmap_memory_region_t; + +unsigned pmap_memory_region_count; +unsigned pmap_memory_region_current; + +#define PMAP_MEMORY_REGIONS_SIZE 32 + +extern pmap_memory_region_t pmap_memory_regions[]; + /* * Optimization avoiding some TLB flushes when switching to * kernel-loaded threads. This is effective only for i386: @@ -197,13 +350,13 @@ struct pmap { * itself. * * We store the pmap we are really using (from which we fetched the - * dirbase value) in real_pmap[cpu_number()]. + * dirbase value) in current_cpu_datap()->cpu_pmap.real_pmap. * * Invariant: - * current_pmap() == real_pmap[cpu_number()] || current_pmap() == kernel_pmap. + * current_pmap() == current_cpu_datap()->cpu_pmap.real_pmap || + * current_pmap() == kernel_pmap. */ - -extern struct pmap *real_pmap[NCPUS]; +#define PMAP_REAL(my_cpu) (cpu_datap(my_cpu)->cpu_pmap->real_pmap) #include /* @@ -216,20 +369,19 @@ extern struct pmap *real_pmap[NCPUS]; * in use, don't do anything to the hardware, to avoid a TLB flush. */ -#if NCPUS > 1 #define PMAP_CPU_SET(pmap, my_cpu) i_bit_set(my_cpu, &((pmap)->cpus_using)) #define PMAP_CPU_CLR(pmap, my_cpu) i_bit_clear(my_cpu, &((pmap)->cpus_using)) -#else /* NCPUS > 1 */ -#define PMAP_CPU_SET(pmap,my_cpu) (pmap)->cpus_using = TRUE -#define PMAP_CPU_CLR(pmap,my_cpu) (pmap)->cpus_using = FALSE -#endif /* NCPUS > 1 */ - +#ifdef PAE +#define PDIRBASE pm_ppdpt +#else +#define PDIRBASE pdirbase +#endif #define set_dirbase(mypmap, my_cpu) { \ - struct pmap **ppmap = &real_pmap[my_cpu]; \ - vm_offset_t pdirbase = (mypmap)->pdirbase; \ + struct pmap **ppmap = &PMAP_REAL(my_cpu); \ + pmap_paddr_t pdirbase = (pmap_paddr_t)((mypmap)->PDIRBASE); \ \ - if (*ppmap == (vm_offset_t)NULL) { \ + if (*ppmap == (pmap_paddr_t)NULL) { \ *ppmap = (mypmap); \ PMAP_CPU_SET((mypmap), my_cpu); \ set_cr3(pdirbase); \ @@ -243,7 +395,6 @@ extern struct pmap *real_pmap[NCPUS]; assert((mypmap) == *ppmap || (mypmap) == kernel_pmap); \ } -#if NCPUS > 1 /* * List of cpus that are actively using mapped memory. Any * pmap update operation must wait for all cpus in this list. @@ -259,11 +410,8 @@ extern cpu_set cpus_active; extern cpu_set cpus_idle; -/* - * Quick test for pmap update requests. - */ -extern volatile -boolean_t cpu_update_needed[NCPUS]; +#define cpu_update_needed(cpu) cpu_datap(cpu)->cpu_pmap->update_needed +#define cpu_update_list(cpu) cpu_datap(cpu)->cpu_pmap->update_list /* * External declarations for PMAP_ACTIVATE. @@ -271,15 +419,10 @@ boolean_t cpu_update_needed[NCPUS]; extern void process_pmap_updates(struct pmap *pmap); extern void pmap_update_interrupt(void); -extern pmap_t kernel_pmap; - -#endif /* NCPUS > 1 */ /* * Machine dependent routines that are used only for i386/i486/i860. 
*/ -extern vm_offset_t (phystokv)( - vm_offset_t pa); extern vm_offset_t (kvtophys)( vm_offset_t addr); @@ -304,23 +447,32 @@ extern void pmap_bootstrap( vm_offset_t load_start); extern boolean_t pmap_valid_page( - vm_offset_t pa); + ppnum_t pn); extern int pmap_list_resident_pages( struct pmap *pmap, vm_offset_t *listp, int space); -extern void flush_tlb(void); +extern void pmap_commpage_init( + vm_offset_t kernel, + vm_offset_t user, + int count); +extern struct cpu_pmap *pmap_cpu_alloc( + boolean_t is_boot_cpu); +extern void pmap_cpu_free( + struct cpu_pmap *cp); + extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys); extern void flush_dcache(vm_offset_t addr, unsigned count, int phys); extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); +extern void pmap_sync_page_data_phys(ppnum_t pa); +extern void pmap_sync_page_attributes_phys(ppnum_t pa); /* * Macros for speed. */ -#if NCPUS > 1 #include @@ -352,7 +504,7 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); /* \ * Process invalidate requests for the kernel pmap. \ */ \ - if (cpu_update_needed[(my_cpu)]) \ + if (cpu_update_needed(my_cpu)) \ process_pmap_updates(kernel_pmap); \ \ /* \ @@ -375,6 +527,8 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); * pmap is locked against updates. \ */ \ i_bit_clear((my_cpu), &kernel_pmap->cpus_using); \ + i_bit_clear((my_cpu), &cpus_active); \ + PMAP_REAL(my_cpu) = NULL; \ } #define PMAP_ACTIVATE_MAP(map, my_cpu) { \ @@ -425,13 +579,7 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); splx(spl); \ } -#define PMAP_DEACTIVATE_USER(th, my_cpu) { \ - spl_t spl; \ - \ - spl = splhigh(); \ - PMAP_DEACTIVATE_MAP(th->map, my_cpu) \ - splx(spl); \ -} +#define PMAP_DEACTIVATE_USER(th, my_cpu) #define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ spl_t spl; \ @@ -484,7 +632,7 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); */ \ i_bit_clear((my_cpu), &cpus_idle); \ \ - if (cpu_update_needed[(my_cpu)]) \ + if (cpu_update_needed(my_cpu)) \ pmap_update_interrupt(); \ \ /* \ @@ -495,55 +643,12 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); clear_led(my_cpu); \ } -#else /* NCPUS > 1 */ - -/* - * With only one CPU, we just have to indicate whether the pmap is - * in use. 
- */ - -#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ - kernel_pmap->cpus_using = TRUE; \ -} - -#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ - kernel_pmap->cpus_using = FALSE; \ -} - -#define PMAP_ACTIVATE_MAP(map, my_cpu) \ - set_dirbase(vm_map_pmap(map), my_cpu) - -#define PMAP_DEACTIVATE_MAP(map, my_cpu) - -#define PMAP_ACTIVATE_USER(th, my_cpu) \ - PMAP_ACTIVATE_MAP(th->map, my_cpu) - -#define PMAP_DEACTIVATE_USER(th, my_cpu) \ - PMAP_DEACTIVATE_MAP(th->map, my_cpu) - -#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ - if (old_th->map != new_th->map) { \ - PMAP_DEACTIVATE_MAP(old_th->map, my_cpu); \ - PMAP_ACTIVATE_MAP(new_th->map, my_cpu); \ - } \ -} - -#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \ - PMAP_DEACTIVATE_MAP(th->map, my_cpu); \ - th->map = new_map; \ - PMAP_ACTIVATE_MAP(th->map, my_cpu); \ -} - -#endif /* NCPUS > 1 */ - #define PMAP_CONTEXT(pmap, thread) #define pmap_kernel_va(VA) \ (((VA) >= VM_MIN_KERNEL_ADDRESS) && ((VA) <= VM_MAX_KERNEL_ADDRESS)) #define pmap_resident_count(pmap) ((pmap)->stats.resident_count) -#define pmap_phys_address(frame) ((vm_offset_t) (intel_ptob(frame))) -#define pmap_phys_to_frame(phys) ((int) (intel_btop(phys))) #define pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr) #define pmap_attribute(pmap,addr,size,attr,value) \ (KERN_INVALID_ADDRESS) diff --git a/osfmk/i386/postcode.h b/osfmk/i386/postcode.h new file mode 100644 index 000000000..d0065f7a6 --- /dev/null +++ b/osfmk/i386/postcode.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _I386_POSTCODE_H_ +#define _I386_POSTCODE_H_ + +#ifndef DEBUG +#include +#endif + +/* Define this to delay about 1 sec after posting each code */ +/* #define POSTCODE_DELAY 1 */ + +/* The POSTCODE is port 0x80 */ +#define POSTPORT 0x80 + +#define SPINCOUNT 100000000 +#define CPU_PAUSE() rep; nop + +#if DEBUG +/* + * Macro to output byte value to postcode, destroying register al. + * Additionally, if POSTCODE_DELAY, spin for about a second.
+ */ +#if POSTCODE_DELAY +#define POSTCODE_AL \ + outb %al,$(POSTPORT); \ + movl $(SPINCOUNT), %eax; \ +1: \ + CPU_PAUSE(); \ + decl %eax; \ + jne 1b +#else +#define POSTCODE_AL \ + outb %al,$(POSTPORT) +#endif /* POSTCODE_DELAY */ + +#define POSTCODE(XX) \ + mov $(XX), %al; \ + POSTCODE_AL + +/* Output byte value to postcode, without destroying register eax */ +#define POSTCODE_SAVE_EAX(XX) \ + push %eax; \ + POSTCODE(XX); \ + pop %eax + +/* + * Display a 32-bit value to the post card - low byte to high byte + * Entry: value in %ebx + * Exit: %ebx preserved; %eax destroyed + */ +#define POSTCODE32_EBX \ + roll $8, %ebx; \ + movl %ebx, %eax; \ + POSTCODE_AL; \ + \ + roll $8, %ebx; \ + movl %ebx, %eax; \ + POSTCODE_AL; \ + \ + roll $8, %ebx; \ + movl %ebx, %eax; \ + POSTCODE_AL; \ + \ + roll $8, %ebx; \ + movl %ebx, %eax; \ + POSTCODE_AL + +#else /* DEBUG */ +#define POSTCODE_AL +#define POSTCODE(X) +#define POSTCODE32_EBX +#endif /* DEBUG */ + +/* + * The following postcodes are defined for stages of early startup: + */ + +#define PSTART_ENTRY 0xFF +#define PSTART_PAGE_TABLES 0xFE +#define PSTART_BEFORE_PAGING 0xFD +#define VSTART_ENTRY 0xFC +#define VSTART_STACK_SWITCH 0xFB +#define VSTART_EXIT 0xFA +#define I386_INIT_ENTRY 0xF9 +#define CPU_INIT_D 0xF8 +#define PROCESSOR_BOOTSTRAP_D 0xF7 +#define PE_INIT_PLATFORM_D 0xF6 +#define THREAD_BOOTSTRAP_D 0xF5 + +#define SLAVE_PSTART_ENTRY 0xEF +#define REAL_TO_PROT_ENTRY 0xEE +#define REAL_TO_PROT_EXIT 0xED +#define STARTPROG_ENTRY 0xEC +#define STARTPROG_EXIT 0xEB +#define SLAVE_START_ENTRY 0xEA +#define SLAVE_START_EXIT 0xE9 +#define SVSTART_ENTRY 0xE8 +#define SVSTART_DESC_INIT 0xE7 +#define SVSTART_STACK_SWITCH 0xE6 +#define SVSTART_EXIT 0xE5 +#define I386_INIT_SLAVE 0xE4 + +#define MP_KDP_ENTER 0xDB /* Machine in kdp DeBugger */ +#define PANIC_HLT 0xD1 /* Die an early death */ + +#define ACPI_WAKE_START_ENTRY 0xCF +#define ACPI_WAKE_PROT_ENTRY 0xCE +#define ACPI_WAKE_PAGED_ENTRY 0xCD + +#ifndef ASSEMBLER +inline static void +_postcode_delay(uint32_t spincount) +{ + asm volatile("1: \n\t" + " rep; nop; \n\t" + " decl %%eax; \n\t" + " jne 1b" + : : "a" (spincount)); +} +inline static void +_postcode(uint8_t xx) +{ + asm volatile("outb %0, %1" : : "a" (xx), "N" (POSTPORT)); +} +#if DEBUG +inline static void +postcode(uint8_t xx) +{ + _postcode(xx); +#if POSTCODE_DELAY + _postcode_delay(SPINCOUNT); +#endif +} +#else +#define postcode(xx) +#endif +#endif + +#endif /* _I386_POSTCODE_H_ */ diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index a9cf9c8d1..153b6bdbd 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
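/*
 * [Illustrative sketch -- not part of this patch.]  Typical use of the
 * postcode facility defined in postcode.h above: each write to port 0x80
 * latches a byte on a POST debug card, so the last value displayed shows
 * how far boot progressed before a hang.  The function and its staging
 * below are hypothetical; postcode() and the VSTART_* codes are defined
 * above, and postcode() compiles to nothing unless DEBUG is set.
 */
static void
example_vstart_markers(void)
{
	postcode(VSTART_ENTRY);		/* reached C entry of vstart */
	/* ... switch to the kernel's own stack ... */
	postcode(VSTART_STACK_SWITCH);
	/* ... remaining early setup ... */
	postcode(VSTART_EXIT);		/* last value stays on the card */
}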
* * @APPLE_LICENSE_HEADER_START@ * @@ -141,7 +141,9 @@ */ #define CR4_FXS 0x00000200 /* SSE/SSE2 OS supports FXSave */ #define CR4_XMM 0x00000400 /* SSE/SSE2 instructions supported in OS */ +#define CR4_PGE 0x00000080 /* p6: Page Global Enable */ #define CR4_MCE 0x00000040 /* p5: Machine Check Exceptions */ +#define CR4_PAE 0x00000020 /* p5: Physical Address Extensions */ #define CR4_PSE 0x00000010 /* p5: Page Size Extensions */ #define CR4_DE 0x00000008 /* p5: Debugging Extensions */ #define CR4_TSD 0x00000004 /* p5: Time Stamp Disable */ @@ -149,103 +151,89 @@ #define CR4_VME 0x00000001 /* p5: Virtual-8086 Mode Extensions */ #ifndef ASSEMBLER -extern unsigned int get_cr0(void); -extern void set_cr0( - unsigned int value); -extern unsigned int get_cr2(void); -extern unsigned int get_cr3(void); -extern void set_cr3( - unsigned int value); -extern unsigned int get_cr4(void); -extern void set_cr4( - unsigned int value); + +#include +__BEGIN_DECLS #define set_ts() \ set_cr0(get_cr0() | CR0_TS) -extern void clear_ts(void); - -extern unsigned short get_tr(void); -extern void set_tr( - unsigned int seg); -extern unsigned short get_ldt(void); -extern void set_ldt( - unsigned int seg); -#ifdef __GNUC__ -extern __inline__ unsigned int get_cr0(void) +static inline unsigned int get_cr0(void) { register unsigned int cr0; __asm__ volatile("mov %%cr0, %0" : "=r" (cr0)); return(cr0); } -extern __inline__ void set_cr0(unsigned int value) +static inline void set_cr0(unsigned int value) { __asm__ volatile("mov %0, %%cr0" : : "r" (value)); } -extern __inline__ unsigned int get_cr2(void) +static inline unsigned int get_cr2(void) { register unsigned int cr2; __asm__ volatile("mov %%cr2, %0" : "=r" (cr2)); return(cr2); } -#if NCPUS > 1 && AT386 -/* - * get_cr3 and set_cr3 are more complicated for the MPs. cr3 is where - * the cpu number gets stored. 
The MP versions live in locore.s - */ -#else /* NCPUS > 1 && AT386 */ -extern __inline__ unsigned int get_cr3(void) +static inline unsigned int get_cr3(void) { register unsigned int cr3; __asm__ volatile("mov %%cr3, %0" : "=r" (cr3)); return(cr3); } -extern __inline__ void set_cr3(unsigned int value) +static inline void set_cr3(unsigned int value) { __asm__ volatile("mov %0, %%cr3" : : "r" (value)); } -#endif /* NCPUS > 1 && AT386 */ -extern __inline__ void clear_ts(void) +/* Implemented in locore: */ +extern uint32_t get_cr4(void); +extern void set_cr4(uint32_t); + +static inline void clear_ts(void) { __asm__ volatile("clts"); } -extern __inline__ unsigned short get_tr(void) +static inline unsigned short get_tr(void) { unsigned short seg; __asm__ volatile("str %0" : "=rm" (seg)); return(seg); } -extern __inline__ void set_tr(unsigned int seg) +static inline void set_tr(unsigned int seg) { __asm__ volatile("ltr %0" : : "rm" ((unsigned short)(seg))); } -extern __inline__ unsigned short get_ldt(void) +static inline unsigned short get_ldt(void) { unsigned short seg; __asm__ volatile("sldt %0" : "=rm" (seg)); return(seg); } -extern __inline__ void set_ldt(unsigned int seg) +static inline void set_ldt(unsigned int seg) { __asm__ volatile("lldt %0" : : "rm" ((unsigned short)(seg))); } -extern __inline__ void flush_tlb(void) +static inline void flush_tlb(void) { unsigned long cr3_temp; __asm__ volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (cr3_temp) :: "memory"); } -extern __inline__ void invlpg(unsigned long addr) +static inline void wbinvd(void) +{ + __asm__ volatile("wbinvd"); +} + +static inline void invlpg(unsigned long addr) { __asm__ volatile("invlpg (%0)" :: "r" (addr) : "memory"); } @@ -270,25 +258,34 @@ extern __inline__ void invlpg(unsigned long addr) #define rdpmc(counter,lo,hi) \ __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter)) -extern __inline__ uint64_t rdmsr64(uint32_t msr) +static inline uint64_t rdmsr64(uint32_t msr) { uint64_t ret; __asm__ volatile("rdmsr" : "=A" (ret) : "c" (msr)); return ret; } -extern __inline__ void wrmsr64(uint32_t msr, uint64_t val) +static inline void wrmsr64(uint32_t msr, uint64_t val) { __asm__ volatile("wrmsr" : : "c" (msr), "A" (val)); } -extern __inline__ uint64_t rdtsc64(void) +static inline uint64_t rdtsc64(void) { uint64_t ret; __asm__ volatile("rdtsc" : "=A" (ret)); return ret; } -#endif /* __GNUC__ */ + +/* + * rdmsr_carefully() returns 0 when the MSR has been read successfully, + * or non-zero (1) if the MSR does not exist. + * The implementation is in locore.s. 
+ */ +extern int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi); + +__END_DECLS + #endif /* ASSEMBLER */ #define MSR_IA32_P5_MC_ADDR 0 @@ -316,15 +313,35 @@ extern __inline__ uint64_t rdtsc64(void) #define MSR_IA32_EVNTSEL0 0x186 #define MSR_IA32_EVNTSEL1 0x187 +#define MSR_IA32_MISC_ENABLE 0x1a0 + #define MSR_IA32_DEBUGCTLMSR 0x1d9 #define MSR_IA32_LASTBRANCHFROMIP 0x1db #define MSR_IA32_LASTBRANCHTOIP 0x1dc #define MSR_IA32_LASTINTFROMIP 0x1dd #define MSR_IA32_LASTINTTOIP 0x1de +#define MSR_IA32_CR_PAT 0x277 + #define MSR_IA32_MC0_CTL 0x400 #define MSR_IA32_MC0_STATUS 0x401 #define MSR_IA32_MC0_ADDR 0x402 #define MSR_IA32_MC0_MISC 0x403 +#define MSR_IA32_MTRRCAP 0xfe +#define MSR_IA32_MTRR_DEF_TYPE 0x2ff +#define MSR_IA32_MTRR_PHYSBASE(n) (0x200 + 2*(n)) +#define MSR_IA32_MTRR_PHYSMASK(n) (0x200 + 2*(n) + 1) +#define MSR_IA32_MTRR_FIX64K_00000 0x250 +#define MSR_IA32_MTRR_FIX16K_80000 0x258 +#define MSR_IA32_MTRR_FIX16K_A0000 0x259 +#define MSR_IA32_MTRR_FIX4K_C0000 0x268 +#define MSR_IA32_MTRR_FIX4K_C8000 0x269 +#define MSR_IA32_MTRR_FIX4K_D0000 0x26a +#define MSR_IA32_MTRR_FIX4K_D8000 0x26b +#define MSR_IA32_MTRR_FIX4K_E0000 0x26c +#define MSR_IA32_MTRR_FIX4K_E8000 0x26d +#define MSR_IA32_MTRR_FIX4K_F0000 0x26e +#define MSR_IA32_MTRR_FIX4K_F8000 0x26f + #endif /* _I386_PROC_REG_H_ */ diff --git a/osfmk/i386/read_fault.c b/osfmk/i386/read_fault.c index 1af4a97f2..67dc98358 100644 --- a/osfmk/i386/read_fault.c +++ b/osfmk/i386/read_fault.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -84,20 +84,19 @@ intel_read_fault( vm_object_offset_t offset; /* Top-level offset */ vm_prot_t prot; /* Protection for mapping */ vm_behavior_t behavior; /* Expected paging behavior */ - vm_object_offset_t lo_offset, hi_offset; + vm_map_offset_t lo_offset, hi_offset; vm_page_t result_page; /* Result of vm_fault_page */ vm_page_t top_page; /* Placeholder page */ boolean_t wired; /* Is map region wired? 
*/ kern_return_t result; register vm_page_t m; - vm_map_t pmap_map; + vm_map_t map_pmap; vm_map_t original_map = map; thread_t cur_thread; boolean_t funnel_set; - funnel_t *curflock; + funnel_t *curflock = NULL; cur_thread = current_thread(); - if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) { funnel_set = TRUE; curflock = cur_thread->funnel_lock; @@ -118,7 +117,7 @@ intel_read_fault( result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ, &version, &object, &offset, &prot, &wired, &behavior, &lo_offset, - &hi_offset, &pmap_map); + &hi_offset, &map_pmap); vm_map_unlock_read(map); @@ -128,9 +127,9 @@ intel_read_fault( return (result); } - if(pmap_map != map) { - vm_map_reference(pmap_map); - vm_map_unlock_read(pmap_map); + if(map_pmap != map) { + vm_map_reference(map_pmap); + vm_map_unlock_read(map_pmap); } /* @@ -150,8 +149,8 @@ intel_read_fault( if (result != VM_FAULT_SUCCESS) { vm_object_deallocate(object); - if(pmap_map != map) { - vm_map_deallocate(pmap_map); + if(map_pmap != map) { + vm_map_deallocate(map_pmap); } switch (result) { @@ -209,8 +208,8 @@ intel_read_fault( vm_object_offset_t retry_offset; vm_prot_t retry_prot; - if (map != pmap_map) { - vm_map_deallocate(pmap_map); + if (map != map_pmap) { + vm_map_deallocate(map_pmap); } map = original_map; @@ -219,7 +218,7 @@ intel_read_fault( result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ, &version, &retry_object, &retry_offset, &retry_prot, &wired, &behavior, &lo_offset, - &hi_offset, &pmap_map); + &hi_offset, &map_pmap); if (result != KERN_SUCCESS) { vm_map_unlock_read(map); @@ -231,8 +230,8 @@ intel_read_fault( return (result); } - if (map != pmap_map) { - vm_map_reference(pmap_map); + if (map != map_pmap) { + vm_map_reference(map_pmap); } vm_object_unlock(retry_object); @@ -241,9 +240,9 @@ intel_read_fault( vm_object_lock(m->object); RELEASE_PAGE(m); vm_map_unlock_read(map); - if(pmap_map != map) { - vm_map_unlock_read(pmap_map); - vm_map_deallocate(pmap_map); + if(map_pmap != map) { + vm_map_unlock_read(map_pmap); + vm_map_deallocate(map_pmap); } UNLOCK_AND_DEALLOCATE; goto RetryFault; @@ -254,11 +253,11 @@ intel_read_fault( * Put the page in the physical map. */ - PMAP_ENTER(pmap_map->pmap, vaddr, m, VM_PROT_READ, PMAP_DEFAULT_CACHE, wired); + PMAP_ENTER(map_pmap->pmap, vaddr, m, VM_PROT_READ, PMAP_DEFAULT_CACHE, wired); - if(pmap_map != map) { - vm_map_unlock_read(pmap_map); - vm_map_deallocate(pmap_map); + if(map_pmap != map) { + vm_map_unlock_read(map_pmap); + vm_map_deallocate(map_pmap); } vm_object_lock(m->object); diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 927802905..d6368afa6 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,24 +26,26 @@ /* * File: i386/rtclock.c * Purpose: Routines for handling the machine dependent - * real-time clock. This clock is generated by - * the Intel 8254 Programmable Interval Timer. + * real-time clock. Historically, this clock is + * generated by the Intel 8254 Programmable Interval + * Timer, but local apic timers are now used for + * this purpose with the master time reference being + * the cpu clock counted by the timestamp MSR. 
*/ -#include #include #include #include -#include #include +#include #include #include #include #include #include -#include /* HZ */ +#include #include #include #include /* for kernel_map */ @@ -51,15 +53,26 @@ #include #include #include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include + +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define MIN(a,b) (((a)>(b))?(b):(a)) -#define DISPLAYENTER(x) printf("[RTCLOCK] entering " #x "\n"); -#define DISPLAYEXIT(x) printf("[RTCLOCK] leaving " #x "\n"); -#define DISPLAYVALUE(x,y) printf("[RTCLOCK] " #x ":" #y " = 0x%08x \n",y); +#define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */ + +#define UI_CPUFREQ_ROUNDING_FACTOR 10000000 int sysclk_config(void); @@ -73,23 +86,16 @@ kern_return_t sysclk_getattr( clock_attr_t attr, mach_msg_type_number_t *count); -kern_return_t sysclk_setattr( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t count); - void sysclk_setalarm( mach_timespec_t *alarm_time); -extern void (*IOKitRegisterInterruptHook)(void *, int irq, int isclock); - /* * Lists of clock routines. */ struct clock_ops sysclk_ops = { sysclk_config, sysclk_init, sysclk_gettime, 0, - sysclk_getattr, sysclk_setattr, + sysclk_getattr, 0, sysclk_setalarm, }; @@ -113,60 +119,59 @@ struct clock_ops calend_ops = { }; /* local data declarations */ -mach_timespec_t *RtcTime = (mach_timespec_t *)0; -mach_timespec_t *RtcAlrm; -clock_res_t RtcDelt; -/* global data declarations */ -struct { - uint64_t abstime; +static clock_timer_func_t rtclock_timer_expire; + +static timer_call_data_t rtclock_alarm_timer; - mach_timespec_t time; - mach_timespec_t alarm_time; /* time of next alarm */ +static void rtclock_alarm_expire( + timer_call_param_t p0, + timer_call_param_t p1); - mach_timespec_t calend_offset; +struct { + mach_timespec_t calend_offset; boolean_t calend_is_set; int64_t calend_adjtotal; int32_t calend_adjdelta; - uint64_t timer_deadline; - boolean_t timer_is_set; - clock_timer_func_t timer_expire; + uint32_t boottime; - clock_res_t new_ires; /* pending new resolution (nano ) */ - clock_res_t intr_nsec; /* interrupt resolution (nano) */ mach_timebase_info_data_t timebase_const; decl_simple_lock_data(,lock) /* real-time clock device lock */ } rtclock; -unsigned int clknum; /* clks per second */ -unsigned int new_clknum; /* pending clknum */ -unsigned int time_per_clk; /* time per clk in ZHZ */ -unsigned int clks_per_int; /* clks per interrupt */ -unsigned int clks_per_int_99; -int rtc_intr_count; /* interrupt counter */ -int rtc_intr_hertz; /* interrupts per HZ */ -int rtc_intr_freq; /* interrupt frequency */ -int rtc_print_lost_tick; /* print lost tick */ +boolean_t rtc_initialized = FALSE; +clock_res_t rtc_intr_nsec = NSEC_PER_HZ; /* interrupt res */ +uint64_t rtc_cycle_count; /* clocks in 1/20th second */ +uint64_t rtc_cyc_per_sec; /* processor cycles per sec */ +uint32_t rtc_boot_frequency; /* provided by 1st speed-step */ +uint32_t rtc_quant_scale; /* clock to nanos multiplier */ +uint32_t rtc_quant_shift; /* clock to nanos right shift */ +uint64_t rtc_decrementer_min; -uint32_t rtc_cyc_per_sec; /* processor cycles per seconds */ -uint32_t rtc_quant_scale; /* used internally to convert clocks to nanos */ +static mach_timebase_info_data_t rtc_lapic_scale; /* nsec to lapic count */ /* - * Macros to lock/unlock real-time clock device. + * Macros to lock/unlock real-time clock data. 
*/ -#define LOCK_RTC(s) \ -MACRO_BEGIN \ - (s) = splclock(); \ - simple_lock(&rtclock.lock); \ +#define RTC_INTRS_OFF(s) \ + (s) = splclock() + +#define RTC_INTRS_ON(s) \ + splx(s) + +#define RTC_LOCK(s) \ +MACRO_BEGIN \ + RTC_INTRS_OFF(s); \ + simple_lock(&rtclock.lock); \ MACRO_END -#define UNLOCK_RTC(s) \ -MACRO_BEGIN \ +#define RTC_UNLOCK(s) \ +MACRO_BEGIN \ simple_unlock(&rtclock.lock); \ - splx(s); \ + RTC_INTRS_ON(s); \ MACRO_END /* @@ -175,121 +180,62 @@ MACRO_END * The i8254 is a traditional PC device with some arbitrary characteristics. * Basically, it is a register that counts at a fixed rate and can be * programmed to generate an interrupt every N counts. The count rate is - * clknum counts per second (see pit.h), historically 1193167 we believe. + * clknum counts per sec (see pit.h), historically 1193167=14.318MHz/12 + * but the more accurate value is 1193182=14.31818MHz/12. [14.31818 MHz being + * the master crystal oscillator reference frequency since the very first PC.] * Various constants are computed based on this value, and we calculate * them at init time for execution efficiency. To obtain sufficient * accuracy, some of the calculation are most easily done in floating * point and then converted to int. * - * We want an interrupt every 10 milliseconds, approximately. The count - * which will do that is clks_per_int. However, that many counts is not - * *exactly* 10 milliseconds; it is a bit more or less depending on - * roundoff. The actual time per tick is calculated and saved in - * rtclock.intr_nsec, and it is that value which is added to the time - * register on each tick. - * - * The i8254 counter can be read between interrupts in order to determine - * the time more accurately. The counter counts down from the preset value - * toward 0, and we have to handle the case where the counter has been - * reset just before being read and before the interrupt has been serviced. - * Given a count since the last interrupt, the time since then is given - * by (count * time_per_clk). In order to minimize integer truncation, - * we perform this calculation in an arbitrary unit of time which maintains - * the maximum precision, i.e. such that one tick is 1.0e9 of these units, - * or close to the precision of a 32-bit int. We then divide by this unit - * (which doesn't lose precision) to get nanoseconds. For notation - * purposes, this unit is defined as ZHZ = zanoseconds per nanosecond. - * - * This sequence to do all this is in sysclk_gettime. For efficiency, this - * sequence also needs the value that the counter will have if it has just - * overflowed, so we precompute that also. - * - * The fix for certain really old certain platforms has been removed - * (specifically the DEC XL5100) have been observed to have problem - * with latching the counter, and they occasionally (say, one out of - * 100,000 times) return a bogus value. Hence, the present code reads - * the counter twice and checks for a consistent pair of values. - * the code was: - * do { - * READ_8254(val); - * READ_8254(val2); - * } while ( val2 > val || val2 < val - 10 ); - * - * - * Some attributes of the rt clock can be changed, including the - * interrupt resolution. We default to the minimum resolution (10 ms), - * but allow a finer resolution to be requested. The assumed frequency - * of the clock can also be set since it appears that the actual - * frequency of real-world hardware can vary from the nominal by - * 200 ppm or more. 
When the frequency is set, the values above are
- recomputed and we continue without resetting or changing anything else. */
-#define RTC_MINRES (NSEC_PER_SEC / HZ) /* nsec per tick */
-#define RTC_MAXRES (RTC_MINRES / 20) /* nsec per tick */
-#define ZANO (1000000000)
-#define ZHZ (ZANO / (NSEC_PER_SEC / HZ))
-#define READ_8254(val) { \
- outb(PITCTL_PORT, PIT_C0); \
- (val) = inb(PITCTR0_PORT); \
- (val) |= inb(PITCTR0_PORT) << 8 ; }
-
-#define UI_CPUFREQ_ROUNDING_FACTOR 10000000
-
 /*
 * Forward decl.
 */
-void rtc_setvals( unsigned int, clock_res_t );
-
-static void rtc_set_cyc_per_sec();
-
-/* define assembly routines */
-
+static uint64_t rtc_set_cyc_per_sec(uint64_t cycles);
+uint64_t rtc_nanotime_read(void);
 /*
- * Inlines to get timestamp counter value.
+ * create_mul_quant_GHZ
+ * create a constant used to multiply the TSC by to convert to nanoseconds.
+ * This is a 32 bit number and the TSC *MUST* have a frequency higher than
+ * 1000 MHz for this routine to work.
+ *
+ * The theory here is that we know how many TSCs-per-sec the processor runs at.
+ * Normally to convert this to nanoseconds you would multiply the current
+ * timestamp by 1000000000 (a billion) then divide by TSCs-per-sec.
+ * Unfortunately the TSC is 64 bits, which would leave us with 96 bit
+ * intermediate results from the multiply, which must then be divided.
+ * Usually that's
+ * uint96 = tsc * numer
+ * nanos = uint96 / denom
+ * Instead, we create this quant constant and it becomes the numerator,
+ * the denominator can then be 0x100000000 which makes our division as simple as
+ * forgetting the lower 32 bits of the result. We can also pass this number to
+ * user space as the numer and pass 0xFFFFFFFF (RTC_FAST_DENOM) as the denom to
+ * convert raw counts to nanos. The difference is so small as to be
+ * undetectable by anything.
+ *
+ * Unfortunately we cannot do this for sub-GHz processors. In this case, all
+ * we do is pass the CPU speed in raw as the denom and we pass in 1000000000
+ * as the numerator. No short cuts allowed
+ */
+#define RTC_FAST_DENOM 0xFFFFFFFF
 inline static uint32_t
-create_mul_quant_GHZ(uint32_t quant)
+create_mul_quant_GHZ(int shift, uint32_t quant)
 {
- return (uint32_t)((50000000ULL << 32) / quant);
+ return (uint32_t)((((uint64_t)NSEC_PER_SEC/20) << shift) / quant);
 }
-
-// this routine takes a value of raw TSC ticks and applies the passed mul_quant
-// generated by create_mul_quant() This is our internal routine for creating
-// nanoseconds
-// since we don't really have uint96_t this routine basically does this....
-// uint96_t intermediate = (*value) * scale
-// return (intermediate >> 32)
+/*
+ * This routine takes a value of raw TSC ticks and applies the passed mul_quant
+ * generated by create_mul_quant(). This is our internal routine for creating
+ * nanoseconds.
+ * Since we don't really have uint96_t this routine basically does this....
+ * uint96_t intermediate = (*value) * scale
+ * return (intermediate >> 32)
+ */
 inline static uint64_t
 fast_get_nano_from_abs(uint64_t value, int scale)
 {
@@ -308,7 +254,7 @@ fast_get_nano_from_abs(uint64_t value, int scale)
 }
 /*
- * this routine basically does this...
+ * This routine basically does this...
 * ts.tv_sec = nanos / 1000000000; create seconds
 * ts.tv_nsec = nanos % 1000000000; create remainder nanos
 */
@@ -324,11 +270,14 @@ nanos_to_timespec(uint64_t nanos)
 return ret.ts;
 }
-// the following two routine perform the 96 bit arithmetic we need to
-// convert generic absolute<->nanoseconds
-// the multiply routine takes a uint64_t and a uint32_t and returns the result in a
-// uint32_t[3] array. the dicide routine takes this uint32_t[3] array and
-// divides it by a uint32_t returning a uint64_t
+/*
+ * The following two routines perform the 96 bit arithmetic we need to
+ * convert generic absolute<->nanoseconds
+ * The multiply routine takes a uint64_t and a uint32_t and returns the result
+ * in a uint32_t[3] array.
+ * The divide routine takes this uint32_t[3] array and divides it by a uint32_t
+ * returning a uint64_t
+ */
 inline static void
 longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result)
 {
@@ -349,7 +298,7 @@ longmul(uint64_t *abstime, uint32_t multiplicand, uint32_t *result)
 " movl %%eax,4(%%ecx) \n\t"
 " adcl $0,%%edx \n\t"
 " movl %%edx,8(%%ecx) // and save it"
- : : "a"(abstime), "c"(multiplicand), "m"(result));
+ : : "a"(abstime), "c"(multiplicand), "m"(result));
 }
@@ -372,133 +321,165 @@ longdiv(uint32_t *numer, uint32_t denom)
 return result;
 }
-#define PIT_Mode4 0x08 /* turn on mode 4 one shot software trigger */
-
-// Enable or disable timer 2.
+/*
+ * Enable or disable timer 2.
+ * Port 0x61 controls timer 2:
+ * bit 0 gates the clock,
+ * bit 1 gates output to speaker.
+ */
 inline static void
-enable_PIT2()
+enable_PIT2(void)
 {
 asm volatile(
- " inb $97,%%al \n\t"
- " and $253,%%al \n\t"
+ " inb $0x61,%%al \n\t"
+ " and $0xFC,%%al \n\t"
 " or $1,%%al \n\t"
- " outb %%al,$97 \n\t"
- : : : "%al" );
+ " outb %%al,$0x61 \n\t"
+ : : : "%al" );
 }
 inline static void
-disable_PIT2()
+disable_PIT2(void)
 {
 asm volatile(
- " inb $97,%%al \n\t"
- " and $253,%%al \n\t"
- " outb %%al,$97 \n\t"
+ " inb $0x61,%%al \n\t"
+ " and $0xFC,%%al \n\t"
+ " outb %%al,$0x61 \n\t"
 : : : "%al" );
 }
-// ctimeRDTSC() routine sets up counter 2 to count down 1/20 of a second
-// it pauses until the value is latched in the counter
-// and then reads the time stamp counter to return to the caller
-// utility routine
-// Code to calculate how many processor cycles are in a second...
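To make the quant constant concrete, here is a minimal standalone sketch (illustrative only, not part of the patch; the helper names are invented) of the conversion fast_get_nano_from_abs() performs for a >1 GHz clock. Splitting the 64-bit delta at bit 32 keeps every partial product inside 64 bits, so no 96-bit intermediate is needed:

#include <stdint.h>
#include <stdio.h>

/* Sketch: quant = (nsec per 1/20 sec, scaled by 2^32) / (TSC ticks per 1/20 sec).
 * Requires a clock above 1 GHz so the quotient fits in 32 bits. */
static uint32_t
make_quant(uint32_t cycles_per_20th)
{
	return (uint32_t)(((uint64_t)(1000000000 / 20) << 32) / cycles_per_20th);
}

/* nanos = delta * quant / 2^32, computed without a 96-bit temporary:
 * with delta = H*2^32 + L, the result is H*quant + ((L*quant) >> 32). */
static uint64_t
tsc_delta_to_nanos(uint64_t delta, uint32_t quant)
{
	uint64_t hi = (delta >> 32) * quant;           /* already divided by 2^32 */
	uint64_t lo = ((delta & 0xFFFFFFFFULL) * quant) >> 32;

	return hi + lo;
}

int main(void)
{
	uint32_t q = make_quant(100000000);	/* hypothetical 2 GHz part: 1e8 ticks per 1/20 s */

	/* one second of ticks at 2 GHz -> prints 1000000000 */
	printf("%llu\n", (unsigned long long)tsc_delta_to_nanos(2000000000ULL, q));
	return 0;
}

This mirrors the shortcut the comment above describes for user space: quant as the numer, RTC_FAST_DENOM as the denom, and the divide degenerates into dropping the low 32 bits.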
inline static void set_PIT2(int value) { -// first, tell the clock we are going to write 16 bytes to the counter and enable one-shot mode -// then write the two bytes into the clock register. -// loop until the value is "realized" in the clock, this happens on the next tick -// +/* + * First, tell the clock we are going to write 16 bits to the counter + * and enable one-shot mode (command 0xB8 to port 0x43) + * Then write the two bytes into the PIT2 clock register (port 0x42). + * Loop until the value is "realized" in the clock, + * this happens on the next tick. + */ asm volatile( - " movb $184,%%al \n\t" - " outb %%al,$67 \n\t" + " movb $0xB8,%%al \n\t" + " outb %%al,$0x43 \n\t" " movb %%dl,%%al \n\t" - " outb %%al,$66 \n\t" + " outb %%al,$0x42 \n\t" " movb %%dh,%%al \n\t" - " outb %%al,$66 \n" -"1: inb $66,%%al \n\t" - " inb $66,%%al \n\t" + " outb %%al,$0x42 \n" +"1: inb $0x42,%%al \n\t" + " inb $0x42,%%al \n\t" " cmp %%al,%%dh \n\t" " jne 1b" - : : "d"(value) : "%al"); + : : "d"(value) : "%al"); } inline static uint64_t get_PIT2(unsigned int *value) { -// this routine first latches the time, then gets the time stamp so we know -// how long the read will take later. Reads register uint64_t result; +/* + * This routine first latches the time (command 0x80 to port 0x43), + * then gets the time stamp so we know how long the read will take later. + * Read (from port 0x42) and return the current value of the timer. + */ asm volatile( " xorl %%ecx,%%ecx \n\t" - " movb $128,%%al \n\t" - " outb %%al,$67 \n\t" + " movb $0x80,%%al \n\t" + " outb %%al,$0x43 \n\t" " rdtsc \n\t" " pushl %%eax \n\t" - " inb $66,%%al \n\t" + " inb $0x42,%%al \n\t" " movb %%al,%%cl \n\t" - " inb $66,%%al \n\t" + " inb $0x42,%%al \n\t" " movb %%al,%%ch \n\t" " popl %%eax " - : "=A"(result), "=c"(*value)); - return result; + : "=A"(result), "=c"(*value)); + return result; } -static uint32_t +/* + * timeRDTSC() + * This routine sets up PIT counter 2 to count down 1/20 of a second. + * It pauses until the value is latched in the counter + * and then reads the time stamp counter to return to the caller. 
 + */
+static uint64_t
 timeRDTSC(void)
 {
+ int attempts = 0;
 uint64_t latchTime;
 uint64_t saveTime,intermediate;
- unsigned int timerValue,x;
+ unsigned int timerValue, lastValue;
 boolean_t int_enabled;
- uint64_t fact[6] = { 2000011734ll,
- 2000045259ll,
- 2000078785ll,
- 2000112312ll,
- 2000145841ll,
- 2000179371ll};
+ /*
+ * Table of correction factors to account for
+ * - timer counter quantization errors, and
+ * - undercounts 0..5
+ */
+#define SAMPLE_CLKS_EXACT (((double) CLKNUM) / 20.0)
+#define SAMPLE_CLKS_INT ((int) CLKNUM / 20)
+#define SAMPLE_NSECS (2000000000LL)
+#define SAMPLE_MULTIPLIER (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT)
+#define ROUND64(x) ((uint64_t)((x) + 0.5))
+ uint64_t scale[6] = {
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)),
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)),
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)),
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)),
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)),
+ ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5))
+ };
 int_enabled = ml_set_interrupts_enabled(FALSE);
+restart:
+ if (attempts >= 2)
+ panic("timeRDTSC() calibration failed with %d attempts\n", attempts);
+ attempts++;
 enable_PIT2(); // turn on PIT2
 set_PIT2(0); // reset timer 2 to be zero
- latchTime = rdtsc_64(); // get the time stamp to time
+ latchTime = rdtsc64(); // get the time stamp to time
 latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes
- set_PIT2(59658); // set up the timer to count 1/20th a second
- saveTime = rdtsc_64(); // now time how ling a 20th a second is...
- get_PIT2(&x);
- do { get_PIT2(&timerValue); x = timerValue;} while (timerValue > x);
+ set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th of a second
+ saveTime = rdtsc64(); // now time how long a 20th of a second is...
+ get_PIT2(&lastValue);
+ get_PIT2(&lastValue); // read twice, first value may be unreliable
 do {
 intermediate = get_PIT2(&timerValue);
- if (timerValue>x) printf("Hey we are going backwards! %d, %d\n",timerValue,x);
- x = timerValue;
- } while ((timerValue != 0) && (timerValue >5));
- printf("Timer value:%d\n",timerValue);
- printf("intermediate 0x%08x:0x%08x\n",intermediate);
- printf("saveTime 0x%08x:0x%08x\n",saveTime);
+ if (timerValue > lastValue) {
+ printf("Hey we are going backwards! %u -> %u, restarting timing\n",
+ timerValue,lastValue);
+ set_PIT2(0);
+ disable_PIT2();
+ goto restart;
+ }
+ lastValue = timerValue;
+ } while (timerValue > 5);
+ kprintf("timerValue %d\n",timerValue);
+ kprintf("intermediate 0x%016llx\n",intermediate);
+ kprintf("saveTime 0x%016llx\n",saveTime);
- intermediate = intermediate - saveTime; // raw # of tsc's it takes for about 1/20 second
- intermediate = intermediate * fact[timerValue]; // actual time spent
- intermediate = intermediate / 2000000000ll; // rescale so its exactly 1/20 a second
- intermediate = intermediate + latchTime; // add on our save fudge
- set_PIT2(0); // reset timer 2 to be zero
- disable_PIT2(0); // turn off PIT 2
+ intermediate -= saveTime; // raw count for about 1/20 second
+ intermediate *= scale[timerValue]; // rescale measured time spent
+ intermediate /= SAMPLE_NSECS; // so it's exactly 1/20 of a second
+ intermediate += latchTime; // add on our save fudge
+
+ set_PIT2(0); // reset timer 2 to be zero
+ disable_PIT2(); // turn off PIT 2
+
+ ml_set_interrupts_enabled(int_enabled);
 return intermediate;
 }
 static uint64_t
-rdtsctime_to_nanoseconds( void )
+tsc_to_nanoseconds(uint64_t abstime)
 {
 uint32_t numer;
 uint32_t denom;
- uint64_t abstime;
- uint32_t intermediate[3];
 numer = rtclock.timebase_const.numer;
 denom = rtclock.timebase_const.denom;
- abstime = rdtsc_64();
- if (denom == 0xFFFFFFFF) {
+ if (denom == RTC_FAST_DENOM) {
 abstime = fast_get_nano_from_abs(abstime, numer);
 } else {
 longmul(&abstime, numer, intermediate);
@@ -508,56 +489,105 @@ rdtsctime_to_nanoseconds( void )
 }
 inline static mach_timespec_t
-rdtsc_to_timespec(void)
+tsc_to_timespec(void)
 {
 uint64_t currNanos;
- currNanos = rdtsctime_to_nanoseconds();
+ currNanos = rtc_nanotime_read();
 return nanos_to_timespec(currNanos);
 }
-/*
- * Initialize non-zero clock structure values.
- */
-void
-rtc_setvals(
- unsigned int new_clknum,
- clock_res_t new_ires
- )
+#define DECREMENTER_MAX UINT_MAX
+static uint32_t
+deadline_to_decrementer(
+ uint64_t deadline,
+ uint64_t now)
+{
+ uint64_t delta;
+
+ if (deadline <= now)
+ return rtc_decrementer_min;
+ else {
+ delta = deadline - now;
+ return MIN(MAX(rtc_decrementer_min,delta),DECREMENTER_MAX);
+ }
+}
+
+static inline uint64_t
+lapic_time_countdown(uint32_t initial_count)
 {
- unsigned int timeperclk;
- unsigned int scale0;
- unsigned int scale1;
- unsigned int res;
+ boolean_t state;
+ uint64_t start_time;
+ uint64_t stop_time;
+ lapic_timer_count_t count;
+
+ state = ml_set_interrupts_enabled(FALSE);
+ lapic_set_timer(FALSE, one_shot, divide_by_1, initial_count);
+ start_time = rdtsc64();
+ do {
+ lapic_get_timer(NULL, NULL, NULL, &count);
+ } while (count > 0);
+ stop_time = rdtsc64();
+ ml_set_interrupts_enabled(state);
- clknum = new_clknum;
- rtc_intr_freq = (NSEC_PER_SEC / new_ires);
- rtc_intr_hertz = rtc_intr_freq / HZ;
- clks_per_int = (clknum + (rtc_intr_freq / 2)) / rtc_intr_freq;
- clks_per_int_99 = clks_per_int - clks_per_int/100;
+ return tsc_to_nanoseconds(stop_time - start_time);
 }
- /*
- * The following calculations are done with scaling integer operations
- * in order that the integer results are accurate to the lsb.
 - */
- timeperclk = div_scale(ZANO, clknum, &scale0); /* 838.105647 nsec */
+static void
+rtc_lapic_timer_calibrate(void)
+{
+ uint32_t nsecs;
+ uint64_t countdown;
- time_per_clk = mul_scale(ZHZ, timeperclk, &scale1); /* 83810 */
- if (scale0 > scale1)
- time_per_clk >>= (scale0 - scale1);
- else if (scale0 < scale1)
- panic("rtc_clock: time_per_clk overflow\n");
+ if (!(cpuid_features() & CPUID_FEATURE_APIC))
+ return;
- /*
- * Notice that rtclock.intr_nsec is signed ==> use unsigned int res
- */
- res = mul_scale(clks_per_int, timeperclk, &scale1); /* 10000276 */
- if (scale0 > scale1)
- rtclock.intr_nsec = res >> (scale0 - scale1);
- else
- panic("rtc_clock: rtclock.intr_nsec overflow\n");
-
- rtc_intr_count = 1;
- RtcDelt = rtclock.intr_nsec/2;
+ /*
+ * Set the local apic timer counting down to zero without an interrupt.
+ * Use the timestamp to calculate how long this takes.
+ */
+ nsecs = (uint32_t) lapic_time_countdown(rtc_intr_nsec);
+
+ /*
+ * Compute a countdown ratio for a given time in nanoseconds.
+ * That is, countdown = time * numer / denom.
+ */
+ countdown = (uint64_t)rtc_intr_nsec * (uint64_t)rtc_intr_nsec / nsecs;
+
+ nsecs = (uint32_t) lapic_time_countdown((uint32_t) countdown);
+
+ rtc_lapic_scale.numer = countdown;
+ rtc_lapic_scale.denom = nsecs;
+
+ kprintf("rtc_lapic_timer_calibrate() scale: %d/%d\n",
+ (uint32_t) countdown, nsecs);
+}
+
+static void
+rtc_lapic_set_timer(
+ uint32_t interval)
+{
+ uint64_t count;
+
+ assert(rtc_lapic_scale.denom);
+
+ count = interval * (uint64_t) rtc_lapic_scale.numer;
+ count /= rtc_lapic_scale.denom;
+
+ lapic_set_timer(TRUE, one_shot, divide_by_1, (uint32_t) count);
+}
+
+static void
+rtc_lapic_start_ticking(void)
+{
+ uint64_t abstime;
+ uint64_t first_tick;
+ uint64_t decr;
+
+ abstime = mach_absolute_time();
+ first_tick = abstime + NSEC_PER_HZ;
+ current_cpu_datap()->cpu_rtc_tick_deadline = first_tick;
+ decr = deadline_to_decrementer(first_tick, abstime);
+ rtc_lapic_set_timer(decr);
 }
 /*
@@ -568,146 +598,388 @@ rtc_setvals(
 int
 sysclk_config(void)
 {
- int RtcFlag;
- int pic;
-#if NCPUS > 1
 mp_disable_preemption();
 if (cpu_number() != master_cpu) {
 mp_enable_preemption();
 return(1);
 }
 mp_enable_preemption();
-#endif
+
+ timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL);
+
+ simple_lock_init(&rtclock.lock, 0);
+
+ return (1);
+}
+
+
+/*
+ * Nanotime/mach_absolute_time
+ * ---------------------------
+ * The timestamp counter (tsc) - which counts cpu clock cycles and can be read
+ * efficiently by the kernel and in userspace - is the reference for all timing.
+ * However, the cpu clock rate is not only platform-dependent but can change
+ * (speed-step) dynamically. Hence tsc is converted into nanoseconds which is
+ * identical to mach_absolute_time. The conversion of tsc to nanoseconds is
+ * encapsulated by nanotime.
+ *
+ * The kernel maintains nanotime information recording:
+ * - the current ratio of tsc to nanoseconds
+ * with this ratio expressed as a 32-bit scale and shift
+ * (power of 2 divider);
+ * - the tsc (step_tsc) and nanotime (step_ns) at which the current
+ * ratio (clock speed) began.
+ * So a tsc value can be converted to nanotime by:
+ *
+ * nanotime = (((tsc - step_tsc)*scale) >> shift) + step_ns
+ *
+ * In general, (tsc - step_tsc) is a 64-bit quantity with the scaling
+ * involving a 96-bit intermediate value.
However, by saving the converted + * values at each tick (or at any intervening speed-step) - base_tsc and + * base_ns - we can perform conversions relative to these and be assured that + * (tsc - tick_tsc) is 32-bits. Hence: + * + * fast_nanotime = (((tsc - base_tsc)*scale) >> shift) + base_ns + * + * The tuple {base_tsc, base_ns, scale, shift} is exported in the commpage + * for the userspace nanotime routine to read. A duplicate check_tsc is + * appended so that the consistency of the read can be verified. Note that + * this scheme is essential for MP systems in which the commpage is updated + * by the master cpu but may be read concurrently by other cpus. + * + */ +static inline void +rtc_nanotime_set_commpage(rtc_nanotime_t *rntp) +{ + commpage_nanotime_t cp_nanotime; + + /* Only the master cpu updates the commpage */ + if (cpu_number() != master_cpu) + return; + + cp_nanotime.nt_base_tsc = rntp->rnt_tsc; + cp_nanotime.nt_base_ns = rntp->rnt_nanos; + cp_nanotime.nt_scale = rntp->rnt_scale; + cp_nanotime.nt_shift = rntp->rnt_shift; + + commpage_set_nanotime(&cp_nanotime); +} + +static void +rtc_nanotime_init(void) +{ + rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; + rtc_nanotime_t *master_rntp = &cpu_datap(master_cpu)->cpu_rtc_nanotime; + + if (cpu_number() == master_cpu) { + rntp->rnt_tsc = rdtsc64(); + rntp->rnt_nanos = tsc_to_nanoseconds(rntp->rnt_tsc); + rntp->rnt_scale = rtc_quant_scale; + rntp->rnt_shift = rtc_quant_shift; + rntp->rnt_step_tsc = 0ULL; + rntp->rnt_step_nanos = 0ULL; + } else { + /* + * Copy master processor's nanotime info. + * Loop required in case this changes while copying. + */ + do { + *rntp = *master_rntp; + } while (rntp->rnt_tsc != master_rntp->rnt_tsc); + } +} + +static inline void +_rtc_nanotime_update(rtc_nanotime_t *rntp, uint64_t tsc) +{ + uint64_t tsc_delta; + uint64_t ns_delta; + + tsc_delta = tsc - rntp->rnt_step_tsc; + ns_delta = tsc_to_nanoseconds(tsc_delta); + rntp->rnt_nanos = rntp->rnt_step_nanos + ns_delta; + rntp->rnt_tsc = tsc; +} + +static void +rtc_nanotime_update(void) +{ + rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; + + assert(get_preemption_level() > 0); + assert(!ml_get_interrupts_enabled()); + + _rtc_nanotime_update(rntp, rdtsc64()); + rtc_nanotime_set_commpage(rntp); +} + +static void +rtc_nanotime_scale_update(void) +{ + rtc_nanotime_t *rntp = ¤t_cpu_datap()->cpu_rtc_nanotime; + uint64_t tsc = rdtsc64(); + + assert(!ml_get_interrupts_enabled()); + + /* + * Update time based on past scale. + */ + _rtc_nanotime_update(rntp, tsc); + /* - * Setup device. + * Update scale and timestamp this update. */ - pic = 0; /* FIXME .. 
interrupt registration moved to AppleIntelClock */
+ rntp->rnt_scale = rtc_quant_scale;
+ rntp->rnt_shift = rtc_quant_shift;
+ rntp->rnt_step_tsc = rntp->rnt_tsc;
+ rntp->rnt_step_nanos = rntp->rnt_nanos;
+ /* Export update to userland */
+ rtc_nanotime_set_commpage(rntp);
+}
+
+static uint64_t
+_rtc_nanotime_read(void)
+{
+ rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+ uint64_t rnt_tsc;
+ uint32_t rnt_scale;
+ uint32_t rnt_shift;
+ uint64_t rnt_nanos;
+ uint64_t tsc;
+ uint64_t tsc_delta;
+
+ rnt_scale = rntp->rnt_scale;
+ if (rnt_scale == 0)
+ return 0ULL;
+
+ rnt_shift = rntp->rnt_shift;
+ rnt_nanos = rntp->rnt_nanos;
+ rnt_tsc = rntp->rnt_tsc;
+ tsc = rdtsc64();
+
+ tsc_delta = tsc - rnt_tsc;
+ if ((tsc_delta >> 32) != 0)
+ return rnt_nanos + tsc_to_nanoseconds(tsc_delta);
+
+ /* Let the compiler optimize(?): */
+ if (rnt_shift == 32)
+ return rnt_nanos + ((tsc_delta * rnt_scale) >> 32);
+ else
+ return rnt_nanos + ((tsc_delta * rnt_scale) >> rnt_shift);
+}
+
+uint64_t
+rtc_nanotime_read(void)
+{
+ uint64_t result;
+ uint64_t rnt_tsc;
+ rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
 /*
- * We should attempt to test the real-time clock
- * device here. If it were to fail, we should panic
- * the system.
+ * Use timestamp to ensure the uptime record isn't changed.
+ * This avoids disabling interrupts.
+ * And note, this is a per-cpu structure, hence no locking.
 */
- RtcFlag = /* test device */1;
- printf("realtime clock configured\n");
+ do {
+ rnt_tsc = rntp->rnt_tsc;
+ result = _rtc_nanotime_read();
+ } while (rnt_tsc != rntp->rnt_tsc);
+
+ return result;
+}
+
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency is about to occur.
+ * The scale is not changed until rtc_clock_stepped() is called.
+ * Between these times there is an uncertainty as to exactly when
+ * the change takes effect. FIXME: by using another timing source
+ * we could eliminate this error.
+ */
+void
+rtc_clock_stepping(__unused uint32_t new_frequency,
+ __unused uint32_t old_frequency)
+{
+ boolean_t istate;
- simple_lock_init(&rtclock.lock, ETAP_NO_TRACE);
- return (RtcFlag);
+ istate = ml_set_interrupts_enabled(FALSE);
+ rtc_nanotime_scale_update();
+ ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * This function is called by the speed-step driver when a
+ * change of cpu clock frequency has just occurred. This change
+ * is expressed as a ratio relative to the boot clock rate.
+ */
+void
+rtc_clock_stepped(uint32_t new_frequency, uint32_t old_frequency)
+{
+ boolean_t istate;
+
+ istate = ml_set_interrupts_enabled(FALSE);
+ if (rtc_boot_frequency == 0) {
+ /*
+ * At the first ever stepping, old frequency is the real
+ * initial clock rate. This step and all others are based
+ * relative to this initial frequency at which the tsc
+ * calibration was made. Hence we must remember this base
+ * frequency as reference.
+ */
+ rtc_boot_frequency = old_frequency;
+ }
+ rtc_set_cyc_per_sec(rtc_cycle_count * new_frequency /
+ rtc_boot_frequency);
+ rtc_nanotime_scale_update();
+ ml_set_interrupts_enabled(istate);
 }
 /*
- * Initialize the real-time clock device. Return success (1)
- * or failure (0). Since the real-time clock is required to
- * provide canonical mapped time, we allocate a page to keep
- * the clock time value. In addition, various variables used
- * to support the clock are initialized. Note: the clock is
- * not started until rtclock_reset is called.
+ * rtc_sleep_wakeup() is called from acpi on awakening from an S3 sleep
+ */
+void
+rtc_sleep_wakeup(void)
+{
+ rtc_nanotime_t *rntp = &current_cpu_datap()->cpu_rtc_nanotime;
+
+ boolean_t istate;
+
+ istate = ml_set_interrupts_enabled(FALSE);
+
+ /*
+ * Reset nanotime.
+ * The timestamp counter will have been reset
+ * but nanotime (uptime) marches onward.
+ * We assume that we're still at the former cpu frequency.
+ */
+ rntp->rnt_tsc = rdtsc64();
+ rntp->rnt_step_tsc = 0ULL;
+ rntp->rnt_step_nanos = rntp->rnt_nanos;
+ rtc_nanotime_set_commpage(rntp);
+
+ /* Restart tick interrupts from the LAPIC timer */
+ rtc_lapic_start_ticking();
+
+ ml_set_interrupts_enabled(istate);
+}
+
+/*
+ * Initialize the real-time clock device.
+ * In addition, various variables used to support the clock are initialized.
 */
 int
 sysclk_init(void)
 {
- vm_offset_t *vp;
-#if NCPUS > 1
+ uint64_t cycles;
+
 mp_disable_preemption();
- if (cpu_number() != master_cpu) {
- mp_enable_preemption();
- return(1);
+ if (cpu_number() == master_cpu) {
+ /*
+ * Perform calibration.
+ * The PIT is used as the reference to compute how many
+ * TSC counts (cpu clock cycles) occur per second.
+ */
+ rtc_cycle_count = timeRDTSC();
+ cycles = rtc_set_cyc_per_sec(rtc_cycle_count);
+
+ /*
+ * Set min/max to actual.
+ * ACPI may update these later if speed-stepping is detected.
+ */
+ gPEClockFrequencyInfo.cpu_frequency_min_hz = cycles;
+ gPEClockFrequencyInfo.cpu_frequency_max_hz = cycles;
+ printf("[RTCLOCK] frequency %llu (%llu)\n",
+ cycles, rtc_cyc_per_sec);
+
+ rtc_lapic_timer_calibrate();
+
+ /* Minimum interval is 1usec */
+ rtc_decrementer_min = deadline_to_decrementer(NSEC_PER_USEC,
+ 0ULL);
+ /* Point LAPIC interrupts to hardclock() */
+ lapic_set_timer_func((i386_intr_func_t) rtclock_intr);
+
+ clock_timebase_init();
+ rtc_initialized = TRUE;
 }
+
+ rtc_nanotime_init();
+
+ rtc_lapic_start_ticking();
+
 mp_enable_preemption();
-#endif
- RtcTime = &rtclock.time;
- rtc_setvals( CLKNUM, RTC_MINRES ); /* compute constants */
- rtc_set_cyc_per_sec(); /* compute number of tsc beats per second */
- clock_timebase_init();
 return (1);
 }
-static volatile unsigned int last_ival = 0;
-
 /*
 * Get the clock device time. This routine is responsible
 * for converting the device's machine dependent time value
 * into a canonical mach_timespec_t value.
 */
-kern_return_t
-sysclk_gettime(
+static kern_return_t
+sysclk_gettime_internal(
 mach_timespec_t *cur_time) /* OUT */
 {
- if (!RtcTime) {
- /* Uninitialized */
- cur_time->tv_nsec = 0;
- cur_time->tv_sec = 0;
- return (KERN_SUCCESS);
- }
-
- *cur_time = rdtsc_to_timespec();
+ *cur_time = tsc_to_timespec();
 return (KERN_SUCCESS);
 }
 kern_return_t
-sysclk_gettime_internal(
+sysclk_gettime(
 mach_timespec_t *cur_time) /* OUT */
 {
- if (!RtcTime) {
- /* Uninitialized */
- cur_time->tv_nsec = 0;
- cur_time->tv_sec = 0;
- return (KERN_SUCCESS);
- }
- *cur_time = rdtsc_to_timespec();
- return (KERN_SUCCESS);
+ return sysclk_gettime_internal(cur_time);
 }
-/*
- * Get the clock device time when ALL interrupts are already disabled.
- * Same as above except for turning interrupts off and on.
- * This routine is responsible for converting the device's machine dependent
- * time value into a canonical mach_timespec_t value.
- */ void sysclk_gettime_interrupts_disabled( mach_timespec_t *cur_time) /* OUT */ { - if (!RtcTime) { - /* Uninitialized */ - cur_time->tv_nsec = 0; - cur_time->tv_sec = 0; - return; - } - *cur_time = rdtsc_to_timespec(); + (void) sysclk_gettime_internal(cur_time); } // utility routine // Code to calculate how many processor cycles are in a second... -static void -rtc_set_cyc_per_sec() +static uint64_t +rtc_set_cyc_per_sec(uint64_t cycles) { - uint32_t twen_cycles; - uint32_t cycles; + if (cycles > (NSEC_PER_SEC/20)) { + // we can use just a "fast" multiply to get nanos + rtc_quant_shift = 32; + rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); + rtclock.timebase_const.numer = rtc_quant_scale; // timeRDTSC is 1/20 + rtclock.timebase_const.denom = RTC_FAST_DENOM; + } else { + rtc_quant_shift = 26; + rtc_quant_scale = create_mul_quant_GHZ(rtc_quant_shift, cycles); + rtclock.timebase_const.numer = NSEC_PER_SEC/20; // timeRDTSC is 1/20 + rtclock.timebase_const.denom = cycles; + } + rtc_cyc_per_sec = cycles*20; // multiply it by 20 and we are done.. + // BUT we also want to calculate... + + cycles = ((rtc_cyc_per_sec + (UI_CPUFREQ_ROUNDING_FACTOR/2)) + / UI_CPUFREQ_ROUNDING_FACTOR) + * UI_CPUFREQ_ROUNDING_FACTOR; - twen_cycles = timeRDTSC(); - if (twen_cycles> (1000000000/20)) { - // we create this value so that you can use just a "fast" multiply to get nanos - rtc_quant_scale = create_mul_quant_GHZ(twen_cycles); - rtclock.timebase_const.numer = rtc_quant_scale; // because ctimeRDTSC gives us 1/20 a seconds worth - rtclock.timebase_const.denom = 0xffffffff; // so that nanoseconds = (TSC * numer) / denom - + /* + * Set current measured speed. + */ + if (cycles >= 0x100000000ULL) { + gPEClockFrequencyInfo.cpu_clock_rate_hz = 0xFFFFFFFFUL; } else { - rtclock.timebase_const.numer = 1000000000/20; // because ctimeRDTSC gives us 1/20 a seconds worth - rtclock.timebase_const.denom = twen_cycles; // so that nanoseconds = (TSC * numer) / denom + gPEClockFrequencyInfo.cpu_clock_rate_hz = (unsigned long)cycles; } - cycles = twen_cycles; // number of cycles in 1/20th a second - rtc_cyc_per_sec = cycles*20; // multiply it by 20 and we are done.. BUT we also want to calculate... 
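A quick worked check of the reporting path above (a sketch with an invented sample value, not patch code): the new form adds half of UI_CPUFREQ_ROUNDING_FACTOR before the truncating divide, so the advertised rate rounds to the nearest 10 MHz, whereas the replaced `+ UI_CPUFREQ_ROUNDING_FACTOR - 1` form always rounded up:

#include <stdint.h>
#include <stdio.h>

#define UI_CPUFREQ_ROUNDING_FACTOR	10000000

int main(void)
{
	uint64_t measured = 2003000000ULL;	/* hypothetical measured Hz */
	uint64_t reported = ((measured + (UI_CPUFREQ_ROUNDING_FACTOR / 2))
	    / UI_CPUFREQ_ROUNDING_FACTOR) * UI_CPUFREQ_ROUNDING_FACTOR;

	/* prints: 2003000000 Hz -> 2000000000 Hz
	 * (the old round-up form would have reported 2010000000) */
	printf("%llu Hz -> %llu Hz\n",
	    (unsigned long long)measured, (unsigned long long)reported);
	return 0;
}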
+ gPEClockFrequencyInfo.cpu_frequency_hz = cycles; - cycles = ((rtc_cyc_per_sec + UI_CPUFREQ_ROUNDING_FACTOR - 1) / UI_CPUFREQ_ROUNDING_FACTOR) * UI_CPUFREQ_ROUNDING_FACTOR; - gPEClockFrequencyInfo.cpu_clock_rate_hz = cycles; -DISPLAYVALUE(rtc_set_cyc_per_sec,rtc_cyc_per_sec); -DISPLAYEXIT(rtc_set_cyc_per_sec); + kprintf("[RTCLOCK] frequency %llu (%llu)\n", cycles, rtc_cyc_per_sec); + return(cycles); } void @@ -717,7 +989,7 @@ clock_get_system_microtime( { mach_timespec_t now; - sysclk_gettime(&now); + (void) sysclk_gettime_internal(&now); *secs = now.tv_sec; *microsecs = now.tv_nsec / NSEC_PER_USEC; @@ -730,7 +1002,7 @@ clock_get_system_nanotime( { mach_timespec_t now; - sysclk_gettime(&now); + (void) sysclk_gettime_internal(&now); *secs = now.tv_sec; *nanosecs = now.tv_nsec; @@ -745,31 +1017,18 @@ sysclk_getattr( clock_attr_t attr, /* OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { - spl_t s; - if (*count != 1) return (KERN_FAILURE); switch (flavor) { case CLOCK_GET_TIME_RES: /* >0 res */ -#if (NCPUS == 1) - LOCK_RTC(s); - *(clock_res_t *) attr = 1000; - UNLOCK_RTC(s); - break; -#endif /* (NCPUS == 1) */ - case CLOCK_ALARM_CURRES: /* =0 no alarm */ - LOCK_RTC(s); - *(clock_res_t *) attr = rtclock.intr_nsec; - UNLOCK_RTC(s); + *(clock_res_t *) attr = rtc_intr_nsec; break; + case CLOCK_ALARM_CURRES: /* =0 no alarm */ case CLOCK_ALARM_MAXRES: - *(clock_res_t *) attr = RTC_MAXRES; - break; - case CLOCK_ALARM_MINRES: - *(clock_res_t *) attr = RTC_MINRES; + *(clock_res_t *) attr = 0; break; default: @@ -778,60 +1037,6 @@ sysclk_getattr( return (KERN_SUCCESS); } -/* - * Set clock device attributes. - */ -kern_return_t -sysclk_setattr( - clock_flavor_t flavor, - clock_attr_t attr, /* IN */ - mach_msg_type_number_t count) /* IN */ -{ - spl_t s; - int freq; - int adj; - clock_res_t new_ires; - - if (count != 1) - return (KERN_FAILURE); - switch (flavor) { - - case CLOCK_GET_TIME_RES: - case CLOCK_ALARM_MAXRES: - case CLOCK_ALARM_MINRES: - return (KERN_FAILURE); - - case CLOCK_ALARM_CURRES: - new_ires = *(clock_res_t *) attr; - - /* - * The new resolution must be within the predetermined - * range. If the desired resolution cannot be achieved - * to within 0.1%, an error is returned. - */ - if (new_ires < RTC_MAXRES || new_ires > RTC_MINRES) - return (KERN_INVALID_VALUE); - freq = (NSEC_PER_SEC / new_ires); - adj = (((clknum % freq) * new_ires) / clknum); - if (adj > (new_ires / 1000)) - return (KERN_INVALID_VALUE); - /* - * Record the new alarm resolution which will take effect - * on the next HZ aligned clock tick. - */ - LOCK_RTC(s); - if ( freq != rtc_intr_freq ) { - rtclock.new_ires = new_ires; - new_clknum = clknum; - } - UNLOCK_RTC(s); - return (KERN_SUCCESS); - - default: - return (KERN_INVALID_VALUE); - } -} - /* * Set next alarm time for the clock device. 
This call * always resets the time to deliver an alarm for the @@ -841,12 +1046,9 @@ void sysclk_setalarm( mach_timespec_t *alarm_time) { - spl_t s; - - LOCK_RTC(s); - rtclock.alarm_time = *alarm_time; - RtcAlrm = &rtclock.alarm_time; - UNLOCK_RTC(s); + timer_call_enter(&rtclock_alarm_timer, + (uint64_t) alarm_time->tv_sec * NSEC_PER_SEC + + alarm_time->tv_nsec); } /* @@ -876,15 +1078,15 @@ calend_gettime( { spl_t s; - LOCK_RTC(s); + RTC_LOCK(s); if (!rtclock.calend_is_set) { - UNLOCK_RTC(s); + RTC_UNLOCK(s); return (KERN_FAILURE); } (void) sysclk_gettime_internal(cur_time); ADD_MACH_TIMESPEC(cur_time, &rtclock.calend_offset); - UNLOCK_RTC(s); + RTC_UNLOCK(s); return (KERN_SUCCESS); } @@ -921,15 +1123,20 @@ clock_set_calendar_microtime( uint32_t microsecs) { mach_timespec_t new_time, curr_time; + uint32_t old_offset; spl_t s; - LOCK_RTC(s); + new_time.tv_sec = secs; + new_time.tv_nsec = microsecs * NSEC_PER_USEC; + + RTC_LOCK(s); + old_offset = rtclock.calend_offset.tv_sec; (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset.tv_sec = new_time.tv_sec = secs; - rtclock.calend_offset.tv_nsec = new_time.tv_nsec = microsecs * NSEC_PER_USEC; + rtclock.calend_offset = new_time; SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); + rtclock.boottime += rtclock.calend_offset.tv_sec - old_offset; rtclock.calend_is_set = TRUE; - UNLOCK_RTC(s); + RTC_UNLOCK(s); (void) bbc_settime(&new_time); @@ -945,24 +1152,13 @@ calend_getattr( clock_attr_t attr, /* OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { - spl_t s; - if (*count != 1) return (KERN_FAILURE); switch (flavor) { case CLOCK_GET_TIME_RES: /* >0 res */ -#if (NCPUS == 1) - LOCK_RTC(s); - *(clock_res_t *) attr = 1000; - UNLOCK_RTC(s); - break; -#else /* (NCPUS == 1) */ - LOCK_RTC(s); - *(clock_res_t *) attr = rtclock.intr_nsec; - UNLOCK_RTC(s); + *(clock_res_t *) attr = rtc_intr_nsec; break; -#endif /* (NCPUS == 1) */ case CLOCK_ALARM_CURRES: /* =0 no alarm */ case CLOCK_ALARM_MINRES: @@ -990,7 +1186,7 @@ clock_set_calendar_adjtime( total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; - LOCK_RTC(s); + RTC_LOCK(s); ototal = rtclock.calend_adjtotal; if (total != 0) { @@ -1013,12 +1209,12 @@ clock_set_calendar_adjtime( rtclock.calend_adjtotal = total; rtclock.calend_adjdelta = delta; - interval = (NSEC_PER_SEC / HZ); + interval = NSEC_PER_HZ; } else rtclock.calend_adjdelta = rtclock.calend_adjtotal = 0; - UNLOCK_RTC(s); + RTC_UNLOCK(s); if (ototal == 0) *secs = *microsecs = 0; @@ -1037,7 +1233,7 @@ clock_adjust_calendar(void) int32_t delta; spl_t s; - LOCK_RTC(s); + RTC_LOCK(s); delta = rtclock.calend_adjdelta; ADD_MACH_TIMESPEC_NSEC(&rtclock.calend_offset, delta); @@ -1054,9 +1250,9 @@ clock_adjust_calendar(void) } if (rtclock.calend_adjdelta != 0) - interval = (NSEC_PER_SEC / HZ); + interval = NSEC_PER_HZ; - UNLOCK_RTC(s); + RTC_UNLOCK(s); return (interval); } @@ -1070,190 +1266,176 @@ clock_initialize_calendar(void) if (bbc_gettime(&bbc_time) != KERN_SUCCESS) return; - LOCK_RTC(s); - if (!rtclock.calend_is_set) { - (void) sysclk_gettime_internal(&curr_time); - rtclock.calend_offset = bbc_time; - SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); - rtclock.calend_is_set = TRUE; - } - UNLOCK_RTC(s); + RTC_LOCK(s); + if (rtclock.boottime == 0) + rtclock.boottime = bbc_time.tv_sec; + (void) sysclk_gettime_internal(&curr_time); + rtclock.calend_offset = bbc_time; + SUB_MACH_TIMESPEC(&rtclock.calend_offset, &curr_time); + rtclock.calend_is_set = TRUE; + RTC_UNLOCK(s); host_notify_calendar_change(); } +void 
+clock_get_boottime_nanotime( + uint32_t *secs, + uint32_t *nanosecs) +{ + *secs = rtclock.boottime; + *nanosecs = 0; +} + void clock_timebase_info( mach_timebase_info_t info) { - spl_t s; - - LOCK_RTC(s); - if (rtclock.timebase_const.denom == 0xFFFFFFFF) { - info->numer = info->denom = rtc_quant_scale; - } else { - info->numer = info->denom = 1; - } - UNLOCK_RTC(s); + info->numer = info->denom = 1; } void clock_set_timer_deadline( uint64_t deadline) { - spl_t s; - - LOCK_RTC(s); - rtclock.timer_deadline = deadline; - rtclock.timer_is_set = TRUE; - UNLOCK_RTC(s); + spl_t s; + cpu_data_t *pp = current_cpu_datap(); + rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; + uint64_t abstime; + uint64_t decr; + + assert(get_preemption_level() > 0); + assert(rtclock_timer_expire); + + RTC_INTRS_OFF(s); + mytimer->deadline = deadline; + mytimer->is_set = TRUE; + if (!mytimer->has_expired) { + abstime = mach_absolute_time(); + if (mytimer->deadline < pp->cpu_rtc_tick_deadline) { + decr = deadline_to_decrementer(mytimer->deadline, + abstime); + rtc_lapic_set_timer(decr); + pp->cpu_rtc_intr_deadline = mytimer->deadline; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | + DBG_FUNC_NONE, decr, 2, 0, 0, 0); + } + } + RTC_INTRS_ON(s); } void clock_set_timer_func( clock_timer_func_t func) { - spl_t s; - - LOCK_RTC(s); - if (rtclock.timer_expire == NULL) - rtclock.timer_expire = func; - UNLOCK_RTC(s); + if (rtclock_timer_expire == NULL) + rtclock_timer_expire = func; } - - /* - * Load the count register and start the clock. + * Real-time clock device interrupt. */ -#define RTCLOCK_RESET() { \ - outb(PITCTL_PORT, PIT_C0|PIT_NDIVMODE|PIT_READMODE); \ - outb(PITCTR0_PORT, (clks_per_int & 0xff)); \ - outb(PITCTR0_PORT, (clks_per_int >> 8)); \ -} - -/* - * Reset the clock device. This causes the realtime clock - * device to reload its mode and count value (frequency). - * Note: the CPU should be calibrated - * before starting the clock for the first time. - */ - void -rtclock_reset(void) -{ - int s; - -#if NCPUS > 1 - mp_disable_preemption(); - if (cpu_number() != master_cpu) { - mp_enable_preemption(); - return; - } - mp_enable_preemption(); -#endif /* NCPUS > 1 */ - LOCK_RTC(s); - RTCLOCK_RESET(); - UNLOCK_RTC(s); -} - -/* - * Real-time clock device interrupt. Called only on the - * master processor. Updates the clock time and upcalls - * into the higher level clock code to deliver alarms. - */ -int rtclock_intr(struct i386_interrupt_state *regs) { uint64_t abstime; - mach_timespec_t clock_time; - int i; - spl_t s; - boolean_t usermode; - - /* - * Update clock time. Do the update so that the macro - * MTS_TO_TS() for reading the mapped time works (e.g. - * update in order: mtv_csec, mtv_time.tv_nsec, mtv_time.tv_sec). 
- */ - LOCK_RTC(s); - abstime = rdtsctime_to_nanoseconds(); // get the time as of the TSC - clock_time = nanos_to_timespec(abstime); // turn it into a timespec - rtclock.time.tv_nsec = clock_time.tv_nsec; - rtclock.time.tv_sec = clock_time.tv_sec; - rtclock.abstime = abstime; - - /* note time now up to date */ - last_ival = 0; + uint32_t latency; + uint64_t decr; + uint64_t decr_tick; + uint64_t decr_timer; + cpu_data_t *pp = current_cpu_datap(); + rtclock_timer_t *mytimer = &pp->cpu_rtc_timer; + + assert(get_preemption_level() > 0); + assert(!ml_get_interrupts_enabled()); + + abstime = _rtc_nanotime_read(); + latency = (uint32_t) abstime - pp->cpu_rtc_intr_deadline; + if (pp->cpu_rtc_tick_deadline <= abstime) { + rtc_nanotime_update(); + clock_deadline_for_periodic_event( + NSEC_PER_HZ, abstime, &pp->cpu_rtc_tick_deadline); + hertz_tick( +#if STAT_TIME + NSEC_PER_HZ, +#endif + (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0), + regs->eip); + } - /* - * On a HZ-tick boundary: return 0 and adjust the clock - * alarm resolution (if requested). Otherwise return a - * non-zero value. - */ - if ((i = --rtc_intr_count) == 0) { - if (rtclock.new_ires) { - rtc_setvals(new_clknum, rtclock.new_ires); - RTCLOCK_RESET(); /* lock clock register */ - rtclock.new_ires = 0; - } - rtc_intr_count = rtc_intr_hertz; - UNLOCK_RTC(s); - usermode = (regs->efl & EFL_VM) || ((regs->cs & 0x03) != 0); - hertz_tick(usermode, regs->eip); - LOCK_RTC(s); + abstime = _rtc_nanotime_read(); + if (mytimer->is_set && mytimer->deadline <= abstime) { + mytimer->has_expired = TRUE; + mytimer->is_set = FALSE; + (*rtclock_timer_expire)(abstime); + assert(!ml_get_interrupts_enabled()); + mytimer->has_expired = FALSE; } - if ( rtclock.timer_is_set && - rtclock.timer_deadline <= abstime ) { - rtclock.timer_is_set = FALSE; - UNLOCK_RTC(s); + /* Log the interrupt service latency (-ve value expected by tool) */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, + -latency, (uint32_t)regs->eip, 0, 0, 0); - (*rtclock.timer_expire)(abstime); + abstime = _rtc_nanotime_read(); + decr_tick = deadline_to_decrementer(pp->cpu_rtc_tick_deadline, abstime); + decr_timer = (mytimer->is_set) ? + deadline_to_decrementer(mytimer->deadline, abstime) : + DECREMENTER_MAX; + decr = MIN(decr_tick, decr_timer); + pp->cpu_rtc_intr_deadline = abstime + decr; - LOCK_RTC(s); - } + rtc_lapic_set_timer(decr); - /* - * Perform alarm clock processing if needed. The time - * passed up is incremented by a half-interrupt tick - * to trigger alarms closest to their desired times. - * The clock_alarm_intr() routine calls sysclk_setalrm() - * before returning if later alarms are pending. - */ + /* Log the new decrementer value */ + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, + decr, 3, 0, 0, 0); - if (RtcAlrm && (RtcAlrm->tv_sec < RtcTime->tv_sec || - (RtcAlrm->tv_sec == RtcTime->tv_sec && - RtcDelt >= RtcAlrm->tv_nsec - RtcTime->tv_nsec))) { - clock_time.tv_sec = 0; - clock_time.tv_nsec = RtcDelt; - ADD_MACH_TIMESPEC (&clock_time, RtcTime); - RtcAlrm = 0; - UNLOCK_RTC(s); - /* - * Call clock_alarm_intr() without RTC-lock. - * The lock ordering is always CLOCK-lock - * before RTC-lock. 
- */ - clock_alarm_intr(SYSTEM_CLOCK, &clock_time); - LOCK_RTC(s); - } +} - UNLOCK_RTC(s); - return (i); +static void +rtclock_alarm_expire( + __unused timer_call_param_t p0, + __unused timer_call_param_t p1) +{ + mach_timespec_t clock_time; + + (void) sysclk_gettime_internal(&clock_time); + + clock_alarm_intr(SYSTEM_CLOCK, &clock_time); } void clock_get_uptime( uint64_t *result) { - *result = rdtsctime_to_nanoseconds(); + *result = rtc_nanotime_read(); } uint64_t mach_absolute_time(void) { - return rdtsctime_to_nanoseconds(); + return rtc_nanotime_read(); +} + +void +absolutetime_to_microtime( + uint64_t abstime, + uint32_t *secs, + uint32_t *microsecs) +{ + uint32_t remain; + + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (remain) + : "A" (abstime), "r" (NSEC_PER_SEC)); + asm volatile( + "divl %3" + : "=a" (*microsecs) + : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); } void @@ -1306,26 +1488,8 @@ nanoseconds_to_absolutetime( *result = nanoseconds; } -/* - * Spin-loop delay primitives. - */ void -delay_for_interval( - uint32_t interval, - uint32_t scale_factor) -{ - uint64_t now, end; - - clock_interval_to_deadline(interval, scale_factor, &end); - - do { - cpu_pause(); - now = mach_absolute_time(); - } while (now < end); -} - -void -clock_delay_until( +machine_delay_until( uint64_t deadline) { uint64_t now; @@ -1335,10 +1499,3 @@ clock_delay_until( now = mach_absolute_time(); } while (now < deadline); } - -void -delay( - int usec) -{ - delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC); -} diff --git a/osfmk/i386/rtclock_entries.h b/osfmk/i386/rtclock_entries.h index 9bc1d3377..7c2ef27aa 100644 --- a/osfmk/i386/rtclock_entries.h +++ b/osfmk/i386/rtclock_entries.h @@ -35,13 +35,8 @@ extern kern_return_t rtc_getattr( clock_flavor_t flavor, clock_attr_t ttr, mach_msg_type_number_t * count); -extern kern_return_t rtc_setattr( - clock_flavor_t flavor, - clock_attr_t ttr, - mach_msg_type_number_t count); extern void rtc_setalrm( mach_timespec_t * alarmtime); -extern void rtclock_reset(void); -extern int rtclock_intr( +extern void rtclock_intr( struct i386_interrupt_state *regs); -extern void calibrate_delay(void); +extern void rtc_sleep_wakeup(void); diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h index 9206c67b8..b2b83c246 100644 --- a/osfmk/i386/seg.h +++ b/osfmk/i386/seg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -52,13 +52,66 @@ #ifndef _I386_SEG_H_ #define _I386_SEG_H_ + #include +#include +#include /* * i386 segmentation. */ +static inline uint16_t +sel_to_selector(sel_t sel) +{ + union { + sel_t sel; + uint16_t selector; + } tconv; + + tconv.sel = sel; + + return (tconv.selector); +} + +static inline sel_t +selector_to_sel(uint16_t selector) +{ + union { + uint16_t selector; + sel_t sel; + } tconv; + + tconv.selector = selector; + + return (tconv.sel); +} + +#define LDTSZ 15 /* size of the kernel ldt in entries*/ + +#if MACH_KDB +#ifdef MACH_BSD +#define GDTSZ 14 +#else +#define GDTSZ 11 +#endif +#else /* MACH_KDB */ +#ifdef MACH_BSD +#define GDTSZ 13 +#else +#define GDTSZ 10 +#endif +#endif /* MACH_KDB */ + +/* + * Interrupt table is always 256 entries long. + */ +#define IDTSZ 256 + #ifndef __ASSEMBLER__ + +#include + /* * Real segment descriptor. 
*/ @@ -93,6 +146,26 @@ struct fake_descriptor { /* word count, for gate */ unsigned int access:8; /* access */ }; + +/* + * Boot-time data for master (or only) CPU + */ +extern struct fake_descriptor idt[IDTSZ]; +extern struct fake_descriptor gdt[GDTSZ]; +extern struct fake_descriptor ldt[LDTSZ]; +extern struct i386_tss ktss; + +__BEGIN_DECLS + +#if MACH_KDB +extern char db_stack_store[]; +extern char db_task_stack_store[]; +extern struct i386_tss dbtss; +extern void db_task_start(void); +#endif /* MACH_KDB */ + +__END_DECLS + #endif /*__ASSEMBLER__*/ #define SZ_32 0x4 /* 32-bit segment */ @@ -141,7 +214,9 @@ struct fake_descriptor { /* * Convert selector to descriptor table index. */ -#define sel_idx(sel) ((sel)>>3) +#define sel_idx(sel) (selector_to_sel(sel).index) + +#define NULL_SEG 0 /* * User descriptors for MACH - 32-bit flat address space @@ -150,9 +225,9 @@ struct fake_descriptor { #define USER_RPC 0x0f /* mach rpc call gate */ #define USER_CS 0x17 /* user code segment */ #define USER_DS 0x1f /* user data segment */ -#define USER_CTHREAD 0x27 /* user cthread area */ - -#define LDTSZ 5 +#define USER_CTHREAD 0x27 /* user cthread area */ +#define USER_SETTABLE 0x2f /* start of user settable ldt entries */ +#define USLDTSZ 10 /* number of user settable entries */ /* * Kernel descriptors for MACH - 32-bit flat address space. @@ -173,7 +248,7 @@ struct fake_descriptor { #endif #define USER_FPREGS 0x40 /* user-mode access to saved floating-point registers */ -#define CPU_DATA 0x48 /* per-cpu data */ +#define CPU_DATA_GS 0x48 /* per-cpu data */ #ifdef MACH_BSD #define USER_LDT 0x58 @@ -183,24 +258,6 @@ struct fake_descriptor { #if MACH_KDB #define DEBUG_TSS 0x50 /* debug TSS (uniprocessor) */ - -#ifdef MACH_BSD -#define GDTSZ 14 -#else -#define GDTSZ 11 #endif -#else - -#ifdef MACH_BSD -#define GDTSZ 13 -#else -#define GDTSZ 10 -#endif -#endif - -/* - * Interrupt table is always 256 entries long. - */ -#define IDTSZ 256 #endif /* _I386_SEG_H_ */ diff --git a/osfmk/kern/time_out.h b/osfmk/i386/simple_lock.h similarity index 55% rename from osfmk/kern/time_out.h rename to osfmk/i386/simple_lock.h index f5ec5c019..c8b01655c 100644 --- a/osfmk/kern/time_out.h +++ b/osfmk/i386/simple_lock.h @@ -48,46 +48,71 @@ * the rights to redistribute these changes. */ /* + * File: kern/simple_lock_types.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Simple lock data type definitions */ +#ifdef KERNEL_PRIVATE -#ifndef _KERN_TIME_OUT_H_ -#define _KERN_TIME_OUT_H_ - -/* - * Mach tick-based timing. - */ +#ifndef _I386_SIMPLE_LOCK_TYPES_H_ +#define _I386_SIMPLE_LOCK_TYPES_H_ #include #include #include +#if defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE) +#include +#include +#endif + +#if defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE) + +#if MACH_LDEBUG +#define USLOCK_DEBUG 1 +#else +#define USLOCK_DEBUG 0 +#endif /* USLOCK_DEBUG */ + +typedef struct uslock_debug { + void *lock_pc; /* pc where lock operation began */ + void *lock_thread; /* thread that acquired lock */ + unsigned long duration[2]; + unsigned short state; + unsigned char lock_cpu; + void *unlock_thread; /* last thread to release lock */ + unsigned char unlock_cpu; + void *unlock_pc; /* pc where lock operation ended */ +} uslock_debug; -#ifdef __APPLE_API_PRIVATE +typedef struct slock { + hw_lock_data_t interlock; /* must be first... see lock.c */ + unsigned short lock_type; /* must be second... 
see lock.c */ +#define USLOCK_TAG 0x5353 + uslock_debug debug; +} usimple_lock_data_t, *usimple_lock_t; -extern int hz; /* num of ticks per second */ -extern int tick; /* num of usec per tick */ +#else -#ifdef MACH_KERNEL_PRIVATE +typedef struct slock { + unsigned int lock_data[10]; +} usimple_lock_data_t, *usimple_lock_t; -extern void hertz_tick( - boolean_t usermode, /* executing user code */ - natural_t pc); +#endif /* defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE) */ -typedef void (*timeout_fcn_t)(void *); +#define USIMPLE_LOCK_NULL ((usimple_lock_t) 0) -/* Set timeout */ -extern void timeout( - timeout_fcn_t fcn, - void *param, - int interval); +#if !defined(decl_simple_lock_data) +typedef usimple_lock_data_t *simple_lock_t; +typedef usimple_lock_data_t simple_lock_data_t; -/* Cancel timeout */ -extern void untimeout( - timeout_fcn_t fcn, - void *param); +#define decl_simple_lock_data(class,name) \ + class simple_lock_data_t name; -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* !defined(decl_simple_lock_data) */ -#endif /* __APPLE_API_PRIVATE */ +#endif /* !_I386_SIMPLE_LOCK_TYPES_H_ */ -#endif /* _KERN_TIME_OUT_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s index 0607eee1e..a9f8c45b0 100644 --- a/osfmk/i386/start.s +++ b/osfmk/i386/start.s @@ -51,25 +51,17 @@ */ #include -#include #include #include #include +#include #include -#if NCPUS > 1 - #define CX(addr,reg) addr(,reg,4) -#else - -#define CPU_NUMBER(reg) -#define CX(addr,reg) addr - -#endif /* NCPUS > 1 */ - #include +#include /* * GAS won't handle an intersegment jump with a relocatable offset. @@ -85,35 +77,51 @@ #define KVTOLINEAR LINEAR_KERNELBASE -#define PA(addr) (addr)+KVTOPHYS -#define VA(addr) (addr)-KVTOPHYS +#define PA(addr) ((addr)+KVTOPHYS) +#define VA(addr) ((addr)-KVTOPHYS) .data +#if 0 /* Anyone need this? */ .align 2 .globl EXT(_kick_buffer_) EXT(_kick_buffer_): .long 1 .long 3 .set .,.+16836 +#endif /* XXX */ /* * Interrupt and bootup stack for initial processor. */ + .section __HIB, __data .align ALIGN + .globl EXT(intstack) EXT(intstack): + .set ., .+INTSTACK_SIZE + .globl EXT(eintstack) EXT(eintstack:) -#if NCPUS == 1 - .globl EXT(int_stack_high) /* all interrupt stacks */ -EXT(int_stack_high): /* must lie below this */ - .long EXT(eintstack) /* address */ +/* + * Pointers to GDT and IDT. These contain linear addresses. + */ + .align ALIGN + .globl EXT(gdtptr) +LEXT(gdtptr) + .word Times(8,GDTSZ)-1 + .long EXT(gdt) + + .align ALIGN + .globl EXT(idtptr) +LEXT(idtptr) + .word Times(8,IDTSZ)-1 + .long EXT(idt) + + /* back to the regular __DATA section. */ + + .section __DATA, __data - .globl EXT(int_stack_top) /* top of interrupt stack */ -EXT(int_stack_top): - .long EXT(eintstack) -#endif #if MACH_KDB /* @@ -122,7 +130,7 @@ EXT(int_stack_top): .align ALIGN .globl EXT(db_stack_store) EXT(db_stack_store): - .set ., .+(INTSTACK_SIZE*NCPUS) + .set ., .+(INTSTACK_SIZE*MAX_CPUS) /* * Stack for last-ditch debugger task for each processor. @@ -130,8 +138,7 @@ EXT(db_stack_store): .align ALIGN .globl EXT(db_task_stack_store) EXT(db_task_stack_store): - .set ., .+(INTSTACK_SIZE*NCPUS) -#endif /* MACH_KDB */ + .set ., .+(INTSTACK_SIZE*MAX_CPUS) /* * per-processor kernel debugger stacks @@ -139,25 +146,9 @@ EXT(db_task_stack_store): .align ALIGN .globl EXT(kgdb_stack_store) EXT(kgdb_stack_store): - .set ., .+(INTSTACK_SIZE*NCPUS) - - -/* - * Pointers to GDT and IDT. These contain linear addresses. 
- */ - .align ALIGN - .globl EXT(gdtptr) -LEXT(gdtptr) - .word Times(8,GDTSZ)-1 - .long EXT(gdt)+KVTOLINEAR - - .align ALIGN - .globl EXT(idtptr) -LEXT(idtptr) - .word Times(8,IDTSZ)-1 - .long EXT(idt)+KVTOLINEAR + .set ., .+(INTSTACK_SIZE*MAX_CPUS) +#endif /* MACH_KDB */ -#if NCPUS > 1 .data /* * start_lock is very special. We initialize the @@ -179,8 +170,66 @@ EXT(master_is_up): .globl EXT(mp_boot_pde) EXT(mp_boot_pde): .long 0 -#endif /* NCPUS > 1 */ + +_KERNend: .long 0 /* phys addr end of kernel (just after bss) */ +physfree: .long 0 /* phys addr of next free page */ + + .globl _IdlePTD +_IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl _IdlePDPT +_IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + + .globl _KPTphys + +_KPTphys: .long 0 /* phys addr of kernel page tables */ + + +/* Some handy macros */ + +#define ALLOCPAGES(npages) \ + movl PA(physfree), %esi ; \ + movl $((npages) * PAGE_SIZE), %eax ; \ + addl %esi, %eax ; \ + movl %eax, PA(physfree) ; \ + movl %esi, %edi ; \ + movl $((npages) * PAGE_SIZE / 4),%ecx ; \ + xorl %eax,%eax ; \ + cld ; \ + rep ; \ + stosl +/* + * fillkpt + * eax = page frame address + * ebx = index into page table + * ecx = how many pages to map + * base = base address of page dir/table + * prot = protection bits + */ +#define fillkpt(base, prot) \ + shll $(PTEINDX),%ebx ; \ + addl base,%ebx ; \ + orl $(PTE_V) ,%eax ; \ + orl prot,%eax ; \ +1: movl %eax,(%ebx) ; \ + addl $(PAGE_SIZE),%eax ; /* increment physical address */ \ + addl $(PTESIZE),%ebx ; /* next pte */ \ + loop 1b + +/* + * fillkptphys(prot) + * eax = physical address + * ecx = how many pages to map + * prot = protection bits + */ +#define fillkptphys(prot) \ + movl %eax, %ebx ; \ + shrl $(PAGE_SHIFT), %ebx ; \ + fillkpt(PA(EXT(KPTphys)), prot) + + /* * All CPUs start here. * @@ -195,11 +244,13 @@ EXT(mp_boot_pde): LEXT(_start) LEXT(pstart) mov %eax, %ebx /* save pointer to kernbootstruct */ + + POSTCODE(PSTART_ENTRY); + mov $0,%ax /* fs must be zeroed; */ mov %ax,%fs /* some bootstrappers don`t do this */ mov %ax,%gs -#if NCPUS > 1 jmp 1f 0: cmpl $0,PA(EXT(start_lock)) jne 0b @@ -211,86 +262,138 @@ LEXT(pstart) cmpl $0,PA(EXT(master_is_up)) /* are we first? */ jne EXT(slave_start) /* no -- system already up. */ movl $1,PA(EXT(master_is_up)) /* others become slaves */ -#endif /* NCPUS > 1 */ + jmp 3f +3: /* * Get startup parameters. */ -#include + movl %ebx,PA(EXT(boot_args_start)) /* Save KERNBOOTSTRUCT */ + + movl KADDR(%ebx), %eax + addl KSIZE(%ebx), %eax + addl $(NBPG-1),%eax + andl $(-NBPG), %eax + movl %eax, PA(EXT(KERNend)) + movl %eax, PA(physfree) + cld -/* - * Build initial page table directory and page tables. - * %ebx holds first available physical address. - */ +/* allocate kernel page table pages */ + ALLOCPAGES(NKPT) + movl %esi,PA(EXT(KPTphys)) - addl $(NBPG-1),%ebx /* round first avail physical addr */ - andl $(-NBPG),%ebx /* to machine page size */ - leal -KVTOPHYS(%ebx),%eax /* convert to virtual address */ - movl %eax,PA(EXT(kpde)) /* save as kernel page table directory */ - movl %ebx,%cr3 /* set physical address in CR3 now */ +#ifdef PAE +/* allocate Page Table Directory Page */ + ALLOCPAGES(1) + movl %esi,PA(EXT(IdlePDPT)) +#endif - movl %ebx,%edi /* clear page table directory */ - movl $(PTES_PER_PAGE),%ecx /* one page of ptes */ - xorl %eax,%eax - cld - rep - stosl /* edi now points to next page */ +/* allocate kernel page directory page */ + ALLOCPAGES(NPGPTD) + movl %esi,PA(EXT(IdlePTD)) -/* - * Use next few pages for page tables. 
- */ - addl $(KERNELBASEPDE),%ebx /* point to pde for kernel base */ - movl %edi,%esi /* point to end of current pte page */ +/* map from zero to end of kernel */ + xorl %eax,%eax + movl PA(physfree),%ecx + shrl $(PAGE_SHIFT),%ecx + fillkptphys( $(PTE_W) ) + +/* map page directory */ +#ifdef PAE + movl PA(EXT(IdlePDPT)), %eax + movl $1, %ecx + fillkptphys( $(PTE_W) ) +#endif + movl PA(EXT(IdlePTD)),%eax + movl $(NPGPTD), %ecx + fillkptphys( $(PTE_W) ) + +/* install a pde for temp double map of bottom of VA */ + movl PA(EXT(KPTphys)),%eax + xorl %ebx,%ebx + movl $(NKPT), %ecx + fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) + +/* install pde's for page tables */ + movl PA(EXT(KPTphys)),%eax + movl $(KPTDI),%ebx + movl $(NKPT),%ecx + fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) + +/* install a pde recursively mapping page directory as a page table */ + movl PA(EXT(IdlePTD)),%eax + movl $(PTDPTDI),%ebx + movl $(NPGPTD),%ecx + fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) + +#ifdef PAE + movl PA(EXT(IdlePTD)), %eax + xorl %ebx, %ebx + movl $(NPGPTD), %ecx + fillkpt(PA(EXT(IdlePDPT)), $0) +#endif -/* - * Enter 1-1 mappings for kernel and for kernel page tables. - */ - movl $(INTEL_PTE_KERNEL),%eax /* set up pte prototype */ -0: - cmpl %esi,%edi /* at end of pte page? */ - jb 1f /* if so: */ - movl %edi,%edx /* get pte address (physical) */ - andl $(-NBPG),%edx /* mask out offset in page */ - orl $(INTEL_PTE_KERNEL),%edx /* add pte bits */ - movl %edx,(%ebx) /* set pde */ - addl $4,%ebx /* point to next pde */ - movl %edi,%esi /* point to */ - addl $(NBPG),%esi /* end of new pte page */ -1: - movl %eax,(%edi) /* set pte */ - addl $4,%edi /* advance to next pte */ - addl $(NBPG),%eax /* advance to next phys page */ - cmpl %edi,%eax /* have we mapped this pte page yet? */ - jb 0b /* loop if not */ +/* install a pde page for commpage use up in high memory */ -/* - * Zero rest of last pte page. - */ - xor %eax,%eax /* don`t map yet */ -2: cmpl %esi,%edi /* at end of pte page? */ - jae 3f - movl %eax,(%edi) /* zero mapping */ - addl $4,%edi - jmp 2b -3: + movl PA(physfree),%eax /* grab next phys page */ + movl %eax,%ebx + addl $(PAGE_SIZE),%ebx + movl %ebx,PA(physfree) /* show next free phys pg */ + movl $(COMM_PAGE_BASE_ADDR),%ebx + shrl $(PDESHIFT),%ebx /* index into pde page */ + movl $(1), %ecx /* # pdes to store */ + fillkpt(PA(EXT(IdlePTD)), $(PTE_W|PTE_U)) /* user has access! */ -#if NCPUS > 1 -/* - * Grab (waste?) another page for a bootstrap page directory - * for the other CPUs. We don't want the running CPUs to see - * addresses 0..3fffff mapped 1-1. - */ - movl %edi,PA(EXT(mp_boot_pde)) /* save its physical address */ - movl $(PTES_PER_PAGE),%ecx /* and clear it */ - rep - stosl -#endif /* NCPUS > 1 */ + movl PA(physfree),%edi movl %edi,PA(EXT(first_avail)) /* save first available phys addr */ +#ifdef PAE /* - * pmap_bootstrap will enter rest of mappings. - */ + * We steal 0x4000 for a temp pdpt and 0x5000-0x8000 + * for temp pde pages in the PAE case. 
Once we are + * running at the proper virtual address we switch to + * the PDPT/PDE's the master is using */ + + /* clear pdpt page to be safe */ + xorl %eax, %eax + movl $(PAGE_SIZE),%ecx + movl $(0x4000),%edi + cld + rep + stosb + + /* build temp pdpt */ + movl $(0x5000), %eax + xorl %ebx, %ebx + movl $(NPGPTD), %ecx + fillkpt($(0x4000), $0) + + /* copy the NPGPTD pages of pdes */ + movl PA(EXT(IdlePTD)),%eax + movl $0x5000,%ebx + movl $((PTEMASK+1)*NPGPTD),%ecx +1: movl 0(%eax),%edx + movl %edx,0(%ebx) + movl 4(%eax),%edx + movl %edx,4(%ebx) + addl $(PTESIZE),%eax + addl $(PTESIZE),%ebx + loop 1b +#else +/* create temp pde for slaves to use + use unused lomem page and copy in IdlePTD */ + movl PA(EXT(IdlePTD)),%eax + movl $0x4000,%ebx + movl $(PTEMASK+1),%ecx +1: movl 0(%eax),%edx + movl %edx,0(%ebx) + addl $(PTESIZE),%eax + addl $(PTESIZE),%ebx + loop 1b +#endif + + POSTCODE(PSTART_PAGE_TABLES); /* * Fix initial descriptor tables. @@ -314,41 +417,41 @@ fix_gdt_ret: fix_ldt_ret: /* - * Turn on paging. + * */ - movl %cr3,%eax /* retrieve kernel PDE phys address */ - movl KERNELBASEPDE(%eax),%ecx - movl %ecx,(%eax) /* set it also as pte for location */ - /* 0..3fffff, so that the code */ - /* that enters paged mode is mapped */ - /* to identical addresses after */ - /* paged mode is enabled */ - addl $4,%eax /* 400000..7fffff */ - movl KERNELBASEPDE(%eax),%ecx - movl %ecx,(%eax) + lgdt PA(EXT(gdtptr)) /* load GDT */ + lidt PA(EXT(idtptr)) /* load IDT */ - movl $ EXT(pag_start),%ebx /* first paged code address */ - - movl %cr0,%eax - orl $(CR0_PG),%eax /* set PG bit in CR0 */ - orl $(CR0_WP),%eax - movl %eax,%cr0 /* to enable paging */ - - jmp *%ebx /* flush prefetch queue */ + POSTCODE(PSTART_BEFORE_PAGING); /* - * We are now paging, and can run with correct addresses. + * Turn on paging. */ -LEXT(pag_start) - lgdt EXT(gdtptr) /* load GDT */ - lidt EXT(idtptr) /* load IDT */ +#ifdef PAE + movl PA(EXT(IdlePDPT)), %eax + movl %eax, %cr3 + + movl %cr4, %eax + orl $(CR4_PAE), %eax + movl %eax, %cr4 +#else + movl PA(EXT(IdlePTD)), %eax + movl %eax,%cr3 +#endif + + movl %cr0,%eax + orl $(CR0_PG|CR0_WP|CR0_PE),%eax + movl %eax,%cr0 /* to enable paging */ + LJMP(KERNEL_CS,EXT(vstart)) /* switch to kernel code segment */ /* * Master is now running with correct addresses. */ LEXT(vstart) + POSTCODE(VSTART_ENTRY) ; + mov $(KERNEL_DS),%ax /* set kernel data segment */ mov %ax,%ds mov %ax,%es @@ -371,15 +474,25 @@ LEXT(vstart) movw $(KERNEL_TSS),%ax ltr %ax /* set up KTSS */ - mov $ CPU_DATA,%ax + mov $(CPU_DATA_GS),%ax mov %ax,%gs + POSTCODE(VSTART_STACK_SWITCH); + lea EXT(eintstack),%esp /* switch to the bootup stack */ + call EXT(i386_preinit) + + POSTCODE(VSTART_EXIT); + call EXT(i386_init) /* run C code */ /*NOTREACHED*/ hlt -#if NCPUS > 1 + .text + .globl __start + .set __start, PA(EXT(pstart)) + + /* * master_up is used by the master cpu to signify that it is done * with the interrupt stack, etc. 
See the code in pstart and svstart @@ -405,79 +518,89 @@ LEXT(master_up) LEXT(slave_start) cli /* disable interrupts, so we don`t */ /* need IDT for a while */ - movl EXT(kpde)+KVTOPHYS,%ebx /* get PDE virtual address */ - addl $(KVTOPHYS),%ebx /* convert to physical address */ - - movl PA(EXT(mp_boot_pde)),%edx /* point to the bootstrap PDE */ - movl KERNELBASEPDE(%ebx),%eax - /* point to pte for KERNELBASE */ - movl %eax,KERNELBASEPDE(%edx) - /* set in bootstrap PDE */ - movl %eax,(%edx) /* set it also as pte for location */ - /* 0..3fffff, so that the code */ - /* that enters paged mode is mapped */ - /* to identical addresses after */ - /* paged mode is enabled */ - movl %edx,%cr3 /* use bootstrap PDE to enable paging */ - - movl $ EXT(spag_start),%edx /* first paged code address */ + + POSTCODE(SLAVE_START_ENTRY); +/* + * Turn on paging. + */ + movl $(EXT(spag_start)),%edx /* first paged code address */ + +#ifdef PAE + movl $(0x4000), %eax + movl %eax, %cr3 + + movl %cr4, %eax + orl $(CR4_PAE), %eax + movl %eax, %cr4 +#else + movl $(0x4000),%eax /* tmp until we get mapped */ + movl %eax,%cr3 +#endif movl %cr0,%eax - orl $(CR0_PG),%eax /* set PG bit in CR0 */ - orl $(CR0_WP),%eax + orl $(CR0_PG|CR0_WP|CR0_PE),%eax movl %eax,%cr0 /* to enable paging */ - jmp *%edx /* flush prefetch queue. */ + POSTCODE(SLAVE_START_EXIT); + + jmp *%edx /* flush prefetch queue */ /* * We are now paging, and can run with correct addresses. */ LEXT(spag_start) - lgdt EXT(gdtptr) /* load GDT */ - lidt EXT(idtptr) /* load IDT */ + lgdt PA(EXT(gdtptr)) /* load GDT */ + lidt PA(EXT(idtptr)) /* load IDT */ + LJMP(KERNEL_CS,EXT(svstart)) /* switch to kernel code segment */ + /* * Slave is now running with correct addresses. */ LEXT(svstart) + + POSTCODE(SVSTART_ENTRY); + +#ifdef PAE + movl PA(EXT(IdlePDPT)), %eax + movl %eax, %cr3 +#else + movl PA(EXT(IdlePTD)), %eax + movl %eax, %cr3 +#endif + mov $(KERNEL_DS),%ax /* set kernel data segment */ mov %ax,%ds mov %ax,%es mov %ax,%ss - movl %ebx,%cr3 /* switch to the real kernel PDE */ - - CPU_NUMBER(%eax) - movl CX(EXT(interrupt_stack),%eax),%esp /* get stack */ - addl $(INTSTACK_SIZE),%esp /* point to top */ - xorl %ebp,%ebp /* for completeness */ - - movl $0,%ecx /* unlock start_lock */ - xchgl %ecx,EXT(start_lock) /* since we are no longer using */ - /* bootstrap stack */ + /* + * We're not quite through with the boot stack + * but we need to reset the stack pointer to the correct virtual + * address. + * And we need to offset above the address of pstart. 
+ */ + movl $(VA(MP_BOOTSTACK+MP_BOOT+4)), %esp /* - * switch to the per-cpu descriptor tables + * Switch to the per-cpu descriptor tables */ + POSTCODE(SVSTART_DESC_INIT); + + CPU_NUMBER_FROM_LAPIC(%eax) + movl CX(EXT(cpu_data_ptr),%eax),%ecx + movl CPU_DESC_TABLEP(%ecx), %ecx - pushl %eax /* pass CPU number */ - call EXT(mp_desc_init) /* set up local table */ - /* pointer returned in %eax */ - subl $4,%esp /* get space to build pseudo-descriptors */ - - CPU_NUMBER(%eax) movw $(GDTSZ*8-1),0(%esp) /* set GDT size in GDT descriptor */ - movl CX(EXT(mp_gdt),%eax),%edx - addl $ KVTOLINEAR,%edx - movl %edx,2(%esp) /* point to local GDT (linear address) */ + leal MP_GDT(%ecx),%edx + movl %edx,2(%esp) /* point to local GDT (linear addr) */ lgdt 0(%esp) /* load new GDT */ movw $(IDTSZ*8-1),0(%esp) /* set IDT size in IDT descriptor */ - movl CX(EXT(mp_idt),%eax),%edx - addl $ KVTOLINEAR,%edx - movl %edx,2(%esp) /* point to local IDT (linear address) */ + leal MP_IDT(%ecx),%edx + movl %edx,2(%esp) /* point to local IDT (linear addr) */ lidt 0(%esp) /* load new IDT */ movw $(KERNEL_LDT),%ax /* get LDT segment */ @@ -486,13 +609,25 @@ LEXT(svstart) movw $(KERNEL_TSS),%ax ltr %ax /* load new KTSS */ - mov $ CPU_DATA,%ax + mov $(CPU_DATA_GS),%ax mov %ax,%gs - call EXT(slave_main) /* start MACH */ +/* + * Get stack top from per-cpu data and switch + */ + POSTCODE(SVSTART_STACK_SWITCH); + + movl %gs:CPU_INT_STACK_TOP,%esp + xorl %ebp,%ebp /* for completeness */ + + movl $0,%eax /* unlock start_lock */ + xchgl %eax,EXT(start_lock) /* since we are no longer using */ + /* bootstrap stack */ + POSTCODE(SVSTART_EXIT); + + call EXT(i386_init_slave) /* start MACH */ /*NOTREACHED*/ hlt -#endif /* NCPUS > 1 */ /* * Convert a descriptor from fake to real format. diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h index ffd07de78..d3f5f1d5d 100644 --- a/osfmk/i386/thread.h +++ b/osfmk/i386/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -70,7 +71,147 @@ #include #include #include -#include + +/* + * i386_saved_state: + * + * Has been exported to servers. See: mach/i386/thread_status.h + * + * This structure corresponds to the state of user registers + * as saved upon kernel entry. It lives in the pcb. + * It is also pushed onto the stack for exceptions in the kernel. + * For performance, it is also used directly in syscall exceptions + * if the server has requested i386_THREAD_STATE flavor for the exception + * port. + * + * We define the following as an alias for the "esp" field of the + * structure, because we actually save cr2 here, not the kernel esp. + */ +#define cr2 esp + +/* + * Save area for user floating-point state. + * Allocated only when necessary. + */ + +struct i386_fpsave_state { + boolean_t fp_valid; + struct i386_fp_save fp_save_state; + struct i386_fp_regs fp_regs; + struct i386_fx_save fx_save_state __attribute__ ((aligned (16))); + int fp_save_flavor; +}; + +/* + * v86_assist_state: + * + * This structure provides data to simulate 8086 mode + * interrupts. It lives in the pcb. 
+ */ + +struct v86_assist_state { + vm_offset_t int_table; + unsigned short int_count; + unsigned short flags; /* 8086 flag bits */ +}; +#define V86_IF_PENDING 0x8000 /* unused bit */ + +/* + * i386_interrupt_state: + * + * This structure describes the set of registers that must + * be pushed on the current ring-0 stack by an interrupt before + * we can switch to the interrupt stack. + */ + +struct i386_interrupt_state { + int gs; + int fs; + int es; + int ds; + int edx; + int ecx; + int eax; + int eip; + int cs; + int efl; +}; + +/* + * i386_kernel_state: + * + * This structure corresponds to the state of kernel registers + * as saved in a context-switch. It lives at the base of the stack. + */ + +struct i386_kernel_state { + int k_ebx; /* kernel context */ + int k_esp; + int k_ebp; + int k_edi; + int k_esi; + int k_eip; +}; + +/* + * i386_machine_state: + * + * This structure corresponds to special machine state. + * It lives in the pcb. It is not saved by default. + */ + +struct i386_machine_state { + iopb_tss_t io_tss; + struct user_ldt * ldt; + struct i386_fpsave_state *ifps; + struct v86_assist_state v86s; +}; + +typedef struct pcb { + struct i386_interrupt_state iis[2]; /* interrupt and NMI */ + struct i386_saved_state iss; + struct i386_machine_state ims; +#ifdef MACH_BSD + unsigned long cthread_self; /* for use of cthread package */ + struct real_descriptor cthread_desc; + unsigned long uldt_selector; /* user ldt selector to set */ + struct real_descriptor uldt_desc; /* the actual user setable ldt data */ +#endif + decl_simple_lock_data(,lock) +} *pcb_t; + +/* + * Maps state flavor to number of words in the state: + */ +__private_extern__ unsigned int _MachineStateCount[]; + +#define USER_REGS(ThrAct) (&(ThrAct)->machine.pcb->iss) + +#define act_machine_state_ptr(ThrAct) (thread_state_t)USER_REGS(ThrAct) + + +#define is_user_thread(ThrAct) \ + ((USER_REGS(ThrAct)->efl & EFL_VM) \ + || ((USER_REGS(ThrAct)->cs & 0x03) != 0)) + +#define user_pc(ThrAct) (USER_REGS(ThrAct)->eip) +#define user_sp(ThrAct) (USER_REGS(ThrAct)->uesp) + +struct machine_thread { + /* + * pointer to process control block + * (actual storage may as well be here, too) + */ + struct pcb xxx_pcb; + pcb_t pcb; + +}; + +extern struct i386_saved_state *get_user_regs(thread_t); + +extern void *act_thread_csave(void); +extern void act_thread_catt(void *ctx); +extern void act_thread_cfree(void *ctx); /* * i386_exception_link: @@ -96,34 +237,6 @@ struct i386_exception_link { #define STACK_IEL(stack) \ ((struct i386_exception_link *)STACK_IKS(stack) - 1) -#if NCPUS > 1 -#include -#endif - -/* - * Boot-time data for master (or only) CPU - */ -extern struct fake_descriptor idt[IDTSZ]; -extern struct fake_descriptor gdt[GDTSZ]; -extern struct fake_descriptor ldt[LDTSZ]; -extern struct i386_tss ktss; -#if MACH_KDB -extern char db_stack_store[]; -extern char db_task_stack_store[]; -extern struct i386_tss dbtss; -extern void db_task_start(void); -#endif /* MACH_KDB */ -#if NCPUS > 1 -#define curr_gdt(mycpu) (mp_gdt[mycpu]) -#define curr_ktss(mycpu) (mp_ktss[mycpu]) -#else -#define curr_gdt(mycpu) (gdt) -#define curr_ktss(mycpu) (&ktss) -#endif - -#define gdt_desc_p(mycpu,sel) \ - ((struct real_descriptor *)&curr_gdt(mycpu)[sel_idx(sel)]) - /* * Return address of the function that called current function, given * address of the first parameter of current function. 
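The accessors this patch moves into thread.h above (USER_REGS, is_user_thread, user_pc, user_sp) are easiest to read with a concrete caller. A minimal sketch, not part of the patch, assuming a kernel context where <i386/thread.h> is included and a valid thread_t is in hand; example_user_pc is a hypothetical helper:

	/*
	 * Illustrative only: report where a thread was executing in user
	 * space, or 0 if its saved state is kernel state.  USER_REGS()
	 * reaches through thread->machine.pcb to the i386_saved_state
	 * captured at kernel entry; user_pc() reads the saved eip from it.
	 */
	static vm_offset_t
	example_user_pc(thread_t thread)
	{
		if (!is_user_thread(thread))	/* EFL_VM set, or CS RPL != 0 */
			return (vm_offset_t)0;
		return (vm_offset_t)user_pc(thread);
	}
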
diff --git a/osfmk/i386/thread_act.h b/osfmk/i386/thread_act.h deleted file mode 100644 index b75f1f1ca..000000000 --- a/osfmk/i386/thread_act.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ - -#ifndef _I386_THREAD_ACT_H_ -#define _I386_THREAD_ACT_H_ - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -/* - * i386_saved_state: - * - * Has been exported to servers. See: mach/i386/thread_status.h - * - * This structure corresponds to the state of user registers - * as saved upon kernel entry. It lives in the pcb. - * It is also pushed onto the stack for exceptions in the kernel. - * For performance, it is also used directly in syscall exceptions - * if the server has requested i386_THREAD_STATE flavor for the exception - * port. - * - * We define the following as an alias for the "esp" field of the - * structure, because we actually save cr2 here, not the kernel esp. - */ -#define cr2 esp - -/* - * Save area for user floating-point state. - * Allocated only when necessary. - */ - -struct i386_fpsave_state { - boolean_t fp_valid; - struct i386_fp_save fp_save_state; - struct i386_fp_regs fp_regs; - struct i386_fx_save fx_save_state __attribute__ ((aligned (16))); - int fp_save_flavor; -}; - -/* - * v86_assist_state: - * - * This structure provides data to simulate 8086 mode - * interrupts. It lives in the pcb. - */ - -struct v86_assist_state { - vm_offset_t int_table; - unsigned short int_count; - unsigned short flags; /* 8086 flag bits */ -}; -#define V86_IF_PENDING 0x8000 /* unused bit */ - -/* - * i386_interrupt_state: - * - * This structure describes the set of registers that must - * be pushed on the current ring-0 stack by an interrupt before - * we can switch to the interrupt stack. - */ - -struct i386_interrupt_state { - int gs; - int fs; - int es; - int ds; - int edx; - int ecx; - int eax; - int eip; - int cs; - int efl; -}; - -/* - * i386_kernel_state: - * - * This structure corresponds to the state of kernel registers - * as saved in a context-switch. It lives at the base of the stack. - */ - -struct i386_kernel_state { - int k_ebx; /* kernel context */ - int k_esp; - int k_ebp; - int k_edi; - int k_esi; - int k_eip; -}; - -/* - * i386_machine_state: - * - * This structure corresponds to special machine state. - * It lives in the pcb. It is not saved by default. 
- */ - -struct i386_machine_state { - iopb_tss_t io_tss; - struct user_ldt * ldt; - struct i386_fpsave_state *ifps; - struct v86_assist_state v86s; -}; - -typedef struct pcb { - struct i386_interrupt_state iis[2]; /* interrupt and NMI */ - struct i386_saved_state iss; - struct i386_machine_state ims; -#ifdef MACH_BSD - unsigned long cthread_self; /* for use of cthread package */ - struct real_descriptor cthread_desc; -#endif - decl_simple_lock_data(,lock) -} *pcb_t; - -/* - * Maps state flavor to number of words in the state: - */ -extern unsigned int state_count[]; - - -#define USER_REGS(ThrAct) (&(ThrAct)->mact.pcb->iss) - -#define act_machine_state_ptr(ThrAct) (thread_state_t)USER_REGS(ThrAct) - - -#define is_user_thread(ThrAct) \ - ((USER_REGS(ThrAct)->efl & EFL_VM) \ - || ((USER_REGS(ThrAct)->cs & 0x03) != 0)) - -#define user_pc(ThrAct) (USER_REGS(ThrAct)->eip) -#define user_sp(ThrAct) (USER_REGS(ThrAct)->uesp) - -#define syscall_emulation_sync(task) /* do nothing */ - -typedef struct MachineThrAct { - /* - * pointer to process control block - * (actual storage may as well be here, too) - */ - struct pcb xxx_pcb; - pcb_t pcb; - -} MachineThrAct, *MachineThrAct_t; - -extern void *act_thread_csave(void); -extern void act_thread_catt(void *ctx); -extern void act_thread_cfree(void *ctx); - -extern vm_offset_t active_stacks[NCPUS]; -extern vm_offset_t kernel_stack[NCPUS]; -extern thread_act_t active_kloaded[NCPUS]; - -#endif /* _I386_THREAD_ACT_H_ */ diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index 0554da8a5..e8b8a2a20 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -53,8 +53,6 @@ * Hardware trap/fault handler. 
*/ -#include -#include #include #include #include @@ -74,9 +72,8 @@ #include #include -#include #include -#include +#include #include #include #include @@ -127,7 +124,7 @@ void thread_syscall_return( kern_return_t ret) { - register thread_act_t thr_act = current_act(); + register thread_t thr_act = current_thread(); register struct i386_saved_state *regs = USER_REGS(thr_act); regs->eax = ret; thread_exception_return(); @@ -144,34 +141,27 @@ extern boolean_t db_breakpoints_inserted; void thread_kdb_return(void) { - register thread_act_t thr_act = current_act(); - register thread_t cur_thr = current_thread(); - register struct i386_saved_state *regs = USER_REGS(thr_act); + register thread_t thread = current_thread(); + register struct i386_saved_state *regs = USER_REGS(thread); if (kdb_trap(regs->trapno, regs->err, regs)) { #if MACH_LDEBUG - assert(cur_thr->mutex_count == 0); + assert(thread->mutex_count == 0); #endif /* MACH_LDEBUG */ - check_simple_locks(); thread_exception_return(); /*NOTREACHED*/ } } boolean_t let_ddb_vm_fault = FALSE; -#if NCPUS > 1 -extern int kdb_active[NCPUS]; -#endif /* NCPUS > 1 */ - #endif /* MACH_KDB */ void user_page_fault_continue( kern_return_t kr) { - register thread_act_t thr_act = current_act(); - register thread_t cur_thr = current_thread(); - register struct i386_saved_state *regs = USER_REGS(thr_act); + register thread_t thread = current_thread(); + register struct i386_saved_state *regs = USER_REGS(thread); if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) { #if MACH_KDB @@ -181,7 +171,7 @@ user_page_fault_continue( if (db_watchpoint_list && db_watchpoints_inserted && (regs->err & T_PF_WRITE) && - db_find_watchpoint(thr_act->map, + db_find_watchpoint(thread->map, (vm_offset_t)regs->cr2, regs)) kdb_trap(T_WATCHPOINT, 0, regs); @@ -194,9 +184,8 @@ user_page_fault_continue( if (debug_all_traps_with_kdb && kdb_trap(regs->trapno, regs->err, regs)) { #if MACH_LDEBUG - assert(cur_thr->mutex_count == 0); + assert(thread->mutex_count == 0); #endif /* MACH_LDEBUG */ - check_simple_locks(); thread_exception_return(); /*NOTREACHED*/ } @@ -210,8 +199,8 @@ user_page_fault_continue( * Fault recovery in copyin/copyout routines. */ struct recovery { - int fault_addr; - int recover_addr; + uint32_t fault_addr; + uint32_t recover_addr; }; extern struct recovery recover_table[]; @@ -225,9 +214,10 @@ extern struct recovery recover_table_end[]; extern struct recovery retry_table[]; extern struct recovery retry_table_end[]; -char * trap_type[] = {TRAP_NAMES}; +const char * trap_type[] = {TRAP_NAMES}; int TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); + /* * Trap from kernel mode. Only page-fault errors are recoverable, * and then only in special circumstances. 
All other errors are @@ -237,35 +227,21 @@ boolean_t kernel_trap( register struct i386_saved_state *regs) { - int exc; - int code; - int subcode; - int interruptible; - register int type; - vm_map_t map; - kern_return_t result; + int code; + unsigned int subcode; + int interruptible = THREAD_UNINT; + register int type; + vm_map_t map; + kern_return_t result = KERN_FAILURE; register thread_t thread; - thread_act_t thr_act; - etap_data_t probe_data; - pt_entry_t *pte; - extern vm_offset_t vm_last_phys; type = regs->trapno; code = regs->err; thread = current_thread(); - thr_act = current_act(); - - ETAP_DATA_LOAD(probe_data[0], regs->trapno); - ETAP_DATA_LOAD(probe_data[1], MACH_PORT_NULL); - ETAP_DATA_LOAD(probe_data[2], MACH_PORT_NULL); - ETAP_PROBE_DATA(ETAP_P_EXCEPTION, - 0, - thread, - &probe_data, - ETAP_DATA_ENTRY*3); switch (type) { case T_PREEMPT: + ast_taken(AST_PREEMPTION, FALSE); return (TRUE); case T_NO_FPU: @@ -291,9 +267,7 @@ kernel_trap( #if MACH_KDB mp_disable_preemption(); if (db_active -#if NCPUS > 1 && kdb_active[cpu_number()] -#endif /* NCPUS > 1 */ && !let_ddb_vm_fault) { /* * Force kdb to handle this one. @@ -307,13 +281,11 @@ kernel_trap( if (subcode > LINEAR_KERNEL_ADDRESS) { map = kernel_map; - subcode -= LINEAR_KERNEL_ADDRESS; - } else if (thr_act == THR_ACT_NULL || thread == THREAD_NULL) + } else if (thread == THREAD_NULL) map = kernel_map; else { - map = thr_act->map; + map = thread->map; } - #if MACH_KDB /* * Check for watchpoint on kernel static data. @@ -326,9 +298,7 @@ kernel_trap( (vm_offset_t)subcode < vm_last_phys && ((*(pte = pmap_pte(kernel_pmap, (vm_offset_t)subcode))) & INTEL_PTE_WRITE) == 0) { - *pte = INTEL_PTE_VALID | INTEL_PTE_WRITE | - pa_to_pte(trunc_page((vm_offset_t)subcode) - - VM_MIN_KERNEL_ADDRESS); + *pte = *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE; /* XXX need invltlb here? */ result = KERN_SUCCESS; } else #endif /* MACH_KDB */ @@ -350,7 +320,6 @@ kernel_trap( } } } - result = vm_fault(map, trunc_page((vm_offset_t)subcode), VM_PROT_READ|VM_PROT_WRITE, @@ -435,6 +404,15 @@ kernel_trap( /* fall through... */ default: + /* + * Exception 15 is reserved but some chips may generate it + * spuriously. Seen at startup on AMD Athlon-64. + */ + if (type == 15) { + kprintf("kernel_trap() ignoring spurious trap 15\n"); + return (TRUE); + } + /* * ...and return failure, so that locore can call into * debugger. @@ -475,15 +453,13 @@ user_trap( { int exc; int code; - int subcode; + unsigned int subcode; register int type; vm_map_t map; vm_prot_t prot; kern_return_t result; - register thread_act_t thr_act = current_act(); - thread_t thread = (thr_act ? thr_act->thread : THREAD_NULL); + thread_t thread = current_thread(); boolean_t kernel_act = FALSE; - etap_data_t probe_data; if (regs->efl & EFL_VM) { /* @@ -497,6 +473,7 @@ user_trap( type = regs->trapno; code = 0; subcode = 0; + exc = 0; switch (type) { @@ -573,7 +550,7 @@ user_trap( if (kernel_act == FALSE) { if (!(regs->err & T_PF_WRITE)) prot = VM_PROT_READ; - (void) user_page_fault_continue(vm_fault(thr_act->map, + (void) user_page_fault_continue(vm_fault(thread->map, trunc_page((vm_offset_t)subcode), prot, FALSE, @@ -583,9 +560,8 @@ user_trap( else { if (subcode > LINEAR_KERNEL_ADDRESS) { map = kernel_map; - subcode -= LINEAR_KERNEL_ADDRESS; } - result = vm_fault(thr_act->map, + result = vm_fault(thread->map, trunc_page((vm_offset_t)subcode), prot, FALSE, @@ -596,7 +572,7 @@ user_trap( * so that we can ask for read-only access * but enter a (kernel) writable mapping. 
*/ - result = intel_read_fault(thr_act->map, + result = intel_read_fault(thread->map, trunc_page((vm_offset_t)subcode)); } user_page_fault_continue(result); @@ -629,21 +605,6 @@ user_trap( return; #endif /* MACH_KDB */ -#if ETAP_EVENT_MONITOR - if (thread != THREAD_NULL) { - ETAP_DATA_LOAD(probe_data[0], regs->trapno); - ETAP_DATA_LOAD(probe_data[1], - thr_act->exc_actions[exc].port); - ETAP_DATA_LOAD(probe_data[2], - thr_act->task->exc_actions[exc].port); - ETAP_PROBE_DATA(ETAP_P_EXCEPTION, - 0, - thread, - &probe_data, - ETAP_DATA_ENTRY*3); - } -#endif /* ETAP_EVENT_MONITOR */ - i386_exception(exc, code, subcode); /*NOTREACHED*/ } @@ -666,7 +627,7 @@ v86_assist( thread_t thread, register struct i386_saved_state *regs) { - register struct v86_assist_state *v86 = &thread->top_act->mact.pcb->ims.v86s; + register struct v86_assist_state *v86 = &thread->machine.pcb->ims.v86s; /* * Build an 8086 address. Use only when off is known to be 16 bits. @@ -838,9 +799,9 @@ v86_assist( case 0x9c: /* pushf */ { - int flags; - vm_offset_t sp; - int size; + int flags; + vm_offset_t sp; + unsigned int size; flags = regs->efl; if ((v86->flags & EFL_IF) == 0) @@ -860,7 +821,7 @@ v86_assist( goto stack_error; sp -= size; if (copyout((char *)&flags, - (char *)Addr8086(regs->ss,sp), + (user_addr_t)Addr8086(regs->ss,sp), size)) goto addr_error; if (addr_32) @@ -913,7 +874,6 @@ v86_assist( { vm_offset_t sp; int nflags; - int size; union iret_struct iret_struct; v86->flags &= ~V86_IRET_PENDING; @@ -967,7 +927,7 @@ v86_assist( } break; /* exit from 'while TRUE' */ } - regs->eip = (regs->eip & 0xffff0000 | eip); + regs->eip = (regs->eip & 0xffff0000) | eip; } else { /* @@ -1018,7 +978,7 @@ v86_assist( (char *) (sizeof(struct int_vec) * vec), sizeof (struct int_vec)); if (copyout((char *)&iret_16, - (char *)Addr8086(regs->ss,sp), + (user_addr_t)Addr8086(regs->ss,sp), sizeof(struct iret_16))) goto addr_error; regs->uesp = (regs->uesp & 0xFFFF0000) | (sp & 0xffff); @@ -1063,18 +1023,16 @@ extern void log_thread_action (thread_t, char *); void i386_astintr(int preemption) { - int mycpu; - ast_t mask = AST_ALL; + ast_t *my_ast, mask = AST_ALL; spl_t s; - thread_t self = current_thread(); s = splsched(); /* block interrupts to check reasons */ mp_disable_preemption(); - mycpu = cpu_number(); - if (need_ast[mycpu] & AST_I386_FP) { + my_ast = ast_pending(); + if (*my_ast & AST_I386_FP) { /* * AST was for delayed floating-point exception - - * FP interrupt occured while in kernel. + * FP interrupt occurred while in kernel. * Turn off this AST reason and handle the FPU error. */ @@ -1090,32 +1048,10 @@ i386_astintr(int preemption) * Interrupts are still blocked. */ -#ifdef XXX +#if 1 if (preemption) { - - /* - * We don't want to process any AST if we were in - * kernel-mode and the current thread is in any - * funny state (waiting and/or suspended). 
- */ - - thread_lock (self); - - if (thread_not_preemptable(self) || self->preempt) { - ast_off(AST_URGENT); - thread_unlock (self); - mp_enable_preemption(); - splx(s); - return; - } - else mask = AST_PREEMPTION; + mask = AST_PREEMPTION; mp_enable_preemption(); - -/* - self->preempt = TH_NOT_PREEMPTABLE; -*/ - - thread_unlock (self); } else { mp_enable_preemption(); } @@ -1123,14 +1059,8 @@ i386_astintr(int preemption) mp_enable_preemption(); #endif - ast_taken(mask, s -#if FAST_IDLE - ,NO_IDLE_THREAD -#endif /* FAST_IDLE */ - ); -/* - self->preempt = TH_PREEMPTABLE; -*/ + ast_taken(mask, s); + } } @@ -1164,7 +1094,7 @@ i386_exception( codes[0] = code; /* new exception interface */ codes[1] = subcode; - exception(exc, codes, 2); + exception_triage(exc, codes, 2); /*NOTREACHED*/ } @@ -1247,13 +1177,12 @@ check_io_fault( void kernel_preempt_check (void) { + ast_t *myast; + mp_disable_preemption(); - if ((need_ast[cpu_number()] & AST_URGENT) && -#if NCPUS > 1 + myast = ast_pending(); + if ((*myast & AST_URGENT) && get_interrupt_level() == 1 -#else /* NCPUS > 1 */ - get_interrupt_level() == 0 -#endif /* NCPUS > 1 */ ) { mp_enable_preemption_no_check(); __asm__ volatile (" int $0xff"); diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index 1b90618f3..f3563e0ba 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -99,8 +99,6 @@ extern void i386_exception( int code, int subcode); -extern void thread_exception_return(void); - extern boolean_t kernel_trap( struct i386_saved_state *regs); @@ -112,6 +110,13 @@ extern void user_trap( extern void i386_astintr(int preemption); +#if defined(MACH_KDP) +extern void kdp_i386_trap( + unsigned int, + struct i386_saved_state *, + kern_return_t, + vm_offset_t); +#endif /* MACH_KDP */ #endif /* !ASSEMBLER && MACH_KERNEL */ #endif /* _I386_TRAP_H_ */ diff --git a/osfmk/i386/user_ldt.c b/osfmk/i386/user_ldt.c index 5e27532e7..9cad6b506 100644 --- a/osfmk/i386/user_ldt.c +++ b/osfmk/i386/user_ldt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -78,8 +78,6 @@ char acc_type[8][3] = { { 1, 0, 1 }, /* code, readable, conforming */ }; -extern struct fake_descriptor ldt[]; /* for system call gate */ - #if 0 /* Forward */ @@ -97,7 +95,7 @@ selector_check( struct user_ldt *ldt; int access; - ldt = thread->top_act->mact.pcb->ims.ldt; + ldt = thread->machine.pcb->ims.ldt; if (ldt == 0) { switch (type) { case S_CODE: @@ -134,7 +132,7 @@ selector_check( kern_return_t i386_set_ldt( - thread_act_t thr_act, + thread_t thr_act, int first_selector, descriptor_list_t desc_list, mach_msg_type_number_t count) @@ -153,7 +151,7 @@ i386_set_ldt( return KERN_INVALID_ARGUMENT; if (first_desc + count >= 8192) return KERN_INVALID_ARGUMENT; - if (thr_act == THR_ACT_NULL) + if (thr_act == THREAD_NULL) return KERN_INVALID_ARGUMENT; if ((thread = act_lock_thread(thr_act)) == THREAD_NULL) { act_unlock_thread(thr_act); @@ -173,7 +171,7 @@ i386_set_ldt( */ { kern_return_t kr; - vm_offset_t dst_addr; + vm_map_offset_t dst_addr; old_copy_object = (vm_map_copy_t) desc_list; @@ -183,11 +181,11 @@ i386_set_ldt( return kr; (void) vm_map_wire(ipc_kernel_map, - trunc_page(dst_addr), - round_page(dst_addr + + vm_map_trunc_page(dst_addr), + vm_map_round_page(dst_addr + count * sizeof(struct real_descriptor)), VM_PROT_READ|VM_PROT_WRITE, FALSE); - desc_list = (descriptor_list_t) dst_addr; + desc_list = CAST_DOWN(descriptor_list_t, dst_addr); } for (i = 0, dp = (struct real_descriptor *) desc_list; @@ -217,8 +215,8 @@ i386_set_ldt( break; default: (void) vm_map_remove(ipc_kernel_map, - (vm_offset_t) desc_list, - count * sizeof(struct real_descriptor), + vm_map_trunc_page(desc_list), + vm_map_round_page(&desc_list[count]), VM_MAP_REMOVE_KUNWIRE); return KERN_INVALID_ARGUMENT; } @@ -226,7 +224,7 @@ i386_set_ldt( ldt_size_needed = sizeof(struct real_descriptor) * (first_desc + count); - pcb = thr_act->mact.pcb; + pcb = thr_act->machine.pcb; new_ldt = 0; Retry: simple_lock(&pcb->lock); @@ -266,7 +264,7 @@ i386_set_ldt( (char *)&new_ldt->ldt[0], old_ldt->desc.limit_low + 1); } - else if (thr_act == current_act()) { + else if (thr_act == current_thread()) { struct real_descriptor template = {0, 0, 0, ACC_P, 0, 0 ,0}; for (dp = &new_ldt->ldt[0], i = 0; i < first_desc; i++, dp++) { @@ -300,22 +298,22 @@ i386_set_ldt( /* * Free the descriptor list. */ - (void) vm_map_remove(ipc_kernel_map, (vm_offset_t) desc_list, - count * sizeof(struct real_descriptor), - VM_MAP_REMOVE_KUNWIRE); + (void) vm_map_remove(ipc_kernel_map, vm_map_trunc_page(desc_list), + vm_map_round_page(&desc_list[count]), + VM_MAP_REMOVE_KUNWIRE); return KERN_SUCCESS; } kern_return_t i386_get_ldt( - thread_act_t thr_act, + thread_t thr_act, int first_selector, int selector_count, /* number wanted */ descriptor_list_t *desc_list, /* in/out */ mach_msg_type_number_t *count) /* in/out */ { struct user_ldt *user_ldt; - pcb_t pcb = thr_act->mact.pcb; + pcb_t pcb = thr_act->machine.pcb; int first_desc = sel_idx(first_selector); unsigned int ldt_count; vm_size_t ldt_size; @@ -323,7 +321,7 @@ i386_get_ldt( vm_offset_t addr; thread_t thread; - if (thr_act == THR_ACT_NULL || (thread = thr_act->thread)==THREAD_NULL) + if (thr_act == THREAD_NULL) return KERN_INVALID_ARGUMENT; if (first_desc < 0 || first_desc > 8191) @@ -412,10 +410,10 @@ i386_get_ldt( /* * Unwire the memory and make it into copyin form. 
*/ - (void) vm_map_unwire(ipc_kernel_map, trunc_page(addr), - round_page(addr + size_used), FALSE); - (void) vm_map_copyin(ipc_kernel_map, addr, size_used, - TRUE, &memory); + (void) vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size_used), FALSE); + (void) vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size_used, TRUE, &memory); *desc_list = (descriptor_list_t) memory; } @@ -427,6 +425,6 @@ void user_ldt_free( user_ldt_t user_ldt) { - kfree((vm_offset_t)user_ldt, + kfree(user_ldt, user_ldt->desc.limit_low+1+sizeof(struct real_descriptor)); } diff --git a/osfmk/i386/user_ldt.h b/osfmk/i386/user_ldt.h index 77768c912..db0b78e4d 100644 --- a/osfmk/i386/user_ldt.h +++ b/osfmk/i386/user_ldt.h @@ -80,6 +80,6 @@ extern boolean_t selector_check( int sel, int type); extern void user_ldt_free( - user_ldt_t ldt); + user_ldt_t uldt); #endif /* _I386_USER_LDT_H_ */ diff --git a/osfmk/i386/xpr.h b/osfmk/i386/xpr.h index e713e81e2..06b132787 100644 --- a/osfmk/i386/xpr.h +++ b/osfmk/i386/xpr.h @@ -57,16 +57,7 @@ */ #include -#include #include -#if NCPUS == 1 -extern int xpr_time(void); -#define XPR_TIMESTAMP xpr_time() - -#else /* NCPUS == 1 */ - #define XPR_TIMESTAMP (0) -#endif /* NCPUS == 1 */ - diff --git a/osfmk/ipc/ipc_entry.c b/osfmk/ipc/ipc_entry.c index 9d48e93f9..00814406f 100644 --- a/osfmk/ipc/ipc_entry.c +++ b/osfmk/ipc/ipc_entry.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -119,7 +119,8 @@ ipc_entry_tree_collision( ipc_splay_tree_bounds(&space->is_tree, name, &lower, &upper); index = MACH_PORT_INDEX(name); - return (((lower != ~0) && (MACH_PORT_INDEX(lower) == index)) || + return (((lower != (mach_port_name_t)~0) && + (MACH_PORT_INDEX(lower) == index)) || ((upper != 0) && (MACH_PORT_INDEX(upper) == index))); } @@ -535,7 +536,6 @@ ipc_entry_dealloc( ipc_tree_entry_t tentry; mach_port_name_t tname; boolean_t pick; - ipc_entry_bits_t bits; ipc_object_t obj; /* must move an entry from tree to table */ @@ -634,8 +634,8 @@ ipc_entry_dealloc( kern_return_t ipc_entry_grow_table( - ipc_space_t space, - int target_size) + ipc_space_t space, + ipc_table_elems_t target_size) { ipc_entry_num_t osize, size, nsize, psize; diff --git a/osfmk/ipc/ipc_entry.h b/osfmk/ipc/ipc_entry.h index 57e393c6e..70f99b476 100644 --- a/osfmk/ipc/ipc_entry.h +++ b/osfmk/ipc/ipc_entry.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,11 +61,13 @@ #ifndef _IPC_IPC_ENTRY_H_ #define _IPC_IPC_ENTRY_H_ +#include #include #include + +#include #include -#include -#include + #include /* @@ -89,10 +91,7 @@ * It is used as the head of the free list. 
*/ -typedef natural_t ipc_entry_bits_t; -typedef ipc_table_elems_t ipc_entry_num_t; /* number of entries */ - -typedef struct ipc_entry { +struct ipc_entry { struct ipc_object *ie_object; ipc_entry_bits_t ie_bits; union { @@ -103,9 +102,7 @@ typedef struct ipc_entry { mach_port_index_t table; struct ipc_tree_entry *tree; } hash; -} *ipc_entry_t; - -#define IE_NULL ((ipc_entry_t) 0) +}; #define ie_request index.request #define ie_next index.next @@ -135,15 +132,13 @@ typedef struct ipc_entry { #define IE_BITS_RIGHT_MASK 0x007fffff /* relevant to the right */ -typedef struct ipc_tree_entry { +struct ipc_tree_entry { struct ipc_entry ite_entry; mach_port_name_t ite_name; struct ipc_space *ite_space; struct ipc_tree_entry *ite_lchild; struct ipc_tree_entry *ite_rchild; -} *ipc_tree_entry_t; - -#define ITE_NULL ((ipc_tree_entry_t) 0) +}; #define ite_bits ite_entry.ie_bits #define ite_object ite_entry.ie_object @@ -153,7 +148,7 @@ typedef struct ipc_tree_entry { extern zone_t ipc_tree_entry_zone; #define ite_alloc() ((ipc_tree_entry_t) zalloc(ipc_tree_entry_zone)) -#define ite_free(ite) zfree(ipc_tree_entry_zone, (vm_offset_t) (ite)) +#define ite_free(ite) zfree(ipc_tree_entry_zone, (ite)) /* * Exported interfaces @@ -190,7 +185,7 @@ extern void ipc_entry_dealloc( /* Grow the table in a space */ extern kern_return_t ipc_entry_grow_table( - ipc_space_t space, - int target_size); + ipc_space_t space, + ipc_table_elems_t target_size); #endif /* _IPC_IPC_ENTRY_H_ */ diff --git a/osfmk/ipc/ipc_hash.c b/osfmk/ipc/ipc_hash.c index 844501695..46462a593 100644 --- a/osfmk/ipc/ipc_hash.c +++ b/osfmk/ipc/ipc_hash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -211,8 +211,7 @@ typedef struct ipc_hash_global_bucket { #define IHGB_NULL ((ipc_hash_global_bucket_t) 0) -#define ihgb_lock_init(ihgb) mutex_init(&(ihgb)->ihgb_lock_data, \ - ETAP_IPC_IHGB) +#define ihgb_lock_init(ihgb) mutex_init(&(ihgb)->ihgb_lock_data, 0) #define ihgb_lock(ihgb) mutex_lock(&(ihgb)->ihgb_lock_data) #define ihgb_unlock(ihgb) mutex_unlock(&(ihgb)->ihgb_lock_data) @@ -284,17 +283,14 @@ ipc_hash_global_lookup( void ipc_hash_global_insert( - ipc_space_t space, - ipc_object_t obj, - mach_port_name_t name, - ipc_tree_entry_t entry) + ipc_space_t space, + ipc_object_t obj, + __assert_only mach_port_name_t name, + ipc_tree_entry_t entry) { ipc_hash_global_bucket_t bucket; - assert(!is_fast_space(space)); - - assert(entry->ite_name == name); assert(space != IS_NULL); assert(entry->ite_space == space); @@ -325,16 +321,15 @@ ipc_hash_global_insert( void ipc_hash_global_delete( - ipc_space_t space, - ipc_object_t obj, - mach_port_name_t name, - ipc_tree_entry_t entry) + ipc_space_t space, + ipc_object_t obj, + __assert_only mach_port_name_t name, + ipc_tree_entry_t entry) { ipc_hash_global_bucket_t bucket; ipc_tree_entry_t this, *last; assert(!is_fast_space(space)); - assert(entry->ite_name == name); assert(space != IS_NULL); assert(entry->ite_space == space); @@ -457,10 +452,10 @@ ipc_hash_local_lookup( void ipc_hash_local_insert( - ipc_space_t space, - ipc_object_t obj, - mach_port_index_t index, - ipc_entry_t entry) + ipc_space_t space, + ipc_object_t obj, + mach_port_index_t index, + __assert_only ipc_entry_t entry) { ipc_entry_t table; ipc_entry_num_t size; @@ -501,10 +496,10 @@ ipc_hash_local_insert( void ipc_hash_local_delete( - ipc_space_t space, - ipc_object_t obj, - mach_port_index_t index, - ipc_entry_t entry) + ipc_space_t space, + ipc_object_t obj, + mach_port_index_t index, + __assert_only ipc_entry_t entry) { ipc_entry_t table; ipc_entry_num_t size; diff --git a/osfmk/ipc/ipc_hash.h b/osfmk/ipc/ipc_hash.h index 2b864e5fc..00f4bde5c 100644 --- a/osfmk/ipc/ipc_hash.h +++ b/osfmk/ipc/ipc_hash.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,10 +60,11 @@ #ifndef _IPC_IPC_HASH_H_ #define _IPC_IPC_HASH_H_ +#include +#include #include #include -#include - +#include /* * Exported interfaces */ @@ -123,6 +124,8 @@ extern void ipc_hash_init(void); #if MACH_IPC_DEBUG +#include + extern natural_t ipc_hash_info( hash_info_bucket_t *info, mach_msg_type_number_t count); diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 1bf81102f..47c0ffc00 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -60,14 +60,22 @@ #include #include +#include #include + +#include +#include #include #include +#include #include #include #include +#include +#include #include #include + #include #include #include @@ -77,6 +85,8 @@ #include #include #include +#include + #include /* NDR_record */ vm_map_t ipc_kernel_map; @@ -92,7 +102,6 @@ int ipc_tree_entry_max = ITE_MAX; int ipc_port_max = PORT_MAX; int ipc_pset_max = SET_MAX; -extern void mig_init(void); extern void ikm_cache_init(void); /* @@ -155,6 +164,16 @@ ipc_bootstrap(void) /* make it exhaustible */ zone_change(ipc_object_zones[IOT_PORT_SET], Z_EXHAUST, TRUE); + /* + * Create the basic ipc_kmsg_t zone (the one we also cache) + * elements at the processor-level to avoid the locking. + */ + ipc_kmsg_zone = zinit(IKM_SAVED_KMSG_SIZE, + ipc_port_max * MACH_PORT_QLIMIT_MAX * + IKM_SAVED_KMSG_SIZE, + IKM_SAVED_KMSG_SIZE, + "ipc kmsgs"); + /* create special spaces */ kr = ipc_space_create_special(&ipc_space_kernel); @@ -172,7 +191,6 @@ ipc_bootstrap(void) mig_init(); ipc_table_init(); ipc_hash_init(); - ipc_kmsg_init(); semaphore_init(); lock_set_init(); mk_timer_init(); @@ -195,16 +213,17 @@ void ipc_init(void) { kern_return_t retval; - vm_offset_t min, max; - extern vm_size_t kalloc_max_prerounded; + vm_offset_t min; retval = kmem_suballoc(kernel_map, &min, ipc_kernel_map_size, - TRUE, TRUE, &ipc_kernel_map); + TRUE, VM_FLAGS_ANYWHERE, &ipc_kernel_map); + if (retval != KERN_SUCCESS) panic("ipc_init: kmem_suballoc of ipc_kernel_map failed"); retval = kmem_suballoc(kernel_map, &min, ipc_kernel_copy_map_size, - TRUE, TRUE, &ipc_kernel_copy_map); + TRUE, VM_FLAGS_ANYWHERE, &ipc_kernel_copy_map); + if (retval != KERN_SUCCESS) panic("ipc_init: kmem_suballoc of ipc_kernel_copy_map failed"); diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index 46267f018..7e097c14d 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,25 +57,36 @@ * Operations on kernel messages. */ -#include #include +#include #include #include #include #include +#include +#include #include + +#include #include +#include #include +#include +#include #include #include #include #include #include +#include + #include #include #include + #include +#include #include #include #include @@ -100,6 +111,46 @@ extern vm_size_t msg_ool_size_small; #define MSG_OOL_SIZE_SMALL msg_ool_size_small +#if defined(__LP64__) +#define MAP_SIZE_DIFFERS(map) (map->max_offset < MACH_VM_MAX_ADDRESS) +#define OTHER_OOL_DESCRIPTOR mach_msg_ool_descriptor32_t +#define OTHER_OOL_PORTS_DESCRIPTOR mach_msg_ool_ports_descriptor32_t +#else +#define MAP_SIZE_DIFFERS(map) (map->max_offset > VM_MAX_ADDRESS) +#define OTHER_OOL_DESCRIPTOR mach_msg_ool_descriptor64_t +#define OTHER_OOL_PORTS_DESCRIPTOR mach_msg_ool_ports_descriptor64_t +#endif + +#define DESC_SIZE_ADJUSTMENT (sizeof(OTHER_OOL_DESCRIPTOR) - \ + sizeof(mach_msg_ool_descriptor_t)) + +/* scatter list macros */ + +#define SKIP_PORT_DESCRIPTORS(s, c) \ +MACRO_BEGIN \ + if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \ + while ((c) > 0) { \ + if ((s)->type.type != MACH_MSG_PORT_DESCRIPTOR) \ + break; \ + (s)++; (c)--; \ + } \ + if (c == 0) \ + (s) = MACH_MSG_DESCRIPTOR_NULL; \ + } \ +MACRO_END + +#define INCREMENT_SCATTER(s, c, d) \ +MACRO_BEGIN \ + if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \ + s = (d) ? 
(mach_msg_descriptor_t *) \ + ((OTHER_OOL_DESCRIPTOR *)(s) + 1) : \ + (s + 1); \ + (c)--; \ + } \ +MACRO_END + +/* zone for cached ipc_kmsg_t structures */ +zone_t ipc_kmsg_zone; /* * Forward declarations @@ -110,26 +161,21 @@ void ipc_kmsg_clean( void ipc_kmsg_clean_body( ipc_kmsg_t kmsg, - mach_msg_type_number_t number); + mach_msg_type_number_t number, + mach_msg_descriptor_t *desc); void ipc_kmsg_clean_partial( ipc_kmsg_t kmsg, mach_msg_type_number_t number, + mach_msg_descriptor_t *desc, vm_offset_t paddr, vm_size_t length); -mach_msg_return_t ipc_kmsg_copyout_body( - ipc_kmsg_t kmsg, - ipc_space_t space, - vm_map_t map, - mach_msg_body_t *slist); - mach_msg_return_t ipc_kmsg_copyin_body( ipc_kmsg_t kmsg, ipc_space_t space, vm_map_t map); -void ikm_cache_init(void); /* * We keep a per-processor cache of kernel message buffers. * The cache saves the overhead/locking of using kalloc/kfree. @@ -137,35 +183,6 @@ void ikm_cache_init(void); * and it also uses less memory. Access to the cache doesn't * require locking. */ -#define IKM_STASH 16 /* # of cache entries per cpu */ -ipc_kmsg_t ipc_kmsg_cache[ NCPUS ][ IKM_STASH ]; -unsigned int ipc_kmsg_cache_avail[NCPUS]; - -/* - * Routine: ipc_kmsg_init - * Purpose: - * Initialize the kmsg system. For each CPU, we need to - * pre-stuff the kmsg cache. - */ -void -ipc_kmsg_init() -{ - unsigned int cpu, i; - - for (cpu = 0; cpu < NCPUS; ++cpu) { - for (i = 0; i < IKM_STASH; ++i) { - ipc_kmsg_t kmsg; - - kmsg = (ipc_kmsg_t) - kalloc(ikm_plus_overhead(IKM_SAVED_MSG_SIZE)); - if (kmsg == IKM_NULL) - panic("ipc_kmsg_init"); - ikm_init(kmsg, IKM_SAVED_MSG_SIZE); - ipc_kmsg_cache[cpu][i] = kmsg; - } - ipc_kmsg_cache_avail[cpu] = IKM_STASH; - } -} /* * Routine: ipc_kmsg_alloc @@ -179,31 +196,70 @@ ipc_kmsg_t ipc_kmsg_alloc( mach_msg_size_t msg_and_trailer_size) { + mach_msg_size_t max_expanded_size; ipc_kmsg_t kmsg; - if ((msg_and_trailer_size <= IKM_SAVED_MSG_SIZE)) { - unsigned int cpu, i; +#if !defined(__LP64__) + mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE; + + /* + * LP64 support - + * Pad the allocation in case we need to expand the + * message descriptors for user spaces with pointers larger than + * the kernel's own. We don't know how many descriptors + * there are yet, so just assume the whole body could be + * descriptors (if there could be any at all). + * + * The expansion space is left in front of the header, + * because it is easier to pull the header and descriptors + * forward as we process them than it is to push all the + * data backwards. + */ + max_expanded_size = (size > sizeof(mach_msg_base_t)) ? 
+ (msg_and_trailer_size + DESC_SIZE_ADJUSTMENT * + ((size - sizeof(mach_msg_base_t)) / + (sizeof(mach_msg_ool_descriptor_t)))) + : + (msg_and_trailer_size); +#else + max_expanded_size = msg_and_trailer_size; +#endif + + /* round up for ikm_cache */ + if (max_expanded_size < IKM_SAVED_MSG_SIZE) + max_expanded_size = IKM_SAVED_MSG_SIZE; + + if (max_expanded_size == IKM_SAVED_MSG_SIZE) { + struct ikm_cache *cache; + unsigned int i; disable_preemption(); - cpu = cpu_number(); - if ((i = ipc_kmsg_cache_avail[cpu]) > 0) { + cache = &PROCESSOR_DATA(current_processor(), ikm_cache); + if ((i = cache->avail) > 0) { assert(i <= IKM_STASH); - kmsg = ipc_kmsg_cache[cpu][--i]; - ipc_kmsg_cache_avail[cpu] = i; - ikm_check_init(kmsg, IKM_SAVED_MSG_SIZE); + kmsg = cache->entries[--i]; + cache->avail = i; + ikm_check_init(kmsg, max_expanded_size); enable_preemption(); + kmsg->ikm_header = (mach_msg_header_t *) + ((vm_offset_t)(kmsg + 1) + + max_expanded_size - + msg_and_trailer_size); return (kmsg); } enable_preemption(); + kmsg = (ipc_kmsg_t)zalloc(ipc_kmsg_zone); + } else { + kmsg = (ipc_kmsg_t)kalloc(ikm_plus_overhead(max_expanded_size)); } - /* round up for ikm_cache */ - if (msg_and_trailer_size < IKM_SAVED_MSG_SIZE) - msg_and_trailer_size = IKM_SAVED_MSG_SIZE; - - kmsg = (ipc_kmsg_t)kalloc(ikm_plus_overhead(msg_and_trailer_size)); if (kmsg != IKM_NULL) { - ikm_init(kmsg, msg_and_trailer_size); + ikm_init(kmsg, max_expanded_size); + kmsg->ikm_header = (mach_msg_header_t *) + ((vm_offset_t)(kmsg + 1) + + max_expanded_size - + msg_and_trailer_size); } return(kmsg); } @@ -230,8 +286,7 @@ ipc_kmsg_free( /* * Check to see if the message is bound to the port. If so, * mark it not in use. If the port isn't already dead, then - * leave the message associated with it. Otherwise, free it - * (not to the cache). + * leave the message associated with it. Otherwise, free it. */ port = ikm_prealloc_inuse_port(kmsg); if (port != IP_NULL) { @@ -243,32 +298,28 @@ ipc_kmsg_free( return; } ip_check_unlock(port); /* May be last reference */ - goto free_it; } /* * Peek and see if it has to go back in the cache. */ - if (kmsg->ikm_size == IKM_SAVED_MSG_SIZE && - ipc_kmsg_cache_avail[cpu_number()] < IKM_STASH) { - unsigned int cpu, i; + if (kmsg->ikm_size == IKM_SAVED_MSG_SIZE) { + struct ikm_cache *cache; + unsigned int i; disable_preemption(); - cpu = cpu_number(); - - i = ipc_kmsg_cache_avail[cpu]; - if (i < IKM_STASH) { - assert(i >= 0); - ipc_kmsg_cache[cpu][i] = kmsg; - ipc_kmsg_cache_avail[cpu] = i + 1; + cache = &PROCESSOR_DATA(current_processor(), ikm_cache); + if ((i = cache->avail) < IKM_STASH) { + cache->entries[i] = kmsg; + cache->avail = i + 1; enable_preemption(); return; } enable_preemption(); + zfree(ipc_kmsg_zone, kmsg); + return; } - - free_it: - kfree((vm_offset_t) kmsg, ikm_plus_overhead(size)); + kfree(kmsg, ikm_plus_overhead(size)); } @@ -412,16 +463,16 @@ ipc_kmsg_destroy( * Conditions: * No locks held. 
*/ - +void ipc_kmsg_destroy_dest( ipc_kmsg_t kmsg) { ipc_port_t port; - port = kmsg->ikm_header.msgh_remote_port; + port = kmsg->ikm_header->msgh_remote_port; ipc_port_release(port); - kmsg->ikm_header.msgh_remote_port = MACH_PORT_NULL; + kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; ipc_kmsg_destroy(kmsg); } @@ -437,16 +488,15 @@ ipc_kmsg_destroy_dest( void ipc_kmsg_clean_body( - ipc_kmsg_t kmsg, - mach_msg_type_number_t number) + __unused ipc_kmsg_t kmsg, + mach_msg_type_number_t number, + mach_msg_descriptor_t *saddr) { - mach_msg_descriptor_t *saddr, *eaddr; + mach_msg_descriptor_t *eaddr; if ( number == 0 ) return; - saddr = (mach_msg_descriptor_t *) - ((mach_msg_base_t *) &kmsg->ikm_header + 1); eaddr = saddr + number; for ( ; saddr < eaddr; saddr++ ) { @@ -511,7 +561,7 @@ ipc_kmsg_clean_body( assert(dsc->count != 0); - kfree((vm_offset_t) dsc->address, + kfree(dsc->address, (vm_size_t) dsc->count * sizeof(mach_port_name_t)); break; } @@ -539,17 +589,18 @@ void ipc_kmsg_clean_partial( ipc_kmsg_t kmsg, mach_msg_type_number_t number, + mach_msg_descriptor_t *desc, vm_offset_t paddr, vm_size_t length) { ipc_object_t object; - mach_msg_bits_t mbits = kmsg->ikm_header.msgh_bits; + mach_msg_bits_t mbits = kmsg->ikm_header->msgh_bits; - object = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; + object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; assert(IO_VALID(object)); ipc_object_destroy(object, MACH_MSGH_BITS_REMOTE(mbits)); - object = (ipc_object_t) kmsg->ikm_header.msgh_local_port; + object = (ipc_object_t) kmsg->ikm_header->msgh_local_port; if (IO_VALID(object)) ipc_object_destroy(object, MACH_MSGH_BITS_LOCAL(mbits)); @@ -557,7 +608,7 @@ ipc_kmsg_clean_partial( (void) vm_deallocate(ipc_kernel_copy_map, paddr, length); } - ipc_kmsg_clean_body(kmsg, number); + ipc_kmsg_clean_body(kmsg, number, desc); } /* @@ -576,20 +627,21 @@ ipc_kmsg_clean( ipc_object_t object; mach_msg_bits_t mbits; - mbits = kmsg->ikm_header.msgh_bits; - object = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; + mbits = kmsg->ikm_header->msgh_bits; + object = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; if (IO_VALID(object)) ipc_object_destroy(object, MACH_MSGH_BITS_REMOTE(mbits)); - object = (ipc_object_t) kmsg->ikm_header.msgh_local_port; + object = (ipc_object_t) kmsg->ikm_header->msgh_local_port; if (IO_VALID(object)) ipc_object_destroy(object, MACH_MSGH_BITS_LOCAL(mbits)); if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_body_t *body; - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); - ipc_kmsg_clean_body(kmsg, body->msgh_descriptor_count); + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + ipc_kmsg_clean_body(kmsg, body->msgh_descriptor_count, + (mach_msg_descriptor_t *)(body + 1)); } } @@ -630,6 +682,8 @@ ipc_kmsg_clear_prealloc( IP_CLEAR_PREALLOC(port, kmsg); } + + /* * Routine: ipc_kmsg_get * Purpose: @@ -647,16 +701,13 @@ ipc_kmsg_clear_prealloc( mach_msg_return_t ipc_kmsg_get( - mach_msg_header_t *msg, - mach_msg_size_t size, + mach_vm_address_t msg_addr, + mach_msg_size_t size, ipc_kmsg_t *kmsgp) { mach_msg_size_t msg_and_trailer_size; ipc_kmsg_t kmsg; mach_msg_max_trailer_t *trailer; - mach_port_name_t dest_name; - ipc_entry_t dest_entry; - ipc_port_t dest_port; if ((size < sizeof(mach_msg_header_t)) || (size & 3)) return MACH_SEND_MSG_TOO_SMALL; @@ -668,12 +719,12 @@ ipc_kmsg_get( if (kmsg == IKM_NULL) return MACH_SEND_NO_BUFFER; - if (copyinmsg((char *) msg, (char *) &kmsg->ikm_header, size)) { + if (copyinmsg(msg_addr, (char *) kmsg->ikm_header, size)) { 
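		/* copyin failed: the user's message buffer was unreadable; release the kmsg and fail the send */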
ipc_kmsg_free(kmsg); return MACH_SEND_INVALID_DATA; } - kmsg->ikm_header.msgh_size = size; + kmsg->ikm_header->msgh_size = size; /* * I reserve for the trailer the largest space (MAX_TRAILER_SIZE) @@ -681,16 +732,16 @@ ipc_kmsg_get( * is initialized to the minimum (sizeof(mach_msg_trailer_t)), to optimize * the cases where no implicit data is requested. */ - trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)&kmsg->ikm_header + size); - trailer->msgh_sender = current_act()->task->sec_token; - trailer->msgh_audit = current_act()->task->audit_token; + trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + size); + trailer->msgh_sender = current_thread()->task->sec_token; + trailer->msgh_audit = current_thread()->task->audit_token; trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; #ifdef ppc - if(trcWork.traceMask) dbgTrace((unsigned int)kmsg->ikm_header.msgh_id, - (unsigned int)kmsg->ikm_header.msgh_remote_port, - (unsigned int)kmsg->ikm_header.msgh_local_port, 0); + if(trcWork.traceMask) dbgTrace((unsigned int)kmsg->ikm_header->msgh_id, + (unsigned int)kmsg->ikm_header->msgh_remote_port, + (unsigned int)kmsg->ikm_header->msgh_local_port, 0); #endif *kmsgp = kmsg; return MACH_MSG_SUCCESS; @@ -699,7 +750,9 @@ ipc_kmsg_get( /* * Routine: ipc_kmsg_get_from_kernel * Purpose: - * Allocates a kernel message buffer. + * First checks for a preallocated message + * reserved for kernel clients. If not found - + * allocates a new kernel message buffer. * Copies a kernel message to the message buffer. * Only resource errors are allowed. * Conditions: @@ -713,7 +766,7 @@ ipc_kmsg_get( mach_msg_return_t ipc_kmsg_get_from_kernel( mach_msg_header_t *msg, - mach_msg_size_t size, + mach_msg_size_t size, ipc_kmsg_t *kmsgp) { ipc_kmsg_t kmsg; @@ -758,9 +811,9 @@ ipc_kmsg_get_from_kernel( return MACH_SEND_NO_BUFFER; } - (void) memcpy((void *) &kmsg->ikm_header, (const void *) msg, size); + (void) memcpy((void *) kmsg->ikm_header, (const void *) msg, size); - kmsg->ikm_header.msgh_size = size; + kmsg->ikm_header->msgh_size = size; /* * I reserve for the trailer the largest space (MAX_TRAILER_SIZE) @@ -769,7 +822,7 @@ ipc_kmsg_get_from_kernel( * optimize the cases where no implicit data is requested. 
*/ trailer = (mach_msg_max_trailer_t *) - ((vm_offset_t)&kmsg->ikm_header + size); + ((vm_offset_t)kmsg->ikm_header + size); trailer->msgh_sender = KERNEL_SECURITY_TOKEN; trailer->msgh_audit = KERNEL_AUDIT_TOKEN; trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; @@ -800,12 +853,11 @@ mach_msg_return_t ipc_kmsg_send( ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t timeout) + mach_msg_timeout_t send_timeout) { - kern_return_t save_wait_result; - ipc_port_t port; - port = (ipc_port_t) kmsg->ikm_header.msgh_remote_port; + + port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port; assert(IP_VALID(port)); ip_lock(port); @@ -831,7 +883,7 @@ ipc_kmsg_send( if (kmsg == IKM_NULL) return MACH_MSG_SUCCESS; - port = (ipc_port_t) kmsg->ikm_header.msgh_remote_port; + port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port; assert(IP_VALID(port)); ip_lock(port); /* fall thru with reply - same options */ @@ -852,12 +904,12 @@ ipc_kmsg_send( ip_release(port); ip_check_unlock(port); - kmsg->ikm_header.msgh_remote_port = MACH_PORT_NULL; + kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; ipc_kmsg_destroy(kmsg); return MACH_MSG_SUCCESS; } - if (kmsg->ikm_header.msgh_bits & MACH_MSGH_BITS_CIRCULAR) { + if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_CIRCULAR) { ip_unlock(port); /* don't allow the creation of a circular loop */ @@ -872,7 +924,7 @@ ipc_kmsg_send( * queue. */ ip_unlock(port); - return (ipc_mqueue_send(&port->ip_messages, kmsg, option, timeout)); + return (ipc_mqueue_send(&port->ip_messages, kmsg, option, send_timeout)); } /* @@ -891,13 +943,13 @@ ipc_kmsg_send( mach_msg_return_t ipc_kmsg_put( - mach_msg_header_t *msg, + mach_vm_address_t msg_addr, ipc_kmsg_t kmsg, mach_msg_size_t size) { mach_msg_return_t mr; - if (copyoutmsg((const char *) &kmsg->ikm_header, (char *) msg, size)) + if (copyoutmsg((const char *) kmsg->ikm_header, msg_addr, size)) mr = MACH_RCV_INVALID_DATA; else mr = MACH_MSG_SUCCESS; @@ -922,7 +974,7 @@ ipc_kmsg_put_to_kernel( ipc_kmsg_t kmsg, mach_msg_size_t size) { - (void) memcpy((void *) msg, (const void *) &kmsg->ikm_header, size); + (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, size); ipc_kmsg_free(kmsg); } @@ -1010,7 +1062,8 @@ ipc_kmsg_copyin_header( } notify_port = (ipc_port_t) entry->ie_object; - } + } else + notify_port = IP_NULL; if (dest_name == reply_name) { ipc_entry_t entry; @@ -1195,7 +1248,6 @@ ipc_kmsg_copyin_header( reply_soright = IP_NULL; } else { ipc_entry_t dest_entry, reply_entry; - ipc_port_t saved_reply; /* * This is the tough case to make atomic. @@ -1339,6 +1391,8 @@ invalid_dest: * MACH_MSG_INVALID_RT_DESCRIPTOR Dealloc and RT are incompatible */ +#define DESC_COUNT_SMALL 64 + mach_msg_return_t ipc_kmsg_copyin_body( ipc_kmsg_t kmsg, @@ -1347,73 +1401,111 @@ ipc_kmsg_copyin_body( { ipc_object_t dest; mach_msg_body_t *body; - mach_msg_descriptor_t *saddr, *eaddr; - boolean_t complex; - mach_msg_return_t mr; - int i; - kern_return_t kr; + mach_msg_descriptor_t *daddr, *naddr; + mach_msg_type_number_t dsc_count; + boolean_t differs = MAP_SIZE_DIFFERS(map); + boolean_t complex = FALSE; vm_size_t space_needed = 0; + vm_size_t desc_size_space[DESC_COUNT_SMALL]; + vm_size_t *user_desc_sizes = NULL; vm_offset_t paddr = 0; - mach_msg_descriptor_t *sstart; vm_map_copy_t copy = VM_MAP_COPY_NULL; + kern_return_t kr; + mach_msg_type_number_t i; + mach_msg_return_t mr = MACH_MSG_SUCCESS; /* * Determine if the target is a kernel port. 
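 * (Editor's note: the rewritten copy-in below also sizes each user
 * descriptor.  When MAP_SIZE_DIFFERS(map), out-of-line descriptors are
 * larger in the sender's layout, so their sizes are recorded in
 * user_desc_sizes[] -- on-stack up to DESC_COUNT_SMALL entries,
 * kalloc'd beyond that -- for the walk and the later compaction.)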
*/ - dest = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; - complex = FALSE; + dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + naddr = (mach_msg_descriptor_t *) (body + 1); - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); - saddr = (mach_msg_descriptor_t *) (body + 1); - eaddr = saddr + body->msgh_descriptor_count; - - /* make sure the message does not ask for more msg descriptors - * than the message can hold. - */ - - if (eaddr <= saddr || - eaddr > (mach_msg_descriptor_t *) (&kmsg->ikm_header + - kmsg->ikm_header.msgh_size)) { - ipc_kmsg_clean_partial(kmsg,0,0,0); - return MACH_SEND_MSG_TOO_SMALL; + dsc_count = body->msgh_descriptor_count; + if (dsc_count == 0) + return MACH_MSG_SUCCESS; + + if (differs) { + user_desc_sizes = (dsc_count <= DESC_COUNT_SMALL) ? + &desc_size_space : kalloc(dsc_count * sizeof(vm_size_t)); + if (user_desc_sizes == NULL) { + ipc_kmsg_clean_partial(kmsg,0,0,0,0); + return KERN_RESOURCE_SHORTAGE; + } } - + /* * Make an initial pass to determine kernal VM space requirements for - * physical copies. + * physical copies and possible contraction of the descriptors from + * processes with pointers larger than the kernel's. */ - for (sstart = saddr; sstart < eaddr; sstart++) { + daddr = 0; + for (i = 0; i < dsc_count; i++) { + daddr = naddr; + + /* make sure the descriptor fits in the message */ + if (differs) { + switch (daddr->type.type) { + case MACH_MSG_OOL_DESCRIPTOR: + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_PORTS_DESCRIPTOR: + user_desc_sizes[i] = sizeof(OTHER_OOL_DESCRIPTOR); + break; + default: + user_desc_sizes[i] = sizeof(*daddr); + break; + } + naddr = (mach_msg_descriptor_t *) + ((vm_offset_t)daddr + user_desc_sizes[i]); + } else { + naddr = daddr + 1; + } + + if (naddr > (mach_msg_descriptor_t *) + ((vm_offset_t)kmsg->ikm_header + kmsg->ikm_header->msgh_size)) { + ipc_kmsg_clean_partial(kmsg,0,0,0,0); + mr = MACH_SEND_MSG_TOO_SMALL; + goto out; + } - if (sstart->type.type == MACH_MSG_OOL_DESCRIPTOR || - sstart->type.type == MACH_MSG_OOL_VOLATILE_DESCRIPTOR) { + switch (daddr->type.type) { + mach_msg_size_t size; - if (sstart->out_of_line.copy != MACH_MSG_PHYSICAL_COPY && - sstart->out_of_line.copy != MACH_MSG_VIRTUAL_COPY) { + case MACH_MSG_OOL_DESCRIPTOR: + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + size = (differs) ? 
+ ((OTHER_OOL_DESCRIPTOR *)daddr)->size : + daddr->out_of_line.size; + + if (daddr->out_of_line.copy != MACH_MSG_PHYSICAL_COPY && + daddr->out_of_line.copy != MACH_MSG_VIRTUAL_COPY) { + /* + * Invalid copy option + */ + ipc_kmsg_clean_partial(kmsg,0,0,0,0); + mr = MACH_SEND_INVALID_TYPE; + goto out; + } + + if ((size >= MSG_OOL_SIZE_SMALL) && + (daddr->out_of_line.copy == MACH_MSG_PHYSICAL_COPY) && + !(daddr->out_of_line.deallocate)) { + + /* + * Out-of-line memory descriptor, accumulate kernel + * memory requirements + */ + space_needed += round_page(size); + if (space_needed > ipc_kmsg_max_vm_space) { + /* - * Invalid copy option + * Per message kernel memory limit exceeded */ - ipc_kmsg_clean_partial(kmsg,0,0,0); - return MACH_SEND_INVALID_TYPE; + ipc_kmsg_clean_partial(kmsg,0,0,0,0); + mr = MACH_MSG_VM_KERNEL; + goto out; } - - if ((sstart->out_of_line.size >= MSG_OOL_SIZE_SMALL) && - (sstart->out_of_line.copy == MACH_MSG_PHYSICAL_COPY) && - !(sstart->out_of_line.deallocate)) { - - /* - * Out-of-line memory descriptor, accumulate kernel - * memory requirements - */ - space_needed += round_page_32(sstart->out_of_line.size); - if (space_needed > ipc_kmsg_max_vm_space) { - - /* - * Per message kernel memory limit exceeded - */ - ipc_kmsg_clean_partial(kmsg,0,0,0); - return MACH_MSG_VM_KERNEL; - } - } + } } } @@ -1423,69 +1515,106 @@ ipc_kmsg_copyin_body( * space. */ if (space_needed) { - if (vm_allocate(ipc_kernel_copy_map, &paddr, space_needed, TRUE) != + if (vm_allocate(ipc_kernel_copy_map, &paddr, space_needed, VM_FLAGS_ANYWHERE) != KERN_SUCCESS) { - ipc_kmsg_clean_partial(kmsg,0,0,0); - return MACH_MSG_VM_KERNEL; + ipc_kmsg_clean_partial(kmsg,0,0,0,0); + mr = MACH_MSG_VM_KERNEL; + goto out; } } /* * handle the OOL regions and port descriptors. - * the check for complex messages was done earlier. + * We process them in reverse order starting with the last one + * scanned above. That way, we can compact them up against + * the message body (if the user-descriptor size is larger than + * the kernel representation). */ - - for (i = 0, sstart = saddr; sstart < eaddr; sstart++) { - - switch (sstart->type.type) { + naddr -= 1; + do { + + switch (daddr->type.type) { + /* port descriptors are the same size everywhere, how nice */ case MACH_MSG_PORT_DESCRIPTOR: { - mach_msg_type_name_t name; + mach_msg_type_name_t user_disp; + mach_msg_type_name_t result_disp; + mach_port_name_t name; ipc_object_t object; - mach_msg_port_descriptor_t *dsc; + volatile mach_msg_port_descriptor_t *dsc; + volatile mach_msg_port_descriptor_t *user_dsc; - dsc = &sstart->port; + user_dsc = &daddr->port; + dsc = &naddr->port; - /* this is really the type SEND, SEND_ONCE, etc. 
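 * (Editor's note: ipc_object_copyin_type() maps the caller-supplied
 * disposition, e.g. MACH_MSG_TYPE_MOVE_SEND or MACH_MSG_TYPE_COPY_SEND,
 * to the right actually carried in the message, e.g.
 * MACH_MSG_TYPE_PORT_SEND.)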
*/ - name = dsc->disposition; - dsc->disposition = ipc_object_copyin_type(name); + user_disp = user_dsc->disposition; + result_disp = ipc_object_copyin_type(user_disp); - if (!MACH_PORT_VALID((mach_port_name_t)dsc->name)) { - complex = TRUE; - break; - } - kr = ipc_object_copyin(space, (mach_port_name_t)dsc->name, name, &object); - if (kr != KERN_SUCCESS) { - ipc_kmsg_clean_partial(kmsg, i, paddr, space_needed); - return MACH_SEND_INVALID_RIGHT; - } - if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && - ipc_port_check_circularity((ipc_port_t) object, - (ipc_port_t) dest)) { - kmsg->ikm_header.msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + name = (mach_port_name_t)user_dsc->name; + if (MACH_PORT_VALID(name)) { + + kr = ipc_object_copyin(space, name, user_disp, &object); + if (kr != KERN_SUCCESS) { + mr = MACH_SEND_INVALID_RIGHT; + break; + } + + if ((result_disp == MACH_MSG_TYPE_PORT_RECEIVE) && + ipc_port_check_circularity((ipc_port_t) object, + (ipc_port_t) dest)) { + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + } + dsc->name = (ipc_port_t) object; + } else { + dsc->name = (mach_port_t)name; } - dsc->name = (ipc_port_t) object; + dsc->disposition = result_disp; + dsc->type = MACH_MSG_PORT_DESCRIPTOR; complex = TRUE; break; } + + /* out of line descriptors differ in size between 32 and 64 bit processes */ case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: case MACH_MSG_OOL_DESCRIPTOR: { vm_size_t length; boolean_t dealloc; - vm_offset_t addr; - vm_offset_t kaddr; - mach_msg_ool_descriptor_t *dsc; - - dsc = &sstart->out_of_line; - dealloc = dsc->deallocate; - addr = (vm_offset_t) dsc->address; - - length = dsc->size; + mach_msg_copy_options_t copy_options; + mach_vm_offset_t addr; + mach_msg_descriptor_type_t dsc_type; + + volatile mach_msg_ool_descriptor_t *dsc; + + if (differs) { + volatile OTHER_OOL_DESCRIPTOR *user_dsc; + + user_dsc = (OTHER_OOL_DESCRIPTOR *)&daddr->out_of_line; + addr = (mach_vm_offset_t) user_dsc->address; + length = user_dsc->size; + dealloc = user_dsc->deallocate; + copy_options = user_dsc->copy; + dsc_type = user_dsc->type; + } else { + volatile mach_msg_ool_descriptor_t *user_dsc; + user_dsc = &daddr->out_of_line; + addr = CAST_USER_ADDR_T(user_dsc->address); + dealloc = user_dsc->deallocate; + copy_options = user_dsc->copy; + dsc_type = user_dsc->type; + length = user_dsc->size; + } + + dsc = &naddr->out_of_line; + dsc->size = length; + dsc->deallocate = dealloc; + dsc->copy = copy_options; + dsc->type = dsc_type; + if (length == 0) { dsc->address = 0; } else if ((length >= MSG_OOL_SIZE_SMALL) && - (dsc->copy == MACH_MSG_PHYSICAL_COPY) && !dealloc) { + (copy_options == MACH_MSG_PHYSICAL_COPY) && !dealloc) { /* * If the request is a physical copy and the source @@ -1497,11 +1626,9 @@ ipc_kmsg_copyin_body( * is not being deallocated, we must be prepared * to page if the region is sufficiently large. 
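 * (Editor's note: this physical-copy path fills the paddr region that
 * was reserved from ipc_kernel_copy_map in the sizing pass, then wraps
 * it in a vm_map_copy_t via vm_map_copyin() below.)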
*/ - if (copyin((const char *) addr, (char *) paddr, - length)) { - ipc_kmsg_clean_partial(kmsg, i, paddr, - space_needed); - return MACH_SEND_INVALID_MEMORY; + if (copyin(addr, (char *) paddr, length)) { + mr = MACH_SEND_INVALID_MEMORY; + break; } /* @@ -1511,17 +1638,16 @@ ipc_kmsg_copyin_body( */ if (!page_aligned(length)) { (void) memset((void *) (paddr + length), 0, - round_page_32(length) - length); + round_page(length) - length); } - if (vm_map_copyin(ipc_kernel_copy_map, paddr, length, - TRUE, ©) != KERN_SUCCESS) { - ipc_kmsg_clean_partial(kmsg, i, paddr, - space_needed); - return MACH_MSG_VM_KERNEL; + if (vm_map_copyin(ipc_kernel_copy_map, (vm_map_address_t)paddr, + (vm_map_size_t)length, TRUE, ©) != KERN_SUCCESS) { + mr = MACH_MSG_VM_KERNEL; + break; } dsc->address = (void *) copy; - paddr += round_page_32(length); - space_needed -= round_page_32(length); + paddr += round_page(length); + space_needed -= round_page(length); } else { /* @@ -1532,12 +1658,13 @@ ipc_kmsg_copyin_body( * NOTE: A virtual copy is OK if the original is being * deallocted, even if a physical copy was requested. */ - kr = vm_map_copyin(map, addr, length, dealloc, ©); + kr = vm_map_copyin(map, addr, + (vm_map_size_t)length, dealloc, ©); if (kr != KERN_SUCCESS) { - ipc_kmsg_clean_partial(kmsg,i,paddr,space_needed); - return (kr == KERN_RESOURCE_SHORTAGE) ? + mr = (kr == KERN_RESOURCE_SHORTAGE) ? MACH_MSG_VM_KERNEL : MACH_SEND_INVALID_MEMORY; + break; } dsc->address = (void *) copy; } @@ -1546,18 +1673,46 @@ ipc_kmsg_copyin_body( } case MACH_MSG_OOL_PORTS_DESCRIPTOR: { vm_size_t length; - vm_offset_t data; - vm_offset_t addr; + void *data; ipc_object_t *objects; - int j; - mach_msg_type_name_t name; - mach_msg_ool_ports_descriptor_t *dsc; - - dsc = &sstart->ool_ports; - addr = (vm_offset_t) dsc->address; + unsigned int j; + mach_vm_offset_t addr; + mach_msg_type_name_t user_disp; + mach_msg_type_name_t result_disp; + mach_msg_type_number_t count; + mach_msg_copy_options_t copy_option; + boolean_t deallocate; + + volatile mach_msg_ool_ports_descriptor_t *dsc; + + if (differs) { + volatile OTHER_OOL_PORTS_DESCRIPTOR *user_dsc; + + user_dsc = (OTHER_OOL_PORTS_DESCRIPTOR *)&daddr->ool_ports; + addr = (mach_vm_offset_t)user_dsc->address; + count = user_dsc->count; + deallocate = user_dsc->deallocate; + copy_option = user_dsc->copy; + user_disp = user_dsc->disposition; + } else { + volatile mach_msg_ool_ports_descriptor_t *user_dsc; + + user_dsc = &daddr->ool_ports; + addr = CAST_USER_ADDR_T(user_dsc->address); + count = user_dsc->count; + deallocate = user_dsc->deallocate; + copy_option = user_dsc->copy; + user_disp = user_dsc->disposition; + } + + dsc = &naddr->ool_ports; + dsc->deallocate = deallocate; + dsc->copy = copy_option; + dsc->type = daddr->type.type; + dsc->count = count; /* calculate length of data in bytes, rounding up */ - length = dsc->count * sizeof(mach_port_name_t); + length = count * sizeof(mach_port_name_t); if (length == 0) { complex = TRUE; @@ -1567,56 +1722,55 @@ ipc_kmsg_copyin_body( data = kalloc(length); - if (data == 0) { - ipc_kmsg_clean_partial(kmsg, i, paddr, space_needed); - return MACH_SEND_NO_BUFFER; + if (data == NULL) { + mr = MACH_SEND_NO_BUFFER; + break; } - if (copyinmap(map, addr, data, length)) { + if (copyinmap(map, addr, data, length) != KERN_SUCCESS) { kfree(data, length); - ipc_kmsg_clean_partial(kmsg, i, paddr, space_needed); - return MACH_SEND_INVALID_MEMORY; + mr = MACH_SEND_INVALID_MEMORY; + break; } - if (dsc->deallocate) { - (void) vm_deallocate(map, addr, length); 
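			/*
			 * Editor's aside, not part of the patch: the sender's
			 * buffer is now released with mach_vm_deallocate() on a
			 * width-independent mach_vm_offset_t.  The dual
			 * user/kernel handling throughout this routine exists
			 * because the address field of an OOL descriptor is
			 * user-pointer-sized; hypothetical stand-ins for the two
			 * user layouts (not the real Mach types):
			 */
#if 0
			struct ool_user_same  { void	 *address; mach_msg_size_t size; };
			struct ool_user_other { uint64_t  address; mach_msg_size_t size; };
#endif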
+ if (deallocate) { + (void) mach_vm_deallocate(map, addr, (mach_vm_size_t)length); } - dsc->address = (void *) data; - - /* this is really the type SEND, SEND_ONCE, etc. */ - name = dsc->disposition; - dsc->disposition = ipc_object_copyin_type(name); + dsc->address = data; + result_disp = ipc_object_copyin_type(user_disp); + dsc->disposition = result_disp; + objects = (ipc_object_t *) data; - for ( j = 0; j < dsc->count; j++) { + for ( j = 0; j < count; j++) { mach_port_name_t port = (mach_port_name_t) objects[j]; ipc_object_t object; if (!MACH_PORT_VALID(port)) continue; - kr = ipc_object_copyin(space, port, name, &object); + kr = ipc_object_copyin(space, port, user_disp, &object); if (kr != KERN_SUCCESS) { - int k; + unsigned int k; for(k = 0; k < j; k++) { object = objects[k]; if (IPC_OBJECT_VALID(object)) - ipc_object_destroy(object, dsc->disposition); + ipc_object_destroy(object, result_disp); } kfree(data, length); - ipc_kmsg_clean_partial(kmsg, i, paddr, space_needed); - return MACH_SEND_INVALID_RIGHT; + mr = MACH_SEND_INVALID_RIGHT; + break; } if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && ipc_port_check_circularity( (ipc_port_t) object, (ipc_port_t) dest)) - kmsg->ikm_header.msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; objects[j] = object; } @@ -1628,16 +1782,42 @@ ipc_kmsg_copyin_body( /* * Invalid descriptor */ - ipc_kmsg_clean_partial(kmsg, i, paddr, space_needed); - return MACH_SEND_INVALID_TYPE; + mr = MACH_SEND_INVALID_TYPE; + break; } } - i++ ; - } + + if (MACH_MSG_SUCCESS != mr) { + ipc_kmsg_clean_partial(kmsg, dsc_count - i, + naddr + 1, paddr, space_needed); + goto out; + } + + } while (--i > 0 + && + (daddr = (differs) ? (mach_msg_descriptor_t *)((vm_offset_t)(daddr) - + user_desc_sizes[i - 1]) : daddr - 1) + && + naddr--); - if (!complex) - kmsg->ikm_header.msgh_bits &= ~MACH_MSGH_BITS_COMPLEX; - return MACH_MSG_SUCCESS; + if (!complex) { + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_COMPLEX; + } + + if (differs && naddr != daddr) { + mach_msg_base_t *old_base = (mach_msg_base_t *)kmsg->ikm_header; + mach_msg_base_t *new_base = (mach_msg_base_t *)naddr - 1; + + memmove(new_base, old_base, sizeof(mach_msg_base_t)); + new_base->header.msgh_size -= (vm_offset_t)naddr - (vm_offset_t)daddr; + kmsg->ikm_header = &new_base->header; + } + + out: + if (differs && dsc_count > DESC_COUNT_SMALL) + kfree(user_desc_sizes, body->msgh_descriptor_count * sizeof(vm_size_t)); + + return mr; } @@ -1675,11 +1855,11 @@ ipc_kmsg_copyin( { mach_msg_return_t mr; - mr = ipc_kmsg_copyin_header(&kmsg->ikm_header, space, notify); + mr = ipc_kmsg_copyin_header(kmsg->ikm_header, space, notify); if (mr != MACH_MSG_SUCCESS) return mr; - if ((kmsg->ikm_header.msgh_bits & MACH_MSGH_BITS_COMPLEX) == 0) + if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) == 0) return MACH_MSG_SUCCESS; return( ipc_kmsg_copyin_body( kmsg, space, map) ); @@ -1705,11 +1885,11 @@ void ipc_kmsg_copyin_from_kernel( ipc_kmsg_t kmsg) { - mach_msg_bits_t bits = kmsg->ikm_header.msgh_bits; + mach_msg_bits_t bits = kmsg->ikm_header->msgh_bits; mach_msg_type_name_t rname = MACH_MSGH_BITS_REMOTE(bits); mach_msg_type_name_t lname = MACH_MSGH_BITS_LOCAL(bits); - ipc_object_t remote = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; - ipc_object_t local = (ipc_object_t) kmsg->ikm_header.msgh_local_port; + ipc_object_t remote = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + ipc_object_t local = (ipc_object_t) kmsg->ikm_header->msgh_local_port; /* translate the 
destination and reply ports */ @@ -1727,13 +1907,13 @@ ipc_kmsg_copyin_from_kernel( bits = (MACH_MSGH_BITS_COMPLEX | MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0)); - kmsg->ikm_header.msgh_bits = bits; + kmsg->ikm_header->msgh_bits = bits; } else { bits = (MACH_MSGH_BITS_OTHER(bits) | MACH_MSGH_BITS(ipc_object_copyin_type(rname), ipc_object_copyin_type(lname))); - kmsg->ikm_header.msgh_bits = bits; + kmsg->ikm_header->msgh_bits = bits; if ((bits & MACH_MSGH_BITS_COMPLEX) == 0) return; } @@ -1741,7 +1921,7 @@ ipc_kmsg_copyin_from_kernel( mach_msg_descriptor_t *saddr, *eaddr; mach_msg_body_t *body; - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); saddr = (mach_msg_descriptor_t *) (body + 1); eaddr = (mach_msg_descriptor_t *) saddr + body->msgh_descriptor_count; @@ -1776,7 +1956,7 @@ ipc_kmsg_copyin_from_kernel( if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && ipc_port_check_circularity((ipc_port_t) object, (ipc_port_t) remote)) { - kmsg->ikm_header.msgh_bits |= + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; } } @@ -1792,7 +1972,7 @@ ipc_kmsg_copyin_from_kernel( } case MACH_MSG_OOL_PORTS_DESCRIPTOR: { ipc_object_t *objects; - int j; + unsigned int j; mach_msg_type_name_t name; mach_msg_ool_ports_descriptor_t *dsc; @@ -1816,7 +1996,7 @@ ipc_kmsg_copyin_from_kernel( ipc_port_check_circularity( (ipc_port_t) object, (ipc_port_t) remote)) - kmsg->ikm_header.msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; } break; } @@ -2243,178 +2423,326 @@ ipc_kmsg_copyout_body( mach_msg_body_t *slist) { mach_msg_body_t *body; - mach_msg_descriptor_t *saddr, *eaddr; + mach_msg_descriptor_t *daddr, *naddr; + mach_msg_descriptor_t *saddr; + mach_msg_type_number_t i, dsc_count, sdsc_count; mach_msg_return_t mr = MACH_MSG_SUCCESS; kern_return_t kr; - vm_offset_t data; - mach_msg_descriptor_t *sstart, *send; + void *data; + boolean_t differs = MAP_SIZE_DIFFERS(map); - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); - saddr = (mach_msg_descriptor_t *) (body + 1); - eaddr = saddr + body->msgh_descriptor_count; + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + dsc_count = body->msgh_descriptor_count; + daddr = (mach_msg_descriptor_t *) (body + 1); /* * Do scatter list setup */ if (slist != MACH_MSG_BODY_NULL) { - sstart = (mach_msg_descriptor_t *) (slist + 1); - send = sstart + slist->msgh_descriptor_count; + saddr = (mach_msg_descriptor_t *) (slist + 1); + sdsc_count = slist->msgh_descriptor_count; } else { - sstart = MACH_MSG_DESCRIPTOR_NULL; + saddr = MACH_MSG_DESCRIPTOR_NULL; + sdsc_count = 0; } - for ( ; saddr < eaddr; saddr++ ) { - - switch (saddr->type.type) { + /* + * Compute the true size of the resulting descriptors + * after potential expansion and adjust the header + * and body location accordingly. 
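 * (Editor's note: each OOL, OOL-volatile, and OOL-ports descriptor
 * grows by DESC_SIZE_ADJUSTMENT for a wider receiver; the header is
 * slid down by the accumulated adjustment so that the expanded
 * descriptors still follow the body contiguously.)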
+ */ + if (differs) { + mach_msg_size_t dsc_adjust; + + naddr = daddr; + dsc_adjust = 0; + for (i = 0; i < dsc_count; i++, naddr++) + switch (naddr->type.type) { + case MACH_MSG_OOL_DESCRIPTOR: + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_PORTS_DESCRIPTOR: + dsc_adjust += DESC_SIZE_ADJUSTMENT; + break; + default: + break; + } + if (dsc_adjust) { + mach_msg_base_t *old_base = (mach_msg_base_t *)kmsg->ikm_header; + mach_msg_base_t *new_base; + + new_base = (mach_msg_base_t *)((vm_offset_t)old_base - dsc_adjust); + memmove(new_base, old_base, sizeof(mach_msg_base_t)); + kmsg->ikm_header = &new_base->header; + kmsg->ikm_header->msgh_size += dsc_adjust; + naddr = (mach_msg_descriptor_t *)(new_base + 1); + } else { + naddr = daddr; + } + } else { + naddr = daddr; + } + + /* + * Now process the descriptors + */ + for ( i = 0; i < dsc_count; i++, daddr++ ) { + switch (daddr->type.type) { case MACH_MSG_PORT_DESCRIPTOR: { - mach_msg_port_descriptor_t *dsc; + volatile mach_msg_port_descriptor_t *dsc; + volatile mach_msg_port_descriptor_t *user_dsc; + mach_port_t port; + mach_port_name_t name; + mach_msg_type_name_t disp; /* * Copyout port right carried in the message */ - dsc = &saddr->port; + dsc = &daddr->port; + user_dsc = &naddr->port; + port = dsc->name; + disp = dsc->disposition; mr |= ipc_kmsg_copyout_object(space, - (ipc_object_t) dsc->name, - dsc->disposition, - (mach_port_name_t *) &dsc->name); - + (ipc_object_t)port, + disp, + &name); + user_dsc->name = (mach_port_t)name; + user_dsc->disposition = disp; + user_dsc->type = MACH_MSG_PORT_DESCRIPTOR; + naddr++; break; } + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: case MACH_MSG_OOL_DESCRIPTOR : { - vm_offset_t rcv_addr; - vm_offset_t snd_addr; + vm_map_copy_t copy; + mach_vm_offset_t rcv_addr; mach_msg_ool_descriptor_t *dsc; - mach_msg_copy_options_t copy_option; - - SKIP_PORT_DESCRIPTORS(sstart, send); + mach_msg_copy_options_t copy_options; + mach_msg_size_t size; + mach_msg_descriptor_type_t dsc_type; - dsc = &saddr->out_of_line; - - assert(dsc->copy != MACH_MSG_KALLOC_COPY_T); + SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); - copy_option = dsc->copy; + dsc = &daddr->out_of_line; + copy = (vm_map_copy_t) dsc->address; + size = dsc->size; + copy_options = dsc->copy; + assert(copy_options != MACH_MSG_KALLOC_COPY_T); + dsc_type = dsc->type; - if ((snd_addr = (vm_offset_t) dsc->address) != 0) { - if (sstart != MACH_MSG_DESCRIPTOR_NULL && - sstart->out_of_line.copy == MACH_MSG_OVERWRITE) { + if (copy != VM_MAP_COPY_NULL) { + /* + * Check to see if there is an overwrite descriptor + * specified in the scatter list for this ool data. + * The descriptor has already been verified. + */ + if (saddr != MACH_MSG_DESCRIPTOR_NULL) { + if (differs) { + OTHER_OOL_DESCRIPTOR *scatter_dsc; + + scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; + if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { + rcv_addr = (mach_vm_offset_t) scatter_dsc->address; + copy_options = MACH_MSG_OVERWRITE; + } else { + rcv_addr = 0; + copy_options = MACH_MSG_VIRTUAL_COPY; + } + } else { + mach_msg_ool_descriptor_t *scatter_dsc; - /* - * There is an overwrite descriptor specified in the - * scatter list for this ool data. 
The descriptor - * has already been verified - */ - rcv_addr = (vm_offset_t) sstart->out_of_line.address; - dsc->copy = MACH_MSG_OVERWRITE; - } else { - dsc->copy = MACH_MSG_ALLOCATE; + scatter_dsc = &saddr->out_of_line; + if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { + rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); + copy_options = MACH_MSG_OVERWRITE; + } else { + rcv_addr = 0; + copy_options = MACH_MSG_VIRTUAL_COPY; + } + } + INCREMENT_SCATTER(saddr, sdsc_count, differs); } + /* * Whether the data was virtually or physically * copied we have a vm_map_copy_t for it. * If there's an overwrite region specified * overwrite it, otherwise do a virtual copy out. */ - if (dsc->copy == MACH_MSG_OVERWRITE) { + if (copy_options == MACH_MSG_OVERWRITE) { kr = vm_map_copy_overwrite(map, rcv_addr, - (vm_map_copy_t) dsc->address, TRUE); + copy, TRUE); } else { - kr = vm_map_copyout(map, &rcv_addr, - (vm_map_copy_t) dsc->address); + kr = vm_map_copyout(map, &rcv_addr, copy); } if (kr != KERN_SUCCESS) { if (kr == KERN_RESOURCE_SHORTAGE) mr |= MACH_MSG_VM_KERNEL; else mr |= MACH_MSG_VM_SPACE; - vm_map_copy_discard((vm_map_copy_t) dsc->address); - dsc->address = 0; - INCREMENT_SCATTER(sstart); - break; + vm_map_copy_discard(copy); + rcv_addr = 0; + size = 0; } - dsc->address = (void *) rcv_addr; + } else { + rcv_addr = 0; + size = 0; + } + + /* + * Now update the descriptor as the user would see it. + * This may require expanding the descriptor to the user + * visible size. There is already space allocated for + * this in what naddr points to. + */ + if (differs) { + volatile OTHER_OOL_DESCRIPTOR *user_dsc; + + user_dsc = (OTHER_OOL_DESCRIPTOR *)naddr; + user_dsc->address = rcv_addr; + user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_dsc->copy = copy_options; + user_dsc->type = dsc_type; + user_dsc->size = size; + naddr = (mach_msg_descriptor_t *)((OTHER_OOL_DESCRIPTOR *)naddr + 1); + } else { + volatile mach_msg_ool_descriptor_t *user_dsc; + + user_dsc = &naddr->out_of_line; + user_dsc->address = CAST_DOWN(void *, rcv_addr); + user_dsc->size = size; + user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_dsc->copy = copy_options; + user_dsc->type = dsc_type; + naddr++; } - INCREMENT_SCATTER(sstart); break; } + case MACH_MSG_OOL_PORTS_DESCRIPTOR : { - vm_offset_t addr; + mach_vm_offset_t rcv_addr; mach_port_name_t *objects; - mach_msg_type_number_t j; + mach_msg_type_name_t disp; + mach_msg_type_number_t count, j; vm_size_t length; - mach_msg_ool_ports_descriptor_t *dsc; - SKIP_PORT_DESCRIPTORS(sstart, send); + volatile mach_msg_ool_ports_descriptor_t *dsc; + mach_msg_copy_options_t copy_options = MACH_MSG_VIRTUAL_COPY; - dsc = &saddr->ool_ports; + SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); - length = dsc->count * sizeof(mach_port_name_t); + dsc = &daddr->ool_ports; + count = dsc->count; + disp = dsc->disposition; + length = count * sizeof(mach_port_name_t); - if (length != 0) { - if (sstart != MACH_MSG_DESCRIPTOR_NULL && - sstart->ool_ports.copy == MACH_MSG_OVERWRITE) { + if (length != 0 && dsc->address != 0) { - /* - * There is an overwrite descriptor specified in the - * scatter list for this ool data. The descriptor - * has already been verified - */ - addr = (vm_offset_t) sstart->out_of_line.address; - dsc->copy = MACH_MSG_OVERWRITE; - } - else { + /* + * Check to see if there is an overwrite descriptor + * specified in the scatter list for this ool data. + * The descriptor has already been verified. 
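 * (Editor's note: an overwrite entry in the scatter list supplies the
 * receive address and forces MACH_MSG_OVERWRITE; otherwise rcv_addr is
 * left 0 and the data is placed by a virtual copy-out.)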
+ */ + if (saddr != MACH_MSG_DESCRIPTOR_NULL) { + if (differs) { + OTHER_OOL_DESCRIPTOR *scatter_dsc; + + scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; + rcv_addr = (mach_vm_offset_t) scatter_dsc->address; + copy_options = scatter_dsc->copy; + } else { + mach_msg_ool_descriptor_t *scatter_dsc; + scatter_dsc = &saddr->out_of_line; + rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); + copy_options = scatter_dsc->copy; + } + INCREMENT_SCATTER(saddr, sdsc_count, differs); + } + + if (copy_options == MACH_MSG_VIRTUAL_COPY) { /* * Dynamically allocate the region */ int anywhere = VM_MAKE_TAG(VM_MEMORY_MACH_MSG)| VM_FLAGS_ANYWHERE; - dsc->copy = MACH_MSG_ALLOCATE; - if ((kr = vm_allocate(map, &addr, length, + if ((kr = mach_vm_allocate(map, &rcv_addr, + (mach_vm_size_t)length, anywhere)) != KERN_SUCCESS) { - ipc_kmsg_clean_body(kmsg, - body->msgh_descriptor_count); - dsc->address = 0; + ipc_kmsg_clean_body(kmsg, 1, daddr); + rcv_addr = 0; if (kr == KERN_RESOURCE_SHORTAGE){ mr |= MACH_MSG_VM_KERNEL; } else { mr |= MACH_MSG_VM_SPACE; } - INCREMENT_SCATTER(sstart); - break; - } + } } - } else { - INCREMENT_SCATTER(sstart); - break; - } + + /* + * Handle the port rights and copy out the names + * for those rights out to user-space. + */ + if (rcv_addr != 0) { + objects = (mach_port_name_t *) dsc->address ; - objects = (mach_port_name_t *) dsc->address ; - - /* copyout port rights carried in the message */ + /* copyout port rights carried in the message */ - for ( j = 0; j < dsc->count ; j++) { - ipc_object_t object = - (ipc_object_t) objects[j]; + for ( j = 0; j < count ; j++) { + ipc_object_t object = + (ipc_object_t) objects[j]; - mr |= ipc_kmsg_copyout_object(space, object, - dsc->disposition, &objects[j]); + mr |= ipc_kmsg_copyout_object(space, object, + disp, &objects[j]); + } + + /* copyout to memory allocated above */ + data = dsc->address; + if (copyoutmap(map, data, rcv_addr, length) != KERN_SUCCESS) + mr |= MACH_MSG_VM_SPACE; + kfree(data, length); + } + } else { + rcv_addr = 0; } - /* copyout to memory allocated above */ - - data = (vm_offset_t) dsc->address; - (void) copyoutmap(map, data, addr, length); - kfree(data, length); - - dsc->address = (void *) addr; - INCREMENT_SCATTER(sstart); + /* + * Now update the descriptor based on the information + * calculated above. + */ + if (differs) { + volatile OTHER_OOL_PORTS_DESCRIPTOR *user_dsc; + + user_dsc = (OTHER_OOL_PORTS_DESCRIPTOR *)naddr; + user_dsc->address = rcv_addr; + user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_dsc->copy = copy_options; + user_dsc->disposition = disp; + user_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + user_dsc->count = count; + naddr = (mach_msg_descriptor_t *)((OTHER_OOL_PORTS_DESCRIPTOR *)naddr + 1); + } else { + volatile mach_msg_ool_ports_descriptor_t *user_dsc; + + user_dsc = &naddr->ool_ports; + user_dsc->address = CAST_DOWN(void *, rcv_addr); + user_dsc->count = count; + user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_dsc->copy = copy_options; + user_dsc->disposition = disp; + user_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + naddr++; + } break; } default : { @@ -2425,6 +2753,55 @@ ipc_kmsg_copyout_body( return mr; } +/* + * Routine: ipc_kmsg_copyout_size + * Purpose: + * Compute the size of the message as copied out to the given + * map. If the destination map's pointers are a different size + * than the kernel's, we have to allow for expansion/ + * contraction of the descriptors as appropriate. 
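 * (Editor's note: in other words, msgh_size alone can understate what
 * a wider receiver will see; each out-of-line descriptor contributes an
 * extra DESC_SIZE_ADJUSTMENT bytes, summed up below.)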
+ * Conditions: + * Nothing locked. + * Returns: + * size of the message as it would be received. + */ + +mach_msg_size_t +ipc_kmsg_copyout_size( + ipc_kmsg_t kmsg, + vm_map_t map) +{ + mach_msg_size_t send_size; + + send_size = kmsg->ikm_header->msgh_size; + + if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) && + MAP_SIZE_DIFFERS(map)) { + + mach_msg_body_t *body; + mach_msg_descriptor_t *saddr, *eaddr; + + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + saddr = (mach_msg_descriptor_t *) (body + 1); + eaddr = saddr + body->msgh_descriptor_count; + + for ( ; saddr < eaddr; saddr++ ) { + switch (saddr->type.type) { + + case MACH_MSG_OOL_DESCRIPTOR: + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_PORTS_DESCRIPTOR: + send_size += DESC_SIZE_ADJUSTMENT; + break; + + default: + break; + } + } + } + return send_size; +} + /* * Routine: ipc_kmsg_copyout * Purpose: @@ -2453,11 +2830,11 @@ ipc_kmsg_copyout( { mach_msg_return_t mr; - mr = ipc_kmsg_copyout_header(&kmsg->ikm_header, space, notify); + mr = ipc_kmsg_copyout_header(kmsg->ikm_header, space, notify); if (mr != MACH_MSG_SUCCESS) return mr; - if (kmsg->ikm_header.msgh_bits & MACH_MSGH_BITS_COMPLEX) { + if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) { mr = ipc_kmsg_copyout_body(kmsg, space, map, slist); if (mr != MACH_MSG_SUCCESS) @@ -2494,9 +2871,9 @@ ipc_kmsg_copyout_pseudo( vm_map_t map, mach_msg_body_t *slist) { - mach_msg_bits_t mbits = kmsg->ikm_header.msgh_bits; - ipc_object_t dest = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; - ipc_object_t reply = (ipc_object_t) kmsg->ikm_header.msgh_local_port; + mach_msg_bits_t mbits = kmsg->ikm_header->msgh_bits; + ipc_object_t dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + ipc_object_t reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits); mach_msg_type_name_t reply_type = MACH_MSGH_BITS_LOCAL(mbits); mach_port_name_t dest_name, reply_name; @@ -2507,9 +2884,9 @@ ipc_kmsg_copyout_pseudo( mr = (ipc_kmsg_copyout_object(space, dest, dest_type, &dest_name) | ipc_kmsg_copyout_object(space, reply, reply_type, &reply_name)); - kmsg->ikm_header.msgh_bits = mbits &~ MACH_MSGH_BITS_CIRCULAR; - kmsg->ikm_header.msgh_remote_port = (ipc_port_t)dest_name; - kmsg->ikm_header.msgh_local_port = (ipc_port_t)reply_name; + kmsg->ikm_header->msgh_bits = mbits &~ MACH_MSGH_BITS_CIRCULAR; + kmsg->ikm_header->msgh_remote_port = (ipc_port_t)dest_name; + kmsg->ikm_header->msgh_local_port = (ipc_port_t)reply_name; if (mbits & MACH_MSGH_BITS_COMPLEX) { mr |= ipc_kmsg_copyout_body(kmsg, space, map, slist); @@ -2539,9 +2916,9 @@ ipc_kmsg_copyout_dest( mach_msg_type_name_t reply_type; mach_port_name_t dest_name, reply_name; - mbits = kmsg->ikm_header.msgh_bits; - dest = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; - reply = (ipc_object_t) kmsg->ikm_header.msgh_local_port; + mbits = kmsg->ikm_header->msgh_bits; + dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; dest_type = MACH_MSGH_BITS_REMOTE(mbits); reply_type = MACH_MSGH_BITS_LOCAL(mbits); @@ -2563,18 +2940,20 @@ ipc_kmsg_copyout_dest( } else reply_name = (mach_port_name_t) reply; - kmsg->ikm_header.msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | + kmsg->ikm_header->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | MACH_MSGH_BITS(reply_type, dest_type)); - kmsg->ikm_header.msgh_local_port = (ipc_port_t)dest_name; - kmsg->ikm_header.msgh_remote_port = (ipc_port_t)reply_name; + 
kmsg->ikm_header->msgh_local_port = (ipc_port_t)dest_name; + kmsg->ikm_header->msgh_remote_port = (ipc_port_t)reply_name; if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_body_t *body; - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); - ipc_kmsg_clean_body(kmsg, body->msgh_descriptor_count); + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + ipc_kmsg_clean_body(kmsg, body->msgh_descriptor_count, + (mach_msg_descriptor_t *)(body + 1)); } } + /* * Routine: ipc_kmsg_copyin_scatter * Purpose: @@ -2600,10 +2979,10 @@ ipc_kmsg_copyout_dest( */ mach_msg_body_t * -ipc_kmsg_copyin_scatter( - mach_msg_header_t *msg, - mach_msg_size_t slist_size, - ipc_kmsg_t kmsg) +ipc_kmsg_get_scatter( + mach_vm_address_t msg_addr, + mach_msg_size_t slist_size, + ipc_kmsg_t kmsg) { mach_msg_body_t *slist; mach_msg_body_t *body; @@ -2619,18 +2998,18 @@ ipc_kmsg_copyin_scatter( if (slist == MACH_MSG_BODY_NULL) return slist; - if (copyin((char *) (msg + 1), (char *)slist, slist_size)) { - kfree((vm_offset_t)slist, slist_size); + if (copyin(msg_addr + sizeof(mach_msg_header_t), (char *)slist, slist_size)) { + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } if ((slist->msgh_descriptor_count* sizeof(mach_msg_descriptor_t) + sizeof(mach_msg_size_t)) > slist_size) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } - body = (mach_msg_body_t *) (&kmsg->ikm_header + 1); + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); gstart = (mach_msg_descriptor_t *) (body + 1); gend = gstart + body->msgh_descriptor_count; @@ -2652,7 +3031,7 @@ ipc_kmsg_copyin_scatter( * automatic size mismatch. */ if (slist->msgh_descriptor_count == 0) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } @@ -2679,23 +3058,23 @@ ipc_kmsg_copyin_scatter( g_type == MACH_MSG_OOL_VOLATILE_DESCRIPTOR) { if (sstart->type.type != MACH_MSG_OOL_DESCRIPTOR && sstart->type.type != MACH_MSG_OOL_VOLATILE_DESCRIPTOR) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } if (sstart->out_of_line.copy == MACH_MSG_OVERWRITE && gstart->out_of_line.size > sstart->out_of_line.size) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } } else { if (sstart->type.type != MACH_MSG_OOL_PORTS_DESCRIPTOR) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } if (sstart->ool_ports.copy == MACH_MSG_OVERWRITE && gstart->ool_ports.count > sstart->ool_ports.count) { - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); return MACH_MSG_BODY_NULL; } } @@ -2721,7 +3100,7 @@ ipc_kmsg_free_scatter( mach_msg_size_t slist_size) { slist_size -= sizeof(mach_msg_header_t); - kfree((vm_offset_t)slist, slist_size); + kfree(slist, slist_size); } @@ -2749,10 +3128,10 @@ ipc_kmsg_copyout_to_kernel( mach_msg_type_name_t reply_type; mach_port_name_t dest_name, reply_name; - dest = (ipc_object_t) kmsg->ikm_header.msgh_remote_port; - reply = (ipc_object_t) kmsg->ikm_header.msgh_local_port; - dest_type = MACH_MSGH_BITS_REMOTE(kmsg->ikm_header.msgh_bits); - reply_type = MACH_MSGH_BITS_LOCAL(kmsg->ikm_header.msgh_bits); + dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; + dest_type = MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits); + reply_type = MACH_MSGH_BITS_LOCAL(kmsg->ikm_header->msgh_bits); assert(IO_VALID(dest)); @@ -2768,11 +3147,11 @@ ipc_kmsg_copyout_to_kernel( 
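	/*
	 * Editor's note: for a kernel receiver only the header ports are
	 * translated to names here; body descriptors stay in kernel form,
	 * so none of the 32/64-bit expansion above applies.
	 */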
reply_name = (mach_port_name_t) reply; - kmsg->ikm_header.msgh_bits = - (MACH_MSGH_BITS_OTHER(kmsg->ikm_header.msgh_bits) | + kmsg->ikm_header->msgh_bits = + (MACH_MSGH_BITS_OTHER(kmsg->ikm_header->msgh_bits) | MACH_MSGH_BITS(reply_type, dest_type)); - kmsg->ikm_header.msgh_local_port = (ipc_port_t)dest_name; - kmsg->ikm_header.msgh_remote_port = (ipc_port_t)reply_name; + kmsg->ikm_header->msgh_local_port = (ipc_port_t)dest_name; + kmsg->ikm_header->msgh_remote_port = (ipc_port_t)reply_name; } #include @@ -2786,25 +3165,25 @@ ipc_kmsg_copyout_to_kernel( void ipc_msg_print_untyped( mach_msg_body_t *body); -char * ipc_type_name( +const char * ipc_type_name( int type_name, boolean_t received); void ipc_print_type_name( int type_name); -char * +const char * msgh_bit_decode( mach_msg_bits_t bit); -char * +const char * mm_copy_options_string( mach_msg_copy_options_t option); void db_print_msg_uid(mach_msg_header_t *); -char * +const char * ipc_type_name( int type_name, boolean_t received) @@ -2852,7 +3231,7 @@ void ipc_print_type_name( int type_name) { - char *name = ipc_type_name(type_name, TRUE); + const char *name = ipc_type_name(type_name, TRUE); if (name) { printf("%s", name); } else { @@ -2873,10 +3252,10 @@ ipc_kmsg_print( kmsg->ikm_prev, kmsg->ikm_size); printf("\n"); - ipc_msg_print(&kmsg->ikm_header); + ipc_msg_print(kmsg->ikm_header); } -char * +const char * msgh_bit_decode( mach_msg_bits_t bit) { @@ -2896,7 +3275,7 @@ ipc_msg_print( { mach_msg_bits_t mbits; unsigned int bit, i; - char *bit_name; + const char *bit_name; int needs_comma; mbits = msgh->msgh_bits; @@ -2955,11 +3334,11 @@ ipc_msg_print( } -char * +const char * mm_copy_options_string( mach_msg_copy_options_t option) { - char *name; + const char *name; switch (option) { case MACH_MSG_PHYSICAL_COPY: diff --git a/osfmk/ipc/ipc_kmsg.h b/osfmk/ipc/ipc_kmsg.h index dce03b573..dc58188a8 100644 --- a/osfmk/ipc/ipc_kmsg.h +++ b/osfmk/ipc/ipc_kmsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,14 +60,12 @@ #ifndef _IPC_IPC_KMSG_H_ #define _IPC_IPC_KMSG_H_ -#include - #include #include +#include #include -#include #include -#include +#include #include /* @@ -83,18 +81,16 @@ */ -typedef struct ipc_kmsg { +struct ipc_kmsg { struct ipc_kmsg *ikm_next; struct ipc_kmsg *ikm_prev; ipc_port_t ikm_prealloc; /* port we were preallocated from */ mach_msg_size_t ikm_size; - mach_msg_header_t ikm_header; -} *ipc_kmsg_t; + mach_msg_header_t *ikm_header; +}; -#define IKM_NULL ((ipc_kmsg_t) 0) -#define IKM_OVERHEAD \ - (sizeof(struct ipc_kmsg) - sizeof(mach_msg_header_t)) +#define IKM_OVERHEAD (sizeof(struct ipc_kmsg)) #define ikm_plus_overhead(size) ((mach_msg_size_t)((size) + IKM_OVERHEAD)) #define ikm_less_overhead(size) ((mach_msg_size_t)((size) - IKM_OVERHEAD)) @@ -108,8 +104,9 @@ typedef struct ipc_kmsg { * The size of the kernel message buffers that will be cached. * IKM_SAVED_KMSG_SIZE includes overhead; IKM_SAVED_MSG_SIZE doesn't. 
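 * (Editor's note: the next hunk switches kmsg caching to a zone of
 * fixed 256-byte elements -- IKM_SAVED_KMSG_SIZE with overhead,
 * IKM_SAVED_MSG_SIZE without.  Also worth flagging: in the rewritten
 * ipc_kmsg_send_always() macro below, the local is renamed to mr2 while
 * the assert still tests mr, so the assert binds to whatever mr is
 * visible in the caller's scope.)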
*/ - -#define IKM_SAVED_MSG_SIZE ikm_less_overhead(256) +extern zone_t ipc_kmsg_zone; +#define IKM_SAVED_KMSG_SIZE 256 +#define IKM_SAVED_MSG_SIZE ikm_less_overhead(IKM_SAVED_KMSG_SIZE) #define ikm_prealloc_inuse_port(kmsg) \ ((kmsg)->ikm_prealloc) @@ -226,28 +223,6 @@ MACRO_BEGIN \ } \ MACRO_END -/* scatter list macros */ - -#define SKIP_PORT_DESCRIPTORS(s, e) \ -MACRO_BEGIN \ - if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \ - while ((s) < (e)) { \ - if ((s)->type.type != MACH_MSG_PORT_DESCRIPTOR) \ - break; \ - (s)++; \ - } \ - if ((s) >= (e)) \ - (s) = MACH_MSG_DESCRIPTOR_NULL; \ - } \ -MACRO_END - -#define INCREMENT_SCATTER(s) \ -MACRO_BEGIN \ - if ((s) != MACH_MSG_DESCRIPTOR_NULL) { \ - (s)++; \ - } \ -MACRO_END - /* * extern void * ipc_kmsg_send_always(ipc_kmsg_t); @@ -259,9 +234,9 @@ MACRO_END #define ipc_kmsg_send_always(kmsg) \ MACRO_BEGIN \ - mach_msg_return_t mr; \ + mach_msg_return_t mr2; \ \ - mr = ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, \ + mr2 = ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, \ MACH_MSG_TIMEOUT_NONE); \ assert(mr == MACH_MSG_SUCCESS); \ MACRO_END @@ -276,6 +251,7 @@ MACRO_END #endif /* MACH_ASSERT */ + /* Allocate a kernel message */ extern ipc_kmsg_t ipc_kmsg_alloc( mach_msg_size_t size); @@ -288,6 +264,11 @@ extern void ipc_kmsg_free( extern void ipc_kmsg_destroy( ipc_kmsg_t kmsg); +/* destroy kernel message and a reference on the dest */ +extern void ipc_kmsg_destroy_dest( + ipc_kmsg_t kmsg); + + /* Preallocate a kernel message buffer */ extern void ipc_kmsg_set_prealloc( ipc_kmsg_t kmsg, @@ -300,7 +281,7 @@ extern void ipc_kmsg_clear_prealloc( /* Allocate a kernel message buffer and copy a user message to the buffer */ extern mach_msg_return_t ipc_kmsg_get( - mach_msg_header_t *msg, + mach_vm_address_t msg_addr, mach_msg_size_t size, ipc_kmsg_t *kmsgp); @@ -314,11 +295,11 @@ extern mach_msg_return_t ipc_kmsg_get_from_kernel( extern mach_msg_return_t ipc_kmsg_send( ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t timeout); + mach_msg_timeout_t timeout_val); /* Copy a kernel message buffer to a user message */ extern mach_msg_return_t ipc_kmsg_put( - mach_msg_header_t *msg, + mach_vm_address_t msg_addr, ipc_kmsg_t kmsg, mach_msg_size_t size); @@ -381,6 +362,11 @@ extern mach_msg_return_t ipc_kmsg_copyout_pseudo( vm_map_t map, mach_msg_body_t *slist); +/* Compute size of message as copied out to the specified space/map */ +extern mach_msg_size_t ipc_kmsg_copyout_size( + ipc_kmsg_t kmsg, + vm_map_t map); + /* Copyout the destination port in the message */ extern void ipc_kmsg_copyout_dest( ipc_kmsg_t kmsg, @@ -391,9 +377,9 @@ extern void ipc_kmsg_copyout_to_kernel( ipc_kmsg_t kmsg, ipc_space_t space); -/* copyin a scatter list and check consistency */ -extern mach_msg_body_t *ipc_kmsg_copyin_scatter( - mach_msg_header_t *msg, +/* get a scatter list and check consistency */ +extern mach_msg_body_t *ipc_kmsg_get_scatter( + mach_vm_address_t msg_addr, mach_msg_size_t slist_size, ipc_kmsg_t kmsg); @@ -402,17 +388,4 @@ extern void ipc_kmsg_free_scatter( mach_msg_body_t *slist, mach_msg_size_t slist_size); -#include -#if MACH_KDB - -/* Do a formatted dump of a kernel message */ -extern void ipc_kmsg_print( - ipc_kmsg_t kmsg); - -/* Do a formatted dump of a user message */ -extern void ipc_msg_print( - mach_msg_header_t *msgh); - -#endif /* MACH_KDB */ - #endif /* _IPC_IPC_KMSG_H_ */ diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c index 883d8756c..9150acbc0 100644 --- a/osfmk/ipc/ipc_mqueue.c +++ b/osfmk/ipc/ipc_mqueue.c @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -65,6 +65,7 @@ #include #include #include +#include /* XXX - for mach_msg_receive_continue */ #include #include #include @@ -83,6 +84,9 @@ int ipc_mqueue_rcv; /* address is event for message arrival */ #define TR_ENABLE 0 +/* forward declarations */ +void ipc_mqueue_receive_results(wait_result_t result); + /* * Routine: ipc_mqueue_init * Purpose: @@ -119,11 +123,11 @@ ipc_mqueue_init( boolean_t ipc_mqueue_member( - ipc_mqueue_t port_mqueue, - ipc_mqueue_t set_mqueue) + ipc_mqueue_t port_mqueue, + ipc_mqueue_t set_mqueue) { wait_queue_t port_waitq = &port_mqueue->imq_wait_queue; - wait_queue_t set_waitq = &set_mqueue->imq_wait_queue; + wait_queue_set_t set_waitq = &set_mqueue->imq_set_queue; return (wait_queue_member(port_waitq, set_waitq)); @@ -244,10 +248,10 @@ ipc_mqueue_add( * just move onto the next. */ if (th->ith_msize < - kmsg->ikm_header.msgh_size + + kmsg->ikm_header->msgh_size + REQUESTED_TRAILER_SIZE(th->ith_option)) { th->ith_state = MACH_RCV_TOO_LARGE; - th->ith_msize = kmsg->ikm_header.msgh_size; + th->ith_msize = kmsg->ikm_header->msgh_size; if (th->ith_option & MACH_RCV_LARGE) { /* * let him go without message @@ -265,8 +269,9 @@ ipc_mqueue_add( * This thread is going to take this message, * so give it to him. */ - ipc_mqueue_release_msgcount(port_mqueue); ipc_kmsg_rmqueue(kmsgq, kmsg); + ipc_mqueue_release_msgcount(port_mqueue); + th->ith_kmsg = kmsg; th->ith_seqno = port_mqueue->imq_seqno++; thread_unlock(th); @@ -324,7 +329,7 @@ ipc_mqueue_send( ipc_mqueue_t mqueue, ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t timeout) + mach_msg_timeout_t send_timeout) { int wresult; spl_t s; @@ -340,42 +345,41 @@ ipc_mqueue_send( if (!imq_full(mqueue) || (option & MACH_SEND_ALWAYS) || - (MACH_MSGH_BITS_REMOTE(kmsg->ikm_header.msgh_bits) == + (MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits) == MACH_MSG_TYPE_PORT_SEND_ONCE)) { mqueue->imq_msgcount++; + assert(mqueue->imq_msgcount > 0); imq_unlock(mqueue); splx(s); } else { thread_t cur_thread = current_thread(); + uint64_t deadline; /* * We have to wait for space to be granted to us. */ - if ((option & MACH_SEND_TIMEOUT) && (timeout == 0)) { + if ((option & MACH_SEND_TIMEOUT) && (send_timeout == 0)) { imq_unlock(mqueue); splx(s); return MACH_SEND_TIMED_OUT; } mqueue->imq_fullwaiters = TRUE; thread_lock(cur_thread); + if (option & MACH_SEND_TIMEOUT) + clock_interval_to_deadline(send_timeout, 1000*NSEC_PER_USEC, &deadline); + else + deadline = 0; wresult = wait_queue_assert_wait64_locked( &mqueue->imq_wait_queue, IPC_MQUEUE_FULL, - THREAD_ABORTSAFE, + THREAD_ABORTSAFE, deadline, cur_thread); thread_unlock(cur_thread); imq_unlock(mqueue); splx(s); if (wresult == THREAD_WAITING) { - if (option & MACH_SEND_TIMEOUT) { - thread_set_timer(timeout, 1000*NSEC_PER_USEC); - wresult = thread_block(THREAD_CONTINUE_NULL); - if (wresult != THREAD_TIMED_OUT) - thread_cancel_timer(); - } else { - wresult = thread_block(THREAD_CONTINUE_NULL); - } + wresult = thread_block(THREAD_CONTINUE_NULL); counter(c_ipc_mqueue_send_block++); } @@ -386,6 +390,7 @@ ipc_mqueue_send( case THREAD_AWAKENED: /* we can proceed - inherited msgcount from waker */ + assert(mqueue->imq_msgcount > 0); break; case THREAD_INTERRUPTED: @@ -408,16 +413,18 @@ ipc_mqueue_send( * found a waiter. * * Conditions: - * The message queue is locked + * The message queue is locked. 
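 * (Editor's note: callers now dequeue the kmsg before releasing its
 * reserved slot -- see the reordering in ipc_mqueue_add() above --
 * which is what the strengthened assertion below, imq_msgcount > 1 or
 * an empty queue, relies on.)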
+ * The message corresponding to this reference is off the queue. */ void ipc_mqueue_release_msgcount( ipc_mqueue_t mqueue) { assert(imq_held(mqueue)); - assert(mqueue->imq_msgcount > 0); + assert(mqueue->imq_msgcount > 1 || ipc_kmsg_queue_empty(&mqueue->imq_messages)); mqueue->imq_msgcount--; + if (!imq_full(mqueue) && mqueue->imq_fullwaiters) { if (wait_queue_wakeup64_one_locked( &mqueue->imq_wait_queue, @@ -426,7 +433,8 @@ ipc_mqueue_release_msgcount( FALSE) != KERN_SUCCESS) { mqueue->imq_fullwaiters = FALSE; } else { - mqueue->imq_msgcount++; /* gave it away */ + /* gave away our slot - add reference back */ + mqueue->imq_msgcount++; } } } @@ -483,9 +491,9 @@ ipc_mqueue_post( * the thread we wake up will get that as its status. */ if (receiver->ith_msize < - (kmsg->ikm_header.msgh_size) + + (kmsg->ikm_header->msgh_size) + REQUESTED_TRAILER_SIZE(receiver->ith_option)) { - receiver->ith_msize = kmsg->ikm_header.msgh_size; + receiver->ith_msize = kmsg->ikm_header->msgh_size; receiver->ith_state = MACH_RCV_TOO_LARGE; } else { receiver->ith_state = MACH_MSG_SUCCESS; @@ -526,12 +534,11 @@ ipc_mqueue_post( } -kern_return_t -ipc_mqueue_receive_results(void) +/* static */ void +ipc_mqueue_receive_results(wait_result_t saved_wait_result) { thread_t self = current_thread(); mach_msg_option_t option = self->ith_option; - kern_return_t saved_wait_result = self->wait_result; kern_return_t mr; /* @@ -543,15 +550,11 @@ ipc_mqueue_receive_results(void) return; case THREAD_INTERRUPTED: - if (option & MACH_RCV_TIMEOUT) - thread_cancel_timer(); self->ith_state = MACH_RCV_INTERRUPTED; return; case THREAD_RESTART: /* something bad happened to the port/set */ - if (option & MACH_RCV_TIMEOUT) - thread_cancel_timer(); self->ith_state = MACH_RCV_PORT_CHANGED; return; @@ -560,9 +563,6 @@ ipc_mqueue_receive_results(void) * We do not need to go select a message, somebody * handed us one (or a too-large indication). 
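 * (Editor's note: the explicit thread_set_timer()/thread_cancel_timer()
 * pairs disappear in this file; a timeout now travels with the wait as
 * a deadline handed to wait_queue_assert_wait64_locked(), so the result
 * paths below no longer cancel anything.)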
*/ - if (option & MACH_RCV_TIMEOUT) - thread_cancel_timer(); - mr = MACH_MSG_SUCCESS; switch (self->ith_state) { @@ -592,9 +592,11 @@ ipc_mqueue_receive_results(void) } void -ipc_mqueue_receive_continue(void) +ipc_mqueue_receive_continue( + __unused void *param, + wait_result_t wresult) { - ipc_mqueue_receive_results(); + ipc_mqueue_receive_results(wresult); mach_msg_receive_continue(); /* hard-coded for now */ } @@ -628,20 +630,17 @@ ipc_mqueue_receive_continue(void) void ipc_mqueue_receive( - ipc_mqueue_t mqueue, - mach_msg_option_t option, - mach_msg_size_t max_size, - mach_msg_timeout_t timeout, - int interruptible) + ipc_mqueue_t mqueue, + mach_msg_option_t option, + mach_msg_size_t max_size, + mach_msg_timeout_t rcv_timeout, + int interruptible) { - ipc_port_t port; - mach_msg_return_t mr, mr2; - ipc_kmsg_queue_t kmsgs; - wait_result_t wresult; - thread_t self; - ipc_kmsg_t *kmsgp; - mach_port_seqno_t *seqnop; - spl_t s; + ipc_kmsg_queue_t kmsgs; + wait_result_t wresult; + thread_t self; + uint64_t deadline; + spl_t s; s = splsched(); imq_lock(mqueue); @@ -725,7 +724,7 @@ ipc_mqueue_receive( */ self = current_thread(); if (option & MACH_RCV_TIMEOUT) { - if (timeout == 0) { + if (rcv_timeout == 0) { imq_unlock(mqueue); splx(s); self->ith_state = MACH_RCV_TIMED_OUT; @@ -738,30 +737,31 @@ ipc_mqueue_receive( self->ith_option = option; self->ith_msize = max_size; + if (option & MACH_RCV_TIMEOUT) + clock_interval_to_deadline(rcv_timeout, 1000*NSEC_PER_USEC, &deadline); + else + deadline = 0; + wresult = wait_queue_assert_wait64_locked(&mqueue->imq_wait_queue, - IPC_MQUEUE_RECEIVE, - interruptible, - self); + IPC_MQUEUE_RECEIVE, + interruptible, deadline, + self); thread_unlock(self); imq_unlock(mqueue); splx(s); if (wresult == THREAD_WAITING) { - if (option & MACH_RCV_TIMEOUT) - thread_set_timer(timeout, 1000*NSEC_PER_USEC); - - if (interruptible == THREAD_ABORTSAFE) - counter(c_ipc_mqueue_receive_block_user++); - else - counter(c_ipc_mqueue_receive_block_kernel++); + counter((interruptible == THREAD_ABORTSAFE) ? + c_ipc_mqueue_receive_block_user++ : + c_ipc_mqueue_receive_block_kernel++); if (self->ith_continuation) thread_block(ipc_mqueue_receive_continue); /* NOTREACHED */ - thread_block(THREAD_CONTINUE_NULL); + wresult = thread_block(THREAD_CONTINUE_NULL); } - ipc_mqueue_receive_results(); + ipc_mqueue_receive_results(wresult); } @@ -786,8 +786,8 @@ ipc_mqueue_select( { thread_t self = current_thread(); ipc_kmsg_t kmsg; - mach_port_seqno_t seqno; mach_msg_return_t mr; + mach_msg_size_t rcv_size; mr = MACH_MSG_SUCCESS; @@ -797,26 +797,24 @@ ipc_mqueue_select( * before pulling the message off the queue. */ kmsg = ipc_kmsg_queue_first(&mqueue->imq_messages); - assert(kmsg != IKM_NULL); - if (kmsg->ikm_header.msgh_size + - REQUESTED_TRAILER_SIZE(option) > max_size) { - mr = MACH_RCV_TOO_LARGE; - } - /* * If we really can't receive it, but we had the * MACH_RCV_LARGE option set, then don't take it off * the queue, instead return the appropriate error * (and size needed). 
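 * (Editor's note: the too-large check below now uses
 * ipc_kmsg_copyout_size(), so a message that fits in kernel form but
 * would expand past max_size for a 64-bit receiver is reported as
 * MACH_RCV_TOO_LARGE with the expanded size.)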
*/ - if ((mr == MACH_RCV_TOO_LARGE) && (option & MACH_RCV_LARGE)) { - self->ith_kmsg = IKM_NULL; - self->ith_msize = kmsg->ikm_header.msgh_size; - self->ith_seqno = 0; - self->ith_state = mr; - return; + rcv_size = ipc_kmsg_copyout_size(kmsg, self->map); + if (rcv_size + REQUESTED_TRAILER_SIZE(option) > max_size) { + mr = MACH_RCV_TOO_LARGE; + if (option & MACH_RCV_LARGE) { + self->ith_kmsg = IKM_NULL; + self->ith_msize = rcv_size; + self->ith_seqno = 0; + self->ith_state = mr; + return; + } } ipc_kmsg_rmqueue_first_macro(&mqueue->imq_messages, kmsg); @@ -890,6 +888,8 @@ ipc_mqueue_set_qlimit( { spl_t s; + assert(qlimit <= MACH_PORT_QLIMIT_MAX); + /* wake up senders allowed by the new qlimit */ s = splsched(); imq_lock(mqueue); @@ -908,6 +908,7 @@ ipc_mqueue_set_qlimit( mqueue->imq_fullwaiters = FALSE; break; } + mqueue->imq_msgcount++; /* give it to the awakened thread */ } } mqueue->imq_qlimit = qlimit; @@ -981,7 +982,6 @@ ipc_mqueue_copyin( if (entry->ie_bits & MACH_PORT_TYPE_RECEIVE) { ipc_port_t port; - ipc_pset_t pset; port = (ipc_port_t) object; assert(port != IP_NULL); diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h index 209414666..2670f5c5c 100644 --- a/osfmk/ipc/ipc_mqueue.h +++ b/osfmk/ipc/ipc_mqueue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,6 +66,7 @@ #include #include +#include #include #include @@ -118,19 +119,46 @@ extern int ipc_mqueue_rcv; /* Initialize a newly-allocated message queue */ extern void ipc_mqueue_init( - ipc_mqueue_t mqueue, - boolean_t is_set); + ipc_mqueue_t mqueue, + boolean_t is_set); + +/* destroy an mqueue */ +extern void ipc_mqueue_destroy( + ipc_mqueue_t mqueue); /* Wake up receivers waiting in a message queue */ extern void ipc_mqueue_changed( ipc_mqueue_t mqueue); +/* Add the specific mqueue as a member of the set */ +extern kern_return_t ipc_mqueue_add( + ipc_mqueue_t mqueue, + ipc_mqueue_t set_mqueue); + +/* Check to see if mqueue is member of set_mqueue */ +extern boolean_t ipc_mqueue_member( + ipc_mqueue_t mqueue, + ipc_mqueue_t set_mqueue); + +/* Remove an mqueue from a specific set */ +extern kern_return_t ipc_mqueue_remove( + ipc_mqueue_t mqueue, + ipc_mqueue_t set_mqueue); + +/* Remove an mqueue from all sets */ +extern void ipc_mqueue_remove_from_all( + ipc_mqueue_t mqueue); + +/* Remove all the members of the specifiied set */ +extern void ipc_mqueue_remove_all( + ipc_mqueue_t mqueue); + /* Send a message to a port */ extern mach_msg_return_t ipc_mqueue_send( ipc_mqueue_t mqueue, ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t timeout); + mach_msg_timeout_t timeout_val); /* Deliver message to message queue or waiting receiver */ extern void ipc_mqueue_post( @@ -142,11 +170,13 @@ extern void ipc_mqueue_receive( ipc_mqueue_t mqueue, mach_msg_option_t option, mach_msg_size_t max_size, - mach_msg_timeout_t timeout, + mach_msg_timeout_t timeout_val, int interruptible); /* Continuation routine for message receive */ -extern void ipc_mqueue_receive_continue(void); +extern void ipc_mqueue_receive_continue( + void *param, + wait_result_t wresult); /* Select a message from a queue and try to post it to ourself */ extern void ipc_mqueue_select( diff --git a/osfmk/ipc/ipc_object.c b/osfmk/ipc/ipc_object.c index 63abafcd2..0874e7a31 100644 --- a/osfmk/ipc/ipc_object.c +++ b/osfmk/ipc/ipc_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,11 +59,16 @@ #include +#include #include #include #include #include + +#include #include + +#include #include #include #include @@ -231,9 +236,6 @@ ipc_object_alloc_dead( ipc_entry_t entry; kern_return_t kr; - int i; - - kr = ipc_entry_alloc(space, namep, &entry); if (kr != KERN_SUCCESS) return kr; @@ -269,9 +271,6 @@ ipc_object_alloc_dead_name( ipc_entry_t entry; kern_return_t kr; - int i; - - kr = ipc_entry_alloc_name(space, name, &entry); if (kr != KERN_SUCCESS) return kr; @@ -485,8 +484,6 @@ ipc_object_copyin( ipc_port_t soright; kern_return_t kr; - int i; - /* * Could first try a read lock when doing * MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND, @@ -595,8 +592,7 @@ ipc_object_copyin_from_kernel( case MACH_MSG_TYPE_MOVE_SEND: { /* move naked send right into the message */ - ipc_port_t port = (ipc_port_t) object; - assert(port->ip_srights); + assert(((ipc_port_t)object)->ip_srights); break; } @@ -615,8 +611,7 @@ ipc_object_copyin_from_kernel( case MACH_MSG_TYPE_MOVE_SEND_ONCE: { /* move naked send-once right into the message */ - ipc_port_t port = (ipc_port_t) object; - assert(port->ip_sorights); + assert(((ipc_port_t)object)->ip_sorights); break; } @@ -781,8 +776,6 @@ ipc_object_copyout_name( ipc_entry_t entry; kern_return_t kr; - int i; - assert(IO_VALID(object)); assert(io_otype(object) == IOT_PORT); @@ -925,6 +918,7 @@ ipc_object_copyout_dest( default: panic("ipc_object_copyout_dest: strange rights"); + name = MACH_PORT_DEAD; } *namep = name; @@ -953,8 +947,6 @@ ipc_object_rename( ipc_entry_t oentry, nentry; kern_return_t kr; - int i; - kr = ipc_entry_alloc_name(space, nname, &nentry); if (kr != KERN_SUCCESS) return kr; @@ -998,7 +990,7 @@ io_free( ipc_port_track_dealloc(port); #endif /* MACH_ASSERT */ } - zfree(ipc_object_zones[otype], (vm_offset_t) object); + zfree(ipc_object_zones[otype], object); } #endif /* MACH_ASSERT */ @@ -1006,6 +998,7 @@ io_free( #if MACH_KDB #include +#include #define printf kdbprintf @@ -1015,7 +1008,7 @@ io_free( * Pretty-print an object for kdb. */ -char *ikot_print_array[IKOT_MAX_TYPE] = { +const char *ikot_print_array[IKOT_MAX_TYPE] = { "(NONE) ", "(THREAD) ", "(TASK) ", diff --git a/osfmk/ipc/ipc_object.h b/osfmk/ipc/ipc_object.h index 62a11978a..1eb392c29 100644 --- a/osfmk/ipc/ipc_object.h +++ b/osfmk/ipc/ipc_object.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,7 +61,6 @@ #define _IPC_IPC_OBJECT_H_ #include -#include #include #include @@ -91,12 +90,8 @@ typedef natural_t ipc_object_type_t; struct ipc_object { ipc_object_refs_t io_references; ipc_object_bits_t io_bits; - port_name_t io_receiver_name; -#if NCPUS == 1 - usimple_lock_data_t io_lock_data; -#else + mach_port_name_t io_receiver_name; decl_mutex_data(, io_lock_data) -#endif }; /* @@ -150,7 +145,7 @@ extern void io_free( #else /* MACH_ASSERT */ #define io_free(otype, io) \ - zfree(ipc_object_zones[(otype)], (vm_offset_t) (io)) + zfree(ipc_object_zones[(otype)], (io)) #endif /* MACH_ASSERT */ /* @@ -159,21 +154,8 @@ extern void io_free( * within any kernel data structure needing to lock an ipc_object * (ipc_port and ipc_pset). 
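
With the NCPUS == 1 simple-lock variant removed in the hunk below, io_lock and friends become unconditional thin wrappers over a mutex embedded in the object itself. The idiom, sketched self-contained with pthreads standing in for the kernel mutex package (obj_* names are illustrative):

    #include <pthread.h>

    /* every lockable object embeds its own mutex, right after the
     * reference count, and short macros keep the call sites terse */
    struct object {
        unsigned int refs;
        pthread_mutex_t lock_data;
    };

    #define obj_lock_init(o)  pthread_mutex_init(&(o)->lock_data, NULL)
    #define obj_lock(o)       pthread_mutex_lock(&(o)->lock_data)
    #define obj_lock_try(o)   (pthread_mutex_trylock(&(o)->lock_data) == 0)
    #define obj_unlock(o)     pthread_mutex_unlock(&(o)->lock_data)
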
*/ -#if NCPUS == 1 - -#define io_lock_init(io) \ - usimple_lock_init(&(io)-io_lock_data, ETAP_IPC_OBJECT) -#define io_lock(io) \ - usimple_lock(&(io)->io_lock_data) -#define io_lock_try(io) \ - usimple_lock_try(&(io)->io_lock_data) -#define io_unlock(io) \ - usimple_unlock(&(io)->io_lock_data) - -#else /* NCPUS == 1 */ - #define io_lock_init(io) \ - mutex_init(&(io)->io_lock_data, ETAP_IPC_OBJECT) + mutex_init(&(io)->io_lock_data, 0) #define io_lock(io) \ mutex_lock(&(io)->io_lock_data) #define io_lock_try(io) \ @@ -181,13 +163,7 @@ extern void io_free( #define io_unlock(io) \ mutex_unlock(&(io)->io_lock_data) -#endif /* NCPUS == 1 */ - -#if NCPUS > 1 #define _VOLATILE_ volatile -#else /* NCPUS > 1 */ -#define _VOLATILE_ -#endif /* NCPUS > 1 */ #define io_check_unlock(io) \ MACRO_BEGIN \ diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index 2183bee44..8c5d894b5 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -185,8 +185,8 @@ ipc_port_dnrequest( kern_return_t ipc_port_dngrow( - ipc_port_t port, - int target_size) + ipc_port_t port, + ipc_table_elems_t target_size) { ipc_table_size_t its; ipc_port_request_t otable, ntable; @@ -252,6 +252,7 @@ ipc_port_dngrow( (osize - 1) * sizeof(struct ipc_port_request)); } else { osize = 1; + oits = 0; free = 0; } @@ -294,9 +295,9 @@ ipc_port_dngrow( ipc_port_t ipc_port_dncancel( - ipc_port_t port, - mach_port_name_t name, - ipc_port_request_index_t index) + ipc_port_t port, + __assert_only mach_port_name_t name, + ipc_port_request_index_t index) { ipc_port_request_t ipr, table; ipc_port_t dnrequest; @@ -498,10 +499,6 @@ ipc_port_alloc( ipc_port_init(port, space, name); - if (task_is_classic(current_task())) { - IP_SET_CLASSIC(port); - } - *namep = name; *portp = port; @@ -541,10 +538,6 @@ ipc_port_alloc_name( ipc_port_init(port, space, name); - if (task_is_classic(current_task())) { - IP_SET_CLASSIC(port); - } - *portp = port; return KERN_SUCCESS; @@ -558,7 +551,7 @@ ipc_port_alloc_name( */ void ipc_port_dnnotify( - ipc_port_t port, + __unused ipc_port_t port, ipc_port_request_t dnrequests) { ipc_table_size_t its = dnrequests->ipr_size; @@ -567,7 +560,7 @@ ipc_port_dnnotify( for (index = 1; index < size; index++) { ipc_port_request_t ipr = &dnrequests[index]; - mach_port_name_t name = ipr->ipr_name; + mach_port_name_t name = ipr->ipr_name; ipc_port_t soright; if (name == MACH_PORT_NULL) @@ -601,7 +594,6 @@ ipc_port_destroy( { ipc_port_t pdrequest, nsrequest; ipc_mqueue_t mqueue; - ipc_kmsg_queue_t kmqueue; ipc_kmsg_t kmsg; ipc_port_request_t dnrequests; @@ -1007,11 +999,6 @@ ipc_port_release_send( mscount = port->ip_mscount; ip_unlock(port); ipc_notify_no_senders(nsrequest, mscount); - /* - * Check that there are no other locks taken, because - * [norma_]ipc_notify_no_senders routines may block. - */ - check_simple_locks(); } else ip_unlock(port); } @@ -1144,8 +1131,8 @@ ipc_port_alloc_special( void ipc_port_dealloc_special( - ipc_port_t port, - ipc_space_t space) + ipc_port_t port, + __assert_only ipc_space_t space) { ip_lock(port); assert(ip_active(port)); @@ -1169,6 +1156,8 @@ ipc_port_dealloc_special( #if MACH_ASSERT +#include + /* * Keep a list of all allocated ports. 
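
The MACH_ASSERT machinery here threads every allocated port onto a global queue under a dedicated mutex so the kernel debugger can walk live ports; ipc_port_track_dealloc unlinks an entry just before its memory is freed. A hedged analog using a hand-rolled doubly linked list in place of the kernel queue package:

    #include <pthread.h>

    struct port_dbg {
        struct port_dbg *next, *prev;    /* all-allocated-ports links */
    };

    static struct port_dbg dbg_head = { &dbg_head, &dbg_head };
    static pthread_mutex_t dbg_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long dbg_count;

    static void track_alloc(struct port_dbg *p)      /* on creation */
    {
        pthread_mutex_lock(&dbg_lock);
        p->next = dbg_head.next; p->prev = &dbg_head;
        dbg_head.next->prev = p; dbg_head.next = p;
        dbg_count++;
        pthread_mutex_unlock(&dbg_lock);
    }

    static void track_dealloc(struct port_dbg *p)    /* just before free */
    {
        pthread_mutex_lock(&dbg_lock);
        p->prev->next = p->next; p->next->prev = p->prev;
        dbg_count--;
        pthread_mutex_unlock(&dbg_lock);
    }
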
* Allocation is intercepted via ipc_port_init; @@ -1199,7 +1188,7 @@ void ipc_port_debug_init(void) { queue_init(&port_alloc_queue); - mutex_init(&port_alloc_queue_lock, ETAP_IPC_PORT_ALLOCQ); + mutex_init(&port_alloc_queue_lock, 0); } @@ -1213,7 +1202,7 @@ ipc_port_init_debug( { unsigned int i; - port->ip_thread = (unsigned long) current_thread(); + port->ip_thread = current_thread(); port->ip_timetrack = port_timestamp++; for (i = 0; i < IP_CALLSTACK_MAX; ++i) port->ip_callstack[i] = 0; @@ -1243,18 +1232,24 @@ ipc_port_init_debug( * This routine should be invoked JUST prior to * deallocating the actual memory occupied by the port. */ +#if 1 void ipc_port_track_dealloc( - ipc_port_t port) + __unused ipc_port_t port) +{ +} +#else +void +ipc_port_track_dealloc( + ipc_port_t port) { -#if 0 mutex_lock(&port_alloc_queue_lock); assert(port_count > 0); --port_count; queue_remove(&port_alloc_queue, port, ipc_port_t, ip_port_links); mutex_unlock(&port_alloc_queue_lock); -#endif } +#endif #endif /* MACH_ASSERT */ @@ -1265,29 +1260,11 @@ ipc_port_track_dealloc( #include #define printf kdbprintf -extern int db_indent; int db_port_queue_print( ipc_port_t port); -/* - * ipc_entry_print - pretty-print an ipc_entry - */ -static void ipc_entry_print(struct ipc_entry *, char *); /* forward */ - -static void ipc_entry_print(struct ipc_entry *iep, char *tag) -{ - ipc_entry_bits_t bits = iep->ie_bits; - - iprintf("%s @", tag); - printf(" 0x%x, bits=%x object=%x\n", iep, bits, iep->ie_object); - db_indent += 2; - iprintf("urefs=%x ", IE_BITS_UREFS(bits)); - printf("type=%x gen=%x\n", IE_BITS_TYPE(bits), IE_BITS_GEN(bits)); - db_indent -= 2; -} - /* * Routine: ipc_port_print * Purpose: @@ -1297,12 +1274,11 @@ int ipc_port_print_long = 0; /* set for more detail */ void ipc_port_print( - ipc_port_t port, - boolean_t have_addr, - db_expr_t count, - char *modif) + ipc_port_t port, + __unused boolean_t have_addr, + __unused db_expr_t count, + char *modif) { - extern int db_indent; db_addr_t task; int task_id; int nmsgs; @@ -1343,7 +1319,7 @@ ipc_port_print( printf("reply"); else if (port->ip_receiver == default_pager_space) printf("default_pager"); - else if (task = db_task_from_space(port->ip_receiver, &task_id)) + else if ((task = db_task_from_space(port->ip_receiver, &task_id)) != (db_addr_t)0) printf("task%d at 0x%x", task_id, task); else printf("unknown"); @@ -1445,7 +1421,7 @@ print_type_ports(type, dead) for (port = (ipc_port_t)first_element(ipc_object_zones[IOT_PORT]); port; port = (ipc_port_t)next_element(ipc_object_zones[IOT_PORT], - (vm_offset_t)port)) + port)) if (ip_kotype(port) == type && (!dead || !ip_active(port))) { if (++n % 5) @@ -1485,7 +1461,7 @@ print_ports(void) for (port = (ipc_port_t)first_element(ipc_object_zones[IOT_PORT]); port; port = (ipc_port_t)next_element(ipc_object_zones[IOT_PORT], - (vm_offset_t)port)) { + port)) { total_port_count++; if (ip_kotype(port) >= IKOT_MAX_TYPE) { port_types[IKOT_UNKNOWN].total_count++; @@ -1556,10 +1532,10 @@ print_ports(void) * */ -#define KMSG_MATCH_FIELD(kmsg) ((unsigned int) kmsg->ikm_header.msgh_id) +#define KMSG_MATCH_FIELD(kmsg) (kmsg->ikm_header->msgh_id) #define DKQP_LONG(kmsg) FALSE -char *dkqp_long_format = "(%3d) <%10d> 0x%x %10d %10d\n"; -char *dkqp_format = "(%3d) <%10d> 0x%x %10d %10d\n"; +const char *dkqp_long_format = "(%3d) <%10d> 0x%x %10d %10d\n"; +const char *dkqp_format = "(%3d) <%10d> 0x%x %10d %10d\n"; int db_kmsg_queue_print( @@ -1594,7 +1570,7 @@ db_kmsg_queue_print( if (DKQP_LONG(kmsg)) inline_total += kmsg->ikm_size; else - 
inline_total += kmsg->ikm_header.msgh_size; + inline_total += kmsg->ikm_header->msgh_size; } iprintf(DKQP_LONG(kmsg) ? dkqp_long_format : dkqp_format, icount, cur_id, ikmsg, inline_total, ool_total); @@ -1655,7 +1631,7 @@ typedef struct port_item { #define ITEM_MAX 400 typedef struct port_track { - char *name; + const char *name; unsigned long max; unsigned long warning; port_item items[ITEM_MAX]; @@ -1667,7 +1643,7 @@ port_track port_spaces; /* match against ipc spaces */ void port_track_init( port_track *trackp, - char *name); + const char *name); void port_item_add( port_track *trackp, unsigned long item); @@ -1682,7 +1658,7 @@ void port_callers_print( void port_track_init( port_track *trackp, - char *name) + const char *name) { port_item *i; @@ -1804,7 +1780,6 @@ db_port_walk( unsigned int ref_counts[MAX_REFS]; unsigned int inactive[MAX_REFS]; unsigned int ipc_ports = 0; - unsigned int proxies = 0, principals = 0; iprintf("Allocated port count is %d\n", port_count); no_receiver = no_match = ref_overflow = 0; @@ -1822,7 +1797,7 @@ db_port_walk( iprintf("Walking all ports.\n"); queue_iterate(&port_alloc_queue, port, ipc_port_t, ip_port_links) { - char *port_type; + const char *port_type; port_type = " IPC port"; if (ip_active(port)) diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h index 4f88af2f5..587f14654 100644 --- a/osfmk/ipc/ipc_port.h +++ b/osfmk/ipc/ipc_port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -65,19 +65,16 @@ #include #include +#include #include #include -#include #include -#include -#include -#include +#include + +#include #include #include -#include -#include -#include #include /* @@ -97,8 +94,6 @@ typedef unsigned int ipc_port_timestamp_t; -typedef unsigned int ipc_port_flags_t; - struct ipc_port { /* @@ -140,7 +135,7 @@ struct ipc_port { #define IP_NSPARES 10 #define IP_CALLSTACK_MAX 10 queue_chain_t ip_port_links; /* all allocated ports */ - natural_t ip_thread; /* who made me? thread context */ + thread_t ip_thread; /* who made me? thread context */ unsigned long ip_timetrack; /* give an idea of "when" created */ natural_t ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */ unsigned long ip_spares[IP_NSPARES]; /* for debugging */ @@ -197,17 +192,8 @@ MACRO_BEGIN \ (port)->ip_premsg = IKM_NULL; \ MACRO_END -#define IP_BIT_CLASSIC 0x00004000 -#define IP_CLASSIC(port) ((port)->ip_bits & IP_BIT_CLASSIC) - -#define IP_SET_CLASSIC(port) \ -MACRO_BEGIN \ - (port)->ip_bits |= IP_BIT_CLASSIC; \ -MACRO_END - -typedef ipc_table_index_t ipc_port_request_index_t; -typedef struct ipc_port_request { +struct ipc_port_request { union { struct ipc_port *port; ipc_port_request_index_t index; @@ -217,7 +203,7 @@ typedef struct ipc_port_request { mach_port_name_t name; struct ipc_table_size *size; } name; -} *ipc_port_request_t; +}; #define ipr_next notify.index #define ipr_size name.size @@ -225,8 +211,6 @@ typedef struct ipc_port_request { #define ipr_soright notify.port #define ipr_name name.name -#define IPR_NULL ((ipc_port_request_t) 0) - /* * Taking the ipc_port_multiple lock grants the privilege * to lock multiple ports at once. 
No ports must locked @@ -236,7 +220,7 @@ typedef struct ipc_port_request { decl_mutex_data(extern,ipc_port_multiple_lock_data) #define ipc_port_multiple_lock_init() \ - mutex_init(&ipc_port_multiple_lock_data, ETAP_IPC_PORT_MULT) + mutex_init(&ipc_port_multiple_lock_data, 0) #define ipc_port_multiple_lock() \ mutex_lock(&ipc_port_multiple_lock_data) @@ -254,7 +238,7 @@ decl_mutex_data(extern,ipc_port_timestamp_lock_data) extern ipc_port_timestamp_t ipc_port_timestamp_data; #define ipc_port_timestamp_lock_init() \ - mutex_init(&ipc_port_timestamp_lock_data, ETAP_IPC_PORT_TIME) + mutex_init(&ipc_port_timestamp_lock_data, 0) #define ipc_port_timestamp_lock() \ mutex_lock(&ipc_port_timestamp_lock_data) @@ -294,8 +278,8 @@ ipc_port_dnrequest( /* Grow a port's table of dead-name requests */ extern kern_return_t ipc_port_dngrow( - ipc_port_t port, - int target_size); + ipc_port_t port, + ipc_table_elems_t target_size); /* Cancel a dead-name request and return the send-once right */ extern ipc_port_t ipc_port_dncancel( diff --git a/osfmk/ipc/ipc_print.h b/osfmk/ipc/ipc_print.h index f35dff5fb..8b4277f1d 100644 --- a/osfmk/ipc/ipc_print.h +++ b/osfmk/ipc/ipc_print.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,75 +22,31 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:29 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:16 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.3 1995/02/23 17:31:31 alanl - * DIPC: Merge from nmk17b2 to nmk18b8. - * [95/01/03 mmp] - * - * Revision 1.1.7.3 1994/11/29 01:21:22 robert - * re-submit for failed CF backup - * [1994/11/29 01:17:55 robert] - * - * Revision 1.1.7.2 1994/11/28 23:58:36 travos - * Add MACH_KDB ifdef. - * [1994/11/28 23:53:46 travos] - * - * Revision 1.1.7.1 1994/08/04 02:22:55 mmp - * NOTE: file was moved back to b11 version for dipc2_shared. - * Update prototype for ipc_port_print. - * [1994/08/03 19:26:56 mmp] - * - * Revision 1.1.8.2 1994/09/23 02:10:26 ezf - * change marker to not FREE - * [1994/09/22 21:30:09 ezf] - * - * Revision 1.1.8.1 1994/08/07 20:46:08 bolinger - * Merge up to colo_b7. - * [1994/08/01 20:59:21 bolinger] - * - * Revision 1.1.2.2 1993/08/02 16:12:25 jeffc - * CR9523 -- New file to hold prototypes for ddb print - * functions in the ipc system. - * [1993/07/29 20:13:45 jeffc] - * - * $EndLog$ - */ -#ifndef IPC_PRINT_H -#define IPC_PRINT_H +#ifndef _IPC_PRINT_H_ +#define _IPC_PRINT_H_ + +#if MACH_KDB #include -#include + +#include +#include +#include +#include extern void ipc_pset_print( ipc_pset_t pset); -#include - -#if MACH_KDB -#include - extern void ipc_port_print( ipc_port_t port, boolean_t have_addr, db_expr_t count, char *modif); -#include - extern void ipc_kmsg_print( ipc_kmsg_t kmsg); -#include - extern void ipc_msg_print( mach_msg_header_t *msgh); @@ -99,4 +55,5 @@ extern ipc_port_t ipc_name_to_data( mach_port_name_t name); #endif /* MACH_KDB */ + #endif /* IPC_PRINT_H */ diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index 0befd406c..d2983415a 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -68,6 +68,8 @@ #include #include +#include +#include /* * Routine: ipc_pset_alloc * Purpose: @@ -235,8 +237,6 @@ kern_return_t ipc_pset_remove_from_all( ipc_port_t port) { - ipc_pset_t pset; - assert(ip_active(port)); if (port->ip_pset_count == 0) @@ -319,13 +319,10 @@ ipc_list_count( * Purpose: * Pretty-print a port set for kdb. */ - void ipc_pset_print( ipc_pset_t pset) { - extern int db_indent; - printf("pset 0x%x\n", pset); db_indent += 2; diff --git a/osfmk/ipc/ipc_pset.h b/osfmk/ipc/ipc_pset.h index e673bd8d8..0ee2afc84 100644 --- a/osfmk/ipc/ipc_pset.h +++ b/osfmk/ipc/ipc_pset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,27 +60,28 @@ #ifndef _IPC_IPC_PSET_H_ #define _IPC_IPC_PSET_H_ +#include #include #include -#include +#include +#include #include #include #include -typedef struct ipc_pset { +struct ipc_pset { /* * Initial sub-structure in common with all ipc_objects. */ struct ipc_object ips_object; struct ipc_mqueue ips_messages; -} *ipc_pset_t; +}; #define ips_references ips_object.io_references #define ips_local_name ips_object.io_receiver_name -#define IPS_NULL ((ipc_pset_t) IO_NULL) #define ips_active(pset) io_active(&(pset)->ips_object) #define ips_lock(pset) io_lock(&(pset)->ips_object) @@ -107,6 +108,11 @@ extern kern_return_t ipc_pset_add( ipc_pset_t pset, ipc_port_t port); +/* determine if port is a member of set */ +extern boolean_t ipc_pset_member( + ipc_pset_t pset, + ipc_port_t port); + /* Remove a port from a port set */ extern kern_return_t ipc_pset_remove( ipc_pset_t pset, diff --git a/osfmk/ipc/ipc_right.c b/osfmk/ipc/ipc_right.c index b2822ebe9..9f228a43a 100644 --- a/osfmk/ipc/ipc_right.c +++ b/osfmk/ipc/ipc_right.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
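
struct ipc_pset above begins with a struct ipc_object, the "initial sub-structure in common with all ipc_objects", which is why ips_lock and ips_active can simply delegate to io_lock and io_active. The common-initial-member idiom in miniature (base and pset are illustrative names):

    #include <stdio.h>

    struct base { int refs; };                 /* common initial sub-structure */
    struct pset { struct base obj; int members; };

    #define base_ref(b)   ((b)->refs++)
    #define pset_ref(ps)  base_ref(&(ps)->obj) /* forwarder, like ips_lock */

    int main(void)
    {
        struct pset ps = { { 0 }, 0 };
        pset_ref(&ps);                         /* acts on the embedded base */
        printf("%d\n", ps.obj.refs);           /* prints 1 */
        return 0;
    }
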
* * @APPLE_LICENSE_HEADER_START@ * @@ -382,10 +382,10 @@ ipc_right_dnrequest( ipc_port_t ipc_right_dncancel( - ipc_space_t space, - ipc_port_t port, - mach_port_name_t name, - ipc_entry_t entry) + __unused ipc_space_t space, + ipc_port_t port, + mach_port_name_t name, + ipc_entry_t entry) { ipc_port_t dnrequest; @@ -410,9 +410,9 @@ ipc_right_dncancel( boolean_t ipc_right_inuse( - ipc_space_t space, - mach_port_name_t name, - ipc_entry_t entry) + ipc_space_t space, + __unused mach_port_name_t name, + ipc_entry_t entry) { if (IE_BITS_TYPE(entry->ie_bits) != MACH_PORT_TYPE_NONE) { is_write_unlock(space); @@ -558,7 +558,7 @@ ipc_right_clean( ipc_port_t port = (ipc_port_t) entry->ie_object; ipc_port_t dnrequest; ipc_port_t nsrequest = IP_NULL; - mach_port_mscount_t mscount; + mach_port_mscount_t mscount = 0; assert(port != IP_NULL); ip_lock(port); @@ -671,7 +671,7 @@ ipc_right_destroy( case MACH_PORT_TYPE_SEND_ONCE: { ipc_port_t port = (ipc_port_t) entry->ie_object; ipc_port_t nsrequest = IP_NULL; - mach_port_mscount_t mscount; + mach_port_mscount_t mscount = 0; ipc_port_t dnrequest; assert(port != IP_NULL); @@ -828,7 +828,7 @@ ipc_right_dealloc( ipc_port_t port; ipc_port_t dnrequest = IP_NULL; ipc_port_t nsrequest = IP_NULL; - mach_port_mscount_t mscount; + mach_port_mscount_t mscount = 0; assert(IE_BITS_UREFS(bits) > 0); @@ -881,7 +881,7 @@ ipc_right_dealloc( case MACH_PORT_TYPE_SEND_RECEIVE: { ipc_port_t port; ipc_port_t nsrequest = IP_NULL; - mach_port_mscount_t mscount; + mach_port_mscount_t mscount = 0; assert(IE_BITS_UREFS(bits) > 0); @@ -1157,7 +1157,7 @@ ipc_right_delta( ipc_port_t port; ipc_port_t dnrequest = IP_NULL; ipc_port_t nsrequest = IP_NULL; - mach_port_mscount_t mscount; + mach_port_mscount_t mscount = 0; if ((bits & MACH_PORT_TYPE_SEND) == 0) goto invalid_right; @@ -1310,10 +1310,10 @@ ipc_right_info( boolean_t ipc_right_copyin_check( - ipc_space_t space, - mach_port_name_t name, - ipc_entry_t entry, - mach_msg_type_name_t msgt_name) + __assert_only ipc_space_t space, + __unused mach_port_name_t name, + ipc_entry_t entry, + mach_msg_type_name_t msgt_name) { ipc_entry_bits_t bits; diff --git a/osfmk/ipc/ipc_space.c b/osfmk/ipc/ipc_space.c index b51b02999..c88fbe0de 100644 --- a/osfmk/ipc/ipc_space.c +++ b/osfmk/ipc/ipc_space.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -268,7 +268,6 @@ ipc_space_clean( for (tentry = ipc_splay_traverse_start(&space->is_tree); tentry != ITE_NULL; tentry = ipc_splay_traverse_next(&space->is_tree, TRUE)) { - int i; mach_port_type_t type; mach_port_name_t name = tentry->ite_name; @@ -380,3 +379,5 @@ ipc_space_destroy( */ is_release(space); } + + diff --git a/osfmk/ipc/ipc_space.h b/osfmk/ipc/ipc_space.h index 6ac534951..757ff469d 100644 --- a/osfmk/ipc/ipc_space.h +++ b/osfmk/ipc/ipc_space.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
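
The repeated `mach_port_mscount_t mscount = 0;` changes in ipc_right.c make the count well-defined on paths that never arm a no-senders notification: it is captured under the port lock only when nsrequest turns out non-null, and the possibly blocking notification is sent after unlocking (which is also why the check_simple_locks() call could be dropped earlier in this patch). The shape of that pattern, with stand-in declarations for the lock and notify primitives:

    #include <stddef.h>

    struct port { int srights; void *nsrequest; unsigned int mscount; };

    void port_lock(struct port *p);            /* stand-ins */
    void port_unlock(struct port *p);
    void notify_no_senders(void *request, unsigned int mscount);

    void release_send(struct port *p)
    {
        void *nsrequest = NULL;
        unsigned int mscount = 0;     /* defined even on the no-notify path */

        port_lock(p);
        if (--p->srights == 0 && p->nsrequest != NULL) {
            nsrequest = p->nsrequest;
            p->nsrequest = NULL;
            mscount = p->mscount;     /* captured under the lock */
        }
        port_unlock(p);

        if (nsrequest != NULL)        /* may block, so done after unlocking */
            notify_no_senders(nsrequest, mscount);
    }
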
* * @APPLE_LICENSE_HEADER_START@ * @@ -120,7 +120,7 @@ struct ipc_space { extern zone_t ipc_space_zone; #define is_alloc() ((ipc_space_t) zalloc(ipc_space_zone)) -#define is_free(is) zfree(ipc_space_zone, (vm_offset_t) (is)) +#define is_free(is) zfree(ipc_space_zone, (is)) extern ipc_space_t ipc_space_kernel; extern ipc_space_t ipc_space_reply; @@ -133,8 +133,7 @@ extern ipc_space_t default_pager_space; #define is_fast_space(is) ((is)->is_fast) -#define is_ref_lock_init(is) mutex_init(&(is)->is_ref_lock_data, \ - ETAP_IPC_IS_REF) +#define is_ref_lock_init(is) mutex_init(&(is)->is_ref_lock_data, 0) #define ipc_space_reference_macro(is) \ MACRO_BEGIN \ @@ -157,7 +156,7 @@ MACRO_BEGIN \ is_free(is); \ MACRO_END -#define is_lock_init(is) mutex_init(&(is)->is_lock_data, ETAP_IPC_IS) +#define is_lock_init(is) mutex_init(&(is)->is_lock_data, 0) #define is_read_lock(is) mutex_lock(&(is)->is_lock_data) #define is_read_unlock(is) mutex_unlock(&(is)->is_lock_data) @@ -193,6 +192,10 @@ extern kern_return_t ipc_space_create( extern void ipc_space_destroy( ipc_space_t space); +/* Clean up the entries - but leave the space alive */ +extern void ipc_space_clean( + ipc_space_t space); + #endif /* MACH_KERNEL_PRIVATE */ #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/ipc/ipc_splay.c b/osfmk/ipc/ipc_splay.c index ca7817be2..06cc17444 100644 --- a/osfmk/ipc/ipc_splay.c +++ b/osfmk/ipc/ipc_splay.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,56 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:28 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:16 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1994/09/23 02:11:47 ezf - * change marker to not FREE - * [1994/09/22 21:30:41 ezf] - * - * Revision 1.1.2.3 1993/07/22 16:17:25 rod - * Add ANSI prototypes. CR #9523. - * [1993/07/22 13:33:20 rod] - * - * Revision 1.1.2.2 1993/06/02 23:33:40 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:11:07 jeffc] - * - * Revision 1.1 1992/09/30 02:08:11 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/10/09 16:10:41 af - * Revision 2.4.2.1 91/09/16 10:16:00 rpd - * Added MACH_PORT_SMALLEST, MACH_PORT_LARGEST definitions to reduce lint. - * [91/09/02 rpd] - * - * Revision 2.4.2.1 91/09/16 10:16:00 rpd - * Added MACH_PORT_SMALLEST, MACH_PORT_LARGEST definitions to reduce lint. - * [91/09/02 rpd] - * - * Revision 2.4 91/05/14 16:37:08 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:23:52 mrt - * Changed to new Mach copyright - * [91/02/01 15:51:43 mrt] - * - * Revision 2.2 90/06/02 14:51:49 rpd - * Created for new IPC. - * [90/03/26 21:03:46 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -478,9 +428,9 @@ ipc_splay_tree_insert( void ipc_splay_tree_delete( - ipc_splay_tree_t splay, - mach_port_name_t name, - ipc_tree_entry_t entry) + ipc_splay_tree_t splay, + mach_port_name_t name, + __assert_only ipc_tree_entry_t entry) { ipc_tree_entry_t root, saved; diff --git a/osfmk/ipc/ipc_table.c b/osfmk/ipc/ipc_table.c index e8de5a125..24452704a 100644 --- a/osfmk/ipc/ipc_table.c +++ b/osfmk/ipc/ipc_table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,72 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:28 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.2 1998/06/01 17:29:25 youngwor - * Added infrastructure for shared port space support - * - * Revision 1.1.1.1 1998/03/07 02:26:16 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.10.1 1994/09/23 02:12:16 ezf - * change marker to not FREE - * [1994/09/22 21:30:49 ezf] - * - * Revision 1.2.2.3 1993/07/22 16:17:30 rod - * Add ANSI prototypes. CR #9523. - * [1993/07/22 13:33:29 rod] - * - * Revision 1.2.2.2 1993/06/02 23:33:55 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:11:14 jeffc] - * - * Revision 1.2 1992/11/25 01:09:56 robert - * integrate changes below for norma_14 - * - * Philippe Bernadat (bernadat) at gr.osf.org - * Limit ipc table allocation chunks to 8 pages, otherwise - * the kernel might dead lock because of VM_PAGE_FREE_RESERVED - * limited to 15. [dlb@osf.org & barbou@gr.osf.org] - * [1992/11/13 19:31:46 robert] - * - * Revision 1.1 1992/09/30 02:08:13 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/10/09 16:11:08 af - * Revision 2.5.2.1 91/09/16 10:16:06 rpd - * Removed unused variables. - * [91/09/02 rpd] - * - * Revision 2.5.2.1 91/09/16 10:16:06 rpd - * Removed unused variables. - * [91/09/02 rpd] - * - * Revision 2.5 91/05/14 16:37:35 mrt - * Correcting copyright - * - * Revision 2.4 91/03/16 14:48:52 rpd - * Added ipc_table_realloc and ipc_table_reallocable. - * [91/03/04 rpd] - * - * Revision 2.3 91/02/05 17:24:15 mrt - * Changed to new Mach copyright - * [91/02/01 15:52:05 mrt] - * - * Revision 2.2 90/06/02 14:51:58 rpd - * Created for new IPC. - * [90/03/26 21:04:20 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -232,19 +166,19 @@ ipc_table_init(void) * May block. */ -vm_offset_t +void * ipc_table_alloc( vm_size_t size) { vm_offset_t table; if (size < PAGE_SIZE) - table = kalloc(size); - else - if (kmem_alloc(kalloc_map, &table, size) != KERN_SUCCESS) - table = 0; + return kalloc(size); + + if (kmem_alloc(kalloc_map, &table, size) != KERN_SUCCESS) + table = 0; - return table; + return (void *)table; } /* @@ -259,19 +193,20 @@ ipc_table_alloc( * May block. */ -vm_offset_t +void * ipc_table_realloc( vm_size_t old_size, - vm_offset_t old_table, + void * old_table, vm_size_t new_size) { vm_offset_t new_table; - if (kmem_realloc(kalloc_map, old_table, old_size, + if (kmem_realloc(kalloc_map, + (vm_offset_t) old_table, old_size, &new_table, new_size) != KERN_SUCCESS) new_table = 0; - return new_table; + return (void *)new_table; } /* @@ -286,10 +221,10 @@ ipc_table_realloc( void ipc_table_free( vm_size_t size, - vm_offset_t table) + void * table) { if (size < PAGE_SIZE) kfree(table, size); else - kmem_free(kalloc_map, table, size); + kmem_free(kalloc_map, (vm_offset_t)table, size); } diff --git a/osfmk/ipc/ipc_table.h b/osfmk/ipc/ipc_table.h index cd7dc7199..fee1cb97d 100644 --- a/osfmk/ipc/ipc_table.h +++ b/osfmk/ipc/ipc_table.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
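
ipc_table_alloc and ipc_table_free above now traffic in void * and split on size: sub-page tables come from kalloc, page-sized and larger ones from kmem_alloc, and the free path must mirror the same threshold. A user-space analog assuming a 4 KiB page, with malloc/mmap standing in for kalloc/kmem_alloc:

    #include <stdlib.h>
    #include <sys/mman.h>

    #define PAGE_SZ 4096u

    void *table_alloc(size_t size)
    {
        if (size < PAGE_SZ)
            return malloc(size);                 /* small: heap allocator */

        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        return (p == MAP_FAILED) ? NULL : p;     /* big: whole pages */
    }

    void table_free(void *table, size_t size)
    {
        if (size < PAGE_SZ)
            free(table);                 /* must mirror the alloc decision */
        else
            munmap(table, size);
    }
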
* * @APPLE_LICENSE_HEADER_START@ * @@ -22,72 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:28 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.2 1998/05/29 23:50:33 youngwor - * Added infrastructure for shared port space support - * - * Revision 1.1.1.1 1998/03/07 02:26:16 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.3 1995/01/06 19:46:05 devrcs - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup - * [1994/10/14 03:41:41 dwm] - * - * Revision 1.1.8.2 1994/09/23 02:12:26 ezf - * change marker to not FREE - * [1994/09/22 21:30:53 ezf] - * - * Revision 1.1.8.1 1994/08/18 23:11:45 widyono - * RT IPC from RT2_SHARED - * [1994/08/18 15:49:24 widyono] - * - * Revision 1.1.6.1 1994/07/29 07:33:22 widyono - * Define default target size, ITS_SIZE_NONE - * [1994/07/28 22:27:01 widyono] - * - * Revision 1.1.2.4 1993/07/22 16:17:33 rod - * Add ANSI prototypes. CR #9523. - * [1993/07/22 13:33:33 rod] - * - * Revision 1.1.2.3 1993/06/07 22:11:46 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:02:58 jeffc] - * - * Revision 1.1.2.2 1993/06/02 23:34:02 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:11:17 jeffc] - * - * Revision 1.1 1992/09/30 02:29:14 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/05/14 16:37:52 mrt - * Correcting copyright - * - * Revision 2.4 91/03/16 14:49:01 rpd - * Added ipc_table_realloc. - * [91/03/04 rpd] - * - * Revision 2.3 91/02/05 17:24:19 mrt - * Changed to new Mach copyright - * [91/02/01 15:52:19 mrt] - * - * Revision 2.2 90/06/02 14:52:02 rpd - * Created for new IPC. - * [90/03/26 21:04:35 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -127,9 +61,12 @@ #ifndef _IPC_IPC_TABLE_H_ #define _IPC_IPC_TABLE_H_ +#include #include #include +#include + /* * The is_table_next field of an ipc_space_t points to * an ipc_table_size structure. These structures must @@ -158,15 +95,9 @@ * The ipr_size field points to the currently used ipc_table_size. */ -typedef natural_t ipc_table_index_t; /* index into tables */ -typedef natural_t ipc_table_elems_t; /* size of tables */ - -typedef struct ipc_table_size { +struct ipc_table_size { ipc_table_elems_t its_size; /* number of elements in table */ -} *ipc_table_size_t; - -#define ITS_NULL ((ipc_table_size_t) 0) -#define ITS_SIZE_NONE -1 +}; extern ipc_table_size_t ipc_table_entries; extern ipc_table_size_t ipc_table_dnrequests; @@ -185,19 +116,19 @@ extern void ipc_table_init(void); */ /* Allocate a table */ -extern vm_offset_t ipc_table_alloc( +extern void * ipc_table_alloc( vm_size_t size); /* Reallocate a big table */ -extern vm_offset_t ipc_table_realloc( +extern void * ipc_table_realloc( vm_size_t old_size, - vm_offset_t old_table, + void * old_table, vm_size_t new_size); /* Free a table */ extern void ipc_table_free( vm_size_t size, - vm_offset_t table); + void * table); #define it_entries_reallocable(its) \ ((its)->its_size * sizeof(struct ipc_entry) >= PAGE_SIZE) @@ -205,23 +136,23 @@ extern void ipc_table_free( #define it_entries_alloc(its) \ ((ipc_entry_t) \ ipc_table_alloc(it_entries_reallocable(its) ? 
\ - round_page_32((its)->its_size * sizeof(struct ipc_entry)) : \ + round_page((its)->its_size * sizeof(struct ipc_entry)) : \ (its)->its_size * sizeof(struct ipc_entry) \ )) #define it_entries_realloc(its, table, nits) \ ((ipc_entry_t) \ ipc_table_realloc( \ - round_page_32((its)->its_size * sizeof(struct ipc_entry)), \ - (vm_offset_t)(table), \ - round_page_32((nits)->its_size * sizeof(struct ipc_entry)) \ + round_page((its)->its_size * sizeof(struct ipc_entry)), \ + (void *)(table), \ + round_page((nits)->its_size * sizeof(struct ipc_entry)) \ )) #define it_entries_free(its, table) \ ipc_table_free(it_entries_reallocable(its) ? \ - round_page_32((its)->its_size * sizeof(struct ipc_entry)) : \ + round_page((its)->its_size * sizeof(struct ipc_entry)) : \ (its)->its_size * sizeof(struct ipc_entry), \ - (vm_offset_t)(table) \ + (void *)(table) \ ) #define it_dnrequests_alloc(its) \ @@ -232,6 +163,6 @@ extern void ipc_table_free( #define it_dnrequests_free(its, table) \ ipc_table_free((its)->its_size * \ sizeof(struct ipc_port_request), \ - (vm_offset_t)(table)) + (void *)(table)) #endif /* _IPC_IPC_TABLE_H_ */ diff --git a/osfmk/ipc/ipc_types.h b/osfmk/ipc/ipc_types.h index 935d44b9e..77629a865 100644 --- a/osfmk/ipc/ipc_types.h +++ b/osfmk/ipc/ipc_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,30 +22,54 @@ /* * @OSF_COPYRIGHT@ */ + /* * Define Basic IPC types available to callers. * These are not intended to be used directly, but * are used to define other types available through * port.h and mach_types.h for in-kernel entities. */ -#ifndef _IPC_TYPES_H_ -#define _IPC_TYPES_H_ + +#ifndef _IPC_IPC_TYPES_H_ +#define _IPC_IPC_TYPES_H_ #include #include #include -#if !defined(MACH_KERNEL_PRIVATE) +#ifdef MACH_KERNEL_PRIVATE + +typedef natural_t ipc_table_index_t; /* index into tables */ +typedef natural_t ipc_table_elems_t; /* size of tables */ +typedef natural_t ipc_entry_bits_t; +typedef ipc_table_elems_t ipc_entry_num_t; /* number of entries */ +typedef ipc_table_index_t ipc_port_request_index_t; + +typedef mach_port_name_t mach_port_index_t; /* index values */ +typedef mach_port_name_t mach_port_gen_t; /* generation numbers */ + +typedef struct ipc_entry *ipc_entry_t; +typedef struct ipc_tree_entry *ipc_tree_entry_t; +typedef struct ipc_table_size *ipc_table_size_t; +typedef struct ipc_port_request *ipc_port_request_t; +typedef struct ipc_pset *ipc_pset_t; +typedef struct ipc_kmsg *ipc_kmsg_t; + +#define IE_NULL ((ipc_entry_t) 0) +#define ITE_NULL ((ipc_tree_entry_t) 0) +#define ITS_NULL ((ipc_table_size_t) 0) +#define ITS_SIZE_NONE ((ipc_table_elems_t) -1) +#define IPR_NULL ((ipc_port_request_t) 0) +#define IPS_NULL ((ipc_pset_t) 0) +#define IKM_NULL ((ipc_kmsg_t) 0) + +typedef void (*mach_msg_continue_t)(mach_msg_return_t); /* after wakeup */ + +#else /* MACH_KERNEL_PRIVATE */ -/* - * For kernel code that resides outside of mach - * we define empty structs so that everything will - * remain strongly typed, without giving out - * implementation details. 
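
That removed comment names the classic opaque-handle idiom, which the reorganized ipc_types.h still relies on: code outside MACH_KERNEL_PRIVATE sees only a forward declaration and a pointer typedef, so it stays strongly typed without ever seeing the layout. In miniature (object_lookup is an illustrative name):

    /* public view: layout hidden, handles still strongly typed */
    struct ipc_object;                       /* forward declaration only */
    typedef struct ipc_object *ipc_object_t;

    #define IPC_OBJECT_NULL ((ipc_object_t) 0)

    ipc_object_t object_lookup(int name);    /* callers pass handles around */

    /* but sizeof(struct ipc_object) or obj->field is a compile error out
     * here, which is exactly the point of the empty declaration */
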
- */ struct ipc_object ; -#endif /* !MACH_KERNEL_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ typedef struct ipc_object *ipc_object_t; @@ -54,6 +78,4 @@ typedef struct ipc_object *ipc_object_t; #define IPC_OBJECT_VALID(io) (((io) != IPC_OBJECT_NULL) && \ ((io) != IPC_OBJECT_DEAD)) -typedef void (*mach_msg_continue_t)(mach_msg_return_t); /* after wakeup */ - -#endif /* _IPC_TYPES_H_ */ +#endif /* _IPC_IPC_TYPES_H_ */ diff --git a/osfmk/ipc/mach_debug.c b/osfmk/ipc/mach_debug.c index ab6e47c9c..0d3699afa 100644 --- a/osfmk/ipc/mach_debug.c +++ b/osfmk/ipc/mach_debug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -71,6 +71,8 @@ #include #include #include +#include +#include #include #include #include @@ -93,15 +95,22 @@ * KERN_INVALID_RIGHT Name doesn't denote receive rights. */ +#if !MACH_IPC_DEBUG +kern_return_t +mach_port_get_srights( + __unused ipc_space_t space, + __unused mach_port_name_t name, + __unused mach_port_rights_t *srightsp) +{ + return KERN_FAILURE; +} +#else kern_return_t mach_port_get_srights( ipc_space_t space, mach_port_name_t name, mach_port_rights_t *srightsp) { -#if !MACH_IPC_DEBUG - return KERN_FAILURE; -#else ipc_port_t port; kern_return_t kr; mach_port_rights_t srights; @@ -119,8 +128,8 @@ mach_port_get_srights( *srightsp = srights; return KERN_SUCCESS; -#endif /* MACH_IPC_DEBUG */ } +#endif /* MACH_IPC_DEBUG */ /* * Routine: host_ipc_hash_info @@ -134,17 +143,24 @@ mach_port_get_srights( * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. */ +#if !MACH_IPC_DEBUG kern_return_t host_ipc_hash_info( - host_t host, - hash_info_bucket_array_t *infop, - mach_msg_type_number_t *countp) + __unused host_t host, + __unused hash_info_bucket_array_t *infop, + __unused mach_msg_type_number_t *countp) { -#if !MACH_IPC_DEBUG return KERN_FAILURE; +} #else +kern_return_t +host_ipc_hash_info( + host_t host, + hash_info_bucket_array_t *infop, + mach_msg_type_number_t *countp) +{ vm_offset_t addr; - vm_size_t size; + vm_size_t size = 0; hash_info_bucket_t *info; unsigned int potential, actual; kern_return_t kr; @@ -167,7 +183,7 @@ host_ipc_hash_info( if (info != *infop) kmem_free(ipc_kernel_map, addr, size); - size = round_page_32(actual * sizeof *info); + size = round_page(actual * sizeof *info); kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; @@ -188,13 +204,13 @@ host_ipc_hash_info( vm_map_copy_t copy; vm_size_t used; - used = round_page_32(actual * sizeof *info); + used = round_page(actual * sizeof *info); if (used != size) kmem_free(ipc_kernel_map, addr + used, size - used); - kr = vm_map_copyin(ipc_kernel_map, addr, used, - TRUE, ©); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)used, TRUE, ©); assert(kr == KERN_SUCCESS); *infop = (hash_info_bucket_t *) copy; @@ -202,8 +218,8 @@ host_ipc_hash_info( } return KERN_SUCCESS; -#endif /* MACH_IPC_DEBUG */ } +#endif /* MACH_IPC_DEBUG */ /* * Routine: mach_port_space_info @@ -218,18 +234,28 @@ host_ipc_hash_info( * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. 
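
mach_debug.c now compiles an entire stub function when MACH_IPC_DEBUG is off, instead of wrapping the function body in #if, and __unused keeps the stub warning-clean. The pattern, self-contained (port_get_srights is an illustrative name and the KERN_* values are stand-ins):

    #include <stdint.h>

    #ifndef __unused
    #define __unused __attribute__((unused))
    #endif

    #define KERN_SUCCESS 0                   /* stand-in return codes */
    #define KERN_FAILURE 5

    #if !MACH_IPC_DEBUG
    int port_get_srights(__unused void *space, __unused uint32_t name,
                         __unused uint32_t *srightsp)
    {
        return KERN_FAILURE;                 /* interface compiled out */
    }
    #else
    int port_get_srights(void *space, uint32_t name, uint32_t *srightsp)
    {
        /* ... real lookup, only built under MACH_IPC_DEBUG ... */
        (void)space; (void)name;
        *srightsp = 0;
        return KERN_SUCCESS;
    }
    #endif

Keeping one complete function per configuration also keeps the MIG-visible symbol present in both builds, so callers need no conditional compilation of their own.
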
*/ +#if !MACH_IPC_DEBUG kern_return_t mach_port_space_info( - ipc_space_t space, - ipc_info_space_t *infop, + __unused ipc_space_t space, + __unused ipc_info_space_t *infop, + __unused ipc_info_name_array_t *tablep, + __unused mach_msg_type_number_t *tableCntp, + __unused ipc_info_tree_name_array_t *treep, + __unused mach_msg_type_number_t *treeCntp) +{ + return KERN_FAILURE; +} +#else +kern_return_t +mach_port_space_info( + ipc_space_t space, + ipc_info_space_t *infop, ipc_info_name_array_t *tablep, mach_msg_type_number_t *tableCntp, - ipc_info_tree_name_array_t *treep, + ipc_info_tree_name_array_t *treep, mach_msg_type_number_t *treeCntp) { -#if !MACH_IPC_DEBUG - return KERN_FAILURE; -#else ipc_info_name_t *table_info; unsigned int table_potential, table_actual; vm_offset_t table_addr; @@ -243,15 +269,16 @@ mach_port_space_info( ipc_entry_num_t tsize; mach_port_index_t index; kern_return_t kr; - ipc_entry_bits_t *capability; if (space == IS_NULL) return KERN_INVALID_TASK; /* start with in-line memory */ + table_size = 0; table_info = *tablep; table_potential = *tableCntp; + tree_size = 0; tree_info = *treep; tree_potential = *treeCntp; @@ -282,7 +309,7 @@ mach_port_space_info( kmem_free(ipc_kernel_map, table_addr, table_size); - table_size = round_page_32(table_actual * + table_size = round_page(table_actual * sizeof *table_info); kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size); @@ -303,7 +330,7 @@ mach_port_space_info( kmem_free(ipc_kernel_map, tree_addr, tree_size); - tree_size = round_page_32(tree_actual * + tree_size = round_page(tree_actual * sizeof *tree_info); kr = kmem_alloc(ipc_kernel_map, &tree_addr, tree_size); @@ -393,7 +420,7 @@ mach_port_space_info( /* kmem_alloc doesn't zero memory */ size_used = table_actual * sizeof *table_info; - rsize_used = round_page_32(size_used); + rsize_used = round_page(size_used); if (rsize_used != table_size) kmem_free(ipc_kernel_map, @@ -404,12 +431,12 @@ mach_port_space_info( bzero((char *) (table_addr + size_used), rsize_used - size_used); - kr = vm_map_unwire(ipc_kernel_map, table_addr, - table_addr + rsize_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(table_addr), + vm_map_round_page(table_addr + rsize_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, table_addr, rsize_used, - TRUE, ©); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr, + (vm_map_size_t)rsize_used, TRUE, ©); assert(kr == KERN_SUCCESS); *tablep = (ipc_info_name_t *) copy; @@ -431,7 +458,7 @@ mach_port_space_info( /* kmem_alloc doesn't zero memory */ size_used = tree_actual * sizeof *tree_info; - rsize_used = round_page_32(size_used); + rsize_used = round_page(size_used); if (rsize_used != tree_size) kmem_free(ipc_kernel_map, @@ -442,12 +469,12 @@ mach_port_space_info( bzero((char *) (tree_addr + size_used), rsize_used - size_used); - kr = vm_map_unwire(ipc_kernel_map, tree_addr, - tree_addr + rsize_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(tree_addr), + vm_map_round_page(tree_addr + rsize_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, tree_addr, rsize_used, - TRUE, ©); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)tree_addr, + (vm_map_size_t)rsize_used, TRUE, ©); assert(kr == KERN_SUCCESS); *treep = (ipc_info_tree_name_t *) copy; @@ -455,8 +482,8 @@ mach_port_space_info( } return KERN_SUCCESS; -#endif /* MACH_IPC_DEBUG */ } +#endif /* MACH_IPC_DEBUG */ /* * Routine: mach_port_dnrequest_info @@ -473,16 +500,24 @@ 
mach_port_space_info( * KERN_INVALID_RIGHT Name doesn't denote receive rights. */ +#if !MACH_IPC_DEBUG kern_return_t mach_port_dnrequest_info( - ipc_space_t space, - mach_port_name_t name, - unsigned int *totalp, - unsigned int *usedp) + __unused ipc_space_t space, + __unused mach_port_name_t name, + __unused unsigned int *totalp, + __unused unsigned int *usedp) { -#if !MACH_IPC_DEBUG return KERN_FAILURE; +} #else +kern_return_t +mach_port_dnrequest_info( + ipc_space_t space, + mach_port_name_t name, + unsigned int *totalp, + unsigned int *usedp) +{ unsigned int total, used; ipc_port_t port; kern_return_t kr; @@ -517,8 +552,8 @@ mach_port_dnrequest_info( *totalp = total; *usedp = used; return KERN_SUCCESS; -#endif /* MACH_IPC_DEBUG */ } +#endif /* MACH_IPC_DEBUG */ /* * Routine: mach_port_kernel_object [kernel call] @@ -536,16 +571,24 @@ mach_port_dnrequest_info( * send or receive rights. */ +#if !MACH_IPC_DEBUG kern_return_t mach_port_kernel_object( - ipc_space_t space, - mach_port_name_t name, - unsigned int *typep, - vm_offset_t *addrp) + __unused ipc_space_t space, + __unused mach_port_name_t name, + __unused unsigned int *typep, + __unused vm_offset_t *addrp) { -#if !MACH_IPC_DEBUG return KERN_FAILURE; +} #else +kern_return_t +mach_port_kernel_object( + ipc_space_t space, + mach_port_name_t name, + unsigned int *typep, + vm_offset_t *addrp) +{ ipc_entry_t entry; ipc_port_t port; kern_return_t kr; @@ -576,5 +619,5 @@ mach_port_kernel_object( ip_unlock(port); return KERN_SUCCESS; -#endif /* MACH_IPC_DEBUG */ } +#endif /* MACH_IPC_DEBUG */ diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index 251090f9e..f532b5230 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,22 +57,32 @@ * Exported message traps. See mach/message.h. 
*/ -#include - +#include #include #include #include #include +#include + +#include #include #include #include +#include +#include #include #include #include #include #include #include +#include +#include +#include + #include + +#include #include #include #include @@ -81,24 +91,24 @@ #include #include #include -#include -#include -#include - -#include #include + #include +#ifndef offsetof +#define offsetof(type, member) ((size_t)(&((type *)0)->member)) +#endif /* offsetof */ + /* - * Forward declarations + * Forward declarations - kernel internal routines */ mach_msg_return_t mach_msg_send( mach_msg_header_t *msg, mach_msg_option_t option, mach_msg_size_t send_size, - mach_msg_timeout_t timeout, + mach_msg_timeout_t send_timeout, mach_port_name_t notify); mach_msg_return_t mach_msg_receive( @@ -106,14 +116,16 @@ mach_msg_return_t mach_msg_receive( mach_msg_option_t option, mach_msg_size_t rcv_size, mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, + mach_msg_timeout_t rcv_timeout, void (*continuation)(mach_msg_return_t), mach_msg_size_t slist_size); +mach_msg_return_t mach_msg_receive_results(void); + mach_msg_return_t msg_receive_error( ipc_kmsg_t kmsg, - mach_msg_header_t *msg, + mach_vm_address_t msg_addr, mach_msg_option_t option, mach_port_seqno_t seqno, ipc_space_t space); @@ -158,19 +170,42 @@ mach_msg_send( mach_msg_header_t *msg, mach_msg_option_t option, mach_msg_size_t send_size, - mach_msg_timeout_t timeout, + mach_msg_timeout_t send_timeout, mach_port_name_t notify) { ipc_space_t space = current_space(); vm_map_t map = current_map(); ipc_kmsg_t kmsg; mach_msg_return_t mr; + mach_msg_size_t msg_and_trailer_size; + mach_msg_max_trailer_t *trailer; - mr = ipc_kmsg_get(msg, send_size, &kmsg); + if ((send_size < sizeof(mach_msg_header_t)) || (send_size & 3)) + return MACH_SEND_MSG_TOO_SMALL; - if (mr != MACH_MSG_SUCCESS) - return mr; + msg_and_trailer_size = send_size + MAX_TRAILER_SIZE; + + kmsg = ipc_kmsg_alloc(msg_and_trailer_size); + + if (kmsg == IKM_NULL) + return MACH_SEND_NO_BUFFER; + + (void) memcpy((void *) kmsg->ikm_header, (const void *) msg, send_size); + kmsg->ikm_header->msgh_size = send_size; + + /* + * reserve for the trailer the largest space (MAX_TRAILER_SIZE) + * However, the internal size field of the trailer (msgh_trailer_size) + * is initialized to the minimum (sizeof(mach_msg_trailer_t)), to optimize + * the cases where no implicit data is requested. 
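
The comment here describes reserving the largest trailer up front while initializing only its live size. A simplified sketch of the same layout with stand-in types; the real code validates send_size first (at least a header, a 4-byte multiple), stamps the task's security and audit tokens, and uses the FORMAT_0 trailer type:

    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>

    struct trailer {                 /* stand-in for mach_msg_max_trailer_t */
        uint32_t type;
        uint32_t size;               /* how much of the trailer is live */
        uint32_t sender_token[2];    /* largest variant reserved up front */
    };

    #define TRAILER_MINIMUM (2 * sizeof(uint32_t))  /* type + size only */
    #define MIN_HEADER      24u  /* stand-in for sizeof(mach_msg_header_t) */

    void *kmsg_from_user(const void *msg, size_t send_size)
    {
        if (send_size < MIN_HEADER || (send_size & 3))
            return NULL;                         /* MACH_SEND_MSG_TOO_SMALL */

        /* reserve the largest trailer now; widen the live size later only
         * if the receiver's options ask for the optional elements */
        char *kmsg = malloc(send_size + sizeof(struct trailer));
        if (kmsg == NULL)
            return NULL;                         /* MACH_SEND_NO_BUFFER */

        memcpy(kmsg, msg, send_size);

        struct trailer *t = (struct trailer *)(kmsg + send_size);
        t->type = 0;                             /* FORMAT_0 in the real code */
        t->size = TRAILER_MINIMUM;               /* optimize the common case */
        t->sender_token[0] = t->sender_token[1] = 0; /* tokens stamped here */
        return kmsg;
    }
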
+ */ + trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + send_size); + trailer->msgh_sender = current_thread()->task->sec_token; + trailer->msgh_audit = current_thread()->task->audit_token; + trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; + trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; + if (option & MACH_SEND_CANCEL) { if (notify == MACH_PORT_NULL) mr = MACH_SEND_INVALID_NOTIFY; @@ -183,11 +218,13 @@ mach_msg_send( return mr; } - mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, timeout); + mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, send_timeout); if (mr != MACH_MSG_SUCCESS) { mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL); - (void) ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size); + (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, + kmsg->ikm_header->msgh_size); + ipc_kmsg_free(kmsg); } return mr; @@ -223,11 +260,10 @@ mach_msg_receive_results(void) ipc_object_t object = self->ith_object; mach_msg_return_t mr = self->ith_state; - mach_msg_header_t *msg = self->ith_msg; + mach_vm_address_t msg_addr = self->ith_msg_addr; mach_msg_option_t option = self->ith_option; ipc_kmsg_t kmsg = self->ith_kmsg; mach_port_seqno_t seqno = self->ith_seqno; - mach_msg_size_t slist_size = self->ith_scatter_list_size; mach_msg_format_0_trailer_t *trailer; @@ -244,13 +280,13 @@ mach_msg_receive_results(void) * the queue). */ if (copyout((char *) &self->ith_msize, - (char *) &msg->msgh_size, + msg_addr + offsetof(mach_msg_header_t, msgh_size), sizeof(mach_msg_size_t))) mr = MACH_RCV_INVALID_DATA; goto out; } - if (msg_receive_error(kmsg, msg, option, seqno, space) + if (msg_receive_error(kmsg, msg_addr, option, seqno, space) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } @@ -258,8 +294,8 @@ mach_msg_receive_results(void) } trailer = (mach_msg_format_0_trailer_t *) - ((vm_offset_t)&kmsg->ikm_header + - round_msg(kmsg->ikm_header.msgh_size)); + ((vm_offset_t)kmsg->ikm_header + + round_msg(kmsg->ikm_header->msgh_size)); if (option & MACH_RCV_TRAILER_MASK) { trailer->msgh_seqno = seqno; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); @@ -275,7 +311,7 @@ mach_msg_receive_results(void) mach_msg_size_t slist_size = self->ith_scatter_list_size; mach_msg_body_t *slist; - slist = ipc_kmsg_copyin_scatter(msg, slist_size, kmsg); + slist = ipc_kmsg_get_scatter(msg_addr, slist_size, kmsg); mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL, slist); ipc_kmsg_free_scatter(slist, slist_size); } else { @@ -285,20 +321,20 @@ mach_msg_receive_results(void) if (mr != MACH_MSG_SUCCESS) { if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { - if (ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size + + if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } else { - if (msg_receive_error(kmsg, msg, option, seqno, space) + if (msg_receive_error(kmsg, msg_addr, option, seqno, space) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } goto out; } - mr = ipc_kmsg_put(msg, + mr = ipc_kmsg_put(msg_addr, kmsg, - kmsg->ikm_header.msgh_size + + kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size); out: return mr; @@ -310,20 +346,15 @@ mach_msg_receive( mach_msg_option_t option, mach_msg_size_t rcv_size, mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, + mach_msg_timeout_t rcv_timeout, void (*continuation)(mach_msg_return_t), mach_msg_size_t slist_size) { thread_t self = current_thread(); ipc_space_t space = 
current_space(); - vm_map_t map = current_map(); ipc_object_t object; ipc_mqueue_t mqueue; - ipc_kmsg_t kmsg; - mach_port_seqno_t seqno; mach_msg_return_t mr; - mach_msg_body_t *slist; - mach_msg_format_0_trailer_t *trailer; mr = ipc_mqueue_copyin(space, rcv_name, &mqueue, &object); if (mr != MACH_MSG_SUCCESS) { @@ -331,16 +362,16 @@ mach_msg_receive( } /* hold ref for object */ - self->ith_msg = msg; + self->ith_msg_addr = CAST_DOWN(mach_vm_address_t, msg); self->ith_object = object; self->ith_msize = rcv_size; self->ith_option = option; self->ith_scatter_list_size = slist_size; self->ith_continuation = continuation; - ipc_mqueue_receive(mqueue, option, rcv_size, timeout, THREAD_ABORTSAFE); - if ((option & MACH_RCV_TIMEOUT) && timeout == 0) - _mk_sp_thread_perhaps_yield(self); + ipc_mqueue_receive(mqueue, option, rcv_size, rcv_timeout, THREAD_ABORTSAFE); + if ((option & MACH_RCV_TIMEOUT) && rcv_timeout == 0) + thread_poll_yield(self); return mach_msg_receive_results(); } @@ -540,36 +571,35 @@ boolean_t enable_hotpath = TRUE; /* Patchable, just in case ... */ mach_msg_return_t mach_msg_overwrite_trap( - mach_msg_header_t *msg, - mach_msg_option_t option, - mach_msg_size_t send_size, - mach_msg_size_t rcv_size, - mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, - mach_port_name_t notify, - mach_msg_header_t *rcv_msg, - mach_msg_size_t scatter_list_size) + struct mach_msg_overwrite_trap_args *args) { + mach_vm_address_t msg_addr = args->msg; + mach_msg_option_t option = args->option; + mach_msg_size_t send_size = args->send_size; + mach_msg_size_t rcv_size = args->rcv_size; + mach_port_name_t rcv_name = args->rcv_name; + mach_msg_timeout_t msg_timeout = args->timeout; + mach_port_name_t notify = args->notify; + mach_vm_address_t rcv_msg_addr = args->rcv_msg; + mach_msg_size_t scatter_list_size = 0; /* NOT INITIALIZED - but not used in pactice */ + register mach_msg_header_t *hdr; mach_msg_return_t mr = MACH_MSG_SUCCESS; /* mask out some of the options before entering the hot path */ mach_msg_option_t masked_option = option & ~(MACH_SEND_TRAILER|MACH_RCV_TRAILER_MASK|MACH_RCV_LARGE); - int i; #if ENABLE_HOTPATH /* BEGINNING OF HOT PATH */ if ((masked_option == (MACH_SEND_MSG|MACH_RCV_MSG)) && enable_hotpath) { - register thread_t self = current_thread(); - register mach_msg_format_0_trailer_t *trailer; - - ipc_space_t space = current_act()->task->itk_space; + thread_t self = current_thread(); + mach_msg_format_0_trailer_t *trailer; + ipc_space_t space = self->task->itk_space; ipc_kmsg_t kmsg; register ipc_port_t dest_port; ipc_object_t rcv_object; - register ipc_mqueue_t rcv_mqueue; + ipc_mqueue_t rcv_mqueue; mach_msg_size_t reply_size; - ipc_kmsg_t rcv_kmsg; c_mmot_combined_S_R++; @@ -609,16 +639,17 @@ mach_msg_overwrite_trap( * server finds waiting messages and can't block. */ - mr = ipc_kmsg_get(msg, send_size, &kmsg); + mr = ipc_kmsg_get(msg_addr, send_size, &kmsg); if (mr != KERN_SUCCESS) { return mr; } - hdr = &kmsg->ikm_header; + hdr = kmsg->ikm_header; trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr + send_size); - fast_copyin: /* + * fast_copyin: + * * optimized ipc_kmsg_copyin/ipc_mqueue_copyin * * We have the request message data in kmsg. 
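
Note that this trap now receives its eight arguments through a struct mach_msg_overwrite_trap_args pointer, with the user buffers carried as mach_vm_address_t values rather than kernel pointers, part of the LP64 work visible throughout (see also the hdr/send_size recompute after the complex copyin below). A sketch of the marshalled-args trap pattern; user_addr_t and msg_trap are illustrative names:

    #include <stdint.h>

    typedef uint64_t user_addr_t;    /* wide enough for any user pointer */

    struct msg_trap_args {           /* arguments marshalled by trap glue */
        user_addr_t msg;             /* user buffer: an address, not a pointer */
        uint32_t    option;
        uint32_t    send_size;
        uint32_t    rcv_size;
        uint32_t    rcv_name;
        uint32_t    timeout;
        uint32_t    notify;
        user_addr_t rcv_msg;
    };

    int msg_trap(const struct msg_trap_args *args)
    {
        user_addr_t msg_addr = args->msg;  /* never dereferenced directly */

        /* a copyin(msg_addr, kbuf, args->send_size) would follow; the
         * address stays 64-bit clean for 32- and 64-bit callers alike */
        (void)msg_addr;
        return 0;
    }
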
@@ -676,6 +707,7 @@ mach_msg_overwrite_trap( } } else { entry = IE_NULL; + bits = 0; } if (entry == IE_NULL) { entry = ipc_entry_lookup(space, reply_name); @@ -725,6 +757,7 @@ mach_msg_overwrite_trap( } } else { entry = IE_NULL; + bits = 0; } if (entry == IE_NULL) { entry = ipc_entry_lookup(space, dest_name); @@ -855,7 +888,6 @@ mach_msg_overwrite_trap( register ipc_entry_t entry; register mach_port_gen_t gen; register mach_port_index_t index; - ipc_table_index_t *requests; { register mach_port_name_t dest_name = @@ -945,6 +977,7 @@ mach_msg_overwrite_trap( } } else { entry = IE_NULL; + bits = 0; } if (entry == IE_NULL) { entry = ipc_entry_lookup(space, rcv_name); @@ -1056,6 +1089,7 @@ mach_msg_overwrite_trap( wait_queue_t waitq; thread_t receiver; processor_t processor; + boolean_t still_running; spl_t s; s = splsched(); @@ -1081,22 +1115,15 @@ mach_msg_overwrite_trap( goto slow_send; } + assert(receiver->state & TH_WAIT); assert(receiver->wait_queue == waitq); assert(receiver->wait_event == IPC_MQUEUE_RECEIVE); /* - * Make sure that the scheduling state of the receiver is such - * that we can handoff to it here. If not, fall off. - * - * JMM - We have an opportunity here. If the thread is locked - * and we find it runnable, it may still be trying to get into - * thread_block on itself. We could just "hand him the message" - * and let him go (thread_go_locked()) and then fall down into a - * slow receive for ourselves. Only his RECEIVE_TOO_LARGE handling - * runs afoul of that. Clean this up! + * Make sure that the scheduling restrictions of the receiver + * are consistent with a handoff here (if it comes down to that). */ - if ((receiver->state & (TH_RUN|TH_WAIT)) != TH_WAIT || - receiver->sched_pri >= BASEPRI_RTQUEUES || + if ( receiver->sched_pri >= BASEPRI_RTQUEUES || receiver->processor_set != processor->processor_set || (receiver->bound_processor != PROCESSOR_NULL && receiver->bound_processor != processor)) { @@ -1111,8 +1138,8 @@ mach_msg_overwrite_trap( /* * Check that the receiver can stay on the hot path. */ - if (send_size + REQUESTED_TRAILER_SIZE(receiver->ith_option) > - receiver->ith_msize) { + if (ipc_kmsg_copyout_size(kmsg, receiver->map) + + REQUESTED_TRAILER_SIZE(receiver->ith_option) > receiver->ith_msize) { /* * The receiver can't accept the message. */ @@ -1146,8 +1173,8 @@ mach_msg_overwrite_trap( c_mach_msg_trap_switch_fast++; /* - * JMM - Go ahead and pull the receiver from the runq. If the - * runq wasn't the one for the mqueue, unlock it. + * Go ahead and pull the receiver from the waitq. If the + * waitq wasn't the one for the mqueue, unlock it. */ wait_queue_pull_thread_locked(waitq, receiver, @@ -1161,19 +1188,12 @@ mach_msg_overwrite_trap( receiver->ith_seqno = dest_mqueue->imq_seqno++; /* - * Update the scheduling state for the handoff. + * Unblock the receiver. If it was still running on another + * CPU, we'll give it a chance to run with the message where + * it is (and just select someother thread to run here). + * Otherwise, we'll invoke it here as part of the handoff. */ - receiver->state &= ~(TH_WAIT|TH_UNINT); - receiver->state |= TH_RUN; - - pset_run_incr(receiver->processor_set); - if (receiver->sched_mode & TH_MODE_TIMESHARE) - pset_share_incr(receiver->processor_set); - - receiver->wait_result = THREAD_AWAKENED; - - receiver->computation_metered = 0; - receiver->reason = AST_NONE; + still_running = thread_unblock(receiver, THREAD_AWAKENED); thread_unlock(receiver); @@ -1189,7 +1209,7 @@ mach_msg_overwrite_trap( * can hand off directly back to us. 
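
The handoff rework in the hunk above replaces hand-rolled scheduler state updates with thread_unblock, whose return value says whether the receiver never actually stopped running; the caller then either blocks normally or performs a direct thread_run handoff. A rough sketch with stand-in scheduler primitives (none of these signatures are xnu's):

    #include <stdbool.h>

    struct thread;

    /* stand-ins for the scheduler primitives on the hot path */
    bool thread_unblock_awakened(struct thread *t); /* true: still running */
    void thread_block_self(void);
    void thread_run_handoff(struct thread *self, struct thread *next);

    /* After stuffing the message directly into the receiver, either hand
     * it the CPU, or, if it is still running on another CPU, leave it be
     * and block here so the scheduler picks someone else. */
    void post_and_switch(struct thread *self, struct thread *receiver)
    {
        bool still_running = thread_unblock_awakened(receiver);

        if (still_running)
            thread_block_self();                /* receiver runs where it is */
        else
            thread_run_handoff(self, receiver); /* direct handoff */
    }
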
*/
	thread_lock(self);
-	self->ith_msg = (rcv_msg) ? rcv_msg : msg;
+	self->ith_msg_addr = (rcv_msg_addr) ? rcv_msg_addr : msg_addr;
	self->ith_object = rcv_object;	/* still holds reference */
	self->ith_msize = rcv_size;
	self->ith_option = option;
@@ -1199,16 +1219,23 @@ mach_msg_overwrite_trap(

	waitq = &rcv_mqueue->imq_wait_queue;
	(void)wait_queue_assert_wait64_locked(waitq,
					      IPC_MQUEUE_RECEIVE,
-					      THREAD_ABORTSAFE,
+					      THREAD_ABORTSAFE, 0,
					      self);
	thread_unlock(self);
	imq_unlock(rcv_mqueue);

	/*
-	 * Switch directly to receiving thread, and block
-	 * this thread as though it had called ipc_mqueue_receive.
+	 * If the receiving thread wasn't still running, we switch directly
+	 * to it here.  Otherwise we let the scheduler pick something to
+	 * run here.  In either case, block this thread as though it had
+	 * called ipc_mqueue_receive.
	 */
-	thread_run(self, ipc_mqueue_receive_continue, receiver);
+	if (still_running) {
+		splx(s);
+		thread_block(ipc_mqueue_receive_continue);
+	} else {
+		thread_run(self, ipc_mqueue_receive_continue, NULL, receiver);
+	}
	/* NOTREACHED */
	}
@@ -1420,8 +1447,10 @@ mach_msg_overwrite_trap(
			mr = ipc_kmsg_copyout_body(kmsg, space,
						   current_map(),
						   MACH_MSG_BODY_NULL);
+			/* hdr and send_size may be invalid now - don't use */
			if (mr != MACH_MSG_SUCCESS) {
-				if (ipc_kmsg_put(msg, kmsg, hdr->msgh_size +
+				if (ipc_kmsg_put(msg_addr, kmsg,
+				    kmsg->ikm_header->msgh_size +
				    trailer->msgh_trailer_size) ==
							MACH_RCV_INVALID_DATA)
					return MACH_RCV_INVALID_DATA;
@@ -1439,9 +1468,10 @@ mach_msg_overwrite_trap(
		/*NOTREACHED*/

	fast_put:
-		mr = ipc_kmsg_put(rcv_msg ? rcv_msg : msg,
+		mr = ipc_kmsg_put(rcv_msg_addr ? rcv_msg_addr : msg_addr,
				  kmsg,
-				  hdr->msgh_size + trailer->msgh_trailer_size);
+				  kmsg->ikm_header->msgh_size +
+				  trailer->msgh_trailer_size);
		if (mr != MACH_MSG_SUCCESS) {
			return MACH_RCV_INVALID_DATA;
		}
@@ -1458,10 +1488,7 @@ mach_msg_overwrite_trap(

	slow_copyin:
	    {
-		ipc_kmsg_t temp_kmsg;
-		mach_port_seqno_t temp_seqno;
-		ipc_object_t temp_rcv_object;
-		ipc_mqueue_t temp_rcv_mqueue;
+		mach_port_seqno_t temp_seqno = 0;
		register mach_port_name_t reply_name =
				(mach_port_name_t)hdr->msgh_local_port;

@@ -1479,8 +1506,15 @@ mach_msg_overwrite_trap(
			return(mr);
		}

-		/* try to get back on optimized path */
+		/*
+		 * LP64support - We have to recompute the header pointer
+		 * and send_size - as they could have changed during the
+		 * complex copyin.
+		 */
+		hdr = kmsg->ikm_header;
+		send_size = hdr->msgh_size;

+		/* try to get back on optimized path */
		if ((reply_name != rcv_name) ||
		    (hdr->msgh_bits & MACH_MSGH_BITS_CIRCULAR)) {
			HOT(c_mmot_cold_048++);
@@ -1577,7 +1611,7 @@ mach_msg_overwrite_trap(
			 * we cannot directly receive the reply
			 * message.
			 */
-			hdr = &kmsg->ikm_header;
+			hdr = kmsg->ikm_header;
			send_size = hdr->msgh_size;
			trailer = (mach_msg_format_0_trailer_t *)
				((vm_offset_t) hdr + round_msg(send_size));
@@ -1657,7 +1691,8 @@ mach_msg_overwrite_trap(
						       current_map(),
						       MACH_MSG_BODY_NULL);

-			(void) ipc_kmsg_put(msg, kmsg, hdr->msgh_size);
+			(void) ipc_kmsg_put(msg_addr, kmsg,
					kmsg->ikm_header->msgh_size);
			return(mr);
		}

@@ -1666,16 +1701,15 @@ mach_msg_overwrite_trap(
		 * We have sent the message.  Copy in the receive port.
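 *
 * ipc_mqueue_copyin() translates rcv_name into the message queue to
 * receive on and takes a reference on the underlying object, keeping
 * the queue alive while we wait; ipc_object_release() drops that
 * reference once the reply has been received.  In outline (mirroring
 * the code below):
 *
 *	mr = ipc_mqueue_copyin(space, rcv_name, &rcv_mqueue, &rcv_object);
 *	if (mr != MACH_MSG_SUCCESS)
 *		return(mr);
 *	... wait in ipc_mqueue_receive(), then ...
 *	ipc_object_release(rcv_object);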
*/ mr = ipc_mqueue_copyin(space, rcv_name, - &temp_rcv_mqueue, &temp_rcv_object); + &rcv_mqueue, &rcv_object); if (mr != MACH_MSG_SUCCESS) { return(mr); } - rcv_mqueue = temp_rcv_mqueue; - rcv_object = temp_rcv_object; /* hold ref for rcv_object */ - slow_receive: /* + * slow_receive: + * * Now we have sent the request and copied in rcv_name, * and hold ref for rcv_object (to keep mqueue alive). * Just receive a reply and try to get back to fast path. @@ -1689,7 +1723,6 @@ mach_msg_overwrite_trap( THREAD_ABORTSAFE); mr = self->ith_state; - temp_kmsg = self->ith_kmsg; temp_seqno = self->ith_seqno; ipc_object_release(rcv_object); @@ -1698,8 +1731,8 @@ mach_msg_overwrite_trap( return(mr); } - kmsg = temp_kmsg; - hdr = &kmsg->ikm_header; + kmsg = self->ith_kmsg; + hdr = kmsg->ikm_header; send_size = hdr->msgh_size; trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr + round_msg(send_size)); @@ -1719,9 +1752,11 @@ mach_msg_overwrite_trap( * ipc_kmsg_copyout/ipc_kmsg_put. */ - reply_size = send_size + trailer->msgh_trailer_size; + /* LP64support - have to compute real size as it would be received */ + reply_size = ipc_kmsg_copyout_size(kmsg, current_map()) + + REQUESTED_TRAILER_SIZE(option); if (rcv_size < reply_size) { - if (msg_receive_error(kmsg, msg, option, temp_seqno, + if (msg_receive_error(kmsg, msg_addr, option, temp_seqno, space) == MACH_RCV_INVALID_DATA) { mr = MACH_RCV_INVALID_DATA; return(mr); @@ -1736,12 +1771,12 @@ mach_msg_overwrite_trap( MACH_PORT_NULL, MACH_MSG_BODY_NULL); if (mr != MACH_MSG_SUCCESS) { if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { - if (ipc_kmsg_put(msg, kmsg, reply_size) == + if (ipc_kmsg_put(msg_addr, kmsg, reply_size) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } else { - if (msg_receive_error(kmsg, msg, option, + if (msg_receive_error(kmsg, msg_addr, option, temp_seqno, space) == MACH_RCV_INVALID_DATA) mr = MACH_RCV_INVALID_DATA; } @@ -1759,15 +1794,48 @@ mach_msg_overwrite_trap( #endif /* ENABLE_HOTPATH */ if (option & MACH_SEND_MSG) { - mr = mach_msg_send(msg, option, send_size, - timeout, notify); + ipc_space_t space = current_space(); + vm_map_t map = current_map(); + ipc_kmsg_t kmsg; + + mr = ipc_kmsg_get(msg_addr, send_size, &kmsg); + + if (mr != MACH_MSG_SUCCESS) + return mr; + + if (option & MACH_SEND_CANCEL) { + if (notify == MACH_PORT_NULL) + mr = MACH_SEND_INVALID_NOTIFY; + else + mr = ipc_kmsg_copyin(kmsg, space, map, notify); + } else + mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL); if (mr != MACH_MSG_SUCCESS) { + ipc_kmsg_free(kmsg); return mr; } + + mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, msg_timeout); + + if (mr != MACH_MSG_SUCCESS) { + mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL); + (void) ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size); + return mr; + } + } if (option & MACH_RCV_MSG) { - mach_msg_header_t *rcv; + thread_t self = current_thread(); + ipc_space_t space = current_space(); + ipc_object_t object; + ipc_mqueue_t mqueue; + + mr = ipc_mqueue_copyin(space, rcv_name, &mqueue, &object); + if (mr != MACH_MSG_SUCCESS) { + return mr; + } + /* hold ref for object */ /* * 1. MACH_RCV_OVERWRITE is on, and rcv_msg is our scatter list @@ -1776,14 +1844,21 @@ mach_msg_overwrite_trap( * alternate receive buffer (separate send and receive buffers). 
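 *
 * Whichever buffer is selected, the receive parameters are parked in
 * the current thread (the ith_* fields set below) so that
 * mach_msg_receive_results() can find them when the receive finishes,
 * whether we return directly or resume via the continuation:
 *
 *	self->ith_msg_addr = ...;	(buffer chosen below)
 *	self->ith_object = object;
 *	self->ith_msize = rcv_size;
 *	self->ith_option = option;
 *	self->ith_continuation = thread_syscall_return;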
*/ if (option & MACH_RCV_OVERWRITE) - rcv = rcv_msg; - else if (rcv_msg != MACH_MSG_NULL) - rcv = rcv_msg; + self->ith_msg_addr = rcv_msg_addr; + else if (rcv_msg_addr != (mach_vm_address_t)0) + self->ith_msg_addr = rcv_msg_addr; else - rcv = msg; - mr = mach_msg_receive(rcv, option, rcv_size, rcv_name, - timeout, thread_syscall_return, scatter_list_size); - thread_syscall_return(mr); + self->ith_msg_addr = msg_addr; + self->ith_object = object; + self->ith_msize = rcv_size; + self->ith_option = option; + self->ith_scatter_list_size = scatter_list_size; + self->ith_continuation = thread_syscall_return; + + ipc_mqueue_receive(mqueue, option, rcv_size, msg_timeout, THREAD_ABORTSAFE); + if ((option & MACH_RCV_TIMEOUT) && msg_timeout == 0) + thread_poll_yield(self); + return mach_msg_receive_results(); } return MACH_MSG_SUCCESS; @@ -1801,23 +1876,13 @@ mach_msg_overwrite_trap( mach_msg_return_t mach_msg_trap( - mach_msg_header_t *msg, - mach_msg_option_t option, - mach_msg_size_t send_size, - mach_msg_size_t rcv_size, - mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, - mach_port_name_t notify) + struct mach_msg_overwrite_trap_args *args) { - return mach_msg_overwrite_trap(msg, - option, - send_size, - rcv_size, - rcv_name, - timeout, - notify, - (mach_msg_header_t *)0, - (mach_msg_size_t)0); + kern_return_t kr; + args->rcv_msg = (mach_vm_address_t)0; + + kr = mach_msg_overwrite_trap(args); + return kr; } @@ -1837,7 +1902,7 @@ mach_msg_trap( mach_msg_return_t msg_receive_error( ipc_kmsg_t kmsg, - mach_msg_header_t *msg, + mach_vm_address_t msg_addr, mach_msg_option_t option, mach_port_seqno_t seqno, ipc_space_t space) @@ -1854,9 +1919,9 @@ msg_receive_error( * Build a minimal message with the requested trailer. */ trailer = (mach_msg_format_0_trailer_t *) - ((vm_offset_t)&kmsg->ikm_header + + ((vm_offset_t)kmsg->ikm_header + round_msg(sizeof(mach_msg_header_t))); - kmsg->ikm_header.msgh_size = sizeof(mach_msg_header_t); + kmsg->ikm_header->msgh_size = sizeof(mach_msg_header_t); bcopy( (char *)&trailer_template, (char *)trailer, sizeof(trailer_template)); @@ -1868,7 +1933,7 @@ msg_receive_error( /* * Copy the message to user space */ - if (ipc_kmsg_put(msg, kmsg, kmsg->ikm_header.msgh_size + + if (ipc_kmsg_put(msg_addr, kmsg, kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size) == MACH_RCV_INVALID_DATA) return(MACH_RCV_INVALID_DATA); else diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index f2925d2c6..f9dc656fc 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,7 +69,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -92,8 +93,7 @@ void mach_port_names_helper( mach_port_name_t name, mach_port_name_t *names, mach_port_type_t *types, - ipc_entry_num_t *actualp, - ipc_space_t space); + ipc_entry_num_t *actualp); void mach_port_gst_helper( ipc_pset_t pset, @@ -120,8 +120,7 @@ mach_port_names_helper( mach_port_name_t name, mach_port_name_t *names, mach_port_type_t *types, - ipc_entry_num_t *actualp, - ipc_space_t space) + ipc_entry_num_t *actualp) { ipc_entry_bits_t bits; ipc_port_request_index_t request; @@ -195,7 +194,6 @@ mach_port_names( mach_port_type_t **typesp, mach_msg_type_number_t *typesCnt) { - ipc_entry_bits_t *capability; ipc_tree_entry_t tentry; ipc_entry_t table; ipc_entry_num_t tsize; @@ -237,7 +235,7 @@ mach_port_names( /* upper bound on number of names in the space */ bound = space->is_table_size + space->is_tree_total; - size_needed = round_page_32(bound * sizeof(mach_port_name_t)); + size_needed = round_page(bound * sizeof(mach_port_name_t)); if (size_needed <= size) break; @@ -250,11 +248,11 @@ mach_port_names( } size = size_needed; - kr = vm_allocate(ipc_kernel_map, &addr1, size, TRUE); + kr = vm_allocate(ipc_kernel_map, &addr1, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; - kr = vm_allocate(ipc_kernel_map, &addr2, size, TRUE); + kr = vm_allocate(ipc_kernel_map, &addr2, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr1, size); return KERN_RESOURCE_SHORTAGE; @@ -262,16 +260,18 @@ mach_port_names( /* can't fault while we hold locks */ - kr = vm_map_wire(ipc_kernel_map, addr1, addr1 + size, - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr1), + vm_map_round_page(addr1 + size), + VM_PROT_READ|VM_PROT_WRITE, FALSE); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr1, size); kmem_free(ipc_kernel_map, addr2, size); return KERN_RESOURCE_SHORTAGE; } - kr = vm_map_wire(ipc_kernel_map, addr2, addr2 + size, - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr2), + vm_map_round_page(addr2 + size), + VM_PROT_READ|VM_PROT_WRITE, FALSE); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr1, size); kmem_free(ipc_kernel_map, addr2, size); @@ -299,7 +299,7 @@ mach_port_names( name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits)); mach_port_names_helper(timestamp, entry, name, names, - types, &actual, space); + types, &actual); } } @@ -311,7 +311,7 @@ mach_port_names( assert(IE_BITS_TYPE(tentry->ite_bits) != MACH_PORT_TYPE_NONE); mach_port_names_helper(timestamp, entry, name, names, - types, &actual, space); + types, &actual); } ipc_splay_traverse_finish(&space->is_tree); is_read_unlock(space); @@ -329,27 +329,27 @@ mach_port_names( vm_size_t vm_size_used; size_used = actual * sizeof(mach_port_name_t); - vm_size_used = round_page_32(size_used); + vm_size_used = round_page(size_used); /* * Make used memory pageable and get it into * copied-in form. Free any unused memory. 
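 *
 * The same out-of-line reply pattern is used for both buffers here
 * (an outline of the calls around this comment, not new logic):
 *
 *	vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
 *	vm_map_wire(...);	(no faults while the space lock is held)
 *	... fill in the names and types under the space lock ...
 *	vm_map_unwire(...);	(pageable again)
 *	vm_map_copyin(...);	(convert to copied-in form for the reply)
 *	if (vm_size_used != size)
 *		kmem_free(...);	(release the unused tail)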
*/ - kr = vm_map_unwire(ipc_kernel_map, - addr1, addr1 + vm_size_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr1), + vm_map_round_page(addr1 + vm_size_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_unwire(ipc_kernel_map, - addr2, addr2 + vm_size_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr2), + vm_map_round_page(addr2 + vm_size_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, addr1, size_used, - TRUE, &memory1); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr1, + (vm_map_size_t)size_used, TRUE, &memory1); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, addr2, size_used, - TRUE, &memory2); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr2, + (vm_map_size_t)size_used, TRUE, &memory2); assert(kr == KERN_SUCCESS); if (vm_size_used != size) { @@ -606,7 +606,7 @@ mach_port_allocate_full( mach_port_qos_t *qosp, mach_port_name_t *namep) { - ipc_kmsg_t kmsg; + ipc_kmsg_t kmsg = IKM_NULL; kern_return_t kr; if (space == IS_NULL) @@ -626,10 +626,9 @@ mach_port_allocate_full( mach_msg_size_t size = qosp->len + MAX_TRAILER_SIZE; if (right != MACH_PORT_RIGHT_RECEIVE) return (KERN_INVALID_VALUE); - kmsg = (ipc_kmsg_t)kalloc(ikm_plus_overhead(size)); + kmsg = (ipc_kmsg_t)ipc_kmsg_alloc(size); if (kmsg == IKM_NULL) return (KERN_RESOURCE_SHORTAGE); - ikm_init(kmsg, size); } switch (right) { @@ -642,12 +641,12 @@ mach_port_allocate_full( else kr = ipc_port_alloc(space, namep, &port); if (kr == KERN_SUCCESS) { - if (qosp->prealloc) + if (kmsg != IKM_NULL) ipc_kmsg_set_prealloc(kmsg, port); ip_unlock(port); - } else if (qosp->prealloc) + } else if (kmsg != IKM_NULL) ipc_kmsg_free(kmsg); break; } @@ -986,7 +985,6 @@ mach_port_gst_helper( mach_port_name_t *names, ipc_entry_num_t *actualp) { - ipc_pset_t ip_pset; mach_port_name_t name; assert(port != IP_NULL); @@ -1056,7 +1054,7 @@ mach_port_get_set_status( mach_port_name_t *names; ipc_pset_t pset; - kr = vm_allocate(ipc_kernel_map, &addr, size, TRUE); + kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; @@ -1125,7 +1123,7 @@ mach_port_get_set_status( /* didn't have enough memory; allocate more */ kmem_free(ipc_kernel_map, addr, size); - size = round_page_32(actual * sizeof(mach_port_name_t)) + PAGE_SIZE; + size = round_page(actual * sizeof(mach_port_name_t)) + PAGE_SIZE; } if (actual == 0) { @@ -1137,19 +1135,19 @@ mach_port_get_set_status( vm_size_t vm_size_used; size_used = actual * sizeof(mach_port_name_t); - vm_size_used = round_page_32(size_used); + vm_size_used = round_page(size_used); /* * Make used memory pageable and get it into * copied-in form. Free any unused memory. */ - kr = vm_map_unwire(ipc_kernel_map, - addr, addr + vm_size_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + vm_size_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, addr, size_used, - TRUE, &memory); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size_used, TRUE, &memory); assert(kr == KERN_SUCCESS); if (vm_size_used != size) @@ -1300,9 +1298,6 @@ mach_port_request_notification( ipc_port_t *previousp) { kern_return_t kr; - ipc_entry_t entry; - ipc_port_t port; - if (space == IS_NULL) return KERN_INVALID_TASK; @@ -1314,18 +1309,23 @@ mach_port_request_notification( /* * Requesting notifications on RPC ports is an error. 
*/ - kr = ipc_right_lookup_write(space, name, &entry); - if (kr != KERN_SUCCESS) - return kr; + { + ipc_port_t port; + ipc_entry_t entry; - port = (ipc_port_t) entry->ie_object; + kr = ipc_right_lookup_write(space, name, &entry); + if (kr != KERN_SUCCESS) + return kr; - if (port->ip_subsystem != NULL) { + port = (ipc_port_t) entry->ie_object; + + if (port->ip_subsystem != NULL) { + is_write_unlock(space); + panic("mach_port_request_notification: on RPC port!!"); + return KERN_INVALID_CAPABILITY; + } is_write_unlock(space); - panic("mach_port_request_notification: on RPC port!!"); - return KERN_INVALID_CAPABILITY; } - is_write_unlock(space); #endif /* NOTYET */ @@ -1726,7 +1726,6 @@ mach_port_extract_member( mach_port_name_t name, mach_port_name_t psname) { - mach_port_name_t oldname; ipc_object_t psobj; ipc_object_t obj; kern_return_t kr; @@ -1753,3 +1752,22 @@ mach_port_extract_member( return kr; } +/* + * task_set_port_space: + * + * Set port name space of task to specified size. + */ +kern_return_t +task_set_port_space( + ipc_space_t space, + int table_entries) +{ + kern_return_t kr; + + is_write_lock(space); + kr = ipc_entry_grow_table(space, table_entries); + if (kr == KERN_SUCCESS) + is_write_unlock(space); + return kr; +} + diff --git a/osfmk/ipc/port.h b/osfmk/ipc/port.h index 52ffaaf68..52e25a645 100644 --- a/osfmk/ipc/port.h +++ b/osfmk/ipc/port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -69,9 +69,6 @@ * the same (unsigned) type as mach_port_name_t. */ -typedef mach_port_name_t mach_port_index_t; /* index values */ -typedef mach_port_name_t mach_port_gen_t; /* generation numbers */ - #define MACH_PORT_UREFS_MAX ((mach_port_urefs_t) ((1 << 16) - 1)) @@ -81,6 +78,6 @@ typedef mach_port_name_t mach_port_gen_t; /* generation numbers */ (((urefs) + (delta)) > MACH_PORT_UREFS_MAX))) #define MACH_PORT_UREFS_UNDERFLOW(urefs, delta) \ - (((delta) < 0) && (-(delta) > (urefs))) + (((delta) < 0) && (((mach_port_urefs_t)-(delta)) > (urefs))) #endif /* _IPC_PORT_H_ */ diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index d13b7a849..695b356c6 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -28,6 +28,8 @@ #include +#include /* bcopy */ + int kdp_vm_read( caddr_t, caddr_t, unsigned int); int kdp_vm_write( caddr_t, caddr_t, unsigned int); @@ -41,7 +43,7 @@ int kdp_vm_write( caddr_t, caddr_t, unsigned int); #endif static kdp_dispatch_t - dispatch_table[KDP_REATTACH - KDP_CONNECT +1] = + dispatch_table[KDP_HOSTREBOOT - KDP_CONNECT +1] = { /* 0 */ kdp_connect, /* 1 */ kdp_disconnect, @@ -61,7 +63,8 @@ static kdp_dispatch_t /* F */ kdp_breakpoint_set, /*10 */ kdp_breakpoint_remove, /*11 */ kdp_regions, -/*12 */ kdp_reattach +/*12 */ kdp_reattach, +/*13 */ kdp_reboot }; kdp_glob_t kdp; @@ -118,7 +121,7 @@ kdp_packet( } req = rd->hdr.request; - if ((req < KDP_CONNECT) || (req > KDP_REATTACH)) { + if ((req < KDP_CONNECT) || (req > KDP_HOSTREBOOT)) { printf("kdp_packet bad request %x len %d seq %x key %x\n", rd->hdr.request, rd->hdr.len, rd->hdr.seq, rd->hdr.key); diff --git a/osfmk/kdp/kdp_core.h b/osfmk/kdp/kdp_core.h index f56992958..a716146be 100644 --- a/osfmk/kdp/kdp_core.h +++ b/osfmk/kdp/kdp_core.h @@ -1,5 +1,31 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* HISTORY + * 8 Aug. 2003 - Created (Derek Kumar) + */ + /* Various protocol definitions - * for the core transfer protocol, which is a variant of TFTP + * for the core transfer protocol, which is a variant of TFTP */ /* diff --git a/osfmk/kdp/kdp_internal.h b/osfmk/kdp/kdp_internal.h index da8059e74..37eccf3ab 100644 --- a/osfmk/kdp/kdp_internal.h +++ b/osfmk/kdp/kdp_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,6 +56,7 @@ typedef boolean_t unsigned short * ); +extern boolean_t kdp_packet( unsigned char *, @@ -63,9 +64,11 @@ kdp_packet( unsigned short * ); +extern boolean_t -kdp_remove_all_breakpoints (); +kdp_remove_all_breakpoints (void); +extern void kdp_exception( unsigned char *, @@ -76,42 +79,44 @@ kdp_exception( unsigned int ); +extern boolean_t kdp_exception_ack( unsigned char *, int ); +extern void kdp_panic( const char *msg ); -void -kdp_reset( - void -); - +extern void kdp_reboot( void ); +extern void kdp_us_spin( int usec ); +extern int kdp_intr_disbl( void ); +extern void kdp_intr_enbl( int s ); +extern kdp_error_t kdp_machine_read_regs( unsigned int cpu, @@ -120,6 +125,7 @@ kdp_machine_read_regs( int *size ); +extern kdp_error_t kdp_machine_write_regs( unsigned int cpu, @@ -128,13 +134,19 @@ kdp_machine_write_regs( int *size ); +extern void kdp_machine_hostinfo( kdp_hostinfo_t *hostinfo ); +extern void kdp_sync_cache( void ); +unsigned int +kdp_ml_get_breakinsn( + void +); diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index b88e7e58e..903054c96 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -42,6 +42,8 @@ #include #include +#include + #include #include @@ -125,15 +127,18 @@ static char router_ip_str[20]; static unsigned int panic_block = 0; static volatile unsigned int kdp_trigger_core_dump = 0; +static volatile unsigned int flag_kdp_trigger_reboot = 0; extern unsigned int not_in_kdp; +extern int kdp_vm_read( caddr_t, caddr_t, unsigned int); + void kdp_register_send_receive( kdp_send_t send, kdp_receive_t receive) { - unsigned int debug; + unsigned int debug=0; kdp_en_send_pkt = send; kdp_en_recv_pkt = receive; @@ -366,8 +371,6 @@ kdp_set_ip_and_mac_addresses( struct in_addr *ipaddr, struct ether_addr *macaddr) { - unsigned int debug = 0; - kdp_current_ip_address = ipaddr->s_addr; kdp_current_mac_address = *macaddr; } @@ -376,6 +379,7 @@ void kdp_set_gateway_mac(void *gatewaymac) { router_mac = *(struct ether_addr *)gatewaymac; + flag_router_mac_initialized = 1; } struct ether_addr @@ -750,8 +754,7 @@ kdp_raise_exception( { 
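	/*
	 * Note on the debugger-triggered actions handled on the way out of
	 * this routine: there is no protocol request yet for either one, so
	 * the remote debugger instead pokes kdp_trigger_core_dump or
	 * flag_kdp_trigger_reboot directly in kernel memory and continues;
	 * the flags are noticed and acted on below.
	 */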
int index;

-	extern unsigned int disableDebugOuput;
-	extern unsigned int disableConsoleOutput;
+	extern unsigned int disableConsoleOutput;

	disable_preemption();
@@ -820,17 +823,30 @@ kdp_raise_exception(
	 * Continuing after setting kdp_trigger_core_dump should do the
	 * trick.
	 */
+
	if (1 == kdp_trigger_core_dump) {
		kdp_flag &= ~PANIC_LOG_DUMP;
		kdp_flag |= KDP_PANIC_DUMP_ENABLED;
		kdp_panic_dump();
	}

+/* Trigger a reboot if the user has set this flag through the
+ * debugger.  Ideally, this would be done through the HOSTREBOOT packet
+ * in the protocol, but that will need gdb support, and when it's
+ * available, it should work automatically.
+ */
+	if (1 == flag_kdp_trigger_reboot) {
+		kdp_reboot();
+		/* If we're still around, reset the flag */
+		flag_kdp_trigger_reboot = 0;
+	}
+
	kdp_sync_cache();

	if (reattach_wait == 1)
		goto again;
-  exit_raise_exception:
+
+exit_raise_exception:
	enable_preemption();
}
@@ -952,6 +968,7 @@ int kdp_send_panic_packets (unsigned int request, char *corename,
			kdp_send_panic_pkt(request, corename, (txend - txstart),
					   (caddr_t) txstart);
		}
	}
+	return 0;
}

int
@@ -1118,6 +1135,7 @@ kdp_get_xnu_version(char *versionbuf)
	strcpy(versionbuf, vstr);
	return retval;
}
+
/* Primary dispatch routine for the system dump */
void
kdp_panic_dump()
@@ -1125,13 +1143,11 @@ kdp_panic_dump()
	char corename[50];
	char coreprefix[10];
	int panic_error;
-	extern char *debug_buf;
+
	extern vm_map_t kernel_map;

	extern char *inet_aton(const char *cp, struct in_addr *pin);

-	extern char *debug_buf;
-	extern char *debug_buf_ptr;
	uint64_t abstime;

	printf ("Entering system dump routine\n");
@@ -1186,7 +1202,7 @@ kdp_panic_dump()
		 */
		}
	}
-	/* These & 0xffs aren't necessary,but cut&paste is ever so convenient */
+
	printf("Routing via router MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
	    router_mac.ether_addr_octet[0] & 0xff,
	    router_mac.ether_addr_octet[1] & 0xff,
	    router_mac.ether_addr_octet[2] & 0xff,
	    router_mac.ether_addr_octet[3] & 0xff,
	    router_mac.ether_addr_octet[4] & 0xff,
	    router_mac.ether_addr_octet[5] & 0xff);

-	printf("Kernel map size is %d\n", get_vmmap_size(kernel_map));
+	printf("Kernel map size is %llu\n", (unsigned long long) get_vmmap_size(kernel_map));
	printf ("Sending write request for %s\n", corename);

-	if ((panic_error = kdp_send_panic_pkt (KDP_WRQ, corename, 0 , NULL) < 0)) {
+	if ((panic_error = kdp_send_panic_pkt (KDP_WRQ, corename, 0 , NULL)) < 0) {
		printf ("kdp_send_panic_pkt failed with error %d\n", panic_error);
		goto panic_dump_exit;
	}
diff --git a/osfmk/kdp/ml/i386/kdp_machdep.c b/osfmk/kdp/ml/i386/kdp_machdep.c
index 3bacce3a7..d4863a6ce 100644
--- a/osfmk/kdp/ml/i386/kdp_machdep.c
+++ b/osfmk/kdp/ml/i386/kdp_machdep.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -34,10 +35,21 @@
 #define dprintf(x)
 #endif

-void print_saved_state(void *);
-void kdp_call(void);
-void kdp_i386_trap(unsigned int, struct i386_saved_state *, kern_return_t, vm_offset_t);
-int kdp_getc(void);
+extern void kdreboot(void);
+
+void print_saved_state(void *);
+void kdp_call(void);
+int kdp_getc(void);
+boolean_t kdp_call_kdb(void);
+void kdp_getstate(i386_thread_state_t *);
+void kdp_setstate(i386_thread_state_t *);
+void kdp_print_phys(int);
+void kdp_i386_backtrace(void *, int);
+void kdp_i386_trap(
+	unsigned int,
+	struct i386_saved_state *,
+	kern_return_t,
+	vm_offset_t);

void
kdp_exception(
@@ -81,7 +93,7 @@ kdp_exception_ack(
{
	kdp_exception_ack_t *rq = (kdp_exception_ack_t *)pkt;

-	if (len < sizeof (*rq))
+	if (((unsigned int) len) < sizeof (*rq))
		return(FALSE);

	if (!rq->hdr.is_reply ||
rq->hdr.request != KDP_EXCEPTION) @@ -101,11 +113,12 @@ kdp_getstate( i386_thread_state_t *state ) { + static i386_thread_state_t null_state; struct i386_saved_state *saved_state; saved_state = (struct i386_saved_state *)kdp.saved_state; - *state = (i386_thread_state_t) { 0 }; + *state = null_state; state->eax = saved_state->eax; state->ebx = saved_state->ebx; state->ecx = saved_state->ecx; @@ -161,12 +174,14 @@ kdp_setstate( kdp_error_t kdp_machine_read_regs( - unsigned int cpu, - unsigned int flavor, + __unused unsigned int cpu, + __unused unsigned int flavor, char *data, - int *size + __unused int *size ) { + static i386_thread_fpstate_t null_fpstate; + switch (flavor) { case i386_THREAD_STATE: @@ -177,22 +192,23 @@ kdp_machine_read_regs( case i386_THREAD_FPSTATE: dprintf(("kdp_readregs THREAD_FPSTATE\n")); - *(i386_thread_fpstate_t *)data = (i386_thread_fpstate_t) { 0 }; + *(i386_thread_fpstate_t *)data = null_fpstate; *size = sizeof (i386_thread_fpstate_t); return KDPERR_NO_ERROR; default: - dprintf(("kdp_readregs bad flavor %d\n")); + dprintf(("kdp_readregs bad flavor %d\n", flavor)); + *size = 0; return KDPERR_BADFLAVOR; } } kdp_error_t kdp_machine_write_regs( - unsigned int cpu, + __unused unsigned int cpu, unsigned int flavor, char *data, - int *size + __unused int *size ) { switch (flavor) { @@ -219,14 +235,12 @@ kdp_machine_hostinfo( kdp_hostinfo_t *hostinfo ) { - machine_slot_t m; int i; hostinfo->cpus_mask = 0; for (i = 0; i < machine_info.max_cpus; i++) { - m = &machine_slot[i]; - if (!m->is_cpu) + if (cpu_data_ptr[i] == NULL) continue; hostinfo->cpus_mask |= (1 << i); @@ -242,7 +256,7 @@ kdp_panic( const char *msg ) { - printf("kdp panic: %s\n", msg); + kprintf("kdp panic: %s\n", msg); __asm__ volatile("hlt"); } @@ -274,8 +288,6 @@ kdp_getc() void kdp_us_spin(int usec) { - extern void delay(int); - delay(usec/100); } @@ -285,10 +297,10 @@ void print_saved_state(void *state) saved_state = state; - printf("pc = 0x%x\n", saved_state->eip); - printf("cr3= 0x%x\n", saved_state->cr2); - printf("rp = TODO FIXME\n"); - printf("sp = 0x%x\n", saved_state->esp); + kprintf("pc = 0x%x\n", saved_state->eip); + kprintf("cr3= 0x%x\n", saved_state->cr2); + kprintf("rp = TODO FIXME\n"); + kprintf("sp = 0x%x\n", saved_state->esp); } @@ -311,6 +323,29 @@ typedef struct _cframe_t { unsigned args[0]; } cframe_t; +#include +extern pt_entry_t *DMAP2; +extern caddr_t DADDR2; + +void +kdp_print_phys(int src) +{ + unsigned int *iptr; + int i; + + *(int *) DMAP2 = 0x63 | (src & 0xfffff000); + invlpg((u_int) DADDR2); + iptr = (unsigned int *) DADDR2; + for (i = 0; i < 100; i++) { + kprintf("0x%x ", *iptr++); + if ((i % 8) == 0) + kprintf("\n"); + } + kprintf("\n"); + *(int *) DMAP2 = 0; + +} + #define MAX_FRAME_DELTA 65536 @@ -325,9 +360,9 @@ kdp_i386_backtrace(void *_frame, int nframes) (vm_offset_t)frame > VM_MAX_KERNEL_ADDRESS) { goto invalid; } - printf("frame %x called by %x ", + kprintf("frame 0x%x called by 0x%x ", frame, frame->caller); - printf("args %x %x %x %x\n", + kprintf("args 0x%x 0x%x 0x%x 0x%x\n", frame->args[0], frame->args[1], frame->args[2], frame->args[3]); if ((frame->prev < frame) || /* wrong direction */ @@ -338,7 +373,7 @@ kdp_i386_backtrace(void *_frame, int nframes) } return; invalid: - printf("invalid frame pointer %x\n",frame); + kprintf("invalid frame pointer 0x%x\n",frame); } void @@ -354,7 +389,8 @@ kdp_i386_trap( mp_kdp_enter(); if (trapno != T_INT3 && trapno != T_DEBUG) - printf("unexpected kernel trap %x eip %x\n", trapno, saved_state->eip); + kprintf("unexpected 
kernel trap 0x%x eip 0x%x cr2 0x%x \n", + trapno, saved_state->eip, saved_state->esp); switch (trapno) { @@ -419,7 +455,7 @@ kdp_i386_trap( break; } -// kdp_i386_backtrace((void *) saved_state->ebp, 10); + kdp_i386_backtrace((void *) saved_state->ebp, 10); kdp_raise_exception(exception, code, subcode, saved_state); @@ -433,7 +469,8 @@ kdp_call_kdb( return(FALSE); } -unsigned int kdp_ml_get_breakinsn() +unsigned int +kdp_ml_get_breakinsn(void) { return 0xcc; } diff --git a/osfmk/kdp/ml/i386/kdp_vm.c b/osfmk/kdp/ml/i386/kdp_vm.c index 053165392..6ad1202f4 100644 --- a/osfmk/kdp/ml/i386/kdp_vm.c +++ b/osfmk/kdp/ml/i386/kdp_vm.c @@ -27,6 +27,7 @@ unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); unsigned kdp_copy_kmem( caddr_t, caddr_t, unsigned); +int kern_dump(void); unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in kdp */ @@ -51,8 +52,9 @@ unsigned kdp_vm_write( { return kdp_copy_kmem(src, dst, len); } + /* A stub until i386 support is added for remote kernel core dumps */ -int kern_dump() +int kern_dump(void) { return 0; } diff --git a/osfmk/kdp/ml/ppc/kdp_machdep.c b/osfmk/kdp/ml/ppc/kdp_machdep.c index 249c55f40..8d90afd7d 100644 --- a/osfmk/kdp/ml/ppc/kdp_machdep.c +++ b/osfmk/kdp/ml/ppc/kdp_machdep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,6 +27,7 @@ #include #include #include +#include #define KDP_TEST_HARNESS 0 #if KDP_TEST_HARNESS @@ -382,21 +383,20 @@ kdp_machine_hostinfo( kdp_hostinfo_t *hostinfo ) { - machine_slot_t m; int i; hostinfo->cpus_mask = 0; hostinfo->cpu_type = 0; for (i = 0; i < machine_info.max_cpus; i++) { - m = &machine_slot[i]; - if (!m->is_cpu) + if ((PerProcTable[i].ppe_vaddr == (struct per_proc_info *)NULL) || + !(PerProcTable[i].ppe_vaddr->running)) continue; hostinfo->cpus_mask |= (1 << i); if (hostinfo->cpu_type == 0) { - hostinfo->cpu_type = m->cpu_type; - hostinfo->cpu_subtype = m->cpu_subtype; + hostinfo->cpu_type = slot_type(i); + hostinfo->cpu_subtype = slot_subtype(i); } } } @@ -414,7 +414,12 @@ kdp_panic( void kdp_reboot(void) { - halt_all_cpus(TRUE);; + printf("Attempting system restart..."); + /* Call the platform specific restart*/ + if (PE_halt_restart) + (*PE_halt_restart)(kPERestartCPU); + /* If we do reach this, give up */ + halt_all_cpus(TRUE); } int @@ -608,7 +613,6 @@ kdp_print_backtrace( { extern void kdp_print_registers(struct savearea *); extern void print_backtrace(struct savearea *); - extern unsigned int debug_mode, disableDebugOuput; disableDebugOuput = FALSE; debug_mode = TRUE; @@ -620,7 +624,7 @@ kdp_print_backtrace( while(1); } -unsigned int kdp_ml_get_breakinsn() +unsigned int kdp_ml_get_breakinsn(void) { return 0x7fe00008; } diff --git a/osfmk/kdp/ml/ppc/kdp_vm.c b/osfmk/kdp/ml/ppc/kdp_vm.c index 47c42b784..c54cc73a2 100644 --- a/osfmk/kdp/ml/ppc/kdp_vm.c +++ b/osfmk/kdp/ml/ppc/kdp_vm.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -37,10 +38,15 @@ #include #include +#include +#include #include #include #include +#include +#include + pmap_t kdp_pmap=0; boolean_t kdp_trans_off=0; @@ -49,6 +55,22 @@ boolean_t kdp_read_io =0; unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); +extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; +extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; + +/* 
XXX prototypes which should be in a common header file */
+addr64_t kdp_vtophys(pmap_t pmap, addr64_t va);
+int kern_dump(void);
+int kdp_dump_trap(int type, struct savearea *regs);
+/*
+ * XXX the following prototype doesn't match the declaration because the
+ * XXX actual declaration is wrong.
+ */
+extern int kdp_send_panic_packets(unsigned int request, char *corename,
+				  unsigned int length, caddr_t txstart);
+
+
+
 typedef struct {
	int	flavor;			/* the number for this flavor */
@@ -88,12 +110,13 @@ unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in

char command_buffer[512];

-static struct vm_object test_object;
+// XXX static struct vm_object test_object;

/*
 *
 */
-addr64_t kdp_vtophys(
+addr64_t
+kdp_vtophys(
	pmap_t pmap,
	addr64_t va)
{
@@ -106,19 +129,19 @@ addr64_t kdp_vtophys(
	pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL);	/* Shove in the page offset */
	return(pa);
}
-
-/*
- * Note that kdp_vm_read() does not translate the destination address.Therefore
- * there's an implicit assumption that the destination will be a statically
- * allocated structure, since those map to the same phys. and virt. addresses
+/* Verify that src is valid, and physically copy len bytes from src to
+ * dst, translating if necessary. If translation is enabled
+ * (kdp_trans_off is 0), a non-zero kdp_pmap specifies the pmap to use
+ * when translating src.
 */
+
unsigned kdp_vm_read(
	caddr_t src,
	caddr_t dst,
	unsigned len)
{
	addr64_t cur_virt_src, cur_virt_dst;
-	addr64_t cur_phys_src;
+	addr64_t cur_phys_src, cur_phys_dst;
	unsigned resid, cnt;
	unsigned int dummy;
	pmap_t pmap;
@@ -137,14 +160,19 @@ unsigned kdp_vm_read(

	while (resid != 0) {
+			if((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0)
+				goto exit;
+
			if(kdp_read_io == 0)
				if(!mapping_phys_lookup((ppnum_t)(cur_virt_src >> 12), &dummy))
					return 0;	/* Can't read where there's not any memory */

			cnt = 4096 - (cur_virt_src & 0xFFF);	/* Get length left on page */
+			if (cnt > (4096 - (cur_virt_dst & 0xFFF)))
+				cnt = 4096 - (cur_virt_dst & 0xFFF);

			if (cnt > resid)  cnt = resid;

-			bcopy_phys(cur_virt_src, cur_virt_dst, cnt);	/* Copy stuff over */
+			bcopy_phys(cur_virt_src, cur_phys_dst, cnt);	/* Copy stuff over */

			cur_virt_src += cnt;
			cur_virt_dst += cnt;
@@ -159,12 +187,20 @@ unsigned kdp_vm_read(
	else
		pmap = kernel_pmap;					/* otherwise, use kernel's */

	while (resid != 0) {
+/* Always translate the destination using the kernel_pmap.
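 * bcopy_phys() needs physical addresses on both sides, and a single
 * copy must not cross a page boundary on either side, so each pass
 * clamps the count to the smaller of the two page remainders (as the
 * code below does):
 *
 *	cnt = 4096 - (cur_virt_src & 0xFFF);
 *	if (cnt > (4096 - (cur_virt_dst & 0xFFF)))
 *		cnt = 4096 - (cur_virt_dst & 0xFFF);
 *	if (cnt > resid)
 *		cnt = resid;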
*/ + if((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) + goto exit; + + if((cur_phys_src = kdp_vtophys(pmap, cur_virt_src)) == 0) + goto exit; - if((cur_phys_src = kdp_vtophys(pmap, cur_virt_src)) == 0) goto exit; if(kdp_read_io == 0) if(!mapping_phys_lookup((ppnum_t)(cur_phys_src >> 12), &dummy)) goto exit; /* Can't read where there's not any memory */ cnt = 4096 - (cur_virt_src & 0xFFF); /* Get length left on page */ + if (cnt > (4096 - (cur_virt_dst & 0xFFF))) + cnt = 4096 - (cur_virt_dst & 0xFFF); + if (cnt > resid) cnt = resid; #ifdef KDP_VM_READ_DEBUG @@ -172,7 +208,7 @@ unsigned kdp_vm_read( pmap, cur_virt_src, cur_phys_src); #endif - bcopy_phys(cur_phys_src, cur_virt_dst, cnt); /* Copy stuff over */ + bcopy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ cur_virt_src +=cnt; cur_virt_dst +=cnt; @@ -210,6 +246,7 @@ unsigned kdp_vm_write( while (resid != 0) { if ((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) goto exit; + if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) goto exit; @@ -236,7 +273,7 @@ exit: static void -kern_collectth_state(thread_act_t th_act, tir_t *t) +kern_collectth_state(thread_t thread, tir_t *t) { vm_offset_t header; int hoffset, i ; @@ -264,7 +301,7 @@ kern_collectth_state(thread_act_t th_act, tir_t *t) flavors[i]; hoffset += sizeof(mythread_state_flavor_t); - if (machine_thread_get_kern_state(th_act, flavors[i].flavor, + if (machine_thread_get_kern_state(thread, flavors[i].flavor, (thread_state_t) (header+hoffset), &flavors[i].count) != KERN_SUCCESS) printf ("Failure in machine_thread_get_kern_state()\n"); @@ -277,10 +314,8 @@ kern_collectth_state(thread_act_t th_act, tir_t *t) int kdp_dump_trap( int type, - struct savearea *regs) + __unused struct savearea *regs) { - extern int kdp_flag; - printf ("An unexpected trap (type %d) occurred during the kernel dump, terminating.\n", type); kdp_send_panic_pkt (KDP_EOF, NULL, 0, ((void *) 0)); abort_panic_transfer(); @@ -291,11 +326,14 @@ kdp_dump_trap( kdp_reset(); kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state); - return; + return( 0 ); } +/* + * Kernel dump (limited to currently executing 32 bit mach_kernel only) + */ int -kern_dump() +kern_dump(void) { int error = 0; vm_map_t map; @@ -304,21 +342,18 @@ kern_dump() unsigned int hoffset = 0, foffset = 0, nfoffset = 0, vmoffset = 0; unsigned int max_header_size = 0; vm_offset_t header; - struct machine_slot *ms; struct mach_header *mh; struct segment_command *sc; - struct thread_command *tc; vm_size_t size; vm_prot_t prot = 0; vm_prot_t maxprot = 0; vm_inherit_t inherit = 0; - vm_offset_t offset; - int error1; + int error1 = 0; mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS]; vm_size_t nflavors; - int i; + vm_size_t i; int nesting_depth = 0; - kern_return_t kret; + kern_return_t kret = 0; struct vm_region_submap_info_64 vbr; int vbrcount = 0; tir_t tir1; @@ -328,12 +363,6 @@ kern_dump() unsigned int mach_section_count = 4; unsigned int num_sects_txed = 0; - - extern int SEGSIZE; - - extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; - extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; - map = kernel_map; not_in_kdp = 0; /* Tell vm functions not to acquire locks */ @@ -358,15 +387,14 @@ kern_dump() header = (vm_offset_t) command_buffer; /* - * Set up Mach-O header. + * Set up Mach-O header for currently executing 32 bit kernel. 
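 *
 * The dump is written as an MH_CORE Mach-O image: one segment command
 * per VM region, plus commands for the four special kernel sections
 * (per the sectTEXTB/sectDATAB/sectLINKB/sectPRELINKB externs above)
 * and the thread state, which is how the counts below are derived:
 *
 *	mh->ncmds = segment_count + thread_count + mach_section_count;
 *	mh->sizeofcmds = command_size;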
*/ printf ("Generated Mach-O header size was %d\n", header_size); mh = (struct mach_header *) header; - ms = &machine_slot[cpu_number()]; mh->magic = MH_MAGIC; - mh->cputype = ms->cpu_type; - mh->cpusubtype = ms->cpu_subtype; + mh->cputype = cpu_type(); + mh->cpusubtype = cpu_subtype(); /* XXX incorrect; should match kernel */ mh->filetype = MH_CORE; mh->ncmds = segment_count + thread_count + mach_section_count; mh->sizeofcmds = command_size; @@ -418,7 +446,8 @@ kern_dump() vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64; if((kret = vm_region_recurse_64(map, &vmoffset, &size, &nesting_depth, - &vbr, &vbrcount)) != KERN_SUCCESS) { + (vm_region_recurse_info_t)&vbr, + &vbrcount)) != KERN_SUCCESS) { break; } @@ -537,7 +566,7 @@ kern_dump() * not followed by a normal VM region; i.e. there will be no hole that * reaches to the end of the core file. */ - kern_collectth_state (current_act(), &tir1); + kern_collectth_state (current_thread(), &tir1); if ((panic_error = kdp_send_panic_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) { printf ("kdp_send_panic_pkt failed with error %d\n", panic_error); @@ -556,7 +585,6 @@ kern_dump() return (-1) ; } - out: if (error == 0) error = error1; return (error); diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile index 16361eef0..ab810f2c2 100644 --- a/osfmk/kern/Makefile +++ b/osfmk/kern/Makefile @@ -19,7 +19,7 @@ EXPORT_ONLY_FILES = \ kalloc.h \ kern_types.h \ lock.h \ - ledger.h \ + locks.h \ host.h \ mach_param.h \ macro_help.h \ @@ -27,13 +27,10 @@ EXPORT_ONLY_FILES = \ queue.h \ sched_prim.h \ simple_lock.h \ - simple_lock_types.h \ - sync_lock.h \ + startup.h \ task.h \ thread.h \ - thread_act.h \ thread_call.h \ - time_out.h \ wait_queue.h \ zalloc.h diff --git a/osfmk/kern/assert.h b/osfmk/kern/assert.h index 42718d5ca..fc390b5aa 100644 --- a/osfmk/kern/assert.h +++ b/osfmk/kern/assert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,16 +56,19 @@ /* assert.h 4.2 85/01/21 */ #include +#include #ifdef MACH_KERNEL_PRIVATE #include #endif +__BEGIN_DECLS /* Assert error */ extern void Assert( const char *file, int line, const char *expression); +__END_DECLS #if MACH_ASSERT @@ -73,11 +76,15 @@ extern void Assert( ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) #define assert_static(x) assert(x) +#define __assert_only + #else /* MACH_ASSERT */ #define assert(ex) ((void)0) #define assert_static(ex) +#define __assert_only __unused + #endif /* MACH_ASSERT */ #endif /* _KERN_ASSERT_H_ */ diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c index 7540375ce..ed95755ec 100644 --- a/osfmk/kern/ast.c +++ b/osfmk/kern/ast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,7 +60,6 @@ */ #include -#include #include #include @@ -70,24 +69,18 @@ #include #include #include -#include -#include #include #include +#include #include -volatile ast_t need_ast[NCPUS]; +#ifdef __ppc__ +#include // for CHUD AST hook +#endif void ast_init(void) { -#ifndef MACHINE_AST - register int i; - - for (i=0; istate & TH_IDLE)) { + if (!(thread->state & TH_IDLE)) { /* * Check for urgent preemption. 
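 *
 * (AST_URGENT marks a preemption that should be taken right away,
 * e.g. a higher-priority thread has been made runnable.  It is only
 * safe to block here if this thread is not in the middle of asserting
 * a wait, hence the wait_queue_assert_possible() check below.)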
*/ if ( (reasons & AST_URGENT) && - wait_queue_assert_possible(self) ) { + wait_queue_assert_possible(thread) ) { if (reasons & AST_PREEMPT) { counter(c_ast_taken_block++); - thread_block_reason(THREAD_CONTINUE_NULL, + thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_PREEMPT | AST_URGENT); } @@ -137,11 +147,8 @@ ast_taken( * Handle BSD hook. */ if (reasons & AST_BSD) { - extern void bsd_ast(thread_act_t act); - thread_act_t act = self->top_act; - - thread_ast_clear(act, AST_BSD); - bsd_ast(act); + thread_ast_clear(thread, AST_BSD); + bsd_ast(thread); } #endif @@ -159,15 +166,15 @@ ast_taken( if (reasons & AST_PREEMPT) { processor_t myprocessor = current_processor(); - if (csw_needed(self, myprocessor)) + if (csw_needed(thread, myprocessor)) reasons = AST_PREEMPT; else reasons = AST_NONE; } if ( (reasons & AST_PREEMPT) && - wait_queue_assert_possible(self) ) { + wait_queue_assert_possible(thread) ) { counter(c_ast_taken_block++); - thread_block_reason(thread_exception_return, AST_PREEMPT); + thread_block_reason((thread_continue_t)thread_exception_return, NULL, AST_PREEMPT); } } } @@ -182,30 +189,22 @@ void ast_check( processor_t processor) { - register thread_t self = processor->active_thread; + register thread_t thread = processor->active_thread; - processor->current_pri = self->sched_pri; - if (processor->state == PROCESSOR_RUNNING) { + processor->current_pri = thread->sched_pri; + if ( processor->state == PROCESSOR_RUNNING || + processor->state == PROCESSOR_SHUTDOWN ) { register ast_t preempt; -processor_running: /* * Propagate thread ast to processor. */ - ast_propagate(self->top_act->ast); + ast_propagate(thread->ast); /* * Context switch check. */ - if ((preempt = csw_check(self, processor)) != AST_NONE) + if ((preempt = csw_check(thread, processor)) != AST_NONE) ast_on(preempt); } - else - if ( processor->state == PROCESSOR_DISPATCHING || - processor->state == PROCESSOR_IDLE ) { - return; - } - else - if (processor->state == PROCESSOR_SHUTDOWN) - goto processor_running; } diff --git a/osfmk/kern/ast.h b/osfmk/kern/ast.h index 02a81b2ab..5a6de8d90 100644 --- a/osfmk/kern/ast.h +++ b/osfmk/kern/ast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,11 +57,9 @@ #ifndef _KERN_AST_H_ #define _KERN_AST_H_ -#include #include #include -#include #include #include #include @@ -98,8 +96,6 @@ typedef uint32_t ast_t; #define AST_SCHEDULING (AST_PREEMPTION | AST_YIELD | AST_HANDOFF) #define AST_PREEMPTION (AST_PREEMPT | AST_QUANTUM | AST_URGENT) -extern volatile ast_t need_ast[NCPUS]; - #ifdef MACHINE_AST /* * machine/ast.h is responsible for defining aston and astoff. @@ -123,6 +119,9 @@ extern void ast_taken( extern void ast_check( processor_t processor); +/* Pending ast mask for the current processor */ +extern ast_t *ast_pending(void); + /* * Per-thread ASTs are reset at context-switch time. */ @@ -132,40 +131,45 @@ extern void ast_check( #define AST_PER_THREAD (AST_APC | AST_BSD | MACHINE_AST_PER_THREAD) /* - * ast_needed(), ast_on(), ast_off(), ast_context(), and ast_propagate() + * ast_pending(), ast_on(), ast_off(), ast_context(), and ast_propagate() * assume splsched. 
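 *
 * A sketch of typical use (callers raise the spl themselves):
 *
 *	spl_t s = splsched();
 *	ast_on(AST_PREEMPT);
 *	splx(s);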
*/ -#define ast_needed(mycpu) (need_ast[mycpu] != AST_NONE) -#define ast_on_fast(reasons) \ -MACRO_BEGIN \ - int mycpu = cpu_number(); \ - if ((need_ast[mycpu] |= (reasons)) != AST_NONE) \ - { aston(mycpu); } \ +#define ast_on_fast(reasons) \ +MACRO_BEGIN \ + ast_t *myast = ast_pending(); \ + \ + if ((*myast |= (reasons)) != AST_NONE) \ + { aston(myast); } \ MACRO_END -#define ast_off_fast(reasons) \ -MACRO_BEGIN \ - int mycpu = cpu_number(); \ - if ((need_ast[mycpu] &= ~(reasons)) == AST_NONE) \ - { astoff(mycpu); } \ +#define ast_off_fast(reasons) \ +MACRO_BEGIN \ + ast_t *myast = ast_pending(); \ + \ + if ((*myast &= ~(reasons)) == AST_NONE) \ + { astoff(myast); } \ MACRO_END #define ast_propagate(reasons) ast_on(reasons) -#define ast_context(act, mycpu) \ -MACRO_BEGIN \ - assert((mycpu) == cpu_number()); \ - if ((need_ast[mycpu] = \ - ((need_ast[mycpu] &~ AST_PER_THREAD) | (act)->ast)) != AST_NONE) \ - { aston(mycpu); } \ - else \ - { astoff(mycpu); } \ +#define ast_context(act) \ +MACRO_BEGIN \ + ast_t *myast = ast_pending(); \ + \ + if ((*myast = ((*myast &~ AST_PER_THREAD) | (act)->ast)) != AST_NONE) \ + { aston(myast); } \ + else \ + { astoff(myast); } \ MACRO_END #define ast_on(reason) ast_on_fast(reason) #define ast_off(reason) ast_off_fast(reason) +/* + * NOTE: if thread is the current thread, thread_ast_set() should + * be followed by ast_propagate(). + */ #define thread_ast_set(act, reason) \ (hw_atomic_or(&(act)->ast, (reason))) #define thread_ast_clear(act, reason) \ @@ -173,9 +177,12 @@ MACRO_END #define thread_ast_clear_all(act) \ (hw_atomic_and(&(act)->ast, AST_NONE)) -/* - * NOTE: if thread is the current thread, thread_ast_set() should - * be followed by ast_propagate(). - */ +#ifdef MACH_BSD + +extern void astbsd_on(void); +extern void act_set_astbsd(thread_t); +extern void bsd_ast(thread_t); + +#endif /* MACH_BSD */ #endif /* _KERN_AST_H_ */ diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c index 5a86b23ad..57c4559d2 100644 --- a/osfmk/kern/bsd_kern.c +++ b/osfmk/kern/bsd_kern.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -20,17 +20,19 @@ * @APPLE_LICENSE_HEADER_END@ */ #include -#include -#include + +#include +#include #include -#include #include #include #include -#include -#include +#include #include #include +#include +#include +#include /* last */ #undef thread_should_halt #undef ipc_port_release @@ -41,27 +43,16 @@ task_t bsd_init_task = TASK_NULL; char init_task_failure_data[1024]; extern unsigned int not_in_kdp; /* Skip acquiring locks if we're in kdp */ -thread_act_t get_firstthread(task_t); -vm_map_t get_task_map(task_t); -ipc_space_t get_task_ipcspace(task_t); -boolean_t is_kerneltask(task_t); -boolean_t is_thread_idle(thread_t); -vm_offset_t get_map_min( vm_map_t); -vm_offset_t get_map_max( vm_map_t); +thread_t get_firstthread(task_t); int get_task_userstop(task_t); -int get_thread_userstop(thread_act_t); +int get_thread_userstop(thread_t); boolean_t thread_should_abort(thread_t); boolean_t current_thread_aborted(void); -void task_act_iterate_wth_args(task_t, void(*)(thread_act_t, void *), void *); +void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); void ipc_port_release(ipc_port_t); boolean_t is_thread_active(thread_t); -kern_return_t get_thread_waitresult(thread_t); -vm_size_t get_vmmap_size(vm_map_t); -int get_vmmap_entries(vm_map_t); -int get_task_numacts(task_t); -thread_act_t get_firstthread(task_t task); -kern_return_t get_signalact(task_t , thread_act_t *, int); -void astbsd_on(void); +kern_return_t get_signalact(task_t , thread_t *, int); +int get_vmsubmap_entries(vm_map_t, vm_object_offset_t, vm_object_offset_t); /* * @@ -82,7 +73,7 @@ void set_bsdtask_info(task_t t,void * v) /* * */ -void *get_bsdthread_info(thread_act_t th) +void *get_bsdthread_info(thread_t th) { return(th->uthread); } @@ -93,121 +84,143 @@ void *get_bsdthread_info(thread_act_t th) * can't go away, so we make sure it is still active after * retrieving the first thread for extra safety. 
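 *
 * The task's thread list is a circular queue headed in the task, so
 * an empty list shows up as queue_first() returning the queue head
 * itself, which queue_end() detects (exactly the test below):
 *
 *	thread = (thread_t)queue_first(&task->threads);
 *	if (queue_end(&task->threads, (queue_entry_t)thread))
 *		thread = THREAD_NULL;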
*/ -thread_act_t get_firstthread(task_t task) +thread_t get_firstthread(task_t task) { - thread_act_t thr_act; + thread_t thread = (thread_t)queue_first(&task->threads); + + if (queue_end(&task->threads, (queue_entry_t)thread)) + thread = THREAD_NULL; - thr_act = (thread_act_t)queue_first(&task->threads); - if (queue_end(&task->threads, (queue_entry_t)thr_act)) - thr_act = THR_ACT_NULL; if (!task->active) - return(THR_ACT_NULL); - return(thr_act); + return (THREAD_NULL); + + return (thread); } -kern_return_t get_signalact(task_t task,thread_act_t * thact, int setast) +kern_return_t +get_signalact( + task_t task, + thread_t *result_out, + int setast) { - - thread_act_t inc; - thread_act_t ninc; - thread_act_t thr_act; - thread_t th; + kern_return_t result = KERN_SUCCESS; + thread_t inc, thread = THREAD_NULL; task_lock(task); + if (!task->active) { task_unlock(task); - return(KERN_FAILURE); + + return (KERN_FAILURE); } - thr_act = THR_ACT_NULL; - for (inc = (thread_act_t)queue_first(&task->threads); - !queue_end(&task->threads, (queue_entry_t)inc); - inc = ninc) { - th = act_lock_thread(inc); - if ((inc->active) && - ((th->state & (TH_ABORT|TH_ABORT_SAFELY)) != TH_ABORT)) { - thr_act = inc; - break; + for (inc = (thread_t)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)inc); ) { + thread_mtx_lock(inc); + if (inc->active && + (inc->state & (TH_ABORT|TH_ABORT_SAFELY)) != TH_ABORT) { + thread = inc; + break; } - act_unlock_thread(inc); - ninc = (thread_act_t)queue_next(&inc->task_threads); - } -out: - if (thact) - *thact = thr_act; - if (thr_act) { - if (setast) - act_set_astbsd(thr_act); - - act_unlock_thread(thr_act); - } + thread_mtx_unlock(inc); + + inc = (thread_t)queue_next(&inc->task_threads); + } + + if (result_out) + *result_out = thread; + + if (thread) { + if (setast) + act_set_astbsd(thread); + + thread_mtx_unlock(thread); + } + else + result = KERN_FAILURE; + task_unlock(task); - if (thr_act) - return(KERN_SUCCESS); - else - return(KERN_FAILURE); + return (result); } -kern_return_t check_actforsig(task_t task, thread_act_t thact, int setast) +kern_return_t +check_actforsig( + task_t task, + thread_t thread, + int setast) { - - thread_act_t inc; - thread_act_t ninc; - thread_act_t thr_act; - thread_t th; - int found=0; + kern_return_t result = KERN_FAILURE; + thread_t inc; task_lock(task); + if (!task->active) { task_unlock(task); - return(KERN_FAILURE); + + return (KERN_FAILURE); } - thr_act = THR_ACT_NULL; - for (inc = (thread_act_t)queue_first(&task->threads); - !queue_end(&task->threads, (queue_entry_t)inc); - inc = ninc) { - - if (inc != thact) { - ninc = (thread_act_t)queue_next(&inc->task_threads); - continue; - } - th = act_lock_thread(inc); - if ((inc->active) && - ((th->state & (TH_ABORT|TH_ABORT_SAFELY)) != TH_ABORT)) { - found = 1; - thr_act = inc; - break; - } - act_unlock_thread(inc); - /* ninc = (thread_act_t)queue_next(&inc->thr_acts); */ + for (inc = (thread_t)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)inc); ) { + if (inc == thread) { + thread_mtx_lock(inc); + + if (inc->active && + (inc->state & (TH_ABORT|TH_ABORT_SAFELY)) != TH_ABORT) { + result = KERN_SUCCESS; break; - } -out: - if (found) { - if (setast) - act_set_astbsd(thr_act); - - act_unlock_thread(thr_act); - } - task_unlock(task); + } - if (found) - return(KERN_SUCCESS); - else - return(KERN_FAILURE); + thread_mtx_unlock(inc); + break; + } + + inc = (thread_t)queue_next(&inc->task_threads); + } + + if (result == KERN_SUCCESS) { + if (setast) + 
act_set_astbsd(thread); + + thread_mtx_unlock(thread); + } + + task_unlock(task); + + return (result); } /* - * + * This is only safe to call from a thread executing in + * in the task's context or if the task is locked Otherwise, + * the map could be switched for the task (and freed) before + * we to return it here. */ vm_map_t get_task_map(task_t t) { return(t->map); } +vm_map_t get_task_map_reference(task_t t) +{ + vm_map_t m; + + if (t == NULL) + return VM_MAP_NULL; + + task_lock(t); + if (!t->active) { + task_unlock(t); + return VM_MAP_NULL; + } + m = t->map; + vm_map_reference_swap(m); + task_unlock(t); + return m; +} + /* * */ @@ -237,25 +250,19 @@ int is_64signalregset(void) vm_map_t swap_task_map(task_t task,vm_map_t map) { - thread_act_t act = current_act(); + thread_t thread = current_thread(); vm_map_t old_map; - if (task != act->task) + if (task != thread->task) panic("swap_task_map"); task_lock(task); old_map = task->map; - act->map = task->map = map; + thread->map = task->map = map; task_unlock(task); return old_map; } -vm_map_t -swap_act_map(thread_act_t thr_act,vm_map_t map) -{ - panic("swap_act_map"); -} - /* * */ @@ -274,7 +281,7 @@ pmap_t get_map_pmap(vm_map_t map) /* * */ -task_t get_threadtask(thread_act_t th) +task_t get_threadtask(thread_t th) { return(th->task); } @@ -319,7 +326,7 @@ getact_thread( /* * */ -vm_offset_t +vm_map_offset_t get_map_min( vm_map_t map) { @@ -329,13 +336,13 @@ get_map_min( /* * */ -vm_offset_t +vm_map_offset_t get_map_max( vm_map_t map) { return(vm_map_max(map)); } -vm_size_t +vm_map_size_t get_vmmap_size( vm_map_t map) { @@ -421,7 +428,7 @@ get_task_userstop( */ int get_thread_userstop( - thread_act_t th) + thread_t th) { return(th->user_stop_count); } @@ -433,8 +440,7 @@ boolean_t thread_should_abort( thread_t th) { - return(!th->top_act || - (th->state & (TH_ABORT|TH_ABORT_SAFELY)) == TH_ABORT); + return ((th->state & (TH_ABORT|TH_ABORT_SAFELY)) == TH_ABORT); } /* @@ -452,9 +458,8 @@ current_thread_aborted ( thread_t th = current_thread(); spl_t s; - if (!th->top_act || - ((th->state & (TH_ABORT|TH_ABORT_SAFELY)) == TH_ABORT && - th->interrupt_level != THREAD_UNINT)) + if ((th->state & (TH_ABORT|TH_ABORT_SAFELY)) == TH_ABORT && + (th->options & TH_OPT_INTMASK) != THREAD_UNINT) return (TRUE); if (th->state & TH_ABORT_SAFELY) { s = splsched(); @@ -472,19 +477,20 @@ current_thread_aborted ( */ void task_act_iterate_wth_args( - task_t task, - void (*func_callback)(thread_act_t, void *), - void *func_arg) + task_t task, + void (*func_callback)(thread_t, void *), + void *func_arg) { - thread_act_t inc, ninc; + thread_t inc; task_lock(task); - for (inc = (thread_act_t)queue_first(&task->threads); - !queue_end(&task->threads, (queue_entry_t)inc); - inc = ninc) { - ninc = (thread_act_t)queue_next(&inc->task_threads); - (void) (*func_callback)(inc, func_arg); - } + + for (inc = (thread_t)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)inc); ) { + (void) (*func_callback)(inc, func_arg); + inc = (thread_t)queue_next(&inc->task_threads); + } + task_unlock(task); } @@ -502,13 +508,6 @@ is_thread_active( return(th->active); } -kern_return_t -get_thread_waitresult( - thread_t th) -{ - return(th->wait_result); -} - void astbsd_on(void) { diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index 0494754cf..25fecf46f 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * * @@ -30,13 +30,13 @@ * machine-independent clock service layer. */ -#include #include +#include #include #include #include -#include + #include #include #include @@ -44,16 +44,21 @@ #include #include #include -#include #include #include #include + +#include #include -#include +#include #include #include +#include +#include +#include + /* * Exported interface */ @@ -72,14 +77,10 @@ static thread_call_data_t alarm_deliver; decl_simple_lock_data(static,calend_adjlock) static timer_call_data_t calend_adjcall; -static uint64_t calend_adjinterval, calend_adjdeadline; +static uint64_t calend_adjdeadline; static thread_call_data_t calend_wakecall; -/* backwards compatibility */ -int hz = HZ; /* GET RID OF THIS !!! */ -int tick = (1000000 / HZ); /* GET RID OF THIS !!! */ - /* external declarations */ extern struct clock clock_list[]; extern int clock_count; @@ -127,8 +128,9 @@ void calend_dowakeup( splx(s); /* - * Configure the clock system. (Not sure if we need this, - * as separate from clock_init()). + * clock_config: + * + * Called once at boot to configure the clock subsystem. */ void clock_config(void) @@ -136,13 +138,12 @@ clock_config(void) clock_t clock; register int i; - if (cpu_number() != master_cpu) - panic("clock_config"); + assert(cpu_number() == master_cpu); - simple_lock_init(&ClockLock, ETAP_MISC_CLOCK); + simple_lock_init(&ClockLock, 0); thread_call_setup(&alarm_deliver, clock_alarm_deliver, NULL); - simple_lock_init(&calend_adjlock, ETAP_MISC_CLOCK); + simple_lock_init(&calend_adjlock, 0); timer_call_setup(&calend_adjcall, calend_adjust_call, NULL); thread_call_setup(&calend_wakecall, calend_dowakeup, NULL); @@ -158,12 +159,19 @@ clock_config(void) } } + /* + * Initialize the timer callouts. + */ + timer_call_initialize(); + /* start alarm sequence numbers at 0 */ alrm_seqno = 0; } /* - * Initialize the clock system. + * clock_init: + * + * Called on a processor each time it is started. */ void clock_init(void) @@ -176,7 +184,7 @@ clock_init(void) */ for (i = 0; i < clock_count; i++) { clock = &clock_list[i]; - if (clock->cl_ops) + if (clock->cl_ops && clock->cl_ops->c_init) (*clock->cl_ops->c_init)(); } } @@ -284,15 +292,10 @@ clock_get_attributes( clock_attr_t attr, /* OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { - kern_return_t (*getattr)( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t *count); - if (clock == CLOCK_NULL) return (KERN_INVALID_ARGUMENT); - if (getattr = clock->cl_ops->c_getattr) - return((*getattr)(flavor, attr, count)); + if (clock->cl_ops->c_getattr) + return(clock->cl_ops->c_getattr(flavor, attr, count)); else return (KERN_FAILURE); } @@ -306,12 +309,10 @@ clock_set_time( mach_timespec_t new_time) { mach_timespec_t *clock_time; - kern_return_t (*settime)( - mach_timespec_t *clock_time); if (clock == CLOCK_NULL) return (KERN_INVALID_ARGUMENT); - if ((settime = clock->cl_ops->c_settime) == 0) + if (clock->cl_ops->c_settime == NULL) return (KERN_FAILURE); clock_time = &new_time; if (BAD_MACH_TIMESPEC(clock_time)) @@ -325,7 +326,7 @@ clock_set_time( /* * Set the new time.
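 */

(Two changes recur in the clock.c hunks above: optional entries in the clock ops table now get an explicit guard -- clock_init() also checks cl_ops->c_init before calling through it -- and the intermediate function-pointer temporaries are dropped in favor of calling through the table directly. A self-contained sketch of that style follows; the type and names are hypothetical stand-ins, not the real clock_ops layout.)

typedef int sample_return_t;
#define SAMPLE_FAILURE	5

/* An ops table in which individual operations are optional. */
struct sample_ops {
	sample_return_t	(*s_init)(void);		/* may be NULL */
	sample_return_t	(*s_getattr)(int flavor);	/* may be NULL */
};

sample_return_t
sample_getattr(struct sample_ops *ops, int flavor)
{
	/*
	 * Guard the optional slot, then call through it directly;
	 * no (*getattr)(...) temporary is needed.
	 */
	if (ops->s_getattr)
		return (ops->s_getattr(flavor));

	return (SAMPLE_FAILURE);
}

/*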
*/ - return ((*settime)(clock_time)); + return (clock->cl_ops->c_settime(clock_time)); } /* @@ -338,15 +339,10 @@ clock_set_attributes( clock_attr_t attr, mach_msg_type_number_t count) { - kern_return_t (*setattr)( - clock_flavor_t flavor, - clock_attr_t attr, - mach_msg_type_number_t count); - if (clock == CLOCK_NULL) return (KERN_INVALID_ARGUMENT); - if (setattr = clock->cl_ops->c_setattr) - return ((*setattr)(flavor, attr, count)); + if (clock->cl_ops->c_setattr) + return (clock->cl_ops->c_setattr(flavor, attr, count)); else return (KERN_FAILURE); } @@ -424,12 +420,13 @@ clock_alarm( */ kern_return_t clock_sleep_trap( - mach_port_name_t clock_name, - sleep_type_t sleep_type, - int sleep_sec, - int sleep_nsec, - mach_timespec_t *wakeup_time) + struct clock_sleep_trap_args *args) { + mach_port_name_t clock_name = args->clock_name; + sleep_type_t sleep_type = args->sleep_type; + int sleep_sec = args->sleep_sec; + int sleep_nsec = args->sleep_nsec; + mach_vm_address_t wakeup_time_addr = args->wakeup_time; clock_t clock; mach_timespec_t swtime; kern_return_t rvalue; @@ -454,8 +451,7 @@ clock_sleep_trap( * Return current time as wakeup time. */ if (rvalue != KERN_INVALID_ARGUMENT && rvalue != KERN_FAILURE) { - copyout((char *)&swtime, (char *)wakeup_time, - sizeof(mach_timespec_t)); + copyout((char *)&swtime, wakeup_time_addr, sizeof(mach_timespec_t)); } return (rvalue); } @@ -529,7 +525,7 @@ clock_sleep_internal( LOCK_CLOCK(s); if (alarm->al_status != ALARM_DONE) { assert(wait_result != THREAD_AWAKENED); - if ((alarm->al_prev)->al_next = alarm->al_next) + if (((alarm->al_prev)->al_next = alarm->al_next) != NULL) (alarm->al_next)->al_prev = alarm->al_prev; rvalue = KERN_ABORTED; } @@ -579,7 +575,7 @@ clock_alarm_intr( LOCK_CLOCK(s); alrm1 = (alarm_t) &clock->cl_alarm; - while (alrm2 = alrm1->al_next) { + while ((alrm2 = alrm1->al_next) != NULL) { alarm_time = &alrm2->al_time; if (CMP_MACH_TIMESPEC(alarm_time, clock_time) > 0) break; @@ -588,7 +584,7 @@ clock_alarm_intr( * Alarm has expired, so remove it from the * clock alarm list. */ - if (alrm1->al_next = alrm2->al_next) + if ((alrm1->al_next = alrm2->al_next) != NULL) (alrm1->al_next)->al_prev = alrm1; /* @@ -609,7 +605,7 @@ clock_alarm_intr( */ else { assert(alrm2->al_status == ALARM_CLOCK); - if (alrm2->al_next = alrmdone) + if ((alrm2->al_next = alrmdone) != NULL) alrmdone->al_prev = alrm2; else thread_call_enter(&alarm_deliver); @@ -635,16 +631,16 @@ clock_alarm_intr( static void clock_alarm_deliver( - thread_call_param_t p0, - thread_call_param_t p1) + __unused thread_call_param_t p0, + __unused thread_call_param_t p1) { register alarm_t alrm; kern_return_t code; spl_t s; LOCK_CLOCK(s); - while (alrm = alrmdone) { - if (alrmdone = alrm->al_next) + while ((alrm = alrmdone) != NULL) { + if ((alrmdone = alrm->al_next) != NULL) alrmdone->al_prev = (alarm_t) &alrmdone; UNLOCK_CLOCK(s); @@ -691,11 +687,11 @@ flush_alarms( */ LOCK_CLOCK(s); alrm1 = (alarm_t) &clock->cl_alarm; - while (alrm2 = alrm1->al_next) { + while ((alrm2 = alrm1->al_next) != NULL) { /* * Remove alarm from the clock alarm list. */ - if (alrm1->al_next = alrm2->al_next) + if ((alrm1->al_next = alrm2->al_next) != NULL) (alrm1->al_next)->al_prev = alrm1; /* @@ -713,7 +709,7 @@ flush_alarms( * kernel alarm_thread to service the alarm. 
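 */

(The list manipulations in clock_sleep_internal(), clock_alarm_intr(), and flush_alarms() keep their assignment-inside-condition form but add parentheses and an explicit NULL comparison, e.g. `while ((alrm2 = alrm1->al_next) != NULL)`: behavior is unchanged, and both the reader and the compiler can now tell the assignment is intentional. The same unlink idiom in a self-contained doubly linked list; this assumes a head sentinel so that prev is always valid, just as cl_alarm anchors the alarm list here.)

#include <stddef.h>

struct node {
	struct node	*next;
	struct node	*prev;
};

void
node_unlink(struct node *n)
{
	/* Splice out n; patch the back pointer only if a successor exists. */
	if ((n->prev->next = n->next) != NULL)
		n->next->prev = n->prev;
}

/*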
*/ assert(alrm2->al_status == ALARM_CLOCK); - if (alrm2->al_next = alrmdone) + if ((alrm2->al_next = alrmdone) != NULL) alrmdone->al_prev = alrm2; else thread_wakeup((event_t)&alrmdone); @@ -745,7 +741,7 @@ post_alarm( */ alarm_time = &alarm->al_time; alrm1 = (alarm_t) &clock->cl_alarm; - while (alrm2 = alrm1->al_next) { + while ((alrm2 = alrm1->al_next) != NULL) { queue_time = &alrm2->al_time; if (CMP_MACH_TIMESPEC(queue_time, alarm_time) > 0) break; @@ -834,55 +830,105 @@ clock_deadline_for_periodic_event( } void -mk_timebase_info( - uint32_t *delta, - uint32_t *abs_to_ns_numer, - uint32_t *abs_to_ns_denom, - uint32_t *proc_to_abs_numer, - uint32_t *proc_to_abs_denom) +mk_timebase_info_trap( + struct mk_timebase_info_trap_args *args) { + uint32_t *delta = args->delta; + uint32_t *abs_to_ns_numer = args->abs_to_ns_numer; + uint32_t *abs_to_ns_denom = args->abs_to_ns_denom; + uint32_t *proc_to_abs_numer = args->proc_to_abs_numer; + uint32_t *proc_to_abs_denom = args->proc_to_abs_denom; mach_timebase_info_data_t info; uint32_t one = 1; clock_timebase_info(&info); - copyout((void *)&one, (void *)delta, sizeof (uint32_t)); + copyout((void *)&one, CAST_USER_ADDR_T(delta), sizeof (uint32_t)); - copyout((void *)&info.numer, (void *)abs_to_ns_numer, sizeof (uint32_t)); - copyout((void *)&info.denom, (void *)abs_to_ns_denom, sizeof (uint32_t)); + copyout((void *)&info.numer, CAST_USER_ADDR_T(abs_to_ns_numer), sizeof (uint32_t)); + copyout((void *)&info.denom, CAST_USER_ADDR_T(abs_to_ns_denom), sizeof (uint32_t)); - copyout((void *)&one, (void *)proc_to_abs_numer, sizeof (uint32_t)); - copyout((void *)&one, (void *)proc_to_abs_denom, sizeof (uint32_t)); + copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_numer), sizeof (uint32_t)); + copyout((void *)&one, CAST_USER_ADDR_T(proc_to_abs_denom), sizeof (uint32_t)); } kern_return_t -mach_timebase_info( - mach_timebase_info_t out_info) +mach_timebase_info_trap( + struct mach_timebase_info_trap_args *args) { + mach_vm_address_t out_info_addr = args->info; mach_timebase_info_data_t info; clock_timebase_info(&info); - copyout((void *)&info, (void *)out_info, sizeof (info)); + copyout((void *)&info, out_info_addr, sizeof (info)); return (KERN_SUCCESS); } +static void +mach_wait_until_continue( + __unused void *parameter, + wait_result_t wresult) +{ + thread_syscall_return((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS); + /*NOTREACHED*/ +} + kern_return_t -mach_wait_until( +mach_wait_until_trap( + struct mach_wait_until_trap_args *args) +{ + uint64_t deadline = args->deadline; + wait_result_t wresult; + + wresult = assert_wait_deadline((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, deadline); + if (wresult == THREAD_WAITING) + wresult = thread_block(mach_wait_until_continue); + + return ((wresult == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS); +} + +/* + * Delay primitives. 
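 */

(mach_wait_until_trap() above is the kernel side of the public mach_wait_until() call, and the two timebase traps above it hand userland the conversion factors it needs. The following small program exercises both from userland; it uses only the public <mach/mach_time.h> interfaces, and the 100 ms interval is arbitrary.)

#include <stdio.h>
#include <stdint.h>
#include <mach/mach_time.h>

int
main(void)
{
	mach_timebase_info_data_t	tb;
	uint64_t			start, deadline;

	/* absolute-time units -> ns is (x * numer) / denom */
	mach_timebase_info(&tb);

	start = mach_absolute_time();

	/* 100 ms from now, converted from ns into absolute-time units */
	deadline = start + (UINT64_C(100000000) * tb.denom) / tb.numer;

	/* enters the kernel via mach_wait_until_trap() */
	mach_wait_until(deadline);

	printf("slept ~%llu ns\n", (unsigned long long)
	    ((mach_absolute_time() - start) * tb.numer / tb.denom));

	return (0);
}

/*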
+ */ +void +clock_delay_until( uint64_t deadline) { - int wait_result; + uint64_t now = mach_absolute_time(); + + if (now >= deadline) + return; - wait_result = assert_wait((event_t)&mach_wait_until, THREAD_ABORTSAFE); - if (wait_result == THREAD_WAITING) { - thread_set_timer_deadline(deadline); - wait_result = thread_block(THREAD_CONTINUE_NULL); - if (wait_result != THREAD_TIMED_OUT) - thread_cancel_timer(); + if ( (deadline - now) < (8 * sched_cswtime) || + get_preemption_level() != 0 || + ml_get_interrupts_enabled() == FALSE ) + machine_delay_until(deadline); + else { + assert_wait_deadline((event_t)clock_delay_until, THREAD_UNINT, deadline - sched_cswtime); + + thread_block(THREAD_CONTINUE_NULL); } +} - return ((wait_result == THREAD_INTERRUPTED)? KERN_ABORTED: KERN_SUCCESS); +void +delay_for_interval( + uint32_t interval, + uint32_t scale_factor) +{ + uint64_t end; + + clock_interval_to_deadline(interval, scale_factor, &end); + + clock_delay_until(end); +} + +void +delay( + int usec) +{ + delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC); } void @@ -914,8 +960,8 @@ clock_adjtime( static void calend_adjust_call( - timer_call_param_t p0, - timer_call_param_t p1) + __unused timer_call_param_t p0, + __unused timer_call_param_t p1) { uint32_t interval; spl_t s; @@ -941,12 +987,13 @@ clock_wakeup_calendar(void) thread_call_enter(&calend_wakecall); } +extern void IOKitResetTime(void); /* XXX */ + static void calend_dowakeup( - thread_call_param_t p0, - thread_call_param_t p1) + __unused thread_call_param_t p0, + __unused thread_call_param_t p1) { - void IOKitResetTime(void); IOKitResetTime(); } diff --git a/osfmk/kern/clock.h b/osfmk/kern/clock.h index d6d610c60..f90001360 100644 --- a/osfmk/kern/clock.h +++ b/osfmk/kern/clock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,18 +32,18 @@ #ifndef _KERN_CLOCK_H_ #define _KERN_CLOCK_H_ -#include +#include +#include #include +#include #include -#include +#include -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE -#include - /* * Actual clock alarm structure. Used for user clock_sleep() and * clock_alarm() calls. 
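 */

(clock_delay_until() above chooses between two strategies: it spins in machine_delay_until() when the remaining interval is shorter than roughly eight context-switch times, or when blocking is not legal because preemption is disabled or interrupts are off; otherwise it blocks with assert_wait_deadline(). Below is a userland analogue of the same tradeoff, with a hypothetical threshold standing in for 8 * sched_cswtime.)

#include <stdint.h>
#include <time.h>

#define SPIN_THRESHOLD_NS	50000ULL	/* illustrative cutoff only */

static uint64_t
now_ns(void)
{
	struct timespec	ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec);
}

/*
 * Wait for an absolute deadline: sleep when the wait is long
 * enough to be worth a context switch, spin out the remainder.
 */
void
wait_until_ns(uint64_t deadline)
{
	uint64_t	now = now_ns();

	while (now < deadline) {
		uint64_t	left = deadline - now;

		if (left >= SPIN_THRESHOLD_NS) {
			struct timespec	ts = {
				(time_t)(left / 1000000000ULL),
				(long)(left % 1000000000ULL)
			};
			nanosleep(&ts, NULL);	/* may wake early on a signal */
		}
		now = now_ns();
	}
}

/*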
Alarms are allocated from the alarm free @@ -165,40 +165,34 @@ extern void clock_set_timer_func( extern void clock_set_timer_deadline( uint64_t deadline); -extern void mk_timebase_info( - uint32_t *delta, - uint32_t *abs_to_ns_numer, - uint32_t *abs_to_ns_denom, - uint32_t *proc_to_abs_numer, - uint32_t *proc_to_abs_denom); - extern uint32_t clock_set_calendar_adjtime( int32_t *secs, int32_t *microsecs); extern uint32_t clock_adjust_calendar(void); -#endif /* MACH_KERNEL_PRIVATE */ +extern void machine_delay_until( + uint64_t deadline); -extern void clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs); +#include -extern void clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs); +extern void hertz_tick( +#if STAT_TIME + natural_t ticks, +#endif /* STAT_TIME */ + boolean_t usermode, /* executing user code */ + natural_t pc); -extern void clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs); +extern void absolutetime_to_microtime( + uint64_t abstime, + uint32_t *secs, + uint32_t *microsecs); -extern void clock_get_system_microtime( - uint32_t *secs, - uint32_t *microsecs); +#endif /* MACH_KERNEL_PRIVATE */ -extern void clock_get_system_nanotime( - uint32_t *secs, - uint32_t *nanosecs); +__BEGIN_DECLS + +#ifdef XNU_KERNEL_PRIVATE extern void clock_adjtime( int32_t *secs, @@ -212,38 +206,37 @@ extern void clock_gettimeofday( uint32_t *secs, uint32_t *microsecs); -#endif /* __APPLE_API_PRIVATE */ +extern void clock_set_calendar_microtime( + uint32_t secs, + uint32_t microsecs); -#ifdef __APPLE_API_UNSTABLE +extern void clock_get_boottime_nanotime( + uint32_t *secs, + uint32_t *nanosecs); -#define MACH_TIMESPEC_SEC_MAX (0 - 1) -#define MACH_TIMESPEC_NSEC_MAX (NSEC_PER_SEC - 1) +extern void clock_deadline_for_periodic_event( + uint64_t interval, + uint64_t abstime, + uint64_t *deadline); -#define MACH_TIMESPEC_MAX ((mach_timespec_t) { \ - MACH_TIMESPEC_SEC_MAX, \ - MACH_TIMESPEC_NSEC_MAX } ) -#define MACH_TIMESPEC_ZERO ((mach_timespec_t) { 0, 0 } ) +#endif /* XNU_KERNEL_PRIVATE */ -#define ADD_MACH_TIMESPEC_NSEC(t1, nsec) \ - do { \ - (t1)->tv_nsec += (clock_res_t)(nsec); \ - if ((clock_res_t)(nsec) > 0 && \ - (t1)->tv_nsec >= NSEC_PER_SEC) { \ - (t1)->tv_nsec -= NSEC_PER_SEC; \ - (t1)->tv_sec += 1; \ - } \ - else if ((clock_res_t)(nsec) < 0 && \ - (t1)->tv_nsec < 0) { \ - (t1)->tv_nsec += NSEC_PER_SEC; \ - (t1)->tv_sec -= 1; \ - } \ - } while (0) -#endif /* __APPLE_API_UNSTABLE */ +extern void clock_get_calendar_microtime( + uint32_t *secs, + uint32_t *microsecs); -extern mach_timespec_t clock_get_system_value(void); +extern void clock_get_calendar_nanotime( + uint32_t *secs, + uint32_t *nanosecs); -extern mach_timespec_t clock_get_calendar_value(void); +extern void clock_get_system_microtime( + uint32_t *secs, + uint32_t *microsecs); + +extern void clock_get_system_nanotime( + uint32_t *secs, + uint32_t *nanosecs); extern void clock_timebase_info( mach_timebase_info_t info); @@ -265,15 +258,6 @@ extern void clock_absolutetime_interval_to_deadline( uint64_t abstime, uint64_t *result); -extern void clock_deadline_for_periodic_event( - uint64_t interval, - uint64_t abstime, - uint64_t *deadline); - -extern void clock_delay_for_interval( - uint32_t interval, - uint32_t scale_factor); - extern void clock_delay_until( uint64_t deadline); @@ -285,9 +269,46 @@ extern void nanoseconds_to_absolutetime( uint64_t nanoseconds, uint64_t *result); -#if !defined(MACH_KERNEL_PRIVATE) && !defined(ABSOLUTETIME_SCALAR_TYPE) +#ifdef KERNEL_PRIVATE -#include +/* + * 
Obsolete interfaces. + */ + +#define MACH_TIMESPEC_SEC_MAX (0 - 1) +#define MACH_TIMESPEC_NSEC_MAX (NSEC_PER_SEC - 1) + +#define MACH_TIMESPEC_MAX ((mach_timespec_t) { \ + MACH_TIMESPEC_SEC_MAX, \ + MACH_TIMESPEC_NSEC_MAX } ) +#define MACH_TIMESPEC_ZERO ((mach_timespec_t) { 0, 0 } ) + +#define ADD_MACH_TIMESPEC_NSEC(t1, nsec) \ + do { \ + (t1)->tv_nsec += (clock_res_t)(nsec); \ + if ((clock_res_t)(nsec) > 0 && \ + (t1)->tv_nsec >= NSEC_PER_SEC) { \ + (t1)->tv_nsec -= NSEC_PER_SEC; \ + (t1)->tv_sec += 1; \ + } \ + else if ((clock_res_t)(nsec) < 0 && \ + (t1)->tv_nsec < 0) { \ + (t1)->tv_nsec += NSEC_PER_SEC; \ + (t1)->tv_sec -= 1; \ + } \ + } while (0) + + +extern mach_timespec_t clock_get_system_value(void); + +extern mach_timespec_t clock_get_calendar_value(void); + +extern void delay_for_interval( + uint32_t interval, + uint32_t scale_factor); +#ifndef MACH_KERNEL_PRIVATE + +#ifndef ABSOLUTETIME_SCALAR_TYPE #define clock_get_uptime(a) \ clock_get_uptime(__OSAbsoluteTimePtr(a)) @@ -313,29 +334,12 @@ extern void nanoseconds_to_absolutetime( #define nanoseconds_to_absolutetime(a, b) \ nanoseconds_to_absolutetime((a), __OSAbsoluteTimePtr(b)) -#define AbsoluteTime_to_scalar(x) (*(uint64_t *)(x)) - -/* t1 < = > t2 */ -#define CMP_ABSOLUTETIME(t1, t2) \ - (AbsoluteTime_to_scalar(t1) > \ - AbsoluteTime_to_scalar(t2)? (int)+1 : \ - (AbsoluteTime_to_scalar(t1) < \ - AbsoluteTime_to_scalar(t2)? (int)-1 : 0)) - -/* t1 += t2 */ -#define ADD_ABSOLUTETIME(t1, t2) \ - (AbsoluteTime_to_scalar(t1) += \ - AbsoluteTime_to_scalar(t2)) +#endif /* ABSOLUTETIME_SCALAR_TYPE */ -/* t1 -= t2 */ -#define SUB_ABSOLUTETIME(t1, t2) \ - (AbsoluteTime_to_scalar(t1) -= \ - AbsoluteTime_to_scalar(t2)) +#endif /* !MACH_KERNEL_PRIVATE */ -#define ADD_ABSOLUTETIME_TICKS(t1, ticks) \ - (AbsoluteTime_to_scalar(t1) += \ - (int32_t)(ticks)) +#endif /* KERNEL_PRIVATE */ -#endif +__END_DECLS #endif /* _KERN_CLOCK_H_ */ diff --git a/osfmk/kern/counters.c b/osfmk/kern/counters.c index c1704b6e4..ff186fe15 100644 --- a/osfmk/kern/counters.c +++ b/osfmk/kern/counters.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,73 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:35 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.13.7 1995/02/24 15:19:11 alanl - * Merge with DIPC2_SHARED. - * [1995/02/22 20:31:50 alanl] - * - * Revision 1.1.21.1 1994/11/04 10:06:28 dwm - * mk6 CR668 - 1.3b26 merge - * remove unused counters - * * Revision 1.1.2.4 1994/01/06 17:53:55 jeffc - * CR9854 -- Missing exception_raise_state counters - * CR10394 -- instrument vm_map_simplify - * * End1.3merge - * [1994/11/04 09:20:23 dwm] - * - * Revision 1.1.13.5 1994/09/23 02:15:57 ezf - * change marker to not FREE - * [1994/09/22 21:32:09 ezf] - * - * Revision 1.1.13.4 1994/09/16 06:29:22 dwm - * mk6 CR551 - remove unused SAFE_VM_FAULT pseudo-continuation, - * remove unused args from vm_page_wait, vm_fault(_page). - * Also, fix vm_page_wait counters. - * [1994/09/16 06:23:24 dwm] - * - * Revision 1.1.13.3 1994/09/10 21:45:51 bolinger - * Merge up to NMK17.3 - * [1994/09/08 19:57:27 bolinger] - * - * Revision 1.1.13.2 1994/06/21 17:28:40 dlb - * Add two vm_fault counters from latest NMK17 version. 
- * [94/06/17 dlb] - * - * Revision 1.1.13.1 1994/06/14 16:59:58 bolinger - * Merge up to NMK17.2. - * [1994/06/14 16:53:39 bolinger] - * - * Revision 1.1.8.2 1994/03/17 22:40:02 dwm - * dead code removal: thread swapping. - * [1994/03/17 21:29:18 dwm] - * - * Revision 1.1.8.1 1993/11/18 18:14:54 dwm - * Coloc: remove continuations entirely; - * [1993/11/18 18:09:54 dwm] - * - * Revision 1.1.2.3 1993/06/07 22:12:34 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:04:06 jeffc] - * - * Revision 1.1.2.2 1993/06/02 23:35:48 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:12:06 jeffc] - * - * Revision 1.1 1992/09/30 02:08:53 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ /* * Revision 2.3 91/05/14 16:40:19 mrt * Correcting copyright @@ -170,8 +103,6 @@ mach_counter_t c_thread_switch_block = 0; mach_counter_t c_thread_switch_handoff = 0; mach_counter_t c_vm_fault_page_block_backoff_kernel = 0; mach_counter_t c_vm_fault_page_block_busy_kernel = 0; -mach_counter_t c_vm_fault_retry_on_w_prot; -mach_counter_t c_vm_fault_wait_on_unlock; mach_counter_t c_vm_map_simplified = 0; mach_counter_t c_vm_map_simplify_called = 0; mach_counter_t c_vm_map_simplify_entry_called = 0; diff --git a/osfmk/kern/counters.h b/osfmk/kern/counters.h index 8fe140e0a..61b58852d 100644 --- a/osfmk/kern/counters.h +++ b/osfmk/kern/counters.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,87 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:35 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.13.7 1995/02/24 15:19:14 alanl - * Merge with DIPC2_SHARED. - * [1995/02/22 20:19:55 alanl] - * - * Revision 1.1.19.4 1994/11/04 10:16:23 dwm - * mk6 CR668 - 1.3b26 merge - * add counters, then remove unused items - * [1994/11/04 09:45:39 dwm] - * - * Revision 1.1.13.5 1994/09/23 02:16:08 ezf - * change marker to not FREE - * [1994/09/22 21:32:13 ezf] - * - * Revision 1.1.13.4 1994/09/16 06:29:25 dwm - * mk6 CR551 - remove unused SAFE_VM_FAULT pseudo-continuation, - * remove unused args from vm_page_wait, vm_fault(_page). - * Fix vm_page_wait counters, and rm thread_handoff counter. - * [1994/09/16 06:23:26 dwm] - * - * Revision 1.1.13.3 1994/09/10 21:45:55 bolinger - * Merge up to NMK17.3 - * [1994/09/08 19:57:29 bolinger] - * - * Revision 1.1.13.2 1994/06/21 17:28:43 dlb - * Add two vm_fault counters from NMK17. - * [94/06/17 dlb] - * - * Revision 1.1.10.3 1994/06/15 09:12:05 paire - * Corrected spelling of c_vm_fault_wait_on_unlock variable. - * [94/06/15 paire] - * - * Revision 1.1.13.1 1994/06/14 17:00:01 bolinger - * Merge up to NMK17.2. - * [1994/06/14 16:53:41 bolinger] - * - * Revision 1.1.10.2 1994/05/30 07:37:03 bernadat - * Added new c_vm_fault_retry_on_unlock and c_vm_fault_retry_on_w_prot. - * Sorted the whole list of counters. - * [paire@gr.osf.org] - * [94/05/26 bernadat] - * - * Revision 1.1.10.1 1994/02/11 14:25:21 paire - * Added missing c_exception_raise_state_block and - * c_exception_raise_state_identity_block counters. 
- * Change from NMK16.1 [93/08/09 paire] - * [94/02/04 paire] - * - * Revision 1.1.2.3 1993/06/07 22:12:36 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:04:11 jeffc] - * - * Revision 1.1.2.2 1993/06/02 23:35:54 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:12:09 jeffc] - * - * Revision 1.1 1992/09/30 02:29:32 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.3 91/05/14 16:40:30 mrt - * Correcting copyright - * - * Revision 2.2 91/03/16 15:16:06 rpd - * Created. - * [91/03/13 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -205,8 +124,6 @@ extern mach_counter_t c_vm_map_simplify_entry_called; extern mach_counter_t c_vm_page_wait_block; extern mach_counter_t c_vm_pageout_block; extern mach_counter_t c_vm_pageout_scan_block; -extern mach_counter_t c_vm_fault_retry_on_w_prot; -extern mach_counter_t c_vm_fault_wait_on_unlock; #endif /* MACH_COUNTERS */ #endif /* _KERN_COUNTERS_ */ diff --git a/osfmk/kern/cpu_data.c b/osfmk/kern/cpu_data.c deleted file mode 100644 index 10d1afb1f..000000000 --- a/osfmk/kern/cpu_data.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ - -#include -#include - -#include -#include -#include -#include diff --git a/osfmk/kern/cpu_data.h b/osfmk/kern/cpu_data.h index eee782728..25ea59254 100644 --- a/osfmk/kern/cpu_data.h +++ b/osfmk/kern/cpu_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -23,38 +23,31 @@ * @OSF_COPYRIGHT@ */ -#ifndef _CPU_DATA_H_ -#define _CPU_DATA_H_ +#ifdef XNU_KERNEL_PRIVATE -#include +#ifndef _KERN_CPU_DATA_H_ +#define _KERN_CPU_DATA_H_ -#ifdef __APPLE_API_PRIVATE +#include +#include #ifdef MACH_KERNEL_PRIVATE -#include -#include - #include -#else /* MACH_KERNEL_PRIVATE */ - -#define disable_preemption() _disable_preemption() -#define enable_preemption() _enable_preemption() -#define enable_preemption_no_check() _enable_preemption_no_check() - #endif /* MACH_KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ +__BEGIN_DECLS -#ifdef __APPLE_API_UNSTABLE +extern void _disable_preemption(void); +extern void _enable_preemption(void); -#if !defined(MACH_KERNEL_PRIVATE) +#define disable_preemption() _disable_preemption() +#define enable_preemption() _enable_preemption() -extern thread_t current_thread(void); -#endif /* MACH_KERNEL_PRIVATE */ +__END_DECLS -#endif /* __APPLE_API_UNSTABLE */ +#endif /* _KERN_CPU_DATA_H_ */ -#endif /* _CPU_DATA_H_ */ +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/kern/cpu_number.h b/osfmk/kern/cpu_number.h index 59d1f50c4..7c8fbdb87 100644 --- a/osfmk/kern/cpu_number.h +++ b/osfmk/kern/cpu_number.h @@ -48,21 +48,19 @@ * the rights to redistribute these changes. */ +#ifdef XNU_KERNEL_PRIVATE + #ifndef _KERN_CPU_NUMBER_H_ #define _KERN_CPU_NUMBER_H_ -#include - -#ifdef __APPLE_API_PRIVATE - #ifdef MACH_KERNEL_PRIVATE extern int master_cpu; #endif /* MACH_KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ - #include #endif /* _KERN_CPU_NUMBER_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index 5fedc28cc..c5f6cf3d9 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include @@ -62,6 +61,7 @@ #include #include #include +#include #include #ifdef __ppc__ @@ -77,11 +77,7 @@ unsigned int debug_mode=0; unsigned int disableDebugOuput = TRUE; unsigned int systemLogDiags = FALSE; unsigned int logPanicDataToScreen = FALSE; -#ifdef __ppc__ - unsigned int panicDebugging = FALSE; -#else - unsigned int panicDebugging = TRUE; -#endif +unsigned int panicDebugging = FALSE; int mach_assert = 1; @@ -89,12 +85,13 @@ const char *panicstr = (char *) 0; decl_simple_lock_data(,panic_lock) int paniccpu; volatile int panicwait; -volatile int nestedpanic= 0; +volatile unsigned int nestedpanic= 0; unsigned int panic_is_inited = 0; unsigned int return_on_panic = 0; -wait_queue_t save_waits[NCPUS]; +unsigned long panic_caller; char *debug_buf; +ppnum_t debug_buf_page; char *debug_buf_ptr; unsigned int debug_buf_size = 0; @@ -133,8 +130,9 @@ MACRO_END void panic_init(void) { - simple_lock_init(&panic_lock, ETAP_NO_TRACE); + simple_lock_init(&panic_lock, 0); panic_is_inited = 1; + panic_caller = 0; } void @@ -143,6 +141,7 @@ panic(const char *str, ...) va_list listp; spl_t s; thread_t thread; + wait_queue_t wq; s = splhigh(); disable_preemption(); @@ -152,13 +151,18 @@ panic(const char *str, ...) #endif thread = current_thread(); /* Get failing thread */ - save_waits[cpu_number()] = thread->wait_queue; /* Save the old value */ + wq = thread->wait_queue; /* Save the old value */ thread->wait_queue = 0; /* Clear the wait so we do not get double panics when we try locks */ if( logPanicDataToScreen ) disableDebugOuput = FALSE; debug_mode = TRUE; + + /* panic_caller is initialized to 0. If set, don't change it */ + if ( ! 
panic_caller ) + panic_caller = (unsigned long) __builtin_return_address(0); + restart: PANIC_LOCK(); if (panicstr) { @@ -185,7 +189,7 @@ restart: panicwait = 1; PANIC_UNLOCK(); - kdb_printf("panic(cpu %d): ", (unsigned) paniccpu); + kdb_printf("panic(cpu %d caller 0x%08X): ", (unsigned) paniccpu, panic_caller); va_start(listp, str); _doprnt(str, &listp, consdebug_putc, 0); va_end(listp); @@ -202,7 +206,7 @@ restart: PANIC_LOCK(); panicstr = (char *)0; PANIC_UNLOCK(); - thread->wait_queue = save_waits[cpu_number()]; /* Restore the wait queue */ + thread->wait_queue = wq; /* Restore the wait queue */ if (return_on_panic) { enable_preemption(); splx(s); @@ -239,6 +243,7 @@ debug_log_init(void) panic("cannot allocate debug_buf \n"); debug_buf_ptr = debug_buf; debug_buf_size = PAGE_SIZE; + debug_buf_page = pmap_find_phys(kernel_pmap, (addr64_t)(uintptr_t)debug_buf_ptr); } void diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index b4a8afba4..128b02ea3 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,9 +23,9 @@ #ifndef _KERN_DEBUG_H_ #define _KERN_DEBUG_H_ -#include +#include -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE extern unsigned int systemLogDiags; @@ -60,9 +60,8 @@ extern int db_run_mode; #define STEP_COUNT 6 #define STEP_TRACE 7 /* Show all calls to functions and returns */ -extern char *panicstr; - -extern unsigned int nestedpanic; +extern const char *panicstr; +extern volatile unsigned int nestedpanic; extern char *debug_buf; extern char *debug_buf_ptr; @@ -71,6 +70,8 @@ extern unsigned int debug_buf_size; extern void debug_log_init(void); extern void debug_putc(char); +extern void panic_init(void); + #endif /* MACH_KERNEL_PRIVATE */ #define DB_HALT 0x1 @@ -88,6 +89,13 @@ extern void debug_putc(char); #define DB_KERN_DUMP_ON_NMI 0x800 /* Trigger core dump on NMI */ #define DB_DBG_POST_CORE 0x1000 /*Wait in debugger after NMI core */ #define DB_PANICLOG_DUMP 0x2000 /* Send paniclog on panic,not core*/ -#endif /* __APPLE_API_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + +__BEGIN_DECLS + +extern void panic(const char *string, ...); + +__END_DECLS #endif /* _KERN_DEBUG_H_ */ diff --git a/osfmk/kern/etap.c b/osfmk/kern/etap.c deleted file mode 100644 index d1f5ad913..000000000 --- a/osfmk/kern/etap.c +++ /dev/null @@ -1,1866 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
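
(An aside on the debug.c hunk above: panic() now latches the address of its caller into panic_caller via __builtin_return_address(0), keeping the first value across nested panics so the banner names the original culprit. The builtin is ordinary GCC/clang C; here is a userland sketch of the latch-first-caller pattern, with hypothetical names.)

#include <stdio.h>

static unsigned long	first_caller;	/* 0 until the first report */

void
report(void)
{
	/* first_caller is initialized to 0; if already set, don't change it */
	if (!first_caller)
		first_caller = (unsigned long)__builtin_return_address(0);
}

int
main(void)
{
	report();
	report();	/* the second call does not overwrite the latch */
	printf("first caller 0x%08lx\n", first_caller);
	return (0);
}
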
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * File: etap.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for kernel_map, ipc_kernel_map */ -#if ETAP_MONITOR -#include -#include -#include -#include -#include -#include -#include -#include -/*#include */ -#include -#include -#include -#include -#include -#include -#include -#endif -#if MACH_KDB -#include -#include -#include -#if 0 /* WHY?? */ -#include -#endif -#endif - -/* - * Forwards - */ - -kern_return_t -etap_get_info(host_priv_t, int*, int*, vm_offset_t*, vm_offset_t*, - int*, int*, int*, int*); - -kern_return_t -etap_mon_reconfig(host_priv_t, int); - -kern_return_t -etap_new_probe(host_priv_t, vm_address_t, vm_size_t, boolean_t, vm_address_t); - -kern_return_t -etap_trace_thread(thread_act_t, boolean_t); - -void -etap_trace_reset(int); - -void -etap_interrupt_probe(int, int); - -void -etap_machcall_probe1(int); - -void -etap_machcall_probe2(void); - -void -etap_print(void); - - -#if ETAP - -#ifndef max -#define max(x,y) ((x > y) ? x : y) -#endif /* max */ - -event_table_t -etap_event_table_find(etap_event_t); - -/* ======================= - * ETAP Lock definitions - * ======================= - */ - -#if ETAP_LOCK_TRACE -#define etap_lock simple_lock_no_trace -#define etap_unlock simple_unlock_no_trace -#else /* ETAP_LOCK_TRACE */ -#define etap_lock simple_lock -#define etap_unlock simple_unlock -#endif /* ETAP_LOCK_TRACE */ - -#define event_table_lock() etap_lock(&event_table_lock) -#define event_table_unlock() etap_unlock(&event_table_lock) - -#define cumulative_buffer_lock(s) \ -MACRO_BEGIN \ - s = splhigh(); \ - etap_lock(&cbuff_lock); \ -MACRO_END - -#define cumulative_buffer_unlock(s) \ -MACRO_BEGIN \ - etap_unlock(&cbuff_lock); \ - splx(s); \ -MACRO_END - - -#if ETAP_LOCK_ACCUMULATE - -/* ======================================== - * ETAP Cumulative lock trace definitions - * ======================================== - */ - -int cbuff_width = ETAP_CBUFF_WIDTH; - -/* - * Cumulative buffer declaration - * - * For both protection and mapping purposes, the cumulative - * buffer must be aligned on a page boundary. Since the cumulative - * buffer must be statically defined, page boundary alignment is not - * garenteed. Instead, the buffer is allocated with 2 extra pages. - * The cumulative buffer pointer will round up to the nearest page. - * - * This will garentee page boundary alignment. - */ - -#define TWO_PAGES 16384 /* XXX does this apply ??*/ -#define CBUFF_ALLOCATED_SIZE sizeof(struct cumulative_buffer)+TWO_PAGES - -decl_simple_lock_data (,cbuff_lock) -#if MACH_LDEBUG -simple_lock_t cbuff_locks; -#else -simple_lock_data_t cbuff_locks; -#endif -char cbuff_allocated [CBUFF_ALLOCATED_SIZE]; -cumulative_buffer_t cbuff = {0}; - -#endif /* ETAP_LOCK_ACCUMULATE */ - -#if ETAP_MONITOR - -int mbuff_entries = ETAP_MBUFF_ENTRIES; - -/* - * Create an array of pointers to monitor buffers. - * The buffers themselves are allocated at run-time. 
- */ - -struct monitor_buffer *mbuff[NCPUS]; -#endif /* ETAP_MONITOR */ - -/* ========================== - * Event table declarations - * ========================== - */ - -decl_simple_lock_data(,event_table_lock) - -const struct event_table_entry event_table_init[] = -{ - - /*-----------------------------------------------------------------------* - * ETAP EVENT TRACE STATUS TEXT NAME DYNAMIC * - *-----------------------------------------------------------------------*/ - -#if ETAP_EVENT_MONITOR - {ETAP_P_USER_EVENT0 , ETAP_TRACE_OFF , "p_user_event0" , STATIC}, - {ETAP_P_USER_EVENT1 , ETAP_TRACE_OFF , "p_user_event1" , STATIC}, - {ETAP_P_USER_EVENT2 , ETAP_TRACE_OFF , "p_user_event2" , STATIC}, - {ETAP_P_USER_EVENT3 , ETAP_TRACE_OFF , "p_user_event3" , STATIC}, - {ETAP_P_USER_EVENT4 , ETAP_TRACE_OFF , "p_user_event4" , STATIC}, - {ETAP_P_USER_EVENT5 , ETAP_TRACE_OFF , "p_user_event5" , STATIC}, - {ETAP_P_USER_EVENT6 , ETAP_TRACE_OFF , "p_user_event6" , STATIC}, - {ETAP_P_USER_EVENT7 , ETAP_TRACE_OFF , "p_user_event7" , STATIC}, - {ETAP_P_USER_EVENT8 , ETAP_TRACE_OFF , "p_user_event8" , STATIC}, - {ETAP_P_USER_EVENT9 , ETAP_TRACE_OFF , "p_user_event9" , STATIC}, - {ETAP_P_USER_EVENT10 , ETAP_TRACE_OFF , "p_user_event10" , STATIC}, - {ETAP_P_USER_EVENT11 , ETAP_TRACE_OFF , "p_user_event11" , STATIC}, - {ETAP_P_USER_EVENT12 , ETAP_TRACE_OFF , "p_user_event12" , STATIC}, - {ETAP_P_USER_EVENT13 , ETAP_TRACE_OFF , "p_user_event13" , STATIC}, - {ETAP_P_USER_EVENT14 , ETAP_TRACE_OFF , "p_user_event14" , STATIC}, - {ETAP_P_USER_EVENT15 , ETAP_TRACE_OFF , "p_user_event15" , STATIC}, - {ETAP_P_USER_EVENT16 , ETAP_TRACE_OFF , "p_user_event16" , STATIC}, - {ETAP_P_USER_EVENT17 , ETAP_TRACE_OFF , "p_user_event17" , STATIC}, - {ETAP_P_USER_EVENT18 , ETAP_TRACE_OFF , "p_user_event18" , STATIC}, - {ETAP_P_USER_EVENT19 , ETAP_TRACE_OFF , "p_user_event19" , STATIC}, - {ETAP_P_USER_EVENT20 , ETAP_TRACE_OFF , "p_user_event20" , STATIC}, - {ETAP_P_USER_EVENT21 , ETAP_TRACE_OFF , "p_user_event21" , STATIC}, - {ETAP_P_USER_EVENT22 , ETAP_TRACE_OFF , "p_user_event22" , STATIC}, - {ETAP_P_USER_EVENT23 , ETAP_TRACE_OFF , "p_user_event23" , STATIC}, - {ETAP_P_USER_EVENT24 , ETAP_TRACE_OFF , "p_user_event24" , STATIC}, - {ETAP_P_USER_EVENT25 , ETAP_TRACE_OFF , "p_user_event25" , STATIC}, - {ETAP_P_USER_EVENT26 , ETAP_TRACE_OFF , "p_user_event26" , STATIC}, - {ETAP_P_USER_EVENT27 , ETAP_TRACE_OFF , "p_user_event27" , STATIC}, - {ETAP_P_USER_EVENT28 , ETAP_TRACE_OFF , "p_user_event28" , STATIC}, - {ETAP_P_USER_EVENT29 , ETAP_TRACE_OFF , "p_user_event29" , STATIC}, - {ETAP_P_USER_EVENT30 , ETAP_TRACE_OFF , "p_user_event30" , STATIC}, - {ETAP_P_USER_EVENT31 , ETAP_TRACE_OFF , "p_user_event31" , STATIC}, - {ETAP_P_SYSCALL_MACH , ETAP_TRACE_OFF , "p_syscall_mach" , STATIC}, - {ETAP_P_SYSCALL_UNIX , ETAP_TRACE_OFF , "p_syscall_unix" , STATIC}, - {ETAP_P_THREAD_LIFE , ETAP_TRACE_OFF , "p_thread_life" , STATIC}, - {ETAP_P_THREAD_CTX , ETAP_TRACE_OFF , "p_thread_ctx" , STATIC}, - {ETAP_P_RPC , ETAP_TRACE_OFF , "p_rpc" , STATIC}, - {ETAP_P_INTERRUPT , ETAP_TRACE_OFF , "p_interrupt" , STATIC}, - {ETAP_P_ACT_ABORT , ETAP_TRACE_OFF , "p_act_abort" , STATIC}, - {ETAP_P_PRIORITY , ETAP_TRACE_OFF , "p_priority" , STATIC}, - {ETAP_P_EXCEPTION , ETAP_TRACE_OFF , "p_exception" , STATIC}, - {ETAP_P_DEPRESSION , ETAP_TRACE_OFF , "p_depression" , STATIC}, - {ETAP_P_MISC , ETAP_TRACE_OFF , "p_misc" , STATIC}, - {ETAP_P_DETAP , ETAP_TRACE_OFF , "p_detap" , STATIC}, -#endif /* ETAP_EVENT_MONITOR */ - -#if ETAP_LOCK_TRACE - {ETAP_VM_BUCKET , 
ETAP_TRACE_OFF , "vm_bucket" , STATIC},/**/ - {ETAP_VM_HIMEM , ETAP_TRACE_OFF , "vm_himem" , STATIC}, - {ETAP_VM_MAP , ETAP_TRACE_OFF , "vm_map" , 1}, - {ETAP_VM_MAP_I , ETAP_TRACE_OFF , "vm_map_i" , 2}, - {ETAP_VM_MEMMAN , ETAP_TRACE_OFF , "vm_memman" , STATIC},/**/ - {ETAP_VM_MSYNC , ETAP_TRACE_OFF , "vm_msync" , 3}, - {ETAP_VM_OBJ , ETAP_TRACE_OFF , "vm_obj" , 4}, - {ETAP_VM_OBJ_CACHE , ETAP_TRACE_OFF , "vm_obj_cache" , 5}, - {ETAP_VM_PAGE_ALLOC , ETAP_TRACE_OFF , "vm_page_alloc" , STATIC},/**/ - {ETAP_VM_PAGEOUT , ETAP_TRACE_OFF , "vm_pageout" , STATIC}, - {ETAP_VM_PAGEQ , ETAP_TRACE_OFF , "vm_pageq" , STATIC}, - {ETAP_VM_PAGEQ_FREE , ETAP_TRACE_OFF , "vm_pageq_free" , STATIC}, - {ETAP_VM_PMAP , ETAP_TRACE_OFF , "vm_pmap" , 6}, - {ETAP_VM_PMAP_CACHE , ETAP_TRACE_OFF , "vm_pmap_cache" , STATIC}, - {ETAP_VM_PMAP_FREE , ETAP_TRACE_OFF , "vm_pmap_free" , STATIC}, - {ETAP_VM_PMAP_KERNEL , ETAP_TRACE_OFF , "vm_pmap_kern" , STATIC}, - {ETAP_VM_PMAP_SYS , ETAP_TRACE_OFF , "vm_pmap_sys" , 7}, - {ETAP_VM_PMAP_SYS_I , ETAP_TRACE_OFF , "vm_pmap_sys_i" , 8}, - {ETAP_VM_PMAP_UPDATE , ETAP_TRACE_OFF , "vm_pmap_update" , STATIC}, - {ETAP_VM_PREPPIN , ETAP_TRACE_OFF , "vm_preppin" , STATIC}, - {ETAP_VM_RESULT , ETAP_TRACE_OFF , "vm_result" , 9}, - {ETAP_VM_TEST , ETAP_TRACE_OFF , "vm_tes" , STATIC},/**/ - {ETAP_VM_PMAP_PHYSENTRIES, ETAP_TRACE_OFF , "vm_pmap_physentries", STATIC}, - {ETAP_VM_PMAP_SID , ETAP_TRACE_OFF , "vm_pmap_sid" , STATIC}, - {ETAP_VM_PMAP_PTE , ETAP_TRACE_OFF , "vm_pmap_pte" , STATIC}, - {ETAP_VM_PMAP_PTE_OVFLW , ETAP_TRACE_OFF , "vm_pmap_pte_ovflw", STATIC}, - {ETAP_VM_PMAP_TLB , ETAP_TRACE_OFF , "vm_pmap_tlb" , STATIC}, - - {ETAP_IPC_IHGB , ETAP_TRACE_OFF , "ipc_ihgb" , 10},/**/ - {ETAP_IPC_IS , ETAP_TRACE_OFF , "ipc_is" , 11},/**/ - {ETAP_IPC_IS_REF , ETAP_TRACE_OFF , "ipc_is_ref" , 12},/**/ - {ETAP_IPC_MQUEUE , ETAP_TRACE_OFF , "ipc_mqueue" , STATIC},/**/ - {ETAP_IPC_OBJECT , ETAP_TRACE_OFF , "ipc_object" , STATIC},/**/ - {ETAP_IPC_PORT_MULT , ETAP_TRACE_OFF , "ipc_port_mult" , 13},/**/ - {ETAP_IPC_PORT_TIME , ETAP_TRACE_OFF , "ipc_port_time" , 14},/**/ - {ETAP_IPC_RPC , ETAP_TRACE_OFF , "ipc_rpc" , 15},/**/ - {ETAP_IPC_PORT_ALLOCQ , ETAP_TRACE_OFF , "ipc_port_allocq" , STATIC},/**/ - - {ETAP_IO_AHA , ETAP_TRACE_OFF , "io_aha" , STATIC}, - {ETAP_IO_CHIP , ETAP_TRACE_OFF , "io_chip" , STATIC}, - {ETAP_IO_DEV , ETAP_TRACE_OFF , "io_dev" , 16},/**/ - {ETAP_IO_DEV_NUM , ETAP_TRACE_OFF , "io_dev_num" , STATIC}, - {ETAP_IO_DEV_PAGEH , ETAP_TRACE_OFF , "io_dev_pageh" , STATIC},/**/ - {ETAP_IO_DEV_PAGER , ETAP_TRACE_OFF , "io_dev_pager" , STATIC},/**/ - {ETAP_IO_DEV_PORT , ETAP_TRACE_OFF , "io_dev_port" , STATIC},/**/ - {ETAP_IO_DEV_REF , ETAP_TRACE_OFF , "io_dev_new" , 17},/**/ - {ETAP_IO_DEVINS , ETAP_TRACE_OFF , "io_devins" , STATIC}, - {ETAP_IO_DONE_LIST , ETAP_TRACE_OFF , "io_done_list" , STATIC}, - {ETAP_IO_DONE_Q , ETAP_TRACE_OFF , "io_doneq" , 18}, - {ETAP_IO_DONE_REF , ETAP_TRACE_OFF , "io_done_ref" , 19}, - {ETAP_IO_EAHA , ETAP_TRACE_OFF , "io_eaha" , STATIC}, - {ETAP_IO_HD_PROBE , ETAP_TRACE_OFF , "io_hd_probe" , STATIC}, - {ETAP_IO_IOPB , ETAP_TRACE_OFF , "io_iopb" , STATIC}, - {ETAP_IO_KDQ , ETAP_TRACE_OFF , "io_kdq" , STATIC}, - {ETAP_IO_KDTTY , ETAP_TRACE_OFF , "io_kdtty" , STATIC}, - {ETAP_IO_REQ , ETAP_TRACE_OFF , "io_req" , 20}, - {ETAP_IO_TARGET , ETAP_TRACE_OFF , "io_target" , STATIC}, - {ETAP_IO_TTY , ETAP_TRACE_OFF , "io_tty" , STATIC}, - {ETAP_IO_IOP_LOCK , ETAP_TRACE_OFF , "io_iop" , STATIC},/**/ - {ETAP_IO_DEV_NAME , ETAP_TRACE_OFF , "io_dev_name" , STATIC},/**/ 
- {ETAP_IO_CDLI , ETAP_TRACE_OFF , "io_cdli" , STATIC},/**/ - {ETAP_IO_HIPPI_FILTER , ETAP_TRACE_OFF , "io_hippi_filter" , STATIC},/**/ - {ETAP_IO_HIPPI_SRC , ETAP_TRACE_OFF , "io_hippi_src" , STATIC},/**/ - {ETAP_IO_HIPPI_DST , ETAP_TRACE_OFF , "io_hippi_dst" , STATIC},/**/ - {ETAP_IO_HIPPI_PKT , ETAP_TRACE_OFF , "io_hippi_pkt" , STATIC},/**/ - {ETAP_IO_NOTIFY , ETAP_TRACE_OFF , "io_notify" , STATIC},/**/ - {ETAP_IO_DATADEV , ETAP_TRACE_OFF , "io_data_device" , STATIC},/**/ - {ETAP_IO_OPEN , ETAP_TRACE_OFF , "io_open" , STATIC}, - {ETAP_IO_OPEN_I , ETAP_TRACE_OFF , "io_open_i" , STATIC}, - - {ETAP_THREAD_ACT , ETAP_TRACE_OFF , "th_act" , 21}, - {ETAP_THREAD_ACTION , ETAP_TRACE_OFF , "th_action" , STATIC}, - {ETAP_THREAD_LOCK , ETAP_TRACE_OFF , "th_lock" , 22}, - {ETAP_THREAD_LOCK_SET , ETAP_TRACE_OFF , "th_lock_set" , 23}, - {ETAP_THREAD_NEW , ETAP_TRACE_OFF , "th_new" , 24}, - {ETAP_THREAD_PSET , ETAP_TRACE_OFF , "th_pset" , STATIC},/**/ - {ETAP_THREAD_PSET_ALL , ETAP_TRACE_OFF , "th_pset_all" , STATIC}, - {ETAP_THREAD_PSET_RUNQ , ETAP_TRACE_OFF , "th_pset_runq" , STATIC}, - {ETAP_THREAD_PSET_IDLE , ETAP_TRACE_OFF , "th_pset_idle" , STATIC}, - {ETAP_THREAD_PSET_QUANT , ETAP_TRACE_OFF , "th_pset_quant" , STATIC}, - {ETAP_THREAD_PROC , ETAP_TRACE_OFF , "th_proc" , STATIC}, - {ETAP_THREAD_PROC_RUNQ , ETAP_TRACE_OFF , "th_proc_runq" , STATIC}, - {ETAP_THREAD_REAPER , ETAP_TRACE_OFF , "th_reaper" , STATIC}, - {ETAP_THREAD_RPC , ETAP_TRACE_OFF , "th_rpc" , 25}, - {ETAP_THREAD_SEMA , ETAP_TRACE_OFF , "th_sema" , 26}, - {ETAP_THREAD_STACK , ETAP_TRACE_OFF , "th_stack" , STATIC}, - {ETAP_THREAD_STACK_USAGE , ETAP_TRACE_OFF , "th_stack_usage" , STATIC}, - {ETAP_THREAD_TASK_NEW , ETAP_TRACE_OFF , "th_task_new" , 27}, - {ETAP_THREAD_TASK_ITK , ETAP_TRACE_OFF , "th_task_itk" , 28}, - {ETAP_THREAD_ULOCK , ETAP_TRACE_OFF , "th_ulock" , 29}, - {ETAP_THREAD_WAIT , ETAP_TRACE_OFF , "th_wait" , STATIC}, - {ETAP_THREAD_WAKE , ETAP_TRACE_OFF , "th_wake" , 30}, - {ETAP_THREAD_ACT_LIST , ETAP_TRACE_OFF , "th_act_list" , 31}, - {ETAP_THREAD_TASK_SWAP , ETAP_TRACE_OFF , "th_task_swap" , 32}, - {ETAP_THREAD_TASK_SWAPOUT, ETAP_TRACE_OFF , "th_task_swapout" , 33}, - {ETAP_THREAD_SWAPPER , ETAP_TRACE_OFF , "th_swapper" , STATIC}, - - {ETAP_NET_IFQ , ETAP_TRACE_OFF , "net_ifq" , STATIC}, - {ETAP_NET_KMSG , ETAP_TRACE_OFF , "net_kmsg" , STATIC}, - {ETAP_NET_MBUF , ETAP_TRACE_OFF , "net_mbuf" , STATIC},/**/ - {ETAP_NET_POOL , ETAP_TRACE_OFF , "net_pool" , STATIC}, - {ETAP_NET_Q , ETAP_TRACE_OFF , "net_q" , STATIC}, - {ETAP_NET_QFREE , ETAP_TRACE_OFF , "net_qfree" , STATIC}, - {ETAP_NET_RCV , ETAP_TRACE_OFF , "net_rcv" , STATIC}, - {ETAP_NET_RCV_PLIST , ETAP_TRACE_OFF , "net_rcv_plist" , STATIC},/**/ - {ETAP_NET_THREAD , ETAP_TRACE_OFF , "net_thread" , STATIC}, - - {ETAP_NORMA_XMM , ETAP_TRACE_OFF , "norma_xmm" , STATIC}, - {ETAP_NORMA_XMMOBJ , ETAP_TRACE_OFF , "norma_xmmobj" , STATIC}, - {ETAP_NORMA_XMMCACHE , ETAP_TRACE_OFF , "norma_xmmcache" , STATIC}, - {ETAP_NORMA_MP , ETAP_TRACE_OFF , "norma_mp" , STATIC}, - {ETAP_NORMA_VOR , ETAP_TRACE_OFF , "norma_vor" , STATIC},/**/ - {ETAP_NORMA_TASK , ETAP_TRACE_OFF , "norma_task" , 38},/**/ - - {ETAP_DIPC_CLEANUP , ETAP_TRACE_OFF , "dipc_cleanup" , STATIC},/**/ - {ETAP_DIPC_MSG_PROG , ETAP_TRACE_OFF , "dipc_msgp_prog" , STATIC},/**/ - {ETAP_DIPC_PREP_QUEUE , ETAP_TRACE_OFF , "dipc_prep_queue" , STATIC},/**/ - {ETAP_DIPC_PREP_FILL , ETAP_TRACE_OFF , "dipc_prep_fill" , STATIC},/**/ - {ETAP_DIPC_MIGRATE , ETAP_TRACE_OFF , "dipc_migrate" , STATIC},/**/ - {ETAP_DIPC_DELIVER , 
ETAP_TRACE_OFF , "dipc_deliver" , STATIC},/**/ - {ETAP_DIPC_RECV_SYNC , ETAP_TRACE_OFF , "dipc_recv_sync" , STATIC},/**/ - {ETAP_DIPC_RPC , ETAP_TRACE_OFF , "dipc_rpc" , STATIC},/**/ - {ETAP_DIPC_MSG_REQ , ETAP_TRACE_OFF , "dipc_msg_req" , STATIC},/**/ - {ETAP_DIPC_MSG_ORDER , ETAP_TRACE_OFF , "dipc_msg_order" , STATIC},/**/ - {ETAP_DIPC_MSG_PREPQ , ETAP_TRACE_OFF , "dipc_msg_prepq" , STATIC},/**/ - {ETAP_DIPC_MSG_FREE , ETAP_TRACE_OFF , "dipc_msg_free" , STATIC},/**/ - {ETAP_DIPC_KMSG_AST , ETAP_TRACE_OFF , "dipc_kmsg_ast" , STATIC},/**/ - {ETAP_DIPC_TEST_LOCK , ETAP_TRACE_OFF , "dipc_test_lock" , STATIC},/**/ - {ETAP_DIPC_SPINLOCK , ETAP_TRACE_OFF , "dipc_spinlock" , STATIC},/**/ - {ETAP_DIPC_TRACE , ETAP_TRACE_OFF , "dipc_trace" , STATIC},/**/ - {ETAP_DIPC_REQ_CALLBACK , ETAP_TRACE_OFF , "dipc_req_clbck" , STATIC},/**/ - {ETAP_DIPC_PORT_NAME , ETAP_TRACE_OFF , "dipc_port_name" , STATIC},/**/ - {ETAP_DIPC_RESTART_PORT , ETAP_TRACE_OFF , "dipc_restart_port", STATIC},/**/ - {ETAP_DIPC_ZERO_PAGE , ETAP_TRACE_OFF , "dipc_zero_page" , STATIC},/**/ - {ETAP_DIPC_BLOCKED_NODE , ETAP_TRACE_OFF , "dipc_blocked_node", STATIC},/**/ - {ETAP_DIPC_TIMER , ETAP_TRACE_OFF , "dipc_timer" , STATIC},/**/ - {ETAP_DIPC_SPECIAL_PORT , ETAP_TRACE_OFF , "dipc_special_port", STATIC},/**/ - - {ETAP_KKT_TEST_WORK , ETAP_TRACE_OFF , "kkt_test_work" , STATIC},/**/ - {ETAP_KKT_TEST_MP , ETAP_TRACE_OFF , "kkt_work_mp" , STATIC},/**/ - {ETAP_KKT_NODE , ETAP_TRACE_OFF , "kkt_node" , STATIC},/**/ - {ETAP_KKT_CHANNEL_LIST , ETAP_TRACE_OFF , "kkt_channel_list" , STATIC},/**/ - {ETAP_KKT_CHANNEL , ETAP_TRACE_OFF , "kkt_channel" , STATIC},/**/ - {ETAP_KKT_HANDLE , ETAP_TRACE_OFF , "kkt_handle" , STATIC},/**/ - {ETAP_KKT_MAP , ETAP_TRACE_OFF , "kkt_map" , STATIC},/**/ - {ETAP_KKT_RESOURCE , ETAP_TRACE_OFF , "kkt_resource" , STATIC},/**/ - - {ETAP_XKERNEL_MASTER , ETAP_TRACE_OFF , "xkernel_master" , STATIC},/**/ - {ETAP_XKERNEL_EVENT , ETAP_TRACE_OFF , "xkernel_event" , STATIC},/**/ - {ETAP_XKERNEL_ETHINPUT , ETAP_TRACE_OFF , "xkernel_input" , STATIC},/**/ - - {ETAP_MISC_AST , ETAP_TRACE_OFF , "m_ast" , STATIC}, - {ETAP_MISC_CLOCK , ETAP_TRACE_OFF , "m_clock" , STATIC}, - {ETAP_MISC_EMULATE , ETAP_TRACE_OFF , "m_emulate" , 34}, - {ETAP_MISC_EVENT , ETAP_TRACE_OFF , "m_event" , STATIC}, - {ETAP_MISC_KDB , ETAP_TRACE_OFF , "m_kdb" , STATIC}, - {ETAP_MISC_PCB , ETAP_TRACE_OFF , "m_pcb" , 35}, - {ETAP_MISC_PRINTF , ETAP_TRACE_OFF , "m_printf" , STATIC}, - {ETAP_MISC_Q , ETAP_TRACE_OFF , "m_q" , STATIC}, - {ETAP_MISC_RPC_SUBSYS , ETAP_TRACE_OFF , "m_rpc_sub" , 36}, - {ETAP_MISC_RT_CLOCK , ETAP_TRACE_OFF , "m_rt_clock" , STATIC}, - {ETAP_MISC_SD_POOL , ETAP_TRACE_OFF , "m_sd_pool" , STATIC}, - {ETAP_MISC_TIMER , ETAP_TRACE_OFF , "m_timer" , STATIC}, - {ETAP_MISC_UTIME , ETAP_TRACE_OFF , "m_utime" , STATIC}, - {ETAP_MISC_XPR , ETAP_TRACE_OFF , "m_xpr" , STATIC}, - {ETAP_MISC_ZONE , ETAP_TRACE_OFF , "m_zone" , 37}, - {ETAP_MISC_ZONE_ALL , ETAP_TRACE_OFF , "m_zone_all" , STATIC}, - {ETAP_MISC_ZONE_GET , ETAP_TRACE_OFF , "m_zone_get" , STATIC}, - {ETAP_MISC_ZONE_PTABLE , ETAP_TRACE_OFF , "m_zone_ptable" , STATIC},/**/ - {ETAP_MISC_LEDGER , ETAP_TRACE_OFF , "m_ledger" , STATIC},/**/ - {ETAP_MISC_SCSIT_TGT , ETAP_TRACE_OFF , "m_scsit_tgt_lock" , STATIC},/**/ - {ETAP_MISC_SCSIT_SELF , ETAP_TRACE_OFF , "m_scsit_self_lock", STATIC},/**/ - {ETAP_MISC_SPL , ETAP_TRACE_OFF , "m_spl_lock" , STATIC},/**/ - {ETAP_MISC_MASTER , ETAP_TRACE_OFF , "m_master" , STATIC},/**/ - {ETAP_MISC_FLOAT , ETAP_TRACE_OFF , "m_float" , STATIC},/**/ - 
{ETAP_MISC_GROUP , ETAP_TRACE_OFF , "m_group" , STATIC},/**/ - {ETAP_MISC_FLIPC , ETAP_TRACE_OFF , "m_flipc" , STATIC},/**/ - {ETAP_MISC_MP_IO , ETAP_TRACE_OFF , "m_mp_io" , STATIC},/**/ - {ETAP_MISC_KERNEL_TEST , ETAP_TRACE_OFF , "m_kernel_test" , STATIC},/**/ - - {ETAP_NO_TRACE , ETAP_TRACE_OFF , "NEVER_TRACE" , STATIC}, -#endif /* ETAP_LOCK_TRACE */ -}; - -/* - * Variable initially pointing to the event table, then to its mappable - * copy. The cast is needed to discard the `const' qualifier; without it - * gcc issues a warning. - */ -event_table_t event_table = (event_table_t) event_table_init; - -/* - * Linked list of pointers into event_table_init[] so they can be switched - * into the mappable copy when it is made. - */ -struct event_table_chain *event_table_chain; - -/* - * max number of event types in the event table - */ - -int event_table_max = sizeof(event_table_init)/sizeof(struct event_table_entry); - -const struct subs_table_entry subs_table_init[] = -{ - /*------------------------------------------* - * ETAP SUBSYSTEM TEXT NAME * - *------------------------------------------*/ - -#if ETAP_EVENT_MONITOR - {ETAP_SUBS_PROBE , "event_probes" }, -#endif /* ETAP_EVENT_MONITOR */ - -#if ETAP_LOCK_TRACE - {ETAP_SUBS_LOCK_DIPC , "lock_dipc" }, - {ETAP_SUBS_LOCK_IO , "lock_io" }, - {ETAP_SUBS_LOCK_IPC , "lock_ipc" }, - {ETAP_SUBS_LOCK_KKT , "lock_kkt" }, - {ETAP_SUBS_LOCK_MISC , "lock_misc" }, - {ETAP_SUBS_LOCK_NET , "lock_net" }, - {ETAP_SUBS_LOCK_NORMA , "lock_norma" }, - {ETAP_SUBS_LOCK_THREAD , "lock_thread" }, - {ETAP_SUBS_LOCK_VM , "lock_vm" }, - {ETAP_SUBS_LOCK_XKERNEL , "lock_xkernel" }, -#endif /* ETAP_LOCK_TRACE */ -}; - -/* - * Variable initially pointing to the subsystem table, then to its mappable - * copy. - */ -subs_table_t subs_table = (subs_table_t) subs_table_init; - -/* - * max number of subsystem types in the subsystem table - */ - -int subs_table_max = sizeof(subs_table_init)/sizeof(struct subs_table_entry); - -#if ETAP_MONITOR -#define MAX_NAME_SIZE 35 - -#define SYS_TABLE_MACH_TRAP 0 -#define SYS_TABLE_MACH_MESSAGE 1 -#define SYS_TABLE_UNIX_SYSCALL 2 -#define SYS_TABLE_INTERRUPT 3 -#define SYS_TABLE_EXCEPTION 4 - - -extern char *system_table_lookup (unsigned int table, - unsigned int number); - - -char *mach_trap_names[] = { -/* 0 */ "undefined", -/* 1 */ NULL, -/* 2 */ NULL, -/* 3 */ NULL, -/* 4 */ NULL, -/* 5 */ NULL, -/* 6 */ NULL, -/* 7 */ NULL, -/* 8 */ NULL, -/* 9 */ NULL, -/* 10 */ NULL, -/* 11 */ NULL, -/* 12 */ NULL, -/* 13 */ NULL, -/* 14 */ NULL, -/* 15 */ NULL, -/* 16 */ NULL, -/* 17 */ NULL, -/* 18 */ NULL, -/* 19 */ NULL, -/* 20 */ NULL, -/* 21 */ NULL, -/* 22 */ NULL, -/* 23 */ NULL, -/* 24 */ NULL, -/* 25 */ NULL, -/* 26 */ "mach_reply_port", -/* 27 */ "mach_thread_self", -/* 28 */ "mach_task_self", -/* 29 */ "mach_host_self", -/* 30 */ "vm_read_overwrite", -/* 31 */ "vm_write", -/* 32 */ "mach_msg_overwrite_trap", -/* 33 */ NULL, -/* 34 */ NULL, -#ifdef i386 -/* 35 */ "mach_rpc_trap", -/* 36 */ "mach_rpc_return_trap", -#else -/* 35 */ NULL, -/* 36 */ NULL, -#endif /* i386 */ -/* 37 */ NULL, -/* 38 */ NULL, -/* 39 */ NULL, -/* 40 */ NULL, -/* 41 */ "init_process", -/* 42 */ NULL, -/* 43 */ "map_fd", -/* 44 */ NULL, -/* 45 */ NULL, -/* 46 */ NULL, -/* 47 */ NULL, -/* 48 */ NULL, -/* 49 */ NULL, -/* 50 */ NULL, -/* 51 */ NULL, -/* 52 */ NULL, -/* 53 */ NULL, -/* 54 */ NULL, -/* 55 */ NULL, -/* 56 */ NULL, -/* 57 */ NULL, -/* 58 */ NULL, -/* 59 */ "swtch_pri", -/* 60 */ "swtch", -/* 61 */ "thread_switch", -/* 62 */ "clock_sleep_trap", -/* 63 */ 
NULL, -/* 64 */ NULL, -/* 65 */ NULL, -/* 66 */ NULL, -/* 67 */ NULL, -/* 68 */ NULL, -/* 69 */ NULL, -/* 70 */ NULL, -/* 71 */ NULL, -/* 72 */ NULL, -/* 73 */ NULL, -/* 74 */ NULL, -/* 75 */ NULL, -/* 76 */ NULL, -/* 77 */ NULL, -/* 78 */ NULL, -/* 79 */ NULL, -/* 80 */ NULL, -/* 81 */ NULL, -/* 82 */ NULL, -/* 83 */ NULL, -/* 84 */ NULL, -/* 85 */ NULL, -/* 86 */ NULL, -/* 87 */ NULL, -/* 88 */ NULL, -/* 89 */ NULL, -/* 90 */ NULL, -/* 91 */ NULL, -/* 92 */ NULL, -/* 93 */ NULL, -/* 94 */ NULL, -/* 95 */ NULL, -/* 96 */ NULL, -/* 97 */ NULL, -/* 98 */ NULL, -/* 99 */ NULL, -/* 100 */ NULL, -/* 101 */ NULL, -/* 102 */ NULL, -/* 103 */ NULL, -/* 104 */ NULL, -/* 105 */ NULL, -/* 106 */ NULL, -/* 107 */ NULL, -/* 108 */ NULL, -/* 109 */ NULL, -}; -#define N_MACH_TRAP_NAMES (sizeof mach_trap_names / sizeof mach_trap_names[0]) -#define mach_trap_name(nu) \ - (((nu) < N_MACH_TRAP_NAMES) ? mach_trap_names[nu] : NULL) - -struct table_entry { - char name[MAX_NAME_SIZE]; - u_int number; -}; - -/* - * Mach message table - * - * Note: Most mach system calls are actually implemented as messages. - */ -struct table_entry mach_message_table[] = { - subsystem_to_name_map_bootstrap, - subsystem_to_name_map_clock, - subsystem_to_name_map_clock_reply, - subsystem_to_name_map_default_pager_object, - subsystem_to_name_map_device, - subsystem_to_name_map_device_reply, - subsystem_to_name_map_device_request, - subsystem_to_name_map_exc, -/* subsystem_to_name_map_mach,*/ - subsystem_to_name_map_mach_debug, -/* subsystem_to_name_map_mach_host,*/ - subsystem_to_name_map_mach_norma, - subsystem_to_name_map_mach_port, - subsystem_to_name_map_memory_object, - subsystem_to_name_map_memory_object_default, - subsystem_to_name_map_notify, - subsystem_to_name_map_prof, - subsystem_to_name_map_sync -}; - -int mach_message_table_entries = sizeof(mach_message_table) / - sizeof(struct table_entry); - - -#endif - -/* - * ================================ - * Initialization routines for ETAP - * ================================ - */ - -/* - * ROUTINE: etap_init_phase1 [internal] - * - * FUNCTION: Event trace instrumentation initialization phase - * one of two. The static phase. The cumulative buffer - * is initialized. - * - * NOTES: The cumulative buffer is statically allocated and - * must be initialized before the first simple_lock_init() - * or lock_init() call is made. - * - * The first lock init call is made before dynamic allocation - * is available. Hence, phase one is executed before dynamic - * memory allocation is available. - * - */ - -void -etap_init_phase1(void) -{ -#if ETAP_LOCK_ACCUMULATE || MACH_ASSERT - int x; -#if MACH_ASSERT - boolean_t out_of_order; -#endif /* MACH_ASSERT */ -#endif /* ETAP_LOCK_ACCUMULATE || MACH_ASSERT */ - -#if ETAP_LOCK_ACCUMULATE - /* - * Initialize Cumulative Buffer - * - * Note: The cumulative buffer is statically allocated. - * This static allocation is necessary since most - * of the lock_init calls are made before dynamic - * allocation routines are available. - */ - - /* - * Align cumulative buffer pointer to a page boundary - * (so it can be maped). - */ - - bzero(&cbuff_allocated[0], CBUFF_ALLOCATED_SIZE); - cbuff = (cumulative_buffer_t) round_page(&cbuff_allocated); - - simple_lock_init(&cbuff_lock, ETAP_NO_TRACE); - - /* - * Set the starting point for cumulative buffer entry - * reservations. - * - * This value must leave enough head room in the - * cumulative buffer to contain all dynamic events. 
- */ - - for (x=0; x < event_table_max; x++) - if (event_table[x].dynamic > cbuff->static_start) - cbuff->static_start = event_table[x].dynamic; - - cbuff->next = cbuff->static_start; -#endif /* ETAP_LOCK_ACCUMULATE */ - - /* - * Initialize the event table lock - */ - - simple_lock_init(&event_table_lock, ETAP_NO_TRACE); - -#if MACH_ASSERT - /* - * Check that events are in numerical order so we can do a binary - * search on them. Even better would be to make event numbers be - * simple contiguous indexes into event_table[], but that would - * break the coding of subsystems in the event number. - */ - out_of_order = FALSE; - for (x = 1; x < event_table_max; x++) { - if (event_table[x - 1].event > event_table[x].event) { - printf("events out of order: %s > %s\n", - event_table[x - 1].name, event_table[x].name); - out_of_order = TRUE; - } - } - if (out_of_order) - panic("etap_init_phase1"); -#endif /* MACH_ASSERT */ -} - - -/* - * ROUTINE: etap_init_phase2 [internal] - * - * FUNCTION: Event trace instrumentation initialization phase - * two of two. The dynamic phase. The monitored buffers - * are dynamically allocated and initialized. Cumulative - * dynamic entry locks are allocated and initialized. The - * start_data_pool is initialized. - * - * NOTES: Phase two is executed once dynamic memory allocation - * is available. - * - */ - -void -etap_init_phase2(void) -{ - int size; - int x; - int ret; - vm_offset_t table_copy; - struct event_table_chain *chainp; - - /* - * Make mappable copies of the event_table and the subs_table. - * These tables were originally mapped as they appear in the - * kernel image, but that meant that other kernel variables could - * end up being mapped with them, which is ugly. It also didn't - * work on the HP/PA, where pages with physical address == virtual - * do not have real pmap entries allocated and therefore can't be - * mapped elsewhere. - */ - size = sizeof event_table_init + sizeof subs_table_init; - ret = kmem_alloc(kernel_map, &table_copy, size); - if (ret != KERN_SUCCESS) - panic("ETAP: error allocating table copies"); - event_table = (event_table_t) table_copy; - subs_table = (subs_table_t) (table_copy + sizeof event_table_init); - bcopy((char *) event_table_init, (char *) event_table, - sizeof event_table_init); - bcopy((char *) subs_table_init, (char *) subs_table, - sizeof subs_table_init); - - /* Switch pointers from the old event_table to the new. */ - for (chainp = event_table_chain; chainp != NULL; - chainp = chainp->event_table_link) { - x = chainp->event_tablep - event_table_init; - assert(x < event_table_max); - chainp->event_tablep = event_table + x; - } - -#if ETAP_LOCK_ACCUMULATE - - /* - * Because several dynamic locks can point to a single - * cumulative buffer entry, dynamic lock writes to the - * entry are synchronized. - * - * The spin locks are allocated here. - * - */ -#if MACH_LDEBUG - size = sizeof(simple_lock_t) * cbuff->static_start; -#else - /* - * Note: These locks are different from traditional spin locks. - * They are of type int instead of type simple_lock_t. - * We can reduce lock size this way, since no tracing will - * EVER be performed on these locks. 
- */ - size = sizeof(simple_lock_data_t) * cbuff->static_start; -#endif - - ret = kmem_alloc(kernel_map, (vm_offset_t *) &cbuff_locks, size); - - if (ret != KERN_SUCCESS) - panic("ETAP: error allocating cumulative write locks"); - -#if MACH_LDEBUG - for(x = 0; x < cbuff->static_start; ++x) { - simple_lock_init(&cbuff_locks[x], ETAP_NO_TRACE); - } -#else - bzero((const char *) cbuff_locks, size); -#endif - -#endif /* ETAP_LOCK_ACCUMULATE */ - - -#if ETAP_MONITOR - - /* - * monitor buffer allocation - */ - - size = ((mbuff_entries-1) * sizeof(struct mbuff_entry)) + - sizeof(struct monitor_buffer); - - for (x=0; x < NCPUS; x++) { - ret = kmem_alloc(kernel_map, - (vm_offset_t *) &mbuff[x], - size); - - if (ret != KERN_SUCCESS) - panic ("ETAP: error allocating monitor buffer\n"); - - /* zero fill buffer */ - bzero((char *) mbuff[x], size); - } - -#endif /* ETAP_MONITOR */ - - -#if ETAP_LOCK_TRACE - - /* - * Initialize the start_data_pool - */ - - init_start_data_pool(); - -#endif /* ETAP_LOCK_TRACE */ -} - - -#if ETAP_LOCK_ACCUMULATE - -/* - * ROUTINE: etap_cbuff_reserve [internal] - * - * FUNCTION: The cumulative buffer operation which returns a pointer - * to a free entry in the cumulative buffer. - * - * NOTES: Disables interrupts. - * - */ - -cbuff_entry_t -etap_cbuff_reserve(event_table_t etp) -{ - cbuff_entry_t avail; - unsigned short de; - spl_t s; - - /* see if type pointer is initialized */ - if (etp == EVENT_TABLE_NULL || etp->event == ETAP_NO_TRACE) - return (CBUFF_ENTRY_NULL); - - /* check for DYNAMIC lock */ - if (de = etp->dynamic) { - if (de <= cbuff->static_start) - return (&cbuff->entry[de-1]); - else { - printf("ETAP: dynamic lock index error [%lu]\n", de); - return (CBUFF_ENTRY_NULL); - } - } - - cumulative_buffer_lock(s); - - /* if buffer is full, reservation requests fail */ - if (cbuff->next >= ETAP_CBUFF_ENTRIES) { - cumulative_buffer_unlock(s); - return (CBUFF_ENTRY_NULL); - } - - avail = &cbuff->entry[cbuff->next++]; - - cumulative_buffer_unlock(s); - - return (avail); -} - -#endif /* ETAP_LOCK_ACCUMULATE */ - -/* - * ROUTINE: etap_event_table_assign [internal] - * - * FUNCTION: Returns a pointer to the assigned event type table entry, - * using the event type as the index key. - * - */ - -event_table_t -etap_event_table_find(etap_event_t event) -{ - int last_before, first_after, try; - - /* Binary search for the event number. last_before is the highest- - numbered element known to be <= the number we're looking for; - first_after is the lowest-numbered element known to be >. */ - last_before = 0; - first_after = event_table_max; - while (last_before < first_after) { - try = (last_before + first_after) >> 1; - if (event_table[try].event == event) - return (&event_table[try]); - else if (event_table[try].event < event) - last_before = try; - else - first_after = try; - } - return EVENT_TABLE_NULL; -} - -void -etap_event_table_assign(struct event_table_chain *chainp, etap_event_t event) -{ - event_table_t event_tablep; - - event_tablep = etap_event_table_find(event); - if (event_tablep == EVENT_TABLE_NULL) - printf("\nETAP: event not found in event table: %x\n", event); - else { - if (event_table == event_table_init) { - chainp->event_table_link = event_table_chain; - event_table_chain = chainp; - } - chainp->event_tablep = event_tablep; - } -} - -#endif /* ETAP */ - -/* - * - * MESSAGE: etap_get_info [exported] - * - * FUNCTION: provides the server with ETAP buffer configurations. 
- * - */ - -kern_return_t -etap_get_info( - host_priv_t host_priv, - int *et_entries, - int *st_entries, - vm_offset_t *et_offset, - vm_offset_t *st_offset, - int *cb_width, - int *mb_size, - int *mb_entries, - int *mb_cpus) -{ - - if (host_priv == HOST_PRIV_NULL) - return KERN_INVALID_ARGUMENT; - -#if ETAP - *et_entries = event_table_max; - *st_entries = subs_table_max; - *et_offset = (vm_offset_t) ((char*) event_table - - trunc_page((char*) event_table)); - *st_offset = (vm_offset_t) ((char*) subs_table - - trunc_page((char*) subs_table)); -#else /* ETAP */ - *et_entries = 0; - *st_entries = 0; - *et_offset = 0; - *st_offset = 0; -#endif /* ETAP */ - -#if ETAP_LOCK_ACCUMULATE - *cb_width = cbuff_width; -#else /* ETAP_LOCK_ACCUMULATE */ - *cb_width = 0; -#endif /* ETAP_LOCK_ACCUMULATE */ - -#if ETAP_MONITOR - *mb_size = ((mbuff_entries-1) * sizeof(struct mbuff_entry)) + - sizeof(struct monitor_buffer); - *mb_entries = mbuff_entries; - *mb_cpus = NCPUS; -#else /* ETAP_MONITOR */ - *mb_size = 0; - *mb_entries = 0; - *mb_cpus = 0; -#endif /* ETAP_MONITOR */ - - return (KERN_SUCCESS); -} - -/* - * ROUTINE: etap_trace_event [exported] - * - * FUNCTION: The etap_trace_event system call is the user's interface to - * the ETAP kernel instrumentation. - * - * This call allows the user to enable and disable tracing modes - * on specific event types. The call also supports a reset option, - * where the cumulative buffer data and all event type tracing - * is reset to zero. When the reset option is used, a new - * interval width can also be defined using the op parameter. - * - */ - -kern_return_t -etap_trace_event ( - unsigned short mode, - unsigned short type, - boolean_t enable, - unsigned int nargs, - unsigned short args[]) -{ -#if ETAP - event_table_t event_tablep; - kern_return_t ret; - int i, args_size; - unsigned short status_mask; - unsigned short *tmp_args; - - /* - * Initialize operation - */ - - if (mode == ETAP_RESET) { - etap_trace_reset(nargs); - return (KERN_SUCCESS); - } - - status_mask = mode & type; - - /* - * Copy args array from user space to kernel space - */ - - args_size = nargs * sizeof *args; - tmp_args = (unsigned short *) kalloc(args_size); - - if (tmp_args == NULL) - return (KERN_NO_SPACE); - - if (copyin((const char *) args, (char *) tmp_args, args_size)) - return (KERN_INVALID_ADDRESS); - - /* - * Change appropriate status fields in the event table - */ - - event_table_lock(); - - for (i = 0; i < nargs; i++) { - if (tmp_args[i] != ETAP_NO_TRACE) { - event_tablep = etap_event_table_find(tmp_args[i]); - if (event_tablep == EVENT_TABLE_NULL) - break; - if (enable) - event_tablep->status |= status_mask; - else - event_tablep->status &= ~status_mask; - } - } - - ret = (i < nargs) ? KERN_INVALID_ARGUMENT : KERN_SUCCESS; - - event_table_unlock(); - - kfree((vm_offset_t) tmp_args, args_size); - - return (ret); - -#else /* ETAP */ - - return (KERN_FAILURE); - -#endif /* ETAP */ -} - - -#if ETAP - -/* - * ROUTINE: etap_trace_reset [internal] - * - * FUNCTION: Turns off all tracing and erases all the data accumulated - * in the cumulative buffer. If the user defined a new - * cumulative buffer interval width, it will be assigned here. 
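One detail worth flagging in etap_trace_event above: when copyin faults, the routine returns KERN_INVALID_ADDRESS without freeing tmp_args, and nargs is never bounded before it sizes the kalloc. The usual shape — validate the count, allocate, copy, and release on every exit path — is sketched below as a user-space model; malloc/memcpy stand in for kalloc/copyin, and MAX_ARGS is a hypothetical bound:

    #include <stdlib.h>
    #include <string.h>
    #include <errno.h>

    #define MAX_ARGS 64                 /* hypothetical upper bound */

    /* Stand-in for copyin(): nonzero would mean a faulted user address. */
    static int copy_from_user(void *dst, const void *src, size_t len)
    {
        memcpy(dst, src, len);          /* a real kernel fault-checks here */
        return 0;
    }

    int load_args(const unsigned short *uargs, unsigned int nargs,
                  unsigned short **out)
    {
        unsigned short *tmp;
        size_t size;

        if (nargs == 0 || nargs > MAX_ARGS)
            return EINVAL;              /* reject before allocating */

        size = nargs * sizeof *tmp;
        tmp = malloc(size);
        if (tmp == NULL)
            return ENOMEM;

        if (copy_from_user(tmp, uargs, size) != 0) {
            free(tmp);                  /* release on the error path too */
            return EFAULT;
        }

        *out = tmp;                     /* caller frees when done */
        return 0;
    }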
- * - */ -void -etap_trace_reset(int new_interval) -{ - event_table_t scan; - int x; - register s; - - /* - * Wipe out trace fields in event table - */ - - scan = event_table; - - event_table_lock(); - - for (x=0; x < event_table_max; x++) { - scan->status = ETAP_TRACE_OFF; - scan++; - } - - event_table_unlock(); - -#if ETAP_LOCK_ACCUMULATE - - /* - * Wipe out cumulative buffer statistical fields for all entries - */ - - cumulative_buffer_lock(s); - - for (x=0; x < ETAP_CBUFF_ENTRIES; x++) { - bzero ((char *) &cbuff->entry[x].hold, - sizeof(struct cbuff_data)); - bzero ((char *) &cbuff->entry[x].wait, - sizeof(struct cbuff_data)); - bzero ((char *) &cbuff->entry[x].hold_interval[0], - sizeof(unsigned long) * ETAP_CBUFF_IBUCKETS); - bzero ((char *) &cbuff->entry[x].wait_interval[0], - sizeof(unsigned long) * ETAP_CBUFF_IBUCKETS); - } - - /* - * Assign interval width if the user defined a new one. - */ - - if (new_interval != 0) - cbuff_width = new_interval; - - cumulative_buffer_unlock(s); - -#endif /* ETAP_LOCK_ACCUMULATE */ -} - -#endif /* ETAP */ - -/* - * ROUTINE: etap_probe [exported] - * - * FUNCTION: The etap_probe system call serves as a user-level probe, - * allowing user-level code to store event data into - * the monitored buffer(s). - */ - -kern_return_t -etap_probe( - unsigned short event_type, - unsigned short event_id, - unsigned int data_size, /* total size in bytes */ - etap_data_t *data) -{ - -#if ETAP_MONITOR - - mbuff_entry_t mbuff_entryp; - int cpu; - int free; - spl_t s; - - - if (data_size > ETAP_DATA_SIZE) - return (KERN_INVALID_ARGUMENT); - - if (event_table[event_type].status == ETAP_TRACE_OFF || - event_table[event_type].event != event_type) - return (KERN_NO_ACCESS); - - mp_disable_preemption(); - cpu = cpu_number(); - s = splhigh(); - - free = mbuff[cpu]->free; - mbuff_entryp = &mbuff[cpu]->entry[free]; - - /* - * Load monitor buffer entry - */ - - ETAP_TIMESTAMP(mbuff_entryp->time); - mbuff_entryp->event = event_id; - mbuff_entryp->flags = USER_EVENT; - mbuff_entryp->instance = (u_int) current_thread(); - mbuff_entryp->pc = 0; - - if (data != ETAP_DATA_NULL) - copyin((const char *) data, - (char *) mbuff_entryp->data, - data_size); - - mbuff[cpu]->free = (free+1) % mbuff_entries; - - if (mbuff[cpu]->free == 0) - mbuff[cpu]->timestamp++; - - splx(s); - mp_enable_preemption(); - - return (KERN_SUCCESS); - -#else /* ETAP_MONITOR */ - return (KERN_FAILURE); -#endif /* ETAP_MONITOR */ -} - -/* - * ROUTINE: etap_trace_thread [exported] - * - * FUNCTION: Toggles thread's ETAP trace status bit. - */ - -kern_return_t -etap_trace_thread( - thread_act_t thr_act, - boolean_t trace_status) -{ -#if ETAP_EVENT_MONITOR - - thread_t thread; - boolean_t old_status; - etap_data_t probe_data; - spl_t s; - - if (thr_act == THR_ACT_NULL) - return (KERN_INVALID_ARGUMENT); - - thread = act_lock_thread(thr_act); - - if (thread == THREAD_NULL) { - act_unlock_thread(thr_act); - return (KERN_INVALID_ARGUMENT); - } - - s = splsched(); - thread_lock(thread); - - old_status = thread->etap_trace; - thread->etap_trace = trace_status; - - ETAP_DATA_LOAD(probe_data[0],thr_act->task); - ETAP_DATA_LOAD(probe_data[1],thr_act); - ETAP_DATA_LOAD(probe_data[2],thread->sched_pri); - - thread_unlock(thread); - splx(s); - - act_unlock_thread(thr_act); - - /* - * Thread creation (ETAP_P_THREAD_LIFE: BEGIN) is ONLY recorded - * here since a threads trace status is disabled by default. 
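etap_probe above, like the MON_DATA_COLLECT and ETAP_PROBE_DATA macros later in this patch, advances a per-CPU ring buffer the same way: write at the free index, wrap modulo the entry count, and bump a lap counter on wrap so a reader can detect overwritten records. A stripped-down sketch of just that cursor logic (the kernel version additionally raises spl and disables preemption so the cpu_number()/buffer pairing stays stable; that is omitted here):

    #include <stdint.h>

    #define MBUFF_ENTRIES 1024            /* hypothetical ring size */

    struct ring {
        uint32_t free;                    /* next slot to write */
        uint32_t laps;                    /* incremented on each wrap */
        uint32_t entry[MBUFF_ENTRIES];    /* payload, simplified to one word */
    };

    /* Write one record and advance the cursor; a full ring silently
     * overwrites the oldest entry, as the monitor buffers do. */
    void ring_put(struct ring *r, uint32_t value)
    {
        r->entry[r->free] = value;
        r->free = (r->free + 1) % MBUFF_ENTRIES;
        if (r->free == 0)
            r->laps++;                    /* lets a reader detect overruns */
    }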
- */ - if (trace_status == TRUE && old_status == FALSE) { - ETAP_PROBE_DATA(ETAP_P_THREAD_LIFE, - EVENT_BEGIN, - thread, - &probe_data, - ETAP_DATA_ENTRY*3); - } - - /* - * Thread termination is (falsely) recorded here if the trace - * status has been disabled. This event is recorded to allow - * users the option of tracing a portion of a threads execution. - */ - if (trace_status == FALSE && old_status == TRUE) { - ETAP_PROBE_DATA(ETAP_P_THREAD_LIFE, - EVENT_END, - thread, - &probe_data, - ETAP_DATA_ENTRY*3); - } - - return (KERN_SUCCESS); - -#else /* ETAP_EVENT_MONITOR */ - return (KERN_FAILURE); -#endif /* ETAP_EVENT_MONITOR */ -} - -/* - * ROUTINE: etap_mon_reconfig [exported] - * - * FUNCTION: Reallocates monitor buffers to hold specified number - * of entries. - * - * NOTES: In multiprocessor (SMP) case, a lock needs to be added - * here and in data collection macros to protect access - * to mbuff_entries. - */ -kern_return_t -etap_mon_reconfig( - host_priv_t host_priv, - int nentries) -{ -#if ETAP_EVENT_MONITOR - struct monitor_buffer *nmbuff[NCPUS], *ombuff[NCPUS]; - int s, size, osize, i, ret; - - if (host_priv == HOST_PRIV_NULL) - return KERN_INVALID_ARGUMENT; - - if (nentries <= 0) /* must be at least 1 */ - return (KERN_FAILURE); - - size = ((nentries-1) * sizeof(struct mbuff_entry)) + - sizeof(struct monitor_buffer); - - for (i = 0; i < NCPUS; ++i) { - ret = kmem_alloc(kernel_map, - (vm_offset_t *)&nmbuff[i], - size); - if (ret != KERN_SUCCESS) { - if (i > 0) { - int j; - - for (j = 0; j < i; ++j) { - kmem_free(kernel_map, - (vm_offset_t)nmbuff[j], - size); - } - } - return (ret); - } - bzero((char *) nmbuff[i], size); - } - osize = ((mbuff_entries-1) * sizeof (struct mbuff_entry)) + - sizeof (struct monitor_buffer); - - s = splhigh(); - event_table_lock(); - for (i = 0; i < NCPUS; ++i) { - ombuff[i] = mbuff[i]; - mbuff[i] = nmbuff[i]; - } - mbuff_entries = nentries; - event_table_unlock(); - splx(s); - - for (i = 0; i < NCPUS; ++i) { - kmem_free(kernel_map, - (vm_offset_t)ombuff[i], - osize); - } - return (KERN_SUCCESS); -#else - return (KERN_FAILURE); -#endif /* ETAP_MONITOR */ -} - -/* - * ROUTINE: etap_new_probe [exported] - * - * FUNCTION: Reallocates monitor probe table, adding a new entry - * - */ -kern_return_t -etap_new_probe( - host_priv_t host_priv, - vm_address_t name, - vm_size_t namlen, - boolean_t trace_on, - vm_address_t id) -{ -#if ETAP_EVENT_MONITOR - event_table_t newtable, oldtable; - unsigned short i, nid; - int s; - vm_size_t newsize = (event_table_max + 1) * - sizeof (struct event_table_entry); - boolean_t duplicate_name = FALSE; - kern_return_t ret; - - if (host_priv == HOST_PRIV_NULL) - return KERN_INVALID_ARGUMENT; - - if (namlen > EVENT_NAME_LENGTH - 1) - return (KERN_INVALID_ARGUMENT); - - if ((ret = kmem_alloc(kernel_map, (vm_address_t *)&newtable, - newsize)) != KERN_SUCCESS) - return (ret); - - bcopy((const char *)event_table, (char *)newtable, event_table_max * - sizeof (struct event_table_entry)); - - if (copyin((const char *)name, - (char *)&newtable[event_table_max].name, namlen)) - return (KERN_INVALID_ADDRESS); - - newtable[event_table_max].name[EVENT_NAME_LENGTH - 1] = '\0'; - newtable[event_table_max].status = trace_on; - newtable[event_table_max].dynamic = 0; - - for (nid = i = 0; i < event_table_max; ++i) { - if (strcmp((char *)newtable[event_table_max].name, - newtable[i].name) == 0) { - duplicate_name = TRUE; - printf("duplicate name\n"); - } - nid = max(nid, newtable[i].event); - } - ++nid; - - if (nid >= ETAP_NO_TRACE || 
duplicate_name == TRUE) { - kmem_free(kernel_map, (vm_address_t)newtable, newsize); - if (nid >= ETAP_NO_TRACE) { - printf("KERN_RESOURCE_SHORTAGE\n"); - return (KERN_RESOURCE_SHORTAGE); - } - else { - printf("KERN_NAME_EXISTS\n"); - return (KERN_NAME_EXISTS); - } - } - - newtable[event_table_max].event = nid; - - s = splhigh(); - event_table_lock(); - oldtable = event_table; - event_table = newtable; - ++event_table_max; - event_table_unlock(); - splx(s); - - if (oldtable != event_table_init) - kmem_free(kernel_map, (vm_address_t)oldtable, - (event_table_max - 1) * - sizeof (struct event_table_entry)); - - *(unsigned short *)id = nid; - - return (KERN_SUCCESS); -#else - return (KERN_FAILURE); -#endif /* ETAP_EVENT_MONITOR */ - -} -/* - * ETAP trap probe hooks - */ - -void -etap_interrupt_probe(int interrupt, int flag_setting) -{ - u_short flag; - - if (flag_setting == 1) - flag = EVENT_BEGIN; - else - flag = EVENT_END; - - ETAP_PROBE_DATA_COND(ETAP_P_INTERRUPT, - flag, - current_thread(), - &interrupt, - sizeof(int), - 1); -} - -void -etap_machcall_probe1(int syscall) -{ - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_BEGIN | SYSCALL_TRAP, - current_thread(), - &syscall, - sizeof(int)); -} - -void -etap_machcall_probe2(void) -{ - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_END | SYSCALL_TRAP, - current_thread(), - 0, - 0); -} - -static void print_user_event(mbuff_entry_t); -static void print_kernel_event(mbuff_entry_t, boolean_t); -static void print_lock_event(mbuff_entry_t, const char *); - -#if MACH_KDB -void db_show_etap_log(db_expr_t, boolean_t, db_expr_t, char *); -/* - * - * ROUTINE: etap_print [internal] - * - * FUNCTION: print each mbuff table (for use in debugger) - * - */ -void -db_show_etap_log( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif) -{ -#if ETAP_MONITOR - int cpu = cpu_number(), last, i, first, step, end, restart; - boolean_t show_data = FALSE; - - last = (mbuff[cpu]->free - 1) % mbuff_entries; - - if(db_option(modif, 'r')) { - first = last; - step = -1; - end = -1; - restart = mbuff_entries - 1; - } else { - first = last + 1; - step = 1; - end = mbuff_entries; - restart = 0; - } - - if(db_option(modif, 'd')) - show_data = TRUE; - - for(i = first; i != end; i += step) { - if (mbuff[cpu]->entry[i].flags & USER_EVENT) - print_user_event(&mbuff[cpu]->entry[i]); - else - print_kernel_event(&mbuff[cpu]->entry[i], show_data); - } - for(i = restart; i != first; i += step) { - if (mbuff[cpu]->entry[i].flags & USER_EVENT) - print_user_event(&mbuff[cpu]->entry[i]); - else - print_kernel_event(&mbuff[cpu]->entry[i], show_data); - } -#else - printf("ETAP event monitor not configured\n"); -#endif /* ETAP_MONITOR */ -} - -#if ETAP_MONITOR -static -void -print_user_event(mbuff_entry_t record) -{ - char *s, buf[256]; - - db_printf("%x: %x%08x: ", record->instance, record->time.tv_sec, - record->time.tv_nsec); - switch (record->pc) - { - case ETAP_P_USER_EVENT0: s = "0"; break; - case ETAP_P_USER_EVENT1: s = "1"; break; - case ETAP_P_USER_EVENT2: s = "2"; break; - case ETAP_P_USER_EVENT3: s = "3"; break; - case ETAP_P_USER_EVENT4: s = "4"; break; - case ETAP_P_USER_EVENT5: s = "5"; break; - case ETAP_P_USER_EVENT6: s = "6"; break; - case ETAP_P_USER_EVENT7: s = "7"; break; - case ETAP_P_USER_EVENT8: s = "8"; break; - case ETAP_P_USER_EVENT9: s = "9"; break; - case ETAP_P_USER_EVENT10: s = "10"; break; - case ETAP_P_USER_EVENT11: s = "11"; break; - case ETAP_P_USER_EVENT12: s = "12"; break; - case ETAP_P_USER_EVENT13: s = "13"; break; - case 
ETAP_P_USER_EVENT14: s = "14"; break; - case ETAP_P_USER_EVENT15: s = "15"; break; - case ETAP_P_USER_EVENT16: s = "16"; break; - case ETAP_P_USER_EVENT17: s = "17"; break; - case ETAP_P_USER_EVENT18: s = "18"; break; - case ETAP_P_USER_EVENT19: s = "19"; break; - case ETAP_P_USER_EVENT20: s = "20"; break; - case ETAP_P_USER_EVENT21: s = "21"; break; - case ETAP_P_USER_EVENT22: s = "22"; break; - case ETAP_P_USER_EVENT23: s = "23"; break; - case ETAP_P_USER_EVENT24: s = "24"; break; - case ETAP_P_USER_EVENT25: s = "25"; break; - case ETAP_P_USER_EVENT26: s = "26"; break; - case ETAP_P_USER_EVENT27: s = "27"; break; - case ETAP_P_USER_EVENT28: s = "28"; break; - case ETAP_P_USER_EVENT29: s = "29"; break; - case ETAP_P_USER_EVENT30: s = "30"; break; - case ETAP_P_USER_EVENT31: s = "31"; break; - default: - sprintf(buf, "dynamic %x", record->pc); - s = buf; - break; - } - - db_printf("user probe %s: [%x] data = %x %x %x %x\n", - s, - record->event, - record->data[0], - record->data[1], - record->data[2], - record->data[3]); -} - -static -void -print_kernel_event(mbuff_entry_t record, boolean_t data) -{ - char *text_name; - int i; - - /* assume zero event means that record was never written to */ - if(record->event == 0) - return; - - db_printf("%x: %x%08x: ", record->instance, record->time.tv_sec, - record->time.tv_nsec); - - switch (record->event) { - - case ETAP_P_THREAD_LIFE : - if (record->flags & EVENT_BEGIN) - db_printf("thread created [T:%x A:%x] P:%d\n", - record->data[0], - record->data[1], - record->data[2]); - else - db_printf("thread terminated [T:%x A:%x] P:%d\n", - record->data[0], - record->data[1], - record->data[2]); - break; - - case ETAP_P_SYSCALL_MACH : - if (record->flags & SYSCALL_TRAP) - text_name = system_table_lookup(SYS_TABLE_MACH_TRAP, - record->data[0]); - else - text_name = system_table_lookup(SYS_TABLE_MACH_MESSAGE, - record->data[0]); - - if (record->flags & EVENT_BEGIN) - db_printf("mach enter: %s [%x]\n", - text_name, - record->data[0]); - else - db_printf("mach exit :\n"); - break; - - case ETAP_P_SYSCALL_UNIX : - text_name = system_table_lookup(SYS_TABLE_UNIX_SYSCALL, - record->data[0]); - - if (record->flags & EVENT_BEGIN) - db_printf("unix enter: %s\n", text_name); - else - db_printf("unix exit : %s\n", text_name); - break; - - case ETAP_P_THREAD_CTX : - if (record->flags & EVENT_END) - db_printf("context switch to %x ", - record->data[0]); - else /* EVENT_BEGIN */ - db_printf("context switch from %x ", - record->data[0]); - - switch (record->data[1]) { - case BLOCKED_ON_SEMAPHORE : - db_printf("R: semaphore\n"); break; - case BLOCKED_ON_LOCK : - db_printf("R: lock\n"); break; - case BLOCKED_ON_MUTEX_LOCK : - db_printf("R: mutex lock\n"); break; - case BLOCKED_ON_COMPLEX_LOCK : - db_printf("R: complex lock\n"); break; - case BLOCKED_ON_PORT_RCV : - db_printf("R: port receive\n"); break; - case BLOCKED_ON_REAPER_DONE : - db_printf("R: reaper thread done\n"); break; - case BLOCKED_ON_IDLE_DONE : - db_printf("R: idle thread done\n"); break; - case BLOCKED_ON_TERMINATION : - db_printf("R: termination\n"); break; - default : - if (record->data[2]) - db_printf("R: ast %x\n", record->data[2]); - else - db_printf("R: undefined block\n"); - }; - break; - - case ETAP_P_INTERRUPT : - if (record->flags & EVENT_BEGIN) { - text_name = system_table_lookup(SYS_TABLE_INTERRUPT, - record->data[0]); - db_printf("intr enter: %s\n", text_name); - } else - db_printf("intr exit\n"); - break; - - case ETAP_P_ACT_ABORT : - db_printf("activation abort [A %x : S %x]\n", - 
record->data[1], - - record->data[0]); - break; - - case ETAP_P_PRIORITY : - db_printf("priority changed for %x N:%d O:%d\n", - record->data[0], - record->data[1], - record->data[2]); - break; - - case ETAP_P_EXCEPTION : - text_name = system_table_lookup(SYS_TABLE_EXCEPTION, - record->data[0]); - db_printf("exception: %s\n", text_name); - break; - - case ETAP_P_DEPRESSION : - if (record->flags & EVENT_BEGIN) - db_printf("priority depressed\n"); - else { - if (record->data[0] == 0) - db_printf("priority undepressed : timed out\n"); - else - db_printf("priority undepressed : self inflicted\n"); - } - break; - - case ETAP_P_MISC : - db_printf("flags: %x data: %x %x %x %x\n", record->flags, - record->data[0], record->data[1], record->data[2], - record->data[3]); - break; - - case ETAP_P_DETAP : - printf("flags: %x rtc: %x %09x dtime: %x %09x\n", - record->flags, record->data[0], record->data[1], - record->data[2], record->data[3]); - break; - - default: - for(i = 0; event_table_init[i].event != ETAP_NO_TRACE; ++i) - if(record->event == event_table_init[i].event) { - print_lock_event(record, event_table_init[i].name); - return; - } - db_printf("Unknown event: %d\n", record->event); - break; - } - if(data) - db_printf(" Data: %08x %08x %08x %08x\n", record->data[0], - record->data[1], record->data[2], record->data[3]); -} - -void print_lock_event(mbuff_entry_t record, const char *name) -{ - char *sym1, *sym2; - db_addr_t offset1, offset2; - - db_find_sym_and_offset(record->data[0], &sym1, &offset1); - - db_printf("%15s", name); - if (record->flags & SPIN_LOCK) - printf(" spin "); - else if (record->flags & READ_LOCK) - printf(" read "); - else if (record->flags & WRITE_LOCK) - printf(" write "); - else - printf(" undef "); - - if (record->flags & ETAP_CONTENTION) { - db_printf("wait lock %s+%x\n", - sym1, offset1); - } - else if (record->flags & ETAP_DURATION) { - db_find_sym_and_offset(record->data[1], &sym2, &offset2); - db_printf("lock %x+%x unlock %x+%x\n", - sym1, offset1, sym2, offset2); - } else { - db_printf("illegal op: neither HOLD or WAIT are specified\n"); - } - -} - -char * -system_table_lookup(unsigned int table, unsigned int number) -{ - int x; - char *name = NULL; - unsigned int offset; - - switch (table) { - case SYS_TABLE_MACH_TRAP: - name = mach_trap_name(number >> 4); - break; - case SYS_TABLE_MACH_MESSAGE: - for (x=0; x < mach_message_table_entries; x++) { - if (mach_message_table[x].number == number) { - name = mach_message_table[x].name; - break; - } - } - break; - case SYS_TABLE_UNIX_SYSCALL: - number = -number; - name = syscall_name(number); - break; - case SYS_TABLE_INTERRUPT: - db_find_sym_and_offset((int)ivect[number], &name, &offset); - break; - case SYS_TABLE_EXCEPTION: - name = exception_name(number); - break; - } - return (name != NULL) ? name : "undefined"; -} - -#endif /* MACH_KDB */ -#endif /* ETAP_MONITOR */ diff --git a/osfmk/kern/etap_macros.h b/osfmk/kern/etap_macros.h deleted file mode 100644 index 530522648..000000000 --- a/osfmk/kern/etap_macros.h +++ /dev/null @@ -1,456 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * The Event Trace Analysis Package - * ================================ - * - * Function: Traces micro-kernel events. - * - * Macro Notes: Several macros are added throughout the lock code. - * These macros allow for convenient configuration - * and code readability. - * - * The macro prefixes determine a specific trace - * configuration operation: - * - * CUM - Cumulative trace specific operation. - * MON - Monitored trace specific operation. - * ETAP - Both a cumulative and monitored trace - * operation. - */ - - -#ifndef _KERN_ETAP_MACROS_H_ -#define _KERN_ETAP_MACROS_H_ - -#include -#include -#include -#include -#include - - -#if ETAP - -#include -#include - -#include - -extern void etap_init_phase1(void); -extern void etap_init_phase2(void); -extern void etap_event_table_assign(struct event_table_chain *, etap_event_t); -extern unsigned int etap_get_pc(void); -extern event_table_t event_table; -extern subs_table_t subs_table; - -/* - * Time Macros - */ - -#define ETAP_TIMESTAMP(t) rtc_gettime_interrupts_disabled(&t) -#define ETAP_TIME_SUM(t,sum_me) t += sum_me -#define ETAP_TIME_SUB(t,stop,start) \ -MACRO_BEGIN \ - (t) = (stop); \ - SUB_MACH_TIMESPEC(&(t), &(start)); \ -MACRO_END -#define ETAP_TIME_SQR(t,sqr_me) t += sqr_me*sqr_me -#define ETAP_TIME_DIV(r,n,d) r = (u_short) n/d -#define ETAP_TIME_IS_ZERO(t) ((t).tv_sec == 0) -#define ETAP_TIME_CLEAR(t) ((t).tv_sec = 0) -#define ETAP_TIME_GREATER(t1,t2) ((t1) > (t2)) - -#else /* ETAP */ - -#define etap_init_phase1() -#define etap_init_phase2() -#define etap_event_table_assign(event) -#define ETAP_TIMESTAMP(t) -#define ETAP_TIME_SUB(t,start,stop) -#define ETAP_TIME_CLEAR(t) - -#endif /* ETAP */ - - -/* - * =================================================== - * ETAP: cumulative trace specific macros - * =================================================== - */ - -#if ETAP_LOCK_ACCUMULATE - -extern cbuff_entry_t etap_cbuff_reserve(event_table_t); -#if MACH_LDEBUG -extern simple_lock_t cbuff_locks; -#else -extern simple_lock_data_t cbuff_locks; -#endif -extern int cbuff_width; - -/* - * If cumulative hold tracing is enabled for the event (i.e., acquired lock), - * the CUM_HOLD_ACCUMULATE macro will update the appropriate cumulative buffer - * entry with the newly collected hold data. 
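The accumulate macros that follow fold each sample into running statistics (count, sum, sum of squares, min, max) and a fixed-width histogram whose bucket is the duration divided by cbuff_width, clamped to the last bucket. The arithmetic, pulled out of macro form into a plain function with hypothetical types and bucket count:

    #include <stdint.h>

    #define IBUCKETS 16                   /* hypothetical bucket count */

    struct cum_stats {
        uint64_t triggered;               /* number of samples */
        uint64_t sum, sum_sq;             /* for mean and variance */
        uint64_t min, max;
        uint64_t interval[IBUCKETS];      /* histogram of durations */
    };

    /* width plays the role of cbuff_width: the time span per bucket. */
    void cum_accumulate(struct cum_stats *s, uint64_t t, uint64_t width)
    {
        uint64_t bucket = t / width;

        s->triggered++;
        s->sum    += t;
        s->sum_sq += t * t;
        if (s->triggered == 1 || t < s->min)
            s->min = t;
        if (t > s->max)
            s->max = t;

        /* durations beyond the histogram land in the last bucket,
         * as the CUM_*_ACCUMULATE macros below arrange */
        s->interval[bucket >= IBUCKETS ? IBUCKETS - 1 : bucket]++;
    }

Keeping sum and sum of squares lets a post-processing tool recover mean and variance without storing individual samples, which is the point of the cumulative (as opposed to monitored) mode.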
- */ - -#define CUM_HOLD_ACCUMULATE(cp,total_time,dynamic,trace) \ -MACRO_BEGIN \ - u_short _bucket; \ - if ((cp) != CBUFF_ENTRY_NULL && ((trace) & CUM_DURATION)) { \ - if (dynamic) \ - simple_lock_no_trace(&cbuff_locks[dynamic-1]); \ - (cp)->hold.triggered++; \ - ETAP_TIME_SUM((cp)->hold.time,(total_time)); \ - ETAP_TIME_SQR((cp)->hold.time_sq,(total_time)); \ - if (ETAP_TIME_IS_ZERO((cp)->hold.min_time) || \ - ETAP_TIME_GREATER((cp)->hold.min_time,(total_time))) \ - (cp)->hold.min_time = (total_time); \ - if (ETAP_TIME_GREATER((total_time),(cp)->hold.max_time)) \ - (cp)->hold.max_time = (total_time); \ - ETAP_TIME_DIV(_bucket,(total_time),cbuff_width); \ - if (_bucket >= ETAP_CBUFF_IBUCKETS) \ - (cp)->hold_interval[ETAP_CBUFF_IBUCKETS-1]++; \ - else \ - (cp)->hold_interval[_bucket]++; \ - if (dynamic) \ - simple_unlock_no_trace(&cbuff_locks[dynamic-1]); \ - } \ -MACRO_END - -/* - * If cumulative wait tracing is enabled for the event (i.e., acquired lock), - * the CUM_WAIT_ACCUMULATE macro will update the appropriate cumulative - * buffer entry with the newly collected wait data. - */ - -#define CUM_WAIT_ACCUMULATE(cp,total_time,dynamic,trace) \ -MACRO_BEGIN \ - u_short _bucket; \ - if ((cp) != CBUFF_ENTRY_NULL && ((trace) & CUM_CONTENTION)) { \ - if (dynamic) \ - simple_lock_no_trace(&cbuff_locks[dynamic-1]); \ - (cp)->wait.triggered++; \ - ETAP_TIME_SUM((cp)->wait.time,(total_time)); \ - ETAP_TIME_SQR((cp)->wait.time_sq,(total_time)); \ - if (ETAP_TIME_IS_ZERO((cp)->wait.min_time) || \ - ETAP_TIME_GREATER((cp)->wait.min_time,(total_time))) \ - (cp)->wait.min_time = (total_time); \ - if (ETAP_TIME_GREATER((total_time),(cp)->wait.max_time)) \ - (cp)->wait.max_time = (total_time); \ - ETAP_TIME_DIV(_bucket,(total_time),cbuff_width); \ - if (_bucket >= ETAP_CBUFF_IBUCKETS) \ - (cp)->wait_interval[ETAP_CBUFF_IBUCKETS-1]++; \ - else \ - (cp)->wait_interval[_bucket]++; \ - if (dynamic) \ - simple_unlock_no_trace(&cbuff_locks[dynamic-1]); \ - } \ -MACRO_END - -/* - * Initially a lock's cbuff_read pointer is set to CBUFF_ENTRY_NULL. This - * saves space in the cumulative buffer in the event that a read lock is - * not acquired. In the case that a read lock is acquired, the - * CUM_READ_ENTRY_RESERVE macro is called. Here a cumulative - * record is reserved and initialized. - */ - -#define CUM_READ_ENTRY_RESERVE(l,cp,trace) \ -MACRO_BEGIN \ - if ((cp) == CBUFF_ENTRY_NULL && (trace) & ETAP_CUMULATIVE) { \ - (cp) = etap_cbuff_reserve(lock_event_table(l)); \ - if ((cp) != CBUFF_ENTRY_NULL) { \ - (cp)->event = lock_event_table(l)->event; \ - (cp)->instance = (u_int) l; \ - (cp)->kind = READ_LOCK; \ - } \ - } \ -MACRO_END - -#else /* ETAP_LOCK_ACCUMULATE */ -#define etap_cbuff_reserve(et) -#define CUM_HOLD_ACCUMULATE(cp,t,d,tr) -#define CUM_WAIT_ACCUMULATE(cp,t,d,tr) -#define CUM_READ_ENTRY_RESERVE(l,rep,tr) -#endif /* ETAP_LOCK_ACCUMULATE */ - -/* - * =============================================== - * ETAP: monitor trace specific macros - * =============================================== - */ - -#if ETAP_MONITOR -extern int mbuff_entries; -extern monitor_buffer_t mbuff[]; -#endif /* ETAP_MONITOR */ - - -#if ETAP_LOCK_MONITOR - -/* - * If monitor tracing is enabled for the lock, the - * MON_DATA_COLLECT macro will write collected lock data to - * the next slot in a cpu specific monitor buffer. Circular - * buffer maintenance is also performed here. 
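The multi-statement macros in this header are all bracketed with MACRO_BEGIN/MACRO_END, which Mach conventionally defines as a do { } while (0) pair so that an expansion behaves as a single statement. A self-contained illustration of why that matters (the LOG_TWICE macro is invented for the example):

    #include <stdio.h>

    #define MACRO_BEGIN do {
    #define MACRO_END   } while (0)

    #define LOG_TWICE(msg)        \
    MACRO_BEGIN                   \
        puts(msg);                \
        puts(msg);                \
    MACRO_END

    int main(void)
    {
        int verbose = 0;

        if (verbose)
            LOG_TWICE("hello");   /* expands to one statement, so the
                                     trailing semicolon and the else
                                     below both parse correctly */
        else
            puts("quiet");
        return 0;
    }

Without the do/while wrapper, a brace-only block followed by the caller's semicolon would end the if statement early and orphan the else.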
- */ - -#define MON_DATA_COLLECT(l,e,total_time,type,op,trace) \ -MACRO_BEGIN \ - mbuff_entry_t _mp; \ - int _cpu, _ent, _s; \ - if ((trace) & op) { \ - mp_disable_preemption(); \ - _cpu = cpu_number(); \ - _s = splhigh(); \ - _ent = mbuff[_cpu]->free; \ - _mp = &mbuff[_cpu]->entry[_ent]; \ - _mp->event = lock_event_table(l)->event; \ - _mp->flags = ((op) | (type)); \ - _mp->instance = (u_int) (l); \ - _mp->time = (total_time); \ - _mp->data[0] = (e)->start_pc; \ - _mp->data[1] = (e)->end_pc; \ - mbuff[_cpu]->free = (_ent+1) % mbuff_entries; \ - if (mbuff[_cpu]->free == 0) \ - mbuff[_cpu]->timestamp++; \ - splx(_s); \ - mp_enable_preemption(); \ - } \ -MACRO_END - -#define MON_CLEAR_PCS(l) \ -MACRO_BEGIN \ - (l)->start_pc = 0; \ - (l)->end_pc = 0; \ -MACRO_END - -#define MON_ASSIGN_PC(target,source,trace) \ - if ((trace) & ETAP_MONITORED) target = source - -#else /* ETAP_LOCK_MONITOR */ -#define MON_DATA_COLLECT(l,le,tt,t,o,tr) -#define MON_GET_PC(pc,tr) -#define MON_CLEAR_PCS(l) -#define MON_ASSIGN_PC(t,s,tr) -#endif /* ETAP_LOCK_MONITOR */ - - -#if ETAP_EVENT_MONITOR - -#include - -#define ETAP_EXCEPTION_PROBE(_f, _th, _ex, _sysnum) \ - if (_ex == EXC_SYSCALL) { \ - ETAP_PROBE_DATA(ETAP_P_SYSCALL_UNIX, \ - _f, \ - _th, \ - _sysnum, \ - sizeof(int)); \ - } -#else /* ETAP_EVENT_MONITOR */ -#define ETAP_EXCEPTION_PROBE(_f, _th, _ex, _sysnum) -#endif /* ETAP_EVENT_MONITOR */ - -#if ETAP_EVENT_MONITOR - -#define ETAP_PROBE_DATA_COND(_event, _flags, _thread, _data, _size, _cond) \ -MACRO_BEGIN \ - mbuff_entry_t _mp; \ - int _cpu, _ent, _s; \ - if (event_table[_event].status && (_cond)) { \ - mp_disable_preemption(); \ - _cpu = cpu_number(); \ - _s = splhigh(); \ - _ent = mbuff[_cpu]->free; \ - _mp = &mbuff[_cpu]->entry[_ent]; \ - ETAP_TIMESTAMP(_mp->time); \ - _mp->pc = etap_get_pc(); \ - _mp->event = _event; \ - _mp->flags = KERNEL_EVENT | _flags; \ - _mp->instance = (u_int) _thread; \ - bcopy((char *) _data, (char *) _mp->data, _size); \ - mbuff[_cpu]->free = (_ent+1) % mbuff_entries; \ - if (mbuff[_cpu]->free == 0) \ - mbuff[_cpu]->timestamp++; \ - splx(_s); \ - mp_enable_preemption(); \ - } \ -MACRO_END - -#define ETAP_PROBE(_event, _flags, _thread) \ - ETAP_PROBE_DATA_COND(_event, _flags, _thread, 0, 0, 1) - -#define ETAP_PROBE_DATA(_event, _flags, _thread, _data, _size) \ - ETAP_PROBE_DATA_COND(_event, _flags, _thread, _data, _size, \ - (_thread)->etap_trace) - -#define ETAP_DATA_LOAD(ed, x) ((ed) = (u_int) (x)) -#define ETAP_SET_REASON(_th, _reason) ((_th)->etap_reason = (_reason)) - -#else /* ETAP_EVENT_MONITOR */ -#define ETAP_PROBE(e,f,th) -#define ETAP_PROBE_DATA(e,f,th,d,s) -#define ETAP_PROBE_DATA_COND(e,f,th,d,s,c) -#define ETAP_DATA_LOAD(d,x); -#define ETAP_SET_REASON(t,r) -#endif /* ETAP_EVENT_MONITOR */ - -/* - * ================================= - * ETAP: general lock macros - * ================================= - */ - -#if ETAP_LOCK_TRACE - -#define ETAP_TOTAL_TIME(t,stop,start) \ - ETAP_TIME_SUB((t),(stop),(start)) - -#define ETAP_DURATION_TIMESTAMP(e,trace) \ -MACRO_BEGIN \ - if ((trace) & ETAP_DURATION) \ - ETAP_TIMESTAMP((e)->start_hold_time); \ -MACRO_END - -#define ETAP_COPY_START_HOLD_TIME(entry,time,trace) \ -MACRO_BEGIN \ - if ((trace) & ETAP_DURATION) \ - (entry)->start_hold_time = time; \ -MACRO_END - -#define ETAP_CONTENTION_TIMESTAMP(e,trace) \ -MACRO_BEGIN \ - if ((trace) & ETAP_CONTENTION) \ - ETAP_TIMESTAMP((e)->start_wait_time); \ -MACRO_END - -#define ETAP_STAMP(event_table,trace,dynamic) \ -MACRO_BEGIN \ - if ((event_table) != EVENT_TABLE_NULL) { \ - 
(dynamic) = (event_table)->dynamic; \ - (trace) = (event_table)->status; \ - } \ -MACRO_END - -#define ETAP_WHOLE_OP(l) \ - (!(ETAP_TIME_IS_ZERO((l)->u.s.start_hold_time))) -#define ETAP_DURATION_ENABLED(trace) ((trace) & ETAP_DURATION) -#define ETAP_CONTENTION_ENABLED(trace) ((trace) & ETAP_CONTENTION) - -/* - * The ETAP_CLEAR_TRACE_DATA macro sets the etap specific fields - * of the simple_lock_t structure to zero. - * - * This is always done just before a simple lock is released. - */ - -#define ETAP_CLEAR_TRACE_DATA(l) \ -MACRO_BEGIN \ - ETAP_TIME_CLEAR((l)->u.s.start_hold_time); \ - MON_CLEAR_PCS((l)); \ -MACRO_END - - -/* ================================================== - * The ETAP_XXX_ENTRY macros manipulate the locks - * start_list (a linked list of start data). - * ================================================== - */ - -#define ETAP_CREATE_ENTRY(entry,trace) \ -MACRO_BEGIN \ - if ((trace) & ETAP_TRACE_ON) \ - (entry) = get_start_data_node(); \ -MACRO_END - -#define ETAP_LINK_ENTRY(l,entry,trace) \ -MACRO_BEGIN \ - if ((trace) & ETAP_TRACE_ON) { \ - (entry)->next = (l)->u.s.start_list; \ - (l)->u.s.start_list = (entry); \ - (entry)->thread_id = (u_int) current_thread(); \ - ETAP_TIME_CLEAR((entry)->start_wait_time); \ - } \ -MACRO_END - -#define ETAP_FIND_ENTRY(l,entry,trace) \ -MACRO_BEGIN \ - u_int _ct; \ - _ct = (u_int) current_thread(); \ - (entry) = (l)->u.s.start_list; \ - while ((entry) != SD_ENTRY_NULL && (entry)->thread_id != _ct) \ - (entry) = (entry)->next; \ - if ((entry) == SD_ENTRY_NULL) \ - (trace) = 0; \ -MACRO_END - -#define ETAP_UNLINK_ENTRY(l,entry) \ -MACRO_BEGIN \ - boolean_t _first = TRUE; \ - start_data_node_t _prev; \ - u_int _ct; \ - _ct = (u_int) current_thread(); \ - (entry) = (l)->u.s.start_list; \ - while ((entry) != SD_ENTRY_NULL && (entry)->thread_id != _ct){ \ - _prev = (entry); \ - (entry) = (entry)->next; \ - _first = FALSE; \ - } \ - if (entry != SD_ENTRY_NULL) { \ - if (_first) \ - (l)->u.s.start_list = (entry)->next; \ - else \ - _prev->next = (entry)->next; \ - (entry)->next = SD_ENTRY_NULL; \ - } \ -MACRO_END - -#define ETAP_DESTROY_ENTRY(entry) \ -MACRO_BEGIN \ - if ((entry) != SD_ENTRY_NULL) \ - free_start_data_node ((entry)); \ -MACRO_END - -#else /* ETAP_LOCK_TRACE */ -#define ETAP_TOTAL_TIME(t,stop,start) -#define ETAP_DURATION_TIMESTAMP(le,tr) -#define ETAP_CONTENTION_TIMESTAMP(le,tr) -#define ETAP_COPY_START_HOLD_TIME(le,t,tr) -#define ETAP_STAMP(tt,tr,d) -#define ETAP_DURATION_ENABLED(tr) (0) /* always fails */ -#define ETAP_CONTENTION_ENABLED(tr) (0) /* always fails */ -#define ETAP_CLEAR_TRACE_DATA(l) -#define ETAP_CREATE_ENTRY(e,tr) -#define ETAP_LINK_ENTRY(l,e,tr) -#define ETAP_FIND_ENTRY(l,e,tr) -#define ETAP_UNLINK_ENTRY(l,e) -#define ETAP_DESTROY_ENTRY(e) -#endif /* ETAP_LOCK_TRACE */ - -#endif /* _KERN_ETAP_MACROS_H_ */ diff --git a/osfmk/kern/etap_map.c b/osfmk/kern/etap_map.c deleted file mode 100644 index 76b125c60..000000000 --- a/osfmk/kern/etap_map.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1996/09/17 16:26:58 bruel - * use standalone includes only - * [1996/09/17 15:38:08 bruel] - * - * Revision 1.1.4.1 1996/02/02 12:16:40 emcmanus - * Copied from nmk20b5_shared. - * [1996/02/01 16:56:11 emcmanus] - * - * Revision 1.1.2.1 1995/12/30 17:12:07 emcmanus - * Renamed from i386/etap_map.c and made this file machine-independent. - * Delete declarations of event_table and subs_table, now declared with - * different types in etap_macros.h. - * [1995/12/30 17:03:55 emcmanus] - * - * Revision 1.1.2.4 1995/10/09 17:07:21 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:48:15 joe] - * - * Revision 1.1.2.3 1995/09/18 19:10:05 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:48:15 joe] - * - * Revision 1.1.2.2 1995/01/10 04:51:59 devrcs - * mk6 CR801 - merge up from nmk18b4 to nmk18b7 - * tweak signatures, a la osc1.3b26 - * [1994/12/09 20:38:32 dwm] - * - * mk6 CR801 - new file for mk6_shared from cnmk_shared. - * [1994/12/01 21:11:35 dwm] - * - * Revision 1.1.2.1 1994/10/21 18:35:57 joe - * Initial ETAP submission - * [1994/10/20 19:21:39 joe] - * - * $EndLog$ - */ -/* - * File : etap_map.c - * - * Pseudo-device driver to calculate the virtual addresses - * of all mappable ETAP buffers and tables: event table, - * subsystem table, cumulative buffer and monitor buffers. 
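The pseudo-device below multiplexes all of these buffers through one driver by encoding the target in the minor device number. A small sketch of that decoding, with hypothetical constant names and CPU count:

    /* Minor numbers, mirroring the map described above:
     * 0 = event table, 1 = subsystem table, 2 = cumulative buffer,
     * 3 .. 3+NCPUS-1 = per-cpu monitor buffers. */
    enum etap_minor {
        MINOR_EVENT_TABLE = 0,
        MINOR_SUBS_TABLE  = 1,
        MINOR_CUM_BUFFER  = 2,
        MINOR_FIRST_MBUFF = 3,
    };

    #define NCPUS       4                 /* hypothetical */
    #define MAX_DEVICES (MINOR_FIRST_MBUFF + NCPUS)

    /* Returns the cpu index for a monitor-buffer minor, the minor
     * itself for the three tables, or -1 for an invalid device. */
    int decode_minor(int minor)
    {
        if (minor < 0 || minor >= MAX_DEVICES)
            return -1;
        if (minor >= MINOR_FIRST_MBUFF)
            return minor - MINOR_FIRST_MBUFF;
        return minor;
    }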
- * - */ -/* - * Minor device number representation: - * - * 0 = ETAP_TABLE_EVENT - * 1 = ETAP_TABLE_SUBSYSTEM - * 2 = ETAP_BUFFER_CUMULATIVE - * 3 & up = a specific monitor buffer - * - */ - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -#if ETAP_LOCK_ACCUMULATE -extern cumulative_buffer_t cbuff; -#endif /* ETAP_LOCK_ACCUMULATE */ - -#if ETAP_MONITOR -extern monitor_buffer_t mbuff[]; -#endif /* ETAP_MONITOR */ - - -/* - * etap_map_open - Check for valid minor device - */ - -io_return_t -etap_map_open( - dev_t dev, - dev_mode_t flags, - io_req_t ior) -{ - int buffer = minor(dev); - - if (buffer >= ETAP_MAX_DEVICES) - return(D_NO_SUCH_DEVICE); - - return(D_SUCCESS); -} - -vm_offset_t -etap_map_mmap ( - dev_t dev, - vm_offset_t off, - vm_prot_t prot) -{ - int buffer = minor(dev); - vm_offset_t addr; - - /* - * Check request validity - */ - - if (prot & VM_PROT_WRITE) - return(KERN_PROTECTION_FAILURE); - - if (buffer < 0 || buffer >= ETAP_MAX_DEVICES) - return(KERN_INVALID_ARGUMENT); - - switch(buffer) { - case ETAP_TABLE_EVENT : - addr = trunc_page((char *) event_table) + off; - break; - case ETAP_TABLE_SUBSYSTEM : - addr = trunc_page((char *) subs_table) + off; - break; - case ETAP_BUFFER_CUMULATIVE : -#if ETAP_LOCK_ACCUMULATE - addr = (vm_offset_t) cbuff + off; - break; -#else /* ETAP_LOCK_ACCUMULATE */ - return(KERN_INVALID_ARGUMENT); -#endif /* ETAP_LOCK_ACCUMULATE */ - - default : -#if ETAP_MONITOR - addr = (vm_offset_t) mbuff[buffer - 3] + off; - break; -#else /* ETAP_MONITOR */ - return(KERN_INVALID_ARGUMENT); -#endif /* ETAP_MONITOR */ - - } - return machine_btop(pmap_extract(pmap_kernel(), addr)); -} diff --git a/osfmk/kern/etap_map.h b/osfmk/kern/etap_map.h deleted file mode 100644 index 1e6349d0f..000000000 --- a/osfmk/kern/etap_map.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.4.1 1996/02/02 12:16:46 emcmanus - * Copied from nmk20b5_shared. - * [1996/02/01 16:56:16 emcmanus] - * - * Revision 1.1.2.1 1995/12/30 17:12:11 emcmanus - * Renamed from i386/etap_map.h and fixed parentheses in ETAP_MAX_DEVICES. - * [1995/12/30 17:04:00 emcmanus] - * - * Revision 1.1.2.4 1995/10/09 17:07:25 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:48:18 joe] - * - * Revision 1.1.2.3 1995/09/18 19:10:09 devrcs - * Merged in RT3_SHARED ETAP code. 
- * [1995/09/13 18:48:18 joe] - * - * Revision 1.1.2.2 1995/01/10 04:52:03 devrcs - * mk6 CR801 - merge up from nmk18b4 to nmk18b7 - * tweak protos, a la osc1.3b26 - * [1994/12/09 20:38:34 dwm] - * - * mk6 CR801 - new file for mk6_shared from cnmk_shared. - * [1994/12/01 21:11:38 dwm] - * - * Revision 1.1.2.1 1994/10/21 18:36:01 joe - * Initial ETAP submission - * [1994/10/20 19:21:40 joe] - * - * $EndLog$ - */ -/* - * File : etap_map.h - */ - -#ifndef _ETAP_MAP_H_ -#define _ETAP_MAP_H_ - -#define ETAP_MAX_DEVICES (3+NCPUS) - - -extern io_return_t etap_map_open( - dev_t dev, - dev_mode_t flags, - io_req_t ior); - -extern vm_offset_t etap_map_mmap( - dev_t dev, - vm_offset_t off, - vm_prot_t prot); - -#endif /* _ETAP_MAP_H_ */ diff --git a/osfmk/kern/etap_options.h b/osfmk/kern/etap_options.h deleted file mode 100644 index 4af3a4022..000000000 --- a/osfmk/kern/etap_options.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:35 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.9.2 1995/10/09 17:13:48 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:34:10 joe] - * - * Revision 1.1.6.1 1995/05/11 20:57:18 burke - * Update ETAP changes. - * [1995/05/09 17:15:03 burke] - * - * Revision 1.1.9.1 1995/09/18 19:13:34 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:34:10 joe] - * - * Revision 1.1.6.1 1995/05/11 20:57:18 burke - * Update ETAP changes. - * [1995/05/09 17:15:03 burke] - * - * Revision 1.1.3.1 1994/12/14 18:55:51 joe - * ETAP nswc merge - * [1994/12/14 17:07:33 joe] - * - * Revision 1.1.1.2 1994/12/12 15:34:48 joe - * Initial check-in - * - * $EndLog$ - */ -/* - * ETAP build options are selected using the config.debug configuration file. - * - * ETAP options are: - * ETAP_LOCK_ACCUMULATE - Cumulative lock tracing - * ETAP_LOCK_MONITOR - Monitor lock behavior - * ETAP_EVENT_MONITOR - Monitor general events - * - * Derived options are: - * ETAP_LOCK_TRACE - Equals one if either cumulative or monitored - * lock tracing is configured (zero otherwise). - * ETAP_MONITOR - Equals one if either lock or event monitoring - * is configured (zero otherwise). 
- */ - -#ifndef _KERN_ETAP_OPTIONS_H_ -#define _KERN_ETAP_OPTIONS_H_ - -#ifdef ETAP_DYNAMIC_OPTIONS -#include -#include -#include -#include -#else -#define ETAP 0 -#define ETAP_LOCK_MONITOR 0 -#define ETAP_LOCK_ACCUMULATE 0 -#define ETAP_EVENT_MONITOR 0 -#endif - -#if ETAP_LOCK_MONITOR || ETAP_LOCK_ACCUMULATE -#define ETAP_LOCK_TRACE 1 -#else /* ETAP_LOCK_MONITOR || ETAP_LOCK_ACCUMULATE */ -#define ETAP_LOCK_TRACE 0 -#endif /* ETAP_LOCK_MONITOR || ETAP_LOCK_ACCUMULATE */ - -#if ETAP_LOCK_MONITOR || ETAP_EVENT_MONITOR -#define ETAP_MONITOR 1 -#else /* ETAP_LOCK_MONITOR || ETAP_EVENT_MONITOR */ -#define ETAP_MONITOR 0 -#endif /* ETAP_LOCK_MONITOR || ETAP_EVENT_MONITOR */ - -#endif /* _KERN_ETAP_OPTIONS_H_ */ diff --git a/osfmk/kern/etap_pool.c b/osfmk/kern/etap_pool.c deleted file mode 100644 index afb69bdd7..000000000 --- a/osfmk/kern/etap_pool.c +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.12.1 1996/09/17 16:27:00 bruel - * fixed bzero prototype. - * [96/09/17 bruel] - * - * Revision 1.1.2.4 1995/10/09 17:13:51 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:34:15 joe] - * - * Revision 1.1.2.3 1995/09/18 19:13:37 devrcs - * Merged in RT3_SHARED ETAP code. - * [1995/09/13 18:34:15 joe] - * - * Revision 1.1.2.2 1995/01/10 05:11:15 devrcs - * mk6 CR801 - merge up from nmk18b4 to nmk18b7 - * patch up spinlock references ==> simplelock - * [1994/12/09 20:54:30 dwm] - * - * mk6 CR801 - new file for mk6_shared from cnmk_shared. - * [1994/12/01 21:11:49 dwm] - * - * Revision 1.1.2.1 1994/10/21 18:28:50 joe - * Initial ETAP submission - * [1994/10/20 19:31:33 joe] - * - * $EndLog$ - */ -/* - * File: etap_pool.c - * - * etap_pool.c contains the functions for maintenance - * of the start_data_pool. The start_data_pool is - * used by the ETAP package. Its primary - * objective is to provide start_data_nodes to complex - * locks so they can hold start information for read - * locks (since multiple readers can acquire a read - * lock). Each complex lock will maintain a linked - * list of these nodes. - * - * NOTES: The start_data_pool is used instead of zalloc to - * eliminate complex lock dependancies. If zalloc was used, - * then no complex locks could be used in zalloc code paths. - * This is both difficult and unrealistic, since zalloc - * allocates memory dynamically. 
Hence, this dependancy is - * eliminated with the use of the statically allocated - * start_data_pool. - * - */ - -#include -#include -#include -#include -#include - -#if ETAP_LOCK_TRACE - -/* - * Statically allocate the start data pool, - * header and lock. - */ - -struct start_data_node sd_pool [SD_POOL_ENTRIES]; /* static buffer */ -start_data_node_t sd_free_list; /* pointer to free node list */ -int sd_sleepers; /* number of blocked threads */ - -simple_lock_data_t sd_pool_lock; - - -/* - * Interrupts must be disabled while the - * sd_pool_lock is taken. - */ - -#define pool_lock(s) \ -MACRO_BEGIN \ - s = splhigh(); \ - simple_lock(&sd_pool_lock); \ -MACRO_END - -#define pool_unlock(s) \ -MACRO_BEGIN \ - simple_unlock(&sd_pool_lock); \ - splx(s); \ -MACRO_END - - -/* - * ROUTINE: init_start_data_pool - * - * FUNCTION: Initialize the start_data_pool: - * - create the free list chain for the max - * number of entries. - * - initialize the sd_pool_lock - */ - -void -init_start_data_pool(void) -{ - int x; - - simple_lock_init(&sd_pool_lock, ETAP_MISC_SD_POOL); - - /* - * Establish free list pointer chain - */ - - for (x=0; x < SD_POOL_ENTRIES-1; x++) - sd_pool[x].next = &sd_pool[x+1]; - - sd_pool[SD_POOL_ENTRIES-1].next = SD_ENTRY_NULL; - sd_free_list = &sd_pool[0]; - sd_sleepers = 0; -} - -/* - * ROUTINE: get_start_data_node - * - * FUNCTION: Returns a free node from the start data pool - * to the caller. If none are available, the - * call will block, then try again. - */ - -start_data_node_t -get_start_data_node(void) -{ - start_data_node_t avail_node; - spl_t s; - - pool_lock(s); - - /* - * If the pool does not have any nodes available, - * block until one becomes free. - */ - - while (sd_free_list == SD_ENTRY_NULL) { - - sd_sleepers++; - assert_wait((event_t) &sd_pool[0], THREAD_UNINT); - pool_unlock(s); - - printf ("DEBUG-KERNEL: empty start_data_pool\n"); - thread_block(THREAD_CONTINUE_NULL); - - pool_lock(s); - sd_sleepers--; - } - - avail_node = sd_free_list; - sd_free_list = sd_free_list->next; - - pool_unlock(s); - - bzero ((char *) avail_node, sizeof(struct start_data_node)); - avail_node->next = SD_ENTRY_NULL; - - return (avail_node); -} - -/* - * ROUTINE: free_start_data_node - * - * FUNCTION: Releases start data node back to the sd_pool, - * so that it can be used again. - */ - -void -free_start_data_node ( - start_data_node_t node) -{ - boolean_t wakeup = FALSE; - spl_t s; - - if (node == SD_ENTRY_NULL) - return; - - pool_lock(s); - - node->next = sd_free_list; - sd_free_list = node; - - if (sd_sleepers) - wakeup = TRUE; - - pool_unlock(s); - - if (wakeup) - thread_wakeup((event_t) &sd_pool[0]); -} - -#endif /* ETAP_LOCK_TRACE */ diff --git a/osfmk/kern/etap_pool.h b/osfmk/kern/etap_pool.h deleted file mode 100644 index 7114cd52b..000000000 --- a/osfmk/kern/etap_pool.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:54 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.4 1995/10/09 17:13:55 devrcs - * Merged RT3_SHARED version into `mainline.' - * [1995/09/13 16:17:31 joe] - * - * Revision 1.1.2.3 1995/09/18 19:13:40 devrcs - * Merged RT3_SHARED version into `mainline.' - * [1995/09/13 16:17:31 joe] - * - * Revision 1.1.2.2 1995/01/10 05:11:19 devrcs - * mk6 CR801 - new file for mk6_shared from cnmk_shared. - * [1994/12/01 21:11:51 dwm] - * - * Revision 1.1.2.1 1994/10/21 18:28:53 joe - * Initial ETAP submission - * [1994/10/20 19:31:35 joe] - * - * $EndLog$ - */ -/* - * File : etap_pool.h - * - * The start_data_node structure is primarily needed to hold - * start information for read locks (since multiple readers - * can acquire a read lock). For consistency, however, the - * structure is used for write locks as well. Each complex - * lock will maintain a linked list of these structures. - */ - -#ifndef _KERN_ETAP_POOL_H_ -#define _KERN_ETAP_POOL_H_ - -#include -#include -#include - -#if ETAP_LOCK_TRACE - -#include -#include -#include -#include - -struct start_data_node { - unsigned int thread_id; /* thread id */ - etap_time_t start_hold_time; /* time of last acquisition */ - etap_time_t start_wait_time; /* time of first miss */ - unsigned int start_pc; /* pc of acquiring function */ - unsigned int end_pc; /* pc of relinquishing function */ - struct start_data_node *next; /* pointer to next list entry */ -}; - -typedef struct start_data_node* start_data_node_t; - -/* - * The start_data_node pool is statically - * allocated and privatly maintained - */ - -#define SD_POOL_ENTRIES (NCPUS * 256) - -extern void init_start_data_pool(void); -extern start_data_node_t get_start_data_node(void); -extern void free_start_data_node(start_data_node_t); - -#else /* ETAP_LOCK_TRACE */ -typedef boolean_t start_data_node_t; -#define get_start_data_node() -#define free_start_start_data_node(node) -#endif /* ETAP_LOCK_TRACE */ - -#define SD_ENTRY_NULL ((start_data_node_t) 0) - -#endif /* _KERN_ETAP_POOL_H_ */ diff --git a/osfmk/kern/exception.c b/osfmk/kern/exception.c index 306470b36..67abd9fbf 100644 --- a/osfmk/kern/exception.c +++ b/osfmk/kern/exception.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -67,12 +68,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -104,6 +103,20 @@ unsigned long c_tsk_exc_raise = 0; unsigned long c_tsk_exc_raise_state = 0; unsigned long c_tsk_exc_raise_state_id = 0; +/* forward declarations */ +void exception_deliver( + exception_type_t exception, + exception_data_t code, + mach_msg_type_number_t codeCnt, + struct exception_action *excp, + mutex_t *mutex); + +#ifdef MACH_BSD +kern_return_t bsd_exception( + exception_type_t exception, + exception_data_t code, + mach_msg_type_number_t codeCnt); +#endif /* MACH_BSD */ /* * Routine: exception_deliver @@ -125,7 +138,7 @@ exception_deliver( struct exception_action *excp, mutex_t *mutex) { - thread_act_t a_self = current_act(); + thread_t self = current_thread(); ipc_port_t exc_port; int behavior; int flavor; @@ -135,7 +148,7 @@ exception_deliver( * Save work if we are terminating. * Just go back to our AST handler. */ - if (!a_self->active) + if (!self->active) thread_exception_return(); /* @@ -171,8 +184,8 @@ exception_deliver( thread_state_data_t state; c_thr_exc_raise_state++; - state_cnt = state_count[flavor]; - kr = thread_getstatus(a_self, flavor, + state_cnt = _MachineStateCount[flavor]; + kr = thread_getstatus(self, flavor, (thread_state_t)state, &state_cnt); if (kr == KERN_SUCCESS) { @@ -182,7 +195,7 @@ exception_deliver( state, state_cnt, state, &state_cnt); if (kr == MACH_MSG_SUCCESS) - kr = thread_setstatus(a_self, flavor, + kr = thread_setstatus(self, flavor, (thread_state_t)state, state_cnt); } @@ -196,8 +209,8 @@ exception_deliver( case EXCEPTION_DEFAULT: c_thr_exc_raise++; kr = exception_raise(exc_port, - retrieve_act_self_fast(a_self), - retrieve_task_self_fast(a_self->task), + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), exception, code, codeCnt); @@ -211,21 +224,21 @@ exception_deliver( thread_state_data_t state; c_thr_exc_raise_state_id++; - state_cnt = state_count[flavor]; - kr = thread_getstatus(a_self, flavor, + state_cnt = _MachineStateCount[flavor]; + kr = thread_getstatus(self, flavor, (thread_state_t)state, &state_cnt); if (kr == KERN_SUCCESS) { kr = exception_raise_state_identity(exc_port, - retrieve_act_self_fast(a_self), - retrieve_task_self_fast(a_self->task), + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), exception, code, codeCnt, &flavor, state, state_cnt, state, &state_cnt); if (kr == MACH_MSG_SUCCESS) - kr = thread_setstatus(a_self, flavor, + kr = thread_setstatus(self, flavor, (thread_state_t)state, state_cnt); } @@ -255,12 +268,12 @@ exception_deliver( * Doesn't return. */ void -exception( +exception_triage( exception_type_t exception, exception_data_t code, mach_msg_type_number_t codeCnt) { - thread_act_t thr_act; + thread_t thread; task_t task; host_priv_t host_priv; struct exception_action *excp; @@ -274,9 +287,9 @@ exception( /* * Try to raise the exception at the activation level. 
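The declarations in exception_triage (thread, task, host_priv) reflect the classic Mach delivery order: offer the exception to the activation's handler first, then escalate to the task's, then the host's. A schematic of that escalation loop, with a hypothetical handler type standing in for the exception-port plumbing:

    #include <stdbool.h>
    #include <stddef.h>

    /* Hypothetical handler slot: returns true if a server accepted
     * and successfully handled the exception. */
    typedef bool (*exc_handler_t)(int exception, const int *code, int count);

    /* Try thread-, then task-, then host-level handlers, mirroring
     * the activation -> task -> host order in exception_triage. */
    bool deliver_with_escalation(exc_handler_t thread_h,
                                 exc_handler_t task_h,
                                 exc_handler_t host_h,
                                 int exception, const int *code, int count)
    {
        exc_handler_t chain[3] = { thread_h, task_h, host_h };

        for (int i = 0; i < 3; i++)
            if (chain[i] != NULL && chain[i](exception, code, count))
                return true;      /* handled; stop escalating */
        return false;             /* unhandled: caller terminates the task */
    }

Each level is tried under its own lock in the real code; the sketch keeps only the control flow.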
*/ - thr_act = current_act(); - mutex = mutex_addr(thr_act->lock); - excp = &thr_act->exc_actions[exception]; + thread = current_thread(); + mutex = mutex_addr(thread->mutex); + excp = &thread->exc_actions[exception]; exception_deliver(exception, code, codeCnt, excp, mutex); /* @@ -323,10 +336,9 @@ bsd_exception( mach_msg_type_number_t codeCnt) { task_t task; - host_priv_t host_priv; struct exception_action *excp; mutex_t *mutex; - thread_act_t a_self = current_act(); + thread_t self = current_thread(); ipc_port_t exc_port; int behavior; int flavor; @@ -343,7 +355,7 @@ bsd_exception( * Save work if we are terminating. * Just go back to our AST handler. */ - if (!a_self->active) { + if (!self->active) { return(KERN_FAILURE); } @@ -380,8 +392,8 @@ bsd_exception( thread_state_data_t state; c_thr_exc_raise_state++; - state_cnt = state_count[flavor]; - kr = thread_getstatus(a_self, flavor, + state_cnt = _MachineStateCount[flavor]; + kr = thread_getstatus(self, flavor, (thread_state_t)state, &state_cnt); if (kr == KERN_SUCCESS) { @@ -391,7 +403,7 @@ bsd_exception( state, state_cnt, state, &state_cnt); if (kr == MACH_MSG_SUCCESS) - kr = thread_setstatus(a_self, flavor, + kr = thread_setstatus(self, flavor, (thread_state_t)state, state_cnt); } @@ -405,8 +417,8 @@ bsd_exception( case EXCEPTION_DEFAULT: c_thr_exc_raise++; kr = exception_raise(exc_port, - retrieve_act_self_fast(a_self), - retrieve_task_self_fast(a_self->task), + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), exception, code, codeCnt); @@ -419,21 +431,21 @@ bsd_exception( thread_state_data_t state; c_thr_exc_raise_state_id++; - state_cnt = state_count[flavor]; - kr = thread_getstatus(a_self, flavor, + state_cnt = _MachineStateCount[flavor]; + kr = thread_getstatus(self, flavor, (thread_state_t)state, &state_cnt); if (kr == KERN_SUCCESS) { kr = exception_raise_state_identity(exc_port, - retrieve_act_self_fast(a_self), - retrieve_task_self_fast(a_self->task), + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), exception, code, codeCnt, &flavor, state, state_cnt, state, &state_cnt); if (kr == MACH_MSG_SUCCESS) - kr = thread_setstatus(a_self, flavor, + kr = thread_setstatus(self, flavor, (thread_state_t)state, state_cnt); } @@ -464,12 +476,9 @@ kern_return_t sys_perf_notify(struct task *task, { host_priv_t hostp; struct exception_action *excp; - thread_act_t act = current_act(); - thread_t thr = current_thread(); + thread_t thread = current_thread(); ipc_port_t xport; kern_return_t ret; - int abrt; - spl_t ints; wait_interrupt_t wsave; hostp = host_priv_self(); /* Get the host privileged ports */ @@ -504,8 +513,8 @@ kern_return_t sys_perf_notify(struct task *task, wsave = thread_interrupt_level(THREAD_UNINT); /* Make sure we aren't aborted here */ ret = exception_raise(xport, /* Send the exception to the perf handler */ - retrieve_act_self_fast(act), /* Not always the dying guy */ - retrieve_task_self_fast(act->task), /* Not always the dying guy */ + retrieve_thread_self_fast(thread), /* Not always the dying guy */ + retrieve_task_self_fast(thread->task), /* Not always the dying guy */ EXC_RPC_ALERT, /* Unused exception type until now */ code, codeCnt); @@ -513,4 +522,3 @@ kern_return_t sys_perf_notify(struct task *task, return(ret); /* Tell caller how it went */ } - diff --git a/osfmk/kern/exception.h b/osfmk/kern/exception.h index 88ef46f6e..766e216b4 100644 --- a/osfmk/kern/exception.h +++ b/osfmk/kern/exception.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,8 +23,8 @@ * @OSF_COPYRIGHT@ */ -#ifndef _EXCEPTION_H_ -#define _EXCEPTION_H_ +#ifndef _KERN_EXCEPTION_H_ +#define _KERN_EXCEPTION_H_ #include #include @@ -42,7 +42,7 @@ struct exception_action { }; /* Make an up-call to a thread's exception server */ -extern void exception( +extern void exception_triage( exception_type_t exception, exception_data_t code, mach_msg_type_number_t codeCnt); @@ -52,4 +52,4 @@ extern kern_return_t sys_perf_notify(struct task *task, exception_data_t code, mach_msg_type_number_t codeCnt); -#endif /* _EXCEPTION_H_ */ +#endif /* _KERN_EXCEPTION_H_ */ diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index dc173787f..912516829 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,74 +56,77 @@ * Non-ipc host functions. */ -#include #include +#include #include -#include -#include -#include -#include -#include -#include #include #include #include #include #include -#include #include #include +#include #include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + #if DIPC #include #include #endif -vm_statistics_data_t vm_stat[NCPUS]; - host_data_t realhost; kern_return_t host_processors( - host_priv_t host_priv, - processor_array_t *processor_list, + host_priv_t host_priv, + processor_array_t *out_array, mach_msg_type_number_t *countp) { - register int i; - register processor_t *tp; - vm_offset_t addr; - unsigned int count; + register processor_t processor, *tp; + void *addr; + unsigned int count, i; if (host_priv == HOST_PRIV_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); assert(host_priv == &realhost); - /* - * Determine how many processors we have. - * (This number shouldn't change.) - */ - - count = 0; - for (i = 0; i < NCPUS; i++) - if (machine_slot[i].is_cpu) - count++; - - if (count == 0) - panic("host_processors"); + count = processor_count; + assert(count != 0); addr = kalloc((vm_size_t) (count * sizeof(mach_port_t))); if (addr == 0) - return KERN_RESOURCE_SHORTAGE; + return (KERN_RESOURCE_SHORTAGE); tp = (processor_t *) addr; - for (i = 0; i < NCPUS; i++) - if (machine_slot[i].is_cpu) - *tp++ = cpu_to_processor(i); + *tp++ = processor = processor_list; + + if (count > 1) { + simple_lock(&processor_list_lock); + + for (i = 1; i < count; i++) + *tp++ = processor = processor->processor_list; + + simple_unlock(&processor_list_lock); + } *countp = count; - *processor_list = (processor_array_t)addr; + *out_array = (processor_array_t)addr; /* do the conversion that Mig should handle */ @@ -132,7 +135,7 @@ host_processors( ((mach_port_t *) tp)[i] = (mach_port_t)convert_processor_to_port(tp[i]); - return KERN_SUCCESS; + return (KERN_SUCCESS); } kern_return_t @@ -144,54 +147,64 @@ host_info( { if (host == HOST_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); - switch(flavor) { + switch (flavor) { case HOST_BASIC_INFO: { register host_basic_info_t basic_info; + register int master_slot; /* * Basic information about this host. 
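 *
 * The count the caller passes in selects the structure revision;
 * hypothetical user-level sketch of the public host_info interface:
 *
 *	host_basic_info_data_t info;
 *	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
 *
 *	if (host_info(mach_host_self(), HOST_BASIC_INFO,
 *	              (host_info_t)&info, &count) == KERN_SUCCESS &&
 *	    count == HOST_BASIC_INFO_COUNT)
 *		read info.physical_cpu, info.max_mem, and friends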
*/ - if (*count < HOST_BASIC_INFO_COUNT) - return(KERN_FAILURE); + if (*count < HOST_BASIC_INFO_OLD_COUNT) + return (KERN_FAILURE); basic_info = (host_basic_info_t) info; basic_info->max_cpus = machine_info.max_cpus; basic_info->avail_cpus = machine_info.avail_cpus; basic_info->memory_size = machine_info.memory_size; - basic_info->cpu_type = - machine_slot[master_processor->slot_num].cpu_type; - basic_info->cpu_subtype = - machine_slot[master_processor->slot_num].cpu_subtype; - - *count = HOST_BASIC_INFO_COUNT; + master_slot = PROCESSOR_DATA(master_processor, slot_num); + basic_info->cpu_type = slot_type(master_slot); + basic_info->cpu_subtype = slot_subtype(master_slot); + + if (*count >= HOST_BASIC_INFO_COUNT) { + basic_info->cpu_threadtype = slot_threadtype(master_slot); + basic_info->physical_cpu = machine_info.physical_cpu; + basic_info->physical_cpu_max = machine_info.physical_cpu_max; + basic_info->logical_cpu = machine_info.logical_cpu; + basic_info->logical_cpu_max = machine_info.logical_cpu_max; + basic_info->max_mem = machine_info.max_mem; + + *count = HOST_BASIC_INFO_COUNT; + } else { + *count = HOST_BASIC_INFO_OLD_COUNT; + } - return(KERN_SUCCESS); + return (KERN_SUCCESS); } case HOST_SCHED_INFO: { register host_sched_info_t sched_info; - extern int tick; /* XXX */ /* * Return scheduler information. */ if (*count < HOST_SCHED_INFO_COUNT) - return(KERN_FAILURE); + return (KERN_FAILURE); sched_info = (host_sched_info_t) info; - sched_info->min_timeout = tick / 1000; /* XXX */ - sched_info->min_quantum = tick / 1000; /* XXX */ + sched_info->min_timeout = + sched_info->min_quantum = std_quantum_us / 1000; *count = HOST_SCHED_INFO_COUNT; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } case HOST_RESOURCE_SIZES: @@ -200,10 +213,10 @@ host_info( * Return sizes of kernel data structures */ if (*count < HOST_RESOURCE_SIZES_COUNT) - return(KERN_FAILURE); + return (KERN_FAILURE); /* XXX Fail until ledgers are implemented */ - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } case HOST_PRIORITY_INFO: @@ -211,22 +224,22 @@ host_info( register host_priority_info_t priority_info; if (*count < HOST_PRIORITY_INFO_COUNT) - return(KERN_FAILURE); + return (KERN_FAILURE); priority_info = (host_priority_info_t) info; priority_info->kernel_priority = MINPRI_KERNEL; priority_info->system_priority = MINPRI_KERNEL; - priority_info->server_priority = MINPRI_SYSTEM; + priority_info->server_priority = MINPRI_RESERVED; priority_info->user_priority = BASEPRI_DEFAULT; priority_info->depress_priority = DEPRESSPRI; priority_info->idle_priority = IDLEPRI; - priority_info->minimum_priority = MINPRI_STANDARD; - priority_info->maximum_priority = MAXPRI_SYSTEM; + priority_info->minimum_priority = MINPRI_USER; + priority_info->maximum_priority = MAXPRI_RESERVED; *count = HOST_PRIORITY_INFO_COUNT; - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -236,68 +249,66 @@ host_info( case HOST_SEMAPHORE_TRAPS: { *count = 0; - return KERN_SUCCESS; + return (KERN_SUCCESS); } default: - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } } kern_return_t host_statistics( - host_t host, - host_flavor_t flavor, - host_info_t info, + host_t host, + host_flavor_t flavor, + host_info_t info, mach_msg_type_number_t *count) { if (host == HOST_NULL) - return(KERN_INVALID_HOST); + return (KERN_INVALID_HOST); switch(flavor) { - case HOST_LOAD_INFO: { - register host_load_info_t load_info; - extern uint32_t avenrun[3], mach_factor[3]; + case HOST_LOAD_INFO: + { + host_load_info_t load_info; if 
(*count < HOST_LOAD_INFO_COUNT) - return(KERN_FAILURE); + return (KERN_FAILURE); load_info = (host_load_info_t) info; bcopy((char *) avenrun, - (char *) load_info->avenrun, - sizeof avenrun); + (char *) load_info->avenrun, sizeof avenrun); bcopy((char *) mach_factor, - (char *) load_info->mach_factor, - sizeof mach_factor); + (char *) load_info->mach_factor, sizeof mach_factor); *count = HOST_LOAD_INFO_COUNT; - return(KERN_SUCCESS); - } - - case HOST_VM_INFO: { - register vm_statistics_t stat; - vm_statistics_data_t host_vm_stat; - extern int vm_page_free_count, vm_page_active_count, - vm_page_inactive_count, vm_page_wire_count; + return (KERN_SUCCESS); + } + + case HOST_VM_INFO: + { + register processor_t processor; + register vm_statistics_t stat; + vm_statistics_data_t host_vm_stat; - if (*count < HOST_VM_INFO_COUNT) - return(KERN_FAILURE); + if (*count < HOST_VM_INFO_REV0_COUNT) + return (KERN_FAILURE); - stat = &vm_stat[0]; + processor = processor_list; + stat = &PROCESSOR_DATA(processor, vm_stat); host_vm_stat = *stat; -#if NCPUS > 1 - { - register int i; - - for (i = 1; i < NCPUS; i++) { - stat++; - host_vm_stat.zero_fill_count += - stat->zero_fill_count; - host_vm_stat.reactivations += - stat->reactivations; + + if (processor_count > 1) { + simple_lock(&processor_list_lock); + + while ((processor = processor->processor_list) != NULL) { + stat = &PROCESSOR_DATA(processor, vm_stat); + + host_vm_stat.zero_fill_count += stat->zero_fill_count; + host_vm_stat.reactivations += stat->reactivations; host_vm_stat.pageins += stat->pageins; host_vm_stat.pageouts += stat->pageouts; host_vm_stat.faults += stat->faults; @@ -305,69 +316,90 @@ host_statistics( host_vm_stat.lookups += stat->lookups; host_vm_stat.hits += stat->hits; } + + simple_unlock(&processor_list_lock); } -#endif stat = (vm_statistics_t) info; - stat->free_count = vm_page_free_count; - stat->active_count = vm_page_active_count; - stat->inactive_count = vm_page_inactive_count; - stat->wire_count = vm_page_wire_count; - stat->zero_fill_count = host_vm_stat.zero_fill_count; - stat->reactivations = host_vm_stat.reactivations; - stat->pageins = host_vm_stat.pageins; - stat->pageouts = host_vm_stat.pageouts; - stat->faults = host_vm_stat.faults; - stat->cow_faults = host_vm_stat.cow_faults; - stat->lookups = host_vm_stat.lookups; - stat->hits = host_vm_stat.hits; - - *count = HOST_VM_INFO_COUNT; - return(KERN_SUCCESS); - } + stat->free_count = vm_page_free_count; + stat->active_count = vm_page_active_count; + stat->inactive_count = vm_page_inactive_count; + stat->wire_count = vm_page_wire_count; + stat->zero_fill_count = host_vm_stat.zero_fill_count; + stat->reactivations = host_vm_stat.reactivations; + stat->pageins = host_vm_stat.pageins; + stat->pageouts = host_vm_stat.pageouts; + stat->faults = host_vm_stat.faults; + stat->cow_faults = host_vm_stat.cow_faults; + stat->lookups = host_vm_stat.lookups; + stat->hits = host_vm_stat.hits; + + if (*count >= HOST_VM_INFO_COUNT) { + /* info that was not in revision 0 of that interface */ + stat->purgeable_count = vm_page_purgeable_count; + stat->purges = vm_page_purged_count; + *count = HOST_VM_INFO_COUNT; + } else { + *count = HOST_VM_INFO_REV0_COUNT; + } + + return (KERN_SUCCESS); + } - case HOST_CPU_LOAD_INFO: { + case HOST_CPU_LOAD_INFO: + { + register processor_t processor; host_cpu_load_info_t cpu_load_info; - unsigned long ticks_value1, ticks_value2; - int i; - -#define GET_TICKS_VALUE(__cpu,__state) \ -MACRO_BEGIN \ - do { \ - ticks_value1 = *(volatile integer_t *) \ - 
(&machine_slot[(__cpu)].cpu_ticks[(__state)]); \ - ticks_value2 = *(volatile integer_t *) \ - (&machine_slot[(__cpu)].cpu_ticks[(__state)]); \ - } while (ticks_value1 != ticks_value2); \ - cpu_load_info->cpu_ticks[(__state)] += ticks_value1; \ -MACRO_END + unsigned long ticks_value1, ticks_value2; if (*count < HOST_CPU_LOAD_INFO_COUNT) - return KERN_FAILURE; - - cpu_load_info = (host_cpu_load_info_t) info; + return (KERN_FAILURE); + +#define GET_TICKS_VALUE(processor, state) \ +MACRO_BEGIN \ + do { \ + ticks_value1 = *(volatile integer_t *) \ + &PROCESSOR_DATA((processor), cpu_ticks[(state)]); \ + ticks_value2 = *(volatile integer_t *) \ + &PROCESSOR_DATA((processor), cpu_ticks[(state)]); \ + } while (ticks_value1 != ticks_value2); \ + \ + cpu_load_info->cpu_ticks[(state)] += ticks_value1; \ +MACRO_END + cpu_load_info = (host_cpu_load_info_t)info; cpu_load_info->cpu_ticks[CPU_STATE_USER] = 0; cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0; cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0; cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = 0; - for (i = 0; i < NCPUS; i++) { - if (!machine_slot[i].is_cpu || - !machine_slot[i].running) - continue; - GET_TICKS_VALUE(i, CPU_STATE_USER); - GET_TICKS_VALUE(i, CPU_STATE_NICE); - GET_TICKS_VALUE(i, CPU_STATE_SYSTEM); - GET_TICKS_VALUE(i, CPU_STATE_IDLE); + + processor = processor_list; + GET_TICKS_VALUE(processor, CPU_STATE_USER); + GET_TICKS_VALUE(processor, CPU_STATE_NICE); + GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM); + GET_TICKS_VALUE(processor, CPU_STATE_IDLE); + + if (processor_count > 1) { + simple_lock(&processor_list_lock); + + while ((processor = processor->processor_list) != NULL) { + GET_TICKS_VALUE(processor, CPU_STATE_USER); + GET_TICKS_VALUE(processor, CPU_STATE_NICE); + GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM); + GET_TICKS_VALUE(processor, CPU_STATE_IDLE); + } + + simple_unlock(&processor_list_lock); } *count = HOST_CPU_LOAD_INFO_COUNT; - return KERN_SUCCESS; - } + + return (KERN_SUCCESS); + } default: - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } } @@ -403,13 +435,13 @@ host_page_size( * Return kernel version string (more than you ever * wanted to know about what version of the kernel this is). 
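 *
 * Hypothetical user-level sketch (kernel_version_t is the public
 * fixed-size buffer typedef, so no length argument is needed):
 *
 *	kernel_version_t vers;
 *
 *	if (host_kernel_version(mach_host_self(), vers) == KERN_SUCCESS)
 *		printf("%s\n", vers);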
*/ +extern char version[]; kern_return_t host_kernel_version( host_t host, kernel_version_t out_version) { - extern char version[]; if (host == HOST_NULL) return(KERN_INVALID_ARGUMENT); @@ -430,7 +462,7 @@ host_processor_sets( processor_set_name_array_t *pset_list, mach_msg_type_number_t *count) { - vm_offset_t addr; + void *addr; if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_ARGUMENT; @@ -485,76 +517,75 @@ host_processor_set_priv( */ kern_return_t host_processor_info( - host_t host, - processor_flavor_t flavor, - natural_t *proc_count, - processor_info_array_t *proc_info, - mach_msg_type_number_t *proc_info_count) + host_t host, + processor_flavor_t flavor, + natural_t *out_pcount, + processor_info_array_t *out_array, + mach_msg_type_number_t *out_array_count) { - int i; - int num; - int count; - vm_size_t size; - vm_offset_t addr; - kern_return_t kr; - vm_map_copy_t copy; - processor_info_t proc_data; + kern_return_t result; + processor_t processor; + host_t thost; + processor_info_t info; + unsigned int icount, tcount; + unsigned int pcount, i; + vm_offset_t addr; + vm_size_t size; + vm_map_copy_t copy; if (host == HOST_NULL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); - kr = processor_info_count(flavor, &count); - if (kr != KERN_SUCCESS) { - return kr; - } - - for (num = i = 0; i < NCPUS; i++) - if (machine_slot[i].is_cpu) - num++; + result = processor_info_count(flavor, &icount); + if (result != KERN_SUCCESS) + return (result); - size = (vm_size_t)round_page_32(num * count * sizeof(natural_t)); + pcount = processor_count; + assert(pcount != 0); - kr = vm_allocate(ipc_kernel_map, &addr, size, TRUE); - if (kr != KERN_SUCCESS) - return KERN_RESOURCE_SHORTAGE; + size = round_page(pcount * icount * sizeof(natural_t)); + result = kmem_alloc(ipc_kernel_map, &addr, size); + if (result != KERN_SUCCESS) + return (KERN_RESOURCE_SHORTAGE); + + info = (processor_info_t) addr; + processor = processor_list; + tcount = icount; - kr = vm_map_wire(ipc_kernel_map, addr, addr + size, - VM_PROT_READ|VM_PROT_WRITE, FALSE); - if (kr != KERN_SUCCESS) { + result = processor_info(processor, flavor, &thost, info, &tcount); + if (result != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr, size); - return KERN_RESOURCE_SHORTAGE; + return (result); } - proc_data = (processor_info_t) addr; - for (i = 0; i < NCPUS; i++) { - int count2 = count; - host_t host2; - - if (machine_slot[i].is_cpu) { - kr = processor_info(cpu_to_processor(i), - flavor, - &host2, - proc_data, - &count2); - if (kr != KERN_SUCCESS) { + if (pcount > 1) { + for (i = 1; i < pcount; i++) { + simple_lock(&processor_list_lock); + processor = processor->processor_list; + simple_unlock(&processor_list_lock); + + info += icount; + tcount = icount; + result = processor_info(processor, flavor, &thost, info, &tcount); + if (result != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr, size); - return kr; + return (result); } - assert(count == count2); - proc_data += count; } } - kr = vm_map_unwire(ipc_kernel_map, addr, addr + size, FALSE); - assert(kr == KERN_SUCCESS); - size = (vm_size_t)(num * count * sizeof(natural_t)); - kr = vm_map_copyin(ipc_kernel_map, addr, size, TRUE, ©); - assert(kr == KERN_SUCCESS); + result = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), FALSE); + assert(result == KERN_SUCCESS); + result = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size, TRUE, ©); + assert(result == KERN_SUCCESS); - *proc_count = num; - *proc_info = 
(processor_info_array_t) copy; - *proc_info_count = num * count; - return(KERN_SUCCESS); + *out_pcount = pcount; + *out_array = (processor_info_array_t) copy; + *out_array_count = pcount * icount; + + return (KERN_SUCCESS); } /* @@ -615,14 +646,14 @@ host_set_special_port( kern_return_t host_get_special_port( host_priv_t host_priv, - int node, + __unused int node, int id, ipc_port_t *portp) { ipc_port_t port; if (host_priv == HOST_PRIV_NULL || - id == HOST_SECURITY_PORT ) + id == HOST_SECURITY_PORT || id > HOST_MAX_SPECIAL_PORT ) return KERN_INVALID_ARGUMENT; #if DIPC diff --git a/osfmk/kern/host.h b/osfmk/kern/host.h index 2af73c745..623884ba0 100644 --- a/osfmk/kern/host.h +++ b/osfmk/kern/host.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,12 +59,10 @@ #define _KERN_HOST_H_ #include +#include -#include +#ifdef MACH_KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE #include #include #include @@ -85,15 +83,18 @@ extern host_data_t realhost; #define host_lock(host) mutex_lock(&(host)->lock) #define host_unlock(host) mutex_unlock(&(host)->lock) -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ /* * Access routines for inside the kernel. */ -extern host_t host_self(void); -extern host_priv_t host_priv_self(void); -extern host_security_t host_security_self(void); + +__BEGIN_DECLS + +extern host_t host_self(void); +extern host_priv_t host_priv_self(void); +extern host_security_t host_security_self(void); + +__END_DECLS #endif /* _KERN_HOST_H_ */ diff --git a/osfmk/kern/host_notify.c b/osfmk/kern/host_notify.c index 5c6c7373e..9497b7863 100644 --- a/osfmk/kern/host_notify.c +++ b/osfmk/kern/host_notify.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -29,7 +29,9 @@ */ #include +#include +#include #include #include @@ -60,7 +62,7 @@ host_notify_init(void) for (i = 0; i <= HOST_NOTIFY_TYPE_MAX; i++) queue_init(&host_notify_queue[i]); - mutex_init(&host_notify_lock, ETAP_MISC_EVENT); + mutex_init(&host_notify_lock, 0); i = sizeof (struct host_notify_entry); host_notify_zone = @@ -95,7 +97,7 @@ host_request_notification( ip_unlock(port); mutex_unlock(&host_notify_lock); - zfree(host_notify_zone, (vm_offset_t)entry); + zfree(host_notify_zone, entry); return (KERN_FAILURE); } @@ -128,7 +130,7 @@ host_notify_port_destroy( assert(entry->port == port); remqueue(NULL, (queue_entry_t)entry); mutex_unlock(&host_notify_lock); - zfree(host_notify_zone, (vm_offset_t)entry); + zfree(host_notify_zone, entry); ipc_port_release_sonce(port); return; @@ -176,7 +178,7 @@ host_notify_all( ip_unlock(port); mutex_unlock(&host_notify_lock); - zfree(host_notify_zone, (vm_offset_t)entry); + zfree(host_notify_zone, entry); msg->msgh_remote_port = port; diff --git a/osfmk/kern/host_statistics.h b/osfmk/kern/host_statistics.h index 2d5da2207..2be83e115 100644 --- a/osfmk/kern/host_statistics.h +++ b/osfmk/kern/host_statistics.h @@ -33,16 +33,13 @@ #define _KERN_HOST_STATISTICS_H_ #include -#include -#include +#include -extern vm_statistics_data_t vm_stat[]; - -#define VM_STAT(event) \ -MACRO_BEGIN \ - mp_disable_preemption(); \ - vm_stat[cpu_number()].event; \ - mp_enable_preemption(); \ +#define VM_STAT(event) \ +MACRO_BEGIN \ + disable_preemption(); \ + PROCESSOR_DATA(current_processor(), vm_stat).event; \ + enable_preemption(); \ MACRO_END #endif /* _KERN_HOST_STATISTICS_H_ */ diff --git a/osfmk/kern/ipc_host.c b/osfmk/kern/ipc_host.c index 78c01a543..0a36ae65c 100644 --- a/osfmk/kern/ipc_host.c +++ b/osfmk/kern/ipc_host.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,8 +57,8 @@ */ #include #include -#include #include +#include #include #include #include @@ -96,7 +96,7 @@ void ipc_host_init(void) ipc_port_t port; int i; - mutex_init(&realhost.lock, ETAP_MISC_MASTER); + mutex_init(&realhost.lock, 0); /* * Allocate and set up the two host ports. @@ -156,12 +156,15 @@ void ipc_host_init(void) */ mach_port_name_t -host_self_trap(void) +host_self_trap( + __unused struct host_self_trap_args *args) { ipc_port_t sright; + mach_port_name_t name; sright = ipc_port_copy_send(current_task()->itk_host); - return ipc_port_copyout_send(sright, current_space()); + name = ipc_port_copyout_send(sright, current_space()); + return name; } /* @@ -723,9 +726,7 @@ host_get_exception_ports( exception_behavior_array_t behaviors, thread_state_flavor_array_t flavors ) { - register int i, - j, - count; + unsigned int i, j, count; if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_ARGUMENT; @@ -787,7 +788,7 @@ host_swap_exception_ports( exception_behavior_array_t behaviors, thread_state_flavor_array_t flavors ) { - register int i, + unsigned int i, j, count; ipc_port_t old_port[EXC_TYPES_COUNT]; diff --git a/osfmk/kern/ipc_kobject.c b/osfmk/kern/ipc_kobject.c index 2a0abc1f2..62230a676 100644 --- a/osfmk/kern/ipc_kobject.c +++ b/osfmk/kern/ipc_kobject.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -70,8 +70,42 @@ #include #include #include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if MACH_MACHINE_ROUTINES +#include +#endif /* MACH_MACHINE_ROUTINES */ +#if XK_PROXY +#include +#endif /* XK_PROXY */ -#include +#include #include #include #include @@ -81,6 +115,8 @@ #include #include +#include +#include /* * Routine: ipc_kobject_notify @@ -92,8 +128,6 @@ ipc_kobject_notify( mach_msg_header_t *request_header, mach_msg_header_t *reply_header); -#include - typedef struct { mach_msg_id_t num; mig_routine_t routine; @@ -115,72 +149,46 @@ int mig_table_max_displ; mach_msg_size_t mig_reply_size; -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if MACH_MACHINE_ROUTINES -#include -#endif /* MACH_MACHINE_ROUTINES */ -#if XK_PROXY -#include -#endif /* XK_PROXY */ -mig_subsystem_t mig_e[] = { - (mig_subsystem_t)&mach_port_subsystem, - (mig_subsystem_t)&mach_host_subsystem, - (mig_subsystem_t)&host_priv_subsystem, - (mig_subsystem_t)&host_security_subsystem, - (mig_subsystem_t)&clock_subsystem, - (mig_subsystem_t)&clock_priv_subsystem, - (mig_subsystem_t)&processor_subsystem, - (mig_subsystem_t)&processor_set_subsystem, - (mig_subsystem_t)&is_iokit_subsystem, - (mig_subsystem_t)&memory_object_name_subsystem, - (mig_subsystem_t)&lock_set_subsystem, - (mig_subsystem_t)&ledger_subsystem, - (mig_subsystem_t)&semaphore_subsystem, - (mig_subsystem_t)&task_subsystem, - (mig_subsystem_t)&thread_act_subsystem, - (mig_subsystem_t)&vm_map_subsystem, - (mig_subsystem_t)&UNDReply_subsystem, +const struct mig_subsystem *mig_e[] = { + (const struct mig_subsystem *)&mach_vm_subsystem, + (const struct mig_subsystem *)&mach_port_subsystem, + (const struct mig_subsystem *)&mach_host_subsystem, + (const struct mig_subsystem *)&host_priv_subsystem, + (const struct mig_subsystem *)&host_security_subsystem, + (const struct mig_subsystem *)&clock_subsystem, + (const struct mig_subsystem *)&clock_priv_subsystem, + (const struct mig_subsystem *)&processor_subsystem, + (const struct mig_subsystem *)&processor_set_subsystem, + (const struct mig_subsystem *)&is_iokit_subsystem, + (const struct mig_subsystem *)&memory_object_name_subsystem, + (const struct mig_subsystem *)&lock_set_subsystem, + (const struct mig_subsystem *)&ledger_subsystem, + (const struct mig_subsystem *)&semaphore_subsystem, + (const struct mig_subsystem *)&task_subsystem, + (const struct mig_subsystem *)&thread_act_subsystem, + (const struct mig_subsystem *)&vm_map_subsystem, + (const struct mig_subsystem *)&UNDReply_subsystem, + (const struct mig_subsystem *)&default_pager_object_subsystem, #if XK_PROXY - (mig_subsystem_t)&do_uproxy_xk_uproxy_subsystem, + (const struct mig_subsystem *)&do_uproxy_xk_uproxy_subsystem, #endif /* XK_PROXY */ #if MACH_MACHINE_ROUTINES - (mig_subsystem_t)&MACHINE_SUBSYSTEM, + (const struct mig_subsystem *)&MACHINE_SUBSYSTEM, #endif /* MACH_MACHINE_ROUTINES */ #if MCMSG && iPSC860 - (mig_subsystem_t)&mcmsg_info_subsystem, + (const struct mig_subsystem *)&mcmsg_info_subsystem, #endif /* MCMSG && iPSC860 */ }; void mig_init(void) { - register unsigned int i, n = sizeof(mig_e)/sizeof(mig_subsystem_t); - register unsigned int 
howmany; - register mach_msg_id_t j, pos, nentry, range; + unsigned int i, n = sizeof(mig_e)/sizeof(const struct mig_subsystem *); + int howmany; + mach_msg_id_t j, pos, nentry, range; for (i = 0; i < n; i++) { range = mig_e[i]->end - mig_e[i]->start; @@ -235,24 +243,15 @@ ipc_kobject_server( mach_msg_size_t reply_size; ipc_kmsg_t reply; kern_return_t kr; - mig_routine_t routine; ipc_port_t *destp; mach_msg_format_0_trailer_t *trailer; register mig_hash_t *ptr; - unsigned int th; - - /* Only fetch current thread if ETAP is configured */ - ETAP_DATA_LOAD(th, current_thread()); - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_BEGIN, - ((thread_t) th), - &request->ikm_header.msgh_id, - sizeof(int)); + /* - * Find out corresponding mig_hash entry if any - */ + * Find out corresponding mig_hash entry if any + */ { - register int key = request->ikm_header.msgh_id; + register int key = request->ikm_header->msgh_id; register int i = MIG_HASH(key); register int max_iter = mig_table_max_displ; @@ -285,8 +284,8 @@ ipc_kobject_server( * Initialize reply message. */ { -#define InP ((mach_msg_header_t *) &request->ikm_header) -#define OutP ((mig_reply_error_t *) &reply->ikm_header) +#define InP ((mach_msg_header_t *) request->ikm_header) +#define OutP ((mig_reply_error_t *) reply->ikm_header) OutP->NDR = NDR_record; OutP->Head.msgh_size = sizeof(mig_reply_error_t); @@ -307,18 +306,18 @@ ipc_kobject_server( */ { if (ptr) { - (*ptr->routine)(&request->ikm_header, &reply->ikm_header); + (*ptr->routine)(request->ikm_header, reply->ikm_header); kernel_task->messages_received++; } else { - if (!ipc_kobject_notify(&request->ikm_header, &reply->ikm_header)){ + if (!ipc_kobject_notify(request->ikm_header, reply->ikm_header)){ #if MACH_IPC_TEST printf("ipc_kobject_server: bogus kernel message, id=%d\n", - request->ikm_header.msgh_id); + request->ikm_header->msgh_id); #endif /* MACH_IPC_TEST */ - _MIG_MSGID_INVALID(request->ikm_header.msgh_id); + _MIG_MSGID_INVALID(request->ikm_header->msgh_id); - ((mig_reply_error_t *) &reply->ikm_header)->RetCode + ((mig_reply_error_t *) reply->ikm_header)->RetCode = MIG_BAD_ID; } else @@ -338,8 +337,8 @@ ipc_kobject_server( * We set msgh_remote_port to IP_NULL so that the kmsg * destroy routines don't try to destroy the port twice. */ - destp = (ipc_port_t *) &request->ikm_header.msgh_remote_port; - switch (MACH_MSGH_BITS_REMOTE(request->ikm_header.msgh_bits)) { + destp = (ipc_port_t *) &request->ikm_header->msgh_remote_port; + switch (MACH_MSGH_BITS_REMOTE(request->ikm_header->msgh_bits)) { case MACH_MSG_TYPE_PORT_SEND: ipc_port_release_send(*destp); break; @@ -353,9 +352,9 @@ ipc_kobject_server( } *destp = IP_NULL; - if (!(reply->ikm_header.msgh_bits & MACH_MSGH_BITS_COMPLEX) && - ((mig_reply_error_t *) &reply->ikm_header)->RetCode != KERN_SUCCESS) - kr = ((mig_reply_error_t *) &reply->ikm_header)->RetCode; + if (!(reply->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) && + ((mig_reply_error_t *) reply->ikm_header)->RetCode != KERN_SUCCESS) + kr = ((mig_reply_error_t *) reply->ikm_header)->RetCode; else kr = KERN_SUCCESS; @@ -375,7 +374,7 @@ ipc_kobject_server( * Destroy everthing except the reply port right, * which is needed in the reply message. 
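 * (That surviving right is what ends up as the reply kmsg's
 * msgh_remote_port, so the queued reply still has a destination.)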
*/ - request->ikm_header.msgh_local_port = MACH_PORT_NULL; + request->ikm_header->msgh_local_port = MACH_PORT_NULL; ipc_kmsg_destroy(request); } @@ -387,14 +386,8 @@ ipc_kobject_server( ipc_kmsg_free(reply); - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_END, - ((thread_t) th), - &request->ikm_header.msgh_id, - sizeof(int)); - return IKM_NULL; - } else if (!IP_VALID((ipc_port_t)reply->ikm_header.msgh_remote_port)) { + } else if (!IP_VALID((ipc_port_t)reply->ikm_header->msgh_remote_port)) { /* * Can't queue the reply message if the destination * (the reply port) isn't valid. @@ -402,27 +395,16 @@ ipc_kobject_server( ipc_kmsg_destroy(reply); - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_END, - ((thread_t) th), - &request->ikm_header.msgh_id, - sizeof(int)); - return IKM_NULL; } trailer = (mach_msg_format_0_trailer_t *) - ((vm_offset_t)&reply->ikm_header + (int)reply->ikm_header.msgh_size); + ((vm_offset_t)reply->ikm_header + (int)reply->ikm_header->msgh_size); + trailer->msgh_sender = KERNEL_SECURITY_TOKEN; trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; - ETAP_PROBE_DATA(ETAP_P_SYSCALL_MACH, - EVENT_END, - ((thread_t) th), - &request->ikm_header.msgh_id, - sizeof(int)); - return reply; } @@ -500,16 +482,12 @@ ipc_kobject_destroy( } -extern int vnode_pager_workaround; - boolean_t ipc_kobject_notify( mach_msg_header_t *request_header, mach_msg_header_t *reply_header) { ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port; - mig_subsystem_t paging_subsystem_object; - mach_port_seqno_t seqno; ((mig_reply_error_t *) reply_header)->RetCode = MIG_NO_REPLY; switch (request_header->msgh_id) { @@ -528,7 +506,7 @@ ipc_kobject_notify( } if (ip_kotype(port) == IKOT_UPL) { upl_no_senders( - (ipc_port_t)request_header->msgh_remote_port, + request_header->msgh_remote_port, (mach_port_mscount_t) ((mach_no_senders_notification_t *) request_header)->not_count); @@ -555,8 +533,6 @@ ipc_kobject_notify( case IKOT_IOKIT_CONNECT: case IKOT_IOKIT_SPARE: { - extern boolean_t iokit_notify( mach_msg_header_t *msg); - return iokit_notify(request_header); } #endif @@ -593,7 +569,7 @@ kobjserver_stats_clear(void) void kobjserver_stats(void) { - register unsigned int i, n = sizeof(mig_e)/sizeof(mig_subsystem_t); + register unsigned int i, n = sizeof(mig_e)/sizeof(struct mig_subsystem); register unsigned int howmany; register mach_msg_id_t j, pos, nentry, range; diff --git a/osfmk/kern/ipc_kobject.h b/osfmk/kern/ipc_kobject.h index eb714a277..9c690bb4f 100644 --- a/osfmk/kern/ipc_kobject.h +++ b/osfmk/kern/ipc_kobject.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -95,7 +95,7 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_HOST_SECURITY 17 #define IKOT_LEDGER 18 #define IKOT_MASTER_DEVICE 19 -#define IKOT_ACT 20 +/* (unused) 20 */ #define IKOT_SUBSYSTEM 21 #define IKOT_IO_DONE_QUEUE 22 #define IKOT_SEMAPHORE 23 @@ -120,9 +120,6 @@ typedef natural_t ipc_kobject_type_t; * of entry lists for copyin of out of line memory. 
*/ -/* Initialize kernel server dispatch table */ -extern void mig_init(void); - /* Dispatch a kernel server function */ extern ipc_kmsg_t ipc_kobject_server( ipc_kmsg_t request); diff --git a/osfmk/kern/ipc_mig.c b/osfmk/kern/ipc_mig.c index 903215823..c031c089d 100644 --- a/osfmk/kern/ipc_mig.c +++ b/osfmk/kern/ipc_mig.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,13 +57,14 @@ #include #include -#include #include #include +#include #include #include #include #include + #include #include #include @@ -155,8 +156,8 @@ mach_msg_rpc_from_kernel( } /* insert send-once right for the reply port */ - kmsg->ikm_header.msgh_local_port = reply; - kmsg->ikm_header.msgh_bits |= + kmsg->ikm_header->msgh_local_port = reply; + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS(0, MACH_MSG_TYPE_MAKE_SEND_ONCE); ipc_port_reference(reply); @@ -174,7 +175,7 @@ mach_msg_rpc_from_kernel( ipc_port_release(reply); return MACH_RCV_PORT_DIED; } - if (!self->top_act || !self->top_act->active) { + if (!self->active) { ip_unlock(reply); ipc_port_release(reply); return MACH_RCV_INTERRUPTED; @@ -203,7 +204,7 @@ mach_msg_rpc_from_kernel( assert(mr == MACH_RCV_INTERRUPTED); - if (self->top_act && self->top_act->handlers) { + if (self->handlers) { ipc_port_release(reply); return(mr); } @@ -221,9 +222,9 @@ mach_msg_rpc_from_kernel( } *****/ - if (rcv_size < kmsg->ikm_header.msgh_size) { + if (rcv_size < kmsg->ikm_header->msgh_size) { ipc_kmsg_copyout_dest(kmsg, ipc_space_reply); - ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header->msgh_size); return MACH_RCV_TOO_LARGE; } @@ -234,20 +235,15 @@ mach_msg_rpc_from_kernel( */ ipc_kmsg_copyout_to_kernel(kmsg, ipc_space_reply); - ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header->msgh_size); return MACH_MSG_SUCCESS; } -/************** These Calls are set up for kernel-loaded tasks **************/ -/************** Apple does not plan on supporting that. These **************/ -/************** need to be reworked to deal with the kernel **************/ -/************** proper to eliminate the kernel specific code MIG **************/ -/************** must generate. **************/ - +/************** These Calls are set up for kernel-loaded tasks/threads **************/ /* - * Routine: mach_msg + * Routine: mach_msg_overwrite * Purpose: * Like mach_msg_overwrite_trap except that message buffers * live in kernel space. Doesn't handle any options. 
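 *
 * A purely kernel-resident sender can instead build a header in
 * place and hand it to mach_msg_send_from_kernel( ), declared in
 * this file's header; hypothetical sketch, where dest is an assumed
 * send right and MY_MSG_ID an assumed message id:
 *
 *	mach_msg_header_t msg;
 *
 *	msg.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
 *	msg.msgh_remote_port = dest;
 *	msg.msgh_local_port = MACH_PORT_NULL;
 *	msg.msgh_size = sizeof msg;
 *	msg.msgh_id = MY_MSG_ID;
 *	(void) mach_msg_send_from_kernel(&msg, sizeof msg);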
@@ -262,15 +258,15 @@ mach_msg_rpc_from_kernel( mach_msg_return_t mach_msg_overwrite( - mach_msg_header_t *msg, - mach_msg_option_t option, + mach_msg_header_t *msg, + mach_msg_option_t option, mach_msg_size_t send_size, mach_msg_size_t rcv_size, - mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, - mach_port_name_t notify, - mach_msg_header_t *rcv_msg, - mach_msg_size_t rcv_msg_size) + mach_port_name_t rcv_name, + __unused mach_msg_timeout_t msg_timeout, + __unused mach_port_name_t notify, + __unused mach_msg_header_t *rcv_msg, + __unused mach_msg_size_t rcv_msg_size) { ipc_space_t space = current_space(); vm_map_t map = current_map(); @@ -280,10 +276,35 @@ mach_msg_overwrite( mach_msg_format_0_trailer_t *trailer; if (option & MACH_SEND_MSG) { - mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); - if (mr != MACH_MSG_SUCCESS) - panic("mach_msg"); + mach_msg_size_t msg_and_trailer_size; + mach_msg_max_trailer_t *max_trailer; + + if ((send_size < sizeof(mach_msg_header_t)) || (send_size & 3)) + return MACH_SEND_MSG_TOO_SMALL; + + msg_and_trailer_size = send_size + MAX_TRAILER_SIZE; + kmsg = ipc_kmsg_alloc(msg_and_trailer_size); + + if (kmsg == IKM_NULL) + return MACH_SEND_NO_BUFFER; + + (void) memcpy((void *) kmsg->ikm_header, (const void *) msg, send_size); + + kmsg->ikm_header->msgh_size = send_size; + + /* + * Reserve for the trailer the largest space (MAX_TRAILER_SIZE) + * However, the internal size field of the trailer (msgh_trailer_size) + * is initialized to the minimum (sizeof(mach_msg_trailer_t)), to optimize + * the cases where no implicit data is requested. + */ + max_trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + send_size); + max_trailer->msgh_sender = current_thread()->task->sec_token; + max_trailer->msgh_audit = current_thread()->task->audit_token; + max_trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; + max_trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; + mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL); if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_free(kmsg); @@ -327,15 +348,16 @@ mach_msg_overwrite( return mr; trailer = (mach_msg_format_0_trailer_t *) - ((vm_offset_t)&kmsg->ikm_header + kmsg->ikm_header.msgh_size); + ((vm_offset_t)kmsg->ikm_header + kmsg->ikm_header->msgh_size); if (option & MACH_RCV_TRAILER_MASK) { trailer->msgh_seqno = seqno; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); } - if (rcv_size < (kmsg->ikm_header.msgh_size + trailer->msgh_trailer_size)) { + if (rcv_size < (kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size)) { ipc_kmsg_copyout_dest(kmsg, space); - ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, sizeof *msg); + ipc_kmsg_free(kmsg); return MACH_RCV_TOO_LARGE; } @@ -344,17 +366,19 @@ mach_msg_overwrite( if (mr != MACH_MSG_SUCCESS) { if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { ipc_kmsg_put_to_kernel(msg, kmsg, - kmsg->ikm_header.msgh_size + trailer->msgh_trailer_size); + kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size); } else { ipc_kmsg_copyout_dest(kmsg, space); - ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, sizeof *msg); + ipc_kmsg_free(kmsg); } return mr; } - ipc_kmsg_put_to_kernel(msg, kmsg, - kmsg->ikm_header.msgh_size + trailer->msgh_trailer_size); + (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, + kmsg->ikm_header->msgh_size + trailer->msgh_trailer_size); + ipc_kmsg_free(kmsg); } return MACH_MSG_SUCCESS; @@ 
-364,39 +388,23 @@ mach_msg_overwrite( * Routine: mig_get_reply_port * Purpose: * Called by client side interfaces living in the kernel - * to get a reply port. This port is used for - * mach_msg() calls which are kernel calls. + * to get a reply port. */ mach_port_t mig_get_reply_port(void) { - thread_t self = current_thread(); - - assert(self->ith_mig_reply == (mach_port_t)0); - - /* - * JMM - for now we have no real clients of this under the kernel - * loaded server model because we only have one of those. In order - * to avoid MIG changes, we just return null here - and return] - * references to ipc_port_t's instead of names. - * - * if (self->ith_mig_reply == MACH_PORT_NULL) - * self->ith_mig_reply = mach_reply_port(); - */ - return self->ith_mig_reply; + return (MACH_PORT_NULL); } /* * Routine: mig_dealloc_reply_port * Purpose: * Called by client side interfaces to get rid of a reply port. - * Shouldn't ever be called inside the kernel, because - * kernel calls shouldn't prompt Mig to call it. */ void mig_dealloc_reply_port( - mach_port_t reply_port) + __unused mach_port_t reply_port) { panic("mig_dealloc_reply_port"); } @@ -409,7 +417,7 @@ mig_dealloc_reply_port( */ void mig_put_reply_port( - mach_port_t reply_port) + __unused mach_port_t reply_port) { } @@ -460,7 +468,7 @@ mig_user_deallocate( char *data, vm_size_t size) { - kfree((vm_offset_t)data, size); + kfree(data, size); } /* @@ -474,9 +482,11 @@ mig_object_init( mig_object_t mig_object, const IMIGObject *interface) { - assert(mig_object != MIG_OBJECT_NULL); - mig_object->pVtbl = (IMIGObjectVtbl *)interface; + if (mig_object == MIG_OBJECT_NULL) + return KERN_INVALID_ARGUMENT; + mig_object->pVtbl = (const IMIGObjectVtbl *)interface; mig_object->port = MACH_PORT_NULL; + return KERN_SUCCESS; } /* @@ -492,7 +502,7 @@ mig_object_init( */ void mig_object_destroy( - mig_object_t mig_object) + __assert_only mig_object_t mig_object) { assert(mig_object->port == MACH_PORT_NULL); return; diff --git a/osfmk/kern/ipc_mig.h b/osfmk/kern/ipc_mig.h index e962d46f1..e25e071ad 100644 --- a/osfmk/kern/ipc_mig.h +++ b/osfmk/kern/ipc_mig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,11 +23,18 @@ * @OSF_COPYRIGHT@ */ -#ifndef _IPC_MIG_H_ -#define _IPC_MIG_H_ +#ifndef _KERN_IPC_MIG_H_ +#define _KERN_IPC_MIG_H_ #include +#include #include +#include + +#include + +#ifdef XNU_KERNEL_PRIVATE + #include /* @@ -113,6 +120,10 @@ (unsigned int)(0), \ (unsigned int)(0)) +#endif /* XNU_KERNEL_PRIVATE */ + +__BEGIN_DECLS + /* Send a message from the kernel */ extern mach_msg_return_t mach_msg_send_from_kernel( mach_msg_header_t *msg, @@ -124,11 +135,15 @@ extern mach_msg_return_t mach_msg_rpc_from_kernel( mach_msg_size_t send_size, mach_msg_size_t rcv_size); +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE + extern void mach_msg_receive_continue(void); -#include +/* Initialize kernel server dispatch table */ +extern void mig_init(void); -#ifdef __APPLE_API_EVOLVING /* * Kernel implementation of the MIG object base class * @@ -138,7 +153,7 @@ extern void mach_msg_receive_continue(void); */ typedef struct mig_object { - IMIGObjectVtbl *pVtbl; /* our interface def */ + const IMIGObjectVtbl *pVtbl; /* our interface def */ mach_port_t port; /* our port pointer */ } mig_object_data_t; @@ -150,10 +165,34 @@ typedef struct mig_object { * chain and deliver the appropriate notification. 
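 *
 * Either flavor is intended to sit at the front of a larger kernel
 * object so the vtable/port prefix stays at offset zero; hypothetical
 * embedding sketch (my_kobject and my_state are assumed names):
 *
 *	struct my_kobject {
 *		mig_object_data_t	base;	must be first
 *		int			my_state;
 *	};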
*/ typedef struct mig_notify_object { - IMIGNotifyObjectVtbl *pVtbl; /* our interface def */ + const IMIGNotifyObjectVtbl *pVtbl; /* our interface def */ mach_port_t port; /* our port pointer */ } mig_notify_object_data_t; -#endif /* __APPLE_API_EVOLVING */ +extern kern_return_t mig_object_init( + mig_object_t mig_object, + const IMIGObject *interface); + +extern void mig_object_destroy( + mig_object_t mig_object); + +extern void mig_object_reference( + mig_object_t mig_object); + +extern void mig_object_deallocate( + mig_object_t mig_object); + +extern ipc_port_t convert_mig_object_to_port( + mig_object_t mig_object); + +extern mig_object_t convert_port_to_mig_object( + ipc_port_t port, + const MIGIID *iid); + +boolean_t mig_object_no_senders( + ipc_port_t port, + mach_port_mscount_t mscount); + +#endif /* MACH_KERNEL_PRIVATE */ -#endif /* _IPC_MIG_H_ */ +#endif /* _KERN_IPC_MIG_H_ */ diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index 04071f778..3af0f19c5 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,23 +58,36 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include #include #include +#include #include + +#include #include +#include #include -#include +#include +#include #include + +#include #include +#include +#include + +/* forward declarations */ +task_t convert_port_to_locked_task(ipc_port_t port); + /* * Routine: ipc_task_init @@ -237,7 +250,8 @@ ipc_task_terminate( if (IP_VALID(task->exc_actions[i].port)) { ipc_port_release_send(task->exc_actions[i].port); } - }/* for */ + } + if (IP_VALID(task->itk_host)) ipc_port_release_send(task->itk_host); @@ -335,94 +349,72 @@ void ipc_thread_init( thread_t thread) { + ipc_port_t kport; + int i; + + kport = ipc_port_alloc_kernel(); + if (kport == IP_NULL) + panic("ipc_thread_init"); + + thread->ith_self = kport; + thread->ith_sself = ipc_port_make_send(kport); + + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) + thread->exc_actions[i].port = IP_NULL; + + ipc_kobject_set(kport, (ipc_kobject_t)thread, IKOT_THREAD); + ipc_kmsg_queue_init(&thread->ith_messages); - thread->ith_mig_reply = MACH_PORT_NULL; + thread->ith_rpc_reply = IP_NULL; } -/* - * Routine: ipc_thread_terminate - * Purpose: - * Clean up and destroy a thread's IPC state. - * Conditions: - * Nothing locked. The thread must be suspended. - * (Or be the current thread.) - */ - void -ipc_thread_terminate( +ipc_thread_disable( thread_t thread) { - assert(ipc_kmsg_queue_empty(&thread->ith_messages)); + ipc_port_t kport = thread->ith_self; - if (thread->ith_rpc_reply != IP_NULL) - ipc_port_dealloc_reply(thread->ith_rpc_reply); - thread->ith_rpc_reply = IP_NULL; + if (kport != IP_NULL) + ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); } /* - * Routine: ipc_thr_act_init + * Routine: ipc_thread_terminate * Purpose: - * Initialize an thr_act's IPC state. + * Clean up and destroy a thread's IPC state. * Conditions: * Nothing locked. 
*/ void -ipc_thr_act_init(task_t task, thread_act_t thr_act) -{ - ipc_port_t kport; int i; - - kport = ipc_port_alloc_kernel(); - if (kport == IP_NULL) - panic("ipc_thr_act_init"); - - thr_act->ith_self = kport; - thr_act->ith_sself = ipc_port_make_send(kport); - - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) - thr_act->exc_actions[i].port = IP_NULL; - - ipc_kobject_set(kport, (ipc_kobject_t) thr_act, IKOT_ACT); -} - -void -ipc_thr_act_disable(thread_act_t thr_act) +ipc_thread_terminate( + thread_t thread) { - int i; - ipc_port_t kport; + ipc_port_t kport = thread->ith_self; - kport = thr_act->ith_self; + if (kport != IP_NULL) { + int i; - if (kport != IP_NULL) - ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); -} + if (IP_VALID(thread->ith_sself)) + ipc_port_release_send(thread->ith_sself); -void -ipc_thr_act_terminate(thread_act_t thr_act) -{ - ipc_port_t kport; int i; + thread->ith_sself = thread->ith_self = IP_NULL; - kport = thr_act->ith_self; + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { + if (IP_VALID(thread->exc_actions[i].port)) + ipc_port_release_send(thread->exc_actions[i].port); + } - if (kport == IP_NULL) { - /* the thread is already terminated (can this happen?) */ - return; + ipc_port_dealloc_kernel(kport); } - thr_act->ith_self = IP_NULL; - - /* release the naked send rights */ + assert(ipc_kmsg_queue_empty(&thread->ith_messages)); - if (IP_VALID(thr_act->ith_sself)) - ipc_port_release_send(thr_act->ith_sself); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { - if (IP_VALID(thr_act->exc_actions[i].port)) - ipc_port_release_send(thr_act->exc_actions[i].port); - } + if (thread->ith_rpc_reply != IP_NULL) + ipc_port_dealloc_reply(thread->ith_rpc_reply); - /* destroy the kernel port */ - ipc_port_dealloc_kernel(kport); + thread->ith_rpc_reply = IP_NULL; } /* @@ -464,27 +456,30 @@ retrieve_task_self_fast( } /* - * Routine: retrieve_act_self_fast + * Routine: retrieve_thread_self_fast * Purpose: - * Optimized version of retrieve_thread_self, - * that only works for the current thread. - * * Return a send right (possibly null/dead) * for the thread's user-visible self port. + * + * Only works for the current thread. + * * Conditions: * Nothing locked. 
*/ ipc_port_t -retrieve_act_self_fast(thread_act_t thr_act) +retrieve_thread_self_fast( + thread_t thread) { register ipc_port_t port; - assert(thr_act == current_act()); - act_lock(thr_act); - assert(thr_act->ith_self != IP_NULL); + assert(thread == current_thread()); + + thread_mtx_lock(thread); - if ((port = thr_act->ith_sself) == thr_act->ith_self) { + assert(thread->ith_self != IP_NULL); + + if ((port = thread->ith_sself) == thread->ith_self) { /* no interposing */ ip_lock(port); @@ -492,9 +487,11 @@ retrieve_act_self_fast(thread_act_t thr_act) ip_reference(port); port->ip_srights++; ip_unlock(port); - } else + } + else port = ipc_port_copy_send(port); - act_unlock(thr_act); + + thread_mtx_unlock(thread); return port; } @@ -511,13 +508,16 @@ retrieve_act_self_fast(thread_act_t thr_act) */ mach_port_name_t -task_self_trap(void) +task_self_trap( + __unused struct task_self_trap_args *args) { task_t task = current_task(); ipc_port_t sright; + mach_port_name_t name; sright = retrieve_task_self_fast(task); - return ipc_port_copyout_send(sright, task->itk_space); + name = ipc_port_copyout_send(sright, task->itk_space); + return name; } /* @@ -532,14 +532,18 @@ task_self_trap(void) */ mach_port_name_t -thread_self_trap(void) +thread_self_trap( + __unused struct thread_self_trap_args *args) { - thread_act_t thr_act = current_act(); - task_t task = thr_act->task; + thread_t thread = current_thread(); + task_t task = thread->task; ipc_port_t sright; + mach_port_name_t name; + + sright = retrieve_thread_self_fast(thread); + name = ipc_port_copyout_send(sright, task->itk_space); + return name; - sright = retrieve_act_self_fast(thr_act); - return ipc_port_copyout_send(sright, task->itk_space); } /* @@ -554,7 +558,8 @@ thread_self_trap(void) */ mach_port_name_t -mach_reply_port(void) +mach_reply_port( + __unused struct mach_reply_port_args *args) { ipc_port_t port; mach_port_name_t name; @@ -565,10 +570,111 @@ mach_reply_port(void) ip_unlock(port); else name = MACH_PORT_NULL; - return name; } +/* + * Routine: thread_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the thread's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +thread_get_special_port( + thread_t thread, + int which, + ipc_port_t *portp) +{ + kern_return_t result = KERN_SUCCESS; + ipc_port_t *whichp; + + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); + + switch (which) { + + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + default: + return (KERN_INVALID_ARGUMENT); + } + + thread_mtx_lock(thread); + + if (thread->active) + *portp = ipc_port_copy_send(*whichp); + else + result = KERN_FAILURE; + + thread_mtx_unlock(thread); + + return (result); +} + +/* + * Routine: thread_set_special_port [kernel call] + * Purpose: + * Changes one of the thread's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. 
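+ *
+ * Hypothetical caller sketch; the supplied right is consumed only
+ * on success, so a failing call leaves it with the caller:
+ *
+ *	kr = thread_set_special_port(thread, THREAD_KERNEL_PORT, sright);
+ *	if (kr != KERN_SUCCESS)
+ *		ipc_port_release_send(sright);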
+ */ + +kern_return_t +thread_set_special_port( + thread_t thread, + int which, + ipc_port_t port) +{ + kern_return_t result = KERN_SUCCESS; + ipc_port_t *whichp, old = IP_NULL; + + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); + + switch (which) { + + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + default: + return (KERN_INVALID_ARGUMENT); + } + + thread_mtx_lock(thread); + + if (thread->active) { + old = *whichp; + *whichp = port; + } + else + result = KERN_FAILURE; + + thread_mtx_unlock(thread); + + if (IP_VALID(old)) + ipc_port_release_send(old); + + return (result); +} + /* * Routine: task_get_special_port [kernel call] * Purpose: @@ -727,7 +833,7 @@ mach_ports_register( mach_msg_type_number_t portsCnt) { ipc_port_t ports[TASK_PORT_REGISTER_MAX]; - int i; + unsigned int i; if ((task == TASK_NULL) || (portsCnt > TASK_PORT_REGISTER_MAX)) @@ -773,7 +879,7 @@ mach_ports_register( */ if (portsCnt != 0) - kfree((vm_offset_t) memory, + kfree(memory, (vm_size_t) (portsCnt * sizeof(mach_port_t))); return KERN_SUCCESS; @@ -799,13 +905,11 @@ mach_ports_lookup( mach_port_array_t *portsp, mach_msg_type_number_t *portsCnt) { - vm_offset_t memory; + void *memory; vm_size_t size; ipc_port_t *ports; int i; - kern_return_t kr; - if (task == TASK_NULL) return KERN_INVALID_ARGUMENT; @@ -889,16 +993,25 @@ convert_port_to_locked_task(ipc_port_t port) */ task_t convert_port_to_task( - ipc_port_t port) + ipc_port_t port) { - task_t task; + task_t task = TASK_NULL; - task = convert_port_to_locked_task(port); - if (task) { - task->ref_count++; - task_unlock(task); + if (IP_VALID(port)) { + ip_lock(port); + + if ( ip_active(port) && + ip_kotype(port) == IKOT_TASK ) { + task = (task_t)port->ip_kobject; + assert(task != TASK_NULL); + + task_reference_internal(task); + } + + ip_unlock(port); } - return task; + + return (task); } /* @@ -933,166 +1046,6 @@ convert_port_to_space( return (space); } -upl_t -convert_port_to_upl( - ipc_port_t port) -{ - upl_t upl; - - ip_lock(port); - if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) { - ip_unlock(port); - return (upl_t)NULL; - } - upl = (upl_t) port->ip_kobject; - ip_unlock(port); - upl_lock(upl); - upl->ref_count+=1; - upl_unlock(upl); - return upl; -} - -mach_port_t -convert_upl_to_port( - upl_t upl) -{ - return MACH_PORT_NULL; -} - -__private_extern__ void -upl_no_senders( - upl_t upl, - mach_port_mscount_t mscount) -{ - return; -} - -/* - * Routine: convert_port_entry_to_map - * Purpose: - * Convert from a port specifying an entry or a task - * to a map. Doesn't consume the port ref; produces a map ref, - * which may be null. Unlike convert_port_to_map, the - * port may be task or a named entry backed. - * Conditions: - * Nothing locked. 
- */ - - -vm_map_t -convert_port_entry_to_map( - ipc_port_t port) -{ - task_t task; - vm_map_t map; - vm_named_entry_t named_entry; - - if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { - while(TRUE) { - ip_lock(port); - if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { - named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { - ip_unlock(port); - mutex_pause(); - continue; - } - named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); - ip_unlock(port); - if ((named_entry->is_sub_map) && - (named_entry->protection - & VM_PROT_WRITE)) { - map = named_entry->backing.map; - } else { - mach_destroy_memory_entry(port); - return VM_MAP_NULL; - } - vm_map_reference_swap(map); - mach_destroy_memory_entry(port); - break; - } - else - return VM_MAP_NULL; - } - } else { - task_t task; - - task = convert_port_to_locked_task(port); - - if (task == TASK_NULL) - return VM_MAP_NULL; - - if (!task->active) { - task_unlock(task); - return VM_MAP_NULL; - } - - map = task->map; - vm_map_reference_swap(map); - task_unlock(task); - } - - return map; -} - -/* - * Routine: convert_port_entry_to_object - * Purpose: - * Convert from a port specifying a named entry to an - * object. Doesn't consume the port ref; produces a map ref, - * which may be null. - * Conditions: - * Nothing locked. - */ - - -vm_object_t -convert_port_entry_to_object( - ipc_port_t port) -{ - vm_object_t object; - vm_named_entry_t named_entry; - - if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { - while(TRUE) { - ip_lock(port); - if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { - named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { - ip_unlock(port); - mutex_pause(); - continue; - } - named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); - ip_unlock(port); - if ((!named_entry->is_sub_map) && - (named_entry->protection - & VM_PROT_WRITE)) { - object = named_entry->object; - } else { - mach_destroy_memory_entry(port); - return (vm_object_t)NULL; - } - vm_object_reference(named_entry->object); - mach_destroy_memory_entry(port); - break; - } - else - return (vm_object_t)NULL; - } - } else { - return (vm_object_t)NULL; - } - - return object; -} - /* * Routine: convert_port_to_map * Purpose: @@ -1128,87 +1081,66 @@ convert_port_to_map( /* - * Routine: convert_port_to_act + * Routine: convert_port_to_thread * Purpose: - * Convert from a port to a thr_act. - * Doesn't consume the port ref; produces an thr_act ref, + * Convert from a port to a thread. + * Doesn't consume the port ref; produces an thread ref, * which may be null. * Conditions: * Nothing locked. */ -thread_act_t -convert_port_to_act( ipc_port_t port ) +thread_t +convert_port_to_thread( + ipc_port_t port) { - boolean_t r; - thread_act_t thr_act = 0; + thread_t thread = THREAD_NULL; - r = FALSE; - while (!r && IP_VALID(port)) { + if (IP_VALID(port)) { ip_lock(port); - r = ref_act_port_locked(port, &thr_act); - /* port unlocked */ - } - return (thr_act); -} - -boolean_t -ref_act_port_locked( ipc_port_t port, thread_act_t *pthr_act ) -{ - thread_act_t thr_act; - thr_act = 0; - if (ip_active(port) && - (ip_kotype(port) == IKOT_ACT)) { - thr_act = (thread_act_t) port->ip_kobject; - assert(thr_act != THR_ACT_NULL); + if ( ip_active(port) && + ip_kotype(port) == IKOT_THREAD ) { + thread = (thread_t)port->ip_kobject; + assert(thread != THREAD_NULL); - /* - * Out of order locking here, normal - * ordering is act_lock(), then ip_lock(). 
- */ - if (!act_lock_try(thr_act)) { - ip_unlock(port); - mutex_pause(); - return (FALSE); + thread_reference_internal(thread); } - act_reference_locked(thr_act); - act_unlock(thr_act); + + ip_unlock(port); } - *pthr_act = thr_act; - ip_unlock(port); - return (TRUE); + + return (thread); } /* - * Routine: port_name_to_act + * Routine: port_name_to_thread * Purpose: - * Convert from a port name to an act reference - * A name of MACH_PORT_NULL is valid for the null act + * Convert from a port name to a thread reference. + * A name of MACH_PORT_NULL is valid for the null thread. * Conditions: * Nothing locked. */ -thread_act_t -port_name_to_act( +thread_t +port_name_to_thread( mach_port_name_t name) { - thread_act_t thr_act = THR_ACT_NULL; - ipc_port_t kern_port; - kern_return_t kr; + thread_t thread = THREAD_NULL; + ipc_port_t kport; if (MACH_PORT_VALID(name)) { - kr = ipc_object_copyin(current_space(), name, - MACH_MSG_TYPE_COPY_SEND, - (ipc_object_t *) &kern_port); - if (kr != KERN_SUCCESS) - return THR_ACT_NULL; + if (ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *)&kport) != KERN_SUCCESS) + return (THREAD_NULL); - thr_act = convert_port_to_act(kern_port); + thread = convert_port_to_thread(kport); - if (IP_VALID(kern_port)) - ipc_port_release_send(kern_port); + if (IP_VALID(kport)) + ipc_port_release_send(kport); } - return thr_act; + + return (thread); } task_t @@ -1252,12 +1184,6 @@ convert_task_to_port( itk_lock(task); if (task->itk_self != IP_NULL) -#if NORMA_TASK - if (task->map == VM_MAP_NULL) - /* norma placeholder task */ - port = ipc_port_copy_send(task->itk_self); - else -#endif /* NORMA_TASK */ port = ipc_port_make_send(task->itk_self); else port = IP_NULL; @@ -1268,30 +1194,33 @@ convert_task_to_port( } /* - * Routine: convert_act_to_port + * Routine: convert_thread_to_port * Purpose: - * Convert from a thr_act to a port. - * Consumes an thr_act ref; produces a naked send right + * Convert from a thread to a port. + * Consumes a thread ref; produces a naked send right * which may be invalid. * Conditions: * Nothing locked.
*/ ipc_port_t -convert_act_to_port(thr_act) - thread_act_t thr_act; +convert_thread_to_port( + thread_t thread) { - ipc_port_t port; + ipc_port_t port; - act_lock(thr_act); - if (thr_act->ith_self != IP_NULL) - port = ipc_port_make_send(thr_act->ith_self); + thread_mtx_lock(thread); + + if (thread->ith_self != IP_NULL) + port = ipc_port_make_send(thread->ith_self); else port = IP_NULL; - act_unlock(thr_act); - act_deallocate(thr_act); - return port; + thread_mtx_unlock(thread); + + thread_deallocate(thread); + + return (port); } /* @@ -1330,29 +1259,31 @@ space_deallocate( kern_return_t thread_set_exception_ports( - thread_act_t thr_act, + thread_t thread, exception_mask_t exception_mask, - ipc_port_t new_port, - exception_behavior_t new_behavior, - thread_state_flavor_t new_flavor) + ipc_port_t new_port, + exception_behavior_t new_behavior, + thread_state_flavor_t new_flavor) { + ipc_port_t old_port[EXC_TYPES_COUNT]; register int i; - ipc_port_t old_port[EXC_TYPES_COUNT]; - if (!thr_act) - return KERN_INVALID_ARGUMENT; + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); if (exception_mask & ~EXC_MASK_ALL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { switch (new_behavior) { + case EXCEPTION_DEFAULT: case EXCEPTION_STATE: case EXCEPTION_STATE_IDENTITY: break; + default: - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); } } @@ -1361,101 +1292,101 @@ thread_set_exception_ports( * VALID_THREAD_STATE_FLAVOR architecture dependent macro defined in * osfmk/mach/ARCHITECTURE/thread_status.h */ - if (!VALID_THREAD_STATE_FLAVOR(new_flavor)) { - return KERN_INVALID_ARGUMENT; - } + if (!VALID_THREAD_STATE_FLAVOR(new_flavor)) + return (KERN_INVALID_ARGUMENT); - act_lock(thr_act); - if (!thr_act->active) { - act_unlock(thr_act); - return KERN_FAILURE; + thread_mtx_lock(thread); + + if (!thread->active) { + thread_mtx_unlock(thread); + + return (KERN_FAILURE); } - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { - old_port[i] = thr_act->exc_actions[i].port; - thr_act->exc_actions[i].port = - ipc_port_copy_send(new_port); - thr_act->exc_actions[i].behavior = new_behavior; - thr_act->exc_actions[i].flavor = new_flavor; - } else + old_port[i] = thread->exc_actions[i].port; + thread->exc_actions[i].port = ipc_port_copy_send(new_port); + thread->exc_actions[i].behavior = new_behavior; + thread->exc_actions[i].flavor = new_flavor; + } + else old_port[i] = IP_NULL; - }/* for */ - /* - * Consume send rights without any lock held. 
- */ - act_unlock(thr_act); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) + } + + thread_mtx_unlock(thread); + + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); - return KERN_SUCCESS; -}/* thread_set_exception_port */ + return (KERN_SUCCESS); +} kern_return_t task_set_exception_ports( - task_t task, + task_t task, exception_mask_t exception_mask, - ipc_port_t new_port, - exception_behavior_t new_behavior, - thread_state_flavor_t new_flavor) + ipc_port_t new_port, + exception_behavior_t new_behavior, + thread_state_flavor_t new_flavor) { + ipc_port_t old_port[EXC_TYPES_COUNT]; register int i; - ipc_port_t old_port[EXC_TYPES_COUNT]; - if (task == TASK_NULL) { - return KERN_INVALID_ARGUMENT; - } + if (task == TASK_NULL) + return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) { - return KERN_INVALID_ARGUMENT; - } + if (exception_mask & ~EXC_MASK_ALL) + return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { switch (new_behavior) { + case EXCEPTION_DEFAULT: case EXCEPTION_STATE: case EXCEPTION_STATE_IDENTITY: break; + default: - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); } } - /* Cannot easily check "new_flavor", but that just means that - * the flavor in the generated exception message might be garbage: - * GIGO */ - - itk_lock(task); - if (task->itk_self == IP_NULL) { - itk_unlock(task); - return KERN_FAILURE; - } - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + itk_lock(task); + + if (task->itk_self == IP_NULL) { + itk_unlock(task); + + return (KERN_FAILURE); + } + + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { old_port[i] = task->exc_actions[i].port; task->exc_actions[i].port = ipc_port_copy_send(new_port); task->exc_actions[i].behavior = new_behavior; task->exc_actions[i].flavor = new_flavor; - } else + } + else old_port[i] = IP_NULL; - }/* for */ + } - /* - * Consume send rights without any lock held. 
- */ - itk_unlock(task); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) + itk_unlock(task); + + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); - return KERN_SUCCESS; -}/* task_set_exception_port */ + return (KERN_SUCCESS); +} /* * Routine: thread/task_swap_exception_ports [kernel call] @@ -1486,197 +1417,190 @@ task_set_exception_ports( kern_return_t thread_swap_exception_ports( - thread_act_t thr_act, - exception_mask_t exception_mask, - ipc_port_t new_port, + thread_t thread, + exception_mask_t exception_mask, + ipc_port_t new_port, exception_behavior_t new_behavior, thread_state_flavor_t new_flavor, exception_mask_array_t masks, - mach_msg_type_number_t * CountCnt, + mach_msg_type_number_t *CountCnt, exception_port_array_t ports, - exception_behavior_array_t behaviors, - thread_state_flavor_array_t flavors ) + exception_behavior_array_t behaviors, + thread_state_flavor_array_t flavors) { - register int i, - j, - count; - ipc_port_t old_port[EXC_TYPES_COUNT]; + ipc_port_t old_port[EXC_TYPES_COUNT]; + unsigned int i, j, count; - if (!thr_act) - return KERN_INVALID_ARGUMENT; + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) { - return KERN_INVALID_ARGUMENT; - } + if (exception_mask & ~EXC_MASK_ALL) + return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { switch (new_behavior) { + case EXCEPTION_DEFAULT: case EXCEPTION_STATE: case EXCEPTION_STATE_IDENTITY: break; + default: - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); } } - /* Cannot easily check "new_flavor", but that just means that - * the flavor in the generated exception message might be garbage: - * GIGO */ - act_lock(thr_act); - if (!thr_act->active) { - act_unlock(thr_act); - return KERN_FAILURE; + thread_mtx_lock(thread); + + if (!thread->active) { + thread_mtx_unlock(thread); + + return (KERN_FAILURE); } count = 0; - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { - for (j = 0; j < count; j++) { -/* - * search for an identical entry, if found - * set corresponding mask for this exception. - */ - if (thr_act->exc_actions[i].port == ports[j] && - thr_act->exc_actions[i].behavior ==behaviors[j] - && thr_act->exc_actions[i].flavor ==flavors[j]) - { + for (j = 0; j < count; ++j) { + /* + * search for an identical entry, if found + * set corresponding mask for this exception. 
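+ * For example, if exception_mask covers both EXC_BAD_ACCESS + * and EXC_BAD_INSTRUCTION and the old handler for each is + * the same (port, behavior, flavor) triple, it comes back + * as one entry with both EXC_MASK_* bits set in masks[j].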
+ */ + if ( thread->exc_actions[i].port == ports[j] && + thread->exc_actions[i].behavior == behaviors[j] && + thread->exc_actions[i].flavor == flavors[j] ) { masks[j] |= (1 << i); break; } - }/* for */ + } + if (j == count) { masks[j] = (1 << i); - ports[j] = - ipc_port_copy_send(thr_act->exc_actions[i].port); + ports[j] = ipc_port_copy_send(thread->exc_actions[i].port); - behaviors[j] = thr_act->exc_actions[i].behavior; - flavors[j] = thr_act->exc_actions[i].flavor; - count++; + behaviors[j] = thread->exc_actions[i].behavior; + flavors[j] = thread->exc_actions[i].flavor; + ++count; } - old_port[i] = thr_act->exc_actions[i].port; - thr_act->exc_actions[i].port = - ipc_port_copy_send(new_port); - thr_act->exc_actions[i].behavior = new_behavior; - thr_act->exc_actions[i].flavor = new_flavor; - if (count > *CountCnt) { + old_port[i] = thread->exc_actions[i].port; + thread->exc_actions[i].port = ipc_port_copy_send(new_port); + thread->exc_actions[i].behavior = new_behavior; + thread->exc_actions[i].flavor = new_flavor; + if (count > *CountCnt) break; - } - } else + } + else old_port[i] = IP_NULL; - }/* for */ + } - /* - * Consume send rights without any lock held. - */ - act_unlock(thr_act); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) + thread_mtx_unlock(thread); + + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); + *CountCnt = count; - return KERN_SUCCESS; -}/* thread_swap_exception_ports */ + + return (KERN_SUCCESS); +} kern_return_t task_swap_exception_ports( - task_t task, - exception_mask_t exception_mask, - ipc_port_t new_port, + task_t task, + exception_mask_t exception_mask, + ipc_port_t new_port, exception_behavior_t new_behavior, thread_state_flavor_t new_flavor, exception_mask_array_t masks, - mach_msg_type_number_t * CountCnt, + mach_msg_type_number_t *CountCnt, exception_port_array_t ports, - exception_behavior_array_t behaviors, - thread_state_flavor_array_t flavors ) + exception_behavior_array_t behaviors, + thread_state_flavor_array_t flavors) { - register int i, - j, - count; - ipc_port_t old_port[EXC_TYPES_COUNT]; + ipc_port_t old_port[EXC_TYPES_COUNT]; + unsigned int i, j, count; if (task == TASK_NULL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) { - return KERN_INVALID_ARGUMENT; - } + if (exception_mask & ~EXC_MASK_ALL) + return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { switch (new_behavior) { + case EXCEPTION_DEFAULT: case EXCEPTION_STATE: case EXCEPTION_STATE_IDENTITY: break; + default: - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); } } - /* Cannot easily check "new_flavor", but that just means that - * the flavor in the generated exception message might be garbage: - * GIGO */ itk_lock(task); + if (task->itk_self == IP_NULL) { itk_unlock(task); - return KERN_FAILURE; + + return (KERN_FAILURE); } count = 0; - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { for (j = 0; j < count; j++) { -/* - * search for an identical entry, if found - * set corresponding mask for this exception. - */ - if (task->exc_actions[i].port == ports[j] && - task->exc_actions[i].behavior == behaviors[j] - && task->exc_actions[i].flavor == flavors[j]) - { + /* + * search for an identical entry, if found + * set corresponding mask for this exception. 
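+ * (The walk also stops early once count exceeds the + * capacity the caller supplied in *CountCnt; the final + * count is handed back through *CountCnt below.)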
+ */ + if ( task->exc_actions[i].port == ports[j] && + task->exc_actions[i].behavior == behaviors[j] && + task->exc_actions[i].flavor == flavors[j] ) { masks[j] |= (1 << i); break; } - }/* for */ + } + if (j == count) { masks[j] = (1 << i); - ports[j] = - ipc_port_copy_send(task->exc_actions[i].port); + ports[j] = ipc_port_copy_send(task->exc_actions[i].port); behaviors[j] = task->exc_actions[i].behavior; flavors[j] = task->exc_actions[i].flavor; - count++; + ++count; } + old_port[i] = task->exc_actions[i].port; - task->exc_actions[i].port = - ipc_port_copy_send(new_port); + task->exc_actions[i].port = ipc_port_copy_send(new_port); task->exc_actions[i].behavior = new_behavior; task->exc_actions[i].flavor = new_flavor; - if (count > *CountCnt) { + if (count > *CountCnt) break; - } - } else + } + else old_port[i] = IP_NULL; - }/* for */ - + } - /* - * Consume send rights without any lock held. - */ itk_unlock(task); + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); + *CountCnt = count; - return KERN_SUCCESS; -}/* task_swap_exception_ports */ + return (KERN_SUCCESS); +} /* * Routine: thread/task_get_exception_ports [kernel call] @@ -1699,128 +1623,124 @@ task_swap_exception_ports( kern_return_t thread_get_exception_ports( - thread_act_t thr_act, - exception_mask_t exception_mask, + thread_t thread, + exception_mask_t exception_mask, exception_mask_array_t masks, - mach_msg_type_number_t * CountCnt, + mach_msg_type_number_t *CountCnt, exception_port_array_t ports, - exception_behavior_array_t behaviors, - thread_state_flavor_array_t flavors ) + exception_behavior_array_t behaviors, + thread_state_flavor_array_t flavors) { - register int i, - j, - count; + unsigned int i, j, count; - if (!thr_act) - return KERN_INVALID_ARGUMENT; + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) { - return KERN_INVALID_ARGUMENT; - } + if (exception_mask & ~EXC_MASK_ALL) + return (KERN_INVALID_ARGUMENT); - act_lock(thr_act); - if (!thr_act->active) { - act_unlock(thr_act); - return KERN_FAILURE; + thread_mtx_lock(thread); + + if (!thread->active) { + thread_mtx_unlock(thread); + + return (KERN_FAILURE); } count = 0; - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { - for (j = 0; j < count; j++) { -/* - * search for an identical entry, if found - * set corresponding mask for this exception. - */ - if (thr_act->exc_actions[i].port == ports[j] && - thr_act->exc_actions[i].behavior ==behaviors[j] - && thr_act->exc_actions[i].flavor == flavors[j]) - { + for (j = 0; j < count; ++j) { + /* + * search for an identical entry, if found + * set corresponding mask for this exception. 
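+ * (Unlike the swap variants above, this get-only path + * copies extra send rights with ipc_port_copy_send and + * leaves the thread's exc_actions unchanged.)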
+ */ + if ( thread->exc_actions[i].port == ports[j] && + thread->exc_actions[i].behavior ==behaviors[j] && + thread->exc_actions[i].flavor == flavors[j] ) { masks[j] |= (1 << i); break; } - }/* for */ + } + if (j == count) { masks[j] = (1 << i); - ports[j] = - ipc_port_copy_send(thr_act->exc_actions[i].port); - behaviors[j] = thr_act->exc_actions[i].behavior; - flavors[j] = thr_act->exc_actions[i].flavor; - count++; - if (count >= *CountCnt) { + ports[j] = ipc_port_copy_send(thread->exc_actions[i].port); + behaviors[j] = thread->exc_actions[i].behavior; + flavors[j] = thread->exc_actions[i].flavor; + ++count; + if (count >= *CountCnt) break; - } } } - }/* for */ + } - act_unlock(thr_act); + thread_mtx_unlock(thread); *CountCnt = count; - return KERN_SUCCESS; -}/* thread_get_exception_ports */ + + return (KERN_SUCCESS); +} kern_return_t task_get_exception_ports( - task_t task, - exception_mask_t exception_mask, + task_t task, + exception_mask_t exception_mask, exception_mask_array_t masks, - mach_msg_type_number_t * CountCnt, + mach_msg_type_number_t *CountCnt, exception_port_array_t ports, - exception_behavior_array_t behaviors, - thread_state_flavor_array_t flavors ) + exception_behavior_array_t behaviors, + thread_state_flavor_array_t flavors) { - register int i, - j, - count; + unsigned int i, j, count; if (task == TASK_NULL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) { - return KERN_INVALID_ARGUMENT; - } + if (exception_mask & ~EXC_MASK_ALL) + return (KERN_INVALID_ARGUMENT); itk_lock(task); + if (task->itk_self == IP_NULL) { itk_unlock(task); - return KERN_FAILURE; + + return (KERN_FAILURE); } count = 0; - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { - for (j = 0; j < count; j++) { -/* - * search for an identical entry, if found - * set corresponding mask for this exception. - */ - if (task->exc_actions[i].port == ports[j] && - task->exc_actions[i].behavior == behaviors[j] - && task->exc_actions[i].flavor == flavors[j]) - { + for (j = 0; j < count; ++j) { + /* + * search for an identical entry, if found + * set corresponding mask for this exception. + */ + if ( task->exc_actions[i].port == ports[j] && + task->exc_actions[i].behavior == behaviors[j] && + task->exc_actions[i].flavor == flavors[j] ) { masks[j] |= (1 << i); break; } - }/* for */ + } + if (j == count) { masks[j] = (1 << i); - ports[j] = - ipc_port_copy_send(task->exc_actions[i].port); + ports[j] = ipc_port_copy_send(task->exc_actions[i].port); behaviors[j] = task->exc_actions[i].behavior; flavors[j] = task->exc_actions[i].flavor; - count++; - if (count > *CountCnt) { + ++count; + if (count > *CountCnt) break; - } } } - }/* for */ + } itk_unlock(task); *CountCnt = count; - return KERN_SUCCESS; -}/* task_get_exception_ports */ + + return (KERN_SUCCESS); +} diff --git a/osfmk/kern/ipc_tt.h b/osfmk/kern/ipc_tt.h index ded87c88c..63b0d6d60 100644 --- a/osfmk/kern/ipc_tt.h +++ b/osfmk/kern/ipc_tt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -79,6 +79,10 @@ extern void ipc_task_enable( extern void ipc_task_disable( task_t task); +/* Clear out a task's IPC state */ +extern void ipc_task_reset( + task_t task); + /* Clean up and destroy a task's IPC state */ extern void ipc_task_terminate( task_t task); @@ -87,6 +91,10 @@ extern void ipc_task_terminate( extern void ipc_thread_init( thread_t thread); +/* Disable IPC access to a thread */ +extern void ipc_thread_disable( + thread_t thread); + /* Clean up and destroy a thread's IPC state */ extern void ipc_thread_terminate( thread_t thread); @@ -96,16 +104,15 @@ extern ipc_port_t retrieve_task_self_fast( task_t task); /* Return a send right for the thread's user-visible self port */ -extern ipc_port_t retrieve_act_self_fast( - thread_act_t); +extern ipc_port_t retrieve_thread_self_fast( + thread_t thread); /* Convert from a port to a task */ extern task_t convert_port_to_task( ipc_port_t port); -/* Convert from a port entry port to a task */ -extern task_t convert_port_to_task( - ipc_port_t port); +extern task_t port_name_to_task( + mach_port_name_t name); extern boolean_t ref_task_port_locked( ipc_port_t port, task_t *ptask); @@ -121,52 +128,23 @@ extern boolean_t ref_space_port_locked( extern vm_map_t convert_port_to_map( ipc_port_t port); -/* Convert from a map entry port to a map */ -extern vm_map_t convert_port_entry_to_map( - ipc_port_t port); - -/* Convert from a port to a vm_object */ -extern vm_object_t convert_port_entry_to_object( - ipc_port_t port); - -/* Convert from a port to a upl_object */ -extern upl_t convert_port_to_upl( - ipc_port_t port); - /* Convert from a port to a thread */ -extern thread_act_t convert_port_to_act( - ipc_port_t port); +extern thread_t convert_port_to_thread( + ipc_port_t port); -extern thread_act_t port_name_to_act( +extern thread_t port_name_to_thread( mach_port_name_t port_name); -extern boolean_t ref_act_port_locked( - ipc_port_t port, thread_act_t *pthr_act); - /* Convert from a task to a port */ extern ipc_port_t convert_task_to_port( - task_t task); + task_t task); /* Convert from a thread to a port */ -extern ipc_port_t convert_act_to_port( thread_act_t ); - -/* Convert from a upl to a port */ -extern ipc_port_t convert_upl_to_port( upl_t ); +extern ipc_port_t convert_thread_to_port( + thread_t thread); /* Deallocate a space ref produced by convert_port_to_space */ extern void space_deallocate( - ipc_space_t space); - -/* Allocate a reply port */ -extern mach_port_name_t mach_reply_port(void); - -/* Initialize a thread_act's ipc mechanism */ -extern void ipc_thr_act_init(task_t, thread_act_t); - -/* Disable IPC access to a thread_act */ -extern void ipc_thr_act_disable(thread_act_t); - -/* Clean up and destroy a thread_act's IPC state */ -extern void ipc_thr_act_terminate(thread_act_t); + ipc_space_t space); #endif /* _KERN_IPC_TT_H_ */ diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 6a10a53ff..57161e0bb 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,161 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.19.5 1995/02/24 15:20:29 alanl - * Lock package cleanup. 
- * [95/02/15 alanl] - * - * Merge with DIPC2_SHARED. - * [1995/01/05 15:11:02 alanl] - * - * Revision 1.2.28.2 1994/11/10 06:12:50 dwm - * mk6 CR764 - s/spinlock/simple_lock/ (name change only) - * [1994/11/10 05:28:35 dwm] - * - * Revision 1.2.28.1 1994/11/04 10:07:40 dwm - * mk6 CR668 - 1.3b26 merge - * * Revision 1.2.2.4 1993/11/08 15:04:18 gm - * CR9710: Updated to new zinit() and zone_change() interfaces. - * * End1.3merge - * [1994/11/04 09:25:48 dwm] - * - * Revision 1.2.19.3 1994/09/23 02:20:52 ezf - * change marker to not FREE - * [1994/09/22 21:33:57 ezf] - * - * Revision 1.2.19.2 1994/06/14 18:36:36 bolinger - * NMK17.2 merge: Replace simple_lock ops. - * [1994/06/14 18:35:17 bolinger] - * - * Revision 1.2.19.1 1994/06/14 17:04:23 bolinger - * Merge up to NMK17.2. - * [1994/06/14 16:54:19 bolinger] - * - * Revision 1.2.23.3 1994/10/14 12:24:33 sjs - * Removed krealloc_spinl routine: the newer locking scheme makes it - * obsolete. - * [94/10/13 sjs] - * - * Revision 1.2.23.2 1994/08/11 14:42:46 rwd - * Post merge cleanup - * [94/08/09 rwd] - * - * Changed zcollectable to use zchange. - * [94/08/04 rwd] - * - * Revision 1.2.17.2 1994/07/08 01:58:45 alanl - * Change comment to match function name. - * [1994/07/08 01:47:59 alanl] - * - * Revision 1.2.17.1 1994/05/26 16:20:38 sjs - * Added krealloc_spinl: same as krealloc but uses spin locks. - * [94/05/25 sjs] - * - * Revision 1.2.23.1 1994/08/04 02:24:55 mmp - * Added krealloc_spinl: same as krealloc but uses spin locks. - * [94/05/25 sjs] - * - * Revision 1.2.13.1 1994/02/11 14:27:12 paire - * Changed krealloc() to make it work on a MP system. Added a new parameter - * which is the simple lock that should be held while modifying the memory - * area already initialized. - * Change from NMK16.1 [93/09/02 paire] - * - * Do not set debug for kalloc zones as default. It wastes - * to much space. - * Change from NMK16.1 [93/08/16 bernadat] - * [94/02/07 paire] - * - * Revision 1.2.2.3 1993/07/28 17:15:44 bernard - * CR9523 -- Prototypes. - * [1993/07/27 20:14:12 bernard] - * - * Revision 1.2.2.2 1993/06/02 23:37:46 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:12:59 jeffc] - * - * Revision 1.2 1992/12/07 21:28:42 robert - * integrate any changes below for 14.0 (branch from 13.16 base) - * - * Joseph Barrera (jsb) at Carnegie-Mellon University 11-Sep-92 - * Added krealloc. Added kalloc_max_prerounded for quicker choice between - * zalloc and kmem_alloc. Renamed MINSIZE to KALLOC_MINSIZE. - * [1992/12/06 19:47:16 robert] - * - * Revision 1.1 1992/09/30 02:09:23 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.9 91/05/14 16:43:17 mrt - * Correcting copyright - * - * Revision 2.8 91/03/16 14:50:37 rpd - * Updated for new kmem_alloc interface. - * [91/03/03 rpd] - * - * Revision 2.7 91/02/05 17:27:22 mrt - * Changed to new Mach copyright - * [91/02/01 16:14:12 mrt] - * - * Revision 2.6 90/06/19 22:59:06 rpd - * Made the big kalloc zones collectable. - * [90/06/05 rpd] - * - * Revision 2.5 90/06/02 14:54:47 rpd - * Added kalloc_max, kalloc_map_size. - * [90/03/26 22:06:39 rpd] - * - * Revision 2.4 90/01/11 11:43:13 dbg - * De-lint. - * [89/12/06 dbg] - * - * Revision 2.3 89/09/08 11:25:51 dbg - * MACH_KERNEL: remove non-MACH data types. - * [89/07/11 dbg] - * - * Revision 2.2 89/08/31 16:18:59 rwd - * First Checkin - * [89/08/23 15:41:37 rwd] - * - * Revision 2.6 89/08/02 08:03:28 jsb - * Make all kalloc zones 8 MB big. (No more kalloc panics!) 
- * [89/08/01 14:10:17 jsb] - * - * Revision 2.4 89/04/05 13:03:10 rvb - * Guarantee a zone max of at least 100 elements or 10 pages - * which ever is greater. Afs (AllocDouble()) puts a great demand - * on the 2048 zone and used to blow away. - * [89/03/09 rvb] - * - * Revision 2.3 89/02/25 18:04:39 gm0w - * Changes for cleanup. - * - * Revision 2.2 89/01/18 02:07:04 jsb - * Give each kalloc zone a meaningful name (for panics); - * create a zone for each power of 2 between MINSIZE - * and PAGE_SIZE, instead of using (obsoleted) NQUEUES. - * [89/01/17 10:16:33 jsb] - * - * - * 13-Feb-88 John Seamons (jks) at NeXT - * Updated to use kmem routines instead of vmem routines. - * - * 21-Jun-85 Avadis Tevanian (avie) at Carnegie-Mellon University - * Created. - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -225,6 +70,7 @@ #include #include #include +#include #ifdef MACH_BSD zone_t kalloc_zone(vm_size_t); @@ -256,7 +102,7 @@ vm_size_t kalloc_large_max; int first_k_zone = -1; struct zone *k_zone[16]; -static char *k_zone_name[16] = { +static const char *k_zone_name[16] = { "kalloc.1", "kalloc.2", "kalloc.4", "kalloc.8", "kalloc.16", "kalloc.32", @@ -292,6 +138,23 @@ unsigned long k_zone_max[16] = { 64, /* 32768 Byte */ }; +/* forward declarations */ +void * kalloc_canblock( + vm_size_t size, + boolean_t canblock); + + +/* OSMalloc local data declarations */ +static +queue_head_t OSMalloc_tag_list; + +decl_simple_lock_data(static,OSMalloc_tag_lock) + +/* OSMalloc forward declarations */ +void OSMalloc_init(void); +void OSMalloc_Tagref(OSMallocTag tag); +void OSMalloc_Tagrele(OSMallocTag tag); + /* * Initialize the memory allocator. This should be called only * once on a system wide basis (i.e. 
first processor to get here @@ -310,7 +173,8 @@ kalloc_init( register int i; retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, TRUE, &kalloc_map); + FALSE, VM_FLAGS_ANYWHERE, &kalloc_map); + if (retval != KERN_SUCCESS) panic("kalloc_init: kmem_suballoc failed"); @@ -341,9 +205,10 @@ kalloc_init( k_zone[i] = zinit(size, k_zone_max[i] * size, size, k_zone_name[i]); } + OSMalloc_init(); } -vm_offset_t +void * kalloc_canblock( vm_size_t size, boolean_t canblock) @@ -358,13 +223,13 @@ kalloc_canblock( */ if (size >= kalloc_max_prerounded) { - vm_offset_t addr; + void *addr; /* kmem_alloc could block so we return if noblock */ if (!canblock) { return(0); } - if (kmem_alloc(kalloc_map, &addr, size) != KERN_SUCCESS) + if (kmem_alloc(kalloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) addr = 0; if (addr) { @@ -387,36 +252,35 @@ kalloc_canblock( } /* allocate from the appropriate zone */ - assert(allocsize < kalloc_max); return(zalloc_canblock(k_zone[zindex], canblock)); } -vm_offset_t +void * kalloc( vm_size_t size) { - return( kalloc_canblock(size, TRUE) ); + return( kalloc_canblock(size, TRUE) ); } -vm_offset_t +void * kalloc_noblock( vm_size_t size) { - return( kalloc_canblock(size, FALSE) ); + return( kalloc_canblock(size, FALSE) ); } void krealloc( - vm_offset_t *addrp, + void **addrp, vm_size_t old_size, vm_size_t new_size, simple_lock_t lock) { register int zindex; register vm_size_t allocsize; - vm_offset_t naddr; + void *naddr; /* can only be used for increasing allocation size */ @@ -435,26 +299,28 @@ krealloc( /* if old block was kmem_alloc'd, then use kmem_realloc if necessary */ if (old_size >= kalloc_max_prerounded) { - old_size = round_page_32(old_size); - new_size = round_page_32(new_size); + old_size = round_page(old_size); + new_size = round_page(new_size); if (new_size > old_size) { - if (kmem_realloc(kalloc_map, *addrp, old_size, &naddr, - new_size) != KERN_SUCCESS) { + if (KERN_SUCCESS != kmem_realloc(kalloc_map, + (vm_offset_t)*addrp, old_size, + (vm_offset_t *)&naddr, new_size)) { panic("krealloc: kmem_realloc"); naddr = 0; } simple_lock(lock); - *addrp = naddr; + *addrp = (void *) naddr; /* kmem_realloc() doesn't free old page range. 
*/ - kmem_free(kalloc_map, *addrp, old_size); + kmem_free(kalloc_map, (vm_offset_t)*addrp, old_size); kalloc_large_total += (new_size - old_size); if (kalloc_large_total > kalloc_large_max) - kalloc_large_max = kalloc_large_total; + kalloc_large_max = kalloc_large_total; + } return; } @@ -478,10 +344,11 @@ krealloc( simple_unlock(lock); if (new_size >= kalloc_max_prerounded) { - if (kmem_alloc(kalloc_map, &naddr, new_size) != KERN_SUCCESS) { + if (KERN_SUCCESS != kmem_alloc(kalloc_map, + (vm_offset_t *)&naddr, new_size)) { panic("krealloc: kmem_alloc"); simple_lock(lock); - *addrp = 0; + *addrp = NULL; return; } kalloc_large_inuse++; @@ -512,11 +379,11 @@ krealloc( /* set up new address */ - *addrp = naddr; + *addrp = (void *) naddr; } -vm_offset_t +void * kget( vm_size_t size) { @@ -547,7 +414,7 @@ kget( void kfree( - vm_offset_t data, + void *data, vm_size_t size) { register int zindex; @@ -556,7 +423,7 @@ kfree( /* if size was too large for a zone, then use kmem_free */ if (size >= kalloc_max_prerounded) { - kmem_free(kalloc_map, data, size); + kmem_free(kalloc_map, (vm_offset_t)data, size); kalloc_large_total -= size; kalloc_large_inuse--; @@ -604,11 +471,11 @@ kalloc_zone( #endif - +void kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, int *collectable, int *exhaustable) { - *count = kalloc_large_inuse; + *count = kalloc_large_inuse; *cur_size = kalloc_large_total; *max_size = kalloc_large_max; *elem_size = kalloc_large_total / kalloc_large_inuse; @@ -617,3 +484,151 @@ kalloc_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_s *exhaustable = 0; } + +void +OSMalloc_init( + void) +{ + queue_init(&OSMalloc_tag_list); + simple_lock_init(&OSMalloc_tag_lock, 0); +} + +OSMallocTag +OSMalloc_Tagalloc( + const char *str, + uint32_t flags) +{ + OSMallocTag OSMTag; + + OSMTag = (OSMallocTag)kalloc(sizeof(*OSMTag)); + + bzero((void *)OSMTag, sizeof(*OSMTag)); + + if (flags & OSMT_PAGEABLE) + OSMTag->OSMT_attr = OSMT_ATTR_PAGEABLE; + + OSMTag->OSMT_refcnt = 1; + + strncpy(OSMTag->OSMT_name, str, OSMT_MAX_NAME); + + simple_lock(&OSMalloc_tag_lock); + enqueue_tail(&OSMalloc_tag_list, (queue_entry_t)OSMTag); + simple_unlock(&OSMalloc_tag_lock); + OSMTag->OSMT_state = OSMT_VALID; + return(OSMTag); +} + +void +OSMalloc_Tagref( + OSMallocTag tag) +{ + if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) + panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + + (void)hw_atomic_add((uint32_t *)(&tag->OSMT_refcnt), 1); +} + +void +OSMalloc_Tagrele( + OSMallocTag tag) +{ + if (!((tag->OSMT_state & OSMT_VALID_MASK) == OSMT_VALID)) + panic("OSMalloc_Tagref(): bad state 0x%08X\n",tag->OSMT_state); + + if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { + if (hw_compare_and_store(OSMT_VALID|OSMT_RELEASED, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) { + simple_lock(&OSMalloc_tag_lock); + (void)remque((queue_entry_t)tag); + simple_unlock(&OSMalloc_tag_lock); + kfree((void*)tag, sizeof(*tag)); + } else + panic("OSMalloc_Tagrele(): refcnt 0\n"); + } +} + +void +OSMalloc_Tagfree( + OSMallocTag tag) +{ + if (!hw_compare_and_store(OSMT_VALID, OSMT_VALID|OSMT_RELEASED, &tag->OSMT_state)) + panic("OSMalloc_Tagfree(): bad state 0x%08X\n", tag->OSMT_state); + + if (hw_atomic_sub((uint32_t *)(&tag->OSMT_refcnt), 1) == 0) { + simple_lock(&OSMalloc_tag_lock); + (void)remque((queue_entry_t)tag); + simple_unlock(&OSMalloc_tag_lock); + kfree((void*)tag, sizeof(*tag)); + } +} + +void * +OSMalloc( + uint32_t 
size, + OSMallocTag tag) +{ + void *addr=NULL; + kern_return_t kr; + + OSMalloc_Tagref(tag); + if ((tag->OSMT_attr & OSMT_PAGEABLE) + && (size & ~PAGE_MASK)) { + + if ((kr = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&addr, size)) != KERN_SUCCESS) + panic("OSMalloc(): kmem_alloc_pageable() failed 0x%08X\n", kr); + } else + addr = kalloc((vm_size_t)size); + + return(addr); +} + +void * +OSMalloc_nowait( + uint32_t size, + OSMallocTag tag) +{ + void *addr=NULL; + + if (tag->OSMT_attr & OSMT_PAGEABLE) + return(NULL); + + OSMalloc_Tagref(tag); + /* XXX: use non-blocking kalloc for now */ + addr = kalloc_noblock((vm_size_t)size); + if (addr == NULL) + OSMalloc_Tagrele(tag); + + return(addr); +} + +void * +OSMalloc_noblock( + uint32_t size, + OSMallocTag tag) +{ + void *addr=NULL; + + if (tag->OSMT_attr & OSMT_PAGEABLE) + return(NULL); + + OSMalloc_Tagref(tag); + addr = kalloc_noblock((vm_size_t)size); + if (addr == NULL) + OSMalloc_Tagrele(tag); + + return(addr); +} + +void +OSFree( + void *addr, + uint32_t size, + OSMallocTag tag) +{ + if ((tag->OSMT_attr & OSMT_PAGEABLE) + && (size & ~PAGE_MASK)) { + kmem_free(kernel_map, (vm_offset_t)addr, size); + } else + kfree((void*)addr, size); + + OSMalloc_Tagrele(tag); +} diff --git a/osfmk/kern/kalloc.h b/osfmk/kern/kalloc.h index 657848b41..2e7ecc170 100644 --- a/osfmk/kern/kalloc.h +++ b/osfmk/kern/kalloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -48,43 +48,53 @@ * the rights to redistribute these changes. */ +#ifdef KERNEL_PRIVATE + #ifndef _KERN_KALLOC_H_ #define _KERN_KALLOC_H_ #include +#include -#define KALLOC_MINSIZE 16 +__BEGIN_DECLS -extern vm_offset_t kalloc( - vm_size_t size); +extern void *kalloc(vm_size_t size); -extern vm_offset_t kalloc_noblock( - vm_size_t size); +extern void *kalloc_noblock(vm_size_t size); -extern vm_offset_t kget( - vm_size_t size); +extern void *kget(vm_size_t size); -extern void kfree( - vm_offset_t data, - vm_size_t size); +extern void kfree(void *data, + vm_size_t size); -#include +__END_DECLS -#ifdef __APPLE_API_PRIVATE +#ifdef MACH_KERNEL_PRIVATE -#ifdef MACH_KERNEL_PRIVATE #include +#define KALLOC_MINSIZE 16 + extern void kalloc_init(void); -extern void krealloc( - vm_offset_t *addrp, - vm_size_t old_size, - vm_size_t new_size, - simple_lock_t lock); +extern void krealloc(void **addrp, + vm_size_t old_size, + vm_size_t new_size, + simple_lock_t lock); + +extern void kalloc_fake_zone_info( + int *count, + vm_size_t *cur_size, + vm_size_t *max_size, + vm_size_t *elem_size, + vm_size_t *alloc_size, + int *collectable, + int *exhaustable); -#endif /* MACH_KERNEL_PRIVATE */ +extern vm_size_t kalloc_max_prerounded; -#endif /* __APPLE_APPI_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ #endif /* _KERN_KALLOC_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/kern_types.h b/osfmk/kern/kern_types.h index 318502f7a..6ae7d07f8 100644 --- a/osfmk/kern/kern_types.h +++ b/osfmk/kern/kern_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,23 +30,15 @@ #include #include -#include +#ifdef KERNEL_PRIVATE -#if !defined(MACH_KERNEL_PRIVATE) - -/* - * Declare empty structure definitions for export to other - * kernel components. 
This lets us still provide some level - * of type checking, without exposing our internal data - * structures. - */ +#ifndef MACH_KERNEL_PRIVATE struct zone ; -struct wait_queue { unsigned int opaque[2]; uintptr_t opaquep[2]; } ; +struct wait_queue { unsigned int opaque[2]; uintptr_t opaquep[2]; } ; -#endif /* MACH_KERNEL_PRIVATE */ - +#endif /* MACH_KERNEL_PRIVATE */ typedef struct zone *zone_t; #define ZONE_NULL ((zone_t) 0) @@ -58,6 +50,8 @@ typedef struct wait_queue *wait_queue_t; typedef vm_offset_t ipc_kobject_t; #define IKO_NULL ((ipc_kobject_t) 0) +#endif /* KERNEL_PRIVATE */ + typedef void *event_t; /* wait event */ #define NO_EVENT ((event_t) 0) @@ -75,7 +69,7 @@ typedef int wait_result_t; #define THREAD_RESTART 3 /* restart operation entirely */ -typedef void (*thread_continue_t)(void); /* where to resume it */ +typedef void (*thread_continue_t)(void *, wait_result_t); #define THREAD_CONTINUE_NULL ((thread_continue_t) 0) /* @@ -86,21 +80,28 @@ typedef int wait_interrupt_t; #define THREAD_INTERRUPTIBLE 1 /* may not be restartable */ #define THREAD_ABORTSAFE 2 /* abortable safely */ -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #ifdef MACH_KERNEL_PRIVATE #include typedef struct clock *clock_t; -#endif /* MACH_KERNEL_PRIVATE */ +typedef struct mig_object *mig_object_t; +#define MIG_OBJECT_NULL ((mig_object_t) 0) + +typedef struct mig_notify *mig_notify_t; +#define MIG_NOTIFY_NULL ((mig_notify_t) 0) -#ifdef __APPLE_API_EVOLVING +typedef boolean_t (*thread_roust_t)(thread_t, wait_result_t); +#define THREAD_ROUST_NULL ((thread_roust_t) 0) + +#else /* MACH_KERNEL_PRIVATE */ -#ifndef MACH_KERNEL_PRIVATE struct wait_queue_set ; struct wait_queue_link ; -#endif + +#endif /* MACH_KERNEL_PRIVATE */ typedef struct wait_queue_set *wait_queue_set_t; #define WAIT_QUEUE_SET_NULL ((wait_queue_set_t)0) @@ -110,29 +111,12 @@ typedef struct wait_queue_link *wait_queue_link_t; #define WAIT_QUEUE_LINK_NULL ((wait_queue_link_t)0) #define SIZEOF_WAITQUEUE_LINK wait_queue_link_size() -typedef struct mig_object *mig_object_t; -#define MIG_OBJECT_NULL ((mig_object_t) 0) - -typedef struct mig_notify *mig_notify_t; -#define MIG_NOTIFY_NULL ((mig_notify_t) 0) - -typedef boolean_t (*thread_roust_t)(thread_t, wait_result_t); -#define THREAD_ROUST_NULL ((thread_roust_t) 0) - -#endif /* __APPLE_API_EVOLVING */ - -#ifdef __APPLE_API_UNSTABLE - /* legacy definitions - going away */ -typedef struct thread *thread_shuttle_t; -#define THREAD_SHUTTLE_NULL ((thread_shuttle_t)0) struct wait_queue_sub ; typedef struct wait_queue_sub *wait_queue_sub_t; #define WAIT_QUEUE_SUB_NULL ((wait_queue_sub_t)0) #define SIZEOF_WAITQUEUE_SUB wait_queue_set_size() -#endif /* __APPLE_API_UNSTABLE */ - -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _KERN_KERN_TYPES_H_ */ diff --git a/osfmk/kern/kmod.c b/osfmk/kern/kmod.c index aac5aa956..a79fd2b0b 100644 --- a/osfmk/kern/kmod.c +++ b/osfmk/kern/kmod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,13 +30,36 @@ #include #include #include +#include +#include + +#include #include -#include #include + +#include + #include #include +/* + * XXX headers for which prototypes should be in a common include file; + * XXX see libsa/kext.cpp for why. 
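+ * XXX Everything declared here except kdb_printf is + * XXX defined later in this file; until such a common + * XXX header exists, these serve as forward declarations.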
+ */ +kern_return_t kmod_create_internal(kmod_info_t *info, kmod_t *id); +kern_return_t kmod_destroy_internal(kmod_t id); +kern_return_t kmod_start_or_stop(kmod_t id, int start, kmod_args_t *data, + mach_msg_type_number_t *dataCount); +kern_return_t kmod_retain(kmod_t id); +kern_return_t kmod_release(kmod_t id); +kern_return_t kmod_queue_cmd(vm_address_t data, vm_size_t size); +kern_return_t kmod_get_info(host_t host, kmod_info_array_t *kmods, + mach_msg_type_number_t *kmodCount); +extern void kdb_printf(const char *fmt, ...); + + + #define WRITE_PROTECT_MODULE_TEXT (0) kmod_info_t *kmod = 0; @@ -54,10 +77,10 @@ typedef struct cmd_queue_entry { queue_head_t kmod_cmd_queue; void -kmod_init() +kmod_init(void) { - simple_lock_init(&kmod_lock, ETAP_MISC_Q); - simple_lock_init(&kmod_queue_lock, ETAP_MISC_Q); + simple_lock_init(&kmod_lock, 0); + simple_lock_init(&kmod_queue_lock, 0); queue_init(&kmod_cmd_queue); } @@ -103,11 +126,11 @@ kmod_lookupbyid_locked(kmod_t id) if (k) { bcopy((char*)k, (char *)kc, sizeof(kmod_info_t)); } -finish: + simple_unlock(&kmod_queue_lock); if (k == 0) { - kfree((vm_offset_t)kc, sizeof(kmod_info_t)); + kfree(kc, sizeof(kmod_info_t)); kc = 0; } return kc; @@ -127,11 +150,11 @@ kmod_lookupbyname_locked(const char * name) if (k) { bcopy((char *)k, (char *)kc, sizeof(kmod_info_t)); } -finish: + simple_unlock(&kmod_queue_lock); if (k == 0) { - kfree((vm_offset_t)kc, sizeof(kmod_info_t)); + kfree(kc, sizeof(kmod_info_t)); kc = 0; } return kc; @@ -148,7 +171,7 @@ kmod_queue_cmd(vm_address_t data, vm_size_t size) rc = kmem_alloc(kernel_map, &e->data, size); if (rc != KERN_SUCCESS) { - kfree((vm_offset_t)e, sizeof(struct cmd_queue_entry)); + kfree(e, sizeof(struct cmd_queue_entry)); return rc; } e->size = size; @@ -226,6 +249,10 @@ kmod_send_generic(int type, void *generic_data, int size) extern vm_offset_t sectPRELINKB; extern int sectSizePRELINK; +/* + * Operates only on 32-bit Mach headers on behalf of the kernel module loader + * if WRITE_PROTECT_MODULE_TEXT is defined.
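+ * (WRITE_PROTECT_MODULE_TEXT is defined as 0 near the top + * of this file, so this path is compiled out by default.)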
+ */ kern_return_t kmod_create_internal(kmod_info_t *info, kmod_t *id) { @@ -293,15 +320,19 @@ kmod_create_internal(kmod_info_t *info, kmod_t *id) kern_return_t kmod_create(host_priv_t host_priv, - kmod_info_t *info, + vm_address_t addr, kmod_t *id) { + kmod_info_t *info = (kmod_info_t *)addr; + if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; return kmod_create_internal(info, id); } kern_return_t -kmod_create_fake(const char *name, const char *version) +kmod_create_fake_with_address(const char *name, const char *version, + vm_address_t address, vm_size_t size, + int * return_id) { kmod_info_t *info; @@ -323,7 +354,9 @@ kmod_create_fake(const char *name, const char *version) bcopy(version, info->version, 1 + strlen(version)); //NIK fixed this part info->reference_count = 1; // keep it from unloading, starting, stopping info->reference_list = 0; - info->address = info->size = info->hdr_size = 0; + info->address = address; + info->size = size; + info->hdr_size = 0; info->start = info->stop = 0; simple_lock(&kmod_lock); @@ -335,6 +368,8 @@ kmod_create_fake(const char *name, const char *version) } info->id = kmod_index++; + if (return_id) + *return_id = info->id; info->next = kmod; kmod = info; @@ -345,7 +380,14 @@ kmod_create_fake(const char *name, const char *version) } kern_return_t -kmod_destroy_internal(kmod_t id) +kmod_create_fake(const char *name, const char *version) +{ + return kmod_create_fake_with_address(name, version, 0, 0, NULL); +} + + +static kern_return_t +_kmod_destroy_internal(kmod_t id, boolean_t fake) { kern_return_t rc; kmod_info_t *k; @@ -358,7 +400,7 @@ kmod_destroy_internal(kmod_t id) if (k->id == id) { kmod_reference_t *r, *t; - if (k->reference_count != 0) { + if (!fake && (k->reference_count != 0)) { simple_unlock(&kmod_lock); return KERN_INVALID_ARGUMENT; } @@ -375,31 +417,34 @@ kmod_destroy_internal(kmod_t id) r->info->reference_count--; t = r; r = r->next; - kfree((vm_offset_t)t, sizeof(struct kmod_reference)); + kfree(t, sizeof(struct kmod_reference)); } + if (!fake) + { #if DEBUG - printf("kmod_destroy: %s (id %d), deallocating %d pages starting at 0x%x\n", - k->name, k->id, k->size / PAGE_SIZE, k->address); + printf("kmod_destroy: %s (id %d), deallocating %d pages starting at 0x%x\n", + k->name, k->id, k->size / PAGE_SIZE, k->address); #endif /* DEBUG */ - if( (k->address >= sectPRELINKB) && (k->address < (sectPRELINKB + sectSizePRELINK))) - { - vm_offset_t - virt = ml_static_ptovirt(k->address); - if( virt) { - ml_static_mfree( virt, k->size); - } - } - else - { - rc = vm_map_unwire(kernel_map, k->address + k->hdr_size, - k->address + k->size, FALSE); - assert(rc == KERN_SUCCESS); - - rc = vm_deallocate(kernel_map, k->address, k->size); - assert(rc == KERN_SUCCESS); - } + if( (k->address >= sectPRELINKB) && (k->address < (sectPRELINKB + sectSizePRELINK))) + { + vm_offset_t + virt = ml_static_ptovirt(k->address); + if( virt) { + ml_static_mfree( virt, k->size); + } + } + else + { + rc = vm_map_unwire(kernel_map, k->address + k->hdr_size, + k->address + k->size, FALSE); + assert(rc == KERN_SUCCESS); + + rc = vm_deallocate(kernel_map, k->address, k->size); + assert(rc == KERN_SUCCESS); + } + } return KERN_SUCCESS; } p = k; @@ -411,15 +456,25 @@ kmod_destroy_internal(kmod_t id) return KERN_INVALID_ARGUMENT; } +kern_return_t +kmod_destroy_internal(kmod_t id) +{ + return _kmod_destroy_internal(id, FALSE); +} kern_return_t kmod_destroy(host_priv_t host_priv, kmod_t id) { if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; - return 
kmod_destroy_internal(id); + return _kmod_destroy_internal(id, FALSE); } +kern_return_t +kmod_destroy_fake(kmod_t id) +{ + return _kmod_destroy_internal(id, TRUE); +} kern_return_t kmod_start_or_stop( @@ -430,7 +485,7 @@ kmod_start_or_stop( { kern_return_t rc = KERN_SUCCESS; void * user_data = 0; - kern_return_t (*func)(); + kern_return_t (*func)(kmod_info_t *, void *); kmod_info_t *k; simple_lock(&kmod_lock); @@ -454,7 +509,9 @@ kmod_start_or_stop( // call kmod entry point // if (data && dataCount && *data && *dataCount) { - vm_map_copyout(kernel_map, (vm_offset_t *)&user_data, (vm_map_copy_t)*data); + vm_map_offset_t map_addr; + vm_map_copyout(kernel_map, &map_addr, (vm_map_copy_t)*data); + user_data = CAST_DOWN(void *, map_addr); } rc = (*func)(k, user_data); @@ -499,7 +556,7 @@ kmod_retain(kmod_t id) f = kmod_lookupbyid(KMOD_UNPACK_FROM_ID(id)); if (!t || !f) { simple_unlock(&kmod_lock); - if (r) kfree((vm_offset_t)r, sizeof(struct kmod_reference)); + if (r) kfree(r, sizeof(struct kmod_reference)); rc = KERN_INVALID_ARGUMENT; goto finish; } @@ -547,7 +604,7 @@ kmod_release(kmod_t id) r->info->reference_count--; simple_unlock(&kmod_lock); - kfree((vm_offset_t)r, sizeof(struct kmod_reference)); + kfree(r, sizeof(struct kmod_reference)); rc = KERN_SUCCESS; goto finish; } @@ -632,7 +689,8 @@ kmod_control(host_priv_t host_priv, simple_unlock(&kmod_queue_lock); - rc = vm_map_copyin(kernel_map, e->data, e->size, TRUE, (vm_map_copy_t *)data); + rc = vm_map_copyin(kernel_map, (vm_map_address_t)e->data, + (vm_map_size_t)e->size, TRUE, (vm_map_copy_t *)data); if (rc) { simple_lock(&kmod_queue_lock); enqueue_head(&kmod_cmd_queue, (queue_entry_t)e); @@ -643,7 +701,7 @@ kmod_control(host_priv_t host_priv, } *dataCount = e->size; - kfree((vm_offset_t)e, sizeof(struct cmd_queue_entry)); + kfree(e, sizeof(struct cmd_queue_entry)); break; } @@ -657,7 +715,7 @@ kmod_control(host_priv_t host_priv, kern_return_t -kmod_get_info(host_t host, +kmod_get_info(__unused host_t host, kmod_info_array_t *kmods, mach_msg_type_number_t *kmodCount) { @@ -743,6 +801,9 @@ retry: return KERN_SUCCESS; } +/* + * Operates only on 32-bit Mach headers on behalf of the kernel module loader + */ static kern_return_t kmod_call_funcs_in_section(struct mach_header *header, const char *sectName) { @@ -754,7 +815,7 @@ kmod_call_funcs_in_section(struct mach_header *header, const char *sectName) return KERN_INVALID_ARGUMENT; } - routines = (Routine *) getsectdatafromheader(header, SEG_TEXT, (char *) sectName, &size); + routines = (Routine *) getsectdatafromheader(header, SEG_TEXT, /*(char *)*/ sectName, &size); if (!routines) return KERN_SUCCESS; size /= sizeof(Routine); @@ -765,12 +826,18 @@ kmod_call_funcs_in_section(struct mach_header *header, const char *sectName) return KERN_SUCCESS; } +/* + * Operates only on 32-bit Mach headers on behalf of the kernel module loader + */ kern_return_t kmod_initialize_cpp(kmod_info_t *info) { return kmod_call_funcs_in_section((struct mach_header *)info->address, "__constructor"); } +/* + * Operates only on 32-bit Mach headers on behalf of the kernel module loader + */ kern_return_t kmod_finalize_cpp(kmod_info_t *info) { @@ -778,74 +845,71 @@ kmod_finalize_cpp(kmod_info_t *info) } kern_return_t -kmod_default_start(struct kmod_info *ki, void *data) +kmod_default_start(__unused struct kmod_info *ki, __unused void *data) { return KMOD_RETURN_SUCCESS; } kern_return_t -kmod_default_stop(struct kmod_info *ki, void *data) +kmod_default_stop(__unused struct kmod_info *ki, __unused void *data) { return
KMOD_RETURN_SUCCESS; } -void -kmod_dump(vm_offset_t *addr, unsigned int cnt) +static void +kmod_dump_to(vm_offset_t *addr, unsigned int cnt, + void (*printf_func)(const char *fmt, ...)) { vm_offset_t * kscan_addr = 0; - vm_offset_t * rscan_addr = 0; kmod_info_t * k; kmod_reference_t * r; - int i, j; + unsigned int i; int found_kmod = 0; - int kmod_scan_stopped = 0; kmod_info_t * stop_kmod = 0; - int ref_scan_stopped = 0; - kmod_reference_t * stop_ref = 0; for (k = kmod; k; k = k->next) { - if (!k->address) { - continue; // skip fake entries for built-in kernel components - } if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)k)) == 0) { - kdb_printf(" kmod scan stopped due to missing " + (*printf_func)(" kmod scan stopped due to missing " "kmod page: %08x\n", stop_kmod); break; } + if (!k->address) { + continue; // skip fake entries for built-in kernel components + } for (i = 0, kscan_addr = addr; i < cnt; i++, kscan_addr++) { if ((*kscan_addr >= k->address) && (*kscan_addr < (k->address + k->size))) { if (!found_kmod) { - kdb_printf(" Kernel loadable modules in backtrace " + (*printf_func)(" Kernel loadable modules in backtrace " "(with dependencies):\n"); } found_kmod = 1; - kdb_printf(" %s(%s)@0x%x\n", + (*printf_func)(" %s(%s)@0x%x\n", k->name, k->version, k->address); for (r = k->reference_list; r; r = r->next) { kmod_info_t * rinfo; if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)r)) == 0) { - kdb_printf(" kmod dependency scan stopped " + (*printf_func)(" kmod dependency scan stopped " "due to missing dependency page: %08x\n", r); break; } rinfo = r->info; - if (!rinfo->address) { - continue; // skip fake entries for built-ins - } - if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)rinfo)) == 0) { - kdb_printf(" kmod dependency scan stopped " + (*printf_func)(" kmod dependency scan stopped " "due to missing kmod page: %08x\n", rinfo); break; } - kdb_printf(" dependency: %s(%s)@0x%x\n", + if (!rinfo->address) { + continue; // skip fake entries for built-ins + } + + (*printf_func)(" dependency: %s(%s)@0x%x\n", rinfo->name, rinfo->version, rinfo->address); } @@ -856,3 +920,15 @@ kmod_dump(vm_offset_t *addr, unsigned int cnt) return; } + +void +kmod_dump(vm_offset_t *addr, unsigned int cnt) +{ + kmod_dump_to(addr, cnt, &kdb_printf); +} + +void +kmod_dump_log(vm_offset_t *addr, unsigned int cnt) +{ + kmod_dump_to(addr, cnt, &printf); +} diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index e7e897c49..dbca9b48f 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,41 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1995/01/06 19:47:19 devrcs - * mk6 CR668 - 1.3b26 merge - * new file for mk6 - * [1994/10/12 22:19:28 dwm] - * - * Revision 1.1.3.4 1994/05/13 20:10:01 tmt - * Changed three unsigned casts to natural_t. - * [1994/05/12 22:12:28 tmt] - * - * Revision 1.1.3.2 1993/11/30 18:26:24 jph - * CR10228 -- Typo in unlock(), ledger_ledger should be child_ledger. 
- * [1993/11/30 16:10:43 jph] - * - * Revision 1.1.3.1 1993/11/24 21:22:14 jph - * CR9801 brezak merge, ledgers, security and NMK15_COMPAT - * [1993/11/23 22:41:07 jph] - * - * Revision 1.1.1.4 1993/09/08 14:17:36 brezak - * Include for protos. - * - * Revision 1.1.1.3 1993/08/20 14:16:55 brezak - * Created. - * - * $EndLog$ - */ - /* * 8/13/93 * @@ -70,16 +35,19 @@ #include #include +#include +#include + #include #include -#include #include #include -#include -#include #include #include -#include +#include + +#include +#include ledger_t root_wired_ledger; ledger_t root_paged_ledger; @@ -104,7 +72,7 @@ ledger_enter( ledger_unlock(ledger); return(KERN_RESOURCE_SHORTAGE); } - if ((natural_t)(ledger->ledger_balance + amount) + if ((ledger->ledger_balance + amount) < LEDGER_ITEM_INFINITY) ledger->ledger_balance += amount; else @@ -158,7 +126,7 @@ ledger_deallocate( ipc_port_dealloc_kernel(ledger->ledger_self); /* XXX release send right on service port */ - kfree((vm_offset_t)ledger, sizeof(*ledger)); + kfree(ledger, sizeof(*ledger)); } diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index afad3b628..0d6cbf87e 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,23 +22,17 @@ /* * @OSF_COPYRIGHT@ */ + +#ifdef MACH_KERNEL_PRIVATE + #ifndef _KERN_LEDGER_H_ #define _KERN_LEDGER_H_ - #include -#include - -#include - -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE +#include #include -#include - -#define LEDGER_ITEM_INFINITY (~0) +#include struct ledger { ipc_port_t ledger_self; @@ -55,7 +49,7 @@ typedef struct ledger ledger_data_t; #define ledger_lock(ledger) simple_lock(&(ledger)->lock) #define ledger_unlock(ledger) simple_unlock(&(ledger)->lock) #define ledger_lock_init(ledger) \ - simple_lock_init(&(ledger)->lock, ETAP_MISC_LEDGER) + simple_lock_init(&(ledger)->lock, 0) extern ledger_t root_wired_ledger; extern ledger_t root_paged_ledger; @@ -69,12 +63,10 @@ extern ipc_port_t ledger_copy(ledger_t); extern kern_return_t ledger_enter(ledger_t, ledger_item_t); -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ - extern ledger_t convert_port_to_ledger(ipc_port_t); extern ipc_port_t convert_ledger_to_port(ledger_t); #endif /* _KERN_LEDGER_H_ */ + +#endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/lock.c b/osfmk/kern/lock.c deleted file mode 100644 index b19b8f2b0..000000000 --- a/osfmk/kern/lock.c +++ /dev/null @@ -1,2384 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: kern/lock.c - * Author: Avadis Tevanian, Jr., Michael Wayne Young - * Date: 1985 - * - * Locking primitives implementation - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if MACH_KDB -#include -#include -#include -#include -#endif /* MACH_KDB */ - -#ifdef __ppc__ -#include -#endif - -#include - -#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) - -/* - * Some portions of the lock debugging code must run with - * interrupts disabled. This can be machine-dependent, - * but we don't have any good hooks for that at the moment. - * If your architecture is different, add a machine-dependent - * ifdef here for these macros. XXX - */ - -#define DISABLE_INTERRUPTS(s) s = ml_set_interrupts_enabled(FALSE) -#define ENABLE_INTERRUPTS(s) (void)ml_set_interrupts_enabled(s) - -#if NCPUS > 1 -/* Time we loop without holding the interlock. - * The former is for when we cannot sleep, the latter - * for when our thread can go to sleep (loop less) - * we shouldn't retake the interlock at all frequently - * if we cannot go to sleep, since it interferes with - * any other processors. In particular, 100 is too small - * a number for powerpc MP systems because of cache - * coherency issues and differing lock fetch times between - * the processors - */ -unsigned int lock_wait_time[2] = { (unsigned int)-1, 100 } ; -#else /* NCPUS > 1 */ - - /* - * It is silly to spin on a uni-processor as if we - * thought something magical would happen to the - * want_write bit while we are executing. - */ - -unsigned int lock_wait_time[2] = { 0, 0 }; -#endif /* NCPUS > 1 */ - -/* Forwards */ - -#if MACH_KDB -void db_print_simple_lock( - simple_lock_t addr); - -void db_print_mutex( - mutex_t * addr); -#endif /* MACH_KDB */ - - -#if USLOCK_DEBUG -/* - * Perform simple lock checks. - */ -int uslock_check = 1; -int max_lock_loops = 100000000; -decl_simple_lock_data(extern , printf_lock) -decl_simple_lock_data(extern , panic_lock) -#if MACH_KDB && NCPUS > 1 -decl_simple_lock_data(extern , kdb_lock) -#endif /* MACH_KDB && NCPUS >1 */ -#endif /* USLOCK_DEBUG */ - - -/* - * We often want to know the addresses of the callers - * of the various lock routines. However, this information - * is only used for debugging and statistics. 
- */ -typedef void *pc_t; -#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS) -#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS) -#if ANY_LOCK_DEBUG || ETAP_LOCK_TRACE -#define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l))) -#else /* ANY_LOCK_DEBUG || ETAP_LOCK_TRACE */ -#ifdef lint -/* - * Eliminate lint complaints about unused local pc variables. - */ -#define OBTAIN_PC(pc,l) ++pc -#else /* lint */ -#define OBTAIN_PC(pc,l) -#endif /* lint */ -#endif /* USLOCK_DEBUG || ETAP_LOCK_TRACE */ - - -/* #ifndef USIMPLE_LOCK_CALLS - * The i386 production version of usimple_locks isn't ready yet. - */ -/* - * Portable lock package implementation of usimple_locks. - */ - -#if ETAP_LOCK_TRACE -#define ETAPCALL(stmt) stmt -void etap_simplelock_init(simple_lock_t, etap_event_t); -void etap_simplelock_unlock(simple_lock_t); -void etap_simplelock_hold(simple_lock_t, pc_t, etap_time_t); -etap_time_t etap_simplelock_miss(simple_lock_t); - -void etap_mutex_init(mutex_t*, etap_event_t); -void etap_mutex_unlock(mutex_t*); -void etap_mutex_hold(mutex_t*, pc_t, etap_time_t); -etap_time_t etap_mutex_miss(mutex_t*); -#else /* ETAP_LOCK_TRACE */ -#define ETAPCALL(stmt) -#endif /* ETAP_LOCK_TRACE */ - -#if USLOCK_DEBUG -#define USLDBG(stmt) stmt -void usld_lock_init(usimple_lock_t, etap_event_t); -void usld_lock_pre(usimple_lock_t, pc_t); -void usld_lock_post(usimple_lock_t, pc_t); -void usld_unlock(usimple_lock_t, pc_t); -void usld_lock_try_pre(usimple_lock_t, pc_t); -void usld_lock_try_post(usimple_lock_t, pc_t); -void usld_lock_held(usimple_lock_t); -void usld_lock_none_held(void); -int usld_lock_common_checks(usimple_lock_t, char *); -#else /* USLOCK_DEBUG */ -#define USLDBG(stmt) -#endif /* USLOCK_DEBUG */ - -/* - * Initialize a usimple_lock. - * - * No change in preemption state. - */ -void -usimple_lock_init( - usimple_lock_t l, - etap_event_t event) -{ -#ifndef MACHINE_SIMPLE_LOCK - USLDBG(usld_lock_init(l, event)); - ETAPCALL(etap_simplelock_init((l),(event))); - hw_lock_init(&l->interlock); -#else - simple_lock_init((simple_lock_t)l,event); -#endif -} - - -/* - * Acquire a usimple_lock. - * - * Returns with preemption disabled. Note - * that the hw_lock routines are responsible for - * maintaining preemption state. - */ -void -usimple_lock( - usimple_lock_t l) -{ -#ifndef MACHINE_SIMPLE_LOCK - int i; - pc_t pc; -#if ETAP_LOCK_TRACE - etap_time_t start_wait_time; - int no_miss_info = 0; -#endif /* ETAP_LOCK_TRACE */ -#if USLOCK_DEBUG - int count = 0; -#endif /* USLOCK_DEBUG */ - - OBTAIN_PC(pc, l); - USLDBG(usld_lock_pre(l, pc)); -#if ETAP_LOCK_TRACE - ETAP_TIME_CLEAR(start_wait_time); -#endif /* ETAP_LOCK_TRACE */ - - if(!hw_lock_to(&l->interlock, LockTimeOut)) /* Try to get the lock with a timeout */ - panic("simple lock deadlock detection - l=%08X, cpu=%d, ret=%08X", l, cpu_number(), pc); - - ETAPCALL(etap_simplelock_hold(l, pc, start_wait_time)); - USLDBG(usld_lock_post(l, pc)); -#else - simple_lock((simple_lock_t)l); -#endif -} - - -/* - * Release a usimple_lock. - * - * Returns with preemption enabled. Note - * that the hw_lock routines are responsible for - * maintaining preemption state. - */ -void -usimple_unlock( - usimple_lock_t l) -{ -#ifndef MACHINE_SIMPLE_LOCK - pc_t pc; - -// checkNMI(); /* (TEST/DEBUG) */ - - OBTAIN_PC(pc, l); - USLDBG(usld_unlock(l, pc)); - ETAPCALL(etap_simplelock_unlock(l)); -#ifdef __ppc__ - sync(); -#endif - hw_lock_unlock(&l->interlock); -#else - simple_unlock_rwmb((simple_lock_t)l); -#endif -} - - -/* - * Conditionally acquire a usimple_lock. 
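usimple_lock() above acquires through hw_lock_to() with a LockTimeOut budget and panics when the budget expires, turning a silent deadlock into a diagnosable crash. A rough userland equivalent using C11 atomics; the iteration bound and message are placeholders, and abort() stands in for panic():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define LOCK_TIMEOUT_TRIES  100000000ull    /* placeholder for LockTimeOut */

static void
spin_lock_with_timeout(atomic_uint *lock)
{
    uint64_t tries = 0;
    unsigned expected = 0;

    while (!atomic_compare_exchange_weak_explicit(lock, &expected, 1u,
                memory_order_acquire, memory_order_relaxed)) {
        expected = 0;                       /* CAS rewrote it on failure */
        if (++tries > LOCK_TIMEOUT_TRIES) {
            fprintf(stderr,
                "simple lock deadlock detection - l=%p\n", (void *)lock);
            abort();                        /* stands in for panic() */
        }
    }
}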
- * - * On success, returns with preemption disabled. - * On failure, returns with preemption in the same state - * as when first invoked. Note that the hw_lock routines - * are responsible for maintaining preemption state. - * - * XXX No stats are gathered on a miss; I preserved this - * behavior from the original assembly-language code, but - * doesn't it make sense to log misses? XXX - */ -unsigned int -usimple_lock_try( - usimple_lock_t l) -{ -#ifndef MACHINE_SIMPLE_LOCK - pc_t pc; - unsigned int success; - etap_time_t zero_time; - - OBTAIN_PC(pc, l); - USLDBG(usld_lock_try_pre(l, pc)); - if (success = hw_lock_try(&l->interlock)) { - USLDBG(usld_lock_try_post(l, pc)); - ETAP_TIME_CLEAR(zero_time); - ETAPCALL(etap_simplelock_hold(l, pc, zero_time)); - } - return success; -#else - return(simple_lock_try((simple_lock_t)l)); -#endif -} - -#if ETAP_LOCK_TRACE -void -simple_lock_no_trace( - simple_lock_t l) -{ - pc_t pc; - - OBTAIN_PC(pc, l); - USLDBG(usld_lock_pre(l, pc)); - while (!hw_lock_try(&l->interlock)) { - while (hw_lock_held(&l->interlock)) { - /* - * Spin watching the lock value in cache, - * without consuming external bus cycles. - * On most SMP architectures, the atomic - * instruction(s) used by hw_lock_try - * cost much, much more than an ordinary - * memory read. - */ - } - } - USLDBG(usld_lock_post(l, pc)); -} - -void -simple_unlock_no_trace( - simple_lock_t l) -{ - pc_t pc; - - OBTAIN_PC(pc, l); - USLDBG(usld_unlock(l, pc)); - hw_lock_unlock(&l->interlock); -} - -int -simple_lock_try_no_trace( - simple_lock_t l) -{ - pc_t pc; - unsigned int success; - - OBTAIN_PC(pc, l); - USLDBG(usld_lock_try_pre(l, pc)); - if (success = hw_lock_try(&l->interlock)) { - USLDBG(usld_lock_try_post(l, pc)); - } - return success; -} -#endif /* ETAP_LOCK_TRACE */ - - -#if USLOCK_DEBUG -/* - * Verify that the lock is locked and owned by - * the current thread. - */ -void -usimple_lock_held( - usimple_lock_t l) -{ - usld_lock_held(l); -} - - -/* - * Verify that no usimple_locks are held by - * this processor. Typically used in a - * trap handler when returning to user mode - * or in a path known to relinquish the processor. - */ -void -usimple_lock_none_held(void) -{ - usld_lock_none_held(); -} -#endif /* USLOCK_DEBUG */ - - -#if USLOCK_DEBUG -/* - * States of a usimple_lock. The default when initializing - * a usimple_lock is setting it up for debug checking. - */ -#define USLOCK_CHECKED 0x0001 /* lock is being checked */ -#define USLOCK_TAKEN 0x0002 /* lock has been taken */ -#define USLOCK_INIT 0xBAA0 /* lock has been initialized */ -#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED) -#define USLOCK_CHECKING(l) (uslock_check && \ - ((l)->debug.state & USLOCK_CHECKED)) - -/* - * Maintain a per-cpu stack of acquired usimple_locks. - */ -void usl_stack_push(usimple_lock_t, int); -void usl_stack_pop(usimple_lock_t, int); - -/* - * Trace activities of a particularly interesting lock. - */ -void usl_trace(usimple_lock_t, int, pc_t, const char *); - - -/* - * Initialize the debugging information contained - * in a usimple_lock. - */ -void -usld_lock_init( - usimple_lock_t l, - etap_event_t type) -{ - if (l == USIMPLE_LOCK_NULL) - panic("lock initialization: null lock pointer"); - l->lock_type = USLOCK_TAG; - l->debug.state = uslock_check ? 
USLOCK_INITIALIZED : 0; - l->debug.lock_cpu = l->debug.unlock_cpu = 0; - l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC; - l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD; - l->debug.duration[0] = l->debug.duration[1] = 0; - l->debug.unlock_cpu = l->debug.unlock_cpu = 0; - l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC; - l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD; -} - - -/* - * These checks apply to all usimple_locks, not just - * those with USLOCK_CHECKED turned on. - */ -int -usld_lock_common_checks( - usimple_lock_t l, - char *caller) -{ - if (l == USIMPLE_LOCK_NULL) - panic("%s: null lock pointer", caller); - if (l->lock_type != USLOCK_TAG) - panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l); - if (!(l->debug.state & USLOCK_INIT)) - panic("%s: 0x%x is not an initialized lock", - caller, (integer_t) l); - return USLOCK_CHECKING(l); -} - - -/* - * Debug checks on a usimple_lock just before attempting - * to acquire it. - */ -/* ARGSUSED */ -void -usld_lock_pre( - usimple_lock_t l, - pc_t pc) -{ - char *caller = "usimple_lock"; - - -#if 0 - printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */ - l->debug.lock_pc, - l->debug.lock_thread, - l->debug.state, - l->debug.lock_cpu, - l->debug.unlock_thread, - l->debug.unlock_cpu, - l->debug.unlock_pc, - caller); -#endif - - if (!usld_lock_common_checks(l, caller)) - return; - -/* - * Note that we have a weird case where we are getting a lock when we are - * in the process of putting the system to sleep. We are running with no - * current threads, therefore we can't tell if we are trying to retake a lock - * we have or someone on the other processor has it. Therefore we just - * ignore this test if the locking thread is 0. - */ - - if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread && - l->debug.lock_thread == (void *) current_thread()) { - printf("%s: lock 0x%x already locked (at 0x%x) by", - caller, (integer_t) l, l->debug.lock_pc); - printf(" current thread 0x%x (new attempt at pc 0x%x)\n", - l->debug.lock_thread, pc); - panic(caller); - } - mp_disable_preemption(); - usl_trace(l, cpu_number(), pc, caller); - mp_enable_preemption(); -} - - -/* - * Debug checks on a usimple_lock just after acquiring it. - * - * Pre-emption has been disabled at this point, - * so we are safe in using cpu_number. - */ -void -usld_lock_post( - usimple_lock_t l, - pc_t pc) -{ - register int mycpu; - char *caller = "successful usimple_lock"; - - -#if 0 - printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */ - l->debug.lock_pc, - l->debug.lock_thread, - l->debug.state, - l->debug.lock_cpu, - l->debug.unlock_thread, - l->debug.unlock_cpu, - l->debug.unlock_pc, - caller); -#endif - - if (!usld_lock_common_checks(l, caller)) - return; - - if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) - panic("%s: lock 0x%x became uninitialized", - caller, (integer_t) l); - if ((l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x became TAKEN by someone else", - caller, (integer_t) l); - - mycpu = cpu_number(); - l->debug.lock_thread = (void *)current_thread(); - l->debug.state |= USLOCK_TAKEN; - l->debug.lock_pc = pc; - l->debug.lock_cpu = mycpu; - - usl_stack_push(l, mycpu); - usl_trace(l, mycpu, pc, caller); -} - - -/* - * Debug checks on a usimple_lock just before - * releasing it. Note that the caller has not - * yet released the hardware lock. - * - * Preemption is still disabled, so there's - * no problem using cpu_number.
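The core of usld_lock_pre() above is a self-deadlock check: if the lock is marked TAKEN and the recorded owner is the calling thread, a second acquisition would spin forever, so it panics immediately, except when the owner field is 0 during system sleep, as the comment explains. The same test sketched with pthreads standing in for kernel threads; all names here are invented:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct uslock_debug_sketch {
    pthread_t owner;        /* models l->debug.lock_thread            */
    int       taken;        /* models the USLOCK_TAKEN state bit      */
    int       owner_valid;  /* 0 during system sleep: skip the test   */
};

static void
check_not_recursing(struct uslock_debug_sketch *d)
{
    if (d->taken && d->owner_valid &&
        pthread_equal(d->owner, pthread_self())) {
        fprintf(stderr, "usimple_lock: already held by this thread\n");
        abort();            /* stands in for panic() */
    }
}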
- */ -void -usld_unlock( - usimple_lock_t l, - pc_t pc) -{ - register int mycpu; - char *caller = "usimple_unlock"; - - -#if 0 - printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */ - l->debug.lock_pc, - l->debug.lock_thread, - l->debug.state, - l->debug.lock_cpu, - l->debug.unlock_thread, - l->debug.unlock_cpu, - l->debug.unlock_pc, - caller); -#endif - - if (!usld_lock_common_checks(l, caller)) - return; - - mycpu = cpu_number(); - - if (!(l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x hasn't been taken", - caller, (integer_t) l); - if (l->debug.lock_thread != (void *) current_thread()) - panic("%s: unlocking lock 0x%x, owned by thread 0x%x", - caller, (integer_t) l, l->debug.lock_thread); - if (l->debug.lock_cpu != mycpu) { - printf("%s: unlocking lock 0x%x on cpu 0x%x", - caller, (integer_t) l, mycpu); - printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu); - panic(caller); - } - usl_trace(l, mycpu, pc, caller); - usl_stack_pop(l, mycpu); - - l->debug.unlock_thread = l->debug.lock_thread; - l->debug.lock_thread = INVALID_PC; - l->debug.state &= ~USLOCK_TAKEN; - l->debug.unlock_pc = pc; - l->debug.unlock_cpu = mycpu; -} - - -/* - * Debug checks on a usimple_lock just before - * attempting to acquire it. - * - * Preemption isn't guaranteed to be disabled. - */ -void -usld_lock_try_pre( - usimple_lock_t l, - pc_t pc) -{ - char *caller = "usimple_lock_try"; - - if (!usld_lock_common_checks(l, caller)) - return; - mp_disable_preemption(); - usl_trace(l, cpu_number(), pc, caller); - mp_enable_preemption(); -} - - -/* - * Debug checks on a usimple_lock just after - * successfully attempting to acquire it. - * - * Preemption has been disabled by the - * lock acquisition attempt, so it's safe - * to use cpu_number. - */ -void -usld_lock_try_post( - usimple_lock_t l, - pc_t pc) -{ - register int mycpu; - char *caller = "successful usimple_lock_try"; - - if (!usld_lock_common_checks(l, caller)) - return; - - if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) - panic("%s: lock 0x%x became uninitialized", - caller, (integer_t) l); - if ((l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x became TAKEN by someone else", - caller, (integer_t) l); - - mycpu = cpu_number(); - l->debug.lock_thread = (void *) current_thread(); - l->debug.state |= USLOCK_TAKEN; - l->debug.lock_pc = pc; - l->debug.lock_cpu = mycpu; - -#if 0 - printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */ - l->debug.lock_pc, - l->debug.lock_thread, - l->debug.state, - l->debug.lock_cpu, - l->debug.unlock_thread, - l->debug.unlock_cpu, - l->debug.unlock_pc, - caller); -#endif - - usl_stack_push(l, mycpu); - usl_trace(l, mycpu, pc, caller); -} - - -/* - * Determine whether the lock in question is owned - * by the current thread. 
- */ -void -usld_lock_held( - usimple_lock_t l) -{ - char *caller = "usimple_lock_held"; - - -#if 0 - printf("*** %08X %08X %04X %02X %08X %02X %08X - %s\n", /* (TEST/DEBUG) */ - l->debug.lock_pc, - l->debug.lock_thread, - l->debug.state, - l->debug.lock_cpu, - l->debug.unlock_thread, - l->debug.unlock_cpu, - l->debug.unlock_pc, - caller); -#endif - - if (!usld_lock_common_checks(l, caller)) - return; - - if (!(l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x hasn't been taken", - caller, (integer_t) l); - if (l->debug.lock_thread != (void *) current_thread()) - panic("%s: lock 0x%x is owned by thread 0x%x", caller, - (integer_t) l, (integer_t) l->debug.lock_thread); - - /* - * The usimple_lock is active, so preemption - * is disabled and the current cpu should - * match the one recorded at lock acquisition time. - */ - if (l->debug.lock_cpu != cpu_number()) - panic("%s: current cpu 0x%x isn't acquiring cpu 0x%x", - caller, cpu_number(), (integer_t) l->debug.lock_cpu); -} - - -/* - * Per-cpu stack of currently active usimple_locks. - * Requires spl protection so that interrupt-level - * locks plug-n-play with their thread-context friends. - */ -#define USLOCK_STACK_DEPTH 20 -usimple_lock_t uslock_stack[NCPUS][USLOCK_STACK_DEPTH]; -unsigned int uslock_stack_index[NCPUS]; -boolean_t uslock_stack_enabled = FALSE; - - -/* - * Record a usimple_lock just acquired on - * the current processor. - * - * Preemption has been disabled by lock - * acquisition, so it's safe to use the cpu number - * specified by the caller. - */ -void -usl_stack_push( - usimple_lock_t l, - int mycpu) -{ - boolean_t s; - - if (uslock_stack_enabled == FALSE) - return; - - DISABLE_INTERRUPTS(s); - assert(uslock_stack_index[mycpu] >= 0); - assert(uslock_stack_index[mycpu] < USLOCK_STACK_DEPTH); - if (uslock_stack_index[mycpu] >= USLOCK_STACK_DEPTH) { - printf("usl_stack_push (cpu 0x%x): too many locks (%d)", - mycpu, uslock_stack_index[mycpu]); - printf(" disabling stacks\n"); - uslock_stack_enabled = FALSE; - ENABLE_INTERRUPTS(s); - return; - } - uslock_stack[mycpu][uslock_stack_index[mycpu]] = l; - uslock_stack_index[mycpu]++; - ENABLE_INTERRUPTS(s); -} - - -/* - * Eliminate the entry for a usimple_lock - * that had been active on the current processor. - * - * Preemption has been disabled by lock - * acquisition, and we haven't yet actually - * released the hardware lock associated with - * this usimple_lock, so it's safe to use the - * cpu number supplied by the caller. - */ -void -usl_stack_pop( - usimple_lock_t l, - int mycpu) -{ - unsigned int i, index; - boolean_t s; - - if (uslock_stack_enabled == FALSE) - return; - - DISABLE_INTERRUPTS(s); - assert(uslock_stack_index[mycpu] > 0); - assert(uslock_stack_index[mycpu] <= USLOCK_STACK_DEPTH); - if (uslock_stack_index[mycpu] == 0) { - printf("usl_stack_pop (cpu 0x%x): not enough locks (%d)", - mycpu, uslock_stack_index[mycpu]); - printf(" disabling stacks\n"); - uslock_stack_enabled = FALSE; - ENABLE_INTERRUPTS(s); - return; - } - index = --uslock_stack_index[mycpu]; - for (i = 0; i <= index; ++i) { - if (uslock_stack[mycpu][i] == l) { - if (i != index) - uslock_stack[mycpu][i] = - uslock_stack[mycpu][index]; - ENABLE_INTERRUPTS(s); - return; - } - } - ENABLE_INTERRUPTS(s); - panic("usl_stack_pop: can't find usimple_lock 0x%x", l); -} - - -/* - * Determine whether any usimple_locks are currently held. - * - * Caller's preemption state is uncertain. If - * preemption has been disabled, this check is accurate. - * Otherwise, this check is just a guess. 
We do the best - * we can by disabling scheduler interrupts, so at least - * the check is accurate w.r.t. whatever cpu we're running - * on while in this routine. - */ -void -usld_lock_none_held() -{ - register int mycpu; - boolean_t s; - unsigned int locks_held; - char *caller = "usimple_lock_none_held"; - - DISABLE_INTERRUPTS(s); - mp_disable_preemption(); - mycpu = cpu_number(); - locks_held = uslock_stack_index[mycpu]; - mp_enable_preemption(); - ENABLE_INTERRUPTS(s); - if (locks_held > 0) - panic("%s: no locks should be held (0x%x locks held)", - caller, (integer_t) locks_held); -} - - -/* - * For very special cases, set traced_lock to point to a - * specific lock of interest. The result is a series of - * XPRs showing lock operations on that lock. The lock_seq - * value is used to show the order of those operations. - */ -usimple_lock_t traced_lock; -unsigned int lock_seq; - -void -usl_trace( - usimple_lock_t l, - int mycpu, - pc_t pc, - const char * op_name) -{ - if (traced_lock == l) { - XPR(XPR_SLOCK, - "seq %d, cpu %d, %s @ %x\n", - (integer_t) lock_seq, (integer_t) mycpu, - (integer_t) op_name, (integer_t) pc, 0); - lock_seq++; - } -} - - - -#if MACH_KDB -#define printf kdbprintf -void db_show_all_slocks(void); -void -db_show_all_slocks(void) -{ - unsigned int i, index; - int mycpu = cpu_number(); - usimple_lock_t l; - - if (uslock_stack_enabled == FALSE) { - printf("Lock stack not enabled\n"); - return; - } - -#if 0 - if (!mach_slocks_init) - iprintf("WARNING: simple locks stack may not be accurate\n"); -#endif - assert(uslock_stack_index[mycpu] >= 0); - assert(uslock_stack_index[mycpu] <= USLOCK_STACK_DEPTH); - index = uslock_stack_index[mycpu]; - for (i = 0; i < index; ++i) { - l = uslock_stack[mycpu][i]; - iprintf("%d: ", i); - db_printsym((vm_offset_t)l, DB_STGY_ANY); - if (l->debug.lock_pc != INVALID_PC) { - printf(" locked by "); - db_printsym((int)l->debug.lock_pc, DB_STGY_PROC); - } - printf("\n"); - } -} -#endif /* MACH_KDB */ - -#endif /* USLOCK_DEBUG */ - -/* #endif USIMPLE_LOCK_CALLS */ - -/* - * Routine: lock_alloc - * Function: - * Allocate a lock for external users who cannot - * hard-code the structure definition into their - * objects. - * For now just use kalloc, but a zone is probably - * warranted. - */ -lock_t * -lock_alloc( - boolean_t can_sleep, - etap_event_t event, - etap_event_t i_event) -{ - lock_t *l; - - if ((l = (lock_t *)kalloc(sizeof(lock_t))) != 0) - lock_init(l, can_sleep, event, i_event); - return(l); -} - -/* - * Routine: lock_free - * Function: - * Free a lock allocated for external users. - * For now just use kfree, but a zone is probably - * warranted. - */ -void -lock_free( - lock_t *l) -{ - kfree((vm_offset_t)l, sizeof(lock_t)); -} - - -/* - * Routine: lock_init - * Function: - * Initialize a lock; required before use. - * Note that clients declare the "struct lock" - * variables and then initialize them, rather - * than getting a new one from this module. 
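usl_stack_push() and usl_stack_pop() above maintain a bounded per-CPU array of currently held locks, and because locks are not always released in LIFO order, removal swaps the last entry into the vacated slot. A condensed sketch; the CPU count is assumed, and abort() stands in for the original's behavior (which disables tracking on overflow and panics on a missing pop):

#include <stdio.h>
#include <stdlib.h>

#define NCPU_SKETCH  4        /* assumed CPU count for the sketch       */
#define STACK_DEPTH  20       /* same depth as USLOCK_STACK_DEPTH above */

static void    *held[NCPU_SKETCH][STACK_DEPTH];
static unsigned held_top[NCPU_SKETCH];

static void
lock_stack_push(int cpu, void *l)
{
    if (held_top[cpu] >= STACK_DEPTH) {
        fprintf(stderr, "cpu %d: too many locks held\n", cpu);
        abort();              /* the original disables the stacks here */
    }
    held[cpu][held_top[cpu]++] = l;
}

static void
lock_stack_pop(int cpu, void *l)
{
    unsigned i, top = --held_top[cpu];

    for (i = 0; i <= top; i++) {
        if (held[cpu][i] == l) {
            held[cpu][i] = held[cpu][top];   /* swap-with-last removal */
            return;
        }
    }
    fprintf(stderr, "cpu %d: lock %p never pushed\n", cpu, l);
    abort();                  /* the original panics here */
}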
- */ -void -lock_init( - lock_t *l, - boolean_t can_sleep, - etap_event_t event, - etap_event_t i_event) -{ - (void) memset((void *) l, 0, sizeof(lock_t)); - -#if ETAP_LOCK_TRACE - etap_event_table_assign(&l->u.event_table_chain, event); - l->u.s.start_list = SD_ENTRY_NULL; -#endif /* ETAP_LOCK_TRACE */ - - simple_lock_init(&l->interlock, i_event); - l->want_write = FALSE; - l->want_upgrade = FALSE; - l->read_count = 0; - l->can_sleep = can_sleep; - -#if ETAP_LOCK_ACCUMULATE - l->cbuff_write = etap_cbuff_reserve(lock_event_table(l)); - if (l->cbuff_write != CBUFF_ENTRY_NULL) { - l->cbuff_write->event = event; - l->cbuff_write->instance = (unsigned long) l; - l->cbuff_write->kind = WRITE_LOCK; - } - l->cbuff_read = CBUFF_ENTRY_NULL; -#endif /* ETAP_LOCK_ACCUMULATE */ -} - - -/* - * Sleep locks. These use the same data structure and algorithm - * as the spin locks, but the process sleeps while it is waiting - * for the lock. These work on uniprocessor systems. - */ - -#define DECREMENTER_TIMEOUT 1000000 - -void -lock_write( - register lock_t * l) -{ - register int i; - start_data_node_t entry = {0}; - boolean_t lock_miss = FALSE; - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_wait_time; - pc_t pc; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - ETAP_CREATE_ENTRY(entry, trace); - MON_ASSIGN_PC(entry->start_pc, pc, trace); - - simple_lock(&l->interlock); - - /* - * Link the new start_list entry - */ - ETAP_LINK_ENTRY(l, entry, trace); - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - - /* - * Try to acquire the want_write bit. - */ - while (l->want_write) { - if (!lock_miss) { - ETAP_CONTENTION_TIMESTAMP(entry, trace); - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ? 1 : 0]; - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - want_write"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && l->want_write) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->want_write) { - l->waiting = TRUE; - ETAP_SET_REASON(current_thread(), - BLOCKED_ON_COMPLEX_LOCK); - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - l->want_write = TRUE; - - /* Wait for readers (and upgrades) to finish */ - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while ((l->read_count != 0) || l->want_upgrade) { - if (!lock_miss) { - ETAP_CONTENTION_TIMESTAMP(entry,trace); - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ? 1 : 0]; - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait for readers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && (l->read_count != 0 || - l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { - l->waiting = TRUE; - ETAP_SET_REASON(current_thread(), - BLOCKED_ON_COMPLEX_LOCK); - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - /* - * do not collect wait data if either the lock - * was free or no wait traces are enabled. 
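lock_write() above acquires in two phases under the interlock: first win the want_write bit against competing writers, then drain read_count and any pending upgrade, sleeping via thread_sleep_simple_lock() between checks. The same shape, with a pthread condition variable playing the interlock/sleep/wakeup pairing; the spin phase and all ETAP bookkeeping are deliberately omitted:

#include <pthread.h>
#include <stdbool.h>

struct rwlock_sketch {
    pthread_mutex_t interlock;      /* plays the simple-lock interlock  */
    pthread_cond_t  wakeup;         /* plays thread_sleep/thread_wakeup */
    unsigned        read_count;
    bool            want_write;
    bool            want_upgrade;
};

static void
rw_write_lock(struct rwlock_sketch *l)
{
    pthread_mutex_lock(&l->interlock);

    /* Phase 1: win the want_write bit against other writers. */
    while (l->want_write)
        pthread_cond_wait(&l->wakeup, &l->interlock);
    l->want_write = true;

    /* Phase 2: drain readers and any reader waiting to upgrade. */
    while (l->read_count != 0 || l->want_upgrade)
        pthread_cond_wait(&l->wakeup, &l->interlock);

    pthread_mutex_unlock(&l->interlock);
}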
- */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) { - ETAP_TIMESTAMP(stop_wait_time); - ETAP_TOTAL_TIME(total_time, - stop_wait_time, - entry->start_wait_time); - CUM_WAIT_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace); - MON_DATA_COLLECT(l, - entry, - total_time, - WRITE_LOCK, - MON_CONTENTION, - trace); - } - - simple_unlock(&l->interlock); - - /* - * Set start hold time if some type of hold tracing is enabled. - * - * Note: if the stop_wait_time was already stamped, use - * it as the start_hold_time instead of doing an - * expensive bus access. - * - */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) - ETAP_COPY_START_HOLD_TIME(entry, stop_wait_time, trace); - else - ETAP_DURATION_TIMESTAMP(entry, trace); - -} - -void -lock_done( - register lock_t * l) -{ - boolean_t do_wakeup = FALSE; - start_data_node_t entry; - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t stop_hold_time; - etap_time_t total_time; - unsigned long lock_kind; - pc_t pc; - - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - simple_lock(&l->interlock); - - if (l->read_count != 0) { - l->read_count--; - lock_kind = READ_LOCK; - } - else - if (l->want_upgrade) { - l->want_upgrade = FALSE; - lock_kind = WRITE_LOCK; - } - else { - l->want_write = FALSE; - lock_kind = WRITE_LOCK; - } - - /* - * There is no reason to wakeup a waiting thread - * if the read-count is non-zero. Consider: - * we must be dropping a read lock - * threads are waiting only if one wants a write lock - * if there are still readers, they can't proceed - */ - - if (l->waiting && (l->read_count == 0)) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - /* - * Collect hold data if hold tracing is - * enabled. - */ - - /* - * NOTE: All complex locks whose tracing was on when the - * lock was acquired will have an entry in the start_data - * list. - */ - - ETAP_UNLINK_ENTRY(l,entry); - if (ETAP_DURATION_ENABLED(trace) && entry != SD_ENTRY_NULL) { - ETAP_TIMESTAMP (stop_hold_time); - ETAP_TOTAL_TIME (total_time, - stop_hold_time, - entry->start_hold_time); - - if (lock_kind & WRITE_LOCK) - CUM_HOLD_ACCUMULATE (l->cbuff_write, - total_time, - dynamic, - trace); - else { - CUM_READ_ENTRY_RESERVE(l,l->cbuff_read,trace); - CUM_HOLD_ACCUMULATE (l->cbuff_read, - total_time, - dynamic, - trace); - } - MON_ASSIGN_PC(entry->end_pc,pc,trace); - MON_DATA_COLLECT(l,entry, - total_time, - lock_kind, - MON_DURATION, - trace); - } - - simple_unlock(&l->interlock); - - ETAP_DESTROY_ENTRY(entry); - - if (do_wakeup) - thread_wakeup((event_t) l); -} - -void -lock_read( - register lock_t * l) -{ - register int i; - start_data_node_t entry = {0}; - boolean_t lock_miss = FALSE; - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_wait_time; - pc_t pc; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - ETAP_CREATE_ENTRY(entry, trace); - MON_ASSIGN_PC(entry->start_pc, pc, trace); - - simple_lock(&l->interlock); - - /* - * Link the new start_list entry - */ - ETAP_LINK_ENTRY(l,entry,trace); - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while (l->want_write || l->want_upgrade) { - if (!lock_miss) { - ETAP_CONTENTION_TIMESTAMP(entry, trace); - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ? 
1 : 0]; - - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait no writers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && (l->want_write || l->want_upgrade)) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && (l->want_write || l->want_upgrade)) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - l->read_count++; - - /* - * Do not collect wait data if the lock was free - * or if no wait traces are enabled. - */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) { - ETAP_TIMESTAMP(stop_wait_time); - ETAP_TOTAL_TIME(total_time, - stop_wait_time, - entry->start_wait_time); - CUM_READ_ENTRY_RESERVE(l, l->cbuff_read, trace); - CUM_WAIT_ACCUMULATE(l->cbuff_read, total_time, dynamic, trace); - MON_DATA_COLLECT(l, - entry, - total_time, - READ_LOCK, - MON_CONTENTION, - trace); - } - simple_unlock(&l->interlock); - - /* - * Set start hold time if some type of hold tracing is enabled. - * - * Note: if the stop_wait_time was already stamped, use - * it instead of doing an expensive bus access. - * - */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) - ETAP_COPY_START_HOLD_TIME(entry, stop_wait_time, trace); - else - ETAP_DURATION_TIMESTAMP(entry,trace); -} - - -/* - * Routine: lock_read_to_write - * Function: - * Improves a read-only lock to one with - * write permission. If another reader has - * already requested an upgrade to a write lock, - * no lock is held upon return. - * - * Returns TRUE if the upgrade *failed*. - */ - -boolean_t -lock_read_to_write( - register lock_t * l) -{ - register int i; - boolean_t do_wakeup = FALSE; - start_data_node_t entry = {0}; - boolean_t lock_miss = FALSE; - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_time; - pc_t pc; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - simple_lock(&l->interlock); - - l->read_count--; - - /* - * Since the read lock is lost whether the write lock - * is acquired or not, read hold data is collected here. - * This, of course, is assuming some type of hold - * tracing is enabled. - * - * Note: trace is set to zero if the entry does not exist. - */ - - ETAP_FIND_ENTRY(l, entry, trace); - - if (ETAP_DURATION_ENABLED(trace)) { - ETAP_TIMESTAMP(stop_time); - ETAP_TOTAL_TIME(total_time, stop_time, entry->start_hold_time); - CUM_HOLD_ACCUMULATE(l->cbuff_read, total_time, dynamic, trace); - MON_ASSIGN_PC(entry->end_pc, pc, trace); - MON_DATA_COLLECT(l, - entry, - total_time, - READ_LOCK, - MON_DURATION, - trace); - } - - if (l->want_upgrade) { - /* - * Someone else has requested upgrade. - * Since we've released a read lock, wake - * him up. - */ - if (l->waiting && (l->read_count == 0)) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - ETAP_UNLINK_ENTRY(l, entry); - simple_unlock(&l->interlock); - ETAP_DESTROY_ENTRY(entry); - - if (do_wakeup) - thread_wakeup((event_t) l); - return (TRUE); - } - - l->want_upgrade = TRUE; - - MON_ASSIGN_PC(entry->start_pc, pc, trace); - -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while (l->read_count != 0) { - if (!lock_miss) { - ETAP_CONTENTION_TIMESTAMP(entry, trace); - lock_miss = TRUE; - } - - i = lock_wait_time[l->can_sleep ? 
1 : 0]; - - if (i != 0) { - simple_unlock(&l->interlock); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - read_count"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && l->read_count != 0) - continue; - simple_lock(&l->interlock); - } - - if (l->can_sleep && l->read_count != 0) { - l->waiting = TRUE; - thread_sleep_simple_lock((event_t) l, - simple_lock_addr(l->interlock), - THREAD_UNINT); - /* interlock relocked */ - } - } - - /* - * do not collect wait data if the lock was free - * or if no wait traces are enabled. - */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) { - ETAP_TIMESTAMP (stop_time); - ETAP_TOTAL_TIME(total_time, stop_time, entry->start_wait_time); - CUM_WAIT_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace); - MON_DATA_COLLECT(l, - entry, - total_time, - WRITE_LOCK, - MON_CONTENTION, - trace); - } - - simple_unlock(&l->interlock); - - /* - * Set start hold time if some type of hold tracing is enabled - * - * Note: if the stop_time was already stamped, use - * it as the new start_hold_time instead of doing - * an expensive VME access. - * - */ - - if (lock_miss && ETAP_CONTENTION_ENABLED(trace)) - ETAP_COPY_START_HOLD_TIME(entry, stop_time, trace); - else - ETAP_DURATION_TIMESTAMP(entry, trace); - - return (FALSE); -} - -void -lock_write_to_read( - register lock_t * l) -{ - boolean_t do_wakeup = FALSE; - start_data_node_t entry = {0}; - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t stop_hold_time; - etap_time_t total_time; - pc_t pc; - - ETAP_STAMP(lock_event_table(l), trace,dynamic); - - simple_lock(&l->interlock); - - l->read_count++; - if (l->want_upgrade) - l->want_upgrade = FALSE; - else - l->want_write = FALSE; - - if (l->waiting) { - l->waiting = FALSE; - do_wakeup = TRUE; - } - - /* - * Since we are switching from a write lock to a read lock, - * the write lock data is stored and the read lock data - * collection begins. - * - * Note: trace is set to zero if the entry does not exist. - */ - - ETAP_FIND_ENTRY(l, entry, trace); - - if (ETAP_DURATION_ENABLED(trace)) { - ETAP_TIMESTAMP (stop_hold_time); - ETAP_TOTAL_TIME(total_time, stop_hold_time, entry->start_hold_time); - CUM_HOLD_ACCUMULATE(l->cbuff_write, total_time, dynamic, trace); - MON_ASSIGN_PC(entry->end_pc, pc, trace); - MON_DATA_COLLECT(l, - entry, - total_time, - WRITE_LOCK, - MON_DURATION, - trace); - } - - simple_unlock(&l->interlock); - - /* - * Set start hold time if some type of hold tracing is enabled - * - * Note: if the stop_hold_time was already stamped, use - * it as the new start_hold_time instead of doing - * an expensive bus access. - * - */ - - if (ETAP_DURATION_ENABLED(trace)) - ETAP_COPY_START_HOLD_TIME(entry, stop_hold_time, trace); - else - ETAP_DURATION_TIMESTAMP(entry, trace); - - MON_ASSIGN_PC(entry->start_pc, pc, trace); - - if (do_wakeup) - thread_wakeup((event_t) l); -} - - -#if 0 /* Unused */ -/* - * Routine: lock_try_write - * Function: - * Tries to get a write lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t -lock_try_write( - register lock_t * l) -{ - start_data_node_t entry = {0}; - unsigned short trace = 0; - pc_t pc; - - ETAP_STAMP(lock_event_table(l), trace, trace); - ETAP_CREATE_ENTRY(entry, trace); - - simple_lock(&l->interlock); - - if (l->want_write || l->want_upgrade || l->read_count) { - /* - * Can't get lock. - */ - simple_unlock(&l->interlock); - ETAP_DESTROY_ENTRY(entry); - return(FALSE); - } - - /* - * Have lock. 
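As documented above, lock_read_to_write() returns TRUE when the upgrade fails, and in that case no lock is held at all, because another reader already claimed want_upgrade. Callers therefore re-take the read lock, revalidate whatever they had observed, and retry, roughly as follows; the *_sketch names model the deleted API and are not real functions:

#include <stdbool.h>

extern bool lock_read_to_write_sketch(void *l);  /* true means FAILED  */
extern void lock_read_sketch(void *l);           /* models the old API */

static void
upgrade_or_retry(void *l, void (*revalidate)(void *))
{
    /* entered holding the read lock */
    while (lock_read_to_write_sketch(l)) {
        lock_read_sketch(l);   /* upgrade failed: every lock was lost   */
        revalidate(l);         /* state may have changed while unlocked */
    }
    /* write lock held here */
}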
- */ - - l->want_write = TRUE; - - ETAP_LINK_ENTRY(l, entry, trace); - - simple_unlock(&l->interlock); - - MON_ASSIGN_PC(entry->start_pc, pc, trace); - ETAP_DURATION_TIMESTAMP(entry, trace); - - return(TRUE); -} - -/* - * Routine: lock_try_read - * Function: - * Tries to get a read lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t -lock_try_read( - register lock_t * l) -{ - start_data_node_t entry = {0}; - unsigned short trace = 0; - pc_t pc; - - ETAP_STAMP(lock_event_table(l), trace, trace); - ETAP_CREATE_ENTRY(entry, trace); - - simple_lock(&l->interlock); - - if (l->want_write || l->want_upgrade) { - simple_unlock(&l->interlock); - ETAP_DESTROY_ENTRY(entry); - return(FALSE); - } - - l->read_count++; - - ETAP_LINK_ENTRY(l, entry, trace); - - simple_unlock(&l->interlock); - - MON_ASSIGN_PC(entry->start_pc, pc, trace); - ETAP_DURATION_TIMESTAMP(entry, trace); - - return(TRUE); -} -#endif /* Unused */ - -#if MACH_KDB - -void db_show_one_lock(lock_t *); - - -void -db_show_one_lock( - lock_t *lock) -{ - db_printf("Read_count = 0x%x, %swant_upgrade, %swant_write, ", - lock->read_count, - lock->want_upgrade ? "" : "!", - lock->want_write ? "" : "!"); - db_printf("%swaiting, %scan_sleep\n", - lock->waiting ? "" : "!", lock->can_sleep ? "" : "!"); - db_printf("Interlock:\n"); - db_show_one_simple_lock((db_expr_t)simple_lock_addr(lock->interlock), - TRUE, (db_expr_t)0, (char *)0); -} -#endif /* MACH_KDB */ - -/* - * The C portion of the mutex package. These routines are only invoked - * if the optimized assembler routines can't do the work. - */ - -/* - * Routine: lock_alloc - * Function: - * Allocate a mutex for external users who cannot - * hard-code the structure definition into their - * objects. - * For now just use kalloc, but a zone is probably - * warranted. - */ -mutex_t * -mutex_alloc( - etap_event_t event) -{ - mutex_t *m; - - if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0) - mutex_init(m, event); - return(m); -} - -/* - * Routine: mutex_free - * Function: - * Free a mutex allocated for external users. - * For now just use kfree, but a zone is probably - * warranted. - */ -void -mutex_free( - mutex_t *m) -{ - kfree((vm_offset_t)m, sizeof(mutex_t)); -} - -/* - * mutex_lock_wait - * - * Invoked in order to wait on contention. - * - * Called with the interlock locked and - * returns it unlocked. 
- */ -void -mutex_lock_wait ( - mutex_t *mutex, - thread_t holder) -{ - thread_t self = current_thread(); -#if !defined(i386) - integer_t priority; - spl_t s = splsched(); - - priority = self->sched_pri; - if (priority < self->priority) - priority = self->priority; - if (priority > MINPRI_KERNEL) - priority = MINPRI_KERNEL; - else - if (priority < BASEPRI_DEFAULT) - priority = BASEPRI_DEFAULT; - - assert(holder->thread == holder); /* XXX */ - thread_lock(holder); - if (mutex->promoted_pri == 0) - holder->promotions++; - if (holder->priority < MINPRI_KERNEL) { - holder->sched_mode |= TH_MODE_PROMOTED; - if ( mutex->promoted_pri < priority && - holder->sched_pri < priority ) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, - holder->sched_pri, priority, (int)holder, (int)mutex, 0); - - set_sched_pri(holder, priority); - } - } - thread_unlock(holder); - splx(s); - - if (mutex->promoted_pri < priority) - mutex->promoted_pri = priority; -#endif - - if (self->pending_promoter[self->pending_promoter_index] == NULL) { - self->pending_promoter[self->pending_promoter_index] = mutex; - mutex->waiters++; - } - else - if (self->pending_promoter[self->pending_promoter_index] != mutex) { - self->pending_promoter[++self->pending_promoter_index] = mutex; - mutex->waiters++; - } - - assert_wait(mutex, THREAD_UNINT); - interlock_unlock(&mutex->interlock); - - thread_block(THREAD_CONTINUE_NULL); -} - -/* - * mutex_lock_acquire - * - * Invoked on acquiring the mutex when there is - * contention. - * - * Returns the current number of waiters. - * - * Called with the interlock locked. - */ -int -mutex_lock_acquire( - mutex_t *mutex) -{ - thread_t thread = current_thread(); - - if (thread->pending_promoter[thread->pending_promoter_index] == mutex) { - thread->pending_promoter[thread->pending_promoter_index] = NULL; - if (thread->pending_promoter_index > 0) - thread->pending_promoter_index--; - mutex->waiters--; - } - -#if !defined(i386) - if (mutex->waiters > 0) { - integer_t priority = mutex->promoted_pri; - spl_t s = splsched(); - - thread_lock(thread); - thread->promotions++; - if (thread->priority < MINPRI_KERNEL) { - thread->sched_mode |= TH_MODE_PROMOTED; - if (thread->sched_pri < priority) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, - thread->sched_pri, priority, 0, (int)mutex, 0); - - set_sched_pri(thread, priority); - } - } - thread_unlock(thread); - splx(s); - } - else - mutex->promoted_pri = 0; -#endif - - return (mutex->waiters); -} - -/* - * mutex_unlock_wakeup - * - * Invoked on unlock when there is contention. - * - * Called with the interlock locked. 
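mutex_lock_wait() above donates priority to the holder: the waiter's effective priority is clamped into the kernel band before being pushed, and the holder's sched_pri is raised only if both the mutex's promoted_pri and the holder's current priority sit below it. The clamp itself, with illustrative stand-ins for the scheduler constants (the real values come from xnu's sched definitions):

#include <stdint.h>

#define BASEPRI_DEFAULT_SKETCH  31   /* illustrative, not xnu's value */
#define MINPRI_KERNEL_SKETCH    80   /* illustrative, not xnu's value */

static int32_t
promotion_priority(int32_t sched_pri, int32_t base_pri)
{
    int32_t pri = (sched_pri > base_pri) ? sched_pri : base_pri;

    if (pri > MINPRI_KERNEL_SKETCH)         /* never promote past the  */
        pri = MINPRI_KERNEL_SKETCH;         /* kernel band ceiling     */
    else if (pri < BASEPRI_DEFAULT_SKETCH)  /* but donate at least the */
        pri = BASEPRI_DEFAULT_SKETCH;       /* default base priority   */
    return pri;
}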
- */ -void -mutex_unlock_wakeup ( - mutex_t *mutex, - thread_t holder) -{ -#if !defined(i386) - thread_t thread = current_thread(); - - if (thread->top_act != holder) - panic("mutex_unlock_wakeup: mutex %x holder %x\n", mutex, holder); - - if (thread->promotions > 0) { - spl_t s = splsched(); - - thread_lock(thread); - if ( --thread->promotions == 0 && - (thread->sched_mode & TH_MODE_PROMOTED) ) { - thread->sched_mode &= ~TH_MODE_PROMOTED; - if (thread->sched_mode & TH_MODE_ISDEPRESSED) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, - thread->sched_pri, DEPRESSPRI, 0, (int)mutex, 0); - - set_sched_pri(thread, DEPRESSPRI); - } - else { - if (thread->priority < thread->sched_pri) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | - DBG_FUNC_NONE, - thread->sched_pri, thread->priority, - 0, (int)mutex, 0); - } - - compute_priority(thread, FALSE); - } - } - thread_unlock(thread); - splx(s); - } -#endif - - assert(mutex->waiters > 0); - thread_wakeup_one(mutex); -} - -boolean_t -mutex_preblock_wait( - mutex_t *mutex, - thread_t thread, - thread_t holder) -{ - wait_result_t wresult; - integer_t priority; - wait_queue_t wq; - - assert(holder == NULL || holder->thread == holder); - - wq = wait_event_wait_queue((event_t)mutex); - if (!wait_queue_lock_try(wq)) - return (FALSE); - - if (holder != NULL && !thread_lock_try(holder)) { - wait_queue_unlock(wq); - return (FALSE); - } - - wresult = wait_queue_assert_wait64_locked(wq, (uint32_t)mutex, - THREAD_UNINT, thread); - wait_queue_unlock(wq); - assert(wresult == THREAD_WAITING); - - priority = thread->sched_pri; - if (priority < thread->priority) - priority = thread->priority; - if (priority > MINPRI_KERNEL) - priority = MINPRI_KERNEL; - else - if (priority < BASEPRI_DEFAULT) - priority = BASEPRI_DEFAULT; - - if (holder != NULL) { - if (mutex->promoted_pri == 0) - holder->promotions++; - if (holder->priority < MINPRI_KERNEL) { - holder->sched_mode |= TH_MODE_PROMOTED; - if ( mutex->promoted_pri < priority && - holder->sched_pri < priority ) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, - holder->sched_pri, priority, - (int)holder, (int)mutex, 0); - - set_sched_pri(holder, priority); - } - } - thread_unlock(holder); - } - - if (mutex->promoted_pri < priority) - mutex->promoted_pri = priority; - - if (thread->pending_promoter[thread->pending_promoter_index] == NULL) { - thread->pending_promoter[thread->pending_promoter_index] = mutex; - mutex->waiters++; - } - else - if (thread->pending_promoter[thread->pending_promoter_index] != mutex) { - thread->pending_promoter[++thread->pending_promoter_index] = mutex; - mutex->waiters++; - } - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_PREBLOCK_MUTEX) | DBG_FUNC_NONE, - (int)thread, thread->sched_pri, (int)mutex, 0, 0); - - return (TRUE); -} - -/* - * mutex_pause: Called by former callers of simple_lock_pause(). - */ - -void -mutex_pause(void) -{ - wait_result_t wait_result; - - wait_result = assert_wait_timeout( 1, THREAD_UNINT); - assert(wait_result == THREAD_WAITING); - - ETAP_SET_REASON(current_thread(), BLOCKED_ON_MUTEX_LOCK); - - wait_result = thread_block(THREAD_CONTINUE_NULL); - assert(wait_result == THREAD_TIMED_OUT); -} - -#if MACH_KDB -/* - * Routines to print out simple_locks and mutexes in a nicely-formatted - * fashion. 
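mutex_pause() above converts a would-be spin into a one-tick timed block via assert_wait_timeout(), giving the lock holder a chance to run. In userland terms it amounts to no more than this; the 10 ms figure is an arbitrary stand-in for the tick:

#include <time.h>

static void
mutex_pause_sketch(void)
{
    struct timespec tick = { 0, 10 * 1000 * 1000 };   /* 10 ms stand-in */
    nanosleep(&tick, NULL);        /* block briefly instead of spinning */
}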
- */ - -char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; -char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; - -void -db_show_one_simple_lock ( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif) -{ - simple_lock_t saddr = (simple_lock_t)addr; - - if (saddr == (simple_lock_t)0 || !have_addr) { - db_error ("No simple_lock\n"); - } -#if USLOCK_DEBUG - else if (saddr->lock_type != USLOCK_TAG) - db_error ("Not a simple_lock\n"); -#endif /* USLOCK_DEBUG */ - - db_printf ("%s\n", simple_lock_labels); - db_print_simple_lock (saddr); -} - -void -db_print_simple_lock ( - simple_lock_t addr) -{ - - db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock)); -#if USLOCK_DEBUG - db_printf (" %08x", addr->debug.lock_thread); - db_printf (" %08x ", addr->debug.duration[1]); - db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY); -#endif /* USLOCK_DEBUG */ - db_printf ("\n"); -} - -void -db_show_one_mutex ( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif) -{ - mutex_t * maddr = (mutex_t *)addr; - - if (maddr == (mutex_t *)0 || !have_addr) - db_error ("No mutex\n"); -#if MACH_LDEBUG - else if (maddr->type != MUTEX_TAG) - db_error ("Not a mutex\n"); -#endif /* MACH_LDEBUG */ - - db_printf ("%s\n", mutex_labels); - db_print_mutex (maddr); -} - -void -db_print_mutex ( - mutex_t * addr) -{ - db_printf ("%08x %6d %7d", - addr, *hw_lock_addr(addr->locked), addr->waiters); -#if MACH_LDEBUG - db_printf (" %08x ", addr->thread); - db_printsym (addr->pc, DB_STGY_ANY); -#endif /* MACH_LDEBUG */ - db_printf ("\n"); -} -#endif /* MACH_KDB */ - -#if MACH_LDEBUG -extern void meter_simple_lock ( - simple_lock_t l); -extern void meter_simple_unlock ( - simple_lock_t l); -extern void cyctm05_stamp ( - unsigned long * start); -extern void cyctm05_diff ( - unsigned long * start, - unsigned long * end, - unsigned long * diff); - -#if 0 -simple_lock_data_t loser; -#endif - -void -meter_simple_lock( - simple_lock_t lp) -{ -#if 0 - cyctm05_stamp (lp->duration); -#endif -} - -int long_simple_lock_crash; -int long_simple_lock_time = 0x600; -/* - * This is pretty gawd-awful. XXX - */ -decl_simple_lock_data(extern,kd_tty) - -void -meter_simple_unlock( - simple_lock_t lp) -{ -#if 0 - unsigned long stime[2], etime[2], delta[2]; - - if (lp == &kd_tty) /* XXX */ - return; /* XXX */ - - stime[0] = lp->duration[0]; - stime[1] = lp->duration[1]; - - cyctm05_stamp (etime); - - if (etime[1] < stime[1]) /* XXX */ - return; /* XXX */ - - cyctm05_diff (stime, etime, delta); - - if (delta[1] >= 0x10000) /* XXX */ - return; /* XXX */ - - lp->duration[0] = delta[0]; - lp->duration[1] = delta[1]; - - if (loser.duration[1] < lp->duration[1]) - loser = *lp; - - assert (!long_simple_lock_crash || delta[1] < long_simple_lock_time); -#endif -} -#endif /* MACH_LDEBUG */ - - -#if ETAP_LOCK_TRACE - -/* - * ============================================================== - * ETAP hook when initializing a usimple_lock. May be invoked - * from the portable lock package or from an optimized machine- - * dependent implementation. 
- * ============================================================== - */ - -void -etap_simplelock_init ( - simple_lock_t l, - etap_event_t event) -{ - ETAP_CLEAR_TRACE_DATA(l); - etap_event_table_assign(&l->u.event_table_chain, event); - -#if ETAP_LOCK_ACCUMULATE - /* reserve an entry in the cumulative buffer */ - l->cbuff_entry = etap_cbuff_reserve(lock_event_table(l)); - /* initialize the entry if one was returned */ - if (l->cbuff_entry != CBUFF_ENTRY_NULL) { - l->cbuff_entry->event = event; - l->cbuff_entry->instance = (unsigned long) l; - l->cbuff_entry->kind = SPIN_LOCK; - } -#endif /* ETAP_LOCK_ACCUMULATE */ -} - - -void -etap_simplelock_unlock( - simple_lock_t l) -{ - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_hold_time; - pc_t pc; - - OBTAIN_PC(pc, l); - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - /* - * Calculate & collect hold time data only if - * the hold tracing was enabled throughout the - * whole operation. This prevents collection of - * bogus data caused by mid-operation trace changes. - * - */ - - if (ETAP_DURATION_ENABLED(trace) && ETAP_WHOLE_OP(l)) { - ETAP_TIMESTAMP (stop_hold_time); - ETAP_TOTAL_TIME(total_time, stop_hold_time, - l->u.s.start_hold_time); - CUM_HOLD_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace); - MON_ASSIGN_PC(l->end_pc, pc, trace); - MON_DATA_COLLECT(l, - l, - total_time, - SPIN_LOCK, - MON_DURATION, - trace); - } - ETAP_CLEAR_TRACE_DATA(l); -} - -/* ======================================================================== - * Since the simple_lock() routine is machine-dependent, it must always - * be coded in assembly. The two hook routines below are used to collect - * lock_stat data. - * ======================================================================== - */ - -/* - * ROUTINE: etap_simplelock_miss() - * - * FUNCTION: This spin lock routine is called upon the first - * spin (miss) of the lock. - * - * A timestamp is taken at the beginning of the wait period, - * if wait tracing is enabled. - * - * - * PARAMETERS: - * - lock address. - * - timestamp address. - * - * RETURNS: Wait timestamp value. The timestamp value is later used - * by etap_simplelock_hold(). - * - * NOTES: This routine is NOT ALWAYS called. The lock may be free - * (never spinning). For this reason the pc is collected in - * etap_simplelock_hold(). - * - */ -etap_time_t -etap_simplelock_miss ( - simple_lock_t l) - -{ - unsigned short trace = 0; - unsigned short dynamic = 0; - etap_time_t start_miss_time; - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - if (trace & ETAP_CONTENTION) - ETAP_TIMESTAMP(start_miss_time); - - return(start_miss_time); -} - -/* - * ROUTINE: etap_simplelock_hold() - * - * FUNCTION: This spin lock routine is ALWAYS called once the lock - * is acquired. Here, the contention time is calculated and - * the start hold time is stamped. - * - * PARAMETERS: - * - lock address. - * - PC of the calling function. - * - start wait timestamp.
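The miss/hold hook pair documented above is a two-point protocol: stamp the clock on the first failed attempt, then on acquisition charge the elapsed wait to the lock's contention total, where a zero stamp means the lock was free and nothing is recorded (the mutex variant further below tests exactly that). A compact sketch of the protocol using a monotonic clock; the hook and accumulator names are invented:

#include <stdint.h>
#include <time.h>

static uint64_t
now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000u + (uint64_t)ts.tv_nsec;
}

static uint64_t total_wait_ns;       /* per-lock accumulator (cbuff analogue) */

static uint64_t
lock_miss_hook(void)                 /* first failed attempt: take a stamp */
{
    return now_ns();
}

static void
lock_hold_hook(uint64_t miss_stamp)  /* always called on acquisition */
{
    if (miss_stamp != 0)             /* zero stamp: lock was free, no wait */
        total_wait_ns += now_ns() - miss_stamp;
}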
- * - */ - -void -etap_simplelock_hold ( - simple_lock_t l, - pc_t pc, - etap_time_t start_hold_time) -{ - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_hold_time; - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - MON_ASSIGN_PC(l->start_pc, pc, trace); - - /* do not collect wait data if lock was free */ - if (ETAP_TIME_IS_ZERO(start_hold_time) && (trace & ETAP_CONTENTION)) { - ETAP_TIMESTAMP(stop_hold_time); - ETAP_TOTAL_TIME(total_time, - stop_hold_time, - start_hold_time); - CUM_WAIT_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace); - MON_DATA_COLLECT(l, - l, - total_time, - SPIN_LOCK, - MON_CONTENTION, - trace); - ETAP_COPY_START_HOLD_TIME(&l->u.s, stop_hold_time, trace); - } - else - ETAP_DURATION_TIMESTAMP(&l->u.s, trace); -} - -void -etap_mutex_init ( - mutex_t *l, - etap_event_t event) -{ - ETAP_CLEAR_TRACE_DATA(l); - etap_event_table_assign(&l->u.event_table_chain, event); - -#if ETAP_LOCK_ACCUMULATE - /* reserve an entry in the cumulative buffer */ - l->cbuff_entry = etap_cbuff_reserve(lock_event_table(l)); - /* initialize the entry if one was returned */ - if (l->cbuff_entry != CBUFF_ENTRY_NULL) { - l->cbuff_entry->event = event; - l->cbuff_entry->instance = (unsigned long) l; - l->cbuff_entry->kind = MUTEX_LOCK; - } -#endif /* ETAP_LOCK_ACCUMULATE */ -} - -etap_time_t -etap_mutex_miss ( - mutex_t *l) -{ - unsigned short trace = 0; - unsigned short dynamic = 0; - etap_time_t start_miss_time; - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - if (trace & ETAP_CONTENTION) - ETAP_TIMESTAMP(start_miss_time); - else - ETAP_TIME_CLEAR(start_miss_time); - - return(start_miss_time); -} - -void -etap_mutex_hold ( - mutex_t *l, - pc_t pc, - etap_time_t start_hold_time) -{ - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_hold_time; - - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - MON_ASSIGN_PC(l->start_pc, pc, trace); - - /* do not collect wait data if lock was free */ - if (!ETAP_TIME_IS_ZERO(start_hold_time) && (trace & ETAP_CONTENTION)) { - ETAP_TIMESTAMP(stop_hold_time); - ETAP_TOTAL_TIME(total_time, - stop_hold_time, - start_hold_time); - CUM_WAIT_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace); - MON_DATA_COLLECT(l, - l, - total_time, - MUTEX_LOCK, - MON_CONTENTION, - trace); - ETAP_COPY_START_HOLD_TIME(&l->u.s, stop_hold_time, trace); - } - else - ETAP_DURATION_TIMESTAMP(&l->u.s, trace); -} - -void -etap_mutex_unlock( - mutex_t *l) -{ - unsigned short dynamic = 0; - unsigned short trace = 0; - etap_time_t total_time; - etap_time_t stop_hold_time; - pc_t pc; - - OBTAIN_PC(pc, l); - ETAP_STAMP(lock_event_table(l), trace, dynamic); - - /* - * Calculate & collect hold time data only if - * the hold tracing was enabled throughout the - * whole operation. This prevents collection of - * bogus data caused by mid-operation trace changes. - * - */ - - if (ETAP_DURATION_ENABLED(trace) && ETAP_WHOLE_OP(l)) { - ETAP_TIMESTAMP(stop_hold_time); - ETAP_TOTAL_TIME(total_time, stop_hold_time, - l->u.s.start_hold_time); - CUM_HOLD_ACCUMULATE(l->cbuff_entry, total_time, dynamic, trace); - MON_ASSIGN_PC(l->end_pc, pc, trace); - MON_DATA_COLLECT(l, - l, - total_time, - MUTEX_LOCK, - MON_DURATION, - trace); - } - ETAP_CLEAR_TRACE_DATA(l); -} - -#endif /* ETAP_LOCK_TRACE */ diff --git a/osfmk/kern/lock.h b/osfmk/kern/lock.h index cbb9cdb6b..4cb10d325 100644 --- a/osfmk/kern/lock.h +++ b/osfmk/kern/lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,260 +55,133 @@ * Higher Level Locking primitives definitions */ +#ifdef KERNEL_PRIVATE + #ifndef _KERN_LOCK_H_ #define _KERN_LOCK_H_ -/* - * Configuration variables: - * - * - * MACH_LDEBUG: record pc and thread of callers, turn on - * all lock debugging. - * - * - * ETAP: The Event Trace Analysis Package (ETAP) monitors - * and records micro-kernel lock behavior and general - * kernel events. ETAP supports two levels of - * tracing for locks: - * - cumulative (ETAP_LOCK_ACCUMULATE) - * - monitored (ETAP_LOCK_MONITOR) - * - * Note: If either level of tracing is configured then - * ETAP_LOCK_TRACE is automatically defined to - * equal one. - * - * Several macros are added throughout the lock code to - * allow for convenient configuration. - */ - #include #include -#include -#include +#include -/* - * The Mach lock package exports the following high-level - * lock abstractions: - * - * Lock Type Properties - * mutex blocking mutual exclusion lock, intended for - * SMP synchronization (vanishes on a uniprocessor); - * supports debugging, statistics, and pre-emption - * lock blocking synchronization permitting multiple - * simultaneous readers or a single writer; supports - * debugging and statistics but not pre-emption - * - * In general, mutex locks are preferred over all others, as the - * mutex supports pre-emption and relinquishes the processor - * upon contention. - * - */ - -#include +__BEGIN_DECLS -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE +#ifndef MACH_KERNEL_PRIVATE -/* - * A simple mutex lock. - * Do not change the order of the fields in this structure without - * changing the machine-dependent assembler routines which depend - * on them. 
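In the restructured header that follows, clients outside MACH_KERNEL_PRIVATE see only a forward-declared tag (typedef struct __mutex__ mutex_t;), so they can hold and pass mutex pointers without ever depending on the field layout that the assembler routines fix. The opaque-handle pattern in miniature, with _SKETCH names to avoid clashing with the real ones:

/* Public view: tag declared, never defined -- callers hold pointers only. */
typedef struct __mutex_sketch__ mutex_sketch_t;

extern mutex_sketch_t *mutex_sketch_alloc(void);
extern void            mutex_sketch_lock(mutex_sketch_t *);

/* Kernel-private view: the real layout, which assembly may depend on. */
#ifdef MACH_KERNEL_PRIVATE_SKETCH
struct __mutex_sketch__ {
    unsigned int   interlock;
    unsigned int   locked;
    unsigned short waiters;
    unsigned short promoted_pri;
};
#endif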
- */ +typedef struct __mutex__ mutex_t; -#include -#include -#include - -typedef struct { - hw_lock_data_t interlock; - hw_lock_data_t locked; - uint16_t waiters; - uint16_t promoted_pri; -#if MACH_LDEBUG - int type; -#define MUTEX_TAG 0x4d4d - vm_offset_t pc; - vm_offset_t thread; -#endif /* MACH_LDEBUG */ -#if ETAP_LOCK_TRACE - union { /* Must be overlaid on the event_tablep */ - struct event_table_chain event_table_chain; - struct { - event_table_t event_tablep; /* ptr to event table entry */ - etap_time_t start_hold_time; /* Time of last acquistion */ - } s; - } u; -#endif /* ETAP_LOCK_TRACE */ -#if ETAP_LOCK_ACCUMULATE - cbuff_entry_t cbuff_entry; /* cumulative buffer entry */ -#endif /* ETAP_LOCK_ACCUMULATE */ -#if ETAP_LOCK_MONITOR - vm_offset_t start_pc; /* pc where lock operation began */ - vm_offset_t end_pc; /* pc where lock operation ended */ -#endif /* ETAP_LOCK_MONITOR */ -} mutex_t; +#else /* MACH_KERNEL_PRIVATE */ #define decl_mutex_data(class,name) class mutex_t name; #define mutex_addr(m) (&(m)) -extern void mutex_init( - mutex_t *mutex, - etap_event_t tag); - -extern void mutex_lock_wait( - mutex_t *mutex, - thread_t holder); - -extern int mutex_lock_acquire( - mutex_t *mutex); - -extern void mutex_unlock_wakeup( - mutex_t *mutex, - thread_t holder); - -extern boolean_t mutex_preblock( - mutex_t *mutex, - thread_t thread); - -extern boolean_t mutex_preblock_wait( - mutex_t *mutex, - thread_t thread, - thread_t holder); - -extern void interlock_unlock( - hw_lock_t lock); +extern void mutex_init( + mutex_t *mutex, + unsigned short tag); #endif /* MACH_KERNEL_PRIVATE */ -extern void mutex_pause(void); +extern mutex_t *mutex_alloc( + unsigned short tag); -#endif /* __APPLE_API_PRIVATE */ - -#if !defined(MACH_KERNEL_PRIVATE) - -typedef struct __mutex__ mutex_t; - -#endif /* MACH_KERNEL_PRIVATE */ - -extern mutex_t *mutex_alloc( - etap_event_t tag); - -extern void mutex_free( - mutex_t *mutex); +extern void mutex_free( + mutex_t *mutex); -extern void mutex_lock( - mutex_t *mutex); +extern void mutex_lock( + mutex_t *mutex); -extern void mutex_unlock( - mutex_t *mutex); +extern void mutex_unlock( + mutex_t *mutex); extern boolean_t mutex_try( mutex_t *mutex); -#ifdef __APPLE_API_PRIVATE +extern void mutex_pause(void); -#ifdef MACH_KERNEL_PRIVATE +#define MA_OWNED 0x01 +#define MA_NOTOWNED 0x02 + +void _mutex_assert ( + mutex_t *mutex, + unsigned int what); -/* - * The general lock structure. Provides for multiple readers, - * upgrading from read to write, and sleeping until the lock - * can be gained. - * - * On some architectures, assembly language code in the 'inline' - * program fiddles the lock structures. It must be changed in - * concert with the structure layout. - * - * Only the "interlock" field is used for hardware exclusion; - * other fields are modified with normal instructions after - * acquiring the interlock bit. - */ +#define mutex_assert(a, b) _mutex_assert(a, b) -typedef struct { - decl_simple_lock_data(,interlock) /* "hardware" interlock field */ - volatile unsigned int - read_count:16, /* No. of accepted readers */ - want_upgrade:1, /* Read-to-write upgrade waiting */ - want_write:1, /* Writer is waiting, or - locked for write */ - waiting:1, /* Someone is sleeping on lock */ - can_sleep:1; /* Can attempts to lock go to sleep? 
*/ -#if ETAP_LOCK_TRACE - union { /* Must be overlaid on the event_tablep */ - struct event_table_chain event_table_chain; - struct { - event_table_t event_tablep; /* ptr to event table entry */ - start_data_node_t start_list; /* linked list of start times - and pcs */ - } s; - } u; -#endif /* ETAP_LOCK_TRACE */ -#if ETAP_LOCK_ACCUMULATE - cbuff_entry_t cbuff_write; /* write cumulative buffer entry */ - cbuff_entry_t cbuff_read; /* read cumulative buffer entry */ -#endif /* ETAP_LOCK_ACCUMULATE */ -} lock_t; - -/* Sleep locks must work even if no multiprocessing */ +#ifndef MACH_KERNEL_PRIVATE -/* - * Complex lock operations - */ +typedef struct __lock__ lock_t; -#if ETAP -/* - * Locks have a pointer into an event_table entry that names the - * corresponding lock event and controls whether it is being traced. - * Initially this pointer is into a read-only table event_table_init[]. - * Once dynamic allocation becomes possible a modifiable copy of the table - * is allocated and pointers are set to within this copy. The pointers - * that were already in place at that point need to be switched to point - * into the copy. To do this we overlay the event_table_chain structure - * onto sufficiently-big elements of the various lock structures so we - * can sweep down this list switching the pointers. The assumption is - * that we will not want to enable tracing before this is done (which is - * after all during kernel bootstrap, before any user tasks are launched). - * - * This is admittedly rather ugly but so were the alternatives: - * - record the event_table pointers in a statically-allocated array - * (dynamic allocation not yet being available) -- but there were - * over 8000 of them; - * - add a new link field to each lock structure; - * - change pointers to array indices -- this adds quite a bit of - * arithmetic to every lock operation that might be traced. 
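The surviving mutex interface above is deliberately small: allocate with a plain unsigned short tag now that the etap_event_t types are gone, lock and unlock, and assert ownership with mutex_assert(m, MA_OWNED). Assuming those declarations are in scope, a caller looks roughly like this (the tag value is a placeholder):

static void
critical_section_example(void)
{
    mutex_t *m = mutex_alloc(0);        /* tag value is a placeholder */

    mutex_lock(m);
    mutex_assert(m, MA_OWNED);          /* debug: we must own it here */
    /* ... guarded work ... */
    mutex_unlock(m);

    mutex_free(m);
}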
- */ -#define lock_event_table(lockp) ((lockp)->u.s.event_tablep) -#define lock_start_hold_time(lockp) ((lockp)->u.s.start_hold_time) -#endif /* ETAP_LOCK_TRACE */ +#else /* MACH_KERNEL_PRIVATE */ -extern void lock_init (lock_t*, - boolean_t, - etap_event_t, - etap_event_t); +extern void lock_init( + lock_t *lock, + boolean_t can_sleep, + unsigned short tag0, + unsigned short tag1); #endif /* MACH_KERNEL_PRIVATE */ -extern unsigned int LockTimeOut; /* Standard lock timeout value */ +extern lock_t *lock_alloc( + boolean_t can_sleep, + unsigned short tag0, + unsigned short tag1); -#endif /* __APPLE_API_PRIVATE */ +extern void lock_free( + lock_t *lock); -#if !defined(MACH_KERNEL_PRIVATE) +extern void lock_write( + lock_t *lock); -typedef struct __lock__ lock_t; -extern lock_t *lock_alloc(boolean_t, etap_event_t, etap_event_t); -void lock_free(lock_t *); +extern void lock_read( + lock_t *lock); -#endif /* MACH_KERNEL_PRIVATE */ +extern void lock_done( + lock_t *lock); -extern void lock_write (lock_t*); -extern void lock_read (lock_t*); -extern void lock_done (lock_t*); -extern void lock_write_to_read (lock_t*); +extern void lock_write_to_read( + lock_t *lock); #define lock_read_done(l) lock_done(l) #define lock_write_done(l) lock_done(l) -extern boolean_t lock_read_to_write (lock_t*); /* vm_map is only user */ +extern boolean_t lock_read_to_write( + lock_t *lock); + + +/* Sleep, unlocking and then relocking a usimple_lock in the process */ +extern wait_result_t thread_sleep_usimple_lock( + event_t event, + usimple_lock_t lock, + wait_interrupt_t interruptible); + +/* Sleep, unlocking and then relocking a mutex in the process */ +extern wait_result_t thread_sleep_mutex( + event_t event, + mutex_t *mutex, + wait_interrupt_t interruptible); + +/* Sleep with a deadline, unlocking and then relocking a mutex in the process */ +extern wait_result_t thread_sleep_mutex_deadline( + event_t event, + mutex_t *mutex, + uint64_t deadline, + wait_interrupt_t interruptible); + +/* Sleep, unlocking and then relocking a write lock in the process */ +extern wait_result_t thread_sleep_lock_write( + event_t event, + lock_t *lock, + wait_interrupt_t interruptible); +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE + +extern wait_result_t thread_sleep_fast_usimple_lock( + event_t event, + simple_lock_t lock, + wait_interrupt_t interruptible); +#endif /* MACH_KERNEL_PRIVATE */ #endif /* _KERN_LOCK_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/lock_mon.c b/osfmk/kern/lock_mon.c deleted file mode 100644 index 05cfb54a3..000000000 --- a/osfmk/kern/lock_mon.c +++ /dev/null @@ -1,415 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.3.19.1 1997/09/22 17:39:46 barbou - * MP+RT: protect cpu_number() usage against preemption. - * [97/09/16 barbou] - * - * Revision 1.3.15.4 1995/02/24 15:20:58 alanl - * DIPC: Merge from nmk17b2 to nmk18b8. - * Notes: major lock cleanup. Change kdb_lock and printf_lock - * references to conform with simple_lock declaration rules. - * This code is broken and non-portable; its functionality - * should be subsumed in the regular lock package. - * [95/01/16 alanl] - * - * Revision 1.3.17.2 1994/11/10 06:13:19 dwm - * mk6 CR764 - s/spinlock/simple_lock/ (name change only) - * [1994/11/10 05:28:52 dwm] - * - * Revision 1.3.17.1 1994/11/04 10:07:54 dwm - * mk6 CR668 - 1.3b26 merge - * This file is obviously UNUSED - hence broken; merged anyway - * * Revision 1.3.4.4 1994/05/06 18:50:11 tmt - * Merge in DEC Alpha changes to osc1.3b19. - * Merge Alpha changes into osc1.312b source code. - * 64bit cleanup. - * * End1.3merge - * [1994/11/04 09:25:58 dwm] - * - * Revision 1.3.15.1 1994/09/23 02:21:48 ezf - * change marker to not FREE - * [1994/09/22 21:34:22 ezf] - * - * Revision 1.3.13.1 1994/06/09 14:11:30 dswartz - * Preemption merge. - * [1994/06/09 14:07:06 dswartz] - * - * Revision 1.3.4.2 1993/06/09 02:36:12 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:13:15 jeffc] - * - * Revision 1.3 1993/04/19 16:26:56 devrcs - * Fix for TIME_STAMP configuration. - * [Patrick Petit ] - * [93/02/11 bernadat] - * - * Revision 1.2 1992/11/25 01:11:05 robert - * integrate changes below for norma_14 - * - * Philippe Bernadat (bernadat) at gr.osf.org - * Moved MACH_MP_DEBUG code to kern/lock.c - * [1992/11/13 19:33:47 robert] - * - * Revision 1.1 1992/09/30 02:09:28 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.1.2.1.3.1 92/02/18 19:08:45 jeffreyh - * Created. Might need some work if used on anything but a 386. - * [92/02/11 07:56:50 bernadat] - */ -/* CMU_ENDHIST */ - -/* - * Mach Operating System - * Copyright (c) 1990 Carnegie-Mellon University - * Copyright (c) 1989 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -/* - */ - -/* - * Support For MP Debugging - * if MACH_MP_DEBUG is on, we use alternate locking - * routines do detect dealocks - * Support for MP lock monitoring (MACH_LOCK_MON). - * Registers use of locks, contention. 
* Depending on hardware also records time spent with locks held - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - - -decl_simple_lock_data(extern, kdb_lock) -decl_simple_lock_data(extern, printf_lock) - -#if NCPUS > 1 && MACH_LOCK_MON - -#if TIME_STAMP -extern time_stamp_t time_stamp; -#else TIME_STAMP -typedef unsigned int time_stamp_t; -#define time_stamp 0 -#endif TIME_STAMP - -#define LOCK_INFO_MAX (1024*32) -#define LOCK_INFO_HASH_COUNT 1024 -#define LOCK_INFO_PER_BUCKET (LOCK_INFO_MAX/LOCK_INFO_HASH_COUNT) - - -#define HASH_LOCK(lock) ((long)lock>>5 & (LOCK_INFO_HASH_COUNT-1)) - -struct lock_info { - unsigned int success; - unsigned int fail; - unsigned int masked; - unsigned int stack; - unsigned int time; -#if MACH_SLOCKS - simple_lock_data_t * lock; -#endif - vm_offset_t caller; -}; - -struct lock_info_bucket { - struct lock_info info[LOCK_INFO_PER_BUCKET]; -}; - -struct lock_info_bucket lock_info[LOCK_INFO_HASH_COUNT]; -struct lock_info default_lock_info; -unsigned default_lock_stack = 0; - -extern int curr_ipl[]; - - - -struct lock_info * -locate_lock_info(lock) -simple_lock_data_t ** lock; -{ - struct lock_info *li = &(lock_info[HASH_LOCK(*lock)].info[0]); - register i; - - for (i=0; i < LOCK_INFO_PER_BUCKET; i++, li++) - if (li->lock) { - if (li->lock == *lock) - return(li); - } else { - li->lock = *lock; - li->caller = *((vm_offset_t *)lock - 1); - return(li); - } - db_printf("out of lock_info slots\n"); - li = &default_lock_info; - return(li); -} - - -simple_lock(lock) -decl_simple_lock_data(, *lock) -{ - register struct lock_info *li = locate_lock_info(&lock); - - if (current_thread()) - li->stack = current_thread()->lock_stack++; - mp_disable_preemption(); - if (curr_ipl[cpu_number()]) - li->masked++; - mp_enable_preemption(); - if (_simple_lock_try(lock)) - li->success++; - else { - _simple_lock(lock); - li->fail++; - } - li->time = time_stamp - li->time; -} - -simple_lock_try(lock) -decl_simple_lock_data(, *lock) -{ - register struct lock_info *li = locate_lock_info(&lock); - - mp_disable_preemption(); - if (curr_ipl[cpu_number()]) - li->masked++; - mp_enable_preemption(); - if (_simple_lock_try(lock)) { - li->success++; - li->time = time_stamp - li->time; - if (current_thread()) - li->stack = current_thread()->lock_stack++; - return(1); - } else { - li->fail++; - return(0); - } -} - -simple_unlock(lock) -decl_simple_lock_data(, *lock) -{ - register time_stamp_t stamp = time_stamp; - register time_stamp_t *time = &locate_lock_info(&lock)->time; - register unsigned *lock_stack; - - *time = stamp - *time; - _simple_unlock(lock); - if (current_thread()) { - lock_stack = &current_thread()->lock_stack; - if (*lock_stack) - (*lock_stack)--; - } -} - -lip() { - lis(4, 1, 0); -} - -#define lock_info_sort lis - -unsigned scurval, ssum; -struct lock_info *sli; - -lock_info_sort(arg, abs, count) -{ - struct lock_info *li, mean; - int bucket = 0; - int i; - unsigned max_val; - unsigned old_val = (unsigned)-1; - struct lock_info *target_li = &lock_info[0].info[0]; - unsigned sum; - unsigned empty, total; - unsigned curval; - - printf("\nSUCCESS FAIL MASKED STACK TIME LOCK/CALLER\n"); - if (!count) - count = 8 ; - while (count && target_li) { - empty = LOCK_INFO_HASH_COUNT; - target_li = 0; - total = 0; - max_val = 0; - mean.success = 0; - mean.fail = 0; - mean.masked = 0; - mean.stack = 0; - mean.time = 0; - mean.lock = (simple_lock_data_t *) &lock_info; - mean.caller = (vm_offset_t) &lock_info; - for (bucket = 0; bucket <
LOCK_INFO_HASH_COUNT; bucket++) { - li = &lock_info[bucket].info[0]; - if (li->lock) - empty--; - for (i= 0; i< LOCK_INFO_PER_BUCKET && li->lock; i++, li++) { - if (li->lock == &kdb_lock || li->lock == &printf_lock) - continue; - total++; - curval = *((int *)li + arg); - sum = li->success + li->fail; - if(!sum && !abs) - continue; - scurval = curval; - ssum = sum; - sli = li; - if (!abs) switch(arg) { - case 0: - break; - case 1: - case 2: - curval = (curval*100) / sum; - break; - case 3: - case 4: - curval = curval / sum; - break; - } - if (curval > max_val && curval < old_val) { - max_val = curval; - target_li = li; - } - if (curval == old_val && count != 0) { - print_lock_info(li); - count--; - } - mean.success += li->success; - mean.fail += li->fail; - mean.masked += li->masked; - mean.stack += li->stack; - mean.time += li->time; - } - } - if (target_li) - old_val = max_val; - } - db_printf("\n%d total locks, %d empty buckets", total, empty ); - if (default_lock_info.success) - db_printf(", default: %d", default_lock_info.success + default_lock_info.fail); - db_printf("\n"); - print_lock_info(&mean); -} - -#define lock_info_clear lic - -lock_info_clear() -{ - struct lock_info *li; - int bucket = 0; - int i; - for (bucket = 0; bucket < LOCK_INFO_HASH_COUNT; bucket++) { - li = &lock_info[bucket].info[0]; - for (i= 0; i< LOCK_INFO_PER_BUCKET; i++, li++) { - bzero(li, sizeof(struct lock_info)); - } - } - bzero(&default_lock_info, sizeof(struct lock_info)); -} - -print_lock_info(li) -struct lock_info *li; -{ - int off; - int sum = li->success + li->fail; - db_printf("%d %d/%d %d/%d %d/%d %d/%d ", li->success, - li->fail, (li->fail*100)/sum, - li->masked, (li->masked*100)/sum, - li->stack, li->stack/sum, - li->time, li->time/sum); - db_search_symbol(li->lock, 0, &off); - if (off < 1024) - db_printsym(li->lock, 0); - else { - db_printsym(li->caller, 0); - db_printf("(%X)", li->lock); - } - db_printf("\n"); -} - -#endif NCPUS > 1 && MACH_LOCK_MON - -#if TIME_STAMP - -/* - * Measure lock/unlock operations - */ - -time_lock(loops) -{ - decl_simple_lock_data(, lock) - register time_stamp_t stamp; - register int i; - - - if (!loops) - loops = 1000; - simple_lock_init(&lock); - stamp = time_stamp; - for (i = 0; i < loops; i++) { - simple_lock(&lock); - simple_unlock(&lock); - } - stamp = time_stamp - stamp; - db_printf("%d stamps for simple_locks\n", stamp/loops); -#if MACH_LOCK_MON - stamp = time_stamp; - for (i = 0; i < loops; i++) { - _simple_lock(&lock); - _simple_unlock(&lock); - } - stamp = time_stamp - stamp; - db_printf("%d stamps for _simple_locks\n", stamp/loops); -#endif MACH_LOCK_MON -} -#endif TIME_STAMP - - - - - diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c new file mode 100644 index 000000000..aa7b65fc8 --- /dev/null +++ b/osfmk/kern/locks.c @@ -0,0 +1,1055 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +#define LCK_MTX_SLEEP_CODE 0 +#define LCK_MTX_SLEEP_DEADLINE_CODE 1 +#define LCK_MTX_LCK_WAIT_CODE 2 +#define LCK_MTX_UNLCK_WAKEUP_CODE 3 + + +static queue_head_t lck_grp_queue; +static unsigned int lck_grp_cnt; + +decl_mutex_data(static,lck_grp_lock) + +lck_grp_attr_t LockDefaultGroupAttr; +lck_grp_t LockCompatGroup; +lck_attr_t LockDefaultLckAttr; + +/* + * Routine: lck_mod_init + */ + +void +lck_mod_init( + void) +{ + queue_init(&lck_grp_queue); + mutex_init(&lck_grp_lock, 0); + lck_grp_cnt = 0; + lck_grp_attr_setdefault( &LockDefaultGroupAttr); + lck_grp_init( &LockCompatGroup, "Compatibility APIs", LCK_GRP_ATTR_NULL); + lck_attr_setdefault(&LockDefaultLckAttr); +} + +/* + * Routine: lck_grp_attr_alloc_init + */ + +lck_grp_attr_t * +lck_grp_attr_alloc_init( + void) +{ + lck_grp_attr_t *attr; + + if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0) + lck_grp_attr_setdefault(attr); + + return(attr); +} + + +/* + * Routine: lck_grp_attr_setdefault + */ + +void +lck_grp_attr_setdefault( + lck_grp_attr_t *attr) +{ + if (LcksOpts & enaLkStat) + attr->grp_attr_val = LCK_GRP_ATTR_STAT; + else + attr->grp_attr_val = 0; +} + + +/* + * Routine: lck_grp_attr_setstat + */ + +void +lck_grp_attr_setstat( + lck_grp_attr_t *attr) +{ + (void)hw_atomic_or((uint32_t *)&attr->grp_attr_val, LCK_GRP_ATTR_STAT); +} + + +/* + * Routine: lck_grp_attr_free + */ + +void +lck_grp_attr_free( + lck_grp_attr_t *attr) +{ + kfree(attr, sizeof(lck_grp_attr_t)); +} + + +/* + * Routine: lck_grp_alloc_init + */ + +lck_grp_t * +lck_grp_alloc_init( + const char* grp_name, + lck_grp_attr_t *attr) +{ + lck_grp_t *grp; + + if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0) + lck_grp_init(grp, grp_name, attr); + + return(grp); +} + + +/* + * Routine: lck_grp_init + */ + +void +lck_grp_init( + 
lck_grp_t *grp, + const char* grp_name, + lck_grp_attr_t *attr) +{ + bzero((void *)grp, sizeof(lck_grp_t)); + + (void) strncpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME); + + if (attr != LCK_GRP_ATTR_NULL) + grp->lck_grp_attr = attr->grp_attr_val; + else if (LcksOpts & enaLkStat) + grp->lck_grp_attr = LCK_GRP_ATTR_STAT; + else + grp->lck_grp_attr = LCK_ATTR_NONE; + + grp->lck_grp_refcnt = 1; + + mutex_lock(&lck_grp_lock); + enqueue_tail(&lck_grp_queue, (queue_entry_t)grp); + lck_grp_cnt++; + mutex_unlock(&lck_grp_lock); + +} + + +/* + * Routine: lck_grp_free + */ + +void +lck_grp_free( + lck_grp_t *grp) +{ + mutex_lock(&lck_grp_lock); + lck_grp_cnt--; + (void)remque((queue_entry_t)grp); + mutex_unlock(&lck_grp_lock); + lck_grp_deallocate(grp); +} + + +/* + * Routine: lck_grp_reference + */ + +void +lck_grp_reference( + lck_grp_t *grp) +{ + (void)hw_atomic_add((uint32_t *)(&grp->lck_grp_refcnt), 1); +} + + +/* + * Routine: lck_grp_deallocate + */ + +void +lck_grp_deallocate( + lck_grp_t *grp) +{ + if (hw_atomic_sub((uint32_t *)(&grp->lck_grp_refcnt), 1) == 0) + kfree(grp, sizeof(lck_grp_t)); +} + +/* + * Routine: lck_grp_lckcnt_incr + */ + +void +lck_grp_lckcnt_incr( + lck_grp_t *grp, + lck_type_t lck_type) +{ + unsigned int *lckcnt; + + switch (lck_type) { + case LCK_TYPE_SPIN: + lckcnt = &grp->lck_grp_spincnt; + break; + case LCK_TYPE_MTX: + lckcnt = &grp->lck_grp_mtxcnt; + break; + case LCK_TYPE_RW: + lckcnt = &grp->lck_grp_rwcnt; + break; + default: + return panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type); + } + + (void)hw_atomic_add((uint32_t *)lckcnt, 1); +} + +/* + * Routine: lck_grp_lckcnt_decr + */ + +void +lck_grp_lckcnt_decr( + lck_grp_t *grp, + lck_type_t lck_type) +{ + unsigned int *lckcnt; + + switch (lck_type) { + case LCK_TYPE_SPIN: + lckcnt = &grp->lck_grp_spincnt; + break; + case LCK_TYPE_MTX: + lckcnt = &grp->lck_grp_mtxcnt; + break; + case LCK_TYPE_RW: + lckcnt = &grp->lck_grp_rwcnt; + break; + default: + return panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type); + } + + (void)hw_atomic_sub((uint32_t *)lckcnt, 1); +} + +/* + * Routine: lck_attr_alloc_init + */ + +lck_attr_t * +lck_attr_alloc_init( + void) +{ + lck_attr_t *attr; + + if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0) + lck_attr_setdefault(attr); + + return(attr); +} + + +/* + * Routine: lck_attr_setdefault + */ + +void +lck_attr_setdefault( + lck_attr_t *attr) +{ +#if !DEBUG + if (LcksOpts & enaLkDeb) + attr->lck_attr_val = LCK_ATTR_DEBUG; + else + attr->lck_attr_val = LCK_ATTR_NONE; +#else + attr->lck_attr_val = LCK_ATTR_DEBUG; +#endif + +} + + +/* + * Routine: lck_attr_setdebug + */ +void +lck_attr_setdebug( + lck_attr_t *attr) +{ + (void)hw_atomic_or((uint32_t *)&attr->lck_attr_val, LCK_ATTR_DEBUG); +} + + +/* + * Routine: lck_attr_free + */ +void +lck_attr_free( + lck_attr_t *attr) +{ + kfree(attr, sizeof(lck_attr_t)); +} + + +/* + * Routine: lck_spin_sleep + */ +wait_result_t +lck_spin_sleep( + lck_spin_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible) +{ + wait_result_t res; + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait(event, interruptible); + if (res == THREAD_WAITING) { + lck_spin_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) + lck_spin_lock(lck); + } + else + if (lck_sleep_action & LCK_SLEEP_UNLOCK) + lck_spin_unlock(lck); + + return res; +} + + +/* + * Routine: 
lck_spin_sleep_deadline + */ +wait_result_t +lck_spin_sleep_deadline( + lck_spin_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline) +{ + wait_result_t res; + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait_deadline(event, interruptible, deadline); + if (res == THREAD_WAITING) { + lck_spin_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) + lck_spin_lock(lck); + } + else + if (lck_sleep_action & LCK_SLEEP_UNLOCK) + lck_spin_unlock(lck); + + return res; +} + + +/* + * Routine: lck_mtx_sleep + */ +wait_result_t +lck_mtx_sleep( + lck_mtx_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible) +{ + wait_result_t res; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START, + (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0); + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait(event, interruptible); + if (res == THREAD_WAITING) { + lck_mtx_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) + lck_mtx_lock(lck); + } + else + if (lck_sleep_action & LCK_SLEEP_UNLOCK) + lck_mtx_unlock(lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); + + return res; +} + + +/* + * Routine: lck_mtx_sleep_deadline + */ +wait_result_t +lck_mtx_sleep_deadline( + lck_mtx_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline) +{ + wait_result_t res; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START, + (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0); + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait_deadline(event, interruptible, deadline); + if (res == THREAD_WAITING) { + lck_mtx_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) + lck_mtx_lock(lck); + } + else + if (lck_sleep_action & LCK_SLEEP_UNLOCK) + lck_mtx_unlock(lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0); + + return res; +} + +/* + * Routine: lck_mtx_lock_wait + * + * Invoked in order to wait on contention. + * + * Called with the interlock locked and + * returns it unlocked. 
+ */ +void +lck_mtx_lock_wait ( + lck_mtx_t *lck, + thread_t holder) +{ + thread_t self = current_thread(); + lck_mtx_t *mutex; + integer_t priority; + spl_t s = splsched(); + + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) + mutex = lck; + else + mutex = &lck->lck_mtx_ptr->lck_mtx; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0); + + priority = self->sched_pri; + if (priority < self->priority) + priority = self->priority; + if (priority > MINPRI_KERNEL) + priority = MINPRI_KERNEL; + else + if (priority < BASEPRI_DEFAULT) + priority = BASEPRI_DEFAULT; + + thread_lock(holder); + if (mutex->lck_mtx_pri == 0) + holder->promotions++; + if (holder->priority < MINPRI_KERNEL) { + holder->sched_mode |= TH_MODE_PROMOTED; + if ( mutex->lck_mtx_pri < priority && + holder->sched_pri < priority ) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, + holder->sched_pri, priority, (int)holder, (int)lck, 0); + + set_sched_pri(holder, priority); + } + } + thread_unlock(holder); + splx(s); + + if (mutex->lck_mtx_pri < priority) + mutex->lck_mtx_pri = priority; + if (self->pending_promoter[self->pending_promoter_index] == NULL) { + self->pending_promoter[self->pending_promoter_index] = mutex; + mutex->lck_mtx_waiters++; + } + else + if (self->pending_promoter[self->pending_promoter_index] != mutex) { + self->pending_promoter[++self->pending_promoter_index] = mutex; + mutex->lck_mtx_waiters++; + } + + assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + lck_mtx_ilk_unlock(mutex); + + thread_block(THREAD_CONTINUE_NULL); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + +/* + * Routine: lck_mtx_lock_acquire + * + * Invoked on acquiring the mutex when there is + * contention. + * + * Returns the current number of waiters. + * + * Called with the interlock locked. + */ +int +lck_mtx_lock_acquire( + lck_mtx_t *lck) +{ + thread_t thread = current_thread(); + lck_mtx_t *mutex; + + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) + mutex = lck; + else + mutex = &lck->lck_mtx_ptr->lck_mtx; + + if (thread->pending_promoter[thread->pending_promoter_index] == mutex) { + thread->pending_promoter[thread->pending_promoter_index] = NULL; + if (thread->pending_promoter_index > 0) + thread->pending_promoter_index--; + mutex->lck_mtx_waiters--; + } + + if (mutex->lck_mtx_waiters > 0) { + integer_t priority = mutex->lck_mtx_pri; + spl_t s = splsched(); + + thread_lock(thread); + thread->promotions++; + if (thread->priority < MINPRI_KERNEL) { + thread->sched_mode |= TH_MODE_PROMOTED; + if (thread->sched_pri < priority) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, + thread->sched_pri, priority, 0, (int)lck, 0); + + set_sched_pri(thread, priority); + } + } + thread_unlock(thread); + splx(s); + } + else + mutex->lck_mtx_pri = 0; + + return (mutex->lck_mtx_waiters); +} + +/* + * Routine: lck_mtx_unlock_wakeup + * + * Invoked on unlock when there is contention. + * + * Called with the interlock locked. 
+ */ +void +lck_mtx_unlock_wakeup ( + lck_mtx_t *lck, + thread_t holder) +{ + thread_t thread = current_thread(); + lck_mtx_t *mutex; + + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) + mutex = lck; + else + mutex = &lck->lck_mtx_ptr->lck_mtx; + + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0); + + if (thread != holder) + panic("lck_mtx_unlock_wakeup: mutex %x holder %x\n", mutex, holder); + + if (thread->promotions > 0) { + spl_t s = splsched(); + + thread_lock(thread); + if ( --thread->promotions == 0 && + (thread->sched_mode & TH_MODE_PROMOTED) ) { + thread->sched_mode &= ~TH_MODE_PROMOTED; + if (thread->sched_mode & TH_MODE_ISDEPRESSED) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, + thread->sched_pri, DEPRESSPRI, 0, (int)lck, 0); + + set_sched_pri(thread, DEPRESSPRI); + } + else { + if (thread->priority < thread->sched_pri) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | + DBG_FUNC_NONE, + thread->sched_pri, thread->priority, + 0, (int)lck, 0); + } + + compute_priority(thread, FALSE); + } + } + thread_unlock(thread); + splx(s); + } + assert(mutex->lck_mtx_waiters > 0); + thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int))); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + +/* + * Routine: mutex_pause + * + * Called by former callers of simple_lock_pause(). + */ + +void +mutex_pause(void) +{ + wait_result_t wait_result; + + wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, 1, 1000*NSEC_PER_USEC); + assert(wait_result == THREAD_WAITING); + + wait_result = thread_block(THREAD_CONTINUE_NULL); + assert(wait_result == THREAD_TIMED_OUT); +} + +/* + * Routine: lck_rw_sleep + */ +wait_result_t +lck_rw_sleep( + lck_rw_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible) +{ + wait_result_t res; + lck_rw_type_t lck_rw_type; + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait(event, interruptible); + if (res == THREAD_WAITING) { + lck_rw_type = lck_rw_done(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { + if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE))) + lck_rw_lock(lck, lck_rw_type); + else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) + lck_rw_lock_exclusive(lck); + else + lck_rw_lock_shared(lck); + } + } + else + if (lck_sleep_action & LCK_SLEEP_UNLOCK) + (void)lck_rw_done(lck); + + return res; +} + + +/* + * Routine: lck_rw_sleep_deadline + */ +wait_result_t +lck_rw_sleep_deadline( + lck_rw_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline) +{ + wait_result_t res; + lck_rw_type_t lck_rw_type; + + if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) + panic("Invalid lock sleep action %x\n", lck_sleep_action); + + res = assert_wait_deadline(event, interruptible, deadline); + if (res == THREAD_WAITING) { + lck_rw_type = lck_rw_done(lck); + res = thread_block(THREAD_CONTINUE_NULL); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { + if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE))) + lck_rw_lock(lck, lck_rw_type); + else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) + lck_rw_lock_exclusive(lck); + else + lck_rw_lock_shared(lck); + } + } + else + if (lck_sleep_action & 
LCK_SLEEP_UNLOCK) + (void)lck_rw_done(lck); + + return res; +} + +kern_return_t +host_lockgroup_info( + host_t host, + lockgroup_info_array_t *lockgroup_infop, + mach_msg_type_number_t *lockgroup_infoCntp) +{ + lockgroup_info_t *lockgroup_info_base; + lockgroup_info_t *lockgroup_info; + vm_offset_t lockgroup_info_addr; + vm_size_t lockgroup_info_size; + lck_grp_t *lck_grp; + unsigned int i; + vm_size_t used; + vm_map_copy_t copy; + kern_return_t kr; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + mutex_lock(&lck_grp_lock); + + lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info); + kr = kmem_alloc_pageable(ipc_kernel_map, + &lockgroup_info_addr, lockgroup_info_size); + if (kr != KERN_SUCCESS) { + mutex_unlock(&lck_grp_lock); + return(kr); + } + + lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr; + lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue); + lockgroup_info = lockgroup_info_base; + + for (i = 0; i < lck_grp_cnt; i++) { + + lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt; + lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt; + lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt; + lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt; + lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max; + lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum; + + lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt; + lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt; + lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt; + lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt; + lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt; + lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max; + lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum; + lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max; + lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum; + + lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt; + lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt; + lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt; + lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt; + lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt; + lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max; + lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum; + lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max; + lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum; + + (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME); + + lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp))); + lockgroup_info++; + } + + *lockgroup_infoCntp = lck_grp_cnt; + mutex_unlock(&lck_grp_lock); + + used = (*lockgroup_infoCntp) * sizeof *lockgroup_info; + + if 
(used != lockgroup_info_size) + bzero((char *) lockgroup_info, lockgroup_info_size - used); + + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr, + (vm_map_size_t)lockgroup_info_size, TRUE, &copy); + assert(kr == KERN_SUCCESS); + + *lockgroup_infop = (lockgroup_info_t *) copy; + + return(KERN_SUCCESS); +} + +/* + * Compatibility module + */ + +extern lck_rw_t *lock_alloc_EXT( boolean_t can_sleep, unsigned short tag0, unsigned short tag1); +extern void lock_done_EXT(lck_rw_t *lock); +extern void lock_free_EXT(lck_rw_t *lock); +extern void lock_init_EXT(lck_rw_t *lock, boolean_t can_sleep, unsigned short tag0, unsigned short tag1); +extern void lock_read_EXT(lck_rw_t *lock); +extern boolean_t lock_read_to_write_EXT(lck_rw_t *lock); +extern void lock_write_EXT(lck_rw_t *lock); +extern void lock_write_to_read_EXT(lck_rw_t *lock); +extern wait_result_t thread_sleep_lock_write_EXT( + event_t event, lck_rw_t *lock, wait_interrupt_t interruptible); + +extern lck_mtx_t *mutex_alloc_EXT(unsigned short tag); +extern void mutex_free_EXT(lck_mtx_t *mutex); +extern void mutex_init_EXT(lck_mtx_t *mutex, unsigned short tag); +extern void mutex_lock_EXT(lck_mtx_t *mutex); +extern boolean_t mutex_try_EXT(lck_mtx_t *mutex); +extern void mutex_unlock_EXT(lck_mtx_t *mutex); +extern wait_result_t thread_sleep_mutex_EXT( + event_t event, lck_mtx_t *mutex, wait_interrupt_t interruptible); +extern wait_result_t thread_sleep_mutex_deadline_EXT( + event_t event, lck_mtx_t *mutex, uint64_t deadline, wait_interrupt_t interruptible); + +extern void usimple_lock_EXT(lck_spin_t *lock); +extern void usimple_lock_init_EXT(lck_spin_t *lock, unsigned short tag); +extern unsigned int usimple_lock_try_EXT(lck_spin_t *lock); +extern void usimple_unlock_EXT(lck_spin_t *lock); +extern wait_result_t thread_sleep_usimple_lock_EXT(event_t event, lck_spin_t *lock, wait_interrupt_t interruptible); + +lck_rw_t * +lock_alloc_EXT( + __unused boolean_t can_sleep, + __unused unsigned short tag0, + __unused unsigned short tag1) +{ + return( lck_rw_alloc_init( &LockCompatGroup, LCK_ATTR_NULL)); +} + +void +lock_done_EXT( + lck_rw_t *lock) +{ + (void) lck_rw_done(lock); +} + +void +lock_free_EXT( + lck_rw_t *lock) +{ + lck_rw_free(lock, &LockCompatGroup); +} + +void +lock_init_EXT( + lck_rw_t *lock, + __unused boolean_t can_sleep, + __unused unsigned short tag0, + __unused unsigned short tag1) +{ + lck_rw_init(lock, &LockCompatGroup, LCK_ATTR_NULL); +} + +void +lock_read_EXT( + lck_rw_t *lock) +{ + lck_rw_lock_shared( lock); +} + +boolean_t +lock_read_to_write_EXT( + lck_rw_t *lock) +{ + return( lck_rw_lock_shared_to_exclusive(lock)); +} + +void +lock_write_EXT( + lck_rw_t *lock) +{ + lck_rw_lock_exclusive(lock); +} + +void +lock_write_to_read_EXT( + lck_rw_t *lock) +{ + lck_rw_lock_exclusive_to_shared(lock); +} + +wait_result_t +thread_sleep_lock_write_EXT( + event_t event, + lck_rw_t *lock, + wait_interrupt_t interruptible) +{ + return( lck_rw_sleep(lock, LCK_SLEEP_EXCLUSIVE, event, interruptible)); +} + +lck_mtx_t * +mutex_alloc_EXT( + __unused unsigned short tag) +{ + return(lck_mtx_alloc_init(&LockCompatGroup, LCK_ATTR_NULL)); +} + +void +mutex_free_EXT( + lck_mtx_t *mutex) +{ + lck_mtx_free(mutex, &LockCompatGroup); +} + +void +mutex_init_EXT( + lck_mtx_t *mutex, + __unused unsigned short tag) +{ + lck_mtx_init(mutex, &LockCompatGroup, LCK_ATTR_NULL); +} + +void +mutex_lock_EXT( + lck_mtx_t *mutex) +{ + lck_mtx_lock(mutex); +} + +boolean_t +mutex_try_EXT( + lck_mtx_t *mutex) +{ + return(lck_mtx_try_lock(mutex)); +} +
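The compatibility module continues below with the remaining mutex and usimple shims. Taken together, these _EXT entry points keep the legacy lock_*/mutex_*/usimple_* interfaces working on top of the new lck_* primitives: the old tag arguments are ignored, and every lock created through them is charged to the single LockCompatGroup. A sketch of what a legacy mutex user effectively executes after this change; the binding of the old symbol names to these _EXT functions happens outside this file, so treat that wiring as an assumption, while the wrapper bodies are the ones shown here:

	lck_mtx_t	*m;

	m = lck_mtx_alloc_init(&LockCompatGroup, LCK_ATTR_NULL);	/* was: m = mutex_alloc(tag); */
	lck_mtx_lock(m);						/* was: mutex_lock(m);        */
	/* ... critical section ... */
	lck_mtx_unlock(m);						/* was: mutex_unlock(m);      */
	lck_mtx_free(m, &LockCompatGroup);				/* was: mutex_free(m);        */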
+void +mutex_unlock_EXT( + lck_mtx_t *mutex) +{ + lck_mtx_unlock(mutex); +} + +wait_result_t +thread_sleep_mutex_EXT( + event_t event, + lck_mtx_t *mutex, + wait_interrupt_t interruptible) +{ + return( lck_mtx_sleep(mutex, LCK_SLEEP_DEFAULT, event, interruptible)); +} + +wait_result_t +thread_sleep_mutex_deadline_EXT( + event_t event, + lck_mtx_t *mutex, + uint64_t deadline, + wait_interrupt_t interruptible) +{ + return( lck_mtx_sleep_deadline(mutex, LCK_SLEEP_DEFAULT, event, interruptible, deadline)); +} + +void +usimple_lock_EXT( + lck_spin_t *lock) +{ + lck_spin_lock(lock); +} + +void +usimple_lock_init_EXT( + lck_spin_t *lock, + __unused unsigned short tag) +{ + lck_spin_init(lock, &LockCompatGroup, LCK_ATTR_NULL); +} + +unsigned int +usimple_lock_try_EXT( + lck_spin_t *lock) +{ + return(lck_spin_try_lock(lock)); +} + +void +usimple_unlock_EXT( + lck_spin_t *lock) +{ + lck_spin_unlock(lock); +} + +wait_result_t +thread_sleep_usimple_lock_EXT( + event_t event, + lck_spin_t *lock, + wait_interrupt_t interruptible) +{ + return( lck_spin_sleep(lock, LCK_SLEEP_DEFAULT, event, interruptible)); +} diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h new file mode 100644 index 000000000..06496c640 --- /dev/null +++ b/osfmk/kern/locks.h @@ -0,0 +1,409 @@ +/* + * Copyright (c) 2003-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_LOCKS_H_ +#define _KERN_LOCKS_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef MACH_KERNEL_PRIVATE +#include + +extern void lck_mod_init( + void); + +typedef unsigned int lck_type_t; + +#define LCK_TYPE_SPIN 1 +#define LCK_TYPE_MTX 2 +#define LCK_TYPE_RW 3 + +#endif + +typedef unsigned int lck_sleep_action_t; + +#define LCK_SLEEP_DEFAULT 0x00 /* Release the lock while waiting for the event, then reclaim */ + /* RW locks are returned in the same mode */ +#define LCK_SLEEP_UNLOCK 0x01 /* Release the lock and return unheld */ +#define LCK_SLEEP_SHARED 0x02 /* Reclaim the lock in shared mode (RW only) */ +#define LCK_SLEEP_EXCLUSIVE 0x04 /* Reclaim the lock in exclusive mode (RW only) */ + +#define LCK_SLEEP_MASK 0x07 /* Valid actions */ + +#ifdef MACH_KERNEL_PRIVATE + +typedef struct { + uint64_t lck_grp_spin_util_cnt; + uint64_t lck_grp_spin_held_cnt; + uint64_t lck_grp_spin_miss_cnt; + uint64_t lck_grp_spin_held_max; + uint64_t lck_grp_spin_held_cum; +} lck_grp_spin_stat_t; + +typedef struct { + uint64_t lck_grp_mtx_util_cnt; + uint64_t lck_grp_mtx_held_cnt; + uint64_t lck_grp_mtx_miss_cnt; + uint64_t lck_grp_mtx_wait_cnt; + uint64_t lck_grp_mtx_held_max; + uint64_t lck_grp_mtx_held_cum; + uint64_t lck_grp_mtx_wait_max; + uint64_t lck_grp_mtx_wait_cum; +} lck_grp_mtx_stat_t; + +typedef struct { + uint64_t lck_grp_rw_util_cnt; + uint64_t lck_grp_rw_held_cnt; + uint64_t lck_grp_rw_miss_cnt; + uint64_t lck_grp_rw_wait_cnt; + uint64_t lck_grp_rw_held_max; + uint64_t lck_grp_rw_held_cum; + uint64_t lck_grp_rw_wait_max; + uint64_t lck_grp_rw_wait_cum; +} lck_grp_rw_stat_t; + +typedef struct _lck_grp_stat_ { + lck_grp_spin_stat_t lck_grp_spin_stat; + lck_grp_mtx_stat_t lck_grp_mtx_stat; + lck_grp_rw_stat_t lck_grp_rw_stat; +} lck_grp_stat_t; + +#define LCK_GRP_MAX_NAME 64 + +typedef struct _lck_grp_ { + queue_chain_t lck_grp_link; + unsigned int lck_grp_refcnt; + unsigned int lck_grp_spincnt; + unsigned int lck_grp_mtxcnt; + unsigned int lck_grp_rwcnt; + unsigned int lck_grp_attr; + char lck_grp_name[LCK_GRP_MAX_NAME]; + lck_grp_stat_t lck_grp_stat; +} lck_grp_t; + +#define LCK_GRP_NULL (lck_grp_t *)0 + +#else +typedef struct __lck_grp__ lck_grp_t; +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct _lck_grp_attr_ { + unsigned int grp_attr_val; +} lck_grp_attr_t; + +extern lck_grp_attr_t LockDefaultGroupAttr; + +#define LCK_GRP_ATTR_STAT 0x1 + +#else +typedef struct __lck_grp_attr__ lck_grp_attr_t; +#endif + +#define LCK_GRP_ATTR_NULL (lck_grp_attr_t *)0 + +__BEGIN_DECLS + +extern lck_grp_attr_t *lck_grp_attr_alloc_init( + void); + +extern void lck_grp_attr_setdefault( + lck_grp_attr_t *attr); + +extern void lck_grp_attr_setstat( + lck_grp_attr_t *attr); + +extern void lck_grp_attr_free( + lck_grp_attr_t *attr); + +extern lck_grp_t *lck_grp_alloc_init( + const char* grp_name, + lck_grp_attr_t *attr); + +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE +extern void lck_grp_init( + lck_grp_t *grp, + const char* grp_name, + lck_grp_attr_t *attr); + +extern void lck_grp_reference( + lck_grp_t *grp); + +extern void lck_grp_deallocate( + lck_grp_t *grp); + +extern void lck_grp_lckcnt_incr( + lck_grp_t *grp, + lck_type_t lck_type); + +extern void lck_grp_lckcnt_decr( + lck_grp_t *grp, + lck_type_t lck_type); +#endif + +__BEGIN_DECLS + +extern void lck_grp_free( + lck_grp_t *grp); + +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE +typedef struct _lck_attr_ { + unsigned int lck_attr_val; +} lck_attr_t; + +extern lck_attr_t 
LockDefaultLckAttr; + +#define LCK_ATTR_NONE 0 +#define LCK_ATTR_DEBUG 0x1 + +#else +typedef struct __lck_attr__ lck_attr_t; +#endif + +#define LCK_ATTR_NULL (lck_attr_t *)0 + +__BEGIN_DECLS + +extern lck_attr_t *lck_attr_alloc_init( + void); + +extern void lck_attr_setdefault( + lck_attr_t *attr); + +extern void lck_attr_setdebug( + lck_attr_t *attr); + +extern void lck_attr_free( + lck_attr_t *attr); + +#define decl_lck_spin_data(class,name) class lck_spin_t name; + +extern lck_spin_t *lck_spin_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_spin_init( + lck_spin_t *lck, + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_spin_lock( + lck_spin_t *lck); + +extern void lck_spin_unlock( + lck_spin_t *lck); + +extern void lck_spin_destroy( + lck_spin_t *lck, + lck_grp_t *grp); + +extern void lck_spin_free( + lck_spin_t *lck, + lck_grp_t *grp); + +extern wait_result_t lck_spin_sleep( + lck_spin_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible); + +extern wait_result_t lck_spin_sleep_deadline( + lck_spin_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline); + +#ifdef KERNEL_PRIVATE + +extern boolean_t lck_spin_try_lock( + lck_spin_t *lck); + +#endif + + +#define decl_lck_mtx_data(class,name) class lck_mtx_t name; + +extern lck_mtx_t *lck_mtx_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_mtx_init( + lck_mtx_t *lck, + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_mtx_lock( + lck_mtx_t *lck); + +extern void lck_mtx_unlock( + lck_mtx_t *lck); + +extern void lck_mtx_destroy( + lck_mtx_t *lck, + lck_grp_t *grp); + +extern void lck_mtx_free( + lck_mtx_t *lck, + lck_grp_t *grp); + +extern wait_result_t lck_mtx_sleep( + lck_mtx_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible); + +extern wait_result_t lck_mtx_sleep_deadline( + lck_mtx_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline); + +#ifdef KERNEL_PRIVATE + +extern boolean_t lck_mtx_try_lock( + lck_mtx_t *lck); + +#endif /* KERNEL_PRIVATE */ + +extern void lck_mtx_assert( + lck_mtx_t *lck, + unsigned int type); + +__END_DECLS + +#define LCK_MTX_ASSERT_OWNED 0x01 +#define LCK_MTX_ASSERT_NOTOWNED 0x02 + +#ifdef MACH_KERNEL_PRIVATE +extern void lck_mtx_lock_wait( + lck_mtx_t *lck, + thread_t holder); + +extern int lck_mtx_lock_acquire( + lck_mtx_t *lck); + +extern void lck_mtx_unlock_wakeup( + lck_mtx_t *lck, + thread_t holder); + +extern boolean_t lck_mtx_ilk_unlock( + lck_mtx_t *lck); +#endif + +#define decl_lck_rw_data(class,name) class lck_rw_t name; + +typedef unsigned int lck_rw_type_t; + +#define LCK_RW_TYPE_SHARED 0x01 +#define LCK_RW_TYPE_EXCLUSIVE 0x02 + +__BEGIN_DECLS + +extern lck_rw_t *lck_rw_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_rw_init( + lck_rw_t *lck, + lck_grp_t *grp, + lck_attr_t *attr); + +extern void lck_rw_lock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type); + +extern void lck_rw_unlock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type); + +extern void lck_rw_lock_shared( + lck_rw_t *lck); + +extern void lck_rw_unlock_shared( + lck_rw_t *lck); + +extern void lck_rw_lock_exclusive( + lck_rw_t *lck); + +extern void lck_rw_unlock_exclusive( + lck_rw_t *lck); + +#ifdef KERNEL_PRIVATE + +extern lck_rw_type_t lck_rw_done( + lck_rw_t *lck); +#endif + +extern void lck_rw_destroy( + lck_rw_t *lck, + lck_grp_t *grp); + 
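The read/write lock declarations continue below and complete the set. Taken together, the interfaces in this header follow one pattern: every lock is created against a group (which carries the per-type counts and statistics reported through host_lockgroup_info()) plus an attribute set, and the lck_*_sleep() routines drop and reclaim the lock around the block according to the LCK_SLEEP_* action. A minimal sketch of a client subsystem under the new API (the subsystem name, condition flag, and wakeup protocol are hypothetical; every lck_* call is one declared in this header):

	static lck_grp_t	*examp_grp;		/* hypothetical subsystem */
	static lck_mtx_t	*examp_mtx;
	static boolean_t	examp_work_ready;

	void
	examp_init(void)
	{
		/* one group for all of this subsystem's locks, default attributes */
		examp_grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
		examp_mtx = lck_mtx_alloc_init(examp_grp, LCK_ATTR_NULL);
	}

	void
	examp_wait_for_work(void)
	{
		lck_mtx_lock(examp_mtx);
		while (!examp_work_ready)
			/* LCK_SLEEP_DEFAULT: the mutex is released while blocked
			 * and held again when lck_mtx_sleep() returns */
			(void) lck_mtx_sleep(examp_mtx, LCK_SLEEP_DEFAULT,
					(event_t)&examp_work_ready, THREAD_UNINT);
		examp_work_ready = FALSE;
		lck_mtx_unlock(examp_mtx);
	}

	void
	examp_fini(void)
	{
		lck_mtx_free(examp_mtx, examp_grp);	/* release the lock, then its group */
		lck_grp_free(examp_grp);
	}

A waking thread would set examp_work_ready under the mutex and post thread_wakeup((event_t)&examp_work_ready); thread_wakeup() is the existing Mach primitive, not something introduced by this header.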
+extern void lck_rw_free( + lck_rw_t *lck, + lck_grp_t *grp); + +extern wait_result_t lck_rw_sleep( + lck_rw_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible); + +extern wait_result_t lck_rw_sleep_deadline( + lck_rw_t *lck, + lck_sleep_action_t lck_sleep_action, + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline); + +#ifdef KERNEL_PRIVATE + +extern boolean_t lck_rw_lock_shared_to_exclusive( + lck_rw_t *lck); + +extern void lck_rw_lock_exclusive_to_shared( + lck_rw_t *lck); + +extern boolean_t lck_rw_try_lock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type); + +extern boolean_t lck_rw_try_lock_shared( + lck_rw_t *lck); + +extern boolean_t lck_rw_try_lock_exclusive( + lck_rw_t *lck); +#endif + +__END_DECLS + +#endif /* _KERN_LOCKS_H_ */ diff --git a/osfmk/kern/mach_clock.c b/osfmk/kern/mach_clock.c index ab0d9102f..fb4392290 100644 --- a/osfmk/kern/mach_clock.c +++ b/osfmk/kern/mach_clock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,8 +56,6 @@ * * Clock primitives. */ -#include -#include #include #include @@ -79,10 +77,7 @@ #include #include #include -#include -#include #include /* kernel_map */ -#include /* HZ */ #include #include @@ -90,15 +85,16 @@ #include -#if STAT_TIME -#define TICKBUMP(t) timer_bump(t, (1000000/HZ)) -#else -#define TICKBUMP(t) -#endif - boolean_t profile_kernel_services = TRUE; /* Indicates wether or not we * account kernel services + * samples for user task */ +#ifdef MACH_BSD +extern void bsd_hardclock( + boolean_t usermode, + natural_t pc, + int numticks); +#endif /* MACH_BSD */ /* * Hertz rate clock interrupt servicing. Primarily used to @@ -107,22 +103,24 @@ boolean_t profile_kernel_services = TRUE; /* Indicates wether or not we */ void hertz_tick( - boolean_t usermode, /* executing user code */ - natural_t pc) +#if STAT_TIME + natural_t ticks, +#endif /* STAT_TIME */ + boolean_t usermode, + natural_t pc) { - thread_act_t thr_act; - register int my_cpu; - register thread_t thread = current_thread(); - int state; + processor_t processor = current_processor(); + thread_t thread = current_thread(); + int state; #if MACH_PROF #ifdef __MACHO__ -#define ETEXT etext - extern long etext; +#define ETEXT etext + extern long etext; #else -#define ETEXT &etext - extern char etext; +#define ETEXT &etext + extern char etext; #endif - boolean_t inkernel; + boolean_t inkernel; #endif /* MACH_PROF */ #if GPROF struct profile_vars *pv; @@ -133,8 +131,6 @@ hertz_tick( pc++; #endif /* lint */ - my_cpu = cpu_number(); - /* * The system startup sequence initializes the clock * before kicking off threads. 
So it's possible, @@ -156,11 +152,11 @@ hertz_tick( counter(c_clock_ticks++); #if GPROF - pv = PROFILE_VARS(my_cpu); + pv = PROFILE_VARS(cpu_number()); #endif if (usermode) { - TICKBUMP(&thread->user_timer); + TIMER_BUMP(&thread->user_timer, ticks); if (thread->priority < BASEPRI_DEFAULT) state = CPU_STATE_NICE; else @@ -171,11 +167,14 @@ hertz_tick( #endif } else { - TICKBUMP(&thread->system_timer); + TIMER_BUMP(&thread->system_timer, ticks); - state = processor_ptr[my_cpu]->state; + state = processor->state; if ( state == PROCESSOR_IDLE || - state == PROCESSOR_DISPATCHING ) + state == PROCESSOR_DISPATCHING) + state = CPU_STATE_IDLE; + else + if (thread->options & TH_OPT_DELAYIDLE) state = CPU_STATE_IDLE; else state = CPU_STATE_SYSTEM; @@ -201,35 +200,25 @@ hertz_tick( #endif } - machine_slot[my_cpu].cpu_ticks[state]++; + PROCESSOR_DATA(processor, cpu_ticks[state]++); - /* - * Hertz processing performed by the master-cpu - * exclusively. - */ - if (my_cpu == master_cpu) { #ifdef MACH_BSD - { - extern void bsd_hardclock( - boolean_t usermode, - natural_t pc, - int ticks); - - bsd_hardclock(usermode, pc, 1); - } -#endif /* MACH_BSD */ + /*XXX*/ + if (processor == master_processor) { + bsd_hardclock(usermode, pc, 1); } + /*XXX*/ +#endif /* MACH_BSD */ #if MACH_PROF - thr_act = thread->top_act; - if (thr_act->act_profiled) { - if (inkernel && thr_act->map != kernel_map) { + if (thread->act_profiled) { + if (inkernel && thread->map != kernel_map) { /* * Non-kernel thread running in kernel * Register user pc (mach_msg, vm_allocate ...) */ if (profile_kernel_services) - profile(user_pc(thr_act), thr_act->profil_buffer); + profile(user_pc(thread), thread->profil_buffer); } else /* @@ -238,10 +227,10 @@ hertz_tick( * kernel thread and kernel mode * register interrupted pc */ - profile(pc, thr_act->profil_buffer); + profile(pc, thread->profil_buffer); } if (kernel_task->task_profiled) { - if (inkernel && thr_act->map != kernel_map) + if (inkernel && thread->map != kernel_map) /* * User thread not profiled in kernel mode, * kernel task profiled, register kernel pc diff --git a/osfmk/kern/mach_param.h b/osfmk/kern/mach_param.h index 8c8aff36c..4e45a7396 100644 --- a/osfmk/kern/mach_param.h +++ b/osfmk/kern/mach_param.h @@ -58,14 +58,11 @@ * */ +#ifdef XNU_KERNEL_PRIVATE + #ifndef _KERN_MACH_PARAM_H_ #define _KERN_MACH_PARAM_H_ -#include - -#ifdef __APPLE_API_PRIVATE -#ifdef __APPLE_API_EVOLVING - #define THREAD_MAX 2560 /* Max number of threads */ #define THREAD_CHUNK 64 /* Allocation chunk */ @@ -86,7 +83,6 @@ #define SEMAPHORE_MAX (PORT_MAX >> 1) /* Maximum number of semaphores */ -#endif /* __APPLE_API_EVOLVING */ -#endif /* __APPLE_API_PRIVATE */ - #endif /* _KERN_MACH_PARAM_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c index f8e7b652c..b29a1ee40 100644 --- a/osfmk/kern/machine.c +++ b/osfmk/kern/machine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,100 +57,65 @@ * Support for machine independent machine abstraction. 
*/ -#include - #include + +#include #include #include -#include #include #include #include +#include +#include + +#include #include #include #include #include #include #include +#include #include #include #include #include #include -#include -#include -#include /* * Exported variables: */ struct machine_info machine_info; -struct machine_slot machine_slot[NCPUS]; - -thread_t machine_wake_thread; /* Forwards */ void processor_doshutdown( processor_t processor); /* - * cpu_up: + * processor_up: * - * Flag specified cpu as up and running. Called when a processor comes - * online. + * Flag processor as up and running, and available + * for scheduling. */ void -cpu_up( - int cpu) +processor_up( + processor_t processor) { - processor_t processor = cpu_to_processor(cpu); - processor_set_t pset = &default_pset; - struct machine_slot *ms; - spl_t s; + processor_set_t pset = &default_pset; + spl_t s; s = splsched(); processor_lock(processor); init_ast_check(processor); - ms = &machine_slot[cpu]; - ms->running = TRUE; - machine_info.avail_cpus++; simple_lock(&pset->sched_lock); pset_add_processor(pset, processor); enqueue_tail(&pset->active_queue, (queue_entry_t)processor); - processor->deadline = UINT64_MAX; processor->state = PROCESSOR_RUNNING; simple_unlock(&pset->sched_lock); - processor_unlock(processor); - splx(s); -} - -/* - * cpu_down: - * - * Flag specified cpu as down. Called when a processor is about to - * go offline. - */ -void -cpu_down( - int cpu) -{ - processor_t processor; - struct machine_slot *ms; - spl_t s; - - processor = cpu_to_processor(cpu); - - s = splsched(); - processor_lock(processor); - ms = &machine_slot[cpu]; - ms->running = FALSE; - machine_info.avail_cpus--; - /* - * processor has already been removed from pset. - */ - processor->state = PROCESSOR_OFF_LINE; + hw_atomic_add(&machine_info.avail_cpus, 1); + ml_cpu_up(); processor_unlock(processor); splx(s); } @@ -177,13 +142,10 @@ host_reboot( kern_return_t processor_assign( - processor_t processor, - processor_set_t new_pset, - boolean_t wait) + __unused processor_t processor, + __unused processor_set_t new_pset, + __unused boolean_t wait) { -#ifdef lint - processor++; new_pset++; wait++; -#endif /* lint */ return (KERN_FAILURE); } @@ -196,10 +158,9 @@ processor_shutdown( s = splsched(); processor_lock(processor); - if ( processor->state == PROCESSOR_OFF_LINE || - processor->state == PROCESSOR_SHUTDOWN ) { + if (processor->state == PROCESSOR_OFF_LINE) { /* - * Success if already shutdown or being shutdown. + * Success if already shutdown. */ processor_unlock(processor); splx(s); @@ -218,20 +179,41 @@ processor_shutdown( } /* - * Processor must be in a processor set. Must lock the scheduling - * lock to get at the processor state. + * Must lock the scheduling lock + * to get at the processor state. */ pset = processor->processor_set; - simple_lock(&pset->sched_lock); - - /* - * If the processor is dispatching, let it finish - it will set its - * state to running very soon. - */ - while (*(volatile int *)&processor->state == PROCESSOR_DISPATCHING) { - simple_unlock(&pset->sched_lock); - delay(1); + if (pset != PROCESSOR_SET_NULL) { simple_lock(&pset->sched_lock); + + /* + * If the processor is dispatching, let it finish. + */ + while (processor->state == PROCESSOR_DISPATCHING) { + simple_unlock(&pset->sched_lock); + delay(1); + simple_lock(&pset->sched_lock); + } + + /* + * Success if already being shutdown. 
+ */ + if (processor->state == PROCESSOR_SHUTDOWN) { + simple_unlock(&pset->sched_lock); + processor_unlock(processor); + splx(s); + + return (KERN_SUCCESS); + } + } + else { + /* + * Success, already being shutdown. + */ + processor_unlock(processor); + splx(s); + + return (KERN_SUCCESS); } if (processor->state == PROCESSOR_IDLE) { @@ -242,7 +224,7 @@ processor_shutdown( if (processor->state == PROCESSOR_RUNNING) remqueue(&pset->active_queue, (queue_entry_t)processor); else - panic("processor_request_action"); + panic("processor_shutdown"); processor->state = PROCESSOR_SHUTDOWN; @@ -253,9 +235,7 @@ processor_shutdown( processor_doshutdown(processor); splx(s); -#ifdef __ppc__ - cpu_exit_wait(processor->slot_num); -#endif + cpu_exit_wait(PROCESSOR_DATA(processor, slot_num)); return (KERN_SUCCESS); } @@ -270,6 +250,7 @@ processor_doshutdown( thread_t old_thread, self = current_thread(); processor_set_t pset; processor_t prev; + int pcount; /* * Get onto the processor to shutdown @@ -281,24 +262,10 @@ processor_doshutdown( pset = processor->processor_set; simple_lock(&pset->sched_lock); - if (pset->processor_count == 1) { - thread_t thread; - extern void start_cpu_thread(void); - + if ((pcount = pset->processor_count) == 1) { simple_unlock(&pset->sched_lock); processor_unlock(processor); - /* - * Create the thread, and point it at the routine. - */ - thread = kernel_thread_create(start_cpu_thread, MAXPRI_KERNEL); - - thread_lock(thread); - machine_wake_thread = thread; - thread->state = TH_RUN; - pset_run_incr(thread->processor_set); - thread_unlock(thread); - processor_lock(processor); simple_lock(&pset->sched_lock); } @@ -309,43 +276,65 @@ processor_doshutdown( simple_unlock(&pset->sched_lock); processor_unlock(processor); + /* - * Clean up. + * Continue processor shutdown in shutdown context. */ thread_bind(self, prev); - old_thread = switch_to_shutdown_context(self, - processor_offline, processor); - if (processor != current_processor()) - timer_call_shutdown(processor); + old_thread = machine_processor_shutdown(self, processor_offline, processor); - _mk_sp_thread_begin(self, self->last_processor); + thread_begin(self, self->last_processor); thread_dispatch(old_thread); + + /* + * If we just shutdown another processor, move the + * timer call outs to the current processor. + */ + if (processor != current_processor()) { + processor_lock(processor); + if ( processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN ) + timer_call_shutdown(processor); + processor_unlock(processor); + } } /* - * Actually do the processor shutdown. This is called at splsched, - * running on the processor's shutdown stack. + * Complete the shutdown and place the processor offline. + * + * Called at splsched in the shutdown context. 
*/ - void processor_offline( processor_t processor) { - register thread_t old_thread = processor->active_thread; - register int cpu = processor->slot_num; + thread_t thread, old_thread = processor->active_thread; + + thread = processor->idle_thread; + processor->active_thread = thread; + processor->current_pri = IDLEPRI; + + processor->last_dispatch = mach_absolute_time(); + timer_switch((uint32_t)processor->last_dispatch, + &PROCESSOR_DATA(processor, offline_timer)); + + thread_done(old_thread, thread, processor); + + machine_set_current_thread(thread); + + thread_begin(thread, processor); - timer_call_cancel(&processor->quantum_timer); - timer_switch(&kernel_timer[cpu]); - processor->active_thread = processor->idle_thread; - machine_thread_set_current(processor->active_thread); thread_dispatch(old_thread); - /* - * OK, now exit this cpu. - */ - PMAP_DEACTIVATE_KERNEL(cpu); - cpu_down(cpu); + PMAP_DEACTIVATE_KERNEL(PROCESSOR_DATA(processor, slot_num)); + + processor_lock(processor); + processor->state = PROCESSOR_OFF_LINE; + hw_atomic_sub(&machine_info.avail_cpus, 1); + ml_cpu_down(); + processor_unlock(processor); + cpu_sleep(); panic("zombie processor"); /*NOTREACHED*/ @@ -356,11 +345,7 @@ host_get_boot_info( host_priv_t host_priv, kernel_boot_info_t boot_info) { - char *src = ""; - extern char *machine_boot_info( - kernel_boot_info_t boot_info, - vm_size_t buf_len); - + const char *src = ""; if (host_priv == HOST_PRIV_NULL) return (KERN_INVALID_HOST); diff --git a/osfmk/kern/machine.h b/osfmk/kern/machine.h index 0b93cbf43..ce77bd868 100644 --- a/osfmk/kern/machine.h +++ b/osfmk/kern/machine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,6 @@ #ifndef _KERN_MACHINE_H_ #define _KERN_MACHINE_H_ -#include #include #include #include @@ -35,46 +34,76 @@ * Machine support declarations. */ -extern thread_t machine_wake_thread; - -extern void cpu_down( - int cpu); - -extern void cpu_up( - int cpu); +extern void processor_up( + processor_t processor); extern void processor_offline( processor_t processor); +extern void processor_start_thread(void); + /* * Must be implemented in machine dependent code. 
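
[Editor's note: the rewritten offline path maintains machine_info.avail_cpus with hw_atomic_add()/hw_atomic_sub() rather than mutating the counter under a lock. A runnable user-space analogue, assuming hw_atomic_add has add-and-return-new-value semantics; the GCC builtin stands in for the kernel primitive.]

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t avail_cpus;

    /* Analogue of hw_atomic_add: atomically add, return the new value. */
    static uint32_t
    hw_atomic_add_demo(uint32_t *addr, uint32_t delta)
    {
            return (__sync_add_and_fetch(addr, delta));
    }

    int
    main(void)
    {
            printf("up:   %u\n", hw_atomic_add_demo(&avail_cpus, 1));
            printf("down: %u\n", hw_atomic_add_demo(&avail_cpus, (uint32_t)-1));
            return (0);
    }
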
*/ /* Initialize machine dependent ast code */ -extern void init_ast_check( - processor_t processor); +extern void init_ast_check( + processor_t processor); /* Cause check for ast */ -extern void cause_ast_check( - processor_t processor); +extern void cause_ast_check( + processor_t processor); -extern kern_return_t cpu_start( - int slot_num); +extern kern_return_t cpu_control( + int slot_num, + processor_info_t info, + unsigned int count); -extern kern_return_t cpu_control( - int slot_num, - processor_info_t info, - unsigned int count); +extern void cpu_sleep(void); -extern thread_t switch_to_shutdown_context( - thread_t thread, - void (*doshutdown)(processor_t), - processor_t processor); +extern kern_return_t cpu_start( + int slot_num); + +extern void cpu_exit_wait( + int slot_num); + +extern kern_return_t cpu_info( + processor_flavor_t flavor, + int slot_num, + processor_info_t info, + unsigned int *count); + +extern kern_return_t cpu_info_count( + processor_flavor_t flavor, + unsigned int *count); + +extern thread_t machine_processor_shutdown( + thread_t thread, + void (*doshutdown)(processor_t), + processor_t processor); + +extern void machine_idle(void); + +extern void machine_signal_idle( + processor_t processor); + +extern void halt_cpu(void); + +extern void halt_all_cpus( + boolean_t reboot); + +extern char *machine_boot_info( + char *buf, + vm_size_t buf_len); + +/* + * Machine-dependent routine to fill in an array with up to callstack_max + * levels of return pc information. + */ +extern void machine_callstack( + natural_t *buf, + vm_size_t callstack_max); -extern kern_return_t cpu_signal( /* Signal the target CPU */ - int target, - int signal, - unsigned int p1, - unsigned int p2); +extern void consider_machine_collect(void); #endif /* _KERN_MACHINE_H_ */ diff --git a/osfmk/kern/misc_protos.h b/osfmk/kern/misc_protos.h index 5bee473c3..193dce700 100644 --- a/osfmk/kern/misc_protos.h +++ b/osfmk/kern/misc_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
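
[Editor's note: machine_callstack() is declared here but implemented per architecture; it records up to callstack_max return PCs. Below is a purely hypothetical frame-pointer walk, assuming the conventional layout in which each frame begins with (saved FP, return address); the typedefs are stand-ins, and real implementations must also validate addresses and alignment.]

    typedef unsigned int   natural_t;   /* stand-in for the Mach type */
    typedef unsigned long  vm_size_t;   /* stand-in for the Mach type */

    /* Hypothetical sketch only; not the xnu implementation. */
    void
    machine_callstack_sketch(natural_t *buf, vm_size_t callstack_max)
    {
            unsigned long  *fp = __builtin_frame_address(0);
            vm_size_t       depth = 0;

            while (fp != 0 && depth < callstack_max) {
                    buf[depth++] = (natural_t)fp[1]; /* saved return address */
                    fp = (unsigned long *)fp[0];     /* previous frame pointer */
            }
            while (depth < callstack_max)
                    buf[depth++] = 0;                /* pad unused slots */
    }
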
* * @APPLE_LICENSE_HEADER_START@ * @@ -33,6 +33,7 @@ #include #include #include +#include /* Set a bit in a bit array */ extern void setbit( @@ -57,58 +58,35 @@ extern int testbit( int which, int *bitmap); -/* Move arbitrarily-aligned data from one array to another */ -extern void bcopy( - const char *from, - char *to, - vm_size_t nbytes); - -/* Move overlapping, arbitrarily aligned data from one array to another */ -/* Not present on all ports */ -extern void ovbcopy( - const char *from, - char *to, - vm_size_t nbytes); - -extern int bcmp( - const char *a, - const char *b, - vm_size_t len); - -/* Zero an arbitrarily aligned array */ -extern void bzero( - char *from, - vm_size_t nbytes); - /* Move arbitrarily-aligned data from a user space to kernel space */ -extern boolean_t copyin( - const char *user_addr, - char *kernel_addr, - vm_size_t nbytes); +extern int copyin( + const user_addr_t user_addr, + char *kernel_addr, + vm_size_t nbytes); /* Move a NUL-terminated string from a user space to kernel space */ -extern boolean_t copyinstr( - const char *user_addr, - char *kernel_addr, - vm_size_t max, - vm_size_t *actual); +extern int copyinstr( + const user_addr_t user_addr, + char *kernel_addr, + vm_size_t max, + vm_size_t *actual); /* Move arbitrarily-aligned data from a user space to kernel space */ -extern boolean_t copyinmsg( - const char *user_addr, - char *kernel_addr, - mach_msg_size_t nbytes); +extern int copyinmsg( + const user_addr_t user_addr, + char *kernel_addr, + mach_msg_size_t nbytes); /* Move arbitrarily-aligned data from a kernel space to user space */ -extern boolean_t copyout( - const char *kernel_addr, - char *user_addr, - vm_size_t nbytes); +extern int copyout( + const char *kernel_addr, + user_addr_t user_addr, + vm_size_t nbytes); /* Move arbitrarily-aligned data from a kernel space to user space */ -extern boolean_t copyoutmsg( - const char *kernel_addr, - char *user_addr, +extern int copyoutmsg( + const char *kernel_addr, + user_addr_t user_addr, mach_msg_size_t nbytes); extern int sscanf(const char *input, const char *fmt, ...); @@ -123,10 +101,6 @@ extern void kdp_printf(const char *format, ...); extern void printf_init(void); -extern void panic(const char *string, ...); - -extern void panic_init(void); - extern void log(int level, char *fmt, ...); void @@ -168,30 +142,12 @@ extern int _longjmp( extern void bootstrap_create(void); -extern void halt_cpu(void); - -extern void halt_all_cpus( - boolean_t reboot); - extern void Debugger( const char * message); extern void delay( int n); -extern char *machine_boot_info( - char *buf, - vm_size_t buf_len); - -/* - * Machine-dependent routine to fill in an array with up to callstack_max - * levels of return pc information. - */ -extern void machine_callstack( - natural_t *buf, - vm_size_t callstack_max); - -extern void consider_machine_collect(void); extern void norma_bootstrap(void); diff --git a/osfmk/kern/mk_sp.c b/osfmk/kern/mk_sp.c index 9f9990210..565681e39 100644 --- a/osfmk/kern/mk_sp.c +++ b/osfmk/kern/mk_sp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,12 +24,7 @@ * */ -/*** - *** ??? The following lines were picked up when code was incorporated - *** into this file from `kern/syscall_subr.c.' These should be moved - *** with the code if it moves again. Otherwise, they should be trimmed, - *** based on the files included above. 
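
[Editor's note: the misc_protos.h hunk above changes the copy-in/copy-out family from boolean_t returns and char * user pointers to errno-style int returns and the 64-bit-safe user_addr_t. Callers therefore test a nonzero error code rather than a failure boolean. A hedged usage sketch against the new signatures; struct example_args and fetch_user_args are illustrative names, not from the source.]

    static int
    fetch_user_args(user_addr_t uaddr, struct example_args *kargs)
    {
            int error;

            /* New style: errno return, 64-bit-clean user address. */
            error = copyin(uaddr, (char *)kargs, sizeof (*kargs));
            if (error)
                    return (error);     /* e.g. EFAULT, not a boolean_t */

            return (0);
    }
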
- ***/ +/* The routines in this module are all obsolete */ #include #include @@ -42,19 +37,12 @@ #include #include #include -#include #include #include #include #include -/*** - *** ??? End of lines picked up when code was incorporated - *** into this file from `kern/syscall_subr.c.' - ***/ - -#include #include #include #include @@ -62,141 +50,9 @@ #include #include #include - -/*** - *** ??? The next two files supply the prototypes for `thread_set_policy()' - *** and `thread_policy.' These routines cannot stay here if they are - *** exported Mach system calls. - ***/ #include #include -void -_mk_sp_thread_unblock( - thread_t thread) -{ - if (thread->state & TH_IDLE) - return; - - if (thread->sched_mode & TH_MODE_REALTIME) { - thread->realtime.deadline = mach_absolute_time(); - thread->realtime.deadline += thread->realtime.constraint; - } - - thread->current_quantum = 0; - thread->computation_metered = 0; - thread->reason = AST_NONE; -} - -void -_mk_sp_thread_done( - thread_t old_thread, - thread_t new_thread, - processor_t processor) -{ - /* - * A running thread is being taken off a processor: - */ - processor->last_dispatch = mach_absolute_time(); - - if (old_thread->state & TH_IDLE) - return; - - /* - * Compute remainder of current quantum. - */ - if ( first_timeslice(processor) && - processor->quantum_end > processor->last_dispatch ) - old_thread->current_quantum = - (processor->quantum_end - processor->last_dispatch); - else - old_thread->current_quantum = 0; - - if (old_thread->sched_mode & TH_MODE_REALTIME) { - /* - * Cancel the deadline if the thread has - * consumed the entire quantum. - */ - if (old_thread->current_quantum == 0) { - old_thread->realtime.deadline = UINT64_MAX; - old_thread->reason |= AST_QUANTUM; - } - } - else { - /* - * For non-realtime threads treat a tiny - * remaining quantum as an expired quantum - * but include what's left next time. - */ - if (old_thread->current_quantum < min_std_quantum) { - old_thread->reason |= AST_QUANTUM; - old_thread->current_quantum += std_quantum; - } - } - - /* - * If we are doing a direct handoff then - * give the remainder of our quantum to - * the next guy. 
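
[Editor's note: the removed _mk_sp_thread_done() shows the quantum bookkeeping that survives elsewhere in the scheduler: the unexpired portion of a quantum is quantum_end - last_dispatch, a tiny remainder is treated as expired but credited toward the next quantum, and a direct handoff donates the remainder to the new thread. A small runnable demonstration of that arithmetic; the two quantum constants are placeholders, not xnu's values.]

    #include <stdint.h>
    #include <stdio.h>

    #define MIN_STD_QUANTUM  1000u      /* placeholder, abstime units */
    #define STD_QUANTUM     10000u      /* placeholder, abstime units */

    int
    main(void)
    {
            uint64_t quantum_end = 50000, last_dispatch = 49500;
            uint32_t remainder, next_quantum;

            remainder = (quantum_end > last_dispatch) ?
                (uint32_t)(quantum_end - last_dispatch) : 0;

            if (remainder < MIN_STD_QUANTUM) {
                    /* Treat as expired, but include what's left next time. */
                    next_quantum = remainder + STD_QUANTUM;
                    remainder = 0;
            } else
                    next_quantum = remainder;

            printf("remainder %u, next quantum %u\n", remainder, next_quantum);
            return (0);
    }
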
- */ - if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) { - new_thread->current_quantum = old_thread->current_quantum; - old_thread->reason |= AST_QUANTUM; - old_thread->current_quantum = 0; - } - - old_thread->last_switch = processor->last_dispatch; - - old_thread->computation_metered += - (old_thread->last_switch - old_thread->computation_epoch); -} - -void -_mk_sp_thread_begin( - thread_t thread, - processor_t processor) -{ - - /* - * The designated thread is beginning execution: - */ - if (thread->state & TH_IDLE) { - timer_call_cancel(&processor->quantum_timer); - processor->timeslice = 1; - - return; - } - - if (thread->current_quantum == 0) - thread_quantum_init(thread); - - processor->quantum_end = - (processor->last_dispatch + thread->current_quantum); - timer_call_enter1(&processor->quantum_timer, - thread, processor->quantum_end); - - processor_timeslice_setup(processor, thread); - - thread->last_switch = processor->last_dispatch; - - thread->computation_epoch = thread->last_switch; -} - -void -_mk_sp_thread_dispatch( - thread_t thread) -{ - if (thread->reason & AST_QUANTUM) - thread_setrun(thread, SCHED_TAILQ); - else - if (thread->reason & AST_PREEMPT) - thread_setrun(thread, SCHED_HEADQ); - else - thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); - - thread->reason = AST_NONE; -} - /* * thread_policy_common: * @@ -249,8 +105,8 @@ thread_policy_common( if (priority >= MINPRI_KERNEL) priority -= MINPRI_KERNEL; else - if (priority >= MINPRI_SYSTEM) - priority -= MINPRI_SYSTEM; + if (priority >= MINPRI_RESERVED) + priority -= MINPRI_RESERVED; else priority -= BASEPRI_DEFAULT; @@ -282,7 +138,7 @@ thread_policy_common( */ kern_return_t thread_set_policy( - thread_act_t thr_act, + thread_t thread, processor_set_t pset, policy_t policy, policy_base_t base, @@ -290,25 +146,19 @@ thread_set_policy( policy_limit_t limit, mach_msg_type_number_t limit_count) { - thread_t thread; int max, bas; kern_return_t result = KERN_SUCCESS; - if ( thr_act == THR_ACT_NULL || + if ( thread == THREAD_NULL || pset == PROCESSOR_SET_NULL ) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(thr_act); - if (thread == THREAD_NULL) { - act_unlock_thread(thr_act); - - return(KERN_INVALID_ARGUMENT); - } + thread_mtx_lock(thread); if (pset != thread->processor_set) { - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); - return(KERN_FAILURE); + return (KERN_FAILURE); } switch (policy) { @@ -382,15 +232,16 @@ thread_set_policy( } if (result != KERN_SUCCESS) { - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); - return(result); + return (result); } result = thread_policy_common(thread, policy, bas); - act_unlock_thread(thr_act); - return(result); + thread_mtx_unlock(thread); + + return (result); } @@ -403,38 +254,37 @@ thread_set_policy( */ kern_return_t thread_policy( - thread_act_t thr_act, + thread_t thread, policy_t policy, policy_base_t base, mach_msg_type_number_t count, boolean_t set_limit) { - thread_t thread; - processor_set_t pset; kern_return_t result = KERN_SUCCESS; + processor_set_t pset; policy_limit_t limit; int limcount; policy_rr_limit_data_t rr_limit; policy_fifo_limit_data_t fifo_limit; policy_timeshare_limit_data_t ts_limit; - if (thr_act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(thr_act); + thread_mtx_lock(thread); + pset = thread->processor_set; - if ( thread == THREAD_NULL || - pset == PROCESSOR_SET_NULL ){ - act_unlock_thread(thr_act); + if (pset == PROCESSOR_SET_NULL) { + 
thread_mtx_unlock(thread); - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); } if ( invalid_policy(policy) || ((POLICY_TIMESHARE | POLICY_RR | POLICY_FIFO) & policy) == 0 ) { - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); - return(KERN_INVALID_POLICY); + return (KERN_INVALID_POLICY); } if (set_limit) { @@ -580,496 +430,11 @@ thread_policy( } - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); if (result == KERN_SUCCESS) - result = thread_set_policy(thr_act, pset, + result = thread_set_policy(thread, pset, policy, base, count, limit, limcount); return(result); } - -/* - * Define shifts for simulating (5/8)**n - */ - -shift_data_t wait_shift[32] = { - {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7}, - {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13}, - {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18}, - {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}}; - -/* - * do_priority_computation: - * - * Calculate new priority for thread based on its base priority plus - * accumulated usage. PRI_SHIFT and PRI_SHIFT_2 convert from - * usage to priorities. SCHED_SHIFT converts for the scaling - * of the sched_usage field by SCHED_SCALE. This scaling comes - * from the multiplication by sched_load (thread_timer_delta) - * in sched.h. sched_load is calculated as a scaled overload - * factor in compute_mach_factor (mach_factor.c). - */ -#ifdef PRI_SHIFT_2 -#if PRI_SHIFT_2 > 0 -#define do_priority_computation(thread, pri) \ - MACRO_BEGIN \ - (pri) = (thread)->priority /* start with base priority */ \ - - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ - - ((thread)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \ - if ((pri) < MINPRI_STANDARD) \ - (pri) = MINPRI_STANDARD; \ - else \ - if ((pri) > MAXPRI_STANDARD) \ - (pri) = MAXPRI_STANDARD; \ - MACRO_END -#else /* PRI_SHIFT_2 */ -#define do_priority_computation(thread, pri) \ - MACRO_BEGIN \ - (pri) = (thread)->priority /* start with base priority */ \ - - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ - + ((thread)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \ - if ((pri) < MINPRI_STANDARD) \ - (pri) = MINPRI_STANDARD; \ - else \ - if ((pri) > MAXPRI_STANDARD) \ - (pri) = MAXPRI_STANDARD; \ - MACRO_END -#endif /* PRI_SHIFT_2 */ -#else /* defined(PRI_SHIFT_2) */ -#define do_priority_computation(thread, pri) \ - MACRO_BEGIN \ - (pri) = (thread)->priority /* start with base priority */ \ - - ((thread)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \ - if ((pri) < MINPRI_STANDARD) \ - (pri) = MINPRI_STANDARD; \ - else \ - if ((pri) > MAXPRI_STANDARD) \ - (pri) = MAXPRI_STANDARD; \ - MACRO_END -#endif /* defined(PRI_SHIFT_2) */ - -void -set_priority( - register thread_t thread, - register int priority) -{ - thread->priority = priority; - compute_priority(thread, FALSE); -} - -/* - * compute_priority: - * - * Reset the current scheduled priority of the - * thread according to its base priority if the - * thread has not been promoted or depressed. - * - * If the thread is timesharing, adjust according - * to recent cpu usage. - * - * The thread *must* be locked by the caller. 
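
[Editor's note: thread_set_policy() and thread_policy() now take a thread_t directly and serialize with thread_mtx_lock()/thread_mtx_unlock(), replacing the old activation-based act_lock_thread() dance. A hedged sketch of the resulting validate-under-mutex shape; thread_policy_example is an illustrative name.]

    kern_return_t
    thread_policy_example(thread_t thread)
    {
            if (thread == THREAD_NULL)
                    return (KERN_INVALID_ARGUMENT);

            thread_mtx_lock(thread);

            if (thread->processor_set == PROCESSOR_SET_NULL) {
                    thread_mtx_unlock(thread);
                    return (KERN_INVALID_ARGUMENT);
            }

            /* ... inspect and update scheduling policy fields ... */

            thread_mtx_unlock(thread);
            return (KERN_SUCCESS);
    }
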
- */ -void -compute_priority( - register thread_t thread, - boolean_t override_depress) -{ - register int priority; - - if ( !(thread->sched_mode & TH_MODE_PROMOTED) && - (!(thread->sched_mode & TH_MODE_ISDEPRESSED) || - override_depress ) ) { - if (thread->sched_mode & TH_MODE_TIMESHARE) - do_priority_computation(thread, priority); - else - priority = thread->priority; - - set_sched_pri(thread, priority); - } -} - -/* - * compute_my_priority: - * - * Version of compute priority for current thread. - * Caller must have thread locked and thread must - * be timesharing and not depressed. - * - * Only used for priority updates. - */ -void -compute_my_priority( - register thread_t thread) -{ - register int priority; - - do_priority_computation(thread, priority); - assert(thread->runq == RUN_QUEUE_NULL); - thread->sched_pri = priority; -} - -/* - * update_priority - * - * Cause the priority computation of a thread that has been - * sleeping or suspended to "catch up" with the system. Thread - * *MUST* be locked by caller. If thread is running, then this - * can only be called by the thread on itself. - */ -void -update_priority( - register thread_t thread) -{ - register unsigned int ticks; - register shift_t shiftp; - - ticks = sched_tick - thread->sched_stamp; - assert(ticks != 0); - - /* - * If asleep for more than 30 seconds forget all - * cpu_usage, else catch up on missed aging. - * 5/8 ** n is approximated by the two shifts - * in the wait_shift array. - */ - thread->sched_stamp += ticks; - thread_timer_delta(thread); - if (ticks > 30) { - thread->cpu_usage = 0; - thread->sched_usage = 0; - } - else { - thread->cpu_usage += thread->cpu_delta; - thread->sched_usage += thread->sched_delta; - - shiftp = &wait_shift[ticks]; - if (shiftp->shift2 > 0) { - thread->cpu_usage = - (thread->cpu_usage >> shiftp->shift1) + - (thread->cpu_usage >> shiftp->shift2); - thread->sched_usage = - (thread->sched_usage >> shiftp->shift1) + - (thread->sched_usage >> shiftp->shift2); - } - else { - thread->cpu_usage = - (thread->cpu_usage >> shiftp->shift1) - - (thread->cpu_usage >> -(shiftp->shift2)); - thread->sched_usage = - (thread->sched_usage >> shiftp->shift1) - - (thread->sched_usage >> -(shiftp->shift2)); - } - } - - thread->cpu_delta = 0; - thread->sched_delta = 0; - - /* - * Check for fail-safe release. - */ - if ( (thread->sched_mode & TH_MODE_FAILSAFE) && - thread->sched_stamp >= thread->safe_release ) { - if (!(thread->safe_mode & TH_MODE_TIMESHARE)) { - if (thread->safe_mode & TH_MODE_REALTIME) { - thread->priority = BASEPRI_RTQUEUES; - - thread->sched_mode |= TH_MODE_REALTIME; - } - - thread->sched_mode &= ~TH_MODE_TIMESHARE; - - if (thread->state & TH_RUN) - pset_share_decr(thread->processor_set); - - if (!(thread->sched_mode & TH_MODE_ISDEPRESSED)) - set_sched_pri(thread, thread->priority); - } - - thread->safe_mode = 0; - thread->sched_mode &= ~TH_MODE_FAILSAFE; - } - - /* - * Recompute scheduled priority if appropriate. - */ - if ( (thread->sched_mode & TH_MODE_TIMESHARE) && - !(thread->sched_mode & TH_MODE_PROMOTED) && - !(thread->sched_mode & TH_MODE_ISDEPRESSED) ) { - register int new_pri; - - do_priority_computation(thread, new_pri); - if (new_pri != thread->sched_pri) { - run_queue_t runq; - - runq = run_queue_remove(thread); - thread->sched_pri = new_pri; - if (runq != RUN_QUEUE_NULL) - thread_setrun(thread, SCHED_TAILQ); - } - } -} - -/* - * thread_switch_continue: - * - * Continuation routine for a thread switch. 
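
[Editor's note: the wait_shift table removed here (and re-introduced as sched_decay_shifts in priority.c later in this patch) approximates multiplying usage by (5/8)**n using two shifts: usage >> shift1 plus or minus usage >> |shift2|, with the sign of shift2 selecting addition or subtraction. A runnable check of the first few entries against the exact value; link with -lm.]

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    struct shift { int s1, s2; };

    /* First entries of the (5/8)**n table; index is the tick count n. */
    static const struct shift decay[] = {
            {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7}
    };

    static uint32_t
    apply(uint32_t usage, struct shift sh)
    {
            if (sh.s2 > 0)
                    return ((usage >> sh.s1) + (usage >> sh.s2));
            return ((usage >> sh.s1) - (usage >> -sh.s2));
    }

    int
    main(void)
    {
            uint32_t usage = 1u << 20;
            int n;

            for (n = 1; n < 8; n++)     /* update_priority asserts n != 0 */
                    printf("n=%d  shifts=%7u  exact=%9.0f\n",
                        n, apply(usage, decay[n]), usage * pow(0.625, n));
            return (0);
    }
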
- * - * Just need to arrange the return value gets sent out correctly and that - * we cancel the timer or the depression called for by the options to the - * thread_switch call. - */ -void -_mk_sp_thread_switch_continue(void) -{ - register thread_t self = current_thread(); - int wait_result = self->wait_result; - int option = self->saved.swtch.option; - - if (option == SWITCH_OPTION_WAIT && wait_result != THREAD_TIMED_OUT) - thread_cancel_timer(); - else - if (option == SWITCH_OPTION_DEPRESS) - _mk_sp_thread_depress_abort(self, FALSE); - - thread_syscall_return(KERN_SUCCESS); - /*NOTREACHED*/ -} - -/* - * thread_switch: - * - * Context switch. User may supply thread hint. - * - * Fixed priority threads that call this get what they asked for - * even if that violates priority order. - */ -kern_return_t -_mk_sp_thread_switch( - thread_act_t hint_act, - int option, - mach_msg_timeout_t option_time) -{ - register thread_t self = current_thread(); - int s; - - /* - * Check and use thr_act hint if appropriate. It is not - * appropriate to give a hint that shares the current shuttle. - */ - if (hint_act != THR_ACT_NULL) { - register thread_t thread = act_lock_thread(hint_act); - - if ( thread != THREAD_NULL && - thread != self && - thread->top_act == hint_act ) { - processor_t processor; - - s = splsched(); - thread_lock(thread); - - /* - * Check if the thread is in the right pset, - * is not bound to a different processor, - * and that realtime is not involved. - * - * Next, pull it off its run queue. If it - * doesn't come, it's not eligible. - */ - processor = current_processor(); - if (processor->current_pri < BASEPRI_RTQUEUES && - thread->sched_pri < BASEPRI_RTQUEUES && - thread->processor_set == processor->processor_set && - (thread->bound_processor == PROCESSOR_NULL || - thread->bound_processor == processor) && - run_queue_remove(thread) != RUN_QUEUE_NULL ) { - /* - * Hah, got it!! - */ - thread_unlock(thread); - - act_unlock_thread(hint_act); - act_deallocate(hint_act); - - if (option == SWITCH_OPTION_WAIT) - assert_wait_timeout(option_time, THREAD_ABORTSAFE); - else - if (option == SWITCH_OPTION_DEPRESS) - _mk_sp_thread_depress_ms(option_time); - - self->saved.swtch.option = option; - - thread_run(self, _mk_sp_thread_switch_continue, thread); - /* NOTREACHED */ - } - - thread_unlock(thread); - splx(s); - } - - act_unlock_thread(hint_act); - act_deallocate(hint_act); - } - - /* - * No handoff hint supplied, or hint was wrong. Call thread_block() in - * hopes of running something else. If nothing else is runnable, - * thread_block will detect this. WARNING: thread_switch with no - * option will not do anything useful if the thread calling it is the - * highest priority thread (can easily happen with a collection - * of timesharing threads). - */ - if (option == SWITCH_OPTION_WAIT) - assert_wait_timeout(option_time, THREAD_ABORTSAFE); - else - if (option == SWITCH_OPTION_DEPRESS) - _mk_sp_thread_depress_ms(option_time); - - self->saved.swtch.option = option; - - thread_block_reason(_mk_sp_thread_switch_continue, AST_YIELD); - - if (option == SWITCH_OPTION_WAIT) - thread_cancel_timer(); - else - if (option == SWITCH_OPTION_DEPRESS) - _mk_sp_thread_depress_abort(self, FALSE); - - return (KERN_SUCCESS); -} - -/* - * Depress thread's priority to lowest possible for the specified interval, - * with a value of zero resulting in no timeout being scheduled. 
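
[Editor's note: the removed _mk_sp_thread_switch() was the kernel side of the thread_switch() Mach trap: an optional handoff hint plus SWITCH_OPTION_WAIT or SWITCH_OPTION_DEPRESS behavior. A minimal user-space caller of the trap as it has long been exposed; the 10 ms depression value is arbitrary.]

    #include <mach/mach.h>
    #include <mach/thread_switch.h>

    int
    main(void)
    {
            /*
             * Yield with a short priority depression: no handoff hint,
             * depress for 10 milliseconds (or until aborted).
             */
            thread_switch(MACH_PORT_NULL, SWITCH_OPTION_DEPRESS, 10);

            /* Plain yield with no depression or wait. */
            thread_switch(MACH_PORT_NULL, SWITCH_OPTION_NONE, 0);

            return (0);
    }
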
- */ -void -_mk_sp_thread_depress_abstime( - uint64_t interval) -{ - register thread_t self = current_thread(); - uint64_t deadline; - spl_t s; - - s = splsched(); - thread_lock(self); - if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { - processor_t myprocessor = self->last_processor; - - self->sched_pri = DEPRESSPRI; - myprocessor->current_pri = self->sched_pri; - self->sched_mode &= ~TH_MODE_PREEMPT; - self->sched_mode |= TH_MODE_DEPRESS; - - if (interval != 0) { - clock_absolutetime_interval_to_deadline(interval, &deadline); - if (!timer_call_enter(&self->depress_timer, deadline)) - self->depress_timer_active++; - } - } - thread_unlock(self); - splx(s); -} - -void -_mk_sp_thread_depress_ms( - mach_msg_timeout_t interval) -{ - uint64_t abstime; - - clock_interval_to_absolutetime_interval( - interval, 1000*NSEC_PER_USEC, &abstime); - _mk_sp_thread_depress_abstime(abstime); -} - -/* - * Priority depression expiration. - */ -void -thread_depress_expire( - timer_call_param_t p0, - timer_call_param_t p1) -{ - thread_t thread = p0; - spl_t s; - - s = splsched(); - thread_lock(thread); - if (--thread->depress_timer_active == 1) { - thread->sched_mode &= ~TH_MODE_ISDEPRESSED; - compute_priority(thread, FALSE); - } - thread_unlock(thread); - splx(s); -} - -/* - * Prematurely abort priority depression if there is one. - */ -kern_return_t -_mk_sp_thread_depress_abort( - register thread_t thread, - boolean_t abortall) -{ - kern_return_t result = KERN_NOT_DEPRESSED; - spl_t s; - - s = splsched(); - thread_lock(thread); - if (abortall || !(thread->sched_mode & TH_MODE_POLLDEPRESS)) { - if (thread->sched_mode & TH_MODE_ISDEPRESSED) { - thread->sched_mode &= ~TH_MODE_ISDEPRESSED; - compute_priority(thread, FALSE); - result = KERN_SUCCESS; - } - - if (timer_call_cancel(&thread->depress_timer)) - thread->depress_timer_active--; - } - thread_unlock(thread); - splx(s); - - return (result); -} - -void -_mk_sp_thread_perhaps_yield( - thread_t self) -{ - spl_t s; - - assert(self == current_thread()); - - s = splsched(); - if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) { - extern uint64_t max_poll_computation; - extern int sched_poll_yield_shift; - uint64_t total_computation, abstime; - - abstime = mach_absolute_time(); - total_computation = abstime - self->computation_epoch; - total_computation += self->computation_metered; - if (total_computation >= max_poll_computation) { - processor_t myprocessor = current_processor(); - ast_t preempt; - - thread_lock(self); - if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { - self->sched_pri = DEPRESSPRI; - myprocessor->current_pri = self->sched_pri; - self->sched_mode &= ~TH_MODE_PREEMPT; - } - self->computation_epoch = abstime; - self->computation_metered = 0; - self->sched_mode |= TH_MODE_POLLDEPRESS; - - abstime += (total_computation >> sched_poll_yield_shift); - if (!timer_call_enter(&self->depress_timer, abstime)) - self->depress_timer_active++; - thread_unlock(self); - - if ((preempt = csw_check(self, myprocessor)) != AST_NONE) - ast_on(preempt); - } - } - splx(s); -} diff --git a/osfmk/kern/mk_sp.h b/osfmk/kern/mk_sp.h deleted file mode 100644 index fe78f7808..000000000 --- a/osfmk/kern/mk_sp.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). 
You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ - -#ifndef _KERN_MK_SP_H_ -#define _KERN_MK_SP_H_ - -/* - * Scheduling operation prototypes - */ - -void _mk_sp_thread_unblock( - thread_t thread); - -void _mk_sp_thread_done( - thread_t old_thread, - thread_t new_thread, - processor_t processor); - -void _mk_sp_thread_begin( - thread_t new_thread, - processor_t processor); - -void _mk_sp_thread_dispatch( - thread_t thread); - -kern_return_t _mk_sp_thread_switch( - thread_act_t hint_act, - int option, - mach_msg_timeout_t option_time); - -void _mk_sp_thread_depress_ms( - mach_msg_timeout_t interval); - -void _mk_sp_thread_depress_abstime( - uint64_t interval); - -kern_return_t _mk_sp_thread_depress_abort( - thread_t thread, - boolean_t abortall); - -void _mk_sp_thread_perhaps_yield( - thread_t self); - -#endif /* _KERN_MK_SP_H_ */ diff --git a/osfmk/kern/mk_timer.c b/osfmk/kern/mk_timer.c index dfa26e353..6b6d15d23 100644 --- a/osfmk/kern/mk_timer.c +++ b/osfmk/kern/mk_timer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -29,6 +29,7 @@ */ #include +#include #include #include @@ -49,7 +50,8 @@ static void mk_timer_expire( void *p1); mach_port_name_t -mk_timer_create(void) +mk_timer_create_trap( + __unused struct mk_timer_create_trap_args *args) { mk_timer_t timer; ipc_space_t myspace = current_space(); @@ -67,12 +69,12 @@ mk_timer_create(void) result = ipc_port_translate_receive(myspace, name, &port); if (result != KERN_SUCCESS) { - zfree(mk_timer_zone, (vm_offset_t)timer); + zfree(mk_timer_zone, timer); return (MACH_PORT_NULL); } - simple_lock_init(&timer->lock, ETAP_MISC_TIMER); + simple_lock_init(&timer->lock, 0); call_entry_setup(&timer->call_entry, mk_timer_expire, timer); timer->is_armed = timer->is_dead = FALSE; timer->active = 0; @@ -111,7 +113,7 @@ mk_timer_port_destroy( timer->is_dead = TRUE; if (timer->active == 0) { simple_unlock(&timer->lock); - zfree(mk_timer_zone, (vm_offset_t)timer); + zfree(mk_timer_zone, timer); ipc_port_release_send(port); return; @@ -134,7 +136,7 @@ mk_timer_init(void) static void mk_timer_expire( void *p0, - void *p1) + __unused void *p1) { mk_timer_t timer = p0; ipc_port_t port; @@ -171,7 +173,7 @@ mk_timer_expire( if (--timer->active == 0 && timer->is_dead) { simple_unlock(&timer->lock); - zfree(mk_timer_zone, (vm_offset_t)timer); + zfree(mk_timer_zone, timer); ipc_port_release_send(port); return; @@ -181,9 +183,10 @@ mk_timer_expire( } kern_return_t -mk_timer_destroy( - mach_port_name_t name) +mk_timer_destroy_trap( + struct mk_timer_destroy_trap_args *args) { + mach_port_name_t name = args->name; ipc_space_t myspace = current_space(); ipc_port_t port; kern_return_t result; @@ -205,18 +208,16 @@ mk_timer_destroy( } kern_return_t -mk_timer_arm( - mach_port_name_t name, - uint64_t expire_time) +mk_timer_arm_trap( + struct mk_timer_arm_trap_args *args) { - uint64_t time_of_arming; + mach_port_name_t name = args->name; + uint64_t expire_time = args->expire_time; mk_timer_t timer; ipc_space_t myspace = current_space(); ipc_port_t port; kern_return_t result; - clock_get_uptime(&time_of_arming); - result = ipc_port_translate_receive(myspace, name, &port); if (result != KERN_SUCCESS) return (result); @@ -229,7 +230,6 @@ mk_timer_arm( ip_unlock(port); if (!timer->is_dead) { - timer->time_of_arming = time_of_arming; timer->is_armed = TRUE; if (!thread_call_enter_delayed(&timer->call_entry, expire_time)) @@ -247,10 +247,11 @@ mk_timer_arm( } kern_return_t -mk_timer_cancel( - mach_port_name_t name, - uint64_t *result_time) +mk_timer_cancel_trap( + struct mk_timer_cancel_trap_args *args) { + mach_port_name_t name = args->name; + mach_vm_address_t result_time_addr = args->result_time; uint64_t armed_time = 0; mk_timer_t timer; ipc_space_t myspace = current_space(); @@ -283,8 +284,8 @@ mk_timer_cancel( } if (result == KERN_SUCCESS) - if ( result_time != NULL && - copyout((void *)&armed_time, (void *)result_time, + if ( result_time_addr != 0 && + copyout((void *)&armed_time, result_time_addr, sizeof (armed_time)) != 0 ) result = KERN_FAILURE; diff --git a/osfmk/kern/mk_timer.h b/osfmk/kern/mk_timer.h index ddf89da12..d47e02bb2 100644 --- a/osfmk/kern/mk_timer.h +++ b/osfmk/kern/mk_timer.h @@ -39,7 +39,6 @@ struct mk_timer { decl_simple_lock_data(,lock) call_entry_data_t call_entry; - uint64_t time_of_arming; uint32_t is_dead:1, is_armed:1; int active; diff --git a/osfmk/kern/norma_protos.h b/osfmk/kern/norma_protos.h index d00d2901a..2f45824a9 100644 --- a/osfmk/kern/norma_protos.h +++ b/osfmk/kern/norma_protos.h @@ -1,5 +1,5 @@ /* - * Copyright 
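
[Editor's note: the mk_timer entry points above are now traps taking packed argument structures (mk_timer_create_trap, mk_timer_arm_trap, and so on), but the user-visible model is unchanged: a timer is a Mach receive right that gets a message when it fires. A hedged usage sketch from user space, assuming the mk_timer wrappers in <mach/mk_timer.h> and mach_absolute_time(); the buffer size and delay are illustrative.]

    #include <mach/mach.h>
    #include <mach/mach_time.h>
    #include <mach/mk_timer.h>
    #include <stdio.h>

    int
    main(void)
    {
            mach_port_name_t timer = mk_timer_create();
            union {
                    mach_msg_header_t   hdr;
                    unsigned char       space[256]; /* room for the expire msg */
            } rcvbuf;

            /* Arm roughly 100M absolute-time units in the future. */
            mk_timer_arm(timer, mach_absolute_time() + 100000000ull);

            /* Expiration is delivered as a message on the timer port. */
            mach_msg(&rcvbuf.hdr, MACH_RCV_MSG, 0, sizeof (rcvbuf),
                timer, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);

            printf("timer fired\n");
            mk_timer_destroy(timer);
            return (0);
    }
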
(c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,42 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:34 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:55 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.4.1 1995/02/23 17:31:45 alanl - * DIPC: Merge from nmk17b2 to nmk18b8. - * [95/01/05 alanl] - * - * Revision 1.1.10.1 1994/12/01 20:43:40 dwm - * mk6 CR801 - copyright marker not FREE_ - * [1994/12/01 19:25:52 dwm] - * - * Revision 1.1.5.2 1994/09/10 21:47:18 bolinger - * Merge up to NMK17.3 - * [1994/09/08 19:58:04 bolinger] - * - * Revision 1.1.5.1 1994/06/21 19:43:06 dlb - * Bring forward to NMK18 - * [1994/06/17 18:58:04 dlb] - * - * Revision 1.1.2.2 1994/07/22 09:54:09 paire - * Added vm_remap_remote prototype. - * [94/07/05 paire] - * - * Revision 1.1.2.1 1994/12/06 20:11:22 alanl - * Initial revision. Moved here from kern/norma_task.h to avoid a - * name collision with the mig-generated kern/norma_task.h. - * [94/12/05 mmp] - * - * $EndLog$ - */ /* * Mach Operating System * Copyright (c) 1991 Carnegie Mellon University @@ -87,24 +51,24 @@ #ifndef _KERN_NORMA_PROTOS_H_ #define _KERN_NORMA_PROTOS_H_ -extern void task_copy_vm( - ipc_port_t host, - vm_map_t old_map, - boolean_t clone, - boolean_t kill_parent, - ipc_port_t to); +extern void task_copy_vm( + ipc_port_t host, + vm_map_t old_map, + boolean_t clone, + boolean_t kill_parent, + ipc_port_t to); -extern kern_return_t vm_remap_remote( - ipc_port_t target_task_port, - vm_offset_t *target_address, - vm_size_t size, - vm_offset_t mask, - boolean_t anywhere, - ipc_port_t source_task_port, - vm_offset_t source_address, - boolean_t copy, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance); +extern kern_return_t vm_remap_remote( + ipc_port_t target_task_port, + mach_vm_offset_t *target_address, + mach_vm_size_t size, + mach_vm_offset_t mask, + boolean_t anywhere, + ipc_port_t source_task_port, + mach_vm_offset_t source_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance); #endif /* _KERN_NORMA_PROTOS_H_ */ diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c index bf5df6fca..e154b7501 100644 --- a/osfmk/kern/printf.c +++ b/osfmk/kern/printf.c @@ -155,7 +155,6 @@ #include #include #include -#include #include #include #include @@ -646,7 +645,11 @@ boolean_t new_printf_cpu_number = FALSE; decl_simple_lock_data(,printf_lock) +decl_simple_lock_data(,bsd_log_spinlock) decl_mutex_data(,sprintf_lock) +extern void bsd_log_init(void); +void bsd_log_lock(void); +void bsd_log_unlock(void); void printf_init(void) @@ -654,8 +657,22 @@ printf_init(void) /* * Lock is only really needed after the first thread is created. 
*/ - simple_lock_init(&printf_lock, ETAP_MISC_PRINTF); - mutex_init(&sprintf_lock, ETAP_MISC_PRINTF); + simple_lock_init(&printf_lock, 0); + simple_lock_init(&bsd_log_spinlock, 0); + bsd_log_init(); + mutex_init(&sprintf_lock, 0); +} + +void +bsd_log_lock() +{ + simple_lock(&bsd_log_spinlock); +} + +void +bsd_log_unlock() +{ + simple_unlock(&bsd_log_spinlock); } /* derived from boot_gets */ @@ -759,6 +776,11 @@ consdebug_putc( cnputc(c); debug_putc(c); + +#ifdef __ppc__ + if (!console_is_serial()) + PE_kputc(c); +#endif } void diff --git a/osfmk/kern/priority.c b/osfmk/kern/priority.c index a1e170de5..9ff146229 100644 --- a/osfmk/kern/priority.c +++ b/osfmk/kern/priority.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,8 +57,6 @@ * Clock primitives. */ -#include - #include #include #include @@ -92,14 +90,12 @@ thread_quantum_expire( * Check for fail-safe trip. */ if (!(thread->sched_mode & TH_MODE_TIMESHARE)) { - extern uint64_t max_unsafe_computation; uint64_t new_computation; new_computation = myprocessor->quantum_end; new_computation -= thread->computation_epoch; if (new_computation + thread->computation_metered > max_unsafe_computation) { - extern uint32_t sched_safe_duration; if (thread->sched_mode & TH_MODE_REALTIME) { thread->priority = DEPRESSPRI; @@ -123,9 +119,19 @@ thread_quantum_expire( update_priority(thread); else if (thread->sched_mode & TH_MODE_TIMESHARE) { - thread_timer_delta(thread); - thread->sched_usage += thread->sched_delta; - thread->sched_delta = 0; + register uint32_t delta; + + thread_timer_delta(thread, delta); + + /* + * Accumulate timesharing usage only + * during contention for processor + * resources. + */ + if (thread->pri_shift < INT8_MAX) + thread->sched_usage += delta; + + thread->cpu_delta += delta; /* * Adjust the scheduled priority if @@ -157,3 +163,211 @@ thread_quantum_expire( splx(s); } + +/* + * Define shifts for simulating (5/8) ** n + * + * Shift structures for holding update shifts. Actual computation + * is usage = (usage >> shift1) +/- (usage >> abs(shift2)) where the + * +/- is determined by the sign of shift 2. + */ +struct shift_data { + int shift1; + int shift2; +}; + +#define SCHED_DECAY_TICKS 32 +static struct shift_data sched_decay_shifts[SCHED_DECAY_TICKS] = { + {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7}, + {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13}, + {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18}, + {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27} +}; + +/* + * do_priority_computation: + * + * Calculate the timesharing priority based upon usage and load. + */ +#define do_priority_computation(thread, pri) \ + MACRO_BEGIN \ + (pri) = (thread)->priority /* start with base priority */ \ + - ((thread)->sched_usage >> (thread)->pri_shift); \ + if ((pri) < MINPRI_USER) \ + (pri) = MINPRI_USER; \ + else \ + if ((pri) > MAXPRI_KERNEL) \ + (pri) = MAXPRI_KERNEL; \ + MACRO_END + +/* + * set_priority: + * + * Set the base priority of the thread + * and reset its scheduled priority. + * + * Called with the thread locked. + */ +void +set_priority( + register thread_t thread, + register int priority) +{ + thread->priority = priority; + compute_priority(thread, FALSE); +} + +/* + * compute_priority: + * + * Reset the scheduled priority of the thread + * according to its base priority if the + * thread has not been promoted or depressed. 
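
[Editor's note: the new do_priority_computation() collapses the old PRI_SHIFT variants into one rule: scheduled priority is the base priority minus sched_usage scaled down by the per-set pri_shift, clamped to [MINPRI_USER, MAXPRI_KERNEL]. A runnable illustration; the three band constants below are assumed values for demonstration, not necessarily xnu's.]

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed values for illustration only. */
    #define MINPRI_USER      0
    #define MAXPRI_KERNEL   95
    #define BASEPRI_DEFAULT 31

    static int
    timeshare_pri(int base, uint32_t sched_usage, uint32_t pri_shift)
    {
            int pri = base - (int)(sched_usage >> pri_shift);

            if (pri < MINPRI_USER)
                    pri = MINPRI_USER;
            else if (pri > MAXPRI_KERNEL)
                    pri = MAXPRI_KERNEL;
            return (pri);
    }

    int
    main(void)
    {
            uint32_t usage;

            /* More accumulated usage => lower scheduled priority. */
            for (usage = 0; usage <= 1u << 16; usage += 1u << 14)
                    printf("usage %6u -> pri %d\n",
                        usage, timeshare_pri(BASEPRI_DEFAULT, usage, 10));
            return (0);
    }
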
+ * + * Called with the thread locked. + */ +void +compute_priority( + register thread_t thread, + boolean_t override_depress) +{ + register int priority; + + if ( !(thread->sched_mode & TH_MODE_PROMOTED) && + (!(thread->sched_mode & TH_MODE_ISDEPRESSED) || + override_depress ) ) { + if (thread->sched_mode & TH_MODE_TIMESHARE) + do_priority_computation(thread, priority); + else + priority = thread->priority; + + set_sched_pri(thread, priority); + } +} + +/* + * compute_my_priority: + * + * Reset the scheduled priority for + * a timesharing thread. + * + * Only for use on the current thread + * if timesharing and not depressed. + * + * Called with the thread locked. + */ +void +compute_my_priority( + register thread_t thread) +{ + register int priority; + + do_priority_computation(thread, priority); + assert(thread->runq == RUN_QUEUE_NULL); + thread->sched_pri = priority; +} + +/* + * update_priority + * + * Perform housekeeping operations driven by scheduler tick. + * + * Called with the thread locked. + */ +void +update_priority( + register thread_t thread) +{ + register unsigned ticks; + register uint32_t delta; + + ticks = sched_tick - thread->sched_stamp; + assert(ticks != 0); + thread->sched_stamp += ticks; + thread->pri_shift = thread->processor_set->pri_shift; + + /* + * Gather cpu usage data. + */ + thread_timer_delta(thread, delta); + if (ticks < SCHED_DECAY_TICKS) { + register struct shift_data *shiftp; + + /* + * Accumulate timesharing usage only + * during contention for processor + * resources. + */ + if (thread->pri_shift < INT8_MAX) + thread->sched_usage += delta; + + thread->cpu_usage += delta + thread->cpu_delta; + thread->cpu_delta = 0; + + shiftp = &sched_decay_shifts[ticks]; + if (shiftp->shift2 > 0) { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) + + (thread->cpu_usage >> shiftp->shift2); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) + + (thread->sched_usage >> shiftp->shift2); + } + else { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) - + (thread->cpu_usage >> -(shiftp->shift2)); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) - + (thread->sched_usage >> -(shiftp->shift2)); + } + } + else { + thread->cpu_usage = thread->cpu_delta = 0; + thread->sched_usage = 0; + } + + /* + * Check for fail-safe release. + */ + if ( (thread->sched_mode & TH_MODE_FAILSAFE) && + thread->sched_stamp >= thread->safe_release ) { + if (!(thread->safe_mode & TH_MODE_TIMESHARE)) { + if (thread->safe_mode & TH_MODE_REALTIME) { + thread->priority = BASEPRI_RTQUEUES; + + thread->sched_mode |= TH_MODE_REALTIME; + } + + thread->sched_mode &= ~TH_MODE_TIMESHARE; + + if (thread->state & TH_RUN) + pset_share_decr(thread->processor_set); + + if (!(thread->sched_mode & TH_MODE_ISDEPRESSED)) + set_sched_pri(thread, thread->priority); + } + + thread->safe_mode = 0; + thread->sched_mode &= ~TH_MODE_FAILSAFE; + } + + /* + * Recompute scheduled priority if appropriate. 
+ */ + if ( (thread->sched_mode & TH_MODE_TIMESHARE) && + !(thread->sched_mode & TH_MODE_PROMOTED) && + !(thread->sched_mode & TH_MODE_ISDEPRESSED) ) { + register int new_pri; + + do_priority_computation(thread, new_pri); + if (new_pri != thread->sched_pri) { + run_queue_t runq; + + runq = run_queue_remove(thread); + thread->sched_pri = new_pri; + if (runq != RUN_QUEUE_NULL) + thread_setrun(thread, SCHED_TAILQ); + } + } +} diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c index 6a1b9e459..6f93f5ef4 100644 --- a/osfmk/kern/processor.c +++ b/osfmk/kern/processor.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,10 +54,9 @@ * processor.c: processor and processor_set manipulation routines. */ -#include - #include #include +#include #include #include #include @@ -77,29 +76,22 @@ * Exported interface */ #include +#include /* * Exported variables. */ -struct processor_set default_pset; -struct processor processor_array[NCPUS]; +struct processor_set default_pset; -int master_cpu = 0; +processor_t processor_list; +unsigned int processor_count; +static processor_t processor_list_tail; +decl_simple_lock_data(,processor_list_lock) processor_t master_processor; -processor_t processor_ptr[NCPUS]; +int master_cpu = 0; /* Forwards */ -void pset_init( - processor_set_t pset); - -void processor_init( - register processor_t pr, - int slot_num); - -void pset_quanta_setup( - processor_set_t pset); - kern_return_t processor_set_base( processor_set_t pset, policy_t policy, @@ -118,36 +110,22 @@ kern_return_t processor_set_things( mach_msg_type_number_t *count, int type); - -/* - * Bootstrap the processor/pset system so the scheduler can run. - */ void -pset_sys_bootstrap(void) +processor_bootstrap(void) { - register int i; - - pset_init(&default_pset); - - for (i = 0; i < NCPUS; i++) { - /* - * Initialize processor data structures. - * Note that cpu_to_processor(i) is processor_ptr[i]. - */ - processor_ptr[i] = &processor_array[i]; - processor_init(processor_ptr[i], i); - } + simple_lock_init(&processor_list_lock, 0); master_processor = cpu_to_processor(master_cpu); - default_pset.active = TRUE; + processor_init(master_processor, master_cpu); } /* * Initialize the given processor_set structure. 
*/ -void pset_init( +void +pset_init( register processor_set_t pset) { register int i; @@ -164,10 +142,10 @@ void pset_init( queue_init(&pset->idle_queue); pset->idle_count = 0; queue_init(&pset->active_queue); - simple_lock_init(&pset->sched_lock, ETAP_THREAD_PSET_IDLE); + simple_lock_init(&pset->sched_lock, 0); pset->run_count = pset->share_count = 0; pset->mach_factor = pset->load_average = 0; - pset->sched_load = 0; + pset->pri_shift = INT8_MAX; queue_init(&pset->processors); pset->processor_count = 0; queue_init(&pset->tasks); @@ -175,14 +153,11 @@ void pset_init( queue_init(&pset->threads); pset->thread_count = 0; pset->ref_count = 1; - pset->active = FALSE; - mutex_init(&pset->lock, ETAP_THREAD_PSET); + pset->active = TRUE; + mutex_init(&pset->lock, 0); pset->pset_self = IP_NULL; pset->pset_name_self = IP_NULL; pset->timeshare_quanta = 1; - - for (i = 0; i <= NCPUS; i++) - pset->quantum_factors[i] = 1; } /* @@ -209,12 +184,23 @@ processor_init( p->active_thread = p->next_thread = p->idle_thread = THREAD_NULL; p->processor_set = PROCESSOR_SET_NULL; p->current_pri = MINPRI; + p->deadline = UINT64_MAX; timer_call_setup(&p->quantum_timer, thread_quantum_expire, p); p->timeslice = 0; - p->deadline = UINT64_MAX; - simple_lock_init(&p->lock, ETAP_THREAD_PROC); + simple_lock_init(&p->lock, 0); p->processor_self = IP_NULL; - p->slot_num = slot_num; + processor_data_init(p); + PROCESSOR_DATA(p, slot_num) = slot_num; + + simple_lock(&processor_list_lock); + if (processor_list == NULL) + processor_list = p; + else + processor_list_tail->processor_list = p; + processor_list_tail = p; + processor_count++; + p->processor_list = NULL; + simple_unlock(&processor_list_lock); } /* @@ -243,6 +229,9 @@ void pset_reference( processor_set_t pset) { + if (pset == PROCESSOR_SET_NULL) + return; + assert(pset == &default_pset); } @@ -264,7 +253,7 @@ pset_remove_processor( queue_remove(&pset->processors, processor, processor_t, processors); processor->processor_set = PROCESSOR_SET_NULL; pset->processor_count--; - pset_quanta_setup(pset); + timeshare_quanta_update(pset); } /* @@ -281,7 +270,7 @@ pset_add_processor( queue_enter(&pset->processors, processor, processor_t, processors); processor->processor_set = pset; pset->processor_count++; - pset_quanta_setup(pset); + timeshare_quanta_update(pset); } /* @@ -300,7 +289,6 @@ pset_remove_task( return; queue_remove(&pset->tasks, task, task_t, pset_tasks); - task->processor_set = PROCESSOR_SET_NULL; pset->task_count--; } @@ -333,7 +321,6 @@ pset_remove_thread( thread_t thread) { queue_remove(&pset->threads, thread, thread_t, pset_threads); - thread->processor_set = PROCESSOR_SET_NULL; pset->thread_count--; } @@ -375,88 +362,97 @@ thread_change_psets( kern_return_t processor_info_count( - processor_flavor_t flavor, + processor_flavor_t flavor, mach_msg_type_number_t *count) { - kern_return_t kr; - switch (flavor) { + case PROCESSOR_BASIC_INFO: *count = PROCESSOR_BASIC_INFO_COUNT; - return KERN_SUCCESS; + break; + case PROCESSOR_CPU_LOAD_INFO: *count = PROCESSOR_CPU_LOAD_INFO_COUNT; - return KERN_SUCCESS; + break; + default: - kr = cpu_info_count(flavor, count); - return kr; + return (cpu_info_count(flavor, count)); } + + return (KERN_SUCCESS); } kern_return_t processor_info( register processor_t processor, - processor_flavor_t flavor, - host_t *host, - processor_info_t info, + processor_flavor_t flavor, + host_t *host, + processor_info_t info, mach_msg_type_number_t *count) { register int i, slot_num, state; - register processor_basic_info_t basic_info; - register 
processor_cpu_load_info_t cpu_load_info; - kern_return_t kr; + kern_return_t result; if (processor == PROCESSOR_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); - slot_num = processor->slot_num; + slot_num = PROCESSOR_DATA(processor, slot_num); switch (flavor) { case PROCESSOR_BASIC_INFO: - { - if (*count < PROCESSOR_BASIC_INFO_COUNT) - return(KERN_FAILURE); - - basic_info = (processor_basic_info_t) info; - basic_info->cpu_type = machine_slot[slot_num].cpu_type; - basic_info->cpu_subtype = machine_slot[slot_num].cpu_subtype; - state = processor->state; - if (state == PROCESSOR_OFF_LINE) - basic_info->running = FALSE; - else - basic_info->running = TRUE; - basic_info->slot_num = slot_num; - if (processor == master_processor) - basic_info->is_master = TRUE; - else - basic_info->is_master = FALSE; - - *count = PROCESSOR_BASIC_INFO_COUNT; - *host = &realhost; - return(KERN_SUCCESS); - } + { + register processor_basic_info_t basic_info; + + if (*count < PROCESSOR_BASIC_INFO_COUNT) + return (KERN_FAILURE); + + basic_info = (processor_basic_info_t) info; + basic_info->cpu_type = slot_type(slot_num); + basic_info->cpu_subtype = slot_subtype(slot_num); + state = processor->state; + if (state == PROCESSOR_OFF_LINE) + basic_info->running = FALSE; + else + basic_info->running = TRUE; + basic_info->slot_num = slot_num; + if (processor == master_processor) + basic_info->is_master = TRUE; + else + basic_info->is_master = FALSE; + + *count = PROCESSOR_BASIC_INFO_COUNT; + *host = &realhost; + + return (KERN_SUCCESS); + } + case PROCESSOR_CPU_LOAD_INFO: - { + { + register processor_cpu_load_info_t cpu_load_info; + register integer_t *cpu_ticks; + if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) - return(KERN_FAILURE); + return (KERN_FAILURE); cpu_load_info = (processor_cpu_load_info_t) info; - for (i=0;icpu_ticks[i] = machine_slot[slot_num].cpu_ticks[i]; + cpu_ticks = PROCESSOR_DATA(processor, cpu_ticks); + for (i=0; i < CPU_STATE_MAX; i++) + cpu_load_info->cpu_ticks[i] = cpu_ticks[i]; *count = PROCESSOR_CPU_LOAD_INFO_COUNT; *host = &realhost; - return(KERN_SUCCESS); - } + + return (KERN_SUCCESS); + } + default: - { - kr=cpu_info(flavor, slot_num, info, count); - if (kr == KERN_SUCCESS) - *host = &realhost; - return(kr); - } + result = cpu_info(flavor, slot_num, info, count); + if (result == KERN_SUCCESS) + *host = &realhost; + + return (result); } } @@ -465,20 +461,22 @@ processor_start( processor_t processor) { kern_return_t result; + thread_t thread; spl_t s; if (processor == PROCESSOR_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); if (processor == master_processor) { + thread_t self = current_thread(); processor_t prev; - prev = thread_bind(current_thread(), processor); + prev = thread_bind(self, processor); thread_block(THREAD_CONTINUE_NULL); - result = cpu_start(processor->slot_num); + result = cpu_start(PROCESSOR_DATA(processor, slot_num)); - thread_bind(current_thread(), prev); + thread_bind(self, prev); return (result); } @@ -496,30 +494,60 @@ processor_start( processor_unlock(processor); splx(s); - if (processor->next_thread == THREAD_NULL) { - thread_t thread; - extern void start_cpu_thread(void); - - thread = kernel_thread_create(start_cpu_thread, MAXPRI_KERNEL); + /* + * Create the idle processor thread. 
+ */ + if (processor->idle_thread == THREAD_NULL) { + result = idle_thread_create(processor); + if (result != KERN_SUCCESS) { + s = splsched(); + processor_lock(processor); + processor->state = PROCESSOR_OFF_LINE; + processor_unlock(processor); + splx(s); + + return (result); + } + } + + /* + * If there is no active thread, the processor + * has never been started. Create a dedicated + * start up thread. + */ + if ( processor->active_thread == THREAD_NULL && + processor->next_thread == THREAD_NULL ) { + result = kernel_thread_create((thread_continue_t)processor_start_thread, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) { + s = splsched(); + processor_lock(processor); + processor->state = PROCESSOR_OFF_LINE; + processor_unlock(processor); + splx(s); + + return (result); + } s = splsched(); thread_lock(thread); thread->bound_processor = processor; processor->next_thread = thread; thread->state = TH_RUN; - pset_run_incr(thread->processor_set); thread_unlock(thread); splx(s); + + thread_deallocate(thread); } if (processor->processor_self == IP_NULL) ipc_processor_init(processor); - result = cpu_start(processor->slot_num); + result = cpu_start(PROCESSOR_DATA(processor, slot_num)); if (result != KERN_SUCCESS) { s = splsched(); processor_lock(processor); processor->state = PROCESSOR_OFF_LINE; + timer_call_shutdown(processor); processor_unlock(processor); splx(s); @@ -550,49 +578,44 @@ processor_control( if (processor == PROCESSOR_NULL) return(KERN_INVALID_ARGUMENT); - return(cpu_control(processor->slot_num, info, count)); + return(cpu_control(PROCESSOR_DATA(processor, slot_num), info, count)); } /* - * Precalculate the appropriate timesharing quanta based on load. The - * index into quantum_factors[] is the number of threads on the - * processor set queue. It is limited to the number of processors in - * the set. + * Calculate the appropriate timesharing quanta based on set load. 
*/ void -pset_quanta_setup( +timeshare_quanta_update( processor_set_t pset) { - register int i, count = pset->processor_count; - - for (i = 1; i <= count; i++) - pset->quantum_factors[i] = (count + (i / 2)) / i; - - pset->quantum_factors[0] = pset->quantum_factors[1]; - - timeshare_quanta_update(pset); + int pcount = pset->processor_count; + int i = pset->runq.count; + + if (i >= pcount) + i = 1; + else + if (i <= 1) + i = pcount; + else + i = (pcount + (i / 2)) / i; + + pset->timeshare_quanta = i; } kern_return_t processor_set_create( - host_t host, - processor_set_t *new_set, - processor_set_t *new_name) + __unused host_t host, + __unused processor_set_t *new_set, + __unused processor_set_t *new_name) { -#ifdef lint - host++; new_set++; new_name++; -#endif /* lint */ return(KERN_FAILURE); } kern_return_t processor_set_destroy( - processor_set_t pset) + __unused processor_set_t pset) { -#ifdef lint - pset++; -#endif /* lint */ return(KERN_FAILURE); } @@ -684,7 +707,7 @@ processor_set_info( return(KERN_FAILURE); ts_limit = (policy_timeshare_limit_t) info; - ts_limit->max_priority = MAXPRI_STANDARD; + ts_limit->max_priority = MAXPRI_KERNEL; *count = POLICY_TIMESHARE_LIMIT_COUNT; *host = &realhost; @@ -697,7 +720,7 @@ processor_set_info( return(KERN_FAILURE); fifo_limit = (policy_fifo_limit_t) info; - fifo_limit->max_priority = MAXPRI_STANDARD; + fifo_limit->max_priority = MAXPRI_KERNEL; *count = POLICY_FIFO_LIMIT_COUNT; *host = &realhost; @@ -710,7 +733,7 @@ processor_set_info( return(KERN_FAILURE); rr_limit = (policy_rr_limit_t) info; - rr_limit->max_priority = MAXPRI_STANDARD; + rr_limit->max_priority = MAXPRI_KERNEL; *count = POLICY_RR_LIMIT_COUNT; *host = &realhost; @@ -781,9 +804,9 @@ processor_set_statistics( */ kern_return_t processor_set_max_priority( - processor_set_t pset, - int max_priority, - boolean_t change_threads) + __unused processor_set_t pset, + __unused int max_priority, + __unused boolean_t change_threads) { return (KERN_INVALID_ARGUMENT); } @@ -796,8 +819,8 @@ processor_set_max_priority( kern_return_t processor_set_policy_enable( - processor_set_t pset, - int policy) + __unused processor_set_t pset, + __unused int policy) { return (KERN_INVALID_ARGUMENT); } @@ -810,9 +833,9 @@ processor_set_policy_enable( */ kern_return_t processor_set_policy_disable( - processor_set_t pset, - int policy, - boolean_t change_threads) + __unused processor_set_t pset, + __unused int policy, + __unused boolean_t change_threads) { return (KERN_INVALID_ARGUMENT); } @@ -827,19 +850,20 @@ processor_set_policy_disable( */ kern_return_t processor_set_things( - processor_set_t pset, - mach_port_t **thing_list, + processor_set_t pset, + mach_port_t **thing_list, mach_msg_type_number_t *count, - int type) + int type) { unsigned int actual; /* this many things */ - int i; + unsigned int maxthings; + unsigned int i; vm_size_t size, size_needed; - vm_offset_t addr; + void *addr; if (pset == PROCESSOR_SET_NULL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); size = 0; addr = 0; @@ -847,17 +871,18 @@ processor_set_things( pset_lock(pset); if (!pset->active) { pset_unlock(pset); - return KERN_FAILURE; + + return (KERN_FAILURE); } if (type == THING_TASK) - actual = pset->task_count; + maxthings = pset->task_count; else - actual = pset->thread_count; + maxthings = pset->thread_count; /* do we have the memory we need? 
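
[Editor's note: timeshare_quanta_update() above replaces the precomputed quantum_factors[] array with a direct computation. With i runnable timeshare threads on p processors, each quantum is stretched by roughly p/i (rounded), pinned to 1 when the set is saturated and to p when nearly idle. A runnable table of the resulting factors, mirroring the code above:]

    #include <stdio.h>

    static int
    quanta(int pcount, int runq_count)
    {
            int i = runq_count;

            if (i >= pcount)
                    i = 1;
            else if (i <= 1)
                    i = pcount;
            else
                    i = (pcount + (i / 2)) / i;
            return (i);
    }

    int
    main(void)
    {
            int pcount = 4, rq;

            for (rq = 0; rq <= 6; rq++)
                    printf("runq %d -> quanta %d\n", rq, quanta(pcount, rq));
            return (0);
    }
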
*/ - size_needed = actual * sizeof(mach_port_t); + size_needed = maxthings * sizeof (mach_port_t); if (size_needed <= size) break; @@ -872,60 +897,47 @@ processor_set_things( addr = kalloc(size); if (addr == 0) - return KERN_RESOURCE_SHORTAGE; + return (KERN_RESOURCE_SHORTAGE); } /* OK, have memory and the processor_set is locked & active */ + actual = 0; switch (type) { - case THING_TASK: { - task_t *tasks = (task_t *) addr; - task_t task; - - for (i = 0, task = (task_t) queue_first(&pset->tasks); - !queue_end(&pset->tasks, (queue_entry_t) task); - task = (task_t) queue_next(&task->pset_tasks)) { - - task_lock(task); - if (task->ref_count > 0) { - /* take ref for convert_task_to_port */ - task_reference_locked(task); - tasks[i++] = task; - } - task_unlock(task); + + case THING_TASK: + { + task_t task, *tasks = (task_t *)addr; + + for (task = (task_t)queue_first(&pset->tasks); + !queue_end(&pset->tasks, (queue_entry_t)task); + task = (task_t)queue_next(&task->pset_tasks)) { + task_reference_internal(task); + tasks[actual++] = task; } + break; - } - - case THING_THREAD: { - thread_act_t *thr_acts = (thread_act_t *) addr; - thread_t thread; - thread_act_t thr_act; - - for (i = 0, thread = (thread_t) queue_first(&pset->threads); - !queue_end(&pset->threads, (queue_entry_t)thread); - thread = (thread_t) queue_next(&thread->pset_threads)) { - - thr_act = thread_lock_act(thread); - if (thr_act && thr_act->act_ref_count > 0) { - /* take ref for convert_act_to_port */ - act_reference_locked(thr_act); - thr_acts[i++] = thr_act; - } - thread_unlock_act(thread); + } + + case THING_THREAD: + { + thread_t thread, *threads = (thread_t *)addr; + + for (i = 0, thread = (thread_t)queue_first(&pset->threads); + !queue_end(&pset->threads, (queue_entry_t)thread); + thread = (thread_t)queue_next(&thread->pset_threads)) { + thread_reference_internal(thread); + threads[actual++] = thread; } + break; - } + } } - /* can unlock processor set now that we have the task/thread refs */ pset_unlock(pset); - if (i < actual) { - actual = i; - size_needed = actual * sizeof(mach_port_t); - } - assert(i == actual); + if (actual < maxthings) + size_needed = actual * sizeof (mach_port_t); if (actual == 0) { /* no things, so return null pointer and deallocate memory */ @@ -934,65 +946,73 @@ processor_set_things( if (size != 0) kfree(addr, size); - } else { + } + else { /* if we allocated too much, must copy */ if (size_needed < size) { - vm_offset_t newaddr; + void *newaddr; newaddr = kalloc(size_needed); if (newaddr == 0) { switch (type) { - case THING_TASK: { - task_t *tasks = (task_t *) addr; + + case THING_TASK: + { + task_t *tasks = (task_t *)addr; for (i = 0; i < actual; i++) task_deallocate(tasks[i]); break; - } + } - case THING_THREAD: { - thread_act_t *acts = (thread_act_t *) addr; + case THING_THREAD: + { + thread_t *threads = (thread_t *)addr; for (i = 0; i < actual; i++) - act_deallocate(acts[i]); + thread_deallocate(threads[i]); break; - } } + } + kfree(addr, size); - return KERN_RESOURCE_SHORTAGE; + return (KERN_RESOURCE_SHORTAGE); } - bcopy((char *) addr, (char *) newaddr, size_needed); + bcopy((void *) addr, (void *) newaddr, size_needed); kfree(addr, size); addr = newaddr; } - *thing_list = (mach_port_t *) addr; + *thing_list = (mach_port_t *)addr; *count = actual; /* do the conversion that Mig should handle */ switch (type) { - case THING_TASK: { - task_t *tasks = (task_t *) addr; + + case THING_TASK: + { + task_t *tasks = (task_t *)addr; for (i = 0; i < actual; i++) (*thing_list)[i] = 
convert_task_to_port(tasks[i]); break; - } + } - case THING_THREAD: { - thread_act_t *thr_acts = (thread_act_t *) addr; + case THING_THREAD: + { + thread_t *threads = (thread_t *)addr; for (i = 0; i < actual; i++) - (*thing_list)[i] = convert_act_to_port(thr_acts[i]); + (*thing_list)[i] = convert_thread_to_port(threads[i]); break; - } + } } } - return(KERN_SUCCESS); + return (KERN_SUCCESS); } @@ -1033,10 +1053,10 @@ processor_set_threads( */ kern_return_t processor_set_base( - processor_set_t pset, - policy_t policy, - policy_base_t base, - boolean_t change) + __unused processor_set_t pset, + __unused policy_t policy, + __unused policy_base_t base, + __unused boolean_t change) { return (KERN_INVALID_ARGUMENT); } @@ -1050,10 +1070,10 @@ processor_set_base( */ kern_return_t processor_set_limit( - processor_set_t pset, - policy_t policy, - policy_limit_t limit, - boolean_t change) + __unused processor_set_t pset, + __unused policy_t policy, + __unused policy_limit_t limit, + __unused boolean_t change) { return (KERN_POLICY_LIMIT); } @@ -1067,11 +1087,11 @@ processor_set_limit( */ kern_return_t processor_set_policy_control( - processor_set_t pset, - int flavor, - processor_set_info_t policy_info, - mach_msg_type_number_t count, - boolean_t change) + __unused processor_set_t pset, + __unused int flavor, + __unused processor_set_info_t policy_info, + __unused mach_msg_type_number_t count, + __unused boolean_t change) { return (KERN_INVALID_ARGUMENT); } diff --git a/osfmk/kern/processor.h b/osfmk/kern/processor.h index be661abaf..64c7315dd 100644 --- a/osfmk/kern/processor.h +++ b/osfmk/kern/processor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -51,32 +51,27 @@ */ /* - * processor.h: Processor and processor-set definitions. + * processor.h: Processor and processor-related definitions. */ #ifndef _KERN_PROCESSOR_H_ #define _KERN_PROCESSOR_H_ -/* - * Data structures for managing processors and sets of processors. - */ #include #include #include -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE -#include - #include +#include #include #include #include #include +#include #include @@ -87,7 +82,7 @@ struct processor_set { queue_head_t processors; /* all processors here */ int processor_count;/* how many ? 
*/ - decl_simple_lock_data(,sched_lock) /* lock for above */ + decl_simple_lock_data(,sched_lock) /* lock for runq and above */ struct run_queue runq; /* runq for this set */ @@ -100,7 +95,6 @@ struct processor_set { decl_mutex_data(, lock) /* lock for above */ int timeshare_quanta; /* timeshare quantum factor */ - int quantum_factors[NCPUS+1]; struct ipc_port * pset_self; /* port for operations */ struct ipc_port * pset_name_self; /* port for information */ @@ -110,16 +104,19 @@ struct processor_set { integer_t mach_factor; /* mach_factor */ integer_t load_average; /* load_average */ - uint32_t sched_load; /* load avg for scheduler */ + + uint32_t pri_shift; /* timeshare usage -> priority */ }; +extern struct processor_set default_pset; + struct processor { - queue_chain_t processor_queue;/* idle/active/action queue link, + queue_chain_t processor_queue;/* idle/active queue link, * MUST remain the first element */ int state; /* See below */ struct thread *active_thread, /* thread running on processor */ - *next_thread, /* next thread to run if dispatched */ + *next_thread, /* next thread when dispatched */ *idle_thread; /* this processor's idle thread. */ processor_set_t processor_set; /* current membership */ @@ -135,16 +132,18 @@ struct processor { struct run_queue runq; /* local runq for this processor */ - queue_chain_t processors; /* all processors in set */ + queue_chain_t processors; /* processors in set */ decl_simple_lock_data(,lock) - struct ipc_port *processor_self;/* port for operations */ - int slot_num; /* machine-indep slot number */ + struct ipc_port * processor_self; /* port for operations */ + processor_t processor_list; /* all existing processors */ + processor_data_t processor_data; /* per-processor data */ }; -extern struct processor_set default_pset; -extern processor_t master_processor; +extern processor_t processor_list; +extern unsigned int processor_count; +decl_simple_lock_data(extern,processor_list_lock) -extern struct processor processor_array[NCPUS]; +extern processor_t master_processor; /* * NOTE: The processor->processor_set link is needed in one of the @@ -177,23 +176,10 @@ extern struct processor processor_array[NCPUS]; #define PROCESSOR_SHUTDOWN 4 /* Going off-line */ #define PROCESSOR_START 5 /* Being started */ -/* - * Use processor ptr array to find current processor's data structure. - * This replaces a multiplication (index into processor_array) with - * an array lookup and a memory reference. It also allows us to save - * space if processor numbering gets too sparse. 
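For reference, the quantum_factors[] table removed here precomputed the same ratio that the new timeshare_quanta_update() (first hunk of this section) now derives on demand from the run queue depth. A minimal user-space sketch of that computation, with the field values passed in directly and a hypothetical test harness around them:

#include <assert.h>

/* Mirrors the logic of the new timeshare_quanta_update(). */
static int
timeshare_quanta(int processor_count, int runq_count)
{
	int i = runq_count;

	if (i >= processor_count)
		i = 1;			/* oversubscribed: one quantum apiece */
	else if (i <= 1)
		i = processor_count;	/* idle or lone thread: all quanta */
	else
		i = (processor_count + (i / 2)) / i;	/* rounded ratio */

	return (i);
}

int
main(void)
{
	assert(timeshare_quanta(4, 0) == 4);
	assert(timeshare_quanta(4, 2) == 2);	/* (4 + 1) / 2 */
	assert(timeshare_quanta(4, 3) == 1);	/* (4 + 1) / 3 */
	assert(timeshare_quanta(4, 8) == 1);
	return (0);
}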
- */ - -extern processor_t processor_ptr[NCPUS]; - -#define cpu_to_processor(i) (processor_ptr[i]) - -#define current_processor() (processor_ptr[cpu_number()]) +extern processor_t current_processor(void); -/* Compatibility -- will go away */ - -#define cpu_state(slot_num) (processor_ptr[slot_num]->state) -#define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE) +extern processor_t cpu_to_processor( + int cpu); /* Useful lock macros */ @@ -204,17 +190,17 @@ extern processor_t processor_ptr[NCPUS]; #define processor_lock(pr) simple_lock(&(pr)->lock) #define processor_unlock(pr) simple_unlock(&(pr)->lock) -extern void pset_sys_bootstrap(void); +extern void processor_bootstrap(void); + +extern void processor_init( + processor_t processor, + int slot_num); + +extern void timeshare_quanta_update( + processor_set_t pset); -#define timeshare_quanta_update(pset) \ -MACRO_BEGIN \ - int proc_count = (pset)->processor_count; \ - int runq_count = (pset)->runq.count; \ - \ - (pset)->timeshare_quanta = (pset)->quantum_factors[ \ - (runq_count > proc_count)? \ - proc_count: runq_count]; \ -MACRO_END +extern void pset_init( + processor_set_t pset); #define pset_run_incr(pset) \ hw_atomic_add(&(pset)->run_count, 1) @@ -228,9 +214,6 @@ MACRO_END #define pset_share_decr(pset) \ hw_atomic_sub(&(pset)->share_count, 1) -extern void cpu_up( - int cpu); - extern kern_return_t processor_shutdown( processor_t processor); @@ -263,10 +246,6 @@ extern void thread_change_psets( processor_set_t old_pset, processor_set_t new_pset); -extern kern_return_t processor_assign( - processor_t processor, - processor_set_t new_pset, - boolean_t wait); extern kern_return_t processor_info_count( processor_flavor_t flavor, @@ -274,13 +253,7 @@ extern kern_return_t processor_info_count( #endif /* MACH_KERNEL_PRIVATE */ -extern kern_return_t processor_start( - processor_t processor); - -extern kern_return_t processor_exit( - processor_t processor); - -#endif /* __APPLE_API_PRIVATE */ +__BEGIN_DECLS extern void pset_deallocate( processor_set_t pset); @@ -288,4 +261,6 @@ extern void pset_deallocate( extern void pset_reference( processor_set_t pset); +__END_DECLS + #endif /* _KERN_PROCESSOR_H_ */ diff --git a/bsd/vm/vm_pageout.h b/osfmk/kern/processor_data.c similarity index 65% rename from bsd/vm/vm_pageout.h rename to osfmk/kern/processor_data.c index 4a473fd46..801cf3bdd 100644 --- a/bsd/vm/vm_pageout.h +++ b/osfmk/kern/processor_data.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,14 +19,27 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _BSD_VM_VM_PAGEOUT_H_ -#define _BSD_VM_VM_PAGEOUT_H_ +/* + * Machine independent per processor data. + * + * HISTORY + * + * 16 October 2003 (debo) + * Created. + */ #include -#include -#include -extern vm_map_t kernel_map; +#include +void +processor_data_init( + processor_t processor) +{ + (void)memset(&processor->processor_data, 0, sizeof (processor_data_t)); -#endif /* _BSD_VM_VM_PAGEOUT_H_ */ + queue_init(&PROCESSOR_DATA(processor, timer_call_queue)); +#if !STAT_TIME + timer_init(&PROCESSOR_DATA(processor, offline_timer)); +#endif /* STAT_TIME */ +} diff --git a/osfmk/kern/processor_data.h b/osfmk/kern/processor_data.h new file mode 100644 index 000000000..3aa431248 --- /dev/null +++ b/osfmk/kern/processor_data.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Machine independent per processor data. + */ + +#ifndef _KERN_PROCESSOR_DATA_H_ +#define _KERN_PROCESSOR_DATA_H_ + +/* + * #include kern/processor.h instead of this file. + */ + +#ifdef MACH_KERNEL_PRIVATE + +#include +#include + +struct processor_data { + /* Processor state statistics */ + integer_t cpu_ticks[CPU_STATE_MAX]; + +#if !STAT_TIME + /* Current execution timer */ + timer_t current_timer; + timer_data_t offline_timer; +#endif /* STAT_TIME */ + + /* Kernel stack cache */ + struct stack_cache { + vm_offset_t free; + unsigned int count; + } stack_cache; + + /* Pending timer callouts */ + queue_head_t timer_call_queue; + + /* VM event counters */ + vm_statistics_data_t vm_stat; + + /* IPC free message cache */ + struct ikm_cache { +#define IKM_STASH 16 + ipc_kmsg_t entries[IKM_STASH]; + unsigned int avail; + } ikm_cache; + + int slot_num; +}; + +typedef struct processor_data processor_data_t; + +#define PROCESSOR_DATA(processor, member) \ + (processor)->processor_data.member + +extern void processor_data_init( + processor_t processor); + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* _KERN_PROCESSOR_DATA_H_ */ diff --git a/osfmk/kern/profile.c b/osfmk/kern/profile.c index 610918ec4..ee8e346b3 100644 --- a/osfmk/kern/profile.c +++ b/osfmk/kern/profile.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,9 +56,7 @@ #include #if MACH_PROF -#include #include -#include #include #include #include @@ -101,8 +99,6 @@ profile_thread(void) kern_return_t kr; int j; - thread_swappable(current_act(), FALSE); - /* Initialise the queue header for the prof_queue */ mpqueue_init(&prof_queue); @@ -334,9 +330,18 @@ pbuf_free( ***************************************************************************** */ +#if !MACH_PROF kern_return_t thread_sample( - thread_act_t thr_act, + __unused thread_t thread, + __unused ipc_port_t reply) +{ + return KERN_FAILURE; +} +#else +kern_return_t +thread_sample( + thread_t thread, ipc_port_t reply) { /* @@ -346,18 +351,15 @@ thread_sample( * we are going to use as a reply port to send out the samples resulting * from its execution. */ -#if !MACH_PROF - return KERN_FAILURE; -#else prof_data_t pbuf; vm_offset_t vmpbuf; if (reply != MACH_PORT_NULL) { - if (thr_act->act_profiled) /* yuck! */ + if (thread->profiled) /* yuck! */ return KERN_INVALID_ARGUMENT; /* Start profiling this activation, do the initialization. 
*/ pbuf = pbuf_alloc(); - if ((thr_act->profil_buffer = pbuf) == NULLPROFDATA) { + if ((thread->profil_buffer = pbuf) == NULLPROFDATA) { printf("thread_sample: cannot allocate pbuf\n"); return KERN_RESOURCE_SHORTAGE; } @@ -369,29 +371,29 @@ thread_sample( reset_pbuf_area(pbuf); } pbuf->prof_port = reply; - thr_act->act_profiled = TRUE; - thr_act->act_profiled_own = TRUE; + thread->profiled = TRUE; + thread->profiled_own = TRUE; if (profile_thread_id == THREAD_NULL) profile_thread_id = kernel_thread(kernel_task, profile_thread); } else { - if (!thr_act->act_profiled) + if (!thread->profiled) return(KERN_INVALID_ARGUMENT); - thr_act->act_profiled = FALSE; + thread->profiled = FALSE; /* do not stop sampling if thread is not profiled by its own */ - if (!thr_act->act_profiled_own) + if (!thread->profiled_own) return KERN_SUCCESS; else - thr_act->act_profiled_own = FALSE; + thread->profiled_own = FALSE; - send_last_sample_buf(thr_act->profil_buffer); - pbuf_free(thr_act->profil_buffer); - thr_act->profil_buffer = NULLPROFDATA; + send_last_sample_buf(thread->profil_buffer); + pbuf_free(thread->profil_buffer); + thread->profil_buffer = NULLPROFDATA; } return KERN_SUCCESS; -#endif /* MACH_PROF */ } +#endif /* MACH_PROF */ /* ***************************************************************************** @@ -403,14 +405,20 @@ thread_sample( ***************************************************************************** */ +#if !MACH_PROF kern_return_t task_sample( - task_t task, - ipc_port_t reply) + __unused task_t task, + __unused ipc_port_t reply) { -#if !MACH_PROF return KERN_FAILURE; +} #else +kern_return_t +task_sample( + task_t task, + ipc_port_t reply) +{ prof_data_t pbuf=task->profil_buffer; vm_offset_t vmpbuf; boolean_t turnon = (reply != MACH_PORT_NULL); @@ -455,24 +463,24 @@ task_sample( if (turnon != task->task_profiled) { int actual, i; - thread_act_t thr_act; + thread_t thread; if (turnon && profile_thread_id == THREAD_NULL) /* 1st time thru? */ profile_thread_id = /* then start profile thread. */ kernel_thread(kernel_task, profile_thread); task->task_profiled = turnon; actual = task->thread_count; - for (i = 0, thr_act = (thread_act_t)queue_first(&task->threads); + for (i = 0, thread = (thread_t)queue_first(&task->threads); i < actual; - i++, thr_act = (thread_act_t)queue_next(&thr_act->task_threads)) { - if (!thr_act->act_profiled_own) { - thr_act->act_profiled = turnon; + i++, thread = (thread_t)queue_next(&thread->task_threads)) { + if (!thread->profiled_own) { + thread->profiled = turnon; if (turnon) { - thr_act->profil_buffer = task->profil_buffer; - thr_act->act_profiled = TRUE; + thread->profil_buffer = task->profil_buffer; + thread->profiled = TRUE; } else { - thr_act->act_profiled = FALSE; - thr_act->profil_buffer = NULLPROFDATA; + thread->profiled = FALSE; + thread->profil_buffer = NULLPROFDATA; } } } @@ -485,6 +493,6 @@ task_sample( task_unlock(task); return KERN_SUCCESS; -#endif /* MACH_PROF */ } +#endif /* MACH_PROF */ diff --git a/osfmk/kern/profile.h b/osfmk/kern/profile.h index 5124b2316..10488e8ef 100644 --- a/osfmk/kern/profile.h +++ b/osfmk/kern/profile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -97,12 +97,10 @@ typedef struct buffer *buffer_t; (pbuf)->prof_index -extern vm_map_t kernel_map; - /* MACRO set_pbuf_value ** ** enters the value 'val' in the buffer 'pbuf' and returns the following -** indications: 0: means that a fatal error occured: the buffer was full +** indications: 0: means that a fatal error occurred: the buffer was full ** (it hasn't been sent yet) ** 1: means that a value has been inserted successfully ** 2: means that we've just entered the last value causing @@ -162,28 +160,27 @@ extern void profile( task->task_profiled = FALSE; \ task->profil_buffer = NULLPROFDATA; -#define act_prof_init(thr_act, task) \ - thr_act->act_profiled = task->task_profiled; \ - thr_act->profil_buffer = task->profil_buffer; +#define thread_prof_init(thread, task) \ + thread->profiled = task->task_profiled; \ + thread->profil_buffer = task->profil_buffer; #define task_prof_deallocate(task) \ if (task->profil_buffer) \ task_sample(task, MACH_PORT_NULL); \ -#define act_prof_deallocate(thr_act) \ - if (thr_act->act_profiled_own && thr_act->profil_buffer) \ - thread_sample(thr_act, MACH_PORT_NULL); \ +#define thread_prof_deallocate(thread) \ + if (thread->profiled_own && thread->profil_buffer) \ + thread_sample(thread, MACH_PORT_NULL); \ -extern kern_return_t thread_sample(thread_act_t, ipc_port_t); +extern kern_return_t thread_sample(thread_t, ipc_port_t); extern kern_return_t task_sample(task_t, ipc_port_t); #else /* !MACH_PROF */ #define task_prof_init(task) -#define act_prof_init(thr_act, task) +#define thread_prof_init(thread, task) #define task_prof_deallocate(task) -#define act_prof_deallocate(thr_act) - +#define thread_prof_deallocate(thread) #endif /* !MACH_PROF */ diff --git a/osfmk/kern/queue.h b/osfmk/kern/queue.h index bdb700fb3..98effaac4 100644 --- a/osfmk/kern/queue.h +++ b/osfmk/kern/queue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
* * @APPLE_LICENSE_HEADER_START@ * @@ -61,7 +61,7 @@ #ifndef _KERN_QUEUE_H_ #define _KERN_QUEUE_H_ -#include +#include #include /* @@ -108,6 +108,9 @@ typedef struct queue_entry *queue_entry_t; #if !defined(__GNUC__) +#include +__BEGIN_DECLS + /* Enqueue element to head of queue */ extern void enqueue_head( queue_t que, @@ -140,7 +143,9 @@ extern void insque( extern int remque( queue_entry_t elt); -#else +__END_DECLS + +#else /* !__GNUC__ */ static __inline__ void enqueue_head( @@ -196,7 +201,7 @@ dequeue_tail( static __inline__ void remqueue( - queue_t que, + __unused queue_t que, queue_entry_t elt) { elt->next->prev = elt->prev; @@ -224,7 +229,7 @@ remque( return((integer_t)elt); } -#endif /* defined(__GNUC__) */ +#endif /* !__GNUC__ */ /* * Macro: queue_init @@ -323,16 +328,16 @@ MACRO_END */ #define queue_enter(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t prev; \ + register queue_entry_t __prev; \ \ - prev = (head)->prev; \ - if ((head) == prev) { \ + __prev = (head)->prev; \ + if ((head) == __prev) { \ (head)->next = (queue_entry_t) (elt); \ } \ else { \ - ((type)prev)->field.next = (queue_entry_t)(elt);\ + ((type)__prev)->field.next = (queue_entry_t)(elt);\ } \ - (elt)->field.prev = prev; \ + (elt)->field.prev = __prev; \ (elt)->field.next = head; \ (head)->prev = (queue_entry_t) elt; \ MACRO_END @@ -350,16 +355,16 @@ MACRO_END */ #define queue_enter_first(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t next; \ + register queue_entry_t __next; \ \ - next = (head)->next; \ - if ((head) == next) { \ + __next = (head)->next; \ + if ((head) == __next) { \ (head)->prev = (queue_entry_t) (elt); \ } \ else { \ - ((type)next)->field.prev = (queue_entry_t)(elt);\ + ((type)__next)->field.prev = (queue_entry_t)(elt);\ } \ - (elt)->field.next = next; \ + (elt)->field.next = __next; \ (elt)->field.prev = head; \ (head)->next = (queue_entry_t) elt; \ MACRO_END @@ -378,7 +383,7 @@ MACRO_END */ #define queue_insert_before(head, elt, cur, type, field) \ MACRO_BEGIN \ - register queue_entry_t prev; \ + register queue_entry_t __prev; \ \ if ((head) == (queue_entry_t)(cur)) { \ (elt)->field.next = (head); \ @@ -386,8 +391,8 @@ MACRO_BEGIN \ (elt)->field.prev = (head); \ (head)->next = (queue_entry_t)(elt); \ } else { /* last element */ \ - prev = (elt)->field.prev = (head)->prev; \ - ((type)prev)->field.next = (queue_entry_t)(elt);\ + __prev = (elt)->field.prev = (head)->prev; \ + ((type)__prev)->field.next = (queue_entry_t)(elt);\ } \ (head)->prev = (queue_entry_t)(elt); \ } else { \ @@ -397,8 +402,8 @@ MACRO_BEGIN \ (elt)->field.prev = (head); \ (head)->next = (queue_entry_t)(elt); \ } else { /* middle element */ \ - prev = (elt)->field.prev = (cur)->field.prev; \ - ((type)prev)->field.next = (queue_entry_t)(elt);\ + __prev = (elt)->field.prev = (cur)->field.prev; \ + ((type)__prev)->field.next = (queue_entry_t)(elt);\ } \ (cur)->field.prev = (queue_entry_t)(elt); \ } \ @@ -418,7 +423,7 @@ MACRO_END */ #define queue_insert_after(head, elt, cur, type, field) \ MACRO_BEGIN \ - register queue_entry_t next; \ + register queue_entry_t __next; \ \ if ((head) == (queue_entry_t)(cur)) { \ (elt)->field.prev = (head); \ @@ -426,8 +431,8 @@ MACRO_BEGIN \ (elt)->field.next = (head); \ (head)->prev = (queue_entry_t)(elt); \ } else { /* first element */ \ - next = (elt)->field.next = (head)->next; \ - ((type)next)->field.prev = (queue_entry_t)(elt);\ + __next = (elt)->field.next = (head)->next; \ + ((type)__next)->field.prev = (queue_entry_t)(elt);\ } \ (head)->next = 
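The wholesale rename of the queue macros' internal temporaries from prev/next to __prev/__next just above is a macro-hygiene fix: queue_enter() and friends expand their arguments textually, so a caller variable that happened to be named prev or next could be captured by the macro's own local. A compilable illustration with a simplified two-pointer element (the struct and macro names here are invented for the example):

#include <assert.h>

struct qe { struct qe *next, *prev; };

/* Old style: the macro-local is named 'prev' and can capture a caller
 * identifier of the same name. */
#define QUEUE_ENTER_OLD(head, elt)		\
do {						\
	struct qe *prev = (head)->prev;		\
	prev->next = (elt);			\
	(elt)->prev = prev;			\
	(elt)->next = (head);			\
	(head)->prev = (elt);			\
} while (0)

/* New style, as in the patch: a reserved-looking name cannot collide. */
#define QUEUE_ENTER_NEW(head, elt)		\
do {						\
	struct qe *__prev = (head)->prev;	\
	__prev->next = (elt);			\
	(elt)->prev = __prev;			\
	(elt)->next = (head);			\
	(head)->prev = (elt);			\
} while (0)

int
main(void)
{
	struct qe head = { &head, &head };
	struct qe a, b;
	struct qe *prev = &b;	/* caller variable colliding with the old local */

	QUEUE_ENTER_OLD(&head, &a);	/* safe: argument never mentions 'prev' */
	QUEUE_ENTER_NEW(&head, prev);	/* safe now: '__prev' cannot capture it */
	assert(head.next == &a && a.next == &b && b.next == &head);

	/* QUEUE_ENTER_OLD(&head, prev) would instead bind 'prev' to the
	 * macro's own local and link the wrong element. */
	return (0);
}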
(queue_entry_t)(elt); \ } else { \ @@ -437,8 +442,8 @@ MACRO_BEGIN \ (elt)->field.next = (head); \ (head)->prev = (queue_entry_t)(elt); \ } else { /* middle element */ \ - next = (elt)->field.next = (cur)->field.next; \ - ((type)next)->field.prev = (queue_entry_t)(elt);\ + __next = (elt)->field.next = (cur)->field.next; \ + ((type)__next)->field.prev = (queue_entry_t)(elt);\ } \ (cur)->field.next = (queue_entry_t)(elt); \ } \ @@ -463,20 +468,20 @@ MACRO_END */ #define queue_remove(head, elt, type, field) \ MACRO_BEGIN \ - register queue_entry_t next, prev; \ + register queue_entry_t __next, __prev; \ \ - next = (elt)->field.next; \ - prev = (elt)->field.prev; \ + __next = (elt)->field.next; \ + __prev = (elt)->field.prev; \ \ - if ((head) == next) \ - (head)->prev = prev; \ + if ((head) == __next) \ + (head)->prev = __prev; \ else \ - ((type)next)->field.prev = prev; \ + ((type)__next)->field.prev = __prev; \ \ - if ((head) == prev) \ - (head)->next = next; \ + if ((head) == __prev) \ + (head)->next = __next; \ else \ - ((type)prev)->field.next = next; \ + ((type)__prev)->field.next = __next; \ MACRO_END /* @@ -490,16 +495,16 @@ MACRO_END */ #define queue_remove_first(head, entry, type, field) \ MACRO_BEGIN \ - register queue_entry_t next; \ + register queue_entry_t __next; \ \ (entry) = (type) ((head)->next); \ - next = (entry)->field.next; \ + __next = (entry)->field.next; \ \ - if ((head) == next) \ + if ((head) == __next) \ (head)->prev = (head); \ else \ - ((type)(next))->field.prev = (head); \ - (head)->next = next; \ + ((type)(__next))->field.prev = (head); \ + (head)->next = __next; \ MACRO_END /* @@ -513,16 +518,16 @@ MACRO_END */ #define queue_remove_last(head, entry, type, field) \ MACRO_BEGIN \ - register queue_entry_t prev; \ + register queue_entry_t __prev; \ \ (entry) = (type) ((head)->prev); \ - prev = (entry)->field.prev; \ + __prev = (entry)->field.prev; \ \ - if ((head) == prev) \ + if ((head) == __prev) \ (head)->next = (head); \ else \ - ((type)(prev))->field.next = (head); \ - (head)->prev = prev; \ + ((type)(__prev))->field.next = (head); \ + (head)->prev = __prev; \ MACRO_END /* @@ -548,7 +553,7 @@ MACRO_END */ #define queue_new_head(old, new, type, field) \ MACRO_BEGIN \ - if (!queue_empty(new)) { \ + if (!queue_empty(old)) { \ *(new) = *(old); \ ((type)((new)->next))->field.prev = (new); \ ((type)((new)->prev))->field.next = (new); \ @@ -575,12 +580,10 @@ MACRO_END !queue_end((head), (queue_entry_t)(elt)); \ (elt) = (type) queue_next(&(elt)->field)) -#include - -#ifdef __APPLE_API_PRIVATE - #ifdef MACH_KERNEL_PRIVATE +#include + /*----------------------------------------------------------------*/ /* * Define macros for queues with locks. @@ -597,7 +600,7 @@ typedef struct mpqueue_head mpqueue_head_t; #define mpqueue_init(q) \ MACRO_BEGIN \ queue_init(&(q)->head); \ - simple_lock_init(&(q)->lock, ETAP_MISC_Q); \ + simple_lock_init(&(q)->lock, 0); \ MACRO_END #define mpenqueue_tail(q, elt) \ @@ -619,6 +622,4 @@ MACRO_END #endif /* MACH_KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ - #endif /* _KERN_QUEUE_H_ */ diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h index c6870f82b..4c16001bd 100644 --- a/osfmk/kern/sched.h +++ b/osfmk/kern/sched.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -61,7 +61,6 @@ #ifndef _KERN_SCHED_H_ #define _KERN_SCHED_H_ -#include #include #include @@ -72,22 +71,6 @@ #include #include -#if STAT_TIME - -/* - * Statistical timing uses microseconds as timer units. - */ -#define PRI_SHIFT (16 - SCHED_TICK_SHIFT) - -#else /* STAT_TIME */ - -/* - * Otherwise machine provides shift(s) based on time units it uses. - */ -#include - -#endif /* STAT_TIME */ - #define NRQS 128 /* 128 levels per run queue */ #define NRQBM (NRQS / 32) /* number of words per bit map */ @@ -156,25 +139,21 @@ #define BASEPRI_RTQUEUES (BASEPRI_REALTIME + 1) /* 97 */ #define BASEPRI_REALTIME (MAXPRI - (NRQS / 4) + 1) /* 96 */ -#define MAXPRI_STANDARD (BASEPRI_REALTIME - 1) /* 95 */ - -#define MAXPRI_KERNEL MAXPRI_STANDARD /* 95 */ +#define MAXPRI_KERNEL (BASEPRI_REALTIME - 1) /* 95 */ #define BASEPRI_PREEMPT (MAXPRI_KERNEL - 2) /* 93 */ #define BASEPRI_KERNEL (MINPRI_KERNEL + 1) /* 81 */ #define MINPRI_KERNEL (MAXPRI_KERNEL - (NRQS / 8) + 1) /* 80 */ -#define MAXPRI_SYSTEM (MINPRI_KERNEL - 1) /* 79 */ -#define MINPRI_SYSTEM (MAXPRI_SYSTEM - (NRQS / 8) + 1) /* 64 */ +#define MAXPRI_RESERVED (MINPRI_KERNEL - 1) /* 79 */ +#define MINPRI_RESERVED (MAXPRI_RESERVED - (NRQS / 8) + 1) /* 64 */ -#define MAXPRI_USER (MINPRI_SYSTEM - 1) /* 63 */ +#define MAXPRI_USER (MINPRI_RESERVED - 1) /* 63 */ #define BASEPRI_CONTROL (BASEPRI_DEFAULT + 17) /* 48 */ #define BASEPRI_FOREGROUND (BASEPRI_DEFAULT + 16) /* 47 */ #define BASEPRI_BACKGROUND (BASEPRI_DEFAULT + 15) /* 46 */ #define BASEPRI_DEFAULT (MAXPRI_USER - (NRQS / 4)) /* 31 */ #define MINPRI_USER MINPRI /* 0 */ -#define MINPRI_STANDARD MINPRI_USER /* 0 */ - /* * Macro to check for invalid priorities. */ @@ -224,9 +203,6 @@ MACRO_END extern run_queue_t run_queue_remove( thread_t thread); -/* Periodic computation of load factors */ -extern void compute_mach_factor(void); - /* Handle quantum expiration for an executing thread */ extern void thread_quantum_expire( timer_call_param_t processor, @@ -242,53 +218,54 @@ extern uint32_t std_quantum_us; extern uint32_t max_rt_quantum, min_rt_quantum; +extern uint32_t sched_cswtime; + /* - * Shift structures for holding update shifts. Actual computation - * is usage = (usage >> shift1) +/- (usage >> abs(shift2)) where the - * +/- is determined by the sign of shift 2. + * Age usage (1 << SCHED_TICK_SHIFT) times per second. */ -struct shift { - int shift1; - int shift2; -}; +#define SCHED_TICK_SHIFT 3 + +extern unsigned sched_tick; +extern uint32_t sched_tick_interval; + +/* Periodic computation of various averages */ +extern void compute_averages(void); -typedef struct shift *shift_t, shift_data_t; +extern void compute_averunnable( + void *nrun); + +extern void compute_stack_target( + void *arg); /* - * Age usage (1 << SCHED_TICK_SHIFT) times per second. + * Conversion factor from usage + * to priority. + */ +extern uint32_t sched_pri_shift; + +/* + * Scaling factor for usage + * based on load. */ +extern int8_t sched_load_shifts[NRQS]; -extern unsigned sched_tick; +extern int32_t sched_poll_yield_shift; +extern uint32_t sched_safe_duration; -#define SCHED_TICK_SHIFT 3 +extern uint64_t max_unsafe_computation; +extern uint64_t max_poll_computation; -#define SCHED_SCALE 128 -#define SCHED_SHIFT 7 +extern uint32_t avenrun[3], mach_factor[3]; /* * thread_timer_delta macro takes care of both thread timers. 
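With NRQS = 128, the renamed priority limits in the hunk above resolve to the fixed values the trailing comments claim. A self-checking sketch (MAXPRI = NRQS - 1 and MINPRI = 0 are assumed here, matching those comments):

#include <assert.h>

#define NRQS			128
#define MAXPRI			(NRQS - 1)	/* assumed: 127 */
#define MINPRI			0

#define BASEPRI_RTQUEUES	(BASEPRI_REALTIME + 1)
#define BASEPRI_REALTIME	(MAXPRI - (NRQS / 4) + 1)
#define MAXPRI_KERNEL		(BASEPRI_REALTIME - 1)
#define BASEPRI_PREEMPT		(MAXPRI_KERNEL - 2)
#define BASEPRI_KERNEL		(MINPRI_KERNEL + 1)
#define MINPRI_KERNEL		(MAXPRI_KERNEL - (NRQS / 8) + 1)
#define MAXPRI_RESERVED		(MINPRI_KERNEL - 1)
#define MINPRI_RESERVED		(MAXPRI_RESERVED - (NRQS / 8) + 1)
#define MAXPRI_USER		(MINPRI_RESERVED - 1)
#define BASEPRI_DEFAULT		(MAXPRI_USER - (NRQS / 4))

int
main(void)
{
	assert(BASEPRI_RTQUEUES == 97 && BASEPRI_REALTIME == 96);
	assert(MAXPRI_KERNEL == 95 && BASEPRI_PREEMPT == 93);
	assert(BASEPRI_KERNEL == 81 && MINPRI_KERNEL == 80);
	assert(MAXPRI_RESERVED == 79 && MINPRI_RESERVED == 64);
	assert(MAXPRI_USER == 63 && BASEPRI_DEFAULT == 31);
	return (0);
}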
*/ -#define thread_timer_delta(thread) \ +#define thread_timer_delta(thread, delta) \ MACRO_BEGIN \ - register uint32_t delta; \ - \ - delta = 0; \ - TIMER_DELTA((thread)->system_timer, \ - (thread)->system_timer_save, delta); \ - TIMER_DELTA((thread)->user_timer, \ - (thread)->user_timer_save, delta); \ - (thread)->cpu_delta += delta; \ - (thread)->sched_delta += (delta * \ - (thread)->processor_set->sched_load); \ + (delta) = timer_delta(&(thread)->system_timer, \ + &(thread)->system_timer_save); \ + (delta) += timer_delta(&(thread)->user_timer, \ + &(thread)->user_timer_save); \ MACRO_END -#if SIMPLE_CLOCK -/* - * sched_usec is an exponential average of number of microseconds - * in a second for clock drift compensation. - */ - -extern int sched_usec; -#endif /* SIMPLE_CLOCK */ - #endif /* _KERN_SCHED_H_ */ diff --git a/osfmk/kern/mach_factor.c b/osfmk/kern/sched_average.c similarity index 69% rename from osfmk/kern/mach_factor.c rename to osfmk/kern/sched_average.c index 9a0cdedca..61084906d 100644 --- a/osfmk/kern/mach_factor.c +++ b/osfmk/kern/sched_average.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -50,26 +50,19 @@ /* */ /* - * File: kern/mach_factor.c * Author: Avadis Tevanian, Jr. * Date: 1986 * - * Compute the Mach Factor. + * Compute various averages. */ -#include +#include -#include -#include #include #include #include #include -#if MACH_KERNEL -#include -#include -#endif /* MACH_KERNEL */ - + uint32_t avenrun[3] = {0, 0, 0}; uint32_t mach_factor[3] = {0, 0, 0}; @@ -88,28 +81,45 @@ static uint32_t fract[3] = { #undef base #undef frac +static unsigned int sched_nrun; + +typedef void (*sched_avg_comp_t)( + void *param); + +#define SCHED_AVG_SECS(n) ((n) << SCHED_TICK_SHIFT) + +static struct sched_average { + sched_avg_comp_t comp; + void *param; + int period; + int tick; +} sched_average[] = { + { compute_averunnable, &sched_nrun, SCHED_AVG_SECS(5), 0 }, + { compute_stack_target, NULL, SCHED_AVG_SECS(5), 1 }, + { NULL, NULL, 0, 0 } +}; + +typedef struct sched_average *sched_average_t; + void -compute_mach_factor(void) +compute_averages(void) { register processor_set_t pset = &default_pset; register int ncpus; register int nthreads, nshared; + sched_average_t avg; register uint32_t factor_now = 0; register uint32_t average_now = 0; register uint32_t load_now = 0; if ((ncpus = pset->processor_count) > 0) { /* - * Retrieve thread counts. + * Retrieve counts, ignoring + * the current thread. */ - nthreads = pset->run_count; + nthreads = pset->run_count - 1; nshared = pset->share_count; - /* - * Don't include the current thread. - */ - nthreads -= 1; - /* * Load average and mach factor calculations for * those which ask about these things. @@ -125,22 +135,46 @@ compute_mach_factor(void) pset->load_average = ((pset->load_average << 2) + average_now) / 5; /* - * Compute the load factor used by the timesharing - * algorithm. + * Compute the timeshare priority + * conversion factor based on loading. 
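The two-argument thread_timer_delta(thread, delta) macro introduced at the top of this hunk stops folding usage into per-thread cpu_delta/sched_delta accumulators and instead returns the combined system-plus-user growth through a caller-supplied variable. A standalone sketch of its shape, with an invented tick-counter stand-in for the kernel timer type:

#include <assert.h>
#include <stdint.h>

/* Invented stand-in: a timer is an accumulated tick count, and
 * timer_delta() reports the growth since *save while updating *save. */
typedef struct { uint64_t ticks; } timer_data_t;

static uint64_t
timer_delta(timer_data_t *timer, uint64_t *save)
{
	uint64_t prior = *save;

	*save = timer->ticks;
	return (*save - prior);
}

struct thread {
	timer_data_t	system_timer, user_timer;
	uint64_t	system_timer_save, user_timer_save;
};

/* Same shape as the new macro in sched.h. */
#define thread_timer_delta(thread, delta)			\
do {								\
	(delta) = timer_delta(&(thread)->system_timer,		\
	    &(thread)->system_timer_save);			\
	(delta) += timer_delta(&(thread)->user_timer,		\
	    &(thread)->user_timer_save);			\
} while (0)

int
main(void)
{
	struct thread th = { {100}, {40}, 0, 0 };
	uint64_t delta;

	thread_timer_delta(&th, delta);
	assert(delta == 140);	/* first call sees everything accumulated */

	th.system_timer.ticks += 10;
	thread_timer_delta(&th, delta);
	assert(delta == 10);	/* later calls see only the growth */
	return (0);
}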
*/ if (nshared > nthreads) nshared = nthreads; - if (nshared > ncpus) - load_now = (nshared << SCHED_SHIFT) / ncpus; + if (nshared > ncpus) { + if (ncpus > 1) + load_now = nshared / ncpus; + else + load_now = nshared; - pset->sched_load = (pset->sched_load + load_now) >> 1; + if (load_now > NRQS - 1) + load_now = NRQS - 1; + } + + /* + * The conversion factor consists of + * two components: a fixed value based + * on the absolute time unit, and a + * dynamic portion based on loading. + * + * Zero loading results in an out of range + * shift count. Accumulated usage is ignored + * during conversion and new usage deltas + * are discarded. + */ + pset->pri_shift = sched_pri_shift - sched_load_shifts[load_now]; } else { pset->mach_factor = pset->load_average = 0; - pset->sched_load = 0; + pset->pri_shift = INT8_MAX; + nthreads = pset->run_count; } + /* + * Sample total running threads. + */ + sched_nrun = nthreads; /* * Compute old-style Mach load averages. */ @@ -157,18 +191,12 @@ compute_mach_factor(void) } /* - * Call out to BSD for averunnable. + * Compute averages in other components. */ - { -#define AVGTICK_PERIOD (5 << SCHED_TICK_SHIFT) - static uint32_t avgtick_count; - extern void compute_averunnable( - int nrun); - - if (++avgtick_count == 1) - compute_averunnable(nthreads); - else - if (avgtick_count >= AVGTICK_PERIOD) - avgtick_count = 0; + for (avg = sched_average; avg->comp != NULL; ++avg) { + if (++avg->tick >= avg->period) { + (*avg->comp)(avg->param); + avg->tick = 0; + } } } diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 73567f9a7..01b353cc1 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,20 +59,24 @@ */ #include -#include #include -#include #include + +#include #include +#include +#include + #include #include -#include + +#include #include #include #include #include -#include +#include #include #include #include @@ -84,13 +88,12 @@ #include #include #include -#include +#include + #include #include #include -#include -#include -#include /*** ??? fix so this can be removed ***/ + #include #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ @@ -105,8 +108,6 @@ int max_poll_quanta = MAX_POLL_QUANTA; #define SCHED_POLL_YIELD_SHIFT 4 /* 1/16 */ int sched_poll_yield_shift = SCHED_POLL_YIELD_SHIFT; -uint32_t std_quantum_us; - uint64_t max_unsafe_computation; uint32_t sched_safe_duration; uint64_t max_poll_computation; @@ -114,25 +115,33 @@ uint64_t max_poll_computation; uint32_t std_quantum; uint32_t min_std_quantum; +uint32_t std_quantum_us; + uint32_t max_rt_quantum; uint32_t min_rt_quantum; -static uint32_t sched_tick_interval; +uint32_t sched_cswtime; + +static uint32_t delay_idle_limit, delay_idle_spin; +static processor_t delay_idle( + processor_t processor, + thread_t self); unsigned sched_tick; +uint32_t sched_tick_interval; -#if SIMPLE_CLOCK -int sched_usec; -#endif /* SIMPLE_CLOCK */ +uint32_t sched_pri_shift; /* Forwards */ void wait_queues_init(void); +static void load_shift_init(void); + static thread_t choose_thread( processor_set_t pset, processor_t processor); -static void do_thread_scan(void); +static void thread_update_scan(void); #if DEBUG static @@ -201,6 +210,8 @@ struct wait_queue wait_queues[NUMQUEUES]; #define wait_hash(event) \ ((((int)(event) < 0)?
~(int)(event): (int)(event)) % NUMQUEUES) +int8_t sched_load_shifts[NRQS]; + void sched_init(void) { @@ -218,47 +229,66 @@ sched_init(void) (1 << SCHED_TICK_SHIFT); wait_queues_init(); - pset_sys_bootstrap(); /* initialize processor mgmt. */ + load_shift_init(); + pset_init(&default_pset); sched_tick = 0; -#if SIMPLE_CLOCK - sched_usec = 0; -#endif /* SIMPLE_CLOCK */ ast_init(); } void sched_timebase_init(void) { - uint64_t abstime; + uint64_t abstime; + uint32_t shift; + /* standard timeslicing quantum */ clock_interval_to_absolutetime_interval( std_quantum_us, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); std_quantum = abstime; - /* 250 us */ + /* smallest remaining quantum (250 us) */ clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); min_std_quantum = abstime; - /* 50 us */ + /* smallest rt computation (50 us) */ clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); min_rt_quantum = abstime; - /* 50 ms */ + /* maximum rt computation (50 ms) */ clock_interval_to_absolutetime_interval( 50, 1000*NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); max_rt_quantum = abstime; - clock_interval_to_absolutetime_interval(1000 >> SCHED_TICK_SHIFT, - USEC_PER_SEC, &abstime); + /* scheduler tick interval */ + clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT, + NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); sched_tick_interval = abstime; + /* + * Compute conversion factor from usage to + * timesharing priorities with 5/8 ** n aging. + */ + abstime = (abstime * 5) / 3; + for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift) + abstime >>= 1; + sched_pri_shift = shift; + max_unsafe_computation = max_unsafe_quanta * std_quantum; max_poll_computation = max_poll_quanta * std_quantum; + + /* delay idle constant(s) (60, 1 us) */ + clock_interval_to_absolutetime_interval(60, NSEC_PER_USEC, &abstime); + assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); + delay_idle_limit = abstime; + + clock_interval_to_absolutetime_interval(1, NSEC_PER_USEC, &abstime); + assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); + delay_idle_spin = abstime; } void @@ -271,20 +301,38 @@ wait_queues_init(void) } } +/* + * Set up values for timeshare + * loading factors. + */ +static void +load_shift_init(void) +{ + int8_t k, *p = sched_load_shifts; + uint32_t i, j; + + *p++ = INT8_MIN; *p++ = 0; + + for (i = j = 2, k = 1; i < NRQS; ++k) { + for (j <<= 1; i < j; ++i) + *p++ = k; + } +} + /* * Thread wait timer expiration.
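sched_timebase_init() above derives sched_pri_shift from the scheduler tick interval (scale by 5/3, then shift down until at or below BASEPRI_DEFAULT), and load_shift_init() fills sched_load_shifts[n] with floor(log2(n)), pinning n == 0 at INT8_MIN. compute_averages() in the previous file then combines the two: pri_shift = sched_pri_shift - sched_load_shifts[load]. A self-checking sketch of both computations, using a made-up 125 ms tick expressed in plain nanoseconds in place of the machine's absolute-time units:

#include <assert.h>
#include <stdint.h>

#define NRQS		128
#define BASEPRI_DEFAULT	31

static int8_t sched_load_shifts[NRQS];

/* Same table construction as load_shift_init(): floor(log2(n)). */
static void
load_shift_init(void)
{
	int8_t k, *p = sched_load_shifts;
	uint32_t i, j;

	*p++ = INT8_MIN; *p++ = 0;

	for (i = j = 2, k = 1; i < NRQS; ++k) {
		for (j <<= 1; i < j; ++i)
			*p++ = k;
	}
}

int
main(void)
{
	uint64_t abstime = 125000000;	/* pretend tick: 125 ms in ns */
	uint32_t shift, sched_pri_shift;

	/* Same derivation as sched_timebase_init(). */
	abstime = (abstime * 5) / 3;
	for (shift = 0; abstime > BASEPRI_DEFAULT; ++shift)
		abstime >>= 1;
	sched_pri_shift = shift;
	assert(sched_pri_shift == 23);	/* 208333333 >> 23 == 24 <= 31 */

	load_shift_init();
	assert(sched_load_shifts[1] == 0);
	assert(sched_load_shifts[3] == 1);
	assert(sched_load_shifts[64] == 6 && sched_load_shifts[127] == 6);

	/* compute_averages(): heavier load, smaller shift, harsher aging. */
	assert(sched_pri_shift - sched_load_shifts[4] == 21);
	return (0);
}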
*/ void thread_timer_expire( - timer_call_param_t p0, - timer_call_param_t p1) + void *p0, + __unused void *p1) { thread_t thread = p0; spl_t s; s = splsched(); thread_lock(thread); - if (--thread->wait_timer_active == 1) { + if (--thread->wait_timer_active == 0) { if (thread->wait_timer_is_set) { thread->wait_timer_is_set = FALSE; clear_wait_internal(thread, THREAD_TIMED_OUT); @@ -314,9 +362,8 @@ thread_set_timer( thread_lock(thread); if ((thread->state & TH_WAIT) != 0) { clock_interval_to_deadline(interval, scale_factor, &deadline); - timer_call_enter(&thread->wait_timer, deadline); - assert(!thread->wait_timer_is_set); - thread->wait_timer_active++; + if (!timer_call_enter(&thread->wait_timer, deadline)) + thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } thread_unlock(thread); @@ -333,9 +380,8 @@ thread_set_timer_deadline( s = splsched(); thread_lock(thread); if ((thread->state & TH_WAIT) != 0) { - timer_call_enter(&thread->wait_timer, deadline); - assert(!thread->wait_timer_is_set); - thread->wait_timer_active++; + if (!timer_call_enter(&thread->wait_timer, deadline)) + thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } thread_unlock(thread); @@ -360,75 +406,85 @@ thread_cancel_timer(void) } /* - * Set up thread timeout element when thread is created. + * thread_unblock: + * + * Unblock thread on wake up. + * + * Returns TRUE if the thread is still running. + * + * Thread must be locked. */ -void -thread_timer_setup( - thread_t thread) +boolean_t +thread_unblock( + thread_t thread, + wait_result_t wresult) { - extern void thread_depress_expire( - timer_call_param_t p0, - timer_call_param_t p1); - - timer_call_setup(&thread->wait_timer, thread_timer_expire, thread); - thread->wait_timer_is_set = FALSE; - thread->wait_timer_active = 1; - - timer_call_setup(&thread->depress_timer, thread_depress_expire, thread); - thread->depress_timer_active = 1; + boolean_t result = FALSE; - thread->ref_count++; -} - -void -thread_timer_terminate(void) -{ - thread_t thread = current_thread(); - wait_result_t res; - spl_t s; + /* + * Set wait_result. + */ + thread->wait_result = wresult; - s = splsched(); - thread_lock(thread); + /* + * Cancel pending wait timer. + */ if (thread->wait_timer_is_set) { if (timer_call_cancel(&thread->wait_timer)) thread->wait_timer_active--; thread->wait_timer_is_set = FALSE; } - thread->wait_timer_active--; + /* + * Update scheduling state. + */ + thread->state &= ~(TH_WAIT|TH_UNINT); - while (thread->wait_timer_active > 0) { - thread_unlock(thread); - splx(s); + if (!(thread->state & TH_RUN)) { + thread->state |= TH_RUN; - delay(1); + /* + * Mark unblocked if call out. + */ + if (thread->options & TH_OPT_CALLOUT) + call_thread_unblock(); - s = splsched(); - thread_lock(thread); + /* + * Update pset run counts. + */ + pset_run_incr(thread->processor_set); + if (thread->sched_mode & TH_MODE_TIMESHARE) + pset_share_incr(thread->processor_set); } + else + result = TRUE; - thread->depress_timer_active--; - - while (thread->depress_timer_active > 0) { - thread_unlock(thread); - splx(s); - - delay(1); - - s = splsched(); - thread_lock(thread); + /* + * Calculate deadline for real-time threads. + */ + if (thread->sched_mode & TH_MODE_REALTIME) { + thread->realtime.deadline = mach_absolute_time(); + thread->realtime.deadline += thread->realtime.constraint; } - thread_unlock(thread); - splx(s); + /* + * Clear old quantum, fail-safe computation, etc. 
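thread_set_timer() and thread_set_timer_deadline() above now consult timer_call_enter()'s return value: it reports whether the callout was already pending, so the wait_timer_active reference count is bumped only when a new callout is actually armed. A small model of that counting discipline (the timer implementation below is a stub invented for the example, not the kernel's):

#include <assert.h>
#include <stdbool.h>

/* Stub model: timer_call_enter() returns true if the call was already
 * pending (it is merely re-armed), false if this arms a new callout. */
struct timer_call { bool pending; };

static bool
timer_call_enter(struct timer_call *call)
{
	bool was_pending = call->pending;

	call->pending = true;
	return (was_pending);
}

static bool
timer_call_cancel(struct timer_call *call)
{
	bool was_pending = call->pending;

	call->pending = false;
	return (was_pending);
}

struct thread { struct timer_call wait_timer; int wait_timer_active; };

int
main(void)
{
	struct thread th = { { false }, 0 };

	/* Arming twice takes only one reference, as in thread_set_timer(). */
	if (!timer_call_enter(&th.wait_timer))
		th.wait_timer_active++;
	if (!timer_call_enter(&th.wait_timer))
		th.wait_timer_active++;
	assert(th.wait_timer_active == 1);

	/* Cancellation drops the reference only if the callout was pending. */
	if (timer_call_cancel(&th.wait_timer))
		th.wait_timer_active--;
	assert(th.wait_timer_active == 0);
	return (0);
}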
+ */ + thread->current_quantum = 0; + thread->computation_metered = 0; + thread->reason = AST_NONE; - thread_deallocate(thread); + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, + (int)thread, (int)thread->sched_pri, 0, 0, 0); + + return (result); } /* - * Routine: thread_go_locked + * Routine: thread_go * Purpose: - * Start a thread running. + * Unblock and dispatch thread. * Conditions: * thread lock held, IPC locks may be held. * thread must have been pulled from wait queue under same lock hold. @@ -437,7 +493,7 @@ thread_timer_terminate(void) * KERN_NOT_WAITING - Thread was not waiting */ kern_return_t -thread_go_locked( +thread_go( thread_t thread, wait_result_t wresult) { @@ -446,42 +502,8 @@ thread_go_locked( assert(thread->wait_queue == WAIT_QUEUE_NULL); if ((thread->state & (TH_WAIT|TH_TERMINATE)) == TH_WAIT) { - thread_roust_t roust_hint; - - thread->state &= ~(TH_WAIT|TH_UNINT); - _mk_sp_thread_unblock(thread); - - roust_hint = thread->roust; - thread->roust = NULL; - if ( roust_hint != NULL && - (*roust_hint)(thread, wresult) ) { - if (thread->wait_timer_is_set) { - if (timer_call_cancel(&thread->wait_timer)) - thread->wait_timer_active--; - thread->wait_timer_is_set = FALSE; - } - - return (KERN_SUCCESS); - } - - thread->wait_result = wresult; - - if (!(thread->state & TH_RUN)) { - thread->state |= TH_RUN; - - if (thread->active_callout) - call_thread_unblock(); - - pset_run_incr(thread->processor_set); - if (thread->sched_mode & TH_MODE_TIMESHARE) - pset_share_incr(thread->processor_set); - + if (!thread_unblock(thread, wresult)) thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); - } - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, - (int)thread, (int)thread->sched_pri, 0, 0, 0); return (KERN_SUCCESS); } @@ -512,8 +534,8 @@ thread_mark_wait_locked( * are OK, we have to honor mask settings (outer-scoped code may * not be able to handle aborts at the moment). */ - if (interruptible > thread->interrupt_level) - interruptible = thread->interrupt_level; + if (interruptible > (thread->options & TH_OPT_INTMASK)) + interruptible = thread->options & TH_OPT_INTMASK; at_safe_point = (interruptible == THREAD_ABORTSAFE); @@ -523,7 +545,6 @@ thread_mark_wait_locked( (thread->state & TH_ABORT_SAFELY)) ) { thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT); thread->at_safe_point = at_safe_point; - thread->sleep_stamp = sched_tick; return (thread->wait_result = THREAD_WAITING); } else @@ -552,31 +573,11 @@ thread_interrupt_level( wait_interrupt_t new_level) { thread_t thread = current_thread(); - wait_interrupt_t result = thread->interrupt_level; - - thread->interrupt_level = new_level; - return result; -} + wait_interrupt_t result = thread->options & TH_OPT_INTMASK; -/* - * Routine: assert_wait_timeout - * Purpose: - * Assert that the thread intends to block, - * waiting for a timeout (no user known event). 
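thread_mark_wait_locked() above clamps the requested interruptibility against a per-thread mask now kept in the low bits of thread->options (TH_OPT_INTMASK), replacing the old interrupt_level field. A sketch of the clamp; the numeric values of the wait levels and of TH_OPT_INTMASK are assumptions for the example, chosen so that "more interruptible" compares greater:

#include <assert.h>

#define TH_OPT_INTMASK		0x03	/* assumed mask value */

#define THREAD_UNINT		0	/* assumed: not interruptible */
#define THREAD_INTERRUPTIBLE	1	/* assumed: may be interrupted */
#define THREAD_ABORTSAFE	2	/* assumed: may be aborted safely */

/* A wait may never be more interruptible than the thread-wide mask. */
static int
clamp_interruptible(int options, int interruptible)
{
	if (interruptible > (options & TH_OPT_INTMASK))
		interruptible = options & TH_OPT_INTMASK;
	return (interruptible);
}

int
main(void)
{
	/* Thread allows aborts: requests pass through unchanged. */
	assert(clamp_interruptible(THREAD_ABORTSAFE, THREAD_INTERRUPTIBLE)
	    == THREAD_INTERRUPTIBLE);

	/* Thread masked uninterruptible: even ABORTSAFE waits clamp down. */
	assert(clamp_interruptible(THREAD_UNINT, THREAD_ABORTSAFE)
	    == THREAD_UNINT);
	return (0);
}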
- */ -unsigned int assert_wait_timeout_event; - -wait_result_t -assert_wait_timeout( - mach_msg_timeout_t msecs, - wait_interrupt_t interruptible) -{ - wait_result_t res; + thread->options = (thread->options & ~TH_OPT_INTMASK) | (new_level & TH_OPT_INTMASK); - res = assert_wait((event_t)&assert_wait_timeout_event, interruptible); - if (res == THREAD_WAITING) - thread_set_timer(msecs, 1000*NSEC_PER_USEC); - return res; + return result; } /* @@ -591,7 +592,6 @@ assert_wait_possible(void) { thread_t thread; - extern unsigned int debug_mode; #if DEBUG if(debug_mode) return TRUE; /* Always succeed in debug mode */ @@ -620,55 +620,63 @@ assert_wait( index = wait_hash(event); wq = &wait_queues[index]; - return wait_queue_assert_wait(wq, event, interruptible); + return wait_queue_assert_wait(wq, event, interruptible, 0); } -__private_extern__ -wait_queue_t -wait_event_wait_queue( - event_t event) +wait_result_t +assert_wait_timeout( + event_t event, + wait_interrupt_t interruptible, + uint32_t interval, + uint32_t scale_factor) { + thread_t thread = current_thread(); + wait_result_t wresult; + wait_queue_t wqueue; + uint64_t deadline; + spl_t s; + assert(event != NO_EVENT); + wqueue = &wait_queues[wait_hash(event)]; + + s = splsched(); + wait_queue_lock(wqueue); + thread_lock(thread); + + clock_interval_to_deadline(interval, scale_factor, &deadline); + wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event, + interruptible, deadline, thread); + + thread_unlock(thread); + wait_queue_unlock(wqueue); + splx(s); - return (&wait_queues[wait_hash(event)]); + return (wresult); } wait_result_t -assert_wait_prim( +assert_wait_deadline( event_t event, - thread_roust_t roust_hint, - uint64_t deadline, - wait_interrupt_t interruptible) + wait_interrupt_t interruptible, + uint64_t deadline) { thread_t thread = current_thread(); - wait_result_t wresult; - wait_queue_t wq; + wait_result_t wresult; + wait_queue_t wqueue; spl_t s; assert(event != NO_EVENT); - - wq = &wait_queues[wait_hash(event)]; + wqueue = &wait_queues[wait_hash(event)]; s = splsched(); - wait_queue_lock(wq); + wait_queue_lock(wqueue); thread_lock(thread); - wresult = wait_queue_assert_wait64_locked(wq, (uint32_t)event, - interruptible, thread); - if (wresult == THREAD_WAITING) { - if (roust_hint != NULL) - thread->roust = roust_hint; - - if (deadline != 0) { - timer_call_enter(&thread->wait_timer, deadline); - assert(!thread->wait_timer_is_set); - thread->wait_timer_active++; - thread->wait_timer_is_set = TRUE; - } - } + wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event, + interruptible, deadline, thread); thread_unlock(thread); - wait_queue_unlock(wq); + wait_queue_unlock(wqueue); splx(s); return (wresult); @@ -761,8 +769,6 @@ thread_sleep_mutex( * Cause the current thread to wait until the specified event * (or deadline) occurs. The specified mutex is unlocked before * releasing the cpu. The mutex will be re-acquired before returning. 
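assert_wait_timeout() above now takes an (interval, scale_factor) pair and computes the absolute deadline itself, while assert_wait_deadline() accepts one directly; both then share wait_queue_assert_wait64_locked(). A sketch of the interval convention (absolute time is modeled as plain nanoseconds here; in the kernel, clock_interval_to_deadline() performs the real timebase conversion):

#include <assert.h>
#include <stdint.h>

#define NSEC_PER_USEC	1000u
#define NSEC_PER_SEC	1000000000u

/* Deadline is "now" plus interval units, each scale_factor ns wide. */
static uint64_t
interval_to_deadline(uint64_t now, uint32_t interval, uint32_t scale_factor)
{
	return (now + (uint64_t)interval * scale_factor);
}

int
main(void)
{
	uint64_t now = 5000;

	/* A 100 us timeout and a 2 s timeout. */
	assert(interval_to_deadline(now, 100, NSEC_PER_USEC) == now + 100000);
	assert(interval_to_deadline(now, 2, NSEC_PER_SEC)
	    == now + 2ull * NSEC_PER_SEC);
	return (0);
}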
- * - * JMM - Add hint to make sure mutex is available before rousting */ wait_result_t thread_sleep_mutex_deadline( @@ -773,13 +779,10 @@ thread_sleep_mutex_deadline( { wait_result_t res; - res = assert_wait(event, interruptible); + res = assert_wait_deadline(event, interruptible, deadline); if (res == THREAD_WAITING) { mutex_unlock(mutex); - thread_set_timer_deadline(deadline); res = thread_block(THREAD_CONTINUE_NULL); - if (res != THREAD_TIMED_OUT) - thread_cancel_timer(); mutex_lock(mutex); } return res; @@ -791,8 +794,6 @@ thread_sleep_mutex_deadline( * Cause the current thread to wait until the specified event * occurs. The specified (write) lock is unlocked before releasing * the cpu. The (write) lock will be re-acquired before returning. - * - * JMM - Add hint to make sure mutex is available before rousting */ wait_result_t thread_sleep_lock_write( @@ -811,67 +812,38 @@ thread_sleep_lock_write( return res; } - -/* - * thread_sleep_funnel: - * - * Cause the current thread to wait until the specified event - * occurs. If the thread is funnelled, the funnel will be released - * before giving up the cpu. The funnel will be re-acquired before returning. - * - * JMM - Right now the funnel is dropped and re-acquired inside - * thread_block(). At some point, this may give thread_block() a hint. - */ -wait_result_t -thread_sleep_funnel( - event_t event, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait(event, interruptible); - if (res == THREAD_WAITING) { - res = thread_block(THREAD_CONTINUE_NULL); - } - return res; -} - /* - * thread_[un]stop(thread) - * Once a thread has blocked interruptibly (via assert_wait) prevent - * it from running until thread_unstop. + * thread_stop: * - * If someone else has already stopped the thread, wait for the - * stop to be cleared, and then stop it again. + * Force a preemption point for a thread and wait + * for it to stop running. Arbitrates access among + * multiple stop requests. (released by unstop) * - * Return FALSE if interrupted. + * The thread must enter a wait state and stop via a + * separate means. * - * NOTE: thread_hold/thread_suspend should be called on the activation - * before calling thread_stop. TH_SUSP is only recognized when - * a thread blocks and only prevents clear_wait/thread_wakeup - * from restarting an interruptible wait. The wake_active flag is - * used to indicate that someone is waiting on the thread. + * Returns FALSE if interrupted. 
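thread_sleep_mutex_deadline() above shrinks from the old assert_wait()/thread_set_timer()/thread_cancel_timer() dance to a single assert_wait_deadline(), since the timeout now travels with the wait assertion itself. A control-flow sketch of the caller-side pattern, with every primitive stubbed (none of these bodies are the kernel's):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef int wait_result_t;
#define THREAD_WAITING		1
#define THREAD_AWAKENED		2
#define THREAD_TIMED_OUT	3

static bool mutex_held;

static void mutex_lock(void)   { mutex_held = true; }
static void mutex_unlock(void) { mutex_held = false; }

static wait_result_t
assert_wait_deadline(uint64_t deadline)	/* timeout armed with the assertion */
{
	(void)deadline;
	return (THREAD_WAITING);
}

static wait_result_t
thread_block(void)
{
	return (THREAD_TIMED_OUT);	/* pretend the deadline fired */
}

static wait_result_t
sleep_mutex_deadline(uint64_t deadline)
{
	wait_result_t res = assert_wait_deadline(deadline);

	if (res == THREAD_WAITING) {
		mutex_unlock();		/* drop the lock across the block */
		res = thread_block();
		mutex_lock();		/* reacquire before returning */
	}
	return (res);
}

int
main(void)
{
	mutex_lock();
	assert(sleep_mutex_deadline(1000) == THREAD_TIMED_OUT);
	assert(mutex_held);		/* lock is held again on return */
	return (0);
}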
*/ boolean_t thread_stop( - thread_t thread) + thread_t thread) { - spl_t s = splsched(); + wait_result_t wresult; + spl_t s; + s = splsched(); wake_lock(thread); while (thread->state & TH_SUSP) { - wait_result_t result; - thread->wake_active = TRUE; - result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); + wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); wake_unlock(thread); splx(s); - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); + if (wresult == THREAD_WAITING) + wresult = thread_block(THREAD_CONTINUE_NULL); - if (result != THREAD_AWAKENED) + if (wresult != THREAD_AWAKENED) return (FALSE); s = splsched(); @@ -882,7 +854,6 @@ thread_stop( thread->state |= TH_SUSP; while (thread->state & TH_RUN) { - wait_result_t result; processor_t processor = thread->last_processor; if ( processor != PROCESSOR_NULL && @@ -892,14 +863,14 @@ thread_stop( thread_unlock(thread); thread->wake_active = TRUE; - result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); + wresult = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); wake_unlock(thread); splx(s); - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); + if (wresult == THREAD_WAITING) + wresult = thread_block(THREAD_CONTINUE_NULL); - if (result != THREAD_AWAKENED) { + if (wresult != THREAD_AWAKENED) { thread_unstop(thread); return (FALSE); } @@ -917,8 +888,12 @@ thread_stop( } /* - * Clear TH_SUSP and if the thread has been stopped and is now runnable, - * put it back on the run queue. + * thread_unstop: + * + * Release a previous stop request and set + * the thread running if appropriate. + * + * Use only after a successful stop operation. */ void thread_unstop( @@ -931,19 +906,9 @@ thread_unstop( if ((thread->state & (TH_RUN|TH_WAIT|TH_SUSP)) == TH_SUSP) { thread->state &= ~TH_SUSP; - thread->state |= TH_RUN; - - _mk_sp_thread_unblock(thread); - - pset_run_incr(thread->processor_set); - if (thread->sched_mode & TH_MODE_TIMESHARE) - pset_share_incr(thread->processor_set); + thread_unblock(thread, THREAD_AWAKENED); thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, - (int)thread, (int)thread->sched_pri, 0, 0, 0); } else if (thread->state & TH_SUSP) { @@ -966,19 +931,22 @@ thread_unstop( } /* - * Wait for the thread's RUN bit to clear + * thread_wait: + * + * Wait for a thread to stop running. 
(non-interruptible) + * */ -boolean_t +void thread_wait( - thread_t thread) + thread_t thread) { - spl_t s = splsched(); + wait_result_t wresult; + spl_t s = splsched(); wake_lock(thread); thread_lock(thread); while (thread->state & TH_RUN) { - wait_result_t result; processor_t processor = thread->last_processor; if ( processor != PROCESSOR_NULL && @@ -988,15 +956,12 @@ thread_wait( thread_unlock(thread); thread->wake_active = TRUE; - result = assert_wait(&thread->wake_active, THREAD_ABORTSAFE); + wresult = assert_wait(&thread->wake_active, THREAD_UNINT); wake_unlock(thread); splx(s); - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); - - if (result != THREAD_AWAKENED) - return (FALSE); + if (wresult == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); s = splsched(); wake_lock(thread); @@ -1006,8 +971,6 @@ thread_wait( thread_unlock(thread); wake_unlock(thread); splx(s); - - return (TRUE); } /* @@ -1055,7 +1018,7 @@ clear_wait_internal( } } - return (thread_go_locked(thread, wresult)); + return (thread_go(thread, wresult)); } while (--i > 0); panic("clear_wait_internal: deadlock: thread=0x%x, wq=0x%x, cpu=%d\n", @@ -1275,11 +1238,7 @@ thread_select( /* * Perform a context switch and start executing the new thread. * - * If continuation is non-zero, resume the old (current) thread - * next by executing at continuation on a new stack, in lieu - * of returning. - * - * Returns TRUE if the hand-off succeeds. + * Returns FALSE on failure, and the thread is re-dispatched. * * Called at splsched. */ @@ -1310,26 +1269,23 @@ MACRO_BEGIN \ } \ MACRO_END -static thread_t -__current_thread(void) -{ - return (current_thread()); -} - boolean_t thread_invoke( register thread_t old_thread, register thread_t new_thread, - int reason, - thread_continue_t old_cont) + ast_t reason) { - thread_continue_t new_cont; + thread_continue_t new_cont, continuation = old_thread->continuation; + void *new_param, *parameter = old_thread->parameter; processor_t processor; + thread_t prev_thread; if (get_preemption_level() != 0) panic("thread_invoke: preemption_level %d\n", get_preemption_level()); + assert(old_thread == current_thread()); + /* * Mark thread interruptible. */ @@ -1338,8 +1294,6 @@ thread_invoke( assert(thread_runnable(new_thread)); - assert(old_thread->continuation == NULL); - /* * Allow time constraint threads to hang onto * a stack. @@ -1349,8 +1303,8 @@ thread_invoke( old_thread->reserved_stack = old_thread->kernel_stack; } - if (old_cont != NULL) { - if (new_thread->state & TH_STACK_HANDOFF) { + if (continuation != NULL) { + if (!new_thread->kernel_stack) { /* * If the old thread is using a privileged stack, * check to see whether we can exchange it with @@ -1360,104 +1314,39 @@ thread_invoke( !new_thread->reserved_stack) goto need_stack; - new_thread->state &= ~TH_STACK_HANDOFF; + /* + * Context switch by performing a stack handoff. + */ new_cont = new_thread->continuation; new_thread->continuation = NULL; + new_param = new_thread->parameter; + new_thread->parameter = NULL; - /* - * Set up ast context of new thread and switch - * to its timer. 
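thread_stop() and thread_wait() above share one handshake: publish interest by setting wake_active, sleep on the address of wake_active itself, and let whoever clears TH_RUN (or TH_SUSP) issue the wakeup. As a portable analogy only, not the kernel mechanism, it is the classic condition-variable wait loop:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
static bool running = true;		/* stands in for TH_RUN */
static bool wake_active = false;	/* someone is waiting for !running */

static void *
waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (running) {	/* thread_wait(): loop until TH_RUN clears */
		wake_active = true;
		pthread_cond_wait(&wake, &lock);
	}
	pthread_mutex_unlock(&lock);
	return (NULL);
}

int
main(void)
{
	pthread_t th;

	pthread_create(&th, NULL, waiter, NULL);

	/* Dispatcher side: clear TH_RUN, then wake only if someone asked. */
	pthread_mutex_lock(&lock);
	running = false;
	if (wake_active) {
		wake_active = false;
		pthread_cond_broadcast(&wake);
	}
	pthread_mutex_unlock(&lock);

	pthread_join(th, NULL);
	assert(!running);
	return (0);
}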
- */ processor = current_processor(); processor->active_thread = new_thread; processor->current_pri = new_thread->sched_pri; new_thread->last_processor = processor; - ast_context(new_thread->top_act, processor->slot_num); - timer_switch(&new_thread->system_timer); + ast_context(new_thread); thread_unlock(new_thread); current_task()->csw++; old_thread->reason = reason; - old_thread->continuation = old_cont; + + processor->last_dispatch = mach_absolute_time(); + timer_event((uint32_t)processor->last_dispatch, + &new_thread->system_timer); - _mk_sp_thread_done(old_thread, new_thread, processor); + thread_done(old_thread, new_thread, processor); machine_stack_handoff(old_thread, new_thread); - _mk_sp_thread_begin(new_thread, processor); + thread_begin(new_thread, processor); - wake_lock(old_thread); - thread_lock(old_thread); - - /* - * Inline thread_dispatch but - * don't free stack. + /* + * Now dispatch the old thread. */ - - switch (old_thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) { - - case TH_RUN | TH_UNINT: - case TH_RUN: - /* - * Still running, put back - * onto a run queue. - */ - old_thread->state |= TH_STACK_HANDOFF; - _mk_sp_thread_dispatch(old_thread); - - thread_unlock(old_thread); - wake_unlock(old_thread); - break; - - case TH_RUN | TH_WAIT | TH_UNINT: - case TH_RUN | TH_WAIT: - { - boolean_t term, wake, callout; - - /* - * Waiting. - */ - old_thread->sleep_stamp = sched_tick; - old_thread->state |= TH_STACK_HANDOFF; - old_thread->state &= ~TH_RUN; - - term = (old_thread->state & TH_TERMINATE)? TRUE: FALSE; - callout = old_thread->active_callout; - wake = old_thread->wake_active; - old_thread->wake_active = FALSE; - - if (old_thread->sched_mode & TH_MODE_TIMESHARE) - pset_share_decr(old_thread->processor_set); - pset_run_decr(old_thread->processor_set); - - thread_unlock(old_thread); - wake_unlock(old_thread); - - if (callout) - call_thread_block(); - - if (wake) - thread_wakeup((event_t)&old_thread->wake_active); - - if (term) - thread_reaper_enqueue(old_thread); - break; - } - - case TH_RUN | TH_IDLE: - /* - * The idle threads don't go - * onto a run queue. 
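The handoff path above relies on continuations: a thread that blocked with a continuation keeps no kernel stack, so resuming it means calling the saved function with the saved parameter and wakeup result rather than restoring register state. A toy model of just the function-pointer half (the real call_continuation() also switches stacks in assembly, which this sketch does not attempt):

#include <assert.h>

/* When a thread blocks with a continuation, nothing of its stack is
 * kept; resuming it is a fresh call, not a return. */
typedef void (*thread_continue_t)(void *parameter, int wait_result);

struct thread {
	thread_continue_t	continuation;
	void			*parameter;
};

static int resumed_with = -1;

static void
after_wait(void *parameter, int wait_result)
{
	resumed_with = *(int *)parameter + wait_result;
}

int
main(void)
{
	struct thread th;
	int arg = 40;

	/* Block: record where to restart; the stack may now be handed off. */
	th.continuation = after_wait;
	th.parameter = &arg;

	/* Resume: analogous to call_continuation(new_cont, new_param, ...). */
	th.continuation(th.parameter, 2);
	assert(resumed_with == 42);
	return (0);
}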
- */ - old_thread->state |= TH_STACK_HANDOFF; - thread_unlock(old_thread); - wake_unlock(old_thread); - break; - - default: - panic("thread_invoke: state 0x%x\n", old_thread->state); - } + thread_dispatch(old_thread); counter_always(c_thread_invoke_hits++); @@ -1465,18 +1354,8 @@ thread_invoke( (void) spllo(); assert(new_cont); - call_continuation(new_cont); + call_continuation(new_cont, new_param, new_thread->wait_result); /*NOTREACHED*/ - return (TRUE); - } - else - if (new_thread->state & TH_STACK_ALLOC) { - /* - * Waiting for a stack - */ - counter_always(c_thread_invoke_misses++); - thread_unlock(new_thread); - return (FALSE); } else if (new_thread == old_thread) { @@ -1487,7 +1366,7 @@ thread_invoke( funnel_refunnel_check(new_thread, 3); (void) spllo(); - call_continuation(old_cont); + call_continuation(continuation, parameter, new_thread->wait_result); /*NOTREACHED*/ } } @@ -1495,27 +1374,17 @@ thread_invoke( /* * Check that the new thread has a stack */ - if (new_thread->state & TH_STACK_HANDOFF) { + if (!new_thread->kernel_stack) { need_stack: - if (!stack_alloc_try(new_thread, thread_continue)) { + if (!stack_alloc_try(new_thread)) { counter_always(c_thread_invoke_misses++); - thread_swapin(new_thread); + thread_unlock(new_thread); + thread_stack_enqueue(new_thread); return (FALSE); } - - new_thread->state &= ~TH_STACK_HANDOFF; - } - else - if (new_thread->state & TH_STACK_ALLOC) { - /* - * Waiting for a stack - */ - counter_always(c_thread_invoke_misses++); - thread_unlock(new_thread); - return (FALSE); } else - if (old_thread == new_thread) { + if (new_thread == old_thread) { counter(++c_thread_invoke_same); thread_unlock(new_thread); return (TRUE); @@ -1523,14 +1392,13 @@ need_stack: } /* - * Set up ast context of new thread and switch to its timer. + * Context switch by full context save. */ processor = current_processor(); processor->active_thread = new_thread; processor->current_pri = new_thread->sched_pri; new_thread->last_processor = processor; - ast_context(new_thread->top_act, processor->slot_num); - timer_switch(&new_thread->system_timer); + ast_context(new_thread); assert(thread_runnable(new_thread)); thread_unlock(new_thread); @@ -1539,35 +1407,35 @@ need_stack: assert(old_thread->runq == RUN_QUEUE_NULL); old_thread->reason = reason; - old_thread->continuation = old_cont; - _mk_sp_thread_done(old_thread, new_thread, processor); + processor->last_dispatch = mach_absolute_time(); + timer_event((uint32_t)processor->last_dispatch, &new_thread->system_timer); + + thread_done(old_thread, new_thread, processor); /* - * Here is where we actually change register context, - * and address space if required. Note that control - * will not return here immediately. + * This is where we actually switch register context, + * and address space if required. Control will not + * return here immediately. */ - old_thread = machine_switch_context(old_thread, old_cont, new_thread); - - /* Now on new thread's stack. Set a local variable to refer to it. */ - new_thread = __current_thread(); - assert(old_thread != new_thread); + prev_thread = machine_switch_context(old_thread, continuation, new_thread); - assert(thread_runnable(new_thread)); - _mk_sp_thread_begin(new_thread, new_thread->last_processor); + /* + * We are still old_thread, possibly on a different processor, + * and new_thread is now stale. + */ + thread_begin(old_thread, old_thread->last_processor); /* - * We're back. Now old_thread is the thread that resumed - * us, and we have to dispatch it. 
+ * Now dispatch the thread which resumed us. */ - thread_dispatch(old_thread); + thread_dispatch(prev_thread); - if (old_cont) { - funnel_refunnel_check(new_thread, 3); + if (continuation) { + funnel_refunnel_check(old_thread, 3); (void) spllo(); - call_continuation(old_cont); + call_continuation(continuation, parameter, old_thread->wait_result); /*NOTREACHED*/ } @@ -1575,37 +1443,182 @@ need_stack: } /* - * thread_continue: + * thread_done: * - * Called at splsched when a thread first receives - * a new stack after a continuation. + * Perform calculations for thread + * finishing execution on the current processor. + * + * Called at splsched. */ void -thread_continue( - register thread_t old_thread) +thread_done( + thread_t old_thread, + thread_t new_thread, + processor_t processor) { - register thread_t self = current_thread(); - register thread_continue_t continuation; - - continuation = self->continuation; - self->continuation = NULL; + if (!(old_thread->state & TH_IDLE)) { + /* + * Compute remainder of current quantum. + */ + if ( first_timeslice(processor) && + processor->quantum_end > processor->last_dispatch ) + old_thread->current_quantum = + (processor->quantum_end - processor->last_dispatch); + else + old_thread->current_quantum = 0; - _mk_sp_thread_begin(self, self->last_processor); - + if (old_thread->sched_mode & TH_MODE_REALTIME) { + /* + * Cancel the deadline if the thread has + * consumed the entire quantum. + */ + if (old_thread->current_quantum == 0) { + old_thread->realtime.deadline = UINT64_MAX; + old_thread->reason |= AST_QUANTUM; + } + } + else { + /* + * For non-realtime threads treat a tiny + * remaining quantum as an expired quantum + * but include what's left next time. + */ + if (old_thread->current_quantum < min_std_quantum) { + old_thread->reason |= AST_QUANTUM; + old_thread->current_quantum += std_quantum; + } + } + + /* + * If we are doing a direct handoff then + * give the remainder of our quantum to + * the next thread. + */ + if ((old_thread->reason & (AST_HANDOFF|AST_QUANTUM)) == AST_HANDOFF) { + new_thread->current_quantum = old_thread->current_quantum; + old_thread->reason |= AST_QUANTUM; + old_thread->current_quantum = 0; + } + + old_thread->last_switch = processor->last_dispatch; + + old_thread->computation_metered += + (old_thread->last_switch - old_thread->computation_epoch); + } +} + +/* + * thread_begin: + * + * Set up for thread beginning execution on + * the current processor. + * + * Called at splsched. + */ +void +thread_begin( + thread_t thread, + processor_t processor) +{ + if (!(thread->state & TH_IDLE)) { + /* + * Give the thread a new quantum + * if none remaining. + */ + if (thread->current_quantum == 0) + thread_quantum_init(thread); + + /* + * Set up quantum timer and timeslice. + */ + processor->quantum_end = + (processor->last_dispatch + thread->current_quantum); + timer_call_enter1(&processor->quantum_timer, + thread, processor->quantum_end); + + processor_timeslice_setup(processor, thread); + + thread->last_switch = processor->last_dispatch; + + thread->computation_epoch = thread->last_switch; + } + else { + timer_call_cancel(&processor->quantum_timer); + processor->timeslice = 1; + } +} + +/* + * thread_dispatch: + * + * Handle previous thread at context switch. Re-dispatch + * if still running, otherwise update run state and perform + * special actions. + * + * Called at splsched. 
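To make the remainder arithmetic in thread_done() concrete, two worked cases follow; the min_std_quantum and std_quantum values are assumed for illustration and do not come from this patch:

	/*
	 * Assume first_timeslice(processor) is true, min_std_quantum = 500,
	 * std_quantum = 10000 (abstime units).
	 *
	 * 1. quantum_end = 10000, last_dispatch = 9950
	 *    -> current_quantum = 50.  Since 50 < 500 the quantum counts
	 *    as expired: AST_QUANTUM is set and the leftover is banked,
	 *    so the thread runs with 50 + 10000 = 10050 next time.
	 *
	 * 2. quantum_end = 10000, last_dispatch = 9400, and the block was
	 *    a direct handoff (AST_HANDOFF set, AST_QUANTUM clear)
	 *    -> current_quantum = 600 >= 500, so AST_QUANTUM stays clear
	 *    and the handoff clause donates the 600 to new_thread;
	 *    old_thread is marked AST_QUANTUM with current_quantum = 0
	 *    and draws a fresh quantum from thread_begin() when it next
	 *    runs.
	 */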
+ */ +void +thread_dispatch( + register thread_t thread) +{ /* - * We must dispatch the old thread and then - * call the current thread's continuation. - * There might not be an old thread, if we are - * the first thread to run on this processor. + * If blocked at a continuation, discard + * the stack. */ - if (old_thread != THREAD_NULL) - thread_dispatch(old_thread); +#ifndef i386 + if (thread->continuation != NULL && thread->kernel_stack) + stack_free(thread); +#endif - funnel_refunnel_check(self, 4); - (void)spllo(); + if (!(thread->state & TH_IDLE)) { + wake_lock(thread); + thread_lock(thread); - call_continuation(continuation); - /*NOTREACHED*/ + if (!(thread->state & TH_WAIT)) { + /* + * Still running. + */ + if (thread->reason & AST_QUANTUM) + thread_setrun(thread, SCHED_TAILQ); + else + if (thread->reason & AST_PREEMPT) + thread_setrun(thread, SCHED_HEADQ); + else + thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); + + thread->reason = AST_NONE; + + thread_unlock(thread); + wake_unlock(thread); + } + else { + boolean_t wake; + + /* + * Waiting. + */ + thread->state &= ~TH_RUN; + + wake = thread->wake_active; + thread->wake_active = FALSE; + + if (thread->sched_mode & TH_MODE_TIMESHARE) + pset_share_decr(thread->processor_set); + pset_run_decr(thread->processor_set); + + thread_unlock(thread); + wake_unlock(thread); + + if (thread->options & TH_OPT_CALLOUT) + call_thread_block(); + + if (wake) + thread_wakeup((event_t)&thread->wake_active); + + if (thread->state & TH_TERMINATE) + thread_terminate_enqueue(thread); + } + } } /* @@ -1621,27 +1634,36 @@ thread_continue( */ counter(mach_counter_t c_thread_block_calls = 0;) -int +wait_result_t thread_block_reason( thread_continue_t continuation, + void *parameter, ast_t reason) { - register thread_t thread = current_thread(); + register thread_t self = current_thread(); register processor_t processor; register thread_t new_thread; spl_t s; counter(++c_thread_block_calls); - check_simple_locks(); - s = splsched(); if (!(reason & AST_PREEMPT)) - funnel_release_check(thread, 2); + funnel_release_check(self, 2); processor = current_processor(); + /* + * Delay switching to the idle thread under certain conditions. + */ + if (s != FALSE && (self->state & (TH_IDLE|TH_TERMINATE|TH_WAIT)) == TH_WAIT) { + if ( processor->processor_set->processor_count > 1 && + processor->processor_set->runq.count == 0 && + processor->runq.count == 0 ) + processor = delay_idle(processor, self); + } + /* If we're explicitly yielding, force a subsequent quantum */ if (reason & AST_YIELD) processor->timeslice = 0; @@ -1649,21 +1671,24 @@ thread_block_reason( /* We're handling all scheduling AST's */ ast_off(AST_SCHEDULING); - thread_lock(thread); + self->continuation = continuation; + self->parameter = parameter; + + thread_lock(self); new_thread = thread_select(processor); assert(new_thread && thread_runnable(new_thread)); - thread_unlock(thread); - while (!thread_invoke(thread, new_thread, reason, continuation)) { - thread_lock(thread); + thread_unlock(self); + while (!thread_invoke(self, new_thread, reason)) { + thread_lock(self); new_thread = thread_select(processor); assert(new_thread && thread_runnable(new_thread)); - thread_unlock(thread); + thread_unlock(self); } - funnel_refunnel_check(thread, 5); + funnel_refunnel_check(self, 5); splx(s); - return (thread->wait_result); + return (self->wait_result); } /* @@ -1671,17 +1696,25 @@ thread_block_reason( * * Block the current thread if a wait has been asserted. 
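For contrast with the continuation style, the classic synchronous form pairs assert_wait() with a stackful thread_block(); a minimal sketch, with my_event standing in for a real wait channel:

	wait_result_t	wr;

	assert_wait((event_t)&my_event, THREAD_INTERRUPTIBLE);
	wr = thread_block(THREAD_CONTINUE_NULL);	/* returns after wakeup */

	if (wr == THREAD_INTERRUPTED) {
		/* the wait was aborted; undo any partial state here */
	}

The thread keeps its kernel stack across the block, which is exactly the cost the continuation variants avoid.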
*/ -int +wait_result_t thread_block( thread_continue_t continuation) { - return thread_block_reason(continuation, AST_NONE); + return thread_block_reason(continuation, NULL, AST_NONE); +} + +wait_result_t +thread_block_parameter( + thread_continue_t continuation, + void *parameter) +{ + return thread_block_reason(continuation, parameter, AST_NONE); } /* * thread_run: * - * Switch directly from the current (old) thread to the + * Switch directly from the current thread to the * new thread, handing off our quantum if appropriate. * * New thread must be runnable, and not on a run queue. @@ -1690,113 +1723,63 @@ thread_block( */ int thread_run( - thread_t old_thread, + thread_t self, thread_continue_t continuation, + void *parameter, thread_t new_thread) { ast_t handoff = AST_HANDOFF; - assert(old_thread == current_thread()); + funnel_release_check(self, 3); - funnel_release_check(old_thread, 3); + self->continuation = continuation; + self->parameter = parameter; - while (!thread_invoke(old_thread, new_thread, handoff, continuation)) { + while (!thread_invoke(self, new_thread, handoff)) { register processor_t processor = current_processor(); - thread_lock(old_thread); + thread_lock(self); new_thread = thread_select(processor); - thread_unlock(old_thread); + thread_unlock(self); handoff = AST_NONE; } - funnel_refunnel_check(old_thread, 6); + funnel_refunnel_check(self, 6); - return (old_thread->wait_result); + return (self->wait_result); } /* - * Dispatches a running thread that is not on a - * run queue. + * thread_continue: * - * Called at splsched. + * Called at splsched when a thread first receives + * a new stack after a continuation. */ void -thread_dispatch( - register thread_t thread) +thread_continue( + register thread_t old_thread) { - wake_lock(thread); - thread_lock(thread); - - /* - * If we are discarding the thread's stack, we must do it - * before the thread has a chance to run. - */ -#ifndef i386 - if (thread->continuation != NULL) { - assert((thread->state & TH_STACK_STATE) == 0); - thread->state |= TH_STACK_HANDOFF; - stack_free(thread); - } -#endif - - switch (thread->state & (TH_RUN|TH_WAIT|TH_UNINT|TH_IDLE)) { - - case TH_RUN | TH_UNINT: - case TH_RUN: - /* - * No reason to stop. Put back on a run queue. - */ - _mk_sp_thread_dispatch(thread); - break; - - case TH_RUN | TH_WAIT | TH_UNINT: - case TH_RUN | TH_WAIT: - { - boolean_t term, wake, callout; + register thread_t self = current_thread(); + register thread_continue_t continuation; + register void *parameter; - /* - * Waiting - */ - thread->sleep_stamp = sched_tick; - thread->state &= ~TH_RUN; - - term = (thread->state & TH_TERMINATE)? TRUE: FALSE; - callout = thread->active_callout; - wake = thread->wake_active; - thread->wake_active = FALSE; - - if (thread->sched_mode & TH_MODE_TIMESHARE) - pset_share_decr(thread->processor_set); - pset_run_decr(thread->processor_set); - - thread_unlock(thread); - wake_unlock(thread); - - if (callout) - call_thread_block(); - - if (wake) - thread_wakeup((event_t)&thread->wake_active); + continuation = self->continuation; + self->continuation = NULL; + parameter = self->parameter; + self->parameter = NULL; - if (term) - thread_reaper_enqueue(thread); + thread_begin(self, self->last_processor); - return; - } + if (old_thread != THREAD_NULL) + thread_dispatch(old_thread); - case TH_RUN | TH_IDLE: - /* - * The idle threads don't go - * onto a run queue. 
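A caller of the four-argument thread_run() now passes the resume continuation and its parameter explicitly. A sketch modeled on the idle-thread call sites further down; my_continue and target are hypothetical, target must be runnable and not on a run queue, and the caller must be the current thread:

	/*
	 * Hand the processor directly to "target"; any remaining
	 * quantum travels with it (AST_HANDOFF).  Execution resumes
	 * in my_continue() when this thread is next selected.
	 */
	thread_run(current_thread(), my_continue, NULL, target);
	/*NOTREACHED*/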
- */ - break; + funnel_refunnel_check(self, 4); - default: - panic("thread_dispatch: state 0x%x\n", thread->state); - } + if (old_thread != THREAD_NULL) + (void)spllo(); - thread_unlock(thread); - wake_unlock(thread); + call_continuation(continuation, parameter, self->wait_result); + /*NOTREACHED*/ } /* @@ -2265,8 +2248,7 @@ set_sched_pri( (priority >= BASEPRI_PREEMPT || (thread->task_priority < MINPRI_KERNEL && thread->task_priority >= BASEPRI_BACKGROUND && - priority > thread->task_priority) || - (thread->sched_mode & TH_MODE_FORCEDPREEMPT) ) ) + priority > thread->task_priority) ) ) thread->sched_mode |= TH_MODE_PREEMPT; else thread->sched_mode &= ~TH_MODE_PREEMPT; @@ -2292,6 +2274,36 @@ set_sched_pri( } } +#if 0 + +static void +run_queue_check( + run_queue_t rq, + thread_t thread) +{ + queue_t q; + queue_entry_t qe; + + if (rq != thread->runq) + panic("run_queue_check: thread runq"); + + if (thread->sched_pri > MAXPRI || thread->sched_pri < MINPRI) + panic("run_queue_check: thread sched_pri"); + + q = &rq->queues[thread->sched_pri]; + qe = queue_first(q); + while (!queue_end(q, qe)) { + if (qe == (queue_entry_t)thread) + return; + + qe = queue_next(qe); + } + + panic("run_queue_check: end"); +} + +#endif /* DEBUG */ + /* * run_queue_remove: * @@ -2437,6 +2449,52 @@ choose_thread( return (thread); } +static processor_t +delay_idle( + processor_t processor, + thread_t self) +{ + int *gcount, *lcount; + uint64_t abstime, spin, limit; + + lcount = &processor->runq.count; + gcount = &processor->processor_set->runq.count; + + abstime = mach_absolute_time(); + limit = abstime + delay_idle_limit; + spin = abstime + delay_idle_spin; + + timer_event((uint32_t)abstime, &processor->idle_thread->system_timer); + + self->options |= TH_OPT_DELAYIDLE; + + while ( *gcount == 0 && *lcount == 0 && + (self->state & TH_WAIT) != 0 && + abstime < limit ) { + if (abstime >= spin) { + (void)spllo(); + + (void)splsched(); + processor = current_processor(); + lcount = &processor->runq.count; + gcount = &processor->processor_set->runq.count; + + abstime = mach_absolute_time(); + spin = abstime + delay_idle_spin; + + timer_event((uint32_t)abstime, &processor->idle_thread->system_timer); + } + else + abstime = mach_absolute_time(); + } + + timer_event((uint32_t)abstime, &self->system_timer); + + self->options &= ~TH_OPT_DELAYIDLE; + + return (processor); +} + /* * no_dispatch_count counts number of times processors go non-idle * without being dispatched. This should be very rare. @@ -2444,36 +2502,35 @@ choose_thread( int no_dispatch_count = 0; /* - * This is the idle thread, which just looks for other threads + * This is the idle processor thread, which just looks for other threads * to execute. 
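The idle loop below spins on three counters; restated as a hypothetical predicate (a sketch only, the real loop polls the fields in place at splsched):

	static boolean_t
	idle_should_exit(processor_t processor)
	{
		return (processor->next_thread != THREAD_NULL	||  /* direct dispatch pending */
		    processor->processor_set->runq.count > 0	||  /* global run queue work */
		    processor->runq.count > 0);			    /* local run queue work */
	}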
*/ void -idle_thread_continue(void) +idle_thread(void) { register processor_t processor; - register volatile thread_t *threadp; - register volatile int *gcount; - register volatile int *lcount; + register thread_t *threadp; + register int *gcount; + register int *lcount; register thread_t new_thread; register int state; register processor_set_t pset; - int mycpu; + ast_t *myast = ast_pending(); - mycpu = cpu_number(); - processor = cpu_to_processor(mycpu); - threadp = (volatile thread_t *) &processor->next_thread; - lcount = (volatile int *) &processor->runq.count; + processor = current_processor(); - gcount = (volatile int *)&processor->processor_set->runq.count; + threadp = &processor->next_thread; + lcount = &processor->runq.count; + gcount = &processor->processor_set->runq.count; (void)splsched(); - while ( (*threadp == (volatile thread_t)THREAD_NULL) && - (*gcount == 0) && (*lcount == 0) ) { + while ( (*threadp == THREAD_NULL) && + (*gcount == 0) && (*lcount == 0) ) { /* check for ASTs while we wait */ - if (need_ast[mycpu] &~ ( AST_SCHEDULING | AST_BSD )) { + if (*myast &~ (AST_SCHEDULING | AST_BSD)) { /* no ASTs for us */ - need_ast[mycpu] &= AST_NONE; + *myast &= AST_NONE; (void)spllo(); } else @@ -2529,15 +2586,13 @@ idle_thread_continue(void) thread_unlock(thread); counter(c_idle_thread_handoff++); - thread_run(processor->idle_thread, - idle_thread_continue, new_thread); + thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); /*NOTREACHED*/ } simple_unlock(&pset->sched_lock); counter(c_idle_thread_handoff++); - thread_run(processor->idle_thread, - idle_thread_continue, new_thread); + thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); /*NOTREACHED*/ } @@ -2554,16 +2609,14 @@ idle_thread_continue(void) thread_unlock(thread); counter(c_idle_thread_handoff++); - thread_run(processor->idle_thread, - idle_thread_continue, new_thread); + thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); /* NOTREACHED */ } else { simple_unlock(&pset->sched_lock); counter(c_idle_thread_handoff++); - thread_run(processor->idle_thread, - idle_thread_continue, new_thread); + thread_run(processor->idle_thread, (thread_continue_t)idle_thread, NULL, new_thread); /* NOTREACHED */ } } @@ -2582,7 +2635,7 @@ idle_thread_continue(void) simple_unlock(&pset->sched_lock); counter(c_idle_thread_block++); - thread_block(idle_thread_continue); + thread_block((thread_continue_t)idle_thread); /* NOTREACHED */ } else @@ -2604,125 +2657,189 @@ idle_thread_continue(void) simple_unlock(&pset->sched_lock); counter(c_idle_thread_block++); - thread_block(idle_thread_continue); + thread_block((thread_continue_t)idle_thread); /* NOTREACHED */ } simple_unlock(&pset->sched_lock); - panic("idle_thread: state %d\n", cpu_state(mycpu)); + panic("idle_thread: state %d\n", processor->state); /*NOTREACHED*/ } -void -idle_thread(void) +kern_return_t +idle_thread_create( + processor_t processor) { - counter(c_idle_thread_block++); - thread_block(idle_thread_continue); - /*NOTREACHED*/ + kern_return_t result; + thread_t thread; + spl_t s; + + result = kernel_thread_create((thread_continue_t)idle_thread, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + return (result); + + s = splsched(); + thread_lock(thread); + thread->bound_processor = processor; + processor->idle_thread = thread; + thread->sched_pri = thread->priority = IDLEPRI; + thread->state = (TH_RUN | TH_IDLE); + thread_unlock(thread); + splx(s); + + 
thread_deallocate(thread); + + return (KERN_SUCCESS); } static uint64_t sched_tick_deadline; -void sched_tick_thread(void); - +/* + * sched_startup: + * + * Kicks off scheduler services. + * + * Called at splsched. + */ void -sched_tick_init(void) +sched_startup(void) { - kernel_thread_with_priority(sched_tick_thread, MAXPRI_STANDARD); + kern_return_t result; + thread_t thread; + + result = kernel_thread_start_priority((thread_continue_t)sched_tick_thread, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + panic("sched_startup"); + + thread_deallocate(thread); + + /* + * Yield to the sched_tick_thread while it times + * a series of context switches back. It stores + * the baseline value in sched_cswtime. + * + * The current thread is the only other thread + * active at this point. + */ + while (sched_cswtime == 0) + thread_block(THREAD_CONTINUE_NULL); + + thread_daemon_init(); + + thread_call_initialize(); } /* - * sched_tick_thread + * sched_tick_thread: * * Perform periodic bookkeeping functions about ten * times per second. */ -void -sched_tick_thread_continue(void) +static void +sched_tick_continue(void) { - uint64_t abstime; -#if SIMPLE_CLOCK - int new_usec; -#endif /* SIMPLE_CLOCK */ - - abstime = mach_absolute_time(); + uint64_t abstime = mach_absolute_time(); - sched_tick++; /* age usage one more time */ -#if SIMPLE_CLOCK - /* - * Compensate for clock drift. sched_usec is an - * exponential average of the number of microseconds in - * a second. It decays in the same fashion as cpu_usage. - */ - new_usec = sched_usec_elapsed(); - sched_usec = (5*sched_usec + 3*new_usec)/8; -#endif /* SIMPLE_CLOCK */ + sched_tick++; /* - * Compute the scheduler load factors. + * Compute various averages. */ - compute_mach_factor(); + compute_averages(); /* - * Scan the run queues for timesharing threads which - * may need to have their priorities recalculated. + * Scan the run queues for threads which + * may need to be updated. */ - do_thread_scan(); + thread_update_scan(); clock_deadline_for_periodic_event(sched_tick_interval, abstime, &sched_tick_deadline); - assert_wait((event_t)sched_tick_thread_continue, THREAD_INTERRUPTIBLE); - thread_set_timer_deadline(sched_tick_deadline); - thread_block(sched_tick_thread_continue); + assert_wait_deadline((event_t)sched_tick_thread, THREAD_UNINT, sched_tick_deadline); + thread_block((thread_continue_t)sched_tick_continue); /*NOTREACHED*/ } +/* + * Time a series of context switches to determine + * a baseline. Toss the high and low and return + * the one-way value. + */ +static uint32_t +time_cswitch(void) +{ + uint32_t new, hi, low, accum; + uint64_t abstime; + int i, tries = 7; + + accum = hi = low = 0; + for (i = 0; i < tries; ++i) { + abstime = mach_absolute_time(); + thread_block(THREAD_CONTINUE_NULL); + + new = mach_absolute_time() - abstime; + + if (i == 0) + accum = hi = low = new; + else { + if (new < low) + low = new; + else + if (new > hi) + hi = new; + accum += new; + } + } + + return ((accum - hi - low) / (2 * (tries - 2))); +} + void sched_tick_thread(void) { + sched_cswtime = time_cswitch(); + sched_tick_deadline = mach_absolute_time(); - thread_block(sched_tick_thread_continue); + sched_tick_continue(); /*NOTREACHED*/ } /* - * do_thread_scan: + * thread_update_scan / runq_scan: * - * Scan the run queues for timesharing threads which need - * to be aged, possibly adjusting their priorities upwards. + * Scan the run queues to account for timesharing threads + * which need to be updated. * * Scanner runs in two passes.
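Plugging illustrative numbers into time_cswitch() above:

	/*
	 * tries = 7, samples = { 12, 10, 11, 30, 10, 11, 13 }   (abstime)
	 * accum = 97, hi = 30, low = 10
	 * result = (97 - 30 - 10) / (2 * (7 - 2)) = 57 / 10 = 5
	 *
	 * i.e. the mean of the five surviving round-trip samples, halved
	 * to approximate a one-way switch; integer division truncates.
	 */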
Pass one squirrels likely - * thread away in an array (takes out references for them). - * Pass two does the priority updates. This is necessary because - * the run queue lock is required for the candidate scan, but - * cannot be held during updates. + * threads away in an array, pass two does the update. * - * Array length should be enough so that restart isn't necessary, - * but restart logic is included. + * This is necessary because the run queue is locked for + * the candidate scan, but the thread is locked for the update. * + * Array should be sized to make forward progress, without + * disabling preemption for long periods. */ -#define MAX_STUCK_THREADS 128 +#define THREAD_UPDATE_SIZE 128 -static thread_t stuck_threads[MAX_STUCK_THREADS]; -static int stuck_count = 0; +static thread_t thread_update_array[THREAD_UPDATE_SIZE]; +static int thread_update_count = 0; /* - * do_runq_scan is the guts of pass 1. It scans a runq for - * stuck threads. A boolean is returned indicating whether - * a retry is needed. + * Scan a runq for candidate threads. + * + * Returns TRUE if retry is needed. */ static boolean_t -do_runq_scan( +runq_scan( run_queue_t runq) { + register int count; register queue_t q; register thread_t thread; - register int count; - boolean_t result = FALSE; if ((count = runq->count) > 0) { q = runq->queues + runq->highq; @@ -2730,23 +2847,11 @@ do_runq_scan( queue_iterate(q, thread, thread_t, links) { if ( thread->sched_stamp != sched_tick && (thread->sched_mode & TH_MODE_TIMESHARE) ) { - /* - * Stuck, save its id for later. - */ - if (stuck_count == MAX_STUCK_THREADS) { - /* - * !@#$% No more room. - */ + if (thread_update_count == THREAD_UPDATE_SIZE) return (TRUE); - } - if (thread_lock_try(thread)) { - thread->ref_count++; - thread_unlock(thread); - stuck_threads[stuck_count++] = thread; - } - else - result = TRUE; + thread_update_array[thread_update_count++] = thread; + thread_reference_internal(thread); } count--; @@ -2756,44 +2861,40 @@ do_runq_scan( } } - return (result); + return (FALSE); } -boolean_t thread_scan_enabled = TRUE; - static void -do_thread_scan(void) +thread_update_scan(void) { - register boolean_t restart_needed = FALSE; - register thread_t thread; + register boolean_t restart_needed; register processor_set_t pset = &default_pset; register processor_t processor; + register thread_t thread; spl_t s; - if (!thread_scan_enabled) - return; - do { s = splsched(); simple_lock(&pset->sched_lock); - restart_needed = do_runq_scan(&pset->runq); + restart_needed = runq_scan(&pset->runq); simple_unlock(&pset->sched_lock); if (!restart_needed) { simple_lock(&pset->sched_lock); processor = (processor_t)queue_first(&pset->processors); while (!queue_end(&pset->processors, (queue_entry_t)processor)) { - if (restart_needed = do_runq_scan(&processor->runq)) + if ((restart_needed = runq_scan(&processor->runq)) != 0) break; thread = processor->idle_thread; if (thread->sched_stamp != sched_tick) { - if (stuck_count == MAX_STUCK_THREADS) { + if (thread_update_count == THREAD_UPDATE_SIZE) { restart_needed = TRUE; break; } - stuck_threads[stuck_count++] = thread; + thread_update_array[thread_update_count++] = thread; + thread_reference_internal(thread); } processor = (processor_t)queue_next(&processor->processors); @@ -2805,28 +2906,20 @@ do_thread_scan(void) /* * Ok, we now have a collection of candidates -- fix them. 
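The collect-then-update shape of runq_scan()/thread_update_scan() generalizes to any situation where the discovery lock differs from the update lock; stripped to its essentials, with all names hypothetical:

	/* pass 1: under the queue lock, pin the object and remember it */
	simple_lock(&queue_lock);
	if (count < LIMIT) {
		array[count++] = obj;
		object_reference(obj);		/* keeps obj alive between passes */
	}
	simple_unlock(&queue_lock);

	/* pass 2: queue lock dropped, take each object's own lock */
	while (count > 0) {
		obj = array[--count];
		object_lock(obj);
		object_update(obj);
		object_unlock(obj);
		object_deallocate(obj);		/* drop the pass-1 reference */
	}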
*/ - while (stuck_count > 0) { - boolean_t idle_thread; - - thread = stuck_threads[--stuck_count]; - stuck_threads[stuck_count] = THREAD_NULL; + while (thread_update_count > 0) { + thread = thread_update_array[--thread_update_count]; + thread_update_array[thread_update_count] = THREAD_NULL; s = splsched(); thread_lock(thread); - idle_thread = (thread->state & TH_IDLE) != 0; if ( !(thread->state & (TH_WAIT|TH_SUSP)) && thread->sched_stamp != sched_tick ) update_priority(thread); thread_unlock(thread); splx(s); - if (!idle_thread) - thread_deallocate(thread); + thread_deallocate(thread); } - - if (restart_needed) - delay(1); /* XXX */ - } while (restart_needed); } @@ -2845,6 +2938,11 @@ thread_wakeup( thread_wakeup_with_result(x, THREAD_AWAKENED); } +boolean_t +preemption_enabled(void) +{ + return (get_preemption_level() == 0 && ml_get_interrupts_enabled()); +} #if DEBUG static boolean_t @@ -2858,7 +2956,6 @@ thread_runnable( #if MACH_KDB #include #define printf kdbprintf -extern int db_indent; void db_sched(void); void diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h index 012cc97bb..929795787 100644 --- a/osfmk/kern/sched_prim.h +++ b/osfmk/kern/sched_prim.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,80 +66,80 @@ #include #include #include -#include -#include /*** ??? temp - remove me soon ***/ -#include - -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE -#include -/* - * Exported interface to sched_prim.c. - * A few of these functions are actually defined in - * ipc_sched.c, for historical reasons. - */ - -/* Initialize scheduler module */ +/* Initialization */ extern void sched_init(void); -extern void sched_timebase_init(void); - -/* - * Set up thread timeout element(s) when thread is created. - */ -extern void thread_timer_setup( - thread_t thread); +extern void sched_startup(void); -extern void thread_timer_terminate(void); +extern void sched_timebase_init(void); -/* - * Stop a thread and wait for it to stop running. - */ +/* Force a preemption point for a thread and wait for it to stop running */ extern boolean_t thread_stop( thread_t thread); -/* - * Wait for a thread to stop running. 
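Taken together, thread_stop() above and thread_unstop() below support an examine-while-stopped pattern; a sketch under the assumption that thread_stop() returns FALSE when the target cannot be stopped and that the caller already holds a thread reference:

	if (!thread_stop(thread))
		return (KERN_ABORTED);		/* target went away */

	/* target is off-processor; its state may be inspected safely */
	inspect_thread_state(thread);		/* hypothetical helper */

	thread_unstop(thread);			/* release the stop request */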
- */ -extern boolean_t thread_wait( +/* Release a previous stop request */ +extern void thread_unstop( thread_t thread); -/* Select a thread to run on a particular processor */ -extern thread_t thread_select( - processor_t myprocessor); +/* Wait for a thread to stop running */ +extern void thread_wait( + thread_t thread); -extern kern_return_t thread_go_locked( - thread_t thread, - wait_result_t result); +/* Select a thread to run */ +extern thread_t thread_select( + processor_t myprocessor); -/* Stop old thread and run new thread */ -extern boolean_t thread_invoke( - thread_t old_thread, - thread_t new_thread, - int reason, - thread_continue_t continuation); +/* Unblock thread on wake up */ +extern boolean_t thread_unblock( + thread_t thread, + wait_result_t wresult); -/* Called when current thread is given new stack */ -extern void thread_continue( - thread_t old_thread); +/* Unblock and dispatch thread */ +extern kern_return_t thread_go( + thread_t thread, + wait_result_t wresult); -/* Switch directly to a particular thread */ -extern int thread_run( +/* Context switch primitive */ +extern boolean_t thread_invoke( + thread_t old_thread, + thread_t new_thread, + ast_t reason); + +/* Perform calculations for thread finishing execution */ +extern void thread_done( thread_t old_thread, - thread_continue_t continuation, - thread_t new_thread); + thread_t new_thread, + processor_t processor); + +/* Set up for thread beginning execution */ +extern void thread_begin( + thread_t thread, + processor_t processor); -/* Dispatch a thread not on a run queue */ -extern void thread_dispatch( +/* Handle previous thread at context switch */ +extern void thread_dispatch( thread_t thread); +/* Switch directly to a particular thread */ +extern int thread_run( + thread_t self, + thread_continue_t continuation, + void *parameter, + thread_t new_thread); + +/* Resume thread with new stack */ +extern void thread_continue( + thread_t old_thread); + /* Invoke continuation */ extern void call_continuation( - thread_continue_t continuation); + thread_continue_t continuation, + void *parameter, + wait_result_t wresult); /* Set the current scheduled priority */ extern void set_sched_pri( @@ -161,42 +161,32 @@ extern void compute_my_priority( thread_t thread); /* Periodic scheduler activity */ -extern void sched_tick_init(void); +extern void sched_tick_thread(void); -/* - * Update thread to the current scheduler tick. - */ +/* Perform sched_tick housekeeping activities */ extern void update_priority( thread_t thread); -/* Idle thread loop */ +/* Idle processor thread */ extern void idle_thread(void); -/* - * Machine-dependent code must define these functions. - */ +extern kern_return_t idle_thread_create( + processor_t processor); /* Start thread running */ extern void thread_bootstrap_return(void); -/* Return from exception */ -extern void thread_exception_return(void); - /* Continuation return from syscall */ extern void thread_syscall_return( kern_return_t ret); -/* - * These functions are either defined in kern/thread.c - * or are defined directly by machine-dependent code. 
- */ - -/* Block current thread, indicating reason */ +/* Context switch */ extern wait_result_t thread_block_reason( thread_continue_t continuation, + void *parameter, ast_t reason); -/* Dispatch a thread for execution */ +/* Reschedule thread for execution */ extern void thread_setrun( thread_t thread, integer_t options); @@ -210,6 +200,10 @@ extern processor_t thread_bind( thread_t thread, processor_t processor); +extern void thread_timer_expire( + void *thread, + void *p1); + /* Set the maximum interrupt level for the thread */ __private_extern__ wait_interrupt_t thread_interrupt_level( wait_interrupt_t interruptible); @@ -218,117 +212,63 @@ __private_extern__ wait_result_t thread_mark_wait_locked( thread_t thread, wait_interrupt_t interruptible); -/* Sleep, unlocking and then relocking a usimple_lock in the process */ -__private_extern__ wait_result_t thread_sleep_fast_usimple_lock( - event_t event, - simple_lock_t lock, - wait_interrupt_t interruptible); - /* Wake up locked thread directly, passing result */ __private_extern__ kern_return_t clear_wait_internal( thread_t thread, wait_result_t result); -__private_extern__ - wait_queue_t wait_event_wait_queue( - event_t event); - #endif /* MACH_KERNEL_PRIVATE */ -extern wait_result_t assert_wait_prim( - event_t event, - thread_roust_t roust_hint, - uint64_t deadline, - wait_interrupt_t interruptible); +__BEGIN_DECLS -/* - ****************** Only exported until BSD stops using ******************** - */ +#ifdef XNU_KERNEL_PRIVATE + +extern boolean_t assert_wait_possible(void); /* - * Cancel a stop and unblock the thread if already stopped. + ****************** Only exported until BSD stops using ******************** */ -extern void thread_unstop( - thread_t thread); /* Wake up thread directly, passing result */ extern kern_return_t clear_wait( thread_t thread, wait_result_t result); -#endif /* __APPLE_API_PRIVATE */ +/* Return from exception (BSD-visible interface) */ +extern void thread_exception_return(void); -/* - * ********************* PUBLIC APIs ************************************ - */ +#endif /* XNU_KERNEL_PRIVATE */ -/* Set timer for current thread */ -extern void thread_set_timer( - uint32_t interval, - uint32_t scale_factor); +/* Context switch */ +extern wait_result_t thread_block( + thread_continue_t continuation); -extern void thread_set_timer_deadline( - uint64_t deadline); - -extern void thread_cancel_timer(void); +extern wait_result_t thread_block_parameter( + thread_continue_t continuation, + void *parameter); /* Declare thread will wait on a particular event */ -extern wait_result_t assert_wait( - event_t event, - wait_interrupt_t interruptflag); - -/* Assert that the thread intends to wait for a timeout */ -extern wait_result_t assert_wait_timeout( - natural_t msecs, - wait_interrupt_t interruptflags); - -/* Sleep, unlocking and then relocking a usimple_lock in the process */ -extern wait_result_t thread_sleep_usimple_lock( - event_t event, - usimple_lock_t lock, - wait_interrupt_t interruptible); - -/* Sleep, unlocking and then relocking a mutex in the process */ -extern wait_result_t thread_sleep_mutex( - event_t event, - mutex_t *mutex, - wait_interrupt_t interruptible); - -/* Sleep with a deadline, unlocking and then relocking a mutex in the process */ -extern wait_result_t thread_sleep_mutex_deadline( - event_t event, - mutex_t *mutex, - uint64_t deadline, - wait_interrupt_t interruptible); - -/* Sleep, unlocking and then relocking a write lock in the process */ -extern wait_result_t thread_sleep_lock_write( - 
event_t event, - lock_t *lock, - wait_interrupt_t interruptible); - -/* Sleep, hinting that a thread funnel may be involved in the process */ -extern wait_result_t thread_sleep_funnel( - event_t event, - wait_interrupt_t interruptible); - -/* Wake up thread (or threads) waiting on a particular event */ -extern kern_return_t thread_wakeup_prim( - event_t event, - boolean_t one_thread, - wait_result_t result); - -#ifdef __APPLE_API_UNSTABLE +extern wait_result_t assert_wait( + event_t event, + wait_interrupt_t interruptible); -/* Block current thread (Block reason) */ -extern wait_result_t thread_block( - thread_continue_t continuation); +/* Assert that the thread intends to wait with a timeout */ +extern wait_result_t assert_wait_timeout( + event_t event, + wait_interrupt_t interruptible, + uint32_t interval, + uint32_t scale_factor); -#endif /* __APPLE_API_UNSTABLE */ +extern wait_result_t assert_wait_deadline( + event_t event, + wait_interrupt_t interruptible, + uint64_t deadline); -/* - * Routines defined as macros - */ +/* Wake up thread (or threads) waiting on a particular event */ +extern kern_return_t thread_wakeup_prim( + event_t event, + boolean_t one_thread, + wait_result_t result); #define thread_wakeup(x) \ thread_wakeup_prim((x), FALSE, THREAD_AWAKENED) @@ -337,13 +277,36 @@ extern wait_result_t thread_block( #define thread_wakeup_one(x) \ thread_wakeup_prim((x), TRUE, THREAD_AWAKENED) -#if !defined(MACH_KERNEL_PRIVATE) && !defined(ABSOLUTETIME_SCALAR_TYPE) +extern boolean_t preemption_enabled(void); + +#ifdef KERNEL_PRIVATE + +/* + * Obsolete interfaces. + */ + +extern void thread_set_timer( + uint32_t interval, + uint32_t scale_factor); + +extern void thread_set_timer_deadline( + uint64_t deadline); -#include +extern void thread_cancel_timer(void); + +#ifndef MACH_KERNEL_PRIVATE + +#ifndef ABSOLUTETIME_SCALAR_TYPE #define thread_set_timer_deadline(a) \ thread_set_timer_deadline(__OSAbsoluteTime(a)) -#endif +#endif /* ABSOLUTETIME_SCALAR_TYPE */ + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + +__END_DECLS #endif /* _KERN_SCHED_PRIM_H_ */ diff --git a/osfmk/kern/simple_lock.h b/osfmk/kern/simple_lock.h index 3ca178638..ad451134e 100644 --- a/osfmk/kern/simple_lock.h +++ b/osfmk/kern/simple_lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,269 +56,98 @@ * Author: Avadis Tevanian, Jr., Michael Wayne Young * Date: 1985 * - * Simple Locking primitives definitions + * Atomic primitives and Simple Locking primitives definitions */ -#ifndef _SIMPLE_LOCK_H_ -#define _SIMPLE_LOCK_H_ +#ifdef KERNEL_PRIVATE -/* - * Configuration variables: - * - * - * MACH_LDEBUG: record pc and thread of callers, turn on - * all lock debugging. - * - * - * ETAP: The Event Trace Analysis Package (ETAP) monitors - * and records micro-kernel lock behavior and general - * kernel events. ETAP supports two levels of - * tracing for locks: - * - cumulative (ETAP_LOCK_ACCUMULATE) - * - monitored (ETAP_LOCK_MONITOR) - * - * Note: If either level of tracing is configured then - * ETAP_LOCK_TRACE is automatically defined to - * equal one. - * - * Several macros are added throughout the lock code to - * allow for convenient configuration. 
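Usage of the reshaped wait primitives above, in both relative and absolute form; my_event is a placeholder, and the scale_factor argument is taken to be nanoseconds per interval unit, which matches common usage but is not spelled out in this patch:

	wait_result_t	wr;
	uint64_t	deadline;

	/* relative: wait up to 10 ms (10 units of 1000000 ns each) */
	assert_wait_timeout((event_t)&my_event, THREAD_INTERRUPTIBLE,
						10, 1000 * 1000);
	wr = thread_block(THREAD_CONTINUE_NULL);

	/* absolute: the same wait via a precomputed deadline */
	clock_interval_to_deadline(10, 1000 * 1000, &deadline);
	assert_wait_deadline((event_t)&my_event, THREAD_INTERRUPTIBLE, deadline);
	wr = thread_block(THREAD_CONTINUE_NULL);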
- */ +#ifndef _KERN_SIMPLE_LOCK_H_ +#define _KERN_SIMPLE_LOCK_H_ +#include #include #include - -#include -#include -#include -#include - -/* - * The Mach lock package exports the following simple lock abstractions: - * - * Lock Type Properties - * hw_lock lowest level hardware abstraction; atomic, - * non-blocking, mutual exclusion; supports pre-emption - * usimple non-blocking spinning lock, available in all - * kernel configurations; may be used from thread - * and interrupt contexts; supports debugging, - * statistics and pre-emption - * simple non-blocking spinning lock, intended for SMP - * synchronization (vanishes on a uniprocessor); - * supports debugging, statistics and pre-emption - * - * NOTES TO IMPLEMENTORS: there are essentially two versions - * of the lock package. One is portable, written in C, and - * supports all of the various flavors of debugging, statistics, - * uni- versus multi-processor, pre-emption, etc. The "other" - * is whatever set of lock routines is provided by machine-dependent - * code. Presumably, the machine-dependent package is heavily - * optimized and meant for production kernels. - * - * We encourage implementors to focus on highly-efficient, - * production implementations of machine-dependent lock code, - * and use the portable lock package for everything else. - */ - -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE +#include -/* - * Mach always initializes locks, even those statically - * allocated. - * - * The conditional acquisition call, hw_lock_try, - * must return non-zero on success and zero on failure. - * - * The hw_lock_held operation returns non-zero if the - * lock is set, zero if the lock is clear. This operation - * should be implemented using an ordinary memory read, - * rather than a special atomic instruction, allowing - * a processor to spin in cache waiting for the lock to - * be released without chewing up bus cycles. - */ -extern void hw_lock_init(hw_lock_t); -extern void hw_lock_lock(hw_lock_t); -extern void hw_lock_unlock(hw_lock_t); -extern unsigned int hw_lock_to(hw_lock_t, unsigned int); -extern unsigned int hw_lock_try(hw_lock_t); -extern unsigned int hw_lock_held(hw_lock_t); +extern void hw_lock_init( + hw_lock_t); -#endif /* MACH_KERNEL_PRIVATE */ +extern void hw_lock_lock( + hw_lock_t); -#endif /* __APPLE_API_PRIVATE */ +extern void hw_lock_unlock( + hw_lock_t); -/* - * Machine dependent ops. 
- */ -extern unsigned int hw_lock_bit(unsigned int *, unsigned int, unsigned int); -extern unsigned int hw_cpu_sync(unsigned int *, unsigned int); -extern unsigned int hw_cpu_wcng(unsigned int *, unsigned int, unsigned int); -extern unsigned int hw_lock_mbits(unsigned int *, unsigned int, unsigned int, - unsigned int, unsigned int); -void hw_unlock_bit(unsigned int *, unsigned int); +extern unsigned int hw_lock_to( + hw_lock_t, + unsigned int); -extern uint32_t hw_atomic_add( - uint32_t *dest, - uint32_t delt); +extern unsigned int hw_lock_try( + hw_lock_t); -extern uint32_t hw_atomic_sub( - uint32_t *dest, - uint32_t delt); +extern unsigned int hw_lock_held( + hw_lock_t); -extern uint32_t hw_atomic_or( - uint32_t *dest, - uint32_t mask); - -extern uint32_t hw_atomic_and( - uint32_t *dest, - uint32_t mask); +#endif /* MACH_KERNEL_PRIVATE */ -extern uint32_t hw_compare_and_store( - uint32_t oldval, - uint32_t newval, - uint32_t *dest); +__BEGIN_DECLS -extern void hw_queue_atomic(unsigned int *anchor, unsigned int *elem, unsigned int disp); -extern void hw_queue_atomic_list(unsigned int *anchor, unsigned int *first, unsigned int *last, unsigned int disp); -extern unsigned int *hw_dequeue_atomic(unsigned int *anchor, unsigned int disp); +extern uint32_t hw_atomic_add( + uint32_t *dest, + uint32_t delt); +extern uint32_t hw_atomic_sub( + uint32_t *dest, + uint32_t delt); -/* - * The remaining locking constructs may have two versions. - * One version is machine-independent, built in C on top of the - * hw_lock construct. This version supports production, debugging - * and statistics configurations and is portable across architectures. - * - * Any particular port may override some or all of the portable - * lock package for whatever reason -- usually efficiency. - * - * The direct use of hw_locks by machine-independent Mach code - * should be rare; the preferred spinning lock is the simple_lock - * (see below). - */ +extern uint32_t hw_atomic_or( + uint32_t *dest, + uint32_t mask); -/* - * A "simple" spin lock, providing non-blocking mutual - * exclusion and conditional acquisition. - * - * The usimple_lock exists even in uniprocessor configurations. - * A data structure is always allocated for it and the following - * operations are always defined: - * - * usimple_lock_init lock initialization (mandatory!) - * usimple_lock lock acquisition - * usimple_unlock lock release - * usimple_lock_try conditional lock acquisition; - * non-zero means success - * Simple lock DEBUG interfaces - * usimple_lock_held verify lock already held by me - * usimple_lock_none_held verify no usimple locks are held - * - * The usimple_lock may be used for synchronization between - * thread context and interrupt context, or between a uniprocessor - * and an intelligent device. Obviously, it may also be used for - * multiprocessor synchronization. Its use should be rare; the - * simple_lock is the preferred spinning lock (see below). - * - * The usimple_lock supports optional lock debugging and statistics. - * - * Normally, we expect the usimple_lock data structure to be - * defined here, with its operations implemented in an efficient, - * machine-dependent way. However, any implementation may choose - * to rely on a C-based, portable version of the usimple_lock for - * debugging, statistics, and/or tracing. Three hooks are used in - * the portable lock package to allow the machine-dependent package - * to override some or all of the portable package's features. - * - * The usimple_lock also handles pre-emption. 
Lock acquisition - * implies disabling pre-emption, while lock release implies - * re-enabling pre-emption. Conditional lock acquisition does - * not assume success: on success, pre-emption is disabled - * but on failure the pre-emption state remains the same as - * the pre-emption state before the acquisition attempt. - */ +extern uint32_t hw_atomic_and( + uint32_t *dest, + uint32_t mask); -/* - * Each usimple_lock has a type, used for debugging and - * statistics. This type may safely be ignored in a - * production configuration. - * - * The conditional acquisition call, usimple_lock_try, - * must return non-zero on success and zero on failure. - */ -extern void usimple_lock_init(usimple_lock_t,etap_event_t); -extern void usimple_lock(usimple_lock_t); -extern void usimple_unlock(usimple_lock_t); -extern unsigned int usimple_lock_try(usimple_lock_t); -extern void usimple_lock_held(usimple_lock_t); -extern void usimple_lock_none_held(void); +extern uint32_t hw_compare_and_store( + uint32_t oldval, + uint32_t newval, + uint32_t *dest); +extern void hw_queue_atomic( + unsigned int *anchor, + unsigned int *elem, + unsigned int disp); -/* - * Upon the usimple_lock we define the simple_lock, which - * exists for SMP configurations. These locks aren't needed - * in a uniprocessor configuration, so compile-time tricks - * make them disappear when NCPUS==1. (For debugging purposes, - * however, they can be enabled even on a uniprocessor.) This - * should be the "most popular" spinning lock; the usimple_lock - * and hw_lock should only be used in rare cases. - * - * IMPORTANT: simple_locks that may be shared between interrupt - * and thread context must have their use coordinated with spl. - * The spl level must alway be the same when acquiring the lock. - * Otherwise, deadlock may result. - */ +extern void hw_queue_atomic_list( + unsigned int *anchor, + unsigned int *first, + unsigned int *last, + unsigned int disp); -#ifdef __APPLE_API_PRIVATE +extern unsigned int *hw_dequeue_atomic( + unsigned int *anchor, + unsigned int disp); -#ifdef MACH_KERNEL_PRIVATE +extern void usimple_lock_init( + usimple_lock_t, + unsigned short); -#include -#include +extern void usimple_lock( + usimple_lock_t); -#if NCPUS == 1 && !ETAP_LOCK_TRACE && !USLOCK_DEBUG -/* - * MACH_RT is a very special case: in the case that the - * machine-dependent lock package hasn't taken responsibility - * but there is no other reason to turn on locks, if MACH_RT - * is turned on locks denote critical, non-preemptable points - * in the code. - * - * Otherwise, simple_locks may be layered directly on top of - * usimple_locks. - * - * N.B. The reason that simple_lock_try may be assumed to - * succeed under MACH_RT is that the definition only is used - * when NCPUS==1 AND because simple_locks shared between thread - * and interrupt context are always acquired with elevated spl. - * Thus, it is never possible to be interrupted in a dangerous - * way while holding a simple_lock. - */ -/* - * for locks and there is no other apparent reason to turn them on. - * So make them disappear. 
- */ -#define simple_lock_init(l,t) -#define simple_lock(l) disable_preemption() -#define simple_unlock(l) enable_preemption() -#define simple_lock_try(l) (disable_preemption(), 1) -#define simple_lock_addr(lock) ((simple_lock_t)0) -#define __slock_held_func__(l) preemption_is_disabled() -#endif /* NCPUS == 1 && !ETAP_LOCK_TRACE && !USLOCK_DEBUG */ +extern void usimple_unlock( + usimple_lock_t); -#if ETAP_LOCK_TRACE -extern void simple_lock_no_trace(simple_lock_t l); -extern int simple_lock_try_no_trace(simple_lock_t l); -extern void simple_unlock_no_trace(simple_lock_t l); -#endif /* ETAP_LOCK_TRACE */ +extern unsigned int usimple_lock_try( + usimple_lock_t); -#endif /* MACH_KERNEL_PRIVATE */ +__END_DECLS -#endif /* __APPLE_API_PRIVATE */ +#define ETAP_NO_TRACE 0 +#define ETAP_IO_AHA 0 /* * If we got to here and we still don't have simple_lock_init @@ -331,37 +160,10 @@ extern void simple_unlock_no_trace(simple_lock_t l); #define simple_unlock(l) usimple_unlock(l) #define simple_lock_try(l) usimple_lock_try(l) #define simple_lock_addr(l) (&(l)) -#define __slock_held_func__(l) usimple_lock_held(l) #define thread_sleep_simple_lock(l, e, i) \ thread_sleep_usimple_lock((l), (e), (i)) #endif /* !defined(simple_lock_init) */ -#if USLOCK_DEBUG -/* - * Debug-time only: - * + verify that usimple_lock is already held by caller - * + verify that usimple_lock is NOT held by caller - * + verify that current processor owns no usimple_locks - * - * We do not provide a simple_lock_NOT_held function because - * it's impossible to verify when only MACH_RT is turned on. - * In that situation, only preemption is enabled/disabled - * around lock use, and it's impossible to tell which lock - * acquisition caused preemption to be disabled. However, - * note that it's still valid to use check_simple_locks - * when only MACH_RT is turned on -- no locks should be - * held, hence preemption should be enabled. - * Actually, the above isn't strictly true, as explicit calls - * to disable_preemption() need to be accounted for. - */ -#define simple_lock_held(l) __slock_held_func__(l) -#define check_simple_locks() usimple_lock_none_held() - -#else /* USLOCK_DEBUG */ - -#define simple_lock_held(l) -#define check_simple_locks() - -#endif /* USLOCK_DEBUG */ +#endif /*!_KERN_SIMPLE_LOCK_H_*/ -#endif /*!_SIMPLE_LOCK_H_*/ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/simple_lock_types.h b/osfmk/kern/simple_lock_types.h deleted file mode 100644 index 72b0a2f50..000000000 --- a/osfmk/kern/simple_lock_types.h +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: kern/simple_lock_types.h - * Author: Avadis Tevanian, Jr., Michael Wayne Young - * Date: 1985 - * - * Simple lock data type definitions - */ - -#ifndef _SIMPLE_LOCK_TYPES_H_ -#define _SIMPLE_LOCK_TYPES_H_ - -#include -#include -#include - -/* - * The Mach lock package exports the following simple lock abstractions: - * - * Lock Type Properties - * hw_lock lowest level hardware abstraction; atomic, - * non-blocking, mutual exclusion; supports pre-emption - * usimple non-blocking spinning lock, available in all - * kernel configurations; may be used from thread - * and interrupt contexts; supports debugging, - * statistics and pre-emption - * simple non-blocking spinning lock, intended for SMP - * synchronization (vanishes on a uniprocessor); - * supports debugging, statistics and pre-emption - * - * NOTES TO IMPLEMENTORS: there are essentially two versions - * of the lock package. One is portable, written in C, and - * supports all of the various flavors of debugging, statistics, - * uni- versus multi-processor, pre-emption, etc. The "other" - * is whatever set of lock routines is provided by machine-dependent - * code. Presumably, the machine-dependent package is heavily - * optimized and meant for production kernels. - * - * We encourage implementors to focus on highly-efficient, - * production implementations of machine-dependent lock code, - * and use the portable lock package for everything else. - */ - -/* - * All of the remaining locking constructs may have two versions. - * One version is machine-independent, built in C on top of the - * hw_lock construct. This version supports production, debugging - * and statistics configurations and is portable across architectures. - * - * Any particular port may override some or all of the portable - * lock package for whatever reason -- usually efficiency. - * - * The direct use of hw_locks by machine-independent Mach code - * should be rare; the preferred spinning lock is the simple_lock - * (see below). - */ - -/* - * A "simple" spin lock, providing non-blocking mutual - * exclusion and conditional acquisition. - * - * The usimple_lock exists even in uniprocessor configurations. - * A data structure is always allocated for it. - * - * The usimple_lock may be used for synchronization between - * thread context and interrupt context, or between a uniprocessor - * and an intelligent device. 
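The interrupt-context warning above reduces to a fixed idiom: raise spl first, take the lock second, release in the opposite order. A sketch, with my_lock a placeholder:

	spl_t	s;

	s = splsched();			/* mask interrupts that may take this lock */
	usimple_lock(&my_lock);
	/* critical section shared with interrupt context */
	usimple_unlock(&my_lock);
	splx(s);			/* restore the previous interrupt level */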
Obviously, it may also be used for - * multiprocessor synchronization. Its use should be rare; the - * simple_lock is the preferred spinning lock (see below). - * - * The usimple_lock supports optional lock debugging and statistics. - * - * The usimple_lock may be inlined or optimized in ways that - * depend on the particular machine architecture and kernel - * build configuration; e.g., processor type, number of CPUs, - * production v. debugging. - * - * Normally, we expect the usimple_lock data structure to be - * defined here, with its operations implemented in an efficient, - * machine-dependent way. However, any implementation may choose - * to rely on a C-based, portable version of the usimple_lock for - * debugging, statistics, and/or tracing. Three hooks are used in - * the portable lock package to allow the machine-dependent package - * to override some or all of the portable package's features. - * - * - * The usimple_lock data structure - * can be overriden in a machine-dependent way by defining - * LOCK_USIMPLE_DATA, although we expect this to be unnecessary. - * (Note that if you choose to override LOCK_USIMPLE_DATA, you'd - * better also be prepared to override LOCK_USIMPLE_CALLS.) - * - * The usimple_lock also handles pre-emption. Lock acquisition - * implies disabling pre-emption, while lock release implies - * re-enabling pre-emption. Conditional lock acquisition does - * not assume success: on success, pre-emption is disabled - * but on failure the pre-emption state remains the same as - * the pre-emption state before the acquisition attempt. - */ - -#ifndef USIMPLE_LOCK_DATA -#define USLOCK_DEBUG_DATA 1 /* Always allocate lock debug data for now */ -#if USLOCK_DEBUG_DATA -/* - * - * - * This structure records additional information about lock state - * and recent operations. The data are carefully organized so that - * some portions of it can be examined BEFORE actually acquiring - * the lock -- for instance, the lock_thread field, to detect an - * attempt to acquire a lock already owned by the calling thread. - * All *updates* to this structure are governed by the lock to which - * this structure belongs. - * - * Note cache consistency dependency: being able to examine some - * of the fields in this structure without first acquiring a lock - * implies strongly-ordered cache coherency OR release consistency. - * Perhaps needless to say, acquisition consistency may not suffice. - * However, it's hard to imagine a scenario using acquisition - * consistency that results in using stale data from this structure. - * It would be necessary for the thread manipulating the lock to - * switch to another processor without first executing any instructions - * that would cause the needed consistency updates; basically, without - * taking a lock. Not possible in this kernel! - */ -typedef struct uslock_debug { - void *lock_pc; /* pc where lock operation began */ - void *lock_thread; /* thread that acquired lock */ - unsigned long duration[2]; - unsigned short state; - unsigned char lock_cpu; - void *unlock_thread; /* last thread to release lock */ - unsigned char unlock_cpu; - void *unlock_pc; /* pc where lock operation ended */ -} uslock_debug; -#endif /* USLOCK_DEBUG_DATA */ - -typedef struct slock { - hw_lock_data_t interlock; /* must be first... see lock.c */ -#if USLOCK_DEBUG_DATA - unsigned short lock_type; /* must be second... 
see lock.c */ -#define USLOCK_TAG 0x5353 - uslock_debug debug; -#endif /* USLOCK_DEBUG_DATA */ -} usimple_lock_data_t, *usimple_lock_t; - -#define USIMPLE_LOCK_NULL ((usimple_lock_t) 0) - -#endif /* USIMPLE_LOCK_DATA */ - -/* - * Upon the usimple_lock we define the simple_lock, which - * exists for SMP configurations. These locks aren't needed - * in a uniprocessor configuration, so compile-time tricks - * make them disappear when NCPUS==1. (For debugging purposes, - * however, they can be enabled even on a uniprocessor.) This - * should be the "most popular" spinning lock; the usimple_lock - * and hw_lock should only be used in rare cases. - * - * IMPORTANT: simple_locks that may be shared between interrupt - * and thread context must have their use coordinated with spl. - * The spl level must alway be the same when acquiring the lock. - * Otherwise, deadlock may result. - * - * Given that, in some configurations, Mach does not need to - * allocate simple_lock data structures, users of simple_locks - * should employ the "decl_simple_lock_data" macro when allocating - * simple_locks. Note that it use should be something like - * decl_simple_lock_data(static,foo_lock) - * WITHOUT any terminating semi-colon. Because the macro expands - * to include its own semi-colon, if one is needed, it may safely - * be used multiple times at arbitrary positions within a structure. - * Adding a semi-colon will cause structure definitions to fail - * when locks are turned off and a naked semi-colon is left behind. - */ - -/* - * Decide whether to allocate simple_lock data structures. - * If the machine-dependent code has turned on LOCK_SIMPLE_DATA, - * then it assumes all responsibility. Otherwise, we need - * these data structures if the configuration includes SMP or - * lock debugging or statistics. - * - * N.B. Simple locks should be declared using - * decl_simple_lock_data(class,name) - * with no trailing semi-colon. This syntax works best because - * - it correctly disappears in production uniprocessor - * configurations, leaving behind no allocated data - * structure - * - it can handle static and extern declarations: - * decl_simple_lock_data(extern,foo) extern - * decl_simple_lock_data(static,foo) static - * decl_simple_lock_data(,foo) ordinary - */ - -#include - -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE - -#include -#include - -/* - * Turn on the uslock debug (internally to oskmk) when we are using the - * package and mach_ldebug build option is set. - */ -#if (MACH_LDEBUG) && !(defined(LOCK_SIMPLE_DATA)) -#define USLOCK_DEBUG 1 -#else -#define USLOCK_DEBUG 0 -#endif - -#if (defined(LOCK_SIMPLE_DATA) || ((NCPUS == 1) && !USLOCK_DEBUG )) -typedef usimple_lock_data_t *simple_lock_t; -#define decl_simple_lock_data(class,name) -#endif - -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ - -/* - * Outside the mach kernel component, and even within it on SMP or - * debug systems, simple locks are the same as usimple locks. - */ -#if !defined(decl_simple_lock_data) -typedef usimple_lock_data_t *simple_lock_t; -typedef usimple_lock_data_t simple_lock_data_t; - -#define decl_simple_lock_data(class,name) \ - class simple_lock_data_t name; - -#endif /* !defined(decl_simple_lock_data) */ - -#endif /* !_SIMPLE_LOCK_TYPES_H_ */ diff --git a/osfmk/kern/spl.h b/osfmk/kern/spl.h index e7553a211..f19a3fa59 100644 --- a/osfmk/kern/spl.h +++ b/osfmk/kern/spl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,6 +22,8 @@ #ifndef _KERN_SPL_H_ #define _KERN_SPL_H_ +#include + typedef unsigned spl_t; #define splhigh() (spl_t) ml_set_interrupts_enabled(FALSE) diff --git a/osfmk/kern/sscanf.c b/osfmk/kern/sscanf.c deleted file mode 100644 index ade695877..000000000 --- a/osfmk/kern/sscanf.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_FREE_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:56 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.2 1997/02/12 12:52:33 stephen - * New file, reimplemented to be sure of copyright status - * Initially only supports character matching and '%d' - * [1997/02/12 12:43:09 stephen] - * - * $EndLog$ - */ - -#include -#include - -#define isdigit(c) ((unsigned) ((c) - '0') < 10U) - -/* - * Scan items from a string in accordance with a format. This is much - * simpler than the C standard function: it only recognises %d without a - * field width, and does not treat space in the format string or the - * input any differently from other characters. The return value is the - * number of characters from the input string that were successfully - * scanned, not the number of format items matched as in standard sscanf. - * e.mcmanus@opengroup.org, 12 Feb 97 - */ -int -sscanf(const char *str, const char *format, ...) -{ - const char *start = str; - va_list args; - - va_start(args, format); - for ( ; *format != '\0'; format++) { - if (*format == '%' && format[1] == 'd') { - int positive; - int value; - int *valp; - - if (*str == '-') { - positive = 0; - str++; - } else - positive = 1; - if (!isdigit(*str)) - break; - value = 0; - do { - value = (value * 10) - (*str - '0'); - str++; - } while (isdigit(*str)); - if (positive) - value = -value; - valp = va_arg(args, int *); - *valp = value; - format++; - } else if (*format == *str) { - str++; - } else - break; - } - va_end(args); - return str - start; -} diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c new file mode 100644 index 000000000..530d7bff5 --- /dev/null +++ b/osfmk/kern/stack.c @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. 
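
Two details of the sscanf() deleted above are easy to miss: the return value is the number of input characters consumed, not the number of conversions matched as in the standard C function, and digits are accumulated negatively (value = value * 10 - digit, negated at the end for positive numbers) so that INT_MIN parses without overflowing before negation. A usage sketch under those semantics:

/* Illustrative only: semantics of the kernel sscanf() removed above. */
int major, minor;
int consumed = sscanf("10.4", "%d.%d", &major, &minor);
/* major == 10, minor == 4, consumed == 4 (characters scanned);
 * the standard C sscanf() would instead have returned 2 (items matched). */
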
Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Kernel stack management routines. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +/* + * We allocate stacks from generic kernel VM. + * + * The stack_free_list can only be accessed at splsched, + * because stack_alloc_try/thread_invoke operate at splsched. + */ + +decl_simple_lock_data(static,stack_lock_data) +#define stack_lock() simple_lock(&stack_lock_data) +#define stack_unlock() simple_unlock(&stack_lock_data) + +#define STACK_CACHE_SIZE 2 + +static vm_map_t stack_map; +static vm_offset_t stack_free_list; + +static unsigned int stack_free_count, stack_free_hiwat; /* free list count */ +static unsigned int stack_total, stack_hiwat; /* current total count */ + +static unsigned int stack_free_target; +static int stack_free_delta; + +static unsigned int stack_new_count; /* total new stack allocations */ + +static vm_offset_t stack_addr_mask; + +/* + * The next field is at the base of the stack, + * so the low end is left unsullied. + */ +#define stack_next(stack) \ + (*((vm_offset_t *)((stack) + KERNEL_STACK_SIZE) - 1)) + +void +stack_init(void) +{ + vm_offset_t stacks, boundary; + vm_map_offset_t map_addr; + + simple_lock_init(&stack_lock_data, 0); + + if (KERNEL_STACK_SIZE < round_page(KERNEL_STACK_SIZE)) + panic("stack_init: stack size %d not a multiple of page size %d\n", KERNEL_STACK_SIZE, PAGE_SIZE); + + for (boundary = PAGE_SIZE; boundary <= KERNEL_STACK_SIZE; ) + boundary <<= 1; + + stack_addr_mask = boundary - 1; + + if (kmem_suballoc(kernel_map, &stacks, (boundary * (2 * THREAD_MAX + 64)), + FALSE, VM_FLAGS_ANYWHERE, &stack_map) != KERN_SUCCESS) + panic("stack_init: kmem_suballoc"); + + map_addr = vm_map_min(stack_map); + if (vm_map_enter(stack_map, &map_addr, vm_map_round_page(PAGE_SIZE), 0, VM_FLAGS_FIXED, + VM_OBJECT_NULL, 0, FALSE, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT) != KERN_SUCCESS) + panic("stack_init: vm_map_enter"); +} + +/* + * stack_alloc: + * + * Allocate a stack for a thread, may + * block. + */ +void +stack_alloc( + thread_t thread) +{ + vm_offset_t stack; + spl_t s; + + assert(thread->kernel_stack == 0); + + s = splsched(); + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + } + else { + if (++stack_total > stack_hiwat) + stack_hiwat = stack_total; + stack_new_count++; + } + stack_free_delta--; + stack_unlock(); + splx(s); + + if (stack == 0) { + if (kernel_memory_allocate(stack_map, &stack, KERNEL_STACK_SIZE, stack_addr_mask, KMA_KOBJECT) != KERN_SUCCESS) + panic("stack_alloc: kernel_memory_allocate"); + } + + machine_stack_attach(thread, stack); +} + +/* + * stack_free: + * + * Detach and free the stack for a thread. 
+ */ +void +stack_free( + thread_t thread) +{ + vm_offset_t stack = machine_stack_detach(thread); + + assert(stack); + if (stack != thread->reserved_stack) { + struct stack_cache *cache; + spl_t s; + + s = splsched(); + cache = &PROCESSOR_DATA(current_processor(), stack_cache); + if (cache->count < STACK_CACHE_SIZE) { + stack_next(stack) = cache->free; + cache->free = stack; + cache->count++; + } + else { + stack_lock(); + stack_next(stack) = stack_free_list; + stack_free_list = stack; + if (++stack_free_count > stack_free_hiwat) + stack_free_hiwat = stack_free_count; + stack_free_delta++; + stack_unlock(); + } + splx(s); + } +} + +void +stack_free_stack( + vm_offset_t stack) +{ + struct stack_cache *cache; + spl_t s; + + s = splsched(); + cache = &PROCESSOR_DATA(current_processor(), stack_cache); + if (cache->count < STACK_CACHE_SIZE) { + stack_next(stack) = cache->free; + cache->free = stack; + cache->count++; + } + else { + stack_lock(); + stack_next(stack) = stack_free_list; + stack_free_list = stack; + if (++stack_free_count > stack_free_hiwat) + stack_free_hiwat = stack_free_count; + stack_free_delta++; + stack_unlock(); + } + splx(s); +} + +/* + * stack_alloc_try: + * + * Non-blocking attempt to allocate a + * stack for a thread. + * + * Returns TRUE on success. + * + * Called at splsched. + */ +boolean_t +stack_alloc_try( + thread_t thread) +{ + struct stack_cache *cache; + vm_offset_t stack; + + cache = &PROCESSOR_DATA(current_processor(), stack_cache); + stack = cache->free; + if (stack != 0) { + cache->free = stack_next(stack); + cache->count--; + } + else { + if (stack_free_list != 0) { + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + stack_free_delta--; + } + stack_unlock(); + } + } + + if (stack != 0 || (stack = thread->reserved_stack) != 0) { + machine_stack_attach(thread, stack); + return (TRUE); + } + + return (FALSE); +} + +static unsigned int stack_collect_tick, last_stack_tick; + +/* + * stack_collect: + * + * Free excess kernel stacks, may + * block. + */ +void +stack_collect(void) +{ + if (stack_collect_tick != last_stack_tick) { + unsigned int target; + vm_offset_t stack; + spl_t s; + + s = splsched(); + stack_lock(); + + target = stack_free_target + (STACK_CACHE_SIZE * processor_count); + target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta; + + while (stack_free_count > target) { + stack = stack_free_list; + stack_free_list = stack_next(stack); + stack_free_count--; stack_total--; + stack_unlock(); + splx(s); + + if (vm_map_remove(stack_map, vm_map_trunc_page(stack), + vm_map_round_page(stack + KERNEL_STACK_SIZE), VM_MAP_REMOVE_KUNWIRE) != KERN_SUCCESS) + panic("stack_collect: vm_map_remove"); + + s = splsched(); + stack_lock(); + + target = stack_free_target + (STACK_CACHE_SIZE * processor_count); + target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta; + } + + last_stack_tick = stack_collect_tick; + + stack_unlock(); + splx(s); + } +} + +/* + * compute_stack_target: + * + * Computes a new target free list count + * based on recent alloc / free activity. + * + * Limits stack collection to once per + * computation period. + */ +void +compute_stack_target( +__unused void *arg) +{ + spl_t s; + + s = splsched(); + stack_lock(); + + if (stack_free_target > 5) + stack_free_target = (4 * stack_free_target) / 5; + else + if (stack_free_target > 0) + stack_free_target--; + + stack_free_target += (stack_free_delta >= 0)? 
stack_free_delta: -stack_free_delta; + + stack_free_delta = 0; + stack_collect_tick++; + + stack_unlock(); + splx(s); +} + +void +stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, + vm_size_t *alloc_size, int *collectable, int *exhaustable) +{ + unsigned int total, hiwat, free; + spl_t s; + + s = splsched(); + stack_lock(); + total = stack_total; + hiwat = stack_hiwat; + free = stack_free_count; + stack_unlock(); + splx(s); + + *count = total - free; + *cur_size = KERNEL_STACK_SIZE * total; + *max_size = KERNEL_STACK_SIZE * hiwat; + *elem_size = KERNEL_STACK_SIZE; + *alloc_size = KERNEL_STACK_SIZE; + *collectable = 1; + *exhaustable = 0; +} + +/* OBSOLETE */ +void stack_privilege( + thread_t thread); + +void +stack_privilege( + __unused thread_t thread) +{ + /* OBSOLETE */ +} + +/* + * Return info on stack usage for threads in a specific processor set + */ +kern_return_t +processor_set_stack_usage( + processor_set_t pset, + unsigned int *totalp, + vm_size_t *spacep, + vm_size_t *residentp, + vm_size_t *maxusagep, + vm_offset_t *maxstackp) +{ +#if !MACH_DEBUG + return KERN_NOT_SUPPORTED; +#else + unsigned int total; + vm_size_t maxusage; + vm_offset_t maxstack; + + register thread_t *threads; + register thread_t thread; + + unsigned int actual; /* this many things */ + unsigned int i; + + vm_size_t size, size_needed; + void *addr; + + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + return KERN_INVALID_ARGUMENT; + } + + actual = pset->thread_count; + + /* do we have the memory we need? */ + + size_needed = actual * sizeof(thread_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the processor_set is locked & active */ + threads = (thread_t *) addr; + for (i = 0, thread = (thread_t) queue_first(&pset->threads); + !queue_end(&pset->threads, (queue_entry_t) thread); + thread = (thread_t) queue_next(&thread->pset_threads)) { + thread_reference_internal(thread); + threads[i++] = thread; + } + assert(i <= actual); + + /* can unlock processor set now that we have the thread refs */ + pset_unlock(pset); + + /* calculate maxusage and free thread references */ + + total = 0; + maxusage = 0; + maxstack = 0; + while (i > 0) { + thread_t threadref = threads[--i]; + + if (threadref->kernel_stack != 0) + total++; + + thread_deallocate(threadref); + } + + if (size != 0) + kfree(addr, size); + + *totalp = total; + *residentp = *spacep = total * round_page(KERNEL_STACK_SIZE); + *maxusagep = maxusage; + *maxstackp = maxstack; + return KERN_SUCCESS; + +#endif /* MACH_DEBUG */ +} + +vm_offset_t min_valid_stack_address(void) +{ + return vm_map_min(stack_map); +} + +vm_offset_t max_valid_stack_address(void) +{ + return vm_map_max(stack_map); +} diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index c6668bd8c..65f9d00f8 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
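
The allocator in stack.c above layers a small per-processor cache (STACK_CACHE_SIZE entries, touched only at splsched on the owning CPU) in front of the global stack_free_list, so the common free/reallocate cycle on one processor never takes the shared stack_lock. A reduced sketch of that two-level shape; the names, NCPU bound, and lock()/unlock() primitives here are hypothetical, and the real code threads the next pointer through the top of the stack itself rather than using a separate header:

/* Sketch of the two-level free list used by stack_free()/stack_alloc_try(). */
#define CACHE_SIZE      2

struct freelist { void *head; unsigned int count; };

static struct freelist global_list;           /* shared, guarded by lock() */
static struct freelist percpu_cache[NCPU];    /* per processor, lock-free  */

static void
element_free(unsigned int cpu, void *elem)
{
        struct freelist *c = &percpu_cache[cpu];

        if (c->count < CACHE_SIZE) {          /* fast path: CPU-local only */
                *(void **)elem = c->head;
                c->head = elem;
                c->count++;
                return;
        }
        lock();                               /* overflow: shared list */
        *(void **)elem = global_list.head;
        global_list.head = elem;
        global_list.count++;
        unlock();
}
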
* * @APPLE_LICENSE_HEADER_START@ * @@ -57,13 +57,12 @@ #include #include #include -#include #include #include -#include #include #include +#include #include #include #include @@ -71,18 +70,17 @@ #include #include #include -#include +#include #include #include #include -#include #include #include #include #include -#include #include #include +#include #include #include #include @@ -92,32 +90,29 @@ #include #include #include -#include +#include #ifdef __ppc__ #include #include +#include #endif -/* Externs XXX */ -extern void rtclock_reset(void); +static void kernel_bootstrap_thread(void); -/* Forwards */ -void cpu_launch_first_thread( - thread_t thread); -void start_kernel_threads(void); +static void load_context( + thread_t thread); /* * Running in virtual memory, on the interrupt stack. - * Does not return. Dispatches initial thread. - * - * Assumes that master_cpu is set. */ void -setup_main(void) +kernel_bootstrap(void) { - thread_t startup_thread; + kern_return_t result; + thread_t thread; + lck_mod_init(); sched_init(); vm_mem_bootstrap(); ipc_bootstrap(); @@ -130,22 +125,16 @@ setup_main(void) */ PMAP_ACTIVATE_KERNEL(master_cpu); -#ifdef __ppc__ mapping_free_prime(); /* Load up with temporary mapping blocks */ -#endif machine_init(); kmod_init(); clock_init(); - init_timers(); - timer_call_initialize(); - - machine_info.max_cpus = NCPUS; machine_info.memory_size = mem_size; - machine_info.avail_cpus = 0; - machine_info.major_version = KERNEL_MAJOR_VERSION; - machine_info.minor_version = KERNEL_MINOR_VERSION; + machine_info.max_mem = max_mem; + machine_info.major_version = version_major; + machine_info.minor_version = version_minor; /* * Initialize the IPC, task, and thread subsystems. @@ -153,89 +142,54 @@ setup_main(void) ledger_init(); task_init(); thread_init(); - - /* - * Initialize the Event Trace Analysis Package. - * Dynamic Phase: 2 of 2 - */ - etap_init_phase2(); /* - * Create a kernel thread to start the other kernel - * threads. + * Create a kernel thread to execute the kernel bootstrap. */ - startup_thread = kernel_thread_create(start_kernel_threads, MAXPRI_KERNEL); + result = kernel_thread_create((thread_continue_t)kernel_bootstrap_thread, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + panic("kernel_bootstrap"); - /* - * Start the thread. - */ - startup_thread->state = TH_RUN; - pset_run_incr(startup_thread->processor_set); + thread->state = TH_RUN; + thread_deallocate(thread); - cpu_launch_first_thread(startup_thread); + load_context(thread); /*NOTREACHED*/ - panic("cpu_launch_first_thread returns!"); } /* - * Now running in a thread. Create the rest of the kernel threads - * and the bootstrap task. + * Now running in a thread. Kick off other services, + * invoke user bootstrap, enter pageout loop. */ -void -start_kernel_threads(void) +static void +kernel_bootstrap_thread(void) { - register int i; - - thread_bind(current_thread(), cpu_to_processor(cpu_number())); + processor_t processor = current_processor(); + thread_t self = current_thread(); /* - * Create the idle threads and the other - * service threads. + * Create the idle processor thread. 
*/ - for (i = 0; i < NCPUS; i++) { - processor_t processor = cpu_to_processor(i); - thread_t thread; - spl_t s; - - thread = kernel_thread_create(idle_thread, MAXPRI_KERNEL); - - s = splsched(); - thread_lock(thread); - thread->bound_processor = processor; - processor->idle_thread = thread; - thread->ref_count++; - thread->sched_pri = thread->priority = IDLEPRI; - thread->state = (TH_RUN | TH_IDLE); - thread_unlock(thread); - splx(s); - } - - /* - * Initialize the thread reaper mechanism. - */ - thread_reaper_init(); - - /* - * Initialize the stack swapin mechanism. - */ - swapin_init(); + idle_thread_create(processor); /* - * Initialize the periodic scheduler mechanism. + * N.B. Do not stick anything else + * before this point. + * + * Start up the scheduler services. */ - sched_tick_init(); + sched_startup(); /* - * Initialize the thread callout mechanism. + * Remain on current processor as + * additional processors come online. */ - thread_call_initialize(); + thread_bind(self, processor); /* - * Invoke some black magic. + * Kick off memory mapping adjustments. */ -#if __ppc__ mapping_adjust(); -#endif /* * Create the clock service. @@ -247,7 +201,7 @@ start_kernel_threads(void) */ device_service_create(); - shared_file_boot_time_init(ENV_DEFAULT_ROOT, machine_slot[cpu_number()].cpu_type); + shared_file_boot_time_init(ENV_DEFAULT_ROOT, cpu_type()); #ifdef IOKIT { @@ -265,10 +219,8 @@ start_kernel_threads(void) /* * Start the user bootstrap. */ - #ifdef MACH_BSD { - extern void bsd_init(void); bsd_init(); } #endif @@ -277,75 +229,114 @@ start_kernel_threads(void) serial_keyboard_init(); /* Start serial keyboard if wanted */ #endif - thread_bind(current_thread(), PROCESSOR_NULL); + thread_bind(self, PROCESSOR_NULL); /* * Become the pageout daemon. */ - vm_pageout(); /*NOTREACHED*/ } +/* + * slave_main: + * + * Load the first thread to start a processor. + */ void slave_main(void) { - processor_t myprocessor = current_processor(); + processor_t processor = current_processor(); thread_t thread; - thread = myprocessor->next_thread; - myprocessor->next_thread = THREAD_NULL; - if (thread == THREAD_NULL) { - thread = machine_wake_thread; - machine_wake_thread = THREAD_NULL; + /* + * Use the idle processor thread if there + * is no dedicated start up thread. + */ + if (processor->next_thread == THREAD_NULL) { + thread = processor->idle_thread; + thread->continuation = (thread_continue_t)processor_start_thread; + thread->parameter = NULL; + } + else { + thread = processor->next_thread; + processor->next_thread = THREAD_NULL; } - cpu_launch_first_thread(thread); + load_context(thread); /*NOTREACHED*/ - panic("slave_main"); } /* - * Now running in a thread context + * processor_start_thread: + * + * First thread to execute on a started processor. + * + * Called at splsched. */ void -start_cpu_thread(void) +processor_start_thread(void) { + processor_t processor = current_processor(); + thread_t self = current_thread(); + slave_machine_init(); - (void) thread_terminate(current_act()); + /* + * If running the idle processor thread, + * reenter the idle loop, else terminate. + */ + if (self == processor->idle_thread) + thread_block((thread_continue_t)idle_thread); + + thread_terminate(self); + /*NOTREACHED*/ } /* - * Start up the first thread on a CPU. + * load_context: + * + * Start the first thread on a processor. 
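
Several of the rewritten paths above (kernel_bootstrap_thread, swtch_continue, processor_start_thread) block by handing thread_block() a thread_continue_t rather than returning through saved stack frames: when the thread next runs it starts fresh in the continuation, so its kernel stack holds no live state while blocked and can be reclaimed or handed off. A schematic sketch of the style, with hypothetical routine names:

/* Sketch of continuation-style blocking as used above. */
static void
example_continue(void)
{
        /* Runs on a fresh stack; only state saved in the thread
         * structure survives from before the block. */
        thread_syscall_return(KERN_SUCCESS);
        /*NOTREACHED*/
}

static void
example_wait(event_t event)
{
        assert_wait(event, THREAD_UNINT);
        thread_block((thread_continue_t)example_continue);
        /*NOTREACHED*/  /* control resumes in example_continue() */
}
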
*/ -void -cpu_launch_first_thread( +static void +load_context( thread_t thread) { - register int mycpu = cpu_number(); - processor_t processor = cpu_to_processor(mycpu); + processor_t processor = current_processor(); - clock_get_uptime(&processor->last_dispatch); - start_timer(&kernel_timer[mycpu]); - machine_thread_set_current(thread); - cpu_up(mycpu); + machine_set_current_thread(thread); + processor_up(processor); - rtclock_reset(); /* start realtime clock ticking */ - PMAP_ACTIVATE_KERNEL(mycpu); + PMAP_ACTIVATE_KERNEL(PROCESSOR_DATA(processor, slot_num)); + + /* + * Acquire a stack if none attached. The panic + * should never occur since the thread is expected + * to have reserved stack. + */ + if (!thread->kernel_stack) { + if (!stack_alloc_try(thread)) + panic("load_context"); + } + + /* + * The idle processor threads are not counted as + * running for load calculations. + */ + if (!(thread->state & TH_IDLE)) + pset_run_incr(thread->processor_set); - thread_lock(thread); - thread->state &= ~TH_UNINT; - thread->last_processor = processor; processor->active_thread = thread; processor->current_pri = thread->sched_pri; - _mk_sp_thread_begin(thread, processor); - thread_unlock(thread); - timer_switch(&thread->system_timer); + processor->deadline = UINT64_MAX; + thread->last_processor = processor; + + processor->last_dispatch = mach_absolute_time(); + timer_switch((uint32_t)processor->last_dispatch, + &PROCESSOR_DATA(processor, offline_timer)); - PMAP_ACTIVATE_USER(thread->top_act, mycpu); + PMAP_ACTIVATE_USER(thread, PROCESSOR_DATA(processor, slot_num)); - /* preemption enabled by load_context */ machine_load_context(thread); /*NOTREACHED*/ } diff --git a/osfmk/kern/startup.h b/osfmk/kern/startup.h index bbd76a447..45689973e 100644 --- a/osfmk/kern/startup.h +++ b/osfmk/kern/startup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * * @@ -22,50 +22,25 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:56 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.7.1 1994/09/23 02:26:54 ezf - * change marker to not FREE - * [1994/09/22 21:36:18 ezf] - * - * Revision 1.1.2.3 1993/08/16 18:08:47 bernard - * Clean up MP configuration warnings - CR#9523 - * [1993/08/13 15:29:52 bernard] - * - * Revision 1.1.2.2 1993/08/11 18:04:28 bernard - * Fixed to use machine include file ANSI prototypes - CR#9523 - * [1993/08/11 16:28:27 bernard] - * - * Second pass fixes for ANSI prototypes - CR#9523 - * [1993/08/11 14:20:54 bernard] - * - * $EndLog$ - */ + +#ifdef XNU_KERNEL_PRIVATE #ifndef _KERN_STARTUP_H_ #define _KERN_STARTUP_H_ -#include +#include +__BEGIN_DECLS /* * Kernel and machine startup declarations */ /* Initialize kernel */ -extern void setup_main(void); +extern void kernel_bootstrap(void); /* Initialize machine dependent stuff */ extern void machine_init(void); -#if NCPUS > 1 - extern void slave_main(void); /* @@ -75,8 +50,18 @@ extern void slave_main(void); /* Slave cpu initialization */ extern void slave_machine_init(void); -/* Start slave processors */ -extern void start_other_cpus(void); +/* Device subsystem initialization */ +extern void device_service_create(void); + +#ifdef MACH_BSD + +/* BSD subsystem initialization */ +extern void bsd_init(void); + +#endif /* MACH_BSD */ + +__END_DECLS -#endif /* NCPUS > 1 */ #endif /* _KERN_STARTUP_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/kern/sync_lock.c b/osfmk/kern/sync_lock.c index 9acae3739..027754519 100644 --- a/osfmk/kern/sync_lock.c +++ b/osfmk/kern/sync_lock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,13 +30,16 @@ * Contains RT distributed lock synchronization services.
*/ -#include +#include +#include +#include + #include +#include #include #include #include #include -#include #include #include @@ -51,28 +54,26 @@ #define ulock_ownership_set(ul, th) \ MACRO_BEGIN \ - thread_act_t _th_act; \ - _th_act = (th)->top_act; \ - act_lock(_th_act); \ - enqueue (&_th_act->held_ulocks, (queue_entry_t) (ul)); \ - act_unlock(_th_act); \ - (ul)->holder = _th_act; \ + thread_mtx_lock(th); \ + enqueue (&th->held_ulocks, (queue_entry_t) (ul)); \ + thread_mtx_unlock(th); \ + (ul)->holder = th; \ MACRO_END #define ulock_ownership_clear(ul) \ MACRO_BEGIN \ - thread_act_t _th_act; \ - _th_act = (ul)->holder; \ - if (_th_act->active) { \ - act_lock(_th_act); \ - remqueue(&_th_act->held_ulocks, \ + thread_t th; \ + th = (ul)->holder; \ + if (th->active) { \ + thread_mtx_lock(th); \ + remqueue(&th->held_ulocks, \ (queue_entry_t) (ul)); \ - act_unlock(_th_act); \ + thread_mtx_unlock(th); \ } else { \ - remqueue(&_th_act->held_ulocks, \ + remqueue(&th->held_ulocks, \ (queue_entry_t) (ul)); \ } \ - (ul)->holder = THR_ACT_NULL; \ + (ul)->holder = THREAD_NULL; \ MACRO_END /* @@ -172,7 +173,7 @@ lock_set_create ( ulock = (ulock_t) &lock_set->ulock_list[x]; ulock_lock_init(ulock); ulock->lock_set = lock_set; - ulock->holder = THR_ACT_NULL; + ulock->holder = THREAD_NULL; ulock->blocked = FALSE; ulock->unstable = FALSE; ulock->ho_wait = FALSE; @@ -201,7 +202,6 @@ lock_set_create ( kern_return_t lock_set_destroy (task_t task, lock_set_t lock_set) { - thread_t thread; ulock_t ulock; int i; @@ -306,10 +306,10 @@ lock_acquire (lock_set_t lock_set, int lock_id) * Block the current thread if the lock is already held. */ - if (ulock->holder != THR_ACT_NULL) { + if (ulock->holder != THREAD_NULL) { int wait_result; - if (ulock->holder == current_act()) { + if (ulock->holder == current_thread()) { ulock_unlock(ulock); return KERN_LOCK_OWNED_SELF; } @@ -317,7 +317,7 @@ lock_acquire (lock_set_t lock_set, int lock_id) ulock->blocked = TRUE; wait_result = wait_queue_assert_wait64(&ulock->wait_queue, LOCK_SET_EVENT, - THREAD_ABORTSAFE); + THREAD_ABORTSAFE, 0); ulock_unlock(ulock); /* @@ -372,7 +372,7 @@ lock_release (lock_set_t lock_set, int lock_id) ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - return (lock_release_internal(ulock, current_act())); + return (ulock_release_internal(ulock, current_thread())); } kern_return_t @@ -405,10 +405,10 @@ lock_try (lock_set_t lock_set, int lock_id) * whether it already holds the lock or another thread does. */ - if (ulock->holder != THR_ACT_NULL) { + if (ulock->holder != THREAD_NULL) { lock_set_unlock(lock_set); - if (ulock->holder == current_act()) { + if (ulock->holder == current_thread()) { ulock_unlock(ulock); return KERN_LOCK_OWNED_SELF; } @@ -450,7 +450,7 @@ lock_make_stable (lock_set_t lock_set, int lock_id) ulock_lock(ulock); lock_set_unlock(lock_set); - if (ulock->holder != current_act()) { + if (ulock->holder != current_thread()) { ulock_unlock(ulock); return KERN_INVALID_RIGHT; } @@ -471,11 +471,10 @@ lock_make_stable (lock_set_t lock_set, int lock_id) * KERN_LOCK_UNSTABLE status, until the lock is made stable again. 
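
Those unstable-lock semantics are visible to clients of the lock_set interface: an acquirer inheriting a ulock from a dead holder is awakened with KERN_LOCK_UNSTABLE and holds the lock, but is expected to revalidate the protected state and then clear the condition. A hedged client-side sketch (lock_set and lock_id are placeholders):

/* Illustrative: handling a ulock whose previous holder died. */
kern_return_t kr = lock_acquire(lock_set, lock_id);
if (kr == KERN_LOCK_UNSTABLE) {
        /* We hold the lock, but the prior holder died with it held;
         * repair or revalidate the protected data, then mark stable. */
        kr = lock_make_stable(lock_set, lock_id);
}
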
*/ kern_return_t -lock_make_unstable (ulock_t ulock, thread_act_t thr_act) +lock_make_unstable (ulock_t ulock, thread_t thread) { lock_set_t lock_set; - lock_set = ulock->lock_set; lock_set_lock(lock_set); if (!lock_set->active) { @@ -486,7 +485,7 @@ lock_make_unstable (ulock_t ulock, thread_act_t thr_act) ulock_lock(ulock); lock_set_unlock(lock_set); - if (ulock->holder != thr_act) { + if (ulock->holder != thread) { ulock_unlock(ulock); return KERN_INVALID_RIGHT; } @@ -498,18 +497,16 @@ lock_make_unstable (ulock_t ulock, thread_act_t thr_act) } /* - * ROUTINE: lock_release_internal [internal] + * ROUTINE: ulock_release_internal [internal] * * Releases the ulock. * If any threads are blocked waiting for the ulock, one is woken-up. * */ kern_return_t -lock_release_internal (ulock_t ulock, thread_act_t thr_act) +ulock_release_internal (ulock_t ulock, thread_t thread) { lock_set_t lock_set; - int result; - if ((lock_set = ulock->lock_set) == LOCK_SET_NULL) return KERN_INVALID_ARGUMENT; @@ -522,7 +519,7 @@ lock_release_internal (ulock_t ulock, thread_act_t thr_act) ulock_lock(ulock); lock_set_unlock(lock_set); - if (ulock->holder != thr_act) { + if (ulock->holder != thread) { ulock_unlock(ulock); return KERN_INVALID_RIGHT; } @@ -534,18 +531,18 @@ lock_release_internal (ulock_t ulock, thread_act_t thr_act) */ if (ulock->blocked) { wait_queue_t wq = &ulock->wait_queue; - thread_t thread; + thread_t wqthread; spl_t s; s = splsched(); wait_queue_lock(wq); - thread = wait_queue_wakeup64_identity_locked(wq, + wqthread = wait_queue_wakeup64_identity_locked(wq, LOCK_SET_EVENT, THREAD_AWAKENED, TRUE); /* wait_queue now unlocked, thread locked */ - if (thread != THREAD_NULL) { + if (wqthread != THREAD_NULL) { /* * JMM - These ownership transfer macros have a * locking/race problem. To keep the thread from @@ -556,7 +553,7 @@ lock_release_internal (ulock_t ulock, thread_act_t thr_act) * Since this code was already broken before I got * here, I will leave it for now. */ - thread_unlock(thread); + thread_unlock(wqthread); splx(s); /* @@ -564,7 +561,7 @@ lock_release_internal (ulock_t ulock, thread_act_t thr_act) * from the current thread to the acquisition thread. */ ulock_ownership_clear(ulock); - ulock_ownership_set(ulock, thread); + ulock_ownership_set(ulock, wqthread); ulock_unlock(ulock); return KERN_SUCCESS; @@ -608,7 +605,7 @@ lock_handoff (lock_set_t lock_set, int lock_id) ulock_lock(ulock); lock_set_unlock(lock_set); - if (ulock->holder != current_act()) { + if (ulock->holder != current_thread()) { ulock_unlock(ulock); return KERN_INVALID_RIGHT; } @@ -646,7 +643,8 @@ lock_handoff (lock_set_t lock_set, int lock_id) * changing states on us (nullifying the ownership * assignment) we need to keep the thread locked * during the assignment. But we can't because the - * macros take an activation lock, which is a mutex. + * macros take a thread mutex lock. + * * Since this code was already broken before I got * here, I will leave it for now. 
*/ @@ -679,7 +677,7 @@ lock_handoff (lock_set_t lock_set, int lock_id) ulock->ho_wait = TRUE; wait_result = wait_queue_assert_wait64(&ulock->wait_queue, LOCK_SET_HANDOFF, - THREAD_ABORTSAFE); + THREAD_ABORTSAFE, 0); ulock_unlock(ulock); if (wait_result == THREAD_WAITING) @@ -697,17 +695,17 @@ lock_handoff (lock_set_t lock_set, int lock_id) case THREAD_INTERRUPTED: ulock_lock(ulock); - assert(ulock->holder == current_act()); + assert(ulock->holder == current_thread()); ulock->ho_wait = FALSE; ulock_unlock(ulock); return KERN_ABORTED; case THREAD_RESTART: goto retry; - - default: - panic("lock_handoff"); } + + panic("lock_handoff"); + return KERN_FAILURE; } kern_return_t @@ -743,7 +741,7 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id) return KERN_ALREADY_WAITING; } - if (ulock->holder == current_act()) { + if (ulock->holder == current_thread()) { ulock_unlock(ulock); return KERN_LOCK_OWNED_SELF; } @@ -755,17 +753,15 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id) */ if (ulock->ho_wait) { wait_queue_t wq = &ulock->wait_queue; - thread_t thread; /* * See who the lucky devil is, if he is still there waiting. */ - assert(ulock->holder != THR_ACT_NULL); - thread = ulock->holder->thread; + assert(ulock->holder != THREAD_NULL); if (wait_queue_wakeup64_thread(wq, LOCK_SET_HANDOFF, - thread, + ulock->holder, THREAD_AWAKENED) == KERN_SUCCESS) { /* * Holder thread was still waiting to give it @@ -790,7 +786,7 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id) ulock->accept_wait = TRUE; wait_result = wait_queue_assert_wait64(&ulock->wait_queue, LOCK_SET_HANDOFF, - THREAD_ABORTSAFE); + THREAD_ABORTSAFE, 0); ulock_unlock(ulock); if (wait_result == THREAD_WAITING) @@ -814,10 +810,10 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id) case THREAD_RESTART: goto retry; - - default: - panic("lock_handoff_accept"); } + + panic("lock_handoff_accept"); + return KERN_FAILURE; } /* @@ -853,6 +849,19 @@ lock_set_dereference(lock_set_t lock_set) if (ref_count == 0) { size = sizeof(struct lock_set) + (sizeof(struct ulock) * (lock_set->n_ulocks - 1)); - kfree((vm_offset_t) lock_set, size); + kfree(lock_set, size); + } +} + +void +ulock_release_all( + thread_t thread) +{ + ulock_t ulock; + + while (!queue_empty(&thread->held_ulocks)) { + ulock = (ulock_t)queue_first(&thread->held_ulocks); + lock_make_unstable(ulock, thread); + ulock_release_internal(ulock, thread); } } diff --git a/osfmk/kern/sync_lock.h b/osfmk/kern/sync_lock.h index 99f2d098c..a746d58a9 100644 --- a/osfmk/kern/sync_lock.h +++ b/osfmk/kern/sync_lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -35,10 +35,6 @@ #include -#include - -#ifdef __APPLE_API_PRIVATE - #ifdef MACH_KERNEL_PRIVATE #include @@ -54,7 +50,7 @@ typedef struct ulock { decl_mutex_data(,lock) /* ulock lock */ struct lock_set *lock_set; /* the retaining lock set */ - thread_act_t holder; /* thread_act that holds the lock */ + thread_t holder; /* thread that holds the lock */ unsigned int /* flags */ /* boolean_t */ blocked:1, /* did threads block waiting? */ /* boolean_t */ unstable:1, /* unstable? 
(holder died) */ @@ -89,60 +85,30 @@ typedef struct lock_set { * Data structure internal lock macros */ -#define lock_set_lock_init(ls) mutex_init(&(ls)->lock, \ - ETAP_THREAD_LOCK_SET) +#define lock_set_lock_init(ls) mutex_init(&(ls)->lock, 0) #define lock_set_lock(ls) mutex_lock(&(ls)->lock) #define lock_set_unlock(ls) mutex_unlock(&(ls)->lock) -#define ulock_lock_init(ul) mutex_init(&(ul)->lock, \ - ETAP_THREAD_ULOCK) +#define ulock_lock_init(ul) mutex_init(&(ul)->lock, 0) #define ulock_lock(ul) mutex_lock(&(ul)->lock) #define ulock_unlock(ul) mutex_unlock(&(ul)->lock) extern void lock_set_init(void); -extern kern_return_t lock_release_internal (ulock_t ulock, - thread_act_t thr_act); - -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ - - -/* - * Forward Declarations - */ - -extern kern_return_t lock_set_create (task_t task, - lock_set_t *new_lock_set, - int n_ulocks, - int policy); - -extern kern_return_t lock_set_destroy (task_t task, - lock_set_t lock_set); - -extern kern_return_t lock_acquire (lock_set_t lock_set, - int lock_id); - -extern kern_return_t lock_release (lock_set_t lock_set, - int lock_id); - -extern kern_return_t lock_try (lock_set_t lock_set, - int lock_id); +extern kern_return_t ulock_release_internal( + ulock_t ulock, + thread_t thread); -extern kern_return_t lock_make_stable (lock_set_t lock_set, - int lock_id); +extern kern_return_t lock_make_unstable( + ulock_t ulock, + thread_t thread); -extern kern_return_t lock_make_unstable (ulock_t ulock, - thread_act_t thr_act); - -extern kern_return_t lock_handoff (lock_set_t lock_set, - int lock_id); - -extern kern_return_t lock_handoff_accept (lock_set_t lock_set, - int lock_id); +extern void ulock_release_all( + thread_t thread); extern void lock_set_reference (lock_set_t lock_set); extern void lock_set_dereference (lock_set_t lock_set); +#endif /* MACH_KERNEL_PRIVATE */ + #endif /* _KERN_SYNC_LOCK_H_ */ diff --git a/osfmk/kern/sync_sema.c b/osfmk/kern/sync_sema.c index a112422a4..dc22aaa4e 100644 --- a/osfmk/kern/sync_sema.c +++ b/osfmk/kern/sync_sema.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -31,9 +31,11 @@ */ #include +#include #include #include #include +#include #include #include @@ -56,6 +58,56 @@ static unsigned int semaphore_event; zone_t semaphore_zone; unsigned int semaphore_max = SEMAPHORE_MAX; +/* Forward declarations */ + + +kern_return_t +semaphore_wait_trap_internal( + mach_port_name_t name, + void (*caller_cont)(kern_return_t)); + +kern_return_t +semaphore_wait_signal_trap_internal( + mach_port_name_t wait_name, + mach_port_name_t signal_name, + void (*caller_cont)(kern_return_t)); + +kern_return_t +semaphore_timedwait_trap_internal( + mach_port_name_t name, + unsigned int sec, + clock_res_t nsec, + void (*caller_cont)(kern_return_t)); + +kern_return_t +semaphore_timedwait_signal_trap_internal( + mach_port_name_t wait_name, + mach_port_name_t signal_name, + unsigned int sec, + clock_res_t nsec, + void (*caller_cont)(kern_return_t)); + + +kern_return_t +semaphore_signal_internal( + semaphore_t semaphore, + thread_t thread, + int options); + +kern_return_t +semaphore_convert_wait_result( + int wait_result); + +void +semaphore_wait_continue(void); + +kern_return_t +semaphore_wait_internal( + semaphore_t wait_semaphore, + semaphore_t signal_semaphore, + mach_timespec_t *wait_timep, + void (*caller_cont)(kern_return_t)); + /* * ROUTINE: semaphore_init [private] * @@ -152,7 +204,6 @@ semaphore_destroy( semaphore_t semaphore) { int old_count; - thread_t thread; spl_t spl_level; @@ -218,8 +269,8 @@ semaphore_destroy( kern_return_t semaphore_signal_internal( semaphore_t semaphore, - thread_act_t thread_act, - int options) + thread_t thread, + int options) { kern_return_t kr; spl_t spl_level; @@ -233,12 +284,12 @@ semaphore_signal_internal( return KERN_TERMINATED; } - if (thread_act != THR_ACT_NULL) { + if (thread != THREAD_NULL) { if (semaphore->count < 0) { kr = wait_queue_wakeup64_thread_locked( &semaphore->wait_queue, SEMAPHORE_EVENT, - thread_act->thread, + thread, THREAD_AWAKENED, TRUE); /* unlock? */ } else { @@ -294,15 +345,15 @@ semaphore_signal_internal( /* * Routine: semaphore_signal_thread * - * If the specified thread_act is blocked on the semaphore, it is - * woken up. If a NULL thread_act was supplied, then any one + * If the specified thread is blocked on the semaphore, it is + * woken up. If a NULL thread was supplied, then any one * thread is woken up. Otherwise the caller gets KERN_NOT_WAITING * and the semaphore is unchanged. */ kern_return_t semaphore_signal_thread( semaphore_t semaphore, - thread_act_t thread_act) + thread_t thread) { kern_return_t ret; @@ -310,7 +361,7 @@ semaphore_signal_thread( return KERN_INVALID_ARGUMENT; ret = semaphore_signal_internal(semaphore, - thread_act, + thread, SEMAPHORE_OPTION_NONE); return ret; } @@ -322,12 +373,12 @@ semaphore_signal_thread( */ kern_return_t semaphore_signal_thread_trap( - mach_port_name_t sema_name, - mach_port_name_t thread_name) + struct semaphore_signal_thread_trap_args *args) { - + mach_port_name_t sema_name = args->signal_name; + mach_port_name_t thread_name = args->thread_name; semaphore_t semaphore; - thread_act_t thread_act; + thread_t thread; kern_return_t kr; /* @@ -336,22 +387,22 @@ semaphore_signal_thread_trap( * pre-post the semaphore. 
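
As documented above, semaphore_signal_thread() targets one specific waiter, while THREAD_NULL degrades to waking any one waiter; when the target is not blocked on the semaphore the caller gets KERN_NOT_WAITING and the count is untouched. A kernel-side usage sketch (sem and target are placeholder variables):

/* Illustrative: directed wakeup with fallback, per the comment above. */
kern_return_t kr = semaphore_signal_thread(sem, target);
if (kr == KERN_NOT_WAITING) {
        /* 'target' was not blocked on 'sem' and the semaphore is
         * unchanged; fall back to waking any one waiter instead. */
        kr = semaphore_signal_thread(sem, THREAD_NULL);
}
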
*/ if (thread_name != MACH_PORT_NULL) { - thread_act = port_name_to_act(thread_name); - if (thread_act == THR_ACT_NULL) + thread = port_name_to_thread(thread_name); + if (thread == THREAD_NULL) return KERN_INVALID_ARGUMENT; } else - thread_act = THR_ACT_NULL; + thread = THREAD_NULL; kr = port_name_to_semaphore(sema_name, &semaphore); - if (kr != KERN_SUCCESS) { - act_deallocate(thread_act); - return kr; + if (kr == KERN_SUCCESS) { + kr = semaphore_signal_internal(semaphore, + thread, + SEMAPHORE_OPTION_NONE); + semaphore_dereference(semaphore); + } + if (thread != THREAD_NULL) { + thread_deallocate(thread); } - kr = semaphore_signal_internal(semaphore, - thread_act, - SEMAPHORE_OPTION_NONE); - semaphore_dereference(semaphore); - act_deallocate(thread_act); return kr; } @@ -378,7 +429,7 @@ semaphore_signal( return KERN_INVALID_ARGUMENT; kr = semaphore_signal_internal(semaphore, - THR_ACT_NULL, + THREAD_NULL, SEMAPHORE_SIGNAL_PREPOST); if (kr == KERN_NOT_WAITING) return KERN_SUCCESS; @@ -392,22 +443,21 @@ semaphore_signal( */ kern_return_t semaphore_signal_trap( - mach_port_name_t sema_name) + struct semaphore_signal_trap_args *args) { - + mach_port_name_t sema_name = args->signal_name; semaphore_t semaphore; kern_return_t kr; kr = port_name_to_semaphore(sema_name, &semaphore); - if (kr != KERN_SUCCESS) { - return kr; + if (kr == KERN_SUCCESS) { + kr = semaphore_signal_internal(semaphore, + THREAD_NULL, + SEMAPHORE_SIGNAL_PREPOST); + semaphore_dereference(semaphore); + if (kr == KERN_NOT_WAITING) + kr = KERN_SUCCESS; } - kr = semaphore_signal_internal(semaphore, - THR_ACT_NULL, - SEMAPHORE_SIGNAL_PREPOST); - semaphore_dereference(semaphore); - if (kr == KERN_NOT_WAITING) - return KERN_SUCCESS; return kr; } @@ -427,7 +477,7 @@ semaphore_signal_all( return KERN_INVALID_ARGUMENT; kr = semaphore_signal_internal(semaphore, - THR_ACT_NULL, + THREAD_NULL, SEMAPHORE_SIGNAL_ALL); if (kr == KERN_NOT_WAITING) return KERN_SUCCESS; @@ -441,22 +491,21 @@ semaphore_signal_all( */ kern_return_t semaphore_signal_all_trap( - mach_port_name_t sema_name) + struct semaphore_signal_all_trap_args *args) { - + mach_port_name_t sema_name = args->signal_name; semaphore_t semaphore; kern_return_t kr; kr = port_name_to_semaphore(sema_name, &semaphore); - if (kr != KERN_SUCCESS) { - return kr; + if (kr == KERN_SUCCESS) { + kr = semaphore_signal_internal(semaphore, + THREAD_NULL, + SEMAPHORE_SIGNAL_ALL); + semaphore_dereference(semaphore); + if (kr == KERN_NOT_WAITING) + kr = KERN_SUCCESS; } - kr = semaphore_signal_internal(semaphore, - THR_ACT_NULL, - SEMAPHORE_SIGNAL_ALL); - semaphore_dereference(semaphore); - if (kr == KERN_NOT_WAITING) - return KERN_SUCCESS; return kr; } @@ -511,33 +560,6 @@ semaphore_wait_continue(void) (*caller_cont)(semaphore_convert_wait_result(wait_result)); } -/* - * Routine: semaphore_timedwait_continue - * - * Common continuation routine after doing a timed wait on a - * semaphore. It clears the timer before calling the semaphore - * routine saved in the thread struct. 
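
The semaphore_timedwait_continue routine whose removal continues just below existed only to cancel a separately armed thread timer after a timed wait. Its replacement, visible in semaphore_wait_internal below, folds the timeout into the wait itself: the mach_timespec_t is converted to an absolute deadline before the wait is asserted, and a zero deadline means wait forever. A sketch of that conversion, using the same clock helpers the new code calls:

/* Sketch: mach_timespec_t timeout -> absolute deadline for the wait. */
uint64_t abstime = 0;

if (wait_timep != (mach_timespec_t *)0) {
        nanoseconds_to_absolutetime((uint64_t)wait_timep->tv_sec * NSEC_PER_SEC +
                                        wait_timep->tv_nsec, &abstime);
        clock_absolutetime_interval_to_deadline(abstime, &abstime); /* now + dt */
}
/* abstime == 0 tells the wait-queue code there is no deadline. */
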
- */ -void -semaphore_timedwait_continue(void) -{ - thread_t self = current_thread(); - int wait_result = self->wait_result; - void (*caller_cont)(kern_return_t) = self->sth_continuation; - - if (wait_result != THREAD_TIMED_OUT) - thread_cancel_timer(); - - assert(self->sth_waitsemaphore != SEMAPHORE_NULL); - semaphore_dereference(self->sth_waitsemaphore); - if (self->sth_signalsemaphore != SEMAPHORE_NULL) - semaphore_dereference(self->sth_signalsemaphore); - - assert(caller_cont != (void (*)(kern_return_t))0); - (*caller_cont)(semaphore_convert_wait_result(wait_result)); -} - - /* * Routine: semaphore_wait_internal * @@ -556,11 +578,9 @@ semaphore_wait_internal( mach_timespec_t *wait_timep, void (*caller_cont)(kern_return_t)) { - void (*continuation)(void); - uint64_t abstime; - boolean_t nonblocking; - int wait_result; - spl_t spl_level; + boolean_t nonblocking; + int wait_result; + spl_t spl_level; kern_return_t kr = KERN_ALREADY_WAITING; spl_level = splsched(); @@ -581,14 +601,27 @@ semaphore_wait_internal( } else if (nonblocking) { kr = KERN_OPERATION_TIMED_OUT; } else { - thread_t self = current_thread(); + uint64_t abstime; + thread_t self = current_thread(); wait_semaphore->count = -1; /* we don't keep an actual count */ thread_lock(self); + + /* + * If it is a timed wait, calculate the wake up deadline. + */ + if (wait_timep != (mach_timespec_t *)0) { + nanoseconds_to_absolutetime((uint64_t)wait_timep->tv_sec * + NSEC_PER_SEC + wait_timep->tv_nsec, &abstime); + clock_absolutetime_interval_to_deadline(abstime, &abstime); + } + else + abstime = 0; + (void)wait_queue_assert_wait64_locked( &wait_semaphore->wait_queue, SEMAPHORE_EVENT, - THREAD_ABORTSAFE, + THREAD_ABORTSAFE, abstime, self); thread_unlock(self); } @@ -608,7 +641,7 @@ semaphore_wait_internal( * our intention to wait above). */ signal_kr = semaphore_signal_internal(signal_semaphore, - THR_ACT_NULL, + THREAD_NULL, SEMAPHORE_SIGNAL_PREPOST); if (signal_kr == KERN_NOT_WAITING) @@ -642,19 +675,6 @@ semaphore_wait_internal( */ if (kr != KERN_ALREADY_WAITING) return kr; - - /* - * If it is a timed wait, go ahead and set up the timer. - */ - if (wait_timep != (mach_timespec_t *)0) { - nanoseconds_to_absolutetime((uint64_t)wait_timep->tv_sec * - NSEC_PER_SEC + wait_timep->tv_nsec, &abstime); - clock_absolutetime_interval_to_deadline(abstime, &abstime); - thread_set_timer_deadline(abstime); - continuation = semaphore_timedwait_continue; - } else { - continuation = semaphore_wait_continue; - } /* * Now, we can block. If the caller supplied a continuation @@ -669,20 +689,12 @@ semaphore_wait_internal( self->sth_continuation = caller_cont; self->sth_waitsemaphore = wait_semaphore; self->sth_signalsemaphore = signal_semaphore; - wait_result = thread_block(continuation); - } else { + wait_result = thread_block((thread_continue_t)semaphore_wait_continue); + } + else { wait_result = thread_block(THREAD_CONTINUE_NULL); } - /* - * If we came back here (not continuation case) cancel - * any pending timers, convert the wait result to an - * appropriate semaphore return value, and then return - * that. - */ - if (wait_timep && (wait_result != THREAD_TIMED_OUT)) - thread_cancel_timer(); - return (semaphore_convert_wait_result(wait_result)); } @@ -713,22 +725,32 @@ semaphore_wait( * Trap version of semaphore wait. Called on behalf of user-level * clients. 
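
The trap entry points in this file now share one shape: take the trap's args structure, translate port names, run the common *_internal routine with a continuation (thread_syscall_return for traps, an ordinary return for kernel callers), and drop the translation references on the way out. A reduced sketch of that resolve/operate/dereference nesting; do_operation() is a hypothetical stand-in for the semaphore internals:

/* Sketch of the trap shape used above. */
extern kern_return_t do_operation(semaphore_t sem);    /* hypothetical */

static kern_return_t
example_trap(mach_port_name_t name)
{
        semaphore_t     sem;
        kern_return_t   kr = port_name_to_semaphore(name, &sem);

        if (kr == KERN_SUCCESS) {
                kr = do_operation(sem);         /* operate while referenced */
                semaphore_dereference(sem);     /* drop the translation ref */
        }
        return kr;
}
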
*/ + kern_return_t semaphore_wait_trap( - mach_port_name_t name) + struct semaphore_wait_trap_args *args) +{ + return(semaphore_wait_trap_internal(args->wait_name, thread_syscall_return)); +} + + + +kern_return_t +semaphore_wait_trap_internal( + mach_port_name_t name, + void (*caller_cont)(kern_return_t)) { semaphore_t semaphore; kern_return_t kr; kr = port_name_to_semaphore(name, &semaphore); - if (kr != KERN_SUCCESS) - return kr; - - kr = semaphore_wait_internal(semaphore, - SEMAPHORE_NULL, - (mach_timespec_t *)0, - thread_syscall_return); - semaphore_dereference(semaphore); + if (kr == KERN_SUCCESS) { + kr = semaphore_wait_internal(semaphore, + SEMAPHORE_NULL, + (mach_timespec_t *)0, + caller_cont); + semaphore_dereference(semaphore); + } return kr; } @@ -771,10 +793,21 @@ semaphore_timedwait( */ kern_return_t semaphore_timedwait_trap( - mach_port_name_t name, - unsigned int sec, - clock_res_t nsec) + struct semaphore_timedwait_trap_args *args) { + + return(semaphore_timedwait_trap_internal(args->wait_name, args->sec, args->nsec, thread_syscall_return)); +} + + +kern_return_t +semaphore_timedwait_trap_internal( + mach_port_name_t name, + unsigned int sec, + clock_res_t nsec, + void (*caller_cont)(kern_return_t)) +{ + semaphore_t semaphore; mach_timespec_t wait_time; kern_return_t kr; @@ -785,14 +818,13 @@ semaphore_timedwait_trap( return KERN_INVALID_VALUE; kr = port_name_to_semaphore(name, &semaphore); - if (kr != KERN_SUCCESS) - return kr; - - kr = semaphore_wait_internal(semaphore, - SEMAPHORE_NULL, - &wait_time, - thread_syscall_return); - semaphore_dereference(semaphore); + if (kr == KERN_SUCCESS) { + kr = semaphore_wait_internal(semaphore, + SEMAPHORE_NULL, + &wait_time, + caller_cont); + semaphore_dereference(semaphore); + } return kr; } @@ -826,30 +858,33 @@ semaphore_wait_signal( */ kern_return_t semaphore_wait_signal_trap( - mach_port_name_t wait_name, - mach_port_name_t signal_name) + struct semaphore_wait_signal_trap_args *args) +{ + return(semaphore_wait_signal_trap_internal(args->wait_name, args->signal_name, thread_syscall_return)); +} + +kern_return_t +semaphore_wait_signal_trap_internal( + mach_port_name_t wait_name, + mach_port_name_t signal_name, + void (*caller_cont)(kern_return_t)) { semaphore_t wait_semaphore; semaphore_t signal_semaphore; kern_return_t kr; kr = port_name_to_semaphore(signal_name, &signal_semaphore); - if (kr != KERN_SUCCESS) - return kr; - - kr = port_name_to_semaphore(wait_name, &wait_semaphore); - if (kr != KERN_SUCCESS) { + if (kr == KERN_SUCCESS) { + kr = port_name_to_semaphore(wait_name, &wait_semaphore); + if (kr == KERN_SUCCESS) { + kr = semaphore_wait_internal(wait_semaphore, + signal_semaphore, + (mach_timespec_t *)0, + caller_cont); + semaphore_dereference(wait_semaphore); + } semaphore_dereference(signal_semaphore); - return kr; } - - kr = semaphore_wait_internal(wait_semaphore, - signal_semaphore, - (mach_timespec_t *)0, - thread_syscall_return); - - semaphore_dereference(wait_semaphore); - semaphore_dereference(signal_semaphore); return kr; } @@ -889,10 +924,18 @@ semaphore_timedwait_signal( */ kern_return_t semaphore_timedwait_signal_trap( - mach_port_name_t wait_name, - mach_port_name_t signal_name, - unsigned int sec, - clock_res_t nsec) + struct semaphore_timedwait_signal_trap_args *args) +{ + return(semaphore_timedwait_signal_trap_internal(args->wait_name, args->signal_name, args->sec, args->nsec, thread_syscall_return)); +} + +kern_return_t +semaphore_timedwait_signal_trap_internal( + mach_port_name_t wait_name, + 
mach_port_name_t signal_name, + unsigned int sec, + clock_res_t nsec, + void (*caller_cont)(kern_return_t)) { semaphore_t wait_semaphore; semaphore_t signal_semaphore; @@ -905,22 +948,17 @@ semaphore_timedwait_signal_trap( return KERN_INVALID_VALUE; kr = port_name_to_semaphore(signal_name, &signal_semaphore); - if (kr != KERN_SUCCESS) - return kr; - - kr = port_name_to_semaphore(wait_name, &wait_semaphore); - if (kr != KERN_SUCCESS) { + if (kr == KERN_SUCCESS) { + kr = port_name_to_semaphore(wait_name, &wait_semaphore); + if (kr == KERN_SUCCESS) { + kr = semaphore_wait_internal(wait_semaphore, + signal_semaphore, + &wait_time, + caller_cont); + semaphore_dereference(wait_semaphore); + } semaphore_dereference(signal_semaphore); - return kr; } - - kr = semaphore_wait_internal(wait_semaphore, - signal_semaphore, - &wait_time, - thread_syscall_return); - - semaphore_dereference(wait_semaphore); - semaphore_dereference(signal_semaphore); return kr; } @@ -970,7 +1008,7 @@ semaphore_dereference( if (ref_count == 0) { assert(wait_queue_empty(&semaphore->wait_queue)); - zfree(semaphore_zone, (vm_offset_t)semaphore); + zfree(semaphore_zone, semaphore); } } } diff --git a/osfmk/kern/syscall_emulation.c b/osfmk/kern/syscall_emulation.c index 028e523d9..54f50e76b 100644 --- a/osfmk/kern/syscall_emulation.c +++ b/osfmk/kern/syscall_emulation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -49,122 +49,24 @@ */ /* */ - +#include #include #include #include +#include + +#include #include -#include #include #include + +#include #include -#include /* for syscall_emulation_sync */ /* * Exported interface */ -/* - * WARNING: - * This code knows that kalloc() allocates memory most efficiently - * in sizes that are powers of 2, and asks for those sizes. - */ - -/* - * Go from number of entries to size of struct eml_dispatch and back. - */ -#define base_size (sizeof(struct eml_dispatch) - sizeof(eml_routine_t)) -#define count_to_size(count) \ - (base_size + sizeof(vm_offset_t) * (count)) - -#define size_to_count(size) \ - ( ((size) - base_size) / sizeof(vm_offset_t) ) - -/* Forwards */ -kern_return_t -task_set_emulation_vector_internal( - task_t task, - int vector_start, - emulation_vector_t emulation_vector, - mach_msg_type_number_t emulation_vector_count); - -/* - * eml_init: initialize user space emulation code - */ -void -eml_init(void) -{ -} - -/* - * eml_task_reference() [Exported] - * - * Bumps the reference count on the common emulation - * vector. - */ - -void -eml_task_reference( - task_t task, - task_t parent) -{ - register eml_dispatch_t eml; - - if (parent == TASK_NULL) - eml = EML_DISPATCH_NULL; - else - eml = parent->eml_dispatch; - - if (eml != EML_DISPATCH_NULL) { - mutex_lock(&eml->lock); - eml->ref_count++; - mutex_unlock(&eml->lock); - } - task->eml_dispatch = eml; -} - - -/* - * eml_task_deallocate() [Exported] - * - * Cleans up after the emulation code when a process exits. - */ - -void -eml_task_deallocate( - task_t task) -{ - register eml_dispatch_t eml; - - eml = task->eml_dispatch; - if (eml != EML_DISPATCH_NULL) { - int count; - - mutex_lock(&eml->lock); - count = --eml->ref_count; - mutex_unlock(&eml->lock); - - if (count == 0) - kfree((vm_offset_t)eml, count_to_size(eml->disp_count)); - - task->eml_dispatch = EML_DISPATCH_NULL; - } -} - -/* - * task_set_emulation_vector: [Server Entry] - * set a list of emulated system calls for this task. 
- */ -kern_return_t -task_set_emulation_vector_internal( - task_t task, - int vector_start, - emulation_vector_t emulation_vector, - mach_msg_type_number_t emulation_vector_count) -{ - return KERN_NOT_SUPPORTED; -} - /* * task_set_emulation_vector: [Server Entry] * diff --git a/osfmk/kern/syscall_emulation.h b/osfmk/kern/syscall_emulation.h deleted file mode 100644 index baf87c667..000000000 --- a/osfmk/kern/syscall_emulation.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. 
- */ -/* - */ - -#ifndef _KERN_SYSCALL_EMULATION_H_ -#define _KERN_SYSCALL_EMULATION_H_ - -#ifndef ASSEMBLER -#include -#include -#include - -typedef vm_offset_t eml_routine_t; - -typedef struct eml_dispatch { - decl_mutex_data(,lock) /* lock for reference count */ - int ref_count; /* reference count */ - int disp_count; /* count of entries in vector */ - int disp_min; /* index of lowest entry in vector */ - eml_routine_t disp_vector[1]; /* first entry in array of dispatch */ - /* routines (array has disp_count - elements) */ -} *eml_dispatch_t; - -#define EML_ROUTINE_NULL (eml_routine_t)0 -#define EML_DISPATCH_NULL (eml_dispatch_t)0 - -#define EML_SUCCESS (0) - -#define EML_MOD (err_kern|err_sub(2)) -#define EML_BAD_TASK (EML_MOD|0x0001) -#define EML_BAD_CNT (EML_MOD|0x0002) - -/* Per-task initialization */ -extern void eml_init(void); - -/* Take reference on common task emulation vector */ -extern void eml_task_reference( - task_t new_task, - task_t parent_task); - -/* Deallocate reference on common task emulation vector */ -extern void eml_task_deallocate( - task_t task); - -#endif /* ASSEMBLER */ - -#endif /* _KERN_SYSCALL_EMULATION_H_ */ diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c index 26fc9b214..b40d4a63d 100644 --- a/osfmk/kern/syscall_subr.c +++ b/osfmk/kern/syscall_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include @@ -63,15 +62,12 @@ #include #include #include -#include #include #include #include #include -#include - /* * swtch and swtch_pri both attempt to context switch (logic in * thread_block no-ops the context switch if nothing would happen). @@ -86,29 +82,30 @@ * lock and then be a good citizen and really suspend. 
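
The comment above frames swtch()/swtch_pri() as the polite way to contend: spin briefly, and if the lock stays held, yield the processor whenever anything else is runnable; the boolean result reports whether other threads still want to run. A user-level-flavored sketch, where try_lock() is hypothetical and swtch() is the argument-free form seen by callers of the trap:

/* Illustrative spin-then-yield loop in the spirit of the comment above. */
void
polite_lock(void)
{
        while (!try_lock()) {
                if (!swtch()) {
                        /* Nothing else was runnable; a well-behaved caller
                         * would eventually stop spinning and really suspend. */
                }
        }
}
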
*/ -void +static void swtch_continue(void) { register processor_t myprocessor; boolean_t result; - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); result = myprocessor->runq.count > 0 || myprocessor->processor_set->runq.count > 0; - mp_enable_preemption(); + enable_preemption(); thread_syscall_return(result); /*NOTREACHED*/ } boolean_t -swtch(void) +swtch( + __unused struct swtch_args *args) { register processor_t myprocessor; boolean_t result; - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); if ( myprocessor->runq.count == 0 && myprocessor->processor_set->runq.count == 0 ) { @@ -116,30 +113,30 @@ swtch(void) return (FALSE); } - mp_enable_preemption(); + enable_preemption(); counter(c_swtch_block++); - thread_block_reason(swtch_continue, AST_YIELD); + thread_block_reason((thread_continue_t)swtch_continue, NULL, AST_YIELD); - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); result = myprocessor->runq.count > 0 || myprocessor->processor_set->runq.count > 0; - mp_enable_preemption(); + enable_preemption(); return (result); } -void +static void swtch_pri_continue(void) { register processor_t myprocessor; boolean_t result; - _mk_sp_thread_depress_abort(current_thread(), FALSE); + thread_depress_abort_internal(current_thread()); - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); result = myprocessor->runq.count > 0 || myprocessor->processor_set->runq.count > 0; @@ -151,12 +148,12 @@ swtch_pri_continue(void) boolean_t swtch_pri( - int pri) +__unused struct swtch_pri_args *args) { register processor_t myprocessor; boolean_t result; - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); if ( myprocessor->runq.count == 0 && myprocessor->processor_set->runq.count == 0 ) { @@ -164,25 +161,38 @@ swtch_pri( return (FALSE); } - mp_enable_preemption(); + enable_preemption(); counter(c_swtch_pri_block++); - _mk_sp_thread_depress_abstime(std_quantum); + thread_depress_abstime(std_quantum); - thread_block_reason(swtch_pri_continue, AST_YIELD); + thread_block_reason((thread_continue_t)swtch_pri_continue, NULL, AST_YIELD); - _mk_sp_thread_depress_abort(current_thread(), FALSE); + thread_depress_abort_internal(current_thread()); - mp_disable_preemption(); + disable_preemption(); myprocessor = current_processor(); result = myprocessor->runq.count > 0 || myprocessor->processor_set->runq.count > 0; - mp_enable_preemption(); + enable_preemption(); return (result); } +static void +thread_switch_continue(void) +{ + register thread_t self = current_thread(); + int option = self->saved.swtch.option; + + if (option == SWITCH_OPTION_DEPRESS) + thread_depress_abort_internal(self); + + thread_syscall_return(KERN_SUCCESS); + /*NOTREACHED*/ +} + /* * thread_switch: * @@ -190,11 +200,12 @@ swtch_pri( */ kern_return_t thread_switch( - mach_port_name_t thread_name, - int option, - mach_msg_timeout_t option_time) + struct thread_switch_args *args) { - register thread_act_t hint_act = THR_ACT_NULL; + register thread_t thread, self = current_thread(); + mach_port_name_t thread_name = args->thread_name; + int option = args->option; + mach_msg_timeout_t option_time = args->option_time; /* * Process option. @@ -210,18 +221,227 @@ thread_switch( return (KERN_INVALID_ARGUMENT); } + /* + * Translate the port name if supplied. 
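Before the translation logic continues below, note the new calling convention visible in the signatures above: each trap handler now receives a single pointer to a struct of already-munged user arguments instead of bare register parameters. The layouts here are a sketch inferred from the args-> accesses in this diff, not the generated headers:

/* Hypothetical argument blocks, reconstructed from usage above. */
struct swtch_args         { int unused; };         /* no user arguments */
struct swtch_pri_args     { int pri; };            /* ignored by the kernel */
struct thread_switch_args {
        mach_port_name_t        thread_name;    /* handoff hint, may be MACH_PORT_NULL */
        int                     option;         /* SWITCH_OPTION_* */
        mach_msg_timeout_t      option_time;    /* milliseconds */
};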
+ */ if (thread_name != MACH_PORT_NULL) { ipc_port_t port; - if (ipc_port_translate_send(current_task()->itk_space, + if (ipc_port_translate_send(self->task->itk_space, thread_name, &port) == KERN_SUCCESS) { ip_reference(port); ip_unlock(port); - hint_act = convert_port_to_act(port); + thread = convert_port_to_thread(port); ipc_port_release(port); + + if (thread == self) { + thread_deallocate_internal(thread); + thread = THREAD_NULL; + } } + else + thread = THREAD_NULL; } + else + thread = THREAD_NULL; + + /* + * Try to handoff if supplied. + */ + if (thread != THREAD_NULL) { + processor_t processor; + spl_t s; + + s = splsched(); + thread_lock(thread); + + /* + * Check if the thread is in the right pset, + * is not bound to a different processor, + * and that realtime is not involved. + * + * Next, pull it off its run queue. If it + * doesn't come, it's not eligible. + */ + processor = current_processor(); + if (processor->current_pri < BASEPRI_RTQUEUES && + thread->sched_pri < BASEPRI_RTQUEUES && + thread->processor_set == processor->processor_set && + (thread->bound_processor == PROCESSOR_NULL || + thread->bound_processor == processor) && + run_queue_remove(thread) != RUN_QUEUE_NULL ) { + /* + * Hah, got it!! + */ + thread_unlock(thread); + + thread_deallocate_internal(thread); + + if (option == SWITCH_OPTION_WAIT) + assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, + option_time, 1000*NSEC_PER_USEC); + else + if (option == SWITCH_OPTION_DEPRESS) + thread_depress_ms(option_time); + + self->saved.swtch.option = option; + + thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread); + /* NOTREACHED */ + } + + thread_unlock(thread); + splx(s); + + thread_deallocate(thread); + } + + if (option == SWITCH_OPTION_WAIT) + assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, 1000*NSEC_PER_USEC); + else + if (option == SWITCH_OPTION_DEPRESS) + thread_depress_ms(option_time); + + self->saved.swtch.option = option; + + thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD); + + if (option == SWITCH_OPTION_DEPRESS) + thread_depress_abort_internal(self); + + return (KERN_SUCCESS); +} - return _mk_sp_thread_switch(hint_act, option, option_time); +/* + * Depress thread's priority to lowest possible for the specified interval, + * with a value of zero resulting in no timeout being scheduled. + */ +void +thread_depress_abstime( + uint64_t interval) +{ + register thread_t self = current_thread(); + uint64_t deadline; + spl_t s; + + s = splsched(); + thread_lock(self); + if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { + processor_t myprocessor = self->last_processor; + + self->sched_pri = DEPRESSPRI; + myprocessor->current_pri = self->sched_pri; + self->sched_mode &= ~TH_MODE_PREEMPT; + self->sched_mode |= TH_MODE_DEPRESS; + + if (interval != 0) { + clock_absolutetime_interval_to_deadline(interval, &deadline); + if (!timer_call_enter(&self->depress_timer, deadline)) + self->depress_timer_active++; + } + } + thread_unlock(self); + splx(s); +} + +void +thread_depress_ms( + mach_msg_timeout_t interval) +{ + uint64_t abstime; + + clock_interval_to_absolutetime_interval( + interval, 1000*NSEC_PER_USEC, &abstime); + thread_depress_abstime(abstime); +} + +/* + * Priority depression expiration. 
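From user space, the handoff path above is reached through the thread_switch() trap. A small usage sketch, assuming the SWITCH_OPTION_* constants and prototype come in via <mach/thread_switch.h> and that the target port is a send right for a thread in the same pset:

#include <mach/mach.h>
#include <mach/thread_switch.h>         /* thread_switch(), SWITCH_OPTION_* (assumption) */

/* Donate the rest of our quantum to a specific thread, e.g. a lock
 * holder, depressing our own priority for up to 10 ms if a direct
 * handoff is not possible. */
static void
yield_to(mach_port_t lock_holder)
{
        (void) thread_switch(lock_holder, SWITCH_OPTION_DEPRESS, 10);
}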
+ */ +void +thread_depress_expire( + void *p0, + __unused void *p1) +{ + thread_t thread = p0; + spl_t s; + + s = splsched(); + thread_lock(thread); + if (--thread->depress_timer_active == 0) { + thread->sched_mode &= ~TH_MODE_ISDEPRESSED; + compute_priority(thread, FALSE); + } + thread_unlock(thread); + splx(s); +} + +/* + * Prematurely abort priority depression if there is one. + */ +kern_return_t +thread_depress_abort_internal( + thread_t thread) +{ + kern_return_t result = KERN_NOT_DEPRESSED; + spl_t s; + + s = splsched(); + thread_lock(thread); + if (!(thread->sched_mode & TH_MODE_POLLDEPRESS)) { + if (thread->sched_mode & TH_MODE_ISDEPRESSED) { + thread->sched_mode &= ~TH_MODE_ISDEPRESSED; + compute_priority(thread, FALSE); + result = KERN_SUCCESS; + } + + if (timer_call_cancel(&thread->depress_timer)) + thread->depress_timer_active--; + } + thread_unlock(thread); + splx(s); + + return (result); +} + +void +thread_poll_yield( + thread_t self) +{ + spl_t s; + + assert(self == current_thread()); + + s = splsched(); + if (!(self->sched_mode & (TH_MODE_REALTIME|TH_MODE_TIMESHARE))) { + uint64_t total_computation, abstime; + + abstime = mach_absolute_time(); + total_computation = abstime - self->computation_epoch; + total_computation += self->computation_metered; + if (total_computation >= max_poll_computation) { + processor_t myprocessor = current_processor(); + ast_t preempt; + + thread_lock(self); + if (!(self->sched_mode & TH_MODE_ISDEPRESSED)) { + self->sched_pri = DEPRESSPRI; + myprocessor->current_pri = self->sched_pri; + self->sched_mode &= ~TH_MODE_PREEMPT; + } + self->computation_epoch = abstime; + self->computation_metered = 0; + self->sched_mode |= TH_MODE_POLLDEPRESS; + + abstime += (total_computation >> sched_poll_yield_shift); + if (!timer_call_enter(&self->depress_timer, abstime)) + self->depress_timer_active++; + thread_unlock(self); + + if ((preempt = csw_check(self, myprocessor)) != AST_NONE) + ast_on(preempt); + } + } + splx(s); } diff --git a/osfmk/kern/syscall_subr.h b/osfmk/kern/syscall_subr.h index 80c86fe82..92b39cc7e 100644 --- a/osfmk/kern/syscall_subr.h +++ b/osfmk/kern/syscall_subr.h @@ -53,16 +53,22 @@ #ifndef _KERN_SYSCALL_SUBR_H_ #define _KERN_SYSCALL_SUBR_H_ -#include -#include -#include -#include +#include -/* Attempt to context switch */ -extern boolean_t swtch(void); +extern void thread_depress_abstime( + uint64_t interval); -/* Attempt to context switch */ -extern boolean_t swtch_pri( - int pri); +extern void thread_depress_ms( + mach_msg_timeout_t interval); + +extern kern_return_t thread_depress_abort_internal( + thread_t thread); + +extern void thread_depress_expire( + void *thread, + void *p1); + +extern void thread_poll_yield( + thread_t self); #endif /* _KERN_SYSCALL_SUBR_H_ */ diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c index 227f39ac9..bf85bb308 100644 --- a/osfmk/kern/syscall_sw.c +++ b/osfmk/kern/syscall_sw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
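thread_depress_ms() above leans on clock_interval_to_absolutetime_interval() to turn milliseconds into Mach absolute-time units. For reference, a user-space mirror of that conversion using the public timebase API (a sketch; overflow handling omitted):

#include <stdint.h>
#include <mach/mach_time.h>

/* abstime * numer / denom == nanoseconds, so invert the ratio. */
static uint64_t
ms_to_abstime(uint32_t ms)
{
        mach_timebase_info_data_t tb;
        uint64_t ns = (uint64_t)ms * 1000 * 1000;

        mach_timebase_info(&tb);
        return ns * tb.denom / tb.numer;
}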
* * @APPLE_LICENSE_HEADER_START@ * @@ -51,13 +51,12 @@ */ #include +#include #include /* Forwards */ -extern kern_return_t kern_invalid(void); -extern mach_port_name_t null_port(void); -extern kern_return_t not_implemented(void); + /* * To add a new entry: @@ -86,161 +85,148 @@ int kern_invalid_debug = 0; #include #include -extern kern_return_t iokit_user_client_trap(); - mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { - MACH_TRAP(kern_invalid, 0), /* 0 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 1 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 2 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 3 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 4 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 5 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 6 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 7 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 8 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 9 */ /* Unix */ - MACH_TRAP(kern_invalid, 0), /* 10 */ - MACH_TRAP(kern_invalid, 0), /* 11 */ - MACH_TRAP(kern_invalid, 0), /* 12 */ - MACH_TRAP(kern_invalid, 0), /* 13 */ - MACH_TRAP(kern_invalid, 0), /* 14 */ - MACH_TRAP(kern_invalid, 0), /* 15 */ - MACH_TRAP(kern_invalid, 0), /* 16 */ - MACH_TRAP(kern_invalid, 0), /* 17 */ - MACH_TRAP(kern_invalid, 0), /* 18 */ - MACH_TRAP(kern_invalid, 0), /* 19 */ - MACH_TRAP(kern_invalid, 0), /* 20 */ - MACH_TRAP(kern_invalid, 0), /* 21 */ - MACH_TRAP(kern_invalid, 0), /* 22 */ - MACH_TRAP(kern_invalid, 0), /* 23 */ - MACH_TRAP(kern_invalid, 0), /* 24 */ - MACH_TRAP(kern_invalid, 0), /* 25 */ - MACH_TRAP(mach_reply_port, 0), /* 26 */ - MACH_TRAP(thread_self_trap, 0), /* 27 */ - MACH_TRAP(task_self_trap, 0), /* 28 */ - MACH_TRAP(host_self_trap, 0), /* 29 */ - MACH_TRAP(kern_invalid, 0), /* 30 */ - MACH_TRAP(mach_msg_trap, 7), /* 31 */ - MACH_TRAP(mach_msg_overwrite_trap, 9), /* 32 */ - MACH_TRAP(semaphore_signal_trap, 1), /* 33 */ - MACH_TRAP(semaphore_signal_all_trap, 1), /* 34 */ - MACH_TRAP(semaphore_signal_thread_trap, 2), /* 35 */ - MACH_TRAP(semaphore_wait_trap, 1), /* 36 */ - MACH_TRAP(semaphore_wait_signal_trap, 2), /* 37 */ - MACH_TRAP(semaphore_timedwait_trap, 3), /* 38 */ - MACH_TRAP(semaphore_timedwait_signal_trap, 4), /* 39 */ - MACH_TRAP(kern_invalid, 0), /* 40 */ - MACH_TRAP(init_process, 0), /* 41 */ - MACH_TRAP(kern_invalid, 0), /* 42 */ - MACH_TRAP(map_fd, 5), /* 43 */ - MACH_TRAP(kern_invalid, 0), /* 44 */ - MACH_TRAP(task_for_pid, 3), /* 45 */ - MACH_TRAP(pid_for_task, 2), /* 46 */ - MACH_TRAP(kern_invalid, 0), /* 47 */ - MACH_TRAP(macx_swapon, 4), /* 48 */ - MACH_TRAP(macx_swapoff, 2), /* 49 */ - MACH_TRAP(kern_invalid, 0), /* 50 */ - MACH_TRAP(macx_triggers, 4), /* 51 */ - MACH_TRAP(macx_backing_store_suspend, 1), /* 52 */ - MACH_TRAP(macx_backing_store_recovery, 1), /* 53 */ - MACH_TRAP(kern_invalid, 0), /* 54 */ - MACH_TRAP(kern_invalid, 0), /* 55 */ - MACH_TRAP(kern_invalid, 0), /* 56 */ - MACH_TRAP(kern_invalid, 0), /* 57 */ - MACH_TRAP(kern_invalid, 0), /* 58 */ - MACH_TRAP(swtch_pri, 1), /* 59 */ - MACH_TRAP(swtch, 0), /* 60 */ - MACH_TRAP(thread_switch, 3), /* 61 */ - MACH_TRAP(clock_sleep_trap, 5), /* 62 */ - MACH_TRAP(kern_invalid,0), /* 63 */ +/* 0 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 1 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 2 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 3 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 4 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 5 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 6 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 7 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 8 
*/ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 9 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 10 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 11 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 12 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 13 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 14 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 15 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 16 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 17 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 18 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 19 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 20 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 21 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 22 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 23 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 24 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 25 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 26 */ MACH_TRAP(mach_reply_port, 0, NULL, NULL), +/* 27 */ MACH_TRAP(thread_self_trap, 0, NULL, NULL), +/* 28 */ MACH_TRAP(task_self_trap, 0, NULL, NULL), +/* 29 */ MACH_TRAP(host_self_trap, 0, NULL, NULL), +/* 30 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 31 */ MACH_TRAP(mach_msg_trap, 7, munge_wwwwwww, munge_ddddddd), +/* 32 */ MACH_TRAP(mach_msg_overwrite_trap, 8, munge_wwwwwwww, munge_dddddddd), +/* 33 */ MACH_TRAP(semaphore_signal_trap, 1, munge_w, munge_d), +/* 34 */ MACH_TRAP(semaphore_signal_all_trap, 1, munge_w, munge_d), +/* 35 */ MACH_TRAP(semaphore_signal_thread_trap, 2, munge_ww, munge_dd), +/* 36 */ MACH_TRAP(semaphore_wait_trap, 1, munge_w, munge_d), +/* 37 */ MACH_TRAP(semaphore_wait_signal_trap, 2, munge_ww, munge_dd), +/* 38 */ MACH_TRAP(semaphore_timedwait_trap, 3, munge_www, munge_ddd), +/* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, munge_wwww, munge_dddd), +/* 40 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 41 */ MACH_TRAP(init_process, 0, NULL, NULL), +/* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd), +/* 44 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd), +/* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd), +/* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 48 */ MACH_TRAP(macx_swapon, 4, munge_wwww, munge_dddd), +/* 49 */ MACH_TRAP(macx_swapoff, 2, munge_ww, munge_dd), +/* 50 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 51 */ MACH_TRAP(macx_triggers, 4, munge_wwww, munge_dddd), +/* 52 */ MACH_TRAP(macx_backing_store_suspend, 1, munge_w, munge_d), +/* 53 */ MACH_TRAP(macx_backing_store_recovery, 1, munge_w, munge_d), +/* 54 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 55 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 56 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 57 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 58 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 59 */ MACH_TRAP(swtch_pri, 0, NULL, NULL), +/* 60 */ MACH_TRAP(swtch, 0, NULL, NULL), +/* 61 */ MACH_TRAP(thread_switch, 3, munge_www, munge_ddd), +/* 62 */ MACH_TRAP(clock_sleep_trap, 5, munge_wwwww, munge_ddddd), +/* 63 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* traps 64 - 95 reserved (debo) */ - MACH_TRAP(kern_invalid,0), /* 64 */ - MACH_TRAP(kern_invalid,0), /* 65 */ - MACH_TRAP(kern_invalid,0), /* 66 */ - MACH_TRAP(kern_invalid,0), /* 67 */ - MACH_TRAP(kern_invalid,0), /* 68 */ - MACH_TRAP(kern_invalid,0), /* 69 */ - MACH_TRAP(kern_invalid,0), /* 70 */ - MACH_TRAP(kern_invalid,0), /* 71 */ - MACH_TRAP(kern_invalid,0), /* 72 */ - MACH_TRAP(kern_invalid,0), /* 73 */ - 
MACH_TRAP(kern_invalid,0), /* 74 */ - MACH_TRAP(kern_invalid,0), /* 75 */ - MACH_TRAP(kern_invalid,0), /* 76 */ - MACH_TRAP(kern_invalid,0), /* 77 */ - MACH_TRAP(kern_invalid,0), /* 78 */ - MACH_TRAP(kern_invalid,0), /* 79 */ - MACH_TRAP(kern_invalid,0), /* 80 */ - MACH_TRAP(kern_invalid,0), /* 81 */ - MACH_TRAP(kern_invalid,0), /* 82 */ - MACH_TRAP(kern_invalid,0), /* 83 */ - MACH_TRAP(kern_invalid,0), /* 84 */ - MACH_TRAP(kern_invalid,0), /* 85 */ - MACH_TRAP(kern_invalid,0), /* 86 */ - MACH_TRAP(kern_invalid,0), /* 87 */ - MACH_TRAP(kern_invalid,0), /* 88 */ - MACH_TRAP(mach_timebase_info, 1), /* 89 */ - MACH_TRAP(mach_wait_until, 2), /* 90 */ - MACH_TRAP(mk_timer_create, 0), /* 91 */ - MACH_TRAP(mk_timer_destroy, 1), /* 92 */ - MACH_TRAP(mk_timer_arm, 3), /* 93 */ - MACH_TRAP(mk_timer_cancel, 2), /* 94 */ - MACH_TRAP(mk_timebase_info, 5), /* 95 */ +/* 64 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 65 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 66 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 67 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 68 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 69 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 70 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 71 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 72 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 73 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 74 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 75 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 76 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 77 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 78 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 79 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 80 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 81 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 82 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 83 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 84 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 85 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 86 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 87 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 88 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 89 */ MACH_TRAP(mach_timebase_info_trap, 1, munge_w, munge_d), +/* 90 */ MACH_TRAP(mach_wait_until_trap, 2, munge_l, munge_d), +/* 91 */ MACH_TRAP(mk_timer_create_trap, 0, NULL, NULL), +/* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, munge_w, munge_d), +/* 93 */ MACH_TRAP(mk_timer_arm_trap, 3, munge_wl, munge_dd), +/* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, munge_ww, munge_dd), +/* 95 */ MACH_TRAP(mk_timebase_info_trap, 5, munge_wwwww, munge_ddddd), /* traps 64 - 95 reserved (debo) */ - MACH_TRAP(kern_invalid,0), /* 96 */ - MACH_TRAP(kern_invalid,0), /* 97 */ - MACH_TRAP(kern_invalid,0), /* 98 */ - MACH_TRAP(kern_invalid,0), /* 99 */ +/* 96 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 97 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 98 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 99 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* traps 100-107 reserved for iokit (esb) */ - MACH_TRAP(iokit_user_client_trap, 8), - /* 100 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 101 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 102 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 103 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 104 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 105 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 106 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 107 */ /* IOKit */ - MACH_TRAP(kern_invalid,0), /* 108 */ - MACH_TRAP(kern_invalid,0), /* 109 */ - MACH_TRAP(kern_invalid,0), /* 110 */ - 
MACH_TRAP(kern_invalid,0), /* 111 */ - MACH_TRAP(kern_invalid,0), /* 112 */ - MACH_TRAP(kern_invalid,0), /* 113 */ - MACH_TRAP(kern_invalid,0), /* 114 */ - MACH_TRAP(kern_invalid,0), /* 115 */ - MACH_TRAP(kern_invalid,0), /* 116 */ - MACH_TRAP(kern_invalid,0), /* 117 */ - MACH_TRAP(kern_invalid,0), /* 118 */ - MACH_TRAP(kern_invalid,0), /* 119 */ - MACH_TRAP(kern_invalid,0), /* 120 */ - MACH_TRAP(kern_invalid,0), /* 121 */ - MACH_TRAP(kern_invalid,0), /* 122 */ - MACH_TRAP(kern_invalid,0), /* 123 */ - MACH_TRAP(kern_invalid,0), /* 124 */ - MACH_TRAP(kern_invalid,0), /* 125 */ - MACH_TRAP(kern_invalid,0), /* 126 */ - MACH_TRAP(kern_invalid,0), /* 127 */ +/* 100 */ MACH_TRAP(iokit_user_client_trap, 8, munge_wwwwwwww, munge_dddddddd), +/* 101 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 102 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 103 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 104 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 105 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 106 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 107 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* traps 108-127 unused */ +/* 108 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 109 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 110 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 111 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 112 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 113 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 114 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 115 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 116 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 117 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 118 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 119 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 120 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 121 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 122 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 123 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 124 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 125 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 126 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 127 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), }; int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0])); -mach_port_name_t -null_port(void) -{ - if (kern_invalid_debug) Debugger("null_port mach trap"); - return(MACH_PORT_NULL); -} - kern_return_t -kern_invalid(void) +kern_invalid( + __unused struct kern_invalid_args *args) { if (kern_invalid_debug) Debugger("kern_invalid mach trap"); return(KERN_INVALID_ARGUMENT); } -kern_return_t -not_implemented(void) -{ - return(MACH_SEND_INTERRUPTED); -} diff --git a/osfmk/kern/syscall_sw.h b/osfmk/kern/syscall_sw.h index 0c7a737d2..72a8afad0 100644 --- a/osfmk/kern/syscall_sw.h +++ b/osfmk/kern/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,15 +60,21 @@ * its kernel stack. Some architectures may need * to save more state in the pcb for these traps. 
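For orientation, a sketch of how the table above is consumed at trap time. The real dispatch lives in the per-architecture trap handlers; this C rendering only shows the shape, assuming the non-i386 (munge) layout of mach_trap_t and the convention that Mach traps arrive as negative syscall numbers:

/* Illustrative only; not the shipped dispatcher. */
static kern_return_t
mach_call_dispatch(int syscall_num, void *uargs, void *kargs)
{
        int num = -syscall_num;                 /* Mach traps are negative */
        mach_trap_t *trap;

        if (num >= mach_trap_count)
                return (KERN_INVALID_ARGUMENT);

        trap = &mach_trap_table[num];
        if (trap->mach_trap_arg_munge32)
                (*trap->mach_trap_arg_munge32)(uargs, kargs);   /* widen 32-bit args */

        return (*trap->mach_trap_function)();   /* real code passes kargs through */
}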
*/ +typedef void mach_munge_t(const void *, void *); typedef struct { int mach_trap_arg_count; int (*mach_trap_function)(void); - boolean_t mach_trap_stack; +#if defined(__i386__) + boolean_t mach_trap_stack; +#else + mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ + mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */ +#endif #if !MACH_ASSERT int mach_trap_unused; #else - char* mach_trap_name; + const char* mach_trap_name; #endif /* !MACH_ASSERT */ } mach_trap_t; @@ -76,15 +82,25 @@ typedef struct { extern mach_trap_t mach_trap_table[]; -extern int mach_trap_count; -extern kern_return_t kern_invalid(void); +extern int mach_trap_count; +#if defined(__i386__) #if !MACH_ASSERT -#define MACH_TRAP(name, arg_count) \ +#define MACH_TRAP(name, arg_count, munge32, munge64) \ { (arg_count), (int (*)(void)) (name), FALSE, 0 } #else -#define MACH_TRAP(name, arg_count) \ +#define MACH_TRAP(name, arg_count, munge32, munge64) \ { (arg_count), (int (*)(void)) (name), FALSE, #name } #endif /* !MACH_ASSERT */ +#else /* !defined(__i386__) */ +#if !MACH_ASSERT +#define MACH_TRAP(name, arg_count, munge32, munge64) \ + { (arg_count), (int (*)(void)) (name), (munge32), (munge64), 0 } +#else +#define MACH_TRAP(name, arg_count, munge32, munge64) \ + { (arg_count), (int (*)(void)) (name), (munge32), (munge64), #name } +#endif /* !MACH_ASSERT */ + +#endif /* !defined(__i386__) */ #endif /* _KERN_SYSCALL_SW_H_ */ diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index e9acf180b..4cbdb120f 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -77,18 +77,22 @@ #include #include #include -#include #include +#include #include +#include #include #include #include #include #include -#include + +#include #include #include + +#include #include #include #include @@ -100,18 +104,22 @@ #include #include #include -#include /* for kernel_map, ipc_kernel_map */ +#include +#include #include #include #include + +#include +#include +#include /* for kernel_map, ipc_kernel_map */ +#include +#include /* for vm_map_remove_commpage64 */ + #if MACH_KDB #include #endif /* MACH_KDB */ -#if TASK_SWAPPER -#include -#endif /* TASK_SWAPPER */ - #ifdef __ppc__ #include #include @@ -124,7 +132,10 @@ #include #include #include +#include + #include +#include task_t kernel_task; zone_t task_zone; @@ -137,7 +148,6 @@ void task_wait_locked( task_t task); void task_release_locked( task_t task); -void task_collect_scan(void); void task_free( task_t task ); void task_synchronizer_destroy_all( @@ -158,6 +168,51 @@ task_backing_store_privileged( return; } +void +task_working_set_disable(task_t task) +{ + struct tws_hash *ws; + + task_lock(task); + ws = task->dynamic_working_set; + task->dynamic_working_set = NULL; + task_unlock(task); + if (ws) { + tws_hash_ws_flush(ws); + tws_hash_destroy(ws); + } +} + +void +task_set_64bit( + task_t task, + boolean_t is64bit) +{ + if(is64bit) { + /* LP64todo - no task working set for 64-bit */ + task_set_64BitAddr(task); + task_working_set_disable(task); + task->map->max_offset = MACH_VM_MAX_ADDRESS; + } else { + /* + * Deallocate all memory previously allocated + * above the 32-bit address space, since it won't + * be accessible anymore. 
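Stepping back to the mach_munge_t slots introduced above: each 'w' in a munge routine's name appears to consume one 32-bit user word and widen it into a 64-bit argument slot, 'l' a 64-bit quantity, with the 'd' forms serving as the 64-bit counterparts. The shipped routines are hand-written assembly (bsd/dev/ppc/munge.s); this C stand-in is purely expository and assumes in and out may alias:

/* Sketch of munge_ww: two 32-bit user words -> two 64-bit slots. */
static void
munge_ww_sketch(const void *in, void *out)
{
        const uint32_t *in32 = (const uint32_t *)in;
        uint64_t *out64 = (uint64_t *)out;

        /* Walk backwards so an in-place (in == out) munge is safe. */
        out64[1] = in32[1];
        out64[0] = in32[0];
}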
+ */ + /* LP64todo - make this clean */ +#ifdef __ppc__ + vm_map_remove_commpage64(task->map); + pmap_unmap_sharedpage(task->map->pmap); /* Unmap commpage */ +#endif + (void) vm_map_remove(task->map, + (vm_map_offset_t) VM_MAX_ADDRESS, + MACH_VM_MAX_ADDRESS, + VM_MAP_NO_FLAGS); + task_clear_64BitAddr(task); + task->map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS; + } +} + void task_init(void) { @@ -167,8 +222,6 @@ task_init(void) TASK_CHUNK * sizeof(struct task), "tasks"); - eml_init(); - /* * Create the kernel task as the first task. */ @@ -234,21 +287,21 @@ task_unfreeze( */ kern_return_t kernel_task_create( - task_t parent_task, - vm_offset_t map_base, - vm_size_t map_size, - task_t *child_task) + __unused task_t parent_task, + __unused vm_offset_t map_base, + __unused vm_size_t map_size, + __unused task_t *child_task) { return (KERN_INVALID_ARGUMENT); } kern_return_t task_create( - task_t parent_task, - ledger_port_array_t ledger_ports, - mach_msg_type_number_t num_ledger_ports, - boolean_t inherit_memory, - task_t *child_task) /* OUT */ + task_t parent_task, + __unused ledger_port_array_t ledger_ports, + __unused mach_msg_type_number_t num_ledger_ports, + boolean_t inherit_memory, + task_t *child_task) /* OUT */ { if (parent_task == TASK_NULL) return(KERN_INVALID_ARGUMENT); @@ -259,15 +312,15 @@ task_create( kern_return_t host_security_create_task_token( - host_security_t host_security, - task_t parent_task, - security_token_t sec_token, - audit_token_t audit_token, - host_priv_t host_priv, - ledger_port_array_t ledger_ports, - mach_msg_type_number_t num_ledger_ports, - boolean_t inherit_memory, - task_t *child_task) /* OUT */ + host_security_t host_security, + task_t parent_task, + security_token_t sec_token, + audit_token_t audit_token, + host_priv_t host_priv, + __unused ledger_port_array_t ledger_ports, + __unused mach_msg_type_number_t num_ledger_ports, + boolean_t inherit_memory, + task_t *child_task) /* OUT */ { kern_return_t result; @@ -316,14 +369,13 @@ task_create_internal( new_task->map = vm_map_fork(parent_task->map); else new_task->map = vm_map_create(pmap_create(0), - round_page_32(VM_MIN_ADDRESS), - trunc_page_32(VM_MAX_ADDRESS), TRUE); + (vm_map_offset_t)(VM_MIN_ADDRESS), + (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE); - mutex_init(&new_task->lock, ETAP_THREAD_TASK_NEW); + mutex_init(&new_task->lock, 0); queue_init(&new_task->threads); new_task->suspend_count = 0; new_task->thread_count = 0; - new_task->res_thread_count = 0; new_task->active_thread_count = 0; new_task->user_stop_count = 0; new_task->role = TASK_UNSPECIFIED; @@ -341,27 +393,18 @@ task_create_internal( new_task->taskFeatures[0] = 0; /* Init task features */ new_task->taskFeatures[1] = 0; /* Init task features */ new_task->dynamic_working_set = 0; - + task_working_set_create(new_task, TWS_SMALL_HASH_LINE_COUNT, - 0, TWS_HASH_STYLE_DEFAULT); + 0, TWS_HASH_STYLE_DEFAULT); #ifdef MACH_BSD new_task->bsd_info = 0; #endif /* MACH_BSD */ #ifdef __ppc__ - if(per_proc_info[0].pf.Available & pf64Bit) new_task->taskFeatures[0] |= tf64BitData; /* If 64-bit machine, show we have 64-bit registers at least */ + if(BootProcInfo.pf.Available & pf64Bit) new_task->taskFeatures[0] |= tf64BitData; /* If 64-bit machine, show we have 64-bit registers at least */ #endif -#if TASK_SWAPPER - new_task->swap_state = TASK_SW_IN; - new_task->swap_flags = 0; - new_task->swap_ast_waiting = 0; - new_task->swap_stamp = sched_tick; - new_task->swap_rss = 0; - new_task->swap_nswap = 0; -#endif /* TASK_SWAPPER */ - 
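task_set_64bit() above is the switch the exec path can throw once the addressing width of the new image is known. A hypothetical caller, shown only to illustrate the contract (the exec plumbing itself is not part of this patch):

/* Assumed helper on the exec path; not present in this diff. */
static void
exec_set_addressing(task_t task, boolean_t image_is_64bit)
{
        /* Adjusts map->max_offset, the tf64BitAddr flag, and the
         * working set, exactly as implemented above. */
        task_set_64bit(task, image_is_64bit);
}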
queue_init(&new_task->semaphore_list); queue_init(&new_task->lock_set_list); new_task->semaphores_owned = 0; @@ -371,14 +414,11 @@ task_create_internal( new_task->may_assign = TRUE; new_task->assign_active = FALSE; #endif /* MACH_HOST */ - eml_task_reference(new_task, parent_task); ipc_task_init(new_task, parent_task); - new_task->total_user_time.seconds = 0; - new_task->total_user_time.microseconds = 0; - new_task->total_system_time.seconds = 0; - new_task->total_system_time.microseconds = 0; + new_task->total_user_time = 0; + new_task->total_system_time = 0; task_prof_init(new_task); @@ -404,6 +444,8 @@ task_create_internal( convert_port_to_ledger(parent_task->wired_ledger_port)); new_task->paged_ledger_port = ledger_copy( convert_port_to_ledger(parent_task->paged_ledger_port)); + if(task_has_64BitAddr(parent_task)) + task_set_64BitAddr(new_task); } else { pset = &default_pset; @@ -441,84 +483,35 @@ task_create_internal( } /* - * task_deallocate + * task_deallocate: * - * Drop a reference on a task - * Task is locked. + * Drop a reference on a task. */ void task_deallocate( task_t task) { - processor_set_t pset; - int refs; + processor_set_t pset; if (task == TASK_NULL) return; - task_lock(task); - refs = --task->ref_count; - task_unlock(task); - - if (refs > 0) + if (task_deallocate_internal(task) > 0) return; -#if TASK_SWAPPER - /* task_terminate guarantees that this task is off the list */ - assert((task->swap_state & TASK_SW_ELIGIBLE) == 0); -#endif /* TASK_SWAPPER */ + pset = task->processor_set; + pset_deallocate(pset); if(task->dynamic_working_set) - tws_hash_destroy((tws_hash_t)task->dynamic_working_set); - - eml_task_deallocate(task); + tws_hash_destroy(task->dynamic_working_set); ipc_task_terminate(task); -#if MACH_HOST - task_freeze(task); -#endif - - pset = task->processor_set; - pset_lock(pset); - pset_remove_task(pset,task); - pset_unlock(pset); - pset_deallocate(pset); - -#if MACH_HOST - task_unfreeze(task); -#endif - vm_map_deallocate(task->map); is_release(task->itk_space); - task_prof_deallocate(task); - zfree(task_zone, (vm_offset_t) task); -} - -void -task_reference( - task_t task) -{ - if (task != TASK_NULL) { - task_lock(task); - task->ref_count++; - task_unlock(task); - } -} - -boolean_t -task_reference_try( - task_t task) -{ - if (task != TASK_NULL) { - if (task_lock_try(task)) { - task->ref_count++; - task_unlock(task); - return TRUE; - } - } - return FALSE; + task_prof_deallocate(task); + zfree(task_zone, task); } /* @@ -533,58 +526,45 @@ task_terminate( task_t task) { if (task == TASK_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); + if (task->bsd_info) - return(KERN_FAILURE); + return (KERN_FAILURE); + return (task_terminate_internal(task)); } kern_return_t task_terminate_internal( - task_t task) + task_t task) { - thread_act_t thr_act, cur_thr_act; - task_t cur_task; - boolean_t interrupt_save; + processor_set_t pset; + thread_t thread, self; + task_t self_task; + boolean_t interrupt_save; assert(task != kernel_task); - cur_thr_act = current_act(); - cur_task = cur_thr_act->task; - -#if TASK_SWAPPER - /* - * If task is not resident (swapped out, or being swapped - * out), we want to bring it back in (this can block). - * NOTE: The only way that this can happen in the current - * system is if the task is swapped while it has a thread - * in exit(), and the thread does not hit a clean point - * to swap itself before getting here. - * Terminating other tasks is another way to this code, but - * it is not yet fully supported. 
- * The task_swapin is unconditional. It used to be done - * only if the task is not resident. Swapping in a - * resident task will prevent it from being swapped out - * while it terminates. - */ - task_swapin(task, TRUE); /* TRUE means make it unswappable */ -#endif /* TASK_SWAPPER */ + self = current_thread(); + self_task = self->task; /* * Get the task locked and make sure that we are not racing * with someone else trying to terminate us. */ - if (task == cur_task) { + if (task == self_task) task_lock(task); - } else if (task < cur_task) { + else + if (task < self_task) { task_lock(task); - task_lock(cur_task); - } else { - task_lock(cur_task); + task_lock(self_task); + } + else { + task_lock(self_task); task_lock(task); } - if (!task->active || !cur_thr_act->active) { + if (!task->active || !self->active) { /* * Task or current act is already being terminated. * Just return an error. If we are dying, this will @@ -592,12 +572,14 @@ task_terminate_internal( * will get us to finalize the termination of ourselves. */ task_unlock(task); - if (cur_task != task) - task_unlock(cur_task); - return(KERN_FAILURE); + if (self_task != task) + task_unlock(self_task); + + return (KERN_FAILURE); } - if (cur_task != task) - task_unlock(cur_task); + + if (self_task != task) + task_unlock(self_task); /* * Make sure the current thread does not get aborted out of @@ -618,16 +600,10 @@ task_terminate_internal( ipc_task_disable(task); /* - * Terminate each activation in the task. - * - * Each terminated activation will run it's special handler - * when its current kernel context is unwound. That will - * clean up most of the thread resources. Then it will be - * handed over to the reaper, who will finally remove the - * thread from the task list and free the structures. - */ - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { - thread_terminate_internal(thr_act); + * Terminate each thread in the task. + */ + queue_iterate(&task->threads, thread, thread_t, task_threads) { + thread_terminate_internal(thread); } /* @@ -635,7 +611,7 @@ task_terminate_internal( * to perform cleanup before ripping apart * the task. */ - if (cur_thr_act->task == task) + if (self_task == task) machine_thread_terminate_self(); task_unlock(task); @@ -650,6 +626,12 @@ task_terminate_internal( */ ipc_space_destroy(task->itk_space); +/* LP64todo - make this clean */ +#ifdef __ppc__ + vm_map_remove_commpage64(task->map); + pmap_unmap_sharedpage(task->map->pmap); /* Unmap commpage */ +#endif + /* * If the current thread is a member of the task * being terminated, then the last reference to @@ -658,18 +640,21 @@ task_terminate_internal( * expense of removing the address space regions * at reap time, we do it explictly here. 
*/ - (void) vm_map_remove(task->map, - task->map->min_offset, - task->map->max_offset, VM_MAP_NO_FLAGS); + vm_map_remove(task->map, task->map->min_offset, + task->map->max_offset, VM_MAP_NO_FLAGS); shared_region_mapping_dealloc(task->system_shared_region); /* * Flush working set here to avoid I/O in reaper thread */ - if(task->dynamic_working_set) - tws_hash_ws_flush((tws_hash_t) - task->dynamic_working_set); + if (task->dynamic_working_set) + tws_hash_ws_flush(task->dynamic_working_set); + + pset = task->processor_set; + pset_lock(pset); + pset_remove_task(pset,task); + pset_unlock(pset); /* * We no longer need to guard against being aborted, so restore @@ -686,44 +671,34 @@ task_terminate_internal( */ task_deallocate(task); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* - * task_halt - Shut the current task down (except for the current thread) in - * preparation for dramatic changes to the task (probably exec). - * We hold the task, terminate all other threads in the task and - * wait for them to terminate, clean up the portspace, and when - * all done, let the current thread go. + * task_halt: + * + * Shut the current task down (except for the current thread) in + * preparation for dramatic changes to the task (probably exec). + * We hold the task, terminate all other threads in the task and + * wait for them to terminate, clean up the portspace, and when + * all done, let the current thread go. */ kern_return_t task_halt( task_t task) { - thread_act_t thr_act, cur_thr_act; - task_t cur_task; + thread_t thread, self; assert(task != kernel_task); - cur_thr_act = current_act(); - cur_task = cur_thr_act->task; + self = current_thread(); - if (task != cur_task) { - return(KERN_INVALID_ARGUMENT); - } - -#if TASK_SWAPPER - /* - * If task is not resident (swapped out, or being swapped - * out), we want to bring it back in and make it unswappable. - * This can block, so do it early. - */ - task_swapin(task, TRUE); /* TRUE means make it unswappable */ -#endif /* TASK_SWAPPER */ + if (task != self->task) + return (KERN_INVALID_ARGUMENT); task_lock(task); - if (!task->active || !cur_thr_act->active) { + if (!task->active || !self->active) { /* * Task or current thread is already being terminated. * Hurry up and return out of the current kernel context @@ -731,7 +706,8 @@ task_halt( * ourselves. */ task_unlock(task); - return(KERN_FAILURE); + + return (KERN_FAILURE); } if (task->thread_count > 1) { @@ -744,18 +720,13 @@ task_halt( task_hold_locked(task); /* - * Terminate all the other activations in the task. - * - * Each terminated activation will run it's special handler - * when its current kernel context is unwound. That will - * clean up most of the thread resources. Then it will be - * handed over to the reaper, who will finally remove the - * thread from the task list and free the structures. + * Terminate all the other threads in the task. */ - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { - if (thr_act != cur_thr_act) - thread_terminate_internal(thr_act); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (thread != self) + thread_terminate_internal(thread); } + task_release_locked(task); } @@ -783,11 +754,10 @@ task_halt( * Clean out the address space, as we are going to be * getting a new one. 
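To make the contract concrete: task_halt() leaves exactly one live thread (the caller) and, once the address-space teardown just below runs, an empty map between min_offset and max_offset. A hypothetical exec-side caller, sketched under that assumption:

/* Sketch; error handling beyond the racing-termination case omitted. */
static kern_return_t
exec_reset_task(void)
{
        kern_return_t kr = task_halt(current_task());

        if (kr != KERN_SUCCESS)
                return (kr);    /* task already terminating */

        /* Map is now empty; safe to build the new image. */
        return (KERN_SUCCESS);
}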
*/ - (void) vm_map_remove(task->map, - task->map->min_offset, - task->map->max_offset, VM_MAP_NO_FLAGS); + vm_map_remove(task->map, task->map->min_offset, + task->map->max_offset, VM_MAP_NO_FLAGS); - return KERN_SUCCESS; + return (KERN_SUCCESS); } /* @@ -801,9 +771,9 @@ task_halt( */ void task_hold_locked( - register task_t task) + register task_t task) { - register thread_act_t thr_act; + register thread_t thread; assert(task->active); @@ -811,12 +781,12 @@ task_hold_locked( return; /* - * Iterate through all the thread_act's and hold them. + * Iterate through all the threads and hold them. */ - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { - act_lock_thread(thr_act); - thread_hold(thr_act); - act_unlock_thread(thr_act); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + thread_mtx_lock(thread); + thread_hold(thread); + thread_mtx_unlock(thread); } } @@ -832,25 +802,29 @@ task_hold_locked( * CONDITIONS: the caller holds a reference on the task */ kern_return_t -task_hold(task_t task) +task_hold( + register task_t task) { - kern_return_t kret; - if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); + task_lock(task); + if (!task->active) { task_unlock(task); + return (KERN_FAILURE); } - task_hold_locked(task); - task_unlock(task); - return(KERN_SUCCESS); + task_hold_locked(task); + task_unlock(task); + + return (KERN_SUCCESS); } /* - * Routine: task_wait_locked + * task_wait_locked: + * * Wait for all threads in task to stop. * * Conditions: @@ -860,25 +834,21 @@ void task_wait_locked( register task_t task) { - register thread_act_t thr_act, cur_thr_act; + register thread_t thread, self; assert(task->active); assert(task->suspend_count > 0); - cur_thr_act = current_act(); + self = current_thread(); + /* - * Iterate through all the thread's and wait for them to + * Iterate through all the threads and wait for them to * stop. Do not wait for the current thread if it is within * the task. */ - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { - if (thr_act != cur_thr_act) { - thread_t thread; - - thread = act_lock_thread(thr_act); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (thread != self) thread_wait(thread); - act_unlock_thread(thr_act); - } } } @@ -891,9 +861,9 @@ task_wait_locked( */ void task_release_locked( - register task_t task) + register task_t task) { - register thread_act_t thr_act; + register thread_t thread; assert(task->active); assert(task->suspend_count > 0); @@ -901,15 +871,10 @@ task_release_locked( if (--task->suspend_count > 0) return; - /* - * Iterate through all the thread_act's and hold them. - * Do not hold the current thread_act if it is within the - * task. 
- */ - queue_iterate(&task->threads, thr_act, thread_act_t, task_threads) { - act_lock_thread(thr_act); - thread_release(thr_act); - act_unlock_thread(thr_act); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + thread_mtx_lock(thread); + thread_release(thread); + thread_mtx_unlock(thread); } } @@ -922,40 +887,41 @@ task_release_locked( * CONDITIONS: The caller holds a reference to the task */ kern_return_t -task_release(task_t task) +task_release( + task_t task) { - kern_return_t kret; - if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); + task_lock(task); + if (!task->active) { task_unlock(task); + return (KERN_FAILURE); } - task_release_locked(task); - task_unlock(task); - return(KERN_SUCCESS); + task_release_locked(task); + task_unlock(task); + + return (KERN_SUCCESS); } kern_return_t task_threads( - task_t task, - thread_act_array_t *thr_act_list, + task_t task, + thread_act_array_t *threads_out, mach_msg_type_number_t *count) { - unsigned int actual; /* this many thr_acts */ - thread_act_t thr_act; - thread_act_t *thr_acts; - thread_t thread; - int i, j; - - vm_size_t size, size_needed; - vm_offset_t addr; + mach_msg_type_number_t actual; + thread_t *threads; + thread_t thread; + vm_size_t size, size_needed; + void *addr; + unsigned int i, j; if (task == TASK_NULL) - return KERN_INVALID_ARGUMENT; + return (KERN_INVALID_ARGUMENT); size = 0; addr = 0; @@ -963,15 +929,17 @@ task_threads( task_lock(task); if (!task->active) { task_unlock(task); + if (size != 0) kfree(addr, size); - return KERN_FAILURE; + + return (KERN_FAILURE); } actual = task->thread_count; /* do we have the memory we need? */ - size_needed = actual * sizeof(mach_port_t); + size_needed = actual * sizeof (mach_port_t); if (size_needed <= size) break; @@ -986,72 +954,71 @@ task_threads( addr = kalloc(size); if (addr == 0) - return KERN_RESOURCE_SHORTAGE; + return (KERN_RESOURCE_SHORTAGE); } /* OK, have memory and the task is locked & active */ - thr_acts = (thread_act_t *) addr; - - for (i = j = 0, thr_act = (thread_act_t) queue_first(&task->threads); - i < actual; - i++, thr_act = (thread_act_t) queue_next(&thr_act->task_threads)) { - act_lock(thr_act); - if (thr_act->act_ref_count > 0) { - act_reference_locked(thr_act); - thr_acts[j++] = thr_act; - } - act_unlock(thr_act); + threads = (thread_t *)addr; + + i = j = 0; + + for (thread = (thread_t)queue_first(&task->threads); i < actual; + ++i, thread = (thread_t)queue_next(&thread->task_threads)) { + thread_reference_internal(thread); + threads[j++] = thread; } - assert(queue_end(&task->threads, (queue_entry_t) thr_act)); + + assert(queue_end(&task->threads, (queue_entry_t)thread)); actual = j; - size_needed = actual * sizeof(mach_port_t); + size_needed = actual * sizeof (mach_port_t); - /* can unlock task now that we've got the thr_act refs */ + /* can unlock task now that we've got the thread refs */ task_unlock(task); if (actual == 0) { - /* no thr_acts, so return null pointer and deallocate memory */ + /* no threads, so return null pointer and deallocate memory */ - *thr_act_list = 0; + *threads_out = 0; *count = 0; if (size != 0) kfree(addr, size); - } else { + } + else { /* if we allocated too much, must copy */ if (size_needed < size) { - vm_offset_t newaddr; + void *newaddr; newaddr = kalloc(size_needed); if (newaddr == 0) { - for (i = 0; i < actual; i++) - act_deallocate(thr_acts[i]); + for (i = 0; i < actual; ++i) + thread_deallocate(threads[i]); kfree(addr, size); - return KERN_RESOURCE_SHORTAGE; + return (KERN_RESOURCE_SHORTAGE); } - 
bcopy((char *) addr, (char *) newaddr, size_needed); + bcopy(addr, newaddr, size_needed); kfree(addr, size); - thr_acts = (thread_act_t *) newaddr; + threads = (thread_t *)newaddr; } - *thr_act_list = thr_acts; + *threads_out = threads; *count = actual; /* do the conversion that Mig should handle */ - for (i = 0; i < actual; i++) - ((ipc_port_t *) thr_acts)[i] = - convert_act_to_port(thr_acts[i]); + for (i = 0; i < actual; ++i) + ((ipc_port_t *) threads)[i] = convert_thread_to_port(threads[i]); } - return KERN_SUCCESS; + return (KERN_SUCCESS); } /* - * Routine: task_suspend + * task_suspend: + * * Implement a user-level suspension on a task. * * Conditions: @@ -1061,20 +1028,24 @@ kern_return_t task_suspend( register task_t task) { - if (task == TASK_NULL) + if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); task_lock(task); + if (!task->active) { task_unlock(task); + return (KERN_FAILURE); } - if ((task->user_stop_count)++ > 0) { + + if (task->user_stop_count++ > 0) { /* * If the stop count was positive, the task is * already stopped and we can exit. */ task_unlock(task); + return (KERN_SUCCESS); } @@ -1086,38 +1057,44 @@ task_suspend( */ task_hold_locked(task); task_wait_locked(task); + task_unlock(task); + return (KERN_SUCCESS); } /* - * Routine: task_resume + * task_resume: * Release a kernel hold on a task. * * Conditions: * The caller holds a reference to the task */ kern_return_t -task_resume(register task_t task) +task_resume( + register task_t task) { - register boolean_t release; + register boolean_t release = FALSE; - if (task == TASK_NULL) - return(KERN_INVALID_ARGUMENT); + if (task == TASK_NULL || task == kernel_task) + return (KERN_INVALID_ARGUMENT); - release = FALSE; task_lock(task); + if (!task->active) { task_unlock(task); - return(KERN_FAILURE); + + return (KERN_FAILURE); } + if (task->user_stop_count > 0) { - if (--(task->user_stop_count) == 0) - release = TRUE; + if (--task->user_stop_count == 0) + release = TRUE; } else { task_unlock(task); - return(KERN_FAILURE); + + return (KERN_FAILURE); } /* @@ -1127,7 +1104,8 @@ task_resume(register task_t task) task_release_locked(task); task_unlock(task); - return(KERN_SUCCESS); + + return (KERN_SUCCESS); } kern_return_t @@ -1200,11 +1178,9 @@ kern_return_t task_set_info( task_t task, task_flavor_t flavor, - task_info_t task_info_in, /* pointer to IN array */ - mach_msg_type_number_t task_info_count) + __unused task_info_t task_info_in, /* pointer to IN array */ + __unused mach_msg_type_number_t task_info_count) { - vm_map_t map; - if (task == TASK_NULL) return(KERN_INVALID_ARGUMENT); @@ -1217,32 +1193,28 @@ task_set_info( kern_return_t task_info( - task_t task, - task_flavor_t flavor, - task_info_t task_info_out, + task_t task, + task_flavor_t flavor, + task_info_t task_info_out, mach_msg_type_number_t *task_info_count) { - thread_t thread; - vm_map_t map; - if (task == TASK_NULL) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); switch (flavor) { - case TASK_BASIC_INFO: - { - register task_basic_info_t basic_info; - - if (*task_info_count < TASK_BASIC_INFO_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + case TASK_BASIC_INFO_32: + { + task_basic_info_32_t basic_info; + vm_map_t map; - basic_info = (task_basic_info_t) task_info_out; + if (*task_info_count < TASK_BASIC_INFO_32_COUNT) + return (KERN_INVALID_ARGUMENT); - map = (task == kernel_task) ? 
kernel_map : task->map; + basic_info = (task_basic_info_32_t)task_info_out; - basic_info->virtual_size = map->size; + map = (task == kernel_task)? kernel_map: task->map; + basic_info->virtual_size = CAST_DOWN(vm_offset_t,map->size); basic_info->resident_size = pmap_resident_count(map->pmap) * PAGE_SIZE; @@ -1250,29 +1222,62 @@ task_info( basic_info->policy = ((task != kernel_task)? POLICY_TIMESHARE: POLICY_RR); basic_info->suspend_count = task->user_stop_count; - basic_info->user_time.seconds - = task->total_user_time.seconds; - basic_info->user_time.microseconds - = task->total_user_time.microseconds; - basic_info->system_time.seconds - = task->total_system_time.seconds; - basic_info->system_time.microseconds - = task->total_system_time.microseconds; + + absolutetime_to_microtime( + task->total_user_time, + &basic_info->user_time.seconds, + &basic_info->user_time.microseconds); + absolutetime_to_microtime( + task->total_system_time, + &basic_info->system_time.seconds, + &basic_info->system_time.microseconds); task_unlock(task); - *task_info_count = TASK_BASIC_INFO_COUNT; + *task_info_count = TASK_BASIC_INFO_32_COUNT; break; - } + } - case TASK_THREAD_TIMES_INFO: - { - register task_thread_times_info_t times_info; - register thread_t thread; - register thread_act_t thr_act; + case TASK_BASIC_INFO_64: + { + task_basic_info_64_t basic_info; + vm_map_t map; - if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) { + if (*task_info_count < TASK_BASIC_INFO_64_COUNT) + return (KERN_INVALID_ARGUMENT); + + basic_info = (task_basic_info_64_t)task_info_out; + + map = (task == kernel_task)? kernel_map: task->map; + basic_info->virtual_size = map->size; + basic_info->resident_size = (mach_vm_size_t)(pmap_resident_count(map->pmap) + * PAGE_SIZE); + + task_lock(task); + basic_info->policy = ((task != kernel_task)? + POLICY_TIMESHARE: POLICY_RR); + basic_info->suspend_count = task->user_stop_count; + + absolutetime_to_microtime( + task->total_user_time, + &basic_info->user_time.seconds, + &basic_info->user_time.microseconds); + absolutetime_to_microtime( + task->total_system_time, + &basic_info->system_time.seconds, + &basic_info->system_time.microseconds); + task_unlock(task); + + *task_info_count = TASK_BASIC_INFO_64_COUNT; + break; + } + + case TASK_THREAD_TIMES_INFO: + { + register task_thread_times_info_t times_info; + register thread_t thread; + + if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) return (KERN_INVALID_ARGUMENT); - } times_info = (task_thread_times_info_t) task_info_out; times_info->user_time.seconds = 0; @@ -1281,83 +1286,105 @@ task_info( times_info->system_time.microseconds = 0; task_lock(task); - queue_iterate(&task->threads, thr_act, - thread_act_t, task_threads) - { - time_value_t user_time, system_time; - spl_t s; - - thread = act_lock_thread(thr_act); - /* JMM - add logic to skip threads that have migrated - * into this task? 
- */ - - assert(thread); /* Must have thread */ - s = splsched(); - thread_lock(thread); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + time_value_t user_time, system_time; thread_read_times(thread, &user_time, &system_time); - thread_unlock(thread); - splx(s); - act_unlock_thread(thr_act); - time_value_add(&times_info->user_time, &user_time); time_value_add(&times_info->system_time, &system_time); } + task_unlock(task); *task_info_count = TASK_THREAD_TIMES_INFO_COUNT; break; - } + } + + case TASK_ABSOLUTETIME_INFO: + { + task_absolutetime_info_t info; + register thread_t thread; + + if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) + return (KERN_INVALID_ARGUMENT); + + info = (task_absolutetime_info_t)task_info_out; + info->threads_user = info->threads_system = 0; + + task_lock(task); + + info->total_user = task->total_user_time; + info->total_system = task->total_system_time; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + uint64_t tval; + + tval = timer_grab(&thread->user_timer); + info->threads_user += tval; + info->total_user += tval; + + tval = timer_grab(&thread->system_timer); + info->threads_system += tval; + info->total_system += tval; + } + + task_unlock(task); + + *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT; + break; + } - case TASK_SCHED_FIFO_INFO: - { + /* OBSOLETE */ + case TASK_SCHED_FIFO_INFO: + { if (*task_info_count < POLICY_FIFO_BASE_COUNT) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); - return(KERN_INVALID_POLICY); - } + return (KERN_INVALID_POLICY); + } - case TASK_SCHED_RR_INFO: - { + /* OBSOLETE */ + case TASK_SCHED_RR_INFO: + { register policy_rr_base_t rr_base; if (*task_info_count < POLICY_RR_BASE_COUNT) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); rr_base = (policy_rr_base_t) task_info_out; task_lock(task); if (task != kernel_task) { task_unlock(task); - return(KERN_INVALID_POLICY); + return (KERN_INVALID_POLICY); } rr_base->base_priority = task->priority; task_unlock(task); - rr_base->quantum = tick / 1000; + rr_base->quantum = std_quantum_us / 1000; *task_info_count = POLICY_RR_BASE_COUNT; break; - } + } - case TASK_SCHED_TIMESHARE_INFO: - { + /* OBSOLETE */ + case TASK_SCHED_TIMESHARE_INFO: + { register policy_timeshare_base_t ts_base; if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) - return(KERN_INVALID_ARGUMENT); + return (KERN_INVALID_ARGUMENT); ts_base = (policy_timeshare_base_t) task_info_out; task_lock(task); if (task == kernel_task) { task_unlock(task); - return(KERN_INVALID_POLICY); + return (KERN_INVALID_POLICY); } ts_base->base_priority = task->priority; @@ -1365,15 +1392,14 @@ task_info( *task_info_count = POLICY_TIMESHARE_BASE_COUNT; break; - } + } - case TASK_SECURITY_TOKEN: - { - register security_token_t *sec_token_p; + case TASK_SECURITY_TOKEN: + { + register security_token_t *sec_token_p; - if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) + return (KERN_INVALID_ARGUMENT); sec_token_p = (security_token_t *) task_info_out; @@ -1382,16 +1408,15 @@ task_info( task_unlock(task); *task_info_count = TASK_SECURITY_TOKEN_COUNT; - break; - } + break; + } - case TASK_AUDIT_TOKEN: - { - register audit_token_t *audit_token_p; + case TASK_AUDIT_TOKEN: + { + register audit_token_t *audit_token_p; - if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) + return (KERN_INVALID_ARGUMENT); audit_token_p =
(audit_token_t *) task_info_out; @@ -1400,19 +1425,18 @@ task_info( task_unlock(task); *task_info_count = TASK_AUDIT_TOKEN_COUNT; - break; - } + break; + } - case TASK_SCHED_INFO: - return(KERN_INVALID_ARGUMENT); + case TASK_SCHED_INFO: + return (KERN_INVALID_ARGUMENT); - case TASK_EVENTS_INFO: - { + case TASK_EVENTS_INFO: + { register task_events_info_t events_info; - if (*task_info_count < TASK_EVENTS_INFO_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + if (*task_info_count < TASK_EVENTS_INFO_COUNT) + return (KERN_INVALID_ARGUMENT); events_info = (task_events_info_t) task_info_out; @@ -1429,13 +1453,13 @@ task_info( *task_info_count = TASK_EVENTS_INFO_COUNT; break; - } + } - default: + default: return (KERN_INVALID_ARGUMENT); } - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1445,13 +1469,10 @@ task_info( */ kern_return_t task_assign( - task_t task, - processor_set_t new_pset, - boolean_t assign_threads) + __unused task_t task, + __unused processor_set_t new_pset, + __unused boolean_t assign_threads) { -#ifdef lint - task++; new_pset++; assign_threads++; -#endif /* lint */ return(KERN_FAILURE); } @@ -1496,12 +1517,12 @@ task_get_assignment( */ kern_return_t task_policy( - task_t task, - policy_t policy_id, - policy_base_t base, - mach_msg_type_number_t count, - boolean_t set_limit, - boolean_t change) + __unused task_t task, + __unused policy_t policy_id, + __unused policy_base_t base, + __unused mach_msg_type_number_t count, + __unused boolean_t set_limit, + __unused boolean_t change) { return(KERN_FAILURE); } @@ -1516,110 +1537,25 @@ task_policy( */ kern_return_t task_set_policy( - task_t task, - processor_set_t pset, - policy_t policy_id, - policy_base_t base, - mach_msg_type_number_t base_count, - policy_limit_t limit, - mach_msg_type_number_t limit_count, - boolean_t change) + __unused task_t task, + __unused processor_set_t pset, + __unused policy_t policy_id, + __unused policy_base_t base, + __unused mach_msg_type_number_t base_count, + __unused policy_limit_t limit, + __unused mach_msg_type_number_t limit_count, + __unused boolean_t change) { return(KERN_FAILURE); } -/* - * task_collect_scan: - * - * Attempt to free resources owned by tasks. - */ - -void -task_collect_scan(void) -{ - register task_t task, prev_task; - processor_set_t pset = &default_pset; - - pset_lock(pset); - pset->ref_count++; - task = (task_t) queue_first(&pset->tasks); - while (!queue_end(&pset->tasks, (queue_entry_t) task)) { - task_lock(task); - if (task->ref_count > 0) { - - task_reference_locked(task); - task_unlock(task); - -#if MACH_HOST - /* - * While we still have the pset locked, freeze the task in - * this pset. That way, when we get back from collecting - * it, we can dereference the pset_tasks chain for the task - * and be assured that we are still in this chain. - */ - task_freeze(task); -#endif - - pset_unlock(pset); - - pmap_collect(task->map->pmap); - - pset_lock(pset); - prev_task = task; - task = (task_t) queue_next(&task->pset_tasks); - -#if MACH_HOST - task_unfreeze(prev_task); -#endif - - task_deallocate(prev_task); - } else { - task_unlock(task); - task = (task_t) queue_next(&task->pset_tasks); - } - } - - pset_unlock(pset); - - pset_deallocate(pset); -} - -/* Also disabled in vm/vm_pageout.c */ -boolean_t task_collect_allowed = FALSE; -unsigned task_collect_last_tick = 0; -unsigned task_collect_max_rate = 0; /* in ticks */ - -/* - * consider_task_collect: - * - * Called by the pageout daemon when the system needs more free pages. 
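The task_info() flavors handled above are reachable from user space with an ordinary send right. A minimal consumer, bracketed by task_suspend()/task_resume() for a stable snapshot; it assumes 'task' was obtained elsewhere (e.g. via task_for_pid()) and that the user-side TASK_BASIC_INFO_32 types follow the kernel definitions in this patch:

#include <stdio.h>
#include <mach/mach.h>

static void
print_task_times(task_t task)
{
        task_basic_info_32_data_t info;
        mach_msg_type_number_t count = TASK_BASIC_INFO_32_COUNT;

        if (task_suspend(task) != KERN_SUCCESS)
                return;

        if (task_info(task, TASK_BASIC_INFO_32,
                      (task_info_t)&info, &count) == KERN_SUCCESS)
                printf("resident %u bytes, user time %d.%06d s\n",
                       info.resident_size,
                       info.user_time.seconds, info.user_time.microseconds);

        (void) task_resume(task);
}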
- */ - -void -consider_task_collect(void) -{ - /* - * By default, don't attempt task collection more frequently - * than once per second. - */ - - if (task_collect_max_rate == 0) - task_collect_max_rate = (1 << SCHED_TICK_SHIFT) + 1; - - if (task_collect_allowed && - (sched_tick > (task_collect_last_tick + task_collect_max_rate))) { - task_collect_last_tick = sched_tick; - task_collect_scan(); - } -} - +#if FAST_TAS kern_return_t task_set_ras_pc( task_t task, vm_offset_t pc, vm_offset_t endpc) { -#if FAST_TAS extern int fast_tas_debug; if (fast_tas_debug) { @@ -1631,18 +1567,17 @@ task_set_ras_pc( task->fast_tas_end = endpc; task_unlock(task); return KERN_SUCCESS; - +} #else /* FAST_TAS */ -#ifdef lint - task++; - pc++; - endpc++; -#endif /* lint */ - +kern_return_t +task_set_ras_pc( + __unused task_t task, + __unused vm_offset_t pc, + __unused vm_offset_t endpc) +{ return KERN_FAILURE; - -#endif /* FAST_TAS */ } +#endif /* FAST_TAS */ void task_synchronizer_destroy_all(task_t task) @@ -1669,45 +1604,6 @@ task_synchronizer_destroy_all(task_t task) } } -/* - * task_set_port_space: - * - * Set port name space of task to specified size. - */ - -kern_return_t -task_set_port_space( - task_t task, - int table_entries) -{ - kern_return_t kr; - - is_write_lock(task->itk_space); - kr = ipc_entry_grow_table(task->itk_space, table_entries); - if (kr == KERN_SUCCESS) - is_write_unlock(task->itk_space); - return kr; -} - -/* - * Routine: - * task_is_classic - * Purpose: - * Returns true if the task is a P_CLASSIC task. - */ -boolean_t -task_is_classic( - task_t task) -{ - boolean_t result = FALSE; - - if (task) { - struct proc *p = get_bsdtask_info(task); - result = proc_is_classic(p) ? TRUE : FALSE; - } - return result; -} - /* * We need to export some functions to other components that * are currently implemented in macros within the osfmk @@ -1722,7 +1618,18 @@ boolean_t is_kerneltask(task_t t) } #undef current_task -task_t current_task() +task_t current_task(void); +task_t current_task(void) { return (current_task_fast()); } + +#undef task_reference +void task_reference(task_t task); +void +task_reference( + task_t task) +{ + if (task != TASK_NULL) + task_reference_internal(task); +} diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 3e4c87b3c..050f66906 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -80,9 +80,7 @@ #include #include -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE @@ -93,19 +91,17 @@ #include #include #include -#include -#include + +#include #include #include #include -#include -#include +#include + #include -#include -#include -#include +#include -typedef struct task { +struct task { /* Synchronization/destruction information */ decl_mutex_data(,lock) /* Task's lock */ int ref_count; /* Number of references to me */ @@ -117,22 +113,9 @@ typedef struct task { void *user_data; /* Arbitrary data settable via IPC */ int suspend_count; /* Internal scheduling only */ -#if TASK_SWAPPER - /* Task swapper data */ - unsigned short swap_state; /* swap state (e.g. IN/OUT) */ - unsigned short swap_flags; /* swap flags (e.g. 
MAKE_UNSWAPP) */ - unsigned int swap_stamp; /* when last swapped */ - unsigned long swap_rss; /* size (pages) when last swapped */ - int swap_ast_waiting; /* number of threads that have not */ - /* reached a clean point and halted */ - int swap_nswap; /* number of times this task swapped */ - queue_chain_t swapped_tasks; /* list of non-resident tasks */ -#endif /* TASK_SWAPPER */ - /* Threads in this task */ queue_head_t threads; int thread_count; - int res_thread_count; int active_thread_count; processor_set_t processor_set; /* processor set for new threads */ @@ -154,8 +137,8 @@ typedef struct task { audit_token_t audit_token; /* Statistics */ - time_value_t total_user_time; /* user time for dead threads */ - time_value_t total_system_time; /* system time for dead threads */ + uint64_t total_user_time; /* terminated threads only */ + uint64_t total_system_time; #if MACH_PROF boolean_t task_profiled; /* is task being profiled ? */ @@ -181,22 +164,12 @@ typedef struct task { int semaphores_owned; /* number of semaphores owned */ int lock_sets_owned; /* number of lock sets owned */ - /* User space system call emulation support */ - struct eml_dispatch *eml_dispatch; - - /* Ledgers */ + /* Ledgers */ struct ipc_port *wired_ledger_port; struct ipc_port *paged_ledger_port; - unsigned long priv_flags; /* privelege resource flags */ + unsigned int priv_flags; /* privilege resource flags */ +#define VM_BACKING_STORE_PRIV 0x1 -#if NORMA_TASK - long child_node; /* if != -1, node for new children */ -#endif /* NORMA_TASK */ -#if FAST_TAS - vm_offset_t fast_tas_base; - vm_offset_t fast_tas_end; -#endif /* FAST_TAS */ - MACHINE_TASK integer_t faults; /* faults counter */ integer_t pageins; /* pageins counter */ integer_t cow_faults; /* copy on write fault counter */ @@ -208,93 +181,129 @@ typedef struct task { #ifdef MACH_BSD void *bsd_info; #endif - vm_offset_t system_shared_region; - vm_offset_t dynamic_working_set; + struct shared_region_mapping *system_shared_region; + struct tws_hash *dynamic_working_set; uint32_t taskFeatures[2]; /* Special feature for this task */ #define tf64BitAddr 0x80000000 /* Task has 64-bit addressing */ #define tf64BitData 0x40000000 /* Task has 64-bit data registers */ -} Task; +#define task_has_64BitAddr(task) \ + (((task)->taskFeatures[0] & tf64BitAddr) != 0) +#define task_set_64BitAddr(task) \ + ((task)->taskFeatures[0] |= tf64BitAddr) +#define task_clear_64BitAddr(task) \ + ((task)->taskFeatures[0] &= ~tf64BitAddr) + +}; #define task_lock(task) mutex_lock(&(task)->lock) #define task_lock_try(task) mutex_try(&(task)->lock) #define task_unlock(task) mutex_unlock(&(task)->lock) -#define itk_lock_init(task) mutex_init(&(task)->itk_lock_data, \ - ETAP_THREAD_TASK_ITK) +#define itk_lock_init(task) mutex_init(&(task)->itk_lock_data, 0) #define itk_lock(task) mutex_lock(&(task)->itk_lock_data) #define itk_unlock(task) mutex_unlock(&(task)->itk_lock_data) -#define task_reference_locked(task) ((task)->ref_count++) +#define task_reference_internal(task) \ + hw_atomic_add(&(task)->ref_count, 1) -/* - * priv_flags definitions - */ -#define VM_BACKING_STORE_PRIV 0x1 +#define task_deallocate_internal(task) \ + hw_atomic_sub(&(task)->ref_count, 1) -/* - * Internal only routines - */ +#define task_reference(task) \ +MACRO_BEGIN \ + if ((task) != TASK_NULL) \ + task_reference_internal(task); \ +MACRO_END -extern void task_backing_store_privileged( - task_t task); +extern kern_return_t kernel_task_create( + task_t task, + vm_offset_t map_base, + vm_size_t map_size, + task_t *child); 
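/*
 * A minimal sketch of the lock-free reference counting introduced above:
 * task_reference_internal() and task_deallocate_internal() wrap
 * hw_atomic_add()/hw_atomic_sub() on ref_count, so references are taken
 * and dropped without task_lock(). This assumes hw_atomic_sub() returns
 * the updated count (the same pattern thread_deallocate() uses in the
 * thread.c hunks below); example_retain, example_release and task_free
 * are hypothetical names, not part of the patch.
 */
static void
example_retain(task_t task)
{
	if (task != TASK_NULL)
		task_reference_internal(task);	/* atomic ref_count++ */
}

static void
example_release(task_t task)
{
	if (task == TASK_NULL)
		return;

	if (task_deallocate_internal(task) > 0)	/* atomic ref_count-- */
		return;				/* other references remain */

	/*
	 * The count reached zero: exactly one caller observes this and
	 * performs the teardown, with no lock held on the get/put path.
	 */
	task_free(task);			/* hypothetical teardown step */
}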
/* Initialize task module */ extern void task_init(void); -/* task create */ -extern kern_return_t task_create_internal( - task_t parent_task, - boolean_t inherit_memory, - task_t *child_task); /* OUT */ +#define current_task_fast() (current_thread()->task) +#define current_task() current_task_fast() -extern void consider_task_collect(void); +#else /* MACH_KERNEL_PRIVATE */ -#define current_task_fast() (current_act_fast()->task) -#define current_task() current_task_fast() +__BEGIN_DECLS + +extern task_t current_task(void); + +extern void task_reference(task_t task); + +__END_DECLS #endif /* MACH_KERNEL_PRIVATE */ -extern task_t kernel_task; +__BEGIN_DECLS -/* Temporarily hold all threads in a task */ +#ifdef XNU_KERNEL_PRIVATE + +/* Hold all threads in a task */ extern kern_return_t task_hold( - task_t task); + task_t task); -/* Release temporary hold on all threads in a task */ +/* Release hold on all threads in a task */ extern kern_return_t task_release( - task_t task); + task_t task); -/* Get a task prepared for major changes */ +/* Halt all other threads in the current task */ extern kern_return_t task_halt( - task_t task); + task_t task); + +extern kern_return_t task_terminate_internal( + task_t task); + +extern kern_return_t task_create_internal( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task); /* OUT */ -#if defined(MACH_KERNEL_PRIVATE) || defined(BSD_BUILD) extern kern_return_t task_importance( task_t task, integer_t importance); -#endif + +extern void task_set_64bit( + task_t task, + boolean_t is64bit); + +extern void task_backing_store_privileged( + task_t task); + +extern void task_working_set_disable( + task_t task); + +/* Get number of activations in a task */ +extern int get_task_numacts( + task_t task); + /* JMM - should just be temporary (implementation in bsd_kern still) */ -extern void *get_bsdtask_info(task_t); extern void set_bsdtask_info(task_t,void *); -extern vm_map_t get_task_map(task_t); +extern vm_map_t get_task_map_reference(task_t); extern vm_map_t swap_task_map(task_t, vm_map_t); extern pmap_t get_task_pmap(task_t); -extern boolean_t task_reference_try(task_t task); +extern boolean_t is_kerneltask(task_t task); -#endif /* __APPLE_API_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ -#if !defined(MACH_KERNEL_PRIVATE) +#ifdef KERNEL_PRIVATE -extern task_t current_task(void); +extern void *get_bsdtask_info(task_t); +extern vm_map_t get_task_map(task_t); -#endif /* MACH_KERNEL_TASK */ +#endif /* KERNEL_PRIVATE */ -/* Take reference on task (make sure it doesn't go away) */ -extern void task_reference(task_t task); +extern task_t kernel_task; + +extern void task_deallocate( + task_t task); -/* Remove reference to task */ -extern void task_deallocate(task_t task); +__END_DECLS #endif /* _KERN_TASK_H_ */ diff --git a/osfmk/kern/task_policy.c b/osfmk/kern/task_policy.c index c1657ea5e..a0029e1df 100644 --- a/osfmk/kern/task_policy.c +++ b/osfmk/kern/task_policy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,15 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 15 October 2000 (debo) - * Created. 
- */ +#include +#include + +#include #include static void @@ -99,7 +95,7 @@ task_policy_set( task->sec_token.val[0] != 0 ) result = KERN_INVALID_ARGUMENT; else { - task_priority(task, MAXPRI_SYSTEM - 3, MAXPRI_SYSTEM); + task_priority(task, MAXPRI_RESERVED - 3, MAXPRI_RESERVED); task->role = info->role; } } @@ -125,7 +121,7 @@ task_priority( integer_t priority, integer_t max_priority) { - thread_act_t act; + thread_t thread; task->max_priority = max_priority; @@ -137,13 +133,13 @@ task_priority( task->priority = priority; - queue_iterate(&task->threads, act, thread_act_t, task_threads) { - thread_t thread = act_lock_thread(act); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + thread_mtx_lock(thread); - if (act->active) + if (thread->active) thread_task_priority(thread, priority, max_priority); - act_unlock_thread(act); + thread_mtx_unlock(thread); } } diff --git a/osfmk/kern/task_swap.c b/osfmk/kern/task_swap.c index a4029b496..e7767de9b 100644 --- a/osfmk/kern/task_swap.c +++ b/osfmk/kern/task_swap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -55,7 +54,7 @@ kern_return_t task_swappable( host_priv_t host_priv, task_t task, - boolean_t make_swappable) + __unused boolean_t make_swappable) { if (host_priv == HOST_PRIV_NULL) return (KERN_INVALID_ARGUMENT); diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 85ebb7dcf..0c181bfb6 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,7 +54,7 @@ * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub * Date: 1986 * - * Thread/thread_shuttle management primitives implementation. + * Thread management primitives implementation. */ /* * Copyright (c) 1993 The University of Utah and @@ -75,12 +75,10 @@ * */ -#include #include -#include -#include #include +#include #include #include #include @@ -88,10 +86,13 @@ #include #include #include -#include + +#include + +#include +#include #include #include -#include #include #include #include @@ -101,577 +102,435 @@ #include #include #include -#include /*** ??? 
fix so this can be removed ***/ +#include +#include #include #include -#include -#include #include #include -#include -#include -#include -#include /* for MACHINE_STACK */ #include #include + +#include +#include + +#include +#include + #include /* * Exported interfaces */ - +#include #include #include +#include static struct zone *thread_zone; -static queue_head_t reaper_queue; -decl_simple_lock_data(static,reaper_lock) +decl_simple_lock_data(static,thread_stack_lock) +static queue_head_t thread_stack_queue; -extern int tick; +decl_simple_lock_data(static,thread_terminate_lock) +static queue_head_t thread_terminate_queue; -/* private */ static struct thread thread_template, init_thread; -#if MACH_DEBUG - -#ifdef MACHINE_STACK -extern void stack_statistics( - unsigned int *totalp, - vm_size_t *maxusagep); -#endif /* MACHINE_STACK */ -#endif /* MACH_DEBUG */ - -#ifdef MACHINE_STACK -/* - * Machine-dependent code must define: - * stack_alloc_try - * stack_alloc - * stack_free - * stack_free_stack - * stack_collect - * and if MACH_DEBUG: - * stack_statistics - */ -#else /* MACHINE_STACK */ -/* - * We allocate stacks from generic kernel VM. - * Machine-dependent code must define: - * machine_kernel_stack_init - * - * The stack_free_list can only be accessed at splsched, - * because stack_alloc_try/thread_invoke operate at splsched. - */ - -decl_simple_lock_data(static,stack_lock_data) -#define stack_lock() simple_lock(&stack_lock_data) -#define stack_unlock() simple_unlock(&stack_lock_data) - -static vm_map_t stack_map; -static vm_offset_t stack_free_list; +#ifdef MACH_BSD +extern void proc_exit(void *); +#endif /* MACH_BSD */ -static vm_offset_t stack_free_cache[NCPUS]; +void +thread_bootstrap(void) +{ + /* + * Fill in a template thread for fast initialization. + */ -unsigned int stack_free_max = 0; -unsigned int stack_free_count = 0; /* splsched only */ -unsigned int stack_free_limit = 1; /* Arbitrary */ + thread_template.runq = RUN_QUEUE_NULL; -unsigned int stack_cache_hits = 0; /* debugging */ + thread_template.ref_count = 2; -unsigned int stack_alloc_hits = 0; /* debugging */ -unsigned int stack_alloc_misses = 0; /* debugging */ + thread_template.reason = AST_NONE; + thread_template.at_safe_point = FALSE; + thread_template.wait_event = NO_EVENT64; + thread_template.wait_queue = WAIT_QUEUE_NULL; + thread_template.wait_result = THREAD_WAITING; + thread_template.options = THREAD_ABORTSAFE; + thread_template.state = TH_WAIT | TH_UNINT; + thread_template.wake_active = FALSE; + thread_template.continuation = THREAD_CONTINUE_NULL; + thread_template.parameter = NULL; -unsigned int stack_alloc_total = 0; -unsigned int stack_alloc_hiwater = 0; -unsigned int stack_alloc_bndry = 0; + thread_template.importance = 0; + thread_template.sched_mode = 0; + thread_template.safe_mode = 0; + thread_template.safe_release = 0; + thread_template.priority = 0; + thread_template.sched_pri = 0; + thread_template.max_priority = 0; + thread_template.task_priority = 0; + thread_template.promotions = 0; + thread_template.pending_promoter_index = 0; + thread_template.pending_promoter[0] = + thread_template.pending_promoter[1] = NULL; -/* - * The next field is at the base of the stack, - * so the low end is left unsullied. - */ + thread_template.realtime.deadline = UINT64_MAX; -#define stack_next(stack) (*((vm_offset_t *)((stack) + KERNEL_STACK_SIZE) - 1)) + thread_template.current_quantum = 0; -/* - * stack_alloc: - * - * Allocate a kernel stack for a thread. - * May block. 
- */ -vm_offset_t -stack_alloc( - thread_t thread, - void (*start_pos)(thread_t)) -{ - vm_offset_t stack = thread->kernel_stack; - spl_t s; + thread_template.computation_metered = 0; + thread_template.computation_epoch = 0; - if (stack) - return (stack); + thread_template.sched_stamp = 0; + thread_template.sched_usage = 0; + thread_template.pri_shift = INT8_MAX; + thread_template.cpu_usage = thread_template.cpu_delta = 0; - s = splsched(); - stack_lock(); - stack = stack_free_list; - if (stack != 0) { - stack_free_list = stack_next(stack); - stack_free_count--; - } - stack_unlock(); - splx(s); + thread_template.bound_processor = PROCESSOR_NULL; + thread_template.last_processor = PROCESSOR_NULL; + thread_template.last_switch = 0; - if (stack != 0) { - machine_stack_attach(thread, stack, start_pos); - return (stack); - } - - if (kernel_memory_allocate( - stack_map, &stack, - KERNEL_STACK_SIZE, stack_alloc_bndry - 1, - KMA_KOBJECT) != KERN_SUCCESS) - panic("stack_alloc: no space left for stack maps"); - - stack_alloc_total++; - if (stack_alloc_total > stack_alloc_hiwater) - stack_alloc_hiwater = stack_alloc_total; - - machine_stack_attach(thread, stack, start_pos); - return (stack); -} + timer_init(&thread_template.user_timer); + timer_init(&thread_template.system_timer); + thread_template.user_timer_save = 0; + thread_template.system_timer_save = 0; -/* - * stack_free: - * - * Free a kernel stack. - */ + thread_template.wait_timer_is_set = FALSE; + thread_template.wait_timer_active = 0; -void -stack_free( - thread_t thread) -{ - vm_offset_t stack = machine_stack_detach(thread); + thread_template.depress_timer_active = 0; - assert(stack); - if (stack != thread->reserved_stack) { - spl_t s = splsched(); - vm_offset_t *cache; + thread_template.processor_set = PROCESSOR_SET_NULL; - cache = &stack_free_cache[cpu_number()]; - if (*cache == 0) { - *cache = stack; - splx(s); + thread_template.special_handler.handler = special_handler; + thread_template.special_handler.next = 0; - return; - } +#if MACH_HOST + thread_template.may_assign = TRUE; + thread_template.assign_active = FALSE; +#endif /* MACH_HOST */ + thread_template.funnel_lock = THR_FUNNEL_NULL; + thread_template.funnel_state = 0; + thread_template.recover = (vm_offset_t)NULL; - stack_lock(); - stack_next(stack) = stack_free_list; - stack_free_list = stack; - if (++stack_free_count > stack_free_max) - stack_free_max = stack_free_count; - stack_unlock(); - splx(s); - } + init_thread = thread_template; + machine_set_current_thread(&init_thread); } void -stack_free_stack( - vm_offset_t stack) +thread_init(void) { - spl_t s = splsched(); - vm_offset_t *cache; + thread_zone = zinit( + sizeof(struct thread), + THREAD_MAX * sizeof(struct thread), + THREAD_CHUNK * sizeof(struct thread), + "threads"); - cache = &stack_free_cache[cpu_number()]; - if (*cache == 0) { - *cache = stack; - splx(s); + stack_init(); - return; - } + /* + * Initialize any machine-dependent + * per-thread structures necessary. + */ + machine_thread_init(); +} - stack_lock(); - stack_next(stack) = stack_free_list; - stack_free_list = stack; - if (++stack_free_count > stack_free_max) - stack_free_max = stack_free_count; - stack_unlock(); - splx(s); +static void +thread_terminate_continue(void) +{ + panic("thread_terminate_continue"); + /*NOTREACHED*/ } /* - * stack_collect: - * - * Free excess kernel stacks. - * May block. 
+ * thread_terminate_self: */ - void -stack_collect(void) +thread_terminate_self(void) { - spl_t s = splsched(); - - stack_lock(); - while (stack_free_count > stack_free_limit) { - vm_offset_t stack = stack_free_list; + thread_t thread = current_thread(); + task_t task; + spl_t s; - stack_free_list = stack_next(stack); - stack_free_count--; - stack_unlock(); - splx(s); + s = splsched(); + thread_lock(thread); - if (vm_map_remove( - stack_map, stack, stack + KERNEL_STACK_SIZE, - VM_MAP_REMOVE_KUNWIRE) != KERN_SUCCESS) - panic("stack_collect: vm_map_remove failed"); + /* + * Cancel priority depression, reset scheduling parameters, + * and wait for concurrent expirations on other processors. + */ + if (thread->sched_mode & TH_MODE_ISDEPRESSED) { + thread->sched_mode &= ~TH_MODE_ISDEPRESSED; - s = splsched(); - stack_lock(); - stack_alloc_total--; + if (timer_call_cancel(&thread->depress_timer)) + thread->depress_timer_active--; } - stack_unlock(); - splx(s); -} -/* - * stack_alloc_try: - * - * Non-blocking attempt to allocate a kernel stack. - * Called at splsched with the thread locked. - */ + thread_policy_reset(thread); -boolean_t stack_alloc_try( - thread_t thread, - void (*start)(thread_t)) -{ - register vm_offset_t stack, *cache; + while (thread->depress_timer_active > 0) { + thread_unlock(thread); + splx(s); - cache = &stack_free_cache[cpu_number()]; - if (stack = *cache) { - *cache = 0; - machine_stack_attach(thread, stack, start); - stack_cache_hits++; + delay(1); - return (TRUE); + s = splsched(); + thread_lock(thread); } - stack_lock(); - stack = stack_free_list; - if (stack != (vm_offset_t)0) { - stack_free_list = stack_next(stack); - stack_free_count--; - } - stack_unlock(); + thread_unlock(thread); + splx(s); - if (stack == 0) - stack = thread->reserved_stack; + thread_mtx_lock(thread); - if (stack != 0) { - machine_stack_attach(thread, stack, start); - stack_alloc_hits++; + ulock_release_all(thread); - return (TRUE); - } - else { - stack_alloc_misses++; + ipc_thread_disable(thread); + + thread_mtx_unlock(thread); - return (FALSE); - } -} + /* + * If we are the last thread to terminate and the task is + * associated with a BSD process, perform BSD process exit. + */ + task = thread->task; + if ( hw_atomic_sub(&task->active_thread_count, 1) == 0 && + task->bsd_info != NULL ) + proc_exit(task->bsd_info); -#if MACH_DEBUG -/* - * stack_statistics: - * - * Return statistics on cached kernel stacks. - * *maxusagep must be initialized by the caller. - */ + s = splsched(); + thread_lock(thread); -void -stack_statistics( - unsigned int *totalp, - vm_size_t *maxusagep) -{ - spl_t s; + /* + * Cancel wait timer, and wait for + * concurrent expirations. + */ + if (thread->wait_timer_is_set) { + thread->wait_timer_is_set = FALSE; - s = splsched(); - stack_lock(); + if (timer_call_cancel(&thread->wait_timer)) + thread->wait_timer_active--; + } - *totalp = stack_free_count; - *maxusagep = 0; + while (thread->wait_timer_active > 0) { + thread_unlock(thread); + splx(s); - stack_unlock(); - splx(s); -} -#endif /* MACH_DEBUG */ + delay(1); -#endif /* MACHINE_STACK */ + s = splsched(); + thread_lock(thread); + } + /* + * If there is a reserved stack, release it. 
+ */ + if (thread->reserved_stack != 0) { + if (thread->reserved_stack != thread->kernel_stack) + stack_free_stack(thread->reserved_stack); + thread->reserved_stack = 0; + } -stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, - vm_size_t *alloc_size, int *collectable, int *exhaustable) -{ - *count = stack_alloc_total - stack_free_count; - *cur_size = KERNEL_STACK_SIZE * stack_alloc_total; - *max_size = KERNEL_STACK_SIZE * stack_alloc_hiwater; - *elem_size = KERNEL_STACK_SIZE; - *alloc_size = KERNEL_STACK_SIZE; - *collectable = 1; - *exhaustable = 0; -} + /* + * Mark thread as terminating, and block. + */ + thread->state |= TH_TERMINATE; + thread_mark_wait_locked(thread, THREAD_UNINT); + assert(thread->promotions == 0); + thread_unlock(thread); + /* splsched */ -void -stack_privilege( - register thread_t thread) -{ - /* OBSOLETE */ + thread_block((thread_continue_t)thread_terminate_continue); + /*NOTREACHED*/ } void -thread_bootstrap(void) +thread_deallocate( + thread_t thread) { - /* - * Fill in a template thread for fast initialization. - */ + processor_set_t pset; + task_t task; - thread_template.runq = RUN_QUEUE_NULL; + if (thread == THREAD_NULL) + return; - thread_template.ref_count = 1; + if (thread_deallocate_internal(thread) > 0) + return; - thread_template.reason = AST_NONE; - thread_template.at_safe_point = FALSE; - thread_template.wait_event = NO_EVENT64; - thread_template.wait_queue = WAIT_QUEUE_NULL; - thread_template.wait_result = THREAD_WAITING; - thread_template.interrupt_level = THREAD_ABORTSAFE; - thread_template.state = TH_STACK_HANDOFF | TH_WAIT | TH_UNINT; - thread_template.wake_active = FALSE; - thread_template.active_callout = FALSE; - thread_template.continuation = (void (*)(void))0; - thread_template.top_act = THR_ACT_NULL; + ipc_thread_terminate(thread); - thread_template.importance = 0; - thread_template.sched_mode = 0; - thread_template.safe_mode = 0; + task = thread->task; - thread_template.priority = 0; - thread_template.sched_pri = 0; - thread_template.max_priority = 0; - thread_template.task_priority = 0; - thread_template.promotions = 0; - thread_template.pending_promoter_index = 0; - thread_template.pending_promoter[0] = - thread_template.pending_promoter[1] = NULL; +#ifdef MACH_BSD + { + void *ut = thread->uthread; - thread_template.realtime.deadline = UINT64_MAX; + thread->uthread = NULL; + uthread_free(task, ut, task->bsd_info); + } +#endif /* MACH_BSD */ - thread_template.current_quantum = 0; + task_deallocate(task); - thread_template.computation_metered = 0; - thread_template.computation_epoch = 0; + pset = thread->processor_set; + pset_deallocate(pset); - thread_template.cpu_usage = 0; - thread_template.cpu_delta = 0; - thread_template.sched_usage = 0; - thread_template.sched_delta = 0; - thread_template.sched_stamp = 0; - thread_template.sleep_stamp = 0; - thread_template.safe_release = 0; + if (thread->kernel_stack != 0) + stack_free(thread); - thread_template.bound_processor = PROCESSOR_NULL; - thread_template.last_processor = PROCESSOR_NULL; - thread_template.last_switch = 0; + machine_thread_destroy(thread); - thread_template.vm_privilege = FALSE; + zfree(thread_zone, thread); +} - timer_init(&(thread_template.user_timer)); - timer_init(&(thread_template.system_timer)); - thread_template.user_timer_save.low = 0; - thread_template.user_timer_save.high = 0; - thread_template.system_timer_save.low = 0; - thread_template.system_timer_save.high = 0; +/* + * thread_terminate_daemon: + * + * Perform final 
clean up for terminating threads. + */ +static void +thread_terminate_daemon(void) +{ + thread_t thread; + task_t task; + processor_set_t pset; - thread_template.processor_set = PROCESSOR_SET_NULL; + (void)splsched(); + simple_lock(&thread_terminate_lock); - thread_template.act_ref_count = 2; + while ((thread = (thread_t)dequeue_head(&thread_terminate_queue)) != THREAD_NULL) { + simple_unlock(&thread_terminate_lock); + (void)spllo(); - thread_template.special_handler.handler = special_handler; - thread_template.special_handler.next = 0; + task = thread->task; -#if MACH_HOST - thread_template.may_assign = TRUE; - thread_template.assign_active = FALSE; -#endif /* MACH_HOST */ - thread_template.funnel_lock = THR_FUNNEL_NULL; - thread_template.funnel_state = 0; -#if MACH_LDEBUG - thread_template.mutex_count = 0; -#endif /* MACH_LDEBUG */ - - init_thread = thread_template; + task_lock(task); + task->total_user_time += timer_grab(&thread->user_timer); + task->total_system_time += timer_grab(&thread->system_timer); - init_thread.top_act = &init_thread; - init_thread.thread = &init_thread; - machine_thread_set_current(&init_thread); -} + queue_remove(&task->threads, thread, thread_t, task_threads); + task->thread_count--; + task_unlock(task); -void -thread_init(void) -{ - kern_return_t ret; - unsigned int stack; - - thread_zone = zinit( - sizeof(struct thread), - THREAD_MAX * sizeof(struct thread), - THREAD_CHUNK * sizeof(struct thread), - "threads"); + pset = thread->processor_set; - /* - * Initialize other data structures used in - * this module. - */ + pset_lock(pset); + pset_remove_thread(pset, thread); + pset_unlock(pset); - queue_init(&reaper_queue); - simple_lock_init(&reaper_lock, ETAP_THREAD_REAPER); + thread_deallocate(thread); -#ifndef MACHINE_STACK - simple_lock_init(&stack_lock_data, ETAP_THREAD_STACK); /* Initialize the stack lock */ - - if (KERNEL_STACK_SIZE < round_page_32(KERNEL_STACK_SIZE)) { /* Kernel stacks must be multiples of pages */ - panic("thread_init: kernel stack size (%08X) must be a multiple of page size (%08X)\n", - KERNEL_STACK_SIZE, PAGE_SIZE); - } - - for(stack_alloc_bndry = PAGE_SIZE; stack_alloc_bndry <= KERNEL_STACK_SIZE; stack_alloc_bndry <<= 1); /* Find next power of 2 above stack size */ - - ret = kmem_suballoc(kernel_map, /* Suballocate from the kernel map */ - - &stack, - (stack_alloc_bndry * (2*THREAD_MAX + 64)), /* Allocate enough for all of it */ - FALSE, /* Say not pageable so that it is wired */ - TRUE, /* Allocate from anywhere */ - &stack_map); /* Allocate a submap */ - - if(ret != KERN_SUCCESS) { /* Did we get one? */ - panic("thread_init: kmem_suballoc for stacks failed - ret = %d\n", ret); /* Die */ - } - stack = vm_map_min(stack_map); /* Make sure we skip the first hunk */ - ret = vm_map_enter(stack_map, &stack, PAGE_SIZE, 0, /* Make sure there is nothing at the start */ - 0, /* Force it at start */ - VM_OBJECT_NULL, 0, /* No object yet */ - FALSE, /* No copy */ - VM_PROT_NONE, /* Allow no access */ - VM_PROT_NONE, /* Allow no access */ - VM_INHERIT_DEFAULT); /* Just be normal */ - - if(ret != KERN_SUCCESS) { /* Did it work? */ - panic("thread_init: dummy alignment allocation failed; ret = %d\n", ret); + (void)splsched(); + simple_lock(&thread_terminate_lock); } - -#endif /* MACHINE_STACK */ - /* - * Initialize any machine-dependent - * per-thread structures necessary. 
- */ - machine_thread_init(); + assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT); + simple_unlock(&thread_terminate_lock); + /* splsched */ + + thread_block((thread_continue_t)thread_terminate_daemon); + /*NOTREACHED*/ } /* - * Called at splsched. + * thread_terminate_enqueue: + * + * Enqueue a terminating thread for final disposition. + * + * Called at splsched. */ void -thread_reaper_enqueue( +thread_terminate_enqueue( thread_t thread) { - simple_lock(&reaper_lock); - enqueue_tail(&reaper_queue, (queue_entry_t)thread); - simple_unlock(&reaper_lock); + simple_lock(&thread_terminate_lock); + enqueue_tail(&thread_terminate_queue, (queue_entry_t)thread); + simple_unlock(&thread_terminate_lock); - thread_wakeup((event_t)&reaper_queue); + thread_wakeup((event_t)&thread_terminate_queue); } -void -thread_termination_continue(void) +/* + * thread_stack_daemon: + * + * Perform stack allocation as required due to + * invoke failures. + */ +static void +thread_stack_daemon(void) { - panic("thread_termination_continue"); + thread_t thread; + + (void)splsched(); + simple_lock(&thread_stack_lock); + + while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) { + simple_unlock(&thread_stack_lock); + /* splsched */ + + stack_alloc(thread); + + thread_lock(thread); + thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); + thread_unlock(thread); + (void)spllo(); + + (void)splsched(); + simple_lock(&thread_stack_lock); + } + + assert_wait((event_t)&thread_stack_queue, THREAD_UNINT); + simple_unlock(&thread_stack_lock); + /* splsched */ + + thread_block((thread_continue_t)thread_stack_daemon); /*NOTREACHED*/ } /* - * Routine: thread_terminate_self + * thread_stack_enqueue: * - * This routine is called by a thread which has unwound from - * its current RPC and kernel contexts and found that it's - * root activation has been marked for extinction. This lets - * it clean up the last few things that can only be cleaned - * up in this context and then impale itself on the reaper - * queue. + * Enqueue a thread for stack allocation. * - * When the reaper gets the thread, it will deallocate the - * thread_act's reference on itself, which in turn will release - * its own reference on this thread. By doing things in that - * order, a thread_act will always have a valid thread - but the - * thread may persist beyond having a thread_act (but must never - * run like that). + * Called at splsched. */ void -thread_terminate_self(void) +thread_stack_enqueue( + thread_t thread) { - thread_act_t thr_act = current_act(); - thread_t thread; - task_t task = thr_act->task; - long active_acts; - spl_t s; + simple_lock(&thread_stack_lock); + enqueue_tail(&thread_stack_queue, (queue_entry_t)thread); + simple_unlock(&thread_stack_lock); - /* - * We should be at the base of the inheritance chain. - */ - thread = act_lock_thread(thr_act); - assert(thr_act->thread == thread); - - /* This will allow no more control ops on this thr_act. */ - ipc_thr_act_disable(thr_act); - - /* Clean-up any ulocks that are still owned by the thread - * activation (acquired but not released or handed-off). - */ - act_ulock_release_all(thr_act); - - act_unlock_thread(thr_act); + thread_wakeup((event_t)&thread_stack_queue); +} - _mk_sp_thread_depress_abort(thread, TRUE); +void +thread_daemon_init(void) +{ + kern_return_t result; + thread_t thread; - /* - * Check to see if this is the last active activation. By - * this we mean the last activation to call thread_terminate_self. 
- * If so, and the task is associated with a BSD process, we - * need to call BSD and let them clean up. - */ - active_acts = hw_atomic_sub(&task->active_thread_count, 1); + simple_lock_init(&thread_terminate_lock, 0); + queue_init(&thread_terminate_queue); - if (active_acts == 0 && task->bsd_info) - proc_exit(task->bsd_info); + result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + panic("thread_daemon_init: thread_terminate_daemon"); - /* JMM - for now, no migration */ - assert(!thr_act->lower); + thread_deallocate(thread); - thread_timer_terminate(); + simple_lock_init(&thread_stack_lock, 0); + queue_init(&thread_stack_queue); - ipc_thread_terminate(thread); - - s = splsched(); - thread_lock(thread); - thread->state |= TH_TERMINATE; - assert((thread->state & TH_UNINT) == 0); - thread_mark_wait_locked(thread, THREAD_UNINT); - assert(thread->promotions == 0); - thread_unlock(thread); - /* splx(s); */ + result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT, &thread); + if (result != KERN_SUCCESS) + panic("thread_daemon_init: thread_stack_daemon"); - ETAP_SET_REASON(thread, BLOCKED_ON_TERMINATION); - thread_block(thread_termination_continue); - /*NOTREACHED*/ + thread_deallocate(thread); } /* @@ -682,7 +541,7 @@ static kern_return_t thread_create_internal( task_t parent_task, integer_t priority, - void (*start)(void), + thread_continue_t continuation, thread_t *out_thread) { thread_t new_thread; @@ -693,7 +552,7 @@ thread_create_internal( * Allocate a thread and initialize static fields */ if (first_thread == NULL) - new_thread = first_thread = current_act(); + new_thread = first_thread = current_thread(); else new_thread = (thread_t)zalloc(thread_zone); if (new_thread == NULL) @@ -704,11 +563,9 @@ thread_create_internal( #ifdef MACH_BSD { - extern void *uthread_alloc(task_t, thread_act_t); - new_thread->uthread = uthread_alloc(parent_task, new_thread); if (new_thread->uthread == NULL) { - zfree(thread_zone, (vm_offset_t)new_thread); + zfree(thread_zone, new_thread); return (KERN_RESOURCE_SHORTAGE); } } @@ -717,14 +574,13 @@ thread_create_internal( if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) { #ifdef MACH_BSD { - extern void uthread_free(task_t, void *, void *, void *); void *ut = new_thread->uthread; new_thread->uthread = NULL; - uthread_free(parent_task, (void *)new_thread, ut, parent_task->bsd_info); + uthread_free(parent_task, ut, parent_task->bsd_info); } #endif /* MACH_BSD */ - zfree(thread_zone, (vm_offset_t)new_thread); + zfree(thread_zone, new_thread); return (KERN_FAILURE); } @@ -733,16 +589,13 @@ thread_create_internal( thread_lock_init(new_thread); wake_lock_init(new_thread); - mutex_init(&new_thread->lock, ETAP_THREAD_ACT); - - ipc_thr_act_init(parent_task, new_thread); + mutex_init(&new_thread->mutex, 0); ipc_thread_init(new_thread); queue_init(&new_thread->held_ulocks); - act_prof_init(new_thread, parent_task); + thread_prof_init(new_thread, parent_task); - new_thread->continuation = start; - new_thread->sleep_stamp = sched_tick; + new_thread->continuation = continuation; pset = parent_task->processor_set; assert(pset == &default_pset); @@ -759,31 +612,27 @@ thread_create_internal( #ifdef MACH_BSD { - extern void uthread_free(task_t, void *, void *, void *); void *ut = new_thread->uthread; new_thread->uthread = NULL; - uthread_free(parent_task, (void *)new_thread, ut, parent_task->bsd_info); + uthread_free(parent_task, 
ut, parent_task->bsd_info); } #endif /* MACH_BSD */ - act_prof_deallocate(new_thread); - ipc_thr_act_terminate(new_thread); + ipc_thread_disable(new_thread); + ipc_thread_terminate(new_thread); machine_thread_destroy(new_thread); - zfree(thread_zone, (vm_offset_t) new_thread); + zfree(thread_zone, new_thread); return (KERN_FAILURE); } - act_attach(new_thread, new_thread); - - task_reference_locked(parent_task); + task_reference_internal(parent_task); /* Cache the task's map */ new_thread->map = parent_task->map; /* Chain the thread onto the task's list */ - queue_enter(&parent_task->threads, new_thread, thread_act_t, task_threads); + queue_enter(&parent_task->threads, new_thread, thread_t, task_threads); parent_task->thread_count++; - parent_task->res_thread_count++; /* So terminating threads don't need to take the task lock to decrement */ hw_atomic_add(&parent_task->active_thread_count, 1); @@ -791,7 +640,8 @@ thread_create_internal( /* Associate the thread with the processor set */ pset_add_thread(pset, new_thread); - thread_timer_setup(new_thread); + timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread); + timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread); /* Set the thread's scheduling parameters */ if (parent_task != kernel_task) @@ -804,13 +654,9 @@ thread_create_internal( new_thread->importance = new_thread->priority - new_thread->task_priority; new_thread->sched_stamp = sched_tick; + new_thread->pri_shift = new_thread->processor_set->pri_shift; compute_priority(new_thread, FALSE); -#if ETAP_EVENT_MONITOR - new_thread->etap_reason = 0; - new_thread->etap_trace = FALSE; -#endif /* ETAP_EVENT_MONITOR */ - new_thread->active = TRUE; *out_thread = new_thread; @@ -835,12 +681,10 @@ thread_create_internal( return (KERN_SUCCESS); } -extern void thread_bootstrap_return(void); - kern_return_t thread_create( task_t task, - thread_act_t *new_thread) + thread_t *new_thread) { kern_return_t result; thread_t thread; @@ -848,7 +692,7 @@ thread_create( if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); - result = thread_create_internal(task, -1, thread_bootstrap_return, &thread); + result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, &thread); if (result != KERN_SUCCESS) return (result); @@ -871,7 +715,7 @@ thread_create_running( int flavor, thread_state_t new_state, mach_msg_type_number_t new_state_count, - thread_act_t *new_thread) + thread_t *new_thread) { register kern_return_t result; thread_t thread; @@ -879,24 +723,25 @@ thread_create_running( if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); - result = thread_create_internal(task, -1, thread_bootstrap_return, &thread); + result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, &thread); if (result != KERN_SUCCESS) return (result); - result = machine_thread_set_state(thread, flavor, new_state, new_state_count); + result = machine_thread_set_state( + thread, flavor, new_state, new_state_count); if (result != KERN_SUCCESS) { pset_unlock(task->processor_set); task_unlock(task); thread_terminate(thread); - act_deallocate(thread); + thread_deallocate(thread); return (result); } - act_lock(thread); + thread_mtx_lock(thread); clear_wait(thread, THREAD_AWAKENED); thread->started = TRUE; - act_unlock(thread); + thread_mtx_unlock(thread); pset_unlock(task->processor_set); task_unlock(task); @@ -906,56 +751,73 @@ thread_create_running( } /* - * kernel_thread: + * kernel_thread_create: * 
* Create a thread in the kernel task * to execute in kernel context. */ -thread_t +kern_return_t kernel_thread_create( - void (*start)(void), - integer_t priority) + thread_continue_t continuation, + void *parameter, + integer_t priority, + thread_t *new_thread) { kern_return_t result; - task_t task = kernel_task; thread_t thread; + task_t task = kernel_task; - result = thread_create_internal(task, priority, start, &thread); + result = thread_create_internal(task, priority, continuation, &thread); if (result != KERN_SUCCESS) - return (THREAD_NULL); + return (result); pset_unlock(task->processor_set); task_unlock(task); - thread_doswapin(thread); +#if !defined(i386) + stack_alloc(thread); assert(thread->kernel_stack != 0); thread->reserved_stack = thread->kernel_stack; +#endif /* !defined(i386) */ - act_deallocate(thread); + thread->parameter = parameter; - return (thread); + *new_thread = thread; + + return (result); } -thread_t -kernel_thread_with_priority( - void (*start)(void), - integer_t priority) +kern_return_t +kernel_thread_start_priority( + thread_continue_t continuation, + void *parameter, + integer_t priority, + thread_t *new_thread) { + kern_return_t result; thread_t thread; - thread = kernel_thread_create(start, priority); - if (thread == THREAD_NULL) - return (THREAD_NULL); + result = kernel_thread_create(continuation, parameter, priority, &thread); + if (result != KERN_SUCCESS) + return (result); - act_lock(thread); + thread_mtx_lock(thread); clear_wait(thread, THREAD_AWAKENED); thread->started = TRUE; - act_unlock(thread); + thread_mtx_unlock(thread); -#ifdef i386 - thread_bind(thread, master_processor); -#endif /* i386 */ - return (thread); + *new_thread = thread; + + return (result); +} + +kern_return_t +kernel_thread_start( + thread_continue_t continuation, + void *parameter, + thread_t *new_thread) +{ + return kernel_thread_start_priority(continuation, parameter, -1, new_thread); } thread_t @@ -963,117 +825,28 @@ kernel_thread( task_t task, void (*start)(void)) { + kern_return_t result; + thread_t thread; + if (task != kernel_task) panic("kernel_thread"); - return kernel_thread_with_priority(start, -1); -} - -unsigned int c_weird_pset_ref_exit = 0; /* pset code raced us */ - -#if MACH_HOST -/* Preclude thread processor set assignement */ -#define thread_freeze(thread) assert((thread)->processor_set == &default_pset) - -/* Allow thread processor set assignement */ -#define thread_unfreeze(thread) assert((thread)->processor_set == &default_pset) - -#endif /* MACH_HOST */ - -void -thread_deallocate( - thread_t thread) -{ - task_t task; - processor_set_t pset; - int refs; - spl_t s; - - if (thread == THREAD_NULL) - return; - - /* - * First, check for new count > 0 (the common case). - * Only the thread needs to be locked. - */ - s = splsched(); - thread_lock(thread); - refs = --thread->ref_count; - thread_unlock(thread); - splx(s); - - if (refs > 0) - return; - - if (thread == current_thread()) - panic("thread_deallocate"); - - /* - * There is a dangling pointer to the thread from the - * processor_set. To clean it up, we freeze the thread - * in the pset (because pset destruction can cause even - * reference-less threads to be reassigned to the default - * pset) and then remove it. 
- */ - -#if MACH_HOST - thread_freeze(thread); -#endif - - pset = thread->processor_set; - pset_lock(pset); - pset_remove_thread(pset, thread); - pset_unlock(pset); - -#if MACH_HOST - thread_unfreeze(thread); -#endif - - pset_deallocate(pset); - - if (thread->reserved_stack != 0) { - if (thread->reserved_stack != thread->kernel_stack) - stack_free_stack(thread->reserved_stack); - thread->reserved_stack = 0; - } - - if (thread->kernel_stack != 0) - stack_free(thread); - - machine_thread_destroy(thread); - - zfree(thread_zone, (vm_offset_t) thread); -} - -void -thread_reference( - thread_t thread) -{ - spl_t s; + result = kernel_thread_start_priority((thread_continue_t)start, NULL, -1, &thread); + if (result != KERN_SUCCESS) + return (THREAD_NULL); - if (thread == THREAD_NULL) - return; + thread_deallocate(thread); - s = splsched(); - thread_lock(thread); - thread_reference_locked(thread); - thread_unlock(thread); - splx(s); + return (thread); } -/* - * Called with "appropriate" thread-related locks held on - * thread and its top_act for synchrony with RPC (see - * act_lock_thread()). - */ kern_return_t -thread_info_shuttle( - register thread_act_t thr_act, +thread_info_internal( + register thread_t thread, thread_flavor_t flavor, thread_info_t thread_info_out, /* ptr to OUT array */ mach_msg_type_number_t *thread_info_count) /*IN/OUT*/ { - register thread_t thread = thr_act->thread; int state, flags; spl_t s; @@ -1109,15 +882,12 @@ thread_info_shuttle( * then for 5/8 ageing. The correction factor [3/5] is * (1/(5/8) - 1). */ - basic_info->cpu_usage = (thread->cpu_usage << SCHED_TICK_SHIFT) / - (TIMER_RATE / TH_USAGE_SCALE); + basic_info->cpu_usage = ((uint64_t)thread->cpu_usage + * TH_USAGE_SCALE) / sched_tick_interval; basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5; -#if SIMPLE_CLOCK - /* - * Clock drift compensation. - */ - basic_info->cpu_usage = (basic_info->cpu_usage * 1000000) / sched_usec; -#endif /* SIMPLE_CLOCK */ + + if (basic_info->cpu_usage > TH_USAGE_SCALE) + basic_info->cpu_usage = TH_USAGE_SCALE; basic_info->policy = ((thread->sched_mode & TH_MODE_TIMESHARE)? POLICY_TIMESHARE: POLICY_RR); @@ -1126,7 +896,7 @@ thread_info_shuttle( if (thread->state & TH_IDLE) flags |= TH_FLAGS_IDLE; - if (thread->state & TH_STACK_HANDOFF) + if (!thread->kernel_stack) flags |= TH_FLAGS_SWAPPED; state = 0; @@ -1148,7 +918,7 @@ thread_info_shuttle( basic_info->run_state = state; basic_info->flags = flags; - basic_info->suspend_count = thr_act->user_stop_count; + basic_info->suspend_count = thread->user_stop_count; thread_unlock(thread); splx(s); @@ -1247,83 +1017,26 @@ thread_info_shuttle( } void -thread_doreap( - register thread_t thread) -{ - thread_act_t thr_act; - - - thr_act = thread_lock_act(thread); - assert(thr_act && thr_act->thread == thread); - - act_reference_locked(thr_act); - - /* - * Replace `act_unlock_thread()' with individual - * calls. (`act_detach()' can change fields used - * to determine which locks are held, confusing - * `act_unlock_thread()'.) - */ - act_unlock(thr_act); - - /* Remove the reference held by a rooted thread */ - act_deallocate(thr_act); - - /* Remove the reference held by the thread: */ - act_deallocate(thr_act); -} - -/* - * reaper_thread: - * - * This kernel thread runs forever looking for terminating - * threads, releasing their "self" references. 
- */ -static void -reaper_thread_continue(void) -{ - register thread_t thread; - - (void)splsched(); - simple_lock(&reaper_lock); - - while ((thread = (thread_t) dequeue_head(&reaper_queue)) != THREAD_NULL) { - simple_unlock(&reaper_lock); - (void)spllo(); - - thread_doreap(thread); - - (void)splsched(); - simple_lock(&reaper_lock); - } - - assert_wait((event_t)&reaper_queue, THREAD_UNINT); - simple_unlock(&reaper_lock); - (void)spllo(); - - thread_block(reaper_thread_continue); - /*NOTREACHED*/ -} - -static void -reaper_thread(void) +thread_read_times( + thread_t thread, + time_value_t *user_time, + time_value_t *system_time) { - reaper_thread_continue(); - /*NOTREACHED*/ -} + absolutetime_to_microtime( + timer_grab(&thread->user_timer), + &user_time->seconds, &user_time->microseconds); -void -thread_reaper_init(void) -{ - kernel_thread_with_priority(reaper_thread, MINPRI_KERNEL); + absolutetime_to_microtime( + timer_grab(&thread->system_timer), + &system_time->seconds, &system_time->microseconds); } kern_return_t thread_assign( - thread_act_t thr_act, - processor_set_t new_pset) + __unused thread_t thread, + __unused processor_set_t new_pset) { - return(KERN_FAILURE); + return (KERN_FAILURE); } /* @@ -1334,9 +1047,9 @@ thread_assign( */ kern_return_t thread_assign_default( - thread_act_t thr_act) + thread_t thread) { - return (thread_assign(thr_act, &default_pset)); + return (thread_assign(thread, &default_pset)); } /* @@ -1346,22 +1059,15 @@ thread_assign_default( */ kern_return_t thread_get_assignment( - thread_act_t thr_act, + thread_t thread, processor_set_t *pset) { - thread_t thread; - - if (thr_act == THR_ACT_NULL) - return(KERN_INVALID_ARGUMENT); - thread = act_lock_thread(thr_act); - if (thread == THREAD_NULL) { - act_unlock_thread(thr_act); - return(KERN_INVALID_ARGUMENT); - } + if (thread == NULL) + return (KERN_INVALID_ARGUMENT); + *pset = thread->processor_set; - act_unlock_thread(thr_act); pset_reference(*pset); - return(KERN_SUCCESS); + return (KERN_SUCCESS); } /* @@ -1372,54 +1078,31 @@ thread_get_assignment( */ kern_return_t thread_wire_internal( - host_priv_t host_priv, - thread_act_t thr_act, - boolean_t wired, - boolean_t *prev_state) + host_priv_t host_priv, + thread_t thread, + boolean_t wired, + boolean_t *prev_state) { - spl_t s; - thread_t thread; - extern void vm_page_free_reserve(int pages); - - if (thr_act == THR_ACT_NULL || host_priv == HOST_PRIV_NULL) + if (host_priv == NULL || thread != current_thread()) return (KERN_INVALID_ARGUMENT); assert(host_priv == &realhost); - thread = act_lock_thread(thr_act); - if (thread ==THREAD_NULL) { - act_unlock_thread(thr_act); - return(KERN_INVALID_ARGUMENT); - } - - /* - * This implementation only works for the current thread. 
- */ - if (thr_act != current_act()) - return KERN_INVALID_ARGUMENT; - - s = splsched(); - thread_lock(thread); - - if (prev_state) { - *prev_state = thread->vm_privilege; - } + if (prev_state) + *prev_state = (thread->options & TH_OPT_VMPRIV) != 0; if (wired) { - if (thread->vm_privilege == FALSE) + if (!(thread->options & TH_OPT_VMPRIV)) vm_page_free_reserve(1); /* XXX */ - thread->vm_privilege = TRUE; - } else { - if (thread->vm_privilege == TRUE) + thread->options |= TH_OPT_VMPRIV; + } + else { + if (thread->options & TH_OPT_VMPRIV) vm_page_free_reserve(-1); /* XXX */ - thread->vm_privilege = FALSE; + thread->options &= ~TH_OPT_VMPRIV; } - thread_unlock(thread); - splx(s); - act_unlock_thread(thr_act); - - return KERN_SUCCESS; + return (KERN_SUCCESS); } @@ -1431,166 +1114,37 @@ thread_wire_internal( kern_return_t thread_wire( host_priv_t host_priv, - thread_act_t thr_act, + thread_t thread, boolean_t wired) - { - return thread_wire_internal(host_priv, thr_act, wired, NULL); + return (thread_wire_internal(host_priv, thread, wired, NULL)); } -kern_return_t -host_stack_usage( - host_t host, - vm_size_t *reservedp, - unsigned int *totalp, - vm_size_t *spacep, - vm_size_t *residentp, - vm_size_t *maxusagep, - vm_offset_t *maxstackp) -{ -#if !MACH_DEBUG - return KERN_NOT_SUPPORTED; -#else - unsigned int total; - vm_size_t maxusage; - - if (host == HOST_NULL) - return KERN_INVALID_HOST; - - maxusage = 0; - - stack_statistics(&total, &maxusage); +int split_funnel_off = 0; +lck_grp_t *funnel_lck_grp = LCK_GRP_NULL; +lck_grp_attr_t *funnel_lck_grp_attr; +lck_attr_t *funnel_lck_attr; - *reservedp = 0; - *totalp = total; - *spacep = *residentp = total * round_page_32(KERNEL_STACK_SIZE); - *maxusagep = maxusage; - *maxstackp = 0; - return KERN_SUCCESS; - -#endif /* MACH_DEBUG */ -} - -/* - * Return info on stack usage for threads in a specific processor set - */ -kern_return_t -processor_set_stack_usage( - processor_set_t pset, - unsigned int *totalp, - vm_size_t *spacep, - vm_size_t *residentp, - vm_size_t *maxusagep, - vm_offset_t *maxstackp) +funnel_t * +funnel_alloc( + int type) { -#if !MACH_DEBUG - return KERN_NOT_SUPPORTED; -#else - unsigned int total; - vm_size_t maxusage; - vm_offset_t maxstack; - - register thread_t *threads; - register thread_t thread; - - unsigned int actual; /* this many things */ - unsigned int i; - - vm_size_t size, size_needed; - vm_offset_t addr; - - spl_t s; - - if (pset == PROCESSOR_SET_NULL) - return KERN_INVALID_ARGUMENT; - - size = 0; addr = 0; - - for (;;) { - pset_lock(pset); - if (!pset->active) { - pset_unlock(pset); - return KERN_INVALID_ARGUMENT; - } - - actual = pset->thread_count; - - /* do we have the memory we need? 
*/ - - size_needed = actual * sizeof(thread_t); - if (size_needed <= size) - break; - - /* unlock the pset and allocate more memory */ - pset_unlock(pset); - - if (size != 0) - kfree(addr, size); + lck_mtx_t *m; + funnel_t *fnl; - assert(size_needed > 0); - size = size_needed; + if (funnel_lck_grp == LCK_GRP_NULL) { + funnel_lck_grp_attr = lck_grp_attr_alloc_init(); + //lck_grp_attr_setstat(funnel_lck_grp_attr); - addr = kalloc(size); - if (addr == 0) - return KERN_RESOURCE_SHORTAGE; - } - - /* OK, have memory and the processor_set is locked & active */ - s = splsched(); - threads = (thread_t *) addr; - for (i = 0, thread = (thread_t) queue_first(&pset->threads); - !queue_end(&pset->threads, (queue_entry_t) thread); - thread = (thread_t) queue_next(&thread->pset_threads)) { - thread_lock(thread); - if (thread->ref_count > 0) { - thread_reference_locked(thread); - threads[i++] = thread; - } - thread_unlock(thread); - } - splx(s); - assert(i <= actual); + funnel_lck_grp = lck_grp_alloc_init("Funnel", funnel_lck_grp_attr); - /* can unlock processor set now that we have the thread refs */ - pset_unlock(pset); - - /* calculate maxusage and free thread references */ - - total = 0; - maxusage = 0; - maxstack = 0; - while (i > 0) { - thread_t thread = threads[--i]; - - if (thread->kernel_stack != 0) - total++; - - thread_deallocate(thread); + funnel_lck_attr = lck_attr_alloc_init(); + //lck_attr_setdebug(funnel_lck_attr); } - - if (size != 0) - kfree(addr, size); - - *totalp = total; - *residentp = *spacep = total * round_page_32(KERNEL_STACK_SIZE); - *maxusagep = maxusage; - *maxstackp = maxstack; - return KERN_SUCCESS; - -#endif /* MACH_DEBUG */ -} - -int split_funnel_off = 0; -funnel_t * -funnel_alloc( - int type) -{ - mutex_t *m; - funnel_t * fnl; if ((fnl = (funnel_t *)kalloc(sizeof(funnel_t))) != 0){ bzero((void *)fnl, sizeof(funnel_t)); - if ((m = mutex_alloc(0)) == (mutex_t *)NULL) { - kfree((vm_offset_t)fnl, sizeof(funnel_t)); + if ((m = lck_mtx_alloc_init(funnel_lck_grp, funnel_lck_attr)) == (lck_mtx_t *)NULL) { + kfree(fnl, sizeof(funnel_t)); return(THR_FUNNEL_NULL); } fnl->fnl_mutex = m; @@ -1603,54 +1157,28 @@ void funnel_free( funnel_t * fnl) { - mutex_free(fnl->fnl_mutex); + lck_mtx_free(fnl->fnl_mutex, funnel_lck_grp); if (fnl->fnl_oldmutex) - mutex_free(fnl->fnl_oldmutex); - kfree((vm_offset_t)fnl, sizeof(funnel_t)); + lck_mtx_free(fnl->fnl_oldmutex, funnel_lck_grp); + kfree(fnl, sizeof(funnel_t)); } void funnel_lock( funnel_t * fnl) { - mutex_t * m; - - m = fnl->fnl_mutex; -restart: - mutex_lock(m); + lck_mtx_lock(fnl->fnl_mutex); fnl->fnl_mtxholder = current_thread(); - if (split_funnel_off && (m != fnl->fnl_mutex)) { - mutex_unlock(m); - m = fnl->fnl_mutex; - goto restart; - } } void funnel_unlock( funnel_t * fnl) { - mutex_unlock(fnl->fnl_mutex); + lck_mtx_unlock(fnl->fnl_mutex); fnl->fnl_mtxrelease = current_thread(); } -int refunnel_hint_enabled = 0; - -boolean_t -refunnel_hint( - thread_t thread, - wait_result_t wresult) -{ - if ( !(thread->funnel_state & TH_FN_REFUNNEL) || - wresult != THREAD_AWAKENED ) - return (FALSE); - - if (!refunnel_hint_enabled) - return (FALSE); - - return (mutex_preblock(thread->funnel_lock->fnl_mutex, thread)); -} - funnel_t * thread_funnel_get( void) @@ -1710,78 +1238,27 @@ thread_funnel_set( return(funnel_state_prev); } -boolean_t -thread_funnel_merge( - funnel_t * fnl, - funnel_t * otherfnl) -{ - mutex_t * m; - mutex_t * otherm; - funnel_t * gfnl; - extern int disable_funnel; - - if ((gfnl = thread_funnel_get()) == THR_FUNNEL_NULL) - 
panic("thread_funnel_merge called with no funnels held"); - - if (gfnl->fnl_type != 1) - panic("thread_funnel_merge called from non kernel funnel"); - - if (gfnl != fnl) - panic("thread_funnel_merge incorrect invocation"); - - if (disable_funnel || split_funnel_off) - return (KERN_FAILURE); - - m = fnl->fnl_mutex; - otherm = otherfnl->fnl_mutex; - /* Acquire other funnel mutex */ - mutex_lock(otherm); - split_funnel_off = 1; - disable_funnel = 1; - otherfnl->fnl_mutex = m; - otherfnl->fnl_type = fnl->fnl_type; - otherfnl->fnl_oldmutex = otherm; /* save this for future use */ - - mutex_unlock(otherm); - return(KERN_SUCCESS); -} +/* + * Export routines to other components for things that are done as macros + * within the osfmk component. + */ +#undef thread_reference +void thread_reference(thread_t thread); void -thread_set_cont_arg( - int arg) -{ - thread_t self = current_thread(); - - self->saved.misc = arg; -} - -int -thread_get_cont_arg(void) +thread_reference( + thread_t thread) { - thread_t self = current_thread(); - - return (self->saved.misc); + if (thread != THREAD_NULL) + thread_reference_internal(thread); } -/* - * Export routines to other components for things that are done as macros - * within the osfmk component. - */ #undef thread_should_halt + boolean_t thread_should_halt( thread_t th) { - return(thread_should_halt_fast(th)); -} - -vm_offset_t min_valid_stack_address(void) -{ - return vm_map_min(stack_map); -} - -vm_offset_t max_valid_stack_address(void) -{ - return vm_map_max(stack_map); + return (thread_should_halt_fast(th)); } diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index 80804265a..cde616313 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -87,63 +87,38 @@ #include #include -#include /* for current_thread */ #include -#include - -/* - * Logically, a thread of control consists of two parts: - * - * + A thread_shuttle, which may migrate due to resource contention - * - * + A thread_activation, which remains attached to a task. - * - * The thread_shuttle contains scheduling info, accounting info, - * and links to the thread_activation within which the shuttle is - * currently operating. - * - * An activation always has a valid task pointer, and it is always constant. - * The activation is only linked onto the task's activation list until - * the activation is terminated. - * - * The thread holds a reference on the activation while using it. - */ - -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE -#include #include #include #include #include -#include #include +#include + #include -#include #include #include -#include #include #include +#include #include #include #include #include #include #include -#include + #include -#include +#include #include -#include struct thread { /* @@ -152,23 +127,21 @@ struct thread { * locked by the thread_lock, but if its value is something else * (i.e. a run_queue) then it is locked by that run_queue's lock. * - * Beginning of thread_shuttle proper. When the thread is on - * a wait queue, these first three fields are treated as an un- - * official union with a wait_queue_element. If you change - * these, you must change that definition as well (wait_queue.h). + * When the thread is on a wait queue, these first three fields + * are treated as an unofficial union with a wait_queue_element. 
+ * If you change these, you must change that definition as well + * (kern/wait_queue.h). */ /* Items examined often, modified infrequently */ queue_chain_t links; /* run/wait queue links */ run_queue_t runq; /* run queue thread is on SEE BELOW */ wait_queue_t wait_queue; /* wait queue we are currently on */ event64_t wait_event; /* wait queue event */ - thread_act_t top_act; /* "current" thr_act */ - uint32_t /* Only set by thread itself */ - interrupt_level:2, /* interrupts/aborts allowed */ - vm_privilege:1, /* can use reserved memory? */ - active_callout:1, /* an active callout */ - :0; - + integer_t options; /* options set by thread itself */ +#define TH_OPT_INTMASK 0x03 /* interrupt / abort level */ +#define TH_OPT_VMPRIV 0x04 /* may allocate reserved memory */ +#define TH_OPT_DELAYIDLE 0x08 /* performing delayed idle */ +#define TH_OPT_CALLOUT 0x10 /* executing as callout */ /* Data updated during assert_wait/thread_wakeup */ decl_simple_lock_data(,sched_lock) /* scheduling lock (thread_lock()) */ @@ -179,8 +152,8 @@ struct thread { wait_result_t wait_result; /* outcome of wait - * may be examined by this thread * WITHOUT locking */ - thread_roust_t roust; /* routine to roust it after wait */ - thread_continue_t continuation; /* resume here next dispatch */ + thread_continue_t continuation; /* continue here next dispatch */ + void *parameter; /* continuation parameter */ /* Data updated/used in thread_invoke */ struct funnel_lock *funnel_lock; /* Non-reentrancy funnel */ @@ -209,10 +182,6 @@ struct thread { #define TH_SCHED_STATE (TH_WAIT|TH_SUSP|TH_RUN|TH_UNINT) -#define TH_STACK_HANDOFF 0x0100 /* thread has no kernel stack */ -#define TH_STACK_ALLOC 0x0200 /* waiting for stack allocation */ -#define TH_STACK_STATE (TH_STACK_HANDOFF | TH_STACK_ALLOC) - /* Scheduling information */ integer_t sched_mode; /* scheduling mode bits */ #define TH_MODE_REALTIME 0x0001 /* time constraints supplied */ @@ -220,9 +189,8 @@ struct thread { #define TH_MODE_PREEMPT 0x0004 /* can preempt kernel contexts */ #define TH_MODE_FAILSAFE 0x0008 /* fail-safe has tripped */ #define TH_MODE_PROMOTED 0x0010 /* sched pri has been promoted */ -#define TH_MODE_FORCEDPREEMPT 0x0020 /* force setting of mode PREEMPT */ -#define TH_MODE_DEPRESS 0x0040 /* normal depress yield */ -#define TH_MODE_POLLDEPRESS 0x0080 /* polled depress yield */ +#define TH_MODE_DEPRESS 0x0020 /* normal depress yield */ +#define TH_MODE_POLLDEPRESS 0x0040 /* polled depress yield */ #define TH_MODE_ISDEPRESSED (TH_MODE_DEPRESS | TH_MODE_POLLDEPRESS) integer_t sched_pri; /* scheduled (current) priority */ @@ -262,33 +230,32 @@ struct thread { natural_t safe_release; /* when to release fail-safe */ /* Statistics and timesharing calculations */ - natural_t sched_stamp; /* when priority was updated */ - natural_t cpu_usage; /* exp. 
decaying cpu usage [%cpu] */ - natural_t cpu_delta; /* cpu usage since last update */ - natural_t sched_usage; /* load-weighted cpu usage [sched] */ - natural_t sched_delta; /* weighted cpu usage since update */ - natural_t sleep_stamp; /* when entered TH_WAIT state */ + natural_t sched_stamp; /* last scheduler tick */ + natural_t sched_usage; /* timesharing cpu usage [sched] */ + natural_t pri_shift; /* usage -> priority from pset */ + natural_t cpu_usage; /* instrumented cpu usage [%cpu] */ + natural_t cpu_delta; /* accumulated cpu_usage delta */ /* Timing data structures */ - timer_data_t user_timer; /* user mode timer */ - timer_save_data_t system_timer_save; /* saved system timer value */ - timer_save_data_t user_timer_save; /* saved user timer value */ + timer_data_t user_timer; /* user mode timer */ + uint64_t system_timer_save; /* saved system timer value */ + uint64_t user_timer_save; /* saved user timer value */ /* Timed wait expiration */ - timer_call_data_t wait_timer; - integer_t wait_timer_active; - boolean_t wait_timer_is_set; + timer_call_data_t wait_timer; + integer_t wait_timer_active; + boolean_t wait_timer_is_set; /* Priority depression expiration */ - timer_call_data_t depress_timer; - integer_t depress_timer_active; + timer_call_data_t depress_timer; + integer_t depress_timer_active; /* Various bits of stashed state */ union { struct { mach_msg_return_t state; /* receive state */ ipc_object_t object; /* object received on */ - mach_msg_header_t *msg; /* receive buffer pointer */ + mach_vm_address_t msg_addr; /* receive buffer pointer */ mach_msg_size_t msize; /* max size for recvd msg */ mach_msg_option_t option; /* options for receive */ mach_msg_size_t slist_size; /* scatter list size */ @@ -311,7 +278,6 @@ struct thread { /* IPC data structures */ struct ipc_kmsg_queue ith_messages; - mach_port_t ith_mig_reply; /* reply port for mig */ mach_port_t ith_rpc_reply; /* reply port for kernel RPCs */ /* Ast/Halt data structures */ @@ -329,23 +295,13 @@ struct thread { queue_chain_t task_threads; /*** Machine-dependent state ***/ - struct MachineThrAct mact; + struct machine_thread machine; /* Task membership */ struct task *task; vm_map_t map; - decl_mutex_data(,lock) - int act_ref_count; - - /* Associated shuttle */ - struct thread *thread; - - /* - * Next higher and next lower activation on - * the thread's activation stack. - */ - struct thread *higher, *lower; + decl_mutex_data(,mutex) /* Kernel holds on this thread */ int suspend_count; @@ -356,7 +312,7 @@ struct thread { /* Pending thread ast(s) */ ast_t ast; - /* Miscellaneous bits guarded by lock mutex */ + /* Miscellaneous bits guarded by mutex */ uint32_t /* Indicates that the thread has not been terminated */ active:1, @@ -370,7 +326,7 @@ struct thread { struct ReturnHandler *next; void (*handler)( struct ReturnHandler *rh, - struct thread *act); + struct thread *thread); } *handlers, special_handler; /* Ports associated with this thread */ @@ -391,40 +347,11 @@ struct thread { #ifdef MACH_BSD void *uthread; #endif - -/* BEGIN TRACING/DEBUG */ - -#if MACH_LOCK_MON - unsigned lock_stack; /* number of locks held */ -#endif /* MACH_LOCK_MON */ - -#if ETAP_EVENT_MONITOR - int etap_reason; /* real reason why we blocked */ - boolean_t etap_trace; /* ETAP trace status */ -#endif /* ETAP_EVENT_MONITOR */ - -#if MACH_LDEBUG - /* - * Debugging: track acquired mutexes and locks. - * Because a thread can block while holding such - * synchronizers, we think of the thread as - * "owning" them. 
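/*
 * Aside: the old one-bit fields (interrupt_level, vm_privilege,
 * active_callout) collapse into the single `options' word introduced
 * above.  A sketch of the resulting idiom, mirroring the thread_call.c
 * hunks later in this patch; the helper name is hypothetical, and per
 * the field's comment only the thread itself touches its options word,
 * so no lock is taken.
 */
static void
example_vm_privileged_section(void)
{
	thread_t	self = current_thread();

	self->options |= TH_OPT_VMPRIV;		/* may dip into reserved memory */

	if (self->options & TH_OPT_CALLOUT) {
		/* currently executing a thread_call callout */
	}

	self->options &= ~TH_OPT_VMPRIV;	/* drop the privilege again */
}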
- */ -#define MUTEX_STACK_DEPTH 20 -#define LOCK_STACK_DEPTH 20 - mutex_t *mutex_stack[MUTEX_STACK_DEPTH]; - lock_t *lock_stack[LOCK_STACK_DEPTH]; - unsigned int mutex_stack_index; - unsigned int lock_stack_index; - unsigned mutex_count; /* XXX to be deleted XXX */ -#endif /* MACH_LDEBUG */ -/* END TRACING/DEBUG */ - }; #define ith_state saved.receive.state #define ith_object saved.receive.object -#define ith_msg saved.receive.msg +#define ith_msg_addr saved.receive.msg_addr #define ith_msize saved.receive.msize #define ith_option saved.receive.option #define ith_scatter_list_size saved.receive.slist_size @@ -442,45 +369,54 @@ extern void thread_bootstrap(void); extern void thread_init(void); -extern void thread_reaper_init(void); +extern void thread_daemon_init(void); -extern void thread_reference( - thread_t thread); +#define thread_reference_internal(thread) \ + hw_atomic_add(&(thread)->ref_count, 1) + +#define thread_deallocate_internal(thread) \ + hw_atomic_sub(&(thread)->ref_count, 1) + +#define thread_reference(thread) \ +MACRO_BEGIN \ + if ((thread) != THREAD_NULL) \ + thread_reference_internal(thread); \ +MACRO_END extern void thread_deallocate( thread_t thread); extern void thread_terminate_self(void); +extern kern_return_t thread_terminate_internal( + thread_t thread); + +extern void thread_terminate_enqueue( + thread_t thread); + +extern void thread_stack_enqueue( + thread_t thread); + extern void thread_hold( - thread_act_t thread); + thread_t thread); extern void thread_release( - thread_act_t thread); + thread_t thread); -#define thread_lock_init(th) simple_lock_init(&(th)->sched_lock, ETAP_THREAD_LOCK) +#define thread_lock_init(th) simple_lock_init(&(th)->sched_lock, 0) #define thread_lock(th) simple_lock(&(th)->sched_lock) #define thread_unlock(th) simple_unlock(&(th)->sched_lock) #define thread_lock_try(th) simple_lock_try(&(th)->sched_lock) -#define thread_should_halt_fast(thread) \ - (!(thread)->top_act || !(thread)->top_act->active) - -#define thread_reference_locked(thread) ((thread)->ref_count++) +#define thread_should_halt_fast(thread) (!(thread)->active) -#define wake_lock_init(th) \ - simple_lock_init(&(th)->wake_lock, ETAP_THREAD_WAKE) -#define wake_lock(th) simple_lock(&(th)->wake_lock) -#define wake_unlock(th) simple_unlock(&(th)->wake_lock) +#define wake_lock_init(th) simple_lock_init(&(th)->wake_lock, 0) +#define wake_lock(th) simple_lock(&(th)->wake_lock) +#define wake_unlock(th) simple_unlock(&(th)->wake_lock) #define wake_lock_try(th) simple_lock_try(&(th)->wake_lock) -extern vm_offset_t stack_alloc( - thread_t thread, - void (*start)(thread_t)); - -extern boolean_t stack_alloc_try( - thread_t thread, - void (*start)(thread_t)); +extern void stack_alloc( + thread_t thread); extern void stack_free( thread_t thread); @@ -488,22 +424,30 @@ extern void stack_free( extern void stack_free_stack( vm_offset_t stack); +extern boolean_t stack_alloc_try( + thread_t thread); + extern void stack_collect(void); +extern void stack_init(void); + +extern kern_return_t thread_state_initialize( + thread_t thread); + extern kern_return_t thread_setstatus( - thread_act_t thread, + thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t count); extern kern_return_t thread_getstatus( - thread_act_t thread, + thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); -extern kern_return_t thread_info_shuttle( - thread_act_t thread, +extern kern_return_t thread_info_internal( + thread_t thread, thread_flavor_t flavor, thread_info_t 
thread_info_out, mach_msg_type_number_t *thread_info_count); @@ -513,31 +457,24 @@ extern void thread_task_priority( integer_t priority, integer_t max_priority); -extern kern_return_t thread_get_special_port( - thread_act_t thread, - int which, - ipc_port_t *port); - -extern kern_return_t thread_set_special_port( - thread_act_t thread, - int which, - ipc_port_t port); - -extern thread_act_t switch_act( - thread_act_t act); +extern void thread_policy_reset( + thread_t thread); -extern thread_t kernel_thread_create( - void (*start)(void), - integer_t priority); +extern kern_return_t kernel_thread_create( + thread_continue_t continuation, + void *parameter, + integer_t priority, + thread_t *new_thread); -extern thread_t kernel_thread_with_priority( - void (*start)(void), - integer_t priority); +extern kern_return_t kernel_thread_start_priority( + thread_continue_t continuation, + void *parameter, + integer_t priority, + thread_t *new_thread); extern void machine_stack_attach( thread_t thread, - vm_offset_t stack, - void (*start)(thread_t)); + vm_offset_t stack); extern vm_offset_t machine_stack_detach( thread_t thread); @@ -554,26 +491,24 @@ extern thread_t machine_switch_context( extern void machine_load_context( thread_t thread); -extern void machine_switch_act( - thread_t thread, - thread_act_t old, - thread_act_t new); +extern kern_return_t machine_thread_state_initialize( + thread_t thread); extern kern_return_t machine_thread_set_state( - thread_act_t act, + thread_t thread, thread_flavor_t flavor, thread_state_t state, mach_msg_type_number_t count); extern kern_return_t machine_thread_get_state( - thread_act_t act, + thread_t thread, thread_flavor_t flavor, thread_state_t state, mach_msg_type_number_t *count); extern kern_return_t machine_thread_dup( - thread_act_t self, - thread_act_t target); + thread_t self, + thread_t target); extern void machine_thread_init(void); @@ -584,135 +519,97 @@ extern kern_return_t machine_thread_create( extern void machine_thread_destroy( thread_t thread); -extern void machine_thread_set_current( - thread_t thread); +extern void machine_set_current_thread( + thread_t thread); extern void machine_thread_terminate_self(void); +extern kern_return_t machine_thread_get_kern_state( + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count); + + /* * XXX Funnel locks XXX */ struct funnel_lock { int fnl_type; /* funnel type */ - mutex_t *fnl_mutex; /* underlying mutex for the funnel */ + lck_mtx_t *fnl_mutex; /* underlying mutex for the funnel */ void * fnl_mtxholder; /* thread (last)holdng mutex */ void * fnl_mtxrelease; /* thread (last)releasing mutex */ - mutex_t *fnl_oldmutex; /* Mutex before collapsing split funnel */ + lck_mtx_t *fnl_oldmutex; /* Mutex before collapsing split funnel */ }; -typedef struct funnel_lock funnel_t; - -extern void funnel_lock( - funnel_t *lock); - -extern void funnel_unlock( - funnel_t *lock); - typedef struct ReturnHandler ReturnHandler; -#define act_lock(act) mutex_lock(&(act)->lock) -#define act_lock_try(act) mutex_try(&(act)->lock) -#define act_unlock(act) mutex_unlock(&(act)->lock) - -#define act_reference_locked(act) \ -MACRO_BEGIN \ - (act)->act_ref_count++; \ -MACRO_END - -#define act_deallocate_locked(act) \ -MACRO_BEGIN \ - if (--(act)->act_ref_count == 0) \ - panic("act_deallocate_locked"); \ -MACRO_END - -extern void act_reference( - thread_act_t act); - -extern void act_deallocate( - thread_act_t act); - -extern void act_attach( - thread_act_t act, - thread_t thread); - 
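/*
 * Aside: thread reference counting is now the macro pair over
 * hw_atomic_add()/hw_atomic_sub() defined above, with
 * thread_deallocate() doing the real teardown on the final drop.
 * A sketch of the take/use/drop pattern (caller name hypothetical):
 */
static void
example_keep_thread(thread_t thread)
{
	thread_reference(thread);	/* hw_atomic_add(&thread->ref_count, 1) */

	/* ... thread cannot be freed while the reference is held ... */

	thread_deallocate(thread);	/* destroyed here if this was the last one */
}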
-extern void act_detach( - thread_act_t act); - -extern thread_t act_lock_thread( - thread_act_t act); - -extern void act_unlock_thread( - thread_act_t act); - -extern thread_act_t thread_lock_act( - thread_t thread); - -extern void thread_unlock_act( - thread_t thread); +#define thread_mtx_lock(thread) mutex_lock(&(thread)->mutex) +#define thread_mtx_try(thread) mutex_try(&(thread)->mutex) +#define thread_mtx_unlock(thread) mutex_unlock(&(thread)->mutex) extern void act_execute_returnhandlers(void); extern void install_special_handler( - thread_act_t thread); + thread_t thread); extern void special_handler( ReturnHandler *rh, - thread_act_t act); + thread_t thread); #else /* MACH_KERNEL_PRIVATE */ -typedef struct funnel_lock funnel_t; +__BEGIN_DECLS -extern boolean_t thread_should_halt( - thread_t thread); +extern thread_t current_thread(void); + +extern void thread_reference( + thread_t thread); -extern void act_reference( - thread_act_t act); +extern void thread_deallocate( + thread_t thread); -extern void act_deallocate( - thread_act_t act); +__END_DECLS #endif /* MACH_KERNEL_PRIVATE */ -extern thread_t kernel_thread( - task_t task, - void (*start)(void)); +#ifdef KERNEL_PRIVATE -extern void thread_set_cont_arg( - int arg); +typedef struct funnel_lock funnel_t; -extern int thread_get_cont_arg(void); +#ifdef MACH_KERNEL_PRIVATE -/* JMM - These are only temporary */ -extern boolean_t is_thread_running(thread_act_t); /* True is TH_RUN */ -extern boolean_t is_thread_idle(thread_t); /* True is TH_IDLE */ -extern kern_return_t get_thread_waitresult(thread_t); +extern void funnel_lock( + funnel_t *lock); -typedef void (thread_apc_handler_t)(thread_act_t); +extern void funnel_unlock( + funnel_t *lock); -extern kern_return_t thread_apc_set(thread_act_t, thread_apc_handler_t); -extern kern_return_t thread_apc_clear(thread_act_t, thread_apc_handler_t); +vm_offset_t min_valid_stack_address(void); +vm_offset_t max_valid_stack_address(void); -extern vm_map_t swap_act_map(thread_act_t, vm_map_t); +#endif /* MACH_KERNEL_PRIVATE */ -extern void *get_bsdthread_info(thread_act_t); -extern void set_bsdthread_info(thread_act_t, void *); -extern task_t get_threadtask(thread_act_t); +__BEGIN_DECLS -#endif /* __APPLE_API_PRIVATE */ +extern funnel_t *thread_funnel_get(void); -#ifdef __APPLE_API_UNSTABLE +extern boolean_t thread_funnel_set( + funnel_t *lock, + boolean_t funneled); -#if !defined(MACH_KERNEL_PRIVATE) +extern thread_t kernel_thread( + task_t task, + void (*start)(void)); -extern thread_act_t current_act(void); +__END_DECLS -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_UNSTABLE */ +__BEGIN_DECLS -#ifdef __APPLE_API_EVOLVING +#ifdef XNU_KERNEL_PRIVATE /* * XXX Funnel locks XXX @@ -723,28 +620,55 @@ extern thread_act_t current_act(void); extern funnel_t *funnel_alloc( int type); -extern funnel_t *thread_funnel_get(void); - -extern boolean_t thread_funnel_set( - funnel_t *lock, - boolean_t funneled); +extern void funnel_free( + funnel_t *lock); -extern boolean_t thread_funnel_merge( - funnel_t *lock, - funnel_t *other); +extern void thread_read_times( + thread_t thread, + time_value_t *user_time, + time_value_t *system_time); -#endif /* __APPLE_API_EVOLVING */ +extern void thread_setuserstack( + thread_t thread, + mach_vm_offset_t user_stack); -#ifdef __APPLE_API_PRIVATE +extern uint64_t thread_adjuserstack( + thread_t thread, + int adjust); -extern boolean_t refunnel_hint( +extern void thread_setentrypoint( thread_t thread, - wait_result_t wresult); + 
mach_vm_offset_t entry); + +extern kern_return_t thread_wire_internal( + host_priv_t host_priv, + thread_t thread, + boolean_t wired, + boolean_t *prev_state); + +/* JMM - These are only temporary */ +extern boolean_t is_thread_running(thread_t); /* True is TH_RUN */ +extern boolean_t is_thread_idle(thread_t); /* True is TH_IDLE */ -/* For use by CHUD */ -vm_offset_t min_valid_stack_address(void); -vm_offset_t max_valid_stack_address(void); +extern kern_return_t thread_dup(thread_t); + +extern task_t get_threadtask(thread_t); + +extern void *get_bsdthread_info(thread_t); +extern void set_bsdthread_info(thread_t, void *); +extern void *uthread_alloc(task_t, thread_t); +extern void uthread_free(task_t, void *, void *); + +extern boolean_t thread_should_halt( + thread_t thread); + +#endif /* XNU_KERNEL_PRIVATE */ + +extern kern_return_t kernel_thread_start( + thread_continue_t continuation, + void *parameter, + thread_t *new_thread); -#endif /* __APPLE_API_PRIVATE */ +__END_DECLS #endif /* _KERN_THREAD_H_ */ diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c index bd0f1a5a9..872fc21dc 100644 --- a/osfmk/kern/thread_act.c +++ b/osfmk/kern/thread_act.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -41,21 +41,21 @@ * * Author: Bryan Ford, University of Utah CSS * - * Thread_Activation management routines + * Thread management routines */ - -#include -#include +#include #include #include -#include +#include +#include +#include + +#include +#include #include #include #include -#include #include -#include -#include #include #include #include @@ -67,39 +67,15 @@ #include #include #include -#include /*** ??? fix so this can be removed ***/ #include +#include #include #include -/* - * Track the number of times we need to swapin a thread to deallocate it. - */ -int act_free_swapin = 0; - -/* - * Forward declarations for functions local to this file. - */ -kern_return_t act_abort( thread_act_t, boolean_t); -void special_handler(ReturnHandler *, thread_act_t); -kern_return_t act_set_state_locked(thread_act_t, int, - thread_state_t, - mach_msg_type_number_t); -kern_return_t act_get_state_locked(thread_act_t, int, - thread_state_t, - mach_msg_type_number_t *); -void act_set_astbsd(thread_act_t); -void act_set_apc(thread_act_t); -void act_ulock_release_all(thread_act_t thr_act); - -void install_special_handler_locked(thread_act_t); - -static void act_disable(thread_act_t); - -/* - * Thread interfaces accessed via a thread_activation: - */ - +void act_abort(thread_t); +void act_set_apc(thread_t); +void install_special_handler_locked(thread_t); +void special_handler_continue(void); /* * Internal routine to terminate a thread. @@ -107,39 +83,31 @@ static void act_disable(thread_act_t); */ kern_return_t thread_terminate_internal( - register thread_act_t act) + thread_t thread) { - kern_return_t result; - thread_t thread; + kern_return_t result = KERN_SUCCESS; - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); - } + if (thread->active) { + thread->active = FALSE; - act_disable(act); - result = act_abort(act, FALSE); + act_abort(thread); - /* - * Make sure this thread enters the kernel - * Must unlock the act, but leave the shuttle - * captured in this act. 
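/*
 * Aside: kernel_thread_start(), declared in the thread.h hunk above,
 * replaces the old kernel_thread_with_priority() style of spawning.
 * The caller receives a reference on the new thread and is expected to
 * drop it, as the thread_call.c hunks later in this patch do.  A sketch
 * (body and caller names hypothetical; the body typically never returns):
 */
static void
example_thread_body(void *parameter, wait_result_t wresult)
{
	/* ... work ...; normally blocks or terminates rather than returning */
}

static kern_return_t
example_spawn(void)
{
	thread_t	thread;
	kern_return_t	result;

	result = kernel_thread_start(example_thread_body, NULL, &thread);
	if (result != KERN_SUCCESS)
		return (result);

	thread_deallocate(thread);	/* drop the creation reference */
	return (KERN_SUCCESS);
}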
- */ - if (thread != current_thread()) { - act_unlock(act); - - if (thread_stop(thread)) - thread_unstop(thread); - else - result = KERN_ABORTED; - - act_lock(act); + if (thread->started) + clear_wait(thread, THREAD_INTERRUPTED); + else { + clear_wait(thread, THREAD_AWAKENED); + thread->started = TRUE; + } } + else + result = KERN_TERMINATED; - clear_wait(thread, act->started? THREAD_INTERRUPTED: THREAD_AWAKENED); - act_unlock_thread(act); + thread_mtx_unlock(thread); + + if (thread != current_thread() && result == KERN_SUCCESS) + thread_wait(thread); return (result); } @@ -149,18 +117,18 @@ thread_terminate_internal( */ kern_return_t thread_terminate( - register thread_act_t act) + thread_t thread) { kern_return_t result; - if (act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - if ( act->task == kernel_task && - act != current_act() ) + if ( thread->task == kernel_task && + thread != current_thread() ) return (KERN_FAILURE); - result = thread_terminate_internal(act); + result = thread_terminate_internal(thread); /* * If a kernel thread is terminating itself, force an AST here. @@ -168,9 +136,8 @@ thread_terminate( * code - and all threads finish their own termination in the * special handler APC. */ - if (act->task == kernel_task) { + if (thread->task == kernel_task) { ml_set_interrupts_enabled(FALSE); - assert(act == current_act()); ast_taken(AST_APC, TRUE); panic("thread_terminate"); } @@ -183,109 +150,92 @@ thread_terminate( * This is a recursive-style suspension of the thread, a count of * suspends is maintained. * - * Called with act_lock held. + * Called with thread mutex held. */ void thread_hold( - register thread_act_t act) + register thread_t thread) { - thread_t thread = act->thread; - - if (act->suspend_count++ == 0) { - install_special_handler(act); - if ( act->started && - thread != THREAD_NULL && - thread->top_act == act ) - thread_wakeup_one(&act->suspend_count); + if (thread->suspend_count++ == 0) { + install_special_handler(thread); + if (thread->started) + thread_wakeup_one(&thread->suspend_count); } } /* - * Decrement internal suspension count for thr_act, setting thread + * Decrement internal suspension count, setting thread * runnable when count falls to zero. * - * Called with act_lock held. + * Called with thread mutex held. 
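/*
 * Aside: thread_hold()/thread_release() above keep a recursive
 * suspend_count, so holds must be balanced by releases.  A sketch of
 * the expected usage (caller name hypothetical; the thread mutex is
 * held, as both routines require):
 */
static void
example_balanced_hold(thread_t thread)
{
	thread_mtx_lock(thread);

	thread_hold(thread);	/* 0 -> 1: installs the special handler */
	thread_hold(thread);	/* 1 -> 2: purely bumps the count */

	thread_release(thread);	/* 2 -> 1: still held */
	thread_release(thread);	/* 1 -> 0: thread made runnable again */

	thread_mtx_unlock(thread);
}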
*/ void thread_release( - register thread_act_t act) + register thread_t thread) { - thread_t thread = act->thread; - - if ( act->suspend_count > 0 && - --act->suspend_count == 0 && - thread != THREAD_NULL && - thread->top_act == act ) { - if (!act->started) { + if ( thread->suspend_count > 0 && + --thread->suspend_count == 0 ) { + if (thread->started) + thread_wakeup_one(&thread->suspend_count); + else { clear_wait(thread, THREAD_AWAKENED); - act->started = TRUE; + thread->started = TRUE; } - else - thread_wakeup_one(&act->suspend_count); } } kern_return_t thread_suspend( - register thread_act_t act) + register thread_t thread) { - thread_t thread; + thread_t self = current_thread(); + kern_return_t result = KERN_SUCCESS; - if (act == THR_ACT_NULL || act->task == kernel_task) + if (thread == THREAD_NULL || thread->task == kernel_task) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); - } - - if ( act->user_stop_count++ == 0 && - act->suspend_count++ == 0 ) { - install_special_handler(act); - if ( thread != current_thread() && - thread != THREAD_NULL && - thread->top_act == act ) { - assert(act->started); - thread_wakeup_one(&act->suspend_count); - act_unlock_thread(act); - - thread_wait(thread); + if (thread->active) { + if ( thread->user_stop_count++ == 0 && + thread->suspend_count++ == 0 ) { + install_special_handler(thread); + if (thread != self) + thread_wakeup_one(&thread->suspend_count); } - else - act_unlock_thread(act); } else - act_unlock_thread(act); + result = KERN_TERMINATED; + + thread_mtx_unlock(thread); + + if (thread != self && result == KERN_SUCCESS) + thread_wait(thread); - return (KERN_SUCCESS); + return (result); } kern_return_t thread_resume( - register thread_act_t act) + register thread_t thread) { - kern_return_t result = KERN_SUCCESS; - thread_t thread; + kern_return_t result = KERN_SUCCESS; - if (act == THR_ACT_NULL || act->task == kernel_task) + if (thread == THREAD_NULL || thread->task == kernel_task) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (act->active) { - if (act->user_stop_count > 0) { - if ( --act->user_stop_count == 0 && - --act->suspend_count == 0 && - thread != THREAD_NULL && - thread->top_act == act ) { - if (!act->started) { + if (thread->active) { + if (thread->user_stop_count > 0) { + if ( --thread->user_stop_count == 0 && + --thread->suspend_count == 0 ) { + if (thread->started) + thread_wakeup_one(&thread->suspend_count); + else { clear_wait(thread, THREAD_AWAKENED); - act->started = TRUE; + thread->started = TRUE; } - else - thread_wakeup_one(&act->suspend_count); } } else @@ -294,7 +244,7 @@ thread_resume( else result = KERN_TERMINATED; - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } @@ -306,25 +256,21 @@ thread_resume( */ kern_return_t thread_depress_abort( - register thread_act_t thr_act) + register thread_t thread) { - register thread_t thread; - kern_return_t result; + kern_return_t result; - if (thr_act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(thr_act); - /* if activation is terminating, this operation is not meaningful */ - if (!thr_act->active) { - act_unlock_thread(thr_act); - - return (KERN_TERMINATED); - } + thread_mtx_lock(thread); - result = _mk_sp_thread_depress_abort(thread, FALSE); + if (thread->active) + result = thread_depress_abort_internal(thread); + else + result = 
KERN_TERMINATED; - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); return (result); } @@ -332,410 +278,291 @@ thread_depress_abort( /* * Indicate that the activation should run its - * special handler to detect the condition. + * special handler to detect a condition. * - * Called with act_lock held. + * Called with thread mutex held. */ -kern_return_t +void act_abort( - thread_act_t act, - boolean_t chain_break ) + thread_t thread) { - thread_t thread = act->thread; spl_t s = splsched(); - assert(thread->top_act == act); - thread_lock(thread); + if (!(thread->state & TH_ABORT)) { thread->state |= TH_ABORT; - install_special_handler_locked(act); - } else { - thread->state &= ~TH_ABORT_SAFELY; + install_special_handler_locked(thread); } + else + thread->state &= ~TH_ABORT_SAFELY; + thread_unlock(thread); splx(s); - - return (KERN_SUCCESS); } kern_return_t thread_abort( - register thread_act_t act) + register thread_t thread) { - kern_return_t result; - thread_t thread; + kern_return_t result = KERN_SUCCESS; - if (act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); + if (thread->active) { + act_abort(thread); + clear_wait(thread, THREAD_INTERRUPTED); } + else + result = KERN_TERMINATED; - result = act_abort(act, FALSE); - clear_wait(thread, THREAD_INTERRUPTED); - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } kern_return_t thread_abort_safely( - thread_act_t act) + thread_t thread) { - thread_t thread; - kern_return_t ret; - spl_t s; + kern_return_t result = KERN_SUCCESS; - if ( act == THR_ACT_NULL ) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); - } + if (thread->active) { + spl_t s = splsched(); - s = splsched(); - thread_lock(thread); - if (!thread->at_safe_point || - clear_wait_internal(thread, THREAD_INTERRUPTED) != KERN_SUCCESS) { - if (!(thread->state & TH_ABORT)) { - thread->state |= (TH_ABORT|TH_ABORT_SAFELY); - install_special_handler_locked(act); + thread_lock(thread); + if (!thread->at_safe_point || + clear_wait_internal(thread, THREAD_INTERRUPTED) != KERN_SUCCESS) { + if (!(thread->state & TH_ABORT)) { + thread->state |= (TH_ABORT|TH_ABORT_SAFELY); + install_special_handler_locked(thread); + } } + thread_unlock(thread); + splx(s); } - thread_unlock(thread); - splx(s); + else + result = KERN_TERMINATED; - act_unlock_thread(act); + thread_mtx_unlock(thread); - return (KERN_SUCCESS); + return (result); } /*** backward compatibility hacks ***/ #include #include #include -#include kern_return_t thread_info( - thread_act_t thr_act, + thread_t thread, thread_flavor_t flavor, thread_info_t thread_info_out, mach_msg_type_number_t *thread_info_count) { - register thread_t thread; kern_return_t result; - if (thr_act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(thr_act); - if (!thr_act->active) { - act_unlock_thread(thr_act); + thread_mtx_lock(thread); - return (KERN_TERMINATED); - } - - result = thread_info_shuttle(thr_act, flavor, - thread_info_out, thread_info_count); + if (thread->active) + result = thread_info_internal( + thread, flavor, thread_info_out, thread_info_count); + else + result = KERN_TERMINATED; - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); return (result); 
} -/* - * Routine: thread_get_special_port [kernel call] - * Purpose: - * Clones a send right for one of the thread's - * special ports. - * Conditions: - * Nothing locked. - * Returns: - * KERN_SUCCESS Extracted a send right. - * KERN_INVALID_ARGUMENT The thread is null. - * KERN_FAILURE The thread is dead. - * KERN_INVALID_ARGUMENT Invalid special port. - */ - kern_return_t -thread_get_special_port( - thread_act_t thr_act, - int which, - ipc_port_t *portp) +thread_get_state( + register thread_t thread, + int flavor, + thread_state_t state, /* pointer to OUT array */ + mach_msg_type_number_t *state_count) /*IN/OUT*/ { - ipc_port_t *whichp; - ipc_port_t port; - thread_t thread; - - if (!thr_act) - return KERN_INVALID_ARGUMENT; - thread = act_lock_thread(thr_act); - switch (which) { - case THREAD_KERNEL_PORT: - whichp = &thr_act->ith_sself; - break; - - default: - act_unlock_thread(thr_act); - return KERN_INVALID_ARGUMENT; - } - - if (!thr_act->active) { - act_unlock_thread(thr_act); - return KERN_FAILURE; - } - - port = ipc_port_copy_send(*whichp); - act_unlock_thread(thr_act); - - *portp = port; - return KERN_SUCCESS; -} + kern_return_t result = KERN_SUCCESS; -/* - * Routine: thread_set_special_port [kernel call] - * Purpose: - * Changes one of the thread's special ports, - * setting it to the supplied send right. - * Conditions: - * Nothing locked. If successful, consumes - * the supplied send right. - * Returns: - * KERN_SUCCESS Changed the special port. - * KERN_INVALID_ARGUMENT The thread is null. - * KERN_FAILURE The thread is dead. - * KERN_INVALID_ARGUMENT Invalid special port. - */ + if (thread == THREAD_NULL) + return (KERN_INVALID_ARGUMENT); -kern_return_t -thread_set_special_port( - thread_act_t thr_act, - int which, - ipc_port_t port) -{ - ipc_port_t *whichp; - ipc_port_t old; - thread_t thread; + thread_mtx_lock(thread); - if (thr_act == 0) - return KERN_INVALID_ARGUMENT; + if (thread->active) { + if (thread != current_thread()) { + thread_hold(thread); - thread = act_lock_thread(thr_act); - switch (which) { - case THREAD_KERNEL_PORT: - whichp = &thr_act->ith_self; - break; + thread_mtx_unlock(thread); - default: - act_unlock_thread(thr_act); - return KERN_INVALID_ARGUMENT; - } + if (thread_stop(thread)) { + thread_mtx_lock(thread); + result = machine_thread_get_state( + thread, flavor, state, state_count); + thread_unstop(thread); + } + else { + thread_mtx_lock(thread); + result = KERN_ABORTED; + } - if (!thr_act->active) { - act_unlock_thread(thr_act); - return KERN_FAILURE; + thread_release(thread); + } + else + result = machine_thread_get_state( + thread, flavor, state, state_count); } + else + result = KERN_TERMINATED; - old = *whichp; - *whichp = port; - act_unlock_thread(thr_act); + thread_mtx_unlock(thread); - if (IP_VALID(old)) - ipc_port_release_send(old); - return KERN_SUCCESS; + return (result); } /* - * thread state should always be accessible by locking the thread - * and copying it. The activation messes things up so for right - * now if it's not the top of the chain, use a special handler to - * get the information when the shuttle returns to the activation. + * Change thread's machine-dependent state. Called with nothing + * locked. Returns same way. 
*/ kern_return_t -thread_get_state( - register thread_act_t act, +thread_set_state( + register thread_t thread, int flavor, - thread_state_t state, /* pointer to OUT array */ - mach_msg_type_number_t *state_count) /*IN/OUT*/ + thread_state_t state, + mach_msg_type_number_t state_count) { kern_return_t result = KERN_SUCCESS; - thread_t thread; - if (act == THR_ACT_NULL || act == current_act()) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); - } + if (thread->active) { + if (thread != current_thread()) { + thread_hold(thread); - thread_hold(act); + thread_mtx_unlock(thread); - for (;;) { - thread_t thread1; - - if ( thread == THREAD_NULL || - thread->top_act != act ) - break; - act_unlock_thread(act); + if (thread_stop(thread)) { + thread_mtx_lock(thread); + result = machine_thread_set_state( + thread, flavor, state, state_count); + thread_unstop(thread); + } + else { + thread_mtx_lock(thread); + result = KERN_ABORTED; + } - if (!thread_stop(thread)) { - result = KERN_ABORTED; - (void)act_lock_thread(act); - thread = THREAD_NULL; - break; + thread_release(thread); } - - thread1 = act_lock_thread(act); - if (thread1 == thread) - break; - - thread_unstop(thread); - thread = thread1; + else + result = machine_thread_set_state( + thread, flavor, state, state_count); } + else + result = KERN_TERMINATED; - if (result == KERN_SUCCESS) - result = machine_thread_get_state(act, flavor, state, state_count); - - if ( thread != THREAD_NULL && - thread->top_act == act ) - thread_unstop(thread); - - thread_release(act); - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } - + + /* - * Change thread's machine-dependent state. Called with nothing - * locked. Returns same way. + * Kernel-internal "thread" interfaces used outside this file: + */ + +/* Initialize (or re-initialize) a thread state. Called from execve + * with nothing locked, returns same way. 
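/*
 * Aside: thread_get_state(), thread_set_state() and
 * thread_state_initialize() in this file all share one idiom: mark the
 * target held, drop its mutex, stop it, operate, then undo.  A
 * condensed sketch of that idiom (function name hypothetical; the
 * machine_thread_*() call stands in for the flavor-specific work):
 */
static kern_return_t
example_operate_stopped(thread_t thread)
{
	kern_return_t	result = KERN_SUCCESS;

	thread_mtx_lock(thread);

	if (!thread->active) {
		thread_mtx_unlock(thread);
		return (KERN_TERMINATED);
	}

	if (thread != current_thread()) {
		thread_hold(thread);		/* pin the suspend count */
		thread_mtx_unlock(thread);

		if (thread_stop(thread)) {	/* wait for it to go off CPU */
			thread_mtx_lock(thread);
			/* ... machine_thread_*() call goes here ... */
			thread_unstop(thread);
		}
		else {
			thread_mtx_lock(thread);
			result = KERN_ABORTED;	/* the stop was interrupted */
		}

		thread_release(thread);
	}
	/* else: operate on ourself directly, no stop needed */

	thread_mtx_unlock(thread);

	return (result);
}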
*/ kern_return_t -thread_set_state( - register thread_act_t act, - int flavor, - thread_state_t state, - mach_msg_type_number_t state_count) +thread_state_initialize( + register thread_t thread) { kern_return_t result = KERN_SUCCESS; - thread_t thread; - if (act == THR_ACT_NULL || act == current_act()) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); + thread_mtx_lock(thread); - if (!act->active) { - act_unlock_thread(act); - return (KERN_TERMINATED); - } - - thread_hold(act); + if (thread->active) { + if (thread != current_thread()) { + thread_hold(thread); - for (;;) { - thread_t thread1; + thread_mtx_unlock(thread); - if ( thread == THREAD_NULL || - thread->top_act != act ) - break; - act_unlock_thread(act); + if (thread_stop(thread)) { + thread_mtx_lock(thread); + result = machine_thread_state_initialize( thread ); + thread_unstop(thread); + } + else { + thread_mtx_lock(thread); + result = KERN_ABORTED; + } - if (!thread_stop(thread)) { - result = KERN_ABORTED; - (void)act_lock_thread(act); - thread = THREAD_NULL; - break; + thread_release(thread); } - - thread1 = act_lock_thread(act); - if (thread1 == thread) - break; - - thread_unstop(thread); - thread = thread1; + else + result = machine_thread_state_initialize( thread ); } + else + result = KERN_TERMINATED; - if (result == KERN_SUCCESS) - result = machine_thread_set_state(act, flavor, state, state_count); - - if ( thread != THREAD_NULL && - thread->top_act == act ) - thread_unstop(thread); - - thread_release(act); - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } -/* - * Kernel-internal "thread" interfaces used outside this file: - */ kern_return_t thread_dup( - register thread_act_t target) + register thread_t target) { + thread_t self = current_thread(); kern_return_t result = KERN_SUCCESS; - thread_act_t self = current_act(); - thread_t thread; - if (target == THR_ACT_NULL || target == self) + if (target == THREAD_NULL || target == self) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(target); + thread_mtx_lock(target); - if (!target->active) { - act_unlock_thread(target); - return (KERN_TERMINATED); - } - - thread_hold(target); + if (target->active) { + thread_hold(target); - for (;;) { - thread_t thread1; + thread_mtx_unlock(target); - if ( thread == THREAD_NULL || - thread->top_act != target ) - break; - act_unlock_thread(target); - - if (!thread_stop(thread)) { + if (thread_stop(target)) { + thread_mtx_lock(target); + result = machine_thread_dup(self, target); + thread_unstop(target); + } + else { + thread_mtx_lock(target); result = KERN_ABORTED; - (void)act_lock_thread(target); - thread = THREAD_NULL; - break; } - thread1 = act_lock_thread(target); - if (thread1 == thread) - break; - - thread_unstop(thread); - thread = thread1; + thread_release(target); } + else + result = KERN_TERMINATED; - if (result == KERN_SUCCESS) - result = machine_thread_dup(self, target); - - if ( thread != THREAD_NULL && - thread->top_act == target ) - thread_unstop(thread); - - thread_release(target); - act_unlock_thread(target); + thread_mtx_unlock(target); return (result); } @@ -749,29 +576,13 @@ thread_dup( */ kern_return_t thread_setstatus( - register thread_act_t act, + register thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t count) { - kern_return_t result = KERN_SUCCESS; - thread_t thread; - - thread = act_lock_thread(act); - - if ( act != current_act() && - (act->suspend_count == 0 || - thread == THREAD_NULL || - (thread->state & 
TH_RUN) || - thread->top_act != act) ) - result = KERN_FAILURE; - - if (result == KERN_SUCCESS) - result = machine_thread_set_state(act, flavor, tstate, count); - act_unlock_thread(act); - - return (result); + return (thread_set_state(thread, flavor, tstate, count)); } /* @@ -781,350 +592,67 @@ thread_setstatus( */ kern_return_t thread_getstatus( - register thread_act_t act, + register thread_t thread, int flavor, thread_state_t tstate, mach_msg_type_number_t *count) { - kern_return_t result = KERN_SUCCESS; - thread_t thread; - - thread = act_lock_thread(act); - - if ( act != current_act() && - (act->suspend_count == 0 || - thread == THREAD_NULL || - (thread->state & TH_RUN) || - thread->top_act != act) ) - result = KERN_FAILURE; - - if (result == KERN_SUCCESS) - result = machine_thread_get_state(act, flavor, tstate, count); - - act_unlock_thread(act); - - return (result); -} - -/* - * Kernel-internal thread_activation interfaces used outside this file: - */ - -void -act_reference( - thread_act_t act) -{ - if (act == NULL) - return; - - act_lock(act); - act_reference_locked(act); - act_unlock(act); -} - -void -act_deallocate( - thread_act_t act) -{ - task_t task; - thread_t thread; - void *task_proc; - - if (act == NULL) - return; - - act_lock(act); - - if (--act->act_ref_count > 0) { - act_unlock(act); - return; - } - - assert(!act->active); - - thread = act->thread; - assert(thread != NULL); - - thread->top_act = NULL; - - act_unlock(act); - - task = act->task; - task_lock(task); - - task_proc = task->bsd_info; - - { - time_value_t user_time, system_time; - - thread_read_times(thread, &user_time, &system_time); - time_value_add(&task->total_user_time, &user_time); - time_value_add(&task->total_system_time, &system_time); - - queue_remove(&task->threads, act, thread_act_t, task_threads); - act->task_threads.next = NULL; - task->thread_count--; - task->res_thread_count--; - } - - task_unlock(task); - - act_prof_deallocate(act); - ipc_thr_act_terminate(act); - -#ifdef MACH_BSD - { - extern void uthread_free(task_t, void *, void *, void *); - void *ut = act->uthread; - - uthread_free(task, act, ut, task_proc); - act->uthread = NULL; - } -#endif /* MACH_BSD */ - - task_deallocate(task); - - thread_deallocate(thread); -} - - -/* - * act_attach - Attach an thr_act to the top of a thread ("push the stack"). - * - * The thread_shuttle must be either the current one or a brand-new one. - * Assumes the thr_act is active but not in use. - * - * Already locked: thr_act plus "appropriate" thread-related locks - * (see act_lock_thread()). - */ -void -act_attach( - thread_act_t act, - thread_t thread) -{ - thread_act_t lower; - - /* - * Chain the act onto the thread's act stack. - */ - act->act_ref_count++; - act->thread = thread; - act->higher = THR_ACT_NULL; - lower = act->lower = thread->top_act; - if (lower != THR_ACT_NULL) - lower->higher = act; - - thread->top_act = act; + return (thread_get_state(thread, flavor, tstate, count)); } /* - * act_detach + * install_special_handler: * - * Remove the current thr_act from the top of the current thread, i.e. - * "pop the stack". Assumes already locked: thr_act plus "appropriate" - * thread-related locks (see act_lock_thread). 
- */ -void -act_detach( - thread_act_t cur_act) -{ - thread_t cur_thread = cur_act->thread; - - /* Unlink the thr_act from the thread's thr_act stack */ - cur_thread->top_act = cur_act->lower; - cur_act->thread = 0; - cur_act->act_ref_count--; - assert(cur_act->act_ref_count > 0); - -#if MACH_ASSERT - cur_act->lower = cur_act->higher = THR_ACT_NULL; - if (cur_thread->top_act) - cur_thread->top_act->higher = THR_ACT_NULL; -#endif /* MACH_ASSERT */ - - return; -} - - -/* - * Synchronize a thread operation with migration. - * Called with nothing locked. - * Returns with thr_act locked. - */ -thread_t -act_lock_thread( - thread_act_t thr_act) -{ - - /* - * JMM - We have moved away from explicit RPC locks - * and towards a generic migration approach. The wait - * queue lock will be the point of synchronization for - * the shuttle linkage when this is rolled out. Until - * then, just lock the act. - */ - act_lock(thr_act); - return (thr_act->thread); -} - -/* - * Unsynchronize with migration (i.e., undo an act_lock_thread() call). - * Called with thr_act locked, plus thread locks held that are - * "correct" for thr_act's state. Returns with nothing locked. - */ -void -act_unlock_thread(thread_act_t thr_act) -{ - act_unlock(thr_act); -} - -/* - * Synchronize with migration given a pointer to a shuttle (instead of an - * activation). Called with nothing locked; returns with all - * "appropriate" thread-related locks held (see act_lock_thread()). - */ -thread_act_t -thread_lock_act( - thread_t thread) -{ - thread_act_t thr_act; - - while (1) { - thr_act = thread->top_act; - if (!thr_act) - break; - if (!act_lock_try(thr_act)) { - mutex_pause(); - continue; - } - break; - } - return (thr_act); -} - -/* - * Unsynchronize with an activation starting from a pointer to - * a shuttle. - */ -void -thread_unlock_act( - thread_t thread) -{ - thread_act_t thr_act; - - if (thr_act = thread->top_act) { - act_unlock(thr_act); - } -} - -/* - * switch_act - * - * If a new activation is given, switch to it. If not, - * switch to the lower activation (pop). Returns the old - * activation. This is for migration support. - */ -thread_act_t -switch_act( - thread_act_t act) -{ - thread_act_t old, new; - thread_t thread; - - disable_preemption(); - - thread = current_thread(); - - /* - * Find the old and new activation for switch. - */ - old = thread->top_act; - - if (act) { - new = act; - new->thread = thread; - } - else { - new = old->lower; - } - - assert(new != THR_ACT_NULL); - assert(current_processor()->active_thread == thread); - - /* This is where all the work happens */ - machine_switch_act(thread, old, new); - - /* - * Push or pop an activation on the chain. - */ - if (act) { - act_attach(new, thread); - } - else { - act_detach(old); - } - - enable_preemption(); - - return(old); -} - -/* - * install_special_handler * Install the special returnhandler that handles suspension and * termination, if it hasn't been installed already. * - * Already locked: RPC-related locks for thr_act, but not - * scheduling lock (thread_lock()) of the associated thread. + * Called with the thread mutex held. 
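/*
 * Aside: a ReturnHandler (see the kern/thread.h hunk above) is a
 * singly linked callback run on the way back to user mode.  A sketch
 * of queueing one ahead of the always-last special handler; the
 * handler and helper names are hypothetical:
 */
static void
example_handler(ReturnHandler *rh, thread_t thread)
{
	/* executed from act_execute_returnhandlers() on the next APC */
}

static struct ReturnHandler example_rh = { NULL, example_handler };

static void
example_queue_handler(thread_t thread)
{
	spl_t	s;

	/* caller holds the thread mutex, as install_special_handler() requires */
	s = splsched();
	thread_lock(thread);

	example_rh.next = thread->handlers;	/* push at the head... */
	thread->handlers = &example_rh;		/* ...the special handler stays last */
	thread_ast_set(thread, AST_APC);	/* force a pass through the AST path */

	thread_unlock(thread);
	splx(s);
}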
*/ void install_special_handler( - thread_act_t thr_act) + thread_t thread) { - spl_t spl; - thread_t thread = thr_act->thread; + spl_t s = splsched(); - spl = splsched(); thread_lock(thread); - install_special_handler_locked(thr_act); + install_special_handler_locked(thread); thread_unlock(thread); - splx(spl); + splx(s); } /* - * install_special_handler_locked + * install_special_handler_locked: + * * Do the work of installing the special_handler. * - * Already locked: RPC-related locks for thr_act, plus the - * scheduling lock (thread_lock()) of the associated thread. + * Called with the thread mutex and scheduling lock held. */ void install_special_handler_locked( - thread_act_t act) + thread_t thread) { - thread_t thread = act->thread; ReturnHandler **rh; /* The work handler must always be the last ReturnHandler on the list, because it can do tricky things like detach the thr_act. */ - for (rh = &act->handlers; *rh; rh = &(*rh)->next) + for (rh = &thread->handlers; *rh; rh = &(*rh)->next) continue; - if (rh != &act->special_handler.next) - *rh = &act->special_handler; - - if (act == thread->top_act) { - /* - * Temporarily undepress, so target has - * a chance to do locking required to - * block itself in special_handler(). - */ - if (thread->sched_mode & TH_MODE_ISDEPRESSED) - compute_priority(thread, TRUE); - } - thread_ast_set(act, AST_APC); - if (act == current_act()) - ast_propagate(act->ast); + if (rh != &thread->special_handler.next) + *rh = &thread->special_handler; + + /* + * Temporarily undepress, so target has + * a chance to do locking required to + * block itself in special_handler(). + */ + if (thread->sched_mode & TH_MODE_ISDEPRESSED) + compute_priority(thread, TRUE); + + thread_ast_set(thread, AST_APC); + + if (thread == current_thread()) + ast_propagate(thread->ast); else { processor_t processor = thread->last_processor; @@ -1135,67 +663,46 @@ install_special_handler_locked( } } -kern_return_t -thread_apc_set( - thread_act_t act, - thread_apc_handler_t apc) -{ - extern thread_apc_handler_t bsd_ast; - - assert(apc == bsd_ast); - return (KERN_FAILURE); -} - -kern_return_t -thread_apc_clear( - thread_act_t act, - thread_apc_handler_t apc) -{ - extern thread_apc_handler_t bsd_ast; - - assert(apc == bsd_ast); - return (KERN_FAILURE); -} - /* * Activation control support routines internal to this file: */ -/* - * act_execute_returnhandlers() - does just what the name says - * - * This is called by system-dependent code when it detects that - * thr_act->handlers is non-null while returning into user mode. 
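/*
 * Aside: setting an AST bit is only half the job when the target is
 * running on another processor.  The tail of
 * install_special_handler_locked() above and act_set_astbsd()/
 * act_set_apc() below all use the same remote-poke pattern, shown here
 * in isolation (helper name hypothetical; thread locked at splsched):
 */
static void
example_post_apc(thread_t thread)
{
	thread_ast_set(thread, AST_APC);

	if (thread == current_thread())
		ast_propagate(thread->ast);	/* honor it on the way out */
	else {
		processor_t	processor = thread->last_processor;

		if (	processor != PROCESSOR_NULL		&&
				processor->state == PROCESSOR_RUNNING	&&
				processor->active_thread == thread	)
			cause_ast_check(processor);	/* interrupt the remote CPU */
	}
}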
- */ void act_execute_returnhandlers(void) { - thread_act_t act = current_act(); + thread_t thread = current_thread(); - thread_ast_clear(act, AST_APC); + thread_ast_clear(thread, AST_APC); spllo(); for (;;) { ReturnHandler *rh; - thread_t thread = act_lock_thread(act); + + thread_mtx_lock(thread); (void)splsched(); thread_lock(thread); - rh = act->handlers; - if (!rh) { + + rh = thread->handlers; + if (rh != NULL) { + thread->handlers = rh->next; + thread_unlock(thread); spllo(); - act_unlock_thread(act); - return; - } - act->handlers = rh->next; - thread_unlock(thread); - spllo(); - act_unlock_thread(act); - /* Execute it */ - (*rh->handler)(rh, act); + thread_mtx_unlock(thread); + + /* Execute it */ + (*rh->handler)(rh, thread); + } + else + break; } + + thread_unlock(thread); + spllo(); + + thread_mtx_unlock(thread); } /* @@ -1210,12 +717,13 @@ act_execute_returnhandlers(void) void special_handler_continue(void) { - thread_act_t self = current_act(); + thread_t thread = current_thread(); + + thread_mtx_lock(thread); - if (self->suspend_count > 0) - install_special_handler(self); + if (thread->suspend_count > 0) + install_special_handler(thread); else { - thread_t thread = self->thread; spl_t s = splsched(); thread_lock(thread); @@ -1230,6 +738,8 @@ special_handler_continue(void) splx(s); } + thread_mtx_unlock(thread); + thread_exception_return(); /*NOTREACHED*/ } @@ -1240,13 +750,12 @@ special_handler_continue(void) */ void special_handler( - ReturnHandler *rh, - thread_act_t self) + __unused ReturnHandler *rh, + thread_t thread) { - thread_t thread = act_lock_thread(self); - spl_t s; + spl_t s; - assert(thread != THREAD_NULL); + thread_mtx_lock(thread); s = splsched(); thread_lock(thread); @@ -1254,212 +763,76 @@ special_handler( thread_unlock(thread); splx(s); - if (!self->active) { - act_unlock_thread(self); - thread_terminate_self(); - /*NOTREACHED*/ - } - /* * If we're suspended, go to sleep and wait for someone to wake us up. */ - if (self->suspend_count > 0) { - if (self->handlers == NULL) { - assert_wait(&self->suspend_count, THREAD_ABORTSAFE); - act_unlock_thread(self); - thread_block(special_handler_continue); - /*NOTREACHED*/ - } - - act_unlock_thread(self); - - special_handler_continue(); - /*NOTREACHED*/ - } - - act_unlock_thread(self); -} - -/* - * Already locked: activation (shuttle frozen within) - * - * Mark an activation inactive, and prepare it to terminate - * itself. - */ -static void -act_disable( - thread_act_t thr_act) -{ - thr_act->active = 0; - - /* Drop the thr_act reference taken for being active. - * (There is still at least one reference left: - * the one we were passed.) - * Inline the deallocate because thr_act is locked. - */ - act_deallocate_locked(thr_act); -} - -typedef struct GetSetState { - struct ReturnHandler rh; - int flavor; - void *state; - int *pcount; - int result; -} GetSetState; - -/* Local Forward decls */ -kern_return_t get_set_state( - thread_act_t thr_act, int flavor, - thread_state_t state, int *pcount, - void (*handler)(ReturnHandler *rh, thread_act_t thr_act)); -void get_state_handler(ReturnHandler *rh, thread_act_t thr_act); -void set_state_handler(ReturnHandler *rh, thread_act_t thr_act); - -/* - * get_set_state(thr_act ...) - * - * General code to install g/set_state handler. - * Called with thr_act's act_lock() and "appropriate" - * thread-related locks held. (See act_lock_thread().) 
- */ -kern_return_t -get_set_state( - thread_act_t act, - int flavor, - thread_state_t state, - int *pcount, - void (*handler)( - ReturnHandler *rh, - thread_act_t act)) -{ - GetSetState gss; - - /* Initialize a small parameter structure */ - gss.rh.handler = handler; - gss.flavor = flavor; - gss.state = state; - gss.pcount = pcount; - gss.result = KERN_ABORTED; /* iff wait below is interrupted */ - - /* Add it to the thr_act's return handler list */ - gss.rh.next = act->handlers; - act->handlers = &gss.rh; - - act_set_apc(act); - - assert(act->thread); - assert(act != current_act()); - - for (;;) { - wait_result_t result; - - if ( act->started && - act->thread->top_act == act ) - thread_wakeup_one(&act->suspend_count); - - /* - * Wait must be interruptible to avoid deadlock (e.g.) with - * task_suspend() when caller and target of get_set_state() - * are in same task. - */ - result = assert_wait(&gss, THREAD_ABORTSAFE); - act_unlock_thread(act); - - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); + if (thread->active) { + if (thread->suspend_count > 0) { + if (thread->handlers == NULL) { + assert_wait(&thread->suspend_count, THREAD_ABORTSAFE); + thread_mtx_unlock(thread); + thread_block((thread_continue_t)special_handler_continue); + /*NOTREACHED*/ + } - assert(result != THREAD_WAITING); + thread_mtx_unlock(thread); - if (gss.result != KERN_ABORTED) { - assert(result != THREAD_INTERRUPTED); - break; + special_handler_continue(); + /*NOTREACHED*/ } - - /* JMM - What about other aborts (like BSD signals)? */ - if (current_act()->handlers) - act_execute_returnhandlers(); - - act_lock_thread(act); } + else { + thread_mtx_unlock(thread); - return (gss.result); -} - -void -set_state_handler(ReturnHandler *rh, thread_act_t thr_act) -{ - GetSetState *gss = (GetSetState*)rh; - - gss->result = machine_thread_set_state(thr_act, gss->flavor, - gss->state, *gss->pcount); - thread_wakeup((event_t)gss); -} - -void -get_state_handler(ReturnHandler *rh, thread_act_t thr_act) -{ - GetSetState *gss = (GetSetState*)rh; - - gss->result = machine_thread_get_state(thr_act, gss->flavor, - gss->state, - (mach_msg_type_number_t *) gss->pcount); - thread_wakeup((event_t)gss); -} - -kern_return_t -act_get_state_locked(thread_act_t thr_act, int flavor, thread_state_t state, - mach_msg_type_number_t *pcount) -{ - return(get_set_state(thr_act, flavor, state, (int*)pcount, get_state_handler)); -} + thread_terminate_self(); + /*NOTREACHED*/ + } -kern_return_t -act_set_state_locked(thread_act_t thr_act, int flavor, thread_state_t state, - mach_msg_type_number_t count) -{ - return(get_set_state(thr_act, flavor, state, (int*)&count, set_state_handler)); + thread_mtx_unlock(thread); } kern_return_t -act_set_state(thread_act_t thr_act, int flavor, thread_state_t state, - mach_msg_type_number_t count) +act_set_state( + thread_t thread, + int flavor, + thread_state_t state, + mach_msg_type_number_t count) { - if (thr_act == THR_ACT_NULL || thr_act == current_act()) - return(KERN_INVALID_ARGUMENT); + if (thread == current_thread()) + return (KERN_INVALID_ARGUMENT); - act_lock_thread(thr_act); - return(act_set_state_locked(thr_act, flavor, state, count)); + return (thread_set_state(thread, flavor, state, count)); } kern_return_t -act_get_state(thread_act_t thr_act, int flavor, thread_state_t state, - mach_msg_type_number_t *pcount) +act_get_state( + thread_t thread, + int flavor, + thread_state_t state, + mach_msg_type_number_t *count) { - if (thr_act == THR_ACT_NULL || thr_act == current_act()) - 
return(KERN_INVALID_ARGUMENT); + if (thread == current_thread()) + return (KERN_INVALID_ARGUMENT); - act_lock_thread(thr_act); - return(act_get_state_locked(thr_act, flavor, state, pcount)); + return (thread_get_state(thread, flavor, state, count)); } void act_set_astbsd( - thread_act_t act) + thread_t thread) { - spl_t s = splsched(); + spl_t s = splsched(); - if (act == current_act()) { - thread_ast_set(act, AST_BSD); - ast_propagate(act->ast); + if (thread == current_thread()) { + thread_ast_set(thread, AST_BSD); + ast_propagate(thread->ast); } else { - thread_t thread = act->thread; processor_t processor; thread_lock(thread); - thread_ast_set(act, AST_BSD); + thread_ast_set(thread, AST_BSD); processor = thread->last_processor; if ( processor != PROCESSOR_NULL && processor->state == PROCESSOR_RUNNING && @@ -1473,20 +846,19 @@ act_set_astbsd( void act_set_apc( - thread_act_t act) + thread_t thread) { - spl_t s = splsched(); + spl_t s = splsched(); - if (act == current_act()) { - thread_ast_set(act, AST_APC); - ast_propagate(act->ast); + if (thread == current_thread()) { + thread_ast_set(thread, AST_APC); + ast_propagate(thread->ast); } else { - thread_t thread = act->thread; processor_t processor; thread_lock(thread); - thread_ast_set(act, AST_APC); + thread_ast_set(thread, AST_APC); processor = thread->last_processor; if ( processor != PROCESSOR_NULL && processor->state == PROCESSOR_RUNNING && @@ -1497,37 +869,3 @@ act_set_apc( splx(s); } - -void -act_ulock_release_all(thread_act_t thr_act) -{ - ulock_t ulock; - - while (!queue_empty(&thr_act->held_ulocks)) { - ulock = (ulock_t) queue_first(&thr_act->held_ulocks); - (void) lock_make_unstable(ulock, thr_act); - (void) lock_release_internal(ulock, thr_act); - } -} - -/* - * Provide routines (for export to other components) of things that - * are implemented as macros insternally. - */ -thread_act_t -thread_self(void) -{ - thread_act_t self = current_act_fast(); - - act_reference(self); - return self; -} - -thread_act_t -mach_thread_self(void) -{ - thread_act_t self = current_act_fast(); - - act_reference(self); - return self; -} diff --git a/osfmk/kern/thread_act.h b/osfmk/kern/thread_act.h deleted file mode 100644 index 6593bee3e..000000000 --- a/osfmk/kern/thread_act.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_FREE_COPYRIGHT@ - */ -/* - * Copyright (c) 1993 The University of Utah and - * the Computer Systems Laboratory (CSL). All rights reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS - * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF - * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * CSL requests users of this software to return to csl-dist@cs.utah.edu any - * improvements that they make and grant CSL redistribution rights. - * - * Author: Bryan Ford, University of Utah CSL - * - * File: thread_act.h - * - * thread activation definitions - */ -#ifndef _KERN_THREAD_ACT_H_ -#define _KERN_THREAD_ACT_H_ - -#include - -#endif /* _KERN_THREAD_ACT_H_ */ diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 365f4c2bf..789e1d52f 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc. + * Copyright (c) 1993-1995, 1999-2005 Apple Computer, Inc. * All rights reserved. * * @APPLE_LICENSE_HEADER_START@ @@ -20,24 +20,19 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Thread-based callout module. - * - * HISTORY - * - * 10 July 1999 (debo) - * Pulled into Mac OS X (microkernel). - * - * 3 July 1993 (debo) - * Created. - */ #include +#include +#include +#include #include #include #include #include +#include + +#include #include #include @@ -109,7 +104,7 @@ _delayed_call_dequeue( thread_call_t call ); -static void __inline__ +static __inline__ void _set_delayed_call_timer( thread_call_t call ); @@ -156,10 +151,12 @@ _delayed_call_timer( void thread_call_initialize(void) { - thread_call_t call; - spl_t s; + kern_return_t result; + thread_t thread; + thread_call_t call; + spl_t s; - simple_lock_init(&thread_call_lock, ETAP_MISC_TIMER); + simple_lock_init(&thread_call_lock, 0); s = splsched(); simple_lock(&thread_call_lock); @@ -186,7 +183,11 @@ thread_call_initialize(void) simple_unlock(&thread_call_lock); splx(s); - kernel_thread_with_priority(_activate_thread, MAXPRI_KERNEL - 2); + result = kernel_thread_start_priority((thread_continue_t)_activate_thread, NULL, MAXPRI_KERNEL - 2, &thread); + if (result != KERN_SUCCESS) + panic("thread_call_initialize"); + + thread_deallocate(thread); } void @@ -664,7 +665,7 @@ thread_call_free( simple_unlock(&thread_call_lock); splx(s); - kfree((vm_offset_t)call, sizeof (thread_call_data_t)); + kfree(call, sizeof (thread_call_data_t)); return (TRUE); } @@ -919,9 +920,7 @@ static __inline__ void _call_thread_wake(void) { - if (wait_queue_wakeup_one( - &call_thread_waitqueue, &call_thread_waitqueue, - THREAD_AWAKENED) == KERN_SUCCESS) { + if (wait_queue_wakeup_one(&call_thread_waitqueue, NULL, THREAD_AWAKENED) == KERN_SUCCESS) { thread_call_vars.idle_thread_num--; if (++thread_call_vars.active_num > thread_call_vars.active_hiwat) @@ -1001,7 +1000,7 @@ _call_thread_continue(void) (void) splsched(); simple_lock(&thread_call_lock); - self->active_callout = TRUE; + self->options |= TH_OPT_CALLOUT; while (thread_call_vars.pending_num > 0) { thread_call_t call; @@ -1034,7 +1033,7 @@ _call_thread_continue(void) simple_lock(&thread_call_lock); } - self->active_callout = FALSE; + self->options &= ~TH_OPT_CALLOUT; if (--thread_call_vars.active_num < 
thread_call_vars.active_lowat) thread_call_vars.active_lowat = thread_call_vars.active_num; @@ -1042,14 +1041,12 @@ _call_thread_continue(void) if (thread_call_vars.idle_thread_num < thread_call_vars.thread_lowat) { thread_call_vars.idle_thread_num++; - wait_queue_assert_wait( - &call_thread_waitqueue, &call_thread_waitqueue, - THREAD_INTERRUPTIBLE); + wait_queue_assert_wait(&call_thread_waitqueue, NULL, THREAD_UNINT, 0); simple_unlock(&thread_call_lock); (void) spllo(); - thread_block(_call_thread_continue); + thread_block((thread_continue_t)_call_thread_continue); /* NOTREACHED */ } @@ -1058,7 +1055,7 @@ _call_thread_continue(void) simple_unlock(&thread_call_lock); (void) spllo(); - (void) thread_terminate(self->top_act); + thread_terminate(self); /* NOTREACHED */ } @@ -1084,6 +1081,9 @@ static void _activate_thread_continue(void) { + kern_return_t result; + thread_t thread; + (void) splsched(); simple_lock(&thread_call_lock); @@ -1099,7 +1099,11 @@ _activate_thread_continue(void) simple_unlock(&thread_call_lock); (void) spllo(); - kernel_thread_with_priority(_call_thread, MAXPRI_KERNEL - 1); + result = kernel_thread_start_priority((thread_continue_t)_call_thread, NULL, MAXPRI_KERNEL - 1, &thread); + if (result != KERN_SUCCESS) + panic("activate_thread"); + + thread_deallocate(thread); (void) splsched(); simple_lock(&thread_call_lock); @@ -1111,7 +1115,7 @@ _activate_thread_continue(void) simple_unlock(&thread_call_lock); (void) spllo(); - thread_block(_activate_thread_continue); + thread_block((thread_continue_t)_activate_thread_continue); /* NOTREACHED */ } @@ -1121,7 +1125,7 @@ _activate_thread(void) { thread_t self = current_thread(); - self->vm_privilege = TRUE; + self->options |= TH_OPT_VMPRIV; vm_page_free_reserve(2); /* XXX */ _activate_thread_continue(); @@ -1131,8 +1135,8 @@ _activate_thread(void) static void _delayed_call_timer( - timer_call_param_t p0, - timer_call_param_t p1 + __unused timer_call_param_t p0, + __unused timer_call_param_t p1 ) { uint64_t timestamp; diff --git a/osfmk/kern/thread_call.h b/osfmk/kern/thread_call.h index 952265dd9..446bb1ce4 100644 --- a/osfmk/kern/thread_call.h +++ b/osfmk/kern/thread_call.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc. + * Copyright (c) 1993-1995, 1999-2005 Apple Computer, Inc. * All rights reserved. * * @APPLE_LICENSE_HEADER_START@ @@ -22,87 +22,109 @@ */ /* * Declarations for thread-based callouts. - * - * HISTORY - * - * 10 July 1999 (debo) - * Pulled into Mac OS X (microkernel). - * - * 3 July 1993 (debo) - * Created. 
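Note: the hunks above replace kernel_thread_with_priority() with kernel_thread_start_priority(), which hands back the new thread with an extra reference that the caller must drop. A minimal sketch of the resulting idiom (not itself part of the patch; the example_* names are illustrative, everything else mirrors the calls shown above):

/* Sketch only: spawn-and-release pattern for kernel worker threads. */
static void
example_worker(void)
{
	/* worker body; continuation-style, never returns */
}

static void
example_spawn_worker(void)
{
	kern_return_t	result;
	thread_t	thread;

	result = kernel_thread_start_priority((thread_continue_t)example_worker,
						NULL, MAXPRI_KERNEL - 1, &thread);
	if (result != KERN_SUCCESS)
		panic("example_spawn_worker");

	/*
	 * Drop the reference returned by the start routine; the worker
	 * keeps itself alive until it calls thread_terminate_self().
	 */
	thread_deallocate(thread);
}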
*/ #ifndef _KERN_THREAD_CALL_H_ #define _KERN_THREAD_CALL_H_ -#include - #include #include +#include + typedef struct call_entry *thread_call_t; typedef void *thread_call_param_t; typedef void (*thread_call_func_t)( thread_call_param_t param0, thread_call_param_t param1); +__BEGIN_DECLS boolean_t thread_call_enter( thread_call_t call ); + boolean_t thread_call_enter1( thread_call_t call, thread_call_param_t param1 ); + boolean_t thread_call_enter_delayed( thread_call_t call, uint64_t deadline ); + boolean_t thread_call_enter1_delayed( thread_call_t call, thread_call_param_t param1, uint64_t deadline ); + boolean_t thread_call_cancel( thread_call_t call ); -boolean_t -thread_call_is_delayed( - thread_call_t call, - uint64_t *deadline -); thread_call_t thread_call_allocate( thread_call_func_t func, thread_call_param_t param0 ); + boolean_t thread_call_free( thread_call_t call ); -#ifdef __APPLE_API_PRIVATE +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE + +#include + +typedef struct call_entry thread_call_data_t; + +void +thread_call_initialize(void); + +void +thread_call_setup( + thread_call_t call, + thread_call_func_t func, + thread_call_param_t param0 +); + +void +call_thread_block(void), +call_thread_unblock(void); + +#endif /* MACH_KERNEL_PRIVATE */ + +#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_OBSOLETE +__BEGIN_DECLS /* - * This portion of the interface - * is OBSOLETE and DEPRECATED. It - * will disappear shortly. + * Obsolete interfaces. */ +boolean_t +thread_call_is_delayed( + thread_call_t call, + uint64_t *deadline +); + void thread_call_func( thread_call_func_t func, thread_call_param_t param, boolean_t unique_call ); + void thread_call_func_delayed( thread_call_func_t func, @@ -117,36 +139,9 @@ thread_call_func_cancel( boolean_t cancel_all ); -/* End OBSOLETE and DEPRECATED */ - -#endif /* __APPLE_API_OBSOLETE */ - -#ifdef MACH_KERNEL_PRIVATE -#include - -typedef struct call_entry thread_call_data_t; - -void -thread_call_initialize(void); - -void -thread_call_setup( - thread_call_t call, - thread_call_func_t func, - thread_call_param_t param0 -); - -void -call_thread_block(void), -call_thread_unblock(void); - -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ +#ifndef MACH_KERNEL_PRIVATE -#if !defined(MACH_KERNEL_PRIVATE) && !defined(ABSOLUTETIME_SCALAR_TYPE) - -#include +#ifndef ABSOLUTETIME_SCALAR_TYPE #define thread_call_enter_delayed(a, b) \ thread_call_enter_delayed((a), __OSAbsoluteTime(b)) @@ -160,6 +155,12 @@ call_thread_unblock(void); #define thread_call_func_delayed(a, b, c) \ thread_call_func_delayed((a), (b), __OSAbsoluteTime(c)) -#endif +#endif /* ABSOLUTETIME_SCALAR_TYPE */ + +#endif /* MACH_KERNEL_PRIVATE */ + +__END_DECLS + +#endif /* KERNEL_PRIVATE */ -#endif /* _KERN_THREAD_CALL_H_ */ +#endif /* _KERN_THREAD_CALL_H_ */ diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c index 2d7e2765c..c9e75fc1e 100644 --- a/osfmk/kern/thread_policy.c +++ b/osfmk/kern/thread_policy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,15 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 15 October 2000 (debo) - * Created. 
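Note: taken together, the declarations above cover the public callout interface. A usage sketch (not part of the patch): clock_interval_to_deadline() is assumed from <kern/clock.h>, and the example_* names are illustrative.

/* Sketch only: schedule, cancel, and free a delayed callout. */
static void
example_callout(thread_call_param_t param0, thread_call_param_t param1)
{
	/* runs later on a callout thread */
	(void)param0; (void)param1;
}

static void
example_schedule(void)
{
	thread_call_t	call;
	uint64_t	deadline;

	call = thread_call_allocate(example_callout, NULL /* param0 */);

	/* 100 ms from now; the scale factor is in nanoseconds per unit. */
	clock_interval_to_deadline(100, 1000 * 1000, &deadline);
	(void) thread_call_enter1_delayed(call, NULL /* param1 */, deadline);

	/* ... */

	(void) thread_call_cancel(call);	/* TRUE if it had not yet fired */
	(void) thread_call_free(call);
}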
- */ +#include +#include + +#include #include #include @@ -37,27 +33,24 @@ thread_recompute_priority( kern_return_t thread_policy_set( - thread_act_t act, + thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t count) { kern_return_t result = KERN_SUCCESS; - thread_t thread; spl_t s; - if (act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); - if (!act->active) { - act_unlock_thread(act); + thread_mtx_lock(thread); + if (!thread->active) { + thread_mtx_unlock(thread); return (KERN_TERMINATED); } - assert(thread != THREAD_NULL); - switch (flavor) { case THREAD_EXTENDED_POLICY: @@ -185,7 +178,7 @@ thread_policy_set( break; } - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } @@ -241,30 +234,51 @@ thread_task_priority( splx(s); } +void +thread_policy_reset( + thread_t thread) +{ + if (!(thread->sched_mode & TH_MODE_FAILSAFE)) { + thread->sched_mode &= ~TH_MODE_REALTIME; + + if (!(thread->sched_mode & TH_MODE_TIMESHARE)) { + thread->sched_mode |= TH_MODE_TIMESHARE; + + if (thread->state & TH_RUN) + pset_share_incr(thread->processor_set); + } + } + else { + thread->safe_mode = 0; + thread->sched_mode &= ~TH_MODE_FAILSAFE; + } + + thread->importance = 0; + + thread_recompute_priority(thread); +} + kern_return_t thread_policy_get( - thread_act_t act, + thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t *count, boolean_t *get_default) { kern_return_t result = KERN_SUCCESS; - thread_t thread; spl_t s; - if (act == THR_ACT_NULL) + if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - thread = act_lock_thread(act); - if (!act->active) { - act_unlock_thread(act); + thread_mtx_lock(thread); + if (!thread->active) { + thread_mtx_unlock(thread); return (KERN_TERMINATED); } - assert(thread != THREAD_NULL); - switch (flavor) { case THREAD_EXTENDED_POLICY: @@ -369,7 +383,7 @@ thread_policy_get( break; } - act_unlock_thread(act); + thread_mtx_unlock(thread); return (result); } diff --git a/osfmk/kern/thread_swap.c b/osfmk/kern/thread_swap.c deleted file mode 100644 index 159abcbc4..000000000 --- a/osfmk/kern/thread_swap.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for splsched */ -#include -#include -#include - -queue_head_t swapin_queue; -decl_simple_lock_data(,swapin_lock) - -mach_counter_t c_swapin_thread_block; - -void swapin_thread(void); - -/* - * swapin_init: [exported] - * - * Initialize the swapper module. - */ -void -swapin_init(void) -{ - queue_init(&swapin_queue); - simple_lock_init(&swapin_lock, ETAP_THREAD_SWAPPER); - kernel_thread_with_priority(swapin_thread, MINPRI_KERNEL); -} - -/* - * thread_swapin: [exported] - * - * Place the specified thread in the list of threads to swapin. - * Called with thread locked, returned unlocked. - */ - -void -thread_swapin( - register thread_t thread) -{ - switch (thread->state & TH_STACK_STATE) { - - case TH_STACK_HANDOFF: - /* - * Swapped out. - */ - thread->state = (thread->state & ~TH_STACK_STATE) | TH_STACK_ALLOC; - thread_unlock(thread); - simple_lock(&swapin_lock); - enqueue_tail(&swapin_queue, (queue_entry_t) thread); - simple_unlock(&swapin_lock); - thread_wakeup((event_t)&swapin_queue); - break; - - case TH_STACK_ALLOC: - /* - * Already queued. - */ - thread_unlock(thread); - break; - - default: - /* - * Already swapped in. - */ - panic("thread_swapin"); - } -} - -/* - * thread_doswapin: - * - * Swapin the specified thread, if it should be runnable, then put - * it on a run queue. - */ -void -thread_doswapin( - register thread_t thread) -{ - vm_offset_t stack; - spl_t s; - - /* - * Allocate the kernel stack. - */ - stack = stack_alloc(thread, thread_continue); - assert(stack); - - /* - * Place on run queue. - */ - s = splsched(); - thread_lock(thread); - thread->state &= ~(TH_STACK_HANDOFF | TH_STACK_ALLOC); - if (thread->state & TH_RUN) - thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); - thread_unlock(thread); - (void) splx(s); -} - -/* - * swapin_thread: [exported] - * - * This procedure executes as a kernel thread. Threads that need to - * be swapped in are swapped in by this thread. 
- */ -void -swapin_thread_continue(void) -{ - register thread_t thread; - - (void)splsched(); - simple_lock(&swapin_lock); - - while ((thread = (thread_t)dequeue_head(&swapin_queue)) != THREAD_NULL) { - simple_unlock(&swapin_lock); - (void)spllo(); - - thread_doswapin(thread); - - (void)splsched(); - simple_lock(&swapin_lock); - } - - assert_wait((event_t)&swapin_queue, THREAD_UNINT); - simple_unlock(&swapin_lock); - (void)spllo(); - - counter(c_swapin_thread_block++); - thread_block(swapin_thread_continue); - /*NOTREACHED*/ -} - -void -swapin_thread(void) -{ - swapin_thread_continue(); - /*NOTREACHED*/ -} diff --git a/osfmk/kern/thread_swap.h b/osfmk/kern/thread_swap.h deleted file mode 100644 index a6b107e56..000000000 --- a/osfmk/kern/thread_swap.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1989 Carnegie-Mellon University - * Copyright (c) 1988 Carnegie-Mellon University - * Copyright (c) 1987 Carnegie-Mellon University - * All rights reserved. The CMU software License Agreement specifies - * the terms and conditions for use and redistribution. - */ - -/* - * File: kern/thread_swap.h - * - * Declarations of thread "swapping" routines. - */ - -#ifndef _KERN_THREAD_SWAP_H_ -#define _KERN_THREAD_SWAP_H_ - -/* - * exported routines - */ - -extern void swapin_init(void); -extern void thread_swapin( - thread_t thread); -extern void thread_doswapin( - thread_t thread); - -#endif /*_KERN_THREAD_SWAP_H_*/ diff --git a/osfmk/kern/timer.c b/osfmk/kern/timer.c index 996bc6fe6..4dc36f6b5 100644 --- a/osfmk/kern/timer.c +++ b/osfmk/kern/timer.c @@ -50,542 +50,129 @@ /* */ -#include #include +#include #include #include #include +#include #include #include -#include #include -#include - -#include -#include - -timer_t current_timer[NCPUS]; -timer_data_t kernel_timer[NCPUS]; - -/* Forwards */ -void timer_grab( - timer_t timer, - timer_save_t save); - -void db_timer_grab( - timer_t timer, - timer_save_t save); - -void db_thread_read_times( - thread_t thread, - time_value_t *user_time_p, - time_value_t *system_time_p); /* - * init_timers initializes all non-thread timers and puts the - * service routine on the callout queue. All timers must be - * serviced by the callout routine once an hour. - */ -void -init_timers(void) -{ - register int i; - register timer_t this_timer; - - /* - * Initialize all the kernel timers and start the one - * for this cpu (master) slaves start theirs later. 
- */ - this_timer = &kernel_timer[0]; - for ( i=0 ; ilow_bits = 0; - this_timer->high_bits = 0; - this_timer->tstamp = 0; - this_timer->high_bits_check = 0; -} - -#if STAT_TIME -#else /* STAT_TIME */ - -#ifdef MACHINE_TIMER_ROUTINES - -/* - * Machine-dependent code implements the timer routines. - */ - -#else /* MACHINE_TIMER_ROUTINES */ - -/* - * start_timer starts the given timer for this cpu. It is called - * exactly once for each cpu during the boot sequence. - */ -void -start_timer( - register timer_t timer) -{ - timer->tstamp = get_timestamp(); - mp_disable_preemption(); - current_timer[cpu_number()] = timer; - mp_enable_preemption(); -} - -/* - * time_trap_uentry does trap entry timing. Caller must lock out - * interrupts and take a timestamp. ts is a timestamp taken after - * interrupts were locked out. Must only be called if trap was - * from user mode. - */ -void -time_trap_uentry( - unsigned ts) -{ - int elapsed; - int mycpu; - timer_t mytimer; - - mp_disable_preemption(); - - /* - * Calculate elapsed time. - */ - mycpu = cpu_number(); - mytimer = current_timer[mycpu]; - elapsed = ts - mytimer->tstamp; -#ifdef TIMER_MAX - if (elapsed < 0) elapsed += TIMER_MAX; -#endif /* TIMER_MAX */ - - /* - * Update current timer. - */ - mytimer->low_bits += elapsed; - mytimer->tstamp = 0; - - if (mytimer->low_bits & TIMER_LOW_FULL) { - timer_normalize(mytimer); - } - - /* - * Record new timer. - */ - mytimer = &(current_thread()->system_timer); - current_timer[mycpu] = mytimer; - mytimer->tstamp = ts; - - mp_enable_preemption(); -} - -/* - * time_trap_uexit does trap exit timing. Caller must lock out - * interrupts and take a timestamp. ts is a timestamp taken after - * interrupts were locked out. Must only be called if returning to - * user mode. - */ -void -time_trap_uexit( - unsigned ts) -{ - int elapsed; - int mycpu; - timer_t mytimer; - - mp_disable_preemption(); - - /* - * Calculate elapsed time. - */ - mycpu = cpu_number(); - mytimer = current_timer[mycpu]; - elapsed = ts - mytimer->tstamp; -#ifdef TIMER_MAX - if (elapsed < 0) elapsed += TIMER_MAX; -#endif /* TIMER_MAX */ - - /* - * Update current timer. - */ - mytimer->low_bits += elapsed; - mytimer->tstamp = 0; - - if (mytimer->low_bits & TIMER_LOW_FULL) { - timer_normalize(mytimer); /* SYSTEMMODE */ - } - - mytimer = &(current_thread()->user_timer); - - /* - * Record new timer. - */ - current_timer[mycpu] = mytimer; - mytimer->tstamp = ts; - - mp_enable_preemption(); + timer->low_bits = 0; + timer->high_bits = 0; + timer->high_bits_check = 0; +#if !STAT_TIME + timer->tstamp = 0; +#endif /* STAT_TIME */ } /* - * time_int_entry does interrupt entry timing. Caller must lock out - * interrupts and take a timestamp. ts is a timestamp taken after - * interrupts were locked out. new_timer is the new timer to - * switch to. This routine returns the currently running timer, - * which MUST be pushed onto the stack by the caller, or otherwise - * saved for time_int_exit. + * Calculate the difference between a timer + * and saved value, and update the saved value. */ -timer_t -time_int_entry( - unsigned ts, - timer_t new_timer) +uint64_t +timer_delta( + timer_t timer, + uint64_t *save) { - int elapsed; - int mycpu; - timer_t mytimer; - - mp_disable_preemption(); - - /* - * Calculate elapsed time. - */ - mycpu = cpu_number(); - mytimer = current_timer[mycpu]; - - elapsed = ts - mytimer->tstamp; -#ifdef TIMER_MAX - if (elapsed < 0) elapsed += TIMER_MAX; -#endif /* TIMER_MAX */ + uint64_t new, old = *save; - /* - * Update current timer. 
- */ - mytimer->low_bits += elapsed; - mytimer->tstamp = 0; - - /* - * Switch to new timer, and save old one on stack. - */ - new_timer->tstamp = ts; - current_timer[mycpu] = new_timer; + *save = new = timer_grab(timer); - mp_enable_preemption(); - - return(mytimer); + return (new - old); } -/* - * time_int_exit does interrupt exit timing. Caller must lock out - * interrupts and take a timestamp. ts is a timestamp taken after - * interrupts were locked out. old_timer is the timer value pushed - * onto the stack or otherwise saved after time_int_entry returned - * it. - */ -void -time_int_exit( - unsigned ts, - timer_t old_timer) -{ - int elapsed; - int mycpu; - timer_t mytimer; - - mp_disable_preemption(); - - /* - * Calculate elapsed time. - */ - mycpu = cpu_number(); - mytimer = current_timer[mycpu]; - elapsed = ts - mytimer->tstamp; -#ifdef TIMER_MAX - if (elapsed < 0) elapsed += TIMER_MAX; -#endif /* TIMER_MAX */ - - /* - * Update current timer. - */ - mytimer->low_bits += elapsed; - mytimer->tstamp = 0; - - /* - * If normalization requested, do it. - */ - if (mytimer->low_bits & TIMER_LOW_FULL) { - timer_normalize(mytimer); - } - if (old_timer->low_bits & TIMER_LOW_FULL) { - timer_normalize(old_timer); - } - - /* - * Start timer that was running before interrupt. - */ - old_timer->tstamp = ts; - current_timer[mycpu] = old_timer; - - mp_enable_preemption(); -} +#if !STAT_TIME /* - * timer_switch switches to a new timer. The machine - * dependent routine/macro get_timestamp must return a timestamp. - * Caller must lock out interrupts. + * Update the current timer (if any) + * and start the new timer, which + * could be either the same or NULL. + * + * Called with interrupts disabled. */ void timer_switch( - timer_t new_timer) + uint32_t tstamp, + timer_t new_timer) { - int elapsed; - int mycpu; - timer_t mytimer; - unsigned ts; - - mp_disable_preemption(); - - /* - * Calculate elapsed time. - */ - mycpu = cpu_number(); - mytimer = current_timer[mycpu]; - ts = get_timestamp(); - elapsed = ts - mytimer->tstamp; -#ifdef TIMER_MAX - if (elapsed < 0) elapsed += TIMER_MAX; -#endif /* TIMER_MAX */ + processor_t processor = current_processor(); + timer_t timer; + uint32_t old_low, low; /* * Update current timer. */ - mytimer->low_bits += elapsed; - mytimer->tstamp = 0; - - /* - * Normalization check - */ - if (mytimer->low_bits & TIMER_LOW_FULL) { - timer_normalize(mytimer); + timer = PROCESSOR_DATA(processor, current_timer); + if (timer != NULL) { + old_low = timer->low_bits; + low = old_low + tstamp - timer->tstamp; + if (low < old_low) + timer_update(timer, timer->high_bits + 1, low); + else + timer->low_bits = low; } /* - * Record new timer. + * Start new timer. */ - current_timer[mycpu] = new_timer; - new_timer->tstamp = ts; - - mp_enable_preemption(); + PROCESSOR_DATA(processor, current_timer) = new_timer; + if (new_timer != NULL) + new_timer->tstamp = tstamp; } -#endif /* MACHINE_TIMER_ROUTINES */ -#endif /* STAT_TIME */ +#if MACHINE_TIMER_ROUTINES /* - * timer_normalize normalizes the value of a timer. It is - * called only rarely, to make sure low_bits never overflows. + * Machine-dependent code implements the timer event routine. */ -void -timer_normalize( - register timer_t timer) -{ - unsigned int high_increment; - - /* - * Calculate high_increment, then write high check field first - * followed by low and high. timer_grab() reads these fields in - * reverse order so if high and high check match, we know - * that the values read are ok. 
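Note: the comment above describes the lock-free snapshot protocol that survives into the new code: the writer publishes high_bits_check first and high_bits last, while the unsigned-wrap test (low < old_low) in the new timer_switch() detects carry out of the low word. A self-contained user-space model of the protocol (not part of the patch; the real timer_update() is machine-exported, and a weakly ordered CPU would additionally need memory barriers):

/* Stand-alone model of the checked-timer protocol; illustrative only. */
#include <stdint.h>

struct model_timer {
	volatile uint32_t low_bits;
	volatile uint32_t high_bits;
	volatile uint32_t high_bits_check;
};

/* Writer: check word first, then low, then high. */
static void
model_update(struct model_timer *t, uint32_t new_high, uint32_t new_low)
{
	t->high_bits_check = new_high;
	t->low_bits = new_low;
	t->high_bits = new_high;
}

/* Writer: advance by ticks, carrying into the high word on wrap. */
static void
model_bump(struct model_timer *t, uint32_t ticks)
{
	uint32_t old_low = t->low_bits;
	uint32_t low = old_low + ticks;

	if (low < old_low)			/* 32-bit wrap: carry out */
		model_update(t, t->high_bits + 1, low);
	else
		t->low_bits = low;
}

/* Reader: retry until high_bits and its check word agree. */
static uint64_t
model_grab(struct model_timer *t)
{
	uint32_t high, low;

	do {
		high = t->high_bits;
		low = t->low_bits;
	} while (high != t->high_bits_check);

	return ((uint64_t)high << 32) | low;
}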
- */ - - high_increment = timer->low_bits/TIMER_HIGH_UNIT; - timer->high_bits_check += high_increment; - timer->low_bits %= TIMER_HIGH_UNIT; - timer->high_bits += high_increment; -} +#else /* MACHINE_TIMER_ROUTINES */ /* - * timer_grab() retrieves the value of a timer. + * Update the current timer and start + * the new timer. Requires a current + * and new timer. * - * Critical scheduling code uses TIMER_DELTA macro in timer.h - * (called from thread_timer_delta in sched.h). - * - * Keep coherent with db_time_grab below. + * Called with interrupts disabled. */ - void -timer_grab( - timer_t timer, - timer_save_t save) +timer_event( + uint32_t tstamp, + timer_t new_timer) { -#if MACH_ASSERT - unsigned int passes=0; -#endif - do { - (save)->high = (timer)->high_bits; - (save)->low = (timer)->low_bits; + processor_t processor = current_processor(); + timer_t timer; + uint32_t old_low, low; + /* - * If the timer was normalized while we were doing this, - * the high_bits value read above and the high_bits check - * value will not match because high_bits_check is the first - * field touched by the normalization procedure, and - * high_bits is the last. - * - * Additions to timer only touch low bits and - * are therefore atomic with respect to this. + * Update current timer. */ -#if MACH_ASSERT - passes++; - assert(passes < 10000); -#endif - } while ( (save)->high != (timer)->high_bits_check); -} + timer = PROCESSOR_DATA(processor, current_timer); + old_low = timer->low_bits; + low = old_low + tstamp - timer->tstamp; + if (low < old_low) + timer_update(timer, timer->high_bits + 1, low); + else + timer->low_bits = low; -/* - * - * Db_timer_grab(): used by db_thread_read_times. An nonblocking - * version of db_thread_get_times. Keep coherent with timer_grab - * above. - * - */ -void -db_timer_grab( - timer_t timer, - timer_save_t save) -{ - /* Don't worry about coherency */ - - (save)->high = (timer)->high_bits; - (save)->low = (timer)->low_bits; -} - - -/* - * timer_read reads the value of a timer into a time_value_t. If the - * timer was modified during the read, retry. The value returned - * is accurate to the last update; time accumulated by a running - * timer since its last timestamp is not included. - */ - -void -timer_read( - timer_t timer, - register time_value_t *tv) -{ - timer_save_data_t temp; - - timer_grab(timer,&temp); /* - * Normalize the result + * Start new timer. */ -#ifdef TIMER_ADJUST - TIMER_ADJUST(&temp); -#endif /* TIMER_ADJUST */ - tv->seconds = temp.high + temp.low/1000000; - tv->microseconds = temp.low%1000000; -} - -/* - * thread_read_times reads the user and system times from a thread. - * Time accumulated since last timestamp is not included. Should - * be called at splsched() to avoid having user and system times - * be out of step. Doesn't care if caller locked thread. - * - * Needs to be kept coherent with thread_read_times ahead. 
- */ -void -thread_read_times( - thread_t thread, - time_value_t *user_time_p, - time_value_t *system_time_p) -{ - timer_save_data_t temp; - register timer_t timer; - - timer = &thread->user_timer; - timer_grab(timer, &temp); - -#ifdef TIMER_ADJUST - TIMER_ADJUST(&temp); -#endif /* TIMER_ADJUST */ - user_time_p->seconds = temp.high + temp.low/1000000; - user_time_p->microseconds = temp.low % 1000000; - - timer = &thread->system_timer; - timer_grab(timer, &temp); - -#ifdef TIMER_ADJUST - TIMER_ADJUST(&temp); -#endif /* TIMER_ADJUST */ - system_time_p->seconds = temp.high + temp.low/1000000; - system_time_p->microseconds = temp.low % 1000000; -} - -/* - * Db_thread_read_times: A version of thread_read_times that - * can be called by the debugger. This version does not call - * timer_grab, which can block. Please keep it up to date with - * thread_read_times above. - * - */ -void -db_thread_read_times( - thread_t thread, - time_value_t *user_time_p, - time_value_t *system_time_p) -{ - timer_save_data_t temp; - register timer_t timer; - - timer = &thread->user_timer; - db_timer_grab(timer, &temp); - -#ifdef TIMER_ADJUST - TIMER_ADJUST(&temp); -#endif /* TIMER_ADJUST */ - user_time_p->seconds = temp.high + temp.low/1000000; - user_time_p->microseconds = temp.low % 1000000; - - timer = &thread->system_timer; - timer_grab(timer, &temp); - -#ifdef TIMER_ADJUST - TIMER_ADJUST(&temp); -#endif /* TIMER_ADJUST */ - system_time_p->seconds = temp.high + temp.low/1000000; - system_time_p->microseconds = temp.low % 1000000; + PROCESSOR_DATA(processor, current_timer) = new_timer; + new_timer->tstamp = tstamp; } -/* - * timer_delta takes the difference of a saved timer value - * and the current one, and updates the saved value to current. - * The difference is returned as a function value. See - * TIMER_DELTA macro (timer.h) for optimization to this. - */ - -unsigned -timer_delta( - register timer_t timer, - timer_save_t save) -{ - timer_save_data_t new_save; - register unsigned result; +#endif /* MACHINE_TIMER_ROUTINES */ - timer_grab(timer,&new_save); - result = (new_save.high - save->high) * TIMER_HIGH_UNIT + - new_save.low - save->low; - save->high = new_save.high; - save->low = new_save.low; - return(result); -} +#endif /* STAT_TIME */ diff --git a/osfmk/kern/timer.h b/osfmk/kern/timer.h index c55f093da..1c202efd3 100644 --- a/osfmk/kern/timer.h +++ b/osfmk/kern/timer.h @@ -53,176 +53,91 @@ #ifndef _KERN_TIMER_H_ #define _KERN_TIMER_H_ -#include #include -#include #include -#if STAT_TIME -/* - * Statistical timer definitions - use microseconds in timer, seconds - * in high unit field. No adjustment needed to convert to time_value_t - * as a result. Service timers once an hour. - */ - -#define TIMER_RATE 1000000 -#define TIMER_HIGH_UNIT TIMER_RATE -#undef TIMER_ADJUST - -#else /* STAT_TIME */ -/* - * Machine dependent definitions based on hardware support. - */ - -#include - -#endif /* STAT_TIME */ - /* - * Definitions for accurate timers. high_bits_check is a copy of - * high_bits that allows reader to verify that values read are ok. + * Definitions for high resolution timers. A check + * word on the high portion allows atomic updates. 
*/ struct timer { - unsigned low_bits; - unsigned high_bits; - unsigned high_bits_check; - unsigned tstamp; + uint32_t low_bits; + uint32_t high_bits; + uint32_t high_bits_check; +#if !STAT_TIME + uint32_t tstamp; +#endif /* STAT_TIME */ }; -typedef struct timer timer_data_t; -typedef struct timer *timer_t; - -/* - * Mask to check if low_bits is in danger of overflowing - */ - -#define TIMER_LOW_FULL 0x80000000 +typedef struct timer timer_data_t, *timer_t; /* - * Kernel timers and current timer array. [Exported] + * Exported kernel interface to timers */ -extern timer_t current_timer[NCPUS]; -extern timer_data_t kernel_timer[NCPUS]; - -/* - * save structure for timer readings. This is used to save timer - * readings for elapsed time computations. - */ +#if STAT_TIME -struct timer_save { - unsigned low; - unsigned high; -}; +#include -typedef struct timer_save timer_save_data_t, *timer_save_t; +/* Advance a timer by the specified amount */ +#define TIMER_BUMP(timer, ticks) \ +MACRO_BEGIN \ + uint32_t old_low, low; \ + \ + old_low = (timer)->low_bits; \ + low = old_low + (ticks); \ + if (low < old_low) \ + timer_update((timer), (timer)->high_bits + 1, low); \ + else \ + (timer)->low_bits = low; \ +MACRO_END -/* - * Exported kernel interface to timers - */ +#define timer_switch(tstamp, new_timer) +#define timer_event(tstamp, new_timer) -#if STAT_TIME -#define start_timer(timer) -#define timer_switch(timer) #else /* STAT_TIME */ -/* Start timer for this cpu */ -extern void start_timer( - timer_t timer); -/* Switch to a new timer */ +/* Update the current timer and start a new one */ extern void timer_switch( - timer_t new_timer); -#endif /* STAT_TIME */ - -/* Initialize timer module */ -extern void init_timers(void); - -/* - * Initializes a single timer. - */ -extern void timer_init( - timer_t this_timer); - -/* Normalize timer value */ -extern void timer_normalize( - timer_t timer); + uint32_t tstamp, + timer_t new_timer); -/* Read value of timer into tv */ -extern void timer_read( - timer_t timer, - time_value_t *tv); +#define TIMER_BUMP(timer, ticks) -/* Read thread times */ -extern void thread_read_times( - thread_t thread, - time_value_t *user_time_p, - time_value_t *system_time_p); +#endif /* STAT_TIME */ -/* Compute timer difference */ -extern unsigned timer_delta( - timer_t timer, - timer_save_t save); +/* Initialize a timer */ +extern void timer_init( + timer_t timer); -#if STAT_TIME -/* - * Macro to bump timer values. - */ -#define timer_bump(timer, usec) \ -MACRO_BEGIN \ - (timer)->low_bits += usec; \ - if ((timer)->low_bits & TIMER_LOW_FULL) { \ - timer_normalize(timer); \ - } \ -MACRO_END +/* Update a saved timer value and return delta to current value */ +extern uint64_t timer_delta( + timer_t timer, + uint64_t *save); -#else /* STAT_TIME */ /* * Exported hardware interface to timers */ -/* Time trap entry */ -extern void time_trap_uentry( - unsigned ts); -/* Time trap exit */ -extern void time_trap_uexit( - unsigned ts); +/* Read timer value */ +extern uint64_t timer_grab( + timer_t timer); -/* Time interrupt entry */ -extern timer_t time_int_entry( - unsigned ts, - timer_t new_timer); +/* Update timer value */ +extern void timer_update( + timer_t timer, + uint32_t new_high, + uint32_t new_low); -/* Time interrrupt exit */ -extern void time_int_exit( - unsigned ts, - timer_t old_timer); +#if !STAT_TIME -#endif /* STAT_TIME */ - -/* - * TIMER_DELTA finds the difference between a timer and a saved value, - * and updates the saved value. 
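Note: the new timer_delta() declared above replaces both the timer_save structure and the TIMER_DELTA macro being removed here; callers now keep a plain uint64_t baseline. A brief usage sketch (not part of the patch; example_poll is illustrative):

/* Sketch only: periodic sampling of a timer's growth. */
static void
example_poll(timer_t timer)
{
	static uint64_t	save;		/* zero-initialized baseline */
	uint64_t	ticks;

	/* Returns the increase since the last call and updates 'save'. */
	ticks = timer_delta(timer, &save);
	/* ... account 'ticks' ... */
	(void)ticks;
}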
Look at high_bits check field after - * reading low because that's the first written by a normalize - * operation; this isn't necessary for current usage because - * this macro is only used when the timer can't be normalized: - * thread is not running, or running thread calls it on itself at - * splsched(). - */ +/* Update the current timer at an event */ +extern void timer_event( + uint32_t tstamp, + timer_t new_timer); -#define TIMER_DELTA(timer, save, result) \ -MACRO_BEGIN \ - register unsigned temp; \ - \ - temp = (timer).low_bits; \ - if ((save).high != (timer).high_bits_check) { \ - result += timer_delta(&(timer), &(save)); \ - } \ - else { \ - result += temp - (save).low; \ - (save).low = temp; \ - } \ -MACRO_END +#endif /* STAT_TIME */ #endif /* _KERN_TIMER_H_ */ diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index cf5d481f1..fd48c3959 100644 --- a/osfmk/kern/timer_call.c +++ b/osfmk/kern/timer_call.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc. + * Copyright (c) 1993-1995, 1999-2004 Apple Computer, Inc. * All rights reserved. * * @APPLE_LICENSE_HEADER_START@ @@ -37,21 +37,8 @@ #include #include -#ifdef i386 -/* - * Until we arrange for per-cpu timers, use the master cpus queues only. - * Fortunately, the timer_call_lock synchronizes access to all queues. - */ -#undef cpu_number() -#define cpu_number() 0 -#endif /* i386 */ - decl_simple_lock_data(static,timer_call_lock) -static -queue_head_t - timer_call_queues[NCPUS]; - static struct { int delayed_num, delayed_hiwat; @@ -68,16 +55,12 @@ void timer_call_initialize(void) { spl_t s; - int i; - simple_lock_init(&timer_call_lock, ETAP_MISC_TIMER); + simple_lock_init(&timer_call_lock, 0); s = splclock(); simple_lock(&timer_call_lock); - for (i = 0; i < NCPUS; i++) - queue_init(&timer_call_queues[i]); - clock_set_timer_func((clock_timer_func_t)timer_call_interrupt); simple_unlock(&timer_call_lock); @@ -159,7 +142,7 @@ timer_call_enter( call->param1 = 0; call->deadline = deadline; - queue = &timer_call_queues[cpu_number()]; + queue = &PROCESSOR_DATA(current_processor(), timer_call_queue); _delayed_call_enqueue(queue, call); @@ -193,7 +176,7 @@ timer_call_enter1( call->param1 = param1; call->deadline = deadline; - queue = &timer_call_queues[cpu_number()]; + queue = &PROCESSOR_DATA(current_processor(), timer_call_queue); _delayed_call_enqueue(queue, call); @@ -263,8 +246,8 @@ timer_call_shutdown( assert(processor != current_processor()); - queue = &timer_call_queues[processor->slot_num]; - myqueue = &timer_call_queues[cpu_number()]; + queue = &PROCESSOR_DATA(processor, timer_call_queue); + myqueue = &PROCESSOR_DATA(current_processor(), timer_call_queue); simple_lock(&timer_call_lock); @@ -292,10 +275,12 @@ timer_call_interrupt( uint64_t timestamp) { timer_call_t call; - queue_t queue = &timer_call_queues[cpu_number()]; + queue_t queue; simple_lock(&timer_call_lock); + queue = &PROCESSOR_DATA(current_processor(), timer_call_queue); + call = TC(queue_first(queue)); while (!queue_end(queue, qe(call))) { diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index eef777359..8faa4013f 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,6 +69,18 @@ #include +/* forward declarations */ +static boolean_t wait_queue_member_locked( + wait_queue_t wq, + wait_queue_set_t wq_set); + +void wait_queue_unlink_one( + wait_queue_t wq, + wait_queue_set_t *wq_setp); + +kern_return_t wait_queue_set_unlink_all_nofree( + wait_queue_set_t wq_set); + /* * Routine: wait_queue_init * Purpose: @@ -114,7 +126,7 @@ wait_queue_alloc( if (wq != WAIT_QUEUE_NULL) { ret = wait_queue_init(wq, policy); if (ret != KERN_SUCCESS) { - kfree((vm_offset_t)wq, sizeof(struct wait_queue)); + kfree(wq, sizeof(struct wait_queue)); wq = WAIT_QUEUE_NULL; } } @@ -136,7 +148,7 @@ wait_queue_free( return KERN_INVALID_ARGUMENT; if (!queue_empty(&wq->wq_queue)) return KERN_FAILURE; - kfree((vm_offset_t)wq, sizeof(struct wait_queue)); + kfree(wq, sizeof(struct wait_queue)); return KERN_SUCCESS; } @@ -169,7 +181,7 @@ wait_queue_set_init( return KERN_SUCCESS; } -/* legacy API */ + kern_return_t wait_queue_sub_init( wait_queue_set_t wqset, @@ -178,6 +190,19 @@ wait_queue_sub_init( return wait_queue_set_init(wqset, policy); } +kern_return_t +wait_queue_sub_clearrefs( + wait_queue_set_t wq_set) +{ + if (!wait_queue_is_set(wq_set)) + return KERN_INVALID_ARGUMENT; + + wqs_lock(wq_set); + wq_set->wqs_refcount = 0; + wqs_unlock(wq_set); + return KERN_SUCCESS; +} + /* * Routine: wait_queue_set_alloc * Purpose: @@ -201,7 +226,7 @@ wait_queue_set_alloc( ret = wait_queue_set_init(wq_set, policy); if (ret != KERN_SUCCESS) { - kfree((vm_offset_t)wq_set, sizeof(struct wait_queue_set)); + kfree(wq_set, sizeof(struct wait_queue_set)); wq_set = WAIT_QUEUE_SET_NULL; } } @@ -225,22 +250,10 @@ wait_queue_set_free( if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue)) return KERN_FAILURE; - kfree((vm_offset_t)wq_set, sizeof(struct wait_queue_set)); + kfree(wq_set, sizeof(struct wait_queue_set)); return KERN_SUCCESS; } -kern_return_t -wait_queue_sub_clearrefs( - wait_queue_set_t wq_set) -{ - if (!wait_queue_is_set(wq_set)) - return KERN_INVALID_ARGUMENT; - - wqs_lock(wq_set); - wq_set->wqs_refcount = 0; - wqs_unlock(wq_set); - return KERN_SUCCESS; -} /* * @@ -322,7 +335,7 @@ MACRO_END * The wait queue is locked * The set queue is just that, a set queue */ -__private_extern__ boolean_t +static boolean_t wait_queue_member_locked( wait_queue_t wq, wait_queue_set_t wq_set) @@ -465,7 +478,7 @@ wait_queue_link( ret = wait_queue_link_noalloc(wq, wq_set, wql); if (ret != KERN_SUCCESS) - kfree((vm_offset_t)wql, sizeof(struct wait_queue_link)); + kfree(wql, sizeof(struct wait_queue_link)); return ret; } @@ -532,7 +545,7 @@ wait_queue_unlink( wqs_unlock(wq_set); wait_queue_unlock(wq); splx(s); - kfree((vm_offset_t)wql, sizeof(struct wait_queue_link)); + kfree(wql, sizeof(struct wait_queue_link)); return KERN_SUCCESS; } } @@ -652,7 +665,7 @@ wait_queue_unlink_all( while(!queue_empty(links)) { wql = (wait_queue_link_t) dequeue(links); - kfree((vm_offset_t) wql, sizeof(struct wait_queue_link)); + kfree(wql, sizeof(struct wait_queue_link)); } return(KERN_SUCCESS); @@ -675,7 +688,6 @@ wait_queue_set_unlink_all_nofree( wait_queue_link_t wql; wait_queue_t wq; queue_t q; - kern_return_t kret; spl_t s; if (!wait_queue_is_set(wq_set)) { @@ -735,7 +747,6 @@ wait_queue_set_unlink_all( queue_t q; queue_head_t links_queue_head; queue_t links = &links_queue_head; - kern_return_t kret; spl_t s; if (!wait_queue_is_set(wq_set)) { @@ -771,7 +782,7 @@ retry: while (!queue_empty (links)) { wql = (wait_queue_link_t) dequeue(links); - kfree((vm_offset_t)wql, sizeof(struct 
wait_queue_link)); + kfree(wql, sizeof(struct wait_queue_link)); } return(KERN_SUCCESS); } @@ -810,7 +821,7 @@ wait_queue_unlink_one( wqs_unlock(wq_set); wait_queue_unlock(wq); splx(s); - kfree((vm_offset_t)wql,sizeof(struct wait_queue_link)); + kfree(wql,sizeof(struct wait_queue_link)); *wq_setp = wq_set; return; } @@ -840,6 +851,7 @@ wait_queue_assert_wait64_locked( wait_queue_t wq, event64_t event, wait_interrupt_t interruptible, + uint64_t deadline, thread_t thread) { wait_result_t wait_result; @@ -862,12 +874,19 @@ wait_queue_assert_wait64_locked( */ wait_result = thread_mark_wait_locked(thread, interruptible); if (wait_result == THREAD_WAITING) { - if (thread->vm_privilege) + if (thread->options & TH_OPT_VMPRIV) enqueue_head(&wq->wq_queue, (queue_entry_t) thread); else enqueue_tail(&wq->wq_queue, (queue_entry_t) thread); + thread->wait_event = event; thread->wait_queue = wq; + + if (deadline != 0) { + if (!timer_call_enter(&thread->wait_timer, deadline)) + thread->wait_timer_active++; + thread->wait_timer_is_set = TRUE; + } } return(wait_result); } @@ -885,25 +904,23 @@ wait_result_t wait_queue_assert_wait( wait_queue_t wq, event_t event, - wait_interrupt_t interruptible) + wait_interrupt_t interruptible, + uint64_t deadline) { spl_t s; wait_result_t ret; - thread_t cur_thread = current_thread(); + thread_t thread = current_thread(); /* If it is an invalid wait queue, you can't wait on it */ - if (!wait_queue_is_valid(wq)) { - thread_t thread = current_thread(); + if (!wait_queue_is_valid(wq)) return (thread->wait_result = THREAD_RESTART); - } s = splsched(); wait_queue_lock(wq); - thread_lock(cur_thread); - ret = wait_queue_assert_wait64_locked( - wq, (event64_t)((uint32_t)event), - interruptible, cur_thread); - thread_unlock(cur_thread); + thread_lock(thread); + ret = wait_queue_assert_wait64_locked(wq, (event64_t)((uint32_t)event), + interruptible, deadline, thread); + thread_unlock(thread); wait_queue_unlock(wq); splx(s); return(ret); @@ -921,29 +938,27 @@ wait_result_t wait_queue_assert_wait64( wait_queue_t wq, event64_t event, - wait_interrupt_t interruptible) + wait_interrupt_t interruptible, + uint64_t deadline) { spl_t s; wait_result_t ret; - thread_t cur_thread = current_thread(); + thread_t thread = current_thread(); /* If it is an invalid wait queue, you cant wait on it */ - if (!wait_queue_is_valid(wq)) { - thread_t thread = current_thread(); + if (!wait_queue_is_valid(wq)) return (thread->wait_result = THREAD_RESTART); - } s = splsched(); wait_queue_lock(wq); - thread_lock(cur_thread); - ret = wait_queue_assert_wait64_locked(wq, event, interruptible, cur_thread); - thread_unlock(cur_thread); + thread_lock(thread); + ret = wait_queue_assert_wait64_locked(wq, event, interruptible, deadline, thread); + thread_unlock(thread); wait_queue_unlock(wq); splx(s); return(ret); } - /* * Routine: _wait_queue_select64_all * Purpose: @@ -1062,7 +1077,7 @@ wait_queue_wakeup64_all_locked( res = KERN_NOT_WAITING; while (!queue_empty (q)) { thread_t thread = (thread_t) dequeue(q); - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); } @@ -1196,8 +1211,7 @@ _wait_queue_select64_one( * the event we are posting to this queue, pull * it off the queue and stick it in out wake_queue. 
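Note: with the deadline argument threaded through wait_queue_assert_wait64_locked() above, the timeout is armed as part of asserting the wait (a zero deadline means no timeout). A sketch of the resulting calling convention (not part of the patch; example_wait is illustrative, and the wait/block calls are those shown in the hunks above):

/* Sketch only: block on an event with an optional deadline. */
static wait_result_t
example_wait(wait_queue_t wq, event_t event, uint64_t deadline)
{
	wait_result_t wresult;

	wresult = wait_queue_assert_wait(wq, event, THREAD_ABORTSAFE, deadline);
	if (wresult == THREAD_WAITING)
		wresult = thread_block(THREAD_CONTINUE_NULL);

	/* THREAD_TIMED_OUT here indicates the deadline expired. */
	return (wresult);
}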
*/ - thread_t t = (thread_t)wq_element; - + t = (thread_t)wq_element; if (t->wait_event == event) { thread_lock(t); remqueue(q, (queue_entry_t) t); @@ -1206,6 +1220,8 @@ _wait_queue_select64_one( t->at_safe_point = FALSE; return t; /* still locked */ } + + t = THREAD_NULL; } wq_element = wqe_next; } @@ -1239,7 +1255,6 @@ wait_queue_peek64_locked( { wait_queue_element_t wq_element; wait_queue_element_t wqe_next; - thread_t t; queue_t q; assert(wq->wq_fifo); @@ -1425,7 +1440,7 @@ wait_queue_wakeup64_identity_locked( wait_queue_unlock(wq); if (thread) { - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); } return thread; /* still locked if not NULL */ @@ -1464,7 +1479,7 @@ wait_queue_wakeup64_one_locked( if (thread) { kern_return_t res; - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); return res; @@ -1505,7 +1520,7 @@ wait_queue_wakeup_one( if (thread) { kern_return_t res; - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); splx(s); @@ -1547,7 +1562,7 @@ wait_queue_wakeup64_one( if (thread) { kern_return_t res; - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); splx(s); @@ -1601,7 +1616,7 @@ wait_queue_wakeup64_thread_locked( if (res != KERN_SUCCESS) return KERN_NOT_WAITING; - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); return res; @@ -1646,7 +1661,7 @@ wait_queue_wakeup_thread( wait_queue_unlock(wq); if (res == KERN_SUCCESS) { - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); splx(s); @@ -1695,7 +1710,7 @@ wait_queue_wakeup64_thread( wait_queue_unlock(wq); if (res == KERN_SUCCESS) { - res = thread_go_locked(thread, result); + res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); splx(s); diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h index 3a15fd266..6315e9e3a 100644 --- a/osfmk/kern/wait_queue.h +++ b/osfmk/kern/wait_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,23 +19,25 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _KERN_WAIT_QUEUE_H_ -#define _KERN_WAIT_QUEUE_H_ -#include +#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE +#ifndef _KERN_WAIT_QUEUE_H_ +#define _KERN_WAIT_QUEUE_H_ +#include #include #include /* for kern_return_t */ #include /* for wait_queue_t */ -#ifdef MACH_KERNEL_PRIVATE +#include + +#ifdef MACH_KERNEL_PRIVATE #include #include - +#include /* * wait_queue_t @@ -169,6 +171,7 @@ __private_extern__ wait_result_t wait_queue_assert_wait64_locked( wait_queue_t wait_queue, event64_t wait_event, wait_interrupt_t interruptible, + uint64_t deadline, thread_t thread); /* peek to see which thread would be chosen for a wakeup - but keep on queue */ @@ -213,11 +216,15 @@ __private_extern__ kern_return_t wait_queue_wakeup64_thread_locked( wait_result_t result, boolean_t unlock); -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ + +__BEGIN_DECLS -#ifdef __APPLE_API_UNSTABLE /******** Semi-Public interfaces (not a part of a higher construct) ************/ +extern unsigned int wait_queue_set_size(void); +extern unsigned int wait_queue_link_size(void); + extern kern_return_t wait_queue_init( wait_queue_t wait_queue, int policy); @@ -225,6 +232,10 @@ extern kern_return_t wait_queue_init( extern wait_queue_set_t wait_queue_set_alloc( int policy); +extern kern_return_t wait_queue_set_init( + wait_queue_set_t set_queue, + int policy); + extern kern_return_t wait_queue_set_free( wait_queue_set_t set_queue); @@ -234,17 +245,16 @@ extern wait_queue_link_t wait_queue_link_alloc( extern kern_return_t wait_queue_link_free( wait_queue_link_t link_element); -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef __APPLE_API_EVOLVING - -extern wait_queue_t wait_queue_alloc( - int policy); +extern kern_return_t wait_queue_link( + wait_queue_t wait_queue, + wait_queue_set_t set_queue); -extern kern_return_t wait_queue_free( - wait_queue_t wait_queue); +extern kern_return_t wait_queue_link_noalloc( + wait_queue_t wait_queue, + wait_queue_set_t set_queue, + wait_queue_link_t link); -extern kern_return_t wait_queue_link( +extern boolean_t wait_queue_member( wait_queue_t wait_queue, wait_queue_set_t set_queue); @@ -255,14 +265,35 @@ extern kern_return_t wait_queue_unlink( extern kern_return_t wait_queue_unlink_all( wait_queue_t wait_queue); +extern kern_return_t wait_queue_unlinkall_nofree( + wait_queue_t wait_queue); + extern kern_return_t wait_queue_set_unlink_all( wait_queue_set_t set_queue); +/* legacy API */ +kern_return_t wait_queue_sub_init( + wait_queue_set_t set_queue, + int policy); + +kern_return_t wait_queue_sub_clearrefs( + wait_queue_set_t wq_set); + +extern kern_return_t wait_subqueue_unlink_all( + wait_queue_set_t set_queue); + +extern wait_queue_t wait_queue_alloc( + int policy); + +extern kern_return_t wait_queue_free( + wait_queue_t wait_queue); + /* assert intent to wait on pair */ extern wait_result_t wait_queue_assert_wait64( wait_queue_t wait_queue, event64_t wait_event, - wait_interrupt_t interruptible); + wait_interrupt_t interruptible, + uint64_t deadline); /* wakeup the most appropriate thread waiting on pair */ extern kern_return_t wait_queue_wakeup64_one( @@ -283,8 +314,6 @@ extern kern_return_t wait_queue_wakeup64_thread( thread_t thread, wait_result_t result); -#endif /* __APPLE_API_EVOLVING */ - /* * Compatibility Wait Queue APIs based on pointer events instead of 64bit * integer events. 
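Note: the declarations above expose the wait queue allocation and set-membership interfaces. A lifecycle sketch (not part of the patch; SYNC_POLICY_FIFO is assumed from <mach/sync_policy.h>, and example_link is illustrative):

/* Sketch only: create a wait queue, link it into a set, tear down. */
static void
example_link(void)
{
	wait_queue_t		wq;
	wait_queue_set_t	wq_set;

	wq = wait_queue_alloc(SYNC_POLICY_FIFO);
	if (wq == WAIT_QUEUE_NULL)
		return;
	wq_set = wait_queue_set_alloc(SYNC_POLICY_FIFO);
	if (wq_set == WAIT_QUEUE_SET_NULL) {
		(void) wait_queue_free(wq);
		return;
	}

	/* Membership lets a wakeup posted to wq also wake set waiters. */
	(void) wait_queue_link(wq, wq_set);

	(void) wait_queue_unlink(wq, wq_set);
	(void) wait_queue_set_free(wq_set);
	(void) wait_queue_free(wq);
}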
@@ -294,7 +323,8 @@ extern kern_return_t wait_queue_wakeup64_thread( extern wait_result_t wait_queue_assert_wait( wait_queue_t wait_queue, event_t wait_event, - wait_interrupt_t interruptible); + wait_interrupt_t interruptible, + uint64_t deadline); /* wakeup the most appropriate thread waiting on pair */ extern kern_return_t wait_queue_wakeup_one( @@ -315,6 +345,8 @@ extern kern_return_t wait_queue_wakeup_thread( thread_t thread, wait_result_t result); -#endif /* __APPLE_API_PRIVATE */ +__END_DECLS + +#endif /* _KERN_WAIT_QUEUE_H_ */ -#endif /* _KERN_WAIT_QUEUE_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/xpr.c b/osfmk/kern/xpr.c index 2a8d85f3f..d6088c4c1 100644 --- a/osfmk/kern/xpr.c +++ b/osfmk/kern/xpr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,180 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:57 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.46.1 1997/09/22 17:41:21 barbou - * MP+RT: protect cpu_number() usage against preemption. - * [97/09/16 barbou] - * - * Revision 1.2.25.5 1996/07/31 09:56:06 paire - * Merged with nmk20b7_shared (1.2.41.1) - * [96/06/10 paire] - * - * Revision 1.2.41.1 1996/04/15 14:35:12 bernadat - * Keep interrupts disabled while accessing XPR_TIMESTAMP. - * [96/04/12 bernadat] - * - * Revision 1.2.25.4 1995/02/24 15:22:42 alanl - * DIPC: Merge from nmk17b2 to nmk18b8. - * Notes: Restore portable locks package, derived from nmk17b2. - * [95/02/07 alanl] - * - * Revision 1.2.28.3 1994/12/09 22:25:16 dwm - * mk6 CR801 - merge up from nmk18b4 to nmk18b7 - * * Rev 1.2.25.2 1994/10/21 18:30:41 joe - * Added ETAP support - * [1994/12/09 21:10:59 dwm] - * - * Revision 1.2.28.2 1994/11/10 06:15:29 dwm - * mk6 CR764 - s/spinlock/simple_lock/ (name change only) - * [1994/11/10 05:58:48 dwm] - * - * Revision 1.2.28.1 1994/11/04 10:10:56 dwm - * mk6 CR668 - 1.3b26 merge - * * Revision 1.2.5.7 1994/05/06 18:54:13 tmt - * Merge in DEC Alpha changes to osc1.3b19. - * include - * 64 bit cleanup. - * * End1.3merge - * [1994/11/04 09:39:17 dwm] - * - * Revision 1.2.25.1 1994/09/23 02:32:39 ezf - * change marker to not FREE - * [1994/09/22 21:38:29 ezf] - * - * Revision 1.2.22.1 1994/06/09 14:14:11 dswartz - * Preemption merge. - * [1994/06/09 14:08:38 dswartz] - * - * Revision 1.2.5.5 1993/08/12 20:16:51 bernard - * Last pass for ANSI prototypes - CR#9523 - * [1993/08/12 15:43:24 bernard] - * - * Revision 1.2.5.4 1993/08/02 17:26:05 rod - * ANSI prototypes: zap explicit include of machine/setjmp.h. CR #9523. - * [1993/08/01 13:36:31 rod] - * - * Revision 1.2.5.3 1993/07/27 18:09:05 rod - * Add ANSI prototypes. CR #9523. - * [1993/07/27 14:33:23 rod] - * - * Revision 1.2.5.2 1993/06/09 02:39:13 gm - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:07:51 jeffc] - * - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:15:13 jeffc] - * - * Revision 1.2 1993/04/19 16:31:21 devrcs - * Added void to fcns that still needed it. - * [93/02/05 bruel] - * - * Revision 1.1 1992/09/30 02:10:39 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.9.5.2 92/03/03 16:20:57 jeffreyh - * Fix Log. 
- * [92/02/24 13:24:44 jeffreyh] - * - * Revision 2.9.5.1 92/02/18 19:13:03 jeffreyh - * Added an xpr_search function to which you can give - * a selection function. - * [92/02/11 08:13:23 bernadat] - * - * Revision 2.9.4.1 92/02/13 18:53:47 jeffreyh - * Added an xpr_search function to which you can give - * a selection function. - * [92/02/11 08:13:23 bernadat] - * - * Revision 2.9.3.1 92/02/11 17:19:59 jeffreyh - * Added an xpr_search function to which you can give - * a selection function. - * [92/02/11 08:13:23 bernadat] - * - * Revision 2.9.2.1 92/02/11 08:13:23 bernadat - * Added an xpr_search function to which you can give - * a selection function. - * - * - * Revision 2.9 91/10/09 16:11:50 af - * Removed xpr_save. Modified xpr_dump to make it useful - * for dumping xpr buffers in user space tasks. - * [91/09/20 rpd] - * - * Turned on xprenable by default. xprbootstrap now preserves - * the original contents of the buffer if xprenable is off. - * [91/09/18 rpd] - * - * Revision 2.8 91/08/28 11:14:56 jsb - * Fixed xprbootstrap to zero the allocate memory. - * [91/08/18 rpd] - * - * Revision 2.7 91/05/18 14:34:37 rpd - * Added xprenable and other minor changes so that the xpr buffer - * may be examined after a spontaneous reboot. - * [91/05/03 rpd] - * Fixed the initialization check in xpr. - * Fixed xpr_dump. - * [91/04/02 rpd] - * - * Revision 2.6 91/05/14 16:50:09 mrt - * Correcting copyright - * - * Revision 2.5 91/03/16 14:53:24 rpd - * Updated for new kmem_alloc interface. - * [91/03/03 rpd] - * - * Revision 2.4 91/02/05 17:31:13 mrt - * Changed to new Mach copyright - * [91/02/01 16:21:17 mrt] - * - * Revision 2.3 90/09/09 14:33:04 rpd - * Use decl_simple_lock_data. - * [90/08/30 rpd] - * - * Revision 2.2 89/11/29 14:09:21 af - * Added xpr_dump() to print on console the content of the buffer, - * only valid for KDB usage. - * [89/11/12 af] - * - * MACH_KERNEL: include sys/cpu_number.h instead of machine/cpu.h. - * Clean up comments. - * [88/12/19 dbg] - * - * Revision 2.1 89/08/03 15:49:11 rwd - * Created. - * - * Revision 2.2 88/12/19 02:48:30 mwyoung - * Fix include file references. - * [88/11/22 02:17:01 mwyoung] - * - * Separate initialization into two phases. - * [88/11/22 01:13:11 mwyoung] - * - * 6-Jan-88 Michael Young (mwyoung) at Carnegie-Mellon University - * Eliminate use of arg6 in order to allow a more shapely event structure. - * - * 30-Dec-87 David Golub (dbg) at Carnegie-Mellon University - * Delinted. - * - * 7-Dec-87 Richard Sanzi (sanzi) at Carnegie-Mellon University - * Added xpr_save() routine. - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -226,8 +52,6 @@ * xpr silent tracing circular buffer. */ -#include - #include #include #include @@ -257,12 +81,12 @@ struct xprbuf *xprlast; /* Pointer to end of circular buffer */ void xpr( - char *msg, - long arg1, - long arg2, - long arg3, - long arg4, - long arg5) + const char *msg, + long arg1, + long arg2, + long arg3, + long arg4, + long arg5) { spl_t s; register struct xprbuf *x; @@ -302,7 +126,7 @@ xprbootstrap(void) vm_size_t size; kern_return_t kr; - simple_lock_init(&xprlock, ETAP_MISC_XPR); + simple_lock_init(&xprlock, 0); if (nxprbufs == 0) return; /* assume XPR support not desired */ diff --git a/osfmk/kern/xpr.h b/osfmk/kern/xpr.h index 9c528830a..39430bf29 100644 --- a/osfmk/kern/xpr.h +++ b/osfmk/kern/xpr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,102 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:57 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.12.5 1995/02/24 15:22:46 alanl - * Add XPR definition to trace generic XMM activities. - * [95/01/31 alanl] - * - * Revision 1.1.14.3 1994/11/02 18:37:35 dwm - * mk6 CR668 - 1.3b26 merge - * Add MOR token, update XPR names for locks, vm_maps. - * now only a single XPR(...) macro, 5 args always. - * [1994/11/02 18:17:33 dwm] - * - * Revision 1.1.12.3 1994/09/23 02:32:50 ezf - * change marker to not FREE - * [1994/09/22 21:38:33 ezf] - * - * Revision 1.1.12.2 1994/09/10 21:46:57 bolinger - * Merge up to NMK17.3 - * [1994/09/08 19:57:50 bolinger] - * - * Revision 1.1.12.1 1994/06/14 17:13:10 bolinger - * Merge up to NMK17.2. - * [1994/06/14 16:55:44 bolinger] - * - * Revision 1.1.7.2 1994/05/30 07:37:07 bernadat - * Added missing ')' to XPR5. - * [94/05/25 bernadat] - * - * Revision 1.1.7.1 1994/03/24 15:29:18 paire - * Set up correct XPR and XPR[1-5] macros. - * Added XPR_SIMPLE_LOCK define. - * [94/03/08 paire] - * - * Revision 1.1.2.5 1993/08/03 18:29:24 gm - * CR9596: Change KERNEL to MACH_KERNEL. - * [1993/08/02 17:41:44 gm] - * - * Revision 1.1.2.4 1993/07/27 18:09:08 rod - * Add ANSI prototypes. CR #9523. - * [1993/07/27 10:42:04 rod] - * - * Revision 1.1.2.3 1993/06/07 22:15:39 jeffc - * CR9176 - ANSI C violations: trailing tokens on CPP - * directives, extra semicolons after decl_ ..., asm keywords - * [1993/06/07 19:07:55 jeffc] - * - * Revision 1.1.2.2 1993/06/02 23:42:14 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:15:17 jeffc] - * - * Revision 1.1 1992/09/30 02:30:28 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.3 91/05/14 16:50:21 mrt - * Correcting copyright - * - * Revision 2.2 91/02/05 17:31:18 mrt - * MACH_KERNEL: removed conditionals. - * [88/12/19 dbg] - * - * Revision 2.1 89/08/03 15:57:39 rwd - * Created. - * - * Revision 2.5 88/12/19 02:51:59 mwyoung - * Added VM system tags. - * [88/11/22 mwyoung] - * - * Revision 2.4 88/08/24 02:55:54 mwyoung - * Adjusted include file references. - * [88/08/17 02:29:56 mwyoung] - * - * - * 9-Apr-88 Daniel Julin (dpj) at Carnegie-Mellon University - * Added flags for TCP and MACH_NP debugging. - * - * 6-Jan-88 Michael Young (mwyoung) at Carnegie-Mellon University - * Make the event structure smaller to make it easier to read from - * kernel debuggers. - * - * 16-Mar-87 Mike Accetta (mja) at Carnegie-Mellon University - * MACH: made XPR_DEBUG definition conditional on MACH - * since the routines invoked under it won't link without MACH. 
- * [ V5.1(F7) ] - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -216,10 +120,10 @@ extern int xprflags; #endif /* XPR_DEBUG */ struct xprbuf { - char *msg; - long arg1,arg2,arg3,arg4,arg5; - int timestamp; - int cpuinfo; + const char *msg; + long arg1,arg2,arg3,arg4,arg5; + int timestamp; + int cpuinfo; }; /* Bootstrap XPR facility */ @@ -230,11 +134,11 @@ extern void xprinit(void); /* Log an XPR message */ extern void xpr( - char *msg, - long arg1, - long arg2, - long arg3, - long arg4, - long arg5); + const char *msg, + long arg1, + long arg2, + long arg3, + long arg4, + long arg5); #endif /* _KERN_XPR_H_ */ diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index 6f6966c3b..bb68d2236 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,8 +59,17 @@ #include #include #include -#include + +#include +#include +#include +#include +#include +#include + +#include #include +#include #include #include #include @@ -68,10 +77,20 @@ #include #include #include -#include +#include + +#include +#include #include +#include + #include +#if defined(__ppc__) +/* for fake zone stat routines */ +#include +#include +#endif #if MACH_ASSERT /* Detect use of zone elt after freeing it by two methods: @@ -83,12 +102,12 @@ #if defined(__alpha) #define is_kernel_data_addr(a) \ - (!(a) || IS_SYS_VA(a) && !((a) & (sizeof(long)-1))) + (!(a) || (IS_SYS_VA(a) && !((a) & (sizeof(long)-1)))) #else /* !defined(__alpha) */ #define is_kernel_data_addr(a) \ - (!(a) || (a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3)) + (!(a) || ((a) >= VM_MIN_KERNEL_ADDRESS && !((a) & 0x3))) #endif /* defined(__alpha) */ @@ -101,7 +120,7 @@ boolean_t zfree_clear = FALSE; #define ADD_TO_ZONE(zone, element) \ MACRO_BEGIN \ if (zfree_clear) \ - { int i; \ + { unsigned int i; \ for (i=1; \ i < zone->elem_size/sizeof(vm_offset_t) - 1; \ i++) \ @@ -211,26 +230,26 @@ vm_size_t zdata_size; #define lock_zone(zone) \ MACRO_BEGIN \ - simple_lock(&(zone)->lock); \ + mutex_lock(&(zone)->lock); \ MACRO_END #define unlock_zone(zone) \ MACRO_BEGIN \ - simple_unlock(&(zone)->lock); \ + mutex_unlock(&(zone)->lock); \ MACRO_END #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) #define zone_sleep(zone) \ - thread_sleep_simple_lock((event_t)(zone), \ + thread_sleep_mutex((event_t)(zone), \ &(zone)->lock, \ THREAD_UNINT) #define lock_zone_init(zone) \ MACRO_BEGIN \ - simple_lock_init(&zone->lock, ETAP_MISC_ZONE); \ + mutex_init(&zone->lock, 0); \ MACRO_END -#define lock_try_zone(zone) simple_lock_try(&zone->lock) +#define lock_try_zone(zone) mutex_try(&zone->lock) kern_return_t zget_space( vm_offset_t size, @@ -247,7 +266,7 @@ vm_size_t zalloc_wasted_space; struct zone_page_table_entry * zone_page_table; vm_offset_t zone_map_min_address; vm_offset_t zone_map_max_address; -integer_t zone_pages; +unsigned int zone_pages; /* * Exclude more than one concurrent garbage collection @@ -269,7 +288,7 @@ decl_mutex_data(, zone_gc_lock) decl_simple_lock_data(, all_zones_lock) zone_t first_zone; zone_t *last_zone; -int num_zones; +unsigned int num_zones; boolean_t zone_gc_allowed = TRUE; boolean_t zone_gc_forced = FALSE; @@ -287,7 +306,7 @@ zinit( vm_size_t size, /* the size of an element */ vm_size_t max, /* maximum memory to use */ vm_size_t alloc, /* allocation size */ - char *name) /* a name 
for the zone */ + const char *name) /* a name for the zone */ { zone_t z; @@ -309,18 +328,29 @@ zinit( ((size-1) % sizeof(z->free_elements)); if (alloc == 0) alloc = PAGE_SIZE; - alloc = round_page_32(alloc); - max = round_page_32(max); + alloc = round_page(alloc); + max = round_page(max); /* - * We look for an allocation size with least fragmentation - * in the range of 1 - 5 pages. This size will be used unless + * we look for an allocation size with less than 1% waste + * up to 5 pages in size... + * otherwise, we look for an allocation size with least fragmentation + * in the range of 1 - 5 pages + * This size will be used unless * the user suggestion is larger AND has less fragmentation */ { vm_size_t best, waste; unsigned int i; best = PAGE_SIZE; waste = best % size; - for (i = 2; i <= 5; i++){ vm_size_t tsize, twaste; - tsize = i * PAGE_SIZE; + + for (i = 1; i <= 5; i++) { + vm_size_t tsize, twaste; + + tsize = i * PAGE_SIZE; + + if ((tsize % size) < (tsize / 100)) { + alloc = tsize; + goto use_this_allocation; + } twaste = tsize % size; if (twaste < waste) best = tsize, waste = twaste; @@ -328,6 +358,7 @@ zinit( if (alloc <= best || (alloc % size >= waste)) alloc = best; } +use_this_allocation: if (max && (max < alloc)) max = alloc; @@ -374,10 +405,11 @@ zinit( void zcram( register zone_t zone, - vm_offset_t newmem, + void *newaddr, vm_size_t size) { register vm_size_t elem_size; + vm_offset_t newmem = (vm_offset_t) newaddr; /* Basic sanity checks */ assert(zone != ZONE_NULL && newmem != (vm_offset_t)0); @@ -410,7 +442,7 @@ zget_space( vm_offset_t *result) { vm_offset_t new_space = 0; - vm_size_t space_to_add; + vm_size_t space_to_add = 0; simple_lock(&zget_space_lock); while ((zalloc_next_space + size) > zalloc_end_of_space) { @@ -418,7 +450,7 @@ zget_space( * Add at least one page to allocation area. 
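The allocation-size heuristic the zinit() hunk above introduces is easier to follow outside diff form. Below is a minimal standalone sketch of the same selection logic, assuming a 4096-byte page; the PAGE_SIZE constant and the pick_alloc_size() name are illustrative, and the final comparison against the caller's suggested size is omitted:

    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096          /* assumed page size, for illustration */

    /* Prefer the smallest allocation size (1-5 pages) that wastes less
     * than 1% of the block; otherwise fall back to the candidate with
     * the least fragmentation, as the reworked zinit() does. */
    static size_t
    pick_alloc_size(size_t elem_size)
    {
        size_t best = PAGE_SIZE;
        size_t waste = best % elem_size;
        unsigned int i;

        for (i = 1; i <= 5; i++) {
            size_t tsize = i * PAGE_SIZE;

            if ((tsize % elem_size) < (tsize / 100))
                return tsize;               /* under 1% waste: take it */
            if ((tsize % elem_size) < waste) {
                best = tsize;
                waste = tsize % elem_size;
            }
        }
        return best;                        /* least-waste fallback */
    }

    int main(void)
    {
        /* 680-byte elements fit six to a page with 16 bytes (under 1%)
         * wasted, so the very first candidate, one page, is chosen. */
        printf("%zu\n", pick_alloc_size(680));
        return 0;
    }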
*/ - space_to_add = round_page_32(size); + space_to_add = round_page(size); if (new_space == 0) { kern_return_t retval; @@ -487,8 +519,8 @@ zget_space( void zone_steal_memory(void) { - zdata_size = round_page_32(128*sizeof(struct zone)); - zdata = pmap_steal_memory(zdata_size); + zdata_size = round_page(128*sizeof(struct zone)); + zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0); } @@ -513,13 +545,13 @@ zfill( if (nelem <= 0) return 0; size = nelem * zone->elem_size; - size = round_page_32(size); + size = round_page(size); kr = kmem_alloc_wired(kernel_map, &memory, size); if (kr != KERN_SUCCESS) return 0; zone_change(zone, Z_FOREIGN, TRUE); - zcram(zone, memory, size); + zcram(zone, (void *)memory, size); nalloc = size / zone->elem_size; assert(nalloc >= nelem); @@ -537,13 +569,13 @@ zone_bootstrap(void) vm_size_t zone_zone_size; vm_offset_t zone_zone_space; - simple_lock_init(&all_zones_lock, ETAP_MISC_ZONE_ALL); + simple_lock_init(&all_zones_lock, 0); first_zone = ZONE_NULL; last_zone = &first_zone; num_zones = 0; - simple_lock_init(&zget_space_lock, ETAP_MISC_ZONE_GET); + simple_lock_init(&zget_space_lock, 0); zalloc_next_space = zdata; zalloc_end_of_space = zdata + zdata_size; zalloc_wasted_space = 0; @@ -555,7 +587,7 @@ zone_bootstrap(void) zone_change(zone_zone, Z_COLLECT, FALSE); zone_zone_size = zalloc_end_of_space - zalloc_next_space; zget_space(zone_zone_size, &zone_zone_space); - zcram(zone_zone, zone_zone_space, zone_zone_size); + zcram(zone_zone, (void *)zone_zone_space, zone_zone_size); } void @@ -568,10 +600,11 @@ zone_init( vm_size_t zone_table_size; retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, - FALSE, TRUE, &zone_map); + FALSE, VM_FLAGS_ANYWHERE, &zone_map); + if (retval != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed"); - zone_max = zone_min + round_page_32(max_zonemap_size); + zone_max = zone_min + round_page(max_zonemap_size); /* * Setup garbage collection information: */ @@ -580,11 +613,11 @@ zone_init( if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table, zone_table_size) != KERN_SUCCESS) panic("zone_init"); - zone_min = (vm_offset_t)zone_page_table + round_page_32(zone_table_size); + zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); zone_pages = atop_32(zone_max - zone_min); zone_map_min_address = zone_min; zone_map_max_address = zone_max; - mutex_init(&zone_gc_lock, ETAP_NO_TRACE); + mutex_init(&zone_gc_lock, 0); zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); } @@ -592,7 +625,7 @@ zone_init( /* * zalloc returns an element from the specified zone. 
 */ -vm_offset_t +void * zalloc_canblock( register zone_t zone, boolean_t canblock) @@ -601,7 +634,6 @@ zalloc_canblock( kern_return_t retval; assert(zone != ZONE_NULL); - check_simple_locks(); lock_zone(zone); @@ -660,7 +692,7 @@ zalloc_canblock( if (vm_pool_low() || retry == TRUE) alloc_size = - round_page_32(zone->elem_size); + round_page(zone->elem_size); else alloc_size = zone->alloc_size; @@ -670,13 +702,13 @@ zalloc_canblock( if (retval == KERN_SUCCESS) { zone_page_init(space, alloc_size, ZONE_PAGE_USED); - zcram(zone, space, alloc_size); + zcram(zone, (void *)space, alloc_size); break; } else if (retval != KERN_RESOURCE_SHORTAGE) { /* would like to cause a zone_gc() */ if (retry == TRUE) - panic("zalloc"); + panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval); retry = TRUE; } else { break; @@ -720,7 +752,7 @@ zalloc_canblock( if (zone_debug_enabled(zone)) space += ZONE_DEBUG_OFFSET; #endif - return(space); + return((void *)space); } if (retval == KERN_RESOURCE_SHORTAGE) { unlock_zone(zone); VM_PAGE_WAIT(); lock_zone(zone); } else { - panic("zalloc"); + panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval); } } } @@ -753,18 +785,18 @@ zalloc_canblock( unlock_zone(zone); - return(addr); + return((void *)addr); } -vm_offset_t +void * zalloc( register zone_t zone) { return( zalloc_canblock(zone, TRUE) ); } -vm_offset_t +void * zalloc_noblock( register zone_t zone) { @@ -773,10 +805,10 @@ zalloc_noblock( void zalloc_async( - thread_call_param_t p0, - thread_call_param_t p1) + thread_call_param_t p0, + __unused thread_call_param_t p1) { - vm_offset_t elt; + void *elt; elt = zalloc_canblock((zone_t)p0, TRUE); zfree((zone_t)p0, elt); @@ -793,7 +825,7 @@ zalloc_async( * This form should be used when you can not block (like when * processing an interrupt). */ -vm_offset_t +void * zget( register zone_t zone) { @@ -802,7 +834,7 @@ zget( assert( zone != ZONE_NULL ); if (!lock_try_zone(zone)) - return ((vm_offset_t)0); + return NULL; REMOVE_FROM_ZONE(zone, addr, vm_offset_t); #if ZONE_DEBUG @@ -813,7 +845,7 @@ zget( #endif /* ZONE_DEBUG */ unlock_zone(zone); - return(addr); + return((void *) addr); } /* Keep this FALSE by default. Large memory machines run orders of magnitude @@ -826,8 +858,9 @@ static vm_offset_t zone_last_bogus_elem = 0; void zfree( register zone_t zone, - vm_offset_t elem) + void *addr) { + vm_offset_t elem = (vm_offset_t) addr; #if MACH_ASSERT /* Basic sanity checks */ @@ -842,11 +875,10 @@ zfree( !from_zone_map(elem, zone->elem_size)) { #if MACH_ASSERT panic("zfree: non-allocated memory in collectable zone!"); -#else +#endif zone_last_bogus_zone = zone; zone_last_bogus_elem = elem; return; -#endif } lock_zone(zone); @@ -965,7 +997,7 @@ zprealloc( if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS) panic("zprealloc"); zone_page_init(addr, size, ZONE_PAGE_USED); - zcram(zone, addr, size); + zcram(zone, (void *)addr, size); } } @@ -1177,9 +1209,13 @@ zone_gc(void) /* * Do a quick feasibility check before we scan the zone: - * skip unless there is likelihood of getting 1+ pages back
+ * skip unless there is likelihood of getting pages back + * (i.e. we need a whole allocation block's worth of free + * elements before we can garbage collect) and + * the zone has more than 10 percent of its elements free */ - if (z->cur_size - z->count * elt_size <= 2 * PAGE_SIZE){ + if (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) || + ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10))) { unlock_zone(z); continue; } @@ -1390,11 +1426,11 @@ consider_zone_gc(void) { /* * By default, don't attempt zone GC more frequently - * than once / 2 seconds. + * than once / 1 minute. */ if (zone_gc_max_rate == 0) - zone_gc_max_rate = (2 << SCHED_TICK_SHIFT) + 1; + zone_gc_max_rate = (60 << SCHED_TICK_SHIFT) + 1; if (zone_gc_allowed && ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) || @@ -1405,14 +1441,6 @@ consider_zone_gc(void) } } -#include -#include -#include -#include -#include -#include - -#include kern_return_t host_zone_info( @@ -1453,10 +1481,10 @@ host_zone_info( if (max_zones <= *namesCntp) { /* use in-line memory */ - + names_size = *namesCntp * sizeof *names; names = *namesp; } else { - names_size = round_page_32(max_zones * sizeof *names); + names_size = round_page(max_zones * sizeof *names); kr = kmem_alloc_pageable(ipc_kernel_map, &names_addr, names_size); if (kr != KERN_SUCCESS) @@ -1466,10 +1494,10 @@ host_zone_info( if (max_zones <= *infoCntp) { /* use in-line memory */ - + info_size = *infoCntp * sizeof *info; info = *infop; } else { - info_size = round_page_32(max_zones * sizeof *info); + info_size = round_page(max_zones * sizeof *info); kr = kmem_alloc_pageable(ipc_kernel_map, &info_addr, info_size); if (kr != KERN_SUCCESS) { @@ -1543,8 +1571,8 @@ host_zone_info( if (used != names_size) bzero((char *) (names_addr + used), names_size - used); - kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size, - TRUE, &copy); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)names_addr, + (vm_map_size_t)names_size, TRUE, &copy); assert(kr == KERN_SUCCESS); *namesp = (zone_name_t *) copy; @@ -1560,8 +1588,8 @@ host_zone_info( if (used != info_size) bzero((char *) (info_addr + used), info_size - used); - kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, - TRUE, &copy); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)info_addr, + (vm_map_size_t)info_size, TRUE, &copy); assert(kr == KERN_SUCCESS); *infop = (zone_info_t *) copy; @@ -1615,12 +1643,12 @@ db_print_zone( /*ARGSUSED*/ void db_show_one_zone( - db_expr_t addr, - int have_addr, - db_expr_t count, - char * modif) + db_expr_t addr, + int have_addr, + __unused db_expr_t count, + __unused char * modif) { - struct zone *z = (zone_t)addr; + struct zone *z = (zone_t)((char *)0 + addr); if (z == ZONE_NULL || !have_addr){ db_error("No Zone\n"); @@ -1634,10 +1662,10 @@ db_show_one_zone( /*ARGSUSED*/ void db_show_all_zones( - db_expr_t addr, - int have_addr, - db_expr_t count, - char * modif) + __unused db_expr_t addr, + int have_addr, + db_expr_t count, + __unused char * modif) { zone_t z; unsigned total = 0; @@ -1784,32 +1812,34 @@ db_zone_print_free( /* should we care about locks here ?
*/ #if MACH_KDB -vm_offset_t +void * next_element( zone_t z, - vm_offset_t elt) + void *prev) { + char *elt = (char *)prev; + if (!zone_debug_enabled(z)) return(0); elt -= ZONE_DEBUG_OFFSET; - elt = (vm_offset_t) queue_next((queue_t) elt); + elt = (char *) queue_next((queue_t) elt); if ((queue_t) elt == &z->active_zones) return(0); elt += ZONE_DEBUG_OFFSET; return(elt); } -vm_offset_t +void * first_element( zone_t z) { - vm_offset_t elt; + char *elt; if (!zone_debug_enabled(z)) return(0); if (queue_empty(&z->active_zones)) return(0); - elt = (vm_offset_t) queue_first(&z->active_zones); + elt = (char *)queue_first(&z->active_zones); elt += ZONE_DEBUG_OFFSET; return(elt); } @@ -1825,7 +1855,7 @@ zone_count( zone_t z, int tail) { - vm_offset_t elt; + void *elt; int count = 0; boolean_t print = (tail != 0); diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index 30a8a80d2..f03900066 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,15 +56,14 @@ * */ +#ifdef KERNEL_PRIVATE + #ifndef _KERN_ZALLOC_H_ #define _KERN_ZALLOC_H_ #include #include - -#include - -#ifdef __APPLE_API_PRIVATE +#include #ifdef MACH_KERNEL_PRIVATE @@ -85,11 +84,11 @@ struct zone { int count; /* Number of elements used now */ vm_offset_t free_elements; + decl_mutex_data(,lock) /* generic lock */ vm_size_t cur_size; /* current memory utilization */ vm_size_t max_size; /* how large can this zone grow */ vm_size_t elem_size; /* size of an element */ vm_size_t alloc_size; /* size used for more memory */ - char *zone_name; /* a name for the zone */ unsigned int /* boolean_t */ exhaustible :1, /* (F) merely return if empty? */ /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */ @@ -101,10 +100,10 @@ struct zone { /* boolean_t */ doing_gc :1; /* garbage collect in progress? 
*/ struct zone * next_zone; /* Link for all-zones list */ call_entry_data_t call_async_alloc; /* callout for asynchronous alloc */ + const char *zone_name; /* a name for the zone */ #if ZONE_DEBUG queue_head_t active_zones; /* active elements */ #endif /* ZONE_DEBUG */ - decl_simple_lock_data(,lock) /* generic lock */ }; extern void zone_gc(void); @@ -117,102 +116,111 @@ extern void zone_steal_memory(void); extern void zone_bootstrap(void); /* Init zone module */ -extern void zone_init(vm_size_t); +extern void zone_init( + vm_size_t map_size); -#endif /* MACH_KERNEL_PRIVATE */ +/* Stack use statistics */ +extern void stack_fake_zone_info( + int *count, + vm_size_t *cur_size, + vm_size_t *max_size, + vm_size_t *elem_size, + vm_size_t *alloc_size, + int *collectable, + int *exhaustable); -#endif /* __APPLE_API_PRIVATE */ +#if ZONE_DEBUG -/* Allocate from zone */ -extern vm_offset_t zalloc( - zone_t zone); +#if MACH_KDB -/* Non-blocking version of zalloc */ -extern vm_offset_t zalloc_noblock( - zone_t zone); +extern void * next_element( + zone_t z, + void *elt); -/* Get from zone free list */ -extern vm_offset_t zget( - zone_t zone); +extern void * first_element( + zone_t z); -/* Create zone */ -extern zone_t zinit( - vm_size_t size, /* the size of an element */ - vm_size_t max, /* maximum memory to use */ - vm_size_t alloc, /* allocation size */ - char *name); /* a name for the zone */ +#endif /* MACH_KDB */ + +extern void zone_debug_enable( + zone_t z); + +extern void zone_debug_disable( + zone_t z); + +#endif /* ZONE_DEBUG */ + +#endif /* MACH_KERNEL_PRIVATE */ + +__BEGIN_DECLS + +#ifdef XNU_KERNEL_PRIVATE + +/* Allocate from zone */ +extern void * zalloc( + zone_t zone); /* Free zone element */ extern void zfree( - zone_t zone, - vm_offset_t elem); + zone_t zone, + void *elem); + +/* Create zone */ +extern zone_t zinit( + vm_size_t size, /* the size of an element */ + vm_size_t maxmem, /* maximum memory to use */ + vm_size_t alloc, /* allocation size */ + const char *name); /* a name for the zone */ + + +/* Non-blocking version of zalloc */ +extern void * zalloc_noblock( + zone_t zone); + +/* direct (non-wrappered) interface */ +extern void * zalloc_canblock( + zone_t zone, + boolean_t canblock); + +/* Get from zone free list */ +extern void * zget( + zone_t zone); /* Fill zone with memory */ extern void zcram( - zone_t zone, - vm_offset_t newmem, - vm_size_t size); + zone_t zone, + void *newmem, + vm_size_t size); /* Initially fill zone with specified number of elements */ extern int zfill( - zone_t zone, - int nelem); + zone_t zone, + int nelem); + /* Change zone parameters */ extern void zone_change( - zone_t zone, - unsigned int item, - boolean_t value); - -/* Preallocate space for zone from zone map */ -extern void zprealloc( - zone_t zone, - vm_size_t size); - -/* - * zone_free_count returns a hint as to the current number of free elements - * in the zone. By the time it returns, it may no longer be true (a new - * element might have been added, or an element removed). - * This routine may be used in conjunction with zcram and a lock to regulate - * adding memory to a non-expandable zone. 
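Taken together, the declarations above move every zone client from vm_offset_t arithmetic to plain pointers and const-correct names. A hypothetical client under the revised interface; the widget_* names are illustrative, and a kernel build context with PAGE_SIZE available is assumed:

    #include <kern/zalloc.h>        /* the interface revised above */

    /* A zone-backed allocator for a fixed-size kernel structure. */
    typedef struct widget {
        int            id;
        struct widget *next;
    } widget_t;

    static zone_t widget_zone;

    void
    widget_zone_init(void)
    {
        widget_zone = zinit(sizeof (widget_t),        /* element size */
                            8192 * sizeof (widget_t), /* max memory */
                            PAGE_SIZE,                /* alloc size hint */
                            "widgets");               /* now a const char * */
    }

    widget_t *
    widget_alloc(void)
    {
        /* zalloc() may block; zget()/zalloc_noblock() are the
         * non-blocking variants */
        return (widget_t *)zalloc(widget_zone);
    }

    void
    widget_free(widget_t *w)
    {
        zfree(widget_zone, w);      /* takes void *, no cast needed */
    }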
- */ -extern integer_t zone_free_count(zone_t zone); + zone_t zone, + unsigned int item, + boolean_t value); -/* - * Item definitions for zone_change: - */ +/* Item definitions */ #define Z_EXHAUST 1 /* Make zone exhaustible */ #define Z_COLLECT 2 /* Make zone collectable */ #define Z_EXPAND 3 /* Make zone expandable */ -#define Z_FOREIGN 4 /* Allow collectable zone to contain foreign */ - /* (not allocated via zalloc) elements. */ - -#ifdef __APPLE_API_PRIVATE - -#ifdef MACH_KERNEL_PRIVATE - -#if ZONE_DEBUG - -#if MACH_KDB +#define Z_FOREIGN 4 /* Allow collectable zone to contain foreign elements */ -extern vm_offset_t next_element( - zone_t z, - vm_offset_t elt); - -extern vm_offset_t first_element( - zone_t z); - -#endif /* MACH_KDB */ - -extern void zone_debug_enable( - zone_t z); - -extern void zone_debug_disable( - zone_t z); +/* Preallocate space for zone from zone map */ +extern void zprealloc( + zone_t zone, + vm_size_t size); -#endif /* ZONE_DEBUG */ +extern integer_t zone_free_count( + zone_t zone); -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ -#endif /* __APPLE_API_PRIVATE */ +__END_DECLS #endif /* _KERN_ZALLOC_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/libsa/string.h b/osfmk/libsa/string.h index 486492e95..ba3539aa8 100644 --- a/osfmk/libsa/string.h +++ b/osfmk/libsa/string.h @@ -22,40 +22,8 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.4.1 1997/02/21 15:43:21 barbou - * Removed "size_t" definition, include "types.h" instead. - * [1997/02/21 15:36:54 barbou] - * - * Revision 1.1.2.4 1996/10/10 14:13:33 emcmanus - * Added memmove() prototype. - * [1996/10/10 14:11:51 emcmanus] - * - * Revision 1.1.2.3 1996/10/07 07:20:26 paire - * Added strncat() prototype, since it is defined in libsa_mach. - * [96/10/07 paire] - * - * Revision 1.1.2.2 1996/10/04 11:36:07 emcmanus - * Added strspn() prototype, since it is defined in libsa_mach. - * [1996/10/04 11:31:57 emcmanus] - * - * Revision 1.1.2.1 1996/09/17 16:56:15 bruel - * created for standalone mach servers. 
- * [96/09/17 bruel] - * - * $EndLog$ - */ - -#ifndef _MACH_STRING_H_ -#define _MACH_STRING_H_ 1 +#ifndef _STRING_H_ +#define _STRING_H_ 1 #ifdef MACH_KERNEL_PRIVATE #include @@ -72,6 +40,7 @@ extern "C" { #endif extern void *memcpy(void *, const void *, size_t); +extern int memcmp(const void *, const void *, size_t); extern void *memmove(void *, const void *, size_t); extern void *memset(void *, int, size_t); @@ -82,11 +51,16 @@ extern char *strcat(char *, const char *); extern char *strncat(char *, const char *, size_t); extern int strcmp(const char *, const char *); extern int strncmp(const char *,const char *, size_t); +extern int strcasecmp(const char *s1, const char *s2); +extern int strncasecmp(const char *s1, const char *s2, size_t n); extern char *strchr(const char *s, int c); -extern size_t strspn(const char *, const char *); + +extern int bcmp(const void *, const void *, size_t); +extern void bcopy(const void *, void *, size_t); +extern void bzero(void *, size_t); #ifdef __cplusplus } #endif -#endif /* _MACH_STRING_H_ */ +#endif /* _STRING_H_ */ diff --git a/osfmk/libsa/types.h b/osfmk/libsa/types.h index ec12bec91..0a90a2d6d 100644 --- a/osfmk/libsa/types.h +++ b/osfmk/libsa/types.h @@ -65,7 +65,9 @@ typedef char * caddr_t; /* address of a (signed) char */ typedef int time_t; /* a signed 32 */ typedef unsigned int daddr_t; /* an unsigned 32 */ +#if 0 /* off_t should be 64-bit ! */ typedef unsigned int off_t; /* another unsigned 32 */ +#endif #define major(i) (((i) >> 8) & 0xFF) diff --git a/osfmk/mach-o/loader.h b/osfmk/mach-o/loader.h index 277b2b1e2..2b979ee5b 100644 --- a/osfmk/mach-o/loader.h +++ b/osfmk/mach-o/loader.h @@ -24,6 +24,10 @@ /* * This file describes the format of mach object files. + * + * NOTE: This header is used for manipulating 32 bit mach objects + * within a 32 bit mach_kernel for the purpose of dealing + * with linking loadable kernel modules. */ /* diff --git a/osfmk/mach-o/mach_header.c b/osfmk/mach-o/mach_header.c index 02e88426b..dc4261337 100644 --- a/osfmk/mach-o/mach_header.c +++ b/osfmk/mach-o/mach_header.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -24,6 +24,14 @@ * * Functions for accessing mach-o headers. * + * NOTE: This file supports only 32 bit mach headers at the present + * time; its primary use is by kld, and all externally + * referenced routines at the present time operate against + * the 32 bit mach header _mh_execute_header, which is the + * header for the currently executing kernel. Adding support + * for 64 bit kernels is possible, but is not necessary at the + * present time.
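The routines in this file share one idiom: walk the load commands that follow a 32 bit mach_header, advancing by each command's cmdsize. A condensed sketch of that walk; find_segment() is an illustrative name, not a routine from this file:

    #include <stdint.h>
    #include <string.h>
    #include <mach-o/loader.h>

    static struct segment_command *
    find_segment(struct mach_header *mh, const char *seg_name)
    {
        /* load commands start immediately after the header */
        struct load_command *lc = (struct load_command *)(mh + 1);
        uint32_t i;

        for (i = 0; i < mh->ncmds; i++) {
            if (lc->cmd == LC_SEGMENT) {
                struct segment_command *sgp = (struct segment_command *)lc;
                if (strncmp(sgp->segname, seg_name,
                            sizeof (sgp->segname)) == 0)
                    return sgp;
            }
            /* cmdsize covers the command header and its payload */
            lc = (struct load_command *)((char *)lc + lc->cmdsize);
        }
        return NULL;                /* segment not present */
    }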
+ * * HISTORY * 27-MAR-97 Umesh Vaishampayan (umeshv@NeXT.com) * Added getsegdatafromheader(); @@ -36,31 +44,27 @@ #include #include #include +#include // from libsa #ifdef __MACHO__ extern struct mach_header _mh_execute_header; -struct section *getsectbynamefromheader( - struct mach_header *header, - char *seg_name, - char *sect_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - char *seg_name); - /* * return the last address (first avail) + * + * This routine operates against the currently executing kernel only */ #ifdef MACH_BSD __private_extern__ #endif -vm_offset_t getlastaddr(void) +vm_offset_t +getlastaddr(void) { struct segment_command *sgp; vm_offset_t last_addr = 0; struct mach_header *header = &_mh_execute_header; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -77,10 +81,12 @@ vm_offset_t getlastaddr(void) #ifdef XXX_MACH_BSD __private_extern__ #endif +/* + * This routine operates against the currently executing kernel only + */ struct mach_header ** getmachheaders(void) { - extern struct mach_header _mh_execute_header; struct mach_header **tl; if (kmem_alloc(kernel_map, (vm_offset_t *) &tl, 2*sizeof(struct mach_header *)) != KERN_SUCCESS) @@ -96,6 +102,8 @@ getmachheaders( * named segment if it exists in the mach header passed to it. Also it returns * the size of the section data indirectly through the pointer size. Otherwise * it returns zero for the pointer and the size. + * + * This routine can operate against any 32 bit mach header. */ #ifdef MACH_BSD __private_extern__ @@ -103,8 +111,8 @@ __private_extern__ void * getsectdatafromheader( struct mach_header *mhp, - char *segname, - char *sectname, + const char *segname, + const char *sectname, int *size) { const struct section *sp; @@ -132,7 +140,7 @@ __private_extern__ void * getsegdatafromheader( struct mach_header *mhp, - char *segname, + const char *segname, int *size) { const struct segment_command *sc; @@ -152,6 +160,8 @@ getsegdatafromheader( * This routine returns the section structure for the named section in the * named segment for the mach_header pointer passed to it if it exists. * Otherwise it returns zero. + * + * This routine can operate against any 32 bit mach header. */ #ifdef MACH_BSD __private_extern__ @@ -159,12 +169,12 @@ __private_extern__ struct section * getsectbynamefromheader( struct mach_header *mhp, - char *segname, - char *sectname) + const char *segname, + const char *sectname) { struct segment_command *sgp; struct section *sp; - long i, j; + unsigned long i, j; sgp = (struct segment_command *) ((char *)mhp + sizeof(struct mach_header)); @@ -192,12 +202,12 @@ getsectbynamefromheader( #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *getsegbynamefromheader( +/* + * This routine can operate against any 32 bit mach header.
+ */ +struct segment_command * +getsegbynamefromheader( struct mach_header *header, - char *seg_name) + const char *seg_name) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -243,7 +257,9 @@ static struct { 4, // align 0, // reloff 0, // nreloc - 0 // flags + 0, // flags + 0, // reserved1 + 0 // reserved2 } }; @@ -261,7 +277,8 @@ static vm_offset_t getsizeofmacho(struct mach_header *header); #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *firstseg(void) +struct segment_command * +firstseg(void) { return firstsegfromheader(&_mh_execute_header); } @@ -269,10 +286,11 @@ struct segment_command *firstseg(void) #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *firstsegfromheader(struct mach_header *header) +struct segment_command * +firstsegfromheader(struct mach_header *header) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -287,7 +305,14 @@ struct segment_command *firstsegfromheader(struct mach_header *header) #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *nextseg(struct segment_command *sgp) +/* + * This routine operates against a 32 bit mach segment_command structure + * pointer from the currently executing kernel only, to obtain the + * sequentially next segment_command structure in the currently executing + * kernel + */ +struct segment_command * +nextseg(struct segment_command *sgp) { struct segment_command *this; @@ -306,12 +331,18 @@ struct segment_command *nextseg(struct segment_command *sgp) #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *nextsegfromheader( +/* + * This routine operates against any 32 bit mach segment_command structure + * pointer and the provided 32 bit header, to obtain the sequentially next + * segment_command structure in that header. + */ +struct segment_command * +nextsegfromheader( struct mach_header *header, struct segment_command *seg) { struct segment_command *sgp; - int i; + unsigned long i; sgp = (struct segment_command *) ((char *)header + sizeof(struct mach_header)); @@ -336,12 +367,14 @@ struct segment_command *nextsegfromheader( /* - * Return the address of the named Mach-O segment, or NULL. + * Return the address of the named Mach-O segment from the currently + * executing 32 bit kernel, or NULL. */ #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *getsegbyname(char *seg_name) +struct segment_command * +getsegbyname(const char *seg_name) { struct segment_command *this; @@ -359,16 +392,16 @@ struct segment_command *getsegbyname(char *seg_name) /* * This routine returns a pointer to the section structure of the named - * section in the named segment if it exist in the mach executable it is - * linked into. Otherwise it returns zero. + * section in the named segment if it exists in the currently executing + * kernel, which it is presumed to be linked into. Otherwise it returns NULL.
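For example, a hypothetical in-kernel caller might resolve the kernel's own __TEXT,__text section through these lookups (kernel_text() is an illustrative name, not a routine from this file):

    extern struct mach_header _mh_execute_header;

    static void *
    kernel_text(int *sizep)
    {
        /* returns a zero pointer and size if the section is absent */
        return getsectdatafromheader(&_mh_execute_header,
                                     "__TEXT", "__text", sizep);
    }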
 */ #ifdef MACH_BSD __private_extern__ #endif struct section * getsectbyname( - char *segname, - char *sectname) + const char *segname, + const char *sectname) { return(getsectbynamefromheader( (struct mach_header *)&_mh_execute_header, segname, sectname)); @@ -377,10 +410,15 @@ getsectbyname( #ifdef MACH_BSD __private_extern__ #endif -struct section *firstsect(struct segment_command *sgp) +/* + * This routine can operate against any 32 bit segment_command structure to + * return the first 32 bit section immediately following that structure. If + * there are no sections associated with the segment_command structure, it + * returns NULL. + */ +struct section * +firstsect(struct segment_command *sgp) { - struct section *sp; - if (!sgp || sgp->nsects == 0) return (struct section *)0; @@ -390,20 +428,33 @@ struct section *firstsect(struct segment_command *sgp) #ifdef MACH_BSD __private_extern__ #endif -struct section *nextsect(struct segment_command *sgp, struct section *sp) +/* + * This routine can operate against any 32 bit segment_command structure and + * 32 bit section to return the next consecutive 32 bit section immediately + * following the 32 bit section provided. If there are no sections following + * the provided section, it returns NULL. + */ +struct section * +nextsect(struct segment_command *sgp, struct section *sp) { struct section *fsp = firstsect(sgp); - if (sp - fsp >= sgp->nsects-1) + if (((unsigned long)(sp - fsp) + 1) >= sgp->nsects) return (struct section *)0; return sp+1; } -static struct fvmfile_command *fvmfilefromheader(struct mach_header *header) +/* + * This routine can operate against any 32 bit mach header to return the + * first occurring 32 bit fvmfile_command section. If one is not present, + * it returns NULL. + */ +static struct fvmfile_command * +fvmfilefromheader(struct mach_header *header) { struct fvmfile_command *fvp; - int i; + unsigned long i; fvp = (struct fvmfile_command *) ((char *)header + sizeof(struct mach_header)); @@ -417,11 +468,14 @@ static struct fvmfile_command *fvmfilefromheader(struct mach_header *header) /* * Create a fake USER seg if a fvmfile_command is present. + * + * This routine operates against the currently executing kernel only */ #ifdef MACH_BSD __private_extern__ #endif -struct segment_command *getfakefvmseg(void) +struct segment_command * +getfakefvmseg(void) { struct segment_command *sgp = getsegbyname("__USER"); struct fvmfile_command *fvp = fvmfilefromheader(&_mh_execute_header); @@ -454,11 +508,13 @@ struct segment_command *getfakefvmseg(void) /* * Figure out the size of the data associated with a * loaded mach_header. + * + * This routine operates against the currently executing kernel only */ -static vm_offset_t getsizeofmacho(struct mach_header *header) +static vm_offset_t +getsizeofmacho(struct mach_header *header) { struct segment_command *sgp; - struct section *sp; vm_offset_t last_addr; last_addr = 0; @@ -484,8 +540,7 @@ getsectcmdsymtabfromheader( struct mach_header *mhp) { struct segment_command *sgp; - struct section *sp; - long i; + unsigned long i; sgp = (struct segment_command *) ((char *)mhp + sizeof(struct mach_header)); diff --git a/osfmk/mach-o/mach_header.h b/osfmk/mach-o/mach_header.h index 8cf484869..25f83b459 100644 --- a/osfmk/mach-o/mach_header.h +++ b/osfmk/mach-o/mach_header.h @@ -22,7 +22,14 @@ /* * File: kern/mach_header.h * - * Definitions for accessing mach-o headers.
This header wraps the + * routines defined in osfmk/mach-o/mach_header.c; this is made clear + * by the existence of the getsectcmdsymtabfromheader() prototype. + * + * NOTE: The functions prototyped by this header only operate against + * 32 bit mach headers. Many of these functions imply the + * currently running kernel, and cannot be used against mach + * headers other than that of the currently running kernel. * * HISTORY * 29-Jan-92 Mike DeMoney (mike@next.com) @@ -46,17 +53,17 @@ struct segment_command *nextseg(struct segment_command *sgp); struct segment_command *nextsegfromheader( struct mach_header *header, struct segment_command *seg); -struct segment_command *getsegbyname(char *seg_name); +struct segment_command *getsegbyname(const char *seg_name); struct segment_command *getsegbynamefromheader( struct mach_header *header, - char *seg_name); -void *getsegdatafromheader(struct mach_header *, char *, int *); -struct section *getsectbyname(char *seg_name, char *sect_name); + const char *seg_name); +void *getsegdatafromheader(struct mach_header *, const char *, int *); +struct section *getsectbyname(const char *seg_name, const char *sect_name); struct section *getsectbynamefromheader( struct mach_header *header, - char *seg_name, - char *sect_name); -void *getsectdatafromheader(struct mach_header *, char *, char *, int *); + const char *seg_name, + const char *sect_name); +void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); struct section *firstsect(struct segment_command *sgp); struct section *nextsect(struct segment_command *sgp, struct section *sp); struct fvmlib_command *fvmlib(void); diff --git a/osfmk/mach/AT386/machdep.mk b/osfmk/mach/AT386/machdep.mk deleted file mode 100644 index 59c45cdbd..000000000 --- a/osfmk/mach/AT386/machdep.mk +++ /dev/null @@ -1,35 +0,0 @@ -# -# @OSF_COPYRIGHT@ -# -# -# HISTORY -# -# Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez -# Import of Mac OS X kernel (~semeria) -# -# Revision 1.1.1.1 1998/03/07 02:25:46 wsanchez -# Import of OSF Mach kernel (~mburg) -# -# Revision 1.1.6.1 1994/09/23 02:33:31 ezf -# change marker to not FREE -# [1994/09/22 21:38:48 ezf] -# -# Revision 1.1.2.2 1993/08/04 19:32:26 gm -# CR9605: Add SUBDIRS to mach_kernel build process. -# [1993/08/03 13:30:04 gm] -# -# $EndLog$ - -T_M_FILES = ${MACH_I386_FILES} - -MACH_I386_FILES = mach_i386_server.c mach_i386_server.h - -.ORDER: ${MACH_I386_FILES} - -${MACH_I386_FILES}: mach/i386/mach_i386.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader mach_i386_server.h \ - -server mach_i386_server.c \ - ${mach/i386/mach_i386.defs:P} diff --git a/osfmk/mach/Makefile b/osfmk/mach/Makefile index 95f956adb..89a374c11 100644 --- a/osfmk/mach/Makefile +++ b/osfmk/mach/Makefile @@ -43,26 +43,28 @@ MIG_DEFS = \ lock_set.defs \ mach_host.defs \ mach_port.defs \ + mach_vm.defs \ notify.defs \ processor.defs \ processor_set.defs \ task.defs \ thread_act.defs \ - vm_map.defs \ - upl.defs + vm_map.defs MACH_PRIVATE_DEFS = \ mach_notify.defs \ memory_object.defs \ memory_object_control.defs \ memory_object_default.defs \ - memory_object_name.defs + memory_object_name.defs \ + upl.defs # # MIG-generated headers that are traditionally used by user # level code.
# MIG_USHDRS = \ + audit_triggers_server.h \ clock_reply_server.h \ exc_server.h \ memory_object_server.h \ @@ -70,7 +72,6 @@ MIG_USHDRS = \ notify_server.h MIG_UUHDRS = \ - audit_triggers.h \ clock.h \ clock_priv.h \ host_priv.h \ @@ -79,6 +80,7 @@ MIG_UUHDRS = \ lock_set.h \ mach_host.h \ mach_port.h \ + mach_vm.h \ memory_object_control.h \ memory_object_name.h \ processor.h \ @@ -92,8 +94,6 @@ MIGINCLUDES = ${MIG_UUHDRS} ${MIG_USHDRS} DATAFILES = \ boolean.h \ - boot_info.h \ - bootstrap.h \ clock_types.h \ error.h \ exception.h \ @@ -140,6 +140,7 @@ DATAFILES = \ vm_inherit.h \ vm_param.h \ vm_prot.h \ + vm_purgable.h \ vm_region.h \ vm_statistics.h \ vm_sync.h \ @@ -148,15 +149,23 @@ DATAFILES = \ ${MIG_DEFS} INSTALL_MI_LIST = \ + bootstrap.h \ ${DATAFILES} +INSTALL_KF_MI_LIST = \ + mach_interface.h \ + $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES}) + +INSTALL_KF_MI_LCL_LIST = \ + mach_interface.h \ + $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES}) + INSTALL_MI_GEN_LIST = INSTALL_MI_DIR = mach EXPORT_MI_LIST = \ mach_interface.h \ - etap.h etap_events.h \ ${DATAFILES} EXPORT_MI_GEN_LIST = \ @@ -204,6 +213,7 @@ MIGKUFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_USER=1 -maxonstack 1024 # level code. # MIG_KUHDRS = \ + audit_triggers.h \ clock_reply.h \ exc.h \ host_notify_reply.h \ @@ -237,6 +247,7 @@ MIG_KSHDRS = \ mach_host_server.h \ mach_notify_server.h \ mach_port_server.h \ + mach_vm_server.h \ memory_object_server.h \ memory_object_control_server.h \ memory_object_default_server.h \ @@ -260,6 +271,7 @@ MIG_KSSRC = \ mach_host_server.c \ mach_notify_server.c \ mach_port_server.c \ + mach_vm_server.c \ memory_object_server.c \ memory_object_control_server.c \ memory_object_default_server.c \ diff --git a/osfmk/mach/boolean.h b/osfmk/mach/boolean.h index 4b41b4462..83b25e31d 100644 --- a/osfmk/mach/boolean.h +++ b/osfmk/mach/boolean.h @@ -22,66 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 02:34:07 ezf - * change marker to not FREE - * [1994/09/22 21:39:00 ezf] - * - * Revision 1.2.2.3 1993/08/03 18:22:11 gm - * CR9598: Remove unneeded EXPORT_BOOLEAN and KERNEL ifdefs. Move - * the code inside the include protection and remove the boolean_t - * casts from TRUE and FALSE. - * [1993/08/02 17:49:29 gm] - * - * Revision 1.2.2.2 1993/06/09 02:39:27 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:15:31 jeffc] - * - * Revision 1.2 1993/04/19 16:31:43 devrcs - * ansi C conformance changes - * [1993/02/02 18:52:46 david] - * - * Revision 1.1 1992/09/30 02:30:33 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.3 91/05/14 16:51:06 mrt - * Correcting copyright - * - * Revision 2.2 91/02/05 17:31:38 mrt - * Changed to new Mach copyright - * [91/02/01 17:16:36 mrt] - * - * Revision 2.1 89/08/03 15:59:35 rwd - * Created. - * - * Revision 2.4 89/02/25 18:12:08 gm0w - * Changes for cleanup. - * - * Revision 2.3 89/02/07 00:51:34 mwyoung - * Relocated from sys/boolean.h - * - * Revision 2.2 88/08/24 02:23:06 mwyoung - * Adjusted include file references. - * [88/08/17 02:09:46 mwyoung] - * - * - * 18-Nov-87 Avadis Tevanian (avie) at Carnegie-Mellon University - * Header file fixup, purge history. 
- * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -116,8 +56,8 @@ * */ -#ifndef BOOLEAN_H_ -#define BOOLEAN_H_ +#ifndef _MACH_BOOLEAN_H_ +#define _MACH_BOOLEAN_H_ /* * Pick up "boolean_t" type definition @@ -128,12 +68,9 @@ #endif /* ASSEMBLER */ /* - * Define TRUE and FALSE, only if they haven't been before, - * and not if they're explicitly refused. + * Define TRUE and FALSE if not defined. */ -#ifndef NOBOOL - #ifndef TRUE #define TRUE 1 #endif /* TRUE */ @@ -142,6 +79,4 @@ #define FALSE 0 #endif /* FALSE */ -#endif /* !defined(NOBOOL) */ - -#endif /* BOOLEAN_H_ */ +#endif /* _MACH_BOOLEAN_H_ */ diff --git a/osfmk/mach/boot_info.h b/osfmk/mach/boot_info.h deleted file mode 100644 index 9c9555187..000000000 --- a/osfmk/mach/boot_info.h +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.16.4 1996/01/09 19:21:29 devrcs - * Added bootstrap map for alpha. - * This probably should be moved to some MD include file. - * It's not really machine dependent, just a different - * way of doing things. - * [1995/12/01 19:49:04 jfraser] - * - * Merged '64-bit safe' changes from DEC alpha port. - * [1995/11/21 18:08:36 jfraser] - * - * Revision 1.2.16.3 1995/01/06 19:50:04 devrcs - * mk6 CR668 - 1.3b26 merge - * Added region_desc structure; - * [1994/10/14 03:42:28 dwm] - * - * Revision 1.2.16.2 1994/09/23 02:34:18 ezf - * change marker to not FREE - * [1994/09/22 21:39:03 ezf] - * - * Revision 1.2.16.1 1994/06/13 20:49:19 dlb - * Merge MK6 and NMK17 - * [1994/06/13 20:24:22 dlb] - * - * Revision 1.2.14.1 1994/02/08 11:01:06 bernadat - * Checked in NMK16_1 changes - * [94/02/04 bernadat] - * - * Revision 1.2.12.1 1993/12/23 08:56:06 bernadat - * Added COFF_F. - * [93/11/29 bernadat] - * - * Revision 1.2.3.2 1993/06/24 16:14:07 gm - * CR9371: Moved here from default_pager. - * [1993/06/24 16:08:52 gm] - * - * Revision 1.2.3.2 1993/06/09 02:10:53 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 20:41:54 gm] - * - * Revision 1.2 1993/04/19 16:31:50 devrcs - * Added ROSE support: we need several symbol table entries since ROSE - * can have up to 3 symbol section. - * [93/03/24 bruel] - * - * Use free copyright - * [1993/03/03 12:12:37 bernadat] - * - * Fixed History Revision Comments - * [93/02/24 bernadat] - * - * Created for external default pager. 
- * [1993/02/09 15:40:42 bruel] - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.2 92/01/03 20:19:42 dbg - * Created. - * [91/09/06 dbg] - * - */ -/* CMU_ENDHIST */ -/* - */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#ifndef _MACH_BOOT_INFO_H_ -#define _MACH_BOOT_INFO_H_ - -#include -#include -#include - -/* - * The boot loader uses several instances of the following structure to - * provide a description of the bootstrap task virtual address space, - * consisting of memory regions that need to be mapped from physical pages - * filled by the boot loader, or regions that need to be allocated in the - * new address space. - */ -struct region_desc { - vm_offset_t addr; /* virtual address */ - vm_offset_t offset; /* offset within object */ - vm_size_t size; /* size */ - vm_prot_t prot; /* protection */ - boolean_t mapped; /* mapped or zero-filled */ -}; - -#ifdef __alpha -typedef long physaddr_t; -#define NBOOT_REGIONS 5 - -#define TEXT 0 -#define DATA 1 -#define BSS 2 -#define STACK 3 -#define SYMS 4 - -struct bootstrap_map { - physaddr_t boot_location; - long boot_size; - long boot_entry; - long boot_gp_value; - long boot_region_count; - struct region_desc boot_regions[NBOOT_REGIONS]; -}; - -#endif /* __alpha */ - -#define BOOT_INFO_COMPAT 1 -#if BOOT_INFO_COMPAT -/* - * Old compat code for makeboot produced images - */ - -/* - * Structure of Mach kernel boot file. - */ -#include - -/* - * A Mach kernel boot file consists of the Mach - * kernel image and the bootstrap image, glued - * together. - * - * The first part of the file is a normal executable - * (bootable) file: a.out, coff, or whatever. The - * text and data sizes are set to include the entire - * file. (Some machines do not allow a zero-length - * data segment). - * - * The rest of the file sits where the kernel BSS - * should be. A boot_info record describes the - * sizes of the next 3 sections. Following this - * are the kernel symbol table, the bootstrap image - * (including its symbol table), and the loader - * information for the bootstrap image. Each - * of these sections is padded to an integer (4-byte) - * boundary. - * - * When the file is loaded into memory, the kernel - * text and data segments are at their normal locations. 
- * - * The boot_info structure appears at the start of - * the bss (at 'edata[]'): - */ - -struct boot_info { - vm_size_t sym_size; /* size of kernel symbols */ - vm_size_t boot_size; /* size of bootstrap image */ - vm_size_t load_info_size; /* size of loader information - for bootstrap image */ -}; - -/* - * The 3 sections must be moved out of BSS for the kernel to run: - * - * The kernel symbol table follows the BSS (at 'end[]'). - * - * The bootstrap image is on the first page boundary (machine page - * size) following the kernel symbol table. - * - * The loader information immediately follows the bootstrap image. - */ - -/* - * Loader information for bootstrap image: - */ - -#define AOUT_F 1 -#define ROSE_F 2 -#define COFF_F 3 - -struct loader_info { - int format; /* symbol table format (A.OUT or ROSE) */ - vm_offset_t text_start; /* text start in memory */ - vm_size_t text_size; /* text size */ - vm_offset_t text_offset; /* text offset in file */ - vm_offset_t data_start; /* data+bss start in memory */ - vm_size_t data_size; /* data size */ - vm_offset_t data_offset; /* data offset in file */ - vm_size_t bss_size; /* BSS size */ - vm_offset_t str_offset; /* strings table offset in file */ - vm_size_t str_size; /* strings table size */ - vm_offset_t sym_offset[4]; /* symbol table offset in file */ - vm_size_t sym_size[4]; /* symbol table size */ - vm_offset_t entry_1; /* 2 words for entry address */ - vm_offset_t entry_2; -} ; - -#define EX_NOT_EXECUTABLE 6000 - -#endif /* BOOT_INFO_COMPAT */ - -#endif /* _MACH_BOOT_INFO_H_ */ diff --git a/osfmk/mach/clock_types.defs b/osfmk/mach/clock_types.defs index 840926dc8..0b4c68257 100644 --- a/osfmk/mach/clock_types.defs +++ b/osfmk/mach/clock_types.defs @@ -34,7 +34,7 @@ #include type clock_serv_t = mach_port_t - ctype: clock_serv_t + cusertype: clock_serv_t #if KERNEL_SERVER intran: clock_serv_t convert_port_to_clock(mach_port_t) outtran: mach_port_t convert_clock_to_port(clock_serv_t) @@ -42,7 +42,7 @@ type clock_serv_t = mach_port_t ; type clock_ctrl_t = mach_port_t - ctype: clock_ctrl_t + cusertype: clock_ctrl_t #if KERNEL_SERVER intran: clock_ctrl_t convert_port_to_clock_ctrl(mach_port_t) outtran: mach_port_t convert_clock_ctrl_to_port(clock_ctrl_t) diff --git a/osfmk/mach/clock_types.h b/osfmk/mach/clock_types.h index 93687e944..5fd3f1243 100644 --- a/osfmk/mach/clock_types.h +++ b/osfmk/mach/clock_types.h @@ -30,15 +30,14 @@ */ /* - * N.B. This interface has been deprecated and the contents - * of this file should be considered obsolete. + * All interfaces defined here are obsolete. */ #ifndef _MACH_CLOCK_TYPES_H_ #define _MACH_CLOCK_TYPES_H_ +#include #include -#include /* * Type definitions. @@ -59,19 +58,13 @@ struct mach_timespec { }; typedef struct mach_timespec mach_timespec_t; -#ifdef __APPLE_API_UNSTABLE - /* * Reserved clock id values for default clocks. */ -#define SYSTEM_CLOCK 0 /* advances monotonically and - * uniformly; set to zero at boot */ -#define CALENDAR_CLOCK 1 /* 'wall' clock; effectively - * synchronized to UTC */ +#define SYSTEM_CLOCK 0 +#define CALENDAR_CLOCK 1 -#define REALTIME_CLOCK 0 /* obsolete; use SYSTEM or CALENDAR - * clock depending on particular - * requirements */ +#define REALTIME_CLOCK 0 /* * Attribute names. 
@@ -124,6 +117,4 @@ typedef struct mach_timespec mach_timespec_t; #define BAD_ALRMTYPE(t) (((t) &~ TIME_RELATIVE) != 0) -#endif /* __APPLE_API_UNSTABLE */ - #endif /* _MACH_CLOCK_TYPES_H_ */ diff --git a/osfmk/mach/error.h b/osfmk/mach/error.h index 2f6d6c4cb..6335bdfac 100644 --- a/osfmk/mach/error.h +++ b/osfmk/mach/error.h @@ -22,91 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:29 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.10.2 1995/02/23 17:51:15 alanl - * Merge with DIPC2_SHARED. - * [1995/01/03 21:49:04 alanl] - * - * Revision 1.2.10.1 1994/09/23 02:35:28 ezf - * change marker to not FREE - * [1994/09/22 21:39:26 ezf] - * - * Revision 1.2.8.1 1994/08/04 02:27:36 mmp - * NOTE: file was moved back to b11 version for dipc2_shared. - * Added DIPC error system. - * [1994/05/11 17:36:37 alanl] - * - * Revision 1.2.2.3 1993/08/12 21:59:50 jvs - * Correctly prototype mach_error_fn_t typedef. 9523 - * [1993/08/12 21:57:56 jvs] - * - * Revision 1.2.2.2 1993/06/09 02:39:58 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:15:47 jeffc] - * - * Revision 1.2 1993/04/19 16:33:02 devrcs - * make endif tags ansi compliant/include files - * [1993/02/20 21:44:37 david] - * - * Revision 1.1 1992/09/30 02:30:35 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:51:24 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:31:48 mrt - * Changed to new Mach copyright - * [91/02/01 17:16:50 mrt] - * - * Revision 2.2 90/06/02 14:57:47 rpd - * Added err_mach_ipc for new IPC. - * [90/03/26 22:28:42 rpd] - * - * Revision 2.1 89/08/03 16:02:07 rwd - * Created. - * - * Revision 2.4 89/02/25 18:13:18 gm0w - * Changes for cleanup. - * - * Revision 2.3 89/02/07 00:51:57 mwyoung - * Relocated from sys/error.h - * - * Revision 2.2 88/10/18 00:37:31 mwyoung - * Added {system,sub and code}_emask - * [88/10/17 17:06:58 mrt] - * - * Added {system,sub and code}_emask - * - * 12-May-88 Mary Thompson (mrt) at Carnegie Mellon - * Changed mach_error_t from unsigned int to kern_return_t - * which is a 32 bit integer regardless of machine type. - * insigned int was incompatible with old usages of mach_error. - * - * 10-May-88 Douglas Orr (dorr) at Carnegie-Mellon University - * Missing endif replaced - * - * 5-May-88 Mary Thompson (mrt) at Carnegie Mellon - * Changed typedef of mach_error_t from long to unsigned int - * to keep our Camelot users happy. Also moved the nonkernel - * function declarations from here to mach_error.h. - * - * 10-Feb-88 Douglas Orr (dorr) at Carnegie-Mellon University - * Created. - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University @@ -141,8 +56,9 @@ * */ -#ifndef ERROR_H_ -#define ERROR_H_ +#ifndef _MACH_ERROR_H_ +#define _MACH_ERROR_H_ + #include /* @@ -189,4 +105,4 @@ typedef kern_return_t mach_error_t; typedef mach_error_t (* mach_error_fn_t)( void ); -#endif /* ERROR_H_ */ +#endif /* _MACH_ERROR_H_ */ diff --git a/osfmk/mach/etap.h b/osfmk/mach/etap.h deleted file mode 100644 index a83d30cff..000000000 --- a/osfmk/mach/etap.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * File : etap.h - * - * Contains ETAP buffer and table definitions - * - */ - -#ifndef _MACH_ETAP_H_ -#define _MACH_ETAP_H_ - -#include -#include -#include -#include -#include - - -#define ETAP_CBUFF_ENTRIES 20000 -#define ETAP_CBUFF_IBUCKETS 10 -#define ETAP_CBUFF_WIDTH 80 - -#define ETAP_MBUFF_ENTRIES 28000 -#define ETAP_MBUFF_DATASIZE 4 - - -/* =================================== - * Event & Subsystem Table Definitions - * =================================== - */ - -#define EVENT_NAME_LENGTH 20 /* max event name size */ - -struct event_table_entry { - unsigned short event; /* etap event type */ - unsigned short status; /* event trace status */ - char name [EVENT_NAME_LENGTH]; /* event text name */ - unsigned short dynamic; /* dynamic ID (0=none) */ -}; - -struct subs_table_entry { - unsigned short subs; /* etap subsystem type */ - char name [EVENT_NAME_LENGTH]; /* subsystem text name */ -}; - -typedef struct event_table_entry* event_table_t; -typedef struct subs_table_entry* subs_table_t; -typedef unsigned short etap_event_t; - -#define EVENT_TABLE_NULL ((event_table_t) 0) - -/* ========= - * ETAP Time - * ========= - */ - -typedef mach_timespec_t etap_time_t; - -/* ============================= - * Cumulative buffer definitions - * ============================= - */ - -/* - * The cbuff_data structure contains cumulative lock - * statistical information for EITHER hold operations - * OR wait operations. - */ - -struct cbuff_data { - unsigned long triggered; /* number of event occurances */ - etap_time_t time; /* sum of event durations */ - etap_time_t time_sq; /* sum of squared durations */ - etap_time_t min_time; /* min duration of event */ - etap_time_t max_time; /* max duration of event */ -}; - -/* - * The cbuff_entry contains all trace data for an event. - * The cumulative buffer consists of these entries. - */ - -struct cbuff_entry { - etap_event_t event; /* event type */ - unsigned short kind; /* read,write,or simple */ - unsigned int instance; /* & of event struct */ - struct cbuff_data hold; /* hold trace data */ - struct cbuff_data wait; /* wait trace data */ - unsigned long hold_interval[ETAP_CBUFF_IBUCKETS]; /* hold interval array */ - unsigned long wait_interval[ETAP_CBUFF_IBUCKETS]; /* wait interval array */ -}; - -typedef struct cbuff_entry* cbuff_entry_t; - -#define CBUFF_ENTRY_NULL ((cbuff_entry_t)0) - -/* - * The cumulative buffer maintains a header which is used by - * both the kernel instrumentation and the ETAP user-utilities. 
- */ - -struct cumulative_buffer { - unsigned long next; /* next available entry in buffer */ - unsigned short static_start; /* first static entry in buffer */ - struct cbuff_entry entry [ETAP_CBUFF_ENTRIES]; /* buffer entries */ -}; - -typedef struct cumulative_buffer* cumulative_buffer_t; - - -/* =========================== - * ETAP probe data definitions - * =========================== - */ - -typedef unsigned int etap_data_t[ETAP_MBUFF_DATASIZE]; - -#define ETAP_DATA_ENTRY sizeof(unsigned int) -#define ETAP_DATA_SIZE ETAP_DATA_ENTRY * ETAP_MBUFF_DATASIZE -#define ETAP_DATA_NULL (etap_data_t*) 0 - -/* ========================== - * Monitor buffer definitions - * ========================== - */ - -/* - * The mbuff_entry structure contains trace event instance data. - */ - -struct mbuff_entry { - unsigned short event; /* event type */ - unsigned short flags; /* event strain flags */ - unsigned int instance; /* address of event (lock, thread, etc.) */ - unsigned int pc; /* program counter */ - etap_time_t time; /* operation time */ - etap_data_t data; /* event specific data */ -}; - -typedef struct mbuff_entry* mbuff_entry_t; - -/* - * Each circular monitor buffer will contain maintanence - * information and mon_entry records. - */ - -struct monitor_buffer { - unsigned long free; /* index of next available record */ - unsigned long timestamp; /* timestamp of last wrap around */ - struct mbuff_entry entry[1]; /* buffer entries (holder) */ -}; - -typedef struct monitor_buffer* monitor_buffer_t; - - -/* =================== - * Event strains/flags - * =================== - */ /* | |t|b|e|k|u|m|s|r|w| | | | | */ - /* ----------------------------- */ -#define WRITE_LOCK 0x10 /* | | | | | | | | | |1| | | | | */ -#define READ_LOCK 0x20 /* | | | | | | | | |1| | | | | | */ -#define COMPLEX_LOCK 0x30 /* | | | | | | | | |1|1| | | | | */ -#define SPIN_LOCK 0x40 /* | | | | | | | |1| | | | | | | */ -#define MUTEX_LOCK 0x80 /* | | | | | | |1| | | | | | | | */ -#define USER_EVENT 0x100 /* | | | | | |1| | | | | | | | | */ -#define KERNEL_EVENT 0x200 /* | | | | |1| | | | | | | | | | */ -#define EVENT_END 0x400 /* | | | |1| | | | | | | | | | | */ -#define EVENT_BEGIN 0x800 /* | | |1| | | | | | | | | | | | */ -#define SYSCALL_TRAP 0x1000 /* | |1| | | | | | | | | | | | | */ - - -/* ========================= - * Event trace status values - * ========================= - */ /* | | | | | | | | | | |M|M|C|C| */ - /* | | | | | | | | | | |d|c|d|c| */ - /* ----------------------------- */ -#define CUM_CONTENTION 0x1 /* | | | | | | | | | | | | | |1| */ -#define CUM_DURATION 0x2 /* | | | | | | | | | | | | |1| | */ -#define MON_CONTENTION 0x4 /* | | | | | | | | | | | |1| | | */ -#define MON_DURATION 0x8 /* | | | | | | | | | | |1| | | | */ - -#define ETAP_TRACE_ON 0xf /* | | | | | | | | | | |1|1|1|1| */ -#define ETAP_TRACE_OFF 0x0 /* | | | | | | | | | | | | | | | */ - - -/* ================== - * ETAP trace flavors - * ================== - */ - -/* Mode */ - -#define ETAP_CUMULATIVE 0x3 /* | | | | | | | | | | | | |1|1| */ -#define ETAP_MONITORED 0xc /* | | | | | | | | | | |1|1| | | */ -#define ETAP_RESET 0xf0f0 - -/* Type */ - -#define ETAP_CONTENTION 0x5 /* | | | | | | | | | | | |1| |1| */ -#define ETAP_DURATION 0xa /* | | | | | | | | | | |1| |1| | */ - - -/* =============================== - * Buffer/Table flavor definitions - * =============================== - */ - -#define ETAP_TABLE_EVENT 0 -#define ETAP_TABLE_SUBSYSTEM 1 -#define ETAP_BUFFER_CUMULATIVE 2 -#define ETAP_BUFFER_MONITORED 3 - -/* 
========================== - * ETAP function declarations - * ========================== - */ - -extern -kern_return_t etap_trace_event( - unsigned short mode, - unsigned short type, - boolean_t enable, - unsigned int nargs, - unsigned short args[]); - -extern -kern_return_t etap_probe( - unsigned short eventno, - unsigned short event_id, - unsigned int data_size, - etap_data_t *data); - -/* ================================================================= - * convienience user probe macro - only used if DO_PROBE is #defined - * ================================================================= - */ -#ifdef DO_PROBE -#define PROBE_DATA(subsys, tag, data0, data1, data2, data3) \ - { \ - etap_data_t _mmmm; \ - _mmmm[0] = (u_int)data0; \ - _mmmm[1] = (u_int)data1; \ - _mmmm[2] = (u_int)data2; \ - _mmmm[3] = (u_int)data3; \ - etap_probe(subsys, tag, sizeof (etap_data_t), &_mmmm); \ - } -#else -#define PROBE_DATA(type, tag, data0, data1, data2, data3) -#endif /* DO_PROBE */ -#endif /* _MACH_ETAP_H_ */ diff --git a/osfmk/mach/etap_events.h b/osfmk/mach/etap_events.h deleted file mode 100644 index 29dced2f9..000000000 --- a/osfmk/mach/etap_events.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * File : etap_events.h - * - * Kernel trace event definitions - * - * Notes : If new trace event or subsystem definitions are added - * to this file, the appropriate tables in kern/etap.c MUST be - * updated for the ETAP package to recognize them. 
- * - */ - -#ifndef _MACH_ETAP_EVENTS_H_ -#define _MACH_ETAP_EVENTS_H_ - -#include - -#ifdef __APPLE_API_UNSTABLE - -/* ============================ - * ETAP Subsystem Definitions - * ============================ - */ - -#define ETAP_SUBS_PROBE 0x0000 /* must be zero */ - -#define ETAP_SUBS_LOCK_VM 0x0100 -#define ETAP_SUBS_LOCK_IPC 0x0200 -#define ETAP_SUBS_LOCK_IO 0x0300 -#define ETAP_SUBS_LOCK_THREAD 0x0400 -#define ETAP_SUBS_LOCK_NET 0x0500 -#define ETAP_SUBS_LOCK_NORMA 0x0600 -#define ETAP_SUBS_LOCK_DIPC 0x0700 -#define ETAP_SUBS_LOCK_KKT 0x0800 -#define ETAP_SUBS_LOCK_XKERNEL 0x0900 -#define ETAP_SUBS_LOCK_MISC 0x0a00 - -#define ETAP_NO_TRACE 0x0fff /* never traced */ - - -/* ======================= - * Lock Event Definitions - * ======================= - */ - -#define ETAP_VM_BUCKET (ETAP_SUBS_LOCK_VM + 1) -#define ETAP_VM_HIMEM (ETAP_SUBS_LOCK_VM + 2) -#define ETAP_VM_MAP (ETAP_SUBS_LOCK_VM + 3) -#define ETAP_VM_MAP_I (ETAP_SUBS_LOCK_VM + 4) -#define ETAP_VM_MEMMAN (ETAP_SUBS_LOCK_VM + 5) -#define ETAP_VM_MSYNC (ETAP_SUBS_LOCK_VM + 6) -#define ETAP_VM_OBJ (ETAP_SUBS_LOCK_VM + 7) -#define ETAP_VM_OBJ_CACHE (ETAP_SUBS_LOCK_VM + 8) -#define ETAP_VM_PAGE_ALLOC (ETAP_SUBS_LOCK_VM + 9) -#define ETAP_VM_PAGEOUT (ETAP_SUBS_LOCK_VM + 10) -#define ETAP_VM_PAGEQ (ETAP_SUBS_LOCK_VM + 11) -#define ETAP_VM_PAGEQ_FREE (ETAP_SUBS_LOCK_VM + 12) -#define ETAP_VM_PMAP (ETAP_SUBS_LOCK_VM + 13) -#define ETAP_VM_PMAP_CACHE (ETAP_SUBS_LOCK_VM + 14) -#define ETAP_VM_PMAP_FREE (ETAP_SUBS_LOCK_VM + 15) -#define ETAP_VM_PMAP_KERNEL (ETAP_SUBS_LOCK_VM + 16) -#define ETAP_VM_PMAP_SYS (ETAP_SUBS_LOCK_VM + 17) -#define ETAP_VM_PMAP_SYS_I (ETAP_SUBS_LOCK_VM + 18) -#define ETAP_VM_PMAP_UPDATE (ETAP_SUBS_LOCK_VM + 19) -#define ETAP_VM_PREPPIN (ETAP_SUBS_LOCK_VM + 20) -#define ETAP_VM_RESULT (ETAP_SUBS_LOCK_VM + 21) -#define ETAP_VM_TEST (ETAP_SUBS_LOCK_VM + 22) -#define ETAP_VM_PMAP_PHYSENTRIES (ETAP_SUBS_LOCK_VM + 23) -#define ETAP_VM_PMAP_SID (ETAP_SUBS_LOCK_VM + 24) -#define ETAP_VM_PMAP_PTE (ETAP_SUBS_LOCK_VM + 25) -#define ETAP_VM_PMAP_PTE_OVFLW (ETAP_SUBS_LOCK_VM + 26) -#define ETAP_VM_PMAP_TLB (ETAP_SUBS_LOCK_VM + 27) - -#define ETAP_IPC_IHGB (ETAP_SUBS_LOCK_IPC + 1) -#define ETAP_IPC_IS (ETAP_SUBS_LOCK_IPC + 2) -#define ETAP_IPC_IS_REF (ETAP_SUBS_LOCK_IPC + 3) -#define ETAP_IPC_MQUEUE (ETAP_SUBS_LOCK_IPC + 4) -#define ETAP_IPC_OBJECT (ETAP_SUBS_LOCK_IPC + 5) -#define ETAP_IPC_PORT_MULT (ETAP_SUBS_LOCK_IPC + 6) -#define ETAP_IPC_PORT_TIME (ETAP_SUBS_LOCK_IPC + 7) -#define ETAP_IPC_RPC (ETAP_SUBS_LOCK_IPC + 8) -#define ETAP_IPC_PORT_ALLOCQ (ETAP_SUBS_LOCK_IPC + 9) - -#define ETAP_IO_AHA (ETAP_SUBS_LOCK_IO + 1) -#define ETAP_IO_CHIP (ETAP_SUBS_LOCK_IO + 2) -#define ETAP_IO_DEV (ETAP_SUBS_LOCK_IO + 3) -#define ETAP_IO_DEV_NUM (ETAP_SUBS_LOCK_IO + 4) -#define ETAP_IO_DEV_PAGEH (ETAP_SUBS_LOCK_IO + 5) -#define ETAP_IO_DEV_PAGER (ETAP_SUBS_LOCK_IO + 6) -#define ETAP_IO_DEV_PORT (ETAP_SUBS_LOCK_IO + 7) -#define ETAP_IO_DEV_REF (ETAP_SUBS_LOCK_IO + 8) -#define ETAP_IO_DEVINS (ETAP_SUBS_LOCK_IO + 9) -#define ETAP_IO_DONE_LIST (ETAP_SUBS_LOCK_IO + 10) -#define ETAP_IO_DONE_Q (ETAP_SUBS_LOCK_IO + 11) -#define ETAP_IO_DONE_REF (ETAP_SUBS_LOCK_IO + 12) -#define ETAP_IO_EAHA (ETAP_SUBS_LOCK_IO + 13) -#define ETAP_IO_HD_PROBE (ETAP_SUBS_LOCK_IO + 14) -#define ETAP_IO_IHGB (ETAP_SUBS_LOCK_IO + 15) -#define ETAP_IO_IOPB (ETAP_SUBS_LOCK_IO + 16) -#define ETAP_IO_KDQ (ETAP_SUBS_LOCK_IO + 17) -#define ETAP_IO_KDTTY (ETAP_SUBS_LOCK_IO + 18) -#define ETAP_IO_REQ (ETAP_SUBS_LOCK_IO + 19) -#define ETAP_IO_TARGET (ETAP_SUBS_LOCK_IO 
+ 20) -#define ETAP_IO_TTY (ETAP_SUBS_LOCK_IO + 21) -#define ETAP_IO_IOP_LOCK (ETAP_SUBS_LOCK_IO + 22) -#define ETAP_IO_DEV_NAME (ETAP_SUBS_LOCK_IO + 23) -#define ETAP_IO_CDLI (ETAP_SUBS_LOCK_IO + 24) -#define ETAP_IO_HIPPI_FILTER (ETAP_SUBS_LOCK_IO + 25) -#define ETAP_IO_HIPPI_SRC (ETAP_SUBS_LOCK_IO + 26) -#define ETAP_IO_HIPPI_DST (ETAP_SUBS_LOCK_IO + 27) -#define ETAP_IO_HIPPI_PKT (ETAP_SUBS_LOCK_IO + 28) -#define ETAP_IO_NOTIFY (ETAP_SUBS_LOCK_IO + 29) -#define ETAP_IO_DATADEV (ETAP_SUBS_LOCK_IO + 30) -#define ETAP_IO_OPEN (ETAP_SUBS_LOCK_IO + 31) -#define ETAP_IO_OPEN_I (ETAP_SUBS_LOCK_IO + 32) -#define ETAP_IO_UNDREPLY (ETAP_SUBS_LOCK_IO + 33) - -#define ETAP_THREAD_ACT (ETAP_SUBS_LOCK_THREAD + 1) -#define ETAP_THREAD_ACTION (ETAP_SUBS_LOCK_THREAD + 2) -#define ETAP_THREAD_LOCK (ETAP_SUBS_LOCK_THREAD + 3) -#define ETAP_THREAD_LOCK_SET (ETAP_SUBS_LOCK_THREAD + 4) -#define ETAP_THREAD_NEW (ETAP_SUBS_LOCK_THREAD + 5) -#define ETAP_THREAD_PSET (ETAP_SUBS_LOCK_THREAD + 6) -#define ETAP_THREAD_PSET_ALL (ETAP_SUBS_LOCK_THREAD + 7) -#define ETAP_THREAD_PSET_RUNQ (ETAP_SUBS_LOCK_THREAD + 8) -#define ETAP_THREAD_PSET_IDLE (ETAP_SUBS_LOCK_THREAD + 9) -#define ETAP_THREAD_PSET_QUANT (ETAP_SUBS_LOCK_THREAD + 10) -#define ETAP_THREAD_PROC (ETAP_SUBS_LOCK_THREAD + 11) -#define ETAP_THREAD_PROC_RUNQ (ETAP_SUBS_LOCK_THREAD + 12) -#define ETAP_THREAD_REAPER (ETAP_SUBS_LOCK_THREAD + 13) -#define ETAP_THREAD_RPC (ETAP_SUBS_LOCK_THREAD + 14) -#define ETAP_THREAD_REM_RPC (ETAP_SUBS_LOCK_THREAD + 15) -#define ETAP_THREAD_SEMA (ETAP_SUBS_LOCK_THREAD + 16) -#define ETAP_THREAD_STACK (ETAP_SUBS_LOCK_THREAD + 17) -#define ETAP_THREAD_STACK_USAGE (ETAP_SUBS_LOCK_THREAD + 18) -#define ETAP_THREAD_TASK_NEW (ETAP_SUBS_LOCK_THREAD + 19) -#define ETAP_THREAD_TASK_ITK (ETAP_SUBS_LOCK_THREAD + 20) -#define ETAP_THREAD_ULOCK (ETAP_SUBS_LOCK_THREAD + 21) -#define ETAP_THREAD_WAIT (ETAP_SUBS_LOCK_THREAD + 22) -#define ETAP_THREAD_WAKE (ETAP_SUBS_LOCK_THREAD + 23) -#define ETAP_THREAD_ACT_LIST (ETAP_SUBS_LOCK_THREAD + 24) -#define ETAP_THREAD_TASK_SWAP (ETAP_SUBS_LOCK_THREAD + 25) -#define ETAP_THREAD_TASK_SWAPOUT (ETAP_SUBS_LOCK_THREAD + 26) -#define ETAP_THREAD_SWAPPER (ETAP_SUBS_LOCK_THREAD + 27) - -#define ETAP_NET_IFQ (ETAP_SUBS_LOCK_NET + 1) -#define ETAP_NET_KMSG (ETAP_SUBS_LOCK_NET + 2) -#define ETAP_NET_MBUF (ETAP_SUBS_LOCK_NET + 3) -#define ETAP_NET_POOL (ETAP_SUBS_LOCK_NET + 4) -#define ETAP_NET_Q (ETAP_SUBS_LOCK_NET + 5) -#define ETAP_NET_QFREE (ETAP_SUBS_LOCK_NET + 6) -#define ETAP_NET_RCV (ETAP_SUBS_LOCK_NET + 7) -#define ETAP_NET_RCV_PLIST (ETAP_SUBS_LOCK_NET + 8) -#define ETAP_NET_THREAD (ETAP_SUBS_LOCK_NET + 9) - -#define ETAP_NORMA_XMM (ETAP_SUBS_LOCK_NORMA + 1) -#define ETAP_NORMA_XMMOBJ (ETAP_SUBS_LOCK_NORMA + 2) -#define ETAP_NORMA_XMMCACHE (ETAP_SUBS_LOCK_NORMA + 3) -#define ETAP_NORMA_MP (ETAP_SUBS_LOCK_NORMA + 4) -#define ETAP_NORMA_VOR (ETAP_SUBS_LOCK_NORMA + 5) -#define ETAP_NORMA_TASK (ETAP_SUBS_LOCK_NORMA + 6) - -#define ETAP_DIPC_CLEANUP (ETAP_SUBS_LOCK_DIPC + 1) -#define ETAP_DIPC_MSG_PROG (ETAP_SUBS_LOCK_DIPC + 2) -#define ETAP_DIPC_PREP_QUEUE (ETAP_SUBS_LOCK_DIPC + 3) -#define ETAP_DIPC_PREP_FILL (ETAP_SUBS_LOCK_DIPC + 4) -#define ETAP_DIPC_MIGRATE (ETAP_SUBS_LOCK_DIPC + 5) -#define ETAP_DIPC_DELIVER (ETAP_SUBS_LOCK_DIPC + 6) -#define ETAP_DIPC_RECV_SYNC (ETAP_SUBS_LOCK_DIPC + 7) -#define ETAP_DIPC_RPC (ETAP_SUBS_LOCK_DIPC + 8) -#define ETAP_DIPC_MSG_REQ (ETAP_SUBS_LOCK_DIPC + 9) -#define ETAP_DIPC_MSG_ORDER (ETAP_SUBS_LOCK_DIPC + 10) -#define ETAP_DIPC_MSG_PREPQ (ETAP_SUBS_LOCK_DIPC + 11) 
-#define ETAP_DIPC_MSG_FREE (ETAP_SUBS_LOCK_DIPC + 12) -#define ETAP_DIPC_KMSG_AST (ETAP_SUBS_LOCK_DIPC + 13) -#define ETAP_DIPC_TEST_LOCK (ETAP_SUBS_LOCK_DIPC + 14) -#define ETAP_DIPC_SPINLOCK (ETAP_SUBS_LOCK_DIPC + 15) -#define ETAP_DIPC_TRACE (ETAP_SUBS_LOCK_DIPC + 16) -#define ETAP_DIPC_REQ_CALLBACK (ETAP_SUBS_LOCK_DIPC + 17) -#define ETAP_DIPC_PORT_NAME (ETAP_SUBS_LOCK_DIPC + 18) -#define ETAP_DIPC_RESTART_PORT (ETAP_SUBS_LOCK_DIPC + 19) -#define ETAP_DIPC_ZERO_PAGE (ETAP_SUBS_LOCK_DIPC + 20) -#define ETAP_DIPC_BLOCKED_NODE (ETAP_SUBS_LOCK_DIPC + 21) -#define ETAP_DIPC_TIMER (ETAP_SUBS_LOCK_DIPC + 22) -#define ETAP_DIPC_SPECIAL_PORT (ETAP_SUBS_LOCK_DIPC + 23) - -#define ETAP_KKT_TEST_WORK (ETAP_SUBS_LOCK_KKT + 1) -#define ETAP_KKT_TEST_MP (ETAP_SUBS_LOCK_KKT + 2) -#define ETAP_KKT_NODE (ETAP_SUBS_LOCK_KKT + 3) -#define ETAP_KKT_CHANNEL_LIST (ETAP_SUBS_LOCK_KKT + 4) -#define ETAP_KKT_CHANNEL (ETAP_SUBS_LOCK_KKT + 5) -#define ETAP_KKT_HANDLE (ETAP_SUBS_LOCK_KKT + 6) -#define ETAP_KKT_MAP (ETAP_SUBS_LOCK_KKT + 7) -#define ETAP_KKT_RESOURCE (ETAP_SUBS_LOCK_KKT + 8) - -#define ETAP_XKERNEL_MASTER (ETAP_SUBS_LOCK_XKERNEL + 1) -#define ETAP_XKERNEL_EVENT (ETAP_SUBS_LOCK_XKERNEL + 2) -#define ETAP_XKERNEL_ETHINPUT (ETAP_SUBS_LOCK_XKERNEL + 3) - -#define ETAP_MISC_AST (ETAP_SUBS_LOCK_MISC + 1) -#define ETAP_MISC_CLOCK (ETAP_SUBS_LOCK_MISC + 2) -#define ETAP_MISC_EMULATE (ETAP_SUBS_LOCK_MISC + 3) -#define ETAP_MISC_EVENT (ETAP_SUBS_LOCK_MISC + 4) -#define ETAP_MISC_KDB (ETAP_SUBS_LOCK_MISC + 5) -#define ETAP_MISC_PCB (ETAP_SUBS_LOCK_MISC + 6) -#define ETAP_MISC_PRINTF (ETAP_SUBS_LOCK_MISC + 7) -#define ETAP_MISC_Q (ETAP_SUBS_LOCK_MISC + 8) -#define ETAP_MISC_RPC_SUBSYS (ETAP_SUBS_LOCK_MISC + 9) -#define ETAP_MISC_RT_CLOCK (ETAP_SUBS_LOCK_MISC + 10) -#define ETAP_MISC_SD_POOL (ETAP_SUBS_LOCK_MISC + 11) -#define ETAP_MISC_TIMER (ETAP_SUBS_LOCK_MISC + 12) -#define ETAP_MISC_UTIME (ETAP_SUBS_LOCK_MISC + 13) -#define ETAP_MISC_XPR (ETAP_SUBS_LOCK_MISC + 14) -#define ETAP_MISC_ZONE (ETAP_SUBS_LOCK_MISC + 15) -#define ETAP_MISC_ZONE_ALL (ETAP_SUBS_LOCK_MISC + 16) -#define ETAP_MISC_ZONE_GET (ETAP_SUBS_LOCK_MISC + 17) -#define ETAP_MISC_ZONE_PTABLE (ETAP_SUBS_LOCK_MISC + 18) -#define ETAP_MISC_LEDGER (ETAP_SUBS_LOCK_MISC + 19) -#define ETAP_MISC_SCSIT_TGT (ETAP_SUBS_LOCK_MISC + 20) -#define ETAP_MISC_SCSIT_SELF (ETAP_SUBS_LOCK_MISC + 21) -#define ETAP_MISC_SPL (ETAP_SUBS_LOCK_MISC + 22) /* i860 */ -#define ETAP_MISC_MASTER (ETAP_SUBS_LOCK_MISC + 23) /* i860 */ -#define ETAP_MISC_FLOAT (ETAP_SUBS_LOCK_MISC + 24) /* i860 */ -#define ETAP_MISC_GROUP (ETAP_SUBS_LOCK_MISC + 25) /* i860 */ -#define ETAP_MISC_FLIPC (ETAP_SUBS_LOCK_MISC + 26) -#define ETAP_MISC_MP_IO (ETAP_SUBS_LOCK_MISC + 27) -#define ETAP_MISC_KERNEL_TEST (ETAP_SUBS_LOCK_MISC + 28) -#define ETAP_MISC_TIMER_LOCK (ETAP_SUBS_LOCK_MISC + 29) -#define ETAP_MISC_POST (ETAP_SUBS_LOCK_MISC + 30) /* i860 */ -#define ETAP_MISC_KERNLOG (ETAP_SUBS_LOCK_MISC + 31) /* Alpha */ -#define ETAP_DPAGE_BS (ETAP_SUBS_LOCK_MISC + 32) /* def pager */ -#define ETAP_DPAGE_BSL (ETAP_SUBS_LOCK_MISC + 33) /* def pager */ -#define ETAP_DPAGE_SEGMENT (ETAP_SUBS_LOCK_MISC + 34) /* def pager */ -#define ETAP_DPAGE_SEGLIST (ETAP_SUBS_LOCK_MISC + 35) /* def pager */ -#define ETAP_DPAGE_VSTRUCT (ETAP_SUBS_LOCK_MISC + 36) /* def pager */ -#define ETAP_DPAGE_VSMAP (ETAP_SUBS_LOCK_MISC + 37) /* def pager */ -#define ETAP_DPAGE_VSLIST (ETAP_SUBS_LOCK_MISC + 38) /* def pager */ -#define ETAP_DPAGE_VSSEQNO (ETAP_SUBS_LOCK_MISC + 39) /* def pager */ -#define ETAP_DPAGE_VSREAD 
(ETAP_SUBS_LOCK_MISC + 40) /* def pager */ -#define ETAP_DPAGE_VSWRITE (ETAP_SUBS_LOCK_MISC + 41) /* def pager */ -#define ETAP_DPAGE_VSREFS (ETAP_SUBS_LOCK_MISC + 42) /* def pager */ -#define ETAP_DPAGE_VSASYNC (ETAP_SUBS_LOCK_MISC + 43) /* def pager */ - -/* ========================== - * System Probe Definitions - * ========================== - */ -/* probes 0-31 reserved for non-MK (e.g. users and servers) */ -#define ETAP_USER_BASE 0 -#define ETAP_USER_NEVENTS 32 - -#define ETAP_P_USER_EVENT0 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 0) -#define ETAP_P_USER_EVENT1 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 1) -#define ETAP_P_USER_EVENT2 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 2) -#define ETAP_P_USER_EVENT3 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 3) -#define ETAP_P_USER_EVENT4 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 4) -#define ETAP_P_USER_EVENT5 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 5) -#define ETAP_P_USER_EVENT6 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 6) -#define ETAP_P_USER_EVENT7 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 7) -#define ETAP_P_USER_EVENT8 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 8) -#define ETAP_P_USER_EVENT9 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 9) -#define ETAP_P_USER_EVENT10 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 10) -#define ETAP_P_USER_EVENT11 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 11) -#define ETAP_P_USER_EVENT12 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 12) -#define ETAP_P_USER_EVENT13 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 13) -#define ETAP_P_USER_EVENT14 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 14) -#define ETAP_P_USER_EVENT15 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 15) -#define ETAP_P_USER_EVENT16 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 16) -#define ETAP_P_USER_EVENT17 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 17) -#define ETAP_P_USER_EVENT18 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 18) -#define ETAP_P_USER_EVENT19 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 19) -#define ETAP_P_USER_EVENT20 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 20) -#define ETAP_P_USER_EVENT21 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 21) -#define ETAP_P_USER_EVENT22 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 22) -#define ETAP_P_USER_EVENT23 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 23) -#define ETAP_P_USER_EVENT24 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 24) -#define ETAP_P_USER_EVENT25 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 25) -#define ETAP_P_USER_EVENT26 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 26) -#define ETAP_P_USER_EVENT27 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 27) -#define ETAP_P_USER_EVENT28 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 28) -#define ETAP_P_USER_EVENT29 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 29) -#define ETAP_P_USER_EVENT30 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 30) -#define ETAP_P_USER_EVENT31 (ETAP_SUBS_PROBE + ETAP_USER_BASE + 31) - -/* probes 32-63 reserved for MK */ -#define ETAP_SYS_BASE 32 - -#define ETAP_P_SYSCALL_MACH (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 0) -#define ETAP_P_SYSCALL_UNIX (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 1) -#define ETAP_P_THREAD_LIFE (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 2) -#define ETAP_P_THREAD_CTX (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 3) -#define ETAP_P_RPC (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 4) -#define ETAP_P_INTERRUPT (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 5) -#define ETAP_P_ACT_ABORT (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 6) -#define ETAP_P_PRIORITY (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 7) -#define ETAP_P_EXCEPTION (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 8) -#define ETAP_P_DEPRESSION (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 9) -#define ETAP_P_MISC (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 10) -#define ETAP_P_DETAP (ETAP_SUBS_PROBE + ETAP_SYS_BASE + 11) - -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef 
__APPLE_API_OBSOLETE -/* =========================== - * ETAP Thread block reasons - * =========================== - */ - -#define BLOCKED_ON_UNDEFINED 0 -#define BLOCKED_ON_CLEAR 0 - -#define BLOCKED_ON_SEMAPHORE 1 -#define BLOCKED_ON_LOCK 2 -#define BLOCKED_ON_LOCK_HANDOFF 3 -#define BLOCKED_ON_MUTEX_LOCK 4 -#define BLOCKED_ON_COMPLEX_LOCK 5 -#define BLOCKED_ON_PORT_RCV 6 -#define BLOCKED_ON_REAPER_DONE 7 -#define BLOCKED_ON_IDLE_DONE 8 -#define BLOCKED_ON_TERMINATION 9 - -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* _MACH_ETAP_EVENTS_H_ */ diff --git a/osfmk/mach/events_info.h b/osfmk/mach/events_info.h index c6eead64e..ab5b2eca6 100644 --- a/osfmk/mach/events_info.h +++ b/osfmk/mach/events_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,41 +22,6 @@ /* * @OSF_FREE_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:29 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.3 1995/01/26 22:15:39 ezf - * removed extraneous CMU CR - * [1995/01/26 20:24:56 ezf] - * - * Revision 1.1.8.2 1995/01/06 19:50:12 devrcs - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup - * [1994/10/14 03:42:32 dwm] - * - * Revision 1.1.4.3 1993/09/17 21:35:27 robert - * change marker to OSF_FREE_COPYRIGHT - * [1993/09/17 21:28:46 robert] - * - * Revision 1.1.4.2 1993/06/04 15:13:47 jeffc - * CR9193 - MK5.0 merge. - * [1993/05/18 02:37:52 gm] - * - * Revision 3.0 92/12/31 22:12:17 ede - * Initial revision for OSF/1 R1.3 - * - * Revision 1.2 1991/06/20 12:13:09 devrcs - * Created from mach/task_info.h. - * [91/06/04 08:53:02 jeffc] - * - * $EndLog$ - */ /* * Machine-independent event information structures and definitions. * @@ -71,18 +36,20 @@ #ifndef _MACH_EVENTS_INFO_H_ #define _MACH_EVENTS_INFO_H_ +#include + struct events_info { - long faults; /* number of page faults */ - long zero_fills; /* number of zero fill pages */ - long reactivations; /* number of reactivated pages */ - long pageins; /* number of actual pageins */ - long cow_faults; /* number of copy-on-write faults */ - long messages_sent; /* number of messages sent */ - long messages_received; /* number of messages received */ + integer_t faults; /* number of page faults */ + integer_t zero_fills; /* number of zero fill pages */ + integer_t reactivations; /* number of reactivated pages */ + integer_t pageins; /* number of actual pageins */ + integer_t cow_faults; /* number of copy-on-write faults */ + integer_t messages_sent; /* number of messages sent */ + integer_t messages_received; /* number of messages received */ }; typedef struct events_info events_info_data_t; typedef struct events_info *events_info_t; -#define EVENTS_INFO_COUNT \ - (sizeof(events_info_data_t) / sizeof(long)) +#define EVENTS_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(events_info_data_t) / sizeof(integer_t))) #endif /*_MACH_EVENTS_INFO_H_*/ diff --git a/osfmk/mach/exception.h b/osfmk/mach/exception.h index 6796107b2..b4a74bc14 100644 --- a/osfmk/mach/exception.h +++ b/osfmk/mach/exception.h @@ -47,15 +47,6 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ -/* - * JMM - - * This header will eventually be MIG-generated and define the - * exception interfaces. 
It used to define the exception data - * types, but those have been moved to exception_types.h for - * consistency. Once this is MIG-generated, it will automatically - * drag in the types, but for compatibility in the interim, just - * pull them in manually. - */ #ifndef _MACH_EXCEPTION_H_ #define _MACH_EXCEPTION_H_ diff --git a/osfmk/mach/exception_types.h b/osfmk/mach/exception_types.h index 014349674..421f3fa16 100644 --- a/osfmk/mach/exception_types.h +++ b/osfmk/mach/exception_types.h @@ -59,7 +59,6 @@ * Machine-independent exception definitions. */ - #define EXC_BAD_ACCESS 1 /* Could not access memory */ /* Code contains kern_return_t describing error. */ /* Subcode contains bad memory address. */ @@ -143,6 +142,7 @@ #define EXC_SOFT_SIGNAL 0x10003 /* Unix signal exceptions */ #ifndef ASSEMBLER + #include #include #include @@ -161,4 +161,5 @@ typedef thread_state_flavor_t *exception_flavor_array_t; typedef mach_port_t *exception_port_array_t; #endif /* ASSEMBLER */ + #endif /* _MACH_EXCEPTION_TYPES_H_ */ diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index 28cdb19fe..565a56e24 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -59,11 +59,14 @@ #ifndef _MACH_HOST_INFO_H_ #define _MACH_HOST_INFO_H_ +#include #include #include #include #include +#include + /* * Generic information structure to allow for expansion. */ @@ -78,8 +81,6 @@ typedef char kernel_version_t[KERNEL_VERSION_MAX]; #define KERNEL_BOOT_INFO_MAX (4096) typedef char kernel_boot_info_t[KERNEL_BOOT_INFO_MAX]; -#define KERNEL_BOOTMAGIC_MAX (8192) - /* * Currently defined information. 
*/ @@ -92,18 +93,47 @@ typedef integer_t host_flavor_t; #define HOST_SEMAPHORE_TRAPS 7 /* Has semaphore traps */ #define HOST_MACH_MSG_TRAP 8 /* Has mach_msg_trap */ -struct host_basic_info { +#ifdef MACH_KERNEL_PRIVATE +struct host_basic_info_old { integer_t max_cpus; /* max number of cpus possible */ integer_t avail_cpus; /* number of cpus now available */ - vm_size_t memory_size; /* size of memory in bytes */ + natural_t memory_size; /* size of memory in bytes */ cpu_type_t cpu_type; /* cpu type */ cpu_subtype_t cpu_subtype; /* cpu subtype */ }; +typedef struct host_basic_info_old host_basic_info_data_old_t; +typedef struct host_basic_info_old *host_basic_info_old_t; +#define HOST_BASIC_INFO_OLD_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_basic_info_data_old_t)/sizeof(integer_t))) +#endif + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + +struct host_basic_info { + integer_t max_cpus; /* max number of CPUs possible */ + integer_t avail_cpus; /* number of CPUs now available */ + natural_t memory_size; /* size of memory in bytes, capped at 2 GB */ + cpu_type_t cpu_type; /* cpu type */ + cpu_subtype_t cpu_subtype; /* cpu subtype */ + cpu_threadtype_t cpu_threadtype; /* cpu threadtype */ + integer_t physical_cpu; /* number of physical CPUs now available */ + integer_t physical_cpu_max; /* max number of physical CPUs possible */ + integer_t logical_cpu; /* number of logical CPUs now available */ + integer_t logical_cpu_max; /* max number of logical CPUs possible */ + uint64_t max_mem; /* actual size of physical memory */ +}; + +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + typedef struct host_basic_info host_basic_info_data_t; typedef struct host_basic_info *host_basic_info_t; -#define HOST_BASIC_INFO_COUNT \ - (sizeof(host_basic_info_data_t)/sizeof(integer_t)) +#define HOST_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_basic_info_data_t)/sizeof(integer_t))) struct host_sched_info { integer_t min_timeout; /* minimum timeout in milliseconds */ @@ -112,21 +142,21 @@ struct host_sched_info { typedef struct host_sched_info host_sched_info_data_t; typedef struct host_sched_info *host_sched_info_t; -#define HOST_SCHED_INFO_COUNT \ - (sizeof(host_sched_info_data_t)/sizeof(integer_t)) +#define HOST_SCHED_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_sched_info_data_t)/sizeof(integer_t))) struct kernel_resource_sizes { - vm_size_t task; - vm_size_t thread; - vm_size_t port; - vm_size_t memory_region; - vm_size_t memory_object; + natural_t task; + natural_t thread; + natural_t port; + natural_t memory_region; + natural_t memory_object; }; typedef struct kernel_resource_sizes kernel_resource_sizes_data_t; typedef struct kernel_resource_sizes *kernel_resource_sizes_t; -#define HOST_RESOURCE_SIZES_COUNT \ - (sizeof(kernel_resource_sizes_data_t)/sizeof(integer_t)) +#define HOST_RESOURCE_SIZES_COUNT ((mach_msg_type_number_t) \ + (sizeof(kernel_resource_sizes_data_t)/sizeof(integer_t))) struct host_priority_info { integer_t kernel_priority; @@ -141,8 +171,8 @@ struct host_priority_info { typedef struct host_priority_info host_priority_info_data_t; typedef struct host_priority_info *host_priority_info_t; -#define HOST_PRIORITY_INFO_COUNT \ - (sizeof(host_priority_info_data_t)/sizeof(integer_t)) +#define HOST_PRIORITY_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_priority_info_data_t)/sizeof(integer_t))) /* host_statistics() */ #define HOST_LOAD_INFO 1 /* System loading stats */ @@ -156,19 +186,22 @@ struct host_load_info { typedef struct
host_load_info host_load_info_data_t; typedef struct host_load_info *host_load_info_t; -#define HOST_LOAD_INFO_COUNT \ - (sizeof(host_load_info_data_t)/sizeof(integer_t)) +#define HOST_LOAD_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_load_info_data_t)/sizeof(integer_t))) /* in */ -#define HOST_VM_INFO_COUNT \ - (sizeof(vm_statistics_data_t)/sizeof(integer_t)) +#define HOST_VM_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_statistics_data_t)/sizeof(integer_t))) +#define HOST_VM_INFO_REV0_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_statistics_rev0_data_t)/sizeof(integer_t))) struct host_cpu_load_info { /* number of ticks while running... */ - unsigned long cpu_ticks[CPU_STATE_MAX]; /* ... in the given mode */ + natural_t cpu_ticks[CPU_STATE_MAX]; /* ... in the given mode */ }; + typedef struct host_cpu_load_info host_cpu_load_info_data_t; typedef struct host_cpu_load_info *host_cpu_load_info_t; -#define HOST_CPU_LOAD_INFO_COUNT \ - (sizeof (host_cpu_load_info_data_t) / sizeof (integer_t)) +#define HOST_CPU_LOAD_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof (host_cpu_load_info_data_t) / sizeof (integer_t))) #endif /* _MACH_HOST_INFO_H_ */ diff --git a/osfmk/mach/host_notify.h b/osfmk/mach/host_notify.h index 0d63f5b73..fc165bd44 100644 --- a/osfmk/mach/host_notify.h +++ b/osfmk/mach/host_notify.h @@ -19,14 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 14 January 2003 (debo) - * Created. - */ #ifndef _MACH_HOST_NOTIFY_H_ #define _MACH_HOST_NOTIFY_H_ diff --git a/osfmk/mach/host_notify_reply.defs b/osfmk/mach/host_notify_reply.defs index 9387af725..dac4b9e97 100644 --- a/osfmk/mach/host_notify_reply.defs +++ b/osfmk/mach/host_notify_reply.defs @@ -19,14 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 23 January 2003 (debo) - * Created. - */ subsystem #if KERNEL_USER diff --git a/osfmk/mach/host_priv.defs b/osfmk/mach/host_priv.defs index e3900d68b..1bc09c837 100644 --- a/osfmk/mach/host_priv.defs +++ b/osfmk/mach/host_priv.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -122,7 +122,7 @@ routine vm_wire( task : vm_map_t; address : vm_address_t; size : vm_size_t; - access : vm_prot_t); + desired_access : vm_prot_t); /* * Specify that the target thread must always be able @@ -162,8 +162,8 @@ routine vm_allocate_cpm( * Get list of processors on this host. */ routine host_processors( - host_priv : host_priv_t; - out processor_list : processor_array_t); + host_priv : host_priv_t; + out out_processor_list : processor_array_t); /* @@ -273,12 +273,18 @@ routine host_load_symbol_table( symtab : pointer_t); /* - * Make the target task swappable or unswappable. + * Specify that the range of the virtual address space + * of the target task must not cause page faults for + * the indicated accesses. + * + * [ To unwire the pages, specify VM_PROT_NONE. 
] */ -routine task_swappable( +routine mach_vm_wire( host_priv : host_priv_t; - target_task : task_t; - make_swappable : boolean_t); + task : vm_map_t; + address : mach_vm_address_t; + size : mach_vm_size_t; + desired_access : vm_prot_t); /* * JMM - Keep all processor_set related items at the end for easy diff --git a/osfmk/mach/host_reboot.h b/osfmk/mach/host_reboot.h index acb4f643f..efc17f980 100644 --- a/osfmk/mach/host_reboot.h +++ b/osfmk/mach/host_reboot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,27 +22,11 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:30 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.5.1 1995/01/06 19:50:20 devrcs - * mk6 CR668 - 1.3b26 merge - * new file for mk6 - * [1994/10/12 22:25:10 dwm] - * - * Revision 1.1.2.2 1993/10/20 19:09:29 gm - * CR9913: Replacement for flags used in host_reboot() - * calls. - * [1993/10/13 17:21:14 gm] - * - * $EndLog$ - */ + +#ifndef _MACH_HOST_REBOOT_ +#define _MACH_HOST_REBOOT_ #define HOST_REBOOT_HALT 0x8 #define HOST_REBOOT_DEBUGGER 0x1000 + +#endif /* _MACH_HOST_REBOOT_ */ diff --git a/osfmk/mach/host_special_ports.h b/osfmk/mach/host_special_ports.h index 62f88f186..27d00aa99 100644 --- a/osfmk/mach/host_special_ports.h +++ b/osfmk/mach/host_special_ports.h @@ -128,5 +128,4 @@ #define host_set_user_notification_port(host, port) \ (host_set_special_port((host), HOST_USER_NOTIFICATION_PORT, (port))) - #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */ diff --git a/osfmk/mach/i386/Makefile b/osfmk/mach/i386/Makefile index 10a2c5410..a177315eb 100644 --- a/osfmk/mach/i386/Makefile +++ b/osfmk/mach/i386/Makefile @@ -8,49 +8,27 @@ include $(MakeInc_def) VPATH+=$(SOURCE)/../../i386: -MIG_DEFS = mach_i386.defs - -MIGINCLUDES = mach_i386_server.h - DATAFILES = \ boolean.h exception.h fp_reg.h \ - processor_info.h kern_return.h mach_i386_types.h ndr_def.h syscall_sw.h \ + processor_info.h kern_return.h ndr_def.h syscall_sw.h \ thread_status.h thread_state.h vm_param.h \ vm_types.h rpc.h \ - machine_types.defs ${MIG_DEFS} + machine_types.defs INSTALL_MD_LIST = ${DATAFILES} INSTALL_MD_GEN_LIST = \ - asm.h \ - ${MIGINCLUDES} + asm.h INSTALL_MD_DIR = mach/i386 EXPORT_MD_LIST = ${DATAFILES} EXPORT_MD_GEN_LIST = \ - asm.h \ - ${MIGINCLUDES} + asm.h EXPORT_MD_DIR = mach/i386 -.ORDER: ${MIG_HDRS} ${MIGINCLUDES} - -COMP_FILES = mach_i386_server.h mach_i386_server.c - -.ORDER: mach_i386_server.h mach_i386_server.c - -MIGKSFLAGS = -DKERNEL_SERVER - -mach_i386_server.h mach_i386_server.c: mach_i386.defs - ${MIG} ${MIGFLAGS} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader mach_i386_server.h \ - -server mach_i386_server.c \ - $< - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/mach/i386/boolean.h b/osfmk/mach/i386/boolean.h index 0ec6b1893..78ae850d4 100644 --- a/osfmk/mach/i386/boolean.h +++ b/osfmk/mach/i386/boolean.h @@ -22,54 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 02:36:44 ezf - * change marker to not FREE - * [1994/09/22 21:39:49 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:19 gm 
- * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:03 jeffc] - * - * Revision 1.2 1993/04/19 16:33:37 devrcs - * ansi C conformance changes - * [1993/02/02 18:55:53 david] - * - * Revision 1.1 1992/09/30 02:30:40 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:51:56 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:32:04 mrt - * Changed to new Mach copyright - * [91/02/01 17:09:33 mrt] - * - * Revision 2.2 90/05/03 15:47:26 dbg - * First checkin. - * - * Revision 1.3 89/03/09 20:19:36 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:00:41 gm0w - * Changes for cleanup. - * - * 24-Sep-86 Michael Young (mwyoung) at Carnegie-Mellon University - * Created. - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University diff --git a/osfmk/mach/i386/exception.h b/osfmk/mach/i386/exception.h index aa45d06dd..3a4e4e9dd 100644 --- a/osfmk/mach/i386/exception.h +++ b/osfmk/mach/i386/exception.h @@ -22,80 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.8.5 1995/04/07 19:05:14 barbou - * Backed out previous submission. - * [95/03/29 barbou] - * - * Revision 1.2.8.4 1995/03/15 17:19:29 bruel - * EXC_TYPES_COUNT is machine independant. - * (the machine exception type is given in the code argument). - * [95/03/06 bruel] - * - * Revision 1.2.8.3 1995/01/10 05:16:18 devrcs - * mk6 CR801 - merge up from nmk18b4 to nmk18b7 - * * Rev 1.2.6.3 1994/11/08 21:53:17 rkc - * Incremented the number of exception types to reflect the addition - * of the alert exception. - * [1994/12/09 21:11:21 dwm] - * - * Revision 1.2.8.1 1994/09/23 02:36:53 ezf - * change marker to not FREE - * [1994/09/22 21:39:53 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:25 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:07 jeffc] - * - * Revision 1.2 1993/04/19 16:33:44 devrcs - * changes for EXC_MACH_SYSCALL - * [1993/04/05 12:06:25 david] - * - * make endif tags ansi compliant/include files - * [1993/02/20 21:44:18 david] - * - * Updated to new exception interface. - * [1992/12/23 13:05:21 david] - * - * Revision 1.1 1992/09/30 02:30:41 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:52:05 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:32:08 mrt - * Changed to new Mach copyright - * [91/02/01 17:09:45 mrt] - * - * Revision 2.2 90/05/03 15:47:38 dbg - * First checkin. - * - * Revision 1.3 89/03/09 20:19:42 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:00:47 gm0w - * Changes for cleanup. - * - * 31-Dec-88 Robert Baron (rvb) at Carnegie-Mellon University - * Derived from MACH2.0 vax release. - * - * 2-Nov-87 David Golub (dbg) at Carnegie-Mellon University - * Created. 
- * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University @@ -199,5 +125,4 @@ */ #define EXC_MASK_MACHINE 0 - #endif /* _MACH_I386_EXCEPTION_H_ */ diff --git a/osfmk/mach/i386/flipc_dep.h b/osfmk/mach/i386/flipc_dep.h index 7aa606390..a8be8ed52 100644 --- a/osfmk/mach/i386/flipc_dep.h +++ b/osfmk/mach/i386/flipc_dep.h @@ -23,45 +23,6 @@ * @OSF_COPYRIGHT@ * */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.5.1 1995/06/13 18:20:42 sjs - * Merge from flipc_shared. - * [95/06/07 sjs] - * - * Revision 1.1.3.3 1995/02/21 17:23:16 randys - * Re-indented code to four space indentation - * [1995/02/21 16:26:50 randys] - * - * Revision 1.1.3.2 1994/12/20 19:02:12 randys - * Moved definition of flipc_simple_lock to flipc_cb.h - * [1994/12/20 17:35:15 randys] - * - * Moved the machine independent macros into mach/flipc_locks.h - * [1994/12/20 16:44:14 randys] - * - * Added filename in comment at top of file - * [1994/12/19 20:29:36 randys] - * - * Fixed incorrect return of lock_try - * [1994/12/13 00:36:46 randys] - * - * Revision 1.1.3.1 1994/12/12 17:46:29 randys - * Putting initial flipc implementation under flipc_shared - * [1994/12/12 16:27:51 randys] - * - * Revision 1.1.1.2 1994/12/11 23:08:36 randys - * Initial flipc code checkin. - * - * $EndLog$ - */ /* * mach/i386/flipc_dep.h diff --git a/osfmk/mach/i386/fp_reg.h b/osfmk/mach/i386/fp_reg.h index a4d44b8da..9721e52cb 100644 --- a/osfmk/mach/i386/fp_reg.h +++ b/osfmk/mach/i386/fp_reg.h @@ -22,56 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 02:37:03 ezf - * change marker to not FREE - * [1994/09/22 21:39:57 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:30 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:11 jeffc] - * - * Revision 1.2 1993/04/19 16:33:51 devrcs - * ansi C conformance changes - * [1993/02/02 18:56:01 david] - * - * Revision 1.1 1992/09/30 02:30:43 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.1.1.1.2.1 92/03/03 16:21:23 jeffreyh - * Merged up to Trunk - * [92/02/26 jeffreyh] - * - * Revision 2.4 92/02/26 13:10:29 elf - * Added stupid alaises to make i386/fpu.c compile. RVB will fix. - * - * [92/02/26 elf] - * - * Revision 2.3 92/02/26 12:47:46 elf - * Installed from i386 directory. - * [92/02/26 danner] - * - * - * Revision 2.2 92/01/03 20:19:47 dbg - * Move this file to mach/i386. Add FP_NO..FP_387 codes for - * floating-point processor status. Error bits in control - * register are masks, not enables. - * [91/10/19 dbg] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1992-1989 Carnegie Mellon University @@ -102,6 +52,7 @@ #ifndef _I386_FP_SAVE_H_ #define _I386_FP_SAVE_H_ + /* * Floating point registers and status, as saved * and restored by FP save/restore instructions. 
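Stepping back to the host_info.h hunk above: the widened host_basic_info and the count macros recast as mach_msg_type_number_t are consumed from user space roughly as follows. This is an illustrative sketch, not part of the patch, assuming only the standard host_info() RPC:

#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
	host_basic_info_data_t info;
	mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

	/* The kernel fills at most `count` integer_t words, so old
	 * callers that still pass the smaller, pre-expansion count
	 * simply receive the original prefix of the structure. */
	if (host_info(mach_host_self(), HOST_BASIC_INFO,
	              (host_info_t)&info, &count) != KERN_SUCCESS)
		return 1;

	printf("cpus: %d/%d physical, %d/%d logical\n",
	       info.physical_cpu, info.physical_cpu_max,
	       info.logical_cpu, info.logical_cpu_max);
	/* max_mem reports real physical memory; memory_size stays
	 * capped at 2 GB for binary compatibility. */
	printf("memory: %llu bytes\n", (unsigned long long)info.max_mem);
	return 0;
}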
diff --git a/osfmk/mach/i386/kern_return.h b/osfmk/mach/i386/kern_return.h index d317340c0..6a19c7463 100644 --- a/osfmk/mach/i386/kern_return.h +++ b/osfmk/mach/i386/kern_return.h @@ -22,57 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 02:37:12 ezf - * change marker to not FREE - * [1994/09/22 21:40:01 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:35 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:15 jeffc] - * - * Revision 1.2 1993/04/19 16:33:58 devrcs - * ansi C conformance changes - * [1993/02/02 18:56:09 david] - * - * Revision 1.1 1992/09/30 02:30:47 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:52:15 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:32:12 mrt - * Changed to new Mach copyright - * [91/02/01 17:09:54 mrt] - * - * Revision 2.2 90/05/03 15:47:51 dbg - * First checkin. - * - * Revision 1.3 89/03/09 20:19:48 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:00:54 gm0w - * Changes for cleanup. - * - * 3-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University - * Allow inclusion in assembler input. - * - * 14-Oct-85 Michael Wayne Young (mwyoung) at Carnegie-Mellon University - * Created. - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -115,4 +64,5 @@ #ifndef ASSEMBLER typedef int kern_return_t; #endif /* ASSEMBLER */ + #endif /* _MACH_I386_KERN_RETURN_H_ */ diff --git a/osfmk/mach/i386/mach_i386.defs b/osfmk/mach/i386/mach_i386.defs deleted file mode 100644 index c29abf0b0..000000000 --- a/osfmk/mach/i386/mach_i386.defs +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.9.1 1994/09/23 02:37:21 ezf - * change marker to not FREE - * [1994/09/22 21:40:05 ezf] - * - * Revision 1.2.6.2 1994/03/17 22:38:39 dwm - * The infamous name change: thread_activation + thread_shuttle = thread. - * [1994/03/17 21:28:20 dwm] - * - * Revision 1.2.6.1 1994/01/12 17:56:07 dwm - * Coloc: initial restructuring to follow Utah model. - * [1994/01/12 17:30:21 dwm] - * - * Revision 1.2.2.3 1993/09/10 13:08:26 rod - * Zap obsolete MACH_IPC_TYPED conditional code. 
- * [1993/08/31 11:29:37 rod] - * - * Revision 1.2.2.2 1993/06/09 02:40:40 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:18 jeffc] - * - * Revision 1.2 1993/04/19 16:34:07 devrcs - * Merge untyped ipc: - * Introducing new MIG syntax for Untyped IPC (via compile option - * MACH_IPC_TYPED) - * [1993/02/17 23:44:54 travos] - * - * Revision 1.1 1992/09/30 02:22:34 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.2.3.1 92/03/03 16:21:26 jeffreyh - * Changes from TRUNK - * [92/02/26 11:58:37 jeffreyh] - * - * Revision 2.3 92/01/03 20:19:56 dbg - * Renamed io_map to io_port_add. Added io_port_remove, - * io_port_list, set_ldt, get_ldt. - * [91/08/20 dbg] - * - * Revision 2.2 91/07/31 17:51:57 dbg - * Created. - * [91/07/30 17:09:09 dbg] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -/* - */ - -/* - * Special functions for i386. - */ - -subsystem -#if KERNEL_SERVER - KernelServer -#endif KERNEL_SERVER - mach_i386 3800; - -#include -#include -#include - -type device_list_t = ^array[] of device_t; - -type descriptor_t = struct[2] of int; -type descriptor_list_t = array[*] of descriptor_t; - -#undef i386 /* XXX! */ -import ; - -routine i386_io_port_add( - target_act : thread_act_t; - device : device_t); - -routine i386_io_port_remove( - target_act : thread_act_t; - device : device_t); - -routine i386_io_port_list( - target_act : thread_act_t; - out device_list : device_list_t); - -routine i386_set_ldt( - target_act : thread_act_t; - first_selector : int; - desc_list : descriptor_list_t -); - -routine i386_get_ldt( - target_act : thread_act_t; - first_selector : int; - selector_count : int; - out desc_list : descriptor_list_t); diff --git a/osfmk/mach/i386/mach_i386_types.h b/osfmk/mach/i386/mach_i386_types.h deleted file mode 100644 index b2023f05e..000000000 --- a/osfmk/mach/i386/mach_i386_types.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1994/09/23 02:37:30 ezf - * change marker to not FREE - * [1994/09/22 21:40:09 ezf] - * - * Revision 1.1.2.2 1993/06/02 23:44:21 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:21 jeffc] - * - * Revision 1.1 1992/09/30 02:30:48 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.1.1.1.2.1 92/03/03 16:21:41 jeffreyh - * New file from TRUNK - * [92/02/26 11:59:15 jeffreyh] - * - * Revision 2.2 92/01/03 20:20:01 dbg - * Created. - * [91/08/20 dbg] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -/* - */ - -/* - * Type definitions for i386 interface routines. - */ - -#ifndef _MACH_MACH_I386_TYPES_H_ -#define _MACH_MACH_I386_TYPES_H_ - -/* - * Array of devices. - */ -typedef device_t *device_list_t; - -/* - * i386 segment descriptor. - */ -struct descriptor { - unsigned int low_word; - unsigned int high_word; -}; - -typedef struct descriptor descriptor_t; -typedef struct descriptor *descriptor_list_t; - -#endif /* _MACH_MACH_I386_TYPES_H_ */ diff --git a/osfmk/mach/i386/machine_types.defs b/osfmk/mach/i386/machine_types.defs index 3b1376206..d6cb4af54 100644 --- a/osfmk/mach/i386/machine_types.defs +++ b/osfmk/mach/i386/machine_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,6 +22,7 @@ /* * @OSF_COPYRIGHT@ */ + /* * Header file for basic, machine-dependent data types. i386 version. */ @@ -64,6 +65,23 @@ type register_t = int32_t; type integer_t = int32_t; type natural_t = uint32_t; +/* + * These are the VM types that scale with the address + * space size of a given process. 
+ */ +type vm_address_t = natural_t; +type vm_offset_t = natural_t; +type vm_size_t = natural_t; + +/* + * The mach_vm_xxx_t types are sized to hold the + * maximum pointer, offset, etc... supported on the + * platform. + */ +type mach_vm_address_t = uint32_t; +type mach_vm_offset_t = uint32_t; +type mach_vm_size_t = uint32_t; + #if MACH_IPC_COMPAT /* * For the old IPC interface */ @@ -72,5 +90,4 @@ type natural_t = uint32_t; #endif /* MACH_IPC_COMPAT */ - #endif /* _MACHINE_VM_TYPES_DEFS_ */ diff --git a/osfmk/mach/i386/ndr_def.h b/osfmk/mach/i386/ndr_def.h index 7a36022f1..f6af3476a 100644 --- a/osfmk/mach/i386/ndr_def.h +++ b/osfmk/mach/i386/ndr_def.h @@ -22,36 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:38 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 01:59:33 ezf - * change marker to not FREE - * [1994/09/22 21:25:24 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:29:06 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:06:33 jeffc] - * - * Revision 1.2 1993/04/19 16:15:32 devrcs - * Untyped ipc merge: - * New names for the fields - the structure isn't changed - * [1993/03/12 23:01:28 travos] - * Extended NDR record to include version number(s) - * [1993/03/05 23:09:51 travos] - * It initializes the NDR record. Included also by libmach - * [1993/02/17 21:58:01 travos] - * [1993/03/16 13:42:33 rod] - * - * $EndLog$ - */ - /* NDR record for Intel x86s */ diff --git a/osfmk/mach/i386/processor_info.h b/osfmk/mach/i386/processor_info.h index 8709e475a..8ca068dac 100644 --- a/osfmk/mach/i386/processor_info.h +++ b/osfmk/mach/i386/processor_info.h @@ -28,5 +28,4 @@ #ifndef _MACH_I386_PROCESSOR_INFO_H_ #define _MACH_I386_PROCESSOR_INFO_H_ - #endif /* _MACH_I386_PROCESSOR_INFO_H_ */ diff --git a/osfmk/mach/i386/rpc.h b/osfmk/mach/i386/rpc.h index 1f8b879b8..c1d3aa404 100644 --- a/osfmk/mach/i386/rpc.h +++ b/osfmk/mach/i386/rpc.h @@ -22,7 +22,9 @@ /* * @OSF_COPYRIGHT@ */ -#ifndef _MACH_I386_RPC_H_ -#define _MACH_I386_RPC_H_ -#endif /* _MACH_I386_RPC_H_ */ +#ifndef _MACH_I386_RPC_H_ +#define _MACH_I386_RPC_H_ + +#endif /* _MACH_I386_RPC_H_ */ + diff --git a/osfmk/mach/i386/syscall_sw.h b/osfmk/mach/i386/syscall_sw.h index 16104a874..1ee827e0f 100644 --- a/osfmk/mach/i386/syscall_sw.h +++ b/osfmk/mach/i386/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,94 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.2 1998/04/29 17:36:36 mburg - * MK7.3 merger - * - * Revision 1.2.22.1 1998/02/03 09:32:55 gdt - * Merge up to MK7.3 - * [1998/02/03 09:17:02 gdt] - * - * Revision 1.2.20.1 1997/06/17 03:00:55 devrcs - * RPC Enhancements. - * Added new definition of the rpc_return_trap. - * [1996/04/26 21:53:51 yfei] - * - * Revision 1.2.17.2 1996/02/16 00:07:27 yfei - * Merged NSWC based RPC enhancements into MK7_MAIN. - * - * Revision 1.2.12.2 1994/09/23 02:37:42 ezf - * change marker to not FREE - * [1994/09/22 21:40:17 ezf] - * - * Revision 1.2.12.1 1994/08/26 20:48:44 watkins - * Merge with rt2_shared. - * [1994/08/26 18:38:55 watkins] - * - * Revision 1.2.9.1 1994/07/18 22:03:32 burke - * Check-in for merge.
- * [1994/07/15 21:04:49 burke] - * - * Revision 1.2.7.3 1994/07/05 14:28:23 watkins - * Merge with rpc. - * [1994/07/05 14:27:30 watkins] - * - * Revision 1.2.6.1 1994/05/18 21:18:29 watkins - * Add macro for rpc call gate. - * [1994/05/18 21:16:19 watkins] - * - * Revision 1.2.2.2 1993/06/09 02:40:45 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:24 jeffc] - * - * Revision 1.2 1993/04/19 16:34:14 devrcs - * Fixes for ANSI C - * [1993/02/26 13:35:10 sp] - * - * Revision 1.1 1992/09/30 02:30:50 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/05/14 16:52:22 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:32:17 mrt - * Changed to new Mach copyright - * [91/02/01 17:10:01 mrt] - * - * Revision 2.3 90/12/05 23:46:16 af - * Made GNU preproc happy. - * - * Revision 2.2 90/05/03 15:48:01 dbg - * Created. - * [90/04/30 16:36:25 dbg] - * - * Revision 1.3.1.1 89/12/22 22:22:03 rvb - * Use asm.h - * [89/12/22 rvb] - * - * Revision 1.3 89/03/09 20:19:53 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:01:00 gm0w - * Changes for cleanup. - * - * 31-Dec-88 Robert Baron (rvb) at Carnegie-Mellon University - * Derived from MACH2.0 vax release. - * - * 1-Sep-86 Michael Young (mwyoung) at Carnegie-Mellon University - * Created from mach_syscalls.h in the user library sources. - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University @@ -138,13 +50,14 @@ /* */ +#ifdef PRIVATE + #ifndef _MACH_I386_SYSCALL_SW_H_ #define _MACH_I386_SYSCALL_SW_H_ #include #define MACHCALLSEL $0x07 -#define RPCCALLSEL $0x0f #define kernel_trap(trap_name,trap_number,number_args) \ LEAF(_##trap_name,0) ;\ @@ -152,17 +65,6 @@ LEAF(_##trap_name,0) ;\ lcall MACHCALLSEL, $0 ;\ END(_##trap_name) -#define rpc_trap(trap_name,trap_number,number_args) \ -LEAF(_##trap_name,0) ;\ - movl $##trap_number,%eax; \ - lcall RPCCALLSEL, $0 ;\ -END(_##trap_name) - -#define rpc_return_trap(trap_name,trap_number,number_args) \ -LEAF(_##trap_name,0) ;\ - movl %eax, %ecx; \ - movl $##trap_number,%eax; \ - lcall RPCCALLSEL, $0 ;\ -END(_##trap_name) - #endif /* _MACH_I386_SYSCALL_SW_H_ */ + +#endif /* PRIVATE */ diff --git a/osfmk/mach/i386/thread_state.h b/osfmk/mach/i386/thread_state.h index d46d667d9..c043c69d0 100644 --- a/osfmk/mach/i386/thread_state.h +++ b/osfmk/mach/i386/thread_state.h @@ -22,29 +22,7 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:47 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.3 1995/01/10 05:16:26 devrcs - * mk6 CR801 - copyright marker not FREE_ - * [1994/12/01 19:25:21 dwm] - * - * Revision 1.1.6.1 1994/08/07 20:48:54 bolinger - * Merge up to colo_b7. - * [1994/08/01 21:01:26 bolinger] - * - * Revision 1.1.4.1 1994/06/25 03:47:07 dwm - * mk6 CR98 - new file to hold MD THREAD_STATE_MAX - * [1994/06/24 21:54:48 dwm] - * - * $EndLog$ - */ + #ifndef _MACH_I386_THREAD_STATE_H_ #define _MACH_I386_THREAD_STATE_H_ diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index 5b109ae16..b41e6e7b7 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -61,6 +61,7 @@ #ifndef _MACH_I386_THREAD_STATUS_H_ #define _MACH_I386_THREAD_STATUS_H_ +#include #include #include #include /* FIXME */ @@ -133,8 +134,8 @@ struct i386_new_thread_state { unsigned int uesp; unsigned int ss; }; -#define i386_NEW_THREAD_STATE_COUNT \ - (sizeof (struct i386_new_thread_state)/sizeof(unsigned int)) +#define i386_NEW_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct i386_new_thread_state)/sizeof(unsigned int))) /* * Subset of saved state stored by processor on kernel-to-kernel @@ -202,7 +203,8 @@ struct i386_saved_state { * function call args from the stack, for * efficient syscall exceptions */ }; -#define i386_SAVED_STATE_COUNT (sizeof (struct i386_saved_state)/sizeof(unsigned int)) +#define i386_SAVED_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct i386_saved_state)/sizeof(unsigned int))) #define i386_REGS_SEGS_STATE_COUNT i386_SAVED_STATE_COUNT /* @@ -251,12 +253,12 @@ struct i386_float_state { unsigned char hw_state[FP_STATE_BYTES]; /* actual "hardware" state */ int exc_status; /* exception status (readonly) */ }; -#define i386_FLOAT_STATE_COUNT \ - (sizeof(struct i386_float_state)/sizeof(unsigned int)) +#define i386_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct i386_float_state)/sizeof(unsigned int))) -#define FP_old_STATE_BYTES \ - (sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs)) +#define FP_old_STATE_BYTES ((mach_msg_type_number_t) \ + (sizeof (struct i386_fp_save) + sizeof (struct i386_fp_regs))) struct i386_old_float_state { int fpkind; /* FP_NO..FP_387 (readonly) */ @@ -264,8 +266,8 @@ struct i386_old_float_state { unsigned char hw_state[FP_old_STATE_BYTES]; /* actual "hardware" state */ int exc_status; /* exception status (readonly) */ }; -#define i386_old_FLOAT_STATE_COUNT \ - (sizeof(struct i386_old_float_state)/sizeof(unsigned int)) +#define i386_old_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct i386_old_float_state)/sizeof(unsigned int))) #define PORT_MAP_BITS 0x400 @@ -273,8 +275,8 @@ struct i386_isa_port_map_state { unsigned char pm[PORT_MAP_BITS>>3]; }; -#define i386_ISA_PORT_MAP_STATE_COUNT \ - (sizeof(struct i386_isa_port_map_state)/sizeof(unsigned int)) +#define i386_ISA_PORT_MAP_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct i386_isa_port_map_state)/sizeof(unsigned int))) /* * V8086 assist supplies a pointer to an interrupt @@ -291,8 +293,8 @@ struct v86_interrupt_table { unsigned short vec; /* vector to take */ }; -#define i386_V86_ASSIST_STATE_COUNT \ - (sizeof(struct i386_v86_assist_state)/sizeof(unsigned int)) +#define i386_V86_ASSIST_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct i386_v86_assist_state)/sizeof(unsigned int))) struct thread_syscall_state { unsigned eax; @@ -302,8 +304,8 @@ struct thread_syscall_state { unsigned esp; }; -#define i386_THREAD_SYSCALL_STATE_COUNT \ - (sizeof(struct thread_syscall_state) / sizeof(unsigned int)) +#define i386_THREAD_SYSCALL_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct thread_syscall_state) / sizeof(unsigned int))) /* * Main thread state consists of @@ -332,8 +334,8 @@ typedef struct { unsigned int gs; } i386_thread_state_t; -#define i386_THREAD_STATE_COUNT \ - ( sizeof (i386_thread_state_t) / sizeof (int) ) +#define i386_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_thread_state_t) / sizeof (int) )) /* * Default segment register values. 
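/*
 * Illustrative sketch, not part of this change: the reason the *_COUNT
 * macros above gain a mach_msg_type_number_t cast is that they seed the
 * in/out count argument of the thread-state calls. A hedged user-space
 * example follows (the i386_THREAD_STATE flavor is assumed from this
 * header; some kernels refuse to report the calling thread's own state,
 * so the return code is checked).
 */
#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
    i386_thread_state_t state;
    mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
    kern_return_t kr;

    kr = thread_get_state(mach_thread_self(), i386_THREAD_STATE,
                          (thread_state_t)&state, &count);
    if (kr == KERN_SUCCESS)
        printf("eip = 0x%x (count %u)\n", state.eip, (unsigned)count);
    return 0;
}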
@@ -357,8 +359,8 @@ typedef struct { fp_stack_t stack; } i386_thread_fpstate_t; -#define i386_THREAD_FPSTATE_COUNT \ - ( sizeof (i386_thread_fpstate_t) / sizeof (int) ) +#define i386_THREAD_FPSTATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_thread_fpstate_t) / sizeof (int) )) /* * Extra state that may be @@ -372,8 +374,8 @@ typedef struct { err_code_t err; } i386_thread_exceptstate_t; -#define i386_THREAD_EXCEPTSTATE_COUNT \ - ( sizeof (i386_thread_exceptstate_t) / sizeof (int) ) +#define i386_THREAD_EXCEPTSTATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_thread_exceptstate_t) / sizeof (int) )) /* * Per-thread variable used @@ -386,7 +388,7 @@ typedef struct { unsigned int self; } i386_thread_cthreadstate_t; -#define i386_THREAD_CTHREADSTATE_COUNT \ - ( sizeof (i386_thread_cthreadstate_t) / sizeof (int) ) +#define i386_THREAD_CTHREADSTATE_COUNT ((mach_msg_type_number_t) \ + ( sizeof (i386_thread_cthreadstate_t) / sizeof (int) )) #endif /* _MACH_I386_THREAD_STATUS_H_ */ diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h index 822a8c21a..abc489418 100644 --- a/osfmk/mach/i386/vm_param.h +++ b/osfmk/mach/i386/vm_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -84,19 +84,23 @@ #ifndef _MACH_I386_VM_PARAM_H_ #define _MACH_I386_VM_PARAM_H_ -#define BYTE_SIZE 8 /* byte size in bits */ +#define BYTE_SIZE 8 /* byte size in bits */ #define I386_PGBYTES 4096 /* bytes per 80386 page */ -#define I386_PGSHIFT 12 /* number of bits to shift for pages */ +#define I386_PGSHIFT 12 /* number of bits to shift for pages */ + +#define PAGE_SIZE I386_PGBYTES +#define PAGE_SHIFT I386_PGSHIFT +#define PAGE_MASK (PAGE_SIZE - 1) /* * Convert bytes to pages and convert pages to bytes. * No rounding is used. */ -#define i386_btop(x) (((unsigned)(x)) >> I386_PGSHIFT) +#define i386_btop(x) (((pmap_paddr_t)(x)) >> I386_PGSHIFT) #define machine_btop(x) i386_btop(x) -#define i386_ptob(x) (((unsigned)(x)) << I386_PGSHIFT) +#define i386_ptob(x) (((pmap_paddr_t)(x)) << I386_PGSHIFT) /* * Round off or truncate to the nearest page. These will work @@ -104,26 +108,41 @@ * bytes. */ -#define i386_round_page(x) ((((unsigned)(x)) + I386_PGBYTES - 1) & \ +#define i386_round_page(x) ((((pmap_paddr_t)(x)) + I386_PGBYTES - 1) & \ ~(I386_PGBYTES-1)) -#define i386_trunc_page(x) (((unsigned)(x)) & ~(I386_PGBYTES-1)) +#define i386_trunc_page(x) (((pmap_paddr_t)(x)) & ~(I386_PGBYTES-1)) #define VM_MAX_PAGE_ADDRESS 0x00000000C0000000ULL +/* system-wide values */ +#define MACH_VM_MIN_ADDRESS ((mach_vm_offset_t) 0) +#define MACH_VM_MAX_ADDRESS ((mach_vm_offset_t) VM_MAX_PAGE_ADDRESS) + +/* process-relative values (all 32-bit legacy only for now) */ #define VM_MIN_ADDRESS ((vm_offset_t) 0) #define VM_MAX_ADDRESS ((vm_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) -#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0xc0000000) +#ifdef KERNEL_PRIVATE -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00000000U) -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0x3fffffffU) - -#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x0c000000U) -#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x1fffffffU) +/* Kernel-wide values */ +#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0xC0000000U) +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFfffffffU) +#define KERNEL_STACK_SIZE (I386_PGBYTES*4) /* FIXME - always leave like this? 
*/ #define INTSTACK_SIZE (I386_PGBYTES*4) -#define KERNEL_STACK_SIZE (I386_PGBYTES*4) + +#ifdef MACH_KERNEL_PRIVATE + +/* For implementing legacy 32-bit interfaces */ +#define VM32_SUPPORT +#define VM32_MIN_ADDRESS ((vm32_offset_t) 0) +#define VM32_MAX_ADDRESS ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) + +#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0xc0000000) + +#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x0c000000U) +#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x1fffffffU) /* * Conversion between 80386 pages and VM pages @@ -133,55 +152,31 @@ #define round_i386_to_vm(p) (atop(round_page(i386_ptob(p)))) #define vm_to_i386(p) (i386_btop(ptoa(p))) -/* - * Physical memory is mapped 1-1 with virtual memory starting - * at VM_MIN_KERNEL_ADDRESS. - */ -#define phystokv(a) ((vm_offset_t)(a) + VM_MIN_KERNEL_ADDRESS) - -/* - * For 386 only, ensure that pages are installed in the - * kernel_pmap with VM_PROT_WRITE enabled. This avoids - * code in pmap_enter that disallows a read-only mapping - * in the kernel's pmap. (See ri-osc CR1387.) - * - * An entry in kernel_pmap is made only by the kernel or - * a collocated server -- by definition (;-)), the requester - * is trusted code. If it asked for read-only access, - * it won't attempt a write. We don't have to enforce the - * restriction. (Naturally, this assumes that any collocated - * server will _not_ depend on trapping write accesses to pages - * mapped read-only; this cannot be made to work in the current - * i386-inspired pmap model.) - */ - -/*#if defined(AT386) - -#define PMAP_ENTER_386_CHECK \ - if (cpuid_family == CPUID_FAMILY_386) - -#else -- FIXME? We're only running on Pentiums or better */ - -#define PMAP_ENTER_386_CHECK - -/*#endif*/ - #define PMAP_ENTER(pmap, virtual_address, page, protection, flags, wired) \ MACRO_BEGIN \ + pmap_t __pmap = (pmap); \ + vm_page_t __page = (page); \ vm_prot_t __prot__ = \ (protection) & ~(page)->page_lock; \ \ - PMAP_ENTER_386_CHECK \ - if ((pmap) == kernel_pmap) \ + if (__pmap == kernel_pmap) { \ __prot__ |= VM_PROT_WRITE; \ + } else { \ + assert(!__page->encrypted); \ + } \ + \ pmap_enter( \ - (pmap), \ + __pmap, \ (virtual_address), \ - (page)->phys_page, \ + __page->phys_page, \ __prot__, \ flags, \ (wired) \ ); \ MACRO_END +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + #endif /* _MACH_I386_VM_PARAM_H_ */ diff --git a/osfmk/mach/i386/vm_types.h b/osfmk/mach/i386/vm_types.h index 0ad257fcb..f432e35a6 100644 --- a/osfmk/mach/i386/vm_types.h +++ b/osfmk/mach/i386/vm_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,50 +61,91 @@ #ifndef _MACH_I386_VM_TYPES_H_ #define _MACH_I386_VM_TYPES_H_ -#ifdef ASSEMBLER -#else /* ASSEMBLER */ +#ifndef ASSEMBLER -/* - * A natural_t is the type for the native - * integer type, e.g. 32 or 64 or.. whatever - * register size the machine has. Unsigned, it is - * used for entities that might be either - * unsigned integers or pointers, and for - * type-casting between the two. - * For instance, the IPC system represents - * a port in user space as an integer and - * in kernel space as a pointer. - */ -typedef unsigned int natural_t; +#include +#include /* - * An integer_t is the signed counterpart - * of the natural_t type. Both types are - * only supposed to be used to define - * other types in a machine-independent - * way. 
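/*
 * Standalone sketch, not part of the patch: the rounding and truncation
 * macros added to vm_param.h above reduce to simple mask arithmetic
 * because I386_PGBYTES is a power of two. The demo_* names are local to
 * this example and stand in for the kernel's pmap_paddr_t-based macros.
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_PGBYTES 4096u              /* stands in for I386_PGBYTES */

static uint32_t demo_round_page(uint32_t x)
{
    /* same expression as i386_round_page() */
    return (x + DEMO_PGBYTES - 1) & ~(DEMO_PGBYTES - 1);
}

static uint32_t demo_trunc_page(uint32_t x)
{
    /* same expression as i386_trunc_page() */
    return x & ~(DEMO_PGBYTES - 1);
}

int main(void)
{
    assert(demo_round_page(1) == 4096u);      /* partial page rounds up   */
    assert(demo_round_page(8192u) == 8192u);  /* exact multiple unchanged */
    assert(demo_trunc_page(8191u) == 4096u);  /* truncates to page start  */
    return 0;
}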
+ * natural_t and integer_t are Mach's legacy types for machine-
+ * independent integer types (unsigned, and signed, respectively).
+ * Their original purpose was to define other types in a machine/
+ * compiler independent way.
+ *
+ * They also had an implicit "same size as pointer" characteristic
+ * to them (i.e. Mach's traditional types are very ILP32 or ILP64
+ * centric). We will likely support x86 ABIs that do not follow
+ * either of these models (specifically LP64). Therefore, we had to
+ * make a choice between making these types scale with pointers or stay
+ * tied to integers. Because their use is predominantly tied
+ * to the size of an integer, we are keeping that association and
+ * breaking free from pointer size guarantees.
+ *
+ * New use of these types is discouraged.
 */
-typedef int integer_t;
+typedef __darwin_natural_t natural_t;
+typedef int integer_t;
 
 /*
  * A vm_offset_t is a type-neutral pointer,
  * e.g. an offset into a virtual memory space.
  */
-typedef natural_t vm_offset_t;
+typedef natural_t vm_offset_t;
 
 /*
  * A vm_size_t is the proper type for e.g.
  * expressing the difference between two
  * vm_offset_t entities.
  */
-typedef natural_t vm_size_t;
+typedef natural_t vm_size_t;
 
-#endif /* ASSEMBLER */
+/*
+ * This new type is independent of a particular vm map's
+ * implementation size - and represents appropriate types
+ * for all possible maps. This is used for interfaces
+ * where the size of the map is not known - or we don't
+ * want to have to distinguish.
+ */
+typedef uint32_t mach_vm_address_t;
+typedef uint32_t mach_vm_offset_t;
+typedef uint32_t mach_vm_size_t;
+
+/* LP64todo - convert these over for good */
+#if 0
+typedef uint64_t vm_map_offset_t;
+typedef uint64_t vm_map_address_t;
+typedef uint64_t vm_map_size_t;
+#define VM_MAP_MIN_ADDRESS MACH_VM_MIN_ADDRESS
+#define VM_MAP_MAX_ADDRESS MACH_VM_MAX_ADDRESS
+#else
+typedef uint32_t vm_map_offset_t;
+typedef uint32_t vm_map_address_t;
+typedef uint32_t vm_map_size_t;
+#define VM_MAP_MIN_ADDRESS VM_MIN_ADDRESS
+#define VM_MAP_MAX_ADDRESS VM_MAX_ADDRESS
+#endif
+
+#ifdef MACH_KERNEL_PRIVATE
+
+#ifdef VM32_SUPPORT
 
 /*
- * If composing messages by hand (please dont)
+ * These are types used internal to Mach to implement the
+ * legacy 32-bit VM APIs published by the kernel.
  */
+typedef uint32_t vm32_address_t;
+typedef uint32_t vm32_offset_t;
+typedef uint32_t vm32_size_t;
+
+#endif /* VM32_SUPPORT */
+
+#endif /* MACH_KERNEL_PRIVATE */
+#endif /* ASSEMBLER */
+
+/*
+ * If composing messages by hand (please do not)
+ */
 #define MACH_MSG_TYPE_INTEGER_T MACH_MSG_TYPE_INTEGER_32
 
 #endif /* _MACH_I386_VM_TYPES_H_ */
diff --git a/osfmk/mach/kern_return.h b/osfmk/mach/kern_return.h
index c347f1e3d..f2b1fcc70 100644
--- a/osfmk/mach/kern_return.h
+++ b/osfmk/mach/kern_return.h
@@ -63,11 +63,6 @@
 
 #include 
 
-/*
- * N.B.: If you add errors, please update
- * mach_services/lib/libmach/err_kern.sub
- */
-
 #define KERN_SUCCESS 0
 
 #define KERN_INVALID_ADDRESS 1
diff --git a/osfmk/mach/kmod.h b/osfmk/mach/kmod.h
index 49d0f0103..7b75bb452 100644
--- a/osfmk/mach/kmod.h
+++ b/osfmk/mach/kmod.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -19,21 +19,13 @@
 *
 * @APPLE_LICENSE_HEADER_END@
 */
-/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
- *
- * HISTORY
- *
- * 1999 Mar 29 rsulack created.
- */ #ifndef _MACH_KMOD_H_ #define _MACH_KMOD_H_ -#include #include -#ifdef __APPLE_API_PRIVATE +#include #define KMOD_CNTL_START 1 // call kmod's start routine #define KMOD_CNTL_STOP 2 // call kmod's stop routine @@ -45,23 +37,25 @@ #define KMOD_UNPACK_FROM_ID(i) ((unsigned long)i >> 16) #define KMOD_UNPACK_TO_ID(i) ((unsigned long)i & 0xffff) -#endif /* __APPLE_API_PRIVATE */ - -#define KMOD_MAX_NAME 64 - -#ifdef __APPLE_API_PRIVATE - typedef int kmod_t; typedef int kmod_control_flavor_t; typedef void* kmod_args_t; -#endif /* __APPLE_API_PRIVATE */ +#define KMOD_MAX_NAME 64 + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif +/* LP64todo - not 64-bit safe */ typedef struct kmod_reference { struct kmod_reference *next; struct kmod_info *info; } kmod_reference_t; +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif /**************************************************************************************/ /* warning any changes to this structure affect the following macros. */ @@ -73,6 +67,12 @@ typedef struct kmod_reference { typedef kern_return_t kmod_start_func_t(struct kmod_info *ki, void *data); typedef kern_return_t kmod_stop_func_t(struct kmod_info *ki, void *data); +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + +/* LP64todo - not 64-bit safe */ + typedef struct kmod_info { struct kmod_info *next; int info_version; // version of this structure @@ -88,12 +88,12 @@ typedef struct kmod_info { kmod_stop_func_t *stop; } kmod_info_t; -#ifdef __APPLE_API_PRIVATE +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif typedef kmod_info_t *kmod_info_array_t; -#endif /* __APPLE_API_PRIVATE */ - #define KMOD_INFO_NAME kmod_info #define KMOD_INFO_VERSION 1 @@ -122,8 +122,6 @@ typedef kmod_info_t *kmod_info_array_t; // kmod kernel to user commands // ************************************************************************************* -#ifdef __APPLE_API_PRIVATE - #define KMOD_LOAD_EXTENSION_PACKET 1 #define KMOD_LOAD_WITH_DEPENDENCIES_PACKET 2 @@ -147,33 +145,35 @@ typedef struct kmod_generic_cmd { char data[1]; } kmod_generic_cmd_t; -#ifdef KERNEL_PRIVATE - -extern void kmod_init(); - -extern kern_return_t kmod_create_fake(const char *name, const char *version); +#ifdef KERNEL_PRIVATE extern kmod_info_t *kmod_lookupbyname(const char * name); extern kmod_info_t *kmod_lookupbyid(kmod_t id); extern kmod_info_t *kmod_lookupbyname_locked(const char * name); extern kmod_info_t *kmod_lookupbyid_locked(kmod_t id); +extern kmod_start_func_t kmod_default_start; +extern kmod_stop_func_t kmod_default_stop; + +__BEGIN_DECLS +extern void kmod_init(void); + +extern kern_return_t kmod_create_fake(const char *name, const char *version); +extern kern_return_t kmod_create_fake_with_address(const char *name, const char *version, + vm_address_t address, vm_size_t size, + int * return_id); +extern kern_return_t kmod_destroy_fake(kmod_t id); extern kern_return_t kmod_load_extension(char *name); extern kern_return_t kmod_load_extension_with_dependencies(char *name, char **dependencies); extern kern_return_t kmod_send_generic(int type, void *data, int size); -extern kmod_start_func_t kmod_default_start; -extern kmod_stop_func_t kmod_default_stop; - extern kern_return_t kmod_initialize_cpp(kmod_info_t *info); extern kern_return_t kmod_finalize_cpp(kmod_info_t *info); -extern void kmod_dump(vm_offset_t *addr, unsigned int cnt); - -#endif /* KERNEL_PRIVATE */ - -#endif /* __APPLE_API_PRIVATE */ +extern void kmod_dump(vm_offset_t *addr, unsigned int dump_cnt); +__END_DECLS 
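/*
 * Hedged sketch, not part of kmod.h: demonstrates the id-packing
 * convention implied by KMOD_UNPACK_FROM_ID()/KMOD_UNPACK_TO_ID() above
 * (from-id in the high 16 bits, to-id in the low 16 bits).
 * EXAMPLE_PACK_IDS is a hypothetical helper local to this example, and
 * user-space availability of <mach/kmod.h> is assumed.
 */
#include <mach/kmod.h>
#include <assert.h>

#define EXAMPLE_PACK_IDS(from, to) \
    ((kmod_t)(((unsigned long)(from) << 16) | ((unsigned long)(to) & 0xffff)))

int main(void)
{
    kmod_t id = EXAMPLE_PACK_IDS(7, 42);
    assert(KMOD_UNPACK_FROM_ID(id) == 7);
    assert(KMOD_UNPACK_TO_ID(id) == 42);
    return 0;
}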
+#endif /* KERNEL_PRIVATE */ #endif /* _MACH_KMOD_H_ */ diff --git a/osfmk/mach/mach_host.defs b/osfmk/mach/mach_host.defs index a5b87e24d..fb062cde1 100644 --- a/osfmk/mach/mach_host.defs +++ b/osfmk/mach/mach_host.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -103,7 +103,7 @@ routine host_kernel_version( */ routine host_page_size( host : host_t; - out page_size : vm_size_t); + out out_page_size : vm_size_t); /* * Allow pagers to create named entries that point to un-mapped @@ -124,10 +124,10 @@ routine mach_memory_object_memory_entry( * The returned data is an OOL array of processor info. */ routine host_processor_info( - host : host_t; - flavor : processor_flavor_t; - out processor_count : natural_t; - out processor_info : processor_info_array_t); + host : host_t; + flavor : processor_flavor_t; + out out_processor_count : natural_t; + out out_processor_info : processor_info_array_t); /* * Return host IO master access port @@ -238,3 +238,9 @@ routine host_request_notification( host : host_t; notify_type : host_flavor_t; notify_port : mach_port_make_send_once_t); + +routine host_lockgroup_info( + host : host_t; + out lockgroup_info : lockgroup_info_array_t, + Dealloc); + diff --git a/osfmk/mach/mach_interface.h b/osfmk/mach/mach_interface.h index 6b0a54fb7..91b4b2bac 100644 --- a/osfmk/mach/mach_interface.h +++ b/osfmk/mach/mach_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,7 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (C) Apple Computer 1998 - * ALL Rights Reserved - */ -/* - * This file represents the interfaces that used to come - * from creating the user headers from the mach.defs file. - * Because mach.defs was decomposed, this file now just - * wraps up all the new interface headers generated from - * each of the new .defs resulting from that decomposition. - */ + #ifndef _MACH_INTERFACE_H_ #define _MACH_INTERFACE_H_ @@ -43,10 +33,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -55,6 +41,15 @@ #include #include -#endif /* _MACH_INTERFACE_H_ */ - +#ifdef XNU_KERNEL_PRIVATE +/* + * Raw EMMI interfaces are private to xnu + * and subject to change. + */ +#include +#include +#include +#include +#endif +#endif /* _MACH_INTERFACE_H_ */ diff --git a/osfmk/mach/mach_param.h b/osfmk/mach/mach_param.h index f82d2b3ff..c68789a1e 100644 --- a/osfmk/mach/mach_param.h +++ b/osfmk/mach/mach_param.h @@ -22,82 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:30 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.16.2 1994/09/23 02:38:50 ezf - * change marker to not FREE - * [1994/09/22 21:40:58 ezf] - * - * Revision 1.2.16.1 1994/06/13 20:49:40 dlb - * Merge MK6 and NMK17 - * [1994/06/13 20:47:55 dlb] - * - * Revision 1.2.7.1 1994/03/11 15:26:48 bernadat - * Do not account exception ports as registered ports. - * [94/03/11 bernadat] - * - * Revision 1.2.2.4 1993/08/05 19:09:19 jeffc - * CR9508 - Delete dead code. Remove MACH_IPC_COMPAT - * [1993/08/03 17:09:06 jeffc] - * - * Revision 1.2.2.3 1993/08/03 18:29:29 gm - * CR9596: Change KERNEL to MACH_KERNEL. 
- * [1993/08/02 18:04:55 gm] - * - * Revision 1.2.2.2 1993/06/09 02:41:29 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:53 jeffc] - * - * Revision 1.2 1993/04/19 16:35:13 devrcs - * Fixes for ANSI C - * [1993/02/26 13:30:09 sp] - * - * Updated to new exception interface. - * [1992/12/23 13:09:02 david] - * - * Revision 1.1 1992/09/30 02:31:14 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4.2.1 92/03/03 16:22:03 jeffreyh - * Changes from TRUNK - * [92/02/26 12:02:58 jeffreyh] - * - * Revision 2.5 92/01/15 13:44:51 rpd - * Changed MACH_IPC_COMPAT conditionals to default to not present. - * - * Revision 2.4 91/05/14 16:54:40 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:33:28 mrt - * Changed to new Mach copyright - * [91/02/01 17:18:01 mrt] - * - * Revision 2.2 90/06/02 14:58:21 rpd - * Created. - * [90/03/26 23:56:39 rpd] - * - * - * Condensed history: - * Moved implementation constants elsewhere (rpd). - * Added SET_MAX (rpd). - * Added KERN_MSG_SMALL_SIZE (mwyoung). - * Added PORT_BACKLOG_MAX (mwyoung). - * Added PORT_BACKLOG_MAX (mwyoung). - * Added TASK_PORT_REGISTER_MAX (mwyoung). - * Created (mwyoung). - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University diff --git a/osfmk/mach/mach_port.defs b/osfmk/mach/mach_port.defs index 4590e0fc7..5da6fc755 100644 --- a/osfmk/mach/mach_port.defs +++ b/osfmk/mach/mach_port.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -378,7 +378,7 @@ routine mach_port_get_srights( */ routine mach_port_space_info( task : ipc_space_t; - out info : ipc_info_space_t; + out space_info : ipc_info_space_t; out table_info : ipc_info_name_array_t, Dealloc; out tree_info : ipc_info_tree_name_array_t, @@ -393,8 +393,8 @@ routine mach_port_space_info( routine mach_port_dnrequest_info( task : ipc_space_t; name : mach_port_name_t; - out total : unsigned; /* total size of table */ - out used : unsigned); /* amount used */ + out dnr_total : unsigned; /* total size of table */ + out dnr_used : unsigned); /* amount used */ /* * Return the type and address of the kernel object diff --git a/osfmk/mach/mach_syscalls.h b/osfmk/mach/mach_syscalls.h index 9b9eb91cb..dcf9dc2da 100644 --- a/osfmk/mach/mach_syscalls.h +++ b/osfmk/mach/mach_syscalls.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,22 +26,6 @@ #ifndef _MACH_MACH_SYSCALLS_H_ #define _MACH_MACH_SYSCALLS_H_ -#include -#include -#include -#include -#include - -extern kern_return_t clock_sleep_trap( - mach_port_name_t clock_name, - sleep_type_t sleep_type, - int sleep_sec, - int sleep_nsec, - mach_timespec_t *wakeup_time); - -extern kern_return_t thread_switch( - mach_port_name_t thread_name, - int option, - mach_msg_timeout_t option_time); +#include #endif /* _MACH_MACH_SYSCALLS_H_ */ diff --git a/osfmk/mach/mach_time.h b/osfmk/mach/mach_time.h index fe70975e0..e1bb2d17c 100644 --- a/osfmk/mach/mach_time.h +++ b/osfmk/mach/mach_time.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2001-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,24 +19,13 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 30 January 2001 (debo) - * Created. - */ #ifndef _MACH_MACH_TIME_H_ #define _MACH_MACH_TIME_H_ #include -uint64_t mach_absolute_time(void); - -kern_return_t mach_wait_until( - uint64_t deadline); +#include struct mach_timebase_info { uint32_t numer; @@ -46,7 +35,18 @@ struct mach_timebase_info { typedef struct mach_timebase_info *mach_timebase_info_t; typedef struct mach_timebase_info mach_timebase_info_data_t; +__BEGIN_DECLS +#ifndef KERNEL + kern_return_t mach_timebase_info( mach_timebase_info_t info); +kern_return_t mach_wait_until( + uint64_t deadline); + +#endif /* KERNEL */ + +uint64_t mach_absolute_time(void); +__END_DECLS + #endif /* _MACH_MACH_TIME_H_ */ diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h index 888eda988..660b641ef 100644 --- a/osfmk/mach/mach_traps.h +++ b/osfmk/mach/mach_traps.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -52,92 +52,507 @@ /* * Definitions of general Mach system traps. * - * IPC traps are defined in . + * These are the definitions as seen from user-space. + * The kernel definitions are in . * Kernel RPC functions are defined in . */ #ifndef _MACH_MACH_TRAPS_H_ #define _MACH_MACH_TRAPS_H_ +#include + +#include +#include #include #include #include #include -mach_port_name_t mach_reply_port(void); +#include + +#include + +__BEGIN_DECLS + +#ifndef KERNEL + +#ifdef PRIVATE -mach_port_name_t thread_self_trap(void); +extern mach_port_name_t mach_reply_port(void); -mach_port_name_t task_self_trap(void); +extern mach_port_name_t thread_self_trap(void); -mach_port_name_t host_self_trap(void); +extern mach_port_name_t host_self_trap(void); -kern_return_t semaphore_signal_trap( +extern mach_msg_return_t mach_msg_trap( + mach_msg_header_t *msg, + mach_msg_option_t option, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size, + mach_port_name_t rcv_name, + mach_msg_timeout_t timeout, + mach_port_name_t notify); + +extern mach_msg_return_t mach_msg_overwrite_trap( + mach_msg_header_t *msg, + mach_msg_option_t option, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size, + mach_port_name_t rcv_name, + mach_msg_timeout_t timeout, + mach_port_name_t notify, + mach_msg_header_t *rcv_msg, + mach_msg_size_t rcv_limit); + +extern kern_return_t semaphore_signal_trap( mach_port_name_t signal_name); -kern_return_t semaphore_signal_all_trap( +extern kern_return_t semaphore_signal_all_trap( mach_port_name_t signal_name); -kern_return_t semaphore_signal_thread_trap( +extern kern_return_t semaphore_signal_thread_trap( mach_port_name_t signal_name, mach_port_name_t thread_name); -kern_return_t semaphore_wait_trap( +extern kern_return_t semaphore_wait_trap( mach_port_name_t wait_name); -kern_return_t semaphore_timedwait_trap( +extern kern_return_t semaphore_wait_signal_trap( mach_port_name_t wait_name, - unsigned int sec, - clock_res_t nsec); + mach_port_name_t signal_name); -kern_return_t semaphore_wait_signal_trap( +extern kern_return_t semaphore_timedwait_trap( mach_port_name_t wait_name, - mach_port_name_t signal_name); + unsigned int sec, + clock_res_t nsec); -kern_return_t semaphore_timedwait_signal_trap( +extern kern_return_t semaphore_timedwait_signal_trap( mach_port_name_t wait_name, mach_port_name_t signal_name, unsigned int 
sec, clock_res_t nsec); -kern_return_t init_process(void); - -kern_return_t map_fd( - int fd, - vm_offset_t offset, - vm_offset_t *va, - boolean_t findspace, - vm_size_t size); - -kern_return_t task_for_pid( - mach_port_t target_tport, - int pid, - mach_port_t *t); - -kern_return_t pid_for_task( - mach_port_t t, - int *x); - -kern_return_t macx_swapon( - char *name, - int flags, - int size, - int priority); - -kern_return_t macx_swapoff( - char *name, - int flags); - -kern_return_t macx_triggers( - int hi_water, - int low_water, - int flags, - mach_port_t alert_port); - -kern_return_t macx_backing_store_suspend( - boolean_t suspend); - -kern_return_t macx_backing_store_recovery( - int pid); +#if !defined(__LP64__) +/* these should go away altogether - so no 64 legacy please */ + +extern kern_return_t init_process(void); + +#endif /* !defined(__LP64__) */ + +#if !defined(__LP64__) + +/* more that should go away so no 64-bit legacy please */ +extern kern_return_t macx_swapon( + char *filename, + int flags, + int size, + int priority); + +extern kern_return_t macx_swapoff( + char *filename, + int flags); + +extern kern_return_t macx_triggers( + int hi_water, + int low_water, + int flags, + mach_port_t alert_port); + +extern kern_return_t macx_backing_store_suspend( + boolean_t suspend); + +extern kern_return_t macx_backing_store_recovery( + int pid); + +#endif /* !defined(__LP64__) */ + +extern kern_return_t clock_sleep_trap( + mach_port_name_t clock_name, + sleep_type_t sleep_type, + int sleep_sec, + int sleep_nsec, + mach_timespec_t *wakeup_time); + +#endif /* PRIVATE */ + +extern boolean_t swtch_pri(int pri); + +extern boolean_t swtch(void); + +extern kern_return_t thread_switch( + mach_port_name_t thread_name, + int option, + mach_msg_timeout_t option_time); + +extern mach_port_name_t task_self_trap(void); + +/* + * Obsolete interfaces. + */ + +extern kern_return_t task_for_pid( + mach_port_name_t target_tport, + int pid, + mach_port_name_t *t); + +extern kern_return_t pid_for_task( + mach_port_name_t t, + int *x); + +#if !defined(__LP64__) +/* these should go away altogether - so no 64 legacy please */ + +extern kern_return_t map_fd( + int fd, + vm_offset_t offset, + vm_offset_t *va, + boolean_t findspace, + vm_size_t size); + +#endif /* !defined(__LP64__) */ + +#else /* KERNEL */ + +#ifdef XNU_KERNEL_PRIVATE + +/* Syscall data translations routines */ +#ifdef __ppc__ +#define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ + ? 0 : sizeof(uint64_t) - sizeof(t)) +#else +#define PAD_(t) (sizeof(register_t) <= sizeof(t) \ + ? 
0 : sizeof(register_t) - sizeof(t)) +#endif + +#if BYTE_ORDER == LITTLE_ENDIAN +#define PADL_(t) 0 +#define PADR_(t) PAD_(t) +#else +#define PADL_(t) PAD_(t) +#define PADR_(t) 0 +#endif + +#define PAD_ARG_(arg_type, arg_name) \ + char arg_name##_l_[PADL_(arg_type)]; arg_type arg_name; char arg_name##_r_[PADR_(arg_type)]; + +#ifndef __MUNGE_ONCE +#define __MUNGE_ONCE +#ifdef __ppc__ +void munge_w(const void *, void *); +void munge_ww(const void *, void *); +void munge_www(const void *, void *); +void munge_wwww(const void *, void *); +void munge_wwwww(const void *, void *); +void munge_wwwwww(const void *, void *); +void munge_wwwwwww(const void *, void *); +void munge_wwwwwwww(const void *, void *); +void munge_d(const void *, void *); +void munge_dd(const void *, void *); +void munge_ddd(const void *, void *); +void munge_dddd(const void *, void *); +void munge_ddddd(const void *, void *); +void munge_dddddd(const void *, void *); +void munge_ddddddd(const void *, void *); +void munge_dddddddd(const void *, void *); +void munge_l(const void *, void *); +void munge_wl(const void *, void *); +void munge_wlw(const void *, void *); +void munge_wwwl(const void *, void *); +void munge_wwwwl(const void *, void *); +void munge_wwwwwl(const void *, void *); +#else +#define munge_w NULL +#define munge_ww NULL +#define munge_www NULL +#define munge_wwww NULL +#define munge_wwwww NULL +#define munge_wwwwww NULL +#define munge_wwwwwww NULL +#define munge_wwwwwwww NULL +#define munge_d NULL +#define munge_dd NULL +#define munge_ddd NULL +#define munge_dddd NULL +#define munge_ddddd NULL +#define munge_dddddd NULL +#define munge_ddddddd NULL +#define munge_dddddddd NULL +#define munge_l NULL +#define munge_wl NULL +#define munge_wlw NULL +#define munge_wwwl NULL +#define munge_wwwwl NULL +#define munge_wwwwwl NULL +#endif /* __ppc__ */ +#endif /* !__MUNGE_ONCE */ + +struct kern_invalid_args { + register_t dummy; +}; +extern kern_return_t kern_invalid( + struct kern_invalid_args *args); + +struct mach_reply_port_args { + register_t dummy; +}; +extern mach_port_name_t mach_reply_port( + struct mach_reply_port_args *args); + +struct thread_self_trap_args { + register_t dummy; +}; +extern mach_port_name_t thread_self_trap( + struct thread_self_trap_args *args); + +struct task_self_trap_args { + register_t dummy; +}; +extern mach_port_name_t task_self_trap( + struct task_self_trap_args *args); + +struct host_self_trap_args { + register_t dummy; +}; +extern mach_port_name_t host_self_trap( + struct host_self_trap_args *args); + +struct mach_msg_overwrite_trap_args { + PAD_ARG_(mach_vm_address_t, msg); + PAD_ARG_(mach_msg_option_t, option); + PAD_ARG_(mach_msg_size_t, send_size); + PAD_ARG_(mach_msg_size_t, rcv_size); + PAD_ARG_(mach_port_name_t, rcv_name); + PAD_ARG_(mach_msg_timeout_t, timeout); + PAD_ARG_(mach_port_name_t, notify); + PAD_ARG_(mach_vm_address_t, rcv_msg); /* Unused on mach_msg_trap */ +}; +extern mach_msg_return_t mach_msg_trap( + struct mach_msg_overwrite_trap_args *args); +extern mach_msg_return_t mach_msg_overwrite_trap( + struct mach_msg_overwrite_trap_args *args); + +struct semaphore_signal_trap_args { + PAD_ARG_(mach_port_name_t, signal_name); +}; +extern kern_return_t semaphore_signal_trap( + struct semaphore_signal_trap_args *args); + +struct semaphore_signal_all_trap_args { + PAD_ARG_(mach_port_name_t, signal_name); +}; +extern kern_return_t semaphore_signal_all_trap( + struct semaphore_signal_all_trap_args *args); + +struct semaphore_signal_thread_trap_args { + 
PAD_ARG_(mach_port_name_t, signal_name); + PAD_ARG_(mach_port_name_t, thread_name); +}; +extern kern_return_t semaphore_signal_thread_trap( + struct semaphore_signal_thread_trap_args *args); + +struct semaphore_wait_trap_args { + PAD_ARG_(mach_port_name_t, wait_name); +}; +extern kern_return_t semaphore_wait_trap( + struct semaphore_wait_trap_args *args); + +struct semaphore_wait_signal_trap_args { + PAD_ARG_(mach_port_name_t, wait_name); + PAD_ARG_(mach_port_name_t, signal_name); +}; +extern kern_return_t semaphore_wait_signal_trap( + struct semaphore_wait_signal_trap_args *args); + +struct semaphore_timedwait_trap_args { + PAD_ARG_(mach_port_name_t, wait_name); + PAD_ARG_(unsigned int, sec); + PAD_ARG_(clock_res_t, nsec); +}; +extern kern_return_t semaphore_timedwait_trap( + struct semaphore_timedwait_trap_args *args); + +struct semaphore_timedwait_signal_trap_args { + PAD_ARG_(mach_port_name_t, wait_name); + PAD_ARG_(mach_port_name_t, signal_name); + PAD_ARG_(unsigned int, sec); + PAD_ARG_(clock_res_t, nsec); +}; +extern kern_return_t semaphore_timedwait_signal_trap( + struct semaphore_timedwait_signal_trap_args *args); + +/* not published to LP64 clients */ +struct init_process_args { + register_t dummy; +}; +extern kern_return_t init_process( + struct init_process_args *args); + +struct map_fd_args { + PAD_ARG_(int, fd); + PAD_ARG_(vm_offset_t, offset); + PAD_ARG_(vm_offset_t *, va); + PAD_ARG_(boolean_t, findspace); + PAD_ARG_(vm_size_t, size); +}; +extern kern_return_t map_fd( + struct map_fd_args *args); + +struct task_for_pid_args { + PAD_ARG_(mach_port_name_t, target_tport); + PAD_ARG_(int, pid); + PAD_ARG_(user_addr_t, t); +}; +extern kern_return_t task_for_pid( + struct task_for_pid_args *args); + +struct pid_for_task_args { + PAD_ARG_(mach_port_name_t, t); + PAD_ARG_(user_addr_t, pid); +}; +extern kern_return_t pid_for_task( + struct pid_for_task_args *args); + +/* not published to LP64 clients*/ +struct macx_swapon_args { + PAD_ARG_(char *, filename); + PAD_ARG_(int, flags); + PAD_ARG_(int, size); + PAD_ARG_(int, priority); +}; +extern kern_return_t macx_swapon( + struct macx_swapon_args *args); + +struct macx_swapoff_args { + PAD_ARG_(char *, filename); + PAD_ARG_(int, flags); +}; +extern kern_return_t macx_swapoff( + struct macx_swapoff_args *args); + +struct macx_triggers_args { + PAD_ARG_(int, hi_water); + PAD_ARG_(int, low_water); + PAD_ARG_(int, flags); + PAD_ARG_(mach_port_t, alert_port); +}; +extern kern_return_t macx_triggers( + struct macx_triggers_args *args); + +struct macx_backing_store_suspend_args { + PAD_ARG_(boolean_t, suspend); +}; +extern kern_return_t macx_backing_store_suspend( + struct macx_backing_store_suspend_args *args); + +struct macx_backing_store_recovery_args { + PAD_ARG_(int, pid); +}; +extern kern_return_t macx_backing_store_recovery( + struct macx_backing_store_recovery_args *args); + +struct swtch_pri_args { + PAD_ARG_(int, pri); +}; +extern boolean_t swtch_pri( + struct swtch_pri_args *args); + +struct swtch_args { + register_t dummy; +}; +extern boolean_t swtch( + struct swtch_args *args); + +struct clock_sleep_trap_args{ + PAD_ARG_(mach_port_name_t, clock_name); + PAD_ARG_(sleep_type_t, sleep_type); + PAD_ARG_(int, sleep_sec); + PAD_ARG_(int, sleep_nsec); + PAD_ARG_(mach_vm_address_t, wakeup_time); +}; +extern kern_return_t clock_sleep_trap( + struct clock_sleep_trap_args *args); + +struct thread_switch_args { + PAD_ARG_(mach_port_name_t, thread_name); + PAD_ARG_(int, option); + PAD_ARG_(mach_msg_timeout_t, option_time); +}; +extern 
kern_return_t thread_switch( + struct thread_switch_args *args); + +struct mach_timebase_info_trap_args { + PAD_ARG_(mach_vm_address_t, info); +}; +extern kern_return_t mach_timebase_info_trap( + struct mach_timebase_info_trap_args *args); + +struct mach_wait_until_trap_args { + PAD_ARG_(uint64_t, deadline); +}; +extern kern_return_t mach_wait_until_trap( + struct mach_wait_until_trap_args *args); + +struct mk_timer_create_trap_args { + register_t dummy; +}; +extern mach_port_name_t mk_timer_create_trap( + struct mk_timer_create_trap_args *args); + +struct mk_timer_destroy_trap_args { + PAD_ARG_(mach_port_name_t, name); +}; +extern kern_return_t mk_timer_destroy_trap( + struct mk_timer_destroy_trap_args *args); + +struct mk_timer_arm_trap_args { + PAD_ARG_(mach_port_name_t, name); + PAD_ARG_(uint64_t, expire_time); +}; +extern kern_return_t mk_timer_arm_trap( + struct mk_timer_arm_trap_args *args); + +struct mk_timer_cancel_trap_args { + PAD_ARG_(mach_port_name_t, name); + PAD_ARG_(mach_vm_address_t, result_time); +}; +extern kern_return_t mk_timer_cancel_trap( + struct mk_timer_cancel_trap_args *args); + +/* no user-level prototype for this one */ +struct mk_timebase_info_trap_args { + PAD_ARG_(uint32_t *, delta); + PAD_ARG_(uint32_t *, abs_to_ns_numer); + PAD_ARG_(uint32_t *, abs_to_ns_denom); + PAD_ARG_(uint32_t *, proc_to_abs_numer); + PAD_ARG_(uint32_t *, proc_to_abs_denom); +}; +extern void mk_timebase_info_trap( + struct mk_timebase_info_trap_args *args); + +/* not published to LP64 clients yet */ +struct iokit_user_client_trap_args { + PAD_ARG_(void *, userClientRef); + PAD_ARG_(uint32_t, index); + PAD_ARG_(void *, p1); + PAD_ARG_(void *, p2); + PAD_ARG_(void *, p3); + PAD_ARG_(void *, p4); + PAD_ARG_(void *, p5); + PAD_ARG_(void *, p6); +}; +kern_return_t iokit_user_client_trap( + struct iokit_user_client_trap_args *args); + +#undef PAD_ +#undef PADL_ +#undef PADR_ +#undef PAD_ARG_ + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* KERNEL */ + +__END_DECLS #endif /* _MACH_MACH_TRAPS_H_ */ diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs index 02020835e..4613dea3f 100644 --- a/osfmk/mach/mach_types.defs +++ b/osfmk/mach/mach_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -59,14 +59,25 @@ #include -type upl_page_info_t = struct[2] of integer_t; -type upl_page_info_array_t = array[*:20] of upl_page_info_t; - type memory_object_offset_t = uint64_t; type memory_object_size_t = uint64_t; +type memory_object_cluster_size_t = uint32_t; +#ifdef KERNEL_PRIVATE +/* Universal Page Lists - restricted to (in-kernel) pagers for now */ +type upl_size_t = uint32_t; +type upl_offset_t = uint32_t; +type upl_page_info_t = struct[2] of integer_t; +type upl_page_info_array_t = array[*:256] of upl_page_info_t; +type upl_t = mach_port_t + intran: upl_t convert_port_to_upl(mach_port_t) + outtran: mach_port_t convert_upl_to_port(upl_t) + destructor: upl_deallocate(upl_t) + ; + +#endif /* KERNEL_PRIVATE */ type mach_port_status_t = struct[10] of integer_t; /* obsolete */ @@ -97,17 +108,17 @@ type thread_t = mach_port_t type thread_act_t = mach_port_t #if KERNEL_SERVER - intran: thread_act_t convert_port_to_act(mach_port_t) - outtran: mach_port_t convert_act_to_port(thread_act_t) - destructor: act_deallocate(thread_act_t) + intran: thread_act_t convert_port_to_thread(mach_port_t) + outtran: mach_port_t convert_thread_to_port(thread_act_t) + destructor: thread_deallocate(thread_act_t) #endif /* KERNEL_SERVER */ ; type thread_act_consume_ref_t = mach_port_move_send_t - ctype: thread_act_t + cusertype: thread_act_t #if KERNEL_SERVER - intran: thread_act_t convert_port_to_act(mach_port_t) - destructor: act_deallocate(thread_act_t) + intran: thread_act_t convert_port_to_thread(mach_port_t) + destructor: thread_deallocate(thread_act_t) #endif /* KERNEL_SERVER */ ; @@ -133,7 +144,7 @@ type vm_map_t = mach_port_t ; type vm_task_entry_t = mach_port_t - ctype: vm_map_t + cusertype: vm_map_t #if KERNEL_SERVER intran: vm_map_t convert_port_entry_to_map(mach_port_t) destructor: vm_map_deallocate(vm_map_t) @@ -147,14 +158,12 @@ type ipc_space_t = mach_port_t #endif /* KERNEL_SERVER */ ; -type vm_address_t = natural_t; -type vm_offset_t = natural_t; -type vm_size_t = natural_t; type vm_prot_t = int; type vm_inherit_t = int; +type vm_purgable_t = int; type xxx_vm_statistics_data_t = struct[13] of integer_t; type vm_behavior_t = int; -type vm_statistics_data_t = struct[12] of integer_t; +type vm_statistics_data_t = struct[14] of integer_t; type vm_machine_attribute_t = int; type vm_machine_attribute_val_t = int; type vm_sync_t = int; @@ -174,7 +183,8 @@ type thread_policy_flavor_t = natural_t; type thread_policy_t = array[*:16] of integer_t; /* task_info_t: this inline array can hold any of: - * task_basic_info_t (8 ints) + * task_basic_info_32_t (8 ints) + * task_basic_info_64_t (10 ints) * task_events_info_t (8 ints) * task_thread_times_info_t (4 ints) * policy_timeshare_info_t (5 ints) @@ -186,7 +196,7 @@ type thread_policy_t = array[*:16] of integer_t; * definition may need to be changed. 
(See * mach/task_info.h and mach/policy.h) */ type task_flavor_t = int; -type task_info_t = array[*:8] of integer_t; +type task_info_t = array[*:10] of integer_t; type task_policy_flavor_t = natural_t; type task_policy_t = array[*:16] of integer_t; @@ -194,6 +204,15 @@ type task_policy_t = array[*:16] of integer_t; type mem_entry_name_port_t = mach_port_t #if KERNEL_SERVER intran: mem_entry_name_port_t null_conversion(mach_port_t) + outtran: mach_port_t null_conversion(mem_entry_name_port_t) +#endif /* KERNEL_SERVER */ + ; + +type mem_entry_name_port_move_send_t = mach_port_move_send_t + cusertype: mem_entry_name_port_t +#if KERNEL_SERVER + intran: mem_entry_name_port_t null_conversion(mach_port_t) + outtran: mach_port_t null_conversion(mem_entry_name_port_t) #endif /* KERNEL_SERVER */ ; @@ -224,14 +243,6 @@ type memory_object_name_t = mach_port_t ctype: mach_port_t ; -type upl_t = mach_port_t -#if KERNEL_PRIVATE - intran: upl_t convert_port_to_upl(mach_port_t) - outtran: mach_port_t convert_upl_to_port(upl_t) - destructor: upl_deallocate(upl_t) -#endif /* KERNEL_PRIVATE */ - ; - type memory_object_copy_strategy_t = int; type memory_object_return_t = int; @@ -259,16 +270,17 @@ type host_security_t = mach_port_t ; /* host_info_t: variable-sized inline array that can contain: - * host_basic_info_t (5 ints) + * host_basic_info_old_t (5 ints) + * host_basic_info_t (12 ints) * host_sched_info_t (2 ints) * kernel_resource_sizes_t (5 ints) * host_load_info_t (6 ints) - * vm_statistics_t (12 ints) + * vm_statistics_t (14 ints) * If other host_info flavors are added, this definition may * need to be changed. (See mach/{host_info,vm_statistics}.h) */ type host_flavor_t = int; -type host_info_t = array[*:12] of integer_t; +type host_info_t = array[*:14] of integer_t; type processor_t = mach_port_t #if KERNEL_SERVER @@ -378,13 +390,11 @@ type memory_object_info_t = array[*:6] of int; * If other flavors are added, this definition may * need to be changed. 
(see mach/vm_region.h) */ type vm_region_flavor_t = int; -type vm_region_info_t = array[*:9] of int; -type vm_region_info_64_t = array[*:10] of int; +type vm_region_info_t = array[*:10] of int; type vm_region_recurse_info_t = array[*:19] of int; -type vm_region_recurse_info_64_t = array[*:19] of int; -type vm_read_entry_t = array[512] of int - ctype: vm_read_entry_t; +type mach_vm_read_entry_t = array[512] of mach_vm_offset_t; +type vm_read_entry_t = array[512] of vm_offset_t; type exception_mask_t = int; type exception_behavior_t = int; @@ -412,7 +422,7 @@ type semaphore_t = mach_port_t ; type semaphore_consume_ref_t = mach_port_move_send_t - ctype: semaphore_t + cusertype: semaphore_t #if KERNEL_SERVER intran: semaphore_t convert_port_to_semaphore(mach_port_t) outtran: mach_port_t convert_semaphore_to_port(semaphore_t) @@ -445,7 +455,11 @@ simport ; /* for task/thread conversion */ simport ; /* for host/processor/pset conversions */ simport ; /* for lock_set and semaphore conversions */ simport ; /* for ledger conversions */ -simport ; /* for memory object type conversions */ +simport ; /* for processor conversions */ +simport ; /* for lock-set conversions */ +simport ; /* for semaphore conversions */ +simport ; /* for memory object type conversions */ +simport ; /* for vm_map conversions */ #endif /* MACH_KERNEL_PRIVATE */ simport ; /* pick up kernel-specific MIG things */ diff --git a/osfmk/mach/mach_types.h b/osfmk/mach/mach_types.h index 7e9f87002..486d3be41 100644 --- a/osfmk/mach/mach_types.h +++ b/osfmk/mach/mach_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,12 +63,15 @@ #include +#include + #include #include #include #include #include #include +#include #include #include #include @@ -83,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -91,7 +95,7 @@ #include #include -#ifdef KERNEL_PRIVATE +#ifdef KERNEL #include @@ -115,14 +119,10 @@ typedef struct alarm *alarm_t; typedef struct clock *clock_serv_t; typedef struct clock *clock_ctrl_t; -#if !defined(MACH_KERNEL_PRIVATE) +#ifndef MACH_KERNEL_PRIVATE + +__BEGIN_DECLS -/* - * Declare empty structure definitions for export to other - * kernel components. This lets us still provide some level - * of type checking, without exposing our internal data - * structures. - */ struct task ; struct thread ; struct host ; @@ -134,9 +134,11 @@ struct ledger ; struct alarm ; struct clock ; -#endif /* !MACH_KERNEL_PRIVATE */ +__END_DECLS -#else /* !KERNEL_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ + +#else /* KERNEL */ /* * If we are not in the kernel, then these will all be represented by @@ -159,7 +161,7 @@ typedef mach_port_t alarm_t; typedef mach_port_t clock_serv_t; typedef mach_port_t clock_ctrl_t; -#endif /* !KERNEL_PRIVATE */ +#endif /* KERNEL */ /* * These aren't really unique types. They are just called @@ -169,7 +171,7 @@ typedef mach_port_t clock_ctrl_t; typedef processor_set_t processor_set_name_t; /* - * JMM - These types are just hard-coded as ports for now + * These types are just hard-coded as ports */ typedef mach_port_t clock_reply_t; typedef mach_port_t bootstrap_t; @@ -181,11 +183,11 @@ typedef mach_port_t io_master_t; typedef mach_port_t UNDServerRef; /* - * JMM - Mig doesn't translate the components of an array. + * Mig doesn't translate the components of an array. 
* For example, Mig won't use the thread_t translations * to translate a thread_array_t argument. So, these definitions * are not completely accurate at the moment for other kernel - * components. MIG is being fixed. + * components. */ typedef task_t *task_array_t; typedef thread_t *thread_array_t; @@ -195,7 +197,6 @@ typedef processor_t *processor_array_t; typedef thread_act_t *thread_act_array_t; typedef ledger_t *ledger_array_t; - /* * However the real mach_types got declared, we also have to declare * types with "port" in the name for compatability with the way OSF @@ -244,8 +245,10 @@ typedef exception_handler_array_t exception_port_arrary_t; #define CLOCK_NULL ((clock_t) 0) #define UND_SERVER_NULL ((UNDServerRef) 0) -typedef integer_t ledger_item_t; -typedef vm_offset_t *emulation_vector_t; +typedef natural_t ledger_item_t; +#define LEDGER_ITEM_INFINITY ((ledger_item_t) (~0)) + +typedef mach_vm_offset_t *emulation_vector_t; typedef char *user_subsystem_t; /* diff --git a/osfmk/mach/mach_vm.defs b/osfmk/mach/mach_vm.defs new file mode 100644 index 000000000..9e39042d7 --- /dev/null +++ b/osfmk/mach/mach_vm.defs @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_FREE_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: mach/mach_vm.defs + * + * Exported kernel VM calls (for any task on the platform). 
+ */
+
+subsystem
+#if KERNEL_SERVER
+ KernelServer
+#endif /* KERNEL_SERVER */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+ mach_vm
+#else
+ vm_map_lp64_local
+#endif
+ 4800;
+
+#include
+#include
+#include
+
+/*
+ * Allocate zero-filled memory in the address space
+ * of the target task, either at the specified address,
+ * or wherever space can be found (controlled by flags),
+ * of the specified size. The address at which the
+ * allocation actually took place is returned.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_allocate(
+#else
+routine vm_allocate(
+#endif
+ target : vm_task_entry_t;
+ inout address : mach_vm_address_t;
+ size : mach_vm_size_t;
+ flags : int);
+
+/*
+ * Deallocate the specified range from the virtual
+ * address space of the target virtual memory map.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_deallocate(
+#else
+routine vm_deallocate(
+#endif
+ target : vm_task_entry_t;
+ address : mach_vm_address_t;
+ size : mach_vm_size_t);
+
+/*
+ * Set the current or maximum protection attribute
+ * for the specified range of the virtual address
+ * space of the target virtual memory map. The current
+ * protection limits the memory access rights of threads
+ * within the map; the maximum protection limits the accesses
+ * that may be given in the current protection.
+ * Protections are specified as a set of {read, write, execute}
+ * *permissions*.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_protect(
+#else
+routine vm_protect(
+#endif
+ target_task : vm_task_entry_t;
+ address : mach_vm_address_t;
+ size : mach_vm_size_t;
+ set_maximum : boolean_t;
+ new_protection : vm_prot_t);
+
+/*
+ * Set the inheritance attribute for the specified range
+ * of the virtual address space of the target address space.
+ * The inheritance value is one of {none, copy, share}, and
+ * specifies how the child address space should acquire
+ * this memory at the time of a task_create call.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_inherit(
+#else
+routine vm_inherit(
+#endif
+ target_task : vm_task_entry_t;
+ address : mach_vm_address_t;
+ size : mach_vm_size_t;
+ new_inheritance : vm_inherit_t);
+
+/*
+ * Returns the contents of the specified range of the
+ * virtual address space of the target task. [The
+ * range must be aligned on a virtual page boundary,
+ * and must be a multiple of pages in extent. The
+ * protection on the specified range must permit reading.]
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_read(
+#else
+routine vm_read(
+#endif
+ target_task : vm_map_t;
+ address : mach_vm_address_t;
+ size : mach_vm_size_t;
+ out data : pointer_t);
+
+/*
+ * List corollary to vm_read, returns mapped contents of specified
+ * ranges within target address space.
+ */
+#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_)
+routine mach_vm_read_list(
+#else
+routine vm_read_list(
+#endif
+ target_task : vm_map_t;
+ inout data_list : mach_vm_read_entry_t;
+ count : natural_t);
+
+/*
+ * Writes the contents of the specified range of the
+ * virtual address space of the target task. [The
+ * range must be aligned on a virtual page boundary,
+ * and must be a multiple of pages in extent. The
+ * protection on the specified range must permit writing.]
+ */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_write( +#else +routine vm_write( +#endif + target_task : vm_map_t; + address : mach_vm_address_t; + data : pointer_t); + +/* + * Copy the contents of the source range of the virtual + * address space of the target task to the destination + * range in that same address space. [Both of the + * ranges must be aligned on a virtual page boundary, + * and must be multiples of pages in extent. The + * protection on the source range must permit reading, + * and the protection on the destination range must + * permit writing.] + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_copy( +#else +routine vm_copy( +#endif + target_task : vm_map_t; + source_address : mach_vm_address_t; + size : mach_vm_size_t; + dest_address : mach_vm_address_t); + +/* + * Returns the contents of the specified range of the + * virtual address space of the target task. [There + * are no alignment restrictions, and the results will + * overwrite the area pointed to by data - which must + * already exist. The protection on the specified range + * must permit reading.] + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_read_overwrite( +#else +routine vm_read_overwrite( +#endif + target_task : vm_map_t; + address : mach_vm_address_t; + size : mach_vm_size_t; + data : mach_vm_address_t; + out outsize : mach_vm_size_t); + + +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_msync( +#else +routine vm_msync( +#endif + target_task : vm_map_t; + address : mach_vm_address_t; + size : mach_vm_size_t; + sync_flags : vm_sync_t ); + +/* + * Set the paging behavior attribute for the specified range + * of the virtual address space of the target task. + * The behavior value is one of {default, random, forward + * sequential, reverse sequential} and indicates the expected + * page reference pattern for the specified range. + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_behavior_set( +#else +routine vm_behavior_set( +#endif + target_task : vm_map_t; + address : mach_vm_address_t; + size : mach_vm_size_t; + new_behavior : vm_behavior_t); + + +/* + * Map a user-supplied memory object into the virtual address + * space of the target task. If desired (anywhere is TRUE), + * the kernel will find a suitable address range of the + * specified size; else, the specific address will be allocated. + * + * The beginning address of the range will be aligned on a virtual + * page boundary, be at or beyond the address specified, and + * meet the mask requirements (bits turned on in the mask must not + * be turned on in the result); the size of the range, in bytes, + * will be rounded up to an integral number of virtual pages. + * + * The memory in the resulting range will be associated with the + * specified memory object, with the beginning of the memory range + * referring to the specified offset into the memory object. + * + * The mapping will take the current and maximum protections and + * the inheritance attributes specified; see the vm_protect and + * vm_inherit calls for a description of these attributes. + * + * If desired (copy is TRUE), the memory range will be filled + * with a copy of the data from the memory object; this copy will + * be private to this mapping in this target task. Otherwise, + * the memory in this mapping will be shared with other mappings + * of the same memory object at the same offset (in this task or + * in other tasks).
[The Mach kernel only enforces shared memory + * consistency among mappings on one host with similar page alignments. + * The user-defined memory manager for this object is responsible + * for further consistency.] + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_map( +#else +routine vm_map( +#endif + target_task : vm_task_entry_t; + inout address : mach_vm_address_t; + size : mach_vm_size_t; + mask : mach_vm_offset_t; + flags : int; + object : mem_entry_name_port_t; + offset : memory_object_offset_t; + copy : boolean_t; + cur_protection : vm_prot_t; + max_protection : vm_prot_t; + inheritance : vm_inherit_t); + +/* + * Set/Get special properties of memory associated + * to some virtual address range, such as cachability, + * migrability, replicability. Machine-dependent. + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_machine_attribute( +#else +routine vm_machine_attribute( +#endif + target_task : vm_map_t; + address : mach_vm_address_t; + size : mach_vm_size_t; + attribute : vm_machine_attribute_t; + inout value : vm_machine_attribute_val_t); + +/* + * Map portion of a task's address space. + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_remap( +#else +routine vm_remap( +#endif + target_task : vm_map_t; + inout target_address : mach_vm_address_t; + size : mach_vm_size_t; + mask : mach_vm_offset_t; + anywhere : boolean_t; + src_task : vm_map_t; + src_address : mach_vm_address_t; + copy : boolean_t; + out cur_protection : vm_prot_t; + out max_protection : vm_prot_t; + inheritance : vm_inherit_t); + +/* + * Give the caller information on the given location in a virtual + * address space. If a page is mapped return ref and dirty info. + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_page_query( +#else +routine vm_map_page_query( +#endif + target_map :vm_map_t; + offset :mach_vm_offset_t; + out disposition :integer_t; + out ref_count :integer_t); + + +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_region_recurse( +#else +routine vm_region_recurse_64( +#endif + target_task : vm_map_t; + inout address : mach_vm_address_t; + out size : mach_vm_size_t; + inout nesting_depth : natural_t; + out info : vm_region_recurse_info_t,CountInOut); + +/* + * Returns information about the contents of the virtual + * address space of the target task at the specified + * address. The returned protection, inheritance, sharing + * and memory object values apply to the entire range described + * by the address range returned; the memory object offset + * corresponds to the beginning of the address range. + * [If the specified address is not allocated, the next + * highest address range is described. If no addresses beyond + * the one specified are allocated, the call returns KERN_NO_SPACE.] + */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_region( +#else +routine vm_region_64( +#endif + target_task : vm_map_t; + inout address : mach_vm_address_t; + out size : mach_vm_size_t; + flavor : vm_region_flavor_t; + out info : vm_region_info_t, CountInOut; + out object_name : memory_object_name_t = + MACH_MSG_TYPE_MOVE_SEND + ctype: mach_port_t); + +/* + * Allow application level processes to create named entries which + * correspond to mapped portions of their address space. These named + * entries can then be manipulated, shared with other processes in + * other address spaces and ultimately mapped in other address spaces + * + * THIS INTERFACE IS STILL EVOLVING.
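For illustration, a user-level sketch of walking an address space with the mach_vm_region routine defined above; the VM_REGION_BASIC_INFO_64 flavor and its count constant come from mach/vm_region.h, and the loop bound is arbitrary:

    #include <mach/mach.h>
    #include <stdio.h>

    /* print the first few mapped regions of the calling task */
    static void dump_regions(void)
    {
        mach_vm_address_t addr = 0;
        int i;

        for (i = 0; i < 8; i++) {
            mach_vm_size_t size = 0;
            vm_region_basic_info_data_64_t info;
            mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
            mach_port_t object_name = MACH_PORT_NULL;
            kern_return_t kr;

            kr = mach_vm_region(mach_task_self(), &addr, &size,
                                VM_REGION_BASIC_INFO_64,
                                (vm_region_info_t)&info, &count, &object_name);
            if (kr != KERN_SUCCESS)
                break;          /* nothing mapped at or above addr */
            printf("0x%llx-0x%llx prot=0x%x\n",
                   (unsigned long long)addr,
                   (unsigned long long)(addr + size),
                   info.protection);
            addr += size;       /* resume the scan past this region */
        }
    }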
+ */ +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine _mach_make_memory_entry( +#else +routine mach_make_memory_entry_64( +#endif + target_task :vm_map_t; + inout size :memory_object_size_t; + offset :memory_object_offset_t; + permission :vm_prot_t; + out object_handle :mem_entry_name_port_move_send_t; + parent_handle :mem_entry_name_port_t); + + +/****************************** Legacy section ***************************/ +/* The following definitions exist to provide compatibility with */ +/* the legacy APIs. They are no different. We just need to produce */ +/* the user-level stub interface for them. */ +/****************************** Legacy section ***************************/ + + +/* + * These interfaces just aren't supported in the new (wide) model: + * + * mach_vm_region_info() - + * vm_map_pages_info() - + * no user-level replacement for these MACH_DEBUG interfaces + * vm_map_get_upl() - + * no user-level replacement at the moment + * vm_region_info() - + * use mach_vm_region_info() or vm_region_info_64() + * vm_region_recurse() - + * use mach_vm_region_recurse() or vm_region_recurse_64() + */ + +/* + * The following legacy interfaces are provided as macro wrappers to the new + * interfaces. You should strive to use the new ones instead: + * + * vm_map() - + * use mach_vm_map() or vm_map_64() + * vm_region() - + * use mach_vm_region() or vm_region_64() + * mach_make_memory_entry() - + * use mach_vm_make_memory_entry() or mach_make_memory_entry_64() + */ diff --git a/osfmk/mach/machine.h b/osfmk/mach/machine.h index 52078e8f3..130a23ce7 100644 --- a/osfmk/mach/machine.h +++ b/osfmk/mach/machine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,33 +54,13 @@ #ifndef _MACH_MACHINE_H_ #define _MACH_MACHINE_H_ -#include - +#include #include #include -/* - * For each host, there is a maximum possible number of - * cpus that may be available in the system. This is the - * compile-time constant NCPUS, which is defined in cpus.h. - * - * In addition, there is a machine_slot specifier for each - * possible cpu in the system. - */ - -struct machine_info { - integer_t major_version; /* kernel major version id */ - integer_t minor_version; /* kernel minor version id */ - integer_t max_cpus; /* max number of cpus compiled */ - integer_t avail_cpus; /* number actually available */ - vm_size_t memory_size; /* size of memory in bytes */ -}; - -typedef struct machine_info *machine_info_t; -typedef struct machine_info machine_info_data_t; /* bogus */ - typedef integer_t cpu_type_t; typedef integer_t cpu_subtype_t; +typedef integer_t cpu_threadtype_t; #define CPU_STATE_MAX 4 @@ -90,25 +70,57 @@ typedef integer_t cpu_subtype_t; #define CPU_STATE_NICE 3 #ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_UNSTABLE - -struct machine_slot { -/*boolean_t*/integer_t is_cpu; /* is there a cpu in this slot?
*/ - cpu_type_t cpu_type; /* type of cpu */ - cpu_subtype_t cpu_subtype; /* subtype of cpu */ -/*boolean_t*/integer_t running; /* is cpu running */ - integer_t cpu_ticks[CPU_STATE_MAX]; - integer_t clock_freq; /* clock interrupt frequency */ + +#include + +__BEGIN_DECLS +cpu_type_t cpu_type(void); + +cpu_subtype_t cpu_subtype(void); + +cpu_threadtype_t cpu_threadtype(void); +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE + +struct machine_info { + integer_t major_version; /* kernel major version id */ + integer_t minor_version; /* kernel minor version id */ + integer_t max_cpus; /* max number of CPUs possible */ + integer_t avail_cpus; /* number of CPUs now available */ + uint32_t memory_size; /* size of memory in bytes, capped at 2 GB */ + uint64_t max_mem; /* actual size of physical memory */ + integer_t physical_cpu; /* number of physical CPUs now available */ + integer_t physical_cpu_max; /* max number of physical CPUs possible */ + integer_t logical_cpu; /* number of logical CPUs now available */ + integer_t logical_cpu_max; /* max number of logical CPUs possible */ }; -typedef struct machine_slot *machine_slot_t; -typedef struct machine_slot machine_slot_data_t; /* bogus */ +typedef struct machine_info *machine_info_t; +typedef struct machine_info machine_info_data_t; extern struct machine_info machine_info; -extern struct machine_slot machine_slot[]; -#endif /* __APPLE_API_UNSTABLE */ -#endif /* KERNEL_PRIVATE */ +__BEGIN_DECLS +cpu_type_t slot_type( + int slot_num); + +cpu_subtype_t slot_subtype( + int slot_num); + +cpu_threadtype_t slot_threadtype( + int slot_num); +__END_DECLS + +#endif /* MACH_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ + + +/* + * Capability bits used in the definition of cpu_type. + */ +#define CPU_ARCH_MASK 0xff000000 /* mask for architecture bits */ +#define CPU_ARCH_ABI64 0x01000000 /* 64 bit ABI */ /* * Machine types known by all. @@ -134,7 +146,7 @@ extern struct machine_slot machine_slot[]; /* skip CPU_TYPE_ALPHA ((cpu_type_t) 16) */ /* skip ((cpu_type_t) 17) */ #define CPU_TYPE_POWERPC ((cpu_type_t) 18) - +#define CPU_TYPE_POWERPC64 (CPU_TYPE_POWERPC | CPU_ARCH_ABI64) /* * Machine subtypes (these are defined here, instead of in a machine @@ -157,6 +169,12 @@ extern struct machine_slot machine_slot[]; #define CPU_SUBTYPE_LITTLE_ENDIAN ((cpu_subtype_t) 0) #define CPU_SUBTYPE_BIG_ENDIAN ((cpu_subtype_t) 1) +/* + * Machine threadtypes. + * This is none - not defined - for most machine types/subtypes. + */ +#define CPU_THREADTYPE_NONE ((cpu_threadtype_t) 0) + /* * VAX subtypes (these do *not* necessarily conform to the actual cpu * ID assigned by DEC available via the SID register). @@ -219,6 +237,9 @@ extern struct machine_slot machine_slot[]; #define CPU_SUBTYPE_INTEL_MODEL(x) ((x) >> 4) #define CPU_SUBTYPE_INTEL_MODEL_ALL 0 + +#define CPU_THREADTYPE_INTEL_HTT ((cpu_threadtype_t) 1) + /* * Mips subtypes. */ diff --git a/osfmk/mach/machine/asm.h b/osfmk/mach/machine/asm.h index ed74cc334..d4a8aee92 100644 --- a/osfmk/mach/machine/asm.h +++ b/osfmk/mach/machine/asm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
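For illustration, a small self-check built only from the machine.h definitions above, showing how the new capability bits compose with the base machine types:

    #include <mach/machine.h>
    #include <assert.h>

    int main(void)
    {
        /* CPU_TYPE_POWERPC64 is the PowerPC type plus the 64-bit ABI bit */
        assert(CPU_TYPE_POWERPC64 == (CPU_TYPE_POWERPC | CPU_ARCH_ABI64));

        /* masking off the architecture bits recovers the base type */
        assert((CPU_TYPE_POWERPC64 & ~CPU_ARCH_MASK) == CPU_TYPE_POWERPC);

        /* testing a cpu_type_t for a 64-bit variant */
        assert((CPU_TYPE_POWERPC64 & CPU_ARCH_ABI64) != 0);
        assert((CPU_TYPE_POWERPC & CPU_ARCH_ABI64) == 0);
        return 0;
    }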
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_ASM_H #define _MACH_MACHINE_ASM_H - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/asm.h" #elif defined (__i386__) #include "mach/i386/asm.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_ASM_H */ diff --git a/osfmk/mach/machine/boolean.h b/osfmk/mach/machine/boolean.h index d85824189..6a3f99afe 100644 --- a/osfmk/mach/machine/boolean.h +++ b/osfmk/mach/machine/boolean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_BOOLEAN_H_ #define _MACH_MACHINE_BOOLEAN_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/boolean.h" #elif defined (__i386__) #include "mach/i386/boolean.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_BOOLEAN_H_ */ diff --git a/osfmk/mach/machine/exception.h b/osfmk/mach/machine/exception.h index e08c34a15..3640e2b18 100644 --- a/osfmk/mach/machine/exception.h +++ b/osfmk/mach/machine/exception.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_EXCEPTION_H_ #define _MACH_MACHINE_EXCEPTION_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/exception.h" #elif defined (__i386__) #include "mach/i386/exception.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_EXCEPTION_H_ */ diff --git a/osfmk/mach/machine/kern_return.h b/osfmk/mach/machine/kern_return.h index 45ad20b8b..8f3366f99 100644 --- a/osfmk/mach/machine/kern_return.h +++ b/osfmk/mach/machine/kern_return.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_KERN_RETURN_H_ #define _MACH_MACHINE_KERN_RETURN_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/kern_return.h" #elif defined (__i386__) #include "mach/i386/kern_return.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_KERN_RETURN_H_ */ diff --git a/osfmk/mach/machine/machine_types.defs b/osfmk/mach/machine/machine_types.defs index 2b9ecc264..459ff4b3a 100644 --- a/osfmk/mach/machine/machine_types.defs +++ b/osfmk/mach/machine/machine_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_MACHINE_TYPES_DEFS #define _MACH_MACHINE_MACHINE_TYPES_DEFS - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/machine_types.defs" #elif defined (__i386__) #include "mach/i386/machine_types.defs" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_THREAD_STATUS_H_ */ diff --git a/osfmk/mach/machine/ndr_def.h b/osfmk/mach/machine/ndr_def.h index 498d5c984..f96f182b9 100644 --- a/osfmk/mach/machine/ndr_def.h +++ b/osfmk/mach/machine/ndr_def.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_NDR_DEF_H #define _MACH_MACHINE_NDR_DEF_H - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/ndr_def.h" #elif defined (__i386__) #include "mach/i386/ndr_def.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_NDR_DEF_H */ diff --git a/osfmk/mach/machine/processor_info.h b/osfmk/mach/machine/processor_info.h index fa04db434..f8944a03f 100644 --- a/osfmk/mach/machine/processor_info.h +++ b/osfmk/mach/machine/processor_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_PROCESSOR_INFO_H_ #define _MACH_MACHINE_PROCESSOR_INFO_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/processor_info.h" #elif defined (__i386__) #include "mach/i386/processor_info.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_PROCESSOR_INFO_H_ */ diff --git a/osfmk/mach/machine/rpc.h b/osfmk/mach/machine/rpc.h index 3d3f19243..62cacfd7b 100644 --- a/osfmk/mach/machine/rpc.h +++ b/osfmk/mach/machine/rpc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_RPC_H_ #define _MACH_MACHINE_RPC_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/rpc.h" #elif defined (__i386__) #include "mach/i386/rpc.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - -#endif /* _MACH_MACHINE_RPC_H_ */ +#endif /* _MACH_MACHINE_RPC_H_ */ diff --git a/osfmk/mach/machine/syscall_sw.h b/osfmk/mach/machine/syscall_sw.h index c140fcdbc..e20bf846e 100644 --- a/osfmk/mach/machine/syscall_sw.h +++ b/osfmk/mach/machine/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,13 @@ * * @APPLE_LICENSE_HEADER_END@ */ + +#ifdef PRIVATE + #ifndef _MACH_MACHINE_SYSCALL_SW_H_ #define _MACH_MACHINE_SYSCALL_SW_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/syscall_sw.h" #elif defined (__i386__) #include "mach/i386/syscall_sw.h" @@ -31,5 +33,6 @@ #error architecture not supported #endif +#endif /* _MACH_MACHINE_SYSCALL_SW_H_ */ -#endif /* _MACH_MACHINE_SYSCALL_SW_H_ */ +#endif /* PRIVATE */ diff --git a/osfmk/mach/machine/thread_state.h b/osfmk/mach/machine/thread_state.h index 286cfc29d..9601fb728 100644 --- a/osfmk/mach/machine/thread_state.h +++ b/osfmk/mach/machine/thread_state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_THREAD_STATE_H_ #define _MACH_MACHINE_THREAD_STATE_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined(__ppc64__) #include "mach/ppc/thread_state.h" #elif defined (__i386__) #include "mach/i386/thread_state.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_THREAD_STATE_H_ */ diff --git a/osfmk/mach/machine/thread_status.h b/osfmk/mach/machine/thread_status.h index 6047637e6..e78affe07 100644 --- a/osfmk/mach/machine/thread_status.h +++ b/osfmk/mach/machine/thread_status.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_THREAD_STATUS_H_ #define _MACH_MACHINE_THREAD_STATUS_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/thread_status.h" #elif defined (__i386__) #include "mach/i386/thread_status.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_THREAD_STATUS_H_ */ diff --git a/osfmk/mach/machine/vm_param.h b/osfmk/mach/machine/vm_param.h index beb5bc185..8fb0957c5 100644 --- a/osfmk/mach/machine/vm_param.h +++ b/osfmk/mach/machine/vm_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_VM_PARAM_H_ #define _MACH_MACHINE_VM_PARAM_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #include "mach/ppc/vm_param.h" #elif defined (__i386__) #include "mach/i386/vm_param.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_VM_PARAM_H_ */ diff --git a/osfmk/mach/machine/vm_types.h b/osfmk/mach/machine/vm_types.h index 7b8770485..77a91ca50 100644 --- a/osfmk/mach/machine/vm_types.h +++ b/osfmk/mach/machine/vm_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,11 +19,11 @@ * * @APPLE_LICENSE_HEADER_END@ */ + #ifndef _MACH_MACHINE_VM_TYPES_H_ #define _MACH_MACHINE_VM_TYPES_H_ - -#if defined (__ppc__) +#if defined (__ppc__) || defined(__ppc64__) #include "mach/ppc/vm_types.h" #elif defined (__i386__) #include "mach/i386/vm_types.h" @@ -31,5 +31,4 @@ #error architecture not supported #endif - #endif /* _MACH_MACHINE_VM_TYPES_H_ */ diff --git a/osfmk/mach/memory_object.defs b/osfmk/mach/memory_object.defs index 49525fc0b..920397d64 100644 --- a/osfmk/mach/memory_object.defs +++ b/osfmk/mach/memory_object.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -89,7 +89,7 @@ serverprefix dp_; routine memory_object_init( memory_object : memory_object_t; memory_control : memory_object_control_t; - memory_object_page_size : vm_size_t); + memory_object_page_size : memory_object_cluster_size_t); /* @@ -116,7 +116,7 @@ routine memory_object_terminate( routine memory_object_data_request( memory_object : memory_object_t; offset : memory_object_offset_t; - length : vm_size_t; + length : memory_object_cluster_size_t; desired_access : vm_prot_t); /* @@ -133,9 +133,12 @@ routine memory_object_data_request( routine memory_object_data_return( memory_object : memory_object_t; offset : memory_object_offset_t; - size : vm_size_t; + size : memory_object_cluster_size_t; + out resid_offset : memory_object_offset_t; + out io_error : int; dirty : boolean_t; - kernel_copy : boolean_t); + kernel_copy : boolean_t; + upl_flags : int); /* * Provide initial data contents for this region of @@ -148,7 +151,7 @@ routine memory_object_data_return( routine memory_object_data_initialize( memory_object : memory_object_t; offset : memory_object_offset_t; - size : vm_size_t); + size : memory_object_cluster_size_t); /* * Request that the specified portion of this @@ -161,7 +164,7 @@ routine memory_object_data_initialize( routine memory_object_data_unlock( memory_object : memory_object_t; offset : memory_object_offset_t; - size : vm_size_t; + size : memory_object_cluster_size_t; desired_access : vm_prot_t); @@ -176,7 +179,7 @@ routine memory_object_data_unlock( routine memory_object_synchronize( memory_object : memory_object_t; offset : memory_object_offset_t; - size : vm_size_t; + size : memory_object_cluster_size_t; sync_flags : vm_sync_t ); /* diff --git a/osfmk/mach/memory_object.h b/osfmk/mach/memory_object.h index 63675bd21..e7949d9c8 100644 --- a/osfmk/mach/memory_object.h +++ b/osfmk/mach/memory_object.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -65,6 +65,7 @@ */ #include +#include #include typedef mach_port_t memory_object_t; @@ -160,8 +161,8 @@ struct old_memory_object_behave_info { }; struct memory_object_perf_info { - vm_size_t cluster_size; - boolean_t may_cache; + memory_object_cluster_size_t cluster_size; + boolean_t may_cache; }; struct old_memory_object_attr_info { /* old attr list */ @@ -172,7 +173,7 @@ struct old_memory_object_attr_info { /* old attr list */ struct memory_object_attr_info { memory_object_copy_strategy_t copy_strategy; - vm_offset_t cluster_size; + memory_object_cluster_size_t cluster_size; boolean_t may_cache_object; boolean_t temporary; }; @@ -200,16 +201,16 @@ typedef struct old_memory_object_attr_info old_memory_object_attr_info_data_t; typedef struct memory_object_attr_info *memory_object_attr_info_t; typedef struct memory_object_attr_info memory_object_attr_info_data_t; -#define OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT \ - (sizeof(old_memory_object_behave_info_data_t)/sizeof(int)) -#define MEMORY_OBJECT_BEHAVE_INFO_COUNT \ - (sizeof(memory_object_behave_info_data_t)/sizeof(int)) -#define MEMORY_OBJECT_PERF_INFO_COUNT \ - (sizeof(memory_object_perf_info_data_t)/sizeof(int)) -#define OLD_MEMORY_OBJECT_ATTR_INFO_COUNT \ - (sizeof(old_memory_object_attr_info_data_t)/sizeof(int)) -#define MEMORY_OBJECT_ATTR_INFO_COUNT \ - (sizeof(memory_object_attr_info_data_t)/sizeof(int)) +#define OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(old_memory_object_behave_info_data_t)/sizeof(int))) +#define MEMORY_OBJECT_BEHAVE_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_behave_info_data_t)/sizeof(int))) +#define MEMORY_OBJECT_PERF_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_perf_info_data_t)/sizeof(int))) +#define OLD_MEMORY_OBJECT_ATTR_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(old_memory_object_attr_info_data_t)/sizeof(int))) +#define MEMORY_OBJECT_ATTR_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_attr_info_data_t)/sizeof(int))) #define invalid_memory_object_flavor(f) \ (f != MEMORY_OBJECT_ATTRIBUTE_INFO && \ diff --git a/osfmk/mach/memory_object_control.defs b/osfmk/mach/memory_object_control.defs index 0a8680bca..22a6518c0 100644 --- a/osfmk/mach/memory_object_control.defs +++ b/osfmk/mach/memory_object_control.defs @@ -114,6 +114,8 @@ routine memory_object_lock_request( memory_control : memory_object_control_t; offset : memory_object_offset_t; size : memory_object_size_t; + out resid_offset : memory_object_offset_t; + out io_errno : integer_t; should_return : memory_object_return_t; flags : integer_t; lock_value : vm_prot_t diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 04c279b0f..46c32d901 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -65,21 +65,23 @@ */ #include +#include #include #include -#include +#include #define VM_64_BIT_DATA_OBJECTS typedef unsigned long long memory_object_offset_t; typedef unsigned long long memory_object_size_t; -#ifdef __APPLE_API_EVOLVING /* * Temporary until real EMMI version gets re-implemented */ -#ifdef KERNEL_PRIVATE + +#ifdef KERNEL_PRIVATE + typedef struct memory_object { int *pager; } *memory_object_t; @@ -88,12 +90,12 @@ typedef struct memory_object_control { struct vm_object *object; } *memory_object_control_t; -#else /* !KERNEL_PRIVATE */ +#else /* KERNEL_PRIVATE */ typedef mach_port_t memory_object_t; typedef mach_port_t memory_object_control_t; -#endif /* !KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ typedef memory_object_t *memory_object_array_t; /* A memory object ... */ @@ -171,6 +173,7 @@ typedef int memory_object_return_t; #define MEMORY_OBJECT_DATA_PURGE 0x4 #define MEMORY_OBJECT_COPY_SYNC 0x8 #define MEMORY_OBJECT_DATA_SYNC 0x10 +#define MEMORY_OBJECT_IO_SYNC 0x20 /* * Types for the memory object flavor interfaces @@ -186,7 +189,8 @@ typedef int memory_object_info_data_t[MEMORY_OBJECT_INFO_MAX]; #define MEMORY_OBJECT_ATTRIBUTE_INFO 14 #define MEMORY_OBJECT_BEHAVIOR_INFO 15 -#ifdef __APPLE_API_UNSTABLE +#ifdef PRIVATE + #define OLD_MEMORY_OBJECT_BEHAVIOR_INFO 10 #define OLD_MEMORY_OBJECT_ATTRIBUTE_INFO 12 @@ -207,20 +211,39 @@ typedef struct old_memory_object_behave_info old_memory_object_behave_info_data_ typedef struct old_memory_object_attr_info *old_memory_object_attr_info_t; typedef struct old_memory_object_attr_info old_memory_object_attr_info_data_t; -#define OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT \ - (sizeof(old_memory_object_behave_info_data_t)/sizeof(int)) -#define OLD_MEMORY_OBJECT_ATTR_INFO_COUNT \ - (sizeof(old_memory_object_attr_info_data_t)/sizeof(int)) -#endif /* __APPLE_API_UNSTABLE */ +#define OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(old_memory_object_behave_info_data_t)/sizeof(int))) +#define OLD_MEMORY_OBJECT_ATTR_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(old_memory_object_attr_info_data_t)/sizeof(int))) + +#ifdef KERNEL + +__BEGIN_DECLS +extern void memory_object_reference(memory_object_t object); +extern void memory_object_deallocate(memory_object_t object); + +extern void memory_object_default_reference(memory_object_default_t); +extern void memory_object_default_deallocate(memory_object_default_t); + +extern void memory_object_control_reference(memory_object_control_t control); +extern void memory_object_control_deallocate(memory_object_control_t control); +extern int memory_object_control_uiomove(memory_object_control_t, memory_object_offset_t, void *, int, int, int); +__END_DECLS + +#endif /* KERNEL */ + +#endif /* PRIVATE */ + +typedef natural_t memory_object_cluster_size_t; struct memory_object_perf_info { - vm_size_t cluster_size; + memory_object_cluster_size_t cluster_size; boolean_t may_cache; }; struct memory_object_attr_info { memory_object_copy_strategy_t copy_strategy; - vm_offset_t cluster_size; + memory_object_cluster_size_t cluster_size; boolean_t may_cache_object; boolean_t temporary; }; @@ -243,12 +266,12 @@ typedef struct memory_object_perf_info memory_object_perf_info_data_t; typedef struct memory_object_attr_info *memory_object_attr_info_t; typedef struct memory_object_attr_info memory_object_attr_info_data_t; -#define MEMORY_OBJECT_BEHAVE_INFO_COUNT \ - (sizeof(memory_object_behave_info_data_t)/sizeof(int)) -#define MEMORY_OBJECT_PERF_INFO_COUNT \ - 
(sizeof(memory_object_perf_info_data_t)/sizeof(int)) -#define MEMORY_OBJECT_ATTR_INFO_COUNT \ - (sizeof(memory_object_attr_info_data_t)/sizeof(int)) +#define MEMORY_OBJECT_BEHAVE_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_behave_info_data_t)/sizeof(int))) +#define MEMORY_OBJECT_PERF_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_perf_info_data_t)/sizeof(int))) +#define MEMORY_OBJECT_ATTR_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(memory_object_attr_info_data_t)/sizeof(int))) #define invalid_memory_object_flavor(f) \ (f != MEMORY_OBJECT_ATTRIBUTE_INFO && \ @@ -266,27 +289,6 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; #define MEMORY_OBJECT_RELEASE_NO_OP 0x4 -/* - * Universal Page List data structures - */ - -#define MAX_UPL_TRANSFER 256 - -struct upl_page_info { - vm_offset_t phys_addr; - unsigned int - pageout:1, /* page is to be removed on commit */ - absent:1, /* No valid data in this page */ - dirty:1, /* Page must be cleaned (O) */ - precious:1, /* must be cleaned, we have only copy */ - device:1, /* no page data, mapped dev memory */ - :0; /* force to long boundary */ -}; - -typedef struct upl_page_info upl_page_info_t; -typedef upl_page_info_t *upl_page_info_array_t; -typedef upl_page_info_array_t upl_page_list_ptr_t; - /* named entry processor mapping options */ /* enumerated */ #define MAP_MEM_NOOP 0 @@ -306,43 +308,149 @@ typedef upl_page_info_array_t upl_page_list_ptr_t; /* leave room for vm_prot bits */ #define MAP_MEM_ONLY 0x10000 /* change processor caching */ #define MAP_MEM_NAMED_CREATE 0x20000 /* create extant object */ +#define MAP_MEM_PURGABLE 0x40000 /* create a purgable VM object */ +#define MAP_MEM_NAMED_REUSE 0x80000 /* reuse provided entry if identical */ + +#ifdef KERNEL + +/* + * Universal Page List data structures + * + * A UPL describes a bounded set of physical pages + * associated with some range of an object or map + * and a snapshot of the attributes associated with + * each of those pages. 
+ */ +#ifdef PRIVATE +#define MAX_UPL_TRANSFER 256 + +struct upl_page_info { + ppnum_t phys_addr; /* physical page index number */ + unsigned int +#ifdef XNU_KERNEL_PRIVATE + pageout:1, /* page is to be removed on commit */ + absent:1, /* No valid data in this page */ + dirty:1, /* Page must be cleaned (O) */ + precious:1, /* must be cleaned, we have only copy */ + device:1, /* no page data, mapped dev memory */ + :0; /* force to long boundary */ +#else + opaque; /* use upl_page_xxx() accessor funcs */ +#endif /* XNU_KERNEL_PRIVATE */ +}; + +#else + +struct upl_page_info { + unsigned int opaque[2]; /* use upl_page_xxx() accessor funcs */ +}; + +#endif /* PRIVATE */ + +typedef struct upl_page_info upl_page_info_t; +typedef upl_page_info_t *upl_page_info_array_t; +typedef upl_page_info_array_t upl_page_list_ptr_t; + +typedef uint32_t upl_offset_t; /* page-aligned byte offset */ +typedef uint32_t upl_size_t; /* page-aligned byte size */ /* upl invocation flags */ /* top nibble is used by super upl */ -#define UPL_FLAGS_NONE 0x0 -#define UPL_COPYOUT_FROM 0x1 -#define UPL_PRECIOUS 0x2 -#define UPL_NO_SYNC 0x4 -#define UPL_CLEAN_IN_PLACE 0x8 -#define UPL_NOBLOCK 0x10 -#define UPL_RET_ONLY_DIRTY 0x20 -#define UPL_SET_INTERNAL 0x40 -#define UPL_QUERY_OBJECT_TYPE 0x80 -#define UPL_RET_ONLY_ABSENT 0x100 /* used only for COPY_FROM = FALSE */ -#define UPL_FILE_IO 0x200 -#define UPL_SET_LITE 0x400 -#define UPL_SET_INTERRUPTIBLE 0x800 -#define UPL_SET_IO_WIRE 0x1000 -#define UPL_FOR_PAGEOUT 0x2000 -#define UPL_WILL_BE_DUMPED 0x4000 +#define UPL_FLAGS_NONE 0x00000000 +#define UPL_COPYOUT_FROM 0x00000001 +#define UPL_PRECIOUS 0x00000002 +#define UPL_NO_SYNC 0x00000004 +#define UPL_CLEAN_IN_PLACE 0x00000008 +#define UPL_NOBLOCK 0x00000010 +#define UPL_RET_ONLY_DIRTY 0x00000020 +#define UPL_SET_INTERNAL 0x00000040 +#define UPL_QUERY_OBJECT_TYPE 0x00000080 +#define UPL_RET_ONLY_ABSENT 0x00000100 /* used only for COPY_FROM = FALSE */ +#define UPL_FILE_IO 0x00000200 +#define UPL_SET_LITE 0x00000400 +#define UPL_SET_INTERRUPTIBLE 0x00000800 +#define UPL_SET_IO_WIRE 0x00001000 +#define UPL_FOR_PAGEOUT 0x00002000 +#define UPL_WILL_BE_DUMPED 0x00004000 +#define UPL_FORCE_DATA_SYNC 0x00008000 +/* continued after the ticket bits... */ + +#define UPL_PAGE_TICKET_MASK 0x000F0000 +#define UPL_PAGE_TICKET_SHIFT 16 + +/* ... flags resume here */ +#define UPL_BLOCK_ACCESS 0x00100000 +#define UPL_ENCRYPT 0x00200000 +#define UPL_NOZEROFILL 0x00400000 +#define UPL_WILL_MODIFY 0x00800000 /* caller will modify the pages */ + +/* UPL flags known by this kernel */ +#define UPL_VALID_FLAGS 0x00FFFFFF /* upl abort error flags */ -#define UPL_ABORT_RESTART 0x1 +#define UPL_ABORT_RESTART 0x1 #define UPL_ABORT_UNAVAILABLE 0x2 #define UPL_ABORT_ERROR 0x4 #define UPL_ABORT_FREE_ON_EMPTY 0x8 /* only implemented in wrappers */ #define UPL_ABORT_DUMP_PAGES 0x10 #define UPL_ABORT_NOTIFY_EMPTY 0x20 +#define UPL_ABORT_ALLOW_ACCESS 0x40 /* upl pages check flags */ #define UPL_CHECK_DIRTY 0x1 -/* upl pagein/pageout flags */ + +/* + * upl pagein/pageout flags + * + * + * when I/O is issued from this UPL it should be done synchronously + */ #define UPL_IOSYNC 0x1 + +/* + * the passed in UPL should not have either a commit or abort + * applied to it by the underlying layers... the site that + * created the UPL is responsible for cleaning it up. 
+ */ #define UPL_NOCOMMIT 0x2 -#define UPL_NORDAHEAD 0x4 + +/* + * turn off any speculative read-ahead applied at the I/O layer + */ +#define UPL_NORDAHEAD 0x4 + +/* + * pageout request is targeting a real file + * as opposed to a swap file. + */ + +#define UPL_VNODE_PAGER 0x8 +/* + * this pageout is being originated as part of an explicit + * memory synchronization operation... no speculative clustering + * should be applied, only the range specified should be pushed. + */ +#define UPL_MSYNC 0x10 + +/* + * + */ +#ifdef MACH_KERNEL_PRIVATE +#define UPL_PAGING_ENCRYPTED 0x20 +#endif /* MACH_KERNEL_PRIVATE */ + +/* + * this pageout is being originated as part of an explicit + * memory synchronization operation that is checking for I/O + * errors and taking its own action... if an error occurs, + * just abort the pages back into the cache unchanged + */ +#define UPL_KEEPCACHED 0x40 + + /* upl commit flags */ #define UPL_COMMIT_FREE_ON_EMPTY 0x1 /* only implemented in wrappers */ @@ -350,6 +458,7 @@ typedef upl_page_info_array_t upl_page_list_ptr_t; #define UPL_COMMIT_SET_DIRTY 0x4 #define UPL_COMMIT_INACTIVATE 0x8 #define UPL_COMMIT_NOTIFY_EMPTY 0x10 +#define UPL_COMMIT_ALLOW_ACCESS 0x20 /* flags for return of state from vm_map_get_upl, vm_upl address space */ /* based call */ @@ -357,51 +466,18 @@ typedef upl_page_info_array_t upl_page_list_ptr_t; #define UPL_PHYS_CONTIG 0x2 -/* access macros for upl_t */ - -#define UPL_DEVICE_PAGE(upl) \ - (((upl)[(index)].phys_addr != 0) ? (!((upl)[0].device)) : FALSE) - -#define UPL_PAGE_PRESENT(upl, index) \ - ((upl)[(index)].phys_addr != 0) - -#define UPL_PHYS_PAGE(upl, index) \ - (((upl)[(index)].phys_addr != 0) ? \ - ((upl)[(index)].phys_addr) : (vm_offset_t)NULL) - -#define UPL_DIRTY_PAGE(upl, index) \ - (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].dirty) : FALSE) - -#define UPL_PRECIOUS_PAGE(upl, index) \ - (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].precious) : FALSE) - -#define UPL_VALID_PAGE(upl, index) \ - (((upl)[(index)].phys_addr != 0) ? (!((upl)[(index)].absent)) : FALSE) - -#define UPL_PAGEOUT_PAGE(upl, index) \ - (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].pageout) : FALSE) - -#define UPL_SET_PAGE_FREE_ON_COMMIT(upl, index) \ - if ((upl)[(index)].phys_addr != 0) \ - ((upl)[(index)].pageout) = TRUE - -#define UPL_CLR_PAGE_FREE_ON_COMMIT(upl, index) \ - if ((upl)[(index)].phys_addr != 0) \ - ((upl)[(index)].pageout) = FALSE - - /* * Flags for the UPL page ops routine. This routine is not exported * out of the kernel at the moment and so the defs live here. */ #define UPL_POP_DIRTY 0x1 #define UPL_POP_PAGEOUT 0x2 -#define UPL_POP_PRECIOUS 0x4 +#define UPL_POP_PRECIOUS 0x4 #define UPL_POP_ABSENT 0x8 -#define UPL_POP_BUSY 0x10 +#define UPL_POP_BUSY 0x10 #define UPL_POP_PHYSICAL 0x10000000 -#define UPL_POP_DUMP 0x20000000 +#define UPL_POP_DUMP 0x20000000 #define UPL_POP_SET 0x40000000 #define UPL_POP_CLR 0x80000000 @@ -423,26 +499,45 @@ typedef upl_page_info_array_t upl_page_list_ptr_t; * UPL_ROP_DUMP: Dump the pages which are found in the target object * for the target range. */ -#define UPL_ROP_DUMP 0x04 +#define UPL_ROP_DUMP 0x04 + +#ifdef PRIVATE +/* access macros for upl_t */ +#define UPL_DEVICE_PAGE(upl) \ + (((upl)[(index)].phys_addr != 0) ?
(!((upl)[0].device)) : FALSE) -#ifdef KERNEL_PRIVATE +#define UPL_PAGE_PRESENT(upl, index) \ + ((upl)[(index)].phys_addr != 0) -extern void memory_object_reference(memory_object_t object); -extern void memory_object_deallocate(memory_object_t object); +#define UPL_PHYS_PAGE(upl, index) \ + ((upl)[(index)].phys_addr) -extern void memory_object_default_reference(memory_object_default_t); -extern void memory_object_default_deallocate(memory_object_default_t); +#define UPL_DIRTY_PAGE(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].dirty) : FALSE) -extern void memory_object_control_reference(memory_object_control_t control); -extern void memory_object_control_deallocate(memory_object_control_t control); +#define UPL_PRECIOUS_PAGE(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].precious) : FALSE) +#define UPL_VALID_PAGE(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? (!((upl)[(index)].absent)) : FALSE) + +#define UPL_PAGEOUT_PAGE(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? ((upl)[(index)].pageout) : FALSE) + +#define UPL_SET_PAGE_FREE_ON_COMMIT(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? \ + ((upl)[(index)].pageout = TRUE) : FALSE) + +#define UPL_CLR_PAGE_FREE_ON_COMMIT(upl, index) \ + (((upl)[(index)].phys_addr != 0) ? \ + ((upl)[(index)].pageout = FALSE) : FALSE) /* The call prototyped below is used strictly by UPL_GET_INTERNAL_PAGE_LIST */ extern vm_size_t upl_offset_to_pagelist; -extern vm_size_t upl_get_internal_pagelist_offset(); +extern vm_size_t upl_get_internal_pagelist_offset(void); /* UPL_GET_INTERNAL_PAGE_LIST is only valid on internal objects where the */ /* list request was made with the UPL_INTERNAL flag */ @@ -452,74 +547,25 @@ extern vm_size_t upl_get_internal_pagelist_offset(); (unsigned int)upl + (unsigned int)(upl_offset_to_pagelist = upl_get_internal_pagelist_offset()): \ (unsigned int)upl + (unsigned int)upl_offset_to_pagelist)) -extern boolean_t upl_page_present(upl_page_info_t *upl, int index); - -extern boolean_t upl_dirty_page(upl_page_info_t *upl, int index); - -extern boolean_t upl_valid_page(upl_page_info_t *upl, int index); - -extern vm_offset_t upl_phys_page(upl_page_info_t *upl, int index); +__BEGIN_DECLS +extern ppnum_t upl_phys_page(upl_page_info_t *upl, int index); extern void upl_set_dirty(upl_t upl); - extern void upl_clear_dirty(upl_t upl); +__END_DECLS -/* - * The following interface definitions should be generated automatically - * through Mig definitions or whatever follows the MIG tool as part of the - * component API. Until this is up and running however this explicit - * description will do. - */ +#endif /* PRIVATE */ -#include +__BEGIN_DECLS + +extern boolean_t upl_page_present(upl_page_info_t *upl, int index); +extern boolean_t upl_dirty_page(upl_page_info_t *upl, int index); +extern boolean_t upl_valid_page(upl_page_info_t *upl, int index); +extern void upl_deallocate(upl_t upl); + +__END_DECLS -/* supply a map and a range, a upl will be returned. 
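For illustration, a kernel-side sketch of the accessor style these declarations impose now that upl_page_info is opaque outside the kernel proper; the helper name and its pl/count arguments are hypothetical, standing in for a page list obtained from one of the UPL creation paths:

    /* log the state of each page snapshotted in a UPL page list */
    static void
    log_upl_pages(upl_page_info_t *pl, int count)
    {
        int i;

        for (i = 0; i < count; i++) {
            if (!upl_page_present(pl, i))
                continue;       /* no page snapshotted at this index */
            printf("page %d: ppnum 0x%x valid=%d dirty=%d\n", i,
                   upl_phys_page(pl, i),
                   upl_valid_page(pl, i) ? 1 : 0,
                   upl_dirty_page(pl, i) ? 1 : 0);
        }
    }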
*/ -extern int kernel_vm_map_get_upl( - vm_map_t map, - vm_address_t offset, - vm_size_t *upl_size, - upl_t *upl, - upl_page_info_array_t page_list, - unsigned int *count, - int *flags, - int force_data_sync); - -extern int kernel_upl_map( - vm_map_t map, - upl_t upl, - vm_offset_t *dst_addr); - -extern int kernel_upl_unmap( - vm_map_t map, - upl_t upl); - -extern int kernel_upl_commit( - upl_t upl, - upl_page_info_t *pl, - mach_msg_type_number_t count); - -extern int kernel_upl_commit_range( - upl_t upl, - vm_offset_t offset, - vm_size_t size, - int flags, - upl_page_info_array_t pl, - mach_msg_type_number_t count); - -extern int kernel_upl_abort( - upl_t upl, - int abort_type); - -extern int kernel_upl_abort_range( - upl_t upl, - vm_offset_t offset, - vm_size_t size, - int abort_flags); - - -#endif /* KERNEL_PRIVATE */ - -#endif /* __APPLE_API_EVOLVING */ +#endif /* KERNEL */ #endif /* _MACH_MEMORY_OBJECT_TYPES_H_ */ diff --git a/osfmk/mach/message.h b/osfmk/mach/message.h index 64c5bebe3..77f71a4e1 100644 --- a/osfmk/mach/message.h +++ b/osfmk/mach/message.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,23 +58,14 @@ #ifndef _MACH_MESSAGE_H_ #define _MACH_MESSAGE_H_ -#ifdef MACH_KERNEL -/* Have to have MIG parameter check for kernel */ -#define TypeCheck 1 -#define _MIG_KERNEL_SPECIFIC_CODE_ 1 -#endif /* MACH_KERNEL */ - -/* static templates are slower and bigger */ -/* #define UseStaticTemplates 0 */ - -#include - #include #include #include #include #include +#include + /* * The timeout mechanism uses mach_msg_timeout_t values, * passed by value. The timeout units are milliseconds. @@ -199,10 +190,11 @@ typedef unsigned int mach_msg_descriptor_type_t; #define MACH_MSG_OOL_PORTS_DESCRIPTOR 2 #define MACH_MSG_OOL_VOLATILE_DESCRIPTOR 3 +#pragma pack(4) typedef struct { - void* pad1; + natural_t pad1; mach_msg_size_t pad2; unsigned int pad3 : 24; mach_msg_descriptor_type_t type : 8; @@ -219,24 +211,79 @@ typedef struct typedef struct { - void* address; + uint32_t address; + mach_msg_size_t size; + boolean_t deallocate: 8; + mach_msg_copy_options_t copy: 8; + unsigned int pad1: 8; + mach_msg_descriptor_type_t type: 8; +} mach_msg_ool_descriptor32_t; + +typedef struct +{ + uint64_t address; + boolean_t deallocate: 8; + mach_msg_copy_options_t copy: 8; + unsigned int pad1: 8; + mach_msg_descriptor_type_t type: 8; + mach_msg_size_t size; +} mach_msg_ool_descriptor64_t; + +typedef struct +{ + void* address; +#if !defined(__LP64__) mach_msg_size_t size; +#endif boolean_t deallocate: 8; mach_msg_copy_options_t copy: 8; unsigned int pad1: 8; mach_msg_descriptor_type_t type: 8; +#if defined(__LP64__) + mach_msg_size_t size; +#endif } mach_msg_ool_descriptor_t; +typedef struct +{ + uint32_t address; + mach_msg_size_t count; + boolean_t deallocate: 8; + mach_msg_copy_options_t copy: 8; + mach_msg_type_name_t disposition : 8; + mach_msg_descriptor_type_t type : 8; +} mach_msg_ool_ports_descriptor32_t; + +typedef struct +{ + uint64_t address; + boolean_t deallocate: 8; + mach_msg_copy_options_t copy: 8; + mach_msg_type_name_t disposition : 8; + mach_msg_descriptor_type_t type : 8; + mach_msg_size_t count; +} mach_msg_ool_ports_descriptor64_t; + typedef struct { void* address; +#if !defined(__LP64__) mach_msg_size_t count; +#endif boolean_t deallocate: 8; mach_msg_copy_options_t copy: 8; mach_msg_type_name_t disposition : 8; mach_msg_descriptor_type_t 
type : 8; +#if defined(__LP64__) + mach_msg_size_t count; +#endif } mach_msg_ool_ports_descriptor_t; +/* + * LP64support - This union definition is not really + * appropriate in LP64 mode because not all descriptors + * are of the same size in that environment. + */ typedef union { mach_msg_port_descriptor_t port; @@ -375,6 +422,8 @@ typedef union mach_msg_empty_rcv_t rcv; } mach_msg_empty_t; +#pragma pack() + /* utility to round the message size - will become machine dependent */ #define round_msg(x) (((mach_msg_size_t)(x) + sizeof (natural_t) - 1) & \ ~(sizeof (natural_t) - 1)) @@ -385,7 +434,6 @@ typedef union #define MACH_MSG_SIZE_MAX ((mach_msg_size_t) ~0) -#ifdef __APPLE_API_OBSOLETE /* * Compatibility definitions, for code written * when there was a msgh_kind instead of msgh_seqno. @@ -394,7 +442,6 @@ typedef union #define MACH_MSGH_KIND_NOTIFICATION 0x00000001 #define msgh_kind msgh_seqno #define mach_msg_kind_t mach_port_seqno_t -#endif /* __APPLE_API_OBSOLETE */ /* * The msgt_number field specifies the number of data elements. @@ -601,6 +648,9 @@ typedef kern_return_t mach_msg_return_t; #define MACH_RCV_IN_PROGRESS_TIMED 0x10004011 /* Waiting for receive with timeout. (Internal use only.) */ + +__BEGIN_DECLS + /* * Routine: mach_msg_overwrite * Purpose: @@ -617,18 +667,6 @@ typedef kern_return_t mach_msg_return_t; * already contain scatter control information to direct the * receiving of the message. */ -#ifdef __APPLE_API_PRIVATE -extern mach_msg_return_t mach_msg_overwrite_trap( - mach_msg_header_t *msg, - mach_msg_option_t option, - mach_msg_size_t send_size, - mach_msg_size_t rcv_size, - mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, - mach_port_name_t notify, - mach_msg_header_t *rcv_msg, - mach_msg_size_t rcv_limit); -#endif /* __APPLE_API_PRIVATE */ extern mach_msg_return_t mach_msg_overwrite( mach_msg_header_t *msg, @@ -641,6 +679,8 @@ extern mach_msg_return_t mach_msg_overwrite( mach_msg_header_t *rcv_msg, mach_msg_size_t rcv_limit); +#ifndef KERNEL + /* * Routine: mach_msg * Purpose: @@ -649,17 +689,6 @@ extern mach_msg_return_t mach_msg_overwrite( * of that fact, then restart the appropriate parts of the * operation silently (trap version does not restart). */ -#ifdef __APPLE_API_PRIVATE -extern mach_msg_return_t mach_msg_trap( - mach_msg_header_t *msg, - mach_msg_option_t option, - mach_msg_size_t send_size, - mach_msg_size_t rcv_size, - mach_port_name_t rcv_name, - mach_msg_timeout_t timeout, - mach_port_name_t notify); -#endif /* __APPLE_API_PRIVATE */ - extern mach_msg_return_t mach_msg( mach_msg_header_t *msg, mach_msg_option_t option, @@ -669,4 +698,8 @@ extern mach_msg_return_t mach_msg( mach_msg_timeout_t timeout, mach_port_name_t notify); +#endif /* KERNEL */ + +__END_DECLS + #endif /* _MACH_MESSAGE_H_ */ diff --git a/osfmk/mach/mig.h b/osfmk/mach/mig.h index fa6ac0b03..ad6cc4aa2 100644 --- a/osfmk/mach/mig.h +++ b/osfmk/mach/mig.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
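For illustration, a user-level sketch of the mach_msg() send path declared in message.h above, using a simple inline-data message; it assumes dest names a receive right held by the caller, which is what makes MACH_MSG_TYPE_MAKE_SEND legal here:

    #include <mach/mach.h>
    #include <string.h>

    struct simple_msg {
        mach_msg_header_t header;
        int payload;            /* inline data, no descriptors */
    };

    static kern_return_t
    send_int(mach_port_t dest, int value)
    {
        struct simple_msg msg;

        memset(&msg, 0, sizeof(msg));
        msg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);
        msg.header.msgh_size = sizeof(msg);
        msg.header.msgh_remote_port = dest;
        msg.header.msgh_local_port = MACH_PORT_NULL;  /* no reply expected */
        msg.payload = value;

        return mach_msg(&msg.header, MACH_SEND_MSG, sizeof(msg),
                        0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE,
                        MACH_PORT_NULL);
    }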
* * @APPLE_LICENSE_HEADER_START@ * @@ -35,6 +35,31 @@ #include #include +#include + +#if defined(MACH_KERNEL) + +/* Turn MIG type checking on by default for kernel */ +#define __MigTypeCheck 1 +#define __MigKernelSpecificCode 1 +#define _MIG_KERNEL_SPECIFIC_CODE_ 1 + +/* Otherwise check legacy setting (temporary) */ +#elif defined(TypeCheck) + +#define __MigTypeCheck TypeCheck + +#endif /* defined(TypeCheck) */ + +/* + * Pack MIG message structs if we have Power alignment of structs. + * This is an indicator of the need to view shared structs in a + * binary-compatible format - and MIG message structs are no different. + */ +#if __DARWIN_ALIGN_POWER +#define __MigPackStructs 1 +#endif + /* * Definition for MIG-generated server stub routines. These routines * unpack the request message, call the server procedure, and pack the @@ -103,28 +128,12 @@ typedef struct mig_symtab { */ } mig_symtab_t; -/* Client side reply port allocate */ -extern mach_port_t mig_get_reply_port(void); - -/* Client side reply port deallocate */ -extern void mig_dealloc_reply_port(mach_port_t reply_port); - -/* Client side reply port "deallocation" */ -extern void mig_put_reply_port(mach_port_t reply_port); - -/* Bounded string copy */ -extern int mig_strncpy(char *dest, const char *src, int len); - -#ifdef KERNEL_PRIVATE -#include +#ifdef PRIVATE -/* Allocate memory for out-of-stack mig structures */ -extern char *mig_user_allocate(vm_size_t size); +/* MIG object runtime - not ready for public consumption */ -/* Deallocate memory used for out-of-stack mig structures */ -extern void mig_user_deallocate(char *data, vm_size_t size); +#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_EVOLVING /* * MIG object runtime definitions * @@ -156,11 +165,11 @@ typedef struct IMIGObjectVtbl IMIGObjectVtbl; typedef struct IMIGNotifyObjectVtbl IMIGNotifyObjectVtbl; typedef struct IMIGObject { - IMIGObjectVtbl *pVtbl; + const IMIGObjectVtbl *pVtbl; } IMIGObject; typedef struct IMIGNotifyObject { - IMIGNotifyObjectVtbl *pVtbl; + const IMIGNotifyObjectVtbl *pVtbl; } IMIGNotifyObject; struct IMIGObjectVtbl { @@ -236,8 +245,41 @@ struct IMIGNotifyObjectVtbl { mig_notify_type_t notify_type); }; -#endif /* __APPLE_API_EVOLVING */ +#endif /* KERNEL_PRIVATE */ +#endif /* PRIVATE */ + +__BEGIN_DECLS + +/* Client side reply port allocate */ +extern mach_port_t mig_get_reply_port(void); + +/* Client side reply port deallocate */ +extern void mig_dealloc_reply_port(mach_port_t reply_port); + +/* Client side reply port "deallocation" */ +extern void mig_put_reply_port(mach_port_t reply_port); + +/* Bounded string copy */ +extern int mig_strncpy(char *dest, const char *src, int len); + +#ifdef KERNEL_PRIVATE + +/* Allocate memory for out-of-stack mig structures */ +extern char *mig_user_allocate(vm_size_t size); + +/* Deallocate memory used for out-of-stack mig structures */ +extern void mig_user_deallocate(char *data, vm_size_t size); + +#else + +/* Allocate memory for out-of-line mig structures */ +extern void mig_allocate(vm_address_t *, vm_size_t); + +/* Deallocate memory used for out-of-line mig structures */ +extern void mig_deallocate(vm_address_t, vm_size_t); #endif /* KERNEL_PRIVATE */ -#endif /* _MACH_MIG_H_ */ +__END_DECLS + +#endif /* _MACH_MIG_H_ */ diff --git a/osfmk/mach/mig_errors.h b/osfmk/mach/mig_errors.h index d7394efc4..8e588251a 100644 --- a/osfmk/mach/mig_errors.h +++ b/osfmk/mach/mig_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -62,6 +62,8 @@ #include #include +#include + /* * These error codes should be specified as system 4, subsystem 2. * But alas backwards compatibility makes that impossible. @@ -87,11 +89,16 @@ * mig_reply_error_t format message. Clients must accept * these in addition to the expected reply message format. */ +#pragma pack(4) typedef struct { mach_msg_header_t Head; NDR_record_t NDR; kern_return_t RetCode; } mig_reply_error_t; +#pragma pack() + + +__BEGIN_DECLS #define __NDR_convert__mig_reply_error_t__defined #if mig_internal @@ -108,5 +115,6 @@ __NDR_convert__mig_reply_error_t(mig_reply_error_t *x) #endif /* __NDR_convert__int_rep__kern_return_t__defined */ } -#endif /* _MACH_MIG_ERRORS_H_ */ +__END_DECLS +#endif /* _MACH_MIG_ERRORS_H_ */ diff --git a/osfmk/mach/mk_timer.h b/osfmk/mach/mk_timer.h index 4e1d97731..1d2db1822 100644 --- a/osfmk/mach/mk_timer.h +++ b/osfmk/mach/mk_timer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,6 +32,9 @@ #define _MACH_MK_TIMER_H_ #include +#include + +__BEGIN_DECLS mach_port_name_t mk_timer_create(void); @@ -46,10 +49,14 @@ kern_return_t mk_timer_cancel( mach_port_name_t name, uint64_t *result_time); +__END_DECLS + +#pragma pack(4) struct mk_timer_expire_msg { mach_msg_header_t header; uint64_t unused[3]; }; +#pragma pack() typedef struct mk_timer_expire_msg mk_timer_expire_msg_t; diff --git a/osfmk/mach/ndr.h b/osfmk/mach/ndr.h index efb4fb180..9b3d18037 100644 --- a/osfmk/mach/ndr.h +++ b/osfmk/mach/ndr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,6 +27,7 @@ #define _MACH_NDR_H_ #include +#include typedef struct { unsigned char mig_vers; @@ -101,6 +102,8 @@ extern NDR_record_t NDR_record; #define __NDR_READSWAP__uint64_t(a) OSReadSwapInt64((void *)a, 0) #define __NDR_READSWAP__int64_t(a) (int64_t)OSReadSwapInt64((void *)a, 0) +__BEGIN_DECLS + static __inline__ float __NDR_READSWAP__float(float *argp) { union { float sv; @@ -119,6 +122,8 @@ static __inline__ double __NDR_READSWAP__double(double *argp) { return result.sv; } +__END_DECLS + #define __NDR_convert__int_rep__int16_t__defined #define __NDR_convert__int_rep__int16_t(v,f) \ __NDR_READSWAP_assign(v, __NDR_READSWAP__int16_t) diff --git a/osfmk/mach/notify.h b/osfmk/mach/notify.h index 28d823d92..89515ec57 100644 --- a/osfmk/mach/notify.h +++ b/osfmk/mach/notify.h @@ -82,8 +82,6 @@ typedef mach_port_t notify_port_t; -#include -#ifdef __APPLE_API_OBSOLETE /* * Hard-coded message structures for receiving Mach port notification * messages. However, they are not actually large enough to receive @@ -125,6 +123,4 @@ typedef struct { mach_msg_header_t not_header; mach_msg_body_t not_body; mach_msg_port_descriptor_t not_port;/* MACH_MSG_TYPE_PORT_NAME */ mach_msg_format_0_trailer_t trailer; } mach_dead_name_notification_t; -#endif /* __APPLE_API_OBSOLETE */ - #endif /* _MACH_NOTIFY_H_ */ diff --git a/osfmk/mach/policy.h b/osfmk/mach/policy.h index 12df6e4f5..d82b9dda1 100644 --- a/osfmk/mach/policy.h +++ b/osfmk/mach/policy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,15 +57,16 @@ * mach/policy.h * * Definitions for scheduling policy. - * - * N.B. The interfaces defined here are all obsolete!!
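For illustration, a user-level sketch of the mk_timer interfaces above: create a timer port, arm it, and block on the expiration message. mk_timer_arm() and mk_timer_destroy() belong to the same family but are not visible in this excerpt, so their use here is an assumption:

    #include <mach/mach.h>
    #include <mach/mk_timer.h>
    #include <mach/mach_time.h>

    /* block for roughly delta_abs mach_absolute_time() units */
    static kern_return_t
    sleep_via_mk_timer(uint64_t delta_abs)
    {
        mach_port_name_t timer;
        struct mk_timer_expire_msg msg;
        kern_return_t kr;

        timer = mk_timer_create();
        if (timer == MACH_PORT_NULL)
            return KERN_RESOURCE_SHORTAGE;

        kr = mk_timer_arm(timer, mach_absolute_time() + delta_abs);
        if (kr == KERN_SUCCESS) {
            /* the expiration message arrives on the timer port itself */
            kr = mach_msg(&msg.header, MACH_RCV_MSG, 0, sizeof(msg),
                          timer, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
        }
        mk_timer_destroy(timer);
        return kr;
    }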
+ */ + +/* + * All interfaces defined here are obsolete. */ #include +#include #include -#include - /* * Old scheduling control interface */ @@ -74,8 +75,6 @@ typedef integer_t *policy_info_t; typedef integer_t *policy_base_t; typedef integer_t *policy_limit_t; -#ifdef __APPLE_API_UNSTABLE - /* * Policy definitions. Policies should be powers of 2, * but cannot be or'd together other than to test for a @@ -128,12 +127,12 @@ typedef struct policy_timeshare_limit policy_timeshare_limit_data_t; typedef struct policy_timeshare_info policy_timeshare_info_data_t; -#define POLICY_TIMESHARE_BASE_COUNT \ - (sizeof(struct policy_timeshare_base)/sizeof(integer_t)) -#define POLICY_TIMESHARE_LIMIT_COUNT \ - (sizeof(struct policy_timeshare_limit)/sizeof(integer_t)) -#define POLICY_TIMESHARE_INFO_COUNT \ - (sizeof(struct policy_timeshare_info)/sizeof(integer_t)) +#define POLICY_TIMESHARE_BASE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_timeshare_base)/sizeof(integer_t))) +#define POLICY_TIMESHARE_LIMIT_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_timeshare_limit)/sizeof(integer_t))) +#define POLICY_TIMESHARE_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_timeshare_info)/sizeof(integer_t))) /* @@ -162,12 +161,12 @@ typedef struct policy_rr_base policy_rr_base_data_t; typedef struct policy_rr_limit policy_rr_limit_data_t; typedef struct policy_rr_info policy_rr_info_data_t; -#define POLICY_RR_BASE_COUNT \ - (sizeof(struct policy_rr_base)/sizeof(integer_t)) -#define POLICY_RR_LIMIT_COUNT \ - (sizeof(struct policy_rr_limit)/sizeof(integer_t)) -#define POLICY_RR_INFO_COUNT \ - (sizeof(struct policy_rr_info)/sizeof(integer_t)) +#define POLICY_RR_BASE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_rr_base)/sizeof(integer_t))) +#define POLICY_RR_LIMIT_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_rr_limit)/sizeof(integer_t))) +#define POLICY_RR_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_rr_info)/sizeof(integer_t))) /* @@ -194,12 +193,12 @@ typedef struct policy_fifo_base policy_fifo_base_data_t; typedef struct policy_fifo_limit policy_fifo_limit_data_t; typedef struct policy_fifo_info policy_fifo_info_data_t; -#define POLICY_FIFO_BASE_COUNT \ - (sizeof(struct policy_fifo_base)/sizeof(integer_t)) -#define POLICY_FIFO_LIMIT_COUNT \ - (sizeof(struct policy_fifo_limit)/sizeof(integer_t)) -#define POLICY_FIFO_INFO_COUNT \ - (sizeof(struct policy_fifo_info)/sizeof(integer_t)) +#define POLICY_FIFO_BASE_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_fifo_base)/sizeof(integer_t))) +#define POLICY_FIFO_LIMIT_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_fifo_limit)/sizeof(integer_t))) +#define POLICY_FIFO_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(struct policy_fifo_info)/sizeof(integer_t))) /* * Aggregate policy types @@ -227,6 +226,4 @@ typedef struct policy_bases policy_base_data_t; typedef struct policy_limits policy_limit_data_t; typedef struct policy_infos policy_info_data_t; -#endif /* __APPLE_API_UNSTABLE */ - -#endif /* _MACH_POLICY_H_ */ +#endif /* _MACH_POLICY_H_ */ diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h index df7d75142..b54438dc3 100644 --- a/osfmk/mach/port.h +++ b/osfmk/mach/port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
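The casts added to the POLICY_*_COUNT macros are an LP64 correctness fix: sizeof() yields size_t, which is 64 bits wide under LP64, while MIG counts are always the 32-bit mach_msg_type_number_t. A two-line illustration (variable name hypothetical):

    /* Without the cast this initializer narrows from 64 to 32 bits under
     * LP64 and draws a conversion warning; with it, the macro already has
     * the exact type the MIG-generated interfaces expect. */
    mach_msg_type_number_t count = POLICY_TIMESHARE_INFO_COUNT;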
* * @APPLE_LICENSE_HEADER_START@ * @@ -52,10 +52,23 @@ /* * File: mach/port.h * - * Definition of a port + * Definition of a Mach port + * + * Mach ports are the endpoints to Mach-implemented communications + * channels (usually uni-directional message queues, but other types + * also exist). + * + * Unique collections of these endpoints are maintained for each + * Mach task. Each Mach port in the task's collection is given a + * [task-local] name to identify it - and the various "rights" + * held by the task for that specific endpoint. + * + * This header defines the types used to identify these Mach ports + * and the various rights associated with them. For more info see: + * + * - manipulation of port rights in a given space + * - message queue [and port right passing] mechanism * - * [The basic port_t type should probably be machine-dependent, - * as it must be represented by a 32-bit integer.] */ #ifndef _MACH_PORT_H_ @@ -65,82 +78,97 @@ #include #include -#include - /* - * A port_name_t is a 32 bit value which represents a name of a - * port right within some ipc space. This is a constant definition - * everywhere. - * - * The type port_t represents a reference added or deleted to a - * port right. + * mach_port_name_t - the local identity for a Mach port * - * At user space, this is represented by returning the name of - * the right(s) that got altered within the user's ipc space. - * So a port_t is the same type as a port_name_t there. + * The name is Mach port namespace specific. It is used to + * identify the rights held for that port by the task whose + * namespace is implied [or specifically provided]. * - * Since there is no right space for the kernel proper (all rights - * are naked rights) these rights are represented by passing a - * pointer to the specific ipc_object_t subclass (typically - * ipc_port_t) that got altered/is to be altered. + * Use of this type usually implies just a name - no rights. + * See mach_port_t for a type that implies a "named right." * - * JMM - Because of this pointer/integer overloading, port names - * should be defined as uintptr_t types. But that would make - * message headers and descriptors pointer-length dependent. */ -typedef natural_t port_name_t; -typedef port_name_t *port_name_array_t; + +typedef natural_t mach_port_name_t; +typedef mach_port_name_t *mach_port_name_array_t; -#ifdef KERNEL_PRIVATE +#ifdef KERNEL_PRIVATE + +/* + * mach_port_t - a named port right + * + * In the kernel, "rights" are represented [named] by pointers to + * the ipc port object in question. There is no port namespace for the + * rights to be collected. + * + * Actually, there is a namespace for the kernel task. But most kernel + * code - including, but not limited to, Mach IPC code - lives in the + * limbo between the current user-level task and the "next" task. Very + * little of the kernel code runs in full kernel task context. So very + * little of it gets to use the kernel task's port name space. + * + * Because of this implementation approach, all in-kernel rights for + * a given port coalesce [have the same name/pointer]. The actual + * references are counted in the port itself. It is up to the kernel + * code in question to "just remember" how many [and what type of] + * rights it holds and handle them appropriately.
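To make the name/right distinction concrete, here is a minimal user-space sketch assuming only the long-standing mach_port_allocate() and mach_port_insert_right() calls: allocating a receive right yields a name, and a send right inserted on the same port coalesces under that same name.

    #include <mach/mach.h>

    mach_port_t port = MACH_PORT_NULL;  /* user space: same rep as mach_port_name_t */
    kern_return_t kr;

    kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port);
    if (kr == KERN_SUCCESS && MACH_PORT_VALID(port)) {
        /* The new send right shares the receive right's name. */
        kr = mach_port_insert_right(mach_task_self(), port, port,
                                    MACH_MSG_TYPE_MAKE_SEND);
    }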
+ * + */ -#if !defined(__APPLE_API_PRIVATE) || !defined(MACH_KERNEL_PRIVATE) +#ifndef MACH_KERNEL_PRIVATE /* - * For kernel code that resides outside of mach - * we define empty structs so that everything will - * remain strongly typed, without giving out - * implementation details. + * For kernel code that resides outside of Mach proper, we opaque the + * port structure definition. */ struct ipc_port ; -#endif /* !__APPLE_API_PRIVATE || !MACH_KERNEL_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ typedef struct ipc_port *ipc_port_t; -typedef ipc_port_t port_t; -#define IPC_PORT_NULL ((ipc_port_t) 0) -#define IPC_PORT_DEAD ((ipc_port_t)~0) -#define IPC_PORT_VALID(port) (((port) != IPC_PORT_NULL) && \ - ((port) != IPC_PORT_DEAD)) +#define IPC_PORT_NULL ((ipc_port_t) 0) +#define IPC_PORT_DEAD ((ipc_port_t)~0) +#define IPC_PORT_VALID(port) \ + ((port) != IPC_PORT_NULL && (port) != IPC_PORT_DEAD) -#else /* ! KERNEL_PRIVATE */ +typedef ipc_port_t mach_port_t; -typedef port_name_t port_t; +#else /* KERNEL_PRIVATE */ -#endif /* KERNEL_PRIVATE */ +/* + * mach_port_t - a named port right + * + * In user-space, "rights" are represented by the name of the + * right in the Mach port namespace. Even so, this type is + * presented as a unique one to more clearly denote the presence + * of a right coming along with the name. + * + * Often, various rights for a port held in a single name space + * will coalesce and are, therefore, identified by a single name + * [this is the case for send and receive rights]. But not + * always [send-once rights currently get a unique name for + * each right]. + * + */ + +#ifndef _MACH_PORT_T +#define _MACH_PORT_T +typedef mach_port_name_t mach_port_t; +#endif + +#endif /* KERNEL_PRIVATE */ + +typedef mach_port_t *mach_port_array_t; /* - * PORT_NULL is a legal value that can be carried in messages. + * MACH_PORT_NULL is a legal value that can be carried in messages. * It indicates the absence of any port or port rights. (A port * argument keeps the message from being "simple", even if the - * value is PORT_NULL.) The value PORT_DEAD is also a legal + * value is MACH_PORT_NULL.) The value MACH_PORT_DEAD is also a legal * value that can be carried in messages. It indicates * that a port right was present, but it died. */ -#define PORT_NULL ((port_t) 0) -#define PORT_DEAD ((port_t) ~0) -#define PORT_VALID(name) \ - (((port_t)(name) != PORT_NULL) && \ - ((port_t)(name) != PORT_DEAD)) - -/* - * Mach 3.0 renamed everything to have mach_ in front of it. - * Do that mapping here, so we have the types and macros in - * both formats. - */ -typedef port_t mach_port_t; -typedef port_t *mach_port_array_t; -typedef port_name_t mach_port_name_t; -typedef mach_port_name_t *mach_port_name_array_t; #define MACH_PORT_NULL 0 /* intentional loose typing */ #define MACH_PORT_DEAD ((mach_port_name_t) ~0) @@ -148,35 +176,39 @@ typedef mach_port_name_t *mach_port_name_array_t; (((name) != MACH_PORT_NULL) && \ ((name) != MACH_PORT_DEAD)) + /* - * mach_port_name_t must be an unsigned type. Port values - * have two parts, a generation number and an index. - * These macros encapsulate all knowledge of how - * a mach_port_name_t is laid out. They are made visible - * to user tasks so that packages to map from a mach_port_name_t - * to associated user data can discount the generation - * nuber (if desired) in doing the mapping. + * For kernel-selected [assigned] port names, the name is + * comprised of two parts: a generation number and an index.
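A small sketch of the encoding described here, using the MACH_PORT_INDEX/MACH_PORT_GEN/MACH_PORT_MAKE macros defined just below (the default, generation-counting layout is assumed):

    /* Index 5, second generation: the generation lives in the low 8 bits
     * of the name but is reported shifted into the high byte. */
    mach_port_name_t name  = MACH_PORT_MAKE(5, 2 << 24);  /* == 0x502 */
    natural_t        index = MACH_PORT_INDEX(name);       /* == 5 */
    natural_t        gen   = MACH_PORT_GEN(name);         /* == 2 << 24 */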
+ * This approach keeps the exact same name from being generated + * and reused too quickly [to catch right/reference counting bugs]. + * The dividing line between the constituent parts is exposed so + * that an efficient "mach_port_name_t to data structure pointer" + * conversion can be implemented. But it is possible + * for user-level code to assign its own names to Mach ports. + * These are not required to participate in this algorithm. So + * care should be taken before "assuming" this model. * - * Within the kernel, ipc/ipc_entry.c implicitly assumes - * when it uses the splay tree functions that the generation - * number is in the low bits, so that names are ordered first - * by index and then by generation. If the size of generation - * numbers changes, be sure to update IE_BITS_GEN_MASK and - * friends in ipc/ipc_entry.h. */ -#ifndef NO_PORT_GEN + +#ifndef NO_PORT_GEN + #define MACH_PORT_INDEX(name) ((name) >> 8) #define MACH_PORT_GEN(name) (((name) & 0xff) << 24) #define MACH_PORT_MAKE(index, gen) \ (((index) << 8) | (gen) >> 24) -#else + +#else /* NO_PORT_GEN */ + #define MACH_PORT_INDEX(name) (name) #define MACH_PORT_GEN(name) (0) #define MACH_PORT_MAKE(index, gen) (index) -#endif /* !NO_PORT_GEN */ + +#endif /* NO_PORT_GEN */ + /* - * These are the different rights a task may have. + * These are the different rights a task may have for a port. * The MACH_PORT_RIGHT_* definitions are used as arguments * to mach_port_allocate, mach_port_get_refs, etc, to specify * a particular right to act upon. The mach_port_names and @@ -253,11 +285,11 @@ typedef struct mach_port_status { boolean_t mps_srights; /* do send rights exist? */ boolean_t mps_pdrequest; /* port-deleted requested? */ boolean_t mps_nsrequest; /* no-senders requested? */ - unsigned int mps_flags; /* port flags */ + natural_t mps_flags; /* port flags */ } mach_port_status_t; #define MACH_PORT_QLIMIT_DEFAULT ((mach_port_msgcount_t) 5) -#define MACH_PORT_QLIMIT_MAX ((mach_port_msgcount_t) 16) +#define MACH_PORT_QLIMIT_MAX ((mach_port_msgcount_t) 16) typedef struct mach_port_limits { mach_port_msgcount_t mpl_qlimit; /* number of msgs */ @@ -271,17 +303,16 @@ typedef int mach_port_flavor_t; #define MACH_PORT_RECEIVE_STATUS 2 /* uses mach_port_limits_t */ #define MACH_PORT_DNREQUESTS_SIZE 3 /* info is int */ -#define MACH_PORT_LIMITS_INFO_COUNT \ - (sizeof(mach_port_limits_t)/sizeof(natural_t)) -#define MACH_PORT_RECEIVE_STATUS_COUNT \ - (sizeof(mach_port_status_t)/sizeof(natural_t)) +#define MACH_PORT_LIMITS_INFO_COUNT ((natural_t) \ + (sizeof(mach_port_limits_t)/sizeof(natural_t))) +#define MACH_PORT_RECEIVE_STATUS_COUNT ((natural_t) \ + (sizeof(mach_port_status_t)/sizeof(natural_t))) #define MACH_PORT_DNREQUESTS_SIZE_COUNT 1 /* * Structure used to pass information about port allocation requests. * Must be padded to 64-bits total length. */ - typedef struct mach_port_qos { boolean_t name:1; /* name given */ boolean_t prealloc:1; /* prealloced message */ @@ -289,4 +320,21 @@ typedef struct mach_port_qos { natural_t len; } mach_port_qos_t; +#if !defined(_POSIX_C_SOURCE) && !defined(_NO_PORT_T_FROM_MACH) +/* + * Mach 3.0 renamed everything to have mach_ in front of it. + * These types and macros are provided for backward compatibility + * but are deprecated.
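The mach_port_limits_t flavor above is exercised through mach_port_get_attributes()/mach_port_set_attributes(); a short sketch, assuming `port` names a receive right in the caller's space:

    mach_port_limits_t limits;
    mach_msg_type_number_t count = MACH_PORT_LIMITS_INFO_COUNT;
    kern_return_t kr;

    kr = mach_port_get_attributes(mach_task_self(), port,
                                  MACH_PORT_LIMITS_INFO,
                                  (mach_port_info_t)&limits, &count);
    if (kr == KERN_SUCCESS && limits.mpl_qlimit < MACH_PORT_QLIMIT_MAX) {
        limits.mpl_qlimit = MACH_PORT_QLIMIT_MAX;   /* deepen the queue */
        kr = mach_port_set_attributes(mach_task_self(), port,
                                      MACH_PORT_LIMITS_INFO,
                                      (mach_port_info_t)&limits,
                                      MACH_PORT_LIMITS_INFO_COUNT);
    }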
+ */ +typedef mach_port_t port_t; +typedef mach_port_name_t port_name_t; +typedef mach_port_name_t *port_name_array_t; + +#define PORT_NULL ((port_t) 0) +#define PORT_DEAD ((port_t) ~0) +#define PORT_VALID(name) \ + ((port_t)(name) != PORT_NULL && (port_t)(name) != PORT_DEAD) + +#endif /* !_POSIX_C_SOURCE && !_NO_PORT_T_FROM_MACH */ + #endif /* _MACH_PORT_H_ */ diff --git a/osfmk/mach/ppc/Makefile b/osfmk/mach/ppc/Makefile index 2479b84ca..bdfa7ed0a 100644 --- a/osfmk/mach/ppc/Makefile +++ b/osfmk/mach/ppc/Makefile @@ -13,7 +13,7 @@ DATAFILES = \ boolean.h exception.h kern_return.h ndr_def.h \ processor_info.h rpc.h thread_state.h thread_status.h \ vm_param.h vm_types.h machine_types.defs \ - syscall_sw.h + syscall_sw.h _types.h INSTALL_MD_LIST = ${DATAFILES} diff --git a/osfmk/mach/ppc/_types.h b/osfmk/mach/ppc/_types.h new file mode 100644 index 000000000..098c1670a --- /dev/null +++ b/osfmk/mach/ppc/_types.h @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ + +#ifndef _MACH_PPC__TYPES_H_ +#define _MACH_PPC__TYPES_H_ + +/* + * ppc_thread_state is the structure that is exported to user threads for + * use in status/mutate calls. This structure should never change. 
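These structures are consumed through thread_get_state()/thread_set_state(). A sketch of the round trip, assuming `thread` is a send right to a (preferably suspended) thread and using the flavor constant and count macro published in <mach/ppc/thread_status.h>:

    #include <mach/mach.h>

    ppc_thread_state_t state;
    mach_msg_type_number_t count = PPC_THREAD_STATE_COUNT;
    kern_return_t kr;

    kr = thread_get_state(thread, PPC_THREAD_STATE,
                          (thread_state_t)&state, &count);
    if (kr == KERN_SUCCESS) {
        /* srr0 holds the PC; e.g., bump it past a faulting instruction. */
        state.srr0 += 4;
        kr = thread_set_state(thread, PPC_THREAD_STATE,
                              (thread_state_t)&state, count);
    }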
+ * + */ + +#ifndef _POSIX_C_SOURCE +struct ppc_thread_state +#else /* _POSIX_C_SOURCE */ +struct __darwin_ppc_thread_state +#endif /* _POSIX_C_SOURCE */ +{ + unsigned int srr0; /* Instruction address register (PC) */ + unsigned int srr1; /* Machine state register (supervisor) */ + unsigned int r0; + unsigned int r1; + unsigned int r2; + unsigned int r3; + unsigned int r4; + unsigned int r5; + unsigned int r6; + unsigned int r7; + unsigned int r8; + unsigned int r9; + unsigned int r10; + unsigned int r11; + unsigned int r12; + unsigned int r13; + unsigned int r14; + unsigned int r15; + unsigned int r16; + unsigned int r17; + unsigned int r18; + unsigned int r19; + unsigned int r20; + unsigned int r21; + unsigned int r22; + unsigned int r23; + unsigned int r24; + unsigned int r25; + unsigned int r26; + unsigned int r27; + unsigned int r28; + unsigned int r29; + unsigned int r30; + unsigned int r31; + + unsigned int cr; /* Condition register */ + unsigned int xer; /* User's integer exception register */ + unsigned int lr; /* Link register */ + unsigned int ctr; /* Count register */ + unsigned int mq; /* MQ register (601 only) */ + + unsigned int vrsave; /* Vector Save Register */ +}; + +#ifndef _POSIX_C_SOURCE +#pragma pack(4) /* Make sure the structure stays as we defined it */ +struct ppc_thread_state64 { + unsigned long long srr0; /* Instruction address register (PC) */ + unsigned long long srr1; /* Machine state register (supervisor) */ + unsigned long long r0; + unsigned long long r1; + unsigned long long r2; + unsigned long long r3; + unsigned long long r4; + unsigned long long r5; + unsigned long long r6; + unsigned long long r7; + unsigned long long r8; + unsigned long long r9; + unsigned long long r10; + unsigned long long r11; + unsigned long long r12; + unsigned long long r13; + unsigned long long r14; + unsigned long long r15; + unsigned long long r16; + unsigned long long r17; + unsigned long long r18; + unsigned long long r19; + unsigned long long r20; + unsigned long long r21; + unsigned long long r22; + unsigned long long r23; + unsigned long long r24; + unsigned long long r25; + unsigned long long r26; + unsigned long long r27; + unsigned long long r28; + unsigned long long r29; + unsigned long long r30; + unsigned long long r31; + + unsigned int cr; /* Condition register */ + unsigned long long xer; /* User's integer exception register */ + unsigned long long lr; /* Link register */ + unsigned long long ctr; /* Count register */ + + unsigned int vrsave; /* Vector Save Register */ +}; + +#pragma pack() +#endif /* _POSIX_C_SOURCE */ + +/* This structure should be double-word aligned for performance */ + +#ifndef _POSIX_C_SOURCE +struct ppc_float_state +#else /* _POSIX_C_SOURCE */ +struct __darwin_ppc_float_state +#endif /* _POSIX_C_SOURCE */ +{ + double fpregs[32]; + + unsigned int fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ + unsigned int fpscr; /* floating point status register */ +}; + +#pragma pack(4) /* Make sure the structure stays as we defined it */ + +#ifndef _POSIX_C_SOURCE +struct ppc_vector_state +#else /* _POSIX_C_SOURCE */ +struct __darwin_ppc_vector_state +#endif /* _POSIX_C_SOURCE */ +{ +#if defined(__LP64__) + unsigned int save_vr[32][4]; + unsigned int save_vscr[4]; +#else + unsigned long save_vr[32][4]; + unsigned long save_vscr[4]; +#endif + unsigned int save_pad5[4]; + unsigned int save_vrvalid; /* VRs that have been saved */ + unsigned int save_pad6[7]; +}; +#pragma pack() + +/* + * ppc_exception_state + * + * This structure corresponds to some 
additional state of the user + * registers as saved in the PCB upon kernel entry. They are only + * available if an exception is passed out of the kernel, and even + * then not all are guaranteed to be updated. + * + * Some padding is included in this structure which allows space for + * servers to store temporary values if need be, to maintain binary + * compatibility. + */ + +/* Exception state for 32-bit thread (on 32-bit processor) */ +/* Still available on 64-bit processors, but may fall short */ +/* of covering the full potential state (hi half available). */ + +#pragma pack(4) /* Make sure the structure stays as we defined it */ + +#ifndef _POSIX_C_SOURCE +struct ppc_exception_state +#else /* _POSIX_C_SOURCE */ +struct __darwin_ppc_exception_state +#endif /* _POSIX_C_SOURCE */ +{ +#if defined(__LP64__) + unsigned int dar; /* Fault registers for coredump */ + unsigned int dsisr; + unsigned int exception; /* number of powerpc exception taken */ + unsigned int pad0; /* align to 16 bytes */ + unsigned int pad1[4]; /* space in PCB "just in case" */ +#else + unsigned long dar; /* Fault registers for coredump */ + unsigned long dsisr; + unsigned long exception; /* number of powerpc exception taken */ + unsigned long pad0; /* align to 16 bytes */ + unsigned long pad1[4]; /* space in PCB "just in case" */ +#endif +}; + +#ifndef _POSIX_C_SOURCE +struct ppc_exception_state64 { + unsigned long long dar; /* Fault registers for coredump */ +#if defined(__LP64__) + unsigned int dsisr; + unsigned int exception; /* number of powerpc exception taken */ + unsigned int pad1[4]; /* space in PCB "just in case" */ +#else + unsigned long dsisr; + unsigned long exception; /* number of powerpc exception taken */ + unsigned long pad1[4]; /* space in PCB "just in case" */ +#endif +}; +#endif /* _POSIX_C_SOURCE */ + +#pragma pack() + +#endif /* _MACH_PPC__TYPES_H_ */ diff --git a/osfmk/mach/ppc/boolean.h b/osfmk/mach/ppc/boolean.h index 8e5a91443..3767a2115 100644 --- a/osfmk/mach/ppc/boolean.h +++ b/osfmk/mach/ppc/boolean.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,73 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:46 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.1 1996/12/09 16:50:03 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * [1996/12/09 10:50:49 stephen] - * - * Revision 1.1.6.1 1996/04/11 11:19:44 emcmanus - * Copied from mainline.ppc. - * [1996/04/10 16:56:37 emcmanus] - * - * Revision 1.1.4.1 1995/11/23 17:36:42 stephen - * first powerpc checkin to mainline.ppc - * [1995/11/23 16:44:33 stephen] - * - * Revision 1.1.2.1 1995/08/25 06:49:32 stephen - * Initial checkin of files for PowerPC port - * [1995/08/23 16:27:03 stephen] - * - * Initial checkin of files for PowerPC port - * [1995/08/23 15:03:41 stephen] - * - * Revision 1.2.6.1 1994/09/23 02:36:44 ezf - * change marker to not FREE - * [1994/09/22 21:39:49 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:19 gm - * Added to OSF/1 R1.3 from NMK15.0.
- * [1993/06/02 21:16:03 jeffc] - * - * Revision 1.2 1993/04/19 16:33:37 devrcs - * ansi C conformance changes - * [1993/02/02 18:55:53 david] - * - * Revision 1.1 1992/09/30 02:30:40 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:51:56 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:32:04 mrt - * Changed to new Mach copyright - * [91/02/01 17:09:33 mrt] - * - * Revision 2.2 90/05/03 15:47:26 dbg - * First checkin. - * - * Revision 1.3 89/03/09 20:19:36 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:00:41 gm0w - * Changes for cleanup. - * - * 24-Sep-86 Michael Young (mwyoung) at Carnegie-Mellon University - * Created. - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -126,6 +59,10 @@ #ifndef _MACH_PPC_BOOLEAN_H_ #define _MACH_PPC_BOOLEAN_H_ +#if defined(__ppc64__) +typedef unsigned int boolean_t; +#else typedef int boolean_t; +#endif #endif /* _MACH_PPC_BOOLEAN_H_ */ diff --git a/osfmk/mach/ppc/exception.h b/osfmk/mach/ppc/exception.h index 9f9e8a6f5..47232ac7a 100644 --- a/osfmk/mach/ppc/exception.h +++ b/osfmk/mach/ppc/exception.h @@ -48,9 +48,6 @@ #ifndef _MACH_PPC_EXCEPTION_H_ #define _MACH_PPC_EXCEPTION_H_ - -/* STATUS - TODO - this is just transcribed from hp stuff */ - #define EXC_TYPES_COUNT 10 /* incl. illegal exception 0 */ #define EXCEPTION_CODE_MAX 2 /* elements in vector (code+subcode) */ diff --git a/osfmk/mach/ppc/kern_return.h b/osfmk/mach/ppc/kern_return.h index e14be8d22..5ddff5a56 100644 --- a/osfmk/mach/ppc/kern_return.h +++ b/osfmk/mach/ppc/kern_return.h @@ -22,76 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:46 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.1 1996/12/09 16:50:07 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * [1996/12/09 10:50:59 stephen] - * - * Revision 1.1.6.1 1996/04/11 11:19:54 emcmanus - * Copied from mainline.ppc. - * [1996/04/10 16:56:46 emcmanus] - * - * Revision 1.1.4.1 1995/11/23 17:36:50 stephen - * first powerpc checkin to mainline.ppc - * [1995/11/23 16:44:51 stephen] - * - * Revision 1.1.2.1 1995/08/25 06:49:40 stephen - * Initial checkin of files for PowerPC port - * [1995/08/23 16:28:33 stephen] - * - * Initial checkin of files for PowerPC port - * [1995/08/23 15:04:02 stephen] - * - * Revision 1.2.6.1 1994/09/23 02:37:12 ezf - * change marker to not FREE - * [1994/09/22 21:40:01 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:40:35 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:15 jeffc] - * - * Revision 1.2 1993/04/19 16:33:58 devrcs - * ansi C conformance changes - * [1993/02/02 18:56:09 david] - * - * Revision 1.1 1992/09/30 02:30:47 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 16:52:15 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:32:12 mrt - * Changed to new Mach copyright - * [91/02/01 17:09:54 mrt] - * - * Revision 2.2 90/05/03 15:47:51 dbg - * First checkin. - * - * Revision 1.3 89/03/09 20:19:48 rpd - * More cleanup. - * - * Revision 1.2 89/02/26 13:00:54 gm0w - * Changes for cleanup. - * - * 3-Mar-87 Avadis Tevanian (avie) at Carnegie-Mellon University - * Allow inclusion in assembler input. - * - * 14-Oct-85 Michael Wayne Young (mwyoung) at Carnegie-Mellon University - * Created. 
- */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -134,4 +64,5 @@ #ifndef ASSEMBLER typedef int kern_return_t; #endif /* ASSEMBLER */ + #endif /* _MACH_PPC_KERN_RETURN_H_ */ diff --git a/osfmk/mach/ppc/machine_types.defs b/osfmk/mach/ppc/machine_types.defs index 3b852b6a0..60eca8e0b 100644 --- a/osfmk/mach/ppc/machine_types.defs +++ b/osfmk/mach/ppc/machine_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,12 +22,13 @@ /* * @OSF_COPYRIGHT@ */ + /* - * Header file for basic, machine-dependent data types. i386 version. + * Header file for basic, machine-dependent data types. */ -#ifndef _MACHINE_VM_TYPES_DEFS_ -#define _MACHINE_VM_TYPES_DEFS_ 1 +#ifndef _PPC_VM_TYPES_DEFS_ +#define _PPC_VM_TYPES_DEFS_ type short = int16_t; type int = int32_t; @@ -36,9 +37,8 @@ type unsigned = uint32_t; type float = MACH_MSG_TYPE_REAL_32; type double = MACH_MSG_TYPE_REAL_64; - /* from ISO/IEC 988:1999 spec */ -/* 7.18.1.4 Integer types capable of hgolding object pointers */ +/* 7.18.1.4 Integer types capable of holding object pointers */ /* * The [u]intptr_t types for the native * integer type, e.g. 32 or 64 or.. whatever @@ -51,8 +51,13 @@ type double = MACH_MSG_TYPE_REAL_64; * a port in user space as an integer and * in kernel space as a pointer. */ +#if defined(__ppc64__) +type uintptr_t = uint64_t; +type intptr_t = int64_t; +#else type uintptr_t = uint32_t; type intptr_t = int32_t; +#endif /* * These are the legacy Mach types that are @@ -60,18 +65,44 @@ type intptr_t = int32_t; * They were defined in terms of int, not * long int, so they remain separate. */ +#if defined(__ppc64__) +type register_t = int64_t; +#else +type register_t = int32_t; +#endif type integer_t = int32_t; type natural_t = uint32_t; -type register_t = int32_t; +/* + * These are the VM types that scale with the address + * space size of a given process. + */ + +#if defined(__ppc64__) +type vm_address_t = uint64_t; +type vm_offset_t = uint64_t; +type vm_size_t = uint64_t; +#else +type vm_address_t = natural_t; +type vm_offset_t = natural_t; +type vm_size_t = natural_t; +#endif + +/* + * The mach_vm_xxx_t types are sized to hold the + * maximum pointer, offset, etc... supported on the + * platform. + */ +type mach_vm_address_t = uint64_t; +type mach_vm_offset_t = uint64_t; +type mach_vm_size_t = uint64_t; #if MACH_IPC_COMPAT /* * For the old IPC interface */ -#define MSG_TYPE_PORT_NAME natural_t +#define MSG_TYPE_PORT_NAME uint32_t #endif /* MACH_IPC_COMPAT */ - -#endif /* _MACHINE_VM_TYPES_DEFS_ */ +#endif /* _PPC_VM_TYPES_DEFS_ */ diff --git a/osfmk/mach/ppc/ndr_def.h b/osfmk/mach/ppc/ndr_def.h index 40e82519c..818f9906c 100644 --- a/osfmk/mach/ppc/ndr_def.h +++ b/osfmk/mach/ppc/ndr_def.h @@ -22,33 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:31 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:02 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.8.1 1996/12/09 16:55:41 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * [1996/12/09 11:10:10 stephen] - * - * Revision 1.1.6.1 1996/04/11 09:10:43 emcmanus - * Copied from mainline.ppc. 
- * [1996/04/10 17:09:22 emcmanus] - * - * Revision 1.1.4.1 1995/11/23 17:39:22 stephen - * first powerpc checkin to mainline.ppc - * [1995/11/23 16:53:16 stephen] - * - * Revision 1.1.2.1 1995/08/25 06:35:32 stephen - * Initial checkin of files for PowerPC port - * [1995/08/23 15:13:31 stephen] - * - * $EndLog$ - */ #include diff --git a/osfmk/mach/ppc/processor_info.h b/osfmk/mach/ppc/processor_info.h index 9712b9543..3195c2c23 100644 --- a/osfmk/mach/ppc/processor_info.h +++ b/osfmk/mach/ppc/processor_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,20 @@ * * @APPLE_LICENSE_HEADER_END@ */ + /* * File: mach/ppc/processor_info.h * * Data structure definitions for ppc specific processor control */ - #ifndef _MACH_PPC_PROCESSOR_INFO_H_ #define _MACH_PPC_PROCESSOR_INFO_H_ #include +#include + +#ifdef PRIVATE /* processor_control command operations */ #define PROCESSOR_PM_SET_REGS 1 /* Set Performance Monitor Registers */ @@ -38,6 +41,8 @@ /* * Performance Monitor Register structures + * + * XXX - These have not been updated for ppc64. */ typedef union { @@ -101,8 +106,8 @@ struct processor_pm_regs { typedef struct processor_pm_regs processor_pm_regs_data_t; typedef struct processor_pm_regs *processor_pm_regs_t; -#define PROCESSOR_PM_REGS_COUNT \ - (sizeof(processor_pm_regs_data_t) / sizeof (unsigned int)) +#define PROCESSOR_PM_REGS_COUNT ((mach_msg_type_number_t) \ + (sizeof(processor_pm_regs_data_t) / sizeof (unsigned int))) #define PROCESSOR_PM_REGS_COUNT_POWERPC_750 \ (PROCESSOR_PM_REGS_COUNT * 2 ) @@ -110,11 +115,6 @@ typedef struct processor_pm_regs *processor_pm_regs_t; #define PROCESSOR_PM_REGS_COUNT_POWERPC_7400 \ (PROCESSOR_PM_REGS_COUNT * 3 ) -typedef unsigned int processor_temperature_data_t; -typedef unsigned int *processor_temperature_t; - -#define PROCESSOR_TEMPERATURE_COUNT 1 - union processor_control_data { processor_pm_regs_data_t cmd_pm_regs[3]; }; @@ -131,9 +131,9 @@ typedef struct processor_control_cmd *processor_control_cmd_t; #define cmd_pm_regs u.cmd_pm_regs; #define cmd_pm_ctls u.cmd_pm_ctls; -#define PROCESSOR_CONTROL_CMD_COUNT \ +#define PROCESSOR_CONTROL_CMD_COUNT ((mach_msg_type_number_t) \ (((sizeof(processor_control_cmd_data_t)) - \ - (sizeof(union processor_control_data))) / sizeof (integer_t)) + (sizeof(union processor_control_data))) / sizeof (integer_t))) /* x should be a processor_pm_regs_t */ #define PERFMON_MMCR0(x) ((x)[0].u.mmcr0.word) @@ -160,5 +160,11 @@ typedef struct processor_control_cmd *processor_control_cmd_t; #define PERFMON_PMC3_CV(x) ((x)[1].u.pmc[0].bits.cv) #define PERFMON_PMC4_CV(x) ((x)[1].u.pmc[1].bits.cv) -#endif /* _MACH_PPC_PROCESSOR_INFO_H_ */ +typedef unsigned int processor_temperature_data_t; +typedef unsigned int *processor_temperature_t; + +#define PROCESSOR_TEMPERATURE_COUNT 1 + +#endif /* PRIVATE */ + +#endif /* _MACH_PPC_PROCESSOR_INFO_H_ */ diff --git a/osfmk/mach/ppc/rpc.h b/osfmk/mach/ppc/rpc.h index 1ceb410c3..0380563c4 100644 --- a/osfmk/mach/ppc/rpc.h +++ b/osfmk/mach/ppc/rpc.h @@ -21,20 +21,9 @@ */ /* * @OSF_COPYRIGHT@ - * */ -#ifndef _MACH_PPC_RPC_H_ -#define _MACH_PPC_RPC_H_ - -#endif /* _MACH_PPC_RPC_H_ */ - - - - - - - - - +#ifndef _MACH_PPC_RPC_H_ +#define _MACH_PPC_RPC_H_ +#endif /* _MACH_PPC_RPC_H_ */ diff --git a/osfmk/mach/ppc/syscall_sw.h b/osfmk/mach/ppc/syscall_sw.h index ce53af7e4..3fc7c6106 100644 --- a/osfmk/mach/ppc/syscall_sw.h +++ 
b/osfmk/mach/ppc/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,6 +23,8 @@ * @OSF_COPYRIGHT@ */ +#ifdef PRIVATE + #ifndef _MACH_PPC_SYSCALL_SW_H_ #define _MACH_PPC_SYSCALL_SW_H_ @@ -34,19 +36,6 @@ ENTRY(trap_name, TAG_NO_FRAME_USED) @\ sc @\ blr -#define rpc_trap(trap_name,trap_number,number_args) \ -ENTRY(trap_name, TAG_NO_FRAME_USED) @\ - li r0, trap_number @\ - sc @\ - blr - - /* CHECKME! What is this supposed to do? */ -#define rpc_return_trap(trap_name,trap_number,number_args) \ -ENTRY(trap_name, TAG_NO_FRAME_USED) @\ - li r0, trap_number @\ - sc @\ - blr - #define ppc_trap(trap_name,trap_number) \ ENTRY(trap_name, TAG_NO_FRAME_USED) @\ li r0, trap_number @\ @@ -60,7 +49,9 @@ ENTRY(trap_name, TAG_NO_FRAME_USED) @\ * * Note: PPC-only system calls are in the 0x6000 to 0x6FFF range */ + #ifdef _MACH_SYSCALL_SW_H_ + ppc_trap(diagCall,0x6000) ppc_trap(vmm_get_version,0x6001) ppc_trap(vmm_get_features,0x6002) @@ -74,6 +65,9 @@ ppc_trap(CHUDCall,0x6009) ppc_trap(ppcNull,0x600A) ppc_trap(perfmon_control,0x600B) ppc_trap(ppcNullinst,0x600C) + #endif /* _MACH_SYSCALL_SW_H_ */ #endif /* _MACH_PPC_SYSCALL_SW_H_ */ + +#endif /* PRIVATE */ diff --git a/osfmk/mach/ppc/thread_state.h b/osfmk/mach/ppc/thread_state.h index 515d0d730..668e0f8fa 100644 --- a/osfmk/mach/ppc/thread_state.h +++ b/osfmk/mach/ppc/thread_state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,13 +22,14 @@ /* * @OSF_COPYRIGHT@ */ + #ifndef _MACH_PPC_THREAD_STATE_H_ #define _MACH_PPC_THREAD_STATE_H_ /* Size of maximum exported thread state in words */ #define PPC_THREAD_STATE_MAX (144) /* Size of biggest state possible */ -#if defined (__ppc__) +#if defined (__ppc__) || defined (__ppc64__) #define THREAD_STATE_MAX PPC_THREAD_STATE_MAX #endif diff --git a/osfmk/mach/ppc/thread_status.h b/osfmk/mach/ppc/thread_status.h index d552c6ff4..ef389380e 100644 --- a/osfmk/mach/ppc/thread_status.h +++ b/osfmk/mach/ppc/thread_status.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,11 +26,9 @@ #ifndef _MACH_PPC_THREAD_STATUS_H_ #define _MACH_PPC_THREAD_STATUS_H_ -#include +#include +#include -#ifdef MACH_KERNEL_PRIVATE -#include -#endif /* * ppc_thread_state is the structure that is exported to user threads for * use in status/mutate calls. This structure should never change. 
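PPC_THREAD_STATE_MAX (and THREAD_STATE_MAX, which it now backs on __ppc64__ as well) exists so flavor-agnostic code can size a worst-case state buffer once; a sketch, assuming `thread` and `flavor` are supplied by the caller:

    natural_t buffer[THREAD_STATE_MAX];
    mach_msg_type_number_t count = THREAD_STATE_MAX;
    kern_return_t kr;

    /* Works for any flavor: the kernel trims `count` to the flavor's size. */
    kr = thread_get_state(thread, flavor, (thread_state_t)buffer, &count);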
@@ -61,113 +59,27 @@ (x == PPC_EXCEPTION_STATE64) || \ (x == THREAD_STATE_NONE)) -typedef struct ppc_thread_state { - unsigned int srr0; /* Instruction address register (PC) */ - unsigned int srr1; /* Machine state register (supervisor) */ - unsigned int r0; - unsigned int r1; - unsigned int r2; - unsigned int r3; - unsigned int r4; - unsigned int r5; - unsigned int r6; - unsigned int r7; - unsigned int r8; - unsigned int r9; - unsigned int r10; - unsigned int r11; - unsigned int r12; - unsigned int r13; - unsigned int r14; - unsigned int r15; - unsigned int r16; - unsigned int r17; - unsigned int r18; - unsigned int r19; - unsigned int r20; - unsigned int r21; - unsigned int r22; - unsigned int r23; - unsigned int r24; - unsigned int r25; - unsigned int r26; - unsigned int r27; - unsigned int r28; - unsigned int r29; - unsigned int r30; - unsigned int r31; - - unsigned int cr; /* Condition register */ - unsigned int xer; /* User's integer exception register */ - unsigned int lr; /* Link register */ - unsigned int ctr; /* Count register */ - unsigned int mq; /* MQ register (601 only) */ - - unsigned int vrsave; /* Vector Save Register */ -} ppc_thread_state_t; - -#pragma pack(4) /* Make sure the structure stays as we defined it */ -typedef struct ppc_thread_state64 { - unsigned long long srr0; /* Instruction address register (PC) */ - unsigned long long srr1; /* Machine state register (supervisor) */ - unsigned long long r0; - unsigned long long r1; - unsigned long long r2; - unsigned long long r3; - unsigned long long r4; - unsigned long long r5; - unsigned long long r6; - unsigned long long r7; - unsigned long long r8; - unsigned long long r9; - unsigned long long r10; - unsigned long long r11; - unsigned long long r12; - unsigned long long r13; - unsigned long long r14; - unsigned long long r15; - unsigned long long r16; - unsigned long long r17; - unsigned long long r18; - unsigned long long r19; - unsigned long long r20; - unsigned long long r21; - unsigned long long r22; - unsigned long long r23; - unsigned long long r24; - unsigned long long r25; - unsigned long long r26; - unsigned long long r27; - unsigned long long r28; - unsigned long long r29; - unsigned long long r30; - unsigned long long r31; - - unsigned int cr; /* Condition register */ - unsigned long long xer; /* User's integer exception register */ - unsigned long long lr; /* Link register */ - unsigned long long ctr; /* Count register */ - - unsigned int vrsave; /* Vector Save Register */ -} ppc_thread_state64_t; -#pragma pack() - -/* This structure should be double-word aligned for performance */ - -typedef struct ppc_float_state { - double fpregs[32]; - - unsigned int fpscr_pad; /* fpscr is 64 bits, 32 bits of rubbish */ - unsigned int fpscr; /* floating point status register */ -} ppc_float_state_t; - -typedef struct ppc_vector_state { - unsigned long save_vr[32][4]; - unsigned long save_vscr[4]; - unsigned int save_pad5[4]; - unsigned int save_vrvalid; /* VRs that have been saved */ - unsigned int save_pad6[7]; -} ppc_vector_state_t; +#ifndef _POSIX_C_SOURCE +typedef struct ppc_thread_state ppc_thread_state_t; +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ppc_thread_state ppc_thread_state_t; +#endif /* _POSIX_C_SOURCE */ + +#ifndef _POSIX_C_SOURCE +typedef struct ppc_thread_state64 ppc_thread_state64_t; +#endif /* _POSIX_C_SOURCE */ + +#ifndef _POSIX_C_SOURCE +typedef struct ppc_float_state ppc_float_state_t; +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ppc_float_state ppc_float_state_t; +#endif /* 
_POSIX_C_SOURCE */ + +#ifndef _POSIX_C_SOURCE +typedef struct ppc_vector_state ppc_vector_state_t; +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ppc_vector_state ppc_vector_state_t; +#endif /* _POSIX_C_SOURCE */ /* * saved state structure @@ -176,11 +88,17 @@ typedef struct ppc_vector_state { * */ -#if defined(__APPLE_API_PRIVATE) && defined(MACH_KERNEL_PRIVATE) -typedef struct savearea ppc_saved_state_t; -#else -typedef struct ppc_thread_state ppc_saved_state_t; -#endif /* __APPLE_API_PRIVATE && MACH_KERNEL_PRIVATE */ +#ifdef MACH__POSIX_C_SOURCE_PRIVATE + +#include + +typedef struct savearea ppc_saved_state_t; + +#else /* MACH__POSIX_C_SOURCE_PRIVATE */ + +typedef struct ppc_thread_state ppc_saved_state_t; + +#endif /* MACH__POSIX_C_SOURCE_PRIVATE */ /* * ppc_exception_state @@ -195,46 +113,38 @@ typedef struct ppc_thread_state ppc_saved_state_t; * compatiblity. */ -typedef struct ppc_exception_state { - unsigned long dar; /* Fault registers for coredump */ - unsigned long dsisr; - unsigned long exception; /* number of powerpc exception taken */ - unsigned long pad0; /* align to 16 bytes */ - - unsigned long pad1[4]; /* space in PCB "just in case" */ -} ppc_exception_state_t; - -#pragma pack(4) /* Make sure the structure stays as we defined it */ -typedef struct ppc_exception_state64 { - unsigned long long dar; /* Fault registers for coredump */ - unsigned long dsisr; - unsigned long exception; /* number of powerpc exception taken */ +/* Exception state for 32-bit thread (on 32-bit processor) */ +/* Still available on 64-bit processors, but may fall short */ +/* of covering the full potential state (hi half available). */ - unsigned long pad1[4]; /* space in PCB "just in case" */ -} ppc_exception_state64_t; -#pragma pack() +#ifndef _POSIX_C_SOURCE +typedef struct ppc_exception_state ppc_exception_state_t; +typedef struct ppc_exception_state64 ppc_exception_state64_t; +#else /* _POSIX_C_SOURCE */ +typedef struct __darwin_ppc_exception_state ppc_exception_state_t; +#endif /* _POSIX_C_SOURCE */ /* * Save State Flags */ -#define PPC_THREAD_STATE_COUNT \ - (sizeof(struct ppc_thread_state) / sizeof(int)) +#define PPC_THREAD_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_thread_state_t) / sizeof(int))) -#define PPC_THREAD_STATE64_COUNT \ - (sizeof(struct ppc_thread_state64) / sizeof(int)) +#define PPC_THREAD_STATE64_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_thread_state64_t) / sizeof(int))) -#define PPC_EXCEPTION_STATE_COUNT \ - (sizeof(struct ppc_exception_state) / sizeof(int)) +#define PPC_EXCEPTION_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_exception_state_t) / sizeof(int))) -#define PPC_EXCEPTION_STATE64_COUNT \ - (sizeof(struct ppc_exception_state64) / sizeof(int)) +#define PPC_EXCEPTION_STATE64_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_exception_state64_t) / sizeof(int))) -#define PPC_FLOAT_STATE_COUNT \ - (sizeof(struct ppc_float_state) / sizeof(int)) +#define PPC_FLOAT_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_float_state_t) / sizeof(int))) -#define PPC_VECTOR_STATE_COUNT \ - (sizeof(struct ppc_vector_state) / sizeof(int)) +#define PPC_VECTOR_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof(ppc_vector_state_t) / sizeof(int))) /* * Machine-independent way for servers and Mach's exception mechanism to diff --git a/osfmk/mach/ppc/vm_param.h b/osfmk/mach/ppc/vm_param.h index bfbb796ab..7a8a1ca84 100644 --- a/osfmk/mach/ppc/vm_param.h +++ b/osfmk/mach/ppc/vm_param.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,26 +23,72 @@ /* * @OSF_COPYRIGHT@ */ + #ifndef _MACH_PPC_VM_PARAM_H_ #define _MACH_PPC_VM_PARAM_H_ -#define BYTE_SIZE 8 /* byte size in bits */ +/* + * These are the global definitions + */ -#define PPC_PGBYTES 4096 /* bytes per ppc page */ -#define PPC_PGSHIFT 12 /* number of bits to shift for pages */ +#define BYTE_SIZE 8 /* byte size in bits */ -#define VM_MAX_PAGE_ADDRESS 0xFFFFFFFFFFFFF000ULL +#define PPC_PGBYTES 4096 /* bytes per ppc page */ +#define PPC_PGSHIFT 12 /* number of bits to shift for pages */ +#define PAGE_SIZE PPC_PGBYTES +#define PAGE_SHIFT PPC_PGSHIFT +#define PAGE_MASK (PAGE_SIZE - 1) + +#if 0 +#define VM_MAX_PAGE_ADDRESS 0xFFFFFFFFFFFFF000ULL +#else +/* + * LP64todo - For now, we are limited to 51-bits of user addressing + */ +#define VM_MAX_PAGE_ADDRESS 0x0007FFFFFFFFF000ULL +#endif + +#define MACH_VM_MIN_ADDRESS ((mach_vm_offset_t) 0) +#define MACH_VM_MAX_ADDRESS ((mach_vm_offset_t) VM_MAX_PAGE_ADDRESS) + +/* + * These are the values relative to the local process. + */ +#if defined (__ppc64__) +/* + * LP64todo - We don't have the 64-bit address space layout yet. + * Use the 32-bit stack layout for now. + */ +#define VM_MIN_ADDRESS ((vm_offset_t) MACH_VM_MIN_ADDRESS) +#define VM_MAX_ADDRESS ((vm_offset_t) MACH_VM_MAX_ADDRESS) +#define USER_STACK_END ((vm_offset_t) 0x00000000ffff0000ULL) +#else #define VM_MIN_ADDRESS ((vm_offset_t) 0) #define VM_MAX_ADDRESS ((vm_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) +#define USER_STACK_END ((vm_offset_t) 0xffff0000U) +#endif /* defined(__ppc64__) */ -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000) +#ifdef KERNEL_PRIVATE +/* Kernel-wide values */ +#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000) #define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xDFFFFFFF) +#define KERNEL_STACK_SIZE (4 * PPC_PGBYTES) +#define INTSTACK_SIZE (5 * PPC_PGBYTES) -#define USER_STACK_END ((vm_offset_t) 0xffff0000U) +#define VM_MAP_MIN_ADDRESS MACH_VM_MIN_ADDRESS +#define VM_MAP_MAX_ADDRESS MACH_VM_MAX_ADDRESS -#define KERNEL_STACK_SIZE (4 * PPC_PGBYTES) -#define INTSTACK_SIZE (5 * PPC_PGBYTES) +#ifdef MACH_KERNEL_PRIVATE + +/* For implementing legacy 32-bit interfaces */ +#define VM32_SUPPORT +#define VM32_MIN_ADDRESS ((vm32_offset_t) 0) +#define VM32_MAX_ADDRESS ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) + +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ -#endif /* _PPC_VM_PARAM_H_ */ +#endif /* _MACH_PPC_VM_PARAM_H_ */ diff --git a/osfmk/mach/ppc/vm_types.h b/osfmk/mach/ppc/vm_types.h index fe9669d13..b5b0bd8a4 100644 --- a/osfmk/mach/ppc/vm_types.h +++ b/osfmk/mach/ppc/vm_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,47 +63,96 @@ #ifndef ASSEMBLER +#include +#include +#include + /* - * A natural_t is the type for the native - * integer type, e.g. 32 or 64 or.. whatever - * register size the machine has. Unsigned, it is - * used for entities that might be either - * unsigned integers or pointers, and for - * type-casting between the two. - * For instance, the IPC system represents - * a port in user space as an integer and - * in kernel space as a pointer. + * natural_t and integer_t are Mach's legacy types for machine- + * independent integer types (unsigned, and signed, respectively). 
+ * Their original purpose was to define other types in a machine/ + * compiler independent way. + * + * They also had an implicit "same size as pointer" characteristic + * to them (i.e. Mach's traditional types are very ILP32 or ILP64 + * centric). We support PowerPC ABIs that do not follow either of + * these models (specifically LP64). Therefore, we had to make a + * choice between making these types scale with pointers or stay + * tied to integers. Because their use is predominantly tied to + * to the size of an integer, we are keeping that association and + * breaking free from pointer size guarantees. + * + * New use of these types is discouraged. */ -typedef unsigned int natural_t; +typedef __darwin_natural_t natural_t; +typedef int integer_t; + +#if defined(__ppc__) /* - * An integer_t is the signed counterpart - * of the natural_t type. Both types are - * only supposed to be used to define - * other types in a machine-independent - * way. + * For 32-bit PowerPC ABIs, the scalable types were + * always based upon natural_t (unsigned int). + * Because of potential legacy issues with name mangling, + * we cannot use the stdint uintptr_t type. */ -typedef int integer_t; +typedef natural_t vm_offset_t; +typedef natural_t vm_size_t; + +#else /* __ppc64__ */ /* - * A vm_offset_t is a type-neutral pointer, - * e.g. an offset into a virtual memory space. + * For 64-bit PowerPC ABIs, we have no legacy name mangling + * issues, so we use the stdint types for scaling these + * types to the same size as a pointer. */ -typedef natural_t vm_offset_t; +typedef uintptr_t vm_offset_t; +typedef uintptr_t vm_size_t; + +#endif /* - * A vm_size_t is the proper type for e.g. - * expressing the difference between two - * vm_offset_t entities. + * This new type is independent of a particular vm map's + * implementation size - and represents appropriate types + * for all possible maps. This is used for interfaces + * where the size of the map is not known - or we don't + * want to have to distinguish. */ -typedef natural_t vm_size_t; +typedef uint64_t mach_vm_address_t; +typedef uint64_t mach_vm_offset_t; +typedef uint64_t mach_vm_size_t; -#endif /* ndef ASSEMBLER */ +/* LP64todo - convert these over for good */ +#if 1 +typedef uint64_t vm_map_offset_t; +typedef uint64_t vm_map_address_t; +typedef uint64_t vm_map_size_t; +#else +typedef uint32_t vm_map_offset_t; +typedef uint32_t vm_map_address_t; +typedef uint32_t vm_map_size_t; +#endif + +#ifdef MACH_KERNEL_PRIVATE + +#ifdef VM32_SUPPORT /* - * If composing messages by hand (please dont) + * These are types used internal to Mach to implement the + * legacy 32-bit VM APIs published by the kernel. */ +typedef uint32_t vm32_address_t; +typedef uint32_t vm32_offset_t; +typedef uint32_t vm32_size_t; + +#endif /* VM32_SUPPORT */ + +#endif /* MACH_KERNEL_PRIVATE */ +#endif /* ASSEMBLER */ + +/* + * If composing messages by hand (please do not) + */ #define MACH_MSG_TYPE_INTEGER_T MACH_MSG_TYPE_INTEGER_32 #endif /* _MACH_PPC_VM_TYPES_H_ */ diff --git a/osfmk/mach/processor_info.h b/osfmk/mach/processor_info.h index 17824334b..c9dd97fec 100644 --- a/osfmk/mach/processor_info.h +++ b/osfmk/mach/processor_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
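The new mach_vm_* types are fixed at 64 bits so a single set of VM interfaces can address any task, 32- or 64-bit, while the older vm_* types keep scaling with the caller's own ABI. A sketch against the mach_vm entry points that accompany these types (their availability to user code is assumed here):

    #include <mach/mach.h>
    #include <mach/mach_vm.h>

    mach_vm_address_t addr = 0;            /* let the kernel pick */
    mach_vm_size_t    size = 16 * 4096;
    kern_return_t kr;

    kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
    if (kr == KERN_SUCCESS)
        kr = mach_vm_deallocate(mach_task_self(), addr, size);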
* * @APPLE_LICENSE_HEADER_START@ * @@ -61,6 +61,7 @@ #ifndef _MACH_PROCESSOR_INFO_H_ #define _MACH_PROCESSOR_INFO_H_ +#include #include #include @@ -98,8 +99,8 @@ struct processor_basic_info { typedef struct processor_basic_info processor_basic_info_data_t; typedef struct processor_basic_info *processor_basic_info_t; -#define PROCESSOR_BASIC_INFO_COUNT \ - (sizeof(processor_basic_info_data_t)/sizeof(natural_t)) +#define PROCESSOR_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(processor_basic_info_data_t)/sizeof(natural_t))) struct processor_cpu_load_info { /* number of ticks while running... */ unsigned long cpu_ticks[CPU_STATE_MAX]; /* ... in the given mode */ @@ -107,8 +108,8 @@ struct processor_cpu_load_info { /* number of ticks while running... typedef struct processor_cpu_load_info processor_cpu_load_info_data_t; typedef struct processor_cpu_load_info *processor_cpu_load_info_t; -#define PROCESSOR_CPU_LOAD_INFO_COUNT \ - (sizeof(processor_cpu_load_info_data_t)/sizeof(natural_t)) +#define PROCESSOR_CPU_LOAD_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(processor_cpu_load_info_data_t)/sizeof(natural_t))) /* * Scaling factor for load_average, mach_factor. @@ -125,8 +126,8 @@ struct processor_set_basic_info { typedef struct processor_set_basic_info processor_set_basic_info_data_t; typedef struct processor_set_basic_info *processor_set_basic_info_t; -#define PROCESSOR_SET_BASIC_INFO_COUNT \ - (sizeof(processor_set_basic_info_data_t)/sizeof(natural_t)) +#define PROCESSOR_SET_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(processor_set_basic_info_data_t)/sizeof(natural_t))) #define PROCESSOR_SET_LOAD_INFO 4 /* scheduling statistics */ @@ -139,16 +140,14 @@ struct processor_set_load_info { typedef struct processor_set_load_info processor_set_load_info_data_t; typedef struct processor_set_load_info *processor_set_load_info_t; -#define PROCESSOR_SET_LOAD_INFO_COUNT \ - (sizeof(processor_set_load_info_data_t)/sizeof(natural_t)) +#define PROCESSOR_SET_LOAD_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(processor_set_load_info_data_t)/sizeof(natural_t))) +#ifdef PRIVATE -/* - * New scheduling control interface - */ #define PROCESSOR_SET_ENABLED_POLICIES 3 -#define PROCESSOR_SET_ENABLED_POLICIES_COUNT \ - (sizeof(policy_t)/sizeof(natural_t)) +#define PROCESSOR_SET_ENABLED_POLICIES_COUNT ((mach_msg_type_number_t) \ + (sizeof(policy_t)/sizeof(natural_t))) #define PROCESSOR_SET_TIMESHARE_DEFAULT 10 #define PROCESSOR_SET_TIMESHARE_LIMITS 11 @@ -159,4 +158,6 @@ typedef struct processor_set_load_info *processor_set_load_info_t; #define PROCESSOR_SET_FIFO_DEFAULT 30 #define PROCESSOR_SET_FIFO_LIMITS 31 +#endif /* PRIVATE */ + #endif /* _MACH_PROCESSOR_INFO_H_ */ diff --git a/osfmk/mach/rpc.h b/osfmk/mach/rpc.h index 29d511d22..079e92705 100644 --- a/osfmk/mach/rpc.h +++ b/osfmk/mach/rpc.h @@ -40,10 +40,6 @@ #include #include -#include - -#ifdef __APPLE_API_OBSOLETE - /* * These are the types for RPC-specific variants of the MIG routine * descriptor and subsystem data types. @@ -130,6 +126,4 @@ typedef struct rpc_subsystem *rpc_subsystem_t; #define RPC_SUBSYSTEM_NULL ((rpc_subsystem_t) 0) -#endif /* __APPLE_API_OBSOLETE */ - #endif /* _MACH_RPC_H_ */ diff --git a/osfmk/mach/semaphore.h b/osfmk/mach/semaphore.h index 55a4a39ab..e5241b51f 100644 --- a/osfmk/mach/semaphore.h +++ b/osfmk/mach/semaphore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -43,10 +43,13 @@ * semaphore_t semaphore); */ +#include +__BEGIN_DECLS + extern kern_return_t semaphore_signal (semaphore_t semaphore); extern kern_return_t semaphore_signal_all (semaphore_t semaphore); extern kern_return_t semaphore_signal_thread (semaphore_t semaphore, - thread_act_t thread_act); + thread_t thread); extern kern_return_t semaphore_wait (semaphore_t semaphore); extern kern_return_t semaphore_timedwait (semaphore_t semaphore, @@ -58,11 +61,9 @@ extern kern_return_t semaphore_wait_signal (semaphore_t wait_semaphore, extern kern_return_t semaphore_timedwait_signal(semaphore_t wait_semaphore, semaphore_t signal_semaphore, mach_timespec_t wait_time); +__END_DECLS -#include - -#ifdef __APPLE_API_PRIVATE -#ifdef __APPLE_API_EVOLVING +#ifdef PRIVATE #define SEMAPHORE_OPTION_NONE 0x00000000 @@ -84,14 +85,6 @@ extern kern_return_t semaphore_timedwait_signal(semaphore_t wait_semaphore, #define SEMAPHORE_USE_SAVED_RESULT 0x01000000 /* internal use only */ #define SEMAPHORE_SIGNAL_RELEASE 0x02000000 /* internal use only */ -extern kern_return_t semaphore_operator (int options, - semaphore_t wait_semaphore, - semaphore_t signal_semaphore, - thread_act_t thread, - mach_timespec_t wait_time); - -#endif /* __APPLE_API_EVOLVING */ - -#endif /* __APPLE_API_PRIVATE */ +#endif /* PRIVATE */ -#endif /* _MACH_SEMAPHORE_H_ */ +#endif /* _MACH_SEMAPHORE_H_ */ diff --git a/osfmk/mach/shared_memory_server.h b/osfmk/mach/shared_memory_server.h index 4f44265d9..af2aea45c 100644 --- a/osfmk/mach/shared_memory_server.h +++ b/osfmk/mach/shared_memory_server.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -29,14 +29,25 @@ #ifndef _MACH_SHARED_MEMORY_SERVER_H_ #define _MACH_SHARED_MEMORY_SERVER_H_ +#include +#include +#include +#include + +#define VM_PROT_COW 0x8 /* must not interfere with normal prot assignments */ +#define VM_PROT_ZF 0x10 /* must not interfere with normal prot assignments */ + + +#if !defined(__LP64__) + #define SHARED_LIBRARY_SERVER_SUPPORTED -#define GLOBAL_SHARED_TEXT_SEGMENT 0x90000000 -#define GLOBAL_SHARED_DATA_SEGMENT 0xA0000000 -#define GLOBAL_SHARED_SEGMENT_MASK 0xF0000000 +#define GLOBAL_SHARED_TEXT_SEGMENT 0x90000000 +#define GLOBAL_SHARED_DATA_SEGMENT 0xA0000000 +#define GLOBAL_SHARED_SEGMENT_MASK 0xF0000000 -#define SHARED_TEXT_REGION_SIZE 0x10000000 -#define SHARED_DATA_REGION_SIZE 0x10000000 -#define SHARED_ALTERNATE_LOAD_BASE 0x9000000 +#define SHARED_TEXT_REGION_SIZE 0x10000000 +#define SHARED_DATA_REGION_SIZE 0x10000000 +#define SHARED_ALTERNATE_LOAD_BASE 0x09000000 /* * Note: the two masks below are useful because the assumption is @@ -44,12 +55,8 @@ * i.e. 
if the size is 0x10000000 the object can be mapped at * 0x20000000, or 0x30000000, but not 0x1000000 */ -#define SHARED_TEXT_REGION_MASK 0xFFFFFFF -#define SHARED_DATA_REGION_MASK 0xFFFFFFF - - -#include -#include +#define SHARED_TEXT_REGION_MASK 0x0FFFFFFF +#define SHARED_DATA_REGION_MASK 0x0FFFFFFF #define SHARED_LIB_ALIAS 0x10 @@ -66,12 +73,6 @@ #define SYSTEM_REGION_BACKED 0x2 -#define load_file_hash(file_object, size) \ - ((((natural_t)file_object) & 0xffffff) % size) - -#define VM_PROT_COW 0x8 /* must not interfere with normal prot assignments */ -#define VM_PROT_ZF 0x10 /* must not interfere with normal prot assignments */ - struct sf_mapping { vm_offset_t mapping_offset; vm_size_t size; @@ -79,7 +80,49 @@ struct sf_mapping { vm_prot_t protection; /* read/write/execute/COW/ZF */ vm_offset_t cksum; }; - typedef struct sf_mapping sf_mapping_t; +#ifndef KERNEL +/* load_shared_file and friends is deprecated */ +__BEGIN_DECLS +int load_shared_file(char *, caddr_t, u_long, + caddr_t *, int, sf_mapping_t *, int *); +int reset_shared_file(caddr_t *, int, sf_mapping_t *); +int new_system_shared_regions(void); +__END_DECLS +#endif /* !KERNEL */ + +#endif /* !defined(__LP64__) */ + +/* + * All shared_region_* declarations are a private interface + * between dyld and the kernel. + * + */ +struct shared_file_mapping_np { + mach_vm_address_t sfm_address; + mach_vm_size_t sfm_size; + mach_vm_offset_t sfm_file_offset; + vm_prot_t sfm_max_prot; + vm_prot_t sfm_init_prot; +}; + +struct shared_region_range_np { + mach_vm_address_t srr_address; + mach_vm_size_t srr_size; +}; + +#ifndef KERNEL + +__BEGIN_DECLS +int shared_region_map_file_np(int fd, + uint32_t mappingCount, + const struct shared_file_mapping_np *mappings, + int64_t *slide_p); +int shared_region_make_private_np(uint32_t rangeCount, + const struct shared_region_range_np *ranges); +__END_DECLS + +#endif /* !KERNEL */ + #endif /* _MACH_SHARED_MEMORY_SERVER_H_ */ diff --git a/osfmk/mach/std_types.h b/osfmk/mach/std_types.h index c002d05fe..d28084437 100644 --- a/osfmk/mach/std_types.h +++ b/osfmk/mach/std_types.h @@ -54,8 +54,8 @@ * */ -#ifndef STD_TYPES_H_ -#define STD_TYPES_H_ +#ifndef _MACH_STD_TYPES_H_ +#define _MACH_STD_TYPES_H_ #include #include @@ -63,4 +63,4 @@ #include #include -#endif /* STD_TYPES_H_ */ +#endif /* _MACH_STD_TYPES_H_ */ diff --git a/osfmk/mach/sync_policy.h b/osfmk/mach/sync_policy.h index bca61a6d8..132154756 100644 --- a/osfmk/mach/sync_policy.h +++ b/osfmk/mach/sync_policy.h @@ -23,8 +23,8 @@ * @OSF_COPYRIGHT@ */ -#ifndef _SYNC_POLICY_H_ -#define _SYNC_POLICY_H_ +#ifndef _MACH_SYNC_POLICY_H_ +#define _MACH_SYNC_POLICY_H_ typedef int sync_policy_t; @@ -37,20 +37,16 @@ typedef int sync_policy_t; #define SYNC_POLICY_ORDER_MASK 0x3 #define SYNC_POLICY_LIFO (SYNC_POLICY_FIFO|SYNC_POLICY_REVERSED) +#ifdef KERNEL_PRIVATE + /* * These options provide addition (kernel-private) behaviors */ -#ifdef KERNEL_PRIVATE -#include - -#ifdef __APPLE_API_EVOLVING #define SYNC_POLICY_PREPOST 0x4 -#endif /* __APPLE_API_EVOLVING */ - -#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #define SYNC_POLICY_MAX 0x7 -#endif /*_SYNC_POLICY_H_*/ +#endif /* _MACH_SYNC_POLICY_H_ */ diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h index 3e1fa82e7..d6c2f5589 100644 --- a/osfmk/mach/syscall_sw.h +++ b/osfmk/mach/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
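The semaphore declarations above pair with the sync_policy.h values; a minimal user-space sketch using only the calls shown plus the long-published task-level constructors semaphore_create()/semaphore_destroy():

    #include <mach/mach.h>

    semaphore_t sem;
    kern_return_t kr;

    kr = semaphore_create(mach_task_self(), &sem, SYNC_POLICY_FIFO, 0);
    if (kr == KERN_SUCCESS) {
        semaphore_signal(sem);   /* bank one wake-up */
        semaphore_wait(sem);     /* consume it without blocking */
        semaphore_destroy(mach_task_self(), sem);
    }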
* * @APPLE_LICENSE_HEADER_START@ * @@ -48,6 +48,8 @@ * the rights to redistribute these changes. */ +#ifdef PRIVATE + #ifndef _MACH_SYSCALL_SW_H_ #define _MACH_SYSCALL_SW_H_ @@ -72,6 +74,7 @@ kernel_trap(mach_reply_port,-26,0) kernel_trap(thread_self_trap,-27,0) kernel_trap(task_self_trap,-28,0) kernel_trap(host_self_trap,-29,0) + kernel_trap(mach_msg_trap,-31,7) kernel_trap(mach_msg_overwrite_trap,-32,9) kernel_trap(semaphore_signal_trap, -33, 1) @@ -82,29 +85,55 @@ kernel_trap(semaphore_wait_signal_trap,-37,2) kernel_trap(semaphore_timedwait_trap,-38,3) kernel_trap(semaphore_timedwait_signal_trap,-39,4) +#if !defined(__LP64__) kernel_trap(init_process,-41,0) kernel_trap(map_fd,-43,5) +#endif /* __LP64__ */ + kernel_trap(task_for_pid,-45,3) kernel_trap(pid_for_task,-46,2) + +#if !defined(__LP64__) kernel_trap(macx_swapon,-48, 4) kernel_trap(macx_swapoff,-49, 2) kernel_trap(macx_triggers,-51, 4) kernel_trap(macx_backing_store_suspend,-52, 1) kernel_trap(macx_backing_store_recovery,-53, 1) +#endif /* __LP64__ */ +/* These are currently used by pthreads even on LP64 */ +/* But as soon as that is fixed - they will go away there */ kernel_trap(swtch_pri,-59,1) kernel_trap(swtch,-60,0) + kernel_trap(syscall_thread_switch,-61,3) kernel_trap(clock_sleep_trap,-62,5) kernel_trap(mach_timebase_info,-89,1) + +#if defined(__LP64__) +/* uint64_t arguments passed in one register in LP64 */ +kernel_trap(mach_wait_until,-90,1) +#else /* __LP64__ */ kernel_trap(mach_wait_until,-90,2) -kernel_trap(mk_wait_until,-90,2) +#endif /* __LP64__ */ + kernel_trap(mk_timer_create,-91,0) kernel_trap(mk_timer_destroy,-92,1) + +#if defined(__LP64__) +/* uint64_t arguments passed in one register in LP64 */ +kernel_trap(mk_timer_arm,-93,2) +#else /* __LP64__ */ kernel_trap(mk_timer_arm,-93,3) +#endif /* __LP64__ */ + kernel_trap(mk_timer_cancel,-94,2) +#if !defined(__LP64__) kernel_trap(MKGetTimeBaseInfo,-95,5) +#endif /* __LP64__ */ #endif /* _MACH_SYSCALL_SW_H_ */ + +#endif /* PRIVATE */ diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs index 9bf38a5d3..d04efd4bd 100644 --- a/osfmk/mach/task.defs +++ b/osfmk/mach/task.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -322,20 +322,11 @@ routine task_set_ras_pc( boundspc : vm_address_t); +skip; /* was kernel_task_create() */ + /* - * JMM - Want to eliminate kernel tasks and processor_set so - * keep them at the end. - */ -/* - * Create a new task in the kernel's address space with - * an empty set of IPC rights, with a map allocated from - * the kernel's map starting at map_base of length map_size. + * JMM - Want to eliminate processor_set so keep them at the end. */ -routine kernel_task_create( - target_task : task_t; - map_base : vm_offset_t; - map_size : vm_size_t; - out child_task : task_t); /* * Assign task to processor set. diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index c4628a1f5..05e4db68c 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,14 +58,15 @@ * */ -#ifndef TASK_INFO_H_ -#define TASK_INFO_H_ +#ifndef _MACH_TASK_INFO_H_ +#define _MACH_TASK_INFO_H_ +#include #include #include #include -#include +#include /* * Generic information structure to allow for expansion.
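The task_info.h hunk that follows splits the old TASK_BASIC_INFO flavor into explicit 32- and 64-bit variants. A minimal sketch of the intended usage (illustrative only, assuming the new headers are installed): a caller asks for the wide flavor so the sizes come back as full 64-bit quantities.

    #include <mach/mach.h>
    #include <stdio.h>

    static void print_task_sizes(void)
    {
        struct task_basic_info_64 info;
        mach_msg_type_number_t count = TASK_BASIC_INFO_64_COUNT;

        /* TASK_BASIC_INFO_64 reports mach_vm_size_t sizes, so even a
           32-bit caller sees the true footprint of a large task */
        if (task_info(mach_task_self(), TASK_BASIC_INFO_64,
                      (task_info_t)&info, &count) == KERN_SUCCESS)
            printf("virtual %llu bytes, resident %llu bytes\n",
                   (unsigned long long)info.virtual_size,
                   (unsigned long long)info.resident_size);
    }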
@@ -80,12 +81,52 @@ typedef integer_t task_info_data_t[TASK_INFO_MAX]; * Currently defined information structures. */ -#define TASK_BASIC_INFO 4 /* basic information */ +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + +#define TASK_BASIC_INFO_32 4 /* basic information */ + +struct task_basic_info_32 { + integer_t suspend_count; /* suspend count for task */ + natural_t virtual_size; /* virtual memory size (bytes) */ + natural_t resident_size; /* resident memory size (bytes) */ + time_value_t user_time; /* total user run time for + terminated threads */ + time_value_t system_time; /* total system run time for + terminated threads */ + policy_t policy; /* default policy for new threads */ +}; +typedef struct task_basic_info_32 task_basic_info_32_data_t; +typedef struct task_basic_info_32 *task_basic_info_32_t; +#define TASK_BASIC_INFO_32_COUNT \ + (sizeof(task_basic_info_32_data_t) / sizeof(natural_t)) + + +#define TASK_BASIC_INFO_64 5 /* 64-bit capable basic info */ + +struct task_basic_info_64 { + integer_t suspend_count; /* suspend count for task */ + mach_vm_size_t virtual_size; /* virtual memory size (bytes) */ + mach_vm_size_t resident_size; /* resident memory size (bytes) */ + time_value_t user_time; /* total user run time for + terminated threads */ + time_value_t system_time; /* total system run time for + terminated threads */ + policy_t policy; /* default policy for new threads */ +}; +typedef struct task_basic_info_64 task_basic_info_64_data_t; +typedef struct task_basic_info_64 *task_basic_info_64_t; +#define TASK_BASIC_INFO_64_COUNT \ + (sizeof(task_basic_info_64_data_t) / sizeof(natural_t)) + + +/* localized structure - cannot be safely passed between tasks of differing sizes */ struct task_basic_info { integer_t suspend_count; /* suspend count for task */ - vm_size_t virtual_size; /* number of virtual pages */ - vm_size_t resident_size; /* number of resident pages */ + vm_size_t virtual_size; /* virtual memory size (bytes) */ + vm_size_t resident_size; /* resident memory size (bytes) */ time_value_t user_time; /* total user run time for terminated threads */ time_value_t system_time; /* total system run time for @@ -97,6 +138,12 @@ typedef struct task_basic_info task_basic_info_data_t; typedef struct task_basic_info *task_basic_info_t; #define TASK_BASIC_INFO_COUNT \ (sizeof(task_basic_info_data_t) / sizeof(natural_t)) +#if !defined(__LP64__) +#define TASK_BASIC_INFO TASK_BASIC_INFO_32 +#else +#define TASK_BASIC_INFO TASK_BASIC_INFO_64 +#endif + #define TASK_EVENTS_INFO 2 /* various event counts */ @@ -113,8 +160,8 @@ struct task_events_info { }; typedef struct task_events_info task_events_info_data_t; typedef struct task_events_info *task_events_info_t; -#define TASK_EVENTS_INFO_COUNT \ - (sizeof(task_events_info_data_t) / sizeof(natural_t)) +#define TASK_EVENTS_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(task_events_info_data_t) / sizeof(natural_t))) #define TASK_THREAD_TIMES_INFO 3 /* total times for live threads - only accurate if suspended */ @@ -128,25 +175,44 @@ struct task_thread_times_info { typedef struct task_thread_times_info task_thread_times_info_data_t; typedef struct task_thread_times_info *task_thread_times_info_t; -#define TASK_THREAD_TIMES_INFO_COUNT \ - (sizeof(task_thread_times_info_data_t) / sizeof(natural_t)) - -#ifdef __APPLE_API_UNSTABLE +#define TASK_THREAD_TIMES_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(task_thread_times_info_data_t) / sizeof(natural_t))) -#define TASK_SCHED_TIMESHARE_INFO 10 -#define TASK_SCHED_RR_INFO 11 
-#define TASK_SCHED_FIFO_INFO 12 +#define TASK_ABSOLUTETIME_INFO 1 -#define TASK_SCHED_INFO 14 +struct task_absolutetime_info { + uint64_t total_user; /* total time */ + uint64_t total_system; + uint64_t threads_user; /* existing threads only */ + uint64_t threads_system; +}; -#endif /* __APPLE_API_UNSTABLE */ +typedef struct task_absolutetime_info task_absolutetime_info_data_t; +typedef struct task_absolutetime_info *task_absolutetime_info_t; +#define TASK_ABSOLUTETIME_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof (task_absolutetime_info_data_t) / sizeof (natural_t))) #define TASK_SECURITY_TOKEN 13 -#define TASK_SECURITY_TOKEN_COUNT \ - (sizeof(security_token_t) / sizeof(natural_t)) +#define TASK_SECURITY_TOKEN_COUNT ((mach_msg_type_number_t) \ + (sizeof(security_token_t) / sizeof(natural_t))) #define TASK_AUDIT_TOKEN 15 #define TASK_AUDIT_TOKEN_COUNT \ (sizeof(audit_token_t) / sizeof(natural_t)) -#endif /* TASK_INFO_H_ */ +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + + +/* + * Obsolete interfaces. + */ + +#define TASK_SCHED_TIMESHARE_INFO 10 +#define TASK_SCHED_RR_INFO 11 +#define TASK_SCHED_FIFO_INFO 12 + +#define TASK_SCHED_INFO 14 + +#endif /* _MACH_TASK_INFO_H_ */ diff --git a/osfmk/mach/task_ledger.h b/osfmk/mach/task_ledger.h index cf68eded1..6076266a8 100644 --- a/osfmk/mach/task_ledger.h +++ b/osfmk/mach/task_ledger.h @@ -22,12 +22,13 @@ /* * @OSF_COPYRIGHT@ */ -#include #ifndef _MACH_TASK_LEDGER_H_ #define _MACH_TASK_LEDGER_H_ -#ifdef __APPLE_API_EVOLVING +/* + * Evolving and likely to change. + */ /* * Definitions for task ledger line items @@ -41,6 +42,4 @@ #define LEDGER_UNLIMITED 0 /* ignored item.maximum */ -#endif /* __APPLE_API_EVOLVING */ - #endif /* _MACH_TASK_LEDGER_H_ */ diff --git a/osfmk/mach/task_policy.h b/osfmk/mach/task_policy.h index cbf35f467..f4d108896 100644 --- a/osfmk/mach/task_policy.h +++ b/osfmk/mach/task_policy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 10 October 2000 (debo) - * Created. - * - * 30 November 2000 (debo) - * Final resolution of review feedback. - */ #ifndef _MACH_TASK_POLICY_H_ #define _MACH_TASK_POLICY_H_ @@ -126,7 +115,7 @@ struct task_category_policy { typedef struct task_category_policy task_category_policy_data_t; typedef struct task_category_policy *task_category_policy_t; -#define TASK_CATEGORY_POLICY_COUNT \ - (sizeof (task_category_policy_data_t) / sizeof (integer_t)) +#define TASK_CATEGORY_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (task_category_policy_data_t) / sizeof (integer_t))) #endif /* _MACH_TASK_POLICY_H_ */ diff --git a/osfmk/mach/task_special_ports.h b/osfmk/mach/task_special_ports.h index b3384f50e..a3d74ec7a 100644 --- a/osfmk/mach/task_special_ports.h +++ b/osfmk/mach/task_special_ports.h @@ -61,8 +61,6 @@ #ifndef _MACH_TASK_SPECIAL_PORTS_H_ #define _MACH_TASK_SPECIAL_PORTS_H_ -#include - typedef int task_special_port_t; #define TASK_KERNEL_PORT 1 /* Represents task to the outside @@ -72,7 +70,9 @@ typedef int task_special_port_t; #define TASK_BOOTSTRAP_PORT 4 /* Bootstrap environment for task. */ -#ifdef __APPLE_API_EVOLVING +/* + * Evolving and likely to change. + */ #define TASK_WIRED_LEDGER_PORT 5 /* Wired resource ledger for task. 
*/ @@ -90,8 +90,6 @@ typedef int task_special_port_t; #define task_set_paged_ledger_port(task, port) \ (task_set_special_port((task), TASK_PAGED_LEDGER_PORT, (port))) -#endif /* __APPLE_API_EVOLVING */ - /* * Definitions for ease of use */ diff --git a/osfmk/mach/thread_act.defs b/osfmk/mach/thread_act.defs index c56ae74ba..0478b1cfd 100644 --- a/osfmk/mach/thread_act.defs +++ b/osfmk/mach/thread_act.defs @@ -271,15 +271,15 @@ routine thread_sample( reply : mach_port_make_send_t); /* - * JMM - Keep etap and processor_set related things at the end - * because they are likely to be removed. - */ -/* - * Sets the ETAP trace status of the target thread. + * ETAP has been removed from the kernel. */ +#if KERNEL_SERVER +skip; +#else routine etap_trace_thread( target_act : thread_act_t; trace_status : boolean_t); +#endif /* * Assign thread to processor set. diff --git a/osfmk/mach/thread_info.h b/osfmk/mach/thread_info.h index 7b99b9f59..e324aa5fc 100644 --- a/osfmk/mach/thread_info.h +++ b/osfmk/mach/thread_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -63,11 +63,10 @@ #ifndef _MACH_THREAD_INFO_H_ #define _MACH_THREAD_INFO_H_ -#include - #include #include #include +#include #include /* @@ -98,8 +97,8 @@ struct thread_basic_info { typedef struct thread_basic_info thread_basic_info_data_t; typedef struct thread_basic_info *thread_basic_info_t; -#define THREAD_BASIC_INFO_COUNT \ - (sizeof(thread_basic_info_data_t) / sizeof(natural_t)) +#define THREAD_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(thread_basic_info_data_t) / sizeof(natural_t))) /* * Scale factor for usage field. @@ -125,12 +124,12 @@ typedef struct thread_basic_info *thread_basic_info_t; #define TH_FLAGS_SWAPPED 0x1 /* thread is swapped out */ #define TH_FLAGS_IDLE 0x2 /* thread is an idle thread */ -#ifdef __APPLE_API_UNSTABLE +/* + * Obsolete interfaces. + */ #define THREAD_SCHED_TIMESHARE_INFO 10 #define THREAD_SCHED_RR_INFO 11 #define THREAD_SCHED_FIFO_INFO 12 -#endif /* __APPLE_API_UNSTABLE */ - #endif /* _MACH_THREAD_INFO_H_ */ diff --git a/osfmk/mach/thread_policy.h b/osfmk/mach/thread_policy.h index 143081614..057d26831 100644 --- a/osfmk/mach/thread_policy.h +++ b/osfmk/mach/thread_policy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,6 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - * 10 October 2000 (debo) - * Created. - * - * 30 November 2000 (debo) - * Final resolution of review feedback. 
- */ #ifndef _MACH_THREAD_POLICY_H_ #define _MACH_THREAD_POLICY_H_ @@ -58,13 +47,13 @@ typedef integer_t *thread_policy_t; /* kern_return_t thread_policy_set( - thread_act_t thread, + thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t count); kern_return_t thread_policy_get( - thread_act_t thread, + thread_t thread, thread_policy_flavor_t flavor, thread_policy_t policy_info, mach_msg_type_number_t *count, @@ -118,8 +107,8 @@ struct thread_extended_policy { typedef struct thread_extended_policy thread_extended_policy_data_t; typedef struct thread_extended_policy *thread_extended_policy_t; -#define THREAD_EXTENDED_POLICY_COUNT \ - (sizeof (thread_extended_policy_data_t) / sizeof (integer_t)) +#define THREAD_EXTENDED_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_extended_policy_data_t) / sizeof (integer_t))) /* * THREAD_TIME_CONSTRAINT_POLICY: @@ -162,8 +151,8 @@ typedef struct thread_time_constraint_policy \ typedef struct thread_time_constraint_policy \ *thread_time_constraint_policy_t; -#define THREAD_TIME_CONSTRAINT_POLICY_COUNT \ - (sizeof (thread_time_constraint_policy_data_t) / sizeof (integer_t)) +#define THREAD_TIME_CONSTRAINT_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_time_constraint_policy_data_t) / sizeof (integer_t))) /* * THREAD_PRECEDENCE_POLICY: @@ -185,7 +174,7 @@ struct thread_precedence_policy { typedef struct thread_precedence_policy thread_precedence_policy_data_t; typedef struct thread_precedence_policy *thread_precedence_policy_t; -#define THREAD_PRECEDENCE_POLICY_COUNT \ - (sizeof (thread_precedence_policy_data_t) / sizeof (integer_t)) +#define THREAD_PRECEDENCE_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (thread_precedence_policy_data_t) / sizeof (integer_t))) #endif /* _MACH_THREAD_POLICY_H_ */ diff --git a/osfmk/mach/thread_status.h b/osfmk/mach/thread_status.h index 49903043b..c806f7fc0 100644 --- a/osfmk/mach/thread_status.h +++ b/osfmk/mach/thread_status.h @@ -59,8 +59,8 @@ * */ -#ifndef THREAD_STATUS_H_ -#define THREAD_STATUS_H_ +#ifndef _MACH_THREAD_STATUS_H_ +#define _MACH_THREAD_STATUS_H_ /* * The actual structure that comprises the thread state is defined @@ -84,4 +84,4 @@ typedef int thread_state_data_t[THREAD_STATE_MAX]; typedef int thread_state_flavor_t; typedef thread_state_flavor_t *thread_state_flavor_array_t; -#endif /* THREAD_STATUS_H_ */ +#endif /* _MACH_THREAD_STATUS_H_ */ diff --git a/osfmk/mach/thread_switch.h b/osfmk/mach/thread_switch.h index e03383287..522250329 100644 --- a/osfmk/mach/thread_switch.h +++ b/osfmk/mach/thread_switch.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,6 +56,7 @@ #include #include #include +#include /* * Constant definitions for thread_switch trap. @@ -67,9 +68,4 @@ #define valid_switch_option(opt) (0 <= (opt) && (opt) <= 2) -extern kern_return_t thread_switch( - mach_port_name_t thread_name, - int option, - mach_msg_timeout_t option_time); - #endif /* _MACH_THREAD_SWITCH_H_ */ diff --git a/osfmk/mach/time_value.h b/osfmk/mach/time_value.h index afaff9eee..c1969f281 100644 --- a/osfmk/mach/time_value.h +++ b/osfmk/mach/time_value.h @@ -48,10 +48,8 @@ * the rights to redistribute these changes. 
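To make the thread_policy.h changes above concrete, here is a hedged sketch of installing a time-constraint policy on the current thread; the period/computation/constraint numbers are illustrative values, not recommendations from the header.

    #include <mach/mach.h>
    #include <mach/mach_time.h>
    #include <stdint.h>

    static kern_return_t go_time_constrained(void)
    {
        mach_timebase_info_data_t tb;
        struct thread_time_constraint_policy pol;
        uint64_t one_ms;

        mach_timebase_info(&tb);
        one_ms = (1000000ULL * tb.denom) / tb.numer; /* 1 ms in abstime units */

        pol.period      = (uint32_t)(10 * one_ms); /* wake every 10 ms         */
        pol.computation = (uint32_t)one_ms;        /* ~1 ms of work per period */
        pol.constraint  = (uint32_t)(2 * one_ms);  /* finish within 2 ms       */
        pol.preemptible = TRUE;

        return thread_policy_set(mach_thread_self(),
                                 THREAD_TIME_CONSTRAINT_POLICY,
                                 (thread_policy_t)&pol,
                                 THREAD_TIME_CONSTRAINT_POLICY_COUNT);
    }

The new ((mach_msg_type_number_t)...) casts on the *_COUNT macros matter here: they keep the count expression the exact type MIG expects, avoiding sign/width warnings at call sites like this one.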
*/ -#ifndef TIME_VALUE_H_ -#define TIME_VALUE_H_ - -#include +#ifndef _MACH_TIME_VALUE_H_ +#define _MACH_TIME_VALUE_H_ #include @@ -63,9 +61,8 @@ struct time_value { integer_t seconds; integer_t microseconds; }; -typedef struct time_value time_value_t; -#ifdef __APPLE_API_UNSTABLE +typedef struct time_value time_value_t; /* * Macros to manipulate time values. Assume that time values @@ -90,25 +87,4 @@ typedef struct time_value time_value_t; } \ } -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef __APPLE_API_OBSOLETE - -/* - * Time value available through the mapped-time interface. - * Read this mapped value with - * do { - * secs = mtime->seconds; - * usecs = mtime->microseconds; - * } while (secs != mtime->check_seconds); - */ - -typedef struct mapped_time_value { - integer_t seconds; - integer_t microseconds; - integer_t check_seconds; -} mapped_time_value_t; - -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* TIME_VALUE_H_ */ +#endif /* _MACH_TIME_VALUE_H_ */ diff --git a/osfmk/mach/upl.defs b/osfmk/mach/upl.defs index 799dfb681..18d1737ec 100644 --- a/osfmk/mach/upl.defs +++ b/osfmk/mach/upl.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -78,10 +78,10 @@ routine upl_abort( routine upl_abort_range( upl_object : upl_t; - offset : vm_offset_t; - size : vm_size_t; + offset : upl_offset_t; + size : upl_size_t; in abort_cond : integer_t; - out empty : boolean_t); + out empty : boolean_t); routine upl_commit( upl_object : upl_t; @@ -89,9 +89,9 @@ routine upl_commit( routine upl_commit_range( upl_object : upl_t; - offset : vm_offset_t; - size : vm_size_t; + offset : upl_offset_t; + size : upl_size_t; in cntrl_flags : integer_t; in page_list : upl_page_info_array_t; - out empty : boolean_t); + out empty : boolean_t); diff --git a/osfmk/mach/vm_attributes.h b/osfmk/mach/vm_attributes.h index 595c8b926..61017334a 100644 --- a/osfmk/mach/vm_attributes.h +++ b/osfmk/mach/vm_attributes.h @@ -61,8 +61,8 @@ * */ -#ifndef VM_ATTRIBUTES_H_ -#define VM_ATTRIBUTES_H_ +#ifndef _MACH_VM_ATTRIBUTES_H_ +#define _MACH_VM_ATTRIBUTES_H_ /* * Types of machine-dependent attributes @@ -90,4 +90,4 @@ typedef int vm_machine_attribute_val_t; #define MATTR_VAL_GET_INFO 10 /* get page info (stats) */ -#endif /* VM_ATTRIBUTES_H_ */ +#endif /* _MACH_VM_ATTRIBUTES_H_ */ diff --git a/osfmk/mach/vm_inherit.h b/osfmk/mach/vm_inherit.h index d6efd9b57..3828478f1 100644 --- a/osfmk/mach/vm_inherit.h +++ b/osfmk/mach/vm_inherit.h @@ -57,8 +57,8 @@ * */ -#ifndef VM_INHERIT_H_ -#define VM_INHERIT_H_ +#ifndef _MACH_VM_INHERIT_H_ +#define _MACH_VM_INHERIT_H_ /* * Types defined: @@ -80,4 +80,4 @@ typedef unsigned int vm_inherit_t; /* might want to change this */ #define VM_INHERIT_DEFAULT VM_INHERIT_COPY #define VM_INHERIT_LAST_VALID VM_INHERIT_NONE -#endif /* VM_INHERIT_H_ */ +#endif /* _MACH_VM_INHERIT_H_ */ diff --git a/osfmk/mach/vm_map.defs b/osfmk/mach/vm_map.defs index f780aa480..889d71416 100644 --- a/osfmk/mach/vm_map.defs +++ b/osfmk/mach/vm_map.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -50,10 +50,9 @@ /* */ /* - * File: mach/mach_port.defs - * Author: Rich Draves + * File: mach/vm_map.defs * - * Exported kernel calls. + * Exported (native-sized) kernel VM calls. 
*/ subsystem @@ -318,7 +317,7 @@ routine mach_make_memory_entry( inout size :vm_size_t; offset :vm_offset_t; permission :vm_prot_t; - out object_handle :mach_port_move_send_t; + out object_handle :mem_entry_name_port_move_send_t; parent_entry :mem_entry_name_port_t); /* @@ -347,6 +346,7 @@ routine vm_mapped_pages_info( task : vm_map_t; out pages : page_address_array_t); +#if 0 /* * Allow application level processes to create named entries which * are backed by sub-maps which describe regions of address space. @@ -354,11 +354,13 @@ routine vm_mapped_pages_info( * in turn, can be mapped into target address spaces */ - routine vm_region_object_create( target_task :vm_map_t; in size :vm_size_t; out region_object :mach_port_move_send_t); +#else +skip; /* was vm_region_object_create */ +#endif /* * A recursive form of vm_region which probes submaps within the @@ -383,7 +385,7 @@ routine vm_region_recurse_64( inout address : vm_address_t; out size : vm_size_t; inout nesting_depth : natural_t; - out info : vm_region_recurse_info_64_t,CountInOut); + out info : vm_region_recurse_info_t,CountInOut); routine mach_vm_region_info_64( task : vm_map_t; @@ -396,7 +398,7 @@ routine vm_region_64( inout address : vm_address_t; out size : vm_size_t; flavor : vm_region_flavor_t; - out info : vm_region_info_64_t, CountInOut; + out info : vm_region_info_t, CountInOut; out object_name : memory_object_name_t = MACH_MSG_TYPE_MOVE_SEND ctype: mach_port_t); @@ -424,6 +426,10 @@ routine vm_map_64( max_protection : vm_prot_t; inheritance : vm_inherit_t); +#if 0 +/* + * The UPL interfaces are not ready for user-level export. + */ routine vm_map_get_upl( target_task : vm_map_t; address : vm_address_t; @@ -441,5 +447,23 @@ routine vm_upl_map( routine vm_upl_unmap( target_task : vm_map_t; upl : upl_t); - +#else +skip; /* was vm_map_get_upl */ +skip; /* was vm_upl_map */ +skip; /* was vm_upl_unmap */ +#endif + +/* + * Control behavior and investigate state of a "purgable" object in + * the virtual address space of the target task. A purgable object is + * created via a call to vm_allocate() with VM_FLAGS_PURGABLE + * specified. See the routine implementation for a complete + * definition of the routine. + */ +routine vm_purgable_control( + target_task : vm_map_t; + address : vm_address_t; + control : vm_purgable_t; + inout state : int); + diff --git a/osfmk/mach/vm_param.h b/osfmk/mach/vm_param.h index 334011b15..4a2da445e 100644 --- a/osfmk/mach/vm_param.h +++ b/osfmk/mach/vm_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,25 +61,10 @@ #ifndef _MACH_VM_PARAM_H_ #define _MACH_VM_PARAM_H_ -#ifndef KERNEL_PRIVATE - -#error YOU HAVE MADE A MISTAKE BY INCLUDING THIS FILE; -#error -#error THIS FILE SHOULD NOT BE VISIBLE TO USER PROGRAMS. -#error -#error USE <machine/vmparam.h> TO GET MACHINE-DEPENDENT ADDRESS -#error SPACE AND PAGE SIZE ITEMS. -#error -#error USE <mach/machine/vm_types.h> TO GET TYPE DECLARATIONS USED IN -#error THE MACH KERNEL INTERFACE. -#error -#error IN ALL PROBABILITY, YOU SHOULD GET ALL OF THE TYPES USED IN THE -#error INTERFACE FROM <mach/mach_types.h> - -#endif /* KERNEL_PRIVATE */ - #include +#ifdef KERNEL + #ifndef ASSEMBLER #include #endif /* ASSEMBLER */ @@ -89,42 +74,10 @@ * is some number of hardware pages, depending on the target machine. */ -/* - * All references to the size of a page should be done with PAGE_SIZE - * or PAGE_SHIFT.
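The new vm_purgable_control routine above pairs with the VM_FLAGS_PURGABLE allocation flag defined in the vm_statistics.h hunk later in this patch. A user-level sketch, illustrative only and assuming the generated MIG stubs are installed:

    #include <mach/mach.h>

    static void purgable_demo(void)
    {
        vm_address_t addr = 0;
        int state = VM_PURGABLE_VOLATILE;

        /* back a region with a purgable VM object... */
        if (vm_allocate(mach_task_self(), &addr, 4 * 4096,
                        VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE) != KERN_SUCCESS)
            return;

        /* ...then let the kernel discard its pages under memory pressure */
        vm_purgable_control(mach_task_self(), addr,
                            VM_PURGABLE_SET_STATE, &state);

        vm_deallocate(mach_task_self(), addr, 4 * 4096);
    }

Before touching the data again, a caller would set the state back to VM_PURGABLE_NONVOLATILE and check whether the returned previous state was VM_PURGABLE_EMPTY, meaning the contents were purged and must be regenerated.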
The fact they are variables is hidden here so that - * we can easily make them constant if we so desire. - */ - -/* - * Regardless whether it is implemented with a constant or a variable, - * the PAGE_SIZE is assumed to be a power of two throughout the - * virtual memory system implementation. - */ - -#ifdef PAGE_SIZE_FIXED -#define PAGE_SIZE 4096 -#define PAGE_SHIFT 12 -#define PAGE_MASK (PAGE_SIZE-1) -#endif /* PAGE_SIZE_FIXED */ - #ifndef ASSEMBLER -extern vm_size_t page_size; -extern vm_size_t page_mask; -extern int page_shift; - -#ifndef PAGE_SIZE_FIXED -#define PAGE_SIZE page_size /* pagesize in addr units */ -#define PAGE_SHIFT page_shift /* number of bits to shift for pages */ -#define PAGE_MASK page_mask /* mask for off in page */ - -#define PAGE_SIZE_64 (unsigned long long)page_size /* pagesize in addr units */ -#define PAGE_MASK_64 (unsigned long long)page_mask /* mask for off in page */ -#else /* PAGE_SIZE_FIXED */ - -#define PAGE_SIZE_64 (unsigned long long)4096 -#define PAGE_MASK_64 (PAGE_SIZE_64-1) -#endif /* PAGE_SIZE_FIXED */ +#define PAGE_SIZE_64 (unsigned long long)PAGE_SIZE /* pagesize in addr units */ +#define PAGE_MASK_64 (unsigned long long)PAGE_MASK /* mask for off in page */ /* * Convert addresses to pages and vice versa. No rounding is used. @@ -151,6 +104,22 @@ extern int page_shift; #define ptoa(x) (0UL = 0) #endif +/* + * Page-size rounding macros for the Public fixed-width VM types. + */ +#define mach_vm_round_page(x) (((mach_vm_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) +#define mach_vm_trunc_page(x) ((mach_vm_offset_t)(x) & ~((signed)PAGE_MASK)) + +#define memory_object_round_page(x) (((memory_object_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) +#define memory_object_trunc_page(x) ((memory_object_offset_t)(x) & ~((signed)PAGE_MASK)) + +/* + * Rounding macros for the legacy (scalable with the current task's + * address space size) VM types. + */ + +#define round_page(x) (((vm_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) +#define trunc_page(x) ((vm_offset_t)(x) & ~((signed)PAGE_MASK)) /* * Round off or truncate to the nearest page. These will work @@ -158,6 +127,10 @@ extern int page_shift; * bytes. The round_page_32 and trunc_page_32 macros should not be * used on 64 bit types. The round_page_64 and trunc_page_64 macros * should be used instead. + * + * These should only be used in the rare case the size of the address + * or length is hard-coded as 32 or 64 bit. Otherwise, the macros + * associated with the specific VM type should be used. */ #define round_page_32(x) (((uint32_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) @@ -166,20 +139,6 @@ extern int page_shift; #define trunc_page_64(x) ((uint64_t)(x) & ~((signed)PAGE_MASK)) -/* - * While the following block is enabled, the legacy round_page - * and trunc_page macros will behave correctly. If not, they will - * generate invalid lvalue errors. - */ - -#if 1 -#define round_page(x) (((uint32_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) -#define trunc_page(x) ((uint32_t)(x) & ~((signed)PAGE_MASK)) -#else -#define round_page(x) (0UL = 0) -#define trunc_page(x) (0UL = 0) -#endif - /* * Enable the following block to find uses of xxx_32 macros that should * be xxx_64. These macros only work in C code, not C++.
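A minimal sketch of how the reworked rounding macros above behave, assuming a kernel-side caller and 4 KB pages (PAGE_SIZE 0x1000, PAGE_MASK 0xFFF); the values are worked examples, not tests shipped with the patch:

    #include <mach/vm_param.h>

    static void rounding_examples(void)
    {
        vm_offset_t down = trunc_page(0x1234);              /* -> 0x1000 */
        vm_offset_t up   = round_page(0x1234);              /* -> 0x2000 */
        mach_vm_offset_t wide =
            mach_vm_round_page(0x100000FFFULL);             /* -> 0x100001000 */

        (void)down; (void)up; (void)wide;
    }

The ((signed)PAGE_MASK) cast in the macros is the key design detail: sign-extending the mask before complementing keeps the high 32 bits of ~PAGE_MASK all-ones, so the same expression truncates 64-bit offsets correctly.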
The resulting @@ -247,11 +206,20 @@ extern int page_shift; #define page_aligned(x) ((((vm_object_offset_t) (x)) & PAGE_MASK) == 0) extern vm_size_t mem_size; /* 32-bit size of memory - limited by maxmem - deprecated */ -extern uint64_t max_mem; /* 64-bit size of memory - limited by maxmem */ -extern uint64_t mem_actual; /* 64-bit size of memory - not limited by maxmem */ -extern uint64_t sane_size; /* Memory size to use for defaults calculations */ +extern uint64_t max_mem; /* 64-bit size of memory - limited by maxmem */ + +#ifdef XNU_KERNEL_PRIVATE + +extern uint64_t mem_actual; /* 64-bit size of memory - not limited by maxmem */ +extern uint64_t sane_size; /* Memory size to use for defaults calculations */ extern addr64_t vm_last_addr; /* Highest kernel virtual address known to the VM system */ +#endif /* XNU_KERNEL_PRIVATE */ + +extern vm_size_t page_size; +extern vm_size_t page_mask; +extern int page_shift; + /* We need a way to get rid of compiler warnings when we cast from */ /* a 64 bit value to an address that is 32 bits. */ /* We know at this point the cast is harmless but sometime in */ @@ -266,4 +234,7 @@ typedef char __NEED_TO_CHANGE_CAST_DOWN[ sizeof(uintptr_t) == sizeof(int) ? 0 : #endif /* __CAST_DOWN_CHECK */ #endif /* ASSEMBLER */ + +#endif /* KERNEL */ + #endif /* _MACH_VM_PARAM_H_ */ diff --git a/osfmk/mach/vm_prot.h b/osfmk/mach/vm_prot.h index 9980336e4..011ce85a7 100644 --- a/osfmk/mach/vm_prot.h +++ b/osfmk/mach/vm_prot.h @@ -57,8 +57,8 @@ * */ -#ifndef VM_PROT_H_ -#define VM_PROT_H_ +#ifndef _MACH_VM_PROT_H_ +#define _MACH_VM_PROT_H_ /* * Types defined: @@ -96,6 +96,7 @@ typedef int vm_prot_t; * to page locks. Using -1 here is a bad idea because it * looks like VM_PROT_ALL and then some. */ + #define VM_PROT_NO_CHANGE ((vm_prot_t) 0x08) /* @@ -105,6 +106,7 @@ typedef int vm_prot_t; * and write permission will be added to the maximum protections * for the associated entry. */ + #define VM_PROT_COPY ((vm_prot_t) 0x10) @@ -118,6 +120,7 @@ typedef int vm_prot_t; * being pushed up by the memory manager and the kernel * walking down the shadow chain. */ + #define VM_PROT_WANTS_COPY ((vm_prot_t) 0x10) -#endif /* VM_PROT_H_ */ +#endif /* _MACH_VM_PROT_H_ */ diff --git a/osfmk/mach/vm_purgable.h b/osfmk/mach/vm_purgable.h new file mode 100644 index 000000000..e5d919170 --- /dev/null +++ b/osfmk/mach/vm_purgable.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * Virtual memory map purgable object definitions. + * + */ + +#ifndef _MACH_VM_PURGABLE_H_ +#define _MACH_VM_PURGABLE_H_ + +/* + * Types defined: + * + * vm_purgable_t purgable object control codes. 
+ */ + +typedef int vm_purgable_t; + +/* + * Enumeration of valid values for vm_purgable_t. + */ +#define VM_PURGABLE_SET_STATE ((vm_purgable_t) 0) /* set state of purgable object */ +#define VM_PURGABLE_GET_STATE ((vm_purgable_t) 1) /* get state of purgable object */ + +/* + * Valid states of a purgable object. + */ +#define VM_PURGABLE_STATE_MIN 0 /* minimum purgable object state value */ +#define VM_PURGABLE_STATE_MAX 2 /* maximum purgable object state value */ + +#define VM_PURGABLE_NONVOLATILE 0 /* purgable object is non-volatile */ +#define VM_PURGABLE_VOLATILE 1 /* purgable object is volatile */ +#define VM_PURGABLE_EMPTY 2 /* purgable object is volatile and empty */ + +#endif /* _MACH_VM_PURGABLE_H_ */ diff --git a/osfmk/mach/vm_region.h b/osfmk/mach/vm_region.h index 4747068e6..3677bc25c 100644 --- a/osfmk/mach/vm_region.h +++ b/osfmk/mach/vm_region.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,6 +36,17 @@ #include #include #include +#include +#include +#include +#include +#include + +#include + +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif /* * Types defined: @@ -51,37 +62,42 @@ typedef int *vm_region_recurse_info_64_t; typedef int vm_region_flavor_t; typedef int vm_region_info_data_t[VM_REGION_INFO_MAX]; -#define VM_REGION_BASIC_INFO 10 - +#define VM_REGION_BASIC_INFO_64 9 struct vm_region_basic_info_64 { vm_prot_t protection; vm_prot_t max_protection; vm_inherit_t inheritance; boolean_t shared; boolean_t reserved; - vm_object_offset_t offset; + memory_object_offset_t offset; vm_behavior_t behavior; unsigned short user_wired_count; }; - typedef struct vm_region_basic_info_64 *vm_region_basic_info_64_t; typedef struct vm_region_basic_info_64 vm_region_basic_info_data_64_t; -#define VM_REGION_BASIC_INFO_COUNT_64 \ - (sizeof(vm_region_basic_info_data_64_t)/sizeof(int)) +#define VM_REGION_BASIC_INFO_COUNT_64 ((mach_msg_type_number_t) \ + (sizeof(vm_region_basic_info_data_64_t)/sizeof(int))) +/* + * Passing VM_REGION_BASIC_INFO to vm_region_64 + * automatically converts it to a VM_REGION_BASIC_INFO_64. + * Please use that explicitly instead. + */ +#define VM_REGION_BASIC_INFO 10 +/* + * This is the legacy basic info structure. It is + * deprecated because it passes only a 32-bit memory object + * offset back - too small for many larger objects (e.g. files). + */ struct vm_region_basic_info { vm_prot_t protection; vm_prot_t max_protection; vm_inherit_t inheritance; boolean_t shared; boolean_t reserved; -#ifdef soon - vm_object_offset_t offset; -#else - vm_offset_t offset; -#endif + uint32_t offset; /* too small for a real offset */ vm_behavior_t behavior; unsigned short user_wired_count; }; @@ -89,10 +105,8 @@ struct vm_region_basic_info { typedef struct vm_region_basic_info *vm_region_basic_info_t; typedef struct vm_region_basic_info vm_region_basic_info_data_t; -#define VM_REGION_BASIC_INFO_COUNT \ - (sizeof(vm_region_basic_info_data_t)/sizeof(int)) - - +#define VM_REGION_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_region_basic_info_data_t)/sizeof(int))) #define VM_REGION_EXTENDED_INFO 11 @@ -112,7 +126,6 @@ typedef struct vm_region_basic_info vm_region_basic_info_data_t; * back. 
*/ - struct vm_region_extended_info { vm_prot_t protection; unsigned int user_tag; @@ -129,8 +142,8 @@ struct vm_region_extended_info { typedef struct vm_region_extended_info *vm_region_extended_info_t; typedef struct vm_region_extended_info vm_region_extended_info_data_t; -#define VM_REGION_EXTENDED_INFO_COUNT \ - (sizeof(vm_region_extended_info_data_t)/sizeof(int)) +#define VM_REGION_EXTENDED_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_region_extended_info_data_t)/sizeof(int))) #define VM_REGION_TOP_INFO 12 @@ -146,8 +159,8 @@ struct vm_region_top_info { typedef struct vm_region_top_info *vm_region_top_info_t; typedef struct vm_region_top_info vm_region_top_info_data_t; -#define VM_REGION_TOP_INFO_COUNT \ - (sizeof(vm_region_top_info_data_t)/sizeof(int)) +#define VM_REGION_TOP_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_region_top_info_data_t)/sizeof(int))) @@ -170,15 +183,12 @@ typedef struct vm_region_top_info vm_region_top_info_data_t; * chain (where one is present), and a walking of the resident page queue. * */ + struct vm_region_submap_info { vm_prot_t protection; /* present access protection */ vm_prot_t max_protection; /* max avail through vm_prot */ vm_inherit_t inheritance;/* behavior of map/obj on fork */ -#ifdef soon - vm_object_offset_t offset; /* offset into object/map */ -#else - vm_offset_t offset; /* offset into object/map */ -#endif + uint32_t offset; /* offset into object/map */ unsigned int user_tag; /* user tag on map entry */ unsigned int pages_resident; /* only valid for objects */ unsigned int pages_shared_now_private; /* only for objects */ @@ -197,16 +207,14 @@ struct vm_region_submap_info { typedef struct vm_region_submap_info *vm_region_submap_info_t; typedef struct vm_region_submap_info vm_region_submap_info_data_t; -#define VM_REGION_SUBMAP_INFO_COUNT \ - (sizeof(vm_region_submap_info_data_t)/sizeof(int)) - - +#define VM_REGION_SUBMAP_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_region_submap_info_data_t)/sizeof(int))) struct vm_region_submap_info_64 { vm_prot_t protection; /* present access protection */ vm_prot_t max_protection; /* max avail through vm_prot */ vm_inherit_t inheritance;/* behavior of map/obj on fork */ - vm_object_offset_t offset; /* offset into object/map */ + memory_object_offset_t offset; /* offset into object/map */ unsigned int user_tag; /* user tag on map entry */ unsigned int pages_resident; /* only valid for objects */ unsigned int pages_shared_now_private; /* only for objects */ @@ -225,32 +233,14 @@ struct vm_region_submap_info_64 { typedef struct vm_region_submap_info_64 *vm_region_submap_info_64_t; typedef struct vm_region_submap_info_64 vm_region_submap_info_data_64_t; -#define VM_REGION_SUBMAP_INFO_COUNT_64 \ - (sizeof(vm_region_submap_info_data_64_t)/sizeof(int)) - +#define VM_REGION_SUBMAP_INFO_COUNT_64 ((mach_msg_type_number_t) \ + (sizeof(vm_region_submap_info_data_64_t)/sizeof(int))) -#define VM_REGION_OBJECT_INFO_64 13 - -struct vm_region_object_info_64 { - vm_prot_t protection; - vm_prot_t max_protection; - vm_inherit_t inheritance; - boolean_t shared; - boolean_t is_sub_map; - vm_object_offset_t offset; - vm_behavior_t behavior; - unsigned short user_wired_count; - vm_offset_t object_id; +struct mach_vm_read_entry { + mach_vm_address_t address; + mach_vm_size_t size; }; -typedef struct vm_region_object_info_64 *vm_region_object_info_64_t; -typedef struct vm_region_object_info_64 vm_region_object_info_data_64_t; - -#define VM_REGION_OBJECT_INFO_COUNT_64 \ - 
(sizeof(vm_region_object_info_data_64_t)/sizeof(int)) - - - struct vm_read_entry { vm_address_t address; vm_size_t size; @@ -258,6 +248,11 @@ struct vm_read_entry { #define VM_MAP_ENTRY_MAX (256) -typedef struct vm_read_entry vm_read_entry_t[VM_MAP_ENTRY_MAX]; +typedef struct mach_vm_read_entry mach_vm_read_entry_t[VM_MAP_ENTRY_MAX]; +typedef struct vm_read_entry vm_read_entry_t[VM_MAP_ENTRY_MAX]; + +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif #endif /*_MACH_VM_REGION_H_*/ diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h index 75fd47fc0..544c121ce 100644 --- a/osfmk/mach/vm_statistics.h +++ b/osfmk/mach/vm_statistics.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,29 +57,49 @@ * */ -#ifndef VM_STATISTICS_H_ -#define VM_STATISTICS_H_ +#ifndef _MACH_VM_STATISTICS_H_ +#define _MACH_VM_STATISTICS_H_ #include struct vm_statistics { - integer_t free_count; /* # of pages free */ - integer_t active_count; /* # of pages active */ - integer_t inactive_count; /* # of pages inactive */ - integer_t wire_count; /* # of pages wired down */ - integer_t zero_fill_count; /* # of zero fill pages */ - integer_t reactivations; /* # of pages reactivated */ - integer_t pageins; /* # of pageins */ - integer_t pageouts; /* # of pageouts */ - integer_t faults; /* # of faults */ - integer_t cow_faults; /* # of copy-on-writes */ - integer_t lookups; /* object cache lookups */ - integer_t hits; /* object cache hits */ + natural_t free_count; /* # of pages free */ + natural_t active_count; /* # of pages active */ + natural_t inactive_count; /* # of pages inactive */ + natural_t wire_count; /* # of pages wired down */ + natural_t zero_fill_count; /* # of zero fill pages */ + natural_t reactivations; /* # of pages reactivated */ + natural_t pageins; /* # of pageins */ + natural_t pageouts; /* # of pageouts */ + natural_t faults; /* # of faults */ + natural_t cow_faults; /* # of copy-on-writes */ + natural_t lookups; /* object cache lookups */ + natural_t hits; /* object cache hits */ + + natural_t purgeable_count; /* # of pages purgeable */ + natural_t purges; /* # of pages purged */ }; typedef struct vm_statistics *vm_statistics_t; typedef struct vm_statistics vm_statistics_data_t; +struct vm_statistics_rev0 { + natural_t free_count; /* # of pages free */ + natural_t active_count; /* # of pages active */ + natural_t inactive_count; /* # of pages inactive */ + natural_t wire_count; /* # of pages wired down */ + natural_t zero_fill_count; /* # of zero fill pages */ + natural_t reactivations; /* # of pages reactivated */ + natural_t pageins; /* # of pageins */ + natural_t pageouts; /* # of pageouts */ + natural_t faults; /* # of faults */ + natural_t cow_faults; /* # of copy-on-writes */ + natural_t lookups; /* object cache lookups */ + natural_t hits; /* object cache hits */ +}; + +typedef struct vm_statistics_rev0 *vm_statistics_rev0_t; +typedef struct vm_statistics_rev0 vm_statistics_rev0_data_t; /* included for the vm_map_page_query call */ @@ -88,6 +108,7 @@ typedef struct vm_statistics vm_statistics_data_t; #define VM_PAGE_QUERY_PAGE_REF 0x4 #define VM_PAGE_QUERY_PAGE_DIRTY 0x8 +#ifdef MACH_KERNEL_PRIVATE /* * Each machine dependent implementation is expected to @@ -103,8 +124,39 @@ struct pmap_statistics { typedef struct pmap_statistics *pmap_statistics_t; -#define VM_FLAGS_FIXED 0x0 -#define VM_FLAGS_ANYWHERE 0x1 +#endif /* 
MACH_KERNEL_PRIVATE */ + +/* + * VM allocation flags: + * + * VM_FLAGS_FIXED + * (really the absence of VM_FLAGS_ANYWHERE) + * Allocate new VM region at the specified virtual address, if possible. + * + * VM_FLAGS_ANYWHERE + * Allocate new VM region anywhere it would fit in the address space. + * + * VM_FLAGS_PURGABLE + * Create a purgable VM object for that new VM region. + * + * VM_FLAGS_NO_PMAP_CHECK + * (for DEBUG kernel config only, ignored for other configs) + * Do not check that there is no stale pmap mapping for the new VM region. + * This is useful for kernel memory allocations at bootstrap when building + * the initial kernel address space while some memory is already in use. + * + * VM_FLAGS_OVERWRITE + * The new VM region can replace existing VM regions if necessary + * (to be used in combination with VM_FLAGS_FIXED). + */ +#define VM_FLAGS_FIXED 0x0000 +#define VM_FLAGS_ANYWHERE 0x0001 +#define VM_FLAGS_PURGABLE 0x0002 +#ifdef KERNEL_PRIVATE +#define VM_FLAGS_NO_PMAP_CHECK 0x0004 +#endif /* KERNEL_PRIVATE */ +#define VM_FLAGS_OVERWRITE 0x0008 + #define VM_FLAGS_ALIAS_MASK 0xFF000000 #define VM_GET_FLAGS_ALIAS(flags, alias) \ (alias) = ((flags) & VM_FLAGS_ALIAS_MASK) >> 24 @@ -139,7 +191,6 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_MEMORY_JAVA 44 #define VM_MEMORY_ATS 50 - /* memory allocated by the dynamic loader for itself */ #define VM_MEMORY_DYLD 60 /* malloc'd memory created by dyld */ @@ -150,4 +201,5 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_MEMORY_APPLICATION_SPECIFIC_16 255 #define VM_MAKE_TAG(tag) (tag<<24) -#endif /* VM_STATISTICS_H_ */ + +#endif /* _MACH_VM_STATISTICS_H_ */ diff --git a/osfmk/mach/vm_sync.h b/osfmk/mach/vm_sync.h index af5562372..4e38c4fa9 100644 --- a/osfmk/mach/vm_sync.h +++ b/osfmk/mach/vm_sync.h @@ -54,8 +54,8 @@ * */ -#ifndef VM_SYNC_H_ -#define VM_SYNC_H_ +#ifndef _MACH_VM_SYNC_H_ +#define _MACH_VM_SYNC_H_ typedef unsigned vm_sync_t; @@ -68,5 +68,6 @@ typedef unsigned vm_sync_t; #define VM_SYNC_INVALIDATE ((vm_sync_t) 0x04) #define VM_SYNC_KILLPAGES ((vm_sync_t) 0x08) #define VM_SYNC_DEACTIVATE ((vm_sync_t) 0x10) +#define VM_SYNC_CONTIGUOUS ((vm_sync_t) 0x20) -#endif /* VM_SYNC_H_ */ +#endif /* _MACH_VM_SYNC_H_ */ diff --git a/osfmk/mach/vm_types.h b/osfmk/mach/vm_types.h index 437f6bfe2..17ac4187f 100644 --- a/osfmk/mach/vm_types.h +++ b/osfmk/mach/vm_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -23,18 +23,16 @@ * @OSF_COPYRIGHT@ * */ -#ifndef MACH_VM_TYPES_H_ -#define MACH_VM_TYPES_H_ - -#include -#include +#ifndef _MACH_VM_TYPES_H_ +#define _MACH_VM_TYPES_H_ #include #include +#include + typedef vm_offset_t pointer_t; typedef vm_offset_t vm_address_t; -typedef uint64_t vm_object_offset_t; /* * We use addr64_t for 64-bit addresses that are used on both @@ -64,24 +62,34 @@ typedef uint32_t reg64_t; typedef uint32_t ppnum_t; /* Physical page number */ #define PPNUM_MAX UINT32_MAX -#ifdef KERNEL_PRIVATE -#if !defined(__APPLE_API_PRIVATE) || !defined(MACH_KERNEL_PRIVATE) +#ifdef KERNEL_PRIVATE +#include + +#ifndef MACH_KERNEL_PRIVATE /* * Use specifically typed null structures for these in * other parts of the kernel to enable compiler warnings * about type mismatches, etc... Otherwise, these would * be void*. 
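One concrete use of the VM_FLAGS/VM_MAKE_TAG machinery documented in the vm_statistics.h hunk above; this is a sketch, and the choice of VM_MEMORY_APPLICATION_SPECIFIC_16 as the tag is arbitrary:

    #include <mach/mach.h>
    #include <mach/vm_statistics.h>

    static vm_address_t tagged_scratch_region(vm_size_t size)
    {
        vm_address_t addr = 0;

        /* stash a tag in the alias byte (top 8 bits of the flags) so
           tools that decode it can attribute this region to its owner */
        if (vm_allocate(mach_task_self(), &addr, size,
                        VM_FLAGS_ANYWHERE |
                        VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_16))
                != KERN_SUCCESS)
            return 0;
        return addr;
    }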
*/ +__BEGIN_DECLS + +struct pmap ; struct vm_map ; struct vm_object ; -#endif /* !__APPLE_API_PRIVATE || !MACH_KERNEL_PRIVATE */ +__END_DECLS + +#endif /* MACH_KERNEL_PRIVATE */ +typedef struct pmap *pmap_t; typedef struct vm_map *vm_map_t; typedef struct vm_object *vm_object_t; -#define VM_OBJECT_NULL ((vm_object_t) 0) + +#define PMAP_NULL ((pmap_t) 0) +#define VM_OBJECT_NULL ((vm_object_t) 0) #else /* KERNEL_PRIVATE */ @@ -91,16 +99,26 @@ typedef mach_port_t vm_map_t; #define VM_MAP_NULL ((vm_map_t) 0) +/* + * Evolving definitions, likely to change. + */ -#ifdef __APPLE_API_EVOLVING +typedef uint64_t vm_object_offset_t; +typedef uint64_t vm_object_size_t; #ifdef KERNEL_PRIVATE -#ifndef MACH_KERNEL_PRIVATE +#ifndef MACH_KERNEL_PRIVATE + +__BEGIN_DECLS + struct upl ; struct vm_map_copy ; struct vm_named_entry ; -#endif /* !MACH_KERNEL_PRIVATE */ + +__END_DECLS + +#endif /* MACH_KERNEL_PRIVATE */ typedef struct upl *upl_t; typedef struct vm_map_copy *vm_map_copy_t; @@ -108,18 +126,14 @@ typedef struct vm_named_entry *vm_named_entry_t; #define VM_MAP_COPY_NULL ((vm_map_copy_t) 0) -#else /* !KERNEL_PRIVATE */ +#else /* KERNEL_PRIVATE */ typedef mach_port_t upl_t; typedef mach_port_t vm_named_entry_t; -#endif /* !KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #define UPL_NULL ((upl_t) 0) #define VM_NAMED_ENTRY_NULL ((vm_named_entry_t) 0) -#endif /* __APPLE_API_EVOLVING */ - -#endif /* MACH_VM_TYPES_H_ */ - - +#endif /* _MACH_VM_TYPES_H_ */ diff --git a/osfmk/mach_debug/Makefile b/osfmk/mach_debug/Makefile index be1dbe1bd..6ddb140a6 100644 --- a/osfmk/mach_debug/Makefile +++ b/osfmk/mach_debug/Makefile @@ -12,7 +12,7 @@ MIG_DEFS = mach_debug_types.defs DATAFILES = \ mach_debug.h \ hash_info.h ipc_info.h vm_info.h zone_info.h \ - page_info.h mach_debug_types.h \ + page_info.h mach_debug_types.h lockgroup_info.h \ ${MIG_DEFS} INSTALL_MI_LIST = ${DATAFILES} diff --git a/osfmk/mach_debug/hash_info.h b/osfmk/mach_debug/hash_info.h index bb8b5e3a2..3629b02b7 100644 --- a/osfmk/mach_debug/hash_info.h +++ b/osfmk/mach_debug/hash_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,52 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.10.2 1995/01/06 19:52:35 devrcs - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup - * [1994/10/14 03:43:33 dwm] - * - * Revision 1.2.10.1 1994/09/23 02:45:09 ezf - * change marker to not FREE - * [1994/09/22 21:44:01 ezf] - * - * Revision 1.2.3.2 1993/06/09 02:44:38 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:19:01 jeffc] - * - * Revision 1.2 1993/04/19 16:41:12 devrcs - * ansi C conformance changes - * [1993/02/02 18:56:42 david] - * - * Revision 1.1 1992/09/30 02:32:32 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 17:03:21 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:37:46 mrt - * Changed to new Mach copyright - * [91/02/01 17:28:22 mrt] - * - * Revision 2.2 91/01/08 15:18:59 rpd - * Created. 
- * [91/01/02 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University diff --git a/osfmk/mach_debug/ipc_info.h b/osfmk/mach_debug/ipc_info.h index f3ec4fac8..c358c9d78 100644 --- a/osfmk/mach_debug/ipc_info.h +++ b/osfmk/mach_debug/ipc_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,64 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.13.2 1995/01/06 19:52:40 devrcs - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup - * [1994/10/14 03:43:35 dwm] - * - * Revision 1.2.13.1 1994/09/23 02:45:18 ezf - * change marker to not FREE - * [1994/09/22 21:44:05 ezf] - * - * Revision 1.2.3.3 1993/09/09 16:07:52 jeffc - * CR9745 - Delete message accepted notifications - * [1993/09/03 20:45:48 jeffc] - * - * Revision 1.2.3.2 1993/06/09 02:44:43 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:19:04 jeffc] - * - * Revision 1.2 1993/04/19 16:41:20 devrcs - * ansi C conformance changes - * [1993/02/02 18:56:50 david] - * - * Revision 1.1 1992/09/30 02:32:34 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5.4.2 92/04/08 15:45:00 jeffreyh - * Back out Mainline changes. Revert back to revision 2.5. - * [92/04/07 10:29:40 jeffreyh] - * - * Revision 2.5 91/05/14 17:03:28 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:37:50 mrt - * Changed to new Mach copyright - * [91/02/01 17:28:30 mrt] - * - * Revision 2.3 91/01/08 15:19:05 rpd - * Moved ipc_info_bucket_t to mach_debug/hash_info.h. - * [91/01/02 rpd] - * - * Revision 2.2 90/06/02 15:00:28 rpd - * Created for new IPC. - * [90/03/26 23:45:14 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -143,7 +85,7 @@ typedef struct ipc_info_name { /*boolean_t*/integer_t iin_collision; /* collision at this entry? */ mach_port_type_t iin_type; /* straight port type */ mach_port_urefs_t iin_urefs; /* user-references */ - vm_offset_t iin_object; /* object pointer */ + natural_t iin_object; /* object pointer/identifier */ natural_t iin_next; /* marequest/next in free list */ natural_t iin_hash; /* hash index */ } ipc_info_name_t; diff --git a/osfmk/mach_debug/lockgroup_info.h b/osfmk/mach_debug/lockgroup_info.h new file mode 100644 index 000000000..d356d6b9f --- /dev/null +++ b/osfmk/mach_debug/lockgroup_info.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * File: mach/lockgroup_info.h + * + * Definitions for host_lockgroup_info call. + */ + +#ifndef _MACH_DEBUG_LOCKGROUP_INFO_H_ +#define _MACH_DEBUG_LOCKGROUP_INFO_H_ + +#include + +#define LOCKGROUP_MAX_NAME 64 + +#define LOCKGROUP_ATTR_STAT 0x01ULL + +typedef struct lockgroup_info { + char lockgroup_name[LOCKGROUP_MAX_NAME]; + uint64_t lockgroup_attr; + uint64_t lock_spin_cnt; + uint64_t lock_spin_util_cnt; + uint64_t lock_spin_held_cnt; + uint64_t lock_spin_miss_cnt; + uint64_t lock_spin_held_max; + uint64_t lock_spin_held_cum; + uint64_t lock_mtx_cnt; + uint64_t lock_mtx_util_cnt; + uint64_t lock_mtx_held_cnt; + uint64_t lock_mtx_miss_cnt; + uint64_t lock_mtx_wait_cnt; + uint64_t lock_mtx_held_max; + uint64_t lock_mtx_held_cum; + uint64_t lock_mtx_wait_max; + uint64_t lock_mtx_wait_cum; + uint64_t lock_rw_cnt; + uint64_t lock_rw_util_cnt; + uint64_t lock_rw_held_cnt; + uint64_t lock_rw_miss_cnt; + uint64_t lock_rw_wait_cnt; + uint64_t lock_rw_held_max; + uint64_t lock_rw_held_cum; + uint64_t lock_rw_wait_max; + uint64_t lock_rw_wait_cum; +} lockgroup_info_t; + +typedef lockgroup_info_t *lockgroup_info_array_t; + +#endif /* _MACH_DEBUG_LOCKGROUP_INFO_H_ */ + diff --git a/osfmk/mach_debug/mach_debug_types.defs b/osfmk/mach_debug/mach_debug_types.defs index ce06a74d0..2d9090e5e 100644 --- a/osfmk/mach_debug/mach_debug_types.defs +++ b/osfmk/mach_debug/mach_debug_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,110 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.18.1 1996/10/03 17:27:31 emcmanus - * Brought ipc_info_name_t, ipc_info_tree_name_t, and vm_info_object_t - * sizes into sync with the C reality. We should not have to do this - * by hand, but that's MiG for you. [CR 2244] - * [1996/09/17 16:35:23 emcmanus] - * - * Revision 1.2.10.2 1995/01/06 19:52:44 devrcs - * mk6 CR668 - 1.3b26 merge - * * Revision 1.2.3.5 1994/05/06 18:56:40 tmt - * Merged with osc1.3b19 - * Merge Alpha changes into osc1.312b source code. - * 64 bits cleanup. - * * End1.3merge - * [1994/11/02 18:32:27 dwm] - * - * Revision 1.2.10.1 1994/09/23 02:45:37 ezf - * change marker to not FREE - * [1994/09/22 21:44:13 ezf] - * - * Revision 1.2.3.3 1993/08/05 19:09:52 jeffc - * CR9508 - delete dead Mach3 code. Remove MACH_IPC_TYPED - * [1993/08/04 17:30:38 jeffc] - * - * Revision 1.2.3.2 1993/06/09 02:44:48 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:19:07 jeffc] - * - * Revision 1.2 1993/04/19 16:41:28 devrcs - * Merge untyped ipc: - * Introducing new MIG syntax for Untyped IPC (via compile option - * MACH_IPC_TYPED) - * [1993/02/17 23:46:03 travos] - * - * Revision 1.1 1992/09/30 02:23:07 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.9.4.2 92/04/08 15:45:11 jeffreyh - * Back out changes from TRUNK. Now back to a Revision 2.9 base. - * [92/04/07 10:30:25 jeffreyh] - * - * Revision 2.9 91/07/31 17:55:42 dbg - * Add symtab_name_t. 
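For context on the new lockgroup_info_t above: the header's file comment names host_lockgroup_info as the consuming call, so a user-level walk of the array might look like the sketch below (assumes the MIG stub is available via the standard host headers; the out-of-line array must be deallocated by the caller).

    #include <mach/mach.h>
    #include <mach_debug/lockgroup_info.h>
    #include <stdio.h>

    static void dump_lock_groups(void)
    {
        lockgroup_info_array_t groups;
        mach_msg_type_number_t count, i;

        if (host_lockgroup_info(mach_host_self(), &groups, &count)
                != KERN_SUCCESS)
            return;

        for (i = 0; i < count; i++)
            printf("%-32s mtx:%llu spin:%llu rw:%llu\n",
                   groups[i].lockgroup_name,
                   (unsigned long long)groups[i].lock_mtx_cnt,
                   (unsigned long long)groups[i].lock_spin_cnt,
                   (unsigned long long)groups[i].lock_rw_cnt);

        /* MIG returned the array out-of-line; hand the pages back */
        vm_deallocate(mach_task_self(), (vm_address_t)groups,
                      count * sizeof(groups[0]));
    }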
- * [91/07/30 17:11:38 dbg] - * - * Revision 2.8 91/05/14 17:03:43 mrt - * Correcting copyright - * - * Revision 2.7 91/02/05 17:37:59 mrt - * Changed to new Mach copyright - * [91/02/01 17:28:58 mrt] - * - * Revision 2.6 91/01/08 16:18:08 rpd - * Changed ipc_info_bucket_t to hash_info_bucket_t. - * [91/01/02 rpd] - * - * Revision 2.5 90/10/25 14:46:18 rwd - * Updated vm_info_region_t size. - * [90/10/17 rpd] - * - * Revision 2.4 90/06/19 23:00:23 rpd - * Adjusted zone_info_t definition to account for new collectable field. - * [90/06/05 rpd] - * - * Revision 2.3 90/06/02 15:00:39 rpd - * Added vm_info_region_t, vm_info_object_t. - * [90/05/02 14:47:17 rpd] - * - * Converted to new IPC. - * [90/03/26 22:43:24 rpd] - * - * Revision 2.2 90/05/03 15:48:49 dbg - * Remove callout types. Add zone_name, zone_info, page_address - * types. - * [90/04/06 dbg] - * - * Revision 2.1 89/08/03 17:20:25 rwd - * Created. - * - * Revision 2.4 89/02/25 18:43:41 gm0w - * Changes for cleanup. - * - * Revision 2.3 89/01/15 16:32:43 rpd - * Updated includes for the new mach/ directory. - * [89/01/15 15:11:33 rpd] - * - * Revision 2.2 89/01/12 08:00:34 rpd - * Created. - * [89/01/12 04:21:37 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University @@ -181,6 +77,7 @@ type ipc_info_tree_name_array_t = array[] of ipc_info_tree_name_t; type vm_info_region_t = struct[10] of natural_t; type vm_info_region_64_t = struct[11] of natural_t; +type mach_vm_info_region_t = struct[14] of natural_t; type vm_info_object_t = struct[21] of natural_t; type vm_info_object_array_t = ^array[] of vm_info_object_t; @@ -189,6 +86,9 @@ type page_address_array_t = ^array[] of integer_t; type symtab_name_t = c_string[*:32]; +type lockgroup_info_t = struct[63] of integer_t; +type lockgroup_info_array_t = array[] of lockgroup_info_t; + import <mach_debug/mach_debug_types.h>; #endif /* _MACH_DEBUG_MACH_DEBUG_TYPES_DEFS_ */ diff --git a/osfmk/mach_debug/mach_debug_types.h b/osfmk/mach_debug/mach_debug_types.h index 27e2684cc..66b5e4c8b 100644 --- a/osfmk/mach_debug/mach_debug_types.h +++ b/osfmk/mach_debug/mach_debug_types.h @@ -61,6 +61,7 @@ #include #include #include +#include <mach_debug/lockgroup_info.h> typedef char symtab_name_t[32]; diff --git a/osfmk/mach_debug/page_info.h b/osfmk/mach_debug/page_info.h index 3f05d26a8..3a95d9a8e 100644 --- a/osfmk/mach_debug/page_info.h +++ b/osfmk/mach_debug/page_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,48 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1994/09/23 02:45:56 ezf - * change marker to not FREE - * [1994/09/22 21:44:21 ezf] - * - * Revision 1.1.2.3 1993/07/28 18:01:46 jeffc - * CR9523 - Add prototypes to kernel. Protect this file - * against multiple inclusion - * [1993/07/28 12:23:48 jeffc] - * - * Revision 1.1.2.2 1993/06/02 23:50:21 jeffc - * Added to OSF/1 R1.3 from NMK15.0.
- * [1993/06/02 21:19:13 jeffc] - * - * Revision 1.1 1992/09/30 02:32:37 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 17:04:01 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:38:10 mrt - * Changed to new Mach copyright - * [91/02/01 17:29:22 mrt] - * - * Revision 2.2 90/05/03 15:48:58 dbg - * Created. - * [90/04/06 dbg] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University diff --git a/osfmk/mach_debug/vm_info.h b/osfmk/mach_debug/vm_info.h index 38aa6e6b1..aee5bb755 100644 --- a/osfmk/mach_debug/vm_info.h +++ b/osfmk/mach_debug/vm_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -66,15 +66,32 @@ #include #include +#if __DARWIN_ALIGN_POWER +#pragma options align=power +#endif + /* * Remember to update the mig type definitions * in mach_debug_types.defs when adding/removing fields. */ +typedef struct mach_vm_info_region { + mach_vm_offset_t vir_start; /* start of region */ + mach_vm_offset_t vir_end; /* end of region */ + mach_vm_offset_t vir_object; /* the mapped object (kernel addr) */ + memory_object_offset_t vir_offset; /* offset into object */ + boolean_t vir_needs_copy; /* does object need to be copied? */ + vm_prot_t vir_protection; /* protection code */ + vm_prot_t vir_max_protection; /* maximum protection */ + vm_inherit_t vir_inheritance; /* inheritance */ + natural_t vir_wired_count; /* number of times wired */ + natural_t vir_user_wired_count; /* number of times user has wired */ +} mach_vm_info_region_t; + typedef struct vm_info_region_64 { - vm_offset_t vir_start; /* start of region */ - vm_offset_t vir_end; /* end of region */ - vm_offset_t vir_object; /* the mapped object */ - vm_object_offset_t vir_offset; /* offset into object */ + natural_t vir_start; /* start of region */ + natural_t vir_end; /* end of region */ + natural_t vir_object; /* the mapped object */ + memory_object_offset_t vir_offset; /* offset into object */ boolean_t vir_needs_copy; /* does object need to be copied? */ vm_prot_t vir_protection; /* protection code */ vm_prot_t vir_max_protection; /* maximum protection */ @@ -84,10 +101,10 @@ typedef struct vm_info_region_64 { typedef struct vm_info_region { - vm_offset_t vir_start; /* start of region */ - vm_offset_t vir_end; /* end of region */ - vm_offset_t vir_object; /* the mapped object */ - vm_offset_t vir_offset; /* offset into object */ + natural_t vir_start; /* start of region */ + natural_t vir_end; /* end of region */ + natural_t vir_object; /* the mapped object */ + natural_t vir_offset; /* offset into object */ boolean_t vir_needs_copy; /* does object need to be copied? 
*/ vm_prot_t vir_protection; /* protection code */ vm_prot_t vir_max_protection; /* maximum protection */ @@ -98,15 +115,15 @@ typedef struct vm_info_region { typedef struct vm_info_object { - vm_offset_t vio_object; /* this object */ - vm_size_t vio_size; /* object size (valid if internal) */ + natural_t vio_object; /* this object */ + natural_t vio_size; /* object size (valid if internal - but too small) */ unsigned int vio_ref_count; /* number of references */ unsigned int vio_resident_page_count; /* number of resident pages */ unsigned int vio_absent_count; /* number requested but not filled */ - vm_offset_t vio_copy; /* copy object */ - vm_offset_t vio_shadow; /* shadow object */ - vm_offset_t vio_shadow_offset; /* offset into shadow object */ - vm_offset_t vio_paging_offset; /* offset into memory object */ + natural_t vio_copy; /* copy object */ + natural_t vio_shadow; /* shadow object */ + natural_t vio_shadow_offset; /* offset into shadow object */ + natural_t vio_paging_offset; /* offset into memory object */ memory_object_copy_strategy_t vio_copy_strategy; /* how to handle data copy */ vm_offset_t vio_last_alloc; /* offset of last allocation */ @@ -119,10 +136,14 @@ typedef struct vm_info_object { boolean_t vio_internal; boolean_t vio_temporary; boolean_t vio_alive; - boolean_t vio_lock_in_progress; - boolean_t vio_lock_restart; + boolean_t vio_purgable; + boolean_t vio_purgable_volatile; } vm_info_object_t; typedef vm_info_object_t *vm_info_object_array_t; +#if __DARWIN_ALIGN_POWER +#pragma options align=reset +#endif + #endif /* _MACH_DEBUG_VM_INFO_H_ */ diff --git a/osfmk/mach_debug/zone_info.h b/osfmk/mach_debug/zone_info.h index 60b56d694..81f945ae5 100644 --- a/osfmk/mach_debug/zone_info.h +++ b/osfmk/mach_debug/zone_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -22,62 +22,6 @@ /* * @OSF_COPYRIGHT@ */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.12.2 1995/01/06 19:52:51 devrcs - * mk6 CR668 - 1.3b26 merge - * 64bit cleanup - * [1994/10/14 03:43:40 dwm] - * - * Revision 1.2.12.1 1994/09/23 02:46:19 ezf - * change marker to not FREE - * [1994/09/22 21:44:33 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:45:03 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:19:19 jeffc] - * - * Revision 1.2 1993/04/19 16:41:52 devrcs - * ansi C conformance changes - * [1993/02/02 18:57:07 david] - * - * Revision 1.1 1992/09/30 02:32:41 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.5 91/05/14 17:04:15 mrt - * Correcting copyright - * - * Revision 2.4 91/02/05 17:38:17 mrt - * Changed to new Mach copyright - * [91/02/01 17:29:40 mrt] - * - * Revision 2.3 90/06/19 23:00:29 rpd - * Added zi_ prefix to zone_info field names. - * Added zi_collectable field to zone_info. - * Added zn_ prefix to zone_name field names. - * [90/06/05 rpd] - * - * Revision 2.2 90/06/02 15:00:54 rpd - * Created. - * [90/03/26 23:53:57 rpd] - * - * Revision 2.2 89/05/06 12:36:08 rpd - * Created. 
- * [89/05/06 12:35:19 rpd] - * - */ -/* CMU_ENDHIST */ /* * Mach Operating System * Copyright (c) 1991,1990,1989 Carnegie Mellon University @@ -128,10 +72,10 @@ typedef zone_name_t *zone_name_array_t; typedef struct zone_info { integer_t zi_count; /* Number of elements used now */ - vm_size_t zi_cur_size; /* current memory utilization */ - vm_size_t zi_max_size; /* how large can this zone grow */ - vm_size_t zi_elem_size; /* size of an element */ - vm_size_t zi_alloc_size; /* size used for more memory */ + natural_t zi_cur_size; /* current memory utilization */ + natural_t zi_max_size; /* how large can this zone grow */ + natural_t zi_elem_size; /* size of an element */ + natural_t zi_alloc_size; /* size used for more memory */ integer_t zi_pageable; /* zone pageable? */ integer_t zi_sleepable; /* sleep if empty? */ integer_t zi_exhaustible; /* merely return if empty? */ diff --git a/osfmk/machine/Makefile b/osfmk/machine/Makefile index 141fa285b..d68ef2fbf 100644 --- a/osfmk/machine/Makefile +++ b/osfmk/machine/Makefile @@ -9,34 +9,13 @@ include $(MakeInc_def) DATAFILES = \ - machlimits.h \ - ast.h \ - ast_types.h \ - commpage.h \ - cpu_capabilities.h \ cpu_number.h \ - db_machdep.h \ - endian.h \ - gdb_defs.h \ - iobus.h \ + cpu_capabilities.h \ io_map_entries.h \ - kgdb_defs.h \ - kgdb_setjmp.h \ lock.h \ - mach_param.h \ + locks.h \ machine_routines.h \ - machine_rpc.h \ - machparam.h \ - pmap.h \ - setjmp.h \ - spl.h \ - task.h \ - thread.h \ - thread_act.h \ - trap.h \ - vm_tuning.h \ - xpr.h \ - hw_lock_types.h + simple_lock.h INSTALL_MI_LCL_LIST = cpu_capabilities.h diff --git a/osfmk/machine/cpu_capabilities.h b/osfmk/machine/cpu_capabilities.h index a573941ef..65f8ea8f7 100644 --- a/osfmk/machine/cpu_capabilities.h +++ b/osfmk/machine/cpu_capabilities.h @@ -19,12 +19,12 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef PRIVATE #ifndef _MACHINE_CPU_CAPABILITIES_H #define _MACHINE_CPU_CAPABILITIES_H -#ifdef __APPLE_API_PRIVATE - +#ifdef KERNEL_PRIVATE #if defined (__ppc__) #include "ppc/cpu_capabilities.h" #elif defined (__i386__) @@ -33,5 +33,15 @@ #error architecture not supported #endif -#endif /* __APPLE_API_PRIVATE */ +#else /* !KERNEL_PRIVATE -- System Framework header */ +#if defined (__ppc__) || defined(__ppc64__) +#include +#elif defined (__i386__) +#include +#else +#error architecture not supported +#endif +#endif /* KERNEL_PRIVATE */ + #endif /* _MACHINE_CPU_CAPABILITIES_H */ +#endif /* PRIVATE */ diff --git a/osfmk/machine/cpu_number.h b/osfmk/machine/cpu_number.h index 075c108eb..57f865255 100644 --- a/osfmk/machine/cpu_number.h +++ b/osfmk/machine/cpu_number.h @@ -19,6 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef KERNEL_PRIVATE + #ifndef _MACHINE_CPU_NUMBER_H #define _MACHINE_CPU_NUMBER_H @@ -33,3 +35,5 @@ #endif /* _MACHINE_CPU_NUMBER_H */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/machine/disk.h b/osfmk/machine/disk.h deleted file mode 100644 index 03174e58d..000000000 --- a/osfmk/machine/disk.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_DISK_H -#define _MACHINE_DISK_H - - -#if defined (__ppc__) -#include "ppc/POWERMAC/disk.h" -#elif defined (__i386__) -#include "i386/AT386/disk.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_IOBUS_H */ diff --git a/osfmk/machine/gdb_defs.h b/osfmk/machine/gdb_defs.h deleted file mode 100644 index 821448666..000000000 --- a/osfmk/machine/gdb_defs.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_GDB_DEFS_H -#define _MACHINE_GDB_DEFS_H - - -#if defined (__ppc__) -#include "ppc/gdb_defs.h" -#elif defined (__i386__) -#include "i386/gdb_defs.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_GDB_DEFS_H */ diff --git a/osfmk/machine/hw_lock_types.h b/osfmk/machine/hw_lock_types.h deleted file mode 100644 index 0335f3601..000000000 --- a/osfmk/machine/hw_lock_types.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_HW_LOCK_TYPES_H_ -#define _MACHINE_HW_LOCK_TYPES_H_ - - -#if defined (__ppc__) -#include "ppc/hw_lock_types.h" -#elif defined (__i386__) -#include "i386/hw_lock_types.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_HW_LOCK_TYPES_H_ */ diff --git a/osfmk/machine/io_map_entries.h b/osfmk/machine/io_map_entries.h index 49a6b83ab..e946c123c 100644 --- a/osfmk/machine/io_map_entries.h +++ b/osfmk/machine/io_map_entries.h @@ -19,6 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef KERNEL_PRIVATE + #ifndef _MACHINE_IO_MAP_ENTRIES_H_ #define _MACHINE_IO_MAP_ENTRIES_H_ @@ -33,3 +35,5 @@ #endif /* _MACHINE_IO_MAP_ENTRIES_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/machine/iobus.h b/osfmk/machine/iobus.h deleted file mode 100644 index b6e758edd..000000000 --- a/osfmk/machine/iobus.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_IOBUS_H -#define _MACHINE_IOBUS_H - - -#if defined (__ppc__) -#include "ppc/POWERMAC/iobus.h" -#elif defined (__i386__) -#include "i386/AT386/iobus.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_IOBUS_H */ diff --git a/osfmk/machine/kgdb_defs.h b/osfmk/machine/kgdb_defs.h deleted file mode 100644 index c4178dcd1..000000000 --- a/osfmk/machine/kgdb_defs.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. 
- * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_KGDB_DEFS_H -#define _MACHINE_KGDB_DEFS_H - - -#if defined (__ppc__) -#include "ppc/kgdb_defs.h" -#elif defined (__i386__) -#include "i386/kgdb_defs.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_KGDB_DEFS_H */ diff --git a/osfmk/machine/kgdb_setjmp.h b/osfmk/machine/kgdb_setjmp.h deleted file mode 100644 index 256830d27..000000000 --- a/osfmk/machine/kgdb_setjmp.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_KGDB_SETJMP_H -#define _MACHINE_KGDB_SETJMP_H - - -#if defined (__ppc__) -#include "ppc/kgdb_setjmp.h" -#elif defined (__i386__) -#include "i386/kgdb_setjmp.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_KGDB_SETJMP_H */ diff --git a/osfmk/machine/lock.h b/osfmk/machine/lock.h index a65444044..1fb0484c3 100644 --- a/osfmk/machine/lock.h +++ b/osfmk/machine/lock.h @@ -19,6 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef KERNEL_PRIVATE + #ifndef _MACHINE_LOCK_H_ #define _MACHINE_LOCK_H_ @@ -33,3 +35,5 @@ #endif /* _MACHINE_LOCK_H_ */ + +#endif diff --git a/osfmk/machine/spl.h b/osfmk/machine/locks.h similarity index 83% rename from osfmk/machine/spl.h rename to osfmk/machine/locks.h index f343dbb3e..5ba5ab982 100644 --- a/osfmk/machine/spl.h +++ b/osfmk/machine/locks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,17 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _MACHINE_SPL_H -#define _MACHINE_SPL_H +#ifndef _MACHINE_LOCKS_H_ +#define _MACHINE_LOCKS_H_ #if defined (__ppc__) -#include "ppc/spl.h" +#include "ppc/locks.h" #elif defined (__i386__) -#include "i386/spl.h" +#include "i386/locks.h" #else #error architecture not supported #endif -#endif /* _MACHINE_SPL_H */ +#endif /* _MACHINE_LOCKS_H_ */ diff --git a/osfmk/machine/mach_param.h b/osfmk/machine/mach_param.h deleted file mode 100644 index f143d6aff..000000000 --- a/osfmk/machine/mach_param.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_MACH_PARAM_H -#define _MACHINE_MACH_PARAM_H - - -#if defined (__ppc__) -#include "ppc/mach_param.h" -#elif defined (__i386__) -#include "i386/mach_param.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_MACH_PARAM_H */ diff --git a/bsd/machine/label_t.h b/osfmk/machine/machine_cpu.h similarity index 81% rename from bsd/machine/label_t.h rename to osfmk/machine/machine_cpu.h index 094c204ac..a9235e191 100644 --- a/bsd/machine/label_t.h +++ b/osfmk/machine/machine_cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,17 +19,17 @@ * * @APPLE_LICENSE_HEADER_END@ */ -#ifndef _BSD_MACHINE_LABEL_T_H_ -#define _BSD_MACHINE_LABEL_T_H_ +#ifndef _MACHINE_MACHINE_CPU_H +#define _MACHINE_MACHINE_CPU_H #if defined (__ppc__) -#include "ppc/label_t.h" +#include "ppc/machine_cpu.h" #elif defined (__i386__) -#include "i386/label_t.h" +#include "i386/machine_cpu.h" #else #error architecture not supported #endif -#endif /* _BSD_MACHINE_LABEL_T_H_ */ +#endif /* _MACHINE_MACHINE_CPU_H */ diff --git a/osfmk/machine/machine_rpc.h b/osfmk/machine/machine_rpc.h index bb1891453..c582f112c 100644 --- a/osfmk/machine/machine_rpc.h +++ b/osfmk/machine/machine_rpc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * diff --git a/bsd/machine/cpu.h b/osfmk/machine/simple_lock.h similarity index 84% rename from bsd/machine/cpu.h rename to osfmk/machine/simple_lock.h index 36ca09b25..1a28cbe75 100644 --- a/bsd/machine/cpu.h +++ b/osfmk/machine/simple_lock.h @@ -19,18 +19,21 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef KERNEL_PRIVATE -#ifndef _BSD_MACHINE_CPU_H_ -#define _BSD_MACHINE_CPU_H_ +#ifndef _MACHINE_SIMPLE_LOCK_H_ +#define _MACHINE_SIMPLE_LOCK_H_ #if defined (__ppc__) -#include "ppc/cpu.h" +#include "ppc/simple_lock.h" #elif defined (__i386__) -#include "i386/cpu.h" +#include "i386/simple_lock.h" #else #error architecture not supported #endif -#endif /* _BSD_MACHINE_CPU_H_ */ +#endif /* _MACHINE_SIMPLE_LOCK_H_ */ + +#endif diff --git a/osfmk/machine/thread_act.h b/osfmk/machine/thread_act.h deleted file mode 100644 index e40989981..000000000 --- a/osfmk/machine/thread_act.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. 
- * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -#ifndef _MACHINE_THREAD_ACT_H -#define _MACHINE_THREAD_ACT_H - - -#if defined (__ppc__) -#include "ppc/thread_act.h" -#elif defined (__i386__) -#include "i386/thread_act.h" -#else -#error architecture not supported -#endif - - -#endif /* _MACHINE_THREAD_ACT_H */ diff --git a/osfmk/man/host_basic_info.html b/osfmk/man/host_basic_info.html index 0e359f0f2..2a30d3187 100755 --- a/osfmk/man/host_basic_info.html +++ b/osfmk/man/host_basic_info.html @@ -1 +1 @@ -

host_basic_info


Structure - Used to present basic information about a host.

SYNOPSIS

struct host_basic_info
{
       integer_t            max_cpus;
       integer_t          avail_cpus;
       vm_size_t         memory_size;
       cpu_type_t           cpu_type;
       cpu_subtype_t     cpu_subtype;
};

typedef struct host_basic_info* host_basic_info_t;

FIELDS

max_cpus
Maximum possible CPUs for which kernel is configured

avail_cpus
Number of CPUs now available

memory_size
Size of memory, in bytes

cpu_type
CPU type

cpu_subtype
CPU sub-type

DESCRIPTION

The host_basic_info structure defines the basic information available about a host.

NOTES

This structure is machine word length specific because of the memory size returned.

RELATED INFORMATION

Functions: host_info.

Data Structures: host_load_info, host_sched_info. \ No newline at end of file +

host_basic_info


Structure - Used to present basic information about a host.

SYNOPSIS

struct host_basic_info
{
       integer_t            max_cpus;
       integer_t            avail_cpus;
       vm_size_t            memory_size;
       cpu_type_t           cpu_type;
       cpu_subtype_t        cpu_subtype;
       cpu_threadtype_t     cpu_threadtype;
       integer_t            physical_cpu;
       integer_t            physical_cpu_max;
       integer_t            logical_cpu;
       integer_t            logical_cpu_max;
       uint64_t             max_mem;
};

typedef struct host_basic_info* host_basic_info_t;

FIELDS

max_cpus
Maximum number of CPUs possible

avail_cpus
Number of CPUs now available

memory_size
Size of memory in bytes, capped at 2 GB

cpu_type
CPU type

cpu_subtype
CPU sub-type

cpu_threadtype
CPU thread-type

physical_cpu
Number of physical CPUs now available

physical_cpu_max
Maximum number of physical CPUs possible

logical_cpu
Number of logical CPUs now available

logical_cpu_max
Maximum number of logical CPUs possible

max_mem
Actual size of physical memory in bytes

DESCRIPTION

The host_basic_info structure defines the basic information available about a host.
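
EXAMPLE

A minimal user-space sketch of retrieving this structure. It assumes only the
standard Mach calls host_info() (see RELATED INFORMATION) and mach_host_self(),
both available via <mach/mach.h>; it is illustrative, not part of this
structure's definition:

       #include <stdio.h>
       #include <mach/mach.h>

       int main(void)
       {
           host_basic_info_data_t info;
           mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

           /* Request the basic-info flavor for the current host. */
           kern_return_t kr = host_info(mach_host_self(), HOST_BASIC_INFO,
                                        (host_info_t)&info, &count);
           if (kr != KERN_SUCCESS) {
               fprintf(stderr, "host_info failed: %d\n", kr);
               return 1;
           }

           printf("%d of %d CPUs available\n", info.avail_cpus, info.max_cpus);
           printf("memory_size: %u bytes (capped)\n", (unsigned)info.memory_size);
           printf("max_mem: %llu bytes\n", (unsigned long long)info.max_mem);
           return 0;
       }

On hosts with more than 2 GB of physical memory the two size fields diverge;
max_mem reports the actual amount.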

NOTES

This structure is machine word length specific because of the memory size returned: memory_size is a word-sized vm_size_t capped at 2 GB, so callers needing the actual physical memory size should use the 64-bit max_mem field.

RELATED INFORMATION

Functions: host_info.

Data Structures: host_load_info, host_sched_info. \ No newline at end of file diff --git a/osfmk/ppc/AltiAssist.s b/osfmk/ppc/AltiAssist.s index 54162aac7..6bf24f4aa 100644 --- a/osfmk/ppc/AltiAssist.s +++ b/osfmk/ppc/AltiAssist.s @@ -29,7 +29,6 @@ */ -#include #include #include #include diff --git a/osfmk/ppc/Diagnostics.c b/osfmk/ppc/Diagnostics.c index f6409117b..0e8587034 100644 --- a/osfmk/ppc/Diagnostics.c +++ b/osfmk/ppc/Diagnostics.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -42,11 +42,18 @@ #include #include #include +#include #include +#include +#include +#include +#include +#include #include #include #include #include +#include #include #include #include @@ -56,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -78,10 +84,15 @@ int diagCall(struct savearea *save) { natural_t tbu, tbu2, tbl; struct per_proc_info *per_proc; /* Area for my per_proc address */ int cpu, ret, subc; - unsigned int tstrt, tend, temp, temp2, oldwar; + unsigned int tstrt, tend, temp, temp2, *baddr, oldwar; addr64_t src, snk; uint64_t scom, hid1, hid4, srrwrk, stat; scomcomm sarea; + ipc_port_t port; + ipc_entry_t ientry; + processor_t prssr; + vm_address_t addrs; + if(!(dgWork.dgFlags & enaDiagSCs)) return 0; /* If not enabled, cause an exception */ @@ -121,7 +132,7 @@ int diagCall(struct savearea *save) { */ case dgLRA: - save->save_r3 = pmap_find_phys(current_act()->map->pmap, save->save_r4); /* Get read address */ + save->save_r3 = pmap_find_phys(current_thread()->map->pmap, save->save_r4); /* Get read address */ return -1; /* Return no AST checking... */ @@ -190,14 +201,14 @@ int diagCall(struct savearea *save) { cpu = save->save_r4; /* Get the requested CPU number */ - if(cpu >= NCPUS) { /* Check for bogus cpu */ + if(cpu >= MAX_CPUS) { /* Check for bogus cpu */ save->save_r3 = KERN_FAILURE; /* Set failure */ return 1; } - if(!machine_slot[cpu].running) return KERN_FAILURE; /* It is not running */ + per_proc = PerProcTable[cpu].ppe_vaddr; /* Point to the processor */ + if(!per_proc->running) return KERN_FAILURE; /* It is not running */ - per_proc = &per_proc_info[cpu]; /* Point to the processor */ (void)PE_cpu_start(per_proc->cpu_id, per_proc->start_paddr, (vm_offset_t)per_proc); @@ -219,8 +230,10 @@ int diagCall(struct savearea *save) { */ case dgtest: - if(save->save_r4) perfTrapHook = testPerfTrap; - else perfTrapHook = 0; + kprintf("Trying to hang\n"); + baddr = (unsigned int)&baddr | 1; /* Make an odd address */ + __asm__ volatile("lwarx r2,0,%0" : : "r" (baddr)); + kprintf("Didn't hang\n"); return 1; /* Return and check for ASTs... */ @@ -233,7 +246,7 @@ int diagCall(struct savearea *save) { */ case dgBMphys: - pmap_map_block(current_act()->map->pmap, (addr64_t)save->save_r4, /* Map in the block */ + pmap_map_block(current_thread()->map->pmap, (addr64_t)save->save_r4, /* Map in the block */ save->save_r5, save->save_r6, save->save_r7, save->save_r8, 0); return 1; /* Return and check for ASTs... */ @@ -246,7 +259,7 @@ int diagCall(struct savearea *save) { */ case dgUnMap: - (void)mapping_remove(current_act()->map->pmap, save->save_r4); /* Remove mapping */ + (void)mapping_remove(current_thread()->map->pmap, save->save_r4); /* Remove mapping */ return 1; /* Return and check for ASTs... 
*/ @@ -272,7 +285,7 @@ int diagCall(struct savearea *save) { case dgBootScreen: ml_set_interrupts_enabled(1); - (void)copyout((char *)&vinfo, CAST_DOWN(char *, save->save_r4), sizeof(struct vc_info)); /* Copy out the video info */ + (void)copyout((char *)&vinfo, save->save_r4, sizeof(struct vc_info)); /* Copy out the video info */ ml_set_interrupts_enabled(0); return 1; /* Return and check for ASTs... */ @@ -282,7 +295,7 @@ int diagCall(struct savearea *save) { case dgCPNull: ml_set_interrupts_enabled(1); - (void)copyout((char *)&vinfo, CAST_DOWN(char *, save->save_r4), 0); /* Copy out nothing */ + (void)copyout((char *)&vinfo, save->save_r4, 0); /* Copy out nothing */ ml_set_interrupts_enabled(0); return 1; /* Return and check for ASTs... */ @@ -290,7 +303,7 @@ int diagCall(struct savearea *save) { * Test machine check handler - only on 64-bit machines */ case dgmck: - if(!(per_proc_info[0].pf.Available & pf64Bit)) return 0; /* Leave if not correct machine */ + if(!(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)) return 0; /* Leave if not correct machine */ fwEmMck(save->save_r4, save->save_r5, save->save_r6, save->save_r7, save->save_r8, save->save_r9); /* Start injecting */ @@ -300,13 +313,16 @@ int diagCall(struct savearea *save) { * Set 64-bit on or off - only on 64-bit machines */ case dg64: - if(!(per_proc_info[0].pf.Available & pf64Bit)) return 0; /* Leave if not correct machine */ + if(!(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)) return 0; /* Leave if not correct machine */ srrwrk = save->save_srr1 >> 63; /* Save the old 64-bit bit */ save->save_srr1 = (save->save_srr1 & 0x7FFFFFFFFFFFFFFFULL) | (save->save_r4 << 63); /* Set the requested mode */ save->save_r3 = srrwrk; /* Return the old value */ + task_clear_64BitAddr(current_thread()->task); + if((save->save_r4 & 1)) task_set_64BitAddr(current_thread()->task); + return -1; /* Return and don't check for ASTs... */ /* @@ -336,7 +352,7 @@ int diagCall(struct savearea *save) { */ case dgMapPage: - (void)mapping_map(current_act()->map->pmap, /* Map in the page */ + (void)mapping_make(current_thread()->map->pmap, /* Map in the page */ (addr64_t)(((save->save_r5 & 0xFFFFFFFF) << 32) | (save->save_r5 & 0xFFFFFFFF)), save->save_r6, 0, 1, VM_PROT_READ|VM_PROT_WRITE); return -1; /* Return and check for ASTs... */ @@ -347,13 +363,13 @@ int diagCall(struct savearea *save) { */ case dgScom: - ret = copyin((unsigned int)(save->save_r4), &sarea, sizeof(scomcomm)); /* Get the data */ + ret = copyin(save->save_r4, (void *)&sarea, sizeof(scomcomm)); /* Get the data */ if(ret) return 0; /* Copyin failed - return an exception */ sarea.scomstat = 0xFFFFFFFFFFFFFFFFULL; /* Clear status */ cpu = cpu_number(); /* Get us */ - if((sarea.scomcpu < NCPUS) && machine_slot[sarea.scomcpu].running) { + if((sarea.scomcpu < real_ncpus) && PerProcTable[sarea.scomcpu].ppe_vaddr->running) { if(sarea.scomcpu == cpu) { /* Is it us? */ if(sarea.scomfunc) { /* Are we writing */ sarea.scomstat = ml_scom_write(sarea.scomreg, sarea.scomdata); /* Write scom */ @@ -368,11 +384,124 @@ int diagCall(struct savearea *save) { } } - ret = copyout(&sarea, (unsigned int)(save->save_r4), sizeof(scomcomm)); /* Get the data */ + ret = copyout((void *)&sarea, save->save_r4, sizeof(scomcomm)); /* Get the data */ if(ret) return 0; /* Copyin failed - return an exception */ return -1; /* Return and check for ASTs... */ +/* + * Bind current thread to a processor. Parm is processor port. If port is 0, unbind. + */ + + case dgBind: + + if(save->save_r4 == 0) { /* Are we unbinding? 
*/ + thread_bind(current_thread(), PROCESSOR_NULL); /* Unbind us */ + save->save_r3 = KERN_SUCCESS; /* Set success */ + return -1; /* Return and check asts */ + } + + ret = ipc_right_lookup_write(current_space(), (mach_port_name_t)save->save_r4, + &ientry); /* Look up the IPC entry */ + + if(ret != KERN_SUCCESS) { /* Couldn't find it */ + save->save_r3 = ret; /* Pass back return */ + return -1; /* Return and check asts */ + } + + port = (ipc_port_t)ientry->ie_object; /* Get the actual port */ + + if (!ip_active(port) || (ip_kotype(port) != IKOT_PROCESSOR)) { /* Active and a processor? */ + is_write_unlock(current_space()); /* Unlock the space */ + save->save_r3 = KERN_INVALID_ARGUMENT; /* This port is not a processor */ + return -1; /* Return and check asts */ + } + + prssr = (processor_t)port->ip_kobject; /* Extract the processor */ + is_write_unlock(current_space()); /* All done with the space now, unlock it */ + +/* + * The following probably isn't valid if a processor is in the process of going offline, + * but who cares, this is a diagnostic interface... + */ + + if(prssr->state == PROCESSOR_SHUTDOWN) { /* Are we trying to bind to an offline processor? */ + save->save_r3 = KERN_INVALID_ARGUMENT; /* This processor is offline */ + return -1; /* Return and check asts */ + } + + thread_bind(current_thread(), prssr); /* Bind us to the processor */ + thread_block(THREAD_CONTINUE_NULL); /* Make it so */ + + save->save_r3 = KERN_SUCCESS; /* Set success */ + return -1; /* Return and check asts */ + +/* + * Return per_proc for the named processor. Pass in a port. Returns per_proc or 0 if failure + */ + + case dgPproc: + + ret = ipc_right_lookup_write(current_space(), (mach_port_name_t)save->save_r4, + &ientry); /* Look up the IPC entry */ + + if(ret != KERN_SUCCESS) { /* Couldn't find it */ + save->save_r3 = 0; /* Pass back return */ + return -1; /* Return and check asts */ + } + + port = (ipc_port_t)ientry->ie_object; /* Get the actual port */ + + if (!ip_active(port) || (ip_kotype(port) != IKOT_PROCESSOR)) { /* Active and a processor? */ + is_write_unlock(current_space()); /* Unlock the space */ + save->save_r3 = 0; /* This port is not a processor */ + return -1; /* Return and check asts */ + } + + prssr = (processor_t)port->ip_kobject; /* Extract the processor */ + is_write_unlock(current_space()); /* All done with the space now, unlock it */ + + save->save_r3 = (uint64_t)PerProcTable[prssr->processor_data.slot_num].ppe_vaddr; /* Pass back the per proc */ + return -1; /* Return and check asts */ + +/* + * Allocate contiguous memory in the kernel. Pass in size, pass back vaddr or 0 for error + * Note that this must be explicitly released by the user. There is an "issue" + * if we try to allocate directly into the user: the contiguous area has a kernel wire + * on it. If we terminate, we will hang waiting for wire to be released. Ain't no + * way that will happen, so we do it in the kernel and make them release it. That way + * we will leak rather than hang. + * + */ + case dgAcntg: + + addrs = 0; /* Clear just in case */ + + ret = kmem_alloc_contig(kernel_map, &addrs, (vm_size_t)save->save_r4, + PAGE_MASK, 0); /* That which does not make us stronger, kills us... */ + if(ret != KERN_SUCCESS) addrs = 0; /* Pass 0 if error */ + + save->save_r3 = (uint64_t)addrs; /* Pass back whatever */ + return -1; /* Return and check for ASTs... 
*/ + + +/* + * Return physical address of a page in the kernel + */ + case dgKlra: + + save->save_r3 = pmap_find_phys(kernel_pmap, save->save_r4); /* Get read address */ + return -1; /* Return no AST checking... */ + +/* + * Release kernel memory - intent is to release congiguous memory + */ + case dgKfree: + + kmem_free( kernel_map, (vm_address_t) save->save_r4, (vm_size_t)save->save_r5); + return -1; /* Return no AST checking... */ + + case dgWar: /* Set or reset workaround flags */ save->save_r3 = (uint32_t)warFlags; /* Get the old flags */ @@ -412,7 +541,7 @@ int diagCall(struct savearea *save) { save->save_r3 = oldwar; /* Pass back original */ return -1; - + default: /* Handle invalid ones */ return 0; /* Return an exception */ diff --git a/osfmk/ppc/Diagnostics.h b/osfmk/ppc/Diagnostics.h index ab76e195b..7983b9bb2 100644 --- a/osfmk/ppc/Diagnostics.h +++ b/osfmk/ppc/Diagnostics.h @@ -30,6 +30,7 @@ * Here are the Diagnostic interface interfaces * Lovingly crafted by Bill Angell using traditional methods */ +#ifdef KERNEL_PRIVATE #ifndef _DIAGNOSTICS_H_ #define _DIAGNOSTICS_H_ @@ -61,7 +62,12 @@ int diagCall(struct savearea *save); #define dgPerfMon 15 #define dgMapPage 16 #define dgScom 17 -#define dgWar 18 +#define dgBind 18 +#define dgPproc 19 +#define dgAcntg 20 +#define dgKlra 21 +#define dgKfree 22 +#define dgWar 23 typedef struct diagWork { /* Diagnostic work area */ @@ -84,15 +90,6 @@ typedef struct diagWork { /* Diagnostic work area */ #define enaDiagTrapb 25 #define enaNotifyEM 0x00000080 #define enaNotifyEMb 24 -/* Suppress lock checks */ -#define disLkType 0x80000000 -#define disLktypeb 0 -#define disLkThread 0x40000000 -#define disLkThreadb 1 -#define disLkNmSimp 0x20000000 -#define disLkNmSimpb 2 -#define disLkMyLck 0x10000000 -#define disLkMyLckb 3 unsigned int dgMisc0; unsigned int dgMisc1; @@ -116,3 +113,5 @@ extern int diagTrap(struct savearea *, unsigned int); #endif /* _DIAGNOSTICS_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/Emulate.s b/osfmk/ppc/Emulate.s index d391f0211..7a4ef3ab9 100644 --- a/osfmk/ppc/Emulate.s +++ b/osfmk/ppc/Emulate.s @@ -29,10 +29,10 @@ */ -#include #include #include #include +#include #include #include @@ -218,9 +218,11 @@ LEXT(AlignAssist) b EXT(AlignAssist64) ; Jump to the 64-bit code... aan64: lwz r20,savedsisr(r13) ; Get the DSISR + li r0,0 ; Assume we emulate mfsprg r31,0 ; Get the per_proc mtcrf 0x10,r20 ; Put instruction ID in CR for later lwz r21,spcFlags(r31) ; Grab the special flags + stw r0,savemisc3(r13) ; Assume that we emulate ok mtcrf 0x08,r20 ; Put instruction ID in CR for later rlwinm. r0,r21,0,runningVMbit,runningVMbit ; Are we running a VM? mtcrf 0x04,r20 ; Put instruction ID in CR for later @@ -332,15 +334,11 @@ aaComExGo: b EXT(EmulExit) ; We are done, no tracing on... ; ; This is not a floating point operation ; -; The emulation routines for these are positioned every 64 bytes (16 instructions) -; in a 1024-byte aligned table. It is indexed by taking the low order 4 bits of +; The table of these emulation routines is indexed by taking the low order 4 bits of ; the instruction code in the DSISR and subtracting 7. If this comes up negative, ; the instruction is not to be emulated. Then we add bit 0 of the code * 4. This ; gives us a fairly compact and almost unique index. Both lwm and stmw map to 0 so -; that one needs to be further reduced, and we end up with holes at index 6, 8, and 10. -; -; If the emulation routine takes more than 16 instructions, it must branch elsewhere -; to finish up. 
+; that one needs to be further reduced, and we end up with holes at a few indexes. ; .align 5 @@ -361,9 +359,7 @@ aaNotFloat: ; ; This is the table of non-floating point emulation routines. -; It is indexed by low 4 bits of DSISR op type - 7 + bit 0 of -; op type * 4 -; +; It is indexed by the code immediately above. .align 5 @@ -955,9 +951,17 @@ aaSthbrx: .align 5 -aaDcbz: - rlwinm r23,r23,0,0,26 ; Round back to a 32-byte boundary - +aaDcbz: + lwz r0,savesrr0+4(r13) ; get instruction address + li r4,_COMM_PAGE_BASE_ADDRESS + rlwinm r23,r23,0,0,26 ; Round EA back to a 32-byte boundary + sub r4,r0,r4 ; compute instruction offset from base of commpage + cmplwi r4,_COMM_PAGE_AREA_USED ; did fault occur in commpage? + bge+ aaDcbz1 ; skip if not in commpage + lwz r4,savecr(r13) ; if we take a dcbz in the commpage... + rlwinm r4,r4,0,0,27 ; ...clear users cr7 as a flag for commpage code + stw r4,savecr(r13) +aaDcbz1: crset cr0_eq ; Set this to see if we failed li r0,0 ; Clear this out mtmsr r22 ; Flip DR, RI, and maybe PR on @@ -994,6 +998,8 @@ aaDcbzXit: mr r4,r0 ; Save the DAR if we failed the access ; aaPassAlong: + li r0,1 ; Indicate that we failed to emulate + stw r0,savemisc3(r13) ; Assume that we emulate ok b EXT(EmulExit) diff --git a/osfmk/ppc/Emulate64.s b/osfmk/ppc/Emulate64.s index 72d95bbd2..924caaf88 100644 --- a/osfmk/ppc/Emulate64.s +++ b/osfmk/ppc/Emulate64.s @@ -26,7 +26,6 @@ */ #include -#include #include #include #include @@ -188,9 +187,7 @@ a64NotEmulated: // This routine supports all legal permutations of alignment interrupts occuring in user or // supervisor mode, 32 or 64-bit addressing, and translation on or off. We do not emulate // instructions that go past the end of an address space, such as "LHZ -1(0)"; we just pass -// along the alignment exception rather than wrap around to byte 0. (Treatment of address -// space wrap is a moot point in Mac OS X, since we do not map either the last page or -// page 0.) +// along the alignment exception rather than wrap around to byte 0. // // First, check for a few special cases such as virtual machines, etc. @@ -200,11 +197,13 @@ LEXT(AlignAssist64) crset kAlignment // mark as alignment interrupt a64AlignAssistJoin: // join here from program interrupt handler + li r0,0 // Get a 0 mfsprg r31,0 // get the per_proc data ptr mcrf cr3,cr6 // save feature flags here... lwz r21,spcFlags(r31) // grab the special flags ld r29,savesrr1(r13) // get the MSR etc at the fault ld r28,savesrr0(r13) // get the EA of faulting instruction + stw r0,savemisc3(r13) // Assume we will handle this ok mfmsr r26 // save MSR at entry rlwinm. r0,r21,0,runningVMbit,runningVMbit // Are we running a VM? 
lwz r19,dgFlags(0) // Get the diagnostics flags @@ -267,11 +266,12 @@ a64AlignAssistJoin: // join here from program interrupt handler // // When we "bctr" to the opcode-specific reoutine, the following are all set up: // MSR = EE and IR off, SF and FP on +// r12 = full 64-bit EA (r17 is clamped EA) // r13 = save-area pointer (physical) // r14 = ptr to saver0 in save-area (ie, to base of GPRs) // r15 = 0x00000000FFFFFFFF if 32-bit mode fault, 0xFFFFFFFFFFFFFFFF if 64 // r16 = RA * 8 (ie, reg# not reg value) -// r17 = EA +// r17 = EA, clamped to 32 bits if 32-bit mode fault (see also r12) // r18 = (RA|0) (reg value) // r19 = -1 if X-form, 0 if D-form // r20 = faulting instruction @@ -321,7 +321,7 @@ a64GotInstruction: // here from program interrupt with instruction in r20 sradi r15,r29,32 // propogate SF bit from SRR1 (MSR_SF, which is bit 0) andc r18,r18,r24 // r18 <- (RA|0) mtcrf 0x02,r21 // move opcode bits 24-27 to CR6 (kUpdate is bit 25) - add r17,r18,r12 // r17 <- EA, which might need to be clamped to 32 bits + add r12,r18,r12 // r12 <- 64-bit EA mtctr r30 // set up branch address oris r15,r15,0xFFFF // start to fill low word of r15 with 1s @@ -329,7 +329,7 @@ a64GotInstruction: // here from program interrupt with instruction in r20 lis r22,ha16(EXT(aaFPopTable)) // start to compute address of floating pt table ori r15,r15,0xFFFF // now bits 32-63 of r15 are 1s addi r22,r22,lo16(EXT(aaFPopTable)) - and r17,r17,r15 // clamp EA to 32 bits if necessary + and r17,r12,r15 // clamp EA to 32 bits if fault occured in 32-bit mode rlwimi r22,r21,2,22,26 // move RT into aaFPopTable address (which is 1KB aligned) bf-- kAlignment,a64HandleProgramInt // return to Program Interrupt handler @@ -487,27 +487,30 @@ a64Stwbrx: // Load doubleword (ld[u], ldx[u]), also lwa. a64LdLwa: // these are DS form: ld=0, ldu=1, and lwa=2 - andi. r0,r20,2 // ld[u] or lwa? (test bit 30 of DS field) + mtcrf 0x01,r20 // move DS field to cr7 rlwinm r3,r20,0,30,31 // must adjust EA by subtracting DS field - sub r17,r17,r3 - and r17,r17,r15 // re-clamp to 32 bits if necessary - bne a64Lwa // handle lwa + sub r12,r12,r3 // subtract from full 64-bit EA + and r17,r12,r15 // then re-clamp to 32 bits if necessary + bt 30,a64Lwa // handle lwa + crmove kUpdate,31 // if opcode bit 31 is set, it is ldu so set update flag a64Ldx: bl a64Load8Bytes // load 8 bytes from user space into r30 stdx r30,r14,r21 // update register file b a64UpdateCheck // update RA if necessary and exit -// Store doubleword (stdx[u], std[u]) +// Store doubleword (stdx[u], std[u], stwcx) a64StdxStwcx: bf-- 30,a64PassAlong // stwcx, so pass along alignment exception b a64Stdx // was stdx -a64StdStfiwx: +a64StdStfiwx: // if DS form: 0=std, 1=stdu, 2-3=undefined bt 30,a64Stfiwx // handle stfiwx - rlwinm. r3,r20,0,30,31 // must adjust EA by subtracting DS field - sub r17,r17,r3 - and r17,r17,r15 // re-clamp to 32 bits if necessary + rlwinm r3,r20,0,30,31 // must adjust EA by subtracting DS field + mtcrf 0x01,r20 // move DS field to cr7 + sub r12,r12,r3 // subtract from full 64-bit EA + and r17,r12,r15 // then re-clamp to 32 bits if necessary + crmove kUpdate,31 // if DS==1, then it is update form a64Stdx: ldx r30,r14,r21 // get RT bl a64Store8Bytes // store RT into user space @@ -520,21 +523,21 @@ a64DcbzDcbz128: andis. r0,r20,0x0020 // bit 10 set? 
li r3,0 // get a 0 to store li r0,4 // assume 32-bit version, store 8 bytes 4x - li r4,_COMM_PAGE_BASE_ADDRESS rldicr r17,r17,0,63-5 // 32-byte align EA + li r4,_COMM_PAGE_BASE_ADDRESS beq a64DcbzSetup // it was the 32-byte version rldicr r17,r17,0,63-7 // zero low 7 bits of EA li r0,16 // store 8 bytes 16x a64DcbzSetup: - xor r4,r4,r28 // was dcbz in the commpage(s)? + sub r4,r28,r4 // get instruction offset from start of commpage and r4,r4,r15 // mask off high-order bits if 32-bit mode - srdi. r4,r4,12 // check SRR0 - bne a64NotCommpage // not in commpage + cmpldi r4,_COMM_PAGE_AREA_USED // did fault occur in commpage area? + bge a64NotCommpage // not in commpage rlwinm. r4,r29,0,MSR_PR_BIT,MSR_PR_BIT // did fault occur in user mode? beq-- a64NotCommpage // do not zero cr7 if kernel got alignment exception lwz r4,savecr(r13) // if we take a dcbz{128} in the commpage... rlwinm r4,r4,0,0,27 // ...clear user's cr7... - stw r4,savecr(r13) // ...as a flag for _COMM_PAGE_BIGCOPY + stw r4,savecr(r13) // ...as a flag for commpage code a64NotCommpage: mtctr r0 cmpw r0,r0 // turn cr0 beq on so we can check for DSIs @@ -836,12 +839,14 @@ a64ExitEm: b a64Exit // Join standard exit routine... a64PassAlong: // unhandled exception, just pass it along + li r0,1 // Set that the alignment/program exception was not emulated crset kNotify // return T_ALIGNMENT or T_PROGRAM + stw r0,savemisc3(r13) // Set that emulation was not done crclr kTrace // not a trace interrupt b a64Exit1 a64UpdateCheck: // successfully emulated, may be update form bf kUpdate,a64Exit // update? - stdx r17,r14,r16 // yes, store EA into RA + stdx r12,r14,r16 // yes, store 64-bit EA into RA a64Exit: // instruction successfully emulated addi r28,r28,4 // bump SRR0 past the emulated instruction li r30,T_IN_VAIN // eat the interrupt since we emulated it diff --git a/osfmk/ppc/Firmware.s b/osfmk/ppc/Firmware.s index d07ed62e8..960529bf8 100644 --- a/osfmk/ppc/Firmware.s +++ b/osfmk/ppc/Firmware.s @@ -36,7 +36,6 @@ */ -#include #include #include #include @@ -2179,7 +2178,7 @@ LEXT(stSpecrs) mfmsr r0 ; Save the MSR andc r0,r0,r2 ; Turn off VEC and FP - andc r4,r0,r4 ; And EE + andc r4,r0,r4 ; And EE mtmsr r4 isync @@ -2234,7 +2233,7 @@ LEXT(stSpecrs) mfsdr1 r4 stw r4,88(r3) - + la r4,92(r3) li r5,0 @@ -2244,7 +2243,7 @@ stSnsr: mfsrin r6,r5 mr. 
r5,r5 addi r4,r4,4 bne+ stSnsr - + cmplwi r12,PROCESSOR_VERSION_750 mfspr r4,hid0 stw r4,(39*4)(r3) diff --git a/osfmk/ppc/FirmwareC.c b/osfmk/ppc/FirmwareC.c index 8dc173d97..654671ce1 100644 --- a/osfmk/ppc/FirmwareC.c +++ b/osfmk/ppc/FirmwareC.c @@ -24,7 +24,6 @@ * */ -#include #include #include #include diff --git a/osfmk/ppc/Makefile b/osfmk/ppc/Makefile index fbef39c9c..b978cc676 100644 --- a/osfmk/ppc/Makefile +++ b/osfmk/ppc/Makefile @@ -10,16 +10,17 @@ include $(MakeInc_def) EXPORT_ONLY_FILES = \ asm.h \ - cpu_capabilities.h \ cpu_number.h \ - lock.h \ - hw_lock_types.h \ + cpu_capabilities.h \ + Diagnostics.h \ io_map_entries.h \ + lock.h \ + locks.h \ proc_reg.h \ machine_routines.h \ - Diagnostics.h \ + mappings.h \ savearea.h \ - mappings.h + simple_lock.h INSTALL_MD_DIR = ppc diff --git a/osfmk/ppc/PPCcalls.c b/osfmk/ppc/PPCcalls.c index 13b9b688a..2131832a1 100644 --- a/osfmk/ppc/PPCcalls.c +++ b/osfmk/ppc/PPCcalls.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include diff --git a/osfmk/ppc/PseudoKernel.c b/osfmk/ppc/PseudoKernel.c index de30417ac..2ef1a5986 100644 --- a/osfmk/ppc/PseudoKernel.c +++ b/osfmk/ppc/PseudoKernel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,6 +32,9 @@ #include #include + +#include +#include #include #include #include @@ -39,9 +42,12 @@ #include #include #include + +#include +#include #include -void bbSetRupt(ReturnHandler *rh, thread_act_t ct); +void bbSetRupt(ReturnHandler *rh, thread_t ct); /* ** Function: NotifyInterruption @@ -59,8 +65,7 @@ kern_return_t syscall_notify_interrupt ( void ) { UInt32 interruptState; task_t task; - thread_act_t act, fact; - thread_t thread; + thread_t act, fact; bbRupt *bbr; BTTD_t *bttd; int i; @@ -69,52 +74,58 @@ kern_return_t syscall_notify_interrupt ( void ) { task_lock(task); /* Lock our task */ - fact = (thread_act_t)task->threads.next; /* Get the first activation on task */ + fact = (thread_t)task->threads.next; /* Get the first activation on task */ act = 0; /* Pretend we didn't find it yet */ for(i = 0; i < task->thread_count; i++) { /* Scan the whole list */ - if(fact->mact.bbDescAddr) { /* Is this a Blue thread? */ - bttd = (BTTD_t *)(fact->mact.bbDescAddr & -PAGE_SIZE); + if(fact->machine.bbDescAddr) { /* Is this a Blue thread? */ + bttd = (BTTD_t *)(fact->machine.bbDescAddr & -PAGE_SIZE); if(bttd->InterruptVector) { /* Is this the Blue interrupt thread? */ act = fact; /* Yeah... */ break; /* Found it, Bail the loop... */ } } - fact = (thread_act_t)fact->task_threads.next; /* Go to the next one */ + fact = (thread_t)fact->task_threads.next; /* Go to the next one */ } if(!act) { /* Couldn't find a bluebox */ task_unlock(task); /* Release task lock */ return KERN_FAILURE; /* No tickie, no shirtee... */ } + + thread_reference(act); - act_lock_thread(act); /* Make sure this stays 'round */ task_unlock(task); /* Safe to release now */ + thread_mtx_lock(act); + /* if the calling thread is the BlueBox thread that handles interrupts * we know that we are in the PsuedoKernel and we can short circuit * setting up the asynchronous task by setting a pending interrupt. 
*/ - if ( (unsigned int)act == (unsigned int)current_act() ) { + if ( (unsigned int)act == (unsigned int)current_thread() ) { bttd->InterruptControlWord = bttd->InterruptControlWord | ((bttd->postIntMask >> kCR2ToBackupShift) & kBackupCR2Mask); - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); return KERN_SUCCESS; } - if(act->mact.emPendRupts >= 16) { /* Have we hit the arbitrary maximum? */ - act_unlock_thread(act); /* Unlock the activation */ + if(act->machine.emPendRupts >= 16) { /* Have we hit the arbitrary maximum? */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); return KERN_RESOURCE_SHORTAGE; /* Too many pending right now */ } if(!(bbr = (bbRupt *)kalloc(sizeof(bbRupt)))) { /* Get a return handler control block */ - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); return KERN_RESOURCE_SHORTAGE; /* No storage... */ } - (void)hw_atomic_add(&act->mact.emPendRupts, 1); /* Count this 'rupt */ + (void)hw_atomic_add(&act->machine.emPendRupts, 1); /* Count this 'rupt */ bbr->rh.handler = bbSetRupt; /* Set interruption routine */ bbr->rh.next = act->handlers; /* Put our interrupt at the start of the list */ @@ -122,7 +133,8 @@ kern_return_t syscall_notify_interrupt ( void ) { act_set_apc(act); /* Set an APC AST */ - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); return KERN_SUCCESS; /* We're done... */ } @@ -132,7 +144,7 @@ kern_return_t syscall_notify_interrupt ( void ) { * we just leave after releasing our work area */ -void bbSetRupt(ReturnHandler *rh, thread_act_t act) { +void bbSetRupt(ReturnHandler *rh, thread_t act) { savearea *sv; BTTD_t *bttd; @@ -141,19 +153,19 @@ void bbSetRupt(ReturnHandler *rh, thread_act_t act) { bbr = (bbRupt *)rh; /* Make our area convenient */ - if(!(act->mact.bbDescAddr)) { /* Is BlueBox still enabled? */ - kfree((vm_offset_t)bbr, sizeof(bbRupt)); /* No, release the control block */ + if(!(act->machine.bbDescAddr)) { /* Is BlueBox still enabled? 
*/ + kfree(bbr, sizeof(bbRupt)); /* No, release the control block */ return; } - (void)hw_atomic_sub(&act->mact.emPendRupts, 1); /* Uncount this 'rupt */ + (void)hw_atomic_sub(&act->machine.emPendRupts, 1); /* Uncount this 'rupt */ if(!(sv = find_user_regs(act))) { /* Find the user state registers */ - kfree((vm_offset_t)bbr, sizeof(bbRupt)); /* Couldn't find 'em, release the control block */ + kfree(bbr, sizeof(bbRupt)); /* Couldn't find 'em, release the control block */ return; } - bttd = (BTTD_t *)(act->mact.bbDescAddr & -PAGE_SIZE); + bttd = (BTTD_t *)(act->machine.bbDescAddr & -PAGE_SIZE); interruptState = (bttd->InterruptControlWord & kInterruptStateMask) >> kInterruptStateShift; @@ -168,14 +180,14 @@ void bbSetRupt(ReturnHandler *rh, thread_act_t act) { (kInPseudoKernel << kInterruptStateShift); bttd->exceptionInfo.srr0 = (unsigned int)sv->save_srr0; /* Save the current PC */ - sv->save_srr0 = (uint64_t)act->mact.bbInterrupt; /* Set the new PC */ + sv->save_srr0 = (uint64_t)act->machine.bbInterrupt; /* Set the new PC */ bttd->exceptionInfo.sprg1 = (unsigned int)sv->save_r1; /* Save the original R1 */ sv->save_r1 = (uint64_t)bttd->exceptionInfo.sprg0; /* Set the new R1 */ bttd->exceptionInfo.srr1 = (unsigned int)sv->save_srr1; /* Save the original MSR */ sv->save_srr1 &= ~(MASK(MSR_BE)|MASK(MSR_SE)); /* Clear SE|BE bits in MSR */ - act->mact.specFlags &= ~bbNoMachSC; /* reactivate Mach SCs */ + act->machine.specFlags &= ~bbNoMachSC; /* reactivate Mach SCs */ disable_preemption(); /* Don't move us around */ - per_proc_info[cpu_number()].spcFlags = act->mact.specFlags; /* Copy the flags */ + getPerProc()->spcFlags = act->machine.specFlags; /* Copy the flags */ enable_preemption(); /* Ok to move us around */ /* drop through to post int in backup CR2 in ICW */ @@ -190,7 +202,7 @@ void bbSetRupt(ReturnHandler *rh, thread_act_t act) { break; } - kfree((vm_offset_t)bbr, sizeof(bbRupt)); /* Release the control block */ + kfree(bbr, sizeof(bbRupt)); /* Release the control block */ return; } @@ -221,7 +233,7 @@ kern_return_t enable_bluebox( if ( host == HOST_NULL ) return KERN_INVALID_HOST; if ( ! is_suser() ) return KERN_FAILURE; /* We will only do this for the superuser */ - if ( th->top_act->mact.bbDescAddr ) return KERN_FAILURE; /* Bail if already authorized... */ + if ( th->machine.bbDescAddr ) return KERN_FAILURE; /* Bail if already authorized... */ if ( ! (unsigned int) Desc_TableStart ) return KERN_FAILURE; /* There has to be a descriptor page */ if ( ! TWI_TableStart ) return KERN_FAILURE; /* There has to be a TWI table */ @@ -231,7 +243,7 @@ kern_return_t enable_bluebox( /* Align the descriptor to a page */ Desc_TableStart = (char *)((vm_offset_t)Desc_TableStart & -PAGE_SIZE); - ret = vm_map_wire(th->top_act->map, /* Kernel wire the descriptor in the user's map */ + ret = vm_map_wire(th->map, /* Kernel wire the descriptor in the user's map */ (vm_offset_t)Desc_TableStart, (vm_offset_t)Desc_TableStart + PAGE_SIZE, VM_PROT_READ | VM_PROT_WRITE, @@ -242,11 +254,11 @@ kern_return_t enable_bluebox( } physdescpage = /* Get the physical page number of the page */ - pmap_find_phys(th->top_act->map->pmap, (addr64_t)Desc_TableStart); + pmap_find_phys(th->map->pmap, (addr64_t)Desc_TableStart); ret = kmem_alloc_pageable(kernel_map, &kerndescaddr, PAGE_SIZE); /* Find a virtual address to use */ if(ret != KERN_SUCCESS) { /* Could we get an address? 
*/ - (void) vm_map_unwire(th->top_act->map, /* No, unwire the descriptor */ + (void) vm_map_unwire(th->map, /* No, unwire the descriptor */ (vm_offset_t)Desc_TableStart, (vm_offset_t)Desc_TableStart + PAGE_SIZE, TRUE); @@ -259,32 +271,32 @@ kern_return_t enable_bluebox( bttd = (BTTD_t *)kerndescaddr; /* Get the address in a convienient spot */ - th->top_act->mact.bbDescAddr = (unsigned int)kerndescaddr+origdescoffset; /* Set kernel address of the table */ - th->top_act->mact.bbUserDA = (unsigned int)Desc_TableStart; /* Set user address of the table */ - th->top_act->mact.bbTableStart = (unsigned int)TWI_TableStart; /* Set address of the trap table */ - th->top_act->mact.bbTaskID = (unsigned int)taskID; /* Assign opaque task ID */ - th->top_act->mact.bbTaskEnv = 0; /* Clean task environment data */ - th->top_act->mact.emPendRupts = 0; /* Clean pending 'rupt count */ - th->top_act->mact.bbTrap = bttd->TrapVector; /* Remember trap vector */ - th->top_act->mact.bbSysCall = bttd->SysCallVector; /* Remember syscall vector */ - th->top_act->mact.bbInterrupt = bttd->InterruptVector; /* Remember interrupt vector */ - th->top_act->mact.bbPending = bttd->PendingIntVector; /* Remember pending vector */ - th->top_act->mact.specFlags &= ~(bbNoMachSC | bbPreemptive); /* Make sure mach SCs are enabled and we are not marked preemptive */ - th->top_act->mact.specFlags |= bbThread; /* Set that we are Classic thread */ + th->machine.bbDescAddr = (unsigned int)kerndescaddr+origdescoffset; /* Set kernel address of the table */ + th->machine.bbUserDA = (unsigned int)Desc_TableStart; /* Set user address of the table */ + th->machine.bbTableStart = (unsigned int)TWI_TableStart; /* Set address of the trap table */ + th->machine.bbTaskID = (unsigned int)taskID; /* Assign opaque task ID */ + th->machine.bbTaskEnv = 0; /* Clean task environment data */ + th->machine.emPendRupts = 0; /* Clean pending 'rupt count */ + th->machine.bbTrap = bttd->TrapVector; /* Remember trap vector */ + th->machine.bbSysCall = bttd->SysCallVector; /* Remember syscall vector */ + th->machine.bbInterrupt = bttd->InterruptVector; /* Remember interrupt vector */ + th->machine.bbPending = bttd->PendingIntVector; /* Remember pending vector */ + th->machine.specFlags &= ~(bbNoMachSC | bbPreemptive); /* Make sure mach SCs are enabled and we are not marked preemptive */ + th->machine.specFlags |= bbThread; /* Set that we are Classic thread */ if(!(bttd->InterruptVector)) { /* See if this is a preemptive (MP) BlueBox thread */ - th->top_act->mact.specFlags |= bbPreemptive; /* Yes, remember it */ + th->machine.specFlags |= bbPreemptive; /* Yes, remember it */ } disable_preemption(); /* Don't move us around */ - per_proc_info[cpu_number()].spcFlags = th->top_act->mact.specFlags; /* Copy the flags */ + getPerProc()->spcFlags = th->machine.specFlags; /* Copy the flags */ enable_preemption(); /* Ok to move us around */ { /* mark the proc to indicate that this is a TBE proc */ extern void tbeproc(void *proc); - tbeproc(th->top_act->task->bsd_info); + tbeproc(th->task->bsd_info); } return KERN_SUCCESS; @@ -292,37 +304,37 @@ kern_return_t enable_bluebox( kern_return_t disable_bluebox( host_t host ) { /* User call to terminate bluebox */ - thread_act_t act; + thread_t act; - act = current_act(); /* Get our thread */ + act = current_thread(); /* Get our thread */ if (host == HOST_NULL) return KERN_INVALID_HOST; if(!is_suser()) return KERN_FAILURE; /* We will only do this for the superuser */ - if(!act->mact.bbDescAddr) return KERN_FAILURE; /* Bail if not 
authorized... */ + if(!act->machine.bbDescAddr) return KERN_FAILURE; /* Bail if not authorized... */ disable_bluebox_internal(act); /* Clean it all up */ return KERN_SUCCESS; /* Leave */ } -void disable_bluebox_internal(thread_act_t act) { /* Terminate bluebox */ +void disable_bluebox_internal(thread_t act) { /* Terminate bluebox */ (void) vm_map_unwire(act->map, /* Unwire the descriptor in user's address space */ - (vm_offset_t)act->mact.bbUserDA, - (vm_offset_t)act->mact.bbUserDA + PAGE_SIZE, + (vm_offset_t)act->machine.bbUserDA, + (vm_offset_t)act->machine.bbUserDA + PAGE_SIZE, FALSE); - kmem_free(kernel_map, (vm_offset_t)act->mact.bbDescAddr & -PAGE_SIZE, PAGE_SIZE); /* Release the page */ + kmem_free(kernel_map, (vm_offset_t)act->machine.bbDescAddr & -PAGE_SIZE, PAGE_SIZE); /* Release the page */ - act->mact.bbDescAddr = 0; /* Clear kernel pointer to it */ - act->mact.bbUserDA = 0; /* Clear user pointer to it */ - act->mact.bbTableStart = 0; /* Clear user pointer to TWI table */ - act->mact.bbTaskID = 0; /* Clear opaque task ID */ - act->mact.bbTaskEnv = 0; /* Clean task environment data */ - act->mact.emPendRupts = 0; /* Clean pending 'rupt count */ - act->mact.specFlags &= ~(bbNoMachSC | bbPreemptive | bbThread); /* Clean up Blue Box enables */ + act->machine.bbDescAddr = 0; /* Clear kernel pointer to it */ + act->machine.bbUserDA = 0; /* Clear user pointer to it */ + act->machine.bbTableStart = 0; /* Clear user pointer to TWI table */ + act->machine.bbTaskID = 0; /* Clear opaque task ID */ + act->machine.bbTaskEnv = 0; /* Clean task environment data */ + act->machine.emPendRupts = 0; /* Clean pending 'rupt count */ + act->machine.specFlags &= ~(bbNoMachSC | bbPreemptive | bbThread); /* Clean up Blue Box enables */ disable_preemption(); /* Don't move us around */ - per_proc_info[cpu_number()].spcFlags = act->mact.specFlags; /* Copy the flags */ + getPerProc()->spcFlags = act->machine.specFlags; /* Copy the flags */ enable_preemption(); /* Ok to move us around */ return; } @@ -370,45 +382,46 @@ int bb_settaskenv( struct savearea *save ) { int i; task_t task; - thread_act_t act, fact; + thread_t act, fact; task = current_task(); /* Figure out who our task is */ task_lock(task); /* Lock our task */ - fact = (thread_act_t)task->threads.next; /* Get the first activation on task */ + fact = (thread_t)task->threads.next; /* Get the first activation on task */ act = 0; /* Pretend we didn't find it yet */ for(i = 0; i < task->thread_count; i++) { /* Scan the whole list */ - if(fact->mact.bbDescAddr) { /* Is this a Blue thread? */ - if ( fact->mact.bbTaskID == save->save_r3 ) { /* Is this the task we are looking for? */ + if(fact->machine.bbDescAddr) { /* Is this a Blue thread? */ + if ( fact->machine.bbTaskID == save->save_r3 ) { /* Is this the task we are looking for? */ act = fact; /* Yeah... */ break; /* Found it, Bail the loop... */ } } - fact = (thread_act_t)fact->task_threads.next; /* Go to the next one */ + fact = (thread_t)fact->task_threads.next; /* Go to the next one */ } if ( !act || !act->active) { task_unlock(task); /* Release task lock */ - goto failure; + save->save_r3 = -1; /* we failed to find the taskID */ + return 1; } - act_lock_thread(act); /* Make sure this stays 'round */ + thread_reference(act); + task_unlock(task); /* Safe to release now */ - act->mact.bbTaskEnv = save->save_r4; - if(act == current_act()) { /* Are we setting our own? 
*/ + thread_mtx_lock(act); /* Make sure this stays 'round */ + + act->machine.bbTaskEnv = save->save_r4; + if(act == current_thread()) { /* Are we setting our own? */ disable_preemption(); /* Don't move us around */ - per_proc_info[cpu_number()].ppbbTaskEnv = act->mact.bbTaskEnv; /* Remember the environment */ + getPerProc()->ppbbTaskEnv = act->machine.bbTaskEnv; /* Remember the environment */ enable_preemption(); /* Ok to move us around */ } - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); save->save_r3 = 0; return 1; - -failure: - save->save_r3 = -1; /* we failed to find the taskID */ - return 1; } diff --git a/osfmk/ppc/_setjmp.s b/osfmk/ppc/_setjmp.s index c8a40b93d..cfe3d05cf 100644 --- a/osfmk/ppc/_setjmp.s +++ b/osfmk/ppc/_setjmp.s @@ -81,8 +81,8 @@ ENTRY(_setjmp,TAG_NO_FRAME_USED) stw r0, 88(ARG0) /* Fixed point exception register */ #if FLOATING_POINT_SUPPORT /* TODO NMGS probably not needed for kern */ - mffs r0 - stw r0, 92(ARG0) /* Floating point status register */ + mffs f0 /* get FPSCR in low 32 bits of f0 */ + stfiwx f0, 92(ARG0) /* Floating point status register */ stfd f14, 96(ARG0) /* Floating point context - 8 byte aligned */ stfd f15, 104(ARG0) @@ -153,8 +153,8 @@ ENTRY(_longjmp, TAG_NO_FRAME_USED) /* TODO NMGS - need correct tag */ mtxer r0 #ifdef FLOATING_POINT_SUPPORT - lwz r0, 92(ARG0) /* Floating point status register */ - mtfs r0 + lfd f0, 92-4(ARG0) /* get Floating point status register in low 32 bits of f0 */ + mtfsf 0xFF,f0 /* restore FPSCR */ lfd f14, 96(ARG0) /* Floating point context - 8 byte aligned */ lfd f15, 104(ARG0) diff --git a/osfmk/ppc/aligned_data.s b/osfmk/ppc/aligned_data.s index 217edea7b..7145a2493 100644 --- a/osfmk/ppc/aligned_data.s +++ b/osfmk/ppc/aligned_data.s @@ -43,27 +43,25 @@ #include -#include #include #include #include #include #include -; -; NOTE: We need this only if PREEMPTSTACK is set to non-zero in hw_lock. -; Make sure they are set to the same thing -; -#define PREEMPTSTACK 0 - .data /* 4096-byte aligned areas */ - .globl EXT(per_proc_info) + .globl EXT(PerProcTable) + .align 12 +EXT(PerProcTable): ; Per processor table + .space (ppeSize*MAX_CPUS),0 ; (filled with 0s) + + .globl EXT(BootProcInfo) .align 12 -EXT(per_proc_info): ; Per processor data area - .space (ppSize*NCPUS),0 ; (filled with 0s) +EXT(BootProcInfo): ; Per processor data area + .space ppSize,0 ; (filled with 0s) /* 512-byte aligned areas */ @@ -85,19 +83,6 @@ EXT(GratefulDebWork): ; Enough for 2 rows of 8 chars of 16-pixel wide 32- debstash: .set .,.+256 -#if PREEMPTSTACK - -; -; NOTE: We need this only if PREEMPTSTACK is set to non-zero in hw_lock. -; - - .globl EXT(DBGpreempt) ; preemption debug stack - .align 8 -EXT(DBGpreempt): - .set .,.+(NCPUS*PREEMPTSTACK*16) -#endif - - /* 128-byte aligned areas */ .globl EXT(mapCtl) @@ -116,18 +101,6 @@ fwdisplock: EXT(free_mappings): .long 0 - .globl EXT(syncClkSpot) - .align 7 -EXT(syncClkSpot): - .long 0 - .long 0 - .long 0 - .long 0 - .long 0 - .long 0 - .long 0 - .long 0 - .globl EXT(NMIss) .align 7 EXT(NMIss): @@ -196,39 +169,30 @@ EXT(dbspecrs): .set .,.+(336*4) /* - * Interrupt and debug stacks go here + * Boot processor Interrupt and debug stacks go here. */ - + + .section __HIB, __data + .align PPC_PGSHIFT - .globl EXT(FixedStackStart) -EXT(FixedStackStart): .globl EXT(intstack) EXT(intstack): - .set .,.+INTSTACK_SIZE*NCPUS - + + .set .,.+INTSTACK_SIZE + + + /* back to the regular __DATA section. 
*/ + + .section __DATA, __data + .align PPC_PGSHIFT + /* Debugger stack - used by the debugger if present */ -/* NOTE!!! Keep the debugger stack right after the interrupt stack */ .globl EXT(debstack) EXT(debstack): - .set ., .+KERNEL_STACK_SIZE*NCPUS - - .globl EXT(FixedStackEnd) -EXT(FixedStackEnd): - - .align ALIGN - .globl EXT(intstack_top_ss) -EXT(intstack_top_ss): - .long EXT(intstack)+INTSTACK_SIZE-FM_SIZE /* intstack_top_ss points to the top of interrupt stack */ - - .align ALIGN - .globl EXT(debstack_top_ss) -EXT(debstack_top_ss): + .set ., .+KERNEL_STACK_SIZE - .long EXT(debstack)+KERNEL_STACK_SIZE-FM_SIZE /* debstack_top_ss points to the top of debug stack */ + .section __DATA, __data - .globl EXT(debstackptr) -EXT(debstackptr): - .long EXT(debstack)+KERNEL_STACK_SIZE-FM_SIZE diff --git a/osfmk/ppc/asm.h b/osfmk/ppc/asm.h index ea1cd5723..842167e3e 100644 --- a/osfmk/ppc/asm.h +++ b/osfmk/ppc/asm.h @@ -456,13 +456,6 @@ #define busFIR 0x0A00 #define busFIRrst 0x0A10 -; GUS Mode Register -#define GUSModeReg 0x0430 -#define GUSMdmapen 0x00008000 -#define GUSMstgtdis 0x00000080 -#define GUSMstgttim 0x00000038 -#define GUSMstgttoff 0x00000004 - ; PowerTune #define PowerTuneControlReg 0x0AA001 #define PowerTuneStatusReg 0x408001 @@ -662,6 +655,13 @@ #endif /* ASSEMBLER */ +/* GUS Mode Register */ +#define GUSModeReg 0x0430 +#define GUSMdmapen 0x00008000 +#define GUSMstgtdis 0x00000080 +#define GUSMstgttim 0x00000038 +#define GUSMstgttoff 0x00000004 + /* Tags are placed before Immediately Following Code (IFC) for the debugger * to be able to deduce where to find various registers when backtracing * diff --git a/osfmk/ppc/ast.h b/osfmk/ppc/ast.h index f8d72fe8d..8b0e95c6d 100644 --- a/osfmk/ppc/ast.h +++ b/osfmk/ppc/ast.h @@ -30,6 +30,9 @@ #ifndef _PPC_AST_H_ #define _PPC_AST_H_ -#define AST_PPC_CHUD 0x80000000 +#define AST_PPC_CHUD_URGENT 0x80000000 +#define AST_PPC_CHUD 0x40000000 + +#define AST_PPC_CHUD_ALL (AST_PPC_CHUD_URGENT|AST_PPC_CHUD) #endif /* _PPC_AST_H_ */ diff --git a/osfmk/ppc/atomic_switch.s b/osfmk/ppc/atomic_switch.s index dc80415c4..effa74de1 100644 --- a/osfmk/ppc/atomic_switch.s +++ b/osfmk/ppc/atomic_switch.s @@ -104,7 +104,8 @@ ENTRY(atomic_switch_trap, TAG_NO_FRAME_USED) .L_CallPseudoKernel: - mfsprg r2,0 ; Get the per_proc + mfsprg r2,1 ; Get the current activation + lwz r2,ACT_PER_PROC(r2) ; Get the per_proc block rlwinm r6,r26,0,0,19 ; Start of page is bttd lwz r7,ACT_MACT_SPF(r13) ; Get special flags lwz r1,BTTD_INTERRUPT_VECTOR(r6) ; Get interrupt vector @@ -213,7 +214,8 @@ ENTRY(atomic_switch_trap, TAG_NO_FRAME_USED) stw r1,savecr(r4) ; Update CR .L_ExitFromPreemptiveThread: - mfsprg r3,0 ; Get the per_proc + mfsprg r3,1 ; Get the current activation + lwz r3,ACT_PER_PROC(r3) ; Get the per_proc block lwz r2,savesrr1+4(r4) ; Get current MSR lwz r1,BEDA_SRR1(r26) ; Get new MSR stw r7,ACT_MACT_SPF(r13) ; Update special flags diff --git a/osfmk/ppc/bcopy.s b/osfmk/ppc/bcopy.s index 30e5b7a16..12edce704 100644 --- a/osfmk/ppc/bcopy.s +++ b/osfmk/ppc/bcopy.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,58 +20,39 @@ * @APPLE_LICENSE_HEADER_END@ */ ; -; Copy bytes of data around. handles overlapped data. -; -; Change this to use Altivec later on, and maybe floating point. +; Copy bytes of data around. Handles overlapped data. 
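Annotation: the rewritten bcopy decides between a forward and a reverse move with one unsigned compare of (dest - source) against the length, as the shortcopy and copyit32 comments later in this file spell out. A minimal C sketch of that test follows; the function name is illustrative, not part of the patch.

#include <stddef.h>
#include <stdint.h>

static void sketch_overlap_copy(void *dst, const void *src, size_t len)
{
    uint8_t *d = dst;
    const uint8_t *s = src;

    /* Unsigned compare: (dest - source) < len only when the destination
     * overlaps the tail of the source, so only then move in reverse. */
    if ((uintptr_t)d - (uintptr_t)s >= len) {
        while (len--)
            *d++ = *s++;            /* forward move, ascending addresses */
    } else {
        d += len;                   /* start past the last byte */
        s += len;
        while (len--)
            *--d = *--s;            /* reverse move, descending addresses */
    }
}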
; ; #include #include #include -; Use CR5_lt to indicate non-cached +; These routines use CR5 for certain flags: +; Use CR5_lt to indicate non-cached (in bcopy and memcpy) #define noncache 20 -; Use CR5_gt to indicate that we need to turn data translation back on -#define fixxlate 21 - -; Use CR5_eq to indicate that we need to invalidate bats (if 32-bit) or turn off -; 64-bit mode (if 64-bit) before returning to our caller. We overload the -; bit to reduce the number of conditional branches at bcopy exit. -#define restorex 22 -; Use CR5_so to indicate that we need to restore real-mode cachability -; Only needed on 64-bit machines -#define flipcache 23 +; The bcopy_phys variants use a stack frame so they can call bcopy as a subroutine. +#define BCOPY_SF_SIZE 32 // total size +#define BCOPY_SF_MSR 16 // we save caller's MSR here (possibly minus VEC and FP) -; -; bcopy_nc(from, to, nbytes) -; -; bcopy_nc operates on non-cached memory so we can not use any kind -; of cache instructions. -; - .align 5 - .globl EXT(bcopy_nc) +#define kShort 32 // short operands are special cased -LEXT(bcopy_nc) - - crset noncache ; Set non-cached - b bcpswap -; -; void bcopy_physvir(from, to, nbytes) +; void bcopy_physvir_32(from, to, nbytes) +; ; Attempt to copy physically addressed memory with translation on if conditions are met. ; Otherwise do a normal bcopy_phys. This routine is used because some 32-bit processors ; are very slow doing real-mode (translation off) copies, so we set up temporary BATs ; for the passed phys addrs and do the copy with translation on. ; -; Rules are: neither source nor destination can cross a page. +; Rules are: - neither source nor destination can cross a page. +; - Interrupts must be disabled when this routine is called. +; - Translation must be on when called. ; -; Interrupts must be disabled throughout the copy when this is called. -; To do this, we build a -; 128 DBAT for both the source and sink. If both are the same, only one is -; loaded. We do not touch the IBATs, so there is no issue if either physical page +; To do the copy, we build a 128 DBAT for both the source and sink. If both are the same, only one +; is loaded. We do not touch the IBATs, so there is no issue if either physical page ; address is the same as the virtual address of the instructions we are executing. ; ; At the end, we invalidate the used DBATs. @@ -81,28 +62,31 @@ LEXT(bcopy_nc) ; passed in value. This should be ok since we can not have any bigger than 32 bit addresses ; there anyhow. ; -; Note, this one will not work in user state -; +; Note also that this routine is used only on 32-bit machines. If you're contemplating use +; on a 64-bit processor, use the physical memory window instead; please refer to copypv() +; for an example of how this is done. .align 5 - .globl EXT(bcopy_physvir) - -LEXT(bcopy_physvir) + .globl EXT(bcopy_physvir_32) - crclr flipcache ; (HACK) No cache flip needed - mfsprg r8,2 ; get processor feature flags +LEXT(bcopy_physvir_32) + mflr r0 ; get return address rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg - addic. 
r0,r7,-1 ; Get length - 1 + mfsprg r8,2 ; get processor feature flags + stw r0,8(r1) ; save return address rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits - add r11,r3,r0 ; Point to last byte of sink - rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg + stwu r1,-BCOPY_SF_SIZE(r1) ; push on a stack frame so we can call bcopy mtcrf 0x02,r8 ; move pf64Bit to cr6 so we can test - rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits + subi r0,r7,1 ; get length - 1 + rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg + add r11,r3,r0 ; Point to last byte of sink mr r5,r7 ; Get the length into the right register - cmplw cr1,r3,r4 ; Does source == sink? - bt++ pf64Bitb,bcopy_phys1 ; if 64-bit processor, use standard routine (no BATs) + rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits + +; This test for page overflow may not work if the length is negative. Negative lengths are invalid input +; to bcopy_physvir() on 32-bit machines, and will result in a panic. + add r12,r4,r0 ; Point to last byte of source - bltlr- ; Bail if length is 0 or way too big xor r7,r11,r3 ; See if we went to next page xor r8,r12,r4 ; See if we went to next page or r0,r7,r8 ; Combine wrap @@ -114,7 +98,6 @@ LEXT(bcopy_physvir) li r8,2 ; Set validity flags bne- bcopy_phys1 ; Overflowed page, do normal physical copy... - crset restorex ; Remember to trash BATs on the way out rlwimi r11,r9,0,15,31 ; Set sink lower DBAT value rlwimi r12,r9,0,15,31 ; Set source lower DBAT value rlwimi r7,r11,0,0,14 ; Set sink upper DBAT value @@ -130,21 +113,30 @@ LEXT(bcopy_physvir) mtdbatl 1,r12 ; Set source lower DBAT mtdbatu 1,r8 ; Set source upper DBAT - + bcpvsame: - sync ; wait for BAT to stabilize + sync ; wait for the BATs to stabilize isync - mr r6,r3 ; Set source - crclr noncache ; Set cached - crclr fixxlate ; Set translation already ok - - b copyit32 ; Go copy it... + + bl EXT(bcopy) ; BATs set up, args in r3-r5, so do the copy with DR on + + li r0,0 ; Get set to invalidate upper half of BATs + sync ; Make sure all is well + mtdbatu 0,r0 ; Clear sink upper DBAT + mtdbatu 1,r0 ; Clear source upper DBAT + sync + isync + + lwz r0,BCOPY_SF_SIZE+8(r1) ; get return address + addi r1,r1,BCOPY_SF_SIZE ; pop off stack frame + mtlr r0 + blr + -; ; void bcopy_phys(from, to, nbytes) -; Turns off data translation before the copy. Note, this one will -; not work in user state. This routine is used on 32 and 64-bit -; machines. +; +; Turns off data translation before the copy. This one will not work in user state. +; This routine is used on 32 and 64-bit machines. ; ; Note that the address parameters are long longs. We will transform these to 64-bit ; values. Note that on 32-bit architectures that this will ignore the high half of the @@ -155,209 +147,265 @@ bcpvsame: ; boundary between RAM and I/O space. Good chance of hanging the machine and this code ; will not check, so be careful. ; +; NOTE: when called, translation must be on, and we must be in 32-bit mode. +; Interrupts may or may not be disabled. 
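Annotation: each physical-copy entry point opens with the same rlwinm/rlwimi pair, which folds the (high, low) register halves of a long long physical address into a single 64-bit value; on 32-bit CPUs only the low word is meaningful, as the comments above note. A rough C equivalent, with an illustrative helper name:

#include <stdint.h>

/* rlwinm duplicates the register's low word into its high half, then
 * rlwimi inserts the second register's low word beneath it: net effect
 * is simply hi:lo. */
static inline uint64_t combine_paddr(uint32_t hi, uint32_t lo)
{
    return ((uint64_t)hi << 32) | (uint64_t)lo;
}

With the addresses assembled, bcopy_phys masks DR, VEC, and FP out of the MSR and calls bcopy as an ordinary subroutine, which is what the new stack frame is for.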
.align 5 .globl EXT(bcopy_phys) LEXT(bcopy_phys) - crclr flipcache ; (HACK) No cache flip needed + mflr r0 ; get return address rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + stw r0,8(r1) ; save mfsprg r8,2 ; get processor feature flags + stwu r1,-BCOPY_SF_SIZE(r1) ; push on a stack frame so we can call bcopy rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg mtcrf 0x02,r8 ; move pf64Bit to cr6 so we can test rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits mr r5,r7 ; Get the length into the right register - -bcopy_phys1: ; enter from bcopy_physvir with pf64Bit already in cr6 + +bcopy_phys1: ; enter from bcopy_physvir with pf64Bit in cr6 and parms in r3-r5 mfmsr r9 ; Get the MSR - crclr noncache ; Set cached + lis r6,hi16(MASK(MSR_VEC)) ; Get vector enable + ori r6,r6,lo16(MASK(MSR_FP)|MASK(MSR_DR)) ; Add in FP and DR + andc r9,r9,r6 ; unconditionally turn DR, VEC, and FP off bt++ pf64Bitb,bcopy_phys64 ; skip if 64-bit (only they take hint) ; 32-bit CPUs - - sub. r0,r3,r4 ; to==from? - rlwinm r8,r9,0,MSR_DR_BIT,MSR_DR_BIT ; was translation on? - cmpwi cr1,r8,0 ; set cr1 beq if translation was off - oris r8,r8,hi16(MASK(MSR_VEC)) ; Get vector enable - cmplwi cr7,r5,0 ; Check if we have a 0 length - beqlr- ; bail if to==from - ori r8,r8,lo16(MASK(MSR_FP)) ; Get FP - mr r6,r3 ; Set source - andc r9,r9,r8 ; Turn off translation if it is on (should be) and FP, VEC - beqlr- cr7 ; Bail if length is 0 - - crclr restorex ; Make sure we do not trash BATs on the way out - mtmsr r9 ; Set DR translation off + + mtmsr r9 ; turn DR, FP, and VEC off isync ; Wait for it - crnot fixxlate,cr1_eq ; Remember to turn on translation if it was - b copyit32 ; Go copy it... + bl EXT(bcopy) ; do the copy with translation off and caching on -; 64-bit: turn DR off and SF on, remember if we need to restore on way out. + mfmsr r9 ; Get the MSR + ori r9,r9,lo16(MASK(MSR_DR)) ; turn translation back on (but leave VEC and FP off) + mtmsr r9 ; restore msr + isync ; wait for it to happen + lwz r0,BCOPY_SF_SIZE+8(r1) ; get return address once translation is back on + mtlr r0 + addi r1,r1,BCOPY_SF_SIZE ; pop off stack frame + blr -bcopy_phys64: ; r9 = MSR + +; 64-bit: turn DR off and SF on. - srdi r2,r3,31 ; (HACK) Get a 1 if source is in I/O memory - srdi. r0,r9,63-MSR_SF_BIT ; set cr0 beq on if SF was off when we were called - rlwinm r8,r9,MSR_DR_BIT+1,31,31 ; r8 <- DR bit right justified - cmpld cr1,r3,r4 ; to==from? +bcopy_phys64: ; r9 = MSR with DP, VEC, and FP off + ori r8,r9,lo16(MASK(MSR_DR)) ; make a copy with DR back on... this is what we return to caller + srdi r2,r3,31 ; Get a 1 if source is in I/O memory li r0,1 ; Note - we use this in a couple places below - lis r6,hi16(MASK(MSR_VEC)) ; Get vector enable - cmpwi cr7,r5,0 ; length==0 ? - ori r6,r6,lo16(MASK(MSR_FP)|MASK(MSR_DR)) ; Add in FP and DR - beqlr-- cr1 ; bail if to==from - srdi r10,r4,31 ; (HACK) Get a 1 if sink is in I/O memory - rldimi r9,r0,63,MSR_SF_BIT ; set SF on - beqlr-- cr7 ; bail if length==0 - andc r9,r9,r6 ; turn DR, VEC, FP off - cmpwi cr1,r8,0 ; was DR on? - crmove restorex,cr0_eq ; if SF was off, remember to turn back off before we return + srdi r10,r4,31 ; Get a 1 if sink is in I/O memory + std r8,BCOPY_SF_MSR(r1) ; save caller's MSR so we remember whether EE was on + rldimi r9,r0,63,MSR_SF_BIT ; set SF on in MSR we will copy with + cmpldi cr0,r2,1 ; Is source in I/O memory? 
+ cmpldi cr7,r10,1 ; Is sink in I/O memory? mtmsrd r9 ; turn 64-bit addressing on, data translation off - cmpldi cr0,r2,1 ; (HACK) Is source in I/O memory? isync ; wait for it to happen - mr r6,r3 ; Set source - cmpldi cr7,r10,1 ; (HACK) Is sink in I/O memory? - crnot fixxlate,cr1_eq ; if DR was on, remember to turn back on before we return - - cror flipcache,cr0_eq,cr7_eq ; (HACK) See if either source or sink is in I/O area - - rlwinm r10,r9,MSR_EE_BIT+1,31,31 ; (HACK GLORIOUS HACK) Isolate the EE bit - sldi r11,r0,31-MSR_EE_BIT ; (HACK GLORIOUS HACK)) Get a mask for the EE bit - sldi r0,r0,32+8 ; (HACK) Get the right bit to turn off caching - bf++ flipcache,copyit64 ; (HACK) No need to mess with caching... - -; -; HACK GLORIOUS HACK - when we force of caching, we need to also force off -; interruptions. We are out of CR bits, so we need to stash the entry EE -; somewheres. It is in the XER.... We NEED to change this!!!! -; + cror cr7_eq,cr0_eq,cr7_eq ; See if either source or sink is in I/O area + beq-- cr7,io_space_real_mode_copy ; an operand is in I/O space + + bl EXT(bcopy) ; do copy with DR off and SF on, cache enabled + +bcopy_phys64x: + mfmsr r9 ; Get the MSR we used to copy + rldicl r9,r9,0,MSR_SF_BIT+1 ; clear SF + ori r9,r9,lo16(MASK(MSR_DR)) ; turn translation back on + mtmsrd r9 ; turn 64-bit mode off, translation back on + isync ; wait for it to happen + lwz r0,BCOPY_SF_SIZE+8(r1) ; get return address once translation is back on + ld r8,BCOPY_SF_MSR(r1) ; get caller's MSR once translation is back on + mtlr r0 + mtmsrd r8,1 ; turn EE back on if necessary + addi r1,r1,BCOPY_SF_SIZE ; pop off stack frame + blr - mtxer r10 ; (HACK GLORIOUS HACK) Remember EE - andc r9,r9,r11 ; (HACK GLORIOUS HACK) Turn off EE bit - mfspr r2,hid4 ; (HACK) Get HID4 - crset noncache ; (HACK) Set non-cached - mtmsrd r9 ; (HACK GLORIOUS HACK) Force off EE - or r2,r2,r0 ; (HACK) Set bit to make real accesses cache-inhibited - sync ; (HACK) Sync up - li r0,1 - mtspr hid4,r2 ; (HACK) Make real accesses cache-inhibited - isync ; (HACK) Toss prefetches - - lis r12,0xE000 ; (HACK) Get the unlikeliest ESID possible - srdi r12,r12,1 ; (HACK) Make 0x7FFFFFFFF0000000 - slbie r12 ; (HACK) Make sure the ERAT is cleared +; We need to copy with DR off, but one of the operands is in I/O space. To avoid wedging U3, +; which cannot handle a cache burst in I/O space, we must turn caching off for the real memory access. +; This can only be done by setting bits in HID4. We cannot lose control and execute random code in +; this state, so we have to disable interrupts as well. This is an unpleasant hack. 
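Annotation: the cache-inhibit control that io_space_real_mode_copy sets and later clears is built with "li r0,1 ; sldi r0,r0,32+8", i.e. 1 << 40, which is HID4 bit 23 in IBM bit numbering. The constant below is read off the assembly; the macro and function names are assumptions, not Apple's:

#include <stdint.h>

/* li r0,1 ; sldi r0,r0,32+8 builds this mask (HID4 bit 23, IBM numbering). */
#define SKETCH_HID4_REALMODE_CI  (1ULL << 40)

static inline uint64_t hid4_inhibit(uint64_t hid4)  /* before the copy */
{
    return hid4 | SKETCH_HID4_REALMODE_CI;
}

static inline uint64_t hid4_restore(uint64_t hid4)  /* after the copy */
{
    return hid4 & ~SKETCH_HID4_REALMODE_CI;
}

The parts a C sketch cannot capture are the sync/isync bracketing around the mtspr and the slbie of an improbable ESID to scrub the ERAT, both of which the assembly performs on each side of the copy.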
+ +io_space_real_mode_copy: ; r0=1, r9=MSR we want to copy with + sldi r11,r0,31-MSR_EE_BIT ; Get a mask for the EE bit + sldi r0,r0,32+8 ; Get the right bit to turn off caching + andc r9,r9,r11 ; Turn off EE bit + mfspr r2,hid4 ; Get HID4 + mtmsrd r9,1 ; Force off EE + or r2,r2,r0 ; Set bit to make real accesses cache-inhibited + sync ; Sync up + mtspr hid4,r2 ; Make real accesses cache-inhibited + isync ; Toss prefetches + + lis r12,0xE000 ; Get the unlikeliest ESID possible + srdi r12,r12,1 ; Make 0x7FFFFFFFF0000000 + slbie r12 ; Make sure the ERAT is cleared - sync ; (HACK) - isync ; (HACK) + sync + isync - b copyit64 + bl EXT(bcopy_nc) ; copy with SF on and EE, DR, VEC, and FP off, cache inhibited + li r0,1 ; Get a 1 + sldi r0,r0,32+8 ; Get the right bit to turn off caching + mfspr r2,hid4 ; Get HID4 + andc r2,r2,r0 ; Clear bit to make real accesses cache-inhibited + sync ; Sync up + mtspr hid4,r2 ; Make real accesses not cache-inhibited + isync ; Toss prefetches + + lis r12,0xE000 ; Get the unlikeliest ESID possible + srdi r12,r12,1 ; Make 0x7FFFFFFFF0000000 + slbie r12 ; Make sure the ERAT is cleared + b bcopy_phys64x + +; +; shortcopy +; +; Special case short operands (<32 bytes), which are very common. Note that the check for +; reverse vs normal moves isn't quite correct in 64-bit mode; in rare cases we will move in +; reverse when it wasn't necessary to do so. This is OK, since performance of the two cases +; is similar. We do get the direction right when it counts (ie, when the operands overlap.) +; Also note that we use the G3/G4 "backend" code, even on G5. This is OK too, since G5 has +; plenty of load/store dispatch bandwidth in this case, the extra ops are hidden by latency, +; and using word instead of doubleword moves reduces the possibility of unaligned accesses, +; which cost about 20 cycles if they cross a 32-byte boundary on G5. Finally, because we +; might do unaligned accesses this code cannot be called from bcopy_nc(). +; r4 = destination +; r5 = length (<32) +; r6 = source +; r12 = (dest - source) + + .align 5 +shortcopy: + cmplw r12,r5 ; must move reverse if (dest-source)0) +; r6 = source +; r12 = (dest - source) +; cr5 = noncache flag + +copyit32: ; WARNING! can drop down to this label + cmplw cr1,r12,r5 ; must move reverse if (dest-source)0) +; r6 = source +; r8 = inverse of largest mask smaller than operand length +; r9 = neg(dest), used to compute alignment +; cr5 = noncache flag + +forward32bit: ; enter from 64-bit CPUs with word aligned uncached operands + rlwinm r7,r9,0,0x1F ; get bytes to 32-byte-align destination + andc. r0,r7,r8 ; limit to the maximum front end move + mtcrf 0x01,r0 ; move length to cr6 and cr7 one cr at a time... beq alline ; Already on a line... + mtcrf 0x02,r0 ; ...since moving more than one is slower on G4 and G5 + sub r5,r5,r0 ; Set the length left to move + bf 31,alhalf ; No single byte to do... lbz r7,0(r6) ; Get the byte addi r6,r6,1 ; Point to the next @@ -407,43 +455,45 @@ alquad: bf 27,alline ; No quad to do... ; Sink is line aligned here alline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move - mtcrf 3,r5 ; Make branch mask for backend partial moves - rlwinm r11,r5,0,0,26 ; Get number of bytes we are going to move + mtcrf 0x02,r5 ; move length to cr6 and cr7 one cr at a time... + mtcrf 0x01,r5 ; ...since moving more than one is slower on G4 and G5 beq- backend ; No full lines to move - - sub r5,r5,r11 ; Calculate the residual - li r10,96 ; Stride for touch ahead - -nxtline: subic. 
r0,r0,1 ; Account for the line now - + + mtctr r0 ; set up loop count + li r0,96 ; Stride for touch ahead + b nxtline + + .align 4 +nxtline: + lwz r2,0(r6) ; Get the first word + lwz r5,4(r6) ; Get the second word + lwz r7,8(r6) ; Get the third word + lwz r8,12(r6) ; Get the fourth word + lwz r9,16(r6) ; Get the fifth word + lwz r10,20(r6) ; Get the sixth word + lwz r11,24(r6) ; Get the seventh word + lwz r12,28(r6) ; Get the eighth word bt- noncache,skipz ; Skip if we are not cached... - dcbz br0,r4 ; Blow away the whole line because we are replacing it - dcbt r6,r10 ; Touch ahead a bit - -skipz: lwz r7,0(r6) ; Get the first word - lwz r8,4(r6) ; Get the second word - lwz r9,8(r6) ; Get the third word - stw r7,0(r4) ; Save the first word - lwz r11,12(r6) ; Get the fourth word - stw r8,4(r4) ; Save the second word - lwz r7,16(r6) ; Get the fifth word - stw r9,8(r4) ; Save the third word - lwz r8,20(r6) ; Get the sixth word - stw r11,12(r4) ; Save the fourth word - lwz r9,24(r6) ; Get the seventh word - stw r7,16(r4) ; Save the fifth word - lwz r11,28(r6) ; Get the eighth word + dcbz 0,r4 ; Blow away the whole line because we are replacing it + dcbt r6,r0 ; Touch ahead a bit +skipz: addi r6,r6,32 ; Point to the next - stw r8,20(r4) ; Save the sixth word - stw r9,24(r4) ; Save the seventh word - stw r11,28(r4) ; Save the eighth word + stw r2,0(r4) ; Save the first word + stw r5,4(r4) ; Save the second word + stw r7,8(r4) ; Save the third word + stw r8,12(r4) ; Save the fourth word + stw r9,16(r4) ; Save the fifth word + stw r10,20(r4) ; Save the sixth word + stw r11,24(r4) ; Save the seventh word + stw r12,28(r4) ; Save the eighth word addi r4,r4,32 ; Bump sink - bgt+ nxtline ; Do the next line, if any... + bdnz+ nxtline ; Do the next line, if any... ; Move backend quadword -backend: bf 27,noquad ; No quad to do... +backend: ; Join here from "shortcopy" for forward moves <32 bytes + bf 27,noquad ; No quad to do... lwz r7,0(r6) ; Get the first word lwz r8,4(r6) ; Get the second word lwz r9,8(r6) ; Get the third word @@ -483,97 +533,33 @@ noword: bf 30,nohalf ; No halfword to do... ; Move backend byte -nohalf: bf 31,bcpydone ; Leave cuz we are all done... +nohalf: bflr 31 ; Leave cuz we are all done... lbz r7,0(r6) ; Get the byte stb r7,0(r4) ; Save the single - -bcpydone: - mfmsr r9 ; Get the MSR - bf++ flipcache,bcpydone0 ; (HACK) No need to mess with caching... - - li r0,1 ; (HACK) Get a 1 - mfxer r10 ; (HACK GLORIOUS HACK) Get the entry EE - sldi r0,r0,32+8 ; (HACK) Get the right bit to turn off caching - mfspr r2,hid4 ; (HACK) Get HID4 - rlwinm r10,r10,31-MSR_EE_BIT,MSR_EE_BIT,MSR_EE_BIT ; (HACK GLORIOUS HACK) Set the EE bit - andc r2,r2,r0 ; (HACK) Clear bit to make real accesses cache-inhibited - or r9,r9,r10 ; (HACK GLORIOUS HACK) Set the EE in MSR - sync ; (HACK) Sync up - mtspr hid4,r2 ; (HACK) Make real accesses not cache-inhibited - isync ; (HACK) Toss prefetches - - lis r12,0xE000 ; (HACK) Get the unlikeliest ESID possible - srdi r12,r12,1 ; (HACK) Make 0x7FFFFFFFF0000000 - slbie r12 ; (HACK) Make sure the ERAT is cleared - - mtmsr r9 ; (HACK GLORIOUS HACK) Set EE properly - -bcpydone0: - lis r0,hi16(MASK(MSR_VEC)) ; Get the vector bit - ori r0,r0,lo16(MASK(MSR_FP)) ; Get the float bit - bf++ fixxlate,bcpydone1 ; skip if we do not need to fix translation... 
- ori r9,r9,lo16(MASK(MSR_DR)) ; Turn data translation on - andc r9,r9,r0 ; Make sure that FP and VEC are off - mtmsr r9 ; Just do it - isync ; Hang in there - -bcpydone1: - bflr++ restorex ; done if we do not have to fix up addressing - mfsprg r8,2 ; get the feature flags again - mtcrf 0x02,r8 ; put pf64Bit where we can test it - bt++ pf64Bitb,bcpydone2 ; skip if 64-bit processor - - ; 32-bit processor, so clear out the BATs we set up for bcopy_physvir - - li r0,0 ; Get set to invalidate upper half - sync ; Make sure all is well - mtdbatu 0,r0 ; Clear sink upper DBAT - mtdbatu 1,r0 ; Clear source upper DBAT - sync - isync - blr - - ; 64-bit processor, so turn off 64-bit mode we turned on to do bcopy_phys - -bcpydone2: - mfmsr r9 ; get MSR again - andc r9,r9,r0 ; Make sure that FP and VEC are off - rldicl r9,r9,0,MSR_SF_BIT+1 ; clear SF - mtmsrd r9 - isync blr -; -; 0123456789ABCDEF0123456789ABCDEF -; 0123456789ABCDEF0123456789ABCDEF -; F -; DE -; 9ABC -; 12345678 -; 123456789ABCDEF0 -; 0 +; Reverse moves on 32-bit machines, also reverse word aligned uncached moves on 64-bit machines. +; NOTE: we never do an unaligned access if the source and destination are "relatively" +; word aligned. We depend on this in the uncached case on 64-bit processors. +; These are slower because we don't bother with dcbz. Fortunately, reverse moves are uncommon. +; r4 = destination +; r5 = length (>0) +; r6 = source +; r8 = inverse of largest mask smaller than operand length +; cr5 = noncache flag (but we don't dcbz anyway) -; -; Here is where we handle a forward overlapping move. These will be slow -; because we can not kill the cache of the destination until after we have -; loaded/saved the source area. Also, because reading memory backwards is -; slower when the cache line needs to be loaded because the critical -; doubleword is loaded first, i.e., the last, then it goes back to the first, -; and on in order. That means that when we are at the second to last DW we -; have to wait until the whole line is in cache before we can proceed. -; - -G4reverseWord: ; here from 64-bit code with word aligned uncached operands -fwdovrlap: add r4,r5,r4 ; Point past the last sink byte +reverse32bit: ; here from 64-bit code with word aligned uncached operands + add r4,r5,r4 ; Point past the last sink byte add r6,r5,r6 ; Point past the last source byte - and r0,r4,r8 ; Apply movement limit - li r12,-1 ; Make sure we touch in the actual line - mtcrf 3,r0 ; Figure out the best way to move backwards + rlwinm r7,r4,0,0x1F ; Calculate the length to align dest on cache boundary + li r12,-1 ; Make sure we touch in the actual line + andc. r0,r7,r8 ; Apply movement limit dcbt r12,r6 ; Touch in the last line of source - rlwinm. r0,r0,0,27,31 ; Calculate the length to adjust to cache boundary + mtcrf 0x01,r0 ; move length to cr6 and cr7 one cr at a time... dcbtst r12,r4 ; Touch in the last line of the sink - beq- balline ; Aready on cache line boundary + mtcrf 0x02,r0 ; ...since moving more than one is slower on G4 and G5 + beq- balline ; Aready on cache line boundary (or too short to bother) sub r5,r5,r0 ; Precaculate move length left after alignment @@ -626,15 +612,14 @@ balquad: bf 27,balline ; No quad to do... ; Sink is line aligned here balline: rlwinm. r0,r5,27,5,31 ; Get the number of full lines to move - mtcrf 3,r5 ; Make branch mask for backend partial moves + mtcrf 0x02,r5 ; move length to cr6 and cr7 one cr at a time... 
+ mtcrf 0x01,r5 ; ...since moving more than one is slower on G4 and G5 beq- bbackend ; No full lines to move - - -; Registers in use: R0, R1, R3, R4, R5, R6 -; Registers not in use: R2, R7, R8, R9, R10, R11, R12 - Ok, we can make another free for 8 of them + mtctr r0 ; set up loop count + b bnxtline -bnxtline: subic. r0,r0,1 ; Account for the line now - + .align 4 +bnxtline: lwz r7,-32(r6) ; Get the first word lwz r5,-28(r6) ; Get the second word lwz r2,-24(r6) ; Get the third word @@ -646,10 +631,7 @@ bnxtline: subic. r0,r0,1 ; Account for the line now subi r6,r6,32 ; Point to the next stw r7,-32(r4) ; Get the first word - ble- bnotouch ; Last time, skip touch of source... - dcbt br0,r6 ; Touch in next source line - -bnotouch: stw r5,-28(r4) ; Get the second word + stw r5,-28(r4) ; Get the second word stw r2,-24(r4) ; Get the third word stw r12,-20(r4) ; Get the third word stw r11,-16(r4) ; Get the fifth word @@ -658,7 +640,7 @@ bnotouch: stw r5,-28(r4) ; Get the second word stw r8,-4(r4) ; Get the eighth word subi r4,r4,32 ; Bump sink - bgt+ bnxtline ; Do the next line, if any... + bdnz+ bnxtline ; Do the next line, if any... ; ; Note: We touched these lines in at the beginning @@ -666,7 +648,8 @@ bnotouch: stw r5,-28(r4) ; Get the second word ; Move backend quadword -bbackend: bf 27,bnoquad ; No quad to do... +bbackend: ; Join here from "shortcopy" for reverse moves of <32 bytes + bf 27,bnoquad ; No quad to do... lwz r7,-16(r6) ; Get the first word lwz r8,-12(r6) ; Get the second word lwz r9,-8(r6) ; Get the third word @@ -706,11 +689,10 @@ bnoword: bf 30,bnohalf ; No halfword to do... ; Move backend byte -bnohalf: bf 31,bcpydone ; Leave cuz we are all done... +bnohalf: bflr 31 ; Leave cuz we are all done... lbz r7,-1(r6) ; Get the byte stb r7,-1(r4) ; Save the single - - b bcpydone ; Go exit cuz we are all done... + blr // Here on 64-bit processors, which have a 128-byte cache line. This can be @@ -722,40 +704,38 @@ bnohalf: bf 31,bcpydone ; Leave cuz we are all done... // r4 = destination (32 or 64-bit ptr) // r5 = length (always 32 bits) // r6 = source (32 or 64-bit ptr) -// cr5 = noncache, fixxlate, flipcache, and restorex flags set +// r12 = (dest - source), reverse move required if (dest-source)=length, in mode-independent way + li r0,0 // get a 0 + lis r10,hi16(0x80000000)// get 0x80000000 + addze. r0,r0 // set cr0 on carry bit (beq if reverse move required) + neg r9,r4 // start to get alignment for destination + sraw r8,r10,r11 // get mask based on operand length, to limit alignment + bt-- noncache,c64uncached// skip if uncached + beq-- c64rdouble // handle cached reverse moves + // Forward, cached or doubleword aligned uncached. This is the common case. -// r4-r6 = dest, length, source (as above) -// r7 = #bytes 128-byte align dest (limited by copy length) -// cr5 = flags, as above +// NOTE: we never do an unaligned access if the source and destination are "relatively" +// doubleword aligned. We depend on this in the uncached case. +// r4 = destination +// r5 = length (>0) +// r6 = source +// r8 = inverse of largest mask smaller than operand length +// r9 = neg(dest), used to compute alignment +// cr5 = noncache flag c64double: + rlwinm r7,r9,0,0x7F // get #bytes to 128-byte align destination + andc r7,r7,r8 // limit by operand length andi. 
r8,r7,7 // r8 <- #bytes to doubleword align srwi r9,r7,3 // r9 <- #doublewords to 128-byte align sub r5,r5,r7 // adjust length remaining @@ -774,7 +754,7 @@ c64double1: // copy bytes until dest is doubleword aligned addi r4,r4,1 bdnz c64double1 -c64double2: // r9/cr1=doublewords, r10=128-byte chunks, cr7=blt if r5==0 +c64double2: // r9/cr1=doublewords, r10/cr7=128-byte chunks beq cr1,c64double4 // no doublewords to xfer in order to cache align mtctr r9 b c64double3 @@ -787,76 +767,57 @@ c64double3: // copy doublewords until dest is 128-byte aligned addi r4,r4,8 bdnz c64double3 -// Here to xfer 128-byte chunks, if any. Because the IBM 970 cannot issue two stores/cycle, -// we pipeline the inner loop so we can pair loads and stores. Since we only have 8 GPRs for +// Here to xfer 128-byte chunks, if any. Since we only have 8 GPRs for // data (64 bytes), we load/store each twice per 128-byte chunk. c64double4: // r10/cr7=128-byte chunks rlwinm r0,r5,29,28,31 // r0 <- count of leftover doublewords, after moving chunks cmpwi cr1,r0,0 // set cr1 on leftover doublewords beq cr7,c64double7 // no 128-byte chunks - sub r8,r6,r4 // r8 <- (source - dest) - li r9,128 // start at next cache line (we've already touched in 1st line) - cmpldi cr7,r8,128 // if (source-dest)<128, cannot use dcbz128 beacause of overlap - cror noncache,cr7_lt,noncache // turn on "noncache" flag if (source-dest)<128 - bt-- noncache,noncache3 // (HACK) Skip cache touch if noncachable - dcbt128 r9,r6,1 // start forward stream -noncache3: - mtctr r10 - ld r0,0(r6) // start pipe: load 1st half-line - ld r2,8(r6) - ld r7,16(r6) - ld r8,24(r6) - ld r9,32(r6) - ld r10,40(r6) - ld r11,48(r6) - ld r12,56(r6) - b c64InnerLoopEntryPt + ; We must check for (source-dest)<128 in a mode-independent way. If within 128 bytes, + ; turn on "noncache" because we cannot use dcbz128 even if operands are cacheable. + sub r8,r6,r4 // r8 <- (source - dest) + rldicr. 
r0,r8,0,63-7 // zero low 7 bits and check for 0, mode independent + cror noncache,cr0_eq,noncache // turn on "noncache" flag if (source-dest)<128 + mtctr r10 + b c64InnerLoop + .align 5 // align inner loop c64InnerLoop: // loop copying 128-byte cache lines to 128-aligned destination - std r0,64(r4) // store 2nd half of chunk n - ld r0,0(r6) // load 1st half of chunk n+1 - std r2,72(r4) + ld r0,0(r6) // start pipe: load 1st half-line ld r2,8(r6) - std r7,80(r4) ld r7,16(r6) - std r8,88(r4) ld r8,24(r6) - std r9,96(r4) ld r9,32(r6) - std r10,104(r4) ld r10,40(r6) - std r11,112(r4) ld r11,48(r6) - std r12,120(r4) ld r12,56(r6) - addi r4,r4,128 // advance to next dest chunk -c64InnerLoopEntryPt: // initial entry into loop, with 1st halfline loaded bt noncache,c64InnerLoop1 // skip if uncached or overlap dcbz128 0,r4 // avoid prefetch of next cache line c64InnerLoop1: - std r0,0(r4) // store 1st half of chunk n - ld r0,64(r6) // load 2nd half of chunk n + + std r0,0(r4) std r2,8(r4) - ld r2,72(r6) std r7,16(r4) - ld r7,80(r6) std r8,24(r4) - ld r8,88(r6) std r9,32(r4) - ld r9,96(r6) std r10,40(r4) - ld r10,104(r6) std r11,48(r4) - ld r11,112(r6) std r12,56(r4) - ld r12,120(r6) - addi r6,r6,128 // advance to next source chunk if any - bdnz c64InnerLoop // loop if more chunks - std r0,64(r4) // store 2nd half of last chunk + ld r0,64(r6) // load 2nd half of chunk + ld r2,72(r6) + ld r7,80(r6) + ld r8,88(r6) + ld r9,96(r6) + ld r10,104(r6) + ld r11,112(r6) + ld r12,120(r6) + addi r6,r6,128 + + std r0,64(r4) std r2,72(r4) std r7,80(r4) std r8,88(r4) @@ -866,6 +827,9 @@ c64InnerLoop1: std r12,120(r4) addi r4,r4,128 // advance to next dest chunk + bdnz c64InnerLoop // loop if more chunks + + c64double7: // r5 <- leftover bytes, cr1 set on doubleword count rlwinm r0,r5,29,28,31 // r0 <- count of leftover doublewords (0-15) andi. r5,r5,7 // r5/cr0 <- count of leftover bytes (0-7) @@ -885,7 +849,7 @@ c64double8: // loop copying leftover doublewords // Forward byte loop. c64byte: // r5/cr0 <- byte count (can be big if unaligned uncached) - beq bcpydone // done if no leftover bytes + beqlr // done if no leftover bytes mtctr r5 b c64byte1 @@ -897,36 +861,36 @@ c64byte1: addi r4,r4,1 bdnz c64byte1 - b bcpydone + blr // Uncached copies. We must avoid unaligned accesses, since they always take alignment // exceptions on uncached memory on 64-bit processors. This may mean we copy long operands // a byte at a time, but that is still much faster than alignment exceptions. -// r4-r6 = dest, length, source (as above) -// r2 = mask of 1s for leading 0s in length, plus 1 extra 1 -// r7 = #bytes to copy to 128-byte align dest (limited by operand length) -// cr1 = blt if reverse move required +// r4 = destination +// r5 = length (>0) +// r6 = source +// r8 = inverse of largest mask smaller than operand length +// r9 = neg(dest), used to compute alignment +// r12 = (dest-source), used to test relative alignment +// cr0 = beq if reverse move required +// cr5 = noncache flag c64uncached: - xor r0,r6,r4 // get relative alignment - rlwinm r10,r0,0,29,31 // relatively doubleword aligned? - rlwinm r11,r0,0,30,31 // relatively word aligned? - not r8,r2 // get mask to limit initial length of copy for G4word - blt cr1,c64reverseUncached - - cmpwi cr0,r10,0 // set cr0 beq if doubleword aligned + rlwinm r10,r12,0,29,31 // relatively doubleword aligned? + rlwinm r11,r12,0,30,31 // relatively word aligned? 
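Annotation: "relatively aligned" in the uncached path means the low bits of (dest - source) are zero, so once the destination is aligned every matching source access is aligned too, and no alignment exception can be generated. A compact C restatement, with an illustrative name:

#include <stdbool.h>
#include <stdint.h>

/* rlwinm rN,r12,0,29,31 keeps the low 3 bits of (dest - source);
 * rlwinm rN,r12,0,30,31 keeps the low 2. Zero means relatively
 * doubleword- or word-aligned, respectively. */
static inline bool relatively_aligned(uint64_t dst, uint64_t src, uint64_t unit)
{
    return ((dst - src) & (unit - 1)) == 0;   /* unit = 8 or 4 here */
}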
+ cmpwi cr7,r10,0 // set cr7 beq if doubleword aligned cmpwi cr1,r11,0 // set cr1 beq if word aligned - beq cr0,c64double // doubleword aligned - beq cr1,G4word // word aligned, use G3/G4 code + beq-- c64reverseUncached + + beq cr7,c64double // doubleword aligned + beq cr1,forward32bit // word aligned, use G3/G4 code cmpwi r5,0 // set cr0 on byte count b c64byte // unaligned operands c64reverseUncached: - cmpwi cr0,r10,0 // set cr0 beq if doubleword aligned - cmpwi cr1,r11,0 // set cr1 beq if word aligned - beq cr0,c64rdouble // doubleword aligned so can use LD/STD - beq cr1,G4reverseWord // word aligned, use G3/G4 code + beq cr7,c64rdouble // doubleword aligned so can use LD/STD + beq cr1,reverse32bit // word aligned, use G3/G4 code add r6,r6,r5 // point to (end+1) of source and dest add r4,r4,r5 cmpwi r5,0 // set cr0 on length @@ -936,19 +900,17 @@ c64reverseUncached: // Reverse doubleword copies. This is used for all cached copies, and doubleword // aligned uncached copies. -// r4 = destination (32 or 64-bit ptr) -// r5 = length (always 32 bits) -// r6 = source (32 or 64-bit ptr) -// cr5 = noncache, fixxlate, and restorex flags set +// r4 = destination +// r5 = length (>0) +// r6 = source +// r8 = inverse of largest mask of low-order 1s smaller than operand length +// cr5 = noncache flag c64rdouble: add r6,r6,r5 // point to (end+1) of source and dest add r4,r4,r5 - rlwinm. r7,r4,0,29,31 // r7 <- #bytes to doubleword align dest - cmplw cr1,r7,r5 // operand long enough to doubleword align? - blt cr1,c64rd0 // yes - mr r7,r5 // no -c64rd0: + rlwinm r7,r4,0,29,31 // r7 <- #bytes to doubleword align dest + andc. r7,r7,r8 // limit by operand length sub r5,r5,r7 // adjust length srwi r8,r5,6 // r8 <- 64-byte chunks to xfer cmpwi cr1,r8,0 // any chunks? @@ -965,10 +927,7 @@ c64rd2: // r8/cr1 <- count of 64-byte chunks andi. r5,r5,7 // r5/cr0 <- count of leftover bytes cmpwi cr7,r0,0 // leftover doublewords? beq cr1,c64rd4 // no chunks to xfer - li r9,-128 // start at next cache line mtctr r8 - bt noncache,c64rd3 // (HACK) Do not start a stream if noncachable... - dcbt128 r9,r6,3 // start reverse stream b c64rd3 .align 5 // align inner loop @@ -1004,7 +963,7 @@ c64rd5: // loop copying leftover doublewords // Reverse byte loop. c64rbyte: // r5/cr0 <- byte count (can be big if unaligned uncached) - beq bcpydone // done if no leftover bytes + beqlr // done if no leftover bytes mtctr r5 c64rbyte1: @@ -1012,5 +971,5 @@ c64rbyte1: stbu r0,-1(r4) bdnz c64rbyte1 - b bcpydone + blr diff --git a/osfmk/ppc/bcopytest.c b/osfmk/ppc/bcopytest.c index d93871f87..dc994332f 100644 --- a/osfmk/ppc/bcopytest.c +++ b/osfmk/ppc/bcopytest.c @@ -1,11 +1,9 @@ -#include #include #include #include #include #include -#include #include #include #include diff --git a/osfmk/ppc/cache.s b/osfmk/ppc/cache.s index c20e8b97b..5318f83f6 100644 --- a/osfmk/ppc/cache.s +++ b/osfmk/ppc/cache.s @@ -273,3 +273,111 @@ cache_op_exit: beqlr-- cr5 // if using virtual addresses, no need to restore MSR b EXT(ml_restore) // restore MSR and return + +//////////////////////////////////////////////////// + + .align 5 + .globl _dcache_incoherent_io_store64 +_dcache_incoherent_io_store64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + + // here with r3=addr, r4=count + mfsprg r10,2 // r10 <- processor feature flags + andi. 
r9,r10,pf32Byte+pf128Byte // r9 <- cache line size + mtcrf 0x02,r10 // move pf64Bit bit to CR6 + subi r8,r9,1 // r8 <- (linesize-1) + + bf-- pf64Bitb,cache_ios_not64 // This is not a 64-bit machine + + srdi r12,r3,31 // Slide bit 32 to bit 63 + cmpldi r12,1 // Are we in the I/O mapped area? + beqlr-- // No cache ops allowed here... + +cache_ios_not64: + mflr r12 // save return address + bl EXT(ml_set_physical) // turn on physical addressing + mtlr r12 // restore return address + + // get r3=first cache line, r4=first line not in set, r6=byte count + add r7,r3,r4 // point to 1st byte not to operate on + andc r3,r3,r8 // r3 <- 1st line to operate on + add r4,r7,r8 // round up + andc r4,r4,r8 // r4 <- 1st line not to operate on + sub. r6,r4,r3 // r6 <- byte count to operate on + beq-- cache_ios_exit // nothing to do + + sub. r6,r6,r9 // >1 line? + beq cache_ios_last_line // use dcbst on all lines but last + + // DCBST loop +cache_ios_5: + sub. r6,r6,r9 // more to go? + dcbst r6,r3 // store next line + bne cache_ios_5 // loop if more to go + +cache_ios_last_line: + sync // flush last line + isync + dcbf r6,r3 + sync + isync + add r6,r6,r3 + lwz r0,0(r6) // make sure the data reaches RAM (not just the memory controller) + isync + + // restore MSR +cache_ios_exit: + b EXT(ml_restore) // restore MSR and return + + +//////////////////////////////////////////////////// + + .align 5 + .globl _dcache_incoherent_io_flush64 +_dcache_incoherent_io_flush64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + mr r4,r5 ; Move count + + // here with r3=addr, r4=count + mfsprg r10,2 // r10 <- processor feature flags + andi. r9,r10,pf32Byte+pf128Byte // r9 <- cache line size + mtcrf 0x02,r10 // move pf64Bit bit to CR6 + subi r8,r9,1 // r8 <- (linesize-1) + + bf-- pf64Bitb,cache_iof_not64 // This is not a 64-bit machine + + srdi r12,r3,31 // Slide bit 32 to bit 63 + cmpldi r12,1 // Are we in the I/O mapped area? + beqlr-- // No cache ops allowed here... + +cache_iof_not64: + mflr r12 // save return address + bl EXT(ml_set_physical) // turn on physical addressing + mtlr r12 // restore return address + + // get r3=first cache line, r4=first line not in set, r6=byte count + add r7,r3,r4 // point to 1st byte not to operate on + andc r3,r3,r8 // r3 <- 1st line to operate on + add r4,r7,r8 // round up + andc r4,r4,r8 // r4 <- 1st line not to operate on + sub. r6,r4,r3 // r6 <- byte count to operate on + beq-- cache_iof_exit // nothing to do + + // DCBF loop +cache_iof_5: + sub. r6,r6,r9 // more to go? + dcbf r6,r3 // store next line + bne cache_iof_5 // loop if more to go + +cache_iof_last_line: + sync // flush last line + isync + + // restore MSR +cache_iof_exit: + b EXT(ml_restore) // restore MSR and return + + diff --git a/osfmk/ppc/chud/chud_cpu.c b/osfmk/ppc/chud/chud_cpu.c index 086259530..faed408a4 100644 --- a/osfmk/ppc/chud/chud_cpu.c +++ b/osfmk/ppc/chud/chud_cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,16 +19,39 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#include +#include + +#include +#include #include #include #include -#include #include #include -#include +#include #include +// the macros in proc_reg.h fail with "expression must be absolute" + +#undef mtsprg +#undef mfsprg +#define mtsprg(n, reg) __asm__ volatile("mtsprg " # n ", %0" : : "r" (reg)) +#define mfsprg(reg, n) __asm__ volatile("mfsprg %0, " # n : "=r" (reg)) + +#undef mtspr +#undef mfspr +#define mtspr(spr, reg) __asm__ volatile ("mtspr %0, %1" : : "n" (spr), "r" (reg)) +#define mfspr(reg, spr) __asm__ volatile("mfspr %0, %1" : "=r" (reg) : "n" (spr)); + +#undef mtsr +#undef mfsr +#define mtsr(sr, reg) __asm__ volatile("sync" "@" "mtsr sr%0, %1 " "@" "isync" : : "i" (sr), "r" (reg)); +#define mfsr(reg, sr) __asm__ volatile("mfsr %0, sr%1" : "=r" (reg) : "i" (sr)); + +#pragma mark **** cpu count **** + __private_extern__ int chudxnu_avail_cpu_count(void) { @@ -65,25 +88,35 @@ int chudxnu_cpu_number(void) return cpu_number(); } +#pragma mark **** cpu enable/disable **** + +extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c +extern kern_return_t processor_exit(processor_t processor); // osfmk/kern/processor.c + __private_extern__ kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable) { - chudxnu_unbind_current_thread(); + chudxnu_unbind_thread(current_thread()); if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument return KERN_FAILURE; } - if(processor_ptr[cpu]!=PROCESSOR_NULL && processor_ptr[cpu]!=master_processor) { + if((PerProcTable[cpu].ppe_vaddr != (struct per_proc_info *)NULL) + && cpu != master_cpu) { + processor_t processor = cpu_to_processor(cpu); + if(enable) { - return processor_start(processor_ptr[cpu]); + return processor_start(processor); } else { - return processor_exit(processor_ptr[cpu]); + return processor_exit(processor); } } return KERN_FAILURE; } +#pragma mark **** nap **** + __private_extern__ kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable) { @@ -91,7 +124,7 @@ kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable) return KERN_FAILURE; } - if(processor_ptr[cpu]!=PROCESSOR_NULL) { + if(PerProcTable[cpu].ppe_vaddr != (struct per_proc_info *)NULL) { ml_enable_nap(cpu, enable); return KERN_SUCCESS; } @@ -114,37 +147,48 @@ boolean_t chudxnu_cpu_nap_enabled(int cpu) return prev; } +#pragma mark **** shadowed spr **** + __private_extern__ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) { - cpu_subtype_t cpu_subtype; + cpu_subtype_t target_cpu_subtype; uint32_t available; kern_return_t retval = KERN_FAILURE; + struct per_proc_info *per_proc; + boolean_t didBind = FALSE; - if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument + if(cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument return KERN_FAILURE; } - chudxnu_bind_current_thread(cpu); + if(cpu<0) { // cpu<0 means don't bind (current cpu) + cpu = chudxnu_cpu_number(); + didBind = FALSE; + } else { + chudxnu_bind_thread(current_thread(), cpu); + didBind = TRUE; + } - available = per_proc_info[cpu].pf.Available; - cpu_subtype = machine_slot[cpu].cpu_subtype; + per_proc = PerProcTable[cpu].ppe_vaddr; + available = per_proc->pf.Available; + target_cpu_subtype = per_proc->cpu_subtype; if(spr==chud_750_l2cr) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: if(available & pfL2) { // 
int enable = (val & 0x80000000) ? TRUE : FALSE; // if(enable) { -// per_proc_info[cpu].pf.l2cr = val; +// per_proc->pf.l2cr = val; // } else { -// per_proc_info[cpu].pf.l2cr = 0; +// per_proc->pf.l2cr = 0; // } - per_proc_info[cpu].pf.l2cr = val; + per_proc->pf.l2cr = val; cacheInit(); - // mtspr(l2cr, per_proc_info[cpu].pf.l2cr); // XXXXXXX why is this necessary? XXXXXXX + // mtspr(l2cr, per_proc->pf.l2cr); // XXXXXXX why is this necessary? XXXXXXX retval = KERN_SUCCESS; } else { retval = KERN_FAILURE; @@ -156,14 +200,14 @@ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) } } else if(spr==chud_7450_l3cr) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_7450: if(available & pfL3) { int enable = (val & 0x80000000) ? TRUE : FALSE; if(enable) { - per_proc_info[cpu].pf.l3cr = val; + per_proc->pf.l3cr = val; } else { - per_proc_info[cpu].pf.l3cr = 0; + per_proc->pf.l3cr = 0; } cacheInit(); retval = KERN_SUCCESS; @@ -177,19 +221,19 @@ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) } } else if(spr==chud_750_hid0) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: cacheInit(); cacheDisable(); /* disable caches */ - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_750_hid0), "r" (val)); - per_proc_info[cpu].pf.pfHID0 = val; + mtspr(chud_750_hid0, val); + per_proc->pf.pfHID0 = val; cacheInit(); /* reenable caches */ retval = KERN_SUCCESS; break; case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_750_hid0), "r" (val)); - per_proc_info[cpu].pf.pfHID0 = val; + mtspr(chud_750_hid0, val); + per_proc->pf.pfHID0 = val; retval = KERN_SUCCESS; break; default: @@ -198,12 +242,12 @@ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) } } else if(spr==chud_750_hid1) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_750_hid1), "r" (val)); - per_proc_info[cpu].pf.pfHID1 = val; + mtspr(chud_750_hid1, val); + per_proc->pf.pfHID1 = val; retval = KERN_SUCCESS; break; default: @@ -211,57 +255,69 @@ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) break; } } - else if(spr==chud_750fx_hid2 && cpu_subtype==CPU_SUBTYPE_POWERPC_750) { - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_750fx_hid2), "r" (val)); - per_proc_info[cpu].pf.pfHID2 = val; + else if(spr==chud_750fx_hid2 && target_cpu_subtype==CPU_SUBTYPE_POWERPC_750) { + mtspr(chud_750fx_hid2, val); + per_proc->pf.pfHID2 = val; retval = KERN_SUCCESS; } - else if(spr==chud_7400_msscr0 && (cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || cpu_subtype==CPU_SUBTYPE_POWERPC_7450)) { - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_7400_msscr0), "r" (val)); - per_proc_info[cpu].pf.pfMSSCR0 = val; + else if(spr==chud_7400_msscr0 && (target_cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450)) { + mtspr(chud_7400_msscr0, val); + per_proc->pf.pfMSSCR0 = val; retval = KERN_SUCCESS; } - else if(spr==chud_7400_msscr1 && cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || cpu_subtype==CPU_SUBTYPE_POWERPC_7450) { // called msssr0 on 7450 - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_7400_msscr1), "r" (val)); - per_proc_info[cpu].pf.pfMSSCR1 = val; + else if(spr==chud_7400_msscr1 && (target_cpu_subtype==CPU_SUBTYPE_POWERPC_7400 || target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450)) { // called msssr0 on 7450 + 
mtspr(chud_7400_msscr1, val); + per_proc->pf.pfMSSCR1 = val; retval = KERN_SUCCESS; } - else if(spr==chud_7450_ldstcr && cpu_subtype==CPU_SUBTYPE_POWERPC_7450) { - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_7450_ldstcr), "r" (val)); - per_proc_info[cpu].pf.pfLDSTCR = val; + else if(spr==chud_7450_ldstcr && target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450) { + mtspr(chud_7450_ldstcr, val); + per_proc->pf.pfLDSTCR = val; retval = KERN_SUCCESS; } - else if(spr==chud_7450_ictrl && cpu_subtype==CPU_SUBTYPE_POWERPC_7450) { - __asm__ volatile ("mtspr %0, %1" : : "n" (chud_7450_ictrl), "r" (val)); - per_proc_info[cpu].pf.pfICTRL = val; + else if(spr==chud_7450_ictrl && target_cpu_subtype==CPU_SUBTYPE_POWERPC_7450) { + mtspr(chud_7450_ictrl, val); + per_proc->pf.pfICTRL = val; retval = KERN_SUCCESS; } else { retval = KERN_INVALID_ARGUMENT; } - chudxnu_unbind_current_thread(); + if(didBind) { + chudxnu_unbind_thread(current_thread()); + } + return retval; } __private_extern__ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val) { - cpu_subtype_t cpu_subtype; + cpu_subtype_t target_cpu_subtype; kern_return_t retval = KERN_FAILURE; + struct per_proc_info *per_proc; + boolean_t didBind = FALSE; - if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument + if(cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument return KERN_FAILURE; } - chudxnu_bind_current_thread(cpu); + if(cpu<0) { // cpu<0 means don't bind (current cpu) + cpu = chudxnu_cpu_number(); + didBind = FALSE; + } else { + chudxnu_bind_thread(current_thread(), cpu); + didBind = TRUE; + } - cpu_subtype = machine_slot[cpu].cpu_subtype; + per_proc = PerProcTable[cpu].ppe_vaddr; + target_cpu_subtype = per_proc->cpu_subtype; if(spr==chud_970_hid0) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_970: - chudxnu_mthid0_64(&val); - per_proc_info[cpu].pf.pfHID0 = val; + mtspr64(chud_970_hid0, &val); + per_proc->pf.pfHID0 = val; retval = KERN_SUCCESS; break; default: @@ -270,10 +326,10 @@ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val) } } else if(spr==chud_970_hid1) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_970: - chudxnu_mthid1_64(&val); - per_proc_info[cpu].pf.pfHID1 = val; + mtspr64(chud_970_hid1, &val); + per_proc->pf.pfHID1 = val; retval = KERN_SUCCESS; break; default: @@ -282,10 +338,10 @@ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val) } } else if(spr==chud_970_hid4) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_970: - chudxnu_mthid4_64(&val); - per_proc_info[cpu].pf.pfHID4 = val; + mtspr64(chud_970_hid4, &val); + per_proc->pf.pfHID4 = val; retval = KERN_SUCCESS; break; default: @@ -294,10 +350,10 @@ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val) } } else if(spr==chud_970_hid5) { - switch(cpu_subtype) { + switch(target_cpu_subtype) { case CPU_SUBTYPE_POWERPC_970: - chudxnu_mthid5_64(&val); - per_proc_info[cpu].pf.pfHID5 = val; + mtspr64(chud_970_hid5, &val); + per_proc->pf.pfHID5 = val; retval = KERN_SUCCESS; break; default: @@ -308,7 +364,9 @@ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val) retval = KERN_INVALID_ARGUMENT; } - chudxnu_unbind_current_thread(); + if(didBind) { + chudxnu_unbind_thread(current_thread()); + } return retval; } @@ -319,7 +377,7 @@ uint32_t chudxnu_get_orig_cpu_l2cr(int cpu) if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument cpu = 0; } - 
return per_proc_info[cpu].pf.l2crOriginal; + return PerProcTable[cpu].ppe_vaddr->pf.l2crOriginal; } __private_extern__ @@ -328,9 +386,730 @@ uint32_t chudxnu_get_orig_cpu_l3cr(int cpu) if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument cpu = 0; } - return per_proc_info[cpu].pf.l3crOriginal; + return PerProcTable[cpu].ppe_vaddr->pf.l3crOriginal; +} + +#pragma mark **** spr **** + +__private_extern__ +kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p) +{ + kern_return_t retval = KERN_SUCCESS; + boolean_t oldlevel; + uint32_t val = 0xFFFFFFFF; + + /* bind to requested CPU */ + if(cpu>=0) { // cpu<0 means don't bind + if(chudxnu_bind_thread(current_thread(), cpu)!=KERN_SUCCESS) { + return KERN_INVALID_ARGUMENT; + } + } + + oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */ + + do { + /* PPC SPRs - 32-bit and 64-bit implementations */ + if(spr==chud_ppc_srr0) { mfspr(val, chud_ppc_srr0); break; } + if(spr==chud_ppc_srr1) { mfspr(val, chud_ppc_srr1); break; } + if(spr==chud_ppc_dsisr) { mfspr(val, chud_ppc_dsisr); break; } + if(spr==chud_ppc_dar) { mfspr(val, chud_ppc_dar); break; } + if(spr==chud_ppc_dec) { mfspr(val, chud_ppc_dec); break; } + if(spr==chud_ppc_sdr1) { mfspr(val, chud_ppc_sdr1); break; } + if(spr==chud_ppc_sprg0) { mfspr(val, chud_ppc_sprg0); break; } + if(spr==chud_ppc_sprg1) { mfspr(val, chud_ppc_sprg1); break; } + if(spr==chud_ppc_sprg2) { mfspr(val, chud_ppc_sprg2); break; } + if(spr==chud_ppc_sprg3) { mfspr(val, chud_ppc_sprg3); break; } + if(spr==chud_ppc_ear) { mfspr(val, chud_ppc_ear); break; } + if(spr==chud_ppc_tbl) { mfspr(val, 268); break; } /* timebase consists of read registers and write registers */ + if(spr==chud_ppc_tbu) { mfspr(val, 269); break; } + if(spr==chud_ppc_pvr) { mfspr(val, chud_ppc_pvr); break; } + if(spr==chud_ppc_ibat0u) { mfspr(val, chud_ppc_ibat0u); break; } + if(spr==chud_ppc_ibat0l) { mfspr(val, chud_ppc_ibat0l); break; } + if(spr==chud_ppc_ibat1u) { mfspr(val, chud_ppc_ibat1u); break; } + if(spr==chud_ppc_ibat1l) { mfspr(val, chud_ppc_ibat1l); break; } + if(spr==chud_ppc_ibat2u) { mfspr(val, chud_ppc_ibat2u); break; } + if(spr==chud_ppc_ibat2l) { mfspr(val, chud_ppc_ibat2l); break; } + if(spr==chud_ppc_ibat3u) { mfspr(val, chud_ppc_ibat3u); break; } + if(spr==chud_ppc_ibat3l) { mfspr(val, chud_ppc_ibat3l); break; } + if(spr==chud_ppc_dbat0u) { mfspr(val, chud_ppc_dbat0u); break; } + if(spr==chud_ppc_dbat0l) { mfspr(val, chud_ppc_dbat0l); break; } + if(spr==chud_ppc_dbat1u) { mfspr(val, chud_ppc_dbat1u); break; } + if(spr==chud_ppc_dbat1l) { mfspr(val, chud_ppc_dbat1l); break; } + if(spr==chud_ppc_dbat2u) { mfspr(val, chud_ppc_dbat2u); break; } + if(spr==chud_ppc_dbat2l) { mfspr(val, chud_ppc_dbat2l); break; } + if(spr==chud_ppc_dbat3u) { mfspr(val, chud_ppc_dbat3u); break; } + if(spr==chud_ppc_dbat3l) { mfspr(val, chud_ppc_dbat3l); break; } + if(spr==chud_ppc_dabr) { mfspr(val, chud_ppc_dabr); break; } + if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */ + struct ppc_thread_state64 state; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + kern_return_t kr; + kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */); + if(KERN_SUCCESS==kr) { + val = state.srr1; + } else { + retval = KERN_FAILURE; + } + break; + } + + /* PPC SPRs - 32-bit implementations */ + if(spr==chud_ppc32_sr0) { mfsr(val, 0); break; } + if(spr==chud_ppc32_sr1) { mfsr(val, 1); break; } + if(spr==chud_ppc32_sr2) { 
mfsr(val, 2); break; } + if(spr==chud_ppc32_sr3) { mfsr(val, 3); break; } + if(spr==chud_ppc32_sr4) { mfsr(val, 4); break; } + if(spr==chud_ppc32_sr5) { mfsr(val, 5); break; } + if(spr==chud_ppc32_sr6) { mfsr(val, 6); break; } + if(spr==chud_ppc32_sr7) { mfsr(val, 7); break; } + if(spr==chud_ppc32_sr8) { mfsr(val, 8); break; } + if(spr==chud_ppc32_sr9) { mfsr(val, 9); break; } + if(spr==chud_ppc32_sr10) { mfsr(val, 10); break; } + if(spr==chud_ppc32_sr11) { mfsr(val, 11); break; } + if(spr==chud_ppc32_sr12) { mfsr(val, 12); break; } + if(spr==chud_ppc32_sr13) { mfsr(val, 13); break; } + if(spr==chud_ppc32_sr14) { mfsr(val, 14); break; } + if(spr==chud_ppc32_sr15) { mfsr(val, 15); break; } + + /* PPC SPRs - 64-bit implementations */ + if(spr==chud_ppc64_ctrl) { mfspr(val, chud_ppc64_ctrl); break; } + + /* Implementation Specific SPRs */ + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_750) { + if(spr==chud_750_mmcr0) { mfspr(val, chud_750_mmcr0); break; } + if(spr==chud_750_pmc1) { mfspr(val, chud_750_pmc1); break; } + if(spr==chud_750_pmc2) { mfspr(val, chud_750_pmc2); break; } + if(spr==chud_750_sia) { mfspr(val, chud_750_sia); break; } + if(spr==chud_750_mmcr1) { mfspr(val, chud_750_mmcr1); break; } + if(spr==chud_750_pmc3) { mfspr(val, chud_750_pmc3); break; } + if(spr==chud_750_pmc4) { mfspr(val, chud_750_pmc4); break; } + if(spr==chud_750_hid0) { mfspr(val, chud_750_hid0); break; } + if(spr==chud_750_hid1) { mfspr(val, chud_750_hid1); break; } + if(spr==chud_750_iabr) { mfspr(val, chud_750_iabr); break; } + if(spr==chud_750_ictc) { mfspr(val, chud_750_ictc); break; } + if(spr==chud_750_thrm1) { mfspr(val, chud_750_thrm1); break; } + if(spr==chud_750_thrm2) { mfspr(val, chud_750_thrm2); break; } + if(spr==chud_750_thrm3) { mfspr(val, chud_750_thrm3); break; } + if(spr==chud_750_l2cr) { mfspr(val, chud_750_l2cr); break; } + + // 750FX only + if(spr==chud_750fx_ibat4u) { mfspr(val, chud_750fx_ibat4u); break; } + if(spr==chud_750fx_ibat4l) { mfspr(val, chud_750fx_ibat4l); break; } + if(spr==chud_750fx_ibat5u) { mfspr(val, chud_750fx_ibat5u); break; } + if(spr==chud_750fx_ibat5l) { mfspr(val, chud_750fx_ibat5l); break; } + if(spr==chud_750fx_ibat6u) { mfspr(val, chud_750fx_ibat6u); break; } + if(spr==chud_750fx_ibat6l) { mfspr(val, chud_750fx_ibat6l); break; } + if(spr==chud_750fx_ibat7u) { mfspr(val, chud_750fx_ibat7u); break; } + if(spr==chud_750fx_ibat7l) { mfspr(val, chud_750fx_ibat7l); break; } + if(spr==chud_750fx_dbat4u) { mfspr(val, chud_750fx_dbat4u); break; } + if(spr==chud_750fx_dbat4l) { mfspr(val, chud_750fx_dbat4l); break; } + if(spr==chud_750fx_dbat5u) { mfspr(val, chud_750fx_dbat5u); break; } + if(spr==chud_750fx_dbat5l) { mfspr(val, chud_750fx_dbat5l); break; } + if(spr==chud_750fx_dbat6u) { mfspr(val, chud_750fx_dbat6u); break; } + if(spr==chud_750fx_dbat6l) { mfspr(val, chud_750fx_dbat6l); break; } + if(spr==chud_750fx_dbat7u) { mfspr(val, chud_750fx_dbat7u); break; } + if(spr==chud_750fx_dbat7l) { mfspr(val, chud_750fx_dbat7l); break; } + + // 750FX >= DDR2.x only + if(spr==chud_750fx_hid2) { mfspr(val, chud_750fx_hid2); break; } + } + + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7400) { + if(spr==chud_7400_mmcr2) { mfspr(val, chud_7400_mmcr2); break; } + if(spr==chud_7400_bamr) { mfspr(val, chud_7400_bamr); break; } + if(spr==chud_7400_mmcr0) { mfspr(val, chud_7400_mmcr0); break; } + if(spr==chud_7400_pmc1) { mfspr(val, chud_7400_pmc1); break; } + if(spr==chud_7400_pmc2) { mfspr(val, chud_7400_pmc2); break; } + if(spr==chud_7400_siar) { mfspr(val, chud_7400_siar); break; } + 
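/*
 * A note on the control flow running through these arms: chudxnu_read_spr()
 * wraps the whole ladder in do { ... } while(0), so every matching arm
 * breaks to a single exit where the interrupt level is restored and the
 * thread unbound exactly once, on every path. A minimal stand-alone sketch
 * of the idiom (disable_ints()/restore_ints() are placeholders, not kernel
 * APIs):
 *
 *     static int dispatch(int key)
 *     {
 *         int ret = 0;
 *         int old = disable_ints();               // mask, remember old level
 *         do {
 *             if (key == 1) { ret = 10; break; }  // each arm exits the "loop"
 *             if (key == 2) { ret = 20; break; }
 *             ret = -1;                           // reached only if nothing matched
 *         } while (0);
 *         restore_ints(old);                      // single exit: runs on every path
 *         return ret;
 *     }
 */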
if(spr==chud_7400_mmcr1) { mfspr(val, chud_7400_mmcr1); break; } + if(spr==chud_7400_pmc3) { mfspr(val, chud_7400_pmc3); break; } + if(spr==chud_7400_pmc4) { mfspr(val, chud_7400_pmc4); break; } + if(spr==chud_7400_hid0) { mfspr(val, chud_7400_hid0); break; } + if(spr==chud_7400_hid1) { mfspr(val, chud_7400_hid1); break; } + if(spr==chud_7400_iabr) { mfspr(val, chud_7400_iabr); break; } + if(spr==chud_7400_msscr0) { mfspr(val, chud_7400_msscr0); break; } + if(spr==chud_7400_msscr1) { mfspr(val, chud_7400_msscr1); break; } /* private */ + if(spr==chud_7400_ictc) { mfspr(val, chud_7400_ictc); break; } + if(spr==chud_7400_thrm1) { mfspr(val, chud_7400_thrm1); break; } + if(spr==chud_7400_thrm2) { mfspr(val, chud_7400_thrm2); break; } + if(spr==chud_7400_thrm3) { mfspr(val, chud_7400_thrm3); break; } + if(spr==chud_7400_pir) { mfspr(val, chud_7400_pir); break; } + if(spr==chud_7400_l2cr) { mfspr(val, chud_7400_l2cr); break; } + + // 7410 only + if(spr==chud_7410_l2pmcr) { mfspr(val, chud_7410_l2pmcr); break; } + } + + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7450) { + if(spr==chud_7450_mmcr2) { mfspr(val, chud_7450_mmcr2); break; } + if(spr==chud_7450_pmc5) { mfspr(val, chud_7450_pmc5); break; } + if(spr==chud_7450_pmc6) { mfspr(val, chud_7450_pmc6); break; } + if(spr==chud_7450_bamr) { mfspr(val, chud_7450_bamr); break; } + if(spr==chud_7450_mmcr0) { mfspr(val, chud_7450_mmcr0); break; } + if(spr==chud_7450_pmc1) { mfspr(val, chud_7450_pmc1); break; } + if(spr==chud_7450_pmc2) { mfspr(val, chud_7450_pmc2); break; } + if(spr==chud_7450_siar) { mfspr(val, chud_7450_siar); break; } + if(spr==chud_7450_mmcr1) { mfspr(val, chud_7450_mmcr1); break; } + if(spr==chud_7450_pmc3) { mfspr(val, chud_7450_pmc3); break; } + if(spr==chud_7450_pmc4) { mfspr(val, chud_7450_pmc4); break; } + if(spr==chud_7450_tlbmiss) { mfspr(val, chud_7450_tlbmiss); break; } + if(spr==chud_7450_ptehi) { mfspr(val, chud_7450_ptehi); break; } + if(spr==chud_7450_ptelo) { mfspr(val, chud_7450_ptelo); break; } + if(spr==chud_7450_l3pm) { mfspr(val, chud_7450_l3pm); break; } + if(spr==chud_7450_hid0) { mfspr(val, chud_7450_hid0); break; } + if(spr==chud_7450_hid1) { mfspr(val, chud_7450_hid1); break; } + if(spr==chud_7450_iabr) { mfspr(val, chud_7450_iabr); break; } + if(spr==chud_7450_ldstdb) { mfspr(val, chud_7450_ldstdb); break; } + if(spr==chud_7450_msscr0) { mfspr(val, chud_7450_msscr0); break; } + if(spr==chud_7450_msssr0) { mfspr(val, chud_7450_msssr0); break; } + if(spr==chud_7450_ldstcr) { mfspr(val, chud_7450_ldstcr); break; } + if(spr==chud_7450_ictc) { mfspr(val, chud_7450_ictc); break; } + if(spr==chud_7450_ictrl) { mfspr(val, chud_7450_ictrl); break; } + if(spr==chud_7450_thrm1) { mfspr(val, chud_7450_thrm1); break; } + if(spr==chud_7450_thrm2) { mfspr(val, chud_7450_thrm2); break; } + if(spr==chud_7450_thrm3) { mfspr(val, chud_7450_thrm3); break; } + if(spr==chud_7450_pir) { mfspr(val, chud_7450_pir); break; } + if(spr==chud_7450_l2cr) { mfspr(val, chud_7450_l2cr); break; } + if(spr==chud_7450_l3cr) { mfspr(val, chud_7450_l3cr); break; } + + // 7455/7457 only + if(spr==chud_7455_sprg4) { mfspr(val, chud_7455_sprg4); break; } + if(spr==chud_7455_sprg5) { mfspr(val, chud_7455_sprg5); break; } + if(spr==chud_7455_sprg6) { mfspr(val, chud_7455_sprg6); break; } + if(spr==chud_7455_sprg7) { mfspr(val, chud_7455_sprg7); break; } + if(spr==chud_7455_ibat4u) { mfspr(val, chud_7455_ibat4u); break; } + if(spr==chud_7455_ibat4l) { mfspr(val, chud_7455_ibat4l); break; } + if(spr==chud_7455_ibat5u) { mfspr(val, chud_7455_ibat5u); 
break; } + if(spr==chud_7455_ibat5l) { mfspr(val, chud_7455_ibat5l); break; } + if(spr==chud_7455_ibat6u) { mfspr(val, chud_7455_ibat6u); break; } + if(spr==chud_7455_ibat6l) { mfspr(val, chud_7455_ibat6l); break; } + if(spr==chud_7455_ibat7u) { mfspr(val, chud_7455_ibat7u); break; } + if(spr==chud_7455_ibat7l) { mfspr(val, chud_7455_ibat7l); break; } + if(spr==chud_7455_dbat4u) { mfspr(val, chud_7455_dbat4u); break; } + if(spr==chud_7455_dbat4l) { mfspr(val, chud_7455_dbat4l); break; } + if(spr==chud_7455_dbat5u) { mfspr(val, chud_7455_dbat5u); break; } + if(spr==chud_7455_dbat5l) { mfspr(val, chud_7455_dbat5l); break; } + if(spr==chud_7455_dbat6u) { mfspr(val, chud_7455_dbat6u); break; } + if(spr==chud_7455_dbat6l) { mfspr(val, chud_7455_dbat6l); break; } + if(spr==chud_7455_dbat7u) { mfspr(val, chud_7455_dbat7u); break; } + if(spr==chud_7455_dbat7l) { mfspr(val, chud_7455_dbat7l); break; } + } + + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) { + if(spr==chud_970_pir) { mfspr(val, chud_970_pir); break; } + if(spr==chud_970_pmc1) { mfspr(val, chud_970_pmc1); break; } + if(spr==chud_970_pmc2) { mfspr(val, chud_970_pmc2); break; } + if(spr==chud_970_pmc3) { mfspr(val, chud_970_pmc3); break; } + if(spr==chud_970_pmc4) { mfspr(val, chud_970_pmc4); break; } + if(spr==chud_970_pmc5) { mfspr(val, chud_970_pmc5); break; } + if(spr==chud_970_pmc6) { mfspr(val, chud_970_pmc6); break; } + if(spr==chud_970_pmc7) { mfspr(val, chud_970_pmc7); break; } + if(spr==chud_970_pmc8) { mfspr(val, chud_970_pmc8); break; } + if(spr==chud_970_hdec) { mfspr(val, chud_970_hdec); break; } + } + + /* we only get here if none of the above cases qualify */ + retval = KERN_INVALID_ARGUMENT; + } while(0); + + chudxnu_set_interrupts_enabled(oldlevel); /* enable interrupts */ + + if(cpu>=0) { // cpu<0 means don't bind + chudxnu_unbind_thread(current_thread()); + } + + *val_p = val; + + return retval; +} + +__private_extern__ +kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p) +{ + kern_return_t retval = KERN_SUCCESS; + boolean_t oldlevel; + + /* bind to requested CPU */ + if(cpu>=0) { // cpu<0 means don't bind + if(chudxnu_bind_thread(current_thread(), cpu)!=KERN_SUCCESS) { + return KERN_INVALID_ARGUMENT; + } + } + + oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */ + + do { + /* PPC SPRs - 32-bit and 64-bit implementations */ + if(spr==chud_ppc_srr0) { retval = mfspr64(val_p, chud_ppc_srr0); break; } + if(spr==chud_ppc_srr1) { retval = mfspr64(val_p, chud_ppc_srr1); break; } + if(spr==chud_ppc_dar) { retval = mfspr64(val_p, chud_ppc_dar); break; } + if(spr==chud_ppc_dsisr) { retval = mfspr64(val_p, chud_ppc_dsisr); break; } + if(spr==chud_ppc_sdr1) { retval = mfspr64(val_p, chud_ppc_sdr1); break; } + if(spr==chud_ppc_sprg0) { retval = mfspr64(val_p, chud_ppc_sprg0); break; } + if(spr==chud_ppc_sprg1) { retval = mfspr64(val_p, chud_ppc_sprg1); break; } + if(spr==chud_ppc_sprg2) { retval = mfspr64(val_p, chud_ppc_sprg2); break; } + if(spr==chud_ppc_sprg3) { retval = mfspr64(val_p, chud_ppc_sprg3); break; } + if(spr==chud_ppc_dabr) { retval = mfspr64(val_p, chud_ppc_dabr); break; } + if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */ + struct ppc_thread_state64 state; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + kern_return_t kr; + kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */); + if(KERN_SUCCESS==kr) { + *val_p = state.srr1; + } else { + retval = KERN_FAILURE; + } + break; + } 
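/*
 * Why chudxnu_read_spr() above is a long if-ladder of literal constants
 * rather than one parameterized read: mfspr/mtspr/mfsr encode the register
 * number in an immediate field of the instruction, so the macros at the top
 * of this file pass it through the "n" (absolute constant) asm constraint;
 * a runtime variable would not assemble (the "expression must be absolute"
 * failure noted there). The same restriction is why the new mfspr64/mtspr64
 * assembly routines dispatch with a cmpwi/beq ladder. A minimal sketch of
 * the constraint at work (PowerPC-only, illustrative):
 *
 *     #define MFSPR(reg, spr) \
 *         __asm__ volatile("mfspr %0, %1" : "=r" (reg) : "n" (spr))
 *
 *     static uint32_t read_pvr(void)
 *     {
 *         uint32_t val;
 *         MFSPR(val, 287);   // PVR is SPR 287, a compile-time constant
 *         return val;
 *     }
 */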
+ + /* PPC SPRs - 64-bit implementations */ + if(spr==chud_ppc64_asr) { retval = mfspr64(val_p, chud_ppc64_asr); break; } + if(spr==chud_ppc64_accr) { retval = mfspr64(val_p, chud_ppc64_accr); break; } + + /* Implementation Specific SPRs */ + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) { + if(spr==chud_970_hid0) { retval = mfspr64(val_p, chud_970_hid0); break; } + if(spr==chud_970_hid1) { retval = mfspr64(val_p, chud_970_hid1); break; } + if(spr==chud_970_hid4) { retval = mfspr64(val_p, chud_970_hid4); break; } + if(spr==chud_970_hid5) { retval = mfspr64(val_p, chud_970_hid5); break; } + if(spr==chud_970_mmcr0) { retval = mfspr64(val_p, chud_970_mmcr0); break; } + if(spr==chud_970_mmcr1) { retval = mfspr64(val_p, chud_970_mmcr1); break; } + if(spr==chud_970_mmcra) { retval = mfspr64(val_p, chud_970_mmcra); break; } + if(spr==chud_970_siar) { retval = mfspr64(val_p, chud_970_siar); break; } + if(spr==chud_970_sdar) { retval = mfspr64(val_p, chud_970_sdar); break; } + if(spr==chud_970_imc) { retval = mfspr64(val_p, chud_970_imc); break; } + if(spr==chud_970_rmor) { retval = mfspr64(val_p, chud_970_rmor); break; } + if(spr==chud_970_hrmor) { retval = mfspr64(val_p, chud_970_hrmor); break; } + if(spr==chud_970_hior) { retval = mfspr64(val_p, chud_970_hior); break; } + if(spr==chud_970_lpidr) { retval = mfspr64(val_p, chud_970_lpidr); break; } + if(spr==chud_970_lpcr) { retval = mfspr64(val_p, chud_970_lpcr); break; } + if(spr==chud_970_dabrx) { retval = mfspr64(val_p, chud_970_dabrx); break; } + if(spr==chud_970_hsprg0) { retval = mfspr64(val_p, chud_970_hsprg0); break; } + if(spr==chud_970_hsprg1) { retval = mfspr64(val_p, chud_970_hsprg1); break; } + if(spr==chud_970_hsrr0) { retval = mfspr64(val_p, chud_970_hsrr0); break; } + if(spr==chud_970_hsrr1) { retval = mfspr64(val_p, chud_970_hsrr1); break; } + if(spr==chud_970_hdec) { retval = mfspr64(val_p, chud_970_hdec); break; } + if(spr==chud_970_trig0) { retval = mfspr64(val_p, chud_970_trig0); break; } + if(spr==chud_970_trig1) { retval = mfspr64(val_p, chud_970_trig1); break; } + if(spr==chud_970_trig2) { retval = mfspr64(val_p, chud_970_trig2); break; } + if(spr==chud_970_scomc) { retval = mfspr64(val_p, chud_970_scomc); break; } + if(spr==chud_970_scomd) { retval = mfspr64(val_p, chud_970_scomd); break; } + } + + /* we only get here if none of the above cases qualify */ + *val_p = 0xFFFFFFFFFFFFFFFFLL; + retval = KERN_INVALID_ARGUMENT; + } while(0); + + chudxnu_set_interrupts_enabled(oldlevel); /* enable interrupts */ + + if(cpu>=0) { // cpu<0 means don't bind + chudxnu_unbind_thread(current_thread()); + } + + return retval; } +__private_extern__ +kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val) +{ + kern_return_t retval = KERN_SUCCESS; + boolean_t oldlevel; + + /* bind to requested CPU */ + if(cpu>=0) { // cpu<0 means don't bind + if(chudxnu_bind_thread(current_thread(), cpu)!=KERN_SUCCESS) { + return KERN_INVALID_ARGUMENT; + } + } + + oldlevel = chudxnu_set_interrupts_enabled(FALSE); /* disable interrupts */ + + do { + /* PPC SPRs - 32-bit and 64-bit implementations */ + if(spr==chud_ppc_srr0) { mtspr(chud_ppc_srr0, val); break; } + if(spr==chud_ppc_srr1) { mtspr(chud_ppc_srr1, val); break; } + if(spr==chud_ppc_dsisr) { mtspr(chud_ppc_dsisr, val); break; } + if(spr==chud_ppc_dar) { mtspr(chud_ppc_dar, val); break; } + if(spr==chud_ppc_dec) { mtspr(chud_ppc_dec, val); break; } + if(spr==chud_ppc_sdr1) { mtspr(chud_ppc_sdr1, val); break; } + if(spr==chud_ppc_sprg0) { mtspr(chud_ppc_sprg0, val); break; } + 
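/*
 * The bare numbers in the timebase arms (mfspr 268/269 earlier, mtspr
 * 284/285 just below) are intentional: PowerPC gives the timebase separate
 * read and write SPR encodings, so one chud_ppc_tbl/chud_ppc_tbu constant
 * cannot serve both directions. A hedged sketch of a tear-free 64-bit
 * timebase read on 32-bit PowerPC, reusing an MFSPR macro like the one
 * sketched earlier (illustrative only):
 *
 *     static uint64_t read_timebase(void)
 *     {
 *         uint32_t hi, lo, hi2;
 *         do {
 *             MFSPR(hi,  269);   // TBU, read encoding
 *             MFSPR(lo,  268);   // TBL, read encoding
 *             MFSPR(hi2, 269);   // re-read TBU
 *         } while (hi != hi2);   // retry if TBL carried into TBU mid-read
 *         return ((uint64_t)hi << 32) | lo;
 *     }
 */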
if(spr==chud_ppc_sprg1) { mtspr(chud_ppc_sprg1, val); break; } + if(spr==chud_ppc_sprg2) { mtspr(chud_ppc_sprg2, val); break; } + if(spr==chud_ppc_sprg3) { mtspr(chud_ppc_sprg3, val); break; } + if(spr==chud_ppc_ear) { mtspr(chud_ppc_ear, val); break; } + if(spr==chud_ppc_tbl) { mtspr(284, val); break; } /* timebase consists of read registers and write registers */ + if(spr==chud_ppc_tbu) { mtspr(285, val); break; } + if(spr==chud_ppc_pvr) { mtspr(chud_ppc_pvr, val); break; } + if(spr==chud_ppc_ibat0u) { mtspr(chud_ppc_ibat0u, val); break; } + if(spr==chud_ppc_ibat0l) { mtspr(chud_ppc_ibat0l, val); break; } + if(spr==chud_ppc_ibat1u) { mtspr(chud_ppc_ibat1u, val); break; } + if(spr==chud_ppc_ibat1l) { mtspr(chud_ppc_ibat1l, val); break; } + if(spr==chud_ppc_ibat2u) { mtspr(chud_ppc_ibat2u, val); break; } + if(spr==chud_ppc_ibat2l) { mtspr(chud_ppc_ibat2l, val); break; } + if(spr==chud_ppc_ibat3u) { mtspr(chud_ppc_ibat3u, val); break; } + if(spr==chud_ppc_ibat3l) { mtspr(chud_ppc_ibat3l, val); break; } + if(spr==chud_ppc_dbat0u) { mtspr(chud_ppc_dbat0u, val); break; } + if(spr==chud_ppc_dbat0l) { mtspr(chud_ppc_dbat0l, val); break; } + if(spr==chud_ppc_dbat1u) { mtspr(chud_ppc_dbat1u, val); break; } + if(spr==chud_ppc_dbat1l) { mtspr(chud_ppc_dbat1l, val); break; } + if(spr==chud_ppc_dbat2u) { mtspr(chud_ppc_dbat2u, val); break; } + if(spr==chud_ppc_dbat2l) { mtspr(chud_ppc_dbat2l, val); break; } + if(spr==chud_ppc_dbat3u) { mtspr(chud_ppc_dbat3u, val); break; } + if(spr==chud_ppc_dbat3l) { mtspr(chud_ppc_dbat3l, val); break; } + if(spr==chud_ppc_dabr) { mtspr(chud_ppc_dabr, val); break; } + if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */ + struct ppc_thread_state64 state; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + kern_return_t kr; + kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */); + if(KERN_SUCCESS==kr) { + state.srr1 = val; + kr = chudxnu_thread_set_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, count, TRUE /* user only */); + if(KERN_SUCCESS!=kr) { + retval = KERN_FAILURE; + } + } else { + retval = KERN_FAILURE; + } + break; + } + + /* PPC SPRs - 32-bit implementations */ + if(spr==chud_ppc32_sr0) { mtsr(0, val); break; } + if(spr==chud_ppc32_sr1) { mtsr(1, val); break; } + if(spr==chud_ppc32_sr2) { mtsr(2, val); break; } + if(spr==chud_ppc32_sr3) { mtsr(3, val); break; } + if(spr==chud_ppc32_sr4) { mtsr(4, val); break; } + if(spr==chud_ppc32_sr5) { mtsr(5, val); break; } + if(spr==chud_ppc32_sr6) { mtsr(6, val); break; } + if(spr==chud_ppc32_sr7) { mtsr(7, val); break; } + if(spr==chud_ppc32_sr8) { mtsr(8, val); break; } + if(spr==chud_ppc32_sr9) { mtsr(9, val); break; } + if(spr==chud_ppc32_sr10) { mtsr(10, val); break; } + if(spr==chud_ppc32_sr11) { mtsr(11, val); break; } + if(spr==chud_ppc32_sr12) { mtsr(12, val); break; } + if(spr==chud_ppc32_sr13) { mtsr(13, val); break; } + if(spr==chud_ppc32_sr14) { mtsr(14, val); break; } + if(spr==chud_ppc32_sr15) { mtsr(15, val); break; } + + /* Implementation Specific SPRs */ + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_750) { + if(spr==chud_750_mmcr0) { mtspr(chud_750_mmcr0, val); break; } + if(spr==chud_750_pmc1) { mtspr(chud_750_pmc1, val); break; } + if(spr==chud_750_pmc2) { mtspr(chud_750_pmc2, val); break; } + if(spr==chud_750_sia) { mtspr(chud_750_sia, val); break; } + if(spr==chud_750_mmcr1) { mtspr(chud_750_mmcr1, val); break; } + if(spr==chud_750_pmc3) { mtspr(chud_750_pmc3, val); break; } + 
if(spr==chud_750_pmc4) { mtspr(chud_750_pmc4, val); break; } + if(spr==chud_750_iabr) { mtspr(chud_750_iabr, val); break; } + if(spr==chud_750_ictc) { mtspr(chud_750_ictc, val); break; } + if(spr==chud_750_thrm1) { mtspr(chud_750_thrm1, val); break; } + if(spr==chud_750_thrm2) { mtspr(chud_750_thrm2, val); break; } + if(spr==chud_750_thrm3) { mtspr(chud_750_thrm3, val); break; } + if(spr==chud_750_l2cr) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_750_hid0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_750_hid1) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + + // 750FX only + if(spr==chud_750fx_ibat4u) { mtspr(chud_750fx_ibat4u, val); break; } + if(spr==chud_750fx_ibat4l) { mtspr(chud_750fx_ibat4l, val); break; } + if(spr==chud_750fx_ibat5u) { mtspr(chud_750fx_ibat5u, val); break; } + if(spr==chud_750fx_ibat5l) { mtspr(chud_750fx_ibat5l, val); break; } + if(spr==chud_750fx_ibat6u) { mtspr(chud_750fx_ibat6u, val); break; } + if(spr==chud_750fx_ibat6l) { mtspr(chud_750fx_ibat6l, val); break; } + if(spr==chud_750fx_ibat7u) { mtspr(chud_750fx_ibat7u, val); break; } + if(spr==chud_750fx_ibat7l) { mtspr(chud_750fx_ibat7l, val); break; } + if(spr==chud_750fx_dbat4u) { mtspr(chud_750fx_dbat4u, val); break; } + if(spr==chud_750fx_dbat4l) { mtspr(chud_750fx_dbat4l, val); break; } + if(spr==chud_750fx_dbat5u) { mtspr(chud_750fx_dbat5u, val); break; } + if(spr==chud_750fx_dbat5l) { mtspr(chud_750fx_dbat5l, val); break; } + if(spr==chud_750fx_dbat6u) { mtspr(chud_750fx_dbat6u, val); break; } + if(spr==chud_750fx_dbat6l) { mtspr(chud_750fx_dbat6l, val); break; } + if(spr==chud_750fx_dbat7u) { mtspr(chud_750fx_dbat7u, val); break; } + if(spr==chud_750fx_dbat7l) { mtspr(chud_750fx_dbat7l, val); break; } + + // 750FX >= DDR2.x + if(spr==chud_750fx_hid2) { mtspr(chud_750fx_hid2, val); break; } + } + + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7400) { + if(spr==chud_7400_mmcr2) { mtspr(chud_7400_mmcr2, val); break; } + if(spr==chud_7400_bamr) { mtspr(chud_7400_bamr, val); break; } + if(spr==chud_7400_mmcr0) { mtspr(chud_7400_mmcr0, val); break; } + if(spr==chud_7400_pmc1) { mtspr(chud_7400_pmc1, val); break; } + if(spr==chud_7400_pmc2) { mtspr(chud_7400_pmc2, val); break; } + if(spr==chud_7400_siar) { mtspr(chud_7400_siar, val); break; } + if(spr==chud_7400_mmcr1) { mtspr(chud_7400_mmcr1, val); break; } + if(spr==chud_7400_pmc3) { mtspr(chud_7400_pmc3, val); break; } + if(spr==chud_7400_pmc4) { mtspr(chud_7400_pmc4, val); break; } + if(spr==chud_7400_iabr) { mtspr(chud_7400_iabr, val); break; } + if(spr==chud_7400_ictc) { mtspr(chud_7400_ictc, val); break; } + if(spr==chud_7400_thrm1) { mtspr(chud_7400_thrm1, val); break; } + if(spr==chud_7400_thrm2) { mtspr(chud_7400_thrm2, val); break; } + if(spr==chud_7400_thrm3) { mtspr(chud_7400_thrm3, val); break; } + if(spr==chud_7400_pir) { mtspr(chud_7400_pir, val); break; } + + if(spr==chud_7400_l2cr) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7400_hid0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7400_hid1) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7400_msscr0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7400_msscr1) { /* private */ + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + + // 7410 only + if(spr==chud_7410_l2pmcr) { mtspr(chud_7410_l2pmcr, val); break; } + } + + 
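/*
 * Note the split in the blocks above and below: plain counters and BATs get
 * a direct mtspr, but cache- and pipeline-control registers (HID0/HID1,
 * L2CR/L3CR, MSSCR0, ICTRL, LDSTCR) are routed through
 * chudxnu_set_shadowed_spr(). The kernel keeps a software copy of those
 * registers in per_proc (pf.pfHID0, pf.l2cr, ...), and the copy has to move
 * in step with the hardware; the cache registers additionally need
 * cacheInit() around the update. A generic write-through-shadow sketch (the
 * type and helpers are illustrative, not the kernel's):
 *
 *     struct shadowed_reg {
 *         uint32_t shadow;                // last value written
 *         void   (*write_hw)(uint32_t);   // arch-specific mtspr wrapper
 *     };
 *
 *     static void shadowed_write(struct shadowed_reg *r, uint32_t val)
 *     {
 *         r->write_hw(val);   // update the hardware register...
 *         r->shadow = val;    // ...and keep the software copy in sync
 *     }
 *
 *     static uint32_t shadowed_read(const struct shadowed_reg *r)
 *     {
 *         return r->shadow;   // cheap, unprivileged read of the last value
 *     }
 */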
if(cpu_subtype()==CPU_SUBTYPE_POWERPC_7450) { + if(spr==chud_7450_mmcr2) { mtspr(chud_7450_mmcr2, val); break; } + if(spr==chud_7450_pmc5) { mtspr(chud_7450_pmc5, val); break; } + if(spr==chud_7450_pmc6) { mtspr(chud_7450_pmc6, val); break; } + if(spr==chud_7450_bamr) { mtspr(chud_7450_bamr, val); break; } + if(spr==chud_7450_mmcr0) { mtspr(chud_7450_mmcr0, val); break; } + if(spr==chud_7450_pmc1) { mtspr(chud_7450_pmc1, val); break; } + if(spr==chud_7450_pmc2) { mtspr(chud_7450_pmc2, val); break; } + if(spr==chud_7450_siar) { mtspr(chud_7450_siar, val); break; } + if(spr==chud_7450_mmcr1) { mtspr(chud_7450_mmcr1, val); break; } + if(spr==chud_7450_pmc3) { mtspr(chud_7450_pmc3, val); break; } + if(spr==chud_7450_pmc4) { mtspr(chud_7450_pmc4, val); break; } + if(spr==chud_7450_tlbmiss) { mtspr(chud_7450_tlbmiss, val); break; } + if(spr==chud_7450_ptehi) { mtspr(chud_7450_ptehi, val); break; } + if(spr==chud_7450_ptelo) { mtspr(chud_7450_ptelo, val); break; } + if(spr==chud_7450_l3pm) { mtspr(chud_7450_l3pm, val); break; } + if(spr==chud_7450_iabr) { mtspr(chud_7450_iabr, val); break; } + if(spr==chud_7450_ldstdb) { mtspr(chud_7450_ldstdb, val); break; } + if(spr==chud_7450_ictc) { mtspr(chud_7450_ictc, val); break; } + if(spr==chud_7450_thrm1) { mtspr(chud_7450_thrm1, val); break; } + if(spr==chud_7450_thrm2) { mtspr(chud_7450_thrm2, val); break; } + if(spr==chud_7450_thrm3) { mtspr(chud_7450_thrm3, val); break; } + if(spr==chud_7450_pir) { mtspr(chud_7450_pir, val); break; } + + if(spr==chud_7450_l2cr) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + + if(spr==chud_7450_l3cr) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_ldstcr) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_hid0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_hid1) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_msscr0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_msssr0) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + if(spr==chud_7450_ictrl) { + retval = chudxnu_set_shadowed_spr(cpu, spr, val); + break; + } + + // 7455/7457 only + if(spr==chud_7455_sprg4) { mtspr(chud_7455_sprg4, val); break; } + if(spr==chud_7455_sprg5) { mtspr(chud_7455_sprg5, val); break; } + if(spr==chud_7455_sprg6) { mtspr(chud_7455_sprg6, val); break; } + if(spr==chud_7455_sprg7) { mtspr(chud_7455_sprg7, val); break; } + if(spr==chud_7455_ibat4u) { mtspr(chud_7455_ibat4u, val); break; } + if(spr==chud_7455_ibat4l) { mtspr(chud_7455_ibat4l, val); break; } + if(spr==chud_7455_ibat5u) { mtspr(chud_7455_ibat5u, val); break; } + if(spr==chud_7455_ibat5l) { mtspr(chud_7455_ibat5l, val); break; } + if(spr==chud_7455_ibat6u) { mtspr(chud_7455_ibat6u, val); break; } + if(spr==chud_7455_ibat6l) { mtspr(chud_7455_ibat6l, val); break; } + if(spr==chud_7455_ibat7u) { mtspr(chud_7455_ibat7u, val); break; } + if(spr==chud_7455_ibat7l) { mtspr(chud_7455_ibat7l, val); break; } + if(spr==chud_7455_dbat4u) { mtspr(chud_7455_dbat4u, val); break; } + if(spr==chud_7455_dbat4l) { mtspr(chud_7455_dbat4l, val); break; } + if(spr==chud_7455_dbat5u) { mtspr(chud_7455_dbat5u, val); break; } + if(spr==chud_7455_dbat5l) { mtspr(chud_7455_dbat5l, val); break; } + if(spr==chud_7455_dbat6u) { mtspr(chud_7455_dbat6u, val); break; } + if(spr==chud_7455_dbat6l) { mtspr(chud_7455_dbat6l, val); break; } + if(spr==chud_7455_dbat7u) { 
mtspr(chud_7455_dbat7u, val); break; } + if(spr==chud_7455_dbat7l) { mtspr(chud_7455_dbat7l, val); break; } + } + + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) { + if(spr==chud_970_pir) { mtspr(chud_970_pir, val); break; } + if(spr==chud_970_pmc1) { mtspr(chud_970_pmc1, val); break; } + if(spr==chud_970_pmc2) { mtspr(chud_970_pmc2, val); break; } + if(spr==chud_970_pmc3) { mtspr(chud_970_pmc3, val); break; } + if(spr==chud_970_pmc4) { mtspr(chud_970_pmc4, val); break; } + if(spr==chud_970_pmc5) { mtspr(chud_970_pmc5, val); break; } + if(spr==chud_970_pmc6) { mtspr(chud_970_pmc6, val); break; } + if(spr==chud_970_pmc7) { mtspr(chud_970_pmc7, val); break; } + if(spr==chud_970_pmc8) { mtspr(chud_970_pmc8, val); break; } + if(spr==chud_970_hdec) { mtspr(chud_970_hdec, val); break; } + } + + /* we only get here if none of the above cases qualify */ + retval = KERN_INVALID_ARGUMENT; + } while(0); + + chudxnu_set_interrupts_enabled(oldlevel); /* re-enable interrupts */ + + if(cpu>=0) { // cpu<0 means don't bind + chudxnu_unbind_thread(current_thread()); + } + + return retval; +} + +__private_extern__ +kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val) +{ + kern_return_t retval = KERN_SUCCESS; + boolean_t oldlevel; + uint64_t *val_p = &val; + + /* bind to requested CPU */ + if(cpu>=0) { // cpu<0 means don't bind + if(chudxnu_bind_thread(current_thread(), cpu)!=KERN_SUCCESS) { + return KERN_INVALID_ARGUMENT; + } + } + + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + + do { + /* PPC SPRs - 32-bit and 64-bit implementations */ + if(spr==chud_ppc_srr0) { retval = mtspr64(chud_ppc_srr0, val_p); break; } + if(spr==chud_ppc_srr1) { retval = mtspr64(chud_ppc_srr1, val_p); break; } + if(spr==chud_ppc_dar) { retval = mtspr64(chud_ppc_dar, val_p); break; } + if(spr==chud_ppc_dsisr) { retval = mtspr64(chud_ppc_dsisr, val_p); break; } + if(spr==chud_ppc_sdr1) { retval = mtspr64(chud_ppc_sdr1, val_p); break; } + if(spr==chud_ppc_sprg0) { retval = mtspr64(chud_ppc_sprg0, val_p); break; } + if(spr==chud_ppc_sprg1) { retval = mtspr64(chud_ppc_sprg1, val_p); break; } + if(spr==chud_ppc_sprg2) { retval = mtspr64(chud_ppc_sprg2, val_p); break; } + if(spr==chud_ppc_sprg3) { retval = mtspr64(chud_ppc_sprg3, val_p); break; } + if(spr==chud_ppc_dabr) { retval = mtspr64(chud_ppc_dabr, val_p); break; } + if(spr==chud_ppc_msr) { /* this is the MSR for the calling process */ + struct ppc_thread_state64 state; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + kern_return_t kr; + kr = chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, TRUE /* user only */); + if(KERN_SUCCESS==kr) { + state.srr1 = val; + kr = chudxnu_thread_set_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, count, TRUE /* user only */); + if(KERN_SUCCESS!=kr) { + retval = KERN_FAILURE; + } + } else { + retval = KERN_FAILURE; + } + break; + } + + /* PPC SPRs - 64-bit implementations */ + if(spr==chud_ppc64_asr) { retval = mtspr64(chud_ppc64_asr, val_p); break; } + if(spr==chud_ppc64_accr) { retval = mtspr64(chud_ppc64_accr, val_p); break; } + if(spr==chud_ppc64_ctrl) { retval = mtspr64(chud_ppc64_ctrl, val_p); break; } + + /* Implementation Specific SPRs */ + if(cpu_subtype()==CPU_SUBTYPE_POWERPC_970) { + if(spr==chud_970_hid0) { retval = mtspr64(chud_970_hid0, val_p); break; } + if(spr==chud_970_hid1) { retval = mtspr64(chud_970_hid1, val_p); break; } + if(spr==chud_970_hid4) { retval = mtspr64(chud_970_hid4, val_p); break; } + 
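/*
 * These mtspr64() calls are the patch's replacement for the dozens of
 * per-register chudxnu_mt*_64/chudxnu_mf*_64 assembly entry points removed
 * further down: a single routine takes the SPR number, dispatches in
 * assembly, and reports KERN_SUCCESS or KERN_FAILURE in r3, with the 64-bit
 * value passed by pointer so the 32-bit C caller never holds it in
 * registers. Typical call shape, checking the status (sketch; the helper
 * name is illustrative):
 *
 *     extern kern_return_t mfspr64(uint64_t *val, int spr);
 *     extern kern_return_t mtspr64(int spr, uint64_t *val);
 *
 *     static kern_return_t spr64_set_bits(int spr, uint64_t mask)
 *     {
 *         uint64_t v;
 *         if (mfspr64(&v, spr) != KERN_SUCCESS)   // unknown SPR
 *             return KERN_FAILURE;
 *         v |= mask;
 *         return mtspr64(spr, &v);                // write it back
 *     }
 */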
if(spr==chud_970_hid5) { retval = mtspr64(chud_970_hid5, val_p); break; } + if(spr==chud_970_mmcr0) { retval = mtspr64(chud_970_mmcr0, val_p); break; } + if(spr==chud_970_mmcr1) { retval = mtspr64(chud_970_mmcr1, val_p); break; } + if(spr==chud_970_mmcra) { retval = mtspr64(chud_970_mmcra, val_p); break; } + if(spr==chud_970_siar) { retval = mtspr64(chud_970_siar, val_p); break; } + if(spr==chud_970_sdar) { retval = mtspr64(chud_970_sdar, val_p); break; } + if(spr==chud_970_imc) { retval = mtspr64(chud_970_imc, val_p); break; } + + if(spr==chud_970_rmor) { retval = mtspr64(chud_970_rmor, val_p); break; } + if(spr==chud_970_hrmor) { retval = mtspr64(chud_970_hrmor, val_p); break; } + if(spr==chud_970_hior) { retval = mtspr64(chud_970_hior, val_p); break; } + if(spr==chud_970_lpidr) { retval = mtspr64(chud_970_lpidr, val_p); break; } + if(spr==chud_970_lpcr) { retval = mtspr64(chud_970_lpcr, val_p); break; } + if(spr==chud_970_dabrx) { retval = mtspr64(chud_970_dabrx, val_p); break; } + + if(spr==chud_970_hsprg0) { retval = mtspr64(chud_970_hsprg0, val_p); break; } + if(spr==chud_970_hsprg1) { retval = mtspr64(chud_970_hsprg1, val_p); break; } + if(spr==chud_970_hsrr0) { retval = mtspr64(chud_970_hsrr0, val_p); break; } + if(spr==chud_970_hsrr1) { retval = mtspr64(chud_970_hsrr1, val_p); break; } + if(spr==chud_970_hdec) { retval = mtspr64(chud_970_hdec, val_p); break; } + if(spr==chud_970_trig0) { retval = mtspr64(chud_970_trig0, val_p); break; } + if(spr==chud_970_trig1) { retval = mtspr64(chud_970_trig1, val_p); break; } + if(spr==chud_970_trig2) { retval = mtspr64(chud_970_trig2, val_p); break; } + if(spr==chud_970_scomc) { retval = mtspr64(chud_970_scomc, val_p); break; } + if(spr==chud_970_scomd) { retval = mtspr64(chud_970_scomd, val_p); break; } + + if(spr==chud_970_hid0) { + retval = chudxnu_set_shadowed_spr64(cpu, spr, val); + break; + } + + if(spr==chud_970_hid1) { + retval = chudxnu_set_shadowed_spr64(cpu, spr, val); + break; + } + + if(spr==chud_970_hid4) { + retval = chudxnu_set_shadowed_spr64(cpu, spr, val); + break; + } + + if(spr==chud_970_hid5) { + retval = chudxnu_set_shadowed_spr64(cpu, spr, val); + break; + } + + } + + /* we only get here if none of the above cases qualify */ + retval = KERN_INVALID_ARGUMENT; + } while(0); + + chudxnu_set_interrupts_enabled(oldlevel); /* re-enable interrupts */ + + if(cpu>=0) { // cpu<0 means don't bind + chudxnu_unbind_thread(current_thread()); + } + + return retval; +} + +#pragma mark **** cache flush **** + __private_extern__ void chudxnu_flush_caches(void) { @@ -348,6 +1127,8 @@ void chudxnu_enable_caches(boolean_t enable) } } +#pragma mark **** perfmon facility **** + __private_extern__ kern_return_t chudxnu_perfmon_acquire_facility(task_t task) { @@ -360,16 +1141,21 @@ kern_return_t chudxnu_perfmon_release_facility(task_t task) return perfmon_release_facility(task); } +#pragma mark **** branch trace buffer **** + +extern int pc_trace_buf[1024]; + __private_extern__ uint32_t * chudxnu_get_branch_trace_buffer(uint32_t *entries) { - extern int pc_trace_buf[1024]; if(entries) { *entries = sizeof(pc_trace_buf)/sizeof(int); } return pc_trace_buf; } +#pragma mark **** interrupts enable/disable **** + __private_extern__ boolean_t chudxnu_get_interrupts_enabled(void) { @@ -394,6 +1180,8 @@ void chudxnu_cause_interrupt(void) ml_cause_interrupt(); } +#pragma mark **** rupt counters **** + __private_extern__ kern_return_t chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts) { @@ -403,29 +1191,31 @@ kern_return_t 
chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts) if(rupts) { boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + struct per_proc_info *per_proc; - rupts->hwResets = per_proc_info[cpu].hwCtr.hwResets; - rupts->hwMachineChecks = per_proc_info[cpu].hwCtr.hwMachineChecks; - rupts->hwDSIs = per_proc_info[cpu].hwCtr.hwDSIs; - rupts->hwISIs = per_proc_info[cpu].hwCtr.hwISIs; - rupts->hwExternals = per_proc_info[cpu].hwCtr.hwExternals; - rupts->hwAlignments = per_proc_info[cpu].hwCtr.hwAlignments; - rupts->hwPrograms = per_proc_info[cpu].hwCtr.hwPrograms; - rupts->hwFloatPointUnavailable = per_proc_info[cpu].hwCtr.hwFloatPointUnavailable; - rupts->hwDecrementers = per_proc_info[cpu].hwCtr.hwDecrementers; - rupts->hwIOErrors = per_proc_info[cpu].hwCtr.hwIOErrors; - rupts->hwSystemCalls = per_proc_info[cpu].hwCtr.hwSystemCalls; - rupts->hwTraces = per_proc_info[cpu].hwCtr.hwTraces; - rupts->hwFloatingPointAssists = per_proc_info[cpu].hwCtr.hwFloatingPointAssists; - rupts->hwPerformanceMonitors = per_proc_info[cpu].hwCtr.hwPerformanceMonitors; - rupts->hwAltivecs = per_proc_info[cpu].hwCtr.hwAltivecs; - rupts->hwInstBreakpoints = per_proc_info[cpu].hwCtr.hwInstBreakpoints; - rupts->hwSystemManagements = per_proc_info[cpu].hwCtr.hwSystemManagements; - rupts->hwAltivecAssists = per_proc_info[cpu].hwCtr.hwAltivecAssists; - rupts->hwThermal = per_proc_info[cpu].hwCtr.hwThermal; - rupts->hwSoftPatches = per_proc_info[cpu].hwCtr.hwSoftPatches; - rupts->hwMaintenances = per_proc_info[cpu].hwCtr.hwMaintenances; - rupts->hwInstrumentations = per_proc_info[cpu].hwCtr.hwInstrumentations; + per_proc = PerProcTable[cpu].ppe_vaddr; + rupts->hwResets = per_proc->hwCtr.hwResets; + rupts->hwMachineChecks = per_proc->hwCtr.hwMachineChecks; + rupts->hwDSIs = per_proc->hwCtr.hwDSIs; + rupts->hwISIs = per_proc->hwCtr.hwISIs; + rupts->hwExternals = per_proc->hwCtr.hwExternals; + rupts->hwAlignments = per_proc->hwCtr.hwAlignments; + rupts->hwPrograms = per_proc->hwCtr.hwPrograms; + rupts->hwFloatPointUnavailable = per_proc->hwCtr.hwFloatPointUnavailable; + rupts->hwDecrementers = per_proc->hwCtr.hwDecrementers; + rupts->hwIOErrors = per_proc->hwCtr.hwIOErrors; + rupts->hwSystemCalls = per_proc->hwCtr.hwSystemCalls; + rupts->hwTraces = per_proc->hwCtr.hwTraces; + rupts->hwFloatingPointAssists = per_proc->hwCtr.hwFloatingPointAssists; + rupts->hwPerformanceMonitors = per_proc->hwCtr.hwPerformanceMonitors; + rupts->hwAltivecs = per_proc->hwCtr.hwAltivecs; + rupts->hwInstBreakpoints = per_proc->hwCtr.hwInstBreakpoints; + rupts->hwSystemManagements = per_proc->hwCtr.hwSystemManagements; + rupts->hwAltivecAssists = per_proc->hwCtr.hwAltivecAssists; + rupts->hwThermal = per_proc->hwCtr.hwThermal; + rupts->hwSoftPatches = per_proc->hwCtr.hwSoftPatches; + rupts->hwMaintenances = per_proc->hwCtr.hwMaintenances; + rupts->hwInstrumentations = per_proc->hwCtr.hwInstrumentations; ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; @@ -441,10 +1231,12 @@ kern_return_t chudxnu_clear_cpu_rupt_counters(int cpu) return KERN_FAILURE; } - bzero(&(per_proc_info[cpu].hwCtr), sizeof(struct hwCtrs)); + bzero((char *)&(PerProcTable[cpu].ppe_vaddr->hwCtr), sizeof(struct hwCtrs)); return KERN_SUCCESS; } +#pragma mark **** alignment exceptions **** + __private_extern__ kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable) { @@ -453,4 +1245,18 @@ kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable) } else { dgWork.dgFlags &= ~enaNotifyEM; } + return KERN_SUCCESS; +} + +#pragma mark **** scom **** 
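/*
 * The two wrappers below expose the 970's SCOM ("scan communication")
 * sideband interface through ml_scom_read()/ml_scom_write(). As written
 * they return KERN_SUCCESS unconditionally, so callers get no error signal
 * from the underlying access. A usage sketch (the helper name and the
 * read-modify-write shape are illustrative, not part of the kernel API):
 *
 *     static kern_return_t scom_set_bits(uint32_t reg, uint64_t bits)
 *     {
 *         uint64_t data = 0;
 *         chudxnu_scom_read(reg, &data);                // fetch current value
 *         return chudxnu_scom_write(reg, data | bits);  // write it back
 *     }
 */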
+kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data) +{ + ml_scom_read(reg, data); + return KERN_SUCCESS; +} + +kern_return_t chudxnu_scom_write(uint32_t reg, uint64_t data) +{ + ml_scom_write(reg, data); + return KERN_SUCCESS; } diff --git a/osfmk/ppc/chud/chud_cpu_asm.h b/osfmk/ppc/chud/chud_cpu_asm.h index d0cb1ead3..a422a3a6c 100644 --- a/osfmk/ppc/chud/chud_cpu_asm.h +++ b/osfmk/ppc/chud/chud_cpu_asm.h @@ -23,82 +23,10 @@ #ifndef _CHUD_CPU_ASM_H_ #define _CHUD_CPU_ASM_H_ -void chudxnu_mfsrr0_64(uint64_t *val); -void chudxnu_mfsrr1_64(uint64_t *val); -void chudxnu_mfdar_64(uint64_t *val); -void chudxnu_mfsdr1_64(uint64_t *val); -void chudxnu_mfsprg0_64(uint64_t *val); -void chudxnu_mfsprg1_64(uint64_t *val); -void chudxnu_mfsprg2_64(uint64_t *val); -void chudxnu_mfsprg3_64(uint64_t *val); -void chudxnu_mfasr_64(uint64_t *val); -void chudxnu_mfdabr_64(uint64_t *val); -void chudxnu_mfhid0_64(uint64_t *val); -void chudxnu_mfhid1_64(uint64_t *val); -void chudxnu_mfhid4_64(uint64_t *val); -void chudxnu_mfhid5_64(uint64_t *val); -void chudxnu_mfmmcr0_64(uint64_t *val); -void chudxnu_mfmmcr1_64(uint64_t *val); -void chudxnu_mfmmcra_64(uint64_t *val); -void chudxnu_mfsiar_64(uint64_t *val); -void chudxnu_mfsdar_64(uint64_t *val); -void chudxnu_mfimc_64(uint64_t *val); -void chudxnu_mfrmor_64(uint64_t *val); -void chudxnu_mfhrmor_64(uint64_t *val); -void chudxnu_mfhior_64(uint64_t *val); -void chudxnu_mflpidr_64(uint64_t *val); -void chudxnu_mflpcr_64(uint64_t *val); -void chudxnu_mfdabrx_64(uint64_t *val); -void chudxnu_mfhsprg0_64(uint64_t *val); -void chudxnu_mfhsprg1_64(uint64_t *val); -void chudxnu_mfhsrr0_64(uint64_t *val); -void chudxnu_mfhsrr1_64(uint64_t *val); -void chudxnu_mfhdec_64(uint64_t *val); -void chudxnu_mftrig0_64(uint64_t *val); -void chudxnu_mftrig1_64(uint64_t *val); -void chudxnu_mftrig2_64(uint64_t *val); -void chudxnu_mfaccr_64(uint64_t *val); -void chudxnu_mfscomc_64(uint64_t *val); -void chudxnu_mfscomd_64(uint64_t *val); -void chudxnu_mfmsr_64(uint64_t *val); +kern_return_t mfspr64(uint64_t *val, int spr); +kern_return_t mfmsr64(uint64_t *val); -void chudxnu_mtsrr0_64(uint64_t *val); -void chudxnu_mtsrr1_64(uint64_t *val); -void chudxnu_mtdar_64(uint64_t *val); -void chudxnu_mtsdr1_64(uint64_t *val); -void chudxnu_mtsprg0_64(uint64_t *val); -void chudxnu_mtsprg1_64(uint64_t *val); -void chudxnu_mtsprg2_64(uint64_t *val); -void chudxnu_mtsprg3_64(uint64_t *val); -void chudxnu_mtasr_64(uint64_t *val); -void chudxnu_mtdabr_64(uint64_t *val); -void chudxnu_mthid0_64(uint64_t *val); -void chudxnu_mthid1_64(uint64_t *val); -void chudxnu_mthid4_64(uint64_t *val); -void chudxnu_mthid5_64(uint64_t *val); -void chudxnu_mtmmcr0_64(uint64_t *val); -void chudxnu_mtmmcr1_64(uint64_t *val); -void chudxnu_mtmmcra_64(uint64_t *val); -void chudxnu_mtsiar_64(uint64_t *val); -void chudxnu_mtsdar_64(uint64_t *val); -void chudxnu_mtimc_64(uint64_t *val); -void chudxnu_mtrmor_64(uint64_t *val); -void chudxnu_mthrmor_64(uint64_t *val); -void chudxnu_mthior_64(uint64_t *val); -void chudxnu_mtlpidr_64(uint64_t *val); -void chudxnu_mtlpcr_64(uint64_t *val); -void chudxnu_mtdabrx_64(uint64_t *val); -void chudxnu_mthsprg0_64(uint64_t *val); -void chudxnu_mthsprg1_64(uint64_t *val); -void chudxnu_mthsrr0_64(uint64_t *val); -void chudxnu_mthsrr1_64(uint64_t *val); -void chudxnu_mthdec_64(uint64_t *val); -void chudxnu_mttrig0_64(uint64_t *val); -void chudxnu_mttrig1_64(uint64_t *val); -void chudxnu_mttrig2_64(uint64_t *val); -void chudxnu_mtaccr_64(uint64_t *val); -void 
chudxnu_mtscomc_64(uint64_t *val); -void chudxnu_mtscomd_64(uint64_t *val); -void chudxnu_mtmsr_64(uint64_t *val); +kern_return_t mtspr64(int spr, uint64_t *val); +kern_return_t mtmsr64(uint64_t *val); #endif // _CHUD_CPU_ASM_H_ diff --git a/osfmk/ppc/chud/chud_cpu_asm.s b/osfmk/ppc/chud/chud_cpu_asm.s index fba86691f..4bf583034 100644 --- a/osfmk/ppc/chud/chud_cpu_asm.s +++ b/osfmk/ppc/chud/chud_cpu_asm.s @@ -25,339 +25,391 @@ #include #include - .text - .align 5 - .globl EXT(chudxnu_mfsrr0_64) -EXT(chudxnu_mfsrr0_64): +/* + * kern_return_t mfspr64(uint64_t *val, int spr); + * + * r3: address to store value in + * r4: spr to read from + * + */ + +; Force a line boundry here + .align 5 + .globl EXT(mfspr64) + +EXT(mfspr64): + ;; generic PPC 64-bit wide SPRs + cmpwi r4,chud_ppc_srr0 + beq mfspr64_srr0 + cmpwi r4,chud_ppc_srr1 + beq mfspr64_srr1 + cmpwi r4,chud_ppc_dar + beq mfspr64_dar + cmpwi r4,chud_ppc_sdr1 + beq mfspr64_sdr1 + cmpwi r4,chud_ppc_sprg0 + beq mfspr64_sprg0 + cmpwi r4,chud_ppc_sprg1 + beq mfspr64_sprg1 + cmpwi r4,chud_ppc_sprg2 + beq mfspr64_sprg2 + cmpwi r4,chud_ppc_sprg3 + beq mfspr64_sprg3 + cmpwi r4,chud_ppc64_asr + beq mfspr64_asr + cmpwi r4,chud_ppc_dabr + beq mfspr64_dabr + + ;; GPUL specific 64-bit wide SPRs + cmpwi r4,chud_970_hid0 + beq mfspr64_hid0 + cmpwi r4,chud_970_hid1 + beq mfspr64_hid1 + cmpwi r4,chud_970_hid4 + beq mfspr64_hid4 + cmpwi r4,chud_970_hid5 + beq mfspr64_hid5 + cmpwi r4,chud_970_mmcr0 + beq mfspr64_mmcr0 + cmpwi r4,chud_970_mmcr1 + beq mfspr64_mmcr1 + cmpwi r4,chud_970_mmcra + beq mfspr64_mmcra + cmpwi r4,chud_970_siar + beq mfspr64_siar + cmpwi r4,chud_970_sdar + beq mfspr64_sdar + cmpwi r4,chud_970_imc + beq mfspr64_imc + cmpwi r4,chud_970_rmor + beq mfspr64_rmor + cmpwi r4,chud_970_hrmor + beq mfspr64_hrmor + cmpwi r4,chud_970_hior + beq mfspr64_hior + cmpwi r4,chud_970_lpidr + beq mfspr64_lpidr + cmpwi r4,chud_970_lpcr + beq mfspr64_lpcr + cmpwi r4,chud_970_dabrx + beq mfspr64_dabrx + cmpwi r4,chud_970_hsprg0 + beq mfspr64_hsprg0 + cmpwi r4,chud_970_hsprg1 + beq mfspr64_hsprg1 + cmpwi r4,chud_970_hsrr0 + beq mfspr64_hsrr0 + cmpwi r4,chud_970_hsrr1 + beq mfspr64_hsrr1 + cmpwi r4,chud_970_hdec + beq mfspr64_hdec + cmpwi r4,chud_970_trig0 + beq mfspr64_trig0 + cmpwi r4,chud_970_trig1 + beq mfspr64_trig1 + cmpwi r4,chud_970_trig2 + beq mfspr64_trig2 + cmpwi r4,chud_ppc64_accr + beq mfspr64_accr + cmpwi r4,chud_970_scomc + beq mfspr64_scomc + cmpwi r4,chud_970_scomd + beq mfspr64_scomd + + b mfspr64_failure + +mfspr64_srr0: mfspr r5,chud_ppc_srr0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsrr1_64) -EXT(chudxnu_mfsrr1_64): + b mfspr64_success +mfspr64_srr1: mfspr r5,chud_ppc_srr1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfdar_64) -EXT(chudxnu_mfdar_64): + b mfspr64_success +mfspr64_dar: mfspr r5,chud_ppc_dar std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsdr1_64) -EXT(chudxnu_mfsdr1_64): + b mfspr64_success +mfspr64_sdr1: mfspr r5,chud_ppc_sdr1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsprg0_64) -EXT(chudxnu_mfsprg0_64): + b mfspr64_success +mfspr64_sprg0: mfspr r5,chud_ppc_sprg0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsprg1_64) -EXT(chudxnu_mfsprg1_64): + b mfspr64_success +mfspr64_sprg1: mfspr r5,chud_ppc_sprg1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsprg2_64) -EXT(chudxnu_mfsprg2_64): + b mfspr64_success +mfspr64_sprg2: mfspr r5,chud_ppc_sprg2 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsprg3_64) -EXT(chudxnu_mfsprg3_64): + b mfspr64_success +mfspr64_sprg3: 
mfspr r5,chud_ppc_sprg3 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfasr_64) -EXT(chudxnu_mfasr_64): + b mfspr64_success +mfspr64_asr: mfspr r5,chud_ppc64_asr std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfdabr_64) -EXT(chudxnu_mfdabr_64): + b mfspr64_success +mfspr64_dabr: mfspr r5,chud_ppc_dabr std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhid0_64) -EXT(chudxnu_mfhid0_64): + b mfspr64_success +mfspr64_hid0: mfspr r5,chud_970_hid0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhid1_64) -EXT(chudxnu_mfhid1_64): + b mfspr64_success +mfspr64_hid1: mfspr r5,chud_970_hid1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhid4_64) -EXT(chudxnu_mfhid4_64): + b mfspr64_success +mfspr64_hid4: mfspr r5,chud_970_hid4 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhid5_64) -EXT(chudxnu_mfhid5_64): + b mfspr64_success +mfspr64_hid5: mfspr r5,chud_970_hid5 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfmmcr0_64) -EXT(chudxnu_mfmmcr0_64): + b mfspr64_success +mfspr64_mmcr0: mfspr r5,chud_970_mmcr0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfmmcr1_64) -EXT(chudxnu_mfmmcr1_64): + b mfspr64_success +mfspr64_mmcr1: mfspr r5,chud_970_mmcr1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfmmcra_64) -EXT(chudxnu_mfmmcra_64): + b mfspr64_success +mfspr64_mmcra: mfspr r5,chud_970_mmcra std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsiar_64) -EXT(chudxnu_mfsiar_64): + b mfspr64_success +mfspr64_siar: mfspr r5,chud_970_siar std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfsdar_64) -EXT(chudxnu_mfsdar_64): + b mfspr64_success +mfspr64_sdar: mfspr r5,chud_970_sdar std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfimc_64) -EXT(chudxnu_mfimc_64): + b mfspr64_success +mfspr64_imc: mfspr r5,chud_970_imc std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfrmor_64) -EXT(chudxnu_mfrmor_64): + b mfspr64_success +mfspr64_rmor: mfspr r5,chud_970_rmor std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhrmor_64) -EXT(chudxnu_mfhrmor_64): + b mfspr64_success +mfspr64_hrmor: mfspr r5,chud_970_hrmor std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhior_64) -EXT(chudxnu_mfhior_64): + b mfspr64_success +mfspr64_hior: mfspr r5,chud_970_hior std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mflpidr_64) -EXT(chudxnu_mflpidr_64): + b mfspr64_success +mfspr64_lpidr: mfspr r5,chud_970_lpidr std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mflpcr_64) -EXT(chudxnu_mflpcr_64): + b mfspr64_success +mfspr64_lpcr: mfspr r5,chud_970_lpcr std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfdabrx_64) -EXT(chudxnu_mfdabrx_64): + b mfspr64_success +mfspr64_dabrx: mfspr r5,chud_970_dabrx std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhsprg0_64) -EXT(chudxnu_mfhsprg0_64): + b mfspr64_success +mfspr64_hsprg0: mfspr r5,chud_970_hsprg0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhsprg1_64) -EXT(chudxnu_mfhsprg1_64): + b mfspr64_success +mfspr64_hsprg1: mfspr r5,chud_970_hsprg1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhsrr0_64) -EXT(chudxnu_mfhsrr0_64): + b mfspr64_success +mfspr64_hsrr0: mfspr r5,chud_970_hsrr0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhsrr1_64) -EXT(chudxnu_mfhsrr1_64): + b mfspr64_success +mfspr64_hsrr1: mfspr r5,chud_970_hsrr1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfhdec_64) -EXT(chudxnu_mfhdec_64): + b mfspr64_success +mfspr64_hdec: mfspr r5,chud_970_hdec std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mftrig0_64) -EXT(chudxnu_mftrig0_64): + b 
mfspr64_success +mfspr64_trig0: mfspr r5,chud_970_trig0 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mftrig1_64) -EXT(chudxnu_mftrig1_64): + b mfspr64_success +mfspr64_trig1: mfspr r5,chud_970_trig1 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mftrig2_64) -EXT(chudxnu_mftrig2_64): + b mfspr64_success +mfspr64_trig2: mfspr r5,chud_970_trig2 std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfaccr_64) -EXT(chudxnu_mfaccr_64): + b mfspr64_success +mfspr64_accr: mfspr r5,chud_ppc64_accr std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfscomc_64) -EXT(chudxnu_mfscomc_64): + b mfspr64_success +mfspr64_scomc: mfspr r5,chud_970_scomc std r5,0(r3) - blr - - .align 5 - .globl EXT(chudxnu_mfscomd_64) -EXT(chudxnu_mfscomd_64): + b mfspr64_success +mfspr64_scomd: mfspr r5,chud_970_scomd std r5,0(r3) + b mfspr64_success + +mfspr64_failure: + li r3,KERN_FAILURE blr - .align 5 - .globl EXT(chudxnu_mtsrr0_64) -EXT(chudxnu_mtsrr0_64): - ld r5,0(r4) - mtspr chud_ppc_srr0,r5 +mfspr64_success: + li r3,KERN_SUCCESS blr - .align 5 - .globl EXT(chudxnu_mtsrr1_64) -EXT(chudxnu_mtsrr1_64): + +/* + * kern_return_t mtspr64(int spr, uint64_t *val); + * + * r3: spr to write to + * r4: address to get value from + * + */ + +; Force a line boundry here + .align 5 + .globl EXT(mtspr64) + +EXT(mtspr64): + ;; generic PPC 64-bit wide SPRs + cmpwi r3,chud_ppc_srr0 + beq mtspr64_srr0 + cmpwi r3,chud_ppc_srr1 + beq mtspr64_srr1 + cmpwi r3,chud_ppc_dar + beq mtspr64_dar + cmpwi r3,chud_ppc_sdr1 + beq mtspr64_sdr1 + cmpwi r3,chud_ppc_sprg0 + beq mtspr64_sprg0 + cmpwi r3,chud_ppc_sprg1 + beq mtspr64_sprg1 + cmpwi r3,chud_ppc_sprg2 + beq mtspr64_sprg2 + cmpwi r3,chud_ppc_sprg3 + beq mtspr64_sprg3 + cmpwi r3,chud_ppc64_asr + beq mtspr64_asr + cmpwi r3,chud_ppc_dabr + beq mtspr64_dabr + + ;; GPUL specific 64-bit wide SPRs + cmpwi r3,chud_970_hid0 + beq mtspr64_hid0 + cmpwi r3,chud_970_hid1 + beq mtspr64_hid1 + cmpwi r3,chud_970_hid4 + beq mtspr64_hid4 + cmpwi r3,chud_970_hid5 + beq mtspr64_hid5 + cmpwi r3,chud_970_mmcr0 + beq mtspr64_mmcr0 + cmpwi r3,chud_970_mmcr1 + beq mtspr64_mmcr1 + cmpwi r3,chud_970_mmcra + beq mtspr64_mmcra + cmpwi r3,chud_970_siar + beq mtspr64_siar + cmpwi r3,chud_970_sdar + beq mtspr64_sdar + cmpwi r3,chud_970_imc + beq mtspr64_imc + cmpwi r3,chud_970_rmor + beq mtspr64_rmor + cmpwi r3,chud_970_hrmor + beq mtspr64_hrmor + cmpwi r3,chud_970_hior + beq mtspr64_hior + cmpwi r3,chud_970_lpidr + beq mtspr64_lpidr + cmpwi r3,chud_970_lpcr + beq mtspr64_lpcr + cmpwi r3,chud_970_dabrx + beq mtspr64_dabrx + cmpwi r3,chud_970_hsprg0 + beq mtspr64_hsprg0 + cmpwi r3,chud_970_hsprg1 + beq mtspr64_hsprg1 + cmpwi r3,chud_970_hsrr0 + beq mtspr64_hsrr0 + cmpwi r3,chud_970_hsrr1 + beq mtspr64_hsrr1 + cmpwi r3,chud_970_hdec + beq mtspr64_hdec + cmpwi r3,chud_970_trig0 + beq mtspr64_trig0 + cmpwi r3,chud_970_trig1 + beq mtspr64_trig1 + cmpwi r3,chud_970_trig2 + beq mtspr64_trig2 + cmpwi r3,chud_ppc64_accr + beq mtspr64_accr + cmpwi r3,chud_970_scomc + beq mtspr64_scomc + cmpwi r3,chud_970_scomd + beq mtspr64_scomd + + b mtspr64_failure + +mtspr64_srr0: + ld r5,0(r4) + mtspr chud_ppc_srr0,r5 + b mtspr64_success +mtspr64_srr1: ld r5,0(r4) mtspr chud_ppc_srr1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtdar_64) -EXT(chudxnu_mtdar_64): + b mtspr64_success +mtspr64_dar: ld r5,0(r4) mtspr chud_ppc_dar,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsdr1_64) -EXT(chudxnu_mtsdr1_64): + b mtspr64_success +mtspr64_sdr1: ld r5,0(r4) mtspr chud_ppc_sdr1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsprg0_64) 
-EXT(chudxnu_mtsprg0_64): + b mtspr64_success +mtspr64_sprg0: ld r5,0(r4) mtspr chud_ppc_sprg0,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsprg1_64) -EXT(chudxnu_mtsprg1_64): + b mtspr64_success +mtspr64_sprg1: ld r5,0(r4) mtspr chud_ppc_sprg1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsprg2_64) -EXT(chudxnu_mtsprg2_64): + b mtspr64_success +mtspr64_sprg2: ld r5,0(r4) mtspr chud_ppc_sprg2,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsprg3_64) -EXT(chudxnu_mtsprg3_64): + b mtspr64_success +mtspr64_sprg3: ld r5,0(r4) mtspr chud_ppc_sprg3,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtasr_64) -EXT(chudxnu_mtasr_64): + b mtspr64_success +mtspr64_asr: ld r5,0(r4) mtspr chud_ppc64_asr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtdabr_64) -EXT(chudxnu_mtdabr_64): + b mtspr64_success +mtspr64_dabr: ld r5,0(r4) mtspr chud_ppc_dabr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthid0_64) -EXT(chudxnu_mthid0_64): + b mtspr64_success +mtspr64_hid0: ld r5,0(r4) sync mtspr chud_970_hid0,r5 @@ -367,205 +419,170 @@ EXT(chudxnu_mthid0_64): mfspr r5,chud_970_hid0 mfspr r5,chud_970_hid0 mfspr r5,chud_970_hid0 - blr - - .align 5 - .globl EXT(chudxnu_mthid1_64) -EXT(chudxnu_mthid1_64): + b mtspr64_success +mtspr64_hid1: ld r5,0(r4) mtspr chud_970_hid1,r5 /* tell you twice */ mtspr chud_970_hid1,r5 isync - blr - - .align 5 - .globl EXT(chudxnu_mthid4_64) -EXT(chudxnu_mthid4_64): + b mtspr64_success +mtspr64_hid4: ld r5,0(r4) sync /* synchronization requirements */ mtspr chud_970_hid4,r5 isync - blr - - .align 5 - .globl EXT(chudxnu_mthid5_64) -EXT(chudxnu_mthid5_64): + b mtspr64_success +mtspr64_hid5: ld r5,0(r4) mtspr chud_970_hid5,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtmmcr0_64) -EXT(chudxnu_mtmmcr0_64): + b mtspr64_success +mtspr64_mmcr0: ld r5,0(r4) mtspr chud_970_mmcr0,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtmmcr1_64) -EXT(chudxnu_mtmmcr1_64): + b mtspr64_success +mtspr64_mmcr1: ld r5,0(r4) mtspr chud_970_mmcr1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtmmcra_64) -EXT(chudxnu_mtmmcra_64): + b mtspr64_success +mtspr64_mmcra: ld r5,0(r4) mtspr chud_970_mmcra,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsiar_64) -EXT(chudxnu_mtsiar_64): + b mtspr64_success +mtspr64_siar: ld r5,0(r4) mtspr chud_970_siar,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtsdar_64) -EXT(chudxnu_mtsdar_64): + b mtspr64_success +mtspr64_sdar: ld r5,0(r4) mtspr chud_970_sdar,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtimc_64) -EXT(chudxnu_mtimc_64): + b mtspr64_success +mtspr64_imc: ld r5,0(r4) mtspr chud_970_imc,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtrmor_64) -EXT(chudxnu_mtrmor_64): + b mtspr64_success +mtspr64_rmor: ld r5,0(r4) mtspr chud_970_rmor,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthrmor_64) -EXT(chudxnu_mthrmor_64): + b mtspr64_success +mtspr64_hrmor: ld r5,0(r4) mtspr chud_970_hrmor,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthior_64) -EXT(chudxnu_mthior_64): + b mtspr64_success +mtspr64_hior: ld r5,0(r4) mtspr chud_970_hior,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtlpidr_64) -EXT(chudxnu_mtlpidr_64): + b mtspr64_success +mtspr64_lpidr: ld r5,0(r4) mtspr chud_970_lpidr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtlpcr_64) -EXT(chudxnu_mtlpcr_64): + b mtspr64_success +mtspr64_lpcr: ld r5,0(r4) mtspr chud_970_lpcr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtdabrx_64) -EXT(chudxnu_mtdabrx_64): + b mtspr64_success +mtspr64_dabrx: ld r5,0(r4) - mtspr chud_970_lpcr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthsprg0_64) -EXT(chudxnu_mthsprg0_64): + mtspr chud_970_dabrx,r5 + b mtspr64_success +mtspr64_hsprg0: 
ld r5,0(r4) mtspr chud_970_hsprg0,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthsprg1_64) -EXT(chudxnu_mthsprg1_64): + b mtspr64_success +mtspr64_hsprg1: ld r5,0(r4) mtspr chud_970_hsprg1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthsrr0_64) -EXT(chudxnu_mthsrr0_64): + b mtspr64_success +mtspr64_hsrr0: ld r5,0(r4) mtspr chud_970_hsrr0,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthsrr1_64) -EXT(chudxnu_mthsrr1_64): + b mtspr64_success +mtspr64_hsrr1: ld r5,0(r4) mtspr chud_970_hsrr1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mthdec_64) -EXT(chudxnu_mthdec_64): + b mtspr64_success +mtspr64_hdec: ld r5,0(r4) mtspr chud_970_hdec,r5 - blr - - .align 5 - .globl EXT(chudxnu_mttrig0_64) -EXT(chudxnu_mttrig0_64): + b mtspr64_success +mtspr64_trig0: ld r5,0(r4) mtspr chud_970_trig0,r5 - blr - - .align 5 - .globl EXT(chudxnu_mttrig1_64) -EXT(chudxnu_mttrig1_64): + b mtspr64_success +mtspr64_trig1: ld r5,0(r4) mtspr chud_970_trig1,r5 - blr - - .align 5 - .globl EXT(chudxnu_mttrig2_64) -EXT(chudxnu_mttrig2_64): + b mtspr64_success +mtspr64_trig2: ld r5,0(r4) mtspr chud_970_trig2,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtaccr_64) -EXT(chudxnu_mtaccr_64): + b mtspr64_success +mtspr64_accr: ld r5,0(r4) mtspr chud_ppc64_accr,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtscomc_64) -EXT(chudxnu_mtscomc_64): + b mtspr64_success +mtspr64_scomc: ld r5,0(r4) mtspr chud_970_scomc,r5 - blr - - .align 5 - .globl EXT(chudxnu_mtscomd_64) -EXT(chudxnu_mtscomd_64): + b mtspr64_success +mtspr64_scomd: ld r5,0(r4) mtspr chud_970_scomd,r5 + b mtspr64_success + +mtspr64_failure: + li r3,KERN_FAILURE + blr +mtspr64_success: + li r3,KERN_SUCCESS + blr + + +/* + * kern_return_t mfmsr64(uint64_t *val); + * + * r3: address to store value in + * + */ + +; Force a line boundary here .align 5 - .globl EXT(chudxnu_mfmsr_64) -EXT(chudxnu_mfmsr_64): + .globl EXT(mfmsr64) + +EXT(mfmsr64): mfmsr r5 std r5,0(r3) +mfmsr64_success: + li r3,KERN_SUCCESS + blr + +mfmsr64_failure: + li r3,KERN_FAILURE blr + +/* + * kern_return_t mtmsr64(uint64_t *val); + * + * r3: address to load value from + * + */ + +; Force a line boundary here .align 5 - .globl EXT(chudxnu_mtmsr_64) -EXT(chudxnu_mtmsr_64): + .globl EXT(mtmsr64) + +EXT(mtmsr64): ld r5,0(r3) mtmsrd r5 + b mtmsr64_success + +mtmsr64_success: + li r3,KERN_SUCCESS + blr + +mtmsr64_failure: + li r3,KERN_FAILURE blr .L_end: diff --git a/osfmk/ppc/chud/chud_memory.c b/osfmk/ppc/chud/chud_memory.c index 1cda5156f..5529fe8b1 100644 --- a/osfmk/ppc/chud/chud_memory.c +++ b/osfmk/ppc/chud/chud_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -20,20 +20,19 @@ * @APPLE_LICENSE_HEADER_END@ */ +#include #include #include __private_extern__ uint64_t chudxnu_avail_memory_size(void) { - extern vm_size_t mem_size; return mem_size; } __private_extern__ uint64_t chudxnu_phys_memory_size(void) { - extern uint64_t mem_actual; return mem_actual; } diff --git a/osfmk/ppc/chud/chud_osfmk_callback.c b/osfmk/ppc/chud/chud_osfmk_callback.c index a5681eda3..e0ccc2012 100644 --- a/osfmk/ppc/chud/chud_osfmk_callback.c +++ b/osfmk/ppc/chud/chud_osfmk_callback.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
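A note on the two MSR accessors just added: the block comments above give their C prototypes, and both funnel into epilogues that return a kern_return_t in r3 (as coded, the failure labels are unreachable). A minimal C-side sketch of a read-modify-write through them, assuming kernel context; set_msr_bit is an illustrative name, not part of this patch:

    #include <mach/kern_return.h>

    extern kern_return_t mfmsr64(uint64_t *val);   /* prototypes from the comments above */
    extern kern_return_t mtmsr64(uint64_t *val);

    /* Illustrative helper: OR one bit into the 64-bit MSR, preserving the rest. */
    static kern_return_t set_msr_bit(uint64_t bit)
    {
        uint64_t msr;
        if (mfmsr64(&msr) != KERN_SUCCESS)   /* always succeeds as written */
            return KERN_FAILURE;
        msr |= bit;
        return mtmsr64(&msr);                /* installed via mtmsrd above */
    }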
* * @APPLE_LICENSE_HEADER_START@ * @@ -24,52 +24,85 @@ #include #include +#include +#include +#include +#include +#include +#include + #include +#include +#include #include -#include -#include -#include +#include +#include -extern kern_return_t chud_copy_savearea_to_threadstate(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, struct savearea *sv); -extern kern_return_t chud_copy_threadstate_to_savearea(struct savearea *sv, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count); +#include +#include __private_extern__ void chudxnu_cancel_all_callbacks(void) { - extern void chudxnu_exit_callback_cancel(void); - extern void chudxnu_thread_timer_callback_cancel(void); - chudxnu_cpu_timer_callback_cancel_all(); chudxnu_trap_callback_cancel(); chudxnu_interrupt_callback_cancel(); chudxnu_perfmon_ast_callback_cancel(); chudxnu_cpusig_callback_cancel(); chudxnu_kdebug_callback_cancel(); - chudxnu_exit_callback_cancel(); chudxnu_thread_timer_callback_cancel(); + chudxnu_syscall_callback_cancel(); } #pragma mark **** cpu timer **** -static timer_call_data_t cpu_timer_call[NCPUS] = {{0}, {0}}; -static uint64_t t_deadline[NCPUS] = {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL}; +typedef struct { + timer_call_data_t cpu_timer_call; + uint64_t t_deadline; + chudxnu_cpu_timer_callback_func_t cpu_timer_callback_fn; +} chudcpu_data_t; -typedef void (*chudxnu_cpu_timer_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -static chudxnu_cpu_timer_callback_func_t cpu_timer_callback_fn[NCPUS] = {NULL, NULL}; +static chudcpu_data_t chudcpu_boot_cpu; + +void *chudxnu_per_proc_alloc(boolean_t boot_processor) +{ + chudcpu_data_t *chud_proc_info; + + if (boot_processor) { + chud_proc_info = &chudcpu_boot_cpu; + } else { + chud_proc_info = (chudcpu_data_t *)kalloc(sizeof(chudcpu_data_t)); + if (chud_proc_info == (chudcpu_data_t *)NULL) { + return (void *)NULL; + } + } + bzero((char *)chud_proc_info, sizeof(chudcpu_data_t)); + chud_proc_info->t_deadline = 0xFFFFFFFFFFFFFFFFULL; + return (void *)chud_proc_info; +} + +void chudxnu_per_proc_free(void *per_proc_chud) +{ + if (per_proc_chud == (void *)&chudcpu_boot_cpu) { + return; + } else { + kfree(per_proc_chud,sizeof(chudcpu_data_t)); + } +} static void chudxnu_private_cpu_timer_callback(timer_call_param_t param0, timer_call_param_t param1) { - int cpu; + chudcpu_data_t *chud_proc_info; boolean_t oldlevel; struct ppc_thread_state64 state; mach_msg_type_number_t count; oldlevel = ml_set_interrupts_enabled(FALSE); - cpu = cpu_number(); + chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud); count = PPC_THREAD_STATE64_COUNT; - if(chudxnu_thread_get_state(current_act(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) { - if(cpu_timer_callback_fn[cpu]) { - (cpu_timer_callback_fn[cpu])(PPC_THREAD_STATE64, (thread_state_t)&state, count); + if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) { + if(chud_proc_info->cpu_timer_callback_fn) { + (chud_proc_info->cpu_timer_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); } } @@ -79,19 +112,19 @@ static void chudxnu_private_cpu_timer_callback(timer_call_param_t param0, timer_ __private_extern__ kern_return_t chudxnu_cpu_timer_callback_enter(chudxnu_cpu_timer_callback_func_t func, uint32_t time, uint32_t units) { - int cpu; + chudcpu_data_t *chud_proc_info; boolean_t oldlevel; oldlevel = ml_set_interrupts_enabled(FALSE); - cpu 
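Seen from the client side, the per-processor plumbing above means chudxnu_cpu_timer_callback_enter() arms a one-shot timer_call against the calling CPU's chudcpu_data_t, and the callback receives the interrupted thread's PPC_THREAD_STATE64. A sketch under those assumptions; sample_handler and record_sample are illustrative names, and NSEC_PER_SEC is assumed to be the usual mach/clock_types.h scale factor accepted by clock_interval_to_deadline():

    extern void record_sample(uint64_t pc);   /* hypothetical consumer */

    /* The timer is one-shot, so the handler re-arms itself to keep sampling. */
    static void sample_handler(thread_flavor_t flavor, thread_state_t tstate,
                               mach_msg_type_number_t count)
    {
        if (flavor == PPC_THREAD_STATE64 && count == PPC_THREAD_STATE64_COUNT) {
            struct ppc_thread_state64 *state = (struct ppc_thread_state64 *)tstate;
            record_sample(state->srr0);       /* PC at timer expiry */
        }
        chudxnu_cpu_timer_callback_enter(sample_handler, 1, NSEC_PER_SEC);
    }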
= cpu_number(); + chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud); - timer_call_cancel(&(cpu_timer_call[cpu])); // cancel any existing callback for this cpu + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); // cancel any existing callback for this cpu - cpu_timer_callback_fn[cpu] = func; + chud_proc_info->cpu_timer_callback_fn = func; - clock_interval_to_deadline(time, units, &(t_deadline[cpu])); - timer_call_setup(&(cpu_timer_call[cpu]), chudxnu_private_cpu_timer_callback, NULL); - timer_call_enter(&(cpu_timer_call[cpu]), t_deadline[cpu]); + clock_interval_to_deadline(time, units, &(chud_proc_info->t_deadline)); + timer_call_setup(&(chud_proc_info->cpu_timer_call), chudxnu_private_cpu_timer_callback, NULL); + timer_call_enter(&(chud_proc_info->cpu_timer_call), chud_proc_info->t_deadline); ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; @@ -100,15 +133,15 @@ kern_return_t chudxnu_cpu_timer_callback_enter(chudxnu_cpu_timer_callback_func_t __private_extern__ kern_return_t chudxnu_cpu_timer_callback_cancel(void) { - int cpu; + chudcpu_data_t *chud_proc_info; boolean_t oldlevel; oldlevel = ml_set_interrupts_enabled(FALSE); - cpu = cpu_number(); + chud_proc_info = (chudcpu_data_t *)(getPerProc()->pp_chud); - timer_call_cancel(&(cpu_timer_call[cpu])); - t_deadline[cpu] = t_deadline[cpu] | ~(t_deadline[cpu]); // set to max value - cpu_timer_callback_fn[cpu] = NULL; + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); + chud_proc_info->t_deadline = chud_proc_info->t_deadline | ~(chud_proc_info->t_deadline); // set to max value + chud_proc_info->cpu_timer_callback_fn = NULL; ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; @@ -117,26 +150,24 @@ kern_return_t chudxnu_cpu_timer_callback_cancel(void) __private_extern__ kern_return_t chudxnu_cpu_timer_callback_cancel_all(void) { - int cpu; - - for(cpu=0; cpupp_chud == 0)) + continue; + chud_proc_info = (chudcpu_data_t *)PerProcTable[cpu].ppe_vaddr->pp_chud; + timer_call_cancel(&(chud_proc_info->cpu_timer_call)); + chud_proc_info->t_deadline = chud_proc_info->t_deadline | ~(chud_proc_info->t_deadline); // set to max value + chud_proc_info->cpu_timer_callback_fn = NULL; } return KERN_SUCCESS; } -#pragma mark **** trap and ast **** -typedef kern_return_t (*chudxnu_trap_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +#pragma mark **** trap **** static chudxnu_trap_callback_func_t trap_callback_fn = NULL; -typedef kern_return_t (*perfTrap)(int trapno, struct savearea *ssp, unsigned int dsisr, unsigned int dar); -extern perfTrap perfTrapHook; /* function hook into trap() */ - -typedef void (*chudxnu_perfmon_ast_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); -static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; - #define TRAP_ENTRY_POINT(t) ((t==T_RESET) ? 0x100 : \ (t==T_MACHINE_CHECK) ? 0x200 : \ (t==T_DATA_ACCESS) ? 
0x300 : \ @@ -170,20 +201,9 @@ static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; static kern_return_t chudxnu_private_trap_callback(int trapno, struct savearea *ssp, unsigned int dsisr, unsigned int dar) { boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); - int cpu = cpu_number(); - kern_return_t retval = KERN_FAILURE; uint32_t trapentry = TRAP_ENTRY_POINT(trapno); - // ASTs from ihandler go through thandler and are made to look like traps - if(perfmon_ast_callback_fn && (need_ast[cpu] & AST_PPC_CHUD)) { - struct ppc_thread_state64 state; - mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; - chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); - (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); - need_ast[cpu] &= ~(AST_PPC_CHUD); - } - if(trapentry!=0x0) { if(trap_callback_fn) { struct ppc_thread_state64 state; @@ -212,19 +232,69 @@ __private_extern__ kern_return_t chudxnu_trap_callback_cancel(void) { trap_callback_fn = NULL; - if(!perfmon_ast_callback_fn) { perfTrapHook = NULL; - } __asm__ volatile("eieio"); /* force order */ __asm__ volatile("sync"); /* force to memory */ return KERN_SUCCESS; } +#pragma mark **** ast **** +static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; + +static kern_return_t chudxnu_private_chud_ast_callback(int trapno, struct savearea *ssp, unsigned int dsisr, unsigned int dar) +{ + boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + ast_t *myast = ast_pending(); + kern_return_t retval = KERN_FAILURE; + + if(*myast & AST_PPC_CHUD_URGENT) { + *myast &= ~(AST_PPC_CHUD_URGENT | AST_PPC_CHUD); + if((*myast & AST_PREEMPTION) != AST_PREEMPTION) *myast &= ~(AST_URGENT); + retval = KERN_SUCCESS; + } else if(*myast & AST_PPC_CHUD) { + *myast &= ~(AST_PPC_CHUD); + retval = KERN_SUCCESS; + } + + if(perfmon_ast_callback_fn) { + struct ppc_thread_state64 state; + mach_msg_type_number_t count; + count = PPC_THREAD_STATE64_COUNT; + + if(chudxnu_thread_get_state(current_thread(), PPC_THREAD_STATE64, (thread_state_t)&state, &count, FALSE)==KERN_SUCCESS) { + (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + } + } + +#if 0 + // ASTs from ihandler go through thandler and are made to look like traps + // always handle AST_PPC_CHUD_URGENT if there's a callback + // only handle AST_PPC_CHUD if it's the only AST pending + if(perfmon_ast_callback_fn && ((*myast & AST_PPC_CHUD_URGENT) || ((*myast & AST_PPC_CHUD) && !(*myast & AST_URGENT)))) { + struct ppc_thread_state64 state; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); + if(*myast & AST_PPC_CHUD_URGENT) { + *myast &= ~(AST_PPC_CHUD_URGENT | AST_PPC_CHUD); + if((*myast & AST_PREEMPTION) != AST_PREEMPTION) *myast &= ~(AST_URGENT); + retval = KERN_SUCCESS; + } else if(*myast & AST_PPC_CHUD) { + *myast &= ~(AST_PPC_CHUD); + retval = KERN_SUCCESS; + } + (perfmon_ast_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + } +#endif + + ml_set_interrupts_enabled(oldlevel); + return retval; +} + __private_extern__ kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func) { perfmon_ast_callback_fn = func; - perfTrapHook = chudxnu_private_trap_callback; + perfASTHook = chudxnu_private_chud_ast_callback; __asm__ volatile("eieio"); /* force order */ __asm__ volatile("sync"); /* force to memory */ return KERN_SUCCESS; @@ -234,34 +304,37 
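The AST path above replaces the old trick of multiplexing perfTrapHook: perfASTHook now points at chudxnu_private_chud_ast_callback(), which consumes AST_PPC_CHUD/AST_PPC_CHUD_URGENT and then invokes the registered function with PPC_THREAD_STATE64. A client sketch pairing enter() with chudxnu_perfmon_ast_send_urgent() (defined just below); pmi_handler and arm_pmi_sampling are illustrative names:

    /* Runs at AST delivery, not at interrupt level, so sampling is safe here. */
    static kern_return_t pmi_handler(thread_flavor_t flavor, thread_state_t tstate,
                                     mach_msg_type_number_t count)
    {
        /* inspect tstate (a struct ppc_thread_state64) here */
        return KERN_SUCCESS;
    }

    static void arm_pmi_sampling(void)
    {
        chudxnu_perfmon_ast_callback_enter(pmi_handler);
        /* typically requested later, from a performance-monitor interrupt: */
        chudxnu_perfmon_ast_send_urgent(TRUE);   /* TRUE also sets AST_URGENT */
    }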
@@ __private_extern__ kern_return_t chudxnu_perfmon_ast_callback_cancel(void) { perfmon_ast_callback_fn = NULL; - if(!trap_callback_fn) { - perfTrapHook = NULL; - } + perfASTHook = NULL; __asm__ volatile("eieio"); /* force order */ __asm__ volatile("sync"); /* force to memory */ return KERN_SUCCESS; } __private_extern__ -kern_return_t chudxnu_perfmon_ast_send(void) +kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent) { - int cpu; - boolean_t oldlevel; - - oldlevel = ml_set_interrupts_enabled(FALSE); - cpu = cpu_number(); + boolean_t oldlevel = ml_set_interrupts_enabled(FALSE); + ast_t *myast = ast_pending(); - need_ast[cpu] |= (AST_PPC_CHUD | AST_URGENT); + if(urgent) { + *myast |= (AST_PPC_CHUD_URGENT | AST_URGENT); + } else { + *myast |= (AST_PPC_CHUD); + } ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; } +__private_extern__ +kern_return_t chudxnu_perfmon_ast_send(void) +{ + return chudxnu_perfmon_ast_send_urgent(TRUE); +} + #pragma mark **** interrupt **** -typedef kern_return_t (*chudxnu_interrupt_callback_func_t)(uint32_t trapentry, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); static chudxnu_interrupt_callback_func_t interrupt_callback_fn = NULL; - -extern perfTrap perfIntHook; /* function hook into interrupt() */ +//extern perfCallback perfIntHook; /* function hook into interrupt() */ static kern_return_t chudxnu_private_interrupt_callback(int trapno, struct savearea *ssp, unsigned int dsisr, unsigned int dar) { @@ -296,10 +369,8 @@ kern_return_t chudxnu_interrupt_callback_cancel(void) } #pragma mark **** cpu signal **** -typedef kern_return_t (*chudxnu_cpusig_callback_func_t)(int request, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); static chudxnu_cpusig_callback_func_t cpusig_callback_fn = NULL; - -extern perfTrap perfCpuSigHook; /* function hook into cpu_signal_handler() */ +extern perfCallback perfCpuSigHook; /* function hook into cpu_signal_handler() */ static kern_return_t chudxnu_private_cpu_signal_handler(int request, struct savearea *ssp, unsigned int arg0, unsigned int arg1) { @@ -372,11 +443,84 @@ kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request) return retval; } -#pragma mark **** thread timer **** +#pragma mark **** timer **** +__private_extern__ +chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0) +{ + return (chud_timer_t)thread_call_allocate((thread_call_func_t)func, (thread_call_param_t)param0); +} -static thread_call_t thread_timer_call = NULL; +__private_extern__ +kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units) +{ + uint64_t t_delay; + clock_interval_to_deadline(time, units, &t_delay); + thread_call_enter1_delayed((thread_call_t)timer, (thread_call_param_t)param1, t_delay); + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer) +{ + thread_call_cancel((thread_call_t)timer); + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_timer_free(chud_timer_t timer) +{ + thread_call_cancel((thread_call_t)timer); + thread_call_free((thread_call_t)timer); + return KERN_SUCCESS; +} + +#pragma mark **** CHUD syscall (PPC) **** -typedef void (*chudxnu_thread_timer_callback_func_t)(uint32_t arg); +typedef int (*PPCcallEnt)(struct savearea *save); +extern PPCcallEnt PPCcalls[]; + +static chudxnu_syscall_callback_func_t syscall_callback_fn = NULL; + +static int chudxnu_private_syscall_callback(struct savearea 
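The new timer API above is a thin wrapper over thread_call: param0 is bound at allocation, param1 at each arming, and chudxnu_timer_free() cancels before freeing so an in-flight timer cannot fire after release. A usage sketch; my_timer_fn and timer_demo are illustrative names, and NSEC_PER_SEC is assumed as the scale factor:

    static kern_return_t my_timer_fn(uint32_t param0, uint32_t param1)
    {
        /* param0 fixed at chudxnu_timer_alloc(), param1 supplied per arming */
        return KERN_SUCCESS;
    }

    static void timer_demo(void)
    {
        chud_timer_t t = chudxnu_timer_alloc(my_timer_fn, 0);
        chudxnu_timer_callback_enter(t, 42, 1, NSEC_PER_SEC);  /* fire in ~1s */
        /* ... */
        chudxnu_timer_callback_cancel(t);   /* optional; free also cancels */
        chudxnu_timer_free(t);
    }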
*ssp) +{ + if(ssp) { + if(syscall_callback_fn) { + struct ppc_thread_state64 state; + kern_return_t retval; + mach_msg_type_number_t count = PPC_THREAD_STATE64_COUNT; + chudxnu_copy_savearea_to_threadstate(PPC_THREAD_STATE64, (thread_state_t)&state, &count, ssp); + ssp->save_r3 = (syscall_callback_fn)(PPC_THREAD_STATE64, (thread_state_t)&state, count); + } else { + ssp->save_r3 = KERN_FAILURE; + } + } + + return 1; // check for ASTs (always) +} + +__private_extern__ +kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func) +{ + syscall_callback_fn = func; + PPCcalls[9] = chudxnu_private_syscall_callback; + __asm__ volatile("eieio"); /* force order */ + __asm__ volatile("sync"); /* force to memory */ + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_syscall_callback_cancel(void) +{ + syscall_callback_fn = NULL; + PPCcalls[9] = NULL; + __asm__ volatile("eieio"); /* force order */ + __asm__ volatile("sync"); /* force to memory */ + return KERN_SUCCESS; +} + +#pragma mark **** thread timer - DEPRECATED **** + +static thread_call_t thread_timer_call = NULL; static chudxnu_thread_timer_callback_func_t thread_timer_callback_fn = NULL; static void chudxnu_private_thread_timer_callback(thread_call_param_t param0, thread_call_param_t param1) @@ -391,13 +535,14 @@ static void chudxnu_private_thread_timer_callback(thread_call_param_t param0, th } } +// DEPRECATED __private_extern__ -kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t arg, uint32_t time, uint32_t units) +kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t param, uint32_t time, uint32_t units) { if(!thread_timer_call) { uint64_t t_delay; thread_timer_callback_fn = func; - thread_timer_call = thread_call_allocate((thread_call_func_t)chudxnu_private_thread_timer_callback, (thread_call_param_t)arg); + thread_timer_call = thread_call_allocate((thread_call_func_t)chudxnu_private_thread_timer_callback, (thread_call_param_t)param); clock_interval_to_deadline(time, units, &t_delay); thread_call_enter_delayed(thread_timer_call, t_delay); return KERN_SUCCESS; @@ -406,10 +551,12 @@ kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_ } } +// DEPRECATED __private_extern__ kern_return_t chudxnu_thread_timer_callback_cancel(void) { if(thread_timer_call) { + thread_call_cancel(thread_timer_call); thread_call_free(thread_timer_call); thread_timer_call = NULL; } diff --git a/osfmk/ppc/chud/chud_spr.h b/osfmk/ppc/chud/chud_spr.h index 489036c1d..f268c15e3 100644 --- a/osfmk/ppc/chud/chud_spr.h +++ b/osfmk/ppc/chud/chud_spr.h @@ -264,3 +264,4 @@ #define chud_970_pir 1023 #endif // _CHUD_SPR_H_ + diff --git a/osfmk/ppc/chud/chud_thread.c b/osfmk/ppc/chud/chud_thread.c index 3a886a158..b0fe7a94d 100644 --- a/osfmk/ppc/chud/chud_thread.c +++ b/osfmk/ppc/chud/chud_thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. 
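One detail of the syscall hook above worth spelling out: chudxnu_private_syscall_callback() stores the registered function's return value into save_r3, so whatever the callback returns becomes the user-visible result of the CHUD system call, and its own `return 1` asks the trap path to check for ASTs. A registration sketch; chud_syscall_handler and install_chud_syscall are illustrative names:

    static kern_return_t chud_syscall_handler(thread_flavor_t flavor,
                                              thread_state_t tstate,
                                              mach_msg_type_number_t count)
    {
        /* tstate holds the caller's PPC_THREAD_STATE64 at trap time */
        return KERN_SUCCESS;    /* lands in the caller's r3 */
    }

    static void install_chud_syscall(void)
    {
        chudxnu_syscall_callback_enter(chud_syscall_handler);  /* fills PPCcalls[9] */
        /* ... */
        chudxnu_syscall_callback_cancel();
    }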
* * @APPLE_LICENSE_HEADER_START@ * @@ -20,20 +20,43 @@ * @APPLE_LICENSE_HEADER_END@ */ -#include +#include +#include +#include + +#include #include #include -#include #include + +#include +#include + +#include +#include + +#include #include #include +#include + +// forward declarations +extern kern_return_t machine_thread_get_kern_state( thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count); + + +#pragma mark **** thread binding **** __private_extern__ -kern_return_t chudxnu_bind_current_thread(int cpu) +kern_return_t chudxnu_bind_thread(thread_t thread, int cpu) { if(cpu>=0 && cpumact.pcb; // take the top savearea (user or kernel) -} - -static savearea *chudxnu_private_get_user_regs(void) -{ - return find_user_regs(current_act()); // take the top user savearea (skip any kernel saveareas) -} - -static savearea_fpu *chudxnu_private_get_fp_regs(void) -{ - fpu_save(current_act()->mact.curctx); // just in case it's live, save it - return current_act()->mact.curctx->FPUsave; // take the top savearea (user or kernel) -} - -static savearea_fpu *chudxnu_private_get_user_fp_regs(void) -{ - return find_user_fpu(current_act()); // take the top user savearea (skip any kernel saveareas) -} - -static savearea_vec *chudxnu_private_get_vec_regs(void) -{ - vec_save(current_act()->mact.curctx); // just in case it's live, save it - return current_act()->mact.curctx->VMXsave; // take the top savearea (user or kernel) -} - -static savearea_vec *chudxnu_private_get_user_vec_regs(void) -{ - return find_user_vec(current_act()); // take the top user savearea (skip any kernel saveareas) -} +#pragma mark **** thread state **** __private_extern__ kern_return_t chudxnu_copy_savearea_to_threadstate(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, struct savearea *sv) @@ -250,8 +243,6 @@ kern_return_t chudxnu_copy_threadstate_to_savearea(struct savearea *sv, thread_f sv->save_srr1 = (uint64_t)ts->srr1; sv->save_vrsave = ts->vrsave; return KERN_SUCCESS; - } else { - return KERN_FAILURE; } break; case PPC_THREAD_STATE64: @@ -300,104 +291,202 @@ kern_return_t chudxnu_copy_threadstate_to_savearea(struct savearea *sv, thread_f sv->save_srr1 = xts->srr1; sv->save_vrsave = xts->vrsave; return KERN_SUCCESS; - } else { - return KERN_FAILURE; } } + return KERN_FAILURE; } __private_extern__ -kern_return_t chudxnu_thread_get_state(thread_act_t thr_act, - thread_flavor_t flavor, +kern_return_t chudxnu_thread_user_state_available(thread_t thread) +{ + if(find_user_regs(thread)) { + return KERN_SUCCESS; + } else { + return KERN_FAILURE; + } +} + +__private_extern__ +kern_return_t chudxnu_thread_get_state(thread_t thread, + thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, boolean_t user_only) { - if(thr_act==current_act()) { - if(flavor==PPC_THREAD_STATE || flavor==PPC_THREAD_STATE64) { - struct savearea *sv; - if(user_only) { - sv = chudxnu_private_get_user_regs(); - } else { - sv = chudxnu_private_get_regs(); - } - return chudxnu_copy_savearea_to_threadstate(flavor, tstate, count, sv); - } else if(flavor==PPC_FLOAT_STATE && user_only) { -#warning chudxnu_thread_get_state() does not yet support supervisor FP - return machine_thread_get_state(current_act(), flavor, tstate, count); - } else if(flavor==PPC_VECTOR_STATE && user_only) { -#warning chudxnu_thread_get_state() does not yet support supervisor VMX - return machine_thread_get_state(current_act(), flavor, tstate, count); + if(flavor==PPC_THREAD_STATE || 
flavor==PPC_THREAD_STATE64) { // machine_thread_get_state filters out some bits + struct savearea *sv; + if(user_only) { + sv = find_user_regs(thread); } else { - *count = 0; - return KERN_INVALID_ARGUMENT; + sv = find_kern_regs(thread); } - } else { - return machine_thread_get_state(thr_act, flavor, tstate, count); - } + return chudxnu_copy_savearea_to_threadstate(flavor, tstate, count, sv); + } else { + if(user_only) { + return machine_thread_get_state(thread, flavor, tstate, count); + } else { + // doesn't do FP or VMX + return machine_thread_get_kern_state(thread, flavor, tstate, count); + } + } } __private_extern__ -kern_return_t chudxnu_thread_set_state(thread_act_t thr_act, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t count, - boolean_t user_only) +kern_return_t chudxnu_thread_set_state(thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t count, + boolean_t user_only) { - if(thr_act==current_act()) { - if(flavor==PPC_THREAD_STATE || flavor==PPC_THREAD_STATE64) { - struct savearea *sv; - if(user_only) { - sv = chudxnu_private_get_user_regs(); - } else { - sv = chudxnu_private_get_regs(); - } - return chudxnu_copy_threadstate_to_savearea(sv, flavor, tstate, &count); - } else if(flavor==PPC_FLOAT_STATE && user_only) { -#warning chudxnu_thread_set_state() does not yet support supervisor FP - return machine_thread_set_state(current_act(), flavor, tstate, count); - } else if(flavor==PPC_VECTOR_STATE && user_only) { -#warning chudxnu_thread_set_state() does not yet support supervisor VMX - return machine_thread_set_state(current_act(), flavor, tstate, count); + if(flavor==PPC_THREAD_STATE || flavor==PPC_THREAD_STATE64) { // machine_thread_set_state filters out some bits + struct savearea *sv; + if(user_only) { + sv = find_user_regs(thread); } else { - return KERN_INVALID_ARGUMENT; + sv = find_kern_regs(thread); } - } else { - return machine_thread_set_state(thr_act, flavor, tstate, count); - } + return chudxnu_copy_threadstate_to_savearea(sv, flavor, tstate, &count); + } else { + return machine_thread_set_state(thread, flavor, tstate, count); // always user + } } -static inline kern_return_t chudxnu_private_task_read_bytes(task_t task, vm_offset_t addr, int size, void *data) -{ +#pragma mark **** task memory read/write **** - kern_return_t ret; +__private_extern__ +kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size) +{ + kern_return_t ret = KERN_SUCCESS; - if(task==kernel_task) { - if(size==sizeof(unsigned int)) { - addr64_t phys_addr; - ppnum_t pp; + if(!chudxnu_is_64bit_task(task)) { // clear any cruft out of upper 32-bits for 32-bit tasks + usraddr &= 0x00000000FFFFFFFFULL; + } - pp = pmap_find_phys(kernel_pmap, addr); /* Get the page number */ - if(!pp) return KERN_FAILURE; /* Not mapped... */ - - phys_addr = ((addr64_t)pp << 12) | (addr & 0x0000000000000FFFULL); /* Shove in the page offset */ + if(current_task()==task) { + thread_t cur_thr = current_thread(); + vm_offset_t recover_handler = cur_thr->recover; + + if(ml_at_interrupt_context()) { + return KERN_FAILURE; // can't do copyin on interrupt stack + } + + if(copyin(usraddr, kernaddr, size)) { + ret = KERN_FAILURE; + } + cur_thr->recover = recover_handler; + } else { + vm_map_t map = get_task_map(task); + ret = vm_map_read_user(map, usraddr, kernaddr, size); + } + + return ret; +} - if(phys_addr < mem_actual) { /* Sanity check: is it in memory? 
*/ - *((uint32_t *)data) = ml_phys_read_64(phys_addr); - return KERN_SUCCESS; - } - } else { - return KERN_FAILURE; - } +__private_extern__ +kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size) +{ + kern_return_t ret = KERN_SUCCESS; + + if(!chudxnu_is_64bit_task(task)) { // clear any cruft out of upper 32-bits for 32-bit tasks + useraddr &= 0x00000000FFFFFFFFULL; + } + + if(current_task()==task) { + thread_t cur_thr = current_thread(); + vm_offset_t recover_handler = cur_thr->recover; + + if(ml_at_interrupt_context()) { + return KERN_FAILURE; // can't do copyout on interrupt stack + } + + if(copyout(kernaddr, useraddr, size)) { + ret = KERN_FAILURE; + } + cur_thr->recover = recover_handler; } else { - - ret = KERN_SUCCESS; /* Assume everything worked */ - if(copyin((void *)addr, data, size)) ret = KERN_FAILURE; /* Get memory, if non-zero rc, it didn't work */ - return ret; + vm_map_t map = get_task_map(task); + ret = vm_map_write_user(map, kernaddr, useraddr, size); + } + + return ret; +} + +__private_extern__ +kern_return_t chudxnu_kern_read(void *dstaddr, vm_offset_t srcaddr, vm_size_t size) +{ + while(size>0) { + ppnum_t pp; + addr64_t phys_addr; + + pp = pmap_find_phys(kernel_pmap, srcaddr); /* Get the page number */ + if(!pp) { + return KERN_FAILURE; /* Not mapped... */ + } + + phys_addr = ((addr64_t)pp << 12) | (srcaddr & 0x0000000000000FFFULL); /* Shove in the page offset */ + if(phys_addr >= mem_actual) { + return KERN_FAILURE; /* out of range */ + } + + if((phys_addr&0x1) || size==1) { + *((uint8_t *)dstaddr) = ml_phys_read_byte_64(phys_addr); + ((uint8_t *)dstaddr)++; + srcaddr += sizeof(uint8_t); + size -= sizeof(uint8_t); + } else if((phys_addr&0x3) || size<=2) { + *((uint16_t *)dstaddr) = ml_phys_read_half_64(phys_addr); + ((uint16_t *)dstaddr)++; + srcaddr += sizeof(uint16_t); + size -= sizeof(uint16_t); + } else { + *((uint32_t *)dstaddr) = ml_phys_read_word_64(phys_addr); + ((uint32_t *)dstaddr)++; + srcaddr += sizeof(uint32_t); + size -= sizeof(uint32_t); + } + } + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_kern_write(vm_offset_t dstaddr, void *srcaddr, vm_size_t size) +{ + while(size>0) { + ppnum_t pp; + addr64_t phys_addr; + + pp = pmap_find_phys(kernel_pmap, dstaddr); /* Get the page number */ + if(!pp) { + return KERN_FAILURE; /* Not mapped... */ + } + + phys_addr = ((addr64_t)pp << 12) | (dstaddr & 0x0000000000000FFFULL); /* Shove in the page offset */ + if(phys_addr >= mem_actual) { + return KERN_FAILURE; /* out of range */ + } + + if((phys_addr&0x1) || size==1) { + ml_phys_write_byte_64(phys_addr, *((uint8_t *)srcaddr)); + ((uint8_t *)srcaddr)++; + dstaddr += sizeof(uint8_t); + size -= sizeof(uint8_t); + } else if((phys_addr&0x3) || size<=2) { + ml_phys_write_half_64(phys_addr, *((uint16_t *)srcaddr)); + ((uint16_t *)srcaddr)++; + dstaddr += sizeof(uint16_t); + size -= sizeof(uint16_t); + } else { + ml_phys_write_word_64(phys_addr, *((uint32_t *)srcaddr)); + ((uint32_t *)srcaddr)++; + dstaddr += sizeof(uint32_t); + size -= sizeof(uint32_t); + } } + + return KERN_SUCCESS; } -// chudxnu_current_thread_get_callstack gathers a raw callstack along with any information needed to +// chudxnu_thread_get_callstack gathers a raw callstack along with any information needed to // fix it up later (in case we stopped program as it was saving values into prev stack frame, etc.) // after sampling has finished. 
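Before the callstack code, a quick client-side view of the four memory primitives just defined: task addresses go through copyin/copyout or vm_map_read_user/vm_map_write_user (with the upper 32 bits masked for 32-bit tasks), while kernel addresses are resolved to physical pages and copied in byte/half/word units chosen from the address alignment. A sketch; peek_user_word and peek_kernel_word are illustrative names:

    static kern_return_t peek_user_word(task_t target, uint64_t uaddr, uint32_t *out)
    {
        /* fails from interrupt context when target is the current task (no copyin) */
        return chudxnu_task_read(target, out, uaddr, sizeof(*out));
    }

    static kern_return_t peek_kernel_word(vm_offset_t kaddr, uint32_t *out)
    {
        /* physical-copy path; fails if the page is unmapped or out of range */
        return chudxnu_kern_read(out, kaddr, sizeof(*out));
    }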
// @@ -421,30 +510,35 @@ static inline kern_return_t chudxnu_private_task_read_bytes(task_t task, vm_offs #define SUPERVISOR_MODE(msr) ((msr) & MASK(MSR_PR) ? FALSE : TRUE) #endif -#define VALID_STACK_ADDRESS(addr) (addr>=0x1000 && (addr&STACK_ALIGNMENT_MASK)==0x0 && (supervisor ? (addr>=kernStackMin && addr<=kernStackMax) : TRUE)) +#define VALID_STACK_ADDRESS(addr) (addr>=0x1000ULL && (addr&STACK_ALIGNMENT_MASK)==0x0 && (supervisor ? (addr>=kernStackMin && addr<=kernStackMax) : TRUE)) + __private_extern__ -kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only) +kern_return_t chudxnu_thread_get_callstack64( thread_t thread, + uint64_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only) { kern_return_t kr; - vm_address_t nextFramePointer = 0; - vm_address_t currPC, currLR, currR0; - vm_address_t framePointer; - vm_address_t prevPC = 0; - vm_address_t kernStackMin = min_valid_stack_address(); - vm_address_t kernStackMax = max_valid_stack_address(); - unsigned int *buffer = callStack; + task_t task = get_threadtask(thread); + uint64_t nextFramePointer = 0; + uint64_t currPC, currLR, currR0; + uint64_t framePointer; + uint64_t prevPC = 0; + uint64_t kernStackMin = min_valid_stack_address(); + uint64_t kernStackMax = max_valid_stack_address(); + uint64_t *buffer = callStack; + uint32_t tmpWord; int bufferIndex = 0; int bufferMaxIndex = *count; boolean_t supervisor; + boolean_t is64Bit; struct savearea *sv; if(user_only) { - sv = chudxnu_private_get_user_regs(); + sv = find_user_regs(thread); } else { - sv = chudxnu_private_get_regs(); + sv = find_kern_regs(thread); } if(!sv) { @@ -453,12 +547,156 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, } supervisor = SUPERVISOR_MODE(sv->save_srr1); + if(supervisor) { +#warning assuming kernel task is always 32-bit + is64Bit = FALSE; + } else { + is64Bit = chudxnu_is_64bit_task(task); + } - if(!supervisor && ml_at_interrupt_context()) { // can't do copyin() if on interrupt stack + bufferMaxIndex = bufferMaxIndex - 2; // allot space for saving the LR and R0 on the stack at the end. + if(bufferMaxIndex<2) { + *count = 0; + return KERN_RESOURCE_SHORTAGE; + } + + currPC = sv->save_srr0; + framePointer = sv->save_r1; /* r1 is the stack pointer (no FP on PPC) */ + currLR = sv->save_lr; + currR0 = sv->save_r0; + + bufferIndex = 0; // start with a stack of size zero + buffer[bufferIndex++] = currPC; // save PC in position 0. + + // Now, fill buffer with stack backtraces. + while(bufferIndex SP + // Here, we'll get the lr from the stack. + uint64_t fp_link; + + if(is64Bit) { + fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint64_t); + } else { + fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint32_t); + } + + // Note that we read the pc even for the first stack frame (which, in theory, + // is always empty because the callee fills it in just before it lowers the + // stack. However, if we catch the program in between filling in the return + // address and lowering the stack, we want to still have a valid backtrace. + // FixupStack correctly disregards this value if necessary. 
+ + if(supervisor) { + if(is64Bit) { + kr = chudxnu_kern_read(&pc, fp_link, sizeof(uint64_t)); + } else { + kr = chudxnu_kern_read(&tmpWord, fp_link, sizeof(uint32_t)); + pc = tmpWord; + } + } else { + if(is64Bit) { + kr = chudxnu_task_read(task, &pc, fp_link, sizeof(uint64_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, fp_link, sizeof(uint32_t)); + pc = tmpWord; + } + } + if(kr!=KERN_SUCCESS) { + pc = 0; + break; + } + + // retrieve the contents of the frame pointer and advance to the next stack frame if it's valid + if(supervisor) { + if(is64Bit) { + kr = chudxnu_kern_read(&nextFramePointer, framePointer, sizeof(uint64_t)); + } else { + kr = chudxnu_kern_read(&tmpWord, framePointer, sizeof(uint32_t)); + nextFramePointer = tmpWord; + } + } else { + if(is64Bit) { + kr = chudxnu_task_read(task, &nextFramePointer, framePointer, sizeof(uint64_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, framePointer, sizeof(uint32_t)); + nextFramePointer = tmpWord; + } + } + if(kr!=KERN_SUCCESS) { + nextFramePointer = 0; + } + + if(nextFramePointer) { + buffer[bufferIndex++] = pc; + prevPC = pc; + } + + if(nextFramePointer=bufferMaxIndex) { + *count = 0; + return KERN_RESOURCE_SHORTAGE; + } + + // Save link register and R0 at bottom of stack (used for later fixup). + buffer[bufferIndex++] = currLR; + buffer[bufferIndex++] = currR0; + + *count = bufferIndex; + return KERN_SUCCESS; +} + +__private_extern__ +kern_return_t chudxnu_thread_get_callstack( thread_t thread, + uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + kern_return_t kr; + task_t task = get_threadtask(thread); + uint64_t nextFramePointer = 0; + uint64_t currPC, currLR, currR0; + uint64_t framePointer; + uint64_t prevPC = 0; + uint64_t kernStackMin = min_valid_stack_address(); + uint64_t kernStackMax = max_valid_stack_address(); + uint32_t *buffer = callStack; + uint32_t tmpWord; + int bufferIndex = 0; + int bufferMaxIndex = *count; + boolean_t supervisor; + boolean_t is64Bit; + struct savearea *sv; + + if(user_only) { + sv = find_user_regs(thread); + } else { + sv = find_kern_regs(thread); + } + + if(!sv) { *count = 0; return KERN_FAILURE; } + supervisor = SUPERVISOR_MODE(sv->save_srr1); + if(supervisor) { +#warning assuming kernel task is always 32-bit + is64Bit = FALSE; + } else { + is64Bit = chudxnu_is_64bit_task(task); + } + bufferMaxIndex = bufferMaxIndex - 2; // allot space for saving the LR and R0 on the stack at the end. if(bufferMaxIndex<2) { *count = 0; @@ -475,14 +713,20 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, // Now, fill buffer with stack backtraces. while(bufferIndex SP // Here, we'll get the lr from the stack. - volatile vm_address_t fp_link = (vm_address_t)(((unsigned *)framePointer)+FP_LINK_OFFSET); + uint64_t fp_link; + + if(is64Bit) { + fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint64_t); + } else { + fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint32_t); + } // Note that we read the pc even for the first stack frame (which, in theory, // is always empty because the callee fills it in just before it lowers the @@ -491,22 +735,40 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, // FixupStack correctly disregards this value if necessary. 
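Stepping back from the frame-pointer walk: callers hand chudxnu_thread_get_callstack64() a buffer whose capacity goes in via *count, get the PC in slot 0 and the walked frames after it, and the final two slots hold LR and R0 for the fixup pass described above. A sketch; grab_stack and MAX_FRAMES are illustrative:

    #define MAX_FRAMES 64   /* illustrative capacity */

    static kern_return_t grab_stack(thread_t thread)
    {
        uint64_t frames[MAX_FRAMES];
        mach_msg_type_number_t count = MAX_FRAMES;  /* in: capacity, out: depth */
        kern_return_t kr;

        kr = chudxnu_thread_get_callstack64(thread, frames, &count, TRUE);
        if (kr != KERN_SUCCESS)
            return kr;   /* KERN_RESOURCE_SHORTAGE if the buffer was too small */

        /* frames[0] = PC; frames[count-2] = LR; frames[count-1] = R0 */
        return KERN_SUCCESS;
    }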
if(supervisor) { - kr = chudxnu_private_task_read_bytes(kernel_task, fp_link, sizeof(unsigned int), &pc); + if(is64Bit) { + kr = chudxnu_kern_read(&pc, fp_link, sizeof(uint64_t)); + } else { + kr = chudxnu_kern_read(&tmpWord, fp_link, sizeof(uint32_t)); + pc = tmpWord; + } } else { - kr = chudxnu_private_task_read_bytes(current_task(), fp_link, sizeof(unsigned int), &pc); + if(is64Bit) { + kr = chudxnu_task_read(task, &pc, fp_link, sizeof(uint64_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, fp_link, sizeof(uint32_t)); + pc = tmpWord; + } } if(kr!=KERN_SUCCESS) { - // IOLog("task_read_callstack: unable to read framePointer: %08x\n",framePointer); pc = 0; break; } // retrieve the contents of the frame pointer and advance to the next stack frame if it's valid - if(supervisor) { - kr = chudxnu_private_task_read_bytes(kernel_task, framePointer, sizeof(unsigned int), &nextFramePointer); + if(is64Bit) { + kr = chudxnu_kern_read(&nextFramePointer, framePointer, sizeof(uint64_t)); + } else { + kr = chudxnu_kern_read(&tmpWord, framePointer, sizeof(uint32_t)); + nextFramePointer = tmpWord; + } } else { - kr = chudxnu_private_task_read_bytes(current_task(), framePointer, sizeof(unsigned int), &nextFramePointer); + if(is64Bit) { + kr = chudxnu_task_read(task, &nextFramePointer, framePointer, sizeof(uint64_t)); + } else { + kr = chudxnu_task_read(task, &tmpWord, framePointer, sizeof(uint32_t)); + nextFramePointer = tmpWord; + } } if(kr!=KERN_SUCCESS) { nextFramePointer = 0; @@ -520,8 +782,8 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, if(nextFramePointer=bufferMaxIndex) { @@ -529,9 +791,7 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, return KERN_RESOURCE_SHORTAGE; } - // Save link register and R0 at bottom of stack. This means that we won't worry - // about these values messing up stack compression. These end up being used - // by FixupStack. + // Save link register and R0 at bottom of stack (used for later fixup). buffer[bufferIndex++] = currLR; buffer[bufferIndex++] = currR0; @@ -539,31 +799,326 @@ kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, return KERN_SUCCESS; } +#pragma mark **** task and thread info **** + __private_extern__ -int chudxnu_task_threads(task_t task, - thread_act_array_t *thr_act_list, - mach_msg_type_number_t *count) +boolean_t chudxnu_is_64bit_task(task_t task) { - mach_msg_type_number_t task_thread_count = 0; - kern_return_t kr; + return (task_has_64BitAddr(task)); +} + +#define THING_TASK 0 +#define THING_THREAD 1 + +// an exact copy of processor_set_things() except no mig conversion at the end! 
+static kern_return_t chudxnu_private_processor_set_things( processor_set_t pset, + mach_port_t **thing_list, + mach_msg_type_number_t *count, + int type) +{ + unsigned int actual; /* this many things */ + unsigned int maxthings; + unsigned int i; + + vm_size_t size, size_needed; + void *addr; - kr = task_threads(current_task(), thr_act_list, count); - if(kr==KERN_SUCCESS) { - thread_act_t thr_act; - int i, state_count; - for(i=0; i<(*count); i++) { - thr_act = convert_port_to_act(((ipc_port_t *)(*thr_act_list))[i]); - /* undo the mig conversion task_threads does */ - thr_act_list[i] = thr_act; + if (pset == PROCESSOR_SET_NULL) + return (KERN_INVALID_ARGUMENT); + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + + return (KERN_FAILURE); } - } - return kr; + + if (type == THING_TASK) + maxthings = pset->task_count; + else + maxthings = pset->thread_count; + + /* do we have the memory we need? */ + + size_needed = maxthings * sizeof (mach_port_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return (KERN_RESOURCE_SHORTAGE); + } + + /* OK, have memory and the processor_set is locked & active */ + + actual = 0; + switch (type) { + + case THING_TASK: + { + task_t task, *tasks = (task_t *)addr; + + for (task = (task_t)queue_first(&pset->tasks); + !queue_end(&pset->tasks, (queue_entry_t)task); + task = (task_t)queue_next(&task->pset_tasks)) { + task_reference_internal(task); + tasks[actual++] = task; + } + + break; + } + + case THING_THREAD: + { + thread_t thread, *threads = (thread_t *)addr; + + for (i = 0, thread = (thread_t)queue_first(&pset->threads); + !queue_end(&pset->threads, (queue_entry_t)thread); + thread = (thread_t)queue_next(&thread->pset_threads)) { + thread_reference_internal(thread); + threads[actual++] = thread; + } + + break; + } + } + + pset_unlock(pset); + + if (actual < maxthings) + size_needed = actual * sizeof (mach_port_t); + + if (actual == 0) { + /* no things, so return null pointer and deallocate memory */ + *thing_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } + else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + void *newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + switch (type) { + + case THING_TASK: + { + task_t *tasks = (task_t *)addr; + + for (i = 0; i < actual; i++) + task_deallocate(tasks[i]); + break; + } + + case THING_THREAD: + { + thread_t *threads = (thread_t *)addr; + + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + break; + } + } + + kfree(addr, size); + return (KERN_RESOURCE_SHORTAGE); + } + + bcopy((void *) addr, (void *) newaddr, size_needed); + kfree(addr, size); + addr = newaddr; + } + + *thing_list = (mach_port_t *)addr; + *count = actual; + } + + return (KERN_SUCCESS); +} + +// an exact copy of task_threads() except no mig conversion at the end! 
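Both clones (this one and the task_threads() copy that follows) rely on the same sizing idiom: allocate with kalloc() while unlocked, retake the lock, and retry if the population grew past the buffer. The idiom in miniature, with my_lock/my_unlock/my_count as stand-ins for the real pset or task lock and counts:

    extern void my_lock(void);              /* stand-ins for pset/task locking */
    extern void my_unlock(void);
    extern unsigned int my_count(void);

    static void *snapshot_alloc(vm_size_t *out_size)
    {
        vm_size_t size = 0, size_needed;
        void *addr = 0;

        for (;;) {
            my_lock();
            size_needed = my_count() * sizeof(mach_port_t);
            if (size_needed <= size)
                break;                      /* fits: still holding the lock */
            my_unlock();                    /* grow the buffer and try again */
            if (size != 0)
                kfree(addr, size);
            size = size_needed;
            addr = kalloc(size);
            if (addr == 0)
                return 0;
        }
        /* ... copy entries while the lock is still held ... */
        my_unlock();
        *out_size = size_needed;
        return addr;
    }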
+static kern_return_t chudxnu_private_task_threads(task_t task, + thread_act_array_t *threads_out, + mach_msg_type_number_t *count) +{ + mach_msg_type_number_t actual; + thread_t *threads; + thread_t thread; + vm_size_t size, size_needed; + void *addr; + unsigned int i, j; + + if (task == TASK_NULL) + return (KERN_INVALID_ARGUMENT); + + size = 0; addr = 0; + + for (;;) { + task_lock(task); + if (!task->active) { + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + return (KERN_FAILURE); + } + + actual = task->thread_count; + + /* do we have the memory we need? */ + size_needed = actual * sizeof (mach_port_t); + if (size_needed <= size) + break; + + /* unlock the task and allocate more memory */ + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return (KERN_RESOURCE_SHORTAGE); + } + + /* OK, have memory and the task is locked & active */ + threads = (thread_t *)addr; + + i = j = 0; + + for (thread = (thread_t)queue_first(&task->threads); i < actual; + ++i, thread = (thread_t)queue_next(&thread->task_threads)) { + thread_reference_internal(thread); + threads[j++] = thread; + } + + assert(queue_end(&task->threads, (queue_entry_t)thread)); + + actual = j; + size_needed = actual * sizeof (mach_port_t); + + /* can unlock task now that we've got the thread refs */ + task_unlock(task); + + if (actual == 0) { + /* no threads, so return null pointer and deallocate memory */ + + *threads_out = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } + else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + void *newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; ++i) + thread_deallocate(threads[i]); + kfree(addr, size); + return (KERN_RESOURCE_SHORTAGE); + } + + bcopy(addr, newaddr, size_needed); + kfree(addr, size); + threads = (thread_t *)newaddr; + } + + *threads_out = threads; + *count = actual; + } + + return (KERN_SUCCESS); +} + + +__private_extern__ +kern_return_t chudxnu_all_tasks(task_array_t *task_list, + mach_msg_type_number_t *count) +{ + return chudxnu_private_processor_set_things(&default_pset, (mach_port_t **)task_list, count, THING_TASK); +} + +__private_extern__ +kern_return_t chudxnu_free_task_list(task_array_t *task_list, + mach_msg_type_number_t *count) +{ + vm_size_t size = (*count)*sizeof(mach_port_t); + void *addr = *task_list; + + if(addr) { + int i, maxCount = *count; + for(i=0; ilast_switch; + return KERN_SUCCESS; +} + +#pragma mark **** DEPRECATED **** + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_bind_current_thread(int cpu) +{ + return chudxnu_bind_thread(current_thread(), cpu); +} + +// DEPRECATED +kern_return_t chudxnu_unbind_current_thread(void) +{ + return chudxnu_unbind_thread(current_thread()); +} + +// DEPRECATED +__private_extern__ +kern_return_t chudxnu_current_thread_get_callstack( uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + return chudxnu_thread_get_callstack(current_thread(), callStack, count, user_only); +} + +// DEPRECATED +__private_extern__ +thread_t chudxnu_current_act(void) +{ + return chudxnu_current_thread(); } diff --git a/osfmk/ppc/chud/chud_xnu.h b/osfmk/ppc/chud/chud_xnu.h index 2cee3509a..ace3b7969 100644 --- a/osfmk/ppc/chud/chud_xnu.h +++ b/osfmk/ppc/chud/chud_xnu.h @@ -28,51 +28,83 @@ #include #include -#pragma mark **** process **** +#pragma mark **** version **** +uint32_t chudxnu_version(void); + 
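The header below declares the enumeration API implemented above. Worth noting for callers: the list comes back with a reference held on each task, and chudxnu_free_task_list() both drops those references and releases the kalloc'd array. A sketch that logs every pid; list_pids is an illustrative name:

    static void list_pids(void)
    {
        task_array_t tasks;
        mach_msg_type_number_t count;
        unsigned int i;

        if (chudxnu_all_tasks(&tasks, &count) != KERN_SUCCESS)
            return;
        for (i = 0; i < count; i++)
            printf("task %u: pid %d\n", i, chudxnu_pid_for_task(tasks[i]));
        chudxnu_free_task_list(&tasks, &count);  /* drops refs, frees array */
    }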
+#pragma mark **** task **** // ******************************************************************************** -// process +// task // ******************************************************************************** int chudxnu_pid_for_task(task_t task); task_t chudxnu_task_for_pid(int pid); int chudxnu_current_pid(void); +kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size); +kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size); +kern_return_t chudxnu_kern_read(void *destaddr, vm_offset_t srcaddr, vm_size_t size); +kern_return_t chudxnu_kern_write(vm_offset_t destaddr, void *srcaddr, vm_size_t size); + +boolean_t chudxnu_is_64bit_task(task_t task); + #pragma mark **** thread **** // ******************************************************************************** // thread // ******************************************************************************** -kern_return_t chudxnu_bind_current_thread(int cpu); +kern_return_t chudxnu_bind_thread(thread_t thread, int cpu); -kern_return_t chudxnu_unbind_current_thread(void); +kern_return_t chudxnu_unbind_thread(thread_t thread); -kern_return_t chudxnu_thread_get_state(thread_act_t thr_act, +kern_return_t chudxnu_thread_get_state( thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, boolean_t user_only); -kern_return_t chudxnu_thread_set_state(thread_act_t thr_act, +kern_return_t chudxnu_thread_set_state( thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count, boolean_t user_only); -kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, +kern_return_t chudxnu_thread_user_state_available(thread_t thread); + + +kern_return_t chudxnu_thread_get_callstack( thread_t thread, + uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only); + +kern_return_t chudxnu_thread_get_callstack64(thread_t thread, + uint64_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); task_t chudxnu_current_task(void); - -thread_act_t chudxnu_current_act(void); - -int chudxnu_task_threads(task_t task, - thread_act_array_t *thr_act_list, +thread_t chudxnu_current_thread(void); + +task_t chudxnu_task_for_thread(thread_t thread); + +kern_return_t chudxnu_all_tasks(task_array_t *task_list, + mach_msg_type_number_t *count); +kern_return_t chudxnu_free_task_list(task_array_t *task_list, + mach_msg_type_number_t *count); + +kern_return_t chudxnu_all_threads( thread_array_t *thread_list, + mach_msg_type_number_t *count); +kern_return_t chudxnu_task_threads( task_t task, + thread_array_t *thread_list, + mach_msg_type_number_t *count); +kern_return_t chudxnu_free_thread_list(thread_array_t *thread_list, mach_msg_type_number_t *count); -kern_return_t chudxnu_thread_info(thread_act_t thr_act, +kern_return_t chudxnu_thread_info( thread_t thread, thread_flavor_t flavor, thread_info_t thread_info_out, mach_msg_type_number_t *thread_info_count); +kern_return_t chudxnu_thread_last_context_switch(thread_t thread, uint64_t *timestamp); + + #pragma mark **** memory **** // ******************************************************************************** // memory @@ -109,6 +141,11 @@ kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val); uint32_t chudxnu_get_orig_cpu_l2cr(int cpu); uint32_t chudxnu_get_orig_cpu_l3cr(int cpu); +kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p); +kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p); 
+kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val); +kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val); + void chudxnu_flush_caches(void); void chudxnu_enable_caches(boolean_t enable); @@ -147,6 +184,9 @@ kern_return_t chudxnu_clear_cpu_rupt_counters(int cpu); kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable); +kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data); +kern_return_t chudxnu_scom_write(uint32_t reg, uint64_t data); + #pragma mark **** callbacks **** // ******************************************************************************** // callbacks @@ -175,6 +215,7 @@ typedef kern_return_t (*chudxnu_perfmon_ast_callback_func_t)(thread_flavor_t fla kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func); kern_return_t chudxnu_perfmon_ast_callback_cancel(void); kern_return_t chudxnu_perfmon_ast_send(void); +kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent); // cpusig callback - one callback for system typedef kern_return_t (*chudxnu_cpusig_callback_func_t)(int request, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); @@ -187,15 +228,35 @@ typedef kern_return_t (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32 kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func); kern_return_t chudxnu_kdebug_callback_cancel(void); -// task exit callback - one callback for system -typedef kern_return_t (*chudxnu_exit_callback_func_t)(int pid); -kern_return_t chudxnu_exit_callback_enter(chudxnu_exit_callback_func_t func); -kern_return_t chudxnu_exit_callback_cancel(void); +// timer callback - multiple callbacks +typedef kern_return_t (*chudxnu_timer_callback_func_t)(uint32_t param0, uint32_t param1); +typedef void * chud_timer_t; +chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0); +kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units); +kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer); +kern_return_t chudxnu_timer_free(chud_timer_t timer); + +// CHUD systemcall callback - one callback for system +typedef kern_return_t (*chudxnu_syscall_callback_func_t)(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count); +kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func); +kern_return_t chudxnu_syscall_callback_cancel(void); + +// ******************************************************************************** +// DEPRECATED +// ******************************************************************************** +kern_return_t chudxnu_bind_current_thread(int cpu); + +kern_return_t chudxnu_unbind_current_thread(void); + +kern_return_t chudxnu_current_thread_get_callstack(uint32_t *callStack, + mach_msg_type_number_t *count, + boolean_t user_only); + +thread_t chudxnu_current_act(void); // thread timer callback - one callback for system -typedef kern_return_t (*chudxnu_thread_timer_callback_func_t)(uint32_t arg); -kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t arg, uint32_t time, uint32_t units); +typedef kern_return_t (*chudxnu_thread_timer_callback_func_t)(uint32_t param); +kern_return_t chudxnu_thread_timer_callback_enter(chudxnu_thread_timer_callback_func_t func, uint32_t param, uint32_t time, uint32_t units); kern_return_t chudxnu_thread_timer_callback_cancel(void); - #endif /* _PPC_CHUD_XNU_H_ */ diff --git 
a/osfmk/ppc/chud/chud_xnu_private.h b/osfmk/ppc/chud/chud_xnu_private.h new file mode 100644 index 000000000..eeeaa92f2 --- /dev/null +++ b/osfmk/ppc/chud/chud_xnu_private.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _PPC_CHUD_XNU_PRIVATE_H_ +#define _PPC_CHUD_XNU_PRIVATE_H_ + +#include +#include +#include + +#pragma mark **** thread **** +// ******************************************************************************** +// thread +// ******************************************************************************** +extern kern_return_t chudxnu_copy_savearea_to_threadstate( thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count, + struct savearea *sv); + +extern kern_return_t chudxnu_copy_threadstate_to_savearea( struct savearea *sv, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count); + +#endif /* _PPC_CHUD_XNU_PRIVATE_H_ */ diff --git a/osfmk/ppc/clock.h b/osfmk/ppc/clock.h deleted file mode 100644 index e4c912349..000000000 --- a/osfmk/ppc/clock.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ - -#ifndef _PPC_CLOCK_H_ -#define _PPC_CLOCK_H_ - -#include - -#define CLK_SPEED 0.0000012766 /* time to complete a clock (3 MHz) */ - -#if HZ == 120 -# define CLK_INTERVAL 6528 /* clocks to hit CLK_TCK ticks per sec */ -#elif HZ == 100 -# define CLK_INTERVAL 7833 /* clocks to hit CLK_TCK ticks per sec */ -#elif HZ == 60 -# define CLK_INTERVAL 13055 /* clocks to hit CLK_TCK ticks per sec */ -#else -#error "unknown clock speed" -#endif - /* 6528 for 119.998 Hz. */ - /* 7833 for 100.004 Hz */ - /* 13055 for 60.002 Hz. 
*/ -#define CLK_INTH (CLK_INTERVAL >> 8) -#define CLK_INTL (CLK_INTERVAL & 0xff) - -#define SECDAY ((unsigned)(24*60*60)) -#define SECYR ((unsigned)(365*SECDAY + SECDAY/4)) - -#endif /* _PPC_CLOCK_H_ */ diff --git a/osfmk/ppc/commpage/atomic.s b/osfmk/ppc/commpage/atomic.s new file mode 100644 index 000000000..38a82f839 --- /dev/null +++ b/osfmk/ppc/commpage/atomic.s @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include // EXT, LEXT +#include +#include + + +/* OSAtomic.h library native implementations. */ + + .text + .align 2 + +atomic_add32: // int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); +1: + lwarx r5,0,r4 + add r6,r3,r5 + stwcx. r6,0,r4 + bne-- 1b + mr r3,r6 + blr + + COMMPAGE_DESCRIPTOR(atomic_add32,_COMM_PAGE_ATOMIC_ADD32,0,0,kCommPageBoth) + + +atomic_add64: // int64_t OSAtomicAdd64( int64_t amt, int64_t *value ); +1: + ldarx r5,0,r4 + add r6,r3,r5 + stdcx. r6,0,r4 + bne-- 1b + mr r3,r6 + blr + + COMMPAGE_DESCRIPTOR(atomic_add64,_COMM_PAGE_ATOMIC_ADD64,k64Bit,0,kCommPage64) + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This is the no-barrier version */ +compare_and_swap32_on32: // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); +1: + lwarx r7,0,r5 + cmplw r7,r3 + bne- 2f + stwcx. r4,0,r5 + bne- 1b + li r3,1 + blr +2: + li r3,0 // return failure + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap32_on32,_COMM_PAGE_COMPARE_AND_SWAP32,0,k64Bit,kCommPageBoth) + + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This is the no-barrier version */ +compare_and_swap32_on64: // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); +1: + lwarx r7,0,r5 + cmplw r7,r3 + bne-- 2f + stwcx. r4,0,r5 + bne-- 1b + li r3,1 + blr +2: + li r8,-8 // on 970, must release reservation + li r3,0 // return failure + stwcx. r4,r8,r1 // store into red zone to release + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap32_on64,_COMM_PAGE_COMPARE_AND_SWAP32,k64Bit,0,kCommPageBoth) + + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This is the no-barrier version */ +compare_and_swap64: // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); +1: + ldarx r7,0,r5 + cmpld r7,r3 + bne-- 2f + stdcx. r4,0,r5 + bne-- 1b + li r3,1 + blr +2: + li r8,-8 // on 970, must release reservation + li r3,0 // return failure + stdcx. 
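For readers who do not think in larx/stcx. pairs: every compare-and-swap variant in this file implements the same C-visible contract, differing only in barriers and in the 970's need to release a stale reservation through a red-zone store. The contract written out in plain C (this sketch is not itself atomic; the atomicity comes from the lwarx/stwcx. retry loop above):

    /* C-visible semantics of OSAtomicCompareAndSwap32; illustrative only. */
    int OSAtomicCompareAndSwap32(int32_t oldval, int32_t newval, int32_t *value)
    {
        /* performed atomically by the hardware reservation */
        if (*value == oldval) {
            *value = newval;
            return 1;    /* success */
        }
        return 0;        /* someone else got there first */
    }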
r4,r8,r1 // store into red zone to release + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap64,_COMM_PAGE_COMPARE_AND_SWAP64,k64Bit,0,kCommPage64) + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This version of compare-and-swap incorporates a memory barrier. */ +compare_and_swap32_on32b: // bool OSAtomicCompareAndSwapBarrier32( int32_t old, int32_t new, int32_t *value); + eieio // write barrier, NOP'd on a UP +1: + lwarx r7,0,r5 + cmplw r7,r3 + bne- 2f + stwcx. r4,0,r5 + bne- 1b + isync // read barrier, NOP'd on a UP + li r3,1 + blr +2: + li r3,0 // return failure + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap32_on32b,_COMM_PAGE_COMPARE_AND_SWAP32B,0,k64Bit,kCommPageBoth+kCommPageSYNC+kCommPageISYNC) + + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This version of compare-and-swap incorporates a memory barrier. */ +compare_and_swap32_on64b: // bool OSAtomicCompareAndSwapBarrier32( int32_t old, int32_t new, int32_t *value); + lwsync // write barrier, NOP'd on a UP +1: + lwarx r7,0,r5 + cmplw r7,r3 + bne-- 2f + stwcx. r4,0,r5 + bne-- 1b + isync // read barrier, NOP'd on a UP + li r3,1 + blr +2: + li r8,-8 // on 970, must release reservation + li r3,0 // return failure + stwcx. r4,r8,r1 // store into red zone to release + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap32_on64b,_COMM_PAGE_COMPARE_AND_SWAP32B,k64Bit,0,kCommPageBoth+kCommPageSYNC+kCommPageISYNC) + + +/* WARNING: Libc clients assume compare-and-swap preserves r4, r5, and r9-r12! */ +/* This version of compare-and-swap incorporates a memory barrier. */ +compare_and_swap64b: // bool OSAtomicCompareAndSwapBarrier64( int64_t old, int64_t new, int64_t *value); + lwsync // write barrier, NOP'd on a UP +1: + ldarx r7,0,r5 + cmpld r7,r3 + bne-- 2f + stdcx. r4,0,r5 + bne-- 1b + isync // read barrier, NOP'd on a UP + li r3,1 + blr +2: + li r8,-8 // on 970, must release reservation + li r3,0 // return failure + stdcx. r4,r8,r1 // store into red zone to release + blr + + COMMPAGE_DESCRIPTOR(compare_and_swap64b,_COMM_PAGE_COMPARE_AND_SWAP64B,k64Bit,0,kCommPage64+kCommPageSYNC+kCommPageISYNC) + + +atomic_enqueue32: // void OSAtomicEnqueue( void **list, void *new, size_t offset); +1: + lwarx r6,0,r3 // get link to 1st on list + stwx r6,r4,r5 // hang list off new node + eieio // make sure the "stwx" comes before "stwcx." (nop'd on UP) + stwcx. r4,0,r3 // make new 1st on list + beqlr++ + b 1b + + COMMPAGE_DESCRIPTOR(atomic_enqueue32,_COMM_PAGE_ENQUEUE,0,0,kCommPageSYNC+kCommPage32) + + +atomic_enqueue64: // void OSAtomicEnqueue( void **list, void *new, size_t offset); +1: + ldarx r6,0,r3 // get link to 1st on list + stdx r6,r4,r5 // hang list off new node + lwsync // make sure the "stdx" comes before the "stdcx." (nop'd on UP) + stdcx. r4,0,r3 // make new 1st on list + beqlr++ + b 1b + + COMMPAGE_DESCRIPTOR(atomic_enqueue64,_COMM_PAGE_ENQUEUE,k64Bit,0,kCommPageSYNC+kCommPage64) + + +atomic_dequeue32_on32: // void* OSAtomicDequeue( void **list, size_t offset); + mr r5,r3 +1: + lwarx r3,0,r5 // get 1st in list + cmpwi r3,0 // null? + beqlr // yes, list empty + lwzx r6,r3,r4 // get 2nd + stwcx. 
r6,0,r5 // make 2nd first + bne-- 1b + isync // cancel read-aheads (nop'd on UP) + blr + + COMMPAGE_DESCRIPTOR(atomic_dequeue32_on32,_COMM_PAGE_DEQUEUE,0,k64Bit,kCommPageISYNC+kCommPage32) + + +atomic_dequeue32_on64: // void* OSAtomicDequeue( void **list, size_t offset); + mr r5,r3 + li r7,-8 // use red zone to release reservation if necessary +1: + lwarx r3,0,r5 // get 1st in list + cmpwi r3,0 // null? + beq 2f + lwzx r6,r3,r4 // get 2nd + stwcx. r6,0,r5 // make 2nd first + isync // cancel read-aheads (nop'd on UP) + beqlr++ // return next element in r3 + b 1b // retry (lost reservation) +2: + stwcx. r0,r7,r1 // on 970, release reservation using red zone + blr // return null + + COMMPAGE_DESCRIPTOR(atomic_dequeue32_on64,_COMM_PAGE_DEQUEUE,k64Bit,0,kCommPageISYNC+kCommPage32) + + +atomic_dequeue64: // void* OSAtomicDequeue( void **list, size_t offset); + mr r5,r3 + li r7,-8 // use red zone to release reservation if necessary +1: + ldarx r3,0,r5 // get 1st in list + cmpdi r3,0 // null? + beq 2f + ldx r6,r3,r4 // get 2nd + stdcx. r6,0,r5 // make 2nd first + isync // cancel read-aheads (nop'd on UP) + beqlr++ // return next element in r3 + b 1b // retry (lost reservation) +2: + stdcx. r0,r7,r1 // on 970, release reservation using red zone + blr // return null + + COMMPAGE_DESCRIPTOR(atomic_dequeue64,_COMM_PAGE_DEQUEUE,k64Bit,0,kCommPageISYNC+kCommPage64) + + +memory_barrier_up: // void OSMemoryBarrier( void ) + blr // nothing to do on UP + + COMMPAGE_DESCRIPTOR(memory_barrier_up,_COMM_PAGE_MEMORY_BARRIER,kUP,0,kCommPageBoth) + + +memory_barrier_mp32: // void OSMemoryBarrier( void ) + isync // we use eieio in preference to sync... + eieio // ...because it is faster + blr + + COMMPAGE_DESCRIPTOR(memory_barrier_mp32,_COMM_PAGE_MEMORY_BARRIER,0,kUP+k64Bit,kCommPage32) + + +memory_barrier_mp64: // void OSMemoryBarrier( void ) + isync + lwsync // on 970, lwsync is faster than eieio + blr + + COMMPAGE_DESCRIPTOR(memory_barrier_mp64,_COMM_PAGE_MEMORY_BARRIER,k64Bit,kUP,kCommPageBoth) diff --git a/osfmk/ppc/commpage/bcopy_64.s b/osfmk/ppc/commpage/bcopy_64.s index 4ca185ea1..e336c2ea6 100644 --- a/osfmk/ppc/commpage/bcopy_64.s +++ b/osfmk/ppc/commpage/bcopy_64.s @@ -28,6 +28,10 @@ * Altivec bugs that need to be worked around. It is not particularly well * optimized. * + * For 64-bit processors with a 128-byte cache line, running in either + * 32- or 64-bit mode. This is written for 32-bit execution; the kernel + * will translate to 64-bit code when it compiles the 64-bit commpage. + * * Register usage. Note we use R2, so this code will not run in a PEF/CFM * environment. * r0 = "w7" or temp @@ -64,7 +68,6 @@ #include .text - .globl EXT(bcopy_64) #define kLong 64 // too long for inline loopless code @@ -295,4 +298,4 @@ LRevAligned: b LShortReverse64 - COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,0) + COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64) diff --git a/osfmk/ppc/commpage/bcopy_970.s b/osfmk/ppc/commpage/bcopy_970.s index 6294f16d7..8417fde74 100644 --- a/osfmk/ppc/commpage/bcopy_970.s +++ b/osfmk/ppc/commpage/bcopy_970.s @@ -25,7 +25,6 @@ * * Version of 6/11/2003, tuned for the IBM 970. * - * * Register usage. Note the rather delicate way we assign multiple uses * to the same register. Beware. 
* r0 = temp (NB: cannot use r0 for any constant such as "c16") @@ -74,8 +73,22 @@ #include .text - .globl EXT(bcopy_970) - +/* + * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary + * to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following + * simple transformations: + * - all word compares are changed to doubleword + * - all "srwi[.]" opcodes are changed to "srdi[.]" + * Nothing else is done. For this to work, the following rules must be + * carefully followed: + * - do not use carry or overflow + * - only use record mode if you are sure the results are mode-invariant + * for example, all "andi." and almost all "rlwinm." are fine + * - do not use "slwi", "slw", or "srw" + * An imaginative programmer could break the porting model in other ways, but the above + * are the most likely problem areas. It is perhaps surprising how well in practice + * this simple method works. + */ #define kShort 64 #define kVeryLong (128*1024) @@ -347,7 +360,7 @@ LFwdLongVectors: lis w3,kVeryLong>>16 // cutoff for very-long-operand special case path cmplw cr1,rc,w3 // very long operand? rlwinm w3,rc,0,28,31 // move last 0-15 byte count to w3 - bgea-- cr1,_COMM_PAGE_BIGCOPY // handle big copies separately + bge-- cr1,LBigCopy // handle big copies separately mtctr r0 // set up loop count cmpwi cr6,w3,0 // set cr6 on leftover byte count oris w4,rv,0xFFF8 // we use v0-v12 @@ -586,4 +599,23 @@ LReverseVecUnal: bne cr6,LShortReverse16 // handle last 0-15 bytes iff any blr - COMMPAGE_DESCRIPTOR(bcopy_970,_COMM_PAGE_BCOPY,k64Bit+kHasAltivec,0,kCommPageMTCRF) + +// Very Big Copy Path. Save our return address in the stack for help decoding backtraces. +// The conditions bigcopy expects are: +// r0 = return address (also stored in caller's SF) +// r4 = source ptr +// r5 = length (at least several pages) +// r12 = dest ptr + +LBigCopy: + lis r2,0x4000 // r2 <- 0x40000000 + mflr r0 // get our return address + add. 
r2,r2,r2 // set cr0_lt if running in 32-bit mode + stw r0,8(r1) // save return, assuming 32-bit mode ("crsave" if 64-bit mode) + blta _COMM_PAGE_BIGCOPY // 32-bit mode, join big operand copy + std r0,16(r1) // save return in correct spot for 64-bit mode + ba _COMM_PAGE_BIGCOPY // then join big operand code + + + COMMPAGE_DESCRIPTOR(bcopy_970,_COMM_PAGE_BCOPY,k64Bit+kHasAltivec,0, \ + kCommPageMTCRF+kCommPageBoth+kPort32to64) diff --git a/osfmk/ppc/commpage/bcopy_g3.s b/osfmk/ppc/commpage/bcopy_g3.s index 5079d11ec..fa7d8dd24 100644 --- a/osfmk/ppc/commpage/bcopy_g3.s +++ b/osfmk/ppc/commpage/bcopy_g3.s @@ -62,7 +62,6 @@ #include .text - .globl EXT(bcopy_g3) #define kLong 33 // too long for string ops @@ -268,4 +267,4 @@ LReverseFloat: stswx r5,0,r0 // store them blr - COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,0) + COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32) diff --git a/osfmk/ppc/commpage/bcopy_g4.s b/osfmk/ppc/commpage/bcopy_g4.s index 9e0a53545..1fd142e82 100644 --- a/osfmk/ppc/commpage/bcopy_g4.s +++ b/osfmk/ppc/commpage/bcopy_g4.s @@ -86,7 +86,6 @@ #include .text - .globl EXT(bcopy_g4) #define kMedium 32 // too long for inline loopless code #define kLong 96 // long enough to justify use of Altivec @@ -615,4 +614,4 @@ LReverseVecUnal: bne cr6,LShortReverse16 // handle last 0-15 bytes iff any blr - COMMPAGE_DESCRIPTOR(bcopy_g4,_COMM_PAGE_BCOPY,kHasAltivec,k64Bit,kCommPageDCBA) + COMMPAGE_DESCRIPTOR(bcopy_g4,_COMM_PAGE_BCOPY,kHasAltivec,k64Bit,kCommPageDCBA+kCommPage32) diff --git a/osfmk/ppc/commpage/bigcopy_970.s b/osfmk/ppc/commpage/bigcopy_970.s index befd85ba7..e8aad5f77 100644 --- a/osfmk/ppc/commpage/bigcopy_970.s +++ b/osfmk/ppc/commpage/bigcopy_970.s @@ -23,8 +23,9 @@ * Very Long Operand BCOPY for Mac OS X * ==================================== * - * Version of 6/11/2003, tuned for the IBM 970. This is for operands at - * least several pages long. It is called from bcopy()/memcpy()/memmove(). + * Version of 2/21/2004, tuned for the IBM 970. This is for operands at + * least several pages long. It is called from bcopy()/memcpy()/memmove(), + * and runs both in 32 and 64-bit mode. * * We use the following additional strategies not used by the shorter * operand paths. Mostly, we try to optimize for memory bandwidth: @@ -36,58 +37,40 @@ * which is amortized across the very long operand. * 2. Copy larger chunks per iteration to minimize R/W bus turnaround * and maximize DRAM page locality (opening a new page is expensive.) + * We use 256-byte chunks. * 3. Touch in one source chunk ahead with DCBT. This is probably the * least important change, and probably only helps restart the * hardware stream at the start of each source page. - * - * Register usage. Note the rather delicate way we assign multiple uses - * to the same register. Beware. 
- * r0 = temp (NB: cannot use r0 for any constant such as "c16") - * r3 = not used, as memcpy and memmove return 1st parameter as a value - * r4 = source ptr ("rs") - * r5 = count of bytes to move ("rc") - * r6 = constant 16 ("c16") - * r7 = constant 32 (""c32") - * r8 = constant 48 (""c48") - * r9 = constant 128 (""c128") - * r10 = vrsave ("rv") - * r11 = constant 256 (""c256") - * r12 = destination ptr ("rd") - * r13 = constant 384 (""c384") - * r14 = temp ("rx") - * r15 = temp ("rt") */ -#define rs r4 -#define rd r12 -#define rc r5 -#define rv r10 -#define rx r14 -#define rt r15 - -#define c16 r6 -#define c32 r7 -#define c48 r8 -#define c128 r9 -#define c256 r11 -#define c384 r13 + +#define rs r13 +#define rd r14 +#define rc r15 +#define rx r16 + +#define c16 r3 +#define c32 r4 +#define c48 r5 +#define c64 r6 +#define c80 r7 +#define c96 r8 +#define c112 r9 +#define c256 r10 +#define c384 r11 +#define rv r12 // vrsave // Offsets within the "red zone" (which is 224 bytes long): -#define rzR13 -8 -#define rzR14 -12 -#define rzR15 -16 -#define rzV20 -32 -#define rzV21 -48 -#define rzV22 -64 -#define rzV23 -80 -#define rzV24 -96 -#define rzV25 -112 -#define rzV26 -128 -#define rzV27 -144 -#define rzV28 -160 -#define rzV29 -176 -#define rzV30 -192 -#define rzV31 -208 +#define rzR3 -8 +#define rzR13 -16 +#define rzR14 -24 +#define rzR15 -32 +#define rzR16 -40 + +#define rzV20 -64 +#define rzV21 -80 +#define rzV22 -96 +#define rzV23 -112 #include @@ -96,401 +79,247 @@ #include .text - .globl EXT(bigcopy_970) - +/* + * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary + * to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following + * simple transformations: + * - all word compares are changed to doubleword + * - all "srwi[.]" opcodes are changed to "srdi[.]" + * Nothing else is done. For this to work, the following rules must be + * carefully followed: + * - do not use carry or overflow + * - only use record mode if you are sure the results are mode-invariant + * for example, all "andi." and almost all "rlwinm." are fine + * - do not use "slwi", "slw", or "srw" + * An imaginative programmer could break the porting model in other ways, but the above + * are the most likely problem areas. It is perhaps surprising how well in practice + * this simple method works. + */ // Entry point. This is a subroutine of bcopy(). When called: -// r4 = source ptr (aka "rs") -// r12 = dest ptr (aka "rd") -// r5 = length (>= 16K bytes) (aka "rc") +// r0 = return address (also stored in caller's SF) +// r4 = source ptr +// r5 = length (at least several pages) +// r12 = dest ptr // -// We only do "forward" moves, ie non-overlapping or toward 0. -// -// We return with non-volatiles and r3 preserved. +// We only do "forward" moves, ie non-overlapping or toward 0. We return with non-volatiles +// and r3 preserved. .align 5 bigcopy_970: - stw r13,rzR13(r1) // spill non-volatile regs we use to redzone - stw r14,rzR14(r1) - stw r15,rzR15(r1) - li r0,rzV20 - neg rt,rd // start to cache-line-align destination - stvx v20,r1,r0 // we use all 32 VRs - li r0,rzV21 - stvx v21,r1,r0 - li r0,rzV22 - stvx v22,r1,r0 - li r0,rzV23 - stvx v23,r1,r0 - li r0,rzV24 - andi. 
rt,rt,127 // get #bytes to 128-byte align - stvx v24,r1,r0 - li r0,rzV25 - stvx v25,r1,r0 - li r0,rzV26 - sub rc,rc,rt // adjust length by #bytes to align destination - stvx v26,r1,r0 - li r0,rzV27 - stvx v27,r1,r0 - li r0,rzV28 - mtctr rt // #bytes to align destination - stvx v28,r1,r0 - li r0,rzV29 - stvx v29,r1,r0 - li r0,rzV30 - stvx v30,r1,r0 - li r0,rzV31 - stvx v31,r1,r0 - beq 2f // dest already 128-byte aligned - b 1f - + neg r2,r12 // is destination cache-line-aligned? + std r3,rzR3(r1) // save caller's r3, which must be preserved for memcpy() + std r13,rzR13(r1) // spill non-volatile regs we use to redzone + std r14,rzR14(r1) + std r15,rzR15(r1) + andi. r2,r2,0x7F // #bytes to align + std r16,rzR16(r1) + mr rs,r4 // copy parameters into nonvolatile registers + mr rd,r12 + mr rc,r5 + mr rx,r0 // also save return address + beq 1f // skip if already aligned // Cache-line-align destination. - - .align 5 -1: - lbz r0,0(rs) - addi rs,rs,1 - stb r0,0(rd) - addi rd,rd,1 - bdnz 1b + + mr r3,rd // set up dest ptr for memcpy() + mr r5,r2 // number of bytes to copy + add rs,rs,r2 // then bump our parameters past initial copy + add rd,rd,r2 + sub rc,rc,r2 + bla _COMM_PAGE_MEMCPY // 128-byte-align destination -// Is source 16-byte aligned? Load constant offsets. +// Load constant offsets and check whether source is 16-byte aligned. +// NB: the kernel clears cr7 if it emulates a dcbz128 on the commpage, +// and we dcbz only if cr7 beq is set. -2: +1: + dcbt 0,rs // touch in 1st line of source andi. r0,rs,15 // check source alignment mfspr rv,vrsave // save caller's bitmask - li r0,-1 // we use all 32 VRs li c16,16 // load the constant offsets for x-form ops li c32,32 + srwi r2,rc,8 // get number of 256-byte chunks to xfer + li r0,-256 // we use 24 VRs (ie, 0-23) li c48,48 - li c128,128 + li c64,64 + li c80,80 + or r0,r0,rv // add our bits to caller's + li c96,96 + mtctr r2 // set up loop count + li c112,112 + cmpd cr7,r2,r2 // initialize cr7_eq to "on", so we dcbz128 + mtspr vrsave,r0 // say we use vr0..vr23 li c256,256 li c384,384 - mtspr vrsave,r0 - -// NB: the kernel clears cr7 if it emulates a dcbz128 on the commpage, -// and we dcbz only if cr7 beq is set. We check to be sure the dcbz's -// won't zero source bytes before we load them, since we zero before -// loading as this is faster than zeroing after loading and before storing. + beq LalignedLoop // handle aligned sources - cmpw cr7,r0,r0 // initialize cr7 beq to use dcbz128 - sub rt,rs,rd // get (rs-rd) - cmplwi cr1,rt,512 // are we moving down less than 512 bytes? -// Start fetching in source cache lines. +// Set up for unaligned loop. - dcbt c128,rs // first line already touched in - dcbt c256,rs - dcbt c384,rs - - bge++ cr1,3f // skip if not moving down less than 512 bytes - cmpw cr7,c16,c32 // cannot dcbz since it would zero source bytes -3: - beq LalignedLoop // handle aligned sources lvsl v0,0,rs // get permute vector for left shift lvxl v1,0,rs // prime the loop + li r0,rzV20 // save non-volatile VRs in redzone + stvx v20,r1,r0 + li r0,rzV21 + stvx v21,r1,r0 + li r0,rzV22 + stvx v22,r1,r0 + li r0,rzV23 + stvx v23,r1,r0 b LunalignedLoop // enter unaligned loop -// Main loop for unaligned operands. We loop over 384-byte chunks (3 cache lines) -// since we need a few VRs for permuted destination QWs and the permute vector. +// Main loop for unaligned operands. We loop over 256-byte chunks (2 cache lines). +// Destination is 128-byte aligned, source is unaligned. 
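// As a rough C model of the lvsl/vperm software-alignment trick this loop
// relies on (an illustrative sketch, not part of the patch; it assumes len is
// a multiple of 16 and that reading one quadword past the end of the source
// is harmless, as the commpage code arranges):
//
//	size_t skew = (uintptr_t)src & 15;	// source misalignment
//	const uint8_t *p = src - skew;		// 16-byte-aligned base
//	uint8_t prev[16], next[16];
//	memcpy(prev, p, 16);			// models "lvxl v1,0,rs" priming the loop
//	for (size_t i = 0; i < len; i += 16) {
//		memcpy(next, p + i + 16, 16);	// fetch the next aligned quad
//		for (int b = 0; b < 16; b++)	// models "vperm vD,v1,v2,v0"
//			dst[i+b] = (b + skew < 16) ? prev[b+skew] : next[b+skew-16];
//		memcpy(prev, next, 16);		// slide the 32-byte window
//	}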
.align 5 LunalignedLoop: - subi rc,rc,384 // decrement byte count - addi rx,rs,384 // get address of next chunk + dcbt c256,rs // touch in next chunk + dcbt c384,rs + addi r2,rs,128 // point to 2nd 128 bytes of source lvxl v2,c16,rs lvxl v3,c32,rs + lvxl v4,c48,rs + lvxl v5,c64,rs + lvxl v6,c80,rs + lvxl v7,c96,rs + lvxl v8,c112,rs + lvxl v9,0,r2 + addi rs,rs,256 // point to next source chunk + lvxl v10,c16,r2 + lvxl v11,c32,r2 + vperm v17,v1,v2,v0 + lvxl v12,c48,r2 + lvxl v13,c64,r2 + vperm v18,v2,v3,v0 + lvxl v14,c80,r2 + lvxl v15,c96,r2 + vperm v19,v3,v4,v0 + lvxl v16,c112,r2 + lvxl v1,0,rs // peek ahead at first source quad in next chunk + vperm v20,v4,v5,v0 + addi r2,rd,128 // point to 2nd 128 bytes of dest bne-- cr7,1f // skip dcbz's if cr7 beq has been turned off by kernel - dcbz128 0,rd // (also skip if moving down less than 512 bytes) - bne-- cr7,1f // catch it first time through - dcbz128 c128,rd - dcbz128 c256,rd + dcbz128 0,rd + dcbz128 0,r2 1: - addi rt,rs,64 - dcbt 0,rx // touch in next chunk - dcbt c128,rx - dcbt c256,rx - lvxl v4,c48,rs - addi rs,rs,128 - lvxl v5,0,rt - cmplwi rc,384 // another chunk to go? - lvxl v6,c16,rt - lvxl v7,c32,rt - lvxl v8,c48,rt - addi rt,rs,64 - vperm v25,v1,v2,v0 - lvxl v9,0,rs - lvxl v10,c16,rs - vperm v26,v2,v3,v0 - lvxl v11,c32,rs - lvxl v12,c48,rs - vperm v27,v3,v4,v0 - addi rs,rs,128 - lvxl v13,0,rt - lvxl v14,c16,rt - vperm v28,v4,v5,v0 - lvxl v15,c32,rt - lvxl v16,c48,rt - vperm v29,v5,v6,v0 - addi rt,rs,64 - lvxl v17,0,rs - lvxl v18,c16,rs - vperm v30,v6,v7,v0 - lvxl v19,c32,rs - lvxl v20,c48,rs - vperm v31,v7,v8,v0 - addi rs,rs,128 - lvxl v21,0,rt - lvxl v22,c16,rt - vperm v2,v8,v9,v0 - lvxl v23,c32,rt - lvxl v24,c48,rt - vperm v3,v9,v10,v0 - lvx v1,0,rs // get 1st qw of next chunk - vperm v4,v10,v11,v0 - - addi rt,rd,64 - stvxl v25,0,rd - stvxl v26,c16,rd - vperm v5,v11,v12,v0 - stvxl v27,c32,rd - stvxl v28,c48,rd - vperm v6,v12,v13,v0 - addi rd,rd,128 - stvxl v29,0,rt - stvxl v30,c16,rt - vperm v7,v13,v14,v0 - stvxl v31,c32,rt - stvxl v2,c48,rt - vperm v8,v14,v15,v0 - addi rt,rd,64 - stvxl v3,0,rd - stvxl v4,c16,rd - vperm v9,v15,v16,v0 - stvxl v5,c32,rd - stvxl v6,c48,rd - vperm v10,v16,v17,v0 - addi rd,rd,128 - stvxl v7,0,rt - vperm v11,v17,v18,v0 - stvxl v8,c16,rt - stvxl v9,c32,rt - vperm v12,v18,v19,v0 - stvxl v10,c48,rt - addi rt,rd,64 - vperm v13,v19,v20,v0 - stvxl v11,0,rd - stvxl v12,c16,rd - vperm v14,v20,v21,v0 - stvxl v13,c32,rd - vperm v15,v21,v22,v0 - stvxl v14,c48,rd - vperm v16,v22,v23,v0 - addi rd,rd,128 - stvxl v15,0,rt - vperm v17,v23,v24,v0 - stvxl v16,c16,rt - vperm v18,v24,v1,v0 - stvxl v17,c32,rt - stvxl v18,c48,rt - bge++ LunalignedLoop // loop if another 384 bytes to go - -// End of unaligned main loop. Handle up to 384 leftover bytes. - - srwi. 
r0,rc,5 // get count of 32-byte chunks remaining - beq Ldone // none - rlwinm rc,rc,0,0x1F // mask count down to 0..31 leftover bytes - mtctr r0 -1: // loop over 32-byte chunks - lvx v2,c16,rs - lvx v3,c32,rs - addi rs,rs,32 - vperm v8,v1,v2,v0 - vperm v9,v2,v3,v0 - vor v1,v3,v3 // v1 <- v3 - stvx v8,0,rd - stvx v9,c16,rd - addi rd,rd,32 - bdnz 1b - - b Ldone + vperm v21,v5,v6,v0 + stvxl v17,0,rd + vperm v22,v6,v7,v0 + stvxl v18,c16,rd + vperm v23,v7,v8,v0 + stvxl v19,c32,rd + vperm v17,v8,v9,v0 + stvxl v20,c48,rd + vperm v18,v9,v10,v0 + stvxl v21,c64,rd + vperm v19,v10,v11,v0 + stvxl v22,c80,rd + vperm v20,v11,v12,v0 + stvxl v23,c96,rd + vperm v21,v12,v13,v0 + stvxl v17,c112,rd + vperm v22,v13,v14,v0 + addi rd,rd,256 // point to next dest chunk + stvxl v18,0,r2 + vperm v23,v14,v15,v0 + stvxl v19,c16,r2 + vperm v17,v15,v16,v0 + stvxl v20,c32,r2 + vperm v18,v16,v1,v0 + stvxl v21,c48,r2 + stvxl v22,c64,r2 + stvxl v23,c80,r2 + stvxl v17,c96,r2 + stvxl v18,c112,r2 + bdnz++ LunalignedLoop // loop if another 256 bytes to go + + li r6,rzV20 // restore non-volatile VRs + li r7,rzV21 + li r8,rzV22 + li r9,rzV23 + lvx v20,r1,r6 + lvx v21,r1,r7 + lvx v22,r1,r8 + lvx v23,r1,r9 + b Ldone // Aligned loop. Destination is 128-byte aligned, and source is 16-byte -// aligned. Loop over 512-byte chunks (4 cache lines.) +// aligned. Loop over 256-byte chunks (2 cache lines.) .align 5 LalignedLoop: - subi rc,rc,512 // decrement count - addi rx,rs,512 // address of next chunk + dcbt c256,rs // touch in next chunk + dcbt c384,rs + addi r2,rs,128 // point to 2nd 128 bytes of source lvxl v1,0,rs lvxl v2,c16,rs - bne-- cr7,1f // skip dcbz's if cr7 beq has been turned off by kernel - dcbz128 0,rd // (also skip if moving down less than 512 bytes) - bne-- cr7,1f // catch it first time through - dcbz128 c128,rd - dcbz128 c256,rd - dcbz128 c384,rd -1: - addi rt,rs,64 - dcbt 0,rx // touch in next chunk - dcbt c128,rx - dcbt c256,rx - dcbt c384,rx lvxl v3,c32,rs lvxl v4,c48,rs - addi rs,rs,128 - lvxl v5,0,rt - cmplwi rc,512 // another chunk to go? 
- lvxl v6,c16,rt - lvxl v7,c32,rt - lvxl v8,c48,rt - addi rt,rs,64 - lvxl v9,0,rs - lvxl v10,c16,rs - lvxl v11,c32,rs - lvxl v12,c48,rs - addi rs,rs,128 - lvxl v13,0,rt - lvxl v14,c16,rt - lvxl v15,c32,rt - lvxl v16,c48,rt - addi rt,rs,64 - lvxl v17,0,rs - lvxl v18,c16,rs - lvxl v19,c32,rs - lvxl v20,c48,rs - addi rs,rs,128 - lvxl v21,0,rt - lvxl v22,c16,rt - lvxl v23,c32,rt - lvxl v24,c48,rt - addi rt,rs,64 - lvxl v25,0,rs - lvxl v26,c16,rs - lvxl v27,c32,rs - lvxl v28,c48,rs - addi rs,rs,128 - lvxl v29,0,rt - lvxl v30,c16,rt - lvxl v31,c32,rt - lvxl v0,c48,rt - - addi rt,rd,64 + lvxl v5,c64,rs + lvxl v6,c80,rs + lvxl v7,c96,rs + lvxl v8,c112,rs + lvxl v9,0,r2 + lvxl v10,c16,r2 + lvxl v11,c32,r2 + lvxl v12,c48,r2 + lvxl v13,c64,r2 + lvxl v14,c80,r2 + lvxl v15,c96,r2 + lvxl v16,c112,r2 + addi r2,rd,128 // point to 2nd 128 bytes of dest + bne-- cr7,1f // skip dcbz's if cr7 beq has been turned off by kernel + dcbz128 0,rd + dcbz128 0,r2 +1: + addi rs,rs,256 // point to next source chunk stvxl v1,0,rd stvxl v2,c16,rd stvxl v3,c32,rd stvxl v4,c48,rd - addi rd,rd,128 - stvxl v5,0,rt - stvxl v6,c16,rt - stvxl v7,c32,rt - stvxl v8,c48,rt - addi rt,rd,64 - stvxl v9,0,rd - stvxl v10,c16,rd - stvxl v11,c32,rd - stvxl v12,c48,rd - addi rd,rd,128 - stvxl v13,0,rt - stvxl v14,c16,rt - stvxl v15,c32,rt - stvxl v16,c48,rt - addi rt,rd,64 - stvxl v17,0,rd - stvxl v18,c16,rd - stvxl v19,c32,rd - stvxl v20,c48,rd - addi rd,rd,128 - stvxl v21,0,rt - stvxl v22,c16,rt - stvxl v23,c32,rt - stvxl v24,c48,rt - addi rt,rd,64 - stvxl v25,0,rd - stvxl v26,c16,rd - stvxl v27,c32,rd - stvxl v28,c48,rd - addi rd,rd,128 - stvxl v29,0,rt - stvxl v30,c16,rt - stvxl v31,c32,rt - stvxl v0,c48,rt - bge++ LalignedLoop // loop if another 512 bytes to go - -// End of aligned main loop. Handle up to 511 leftover bytes. - - srwi. r0,rc,5 // get count of 32-byte chunks remaining - beq Ldone // none - rlwinm rc,rc,0,0x1F // mask count down to 0..31 leftover bytes - mtctr r0 -1: // loop over 32-byte chunks - lvx v1,0,rs - lvx v2,c16,rs - addi rs,rs,32 - stvx v1,0,rd - stvx v2,c16,rd - addi rd,rd,32 - bdnz 1b - - -// Done, except for 0..31 leftovers at end. Restore non-volatiles. + stvxl v5,c64,rd + stvxl v6,c80,rd + stvxl v7,c96,rd + stvxl v8,c112,rd + addi rd,rd,256 // point to next dest chunk + stvxl v9,0,r2 + stvxl v10,c16,r2 + stvxl v11,c32,r2 + stvxl v12,c48,r2 + stvxl v13,c64,r2 + stvxl v14,c80,r2 + stvxl v15,c96,r2 + stvxl v16,c112,r2 + bdnz++ LalignedLoop // loop if another 256 bytes to go + + +// Done, except for 0..255 leftover bytes at end. // rs = source ptr // rd = dest ptr -// rc = count (0..31) +// rc = remaining count in low 7 bits // rv = caller's vrsave +// rx = caller's return address Ldone: - cmpwi rc,0 // any leftover bytes? - lwz r13,rzR13(r1) // restore non-volatiles from redzone - lwz r14,rzR14(r1) - lwz r15,rzR15(r1) - li r0,rzV20 - lvx v20,r1,r0 - li r0,rzV21 - lvx v21,r1,r0 - li r0,rzV22 - lvx v22,r1,r0 - li r0,rzV23 - lvx v23,r1,r0 - li r0,rzV24 - lvx v24,r1,r0 - li r0,rzV25 - lvx v25,r1,r0 - li r0,rzV26 - lvx v26,r1,r0 - li r0,rzV27 - lvx v27,r1,r0 - li r0,rzV28 - lvx v28,r1,r0 - li r0,rzV29 - lvx v29,r1,r0 - li r0,rzV30 - lvx v30,r1,r0 - li r0,rzV31 - lvx v31,r1,r0 - mtspr vrsave,rv // restore caller's bitmask - beqlr // done if no leftover bytes - - -// Handle 1..31 leftover bytes at end. - - mtctr rc // set up loop count - b 1f - - .align 5 -1: - lbz r0,0(rs) - addi rs,rs,1 - stb r0,0(rd) - addi rd,rd,1 - bdnz 1b + andi. r5,rc,0xFF // any leftover bytes? 
(0..255) + mtspr vrsave,rv // restore bitmap of live vr's + mr r3,rd + mr r4,rs + bnela _COMM_PAGE_MEMCPY // copy leftover bytes + + mtlr rx // restore return address + ld r3,rzR3(r1) // restore non-volatile GPRs from redzone + ld r13,rzR13(r1) + ld r14,rzR14(r1) + ld r15,rzR15(r1) + ld r16,rzR16(r1) blr - COMMPAGE_DESCRIPTOR(bigcopy_970,_COMM_PAGE_BIGCOPY,0,0,0) // load on all machines for now + COMMPAGE_DESCRIPTOR(bigcopy_970,_COMM_PAGE_BIGCOPY,0,0,kPort32to64+kCommPageBoth) diff --git a/osfmk/ppc/commpage/bzero_128.s b/osfmk/ppc/commpage/bzero_128.s index 1b127e0e1..055ef9968 100644 --- a/osfmk/ppc/commpage/bzero_128.s +++ b/osfmk/ppc/commpage/bzero_128.s @@ -28,12 +28,26 @@ .text .align 2 - .globl EXT(bzero_128) - +/* + * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary + * to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following + * simple transformations: + * - all word compares are changed to doubleword + * - all "srwi[.]" opcodes are changed to "srdi[.]" + * Nothing else is done. For this to work, the following rules must be + * carefully followed: + * - do not use carry or overflow + * - only use record mode if you are sure the results are mode-invariant + * for example, all "andi." and almost all "rlwinm." are fine + * - do not use "slwi", "slw", or "srw" + * An imaginative programmer could break the porting model in other ways, but the above + * are the most likely problem areas. It is perhaps surprising how well in practice + * this simple method works. + */ -// ********************* -// * B Z E R O _ 1 2 8 * -// ********************* +// ********************** +// * B Z E R O _ 1 2 8 * +// ********************** // // For 64-bit processors with a 128-byte cache line. // @@ -42,7 +56,7 @@ // r3 = original ptr, not changed since memset returns it // r4 = count of bytes to set // r9 = working operand ptr -// We do not touch r2 and r10-r12, which some callers depend on. +// WARNING: We do not touch r2 and r10-r12, which some callers depend on. .align 5 bzero_128: // void bzero(void *b, size_t len); @@ -150,4 +164,5 @@ Ltail: stb r0,0(r9) blr - COMMPAGE_DESCRIPTOR(bzero_128,_COMM_PAGE_BZERO,kCache128+k64Bit,0,kCommPageMTCRF) + COMMPAGE_DESCRIPTOR(bzero_128,_COMM_PAGE_BZERO,kCache128+k64Bit,0, \ + kCommPageMTCRF+kCommPageBoth+kPort32to64) diff --git a/osfmk/ppc/commpage/bzero_32.s b/osfmk/ppc/commpage/bzero_32.s index 57b6579bf..9bee8a55a 100644 --- a/osfmk/ppc/commpage/bzero_32.s +++ b/osfmk/ppc/commpage/bzero_32.s @@ -28,7 +28,6 @@ .text .align 2 - .globl EXT(bzero_32) // ******************* @@ -122,4 +121,4 @@ Ltail: stb r0,0(r9) blr - COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,0) + COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32) diff --git a/osfmk/ppc/commpage/cacheflush.s b/osfmk/ppc/commpage/cacheflush.s index 8b53f91e9..0b8cb48ae 100644 --- a/osfmk/ppc/commpage/cacheflush.s +++ b/osfmk/ppc/commpage/cacheflush.s @@ -28,19 +28,19 @@ .text .align 2 - .globl EXT(commpage_flush_dcache) - .globl EXT(commpage_flush_icache) // ********************************************* // * C O M M P A G E _ F L U S H _ D C A C H E * // ********************************************* // +// Note that this routine is called both in 32 and 64-bit mode. +// // r3 = ptr to 1st byte to flush // r4 = length to flush (may be 0) commpage_flush_dcache: - cmpwi r4,0 // length 0? + mr. 
r4,r4 // test length for 0 in mode-independent way lhz r5,_COMM_PAGE_CACHE_LINESIZE(0) subi r9,r5,1 // get (linesize-1) and r0,r3,r9 // get offset within line of 1st byte @@ -56,18 +56,20 @@ commpage_flush_dcache: sync // make sure lines are flushed before we return blr - COMMPAGE_DESCRIPTOR(commpage_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,0,0,0) // matches all CPUs + COMMPAGE_DESCRIPTOR(commpage_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,0,0,kCommPageBoth) // ********************************************* // * C O M M P A G E _ F L U S H _ I C A C H E * // ********************************************* // +// Note that this routine is called both in 32 and 64-bit mode. +// // r3 = ptr to 1st byte to flush // r4 = length to flush (may be 0) commpage_flush_icache: - cmpwi r4,0 // length 0? + mr. r4,r4 // test length for 0 in mode-independent way lhz r5,_COMM_PAGE_CACHE_LINESIZE(0) subi r9,r5,1 // get (linesize-1) and r0,r3,r9 // get offset within line of 1st byte @@ -88,10 +90,16 @@ commpage_flush_icache: icbi 0,r7 add r7,r7,r5 bne 2b + + // The following sync is only needed on MP machines, probably only on + // 7400-family MP machines. But because we're not certain of this, and + // this isn't a speed critical routine, we are conservative and always sync. + + sync // wait until other processors see the icbi's isync // make sure we haven't prefetched old instructions blr - COMMPAGE_DESCRIPTOR(commpage_flush_icache,_COMM_PAGE_FLUSH_ICACHE,0,0,0) // matches all CPUs + COMMPAGE_DESCRIPTOR(commpage_flush_icache,_COMM_PAGE_FLUSH_ICACHE,0,0,kCommPageBoth) diff --git a/osfmk/ppc/commpage/commpage.c b/osfmk/ppc/commpage/commpage.c index 544a0bbb2..d7eca5030 100644 --- a/osfmk/ppc/commpage/commpage.c +++ b/osfmk/ppc/commpage/commpage.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -33,55 +33,189 @@ * The source files should be in osfmk/ppc/commpage/. * * 3. Add a ptr to your new commpage_descriptor(s) in the "routines" - * array in commpage_populate(). Of course, you'll also have to - * declare them "extern" in commpage_populate(). + * static array below. Of course, you'll also have to declare them + * "extern". * * 4. Write the code in Libc to use the new routine. 
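 *
 *    For illustration only (the routine and slot names here are hypothetical,
 *    not part of this patch), such a source file ends with a descriptor, eg:
 *
 *        my_routine:
 *            blr
 *            COMMPAGE_DESCRIPTOR(my_routine,_COMM_PAGE_MY_SLOT,0,0,kCommPageBoth)
 *
 *    where the musthave/canthave fields (here 0,0) select which processors
 *    may use this implementation, and step 3 then adds "&my_routine" to the
 *    "routines" array below.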
*/ #include #include +#include #include #include #include #include #include #include -#include +#include +#include -static char *next = NULL; // next available byte in comm page -static int cur_routine = 0; // comm page address of "current" routine -static int matched; // true if we've found a match for "current" routine +extern vm_map_t com_region_map32; // the 32-bit shared submap, set up in vm init +extern vm_map_t com_region_map64; // the 64-bit shared submap +char *commPagePtr32 = NULL; // virtual address of 32-bit comm page in kernel map +char *commPagePtr64 = NULL; // and 64-bit commpage int _cpu_capabilities = 0; // define the capability vector -char *commPagePtr = NULL; // virtual address of comm page in kernel map - - -/* Allocate the commpages and add to the shared submap created by vm: +static char *next; // next available byte in comm page +static int cur_routine; // comm page address of "current" routine +static int matched; // true if we've found a match for "current" routine +static char *commPagePtr; // virtual address in kernel of commpage we are working on + +extern commpage_descriptor compare_and_swap32_on32; +extern commpage_descriptor compare_and_swap32_on64; +extern commpage_descriptor compare_and_swap64; +extern commpage_descriptor atomic_enqueue32; +extern commpage_descriptor atomic_enqueue64; +extern commpage_descriptor atomic_dequeue32_on32; +extern commpage_descriptor atomic_dequeue32_on64; +extern commpage_descriptor atomic_dequeue64; +extern commpage_descriptor memory_barrier_up; +extern commpage_descriptor memory_barrier_mp32; +extern commpage_descriptor memory_barrier_mp64; +extern commpage_descriptor atomic_add32; +extern commpage_descriptor atomic_add64; +extern commpage_descriptor mach_absolute_time_32; +extern commpage_descriptor mach_absolute_time_64; +extern commpage_descriptor mach_absolute_time_lp64; +extern commpage_descriptor spinlock_32_try_mp; +extern commpage_descriptor spinlock_32_try_up; +extern commpage_descriptor spinlock_64_try_mp; +extern commpage_descriptor spinlock_64_try_up; +extern commpage_descriptor spinlock_32_lock_mp; +extern commpage_descriptor spinlock_32_lock_up; +extern commpage_descriptor spinlock_64_lock_mp; +extern commpage_descriptor spinlock_64_lock_up; +extern commpage_descriptor spinlock_32_unlock_mp; +extern commpage_descriptor spinlock_32_unlock_up; +extern commpage_descriptor spinlock_64_unlock_mp; +extern commpage_descriptor spinlock_64_unlock_up; +extern commpage_descriptor pthread_getspecific_sprg3_32; +extern commpage_descriptor pthread_getspecific_sprg3_64; +extern commpage_descriptor pthread_getspecific_uftrap; +extern commpage_descriptor gettimeofday_32; +extern commpage_descriptor gettimeofday_g5_32; +extern commpage_descriptor gettimeofday_g5_64; +extern commpage_descriptor commpage_flush_dcache; +extern commpage_descriptor commpage_flush_icache; +extern commpage_descriptor pthread_self_sprg3; +extern commpage_descriptor pthread_self_uftrap; +extern commpage_descriptor spinlock_relinquish; +extern commpage_descriptor bzero_32; +extern commpage_descriptor bzero_128; +extern commpage_descriptor bcopy_g3; +extern commpage_descriptor bcopy_g4; +extern commpage_descriptor bcopy_970; +extern commpage_descriptor bcopy_64; +extern commpage_descriptor compare_and_swap32_on32b; +extern commpage_descriptor compare_and_swap32_on64b; +extern commpage_descriptor compare_and_swap64b; +extern commpage_descriptor memset_64; +extern commpage_descriptor memset_g3; +extern commpage_descriptor memset_g4; +extern 
commpage_descriptor memset_g5; +extern commpage_descriptor bigcopy_970; + +/* The list of all possible commpage routines. WARNING: the check for overlap + * assumes that these routines are in strictly ascending order, sorted by address + * in the commpage. We panic if not. + */ +static commpage_descriptor *routines[] = { + &compare_and_swap32_on32, + &compare_and_swap32_on64, + &compare_and_swap64, + &atomic_enqueue32, + &atomic_enqueue64, + &atomic_dequeue32_on32, + &atomic_dequeue32_on64, + &atomic_dequeue64, + &memory_barrier_up, + &memory_barrier_mp32, + &memory_barrier_mp64, + &atomic_add32, + &atomic_add64, + &mach_absolute_time_32, + &mach_absolute_time_64, + &mach_absolute_time_lp64, + &spinlock_32_try_mp, + &spinlock_32_try_up, + &spinlock_64_try_mp, + &spinlock_64_try_up, + &spinlock_32_lock_mp, + &spinlock_32_lock_up, + &spinlock_64_lock_mp, + &spinlock_64_lock_up, + &spinlock_32_unlock_mp, + &spinlock_32_unlock_up, + &spinlock_64_unlock_mp, + &spinlock_64_unlock_up, + &pthread_getspecific_sprg3_32, + &pthread_getspecific_sprg3_64, + &pthread_getspecific_uftrap, + &gettimeofday_32, + &gettimeofday_g5_32, + &gettimeofday_g5_64, + &commpage_flush_dcache, + &commpage_flush_icache, + &pthread_self_sprg3, + &pthread_self_uftrap, + &spinlock_relinquish, + &bzero_32, + &bzero_128, + &bcopy_g3, + &bcopy_g4, + &bcopy_970, + &bcopy_64, + &compare_and_swap32_on32b, + &compare_and_swap32_on64b, + &compare_and_swap64b, + &memset_64, + &memset_g3, + &memset_g4, + &memset_g5, + &bigcopy_970, + NULL }; + + +/* Allocate the commpages and add to one of the shared submaps created by vm. + * Called once each for the 32 and 64-bit submaps. * 1. allocate pages in the kernel map (RW) * 2. wire them down * 3. make a memory entry out of them * 4. map that entry into the shared comm region map (R-only) */ static void* -commpage_allocate( void ) +commpage_allocate( + vm_map_t submap ) // com_region_map32 or com_region_map64 { - extern vm_map_t com_region_map; // the shared submap, set up in vm init vm_offset_t kernel_addr; // address of commpage in kernel map vm_offset_t zero = 0; vm_size_t size = _COMM_PAGE_AREA_USED; // size actually populated + vm_map_entry_t entry; ipc_port_t handle; - if (com_region_map == NULL) - panic("commpage map is null"); + if (submap == NULL) + panic("commpage submap is null"); if (vm_allocate(kernel_map,&kernel_addr,_COMM_PAGE_AREA_USED,VM_FLAGS_ANYWHERE)) panic("cannot allocate commpage"); if (vm_map_wire(kernel_map,kernel_addr,kernel_addr+_COMM_PAGE_AREA_USED,VM_PROT_DEFAULT,FALSE)) panic("cannot wire commpage"); - + + /* + * Now that the object is created and wired into the kernel map, mark it so that no delay + * copy-on-write will ever be performed on it as a result of mapping it into user-space. + * If such a delayed copy ever occurred, we could remove the kernel's wired mapping - and + * that would be a real disaster. + * + * JMM - What we really need is a way to create it like this in the first place. 
+ */ + if (!vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map) + panic("cannot find commpage entry"); + entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + if (mach_make_memory_entry( kernel_map, // target map &size, // size kernel_addr, // offset (address in kernel map) @@ -90,7 +224,7 @@ commpage_allocate( void ) NULL )) // parent_entry panic("cannot make entry for commpage"); - if (vm_map_64( com_region_map, // target map (shared submap) + if (vm_map_64( submap, // target map (shared submap) &zero, // address (map into 1st page in submap) _COMM_PAGE_AREA_USED, // size 0, // mask @@ -143,13 +277,11 @@ commpage_cpus( void ) static void commpage_init_cpu_capabilities( void ) { - struct per_proc_info *pp; procFeatures *pfp; int cpus; int available; - pp = per_proc_info; // use CPU 0's per-proc - pfp = &pp->pf; // point to features in per-proc + pfp = &(PerProcTable[0].ppe_vaddr->pf); // point to features in per-proc available = pfp->Available; // If AltiVec is disabled make sure it is not reported as available. @@ -161,26 +293,29 @@ commpage_init_cpu_capabilities( void ) _cpu_capabilities |= commpage_time_dcba(); // ...and set kDcbaRecomended if it helps. } - cpus = commpage_cpus(); // how many CPUs do we have + cpus = commpage_cpus(); // how many CPUs do we have if (cpus == 1) _cpu_capabilities |= kUP; _cpu_capabilities |= (cpus << kNumCPUsShift); + + if (_cpu_capabilities & k64Bit) // 64-bit processors use SPRG3 for TLS + _cpu_capabilities |= kFastThreadLocalStorage; } /* Copy data into commpage. */ - void +static void commpage_stuff( - int address, - void *source, - int length ) + int address, + const void *source, + int length ) { char *dest = commpage_addr_of(address); if (dest < next) panic("commpage overlap: %08 - %08X", dest, next); - bcopy((char*)source,dest,length); + bcopy((const char*)source,dest,length); next = (dest + length); } @@ -200,13 +335,11 @@ commpage_change( { int words = bytes >> 2; uint32_t word; - int found_one = 0; while( (--words) >= 0 ) { word = *ptr; if ((word & search_mask)==search_pattern) { if ((check==NULL) || (check(word))) { // check instruction if necessary - found_one = 1; word &= ~new_mask; word |= new_pattern; *ptr = word; @@ -214,9 +347,6 @@ commpage_change( } ptr++; } - - if (!found_one) - panic("commpage opcode not found"); } @@ -235,7 +365,21 @@ commpage_onebit( } -/* Handle kCommPageDCBA bit: this routine uses DCBA. If the machine we're +/* Check to see if a RLWINM (whose ME is 31) is a SRWI. Since to shift right n bits + * you must "RLWINM ra,rs,32-n,n,31", if (SH+MB)==32 then we have a SRWI. + */ +static int +commpage_srwi( + uint32_t rlwinm ) +{ + int sh = (rlwinm >> 11) & 0x1F; // extract SH field of RLWINM, ie bits 16-20 + int mb = (rlwinm >> 6 ) & 0x1F; // extract MB field of RLWINM, ie bits 21-25 + + return (sh + mb) == 32; // it is a SRWI if (SH+MB)==32 +} + + +/* Handle kCommPageDCBA bit: the commpage routine uses DCBA. If the machine we're * running on doesn't benefit from use of that instruction, map them to NOPs * in the commpage. */ @@ -246,7 +390,7 @@ commpage_handle_dcbas( { uint32_t *ptr, search_mask, search, replace_mask, replace; - if ((_cpu_capabilities & kDcbaAvailable) == 0) { + if ( (_cpu_capabilities & kDcbaRecommended) == 0 ) { ptr = commpage_addr_of(address); search_mask = 0xFC0007FE; // search x-form opcode bits @@ -259,7 +403,7 @@ commpage_handle_dcbas( } -/* Handle kCommPageSYNC bit: this routine uses SYNC or LWSYNC. 
If we're +/* Handle kCommPageSYNC bit: this routine uses SYNC, LWSYNC, or EIEIO. If we're * running on a UP machine, map them to NOPs. */ static void @@ -272,8 +416,31 @@ commpage_handle_syncs( if (_NumCPUs() == 1) { ptr = commpage_addr_of(address); - search_mask = 0xFC0007FE; // search x-form opcode bits - search = 0x7C0004AC; // for a SYNC or LWSYNC + search_mask = 0xFC0005FE; // search x-form opcode bits (but ignore bit 0x00000200) + search = 0x7C0004AC; // for a SYNC, LWSYNC, or EIEIO + replace_mask = 0xFFFFFFFF; // replace all bits... + replace = 0x60000000; // ...with a NOP + + commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL); + } +} + + +/* Handle kCommPageISYNC bit: this routine uses ISYNCs. If we're running on a UP machine, + * map them to NOPs. + */ +static void +commpage_handle_isyncs( + int address, + int length ) +{ + uint32_t *ptr, search_mask, search, replace_mask, replace; + + if (_NumCPUs() == 1) { + ptr = commpage_addr_of(address); + + search_mask = 0xFC0007FE; // search xl-form opcode bits + search = 0x4C00012C; // for an ISYNC replace_mask = 0xFFFFFFFF; // replace all bits... replace = 0x60000000; // ...with a NOP @@ -308,18 +475,70 @@ commpage_handle_mtcrfs( } +/* Port 32-bit code to 64-bit for use in the 64-bit commpage. This sounds fancier than + * it is. We do the following: + * - map "cmpw*" into "cmpd*" + * - map "srwi" into "srdi" + * Perhaps surprisingly, this is enough to permit lots of code to run in 64-bit mode, as + * long as it is written with this in mind. + */ +static void +commpage_port_32_to_64( + int address, + int length ) +{ + uint32_t *ptr, search_mask, search, replace_mask, replace; + + ptr = commpage_addr_of(address); + + search_mask = 0xFC2007FE; // search x-form opcode bits (and L bit) + search = 0x7C000000; // for a CMPW + replace_mask = 0x00200000; // replace bit 10 (L)... + replace = 0x00200000; // ...with a 1-bit, converting word to doubleword compares + commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL); + + search_mask = 0xFC2007FE; // search x-form opcode bits (and L bit) + search = 0x7C000040; // for a CMPLW + replace_mask = 0x00200000; // replace bit 10 (L)... + replace = 0x00200000; // ...with a 1-bit, converting word to doubleword compares + commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL); + + search_mask = 0xFC200000; // search d-form opcode bits (and L bit) + search = 0x28000000; // for a CMPLWI + replace_mask = 0x00200000; // replace bit 10 (L)... + replace = 0x00200000; // ...with a 1-bit, converting word to doubleword compares + commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL); + + search_mask = 0xFC200000; // search d-form opcode bits (and L bit) + search = 0x2C000000; // for a CMPWI + replace_mask = 0x00200000; // replace bit 10 (L)... + replace = 0x00200000; // ...with a 1-bit, converting word to doubleword compares + commpage_change(ptr,length,search_mask,search,replace_mask,replace,NULL); + + search_mask = 0xFC00003E; // search d-form opcode bits and ME (mask end) field + search = 0x5400003E; // for an RLWINM with ME=31 (which might be a "srwi") + replace_mask = 0xFC00003E; // then replace RLWINM's opcode and ME field to make a RLDICL + replace = 0x78000002; // opcode is 30, ME is 0, except we add 32 to SH amount + commpage_change(ptr,length,search_mask,search,replace_mask,replace,commpage_srwi); +} + + /* Copy a routine into comm page if it matches running machine. 
*/ static void commpage_stuff_routine( - commpage_descriptor *rd ) + commpage_descriptor *rd, + int mode ) // kCommPage32 or kCommPage64 { char *routine_code; int must,cant; + + if ( (rd->special & mode) == 0 ) // is this routine useable in this mode? + return; if (rd->commpage_address != cur_routine) { if ((cur_routine!=0) && (matched==0)) - panic("commpage no match"); + panic("commpage no match for last, next address %08x", rd->commpage_address); cur_routine = rd->commpage_address; matched = 0; } @@ -329,7 +548,7 @@ commpage_stuff_routine( if ((must == rd->musthave) && (cant == 0)) { if (matched) - panic("commpage duplicate matches"); + panic("commpage multiple matches for address %08x", rd->commpage_address); matched = 1; routine_code = ((char*)rd) + rd->code_offset; @@ -341,38 +560,49 @@ commpage_stuff_routine( if (rd->special & kCommPageSYNC) commpage_handle_syncs(rd->commpage_address,rd->code_length); + if (rd->special & kCommPageISYNC) + commpage_handle_isyncs(rd->commpage_address,rd->code_length); + if (rd->special & kCommPageMTCRF) commpage_handle_mtcrfs(rd->commpage_address,rd->code_length); + + if ((mode == kCommPage64) && (rd->special & kPort32to64)) + commpage_port_32_to_64(rd->commpage_address,rd->code_length); } } -/* Fill in commpage: called once, during kernel initialization, from the - * startup thread before user-mode code is running. - * See the top of this file for a list of what you have to do to add - * a new routine to the commpage. - */ -void -commpage_populate( void ) +/* Fill in the 32- or 64-bit commpage. Called once for each. */ + +static void +commpage_populate_one( + vm_map_t submap, // the map to populate + char ** kernAddressPtr, // address within kernel of this commpage + int mode, // either kCommPage32 or kCommPage64 + const char* signature ) // "commpage 32-bit" or "commpage 64-bit" { char c1; short c2; addr64_t c8; static double two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52 static double ten6 = 1000000.0; // 10**6 + static uint64_t magicFE = 0xFEFEFEFEFEFEFEFFLL; // used to find 0s in strings + static uint64_t magic80 = 0x8080808080808080LL; // also used to find 0s commpage_descriptor **rd; short version = _COMM_PAGE_THIS_VERSION; - - commPagePtr = (char*) commpage_allocate(); - - commpage_init_cpu_capabilities(); - + next = NULL; // initialize next available byte in the commpage + cur_routine = 0; // initialize comm page address of "current" routine + + commPagePtr = (char*) commpage_allocate( submap ); + *kernAddressPtr = commPagePtr; // save address either in commPagePtr32 or 64 /* Stuff in the constants. We move things into the comm page in strictly * ascending order, so we can check for overlap and panic if so. */ - + + commpage_stuff(_COMM_PAGE_SIGNATURE,signature,strlen(signature)); + commpage_stuff(_COMM_PAGE_VERSION,&version,2); commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int)); @@ -391,100 +621,53 @@ commpage_populate( void ) c2 = 128; commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2); - commpage_stuff(_COMM_PAGE_2_TO_52,&two52,8); - + commpage_stuff(_COMM_PAGE_2_TO_52,&two52,8); commpage_stuff(_COMM_PAGE_10_TO_6,&ten6,8); + commpage_stuff(_COMM_PAGE_MAGIC_FE,&magicFE,8); + commpage_stuff(_COMM_PAGE_MAGIC_80,&magic80,8); c8 = 0; // 0 timestamp means "disabled" commpage_stuff(_COMM_PAGE_TIMEBASE,&c8,8); commpage_stuff(_COMM_PAGE_TIMESTAMP,&c8,8); commpage_stuff(_COMM_PAGE_SEC_PER_TICK,&c8,8); - /* Now the routines. We try each potential routine in turn, * and copy in any that "match" the platform we are running on. 
* We require that exactly one routine match for each slot in the * comm page, and panic if not. - * - * The check for overlap assumes that these routines are - * in strictly ascending order, sorted by address in the - * comm page. */ - - extern commpage_descriptor mach_absolute_time_32; - extern commpage_descriptor mach_absolute_time_64; - extern commpage_descriptor spinlock_32_try_mp; - extern commpage_descriptor spinlock_32_try_up; - extern commpage_descriptor spinlock_64_try_mp; - extern commpage_descriptor spinlock_64_try_up; - extern commpage_descriptor spinlock_32_lock_mp; - extern commpage_descriptor spinlock_32_lock_up; - extern commpage_descriptor spinlock_64_lock_mp; - extern commpage_descriptor spinlock_64_lock_up; - extern commpage_descriptor spinlock_32_unlock_mp; - extern commpage_descriptor spinlock_32_unlock_up; - extern commpage_descriptor spinlock_64_unlock_mp; - extern commpage_descriptor spinlock_64_unlock_up; - extern commpage_descriptor pthread_getspecific_sprg3; - extern commpage_descriptor pthread_getspecific_uftrap; - extern commpage_descriptor gettimeofday_32; - extern commpage_descriptor gettimeofday_64; - extern commpage_descriptor commpage_flush_dcache; - extern commpage_descriptor commpage_flush_icache; - extern commpage_descriptor pthread_self_sprg3; - extern commpage_descriptor pthread_self_uftrap; - extern commpage_descriptor spinlock_relinquish; - extern commpage_descriptor bzero_32; - extern commpage_descriptor bzero_128; - extern commpage_descriptor bcopy_g3; - extern commpage_descriptor bcopy_g4; - extern commpage_descriptor bcopy_970; - extern commpage_descriptor bcopy_64; - extern commpage_descriptor bigcopy_970; - - static commpage_descriptor *routines[] = { - &mach_absolute_time_32, - &mach_absolute_time_64, - &spinlock_32_try_mp, - &spinlock_32_try_up, - &spinlock_64_try_mp, - &spinlock_64_try_up, - &spinlock_32_lock_mp, - &spinlock_32_lock_up, - &spinlock_64_lock_mp, - &spinlock_64_lock_up, - &spinlock_32_unlock_mp, - &spinlock_32_unlock_up, - &spinlock_64_unlock_mp, - &spinlock_64_unlock_up, - &pthread_getspecific_sprg3, - &pthread_getspecific_uftrap, - &gettimeofday_32, - &gettimeofday_64, - &commpage_flush_dcache, - &commpage_flush_icache, - &pthread_self_sprg3, - &pthread_self_uftrap, - &spinlock_relinquish, - &bzero_32, - &bzero_128, - &bcopy_g3, - &bcopy_g4, - &bcopy_970, - &bcopy_64, - &bigcopy_970, - NULL }; for( rd = routines; *rd != NULL ; rd++ ) - commpage_stuff_routine(*rd); + commpage_stuff_routine(*rd,mode); if (!matched) panic("commpage no match on last routine"); if (next > (commPagePtr + _COMM_PAGE_AREA_USED)) panic("commpage overflow"); - - sync_cache_virtual((vm_offset_t) commPagePtr,_COMM_PAGE_AREA_USED); // make all that new code executable - + + + // make all that new code executable + + sync_cache_virtual((vm_offset_t) commPagePtr,_COMM_PAGE_AREA_USED); } + +/* Fill in commpage: called once, during kernel initialization, from the + * startup thread before user-mode code is running. + * + * See the top of this file for a list of what you have to do to add + * a new routine to the commpage. 
+ */ + +void +commpage_populate( void ) +{ + commpage_init_cpu_capabilities(); + commpage_populate_one( com_region_map32, &commPagePtr32, kCommPage32, "commpage 32-bit"); + if (_cpu_capabilities & k64Bit) { + commpage_populate_one( com_region_map64, &commPagePtr64, kCommPage64, "commpage 64-bit"); + pmap_init_sharedpage((vm_offset_t)commPagePtr64); // Do the 64-bit version + } + +} diff --git a/osfmk/ppc/commpage/commpage.h b/osfmk/ppc/commpage/commpage.h index 178547e14..9ba062f5d 100644 --- a/osfmk/ppc/commpage/commpage.h +++ b/osfmk/ppc/commpage/commpage.h @@ -31,19 +31,26 @@ /* Special check bits for the compage_descriptor "special" field. */ #define kCommPageDCBA 0x0001 // this routine uses DCBA, map to NOP if not appropriate -#define kCommPageSYNC 0x0002 // this routine uses SYNC, map to NOP if UP -#define kCommPageMTCRF 0x0004 // set bit 11 in MTCRF if only 1 cr specified +#define kCommPageSYNC 0x0002 // this routine uses SYNC, LWSYNC, or EIEIO, map to NOP if UP +#define kCommPageISYNC 0x0004 // this routine uses ISYNC, map to NOP if UP +#define kCommPageMTCRF 0x0008 // set bit 11 in MTCRF if only 1 cr specified + +#define kPort32to64 0x1000 // written for 32-bit, must port to 64-bit +#define kCommPage64 0x2000 // this routine is useable in 64-bit mode +#define kCommPage32 0x4000 // this routine is useable in 32-bit mode +#define kCommPageBoth (kCommPage32+kCommPage64) #ifdef __ASSEMBLER__ #define COMMPAGE_DESCRIPTOR(label,address,must,cant,special) \ + .globl EXT(label) @\ LEXT(label) @\ .short label-. @\ .short .-label-2 @\ .short address @\ .short special @\ - .long must @\ + .long must @\ .long cant @@ -64,7 +71,8 @@ typedef struct commpage_descriptor { } commpage_descriptor; -extern char *commPagePtr; // virt address of commpage in kernel map +extern char *commPagePtr32; // virt address of 32-bit commpage in kernel map +extern char *commPagePtr64; // virt address of 64-bit commpage in kernel map extern void commpage_set_timestamp(uint64_t tbr,uint32_t secs,uint32_t usecs,uint32_t ticks_per_sec); diff --git a/osfmk/ppc/commpage/commpage_asm.s b/osfmk/ppc/commpage/commpage_asm.s index 266d437ec..b372b979e 100644 --- a/osfmk/ppc/commpage/commpage_asm.s +++ b/osfmk/ppc/commpage/commpage_asm.s @@ -73,7 +73,7 @@ Ldata: * * C O M M P A G E _ S E T _ T I M E S T A M P * * *********************************************** * - * Update the gettimeofday() shared data on the commpage, as follows: + * Update the gettimeofday() shared data on the commpages, as follows: * _COMM_PAGE_TIMESTAMP = a BSD-style pair of uint_32's for secs and usecs * _COMM_PAGE_TIMEBASE = the timebase at which the timestamp was valid * _COMM_PAGE_SEC_PER_TICK = multiply timebase ticks by this to get seconds (double) @@ -99,7 +99,8 @@ Ldata: * r7 = divisor (ie, timebase ticks per sec) * We set up: * r8 = ptr to our static data (kkBinary0, kkDouble1, kkTicksPerSec) - * r9 = ptr to comm page in kernel map + * r9 = ptr to 32-bit commpage in kernel map + * r10 = ptr to 64-bit commpage in kernel map * * --> Interrupts must be disabled and rtclock locked when called. <-- */ @@ -113,16 +114,24 @@ LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,di or. r0,r3,r4 // is timebase 0? 
(thus disabled) lis r8,hi16(Ldata) // point to our data - lis r9,ha16(EXT(commPagePtr)) // get ptr to address of commpage in kernel map + lis r9,ha16(EXT(commPagePtr32)) // get ptrs to address of commpages in kernel map + lis r10,ha16(EXT(commPagePtr64)) stfd f1,rzSaveF1(r1) // save a FPR in the red zone ori r8,r8,lo16(Ldata) - lwz r9,lo16(EXT(commPagePtr))(r9) // r9 <- commPagePtr + lwz r9,lo16(EXT(commPagePtr32))(r9) // r9 <- 32-bit commpage ptr + lwz r10,lo16(EXT(commPagePtr64))(r10) // r10 <- 64-bit commpage ptr lfd f1,kkBinary0(r8) // get fixed 0s li r0,_COMM_PAGE_BASE_ADDRESS // get va in user space of commpage - cmpwi cr1,r9,0 // is commpage allocated yet? - sub r9,r9,r0 // r9 <- commpage address, biased by user va - beq-- cr1,3f // skip if not allocated - stfd f1,_COMM_PAGE_TIMEBASE(r9) // turn off the timestamp (atomically) + cmpwi cr1,r9,0 // is 32-bit commpage allocated yet? + cmpwi cr6,r10,0 // is 64-bit commpage allocated yet? + sub r9,r9,r0 // r9 <- 32-bit commpage address, biased by user va + sub r10,r10,r0 // r10<- 64-bit commpage address + beq-- cr1,3f // skip if 32-bit commpage not allocated (64-bit won't be either) + bne++ cr6,1f // skip if 64-bit commpage is allocated + mr r10,r9 // if no 64-bit commpage, point to 32-bit version with r10 too +1: + stfd f1,_COMM_PAGE_TIMEBASE(r9) // turn off the 32-bit-commpage timestamp (atomically) + stfd f1,_COMM_PAGE_TIMEBASE(r10) // and the 64-bit one too eieio // make sure all CPUs see it is off beq 3f // all we had to do is turn off timestamp @@ -130,8 +139,10 @@ LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,di stw r3,rzNewTimeBase(r1) // store new timebase so we can lfd stw r4,rzNewTimeBase+4(r1) cmpw r0,r7 // do we need to recompute _COMM_PAGE_SEC_PER_TICK? - stw r5,_COMM_PAGE_TIMESTAMP(r9) // store the new timestamp + stw r5,_COMM_PAGE_TIMESTAMP(r9) // store the new timestamp in the 32-bit page stw r6,_COMM_PAGE_TIMESTAMP+4(r9) + stw r5,_COMM_PAGE_TIMESTAMP(r10)// and the 64-bit commpage + stw r6,_COMM_PAGE_TIMESTAMP+4(r10) lfd f1,rzNewTimeBase(r1) // get timebase in a FPR so we can store atomically beq++ 2f // same ticks_per_sec, no need to recompute @@ -148,6 +159,7 @@ LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,di fsub f3,f3,f2 // get ticks_per_sec fdiv f3,f4,f3 // divide 1 by ticks_per_sec to get SEC_PER_TICK stfd f3,_COMM_PAGE_SEC_PER_TICK(r9) + stfd f3,_COMM_PAGE_SEC_PER_TICK(r10) mtfsf 0xFF,f5 // restore FPSCR lfd f2,rzSaveF2(r1) // restore FPRs lfd f3,rzSaveF3(r1) @@ -156,6 +168,7 @@ LEXT(commpage_set_timestamp) // void commpage_set_timestamp(tbr,secs,usecs,di 2: // f1 == new timestamp eieio // wait until the stores take stfd f1,_COMM_PAGE_TIMEBASE(r9) // then turn the timestamp back on (atomically) + stfd f1,_COMM_PAGE_TIMEBASE(r10) // both 3: // here once all fields updated lfd f1,rzSaveF1(r1) // restore last FPR mtmsr r11 // turn FP back off diff --git a/osfmk/ppc/commpage/gettimeofday.s b/osfmk/ppc/commpage/gettimeofday.s index e26428c1a..6d17fd9ec 100644 --- a/osfmk/ppc/commpage/gettimeofday.s +++ b/osfmk/ppc/commpage/gettimeofday.s @@ -38,8 +38,6 @@ .text .align 2 - .globl EXT(gettimeofday_32) - .globl EXT(gettimeofday_64) // ********************************* @@ -67,7 +65,7 @@ // // r3 = ptr to user's timeval structure (should not be null) -gettimeofday_32: // int gettimeofday_32(timeval *tp); +gettimeofday_32: // int gettimeofday(timeval *tp); 0: lwz r5,_COMM_PAGE_TIMEBASE+0(0) // r5,r6 = TBR at timestamp lwz r6,_COMM_PAGE_TIMEBASE+4(0) @@ -143,14 
+141,17 @@ gettimeofday_32: // int gettimeofday_32(timeval *tp); li r3,1 // return bad status so our caller will make syscall blr - COMMPAGE_DESCRIPTOR(gettimeofday_32,_COMM_PAGE_GETTIMEOFDAY,0,k64Bit,kCommPageSYNC) + COMMPAGE_DESCRIPTOR(gettimeofday_32,_COMM_PAGE_GETTIMEOFDAY,0,k64Bit,kCommPageSYNC+kCommPage32) -// ********************************* -// * G E T T I M E O F D A Y _ 6 4 * -// ********************************* +// *************************************** +// * G E T T I M E O F D A Y _ G 5 _ 3 2 * +// *************************************** +// +// This routine is called in 32-bit mode on 64-bit processors. A timeval is a struct of +// a long seconds and int useconds, so it's size depends on mode. -gettimeofday_64: // int gettimeofday_64(timeval *tp); +gettimeofday_g5_32: // int gettimeofday(timeval *tp); 0: ld r6,_COMM_PAGE_TIMEBASE(0) // r6 = TBR at timestamp ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds,useconds) @@ -204,6 +205,70 @@ gettimeofday_64: // int gettimeofday_64(timeval *tp); li r3,1 // return bad status so our caller will make syscall blr - COMMPAGE_DESCRIPTOR(gettimeofday_64,_COMM_PAGE_GETTIMEOFDAY,k64Bit,0,kCommPageSYNC) + COMMPAGE_DESCRIPTOR(gettimeofday_g5_32,_COMM_PAGE_GETTIMEOFDAY,k64Bit,0,kCommPageSYNC+kCommPage32) + + +// *************************************** +// * G E T T I M E O F D A Y _ G 5 _ 6 4 * +// *************************************** +// +// This routine is called in 64-bit mode on 64-bit processors. A timeval is a struct of +// a long seconds and int useconds, so it's size depends on mode. + +gettimeofday_g5_64: // int gettimeofday(timeval *tp); +0: + ld r6,_COMM_PAGE_TIMEBASE(0) // r6 = TBR at timestamp + ld r8,_COMM_PAGE_TIMESTAMP(0) // r8 = timestamp (seconds,useconds) + lfd f1,_COMM_PAGE_SEC_PER_TICK(0) + mftb r10 // r10 = get current timebase + lwsync // create a barrier if MP (patched to NOP if UP) + ld r11,_COMM_PAGE_TIMEBASE(0) // then get data a 2nd time + ld r12,_COMM_PAGE_TIMESTAMP(0) + cmpdi cr1,r6,0 // is the timestamp disabled? + cmpld cr6,r6,r11 // did we read a consistent set? + cmpld cr7,r8,r12 + beq-- cr1,3f // exit if timestamp disabled + crand cr6_eq,cr7_eq,cr6_eq + sub r11,r10,r6 // compute elapsed ticks from timestamp + bne-- cr6,0b // loop until we have a consistent set of data + + srdi. r0,r11,35 // has it been more than 2**35 ticks since last timestamp? + std r11,rzTicks(r1) // put ticks in redzone where we can "lfd" it + bne-- 3f // timestamp too old, so reprime + + lfd f3,rzTicks(r1) // get elapsed ticks since timestamp (fixed pt) + fcfid f4,f3 // float the tick count + fmul f5,f4,f1 // f5 <- elapsed seconds since timestamp + lfd f3,_COMM_PAGE_10_TO_6(0) // get 10**6 + fctidz f6,f5 // convert integer seconds to fixed pt + stfd f6,rzSeconds(r1) // save fixed pt integer seconds in red zone + fcfid f6,f6 // float the integer seconds + fsub f6,f5,f6 // f6 <- fractional part of elapsed seconds + fmul f6,f6,f3 // f6 <- fractional elapsed useconds + fctidz f6,f6 // convert useconds to fixed pt integer + stfd f6,rzUSeconds(r1) // store useconds into red zone + + lis r12,hi16(USEC_PER_SEC) // r12 <- 10**6 + srdi r7,r8,32 // extract seconds from doubleword timestamp + lwz r5,rzSeconds+4(r1) // r5 <- seconds since timestamp + ori r12,r12,lo16(USEC_PER_SEC) + lwz r6,rzUSeconds+4(r1) // r6 <- useconds since timestamp + add r7,r7,r5 // add elapsed seconds to timestamp seconds + add r8,r8,r6 // ditto useconds + + cmplw r8,r12 // r8 >= USEC_PER_SEC ? 
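
For readers who don't speak PPC assembly, the logic of the gettimeofday commpage routines transcribes into C roughly as below. The comm_* accessors and read_tbr() are hypothetical stand-ins for the direct commpage loads and mftb; a nonzero return means "fall back to the real syscall". This is a sketch of the protocol, not the shipped code:

    #include <stdint.h>

    #define USEC_PER_SEC 1000000

    /* hypothetical stand-ins for the commpage loads and mftb */
    extern uint64_t comm_timebase(void);      /* _COMM_PAGE_TIMEBASE          */
    extern uint64_t comm_timestamp(void);     /* _COMM_PAGE_TIMESTAMP         */
    extern double   comm_sec_per_tick(void);  /* _COMM_PAGE_SEC_PER_TICK      */
    extern uint64_t read_tbr(void);           /* current timebase (mftb)      */

    int gettimeofday_sketch(uint32_t *secs, uint32_t *usecs)
    {
        uint64_t tb, stamp, now, tb2, stamp2;

        do {
            tb    = comm_timebase();          /* TBR when timestamp was set   */
            stamp = comm_timestamp();         /* (seconds << 32) | useconds   */
            now   = read_tbr();
            /* barrier here in the real code (lwsync, NOP'd if UP) */
            tb2    = comm_timebase();         /* then read everything again   */
            stamp2 = comm_timestamp();
            if (tb == 0)
                return 1;                     /* timestamp disabled: syscall  */
        } while (tb != tb2 || stamp != stamp2);  /* until a consistent set    */

        uint64_t ticks = now - tb;            /* elapsed ticks since stamp    */
        if (ticks >> 35)
            return 1;                         /* too old: reprime via syscall */

        double   elapsed = (double)ticks * comm_sec_per_tick();
        uint32_t es = (uint32_t)elapsed;      /* whole elapsed seconds        */
        uint32_t eu = (uint32_t)((elapsed - (double)es) * USEC_PER_SEC);

        uint32_t s = (uint32_t)(stamp >> 32) + es;  /* stamp secs + elapsed   */
        uint32_t u = (uint32_t)stamp + eu;          /* stamp usecs + elapsed  */
        if (u >= USEC_PER_SEC) {              /* propagate the carry          */
            s += 1;
            u -= USEC_PER_SEC;
        }
        *secs  = s;
        *usecs = u;
        return 0;                             /* success                      */
    }
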
+ blt 2f // no + addi r7,r7,1 // add 1 to secs + sub r8,r8,r12 // subtract USEC_PER_SEC from usecs +2: + std r7,0(r3) // store secs//usecs into user's timeval + stw r8,8(r3) + li r3,0 // return success + blr +3: // too long since last timestamp or this code is disabled + li r3,1 // return bad status so our caller will make syscall + blr + + COMMPAGE_DESCRIPTOR(gettimeofday_g5_64,_COMM_PAGE_GETTIMEOFDAY,k64Bit,0,kCommPageSYNC+kCommPage64) diff --git a/osfmk/ppc/commpage/mach_absolute_time.s b/osfmk/ppc/commpage/mach_absolute_time.s index 4ccdffb5b..e2441bde5 100644 --- a/osfmk/ppc/commpage/mach_absolute_time.s +++ b/osfmk/ppc/commpage/mach_absolute_time.s @@ -28,8 +28,6 @@ .text .align 2 - .globl EXT(mach_absolute_time_32) - .globl EXT(mach_absolute_time_64) // ********************************************* @@ -45,21 +43,33 @@ mach_absolute_time_32: beqlr+ b 1b - COMMPAGE_DESCRIPTOR(mach_absolute_time_32,_COMM_PAGE_ABSOLUTE_TIME,0,k64Bit,0) + COMMPAGE_DESCRIPTOR(mach_absolute_time_32,_COMM_PAGE_ABSOLUTE_TIME,0,k64Bit,kCommPage32) // ********************************************* // * M A C H _ A B S O L U T E _ T I M E _ 6 4 * // ********************************************* // -// Why bother to special case for 64-bit? Because the "mftb" variants -// are 10 cycles each, and they are serialized. +// This is the version that is called in 32-bit mode, so we return the TBR in r3 and r4. mach_absolute_time_64: mftb r4 srdi r3,r4,32 blr - COMMPAGE_DESCRIPTOR(mach_absolute_time_64,_COMM_PAGE_ABSOLUTE_TIME,k64Bit,0,0) + COMMPAGE_DESCRIPTOR(mach_absolute_time_64,_COMM_PAGE_ABSOLUTE_TIME,k64Bit,0,kCommPage32) + + +// ************************************************* +// * M A C H _ A B S O L U T E _ T I M E _ L P 6 4 * +// ************************************************* +// +// This is the version that is called in 64-bit mode, so we return the TBR in r3. + +mach_absolute_time_lp64: + mftb r3 + blr + + COMMPAGE_DESCRIPTOR(mach_absolute_time_lp64,_COMM_PAGE_ABSOLUTE_TIME,k64Bit,0,kCommPage64) diff --git a/osfmk/ppc/commpage/memset_64.s b/osfmk/ppc/commpage/memset_64.s new file mode 100644 index 000000000..938159ac3 --- /dev/null +++ b/osfmk/ppc/commpage/memset_64.s @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define ASSEMBLER +#include +#include +#include +#include + +/* + * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary + * to 64-bit mode for use in the 64-bit commpage. 
This "port" consists of the following + * simple transformations: + * - all word compares are changed to doubleword + * - all "srwi[.]" opcodes are changed to "srdi[.]" + * Nothing else is done. For this to work, the following rules must be + * carefully followed: + * - do not use carry or overflow + * - only use record mode if you are sure the results are mode-invariant + * for example, all "andi." and almost all "rlwinm." are fine + * - do not use "slwi", "slw", or "srw" + * An imaginative programmer could break the porting model in other ways, but the above + * are the most likely problem areas. It is perhaps surprising how well in practice + * this simple method works. + */ + + .text + .align 2 + + +/* ********************* + * * M E M S E T _ 6 4 * + * ********************* + * + * This is a subroutine called by Libc memset and _memset_pattern for large nonzero + * operands (zero operands are funneled into bzero.) This version is for a + * hypothetic processor that is 64-bit but not Altivec. + * It is not optimized, since it would only be used during bringup. + * + * Registers at entry: + * r4 = count of bytes to store (must be >= 32) + * r8 = ptr to the 1st byte to store (16-byte aligned) + * r9 = ptr to 16-byte pattern to store (16-byte aligned) + * When we return: + * r3 = not changed, since memset returns it + * r4 = bytes remaining to store (will be <32) + * r7 = not changed + * r8 = ptr to next byte to store (still 16-byte aligned) + * r12 = not changed (holds return value for memset) + */ + +memset_64: + srwi r0,r4,5 // get number of 32-byte chunks (>0) + ld r10,0(r9) // load pattern + ld r11,8(r9) + rlwinm r4,r4,0,0x1F // mask down count + mtctr r0 // set up loop count + + // Loop over 32-byte chunks. +1: + std r10,0(r8) + std r11,8(r8) + std r10,16(r8) + std r11,24(r8) + addi r8,r8,32 + bdnz++ 1b + + blr + + + COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \ + kCommPageBoth+kPort32to64) diff --git a/osfmk/ppc/commpage/memset_g3.s b/osfmk/ppc/commpage/memset_g3.s new file mode 100644 index 000000000..cf9ae1d42 --- /dev/null +++ b/osfmk/ppc/commpage/memset_g3.s @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define ASSEMBLER +#include +#include +#include +#include + + .text + .align 2 + +/* ********************* + * * M E M S E T _ G 3 * + * ********************* + * + * This is a subroutine called by Libc memset and _memset_pattern for large nonzero + * operands (zero operands are funneled into bzero.) This version is for + * 32-bit processors with a 32-byte cache line and no Altivec. 
+ * + * Registers at entry: + * r4 = count of bytes to store (must be >= 32) + * r8 = ptr to the 1st byte to store (16-byte aligned) + * r9 = ptr to 16-byte pattern to store (16-byte aligned) + * When we return: + * r3 = not changed, since memset returns it + * r4 = bytes remaining to store (will be <32) + * r7 = not changed + * r8 = ptr to next byte to store (still 16-byte aligned) + * r12 = not changed (holds return value for memset) + */ + + .align 4 +memset_g3: + andi. r0,r8,16 // cache line aligned? + lfd f0,0(r9) // pick up the pattern in two FPRs + lfd f1,8(r9) + beq 1f // skip if already aligned + + // cache line align + + stfd f0,0(r8) // no, store another 16 bytes to align + stfd f1,8(r8) + subi r4,r4,16 // skip past the 16 bytes we just stored + addi r8,r8,16 + + // Loop over cache lines. This code uses a private protocol with the kernel: + // when the kernel emulates an alignment exception on a DCBZ that occurs in the + // commpage, it zeroes CR7. We use this to detect the case where we are operating on + // uncached memory, and do not use DCBZ again in this code. We assume that either + // all the operand is cacheable or none of it is, so we only check the first DCBZ. +1: + srwi. r0,r4,6 // get count of 64-byte chunks + cmpw cr7,r0,r0 // set cr7_eq (kernel turns off on alignment exception) + rlwinm r4,r4,0,0x3F // mask down to residual count (0..63) + beq Lleftover // no chunks + dcbz 0,r8 // zero first cache line (clearing cr7 if alignment exception) + mtctr r0 + li r6,32 // get an offset for DCBZ + beq+ cr7,LDcbzEnter // enter DCBZ loop (we didn't get an alignment exception) + + // Loop over 64-byte chunks without DCBZ. +LNoDcbz: + stfd f0,0(r8) + stfd f1,8(r8) + stfd f0,16(r8) + stfd f1,24(r8) + stfd f0,32(r8) + stfd f1,40(r8) + stfd f0,48(r8) + stfd f1,56(r8) + addi r8,r8,64 + bdnz LNoDcbz + + b Lleftover + + // Loop over 64-byte chunks using DCBZ. +LDcbz: + dcbz 0,r8 +LDcbzEnter: + dcbz r6,r8 + stfd f0,0(r8) + stfd f1,8(r8) + stfd f0,16(r8) + stfd f1,24(r8) + stfd f0,32(r8) + stfd f1,40(r8) + stfd f0,48(r8) + stfd f1,56(r8) + addi r8,r8,64 + bdnz LDcbz + + // Handle leftovers (0..63 bytes) +Lleftover: + srwi. r0,r4,4 // get count of 16-byte chunks + rlwinm r4,r4,0,0xF // mask down to residuals + beqlr // no 16-byte chunks so done + mtctr r0 +2: + stfd f0,0(r8) + stfd f1,8(r8) + addi r8,r8,16 + bdnz 2b + + blr + + COMMPAGE_DESCRIPTOR(memset_g3,_COMM_PAGE_MEMSET_PATTERN,kCache32,kHasAltivec, \ + kCommPage32) diff --git a/osfmk/ppc/commpage/memset_g4.s b/osfmk/ppc/commpage/memset_g4.s new file mode 100644 index 000000000..da47f785f --- /dev/null +++ b/osfmk/ppc/commpage/memset_g4.s @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define ASSEMBLER +#include +#include +#include +#include + + .text + .align 2 + + +/* ********************* + * * M E M S E T _ G 4 * + * ********************* + * + * This is a subroutine called by Libc memset and memset_pattern for large nonzero + * operands (zero operands are funneled into bzero.) This version is for + * 32-bit processors with a 32-byte cache line and Altivec. + * + * Registers at entry: + * r4 = count of bytes to store (must be >= 32) + * r8 = ptr to the 1st byte to store (16-byte aligned) + * r9 = ptr to 16-byte pattern to store (16-byte aligned) + * When we return: + * r3 = not changed, since memset returns it + * r4 = bytes remaining to store (will be <32) + * r7 = not changed + * r8 = ptr to next byte to store (still 16-byte aligned) + * r12 = not changed (holds return value for memset) + */ + +#define kBig (3*64) // big enough to warrant using dcba (NB: must be >= 3*64) + + .align 4 +memset_g4: + cmplwi cr1,r4,kBig // big enough to warrant using dcbz? + mfspr r2,vrsave // we'll be using VRs + oris r0,r2,0x8000 // we use vr0 + andi. r5,r8,0x10 // is ptr 32-byte aligned? + mtspr vrsave,r0 + li r5,16 // get offsets for "stvx" + lvx v0,0,r9 // load the pattern into v0 + li r6,32 + blt cr1,LShort // not big enough to bother with dcba + li r9,48 + + // cache line align + + beq 2f // already aligned + stvx v0,0,r8 // store another 16 bytes to align + addi r8,r8,16 + subi r4,r4,16 + + // Set up for inner loop. +2: + srwi r0,r4,6 // get count of 64-byte chunks (>=2) + dcba 0,r8 // pre-allocate first cache line (possibly nop'd) + rlwinm r4,r4,0,0x3F // mask down to residual count (0..63) + subic r0,r0,1 // loop 1-too-few times + li r10,64 // get offsets to DCBA one chunk ahead + li r11,64+32 + mtctr r0 + dcba r6,r8 // zero 2nd cache line (possibly nop'd) + b 3f // enter DCBA loop + + // Loop over 64-byte chunks. We DCBA one chunk ahead, which is a little faster. + // Note that some G4s do not benefit from the DCBAs. We nop them in that case. + + .align 4 +3: + dcba r10,r8 // zero one 64-byte chunk ahead (possibly nop'd) + dcba r11,r8 + stvx v0,0,r8 + stvx v0,r5,r8 + stvx v0,r6,r8 + stvx v0,r9,r8 + addi r8,r8,64 + bdnz+ 3b + + // Last chunk, which we've already DCBAd. + + stvx v0,0,r8 + stvx v0,r5,r8 + stvx v0,r6,r8 + stvx v0,r9,r8 + addi r8,r8,64 + + // loop over 32-byte chunks at end +LShort: + srwi. r0,r4,5 // get count of 32-byte chunks + rlwinm r4,r4,0,0x1F // mask down to residual count (0..31) + beq 7f // no chunks so done + mtctr r0 +6: + stvx v0,0,r8 + stvx v0,r5,r8 + addi r8,r8,32 + bdnz 6b +7: + mtspr vrsave,r2 // restore caller's vrsave + blr + + + COMMPAGE_DESCRIPTOR(memset_g4,_COMM_PAGE_MEMSET_PATTERN,kCache32+kHasAltivec,0, \ + kCommPageDCBA+kCommPage32) diff --git a/osfmk/ppc/commpage/memset_g5.s b/osfmk/ppc/commpage/memset_g5.s new file mode 100644 index 000000000..048ba75a4 --- /dev/null +++ b/osfmk/ppc/commpage/memset_g5.s @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#define ASSEMBLER +#include +#include +#include +#include + + .text + .align 2 +/* + * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary + * to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following + * simple transformations: + * - all word compares are changed to doubleword + * - all "srwi[.]" opcodes are changed to "srdi[.]" + * Nothing else is done. For this to work, the following rules must be + * carefully followed: + * - do not use carry or overflow + * - only use record mode if you are sure the results are mode-invariant + * for example, all "andi." and almost all "rlwinm." are fine + * - do not use "slwi", "slw", or "srw" + * An imaginative programmer could break the porting model in other ways, but the above + * are the most likely problem areas. It is perhaps surprising how well in practice + * this simple method works. + */ + +/* ********************* + * * M E M S E T _ G 5 * + * ********************* + * + * This is a subroutine called by Libc memset and memset_pattern for large nonzero + * operands (zero operands are funneled into bzero.) This version is for + * 64-bit processors with a 128-byte cache line and Altivec. + * + * Registers at entry: + * r4 = count of bytes to store (must be >= 32) + * r8 = ptr to the 1st byte to store (16-byte aligned) + * r9 = ptr to 16-byte pattern to store (16-byte aligned) + * When we return: + * r3 = not changed, since memset returns it + * r4 = bytes remaining to store (will be <32) + * r7 = not changed + * r8 = ptr to next byte to store (still 16-byte aligned) + * r12 = not changed (holds return value for memset) + */ + +#define kBig (3*128) // big enough to warrant using dcbz (NB: must be >= 3*128) + + .align 5 +memset_g5: + cmplwi cr1,r4,kBig // big enough to warrant using dcbz? + neg r10,r8 // start to align ptr + mfspr r2,vrsave // we'll be using VRs + andi. r10,r10,0x70 // get #bytes to cache line align + oris r0,r2,0x8000 // we use vr0 + mtspr vrsave,r0 + li r5,16 // get offsets for "stvx" + lvx v0,0,r9 // load the pattern into v0 + li r6,32 + blt cr1,LShort // not big enough to bother with dcbz + li r9,48 + + // cache line align + + beq 2f // already aligned +1: + subic. r10,r10,16 // more to go? + stvx v0,0,r8 + addi r8,r8,16 + subi r4,r4,16 + bne 1b + + // Loop over cache lines. This code uses a private protocol with the kernel: + // when the kernel emulates an alignment exception on a DCBZ that occurs in the + // commpage, it zeroes CR7. We use this to detect the case where we are operating on + // uncached memory, and do not use DCBZ again in this code. We assume that either + // all the operand is cacheable or none of it is, so we only check the first DCBZ. 
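
A C model of that DCBZ protocol, with a hypothetical flag standing in for CR7 (which the kernel clears when it emulates a commpage DCBZ that took an alignment exception on uncached memory). As the comment says, cacheability is assumed to be all-or-nothing, so only the first line is probed:

    #include <stddef.h>
    #include <string.h>

    #define LINE 128                      /* 128-byte G5 cache line          */

    /* Hypothetical stand-ins: dcbz_line() for "dcbzl 0,rX", and
     * dcbz_was_emulated() for "did the kernel clear CR7 while emulating
     * an alignment exception on that DCBZ?" */
    extern void dcbz_line(void *p);
    extern int  dcbz_was_emulated(void);

    void store_pattern_lines(char *p, size_t nlines, const char pat[16])
    {
        size_t i;
        int    cacheable;

        dcbz_line(p);                     /* probe with the first line only  */
        cacheable = !dcbz_was_emulated(); /* uncached => never DCBZ again    */

        for (i = 0; i < nlines; i++, p += LINE) {
            if (cacheable && i + 1 < nlines)
                dcbz_line(p + LINE);      /* zero one line ahead (faster)    */
            for (int o = 0; o < LINE; o += 16)
                memcpy(p + o, pat, 16);   /* the stvx stores in the real code */
        }
    }
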
+2: + cmpw cr7,r3,r3 // set cr7_eq (kernel will clear if DCBZ faults) + dcbzl 0,r8 // zero first cache line (clearing cr7 if alignment exception) + srwi r0,r4,7 // get #cache lines (>=2) + rlwinm r4,r4,0,0x7F // mask down to residual count (0..127) + bne-- cr7,LNoDcbz // exit if we took alignment exception on the first DCBZ + subic r0,r0,1 // loop 1-too-few times + li r11,128 // set DCBZ look-ahead + mtctr r0 + b 3f // use loop that DCBZs + + // Loop over cache lines. We DCBZ one line ahead, which is a little faster. + + .align 5 +3: + dcbzl r11,r8 // zero one line ahead + addi r10,r8,64 + stvx v0,0,r8 + stvx v0,r5,r8 + stvx v0,r6,r8 + stvx v0,r9,r8 + addi r8,r8,128 + stvx v0,0,r10 + stvx v0,r5,r10 + stvx v0,r6,r10 + stvx v0,r9,r10 + bdnz++ 3b + + li r0,1 // we've already DCBZ'd the last line +LNoDcbz: // r0: loop count + mtctr r0 + + // Loop which does not DCBZ. Normally this is only used for last cache line, + // because we've already zeroed it. +4: + addi r10,r8,64 + stvx v0,0,r8 + stvx v0,r5,r8 + stvx v0,r6,r8 + stvx v0,r9,r8 + addi r8,r8,128 + stvx v0,0,r10 + stvx v0,r5,r10 + stvx v0,r6,r10 + stvx v0,r9,r10 + bdnz-- 4b // optimize for the cacheable case + + // loop over 32-byte chunks +LShort: + srwi. r0,r4,5 // get count of 32-byte chunks + rlwinm r4,r4,0,0x1F // mask down to residual count (0..31) + beq 7f // no chunks so done + mtctr r0 +6: + stvx v0,0,r8 + stvx v0,r5,r8 + addi r8,r8,32 + bdnz++ 6b +7: + mtspr vrsave,r2 // restore caller's vrsave + blr + + + COMMPAGE_DESCRIPTOR(memset_g5,_COMM_PAGE_MEMSET_PATTERN,kCache128+k64Bit+kHasAltivec,0, \ + kCommPageBoth+kPort32to64) diff --git a/osfmk/ppc/commpage/pthread.s b/osfmk/ppc/commpage/pthread.s index dfd9083fa..24c785121 100644 --- a/osfmk/ppc/commpage/pthread.s +++ b/osfmk/ppc/commpage/pthread.s @@ -27,43 +27,58 @@ .text .align 2 - .globl EXT(pthread_getspecific_sprg3) - .globl EXT(pthread_getspecific_uftrap) - .globl EXT(pthread_self_sprg3) - .globl EXT(pthread_self_uftrap) #define USER_SPRG3 259 // user-mode-readable encoding for SPRG3 -// ***************************************************** -// * P T H R E A D _ G E T S P E C I F I C _ S P R G 3 * -// ***************************************************** +// *********************************************************** +// * P T H R E A D _ G E T S P E C I F I C _ S P R G 3 _ 3 2 * +// *********************************************************** // -// For processors with user-readable SPRG3. Called with: +// For processors with user-readable SPRG3, in 32-bit mode. Called with: // r3 = word number // r4 = offset to thread specific data (_PTHREAD_TSD_OFFSET) -pthread_getspecific_sprg3: +pthread_getspecific_sprg3_32: slwi r5,r3,2 // convert word# to byte offset mfspr r3,USER_SPRG3 // get per-thread cookie add r5,r5,r4 // add in offset to first word lwzx r3,r3,r5 // get the thread-specific word blr - COMMPAGE_DESCRIPTOR(pthread_getspecific_sprg3,_COMM_PAGE_PTHREAD_GETSPECIFIC,k64Bit,0,0) + COMMPAGE_DESCRIPTOR(pthread_getspecific_sprg3_32,_COMM_PAGE_PTHREAD_GETSPECIFIC,k64Bit,0,kCommPage32) + + +// *********************************************************** +// * P T H R E A D _ G E T S P E C I F I C _ S P R G 3 _ 6 4 * +// *********************************************************** +// +// For processors with user-readable SPRG3, in 64-bit mode. This may not be used +// because the 64-bit ABI uses r13 for the thread-local-data pointer. 
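
In C terms, both getspecific flavors are a single indexed load off the per-thread cookie; only the slot width differs. read_sprg3() is a hypothetical wrapper for the mfspr of USER_SPRG3:

    #include <stdint.h>

    extern uintptr_t read_sprg3(void);   /* hypothetical: mfspr of USER_SPRG3 */

    /* 32-bit flavor: 4-byte slots (slwi r5,r3,2 / lwzx above). */
    void *getspecific_sprg3_32_sketch(uint32_t word, uintptr_t tsd_offset)
    {
        uint32_t *tsd = (uint32_t *)(read_sprg3() + tsd_offset);
        return (void *)(uintptr_t)tsd[word];
    }

    /* 64-bit flavor: 8-byte slots (sldi r5,r3,3 / ldx below). */
    void *getspecific_sprg3_64_sketch(uint32_t word, uintptr_t tsd_offset)
    {
        uint64_t *tsd = (uint64_t *)(read_sprg3() + tsd_offset);
        return (void *)(uintptr_t)tsd[word];
    }

pthread_self_sprg3 then reduces to read_sprg3() alone, which is why a single copy can serve both modes (kCommPageBoth).
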
Called with: +// r3 = word number +// r4 = offset to thread specific data (_PTHREAD_TSD_OFFSET) + +pthread_getspecific_sprg3_64: + sldi r5,r3,3 // convert double-word# to byte offset + mfspr r3,USER_SPRG3 // get per-thread cookie + add r5,r5,r4 // add in offset to first word + ldx r3,r3,r5 // get the thread-specific doubleword + blr + + COMMPAGE_DESCRIPTOR(pthread_getspecific_sprg3_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,k64Bit,0,kCommPage64) // *************************************** // * P T H R E A D _ S E L F _ S P R G 3 * // *************************************** // -// For processors with user-readable SPRG3. +// For processors with user-readable SPRG3. Useable both in 32 and 64-bit modes. pthread_self_sprg3: mfspr r3,USER_SPRG3 // get per-thread cookie blr - COMMPAGE_DESCRIPTOR(pthread_self_sprg3,_COMM_PAGE_PTHREAD_SELF,k64Bit,0,0) + COMMPAGE_DESCRIPTOR(pthread_self_sprg3,_COMM_PAGE_PTHREAD_SELF,k64Bit,0,kCommPageBoth) // ******************************************************* @@ -83,7 +98,7 @@ pthread_getspecific_uftrap: lwzx r3,r3,r5 // get the thread-specific word blr - COMMPAGE_DESCRIPTOR(pthread_getspecific_uftrap,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,k64Bit,0) + COMMPAGE_DESCRIPTOR(pthread_getspecific_uftrap,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,k64Bit,kCommPage32) // ***************************************** @@ -97,4 +112,4 @@ pthread_self_uftrap: sc // get r3==TLDP blr - COMMPAGE_DESCRIPTOR(pthread_self_uftrap,_COMM_PAGE_PTHREAD_SELF,0,k64Bit,0) + COMMPAGE_DESCRIPTOR(pthread_self_uftrap,_COMM_PAGE_PTHREAD_SELF,0,k64Bit,kCommPage32) diff --git a/osfmk/ppc/commpage/spinlocks.s b/osfmk/ppc/commpage/spinlocks.s index 249557777..cda60a78d 100644 --- a/osfmk/ppc/commpage/spinlocks.s +++ b/osfmk/ppc/commpage/spinlocks.s @@ -27,76 +27,68 @@ .text .align 2 - .globl EXT(spinlock_32_try_mp) - .globl EXT(spinlock_32_try_up) - .globl EXT(spinlock_32_lock_mp) - .globl EXT(spinlock_32_lock_up) - .globl EXT(spinlock_32_unlock_mp) - .globl EXT(spinlock_32_unlock_up) - - .globl EXT(spinlock_64_try_mp) - .globl EXT(spinlock_64_try_up) - .globl EXT(spinlock_64_lock_mp) - .globl EXT(spinlock_64_lock_up) - .globl EXT(spinlock_64_unlock_mp) - .globl EXT(spinlock_64_unlock_up) - - .globl EXT(spinlock_relinquish) #define MP_SPIN_TRIES 1000 -// The user mode spinlock library. There are many versions, -// in order to take advantage of a few special cases: -// - no barrier instructions (SYNC,ISYNC) are needed if UP -// - 64-bit processors can use LWSYNC instead of SYNC (if MP) -// - branch hints appropriate to the processor (+ vs ++ etc) -// - potentially custom relinquish strategies (not used at present) -// - fixes for errata as necessary +/* The user mode spinlock library. There are many versions, + * in order to take advantage of a few special cases: + * - no barrier instructions (SYNC,ISYNC) are needed if UP + * - 64-bit processors can use LWSYNC instead of SYNC (if MP) + * - 32-bit processors can use ISYNC/EIEIO instead of SYNC (if MP) + * - branch hints appropriate to the processor (+ vs ++ etc) + * - potentially custom relinquish strategies (not used at present) + * - fixes for errata as necessary + * + * The convention for lockwords is that 0==free and -1==locked. + */ spinlock_32_try_mp: - mr r5, r3 - li r3, 1 + mr r5, r3 + li r3, 1 1: lwarx r4,0,r5 + li r6,-1 // locked == -1 cmpwi r4,0 bne- 2f - stwcx. r5,0,r5 + stwcx. 
r6,0,r5 isync // cancel speculative execution beqlr+ b 1b 2: - li r3,0 + li r3,0 // we did not get the lock blr - COMMPAGE_DESCRIPTOR(spinlock_32_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,k64Bit+kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_32_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,k64Bit+kUP,kCommPage32) spinlock_32_try_up: - mr r5, r3 - li r3, 1 + mr r5, r3 + li r3, 1 1: lwarx r4,0,r5 + li r6,-1 // locked == -1 cmpwi r4,0 bne- 2f - stwcx. r5,0,r5 + stwcx. r6,0,r5 beqlr+ b 1b 2: - li r3,0 + li r3,0 // we did not get the lock blr - COMMPAGE_DESCRIPTOR(spinlock_32_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,k64Bit,0) + COMMPAGE_DESCRIPTOR(spinlock_32_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,k64Bit,kCommPage32) spinlock_32_lock_mp: li r5,MP_SPIN_TRIES 1: lwarx r4,0,r3 + li r6,-1 // locked == -1 cmpwi r4,0 bne- 2f - stwcx. r3,0,r3 + stwcx. r6,0,r3 isync // cancel speculative execution beqlr+ // we return void b 1b @@ -105,28 +97,30 @@ spinlock_32_lock_mp: bne 1b ba _COMM_PAGE_RELINQUISH - COMMPAGE_DESCRIPTOR(spinlock_32_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,k64Bit+kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_32_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,k64Bit+kUP,kCommPage32) spinlock_32_lock_up: 1: lwarx r4,0,r3 + li r6,-1 // locked == -1 cmpwi r4,0 bnea- _COMM_PAGE_RELINQUISH // always depress on UP (let lock owner run) - stwcx. r3,0,r3 + stwcx. r6,0,r3 beqlr+ // we return void b 1b - COMMPAGE_DESCRIPTOR(spinlock_32_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,k64Bit,0) + COMMPAGE_DESCRIPTOR(spinlock_32_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,k64Bit,kCommPage32) spinlock_32_unlock_mp: li r4,0 - sync // complete prior stores before unlock + isync // complete prior stores before unlock + eieio // (using isync/eieio is faster than a sync) stw r4,0(r3) blr - COMMPAGE_DESCRIPTOR(spinlock_32_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,0,k64Bit+kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_32_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,0,k64Bit+kUP,kCommPage32) spinlock_32_unlock_up: @@ -134,55 +128,58 @@ spinlock_32_unlock_up: stw r4,0(r3) blr - COMMPAGE_DESCRIPTOR(spinlock_32_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,kUP,k64Bit,0) + COMMPAGE_DESCRIPTOR(spinlock_32_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,kUP,k64Bit,kCommPage32) spinlock_64_try_mp: - mr r5, r3 - li r3, 1 + mr r5, r3 + li r3, 1 1: lwarx r4,0,r5 + li r6,-1 // locked == -1 cmpwi r4,0 bne-- 2f - stwcx. r5,0,r5 + stwcx. r6,0,r5 isync // cancel speculative execution beqlr++ b 1b 2: li r6,-4 stwcx. r5,r6,r1 // clear the pending reservation (using red zone) - li r3,0 // Pass failure + li r3,0 // we did not get the lock blr - COMMPAGE_DESCRIPTOR(spinlock_64_try_mp,_COMM_PAGE_SPINLOCK_TRY,k64Bit,kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_64_try_mp,_COMM_PAGE_SPINLOCK_TRY,k64Bit,kUP,kCommPageBoth) spinlock_64_try_up: - mr r5, r3 - li r3, 1 + mr r5, r3 + li r3, 1 1: lwarx r4,0,r5 + li r6,-1 // locked == -1 cmpwi r4,0 bne-- 2f - stwcx. r5,0,r5 + stwcx. r6,0,r5 beqlr++ b 1b 2: li r6,-4 stwcx. r5,r6,r1 // clear the pending reservation (using red zone) - li r3,0 + li r3,0 // we did not get the lock blr - COMMPAGE_DESCRIPTOR(spinlock_64_try_up,_COMM_PAGE_SPINLOCK_TRY,k64Bit+kUP,0,0) + COMMPAGE_DESCRIPTOR(spinlock_64_try_up,_COMM_PAGE_SPINLOCK_TRY,k64Bit+kUP,0,kCommPageBoth) spinlock_64_lock_mp: li r5,MP_SPIN_TRIES 1: lwarx r4,0,r3 + li r6,-1 // locked == -1 cmpwi r4,0 bne-- 2f - stwcx. r3,0,r3 + stwcx. 
r6,0,r3 isync // cancel speculative execution beqlr++ // we return void b 1b @@ -193,15 +190,16 @@ spinlock_64_lock_mp: bne-- 1b // mispredict this one (a cheap back-off) ba _COMM_PAGE_RELINQUISH - COMMPAGE_DESCRIPTOR(spinlock_64_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,k64Bit,kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_64_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,k64Bit,kUP,kCommPageBoth) spinlock_64_lock_up: 1: lwarx r4,0,r3 + li r6,-1 // locked == -1 cmpwi r4,0 bne-- 2f - stwcx. r3,0,r3 + stwcx. r6,0,r3 beqlr++ // we return void b 1b 2: // always relinquish on UP (let lock owner run) @@ -209,16 +207,16 @@ spinlock_64_lock_up: stwcx. r3,r6,r1 // clear the pending reservation (using red zone) ba _COMM_PAGE_RELINQUISH - COMMPAGE_DESCRIPTOR(spinlock_64_lock_up,_COMM_PAGE_SPINLOCK_LOCK,k64Bit+kUP,0,0) + COMMPAGE_DESCRIPTOR(spinlock_64_lock_up,_COMM_PAGE_SPINLOCK_LOCK,k64Bit+kUP,0,kCommPageBoth) spinlock_64_unlock_mp: - li r4,0 lwsync // complete prior stores before unlock + li r4,0 stw r4,0(r3) blr - COMMPAGE_DESCRIPTOR(spinlock_64_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit,kUP,0) + COMMPAGE_DESCRIPTOR(spinlock_64_unlock_mp,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit,kUP,kCommPageBoth) spinlock_64_unlock_up: @@ -226,7 +224,7 @@ spinlock_64_unlock_up: stw r4,0(r3) blr - COMMPAGE_DESCRIPTOR(spinlock_64_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit+kUP,0,0) + COMMPAGE_DESCRIPTOR(spinlock_64_unlock_up,_COMM_PAGE_SPINLOCK_UNLOCK,k64Bit+kUP,0,kCommPageBoth) spinlock_relinquish: @@ -239,5 +237,5 @@ spinlock_relinquish: mr r3,r12 ba _COMM_PAGE_SPINLOCK_LOCK - COMMPAGE_DESCRIPTOR(spinlock_relinquish,_COMM_PAGE_RELINQUISH,0,0,0) + COMMPAGE_DESCRIPTOR(spinlock_relinquish,_COMM_PAGE_RELINQUISH,0,0,kCommPageBoth) diff --git a/osfmk/ppc/console_feed.c b/osfmk/ppc/console_feed.c index 710013a2d..66b2e9217 100644 --- a/osfmk/ppc/console_feed.c +++ b/osfmk/ppc/console_feed.c @@ -59,7 +59,7 @@ console_feed_open( { spl_t s; - simple_lock_init(&cons_feed_lock, ETAP_IO_TTY); + simple_lock_init(&cons_feed_lock, 0); #if MACH_KDB if (console_is_serial()) { return D_DEVICE_DOWN; diff --git a/osfmk/ppc/cpu.c b/osfmk/ppc/cpu.c index f50c3a76c..f70af13a4 100644 --- a/osfmk/ppc/cpu.c +++ b/osfmk/ppc/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
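
Stepping back to the spinlock rework above: all twelve variants now implement one contract — lockword 0 == free, -1 == locked — differing only in barriers and branch hints. A sketch of that contract with GCC atomic builtins standing in for lwarx/stwcx. and the assorted barriers (sync, lwsync, or isync+eieio depending on the CPU; none on UP):

    #include <stdbool.h>
    #include <stdint.h>

    typedef volatile int32_t commpage_spinlock_t;  /* 0 == free, -1 == locked */

    /* try: one CAS attempt; acquire ordering plays the role of the
     * trailing isync in the MP variants */
    bool spinlock_try_sketch(commpage_spinlock_t *l)
    {
        int32_t expected = 0;
        return __atomic_compare_exchange_n(l, &expected, -1, false,
                                           __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
    }

    /* unlock: complete prior stores before releasing (sync, lwsync, or
     * isync+eieio in the real code; a plain store on UP) */
    void spinlock_unlock_sketch(commpage_spinlock_t *l)
    {
        __atomic_store_n(l, 0, __ATOMIC_RELEASE);
    }
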
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,23 +19,25 @@ * * @APPLE_LICENSE_HEADER_END@ */ -/* - * File: ppc/cpu.c - * - * cpu specific routines - */ +#include +#include +#include + +#include +#include #include #include #include +#include #include -#include -#include -#include + +#include + #include #include #include -#include +#include #include #include #include @@ -44,24 +46,20 @@ #include #include #include +#include -/* TODO: BOGUS TO BE REMOVED */ -int real_ncpus = 1; +decl_mutex_data(static,ppt_lock); -int wncpu = NCPUS; -resethandler_t resethandler_target; +unsigned int real_ncpus = 1; +unsigned int max_ncpus = MAX_CPUS; -decl_simple_lock_data(static,SignalReadyLock); -static unsigned int SignalReadyWait = 0xFFFFFFFFU; +decl_simple_lock_data(static,rht_lock); -#define MMCR0_SUPPORT_MASK 0xf83f1fff -#define MMCR1_SUPPORT_MASK 0xffc00000 -#define MMCR2_SUPPORT_MASK 0x80000000 +static unsigned int rht_state = 0; +#define RHT_WAIT 0x01 +#define RHT_BUSY 0x02 -extern int debugger_pending[NCPUS]; -extern int debugger_is_slave[NCPUS]; -extern int debugger_holdoff[NCPUS]; -extern int debugger_sync; +decl_simple_lock_data(static,SignalReadyLock); struct SIGtimebase { boolean_t avail; @@ -70,352 +68,207 @@ struct SIGtimebase { uint64_t abstime; }; -struct per_proc_info *pper_proc_info = per_proc_info; - -extern struct SIGtimebase syncClkSpot; +perfCallback perfCpuSigHook = 0; /* Pointer to CHUD cpu signal hook routine */ -void cpu_sync_timebase(void); +extern int debugger_sync; -kern_return_t -cpu_control( - int slot_num, - processor_info_t info, - unsigned int count) -{ - cpu_type_t cpu_type; - cpu_subtype_t cpu_subtype; - processor_pm_regs_t perf_regs; - processor_control_cmd_t cmd; - boolean_t oldlevel; - - cpu_type = machine_slot[slot_num].cpu_type; - cpu_subtype = machine_slot[slot_num].cpu_subtype; - cmd = (processor_control_cmd_t) info; - - if (count < PROCESSOR_CONTROL_CMD_COUNT) - return(KERN_FAILURE); - - if ( cpu_type != cmd->cmd_cpu_type || - cpu_subtype != cmd->cmd_cpu_subtype) - return(KERN_FAILURE); +/* + * Forward definitions + */ - if (perfmon_acquire_facility(current_task()) != KERN_SUCCESS) { - return(KERN_RESOURCE_SHORTAGE); /* cpu performance facility in use by another task */ - } +void cpu_sync_timebase( + void); - switch (cmd->cmd_op) - { - case PROCESSOR_PM_CLR_PMC: /* Clear Performance Monitor Counters */ - switch (cpu_subtype) - { - case CPU_SUBTYPE_POWERPC_750: - case CPU_SUBTYPE_POWERPC_7400: - case CPU_SUBTYPE_POWERPC_7450: - { - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - mtpmc1(0x0); - mtpmc2(0x0); - mtpmc3(0x0); - mtpmc4(0x0); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - return(KERN_SUCCESS); - } - default: - return(KERN_FAILURE); - } /* cpu_subtype */ - case PROCESSOR_PM_SET_REGS: /* Set Performance Monitor Registors */ - switch (cpu_subtype) - { - case CPU_SUBTYPE_POWERPC_750: - if (count < (PROCESSOR_CONTROL_CMD_COUNT + - PROCESSOR_PM_REGS_COUNT_POWERPC_750)) - return(KERN_FAILURE); - else - { - perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); - mtpmc1(PERFMON_PMC1(perf_regs)); - mtpmc2(PERFMON_PMC2(perf_regs)); - mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); - mtpmc3(PERFMON_PMC3(perf_regs)); - mtpmc4(PERFMON_PMC4(perf_regs)); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - return(KERN_SUCCESS); - } - case CPU_SUBTYPE_POWERPC_7400: - case 
CPU_SUBTYPE_POWERPC_7450: - if (count < (PROCESSOR_CONTROL_CMD_COUNT + - PROCESSOR_PM_REGS_COUNT_POWERPC_7400)) - return(KERN_FAILURE); - else - { - perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); - mtpmc1(PERFMON_PMC1(perf_regs)); - mtpmc2(PERFMON_PMC2(perf_regs)); - mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); - mtpmc3(PERFMON_PMC3(perf_regs)); - mtpmc4(PERFMON_PMC4(perf_regs)); - mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - return(KERN_SUCCESS); - } - default: - return(KERN_FAILURE); - } /* switch cpu_subtype */ - case PROCESSOR_PM_SET_MMCR: - switch (cpu_subtype) - { - case CPU_SUBTYPE_POWERPC_750: - if (count < (PROCESSOR_CONTROL_CMD_COUNT + - PROCESSOR_PM_REGS_COUNT_POWERPC_750)) - return(KERN_FAILURE); - else - { - perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); - mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - return(KERN_SUCCESS); - } - case CPU_SUBTYPE_POWERPC_7400: - case CPU_SUBTYPE_POWERPC_7450: - if (count < (PROCESSOR_CONTROL_CMD_COUNT + - PROCESSOR_PM_REGS_COUNT_POWERPC_7400)) - return(KERN_FAILURE); - else - { - perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); - mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); - mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - return(KERN_SUCCESS); - } - default: - return(KERN_FAILURE); - } /* cpu_subtype */ - default: - return(KERN_FAILURE); - } /* switch cmd_op */ -} +void cpu_timebase_signal_handler( + struct per_proc_info *proc_info, + struct SIGtimebase *timebaseAddr); -kern_return_t -cpu_info_count( - processor_flavor_t flavor, - unsigned int *count) +/* + * Routine: cpu_bootstrap + * Function: + */ +void +cpu_bootstrap( + void) { - cpu_subtype_t cpu_subtype; - - /* - * For now, we just assume that all CPUs are of the same type - */ - cpu_subtype = machine_slot[0].cpu_subtype; - switch (flavor) { - case PROCESSOR_PM_REGS_INFO: - switch (cpu_subtype) { - case CPU_SUBTYPE_POWERPC_750: - - *count = PROCESSOR_PM_REGS_COUNT_POWERPC_750; - return(KERN_SUCCESS); - - case CPU_SUBTYPE_POWERPC_7400: - case CPU_SUBTYPE_POWERPC_7450: - - *count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400; - return(KERN_SUCCESS); - - default: - *count = 0; - return(KERN_INVALID_ARGUMENT); - } /* switch cpu_subtype */ - - case PROCESSOR_TEMPERATURE: - *count = PROCESSOR_TEMPERATURE_COUNT; - return (KERN_SUCCESS); - - default: - *count = 0; - return(KERN_INVALID_ARGUMENT); - - } + simple_lock_init(&rht_lock,0); + simple_lock_init(&SignalReadyLock,0); + mutex_init(&ppt_lock,0); } -kern_return_t -cpu_info( - processor_flavor_t flavor, - int slot_num, - processor_info_t info, - unsigned int *count) -{ - cpu_subtype_t cpu_subtype; - processor_pm_regs_t perf_regs; - boolean_t oldlevel; - unsigned int temp[2]; - - cpu_subtype = machine_slot[slot_num].cpu_subtype; - - switch (flavor) { - case PROCESSOR_PM_REGS_INFO: - - perf_regs = (processor_pm_regs_t) info; - - switch (cpu_subtype) { - case CPU_SUBTYPE_POWERPC_750: - - if (*count < 
PROCESSOR_PM_REGS_COUNT_POWERPC_750) - return(KERN_FAILURE); - - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - PERFMON_MMCR0(perf_regs) = mfmmcr0(); - PERFMON_PMC1(perf_regs) = mfpmc1(); - PERFMON_PMC2(perf_regs) = mfpmc2(); - PERFMON_MMCR1(perf_regs) = mfmmcr1(); - PERFMON_PMC3(perf_regs) = mfpmc3(); - PERFMON_PMC4(perf_regs) = mfpmc4(); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - - *count = PROCESSOR_PM_REGS_COUNT_POWERPC_750; - return(KERN_SUCCESS); - - case CPU_SUBTYPE_POWERPC_7400: - case CPU_SUBTYPE_POWERPC_7450: - - if (*count < PROCESSOR_PM_REGS_COUNT_POWERPC_7400) - return(KERN_FAILURE); - - oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ - PERFMON_MMCR0(perf_regs) = mfmmcr0(); - PERFMON_PMC1(perf_regs) = mfpmc1(); - PERFMON_PMC2(perf_regs) = mfpmc2(); - PERFMON_MMCR1(perf_regs) = mfmmcr1(); - PERFMON_PMC3(perf_regs) = mfpmc3(); - PERFMON_PMC4(perf_regs) = mfpmc4(); - PERFMON_MMCR2(perf_regs) = mfmmcr2(); - ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ - - *count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400; - return(KERN_SUCCESS); - - default: - return(KERN_FAILURE); - } /* switch cpu_subtype */ - - case PROCESSOR_TEMPERATURE: /* Get the temperature of a processor */ - - disable_preemption(); /* Don't move me now */ - - if(slot_num == cpu_number()) { /* Is this for the local CPU? */ - *info = ml_read_temp(); /* Get the temperature */ - } - else { /* For another CPU */ - temp[0] = -1; /* Set sync flag */ - eieio(); - sync(); - temp[1] = -1; /* Set invalid temperature */ - (void)cpu_signal(slot_num, SIGPcpureq, CPRQtemp ,(unsigned int)&temp); /* Ask him to take his temperature */ - (void)hw_cpu_sync(temp, LockTimeOut); /* Wait for the other processor to get its temperature */ - *info = temp[1]; /* Pass it back */ - } - - enable_preemption(); /* Ok to move now */ - return(KERN_SUCCESS); - - default: - return(KERN_INVALID_ARGUMENT); - - } /* flavor */ -} +/* + * Routine: cpu_init + * Function: + */ void cpu_init( void) { - int cpu; + struct per_proc_info *proc_info; + + proc_info = getPerProc(); - cpu = cpu_number(); + /* + * Restore the TBR. 
+ */ + if (proc_info->save_tbu != 0 || proc_info->save_tbl != 0) { + mttb(0); + mttbu(proc_info->save_tbu); + mttb(proc_info->save_tbl); + } - machine_slot[cpu].running = TRUE; - machine_slot[cpu].cpu_type = CPU_TYPE_POWERPC; - machine_slot[cpu].cpu_subtype = (cpu_subtype_t)per_proc_info[cpu].pf.rptdProc; + proc_info->cpu_type = CPU_TYPE_POWERPC; + proc_info->cpu_subtype = (cpu_subtype_t)proc_info->pf.rptdProc; + proc_info->cpu_threadtype = CPU_THREADTYPE_NONE; + proc_info->running = TRUE; } +/* + * Routine: cpu_machine_init + * Function: + */ void cpu_machine_init( void) { - struct per_proc_info *tproc_info; + struct per_proc_info *proc_info; volatile struct per_proc_info *mproc_info; - int cpu; - /* TODO: realese mutex lock reset_handler_lock */ - cpu = cpu_number(); - tproc_info = &per_proc_info[cpu]; - mproc_info = &per_proc_info[master_cpu]; - PE_cpu_machine_init(tproc_info->cpu_id, !(tproc_info->cpu_flags & BootDone)); - if (cpu != master_cpu) { - while (!((mproc_info->cpu_flags) & SignalReady)) + proc_info = getPerProc(); + mproc_info = PerProcTable[master_cpu].ppe_vaddr; + + if (proc_info != mproc_info) { + simple_lock(&rht_lock); + if (rht_state & RHT_WAIT) + thread_wakeup(&rht_state); + rht_state &= ~(RHT_BUSY|RHT_WAIT); + simple_unlock(&rht_lock); + } + + PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone)); + + + if (proc_info != mproc_info) { + while (!((mproc_info->cpu_flags) & SignalReady)) continue; cpu_sync_timebase(); } + ml_init_interrupt(); - if (cpu != master_cpu) + if (proc_info != mproc_info) simple_lock(&SignalReadyLock); - tproc_info->cpu_flags |= BootDone|SignalReady; - if (cpu != master_cpu) { - if (SignalReadyWait != 0) { - SignalReadyWait--; - thread_wakeup(&tproc_info->cpu_flags); + proc_info->cpu_flags |= BootDone|SignalReady; + if (proc_info != mproc_info) { + if (proc_info->ppXFlags & SignalReadyWait) { + hw_atomic_and(&proc_info->ppXFlags, ~SignalReadyWait); + thread_wakeup(&proc_info->cpu_flags); } simple_unlock(&SignalReadyLock); } } -kern_return_t -cpu_register( - int *target_cpu -) -{ - int cpu; - - /* - * TODO: - * - Run cpu_register() in exclusion mode - */ - *target_cpu = -1; - for(cpu=0; cpu < wncpu; cpu++) { - if(!machine_slot[cpu].is_cpu) { - machine_slot[cpu].is_cpu = TRUE; - *target_cpu = cpu; - break; - } +/* + * Routine: cpu_per_proc_alloc + * Function: + */ +struct per_proc_info * +cpu_per_proc_alloc( + void) +{ + struct per_proc_info *proc_info=0; + void *interrupt_stack=0; + void *debugger_stack=0; + + if ((proc_info = (struct per_proc_info*)kalloc(PAGE_SIZE)) == (struct per_proc_info*)0) + return (struct per_proc_info *)NULL;; + if ((interrupt_stack = kalloc(INTSTACK_SIZE)) == 0) { + kfree(proc_info, PAGE_SIZE); + return (struct per_proc_info *)NULL;; } - if (*target_cpu != -1) { - real_ncpus++; - return KERN_SUCCESS; - } else +#if MACH_KDP || MACH_KDB + if ((debugger_stack = kalloc(KERNEL_STACK_SIZE)) == 0) { + kfree(proc_info, PAGE_SIZE); + kfree(interrupt_stack, INTSTACK_SIZE); + return (struct per_proc_info *)NULL;; + } +#endif + + bzero((void *)proc_info, sizeof(struct per_proc_info)); + + proc_info->next_savearea = (uint64_t)save_get_init(); + proc_info->pf = BootProcInfo.pf; + proc_info->istackptr = (vm_offset_t)interrupt_stack + INTSTACK_SIZE - FM_SIZE; + proc_info->intstack_top_ss = proc_info->istackptr; +#if MACH_KDP || MACH_KDB + proc_info->debstackptr = (vm_offset_t)debugger_stack + KERNEL_STACK_SIZE - FM_SIZE; + proc_info->debstack_top_ss = proc_info->debstackptr; +#endif /* MACH_KDP || MACH_KDB */ + 
return proc_info; + +} + + +/* + * Routine: cpu_per_proc_free + * Function: + */ +void +cpu_per_proc_free( + struct per_proc_info *proc_info +) +{ + if (proc_info->cpu_number == master_cpu) + return; + kfree((void *)(proc_info->intstack_top_ss - INTSTACK_SIZE + FM_SIZE), INTSTACK_SIZE); + kfree((void *)(proc_info->debstack_top_ss - KERNEL_STACK_SIZE + FM_SIZE), KERNEL_STACK_SIZE); + kfree((void *)proc_info, PAGE_SIZE); +} + + +/* + * Routine: cpu_per_proc_register + * Function: + */ +kern_return_t +cpu_per_proc_register( + struct per_proc_info *proc_info +) +{ + int cpu; + + mutex_lock(&ppt_lock); + if (real_ncpus >= max_ncpus) { + mutex_unlock(&ppt_lock); return KERN_FAILURE; + } + cpu = real_ncpus; + proc_info->cpu_number = cpu; + PerProcTable[cpu].ppe_vaddr = proc_info; + PerProcTable[cpu].ppe_paddr = ((addr64_t)pmap_find_phys(kernel_pmap, (vm_offset_t)proc_info)) << PAGE_SHIFT; + eieio(); + real_ncpus++; + mutex_unlock(&ppt_lock); + return KERN_SUCCESS; } + +/* + * Routine: cpu_start + * Function: + */ kern_return_t cpu_start( int cpu) { struct per_proc_info *proc_info; - kern_return_t ret; - mapping *mp; - - extern vm_offset_t intstack; - extern vm_offset_t debstack; + kern_return_t ret; + mapping_t *mp; - proc_info = &per_proc_info[cpu]; + proc_info = PerProcTable[cpu].ppe_vaddr; if (cpu == cpu_number()) { PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone)); @@ -424,45 +277,32 @@ cpu_start( return KERN_SUCCESS; } else { - extern void _start_cpu(void); - - if (SignalReadyWait == 0xFFFFFFFFU) { - SignalReadyWait = 0; - simple_lock_init(&SignalReadyLock,0); - } - - proc_info->cpu_number = cpu; proc_info->cpu_flags &= BootDone; - proc_info->istackptr = (vm_offset_t)&intstack + (INTSTACK_SIZE*(cpu+1)) - FM_SIZE; - proc_info->intstack_top_ss = proc_info->istackptr; -#if MACH_KDP || MACH_KDB - proc_info->debstackptr = (vm_offset_t)&debstack + (KERNEL_STACK_SIZE*(cpu+1)) - FM_SIZE; - proc_info->debstack_top_ss = proc_info->debstackptr; -#endif /* MACH_KDP || MACH_KDB */ proc_info->interrupts_enabled = 0; - proc_info->need_ast = (unsigned int)&need_ast[cpu]; - proc_info->FPU_owner = 0; - proc_info->VMX_owner = 0; + proc_info->pending_ast = AST_NONE; + proc_info->istackptr = proc_info->intstack_top_ss; proc_info->rtcPop = 0xFFFFFFFFFFFFFFFFULL; - mp = (mapping *)(&proc_info->ppCIOmp); - mp->mpFlags = 0x01000000 | mpSpecial | 1; + mp = (mapping_t *)(&proc_info->ppUMWmp); + mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1; mp->mpSpace = invalSpace; if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) { - /* TODO: get mutex lock reset_handler_lock */ + simple_lock(&rht_lock); + while (rht_state & RHT_BUSY) { + rht_state |= RHT_WAIT; + thread_sleep_usimple_lock((event_t)&rht_state, + &rht_lock, THREAD_UNINT); + } + rht_state |= RHT_BUSY; + simple_unlock(&rht_lock); - resethandler_target.type = RESET_HANDLER_START; - resethandler_target.call_paddr = (vm_offset_t)_start_cpu; /* Note: these routines are always V=R */ - resethandler_target.arg__paddr = (vm_offset_t)proc_info; /* Note: these routines are always V=R */ - ml_phys_write((vm_offset_t)&ResetHandler + 0, - resethandler_target.type); + RESET_HANDLER_START); ml_phys_write((vm_offset_t)&ResetHandler + 4, - resethandler_target.call_paddr); + (vm_offset_t)_start_cpu); ml_phys_write((vm_offset_t)&ResetHandler + 8, - resethandler_target.arg__paddr); - + (vm_offset_t)&PerProcTable[cpu]); } /* * Note: we pass the current time to the other processor here. 
He will load it @@ -471,48 +311,223 @@ cpu_start( * that all processors are the same. This is just to get close. */ - ml_get_timebase((unsigned long long *)&proc_info->ruptStamp); /* Pass our current time to the other guy */ + ml_get_timebase((unsigned long long *)&proc_info->ruptStamp); __asm__ volatile("sync"); /* Commit to storage */ __asm__ volatile("isync"); /* Wait a second */ - ret = PE_cpu_start(proc_info->cpu_id, - proc_info->start_paddr, (vm_offset_t)proc_info); - - if (ret != KERN_SUCCESS && - proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) { - - /* TODO: realese mutex lock reset_handler_lock */ + ret = PE_cpu_start(proc_info->cpu_id, + proc_info->start_paddr, (vm_offset_t)proc_info); + + if (ret != KERN_SUCCESS) { + if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) { + simple_lock(&rht_lock); + if (rht_state & RHT_WAIT) + thread_wakeup(&rht_state); + rht_state &= ~(RHT_BUSY|RHT_WAIT); + simple_unlock(&rht_lock); + }; } else { simple_lock(&SignalReadyLock); - - while (!((*(volatile short *)&per_proc_info[cpu].cpu_flags) & SignalReady)) { - SignalReadyWait++; - thread_sleep_simple_lock((event_t)&per_proc_info[cpu].cpu_flags, - &SignalReadyLock, THREAD_UNINT); + if (!((*(volatile short *)&proc_info->cpu_flags) & SignalReady)) { + hw_atomic_or(&proc_info->ppXFlags, SignalReadyWait); + thread_sleep_simple_lock((event_t)&proc_info->cpu_flags, + &SignalReadyLock, THREAD_UNINT); } simple_unlock(&SignalReadyLock); + } return(ret); } } +/* + * Routine: cpu_exit_wait + * Function: + */ void cpu_exit_wait( - int cpu) + int cpu) +{ + struct per_proc_info *tpproc; + + if ( cpu != master_cpu) { + tpproc = PerProcTable[cpu].ppe_vaddr; + while (!((*(volatile short *)&tpproc->cpu_flags) & SleepState)) {}; + } +} + + +/* + * Routine: cpu_doshutdown + * Function: + */ +void +cpu_doshutdown( + void) +{ + enable_preemption(); + processor_offline(current_processor()); +} + + +/* + * Routine: cpu_sleep + * Function: + */ +void +cpu_sleep( + void) +{ + struct per_proc_info *proc_info; + unsigned int i; + unsigned int wait_ncpus_sleep, ncpus_sleep; + facility_context *fowner; + + proc_info = getPerProc(); + + proc_info->running = FALSE; + + fowner = proc_info->FPU_owner; /* Cache this */ + if(fowner) fpu_save(fowner); /* If anyone owns FPU, save it */ + proc_info->FPU_owner = 0; /* Set no fpu owner now */ + + fowner = proc_info->VMX_owner; /* Cache this */ + if(fowner) vec_save(fowner); /* If anyone owns vectors, save it */ + proc_info->VMX_owner = 0; /* Set no vector owner now */ + + if (proc_info->cpu_number == master_cpu) { + proc_info->cpu_flags &= BootDone; + proc_info->interrupts_enabled = 0; + proc_info->pending_ast = AST_NONE; + + if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) { + ml_phys_write((vm_offset_t)&ResetHandler + 0, + RESET_HANDLER_START); + ml_phys_write((vm_offset_t)&ResetHandler + 4, + (vm_offset_t)_start_cpu); + ml_phys_write((vm_offset_t)&ResetHandler + 8, + (vm_offset_t)&PerProcTable[master_cpu]); + + __asm__ volatile("sync"); + __asm__ volatile("isync"); + } + + wait_ncpus_sleep = real_ncpus-1; + ncpus_sleep = 0; + while (wait_ncpus_sleep != ncpus_sleep) { + ncpus_sleep = 0; + for(i=1; i < real_ncpus ; i++) { + if ((*(volatile short *)&(PerProcTable[i].ppe_vaddr->cpu_flags)) & SleepState) + ncpus_sleep++; + } + } + + } + + /* + * Save the TBR before stopping. 
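
The two halves of that TBR handoff, in C. The mftb_/mttb_ wrappers are hypothetical names for the corresponding instructions; the read loop retries in case the upper half ticked over between the two reads, and the restore zeroes the low half first so it cannot carry into the high half mid-update:

    #include <stdint.h>

    /* hypothetical wrappers for the mftbu/mftb/mttbu/mttb instructions */
    extern uint32_t mftbu_(void);
    extern uint32_t mftb_(void);
    extern void     mttbu_(uint32_t hi);
    extern void     mttb_(uint32_t lo);

    /* Read the 64-bit timebase with 32-bit reads: retry if the upper
     * half ticked over between the halves (the do/while used above). */
    uint64_t read_timebase_sketch(void)
    {
        uint32_t hi, lo;
        do {
            hi = mftbu_();
            lo = mftb_();
        } while (mftbu_() != hi);
        return ((uint64_t)hi << 32) | lo;
    }

    /* Restore: zero the low half first so it cannot carry into the high
     * half between the two writes (the order cpu_init uses above). */
    void write_timebase_sketch(uint64_t tb)
    {
        mttb_(0);
        mttbu_((uint32_t)(tb >> 32));
        mttb_((uint32_t)tb);
    }
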
+ */ + do { + proc_info->save_tbu = mftbu(); + proc_info->save_tbl = mftb(); + } while (mftbu() != proc_info->save_tbu); + + PE_cpu_machine_quiesce(proc_info->cpu_id); +} + + +/* + * Routine: cpu_signal + * Function: + * Here is where we send a message to another processor. So far we only have two: + * SIGPast and SIGPdebug. SIGPast is used to preempt and kick off threads (this is + * currently disabled). SIGPdebug is used to enter the debugger. + * + * We set up the SIGP function to indicate that this is a simple message and set the + * order code (MPsigpParm0) to SIGPast or SIGPdebug). After finding the per_processor + * block for the target, we lock the message block. Then we set the parameter(s). + * Next we change the lock (also called "busy") to "passing" and finally signal + * the other processor. Note that we only wait about 1ms to get the message lock. + * If we time out, we return failure to our caller. It is their responsibility to + * recover. + */ +kern_return_t +cpu_signal( + int target, + int signal, + unsigned int p1, + unsigned int p2) { - if ( cpu != master_cpu) - while (!((*(volatile short *)&per_proc_info[cpu].cpu_flags) & SleepState)) {}; + + unsigned int holdStat; + struct per_proc_info *tpproc, *mpproc; + int busybitset=0; + +#if DEBUG + if(((unsigned int)target) >= MAX_CPUS) panic("cpu_signal: invalid target CPU - %08X\n", target); +#endif + + mpproc = getPerProc(); /* Point to our block */ + tpproc = PerProcTable[target].ppe_vaddr; /* Point to the target's block */ + if(mpproc == tpproc) return KERN_FAILURE; /* Cannot signal ourselves */ + + if(!tpproc->running) return KERN_FAILURE; + + if (!(tpproc->cpu_flags & SignalReady)) return KERN_FAILURE; + + if((tpproc->MPsigpStat & MPsigpMsgp) == MPsigpMsgp) { /* Is there an unreceived message already pending? */ + + if(signal == SIGPwake) { /* SIGPwake can merge into all others... */ + mpproc->hwCtr.numSIGPmwake++; /* Account for merged wakes */ + return KERN_SUCCESS; + } + + if((signal == SIGPast) && (tpproc->MPsigpParm0 == SIGPast)) { /* We can merge ASTs */ + mpproc->hwCtr.numSIGPmast++; /* Account for merged ASTs */ + return KERN_SUCCESS; /* Don't bother to send this one... */ + } + + if (tpproc->MPsigpParm0 == SIGPwake) { + if (hw_lock_mbits(&tpproc->MPsigpStat, (MPsigpMsgp | MPsigpAck), + (MPsigpBusy | MPsigpPass ), MPsigpBusy, 0)) { + busybitset = 1; + mpproc->hwCtr.numSIGPmwake++; + } + } + } + + if((busybitset == 0) && + (!hw_lock_mbits(&tpproc->MPsigpStat, MPsigpMsgp, 0, MPsigpBusy, + (gPEClockFrequencyInfo.timebase_frequency_hz >> 11)))) { /* Try to lock the message block with a .5ms timeout */ + mpproc->hwCtr.numSIGPtimo++; /* Account for timeouts */ + return KERN_FAILURE; /* Timed out, take your ball and go home... */ + } + + holdStat = MPsigpBusy | MPsigpPass | (MPsigpSigp << 8) | mpproc->cpu_number; /* Set up the signal status word */ + tpproc->MPsigpParm0 = signal; /* Set message order */ + tpproc->MPsigpParm1 = p1; /* Set additional parm */ + tpproc->MPsigpParm2 = p2; /* Set additional parm */ + + __asm__ volatile("sync"); /* Make sure it's all there */ + + tpproc->MPsigpStat = holdStat; /* Set status and pass the lock */ + __asm__ volatile("eieio"); /* I'm a paraniod freak */ + + if (busybitset == 0) + PE_cpu_signal(mpproc->cpu_id, tpproc->cpu_id); /* Kick the other processor */ + + return KERN_SUCCESS; /* All is goodness and rainbows... 
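
From the caller's side, a failure return from cpu_signal means the ~0.5 ms message-lock timeout expired (or the target is not yet running/SignalReady), and recovery is explicitly the caller's job. A hypothetical kernel-side usage sketch, assuming the declarations in this file:

    /* assumes kernel context; cpu_signal, SIGPast, and KERN_SUCCESS are
     * the declarations above */
    void poke_cpu_for_ast_sketch(int target_cpu)
    {
        if (cpu_signal(target_cpu, SIGPast, 0, 0) != KERN_SUCCESS) {
            /* message block stayed busy for ~0.5 ms, or the target is
             * not ready; the signal layer will not retry for us, so
             * back off and try again later or give up */
        }
    }
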
*/ } -perfTrap perfCpuSigHook = 0; /* Pointer to CHUD cpu signal hook routine */ /* + * Routine: cpu_signal_handler + * Function: * Here is where we implement the receiver of the signaling protocol. * We wait for the signal status area to be passed to us. Then we snarf * up the status, the sender, and the 3 potential parms. Next we release * the lock and signal the other guy. */ - void cpu_signal_handler( void) @@ -520,30 +535,29 @@ cpu_signal_handler( unsigned int holdStat, holdParm0, holdParm1, holdParm2, mtype; unsigned int *parmAddr; - struct per_proc_info *pproc; /* Area for my per_proc address */ + struct per_proc_info *proc_info; int cpu; - struct SIGtimebase *timebaseAddr; - natural_t tbu, tbu2, tbl; broadcastFunc xfunc; cpu = cpu_number(); /* Get the CPU number */ - pproc = &per_proc_info[cpu]; /* Point to our block */ - + + proc_info = getPerProc(); + /* * Since we've been signaled, wait about 31 ms for the signal lock to pass */ - if(!hw_lock_mbits(&pproc->MPsigpStat, (MPsigpMsgp | MPsigpAck), (MPsigpBusy | MPsigpPass), + if(!hw_lock_mbits(&proc_info->MPsigpStat, (MPsigpMsgp | MPsigpAck), (MPsigpBusy | MPsigpPass), (MPsigpBusy | MPsigpPass | MPsigpAck), (gPEClockFrequencyInfo.timebase_frequency_hz >> 5))) { panic("cpu_signal_handler: Lock pass timed out\n"); } - holdStat = pproc->MPsigpStat; /* Snarf stat word */ - holdParm0 = pproc->MPsigpParm0; /* Snarf parameter */ - holdParm1 = pproc->MPsigpParm1; /* Snarf parameter */ - holdParm2 = pproc->MPsigpParm2; /* Snarf parameter */ + holdStat = proc_info->MPsigpStat; /* Snarf stat word */ + holdParm0 = proc_info->MPsigpParm0; /* Snarf parameter */ + holdParm1 = proc_info->MPsigpParm1; /* Snarf parameter */ + holdParm2 = proc_info->MPsigpParm2; /* Snarf parameter */ __asm__ volatile("isync"); /* Make sure we don't unlock until memory is in */ - pproc->MPsigpStat = holdStat & ~(MPsigpMsgp | MPsigpAck | MPsigpFunc); /* Release lock */ + proc_info->MPsigpStat = holdStat & ~(MPsigpMsgp | MPsigpAck | MPsigpFunc); /* Release lock */ switch ((holdStat & MPsigpFunc) >> 8) { /* Decode function code */ @@ -555,54 +569,21 @@ cpu_signal_handler( switch (holdParm0) { /* Decode SIGP message order */ case SIGPast: /* Should we do an AST? */ - pproc->hwCtr.numSIGPast++; /* Count this one */ + proc_info->hwCtr.numSIGPast++; /* Count this one */ #if 0 kprintf("cpu_signal_handler: AST check on cpu %x\n", cpu_number()); #endif - ast_check(cpu_to_processor(cpu)); + ast_check((processor_t)proc_info->processor); return; /* All done... */ case SIGPcpureq: /* CPU specific function? 
*/ - pproc->hwCtr.numSIGPcpureq++; /* Count this one */ + proc_info->hwCtr.numSIGPcpureq++; /* Count this one */ switch (holdParm1) { /* Select specific function */ - case CPRQtemp: /* Get the temperature */ - parmAddr = (unsigned int *)holdParm2; /* Get the destination address */ - parmAddr[1] = ml_read_temp(); /* Get the core temperature */ - eieio(); /* Force order */ - sync(); /* Force to memory */ - parmAddr[0] = 0; /* Show we're done */ - return; - case CPRQtimebase: - timebaseAddr = (struct SIGtimebase *)holdParm2; - - if(pproc->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) - pproc->time_base_enable(pproc->cpu_id, FALSE); - - timebaseAddr->abstime = 0; /* Touch to force into cache */ - sync(); - - do { - asm volatile(" mftbu %0" : "=r" (tbu)); - asm volatile(" mftb %0" : "=r" (tbl)); - asm volatile(" mftbu %0" : "=r" (tbu2)); - } while (tbu != tbu2); - - timebaseAddr->abstime = ((uint64_t)tbu << 32) | tbl; - sync(); /* Force order */ - - timebaseAddr->avail = TRUE; - - while (*(volatile int *)&(syncClkSpot.ready) == FALSE); - - if(pproc->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) - pproc->time_base_enable(pproc->cpu_id, TRUE); - - timebaseAddr->done = TRUE; - + cpu_timebase_signal_handler(proc_info, (struct SIGtimebase *)holdParm2); return; case CPRQsegload: @@ -611,7 +592,7 @@ cpu_signal_handler( case CPRQchud: parmAddr = (unsigned int *)holdParm2; /* Get the destination address */ if(perfCpuSigHook) { - struct savearea *ssp = current_act()->mact.pcb; + struct savearea *ssp = current_thread()->machine.pcb; if(ssp) { (perfCpuSigHook)(parmAddr[1] /* request */, ssp, 0, 0); } @@ -631,12 +612,9 @@ cpu_signal_handler( case CPRQsps: { - extern void ml_set_processor_speed_slave(unsigned long speed); - - ml_set_processor_speed_slave(holdParm2); - return; - } - + ml_set_processor_speed_slave(holdParm2); + return; + } default: panic("cpu_signal_handler: unknown CPU request - %08X\n", holdParm1); return; @@ -645,18 +623,18 @@ cpu_signal_handler( case SIGPdebug: /* Enter the debugger? */ - pproc->hwCtr.numSIGPdebug++; /* Count this one */ - debugger_is_slave[cpu]++; /* Bump up the count to show we're here */ + proc_info->hwCtr.numSIGPdebug++; /* Count this one */ + proc_info->debugger_is_slave++; /* Bump up the count to show we're here */ hw_atomic_sub(&debugger_sync, 1); /* Show we've received the 'rupt */ __asm__ volatile("tw 4,r3,r3"); /* Enter the debugger */ return; /* All done now... */ case SIGPwake: /* Wake up CPU */ - pproc->hwCtr.numSIGPwake++; /* Count this one */ + proc_info->hwCtr.numSIGPwake++; /* Count this one */ return; /* No need to do anything, the interrupt does it all... */ case SIGPcall: /* Call function on CPU */ - pproc->hwCtr.numSIGPcall++; /* Count this one */ + proc_info->hwCtr.numSIGPcall++; /* Count this one */ xfunc = holdParm1; /* Do this since I can't seem to figure C out */ xfunc(holdParm2); /* Call the passed function */ return; /* Done... */ @@ -675,204 +653,428 @@ cpu_signal_handler( panic("cpu_signal_handler: we should never get here\n"); } + /* - * Here is where we send a message to another processor. So far we only have two: - * SIGPast and SIGPdebug. SIGPast is used to preempt and kick off threads (this is - * currently disabled). SIGPdebug is used to enter the debugger. - * - * We set up the SIGP function to indicate that this is a simple message and set the - * order code (MPsigpParm0) to SIGPast or SIGPdebug). After finding the per_processor - * block for the target, we lock the message block. Then we set the parameter(s). 
- * Next we change the lock (also called "busy") to "passing" and finally signal - * the other processor. Note that we only wait about 1ms to get the message lock. - * If we time out, we return failure to our caller. It is their responsibility to - * recover. + * Routine: cpu_sync_timebase + * Function: */ - -kern_return_t -cpu_signal( - int target, - int signal, - unsigned int p1, - unsigned int p2) +void +cpu_sync_timebase( + void) { + natural_t tbu, tbl; + boolean_t intr; + struct SIGtimebase syncClkSpot; - unsigned int holdStat, holdParm0, holdParm1, holdParm2, mtype; - struct per_proc_info *tpproc, *mpproc; /* Area for per_proc addresses */ - int cpu; - int busybitset =0; + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ -#if DEBUG - if(target > NCPUS) panic("cpu_signal: invalid target CPU - %08X\n", target); -#endif + syncClkSpot.avail = FALSE; + syncClkSpot.ready = FALSE; + syncClkSpot.done = FALSE; - cpu = cpu_number(); /* Get our CPU number */ - if(target == cpu) return KERN_FAILURE; /* Don't play with ourselves */ - if(!machine_slot[target].running) return KERN_FAILURE; /* These guys are too young */ + while (cpu_signal(master_cpu, SIGPcpureq, CPRQtimebase, + (unsigned int)&syncClkSpot) != KERN_SUCCESS) + continue; - mpproc = &per_proc_info[cpu]; /* Point to our block */ - tpproc = &per_proc_info[target]; /* Point to the target's block */ + while (*(volatile int *)&(syncClkSpot.avail) == FALSE) + continue; - if (!(tpproc->cpu_flags & SignalReady)) return KERN_FAILURE; - - if((tpproc->MPsigpStat & MPsigpMsgp) == MPsigpMsgp) { /* Is there an unreceived message already pending? */ + isync(); - if(signal == SIGPwake) { /* SIGPwake can merge into all others... */ - mpproc->hwCtr.numSIGPmwake++; /* Account for merged wakes */ - return KERN_SUCCESS; - } + /* + * We do the following to keep the compiler from generating extra stuff + * in tb set part + */ + tbu = syncClkSpot.abstime >> 32; + tbl = (uint32_t)syncClkSpot.abstime; - if((signal == SIGPast) && (tpproc->MPsigpParm0 == SIGPast)) { /* We can merge ASTs */ - mpproc->hwCtr.numSIGPmast++; /* Account for merged ASTs */ - return KERN_SUCCESS; /* Don't bother to send this one... */ - } + mttb(0); + mttbu(tbu); + mttb(tbl); - if (tpproc->MPsigpParm0 == SIGPwake) { - if (hw_lock_mbits(&tpproc->MPsigpStat, (MPsigpMsgp | MPsigpAck), - (MPsigpBusy | MPsigpPass ), MPsigpBusy, 0)) { - busybitset = 1; - mpproc->hwCtr.numSIGPmwake++; - } - } - } - - if((busybitset == 0) && - (!hw_lock_mbits(&tpproc->MPsigpStat, MPsigpMsgp, 0, MPsigpBusy, - (gPEClockFrequencyInfo.timebase_frequency_hz >> 11)))) { /* Try to lock the message block with a .5ms timeout */ - mpproc->hwCtr.numSIGPtimo++; /* Account for timeouts */ - return KERN_FAILURE; /* Timed out, take your ball and go home... */ - } + syncClkSpot.ready = TRUE; - holdStat = MPsigpBusy | MPsigpPass | (MPsigpSigp << 8) | cpu; /* Set up the signal status word */ - tpproc->MPsigpParm0 = signal; /* Set message order */ - tpproc->MPsigpParm1 = p1; /* Set additional parm */ - tpproc->MPsigpParm2 = p2; /* Set additional parm */ - - __asm__ volatile("sync"); /* Make sure it's all there */ - - tpproc->MPsigpStat = holdStat; /* Set status and pass the lock */ - __asm__ volatile("eieio"); /* I'm a paraniod freak */ - - if (busybitset == 0) - PE_cpu_signal(mpproc->cpu_id, tpproc->cpu_id); /* Kick the other processor */ + while (*(volatile int *)&(syncClkSpot.done) == FALSE) + continue; - return KERN_SUCCESS; /* All is goodness and rainbows... 
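The replacement cpu_sync_timebase() just below rendezvouses with cpu_timebase_signal_handler() (next hunk) through the avail/ready/done flags of struct SIGtimebase: the master freezes and publishes its timebase (avail), the slave loads the value into its own TB registers and answers (ready), and the master re-enables its timebase and finishes (done). A condensed, illustrative C model of the slave side, where tb_load() stands in for the mttb(0)/mttbu/mttb sequence:

    #include <stdint.h>
    #include <stdbool.h>

    struct sig_timebase {                 /* reduced struct SIGtimebase */
        volatile bool     avail, ready, done;
        volatile uint64_t abstime;
    };

    /* Stand-in for mttb(0); mttbu(hi); mttb(lo). Zeroing TBL first keeps
     * a carry from rippling into TBU between the two halves. */
    static void tb_load(uint64_t tb) { (void)tb; }

    void slave_sync(struct sig_timebase *spot) /* interrupts already off */
    {
        spot->avail = spot->ready = spot->done = false;
        /* ...cpu_signal(master_cpu, SIGPcpureq, CPRQtimebase, spot)... */
        while (!spot->avail)
            ;                             /* master publishes its frozen TB */
        tb_load(spot->abstime);
        spot->ready = true;               /* our TB now matches the master */
        while (!spot->done)
            ;                             /* master re-enables its TB */
    }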
*/ + (void)ml_set_interrupts_enabled(intr); } + +/* + * Routine: cpu_timebase_signal_handler + * Function: + */ void -cpu_doshutdown( - void) +cpu_timebase_signal_handler( + struct per_proc_info *proc_info, + struct SIGtimebase *timebaseAddr) { - enable_preemption(); - processor_offline(current_processor()); + unsigned int tbu, tbu2, tbl; + + if(proc_info->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) + proc_info->time_base_enable(proc_info->cpu_id, FALSE); + + timebaseAddr->abstime = 0; /* Touch to force into cache */ + sync(); + + do { + asm volatile(" mftbu %0" : "=r" (tbu)); + asm volatile(" mftb %0" : "=r" (tbl)); + asm volatile(" mftbu %0" : "=r" (tbu2)); + } while (tbu != tbu2); + + timebaseAddr->abstime = ((uint64_t)tbu << 32) | tbl; + sync(); /* Force order */ + + timebaseAddr->avail = TRUE; + + while (*(volatile int *)&(timebaseAddr->ready) == FALSE); + + if(proc_info->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) + proc_info->time_base_enable(proc_info->cpu_id, TRUE); + + timebaseAddr->done = TRUE; } -void -cpu_sleep( - void) + +/* + * Routine: cpu_control + * Function: + */ +kern_return_t +cpu_control( + int slot_num, + processor_info_t info, + unsigned int count) { struct per_proc_info *proc_info; - unsigned int cpu, i; - unsigned int wait_ncpus_sleep, ncpus_sleep; - facility_context *fowner; - extern vm_offset_t intstack; - extern vm_offset_t debstack; - extern void _restart_cpu(void); + cpu_type_t tcpu_type; + cpu_subtype_t tcpu_subtype; + processor_pm_regs_t perf_regs; + processor_control_cmd_t cmd; + boolean_t oldlevel; +#define MMCR0_SUPPORT_MASK 0xf83f1fff +#define MMCR1_SUPPORT_MASK 0xffc00000 +#define MMCR2_SUPPORT_MASK 0x80000000 + + proc_info = PerProcTable[slot_num].ppe_vaddr; + tcpu_type = proc_info->cpu_type; + tcpu_subtype = proc_info->cpu_subtype; + cmd = (processor_control_cmd_t) info; - cpu = cpu_number(); + if (count < PROCESSOR_CONTROL_CMD_COUNT) + return(KERN_FAILURE); - proc_info = &per_proc_info[cpu]; + if ( tcpu_type != cmd->cmd_cpu_type || + tcpu_subtype != cmd->cmd_cpu_subtype) + return(KERN_FAILURE); - fowner = proc_info->FPU_owner; /* Cache this */ - if(fowner) fpu_save(fowner); /* If anyone owns FPU, save it */ - proc_info->FPU_owner = 0; /* Set no fpu owner now */ + if (perfmon_acquire_facility(current_task()) != KERN_SUCCESS) { + return(KERN_RESOURCE_SHORTAGE); /* cpu performance facility in use by another task */ + } - fowner = proc_info->VMX_owner; /* Cache this */ - if(fowner) vec_save(fowner); /* If anyone owns vectors, save it */ - proc_info->VMX_owner = 0; /* Set no vector owner now */ + switch (cmd->cmd_op) + { + case PROCESSOR_PM_CLR_PMC: /* Clear Performance Monitor Counters */ + switch (tcpu_subtype) + { + case CPU_SUBTYPE_POWERPC_750: + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + { + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + mtpmc1(0x0); + mtpmc2(0x0); + mtpmc3(0x0); + mtpmc4(0x0); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + return(KERN_SUCCESS); + } + default: + return(KERN_FAILURE); + } /* tcpu_subtype */ + case PROCESSOR_PM_SET_REGS: /* Set Performance Monitor Registers */ + switch (tcpu_subtype) + { + case CPU_SUBTYPE_POWERPC_750: + if (count < (PROCESSOR_CONTROL_CMD_COUNT + + PROCESSOR_PM_REGS_COUNT_POWERPC_750)) + return(KERN_FAILURE); + else + { + perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); +
mtpmc1(PERFMON_PMC1(perf_regs)); + mtpmc2(PERFMON_PMC2(perf_regs)); + mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); + mtpmc3(PERFMON_PMC3(perf_regs)); + mtpmc4(PERFMON_PMC4(perf_regs)); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + return(KERN_SUCCESS); + } + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + if (count < (PROCESSOR_CONTROL_CMD_COUNT + + PROCESSOR_PM_REGS_COUNT_POWERPC_7400)) + return(KERN_FAILURE); + else + { + perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); + mtpmc1(PERFMON_PMC1(perf_regs)); + mtpmc2(PERFMON_PMC2(perf_regs)); + mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); + mtpmc3(PERFMON_PMC3(perf_regs)); + mtpmc4(PERFMON_PMC4(perf_regs)); + mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + return(KERN_SUCCESS); + } + default: + return(KERN_FAILURE); + } /* switch tcpu_subtype */ + case PROCESSOR_PM_SET_MMCR: + switch (tcpu_subtype) + { + case CPU_SUBTYPE_POWERPC_750: + if (count < (PROCESSOR_CONTROL_CMD_COUNT + + PROCESSOR_PM_REGS_COUNT_POWERPC_750)) + return(KERN_FAILURE); + else + { + perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); + mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + return(KERN_SUCCESS); + } + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + if (count < (PROCESSOR_CONTROL_CMD_COUNT + + PROCESSOR_PM_REGS_COUNT_POWERPC_7400)) + return(KERN_FAILURE); + else + { + perf_regs = (processor_pm_regs_t)cmd->cmd_pm_regs; + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + mtmmcr0(PERFMON_MMCR0(perf_regs) & MMCR0_SUPPORT_MASK); + mtmmcr1(PERFMON_MMCR1(perf_regs) & MMCR1_SUPPORT_MASK); + mtmmcr2(PERFMON_MMCR2(perf_regs) & MMCR2_SUPPORT_MASK); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + return(KERN_SUCCESS); + } + default: + return(KERN_FAILURE); + } /* tcpu_subtype */ + default: + return(KERN_FAILURE); + } /* switch cmd_op */ +} - if (proc_info->cpu_number == 0) { - proc_info->cpu_flags &= BootDone; - proc_info->istackptr = (vm_offset_t)&intstack + (INTSTACK_SIZE*(cpu+1)) - FM_SIZE; - proc_info->intstack_top_ss = proc_info->istackptr; -#if MACH_KDP || MACH_KDB - proc_info->debstackptr = (vm_offset_t)&debstack + (KERNEL_STACK_SIZE*(cpu+1)) - FM_SIZE; - proc_info->debstack_top_ss = proc_info->debstackptr; -#endif /* MACH_KDP || MACH_KDB */ - proc_info->interrupts_enabled = 0; - if (proc_info->start_paddr == EXCEPTION_VECTOR(T_RESET)) { - extern void _start_cpu(void); - - resethandler_target.type = RESET_HANDLER_START; - resethandler_target.call_paddr = (vm_offset_t)_start_cpu; /* Note: these routines are always V=R */ - resethandler_target.arg__paddr = (vm_offset_t)proc_info; /* Note: these routines are always V=R */ - - ml_phys_write((vm_offset_t)&ResetHandler + 0, - resethandler_target.type); - ml_phys_write((vm_offset_t)&ResetHandler + 4, - resethandler_target.call_paddr); - ml_phys_write((vm_offset_t)&ResetHandler + 8, - resethandler_target.arg__paddr); - - __asm__ volatile("sync"); - __asm__ volatile("isync"); - } +/* + * Routine: cpu_info_count + * Function: + */ +kern_return_t +cpu_info_count( + processor_flavor_t flavor, + unsigned int *count) 
+{ + cpu_subtype_t tcpu_subtype; - wait_ncpus_sleep = real_ncpus-1; - ncpus_sleep = 0; - while (wait_ncpus_sleep != ncpus_sleep) { - ncpus_sleep = 0; - for(i=1; i < real_ncpus ; i++) { - if ((*(volatile short *)&per_proc_info[i].cpu_flags) & SleepState) - ncpus_sleep++; - } - } + /* + * For now, we just assume that all CPUs are of the same type + */ + tcpu_subtype = PerProcTable[master_cpu].ppe_vaddr->cpu_subtype; + switch (flavor) { + case PROCESSOR_PM_REGS_INFO: + switch (tcpu_subtype) { + case CPU_SUBTYPE_POWERPC_750: + + *count = PROCESSOR_PM_REGS_COUNT_POWERPC_750; + return(KERN_SUCCESS); + + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + + *count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400; + return(KERN_SUCCESS); + + default: + *count = 0; + return(KERN_INVALID_ARGUMENT); + } /* switch tcpu_subtype */ + + case PROCESSOR_TEMPERATURE: + *count = PROCESSOR_TEMPERATURE_COUNT; + return (KERN_SUCCESS); + + default: + *count = 0; + return(KERN_INVALID_ARGUMENT); + } +} - PE_cpu_machine_quiesce(proc_info->cpu_id); + +/* + * Routine: cpu_info + * Function: + */ +kern_return_t +cpu_info( + processor_flavor_t flavor, + int slot_num, + processor_info_t info, + unsigned int *count) +{ + cpu_subtype_t tcpu_subtype; + processor_pm_regs_t perf_regs; + boolean_t oldlevel; + + tcpu_subtype = PerProcTable[slot_num].ppe_vaddr->cpu_subtype; + + switch (flavor) { + case PROCESSOR_PM_REGS_INFO: + + perf_regs = (processor_pm_regs_t) info; + + switch (tcpu_subtype) { + case CPU_SUBTYPE_POWERPC_750: + + if (*count < PROCESSOR_PM_REGS_COUNT_POWERPC_750) + return(KERN_FAILURE); + + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + PERFMON_MMCR0(perf_regs) = mfmmcr0(); + PERFMON_PMC1(perf_regs) = mfpmc1(); + PERFMON_PMC2(perf_regs) = mfpmc2(); + PERFMON_MMCR1(perf_regs) = mfmmcr1(); + PERFMON_PMC3(perf_regs) = mfpmc3(); + PERFMON_PMC4(perf_regs) = mfpmc4(); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + + *count = PROCESSOR_PM_REGS_COUNT_POWERPC_750; + return(KERN_SUCCESS); + + case CPU_SUBTYPE_POWERPC_7400: + case CPU_SUBTYPE_POWERPC_7450: + + if (*count < PROCESSOR_PM_REGS_COUNT_POWERPC_7400) + return(KERN_FAILURE); + + oldlevel = ml_set_interrupts_enabled(FALSE); /* disable interrupts */ + PERFMON_MMCR0(perf_regs) = mfmmcr0(); + PERFMON_PMC1(perf_regs) = mfpmc1(); + PERFMON_PMC2(perf_regs) = mfpmc2(); + PERFMON_MMCR1(perf_regs) = mfmmcr1(); + PERFMON_PMC3(perf_regs) = mfpmc3(); + PERFMON_PMC4(perf_regs) = mfpmc4(); + PERFMON_MMCR2(perf_regs) = mfmmcr2(); + ml_set_interrupts_enabled(oldlevel); /* enable interrupts */ + + *count = PROCESSOR_PM_REGS_COUNT_POWERPC_7400; + return(KERN_SUCCESS); + + default: + return(KERN_FAILURE); + } /* switch tcpu_subtype */ + + case PROCESSOR_TEMPERATURE: /* Get the temperature of a processor */ + + *info = -1; /* Get the temperature */ + return(KERN_FAILURE); + + default: + return(KERN_INVALID_ARGUMENT); + + } /* flavor */ } -void -cpu_sync_timebase( - void) + +/* + * Routine: cpu_to_processor + * Function: + */ +processor_t +cpu_to_processor( + int cpu) { - natural_t tbu, tbl; - boolean_t intr; + return ((processor_t)PerProcTable[cpu].ppe_vaddr->processor); +} - intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ - /* Note that syncClkSpot is in a cache aligned area */ - syncClkSpot.avail = FALSE; - syncClkSpot.ready = FALSE; - syncClkSpot.done = FALSE; +/* + * Routine: slot_type + * Function: + */ +cpu_type_t +slot_type( + int slot_num) +{ + return (PerProcTable[slot_num].ppe_vaddr->cpu_type); +} - 
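These accessors all share one shape: per-CPU state is no longer indexed out of a flat per_proc_info[] array but reached through PerProcTable[slot].ppe_vaddr, which lets entries be allocated per CPU (see cpu_per_proc_alloc() later in the patch). A reduced sketch of the indirection, with illustrative fields only:

    /* Illustrative model; the real per_proc_info carries far more state. */
    struct per_proc_info  { int cpu_type, cpu_subtype, cpu_threadtype; };
    struct per_proc_entry { struct per_proc_info *ppe_vaddr; };

    extern struct per_proc_entry PerProcTable[];

    int slot_type_model(int slot_num)
    {
        /* one extra pointer load versus the old per_proc_info[slot_num],
         * in exchange for per-CPU allocation of the entries */
        return PerProcTable[slot_num].ppe_vaddr->cpu_type;
    }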
while (cpu_signal(master_cpu, SIGPcpureq, CPRQtimebase, - (unsigned int)&syncClkSpot) != KERN_SUCCESS) - continue; - while (*(volatile int *)&(syncClkSpot.avail) == FALSE) - continue; +/* + * Routine: slot_subtype + * Function: + */ +cpu_subtype_t +slot_subtype( + int slot_num) +{ + return (PerProcTable[slot_num].ppe_vaddr->cpu_subtype); +} - isync(); - /* - * We do the following to keep the compiler from generating extra stuff - * in tb set part - */ - tbu = syncClkSpot.abstime >> 32; - tbl = (uint32_t)syncClkSpot.abstime; +/* + * Routine: slot_threadtype + * Function: + */ +cpu_threadtype_t +slot_threadtype( + int slot_num) +{ + return (PerProcTable[slot_num].ppe_vaddr->cpu_threadtype); +} - mttb(0); - mttbu(tbu); - mttb(tbl); - syncClkSpot.ready = TRUE; +/* + * Routine: cpu_type + * Function: + */ +cpu_type_t +cpu_type(void) +{ + return (getPerProc()->cpu_type); +} - while (*(volatile int *)&(syncClkSpot.done) == FALSE) - continue; - (void)ml_set_interrupts_enabled(intr); +/* + * Routine: cpu_subtype + * Function: + */ +cpu_subtype_t +cpu_subtype(void) +{ + return (getPerProc()->cpu_subtype); +} + + +/* + * Routine: cpu_threadtype + * Function: + */ +cpu_threadtype_t +cpu_threadtype(void) +{ + return (getPerProc()->cpu_threadtype); } /* diff --git a/osfmk/ppc/cpu_capabilities.h b/osfmk/ppc/cpu_capabilities.h index 9eb971a25..7c7539426 100644 --- a/osfmk/ppc/cpu_capabilities.h +++ b/osfmk/ppc/cpu_capabilities.h @@ -19,29 +19,16 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef PRIVATE #ifndef _PPC_CPU_CAPABILITIES_H #define _PPC_CPU_CAPABILITIES_H -/* Sadly, some clients of this interface misspell __APPLE_API_PRIVATE. - * To avoid breaking them, we accept the incorrect _APPLE_API_PRIVATE. - */ -#ifdef _APPLE_API_PRIVATE -#ifndef __APPLE_API_PRIVATE -#define __APPLE_API_PRIVATE -#endif /* __APPLE_API_PRIVATE */ -#endif /* _APPLE_API_PRIVATE */ - -#ifndef __APPLE_API_PRIVATE -#error cpu_capabilities.h is for Apple Internal use only -#else /* __APPLE_API_PRIVATE */ - /* _cpu_capabilities * * This is the authoritative way to determine from user mode what * implementation-specific processor features are available. - * This API only supported for Apple internal use. - * + * This API is only supported for Apple internal use. 
*/ #ifndef __ASSEMBLER__ @@ -58,11 +45,12 @@ extern int _cpu_capabilities; #define kCache64 0x00000008 #define kCache128 0x00000010 #define kDcbaRecommended 0x00000020 // PPC: dcba is available and recommended -#define kDcbaAvailable 0x00000040 // PPC: dcba is available but is not recommended +#define kDcbaAvailable 0x00000040 // PPC: dcba is available (but may or may not be recommended) #define kDataStreamsRecommended 0x00000080 // PPC: dst, dstt, dstst, dss, and dssall instructions available and recommended -#define kDataStreamsAvailable 0x00000100 // PPC: dst, dstt, dstst, dss, and dssall instructions available but not recommended +#define kDataStreamsAvailable 0x00000100 // PPC: dst, dstt, dstst, dss, and dssall instructions available (may or may not be rec'd) #define kDcbtStreamsRecommended 0x00000200 // PPC: enhanced dcbt instruction available and recommended -#define kDcbtStreamsAvailable 0x00000400 // PPC: enhanced dcbt instruction available and recommended +#define kDcbtStreamsAvailable 0x00000400 // PPC: enhanced dcbt instruction available (but may or may not be recommended) +#define kFastThreadLocalStorage 0x00000800 // TLS ptr is kept in a user-mode-readable register #define kUP 0x00008000 // set if (kNumCPUs == 1) #define kNumCPUs 0x00FF0000 // number of CPUs (see _NumCPUs() below) @@ -102,12 +90,15 @@ static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) > #define _COMM_PAGE_BASE_ADDRESS (-8*4096) // start at page -8, ie 0xFFFF8000 #define _COMM_PAGE_AREA_LENGTH ( 7*4096) // reserved length of entire comm area #define _COMM_PAGE_AREA_USED ( 2*4096) // we use two pages so far + +/* The Objective-C runtime fixed address page to optimize message dispatch */ +#define _OBJC_PAGE_BASE_ADDRESS (-20*4096) // start at page -20, ie 0xFFFEC000 /* data in the comm page */ #define _COMM_PAGE_SIGNATURE (_COMM_PAGE_BASE_ADDRESS+0x000) // first few bytes are a signature #define _COMM_PAGE_VERSION (_COMM_PAGE_BASE_ADDRESS+0x01E) // 16-bit version# -#define _COMM_PAGE_THIS_VERSION 1 // this is version 1 of the commarea format +#define _COMM_PAGE_THIS_VERSION 2 // this is version 2 of the commarea format #define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_BASE_ADDRESS+0x020) // mirror of extern int _cpu_capabilities #define _COMM_PAGE_NCPUS (_COMM_PAGE_BASE_ADDRESS+0x021) // number of configured CPUs @@ -115,22 +106,30 @@ static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) > #define _COMM_PAGE_64_BIT (_COMM_PAGE_BASE_ADDRESS+0x025) // nonzero if 64-bit processor #define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_BASE_ADDRESS+0x026) // cache line size (16-bit field) -#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_BASE_ADDRESS+0x030) // 16 unused bytes +#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_BASE_ADDRESS+0x028) // 24 unused bytes #define _COMM_PAGE_2_TO_52 (_COMM_PAGE_BASE_ADDRESS+0x040) // double float constant 2**52 #define _COMM_PAGE_10_TO_6 (_COMM_PAGE_BASE_ADDRESS+0x048) // double float constant 10**6 - -#define _COMM_PAGE_UNUSED2 (_COMM_PAGE_BASE_ADDRESS+0x050) // 16 unused bytes +#define _COMM_PAGE_MAGIC_FE (_COMM_PAGE_BASE_ADDRESS+0x050) // magic constant 0xFEFEFEFEFEFEFEFF (to find 0s) +#define _COMM_PAGE_MAGIC_80 (_COMM_PAGE_BASE_ADDRESS+0x058) // magic constant 0x8080808080808080 (to find 0s) #define _COMM_PAGE_TIMEBASE (_COMM_PAGE_BASE_ADDRESS+0x060) // used by gettimeofday() #define _COMM_PAGE_TIMESTAMP (_COMM_PAGE_BASE_ADDRESS+0x068) // used by gettimeofday() #define _COMM_PAGE_SEC_PER_TICK (_COMM_PAGE_BASE_ADDRESS+0x070) // used by gettimeofday() 
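These bits, mirrored into the comm page at _COMM_PAGE_CPU_CAPABILITIES, are the sanctioned way for user code to discover processor features. A hypothetical probe, assuming this header is visible as <ppc/cpu_capabilities.h> and the caller is Apple-internal code:

    #include <stdio.h>
    #include <ppc/cpu_capabilities.h>  /* _cpu_capabilities, k* bits, _NumCPUs() */

    static void report_features(void)
    {
        int caps = _cpu_capabilities;

        if (caps & kDcbaRecommended)
            puts("dcba: available and recommended");
        else if (caps & kDcbaAvailable)
            puts("dcba: available, not necessarily recommended");

        if (caps & kDataStreamsRecommended)
            puts("dst/dss data streams: recommended");

        printf("%d CPU(s)%s\n", _NumCPUs(),
               (caps & kUP) ? ", uniprocessor" : "");
    }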
-#define _COMM_PAGE_UNUSED3 (_COMM_PAGE_BASE_ADDRESS+0x080) // 384 unused bytes - /* jump table (bla to this address, which may be a branch to the actual code somewhere else) */ /* When new jump table entries are added, corresponding symbols should be added below */ +#define _COMM_PAGE_COMPARE_AND_SWAP32 (_COMM_PAGE_BASE_ADDRESS+0x080) // compare-and-swap word, no barrier +#define _COMM_PAGE_COMPARE_AND_SWAP64 (_COMM_PAGE_BASE_ADDRESS+0x0c0) // compare-and-swap doubleword, no barrier +#define _COMM_PAGE_ENQUEUE (_COMM_PAGE_BASE_ADDRESS+0x100) // enqueue +#define _COMM_PAGE_DEQUEUE (_COMM_PAGE_BASE_ADDRESS+0x140) // dequeue +#define _COMM_PAGE_MEMORY_BARRIER (_COMM_PAGE_BASE_ADDRESS+0x180) // memory barrier +#define _COMM_PAGE_ATOMIC_ADD32 (_COMM_PAGE_BASE_ADDRESS+0x1a0) // add atomic word +#define _COMM_PAGE_ATOMIC_ADD64 (_COMM_PAGE_BASE_ADDRESS+0x1c0) // add atomic doubleword + +#define _COMM_PAGE_UNUSED3 (_COMM_PAGE_BASE_ADDRESS+0x1e0) // 32 unused bytes + #define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_BASE_ADDRESS+0x200) // mach_absolute_time() #define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_BASE_ADDRESS+0x220) // spinlock_try() #define _COMM_PAGE_SPINLOCK_LOCK (_COMM_PAGE_BASE_ADDRESS+0x260) // spinlock_lock() @@ -140,7 +139,9 @@ static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) > #define _COMM_PAGE_FLUSH_DCACHE (_COMM_PAGE_BASE_ADDRESS+0x4e0) // sys_dcache_flush() #define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_BASE_ADDRESS+0x520) // sys_icache_invalidate() #define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_BASE_ADDRESS+0x580) // pthread_self() + #define _COMM_PAGE_UNUSED4 (_COMM_PAGE_BASE_ADDRESS+0x5a0) // 32 unused bytes + #define _COMM_PAGE_RELINQUISH (_COMM_PAGE_BASE_ADDRESS+0x5c0) // used by spinlocks #define _COMM_PAGE_UNUSED5 (_COMM_PAGE_BASE_ADDRESS+0x5e0) // 32 unused bytes @@ -150,11 +151,13 @@ static __inline__ int _NumCPUs( void ) { return (_cpu_capabilities & kNumCPUs) > #define _COMM_PAGE_MEMCPY (_COMM_PAGE_BASE_ADDRESS+0x7a0) // memcpy() #define _COMM_PAGE_MEMMOVE (_COMM_PAGE_BASE_ADDRESS+0x7a0) // memmove() -#define _COMM_PAGE_UNUSED6 (_COMM_PAGE_BASE_ADDRESS+0xF80) // 128 unused bytes +#define _COMM_PAGE_COMPARE_AND_SWAP32B (_COMM_PAGE_BASE_ADDRESS+0xf80) // compare-and-swap word w barrier +#define _COMM_PAGE_COMPARE_AND_SWAP64B (_COMM_PAGE_BASE_ADDRESS+0xfc0) // compare-and-swap doubleword w barrier -#define _COMM_PAGE_BIGCOPY (_COMM_PAGE_BASE_ADDRESS+0x1000)// very-long-operand copies +#define _COMM_PAGE_MEMSET_PATTERN (_COMM_PAGE_BASE_ADDRESS+0x1000)// used by nonzero memset() +#define _COMM_PAGE_BIGCOPY (_COMM_PAGE_BASE_ADDRESS+0x1140)// very-long-operand copies -#define _COMM_PAGE_END (_COMM_PAGE_BASE_ADDRESS+0x1600)// end of common page +#define _COMM_PAGE_END (_COMM_PAGE_BASE_ADDRESS+0x1700)// end of commpage area #ifdef __ASSEMBLER__ #ifdef __COMM_PAGE_SYMBOLS @@ -165,6 +168,13 @@ symbol_name: nop .text // Required to make a well behaved symbol file + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32, _COMM_PAGE_COMPARE_AND_SWAP32) + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64, _COMM_PAGE_COMPARE_AND_SWAP64) + CREATE_COMM_PAGE_SYMBOL(___atomic_enqueue, _COMM_PAGE_ENQUEUE) + CREATE_COMM_PAGE_SYMBOL(___atomic_dequeue, _COMM_PAGE_DEQUEUE) + CREATE_COMM_PAGE_SYMBOL(___memory_barrier, _COMM_PAGE_MEMORY_BARRIER) + CREATE_COMM_PAGE_SYMBOL(___atomic_add32, _COMM_PAGE_ATOMIC_ADD32) + CREATE_COMM_PAGE_SYMBOL(___atomic_add64, _COMM_PAGE_ATOMIC_ADD64) CREATE_COMM_PAGE_SYMBOL(___mach_absolute_time, _COMM_PAGE_ABSOLUTE_TIME) 
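Each jump-table slot above is a fixed entry address, so user code reaches a routine with a branch-and-link to a constant; in C that collapses to a call through a casted function pointer. An illustrative (not libSystem's actual) invocation of the mach_absolute_time entry:

    #include <stdint.h>

    /* 32-bit layout: page -8 is 0xFFFF8000; the abstime entry sits at +0x200. */
    #define COMM_PAGE_BASE          ((uintptr_t)0xFFFF8000u)
    #define COMM_PAGE_ABSOLUTE_TIME (COMM_PAGE_BASE + 0x200)

    typedef uint64_t (*abstime_fn)(void);

    static uint64_t commpage_mach_absolute_time(void)
    {
        /* real callers link against the ___mach_absolute_time symbol
         * created above rather than hard-coding the address */
        return ((abstime_fn)COMM_PAGE_ABSOLUTE_TIME)();
    }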
CREATE_COMM_PAGE_SYMBOL(___spin_lock_try, _COMM_PAGE_SPINLOCK_TRY) CREATE_COMM_PAGE_SYMBOL(___spin_lock, _COMM_PAGE_SPINLOCK_LOCK) @@ -179,7 +189,11 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___bcopy, _COMM_PAGE_BCOPY) CREATE_COMM_PAGE_SYMBOL(___memcpy, _COMM_PAGE_MEMCPY) // CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap32b, _COMM_PAGE_COMPARE_AND_SWAP32B) + CREATE_COMM_PAGE_SYMBOL(___compare_and_swap64b, _COMM_PAGE_COMPARE_AND_SWAP64B) + CREATE_COMM_PAGE_SYMBOL(___memset_pattern, _COMM_PAGE_MEMSET_PATTERN) CREATE_COMM_PAGE_SYMBOL(___bigcopy, _COMM_PAGE_BIGCOPY) + CREATE_COMM_PAGE_SYMBOL(___end_comm_page, _COMM_PAGE_END) .data // Required to make a well behaved symbol file @@ -188,5 +202,5 @@ symbol_name: nop #endif /* __COMM_PAGE_SYMBOLS */ #endif /* __ASSEMBLER__ */ -#endif /* __APPLE_API_PRIVATE */ #endif /* _PPC_CPU_CAPABILITIES_H */ +#endif /* PRIVATE */ diff --git a/osfmk/ppc/cpu_data.h b/osfmk/ppc/cpu_data.h index 19f7e59d7..c5021b134 100644 --- a/osfmk/ppc/cpu_data.h +++ b/osfmk/ppc/cpu_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,48 +27,31 @@ #ifndef PPC_CPU_DATA #define PPC_CPU_DATA -typedef struct -{ - int preemption_level; - int simple_lock_count; - int interrupt_level; -} cpu_data_t; +#ifdef MACH_KERNEL_PRIVATE -#define disable_preemption _disable_preemption -#define enable_preemption _enable_preemption -#define enable_preemption_no_check _enable_preemption_no_check -#define mp_disable_preemption _disable_preemption -#define mp_enable_preemption _enable_preemption -#define mp_enable_preemption_no_check _enable_preemption_no_check +#include +#include -extern __inline__ thread_act_t current_act(void) +extern thread_t current_thread(void); +extern __inline__ thread_t current_thread(void) { - thread_act_t act; - __asm__ volatile("mfsprg %0,1" : "=r" (act)); - return act; -}; + thread_t result; -/* - * Note that the following function is ONLY guaranteed when preemption or interrupts are disabled - */ -extern __inline__ struct per_proc_info *getPerProc(void) -{ - struct per_proc_info *perproc; - __asm__ volatile("mfsprg %0,0" : "=r" (perproc)); - return perproc; -}; + __asm__ volatile("mfsprg %0,1" : "=r" (result)); -#define current_thread() current_act()->thread + return (result); +} -extern void set_machine_current_act(thread_act_t); +#define getPerProc() current_thread()->machine.PerProc extern int get_preemption_level(void); -extern void disable_preemption(void); -extern void enable_preemption(void); -extern void enable_preemption_no_check(void); -extern void mp_disable_preemption(void); -extern void mp_enable_preemption(void); -extern void mp_enable_preemption_no_check(void); -extern int get_simple_lock_count(void); +extern void _enable_preemption_no_check(void); + +#define enable_preemption_no_check() _enable_preemption_no_check() +#define mp_disable_preemption() _disable_preemption() +#define mp_enable_preemption() _enable_preemption() +#define mp_enable_preemption_no_check() _enable_preemption_no_check() + +#endif /* MACH_KERNEL_PRIVATE */ #endif /* PPC_CPU_DATA */ diff --git a/osfmk/ppc/cpu_internal.h b/osfmk/ppc/cpu_internal.h new file mode 100644 index 000000000..034f5167b --- /dev/null +++ b/osfmk/ppc/cpu_internal.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +#ifndef _PPC_CPU_INTERNAL_H_ +#define _PPC_CPU_INTERNAL_H_ + +#include +#include + +extern void _start_cpu( + void); + +extern void cpu_bootstrap( + void); + +extern void cpu_init( + void); + +extern void cpu_machine_init( + void); + +extern void cpu_doshutdown( + void); + +extern void cpu_signal_handler( + void); + +extern kern_return_t cpu_signal( + int target, + int signal, + unsigned int p1, + unsigned int p2); + +#define SIGPast 0 /* Requests an ast on target processor */ +#define SIGPcpureq 1 /* Requests CPU specific function */ +#define SIGPdebug 2 /* Requests a debugger entry */ +#define SIGPwake 3 /* Wake up a sleeping processor */ +#define SIGPcall 4 /* Call a function on a processor */ + +#define CPRQtimebase 1 /* Get timebase of processor */ +#define CPRQsegload 2 /* Segment registers reload */ +#define CPRQscom 3 /* SCOM */ +#define CPRQchud 4 /* CHUD perfmon */ +#define CPRQsps 5 /* Set Processor Speed */ + + +extern struct per_proc_info * cpu_per_proc_alloc( + void); + +extern void cpu_per_proc_free( + struct per_proc_info *per_proc); + +extern void * console_per_proc_alloc( + boolean_t boot_processor); + +extern void console_per_proc_free( + void *per_proc_cbfr); + +extern void * chudxnu_per_proc_alloc( + boolean_t boot_processor); + +extern void chudxnu_per_proc_free( + void *per_proc_chud); + +extern kern_return_t cpu_per_proc_register( + struct per_proc_info *proc_info); + +extern unsigned int real_ncpus; +extern unsigned int max_ncpus; + +#endif /* _PPC_CPU_INTERNAL_H_ */ diff --git a/osfmk/ppc/cpu_number.h b/osfmk/ppc/cpu_number.h index 1c3626e58..ab47c79e9 100644 --- a/osfmk/ppc/cpu_number.h +++ b/osfmk/ppc/cpu_number.h @@ -22,13 +22,13 @@ /* * @OSF_COPYRIGHT@ */ +#ifdef KERNEL_PRIVATE + #ifndef _PPC_CPU_NUMBER_H_ #define _PPC_CPU_NUMBER_H_ -#include - -#ifdef __APPLE_API_UNSTABLE extern int cpu_number(void); -#endif #endif /* _PPC_CPU_NUMBER_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/cswtch.s b/osfmk/ppc/cswtch.s index 8dad11188..cd131bf4f 100644 --- a/osfmk/ppc/cswtch.s +++ b/osfmk/ppc/cswtch.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
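Together with cpu_signal() in cpu.c, the SIGP and CPRQ constants in cpu_internal.h above make up the kernel's whole IPI vocabulary. A hedged usage sketch (kern_return_t and struct SIGtimebase below are stand-ins for the real kernel types; SIGPast, SIGPcpureq, and CPRQtimebase are the #defines above):

    typedef int kern_return_t;   /* stand-in for the Mach type */
    struct SIGtimebase;          /* the avail/ready/done rendezvous area seen earlier */

    extern kern_return_t cpu_signal(int target, int signal,
                                    unsigned int p1, unsigned int p2);

    static void poke_ast(int target)
    {
        /* a SIGPast already pending on the target merges with this one */
        (void)cpu_signal(target, SIGPast, 0, 0);
    }

    static kern_return_t ask_for_timebase(int target, struct SIGtimebase *spot)
    {
        /* p1 selects the CPU-specific request, p2 carries its argument */
        return cpu_signal(target, SIGPcpureq, CPRQtimebase,
                          (unsigned int)spot);
    }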
* * @APPLE_LICENSE_HEADER_START@ * @@ -25,7 +25,6 @@ #include #include -#include #include #include #include @@ -48,10 +47,11 @@ .globl EXT(machine_load_context) LEXT(machine_load_context) - mfsprg r6,0 + mfsprg r6,1 ; Get the current activation + lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block lwz r0,PP_INTSTACK_TOP_SS(r6) stw r0,PP_ISTACKPTR(r6) - lwz r9,THREAD_TOP_ACT(r3) /* Set up the current thread */ + mr r9,r3 /* Set up the current thread */ mtsprg 1,r9 li r0,0 /* Clear a register */ lwz r3,ACT_MACT_PCB(r9) /* Get the savearea used */ @@ -78,17 +78,20 @@ LEXT(machine_load_context) * */ -/* void Call_continuation( void (*continuation)(void), vm_offset_t stack_ptr) +/* void Call_continuation( void (*continuation)(void), void *param, wait_result_t wresult, vm_offset_t stack_ptr) */ .align 5 .globl EXT(Call_continuation) LEXT(Call_continuation) - - mtlr r3 - mr r1, r4 /* Load new stack pointer */ - blr /* Jump to the continuation */ + mtlr r3 /* continuation */ + mr r3,r4 /* parameter */ + mr r4,r5 /* wait result */ + mr r1,r6 /* Load new stack pointer */ + blrl /* Jump to the continuation */ + mfsprg r3,1 + b EXT(thread_terminate) /* * Get the old kernel stack, and store into the thread structure. @@ -116,7 +119,7 @@ LEXT(Call_continuation) LEXT(Switch_context) - mfsprg r12,0 ; Get the per_proc block + lwz r12,ACT_PER_PROC(r3) ; Get the per_proc block #if DEBUG lwz r0,PP_ISTACKPTR(r12) ; (DEBUG/TRACE) make sure we are not mr. r0,r0 ; (DEBUG/TRACE) on the interrupt @@ -124,23 +127,25 @@ LEXT(Switch_context) BREAKPOINT_TRAP notonintstack: #endif - lwz r5,THREAD_TOP_ACT(r5) ; Get the new activation lwz r8,ACT_MACT_PCB(r5) ; Get the PCB for the new guy - lwz r9,cioSpace(r5) ; Get copyin/out address space + lwz r9,umwSpace(r5) ; Get user memory window address space cmpwi cr1,r4,0 ; Remeber if there is a continuation - used waaaay down below - lwz r7,CTHREAD_SELF(r5) ; Pick up the user assist word + lwz r0,CTHREAD_SELF+0(r5) ; Pick up the user assist "word" (actually a double) + lwz r7,CTHREAD_SELF+4(r5) ; both halves lwz r11,ACT_MACT_BTE(r5) ; Get BlueBox Task Environment - lwz r6,cioRelo(r5) ; Get copyin/out relocation top + lwz r6,umwRelo(r5) ; Get user memory window relocation top + stw r12,ACT_PER_PROC(r5) ; Set per_proc in new activation mtsprg 1,r5 - lwz r2,cioRelo+4(r5) ; Get copyin/out relocation bottom + lwz r2,umwRelo+4(r5) ; Get user memory window relocation bottom - stw r7,UAW(r12) ; Save the assist word for the "ultra fast path" + stw r0,UAW+0(r12) ; Save the assist word for the "ultra fast path" + stw r7,UAW+4(r12) lwz r7,ACT_MACT_SPF(r5) ; Get the special flags - sth r9,ppCIOmp+mpSpace(r12) ; Save the space - stw r6,ppCIOmp+mpNestReloc(r12) ; Save top part of physical address - stw r2,ppCIOmp+mpNestReloc+4(r12) ; Save bottom part of physical address + sth r9,ppUMWmp+mpSpace(r12) ; Save the space + stw r6,ppUMWmp+mpNestReloc(r12) ; Save top part of physical address + stw r2,ppUMWmp+mpNestReloc+4(r12) ; Save bottom part of physical address stw r11,ppbbTaskEnv(r12) ; Save the bb task env lwz r2,traceMask(0) ; Get the enabled traces stw r7,spcFlags(r12) ; Set per_proc copy of the special flags @@ -150,7 +155,7 @@ notonintstack: ori r0,r0,lo16(CutTrace) ; Trace FW call beq++ cswNoTrc ; No trace today, dude... 
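The Call_continuation rewrite above is easiest to read from C: the stub now forwards a parameter and a wait result into the continuation (hence blrl, a call, instead of blr, a jump), and a continuation that returns drops the thread into thread_terminate() on itself. A hypothetical C rendering of the new contract:

    /* Stand-in types; the real ones come from the Mach headers. */
    typedef int  wait_result_t;
    typedef void (*thread_continue_fn)(void *parameter, wait_result_t wresult);

    void Call_continuation_model(thread_continue_fn continuation,
                                 void *parameter, wait_result_t wresult,
                                 unsigned long stack_ptr)
    {
        (void)stack_ptr;                  /* the asm installs this as r1 first */
        continuation(parameter, wresult); /* blrl: control can come back */
        /* mfsprg r3,1; b EXT(thread_terminate): kill the current thread */
    }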
mr r10,r3 ; Save across trace - lwz r2,THREAD_TOP_ACT(r3) ; Trace old activation + mr r2,r3 ; Trace old activation mr r3,r11 ; Trace prev savearea sc ; Cut trace entry of context switch mr r3,r10 ; Restore @@ -158,7 +163,6 @@ notonintstack: cswNoTrc: lwz r2,curctx(r5) ; Grab our current context pointer lwz r10,FPUowner(r12) ; Grab the owner of the FPU lwz r9,VMXowner(r12) ; Grab the owner of the vector - lhz r0,PP_CPU_NUMBER(r12) ; Get our CPU number mfmsr r6 ; Get the MSR because the switched to thread should inherit it stw r11,ACT_MACT_PCB(r5) ; Dequeue the savearea we are switching to li r0,1 ; Get set to hold off quickfret @@ -173,6 +177,7 @@ cswNoTrc: lwz r2,curctx(r5) ; Grab our current context pointer bne++ cswnofloat ; Float is not ours... cmplw r10,r11 ; Is the level the same? + lhz r0,PP_CPU_NUMBER(r12) ; Get our CPU number lwz r5,FPUcpu(r2) ; Get the owning cpu bne++ cswnofloat ; Level not the same, this is not live... @@ -220,6 +225,7 @@ cswnofloat: bne++ cr5,cswnovect ; Vector is not ours... lwz r10,VMXlevel(r2) ; Get the live level cmplw r10,r11 ; Is the level the same? + lhz r0,PP_CPU_NUMBER(r12) ; Get our CPU number lwz r5,VMXcpu(r2) ; Get the owning cpu bne++ cswnovect ; Level not the same, this is not live... @@ -270,7 +276,7 @@ cswnovect: li r0,0 ; Get set to release quickfret holdoff lwz r9,SAVflags(r8) /* Get the flags */ lis r0,hi16(SwitchContextCall) /* Top part of switch context */ - li r10,MSR_SUPERVISOR_INT_OFF /* Get the switcher's MSR */ + li r10,(MASK(MSR_ME)|MASK(MSR_DR)) /* Get the switcher's MSR */ ori r0,r0,lo16(SwitchContextCall) /* Bottom part of switch context */ stw r10,savesrr1+4(r8) /* Set up for switch in */ rlwinm r9,r9,0,15,13 /* Reset the syscall flag */ @@ -313,6 +319,8 @@ swtchtocont: * with translation on. If we could, this should be done in lowmem_vectors * before translation is turned on. But we can't, dang it! * + * switch_in() runs with DR on and IR off + * * R3 = switcher's savearea (32-bit virtual) * saver4 = old thread in switcher's save * saver5 = new SRR0 in switcher's save @@ -331,7 +339,7 @@ LEXT(switch_in) lwz r5,saver5+4(r3) ; Get the srr0 value mfsprg r0,2 ; Get feature flags - lwz r9,THREAD_TOP_ACT(r4) ; Get the switched from ACT + mr r9,r4 ; Get the switched from ACT lwz r6,saver6+4(r3) ; Get the srr1 value rlwinm. 
r0,r0,0,pf64Bitb,pf64Bitb ; Check for 64-bit lwz r10,ACT_MACT_PCB(r9) ; Get the top PCB on the old thread @@ -376,7 +384,8 @@ LEXT(fpu_save) mtmsr r2 ; Set the MSR isync - mfsprg r6,0 ; Get the per_processor block + mfsprg r6,1 ; Get the current activation + lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block lwz r12,FPUowner(r6) ; Get the context ID for owner #if FPVECDBG @@ -428,7 +437,8 @@ fsgoodcpu: lwz r3,FPUsave(r12) ; Get the current FPU savearea for the threa fsneedone: bl EXT(save_get) ; Get a savearea for the context - mfsprg r6,0 ; Get back per_processor block + mfsprg r6,1 ; Get the current activation + lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block li r4,SAVfloat ; Get floating point tag lwz r12,FPUowner(r6) ; Get back our thread stb r4,SAVflags+2(r3) ; Mark this savearea as a float @@ -485,9 +495,9 @@ LEXT(fpu_switch) stw r1,0(r3) #endif /* DEBUG */ - mfsprg r26,0 ; Get the per_processor block + mfsprg r17,1 ; Get the current activation + lwz r26,ACT_PER_PROC(r17) ; Get the per_proc block mfmsr r19 ; Get the current MSR - mfsprg r17,1 ; Get the current thread mr r25,r4 ; Save the entry savearea lwz r22,FPUowner(r26) ; Get the thread that owns the FPU @@ -578,7 +588,7 @@ fswsync: lwarx r19,0,r15 ; Get the sync word li r0,1 ; Get the lock cmplwi cr1,r19,0 ; Is it unlocked? stwcx. r0,0,r15 ; Store lock and test reservation - cror cr0_eq,cr1_eq,cr0_eq ; Combine lost reservation and previously locked + crand cr0_eq,cr1_eq,cr0_eq ; Combine lost reservation and previously locked bne-- fswsync ; Try again if lost reservation or locked... isync ; Toss speculation @@ -649,11 +659,12 @@ fsnosave: lwz r15,ACT_MACT_PCB(r17) ; Get the current level of the "new" one sc ; (TEST/DEBUG) #endif - lis r18,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r19,r19,ppSize ; Find offset to the owner per_proc - ori r18,r18,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r18,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r19,r19,ppeSize ; Find offset to the owner per_proc_entry + ori r18,r18,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r16,FPUowner ; Displacement to float owner - add r19,r18,r19 ; Point to the owner per_proc + add r19,r18,r19 ; Point to the owner per_proc_entry + lwz r19,ppe_vaddr(r19) ; Point to the owner per_proc fsinvothr: lwarx r18,r16,r19 ; Get the owner sub r0,r18,r29 ; Subtract one from the other @@ -866,7 +877,8 @@ LEXT(toss_live_fpu) isync beq+ tlfnotours ; Floats off, can not be live here... 
- mfsprg r8,0 ; Get the per proc + mfsprg r8,1 ; Get the current activation + lwz r8,ACT_PER_PROC(r8) ; Get the per_proc block ; ; Note that at this point, since floats are on, we are the owner @@ -882,11 +894,12 @@ LEXT(toss_live_fpu) mtfsf 0xFF,f1 ; Clear it tlfnotours: lwz r11,FPUcpu(r3) ; Get the cpu on which we last loaded context - lis r12,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r11,r11,ppSize ; Find offset to the owner per_proc - ori r12,r12,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r12,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r11,r11,ppeSize ; Find offset to the owner per_proc_entry + ori r12,r12,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r10,FPUowner ; Displacement to float owner - add r11,r12,r11 ; Point to the owner per_proc + add r11,r12,r11 ; Point to the owner per_proc_entry + lwz r11,ppe_vaddr(r11) ; Point to the owner per_proc tlfinvothr: lwarx r12,r10,r11 ; Get the owner @@ -942,7 +955,8 @@ LEXT(vec_save) mtmsr r2 ; Set the MSR isync - mfsprg r6,0 ; Get the per_processor block + mfsprg r6,1 ; Get the current activation + lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block lwz r12,VMXowner(r6) ; Get the context ID for owner #if FPVECDBG @@ -1016,7 +1030,8 @@ vsneedone: mr. r10,r10 ; Is VRsave set to 0? bl EXT(save_get) ; Get a savearea for the context - mfsprg r6,0 ; Get back per_processor block + mfsprg r6,1 ; Get the current activation + lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block li r4,SAVvector ; Get vector tag lwz r12,VMXowner(r6) ; Get back our context ID stb r4,SAVflags+2(r3) ; Mark this savearea as a vector @@ -1074,9 +1089,9 @@ LEXT(vec_switch) stw r1,0(r3) #endif /* DEBUG */ - mfsprg r26,0 ; Get the per_processor block + mfsprg r17,1 ; Get the current activation + lwz r26,ACT_PER_PROC(r17) ; Get the per_proc block mfmsr r19 ; Get the current MSR - mfsprg r17,1 ; Get the current thread mr r25,r4 ; Save the entry savearea oris r19,r19,hi16(MASK(MSR_VEC)) ; Enable the vector feature @@ -1189,7 +1204,7 @@ vswsync: lwarx r19,0,r15 ; Get the sync word li r0,1 ; Get the lock cmplwi cr1,r19,0 ; Is it unlocked? stwcx. r0,0,r15 ; Store lock and test reservation - cror cr0_eq,cr1_eq,cr0_eq ; Combine lost reservation and previously locked + crand cr0_eq,cr1_eq,cr0_eq ; Combine lost reservation and previously locked bne-- vswsync ; Try again if lost reservation or locked... isync ; Toss speculation @@ -1271,14 +1286,15 @@ vsnosave: vspltisb v31,-10 ; Get 0xF6F6F6F6 sc ; (TEST/DEBUG) #endif - lis r18,hi16(EXT(per_proc_info)) ; Set base per_proc + lis r18,hi16(EXT(PerProcTable)) ; Set base PerProcTable vspltisb v28,-2 ; Get 0xFEFEFEFE - mulli r19,r19,ppSize ; Find offset to the owner per_proc + mulli r19,r19,ppeSize ; Find offset to the owner per_proc_entry vsubuhm v31,v31,v29 ; Get 0xDEDADEDA - ori r18,r18,lo16(EXT(per_proc_info)) ; Set base per_proc + ori r18,r18,lo16(EXT(PerProcTable)) ; Set base PerProcTable vpkpx v30,v28,v3 ; Get 0x7FFF7FFF li r16,VMXowner ; Displacement to vector owner - add r19,r18,r19 ; Point to the owner per_proc + add r19,r18,r19 ; Point to the owner per_proc_entry + lwz r19,ppe_vaddr(r19) ; Point to the owner per_proc vrlb v31,v31,v29 ; Get 0xDEADDEAD vsinvothr: lwarx r18,r16,r19 ; Get the owner @@ -1444,7 +1460,8 @@ LEXT(toss_live_vec) isync beq+ tlvnotours ; Vector off, can not be live here... 
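The fsinvothr and tlfinvothr loops above (and vsinvothr below) are lwarx/stwcx. reservation loops that clear the FPUowner or VMXowner field in whichever per_proc last held the context, but only if it still names that context. In C11 atomics the whole loop is a single compare-and-swap; a hypothetical rendering:

    #include <stdatomic.h>
    #include <stddef.h>

    struct facility_context;   /* opaque stand-in for the savearea context */

    /* Clear *owner only if it still equals ctx. The assembly retries when
     * the reservation is lost (stwcx. fails); CAS gives the same effect. */
    static void invalidate_owner(_Atomic(struct facility_context *) *owner,
                                 struct facility_context *ctx)
    {
        struct facility_context *expected = ctx;
        (void)atomic_compare_exchange_strong(owner, &expected, NULL);
        /* if it no longer held ctx, another CPU took ownership: nothing to do */
    }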
- mfsprg r8,0 ; Get the per proc + mfsprg r8,1 ; Get the current activation + lwz r8,ACT_PER_PROC(r8) ; Get the per_proc block ; ; Note that at this point, since vecs are on, we are the owner @@ -1463,11 +1480,12 @@ LEXT(toss_live_vec) mtvscr v1 ; Set the non-java, no saturate status tlvnotours: lwz r11,VMXcpu(r3) ; Get the cpu on which we last loaded context - lis r12,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r11,r11,ppSize ; Find offset to the owner per_proc - ori r12,r12,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r12,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r11,r11,ppeSize ; Find offset to the owner per_proc_entry + ori r12,r12,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r10,VMXowner ; Displacement to vector owner - add r11,r12,r11 ; Point to the owner per_proc + add r11,r12,r11 ; Point to the owner per_proc_entry + lwz r11,ppe_vaddr(r11) ; Point to the owner per_proc li r0,0 ; Set a 0 to invalidate context tlvinvothr: lwarx r12,r10,r11 ; Get the owner @@ -1505,11 +1523,12 @@ LEXT(vec_trash) bnelr+ ; No, we do nothing... lwz r11,VMXcpu(r3) ; Get the cpu on which we last loaded context - lis r12,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r11,r11,ppSize ; Find offset to the owner per_proc - ori r12,r12,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r12,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r11,r11,ppeSize ; Find offset to the owner per_proc_entry + ori r12,r12,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r10,VMXowner ; Displacement to vector owner - add r11,r12,r11 ; Point to the owner per_proc + add r11,r12,r11 ; Point to the owner per_proc_entry + lwz r11,ppe_vaddr(r11) ; Point to the owner per_proc vtinvothr: lwarx r12,r10,r11 ; Get the owner diff --git a/osfmk/ppc/db_interface.c b/osfmk/ppc/db_interface.c index e019b5e94..55dda17cb 100644 --- a/osfmk/ppc/db_interface.c +++ b/osfmk/ppc/db_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -23,7 +23,6 @@ * @OSF_COPYRIGHT@ */ -#include #include #include #include @@ -37,12 +36,12 @@ #include #include -#include #include #include #include #include #include +#include #include #include #include @@ -54,6 +53,8 @@ #include #include #include +#include +#include #include #include @@ -69,7 +70,6 @@ struct savearea *ppc_last_saved_statep; struct savearea ppc_nested_saved_state; unsigned ppc_last_kdb_sp; -extern int debugger_active[NCPUS]; /* Debugger active on CPU */ extern int debugger_cpu; /* Current cpu running debugger */ int db_all_set_up = 0; @@ -143,8 +143,6 @@ void kdp_register_send_receive(void) {} #endif extern jmp_buf_t *db_recover; -spl_t saved_ipl[NCPUS]; /* just to know what IPL was before trap */ -struct savearea *saved_state[NCPUS]; /* * kdb_trap - field a TRACE or BPT trap @@ -177,7 +175,7 @@ kdb_trap( db_printf("type %d", type); else db_printf("%s", trap_type[type]); - db_printf(" trap, pc = %x\n", + db_printf(" trap, pc = %llx\n", regs->save_srr0); db_error(""); /*NOTREACHED*/ @@ -185,7 +183,7 @@ kdb_trap( kdbprinttrap(type, code, (int *)®s->save_srr0, regs->save_r1); } - saved_state[cpu_number()] = regs; + getPerProc()->db_saved_state = regs; ppc_last_saved_statep = regs; ppc_last_kdb_sp = (unsigned) &type; @@ -209,13 +207,13 @@ kdb_trap( (db_get_task_value(regs->save_srr0, BKPT_SIZE, FALSE, - db_target_space(current_act(), + db_target_space(current_thread(), trap_from_user)) == BKPT_INST)) regs->save_srr0 += BKPT_SIZE; kdb_exit: - saved_state[cpu_number()] = 0; + getPerProc()->db_saved_state = 0; switch_to_old_console(previous_console_device); } @@ -388,8 +386,8 @@ db_check_access( if (kernel_task == TASK_NULL) return(TRUE); task = kernel_task; } else if (task == TASK_NULL) { - if (current_act() == THR_ACT_NULL) return(FALSE); - task = current_act()->task; + if (current_thread() == THR_ACT_NULL) return(FALSE); + task = current_thread()->task; } while (size > 0) { @@ -416,9 +414,9 @@ db_phys_eq( return FALSE; if (task1 == TASK_NULL) { /* See if there is a task active */ - if (current_act() == THR_ACT_NULL) /* See if there is a current task */ + if (current_thread() == THR_ACT_NULL) /* See if there is a current task */ return FALSE; - task1 = current_act()->task; /* If so, use that one */ + task1 = current_thread()->task; /* If so, use that one */ } if(!(physa = db_vtophys(task1->map->pmap, (vm_offset_t)trunc_page_32(addr1)))) return FALSE; /* Get real address of the first */ @@ -543,7 +541,7 @@ kdb_on( int cpu) { KDB_SAVE_CTXT(); - if (cpu < 0 || cpu >= NCPUS || !debugger_active[cpu]) + if (cpu < 0 || cpu >= real_ncpus || !PerProcTable[cpu].ppe_vaddr->debugger_active) return; db_set_breakpoints(); db_set_watchpoints(); @@ -562,6 +560,9 @@ kdb_on( /* * system reboot */ + +extern int (*PE_halt_restart)(unsigned int type); + void db_reboot( db_expr_t addr, boolean_t have_addr, @@ -578,7 +579,11 @@ void db_reboot( if (c == 'h') /* halt */ reboot = FALSE; } - halt_all_cpus(reboot); + if(!reboot) halt_all_cpus(FALSE); /* If no reboot, try to be clean about it */ + + if (PE_halt_restart) return (*PE_halt_restart)(kPERestartCPU); + db_printf("Sorry, system can't reboot automatically yet... You need to do it by hand...\n"); + } /* diff --git a/osfmk/ppc/db_low_trace.c b/osfmk/ppc/db_low_trace.c index d64f6615b..9bf612b48 100644 --- a/osfmk/ppc/db_low_trace.c +++ b/osfmk/ppc/db_low_trace.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. 
All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -263,7 +263,7 @@ void db_display_mappings(db_expr_t addr, int have_addr, db_expr_t count, char * pmap_t pmap; addr64_t lnextva; - mapping *mp; + mapping_t *mp; if (db_expression(&xspace)) { /* Get the address space requested */ if(xspace >= maxAdrSp) { @@ -318,7 +318,7 @@ void db_display_hash(db_expr_t addr, int have_addr, db_expr_t count, char * modi llva = (addr64_t)((unsigned int)addr); /* Make sure we are 64-bit now */ - s4bit = !((per_proc_info[0].pf.Available & pf64Bit) == 0); /* Are we a big guy? */ + s4bit = !((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) == 0); /* Are we a big guy? */ if (db_expression(&xspace)) { /* Get the address space requested */ if(xspace >= maxAdrSp) { db_printf("requested address space (%llX) larger than max (%X)\n", xspace, maxAdrSp - 1); @@ -466,7 +466,7 @@ void db_dumpmapping(struct mapping *mp) { /* Dump out a mapping */ pmapTrans[mp->mpSpace].pmapPAddr); /* Header */ db_printf(" mpFlags: %08X\n", mp->mpFlags); db_printf(" mpSpace: %04X\n", mp->mpSpace); - db_printf(" mpBSize: %04X\n", mp->mpBSize); + db_printf(" mpBSize: %04X\n", mp->u.mpBSize); db_printf(" mpPte: %08X\n", mp->mpPte); db_printf(" mpPAddr: %08X\n", mp->mpPAddr); db_printf(" mpVAddr: %016llX\n", mp->mpVAddr); @@ -492,7 +492,7 @@ void db_dumppca(unsigned int ptegindex) { int i, s4bit; unsigned long long llslot, llseg, llhash; - s4bit = !((per_proc_info[0].pf.Available & pf64Bit) == 0); /* Are we a big guy? */ + s4bit = !((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) == 0); /* Are we a big guy? */ pteg = hash_table_base + (ptegindex << 6); /* Point to the PTEG */ if(s4bit) pteg = hash_table_base + (ptegindex << 7); /* Point to the PTEG */ @@ -563,7 +563,7 @@ void db_display_virtual(db_expr_t addr, int have_addr, db_expr_t count, char * m db_expr_t xspace; pmap_t pmap; - mapping *mp, *mpv; + mapping_t *mp, *mpv; addr64_t pa; ppnum_t pnum; @@ -605,7 +605,7 @@ void db_display_virtual(db_expr_t addr, int have_addr, db_expr_t count, char * m db_printf("%016llX %08X %08X %08X %08X %08X %08X %08X %08X\n", addr, /* Print a line */ xbuf[0], xbuf[1], xbuf[2], xbuf[3], xbuf[4], xbuf[5], xbuf[6], xbuf[7]); - addr = (db_expr_t)((unsigned int)addr + 0x00000020); /* Point to next address */ + addr = (db_expr_t)(addr + 0x00000020); /* Point to next address */ pa = pa + 0x00000020; /* Point to next address */ } db_next = addr; @@ -663,14 +663,14 @@ void db_display_save(db_expr_t addr, int have_addr, db_expr_t count, char * modi db_printf("\nTask %4d @%08X:\n", tottasks, task); /* Show where we're at */ for(act = (thread_act_t)task->threads.next; act != (thread_act_t)&task->threads; act = (thread_act_t)act->task_threads.next) { /* Go through activations */ db_printf(" Act %4d @%08X - p: %08X current context: %08X\n", - taskact, act, act->mact.pcb, act->mact.curctx); + taskact, act, act->machine.pcb, act->machine.curctx); - save = (savearea *)act->mact.pcb; /* Set the start of the normal chain */ + save = (savearea *)act->machine.pcb; /* Set the start of the normal chain */ chainsize = 0; db_printf(" General context - fp: %08X fl: %08X fc: %d vp: %08X vl: %08X vp: %d\n", - act->mact.facctx.FPUsave, act->mact.facctx.FPUlevel, act->mact.facctx.FPUcpu, - act->mact.facctx.VMXsave, act->mact.facctx.VMXlevel, act->mact.facctx.VMXcpu); + act->machine.facctx.FPUsave, act->machine.facctx.FPUlevel, act->machine.facctx.FPUcpu, + act->machine.facctx.VMXsave, act->machine.facctx.VMXlevel, act->machine.facctx.VMXcpu); while(save) { /* Do them all */ 
totsaves++; /* Count savearea */ @@ -682,7 +682,7 @@ void db_display_save(db_expr_t addr, int have_addr, db_expr_t count, char * modi } } - save = (savearea *)act->mact.facctx.FPUsave; /* Set the start of the floating point chain */ + save = (savearea *)act->machine.facctx.FPUsave; /* Set the start of the floating point chain */ chainsize = 0; while(save) { /* Do them all */ totsaves++; /* Count savearea */ @@ -694,7 +694,7 @@ void db_display_save(db_expr_t addr, int have_addr, db_expr_t count, char * modi } } - save = (savearea *)act->mact.facctx.VMXsave; /* Set the start of the floating point chain */ + save = (savearea *)act->machine.facctx.VMXsave; /* Set the start of the floating point chain */ chainsize = 0; while(save) { /* Do them all */ totsaves++; /* Count savearea */ @@ -706,7 +706,7 @@ void db_display_save(db_expr_t addr, int have_addr, db_expr_t count, char * modi } } - if(CTable = act->mact.vmmControl) { /* Are there virtual machines? */ + if(CTable = act->machine.vmmControl) { /* Are there virtual machines? */ for(vmid = 0; vmid < kVmmMaxContexts; vmid++) { @@ -762,14 +762,14 @@ void db_display_save(db_expr_t addr, int have_addr, db_expr_t count, char * modi extern unsigned int dbfloats[33][2]; extern unsigned int dbvecs[33][4]; -extern unsigned int dbspecrs[80]; +extern unsigned int dbspecrs[336]; void db_display_xregs(db_expr_t addr, int have_addr, db_expr_t count, char * modif) { int i, j, pents; stSpecrs(dbspecrs); /* Save special registers */ - if(per_proc_info[0].pf.Available & pf64Bit) { + if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) { db_printf("PIR: %08X\n", dbspecrs[0]); db_printf("PVR: %08X\n", dbspecrs[1]); db_printf("SDR1: %08X.%08X\n", dbspecrs[26], dbspecrs[27]); @@ -850,12 +850,12 @@ void db_check_mappings(db_expr_t addr, int have_addr, db_expr_t count, char * mo unsigned int xpteg[32], xpca[8], space, hash, pva, seg, api, va, free, free2, xauto, PTEGcnt, wimgkk, wimgxx, slotoff; int i, j, fnderr, slot, slot2, k, s4bit; pmap_t pmap; - mapping *mp; + mapping_t *mp; ppnum_t ppn, pa, aoff; unsigned long long llslot, llseg, llhash; s4bit = 0; /* Assume dinky? */ - if(per_proc_info[0].pf.Available & pf64Bit) s4bit = 1; /* Are we a big guy? */ + if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) s4bit = 1; /* Are we a big guy? */ PTEGcnt = hash_table_size / 64; /* Get the number of PTEGS */ if(s4bit) PTEGcnt = PTEGcnt / 2; /* PTEGs are twice as big */ @@ -953,57 +953,63 @@ void db_check_mappings(db_expr_t addr, int have_addr, db_expr_t count, char * mo fnderr = 1; goto dcmout; } - - mp = hw_find_map(pmap, llva, &lnextva); /* Try to find the mapping for this address */ -// db_printf("%08X - %017llX\n", mp, llva); - if((unsigned int)mp == mapRtBadLk) { /* Did we lock up ok? */ - db_printf("Timeout locking mapping for for virtual address %016ll8X, slot = %d\n", llva, j); - return; - } - - if(!mp) { /* Did we find one? */ - db_printf("Not mapped, slot = %d, va = %08X\n", j, (unsigned int)llva); - fnderr = 1; - goto dcmout; - } - - if((mp->mpFlags & 0xFF000000) > 0x01000000) { /* Is busy count too high? */ - db_printf("Busy count too high, slot = %d\n", j); - fnderr = 1; - } - - if(mp->mpFlags & mpBlock) { /* Is this a block map? */ - if(!(xpca[0] & xauto)) { /* Is it marked as such? 
*/ - db_printf("mapping marked as block, PCA is not, slot = %d\n", j); - fnderr = 1; + + if (pmap->pmapFlags & pmapVMgsaa) { + unsigned int ret; + mapping_t mpcopy; + ret = hw_find_map_gv(pmap, llva, &mpcopy); + } else { + mp = hw_find_map(pmap, llva, &lnextva); /* Try to find the mapping for this address */ + // db_printf("%08X - %017llX\n", mp, llva); + if((unsigned int)mp == mapRtBadLk) { /* Did we lock up ok? */ + db_printf("Timeout locking mapping for for virtual address %016ll8X, slot = %d\n", llva, j); + return; } - } - else { /* Is a block */ - if(xpca[0] & xauto) { /* Is it marked as such? */ - db_printf("mapping not marked as block, PCA is, slot = %d\n", j); + + if(!mp) { /* Did we find one? */ + db_printf("Not mapped, slot = %d, va = %08X\n", j, (unsigned int)llva); fnderr = 1; + goto dcmout; } - if(mp->mpPte != slotoff) { /* See if mapping PTEG offset is us */ - db_printf("mapping does not point to PTE, slot = %d\n", j); + + if((mp->mpFlags & 0xFF000000) > 0x01000000) { /* Is busy count too high? */ + db_printf("Busy count too high, slot = %d\n", j); fnderr = 1; } - } - - wimgkk = (unsigned int)mp->mpVAddr; /* Get last half of vaddr where keys, etc are */ - wimgkk = (wimgkk ^ wimgxx) & 0x7F; /* XOR to find differences from PTE */ - if(wimgkk) { /* See if key in PTE is what we want */ - db_printf("key or WIMG does not match, slot = %d\n", j); - fnderr = 1; - } + + if((mp->mpFlags & mpType) == mpBlock) { /* Is this a block map? */ + if(!(xpca[0] & xauto)) { /* Is it marked as such? */ + db_printf("mapping marked as block, PCA is not, slot = %d\n", j); + fnderr = 1; + } + } + else { /* Is a block */ + if(xpca[0] & xauto) { /* Is it marked as such? */ + db_printf("mapping not marked as block, PCA is, slot = %d\n", j); + fnderr = 1; + } + if(mp->mpPte != slotoff) { /* See if mapping PTEG offset is us */ + db_printf("mapping does not point to PTE, slot = %d\n", j); + fnderr = 1; + } + } - aoff = (ppnum_t)((llva >> 12) - (mp->mpVAddr >> 12)); /* Get the offset from vaddr */ - pa = aoff + mp->mpPAddr; /* Get the physical page number we expect */ - if(pa != ppn) { /* Is physical address expected? */ - db_printf("Physical address does not match, slot = %d\n", j); - fnderr = 1; + wimgkk = (unsigned int)mp->mpVAddr; /* Get last half of vaddr where keys, etc are */ + wimgkk = (wimgkk ^ wimgxx) & 0x7F; /* XOR to find differences from PTE */ + if(wimgkk) { /* See if key in PTE is what we want */ + db_printf("key or WIMG does not match, slot = %d\n", j); + fnderr = 1; + } + + aoff = (ppnum_t)((llva >> 12) - (mp->mpVAddr >> 12)); /* Get the offset from vaddr */ + pa = aoff + mp->mpPAddr; /* Get the physical page number we expect */ + if(pa != ppn) { /* Is physical address expected? 
*/ + db_printf("Physical address does not match, slot = %d\n", j); + fnderr = 1; + } + + mapping_drop_busy(mp); /* We're done with the mapping */ } - - mapping_drop_busy(mp); /* We're done with the mapping */ } } diff --git a/osfmk/ppc/db_machdep.h b/osfmk/ppc/db_machdep.h index adb73976a..2f42b9a8f 100644 --- a/osfmk/ppc/db_machdep.h +++ b/osfmk/ppc/db_machdep.h @@ -60,7 +60,7 @@ #include #include #include -#include /* for thread_status */ +#include #include #include #include diff --git a/osfmk/ppc/db_trace.c b/osfmk/ppc/db_trace.c index 0befef59c..531f9b6bb 100644 --- a/osfmk/ppc/db_trace.c +++ b/osfmk/ppc/db_trace.c @@ -31,6 +31,8 @@ #include #include +#include +#include #include #include #include @@ -44,7 +46,6 @@ #include extern jmp_buf_t *db_recover; -extern struct savearea *saved_state[]; struct savearea ddb_null_kregs; @@ -53,10 +54,7 @@ extern vm_offset_t vm_min_inks_addr; /* set by db_clone_symtabXXX */ #define DB_NUMARGS_MAX 5 -extern char FixedStackStart[], FixedStackEnd[]; -#define INFIXEDSTACK(va) \ - ((((vm_offset_t)(va)) >= (vm_offset_t)&FixedStackStart) && \ - (((vm_offset_t)(va)) < ((vm_offset_t)&FixedStackEnd))) +#define INFIXEDSTACK(va) 0 \ #define INKERNELSTACK(va, th) 1 @@ -169,31 +167,30 @@ db_ppc_reg_value( if (db_option(ap->modif, 'u')) { if (thr_act == THR_ACT_NULL) { - if ((thr_act = current_act()) == THR_ACT_NULL) + if ((thr_act = current_thread()) == THR_ACT_NULL) db_error("no user registers\n"); } - if (thr_act == current_act()) { + if (thr_act == current_thread()) { if (IS_USER_TRAP((&ddb_regs))) dp = vp->valuep; else if (INFIXEDSTACK(ddb_regs.save_r1)) db_error("cannot get/set user registers in nested interrupt\n"); } } else { - if (thr_act == THR_ACT_NULL || thr_act == current_act()) { + if (thr_act == THR_ACT_NULL || thr_act == current_thread()) { dp = vp->valuep; } else { - if (thr_act->thread && - !(thr_act->thread->state & TH_STACK_HANDOFF) && - thr_act->thread->kernel_stack) { + if (thr_act->kernel_stack) { int cpu; - for (cpu = 0; cpu < NCPUS; cpu++) { + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING && - cpu_to_processor(cpu)->active_thread == thr_act->thread && saved_state[cpu]) { + cpu_to_processor(cpu)->active_thread == thr_act && + PerProcTable[cpu].ppe_vaddr->db_saved_state) { - dp = (db_expr_t)(((uint32_t)saved_state[cpu]) + + dp = (db_expr_t)(((uint32_t)(PerProcTable[cpu].ppe_vaddr->db_saved_state)) + (((uint32_t) vp->valuep) - (uint32_t) &ddb_regs)); break; @@ -202,10 +199,10 @@ db_ppc_reg_value( if (dp == 0) dp = &null_reg; } - else if (thr_act->thread && (thr_act->thread->state & TH_STACK_HANDOFF)){ + else { /* only PC is valid */ if (vp->valuep == (int *) &ddb_regs.save_srr0) { - dp = (int *)(&thr_act->thread->continuation); + dp = (int *)(&thr_act->continuation); } else { dp = &null_reg; @@ -216,18 +213,19 @@ db_ppc_reg_value( if (dp == 0) { if (!db_option(ap->modif, 'u')) { - for (cpu = 0; cpu < NCPUS; cpu++) { + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING && - cpu_to_processor(cpu)->active_thread == thr_act->thread && saved_state[cpu]) { - dp = (int *) (((int)saved_state[cpu]) + + cpu_to_processor(cpu)->active_thread == thr_act && + PerProcTable[cpu].ppe_vaddr->db_saved_state) { + dp = (int *) (((int)(PerProcTable[cpu].ppe_vaddr->db_saved_state)) + (((int) vp->valuep) - (int) &ddb_regs)); break; } } } if (dp == 0) { - if (!thr_act || thr_act->mact.pcb == 0) db_error("no pcb\n"); - dp = (int *)((int)thr_act->mact.pcb + ((int)vp->valuep - 
(int)&ddb_regs)); + if (!thr_act || thr_act->machine.pcb == 0) db_error("no pcb\n"); + dp = (int *)((int)thr_act->machine.pcb + ((int)vp->valuep - (int)&ddb_regs)); } } @@ -301,7 +299,7 @@ db_find_arg( int inst; char *name; -#if XXX_BS +#if 0 db_find_task_sym_and_offset(calleepc, &name, &offset, task); calleep = calleepc-offset; @@ -362,9 +360,9 @@ db_nextframe( goto miss_frame; break; case SYSCALL: - if (thr_act != THR_ACT_NULL && thr_act->mact.pcb) { - *ip = (db_addr_t) thr_act->mact.pcb->save_srr0; - *fp = (struct db_ppc_frame *) (thr_act->mact.pcb->save_r1); + if (thr_act != THR_ACT_NULL && thr_act->machine.pcb) { + *ip = (db_addr_t) thr_act->machine.pcb->save_srr0; + *fp = (struct db_ppc_frame *) (thr_act->machine.pcb->save_r1); break; } /* falling down for unknown case */ @@ -459,7 +457,7 @@ db_stack_trace_cmd( else { th = db_default_act; if (th == THR_ACT_NULL) - th = current_act(); + th = current_thread(); if (th == THR_ACT_NULL) { db_printf("no active thr_act\n"); return; @@ -484,7 +482,7 @@ next_thread: frame = (struct db_ppc_frame *)(ddb_regs.save_r1); callpc = (db_addr_t)ddb_regs.save_srr0; linkpc = (db_addr_t)ddb_regs.save_lr; - th = current_act(); + th = current_thread(); task = (th != THR_ACT_NULL)? th->task: TASK_NULL; } else if (trace_thread) { @@ -496,7 +494,7 @@ next_thread: else { th = db_default_act; if (th == THR_ACT_NULL) - th = current_act(); + th = current_thread(); if (th == THR_ACT_NULL) { db_printf("no active thread\n"); return; @@ -511,30 +509,22 @@ next_activation: user_frame = 0; task = th->task; - if (th == current_act()) { + if (th == current_thread()) { frame = (struct db_ppc_frame *)(ddb_regs.save_r1); callpc = (db_addr_t)ddb_regs.save_srr0; linkpc = (db_addr_t)ddb_regs.save_lr; } else { - if (th->mact.pcb == 0) { + if (th->machine.pcb == 0) { db_printf("thread has no pcb\n"); goto thread_done; } - if (!th->thread) { + if (th->kernel_stack == 0) { register struct savearea *pss = - th->mact.pcb; - - db_printf("thread has no shuttle\n"); - goto thread_done; - } - else if ((th->thread->state & TH_STACK_HANDOFF) || - th->thread->kernel_stack == 0) { - register struct savearea *pss = - th->mact.pcb; + th->machine.pcb; db_printf("Continuation "); - db_task_printsym((db_expr_t)th->thread->continuation, + db_task_printsym((db_expr_t)th->continuation, DB_STGY_PROC, task); db_printf("\n"); frame = (struct db_ppc_frame *) (pss->save_r1); @@ -544,10 +534,10 @@ next_activation: else { int cpu; - for (cpu = 0; cpu < NCPUS; cpu++) { + for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_to_processor(cpu)->state == PROCESSOR_RUNNING && - cpu_to_processor(cpu)->active_thread == th->thread && - saved_state[cpu]) { + cpu_to_processor(cpu)->active_thread == th && + PerProcTable[cpu].ppe_vaddr->db_saved_state) { break; } } @@ -559,16 +549,16 @@ next_activation: */ struct savearea *pss; - pss = th->mact.pcb; + pss = th->machine.pcb; frame = (struct db_ppc_frame *) (pss->save_r1); callpc = (db_addr_t) (pss->save_srr0); linkpc = (db_addr_t) (pss->save_lr); } else { - if (cpu == NCPUS) { + if (cpu == real_ncpus) { register struct savearea *iks; int r; - iks = th->mact.pcb; + iks = th->machine.pcb; prev = db_recover; if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) { frame = (struct db_ppc_frame *) (iks->save_r1); @@ -592,16 +582,16 @@ next_activation: db_printf(">>>>> active on cpu %d <<<<<\n", cpu); frame = (struct db_ppc_frame *) - (saved_state[cpu]->save_r1); - callpc = (db_addr_t) saved_state[cpu]->save_srr0; - linkpc = (db_addr_t) saved_state[cpu]->save_lr; + 
(PerProcTable[cpu].ppe_vaddr->db_saved_state->save_r1); + callpc = (db_addr_t) PerProcTable[cpu].ppe_vaddr->db_saved_state->save_srr0; + linkpc = (db_addr_t) PerProcTable[cpu].ppe_vaddr->db_saved_state->save_lr; } } } } } else { frame = (struct db_ppc_frame *)addr; - th = (db_default_act)? db_default_act: current_act(); + th = (db_default_act)? db_default_act: current_thread(); task = (th != THR_ACT_NULL)? th->task: TASK_NULL; if (frame->f_frame) { callpc = (db_addr_t)db_get_task_value @@ -670,7 +660,7 @@ next_activation: goto next_act; } } - } else { + } else { frame_type = 0; prev = db_recover; if ((r = _setjmp(db_recover = &db_jmp_buf)) == 0) { @@ -685,11 +675,11 @@ next_activation: } if (name == 0 || offset > db_maxoff) { - db_printf("[%08X]0x%08X(", frame, callpc); + db_printf("[%08X]0x%08X(", frame, callpc); } else { db_printf("[%08X]%s", frame, name); - if (offset) - db_printf("+%x", offset); + if (offset) + db_printf("+%llx", offset); db_printf("("); }; @@ -747,16 +737,6 @@ next_activation: if (frame == 0) { next_act: - if (th->lower != THR_ACT_NULL) { - if (top_act == THR_ACT_NULL) - top_act = th; - th = th->lower; - db_printf(">>>>> next activation 0x%x ($task%d.%d) <<<<<\n", - th, - db_lookup_task(th->task), - db_lookup_task_act(th->task, th)); - goto next_activation; - } /* end of chain */ break; } diff --git a/osfmk/ppc/exception.h b/osfmk/ppc/exception.h index 91b441a58..dedad228d 100644 --- a/osfmk/ppc/exception.h +++ b/osfmk/ppc/exception.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -34,12 +34,12 @@ #ifndef ASSEMBLER -#include #include #include #include #include +#include #include #include #include @@ -237,9 +237,14 @@ struct hwCtrs { unsigned int numSIGPtimo; /* Number of SIGP send timeouts */ unsigned int numSIGPmast; /* Number of SIGPast messages merged */ unsigned int numSIGPmwake; /* Number of SIGPwake messages merged */ + + unsigned int hwWalkPhys; /* Number of entries to hw_walk_phys */ + unsigned int hwWalkFull; /* Full purge of connected PTE's */ + unsigned int hwWalkMerge; /* RC merge of connected PTE's */ + unsigned int hwWalkQuick; /* Quick scan of connected PTE's */ unsigned int numSIGPcall; /* Number of SIGPcall messages received */ - unsigned int hwspare3[20]; /* Pad to 512 */ + unsigned int hwspare3[16]; /* Pad to 512 */ }; #pragma pack() @@ -258,8 +263,7 @@ typedef struct patch_entry patch_entry_t; #define PATCH_INVALID 0 #define PATCH_PROCESSOR 1 #define PATCH_FEATURE 2 - -#define PATCH_TABLE_SIZE 12 +#define PATCH_END_OF_TABLE 3 #define PatchExt32 0x80000000 #define PatchExt32b 0 @@ -280,13 +284,14 @@ struct per_proc_info { vm_offset_t debstack_top_ss; unsigned int spcFlags; /* Special thread flags */ - unsigned int Uassist; /* User Assist Word */ unsigned int old_thread; + ast_t pending_ast; /* mask of pending ast(s) */ /* PPC cache line boundary here - 020 */ - uint64_t rtcPop; /* Real Time Clock pop */ - unsigned int need_ast; /* pointer to need_ast[CPU_NO] */ + int cpu_type; + int cpu_subtype; + int cpu_threadtype; /* * Note: the following two pairs of words need to stay in order and each pair must * be in the same reservation (line) granule @@ -294,16 +299,15 @@ struct per_proc_info { struct facility_context *FPU_owner; /* Owner of the FPU on this cpu */ unsigned int liveVRSave; /* VRSave assiciated with live vector registers */ struct facility_context *VMX_owner; /* Owner of the VMX on this cpu */ - 
unsigned int holdQFret; /* Hold off releasing quickfret list */ - unsigned int save_exception_type; + unsigned int spcTRc; /* Special trace count */ + unsigned int spcTRp; /* Special trace buffer pointer */ /* PPC cache line boundary here - 040 */ addr64_t quickfret; /* List of saveareas to release */ addr64_t lclfree; /* Pointer to local savearea list */ unsigned int lclfreecnt; /* Entries in local savearea list */ - unsigned int spcTRc; /* Special trace count */ - unsigned int spcTRp; /* Special trace buffer pointer */ - unsigned int ppbbTaskEnv; /* BlueBox Task Environment */ + unsigned int holdQFret; /* Hold off releasing quickfret list */ + uint64_t rtcPop; /* Real Time Clock pop */ /* PPC cache line boundary here - 060 */ boolean_t interrupts_enabled; @@ -326,20 +330,6 @@ struct per_proc_info { #define MPsigpFunc 0x0000FF00 /* Current function */ #define MPsigpIdle 0x00 /* No function pending */ #define MPsigpSigp 0x04 /* Signal a processor */ - -#define SIGPast 0 /* Requests an ast on target processor */ -#define SIGPcpureq 1 /* Requests CPU specific function */ -#define SIGPdebug 2 /* Requests a debugger entry */ -#define SIGPwake 3 /* Wake up a sleeping processor */ -#define SIGPcall 4 /* Call a function on a processor */ - -#define CPRQtemp 0 /* Get temprature of processor */ -#define CPRQtimebase 1 /* Get timebase of processor */ -#define CPRQsegload 2 /* Segment registers reload */ -#define CPRQscom 3 /* SCOM */ -#define CPRQchud 4 /* CHUD perfmon */ -#define CPRQsps 5 /* Set Processor Speed */ - unsigned int MPsigpParm0; /* SIGP parm 0 */ unsigned int MPsigpParm1; /* SIGP parm 1 */ unsigned int MPsigpParm2; /* SIGP parm 2 */ @@ -351,15 +341,30 @@ struct per_proc_info { procFeatures pf; /* Processor features */ /* PPC cache line boundary here - 140 */ - unsigned int ppRsvd140[8]; /* Reserved */ - + void * pp_cbfr; + void * pp_chud; + uint64_t rtclock_tick_deadline; + struct rtclock_timer { + uint64_t deadline; + uint32_t + /*boolean_t*/ is_set:1, + has_expired:1, + :0; + } rtclock_timer; + unsigned int ppbbTaskEnv; /* BlueBox Task Environment */ + /* PPC cache line boundary here - 160 */ + struct savearea * db_saved_state; time_base_enable_t time_base_enable; - unsigned int ppRsvd164[4]; /* Reserved */ - cpu_data_t pp_cpu_data; /* cpu data info */ + int ppXFlags; + int running; + int debugger_is_slave; + int debugger_active; + int debugger_pending; + int debugger_holdoff; /* PPC cache line boundary here - 180 */ - unsigned int ppRsvd180[2]; /* Reserved */ + uint64_t Uassist; /* User Assist DoubleWord */ uint64_t validSegs; /* Valid SR/STB slots */ addr64_t ppUserPmap; /* Current user state pmap (physical address) */ unsigned int ppUserPmapVirt; /* Current user state pmap (virtual address) */ @@ -372,10 +377,12 @@ struct per_proc_info { ppnum_t VMMareaPhys; /* vmm state page physical addr */ unsigned int VMMXAFlgs; /* vmm extended flags */ unsigned int FAMintercept; /* vmm FAM Exceptions to intercept */ - unsigned int rsrvd1B4[3]; /* Reserved slots */ + unsigned int ppinfo_reserved1; + uint32_t save_tbl; + uint32_t save_tbu; /* PPC cache line boundary here - 1C0 */ - unsigned int ppCIOmp[16]; /* Linkage mapping for copyin/out - 64 bytes */ + unsigned int ppUMWmp[16]; /* Linkage mapping for user memory window - 64 bytes */ /* PPC cache line boundary here - 200 */ uint64_t tempr0; /* temporary savearea */ @@ -512,20 +519,33 @@ struct per_proc_info { hwCtrs hwCtr; /* Hardware exception counters */ /* - A00 */ - unsigned int pppadpage[384]; /* Pad to end of page */ + unsigned int 
processor[384]; /* processor structure */ /* - 1000 */ }; -#define pp_preemption_count pp_cpu_data.preemption_level -#define pp_simple_lock_count pp_cpu_data.simple_lock_count -#define pp_interrupt_level pp_cpu_data.interrupt_level - #pragma pack() -extern struct per_proc_info per_proc_info[NCPUS]; +/* + * Macro to convert a processor_t processor to its attached per_proc_info_t per_proc + */ +#define PROCESSOR_TO_PER_PROC(x) \ + ((struct per_proc_info*)((unsigned int)(x) \ + - (unsigned int)(((struct per_proc_info *)0)->processor))) + +extern struct per_proc_info BootProcInfo; + +#define MAX_CPUS 256 + +struct per_proc_entry { + addr64_t ppe_paddr; + unsigned int ppe_pad4[1]; + struct per_proc_info *ppe_vaddr; +}; + +extern struct per_proc_entry PerProcTable[MAX_CPUS-1]; extern char *trap_type[]; @@ -545,6 +565,9 @@ extern char *trap_type[]; #define BootDone 0x0100 #define loadMSR 0x7FF4 +/* ppXFlags defs */ +#define SignalReadyWait 0x00000001 + #define T_VECTOR_SIZE 4 /* function pointer size */ /* Hardware exceptions */ @@ -620,9 +643,10 @@ extern char *trap_type[]; #define failBadLiveContext 6 #define failSkipLists 7 #define failUnalignedStk 8 +#define failPmap 9 /* Always must be last - update failNames table in model_dep.c as well */ -#define failUnknown 9 +#define failUnknown 10 #ifndef ASSEMBLER diff --git a/osfmk/ppc/fpu_protos.h b/osfmk/ppc/fpu_protos.h index 6781cc8ce..99ec6d953 100644 --- a/osfmk/ppc/fpu_protos.h +++ b/osfmk/ppc/fpu_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,9 +27,9 @@ #ifndef _PPC_FPU_PROTOS_H_ #define _PPC_FPU_PROTOS_H_ -#include +#include -extern void fpu_save(thread_act_t act); +extern void fpu_save(struct facility_context *); extern void fpu_disable(void); #endif /* _PPC_FPU_PROTOS_H_ */ diff --git a/osfmk/ppc/genassym.c b/osfmk/ppc/genassym.c index a1f0b0fb8..46bbd060c 100644 --- a/osfmk/ppc/genassym.c +++ b/osfmk/ppc/genassym.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -37,22 +37,19 @@ * the values, but we cannot run anything on the target machine. 
*/ -#include #include #include #include #include -#include #include #include +#include #include -#include #include -#include +#include #include #include -#include #include #include #include @@ -79,21 +76,22 @@ int main(int argc, char *argv[]) { /* Process Control Block */ - DECLARE("ACT_MACT_KSP", offsetof(thread_act_t, mact.ksp)); - DECLARE("ACT_MACT_BEDA", offsetof(thread_act_t, mact.bbDescAddr)); - DECLARE("ACT_MACT_BTS", offsetof(thread_act_t, mact.bbTableStart)); - DECLARE("ACT_MACT_BTE", offsetof(thread_act_t, mact.bbTaskEnv)); - DECLARE("ACT_MACT_SPF", offsetof(thread_act_t, mact.specFlags)); - DECLARE("ACT_PREEMPT_CNT", offsetof(thread_act_t, mact.preemption_count)); - DECLARE("qactTimer", offsetof(thread_act_t, mact.qactTimer)); - DECLARE("cioSpace", offsetof(thread_act_t, mact.cioSpace)); - DECLARE("cioRelo", offsetof(thread_act_t, mact.cioRelo)); - DECLARE("cioSwitchAway", cioSwitchAway); - DECLARE("cioSwitchAwayb", cioSwitchAwayb); - DECLARE("bbTrap", offsetof(thread_act_t, mact.bbTrap)); - DECLARE("bbSysCall", offsetof(thread_act_t, mact.bbSysCall)); - DECLARE("bbInterrupt", offsetof(thread_act_t, mact.bbInterrupt)); - DECLARE("bbPending", offsetof(thread_act_t, mact.bbPending)); + DECLARE("ACT_MACT_KSP", offsetof(thread_t, machine.ksp)); + DECLARE("ACT_MACT_BEDA", offsetof(thread_t, machine.bbDescAddr)); + DECLARE("ACT_MACT_BTS", offsetof(thread_t, machine.bbTableStart)); + DECLARE("ACT_MACT_BTE", offsetof(thread_t, machine.bbTaskEnv)); + DECLARE("ACT_MACT_SPF", offsetof(thread_t, machine.specFlags)); + DECLARE("ACT_PREEMPT_CNT", offsetof(thread_t, machine.preemption_count)); + DECLARE("ACT_PER_PROC", offsetof(thread_t, machine.PerProc)); + DECLARE("qactTimer", offsetof(thread_t, machine.qactTimer)); + DECLARE("umwSpace", offsetof(thread_t, machine.umwSpace)); + DECLARE("umwRelo", offsetof(thread_t, machine.umwRelo)); + DECLARE("umwSwitchAway", umwSwitchAway); + DECLARE("umwSwitchAwayb", umwSwitchAwayb); + DECLARE("bbTrap", offsetof(thread_t, machine.bbTrap)); + DECLARE("bbSysCall", offsetof(thread_t, machine.bbSysCall)); + DECLARE("bbInterrupt", offsetof(thread_t, machine.bbInterrupt)); + DECLARE("bbPending", offsetof(thread_t, machine.bbPending)); DECLARE("floatUsed", floatUsed); DECLARE("vectorUsed", vectorUsed); @@ -127,7 +125,6 @@ int main(int argc, char *argv[]) /* Per Proc info structure */ DECLARE("PP_CPU_NUMBER", offsetof(struct per_proc_info *, cpu_number)); DECLARE("PP_CPU_FLAGS", offsetof(struct per_proc_info *, cpu_flags)); - DECLARE("SleepState", SleepState); DECLARE("PP_ISTACKPTR", offsetof(struct per_proc_info *, istackptr)); DECLARE("PP_INTSTACK_TOP_SS", offsetof(struct per_proc_info *, intstack_top_ss)); DECLARE("PP_DEBSTACKPTR", offsetof(struct per_proc_info *, debstackptr)); @@ -137,17 +134,13 @@ int main(int argc, char *argv[]) DECLARE("holdQFret", offsetof(struct per_proc_info *, holdQFret)); DECLARE("rtcPop", offsetof(struct per_proc_info *, rtcPop)); - DECLARE("PP_SAVE_EXCEPTION_TYPE", offsetof(struct per_proc_info *, save_exception_type)); - DECLARE("PP_NEED_AST", offsetof(struct per_proc_info *, need_ast)); + DECLARE("PP_PENDING_AST", offsetof(struct per_proc_info *, pending_ast)); DECLARE("quickfret", offsetof(struct per_proc_info *, quickfret)); DECLARE("lclfree", offsetof(struct per_proc_info *, lclfree)); DECLARE("lclfreecnt", offsetof(struct per_proc_info *, lclfreecnt)); DECLARE("PP_INTS_ENABLED", offsetof(struct per_proc_info *, interrupts_enabled)); DECLARE("UAW", offsetof(struct per_proc_info *, Uassist)); DECLARE("next_savearea", 
offsetof(struct per_proc_info *, next_savearea)); - DECLARE("PP_CPU_DATA", offsetof(struct per_proc_info *, pp_cpu_data)); - DECLARE("PP_SIMPLE_LOCK_CNT", offsetof(struct per_proc_info *, pp_simple_lock_count)); - DECLARE("PP_INTERRUPT_LVL", offsetof(struct per_proc_info *, pp_interrupt_level)); DECLARE("ppbbTaskEnv", offsetof(struct per_proc_info *, ppbbTaskEnv)); DECLARE("liveVRS", offsetof(struct per_proc_info *, liveVRSave)); DECLARE("spcFlags", offsetof(struct per_proc_info *, spcFlags)); @@ -257,7 +250,7 @@ int main(int argc, char *argv[]) DECLARE("VMMXAFlgs", offsetof(struct per_proc_info *, VMMXAFlgs)); DECLARE("FAMintercept", offsetof(struct per_proc_info *, FAMintercept)); - DECLARE("ppCIOmp", offsetof(struct per_proc_info *, ppCIOmp)); + DECLARE("ppUMWmp", offsetof(struct per_proc_info *, ppUMWmp)); DECLARE("tempr0", offsetof(struct per_proc_info *, tempr0)); DECLARE("tempr1", offsetof(struct per_proc_info *, tempr1)); @@ -362,6 +355,10 @@ int main(int argc, char *argv[]) DECLARE("empadvr", offsetof(struct per_proc_info *, empadvr)); DECLARE("skipListPrev", offsetof(struct per_proc_info *, skipListPrev)); DECLARE("ppSize", sizeof(struct per_proc_info)); + DECLARE("ppe_paddr", offsetof(struct per_proc_entry *, ppe_paddr)); + DECLARE("ppe_vaddr", offsetof(struct per_proc_entry *, ppe_vaddr)); + DECLARE("ppeSize", sizeof(struct per_proc_entry)); + DECLARE("MAX_CPUS", MAX_CPUS); DECLARE("patcharea", offsetof(struct per_proc_info *, patcharea)); DECLARE("hwCounts", offsetof(struct per_proc_info *, hwCtr)); @@ -413,6 +410,11 @@ int main(int argc, char *argv[]) DECLARE("hwIgnored", offsetof(struct per_proc_info *, hwCtr.hwIgnored)); DECLARE("hwhdec", offsetof(struct per_proc_info *, hwCtr.hwhdec)); DECLARE("hwSteals", offsetof(struct per_proc_info *, hwCtr.hwSteals)); + + DECLARE("hwWalkPhys", offsetof(struct per_proc_info *, hwCtr.hwWalkPhys)); + DECLARE("hwWalkFull", offsetof(struct per_proc_info *, hwCtr.hwWalkFull)); + DECLARE("hwWalkMerge", offsetof(struct per_proc_info *, hwCtr.hwWalkMerge)); + DECLARE("hwWalkQuick", offsetof(struct per_proc_info *, hwCtr.hwWalkQuick)); DECLARE("hwMckHang", offsetof(struct per_proc_info *, hwCtr.hwMckHang)); DECLARE("hwMckSLBPE", offsetof(struct per_proc_info *, hwCtr.hwMckSLBPE)); @@ -434,6 +436,9 @@ int main(int argc, char *argv[]) DECLARE("napStamp", offsetof(struct per_proc_info *, hwCtr.napStamp)); DECLARE("napTotal", offsetof(struct per_proc_info *, hwCtr.napTotal)); + DECLARE("PP_PROCESSOR", offsetof(struct per_proc_info *, processor[0])); + DECLARE("PP_PROCESSOR_SIZE", sizeof(((struct per_proc_info *)0)->processor)); + DECLARE("PROCESSOR_SIZE", sizeof (struct processor)); DECLARE("patchAddr", offsetof(struct patch_entry *, addr)); DECLARE("patchData", offsetof(struct patch_entry *, data)); @@ -442,7 +447,7 @@ int main(int argc, char *argv[]) DECLARE("peSize", sizeof(patch_entry_t)); DECLARE("PATCH_PROCESSOR", PATCH_PROCESSOR); DECLARE("PATCH_FEATURE", PATCH_FEATURE); - DECLARE("PATCH_TABLE_SIZE", PATCH_TABLE_SIZE); + DECLARE("PATCH_END_OF_TABLE", PATCH_END_OF_TABLE); DECLARE("PatchExt32", PatchExt32); DECLARE("PatchExt32b", PatchExt32b); DECLARE("PatchLwsync", PatchLwsync); @@ -458,34 +463,29 @@ int main(int argc, char *argv[]) #define IKSBASE (u_int)STACK_IKS(0) /* values from kern/thread.h */ - DECLARE("THREAD_TOP_ACT", offsetof(thread_t, top_act)); - DECLARE("THREAD_KERNEL_STACK", offsetof(thread_act_t, kernel_stack)); - DECLARE("THREAD_RECOVER", offsetof(thread_act_t, recover)); + DECLARE("THREAD_OPTIONS", offsetof(thread_t, 
options)); + DECLARE("TH_OPT_DELAYIDLE", TH_OPT_DELAYIDLE); + DECLARE("THREAD_KERNEL_STACK", offsetof(thread_t, kernel_stack)); + DECLARE("THREAD_RECOVER", offsetof(thread_t, recover)); DECLARE("THREAD_FUNNEL_LOCK", - offsetof(thread_act_t, funnel_lock)); + offsetof(thread_t, funnel_lock)); DECLARE("THREAD_FUNNEL_STATE", - offsetof(thread_act_t, funnel_state)); + offsetof(thread_t, funnel_state)); DECLARE("LOCK_FNL_MUTEX", offsetof(struct funnel_lock *, fnl_mutex)); -#if MACH_LDEBUG - DECLARE("THREAD_MUTEX_COUNT", offsetof(thread_t, mutex_count)); -#endif /* MACH_LDEBUG */ - - /* values from kern/thread_act.h */ - DECLARE("ACT_TASK", offsetof(thread_act_t, task)); - DECLARE("ACT_THREAD", offsetof(thread_act_t, thread)); - DECLARE("ACT_LOWER", offsetof(thread_act_t, lower)); - DECLARE("ACT_MACT_PCB", offsetof(thread_act_t, mact.pcb)); - DECLARE("ACT_MACT_UPCB", offsetof(thread_act_t, mact.upcb)); - DECLARE("ACT_AST", offsetof(thread_act_t, ast)); - DECLARE("ACT_VMMAP", offsetof(thread_act_t, map)); - DECLARE("vmmCEntry", offsetof(thread_act_t, mact.vmmCEntry)); - DECLARE("vmmControl", offsetof(thread_act_t, mact.vmmControl)); - DECLARE("curctx", offsetof(thread_act_t, mact.curctx)); - DECLARE("deferctx", offsetof(thread_act_t, mact.deferctx)); - DECLARE("facctx", offsetof(thread_act_t, mact.facctx)); + + DECLARE("ACT_TASK", offsetof(thread_t, task)); + DECLARE("ACT_MACT_PCB", offsetof(thread_t, machine.pcb)); + DECLARE("ACT_MACT_UPCB", offsetof(thread_t, machine.upcb)); + DECLARE("ACT_AST", offsetof(thread_t, ast)); + DECLARE("ACT_VMMAP", offsetof(thread_t, map)); + DECLARE("vmmCEntry", offsetof(thread_t, machine.vmmCEntry)); + DECLARE("vmmControl", offsetof(thread_t, machine.vmmControl)); + DECLARE("curctx", offsetof(thread_t, machine.curctx)); + DECLARE("deferctx", offsetof(thread_t, machine.deferctx)); + DECLARE("facctx", offsetof(thread_t, machine.facctx)); #ifdef MACH_BSD - DECLARE("CTHREAD_SELF", offsetof(thread_act_t, mact.cthread_self)); + DECLARE("CTHREAD_SELF", offsetof(thread_t, machine.cthread_self)); #endif DECLARE("FPUsave", offsetof(struct facility_context *,FPUsave)); @@ -518,8 +518,9 @@ int main(int argc, char *argv[]) DECLARE("kVmmProtectPage", kVmmProtectPage); DECLARE("kVmmMapList", kVmmMapList); DECLARE("kVmmUnmapList", kVmmUnmapList); - DECLARE("kVmmSetXA", kVmmSetXA); - DECLARE("kVmmGetXA", kVmmGetXA); + DECLARE("kVmmActivateXA", kVmmActivateXA); + DECLARE("kVmmDeactivateXA", kVmmDeactivateXA); + DECLARE("kVmmGetXA", kVmmGetXA); DECLARE("kVmmMapPage64", kVmmMapPage64); DECLARE("kVmmGetPageMapping64", kVmmGetPageMapping64); DECLARE("kVmmUnmapPage64", kVmmUnmapPage64); @@ -768,6 +769,10 @@ int main(int argc, char *argv[]) DECLARE("pmapKeys", pmapKeys); DECLARE("pmapKeyDef", pmapKeyDef); DECLARE("pmapSCSubTag", offsetof(struct pmap *, pmapSCSubTag)); + DECLARE("pmapVmmExt", offsetof(struct pmap *, pmapVmmExt)); + DECLARE("pmapVmmExtPhys", offsetof(struct pmap *, pmapVmmExtPhys)); + DECLARE("pmapVMhost", pmapVMhost); + DECLARE("pmapVMgsaa", pmapVMgsaa); DECLARE("pmapSegCache", offsetof(struct pmap *, pmapSegCache)); DECLARE("pmapCurLists", offsetof(struct pmap *, pmapCurLists)); DECLARE("pmapRandNum", offsetof(struct pmap *, pmapRandNum)); @@ -793,22 +798,71 @@ int main(int argc, char *argv[]) DECLARE("maxAdrSp", maxAdrSp); DECLARE("maxAdrSpb", maxAdrSpb); - - /* values from kern/processor.h */ - DECLARE("psthreads", offsetof(struct processor_set *, threads)); - DECLARE("psthreadcnt", offsetof(struct processor_set *, thread_count)); - - /* values from kern/processor.h */ 
- DECLARE("psthreads", offsetof(struct processor_set *, threads)); - DECLARE("psthreadcnt", offsetof(struct processor_set *, thread_count)); + DECLARE("cppvPsnkb", cppvPsnkb); + DECLARE("cppvPsrcb", cppvPsrcb); + DECLARE("cppvFsnkb", cppvFsnkb); + DECLARE("cppvFsrcb", cppvFsrcb); + DECLARE("cppvNoModSnkb", cppvNoModSnkb); + DECLARE("cppvNoRefSrcb", cppvNoRefSrcb); + DECLARE("cppvKmapb", cppvKmapb); + + DECLARE("vmxSalt", offsetof(struct pmap_vmm_ext *, vmxSalt)); + DECLARE("vmxHostPmapPhys", offsetof(struct pmap_vmm_ext *, vmxHostPmapPhys)); + DECLARE("vmxHostPmap", offsetof(struct pmap_vmm_ext *, vmxHostPmap)); + DECLARE("vmxHashPgIdx", offsetof(struct pmap_vmm_ext *, vmxHashPgIdx)); + DECLARE("vmxHashPgList", offsetof(struct pmap_vmm_ext *, vmxHashPgList)); + DECLARE("vmxStats", offsetof(struct pmap_vmm_ext *, vmxStats)); + DECLARE("vmxSize", sizeof(struct pmap_vmm_ext)); + DECLARE("VMX_HPIDX_OFFSET", VMX_HPIDX_OFFSET); + DECLARE("VMX_HPLIST_OFFSET", VMX_HPLIST_OFFSET); + DECLARE("VMX_ACTMAP_OFFSET", VMX_ACTMAP_OFFSET); + DECLARE("vxsGpf", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGpf)); + DECLARE("vxsGpfMiss", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGpfMiss)); + DECLARE("vxsGrm", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrm)); + DECLARE("vxsGrmMiss", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrmMiss)); + DECLARE("vxsGrmActive", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrmActive)); + DECLARE("vxsGra", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGra)); + DECLARE("vxsGraHits", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGraHits)); + DECLARE("vxsGraActive", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGraActive)); + DECLARE("vxsGrl", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrl)); + DECLARE("vxsGrlActive", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrlActive)); + DECLARE("vxsGrs", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrs)); + DECLARE("vxsGrsHitAct", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitAct)); + DECLARE("vxsGrsHitSusp", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitSusp)); + DECLARE("vxsGrsMissGV", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsMissGV)); + DECLARE("vxsGrsHitPE", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsHitPE)); + DECLARE("vxsGrsMissPE", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGrsMissPE)); + DECLARE("vxsGad", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGad)); + DECLARE("vxsGadHit", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadHit)); + DECLARE("vxsGadFree", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadFree)); + DECLARE("vxsGadDormant", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadDormant)); + DECLARE("vxsGadSteal", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGadSteal)); + DECLARE("vxsGsu", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsu)); + DECLARE("vxsGsuHit", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsuHit)); + DECLARE("vxsGsuMiss", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGsuMiss)); + DECLARE("vxsGtd", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtd)); + DECLARE("vxsGtdHit", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtdHit)); + DECLARE("vxsGtdMiss", offsetof(struct pmap_vmm_ext *, vmxStats.vxsGtdMiss)); + + /* values from kern/timer.h */ + DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); + DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); + DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); + DECLARE("TIMER_TSTAMP", offsetof(struct timer *, tstamp)); + + DECLARE("CURRENT_TIMER", offsetof(struct processor *, processor_data.current_timer)); + DECLARE("SYSTEM_TIMER", 
offsetof(struct thread *, system_timer)); + DECLARE("USER_TIMER", offsetof(struct thread *, user_timer)); /* Constants from pmap.h */ DECLARE("PPC_SID_KERNEL", PPC_SID_KERNEL); /* values for accessing mach_trap table */ - DECLARE("MACH_TRAP_OFFSET_POW2", 4); - + DECLARE("MACH_TRAP_ARG_MUNGE32", + offsetof(mach_trap_t *, mach_trap_arg_munge32)); + DECLARE("MACH_TRAP_ARG_MUNGE64", + offsetof(mach_trap_t *, mach_trap_arg_munge64)); DECLARE("MACH_TRAP_ARGC", offsetof(mach_trap_t *, mach_trap_arg_count)); DECLARE("MACH_TRAP_FUNCTION", @@ -822,28 +876,40 @@ int main(int argc, char *argv[]) DECLARE("AST_ALL", AST_ALL); DECLARE("AST_URGENT", AST_URGENT); - /* Simple Lock structure */ - DECLARE("SLOCK_ILK", offsetof(usimple_lock_t, interlock)); -#if MACH_LDEBUG - DECLARE("SLOCK_TYPE", offsetof(usimple_lock_t, lock_type)); - DECLARE("SLOCK_PC", offsetof(usimple_lock_t, debug.lock_pc)); - DECLARE("SLOCK_THREAD", offsetof(usimple_lock_t, debug.lock_thread)); - DECLARE("SLOCK_DURATIONH",offsetof(usimple_lock_t, debug.duration[0])); - DECLARE("SLOCK_DURATIONL",offsetof(usimple_lock_t, debug.duration[1])); - DECLARE("USLOCK_TAG", USLOCK_TAG); -#endif /* MACH_LDEBUG */ + /* Spin Lock structure */ + DECLARE("SLOCK_ILK", offsetof(lck_spin_t *, interlock)); /* Mutex structure */ - DECLARE("LOCK_DATA", offsetof(mutex_t *, interlock)); - DECLARE("MUTEX_WAITERS",offsetof(mutex_t *, waiters)); - DECLARE("MUTEX_PROMOTED_PRI",offsetof(mutex_t *, promoted_pri)); -#if MACH_LDEBUG - DECLARE("MUTEX_TYPE", offsetof(mutex_t *, type)); - DECLARE("MUTEX_PC", offsetof(mutex_t *, pc)); - DECLARE("MUTEX_THREAD", offsetof(mutex_t *, thread)); + DECLARE("MUTEX_DATA", offsetof(lck_mtx_t *, lck_mtx_data)); + DECLARE("MUTEX_WAITERS",offsetof(lck_mtx_t *, lck_mtx_waiters)); + DECLARE("MUTEX_PROMOTED_PRI",offsetof(lck_mtx_t *, lck_mtx_pri)); + DECLARE("MUTEX_TYPE", offsetof(lck_mtx_ext_t *, lck_mtx_deb.type)); + DECLARE("MUTEX_STACK", offsetof(lck_mtx_ext_t *, lck_mtx_deb.stack)); + DECLARE("MUTEX_FRAMES", LCK_FRAMES_MAX); + DECLARE("MUTEX_THREAD", offsetof(lck_mtx_ext_t *, lck_mtx_deb.thread)); + DECLARE("MUTEX_ATTR", offsetof(lck_mtx_ext_t *, lck_mtx_attr)); + DECLARE("MUTEX_ATTR_DEBUG", LCK_MTX_ATTR_DEBUG); + DECLARE("MUTEX_ATTR_DEBUGb", LCK_MTX_ATTR_DEBUGb); + DECLARE("MUTEX_ATTR_STAT", LCK_MTX_ATTR_STAT); + DECLARE("MUTEX_ATTR_STATb", LCK_MTX_ATTR_STATb); + DECLARE("MUTEX_GRP", offsetof(lck_mtx_ext_t *, lck_mtx_grp)); DECLARE("MUTEX_TAG", MUTEX_TAG); -#endif /* MACH_LDEBUG */ - + DECLARE("MUTEX_IND", LCK_MTX_TAG_INDIRECT); + DECLARE("MUTEX_ITAG",offsetof(lck_mtx_t *, lck_mtx_tag)); + DECLARE("MUTEX_PTR",offsetof(lck_mtx_t *, lck_mtx_ptr)); + DECLARE("MUTEX_ASSERT_OWNED", LCK_MTX_ASSERT_OWNED); + DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED); + DECLARE("GRP_MTX_STAT_UTIL", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt)); + DECLARE("GRP_MTX_STAT_MISS", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt)); + DECLARE("GRP_MTX_STAT_WAIT", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt)); + + /* RW lock structure */ + DECLARE("RW_IND", LCK_RW_TAG_INDIRECT); + DECLARE("RW_PTR", offsetof(lck_rw_t *, lck_rw_ptr)); + DECLARE("RW_SHARED", LCK_RW_TYPE_SHARED); + DECLARE("RW_EXCL", LCK_RW_TYPE_EXCLUSIVE); + DECLARE("RW_EVENT", (((sizeof(lck_rw_t)-1))/sizeof(unsigned int))*sizeof(unsigned int)); + /* values from low_trace.h */ DECLARE("LTR_cpu", offsetof(struct LowTraceRecord *, LTR_cpu)); DECLARE("LTR_excpt", offsetof(struct LowTraceRecord *, LTR_excpt)); 
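All of the genassym.c DECLARE() churn above serves one mechanism: turning offsetof() and sizeof() expressions into named constants (ACT_PER_PROC, PP_PENDING_AST, and so on) that the assembly sources can reference. A minimal sketch of the idiom, assuming the simplest printf-based DECLARE and a stand-in structure; as the file's own header comment notes, xnu cannot run anything on the target machine, so its real build extracts the values from the compiled object instead of executing a program like this:

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-in for a kernel structure shared with assembly. */
struct fake_thread {
    long  pad;
    void *kernel_stack;
};

/* Simplest form of the DECLARE idiom: emit one #define per constant. */
#define DECLARE(name, value) \
    printf("#define %s\t%lu\n", (name), (unsigned long)(value))

int main(void)
{
    DECLARE("THREAD_KERNEL_STACK", offsetof(struct fake_thread, kernel_stack));
    DECLARE("THREAD_SIZE", sizeof(struct fake_thread));
    return 0;
}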
@@ -875,27 +941,37 @@ int main(int argc, char *argv[]) DECLARE("mpFlags", offsetof(struct mapping *, mpFlags)); DECLARE("mpBusy", mpBusy); + DECLARE("mpPrevious", mpPrevious); + DECLARE("mpNext", mpNext); DECLARE("mpPIndex", mpPIndex); - DECLARE("mpSpecial", mpSpecial); - DECLARE("mpSpecialb", mpSpecialb); + DECLARE("mpType", mpType); + DECLARE("mpNormal", mpNormal); + DECLARE("mpBlock", mpBlock); + DECLARE("mpMinSpecial", mpMinSpecial); + DECLARE("mpNest", mpNest); + DECLARE("mpLinkage", mpLinkage); + DECLARE("mpACID", mpACID); + DECLARE("mpGuest", mpGuest); DECLARE("mpFIP", mpFIP); DECLARE("mpFIPb", mpFIPb); - DECLARE("mpRemovable", mpRemovable); - DECLARE("mpRemovableb", mpRemovableb); - DECLARE("mpNest", mpNest); - DECLARE("mpNestb", mpNestb); - DECLARE("mpPerm", mpPerm); - DECLARE("mpPermb", mpPermb); - DECLARE("mpBlock", mpBlock); - DECLARE("mpBlockb", mpBlockb); + DECLARE("mpPcfg", mpPcfg); + DECLARE("mpPcfgb", mpPcfgb); DECLARE("mpRIP", mpRIP); DECLARE("mpRIPb", mpRIPb); - DECLARE("mpRSVD1", mpRSVD1); + DECLARE("mpPerm", mpPerm); + DECLARE("mpPermb", mpPermb); + DECLARE("mpBSu", mpBSu); + DECLARE("mpBSub", mpBSub); DECLARE("mpLists", mpLists); DECLARE("mpListsb", mpListsb); + DECLARE("mpgFlags", mpgFlags); + DECLARE("mpgFree", mpgFree); + DECLARE("mpgGlobal", mpgGlobal); + DECLARE("mpgDormant", mpgDormant); DECLARE("mpSpace", offsetof(struct mapping *, mpSpace)); - DECLARE("mpBSize", offsetof(struct mapping *, mpBSize)); + DECLARE("mpBSize", offsetof(struct mapping *, u.mpBSize)); + DECLARE("mpgCursor", offsetof(struct mapping *, u.mpgCursor)); DECLARE("mpPte", offsetof(struct mapping *, mpPte)); DECLARE("mpHValid", mpHValid); DECLARE("mpHValidb", mpHValidb); @@ -903,8 +979,12 @@ int main(int argc, char *argv[]) DECLARE("mpPAddr", offsetof(struct mapping *, mpPAddr)); DECLARE("mpVAddr", offsetof(struct mapping *, mpVAddr)); DECLARE("mpHWFlags", mpHWFlags); + DECLARE("mpHWFlagsb", mpHWFlagsb); + DECLARE("mpN", mpN); + DECLARE("mpNb", mpNb); DECLARE("mpPP", mpPP); DECLARE("mpPPb", mpPPb); + DECLARE("mpPPe", mpPPe); DECLARE("mpKKN", mpKKN); DECLARE("mpKKNb", mpKKNb); DECLARE("mpWIMG", mpWIMG); @@ -933,22 +1013,38 @@ int main(int argc, char *argv[]) DECLARE("mbvrswap", offsetof(struct mappingblok *, mapblokvrswap)); DECLARE("mbfree", offsetof(struct mappingblok *, mapblokfree)); DECLARE("mapcsize", sizeof(struct mappingctl)); + + DECLARE("hwpPurgePTE", hwpPurgePTE); + DECLARE("hwpMergePTE", hwpMergePTE); + DECLARE("hwpNoopPTE", hwpNoopPTE); +// DANGER WIL ROBINSON!!! This wonderfully magical tool doesn't seem to handle 64-bit constants, +// leaving us with only the cold ash of a zero. ppI, ppG, and who knows what else is affected. 
DECLARE("ppLink", offsetof(struct phys_entry *, ppLink)); DECLARE("ppLock", ppLock); - DECLARE("ppN", ppN); DECLARE("ppFlags", ppFlags); - DECLARE("ppI", ppI); +// DECLARE("ppI", ppI); DECLARE("ppIb", ppIb); - DECLARE("ppG", ppG); +// DECLARE("ppG", ppG); DECLARE("ppGb", ppGb); DECLARE("ppR", ppR); DECLARE("ppRb", ppRb); DECLARE("ppC", ppC); DECLARE("ppCb", ppCb); - DECLARE("ppPP", ppPP); - DECLARE("ppPPb", ppPPb); - DECLARE("ppPPe", ppPPe); + DECLARE("physEntrySize",physEntrySize); + DECLARE("ppLFAmask", ppLFAmask); + DECLARE("ppLFArrot", ppLFArrot); + + DECLARE("pcfFlags", offsetof(struct pcfg *, pcfFlags)); + DECLARE("pcfEncode", offsetof(struct pcfg *, pcfEncode)); + DECLARE("pcfPSize", offsetof(struct pcfg *, pcfPSize)); + DECLARE("pcfShift", offsetof(struct pcfg *, pcfShift)); + DECLARE("pcfValid", pcfValid); + DECLARE("pcfLarge", pcfLarge); + DECLARE("pcfDedSeg", pcfDedSeg); + DECLARE("pcfSize", sizeof(struct pcfg)); + DECLARE("pcfDefPcfg", pcfDefPcfg); + DECLARE("pcfLargePcfg", pcfLargePcfg); DECLARE("PCAallo", offsetof(struct PCA *, flgs.PCAallo)); DECLARE("PCAfree", offsetof(struct PCA *, flgs.PCAalflgs.PCAfree)); @@ -976,6 +1072,9 @@ int main(int argc, char *argv[]) DECLARE("mapRtNest", mapRtNest); DECLARE("mapRtRemove", mapRtRemove); DECLARE("mapRtMapDup", mapRtMapDup); + DECLARE("mapRtGuest", mapRtGuest); + DECLARE("mapRtEmpty", mapRtEmpty); + DECLARE("mapRtSmash", mapRtSmash); #if 0 DECLARE("MFpcaptr", offsetof(struct mappingflush *, pcaptr)); @@ -984,6 +1083,34 @@ int main(int argc, char *argv[]) DECLARE("MFmappingSize", sizeof(struct mfmapping)); #endif + DECLARE("GV_GROUPS_LG2", GV_GROUPS_LG2); + DECLARE("GV_GROUPS", GV_GROUPS); + DECLARE("GV_SLOT_SZ_LG2", GV_SLOT_SZ_LG2); + DECLARE("GV_SLOT_SZ", GV_SLOT_SZ); + DECLARE("GV_SLOTS_LG2", GV_SLOTS_LG2); + DECLARE("GV_SLOTS", GV_SLOTS); + DECLARE("GV_PGIDX_SZ_LG2", GV_PGIDX_SZ_LG2); + DECLARE("GV_PAGE_SZ_LG2", GV_PAGE_SZ_LG2); + DECLARE("GV_PAGE_SZ", GV_PAGE_SZ); + DECLARE("GV_PAGE_MASK", GV_PAGE_MASK); + DECLARE("GV_HPAGES", GV_HPAGES); + DECLARE("GV_GRPS_PPG_LG2", GV_GRPS_PPG_LG2); + DECLARE("GV_GRPS_PPG", GV_GRPS_PPG); + DECLARE("GV_GRP_MASK", GV_GRP_MASK); + DECLARE("GV_SLOT_MASK", GV_SLOT_MASK); + DECLARE("GV_HPAGE_SHIFT", GV_HPAGE_SHIFT); + DECLARE("GV_HPAGE_MASK", GV_HPAGE_MASK); + DECLARE("GV_HGRP_SHIFT", GV_HGRP_SHIFT); + DECLARE("GV_HGRP_MASK", GV_HGRP_MASK); + DECLARE("GV_MAPWD_BITS_LG2",GV_MAPWD_BITS_LG2); + DECLARE("GV_MAPWD_SZ_LG2", GV_MAPWD_SZ_LG2); + DECLARE("GV_MAP_WORDS", GV_MAP_WORDS); + DECLARE("GV_MAP_MASK", GV_MAP_MASK); + DECLARE("GV_MAP_SHIFT", GV_MAP_SHIFT); + DECLARE("GV_BAND_SHIFT", GV_BAND_SHIFT); + DECLARE("GV_BAND_SZ_LG2", GV_BAND_SZ_LG2); + DECLARE("GV_BAND_MASK", GV_BAND_MASK); + #if 1 DECLARE("GDsave", offsetof(struct GDWorkArea *, GDsave)); DECLARE("GDfp0", offsetof(struct GDWorkArea *, GDfp0)); @@ -1018,8 +1145,8 @@ int main(int argc, char *argv[]) DECLARE("disLktypeb", disLktypeb); DECLARE("disLkThread", disLkThread); DECLARE("disLkThreadb", disLkThreadb); - DECLARE("disLkNmSimp", disLkNmSimp); - DECLARE("disLkNmSimpb", disLkNmSimpb); + DECLARE("enaLkExtStck", enaLkExtStck); + DECLARE("enaLkExtStckb",enaLkExtStckb); DECLARE("disLkMyLck", disLkMyLck); DECLARE("disLkMyLckb", disLkMyLckb); DECLARE("dgMisc1", offsetof(struct diagWork *, dgMisc1)); @@ -1240,6 +1367,9 @@ int main(int argc, char *argv[]) DECLARE("lgVerCode", offsetof(struct lowglo *, lgVerCode)); DECLARE("lgPPStart", offsetof(struct lowglo *, lgPPStart)); + DECLARE("mckFlags", offsetof(struct lowglo *, lgMckFlags)); + 
DECLARE("lgPMWvaddr", offsetof(struct lowglo *, lgPMWvaddr)); + DECLARE("lgUMWvaddr", offsetof(struct lowglo *, lgUMWvaddr)); DECLARE("trcWork", offsetof(struct lowglo *, lgTrcWork)); DECLARE("traceMask", offsetof(struct lowglo *, lgTrcWork.traceMask)); DECLARE("traceCurr", offsetof(struct lowglo *, lgTrcWork.traceCurr)); @@ -1258,6 +1388,8 @@ int main(int argc, char *argv[]) DECLARE("SVadjust", offsetof(struct lowglo *, lgSaveanchor.saveadjust)); DECLARE("SVinuse", offsetof(struct lowglo *, lgSaveanchor.saveinuse)); DECLARE("SVtarget", offsetof(struct lowglo *, lgSaveanchor.savetarget)); + DECLARE("SVsaveinusesnapshot", offsetof(struct lowglo *, lgSaveanchor.saveinusesnapshot)); + DECLARE("SVsavefreesnapshot", offsetof(struct lowglo *, lgSaveanchor.savefreesnapshot)); DECLARE("SVsize", sizeof(struct Saveanchor)); DECLARE("tlbieLock", offsetof(struct lowglo *, lgTlbieLck)); @@ -1266,9 +1398,11 @@ int main(int argc, char *argv[]) DECLARE("dgLock", offsetof(struct lowglo *, lgdgWork.dgLock)); DECLARE("dgMisc0", offsetof(struct lowglo *, lgdgWork.dgMisc0)); + DECLARE("lglcksWork", offsetof(struct lowglo *, lglcksWork)); DECLARE("lgKillResv", offsetof(struct lowglo *, lgKillResv)); + DECLARE("lgpPcfg", offsetof(struct lowglo *, lgpPcfg)); + - DECLARE("scomcpu", offsetof(struct scomcomm *, scomcpu)); DECLARE("scomfunc", offsetof(struct scomcomm *, scomfunc)); DECLARE("scomreg", offsetof(struct scomcomm *, scomreg)); diff --git a/osfmk/ppc/hw_exception.s b/osfmk/ppc/hw_exception.s index 4904b63ca..51b344457 100644 --- a/osfmk/ppc/hw_exception.s +++ b/osfmk/ppc/hw_exception.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -46,7 +46,6 @@ #include #include #include -#include #define VERIFYSAVE 0 @@ -92,13 +91,13 @@ .globl EXT(thandler) LEXT(thandler) ; Trap handler - mfsprg r25,0 ; Get the per_proc + mfsprg r13,1 ; Get the current activation + lwz r25,ACT_PER_PROC(r13) ; Get the per_proc block lwz r1,PP_ISTACKPTR(r25) ; Get interrupt stack pointer - mfsprg r13,1 ; Get the current thread cmpwi cr0,r1,0 ; Are we on interrupt stack? - lwz r6,ACT_THREAD(r13) ; Get the shuttle + mr r6,r13 beq- cr0,EXT(ihandler) ; If on interrupt stack, treat this as interrupt... lwz r26,ACT_MACT_SPF(r13) ; Get special flags lwz r8,ACT_MACT_PCB(r13) ; Get the last savearea used @@ -122,7 +121,7 @@ LEXT(thandler) ; Trap handler subi r1,r26,FM_REDZONE ; Make a red zone on interrupt time kernel stack .L_kstackfree: - lwz r7,savesrr1+4(r4) ; Pick up the entry MSR + lwz r31,savesrr1+4(r4) ; Pick up the entry MSR sub r9,r1,r9 ; Get displacment into the kernel stack li r0,0 ; Make this 0 rlwinm. r0,r9,0,28,31 ; Verify that we have a 16-byte aligned stack (and get a 0) @@ -134,7 +133,7 @@ LEXT(thandler) ; Trap handler .L_state_on_kstack: lwz r9,savevrsave(r4) ; Get the VRSAVE register bne-- kernelStackUnaligned ; Stack is unaligned... - rlwinm. r6,r7,0,MSR_VEC_BIT,MSR_VEC_BIT ; Was vector on? + rlwinm. r6,r31,0,MSR_VEC_BIT,MSR_VEC_BIT ; Was vector on? subi r1,r1,FM_SIZE ; Push a header onto the current stack bgt-- cr2,kernelStackBad ; Kernel stack is bogus... 
@@ -154,16 +153,20 @@ tvecoff: stw r26,FM_BACKPTR(r1) ; Link back to the previous frame stwu r1, -FM_SIZE(r1) ; and make new frame #endif /* DEBUG */ + mr r30,r4 + lwz r3,SAVtime+4(r4) + addi r4,r13,SYSTEM_TIMER + bl EXT(timer_event) /* call trap handler proper, with - * ARG0 = type (not yet, holds pcb ptr) - * ARG1 = saved_state ptr (already there) - * ARG2 = dsisr (already there) - * ARG3 = dar (already there) + * ARG0 = type + * ARG1 = saved_state ptr + * ARG2 = dsisr + * ARG3 = dar */ - - lwz r3,saveexception(r4) ; Get the exception code + mr r4,r30 + lwz r3,saveexception(r30) ; Get the exception code lwz r0,ACT_MACT_SPF(r13) ; Get the special flags addi r5,r3,-T_DATA_ACCESS ; Adjust to start of range @@ -173,7 +176,7 @@ tvecoff: stw r26,FM_BACKPTR(r1) ; Link back to the previous frame lwz r5,savedsisr(r4) ; Get the saved DSISR crnor cr7_eq,cr0_eq,cr2_gt ; We should intercept if in VM and is a true trap (cr7_eq == 1 if yes) - rlwinm. r0,r7,0,MSR_PR_BIT,MSR_PR_BIT ; Are we trapping from supervisor state? (cr0_eq == 1 if yes) + rlwinm. r0,r31,0,MSR_PR_BIT,MSR_PR_BIT ; Are we trapping from supervisor state? (cr0_eq == 1 if yes) cmpi cr2,r3,T_PREEMPT ; Is this a preemption? @@ -202,7 +205,8 @@ tvecoff: stw r26,FM_BACKPTR(r1) ; Link back to the previous frame ori r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE)) ; Add in FP and EE andc r7,r7,r10 ; Turn off VEC, FP, and EE mtmsr r7 ; Disable for interrupts - mfsprg r10,0 ; Restore the per_proc info + mfsprg r8,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r8) ; Get the per_proc block /* * This is also the point where new threads come when they are created. * The new thread is setup to look like a thread that took an @@ -216,7 +220,7 @@ thread_return: mfsprg r8,1 ; Get the current thread rlwinm r11,r11,0,15,13 ; Clear the syscall flag rlwinm. r0,r0,0,MSR_PR_BIT,MSR_PR_BIT ; Are we going to the user? - lwz r1,ACT_THREAD(r8) ; Get the shuttle + mr r1,r8 stw r11,SAVflags(r3) ; Save back the flags (with reset stack cleared) lwz r5,THREAD_KERNEL_STACK(r1) ; Get the base pointer to the stack @@ -289,16 +293,16 @@ kernelStackUnaligned: LEXT(shandler) ; System call handler lwz r7,savesrr1+4(r4) ; Get the SRR1 value - mfsprg r25,0 ; Get the per proc area + mfsprg r13,1 ; Get the current activation + lwz r25,ACT_PER_PROC(r13) ; Get the per_proc block lwz r0,saver0+4(r4) ; Get the original syscall number lwz r17,PP_ISTACKPTR(r25) ; Get interrupt stack pointer - mfsprg r13,1 ; Get the current thread rlwinm r15,r0,0,0,19 ; Clear the bottom of call number for fast check mr. r17,r17 ; Are we on interrupt stack? lwz r9,savevrsave(r4) ; Get the VRsave register beq-- EXT(ihandler) ; On interrupt stack, not allowed... rlwinm. r6,r7,0,MSR_VEC_BIT,MSR_VEC_BIT ; Was vector on? - lwz r16,ACT_THREAD(r13) ; Get the shuttle + mr r16,r13 beq++ svecoff ; Vector off, do not save vrsave... stw r9,liveVRS(r25) ; Set the live value @@ -348,6 +352,10 @@ noassist: cmplwi r15,0x7000 ; Do we have a fast path trap? mr r30,r4 ; Save pointer to the new context savearea stw r0,ACT_MACT_KSP(r13) ; Mark stack as busy with 0 val stw r15,FM_BACKPTR(r1) ; Link stack frame backwards + + lwz r3,SAVtime+4(r30) + addi r4,r13,SYSTEM_TIMER + bl EXT(timer_event) #if DEBUG /* If debugging, we need two frames, the first being a dummy @@ -359,10 +367,12 @@ noassist: cmplwi r15,0x7000 ; Do we have a fast path trap? 
stwu r1, -FM_SIZE(r1) ; and make new frame #endif /* DEBUG */ + mr r4,r30 + lwz r15,SAVflags(r30) ; Get the savearea flags lwz r0,saver0+4(r30) ; Get R0 back mfmsr r11 ; Get the MSR - stwu r1,-(FM_SIZE+ARG_SIZE)(r1) ; Make a stack frame + stwu r1,-(FM_SIZE+ARG_SIZE+MUNGE_ARGS_SIZE)(r1) ; Make a stack frame ori r11,r11,lo16(MASK(MSR_EE)) ; Turn on interruption enabled bit rlwinm r10,r0,0,0,19 ; Keep only the top part oris r15,r15,SAVsyscall >> 16 ; Mark that it this is a syscall @@ -470,7 +480,8 @@ LEXT(ppcscret) mr. r3,r3 ; See what we should do mr r31,r16 ; Restore the current thread pointer bgt+ .L_thread_syscall_ret_check_ast ; Take normal AST checking return.... - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block blt+ .L_thread_syscall_return ; Return, but no ASTs.... lwz r0,saver0+4(r30) ; Restore the system call number b .L_call_server_syscall_exception ; Go to common exit... @@ -491,25 +502,26 @@ ksystrace: bge- .L_call_server_syscall_exception ; The syscall number is invalid - lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address -; -; NOTE: We do not support more than 8 parameters for PPC. The only -; system call to use more than 8 is mach_msg_overwrite_trap and it -; uses 9. We pass a 0 in as number 9. -; - lwz r3,saver3+4(r30) ; Restore r3 - lwz r4,saver4+4(r30) ; Restore r4 + lwz r0,savesrr1(r30) ; Get the saved srr1 + rlwinm. r0,r0,0,MSR_SF_BIT,MSR_SF_BIT ; Test for 64 bit caller + lwz r0,MACH_TRAP_ARG_MUNGE32(r31) ; Pick up the 32 bit munge function address + beq-- .L_ksystrace_munge + lwz r0,MACH_TRAP_ARG_MUNGE64(r31) ; Pick up the 64 bit munge function address + +.L_ksystrace_munge: + cmplwi r0,0 ; do we have a munger to call? mtctr r0 ; Set the function call address - lwz r5,saver5+4(r30) ; Restore r5 - lwz r6,saver6+4(r30) ; Restore r6 - lwz r7,saver7+4(r30) ; Restore r7 - li r0,0 ; Clear this out - lwz r8,saver8+4(r30) ; Restore r8 - lwz r9,saver9+4(r30) ; Restore r9 - lwz r10,saver10+4(r30) ; Restore r10 - stw r0,FM_ARG0(r1) ; Clear that 9th parameter just in case some fool uses it - bctrl ; perform the actual syscall - + addi r3,r30,saver3 ; Pointer to args from save area + addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args + beq-- .L_ksystrace_trapcall ; just make the trap call + bctrl ; Call the munge function + +.L_ksystrace_trapcall: + lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address + mtctr r0 ; Set the function call address + addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args + bctrl + mr r4,r30 ; Pass in the savearea bl EXT(syscall_trace_end) ; Trace the exit of the system call b .L_mach_return @@ -540,8 +552,11 @@ ksystrace: lwz r7,TASK_SYSCALLS_MACH(r10) ; Get the current count neg r31,r0 ; Make this positive - slwi r27,r31,MACH_TRAP_OFFSET_POW2 ; Convert index to offset + mr r3,r31 ; save it + slwi r27,r3,4 ; multiply by 16 + slwi r3,r3,2 ; and the original by 4 ori r28,r28,lo16(EXT(mach_trap_table)) ; Get address of table + add r27,r27,r3 ; for a total of 20x (5 words/entry) addi r7,r7,1 ; Bump TASK_SYSCALLS_MACH count cmplwi r8,0 ; Is kdebug_enable non-zero stw r7,TASK_SYSCALLS_MACH(r10) ; Save count @@ -551,26 +566,27 @@ ksystrace: add r31,r27,r28 ; Point right to the syscall table entry bge-- .L_call_server_syscall_exception ; The syscall number is invalid - - lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address -; -; NOTE: We do not support more than 8 parameters for PPC. 
The only -; system call to use more than 8 is mach_msg_overwrite_trap and it -; uses 9. We pass a 0 in as number 9. -; - lwz r3,saver3+4(r30) ; Restore r3 - lwz r4,saver4+4(r30) ; Restore r4 - lwz r5,saver5+4(r30) ; Restore r5 + lwz r0,savesrr1(r30) ; Get the saved srr1 + rlwinm. r0,r0,0,MSR_SF_BIT,MSR_SF_BIT ; Test for 64 bit caller + lwz r0,MACH_TRAP_ARG_MUNGE32(r31) ; Pick up the 32 bit munge function address + beq-- .L_kernel_syscall_munge + lwz r0,MACH_TRAP_ARG_MUNGE64(r31) ; Pick up the 64 bit munge function address + +.L_kernel_syscall_munge: + cmplwi r0,0 ; test for null munger + mtctr r0 ; Set the function call address + addi r3,r30,saver3 ; Pointer to args from save area + addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args + beq-- .L_kernel_syscall_trapcall ; null munger - skip to trap call + bctrl ; Call the munge function + +.L_kernel_syscall_trapcall: + lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address mtctr r0 ; Set the function call address - lwz r6,saver6+4(r30) ; Restore r6 - lwz r7,saver7+4(r30) ; Restore r7 - lwz r8,saver8+4(r30) ; Restore r8 - li r0,0 ; Clear this out - lwz r9,saver9+4(r30) ; Restore r9 - lwz r10,saver10+4(r30) ; Restore r10 - stw r0,FM_ARG0(r1) ; Clear that 9th parameter just in case some fool uses it - bctrl ; perform the actual syscall + addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args + bctrl + /* * Ok, return from C function, R3 = return value @@ -579,11 +595,13 @@ ksystrace: * saved state is still in R30 and the active thread is in R16 */ -.L_mach_return: +.L_mach_return: + srawi r0,r3,31 ; properly extend the return code + cmpi cr0,r3,KERN_INVALID_ARGUMENT ; deal with invalid system calls mr r31,r16 ; Move the current thread pointer - stw r3,saver3+4(r30) ; Stash the return code - cmpi cr0,r3,KERN_INVALID_ARGUMENT ; deal with invalid system calls - beq- cr0,.L_mach_invalid_ret ; otherwise fall through into the normal return path + stw r0, saver3(r30) ; stash the high part of the return code + stw r3,saver3+4(r30) ; Stash the low part of the return code + beq- cr0,.L_mach_invalid_ret ; otherwise fall through into the normal return path .L_mach_invalid_arg: @@ -606,12 +624,12 @@ ksystrace: andc r12,r12,r10 ; Turn off VEC, FP, and EE mtmsr r12 ; Turn interruptions off - mfsprg r10,0 ; Get the per_processor block + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block /* Check to see if there's an outstanding AST */ - lwz r4,PP_NEED_AST(r10) ; Get the pointer to the ast requests - lwz r4,0(r4) ; Get the flags + lwz r4,PP_PENDING_AST(r10) cmpi cr0,r4, 0 ; Any pending asts? beq++ cr0,.L_syscall_no_ast ; Nope... @@ -634,8 +652,9 @@ ksystrace: scrnotkern: #endif /* DEBUG */ - li r3,AST_ALL ; Set ast flags + lis r3,hi16(AST_ALL) ; Set ast flags li r4,1 ; Set interrupt allowed + ori r3,r3,lo16(AST_ALL) bl EXT(ast_taken) ; Process the pending ast b .L_thread_syscall_ret_check_ast ; Go see if there was another... @@ -646,10 +665,13 @@ scrnotkern: * then we want to throw an exception... 
otherwise * we want to pass the error code back to the caller */ - lwz r0,saver0+4(r30) ; reload the original syscall number - neg r28,r0 ; Make this positive - slwi r27,r28,MACH_TRAP_OFFSET_POW2 ; Convert index to offset + lwz r0,saver0+4(r30) ; reload the original syscall number + neg r28,r0 ; Make this positive + mr r4,r28 ; save a copy + slwi r27,r4,4 ; multiply by 16 + slwi r4,r4,2 ; and another 4 lis r28,hi16(EXT(mach_trap_table)) ; Get address of table + add r27,r27,r4 ; for a total of 20x (5 words/entry) ori r28,r28,lo16(EXT(mach_trap_table)) ; Get address of table add r28,r27,r28 ; Point right to the syscall table entry lwz r27,MACH_TRAP_FUNCTION(r28) ; Pick up the function address @@ -713,9 +735,9 @@ LEXT(thread_exception_return) ; Directly return to user mode frame, given that we're not going to return. */ - mfsprg r10,0 ; Get the per_processor block - lwz r4,PP_NEED_AST(r10) - lwz r4,0(r4) + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block + lwz r4,PP_PENDING_AST(r10) cmpi cr0,r4, 0 beq+ cr0,.L_exc_ret_no_ast @@ -724,8 +746,9 @@ LEXT(thread_exception_return) ; Directly return to user mode * ast_taken will save all state and bring us back here */ - li r3,AST_ALL + lis r3,hi16(AST_ALL) li r4,1 + ori r3,r3,lo16(AST_ALL) bl EXT(ast_taken) b .L_thread_exc_ret_check_ast ; check for a second AST (rare) @@ -736,7 +759,7 @@ LEXT(thread_exception_return) ; Directly return to user mode .L_exc_ret_no_ast: mfsprg r30,1 ; Get the currrent activation - lwz r31,ACT_THREAD(r30) ; Get the current thread + mr r31,r30 lwz r30,ACT_MACT_PCB(r30) mr. r30,r30 ; Is there any context yet? @@ -818,11 +841,11 @@ LEXT(ihandler) ; Interrupt handler */ lwz r10,savesrr1+4(r4) ; Get SRR1 lwz r7,savevrsave(r4) ; Get the VRSAVE register - mfsprg r25,0 ; Get the per_proc block + mfsprg r13,1 ; Get the current activation + lwz r25,ACT_PER_PROC(r13) ; Get the per_proc block li r14,0 ; Zero this for now - rlwinm. r13,r10,0,MSR_VEC_BIT,MSR_VEC_BIT ; Was vector on? + rlwinm. r16,r10,0,MSR_VEC_BIT,MSR_VEC_BIT ; Was vector on? lwz r1,PP_ISTACKPTR(r25) ; Get the interrupt stack - mfsprg r13,1 ; Get the current thread li r16,0 ; Zero this for now beq+ ivecoff ; Vector off, do not save vrsave... @@ -832,7 +855,7 @@ ivecoff: li r0,0 ; Get a constant 0 rlwinm r5,r10,0,MSR_PR_BIT,MSR_PR_BIT ; Are we trapping from supervisor state? mr. r1,r1 ; Is it active? 
cmplwi cr2,r5,0 ; cr2_eq == 1 if yes - lwz r16,ACT_THREAD(r13) ; Get the shuttle + mr r16,r13 lwz r14,ACT_MACT_PCB(r13) ; Now point to the PCB lwz r9,saver1+4(r4) ; Pick up the rupt time stack stw r14,SAVprev+4(r4) ; Queue the new save area in the front @@ -908,8 +931,17 @@ ihbootnover: ; (TEST/DEBUG) stwu r1,-FM_SIZE(r1) ; Make another new frame for C routine #endif /* DEBUG */ - lwz r5,savedsisr(r4) ; Get the DSISR - lwz r6,savedar+4(r4) ; Get the DAR + mr r31,r3 + mr r30,r4 + + lwz r3,SAVtime+4(r4) + addi r4,r13,SYSTEM_TIMER + bl EXT(timer_event) + + mr r3,r31 + mr r4,r30 + lwz r5,savedsisr(r30) ; Get the DSISR + lwz r6,savedar+4(r30) ; Get the DAR bl EXT(interrupt) @@ -927,14 +959,13 @@ LEXT(ihandler_ret) ; Marks our return point from debugger entry ori r10,r10,lo16(MASK(MSR_FP)|MASK(MSR_EE)) ; Add in FP and EE andc r0,r0,r10 ; Turn off VEC, FP, and EE mtmsr r0 ; Make sure interrupts are disabled - mfsprg r10,0 ; Get the per_proc block + mfsprg r8,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r8) ; Get the per_proc block lwz r7,SAVflags(r3) ; Pick up the flags - mfsprg r8,1 ; Get the current thread lwz r9,SAVprev+4(r3) ; Get previous save area cmplwi cr1,r8,0 ; Are we still initializing? lwz r12,savesrr1+4(r3) ; Get the MSR we will load on return - lwz r8,THREAD_TOP_ACT(r8) ; Pick up the active thread andis. r11,r7,hi16(SAVrststk) ; Is this the first on the stack? stw r9,ACT_MACT_PCB(r8) ; Point to previous context savearea mr r4,r3 ; Move the savearea pointer @@ -955,8 +986,7 @@ LEXT(ihandler_ret) ; Marks our return point from debugger entry cmplwi r3, 0 ; Check for preemption bne .L_no_int_ast ; Do not preempt if level is not zero andi. r6,r12,MASK(MSR_PR) ; privilege mode - lwz r11,PP_NEED_AST(r10) ; Get the AST request address - lwz r11,0(r11) ; Get the request + lwz r11,PP_PENDING_AST(r10) ; Get the pending AST mask beq- .L_kernel_int_ast ; In kernel space, AST_URGENT check li r3,T_AST ; Assume the worst mr. r11,r11 ; Are there any pending? 
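
The hunks above and below all make the same substitution: instead of reading the per_proc block from SPRG0 and chasing the PP_NEED_AST pointer to a separate flags word, the code reads the current activation from SPRG1, follows its ACT_PER_PROC back-pointer, and tests a PP_PENDING_AST mask stored directly in the per_proc. A C-level sketch of the new shape, with struct and field names assumed to mirror the assembly offsets:

    typedef unsigned int ast_t;

    struct per_proc_sketch {
        ast_t pending_ast;                 /* PP_PENDING_AST */
    };

    struct activation_sketch {
        struct per_proc_sketch *per_proc;  /* ACT_PER_PROC */
    };

    /* Nonzero when any AST is pending on the current processor;
     * "current" stands in for the activation held in SPRG1. */
    static int
    ast_pending_sketch(const struct activation_sketch *current)
    {
        return current->per_proc->pending_ast != 0;
    }
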
@@ -1045,15 +1075,16 @@ chkfac: lwz r29,savesrr1+4(r3) ; Get the current MSR eieio ; Make sure this gets out before owner clear -#if ppSize != 4096 -#error per_proc_info is not 4k in size +#if ppeSize != 16 +#error per_proc_entry is not 16bytes in size #endif - lis r23,hi16(EXT(per_proc_info)) ; Set base per_proc - slwi r22,r22,12 ; FInd offset to the owner per_proc - ori r23,r23,lo16(EXT(per_proc_info)) ; Set base per_proc - li r24,FPUowner ; Displacement to FPU owner - add r22,r23,r22 ; Point to the owner per_proc + lis r23,hi16(EXT(PerProcTable)) ; Set base PerProcTable + slwi r22,r22,4 ; Find offset to the owner per_proc_entry + ori r23,r23,lo16(EXT(PerProcTable)) ; Set base PerProcTable + li r24,FPUowner ; Displacement to float owner + add r22,r23,r22 ; Point to the owner per_proc_entry + lwz r22,ppe_vaddr(r22) ; Point to the owner per_proc fpuinvothr: lwarx r23,r24,r22 ; Get the owner @@ -1204,11 +1235,12 @@ chkvec: eieio ; Make sure this gets out before owner clear - lis r23,hi16(EXT(per_proc_info)) ; Set base per_proc - slwi r22,r22,12 ; Find offset to the owner per_proc - ori r23,r23,lo16(EXT(per_proc_info)) ; Set base per_proc - li r24,VMXowner ; Displacement to VMX owner - add r22,r23,r22 ; Point to the owner per_proc + lis r23,hi16(EXT(PerProcTable)) ; Set base PerProcTable + slwi r22,r22,4 ; Find offset to the owner per_proc_entry + ori r23,r23,lo16(EXT(PerProcTable)) ; Set base PerProcTable + li r24,VMXowner ; Displacement to float owner + add r22,r23,r22 ; Point to the owner per_proc_entry + lwz r22,ppe_vaddr(r22) ; Point to the owner per_proc vmxinvothr: lwarx r23,r24,r22 ; Get the owner @@ -1310,7 +1342,7 @@ vmxchkena: lwz r21,VMXowner(r31) ; Get the ID of the live context vmxena: oris r29,r29,hi16(MASK(MSR_VEC)) ; Enable facility -setena: lwz r18,cioSpace(r28) ; Get the space ID in case we are launching user +setena: lwz r18,umwSpace(r28) ; Get the space ID in case we are launching user rlwinm. r0,r29,0,MSR_PR_BIT,MSR_PR_BIT ; Are we about to launch user state? li r0,0 ; Get set to release quickfret holdoff crmove cr7_eq,cr0_eq ; Remember if we are going to user state @@ -1319,7 +1351,7 @@ setena: lwz r18,cioSpace(r28) ; Get the space ID in case we are launching u rlwinm r20,r29,(((31-vectorCngbit)+(MSR_VEC_BIT+1))&31),vectorCngbit,vectorCngbit ; Set flag if we enabled vector stw r29,savesrr1+4(r27) ; Turn facility on or off stw r0,holdQFret(r31) ; Release quickfret - oris r18,r18,hi16(cioSwitchAway) ; Set the switch-away bit in case we go to user + oris r18,r18,hi16(umwSwitchAway) ; Set the switch-away bit in case we go to user beq setenaa ; Neither float nor vector turned on.... @@ -1362,12 +1394,16 @@ segtb: mftbu r20 ; Get the upper time base mtdec r13 ; Set our value -chkifuser: beq-- cr7,chkenax ; Skip this if we are going to kernel... - stw r18,cioSpace(r28) ; Half-invalidate to force MapUserAddressSpace to reload SRs +chkifuser: addi r4,r28,SYSTEM_TIMER + mftb r3 + beq-- cr7,chkifuser1 ; Skip this if we are going to kernel... + stw r18,umwSpace(r28) ; Half-invalidate to force MapUserAddressWindow to reload SRs + addi r4,r28,USER_TIMER + +chkifuser1: bl EXT(timer_event) chkenax: - #if DEBUG lwz r20,SAVact(r27) ; (TEST/DEBUG) Make sure our restore mfsprg r21, 1 ; (TEST/DEBUG) with the current act. @@ -1445,20 +1481,24 @@ fastpath: cmplwi cr3,r0,0x7FF5 ; Is this a null fastpath? /* * void cthread_set_self(cproc_t p) * - * set's thread state "user_value" + * Set's thread state "user_value". 
In practice this is the thread-local-data-pointer (TLDP), + * though we do not interpret it. This call is mostly used by 32-bit tasks, but we save all 64 bits + * in case a 64-bit task wants to use this facility. They normally do not, because the 64-bit + * ABI reserves r13 for the TLDP. * * This op is invoked as follows: * li r0, CthreadSetSelfNumber // load the fast-trap number * sc // invoke fast-trap * blr - * */ CthreadSetSelfNumber: - - lwz r5,saver3+4(r4) /* Retrieve the self number */ - stw r5,CTHREAD_SELF(r13) /* Remember it */ - stw r5,UAW(r25) /* Prime the per_proc_info with it */ + lwz r3,saver3+0(r4) /* get the TLDP passed in r3 */ + lwz r5,saver3+4(r4) /* (all 64 bits, in case this is a 64-bit task) */ + stw r3,CTHREAD_SELF+0(r13) /* Remember it in the activation... */ + stw r5,CTHREAD_SELF+4(r13) + stw r3,UAW+0(r25) /* ...and in the per-proc */ + stw r5,UAW+4(r25) .globl EXT(fastexit) @@ -1511,13 +1551,14 @@ exitFromVM: mr r30,r4 ; Get the savearea .globl EXT(retFromVM) LEXT(retFromVM) - mfsprg r10,0 ; Restore the per_proc info + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block mr r8,r3 ; Get the activation lwz r4,SAVprev+4(r30) ; Pick up the previous savearea mr r3,r30 ; Put savearea in proper register for common code lwz r11,SAVflags(r30) ; Get the flags of the current savearea rlwinm r11,r11,0,15,13 ; Clear the syscall flag - lwz r1,ACT_THREAD(r8) ; and the active thread + mr r1,r8 stw r11,SAVflags(r3) ; Save back the flags (with reset stack cleared) stw r4,ACT_MACT_PCB(r8) ; Point to the previous savearea (or 0 if none) @@ -1550,7 +1591,8 @@ LEXT(retFromVM) LEXT(chandler) ; Choke handler li r31,0 ; Get a 0 - mfsprg r25,0 ; Get the per_proc + mfsprg r25,1 ; Get the current activation + lwz r25,ACT_PER_PROC(r25) ; Get the per_proc block stw r31,traceMask(0) ; Force tracing off right now @@ -1600,7 +1642,8 @@ versave: lwz r23,0(r22) ; (TEST/DEBUG) mr. r23,r23 ; (TEST/DEBUG) beqlr- ; (TEST/DEBUG) - mfsprg r20,0 ; (TEST/DEBUG) + mfsprg r20,1 ; Get the current activation + lwz r20,ACT_PER_PROC(r20) ; Get the per_proc block lwz r21,pfAvailable(r20) ; (TEST/DEBUG) mr. r21,r21 ; (TEST/DEBUG) bnelr+ ; (TEST/DEBUG) diff --git a/osfmk/ppc/hw_lock.s b/osfmk/ppc/hw_lock.s index d9487a4ba..7ebbedecd 100644 --- a/osfmk/ppc/hw_lock.s +++ b/osfmk/ppc/hw_lock.s @@ -20,189 +20,41 @@ * @APPLE_LICENSE_HEADER_END@ */ -#include #include #include -#include #include #include #include #define STRING ascii -#define SWT_HI 0+FM_SIZE -#define SWT_LO 4+FM_SIZE -#define MISSED 8+FM_SIZE - #define ILK_LOCKED 0x01 #define WAIT_FLAG 0x02 -#define TH_FN_OWNED 0x01 - -#define CHECKNMI 0 -#define CHECKLOCKS 1 - -#define PROLOG(space) \ - stwu r1,-(FM_ALIGN(space)+FM_SIZE)(r1) __ASMNL__ \ - mflr r0 __ASMNL__ \ - stw r3,FM_ARG0(r1) __ASMNL__ \ - stw r0,(FM_ALIGN(space)+FM_SIZE+FM_LR_SAVE)(r1) __ASMNL__ - -#define EPILOG \ - lwz r1,0(r1) __ASMNL__ \ - lwz r0,FM_LR_SAVE(r1) __ASMNL__ \ - mtlr r0 __ASMNL__ - -#if MACH_LDEBUG && CHECKLOCKS -/* - * Routines for general lock debugging. - */ - -/* - * Gets lock check flags in CR6: CR bits 24-27 - */ - -#define CHECK_SETUP(rg) \ - lbz rg,dgFlags(0) __ASMNL__ \ - mtcrf 2,rg __ASMNL__ - - -/* - * Checks for expected lock types and calls "panic" on - * mismatch. Detects calls to Mutex functions with - * type simplelock and vice versa. 
- */ -#define CHECK_MUTEX_TYPE() \ - bt 24+disLktypeb,1f __ASMNL__ \ - lwz r10,MUTEX_TYPE(r3) __ASMNL__ \ - cmpwi r10,MUTEX_TAG __ASMNL__ \ - beq+ 1f __ASMNL__ \ - lis r3,hi16(not_a_mutex) __ASMNL__ \ - ori r3,r3,lo16(not_a_mutex) __ASMNL__ \ - bl EXT(panic) __ASMNL__ \ - lwz r3,FM_ARG0(r1) __ASMNL__ \ -1: - - .data -not_a_mutex: - STRINGD "not a mutex!\n\000" - .text +#define WANT_UPGRADE 0x04 +#define WANT_EXCL 0x08 -#define CHECK_SIMPLE_LOCK_TYPE() \ - bt 24+disLktypeb,1f __ASMNL__ \ - lhz r10,SLOCK_TYPE(r3) __ASMNL__ \ - cmpwi r10,USLOCK_TAG __ASMNL__ \ - beq+ 1f __ASMNL__ \ - lis r3,hi16(not_a_slock) __ASMNL__ \ - ori r3,r3,lo16(not_a_slock) __ASMNL__ \ - bl EXT(panic) __ASMNL__ \ - lwz r3,FM_ARG0(r1) __ASMNL__ \ -1: - - .data -not_a_slock: - STRINGD "not a simple lock!\n\000" - .text +#define TH_FN_OWNED 0x01 -#define CHECK_NO_SIMPLELOCKS() \ - bt 24+disLkNmSimpb,2f __ASMNL__ \ - lis r10,hi16(MASK(MSR_VEC)) __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_FP)) __ASMNL__ \ - mfmsr r11 __ASMNL__ \ - andc r11,r11,r10 __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_EE)) __ASMNL__ \ - andc r10,r11,r10 __ASMNL__ \ - mtmsr r10 __ASMNL__ \ - isync __ASMNL__ \ - mfsprg r10,0 __ASMNL__ \ - lwz r10,PP_SIMPLE_LOCK_CNT(r10) __ASMNL__ \ - cmpwi r10,0 __ASMNL__ \ - beq+ 1f __ASMNL__ \ - lis r3,hi16(simple_locks_held) __ASMNL__ \ - ori r3,r3,lo16(simple_locks_held) __ASMNL__ \ - bl EXT(panic) __ASMNL__ \ - lwz r3,FM_ARG0(r1) __ASMNL__ \ -1: __ASMNL__ \ - mtmsr r11 __ASMNL__ \ -2: - - .data -simple_locks_held: - STRINGD "simple locks held!\n\000" - .text +# volatile CR bits +#define hwtimeout 20 +#define mlckmiss 21 -/* - * Verifies return to the correct thread in "unlock" situations. - */ -#define CHECK_THREAD(thread_offset) \ - bt 24+disLkThreadb,2f __ASMNL__ \ - lis r10,hi16(MASK(MSR_VEC)) __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_FP)) __ASMNL__ \ - mfmsr r11 __ASMNL__ \ - andc r11,r11,r10 __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_EE)) __ASMNL__ \ - andc r10,r11,r10 __ASMNL__ \ - mtmsr r10 __ASMNL__ \ - isync __ASMNL__ \ - mfsprg r10,1 __ASMNL__ \ - lwz r10,ACT_THREAD(r10) __ASMNL__ \ - cmpwi r10,0 __ASMNL__ \ - beq- 1f __ASMNL__ \ - lwz r9,thread_offset(r3) __ASMNL__ \ - cmpw r9,r10 __ASMNL__ \ - beq+ 1f __ASMNL__ \ - lis r3,hi16(wrong_thread) __ASMNL__ \ - ori r3,r3,lo16(wrong_thread) __ASMNL__ \ - bl EXT(panic) __ASMNL__ \ - lwz r3,FM_ARG0(r1) __ASMNL__ \ -1: __ASMNL__ \ - mtmsr r11 __ASMNL__ \ -2: - .data -wrong_thread: - STRINGD "wrong thread!\n\000" - .text +#define RW_DATA 0 -#define CHECK_MYLOCK(thread_offset) \ - bt 24+disLkMyLckb,2f __ASMNL__ \ - lis r10,hi16(MASK(MSR_VEC)) __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_FP)) __ASMNL__ \ - mfmsr r11 __ASMNL__ \ - andc r11,r11,r10 __ASMNL__ \ - ori r10,r10,lo16(MASK(MSR_EE)) __ASMNL__ \ - andc r10,r11,r10 __ASMNL__ \ - mtmsr r10 __ASMNL__ \ - isync __ASMNL__ \ - mfsprg r10,1 __ASMNL__ \ - lwz r10,ACT_THREAD(r10) __ASMNL__ \ - cmpwi r10,0 __ASMNL__ \ - beq- 1f __ASMNL__ \ - lwz r9, thread_offset(r3) __ASMNL__ \ - cmpw r9,r10 __ASMNL__ \ - bne+ 1f __ASMNL__ \ - lis r3, hi16(mylock_attempt) __ASMNL__ \ - ori r3,r3,lo16(mylock_attempt) __ASMNL__ \ - bl EXT(panic) __ASMNL__ \ - lwz r3,FM_ARG0(r1) __ASMNL__ \ -1: __ASMNL__ \ - mtmsr r11 __ASMNL__ \ -2: +#define PROLOG(space) \ + stwu r1,-(FM_ALIGN(space)+FM_SIZE)(r1) __ASMNL__ \ + mfcr r2 __ASMNL__ \ + mflr r0 __ASMNL__ \ + stw r3,FM_ARG0(r1) __ASMNL__ \ + stw r11,FM_ARG0+0x04(r1) __ASMNL__ \ + stw r2,(FM_ALIGN(space)+FM_SIZE+FM_CR_SAVE)(r1) __ASMNL__ \ + stw r0,(FM_ALIGN(space)+FM_SIZE+FM_LR_SAVE)(r1) __ASMNL__ - .data 
-mylock_attempt: - STRINGD "mylock attempt!\n\000" - .text - -#else /* MACH_LDEBUG */ - -#define CHECK_SETUP(rg) -#define CHECK_MUTEX_TYPE() -#define CHECK_SIMPLE_LOCK_TYPE() -#define CHECK_THREAD(thread_offset) -#define CHECK_NO_SIMPLELOCKS() -#define CHECK_MYLOCK(thread_offset) +#define EPILOG \ + lwz r1,0(r1) __ASMNL__ \ + lwz r0,FM_LR_SAVE(r1) __ASMNL__ \ + mtlr r0 __ASMNL__ -#endif /* MACH_LDEBUG */ - /* * void hw_lock_init(hw_lock_t) * @@ -218,26 +70,21 @@ LEXT(hw_lock_init) blr /* - * void hw_lock_unlock(hw_lock_t) + * unsigned int hw_lock_bit(hw_lock_t, unsigned int bit, unsigned int timeout) * - * Unconditionally release lock. - * Release preemption level. + * Try to acquire spin-lock. The second parameter is the bit mask to test and set. + * multiple bits may be set. Return success (1) or failure (0). + * Attempt will fail after timeout ticks of the timebase. */ .align 5 - .globl EXT(hw_lock_unlock) - -LEXT(hw_lock_unlock) + .globl EXT(hw_lock_bit) - .globl EXT(hwulckPatch_isync) -LEXT(hwulckPatch_isync) - isync - .globl EXT(hwulckPatch_eieio) -LEXT(hwulckPatch_eieio) - eieio - li r0, 0 ; set lock to free - stw r0, 0(r3) +LEXT(hw_lock_bit) - b epStart ; Go enable preemption... + crset hwtimeout ; timeout option + mr r12,r4 ; Load bit mask + mr r4,r5 ; Load timeout value + b lckcomm ; Join on up... /* * void hw_lock_lock(hw_lock_t) @@ -250,8 +97,15 @@ LEXT(hwulckPatch_eieio) .globl EXT(hw_lock_lock) LEXT(hw_lock_lock) + crclr hwtimeout ; no timeout option + li r4,0 ; request default timeout value + li r12,ILK_LOCKED ; Load bit mask + b lckcomm ; Join on up... + lockDisa: - li r4,0 ; no timeout value + crset hwtimeout ; timeout option + li r4,0 ; request default timeout value + li r12,ILK_LOCKED ; Load bit mask b lckcomm ; Join on up... /* @@ -270,13 +124,8 @@ lockDisa: .globl EXT(hw_lock_to) LEXT(hw_lock_to) - -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif - + crset hwtimeout ; timeout option + li r12,ILK_LOCKED ; Load bit mask lckcomm: mfsprg r6,1 ; Get the current activation lwz r5,ACT_PREEMPT_CNT(r6) ; Get the preemption level @@ -286,12 +135,14 @@ lckcomm: li r8,0 ; Set r8 to zero lcktry: lwarx r6,0,r5 ; Grab the lock value - andi. r3,r6,ILK_LOCKED ; Is it locked? - ori r6,r6,ILK_LOCKED ; Set interlock + and. r3,r6,r12 ; Is it locked? + or r6,r6,r12 ; Set interlock bne-- lckspin ; Yeah, wait for it to clear... stwcx. r6,0,r5 ; Try to seize that there durn lock bne-- lcktry ; Couldn't get it... li r3,1 ; return true + .globl EXT(hwllckPatch_isync) +LEXT(hwllckPatch_isync) isync ; Make sure we don't use a speculativily loaded value blr ; Go on home... @@ -321,7 +172,7 @@ lockspin1: mtmsr r7 ; Turn off interruptions mftb r8 ; Get timestamp on entry lcksniff: lwz r3,0(r5) ; Get that lock in here - andi. r3,r3,ILK_LOCKED ; Is it free yet? + and. r3,r3,r12 ; Is it free yet? beq++ lckretry ; Yeah, try for it again... mftb r10 ; Time stamp us now @@ -348,94 +199,42 @@ lckretry: li r8,1 ; Insure that R8 is not 0 b lcktry lckfail: ; We couldn't get the lock + bf hwtimeout,lckpanic li r3,0 ; Set failure return code blr ; Return, head hanging low... 
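
hw_lock_lock, hw_lock_to, and the relocated hw_lock_bit above now share one acquire path (lckcomm): the caller supplies the bit mask to test-and-set in r12 and a timeout in r4, and a condition-register flag (hwtimeout) chooses between returning failure and panicking when the timeout expires. A portable C sketch of the mask-based acquire, assuming GCC/Clang __atomic builtins in place of the lwarx/stwcx. reservation pair and a plain iteration bound in place of the timebase comparison, so it illustrates the logic only:

    /* Try to set all bits in `mask` atomically; fail if any is still
     * set when the spin budget runs out. Returns 1 on success, 0 on
     * timeout (the assembly may panic instead, per its hwtimeout flag). */
    static int
    hw_lock_bit_sketch(unsigned int *lock, unsigned int mask,
        unsigned long spins)
    {
        unsigned int old;

        while (spins--) {
            old = __atomic_load_n(lock, __ATOMIC_RELAXED);
            if (old & mask)
                continue;                 /* held: keep sniffing */
            if (__atomic_compare_exchange_n(lock, &old, old | mask,
                0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
                return 1;                 /* lock seized */
        }
        return 0;
    }

The acquire ordering on the successful exchange plays the role of the trailing isync, keeping speculatively issued loads from completing before the lock is actually held.
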
- +lckpanic: + mr r4,r5 + mr r5,r3 + lis r3,hi16(lckpanic_str) ; Get the failed lck message + ori r3,r3,lo16(lckpanic_str) ; Get the failed lck message + bl EXT(panic) + BREAKPOINT_TRAP ; We die here anyway + .data +lckpanic_str: + STRINGD "timeout on attempt to acquire lock (0x%08X), value = 0x%08X\n\000" + .text /* - * unsigned int hw_lock_bit(hw_lock_t, unsigned int bit, unsigned int timeout) + * void hw_lock_unlock(hw_lock_t) * - * Try to acquire spin-lock. The second parameter is the bit mask to test and set. - * multiple bits may be set. Return success (1) or failure (0). - * Attempt will fail after timeout ticks of the timebase. - * We try fairly hard to get this lock. We disable for interruptions, but - * reenable after a "short" timeout (128 ticks, we may want to shorten this). - * After checking to see if the large timeout value (passed in) has expired and a - * sufficient number of cycles have gone by (to insure pending 'rupts are taken), - * we return either in abject failure, or disable and go back to the lock sniff routine. - * If the sniffer finds the lock free, it jumps right up and tries to grab it. + * Unconditionally release lock. + * Release preemption level. */ .align 5 - .globl EXT(hw_lock_bit) - -LEXT(hw_lock_bit) - - li r10,0 - -bittry: lwarx r6,0,r3 ; Grab the lock value - and. r0,r6,r4 ; See if any of the lock bits are on - or r6,r6,r4 ; Turn on the lock bits - bne-- bitspin ; Yeah, wait for it to clear... - stwcx. r6,0,r3 ; Try to seize that there durn lock - bne-- bittry ; Just start up again if the store failed... - - li r3,1 ; Set good return code - isync ; Make sure we don't use a speculativily loaded value - blr - - .align 5 - -bitspin: li r11,lgKillResv ; Get killing field - stwcx. r11,0,r11 ; Kill reservation - - mr. r10,r10 ; Is r8 set to zero - li r10,1 ; Close gate - beq-- bit1sttime ; If yes, first spin attempt - -bitspin0: mtmsr r7 ; Turn off interruptions - mftb r8 ; Get the low part of the time base - -bitsniff: lwz r6,0(r3) ; Get that lock in here - and. r0,r6,r4 ; See if any of the lock bits are on - beq++ bitretry ; Yeah, try for it again... - - mftb r6 ; Time stamp us now - sub r6,r6,r8 ; Get the elapsed time - cmplwi r6,128 ; Have we been spinning for 128 tb ticks? - blt++ bitsniff ; Not yet... - - mtmsr r9 ; Say, any interrupts pending? - -; The following instructions force the pipeline to be interlocked to that only one -; instruction is issued per cycle. The insures that we stay enabled for a long enough -; time. If it's too short, pending interruptions will not have a chance to be taken - - subi r5,r5,128 ; Back off elapsed time from timeout value - or r5,r5,r5 ; Do nothing here but force a single cycle delay - mr. r5,r5 ; See if we used the whole timeout - or r5,r5,r5 ; Do nothing here but force a single cycle delay - - bgt++ bitspin0 ; Now that we've opened an enable window, keep trying... - - li r3,0 ; Set failure return code - blr ; Return, head hanging low... 
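
The hunks above also annotate hw_queue_atomic and hw_dequeue_atomic as the kernel-internal bodies of OSEnqueueAtomic() and OSDequeueAtomic(). The enqueue is a lock-free LIFO push in which the element's chain pointer lives at a caller-supplied byte displacement; here is a C sketch of that push, with a compare-and-swap standing in for the reservation loop (the names are illustrative, not the exported API):

    static void
    hw_queue_atomic_sketch(void **anchor, void *element, unsigned long disp)
    {
        /* The element's link field sits `disp` bytes into the element. */
        void **chain = (void **)((char *)element + disp);
        void *head;

        do {
            head = __atomic_load_n(anchor, __ATOMIC_RELAXED);
            *chain = head;                /* point element at current head */
        } while (!__atomic_compare_exchange_n(anchor, &head, element,
            0, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
    }
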
- -bitretry: mtmsr r9 ; Enable for interruptions - b bittry + .globl EXT(hw_lock_unlock) -bit1sttime: lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable - mfmsr r9 ; Get the MSR value - ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable - ori r7,r0,lo16(MASK(MSR_EE)) ; Get EE bit on too - andc r9,r9,r0 ; Clear FP and VEC - andc r7,r9,r7 ; Clear EE as well - mtmsr r7 ; Turn off interruptions - isync ; May have turned off vec and fp here - mftb r8 ; Get the low part of the time base - b bitsniff +LEXT(hw_lock_unlock) - .align 5 + .globl EXT(hwulckPatch_isync) +LEXT(hwulckPatch_isync) + isync + .globl EXT(hwulckPatch_eieio) +LEXT(hwulckPatch_eieio) + eieio + li r0, 0 ; set lock to free + stw r0, 0(r3) + b epStart ; Go enable preemption... /* * unsigned int hw_unlock_bit(hw_lock_t, unsigned int bit) @@ -460,7 +259,7 @@ ubittry: lwarx r0,0,r3 ; Grab the lock value stwcx. r0,0,r3 ; Try to clear that there durn lock bne- ubittry ; Try again, couldn't save it... - blr ; Leave... + b epStart ; Go enable preemption... /* * unsigned int hw_lock_mbits(hw_lock_t, unsigned int bits, unsigned int value, @@ -545,6 +344,8 @@ mbitretry: .align 5 mbitgot: li r3,1 ; Set good return code + .globl EXT(hwlmlckPatch_isync) +LEXT(hwlmlckPatch_isync) isync ; Make sure we do not use a speculativily loaded value blr @@ -656,6 +457,8 @@ LEXT(hw_lock_try) stwcx. r5,0,r3 ; And SET (if still reserved) bne-- .L_lock_try_loop ; If set failed, loop back + .globl EXT(hwltlckPatch_isync) +LEXT(hwltlckPatch_isync) isync mfsprg r6,1 ; Get current activation @@ -713,6 +516,8 @@ cstry: lwarx r9,0,r5 ; Grab the area value bne-- csfail ; No, it must have changed... stwcx. r4,0,r5 ; Try to save the new value bne-- cstry ; Didn't get it, try again... + .globl EXT(hwcsatomicPatch_isync) +LEXT(hwcsatomicPatch_isync) isync ; Just hold up prefetch blr ; Return... @@ -814,6 +619,7 @@ andtry: lwarx r3,0,r6 ; Grab the area value * element is the pointer to the element to insert * disp is the displacement into the element to the chain pointer * + * NOTE: OSEnqueueAtomic() is aliased to this, see xnu/libkern/Makefile */ .align 5 .globl EXT(hw_queue_atomic) @@ -857,6 +663,8 @@ hw_queue_comm: * anchor is the pointer to the first element * disp is the displacement into the element to the chain pointer * Returns element if found, 0 if empty. + * + * NOTE: OSDequeueAtomic() is aliased to this, see xnu/libkern/Makefile */ .align 5 .globl EXT(hw_dequeue_atomic) @@ -879,73 +687,323 @@ hdcFail: li r4,lgKillResv ; Killing field blr ; Leave... +/* + * Routines for mutex lock debugging. + */ + +/* + * Gets lock check flags in CR6: CR bits 24-27 + */ +#define CHECK_SETUP(rg) \ + lbz rg,lglcksWork(0) __ASMNL__ \ + mtcrf 2,rg __ASMNL__ + + +/* + * Checks for expected lock type. + */ +#define CHECK_MUTEX_TYPE() \ + bf MUTEX_ATTR_DEBUGb,1f __ASMNL__ \ + bt 24+disLktypeb,1f __ASMNL__ \ + lwz r10,MUTEX_TYPE(r3) __ASMNL__ \ + cmpwi r10,MUTEX_TAG __ASMNL__ \ + beq++ 1f __ASMNL__ \ + PROLOG(0) __ASMNL__ \ + mr r4,r11 __ASMNL__ \ + mr r5,r10 __ASMNL__ \ + lis r3,hi16(not_a_mutex) __ASMNL__ \ + ori r3,r3,lo16(not_a_mutex) __ASMNL__ \ + bl EXT(panic) __ASMNL__ \ + BREAKPOINT_TRAP __ASMNL__ \ +1: + + .data +not_a_mutex: + STRINGD "mutex (0x%08X) not a mutex type (0x%08X)\n\000" + .text + +/* + * Verifies return to the correct thread in "unlock" situations. + */ +#define CHECK_THREAD(thread_offset) \ + bf MUTEX_ATTR_DEBUGb,3f __ASMNL__ \ + bt 24+disLkThreadb,3f __ASMNL__ \ + mfsprg r10,1 __ASMNL__ \ + lwz r5,MUTEX_DATA(r3) __ASMNL__ \ + rlwinm. 
r9,r5,0,0,29 __ASMNL__ \ + bne++ 1f __ASMNL__ \ + lis r3,hi16(not_held) __ASMNL__ \ + ori r3,r3,lo16(not_held) __ASMNL__ \ + b 2f __ASMNL__ \ +1: __ASMNL__ \ + cmpw r9,r10 __ASMNL__ \ + beq++ 3f __ASMNL__ \ + mr r5,r10 __ASMNL__ \ + mr r6,r9 __ASMNL__ \ + lis r3,hi16(wrong_thread) __ASMNL__ \ + ori r3,r3,lo16(wrong_thread) __ASMNL__ \ +2: __ASMNL__ \ + mr r4,r11 __ASMNL__ \ + PROLOG(0) __ASMNL__ \ + bl EXT(panic) __ASMNL__ \ + BREAKPOINT_TRAP __ASMNL__ \ +3: + + .data +not_held: + STRINGD "mutex (0x%08X) not held\n\000" +wrong_thread: + STRINGD "mutex (0x%08X) unlocked by non-owner(0x%08X), current owner(0x%08X)\n\000" + .text + +#define CHECK_MYLOCK() \ + bf MUTEX_ATTR_DEBUGb,1f __ASMNL__ \ + bt 24+disLkMyLckb,1f __ASMNL__ \ + mfsprg r10,1 __ASMNL__ \ + lwz r9,MUTEX_DATA(r3) __ASMNL__ \ + rlwinm r9,r9,0,0,29 __ASMNL__ \ + cmpw r9,r10 __ASMNL__ \ + bne++ 1f __ASMNL__ \ + mr r4,r11 __ASMNL__ \ + lis r3, hi16(mylock_attempt) __ASMNL__ \ + ori r3,r3,lo16(mylock_attempt) __ASMNL__ \ + bl EXT(panic) __ASMNL__ \ + BREAKPOINT_TRAP __ASMNL__ \ +1: + + .data +mylock_attempt: + STRINGD "mutex (0x%08X) recursive lock attempt\n\000" + .text + +#define LCK_STACK(lck, stack, lck_stack, frame_cnt, lr_save, tmp) \ + bf 24+enaLkExtStckb,3f __ASMNL__ \ + addi lck_stack,lck,MUTEX_STACK __ASMNL__ \ + li frame_cnt,MUTEX_FRAMES-1 __ASMNL__ \ +1: __ASMNL__ \ + mr tmp,stack __ASMNL__ \ + lwz stack,0(stack) __ASMNL__ \ + xor tmp,stack,tmp __ASMNL__ \ + cmplwi tmp,8192 __ASMNL__ \ + bge-- 2f __ASMNL__ \ + lwz lr_save,FM_LR_SAVE(stack) __ASMNL__ \ + stwu lr_save,4(lck_stack) __ASMNL__ \ + subi frame_cnt,frame_cnt,1 __ASMNL__ \ + cmpi cr0,frame_cnt,0 __ASMNL__ \ + bne 1b __ASMNL__ \ + b 3f __ASMNL__ \ +2: __ASMNL__ \ + li tmp,0 __ASMNL__ \ + stwu tmp,4(lck_stack) __ASMNL__ \ + subi frame_cnt,frame_cnt,1 __ASMNL__ \ + cmpi cr0,frame_cnt,0 __ASMNL__ \ + bne 2b __ASMNL__ \ +3: + /* * void mutex_init(mutex_t* l, etap_event_t etap) * */ .align 5 .globl EXT(mutex_init) - LEXT(mutex_init) PROLOG(0) - li r10, 0 - stw r10, LOCK_DATA(r3) ; clear lock word - sth r10, MUTEX_WAITERS(r3) ; init waiter count - sth r10, MUTEX_PROMOTED_PRI(r3) + li r10,0 + stw r10,MUTEX_DATA(r3) ; clear lock word + sth r10,MUTEX_WAITERS(r3) ; init waiter count + sth r10,MUTEX_PROMOTED_PRI(r3) #if MACH_LDEBUG - stw r10, MUTEX_PC(r3) ; init caller pc - stw r10, MUTEX_THREAD(r3) ; and owning thread - li r10, MUTEX_TAG - stw r10, MUTEX_TYPE(r3) ; set lock type + li r11,MUTEX_ATTR_DEBUG + stw r10,MUTEX_STACK(r3) ; init caller pc + stw r10,MUTEX_THREAD(r3) ; and owning thread + li r9, MUTEX_TAG + stw r9, MUTEX_TYPE(r3) ; set lock type + stw r11,MUTEX_ATTR(r3) + addi r8,r3,MUTEX_STACK-4 + li r9,MUTEX_FRAMES +mlistck: + stwu r10,4(r8) ; init stack + subi r9,r9,1 + cmpi cr0,r9,0 + bne mlistck #endif /* MACH_LDEBUG */ EPILOG blr /* - * void mutex_lock(mutex_t*) + * void lck_mtx_lock_ext(lck_mtx_ext_t*) * */ .align 5 + .globl EXT(lck_mtx_lock_ext) +LEXT(lck_mtx_lock_ext) +#if MACH_LDEBUG .globl EXT(mutex_lock) LEXT(mutex_lock) .globl EXT(_mutex_lock) LEXT(_mutex_lock) +#endif + mr r11,r3 ; Save lock addr +mlckeEnter: + lwz r0,MUTEX_ATTR(r3) + mtcrf 1,r0 ; Set cr7 + CHECK_SETUP(r12) + CHECK_MUTEX_TYPE() + + bf MUTEX_ATTR_DEBUGb,L_mutex_lock_assert_wait_2 + PROLOG(0) + bl EXT(assert_wait_possible) + mr. 
r3,r3 + bne L_mutex_lock_assert_wait_1 + lis r3,hi16(L_mutex_lock_assert_wait_panic_str) + ori r3,r3,lo16(L_mutex_lock_assert_wait_panic_str) + bl EXT(panic) + BREAKPOINT_TRAP ; We die here anyway + + .data +L_mutex_lock_assert_wait_panic_str: + STRINGD "mutex lock attempt with assert_wait_possible false\n\000" + .text + +L_mutex_lock_assert_wait_1: + lwz r3,FM_ARG0(r1) + lwz r11,FM_ARG0+0x04(r1) + lwz r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1) + mtcr r2 + EPILOG +L_mutex_lock_assert_wait_2: -#if !MACH_LDEBUG mfsprg r6,1 ; load the current thread - lwz r5,0(r3) ; Get the lock quickly + bf MUTEX_ATTR_STATb,mlckestatskip ; Branch if no stat + lwz r5,MUTEX_GRP(r3) ; Load lock group + li r7,GRP_MTX_STAT_UTIL+4 ; Set stat util offset +mlckestatloop: + lwarx r8,r7,r5 ; Load stat util cnt + addi r8,r8,1 ; Increment stat util cnt + stwcx. r8,r7,r5 ; Store stat util cnt + bne-- mlckestatloop ; Retry if failed + mr. r8,r8 ; Test for zero + bne++ mlckestatskip ; Did stat util cnt wrapped? + lwz r8,GRP_MTX_STAT_UTIL(r5) ; Load upper stat util cnt + addi r8,r8,1 ; Increment upper stat util cnt + stw r8,GRP_MTX_STAT_UTIL(r5) ; Store upper stat util cnt +mlckestatskip: + lwz r5,MUTEX_DATA(r3) ; Get the lock quickly li r4,0 li r8,0 + lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable + mfmsr r9 ; Get the MSR value + ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable + ori r7,r0,lo16(MASK(MSR_EE)) ; Get EE bit on too + andc r9,r9,r0 ; Clear FP and VEC + andc r7,r9,r7 ; Clear EE as well + mtmsr r7 ; Turn off interruptions + isync ; May have turned off vec and fp here mr. r5,r5 ; Quick check - bne-- mlckspin1 ; Can not get it right now... + bne-- mlckespin01 ; Can not get it right now... -mlcktry: - lwarx r5,0,r3 ; load the mutex lock +mlcketry: + lwarx r5,MUTEX_DATA,r3 ; load the mutex lock mr. r5,r5 - bne-- mlckspin0 ; Can not get it right now... - stwcx. r6,0,r3 ; grab the lock - bne-- mlcktry ; loop back if failed + bne-- mlckespin0 ; Can not get it right now... + stwcx. r6,MUTEX_DATA,r3 ; grab the lock + bne-- mlcketry ; loop back if failed + .globl EXT(mlckePatch_isync) +LEXT(mlckePatch_isync) isync ; stop prefeteching - mflr r8 - stw r8,4(r3) + mflr r12 + bf MUTEX_ATTR_DEBUGb,mlckedebskip + mr r8,r6 ; Get the active thread + stw r12,MUTEX_STACK(r3) ; Save our caller + stw r8,MUTEX_THREAD(r3) ; Set the mutex's holding thread + mr r5,r1 + LCK_STACK(r3,r5,r6,r7,r8,r10) +mlckedebskip: + mtmsr r9 ; Say, any interrupts pending? blr -mlckspin0: +mlckespin0: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Kill reservation -mlckspin1: - mr. r4,r4 ; Test timeout value - bne++ mlckspin2 - lis r4,hi16(EXT(MutexSpin)) ; Get the high part +mlckespin01: + mflr r12 + mtmsr r9 ; Say, any interrupts pending? + bl mlckspin1 + mtmsr r7 ; Turn off interruptions, vec and fp off already + mtlr r12 + b mlcketry + +/* + * void lck_mtx_lock(lck_mtx_t*) + * + */ + .align 5 + .globl EXT(lck_mtx_lock) +LEXT(lck_mtx_lock) + +#if !MACH_LDEBUG + .globl EXT(mutex_lock) +LEXT(mutex_lock) + + .globl EXT(_mutex_lock) +LEXT(_mutex_lock) +#endif + + mfsprg r6,1 ; load the current thread + lwz r5,MUTEX_DATA(r3) ; Get the lock quickly + mr r11,r3 ; Save lock addr + li r4,0 + li r8,0 + li r9,0 + mr. r5,r5 ; Quick check + bne-- mlckspin00 ; Indirect or Can not get it right now... + +mlcktry: + lwarx r5,MUTEX_DATA,r3 ; load the mutex lock + mr. r5,r5 + bne-- mlckspin01 ; Can not get it right now... + stwcx. 
r6,MUTEX_DATA,r3 ; grab the lock + bne-- mlcktry ; loop back if failed + .globl EXT(mlckPatch_isync) +LEXT(mlckPatch_isync) + isync ; stop prefeteching + blr + +mlckspin00: + cmpli cr0,r5,MUTEX_IND ; Is it a mutex indirect + bne-- mlckspin02 ; No, go handle contention + lwz r3,MUTEX_PTR(r3) ; load mutex ext pointer + b mlckeEnter +mlckspin01: + li r5,lgKillResv ; Killing field + stwcx. r5,0,r5 ; Kill reservation +mlckspin02: + mflr r12 + li r0,0 + mtcrf 1,r0 ; Set cr7 to zero + bl mlckspin1 + mtlr r12 + b mlcktry + + +mlckspin1: + mr. r4,r4 ; Test timeout value + bne++ mlckspin2 + lis r4,hi16(EXT(MutexSpin)) ; Get the high part ori r4,r4,lo16(EXT(MutexSpin) ) ; And the low part lwz r4,0(r4) ; Get spin timerout value mr. r4,r4 ; Test spin timeout value - beq mlckslow1 ; Is spin timeout set to zero + bne++ mlckspin2 ; Is spin timeout requested + crclr mlckmiss ; Clear miss test + b mlckslow1 ; Don't try to spin mlckspin2: mr. r8,r8 ; Is r8 set to zero bne++ mlckspin3 ; If yes, first spin attempt + crclr mlckmiss ; Clear miss test + mr. r9,r9 ; Is r9 set to zero + bne++ mlckspin3 ; If yes, r9 set with msr value lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable mfmsr r9 ; Get the MSR value ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable @@ -960,15 +1018,33 @@ mlckspin2: mr. r8,r8 ; Is r8 set to zero mlckspin3: mtmsr r7 ; Turn off interruptions mftb r8 ; Get timestamp on entry -mlcksniff: lwz r5,0(r3) ; Get that lock in here +mlcksniff: lwz r5,MUTEX_DATA(r3) ; Get that lock in here mr. r5,r5 ; Is the lock held beq++ mlckretry ; No, try for it again... - rlwinm r5,r5,0,0,29 ; Extract the lock owner - mr. r5,r5 ; Quick check + rlwinm. r10,r5,0,0,29 ; Extract the lock owner beq++ mlckslow0 ; InterLock is held - lwz r10,ACT_MACT_SPF(r5) ; Get the special flags - rlwinm. r10,r10,0,OnProcbit,OnProcbit ; Is OnProcbit set? + bf MUTEX_ATTR_STATb,mlStatSkip ; Branch if no stat + andi. r5,r5,ILK_LOCKED ; extract interlocked? + bne mlStatSkip ; yes, skip + bt mlckmiss,mlStatSkip ; miss already counted + crset mlckmiss ; Remember miss recorded + lwz r5,MUTEX_GRP(r3) ; Load lock group + addi r5,r5,GRP_MTX_STAT_MISS+4 ; Add stat miss offset +mlStatLoop: + lwarx r6,0,r5 ; Load stat miss cnt + addi r6,r6,1 ; Increment stat miss cnt + stwcx. r6,0,r5 ; Update stat miss cnt + bne-- mlStatLoop ; Retry if failed + mfsprg r6,1 ; Reload current thread +mlStatSkip: + lwz r2,ACT_MACT_SPF(r10) ; Get the special flags + rlwinm. r2,r2,0,OnProcbit,OnProcbit ; Is OnProcbit set? beq mlckslow0 ; Lock owner isn't running + lis r2,hi16(TH_OPT_DELAYIDLE) ; Get DelayedIdle Option + ori r2,r2,lo16(TH_OPT_DELAYIDLE) ; Get DelayedIdle Option + lwz r10,THREAD_OPTIONS(r10) ; Get the thread options + and. r10,r10,r2 ; Is DelayedIdle set? + bne mlckslow0 ; Lock owner is in delay idle mftb r10 ; Time stamp us now sub r10,r10,r8 ; Get the elapsed time @@ -987,59 +1063,33 @@ mlcksniff: lwz r5,0(r3) ; Get that lock in here or r4,r4,r4 ; Do nothing here but force a single cycle delay ble-- mlckslow1 ; We failed - b mlckspin1 ; Now that we've opened an enable window, keep trying... + b mlckspin3 ; Now that we've opened an enable window, keep trying... mlckretry: mtmsr r9 ; Restore interrupt state li r8,1 ; Show already through once - b mlcktry + blr + mlckslow0: ; We couldn't get the lock mtmsr r9 ; Restore interrupt state mlckslow1: -#endif -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif + mtlr r12 - PROLOG(12) -#if MACH_LDEBUG - bl EXT(assert_wait_possible) - mr. 
r3,r3 - bne L_mutex_lock_assert_wait_1 - lis r3,hi16(L_mutex_lock_assert_wait_panic_str) - ori r3,r3,lo16(L_mutex_lock_assert_wait_panic_str) PROLOG(0) - bl EXT(panic) - BREAKPOINT_TRAP ; We die here anyway - - .data -L_mutex_lock_assert_wait_panic_str: - STRINGD "mutex_lock: assert_wait_possible false\n\000" - .text - -L_mutex_lock_assert_wait_1: - lwz r3,FM_ARG0(r1) -#endif - CHECK_SETUP(r12) - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() .L_ml_retry: bl lockDisa ; Go get a lock on the mutex's interlock lock mr. r4,r3 ; Did we get it? lwz r3,FM_ARG0(r1) ; Restore the lock address bne+ mlGotInt ; We got it just fine... - + mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed1) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed1) ; Get the failed mutex message - PROLOG(0) bl EXT(panic) ; Call panic BREAKPOINT_TRAP ; We die here anyway, can not get the lock .data mutex_failed1: - STRINGD "We can't get a mutex interlock lock on mutex_lock\n\000" + STRINGD "attempt to interlock mutex (0x%08X) failed on mutex lock\n\000" .text mlGotInt: @@ -1048,39 +1098,33 @@ mlGotInt: ; hold the interlock lock and no one can touch this field unless they ; have that, so, we're free to play - lwz r4,LOCK_DATA(r3) ; Get the mutex's lock field + lwz r4,MUTEX_DATA(r3) ; Get the mutex's lock field rlwinm. r9,r4,30,2,31 ; So, can we have it? bne- mlInUse ; Nope, sombody's playing already... -#if MACH_LDEBUG - li r5,lo16(MASK(MSR_EE)) ; Get the EE bit - mfmsr r11 ; Note: no need to deal with fp or vec here - andc r5,r11,r5 - mtmsr r5 + bf++ MUTEX_ATTR_DEBUGb,mlDebSkip + CHECK_SETUP(r5) mfsprg r9,1 ; Get the current activation lwz r5,0(r1) ; Get previous save frame - lwz r5,FM_LR_SAVE(r5) ; Get our caller's address - lwz r8, ACT_THREAD(r9) ; Get the active thread - stw r5,MUTEX_PC(r3) ; Save our caller - mr. r8,r8 ; Is there any thread? + lwz r6,FM_LR_SAVE(r5) ; Get our caller's address + mr r8,r9 ; Get the active thread + stw r6,MUTEX_STACK(r3) ; Save our caller stw r8,MUTEX_THREAD(r3) ; Set the mutex's holding thread - beq- .L_ml_no_active_thread ; No owning thread... - lwz r9,THREAD_MUTEX_COUNT(r8) ; Get the mutex count - addi r9,r9,1 ; Bump it up - stw r9,THREAD_MUTEX_COUNT(r8) ; Stash it back -.L_ml_no_active_thread: - mtmsr r11 -#endif /* MACH_LDEBUG */ - - bl EXT(mutex_lock_acquire) + LCK_STACK(r3,r5,r6,r7,r8,r10) +mlDebSkip: + mr r3,r11 ; Get the based lock address + bl EXT(lck_mtx_lock_acquire) + lwz r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1) mfsprg r5,1 + mtcr r2 mr. r4,r3 - lwz r3,FM_ARG0(r1) + lwz r3,FM_ARG0(r1) ; restore r3 (saved in prolog) + lwz r11,FM_ARG0+0x04(r1) ; restore r11 (saved in prolog) beq mlUnlock ori r5,r5,WAIT_FLAG mlUnlock: eieio - stw r5,LOCK_DATA(r3) ; grab the mutexlock and free the interlock + stw r5,MUTEX_DATA(r3) ; grab the mutexlock and free the interlock EPILOG ; Restore all saved registers b epStart ; Go enable preemption... @@ -1091,60 +1135,164 @@ mlUnlock: eieio mlInUse: CHECK_SETUP(r12) - CHECK_MYLOCK(MUTEX_THREAD) ; Assert we don't own the lock already */ + CHECK_MYLOCK() ; Assert we don't own the lock already */ ; Note that we come in here with the interlock set. The wait routine ; will unlock it before waiting. + bf MUTEX_ATTR_STATb,mlStatSkip2 ; Branch if no stat + lwz r5,MUTEX_GRP(r3) ; Load lck group + bt mlckmiss,mlStatSkip1 ; Skip miss already counted + crset mlckmiss ; Remember miss recorded + li r9,GRP_MTX_STAT_MISS+4 ; Get stat miss offset +mlStatLoop1: + lwarx r8,r9,r5 ; Load stat miss cnt + addi r8,r8,1 ; Increment stat miss cnt + stwcx. 
r8,r9,r5 ; Store stat miss cnt + bne-- mlStatLoop1 ; Retry if failed +mlStatSkip1: + lwz r9,GRP_MTX_STAT_WAIT+4(r5) ; Load wait cnt + addi r9,r9,1 ; Increment wait cnt + stw r9,GRP_MTX_STAT_WAIT+4(r5) ; Update miss cnt +mlStatSkip2: ori r4,r4,WAIT_FLAG ; Set the wait flag - stw r4,LOCK_DATA(r3) + stw r4,MUTEX_DATA(r3) rlwinm r4,r4,0,0,29 ; Extract the lock owner - bl EXT(mutex_lock_wait) ; Wait for our turn at the lock + mfcr r2 + stw r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1) + mr r3,r11 ; Get the based lock address + bl EXT(lck_mtx_lock_wait) ; Wait for our turn at the lock lwz r3,FM_ARG0(r1) ; restore r3 (saved in prolog) + lwz r11,FM_ARG0+0x04(r1) ; restore r11 (saved in prolog) + lwz r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1) + mtcr r2 b .L_ml_retry ; and try again... /* - * void _mutex_try(mutex_t*) + * void lck_mtx_try_lock(_extlck_mtx_ext_t*) * */ .align 5 + .globl EXT(lck_mtx_try_lock_ext) +LEXT(lck_mtx_try_lock_ext) +#if MACH_LDEBUG .globl EXT(mutex_try) LEXT(mutex_try) .globl EXT(_mutex_try) LEXT(_mutex_try) -#if !MACH_LDEBUG +#endif + mr r11,r3 ; Save lock addr +mlteEnter: + lwz r0,MUTEX_ATTR(r3) + mtcrf 1,r0 ; Set cr7 + CHECK_SETUP(r12) + CHECK_MUTEX_TYPE() + + bf MUTEX_ATTR_STATb,mlteStatSkip ; Branch if no stat + lwz r5,MUTEX_GRP(r3) ; Load lock group + li r7,GRP_MTX_STAT_UTIL+4 ; Set stat util offset +mlteStatLoop: + lwarx r8,r7,r5 ; Load stat util cnt + addi r8,r8,1 ; Increment stat util cnt + stwcx. r8,r7,r5 ; Store stat util cnt + bne-- mlteStatLoop ; Retry if failed + mr. r8,r8 ; Test for zero + bne++ mlteStatSkip ; Did stat util cnt wrapped? + lwz r8,GRP_MTX_STAT_UTIL(r5) ; Load upper stat util cnt + addi r8,r8,1 ; Increment upper stat util cnt + stw r8,GRP_MTX_STAT_UTIL(r5) ; Store upper stat util cnt +mlteStatSkip: mfsprg r6,1 ; load the current thread - lwz r5,0(r3) ; Get the lock value + lwz r5,MUTEX_DATA(r3) ; Get the lock value mr. r5,r5 ; Quick check bne-- L_mutex_try_slow ; Can not get it now... + mfmsr r9 ; Get the MSR value + lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable + ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable + ori r7,r0,lo16(MASK(MSR_EE)) ; Get EE bit on too + andc r9,r9,r0 ; Clear FP and VEC + andc r7,r9,r7 ; Clear EE as well + mtmsr r7 ; Turn off interruptions + isync ; May have turned off vec and fp here -L_mutex_try_loop: - lwarx r5,0,r3 ; load the lock value +mlteLoopTry: + lwarx r5,MUTEX_DATA,r3 ; load the lock value mr. r5,r5 - bne-- L_mutex_try_slowX ; branch to the slow path - stwcx. r6,0,r3 ; grab the lock - bne-- L_mutex_try_loop ; retry if failed + bne-- mlteSlowX ; branch to the slow path + stwcx. r6,MUTEX_DATA,r3 ; grab the lock + bne-- mlteLoopTry ; retry if failed + .globl EXT(mltelckPatch_isync) +LEXT(mltelckPatch_isync) isync ; stop prefetching + mflr r12 + bf MUTEX_ATTR_DEBUGb,mlteDebSkip + mr r8,r6 ; Get the active thread + stw r12,MUTEX_STACK(r3) ; Save our caller + stw r8,MUTEX_THREAD(r3) ; Set the mutex's holding thread + mr r5,r1 + LCK_STACK(r3,r5,r6,r7,r8,r10) +mlteDebSkip: li r3, 1 + mtmsr r9 ; Say, any interrupts pending? blr - -L_mutex_try_slowX: +mlteSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Kill reservation + mtmsr r9 ; Say, any interrupts pending? 
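
The mlckestatloop and mlteStatLoop sequences in the surrounding hunks maintain a 64-bit per-lock-group utilization count as two 32-bit words: the low word is bumped inside an lwarx/stwcx. loop, and only when it wraps to zero is the high word incremented with a plain load and store. A C sketch of that split update follows; the struct and field names are assumptions standing in for the GRP_MTX_STAT_UTIL words:

    struct lck_grp_sketch {
        unsigned int util_hi;             /* GRP_MTX_STAT_UTIL     */
        unsigned int util_lo;             /* GRP_MTX_STAT_UTIL + 4 */
    };

    static void
    lck_grp_stat_util_sketch(struct lck_grp_sketch *grp)
    {
        /* Atomic increment of the low word (the lwarx/addi/stwcx. loop). */
        unsigned int new_lo =
            __atomic_add_fetch(&grp->util_lo, 1, __ATOMIC_RELAXED);
        if (new_lo == 0)                  /* wrapped: carry into high word */
            grp->util_hi++;               /* plain store, as in the assembly */
    }

The unsynchronized carry mirrors the assembly, which likewise updates the high word without taking a reservation; a low-word wrap happens at most once per 2^32 acquisitions.
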
+ b L_mutex_try_slow -L_mutex_try_slow: +/* + * void lck_mtx_try_lock(lck_mtx_t*) + * + */ + .align 5 + .globl EXT(lck_mtx_try_lock) +LEXT(lck_mtx_try_lock) +#if !MACH_LDEBUG + .globl EXT(mutex_try) +LEXT(mutex_try) + .globl EXT(_mutex_try) +LEXT(_mutex_try) #endif - PROLOG(8) ; reserve space for SWT_HI and SWT_LO + mfsprg r6,1 ; load the current thread + lwz r5,MUTEX_DATA(r3) ; Get the lock value + mr r11,r3 ; Save lock addr + mr. r5,r5 ; Quick check + bne-- mltSlow00 ; Indirect or Can not get it now... + +mltLoopTry: + lwarx r5,MUTEX_DATA,r3 ; load the lock value + mr. r5,r5 + bne-- mltSlow01 ; branch to the slow path + stwcx. r6,MUTEX_DATA,r3 ; grab the lock + bne-- mltLoopTry ; retry if failed + .globl EXT(mltlckPatch_isync) +LEXT(mltlckPatch_isync) + isync ; stop prefetching + li r3, 1 + blr + +mltSlow00: + cmpli cr0,r5,MUTEX_IND ; Is it a mutex indirect + bne-- mltSlow02 ; No, go handle contention + lwz r3,MUTEX_PTR(r3) ; load mutex ext pointer + b mlteEnter +mltSlow01: + li r5,lgKillResv ; Killing field + stwcx. r5,0,r5 ; Kill reservation + +mltSlow02: + li r0,0 + mtcrf 1,r0 ; Set cr7 to zero + +L_mutex_try_slow: + PROLOG(0) - CHECK_SETUP(r12) - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() - - lwz r6,LOCK_DATA(r3) ; Quick check + lwz r6,MUTEX_DATA(r3) ; Quick check rlwinm. r6,r6,30,2,31 ; to see if someone has this lock already bne- mtFail ; Someone's got it already... @@ -1152,16 +1300,15 @@ L_mutex_try_slow: mr. r4,r3 ; Did we get it? */ lwz r3,FM_ARG0(r1) ; Restore the lock address bne+ mtGotInt ; We got it just fine... - + mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed2) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed2) ; Get the failed mutex message - PROLOG(0) bl EXT(panic) ; Call panic BREAKPOINT_TRAP ; We die here anyway, can not get the lock .data mutex_failed2: - STRINGD "We can't get a mutex interlock lock on mutex_try\n\000" + STRINGD "attempt to interlock mutex (0x%08X) failed on mutex lock try\n\000" .text mtGotInt: @@ -1170,43 +1317,31 @@ mtGotInt: ; hold the interlock and no one can touch at this field unless they ; have that, so, we're free to play - lwz r4,LOCK_DATA(r3) ; Get the mutex's lock field + lwz r4,MUTEX_DATA(r3) ; Get the mutex's lock field rlwinm. r9,r4,30,2,31 ; So, can we have it? bne- mtInUse ; Nope, sombody's playing already... -#if MACH_LDEBUG - lis r9,hi16(MASK(MSR_VEC)) ; Get vector enable - mfmsr r11 ; Get the MSR value - ori r9,r9,lo16(MASK(MSR_FP)) ; Get FP enable - ori r5,r9,lo16(MASK(MSR_EE)) ; Get EE bit on too - andc r11,r11,r9 ; Clear FP and VEC - andc r5,r11,r5 ; Clear EE as well - - mtmsr r5 + bf++ MUTEX_ATTR_DEBUGb,mtDebSkip + CHECK_SETUP(r5) mfsprg r9,1 ; Get the current activation lwz r5,0(r1) ; Get previous save frame - lwz r5,FM_LR_SAVE(r5) ; Get our caller's address - lwz r8,ACT_THREAD(r9) ; Get the active thread - stw r5,MUTEX_PC(r3) ; Save our caller - mr. r8,r8 ; Is there any thread? + lwz r6,FM_LR_SAVE(r5) ; Get our caller's address + mr r8,r9 ; Get the active thread + stw r6,MUTEX_STACK(r3) ; Save our caller stw r8,MUTEX_THREAD(r3) ; Set the mutex's holding thread - beq- .L_mt_no_active_thread ; No owning thread... - lwz r9, THREAD_MUTEX_COUNT(r8) ; Get the mutex count - addi r9, r9, 1 ; Bump it up - stw r9, THREAD_MUTEX_COUNT(r8) ; Stash it back -.L_mt_no_active_thread: - mtmsr r11 -#endif /* MACH_LDEBUG */ - - bl EXT(mutex_lock_acquire) + LCK_STACK(r3,r5,r6,r7,r8,r10) +mtDebSkip: + mr r3,r11 ; Get the based lock address + bl EXT(lck_mtx_lock_acquire) mfsprg r5,1 mr. 
r4,r3 - lwz r3,FM_ARG0(r1) + lwz r3,FM_ARG0(r1) ; restore r3 (saved in prolog) + lwz r11,FM_ARG0+0x04(r1) ; restore r11 (saved in prolog) beq mtUnlock ori r5,r5,WAIT_FLAG mtUnlock: eieio - stw r5,LOCK_DATA(r3) ; grab the mutexlock and free the interlock + stw r5,MUTEX_DATA(r3) ; grab the mutexlock and free the interlock bl epStart ; Go enable preemption... @@ -1218,8 +1353,17 @@ mtUnlock: eieio ; the mutex is held. mtInUse: + bf++ MUTEX_ATTR_STATb,mtStatSkip ; Branch if no stat + lwz r5,MUTEX_GRP(r3) ; Load lock group + li r9,GRP_MTX_STAT_MISS+4 ; Get stat miss offset +mtStatLoop: + lwarx r8,r9,r5 ; Load stat miss cnt + addi r8,r8,1 ; Increment stat miss cnt + stwcx. r8,r9,r5 ; Store stat miss cnt + bne-- mtStatLoop ; Retry if failed +mtStatSkip: rlwinm r4,r4,0,0,30 ; Get the unlock value - stw r4,LOCK_DATA(r3) ; free the interlock + stw r4,MUTEX_DATA(r3) ; free the interlock bl epStart ; Go enable preemption... mtFail: li r3,0 ; Set failure code @@ -1228,218 +1372,214 @@ mtFail: li r3,0 ; Set failure code /* - * void mutex_unlock_rwcmb(mutex_t* l) + * void mutex_unlock(mutex_t* l) * */ .align 5 - .globl EXT(mutex_unlock_rwcmb) + .globl EXT(mutex_unlock) +LEXT(mutex_unlock) + + sync + mr r11,r3 ; Save lock addr +#if MACH_LDEBUG + b mlueEnter1 +#else + b mluEnter1 +#endif +/* + * void lck_mtx_ext_unlock(lck_mtx_ext_t* l) + * + */ + .align 5 + .globl EXT(lck_mtx_ext_unlock) +LEXT(lck_mtx_ext_unlock) +#if MACH_LDEBUG + .globl EXT(mutex_unlock_rwcmb) LEXT(mutex_unlock_rwcmb) - .globl EXT(mulckPatch_isync) -LEXT(mulckPatch_isync) +#endif +mlueEnter: + .globl EXT(mulckePatch_isync) +LEXT(mulckePatch_isync) isync - .globl EXT(mulckPatch_eieio) -LEXT(mulckPatch_eieio) + .globl EXT(mulckePatch_eieio) +LEXT(mulckePatch_eieio) eieio + mr r11,r3 ; Save lock addr +mlueEnter1: + lwz r0,MUTEX_ATTR(r3) + mtcrf 1,r0 ; Set cr7 + CHECK_SETUP(r12) + CHECK_MUTEX_TYPE() + CHECK_THREAD(MUTEX_THREAD) - lwz r5,0(r3) ; Get the lock + lwz r5,MUTEX_DATA(r3) ; Get the lock rlwinm. r4,r5,0,30,31 ; Quick check bne-- L_mutex_unlock_slow ; Can not get it now... + mfmsr r9 ; Get the MSR value + lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable + ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable + ori r7,r0,lo16(MASK(MSR_EE)) ; Get EE bit on too + andc r9,r9,r0 ; Clear FP and VEC + andc r7,r9,r7 ; Clear EE as well + mtmsr r7 ; Turn off interruptions + isync ; May have turned off vec and fp here -L_mutex_unlock_rwcmb_loop: - lwarx r5,0,r3 +mlueLoop: + lwarx r5,MUTEX_DATA,r3 rlwinm. r4,r5,0,30,31 ; Bail if pending waiter or interlock set li r5,0 ; Clear the mutexlock - bne-- L_mutex_unlock_rwcmb_slowX - stwcx. r5,0,r3 - bne-- L_mutex_unlock_rwcmb_loop + bne-- mlueSlowX + stwcx. r5,MUTEX_DATA,r3 + bne-- mlueLoop + mtmsr r9 ; Say, any interrupts pending? blr -L_mutex_unlock_rwcmb_slowX: +mlueSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Dump reservation + mtmsr r9 ; Say, any interrupts pending? b L_mutex_unlock_slow ; Join slow path... /* - * void mutex_unlock(mutex_t* l) + * void lck_mtx_unlock(lck_mtx_t* l) * */ .align 5 - .globl EXT(mutex_unlock) - -LEXT(mutex_unlock) + .globl EXT(lck_mtx_unlock) +LEXT(lck_mtx_unlock) #if !MACH_LDEBUG - sync - lwz r5,0(r3) ; Get the lock + .globl EXT(mutex_unlock_rwcmb) +LEXT(mutex_unlock_rwcmb) +#endif +mluEnter: + .globl EXT(mulckPatch_isync) +LEXT(mulckPatch_isync) + isync + .globl EXT(mulckPatch_eieio) +LEXT(mulckPatch_eieio) + eieio + mr r11,r3 ; Save lock addr +mluEnter1: + lwz r5,MUTEX_DATA(r3) ; Get the lock rlwinm. 
r4,r5,0,30,31 ; Quick check - bne-- L_mutex_unlock_slow ; Can not get it now... + bne-- mluSlow0 ; Indirect or Can not get it now... -L_mutex_unlock_loop: - lwarx r5,0,r3 +mluLoop: + lwarx r5,MUTEX_DATA,r3 rlwinm. r4,r5,0,30,31 ; Bail if pending waiter or interlock set li r5,0 ; Clear the mutexlock - bne-- L_mutex_unlock_slowX - stwcx. r5,0,r3 - bne-- L_mutex_unlock_loop + bne-- mluSlowX + stwcx. r5,MUTEX_DATA,r3 + bne-- mluLoop blr -L_mutex_unlock_slowX: + +mluSlow0: + cmpli cr0,r5,MUTEX_IND ; Is it a mutex indirect + bne-- L_mutex_unlock_slow ; No, go handle contention + lwz r3,MUTEX_PTR(r3) ; load mutex ext pointer + b mlueEnter1 +mluSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Dump reservation -#endif - L_mutex_unlock_slow: PROLOG(0) - CHECK_SETUP(r12) - CHECK_MUTEX_TYPE() - CHECK_THREAD(MUTEX_THREAD) - bl lockDisa ; Go get a lock on the mutex's interlock lock mr. r4,r3 ; Did we get it? lwz r3,FM_ARG0(r1) ; Restore the lock address bne+ muGotInt ; We got it just fine... - + mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed3) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed3) ; Get the failed mutex message - PROLOG(0) bl EXT(panic) ; Call panic BREAKPOINT_TRAP ; We die here anyway, can not get the lock .data mutex_failed3: - STRINGD "We can't get a mutex interlock lock on mutex_unlock\n\000" + STRINGD "attempt to interlock mutex (0x%08X) failed on mutex unlock\n\000" .text muGotInt: - lwz r4,LOCK_DATA(r3) + lwz r4,MUTEX_DATA(r3) andi. r5,r4,WAIT_FLAG ; are there any waiters ? rlwinm r4,r4,0,0,29 beq+ muUnlock ; Nope, we're done... - bl EXT(mutex_unlock_wakeup) ; yes, wake a thread + mr r3,r11 ; Get the based lock address + bl EXT(lck_mtx_unlock_wakeup) ; yes, wake a thread lwz r3,FM_ARG0(r1) ; restore r3 (saved in prolog) - lwz r5,LOCK_DATA(r3) ; load the lock + lwz r11,FM_ARG0+0x04(r1) ; restore r11 (saved in prolog) + lwz r5,MUTEX_DATA(r3) ; load the lock muUnlock: -#if MACH_LDEBUG - lis r8,hi16(MASK(MSR_VEC)) ; Get vector enable - mfmsr r11 ; Get the MSR value - ori r8,r8,lo16(MASK(MSR_FP)) ; Get FP enable - ori r9,r8,lo16(MASK(MSR_EE)) ; Get EE bit on too - andc r11,r11,r8 ; Clear FP and VEC - andc r9,r11,r9 ; Clear EE as well - - mtmsr r9 - mfsprg r9,1 - lwz r9,ACT_THREAD(r9) - stw r9,MUTEX_THREAD(r3) ; disown thread - cmpwi r9,0 - beq- .L_mu_no_active_thread - lwz r8,THREAD_MUTEX_COUNT(r9) - subi r8,r8,1 - stw r8,THREAD_MUTEX_COUNT(r9) -.L_mu_no_active_thread: - mtmsr r11 -#endif /* MACH_LDEBUG */ - andi. r5,r5,WAIT_FLAG ; Get the unlock value eieio - stw r5,LOCK_DATA(r3) ; unlock the interlock and lock + stw r5,MUTEX_DATA(r3) ; unlock the interlock and lock EPILOG ; Deal with the stack now, enable_preemption doesn't always want one b epStart ; Go enable preemption... /* - * boolean_t mutex_preblock(mutex_t*, thread_t) + * void lck_mtx_assert(lck_mtx_t* l, unsigned int) + * */ - .align 5 - .globl EXT(mutex_preblock) - -LEXT(mutex_preblock) - mr r6,r3 - lwz r5,LOCK_DATA(r3) - mr. 
r3,r5 - beqlr+ - mr r3,r6 - + .align 5 + .globl EXT(lck_mtx_assert) +LEXT(lck_mtx_assert) + .globl EXT(_mutex_assert) +LEXT(_mutex_assert) + mr r11,r3 +maEnter: + lwz r5,MUTEX_DATA(r3) + cmpli cr0,r5,MUTEX_IND ; Is it a mutex indirect + bne-- maCheck ; No, go check the assertion + lwz r3,MUTEX_PTR(r3) ; load mutex ext pointer + b maEnter +maCheck: + mfsprg r6,1 ; load the current thread + rlwinm r5,r5,0,0,29 ; Extract the lock owner + cmpwi r4,MUTEX_ASSERT_OWNED + cmplw cr1,r6,r5 ; Is the lock held by current act + crandc cr0_eq,cr0_eq,cr1_eq ; Check owned assertion + bne-- maNext + mr r4,r11 + lis r3,hi16(mutex_assert1) ; Get the failed mutex message + ori r3,r3,lo16(mutex_assert1) ; Get the failed mutex message + b maPanic ; Panic path +maNext: + cmpwi r4,MUTEX_ASSERT_NOTOWNED ; Check not owned assertion + crand cr0_eq,cr0_eq,cr1_eq ; + bnelr++ +maPanic: PROLOG(0) - stw r4,(FM_ARG0-4)(r1) - - bl EXT(hw_lock_try) - mr. r4,r3 - lwz r3,FM_ARG0(r1) - bne+ mpbGotInt - - li r3,0 - - EPILOG - - blr - -mpbGotInt: - lwz r6,LOCK_DATA(r3) - rlwinm. r5,r6,0,0,30 - bne+ mpbInUse - - stw r5,LOCK_DATA(r3) - - bl epStart - - li r3,0 - - EPILOG - - blr - -mpbInUse: - lwz r4,(FM_ARG0-4)(r1) - rlwinm r5,r6,0,0,29 - bl EXT(mutex_preblock_wait) - lwz r4,FM_ARG0(r1) - mr. r3,r3 - lwz r5,LOCK_DATA(r4) - rlwinm r5,r5,0,0,30 - beq- mpbUnlock0 - ori r5,r5,WAIT_FLAG - - eieio - stw r5,LOCK_DATA(r4) - - bl epStart - - li r3,1 - - EPILOG - - blr - -mpbUnlock0: - eieio - stw r5,LOCK_DATA(r4) - - bl epStart - - li r3,0 - - EPILOG - - blr + mr r4,r11 + lis r3,hi16(mutex_assert2) ; Get the failed mutex message + ori r3,r3,lo16(mutex_assert2) ; Get the failed mutex message + bl EXT(panic) ; Call panic + BREAKPOINT_TRAP ; We die here anyway + .data +mutex_assert1: + STRINGD "mutex (0x%08X) not owned\n\000" +mutex_assert2: + STRINGD "mutex (0x%08X) owned\n\000" + .text + + /* - * void interlock_unlock(hw_lock_t lock) + * void lck_mtx_ilk_unlock(lck_mtx *lock) */ - .align 5 - .globl EXT(interlock_unlock) - -LEXT(interlock_unlock) + .globl EXT(lck_mtx_ilk_unlock) +LEXT(lck_mtx_ilk_unlock) - lwz r10,LOCK_DATA(r3) + lwz r10,MUTEX_DATA(r3) rlwinm r10,r10,0,0,30 eieio - stw r10,LOCK_DATA(r3) + stw r10,MUTEX_DATA(r3) b epStart ; Go enable preemption... @@ -1466,17 +1606,7 @@ LEXT(_enable_preemption_no_check) LEXT(_enable_preemption) -; Here is where we enable preemption. We need to be protected -; against ourselves, we can't chance getting interrupted and modifying -; our processor wide preemption count after we'sve loaded it up. So, -; we need to disable all 'rupts. Actually, we could use a compare -; and swap to do this, but, since there are no MP considerations -; (we are dealing with a CPU local field) it is much, much faster -; to disable. -; -; Note that if we are not genned MP, the calls here will be no-opped via -; a #define and since the _mp forms are the same, likewise a #define -; will be used to route to the other forms +; Here is where we enable preemption. epStart: cmplwi cr1,r1,0 ; Force non-zero cr so we know to check if preempted @@ -1501,7 +1631,7 @@ epTooFar: .data epTooFarStr: - STRINGD "_enable_preemption: preemption_level %d\n\000" + STRINGD "enable_preemption: preemption_level %d\n\000" .text .align 5 @@ -1509,17 +1639,16 @@ epCheckPreempt: lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable mfmsr r9 ; Get the MSR value ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable - andi. r3,r9,lo16(MASK(MSR_EE)) ; We cannot preempt if interruptions are off + andi. 
r4,r9,lo16(MASK(MSR_EE)) ; We cannot preempt if interruptions are off beq+ epCPno ; No preemption here... ori r7,r0,lo16(MASK(MSR_EE)) ; Get EE bit on too andc r9,r9,r0 ; Clear FP and VEC andc r7,r9,r7 ; Clear EE as well mtmsr r7 ; Turn off interruptions isync ; May have turned off vec and fp here - mfsprg r3,0 ; Get per_proc - lwz r7,PP_NEED_AST(r3) ; Get the AST request address + lwz r3,ACT_PER_PROC(r3) ; Get the per_proc block + lwz r7,PP_PENDING_AST(r3) ; Get pending AST mask li r5,AST_URGENT ; Get the requests we do honor - lwz r7,0(r7) ; Get the actual, real live, extra special AST word lis r0,hi16(DoPreemptCall) ; Just in case, get the top of firmware call and. r7,r7,r5 ; Should we preempt? ori r0,r0,lo16(DoPreemptCall) ; Merge in bottom part @@ -1532,13 +1661,7 @@ epCPno: /* * void disable_preemption(void) * - * Here is where we disable preemption. Since preemption is on a - * per processor basis (a thread runs on one CPU at a time) we don't - * need any cross-processor synchronization. We do, however, need to - * be interrupt safe, so we don't preempt while in the process of - * disabling it. We could use SPLs, but since we always want complete - * disablement, and this is platform specific code, we'll just kick the - * MSR. We'll save a couple of orders of magnitude over using SPLs. + * Here is where we disable preemption. */ .align 5 .globl EXT(_disable_preemption) @@ -1565,34 +1688,6 @@ LEXT(get_preemption_level) lwz r3,ACT_PREEMPT_CNT(r6) ; Get the preemption level blr ; Return... -/* - * int get_simple_lock_count(void) - * - * Return the simple lock count - * - */ - .align 5 - .globl EXT(get_simple_lock_count) - -LEXT(get_simple_lock_count) - -#if MACH_LDEBUG - lis r3,hi16(MASK(MSR_VEC)) ; Get vector enable - mfmsr r9 ; Get the MSR value - ori r3,r3,lo16(MASK(MSR_FP)) ; Get FP enable - ori r8,r3,lo16(MASK(MSR_EE)) ; Get EE bit on too - andc r9,r9,r3 ; Clear FP and VEC - andc r8,r9,r8 ; Clear EE as well - mtmsr r8 ; Interrupts off - isync ; May have messed with vec/fp - mfsprg r6,0 ; Get the per_proc - lwz r3,PP_SIMPLE_LOCK_CNT(r6) ; Get the simple lock count - mtmsr r9 ; Restore interruptions to entry -#else - li r3,0 ; simple lock count not updated -#endif - blr ; Return... - /* * void ppc_usimple_lock_init(simple_lock_t, etap_event_t) * @@ -1608,20 +1703,16 @@ LEXT(ppc_usimple_lock_init) blr /* - * void ppc_usimple_lock(simple_lock_t) + * void lck_spin_lock(lck_spin_t *) + * void ppc_usimple_lock(simple_lock_t *) * */ .align 5 + .globl EXT(lck_spin_lock) +LEXT(lck_spin_lock) .globl EXT(ppc_usimple_lock) - LEXT(ppc_usimple_lock) -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif - mfsprg r6,1 ; Get the current activation lwz r5,ACT_PREEMPT_CNT(r6) ; Get the preemption level addi r5,r5,1 ; Bring up the disable count @@ -1630,12 +1721,14 @@ LEXT(ppc_usimple_lock) li r8,0 ; Set r8 to zero li r4,0 ; Set r4 to zero -slcktry: lwarx r11,0,r5 ; Grab the lock value +slcktry: lwarx r11,SLOCK_ILK,r5 ; Grab the lock value andi. r3,r11,ILK_LOCKED ; Is it locked? ori r11,r6,ILK_LOCKED ; Set interlock bne-- slckspin ; Yeah, wait for it to clear... - stwcx. r11,0,r5 ; Try to seize that there durn lock + stwcx. r11,SLOCK_ILK,r5 ; Try to seize that there durn lock bne-- slcktry ; Couldn't get it... + .globl EXT(slckPatch_isync) +LEXT(slckPatch_isync) isync ; Make sure we don't use a speculativily loaded value blr ; Go on home... @@ -1664,7 +1757,7 @@ slockspin0: mr. 
r8,r8 ; Is r8 set to zero slockspin1: mtmsr r7 ; Turn off interruptions mftb r8 ; Get timestamp on entry -slcksniff: lwz r3,0(r5) ; Get that lock in here +slcksniff: lwz r3,SLOCK_ILK(r5) ; Get that lock in here andi. r3,r3,ILK_LOCKED ; Is it free yet? beq++ slckretry ; Yeah, try for it again... @@ -1702,23 +1795,20 @@ slckfail: ; We couldn't get the lock .data slckpanic_str: - STRINGD "ppc_usimple_lock: simple lock deadlock detection l=0x%08X, pc=0x%08X\n\000" + STRINGD "simple lock (0x%08X) deadlock detection, pc=0x%08X\n\000" .text /* - * unsigned int ppc_usimple_lock_try(simple_lock_t) + * boolean_t lck_spin_try_lock(lck_spin_t *) + * unsigned int ppc_usimple_lock_try(simple_lock_t *) * */ .align 5 + .globl EXT(lck_spin_try_lock) +LEXT(lck_spin_try_lock) .globl EXT(ppc_usimple_lock_try) - LEXT(ppc_usimple_lock_try) -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable mfmsr r9 ; Get the MSR value ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable @@ -1728,20 +1818,22 @@ LEXT(ppc_usimple_lock_try) mtmsr r7 ; Disable interruptions and thus, preemption mfsprg r6,1 ; Get current activation - lwz r11,0(r3) ; Get the lock + lwz r11,SLOCK_ILK(r3) ; Get the lock andi. r5,r11,ILK_LOCKED ; Check it... bne-- slcktryfail ; Quickly fail... slcktryloop: - lwarx r11,0,r3 ; Ld from addr of arg and reserve + lwarx r11,SLOCK_ILK,r3 ; Ld from addr of arg and reserve andi. r5,r11,ILK_LOCKED ; TEST... ori r5,r6,ILK_LOCKED bne-- slcktryfailX ; branch if taken. Predict free - stwcx. r5,0,r3 ; And SET (if still reserved) + stwcx. r5,SLOCK_ILK,r3 ; And SET (if still reserved) bne-- slcktryloop ; If set failed, loop back + .globl EXT(stlckPatch_isync) +LEXT(stlckPatch_isync) isync lwz r5,ACT_PREEMPT_CNT(r6) ; Get the preemption level @@ -1763,19 +1855,16 @@ slcktryfail: /* - * void ppc_usimple_unlock_rwcmb(simple_lock_t) + * void lck_spin_unlock(lck_spin_t *) + * void ppc_usimple_unlock_rwcmb(simple_lock_t *) * */ .align 5 + .globl EXT(lck_spin_unlock) +LEXT(lck_spin_unlock) .globl EXT(ppc_usimple_unlock_rwcmb) - LEXT(ppc_usimple_unlock_rwcmb) -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif li r0,0 .globl EXT(sulckPatch_isync) LEXT(sulckPatch_isync) @@ -1783,12 +1872,12 @@ LEXT(sulckPatch_isync) .globl EXT(sulckPatch_eieio) LEXT(sulckPatch_eieio) eieio - stw r0, LOCK_DATA(r3) + stw r0, SLOCK_ILK(r3) b epStart ; Go enable preemption... /* - * void ppc_usimple_unlock_rwmb(simple_lock_t) + * void ppc_usimple_unlock_rwmb(simple_lock_t *) * */ .align 5 @@ -1796,14 +1885,9 @@ LEXT(sulckPatch_eieio) LEXT(ppc_usimple_unlock_rwmb) -#if CHECKNMI - mflr r12 ; (TEST/DEBUG) - bl EXT(ml_sense_nmi) ; (TEST/DEBUG) - mtlr r12 ; (TEST/DEBUG) -#endif li r0,0 sync - stw r0, LOCK_DATA(r3) + stw r0, SLOCK_ILK(r3) b epStart ; Go enable preemption... @@ -1838,8 +1922,9 @@ L_enter_funnel_section_loop: bne-- L_enter_funnel_section_slowX ; Go to the slow path stwcx. 
r6,0,r7 ; Grab the lock bne-- L_enter_funnel_section_loop ; Loop back if failed + .globl EXT(entfsectPatch_isync) +LEXT(entfsectPatch_isync) isync ; Stop prefeteching - lwz r6,ACT_THREAD(r6) ; Get the current thread li r7,TH_FN_OWNED stw r3,THREAD_FUNNEL_LOCK(r6) ; Set the funnel lock reference stw r7,THREAD_FUNNEL_STATE(r6) ; Set the funnel state @@ -1864,7 +1949,6 @@ L_enter_funnel_section_slow: LEXT(exit_funnel_section) mfsprg r6,1 ; Get the current activation - lwz r6,ACT_THREAD(r6) ; Get the current thread lwz r3,THREAD_FUNNEL_LOCK(r6) ; Get the funnel lock mr. r3,r3 ; Check on funnel held beq- L_exit_funnel_section_ret ; @@ -1909,15 +1993,318 @@ L_exit_funnel_section_slow: L_exit_funnel_section_ret: blr -; -; This is bring up code -; - .align 5 - .globl EXT(condStop) +/* + * void lck_rw_lock_exclusive(lck_rw_t*) + * + */ + .align 5 + .globl EXT(lck_rw_lock_exclusive) +LEXT(lck_rw_lock_exclusive) +#if !MACH_LDEBUG + .globl EXT(lock_write) +LEXT(lock_write) +#endif +rwleloop: lwarx r5,RW_DATA,r3 ; Grab the lock value + rlwinm. r7,r5,30,1,31 ; Can we have it? + ori r6,r5,WANT_EXCL ; Mark Exclusive + bne-- rwlespin ; Branch if cannot be held + stwcx. r6,RW_DATA,r3 ; Update lock word + bne-- rwleloop + .globl EXT(rwlePatch_isync) +LEXT(rwlePatch_isync) + isync + blr +rwlespin: + li r4,lgKillResv ; Killing field + stwcx. r4,0,r4 ; Kill it + cmpli cr0,r5,RW_IND ; Is it a lock indirect + bne-- rwlespin1 ; No, go handle contention + mr r4,r3 ; pass lock pointer + lwz r3,RW_PTR(r3) ; load lock ext pointer + b EXT(lck_rw_lock_exclusive_ext) +rwlespin1: + b EXT(lck_rw_lock_exclusive_gen) + +/* + * void lck_rw_lock_shared(lck_rw_t*) + * + */ + .align 5 + .globl EXT(lck_rw_lock_shared) +LEXT(lck_rw_lock_shared) +#if !MACH_LDEBUG + .globl EXT(lock_read) +LEXT(lock_read) +#endif +rwlsloop: lwarx r5,RW_DATA,r3 ; Grab the lock value + andi. r7,r5,WANT_EXCL|WANT_UPGRADE|ILK_LOCKED ; Can we have it? + addis r6,r5,1 ; Increment read cnt + bne-- rwlsspin ; Branch if cannot be held + stwcx. r6,RW_DATA,r3 ; Update lock word + bne-- rwlsloop + .globl EXT(rwlsPatch_isync) +LEXT(rwlsPatch_isync) + isync + blr +rwlsspin: + li r4,lgKillResv ; Killing field + stwcx. r4,0,r4 ; Kill it + cmpli cr0,r5,RW_IND ; Is it a lock indirect + bne-- rwlsspin1 ; No, go handle contention + mr r4,r3 ; pass lock pointer + lwz r3,RW_PTR(r3) ; load lock ext pointer + b EXT(lck_rw_lock_shared_ext) +rwlsspin1: + b EXT(lck_rw_lock_shared_gen) + +/* + * boolean_t lck_rw_lock_shared_to_exclusive(lck_rw_t*) + * + */ + .align 5 + .globl EXT(lck_rw_lock_shared_to_exclusive) +LEXT(lck_rw_lock_shared_to_exclusive) +#if !MACH_LDEBUG + .globl EXT(lock_read_to_write) +LEXT(lock_read_to_write) +#endif +rwlseloop: lwarx r5,RW_DATA,r3 ; Grab the lock value + addis r6,r5,0xFFFF ; Decrement read cnt + lis r8,0xFFFF ; Get read count mask + ori r8,r8,WANT_UPGRADE|ILK_LOCKED ; Include Interlock and upgrade flags + and. r7,r6,r8 ; Can we have it? + ori r9,r6,WANT_UPGRADE ; Mark Exclusive + bne-- rwlsespin ; Branch if cannot be held + stwcx. r9,RW_DATA,r3 ; Update lock word + bne-- rwlseloop + .globl EXT(rwlsePatch_isync) +LEXT(rwlsePatch_isync) + isync + li r3,0 ; Succeed, return FALSE... + blr +rwlsespin: + li r4,lgKillResv ; Killing field + stwcx. 
r4,0,r4 ; Kill it
+ cmpli cr0,r5,RW_IND ; Is it a lock indirect
+ bne-- rwlsespin1 ; No, go handle contention
+ mr r4,r3 ; pass lock pointer
+ lwz r3,RW_PTR(r3) ; load lock ext pointer
+ b EXT(lck_rw_lock_shared_to_exclusive_ext)
+rwlsespin1:
+ b EXT(lck_rw_lock_shared_to_exclusive_gen)
+
+
+
+/*
+ * void lck_rw_lock_exclusive_to_shared(lck_rw_t*)
+ *
+ */
+ .align 5
+ .globl EXT(lck_rw_lock_exclusive_to_shared)
+LEXT(lck_rw_lock_exclusive_to_shared)
+#if !MACH_LDEBUG
+ .globl EXT(lock_write_to_read)
+LEXT(lock_write_to_read)
+#endif
+ .globl EXT(rwlesPatch_isync)
+LEXT(rwlesPatch_isync)
+ isync
+ .globl EXT(rwlesPatch_eieio)
+LEXT(rwlesPatch_eieio)
+ eieio
+rwlesloop: lwarx r5,RW_DATA,r3 ; Grab the lock value
+ andi. r7,r5,ILK_LOCKED ; Test interlock flag
+ bne-- rwlesspin ; Branch if interlocked
+ lis r6,1 ; Get 1 for read count
+ andi. r10,r5,WANT_UPGRADE ; Is it held with upgrade
+ li r9,WANT_UPGRADE|WAIT_FLAG ; Get upgrade and wait flags mask
+ bne rwlesexcl1 ; Skip if held with upgrade
+ li r9,WANT_EXCL|WAIT_FLAG ; Get exclusive and wait flags mask
+rwlesexcl1:
+ andc r7,r5,r9 ; Marked free
+ rlwimi r6,r7,0,16,31 ; Set shared cnt to one
+ stwcx. r6,RW_DATA,r3 ; Update lock word
+ bne-- rwlesloop
+ andi. r7,r5,WAIT_FLAG ; Test wait flag
+ beqlr++ ; Return if no waiters
+ addi r3,r3,RW_EVENT ; Get lock event address
+ b EXT(thread_wakeup) ; wakeup waiters
+rwlesspin:
+ li r4,lgKillResv ; Killing field
+ stwcx. r4,0,r4 ; Kill it
+ cmpli cr0,r5,RW_IND ; Is it a lock indirect
+ bne-- rwlesspin1 ; No, go handle contention
+ mr r4,r3 ; pass lock pointer
+ lwz r3,RW_PTR(r3) ; load lock ext pointer
+ b EXT(lck_rw_lock_exclusive_to_shared_ext)
+rwlesspin1:
+ b EXT(lck_rw_lock_exclusive_to_shared_gen)
+
+
+
+/*
+ * boolean_t lck_rw_try_lock_exclusive(lck_rw_t*)
+ *
+ */
+ .align 5
+ .globl EXT(lck_rw_try_lock_exclusive)
+LEXT(lck_rw_try_lock_exclusive)
+ lis r10,0xFFFF ; Load read count mask
+ ori r10,r10,WANT_EXCL|WANT_UPGRADE ; Include exclusive and upgrade flags
+rwtleloop: lwarx r5,RW_DATA,r3 ; Grab the lock value
+ andi. r7,r5,ILK_LOCKED ; Test interlock flag
+ bne-- rwtlespin ; Branch if interlocked
+ and. r7,r5,r10 ; Can we have it
+ ori r6,r5,WANT_EXCL ; Mark Exclusive
+ bne-- rwtlefail ;
+ stwcx. r6,RW_DATA,r3 ; Update lock word
+ bne-- rwtleloop
+ .globl EXT(rwtlePatch_isync)
+LEXT(rwtlePatch_isync)
+ isync
+ li r3,1 ; Return TRUE
+ blr
+rwtlefail:
+ li r4,lgKillResv ; Killing field
+ stwcx. r4,0,r4 ; Kill it
+ li r3,0 ; Return FALSE
+ blr
+rwtlespin:
+ li r4,lgKillResv ; Killing field
+ stwcx. r4,0,r4 ; Kill it
+ cmpli cr0,r5,RW_IND ; Is it a lock indirect
+ bne-- rwtlespin1 ; No, go handle contention
+ mr r4,r3 ; pass lock pointer
+ lwz r3,RW_PTR(r3) ; load lock ext pointer
+ b EXT(lck_rw_try_lock_exclusive_ext)
+rwtlespin1:
+ b EXT(lck_rw_try_lock_exclusive_gen)
+
+
+/*
+ * boolean_t lck_rw_try_lock_shared(lck_rw_t*)
+ *
+ */
+ .align 5
+ .globl EXT(lck_rw_try_lock_shared)
+LEXT(lck_rw_try_lock_shared)
+rwtlsloop: lwarx r5,RW_DATA,r3 ; Grab the lock value
+ andi. r7,r5,ILK_LOCKED ; Test interlock flag
+ bne-- rwtlsspin ; Branch if interlocked
+ andi. r7,r5,WANT_EXCL|WANT_UPGRADE ; So, can we have it?
+ addis r6,r5,1 ; Increment read cnt
+ bne-- rwtlsfail ; Branch if held exclusive
+ stwcx. r6,RW_DATA,r3 ; Update lock word
+ bne-- rwtlsloop
+ .globl EXT(rwtlsPatch_isync)
+LEXT(rwtlsPatch_isync)
+ isync
+ li r3,1 ; Return TRUE
+ blr
+rwtlsfail:
+ li r3,0 ; Return FALSE
+ blr
+rwtlsspin:
+ li r4,lgKillResv ; Killing field
+ stwcx. r4,0,r4 ; Kill it
+ cmpli cr0,r5,RW_IND ; Is it a lock indirect
+ bne-- rwtlsspin1 ; No, go handle contention
+ mr r4,r3 ; pass lock pointer
+ lwz r3,RW_PTR(r3) ; load lock ext pointer
+ b EXT(lck_rw_try_lock_shared_ext)
+rwtlsspin1:
+ b EXT(lck_rw_try_lock_shared_gen)
-LEXT(condStop)
-XcondStop: cmplw r3,r4 ; Check if these are equal
- beq-- XcondStop ; Stop here until they are different
- blr ; Return.
+/*
+ * lck_rw_type_t lck_rw_done(lck_rw_t*)
+ *
+ */
+ .align 5
+ .globl EXT(lck_rw_done)
+LEXT(lck_rw_done)
+#if !MACH_LDEBUG
+ .globl EXT(lock_done)
+LEXT(lock_done)
+#endif
+ .globl EXT(rwldPatch_isync)
+LEXT(rwldPatch_isync)
+ isync
+ .globl EXT(rwldPatch_eieio)
+LEXT(rwldPatch_eieio)
+ eieio
+ li r10,WAIT_FLAG ; Get wait flag
+ lis r7,0xFFFF ; Get read cnt mask
+ mr r12,r3 ; Save lock addr
+rwldloop: lwarx r5,RW_DATA,r3 ; Grab the lock value
+ andi. r8,r5,ILK_LOCKED ; Test interlock flag
+ bne-- rwldspin ; Branch if interlocked
+ and. r8,r5,r7 ; Is it shared
+ cmpi cr1,r8,0 ; Is it shared
+ beq cr1,rwldexcl ; No, check exclusive
+ li r11,RW_SHARED ; Set return value
+ addis r6,r5,0xFFFF ; Decrement read count
+ and. r8,r6,r7 ; Is it still shared
+ li r8,0 ; Assume no wakeup
+ bne rwldshared1 ; Skip if still held shared
+ and r8,r6,r10 ; Extract wait flag
+ andc r6,r6,r10 ; Clear wait flag
+rwldshared1:
+ b rwldstore
+rwldexcl:
+ li r11,RW_EXCL ; Set return value
+ li r9,WANT_UPGRADE ; Get upgrade flag
+ and. r6,r5,r9 ; Is it held with upgrade
+ li r9,WANT_UPGRADE|WAIT_FLAG ; Mask upgrade and wait flags
+ bne rwldexcl1 ; Skip if held with upgrade
+ li r9,WANT_EXCL|WAIT_FLAG ; Mask exclusive and wait flags
+rwldexcl1:
+ andc r6,r5,r9 ; Marked free
+ and r8,r5,r10 ; Null if no waiter
+rwldstore:
+ stwcx. r6,RW_DATA,r3 ; Update lock word
+ bne-- rwldloop
+ mr. r8,r8 ; wakeup needed?
+ mr r3,r11 ; Return lock held type
+ beqlr++
+ mr r3,r12 ; Restore lock address
+ PROLOG(0)
+ addi r3,r3,RW_EVENT ; Get lock event address
+ bl EXT(thread_wakeup) ; wakeup threads
+ lwz r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1)
+ mtcr r2
+ EPILOG
+ li r3,RW_SHARED ; Assume lock type shared
+ bne cr1,rwldret ; Branch if was held exclusive
+ li r3,RW_EXCL ; Return lock type exclusive
+rwldret:
+ blr
+rwldspin:
+ li r4,lgKillResv ; Killing field
+ stwcx. r4,0,r4 ; Kill it
+ cmpli cr0,r5,RW_IND ; Is it a lock indirect
+ bne-- rwldspin1 ; No, go handle contention
+ mr r4,r3 ; pass lock pointer
+ lwz r3,RW_PTR(r3) ; load lock ext pointer
+ b EXT(lck_rw_done_ext)
+rwldspin1:
+ b EXT(lck_rw_done_gen)
+
+/*
+ * void lck_rw_ilk_lock(lck_rw_t *lock)
+ */
+ .globl EXT(lck_rw_ilk_lock)
+LEXT(lck_rw_ilk_lock)
+ crclr hwtimeout ; no timeout option
+ li r4,0 ; request default timeout value
+ li r12,ILK_LOCKED ; Load bit mask
+ b lckcomm ; Join on up...
+
+/*
+ * void lck_rw_ilk_unlock(lck_rw_t *lock)
+ */
+ .globl EXT(lck_rw_ilk_unlock)
+LEXT(lck_rw_ilk_unlock)
+ li r4,1
+ b EXT(hw_unlock_bit)
diff --git a/osfmk/ppc/hw_lock_types.h b/osfmk/ppc/hw_lock_types.h
index 54ffa016c..f7478fbdc 100644
--- a/osfmk/ppc/hw_lock_types.h
+++ b/osfmk/ppc/hw_lock_types.h
@@ -56,55 +56,13 @@
 #ifndef _PPC_HW_LOCK_TYPES_H_
 #define _PPC_HW_LOCK_TYPES_H_
-/*
- * The "hardware lock". Low-level locking primitives that
- * MUST be exported by machine-dependent code; this abstraction
- * must provide atomic, non-blocking mutual exclusion that
- * is invulnerable to uniprocessor or SMP races, interrupts,
- * traps or any other events.
- * - * hw_lock_data_t machine-specific lock data structure - * hw_lock_t pointer to hw_lock_data_t - * - * An implementation must export these data types and must - * also provide routines to manipulate them (see prototypes, - * below). These routines may be external, inlined, optimized, - * or whatever, based on the kernel configuration. In the event - * that the implementation wishes to define its own prototypes, - * macros, or inline functions, it may define LOCK_HW_PROTOS - * to disable the definitions below. - * - * Mach does not expect these locks to support statistics, - * debugging, tracing or any other complexity. In certain - * configurations, Mach will build other locking constructs - * on top of this one. A correctly functioning Mach port need - * only implement these locks to be successful. However, - * greater efficiency may be gained with additional machine- - * dependent optimizations for the locking constructs defined - * later in kern/lock.h. - */ - struct hslock { int lock_data; }; -typedef struct hslock hw_lock_data_t, *hw_lock_t; -#define hw_lock_addr(hwl) (&((hwl).lock_data)) - - -#if defined(MACH_KERNEL_PRIVATE) - -#include -#include -#if !(NCPUS == 1 || MACH_LDEBUG) - -typedef hw_lock_data_t simple_lock_data_t; -typedef hw_lock_data_t *simple_lock_t; +typedef struct hslock hw_lock_data_t, *hw_lock_t; -#define decl_simple_lock_data(class, name) \ -class hw_lock_data_t name; -#endif +#define hw_lock_addr(hwl) (&((hwl).lock_data)) -#endif #endif /* _PPC_HW_LOCK_TYPES_H_ */ diff --git a/osfmk/ppc/hw_perfmon.c b/osfmk/ppc/hw_perfmon.c index c8507c3c1..609a68ec3 100644 --- a/osfmk/ppc/hw_perfmon.c +++ b/osfmk/ppc/hw_perfmon.c @@ -21,7 +21,6 @@ */ #include -#include #include #include #include @@ -132,14 +131,14 @@ int perfmon_release_facility(task_t task) return retval; } -int perfmon_enable(thread_act_t thr_act) +int perfmon_enable(thread_t thread) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t kr; kern_return_t retval = KERN_SUCCESS; int curPMC; - if(thr_act->mact.specFlags & perfMonitor) { + if(thread->machine.specFlags & perfMonitor) { return KERN_SUCCESS; /* already enabled */ } else if(perfmon_acquire_facility(kernel_task)!=KERN_SUCCESS) { return KERN_RESOURCE_SHORTAGE; /* facility is in use */ @@ -152,7 +151,7 @@ int perfmon_enable(thread_act_t thr_act) sv->save_mmcr1 = 0; sv->save_mmcr2 = 0; - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: @@ -183,12 +182,12 @@ int perfmon_enable(thread_act_t thr_act) if(retval==KERN_SUCCESS) { for(curPMC=0; curPMCsave_pmc[curPMC] = 0; - thr_act->mact.pmcovfl[curPMC] = 0; + thread->machine.pmcovfl[curPMC] = 0; } - thr_act->mact.perfmonFlags = 0; - thr_act->mact.specFlags |= perfMonitor; /* enable perf monitor facility for this thread */ - if(thr_act==current_act()) { - per_proc_info[cpu_number()].spcFlags |= perfMonitor; /* update per_proc */ + thread->machine.perfmonFlags = 0; + thread->machine.specFlags |= perfMonitor; /* enable perf monitor facility for this thread */ + if(thread==current_thread()) { + getPerProc()->spcFlags |= perfMonitor; /* update per_proc */ } } @@ -199,12 +198,12 @@ int perfmon_enable(thread_act_t thr_act) return retval; } -int perfmon_disable(thread_act_t thr_act) +int perfmon_disable(thread_t thread) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; int curPMC; - 
if(!(thr_act->mact.specFlags & perfMonitor)) { + if(!(thread->machine.specFlags & perfMonitor)) { return KERN_NO_ACCESS; /* not enabled */ } else { simple_lock(&hw_perfmon_lock); @@ -213,9 +212,9 @@ int perfmon_disable(thread_act_t thr_act) perfmon_release_facility(kernel_task); /* will release if hw_perfmon_thread_count is 0 */ } - thr_act->mact.specFlags &= ~perfMonitor; /* disable perf monitor facility for this thread */ - if(thr_act==current_act()) { - per_proc_info[cpu_number()].spcFlags &= ~perfMonitor; /* update per_proc */ + thread->machine.specFlags &= ~perfMonitor; /* disable perf monitor facility for this thread */ + if(thread==current_thread()) { + PerProcTable[cpu_number()].ppe_vaddr->spcFlags &= ~perfMonitor; /* update per_proc */ } sv->save_mmcr0 = 0; sv->save_mmcr1 = 0; @@ -223,8 +222,8 @@ int perfmon_disable(thread_act_t thr_act) for(curPMC=0; curPMCsave_pmc[curPMC] = 0; - thr_act->mact.pmcovfl[curPMC] = 0; - thr_act->mact.perfmonFlags = 0; + thread->machine.pmcovfl[curPMC] = 0; + thread->machine.perfmonFlags = 0; } #ifdef HWPERFMON_DEBUG @@ -234,9 +233,9 @@ int perfmon_disable(thread_act_t thr_act) return KERN_SUCCESS; } -int perfmon_clear_counters(thread_act_t thr_act) +int perfmon_clear_counters(thread_t thread) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; int curPMC; #ifdef HWPERFMON_DEBUG @@ -246,15 +245,15 @@ int perfmon_clear_counters(thread_act_t thr_act) /* clear thread copy */ for(curPMC=0; curPMCsave_pmc[curPMC] = 0; - thr_act->mact.pmcovfl[curPMC] = 0; + thread->machine.pmcovfl[curPMC] = 0; } return KERN_SUCCESS; } -int perfmon_write_counters(thread_act_t thr_act, uint64_t *pmcs) +int perfmon_write_counters(thread_t thread, uint64_t *pmcs) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; int curPMC; #ifdef HWPERFMON_DEBUG @@ -264,26 +263,26 @@ int perfmon_write_counters(thread_act_t thr_act, uint64_t *pmcs) /* update thread copy */ for(curPMC=0; curPMCsave_pmc[curPMC] = pmcs[curPMC] & 0x7FFFFFFF; - thr_act->mact.pmcovfl[curPMC] = (pmcs[curPMC]>>31) & 0xFFFFFFFF; + thread->machine.pmcovfl[curPMC] = (pmcs[curPMC]>>31) & 0xFFFFFFFF; } return KERN_SUCCESS; } -int perfmon_read_counters(thread_act_t thr_act, uint64_t *pmcs) +int perfmon_read_counters(thread_t thread, uint64_t *pmcs) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; int curPMC; /* retrieve from thread copy */ for(curPMC=0; curPMCmact.pmcovfl[curPMC]; + pmcs[curPMC] = thread->machine.pmcovfl[curPMC]; pmcs[curPMC] = pmcs[curPMC]<<31; pmcs[curPMC] |= (sv->save_pmc[curPMC] & 0x7FFFFFFF); } /* zero any unused counters on this platform */ - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: @@ -301,12 +300,12 @@ int perfmon_read_counters(thread_act_t thr_act, uint64_t *pmcs) return KERN_SUCCESS; } -int perfmon_start_counters(thread_act_t thr_act) +int perfmon_start_counters(thread_t thread) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: { @@ -357,12 +356,12 @@ int perfmon_start_counters(thread_act_t thr_act) return retval; } -int perfmon_stop_counters(thread_act_t thr_act) +int perfmon_stop_counters(thread_t thread) { - struct 
savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: @@ -393,16 +392,16 @@ int perfmon_stop_counters(thread_act_t thr_act) return retval; } -int perfmon_set_event(thread_act_t thr_act, int pmc, int event) +int perfmon_set_event(thread_t thread, int pmc, int event) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; #ifdef HWPERFMON_DEBUG kprintf("perfmon_set_event b4 (CPU%d) - pmc=%d, event=%d - mmcr0=0x%llx mmcr1=0x%llx mmcr2=0x%llx\n", cpu_number(), pmc, event, sv->save_mmcr0, sv->save_mmcr1, sv->save_mmcr2); #endif - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: { @@ -533,9 +532,9 @@ int perfmon_set_event(thread_act_t thr_act, int pmc, int event) return retval; } -int perfmon_set_event_func(thread_act_t thr_act, uint32_t f) +int perfmon_set_event_func(thread_t thread, uint32_t f) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; #ifdef HWPERFMON_DEBUG @@ -555,7 +554,7 @@ int perfmon_set_event_func(thread_act_t thr_act, uint32_t f) "UNKNOWN"); #endif /* HWPERFMON_DEBUG */ - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: @@ -590,12 +589,12 @@ int perfmon_set_event_func(thread_act_t thr_act, uint32_t f) return retval; } -int perfmon_set_threshold(thread_act_t thr_act, int threshold) +int perfmon_set_threshold(thread_t thread, int threshold) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: { ppc32_mmcr0_reg_t mmcr0_reg; @@ -687,12 +686,12 @@ int perfmon_set_threshold(thread_act_t thr_act, int threshold) return retval; } -int perfmon_set_tbsel(thread_act_t thr_act, int tbsel) +int perfmon_set_tbsel(thread_t thread, int tbsel) { - struct savearea *sv = thr_act->mact.pcb; + struct savearea *sv = thread->machine.pcb; kern_return_t retval = KERN_SUCCESS; - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_750: case CPU_SUBTYPE_POWERPC_7400: case CPU_SUBTYPE_POWERPC_7450: @@ -750,20 +749,20 @@ int perfmon_control(struct savearea *ssp) int pmc = (int)ssp->save_r5; int val = (int)ssp->save_r6; uint64_t *usr_pmcs_p = CAST_DOWN(uint64_t *, ssp->save_r7); - thread_act_t thr_act = THREAD_NULL; + thread_t thread = THREAD_NULL; uint64_t kern_pmcs[MAX_CPUPMC_COUNT]; kern_return_t retval = KERN_SUCCESS; int error; boolean_t oldlevel; - thr_act = (thread_act_t) port_name_to_act(thr_port); // convert user space thread port name to a thread_act_t - if(!thr_act) { + thread = (thread_t) port_name_to_thread(thr_port); // convert user space thread port name to a thread_t + if(!thread) { ssp->save_r3 = KERN_INVALID_ARGUMENT; return 1; /* Return and check for ASTs... 
*/ } - if(thr_act!=current_act()) { - thread_suspend(thr_act); + if(thread!=current_thread()) { + thread_suspend(thread); } #ifdef HWPERFMON_DEBUG @@ -774,14 +773,14 @@ int perfmon_control(struct savearea *ssp) /* individual actions which do not require perfmon facility to be enabled */ if(action==PPC_PERFMON_DISABLE) { - retval = perfmon_disable(thr_act); + retval = perfmon_disable(thread); } else if(action==PPC_PERFMON_ENABLE) { - retval = perfmon_enable(thr_act); + retval = perfmon_enable(thread); } else { /* individual actions which do require perfmon facility to be enabled */ - if(!(thr_act->mact.specFlags & perfMonitor)) { /* perfmon not enabled */ + if(!(thread->machine.specFlags & perfMonitor)) { /* perfmon not enabled */ #ifdef HWPERFMON_DEBUG kprintf("perfmon_control: ERROR - perfmon not enabled for this thread\n"); #endif @@ -790,22 +789,22 @@ int perfmon_control(struct savearea *ssp) } if(action==PPC_PERFMON_SET_EVENT) { - retval = perfmon_set_event(thr_act, pmc, val); + retval = perfmon_set_event(thread, pmc, val); } else if(action==PPC_PERFMON_SET_THRESHOLD) { - retval = perfmon_set_threshold(thr_act, val); + retval = perfmon_set_threshold(thread, val); } else if(action==PPC_PERFMON_SET_TBSEL) { - retval = perfmon_set_tbsel(thr_act, val); + retval = perfmon_set_tbsel(thread, val); } else if(action==PPC_PERFMON_SET_EVENT_FUNC) { - retval = perfmon_set_event_func(thr_act, val); + retval = perfmon_set_event_func(thread, val); } else if(action==PPC_PERFMON_ENABLE_PMI_BRKPT) { if(val) { - thr_act->mact.perfmonFlags |= PERFMONFLAG_BREAKPOINT_FOR_PMI; + thread->machine.perfmonFlags |= PERFMONFLAG_BREAKPOINT_FOR_PMI; } else { - thr_act->mact.perfmonFlags &= ~PERFMONFLAG_BREAKPOINT_FOR_PMI; + thread->machine.perfmonFlags &= ~PERFMONFLAG_BREAKPOINT_FOR_PMI; } retval = KERN_SUCCESS; } @@ -813,43 +812,43 @@ int perfmon_control(struct savearea *ssp) /* combinable actions */ else { if(action & PPC_PERFMON_STOP_COUNTERS) { - error = perfmon_stop_counters(thr_act); + error = perfmon_stop_counters(thread); if(error!=KERN_SUCCESS) { retval = error; goto perfmon_return; } } if(action & PPC_PERFMON_CLEAR_COUNTERS) { - error = perfmon_clear_counters(thr_act); + error = perfmon_clear_counters(thread); if(error!=KERN_SUCCESS) { retval = error; goto perfmon_return; } } if(action & PPC_PERFMON_WRITE_COUNTERS) { - if(error = copyin((void *)usr_pmcs_p, (void *)kern_pmcs, MAX_CPUPMC_COUNT*sizeof(uint64_t))) { + if(error = copyin(CAST_USER_ADDR_T(usr_pmcs_p), (void *)kern_pmcs, MAX_CPUPMC_COUNT*sizeof(uint64_t))) { retval = error; goto perfmon_return; } - error = perfmon_write_counters(thr_act, kern_pmcs); + error = perfmon_write_counters(thread, kern_pmcs); if(error!=KERN_SUCCESS) { retval = error; goto perfmon_return; } } if(action & PPC_PERFMON_READ_COUNTERS) { - error = perfmon_read_counters(thr_act, kern_pmcs); + error = perfmon_read_counters(thread, kern_pmcs); if(error!=KERN_SUCCESS) { retval = error; goto perfmon_return; } - if(error = copyout((void *)kern_pmcs, (void *)usr_pmcs_p, MAX_CPUPMC_COUNT*sizeof(uint64_t))) { + if(error = copyout((void *)kern_pmcs, CAST_USER_ADDR_T(usr_pmcs_p), MAX_CPUPMC_COUNT*sizeof(uint64_t))) { retval = error; goto perfmon_return; } } if(action & PPC_PERFMON_START_COUNTERS) { - error = perfmon_start_counters(thr_act); + error = perfmon_start_counters(thread); if(error!=KERN_SUCCESS) { retval = error; goto perfmon_return; @@ -865,8 +864,8 @@ int perfmon_control(struct savearea *ssp) kprintf("perfmon_control (CPU%d): mmcr0 = %016llX, pmc1=%X pmc2=%X pmc3=%X pmc4=%X 
pmc5=%X pmc6=%X pmc7=%X pmc8=%X\n", cpu_number(), ssp->save_mmcr0, ssp->save_pmc[PMC_1], ssp->save_pmc[PMC_2], ssp->save_pmc[PMC_3], ssp->save_pmc[PMC_4], ssp->save_pmc[PMC_5], ssp->save_pmc[PMC_6], ssp->save_pmc[PMC_7], ssp->save_pmc[PMC_8]); #endif - if(thr_act!=current_act()) { - thread_resume(thr_act); + if(thread!=current_thread()) { + thread_resume(thread); } #ifdef HWPERFMON_DEBUG @@ -883,13 +882,13 @@ int perfmon_handle_pmi(struct savearea *ssp) { int curPMC; kern_return_t retval = KERN_SUCCESS; - thread_act_t thr_act = current_act(); + thread_t thread = current_thread(); #ifdef HWPERFMON_DEBUG kprintf("perfmon_handle_pmi: got rupt\n"); #endif - if(!(thr_act->mact.specFlags & perfMonitor)) { /* perfmon not enabled */ + if(!(thread->machine.specFlags & perfMonitor)) { /* perfmon not enabled */ #ifdef HWPERFMON_DEBUG kprintf("perfmon_handle_pmi: ERROR - perfmon not enabled for this thread\n"); #endif @@ -897,27 +896,27 @@ int perfmon_handle_pmi(struct savearea *ssp) } for(curPMC=0; curPMCmact.pcb->save_pmc[curPMC] & 0x80000000) { - if(thr_act->mact.pmcovfl[curPMC]==0xFFFFFFFF && (thr_act->mact.perfmonFlags & PERFMONFLAG_BREAKPOINT_FOR_PMI)) { + if(thread->machine.pcb->save_pmc[curPMC] & 0x80000000) { + if(thread->machine.pmcovfl[curPMC]==0xFFFFFFFF && (thread->machine.perfmonFlags & PERFMONFLAG_BREAKPOINT_FOR_PMI)) { doexception(EXC_BREAKPOINT, EXC_PPC_PERFMON, (unsigned int)ssp->save_srr0); // pass up a breakpoint exception return KERN_SUCCESS; } else { - thr_act->mact.pmcovfl[curPMC]++; - thr_act->mact.pcb->save_pmc[curPMC] = 0; + thread->machine.pmcovfl[curPMC]++; + thread->machine.pcb->save_pmc[curPMC] = 0; } } } if(retval==KERN_SUCCESS) { - switch(machine_slot[0].cpu_subtype) { + switch(PerProcTable[0].ppe_vaddr->cpu_subtype) { case CPU_SUBTYPE_POWERPC_7450: { ppc32_mmcr0_reg_t mmcr0_reg; - mmcr0_reg.value = thr_act->mact.pcb->save_mmcr0; + mmcr0_reg.value = thread->machine.pcb->save_mmcr0; mmcr0_reg.field.disable_counters_always = FALSE; mmcr0_reg.field.enable_pmi = TRUE; - thr_act->mact.pcb->save_mmcr0 = mmcr0_reg.value; + thread->machine.pcb->save_mmcr0 = mmcr0_reg.value; } retval = KERN_SUCCESS; break; @@ -925,10 +924,10 @@ int perfmon_handle_pmi(struct savearea *ssp) { ppc64_mmcr0_reg_t mmcr0_reg; - mmcr0_reg.value = thr_act->mact.pcb->save_mmcr0; + mmcr0_reg.value = thread->machine.pcb->save_mmcr0; mmcr0_reg.field.disable_counters_always = FALSE; mmcr0_reg.field.enable_pmi = TRUE; - thr_act->mact.pcb->save_mmcr0 = mmcr0_reg.value; + thread->machine.pcb->save_mmcr0 = mmcr0_reg.value; } retval = KERN_SUCCESS; break; diff --git a/osfmk/ppc/hw_perfmon.h b/osfmk/ppc/hw_perfmon.h index 480837e08..eb21a4d00 100644 --- a/osfmk/ppc/hw_perfmon.h +++ b/osfmk/ppc/hw_perfmon.h @@ -77,7 +77,7 @@ typedef enum { /* SPECSEL[0:1] TD_CP_DBGxSEL[0:1] TTM3S int perfmon_acquire_facility(task_t task); int perfmon_release_facility(task_t task); -extern int perfmon_disable(thread_act_t thr_act); +extern int perfmon_disable(thread_t thr_act); extern int perfmon_init(void); extern int perfmon_control(struct savearea *save); extern int perfmon_handle_pmi(struct savearea *ssp); diff --git a/osfmk/ppc/hw_vm.s b/osfmk/ppc/hw_vm.s index 364d20f97..bcfeda6ee 100644 --- a/osfmk/ppc/hw_vm.s +++ b/osfmk/ppc/hw_vm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -21,7 +21,6 @@ */ #include #include -#include #include #include @@ -155,6 +154,12 @@ LEXT(hw_add_map) stw r31,FM_ARG0+0x38(r1) ; Save a register stw r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return +#if DEBUG + lwz r11,pmapFlags(r3) ; Get pmaps flags + rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active? + bne hamPanic ; Call not valid for guest shadow assist pmap +#endif + rlwinm r11,r4,0,0,19 ; Round down to get mapping block address mr r28,r3 ; Save the pmap mr r31,r4 ; Save the mapping @@ -217,12 +222,15 @@ hamRescan: lwz r4,mpVAddr(r31) ; Get the new vaddr top half mfspr r0,pmc4 ; INSTRUMENT - Get stamp stw r0,0x6100+(17*16)+0xC(0) ; INSTRUMENT - Save it #endif - - andi. r0,r24,mpNest ; See if we are a nest + + rlwinm r0,r24,0,mpType ; Isolate the mapping type rlwinm r23,r23,12,0,19 ; Convert standard block size to bytes + cmplwi r0,mpNest ; Is this a nested type? + cmplwi cr1,r0,mpLinkage ; Linkage type? + cror cr0_eq,cr1_eq,cr0_eq ; Nested or linkage type? lis r0,0x8000 ; Get 0xFFFFFFFF80000000 li r22,0 ; Assume high part of size is 0 - beq++ hamNoNest ; This is not a nest... + bne++ hamNoNest ; This is not a nested or linkage type rlwinm r22,r23,16,16,31 ; Convert partially converted size to segments rlwinm r23,r23,16,0,3 ; Finish shift @@ -301,23 +309,27 @@ hamGotX: stw r4,0x6100+(19*16)+0xC(0) ; INSTRUMENT - Save it #endif + rlwinm r11,r24,mpPcfgb+2,mpPcfg>>6 ; Get the index into the page config table lhz r8,mpSpace(r31) ; Get the address space + lwz r11,lgpPcfg(r11) ; Get the page config mfsdr1 r7 ; Get the hash table base/bounds lwz r4,pmapResidentCnt(r28) ; Get the mapped page count - andi. r0,r24,mpNest|mpBlock ; Is this a nest or block? + + andi. r0,r24,mpType ; Is this a normal mapping? rlwimi r8,r8,14,4,17 ; Double address space - rlwinm r9,r30,20,16,31 ; Isolate the page number + rlwinm r9,r30,0,4,31 ; Clear segment rlwinm r10,r30,18,14,17 ; Shift EA[32:35] down to correct spot in VSID (actually shift up 14) rlwimi r8,r8,28,0,3 ; Get the last nybble of the hash rlwimi r10,r29,18,0,13 ; Shift EA[18:31] down to VSID (31-bit math works because of max hash table size) rlwinm r7,r7,0,16,31 ; Isolate length mask (or count) addi r4,r4,1 ; Bump up the mapped page count + srw r9,r9,r11 ; Isolate just the page index xor r10,r10,r8 ; Calculate the low 32 bits of the VSID stw r4,pmapResidentCnt(r28) ; Set the mapped page count xor r9,r9,r10 ; Get the hash to the PTEG - bne-- hamDoneNP ; This is a block or nest, therefore, no physent... + bne-- hamDoneNP ; Not a normal mapping, therefore, no physent... 
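The hamGotX sequence just above folds the doubled space ID and the effective address into a VSID and then hashes its low-order bits against the page index to pick a PTEG. A rough, compilable C model of that 32-bit hash follows; the 0xFFC0 wrap constant and the shift-by-6 PTEG scaling come straight from the assembly, while the field widths and the function name are assumptions made for illustration only.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical model of the 32-bit PTEG hash in hamGotX: low-order
       VSID bits XOR page index, wrapped into the hash table by the
       length mask from SDR1 and scaled to the 64-byte PTEG size. */
    static uint32_t pteg_offset(uint32_t vsid, uint32_t ea, uint32_t len_mask)
    {
        uint32_t page = (ea >> 12) & 0xFFFF;        /* page index within segment */
        uint32_t hash = (vsid ^ page) & 0x7FFFF;    /* primary hash              */
        uint32_t wrap = (len_mask << 16) | 0xFFC0;  /* PTEG wrap size            */
        return (hash << 6) & wrap;                  /* PTEG offset into table    */
    }

    int main(void)
    {
        printf("PTEG offset: 0x%08X\n",
               (unsigned)pteg_offset(0x12345, 0x00403000, 0x3FF));
        return 0;
    }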
bl mapPhysFindLock ; Go find and lock the physent @@ -327,7 +339,7 @@ hamGotX: rlwinm r7,r7,16,0,15 ; Get the PTEG wrap size slwi r9,r9,6 ; Make PTEG offset ori r7,r7,0xFFC0 ; Stick in the bottom part - rlwinm r12,r11,0,0,25 ; Clean it up + rlwinm r12,r11,0,~ppFlags ; Clean it up and r9,r9,r7 ; Wrap offset into table mr r4,r31 ; Set the link to install stw r9,mpPte(r31) ; Point the mapping at the PTEG (exact offset is invalid) @@ -337,11 +349,11 @@ hamGotX: .align 5 -ham64: li r0,0xFF ; Get mask to clean up alias pointer +ham64: li r0,ppLFAmask ; Get mask to clean up alias pointer subfic r7,r7,46 ; Get number of leading zeros eqv r4,r4,r4 ; Get all ones ld r11,ppLink(r3) ; Get the alias chain pointer - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F srd r4,r4,r7 ; Get the wrap mask sldi r9,r9,7 ; Change hash to PTEG offset andc r11,r11,r0 ; Clean out the lock and flags @@ -439,18 +451,19 @@ hamOverlay: lwz r22,mpFlags(r3) ; Get the overlay flags crand cr5_eq,cr5_eq,cr0_eq ; Remember crand cr5_eq,cr5_eq,cr1_eq ; Remember if same - xor r23,r23,r22 ; Check for differences in flags - ori r23,r23,mpFIP ; "Fault in Progress" is ok to be different - xori r23,r23,mpFIP ; Force mpFIP off - rlwinm. r0,r23,0,mpSpecialb,mpListsb-1 ; See if any important flags are different + xor r23,r23,r22 ; Compare mapping flag words + andi. r23,r23,mpType|mpPerm ; Are mapping types and attributes the same? crand cr5_eq,cr5_eq,cr0_eq ; Merge in final check - bf-- cr5_eq,hamReturn ; This is not the same, so we just return a collision... + bf-- cr5_eq,hamSmash ; This is not the same, so we return a smash... ori r4,r4,mapRtMapDup ; Set duplicate b hamReturn ; And leave... hamRemv: ori r4,r4,mapRtRemove ; We are in the process of removing the collision b hamReturn ; Come back yall... + +hamSmash: ori r4,r4,mapRtSmash ; Tell caller that it has some clean up to do + b hamReturn ; Join common epilog code .align 5 @@ -458,6 +471,10 @@ hamBadLock: li r3,0 ; Set lock time out error code li r4,mapRtBadLk ; Set lock time out error code b hamReturn ; Leave.... +hamPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc @@ -577,6 +594,12 @@ LEXT(hw_rem_map) stw r6,FM_ARG0+0x44(r1) ; Save address to save next mapped vaddr stw r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return +#if DEBUG + lwz r11,pmapFlags(r3) ; Get pmaps flags + rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active? + bne hrmPanic ; Call not valid for guest shadow assist pmap +#endif + bt++ pf64Bitb,hrmSF1 ; skip if 64-bit (only they take the hint) lwz r9,pmapvr+4(r3) ; Get conversion mask b hrmSF1x ; Done... @@ -592,12 +615,16 @@ hrmSF1x: ; Here is where we join in from the hw_purge_* routines ; -hrmJoin: mfsprg r19,2 ; Get feature flags again (for alternate entries) +hrmJoin: lwz r3,pmapFlags(r28) ; Get pmap's flags + mfsprg r19,2 ; Get feature flags again (for alternate entries) mr r17,r11 ; Save the MSR mr r29,r4 ; Top half of vaddr mr r30,r5 ; Bottom half of vaddr + rlwinm. r3,r3,0,pmapVMgsaa ; Is guest shadow assist active? + bne-- hrmGuest ; Yes, handle specially + la r3,pmapSXlk(r28) ; Point to the pmap search lock bl sxlkShared ; Go get a shared lock on the mapping lists mr. r3,r3 ; Did we get the lock? 
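Both hw_add_map above and the removal paths below treat the 64-bit ppLink field as a tagged pointer: ppLFAmask rotated by ppLFArrot isolates the lock, flag, and attribute bits packed into the extreme nibbles, and andc recovers the chain address. A minimal C sketch of that cleanup, assuming the mask value suggested by the comments (the individual bit assignments are not spelled out in this hunk and are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define PP_LFA_MASK 0xF00000000000000FULL  /* assumed lock/flag/attribute bits */

    static uint64_t pp_link_addr(uint64_t pp_link)  { return pp_link & ~PP_LFA_MASK; }
    static uint64_t pp_link_flags(uint64_t pp_link) { return pp_link &  PP_LFA_MASK; }

    int main(void)
    {
        /* a chain pointer carrying a lock bit (top nibble) and two flag bits */
        uint64_t link = 0x0000000012345670ULL | 0x8000000000000003ULL;
        printf("mapping 0x%016llx flags 0x%016llx\n",
               (unsigned long long)pp_link_addr(link),
               (unsigned long long)pp_link_flags(link));
        return 0;
    }

Packing the flags into the pointer word itself is what lets the walkers here kill a reservation and retry without maintaining a separate lock word per physent.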
@@ -613,17 +640,14 @@ hrmJoin: mfsprg r19,2 ; Get feature flags again (for alternate entries) mr r4,r29 ; High order of address mr r5,r30 ; Low order of address bl EXT(mapSearchFull) ; Go see if we can find it - - andi. r0,r7,lo16(mpPerm|mpSpecial|mpNest) ; Is this nested, special, or a perm mapping? + + andi. r0,r7,mpPerm ; Mapping marked permanent? + crmove cr5_eq,cr0_eq ; Remember permanent marking mr r20,r7 ; Remember mpFlags - rlwinm r0,r7,0,mpRemovableb,mpRemovableb ; Are we allowed to remove it? - crmove cr5_eq,cr0_eq ; Remember if we should remove this mr. r31,r3 ; Did we? (And remember mapping address for later) - cmplwi cr1,r0,0 ; Are we allowed to remove? mr r15,r4 ; Save top of next vaddr - crorc cr5_eq,cr5_eq,cr1_eq ; cr5_eq is true if this is not removable mr r16,r5 ; Save bottom of next vaddr - beq hrmNotFound ; Nope, not found... + beq-- hrmNotFound ; Nope, not found... bf-- cr5_eq,hrmPerm ; This one can't be removed... ; @@ -653,13 +677,10 @@ hrmJoin: mfsprg r19,2 ; Get feature flags again (for alternate entries) mr r5,r30 ; Low order of address bl EXT(mapSearchFull) ; Rescan the list - andi. r0,r7,lo16(mpPerm|mpSpecial|mpNest) ; Is this nested, special, or a perm mapping? - rlwinm r0,r7,0,mpRemovableb,mpRemovableb ; Are we allowed to remove it? - crmove cr5_eq,cr0_eq ; Remember if we should remove this + andi. r0,r7,mpPerm ; Mapping marked permanent? + crmove cr5_eq,cr0_eq ; Remember permanent marking mr. r31,r3 ; Did we lose it when we converted? - cmplwi cr1,r0,0 ; Are we allowed to remove? mr r20,r7 ; Remember mpFlags - crorc cr5_eq,cr5_eq,cr1_eq ; cr5_eq is true if this is not removable mr r15,r4 ; Save top of next vaddr mr r16,r5 ; Save bottom of next vaddr beq-- hrmNotFound ; Yeah, we did, someone tossed it for us... @@ -686,14 +707,16 @@ hrmGotX: mr r3,r31 ; Get the mapping lwz r21,mpPte(r31) ; Grab the offset to the PTE rlwinm r23,r29,0,1,0 ; Copy high order vaddr to high if 64-bit machine mfsdr1 r29 ; Get the hash table base and size - rlwinm r0,r20,0,mpBlockb,mpBlockb ; Is this a block mapping? - andi. r2,r20,lo16(mpSpecial|mpNest) ; Is this nest or special mapping? - cmplwi cr5,r0,0 ; Remember if this is a block mapping + + rlwinm r0,r20,0,mpType ; Isolate mapping type + cmplwi cr5,r0,mpBlock ; Remember whether this is a block mapping + cmplwi r0,mpMinSpecial ; cr0_lt <- not a special mapping type + rlwinm r0,r21,0,mpHValidb,mpHValidb ; See if we actually have a PTE ori r2,r2,0xFFFF ; Get mask to clean out hash table base (works for both 32- and 64-bit) cmpwi cr1,r0,0 ; Have we made a PTE for this yet? - rlwinm r21,r21,0,0,30 ; Clear out valid bit - crorc cr0_eq,cr1_eq,cr0_eq ; No need to look at PTE if none or a special mapping + rlwinm r21,r21,0,~mpHValid ; Clear out valid bit + crorc cr0_eq,cr1_eq,cr0_lt ; No need to look at PTE if none or a special mapping rlwimi r23,r30,0,0,31 ; Insert low under high part of address andc r29,r29,r2 ; Clean up hash table base li r22,0 ; Clear this on out (also sets RC to 0 if we bail) @@ -703,11 +726,10 @@ hrmGotX: mr r3,r31 ; Get the mapping bt++ pf64Bitb,hrmSplit64 ; Go do 64-bit version... rlwinm r9,r21,28,4,29 ; Convert PTEG to PCA entry - bne- cr5,hrmBlock32 ; Go treat block specially... + beq- cr5,hrmBlock32 ; Go treat block specially... subfic r9,r9,-4 ; Get the PCA entry offset bt- cr0_eq,hrmPysDQ32 ; Skip next if no possible PTE... 
add r7,r9,r29 ; Point to the PCA slot - bl mapLockPteg ; Go lock up the PTEG (Note: we need to save R6 to set PCA) @@ -715,7 +737,7 @@ hrmGotX: mr r3,r31 ; Get the mapping lwz r5,0(r26) ; Get the top of PTE rlwinm. r0,r21,0,mpHValidb,mpHValidb ; See if we actually have a PTE - rlwinm r21,r21,0,0,30 ; Clear out valid bit + rlwinm r21,r21,0,~mpHValid ; Clear out valid bit rlwinm r5,r5,0,1,31 ; Turn off valid bit in PTE stw r21,mpPte(r31) ; Make sure we invalidate mpPte, still pointing to PTEG (keep walk_page from making a mistake) beq- hrmUlckPCA32 ; Pte is gone, no need to invalidate... @@ -770,16 +792,15 @@ hrmPysDQ32: mr r3,r31 ; Point to the mapping mr r4,r31 ; Point to the mapping bl EXT(mapRemove) ; Remove the mapping from the list - lwz r4,pmapResidentCnt(r28) ; Get the mapped page count - andi. r0,r20,lo16(mpSpecial|mpNest) ; Is this nest or special mapping? - cmplwi cr1,r0,0 ; Special thingie? + rlwinm r0,r20,0,mpType ; Isolate mapping type + cmplwi cr1,r0,mpMinSpecial ; cr1_lt <- not a special mapping type la r3,pmapSXlk(r28) ; Point to the pmap search lock subi r4,r4,1 ; Drop down the mapped page count stw r4,pmapResidentCnt(r28) ; Set the mapped page count bl sxlkUnlock ; Unlock the search list - bne-- cr1,hrmRetn32 ; This one has no real memory associated with it so we are done... + bf-- cr1_lt,hrmRetn32 ; This one has no real memory associated with it so we are done... bl mapPhysFindLock ; Go find and lock the physent @@ -788,7 +809,7 @@ hrmPysDQ32: mr r3,r31 ; Point to the mapping mr r4,r22 ; Get the RC bits we just got bl mapPhysMerge ; Go merge the RC bits - rlwinm r9,r9,0,0,25 ; Clear the flags from the mapping pointer + rlwinm r9,r9,0,~ppFlags ; Clear the flags from the mapping pointer cmplw r9,r31 ; Are we the first on the list? bne- hrmNot1st ; Nope... @@ -831,7 +852,7 @@ hrmDoneChunk: hrmNotFound: la r3,pmapSXlk(r28) ; Point to the pmap search lock bl sxlkUnlock ; Unlock the search list - li r3,0 ; Make sure we know we did not find it + li r3,mapRtNotFnd ; No mapping found hrmErRtn: bt++ pf64Bitb,hrmSF1z ; skip if 64-bit (only they take the hint) @@ -1120,7 +1141,7 @@ hrmBDone1: bl mapDrainBusy ; Go wait until mapping is unused .align 5 hrmSplit64: rlwinm r9,r21,27,5,29 ; Convert PTEG to PCA entry - bne-- cr5,hrmBlock64 ; Go treat block specially... + beq-- cr5,hrmBlock64 ; Go treat block specially... subfic r9,r9,-4 ; Get the PCA entry offset bt-- cr0_eq,hrmPysDQ64 ; Skip next if no possible PTE... add r7,r9,r29 ; Point to the PCA slot @@ -1131,8 +1152,9 @@ hrmSplit64: rlwinm r9,r21,27,5,29 ; Convert PTEG to PCA entry ld r5,0(r26) ; Get the top of PTE rlwinm. r0,r21,0,mpHValidb,mpHValidb ; See if we actually have a PTE - rlwinm r21,r21,0,0,30 ; Clear out valid bit + rlwinm r21,r21,0,~mpHValid ; Clear out valid bit sldi r23,r5,16 ; Shift AVPN up to EA format +// **** Need to adjust above shift based on the page size - large pages need to shift a bit more rldicr r5,r5,0,62 ; Clear the valid bit rldimi r23,r30,0,36 ; Insert the page portion of the VPN stw r21,mpPte(r31) ; Make sure we invalidate mpPte but keep pointing to PTEG (keep walk_page from making a mistake) @@ -1152,18 +1174,17 @@ hrmPtlb64: lwarx r5,0,r9 ; Get the TLBIE lock stwcx. r5,0,r9 ; Try to get it bne-- hrmPtlb64 ; We was beat... 
- tlbie r23 ; Invalidate it all corresponding TLB entries + tlbie r23 ; Invalidate all corresponding TLB entries eieio ; Make sure that the tlbie happens first tlbsync ; Wait for everyone to catch up - isync ptesync ; Make sure of it all li r0,0 ; Clear this rlwinm r2,r21,28,29,31 ; Get slot number (16 byte entries) stw r0,tlbieLock(0) ; Clear the tlbie lock oris r0,r0,0x8000 ; Assume slot 0 - eieio ; Make sure those RC bit have been stashed in PTE + srw r0,r0,r2 ; Get slot mask to deallocate lwz r22,12(r26) ; Get the latest reference and change bits @@ -1177,25 +1198,25 @@ hrmUlckPCA64: hrmPysDQ64: mr r3,r31 ; Point to the mapping bl mapDrainBusy ; Go wait until mapping is unused - mr r3,r28 ; Get the pmap to insert into + mr r3,r28 ; Get the pmap to remove from mr r4,r31 ; Point to the mapping bl EXT(mapRemove) ; Remove the mapping from the list - andi. r0,r20,lo16(mpSpecial|mpNest) ; Is this nest or special mapping? + rlwinm r0,r20,0,mpType ; Isolate mapping type + cmplwi cr1,r0,mpMinSpecial ; cr1_lt <- not a special mapping type lwz r4,pmapResidentCnt(r28) ; Get the mapped page count - cmplwi cr1,r0,0 ; Special thingie? la r3,pmapSXlk(r28) ; Point to the pmap search lock subi r4,r4,1 ; Drop down the mapped page count stw r4,pmapResidentCnt(r28) ; Set the mapped page count bl sxlkUnlock ; Unlock the search list - bne-- cr1,hrmRetn64 ; This one has no real memory associated with it so we are done... + bf-- cr1_lt,hrmRetn64 ; This one has no real memory associated with it so we are done... bl mapPhysFindLock ; Go find and lock the physent - li r0,0xFF ; Get mask to clean up mapping pointer + li r0,ppLFAmask ; Get mask to clean up mapping pointer ld r9,ppLink(r3) ; Get first mapping - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F mr r4,r22 ; Get the RC bits we just got bl mapPhysMerge ; Go merge the RC bits @@ -1203,7 +1224,7 @@ hrmPysDQ64: mr r3,r31 ; Point to the mapping andc r9,r9,r0 ; Clean up the mapping pointer cmpld r9,r31 ; Are we the first on the list? - bne- hrmNot1st64 ; Nope... + bne-- hrmNot1st64 ; Nope... li r9,0 ; Get a 0 ld r4,mpAlias(r31) ; Get our forward pointer @@ -1226,10 +1247,10 @@ hrmPtlb64x: lwz r5,0(r9) ; Do a regular load to avoid taking reservation hrmNot1st64: mr. r8,r9 ; Remember and test current node - beq- hrmNotFound ; Could not find our node... + beq-- hrmPhyDQd64 ; Could not find our node... ld r9,mpAlias(r9) ; Chain to the next cmpld r9,r31 ; Is this us? - bne- hrmNot1st64 ; Not us... + bne-- hrmNot1st64 ; Not us... ld r9,mpAlias(r9) ; Get our forward pointer std r9,mpAlias(r8) ; Unchain us @@ -1386,17 +1407,13 @@ hrmBTLBj: sldi r2,r27,maxAdrSpb ; Move to make room for address space ID tlbie r2 ; Invalidate it everywhere addi r27,r27,0x1000 ; Up to the next page bge++ hrmBTLBj ; Make sure we have done it all... - - sync ; Make sure all is quiet eieio ; Make sure that the tlbie happens first tlbsync ; wait for everyone to catch up - isync li r2,0 ; Lock clear value ptesync ; Wait for quiet again - sync ; Make sure that is done stw r2,tlbieLock(0) ; Clear the tlbie lock @@ -1463,6 +1480,266 @@ hrmBTLBlcn: lwz r2,0(r7) ; Get the TLBIE lock beq++ hrmBTLBlcl ; Nope... b hrmBTLBlcn ; Yeah... 
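The guest shadow assist block that follows locates guest mappings through a hash of the guest space ID and page number, which is then split into a hash-page index and a slot group within that page. A compilable C sketch of the indexing scheme appears below; the GV_* geometry is invented for illustration and does not reproduce xnu's actual constants:

    #include <stdint.h>
    #include <stdio.h>

    #define GV_GROUPS_PER_PAGE 64   /* assumed hash groups per hash page */

    static void gv_hash(uint32_t space_id, uint64_t gva,
                        uint32_t *hash_page, uint32_t *hash_group)
    {
        uint32_t hash = space_id ^ (uint32_t)(gva >> 12); /* spaceID ^ page number */
        *hash_page  = hash / GV_GROUPS_PER_PAGE;          /* which hash page       */
        *hash_group = hash % GV_GROUPS_PER_PAGE;          /* which group in page   */
    }

    int main(void)
    {
        uint32_t page, group;
        gv_hash(0x42, 0x00003000ULL, &page, &group);
        printf("hash page %u, group %u\n", page, group);
        return 0;
    }

Each group holds GV_SLOTS mapping slots, which the search loops below walk with a bdnz-counted loop, so a lookup touches at most one hash group.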
+; +; Guest shadow assist -- mapping remove +; +; Method of operation: +; o Locate the VMM extension block and the host pmap +; o Obtain the host pmap's search lock exclusively +; o Locate the requested mapping in the shadow hash table, +; exit if not found +; o If connected, disconnect the PTE and gather R&C to physent +; o Locate and lock the physent +; o Remove mapping from physent's chain +; o Unlock physent +; o Unlock pmap's search lock +; +; Non-volatile registers on entry: +; r17: caller's msr image +; r19: sprg2 (feature flags) +; r28: guest pmap's physical address +; r29: high-order 32 bits of guest virtual address +; r30: low-order 32 bits of guest virtual address +; +; Non-volatile register usage: +; r26: VMM extension block's physical address +; r27: host pmap's physical address +; r28: guest pmap's physical address +; r29: physent's physical address +; r30: guest virtual address +; r31: guest mapping's physical address +; + .align 5 +hrmGuest: + rlwinm r30,r30,0,0xFFFFF000 ; Clean up low-order bits of 32-bit guest vaddr + bt++ pf64Bitb,hrmG64 ; Test for 64-bit machine + lwz r26,pmapVmmExtPhys+4(r28) ; r26 <- VMM pmap extension block paddr + lwz r27,vmxHostPmapPhys+4(r26) ; r27 <- host pmap's paddr + b hrmGStart ; Join common code + +hrmG64: ld r26,pmapVmmExtPhys(r28) ; r26 <- VMM pmap extension block paddr + ld r27,vmxHostPmapPhys(r26) ; r27 <- host pmap's paddr + rldimi r30,r29,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + +hrmGStart: la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + lwz r3,vxsGrm(r26) ; Get mapping remove request count + + lwz r9,pmapSpace(r28) ; r9 <- guest space ID number + la r31,VMX_HPIDX_OFFSET(r26) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r9,r11 ; spaceID ^ (vaddr >> 12) + rlwinm r12,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r12 ; r31 <- hash page index entry + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,hrmG64Search ; Separate handling for 64-bit search + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + + addi r3,r3,1 ; Increment remove request count + stw r3,vxsGrm(r26) ; Update remove request count + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b hrmG32SrchLp ; Let the search begin! + + .align 5 +hrmG32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + rlwinm r11,r6,0,mpgFree ; Isolate guest free mapping flag + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(free && space match) + xor r8,r8,r30 ; Compare virtual address + or. 
r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match + beq hrmGSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz hrmG32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + rlwinm r11,r6,0,mpgFree ; Isolate guest free mapping flag + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(free && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match + beq hrmGSrchHit ; Join common path on hit (r31 points to guest mapping) + b hrmGSrchMiss ; No joy in our hash group + +hrmG64Search: + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b hrmG64SrchLp ; Let the search begin! + + .align 5 +hrmG64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + rlwinm r11,r6,0,mpgFree ; Isolate guest free mapping flag + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(free && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match + beq hrmGSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz hrmG64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + rlwinm r11,r6,0,mpgFree ; Isolate guest free mapping flag + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(free && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match + beq hrmGSrchHit ; Join common path on hit (r31 points to guest mapping) +hrmGSrchMiss: + lwz r3,vxsGrmMiss(r26) ; Get remove miss count + li r25,mapRtNotFnd ; Return not found + addi r3,r3,1 ; Increment miss count + stw r3,vxsGrmMiss(r26) ; Update miss count + b hrmGReturn ; Join guest return + + .align 5 +hrmGSrchHit: + rlwinm. r0,r6,0,mpgDormant ; Is this entry dormant? + bne hrmGDormant ; Yes, nothing to disconnect + + lwz r3,vxsGrmActive(r26) ; Get active hit count + addi r3,r3,1 ; Increment active hit count + stw r3,vxsGrmActive(r26) ; Update hit count + + bt++ pf64Bitb,hrmGDscon64 ; Handle 64-bit disconnect separately + bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change + ; r31 <- mapping's physical address + ; r3 -> PTE slot physical address + ; r4 -> High-order 32 bits of PTE + ; r5 -> Low-order 32 bits of PTE + ; r6 -> PCA + ; r7 -> PCA physical address + rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs) + b hrmGFreePTE ; Join 64-bit path to release the PTE +hrmGDscon64: + bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change + rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs) +hrmGFreePTE: + mr. r3,r3 ; Was there a valid PTE? 
+ beq hrmGDormant ; No valid PTE, we're almost done
+ lis r0,0x8000 ; Prepare free bit for this slot
+ srw r0,r0,r2 ; Position free bit
+ or r6,r6,r0 ; Set it in our PCA image
+ lwz r8,mpPte(r31) ; Get PTE offset
+ rlwinm r8,r8,0,~mpHValid ; Make the offset invalid
+ stw r8,mpPte(r31) ; Save invalidated PTE offset
+ eieio ; Synchronize all previous updates (mapInvPtexx didn't)
+ stw r6,0(r7) ; Update PCA and unlock the PTEG
+
+hrmGDormant:
+ lwz r3,mpPAddr(r31) ; r3 <- physical 4K-page number
+ bl mapFindLockPN ; Find 'n' lock this page's physent
+ mr. r29,r3 ; Got lock on our physent?
+ beq-- hrmGBadPLock ; No, time to bail out
+
+ crset cr1_eq ; cr1_eq <- previous link is the anchor
+ bt++ pf64Bitb,hrmGRemove64 ; Use 64-bit version on 64-bit machine
+ la r11,ppLink+4(r29) ; Point to chain anchor
+ lwz r9,ppLink+4(r29) ; Get chain anchor
+ rlwinm. r9,r9,0,~ppFlags ; Remove flags, yielding 32-bit physical chain pointer
+hrmGRemLoop:
+ beq- hrmGPEMissMiss ; End of chain, this is not good
+ cmplw r9,r31 ; Is this the mapping to remove?
+ lwz r8,mpAlias+4(r9) ; Get forward chain pointer
+ bne hrmGRemNext ; No, chain onward
+ bt cr1_eq,hrmGRemRetry ; Mapping to remove is chained from anchor
+ stw r8,0(r11) ; Unchain gpv->phys mapping
+ b hrmGDelete ; Finish deleting mapping
+hrmGRemRetry:
+ lwarx r0,0,r11 ; Get previous link
+ rlwimi r0,r8,0,~ppFlags ; Insert new forward pointer whilst preserving flags
+ stwcx. r0,0,r11 ; Update previous link
+ bne- hrmGRemRetry ; Lost reservation, retry
+ b hrmGDelete ; Finish deleting mapping
+
+hrmGRemNext:
+ la r11,mpAlias+4(r9) ; Point to (soon to be) previous link
+ crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor
+ mr. r9,r8 ; Does next entry exist?
+ b hrmGRemLoop ; Carry on
+
+hrmGRemove64:
+ li r7,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r7,r7,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F
+ la r11,ppLink(r29) ; Point to chain anchor
+ ld r9,ppLink(r29) ; Get chain anchor
+ andc. r9,r9,r7 ; Remove flags, yielding 64-bit physical chain pointer
+hrmGRem64Lp:
+ beq-- hrmGPEMissMiss ; End of chain, this is not good
+ cmpld r9,r31 ; Is this the mapping to remove?
+ ld r8,mpAlias(r9) ; Get forward chain pointer
+ bne hrmGRem64Nxt ; No mapping to remove, chain on, dude
+ bt cr1_eq,hrmGRem64Rt ; Mapping to remove is chained from anchor
+ std r8,0(r11) ; Unchain gpv->phys mapping
+ b hrmGDelete ; Finish deleting mapping
+hrmGRem64Rt:
+ ldarx r0,0,r11 ; Get previous link
+ and r0,r0,r7 ; Get flags
+ or r0,r0,r8 ; Insert new forward pointer
+ stdcx. r0,0,r11 ; Slam it back in
+ bne-- hrmGRem64Rt ; Lost reservation, retry
+ b hrmGDelete ; Finish deleting mapping
+
+ .align 5
+hrmGRem64Nxt:
+ la r11,mpAlias(r9) ; Point to (soon to be) previous link
+ crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor
+ mr. r9,r8 ; Does next entry exist?
+ b hrmGRem64Lp ; Carry on
+
+hrmGDelete:
+ mr r3,r29 ; r3 <- physent addr
+ bl mapPhysUnlock ; Unlock physent chain
+ lwz r3,mpFlags(r31) ; Get mapping's flags
+ rlwinm r3,r3,0,~mpgFlags ; Clear all guest flags
+ ori r3,r3,mpgFree ; Mark mapping free
+ stw r3,mpFlags(r31) ; Update flags
+ li r25,mapRtGuest ; Set return code to 'found guest mapping'
+
+hrmGReturn:
+ la r3,pmapSXlk(r27) ; r3 <- host pmap search lock phys addr
+ bl sxlkUnlock ; Release host pmap search lock
+
+ mr r3,r25 ; r3 <- return code
+ bt++ pf64Bitb,hrmGRtn64 ; Handle 64-bit separately
+ mtmsr r17 ; Restore 'rupts, translation
+ isync ; Throw a small wrench into the pipeline
+ b hrmRetnCmn ; Nothing to do now but pop a frame and return
+hrmGRtn64: mtmsrd r17 ; Restore 'rupts, translation, 32-bit mode
+ b hrmRetnCmn ; Join common return
+
+hrmGBadPLock:
+hrmGPEMissMiss:
+ lis r0,hi16(Choke) ; Seen the arrow on the doorpost
+ ori r0,r0,lo16(Choke) ; Sayin' "THIS LAND IS CONDEMNED"
+ li r3,failMapping ; All the way from New Orleans
+ sc ; To Jerusalem
 /*
@@ -1533,12 +1810,12 @@ LEXT(hw_purge_phys)
 bt++ pf64Bitb,hppSF ; skip if 64-bit (only they take the hint)
 lwz r12,ppLink+4(r3) ; Grab the pointer to the first mapping
- li r0,0x3F ; Set the bottom stuff to clear
+ li r0,ppFlags ; Set the bottom stuff to clear
 b hppJoin ; Join the common...
-hppSF: li r0,0xFF
+hppSF: li r0,ppLFAmask
 ld r12,ppLink(r3) ; Get the pointer to the first mapping
- rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F
+ rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F
 hppJoin: andc. r12,r12,r0 ; Clean and test link
 beq-- hppNone ; There are no more mappings on physical page
@@ -1584,7 +1861,7 @@ hppSF3: mtmsrd r11 ; Restore enables/translation/etc.
 hppRetnCmn: lwz r12,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Restore the return
- li r3,0 ; Clear high order mapping address because we are 32-bit
+ li r3,mapRtEmpty ; Physent chain is empty
 mtlr r12 ; Restore the return
 lwz r1,0(r1) ; Pop the stack
 blr ; Leave...
@@ -1638,6 +1915,12 @@ LEXT(hw_purge_map)
 stw r6,FM_ARG0+0x44(r1) ; Save address to save next mapped vaddr
 stw r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
+#if DEBUG
+ lwz r11,pmapFlags(r3) ; Get pmaps flags
+ rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active?
+ bne hpmPanic ; Call not valid for guest shadow assist pmap
+#endif
+
 bt++ pf64Bitb,hpmSF1 ; skip if 64-bit (only they take the hint)
 lwz r9,pmapvr+4(r3) ; Get conversion mask
 b hpmSF1x ; Done...
@@ -1676,13 +1959,18 @@ hpmCNext: bne++ cr1,hpmSearch ; There is another to check...
 b hrmNotFound ; No more in pmap to check...
 hpmGotOne: lwz r20,mpFlags(r3) ; Get the flags
- andi. r9,r20,lo16(mpSpecial|mpNest|mpPerm|mpBlock) ; Are we allowed to remove it?
+ andi. r0,r20,lo16(mpType|mpPerm) ; cr0_eq <- normal mapping && !permanent
 rlwinm r21,r20,8,24,31 ; Extract the busy count
 cmplwi cr2,r21,0 ; Is it busy?
 crand cr0_eq,cr2_eq,cr0_eq ; not busy and can be removed?
 beq++ hrmGotX ; Found, branch to remove the mapping...
 b hpmCNext ; Nope...
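The reworked hpmGotOne test above gates removal on the mpType and mpPerm bits plus the busy count packed into mpFlags. A hedged C restatement of that predicate; the field positions are assumptions (the rlwinm r21,r20,8,24,31 rotate suggests the busy count occupies the high-order byte):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MP_TYPE_MASK 0x0000000Fu  /* assumed position of the mpType field */
    #define MP_PERM      0x00000010u  /* assumed position of the mpPerm bit   */

    /* Purgeable = normal mapping (type 0), not permanent, and not busy. */
    static bool purgeable(uint32_t mp_flags)
    {
        uint32_t busy = (mp_flags >> 24) & 0xFFu;  /* busy count byte */
        return (mp_flags & (MP_TYPE_MASK | MP_PERM)) == 0 && busy == 0;
    }

    int main(void)
    {
        printf("%d %d\n", purgeable(0x00000000u), purgeable(0x01000000u));
        return 0;
    }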
+hpmPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc + /* * mapping *hw_purge_space(physent, pmap) - remove a mapping from the system based upon address space * @@ -1747,6 +2035,12 @@ LEXT(hw_purge_space) stw r6,FM_ARG0+0x44(r1) ; Save address to save next mapped vaddr stw r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return +#if DEBUG + lwz r11,pmapFlags(r4) ; Get pmaps flags + rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active? + bne hpsPanic ; Call not valid for guest shadow assist pmap +#endif + bt++ pf64Bitb,hpsSF1 ; skip if 64-bit (only they take the hint) lwz r9,pmapvr+4(r4) ; Get conversion mask for pmap @@ -1767,7 +2061,7 @@ hpsSF1x: bl EXT(mapSetUp) ; Turn off interrupts, translation, and possibly e lwz r12,ppLink+4(r3) ; Grab the pointer to the first mapping -hpsSrc32: rlwinm. r12,r12,0,0,25 ; Clean and test mapping address +hpsSrc32: rlwinm. r12,r12,0,~ppFlags ; Clean and test mapping address beq hpsNone ; Did not find one... lhz r10,mpSpace(r12) ; Get the space @@ -1780,9 +2074,9 @@ hpsSrc32: rlwinm. r12,r12,0,0,25 ; Clean and test mapping address .align 5 -hpsSF: li r0,0xFF +hpsSF: li r0,ppLFAmask ld r12,ppLink(r3) ; Get the pointer to the first mapping - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F hpsSrc64: andc. r12,r12,r0 ; Clean and test mapping address beq hpsNone ; Did not find one... @@ -1823,11 +2117,156 @@ hpsSF3: mtmsrd r11 ; Restore enables/translation/etc. hpsRetnCmn: lwz r12,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Restore the return - li r3,0 ; Set return code + li r3,mapRtEmpty ; No mappings for specified pmap on physent chain mtlr r12 ; Restore the return lwz r1,0(r1) ; Pop the stack blr ; Leave... +hpsPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc + +/* + * mapping *hw_scrub_guest(physent, pmap) - remove first guest mapping associated with host + * on this physent chain + * + * Locates the first guest mapping on the physent chain that is associated with the + * specified host pmap. If this succeeds, the mapping is removed by joining the general + * remove path; otherwise, we return NULL. The caller is expected to invoke this entry + * repeatedly until no additional guest mappings that match our criteria are removed. + * + * Because this entry point exits through hw_rem_map, our prolog pushes its frame. 
+ *
+ * Parameters:
+ * r3 : physent, 32-bit kernel virtual address
+ * r4 : host pmap, 32-bit kernel virtual address
+ *
+ * Volatile register usage (for linkage through hrmJoin):
+ * r4 : high-order 32 bits of guest virtual address
+ * r5 : low-order 32 bits of guest virtual address
+ * r11: saved MSR image
+ *
+ * Non-volatile register usage:
+ * r26: VMM extension block's physical address
+ * r27: host pmap's physical address
+ * r28: guest pmap's physical address
+ *
+ */
+
+ .align 5
+ .globl EXT(hw_scrub_guest)
+
+LEXT(hw_scrub_guest)
+ stwu r1,-(FM_ALIGN(hrmStackSize)+FM_SIZE)(r1) ; Make some space on the stack
+ mflr r0 ; Save the link register
+ stw r15,FM_ARG0+0x00(r1) ; Save a register
+ stw r16,FM_ARG0+0x04(r1) ; Save a register
+ stw r17,FM_ARG0+0x08(r1) ; Save a register
+ mfsprg r2,2 ; Get feature flags
+ stw r18,FM_ARG0+0x0C(r1) ; Save a register
+ stw r19,FM_ARG0+0x10(r1) ; Save a register
+ stw r20,FM_ARG0+0x14(r1) ; Save a register
+ stw r21,FM_ARG0+0x18(r1) ; Save a register
+ stw r22,FM_ARG0+0x1C(r1) ; Save a register
+ mtcrf 0x02,r2 ; move pf64Bit cr6
+ stw r23,FM_ARG0+0x20(r1) ; Save a register
+ stw r24,FM_ARG0+0x24(r1) ; Save a register
+ stw r25,FM_ARG0+0x28(r1) ; Save a register
+ stw r26,FM_ARG0+0x2C(r1) ; Save a register
+ stw r27,FM_ARG0+0x30(r1) ; Save a register
+ li r6,0 ; Set no next address return
+ stw r28,FM_ARG0+0x34(r1) ; Save a register
+ stw r29,FM_ARG0+0x38(r1) ; Save a register
+ stw r30,FM_ARG0+0x3C(r1) ; Save a register
+ stw r31,FM_ARG0+0x40(r1) ; Save a register
+ stw r6,FM_ARG0+0x44(r1) ; Save address to save next mapped vaddr
+ stw r0,(FM_ALIGN(hrmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
+
+ lwz r11,pmapVmmExt(r4) ; get VMM pmap extension block vaddr
+
+ bt++ pf64Bitb,hsg64Salt ; Test for 64-bit machine
+ lwz r26,pmapVmmExtPhys+4(r4) ; Get VMM pmap extension block paddr
+ lwz r9,pmapvr+4(r4) ; Get 32-bit virt<->real conversion salt
+ b hsgStart ; Get to work
+
+hsg64Salt: ld r26,pmapVmmExtPhys(r4) ; Get VMM pmap extension block paddr
+ ld r9,pmapvr(r4) ; Get 64-bit virt<->real conversion salt
+
+hsgStart: bl EXT(mapSetUp) ; Disable 'rupts, translation, enter 64-bit mode
+ xor r27,r4,r9 ; Convert host pmap_t virt->real
+ bl mapPhysLock ; Lock the physent
+
+ bt++ pf64Bitb,hsg64Scan ; Test for 64-bit machine
+
+ lwz r12,ppLink+4(r3) ; Grab the pointer to the first mapping
+hsg32Loop: rlwinm. r12,r12,0,~ppFlags ; Clean and test mapping address
+ beq hsg32Miss ; Did not find one...
+ lwz r8,mpFlags(r12) ; Get mapping's flags
+ lhz r7,mpSpace(r12) ; Get mapping's space id
+ rlwinm r8,r8,0,mpType ; Extract mapping's type code
+ lis r28,hi16(EXT(pmapTrans)) ; Get the top of the start of the pmap hash to pmap translate table
+ xori r8,r8,mpGuest ; Is it a guest mapping?
+ ori r28,r28,lo16(EXT(pmapTrans)) ; Get the top of the start of the pmap hash to pmap translate table
+ slwi r9,r7,2 ; Multiply space by 4
+ lwz r28,0(r28) ; Get the actual translation map
+ lwz r4,mpVAddr(r12) ; Get the top of the vaddr
+ slwi r7,r7,3 ; Multiply space by 8
+ lwz r5,mpVAddr+4(r12) ; Get the bottom of the vaddr
+ add r7,r7,r9 ; Get correct displacement into translate table
+ add r28,r28,r7 ; Point to the pmap translation
+ lwz r28,pmapPAddr+4(r28) ; Get guest pmap paddr
+ lwz r7,pmapVmmExtPhys+4(r28) ; Get VMM extension block paddr
+ xor r7,r7,r26 ; Is guest associated with specified host?
+ or. r7,r7,r8 ; Guest mapping && associated with host?
+ lwz r12,mpAlias+4(r12) ; Chain on to the next + bne hsg32Loop ; Try next mapping on alias chain + +hsg32Hit: bl mapPhysUnlock ; Unlock physent chain + b hrmJoin ; Join common path for mapping removal + + .align 5 +hsg32Miss: bl mapPhysUnlock ; Unlock physent chain + mtmsr r11 ; Restore 'rupts, translation + isync ; Throw a small wrench into the pipeline + li r3,mapRtEmpty ; No mappings found matching specified criteria + b hrmRetnCmn ; Exit through common epilog + + .align 5 +hsg64Scan: li r6,ppLFAmask ; Get lock, flag, attribute mask seed + ld r12,ppLink(r3) ; Grab the pointer to the first mapping + rotrdi r6,r6,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F +hsg64Loop: andc. r12,r12,r6 ; Clean and test mapping address + beq hsg64Miss ; Did not find one... + lwz r8,mpFlags(r12) ; Get mapping's flags + lhz r7,mpSpace(r12) ; Get mapping's space id + rlwinm r8,r8,0,mpType ; Extract mapping's type code + lis r28,hi16(EXT(pmapTrans)) ; Get the top of the start of the pmap hash to pmap translate table + xori r8,r8,mpGuest ; Is it a guest mapping? + ori r28,r28,lo16(EXT(pmapTrans)) ; Get the top of the start of the pmap hash to pmap translate table + slwi r9,r7,2 ; Multiply space by 4 + lwz r28,0(r28) ; Get the actual translation map + lwz r4,mpVAddr(r12) ; Get the top of the vaddr + slwi r7,r7,3 ; Multiply space by 8 + lwz r5,mpVAddr+4(r12) ; Get the bottom of the vaddr + add r7,r7,r9 ; Get correct displacement into translate table + add r28,r28,r7 ; Point to the pmap translation + ld r28,pmapPAddr(r28) ; Get guest pmap paddr + ld r7,pmapVmmExtPhys(r28) ; Get VMM extension block paddr + xor r7,r7,r26 ; Is guest associated with specified host? + or. r7,r7,r8 ; Guest mapping && associated with host? + ld r12,mpAlias(r12) ; Chain on to the next + bne hsg64Loop ; Try next mapping on alias chain + +hsg64Hit: bl mapPhysUnlock ; Unlock physent chain + b hrmJoin ; Join common path for mapping removal + + .align 5 +hsg64Miss: bl mapPhysUnlock ; Unlock physent chain + mtmsr r11 ; Restore 'rupts, translation + li r3,mapRtEmpty ; No mappings found matching specified criteria + b hrmRetnCmn ; Exit through common epilog + /* * mapping *hw_find_space(physent, space) - finds the first mapping on physent for specified space @@ -1863,7 +2302,7 @@ LEXT(hw_find_space) lwz r12,ppLink+4(r3) ; Grab the pointer to the first mapping -hfsSrc32: rlwinm. r12,r12,0,0,25 ; Clean and test mapping address +hfsSrc32: rlwinm. r12,r12,0,~ppFlags ; Clean and test mapping address beq hfsNone ; Did not find one... lhz r10,mpSpace(r12) ; Get the space @@ -1876,9 +2315,9 @@ hfsSrc32: rlwinm. r12,r12,0,0,25 ; Clean and test mapping address .align 5 -hfsSF: li r0,0xFF +hfsSF: li r0,ppLFAmask ld r12,ppLink(r3) ; Get the pointer to the first mapping - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F hfsSrc64: andc. r12,r12,r0 ; Clean and test mapping address beq hfsNone ; Did not find one... @@ -1924,12 +2363,27 @@ hfsSF3: mtmsrd r11 ; Restore enables/translation/etc. ; hfsRetnCmn: mr r3,r12 ; Get the mapping or a 0 if we failed + +#if DEBUG + mr. r3,r3 ; Anything to return? + beq hfsRetnNull ; Nope + lwz r11,mpFlags(r3) ; Get mapping flags + rlwinm r0,r11,0,mpType ; Isolate the mapping type + cmplwi r0,mpGuest ; Shadow guest mapping? + beq hfsPanic ; Yup, kick the bucket +hfsRetnNull: +#endif + lwz r12,(FM_SIZE+FM_LR_SAVE)(r1) ; Restore the return mtlr r12 ; Restore the return lwz r1,0(r1) ; Pop the stack blr ; Leave... 
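The hfsSrc32/hfsSrc64 loops above are one algorithm at two register widths: strip the flag bits packed into the low end of the chain pointer, test for end of chain, compare the space ID, and follow mpAlias. A C sketch of that walk follows; the 0x3F flag mask and the struct layouts are assumptions standing in for the real phys_entry and mapping definitions.

#include <stdint.h>

#define PP_FLAGS 0x3Fu        /* assumed: flag/lock bits packed into low pointer bits */

struct mapping_sk {           /* hypothetical stand-in for the real mapping */
    uint16_t  mpSpace;        /* address space ID */
    uintptr_t mpAlias;        /* next mapping on this physical page, plus flag bits */
};

struct physent_sk {           /* hypothetical stand-in for the real phys_entry */
    uintptr_t ppLink;         /* first mapping, plus flag bits */
};

/* First mapping on this physical page that belongs to 'space', else NULL. */
static struct mapping_sk *find_space(struct physent_sk *pp, uint16_t space) {
    uintptr_t p = pp->ppLink & ~(uintptr_t)PP_FLAGS;   /* clean and test */
    while (p != 0) {
        struct mapping_sk *mp = (struct mapping_sk *)p;
        if (mp->mpSpace == space)
            return mp;                                 /* found one */
        p = mp->mpAlias & ~(uintptr_t)PP_FLAGS;        /* chain on to the next */
    }
    return 0;                                          /* did not find one */
}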
+hfsPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc ; ; mapping *hw_find_map(pmap, va, *nextva) - Looks up a vaddr in a pmap @@ -1952,6 +2406,12 @@ LEXT(hw_find_map) stw r31,FM_ARG0+0x18(r1) ; Save a register stw r0,(FM_ALIGN((31-26+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return +#if DEBUG + lwz r11,pmapFlags(r3) ; Get pmaps flags + rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active? + bne hfmPanic ; Call not valid for guest shadow assist pmap +#endif + lwz r6,pmapvr(r3) ; Get the first part of the VR translation for pmap lwz r7,pmapvr+4(r3) ; Get the second part @@ -2039,9 +2499,109 @@ hfmReturnC: stw r29,0(r25) ; Save the top of the next va hfmBadLock: li r3,1 ; Set lock time out error code b hfmReturn ; Leave.... +hfmPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc + + +/* + * void hw_clear_maps(void) + * + * Remove all mappings for all phys entries. + * + * + */ + + .align 5 + .globl EXT(hw_clear_maps) + +LEXT(hw_clear_maps) + mflr r10 ; Save the link register + mfcr r9 ; Save the condition register + bl EXT(mapSetUp) ; Turn off interrupts, translation, and possibly enter 64-bit + + lis r5,hi16(EXT(pmap_mem_regions)) ; Point to the start of the region table + ori r5,r5,lo16(EXT(pmap_mem_regions)) ; Point to the start of the region table + +hcmNextRegion: + lwz r3,mrPhysTab(r5) ; Get the actual table address + lwz r0,mrStart(r5) ; Get start of table entry + lwz r4,mrEnd(r5) ; Get end of table entry + addi r5,r5,mrSize ; Point to the next regions + + cmplwi r3,0 ; No more regions? + beq-- hcmDone ; Leave... + + sub r4,r4,r0 ; Calculate physical entry count + addi r4,r4,1 + mtctr r4 + + bt++ pf64Bitb,hcmNextPhys64 ; 64-bit version + + +hcmNextPhys32: + lwz r4,ppLink+4(r3) ; Grab the pointer to the first mapping + addi r3,r3,physEntrySize ; Next phys_entry + +hcmNextMap32: + rlwinm. r4,r4,0,0,25 ; Clean and test mapping address + beq hcmNoMap32 ; Did not find one... + + lwz r0,mpPte(r4) ; Grab the offset to the PTE + rlwinm r0,r0,0,~mpHValid ; Clear out valid bit + stw r0,mpPte(r4) ; Get the quick pointer again + + lwz r4,mpAlias+4(r4) ; Chain on to the next + b hcmNextMap32 ; Check it out... +hcmNoMap32: + bdnz hcmNextPhys32 + b hcmNextRegion + + + .align 5 +hcmNextPhys64: + li r0,ppLFAmask ; Get mask to clean up mapping pointer + ld r4,ppLink(r3) ; Get the pointer to the first mapping + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F + addi r3,r3,physEntrySize ; Next phys_entry + +hcmNextMap64: + andc. r4,r4,r0 ; Clean and test mapping address + beq hcmNoMap64 ; Did not find one... + + lwz r0,mpPte(r4) ; Grab the offset to the PTE + rlwinm r0,r0,0,~mpHValid ; Clear out valid bit + stw r0,mpPte(r4) ; Get the quick pointer again + + ld r4,mpAlias(r4) ; Chain on to the next + li r0,ppLFAmask ; Get mask to clean up mapping pointer + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F + b hcmNextMap64 ; Check it out... +hcmNoMap64: + bdnz hcmNextPhys64 + b hcmNextRegion + + + .align 5 +hcmDone: + mtlr r10 ; Restore the return + mtcr r9 ; Restore the condition register + bt++ pf64Bitb,hcmDone64 ; 64-bit version +hcmDone32: + mtmsr r11 ; Restore translation/mode/etc. + isync + blr ; Leave... + +hcmDone64: + mtmsrd r11 ; Restore translation/mode/etc. + isync + blr ; Leave... 
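hw_clear_maps above reduces to three nested loops: memory regions, physents within each region, and mappings chained off each physent, and the only mutation is clearing the mpHValid bit in each mapping's cached PTE pointer. A C rendering of that shape, with assumed types for the region table entries and an assumed 0x3F pointer-flag mask:

#include <stdint.h>

#define MP_HVALID 0x1u                   /* assumed: "PTE pointer is valid" bit in mpPte */

struct mapping_sk { uintptr_t mpAlias; uint32_t mpPte; };
struct physent_sk { uintptr_t ppLink; };
struct mem_region_sk {
    struct physent_sk *mrPhysTab;        /* physent table; a zero entry ends the scan */
    uint32_t mrStart, mrEnd;             /* first and last physical page numbers */
};

extern struct mem_region_sk pmap_mem_regions[];

void hw_clear_maps_sketch(void) {
    for (struct mem_region_sk *r = pmap_mem_regions; r->mrPhysTab != 0; r++) {
        struct physent_sk *pe = r->mrPhysTab;
        uint32_t pages = r->mrEnd - r->mrStart + 1;        /* physical entry count */
        while (pages--) {
            uintptr_t p = pe->ppLink & ~(uintptr_t)0x3F;   /* strip packed flags */
            while (p != 0) {                               /* walk every mapping */
                struct mapping_sk *mp = (struct mapping_sk *)p;
                mp->mpPte &= ~MP_HVALID;                   /* forget the PTE */
                p = mp->mpAlias & ~(uintptr_t)0x3F;
            }
            pe++;                                          /* next phys_entry */
        }
    }
}

Note that the routine leaves the hardware PTEs themselves alone; only each mapping's cached pointer to its PTE is invalidated.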
+
+
 /*
- * unsigned int hw_walk_phys(pp, preop, op, postop, parm)
+ * unsigned int hw_walk_phys(pp, preop, op, postop, parm, opmod)
  * walks all mappings for a physical page and performs
  * specified operations on each.
  *
@@ -2051,6 +2611,8 @@ hfmBadLock: li r3,1 ; Set lock time out error code
  * op is the operation to perform on each mapping during walk
  * postop is operation to perform in the physent after walk. this would be
  * used to set or reset the RC bits.
+ * opmod modifies the action taken on any connected PTEs visited during
+ * the mapping walk.
  *
  * We return the RC bits from before postop is run.
  *
@@ -2074,19 +2636,31 @@ hfmBadLock: li r3,1 ; Set lock time out error code
 .globl EXT(hw_walk_phys)
 
 LEXT(hw_walk_phys)
- stwu r1,-(FM_ALIGN((31-25+1)*4)+FM_SIZE)(r1) ; Make some space on the stack
+ stwu r1,-(FM_ALIGN((31-24+1)*4)+FM_SIZE)(r1) ; Make some space on the stack
 mflr r0 ; Save the link register
- stw r25,FM_ARG0+0x00(r1) ; Save a register
- stw r26,FM_ARG0+0x04(r1) ; Save a register
- stw r27,FM_ARG0+0x08(r1) ; Save a register
- stw r28,FM_ARG0+0x0C(r1) ; Save a register
+ stw r24,FM_ARG0+0x00(r1) ; Save a register
+ stw r25,FM_ARG0+0x04(r1) ; Save a register
+ stw r26,FM_ARG0+0x08(r1) ; Save a register
+ stw r27,FM_ARG0+0x0C(r1) ; Save a register
+ mr r24,r8 ; Save the parm
 mr r25,r7 ; Save the parm
- stw r29,FM_ARG0+0x10(r1) ; Save a register
- stw r30,FM_ARG0+0x14(r1) ; Save a register
- stw r31,FM_ARG0+0x18(r1) ; Save a register
- stw r0,(FM_ALIGN((31-25+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
+ stw r28,FM_ARG0+0x10(r1) ; Save a register
+ stw r29,FM_ARG0+0x14(r1) ; Save a register
+ stw r30,FM_ARG0+0x18(r1) ; Save a register
+ stw r31,FM_ARG0+0x1C(r1) ; Save a register
+ stw r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
 
 bl EXT(mapSetUp) ; Turn off interrupts, translation, and possibly enter 64-bit
+
+ mfsprg r26,0 ; (INSTRUMENTATION)
+ lwz r27,hwWalkPhys(r26) ; (INSTRUMENTATION)
+ addi r27,r27,1 ; (INSTRUMENTATION)
+ stw r27,hwWalkPhys(r26) ; (INSTRUMENTATION)
+ la r26,hwWalkFull(r26) ; (INSTRUMENTATION)
+ slwi r12,r24,2 ; (INSTRUMENTATION)
+ lwzx r27,r26,r12 ; (INSTRUMENTATION)
+ addi r27,r27,1 ; (INSTRUMENTATION)
+ stwx r27,r26,r12 ; (INSTRUMENTATION)
 
 mr r26,r11 ; Save the old MSR
 lis r27,hi16(hwpOpBase) ; Get high order of op base
@@ -2107,13 +2681,37 @@ LEXT(hw_walk_phys)
 bctrl ; Call preop routine
 bne- hwpEarly32 ; preop says to bail now...
-
+
+ cmplwi r24,hwpMergePTE ; Classify operation modifier
 mtctr r27 ; Set up the op function address
 lwz r31,ppLink+4(r3) ; Grab the pointer to the first mapping
+ blt hwpSrc32 ; Do TLB invalidate/purge/merge/reload for each mapping
+ beq hwpMSrc32 ; Do TLB merge for each mapping
+
+hwpQSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address
+ beq hwpNone32 ; Did not find one...
 
-hwpSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address
+ bctrl ; Call the op function
+
+ bne- hwpEarly32 ; op says to bail now...
+ lwz r31,mpAlias+4(r31) ; Chain on to the next
+ b hwpQSrc32 ; Check it out...
+
+ .align 5
+hwpMSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address
 beq hwpNone32 ; Did not find one...
+
+ bl mapMergeRC32 ; Merge reference and change into mapping and physent
+ bctrl ; Call the op function
+
+ bne- hwpEarly32 ; op says to bail now...
+ lwz r31,mpAlias+4(r31) ; Chain on to the next
+ b hwpMSrc32 ; Check it out...
 
+ .align 5
+hwpSrc32: rlwinm. r31,r31,0,~ppFlags ; Clean and test mapping address
+ beq hwpNone32 ; Did not find one...
+ ; ; Note: mapInvPte32 returns the PTE in R3 (or 0 if none), PTE high in R4, ; PTE low in R5. The PCA address is in R7. The PTEG come back locked. @@ -2170,13 +2768,37 @@ hwpEarly32: lwz r30,ppLink+4(r29) ; Save the old RC hwp64: bctrl ; Call preop routine bne-- hwpEarly64 ; preop says to bail now... + cmplwi r24,hwpMergePTE ; Classify operation modifier mtctr r27 ; Set up the op function address - li r0,0xFF + li r24,ppLFAmask ld r31,ppLink(r3) ; Get the pointer to the first mapping - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r24,r24,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F + blt hwpSrc64 ; Do TLB invalidate/purge/merge/reload for each mapping + beq hwpMSrc64 ; Do TLB merge for each mapping -hwpSrc64: andc. r31,r31,r0 ; Clean and test mapping address +hwpQSrc64: andc. r31,r31,r24 ; Clean and test mapping address + beq hwpNone64 ; Did not find one... + + bctrl ; Call the op function + + bne-- hwpEarly64 ; op says to bail now... + ld r31,mpAlias(r31) ; Chain on to the next + b hwpQSrc64 ; Check it out... + + .align 5 +hwpMSrc64: andc. r31,r31,r24 ; Clean and test mapping address + beq hwpNone64 ; Did not find one... + + bl mapMergeRC64 ; Merge reference and change into mapping and physent + bctrl ; Call the op function + + bne-- hwpEarly64 ; op says to bail now... + ld r31,mpAlias(r31) ; Chain on to the next + b hwpMSrc64 ; Check it out... + + .align 5 +hwpSrc64: andc. r31,r31,r24 ; Clean and test mapping address beq hwpNone64 ; Did not find one... ; ; Note: mapInvPte64 returns the PTE in R3 (or 0 if none), PTE high in R4, @@ -2200,8 +2822,6 @@ hwpSrc64: andc. r31,r31,r0 ; Clean and test mapping address hwpNxt64: bne-- cr1,hwpEarly64 ; op says to bail now... ld r31,mpAlias(r31) ; Chain on to the next - li r0,0xFF - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F b hwpSrc64 ; Check it out... .align 5 @@ -2227,16 +2847,17 @@ hwpEarly64: lwz r30,ppLink+4(r29) ; Save the old RC mtmsrd r26 ; Restore translation/mode/etc. isync -hwpReturn: lwz r0,(FM_ALIGN((31-25+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Restore the return - lwz r25,FM_ARG0+0x00(r1) ; Restore a register - lwz r26,FM_ARG0+0x04(r1) ; Restore a register +hwpReturn: lwz r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Restore the return + lwz r24,FM_ARG0+0x00(r1) ; Restore a register + lwz r25,FM_ARG0+0x04(r1) ; Restore a register + lwz r26,FM_ARG0+0x08(r1) ; Restore a register mr r3,r30 ; Pass back the RC - lwz r27,FM_ARG0+0x08(r1) ; Restore a register - lwz r28,FM_ARG0+0x0C(r1) ; Restore a register + lwz r27,FM_ARG0+0x0C(r1) ; Restore a register + lwz r28,FM_ARG0+0x10(r1) ; Restore a register mtlr r0 ; Restore the return - lwz r29,FM_ARG0+0x10(r1) ; Restore a register - lwz r30,FM_ARG0+0x14(r1) ; Restore a register - lwz r31,FM_ARG0+0x18(r1) ; Restore a register + lwz r29,FM_ARG0+0x14(r1) ; Restore a register + lwz r30,FM_ARG0+0x18(r1) ; Restore a register + lwz r31,FM_ARG0+0x1C(r1) ; Restore a register lwz r1,0(r1) ; Pop the stack blr ; Leave... @@ -2267,7 +2888,7 @@ hwpNoop: cmplw r0,r0 ; Make sure CR0_EQ is set ; We changed the attributes of a mapped page. Make sure there are no cache paradoxes. ; NOTE: Do we have to deal with i-cache here? -hwpSAM: li r11,4096 ; Get page size +hwpSAM: li r11,4096 ; Get page size hwpSAMinvd: sub. r11,r11,r9 ; Back off a line dcbf r11,r5 ; Flush the line in the data cache @@ -2275,7 +2896,7 @@ hwpSAMinvd: sub. 
r11,r11,r9 ; Back off a line sync ; Make sure it is done - li r11,4096 ; Get page size + li r11,4096 ; Get page size hwpSAMinvi: sub. r11,r11,r9 ; Back off a line icbi r11,r5 ; Flush the line in the icache @@ -2287,17 +2908,11 @@ hwpSAMinvi: sub. r11,r11,r9 ; Back off a line blr ; Return... -; Function 1 - Set protection in physent +; Function 1 - Set protection in physent (obsolete) .set .,hwpOpBase+(1*128) ; Generate error if previous function too long -hwpSPrtPhy: li r5,ppLink+4 ; Get offset for flag part of physent - -hwpSPrtPhX: lwarx r4,r5,r29 ; Get the old flags - rlwimi r4,r25,0,ppPPb-32,ppPPe-32 ; Stick in the new protection - stwcx. r4,r5,r29 ; Try to stuff it - bne-- hwpSPrtPhX ; Try again... -; Note: CR0_EQ is set because of stwcx. +hwpSPrtPhy: cmplw r0,r0 ; Make sure we return CR0_EQ blr ; Return... @@ -2308,12 +2923,12 @@ hwpSPrtPhX: lwarx r4,r5,r29 ; Get the old flags hwpSPrtMap: lwz r9,mpFlags(r31) ; Get the mapping flags lwz r8,mpVAddr+4(r31) ; Get the protection part of mapping rlwinm. r9,r9,0,mpPermb,mpPermb ; Is the mapping permanent? - li r0,lo16(mpPP) ; Get protection bits + li r0,lo16(mpN|mpPP) ; Get no-execute and protection bits crnot cr0_eq,cr0_eq ; Change CR0_EQ to true if mapping is permanent - rlwinm r2,r25,0,mpPPb-32,mpPPb-32+2 ; Position new protection + rlwinm r2,r25,0,mpNb-32,mpPPe-32 ; Isolate new no-execute and protection bits beqlr-- ; Leave if permanent mapping (before we trash R5)... - andc r5,r5,r0 ; Clear the old prot bits - or r5,r5,r2 ; Move in the prot bits + andc r5,r5,r0 ; Clear the old no-execute and prot bits + or r5,r5,r2 ; Move in the new no-execute and prot bits rlwimi r8,r5,0,20,31 ; Copy into the mapping copy cmpw r0,r0 ; Make sure we return CR0_EQ stw r8,mpVAddr+4(r31) ; Set the flag part of mapping @@ -2323,10 +2938,10 @@ hwpSPrtMap: lwz r9,mpFlags(r31) ; Get the mapping flags .set .,hwpOpBase+(3*128) ; Generate error if previous function too long -hwpSAtrPhy: li r5,ppLink+4 ; Get offset for flag part of physent +hwpSAtrPhy: li r5,ppLink ; Get offset for flag part of physent hwpSAtrPhX: lwarx r4,r5,r29 ; Get the old flags - rlwimi r4,r25,0,ppIb-32,ppGb-32 ; Stick in the new attributes + rlwimi r4,r25,0,ppIb,ppGb ; Stick in the new attributes stwcx. r4,r5,r29 ; Try to stuff it bne-- hwpSAtrPhX ; Try again... ; Note: CR0_EQ is set because of stwcx. @@ -2338,14 +2953,16 @@ hwpSAtrPhX: lwarx r4,r5,r29 ; Get the old flags hwpSAtrMap: lwz r9,mpFlags(r31) ; Get the mapping flags lwz r8,mpVAddr+4(r31) ; Get the attribute part of mapping - li r2,0x10 ; Force on coherent + li r2,mpM ; Force on coherent rlwinm. r9,r9,0,mpPermb,mpPermb ; Is the mapping permanent? li r0,lo16(mpWIMG) ; Get wimg mask crnot cr0_eq,cr0_eq ; Change CR0_EQ to true if mapping is permanent - rlwimi r2,r2,mpIb-ppIb,mpIb-32,mpIb-32 ; Copy in the cache inhibited bit + rlwimi r2,r25,32-(mpIb-32-ppIb),mpIb-32,mpIb-32 + ; Copy in the cache inhibited bit beqlr-- ; Leave if permanent mapping (before we trash R5)... 
 andc r5,r5,r0 ; Clear the old wimg
- rlwimi r2,r2,32-(mpGb-ppGb),mpGb-32,mpGb-32 ; Copy in the guarded bit
+ rlwimi r2,r25,32-(mpGb-32-ppGb),mpGb-32,mpGb-32
+ ; Copy in the guarded bit
 mfsprg r9,2 ; Feature flags
 or r5,r5,r2 ; Move in the new wimg
 rlwimi r8,r5,0,20,31 ; Copy into the mapping copy
@@ -2374,7 +2991,7 @@ hwpSAtrMap: lwz r9,mpFlags(r31) ; Get the mapping flags
 hwpCRefPhy: li r5,ppLink+4 ; Get offset for flag part of physent
 
 hwpCRefPhX: lwarx r4,r5,r29 ; Get the old flags
- rlwinm r4,r4,0,ppRb+1-32,ppRb-1-32 ; Clear R
+ rlwinm r4,r4,0,ppRb+1-32,ppRb-1-32 ; Clear R
 stwcx. r4,r5,r29 ; Try to stuff it
 bne-- hwpCRefPhX ; Try again...
 ; Note: CR0_EQ is set because of stwcx.
@@ -2440,7 +3057,6 @@ hwpSRefPhX: lwarx r4,r5,r29 ; Get the old flags
 .set .,hwpOpBase+(10*128) ; Generate error if previous function too long
 
 hwpSRefMap: lwz r8,mpVAddr+4(r31) ; Get the flag part of mapping
- ori r5,r5,lo16(mpR) ; Set reference in PTE low
 ori r8,r8,lo16(mpR) ; Set reference in mapping
 cmpw r0,r0 ; Make sure we return CR0_EQ
 stw r8,mpVAddr+4(r31) ; Set the flag part of mapping
@@ -2464,7 +3080,6 @@ hwpSCngPhX: lwarx r4,r5,r29 ; Get the old flags
 .set .,hwpOpBase+(12*128) ; Generate error if previous function too long
 
 hwpSCngMap: lwz r8,mpVAddr+4(r31) ; Get the flag part of mapping
- ori r5,r5,lo16(mpC) ; Set change in PTE low
 ori r8,r8,lo16(mpC) ; Set change in mapping
 cmpw r0,r0 ; Make sure we return CR0_EQ
 stw r8,mpVAddr+4(r31) ; Set the flag part of mapping
@@ -2486,13 +3101,14 @@ hwpTRefPhy: lwz r0,ppLink+4(r29) ; Get the flags from physent
 
 hwpTRefMap: rlwinm. r0,r5,0,mpRb-32,mpRb-32 ; Isolate reference bit and see if 0
 blr ; Return (CR0_EQ set to continue if reference is off)...
 
+
 ; Function 15 - Test change in physent
 
 .set .,hwpOpBase+(15*128) ; Generate error if previous function too long
 
 hwpTCngPhy: lwz r0,ppLink+4(r29) ; Get the flags from physent
 rlwinm. r0,r0,0,ppCb-32,ppCb-32 ; Isolate change bit and see if 0
- blr ; Return (CR0_EQ set to continue if reference is off)...
+ blr ; Return (CR0_EQ set to continue if change is off)...
 
 
 ; Function 16 - Test change in mapping
@@ -2500,14 +3116,63 @@ hwpTCngPhy: lwz r0,ppLink+4(r29) ; Get the flags from physent
 .set .,hwpOpBase+(16*128) ; Generate error if previous function too long
 
 hwpTCngMap: rlwinm. r0,r5,0,mpCb-32,mpCb-32 ; Isolate change bit and see if 0
- blr ; Return (CR0_EQ set to continue if reference is off)...
+ blr ; Return (CR0_EQ set to continue if change is off)...
+
+
+; Function 17 - Test reference and change in physent
 
 .set .,hwpOpBase+(17*128) ; Generate error if previous function too long
 
+hwpTRefCngPhy:
+ lwz r0,ppLink+4(r29) ; Get the flags from physent
+ rlwinm r0,r0,0,ppRb-32,ppCb-32 ; Isolate reference and change bits
+ cmplwi r0,lo16(ppR|ppC) ; cr0_eq <- ((R == 1) && (C == 1))
+ crnot cr0_eq,cr0_eq ; cr0_eq <- ((R == 0) || (C == 0))
+ blr ; Return (CR0_EQ set to continue if either R or C is off)...
+
+
+; Function 18 - Test reference and change in mapping
+
+ .set .,hwpOpBase+(18*128) ; Generate error if previous function too long
+hwpTRefCngMap:
+ rlwinm r0,r5,0,mpRb-32,mpCb-32 ; Isolate reference and change bits from mapping
+ cmplwi r0,lo16(mpR|mpC) ; cr0_eq <- ((R == 1) && (C == 1))
+ crnot cr0_eq,cr0_eq ; cr0_eq <- ((R == 0) || (C == 0))
+ blr ; Return (CR0_EQ set to continue if either R or C is off)...
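Since the op functions are easiest to read with the caller in mind, here is the hw_walk_phys control flow from its entry point above, restated as C. Everything in this sketch is an assumption for illustration: the table, the callback signature, and the opmod constant names are invented, and the real dispatch is a computed branch into the 128-byte slots at hwpOpBase.

#include <stdint.h>

struct mapping_sk { uintptr_t mpAlias; };
struct physent_sk { uintptr_t ppLink; uint32_t ppRC; };

/* one entry per 128-byte slot at hwpOpBase; nonzero return means bail early */
typedef int (*hwp_op_t)(struct physent_sk *pp, struct mapping_sk *mp, uint32_t parm);
extern hwp_op_t hwp_table[21];

/* assumed opmod ordering, matching the cmplwi/blt/beq classification above */
enum { HWP_PURGE_PTE, HWP_MERGE_PTE, HWP_NOP_PTE };

extern void invalidate_pte(struct mapping_sk *mp);  /* mapInvPte32/64 stand-in  */
extern void merge_rc(struct mapping_sk *mp);        /* mapMergeRC32/64 stand-in */

uint32_t hw_walk_phys_sketch(struct physent_sk *pp, int preop, int op,
                             int postop, uint32_t parm, int opmod) {
    if (hwp_table[preop](pp, 0, parm) != 0)         /* preop says to bail now   */
        return pp->ppRC;                            /* hwpEarly: postop skipped */

    uintptr_t p = pp->ppLink & ~(uintptr_t)0x3F;    /* clean and test           */
    while (p != 0) {
        struct mapping_sk *mp = (struct mapping_sk *)p;
        if (opmod == HWP_PURGE_PTE)                 /* hwpSrc32/64 path         */
            invalidate_pte(mp);
        else if (opmod == HWP_MERGE_PTE)            /* hwpMSrc32/64 path        */
            merge_rc(mp);
        /* HWP_NOP_PTE (hwpQSrc32/64): leave any connected PTE alone */
        if (hwp_table[op](pp, mp, parm) != 0)       /* op says to bail now      */
            return pp->ppRC;
        p = mp->mpAlias & ~(uintptr_t)0x3F;         /* chain on to the next     */
    }

    uint32_t old_rc = pp->ppRC;                     /* RC from before postop    */
    hwp_table[postop](pp, 0, parm);                 /* e.g. set or reset RC     */
    return old_rc;
}

Functions 19 and 20 below are simply two more slots in this table, one operating on the physent and one on each mapping.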
+
+
+; Function 19 - Clear reference and change in physent
+
+ .set .,hwpOpBase+(19*128) ; Generate error if previous function too long
+hwpCRefCngPhy:
+ li r5,ppLink+4 ; Get offset for flag part of physent
+
+hwpCRefCngPhX:
+ lwarx r4,r5,r29 ; Get the old flags
+ andc r4,r4,r25 ; Clear R and C as specified by mask
+ stwcx. r4,r5,r29 ; Try to stuff it
+ bne-- hwpCRefCngPhX ; Try again...
+; Note: CR0_EQ is set because of stwcx.
+ blr ; Return...
+
+
+; Function 20 - Clear reference and change in mapping
+
+ .set .,hwpOpBase+(20*128) ; Generate error if previous function too long
+hwpCRefCngMap:
+ srwi r0,r25,(ppRb - mpRb) ; Align reference/change clear mask (phys->map)
+ lwz r8,mpVAddr+4(r31) ; Get the flag part of mapping
+ andc r5,r5,r0 ; Clear in PTE copy
+ andc r8,r8,r0 ; and in the mapping
+ cmpw r0,r0 ; Make sure we return CR0_EQ
+ stw r8,mpVAddr+4(r31) ; Set the flag part of mapping
+ blr ; Return...
+
+
 .set .,hwpOpBase+(21*128) ; Generate error if previous function too long
 
 ;
-; int hw_protect(pmap, va, prot, *nextva) - Changes protection on a specific mapping.
+; unsigned int hw_protect(pmap, va, prot, *nextva) - Changes protection on a specific mapping.
 ;
 ; Returns:
 ; mapRtOK - if all is ok
@@ -2534,6 +3199,12 @@ LEXT(hw_protect)
 stw r31,FM_ARG0+0x1C(r1) ; Save a register
 stw r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return
 
+#if DEBUG
+ lwz r11,pmapFlags(r3) ; Get pmaps flags
+ rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active?
+ bne hpPanic ; Call not valid for guest shadow assist pmap
+#endif
+
 lwz r6,pmapvr(r3) ; Get the first part of the VR translation for pmap
 lwz r7,pmapvr+4(r3) ; Get the second part
 
@@ -2563,21 +3234,23 @@ hpSF1: mr r29,r4 ; Save top half of vaddr
 
 bl EXT(mapSearch) ; Go see if we can find it (note: R7 comes back with mpFlags)
 
- andi. r7,r7,lo16(mpSpecial|mpNest|mpPerm|mpBlock|mpRIP) ; Are we allowed to change it or is it being removed?
+ rlwinm. r0,r7,0,mpType ; Is this a normal mapping?
+ crmove cr1_eq,cr0_eq ; cr1_eq <- this is a normal mapping
+ andi. r0,r7,mpPerm|mpRIP ; Is it permanent or being removed?
+ cror cr1_eq,cr0_eq,cr1_eq ; cr1_eq <- normal mapping and not permanent and not being removed
 
 mr. r31,r3 ; Save the mapping if we found it
- cmplwi cr1,r7,0 ; Anything special going on?
 mr r29,r4 ; Save next va high half
 mr r30,r5 ; Save next va low half
 
 beq-- hpNotFound ; Not found...
 
- bne-- cr1,hpNotAllowed ; Something special is happening...
+ bf-- cr1_eq,hpNotAllowed ; Something special is happening...
 
 bt++ pf64Bitb,hpDo64 ; Split for 64 bit
 
 bl mapInvPte32 ; Invalidate and lock PTEG, also merge into physent
 
- rlwimi r5,r24,0,mpPPb-32,mpPPb-32+2 ; Stick in the new pp
+ rlwimi r5,r24,0,mpPPb-32,mpPPe-32 ; Stick in the new pp (note that we ignore no-execute for 32-bit)
 mr. r3,r3 ; Was there a previously valid PTE?
 
 stb r5,mpVAddr+7(r31) ; Set the new pp field (do not muck with the rest)
@@ -2602,7 +3275,7 @@ hpNoOld32: la r3,pmapSXlk(r28) ; Point to the pmap search lock
 
 hpDo64: bl mapInvPte64 ; Invalidate and lock PTEG, also merge into physent
 
- rldimi r5,r24,0,mpPPb ; Stick in the new pp
+ rldimi r5,r24,0,mpNb ; Stick in the new no-execute and pp bits
 mr. r3,r3 ; Was there a previously valid PTE?
 
 stb r5,mpVAddr+7(r31) ; Set the new pp field (do not muck with the rest)
@@ -2666,12 +3339,18 @@ hpNotAllowed:
 bl sxlkUnlock ; Unlock the search list
 
 li r3,mapRtBlock ; Assume it was a block
- andi. r7,r7,lo16(mpBlock) ; Is this a block?
- bne++ hpReturn ; Yes, leave...
+ rlwinm r0,r7,0,mpType ; Isolate mapping type + cmplwi r0,mpBlock ; Is this a block mapping? + beq++ hpReturn ; Yes, leave... li r3,mapRtPerm ; Set that we hit a permanent page b hpReturn ; Leave.... +hpPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc + ; ; int hw_test_rc(pmap, va, reset) - tests RC on a specific va @@ -2698,6 +3377,12 @@ LEXT(hw_test_rc) stw r31,FM_ARG0+0x1C(r1) ; Save a register stw r0,(FM_ALIGN((31-24+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return +#if DEBUG + lwz r11,pmapFlags(r3) ; Get pmaps flags + rlwinm. r11,r11,0,pmapVMgsaa ; Is guest shadow assist active? + bne htrPanic ; Call not valid for guest shadow assist pmap +#endif + lwz r6,pmapvr(r3) ; Get the first part of the VR translation for pmap lwz r7,pmapvr+4(r3) ; Get the second part @@ -2728,12 +3413,14 @@ htrSF1: mr r29,r4 ; Save top half of vaddr bl EXT(mapSearch) ; Go see if we can find it (R7 comes back with mpFlags) - andi. r0,r7,lo16(mpSpecial|mpNest|mpPerm|mpBlock|mpRIP) ; Are we allowed to change it or is it being removed? + rlwinm. r0,r7,0,mpType ; Is this a normal mapping? + crmove cr1_eq,cr0_eq ; cr1_eq <- this is a normal mapping + andi. r0,r7,mpPerm|mpRIP ; Is it permanent or being removed? + crand cr1_eq,cr0_eq,cr1_eq ; cr1_eq <- normal mapping and not permanent and not being removed mr. r31,r3 ; Save the mapping if we found it - cmplwi cr1,r0,0 ; Are we removing it? - crorc cr0_eq,cr0_eq,cr1_eq ; Did we not find it or is it being removed? + crandc cr1_eq,cr1_eq,cr0_eq ; cr1_eq <- found & normal & not permanent & not being removed - bt-- cr0_eq,htrNotFound ; Not found, something special, or being removed... + bf-- cr1_eq,htrNotFound ; Not found, something special, or being removed... bt++ pf64Bitb,htrDo64 ; Split for 64 bit @@ -2833,9 +3520,46 @@ htrNotFound: li r3,mapRtNotFnd ; Set that we did not find the requested page b htrReturn ; Leave.... - - -; +htrPanic: lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failMapping ; Show that we failed some kind of mapping thing + sc + + +; +; +; mapFindLockPN - find and lock physent for a given page number +; +; + .align 5 +mapFindLockPN: + lis r9,hi16(EXT(pmap_mem_regions)) ; Point to the start of the region table + mr r2,r3 ; Save our target + ori r9,r9,lo16(EXT(pmap_mem_regions)) ; Point to the start of the region table + +mapFLPNitr: lwz r3,mrPhysTab(r9) ; Get the actual table address + lwz r5,mrStart(r9) ; Get start of table entry + lwz r0,mrEnd(r9) ; Get end of table entry + addi r9,r9,mrSize ; Point to the next slot + cmplwi cr2,r3,0 ; Are we at the end of the table? + cmplw r2,r5 ; See if we are in this table + cmplw cr1,r2,r0 ; Check end also + sub r4,r2,r5 ; Calculate index to physical entry + beq-- cr2,mapFLPNmiss ; Leave if we did not find an entry... + cror cr0_lt,cr0_lt,cr1_gt ; Set CR0_LT if it is NOT this entry + slwi r4,r4,3 ; Get offset to physical entry + + blt-- mapFLPNitr ; Did not find it... + + add r3,r3,r4 ; Point right to the slot + b mapPhysLock ; Join common lock code + +mapFLPNmiss: + li r3,0 ; Show that we did not find it + blr ; Leave... 
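mapFindLockPN above is a linear scan of the region table followed by the common physent lock. The same logic in C, under the assumptions that each region's physent array uses the 8-byte stride implied by the "slwi r4,r4,3" scaling and that mapPhysLock stands in for the lock routine it joins:

#include <stdint.h>

struct physent_sk { uintptr_t ppLink; };         /* assumed 8 bytes per physent */
struct mem_region_sk {
    struct physent_sk *mrPhysTab;                /* a zero entry ends the table */
    uint32_t mrStart, mrEnd;                     /* physical page range, inclusive */
};

extern struct mem_region_sk pmap_mem_regions[];
extern void mapPhysLock(struct physent_sk *pe);

struct physent_sk *map_find_lock_pn(uint32_t pn) {
    for (struct mem_region_sk *r = pmap_mem_regions; r->mrPhysTab != 0; r++) {
        if (pn >= r->mrStart && pn <= r->mrEnd) {       /* in this table? */
            struct physent_sk *pe = r->mrPhysTab + (pn - r->mrStart);
            mapPhysLock(pe);                            /* join common lock code */
            return pe;
        }
    }
    return 0;                                           /* mapFLPNmiss */
}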
+ + +; ; mapPhysFindLock - find physent list and lock it ; R31 points to mapping ; @@ -2844,7 +3568,7 @@ htrNotFound: mapPhysFindLock: lbz r4,mpFlags+1(r31) ; Get the index into the physent bank table lis r3,ha16(EXT(pmap_mem_regions)) ; Get high order of physent table (note use of ha16 to get value appropriate for an addi of low part) - rlwinm r4,r4,2,0,29 ; Change index into byte offset + rlwinm r4,r4,2,24,29 ; Mask index bits and convert to byte offset addi r4,r4,lo16(EXT(pmap_mem_regions)) ; Get low part of address of entry add r3,r3,r4 ; Point to table entry lwz r5,mpPAddr(r31) ; Get physical page number @@ -2929,7 +3653,7 @@ mapPhyCSet32: mapPhyCSetR: lwarx r2,0,r5 ; Get the link and flags - rlwimi r4,r2,0,26,31 ; Insert the flags + rlwimi r4,r2,0,ppFlags ; Insert the flags stwcx. r4,0,r5 ; Stick them back bne-- mapPhyCSetR ; Someone else did something, try again... blr ; Return... @@ -2937,8 +3661,8 @@ mapPhyCSetR: .align 5 mapPhyCSet64: - li r0,0xFF ; Get mask to clean up mapping pointer - rldicl r0,r0,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + li r0,ppLFAmask ; Get mask to clean up mapping pointer + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F mapPhyCSet64x: ldarx r2,0,r3 ; Get the link and flags @@ -3073,7 +3797,7 @@ gotIfetch: andis. r27,r8,hi16(dsiValid) ; Clean this up to construct a DSISR va ckIfProt: and. r4,r27,r0 ; Is this a non-handlable exception? li r20,64 ; Set a limit of 64 nests for sanity check bne-- hpfExit ; Yes... (probably not though) - + ; ; Note: if the RI is on, we are accessing user space from the kernel, therefore we ; should be loading the user pmap here. @@ -3101,25 +3825,27 @@ hpfInKern: mr r22,r29 ; Save the high part of faulting address lwz r28,4(r8) ; Pick up the pmap rlwinm. r18,r18,0,SAVredriveb,SAVredriveb ; Was this a redrive? mr r25,r28 ; Save the original pmap (in case we nest) - bne hpfNest ; Segs are not ours if so... + lwz r0,pmapFlags(r28) ; Get pmap's flags + bne hpfGVtest ; Segs are not ours if so... mfsrin r4,r30 ; Get the SR that was used for translation cmplwi r4,invalSpace ; Is this a simulated segment fault? - bne++ hpfNest ; No... + bne++ hpfGVtest ; No... rlwinm r27,r27,0,dsiMissb+1,dsiMissb-1 ; Clear the PTE miss bit in DSISR - b hpfNest ; Join on up... + b hpfGVtest ; Join on up... .align 5 nop ; Push hpfNest to a 32-byte boundary nop ; Push hpfNest to a 32-byte boundary nop ; Push hpfNest to a 32-byte boundary - nop ; Push hpfNest to a 32-byte boundary - nop ; Push hpfNest to a 32-byte boundary - nop ; Push hpfNest to a 32-byte boundary hpf64a: ld r28,0(r8) ; Get the pmap pointer (64-bit) mr r25,r28 ; Save the original pmap (in case we nest) + lwz r0,pmapFlags(r28) ; Get pmap's flags + +hpfGVtest: rlwinm. r0,r0,0,pmapVMgsaa ; Using guest shadow mapping assist? + bne hpfGVxlate ; Yup, do accelerated shadow stuff ; ; This is where we loop descending nested pmaps @@ -3142,21 +3868,24 @@ hpfNest: la r3,pmapSXlk(r28) ; Point to the pmap search lock crorc cr0_eq,cr0_eq,cr1_eq ; Merge not found and removing bt-- cr0_eq,hpfNotFound ; Not found or removing... - - rlwinm. r0,r7,0,mpNestb,mpNestb ; Are we nested? + + rlwinm r0,r7,0,mpType ; Isolate mapping type + cmplwi r0,mpNest ; Are we again nested? + cmplwi cr1,r0,mpLinkage ; Are we a linkage type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- nested or linkage type? mr r26,r7 ; Get the flags for this mapping (passed back from search call) lhz r21,mpSpace(r31) ; Get the space - beq++ hpfFoundIt ; No, we found our guy... 
+ bne++ hpfFoundIt ; No, we found our guy... #if pmapTransSize != 12 #error pmapTrans entry size is not 12 bytes!!!!!!!!!!!! It is pmapTransSize #endif - rlwinm. r0,r26,0,mpSpecialb,mpSpecialb ; Special handling? + cmplwi r0,mpLinkage ; Linkage mapping? cmplwi cr1,r20,0 ; Too many nestings? - bne-- hpfSpclNest ; Do we need to do special handling? + beq-- hpfSpclNest ; Do we need to do special handling? hpfCSrch: lhz r21,mpSpace(r31) ; Get the space lwz r8,mpNestReloc(r31) ; Get the vaddr relocation @@ -3178,11 +3907,18 @@ hpfCSrch: lhz r21,mpSpace(r31) ; Get the space add r12,r12,r10 ; Now we are pointing at the space to pmap translation entry bl sxlkUnlock ; Unlock the search list + bt++ pf64Bitb,hpfGetPmap64 ; Separate handling for 64-bit machines lwz r28,pmapPAddr+4(r12) ; Get the physical address of the new pmap - bf-- pf64Bitb,hpfNest ; Done if 32-bit... + cmplwi r28,0 ; Is the pmap paddr valid? + bne+ hpfNest ; Nest into new pmap... + b hpfBadPmap ; Handle bad pmap +hpfGetPmap64: ld r28,pmapPAddr(r12) ; Get the physical address of the new pmap - b hpfNest ; Go try the new pmap... + cmpldi r28,0 ; Is the pmap paddr valid? + bne++ hpfNest ; Nest into new pmap... + b hpfBadPmap ; Handle bad pmap + ; ; Error condition. We only allow 64 nestings. This keeps us from having to @@ -3211,6 +3947,19 @@ hpfBadLock: ori r0,r0,lo16(Choke) ; System abend li r3,failMapping ; Show mapping failure sc + +; +; Error condition - space id selected an invalid pmap - fatal +; + + .align 5 + +hpfBadPmap: + lis r0,hi16(Choke) ; System abend + ori r0,r0,lo16(Choke) ; System abend + li r3,failPmap ; Show invalid pmap + sc + ; ; Did not find any kind of mapping ; @@ -3240,8 +3989,8 @@ hpfExit: ; We need this because we can not do a relative branch .align 5 hpfSpclNest: - la r31,ppCIOmp(r19) ; Just point to the mapping - oris r27,r27,hi16(dsiSpcNest) ; Show that we had a special nesting here + la r31,ppUMWmp(r19) ; Just point to the mapping + oris r27,r27,hi16(dsiLinkage) ; Show that we had a linkage mapping here b hpfCSrch ; Go continue search... @@ -3264,14 +4013,26 @@ hpfSpclNest: #error maxAdrSpb (address space id size) is not 14 bits!!!!!!!!!!!! #endif +; Important non-volatile registers at this point ('home' means the final pmap/mapping found +; when a multi-level mapping has been successfully searched): +; r21: home space id number +; r22: relocated high-order 32 bits of vaddr +; r23: relocated low-order 32 bits of vaddr +; r25: pmap physical address +; r27: dsisr +; r28: home pmap physical address +; r29: high-order 32 bits of faulting vaddr +; r30: low-order 32 bits of faulting vaddr +; r31: mapping's physical address + .align 5 hpfFoundIt: lwz r12,pmapFlags(r28) ; Get the pmap flags so we can find the keys for this segment - rlwinm. r0,r27,0,dsiMissb,dsiMissb ; Did we actually miss the segment? +hpfGVfound: rlwinm. r0,r27,0,dsiMissb,dsiMissb ; Did we actually miss the segment? rlwinm r15,r23,18,14,17 ; Shift 32:35 (0:3) of vaddr just above space ID rlwinm r20,r21,28,22,31 ; Shift upper 10 bits of space into high order rlwinm r14,r22,18,14,31 ; Shift 0:17 of vaddr over - rlwinm r0,r27,0,dsiSpcNestb,dsiSpcNestb ; Isolate special nest flag + rlwinm r0,r27,0,dsiLinkageb,dsiLinkageb ; Isolate linkage mapping flag rlwimi r21,r21,14,4,17 ; Make a second copy of space above first cmplwi cr5,r0,0 ; Did we just do a special nesting? rlwimi r15,r22,18,0,13 ; Shift 18:31 of vaddr just above shifted 32:35 @@ -3509,8 +4270,9 @@ hpfPteMiss: lwarx r0,0,r31 ; Load the mapping flag field and. 
r12,r12,r3 ; Isolate the valid bit crorc cr0_eq,cr1_eq,cr0_eq ; Bail if FIP is on. Then, if already have PTE, bail... beq-- hpfAbandon ; Yes, other processor is or already has handled this... - andi. r0,r2,mpBlock ; Is this a block mapping? - crmove cr7_eq,cr0_eq ; Remember if we have a block mapping + rlwinm r0,r2,0,mpType ; Isolate mapping type + cmplwi r0,mpBlock ; Is this a block mapping? + crnot cr7_eq,cr0_eq ; Remember if we have a block mapping stwcx. r2,0,r31 ; Store the flags bne-- hpfPteMiss ; Collision, try again... @@ -3590,7 +4352,7 @@ hpfBldPTE32: bne- hpfBailOut ; Someone already did this for us... ; -; The mapSelSlot function selects a PTEG slot to use. As input, it uses R3 as a +; The mapSelSlot function selects a PTEG slot to use. As input, it uses R6 as a ; pointer to the PCA. When it returns, R3 contains 0 if an unoccupied slot was ; selected, 1 if it stole a non-block PTE, or 2 if it stole a block mapped PTE. ; R4 returns the slot index. @@ -3654,14 +4416,14 @@ hpfTLBIE32: lwarx r0,0,r9 ; Get the TLBIE lock tlbie r12 ; Invalidate it everywhere + beq- hpfNoTS32 ; Can not have MP on this machine... eieio ; Make sure that the tlbie happens first tlbsync ; Wait for everyone to catch up sync ; Make sure of it all - -hpfNoTS32: - stw r0,tlbieLock(0) ; Clear the tlbie lock + +hpfNoTS32: stw r0,tlbieLock(0) ; Clear the tlbie lock stw r7,hwSteals(r4) ; Save the steal count bgt cr5,hpfInser32 ; We just stole a block mapping... @@ -3678,7 +4440,7 @@ hpfMrgRC32: lwarx r0,0,r11 ; Get the master RC bne- hpfMrgRC32 ; Try again if we collided... -hpfFPnch: rlwinm. r7,r7,0,0,25 ; Clean and test mapping address +hpfFPnch: rlwinm. r7,r7,0,~ppFlags ; Clean and test mapping address beq- hpfLostPhys ; We could not find our mapping. Kick the bucket... lhz r10,mpSpace(r7) ; Get the space @@ -3843,14 +4605,11 @@ hpfTLBIE64: lwarx r0,0,r9 ; Get the TLBIE lock rldimi r7,r7,14,36 ; Copy address space to make hash value tlbsync ; Wait for everyone to catch up rldimi r7,r7,28,22 ; Add in a 3rd copy of the hash up top - isync srdi r2,r6,26 ; Shift original segment down to bottom ptesync ; Make sure of it all - - stw r0,tlbieLock(0) ; Clear the tlbie lock - xor r7,r7,r2 ; Compute original segment + stw r0,tlbieLock(0) ; Clear the tlbie lock stw r10,hwSteals(r4) ; Save the steal count bgt cr5,hpfInser64 ; We just stole a block mapping... @@ -3865,9 +4624,9 @@ hpfTLBIE64: lwarx r0,0,r9 ; Get the TLBIE lock rlwinm r2,r12,27,ppRb-32,ppCb-32 ; Position the new RC hpfMrgRC64: lwarx r0,0,r11 ; Get the master RC - li r12,0xFF ; Get mask to clean up alias pointer + li r12,ppLFAmask ; Get mask to clean up alias pointer or r0,r0,r2 ; Merge in the new RC - rldicl r12,r12,62,0 ; Rotate clean up mask to get 0xC0000000000000003F + rotrdi r12,r12,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F stwcx. r0,0,r11 ; Try to stick it back bne-- hpfMrgRC64 ; Try again if we collided... @@ -3956,7 +4715,162 @@ hpfAbandon: li r3,lgKillResv ; Kill off any reservation li r11,T_IN_VAIN ; Say that it was handled b EXT(PFSExit) ; Leave... +; +; Guest shadow assist -- page fault handler +; +; Here we handle a fault in a guest pmap that has the guest shadow mapping +; assist active. We locate the VMM pmap extension block, which contains an +; index over the discontiguous multi-page shadow hash table. The index +; corresponding to our vaddr is selected, and the selected group within +; that page is searched for a valid and active entry that contains +; our vaddr and space id. 
The search is pipelined, so that we may fetch
+; the next slot while examining the current slot for a hit. The final
+; search iteration is unrolled so that we don't fetch beyond the end of
+; our group, which could have dire consequences depending upon where the
+; physical hash page is located.
+;
+; The VMM pmap extension block occupies a page. Beginning at offset 0, we
+; have the pmap_vmm_ext proper. Aligned at the first 128-byte boundary
+; after the pmap_vmm_ext is the hash table physical address index, a
+; linear list of 64-bit physical addresses of the pages that comprise
+; the hash table.
+;
+; In the event that we successfully locate a guest mapping, we re-join
+; the page fault path at hpfGVfound with the mapping's address in r31;
+; otherwise, we re-join at hpfNotFound. In either case, we re-join holding
+; a share of the pmap search lock for the host pmap with the host pmap's
+; address in r28, the guest pmap's space id in r21, and the guest pmap's
+; flags in r12.
+;
+
+ .align 5
+hpfGVxlate:
+ bt pf64Bitb,hpfGV64 ; Take 64-bit path for 64-bit machine
+
+ lwz r11,pmapVmmExtPhys+4(r28) ; r11 <- VMM pmap extension block paddr
+ lwz r12,pmapFlags(r28) ; r12 <- guest pmap's flags
+ lwz r21,pmapSpace(r28) ; r21 <- guest space ID number
+ lwz r28,vmxHostPmapPhys+4(r11) ; r28 <- host pmap's paddr
+ la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index
+ rlwinm r10,r30,0,0xFFFFF000 ; r10 <- page-aligned guest vaddr
+ lwz r6,vxsGpf(r11) ; Get guest fault count
+
+ srwi r3,r10,12 ; Form shadow hash:
+ xor r3,r3,r21 ; spaceID ^ (vaddr >> 12)
+ rlwinm r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
+ ; Form index offset from hash page number
+ add r31,r31,r4 ; r31 <- hash page index entry
+ lwz r31,4(r31) ; r31 <- hash page paddr
+ rlwimi r31,r3,GV_HGRP_SHIFT,GV_HGRP_MASK
+ ; r31 <- hash group paddr
+
+ la r3,pmapSXlk(r28) ; Point to the host pmap's search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne- hpfBadLock ; Nope...
+
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address
+ addi r6,r6,1 ; Increment guest fault count
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ stw r6,vxsGpf(r11) ; Update guest fault count
+ b hpfGVlp32
+
+ .align 5
+hpfGVlp32:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
+ andi. r6,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags
+ xor r7,r7,r21 ; Compare space ID
+ or r0,r6,r7 ; r0 <- !(!free && !dormant && space match)
+ xor r8,r8,r10 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz hpfGVlp32 ; Iterate
+
+ clrrwi r5,r5,12 ; Remove flags from virtual address
+ andi. r3,r3,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r4,r4,r21 ; Compare space ID
+ or r0,r3,r4 ; r0 <- !(!free && !dormant && space match)
+ xor r5,r5,r10 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ b hpfGVmiss
+
+ .align 5
+hpfGV64:
+ ld r11,pmapVmmExtPhys(r28) ; r11 <- VMM pmap extension block paddr
+ lwz r12,pmapFlags(r28) ; r12 <- guest pmap's flags
+ lwz r21,pmapSpace(r28) ; r21 <- guest space ID number
+ ld r28,vmxHostPmapPhys(r11) ; r28 <- host pmap's paddr
+ la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index
+ rlwinm r10,r30,0,0xFFFFF000 ; Form 64-bit guest vaddr
+ rldimi r10,r29,32,0 ; cleaning up low-order 12 bits
+ lwz r6,vxsGpf(r11) ; Get guest fault count
+
+ srwi r3,r10,12 ; Form shadow hash:
+ xor r3,r3,r21 ; spaceID ^ (vaddr >> 12)
+ rlwinm r4,r3,GV_HPAGE_SHIFT,GV_HPAGE_MASK
+ ; Form index offset from hash page number
+ add r31,r31,r4 ; r31 <- hash page index entry
+ ld r31,0(r31) ; r31 <- hash page paddr
+ insrdi r31,r3,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2)
+ ; r31 <- hash group paddr
+
+ la r3,pmapSXlk(r28) ; Point to the host pmap's search lock
+ bl sxlkShared ; Go get a shared lock on the mapping lists
+ mr. r3,r3 ; Did we get the lock?
+ bne-- hpfBadLock ; Nope...
+
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address
+ addi r6,r6,1 ; Increment guest fault count
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ stw r6,vxsGpf(r11) ; Update guest fault count
+ b hpfGVlp64
+
+ .align 5
+hpfGVlp64:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr
+ andi. r6,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r7,r7,r21 ; Compare space ID
+ or r0,r6,r7 ; r0 <- !(!free && !dormant && space match)
+ xor r8,r8,r10 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz hpfGVlp64 ; Iterate
+
+ clrrdi r5,r5,12 ; Remove flags from virtual address
+ andi. r3,r3,mpgFree+mpgDormant ; Isolate guest free and dormant flag
+ xor r4,r4,r21 ; Compare space ID
+ or r0,r3,r4 ; r0 <- !(!free && !dormant && space match)
+ xor r5,r5,r10 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq hpfGVfound ; Join common path on hit (r31 points to mapping)
+
+hpfGVmiss:
+ lwz r6,vxsGpfMiss(r11) ; Get guest fault miss count
+ addi r6,r6,1 ; Increment miss count
+ stw r6,vxsGpfMiss(r11) ; Update guest fault miss count
+ b hpfNotFound

 /*
  * hw_set_user_space(pmap)
  *
@@ -3985,7 +4899,8 @@ LEXT(hw_set_user_space)
 andc r9,r10,r9 ; Turn off EE also
 mtmsr r9 ; Disable them
 isync ; Make sure FP and vec are off
- mfsprg r6,0 ; Get the per_proc_info address
+ mfsprg r6,1 ; Get the current activation
+ lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block
 lwz r2,ppUserPmapVirt(r6) ; Get our virtual pmap address
 mfsprg r4,2 ; Get the feature flags
 lwz r7,pmapvr(r3) ; Get the v to r translation
@@ -4012,7 +4927,8 @@ LEXT(hw_set_user_space_dis)
 lwz r7,pmapvr(r3) ; Get the v to r translation
 mfsprg r4,2 ; Get the feature flags
 lwz r8,pmapvr+4(r3) ; Get the v to r translation
- mfsprg r6,0 ; Get the per_proc_info address
+ mfsprg r6,1 ; Get the current activation
+ lwz r6,ACT_PER_PROC(r6) ; Get the per_proc block
 lwz r2,ppUserPmapVirt(r6) ; Get our virtual pmap address
 mtcrf 0x80,r4 ; Get the Altivec flag
 xor r4,r3,r8 ; Get bottom of the real address of bmap anchor
@@ -4343,7 +5259,6 @@ LEXT(hw_map_seg)
 lwz r0,pmapSpace(r3) ; Get the space, we will need it soon
 lwz r9,pmapFlags(r3) ; Get the flags for the keys now
 mfsprg r10,2 ; Get feature flags
- mfsprg r12,0 ; Get the per_proc
 
 ;
 ; Note: the following code would probably be easier to follow if I split it,
@@ -4409,7 +5324,7 @@ LEXT(hw_map_seg)
 xor r8,r8,r2 ; Calculate VSID
 
 bf-- pf64Bitb,hms32bit ; Skip out if 32-bit...
-
+ mfsprg r12,0 ; Get the per_proc
 li r0,1 ; Prepare to set bit 0 (also to clear EE)
 mfmsr r6 ; Get current MSR
 li r2,MASK(MSR_IR)|MASK(MSR_DR) ; Get the translation bits
@@ -4472,7 +5387,10 @@ hmsFreeSeg: subi r2,r7,1 ; Adjust for skipped slb 0
 
 .align 5
 
-hms32bit: rlwinm r8,r8,0,8,31 ; Clean up the VSID
+hms32bit:
+ mfsprg r12,1 ; Get the current activation
+ lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block
+ rlwinm r8,r8,0,8,31 ; Clean up the VSID
 rlwinm r2,r4,4,28,31 ; Isolate the segment we are setting
 lis r0,0x8000 ; Set bit 0
 rlwimi r8,r9,28,1,3 ; Insert the keys and N bit
@@ -4500,10 +5418,8 @@ hmsrupt: lwarx r6,0,r7 ; Get and reserve the valid segment flags
 
 LEXT(hw_blow_seg)
 
 mfsprg r10,2 ; Get feature flags
- mfsprg r12,0 ; Get the per_proc
 mtcrf 0x02,r10 ; move pf64Bit and pfNoMSRirb to cr5 and 6
 
- addi r7,r12,validSegs ; Point to the valid segment flags directly
 rlwinm r9,r4,0,0,3 ; Save low segment address and make sure it is clean
 
 bf-- pf64Bitb,hbs32bit ; Skip out if 32-bit...
@@ -4530,7 +5446,11 @@ LEXT(hw_blow_seg)
 
 .align 5
 
-hbs32bit: lwarx r4,0,r7 ; Get and reserve the valid segment flags
+hbs32bit:
+ mfsprg r12,1 ; Get the current activation
+ lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block
+ addi r7,r12,validSegs ; Point to the valid segment flags directly
+ lwarx r4,0,r7 ; Get and reserve the valid segment flags
 rlwinm r6,r9,4,28,31 ; Convert segment to number
 lis r2,0x8000 ; Set up a mask
 srw r2,r2,r6 ; Make a mask
@@ -4780,6 +5700,7 @@ ssg64Done: stw r15,pmapCCtl(r28) ; Unlock the segment cache controls
 ;
 ; We also return the original MSR in r11, the feature flags in R12,
 ; and CR6 set up so we can do easy branches for 64-bit
+; hw_clear_maps assumes r10, r9 will not be trashed.
 ;
 
 .align 5
@@ -4821,6 +5742,2279 @@ msuNoMSR: mr r2,r3 ; Save R3 across call
 blr ; Go back all set up...
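The shadow-hash probe implemented at hpfGVxlate above amounts to the following C, identical for both register widths. The GV_* geometry constants, flag values, and slot layout here are placeholders for the real definitions, and the assembly's pipelined fetch and unrolled final iteration are instruction-scheduling choices a C sketch does not need to reproduce.

#include <stdint.h>

#define GV_SLOTS    8u              /* assumed slots per hash group */
#define MPG_FREE    0x1u            /* assumed guest-free flag in mpFlags */
#define MPG_DORMANT 0x2u            /* assumed guest-dormant flag in mpFlags */

struct gv_slot_sk {                 /* hypothetical guest-mapping slot */
    uint32_t mpFlags;
    uint16_t mpSpace;
    uint64_t mpVAddr;               /* low 12 bits hold flags, not address */
};

/* hash -> group of GV_SLOTS slots, via the per-page physical index; the
 * page/group arithmetic is hidden here because its geometry is
 * configuration-dependent */
extern struct gv_slot_sk *gv_hash_group(uint32_t hash);

struct gv_slot_sk *gv_probe(uint32_t space_id, uint64_t vaddr) {
    uint64_t va = vaddr & ~0xFFFULL;                     /* page-align target */
    uint32_t hash = space_id ^ (uint32_t)(vaddr >> 12);  /* spaceID ^ (vaddr >> 12) */
    struct gv_slot_sk *slot = gv_hash_group(hash);

    for (unsigned i = 0; i < GV_SLOTS; i++, slot++) {
        /* hit <- !free && !dormant && space match && virtual addr match */
        if ((slot->mpFlags & (MPG_FREE | MPG_DORMANT)) == 0 &&
            slot->mpSpace == space_id &&
            (slot->mpVAddr & ~0xFFFULL) == va)
            return slot;                                 /* join hpfGVfound */
    }
    return 0;                                            /* join hpfGVmiss */
}

A hit re-joins the fault path at hpfGVfound with the slot as the mapping; a miss bumps vxsGpfMiss and re-joins at hpfNotFound.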
+; +; Guest shadow assist -- remove all guest mappings +; +; Remove all mappings for a guest pmap from the shadow hash table. +; +; Parameters: +; r3 : address of pmap, 32-bit kernel virtual address +; +; Non-volatile register usage: +; r24 : host pmap's physical address +; r25 : VMM extension block's physical address +; r26 : physent address +; r27 : guest pmap's space ID number +; r28 : current hash table page index +; r29 : guest pmap's physical address +; r30 : saved msr image +; r31 : current mapping +; + .align 5 + .globl EXT(hw_rem_all_gv) + +LEXT(hw_rem_all_gv) + +#define graStackSize ((31-24+1)*4)+4 + stwu r1,-(FM_ALIGN(graStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(graStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24 + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + + bt++ pf64Bitb,gra64Salt ; Test for 64-bit machine + lwz r25,pmapVmmExtPhys+4(r3) ; r25 <- VMM pmap extension block paddr + lwz r9,pmapvr+4(r3) ; Get 32-bit virt<->real conversion salt + lwz r24,vmxHostPmapPhys+4(r11) ; r24 <- host pmap's paddr + b graStart ; Get to it +gra64Salt: ld r25,pmapVmmExtPhys(r3) ; r25 <- VMM pmap extension block paddr + ld r9,pmapvr(r3) ; Get 64-bit virt<->real conversion salt + ld r24,vmxHostPmapPhys(r11) ; r24 <- host pmap's paddr +graStart: bl EXT(mapSetUp) ; Disable 'rupts, translation, enter 64-bit mode + xor r29,r3,r9 ; Convert pmap_t virt->real + mr r30,r11 ; Save caller's msr image + + la r3,pmapSXlk(r24) ; r3 <- host pmap's search lock + bl sxlkExclusive ; Get lock exclusive + + lwz r3,vxsGra(r25) ; Get remove all count + addi r3,r3,1 ; Increment remove all count + stw r3,vxsGra(r25) ; Update remove all count + + li r28,0 ; r28 <- first hash page table index to search + lwz r27,pmapSpace(r29) ; r27 <- guest pmap's space ID number +graPgLoop: + la r31,VMX_HPIDX_OFFSET(r25) ; Get base of hash page physical index + rlwinm r11,r28,GV_PGIDX_SZ_LG2,GV_HPAGE_MASK + ; Convert page index into page physical index offset + add r31,r31,r11 ; Calculate page physical index entry address + bt++ pf64Bitb,gra64Page ; Separate handling for 64-bit + lwz r31,4(r31) ; r31 <- first slot in hash table page to examine + b graLoop ; Examine all slots in this page +gra64Page: ld r31,0(r31) ; r31 <- first slot in hash table page to examine + b graLoop ; Examine all slots in this page + + .align 5 +graLoop: lwz r3,mpFlags(r31) ; Get mapping's flags + lhz r4,mpSpace(r31) ; Get mapping's space ID number + rlwinm r6,r3,0,mpgFree ; Isolate guest free mapping flag + xor r4,r4,r27 ; Compare space ID number + or. r0,r6,r4 ; cr0_eq <- !free && space id match + bne graMiss ; Not one of ours, skip it + + lwz r11,vxsGraHits(r25) ; Get remove hit count + addi r11,r11,1 ; Increment remove hit count + stw r11,vxsGraHits(r25) ; Update remove hit count + + rlwinm. r0,r3,0,mpgDormant ; Is this entry dormant? 
+ bne graRemPhys ; Yes, nothing to disconnect + + lwz r11,vxsGraActive(r25) ; Get remove active count + addi r11,r11,1 ; Increment remove hit count + stw r11,vxsGraActive(r25) ; Update remove hit count + + bt++ pf64Bitb,graDscon64 ; Handle 64-bit disconnect separately + bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change + ; r31 <- mapping's physical address + ; r3 -> PTE slot physical address + ; r4 -> High-order 32 bits of PTE + ; r5 -> Low-order 32 bits of PTE + ; r6 -> PCA + ; r7 -> PCA physical address + rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs) + b graFreePTE ; Join 64-bit path to release the PTE +graDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change + rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs) +graFreePTE: mr. r3,r3 ; Was there a valid PTE? + beq- graRemPhys ; No valid PTE, we're almost done + lis r0,0x8000 ; Prepare free bit for this slot + srw r0,r0,r2 ; Position free bit + or r6,r6,r0 ; Set it in our PCA image + lwz r8,mpPte(r31) ; Get PTE pointer + rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid + stw r8,mpPte(r31) ; Save invalidated PTE pointer + eieio ; Synchronize all previous updates (mapInvPtexx doesn't) + stw r6,0(r7) ; Update PCA and unlock the PTEG + +graRemPhys: + lwz r3,mpPAddr(r31) ; r3 <- physical 4K-page number + bl mapFindLockPN ; Find 'n' lock this page's physent + mr. r26,r3 ; Got lock on our physent? + beq-- graBadPLock ; No, time to bail out + + crset cr1_eq ; cr1_eq <- previous link is the anchor + bt++ pf64Bitb,graRemove64 ; Use 64-bit version on 64-bit machine + la r11,ppLink+4(r26) ; Point to chain anchor + lwz r9,ppLink+4(r26) ; Get chain anchor + rlwinm. r9,r9,0,~ppFlags ; Remove flags, yielding 32-bit physical chain pointer + +graRemLoop: beq- graRemoveMiss ; End of chain, this is not good + cmplw r9,r31 ; Is this the mapping to remove? + lwz r8,mpAlias+4(r9) ; Get forward chain pointer + bne graRemNext ; No, chain onward + bt cr1_eq,graRemRetry ; Mapping to remove is chained from anchor + stw r8,0(r11) ; Unchain gpv->phys mapping + b graRemoved ; Exit loop +graRemRetry: + lwarx r0,0,r11 ; Get previous link + rlwimi r0,r8,0,~ppFlags ; Insert new forward pointer whilst preserving flags + stwcx. r0,0,r11 ; Update previous link + bne- graRemRetry ; Lost reservation, retry + b graRemoved ; Good work, let's get outta here + +graRemNext: la r11,mpAlias+4(r9) ; Point to (soon to be) previous link + crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor + mr. r9,r8 ; Does next entry exist? + b graRemLoop ; Carry on + +graRemove64: + li r7,ppLFAmask ; Get mask to clean up mapping pointer + rotrdi r7,r7,ppLFArrot ; Rotate clean up mask to get 0xF0000000000000000F + la r11,ppLink(r26) ; Point to chain anchor + ld r9,ppLink(r26) ; Get chain anchor + andc. r9,r9,r7 ; Remove flags, yielding 64-bit physical chain pointer +graRem64Lp: beq-- graRemoveMiss ; End of chain, this is not good + cmpld r9,r31 ; Is this the mapping to remove? + ld r8,mpAlias(r9) ; Get forward chain pinter + bne graRem64Nxt ; Not mapping to remove, chain on, dude + bt cr1_eq,graRem64Rt ; Mapping to remove is chained from anchor + std r8,0(r11) ; Unchain gpv->phys mapping + b graRemoved ; Exit loop +graRem64Rt: ldarx r0,0,r11 ; Get previous link + and r0,r0,r7 ; Get flags + or r0,r0,r8 ; Insert new forward pointer + stdcx. 
r0,0,r11 ; Slam it back in + bne-- graRem64Rt ; Lost reservation, retry + b graRemoved ; Good work, let's go home + +graRem64Nxt: + la r11,mpAlias(r9) ; Point to (soon to be) previous link + crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor + mr. r9,r8 ; Does next entry exist? + b graRem64Lp ; Carry on + +graRemoved: + mr r3,r26 ; r3 <- physent's address + bl mapPhysUnlock ; Unlock the physent (and its chain of mappings) + + lwz r3,mpFlags(r31) ; Get mapping's flags + rlwinm r3,r3,0,~mpgFlags ; Clear all guest flags + ori r3,r3,mpgFree ; Mark mapping free + stw r3,mpFlags(r31) ; Update flags + +graMiss: addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping + rlwinm. r0,r31,0,GV_PAGE_MASK ; End of hash table page? + bne graLoop ; No, examine next slot + addi r28,r28,1 ; Increment hash table page index + cmplwi r28,GV_HPAGES ; End of hash table? + bne graPgLoop ; Examine next hash table page + + la r3,pmapSXlk(r24) ; r3 <- host pmap's search lock + bl sxlkUnlock ; Release host pmap's search lock + + bt++ pf64Bitb,graRtn64 ; Handle 64-bit separately + mtmsr r30 ; Restore 'rupts, translation + isync ; Throw a small wrench into the pipeline + b graPopFrame ; Nothing to do now but pop a frame and return +graRtn64: mtmsrd r30 ; Restore 'rupts, translation, 32-bit mode +graPopFrame: + lwz r0,(FM_ALIGN(graStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + +graBadPLock: +graRemoveMiss: + lis r0,hi16(Choke) ; Dmitri, you know how we've always talked about the + ori r0,r0,lo16(Choke) ; possibility of something going wrong with the bomb? + li r3,failMapping ; The BOMB, Dmitri. + sc ; The hydrogen bomb. + + +; +; Guest shadow assist -- remove local guest mappings +; +; Remove local mappings for a guest pmap from the shadow hash table. 
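+;
+; Rather than sweeping the whole shadow hash table, this routine walks the
+; guest pmap's active-map bit array: each word covers one band of hash-table
+; slots, and a lit bit marks a slot that may hold a live guest mapping.
+; As a rough editorial sketch in C (names such as act_map and slot_at are
+; illustrative stand-ins for the VMX_ACTMAP/VMX_HPIDX structures used below,
+; not a real interface):
+;
+;   for (w = 0; w < GV_MAP_WORDS; w++) {            // one word per slot band
+;       for (live = act_map[w]; live != 0; live &= ~bit) {
+;           bit = 0x80000000U >> clz(live);         // next lit bit (cntlzw)
+;           m = slot_at(w, bit);                    // mapping slot in hash page
+;           if (!m->global && m->space == guest_space)
+;               suspend(m);                         // mark dormant, invalidate PTE
+;       }
+;   }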
+; +; Parameters: +; r3 : address of guest pmap, 32-bit kernel virtual address +; +; Non-volatile register usage: +; r20 : current active map word's physical address +; r21 : current hash table page address +; r22 : updated active map word in process +; r23 : active map word in process +; r24 : host pmap's physical address +; r25 : VMM extension block's physical address +; r26 : physent address +; r27 : guest pmap's space ID number +; r28 : current active map index +; r29 : guest pmap's physical address +; r30 : saved msr image +; r31 : current mapping +; + .align 5 + .globl EXT(hw_rem_local_gv) + +LEXT(hw_rem_local_gv) + +#define grlStackSize ((31-20+1)*4)+4 + stwu r1,-(FM_ALIGN(grlStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(grlStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24 + stw r23,FM_ARG0+0x20(r1) ; Save non-volatile r23 + stw r22,FM_ARG0+0x24(r1) ; Save non-volatile r22 + stw r21,FM_ARG0+0x28(r1) ; Save non-volatile r21 + stw r20,FM_ARG0+0x2C(r1) ; Save non-volatile r20 + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + + bt++ pf64Bitb,grl64Salt ; Test for 64-bit machine + lwz r25,pmapVmmExtPhys+4(r3) ; r25 <- VMM pmap extension block paddr + lwz r9,pmapvr+4(r3) ; Get 32-bit virt<->real conversion salt + lwz r24,vmxHostPmapPhys+4(r11) ; r24 <- host pmap's paddr + b grlStart ; Get to it +grl64Salt: ld r25,pmapVmmExtPhys(r3) ; r25 <- VMM pmap extension block paddr + ld r9,pmapvr(r3) ; Get 64-bit virt<->real conversion salt + ld r24,vmxHostPmapPhys(r11) ; r24 <- host pmap's paddr + +grlStart: bl EXT(mapSetUp) ; Disable 'rupts, translation, enter 64-bit mode + xor r29,r3,r9 ; Convert pmap_t virt->real + mr r30,r11 ; Save caller's msr image + + la r3,pmapSXlk(r24) ; r3 <- host pmap's search lock + bl sxlkExclusive ; Get lock exclusive + + li r28,0 ; r28 <- index of first active map word to search + lwz r27,pmapSpace(r29) ; r27 <- guest pmap's space ID number + b grlMap1st ; Examine first map word + + .align 5 +grlNextMap: stw r22,0(r21) ; Save updated map word + addi r28,r28,1 ; Increment map word index + cmplwi r28,GV_MAP_WORDS ; See if we're done + beq grlDone ; Yup, let's get outta here + +grlMap1st: la r20,VMX_ACTMAP_OFFSET(r25) ; Get base of active map word array + rlwinm r11,r28,GV_MAPWD_SZ_LG2,GV_MAP_MASK + ; Convert map index into map index offset + add r20,r20,r11 ; Calculate map array element address + lwz r22,0(r20) ; Get active map word at index + mr. r23,r22 ; Any active mappings indicated? 
+ beq grlNextMap ; Nope, check next word
+
+ la r21,VMX_HPIDX_OFFSET(r25) ; Get base of hash page physical index
+ rlwinm r11,r28,GV_MAP_SHIFT,GV_HPAGE_MASK
+ ; Extract page index from map word index and convert
+ ; into page physical index offset
+ add r21,r21,r11 ; Calculate page physical index entry address
+ bt++ pf64Bitb,grl64Page ; Separate handling for 64-bit
+ lwz r21,4(r21) ; Get selected hash table page's address
+ b grlLoop ; Examine all slots in this page
+grl64Page: ld r21,0(r21) ; Get selected hash table page's address
+ b grlLoop ; Examine all slots in this page
+
+ .align 5
+grlLoop: cntlzw r11,r23 ; Get next active bit lit in map word
+ cmplwi r11,32 ; Any active mappings left in this word?
+ lis r12,0x8000 ; Prepare mask to reset bit
+ srw r12,r12,r11 ; Position mask bit
+ andc r23,r23,r12 ; Reset lit bit
+ beq grlNextMap ; No bits lit, examine next map word
+
+ slwi r31,r11,GV_SLOT_SZ_LG2 ; Get slot offset in slot band from lit bit number
+ rlwimi r31,r28,GV_BAND_SHIFT,GV_BAND_MASK
+ ; Extract slot band number from index and insert
+ add r31,r31,r21 ; Add hash page address yielding mapping slot address
+
+ lwz r3,mpFlags(r31) ; Get mapping's flags
+ lhz r4,mpSpace(r31) ; Get mapping's space ID number
+ rlwinm r5,r3,0,mpgGlobal ; Extract global bit
+ xor r4,r4,r27 ; Compare space ID number
+ or. r4,r4,r5 ; (space id miss || global)
+ bne grlLoop ; Not one of ours, skip it
+ andc r22,r22,r12 ; Reset active bit corresponding to this mapping
+ ori r3,r3,mpgDormant ; Mark entry dormant
+ stw r3,mpFlags(r31) ; Update mapping's flags
+
+ bt++ pf64Bitb,grlDscon64 ; Handle 64-bit disconnect separately
+ bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change
+ ; r31 <- mapping's physical address
+ ; r3 -> PTE slot physical address
+ ; r4 -> High-order 32 bits of PTE
+ ; r5 -> Low-order 32 bits of PTE
+ ; r6 -> PCA
+ ; r7 -> PCA physical address
+ rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs)
+ b grlFreePTE ; Join 64-bit path to release the PTE
+grlDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change
+ rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs)
+grlFreePTE: mr. r3,r3 ; Was there a valid PTE?
+ beq- grlLoop ; No valid PTE, we're done with this mapping + lis r0,0x8000 ; Prepare free bit for this slot + srw r0,r0,r2 ; Position free bit + or r6,r6,r0 ; Set it in our PCA image + lwz r8,mpPte(r31) ; Get PTE pointer + rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid + stw r8,mpPte(r31) ; Save invalidated PTE pointer + eieio ; Synchronize all previous updates (mapInvPtexx doesn't) + stw r6,0(r7) ; Update PCA and unlock the PTEG + b grlLoop ; On to next active mapping in this map word + +grlDone: la r3,pmapSXlk(r24) ; r3 <- host pmap's search lock + bl sxlkUnlock ; Release host pmap's search lock + + bt++ pf64Bitb,grlRtn64 ; Handle 64-bit separately + mtmsr r30 ; Restore 'rupts, translation + isync ; Throw a small wrench into the pipeline + b grlPopFrame ; Nothing to do now but pop a frame and return +grlRtn64: mtmsrd r30 ; Restore 'rupts, translation, 32-bit mode +grlPopFrame: + lwz r0,(FM_ALIGN(grlStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24 + lwz r23,FM_ARG0+0x20(r1) ; Restore non-volatile r23 + lwz r22,FM_ARG0+0x24(r1) ; Restore non-volatile r22 + lwz r21,FM_ARG0+0x28(r1) ; Restore non-volatile r21 + lwz r20,FM_ARG0+0x2C(r1) ; Restore non-volatile r20 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + + +; +; Guest shadow assist -- resume a guest mapping +; +; Locates the specified dormant mapping, and if it exists validates it and makes it +; active. 
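+;
+; In outline (an editorial pseudo-C sketch; the helper names are
+; illustrative, not actual xnu interfaces): the mapping may be resumed only
+; if the host's hva->phys mapping still exists on the same physent;
+; otherwise the stale guest mapping is deleted and the caller must redrive
+; through its long path:
+;
+;   m = shadow_hash_find(guest_pmap, gva);        // dormant gva->phys slot
+;   if (m == NULL) return mapRtNotFnd;
+;   if (physent_has_mapping(m->paddr, host_pmap, hva)) {
+;       m->pp = prot;                             // refresh protection bits
+;       m->flags &= ~mpgDormant;                  // mapping is live again
+;       return mapRtOK;
+;   }
+;   shadow_hash_delete(m);                        // host va or pa has changed
+;   return mapRtNotFnd;                           // caller takes the long path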
+; +; Parameters: +; r3 : address of host pmap, 32-bit kernel virtual address +; r4 : address of guest pmap, 32-bit kernel virtual address +; r5 : host virtual address, high-order 32 bits +; r6 : host virtual address, low-order 32 bits +; r7 : guest virtual address, high-order 32 bits +; r8 : guest virtual address, low-order 32 bits +; r9 : guest mapping protection code +; +; Non-volatile register usage: +; r23 : VMM extension block's physical address +; r24 : physent physical address +; r25 : caller's msr image from mapSetUp +; r26 : guest mapping protection code +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : host virtual address +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + .align 5 + .globl EXT(hw_res_map_gv) + +LEXT(hw_res_map_gv) + +#define grsStackSize ((31-23+1)*4)+4 + + stwu r1,-(FM_ALIGN(grsStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(grsStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24 + stw r23,FM_ARG0+0x20(r1) ; Save non-volatile r23 + + rlwinm r29,r6,0,0xFFFFF000 ; Clean up low-order 32 bits of host vaddr + rlwinm r30,r8,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr + mr r26,r9 ; Copy guest mapping protection code + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r4) ; r9 <- guest space ID number + bt++ pf64Bitb,grs64Salt ; Handle 64-bit machine separately + lwz r23,pmapVmmExtPhys+4(r3) ; r23 <- VMM pmap extension block paddr + lwz r27,pmapvr+4(r3) ; Get 32-bit virt<->real host pmap conversion salt + lwz r28,pmapvr+4(r4) ; Get 32-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b grsStart ; Get to it + +grs64Salt: rldimi r29,r5,32,0 ; Insert high-order 32 bits of 64-bit host vaddr + rldimi r30,r7,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r23,pmapVmmExtPhys(r3) ; r23 <- VMM pmap extension block paddr + ld r27,pmapvr(r3) ; Get 64-bit virt<->real host pmap conversion salt + ld r28,pmapvr(r4) ; Get 64-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +grsStart: xor r27,r3,r27 ; Convert host pmap_t virt->real + xor r28,r4,r28 ; Convert guest pmap_t virt->real + bl 
EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r25,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,grs64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b grs32SrchLp ; Let the search begin! + + .align 5 +grs32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + rlwinm r11,r6,0,mpgFree ; Isolate guest free flag + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match + beq grsSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz grs32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + rlwinm r11,r6,0,mpgFree ; Isolate guest free flag + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match + beq grsSrchHit ; Join common path on hit (r31 points to guest mapping) + b grsSrchMiss ; No joy in our hash group + +grs64Search: + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b grs64SrchLp ; Let the search begin! + + .align 5 +grs64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + rlwinm r11,r6,0,mpgFree ; Isolate guest free flag + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match + beq grsSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz grs64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + rlwinm r11,r6,0,mpgFree ; Isolate guest free flag + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match + bne grsSrchMiss ; No joy in our hash group + +grsSrchHit: + rlwinm. r0,r6,0,mpgDormant ; Is the mapping dormant? 
+ bne grsFindHost ; Yes, nothing to disconnect
+
+ bt++ pf64Bitb,grsDscon64 ; Handle 64-bit disconnect separately
+ bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change
+ ; r31 <- mapping's physical address
+ ; r3 -> PTE slot physical address
+ ; r4 -> High-order 32 bits of PTE
+ ; r5 -> Low-order 32 bits of PTE
+ ; r6 -> PCA
+ ; r7 -> PCA physical address
+ rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs)
+ b grsFreePTE ; Join 64-bit path to release the PTE
+grsDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change
+ rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs)
+grsFreePTE: mr. r3,r3 ; Was there a valid PTE?
+ beq- grsFindHost ; No valid PTE, we're almost done
+ lis r0,0x8000 ; Prepare free bit for this slot
+ srw r0,r0,r2 ; Position free bit
+ or r6,r6,r0 ; Set it in our PCA image
+ lwz r8,mpPte(r31) ; Get PTE pointer
+ rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid
+ stw r8,mpPte(r31) ; Save invalidated PTE pointer
+ eieio ; Synchronize all previous updates (mapInvPtexx didn't)
+ stw r6,0(r7) ; Update PCA and unlock the PTEG
+
+grsFindHost:
+
+// We now have a dormant guest mapping that matches our space id and virtual address. Our next
+// step is to locate the host mapping that completes the guest mapping's connection to a physical
+// frame. The guest and host mappings must connect to the same physical frame, so they must both
+// be chained on the same physent. We search the physent chain for a host mapping matching our
+// host's space id and the host virtual address. If we succeed, we know that the entire chain
+// of mappings (guest virtual->host virtual->physical) is valid, so the dormant mapping can be
+// resumed. If we fail to find the specified host virtual->physical mapping, it is because the
+// host virtual or physical address has changed since the guest mapping was suspended, so it
+// is no longer valid and cannot be resumed -- we therefore delete the guest mapping and tell
+// our caller that it will have to take its long path, translating the host virtual address
+// through the host's skiplist and installing a new guest mapping.
+
+ lwz r3,mpPAddr(r31) ; r3 <- physical 4K-page number
+ bl mapFindLockPN ; Find 'n' lock this page's physent
+ mr. r24,r3 ; Got lock on our physent?
+ beq-- grsBadPLock ; No, time to bail out
+
+ bt++ pf64Bitb,grsPFnd64 ; 64-bit version of physent chain search
+
+ lwz r9,ppLink+4(r24) ; Get first mapping on physent
+ lwz r6,pmapSpace(r27) ; Get host pmap's space id number
+ rlwinm r9,r9,0,~ppFlags ; Be-gone, unsightly flags
+grsPELoop: mr. r12,r9 ; Got a mapping to look at?
+ beq- grsPEMiss ; Nope, we've missed hva->phys mapping
+ lwz r7,mpFlags(r12) ; Get mapping's flags
+ lhz r4,mpSpace(r12) ; Get mapping's space id number
+ lwz r5,mpVAddr+4(r12) ; Get mapping's virtual address
+ lwz r9,mpAlias+4(r12) ; Next mapping in physent alias chain
+
+ rlwinm r0,r7,0,mpType ; Isolate mapping's type
+ rlwinm r5,r5,0,~mpHWFlags ; Bye-bye unsightly flags
+ xori r0,r0,mpNormal ; Normal mapping?
+ xor r4,r4,r6 ; Compare w/ host space id number
+ xor r5,r5,r29 ; Compare w/ host virtual address
+ or r0,r0,r4 ; r0 <- (wrong type || !space id)
+ or. r0,r0,r5 ; cr0_eq <- (right type && space id hit && hva hit)
+ beq grsPEHit ; Hit
+ b grsPELoop ; Iterate
+
+grsPFnd64: li r0,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+ ld r9,ppLink(r24) ; Get first mapping on physent
+ lwz r6,pmapSpace(r27) ; Get pmap's space id number
+ andc r9,r9,r0 ; Cleanup mapping pointer
+grsPELp64: mr. r12,r9 ; Got a mapping to look at?
+ beq-- grsPEMiss ; Nope, we've missed hva->phys mapping
+ lwz r7,mpFlags(r12) ; Get mapping's flags
+ lhz r4,mpSpace(r12) ; Get mapping's space id number
+ ld r5,mpVAddr(r12) ; Get mapping's virtual address
+ ld r9,mpAlias(r12) ; Next mapping in physent alias chain
+ rlwinm r0,r7,0,mpType ; Isolate mapping's type
+ rldicr r5,r5,0,mpHWFlagsb-1 ; Bye-bye unsightly flags
+ xori r0,r0,mpNormal ; Normal mapping?
+ xor r4,r4,r6 ; Compare w/ host space id number
+ xor r5,r5,r29 ; Compare w/ host virtual address
+ or r0,r0,r4 ; r0 <- (wrong type || !space id)
+ or. r0,r0,r5 ; cr0_eq <- (right type && space id hit && hva hit)
+ beq grsPEHit ; Hit
+ b grsPELp64 ; Iterate
+
+grsPEHit: lwz r0,mpVAddr+4(r31) ; Get va byte containing protection bits
+ rlwimi r0,r26,0,mpPP ; Insert new protection bits
+ stw r0,mpVAddr+4(r31) ; Write 'em back
+
+ eieio ; Ensure previous mapping updates are visible
+ lwz r0,mpFlags(r31) ; Get flags
+ rlwinm r0,r0,0,~mpgDormant ; Turn off dormant flag
+ stw r0,mpFlags(r31) ; Set updated flags, entry is now valid
+
+ li r31,mapRtOK ; Indicate success
+ b grsRelPhy ; Exit through physent lock release
+
+grsPEMiss: crset cr1_eq ; cr1_eq <- previous link is the anchor
+ bt++ pf64Bitb,grsRemove64 ; Use 64-bit version on 64-bit machine
+ la r11,ppLink+4(r24) ; Point to chain anchor
+ lwz r9,ppLink+4(r24) ; Get chain anchor
+ rlwinm. r9,r9,0,~ppFlags ; Remove flags, yielding 32-bit physical chain pointer
+grsRemLoop: beq- grsPEMissMiss ; End of chain, this is not good
+ cmplw r9,r31 ; Is this the mapping to remove?
+ lwz r8,mpAlias+4(r9) ; Get forward chain pointer
+ bne grsRemNext ; No, chain onward
+ bt cr1_eq,grsRemRetry ; Mapping to remove is chained from anchor
+ stw r8,0(r11) ; Unchain gpv->phys mapping
+ b grsDelete ; Finish deleting mapping
+grsRemRetry:
+ lwarx r0,0,r11 ; Get previous link
+ rlwimi r0,r8,0,~ppFlags ; Insert new forward pointer whilst preserving flags
+ stwcx. r0,0,r11 ; Update previous link
+ bne- grsRemRetry ; Lost reservation, retry
+ b grsDelete ; Finish deleting mapping
+
+ .align 5
+grsRemNext: la r11,mpAlias+4(r9) ; Point to (soon to be) previous link
+ crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor
+ mr. r9,r8 ; Does next entry exist?
+ b grsRemLoop ; Carry on
+
+grsRemove64:
+ li r7,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r7,r7,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+ la r11,ppLink(r24) ; Point to chain anchor
+ ld r9,ppLink(r24) ; Get chain anchor
+ andc. r9,r9,r7 ; Remove flags, yielding 64-bit physical chain pointer
+grsRem64Lp: beq-- grsPEMissMiss ; End of chain, this is not good
+ cmpld r9,r31 ; Is this the mapping to remove?
+ ld r8,mpAlias(r9) ; Get forward chain pointer
+ bne grsRem64Nxt ; Not mapping to remove, chain on, dude
+ bt cr1_eq,grsRem64Rt ; Mapping to remove is chained from anchor
+ std r8,0(r11) ; Unchain gpv->phys mapping
+ b grsDelete ; Finish deleting mapping
+grsRem64Rt: ldarx r0,0,r11 ; Get previous link
+ and r0,r0,r7 ; Get flags
+ or r0,r0,r8 ; Insert new forward pointer
+ stdcx.
r0,0,r11 ; Slam it back in + bne-- grsRem64Rt ; Lost reservation, retry + b grsDelete ; Finish deleting mapping + + .align 5 +grsRem64Nxt: + la r11,mpAlias(r9) ; Point to (soon to be) previous link + crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor + mr. r9,r8 ; Does next entry exist? + b grsRem64Lp ; Carry on + +grsDelete: + lwz r3,mpFlags(r31) ; Get mapping's flags + rlwinm r3,r3,0,~mpgFlags ; Clear all guest flags + ori r3,r3,mpgFree ; Mark mapping free + stw r3,mpFlags(r31) ; Update flags + + li r31,mapRtNotFnd ; Didn't succeed + +grsRelPhy: mr r3,r24 ; r3 <- physent addr + bl mapPhysUnlock ; Unlock physent chain + +grsRelPmap: la r3,pmapSXlk(r27) ; r3 <- host pmap search lock phys addr + bl sxlkUnlock ; Release host pmap search lock + +grsRtn: mr r3,r31 ; r3 <- result code + bt++ pf64Bitb,grsRtn64 ; Handle 64-bit separately + mtmsr r25 ; Restore 'rupts, translation + isync ; Throw a small wrench into the pipeline + b grsPopFrame ; Nothing to do now but pop a frame and return +grsRtn64: mtmsrd r25 ; Restore 'rupts, translation, 32-bit mode +grsPopFrame: + lwz r0,(FM_ALIGN(grsStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24 + lwz r23,FM_ARG0+0x20(r1) ; Restore non-volatile r23 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + + .align 5 +grsSrchMiss: + li r31,mapRtNotFnd ; Could not locate requested mapping + b grsRelPmap ; Exit through host pmap search lock release + +grsBadPLock: +grsPEMissMiss: + lis r0,hi16(Choke) ; Dmitri, you know how we've always talked about the + ori r0,r0,lo16(Choke) ; possibility of something going wrong with the bomb? + li r3,failMapping ; The BOMB, Dmitri. + sc ; The hydrogen bomb. + + +; +; Guest shadow assist -- add a guest mapping +; +; Adds a guest mapping. 
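+;
+; Slot selection within the target hash group goes free slot first, then the
+; first dormant slot, and only then steals the slot at the group's cursor.
+; A rough editorial pseudo-C sketch (illustrative names only):
+;
+;   grp = hash_group(guest_space, gva);
+;   if (find_slot(grp, gva)) return;           // already mapped; caller redrives
+;   m = find_free(grp);
+;   if (m == NULL) m = first_dormant(grp);
+;   if (m == NULL) m = steal_at_cursor(grp);   // victim unchained from physent
+;   fill(m, flags, space_id, pa, gva);
+;   chain_to_physent(m, pa);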
+; +; Parameters: +; r3 : address of host pmap, 32-bit kernel virtual address +; r4 : address of guest pmap, 32-bit kernel virtual address +; r5 : guest virtual address, high-order 32 bits +; r6 : guest virtual address, low-order 32 bits (with mpHWFlags) +; r7 : new mapping's flags +; r8 : physical address, 32-bit page number +; +; Non-volatile register usage: +; r22 : hash group's physical address +; r23 : VMM extension block's physical address +; r24 : mapping's flags +; r25 : caller's msr image from mapSetUp +; r26 : physent physical address +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : physical address, 32-bit 4k-page number +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + + .align 5 + .globl EXT(hw_add_map_gv) + + +LEXT(hw_add_map_gv) + +#define gadStackSize ((31-22+1)*4)+4 + + stwu r1,-(FM_ALIGN(gadStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(gadStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24 + stw r23,FM_ARG0+0x20(r1) ; Save non-volatile r23 + stw r22,FM_ARG0+0x24(r1) ; Save non-volatile r22 + + rlwinm r30,r5,0,1,0 ; Get high-order 32 bits of guest vaddr + rlwimi r30,r6,0,0,31 ; Get low-order 32 bits of guest vaddr + mr r24,r7 ; Copy guest mapping's flags + mr r29,r8 ; Copy target frame's physical address + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r4) ; r9 <- guest space ID number + bt++ pf64Bitb,gad64Salt ; Test for 64-bit machine + lwz r23,pmapVmmExtPhys+4(r3) ; r23 <- VMM pmap extension block paddr + lwz r27,pmapvr+4(r3) ; Get 32-bit virt<->real host pmap conversion salt + lwz r28,pmapvr+4(r4) ; Get 32-bit virt<->real guest pmap conversion salt + la r22,VMX_HPIDX_OFFSET(r11) ; r22 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r22,r22,r10 ; r22 <- hash page index entry + lwz r22,4(r22) ; r22 <- hash page paddr + rlwimi r22,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r22 <- hash group paddr + b gadStart ; Get to it + +gad64Salt: ld r23,pmapVmmExtPhys(r3) ; r23 <- VMM pmap extension block paddr + ld r27,pmapvr(r3) ; Get 64-bit virt<->real host pmap conversion salt + ld r28,pmapvr(r4) ; Get 64-bit virt<->real guest pmap conversion salt + la r22,VMX_HPIDX_OFFSET(r11) ; r22 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r22,r22,r10 ; r22 <- hash page index entry + ld r22,0(r22) ; r22 <- hash page paddr + insrdi r22,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r22 <- hash group paddr + +gadStart: xor r27,r3,r27 ; Convert host pmap_t virt->real + xor r28,r4,r28 ; Convert guest pmap_t virt->real + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r25,r11 
; Save caller's msr image
+
+ la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address
+ bl sxlkExclusive ; Get lock exclusive
+
+ mr r31,r22 ; Prepare to search this group
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ bt++ pf64Bitb,gad64Search ; Test for 64-bit machine
+
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address
+ clrrwi r12,r30,12 ; r12 <- virtual address we're searching for
+ b gad32SrchLp ; Let the search begin!
+
+ .align 5
+gad32SrchLp:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr
+ rlwinm r11,r6,0,mpgFree ; Isolate guest free flag
+ xor r7,r7,r9 ; Compare space ID
+ or r0,r11,r7 ; r0 <- !(!free && space match)
+ xor r8,r8,r12 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match
+ beq gadRelPmap ; Join common path on hit (r31 points to guest mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz gad32SrchLp ; Iterate
+
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ clrrwi r5,r5,12 ; Remove flags from virtual address
+ rlwinm r11,r6,0,mpgFree ; Isolate guest free flag
+ xor r4,r4,r9 ; Compare space ID
+ or r0,r11,r4 ; r0 <- !(!free && space match)
+ xor r5,r5,r12 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match
+ beq gadRelPmap ; Join common path on hit (r31 points to guest mapping)
+ b gadScan ; No joy in our hash group
+
+gad64Search:
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address
+ clrrdi r12,r30,12 ; r12 <- virtual address we're searching for
+ b gad64SrchLp ; Let the search begin!
+
+ .align 5
+gad64SrchLp:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr
+ rlwinm r11,r6,0,mpgFree ; Isolate guest free flag
+ xor r7,r7,r9 ; Compare space ID
+ or r0,r11,r7 ; r0 <- !(!free && space match)
+ xor r8,r8,r12 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && space match && virtual addr match
+ beq gadRelPmap ; Hit, let upper-level redrive sort it out
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz gad64SrchLp ; Iterate
+
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ clrrdi r5,r5,12 ; Remove flags from virtual address
+ rlwinm r11,r6,0,mpgFree ; Isolate guest free flag
+ xor r4,r4,r9 ; Compare space ID
+ or r0,r11,r4 ; r0 <- !(!free && space match)
+ xor r5,r5,r12 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && space match && virtual addr match
+ bne gadScan ; No joy in our hash group
+ b gadRelPmap ; Hit, let upper-level redrive sort it out
+
+gadScan: lbz r12,mpgCursor(r22) ; Get group's cursor
+ rlwinm r12,r12,GV_SLOT_SZ_LG2,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
+ ; Prepare to address slot at cursor
+ li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots
+ mtctr r0 ; in this group
+ or r2,r22,r12 ; r2 <- 1st mapping to search
+ lwz r3,mpFlags(r2) ; r3 <- 1st mapping slot's flags
+ li r11,0 ; No dormant entries found yet
+ b gadScanLoop ; Let the search begin!
+
+ .align 5
+gadScanLoop:
+ addi r12,r12,GV_SLOT_SZ ; Calculate next slot number to search
+ rlwinm r12,r12,0,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
+ ; Trim off any carry, wrapping into slot number range
+ mr r31,r2 ; r31 <- current mapping's address
+ or r2,r22,r12 ; r2 <- next mapping to search
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags(r2) ; r3 <- next mapping slot's flags
+ rlwinm. r0,r6,0,mpgFree ; Test free flag
+ bne gadFillMap ; Join common path on hit (r31 points to free mapping)
+ rlwinm r0,r6,0,mpgDormant ; Dormant entry?
+ xori r0,r0,mpgDormant ; Invert dormant flag
+ or. r0,r0,r11 ; Skip all but the first dormant entry we see
+ bne gadNotDorm ; Not dormant or we've already seen one
+ mr r11,r31 ; We'll use this dormant entry if we don't find a free one first
+gadNotDorm: bdnz gadScanLoop ; Iterate
+
+ mr r31,r2 ; r31 <- final mapping's address
+ rlwinm. r0,r6,0,mpgFree ; Test free flag in final mapping
+ bne gadFillMap ; Join common path on hit (r31 points to free mapping)
+ rlwinm r0,r6,0,mpgDormant ; Dormant entry?
+ xori r0,r0,mpgDormant ; Invert dormant flag
+ or. r0,r0,r11 ; Skip all but the first dormant entry we see
+ bne gadCkDormant ; Not dormant or we've already seen one
+ mr r11,r31 ; We'll use this dormant entry if we don't find a free one first
+
+gadCkDormant:
+ mr. r31,r11 ; Get dormant mapping, if any, and test
+ bne gadUpCursor ; Go update the cursor, we'll take the dormant entry
+
+gadSteal:
+ lbz r12,mpgCursor(r22) ; Get group's cursor
+ rlwinm r12,r12,GV_SLOT_SZ_LG2,(GV_SLOT_MASK << GV_SLOT_SZ_LG2)
+ ; Prepare to address slot at cursor
+ or r31,r22,r12 ; r31 <- address of mapping to steal
+
+ bt++ pf64Bitb,gadDscon64 ; Handle 64-bit disconnect separately
+ bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change
+ ; r31 <- mapping's physical address
+ ; r3 -> PTE slot physical address
+ ; r4 -> High-order 32 bits of PTE
+ ; r5 -> Low-order 32 bits of PTE
+ ; r6 -> PCA
+ ; r7 -> PCA physical address
+ rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs)
+ b gadFreePTE ; Join 64-bit path to release the PTE
+gadDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change
+ rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs)
+gadFreePTE: mr. r3,r3 ; Was there a valid PTE?
+ beq- gadUpCursor ; No valid PTE, we're almost done
+ lis r0,0x8000 ; Prepare free bit for this slot
+ srw r0,r0,r2 ; Position free bit
+ or r6,r6,r0 ; Set it in our PCA image
+ lwz r8,mpPte(r31) ; Get PTE pointer
+ rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid
+ stw r8,mpPte(r31) ; Save invalidated PTE pointer
+ eieio ; Synchronize all previous updates (mapInvPtexx didn't)
+ stw r6,0(r7) ; Update PCA and unlock the PTEG
+
+gadUpCursor:
+ rlwinm r12,r31,(32-GV_SLOT_SZ_LG2),GV_SLOT_MASK
+ ; Recover slot number from stolen mapping's address
+ addi r12,r12,1 ; Increment slot number
+ rlwinm r12,r12,0,GV_SLOT_MASK ; Clip to slot number range
+ stb r12,mpgCursor(r22) ; Update group's cursor
+
+ lwz r3,mpPAddr(r31) ; r3 <- physical 4K-page number
+ bl mapFindLockPN ; Find 'n' lock this page's physent
+ mr. r26,r3 ; Got lock on our physent?
+ beq-- gadBadPLock ; No, time to bail out
+
+ crset cr1_eq ; cr1_eq <- previous link is the anchor
+ bt++ pf64Bitb,gadRemove64 ; Use 64-bit version on 64-bit machine
+ la r11,ppLink+4(r26) ; Point to chain anchor
+ lwz r9,ppLink+4(r26) ; Get chain anchor
+ rlwinm. r9,r9,0,~ppFlags ; Remove flags, yielding 32-bit physical chain pointer
+gadRemLoop: beq- gadPEMissMiss ; End of chain, this is not good
+ cmplw r9,r31 ; Is this the mapping to remove?
+ lwz r8,mpAlias+4(r9) ; Get forward chain pointer
+ bne gadRemNext ; No, chain onward
+ bt cr1_eq,gadRemRetry ; Mapping to remove is chained from anchor
+ stw r8,0(r11) ; Unchain gpv->phys mapping
+ b gadDelDone ; Finish deleting mapping
+gadRemRetry:
+ lwarx r0,0,r11 ; Get previous link
+ rlwimi r0,r8,0,~ppFlags ; Insert new forward pointer whilst preserving flags
+ stwcx. r0,0,r11 ; Update previous link
+ bne- gadRemRetry ; Lost reservation, retry
+ b gadDelDone ; Finish deleting mapping
+
+gadRemNext: la r11,mpAlias+4(r9) ; Point to (soon to be) previous link
+ crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor
+ mr. r9,r8 ; Does next entry exist?
+ b gadRemLoop ; Carry on
+
+gadRemove64:
+ li r7,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r7,r7,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+ la r11,ppLink(r26) ; Point to chain anchor
+ ld r9,ppLink(r26) ; Get chain anchor
+ andc. r9,r9,r7 ; Remove flags, yielding 64-bit physical chain pointer
+gadRem64Lp: beq-- gadPEMissMiss ; End of chain, this is not good
+ cmpld r9,r31 ; Is this the mapping to remove?
+ ld r8,mpAlias(r9) ; Get forward chain pointer
+ bne gadRem64Nxt ; Not mapping to remove, chain on, dude
+ bt cr1_eq,gadRem64Rt ; Mapping to remove is chained from anchor
+ std r8,0(r11) ; Unchain gpv->phys mapping
+ b gadDelDone ; Finish deleting mapping
+gadRem64Rt: ldarx r0,0,r11 ; Get previous link
+ and r0,r0,r7 ; Get flags
+ or r0,r0,r8 ; Insert new forward pointer
+ stdcx. r0,0,r11 ; Slam it back in
+ bne-- gadRem64Rt ; Lost reservation, retry
+ b gadDelDone ; Finish deleting mapping
+
+ .align 5
+gadRem64Nxt:
+ la r11,mpAlias(r9) ; Point to (soon to be) previous link
+ crclr cr1_eq ; ~cr1_eq <- Previous link is not the anchor
+ mr. r9,r8 ; Does next entry exist?
+ b gadRem64Lp ; Carry on
+
+gadDelDone:
+ mr r3,r26 ; Get physent address
+ bl mapPhysUnlock ; Unlock physent chain
+
+gadFillMap:
+ lwz r12,pmapSpace(r28) ; Get guest space id number
+ li r2,0 ; Get a zero
+ stw r24,mpFlags(r31) ; Set mapping's flags
+ sth r12,mpSpace(r31) ; Set mapping's space id number
+ stw r2,mpPte(r31) ; Set mapping's pte pointer invalid
+ stw r29,mpPAddr(r31) ; Set mapping's physical address
+ bt++ pf64Bitb,gadVA64 ; Use 64-bit version on 64-bit machine
+ stw r30,mpVAddr+4(r31) ; Set mapping's virtual address (w/flags)
+ b gadChain ; Continue with chaining mapping to physent
+gadVA64: std r30,mpVAddr(r31) ; Set mapping's virtual address (w/flags)
+
+gadChain: mr r3,r29 ; r3 <- physical frame address
+ bl mapFindLockPN ; Find 'n' lock this page's physent
+ mr. r26,r3 ; Got lock on our physent?
+ beq-- gadBadPLock ; No, time to bail out
+
+ bt++ pf64Bitb,gadChain64 ; Use 64-bit version on 64-bit machine
+ lwz r12,ppLink+4(r26) ; Get forward chain
+ rlwinm r11,r12,0,~ppFlags ; Get physent's forward pointer sans flags
+ rlwimi r12,r31,0,~ppFlags ; Insert new mapping, preserve physent flags
+ stw r11,mpAlias+4(r31) ; New mapping will head chain
+ stw r12,ppLink+4(r26) ; Point physent to new mapping
+ b gadFinish ; All over now...
+
+gadChain64: li r7,ppLFAmask ; Get mask to clean up mapping pointer
+ rotrdi r7,r7,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F
+ ld r12,ppLink(r26) ; Get forward chain
+ andc r11,r12,r7 ; Get physent's forward chain pointer sans flags
+ and r12,r12,r7 ; Isolate pointer's flags
+ or r12,r12,r31 ; Insert new mapping's address forming pointer
+ std r11,mpAlias(r31) ; New mapping will head chain
+ std r12,ppLink(r26) ; Point physent to new mapping
+
+gadFinish: eieio ; Ensure new mapping is completely visible
+
+gadRelPhy: mr r3,r26 ; r3 <- physent addr
+ bl mapPhysUnlock ; Unlock physent chain
+
+gadRelPmap: la r3,pmapSXlk(r27) ; r3 <- host pmap search lock phys addr
+ bl sxlkUnlock ; Release host pmap search lock
+
+ bt++ pf64Bitb,gadRtn64 ; Handle 64-bit separately
+ mtmsr r25 ; Restore 'rupts, translation
+ isync ; Throw a small wrench into the pipeline
+ b gadPopFrame ; Nothing to do now but pop a frame and return
+gadRtn64: mtmsrd r25 ; Restore 'rupts, translation, 32-bit mode
+gadPopFrame:
+ lwz r0,(FM_ALIGN(gadStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
+ ; Get caller's return address
+ lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31
+ lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30
+ lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29
+ lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28
+ mtlr r0 ; Prepare return address
+ lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27
+ lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26
+ lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25
+ lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24
+ lwz r23,FM_ARG0+0x20(r1) ; Restore non-volatile r23
+ lwz r22,FM_ARG0+0x24(r1) ; Restore non-volatile r22
+ lwz r1,0(r1) ; Pop stack frame
+ blr ; Return to caller
+
+gadPEMissMiss:
+gadBadPLock:
+ lis r0,hi16(Choke) ; Dmitri, you know how we've always talked about the
+ ori r0,r0,lo16(Choke) ; possibility of something going wrong with the bomb?
+ li r3,failMapping ; The BOMB, Dmitri.
+ sc ; The hydrogen bomb.
+
+
+;
+; Guest shadow assist -- suspend a guest mapping
+;
+; Suspends a guest mapping.
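+;
+; In effect (editorial sketch with illustrative names): the mapping stays in
+; its hash group but is marked dormant, with any PTE it owns invalidated, so
+; a later hw_res_map_gv can revalidate it cheaply:
+;
+;   m = shadow_hash_find(guest_pmap, gva);
+;   if (m != NULL && !(m->flags & (mpgFree | mpgDormant))) {
+;       invalidate_pte(m);                       // mapInvPte32/64 below
+;       m->flags |= mpgDormant;
+;   }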
+; +; Parameters: +; r3 : address of host pmap, 32-bit kernel virtual address +; r4 : address of guest pmap, 32-bit kernel virtual address +; r5 : guest virtual address, high-order 32 bits +; r6 : guest virtual address, low-order 32 bits +; +; Non-volatile register usage: +; r26 : VMM extension block's physical address +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : caller's msr image from mapSetUp +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + + .align 5 + .globl EXT(hw_susp_map_gv) + +LEXT(hw_susp_map_gv) + +#define gsuStackSize ((31-26+1)*4)+4 + + stwu r1,-(FM_ALIGN(gsuStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(gsuStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + + rlwinm r30,r6,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r4) ; r9 <- guest space ID number + bt++ pf64Bitb,gsu64Salt ; Test for 64-bit machine + + lwz r26,pmapVmmExtPhys+4(r3) ; r26 <- VMM pmap extension block paddr + lwz r27,pmapvr+4(r3) ; Get 32-bit virt<->real host pmap conversion salt + lwz r28,pmapvr+4(r4) ; Get 32-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b gsuStart ; Get to it +gsu64Salt: rldimi r30,r5,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r26,pmapVmmExtPhys(r3) ; r26 <- VMM pmap extension block paddr + ld r27,pmapvr(r3) ; Get 64-bit virt<->real host pmap conversion salt + ld r28,pmapvr(r4) ; Get 64-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +gsuStart: xor r27,r3,r27 ; Convert host pmap_t virt->real + xor r28,r4,r28 ; Convert guest pmap_t virt->real + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r29,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,gsu64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b gsu32SrchLp ; Let the search begin! 
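+
+; (Editorial note: the search loops below are software-pipelined -- each
+;  iteration tests the current slot's fields while already loading the next
+;  slot's -- and the hit test is computed branch-free.  Per slot it is, in
+;  rough C terms:
+;
+;   hit = !(flags & (mpgFree | mpgDormant))
+;      && space == guest_space
+;      && (va & ~0xFFFULL) == gva;
+;
+;  folded into or-ed xors so that the final or. sets cr0_eq exactly on a
+;  match.)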
+ + .align 5 +gsu32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gsuSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gsu32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gsuSrchHit ; Join common path on hit (r31 points to guest mapping) + b gsuSrchMiss ; No joy in our hash group + +gsu64Search: + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b gsu64SrchLp ; Let the search begin! + + .align 5 +gsu64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gsuSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gsu64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + bne gsuSrchMiss ; No joy in our hash group + +gsuSrchHit: + bt++ pf64Bitb,gsuDscon64 ; Handle 64-bit disconnect separately + bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change + ; r31 <- mapping's physical address + ; r3 -> PTE slot physical address + ; r4 -> High-order 32 bits of PTE + ; r5 -> Low-order 32 bits of PTE + ; r6 -> PCA + ; r7 -> PCA physical address + rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs) + b gsuFreePTE ; Join 64-bit path to release the PTE +gsuDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change + rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs) +gsuFreePTE: mr. 
r3,r3 ; Was there a valid PTE?
+ beq- gsuNoPTE ; No valid PTE, we're almost done
+ lis r0,0x8000 ; Prepare free bit for this slot
+ srw r0,r0,r2 ; Position free bit
+ or r6,r6,r0 ; Set it in our PCA image
+ lwz r8,mpPte(r31) ; Get PTE pointer
+ rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid
+ stw r8,mpPte(r31) ; Save invalidated PTE pointer
+ eieio ; Synchronize all previous updates (mapInvPtexx didn't)
+ stw r6,0(r7) ; Update PCA and unlock the PTEG
+
+gsuNoPTE: lwz r3,mpFlags(r31) ; Get mapping's flags
+ ori r3,r3,mpgDormant ; Mark entry dormant
+ stw r3,mpFlags(r31) ; Save updated flags
+ eieio ; Ensure update is visible when we unlock
+
+gsuSrchMiss:
+ la r3,pmapSXlk(r27) ; r3 <- host pmap search lock phys addr
+ bl sxlkUnlock ; Release host pmap search lock
+
+ bt++ pf64Bitb,gsuRtn64 ; Handle 64-bit separately
+ mtmsr r29 ; Restore 'rupts, translation
+ isync ; Throw a small wrench into the pipeline
+ b gsuPopFrame ; Nothing to do now but pop a frame and return
+gsuRtn64: mtmsrd r29 ; Restore 'rupts, translation, 32-bit mode
+gsuPopFrame:
+ lwz r0,(FM_ALIGN(gsuStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
+ ; Get caller's return address
+ lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31
+ lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30
+ lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29
+ lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28
+ mtlr r0 ; Prepare return address
+ lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27
+ lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26
+ lwz r1,0(r1) ; Pop stack frame
+ blr ; Return to caller
+
+;
+; Guest shadow assist -- test guest mapping reference and change bits
+;
+; Locates the specified guest mapping, and if it exists gathers its reference
+; and change bits, optionally resetting them.
+;
+; Parameters:
+; r3 : address of host pmap, 32-bit kernel virtual address
+; r4 : address of guest pmap, 32-bit kernel virtual address
+; r5 : guest virtual address, high-order 32 bits
+; r6 : guest virtual address, low-order 32 bits
+; r7 : reset boolean
+;
+; Non-volatile register usage:
+; r24 : VMM extension block's physical address
+; r25 : return code (w/reference and change bits)
+; r26 : reset boolean
+; r27 : host pmap physical address
+; r28 : guest pmap physical address
+; r29 : caller's msr image from mapSetUp
+; r30 : guest virtual address
+; r31 : gva->phys mapping's physical address
+;
+
+ .align 5
+ .globl EXT(hw_test_rc_gv)
+
+LEXT(hw_test_rc_gv)
+
+#define gtdStackSize ((31-24+1)*4)+4
+
+ stwu r1,-(FM_ALIGN(gtdStackSize)+FM_SIZE)(r1)
+ ; Mint a new stack frame
+ mflr r0 ; Get caller's return address
+ mfsprg r11,2 ; Get feature flags
+ mtcrf 0x02,r11 ; Insert feature flags into cr6
+ stw r0,(FM_ALIGN(gtdStackSize)+FM_SIZE+FM_LR_SAVE)(r1)
+ ; Save caller's return address
+ stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31
+ stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30
+ stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29
+ stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28
+ stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27
+ stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26
+ stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25
+ stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24
+
+ rlwinm r30,r6,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr
+
+ lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr
+ lwz r9,pmapSpace(r4) ; r9 <- guest space ID number
+
+ bt++ pf64Bitb,gtd64Salt ; Test for 64-bit machine
+
+ lwz r24,pmapVmmExtPhys+4(r3) ; r24 <- VMM pmap extension block paddr
+ lwz r27,pmapvr+4(r3) ; Get 32-bit
virt<->real host pmap conversion salt + lwz r28,pmapvr+4(r4) ; Get 32-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b gtdStart ; Get to it + +gtd64Salt: rldimi r30,r5,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r24,pmapVmmExtPhys(r3) ; r24 <- VMM pmap extension block paddr + ld r27,pmapvr(r3) ; Get 64-bit virt<->real host pmap conversion salt + ld r28,pmapvr(r4) ; Get 64-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +gtdStart: xor r27,r3,r27 ; Convert host pmap_t virt->real + xor r28,r4,r28 ; Convert guest pmap_t virt->real + mr r26,r7 ; Save reset boolean + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r29,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,gtd64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b gtd32SrchLp ; Let the search begin! + + .align 5 +gtd32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gtdSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gtd32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. 
r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq gtdSrchHit ; Join common path on hit (r31 points to guest mapping)
+ b gtdSrchMiss ; No joy in our hash group
+
+gtd64Search:
+ lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags
+ lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID
+ ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address
+ b gtd64SrchLp ; Let the search begin!
+
+ .align 5
+gtd64SrchLp:
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags
+ mr r7,r4 ; r7 <- current mapping slot's space ID
+ lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID
+ clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags
+ ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr
+ andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags
+ xor r7,r7,r9 ; Compare space ID
+ or r0,r11,r7 ; r0 <- !(!free && !dormant && space match)
+ xor r8,r8,r30 ; Compare virtual address
+ or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ beq gtdSrchHit ; Join common path on hit (r31 points to guest mapping)
+
+ addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot
+ bdnz gtd64SrchLp ; Iterate
+
+ mr r6,r3 ; r6 <- current mapping slot's flags
+ clrrdi r5,r5,12 ; Remove flags from virtual address
+ andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags
+ xor r4,r4,r9 ; Compare space ID
+ or r0,r11,r4 ; r0 <- !(!free && !dormant && space match)
+ xor r5,r5,r30 ; Compare virtual address
+ or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match
+ bne gtdSrchMiss ; No joy in our hash group
+
+gtdSrchHit:
+ bt++ pf64Bitb,gtdDo64 ; Split for 64 bit
+
+ bl mapInvPte32 ; Invalidate and lock PTEG, also merge into physent
+
+ cmplwi cr1,r26,0 ; Do we want to clear RC?
+ lwz r12,mpVAddr+4(r31) ; Get the bottom of the mapping vaddr field
+ mr. r3,r3 ; Was there a previously valid PTE?
+ li r0,lo16(mpR|mpC) ; Get bits to clear
+
+ and r25,r5,r0 ; Copy RC bits into result
+ beq++ cr1,gtdNoClr32 ; Nope...
+
+ andc r12,r12,r0 ; Clear mapping copy of RC
+ andc r5,r5,r0 ; Clear PTE copy of RC
+ sth r12,mpVAddr+6(r31) ; Set the new RC in mapping
+
+gtdNoClr32: beq-- gtdNoOld32 ; No previously valid PTE...
+
+ sth r5,6(r3) ; Store updated RC in PTE
+ eieio ; Make sure we do not reorder
+ stw r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+gtdNoOld32: la r3,pmapSXlk(r27) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+ b gtdR32 ; Join common...
+
+ .align 5
+
+
+gtdDo64: bl mapInvPte64 ; Invalidate and lock PTEG, also merge into physent
+
+ cmplwi cr1,r26,0 ; Do we want to clear RC?
+ lwz r12,mpVAddr+4(r31) ; Get the bottom of the mapping vaddr field
+ mr. r3,r3 ; Was there a previously valid PTE?
+ li r0,lo16(mpR|mpC) ; Get bits to clear
+
+ and r25,r5,r0 ; Copy RC bits into result
+ beq++ cr1,gtdNoClr64 ; Nope...
+
+ andc r12,r12,r0 ; Clear mapping copy of RC
+ andc r5,r5,r0 ; Clear PTE copy of RC
+ sth r12,mpVAddr+6(r31) ; Set the new RC
+
+gtdNoClr64: beq-- gtdNoOld64 ; Nope, no previous PTE...
+
+ sth r5,14(r3) ; Store updated RC
+ eieio ; Make sure we do not reorder
+ std r4,0(r3) ; Revalidate the PTE
+
+ eieio ; Make sure all updates come first
+ stw r6,0(r7) ; Unlock PCA
+
+gtdNoOld64: la r3,pmapSXlk(r27) ; Point to the pmap search lock
+ bl sxlkUnlock ; Unlock the search list
+ b gtdR64 ; Join common...
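+
+; (Editorial sketch of the reference/change handling above, illustrative
+;  names only: mapInvPte32/64 hand back the invalidated PTE image, whose R
+;  and C bits become the result; on a reset request both the PTE copy and
+;  the mapping's cached copy are cleared before the PTE is revalidated:
+;
+;   rc = pte_lo & (mpR | mpC);                   // result returned in r25
+;   if (reset) {
+;       pte_lo &= ~(mpR | mpC);
+;       mapping_va_lo &= ~(mpR | mpC);           // mapping's cached RC
+;   }
+;   if (had_pte) revalidate(pte, pte_lo);        // store RC, eieio, set valid
+;  )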
+ +gtdSrchMiss: + la r3,pmapSXlk(r27) ; Point to the pmap search lock + bl sxlkUnlock ; Unlock the search list + li r25,mapRtNotFnd ; Get ready to return not found + bt++ pf64Bitb,gtdR64 ; Test for 64-bit machine + +gtdR32: mtmsr r29 ; Restore caller's msr image + isync + b gtdEpilog + +gtdR64: mtmsrd r29 ; Restore caller's msr image + +gtdEpilog: lwz r0,(FM_ALIGN(gtdStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + mr r3,r25 ; Get return code + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + +; +; Guest shadow assist -- convert guest to host virtual address +; +; Locates the specified guest mapping, and if it exists locates the +; first mapping belonging to its host on the physical chain and returns +; its virtual address. +; +; Note that if there are multiple mappings belonging to this host +; chained to the physent to which the guest mapping is chained, then +; host virtual aliases exist for this physical address. If host aliases +; exist, then we select the first on the physent chain, making it +; unpredictable which of the two or more possible host virtual addresses +; will be returned. +; +; Parameters: +; r3 : address of guest pmap, 32-bit kernel virtual address +; r4 : guest virtual address, high-order 32 bits +; r5 : guest virtual address, low-order 32 bits +; +; Non-volatile register usage: +; r24 : physent physical address +; r25 : VMM extension block's physical address +; r26 : host virtual address +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : caller's msr image from mapSetUp +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + + .align 5 + .globl EXT(hw_gva_to_hva) + +LEXT(hw_gva_to_hva) + +#define gthStackSize ((31-24+1)*4)+4 + + stwu r1,-(FM_ALIGN(gthStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(gthStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + stw r24,FM_ARG0+0x1C(r1) ; Save non-volatile r24 + + rlwinm r30,r5,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r3) ; r9 <- guest space ID number + + bt++ pf64Bitb,gth64Salt ; Test for 64-bit machine + + lwz r25,pmapVmmExtPhys+4(r3) ; r25 <- VMM pmap extension block paddr + lwz r28,pmapvr+4(r3) ; Get 32-bit virt<->real guest pmap conversion salt + lwz r27,vmxHostPmapPhys+4(r11) ; Get host pmap physical address + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm
r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b gthStart ; Get to it + +gth64Salt: rldimi r30,r4,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r25,pmapVmmExtPhys(r3) ; r25 <- VMM pmap extension block paddr + ld r28,pmapvr(r3) ; Get 64-bit virt<->real guest pmap conversion salt + ld r27,vmxHostPmapPhys(r11) ; Get host pmap physical address + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +gthStart: xor r28,r3,r28 ; Convert guest pmap_t virt->real + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r29,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,gth64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b gth32SrchLp ; Let the search begin! + + .align 5 +gth32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gthSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gth32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gthSrchHit ; Join common path on hit (r31 points to guest mapping) + b gthSrchMiss ; No joy in our hash group + +gth64Search: + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b gth64SrchLp ; Let the search begin!
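[Editorial sketch, not part of the patch.] The unrolled loops below (and their 32-bit twins above) all evaluate one predicate per hash-group slot: the slot must be neither free nor dormant, its space ID must equal the guest's, and its page-aligned virtual address must equal the search address. A minimal C sketch of that slot test follows, assuming hypothetical stand-in types and illustrative flag values (the real mpFlags/mpSpace/mpVAddr layout lives in the kernel's mapping structure, not shown here):

#include <stdint.h>

#define mpgFree    0x0001u  /* slot unused (illustrative value)   */
#define mpgDormant 0x0002u  /* slot inactive (illustrative value) */

typedef struct {            /* hypothetical stand-in for one hash-group mapping slot */
    uint32_t mpFlags;
    uint16_t mpSpace;
    uint64_t mpVAddr;       /* page address with flags in the low 12 bits */
} gv_slot_t;

/* A slot hits only when it is live, in the right space, and maps the page. */
static int gv_slot_hits(const gv_slot_t *mp, uint16_t space_id, uint64_t va)
{
    if (mp->mpFlags & (mpgFree | mpgDormant))
        return 0;                               /* free or dormant: skip  */
    if (mp->mpSpace != space_id)
        return 0;                               /* wrong address space    */
    return (mp->mpVAddr & ~0xFFFULL) == va;     /* compare page addresses */
}

The assembly computes the same answer branch-free, OR-ing the three miss conditions into r0 so a single recording compare decides the slot.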
+ + .align 5 +gth64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gthSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gth64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + bne gthSrchMiss ; No joy in our hash group + +gthSrchHit: lwz r3,mpPAddr(r31) ; r3 <- physical 4K-page number + bl mapFindLockPN ; Find 'n' lock this page's physent + mr. r24,r3 ; Got lock on our physent? + beq-- gthBadPLock ; No, time to bail out + + bt++ pf64Bitb,gthPFnd64 ; 64-bit version of physent chain search + + lwz r9,ppLink+4(r24) ; Get first mapping on physent + lwz r6,pmapSpace(r27) ; Get host pmap's space id number + rlwinm r9,r9,0,~ppFlags ; Be-gone, unsightly flags +gthPELoop: mr. r12,r9 ; Got a mapping to look at? + beq- gthPEMiss ; Nope, we've missed hva->phys mapping + lwz r7,mpFlags(r12) ; Get mapping's flags + lhz r4,mpSpace(r12) ; Get mapping's space id number + lwz r26,mpVAddr+4(r12) ; Get mapping's virtual address + lwz r9,mpAlias+4(r12) ; Next mapping in physent alias chain + + rlwinm r0,r7,0,mpType ; Isolate mapping's type + rlwinm r26,r26,0,~mpHWFlags ; Bye-bye unsightly flags + xori r0,r0,mpNormal ; Normal mapping? + xor r4,r4,r6 ; Compare w/ host space id number + or. r0,r0,r4 ; cr0_eq <- (normal && space id hit) + beq gthPEHit ; Hit + b gthPELoop ; Iterate + +gthPFnd64: li r0,ppLFAmask ; Get mask to clean up mapping pointer + rotrdi r0,r0,ppLFArrot ; Rotate clean up mask to get 0xF00000000000000F + ld r9,ppLink(r24) ; Get first mapping on physent + lwz r6,pmapSpace(r27) ; Get host pmap's space id number + andc r9,r9,r0 ; Cleanup mapping pointer +gthPELp64: mr. r12,r9 ; Got a mapping to look at? + beq-- gthPEMiss ; Nope, we've missed hva->phys mapping + lwz r7,mpFlags(r12) ; Get mapping's flags + lhz r4,mpSpace(r12) ; Get mapping's space id number + ld r26,mpVAddr(r12) ; Get mapping's virtual address + ld r9,mpAlias(r12) ; Next mapping in physent alias chain + rlwinm r0,r7,0,mpType ; Isolate mapping's type + rldicr r26,r26,0,mpHWFlagsb-1 ; Bye-bye unsightly flags + xori r0,r0,mpNormal ; Normal mapping? + xor r4,r4,r6 ; Compare w/ host space id number + or.
r0,r0,r4 ; cr0_eq <- (normal && space id hit) + beq gthPEHit ; Hit + b gthPELp64 ; Iterate + + .align 5 +gthPEMiss: mr r3,r24 ; Get physent's address + bl mapPhysUnlock ; Unlock physent chain +gthSrchMiss: + la r3,pmapSXlk(r27) ; Get host pmap search lock address + bl sxlkUnlock ; Release host pmap search lock + li r3,-1 ; Return 64-bit -1 + li r4,-1 + bt++ pf64Bitb,gthEpi64 ; Take 64-bit exit + b gthEpi32 ; Take 32-bit exit + + .align 5 +gthPEHit: mr r3,r24 ; Get physent's address + bl mapPhysUnlock ; Unlock physent chain + la r3,pmapSXlk(r27) ; Get host pmap search lock address + bl sxlkUnlock ; Release host pmap search lock + + bt++ pf64Bitb,gthR64 ; Test for 64-bit machine + +gthR32: li r3,0 ; High-order 32 bits host virtual address + mr r4,r26 ; Low-order 32 bits host virtual address +gthEpi32: mtmsr r29 ; Restore caller's msr image + isync + b gthEpilog + + .align 5 +gthR64: srdi r3,r26,32 ; High-order 32 bits host virtual address + clrldi r4,r26,32 ; Low-order 32 bits host virtual address +gthEpi64: mtmsrd r29 ; Restore caller's msr image + +gthEpilog: lwz r0,(FM_ALIGN(gthStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r24,FM_ARG0+0x1C(r1) ; Restore non-volatile r24 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + +gthBadPLock: + lis r0,hi16(Choke) ; Dmitri, you know how we've always talked about the + ori r0,r0,lo16(Choke) ; possibility of something going wrong with the bomb? + li r3,failMapping ; The BOMB, Dmitri. + sc ; The hydrogen bomb. + + +; +; Guest shadow assist -- find a guest mapping +; +; Locates the specified guest mapping, and if it exists returns a copy +; of it. 
+; +; Parameters: +; r3 : address of guest pmap, 32-bit kernel virtual address +; r4 : guest virtual address, high-order 32 bits +; r5 : guest virtual address, low-order 32 bits +; r6 : 32 byte copy area, 32-bit kernel virtual address +; +; Non-volatile register usage: +; r25 : VMM extension block's physical address +; r26 : copy area virtual address +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : caller's msr image from mapSetUp +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + + .align 5 + .globl EXT(hw_find_map_gv) + +LEXT(hw_find_map_gv) + +#define gfmStackSize ((31-25+1)*4)+4 + + stwu r1,-(FM_ALIGN(gfmStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(gfmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + + rlwinm r30,r5,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr + mr r26,r6 ; Copy copy buffer vaddr + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r3) ; r9 <- guest space ID number + + bt++ pf64Bitb,gfm64Salt ; Test for 64-bit machine + + lwz r25,pmapVmmExtPhys+4(r3) ; r25 <- VMM pmap extension block paddr + lwz r28,pmapvr+4(r3) ; Get 32-bit virt<->real guest pmap conversion salt + lwz r27,vmxHostPmapPhys+4(r11) ; Get host pmap physical address + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b gfmStart ; Get to it + +gfm64Salt: rldimi r30,r4,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r25,pmapVmmExtPhys(r3) ; r25 <- VMM pmap extension block paddr + ld r28,pmapvr(r3) ; Get 64-bit virt<->real guest pmap conversion salt + ld r27,vmxHostPmapPhys(r11) ; Get host pmap physical address + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +gfmStart: xor r28,r3,r28 ; Convert guest pmap_t virt->real + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r29,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,gfm64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual
address + b gfm32SrchLp ; Let the search begin! + + .align 5 +gfm32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gfmSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gfm32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gfmSrchHit ; Join common path on hit (r31 points to guest mapping) + b gfmSrchMiss ; No joy in our hash group + +gfm64Search: + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b gfm64SrchLp ; Let the search begin! + + .align 5 +gfm64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- !(!free && !dormant && space match) + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gfmSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gfm64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- !(!free && !dormant && space match) + xor r5,r5,r30 ; Compare virtual address + or. 
r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + bne gfmSrchMiss ; No joy in our hash group + +gfmSrchHit: lwz r5,0(r31) ; Fetch 32 bytes of mapping from physical + lwz r6,4(r31) ; +4 + lwz r7,8(r31) ; +8 + lwz r8,12(r31) ; +12 + lwz r9,16(r31) ; +16 + lwz r10,20(r31) ; +20 + lwz r11,24(r31) ; +24 + lwz r12,28(r31) ; +28 + + li r31,mapRtOK ; Return found mapping + + la r3,pmapSXlk(r27) ; Get host pmap search lock address + bl sxlkUnlock ; Release host pmap search lock + + bt++ pf64Bitb,gfmEpi64 ; Test for 64-bit machine + +gfmEpi32: mtmsr r29 ; Restore caller's msr image + isync ; A small wrench + b gfmEpilog ; and a larger bubble + + .align 5 +gfmEpi64: mtmsrd r29 ; Restore caller's msr image + +gfmEpilog: mr. r3,r31 ; Copy/test mapping address + beq gfmNotFound ; Skip copy if no mapping found + + stw r5,0(r26) ; Store 32 bytes of mapping into virtual + stw r6,4(r26) ; +4 + stw r7,8(r26) ; +8 + stw r8,12(r26) ; +12 + stw r9,16(r26) ; +16 + stw r10,20(r26) ; +20 + stw r11,24(r26) ; +24 + stw r12,28(r26) ; +28 + +gfmNotFound: + lwz r0,(FM_ALIGN(gfmStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + + .align 5 +gfmSrchMiss: + li r31,mapRtNotFnd ; Indicate mapping not found + la r3,pmapSXlk(r27) ; Get host pmap search lock address + bl sxlkUnlock ; Release host pmap search lock + bt++ pf64Bitb,gfmEpi64 ; Take 64-bit exit + b gfmEpi32 ; Take 32-bit exit + + +; +; Guest shadow assist -- change guest page protection +; +; Locates the specified guest mapping, and if it is active, changes its +; protection.
+; +; Parameters: +; r3 : address of guest pmap, 32-bit kernel virtual address +; r4 : guest virtual address, high-order 32 bits +; r5 : guest virtual address, low-order 32 bits +; r6 : guest mapping protection code +; +; Non-volatile register usage: +; r25 : caller's msr image from mapSetUp +; r26 : guest mapping protection code +; r27 : host pmap physical address +; r28 : guest pmap physical address +; r29 : VMM extension block's physical address +; r30 : guest virtual address +; r31 : gva->phys mapping's physical address +; + .align 5 + .globl EXT(hw_protect_gv) + +LEXT(hw_protect_gv) + +#define gcpStackSize ((31-24+1)*4)+4 + + stwu r1,-(FM_ALIGN(gcpStackSize)+FM_SIZE)(r1) + ; Mint a new stack frame + mflr r0 ; Get caller's return address + mfsprg r11,2 ; Get feature flags + mtcrf 0x02,r11 ; Insert feature flags into cr6 + stw r0,(FM_ALIGN(gcpStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Save caller's return address + stw r31,FM_ARG0+0x00(r1) ; Save non-volatile r31 + stw r30,FM_ARG0+0x04(r1) ; Save non-volatile r30 + stw r29,FM_ARG0+0x08(r1) ; Save non-volatile r29 + stw r28,FM_ARG0+0x0C(r1) ; Save non-volatile r28 + stw r27,FM_ARG0+0x10(r1) ; Save non-volatile r27 + stw r26,FM_ARG0+0x14(r1) ; Save non-volatile r26 + stw r25,FM_ARG0+0x18(r1) ; Save non-volatile r25 + + rlwinm r30,r5,0,0xFFFFF000 ; Clean up low-order 32 bits of guest vaddr + mr r26,r6 ; Copy guest mapping protection code + + lwz r11,pmapVmmExt(r3) ; r11 <- VMM pmap extension block vaddr + lwz r9,pmapSpace(r3) ; r9 <- guest space ID number + bt++ pf64Bitb,gcp64Salt ; Handle 64-bit machine separately + lwz r29,pmapVmmExtPhys+4(r3) ; r29 <- VMM pmap extension block paddr + lwz r27,vmxHostPmapPhys+4(r11) ; r27 <- host pmap paddr + lwz r28,pmapvr+4(r3) ; Get 32-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + lwz r31,4(r31) ; r31 <- hash page paddr + rlwimi r31,r11,GV_HGRP_SHIFT,GV_HGRP_MASK + ; r31 <- hash group paddr + b gcpStart ; Get to it + +gcp64Salt: rldimi r30,r4,32,0 ; Insert high-order 32 bits of 64-bit guest vaddr + ld r29,pmapVmmExtPhys(r3) ; r29 <- VMM pmap extension block paddr + ld r27,vmxHostPmapPhys(r11) ; r27 <- host pmap paddr + ld r28,pmapvr(r3) ; Get 64-bit virt<->real guest pmap conversion salt + la r31,VMX_HPIDX_OFFSET(r11) ; r31 <- base of hash page physical index + srwi r11,r30,12 ; Form shadow hash: + xor r11,r11,r9 ; spaceID ^ (vaddr >> 12) + rlwinm r10,r11,GV_HPAGE_SHIFT,GV_HPAGE_MASK + ; Form index offset from hash page number + add r31,r31,r10 ; r31 <- hash page index entry + ld r31,0(r31) ; r31 <- hash page paddr + insrdi r31,r11,GV_GRPS_PPG_LG2,64-(GV_HGRP_SHIFT+GV_GRPS_PPG_LG2) + ; r31 <- hash group paddr + +gcpStart: xor r28,r3,r28 ; Convert guest pmap_t virt->real + bl EXT(mapSetUp) ; Disable 'rupts, translation, maybe enter 64-bit mode + mr r25,r11 ; Save caller's msr image + + la r3,pmapSXlk(r27) ; r3 <- host pmap's search lock address + bl sxlkExclusive ; Get lock exclusive + + li r0,(GV_SLOTS - 1) ; Prepare to iterate over mapping slots + mtctr r0 ; in this group + bt++ pf64Bitb,gcp64Search ; Test for 64-bit machine + + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + lwz r5,mpVAddr+4(r31) ; r5 <- 1st mapping slot's virtual address + b gcp32SrchLp
; Let the search begin! + + .align 5 +gcp32SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrwi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + lwz r5,mpVAddr+4+GV_SLOT_SZ(r31); r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- free || dormant || !space match + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gcpSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gcp32SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrwi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- free || dormant || !space match + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gcpSrchHit ; Join common path on hit (r31 points to guest mapping) + b gcpSrchMiss ; No joy in our hash group + +gcp64Search: + lwz r3,mpFlags(r31) ; r3 <- 1st mapping slot's flags + lhz r4,mpSpace(r31) ; r4 <- 1st mapping slot's space ID + ld r5,mpVAddr(r31) ; r5 <- 1st mapping slot's virtual address + b gcp64SrchLp ; Let the search begin! + + .align 5 +gcp64SrchLp: + mr r6,r3 ; r6 <- current mapping slot's flags + lwz r3,mpFlags+GV_SLOT_SZ(r31) ; r3 <- next mapping slot's flags + mr r7,r4 ; r7 <- current mapping slot's space ID + lhz r4,mpSpace+GV_SLOT_SZ(r31) ; r4 <- next mapping slot's space ID + clrrdi r8,r5,12 ; r8 <- current mapping slot's virtual addr w/o flags + ld r5,mpVAddr+GV_SLOT_SZ(r31) ; r5 <- next mapping slot's virtual addr + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r7,r7,r9 ; Compare space ID + or r0,r11,r7 ; r0 <- free || dormant || !space match + xor r8,r8,r30 ; Compare virtual address + or. r0,r0,r8 ; cr0_eq <- !free && !dormant && space match && virtual addr match + beq gcpSrchHit ; Join common path on hit (r31 points to guest mapping) + + addi r31,r31,GV_SLOT_SZ ; r31 <- next mapping slot + bdnz gcp64SrchLp ; Iterate + + mr r6,r3 ; r6 <- current mapping slot's flags + clrrdi r5,r5,12 ; Remove flags from virtual address + andi. r11,r6,mpgFree+mpgDormant ; Isolate guest free and dormant flags + xor r4,r4,r9 ; Compare space ID + or r0,r11,r4 ; r0 <- free || dormant || !space match + xor r5,r5,r30 ; Compare virtual address + or. r0,r0,r5 ; cr0_eq <- !free && !dormant && space match && virtual addr match + bne gcpSrchMiss ; No joy in our hash group + +gcpSrchHit: + bt++ pf64Bitb,gcpDscon64 ; Handle 64-bit disconnect separately + bl mapInvPte32 ; Disconnect PTE, invalidate, gather ref and change + ; r31 <- mapping's physical address + ; r3 -> PTE slot physical address + ; r4 -> High-order 32 bits of PTE + ; r5 -> Low-order 32 bits of PTE + ; r6 -> PCA + ; r7 -> PCA physical address + rlwinm r2,r3,29,29,31 ; Get PTE's slot number in the PTEG (8-byte PTEs) + b gcpFreePTE ; Join 64-bit path to release the PTE +gcpDscon64: bl mapInvPte64 ; Disconnect PTE, invalidate, gather ref and change + rlwinm r2,r3,28,29,31 ; Get PTE's slot number in the PTEG (16-byte PTEs) +gcpFreePTE: mr. r3,r3 ; Was there a valid PTE?
+ beq- gcpSetKey ; No valid PTE, we're almost done + lis r0,0x8000 ; Prepare free bit for this slot + srw r0,r0,r2 ; Position free bit + or r6,r6,r0 ; Set it in our PCA image + lwz r8,mpPte(r31) ; Get PTE pointer + rlwinm r8,r8,0,~mpHValid ; Make the pointer invalid + stw r8,mpPte(r31) ; Save invalidated PTE pointer + eieio ; Synchronize all previous updates (mapInvPtexx didn't) + stw r6,0(r7) ; Update PCA and unlock the PTEG + +gcpSetKey: lwz r0,mpVAddr+4(r31) ; Get va word containing protection bits + rlwimi r0,r26,0,mpPP ; Insert new protection bits + stw r0,mpVAddr+4(r31) ; Write 'em back + eieio ; Ensure previous mapping updates are visible + li r31,mapRtOK ; I'm a success + +gcpRelPmap: la r3,pmapSXlk(r27) ; r3 <- host pmap search lock phys addr + bl sxlkUnlock ; Release host pmap search lock + + mr r3,r31 ; r3 <- result code + bt++ pf64Bitb,gcpRtn64 ; Handle 64-bit separately + mtmsr r25 ; Restore 'rupts, translation + isync ; Throw a small wrench into the pipeline + b gcpPopFrame ; Nothing to do now but pop a frame and return +gcpRtn64: mtmsrd r25 ; Restore 'rupts, translation, 32-bit mode +gcpPopFrame: + lwz r0,(FM_ALIGN(gcpStackSize)+FM_SIZE+FM_LR_SAVE)(r1) + ; Get caller's return address + lwz r31,FM_ARG0+0x00(r1) ; Restore non-volatile r31 + lwz r30,FM_ARG0+0x04(r1) ; Restore non-volatile r30 + lwz r29,FM_ARG0+0x08(r1) ; Restore non-volatile r29 + lwz r28,FM_ARG0+0x0C(r1) ; Restore non-volatile r28 + mtlr r0 ; Prepare return address + lwz r27,FM_ARG0+0x10(r1) ; Restore non-volatile r27 + lwz r26,FM_ARG0+0x14(r1) ; Restore non-volatile r26 + lwz r25,FM_ARG0+0x18(r1) ; Restore non-volatile r25 + lwz r1,0(r1) ; Pop stack frame + blr ; Return to caller + + .align 5 +gcpSrchMiss: + li r31,mapRtNotFnd ; Could not locate requested mapping + b gcpRelPmap ; Exit through host pmap search lock release + + ; ; Find the physent based on a physical page and try to lock it (but not too hard) ; Note that this table always has an entry that with a 0 table pointer at the end @@ -4992,7 +8186,95 @@ pmapCacheLookus: beq++ pmapCacheLookup ; Nope... b pmapCacheLookus ; Yup, keep waiting... + +; +; mapMergeRC -- Given a physical mapping address in R31, locate its +; connected PTE (if any) and merge the PTE referenced and changed bits +; into the mapping and physent. +; + + .align 5 + +mapMergeRC32: + lwz r0,mpPte(r31) ; Grab the PTE offset + mfsdr1 r7 ; Get the pointer to the hash table + lwz r5,mpVAddr+4(r31) ; Grab the virtual address + rlwinm r10,r7,0,0,15 ; Clean up the hash table base + andi. r3,r0,mpHValid ; Is there a possible PTE? + srwi r7,r0,4 ; Convert to PCA units + rlwinm r7,r7,0,0,29 ; Clean up PCA offset + mflr r2 ; Save the return + subfic r7,r7,-4 ; Convert to -4 based negative index + add r7,r10,r7 ; Point to the PCA directly + beqlr-- ; There was no PTE to start with... + bl mapLockPteg ; Lock the PTEG + + lwz r0,mpPte(r31) ; Grab the PTE offset + mtlr r2 ; Restore the LR + andi. r3,r0,mpHValid ; Is there a possible PTE? + beq- mMPUnlock ; There is no PTE, someone took it so just unlock and leave... 
+ + rlwinm r3,r0,0,0,30 ; Clear the valid bit + add r3,r3,r10 ; Point to actual PTE + lwz r5,4(r3) ; Get the real part of the PTE + srwi r10,r5,12 ; Change physical address to a ppnum + +mMNmerge: lbz r11,mpFlags+1(r31) ; Get the offset to the physical entry table + lwz r0,mpVAddr+4(r31) ; Get the flags part of the field + lis r8,hi16(EXT(pmap_mem_regions)) ; Get the top of the region table + ori r8,r8,lo16(EXT(pmap_mem_regions)) ; Get the bottom of the region table + rlwinm r11,r11,2,24,29 ; Mask index bits and convert to byte offset + add r11,r11,r8 ; Point to the bank table + lwz r2,mrPhysTab(r11) ; Get the physical table bank pointer + lwz r11,mrStart(r11) ; Get the start of bank + rlwimi r0,r5,0,mpRb-32,mpCb-32 ; Copy in the RC + addi r2,r2,4 ; Offset to last half of field + stw r0,mpVAddr+4(r31) ; Set the new RC into the field + sub r11,r10,r11 ; Get the index into the table + rlwinm r11,r11,3,0,28 ; Get offset to the physent + +mMmrgRC: lwarx r10,r11,r2 ; Get the master RC + rlwinm r0,r5,27,ppRb-32,ppCb-32 ; Position the new RC + or r0,r0,r10 ; Merge in the new RC + stwcx. r0,r11,r2 ; Try to stick it back + bne-- mMmrgRC ; Try again if we collided... + eieio ; Commit all updates + +mMPUnlock: + stw r6,0(r7) ; Unlock PTEG + blr ; Return + +; +; 64-bit version of mapMergeRC +; + .align 5 + +mapMergeRC64: + lwz r0,mpPte(r31) ; Grab the PTE offset + ld r5,mpVAddr(r31) ; Grab the virtual address + mfsdr1 r7 ; Get the pointer to the hash table + rldicr r10,r7,0,45 ; Clean up the hash table base + andi. r3,r0,mpHValid ; Is there a possible PTE? + srdi r7,r0,5 ; Convert to PCA units + rldicr r7,r7,0,61 ; Clean up PCA + subfic r7,r7,-4 ; Convert to -4 based negative index + mflr r2 ; Save the return + add r7,r10,r7 ; Point to the PCA directly + beqlr-- ; There was no PTE to start with... + + bl mapLockPteg ; Lock the PTEG + + lwz r0,mpPte(r31) ; Grab the PTE offset again + mtlr r2 ; Restore the LR + andi. r3,r0,mpHValid ; Is there a possible PTE? + beq-- mMPUnlock ; There is no PTE, someone took it so just unlock and leave... + + rlwinm r3,r0,0,0,30 ; Clear the valid bit + add r3,r3,r10 ; Point to the actual PTE + ld r5,8(r3) ; Get the real part + srdi r10,r5,12 ; Change physical address to a ppnum + b mMNmerge ; Join the common 32-64-bit code... ; @@ -5057,16 +8339,14 @@ mITLBIE32: lwarx r0,0,r8 ; Get the TLBIE lock li r0,0 ; Lock clear value tlbie r5 ; Invalidate it everywhere - + beq- mINoTS32 ; Can not have MP on this machine... 
eieio ; Make sure that the tlbie happens first tlbsync ; Wait for everyone to catch up sync ; Make sure of it all -mINoTS32: - stw r0,tlbieLock(0) ; Clear the tlbie lock - +mINoTS32: stw r0,tlbieLock(0) ; Clear the tlbie lock lwz r5,4(r3) ; Get the real part srwi r10,r5,12 ; Change physical address to a ppnum @@ -5074,7 +8354,7 @@ mINmerge: lbz r11,mpFlags+1(r31) ; Get the offset to the physical entry table lwz r0,mpVAddr+4(r31) ; Get the flags part of the field lis r8,hi16(EXT(pmap_mem_regions)) ; Get the top of the region table ori r8,r8,lo16(EXT(pmap_mem_regions)) ; Get the bottom of the region table - rlwinm r11,r11,2,0,29 ; Change index into byte offset + rlwinm r11,r11,2,24,29 ; Mask index bits and convert to byte offset add r11,r11,r8 ; Point to the bank table lwz r2,mrPhysTab(r11) ; Get the physical table bank pointer lwz r11,mrStart(r11) ; Get the start of bank @@ -5151,13 +8431,9 @@ mITLBIE64: lwarx r0,0,r8 ; Get the TLBIE lock eieio ; Make sure that the tlbie happens first tlbsync ; Wait for everyone to catch up - isync ptesync ; Wait for quiet again - -mINoTS64: + stw r0,tlbieLock(0) ; Clear the tlbie lock - - sync ; Make sure of it all ld r5,8(r3) ; Get the real part srdi r10,r5,12 ; Change physical address to a ppnum diff --git a/osfmk/ppc/interrupt.c b/osfmk/ppc/interrupt.c index 7924041cd..170ad2227 100644 --- a/osfmk/ppc/interrupt.c +++ b/osfmk/ppc/interrupt.c @@ -37,7 +37,13 @@ #include #include -perfTrap perfIntHook = 0; /* Pointer to performance trap hook routine */ +perfCallback perfIntHook = 0; /* Pointer to CHUD trap hook routine */ + +void unresolved_kernel_trap(int trapno, + struct savearea *ssp, + unsigned int dsisr, + addr64_t dar, + const char *message); struct savearea * interrupt( int type, @@ -46,8 +52,9 @@ struct savearea * interrupt( unsigned int dar) { int current_cpu; + struct per_proc_info *proc_info; uint64_t now; - thread_act_t act; + thread_t thread; disable_preemption(); @@ -61,9 +68,10 @@ struct savearea * interrupt( fctx_test(); } #endif - - + + current_cpu = cpu_number(); + proc_info = getPerProc(); switch (type) { @@ -73,18 +81,18 @@ struct savearea * interrupt( #if 0 if (pcsample_enable) { - if (find_user_regs(current_act())) - add_pcsamples (user_pc(current_act())); + if (find_user_regs(current_thread())) + add_pcsamples (user_pc(current_thread())); } #endif - act = current_act(); /* Find ourselves */ - if(act->mact.qactTimer != 0) { /* Is the timer set? */ + thread = current_thread(); /* Find ourselves */ + if(thread->machine.qactTimer != 0) { /* Is the timer set? */ clock_get_uptime(&now); /* Find out what time it is */ - if (act->mact.qactTimer <= now) { /* It is set, has it popped? */ - act->mact.qactTimer = 0; /* Clear single shot timer */ - if((unsigned int)act->mact.vmmControl & 0xFFFFFFFE) { /* Are there any virtual machines? */ - vmm_timer_pop(act); /* Yes, check out them out... */ + if (thread->machine.qactTimer <= now) { /* It is set, has it popped? */ + thread->machine.qactTimer = 0; /* Clear single shot timer */ + if((unsigned int)thread->machine.vmmControl & 0xFFFFFFFE) { /* Are there any virtual machines? */ + vmm_timer_pop(thread); /* Yes, check out them out... 
*/ } } } @@ -99,11 +107,11 @@ struct savearea * interrupt( KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, current_cpu, (unsigned int)ssp->save_srr0, 0, 0, 0); - per_proc_info[current_cpu].interrupt_handler( - per_proc_info[current_cpu].interrupt_target, - per_proc_info[current_cpu].interrupt_refCon, - per_proc_info[current_cpu].interrupt_nub, - per_proc_info[current_cpu].interrupt_source); + proc_info->interrupt_handler( + proc_info->interrupt_target, + proc_info->interrupt_refCon, + proc_info->interrupt_nub, + proc_info->interrupt_source); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0); @@ -122,11 +130,10 @@ struct savearea * interrupt( default: - #if MACH_KDP || MACH_KDB - (void)Call_Debugger(type, ssp); - #else - panic("Invalid interrupt type %x\n", type); - #endif +#if MACH_KDP || MACH_KDB + if (!Call_Debugger(type, ssp)) +#endif + unresolved_kernel_trap(type, ssp, dsisr, dar, NULL); break; } diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c index 98ae15845..60a1688ad 100644 --- a/osfmk/ppc/io_map.c +++ b/osfmk/ppc/io_map.c @@ -62,7 +62,7 @@ io_map(phys_addr, size) if (phys_addr != 0) { /* If they supplied a physical address, use it */ - size = round_page_32(size + (phys_addr & PAGE_MASK)); /* Make sure we map all of it */ + size = round_page(size + (phys_addr & PAGE_MASK)); /* Make sure we map all of it */ (void) kmem_alloc_pageable(kernel_map, &start, size); /* Get some virtual addresses to use */ @@ -105,10 +105,6 @@ io_map(phys_addr, size) vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size) { vm_offset_t start; - int i; - unsigned int j; - vm_page_t m; - if(kernel_map != VM_MAP_NULL) { /* If VM system is up, redirect to normal routine */ @@ -116,7 +112,7 @@ vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size) } - size = round_page_32(size + (phys_addr - (phys_addr & -PAGE_SIZE))); /* Extend the length to include it all */ + size = round_page(size + (phys_addr - (phys_addr & -PAGE_SIZE))); /* Extend the length to include it all */ start = pmap_boot_map(size); /* Get me some virtual address */ (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), diff --git a/osfmk/ppc/io_map_entries.h b/osfmk/ppc/io_map_entries.h index 4a19a33db..afdf4ac1c 100644 --- a/osfmk/ppc/io_map_entries.h +++ b/osfmk/ppc/io_map_entries.h @@ -23,6 +23,7 @@ * @OSF_COPYRIGHT@ * */ +#ifdef KERNEL_PRIVATE #ifndef _PPC_IO_MAP_ENTRIES_H_ #define _PPC_IO_MAP_ENTRIES_H_ @@ -33,3 +34,5 @@ extern vm_offset_t io_map( extern vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size); #endif /* _PPC_IO_MAP_ENTRIES_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/lock.h b/osfmk/ppc/lock.h index 0e36a9589..dde4dbbcc 100644 --- a/osfmk/ppc/lock.h +++ b/osfmk/ppc/lock.h @@ -53,53 +53,36 @@ * the rights to redistribute these changes. 
*/ +#ifdef KERNEL_PRIVATE + #ifndef _PPC_LOCK_H_ #define _PPC_LOCK_H_ -#include - -#ifdef __APPLE_API_PRIVATE +#ifdef MACH_KERNEL_PRIVATE #include #include -#include -#include - -extern unsigned int LockTimeOut; /* Number of hardware ticks of a lock timeout */ - -#if defined(MACH_KERNEL_PRIVATE) - -#include - -#if !(NCPUS == 1 || ETAP_LOCK_TRACE || USLOCK_DEBUG) +#include +#include -#include +#if !MACH_LDEBUG +typedef lck_mtx_t mutex_t; +#else +typedef lck_mtx_ext_t mutex_t; +#endif /* !MACH_LDEBUG */ -#define __slock_held_func__(l) hw_lock_held(l) +#if !MACH_LDEBUG +typedef lck_rw_t lock_t; +#else +typedef lck_rw_ext_t lock_t; +#endif /* !MACH_LDEBUG */ -extern void ppc_usimple_lock_init(simple_lock_t,etap_event_t); -extern void ppc_usimple_lock(simple_lock_t); -extern void ppc_usimple_unlock_rwmb(simple_lock_t); -extern void ppc_usimple_unlock_rwcmb(simple_lock_t); -extern unsigned int ppc_usimple_lock_try(simple_lock_t); - -#define MACHINE_SIMPLE_LOCK - -#define simple_lock_init(l,t) ppc_usimple_lock_init(l,t) -#define simple_lock(l) ppc_usimple_lock(l) -#define simple_unlock(l) ppc_usimple_unlock_rwcmb(l) -#define simple_unlock_rwmb(l) ppc_usimple_unlock_rwmb(l) -#define simple_lock_try(l) ppc_usimple_lock_try(l) -#define simple_lock_addr(l) (&(l)) -#define thread_sleep_simple_lock(l, e, i) \ - thread_sleep_fast_usimple_lock((l), (e), (i)) - -#define mutex_unlock(l) mutex_unlock_rwcmb(l) - -#endif /* !(NCPUS == 1 || ETAP_LOCK_TRACE || USLOCK_DEBUG) */ +extern unsigned int LockTimeOut; /* Number of hardware ticks of a lock timeout */ -#endif /* MACH_KERNEL_PRIVATE */ +#define mutex_unlock(l) mutex_unlock_rwcmb(l) -#endif /* __APPLE_API_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ #endif /* _PPC_LOCK_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/locks.h b/osfmk/ppc/locks.h new file mode 100644 index 000000000..ab878b6d4 --- /dev/null +++ b/osfmk/ppc/locks.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _PPC_LOCKS_H_ +#define _PPC_LOCKS_H_ + +#include +#ifdef MACH_KERNEL_PRIVATE +#include +#endif + + +#ifdef MACH_KERNEL_PRIVATE + +extern unsigned int LcksOpts; + +#define enaLkDeb 0x00000001 /* Request debug in default attribute */ +#define enaLkStat 0x00000002 /* Request statistic in default attribute */ + +#define disLkType 0x80000000 /* Disable type checking */ +#define disLktypeb 0 +#define disLkThread 0x40000000 /* Disable ownership checking */ +#define disLkThreadb 1 +#define enaLkExtStck 0x20000000 /* Enable extended backtrace */ +#define enaLkExtStckb 2 +#define disLkMyLck 0x10000000 /* Disable recursive lock detection */ +#define disLkMyLckb 3 + +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct { + unsigned int interlock; + unsigned int lck_spin_pad4[2]; +} lck_spin_t; + +#define LCK_SPIN_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[3]; +} lck_spin_t; +#else +typedef struct __lck_spin_t__ lck_spin_t; +#endif +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct _lck_mtx_ { + union { + struct { + unsigned int lck_mtxd_data; + unsigned short lck_mtxd_waiters; + unsigned short lck_mtxd_pri; + unsigned int lck_mtxd_pad8; + } lck_mtxd; + struct { + unsigned int lck_mtxi_tag; + struct _lck_mtx_ext_ *lck_mtxi_ptr; + unsigned int lck_mtxi_pad8; + } lck_mtxi; + } lck_mtx_sw; +} lck_mtx_t; + +#define lck_mtx_data lck_mtx_sw.lck_mtxd.lck_mtxd_data +#define lck_mtx_waiters lck_mtx_sw.lck_mtxd.lck_mtxd_waiters +#define lck_mtx_pri lck_mtx_sw.lck_mtxd.lck_mtxd_pri + +#define lck_mtx_tag lck_mtx_sw.lck_mtxi.lck_mtxi_tag +#define lck_mtx_ptr lck_mtx_sw.lck_mtxi.lck_mtxi_ptr + +#define LCK_MTX_TAG_INDIRECT 0x00001007 /* lock marked as Indirect */ +#define LCK_MTX_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ + +#define LCK_FRAMES_MAX 8 + +typedef struct { + unsigned int type; + vm_offset_t stack[LCK_FRAMES_MAX]; + vm_offset_t thread; +} lck_mtx_deb_t; + +#define MUTEX_TAG 0x4d4d + +typedef struct { + unsigned int lck_mtx_stat_data; +} lck_mtx_stat_t; + +typedef struct _lck_mtx_ext_ { + lck_mtx_t lck_mtx; + struct _lck_grp_ *lck_mtx_grp; + unsigned int lck_mtx_attr; + lck_mtx_deb_t lck_mtx_deb; + lck_mtx_stat_t lck_mtx_stat; +} lck_mtx_ext_t; + +#define LCK_MTX_ATTR_DEBUG 0x1 +#define LCK_MTX_ATTR_DEBUGb 31 +#define LCK_MTX_ATTR_STAT 0x2 +#define LCK_MTX_ATTR_STATb 30 + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[3]; +} lck_mtx_t; +#else +typedef struct __lck_mtx_t__ lck_mtx_t; +#endif +#endif + +#ifdef MACH_KERNEL_PRIVATE +typedef struct { + union { + struct { + unsigned int lck_rwd_shared_cnt:16, /* No.
of shared granted request */ + lck_rwd_pad16:12, /* padding */ + lck_rwd_want_excl:1, /* Writer is waiting, or locked for write */ + lck_rwd_want_upgrade:1, /* Read-to-write upgrade waiting */ + lck_rwd_waiting:1, /* Someone is sleeping on lock */ + lck_rwd_interlock:1; /* Read-to-write upgrade waiting */ + unsigned int lck_rwd_pad4; + unsigned int lck_rwd_pad8; + } lck_rwd; + struct { + unsigned int lck_rwi_tag; + struct _lck_rw_ext_ *lck_rwi_ptr; + unsigned int lck_rwi_pad8; + } lck_rwi; + } lck_rw_sw; +} lck_rw_t; + +#define lck_rw_interlock lck_rw_sw.lck_rwd.lck_rwd_interlock +#define lck_rw_want_upgrade lck_rw_sw.lck_rwd.lck_rwd_want_upgrade +#define lck_rw_want_excl lck_rw_sw.lck_rwd.lck_rwd_want_excl +#define lck_rw_waiting lck_rw_sw.lck_rwd.lck_rwd_waiting +#define lck_rw_shared_cnt lck_rw_sw.lck_rwd.lck_rwd_shared_cnt + +#define lck_rw_tag lck_rw_sw.lck_rwi.lck_rwi_tag +#define lck_rw_ptr lck_rw_sw.lck_rwi.lck_rwi_ptr + +typedef struct { + unsigned int type; + vm_offset_t stack[LCK_FRAMES_MAX]; + thread_t thread; + void (*pc_excl)(void); + void (*pc_done)(void); +} lck_rw_deb_t; + +#define RW_TAG 0x5d5d + +typedef struct { + unsigned int lck_rw_stat_data; +} lck_rw_stat_t; + +typedef struct _lck_rw_ext_ { + lck_rw_t lck_rw; + struct _lck_grp_ *lck_rw_grp; + unsigned int lck_rw_attr; + lck_rw_deb_t lck_rw_deb; + lck_rw_stat_t lck_rw_stat; +} lck_rw_ext_t; + +#define LCK_RW_ATTR_DEBUG 0x1 +#define LCK_RW_ATTR_DEBUGb 31 +#define LCK_RW_ATTR_STAT 0x2 +#define LCK_RW_ATTR_STATb 30 +#define LCK_RW_ATTR_DIS_THREAD 0x40000000 +#define LCK_RW_ATTR_DIS_THREADb 1 +#define LCK_RW_ATTR_DIS_MYLOCK 0x10000000 +#define LCK_RW_ATTR_DIS_MYLOCKb 3 + +#define LCK_RW_TAG_INDIRECT 0x00001107 /* lock marked as Indirect */ +#define LCK_RW_TAG_DESTROYED 0x00002107 /* lock marked as Destroyed */ + +#else +#ifdef KERNEL_PRIVATE +typedef struct { + unsigned int opaque[3]; +} lck_rw_t; +#else +typedef struct __lck_rw_t__ lck_rw_t; +#endif +#endif + +#endif /* _PPC_LOCKS_H_ */ diff --git a/osfmk/ppc/locks_ppc.c b/osfmk/ppc/locks_ppc.c new file mode 100644 index 000000000..4ea067e9d --- /dev/null +++ b/osfmk/ppc/locks_ppc.c @@ -0,0 +1,2054 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/lock.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives implementation + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if MACH_KDB +#include +#include +#include +#include +#endif /* MACH_KDB */ + +#ifdef __ppc__ +#include +#endif + +#include + +#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100 +#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101 +#define LCK_RW_LCK_SHARED_CODE 0x102 +#define LCK_RW_LCK_SH_TO_EX_CODE 0x103 +#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104 +#define LCK_RW_LCK_EX_TO_SH_CODE 0x105 + + +#define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) + +unsigned int lock_wait_time[2] = { (unsigned int)-1, 0 } ; + +/* Forwards */ + + +#if USLOCK_DEBUG +/* + * Perform simple lock checks. + */ +int uslock_check = 1; +int max_lock_loops = 100000000; +decl_simple_lock_data(extern , printf_lock) +decl_simple_lock_data(extern , panic_lock) +#if MACH_KDB +decl_simple_lock_data(extern , kdb_lock) +#endif /* MACH_KDB */ +#endif /* USLOCK_DEBUG */ + + +/* + * We often want to know the addresses of the callers + * of the various lock routines. However, this information + * is only used for debugging and statistics. + */ +typedef void *pc_t; +#define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS) +#define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS) +#if ANY_LOCK_DEBUG +#define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l))) +#else /* ANY_LOCK_DEBUG */ +#ifdef lint +/* + * Eliminate lint complaints about unused local pc variables. + */ +#define OBTAIN_PC(pc,l) ++pc +#else /* lint */ +#define OBTAIN_PC(pc,l) +#endif /* lint */ +#endif /* ANY_LOCK_DEBUG */ + + +/* + * Portable lock package implementation of usimple_locks.
+ */ + +#if USLOCK_DEBUG +#define USLDBG(stmt) stmt +void usld_lock_init(usimple_lock_t, unsigned short); +void usld_lock_pre(usimple_lock_t, pc_t); +void usld_lock_post(usimple_lock_t, pc_t); +void usld_unlock(usimple_lock_t, pc_t); +void usld_lock_try_pre(usimple_lock_t, pc_t); +void usld_lock_try_post(usimple_lock_t, pc_t); +int usld_lock_common_checks(usimple_lock_t, char *); +#else /* USLOCK_DEBUG */ +#define USLDBG(stmt) +#endif /* USLOCK_DEBUG */ + +/* + * Routine: lck_spin_alloc_init + */ +lck_spin_t * +lck_spin_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr) { + lck_spin_t *lck; + + if ((lck = (lck_spin_t *)kalloc(sizeof(lck_spin_t))) != 0) + lck_spin_init(lck, grp, attr); + + return(lck); +} + +/* + * Routine: lck_spin_free + */ +void +lck_spin_free( + lck_spin_t *lck, + lck_grp_t *grp) { + lck_spin_destroy(lck, grp); + kfree((void *)lck, sizeof(lck_spin_t)); +} + +/* + * Routine: lck_spin_init + */ +void +lck_spin_init( + lck_spin_t *lck, + lck_grp_t *grp, + __unused lck_attr_t *attr) { + + lck->interlock = 0; + lck_grp_reference(grp); + lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN); +} + +/* + * Routine: lck_spin_destroy + */ +void +lck_spin_destroy( + lck_spin_t *lck, + lck_grp_t *grp) { + if (lck->interlock == LCK_SPIN_TAG_DESTROYED) + return; + lck->interlock = LCK_SPIN_TAG_DESTROYED; + lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN); + lck_grp_deallocate(grp); +} + +/* + * Initialize a usimple_lock. + * + * No change in preemption state. + */ +void +usimple_lock_init( + usimple_lock_t l, + unsigned short tag) +{ +#ifndef MACHINE_SIMPLE_LOCK + USLDBG(usld_lock_init(l, tag)); + hw_lock_init(&l->interlock); +#else + simple_lock_init((simple_lock_t)l,tag); +#endif +} + + +/* + * Acquire a usimple_lock. + * + * Returns with preemption disabled. Note + * that the hw_lock routines are responsible for + * maintaining preemption state. + */ +void +usimple_lock( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + int i; + pc_t pc; +#if USLOCK_DEBUG + int count = 0; +#endif /* USLOCK_DEBUG */ + + OBTAIN_PC(pc, l); + USLDBG(usld_lock_pre(l, pc)); + + if(!hw_lock_to(&l->interlock, LockTimeOut)) /* Try to get the lock with a timeout */ + panic("simple lock deadlock detection - l=0x%08X, cpu=%d, ret=0x%08X", l, cpu_number(), pc); + + USLDBG(usld_lock_post(l, pc)); +#else + simple_lock((simple_lock_t)l); +#endif +} + + +/* + * Release a usimple_lock. + * + * Returns with preemption enabled. Note + * that the hw_lock routines are responsible for + * maintaining preemption state. + */ +void +usimple_unlock( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + pc_t pc; + + OBTAIN_PC(pc, l); + USLDBG(usld_unlock(l, pc)); + sync(); + hw_lock_unlock(&l->interlock); +#else + simple_unlock_rwmb((simple_lock_t)l); +#endif +} + + +/* + * Conditionally acquire a usimple_lock. + * + * On success, returns with preemption disabled. + * On failure, returns with preemption in the same state + * as when first invoked. Note that the hw_lock routines + * are responsible for maintaining preemption state. + * + * XXX No stats are gathered on a miss; I preserved this + * behavior from the original assembly-language code, but + * doesn't it make sense to log misses? 
XXX + */ +unsigned int +usimple_lock_try( + usimple_lock_t l) +{ +#ifndef MACHINE_SIMPLE_LOCK + pc_t pc; + unsigned int success; + + OBTAIN_PC(pc, l); + USLDBG(usld_lock_try_pre(l, pc)); + if (success = hw_lock_try(&l->interlock)) { + USLDBG(usld_lock_try_post(l, pc)); + } + return success; +#else + return(simple_lock_try((simple_lock_t)l)); +#endif +} + +#if USLOCK_DEBUG +/* + * States of a usimple_lock. The default when initializing + * a usimple_lock is setting it up for debug checking. + */ +#define USLOCK_CHECKED 0x0001 /* lock is being checked */ +#define USLOCK_TAKEN 0x0002 /* lock has been taken */ +#define USLOCK_INIT 0xBAA0 /* lock has been initialized */ +#define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED) +#define USLOCK_CHECKING(l) (uslock_check && \ + ((l)->debug.state & USLOCK_CHECKED)) + +/* + * Trace activities of a particularly interesting lock. + */ +void usl_trace(usimple_lock_t, int, pc_t, const char *); + + +/* + * Initialize the debugging information contained + * in a usimple_lock. + */ +void +usld_lock_init( + usimple_lock_t l, + unsigned short tag) +{ + if (l == USIMPLE_LOCK_NULL) + panic("lock initialization: null lock pointer"); + l->lock_type = USLOCK_TAG; + l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0; + l->debug.lock_cpu = l->debug.unlock_cpu = 0; + l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC; + l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD; + l->debug.duration[0] = l->debug.duration[1] = 0; + l->debug.unlock_cpu = l->debug.unlock_cpu = 0; + l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC; + l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD; +} + + +/* + * These checks apply to all usimple_locks, not just + * those with USLOCK_CHECKED turned on. + */ +int +usld_lock_common_checks( + usimple_lock_t l, + char *caller) +{ + if (l == USIMPLE_LOCK_NULL) + panic("%s: null lock pointer", caller); + if (l->lock_type != USLOCK_TAG) + panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l); + if (!(l->debug.state & USLOCK_INIT)) + panic("%s: 0x%x is not an initialized lock", + caller, (integer_t) l); + return USLOCK_CHECKING(l); +} + + +/* + * Debug checks on a usimple_lock just before attempting + * to acquire it. + */ +/* ARGSUSED */ +void +usld_lock_pre( + usimple_lock_t l, + pc_t pc) +{ + char *caller = "usimple_lock"; + + + if (!usld_lock_common_checks(l, caller)) + return; + +/* + * Note that we have a weird case where we are getting a lock when we are + * in the process of putting the system to sleep. We are running with no + * current threads, therefore we can't tell if we are trying to retake a lock + * we have or someone on the other processor has it. Therefore we just + * ignore this test if the locking thread is 0. + */ + + if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread && + l->debug.lock_thread == (void *) current_thread()) { + printf("%s: lock 0x%x already locked (at 0x%x) by", + caller, (integer_t) l, l->debug.lock_pc); + printf(" current thread 0x%x (new attempt at pc 0x%x)\n", + l->debug.lock_thread, pc); + panic(caller); + } + mp_disable_preemption(); + usl_trace(l, cpu_number(), pc, caller); + mp_enable_preemption(); +} + + +/* + * Debug checks on a usimple_lock just after acquiring it. + * + * Pre-emption has been disabled at this point, + * so we are safe in using cpu_number.
+/*
+ * Debug checks on a usimple_lock just after acquiring it.
+ *
+ * Preemption has been disabled at this point,
+ * so we are safe in using cpu_number.
+ */
+void
+usld_lock_post(
+	usimple_lock_t	l,
+	pc_t		pc)
+{
+	register int	mycpu;
+	char		*caller = "successful usimple_lock";
+
+
+	if (!usld_lock_common_checks(l, caller))
+		return;
+
+	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+		panic("%s: lock 0x%x became uninitialized",
+		      caller, (integer_t) l);
+	if ((l->debug.state & USLOCK_TAKEN))
+		panic("%s: lock 0x%x became TAKEN by someone else",
+		      caller, (integer_t) l);
+
+	mycpu = cpu_number();
+	l->debug.lock_thread = (void *)current_thread();
+	l->debug.state |= USLOCK_TAKEN;
+	l->debug.lock_pc = pc;
+	l->debug.lock_cpu = mycpu;
+
+	usl_trace(l, mycpu, pc, caller);
+}
+
+
+/*
+ * Debug checks on a usimple_lock just before
+ * releasing it. Note that the caller has not
+ * yet released the hardware lock.
+ *
+ * Preemption is still disabled, so there's
+ * no problem using cpu_number.
+ */
+void
+usld_unlock(
+	usimple_lock_t	l,
+	pc_t		pc)
+{
+	register int	mycpu;
+	char		*caller = "usimple_unlock";
+
+
+	if (!usld_lock_common_checks(l, caller))
+		return;
+
+	mycpu = cpu_number();
+
+	if (!(l->debug.state & USLOCK_TAKEN))
+		panic("%s: lock 0x%x hasn't been taken",
+		      caller, (integer_t) l);
+	if (l->debug.lock_thread != (void *) current_thread())
+		panic("%s: unlocking lock 0x%x, owned by thread 0x%x",
+		      caller, (integer_t) l, l->debug.lock_thread);
+	if (l->debug.lock_cpu != mycpu) {
+		printf("%s: unlocking lock 0x%x on cpu 0x%x",
+		       caller, (integer_t) l, mycpu);
+		printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
+		panic(caller);
+	}
+	usl_trace(l, mycpu, pc, caller);
+
+	l->debug.unlock_thread = l->debug.lock_thread;
+	l->debug.lock_thread = INVALID_THREAD;
+	l->debug.state &= ~USLOCK_TAKEN;
+	l->debug.unlock_pc = pc;
+	l->debug.unlock_cpu = mycpu;
+}
+
+
+/*
+ * Debug checks on a usimple_lock just before
+ * attempting to acquire it.
+ *
+ * Preemption isn't guaranteed to be disabled.
+ */
+void
+usld_lock_try_pre(
+	usimple_lock_t	l,
+	pc_t		pc)
+{
+	char		*caller = "usimple_lock_try";
+
+	if (!usld_lock_common_checks(l, caller))
+		return;
+	mp_disable_preemption();
+	usl_trace(l, cpu_number(), pc, caller);
+	mp_enable_preemption();
+}
+
+
+/*
+ * Debug checks on a usimple_lock just after
+ * successfully attempting to acquire it.
+ *
+ * Preemption has been disabled by the
+ * lock acquisition attempt, so it's safe
+ * to use cpu_number.
+ */
+void
+usld_lock_try_post(
+	usimple_lock_t	l,
+	pc_t		pc)
+{
+	register int	mycpu;
+	char		*caller = "successful usimple_lock_try";
+
+	if (!usld_lock_common_checks(l, caller))
+		return;
+
+	if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
+		panic("%s: lock 0x%x became uninitialized",
+		      caller, (integer_t) l);
+	if ((l->debug.state & USLOCK_TAKEN))
+		panic("%s: lock 0x%x became TAKEN by someone else",
+		      caller, (integer_t) l);
+
+	mycpu = cpu_number();
+	l->debug.lock_thread = (void *) current_thread();
+	l->debug.state |= USLOCK_TAKEN;
+	l->debug.lock_pc = pc;
+	l->debug.lock_cpu = mycpu;
+
+	usl_trace(l, mycpu, pc, caller);
+}
+
+
+/*
+ * For very special cases, set traced_lock to point to a
+ * specific lock of interest. The result is a series of
+ * XPRs showing lock operations on that lock. The lock_seq
+ * value is used to show the order of those operations.
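+ *
+ * Illustrative use only (lock name hypothetical, not part of this file):
+ * from the debugger or early bring-up code, aim the trace at one lock and
+ * later read the XPR entries back in lock_seq order:
+ *
+ *	traced_lock = (usimple_lock_t)&some_interesting_lock;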
+ */ +usimple_lock_t traced_lock; +unsigned int lock_seq; + +void +usl_trace( + usimple_lock_t l, + int mycpu, + pc_t pc, + const char * op_name) +{ + if (traced_lock == l) { + XPR(XPR_SLOCK, + "seq %d, cpu %d, %s @ %x\n", + (integer_t) lock_seq, (integer_t) mycpu, + (integer_t) op_name, (integer_t) pc, 0); + lock_seq++; + } +} + + +#endif /* USLOCK_DEBUG */ + +/* + * The C portion of the shared/exclusive locks package. + */ + +/* + * Forward definition + */ + +void lck_rw_lock_exclusive_gen( + lck_rw_t *lck); + +lck_rw_type_t lck_rw_done_gen( + lck_rw_t *lck); + +void +lck_rw_lock_shared_gen( + lck_rw_t *lck); + +boolean_t +lck_rw_lock_shared_to_exclusive_gen( + lck_rw_t *lck); + +void +lck_rw_lock_exclusive_to_shared_gen( + lck_rw_t *lck); + +boolean_t +lck_rw_try_lock_exclusive_gen( + lck_rw_t *lck); + +boolean_t +lck_rw_try_lock_shared_gen( + lck_rw_t *lck); + +void lck_rw_ext_init( + lck_rw_ext_t *lck, + lck_grp_t *grp, + lck_attr_t *attr); + +void lck_rw_ext_backtrace( + lck_rw_ext_t *lck); + +void lck_rw_lock_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +lck_rw_type_t lck_rw_done_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +void +lck_rw_lock_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +boolean_t +lck_rw_lock_shared_to_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +void +lck_rw_lock_exclusive_to_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +boolean_t +lck_rw_try_lock_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +boolean_t +lck_rw_try_lock_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +void +lck_rw_ilk_lock( + lck_rw_t *lck); + +void +lck_rw_ilk_unlock( + lck_rw_t *lck); + +void +lck_rw_check_type( + lck_rw_ext_t *lck, + lck_rw_t *rlck); + +/* + * Routine: lock_alloc + * Function: + * Allocate a lock for external users who cannot + * hard-code the structure definition into their + * objects. + * For now just use kalloc, but a zone is probably + * warranted. + */ +lock_t * +lock_alloc( + boolean_t can_sleep, + __unused unsigned short tag, + __unused unsigned short tag1) +{ + lock_t *lck; + + if ((lck = (lock_t *)kalloc(sizeof(lock_t))) != 0) + lock_init(lck, can_sleep, tag, tag1); + return(lck); +} + +/* + * Routine: lock_init + * Function: + * Initialize a lock; required before use. + * Note that clients declare the "struct lock" + * variables and then initialize them, rather + * than getting a new one from this module. + */ +void +lock_init( + lock_t *lck, + boolean_t can_sleep, + __unused unsigned short tag, + __unused unsigned short tag1) +{ + if (!can_sleep) + panic("lock_init: sleep mode must be set to TRUE\n"); + + (void) memset((void *) lck, 0, sizeof(lock_t)); +#if MACH_LDEBUG + lck->lck_rw_deb.type = RW_TAG; + lck->lck_rw_attr |= (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD|LCK_RW_ATTR_DIS_MYLOCK); +#endif + +} + + +/* + * Routine: lock_free + * Function: + * Free a lock allocated for external users. + * For now just use kfree, but a zone is probably + * warranted. 
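+ *
+ * Editorial sketch of the full external-client round trip (the tag
+ * arguments are ignored by this implementation; lock_write and lock_done
+ * are the read-write entry points of this package):
+ *
+ *	lock_t *l = lock_alloc(TRUE, 0, 0);	-- must be sleepable
+ *	lock_write(l);				-- gain exclusive access
+ *	... modify the shared object ...
+ *	lock_done(l);				-- release
+ *	lock_free(l);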
+ */
+void
+lock_free(
+	lock_t	*lck)
+{
+	kfree((void *)lck, sizeof(lock_t));
+}
+
+#if	MACH_LDEBUG
+void
+lock_write(
+	lock_t	*lck)
+{
+	lck_rw_lock_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
+}
+
+void
+lock_done(
+	lock_t	*lck)
+{
+	(void)lck_rw_done_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
+}
+
+void
+lock_read(
+	lock_t	*lck)
+{
+	lck_rw_lock_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
+}
+
+boolean_t
+lock_read_to_write(
+	lock_t	*lck)
+{
+	return(lck_rw_lock_shared_to_exclusive_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck));
+}
+
+void
+lock_write_to_read(
+	register lock_t	*lck)
+{
+	lck_rw_lock_exclusive_to_shared_ext((lck_rw_ext_t *)lck, (lck_rw_t *)lck);
+}
+#endif
+
+/*
+ * Routine:	lck_rw_alloc_init
+ */
+lck_rw_t *
+lck_rw_alloc_init(
+	lck_grp_t	*grp,
+	lck_attr_t	*attr) {
+	lck_rw_t	*lck;
+
+	if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
+		lck_rw_init(lck, grp, attr);
+
+	return(lck);
+}
+
+/*
+ * Routine:	lck_rw_free
+ */
+void
+lck_rw_free(
+	lck_rw_t	*lck,
+	lck_grp_t	*grp) {
+	lck_rw_destroy(lck, grp);
+	kfree((void *)lck, sizeof(lck_rw_t));
+}
+
+/*
+ * Routine:	lck_rw_init
+ */
+void
+lck_rw_init(
+	lck_rw_t	*lck,
+	lck_grp_t	*grp,
+	lck_attr_t	*attr) {
+	lck_rw_ext_t	*lck_ext;
+	lck_attr_t	*lck_attr;
+
+	if (attr != LCK_ATTR_NULL)
+		lck_attr = attr;
+	else
+		lck_attr = &LockDefaultLckAttr;
+
+	if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
+		if ((lck_ext = (lck_rw_ext_t *)kalloc(sizeof(lck_rw_ext_t))) != 0) {
+			lck_rw_ext_init(lck_ext, grp, lck_attr);
+			lck->lck_rw_tag = LCK_RW_TAG_INDIRECT;
+			lck->lck_rw_ptr = lck_ext;
+		}
+	} else {
+		(void) memset((void *) lck, 0, sizeof(lck_rw_t));
+	}
+
+	lck_grp_reference(grp);
+	lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
+}
+
+/*
+ * Routine:	lck_rw_ext_init
+ */
+void
+lck_rw_ext_init(
+	lck_rw_ext_t	*lck,
+	lck_grp_t	*grp,
+	lck_attr_t	*attr) {
+
+	bzero((void *)lck, sizeof(lck_rw_ext_t));
+
+	if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
+		lck->lck_rw_deb.type = RW_TAG;
+		lck->lck_rw_attr |= LCK_RW_ATTR_DEBUG;
+	}
+
+	lck->lck_rw_grp = grp;
+
+	if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
+		lck->lck_rw_attr |= LCK_RW_ATTR_STAT;
+}
+
+/*
+ * Routine:	lck_rw_destroy
+ */
+void
+lck_rw_destroy(
+	lck_rw_t	*lck,
+	lck_grp_t	*grp) {
+	boolean_t	lck_is_indirect;
+
+	if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
+		return;
+	lck_is_indirect = (lck->lck_rw_tag == LCK_RW_TAG_INDIRECT);
+	lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
+	if (lck_is_indirect)
+		kfree((void *)lck->lck_rw_ptr, sizeof(lck_rw_ext_t));
+
+	lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
+	lck_grp_deallocate(grp);
+	return;
+}
+
+/*
+ * Routine:	lck_rw_lock
+ */
+void
+lck_rw_lock(
+	lck_rw_t	*lck,
+	lck_rw_type_t	lck_rw_type)
+{
+	if (lck_rw_type == LCK_RW_TYPE_SHARED)
+		lck_rw_lock_shared(lck);
+	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+		lck_rw_lock_exclusive(lck);
+	else
+		panic("lck_rw_lock(): Invalid RW lock type: %d\n", lck_rw_type);
+}
+
+
+/*
+ * Routine:	lck_rw_unlock
+ */
+void
+lck_rw_unlock(
+	lck_rw_t	*lck,
+	lck_rw_type_t	lck_rw_type)
+{
+	if (lck_rw_type == LCK_RW_TYPE_SHARED)
+		lck_rw_unlock_shared(lck);
+	else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
+		lck_rw_unlock_exclusive(lck);
+	else
+		panic("lck_rw_unlock(): Invalid RW lock type: %d\n", lck_rw_type);
+}
+
+
+/*
+ * Routine:	lck_rw_unlock_shared
+ */
+void
+lck_rw_unlock_shared(
+	lck_rw_t	*lck)
+{
+	lck_rw_type_t	ret;
+
+	ret = lck_rw_done(lck);
+
+	if (ret != LCK_RW_TYPE_SHARED)
+		panic("lck_rw_unlock_shared(): lock held in mode: %d\n", ret);
+}
+
+
+/*
+ * Routine:
lck_rw_unlock_exclusive + */ +void +lck_rw_unlock_exclusive( + lck_rw_t *lck) +{ + lck_rw_type_t ret; + + ret = lck_rw_done(lck); + + if (ret != LCK_RW_TYPE_EXCLUSIVE) + panic("lck_rw_unlock_exclusive(): lock held in mode: %d\n", ret); +} + + +/* + * Routine: lck_rw_try_lock + */ +boolean_t +lck_rw_try_lock( + lck_rw_t *lck, + lck_rw_type_t lck_rw_type) +{ + if (lck_rw_type == LCK_RW_TYPE_SHARED) + return(lck_rw_try_lock_shared(lck)); + else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) + return(lck_rw_try_lock_exclusive(lck)); + else + panic("lck_rw_try_lock(): Invalid rw lock type: %x\n", lck_rw_type); + return(FALSE); +} + + + +/* + * Routine: lck_rw_lock_exclusive_gen + */ +void +lck_rw_lock_exclusive_gen( + lck_rw_t *lck) +{ + int i; + boolean_t lock_miss = FALSE; + wait_result_t res; + + lck_rw_ilk_lock(lck); + + /* + * Try to acquire the lck_rw_want_excl bit. + */ + while (lck->lck_rw_want_excl) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); + + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[1]; + if (i != 0) { + lck_rw_ilk_unlock(lck); + while (--i != 0 && lck->lck_rw_want_excl) + continue; + lck_rw_ilk_lock(lck); + } + + if (lck->lck_rw_want_excl) { + lck->lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_rw_ilk_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0); + } + lck->lck_rw_want_excl = TRUE; + + /* Wait for readers (and upgrades) to finish */ + + while ((lck->lck_rw_shared_cnt != 0) || lck->lck_rw_want_upgrade) { + if (!lock_miss) { + lock_miss = TRUE; + } + + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, i, 0); + + if (i != 0) { + lck_rw_ilk_unlock(lck); + while (--i != 0 && (lck->lck_rw_shared_cnt != 0 || + lck->lck_rw_want_upgrade)) + continue; + lck_rw_ilk_lock(lck); + } + + if (lck->lck_rw_shared_cnt != 0 || lck->lck_rw_want_upgrade) { + lck->lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_rw_ilk_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, res, 0); + } + + lck_rw_ilk_unlock(lck); +} + + +/* + * Routine: lck_rw_done_gen + */ +lck_rw_type_t +lck_rw_done_gen( + lck_rw_t *lck) +{ + boolean_t do_wakeup = FALSE; + lck_rw_type_t lck_rw_type; + + + lck_rw_ilk_lock(lck); + + if (lck->lck_rw_shared_cnt != 0) { + lck_rw_type = LCK_RW_TYPE_SHARED; + lck->lck_rw_shared_cnt--; + } + else { + lck_rw_type = LCK_RW_TYPE_EXCLUSIVE; + if (lck->lck_rw_want_upgrade) + lck->lck_rw_want_upgrade = FALSE; + else + lck->lck_rw_want_excl = FALSE; + } + + /* + * There is no reason to wakeup a lck_rw_waiting thread + * if the read-count is non-zero. 
Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) { + lck->lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(lck); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + return(lck_rw_type); +} + + +/* + * Routine: lck_rw_lock_shared_gen + */ +void +lck_rw_lock_shared_gen( + lck_rw_t *lck) +{ + int i; + wait_result_t res; + + lck_rw_ilk_lock(lck); + + while (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, i, 0); + + if (i != 0) { + lck_rw_ilk_unlock(lck); + while (--i != 0 && (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade)) + continue; + lck_rw_ilk_lock(lck); + } + + if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + lck->lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_rw_ilk_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, res, 0); + } + + lck->lck_rw_shared_cnt++; + + lck_rw_ilk_unlock(lck); +} + + +/* + * Routine: lck_rw_lock_shared_to_exclusive_gen + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. + */ + +boolean_t +lck_rw_lock_shared_to_exclusive_gen( + lck_rw_t *lck) +{ + int i; + boolean_t do_wakeup = FALSE; + wait_result_t res; + + lck_rw_ilk_lock(lck); + + lck->lck_rw_shared_cnt--; + + if (lck->lck_rw_want_upgrade) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0); + + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. 
+ */ + if (lck->lck_rw_waiting && (lck->lck_rw_shared_cnt == 0)) { + lck->lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(lck); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_shared_cnt, lck->lck_rw_want_upgrade, 0, 0); + + return (TRUE); + } + + lck->lck_rw_want_upgrade = TRUE; + + while (lck->lck_rw_shared_cnt != 0) { + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_shared_cnt, i, 0, 0); + + if (i != 0) { + lck_rw_ilk_unlock(lck); + while (--i != 0 && lck->lck_rw_shared_cnt != 0) + continue; + lck_rw_ilk_lock(lck); + } + + if (lck->lck_rw_shared_cnt != 0) { + lck->lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + lck_rw_ilk_unlock(lck); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(lck); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_shared_cnt, 0, 0, 0); + } + + lck_rw_ilk_unlock(lck); + + return (FALSE); +} + +/* + * Routine: lck_rw_lock_exclusive_to_shared_gen + */ +void +lck_rw_lock_exclusive_to_shared_gen( + lck_rw_t *lck) +{ + boolean_t do_wakeup = FALSE; + + lck_rw_ilk_lock(lck); + + lck->lck_rw_shared_cnt++; + if (lck->lck_rw_want_upgrade) + lck->lck_rw_want_upgrade = FALSE; + else + lck->lck_rw_want_excl = FALSE; + + if (lck->lck_rw_waiting) { + lck->lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(lck); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + +} + + +/* + * Routine: lck_rw_try_lock_exclusive_gen + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lck_rw_try_lock_exclusive_gen( + lck_rw_t *lck) +{ + lck_rw_ilk_lock(lck); + + if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade || lck->lck_rw_shared_cnt) { + /* + * Can't get lock. + */ + lck_rw_ilk_unlock(lck); + return(FALSE); + } + + /* + * Have lock. + */ + + lck->lck_rw_want_excl = TRUE; + + lck_rw_ilk_unlock(lck); + + return(TRUE); +} + +/* + * Routine: lck_rw_try_lock_shared_gen + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. 
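+ *
+ * Illustrative caller pattern (editorial sketch; lck_rw_try_lock_shared
+ * and lck_rw_lock_shared are the public entry points that front-end
+ * these _gen routines):
+ *
+ *	if (!lck_rw_try_lock_shared(lck))	-- opportunistic attempt
+ *		lck_rw_lock_shared(lck);	-- fall back and block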
+ */ + +boolean_t +lck_rw_try_lock_shared_gen( + lck_rw_t *lck) +{ + lck_rw_ilk_lock(lck); + + if (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) { + lck_rw_ilk_unlock(lck); + return(FALSE); + } + + lck->lck_rw_shared_cnt++; + + lck_rw_ilk_unlock(lck); + + return(TRUE); +} + + +/* + * Routine: lck_rw_ext_backtrace + */ +void +lck_rw_ext_backtrace( + lck_rw_ext_t *lck) +{ + unsigned int *stackptr, *stackptr_prev; + unsigned int frame; + + __asm__ volatile("mr %0,r1" : "=r" (stackptr)); + frame = 0; + while (frame < LCK_FRAMES_MAX) { + stackptr_prev = stackptr; + stackptr = ( unsigned int *)*stackptr; + if ( (((unsigned int)stackptr_prev) ^ ((unsigned int)stackptr)) > 8192) + break; + lck->lck_rw_deb.stack[frame] = *(stackptr+2); + frame++; + } + while (frame < LCK_FRAMES_MAX) { + lck->lck_rw_deb.stack[frame] = 0; + frame++; + } +} + + +/* + * Routine: lck_rw_lock_exclusive_ext + */ +void +lck_rw_lock_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + int i; + wait_result_t res; + boolean_t lock_miss = FALSE; + boolean_t lock_wait = FALSE; + boolean_t lock_stat; + + lck_rw_check_type(lck, rlck); + + if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_MYLOCK)) == LCK_RW_ATTR_DEBUG) + && (lck->lck_rw_deb.thread == current_thread())) + panic("rw lock (0x%08X) recursive lock attempt\n", rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE; + + if (lock_stat) + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; + + /* + * Try to acquire the lck_rw.lck_rw_want_excl bit. + */ + while (lck->lck_rw.lck_rw_want_excl) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)rlck, 0, 0, 0, 0); + + if (lock_stat && !lock_miss) { + lock_miss = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + + i = lock_wait_time[1]; + if (i != 0) { + lck_rw_ilk_unlock(&lck->lck_rw); + while (--i != 0 && lck->lck_rw.lck_rw_want_excl) + continue; + lck_rw_ilk_lock(&lck->lck_rw); + } + + if (lck->lck_rw.lck_rw_want_excl) { + lck->lck_rw.lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + if (lock_stat && !lock_wait) { + lock_wait = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(&lck->lck_rw); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)rlck, res, 0, 0, 0); + } + lck->lck_rw.lck_rw_want_excl = TRUE; + + /* Wait for readers (and upgrades) to finish */ + + while ((lck->lck_rw.lck_rw_shared_cnt != 0) || lck->lck_rw.lck_rw_want_upgrade) { + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, i, 0); + + if (lock_stat && !lock_miss) { + lock_miss = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + + if (i != 0) { + lck_rw_ilk_unlock(&lck->lck_rw); + while (--i != 0 && (lck->lck_rw.lck_rw_shared_cnt != 0 || + lck->lck_rw.lck_rw_want_upgrade)) + continue; + lck_rw_ilk_lock(&lck->lck_rw); + } + + if (lck->lck_rw.lck_rw_shared_cnt != 0 || lck->lck_rw.lck_rw_want_upgrade) { + lck->lck_rw.lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned 
int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + if (lock_stat && !lock_wait) { + lock_wait = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(&lck->lck_rw); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, res, 0); + } + + lck->lck_rw_deb.pc_excl = __builtin_return_address(0); + if (LcksOpts & enaLkExtStck) + lck_rw_ext_backtrace(lck); + lck->lck_rw_deb.thread = current_thread(); + + lck_rw_ilk_unlock(&lck->lck_rw); +} + + +/* + * Routine: lck_rw_done_ext + */ +lck_rw_type_t +lck_rw_done_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + boolean_t do_wakeup = FALSE; + lck_rw_type_t lck_rw_type; + + + lck_rw_check_type(lck, rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + if (lck->lck_rw.lck_rw_shared_cnt != 0) { + lck_rw_type = LCK_RW_TYPE_SHARED; + lck->lck_rw.lck_rw_shared_cnt--; + } + else { + lck_rw_type = LCK_RW_TYPE_EXCLUSIVE; + if (lck->lck_rw.lck_rw_want_upgrade) + lck->lck_rw.lck_rw_want_upgrade = FALSE; + else if (lck->lck_rw.lck_rw_want_excl) + lck->lck_rw.lck_rw_want_excl = FALSE; + else + panic("rw lock (0x%08X) bad state (0x%08X) on attempt to release a shared or exlusive right\n", + rlck, lck->lck_rw); + if (lck->lck_rw_deb.thread == THREAD_NULL) + panic("rw lock (0x%08X) not held\n", + rlck); + else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG) + && (lck->lck_rw_deb.thread != current_thread())) + panic("rw lock (0x%08X) unlocked by non-owner(0x%08X), current owner(0x%08X)\n", + rlck, current_thread(), lck->lck_rw_deb.thread); + lck->lck_rw_deb.thread = THREAD_NULL; + } + + if (lck->lck_rw_attr & LCK_RW_ATTR_DEBUG) + lck->lck_rw_deb.pc_done = __builtin_return_address(0); + + /* + * There is no reason to wakeup a waiting thread + * if the read-count is non-zero. Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) { + lck->lck_rw.lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(&lck->lck_rw); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + return(lck_rw_type); +} + + +/* + * Routine: lck_rw_lock_shared_ext + */ +void +lck_rw_lock_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + int i; + wait_result_t res; + boolean_t lock_miss = FALSE; + boolean_t lock_wait = FALSE; + boolean_t lock_stat; + + lck_rw_check_type(lck, rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? 
TRUE : FALSE; + + if (lock_stat) + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; + + while (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) { + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, + (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, i, 0); + + if (lock_stat && !lock_miss) { + lock_miss = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + + if (i != 0) { + lck_rw_ilk_unlock(&lck->lck_rw); + while (--i != 0 && (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade)) + continue; + lck_rw_ilk_lock(&lck->lck_rw); + } + + if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) { + lck->lck_rw.lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + if (lock_stat && !lock_wait) { + lock_wait = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(&lck->lck_rw); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END, + (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, res, 0); + } + + lck->lck_rw.lck_rw_shared_cnt++; + + lck_rw_ilk_unlock(&lck->lck_rw); +} + + +/* + * Routine: lck_rw_lock_shared_to_exclusive_ext + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. + */ + +boolean_t +lck_rw_lock_shared_to_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + int i; + boolean_t do_wakeup = FALSE; + wait_result_t res; + boolean_t lock_miss = FALSE; + boolean_t lock_wait = FALSE; + boolean_t lock_stat; + + lck_rw_check_type(lck, rlck); + + if (lck->lck_rw_deb.thread == current_thread()) + panic("rw lock (0x%08X) recursive lock attempt\n", rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE; + + if (lock_stat) + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; + + lck->lck_rw.lck_rw_shared_cnt--; + + if (lck->lck_rw.lck_rw_want_upgrade) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0); + + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. 
+ */ + if (lck->lck_rw.lck_rw_waiting && (lck->lck_rw.lck_rw_shared_cnt == 0)) { + lck->lck_rw.lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(&lck->lck_rw); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, lck->lck_rw.lck_rw_want_upgrade, 0, 0); + + return (TRUE); + } + + lck->lck_rw.lck_rw_want_upgrade = TRUE; + + while (lck->lck_rw.lck_rw_shared_cnt != 0) { + i = lock_wait_time[1]; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, i, 0, 0); + + if (lock_stat && !lock_miss) { + lock_miss = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + + if (i != 0) { + lck_rw_ilk_unlock(&lck->lck_rw); + while (--i != 0 && lck->lck_rw.lck_rw_shared_cnt != 0) + continue; + lck_rw_ilk_lock(&lck->lck_rw); + } + + if (lck->lck_rw.lck_rw_shared_cnt != 0) { + lck->lck_rw.lck_rw_waiting = TRUE; + res = assert_wait((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + if (res == THREAD_WAITING) { + if (lock_stat && !lock_wait) { + lock_wait = TRUE; + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + res = thread_block(THREAD_CONTINUE_NULL); + lck_rw_ilk_lock(&lck->lck_rw); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END, + (int)rlck, lck->lck_rw.lck_rw_shared_cnt, 0, 0, 0); + } + + lck->lck_rw_deb.pc_excl = __builtin_return_address(0); + if (LcksOpts & enaLkExtStck) + lck_rw_ext_backtrace(lck); + lck->lck_rw_deb.thread = current_thread(); + + lck_rw_ilk_unlock(&lck->lck_rw); + + return (FALSE); +} + +/* + * Routine: lck_rw_lock_exclusive_to_shared_ext + */ +void +lck_rw_lock_exclusive_to_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + boolean_t do_wakeup = FALSE; + + lck_rw_check_type(lck, rlck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START, + (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, 0, 0); + + lck_rw_ilk_lock(&lck->lck_rw); + + lck->lck_rw.lck_rw_shared_cnt++; + if (lck->lck_rw.lck_rw_want_upgrade) + lck->lck_rw.lck_rw_want_upgrade = FALSE; + else if (lck->lck_rw.lck_rw_want_excl) + lck->lck_rw.lck_rw_want_excl = FALSE; + else + panic("rw lock (0x%08X) bad state (0x%08X) on attempt to release a shared or exlusive right\n", + rlck, lck->lck_rw); + if (lck->lck_rw_deb.thread == THREAD_NULL) + panic("rw lock (0x%08X) not held\n", + rlck); + else if ( ((lck->lck_rw_attr & (LCK_RW_ATTR_DEBUG|LCK_RW_ATTR_DIS_THREAD)) == LCK_RW_ATTR_DEBUG) + && (lck->lck_rw_deb.thread != current_thread())) + panic("rw lock (0x%08X) unlocked by non-owner(0x%08X), current owner(0x%08X)\n", + rlck, current_thread(), lck->lck_rw_deb.thread); + + lck->lck_rw_deb.thread = THREAD_NULL; + + if (lck->lck_rw.lck_rw_waiting) { + lck->lck_rw.lck_rw_waiting = FALSE; + do_wakeup = TRUE; + } + + lck_rw_ilk_unlock(&lck->lck_rw); + + if (do_wakeup) + thread_wakeup((event_t)(((unsigned int*)rlck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int)))); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END, + (int)rlck, lck->lck_rw.lck_rw_want_excl, lck->lck_rw.lck_rw_want_upgrade, lck->lck_rw.lck_rw_shared_cnt, 0); + +} + + +/* + * Routine: 
lck_rw_try_lock_exclusive_ext + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lck_rw_try_lock_exclusive_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + boolean_t lock_stat; + + lck_rw_check_type(lck, rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE; + + if (lock_stat) + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; + + if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade || lck->lck_rw.lck_rw_shared_cnt) { + /* + * Can't get lock. + */ + if (lock_stat) { + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + return(FALSE); + } + + /* + * Have lock. + */ + + lck->lck_rw.lck_rw_want_excl = TRUE; + lck->lck_rw_deb.pc_excl = __builtin_return_address(0); + if (LcksOpts & enaLkExtStck) + lck_rw_ext_backtrace(lck); + lck->lck_rw_deb.thread = current_thread(); + + lck_rw_ilk_unlock(&lck->lck_rw); + + return(TRUE); +} + +/* + * Routine: lck_rw_try_lock_shared_ext + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t +lck_rw_try_lock_shared_ext( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + boolean_t lock_stat; + + lck_rw_check_type(lck, rlck); + + lck_rw_ilk_lock(&lck->lck_rw); + + lock_stat = (lck->lck_rw_attr & LCK_RW_ATTR_STAT) ? TRUE : FALSE; + + if (lock_stat) + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt++; + + if (lck->lck_rw.lck_rw_want_excl || lck->lck_rw.lck_rw_want_upgrade) { + if (lock_stat) { + lck->lck_rw_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt++; + } + lck_rw_ilk_unlock(&lck->lck_rw); + return(FALSE); + } + + lck->lck_rw.lck_rw_shared_cnt++; + + lck_rw_ilk_unlock(&lck->lck_rw); + + return(TRUE); +} + +void +lck_rw_check_type( + lck_rw_ext_t *lck, + lck_rw_t *rlck) +{ + if (lck->lck_rw_deb.type != RW_TAG) + panic("rw lock (0x%08X) not a rw lock type (0x%08X)\n",rlck, lck->lck_rw_deb.type); +} + +/* + * The C portion of the mutex package. These routines are only invoked + * if the optimized assembler routines can't do the work. + */ + +/* + * Forward definition + */ + +void lck_mtx_ext_init( + lck_mtx_ext_t *lck, + lck_grp_t *grp, + lck_attr_t *attr); + +/* + * Routine: mutex_alloc + * Function: + * Allocate a mutex for external users who cannot + * hard-code the structure definition into their + * objects. + * For now just use kalloc, but a zone is probably + * warranted. 
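+ *
+ * Editorial usage sketch (not part of the original source; mutex_lock and
+ * mutex_unlock are the companion entry points implemented elsewhere in
+ * this package):
+ *
+ *	mutex_t *m = mutex_alloc(0);
+ *	mutex_lock(m);
+ *	... critical section ...
+ *	mutex_unlock(m);
+ *	mutex_free(m);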
+ */ +mutex_t * +mutex_alloc( + unsigned short tag) +{ + mutex_t *m; + + if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0) + mutex_init(m, tag); + return(m); +} + +/* + * Routine: mutex_free + */ +void +mutex_free( + mutex_t *m) +{ + kfree((void *)m, sizeof(mutex_t)); +} + +/* + * Routine: lck_mtx_alloc_init + */ +lck_mtx_t * +lck_mtx_alloc_init( + lck_grp_t *grp, + lck_attr_t *attr) { + lck_mtx_t *lck; + + if ((lck = (lck_mtx_t *)kalloc(sizeof(lck_mtx_t))) != 0) + lck_mtx_init(lck, grp, attr); + + return(lck); +} + +/* + * Routine: lck_mtx_free + */ +void +lck_mtx_free( + lck_mtx_t *lck, + lck_grp_t *grp) { + lck_mtx_destroy(lck, grp); + kfree((void *)lck, sizeof(lck_mtx_t)); +} + +/* + * Routine: lck_mtx_init + */ +void +lck_mtx_init( + lck_mtx_t *lck, + lck_grp_t *grp, + lck_attr_t *attr) { + lck_mtx_ext_t *lck_ext; + lck_attr_t *lck_attr; + + if (attr != LCK_ATTR_NULL) + lck_attr = attr; + else + lck_attr = &LockDefaultLckAttr; + + if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) { + if ((lck_ext = (lck_mtx_ext_t *)kalloc(sizeof(lck_mtx_ext_t))) != 0) { + lck_mtx_ext_init(lck_ext, grp, lck_attr); + lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; + lck->lck_mtx_ptr = lck_ext; + } + } else { + lck->lck_mtx_data = 0; + lck->lck_mtx_waiters = 0; + lck->lck_mtx_pri = 0; + } + lck_grp_reference(grp); + lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); +} + +/* + * Routine: lck_mtx_ext_init + */ +void +lck_mtx_ext_init( + lck_mtx_ext_t *lck, + lck_grp_t *grp, + lck_attr_t *attr) { + + bzero((void *)lck, sizeof(lck_mtx_ext_t)); + + if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) { + lck->lck_mtx_deb.type = MUTEX_TAG; + lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG; + } + + lck->lck_mtx_grp = grp; + + if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) + lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT; +} + +/* + * Routine: lck_mtx_destroy + */ +void +lck_mtx_destroy( + lck_mtx_t *lck, + lck_grp_t *grp) { + boolean_t lck_is_indirect; + + if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) + return; + lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT); + lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED; + if (lck_is_indirect) + kfree((void *)lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t)); + + lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX); + lck_grp_deallocate(grp); + return; +} + + +#if MACH_KDB +/* + * Routines to print out simple_locks and mutexes in a nicely-formatted + * fashion. 
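+ *
+ * E.g. (editorial sketch, "maddr" hypothetical): given a mutex address
+ * from the debugger, the row format described by mutex_labels below is
+ * produced by:
+ *
+ *	db_show_one_mutex((db_expr_t)maddr, TRUE, 0, 0);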
+ */ + +char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; +char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; + +void db_print_simple_lock( + simple_lock_t addr); + +void db_print_mutex( + mutex_t * addr); + +void +db_show_one_simple_lock ( + db_expr_t addr, + boolean_t have_addr, + db_expr_t count, + char * modif) +{ + simple_lock_t saddr = (simple_lock_t)addr; + + if (saddr == (simple_lock_t)0 || !have_addr) { + db_error ("No simple_lock\n"); + } +#if USLOCK_DEBUG + else if (saddr->lock_type != USLOCK_TAG) + db_error ("Not a simple_lock\n"); +#endif /* USLOCK_DEBUG */ + + db_printf ("%s\n", simple_lock_labels); + db_print_simple_lock (saddr); +} + +void +db_print_simple_lock ( + simple_lock_t addr) +{ + + db_printf ("%08x %3d", addr, *hw_lock_addr(addr->interlock)); +#if USLOCK_DEBUG + db_printf (" %08x", addr->debug.lock_thread); + db_printf (" %08x ", addr->debug.duration[1]); + db_printsym ((int)addr->debug.lock_pc, DB_STGY_ANY); +#endif /* USLOCK_DEBUG */ + db_printf ("\n"); +} + +void +db_show_one_mutex ( + db_expr_t addr, + boolean_t have_addr, + db_expr_t count, + char * modif) +{ + mutex_t * maddr = (mutex_t *)addr; + + if (maddr == (mutex_t *)0 || !have_addr) + db_error ("No mutex\n"); +#if MACH_LDEBUG + else if (maddr->lck_mtx_deb.type != MUTEX_TAG) + db_error ("Not a mutex\n"); +#endif /* MACH_LDEBUG */ + + db_printf ("%s\n", mutex_labels); + db_print_mutex (maddr); +} + +void +db_print_mutex ( + mutex_t * addr) +{ + db_printf ("%08x %6d %7d", + addr, *addr, addr->lck_mtx.lck_mtx_waiters); +#if MACH_LDEBUG + db_printf (" %08x ", addr->lck_mtx_deb.thread); + db_printsym (addr->lck_mtx_deb.stack[0], DB_STGY_ANY); +#endif /* MACH_LDEBUG */ + db_printf ("\n"); +} + +void +db_show_one_lock( + lock_t *lock) +{ + db_printf("shared_count = 0x%x, %swant_upgrade, %swant_exclusive, ", + lock->lck_rw.lck_rw_shared_cnt, + lock->lck_rw.lck_rw_want_upgrade ? "" : "!", + lock->lck_rw.lck_rw_want_excl ? "" : "!"); + db_printf("%swaiting\n", + lock->lck_rw.lck_rw_waiting ? "" : "!"); + db_printf("%sInterlock\n", + lock->lck_rw.lck_rw_interlock ? "" : "!"); +} + +#endif /* MACH_KDB */ + diff --git a/osfmk/ppc/lowglobals.h b/osfmk/ppc/lowglobals.h index 755904943..512cef481 100644 --- a/osfmk/ppc/lowglobals.h +++ b/osfmk/ppc/lowglobals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -25,8 +25,6 @@ #ifndef _LOW_MEMORY_GLOBALS_H_ #define _LOW_MEMORY_GLOBALS_H_ -#include - #include #include #include @@ -35,6 +33,7 @@ #include #include #include +#include /* * Don't change these structures unless you change the corresponding assembly code @@ -56,7 +55,12 @@ typedef struct lowglo { unsigned long long lgZero; /* 5008 Double constant 0 */ unsigned int lgPPStart; /* 5010 Start of per_proc blocks */ unsigned int lgCHUDXNUfnStart; /* 5014 CHUD XNU function glue table */ - unsigned int lgRsv018[26]; /* 5018 reserved */ + unsigned int lgMckFlags; /* 5018 Machine check flags */ + unsigned int lgVersion; /* 501C Pointer to kernel version string */ + uint64_t lgPMWvaddr; /* 5020 physical memory window virtual address */ + uint64_t lgUMWvaddr; /* 5028 user memory window virtual address */ + unsigned int lgVMMforcedFeats; /* 5030 VMM boot-args forced feature flags */ + unsigned int lgRsv034[19]; /* 5034 reserved */ traceWork lgTrcWork; /* 5080 Tracing control block - trcWork */ unsigned int lgRsv0A0[24]; /* 50A0 reserved */ struct Saveanchor lgSaveanchor; /* 5100 Savearea anchor - saveanchor */ @@ -64,8 +68,10 @@ typedef struct lowglo { unsigned int lgTlbieLck; /* 5180 TLBIE lock */ unsigned int lgRsv184[31]; /* 5184 reserved - push to next line */ struct diagWork lgdgWork; /* 5200 Start of diagnostic work area */ - unsigned int lgRsv220[24]; /* 5220 reserved */ - unsigned int lgRsv280[32]; /* 5280 reserved */ + unsigned int lglcksWork; /* 5220 lcks option */ + unsigned int lgRsv224[23]; /* 5224 reserved */ + pcfg lgpPcfg[8]; /* 5280 Page configurations */ + unsigned int lgRst2A0[24]; /* 52A0 reserved */ unsigned int lgKillResv; /* 5300 line used to kill reservations */ unsigned int lgKillResvpad[31]; /* 5304 pad reservation kill line */ diff --git a/osfmk/ppc/lowmem_vectors.s b/osfmk/ppc/lowmem_vectors.s index 10cb3ff56..9bdf98f95 100644 --- a/osfmk/ppc/lowmem_vectors.s +++ b/osfmk/ppc/lowmem_vectors.s @@ -25,7 +25,6 @@ #include #include -#include #include #include @@ -46,8 +45,10 @@ VECTOR_SEGMENT + .globl EXT(lowGlo) +EXT(lowGlo): - .globl EXT(ExceptionVectorsStart) + .globl EXT(ExceptionVectorsStart) EXT(ExceptionVectorsStart): /* Used if relocating the exception vectors */ baseR: /* Used so we have more readable code */ @@ -89,7 +90,7 @@ EXT(ResetHandler): mtlr r4 blr -resetexc: cmplwi r13,RESET_HANDLER_BUPOR ; Special bring up POR sequence? +resetexc: cmplwi r13,RESET_HANDLER_BUPOR ; Special bring up POR sequence? bne resetexc2 ; No... lis r4,hi16(EXT(resetPOR)) ; Get POR code ori r4,r4,lo16(EXT(resetPOR)) ; The rest @@ -116,7 +117,7 @@ resetexc2: cmplwi cr1,r13,RESET_HANDLER_IGNORE ; Are we ignoring these? (Softwar rxIg64: mtcr r11 ; Restore the CR mfsprg r11,0 ; Get per_proc mtspr hsprg0,r14 ; Save a register - lwz r14,UAW(r11) ; Get the User Assist Word + ld r14,UAW(r11) ; Get the User Assist DoubleWord mfsprg r13,2 ; Restore R13 lwz r11,pfAvailable(r11) ; Get the features mtsprg 2,r11 ; Restore sprg2 @@ -149,23 +150,23 @@ LEXT(extPatchMCK) ; This is patched to a nop for 64-bit ; Fall through here for 970 MCKs. ; - li r11,1 ; - sldi r11,r11,32+3 ; - mfspr r13,hid4 ; - or r11,r11,r13 ; + li r11,1 ; ? + sldi r11,r11,32+3 ; ? + mfspr r13,hid4 ; ? + or r11,r11,r13 ; ? sync - mtspr hid4,r11 ; + mtspr hid4,r11 ; ? isync - li r11,1 ; - sldi r11,r11,32+8 ; - andc r13,r13,r11 ; + li r11,1 ; ? + sldi r11,r11,32+8 ; ? + andc r13,r13,r11 ; ? lis r11,0xE000 ; Get the unlikeliest ESID possible sync - mtspr hid4,r13 ; - isync ; + mtspr hid4,r13 ; ? 
+ isync ; ? - srdi r11,r11,1 ; - slbie r11 ; + srdi r11,r11,1 ; ? + slbie r11 ; ? sync isync @@ -179,24 +180,24 @@ LEXT(extPatchMCK) ; This is patched to a nop for 64-bit h200aaa: mfsrr1 r11 ; Get the SRR1 mfcr r13 ; Save the CR - rlwinm. r11,r11,0,dcmck,dcmck ; - beq+ notDCache ; + rlwinm. r11,r11,0,dcmck,dcmck ; ? + beq+ notDCache ; ? sync - mfspr r11,msscr0 ; - dssall ; + mfspr r11,msscr0 ; ? + dssall ; ? sync isync - oris r11,r11,hi16(dl1hwfm) ; - mtspr msscr0,r11 ; + oris r11,r11,hi16(dl1hwfm) ; ? + mtspr msscr0,r11 ; ? -rstbsy: mfspr r11,msscr0 ; +rstbsy: mfspr r11,msscr0 ; ? - rlwinm. r11,r11,0,dl1hwf,dl1hwf ; - bne rstbsy ; + rlwinm. r11,r11,0,dl1hwf,dl1hwf ; ? + bne rstbsy ; ? - sync ; + sync ; ? mfsprg r11,0 ; Get the per_proc mtcrf 255,r13 ; Restore CRs @@ -350,126 +351,39 @@ notDCache: mtcrf 255,r13 ; Restore CRs li r11,T_RESERVED /* Set 'rupt code */ b .L_exception_entry /* Join common... */ + +; System Calls (sc instruction) ; -; System call - generated by the sc instruction -; -; We handle the ultra-fast traps right here. They are: -; -; 0xFFFFFFFF - BlueBox only - MKIsPreemptiveTask -; 0xFFFFFFFE - BlueBox only - kcNKIsPreemptiveTaskEnv -; 0x00007FF2 - User state only - thread info -; 0x00007FF3 - User state only - floating point / vector facility status -; 0x00007FF4 - Kernel only - loadMSR - not used on 64-bit machines -; -; Note: none handled if virtual machine is running -; Also, it we treat SCs as kernel SCs if the RI bit is set +; The syscall number is in r0. All we do here is munge the number into a +; 7-bit index into the "scTable", and dispatch on it to handle the Ultra +; Fast Traps (UFTs.) The index is: ; +; 0x40 - set if syscall number is 0x00006004 +; 0x20 - set if upper 29 bits of syscall number are 0xFFFFFFF8 +; 0x10 - set if upper 29 bits of syscall number are 0x00007FF0 +; 0x0E - low three bits of syscall number +; 0x01 - zero, as scTable is an array of shorts . = 0xC00 .L_handlerC00: mtsprg 3,r11 ; Save R11 - mfsprg r11,2 ; Get the feature flags - mtsprg 2,r13 ; Save R13 - rlwinm r11,r11,pf64Bitb-4,4,4 ; Get the 64-bit flag - mfsrr1 r13 ; Get SRR1 for loadMSR - rlwimi r11,r13,MSR_PR_BIT-5,5,5 ; Move the PR bit to bit 1 - mfcr r13 ; Save the CR - - mtcrf 0x40,r11 ; Get the top 3 CR bits to 64-bit, PR, sign - - cmpwi r0,lo16(-3) ; Eliminate all negatives but -1 and -2 - mfsprg r11,0 ; Get the per_proc - bf-- 5,uftInKern ; We came from the kernel... - ble-- notufp ; This is a mach call - - lwz r11,spcFlags(r11) ; Pick up the special flags - - cmpwi cr7,r0,lo16(-1) ; Is this a BlueBox call? - cmplwi cr2,r0,0x7FF2 ; Ultra fast path cthread info call? - cmplwi cr3,r0,0x7FF3 ; Ultra fast path facility status? - cror cr4_eq,cr2_eq,cr3_eq ; Is this one of the two ufts we handle here? - - ble-- cr7,uftBBCall ; We think this is blue box call... - - rlwinm r11,r11,16,16,31 ; Extract spcFlags upper bits - andi. r11,r11,hi16(runningVM|FamVMena|FamVMmode) - cmpwi cr0,r11,hi16(runningVM|FamVMena|FamVMmode) ; Test in VM FAM - beq-- cr0,ufpVM ; fast paths running VM ... - - bne-- cr4_eq,notufp ; Bail ifthis is not a uft... - -; -; Handle normal user ultra-fast trap -; - - li r3,spcFlags ; Assume facility status - 0x7FF3 - - beq-- cr3,uftFacStat ; This is a facilities status call... - - li r3,UAW ; This is really a thread info call - 0x7FF2 - -uftFacStat: mfsprg r11,0 ; Get the per_proc - lwzx r3,r11,r3 ; Get the UAW or spcFlags field - -uftExit: bt++ 4,uftX64 ; Go do the 64-bit exit... 
- - lwz r11,pfAvailable(r11) ; Get the feature flags - mtcrf 255,r13 ; Restore the CRs - mfsprg r13,2 ; Restore R13 - mtsprg 2,r11 ; Set the feature flags - mfsprg r11,3 ; Restore R11 - - rfi ; Back to our guy... - -uftX64: mtspr hsprg0,r14 ; Save a register - - lwz r14,UAW(r11) ; Get the User Assist Word - lwz r11,pfAvailable(r11) ; Get the feature flags - - mtcrf 255,r13 ; Restore the CRs - - mfsprg r13,2 ; Restore R13 - mtsprg 2,r11 ; Set the feature flags - mfsprg r11,3 ; Restore R11 - mtsprg 3,r14 ; Set the UAW in sprg3 - mfspr r14,hsprg0 ; Restore R14 - - rfid ; Back to our guy... - -; -; Handle BlueBox ultra-fast trap -; - -uftBBCall: andi. r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need - cmplwi r11,bbNoMachSC ; See if we are trapping syscalls - blt-- notufp ; No... - - rlwimi r13,r11,bbPreemptivebit-cr0_eq,cr0_eq,cr0_eq ; Copy preemptive task flag into user cr0_eq - - mfsprg r11,0 ; Get the per proc - - beq++ cr7,uftExit ; For MKIsPreemptiveTask we are done... - - lwz r0,ppbbTaskEnv(r11) ; Get the shadowed taskEnv from per_proc_area - b uftExit ; We are really all done now... - -; Kernel ultra-fast trap - -uftInKern: cmplwi r0,0x7FF4 ; Ultra fast path loadMSR? - bne- notufp ; Someone is trying to cheat... - - mtsrr1 r3 ; Set new MSR - - b uftExit ; Go load the new MSR... - -notufp: mtcrf 0xFF,r13 ; Restore the used CRs - li r11,T_SYSTEM_CALL|T_FAM ; Set interrupt code - b .L_exception_entry ; Join common... - - - - + rlwinm r11,r0,0,0xFFFFFFF8 ; mask off low 3 bits of syscall number + xori r13,r11,0x7FF0 ; start to check for the 0x7FFx traps + addi r11,r11,8 ; make a 0 iff this is a 0xFFFFFFF8 trap + cntlzw r13,r13 ; set bit 0x20 iff a 0x7FFx trap + cntlzw r11,r11 ; set bit 0x20 iff a 0xFFFFFFF8 trap + rlwimi r11,r13,31,0x10 ; move 0x7FFx bit into position + xori r13,r0,0x6004 ; start to check for 0x6004 + rlwimi r11,r0,1,0xE ; move in low 3 bits of syscall number + cntlzw r13,r13 ; set bit 0x20 iff 0x6004 + rlwinm r11,r11,0,0,30 ; clear out bit 31 + rlwimi r11,r13,1,0x40 ; move 0x6004 bit into position + lhz r11,lo16(scTable)(r11) ; get branch address from sc table + mfctr r13 ; save caller's ctr in r13 + mtctr r11 ; set up branch to syscall handler + mfsprg r11,0 ; get per_proc, which most UFTs use + bctr ; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11) /* * Trace - generated by single stepping @@ -543,13 +457,15 @@ sbxx64b: lis r2,hi16(EXT(pc_trace_buf)) ; Get the top of the buffer lwz r2,tempr1+4(r11) ; Restore work register lwz r3,tempr2+4(r11) ; Restore work register beq cr2,notspectr ; Buffer filled, make a rupt... - b uftExit ; Go restore and leave... + mtcr r13 ; Restore CR + b uftRFI ; Go restore and leave... sbxx64c: ld r1,tempr0(r11) ; Restore work register ld r2,tempr1(r11) ; Restore work register ld r3,tempr2(r11) ; Restore work register beq cr2,notspectr ; Buffer filled, make a rupt... - b uftExit ; Go restore and leave... + mtcr r13 ; Restore CR + b uftRFI ; Go restore and leave... /* * Floating point assist @@ -708,42 +624,254 @@ VMXhandler: li r11,T_INSTRUMENTATION /* Set 'rupt code */ b .L_exception_entry /* Join common... */ - . = 0x2100 -/* - * Filter Ultra Fast Path syscalls for VMM + + .data + .align ALIGN + .globl EXT(exception_entry) +EXT(exception_entry): + .long .L_exception_entry-EXT(ExceptionVectorsStart) /* phys addr of fn */ + + VECTOR_SEGMENT + +/*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + * + * First-level syscall dispatch. 
The syscall vector maps r0 (the syscall number) into an + * index into the "scTable" (below), and then branches to one of these routines. The PPC + * syscalls come in several varieties, as follows: + * + * 1. If (syscall & 0xFFFFF000) == 0x00007000, then it is a PPC Fast Trap or UFT. + * The UFTs are dispatched here, the Fast Traps are dispatched in hw_exceptions.s. + * + * 2. If (syscall & 0xFFFFF000) == 0x00006000, then it is a PPC-only trap. + * One of these (0x6004) is a UFT, but most are dispatched in hw_exceptions.s. These + * are mostly Blue Box or VMM (Virtual Machine) calls. + * + * 3. If (syscall & 0xFFFFFFF0) == 0xFFFFFFF0, then it is also a UFT and is dispatched here. + * + * 4. If (syscall & 0xFFFFF000) == 0x80000000, then it is a "firmware" call and is dispatched in + * Firmware.s, though the special "Cut Trace" trap (0x80000000) is handled here in xcpSyscall. + * + * 5. If (syscall & 0xFFFFF000) == 0xFFFFF000, and it is not one of the above, then it is a Mach + * syscall, which are dispatched in hw_exceptions.s via "mach_trap_table". + * + * 6. If (syscall & 0xFFFFF000) == 0x00000000, then it is a BSD syscall, which are dispatched + * by "unix_syscall" using the "sysent" table. + * + * What distinguishes the UFTs, aside from being ultra fast, is that they cannot rely on translation + * being on, and so cannot look at the activation or task control block, etc. We handle them right + * here, and return to the caller without turning interrupts or translation on. The UFTs are: + * + * 0xFFFFFFFF - BlueBox only - MKIsPreemptiveTask + * 0xFFFFFFFE - BlueBox only - MKIsPreemptiveTaskEnv + * 0x00007FF2 - User state only - thread info (32-bit mode) + * 0x00007FF3 - User state only - floating point / vector facility status + * 0x00007FF4 - Kernel only - loadMSR - not used on 64-bit machines + * 0x00006004 - vmm_dispatch (only some of which are UFTs) + * + * "scTable" is an array of 2-byte addresses, accessed using a 7-bit index derived from the syscall + * number as follows: + * + * 0x40 (A) - set if syscall number is 0x00006004 + * 0x20 (B) - set if upper 29 bits of syscall number are 0xFFFFFFF8 + * 0x10 (C) - set if upper 29 bits of syscall number are 0x00007FF0 + * 0x0E (D) - low three bits of syscall number + * + * If you define another UFT, try to use a number in one of the currently decoded ranges, ie one marked + * "unassigned" below. The dispatch table and the UFT handlers must reside in the first 32KB of + * physical memory. 
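+ *
+ * As a reader's aid, the index computation done with cntlzw/rlwimi at the
+ * 0xC00 vector can be written in C as (editorial sketch only):
+ *
+ *	unsigned int idx = (num & 7) << 1;		// D bits (byte offset into shorts)
+ *	if ((num & ~7u) == 0x00007FF0) idx |= 0x10;	// C
+ *	if ((num & ~7u) == 0xFFFFFFF8) idx |= 0x20;	// B
+ *	if (num == 0x00006004)         idx |= 0x40;	// A
+ *	branch_to(baseR + scTable[idx >> 1]);		// entries are 16-bit offsets from baseR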
+ */ + + .align 7 ; start this table on a cache line +scTable: ; ABC D + .short uftNormalSyscall-baseR ; 000 0 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 1 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 2 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 3 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 4 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 5 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 6 these syscalls are not in a reserved range + .short uftNormalSyscall-baseR ; 000 7 these syscalls are not in a reserved range + + .short uftNormalSyscall-baseR ; 001 0 0x7FF0 is unassigned + .short uftNormalSyscall-baseR ; 001 1 0x7FF1 is Set Thread Info Fast Trap (pass up) + .short uftThreadInfo-baseR ; 001 2 0x7FF2 is Thread Info + .short uftFacilityStatus-baseR ; 001 3 0x7FF3 is Facility Status + .short uftLoadMSR-baseR ; 001 4 0x7FF4 is Load MSR + .short uftNormalSyscall-baseR ; 001 5 0x7FF5 is the Null FastPath Trap (pass up) + .short uftNormalSyscall-baseR ; 001 6 0x7FF6 is unassigned + .short uftNormalSyscall-baseR ; 001 7 0x7FF7 is unassigned + + .short uftNormalSyscall-baseR ; 010 0 0xFFFFFFF0 is unassigned + .short uftNormalSyscall-baseR ; 010 1 0xFFFFFFF1 is unassigned + .short uftNormalSyscall-baseR ; 010 2 0xFFFFFFF2 is unassigned + .short uftNormalSyscall-baseR ; 010 3 0xFFFFFFF3 is unassigned + .short uftNormalSyscall-baseR ; 010 4 0xFFFFFFF4 is unassigned + .short uftNormalSyscall-baseR ; 010 5 0xFFFFFFF5 is unassigned + .short uftIsPreemptiveTaskEnv-baseR ; 010 6 0xFFFFFFFE is Blue Box uftIsPreemptiveTaskEnv + .short uftIsPreemptiveTask-baseR ; 010 7 0xFFFFFFFF is Blue Box IsPreemptiveTask + + .short WhoaBaby-baseR ; 011 0 impossible combination + .short WhoaBaby-baseR ; 011 1 impossible combination + .short WhoaBaby-baseR ; 011 2 impossible combination + .short WhoaBaby-baseR ; 011 3 impossible combination + .short WhoaBaby-baseR ; 011 4 impossible combination + .short WhoaBaby-baseR ; 011 5 impossible combination + .short WhoaBaby-baseR ; 011 6 impossible combination + .short WhoaBaby-baseR ; 011 7 impossible combination + + .short WhoaBaby-baseR ; 100 0 0x6000 is an impossible index (diagCall) + .short WhoaBaby-baseR ; 100 1 0x6001 is an impossible index (vmm_get_version) + .short WhoaBaby-baseR ; 100 2 0x6002 is an impossible index (vmm_get_features) + .short WhoaBaby-baseR ; 100 3 0x6003 is an impossible index (vmm_init_context) + .short uftVMM-baseR ; 100 4 0x6004 is vmm_dispatch (only some of which are UFTs) + .short WhoaBaby-baseR ; 100 5 0x6005 is an impossible index (bb_enable_bluebox) + .short WhoaBaby-baseR ; 100 6 0x6006 is an impossible index (bb_disable_bluebox) + .short WhoaBaby-baseR ; 100 7 0x6007 is an impossible index (bb_settaskenv) + + .align 2 ; prepare for code + + +/* Ultra Fast Trap (UFT) Handlers: + * + * We get here directly from the hw syscall vector via the "scTable" vector (above), + * with interrupts and VM off, in 64-bit mode if supported, and with all registers live + * except the following: + * + * r11 = per_proc ptr (ie, sprg0) + * r13 = holds caller's ctr register + * sprg2 = holds caller's r13 + * sprg3 = holds caller's r11 */ -ufpVM: - cmpwi cr2,r0,0x6004 ; Is it vmm_dispatch - bne cr2,notufp ; Exit If not + +; Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs. 
+ +uftVMM: + mtctr r13 ; restore caller's ctr + lwz r11,spcFlags(r11) ; get the special flags word from per_proc + mfcr r13 ; save caller's entire cr (we use all fields below) + rlwinm r11,r11,16,16,31 ; Extract spcFlags upper bits + andi. r11,r11,hi16(runningVM|FamVMena|FamVMmode) + cmpwi cr0,r11,hi16(runningVM|FamVMena|FamVMmode) ; Test in VM FAM + bne-- uftNormal80 ; not eligible for FAM UFTs cmpwi cr5,r3,kvmmResumeGuest ; Compare r3 with kvmmResumeGuest cmpwi cr2,r3,kvmmSetGuestRegister ; Compare r3 with kvmmSetGuestRegister cror cr1_eq,cr5_lt,cr2_gt ; Set true if out of VMM Fast syscall range - bt- cr1_eq,notufp ; Exit if out of range - b EXT(vmm_ufp) ; Ultra Fast Path syscall + bt-- cr1_eq,uftNormalFF ; Exit if out of range (the others are not UFTs) + b EXT(vmm_ufp) ; handle UFT range of vmm_dispatch syscall + +; Handle blue box UFTs (syscalls -1 and -2). + +uftIsPreemptiveTask: +uftIsPreemptiveTaskEnv: + mtctr r13 ; restore caller's ctr + lwz r11,spcFlags(r11) ; get the special flags word from per_proc + mfcr r13,0x80 ; save caller's cr0 so we can use it + andi. r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need + cmplwi r11,bbNoMachSC ; See if we are trapping syscalls + blt-- uftNormal80 ; No... + cmpwi r0,-2 ; is this call IsPreemptiveTaskEnv? + rlwimi r13,r11,bbPreemptivebit-cr0_eq,cr0_eq,cr0_eq ; Copy preemptive task flag into user cr0_eq + mfsprg r11,0 ; Get the per proc once more + bne++ uftRestoreThenRFI ; do not load r0 if IsPreemptiveTask + lwz r0,ppbbTaskEnv(r11) ; Get the shadowed taskEnv (only difference) + b uftRestoreThenRFI ; restore modified cr0 and return + + +; Handle "Thread Info" UFT (0x7FF2) + + .globl EXT(uft_uaw_nop_if_32bit) +uftThreadInfo: + lwz r3,UAW+4(r11) ; get user assist word, assuming a 32-bit processor +LEXT(uft_uaw_nop_if_32bit) + ld r3,UAW(r11) ; get the whole doubleword if 64-bit (patched to nop if 32-bit) + mtctr r13 ; restore caller's ctr + b uftRFI ; done + + +; Handle "Facility Status" UFT (0x7FF3) + +uftFacilityStatus: + lwz r3,spcFlags(r11) ; get "special flags" word from per_proc + mtctr r13 ; restore caller's ctr + b uftRFI ; done + + +; Handle "Load MSR" UFT (0x7FF4). This is not used on 64-bit processors, though it would work. + +uftLoadMSR: + mfsrr1 r11 ; get caller's MSR + mtctr r13 ; restore caller's ctr + mfcr r13,0x80 ; save caller's cr0 so we can test PR + rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel? + bne- uftNormal80 ; do not permit from user mode + mfsprg r11,0 ; restore per_proc + mtsrr1 r3 ; Set new MSR + + +; Return to caller after UFT. When called: +; r11 = per_proc ptr +; r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called) +; sprg2 = callers r13 +; sprg3 = callers r11 + +uftRestoreThenRFI: ; WARNING: can drop down to here + mtcrf 0x80,r13 ; restore caller's cr0 +uftRFI: + .globl EXT(uft_nop_if_32bit) +LEXT(uft_nop_if_32bit) + b uftX64 ; patched to NOP if 32-bit processor + + lwz r11,pfAvailable(r11) ; Get the feature flags + mfsprg r13,2 ; Restore R13 + mtsprg 2,r11 ; Set the feature flags + mfsprg r11,3 ; Restore R11 + rfi ; Back to our guy... + +uftX64: mtspr hsprg0,r14 ; Save a register in a Hypervisor SPRG + ld r14,UAW(r11) ; Get the User Assist DoubleWord + lwz r11,pfAvailable(r11) ; Get the feature flags + mfsprg r13,2 ; Restore R13 + mtsprg 2,r11 ; Set the feature flags + mfsprg r11,3 ; Restore R11 + mtsprg 3,r14 ; Set the UAW in sprg3 + mfspr r14,hsprg0 ; Restore R14 + rfid ; Back to our guy... + + +; Handle a system call that is not a UFT and which thus goes upstairs. 
+ +uftNormalFF: ; here with entire cr in r13 + mtcr r13 ; restore all 8 fields + b uftNormalSyscall1 ; Join common... + +uftNormal80: ; here with callers cr0 in r13 + mtcrf 0x80,r13 ; restore cr0 + b uftNormalSyscall1 ; Join common... + +uftNormalSyscall: ; r13 = callers ctr + mtctr r13 ; restore ctr +uftNormalSyscall1: + li r11,T_SYSTEM_CALL|T_FAM ; this is a system call (and fall through) + + +/*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>*/ /* * .L_exception_entry(type) * - * This is the common exception handling routine called by any - * type of system exception. + * Come here via branch directly from the vector, or falling down from above, with the following + * set up: * - * ENTRY: via a system exception handler, thus interrupts off, VM off. - * r3 has been saved in sprg3 and now contains a number - * representing the exception's origins + * ENTRY: interrupts off, VM off, in 64-bit mode if supported + * Caller's r13 saved in sprg2. + * Caller's r11 saved in sprg3. + * Exception code (ie, T_SYSTEM_CALL etc) in r11. + * All other registers are live. * */ - - .data - .align ALIGN - .globl EXT(exception_entry) -EXT(exception_entry): - .long .L_exception_entry-EXT(ExceptionVectorsStart) /* phys addr of fn */ - - VECTOR_SEGMENT - .align 5 -.L_exception_entry: +.L_exception_entry: ; WARNING: can fall through from UFT handler /* * @@ -762,7 +890,7 @@ EXT(exception_entry): LEXT(extPatch32) - b extEntry64 ; Go do 64-bit (patched out for 32-bit) + b extEntry64 ; Go do 64-bit (patched to a nop if 32-bit) mfsprg r13,0 ; Load per_proc lwz r13,next_savearea+4(r13) ; Get the exception save area stw r0,saver0+4(r13) ; Save register 0 @@ -1248,7 +1376,7 @@ eE64NoNap: crnot wasNapping,cr0_eq ; Remember if we were napping mfsrr1 r3 ; Load srr1 andc r11,r11,r1 ; Clear FAM bit rlwinm. r3,r3,0,MSR_PR_BIT,MSR_PR_BIT ; Are we trapping from supervisor state? - beq+ eEnoFAM ; From supervisor state + beq++ eEnoFAM ; From supervisor state lwz r1,spcFlags(r2) ; Load spcFlags rlwinm r1,r1,1+FamVMmodebit,30,31 ; Extract FamVMenabit and FamVMmodebit cmpwi cr0,r1,2 ; Check FamVMena set without FamVMmode @@ -1450,7 +1578,7 @@ gotTrcEntSF: ld r1,saver1(r13) ; Get back interrupt time R1 rlwinm r9,r9,20,16,23 ; Isolate the special flags ld r18,saver2(r13) ; Get back interrupt time R2 - std r0,LTR_r0(r20) ; Save off register 0 + std r0,LTR_r0(r20) ; Save off register 0 rlwimi r9,r19,0,24,31 ; Slide in the cpu number ld r3,saver3(r13) ; Restore this one sth r9,LTR_cpu(r20) ; Stash the cpu number and special flags @@ -1569,12 +1697,11 @@ Redrive: lwz r0,saver0+4(r13) ; Get back interrupt time syscall number mfsprg r2,0 ; Restore per_proc - li r20,lo16(xcpTable) ; Point to the vector table (note: this must be in 1st 64k of physical memory) + lwz r20,lo16(xcpTable)(r11) ; Get the interrupt handler (note: xcpTable must be in 1st 32k of physical memory) la r12,hwCounts(r2) ; Point to the exception count area andis. r24,r22,hi16(SAVeat) ; Should we eat this one? 
rlwinm r22,r22,SAVredriveb+1,31,31 ; Get a 1 if we are redriving add r12,r12,r11 ; Point to the count - lwzx r20,r20,r11 ; Get the interrupt handler lwz r25,0(r12) ; Get the old value lwz r23,hwRedrives(r2) ; Get the redrive count crmove cr3_eq,cr0_eq ; Remember if we are ignoring @@ -1582,7 +1709,7 @@ Redrive: mtctr r20 ; Point to the interrupt handler mtcrf 0x80,r0 ; Set our CR0 to the high nybble of possible syscall code add r25,r25,r24 ; Count this one if not a redrive - add r23,r23,r24 ; Count this one if if is a redrive + add r23,r23,r22 ; Count this one if it is a redrive crandc cr0_lt,cr0_lt,cr0_gt ; See if we have R0 equal to 0b10xx...x stw r25,0(r12) ; Store it back stw r23,hwRedrives(r2) ; Save the redrive count @@ -1591,7 +1718,7 @@ Redrive: ; -; Exception vector filter table +; Exception vector filter table (like everything in this file, must be in 1st 32KB of physical memory) ; .align 7 @@ -1648,10 +1775,9 @@ xcpTable: .long WhoaBaby ; T_ARCHDEP0 .long EatRupt ; T_HDEC ; -; Just what the heck happened here???? +; Just what the heck happened here???? +; NB: also get here from UFT dispatch table, on bogus index ; - - .align 5 WhoaBaby: b . ; Open the hood and wait for help @@ -1705,7 +1831,7 @@ FCisok: beq++ cr2,EatRupt ; This is a CutTrace system call, we are done wi LEXT(FCReturn) cmplwi r3,T_IN_VAIN ; Was it handled? - beq+ EatRupt ; Interrupt was handled... + beq++ EatRupt ; Interrupt was handled... mr r11,r3 ; Put the rupt code into the right register b Redrive ; Go through the filter again... @@ -1967,14 +2093,14 @@ xcswNo64: lwz r30,SACvrswap+4(r30) ; get real to virtual translation ; ; Handle machine check here. ; -; +; ? ; .align 5 MachineCheck: - bt++ pf64Bitb,mck64 ; + bt++ pf64Bitb,mck64 ; ? lwz r27,savesrr1+4(r13) ; Pick up srr1 @@ -2442,8 +2568,11 @@ mckL1T: lwz r21,hwMckL1TPE(r2) ; Get TLB parity error count addi r21,r21,1 ; Count it stw r21,hwMckL1TPE(r2) ; Stuff it -ceMck: li r0,1 ; Set the recovered flag before passing up +ceMck: lwz r21,mckFlags(0) ; Get the flags + li r0,1 ; Set the recovered flag before passing up + rlwinm. r21,r21,0,31,31 ; Check if we want to log recoverables stw r0,savemisc3(r13) ; Set it + beq++ EatRupt ; No log of recoverables wanted... b PassUpTrap ; Go up and log error... ueMck: li r0,0 ; Set the unrecovered flag before passing up @@ -2543,7 +2672,7 @@ PassUp: lwz r5,SACvrswap+4(r5) ; Get real to virtual conversion or r21,r21,r3 ; Keep the trace bits if they are on stw r10,SAVflags(r13) ; Set the flags with the cleared redrive flag - mr r3,r11 ; Pass the exception code in the paramter reg + xor r4,r13,r5 ; Pass up the virtual address of context savearea mfsprg r29,0 ; Get the per_proc block back rlwinm r4,r4,0,0,31 ; Clean top half of virtual savearea if 64-bit @@ -3019,7 +3148,7 @@ noPerfMonRestore64: ld r29,saver29(r31) ; Restore R29 lwz r31,pfAvailable(r30) ; Get the feature flags - lwz r30,UAW(r30) ; Get the User Assist Word + ld r30,UAW(r30) ; Get the User Assist DoubleWord mtsprg 2,r31 ; Set the feature flags mfsprg r31,3 ; Restore R31 mtsprg 3,r30 ; Set the UAW @@ -3099,6 +3228,15 @@ eeNoMSRx: xor r31,r31,r4 ; Convert the savearea to physical addressing beq+ EatRupt ; No redrive, just exit... +0: mftbu r2 ; Avoid using an obsolete timestamp for the redrive + mftb r4 + mftbu r0 + cmplw r0,r2 + bne-- 0b + + stw r2,SAVtime(r13) + stw r4,SAVtime+4(r13) + lwz r11,saveexception(r13) ; Restore exception code b Redrive ; Redrive the exception...
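The mftbu/mftb/mftbu loop just added before the Redrive branch is the standard idiom for reading the 64-bit timebase atomically on a 32-bit PowerPC: read the upper half, then the lower half, then the upper half again, and retry if a carry rippled between the two upper reads. A hedged C equivalent, with __mftbu and __mftb standing in for the two instructions:

    #include <stdint.h>

    extern uint32_t __mftbu(void);   /* stand-in for the mftbu instruction */
    extern uint32_t __mftb(void);    /* stand-in for the mftb instruction */

    static uint64_t
    read_timebase(void)
    {
        uint32_t hi, lo, hi2;

        do {
            hi  = __mftbu();         /* upper 32 bits */
            lo  = __mftb();          /* lower 32 bits */
            hi2 = __mftbu();         /* upper half again */
        } while (hi != hi2);         /* carried between reads: retry */

        return ((uint64_t)hi << 32) | lo;
    }

The same pattern appears later in this patch in clock_get_uptime and mach_absolute_time.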
@@ -3117,22 +3255,23 @@ EXT(ExceptionVectorsEnd): /* Used if relocating the exception vectors */ ; . = 0x5000 - .globl EXT(lowGlo) -EXT(lowGlo): - .ascii "Hagfish " ; 5000 Unique eyecatcher .long 0 ; 5008 Zero .long 0 ; 500C Zero cont... - .long EXT(per_proc_info) ; 5010 pointer to per_procs - .long 0 - .long 0 ; 5018 reserved - .long 0 ; 501C reserved - .long 0 ; 5020 reserved - .long 0 ; 5024 reserved - .long 0 ; 5028 reserved - .long 0 ; 502C reserved - .long 0 ; 5030 reserved + .long EXT(PerProcTable) ; 5010 pointer to per_proc_entry table + .long 0 ; 5014 Zero + + .globl EXT(mckFlags) +EXT(mckFlags): + .long 0 ; 5018 Machine check flags + + .long EXT(version) ; 501C Pointer to kernel version string + .long 0 ; 5020 physical memory window virtual address + .long 0 ; 5024 physical memory window virtual address + .long 0 ; 5028 user memory window virtual address + .long 0 ; 502C user memory window virtual address + .long 0 ; 5030 VMM boot-args forced feature flags .long 0 ; 5034 reserved .long 0 ; 5038 reserved .long 0 ; 503C reserved @@ -3252,7 +3391,6 @@ EXT(saveanchor): ; 5100 saveanchor .globl EXT(dgWork) EXT(dgWork): - .long 0 ; 5200 dgLock .long 0 ; 5204 dgFlags .long 0 ; 5208 dgMisc0 @@ -3261,8 +3399,10 @@ EXT(dgWork): .long 0 ; 5214 dgMisc3 .long 0 ; 5218 dgMisc4 .long 0 ; 521C dgMisc5 - - .long 0 ; 5220 reserved + + .globl EXT(LcksOpts) +EXT(LcksOpts): + .long 0 ; 5220 lcksWork .long 0 ; 5224 reserved .long 0 ; 5228 reserved .long 0 ; 522C reserved @@ -3287,14 +3427,17 @@ EXT(dgWork): .long 0 ; 5278 reserved .long 0 ; 527C reserved - .long 0 ; 5280 reserved - .long 0 ; 5284 reserved - .long 0 ; 5288 reserved - .long 0 ; 528C reserved - .long 0 ; 5290 reserved - .long 0 ; 5294 reserved - .long 0 ; 5298 reserved - .long 0 ; 529C reserved + .globl EXT(pPcfg) +EXT(pPcfg): + .long 0x80000000 | (12 << 8) | 12 ; 5280 pcfDefPcfg - 4k + .long 0 ; 5284 pcfLargePcfg + .long 0 ; 5288 Non-primary page configurations + .long 0 ; 528C Non-primary page configurations + .long 0 ; 5290 Non-primary page configurations + .long 0 ; 5294 Non-primary page configurations + .long 0 ; 5298 Non-primary page configurations + .long 0 ; 529C Non-primary page configurations + .long 0 ; 52A0 reserved .long 0 ; 52A4 reserved .long 0 ; 52A8 reserved diff --git a/osfmk/ppc/mach_param.h b/osfmk/ppc/mach_param.h deleted file mode 100644 index 49ac015de..000000000 --- a/osfmk/ppc/mach_param.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Machine-dependent parameters for ppc. - */ - -#define HZ (100) /* clock tick each 10 ms. */ diff --git a/osfmk/ppc/machine_cpu.h b/osfmk/ppc/machine_cpu.h index fcd5f1048..02c5d403e 100644 --- a/osfmk/ppc/machine_cpu.h +++ b/osfmk/ppc/machine_cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,27 +27,15 @@ #include #include -void cpu_machine_init( +extern void cpu_machine_init( void); -kern_return_t cpu_register( - int *); - -kern_return_t cpu_start( - int); - -void cpu_doshutdown( +extern void cpu_doshutdown( void); -void cpu_sleep( - void); - -void cpu_signal_handler( +extern void cpu_signal_handler( void); -void cpu_exit_wait( - int); - typedef void (*broadcastFunc) (int); int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t); diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c index bc7b6dd03..e424bed03 100644 --- a/osfmk/ppc/machine_routines.c +++ b/osfmk/ppc/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -19,29 +19,122 @@ * * @APPLE_LICENSE_HEADER_END@ */ + +#include + #include -#include +#include #include +#include #include +#include #include -#include #include +#include +#include #include +#include #include +#include -unsigned int max_cpus_initialized = 0; -unsigned int LockTimeOut = 12500000; -unsigned int MutexSpin = 0; -extern int forcenap; +#include + +unsigned int LockTimeOut = 12500000; +unsigned int MutexSpin = 0; + +decl_mutex_data(static,mcpus_lock); +unsigned int mcpus_lock_initialized = 0; +unsigned int mcpus_state = 0; -decl_simple_lock_data(, spsLock); -unsigned int spsLockInit = 0; uint32_t warFlags = 0; #define warDisMBpoff 0x80000000 -#define MAX_CPUS_SET 0x1 -#define MAX_CPUS_WAIT 0x2 +#define MAX_CPUS_SET 0x01 +#define MAX_CPUS_WAIT 0x02 -boolean_t get_interrupts_enabled(void); +decl_simple_lock_data(, spsLock); +unsigned int spsLockInit = 0; + +extern unsigned int hwllckPatch_isync; +extern unsigned int hwulckPatch_isync; +extern unsigned int hwulckbPatch_isync; +extern unsigned int hwlmlckPatch_isync; +extern unsigned int hwltlckPatch_isync; +extern unsigned int hwcsatomicPatch_isync; +extern unsigned int mlckePatch_isync; +extern unsigned int mlckPatch_isync; +extern unsigned int mltelckPatch_isync; +extern unsigned int mltlckPatch_isync; +extern unsigned int mulckePatch_isync; +extern unsigned int mulckPatch_isync; +extern unsigned int slckPatch_isync; +extern unsigned int stlckPatch_isync; +extern unsigned int sulckPatch_isync; +extern unsigned int rwlePatch_isync; +extern unsigned int rwlsPatch_isync; +extern unsigned int rwlsePatch_isync; +extern unsigned int rwlesPatch_isync; +extern unsigned int rwtlePatch_isync; +extern unsigned int rwtlsPatch_isync; +extern unsigned int rwldPatch_isync; +extern unsigned int hwulckPatch_eieio; +extern unsigned int mulckPatch_eieio; +extern unsigned int mulckePatch_eieio; +extern unsigned int sulckPatch_eieio; +extern unsigned int rwlesPatch_eieio; +extern unsigned int rwldPatch_eieio; +#if !MACH_LDEBUG +extern unsigned int entfsectPatch_isync; +extern unsigned int retfsectPatch_isync; +extern unsigned int retfsectPatch_eieio; +#endif + +struct patch_up { + unsigned int *addr; + unsigned int data; +}; + +typedef struct patch_up patch_up_t; + +patch_up_t patch_up_table[] = { + {&hwllckPatch_isync, 0x60000000}, + {&hwulckPatch_isync, 0x60000000}, + {&hwulckbPatch_isync, 0x60000000}, + {&hwlmlckPatch_isync, 0x60000000}, + {&hwltlckPatch_isync, 0x60000000}, + {&hwcsatomicPatch_isync, 0x60000000}, + {&mlckePatch_isync, 0x60000000}, + {&mlckPatch_isync, 0x60000000}, + {&mltelckPatch_isync, 0x60000000}, + {&mltlckPatch_isync, 0x60000000}, + {&mulckePatch_isync, 0x60000000}, + {&mulckPatch_isync, 0x60000000}, + {&slckPatch_isync, 0x60000000}, + {&stlckPatch_isync, 0x60000000}, + {&sulckPatch_isync, 0x60000000}, + {&rwlePatch_isync, 0x60000000}, + {&rwlsPatch_isync, 0x60000000}, + {&rwlsePatch_isync, 0x60000000}, + {&rwlesPatch_isync, 0x60000000}, + {&rwtlePatch_isync, 0x60000000}, + {&rwtlsPatch_isync, 0x60000000}, + {&rwldPatch_isync, 0x60000000}, + {&hwulckPatch_eieio, 0x60000000}, + {&hwulckPatch_eieio, 0x60000000}, + {&mulckPatch_eieio, 0x60000000}, + {&mulckePatch_eieio, 0x60000000}, + {&sulckPatch_eieio, 0x60000000}, + {&rwlesPatch_eieio, 0x60000000}, + {&rwldPatch_eieio, 0x60000000}, +#if !MACH_LDEBUG + {&entfsectPatch_isync, 0x60000000}, + {&retfsectPatch_isync, 0x60000000}, + {&retfsectPatch_eieio, 0x60000000}, +#endif + {NULL, 0x00000000} +}; + +extern int forcenap; +extern boolean_t 
pmap_initialized; /* Map memory map IO space */ vm_offset_t @@ -52,29 +145,33 @@ ml_io_map( return(io_map(phys_addr,size)); } -/* static memory allocation */ +/* + * Routine: ml_static_malloc + * Function: static memory allocation + */ vm_offset_t ml_static_malloc( vm_size_t size) { - extern vm_offset_t static_memory_end; - extern boolean_t pmap_initialized; vm_offset_t vaddr; if (pmap_initialized) return((vm_offset_t)NULL); else { vaddr = static_memory_end; - static_memory_end = round_page_32(vaddr+size); + static_memory_end = round_page(vaddr+size); return(vaddr); } } +/* + * Routine: ml_static_ptovirt + * Function: + */ vm_offset_t ml_static_ptovirt( vm_offset_t paddr) { - extern vm_offset_t static_memory_end; vm_offset_t vaddr; /* Static memory is map V=R */ @@ -85,6 +182,10 @@ ml_static_ptovirt( return((vm_offset_t)NULL); } +/* + * Routine: ml_static_mfree + * Function: + */ void ml_static_mfree( vm_offset_t vaddr, @@ -104,14 +205,20 @@ ml_static_mfree( } } -/* virtual to physical on wired pages */ +/* + * Routine: ml_vtophys + * Function: virtual to physical on static pages + */ vm_offset_t ml_vtophys( vm_offset_t vaddr) { return(pmap_extract(kernel_pmap, vaddr)); } -/* Initialize Interrupt Handler */ +/* + * Routine: ml_install_interrupt_handler + * Function: Initialize Interrupt Handler + */ void ml_install_interrupt_handler( void *nub, int source, @@ -119,70 +226,83 @@ void ml_install_interrupt_handler( IOInterruptHandler handler, void *refCon) { - int current_cpu; - boolean_t current_state; + struct per_proc_info *proc_info; + boolean_t current_state; - current_cpu = cpu_number(); current_state = ml_get_interrupts_enabled(); + proc_info = getPerProc(); - per_proc_info[current_cpu].interrupt_nub = nub; - per_proc_info[current_cpu].interrupt_source = source; - per_proc_info[current_cpu].interrupt_target = target; - per_proc_info[current_cpu].interrupt_handler = handler; - per_proc_info[current_cpu].interrupt_refCon = refCon; + proc_info->interrupt_nub = nub; + proc_info->interrupt_source = source; + proc_info->interrupt_target = target; + proc_info->interrupt_handler = handler; + proc_info->interrupt_refCon = refCon; - per_proc_info[current_cpu].interrupts_enabled = TRUE; + proc_info->interrupts_enabled = TRUE; (void) ml_set_interrupts_enabled(current_state); initialize_screen(0, kPEAcquireScreen); } -/* Initialize Interrupts */ +/* + * Routine: ml_init_interrupt + * Function: Initialize Interrupts + */ void ml_init_interrupt(void) { - int current_cpu; boolean_t current_state; current_state = ml_get_interrupts_enabled(); - current_cpu = cpu_number(); - per_proc_info[current_cpu].interrupts_enabled = TRUE; + getPerProc()->interrupts_enabled = TRUE; (void) ml_set_interrupts_enabled(current_state); } -/* Get Interrupts Enabled */ +/* + * Routine: ml_get_interrupts_enabled + * Function: Get Interrupts Enabled + */ boolean_t ml_get_interrupts_enabled(void) { return((mfmsr() & MASK(MSR_EE)) != 0); } -/* Check if running at interrupt context */ +/* + * Routine: ml_at_interrupt_context + * Function: Check if running at interrupt context + */ boolean_t ml_at_interrupt_context(void) { boolean_t ret; boolean_t current_state; current_state = ml_set_interrupts_enabled(FALSE); - ret = (per_proc_info[cpu_number()].istackptr == 0); + ret = (getPerProc()->istackptr == 0); ml_set_interrupts_enabled(current_state); return(ret); } -/* Generate a fake interrupt */ +/* + * Routine: ml_cause_interrupt + * Function: Generate a fake interrupt + */ void ml_cause_interrupt(void) { CreateFakeIO(); } +/* + * 
Routine: ml_thread_policy + * Function: + */ void ml_thread_policy( thread_t thread, unsigned policy_id, unsigned policy_info) { - extern int srv; if ((policy_id == MACHINE_GROUP) && - ((per_proc_info[0].pf.Available) & pfSMPcap)) + ((PerProcTable[master_cpu].ppe_vaddr->pf.Available) & pfSMPcap)) thread_bind(thread, master_processor); if (policy_info & MACHINE_NETWORK_WORKLOOP) { @@ -190,8 +310,6 @@ void ml_thread_policy( thread_lock(thread); - if (srv == 0) - thread->sched_mode |= TH_MODE_FORCEDPREEMPT; set_priority(thread, thread->priority + 1); thread_unlock(thread); @@ -199,192 +317,282 @@ void ml_thread_policy( } } -void machine_idle(void) -{ - struct per_proc_info *ppinfo; - - ppinfo = getPerProc(); - - if ((ppinfo->interrupts_enabled == TRUE) && - (ppinfo->cpu_flags & SignalReady)) { /* Check to see if we are allowed to nap */ - int cur_decr; - - machine_idle_ppc(); - /* - * protect against a lost decrementer trap - * if the current decrementer value is negative - * by more than 10 ticks, re-arm it since it's - * unlikely to fire at this point... a hardware - * interrupt got us out of machine_idle and may - * also be contributing to this state - */ - cur_decr = isync_mfdec(); - - if (cur_decr < -10) { - mtdec(1); - } - } - else { - (void) ml_set_interrupts_enabled(TRUE); /* Enable for interruptions even if nap is not allowed */ - } -} - +/* + * Routine: machine_signal_idle + * Function: + */ void machine_signal_idle( processor_t processor) { - if (per_proc_info[processor->slot_num].pf.Available & (pfCanDoze|pfWillNap)) - (void)cpu_signal(processor->slot_num, SIGPwake, 0, 0); + struct per_proc_info *proc_info; + + proc_info = PROCESSOR_TO_PER_PROC(processor); + + if (proc_info->pf.Available & (pfCanDoze|pfWillNap)) + (void)cpu_signal(proc_info->cpu_number, SIGPwake, 0, 0); } +/* + * Routine: ml_processor_register + * Function: + */ kern_return_t ml_processor_register( - ml_processor_info_t *processor_info, - processor_t *processor, - ipi_handler_t *ipi_handler) + ml_processor_info_t *in_processor_info, + processor_t *processor_out, + ipi_handler_t *ipi_handler) { - kern_return_t ret; - int target_cpu, cpu; - int donap; + struct per_proc_info *proc_info; + int donap; + boolean_t current_state; + boolean_t boot_processor; - if (processor_info->boot_cpu == FALSE) { + if (in_processor_info->boot_cpu == FALSE) { if (spsLockInit == 0) { spsLockInit = 1; simple_lock_init(&spsLock, 0); - } - if (cpu_register(&target_cpu) != KERN_SUCCESS) + } + boot_processor = FALSE; + proc_info = cpu_per_proc_alloc(); + if (proc_info == (struct per_proc_info *)NULL) return KERN_FAILURE; + proc_info->pp_cbfr = console_per_proc_alloc(FALSE); + if (proc_info->pp_cbfr == (void *)NULL) + goto processor_register_error; } else { - /* boot_cpu is always 0 */ - target_cpu = 0; + boot_processor = TRUE; + proc_info = PerProcTable[master_cpu].ppe_vaddr; } - per_proc_info[target_cpu].cpu_id = processor_info->cpu_id; - per_proc_info[target_cpu].start_paddr = processor_info->start_paddr; + proc_info->pp_chud = chudxnu_per_proc_alloc(boot_processor); + if (proc_info->pp_chud == (void *)NULL) + goto processor_register_error; + + if (!boot_processor) + if (cpu_per_proc_register(proc_info) != KERN_SUCCESS) + goto processor_register_error; + + proc_info->cpu_id = in_processor_info->cpu_id; + proc_info->start_paddr = in_processor_info->start_paddr; + if(in_processor_info->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) + proc_info->time_base_enable = in_processor_info->time_base_enable; + else + 
proc_info->time_base_enable = (void(*)(cpu_id_t, boolean_t ))NULL; - if (per_proc_info[target_cpu].pf.pfPowerModes & pmPowerTune) { - per_proc_info[target_cpu].pf.pfPowerTune0 = processor_info->power_mode_0; - per_proc_info[target_cpu].pf.pfPowerTune1 = processor_info->power_mode_1; + if (proc_info->pf.pfPowerModes & pmPowerTune) { + proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0; + proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1; } - donap = processor_info->supports_nap; /* Assume we use requested nap */ - if(forcenap) donap = forcenap - 1; /* If there was an override, use that */ - - if(per_proc_info[target_cpu].pf.Available & pfCanNap) - if(donap) - per_proc_info[target_cpu].pf.Available |= pfWillNap; + donap = in_processor_info->supports_nap; /* Assume we use requested nap */ + if(forcenap) donap = forcenap - 1; /* If there was an override, use that */ - if(processor_info->time_base_enable != (void(*)(cpu_id_t, boolean_t ))NULL) - per_proc_info[target_cpu].time_base_enable = processor_info->time_base_enable; - else - per_proc_info[target_cpu].time_base_enable = (void(*)(cpu_id_t, boolean_t ))NULL; - - if(target_cpu == cpu_number()) - __asm__ volatile("mtsprg 2,%0" : : "r" (per_proc_info[target_cpu].pf.Available)); /* Set live value */ + if((proc_info->pf.Available & pfCanNap) + && (donap)) { + proc_info->pf.Available |= pfWillNap; + current_state = ml_set_interrupts_enabled(FALSE); + if(proc_info == getPerProc()) + __asm__ volatile("mtsprg 2,%0" : : "r" (proc_info->pf.Available)); /* Set live value */ + (void) ml_set_interrupts_enabled(current_state); + } - *processor = cpu_to_processor(target_cpu); + if (!boot_processor) { + (void)hw_atomic_add((uint32_t *)&saveanchor.savetarget, FreeListMin); /* saveareas for this processor */ + processor_init((struct processor *)proc_info->processor, proc_info->cpu_number); + } + + *processor_out = (struct processor *)proc_info->processor; *ipi_handler = cpu_signal_handler; return KERN_SUCCESS; + +processor_register_error: + if (proc_info->pp_cbfr != (void *)NULL) + console_per_proc_free(proc_info->pp_cbfr); + if (proc_info->pp_chud != (void *)NULL) + chudxnu_per_proc_free(proc_info->pp_chud); + if (!boot_processor) + cpu_per_proc_free(proc_info); + return KERN_FAILURE; } +/* + * Routine: ml_enable_nap + * Function: + */ boolean_t ml_enable_nap(int target_cpu, boolean_t nap_enabled) { - boolean_t prev_value = (per_proc_info[target_cpu].pf.Available & pfCanNap) && (per_proc_info[target_cpu].pf.Available & pfWillNap); + struct per_proc_info *proc_info; + boolean_t prev_value; + boolean_t current_state; + + proc_info = PerProcTable[target_cpu].ppe_vaddr; + + prev_value = (proc_info->pf.Available & pfCanNap) && (proc_info->pf.Available & pfWillNap); - if(forcenap) nap_enabled = forcenap - 1; /* If we are to force nap on or off, do it */ + if(forcenap) nap_enabled = forcenap - 1; /* If we are to force nap on or off, do it */ - if(per_proc_info[target_cpu].pf.Available & pfCanNap) { /* Can the processor nap? */ - if (nap_enabled) per_proc_info[target_cpu].pf.Available |= pfWillNap; /* Is nap supported on this machine? */ - else per_proc_info[target_cpu].pf.Available &= ~pfWillNap; /* Clear if not */ + if(proc_info->pf.Available & pfCanNap) { /* Can the processor nap? */ + if (nap_enabled) proc_info->pf.Available |= pfWillNap; /* Is nap supported on this machine? 
*/ + else proc_info->pf.Available &= ~pfWillNap; /* Clear if not */ } - if(target_cpu == cpu_number()) - __asm__ volatile("mtsprg 2,%0" : : "r" (per_proc_info[target_cpu].pf.Available)); /* Set live value */ + current_state = ml_set_interrupts_enabled(FALSE); + if(proc_info == getPerProc()) + __asm__ volatile("mtsprg 2,%0" : : "r" (proc_info->pf.Available)); /* Set live value */ + (void) ml_set_interrupts_enabled(current_state); return (prev_value); } +/* + * Routine: ml_init_max_cpus + * Function: + */ void -ml_init_max_cpus(unsigned long max_cpus) +ml_init_max_cpus(unsigned int mcpus) { - boolean_t current_state; - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - if (max_cpus > 0 && max_cpus < NCPUS) - machine_info.max_cpus = max_cpus; - if (max_cpus_initialized == MAX_CPUS_WAIT) - wakeup((event_t)&max_cpus_initialized); - max_cpus_initialized = MAX_CPUS_SET; + if (hw_compare_and_store(0,1,&mcpus_lock_initialized)) + mutex_init(&mcpus_lock,0); + mutex_lock(&mcpus_lock); + if ((mcpus_state & MAX_CPUS_SET) + || (mcpus == 0) + || (mcpus > MAX_CPUS)) + panic("ml_init_max_cpus(): Invalid call, max_cpus: %d\n", mcpus); + + machine_info.max_cpus = mcpus; + machine_info.physical_cpu_max = mcpus; + machine_info.logical_cpu_max = mcpus; + mcpus_state |= MAX_CPUS_SET; + + if (mcpus_state & MAX_CPUS_WAIT) { + mcpus_state &= ~MAX_CPUS_WAIT; + thread_wakeup((event_t)&mcpus_state); + } + mutex_unlock(&mcpus_lock); + + if (machine_info.logical_cpu_max == 1) { + struct patch_up *patch_up_ptr; + boolean_t current_state; + + patch_up_ptr = &patch_up_table[0]; + + current_state = ml_set_interrupts_enabled(FALSE); + while (patch_up_ptr->addr != NULL) { + /* + * Patch for V=R kernel text section + */ + bcopy_phys((addr64_t)((unsigned int)(&patch_up_ptr->data)), + (addr64_t)((unsigned int)(patch_up_ptr->addr)), 4); + sync_cache64((addr64_t)((unsigned int)(patch_up_ptr->addr)),4); + patch_up_ptr++; + } + (void) ml_set_interrupts_enabled(current_state); } - (void) ml_set_interrupts_enabled(current_state); } -int +/* + * Routine: ml_get_max_cpus + * Function: + */ +unsigned int ml_get_max_cpus(void) { - boolean_t current_state; - - current_state = ml_set_interrupts_enabled(FALSE); - if (max_cpus_initialized != MAX_CPUS_SET) { - max_cpus_initialized = MAX_CPUS_WAIT; - assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT); - (void)thread_block(THREAD_CONTINUE_NULL); - } - (void) ml_set_interrupts_enabled(current_state); + if (hw_compare_and_store(0,1,&mcpus_lock_initialized)) + mutex_init(&mcpus_lock,0); + mutex_lock(&mcpus_lock); + if (!(mcpus_state & MAX_CPUS_SET)) { + mcpus_state |= MAX_CPUS_WAIT; + thread_sleep_mutex((event_t)&mcpus_state, + &mcpus_lock, THREAD_UNINT); + } else + mutex_unlock(&mcpus_lock); return(machine_info.max_cpus); } +/* + * This is called from the machine-independent routine cpu_up() + * to perform machine-dependent info updates. + */ +void +ml_cpu_up(void) +{ + hw_atomic_add(&machine_info.physical_cpu, 1); + hw_atomic_add(&machine_info.logical_cpu, 1); +} + +/* + * This is called from the machine-independent routine cpu_down() + * to perform machine-dependent info updates.
+ */ void -ml_cpu_get_info(ml_cpu_info_t *cpu_info) +ml_cpu_down(void) { - if (cpu_info == 0) return; + hw_atomic_sub(&machine_info.physical_cpu, 1); + hw_atomic_sub(&machine_info.logical_cpu, 1); +} + +/* + * Routine: ml_cpu_get_info + * Function: + */ +void +ml_cpu_get_info(ml_cpu_info_t *ml_cpu_info) +{ + struct per_proc_info *proc_info; + + if (ml_cpu_info == 0) return; - cpu_info->vector_unit = (per_proc_info[0].pf.Available & pfAltivec) != 0; - cpu_info->cache_line_size = per_proc_info[0].pf.lineSize; - cpu_info->l1_icache_size = per_proc_info[0].pf.l1iSize; - cpu_info->l1_dcache_size = per_proc_info[0].pf.l1dSize; + proc_info = PerProcTable[master_cpu].ppe_vaddr; + ml_cpu_info->vector_unit = (proc_info->pf.Available & pfAltivec) != 0; + ml_cpu_info->cache_line_size = proc_info->pf.lineSize; + ml_cpu_info->l1_icache_size = proc_info->pf.l1iSize; + ml_cpu_info->l1_dcache_size = proc_info->pf.l1dSize; - if (per_proc_info[0].pf.Available & pfL2) { - cpu_info->l2_settings = per_proc_info[0].pf.l2cr; - cpu_info->l2_cache_size = per_proc_info[0].pf.l2Size; + if (proc_info->pf.Available & pfL2) { + ml_cpu_info->l2_settings = proc_info->pf.l2cr; + ml_cpu_info->l2_cache_size = proc_info->pf.l2Size; } else { - cpu_info->l2_settings = 0; - cpu_info->l2_cache_size = 0xFFFFFFFF; + ml_cpu_info->l2_settings = 0; + ml_cpu_info->l2_cache_size = 0xFFFFFFFF; } - if (per_proc_info[0].pf.Available & pfL3) { - cpu_info->l3_settings = per_proc_info[0].pf.l3cr; - cpu_info->l3_cache_size = per_proc_info[0].pf.l3Size; + if (proc_info->pf.Available & pfL3) { + ml_cpu_info->l3_settings = proc_info->pf.l3cr; + ml_cpu_info->l3_cache_size = proc_info->pf.l3Size; } else { - cpu_info->l3_settings = 0; - cpu_info->l3_cache_size = 0xFFFFFFFF; + ml_cpu_info->l3_settings = 0; + ml_cpu_info->l3_cache_size = 0xFFFFFFFF; } } +/* + * Routine: ml_enable_cache_level + * Function: + */ #define l2em 0x80000000 #define l3em 0x80000000 - -extern int real_ncpus; - int ml_enable_cache_level(int cache_level, int enable) { int old_mode; unsigned long available, ccr; + struct per_proc_info *proc_info; - if (real_ncpus != 1) return -1; + if (real_ncpus != 1) return -1; /* XXX: This test is not safe */ - available = per_proc_info[0].pf.Available; + proc_info = PerProcTable[master_cpu].ppe_vaddr; + available = proc_info->pf.Available; if ((cache_level == 2) && (available & pfL2)) { - ccr = per_proc_info[0].pf.l2cr; + ccr = proc_info->pf.l2cr; old_mode = (ccr & l2em) ? TRUE : FALSE; if (old_mode != enable) { - if (enable) ccr = per_proc_info[0].pf.l2crOriginal; + if (enable) ccr = proc_info->pf.l2crOriginal; else ccr = 0; - per_proc_info[0].pf.l2cr = ccr; + proc_info->pf.l2cr = ccr; cacheInit(); } @@ -392,12 +600,12 @@ ml_enable_cache_level(int cache_level, int enable) } if ((cache_level == 3) && (available & pfL3)) { - ccr = per_proc_info[0].pf.l3cr; + ccr = proc_info->pf.l3cr; old_mode = (ccr & l3em) ? 
TRUE : FALSE; if (old_mode != enable) { - if (enable) ccr = per_proc_info[0].pf.l3crOriginal; + if (enable) ccr = proc_info->pf.l3crOriginal; else ccr = 0; - per_proc_info[0].pf.l3cr = ccr; + proc_info->pf.l3cr = ccr; cacheInit(); } @@ -407,6 +615,9 @@ ml_enable_cache_level(int cache_level, int enable) return -1; } + +decl_simple_lock_data(, spsLock); + /* * Routine: ml_set_processor_speed * Function: @@ -414,19 +625,15 @@ ml_enable_cache_level(int cache_level, int enable) void ml_set_processor_speed(unsigned long speed) { - struct per_proc_info *proc_info; - uint32_t powerModes, cpu; - kern_return_t result; - boolean_t current_state; - unsigned int i; - - extern void ml_set_processor_speed_slave(unsigned long speed); - extern void ml_set_processor_speed_dpll(unsigned long speed); - extern void ml_set_processor_speed_dfs(unsigned long speed); - extern void ml_set_processor_speed_powertune(unsigned long speed); - - powerModes = per_proc_info[0].pf.pfPowerModes; + struct per_proc_info *proc_info; + uint32_t powerModes, cpu; + kern_return_t result; + boolean_t current_state; + unsigned int i; + proc_info = PerProcTable[master_cpu].ppe_vaddr; + powerModes = proc_info->pf.pfPowerModes; + if (powerModes & pmDualPLL) { ml_set_processor_speed_dpll(speed); @@ -441,8 +648,7 @@ ml_set_processor_speed(unsigned long speed) for (i=200; i>0; i--) { current_state = ml_set_interrupts_enabled(FALSE); if (cpu != cpu_number()) { - if(!((machine_slot[cpu].running) && - (per_proc_info[cpu].cpu_flags & SignalReady))) + if (PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady) /* * Target cpu is off-line, skip */ @@ -480,11 +686,9 @@ ml_set_processor_speed(unsigned long speed) void ml_set_processor_speed_slave(unsigned long speed) { - extern void ml_set_processor_speed_dfs(unsigned long speed); - ml_set_processor_speed_dfs(speed); - simple_lock(&spsLock); + simple_lock(&spsLock); thread_wakeup(&spsLock); simple_unlock(&spsLock); } @@ -507,66 +711,92 @@ ml_init_lock_timeout(void) mtxspin = USEC_PER_SEC>>4; nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime); } else { - nanoseconds_to_absolutetime(20*NSEC_PER_USEC, &abstime); + nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime); } MutexSpin = (unsigned int)abstime; } +/* + * Routine: init_ast_check + * Function: + */ void -init_ast_check(processor_t processor) +init_ast_check( + __unused processor_t processor) {} - + +/* + * Routine: cause_ast_check + * Function: + */ void cause_ast_check( processor_t processor) { - if ( processor != current_processor() && - per_proc_info[processor->slot_num].interrupts_enabled == TRUE ) - cpu_signal(processor->slot_num, SIGPast, NULL, NULL); + struct per_proc_info *proc_info; + + proc_info = PROCESSOR_TO_PER_PROC(processor); + + if (proc_info != getPerProc() + && proc_info->interrupts_enabled == TRUE) + cpu_signal(proc_info->cpu_number, SIGPast, (unsigned int)NULL, (unsigned int)NULL); } +/* + * Routine: machine_processor_shutdown + * Function: + */ thread_t -switch_to_shutdown_context( - thread_t thread, - void (*doshutdown)(processor_t), - processor_t processor) +machine_processor_shutdown( + __unused thread_t thread, + __unused void (*doshutdown)(processor_t), + __unused processor_t processor) { CreateShutdownCTX(); - return((thread_t)(per_proc_info[cpu_number()].old_thread)); + return((thread_t)(getPerProc()->old_thread)); } +/* + * Routine: set_be_bit + * Function: + */ int -set_be_bit() +set_be_bit( + void) { - - int mycpu; boolean_t current_state; - current_state = ml_set_interrupts_enabled(FALSE); /* Can't 
allow interruptions when mucking with per_proc flags */ - mycpu = cpu_number(); - per_proc_info[mycpu].cpu_flags |= traceBE; + current_state = ml_set_interrupts_enabled(FALSE); + getPerProc()->cpu_flags |= traceBE; (void) ml_set_interrupts_enabled(current_state); return(1); } +/* + * Routine: clr_be_bit + * Function: + */ int -clr_be_bit() +clr_be_bit( + void) { - int mycpu; boolean_t current_state; - current_state = ml_set_interrupts_enabled(FALSE); /* Can't allow interruptions when mucking with per_proc flags */ - mycpu = cpu_number(); - per_proc_info[mycpu].cpu_flags &= ~traceBE; + current_state = ml_set_interrupts_enabled(FALSE); + getPerProc()->cpu_flags &= ~traceBE; (void) ml_set_interrupts_enabled(current_state); return(1); } +/* + * Routine: be_tracing + * Function: + */ int -be_tracing() +be_tracing( + void) { - int mycpu = cpu_number(); - return(per_proc_info[mycpu].cpu_flags & traceBE); + return(getPerProc()->cpu_flags & traceBE); } diff --git a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h index c274705ec..02850ad89 100644 --- a/osfmk/ppc/machine_routines.h +++ b/osfmk/ppc/machine_routines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -29,22 +29,25 @@ #include #include #include -#include #include -#include - /* Get Interrupts Enabled */ -boolean_t ml_get_interrupts_enabled(void); +extern boolean_t ml_get_interrupts_enabled( + void); /* Set Interrupts Enabled */ -boolean_t ml_set_interrupts_enabled(boolean_t enable); +extern boolean_t ml_set_interrupts_enabled( + boolean_t enable); /* Check if running at interrupt context */ -boolean_t ml_at_interrupt_context(void); +extern boolean_t ml_at_interrupt_context( + void); + +#ifdef KERNEL_PRIVATE /* Generate a fake interrupt */ -void ml_cause_interrupt(void); +extern void ml_cause_interrupt( + void); /* Type for the IPI Hander */ typedef void (*ipi_handler_t)(void); @@ -53,102 +56,126 @@ typedef void (*ipi_handler_t)(void); typedef void (*time_base_enable_t)(cpu_id_t cpu_id, boolean_t enable); /* enables (or disables) the processor nap mode the function returns the previous value*/ -boolean_t ml_enable_nap( - int target_cpu, - boolean_t nap_enabled); +extern boolean_t ml_enable_nap( + int target_cpu, + boolean_t nap_enabled); /* Put the processor to sleep */ -void ml_ppc_sleep(void); -void ml_get_timebase(unsigned long long *timstamp); -void ml_sense__nmi(void); +extern void ml_ppc_sleep( + void); + +extern void ml_get_timebase( + unsigned long long *timstamp); -int ml_enable_cache_level(int cache_level, int enable); +extern int ml_enable_cache_level( + int cache_level, + int enable); -void ml_static_mfree( - vm_offset_t, - vm_size_t); +extern void ml_static_mfree( + vm_offset_t vaddr, + vm_size_t size); /* Init Interrupts */ -void ml_install_interrupt_handler( - void *nub, - int source, - void *target, - IOInterruptHandler handler, - void *refCon); +extern void ml_install_interrupt_handler( + void *nub, + int source, + void *target, + IOInterruptHandler handler, + void *refCon); -#ifdef __APPLE_API_UNSTABLE - -vm_offset_t -ml_static_ptovirt( - vm_offset_t); +extern vm_offset_t ml_static_ptovirt( + vm_offset_t paddr); /* virtual to physical on wired pages */ -vm_offset_t ml_vtophys( - vm_offset_t vaddr); +extern vm_offset_t ml_vtophys( + vm_offset_t vaddr); /* PCI config cycle probing */ -boolean_t ml_probe_read( - vm_offset_t paddr, - unsigned int *val); -boolean_t 
ml_probe_read_64( - addr64_t paddr, - unsigned int *val); +extern boolean_t ml_probe_read( + vm_offset_t paddr, + unsigned int *val); + +extern boolean_t ml_probe_read_64( + addr64_t paddr, + unsigned int *val); /* Read physical address byte */ -unsigned int ml_phys_read_byte( - vm_offset_t paddr); -unsigned int ml_phys_read_byte_64( - addr64_t paddr); +extern unsigned int ml_phys_read_byte( + vm_offset_t paddr); + +extern unsigned int ml_phys_read_byte_64( + addr64_t paddr); /* Read physical address half word */ -unsigned int ml_phys_read_half( - vm_offset_t paddr); -unsigned int ml_phys_read_half_64( - addr64_t paddr); +extern unsigned int ml_phys_read_half( + vm_offset_t paddr); + +extern unsigned int ml_phys_read_half_64( + addr64_t paddr); /* Read physical address word*/ -unsigned int ml_phys_read( - vm_offset_t paddr); -unsigned int ml_phys_read_64( - addr64_t paddr); -unsigned int ml_phys_read_word( - vm_offset_t paddr); -unsigned int ml_phys_read_word_64( - addr64_t paddr); +extern unsigned int ml_phys_read( + vm_offset_t paddr); + +extern unsigned int ml_phys_read_64( + addr64_t paddr); + +extern unsigned int ml_phys_read_word( + vm_offset_t paddr); + +extern unsigned int ml_phys_read_word_64( + addr64_t paddr); /* Read physical address double word */ -unsigned long long ml_phys_read_double( - vm_offset_t paddr); -unsigned long long ml_phys_read_double_64( - addr64_t paddr); +extern unsigned long long ml_phys_read_double( + vm_offset_t paddr); + +extern unsigned long long ml_phys_read_double_64( + addr64_t paddr); /* Write physical address byte */ -void ml_phys_write_byte( - vm_offset_t paddr, unsigned int data); -void ml_phys_write_byte_64( - addr64_t paddr, unsigned int data); +extern void ml_phys_write_byte( + vm_offset_t paddr, + unsigned int data); + +extern void ml_phys_write_byte_64( + addr64_t paddr, + unsigned int data); /* Write physical address half word */ -void ml_phys_write_half( - vm_offset_t paddr, unsigned int data); -void ml_phys_write_half_64( - addr64_t paddr, unsigned int data); +extern void ml_phys_write_half( + vm_offset_t paddr, + unsigned int data); + +extern void ml_phys_write_half_64( + addr64_t paddr, + unsigned int data); /* Write physical address word */ -void ml_phys_write( - vm_offset_t paddr, unsigned int data); -void ml_phys_write_64( - addr64_t paddr, unsigned int data); -void ml_phys_write_word( - vm_offset_t paddr, unsigned int data); -void ml_phys_write_word_64( - addr64_t paddr, unsigned int data); +extern void ml_phys_write( + vm_offset_t paddr, + unsigned int data); + +extern void ml_phys_write_64( + addr64_t paddr, + unsigned int data); + +extern void ml_phys_write_word( + vm_offset_t paddr, + unsigned int data); + +extern void ml_phys_write_word_64( + addr64_t paddr, + unsigned int data); /* Write physical address double word */ -void ml_phys_write_double( - vm_offset_t paddr, unsigned long long data); -void ml_phys_write_double_64( - addr64_t paddr, unsigned long long data); +extern void ml_phys_write_double( + vm_offset_t paddr, + unsigned long long data); + +extern void ml_phys_write_double_64( + addr64_t paddr, + unsigned long long data); /* Struct for ml_processor_register */ struct ml_processor_info { @@ -156,8 +183,8 @@ struct ml_processor_info { boolean_t boot_cpu; vm_offset_t start_paddr; boolean_t supports_nap; - unsigned long l2cr_value; - time_base_enable_t time_base_enable; + unsigned long l2cr_value; + time_base_enable_t time_base_enable; uint32_t power_mode_0; uint32_t power_mode_1; }; @@ -165,68 +192,68 @@ struct 
ml_processor_info { typedef struct ml_processor_info ml_processor_info_t; /* Register a processor */ -kern_return_t ml_processor_register( - ml_processor_info_t *processor_info, - processor_t *processor, - ipi_handler_t *ipi_handler); +extern kern_return_t ml_processor_register( + ml_processor_info_t *ml_processor_info, + processor_t *processor, + ipi_handler_t *ipi_handler); /* Zero bytes starting at a physical address */ -void bzero_phys( - addr64_t phys_address, - uint32_t length); +extern void bzero_phys( + addr64_t phys_address, + uint32_t length); -#endif /* __APPLE_API_UNSTABLE */ +#endif /* KERNEL_PRIVATE */ -#ifdef __APPLE_API_PRIVATE +#ifdef XNU_KERNEL_PRIVATE #if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE) -/* IO memory map services */ /* Map memory map IO space */ -vm_offset_t ml_io_map( - vm_offset_t phys_addr, - vm_size_t size); +extern vm_offset_t ml_io_map( + vm_offset_t phys_addr, + vm_size_t size); /* boot memory allocation */ -vm_offset_t ml_static_malloc( - vm_size_t size); +extern vm_offset_t ml_static_malloc( + vm_size_t size); #endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ -#ifdef MACH_KERNEL_PRIVATE -void ml_init_interrupt(void); - -boolean_t fake_get_interrupts_enabled(void); +#if defined(BSD_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE) -boolean_t fake_set_interrupts_enabled( - boolean_t enable); +extern int set_be_bit( + void); -void machine_idle(void); +extern int clr_be_bit( + void); -void machine_signal_idle( - processor_t processor); +extern int be_tracing( + void); -void cacheInit(void); +#endif /* BSD_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ -void cacheDisable(void); +#ifdef MACH_KERNEL_PRIVATE +extern void ml_init_interrupt( + void); -void ml_thrm_init(void); -unsigned int ml_read_temp(void); +extern void cacheInit( + void); -void ml_thrm_set( - unsigned int low, - unsigned int high); +extern void cacheDisable( + void); -unsigned int ml_throttle( - unsigned int); +extern void ml_init_lock_timeout( + void); -void ml_init_lock_timeout(void); +void ml_ppc_do_sleep(void); #endif /* MACH_KERNEL_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ -void ml_thread_policy( - thread_t thread, - unsigned policy_id, - unsigned policy_info); +#ifdef KERNEL_PRIVATE +extern void ml_thread_policy( + thread_t thread, + unsigned policy_id, + unsigned policy_info); #define MACHINE_GROUP 0x00000001 #define MACHINE_NETWORK_GROUP 0x10000000 @@ -234,16 +261,15 @@ void ml_thread_policy( #define MACHINE_NETWORK_NETISR 0x00000002 /* Initialize the maximum number of CPUs */ -void ml_init_max_cpus( - unsigned long max_cpus); +extern void ml_init_max_cpus( + unsigned int max_cpus); /* Return the maximum number of CPUs set by ml_init_max_cpus() */ -int ml_get_max_cpus( - void); +extern unsigned int ml_get_max_cpus( + void); -/* Return the current number of CPUs */ -int ml_get_current_cpus( - void); +extern void ml_cpu_up(void); +extern void ml_cpu_down(void); /* Struct for ml_cpu_get_info */ struct ml_cpu_info { @@ -260,14 +286,33 @@ struct ml_cpu_info { typedef struct ml_cpu_info ml_cpu_info_t; /* Get processor info */ -void ml_cpu_get_info(ml_cpu_info_t *cpu_info); - -void ml_set_processor_speed(unsigned long speed); -void ml_set_processor_voltage(unsigned long voltage); -unsigned int ml_scom_write(uint32_t reg, uint64_t data); -unsigned int ml_scom_read(uint32_t reg, uint64_t *data); -uint32_t ml_hdec_ratio(void); - -#endif /* __APPLE_API_PRIVATE */ +extern void ml_cpu_get_info( + ml_cpu_info_t *ml_cpu_info); + +extern void ml_set_processor_speed( + 
unsigned long speed); +extern void ml_set_processor_speed_slave( + unsigned long speed); +extern void ml_set_processor_speed_dpll( + unsigned long speed); +extern void ml_set_processor_speed_dfs( + unsigned long speed); +extern void ml_set_processor_speed_powertune( + unsigned long speed); + +extern void ml_set_processor_voltage( + unsigned long voltage); + +extern unsigned int ml_scom_write( + uint32_t reg, + uint64_t data); + +extern unsigned int ml_scom_read( + uint32_t reg, + uint64_t *data); + +extern uint32_t ml_hdec_ratio(void); + +#endif /* KERNEL_PRIVATE */ #endif /* _PPC_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s index e3f8df43b..854928434 100644 --- a/osfmk/ppc/machine_routines_asm.s +++ b/osfmk/ppc/machine_routines_asm.s @@ -21,7 +21,6 @@ */ #include #include -#include #include #include #include @@ -710,13 +709,12 @@ LEXT(ml_set_interrupts_enabled) .align 5 CheckPreemption: - mfsprg r7,0 - ori r5,r5,lo16(MASK(MSR_EE)) ; Turn on the enable - lwz r8,PP_NEED_AST(r7) ; Get pointer to AST flags mfsprg r9,1 ; Get current activation + lwz r7,ACT_PER_PROC(r9) ; Get the per_proc block + ori r5,r5,lo16(MASK(MSR_EE)) ; Turn on the enable + lwz r8,PP_PENDING_AST(r7) ; Get pending AST mask li r6,AST_URGENT ; Get the type we will preempt for lwz r7,ACT_PREEMPT_CNT(r9) ; Get preemption count - lwz r8,0(r8) ; Get AST flags lis r0,hi16(DoPreemptCall) ; High part of Preempt FW call cmpwi cr1,r7,0 ; Are preemptions masked off? and. r8,r8,r6 ; Are we urgent? @@ -730,27 +728,67 @@ CheckPreemption: sc ; Preempt blr -/* Emulate a decremeter exception - * - * void machine_clock_assist(void) - * - */ +; Force a line boundry here + .align 5 + .globl EXT(timer_update) + +LEXT(timer_update) + stw r4,TIMER_HIGHCHK(r3) + eieio + stw r5,TIMER_LOW(r3) + eieio + stw r4,TIMER_HIGH(r3) + blr ; Force a line boundry here .align 5 - .globl EXT(machine_clock_assist) + .globl EXT(timer_grab) -LEXT(machine_clock_assist) +LEXT(timer_grab) +0: lwz r11,TIMER_HIGH(r3) + lwz r4,TIMER_LOW(r3) + isync + lwz r9,TIMER_HIGHCHK(r3) + cmpw r11,r9 + bne-- 0b + mr r3,r11 + blr - mfsprg r7,0 - lwz r4,PP_INTS_ENABLED(r7) - mr. r4,r4 - bnelr+ cr0 - b EXT(CreateFakeDEC) +; Force a line boundry here + .align 5 + .globl EXT(timer_event) + +LEXT(timer_event) + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block + addi r10,r10,PP_PROCESSOR + lwz r11,CURRENT_TIMER(r10) + + lwz r9,TIMER_LOW(r11) + lwz r2,TIMER_TSTAMP(r11) + add r0,r9,r3 + subf r5,r2,r0 + cmplw r5,r9 + bge++ 0f + + lwz r6,TIMER_HIGH(r11) + addi r6,r6,1 + stw r6,TIMER_HIGHCHK(r11) + eieio + stw r5,TIMER_LOW(r11) + eieio + stw r6,TIMER_HIGH(r11) + b 1f + +0: stw r5,TIMER_LOW(r11) + +1: stw r4,CURRENT_TIMER(r10) + stw r3,TIMER_TSTAMP(r4) + blr /* Set machine into idle power-saving mode. * - * void machine_idle_ppc(void) + * void machine_idle(void) * * We will use the PPC NAP or DOZE for this. * This call always returns. Must be called with spllo (i.e., interruptions @@ -760,12 +798,21 @@ LEXT(machine_clock_assist) ; Force a line boundry here .align 5 - .globl EXT(machine_idle_ppc) + .globl EXT(machine_idle) -LEXT(machine_idle_ppc) +LEXT(machine_idle) - lis r0,hi16(MASK(MSR_VEC)) ; Get the vector flag + mfsprg r12,1 ; Get the current activation + lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block + lhz r10,PP_CPU_FLAGS(r12) ; Get the flags + lwz r11,PP_INTS_ENABLED(r12) ; Get interrupt enabled state + andi. r10,r10,SignalReady ; Are Signal ready? 
+ cmpwi cr1,r11,0 ; Are interrupt disabled? + cror cr0_eq, cr1_eq, cr0_eq ; Interrupt disabled or Signal not ready? mfmsr r3 ; Save the MSR + + beq-- nonap ; Yes, return after re-enabling interrupts + lis r0,hi16(MASK(MSR_VEC)) ; Get the vector flag ori r0,r0,lo16(MASK(MSR_FP)) ; Add the FP flag andc r3,r3,r0 ; Clear VEC and FP ori r0,r0,lo16(MASK(MSR_EE)) ; Drop EE also @@ -773,7 +820,6 @@ LEXT(machine_idle_ppc) mtmsr r5 ; Hold up interruptions for now isync ; May have messed with fp/vec - mfsprg r12,0 ; Get the per_proc_info mfsprg r11,2 ; Get CPU specific features mfspr r6,hid0 ; Get the current power-saving mode mtcrf 0xC7,r11 ; Get the facility flags @@ -784,7 +830,7 @@ LEXT(machine_idle_ppc) lis r4,hi16(dozem) ; Assume we can doze bt pfCanDozeb,yesnap ; We can sleep or doze one this machine... - ori r3,r3,lo16(MASK(MSR_EE)) ; Flip on EE +nonap: ori r3,r3,lo16(MASK(MSR_EE)) ; Flip on EE mtmsr r3 ; Turn interruptions back on blr ; Leave... @@ -798,7 +844,7 @@ yesnap: mftbu r9 ; Get the upper timebase stw r7,napStamp+4(r12) ; Set low order nap stamp rlwinm. r0,r11,0,pfAltivecb,pfAltivecb ; Do we have altivec? - beq- minovec ; No... + beq-- minovec ; No... dssall ; Stop the streams before we nap/doze sync lwz r8,napStamp(r12) ; Reload high order time stamp @@ -806,10 +852,9 @@ clearpipe: cmplw r8,r8 bne- clearpipe isync -minovec: - rlwinm. r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb ; Turn off L2 Prefetch before nap? - beq miL2PFok +minovec: rlwinm. r7,r11,0,pfNoL2PFNapb,pfNoL2PFNapb ; Turn off L2 Prefetch before nap? + beq++ miL2PFok mfspr r7,msscr0 ; Get currect MSSCR0 value rlwinm r7,r7,0,0,l2pfes-1 ; Disable L2 Prefetch @@ -817,8 +862,9 @@ minovec: sync isync -miL2PFok: rlwinm. r7,r11,0,pfSlowNapb,pfSlowNapb ; Should nap at slow speed? - beq minoslownap +miL2PFok: + rlwinm. r7,r11,0,pfSlowNapb,pfSlowNapb ; Should nap at slow speed? + beq minoslownap mfspr r7,hid1 ; Get current HID1 value oris r7,r7,hi16(hid1psm) ; Select PLL1 @@ -859,6 +905,7 @@ mipNSF1: li r2,lo16(MASK(MSR_DR)|MASK(MSR_IR)) ; Get the translation mask mfspr r6,hid0 ; Yes, this is a duplicate, keep it here isync ; Make sure it is set + ; ; Turn translation off to nap ; @@ -912,21 +959,30 @@ mipowloop: LEXT(machine_idle_ret) mtmsr r7 ; Make sure the MSR is what we want isync ; In case we turn on translation - +; +; Protect against a lost decrementer trap if the current decrementer value is negative +; by more than 10 ticks, re-arm it since it is unlikely to fire at this point... +; A hardware interrupt got us out of machine_idle and may also be contributing to this state +; + mfdec r6 ; Get decrementer + cmpwi cr0,r6,-10 ; Compare decrementer with -10 + bgelr++ ; Return if greater + li r0,1 ; Load 1 + mtdec r0 ; Set decrementer to 1 blr ; Return... /* Put machine to sleep. * This call never returns. We always exit sleep via a soft reset. * All external interruptions must be drained at this point and disabled. * - * void ml_ppc_sleep(void) + * void ml_ppc_do_sleep(void) * * We will use the PPC SLEEP for this. * * There is one bit of hackery in here: we need to enable for * interruptions when we go to sleep and there may be a pending - * decrementer rupt. So we make the decrementer 0x7FFFFFFF and enable for - * interruptions. The decrementer rupt vector recognizes this and returns + * decrimenter rupt. So we make the decrimenter 0x7FFFFFFF and enable for + * interruptions. The decrimenter rupt vector recognizes this and returns * directly back here. 
* */ @@ -934,9 +990,11 @@ LEXT(machine_idle_ret) ; Force a line boundry here .align 5 .globl EXT(ml_ppc_sleep) - LEXT(ml_ppc_sleep) + .globl EXT(ml_ppc_do_sleep) +LEXT(ml_ppc_do_sleep) + #if 0 mfmsr r5 ; Hack to spin instead of sleep rlwinm r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1 ; Turn off translation @@ -950,10 +1008,13 @@ deadsleep: addi r3,r3,1 ; Make analyzer happy b deadsleep ; Die the death of 1000 joys... #endif - mfsprg r12,0 ; Get the per_proc_info - mfspr r4,hid0 ; Get the current power-saving mode - eqv r10,r10,r10 ; Get all foxes + mfsprg r12,1 ; Get the current activation + lwz r12,ACT_PER_PROC(r12) ; Get the per_proc block mfsprg r11,2 ; Get CPU specific features + eqv r10,r10,r10 ; Get all foxes + mtcrf 0x04,r11 ; move pfNoMSRirb to cr5 + mfspr r4,hid0 ; Get the current power-saving mode + mtcrf 0x02,r11 ; move pf64Bit to cr6 rlwinm. r5,r11,0,pfNoL2PFNapb,pfNoL2PFNapb ; Turn off L2 Prefetch before sleep? beq mpsL2PFok @@ -965,8 +1026,7 @@ deadsleep: addi r3,r3,1 ; Make analyzer happy isync mpsL2PFok: - rlwinm. r5,r11,0,pf64Bitb,pf64Bitb ; PM bits are shifted on 64bit systems. - bne mpsPF64bit + bt++ pf64Bitb,mpsPF64bit ; PM bits are shifted on 64bit systems. rlwinm r4,r4,0,sleep+1,doze-1 ; Clear all possible power-saving modes (not DPM though) oris r4,r4,hi16(sleepm) ; Set sleep @@ -983,17 +1043,23 @@ mpsPF64bit: mpsClearDEC: mfmsr r5 ; Get the current MSR rlwinm r10,r10,0,1,31 ; Make 0x7FFFFFFF - mtdec r10 ; Load decrementer with 0x7FFFFFFF + mtdec r10 ; Load decrimenter with 0x7FFFFFFF isync ; and make sure, mfdec r9 ; really sure, it gets there - mtcrf 0x07,r11 ; Get the cache flags, etc - + li r2,1 ; Prepare for 64 bit rlwinm r5,r5,0,MSR_DR_BIT+1,MSR_IR_BIT-1 ; Turn off translation ; ; Note that we need translation off before we set the HID to sleep. Otherwise ; we will ignore any PTE misses that occur and cause an infinite loop. ; + bf++ pf64Bitb,mpsCheckMSR ; check 64-bit processor + rldimi r5,r2,63,MSR_SF_BIT ; set SF bit (bit 0) + mtmsrd r5 ; set 64-bit mode, turn off EE, DR, and IR + isync ; Toss prefetch + b mpsNoMSRx + +mpsCheckMSR: bt pfNoMSRirb,mpsNoMSR ; No MSR... mtmsr r5 ; Translation off @@ -1016,7 +1082,7 @@ mpsNoMSRx: mfspr r4,hid0 ; Yes, this is a duplicate, keep it here mfspr r4,hid0 ; Yes, this is a duplicate, keep it here - mtmsr r3 ; Enable for interrupts to drain decrementer + mtmsr r3 ; Enable for interrupts to drain decrimenter add r6,r4,r5 ; Just waste time add r6,r6,r4 ; A bit more @@ -1027,7 +1093,7 @@ mpsNoMSRx: ; ; We are here with translation off, interrupts off, all possible -; interruptions drained off, and a decrementer that will not pop. +; interruptions drained off, and a decrimenter that will not pop. ; bl EXT(cacheInit) ; Clear out the caches. This will leave them on @@ -1048,8 +1114,8 @@ mpsNoMSRx: eqv r4,r4,r4 ; Get all foxes rlwinm r4,r4,0,1,31 ; Make 0x7FFFFFFF beq slSleepNow ; skip if 32-bit... - li r3,0x4000 ; Cause decrementer to roll over soon - mtdec r3 ; Load decrementer with 0x00004000 + li r3, 0x4000 ; Cause decrimenter to roll over soon + mtdec r3 ; Load decrimenter with 0x00004000 isync ; and make sure, mfdec r3 ; really sure, it gets there @@ -1057,7 +1123,7 @@ slSleepNow: sync ; Sync it all up mtmsr r5 ; Do sleep with interruptions enabled isync ; Take a pill - mtdec r4 ; Load decrementer with 0x7FFFFFFF + mtdec r4 ; Load decrimenter with 0x7FFFFFFF isync ; and make sure, mfdec r3 ; really sure, it gets there b slSleepNow ; Go back to sleep if we wake up... 
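Stepping back to the timer_update/timer_grab pair added to machine_routines_asm.s above: together they implement a small seqlock-style protocol that lets 64-bit timer values be published and read on 32-bit hardware without a lock. The writer stores highchk, then low, then high, separated by eieio; a reader accepts a snapshot only when high and highchk agree, retrying otherwise. A C model of the same idea, with the struct layout and barrier macros as illustrative stand-ins for the real savearea offsets and instructions:

    #include <stdint.h>

    struct ppc_timer {               /* models TIMER_HIGHCHK/TIMER_LOW/TIMER_HIGH */
        volatile uint32_t highchk;
        volatile uint32_t low;
        volatile uint32_t high;
    };

    #define wmb() __asm__ __volatile__("eieio" ::: "memory")
    #define rmb() __asm__ __volatile__("isync" ::: "memory")

    static void
    timer_update_c(struct ppc_timer *t, uint32_t hi, uint32_t lo)
    {
        t->highchk = hi;  wmb();     /* announce the new upper half */
        t->low     = lo;  wmb();
        t->high    = hi;             /* commit: matches highchk again */
    }

    static uint64_t
    timer_grab_c(struct ppc_timer *t)
    {
        uint32_t hi, lo;

        do {
            hi = t->high;
            lo = t->low;
            rmb();                   /* order the loads before the check */
        } while (t->highchk != hi);  /* writer was mid-update: retry */

        return ((uint64_t)hi << 32) | lo;
    }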
@@ -1195,8 +1261,8 @@ ciswdl1: lwz r0,pfl1dSize(r12) ; Get the level 1 cache size bf 31,cisnlck ; Skip if pfLClck not set... - mfspr r4,msscr0 ; - rlwinm r6,r4,0,0,l2pfes-1 ; + mfspr r4,msscr0 ; ? + rlwinm r6,r4,0,0,l2pfes-1 ; ? mtspr msscr0,r6 ; Set it sync isync @@ -1252,7 +1318,7 @@ cisflush: dcbf r3,r6 ; Flush everything out sync isync - mtspr msscr0,r4 ; + mtspr msscr0,r4 ; ? sync isync @@ -1458,19 +1524,19 @@ ciinvdl3b: mfspr r8,l3cr ; Get the L3CR bne+ ciinvdl3b ; Assume so... sync - lwz r10, pfBootConfig(r12) ; - rlwinm. r10, r10, 24, 28, 31 ; - beq ciinvdl3nopdet ; - - mfspr r8,l3pdet ; - srw r2, r8, r10 ; - rlwimi r2, r8, 0, 24, 31 ; - subfic r10, r10, 32 ; - li r8, -1 ; - ori r2, r2, 0x0080 ; - slw r8, r8, r10 ; - or r8, r2, r8 ; - mtspr l3pdet, r8 ; + lwz r10, pfBootConfig(r12) ; ? + rlwinm. r10, r10, 24, 28, 31 ; ? + beq ciinvdl3nopdet ; ? + + mfspr r8,l3pdet ; ? + srw r2, r8, r10 ; ? + rlwimi r2, r8, 0, 24, 31 ; ? + subfic r10, r10, 32 ; ? + li r8, -1 ; ? + ori r2, r2, 0x0080 ; ? + slw r8, r8, r10 ; ? + or r8, r2, r8 ; ? + mtspr l3pdet, r8 ; ? isync ciinvdl3nopdet: @@ -1478,14 +1544,14 @@ ciinvdl3nopdet: rlwinm r8,r8,0,l3clken+1,l3clken-1 ; Clear the clock enable bit mtspr l3cr,r8 ; Disable the clock - li r2,128 ; -ciinvdl3c: addi r2,r2,-1 ; - cmplwi r2,0 ; + li r2,128 ; ? +ciinvdl3c: addi r2,r2,-1 ; ? + cmplwi r2,0 ; ? bne+ ciinvdl3c - mfspr r10,msssr0 ; - rlwinm r10,r10,0,vgL3TAG+1,vgL3TAG-1 ; - mtspr msssr0,r10 ; + mfspr r10,msssr0 ; ? + rlwinm r10,r10,0,vgL3TAG+1,vgL3TAG-1 ; ? + mtspr msssr0,r10 ; ? sync mtspr l3cr,r3 ; Enable it as desired @@ -1834,37 +1900,74 @@ loop: .globl EXT(cpu_number) LEXT(cpu_number) - mfsprg r4,0 ; Get per-proc block + mfsprg r4,1 ; Get the current activation + lwz r4,ACT_PER_PROC(r4) ; Get the per_proc block lhz r3,PP_CPU_NUMBER(r4) ; Get CPU number blr ; Return... +/* + * processor_t current_processor(void) + * + * Returns the current processor. + */ + + .align 5 + .globl EXT(current_processor) + +LEXT(current_processor) + mfsprg r3,1 ; Get the current activation + lwz r3,ACT_PER_PROC(r3) ; Get the per_proc block + addi r3,r3,PP_PROCESSOR + blr + +#if PROCESSOR_SIZE > PP_PROCESSOR_SIZE +#error processor overflows per_proc +#endif /* - * void set_machine_current_act(thread_act_t) + * ast_t *ast_pending(void) * - * Set the current activation + * Returns the address of the pending AST mask for the current processor. + */ + + .align 5 + .globl EXT(ast_pending) + +LEXT(ast_pending) + mfsprg r3,1 ; Get the current activation + lwz r3,ACT_PER_PROC(r3) ; Get the per_proc block + addi r3,r3,PP_PENDING_AST + blr ; Return... + +/* + * void machine_set_current_thread(thread_t) + * + * Set the current thread */ .align 5 - .globl EXT(set_machine_current_act) + .globl EXT(machine_set_current_thread) -LEXT(set_machine_current_act) +LEXT(machine_set_current_thread) + mfsprg r4,1 ; Get spr1 + lwz r5,ACT_PER_PROC(r4) ; Get the PerProc from the previous active thread + stw r5,ACT_PER_PROC(r3) ; Set the PerProc in the active thread mtsprg 1,r3 ; Set spr1 with the active thread blr ; Return... /* - * thread_t current_act(void) * thread_t current_thread(void) + * thread_t current_act(void) * * * Return the current thread for outside components. 
*/ .align 5 - .globl EXT(current_act) .globl EXT(current_thread) + .globl EXT(current_act) -LEXT(current_act) LEXT(current_thread) +LEXT(current_act) mfsprg r3,1 blr @@ -1876,7 +1979,7 @@ LEXT(clock_get_uptime) mftb r0 mftbu r11 cmpw r11,r9 - bne- 1b + bne-- 1b stw r0,4(r3) stw r9,0(r3) blr @@ -1889,7 +1992,7 @@ LEXT(mach_absolute_time) mftb r4 mftbu r0 cmpw r0,r3 - bne- 1b + bne-- 1b blr /* @@ -1905,7 +2008,7 @@ LEXT(ml_sense_nmi) blr ; Leave... /* -** ml_set_processor_speed_powertunw() +** ml_set_processor_speed_powertune() ** */ ; Force a line boundry here @@ -1921,7 +2024,8 @@ LEXT(ml_set_processor_speed_powertune) stw r31, FM_ARG0+0x0C(r1) ; Save a register stw r0, (FM_ALIGN(4*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Save the return - mfsprg r31, 0 ; Get the per_proc_info + mfsprg r31,1 ; Get the current activation + lwz r31,ACT_PER_PROC(r31) ; Get the per_proc block lwz r30, pfPowerModes(r31) ; Get the supported power modes @@ -2003,7 +2107,8 @@ spsPowerTuneDone: .globl EXT(ml_set_processor_speed_dpll) LEXT(ml_set_processor_speed_dpll) - mfsprg r5, 0 ; Get the per_proc_info + mfsprg r5,1 ; Get the current activation + lwz r5,ACT_PER_PROC(r5) ; Get the per_proc block cmplwi r3, 0 ; Turn off BTIC before low speed beq spsDPLL1 @@ -2040,7 +2145,8 @@ spsDPLL2: .globl EXT(ml_set_processor_speed_dfs) LEXT(ml_set_processor_speed_dfs) - mfsprg r5, 0 ; Get the per_proc_info + mfsprg r5,1 ; Get the current activation + lwz r5,ACT_PER_PROC(r5) ; Get the per_proc block cmplwi r3, 0 ; full speed? mfspr r3, hid1 ; Get the current HID1 @@ -2066,7 +2172,8 @@ spsDFS: .globl EXT(ml_set_processor_voltage) LEXT(ml_set_processor_voltage) - mfsprg r5, 0 ; Get the per_proc_info + mfsprg r5,1 ; Get the current activation + lwz r5,ACT_PER_PROC(r5) ; Get the per_proc block lwz r6, pfPowerModes(r5) ; Get the supported power modes diff --git a/osfmk/ppc/mappings.c b/osfmk/ppc/mappings.c index c68a4ef77..ebeef928b 100644 --- a/osfmk/ppc/mappings.c +++ b/osfmk/ppc/mappings.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,31 +30,31 @@ * */ -#include #include #include #include #include -#include -#include +#include #include #include + +#include +#include +#include +#include + #include #include #include #include -#include +#include -#include #include #include #include - -#include #include #include - #include #include #include @@ -74,21 +74,25 @@ int ppc_max_adrsp; /* Maximum address spaces */ addr64_t *mapdebug; /* (BRINGUP) */ extern unsigned int DebugWork; /* (BRINGUP) */ -extern unsigned int hash_table_size; - void mapping_verify(void); void mapping_phys_unused(ppnum_t pa); /* - * ppc_prot translates from the mach representation of protections to the PPC version. - * We also allow for a direct setting of the protection bits. This extends the mach - * concepts to allow the greater control we need for Virtual Machines (VMM). - * Calculation of it like this saves a memory reference - and maybe a couple of microseconds. - * It eliminates the used of this table. - * unsigned char ppc_prot[16] = { 0, 3, 2, 2, 3, 3, 2, 2, 0, 1, 2, 3, 0, 1, 2, 3 }; + * ppc_prot translates Mach's representation of protections to that of the PPC hardware. + * For Virtual Machines (VMM), we also provide translation entries where the output is + * the same as the input, allowing direct specification of PPC protections. 
Mach's
+ * representations are always in the range 0..7, so they always fall into the first
+ * 8 table entries; direct translations are placed in the range 8..15, so they fall into
+ * the second half of the table.
+ *
+ * ***NOTE*** I've commented out the Mach->PPC translations that would set page-level
+ * no-execute, pending updates to the VM layer that will properly enable its
+ * use. Bob Abeles 08.02.04
 */
-
-#define ppc_prot(p) ((0xE4E4AFAC >> (p << 1)) & 3)
+
+//unsigned char ppc_prot[16] = { 4, 7, 6, 6, 3, 3, 2, 2, /* Mach -> PPC translations */
+unsigned char ppc_prot[16] = { 0, 3, 2, 2, 3, 3, 2, 2, /* Mach -> PPC translations */
+ 0, 1, 2, 3, 4, 5, 6, 7 }; /* VMM direct translations */
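As a standalone illustration of how this table is consumed (the getProtPPC() macro added to mappings.h later in this patch is exactly this masked lookup), the expected values below are read straight off the initializer above:

#include <assert.h>

/* Illustration only: same initializer as ppc_prot. Indices 0..7 are Mach
 * protection combinations; indices 8..15 pass their low 3 bits through,
 * giving VMM clients direct control of the PPC protection bits. */
static unsigned char prot_tab[16] = { 0, 3, 2, 2, 3, 3, 2, 2,
                                      0, 1, 2, 3, 4, 5, 6, 7 };

#define PROT_PPC(key) (prot_tab[(key) & 0xF])  /* mirrors getProtPPC() */

int main(void)
{
	assert(PROT_PPC(1) == 3);      /* VM_PROT_READ       -> PP=3 (read-only)  */
	assert(PROT_PPC(3) == 2);      /* READ|WRITE         -> PP=2 (read/write) */
	assert(PROT_PPC(8 + 1) == 1);  /* VMM direct: PP value passed through     */
	return 0;
}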

 /*
 * About PPC VSID generation:
@@ -155,15 +159,15 @@ void mapping_init(void) {

 ppc_max_adrsp = maxAdrSp; /* Set maximum address spaces */

 maxeff = 32; /* Assume 32-bit */
- if(per_proc_info[0].pf.Available & pf64Bit) maxeff = 64; /* Is this a 64-bit machine? */
+ if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) maxeff = 64; /* Is this a 64-bit machine? */

- rwidth = per_proc_info[0].pf.pfMaxVAddr - maxAdrSpb; /* Reduce address width by width of address space ID */
+ rwidth = PerProcTable[0].ppe_vaddr->pf.pfMaxVAddr - maxAdrSpb; /* Reduce address width by width of address space ID */
 if(rwidth > maxeff) rwidth = maxeff; /* If we still have more virtual than effective, clamp at effective */

 vm_max_address = 0xFFFFFFFFFFFFFFFFULL >> (64 - rwidth); /* Get maximum effective address supported */
- vm_max_physical = 0xFFFFFFFFFFFFFFFFULL >> (64 - per_proc_info[0].pf.pfMaxPAddr); /* Get maximum physical address supported */
+ vm_max_physical = 0xFFFFFFFFFFFFFFFFULL >> (64 - PerProcTable[0].ppe_vaddr->pf.pfMaxPAddr); /* Get maximum physical address supported */

- if(per_proc_info[0].pf.Available & pf64Bit) { /* Are we 64 bit? */
+ if(PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit) { /* Are we 64 bit? */
 tmp = 12; /* Size of hash space */
 }
 else {
@@ -197,27 +201,55 @@ void mapping_init(void) {

 addr64_t mapping_remove(pmap_t pmap, addr64_t va) { /* Remove a single mapping for this VADDR
 Returns TRUE if a mapping was found to remove */

- mapping *mp;
+ mapping_t *mp;
 addr64_t nextva;
+ ppnum_t pgaddr;

- disable_preemption(); /* Don't change threads */
-
- while(1) { /* Keep trying until we truely fail */
+ va &= ~PAGE_MASK; /* Scrub noise bits */
+
+ do { /* Keep trying until we truly fail */
 mp = hw_rem_map(pmap, va, &nextva); /* Remove a mapping from this pmap */
- if(((unsigned int)mp & mapRetCode) != mapRtRemove) break; /* If it is gone, we are done */
- }
-
- enable_preemption(); /* Thread change ok */
-
- if(!mp) return (nextva | 1); /* Nothing found to unmap */
-
- if((unsigned int)mp & mapRetCode) { /* Was there a failure? */
+ } while (mapRtRemove == ((unsigned int)mp & mapRetCode));

- panic("mapping_remove: hw_rem_map failed - pmap = %08X, va = %016llX, code = %08X\n",
- pmap, va, mp);
+ switch ((unsigned int)mp & mapRetCode) {
+ case mapRtOK:
+ break; /* Mapping removed */
+ case mapRtNotFnd:
+ return (nextva | 1); /* Nothing found to unmap */
+ default:
+ panic("mapping_remove: hw_rem_map failed - pmap = %08X, va = %016llX, code = %08X\n",
+ pmap, va, mp);
+ break;
 }
+
+ pgaddr = mp->mpPAddr; /* Get page number from mapping */

 mapping_free(mp); /* Add mapping to the free list */
+
+ if ((pmap->pmapFlags & pmapVMhost) && pmap->pmapVmmExt) {
+ /* If this is an assisted host, scrub any guest mappings */
+ unsigned int idx;
+ phys_entry_t *physent = mapping_phys_lookup(pgaddr, &idx);
+ /* Get physent for our physical page */
+ if (!physent) { /* No physent, could be in I/O area, so exit */
+ return (nextva);
+ }
+
+ do { /* Iterate until all guest mappings are gone */
+ mp = hw_scrub_guest(physent, pmap); /* Attempt to scrub a guest mapping */
+ switch ((unsigned int)mp & mapRetCode) {
+ case mapRtGuest: /* Found a guest mapping */
+ case mapRtNotFnd: /* Mapping was there, but disappeared, must retry */
+ case mapRtEmpty: /* No guest mappings left to scrub */
+ break;
+ default:
+ panic("mapping_remove: hw_scrub_guest failed - physent = %08X, code = %08X\n",
+ physent, mp); /* Cry havoc, cry wrack,
+ at least we die with harness on our backs */
+ break;
+ }
+ } while (mapRtEmpty != ((unsigned int)mp & mapRetCode));
+ }

 return nextva; /* Tell them we did it */
 }
@@ -259,18 +291,34 @@ addr64_t mapping_remove(pmap_t pmap, addr64_t va) { /* Remove a single mapping

 addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int size, vm_prot_t prot) { /* Make an address mapping */

- register mapping *mp;
- addr64_t colladdr;
- unsigned int pindex, mflags, pattr, wimg;
- phys_entry *physent;
- int i, nlists;
-
- disable_preemption(); /* Don't change threads */
+ register mapping_t *mp;
+ addr64_t colladdr, psmask;
+ unsigned int pindex, mflags, pattr, wimg, rc;
+ phys_entry_t *physent;
+ int nlists, pcf;

 pindex = 0;

 mflags = 0x01000000; /* Start building mpFlags field (busy count = 1) */
+
+ pcf = (flags & mmFlgPcfg) >> 24; /* Get the physical page config index */
+ if(!(pPcfg[pcf].pcfFlags)) { /* Validate requested physical page configuration */
+ panic("mapping_make: invalid physical page configuration request - pmap = %08X, va = %016llX, cfg = %d\n",
+ pmap, va, pcf);
+ }
+
+ psmask = (1ULL << pPcfg[pcf].pcfPSize) - 1; /* Mask to isolate any offset into a page */
+ if(va & psmask) { /* Make sure we are page aligned on virtual */
+ panic("mapping_make: attempt to map unaligned vaddr - pmap = %08X, va = %016llX, cfg = %d\n",
+ pmap, va, pcf);
+ }
+ if(((addr64_t)pa << 12) & psmask) { /* Make sure we are page aligned on physical */
+ panic("mapping_make: attempt to map unaligned paddr - pmap = %08X, pa = %016llX, cfg = %d\n",
+ pmap, pa, pcf);
+ }
+
+ mflags |= (pcf << (31-mpPcfgb)); /* Insert physical page configuration index */
+
 if(!(flags & mmFlgBlock)) { /* Is this a block map? */

 size = 1; /* Set size to 1 page if not block */
@@ -278,11 +326,10 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,

 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
 if(!physent) { /* Did we find the physical page?
*/ mflags |= mpBlock; /* Force this to a block if no physent */ - size = 1; /* Force size to 1 page */ pattr = 0; /* Assume normal, non-I/O memory */ if((pa & 0xFFF80000) == 0x00080000) pattr = mmFlgCInhib | mmFlgGuarded; /* If this page is in I/O range, set I/O attributes */ } - else pattr = ((physent->ppLink & (ppI | ppG)) >> 4); /* Get the default attributes from physent */ + else pattr = ((physent->ppLink & (ppI | ppG)) >> 60); /* Get the default attributes from physent */ if(flags & mmFlgUseAttr) pattr = flags & (mmFlgCInhib | mmFlgGuarded); /* Use requested attributes */ } @@ -310,40 +357,42 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, /* the mapping is zero except that the mpLists field is set */ mp->mpFlags |= mflags; /* Add in the rest of the flags to mpLists */ mp->mpSpace = pmap->space; /* Set the address space/pmap lookup ID */ - mp->mpBSize = size; /* Set the size */ + mp->u.mpBSize = size; /* Set the size */ mp->mpPte = 0; /* Set the PTE invalid */ mp->mpPAddr = pa; /* Set the physical page number */ - mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) | ppc_prot(prot); /* Add the protection and attributes to the field */ + mp->mpVAddr = (va & ~mpHWFlags) | (wimg << 3) /* Add the protection and attributes to the field */ + | ((PerProcTable[0].ppe_vaddr->pf.Available & pf64Bit)? + getProtPPC(prot) : (getProtPPC(prot) & 0x3)); /* Mask off no-execute control for 32-bit machines */ while(1) { /* Keep trying... */ colladdr = hw_add_map(pmap, mp); /* Go add the mapping to the pmap */ - if(!colladdr) { /* All is ok... */ - enable_preemption(); /* Ok to switch around here */ - return 0; /* Return... */ - } - - if((colladdr & mapRetCode) == mapRtRemove) { /* Is our target being removed? */ - (void)mapping_remove(pmap, colladdr); /* Yes, go help out */ - continue; /* Try to add it now */ - } + rc = colladdr & mapRetCode; /* Separate return code */ + colladdr &= ~mapRetCode; /* Clean up collision effective address */ - if((colladdr & mapRetCode) == mapRtMapDup) { /* Is our target already mapped (collision mapping must be identical)? */ - mapping_free(mp); /* Return mapping to the free list */ - enable_preemption(); /* Ok to switch around here */ - return 0; /* Normal return */ + switch (rc) { + case mapRtOK: + return 0; /* Mapping added successfully */ + + case mapRtRemove: /* Remove in progress */ + (void)mapping_remove(pmap, colladdr); /* Lend a helping hand to another CPU doing block removal */ + continue; /* Retry mapping add */ + + case mapRtMapDup: /* Identical mapping already present */ + mapping_free(mp); /* Free duplicate mapping */ + return 0; /* Return success */ + + case mapRtSmash: /* Mapping already present but does not match new mapping */ + mapping_free(mp); /* Free duplicate mapping */ + return (colladdr | 1); /* Return colliding address, with some dirt added to avoid + confusion if effective address is 0 */ + default: + panic("mapping_make: hw_add_map failed - collision addr = %016llX, code = %02X, pmap = %08X, va = %016llX, mapping = %08X\n", + colladdr, rc, pmap, va, mp); /* Die dead */ } - if(colladdr != mapRtBadLk) { /* Did it collide? */ - mapping_free(mp); /* Yeah, toss the pending mapping */ - enable_preemption(); /* Ok to switch around here */ - return colladdr; /* Pass back the overlapping address */ - } - - panic("mapping_make: hw_add_map failed - code = %08X, pmap = %08X, va = %016llX, mapping = %08X\n", - colladdr, pmap, va, mp); /* Die dead */ } - return 1; /* Leave... 
*/
+ return 1; /* Unreachable, but pleases compiler */
 }

@@ -364,16 +413,16 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,
 *
 */

-mapping *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /* Make an address mapping */
+mapping_t *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /* Find a mapping */

- register mapping *mp;
+ register mapping_t *mp;
 addr64_t curva;
 pmap_t curpmap;
 int nestdepth;

 curpmap = pmap; /* Remember entry */
 nestdepth = 0; /* Set nest depth */
- curva = (addr64_t)va; /* Set current va */
+ curva = (addr64_t)va; /* Set current va */

 while(1) {

@@ -382,9 +431,10 @@ mapping *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /*
 panic("mapping_find: pmap lock failure - rc = %08X, pmap = %08X\n", mp, curpmap); /* Die... */
 }

- if(!mp || !(mp->mpFlags & mpNest) || !full) break; /* Are we a nest or are we only going one deep? */
+ if(!mp || ((mp->mpFlags & mpType) < mpMinSpecial) || !full) break; /* Are we done looking? */

- if(mp->mpFlags & mpSpecial) { /* Don't chain through a special mapping */
+ if((mp->mpFlags & mpType) != mpNest) { /* Don't chain through anything other than a nested pmap */
+ mapping_drop_busy(mp); /* We have everything we need from the mapping */
 mp = 0; /* Set not found */
 break;
 }
@@ -404,7 +454,7 @@ mapping *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /*
 }

 /*
- * kern_return_t mapping_protect(pmap_t pmap, addt_t va, vm_prot_t prot, addr64_t *nextva) - change the protection of a virtual page
+ * void mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) - change the protection of a virtual page
 *
 * This routine takes a pmap and virtual address and changes
 * the protection. If there are PTEs associated with the mappings, they will be invalidated before
@@ -416,22 +466,19 @@ mapping *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full) { /*
 *
 */

-int mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) { /* Change protection of a virtual page */
+void
+mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva) { /* Change protection of a virtual page */

 int ret;

- ret = hw_protect(pmap, va, ppc_prot(prot), nextva); /* Try to change the protect here */
+ ret = hw_protect(pmap, va, getProtPPC(prot), nextva); /* Try to change the protect here */

 switch (ret) { /* Decode return code */
 case mapRtOK: /* Changed */
 case mapRtNotFnd: /* Didn't find it */
- return mapRtOK; /* Ok, return... */
- break;
-
 case mapRtBlock: /* Block map, just ignore request */
 case mapRtNest: /* Nested pmap, just ignore request */
- return ret; /* Pass back return code */
 break;

 default:
@@ -457,14 +504,15 @@ int mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva)

 void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) { /* Change protection of all mappings to page */

 unsigned int pindex;
- phys_entry *physent;
+ phys_entry_t *physent;

 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
 if(!physent) { /* Did we find the physical page? */
 panic("mapping_protect_phys: invalid physical page %08X\n", pa);
 }

- hw_walk_phys(physent, hwpSPrtPhy, hwpSPrtMap, hwpNoop, ppc_prot(prot)); /* Set the new protection for page and mappings */
+ hw_walk_phys(physent, hwpNoop, hwpSPrtMap, hwpNoop,
+ getProtPPC(prot), hwpPurgePTE); /* Set the new protection for page and mappings */

 return; /* Leave...
*/ } @@ -480,14 +528,15 @@ void mapping_protect_phys(ppnum_t pa, vm_prot_t prot) { /* Change protection of void mapping_clr_mod(ppnum_t pa) { /* Clears the change bit of a physical page */ unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? */ panic("mapping_clr_mod: invalid physical page %08X\n", pa); } - hw_walk_phys(physent, hwpNoop, hwpCCngMap, hwpCCngPhy, 0); /* Clear change for page and mappings */ + hw_walk_phys(physent, hwpNoop, hwpCCngMap, hwpCCngPhy, + 0, hwpPurgePTE); /* Clear change for page and mappings */ return; /* Leave... */ } @@ -502,14 +551,15 @@ void mapping_clr_mod(ppnum_t pa) { /* Clears the change bit of a physical void mapping_set_mod(ppnum_t pa) { /* Sets the change bit of a physical page */ unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? */ panic("mapping_set_mod: invalid physical page %08X\n", pa); } - hw_walk_phys(physent, hwpNoop, hwpSCngMap, hwpSCngPhy, 0); /* Set change for page and mappings */ + hw_walk_phys(physent, hwpNoop, hwpSCngMap, hwpSCngPhy, + 0, hwpNoopPTE); /* Set change for page and mappings */ return; /* Leave... */ } @@ -524,14 +574,15 @@ void mapping_set_mod(ppnum_t pa) { /* Sets the change bit of a physical p void mapping_clr_ref(ppnum_t pa) { /* Clears the reference bit of a physical page */ unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? */ panic("mapping_clr_ref: invalid physical page %08X\n", pa); } - hw_walk_phys(physent, hwpNoop, hwpCRefMap, hwpCRefPhy, 0); /* Clear reference for page and mappings */ + hw_walk_phys(physent, hwpNoop, hwpCRefMap, hwpCRefPhy, + 0, hwpPurgePTE); /* Clear reference for page and mappings */ return; /* Leave... */ } @@ -546,20 +597,21 @@ void mapping_clr_ref(ppnum_t pa) { /* Clears the reference bit of a physi void mapping_set_ref(ppnum_t pa) { /* Sets the reference bit of a physical page */ unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? */ panic("mapping_set_ref: invalid physical page %08X\n", pa); } - hw_walk_phys(physent, hwpNoop, hwpSRefMap, hwpSRefPhy, 0); /* Set reference for page and mappings */ + hw_walk_phys(physent, hwpNoop, hwpSRefMap, hwpSRefPhy, + 0, hwpNoopPTE); /* Set reference for page and mappings */ return; /* Leave... */ } /* - * void mapping_tst_mod(ppnum_t pa) - test the change bit of a physical page + * boolean_t mapping_tst_mod(ppnum_t pa) - test the change bit of a physical page * * This routine takes a physical entry and runs through all mappings attached to it and tests * the changed bit. @@ -568,20 +620,21 @@ void mapping_set_ref(ppnum_t pa) { /* Sets the reference bit of a physica boolean_t mapping_tst_mod(ppnum_t pa) { /* Tests the change bit of a physical page */ unsigned int pindex, rc; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) { /* Did we find the physical page? 
*/
 panic("mapping_tst_mod: invalid physical page %08X\n", pa);
 }

- rc = hw_walk_phys(physent, hwpTCngPhy, hwpTCngMap, hwpNoop, 0); /* Set change for page and mappings */
+ rc = hw_walk_phys(physent, hwpTCngPhy, hwpTCngMap, hwpNoop,
+ 0, hwpMergePTE); /* Test change for page and mappings */

 return ((rc & (unsigned long)ppC) != 0); /* Leave with change bit */
 }

 /*
- * void mapping_tst_ref(ppnum_t pa) - tests the reference bit of a physical page
+ * boolean_t mapping_tst_ref(ppnum_t pa) - tests the reference bit of a physical page
 *
 * This routine takes a physical entry and runs through all mappings attached to it and tests
 * the reference bit.
@@ -590,18 +643,72 @@ boolean_t mapping_tst_mod(ppnum_t pa) { /* Tests the change bit of a physi

 boolean_t mapping_tst_ref(ppnum_t pa) { /* Tests the reference bit of a physical page */

 unsigned int pindex, rc;
- phys_entry *physent;
+ phys_entry_t *physent;

 physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
 if(!physent) { /* Did we find the physical page? */
 panic("mapping_tst_ref: invalid physical page %08X\n", pa);
 }

- rc = hw_walk_phys(physent, hwpTRefPhy, hwpTRefMap, hwpNoop, 0); /* Test reference for page and mappings */
+ rc = hw_walk_phys(physent, hwpTRefPhy, hwpTRefMap, hwpNoop,
+ 0, hwpMergePTE); /* Test reference for page and mappings */

 return ((rc & (unsigned long)ppR) != 0); /* Leave with reference bit */
 }

+/*
+ * unsigned int mapping_tst_refmod(ppnum_t pa) - tests the reference and change bits of a physical page
+ *
+ * This routine takes a physical entry and runs through all mappings attached to it and tests
+ * their reference and changed bits.
+ */
+
+unsigned int mapping_tst_refmod(ppnum_t pa) { /* Tests the reference and change bits of a physical page */
+
+ unsigned int pindex, rc;
+ phys_entry_t *physent;
+
+ physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
+ if (!physent) { /* Did we find the physical page? */
+ panic("mapping_tst_refmod: invalid physical page %08X\n", pa);
+ }
+
+ rc = hw_walk_phys(physent, hwpTRefCngPhy, hwpTRefCngMap, hwpNoop,
+ 0, hwpMergePTE); /* Test reference and change bits in page and mappings */
+ return (((rc & ppC)? VM_MEM_MODIFIED : 0) | ((rc & ppR)? VM_MEM_REFERENCED : 0));
+ /* Convert bits to generic format and return */
+
+}
+
+
+/*
+ * void mapping_clr_refmod(ppnum_t pa, unsigned int mask) - clears the reference and change bits specified
+ * by mask of a physical page
+ *
+ * This routine takes a physical entry and runs through all mappings attached to it and turns
+ * off the reference and change bits specified by the mask.
+ */
+
+void mapping_clr_refmod(ppnum_t pa, unsigned int mask) { /* Clears the reference and change bits of a physical page */
+
+ unsigned int pindex;
+ phys_entry_t *physent;
+ unsigned int ppcMask;
+
+ physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */
+ if(!physent) { /* Did we find the physical page? */
+ panic("mapping_clr_refmod: invalid physical page %08X\n", pa);
+ }
+
+ ppcMask = (((mask & VM_MEM_MODIFIED)? ppC : 0) | ((mask & VM_MEM_REFERENCED)? ppR : 0));
+ /* Convert mask bits to PPC-specific format */
+ hw_walk_phys(physent, hwpNoop, hwpCRefCngMap, hwpCRefCngPhy,
+ ppcMask, hwpPurgePTE); /* Clear reference and change bits for page and mappings */
+ return; /* Leave... */
+}
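The two routines above are simple bit translations between the PPC ppR/ppC flags and the machine-independent VM_MEM_* bits. A self-contained sketch of that conversion; the ppR/ppC values match this patch, while the VM_MEM_* values here are illustrative placeholders, not the real osfmk definitions:

#include <assert.h>

#define ppR               0x08u   /* Referenced (low word of the 64-bit flag) */
#define ppC               0x04u   /* Changed */
#define VM_MEM_MODIFIED   0x01u   /* placeholder value for illustration */
#define VM_MEM_REFERENCED 0x02u   /* placeholder value for illustration */

/* PPC refmod bits -> generic VM bits, as in mapping_tst_refmod */
static unsigned int refmod_to_generic(unsigned int rc) {
	return ((rc & ppC) ? VM_MEM_MODIFIED : 0) |
	       ((rc & ppR) ? VM_MEM_REFERENCED : 0);
}

/* Generic VM bits -> PPC refmod bits, as in mapping_clr_refmod */
static unsigned int generic_to_refmod(unsigned int mask) {
	return ((mask & VM_MEM_MODIFIED) ? ppC : 0) |
	       ((mask & VM_MEM_REFERENCED) ? ppR : 0);
}

int main(void) {
	assert(refmod_to_generic(ppR | ppC) == (VM_MEM_MODIFIED | VM_MEM_REFERENCED));
	assert(generic_to_refmod(VM_MEM_REFERENCED) == ppR);
	return 0;
}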
+
+
+
 /*
 * phys_ent *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) - tests the reference bit of a physical page
 *
 * This routine takes a physical entry and runs through all mappings attached to it and tests
 * the reference bit.
 */

-phys_entry *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) { /* Finds the physical entry for the page */
+phys_entry_t *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) { /* Finds the physical entry for the page */

- phys_entry *physent;
 int i;

 for(i = 0; i < pmap_mem_regions_count; i++) { /* Walk through the list */
@@ -624,7 +730,7 @@ phys_entry *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) { /* Finds the
 return &pmap_mem_regions[i].mrPhysTab[pp - pmap_mem_regions[i].mrStart]; /* Return the physent pointer */
 }

- return (phys_entry *)0; /* Shucks, can't find it... */
+ return (phys_entry_t *)0; /* Shucks, can't find it... */
 }

@@ -649,10 +755,9 @@ static thread_call_data_t mapping_adjust_call_data;

 void mapping_adjust(void) { /* Adjust free mappings */

 kern_return_t retr = KERN_SUCCESS;
- mappingblok *mb, *mbn;
+ mappingblok_t *mb, *mbn;
 spl_t s;
- int allocsize, i;
- extern int vm_page_free_count;
+ int allocsize;

 if(mapCtl.mapcmin <= MAPPERBLOK) {
 mapCtl.mapcmin = (sane_size / PAGE_SIZE) / 16;
@@ -712,7 +817,7 @@ void mapping_adjust(void) { /* Adjust free mappings */
 break; /* Fail to alocate, bail out... */
 for(; allocsize > 0; allocsize -= MAPPERBLOK) { /* Release one block at a time */
 mapping_free_init((vm_offset_t)mbn, 0, 1); /* Initialize a non-permanent block */
- mbn = (mappingblok *)((unsigned int)mbn + PAGE_SIZE); /* Point to the next slot */
+ mbn = (mappingblok_t *)((unsigned int)mbn + PAGE_SIZE); /* Point to the next slot */
 }

 if ((mapCtl.mapcinuse + mapCtl.mapcfree + (mapCtl.mapcreln * (MAPPERBLOK + 1))) > mapCtl.mapcmaxalloc)
@@ -758,12 +863,12 @@ void mapping_adjust(void) { /* Adjust free mappings */

 void mapping_free(struct mapping *mp) { /* Release a mapping */

- mappingblok *mb, *mbn;
+ mappingblok_t *mb, *mbn;
 spl_t s;
 unsigned int full, mindx, lists;

 mindx = ((unsigned int)mp & (PAGE_SIZE - 1)) >> 6; /* Get index to mapping */
- mb = (mappingblok *)((unsigned int)mp & -PAGE_SIZE); /* Point to the mapping block */
+ mb = (mappingblok_t *)((unsigned int)mp & -PAGE_SIZE); /* Point to the mapping block */
 lists = (mp->mpFlags & mpLists); /* get #lists */
 if ((lists == 0) || (lists > kSkipListMaxLists)) /* panic if out of range */
 panic("mapping_free: mpLists invalid\n");
@@ -771,7 +876,7 @@ void mapping_free(struct mapping *mp) { /* Release a mapping */
 #if 0
 mp->mpFlags = 0x99999999; /* (BRINGUP) */
 mp->mpSpace = 0x9999; /* (BRINGUP) */
- mp->mpBSize = 0x9999; /* (BRINGUP) */
+ mp->u.mpBSize = 0x9999; /* (BRINGUP) */
 mp->mpPte = 0x99999998; /* (BRINGUP) */
 mp->mpPAddr = 0x99999999; /* (BRINGUP) */
 mp->mpVAddr = 0x9999999999999999ULL; /* (BRINGUP) */
@@ -878,22 +983,19 @@ void mapping_free(struct mapping *mp) { /* Release a mapping */
 * we allocate a new block.
 *
 */
+decl_simple_lock_data(extern,free_pmap_lock)

-mapping *mapping_alloc(int lists) { /* Obtain a mapping */
+mapping_t *
+mapping_alloc(int lists) { /* Obtain a mapping */

- register mapping *mp;
- mappingblok *mb, *mbn;
+ register mapping_t *mp;
+ mappingblok_t *mb, *mbn;
 spl_t s;
 int mindx;
- kern_return_t retr;
 int big = (lists > mpBasicLists); /* set flag if big block req'd */
 pmap_t refpmap, ckpmap;
 unsigned int space, i;
- int ref_count;
 addr64_t va, nextva;
- extern pmap_t free_pmap_list;
- extern int free_pmap_count;
- decl_simple_lock_data(extern,free_pmap_lock)
 boolean_t found_mapping;
 boolean_t do_rescan;

@@ -909,7 +1011,7 @@ mapping *mapping_alloc(int lists) { /* Obtain a mapping */
 * list. If so, rescue one. Otherwise, try to steal a couple blocks worth.
*/ - if(mbn = mapCtl.mapcrel) { /* Try to rescue a block from impending doom */ + if((mbn = mapCtl.mapcrel) != 0) { /* Try to rescue a block from impending doom */ mapCtl.mapcrel = mbn->nextblok; /* Pop the queue */ mapCtl.mapcreln--; /* Back off the count */ mapping_free_init((vm_offset_t)mbn, 0, 1); /* Initialize a non-permanent block */ @@ -943,24 +1045,32 @@ mapping *mapping_alloc(int lists) { /* Obtain a mapping */ ckpmap = (pmap_t)ckpmap->pmap_link.next; - if ((ckpmap->stats.resident_count != 0) && (ckpmap != kernel_pmap)) { + /* We don't steal mappings from the kernel pmap, a VMM host pmap, or a VMM guest pmap with guest + shadow assist active. + */ + if ((ckpmap->stats.resident_count != 0) && (ckpmap != kernel_pmap) + && !(ckpmap->pmapFlags & (pmapVMgsaa|pmapVMhost))) { do_rescan = TRUE; for (i=0;i<8;i++) { mp = hw_purge_map(ckpmap, va, &nextva); - if((unsigned int)mp & mapRetCode) { - panic("mapping_alloc: hw_purge_map failed - pmap = %08X, va = %16llX, code = %08X\n", ckpmap, va, mp); + switch ((unsigned int)mp & mapRetCode) { + case mapRtOK: + mapping_free(mp); + found_mapping = TRUE; + break; + case mapRtNotFnd: + break; + default: + panic("mapping_alloc: hw_purge_map failed - pmap = %08X, va = %16llX, code = %08X\n", ckpmap, va, mp); + break; } - if(!mp) { + if (mapRtNotFnd == ((unsigned int)mp & mapRetCode)) if (do_rescan) do_rescan = FALSE; else break; - } else { - mapping_free(mp); - found_mapping = TRUE; - } va = nextva; } @@ -1048,7 +1158,7 @@ rescued: */ if(mapCtl.mapcfree < mapCtl.mapcmin) { /* See if we need to replenish */ - if(mbn = mapCtl.mapcrel) { /* Try to rescue a block from impending doom */ + if((mbn = mapCtl.mapcrel) != 0) { /* Try to rescue a block from impending doom */ mapCtl.mapcrel = mbn->nextblok; /* Pop the queue */ mapCtl.mapcreln--; /* Back off the count */ mapping_free_init((vm_offset_t)mbn, 0, 1); /* Initialize a non-permanent block */ @@ -1065,7 +1175,7 @@ rescued: hw_lock_unlock((hw_lock_t)&mapCtl.mapclock); /* Unlock our stuff */ splx(s); /* Restore 'rupts */ - mp = &((mapping *)mb)[mindx]; /* Point to the allocated mapping */ + mp = &((mapping_t *)mb)[mindx]; /* Point to the allocated mapping */ mp->mpFlags = lists; /* set the list count */ @@ -1074,7 +1184,7 @@ rescued: void -consider_mapping_adjust() +consider_mapping_adjust(void) { spl_t s; @@ -1136,13 +1246,12 @@ void mapping_free_init(vm_offset_t mbl, int perm, boolean_t locked) { or goes straight to the release queue . 
locked indicates if the lock is held already */ - mappingblok *mb; + mappingblok_t *mb; spl_t s; - int i; addr64_t raddr; ppnum_t pp; - mb = (mappingblok *)mbl; /* Start of area */ + mb = (mappingblok_t *)mbl; /* Start of area */ if(perm >= 0) { /* See if we need to initialize the block */ if(perm) { @@ -1217,7 +1326,7 @@ void mapping_prealloc(unsigned int size) { /* Preallocates mapppings for lar int nmapb, i; kern_return_t retr; - mappingblok *mbn; + mappingblok_t *mbn; spl_t s; s = splhigh(); /* Don't bother from now on */ @@ -1295,11 +1404,11 @@ void mapping_free_prime(void) { /* Primes the mapping block release list int nmapb, i; kern_return_t retr; - mappingblok *mbn; + mappingblok_t *mbn; vm_offset_t mapping_min; retr = kmem_suballoc(kernel_map, &mapping_min, sane_size / 16, - FALSE, TRUE, &mapping_map); + FALSE, VM_FLAGS_ANYWHERE, &mapping_map); if (retr != KERN_SUCCESS) panic("mapping_free_prime: kmem_suballoc failed"); @@ -1325,7 +1434,7 @@ void mapping_free_prime(void) { /* Primes the mapping block release list } - +void mapping_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, int *collectable, int *exhaustable) { @@ -1352,9 +1461,9 @@ mapping_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_ addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa) { /* Finds first virtual mapping of a physical page in a space */ spl_t s; - mapping *mp; + mapping_t *mp; unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; addr64_t va; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ @@ -1364,7 +1473,7 @@ addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa) { /* Finds first virtual mappin s = splhigh(); /* Make sure interruptions are disabled */ - mp = (mapping *) hw_find_space(physent, pmap->space); /* Go find the first mapping to the page from the requested pmap */ + mp = hw_find_space(physent, pmap->space); /* Go find the first mapping to the page from the requested pmap */ if(mp) { /* Did we find one? */ va = mp->mpVAddr & -4096; /* If so, get the cleaned up vaddr */ @@ -1422,8 +1531,8 @@ vm_offset_t kvtophys(vm_offset_t va) { void ignore_zero_fault(boolean_t type) { /* Sets up to ignore or honor any fault on page 0 access for the current thread */ - if(type) current_act()->mact.specFlags |= ignoreZeroFault; /* Ignore faults on page 0 */ - else current_act()->mact.specFlags &= ~ignoreZeroFault; /* Honor faults on page 0 */ + if(type) current_thread()->machine.specFlags |= ignoreZeroFault; /* Ignore faults on page 0 */ + else current_thread()->machine.specFlags &= ~ignoreZeroFault; /* Honor faults on page 0 */ return; /* Return the result or 0... */ } @@ -1433,10 +1542,10 @@ void ignore_zero_fault(boolean_t type) { /* Sets up to ignore or honor any fa * Copies data between a physical page and a virtual page, or 2 physical. This is used to * move data from the kernel to user state. Note that the "which" parm * says which of the parameters is physical and if we need to flush sink/source. - * Note that both addresses may be physicical but only one may be virtual + * Note that both addresses may be physical, but only one may be virtual. * * The rules are that the size can be anything. Either address can be on any boundary - * and span pages. The physical data must be congiguous as must the virtual. + * and span pages. The physical data must be contiguous as must the virtual. * * We can block when we try to resolve the virtual address at each page boundary. 
* We don't check protection on the physical page. @@ -1446,17 +1555,17 @@ void ignore_zero_fault(boolean_t type) { /* Sets up to ignore or honor any fa * */ -kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int which) { +kern_return_t hw_copypv_32(addr64_t source, addr64_t sink, unsigned int size, int which) { vm_map_t map; kern_return_t ret; - addr64_t pa, nextva, vaddr, paddr; - register mapping *mp; + addr64_t nextva, vaddr, paddr; + register mapping_t *mp; spl_t s; - unsigned int sz, left, lop, csize; + unsigned int lop, csize; int needtran, bothphys; unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; vm_prot_t prot; int orig_which; @@ -1470,11 +1579,11 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic bothphys = 1; /* Assume both are physical */ - if(!(which & cppvPsnk)) { /* Is there a virtual page here? */ + if(!(which & cppvPsnk)) { /* Is sink page virtual? */ vaddr = sink; /* Sink side is virtual */ bothphys = 0; /* Show both aren't physical */ prot = VM_PROT_READ | VM_PROT_WRITE; /* Sink always must be read/write */ - } else if(!(which & cppvPsrc)) { /* Source side is virtual */ + } else if (!(which & cppvPsrc)) { /* Is source page virtual? */ vaddr = source; /* Source side is virtual */ bothphys = 0; /* Show both aren't physical */ prot = VM_PROT_READ; /* Virtual source is always read only */ @@ -1494,11 +1603,11 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic while(1) { mp = mapping_find(map->pmap, vaddr, &nextva, 1); /* Find and busy the mapping */ if(!mp) { /* Was it there? */ - if(per_proc_info[cpu_number()].istackptr == 0) + if(getPerProc()->istackptr == 0) panic("copypv: No vaild mapping on memory %s %x", "RD", vaddr); splx(s); /* Restore the interrupt level */ - ret = vm_fault(map, trunc_page_32((vm_offset_t)vaddr), prot, FALSE, FALSE, NULL, 0); /* Didn't find it, try to fault it in... */ + ret = vm_fault(map, vm_map_trunc_page(vaddr), prot, FALSE, THREAD_UNINT, NULL, 0); /* Didn't find it, try to fault it in... */ if(ret != KERN_SUCCESS)return KERN_FAILURE; /* Didn't find any, return no good... */ @@ -1520,11 +1629,11 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic if((which & cppvPsnk) || !(mp->mpVAddr & 1)) break; /* We got it mapped R/W or the source is not virtual, leave... */ mapping_drop_busy(mp); /* Go ahead and release the mapping for now */ - if(per_proc_info[cpu_number()].istackptr == 0) + if(getPerProc()->istackptr == 0) panic("copypv: No vaild mapping on memory %s %x", "RDWR", vaddr); splx(s); /* Restore the interrupt level */ - ret = vm_fault(map, trunc_page_32((vm_offset_t)vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, FALSE, NULL, 0); /* check for a COW area */ + ret = vm_fault(map, vm_map_trunc_page(vaddr), VM_PROT_READ | VM_PROT_WRITE, FALSE, THREAD_UNINT, NULL, 0); /* check for a COW area */ if (ret != KERN_SUCCESS) return KERN_FAILURE; /* We couldn't get it R/W, leave in disgrace... 
*/ s = splhigh(); /* Don't bother me */ } @@ -1543,7 +1652,7 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic if(which & cppvFsrc) flush_dcache64(source, csize, 1); /* If requested, flush source before move */ if(which & cppvFsnk) flush_dcache64(sink, csize, 1); /* If requested, flush sink before move */ - bcopy_physvir(source, sink, csize); /* Do a physical copy, virtually */ + bcopy_physvir_32(source, sink, csize); /* Do a physical copy, virtually */ if(which & cppvFsrc) flush_dcache64(source, csize, 1); /* If requested, flush source after move */ if(which & cppvFsnk) flush_dcache64(sink, csize, 1); /* If requested, flush sink after move */ @@ -1581,8 +1690,8 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic void mapping_verify(void) { spl_t s; - mappingblok *mb, *mbn; - int relncnt; + mappingblok_t *mb, *mbn; + unsigned int relncnt; unsigned int dumbodude; dumbodude = 0; @@ -1591,7 +1700,7 @@ void mapping_verify(void) { mbn = 0; /* Start with none */ for(mb = mapCtl.mapcnext; mb; mb = mb->nextblok) { /* Walk the free chain */ - if((mappingblok *)(mb->mapblokflags & 0x7FFFFFFF) != mb) { /* Is tag ok? */ + if((mappingblok_t *)(mb->mapblokflags & 0x7FFFFFFF) != mb) { /* Is tag ok? */ panic("mapping_verify: flags tag bad, free chain; mb = %08X, tag = %08X\n", mb, mb->mapblokflags); } mbn = mb; /* Remember the last one */ @@ -1619,23 +1728,19 @@ void mapping_verify(void) { void mapping_phys_unused(ppnum_t pa) { unsigned int pindex; - phys_entry *physent; + phys_entry_t *physent; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if(!physent) return; /* Did we find the physical page? */ - if(!(physent->ppLink & ~(ppLock | ppN | ppFlags))) return; /* No one else is here */ + if(!(physent->ppLink & ~(ppLock | ppFlags))) return; /* No one else is here */ panic("mapping_phys_unused: physical page (%08X) in use, physent = %08X\n", pa, physent); } - - - - - - - - - + + + + + diff --git a/osfmk/ppc/mappings.h b/osfmk/ppc/mappings.h index b11735d15..9d7a7c759 100644 --- a/osfmk/ppc/mappings.h +++ b/osfmk/ppc/mappings.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -22,11 +22,11 @@
 /*
 * Header files for the hardware virtual memory mapping stuff
 */
+#ifdef XNU_KERNEL_PRIVATE
+
 #ifndef _PPC_MAPPINGS_H_
 #define _PPC_MAPPINGS_H_

-#include
-
 #include
 #include
 #include
@@ -55,21 +55,27 @@ typedef struct phys_entry {
 addr64_t ppLink; /* Physical pointer to aliased mappings and flags */

 #define ppLock 0x8000000000000000LL /* Lock for alias chain */
-#define ppN 0x4000000000000000LL /* Not executable */
-#define ppFlags 0x000000000000003FLL /* Status and flags */
-#define ppI 0x0000000000000020LL /* Cache inhibited */
-#define ppIb 58 /* Cache inhibited */
-#define ppG 0x0000000000000010LL /* Guarded */
-#define ppGb 59 /* Guarded */
+#define ppFlags 0x700000000000000FLL /* Status and flags */
+#define ppI 0x2000000000000000LL /* Cache inhibited */
+#define ppIb 2 /* Cache inhibited */
+#define ppG 0x1000000000000000LL /* Guarded */
+#define ppGb 3 /* Guarded */
 #define ppR 0x0000000000000008LL /* Referenced */
 #define ppRb 60 /* Referenced */
 #define ppC 0x0000000000000004LL /* Changed */
 #define ppCb 61 /* Changed */
-#define ppPP 0x0000000000000003LL /* Protection */
-#define ppPPb 62 /* Protection begin */
-#define ppPPe 63 /* Protection end */
-} phys_entry;
+
+/* The lock, attribute, and flag bits are arranged so that their positions may be
+ * described by a contiguous mask of one bits wrapping from bit position 63 to 0.
+ * In assembly language, we can then rapidly produce this mask with:
+ * li r0,ppLFAmask ; r0 <- 0x00000000000000FF
+ * rotrdi r0,r0,ppLFArrot ; r0 <- 0xF00000000000000F
+ */
+#define ppLFAmask 0x00FF /* One bit for each lock, attr, or flag bit */
+#define ppLFArrot 4 /* Right-rotate count to obtain 64-bit mask */
+} phys_entry_t;
 #pragma pack()
+#define physEntrySize sizeof(phys_entry_t)
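The li/rotrdi recipe in the comment above is easy to verify in C. A small sketch, using a plain rotate helper in place of the rotrdi instruction:

#include <stdint.h>

/* Right-rotate a 64-bit value; mirrors rotrdi (n must be 1..63 here). */
static inline uint64_t rotr64(uint64_t x, unsigned n) {
	return (x >> n) | (x << (64 - n));
}

#define ppLFAmask 0x00FFull  /* same constants as above */
#define ppLFArrot 4

int main(void) {
	/* 0x00000000000000FF rotated right by 4 yields 0xF00000000000000F:
	 * the lock bit plus I/G in the top nibble, and R/C plus the
	 * protection bits in the bottom nibble, wrapping 63 -> 0. */
	return rotr64(ppLFAmask, ppLFArrot) == 0xF00000000000000Full ? 0 : 1;
}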

 /* Memory may be non-contiguous. This data structure contains info
 * for mapping this non-contiguous space into the contiguous
 *
@@ -80,7 +86,7 @@
 #pragma pack(4) /* Make sure the structure stays as we defined it */
 typedef struct mem_region {
- phys_entry *mrPhysTab; /* Base of region table */
+ phys_entry_t *mrPhysTab; /* Base of region table */
 ppnum_t mrStart; /* Start of region */
 ppnum_t mrEnd; /* Last page in region */
 ppnum_t mrAStart; /* Next page in region to allocate */
@@ -89,7 +95,7 @@ typedef struct mem_region {
 #pragma pack()
 #define mrSize sizeof(mem_region_t)

-#define PMAP_MEM_REGION_MAX 26
+#define PMAP_MEM_REGION_MAX 11

 extern mem_region_t pmap_mem_regions[PMAP_MEM_REGION_MAX + 1];
 extern int pmap_mem_regions_count;
@@ -110,9 +116,54 @@ typedef struct PCA { /* PTEG Control Area */
 #define PCAlockb 31
 } PCAalflgs;
 } flgs;
-} PCA;
+} PCA_t;
 #pragma pack()

+/* The hash table is composed of mappings organized into G groups of S slots
+ * each. In the macros below, GV_GROUPS_LG2, GV_SLOT_SZ_LG2, and GV_SLOTS_LG2 give the number
+ * of groups, the size (in bytes) of a slot, and the number of slots in a group.
+ * Since these values are given as log2, they're restricted to powers of two. Fast operation
+ * and all that.
+ *
+ * This set of macros defines all of the hash table's metrics and handy masks. It's a
+ * build-time thing because it's faster that way. Only the first group of values may
+ * be adjusted.
+ */
+#define GV_GROUPS_LG2 10 /* 1024 groups per hash table (log2(max) is 14, viz. 16K groups) */
+#define GV_SLOTS_LG2 3 /* 8 slots per group (log2(max) is 8, viz. 256 slots) */
+
+#define GV_SLOT_SZ_LG2 5 /* 32 bytes per slot (mapping size) */
+#define GV_PGIDX_SZ_LG2 3 /* 64-bit hash-table-page physical-address index entry size */
+#define GV_PAGE_SZ_LG2 12 /* 4k-byte hash-table-page size */
+
+#define GV_GROUPS (1 << GV_GROUPS_LG2)
+#define GV_SLOT_SZ (1 << GV_SLOT_SZ_LG2)
+#define GV_SLOTS (1 << GV_SLOTS_LG2)
+#define GV_PAGE_SZ (1 << GV_PAGE_SZ_LG2)
+#define GV_GRP_MASK (GV_GROUPS - 1)
+#define GV_SLOT_MASK (GV_SLOTS - 1)
+#define GV_PAGE_MASK (GV_PAGE_SZ - 1)
+#define GV_HPAGES (1 << (GV_GROUPS_LG2 + GV_SLOT_SZ_LG2 + GV_SLOTS_LG2 - GV_PAGE_SZ_LG2))
+#define GV_GRPS_PPG_LG2 (GV_PAGE_SZ_LG2 - (GV_SLOT_SZ_LG2 + GV_SLOTS_LG2))
+#define GV_GRPS_PPG (1 << GV_GRPS_PPG_LG2)
+#define GV_SLTS_PPG_LG2 (GV_PAGE_SZ_LG2 - GV_SLOT_SZ_LG2)
+#define GV_SLTS_PPG (1 << GV_SLTS_PPG_LG2)
+
+#define GV_HPAGE_SHIFT (GV_PGIDX_SZ_LG2 - GV_GRPS_PPG_LG2)
+#define GV_HPAGE_MASK ((GV_HPAGES - 1) << GV_PGIDX_SZ_LG2)
+#define GV_HGRP_SHIFT (GV_SLOT_SZ_LG2 + GV_SLOTS_LG2)
+#define GV_HGRP_MASK ((GV_GRPS_PPG - 1) << GV_HGRP_SHIFT)
+
+#define GV_MAPWD_BITS_LG2 5 /* 32-bit active map word size */
+#define GV_MAPWD_SZ_LG2 (GV_MAPWD_BITS_LG2 - 3)
+#define GV_BAND_SHIFT (GV_MAPWD_BITS_LG2 + GV_SLOT_SZ_LG2)
+#define GV_BAND_SZ_LG2 (GV_PAGE_SZ_LG2 - GV_SLOT_SZ_LG2 - GV_MAPWD_BITS_LG2)
+#define GV_BAND_MASK (((1 << GV_BAND_SZ_LG2) - 1) << GV_BAND_SHIFT)
+#define GV_MAP_WORDS (1 << (GV_GROUPS_LG2 + GV_SLOTS_LG2 - GV_MAPWD_BITS_LG2))
+#define GV_MAP_MASK ((GV_MAP_WORDS - 1) << GV_MAPWD_SZ_LG2)
+#define GV_MAP_SHIFT (GV_PGIDX_SZ_LG2 - GV_BAND_SZ_LG2)
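Because every metric above is log2 arithmetic over powers of two, the derived values can be sanity-checked at build time. A sketch under the default settings; this is not part of the header, and the negative-array-size trick merely stands in for a static assert:

/* Defaults from above: 1024 groups x 8 slots x 32-byte slots = 256KB of
 * table, carved into 4KB pages, so 64 hash-table pages and 16 groups per
 * page. */
#define GV_GROUPS_LG2  10
#define GV_SLOTS_LG2    3
#define GV_SLOT_SZ_LG2  5
#define GV_PAGE_SZ_LG2 12

#define GV_HPAGES   (1 << (GV_GROUPS_LG2 + GV_SLOT_SZ_LG2 + GV_SLOTS_LG2 - GV_PAGE_SZ_LG2))
#define GV_GRPS_PPG (1 << (GV_PAGE_SZ_LG2 - (GV_SLOT_SZ_LG2 + GV_SLOTS_LG2)))

typedef char assert_hpages[(GV_HPAGES == 64) ? 1 : -1];   /* 10+5+3-12 = 6 -> 64 pages */
typedef char assert_grpspg[(GV_GRPS_PPG == 16) ? 1 : -1]; /* 12-(5+3) = 4 -> 16 groups/page */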
+

 /* Mappings currently come in two sizes: 64 and 128 bytes. The only difference is the
 * number of skiplists (ie, mpLists): 64-byte mappings have 1-4 lists and 128-byte mappings
 * have from 5-12. Only 1 in 256 mappings is large, so an average mapping is 64.25 bytes.
@@ -123,50 +174,67 @@ typedef struct PCA { /* PTEG Control Area */
 * exclusive lock while FIP is shared. The only worry is that there is a possibility that
 * FIP could be attempted by more than 1 processor at a time. Obviously, one will win.
 * The other(s) bail all the way to user state and may refault (or not). There are only
- * a few things in mpFlags that are not static, mpFIP, mpRIP, mpRemovable, and mpBusy.
+ * a few things in mpFlags that are not static: mpFIP, mpRIP, and mpBusy.
 *
- * We organize these so that mpFIP is in a byte with static data and mpRIP and mpRemovable
- * is in another. That means that we can use a store byte to update the guys without
- * worrying about load and reserve. Note that mpFIP must be set atomically because it is
- * under a share lock, but it may be clear with a simple store byte. So far as mpRIP
- * goes, it is in the same byte as mpRemovable. However, mpRemovable is set atomically
- * but never cleared, and mpRIP will not ever be set until after mpRemovable. Note that
- * mpRIP is never cleared either.
+ * We organize these so that mpFIP is in a byte with static data and mpRIP is in another.
+ * That means that we can use a store byte to update the guys without worrying about load
+ * and reserve. Note that mpFIP must be set atomically because it is under a share lock;
+ * but, it may be cleared with a simple store byte. Because mpRIP is set once and then never
+ * cleared, we can get away with setting it by means of a simple store byte.
 *
 */

 #pragma pack(4) /* Make sure the structure stays as we defined it */
 typedef struct mapping {
 unsigned int mpFlags; /* 0x000 - Various flags, lock bit. These are static except for lock */
 #define mpBusy 0xFF000000 /* Busy count */
-#define mpPIndex 0x00FF0000 /* Index into physical table (in words) */
-#define mpSpecial 0x00008000 /* Special mapping - processor specific. */
-#define mpSpecialb 16 /* Special mapping - processor specific. */
-#define mpFIP 0x00004000 /* Fault in progress */
-#define mpFIPb 17 /* Fault in progress */
-#define mpNest 0x00001000 /* Mapping describes nested pmap */
-#define mpNestb 19 /* Mapping describes nested pmap */
-#define mpPerm 0x00000800 /* Mapping is permanent */
-#define mpPermb 20 /* Mapping is permanent */
-#define mpBlock 0x00000400 /* Mapping is a block map - used for V=F or I/O */
-#define mpBlockb 21 /* Mapping is a block map - used for V=F or I/O */
+#define mpPrevious 0x00800000 /* A previous mapping exists in a composite */
+#define mpNext 0x00400000 /* A next mapping exists in a composite */
+#define mpPIndex 0x003F0000 /* Index into physical table (in words) */
+#define mpType 0x0000F000 /* Mapping type: */
+#define mpNormal 0x00000000 /* Normal logical page - backed by RAM, RC maintained, logical page size == physical page size */
+ /* DO NOT CHANGE THIS CODE */
+#define mpBlock 0x00001000 /* Block mapping - used for I/O memory or non-RC maintained RAM, logical page size is independent from physical */
+#define mpMinSpecial 0x00002000 /* Any mapping with this type or above has extra special handling */
+#define mpNest 0x00002000 /* Forces transition to an alternate address space after applying relocation */
+#define mpLinkage 0x00003000 /* Transition to current user address space with relocation - used for copyin/out/pv */
+#define mpACID 0x00004000 /* Address Chunk ID - provides the address space ID for VSID calculation. Normally mapped at chunk size - 2KB */
+#define mpGuest 0x00005000 /* Guest->physical shadow mapping */
+/* 0x00006000 - 0x0000F000 Reserved */
+#define mpFIP 0x00000800 /* Fault in progress */
+#define mpFIPb 20 /* Fault in progress */
+#define mpPcfg 0x00000700 /* Physical Page configuration */
+#define mpPcfgb 23 /* Physical Page configuration index bit */
 #define mpRIP 0x00000080 /* Remove in progress - DO NOT MOVE */
 #define mpRIPb 24 /* Remove in progress */
-#define mpRemovable 0x00000040 /* Mapping is removable - DO NOT MOVE */
-#define mpRemovableb 25 /* Mapping is removable */
-#define mpRSVD1 0x00002330 /* Reserved for future use */
+#define mpPerm 0x00000040 /* Mapping is permanent - DO NOT MOVE */
+#define mpPermb 25 /* Mapping is permanent */
+#define mpBSu 0x00000020 /* Basic Size unit - 0 = 4KB, 1 = 32MB */
+#define mpBSub 26 /* Basic Size unit - 0 = 4KB, 1 = 32MB */
 #define mpLists 0x0000001F /* Number of skip lists mapping is on, max of 27 */
 #define mpListsb 27 /* Number of skip lists mapping is on, max of 27 */
+#define mpgFlags 0x0000001F /* Shadow cache mappings re-use mpLists for flags: */
+#define mpgGlobal 0x00000004 /* Mapping is global (1) or local (0) */
+#define mpgFree 0x00000002 /* Mapping is free */
+#define mpgDormant 0x00000001 /* Mapping is dormant */

 unsigned short mpSpace; /* 0x004 - Address space hash */
- unsigned short mpBSize; /* 0x006 - Block size - 1 in pages - max block size 256MB */
+ union {
+ unsigned short mpBSize; /* 0x006 - Block size - 1 in pages - max block size 256MB */
+ unsigned char mpgCursor; /* 0x006 - Shadow-cache group allocation cursor (first mapping in the group) */
+ } u;
+
 unsigned int mpPte; /* 0x008 - Offset to PTEG in hash table.
Offset to exact PTE if mpHValid set - NOTE: this MUST be 0 for block mappings */ #define mpHValid 0x00000001 /* PTE is entered in hash table */ #define mpHValidb 31 /* PTE is entered in hash table */ ppnum_t mpPAddr; /* 0x00C - Physical page number */ addr64_t mpVAddr; /* 0x010 - Starting virtual address */ #define mpHWFlags 0x0000000000000FFFULL /* Reference/Change, WIMG, AC, N, protection flags from PTE */ -#define mpPP 0x0000000000000007ULL /* Protection flags */ -#define mpPPb 61 +#define mpHWFlagsb 52 +#define mpN 0x0000000000000004ULL /* Page-level no-execute (PowerAS machines) */ +#define mpNb 61 +#define mpPP 0x0000000000000003ULL /* Protection flags */ +#define mpPPb 62 +#define mpPPe 63 #define mpKKN 0x0000000000000007ULL /* Segment key and no execute flag (nested pmap) */ #define mpKKNb 61 #define mpWIMG 0x0000000000000078ULL /* Attribute bits */ @@ -200,7 +268,7 @@ typedef struct mapping { /* addr64_t mpList20[8]; 0x0C0 - Third extended list entries */ /* 0x100 - End of third extended mapping */ -} mapping; +} mapping_t; #pragma pack() #define MAPPING_NULL ((struct mapping *) 0) @@ -220,7 +288,7 @@ typedef struct mappingflush { addr64_t addr; /* Start address to search mapping */ unsigned int spacenum; /* Last space num to search pmap */ unsigned int mapfgas[1]; /* Pad to 64 bytes */ -} mappingflush; +} mappingflush_t; typedef struct mappingctl { unsigned int mapclock; /* Mapping allocation lock */ @@ -240,7 +308,7 @@ typedef struct mappingctl { unsigned int mapcmaxalloc; /* Maximum number of mappings allocated at one time */ unsigned int mapcgas[1]; /* Pad to 64 bytes */ struct mappingflush mapcflush; -} mappingctl; +} mappingctl_t; #pragma pack() /* MAPPERBLOK is the number of basic 64-byte mappings per block (ie, per page.) */ @@ -253,7 +321,7 @@ typedef struct mappingblok { unsigned int mapblokflags; /* Various flags */ #define mbPerm 0x80000000 /* Block is permanent */ struct mappingblok *nextblok; /* Pointer to the next mapping block */ -} mappingblok; +} mappingblok_t; #define mapRemChunk 128 @@ -266,18 +334,46 @@ typedef struct mappingblok { #define mapRtNest 5 #define mapRtRemove 6 #define mapRtMapDup 7 +#define mapRtGuest 8 +#define mapRtEmpty 9 +#define mapRtSmash 0xA /* Mapping already exists and doesn't match new mapping */ -extern mappingctl mapCtl; /* Mapping allocation control */ +/* + * This struct describes available physical page configurations + * Note: + * Index 0 is required and is the primary page configuration (4K, non-large) + * Index 1 is the primary large page config if supported by hw (16M, large page) + */ + +typedef struct pcfg { + uint8_t pcfFlags; /* Flags */ +#define pcfValid 0x80 /* Configuration is valid */ +#define pcfLarge 0x40 /* Large page */ +#define pcfDedSeg 0x20 /* Requires dedicated segment */ + uint8_t pcfEncode; /* Implementation specific PTE encoding */ + uint8_t pcfPSize; /* Page size in powers of 2 */ + uint8_t pcfShift; /* Shift for PTE construction */ +} pcfg; + +#define pcfDefPcfg 0 /* Primary page configuration */ +#define pcfLargePcfg 1 /* Primary large page configuration */ + +extern pcfg pPcfg[8]; /* Supported page configurations */ + +extern mappingctl_t mapCtl; /* Mapping allocation control */ + +extern unsigned char ppc_prot[]; /* Mach -> PPC protection translation table */ + +#define getProtPPC(__key) (ppc_prot[(__key) & 0xF]) + /* Safe Mach -> PPC protection key conversion */ extern addr64_t mapping_remove(pmap_t pmap, addr64_t va); /* Remove a single mapping for this VADDR */ -extern mapping *mapping_find(pmap_t 
pmap, addr64_t va, addr64_t *nextva, int full); /* Finds a mapping */ +extern mapping_t *mapping_find(pmap_t pmap, addr64_t va, addr64_t *nextva, int full); /* Finds a mapping */ extern void mapping_free_init(vm_offset_t mbl, int perm, boolean_t locked); /* Sets start and end of a block of mappings */ -extern void mapping_adjust(void); /* Adjust free mapping count */ -extern void mapping_free_prime(void); /* Primes the mapping block release list */ extern void mapping_prealloc(unsigned int); /* Preallocate mappings for large use */ extern void mapping_relpre(void); /* Releases preallocate request */ extern void mapping_init(void); /* Do initial stuff */ -extern mapping *mapping_alloc(int lists); /* Obtain a mapping */ +extern mapping_t *mapping_alloc(int lists); /* Obtain a mapping */ extern void mapping_free(struct mapping *mp); /* Release a mapping */ extern boolean_t mapping_tst_ref(ppnum_t pa); /* Tests the reference bit of a physical page */ extern boolean_t mapping_tst_mod(ppnum_t pa); /* Tests the change bit of a physical page */ @@ -285,60 +381,76 @@ extern void mapping_set_ref(ppnum_t pa); /* Sets the reference bit of a phy extern void mapping_clr_ref(ppnum_t pa); /* Clears the reference bit of a physical page */ extern void mapping_set_mod(ppnum_t pa); /* Sets the change bit of a physical page */ extern void mapping_clr_mod(ppnum_t pa); /* Clears the change bit of a physical page */ +extern unsigned int mapping_tst_refmod(ppnum_t pa); /* Tests the reference and change bits of a physical page */ +extern void mapping_clr_refmod(ppnum_t pa, unsigned int mask); /* Clears the reference and change bits of a physical page */ extern void mapping_protect_phys(ppnum_t pa, vm_prot_t prot); /* Change protection of all mappings to page */ -extern int mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva); /* Change protection of a single mapping to page */ +extern void mapping_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva); /* Change protection of a single mapping to page */ extern addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, unsigned int size, vm_prot_t prot); /* Make a mapping */ /* Flags for mapping_make */ #define mmFlgBlock 0x80000000 /* This is a block map, use size for number of pages covered */ #define mmFlgUseAttr 0x40000000 /* Use specified attributes */ #define mmFlgPerm 0x20000000 /* Mapping is permanant */ +#define mmFlgPcfg 0x07000000 /* Physical page configuration index */ #define mmFlgCInhib 0x00000002 /* Cahching inhibited - use if mapFlgUseAttr set or block */ #define mmFlgGuarded 0x00000001 /* Access guarded - use if mapFlgUseAttr set or block */ extern void mapping_purge(ppnum_t pa); /* Remove all mappings for this physent */ extern addr64_t mapping_p2v(pmap_t pmap, ppnum_t pa); /* Finds first virtual mapping of a physical page in a space */ extern void mapping_drop_busy(struct mapping *mapping); /* Drops busy count on mapping */ -extern phys_entry *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex); /* Finds the physical entry for the page */ +extern phys_entry_t *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex); /* Finds the physical entry for the page */ extern int mapalc1(struct mappingblok *mb); /* Finds and allcates a 1-bit mapping entry */ extern int mapalc2(struct mappingblok *mb); /* Finds and allcates a 2-bit mapping entry */ extern void ignore_zero_fault(boolean_t type); /* Sets up to ignore or honor any fault on page 0 access for the current thread */ - -extern mapping 
*hw_rem_map(pmap_t pmap, addr64_t va, addr64_t *next); /* Remove a mapping from the system */ -extern mapping *hw_purge_map(pmap_t pmap, addr64_t va, addr64_t *next); /* Remove a regular mapping from the system */ -extern mapping *hw_purge_space(struct phys_entry *pp, pmap_t pmap); /* Remove the first mapping for a specific pmap from physentry */ -extern mapping *hw_purge_phys(struct phys_entry *pp); /* Remove the first mapping for a physentry */ -extern mapping *hw_find_map(pmap_t pmap, addr64_t va, addr64_t *nextva); /* Finds a mapping */ +extern void mapping_fake_zone_info( /* return mapping usage stats as a fake zone info */ + int *count, + vm_size_t *cur_size, + vm_size_t *max_size, + vm_size_t *elem_size, + vm_size_t *alloc_size, + int *collectable, + int *exhaustable); + +extern mapping_t *hw_rem_map(pmap_t pmap, addr64_t va, addr64_t *next); /* Remove a mapping from the system */ +extern mapping_t *hw_purge_map(pmap_t pmap, addr64_t va, addr64_t *next); /* Remove a regular mapping from the system */ +extern mapping_t *hw_purge_space(struct phys_entry *pp, pmap_t pmap); /* Remove the first mapping for a specific pmap from physentry */ +extern mapping_t *hw_purge_phys(struct phys_entry *pp); /* Remove the first mapping for a physentry */ +extern mapping_t *hw_scrub_guest(struct phys_entry *pp, pmap_t pmap); /* Scrub first guest mapping belonging to this host */ +extern mapping_t *hw_find_map(pmap_t pmap, addr64_t va, addr64_t *nextva); /* Finds a mapping */ +extern mapping_t *hw_find_space(struct phys_entry *pp, unsigned int space); /* Given a phys_entry, find its first mapping in the specified space */ extern addr64_t hw_add_map(pmap_t pmap, struct mapping *mp); /* Add a mapping to a pmap */ -extern int hw_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva); /* Change the protection of a virtual page */ +extern unsigned int hw_protect(pmap_t pmap, addr64_t va, vm_prot_t prot, addr64_t *nextva); /* Change the protection of a virtual page */ extern unsigned int hw_test_rc(pmap_t pmap, addr64_t va, boolean_t reset); /* Test and optionally reset the RC bit of specific mapping */ -extern unsigned int hw_phys_walk(struct phys_entry *pp, unsigned int preop, unsigned int op, /* Perform function on all mappings on a physical page */ - unsigned int postop, unsigned int parm); -#define hwpNoop 0 /* No operation */ -#define hwpSPrtPhy 1 /* Sets protection in physent */ -#define hwpSPrtMap 2 /* Sets protection in mapping */ -#define hwpSAtrPhy 3 /* Sets attributes in physent */ -#define hwpSAtrMap 4 /* Sets attributes in mapping */ -#define hwpCRefPhy 5 /* Clears reference in physent */ -#define hwpCRefMap 6 /* Clears reference in mapping */ -#define hwpCCngPhy 7 /* Clears change in physent */ -#define hwpCCngMap 8 /* Clears change in mapping */ -#define hwpSRefPhy 9 /* Sets reference in physent */ -#define hwpSRefMap 10 /* Sets reference in mapping */ -#define hwpSCngPhy 11 /* Sets change in physent */ -#define hwpSCngMap 12 /* Sets change in mapping */ -#define hwpTRefPhy 13 /* Tests reference in physent */ -#define hwpTRefMap 14 /* Tests reference in mapping */ -#define hwpTCngPhy 15 /* Tests change in physent */ -#define hwpTCngMap 16 /* Tests change in mapping */ - -extern boolean_t hw_tst_mod(struct phys_entry *pp); /* Tests change bit */ -extern void hw_set_mod(struct phys_entry *pp); /* Set change bit */ -extern void hw_clr_mod(struct phys_entry *pp); /* Clear change bit */ - -extern boolean_t hw_tst_ref(struct phys_entry *pp); /* Tests reference bit */ -extern void 
hw_set_ref(struct phys_entry *pp); /* Set reference bit */ -extern void hw_clr_ref(struct phys_entry *pp); /* Clear reference bit */ +extern unsigned int hw_clear_maps(void); + +extern unsigned int hw_walk_phys(struct phys_entry *pp, unsigned int preop, unsigned int op, /* Perform function on all mappings on a physical page */ + unsigned int postop, unsigned int parm, unsigned int opmod); +/* Opcodes for hw_walk_phys */ +#define hwpNoop 0 /* No operation */ +#define hwpSPrtPhy 1 /* Sets protection in physent (obsolete) */ +#define hwpSPrtMap 2 /* Sets protection in mapping */ +#define hwpSAtrPhy 3 /* Sets attributes in physent */ +#define hwpSAtrMap 4 /* Sets attributes in mapping */ +#define hwpCRefPhy 5 /* Clears reference in physent */ +#define hwpCRefMap 6 /* Clears reference in mapping */ +#define hwpCCngPhy 7 /* Clears change in physent */ +#define hwpCCngMap 8 /* Clears change in mapping */ +#define hwpSRefPhy 9 /* Sets reference in physent */ +#define hwpSRefMap 10 /* Sets reference in mapping */ +#define hwpSCngPhy 11 /* Sets change in physent */ +#define hwpSCngMap 12 /* Sets change in mapping */ +#define hwpTRefPhy 13 /* Tests reference in physent */ +#define hwpTRefMap 14 /* Tests reference in mapping */ +#define hwpTCngPhy 15 /* Tests change in physent */ +#define hwpTCngMap 16 /* Tests change in mapping */ +#define hwpTRefCngPhy 17 /* Tests reference and change in physent */ +#define hwpTRefCngMap 18 /* Tests reference and change in mapping */ +#define hwpCRefCngPhy 19 /* Clears reference and change in physent */ +#define hwpCRefCngMap 20 /* Clears reference and change in mapping */ +/* Operation modifiers for connected PTE visits for hw_walk_phys */ +#define hwpPurgePTE 0 /* Invalidate/purge PTE and merge RC bits for each connected mapping */ +#define hwpMergePTE 1 /* Merge RC bits for each connected mapping */ +#define hwpNoopPTE 2 /* Take no additional action for each connected mapping */ extern void hw_set_user_space(pmap_t pmap); /* Indicate we need a space switch */ extern void hw_set_user_space_dis(pmap_t pmap); /* Indicate we need a space switch (already disabled) */ @@ -351,9 +463,30 @@ extern struct phys_entry *pmap_find_physentry(ppnum_t pa); extern void mapLog(unsigned int laddr, unsigned int type, addr64_t va); extern unsigned int mapSkipListVerifyC(pmap_t pmap, unsigned long long *dumpa); extern void fillPage(ppnum_t pa, unsigned int fill); +extern kern_return_t hw_copypv_32(addr64_t source, addr64_t sink, unsigned int size, int which); + +extern void hw_rem_all_gv(pmap_t pmap); /* Remove all of a guest's mappings */ +extern void hw_rem_local_gv(pmap_t gpmap); /* Remove guest local mappings */ +extern unsigned int hw_res_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t hva, addr64_t gva, vm_prot_t prot); + /* Resume a guest mapping */ +extern void hw_add_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva, unsigned int mflags, ppnum_t pa); + /* Add a guest mapping */ +extern void hw_susp_map_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva); + /* Suspend a guest mapping */ +extern unsigned int hw_test_rc_gv(pmap_t hpmap, pmap_t gpmap, addr64_t gva, unsigned int reset); + /* Test/reset mapping ref and chg */ +extern unsigned int hw_protect_gv(pmap_t gpmap, addr64_t va, vm_prot_t prot); + /* Change the protection of a guest page */ +extern addr64_t hw_gva_to_hva(pmap_t gpmap, addr64_t gva); /* Convert guest to host virtual address */ +extern unsigned int hw_find_map_gv(pmap_t gpmap, addr64_t gva, void *mpbuf); + /* Find and copy guest mapping into buffer */ extern unsigned int 
mappingdeb0; /* (TEST/DEBUG) */ extern unsigned int incrVSID; /* VSID increment value */ +extern int mapSetLists(pmap_t); +extern void consider_mapping_adjust(void); + #endif /* _PPC_MAPPINGS_H_ */ +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/ppc/mcount.s b/osfmk/ppc/mcount.s index b05860953..38a4d9153 100644 --- a/osfmk/ppc/mcount.s +++ b/osfmk/ppc/mcount.s @@ -21,7 +21,6 @@ */ #include #include -#include #include #include #include @@ -50,7 +49,8 @@ mcount: rlwinm r8,r9,0,MSR_EE_BIT+1,MSR_EE_BIT-1 ; Turn off interruptions mtmsr r8 ; Update msr isync - mfsprg r7,0 ; Get per_proc + mfsprg r7,1 ; Get the current activation + lwz r7,ACT_PER_PROC(r7) ; Get the per_proc block lhz r6,PP_CPU_FLAGS(r7) ; Get cpu flags ori r5,r6,mcountOff ; cmplw r5,r6 ; is mcount off @@ -60,7 +60,8 @@ mcount: mr r4, r0 bl _mcount ; Call the C routine lwz r9,FM_ARG0(r1) - mfsprg r7,0 ; Get per-proc block + mfsprg r7,1 ; Get the current activation + lwz r7,ACT_PER_PROC(r7) ; Get the per_proc block lhz r6,PP_CPU_FLAGS(r7) ; Get CPU number li r5,mcountOff ; andc r6,r6,r5 ; Clear mcount_off diff --git a/osfmk/ppc/mem.h b/osfmk/ppc/mem.h index 9dd946ba3..39609648c 100644 --- a/osfmk/ppc/mem.h +++ b/osfmk/ppc/mem.h @@ -33,8 +33,11 @@ #include #include -extern addr64_t hash_table_base; -extern unsigned int hash_table_size; +extern vm_offset_t static_memory_end; + +extern addr64_t hash_table_base; +extern unsigned int hash_table_size; +extern int hash_table_shift; /* size adjustment: bigger if >0, smaller if <0 */ void hash_table_init(vm_offset_t base, vm_offset_t size); diff --git a/osfmk/ppc/misc.c b/osfmk/ppc/misc.c index 5ed36d0e7..b3a4ee934 100644 --- a/osfmk/ppc/misc.c +++ b/osfmk/ppc/misc.c @@ -22,6 +22,7 @@ /* * @OSF_COPYRIGHT@ */ +#if 0 // dead code #include #include @@ -67,14 +68,14 @@ boolean_t copyin_multiple(const char *src, midpoint = (const char*) ((vm_offset_t)(src + count) & 0xF0000000); first_count = (midpoint - src); - first_result = copyin(src, dst, first_count); + first_result = copyin(CAST_USER_ADDR_T(src), dst, first_count); /* If there was an error, stop now and return error */ if (first_result != 0) return first_result; /* otherwise finish the job and return result */ - return copyin(midpoint, dst + first_count, count-first_count); + return copyin(CAST_USER_ADDR_T(midpoint), dst + first_count, count-first_count); } extern int copyout_multiple(const char *src, char *dst, vm_size_t count); @@ -99,7 +100,7 @@ int copyout_multiple(const char *src, char *dst, vm_size_t count) midpoint = (char *) ((vm_offset_t)(dst + count) & 0xF0000000); first_count = (midpoint - dst); - first_result = copyout(src, dst, first_count); + first_result = copyout(src, CAST_USER_ADDR_T(dst), first_count); /* If there was an error, stop now and return error */ if (first_result != 0) @@ -107,6 +108,7 @@ int copyout_multiple(const char *src, char *dst, vm_size_t count) /* otherwise finish the job and return result */ - return copyout(src + first_count, midpoint, count-first_count); + return copyout(src + first_count, CAST_USER_ADDR_T(midpoint), count-first_count); } +#endif // dead code diff --git a/osfmk/ppc/misc_asm.s b/osfmk/ppc/misc_asm.s index 1b3222dbd..33d39c41b 100644 --- a/osfmk/ppc/misc_asm.s +++ b/osfmk/ppc/misc_asm.s @@ -45,46 +45,6 @@ ENTRY(getrpc, TAG_NO_FRAME_USED) blr /* And return */ -/* Mask and unmask interrupts at the processor level */ -ENTRY(interrupt_disable, TAG_NO_FRAME_USED) - lis r8,hi16(MASK(MSR_VEC)) ; Get the vector flag - mfmsr r0 ; Save the MSR - ori r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_FP)) ;
Add the FP flag - andc r0,r0,r8 ; Clear VEC, FP, DR, and EE - mtmsr r0 - isync - blr - -ENTRY(interrupt_enable, TAG_NO_FRAME_USED) - - mfmsr r0 - ori r0, r0, MASK(MSR_EE) - mtmsr r0 - blr - -#if MACH_KDB -/* - * Kernel debugger versions of the spl*() functions. This allows breakpoints - * in the spl*() functions. - */ - -/* Mask and unmask interrupts at the processor level */ -ENTRY(db_interrupt_disable, TAG_NO_FRAME_USED) - lis r8,hi16(MASK(MSR_VEC)) ; Get the vector flag - mfmsr r0 ; Save the MSR - ori r8,r8,lo16(MASK(MSR_EE)|MASK(MSR_FP)) ; Add the FP flag - andc r0,r0,r8 ; Clear VEC, FP, DR, and EE - mtmsr r0 - isync - blr - -ENTRY(db_interrupt_enable, TAG_NO_FRAME_USED) - mfmsr r0 - ori r0, r0, MASK(MSR_EE) - mtmsr r0 - blr -#endif /* MACH_KDB */ - /* * General entry for all debuggers. This gets us onto the debug stack and * then back off at exit. We need to pass back R3 to caller. @@ -100,7 +60,8 @@ ENTRY(Call_Debugger, TAG_NO_FRAME_USED) andc r7,r7,r8 ; Clear VEC and FP mtmsr r7 ; Do it isync - mfsprg r8,0 ; Get the per_proc block + mfsprg r8,1 ; Get the current activation + lwz r8,ACT_PER_PROC(r8) ; Get the per_proc block stw r0,FM_LR_SAVE(r1) ; Save return on current stack lwz r9,PP_DEBSTACKPTR(r8) ; Get the debug stack @@ -127,7 +88,8 @@ cdNewDeb: li r0,0 ; Clear this out andc r0,r0,r8 ; Turn off all the interesting stuff mtmsr r0 - mfsprg r8,0 ; Get the per_proc block address + mfsprg r8,1 ; Get the current activation + lwz r8,ACT_PER_PROC(r8) ; Get the per_proc block lwz r9,PP_DEBSTACK_TOP_SS(r8) ; Get the top of the stack cmplw r1,r9 ; Have we hit the bottom of the debug stack? diff --git a/osfmk/ppc/misc_protos.h b/osfmk/ppc/misc_protos.h index 8698e2f1e..edbf0fe19 100644 --- a/osfmk/ppc/misc_protos.h +++ b/osfmk/ppc/misc_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,99 +26,109 @@ #ifndef _PPC_MISC_PROTOS_H_ #define _PPC_MISC_PROTOS_H_ -#include #include #include #include #include #include -#include #include -#include +#include #include #include +#include #include #include +#include -extern int strcmp(const char *s1, const char *s2); -extern int strncmp(const char *s1, const char *s2, unsigned long n); -extern char *strcat(char *dest, const char *src); -extern char *strcpy(char *dest, const char *src); +/* uncached-safe */ +extern void bzero_nc( + char *buf, + int size); -extern void vprintf(const char *fmt, va_list args); -extern void printf(const char *fmt, ...); +/* uncached-safe */ +extern void bcopy_nc( + char *from, + char *to, + int size); -extern void bzero_nc(char* buf, int size); /* uncached-safe */ -extern void bcopy_nc(char *from, char *to, int size); /* uncached-safe */ -extern void bcopy_phys(addr64_t from, addr64_t to, int size); /* Physical to physical copy (ints must be disabled) */ -extern void bcopy_physvir(addr64_t from, addr64_t to, int size); /* Physical to physical copy virtually (ints must be disabled) */ +/* Physical to physical copy (ints must be disabled) */ +extern void bcopy_phys( + addr64_t from, + addr64_t to, + int size); -extern void ppc_init(boot_args *args); -extern struct savearea *enterDebugger(unsigned int trap, - struct savearea *state, - unsigned int dsisr); +/* Physical to physical copy virtually (ints must be disabled) */ +extern void bcopy_physvir_32( + addr64_t from, + addr64_t to, + int size); -extern void draw_panic_dialog(void); -extern void ppc_vm_init(uint64_t mem_size, boot_args *args); +extern void phys_copy( + addr64_t from, + addr64_t to, + vm_size_t size); -extern int ppcNull(struct savearea *); -extern int ppcNullinst(struct savearea *); +extern void machine_conf( + void); -extern void autoconf(void); -extern void machine_init(void); -extern void machine_conf(void); -extern void probeio(void); -extern int cons_find(boolean_t); -extern void machine_startup(boot_args *args); +extern void machine_startup( + boot_args *args); -extern void interrupt_init(void); -extern void interrupt_enable(void); -extern void interrupt_disable(void); -extern void disable_bluebox_internal(thread_act_t act); -extern uint64_t hid0get64(void); -extern void hid5set64(uint64_t); -#if MACH_KDB -extern void db_interrupt_enable(void); -extern void db_interrupt_disable(void); -#endif /* MACH_KDB */ +extern void ppc_vm_init( + uint64_t ppc_mem_size, + boot_args *args); -extern void phys_zero(vm_offset_t, vm_size_t); -extern void phys_copy(addr64_t, addr64_t, vm_size_t); +extern int ppcNull( + struct savearea *asavearea); -extern void Load_context(thread_t th); +extern int ppcNullinst( + struct savearea *asavearea); -extern thread_t Switch_context( - thread_t old, - void (*cont)(void), - thread_t new); +extern void disable_bluebox_internal( + thread_t act); -extern void fpu_save(struct facility_context *); -extern void vec_save(struct facility_context *); -extern void toss_live_fpu(struct facility_context *); -extern void toss_live_vec(struct facility_context *); +extern uint64_t hid0get64( + void); -extern void condStop(unsigned int, unsigned int); +extern void hid5set64( + uint64_t); -extern int nsec_to_processor_clock_ticks(int nsec); +extern void Load_context( + thread_t th); -extern void tick_delay(int ticks); +extern thread_t Switch_context( + thread_t old, + void (*cont)(void), + thread_t new); + +extern void fpu_save( + struct facility_context *fpu_fc); + +extern void vec_save( + 
struct facility_context *vec_fc); + +extern void toss_live_fpu( + struct facility_context *fpu_fc); + +extern void toss_live_vec( + struct facility_context *vec_fc); + +extern struct savearea *enterDebugger( + unsigned int trap, + struct savearea *state, + unsigned int dsisr); + +extern void draw_panic_dialog( + void); #ifdef DEBUG #define DPRINTF(x) { printf("%s : ",__FUNCTION__);printf x; } #endif /* DEBUG */ #if MACH_ASSERT -extern void dump_thread(thread_t th); +extern void dump_thread( + thread_t th); #endif -#if NCPUS > 1 -extern void mp_probe_cpus(void); -#if MACH_KDB -extern void remote_kdb(void); -extern void clear_kdb_intr(void); -extern void kdb_console(void); -#endif /* MACH_KDB */ -#endif /* NCPUS > 1 */ - #endif /* _PPC_MISC_PROTOS_H_ */ diff --git a/osfmk/ppc/model_dep.c b/osfmk/ppc/model_dep.c index 43e108020..ca0f8a3ef 100644 --- a/osfmk/ppc/model_dep.c +++ b/osfmk/ppc/model_dep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,11 +57,9 @@ #include #include #include -#include #include #include -#include #include #include @@ -81,16 +79,16 @@ #include #include #include -#include +#include #include #include +#include #include #include #include #include #include -#include #include @@ -131,10 +129,6 @@ hw_lock_data_t pbtlock; /* backtrace print lock */ int debugger_cpu = -1; /* current cpu running debugger */ int debugger_debug = 0; /* Debug debugger */ -int debugger_is_slave[NCPUS]; /* Show that we were entered via sigp */ -int debugger_active[NCPUS]; /* Debugger active on CPU */ -int debugger_pending[NCPUS]; /* Debugger entry pending on CPU (this is a HACK) */ -int debugger_holdoff[NCPUS]; /* Holdoff debugger entry on this CPU (this is a HACK) */ int db_run_mode; /* Debugger run mode */ unsigned int debugger_sync = 0; /* Cross processor debugger entry sync */ extern unsigned int NMIss; /* NMI debounce switch */ @@ -151,7 +145,7 @@ volatile unsigned int cpus_holding_bkpts; /* counter for number of cpus holding insert back breakpoints) */ void unlock_debugger(void); void lock_debugger(void); -void dump_backtrace(unsigned int stackptr, unsigned int fence); +void dump_backtrace(savearea *sv, unsigned int stackptr, unsigned int fence); void dump_savearea(savearea *sv, unsigned int fence); int packAsc (unsigned char *inbuf, unsigned int length); @@ -186,7 +180,9 @@ char *failNames[] = { "No saveareas", /* failNoSavearea */ "Savearea corruption", /* failSaveareaCorr */ "Invalid live context", /* failBadLiveContext */ + "Corrupt skip lists", /* failSkipLists */ "Unaligned stack", /* failUnalignedStk */ + "Invalid pmap", /* failPmap */ "Unknown failure code" /* Unknown failure code - must always be last */ }; @@ -213,12 +209,13 @@ void machine_startup(boot_args *args) { int boot_arg; + unsigned int wncpu; + unsigned int vmm_arg; if (PE_parse_boot_arg("cpus", &wncpu)) { - if (!((wncpu > 0) && (wncpu < NCPUS))) - wncpu = NCPUS; - } else - wncpu = NCPUS; + if ((wncpu > 0) && (wncpu < MAX_CPUS)) + max_ncpus = wncpu; + } if( PE_get_hotkey( kPEControlKey )) halt_in_debugger = halt_in_debugger ? 
0 : 1; @@ -230,6 +227,8 @@ machine_startup(boot_args *args) if (boot_arg & DB_NMI) panicDebugging=TRUE; if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE; } + + PE_parse_boot_arg("vmmforce", &lowGlo.lgVMMforcedFeats); hw_lock_init(&debugger_lock); /* initialize debugger lock */ hw_lock_init(&pbtlock); /* initialize print backtrace lock */ @@ -277,22 +276,14 @@ machine_startup(boot_args *args) sched_poll_yield_shift = boot_arg; } - if (PE_parse_boot_arg("refunn", &boot_arg)) { - extern int refunnel_hint_enabled; - - refunnel_hint_enabled = boot_arg; - } machine_conf(); - ml_thrm_init(); /* Start thermal monitoring on this processor */ - /* - * Start the system. + * Kick off the kernel bootstrap. */ - setup_main(); - - /* Should never return */ + kernel_bootstrap(); + /*NOTREACHED*/ } char * @@ -306,8 +297,6 @@ machine_boot_info( void machine_conf(void) { - machine_info.max_cpus = NCPUS; - machine_info.avail_cpus = 1; machine_info.memory_size = mem_size; /* Note that this will be 2 GB for >= 2 GB machines */ } @@ -320,9 +309,8 @@ machine_init(void) void slave_machine_init(void) { - (void) ml_set_interrupts_enabled(FALSE); /* Make sure we are disabled */ - clock_init(); /* Init the clock */ cpu_machine_init(); /* Initialize the processor */ + clock_init(); /* Init the clock */ } void @@ -367,7 +355,6 @@ print_backtrace(struct savearea *ssp) int i, frames_cnt, skip_top_frames, frames_max; unsigned int store[8]; /* Buffer for real storage reads */ vm_offset_t backtrace_entries[32]; - thread_act_t *act; savearea *sv, *svssp; int cpu; savearea *psv; @@ -385,7 +372,7 @@ print_backtrace(struct savearea *ssp) svssp = (savearea *)ssp; /* Make this easier */ sv = 0; - if(current_thread()) sv = (savearea *)current_act()->mact.pcb; /* Find most current savearea if system has started */ + if(current_thread()) sv = (savearea *)current_thread()->machine.pcb; /* Find most current savearea if system has started */ fence = 0xFFFFFFFF; /* Show we go all the way */ if(sv) fence = (unsigned int)sv->save_r1; /* Stop at previous exception point */ @@ -393,7 +380,7 @@ print_backtrace(struct savearea *ssp) if(!svssp) { /* Should we start from stack? */ kdb_printf("Latest stack backtrace for cpu %d:\n", cpu_number()); __asm__ volatile("mr %0,r1" : "=r" (stackptr)); /* Get current stack */ - dump_backtrace(stackptr, fence); /* Dump the backtrace */ + dump_backtrace((savearea *)0,stackptr, fence); /* Dump the backtrace */ if(!sv) { /* Leave if no saveareas */ kdb_printf("\nKernel version:\n%s\n",version); /* Print kernel version */ hw_lock_unlock(&pbtlock); /* Allow another back trace to happen */ @@ -470,7 +457,7 @@ void dump_savearea(savearea *sv, unsigned int fence) { (unsigned int)sv->save_lr, (unsigned int)sv->save_r1, sv->save_exception, xcode); if(!(sv->save_srr1 & MASK(MSR_PR))) { /* Are we in the kernel? 
*/ - dump_backtrace((unsigned int)sv->save_r1, fence); /* Dump the stack back trace from here if not user state */ + dump_backtrace(sv, (unsigned int)sv->save_r1, fence); /* Dump the stack back trace from here if not user state */ } return; @@ -478,17 +465,22 @@ void dump_savearea(savearea *sv, unsigned int fence) { -#define DUMPFRAMES 32 +#define DUMPFRAMES 34 #define LRindex 2 -void dump_backtrace(unsigned int stackptr, unsigned int fence) { +void dump_backtrace(savearea *sv, unsigned int stackptr, unsigned int fence) { unsigned int bframes[DUMPFRAMES]; unsigned int sframe[8], raddr, dumbo; - int i; + int i, index=0; kdb_printf(" Backtrace:\n"); - for(i = 0; i < DUMPFRAMES; i++) { /* Dump up to max frames */ + if (sv != (savearea *)0) { + bframes[0] = (unsigned int)sv->save_srr0; + bframes[1] = (unsigned int)sv->save_lr; + index = 2; + } + for(i = index; i < DUMPFRAMES; i++) { /* Dump up to max frames */ if(!stackptr || (stackptr == fence)) break; /* Hit stop point or end... */ @@ -549,7 +541,7 @@ Debugger(const char *message) { } } - if (debug_mode && debugger_active[cpu_number()]) { /* Are we already on debugger on this processor? */ + if (debug_mode && getPerProc()->debugger_active) { /* Are we already on debugger on this processor? */ splx(spl); return; /* Yeah, don't do it again... */ } @@ -595,17 +587,17 @@ Debugger(const char *message) { } if( !panicDebugging && (pi_size != 0) ) { - int my_cpu, debugger_cpu; + int my_cpu; int tcpu; my_cpu = cpu_number(); debugger_cpu = my_cpu; hw_atomic_add(&debug_mode, 1); - debugger_active[my_cpu]++; + PerProcTable[my_cpu].ppe_vaddr->debugger_active++; lock_debugger(); - for(tcpu = 0; tcpu < NCPUS; tcpu++) { + for(tcpu = 0; tcpu < real_ncpus; tcpu++) { if(tcpu == my_cpu) continue; hw_atomic_add(&debugger_sync, 1); (void)cpu_signal(tcpu, SIGPdebug, 0 ,0); @@ -692,7 +684,7 @@ int Call_DebuggerC( #if MACH_KDB if((debugger_cpu == my_cpu) && /* Do we already own debugger? */ - debugger_active[my_cpu] && /* and are we really active? */ + PerProcTable[my_cpu].ppe_vaddr->debugger_active && /* and are we really active? */ db_recover && /* and have we set up recovery? */ (current_debugger == KDB_CUR_DB)) { /* and are we in KDB (only it handles recovery) */ kdb_trap(type, saved_state); /* Then reenter it... */ @@ -700,7 +692,8 @@ int Call_DebuggerC( #endif hw_atomic_add(&debug_mode, 1); /* Indicate we are in debugger */ - debugger_active[my_cpu]++; /* Show active on our CPU */ + PerProcTable[my_cpu].ppe_vaddr->debugger_active++; /* Show active on our CPU */ + lock_debugger(); /* Insure that only one CPU is in debugger */ if(db_im_stepping == my_cpu) { /* Are we just back from a step? 
*/ @@ -713,7 +706,7 @@ int Call_DebuggerC( kprintf("Call_DebuggerC(%d): %08X %08X, debact = %d\n", my_cpu, type, saved_state, debug_mode); /* (TEST/DEBUG) */ #endif printf("Call_Debugger: enter - cpu %d, is_slave %d, debugger_cpu %d, pc %08X\n", - my_cpu, debugger_is_slave[my_cpu], debugger_cpu, saved_state->save_srr0); + my_cpu, PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave, debugger_cpu, saved_state->save_srr0); } instr_pp = (vm_offset_t)pmap_find_phys(kernel_pmap, (addr64_t)(saved_state->save_srr0)); @@ -729,7 +722,7 @@ int Call_DebuggerC( #endif if (db_breakpoints_inserted) cpus_holding_bkpts++; /* Bump up the holding count */ - if (debugger_cpu == -1 && !debugger_is_slave[my_cpu]) { + if (debugger_cpu == -1 && !PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave) { #if 0 if (debugger_debug) kprintf("Call_DebuggerC(%d): lasttrace = %08X\n", my_cpu, lastTrace); /* (TEST/DEBUG) */ #endif @@ -738,7 +731,7 @@ int Call_DebuggerC( lastTrace = LLTraceSet(0); /* Disable low-level tracing */ - for(tcpu = 0; tcpu < NCPUS; tcpu++) { /* Stop all the other guys */ + for(tcpu = 0; tcpu < real_ncpus; tcpu++) { /* Stop all the other guys */ if(tcpu == my_cpu) continue; /* Don't diddle ourselves */ hw_atomic_add(&debugger_sync, 1); /* Count signal sent */ (void)cpu_signal(tcpu, SIGPdebug, 0 ,0); /* Tell 'em to enter debugger */ @@ -836,7 +829,7 @@ int Call_DebuggerC( debugger_exit: #if 0 if (debugger_debug) kprintf("Call_DebuggerC(%d): exit - inst = %08X, cpu=%d(%d), run=%d\n", my_cpu, - instr, my_cpu, debugger_cpu, db_run_mode); /* (TEST/DEBUG) */ + instr, my_cpu, debugger_cpu, db_run_mode); /* (TEST/DEBUG) */ #endif if ((instr == TRAP_DEBUGGER_INST) || /* Did we trap to enter debugger? */ (instr == TRAP_DIRECT_INST)) saved_state->save_srr0 += TRAP_INST_SIZE; /* Yes, point past trap */ @@ -851,8 +844,8 @@ debugger_exit: if (db_run_mode == STEP_CONTINUE) { /* Are we going to run? */ wait = TRUE; /* Yeah, remember to wait for breakpoints to clear */ debugger_cpu = -1; /* Release other processor's debuggers */ - debugger_pending[0] = 0; /* Release request (this is a HACK) */ - debugger_pending[1] = 0; /* Release request (this is a HACK) */ + for(tcpu = 0; tcpu < real_ncpus; tcpu++) + PerProcTable[tcpu].ppe_vaddr->debugger_pending = 0; /* Release request (this is a HACK) */ NMIss = 0; /* Let NMI bounce */ } @@ -862,14 +855,14 @@ debugger_exit: } if (db_breakpoints_inserted) cpus_holding_bkpts--; /* If any breakpoints, back off count */ - if (debugger_is_slave[my_cpu]) debugger_is_slave[my_cpu]--; /* If we were a slove, uncount us */ + if (PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave) PerProcTable[my_cpu].ppe_vaddr->debugger_is_slave--; /* If we were a slave, uncount us */ if (debugger_debug) printf("Call_Debugger: exit - cpu %d, debugger_cpu %d, run_mode %d holds %d\n", my_cpu, debugger_cpu, db_run_mode, cpus_holding_bkpts); unlock_debugger(); /* Release the lock */ - debugger_active[my_cpu]--; /* Say we aren't active anymore */ + PerProcTable[my_cpu].ppe_vaddr->debugger_active--; /* Say we aren't active anymore */ if (wait) while(cpus_holding_bkpts); /* Wait for breakpoints to clear */ diff --git a/osfmk/ppc/movc.s b/osfmk/ppc/movc.s index 6ef231f25..b7ec62939 100644 --- a/osfmk/ppc/movc.s +++ b/osfmk/ppc/movc.s @@ -461,7 +461,7 @@ pmap_novmx_icache_flush: // Stack frame format used by copyin, copyout, copyinstr and copyoutstr. // These routines all run both on 32 and 64-bit machines, though because they are called // by the BSD kernel they are always in 32-bit mode when entered.
The mapped ptr returned -// by MapUserAddressSpace will be 64 bits however on 64-bit machines. Beware to avoid +// by MapUserMemoryWindow will be 64 bits however on 64-bit machines. Beware to avoid // using compare instructions on this ptr. This mapped ptr is kept globally in r31, so there // is no need to store or load it, which are mode-dependent operations since it could be // 32 or 64 bits. @@ -469,11 +469,12 @@ pmap_novmx_icache_flush: #define kkFrameSize (FM_SIZE+32) #define kkBufSize (FM_SIZE+0) -#define kkCR (FM_SIZE+4) +#define kkCR3 (FM_SIZE+4) #define kkSource (FM_SIZE+8) #define kkDest (FM_SIZE+12) #define kkCountPtr (FM_SIZE+16) #define kkR31Save (FM_SIZE+20) +#define kkThrErrJmp (FM_SIZE+24) // nonvolatile CR bits we use as flags in cr3 @@ -489,20 +490,21 @@ pmap_novmx_icache_flush: /* * int * copyoutstr(src, dst, maxcount, count) - * vm_offset_t src; - * vm_offset_t dst; - * vm_size_t maxcount; - * vm_size_t* count; + * vm_offset_t src; // r3 + * addr64_t dst; // r4 and r5 + * vm_size_t maxcount; // r6 + * vm_size_t* count; // r7 * * Set *count to the number of bytes copied. */ ENTRY(copyoutstr, TAG_NO_FRAME_USED) - mfcr r2 // we use nonvolatile cr3 + mfcr r2,0x10 // save caller's cr3, which we use for flags + mr r10,r4 // move high word of 64-bit user address to r10 li r0,0 crset kkString // flag as a string op - mr r10,r4 // for copyout, dest ptr (r4) is in user space - stw r0,0(r6) // initialize #bytes moved + mr r11,r5 // move low word of 64-bit user address to r11 + stw r0,0(r7) // initialize #bytes moved crclr kkIn // flag as copyout b copyJoin @@ -511,10 +513,10 @@ ENTRY(copyoutstr, TAG_NO_FRAME_USED) /* * int * copyinstr(src, dst, maxcount, count) - * vm_offset_t src; - * vm_offset_t dst; - * vm_size_t maxcount; - * vm_size_t* count; + * addr64_t src; // r3 and r4 + * vm_offset_t dst; // r5 + * vm_size_t maxcount; // r6 + * vm_size_t* count; // r7 * * Set *count to the number of bytes copied * If dst == NULL, don't copy, just count bytes. @@ -522,13 +524,14 @@ ENTRY(copyoutstr, TAG_NO_FRAME_USED) */ ENTRY(copyinstr, TAG_NO_FRAME_USED) - mfcr r2 // we use nonvolatile cr3 - cmplwi r4,0 // dst==NULL? + mfcr r2,0x10 // save caller's cr3, which we use for flags + cmplwi r5,0 // dst==NULL? 
+ mr r10,r3 // move high word of 64-bit user address to r10 li r0,0 crset kkString // flag as a string op - mr r10,r3 // for copyin, source ptr (r3) is in user space + mr r11,r4 // move low word of 64-bit user address to r11 crmove kkNull,cr0_eq // remember if (dst==NULL) - stw r0,0(r6) // initialize #bytes moved + stw r0,0(r7) // initialize #bytes moved crset kkIn // flag as copyin (rather than copyout) b copyJoin1 // skip over the "crclr kkNull" @@ -537,9 +540,9 @@ ENTRY(copyinstr, TAG_NO_FRAME_USED) /* * int * copyout(src, dst, count) - * vm_offset_t src; - * vm_offset_t dst; - * size_t count; + * vm_offset_t src; // r3 + * addr64_t dst; // r4 and r5 + * size_t count; // r6 */ .align 5 @@ -550,18 +553,19 @@ LEXT(copyout) LEXT(copyoutmsg) #if INSTRUMENT - mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout - stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r12,pmc2 ; INSTRUMENT - Get stamp - stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r12,pmc3 ; INSTRUMENT - Get stamp - stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r12,pmc4 ; INSTRUMENT - Get stamp - stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it + mfspr r12,pmc1 ; INSTRUMENT - saveinstr[12] - Take stamp at copyout + stw r12,0x6100+(12*16)+0x0(0) ; INSTRUMENT - Save it + mfspr r12,pmc2 ; INSTRUMENT - Get stamp + stw r12,0x6100+(12*16)+0x4(0) ; INSTRUMENT - Save it + mfspr r12,pmc3 ; INSTRUMENT - Get stamp + stw r12,0x6100+(12*16)+0x8(0) ; INSTRUMENT - Save it + mfspr r12,pmc4 ; INSTRUMENT - Get stamp + stw r12,0x6100+(12*16)+0xC(0) ; INSTRUMENT - Save it #endif - mfcr r2 // save caller's CR + mfcr r2,0x10 // save caller's cr3, which we use for flags + mr r10,r4 // move high word of 64-bit user address to r10 crclr kkString // not a string version - mr r10,r4 // dest (r4) is user-space ptr + mr r11,r5 // move low word of 64-bit user address to r11 crclr kkIn // flag as copyout b copyJoin @@ -570,9 +574,9 @@ LEXT(copyoutmsg) /* * int * copyin(src, dst, count) - * vm_offset_t src; - * vm_offset_t dst; - * size_t count; + * addr64_t src; // r3 and r4 + * vm_offset_t dst; // r5 + * size_t count; // r6 */ @@ -583,36 +587,41 @@ LEXT(copyoutmsg) LEXT(copyin) LEXT(copyinmsg) - mfcr r2 // save caller's CR + mfcr r2,0x10 // save caller's cr3, which we use for flags + mr r10,r3 // move high word of 64-bit user address to r10 crclr kkString // not a string version - mr r10,r3 // source (r3) is user-space ptr in copyin + mr r11,r4 // move low word of 64-bit user address to r11 crset kkIn // flag as copyin // Common code to handle setup for all the copy variants: -// r2 = caller's CR, since we use cr3 -// r3-r6 = parameters -// r10 = user-space ptr (r3 if copyin, r4 if copyout) +// r2 = caller's cr3 +// r3 = source if copyout +// r5 = dest if copyin +// r6 = buffer length or count +// r7 = count output ptr (if kkString set) +// r10 = high word of 64-bit user-space address (source if copyin, dest if copyout) +// r11 = low word of 64-bit user-space address // cr3 = kkIn, kkString, kkNull flags copyJoin: crclr kkNull // (dst==NULL) convention not used with this call copyJoin1: // enter from copyinstr with kkNull set mflr r0 // get return address - cmplwi r5,0 // buffer length 0? + cmplwi r6,0 // buffer length 0? lis r9,0x1000 // r9 <- 0x10000000 (256MB) stw r0,FM_LR_SAVE(r1) // save return - cmplw cr1,r5,r9 // buffer length > 256MB ? + cmplw cr1,r6,r9 // buffer length > 256MB ? 
mfsprg r8,2 // get the features beq-- copyinout_0 // 0 length is degenerate case stwu r1,-kkFrameSize(r1) // set up stack frame - stw r2,kkCR(r1) // save caller's CR since we use cr3 + stw r2,kkCR3(r1) // save caller's cr3, which we use for flags mtcrf 0x02,r8 // move pf64Bit to cr6 - stw r3,kkSource(r1) // save args across MapUserAddressSpace - stw r4,kkDest(r1) - stw r5,kkBufSize(r1) + stw r3,kkSource(r1) // save args across MapUserMemoryWindow + stw r5,kkDest(r1) + stw r6,kkBufSize(r1) crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor - stw r6,kkCountPtr(r1) + stw r7,kkCountPtr(r1) stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr li r31,0 // no mapped ptr yet @@ -621,58 +630,60 @@ copyJoin1: // enter from copyinstr with kkNull set // The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp // the buffer length to 256MB. This isn't an issue if the string is less than 256MB // (as most are!), but if they are >256MB we eventually return ENAMETOOLONG. This restriction -// is due to MapUserAddressSpace; we don't want to consume more than two segments for +// is due to MapUserMemoryWindow; we don't want to consume more than two segments for // the mapping. ble++ cr1,copyin0 // skip if buffer length <= 256MB bf kkString,copyinout_too_big // error if not string op - mr r5,r9 // silently clamp buffer length to 256MB + mr r6,r9 // silently clamp buffer length to 256MB stw r9,kkBufSize(r1) // update saved copy too // Set up thread_recover in case we hit an illegal address. copyin0: - mfsprg r8,1 /* Get the current act */ + mfsprg r8,1 // Get the current thread lis r2,hi16(copyinout_error) - lwz r7,ACT_THREAD(r8) ori r2,r2,lo16(copyinout_error) + lwz r4,THREAD_RECOVER(r8) lwz r3,ACT_VMMAP(r8) // r3 <- vm_map virtual address - stw r2,THREAD_RECOVER(r7) + stw r2,THREAD_RECOVER(r8) + stw r4,kkThrErrJmp(r1) -// Map user segment into kernel map, turn on 64-bit mode. +// Map user segment into kernel map, turn on 64-bit mode. 
At this point: // r3 = vm map -// r5 = buffer length -// r10 = user space ptr (r3 if copyin, r4 if copyout) +// r6 = buffer length +// r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout) +// +// When we call MapUserMemoryWindow, we pass: +// r3 = vm map ptr +// r4/r5 = 64-bit user space address as an addr64_t - mr r6,r5 // Set length to map - li r4,0 // Note: we only do this 32-bit for now - mr r5,r10 // arg2 <- user space ptr + mr r4,r10 // copy user ptr into r4/r5 + mr r5,r11 #if INSTRUMENT - mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace - stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r12,pmc2 ; INSTRUMENT - Get stamp - stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r12,pmc3 ; INSTRUMENT - Get stamp - stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r12,pmc4 ; INSTRUMENT - Get stamp - stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it + mfspr r12,pmc1 ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace + stw r12,0x6100+(13*16)+0x0(0) ; INSTRUMENT - Save it + mfspr r12,pmc2 ; INSTRUMENT - Get stamp + stw r12,0x6100+(13*16)+0x4(0) ; INSTRUMENT - Save it + mfspr r12,pmc3 ; INSTRUMENT - Get stamp + stw r12,0x6100+(13*16)+0x8(0) ; INSTRUMENT - Save it + mfspr r12,pmc4 ; INSTRUMENT - Get stamp + stw r12,0x6100+(13*16)+0xC(0) ; INSTRUMENT - Save it #endif - bl EXT(MapUserAddressSpace) // set r3 <- address in kernel map of user operand + bl EXT(MapUserMemoryWindow) // get r3/r4 <- 64-bit address in kernel map of user operand #if INSTRUMENT - mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace - stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r12,pmc2 ; INSTRUMENT - Get stamp - stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r12,pmc3 ; INSTRUMENT - Get stamp - stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r12,pmc4 ; INSTRUMENT - Get stamp - stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it + mfspr r12,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace + stw r12,0x6100+(14*16)+0x0(0) ; INSTRUMENT - Save it + mfspr r12,pmc2 ; INSTRUMENT - Get stamp + stw r12,0x6100+(14*16)+0x4(0) ; INSTRUMENT - Save it + mfspr r12,pmc3 ; INSTRUMENT - Get stamp + stw r12,0x6100+(14*16)+0x8(0) ; INSTRUMENT - Save it + mfspr r12,pmc4 ; INSTRUMENT - Get stamp + stw r12,0x6100+(14*16)+0xC(0) ; INSTRUMENT - Save it #endif - or. r0,r3,r4 // Did we fail the mapping? mr r31,r4 // r31 <- mapped ptr into user space (may be 64-bit) - beq-- copyinout_error // was 0, so there was an error making the mapping bf-- kk64bit,copyin1 // skip if a 32-bit processor rldimi r31,r3,32,0 // slam high-order bits into mapped ptr @@ -688,7 +699,7 @@ copyin0: copyin1: lwz r5,kkBufSize(r1) // restore length to copy bf kkIn,copyin2 // skip if copyout - lwz r4,kkDest(r1) // copyin: source is mapped, dest is r4 at entry + lwz r4,kkDest(r1) // copyin: dest is kernel ptr mr r3,r31 // source is mapped ptr b copyin3 copyin2: // handle copyout @@ -700,7 +711,7 @@ copyin2: // handle copyout // r3 = source ptr (mapped if copyin) // r4 = dest ptr (mapped if copyout) // r5 = length -// r31 = mapped ptr returned by MapUserAddressSpace +// r31 = mapped ptr returned by MapUserMemoryWindow // cr3 = kkIn, kkString, kk64bit, and kkNull flags copyin3: @@ -711,29 +722,24 @@ copyin3: // Main exit point for copyin, copyout, copyinstr, and copyoutstr. Also reached // from error recovery if we get a DSI accessing user space. 
Clear recovery ptr, -// and pop off frame. Note that we have kept -// the mapped ptr into user space in r31, as a reg64_t type (ie, a 64-bit ptr on -// 64-bit machines.) We must unpack r31 into an addr64_t in (r3,r4) before passing -// it to ReleaseUserAddressSpace. +// and pop off frame. // r3 = 0, EFAULT, or ENAMETOOLONG copyinx: - lwz r2,kkCR(r1) // get callers cr3 - mfsprg r6,1 // Get the current act - lwz r10,ACT_THREAD(r6) - + lwz r2,kkCR3(r1) // get callers cr3 + mfsprg r6,1 // Get the current thread bf-- kk64bit,copyinx1 // skip if 32-bit processor mfmsr r12 rldicl r12,r12,0,MSR_SF_BIT+1 // if 64-bit processor, turn 64-bit mode off - mtmsrd r12 // turn SF off and EE back on + mtmsrd r12 // turn SF off isync // wait for the mode to change copyinx1: + lwz r0,FM_LR_SAVE+kkFrameSize(r1) // get return address lwz r31,kkR31Save(r1) // restore callers r31 + lwz r4,kkThrErrJmp(r1) // load saved thread recover addi r1,r1,kkFrameSize // pop off our stack frame - lwz r0,FM_LR_SAVE(r1) - li r4,0 - stw r4,THREAD_RECOVER(r10) // Clear recovery mtlr r0 + stw r4,THREAD_RECOVER(r6) // restore thread recover mtcrf 0x10,r2 // restore cr3 blr @@ -767,55 +773,85 @@ copyinout_too_big: // degenerate case // r3 = source ptr, mapped if copyinstr // r4 = dest ptr, mapped if copyoutstr // r5 = buffer length -// r31 = mapped ptr returned by MapUserAddressSpace +// r31 = mapped ptr returned by MapUserMemoryWindow // cr3 = kkIn, kkString, kkNull, and kk64bit flags // We do word copies unless the buffer is very short, then use a byte copy loop -// for the leftovers if necessary. +// for the leftovers if necessary. The crossover at which the word loop becomes +// faster is about seven bytes, counting the zero. +// +// We first must word-align the source ptr, in order to avoid taking a spurious +// page fault. copyString: - li r12,0 // Set header bytes count to zero - cmplwi cr1,r5,20 // is buffer very short? + cmplwi cr1,r5,15 // is buffer very short? + mr r12,r3 // remember ptr to 1st source byte mtctr r5 // assuming short, set up loop count for bytes - blt cr1,copyinstr8 // too short for word loop - andi. r12,r3,0x3 // is source ptr word aligned? - bne copyinstr11 // bytes loop -copyinstr1: - srwi r6,r5,2 // get #words in buffer - mtctr r6 // set up word loop count + blt-- cr1,copyinstr8 // too short for word loop + rlwinm r2,r3,0,0x3 // get byte offset of 1st byte within word + rlwinm r9,r3,3,0x18 // get bit offset of 1st byte within word + li r7,-1 + sub r3,r3,r2 // word-align source address + add r6,r5,r2 // get length starting at byte 0 in word + srw r7,r7,r9 // get mask for bytes in first word + srwi r0,r6,2 // get #words in buffer + lwz r5,0(r3) // get aligned word with first source byte lis r10,hi16(0xFEFEFEFF) // load magic constants into r10 and r11 lis r11,hi16(0x80808080) + mtctr r0 // set up word loop count + addi r3,r3,4 // advance past the source word ori r10,r10,lo16(0xFEFEFEFF) ori r11,r11,lo16(0x80808080) - bf kkNull,copyinstr6 // enter loop that copies - b copyinstr5 // use loop that just counts + orc r8,r5,r7 // map bytes preceding first source byte into 0xFF + bt-- kkNull,copyinstr5enter // enter loop that just counts + +// Special case 1st word, which has been 0xFF filled on left. Note that we use +// "and.", even though we execute both in 32 and 64-bit mode. This is OK. + + slw r5,r5,r9 // left justify payload bytes + add r9,r10,r8 // r9 = data + 0xFEFEFEFF + andc r7,r11,r8 // r7 = ~data & 0x80808080 + subfic r0,r2,4 // get r0 <- #payload bytes in 1st word + and.
r7,r9,r7 // if r7==0, then all bytes in r8 are nonzero + stw r5,0(r4) // copy payload bytes to dest buffer + add r4,r4,r0 // then point to next byte in dest buffer + bdnzt cr0_eq,copyinstr6 // use loop that copies if 0 not found + + b copyinstr7 // 0 found (buffer can't be full) // Word loop(s). They do a word-parallel search for 0s, using the following // non-obvious but very efficient test: // y = data + 0xFEFEFEFF // z = ~data & 0x80808080 -// If (y & z)==0, then all bytes in dataword are nonzero. We need two copies of -// this loop, since if we test kkNull in the loop then it becomes 9 words long. +// If (y & z)==0, then all bytes in dataword are nonzero. There are two copies +// of this loop, one that just counts and another that copies. +// r3 = ptr to next word of source (word aligned) +// r4 = ptr to next byte in buffer +// r6 = original buffer length (adjusted to be word origin) +// r10 = 0xFEFEFEFF +// r11 = 0x80808080 +// r12 = ptr to 1st source byte (used to determine string length) .align 5 // align inner loops for speed copyinstr5: // version that counts but does not copy - lwz r8,0(r3) // get next word of source - addi r3,r3,4 // increment source ptr + lwz r8,0(r3) // get next word of source + addi r3,r3,4 // advance past it +copyinstr5enter: add r9,r10,r8 // r9 = data + 0xFEFEFEFF andc r7,r11,r8 // r7 = ~data & 0x80808080 - and. r7,r9,r7 // r7 = r9 & r7 - bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes are nonzero + and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode) + bdnzt cr0_eq,copyinstr5 // if r7==0, then all bytes in r8 are nonzero b copyinstr7 .align 5 // align inner loops for speed copyinstr6: // version that counts and copies - lwz r8,0(r3) // get next word of source - addi r3,r3,4 // increment source ptr + lwz r8,0(r3) // get next word of source + addi r3,r3,4 // advance past it addi r4,r4,4 // increment dest ptr while we wait for data add r9,r10,r8 // r9 = data + 0xFEFEFEFF andc r7,r11,r8 // r7 = ~data & 0x80808080 - and. r7,r9,r7 // r7 = r9 & r7 + and. r7,r9,r7 // r7 = r9 & r7 ("." ok even in 64-bit mode) stw r8,-4(r4) // pack all 4 bytes into buffer bdnzt cr0_eq,copyinstr6 // if r7==0, then all bytes are nonzero @@ -823,19 +859,24 @@ copyinstr6: // version that counts and copies // Either 0 found or buffer filled. The above algorithm has mapped nonzero bytes to 0 // and 0 bytes to 0x80 with one exception: 0x01 bytes preceding the first 0 are also // mapped to 0x80. We must mask out these false hits before searching for an 0x80 byte.
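For clarity, the word-parallel zero-byte test used by copyinstr5/copyinstr6 can be expressed in C. This is an illustrative sketch, not part of the patch; it assumes only uint32_t from <stdint.h>:

#include <stdint.h>

/*
 * Word-parallel zero-byte test, as in the comment above:
 *     y = v + 0xFEFEFEFF   (equivalent to v - 0x01010101)
 *     z = ~v & 0x80808080
 * (y & z) is nonzero iff some byte of v is 0x00.  As noted above, 0x01
 * bytes preceding the first 0 also leave 0x80 marks in (y & z), so the
 * *position* of the zero byte must be refined by masking out those false
 * hits (which is what copyinstr7 does) before counting leading zeroes.
 */
static int word_has_zero_byte(uint32_t v)
{
    uint32_t y = v + 0xFEFEFEFFu;   /* subtract 1 from every byte, borrowing across bytes */
    uint32_t z = ~v & 0x80808080u;  /* candidate marks: bytes with their high bit clear */
    return (y & z) != 0;            /* nonzero iff v contains a 0x00 byte */
}

For example, word_has_zero_byte(0x01010101) is 0, while word_has_zero_byte(0x01000000) is nonzero (and also marks the 0x01 byte, the false positional hit the routine must mask).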
+// r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4)) +// r6 = original buffer length (adjusted to be word origin) +// r7 = computed vector of 0x00 and 0x80 bytes +// r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word +// r12 = ptr to 1st source byte (used to determine string length) +// cr0 = beq set iff 0 not found copyinstr7: - crnot kkZero,cr0_eq // 0 found iff cr0_eq is off - mfctr r6 // get #words remaining in buffer rlwinm r2,r8,7,0,31 // move 0x01 bits to 0x80 position - slwi r6,r6,2 // convert to #bytes remaining + rlwinm r6,r6,0,0x3 // mask down to partial byte count in last word andc r7,r7,r2 // turn off false hits from 0x0100 worst case - rlwimi r6,r5,0,30,31 // add in odd bytes leftover in buffer - srwi r7,r7,8 // we want to count the 0 as a byte xferred - addi r6,r6,4 // don't count last word xferred (yet) + crnot kkZero,cr0_eq // 0 found iff cr0_eq is off + srwi r7,r7,8 // we want to count the 0 as a byte xferred + cmpwi r6,0 // any bytes left over in last word? cntlzw r7,r7 // now we can find the 0 byte (ie, the 0x80) + subi r3,r3,4 // back up r3 to point to 1st byte in r8 srwi r7,r7,3 // convert 8,16,24,32 to 1,2,3,4 - sub. r6,r6,r7 // account for nonzero bytes in last word + add r3,r3,r7 // now r3 points one past 0 byte, or at 1st byte not xferred bt++ kkZero,copyinstr10 // 0 found, so done beq copyinstr10 // r6==0, so buffer truly full @@ -845,6 +886,10 @@ copyinstr7: // Byte loop. This is used for very small buffers and for the odd bytes left over // after searching and copying words at a time. +// r3 = ptr to next byte of source +// r4 = ptr to next dest byte +// r12 = ptr to first byte of source +// ctr = count of bytes to check .align 5 // align inner loops for speed copyinstr8: // loop over bytes of source @@ -852,53 +897,400 @@ copyinstr8: // loop over bytes of source addi r3,r3,1 addi r4,r4,1 // increment dest addr whether we store or not cmpwi r0,0 // the 0? - bt-- kkNull,copyinstr9 // don't store (was copyinstr with NULL ptr) + bt-- kkNull,copyinstr9 // don't store if copyinstr with NULL ptr stb r0,-1(r4) copyinstr9: bdnzf cr0_eq,copyinstr8 // loop if byte not 0 and more room in buffer - mfctr r6 // get #bytes left in buffer crmove kkZero,cr0_eq // remember if 0 found or buffer filled // Buffer filled or 0 found. Unwind and return. -// r5 = kkBufSize, ie buffer length -// r6 = untransferred bytes remaining in buffer -// r31 = mapped ptr returned by MapUserAddressSpace -// cr3 = kkZero set iff 0 found +// r3 = ptr to 1st source byte not transferred +// r12 = ptr to 1st source byte +// r31 = mapped ptr returned by MapUserMemoryWindow +// cr3 = kkZero set iff 0 found copyinstr10: lwz r9,kkCountPtr(r1) // get ptr to place to store count of bytes moved - sub r2,r5,r6 // get #bytes we moved, counting the 0 iff any - add r2,r2,r12 // add the header bytes count - li r3,0 // assume 0 return status + sub r2,r3,r12 // compute #bytes copied (including the 0) + li r3,0 // assume success return status stw r2,0(r9) // store #bytes moved bt++ kkZero,copyinx // we did find the 0 so return 0 li r3,ENAMETOOLONG // buffer filled b copyinx // join main exit routine -// Byte loop. 
This is used on the header bytes for unaligned source - - .align 5 // align inner loops for speed -copyinstr11: - li r10,4 // load word size - sub r12,r10,r12 // set the header bytes count - mtctr r12 // set up bytes loop count -copyinstr12: // loop over bytes of source - lbz r0,0(r3) // get next byte of source - addi r3,r3,1 - addi r4,r4,1 // increment dest addr whether we store or not - cmpwi r0,0 // the 0? - bt-- kkNull,copyinstr13 // don't store (was copyinstr with NULL ptr) - stb r0,-1(r4) -copyinstr13: - bdnzf cr0_eq,copyinstr12 // loop if byte not 0 and more room in buffer - sub r5,r5,r12 // substract the bytes copied - bne cr0_eq,copyinstr1 // branch to word loop - - mr r5,r12 // Get the header bytes count - li r12,0 // Clear the header bytes count - mfctr r6 // get #bytes left in buffer - crmove kkZero,cr0_eq // remember if 0 found or buffer filled - b copyinstr10 +//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> +/* + * int + * copypv(source, sink, size, which) + * addr64_t src; // r3 and r4 + * addr64_t dst; // r5 and r6 + * size_t size; // r7 + * int which; // r8 + * + * Operand size bytes are copied from operand src into operand dst. The source and + * destination operand addresses are given as addr64_t, and may designate starting + * locations in physical or virtual memory in any combination except where both are + * virtual. Virtual memory locations may be in either the kernel or the current thread's + * address space. Operand size may be up to 256MB. + * + * Operation is controlled by operand which, which offers these options: + * cppvPsrc : source operand is (1) physical or (0) virtual + * cppvPsnk : destination operand is (1) physical or (0) virtual + * cppvKmap : virtual operand is in (1) kernel or (0) current thread + * cppvFsnk : (1) flush destination before and after transfer + * cppvFsrc : (1) flush source before and after transfer + * cppvNoModSnk : (1) don't set destination operand's changed bit(s) + * cppvNoRefSrc : (1) don't set source operand's referenced bit(s) + * + * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32(). + * This section describes the operation of the new 64-bit path. + * + * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a + * window in the kernel address space into all of physical RAM plus the I/O hole. Since + * the window's mappings specify the proper access policies for the underlying memory, + * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk + * and cppvFsrc are ignored. Physical operand addresses are relocated into the physical + * memory window, and are accessed with data relocation on. Virtual addresses are either + * within the kernel, or are mapped into the kernel address space through the user memory + * window. Because accesses to a virtual operand are performed with data relocation on, + * the new path does not have to translate the address, disable/enable interrupts, lock + * the mapping, or update referenced and changed bits. + * + * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is + * a substantial performance penalty for copypv operating in real mode. Utilizing the + * new 64-bit path, transfer performance increases >100% on the G5. + * + * The attentive reader may notice that mtmsrd ops are not followed by isync ops as + * might be expected.
The 970 follows PowerPC architecture version 2.01, which defines + * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer + * required. + * + * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need + * to call 32-bit functions, which would lead to the high-order 32 bits of our values + * getting clobbered unless we do something special. So, we preserve our 64-bit non-volatiles + * in our own stack frame across calls to 32-bit functions. + * + */ +// Map operand which bits into non-volatile CR2 and CR3 bits. +#define whichAlign ((3+1)*4) +#define whichMask 0x007F0000 +#define pvPsnk (cppvPsnkb - whichAlign) +#define pvPsrc (cppvPsrcb - whichAlign) +#define pvFsnk (cppvFsnkb - whichAlign) +#define pvFsrc (cppvFsrcb - whichAlign) +#define pvNoModSnk (cppvNoModSnkb - whichAlign) +#define pvNoRefSrc (cppvNoRefSrcb - whichAlign) +#define pvKmap (cppvKmapb - whichAlign) +#define pvNoCache cr2_lt + + .align 5 + .globl EXT(copypv) + +LEXT(copypv) + mfsprg r10,2 // get feature flags + mtcrf 0x02,r10 // we need to test pf64Bit + bt++ pf64Bitb,copypv_64 // skip if 64-bit processor (only they take hint) + + b EXT(hw_copypv_32) // carry on with 32-bit copypv + +// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber. +copypv_64: + mfsprg r9,1 // get current thread + stwu r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1) + // allocate stack frame and link it + mflr r0 // get return address + mfcr r10 // get cr2 and cr3 + lwz r12,THREAD_RECOVER(r9) // get error callback + stw r26,FM_ARG0+0x00(r1) // save non-volatile r26 + stw r27,FM_ARG0+0x04(r1) // save non-volatile r27 + stw r28,FM_ARG0+0x08(r1) // save non-volatile r28 + stw r29,FM_ARG0+0x0C(r1) // save non-volatile r29 + stw r30,FM_ARG0+0x10(r1) // save non-volatile r30 + stw r31,FM_ARG0+0x14(r1) // save non-volatile r31 + stw r12,FM_ARG0+0x20(r1) // save error callback + stw r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) + // save return address + stw r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) + // save non-volatile cr2 and cr3 + +// Non-volatile register usage in this routine is: +// r26: saved msr image +// r27: current pmap_t / virtual source address +// r28: destination virtual address +// r29: source address +// r30: destination address +// r31: byte count to copy +// cr2/3: parameter 'which' bits + + rlwinm r8,r8,whichAlign,whichMask // align and mask which bits + mr r31,r7 // copy size to somewhere non-volatile + mtcrf 0x20,r8 // insert which bits into cr2 and cr3 + mtcrf 0x10,r8 // insert which bits into cr2 and cr3 + rlwinm r29,r3,0,1,0 // form source address high-order bits + rlwinm r30,r5,0,1,0 // form destination address high-order bits + rlwimi r29,r4,0,0,31 // form source address low-order bits + rlwimi r30,r6,0,0,31 // form destination address low-order bits + crand cr7_lt,pvPsnk,pvPsrc // are both operand addresses physical? + cntlzw r0,r31 // count leading zeroes in byte count + cror cr7_eq,pvPsnk,pvPsrc // cr7_eq <- source or destination is physical + bf-- cr7_eq,copypv_einval // both operands may not be virtual + cmplwi r0,4 // byte count greater than or equal 256M (2**28)? + blt-- copypv_einval // byte count too big, give EINVAL + cmplwi r31,0 // byte count zero? + beq-- copypv_zero // early out + bt cr7_lt,copypv_phys // both operand addresses are physical + mr r28,r30 // assume destination is virtual + bf pvPsnk,copypv_dv // is destination virtual? 
+ mr r28,r29 // no, so source must be virtual +copypv_dv: + lis r27,ha16(EXT(kernel_pmap)) // get kernel's pmap_t *, high-order + lwz r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t + bt pvKmap,copypv_kern // virtual address in kernel map? + lwz r3,ACT_VMMAP(r9) // get user's vm_map * + rldicl r4,r28,32,32 // r4, r5 <- addr64_t virtual address + rldicl r5,r28,0,32 + std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit call + std r30,FM_ARG0+0x38(r1) // preserve 64-bit r30 across 32-bit call + bl EXT(MapUserMemoryWindow) // map slice of user space into kernel space + ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29 + ld r30,FM_ARG0+0x38(r1) // restore 64-bit r30 + rlwinm r28,r3,0,1,0 // convert relocated addr64_t virtual address + rlwimi r28,r4,0,0,31 // into a single 64-bit scalar +copypv_kern: + +// Since we'll be accessing the virtual operand with data-relocation on, we won't need to +// update the referenced and changed bits manually after the copy. So, force the appropriate +// flag bit on for the virtual operand. + crorc pvNoModSnk,pvNoModSnk,pvPsnk // for virtual dest, let hardware do ref/chg bits + crorc pvNoRefSrc,pvNoRefSrc,pvPsrc // for virtual source, let hardware do ref bit + +// We'll be finding a mapping and looking at it, so we need to disable 'rupts. + lis r0,hi16(MASK(MSR_VEC)) // get vector mask + ori r0,r0,lo16(MASK(MSR_FP)) // insert fp mask + mfmsr r26 // save current msr + andc r26,r26,r0 // turn off VEC and FP in saved copy + ori r0,r0,lo16(MASK(MSR_EE)) // add EE to our mask + andc r0,r26,r0 // disable EE in our new msr image + mtmsrd r0 // introduce new msr image + +// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28. We now +// try to find a mapping corresponding to this address in order to determine whether the address +// is cacheable. If we don't find a mapping, we can safely assume that the operand is cacheable +// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we +// examine the mapping's caching-inhibited bit. + mr r3,r27 // r3 <- pmap_t pmap + rldicl r4,r28,32,32 // r4, r5 <- addr64_t va + rldicl r5,r28,0,32 + la r6,FM_ARG0+0x18(r1) // r6 <- addr64_t *nextva + li r7,1 // r7 <- int full, search nested mappings + std r26,FM_ARG0+0x28(r1) // preserve 64-bit r26 across 32-bit calls + std r28,FM_ARG0+0x30(r1) // preserve 64-bit r28 across 32-bit calls + std r29,FM_ARG0+0x38(r1) // preserve 64-bit r29 across 32-bit calls + std r30,FM_ARG0+0x40(r1) // preserve 64-bit r30 across 32-bit calls + bl EXT(mapping_find) // find mapping for virtual operand + mr. r3,r3 // did we find it? + beq copypv_nomapping // nope, so we'll assume it's cacheable + lwz r4,mpVAddr+4(r3) // get low half of virtual addr for hw flags + rlwinm. r4,r4,0,mpIb-32,mpIb-32 // caching-inhibited bit set? + crnot pvNoCache,cr0_eq // if it is, use bcopy_nc + bl EXT(mapping_drop_busy) // drop busy on the mapping +copypv_nomapping: + ld r26,FM_ARG0+0x28(r1) // restore 64-bit r26 + ld r28,FM_ARG0+0x30(r1) // restore 64-bit r28 + ld r29,FM_ARG0+0x38(r1) // restore 64-bit r29 + ld r30,FM_ARG0+0x40(r1) // restore 64-bit r30 + mtmsrd r26 // restore msr to its previous state + +// Set both the source and destination virtual addresses to the virtual operand's address -- +// we'll overlay one of them with the physical operand's address. + mr r27,r28 // make virtual operand BOTH source AND destination + +// Now we're ready to relocate the physical operand address(es) into the physical memory window.
+// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address +// space somewhere at or over the 2**32 line. If one or both of the operands are in the I/O hole, +// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy. +copypv_phys: + ld r6,lgPMWvaddr(0) // get physical memory window virtual address + bf pvPsnk,copypv_dstvirt // is destination address virtual? + cntlzd r4,r30 // count leading zeros in destination address + cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**30 to 2**31-1) + cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations + add r28,r30,r6 // relocate physical destination into physical window +copypv_dstvirt: + bf pvPsrc,copypv_srcvirt // is source address virtual? + cntlzd r4,r29 // count leading zeros in source address + cmplwi r4,32 // if it's 32, then it's in the I/O hole (2**30 to 2**31-1) + cror pvNoCache,cr0_eq,pvNoCache // use bcopy_nc for I/O hole locations + add r27,r29,r6 // relocate physical source into physical window +copypv_srcvirt: + +// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything +// funny happens during the copy. So, we set a pointer to our error handler in the per-thread +// control block. + mfsprg r8,1 // get current thread's stuff + lis r3,hi16(copypv_error) // get our error callback's address, high + ori r3,r3,lo16(copypv_error) // get our error callback's address, low + stw r3,THREAD_RECOVER(r8) // set our error callback + +// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter +// 64-bit mode. + li r0,1 // get a handy one bit + mfmsr r3 // get current msr + rldimi r3,r0,63,MSR_SF_BIT // set SF bit on in our msr copy + mtmsrd r3 // enter 64-bit mode + +// If requested, flush data cache +// Note that we don't actually flush; the code is being kept "just in case". +#if 0 + bf pvFsrc,copypv_nfs // do we flush the source? + rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address + rldicl r4,r27,0,32 + mr r5,r31 // r5 <- count (in bytes) + li r6,0 // r6 <- boolean phys (false, not physical) + bl EXT(flush_dcache) // flush the source operand +copypv_nfs: + bf pvFsnk,copypv_nfdx // do we flush the destination? + rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address + rldicl r4,r28,0,32 + mr r5,r31 // r5 <- count (in bytes) + li r6,0 // r6 <- boolean phys (false, not physical) + bl EXT(flush_dcache) // flush the destination operand +copypv_nfdx: +#endif + +// Call bcopy or bcopy_nc to perform the copy. + mr r3,r27 // r3 <- source virtual address + mr r4,r28 // r4 <- destination virtual address + mr r5,r31 // r5 <- bytes to copy + bt pvNoCache,copypv_nc // take non-caching route + bl EXT(bcopy) // call bcopy to do the copying + b copypv_copydone +copypv_nc: + bl EXT(bcopy_nc) // call bcopy_nc to do the copying +copypv_copydone: + +// If requested, flush data cache +// Note that we don't actually flush; the code is being kept "just in case". +#if 0 + bf pvFsrc,copypv_nfsx // do we flush the source? + rldicl r3,r27,32,32 // r3, r4 <- addr64_t source virtual address + rldicl r4,r27,0,32 + mr r5,r31 // r5 <- count (in bytes) + li r6,0 // r6 <- boolean phys (false, not physical) + bl EXT(flush_dcache) // flush the source operand +copypv_nfsx: + bf pvFsnk,copypv_nfd // do we flush the destination?
+ rldicl r3,r28,32,32 // r3, r4 <- addr64_t destination virtual address + rldicl r4,r28,0,32 + mr r5,r31 // r5 <- count (in bytes) + li r6,0 // r6 <- boolean phys (false, not physical) + bl EXT(flush_dcache) // flush the destination operand +copypv_nfd: +#endif + +// Leave 64-bit mode. + mfmsr r3 // get current msr + rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy + mtmsrd r3 // leave 64-bit mode + +// If requested, set ref/chg on source/dest physical operand(s). It is possible that the copy is +// from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling +// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic. +// Note that this code is page-size sensitive, so it should probably be a part of our low-level +// code in hw_vm.s. + bt pvNoModSnk,copypv_nomod // skip destination update if not requested + std r29,FM_ARG0+0x30(r1) // preserve 64-bit r29 across 32-bit calls + li r26,1 // r26 <- 4K-page count + mr r27,r31 // r27 <- byte count + rlwinm r3,r30,0,20,31 // does destination cross a page boundary? + subfic r3,r3,4096 // + cmplw r3,r27 // + blt copypv_modnox // skip if not crossing case + subf r27,r3,r27 // r27 <- byte count less initial fragment + addi r26,r26,1 // increment page count +copypv_modnox: + srdi r3,r27,12 // pages to update (not including crosser) + add r26,r26,r3 // add in crosser + srdi r27,r30,12 // r27 <- destination page number +copypv_modloop: + mr r3,r27 // r3 <- destination page number + la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex + bl EXT(mapping_phys_lookup) // see if page is really there + mr. r3,r3 // is it? + beq-- copypv_modend // nope, break out of modify loop + mr r3,r27 // r3 <- destination page number + bl EXT(mapping_set_mod) // set page changed status + subi r26,r26,1 // decrement page count + cmpwi r26,0 // done yet? + bgt copypv_modloop // nope, iterate +copypv_modend: + ld r29,FM_ARG0+0x30(r1) // restore 64-bit r29 +copypv_nomod: + bt pvNoRefSrc,copypv_done // skip source update if not requested +copypv_debugref: + li r26,1 // r26 <- 4K-page count + mr r27,r31 // r27 <- byte count + rlwinm r3,r29,0,20,31 // does source cross a page boundary? + subfic r3,r3,4096 // + cmplw r3,r27 // + blt copypv_refnox // skip if not crossing case + subf r27,r3,r27 // r27 <- byte count less initial fragment + addi r26,r26,1 // increment page count +copypv_refnox: + srdi r3,r27,12 // pages to update (not including crosser) + add r26,r26,r3 // add in crosser + srdi r27,r29,12 // r27 <- source page number +copypv_refloop: + mr r3,r27 // r3 <- source page number + la r4,FM_ARG0+0x18(r1) // r4 <- unsigned int *pindex + bl EXT(mapping_phys_lookup) // see if page is really there + mr. r3,r3 // is it? + beq-- copypv_done // nope, break out of reference loop + mr r3,r27 // r3 <- source page number + bl EXT(mapping_set_ref) // set page referenced status + subi r26,r26,1 // decrement page count + cmpwi r26,0 // done yet? + bgt copypv_refloop // nope, iterate + +// Return, indicating success. +copypv_done: +copypv_zero: + li r3,0 // our efforts were crowned with success + +// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
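+// In C terms, the contract implemented by the epilogue below and the two error
+// tails after it is roughly (assumed prototype, shown for illustration only):
+//	int copypv(addr64_t source, addr64_t sink, unsigned int size, int which);
+// returning 0 on success, EINVAL for a malformed request (both operands virtual,
+// or a byte count of 256MB or more), and EFAULT if the recovery callback fires
+// while the copy is in flight.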
+copypv_return: + mfsprg r9,1 // get current thread's stuff + lwz r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1) + // get return address + lwz r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1) + // get non-volatile cr2 and cr3 + lwz r26,FM_ARG0+0x00(r1) // restore non-volatile r26 + lwz r27,FM_ARG0+0x04(r1) // restore non-volatile r27 + mtlr r0 // restore return address + lwz r28,FM_ARG0+0x08(r1) // restore non-volatile r28 + mtcrf 0x20,r4 // restore non-volatile cr2 + mtcrf 0x10,r4 // restore non-volatile cr3 + lwz r11,FM_ARG0+0x20(r1) // get saved error callback + lwz r29,FM_ARG0+0x0C(r1) // restore non-volatile r29 + lwz r30,FM_ARG0+0x10(r1) // restore non-volatile r30 + lwz r31,FM_ARG0+0x14(r1) // restore non-volatile r31 + stw r11,THREAD_RECOVER(r9) // restore our error callback + lwz r1,0(r1) // release stack frame + + blr // y'all come back now + +// Invalid argument handler. +copypv_einval: + li r3,EINVAL // invalid argument + b copypv_return // return + +// Error encountered during bcopy or bcopy_nc. +copypv_error: + mfmsr r3 // get current msr + rldicl r3,r3,0,MSR_SF_BIT+1 // clear SF bit in our copy + mtmsrd r3 // leave 64-bit mode + li r3,EFAULT // it was all his fault + b copypv_return // return diff --git a/osfmk/ppc/new_screen.h b/osfmk/ppc/new_screen.h index 0bec494ff..b2e6189eb 100644 --- a/osfmk/ppc/new_screen.h +++ b/osfmk/ppc/new_screen.h @@ -38,5 +38,8 @@ extern void clear_RGB16(int color); extern void adj_position(unsigned char C); extern void put_cursor(int color); extern void screen_put_char(unsigned char C); -extern void initialize_screen(void *); +extern void initialize_screen( + Boot_Video * boot_vinfo, + unsigned int op); + #endif /* _NEW_SCREEN_H_ */ diff --git a/osfmk/ppc/pcb.c b/osfmk/ppc/pcb.c index 182428248..d3c70db49 100644 --- a/osfmk/ppc/pcb.c +++ b/osfmk/ppc/pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -44,39 +44,41 @@ * Utah $Hdr: pcb.c 1.23 92/06/27$ */ -#include #include #include + +#include +#include + +#include #include #include -#include -#include -#include -#include +#include #include +#include + +#include +#include -#include #include +#include #include #include -#include #include #include #include #include #include #include -#include +#include #include #include +#include #include -extern int real_ncpus; /* Number of actual CPUs */ -extern struct Saveanchor saveanchor; /* Aliged savearea anchor */ - -void machine_act_terminate(thread_act_t act); +void machine_act_terminate(thread_t); /* * These constants are dumb. They should not be in asm.h! @@ -120,7 +122,6 @@ machine_switch_context( thread_continue_t continuation, thread_t new) { - register thread_act_t old_act = old->top_act, new_act = new->top_act; register thread_t retval; pmap_t new_pmap; facility_context *fowner; @@ -134,8 +135,6 @@ machine_switch_context( ppinfo->old_thread = (unsigned int)old; ppinfo->cpu_flags &= ~traceBE; /* disable branch tracing if on */ - check_simple_locks(); - /* Our context might wake up on another processor, so we must * not keep hot state in our FPU, it must go back to the pcb * so that it can be found by the other if needed */ @@ -143,13 +142,13 @@ machine_switch_context( if(real_ncpus > 1) { /* This is potentially slow, so only do when actually SMP */ fowner = ppinfo->FPU_owner; /* Cache this because it may change */ if(fowner) { /* Is there any live context?
*/ - if(fowner->facAct == old->top_act) { /* Is it for us? */ + if(fowner->facAct == old) { /* Is it for us? */ fpu_save(fowner); /* Yes, save it */ } } fowner = ppinfo->VMX_owner; /* Cache this because it may change */ if(fowner) { /* Is there any live context? */ - if(fowner->facAct == old->top_act) { /* Is it for us? */ + if(fowner->facAct == old) { /* Is it for us? */ vec_save(fowner); /* Yes, save it */ } } @@ -159,44 +158,44 @@ machine_switch_context( * If old thread is running VM, save per proc userProtKey and FamVMmode spcFlags bits in the thread spcFlags * These bits can be modified in the per proc without updating the thread spcFlags */ - if(old_act->mact.specFlags & runningVM) { - old_act->mact.specFlags &= ~(userProtKey|FamVMmode); - old_act->mact.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode); + if(old->machine.specFlags & runningVM) { + old->machine.specFlags &= ~(userProtKey|FamVMmode); + old->machine.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode); } - old_act->mact.specFlags &= ~OnProc; - new_act->mact.specFlags |= OnProc; + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; /* * We do not have to worry about the PMAP module, so switch. * - * We must not use top_act->map since this may not be the actual + * We must not use thread->map since this may not be the actual * task map, but the map being used for a klcopyin/out. */ - if(new_act->mact.specFlags & runningVM) { /* Is the new guy running a VM? */ - pmap_switch(new_act->mact.vmmCEntry->vmmPmap); /* Switch to the VM's pmap */ - ppinfo->VMMareaPhys = new_act->mact.vmmCEntry->vmmContextPhys; - ppinfo->VMMXAFlgs = new_act->mact.vmmCEntry->vmmXAFlgs; - ppinfo->FAMintercept = new_act->mact.vmmCEntry->vmmFAMintercept; + if(new->machine.specFlags & runningVM) { /* Is the new guy running a VM? */ + pmap_switch(new->machine.vmmCEntry->vmmPmap); /* Switch to the VM's pmap */ + ppinfo->VMMareaPhys = new->machine.vmmCEntry->vmmContextPhys; + ppinfo->VMMXAFlgs = new->machine.vmmCEntry->vmmXAFlgs; + ppinfo->FAMintercept = new->machine.vmmCEntry->vmmFAMintercept; } else { /* otherwise, we use the task's pmap */ - new_pmap = new_act->task->map->pmap; - if ((old_act->task->map->pmap != new_pmap) || (old_act->mact.specFlags & runningVM)) { + new_pmap = new->task->map->pmap; + if ((old->task->map->pmap != new_pmap) || (old->machine.specFlags & runningVM)) { pmap_switch(new_pmap); /* Switch if there is a change */ } } - if(old_act->mact.cioSpace != invalSpace) { /* Does our old guy have an active copyin/out? */ - old_act->mact.cioSpace |= cioSwitchAway; /* Show we switched away from this guy */ - hw_blow_seg(copyIOaddr); /* Blow off the first segment */ - hw_blow_seg(copyIOaddr + 0x10000000ULL); /* Blow off the second segment */ + if(old->machine.umwSpace != invalSpace) { /* Does our old guy have an active window?
*/ + old->machine.umwSpace |= umwSwitchAway; /* Show we switched away from this guy */ + hw_blow_seg(lowGlo.lgUMWvaddr); /* Blow off the first segment */ + hw_blow_seg(lowGlo.lgUMWvaddr + 0x10000000ULL); /* Blow off the second segment */ } KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, old->reason, (int)new, old->sched_pri, new->sched_pri, 0); retval = Switch_context(old, continuation, new); - assert(retval != (struct thread_shuttle*)NULL); + assert(retval != NULL); if (branch_tracing_enabled()) { ppinfo = getPerProc(); /* Get our processor block */ @@ -223,7 +222,7 @@ machine_thread_create( hw_atomic_add((uint32_t *)&saveanchor.savetarget, 4); /* Account for the number of saveareas we think we "need" for this activation */ - assert(thread->mact.pcb == (savearea *)0); /* Make sure there was no previous savearea */ + assert(thread->machine.pcb == (savearea *)0); /* Make sure there was no previous savearea */ sv = save_alloc(); /* Go get us a savearea */ @@ -231,12 +230,12 @@ machine_thread_create( sv->save_hdr.save_prev = 0; /* Clear the back pointer */ sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft); /* Mark as in use */ - sv->save_hdr.save_act = (struct thread_activation *)thread; /* Set who owns it */ - thread->mact.pcb = sv; /* Point to the save area */ - thread->mact.curctx = &thread->mact.facctx; /* Initialize facility context */ - thread->mact.facctx.facAct = thread; /* Initialize facility context pointer to activation */ - thread->mact.cioSpace = invalSpace; /* Initialize copyin/out space to invalid */ - thread->mact.preemption_count = 0; /* Initialize preemption counter */ + sv->save_hdr.save_act = thread; /* Set who owns it */ + thread->machine.pcb = sv; /* Point to the save area */ + thread->machine.curctx = &thread->machine.facctx; /* Initialize facility context */ + thread->machine.facctx.facAct = thread; /* Initialize facility context pointer to activation */ + thread->machine.umwSpace = invalSpace; /* Initialize user memory window space to invalid */ + thread->machine.preemption_count = 0; /* Initialize preemption counter */ /* * User threads will pull their context from the pcb when first @@ -245,8 +244,9 @@ machine_thread_create( * at the base of the kernel stack (see stack_attach()). */ - thread->mact.upcb = sv; /* Set user pcb */ + thread->machine.upcb = sv; /* Set user pcb */ sv->save_srr1 = (uint64_t)MSR_EXPORT_MASK_SET; /* Set the default user MSR */ + if(task_has_64BitAddr(task)) sv->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32; /* If 64-bit task, force 64-bit mode */ sv->save_fpscr = 0; /* Clear all floating point exceptions */ sv->save_vrsave = 0; /* Set the vector save state */ sv->save_vscr[0] = 0x00000000; @@ -282,9 +282,9 @@ machine_thread_destroy( * */ - toss_live_vec(thread->mact.curctx); /* Dump live vectors */ + toss_live_vec(thread->machine.curctx); /* Dump live vectors */ - vsv = thread->mact.curctx->VMXsave; /* Get the top vector savearea */ + vsv = thread->machine.curctx->VMXsave; /* Get the top vector savearea */ while(vsv) { /* Any VMX saved state? 
*/ vpsv = vsv; /* Remember so we can toss this */ @@ -292,11 +292,11 @@ machine_thread_destroy( save_release((savearea *)vpsv); /* Release it */ } - thread->mact.curctx->VMXsave = 0; /* Kill chain */ + thread->machine.curctx->VMXsave = 0; /* Kill chain */ - toss_live_fpu(thread->mact.curctx); /* Dump live float */ + toss_live_fpu(thread->machine.curctx); /* Dump live float */ - fsv = thread->mact.curctx->FPUsave; /* Get the top float savearea */ + fsv = thread->machine.curctx->FPUsave; /* Get the top float savearea */ while(fsv) { /* Any float saved state? */ fpsv = fsv; /* Remember so we can toss this */ @@ -304,13 +304,13 @@ machine_thread_destroy( save_release((savearea *)fpsv); /* Release it */ } - thread->mact.curctx->FPUsave = 0; /* Kill chain */ + thread->machine.curctx->FPUsave = 0; /* Kill chain */ /* * free all regular saveareas. */ - pcb = thread->mact.pcb; /* Get the general savearea */ + pcb = thread->machine.pcb; /* Get the general savearea */ while(pcb) { /* Any float saved state? */ ppsv = pcb; /* Remember so we can toss this */ @@ -321,70 +321,6 @@ machine_thread_destroy( hw_atomic_sub((uint32_t *)&saveanchor.savetarget, 4); /* Unaccount for the number of saveareas we think we "need" */ } -/* - * Number of times we needed to swap an activation back in before - * switching to it. - */ -int switch_act_swapins = 0; - -/* - * machine_switch_act - * - * Machine-dependent details of activation switching. Called with - * RPC locks held and preemption disabled. - */ -void -machine_switch_act( - thread_t thread, - thread_act_t old, - thread_act_t new) -{ - pmap_t new_pmap; - facility_context *fowner; - struct per_proc_info *ppinfo; - - ppinfo = getPerProc(); /* Get our processor block */ - - /* Our context might wake up on another processor, so we must - * not keep hot state in our FPU, it must go back to the pcb - * so that it can be found by the other if needed - */ - if(real_ncpus > 1) { /* This is potentially slow, so only do when actually SMP */ - fowner = ppinfo->FPU_owner; /* Cache this because it may change */ - if(fowner) { /* Is there any live context? */ - if(fowner->facAct == old) { /* Is it for us? */ - fpu_save(fowner); /* Yes, save it */ - } - } - fowner = ppinfo->VMX_owner; /* Cache this because it may change */ - if(fowner) { /* Is there any live context? */ - if(fowner->facAct == old) { /* Is it for us? */ - vec_save(fowner); /* Yes, save it */ - } - } - } - - old->mact.cioSpace |= cioSwitchAway; /* Show we switched away from this guy */ - - ast_context(new, cpu_number()); - - /* Activations might have different pmaps - * (process->kernel->server, for example). - * Change space if needed - */ - - if(new->mact.specFlags & runningVM) { /* Is the new guy running a VM? */ - pmap_switch(new->mact.vmmCEntry->vmmPmap); /* Switch to the VM's pmap */ - } - else { /* otherwise, we use the task's pmap */ - new_pmap = new->task->map->pmap; - if ((old->task->map->pmap != new_pmap) || (old->mact.specFlags & runningVM)) { - pmap_switch(new_pmap); - } - } - -} - /* * act_machine_sv_free * release saveareas associated with an act. if flag is true, release @@ -393,7 +329,7 @@ machine_switch_act( * this code cannot block so we call the proper save area free routine */ void -act_machine_sv_free(thread_act_t act) +act_machine_sv_free(thread_t act) { register savearea *pcb, *userpcb; register savearea_vec *vsv, *vpst, *vsvt; @@ -420,22 +356,22 @@ act_machine_sv_free(thread_act_t act) * */ - if(act->mact.curctx->VMXlevel) { /* Is the current level user state? 
*/ + if(act->machine.curctx->VMXlevel) { /* Is the current level user state? */ - toss_live_vec(act->mact.curctx); /* Dump live vectors if not user */ + toss_live_vec(act->machine.curctx); /* Dump live vectors if not user */ - vsv = act->mact.curctx->VMXsave; /* Get the top vector savearea */ + vsv = act->machine.curctx->VMXsave; /* Get the top vector savearea */ while(vsv && vsv->save_hdr.save_level) vsv = (savearea_vec *)vsv->save_hdr.save_prev; /* Find user context if any */ - if(!hw_lock_to((hw_lock_t)&act->mact.curctx->VMXsync, LockTimeOut)) { /* Get the sync lock */ + if(!hw_lock_to((hw_lock_t)&act->machine.curctx->VMXsync, LockTimeOut)) { /* Get the sync lock */ panic("act_machine_sv_free - timeout getting VMX sync lock\n"); /* Tell all and die */ } - vsvt = act->mact.curctx->VMXsave; /* Get the top of the chain */ - act->mact.curctx->VMXsave = vsv; /* Point to the user context */ - act->mact.curctx->VMXlevel = 0; /* Set the level to user */ - hw_lock_unlock((hw_lock_t)&act->mact.curctx->VMXsync); /* Unlock */ + vsvt = act->machine.curctx->VMXsave; /* Get the top of the chain */ + act->machine.curctx->VMXsave = vsv; /* Point to the user context */ + act->machine.curctx->VMXlevel = 0; /* Set the level to user */ + hw_lock_unlock((hw_lock_t)&act->machine.curctx->VMXsync); /* Unlock */ while(vsvt) { /* Clear any VMX saved state */ if (vsvt == vsv) break; /* Done when hit user if any */ @@ -446,22 +382,22 @@ act_machine_sv_free(thread_act_t act) } - if(act->mact.curctx->FPUlevel) { /* Is the current level user state? */ + if(act->machine.curctx->FPUlevel) { /* Is the current level user state? */ - toss_live_fpu(act->mact.curctx); /* Dump live floats if not user */ + toss_live_fpu(act->machine.curctx); /* Dump live floats if not user */ - fsv = act->mact.curctx->FPUsave; /* Get the top floats savearea */ + fsv = act->machine.curctx->FPUsave; /* Get the top floats savearea */ while(fsv && fsv->save_hdr.save_level) fsv = (savearea_fpu *)fsv->save_hdr.save_prev; /* Find user context if any */ - if(!hw_lock_to((hw_lock_t)&act->mact.curctx->FPUsync, LockTimeOut)) { /* Get the sync lock */ + if(!hw_lock_to((hw_lock_t)&act->machine.curctx->FPUsync, LockTimeOut)) { /* Get the sync lock */ panic("act_machine_sv_free - timeout getting FPU sync lock\n"); /* Tell all and die */ } - fsvt = act->mact.curctx->FPUsave; /* Get the top of the chain */ - act->mact.curctx->FPUsave = fsv; /* Point to the user context */ - act->mact.curctx->FPUlevel = 0; /* Set the level to user */ - hw_lock_unlock((hw_lock_t)&act->mact.curctx->FPUsync); /* Unlock */ + fsvt = act->machine.curctx->FPUsave; /* Get the top of the chain */ + act->machine.curctx->FPUsave = fsv; /* Point to the user context */ + act->machine.curctx->FPUlevel = 0; /* Set the level to user */ + hw_lock_unlock((hw_lock_t)&act->machine.curctx->FPUsync); /* Unlock */ while(fsvt) { /* Clear any FPU saved state */ if (fsvt == fsv) break; /* Done when hit user if any */ @@ -476,7 +412,7 @@ act_machine_sv_free(thread_act_t act) * free all regular saveareas except a user savearea, if any */ - pcb = act->mact.pcb; /* Get the general savearea */ + pcb = act->machine.pcb; /* Get the general savearea */ userpcb = 0; /* Assume no user context for now */ while(pcb) { /* Any float saved state?
*/ @@ -489,25 +425,19 @@ act_machine_sv_free(thread_act_t act) save_ret(svp); /* Release it */ } - act->mact.pcb = userpcb; /* Chain in the user if there is one, or 0 if not */ + act->machine.pcb = userpcb; /* Chain in the user if there is one, or 0 if not */ } -void -machine_thread_set_current(thread_t thread) -{ - set_machine_current_act(thread->top_act); -} - void machine_act_terminate( - thread_act_t act) + thread_t act) { - if(act->mact.bbDescAddr) { /* Check if the Blue box assist is active */ + if(act->machine.bbDescAddr) { /* Check if the Blue box assist is active */ disable_bluebox_internal(act); /* Kill off bluebox */ } - if(act->mact.vmmControl) { /* Check if VMM is active */ + if(act->machine.vmmControl) { /* Check if VMM is active */ vmm_tear_down_all(act); /* Kill off all VMM contexts */ } } @@ -515,7 +445,7 @@ machine_act_terminate( void machine_thread_terminate_self(void) { - machine_act_terminate(current_act()); + machine_act_terminate(current_thread()); } void @@ -537,30 +467,27 @@ dump_thread(thread_t th) } int - dump_act(thread_act_t thr_act) + dump_act(thread_t thr_act) { if (!thr_act) return(0); - printf("thr_act(0x%x)(%d): thread=%x(%d) task=%x(%d)\n", + printf("thread(0x%x)(%d): task=%x(%d)\n", thr_act, thr_act->ref_count, - thr_act->thread, thr_act->thread ? thr_act->thread->ref_count:0, thr_act->task, thr_act->task ? thr_act->task->ref_count : 0); - printf("\tsusp=%x active=%x hi=%x lo=%x\n", - 0 /*thr_act->alerts*/, 0 /*thr_act->alert_mask*/, - thr_act->suspend_count, thr_act->active, - thr_act->higher, thr_act->lower); + printf("\tsusp=%x active=%x\n", + thr_act->suspend_count, thr_act->active); return((int)thr_act); } #endif -unsigned int +user_addr_t get_useraddr() { - return(current_act()->mact.upcb->save_srr0); + return(current_thread()->machine.upcb->save_srr0); } /* @@ -577,8 +504,7 @@ machine_stack_detach( thread, thread->priority, thread->sched_pri, 0, 0); - if (thread->top_act) - act_machine_sv_free(thread->top_act); + act_machine_sv_free(thread); stack = thread->kernel_stack; thread->kernel_stack = 0; @@ -600,17 +526,14 @@ machine_stack_detach( void machine_stack_attach( thread_t thread, - vm_offset_t stack, - void (*start)(thread_t)) + vm_offset_t stack) { - thread_act_t thr_act; unsigned int *kss; struct savearea *sv; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH), thread, thread->priority, - thread->sched_pri, start, - 0); + thread->sched_pri, 0, 0); assert(stack); kss = (unsigned int *)STACK_IKS(stack); @@ -618,25 +541,22 @@ machine_stack_attach( /* during initialization we sometimes do not have an activation. in that case do not do anything */ - if ((thr_act = thread->top_act) != 0) { - sv = save_get(); /* cannot block */ - sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft); /* Mark as in use */ - sv->save_hdr.save_act = (struct thread_activation *)thr_act; - sv->save_hdr.save_prev = (addr64_t)((uintptr_t)thr_act->mact.pcb); - thr_act->mact.pcb = sv; - - sv->save_srr0 = (unsigned int) start; - /* sv->save_r3 = ARG ? 
*/ - sv->save_r1 = (vm_offset_t)((int)kss - KF_SIZE); - sv->save_srr1 = MSR_SUPERVISOR_INT_OFF; - sv->save_fpscr = 0; /* Clear all floating point exceptions */ - sv->save_vrsave = 0; /* Set the vector save state */ - sv->save_vscr[3] = 0x00010000; /* Suppress java mode */ - *(CAST_DOWN(int *, sv->save_r1)) = 0; - thr_act->mact.ksp = 0; - } - - return; + sv = save_get(); /* cannot block */ + sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft); /* Mark as in use */ + sv->save_hdr.save_act = thread; + sv->save_hdr.save_prev = (addr64_t)((uintptr_t)thread->machine.pcb); + thread->machine.pcb = sv; + + sv->save_srr0 = (unsigned int)thread_continue; + /* sv->save_r3 = ARG ? */ + sv->save_r1 = (vm_offset_t)((int)kss - KF_SIZE); + sv->save_srr1 = MSR_SUPERVISOR_INT_OFF; + sv->save_fpscr = 0; /* Clear all floating point exceptions */ + sv->save_vrsave = 0; /* Set the vector save state */ + sv->save_vscr[3] = 0x00010000; /* Suppress java mode */ + *(CAST_DOWN(int *, sv->save_r1)) = 0; + + thread->machine.ksp = 0; } /* @@ -652,11 +572,11 @@ machine_stack_handoff( vm_offset_t stack; pmap_t new_pmap; facility_context *fowner; - mapping *mp; + mapping_t *mp; struct per_proc_info *ppinfo; - assert(new->top_act); - assert(old->top_act); + assert(new); + assert(old); if (old == new) panic("machine_stack_handoff"); @@ -676,13 +596,13 @@ machine_stack_handoff( if(real_ncpus > 1) { /* This is potentially slow, so only do when actually SMP */ fowner = ppinfo->FPU_owner; /* Cache this because it may change */ if(fowner) { /* Is there any live context? */ - if(fowner->facAct == old->top_act) { /* Is it for us? */ + if(fowner->facAct == old) { /* Is it for us? */ fpu_save(fowner); /* Yes, save it */ } } fowner = ppinfo->VMX_owner; /* Cache this because it may change */ if(fowner) { /* Is there any live context? */ - if(fowner->facAct == old->top_act) { /* Is it for us? */ + if(fowner->facAct == old) { /* Is it for us? */ vec_save(fowner); /* Yes, save it */ } } @@ -692,44 +612,44 @@ machine_stack_handoff( * If old thread is running VM, save per proc userProtKey and FamVMmode spcFlags bits in the thread spcFlags * These bits can be modified in the per proc without updating the thread spcFlags */ - if(old->top_act->mact.specFlags & runningVM) { /* Is the current thread running a VM? */ - old->top_act->mact.specFlags &= ~(userProtKey|FamVMmode); - old->top_act->mact.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode); + if(old->machine.specFlags & runningVM) { /* Is the current thread running a VM? */ + old->machine.specFlags &= ~(userProtKey|FamVMmode); + old->machine.specFlags |= (ppinfo->spcFlags) & (userProtKey|FamVMmode); } - old->top_act->mact.specFlags &= ~OnProc; - new->top_act->mact.specFlags |= OnProc; + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF) | DBG_FUNC_NONE, old->reason, (int)new, old->sched_pri, new->sched_pri, 0); - if(new->top_act->mact.specFlags & runningVM) { /* Is the new guy running a VM?
*/ + pmap_switch(new->machine.vmmCEntry->vmmPmap); /* Switch to the VM's pmap */ + ppinfo->VMMareaPhys = new->machine.vmmCEntry->vmmContextPhys; + ppinfo->VMMXAFlgs = new->machine.vmmCEntry->vmmXAFlgs; + ppinfo->FAMintercept = new->machine.vmmCEntry->vmmFAMintercept; } else { /* otherwise, we use the task's pmap */ - new_pmap = new->top_act->task->map->pmap; - if ((old->top_act->task->map->pmap != new_pmap) || (old->top_act->mact.specFlags & runningVM)) { + new_pmap = new->task->map->pmap; + if ((old->task->map->pmap != new_pmap) || (old->machine.specFlags & runningVM)) { pmap_switch(new_pmap); } } - machine_thread_set_current(new); - ppinfo->Uassist = new->top_act->mact.cthread_self; + machine_set_current_thread(new); + ppinfo->Uassist = new->machine.cthread_self; - ppinfo->ppbbTaskEnv = new->top_act->mact.bbTaskEnv; - ppinfo->spcFlags = new->top_act->mact.specFlags; + ppinfo->ppbbTaskEnv = new->machine.bbTaskEnv; + ppinfo->spcFlags = new->machine.specFlags; - old->top_act->mact.cioSpace |= cioSwitchAway; /* Show we switched away from this guy */ - mp = (mapping *)&ppinfo->ppCIOmp; + old->machine.umwSpace |= umwSwitchAway; /* Show we switched away from this guy */ + mp = (mapping_t *)&ppinfo->ppUMWmp; mp->mpSpace = invalSpace; /* Since we can't handoff in the middle of copy in/out, just invalidate */ if (branch_tracing_enabled()) ppinfo->cpu_flags |= traceBE; - if(trcWork.traceMask) dbgTrace(0x12345678, (unsigned int)old->top_act, (unsigned int)new->top_act, 0); /* Cut trace entry if tracing */ + if(trcWork.traceMask) dbgTrace(0x12345678, (unsigned int)old, (unsigned int)new, 0); /* Cut trace entry if tracing */ return; } @@ -740,21 +660,22 @@ machine_stack_handoff( */ void -call_continuation(void (*continuation)(void) ) +call_continuation( + thread_continue_t continuation, + void *parameter, + wait_result_t wresult) { + thread_t self = current_thread(); + unsigned int *kss; + vm_offset_t tsp; - unsigned int *kss; - vm_offset_t tsp; - - assert(current_thread()->kernel_stack); - kss = (unsigned int *)STACK_IKS(current_thread()->kernel_stack); - assert(continuation); + assert(self->kernel_stack); + kss = (unsigned int *)STACK_IKS(self->kernel_stack); + assert(continuation); - tsp = (vm_offset_t)((int)kss - KF_SIZE); - assert(tsp); - *((int *)tsp) = 0; + tsp = (vm_offset_t)((int)kss - KF_SIZE); + assert(tsp); + *((int *)tsp) = 0; - Call_continuation(continuation, tsp); - - return; + Call_continuation(continuation, parameter, wresult, tsp); } diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c index 5d32ae08f..7453b8aa5 100644 --- a/osfmk/ppc/pmap.c +++ b/osfmk/ppc/pmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -86,7 +86,6 @@ */ #include -#include #include #include #include @@ -114,14 +113,17 @@ #include #include #include +#include #include #include +#include #include +#include + +#include /* must be last */ + extern unsigned int avail_remaining; -extern unsigned int mappingdeb0; -extern struct Saveanchor saveanchor; /* Aliged savearea anchor */ -extern int real_ncpus; /* Number of actual CPUs */ unsigned int debugbackpocket; /* (TEST/DEBUG) */ vm_offset_t first_free_virt; @@ -131,13 +133,12 @@ pmapTransTab *pmapTrans; /* Point to the hash to pmap translations */ struct phys_entry *phys_table; /* forward */ +static void pmap_map_physical(void); +static void pmap_map_iohole(addr64_t paddr, addr64_t size); void pmap_activate(pmap_t pmap, thread_t th, int which_cpu); void pmap_deactivate(pmap_t pmap, thread_t th, int which_cpu); -void copy_to_phys(vm_offset_t sva, vm_offset_t dpa, int bytecount); -#if MACH_VM_DEBUG -int pmap_list_resident_pages(pmap_t pmap, vm_offset_t *listp, int space); -#endif +extern void hw_hash_init(void); /* NOTE: kernel_pmap_store must be in V=R storage and aligned!!!!!!!!!!!!!! */ @@ -182,7 +183,7 @@ struct phys_entry *pmap_find_physentry(ppnum_t pa) if (pa < pmap_mem_regions[i].mrStart) continue; /* See if we fit in this region */ if (pa > pmap_mem_regions[i].mrEnd) continue; /* Check the end too */ - entry = (unsigned int)pmap_mem_regions[i].mrPhysTab + ((pa - pmap_mem_regions[i].mrStart) * sizeof(phys_entry)); + entry = (unsigned int)pmap_mem_regions[i].mrPhysTab + ((pa - pmap_mem_regions[i].mrStart) * sizeof(phys_entry_t)); return (struct phys_entry *)entry; } // kprintf("DEBUG - pmap_find_physentry: page 0x%08X not found\n", pa); @@ -196,8 +197,12 @@ struct phys_entry *pmap_find_physentry(ppnum_t pa) * * THIS IS NOT SUPPORTED */ -kern_return_t pmap_add_physical_memory(vm_offset_t spa, vm_offset_t epa, - boolean_t available, unsigned int attr) +kern_return_t +pmap_add_physical_memory( + __unused vm_offset_t spa, + __unused vm_offset_t epa, + __unused boolean_t available, + __unused unsigned int attr) { panic("Forget it! You can't map no more memory, you greedy puke!\n"); @@ -244,6 +249,62 @@ pmap_map( return(va); } +/* + * pmap_map_physical() + * Maps physical memory into the kernel's address map beginning at lgPMWvaddr, the + * physical memory window. + * + */ +void +pmap_map_physical() +{ + unsigned region; + + /* Iterate over physical memory regions, block mapping each into the kernel's address map */ + for (region = 0; region < (unsigned)pmap_mem_regions_count; region++) { + addr64_t paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12); + addr64_t size = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr; + while (size > 0) { + /* Block mappings are limited to 256M, so we map in blocks of up to 256M */ + addr64_t vaddr = paddr + lowGlo.lgPMWvaddr; + unsigned msize = ((size > 0x10000000)? 0x10000000 : size); + addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), + (mmFlgBlock | mmFlgPerm), (msize >> 12), + (VM_PROT_READ | VM_PROT_WRITE)); + if (colladdr) { + panic ("pmap_map_physical: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", + vaddr, (paddr >> 12), (msize >> 12), colladdr); + } + paddr += msize; + size -= msize; + } + } +} + +/* + * pmap_map_iohole(addr64_t paddr, addr64_t size) + * Maps an I/O hole into the kernel's address map at its proper offset in + * the physical memory window. 
+ * + */ +void +pmap_map_iohole(addr64_t paddr, addr64_t size) +{ + while (size > 0) { + addr64_t vaddr = paddr + lowGlo.lgPMWvaddr; + unsigned msize = ((size > 0x10000000)? 0x10000000 : size); + addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), + (mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12), + (VM_PROT_READ | VM_PROT_WRITE)); + if (colladdr) { + panic ("pmap_map_iohole: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", + vaddr, (paddr >> 12), (msize >> 12), colladdr); + } + paddr += msize; + size -= msize; + } +} + /* * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. @@ -257,16 +318,16 @@ pmap_map( void pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) { - register struct mapping *mp; vm_offset_t addr; vm_size_t size; - int i, num, j, rsize, mapsize, vmpagesz, vmmapsz, bank, nbits; + unsigned int i, num, mapsize, vmpagesz, vmmapsz, nbits; + signed bank; uint64_t tmemsize; uint_t htslop; vm_offset_t first_used_addr, PCAsize; - struct phys_entry *phys_table; + struct phys_entry *phys_entry; - *first_avail = round_page_32(*first_avail); /* Make sure we start out on a page boundary */ + *first_avail = round_page(*first_avail); /* Make sure we start out on a page boundary */ vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address know to VM */ /* @@ -276,8 +337,6 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) kernel_pmap_phys = (addr64_t)&kernel_pmap_store; cursor_pmap = &kernel_pmap_store; - simple_lock_init(&kernel_pmap->lock, ETAP_VM_PMAP_KERNEL); - kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */ kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */ kernel_pmap->ref_count = 1; @@ -287,8 +346,12 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) kernel_pmap->pmapvr = 0; /* Virtual = Real */ /* - * The hash table wants to have one pteg for every 2 physical pages. - * We will allocate this in physical RAM, outside of kernel virtual memory, + * IBM's recommended hash table size is one PTEG for every 2 physical pages. + * However, we have found that OSX rarely uses more than 4 PTEs in a PTEG + * with this size table. Therefore, by default we allocate a hash table + * one half IBM's recommended size, ie one PTEG per 4 pages. The "ht_shift" boot-arg + * can be used to override the default hash table size. + * We will allocate the hash table in physical RAM, outside of kernel virtual memory, * at the top of the highest bank that will contain it. * Note that "bank" doesn't refer to a physical memory slot here, it is a range of * physically contiguous memory. 
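+/*
+ * Rough worked example of the sizing policy above (illustrative numbers only;
+ * assumes 128-byte PTEGs, the 64-bit processors' PTEG size):
+ *
+ *	msize = 512MB       =>  tmemsize = 2^29 (rounded up to a power of 2)
+ *	IBM size            =  (2^29 >> 13) * 128 = 65536 PTEGs * 128 bytes = 8MB
+ *	OSX default         =  8MB >> 1 = 4MB  (one PTEG per 4 pages)
+ *	ht_shift=1 boot-arg =>  4MB << 1 = 8MB;  ht_shift=-1  =>  4MB >> 1 = 2MB
+ *	floor               =  never below 256KB
+ */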
@@ -297,13 +360,29 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) */ nbits = cntlzw(((msize << 1) - 1) >> 32); /* Get first bit in upper half */ - if(nbits == 32) nbits = nbits + cntlzw((uint_t)((msize << 1) - 1)); /* If upper half was empty, find bit in bottom half */ - tmemsize = 0x8000000000000000ULL >> nbits; /* Get memory size rounded up to power of 2 */ + if (nbits == 32) /* If upper half was empty, find bit in bottom half */ + nbits = nbits + cntlzw((uint_t)((msize << 1) - 1)); + tmemsize = 0x8000000000000000ULL >> nbits; /* Get memory size rounded up to power of 2 */ - if(tmemsize > 0x0000002000000000ULL) tmemsize = 0x0000002000000000ULL; /* Make sure we don't make an unsupported hash table size */ - - hash_table_size = (uint_t)(tmemsize >> 13) * per_proc_info[0].pf.pfPTEG; /* Get provisional hash_table_size */ - if(hash_table_size < (256 * 1024)) hash_table_size = (256 * 1024); /* Make sure we are at least minimum size */ + /* Calculate hash table size: First, make sure we don't overflow 32-bit arithmetic. */ + if (tmemsize > 0x0000002000000000ULL) + tmemsize = 0x0000002000000000ULL; + + /* Second, calculate IBM recommended hash table size, ie one PTEG per 2 physical pages */ + hash_table_size = (uint_t)(tmemsize >> 13) * PerProcTable[0].ppe_vaddr->pf.pfPTEG; + + /* Third, cut this in half to produce the OSX default, ie one PTEG per 4 physical pages */ + hash_table_size >>= 1; + + /* Fourth, adjust default size per "ht_shift" boot arg */ + if (hash_table_shift >= 0) /* if positive, make size bigger */ + hash_table_size <<= hash_table_shift; + else /* if "ht_shift" is negative, make smaller */ + hash_table_size >>= (-hash_table_shift); + + /* Fifth, make sure we are at least minimum size */ + if (hash_table_size < (256 * 1024)) + hash_table_size = (256 * 1024); while(1) { /* Try to fit hash table in PCA into contiguous memory */ @@ -311,8 +390,8 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) panic("pmap_bootstrap: Can't find space for hash table\n"); /* This will never print, system isn't up far enough... 
*/ } - PCAsize = (hash_table_size / per_proc_info[0].pf.pfPTEG) * sizeof(PCA); /* Get total size of PCA table */ - PCAsize = round_page_32(PCAsize); /* Make sure it is at least a page long */ + PCAsize = (hash_table_size / PerProcTable[0].ppe_vaddr->pf.pfPTEG) * sizeof(PCA_t); /* Get total size of PCA table */ + PCAsize = round_page(PCAsize); /* Make sure it is at least a page long */ for(bank = pmap_mem_regions_count - 1; bank >= 0; bank--) { /* Search backwards through banks */ @@ -321,7 +400,7 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) htslop = hash_table_base & (hash_table_size - 1); /* Get the extra that we will round down when we align */ hash_table_base = hash_table_base & -(addr64_t)hash_table_size; /* Round down to correct boundary */ - if((hash_table_base - round_page_32(PCAsize)) >= ((addr64_t)pmap_mem_regions[bank].mrStart << 12)) break; /* Leave if we fit */ + if((hash_table_base - round_page(PCAsize)) >= ((addr64_t)pmap_mem_regions[bank].mrStart << 12)) break; /* Leave if we fit */ } if(bank >= 0) break; /* We are done if we found a suitable bank */ @@ -330,7 +409,7 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) } if(htslop) { /* If there was slop (i.e., wasted pages for alignment) add a new region */ - for(i = pmap_mem_regions_count - 1; i >= bank; i--) { /* Copy from end to our bank, including our bank */ + for(i = pmap_mem_regions_count - 1; i >= (unsigned)bank; i--) { /* Copy from end to our bank, including our bank */ pmap_mem_regions[i + 1].mrStart = pmap_mem_regions[i].mrStart; /* Set the start of the bank */ pmap_mem_regions[i + 1].mrAStart = pmap_mem_regions[i].mrAStart; /* Set the start of allocatable area */ pmap_mem_regions[i + 1].mrEnd = pmap_mem_regions[i].mrEnd; /* Set the end address of bank */ @@ -365,11 +444,11 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) (((num * sizeof(struct phys_entry)) + 4095) & -4096) /* For the physical entries */ ); - mapsize = size = round_page_32(size); /* Get size of area to map that we just calculated */ + mapsize = size = round_page(size); /* Get size of area to map that we just calculated */ mapsize = mapsize + kmapsize; /* Account for the kernel text size */ - vmpagesz = round_page_32(num * sizeof(struct vm_page)); /* Allow for all vm_pages needed to map physical mem */ - vmmapsz = round_page_32((num / 8) * sizeof(struct vm_map_entry)); /* Allow for vm_maps */ + vmpagesz = round_page(num * sizeof(struct vm_page)); /* Allow for all vm_pages needed to map physical mem */ + vmmapsz = round_page((num / 8) * sizeof(struct vm_map_entry)); /* Allow for vm_maps */ mapsize = mapsize + vmpagesz + vmmapsz; /* Add the VM system estimates into the grand total */ @@ -405,13 +484,13 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) /* NOTE: the phys_table must be within the first 2GB of physical RAM. 
This makes sure we only need to do 32-bit arithmetic */ - phys_table = (struct phys_entry *) addr; /* Get pointer to physical table */ + phys_entry = (struct phys_entry *) addr; /* Get pointer to physical table */ for (bank = 0; bank < pmap_mem_regions_count; bank++) { /* Set pointer and initialize all banks of ram */ - pmap_mem_regions[bank].mrPhysTab = phys_table; /* Set pointer to the physical table for this bank */ + pmap_mem_regions[bank].mrPhysTab = phys_entry; /* Set pointer to the physical table for this bank */ - phys_table = phys_table + (pmap_mem_regions[bank].mrEnd - pmap_mem_regions[bank].mrStart + 1); /* Point to the next */ + phys_entry = phys_entry + (pmap_mem_regions[bank].mrEnd - pmap_mem_regions[bank].mrStart + 1); /* Point to the next */ } addr += (((num * sizeof(struct phys_entry)) + 4095) & -4096); /* Step on past the physical entries */ @@ -430,10 +509,20 @@ pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize) /* Map V=R the page tables */ pmap_map(first_used_addr, first_used_addr, - round_page_32(first_used_addr + size), VM_PROT_READ | VM_PROT_WRITE); + round_page(first_used_addr + size), VM_PROT_READ | VM_PROT_WRITE); - *first_avail = round_page_32(first_used_addr + size); /* Set next available page */ + *first_avail = round_page(first_used_addr + size); /* Set next available page */ first_free_virt = *first_avail; /* Ditto */ + + /* For 64-bit machines, block map physical memory and the I/O hole into kernel space */ + if(BootProcInfo.pf.Available & pf64Bit) { /* Are we on a 64-bit machine? */ + lowGlo.lgPMWvaddr = PHYS_MEM_WINDOW_VADDR; /* Initialize the physical memory window's virtual address */ + + pmap_map_physical(); /* Block map physical memory into the window */ + + pmap_map_iohole(IO_MEM_WINDOW_VADDR, IO_MEM_WINDOW_SIZE); + /* Block map the I/O hole */ + } /* All the rest of memory is free - add it to the free * regions so that it can be allocated by pmap_steal @@ -466,8 +555,6 @@ void pmap_init(void) { - addr64_t cva; - pmap_zone = zinit(pmapSize, 400 * pmapSize, 4096, "pmap"); #if ZONE_DEBUG zone_debug_disable(pmap_zone); /* Can't debug this one 'cause it messes with size and alignment */ @@ -480,7 +567,7 @@ pmap_init(void) */ free_pmap_list = 0; /* Set that there are no free pmaps */ free_pmap_count = 0; - simple_lock_init(&free_pmap_lock, ETAP_VM_PMAP_CACHE); + simple_lock_init(&free_pmap_lock, 0); } @@ -522,7 +609,7 @@ void pmap_virtual_space( vm_offset_t *startp, vm_offset_t *endp) { - *startp = round_page_32(first_free_virt); + *startp = round_page(first_free_virt); *endp = vm_last_addr; } @@ -545,11 +632,11 @@ void pmap_virtual_space( * only, and is bounded by that size. 
*/ pmap_t -pmap_create(vm_size_t size) +pmap_create(vm_map_size_t size) { - pmap_t pmap, ckpmap, fore, aft; - int s, i; - unsigned int currSID, hspace; + pmap_t pmap, ckpmap, fore; + int s; + unsigned int currSID; addr64_t physpmap; /* @@ -610,8 +697,6 @@ pmap_create(vm_size_t size) fore->pmap_link.next = (queue_t)pmap; /* Current's previous's next points to me */ pmap->pmap_link.prev = (queue_t)fore; /* My prev points to what the current pointed to */ ckpmap->pmap_link.prev = (queue_t)pmap; /* Current's prev points to me */ - - simple_lock_init(&pmap->lock, ETAP_VM_PMAP); physpmap = ((addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)pmap)) << 12) | (addr64_t)((unsigned int)pmap & 0xFFF); /* Get the physical address of the pmap */ @@ -621,6 +706,8 @@ pmap_create(vm_size_t size) pmapTrans[pmap->space].pmapVAddr = CAST_DOWN(unsigned int, pmap); /* Set translate table virtual to point to us */ } + pmap->pmapVmmExt = 0; /* Clear VMM extension block vaddr */ + pmap->pmapVmmExtPhys = 0; /* and the paddr, too */ pmap->pmapFlags = pmapKeyDef; /* Set default key */ pmap->pmapCCtl = pmapCCtlVal; /* Initialize cache control */ pmap->ref_count = 1; @@ -656,6 +743,10 @@ pmap_destroy(pmap_t pmap) if(ref_count < 0) /* Did we go too far? */ panic("pmap_destroy(): ref_count < 0"); + + if (!(pmap->pmapFlags & pmapVMgsaa)) { /* Don't try this for a shadow assist guest */ + pmap_unmap_sharedpage(pmap); /* Remove any mapping of page -1 */ + } #ifdef notdef if(pmap->stats.resident_count != 0) @@ -692,7 +783,7 @@ pmap_destroy(pmap_t pmap) simple_unlock(&free_pmap_lock); pmapTrans[pmap->space].pmapPAddr = -1; /* Invalidate the translate table physical */ pmapTrans[pmap->space].pmapVAddr = -1; /* Invalidate the translate table virtual */ - zfree(pmap_zone, (vm_offset_t) pmap); + zfree(pmap_zone, pmap); } splx(s); } @@ -704,8 +795,6 @@ pmap_destroy(pmap_t pmap) void pmap_reference(pmap_t pmap) { - spl_t s; - if (pmap != PMAP_NULL) hw_atomic_add(&pmap->ref_count, 1); /* Bump the count */ } @@ -730,24 +819,44 @@ void pmap_remove_some_phys( pp = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if (pp == 0) return; /* Leave if not in physical RAM */ - while(1) { /* Keep going until we toss all pages from this pmap */ + do { /* Keep going until we toss all pages from this pmap */ if (pmap->pmapFlags & pmapVMhost) { mp = hw_purge_phys(pp); /* Toss a map */ - if(!mp ) return; - if((unsigned int)mp & mapRetCode) { /* Was there a failure? */ - panic("pmap_remove_some_phys: hw_purge_phys failed - pp = %08X, pmap = %08X, code = %08X\n", - pp, pmap, mp); + switch ((unsigned int)mp & mapRetCode) { + case mapRtOK: + mapping_free(mp); /* Return mapping to free inventory */ + break; + case mapRtGuest: + break; /* Don't try to return a guest mapping */ + case mapRtEmpty: + break; /* Physent chain empty, we're done */ + case mapRtNotFnd: + break; /* Mapping disappeared on us, retry */ + default: + panic("pmap_remove_some_phys: hw_purge_phys failed - pp = %08X, pmap = %08X, code = %08X\n", + pp, pmap, mp); /* Handle failure with our usual lack of tact */ } } else { - mp = hw_purge_space(pp, pmap); /* Toss a map */ - if(!mp ) return; - if((unsigned int)mp & mapRetCode) { /* Was there a failure? 
*/ - panic("pmap_remove_some_phys: hw_purge_pmap failed - pp = %08X, pmap = %08X, code = %08X\n", - pp, pmap, mp); + mp = hw_purge_space(pp, pmap); /* Toss a map */ + switch ((unsigned int)mp & mapRetCode) { + case mapRtOK: + mapping_free(mp); /* Return mapping to free inventory */ + break; + case mapRtEmpty: + break; /* Physent chain empty, we're done */ + case mapRtNotFnd: + break; /* Mapping disappeared on us, retry */ + default: + panic("pmap_remove_some_phys: hw_purge_phys failed - pp = %08X, pmap = %08X, code = %08X\n", + pp, pmap, mp); /* Handle failure with our usual lack of tact */ } } - mapping_free(mp); /* Toss the mapping */ - } + } while (mapRtEmpty != ((unsigned int)mp & mapRetCode)); + +#if DEBUG + if ((pmap->pmapFlags & pmapVMhost) && !pmap_verify_free(pa)) + panic("pmap_remove_some_phys: cruft left behind - pa = %08X, pmap = %08X\n", pa, pmap); +#endif return; /* Leave... */ } @@ -808,7 +917,7 @@ pmap_page_protect( register struct phys_entry *pp; boolean_t remove; unsigned int pindex; - mapping *mp; + mapping_t *mp; switch (prot) { @@ -824,20 +933,32 @@ pmap_page_protect( } - pp = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ + pp = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if (pp == 0) return; /* Leave if not in physical RAM */ if (remove) { /* If the protection was set to none, we'll remove all mappings */ - while(1) { /* Keep going until we toss all pages from this physical page */ + do { /* Keep going until we toss all pages from this physical page */ mp = hw_purge_phys(pp); /* Toss a map */ - if(!mp ) return; - if((unsigned int)mp & mapRetCode) { /* Was there a failure? */ - panic("pmap_page_protect: hw_purge_phys failed - pp = %08X, code = %08X\n", - pp, mp); + switch ((unsigned int)mp & mapRetCode) { + case mapRtOK: + mapping_free(mp); /* Return mapping to free inventory */ + break; + case mapRtGuest: + break; /* Don't try to return a guest mapping */ + case mapRtNotFnd: + break; /* Mapping disappeared on us, retry */ + case mapRtEmpty: + break; /* Physent chain empty, we're done */ + default: panic("pmap_page_protect: hw_purge_phys failed - pp = %08X, code = %08X\n", + pp, mp); /* Handle failure with our usual lack of tact */ } - mapping_free(mp); /* Toss the mapping */ - } + } while (mapRtEmpty != ((unsigned int)mp & mapRetCode)); + +#if DEBUG + if (!pmap_verify_free(pa)) + panic("pmap_page_protect: cruft left behind - pa = %08X\n", pa); +#endif return; /* Leave... */ } @@ -850,6 +971,50 @@ pmap_page_protect( } +/* + * Routine: + * pmap_disconnect + * + * Function: + * Disconnect all mappings for this page and return reference and change status + * in generic format. 
+ * + */ +unsigned int pmap_disconnect( + ppnum_t pa) +{ + register struct phys_entry *pp; + unsigned int pindex; + mapping_t *mp; + + pp = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ + if (pp == 0) return (0); /* Return null ref and chg if not in physical RAM */ + do { /* Iterate until all mappings are dead and gone */ + mp = hw_purge_phys(pp); /* Disconnect a mapping */ + if (!mp) break; /* All mappings are gone, leave the loop */ + switch ((unsigned int)mp & mapRetCode) { + case mapRtOK: + mapping_free(mp); /* Return mapping to free inventory */ + break; + case mapRtGuest: + break; /* Don't try to return a guest mapping */ + case mapRtNotFnd: + break; /* Mapping disappeared on us, retry */ + case mapRtEmpty: + break; /* Physent chain empty, we're done */ + default: panic("hw_purge_phys: hw_purge_phys failed - pp = %08X, code = %08X\n", + pp, mp); /* Handle failure with our usual lack of tact */ + } + } while (mapRtEmpty != ((unsigned int)mp & mapRetCode)); + +#if DEBUG + if (!pmap_verify_free(pa)) + panic("pmap_disconnect: cruft left behind - pa = %08X\n", pa); +#endif + + return (mapping_tst_refmod(pa)); /* Return page ref and chg in generic format */ +} + /* * pmap_protect(pmap, s, e, prot) * changes the protection on all virtual addresses v in the @@ -862,12 +1027,12 @@ pmap_page_protect( */ void pmap_protect( pmap_t pmap, - vm_offset_t sva, - vm_offset_t eva, + vm_map_offset_t sva, + vm_map_offset_t eva, vm_prot_t prot) { - addr64_t va, endva, nextva; + addr64_t va, endva; if (pmap == PMAP_NULL) return; /* Do nothing if no pmap */ @@ -880,7 +1045,7 @@ void pmap_protect( endva = eva & -4096LL; /* Round end down to a page */ while(1) { /* Go until we finish the range */ - (void)mapping_protect(pmap, va, prot & VM_PROT_ALL, &va); /* Change the protection and see what's next */ + mapping_protect(pmap, va, prot & VM_PROT_ALL, &va); /* Change the protection and see what's next */ if((va == 0) || (va >= endva)) break; /* End loop if we finish range or run off the end */ } @@ -902,18 +1067,14 @@ void pmap_protect( * insert this page into the given map NOW. */ void -pmap_enter(pmap_t pmap, vm_offset_t va, ppnum_t pa, vm_prot_t prot, - unsigned int flags, boolean_t wired) +pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, + unsigned int flags, __unused boolean_t wired) { - int memattr; - pmap_t opmap; unsigned int mflags; addr64_t colva; if (pmap == PMAP_NULL) return; /* Leave if software pmap */ - disable_preemption(); /* Don't change threads */ - mflags = 0; /* Make sure this is initialized to nothing special */ if(!(flags & VM_WIMG_USE_DEFAULT)) { /* Are they supplying the attributes? 
*/ mflags = mmFlgUseAttr | (flags & VM_MEM_GUARDED) | ((flags & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ @@ -931,9 +1092,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, ppnum_t pa, vm_prot_t prot, mapping_remove(pmap, colva); /* Remove the mapping that collided */ } - - enable_preemption(); /* Thread change ok */ - } /* @@ -956,7 +1114,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va, ppnum_t pa, vm_prot_t prot, void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ - int memattr; unsigned int mflags; addr64_t colva; @@ -966,7 +1123,7 @@ void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_pro } // kprintf("pmap_map_block: (%08X) va = %016llX, pa = %08X, size = %08X, prot = %08X, attr = %08X, flags = %08X\n", /* (BRINGUP) */ -// current_act(), va, pa, size, prot, attr, flags); /* (BRINGUP) */ +// current_thread(), va, pa, size, prot, attr, flags); /* (BRINGUP) */ mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ @@ -983,7 +1140,6 @@ void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_pro int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ - int memattr; unsigned int mflags; addr64_t colva; @@ -1011,7 +1167,7 @@ int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_p * * NOTE: This call always will fail for physical addresses greater than 0xFFFFF000. */ -vm_offset_t pmap_extract(pmap_t pmap, vm_offset_t va) { +vm_offset_t pmap_extract(pmap_t pmap, vm_map_offset_t va) { spl_t spl; register struct mapping *mp; @@ -1064,7 +1220,7 @@ ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) { spl_t spl; register struct mapping *mp; ppnum_t pa, ppoffset; - addr64_t nextva, curva; + addr64_t nextva; spl = splhigh(); /* We can't allow any loss of control here */ @@ -1102,12 +1258,12 @@ ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) { * */ kern_return_t -pmap_attribute(pmap, address, size, attribute, value) - pmap_t pmap; - vm_offset_t address; - vm_size_t size; - vm_machine_attribute_t attribute; - vm_machine_attribute_val_t* value; +pmap_attribute( + __unused pmap_t pmap, + __unused vm_map_offset_t address, + __unused vm_map_size_t size, + __unused vm_machine_attribute_t attribute, + __unused vm_machine_attribute_val_t* value) { return KERN_INVALID_ARGUMENT; @@ -1122,13 +1278,13 @@ pmap_attribute(pmap, address, size, attribute, value) */ kern_return_t pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, - vm_machine_attribute_t attribute, - vm_machine_attribute_val_t* value) { + __unused vm_machine_attribute_t attribute, + __unused vm_machine_attribute_val_t* value) { spl_t s; unsigned int i, npages; - npages = round_page_32(size) >> 12; /* Get the number of pages to do */ + npages = round_page(size) >> 12; /* Get the number of pages to do */ for(i = 0; i < npages; i++) { /* Do all requested pages */ s = splhigh(); /* No interruptions here */ @@ -1140,13 +1296,13 @@ kern_return_t pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, } /* - * pmap_sync_caches_phys(ppnum_t pa) + * pmap_sync_page_data_phys(ppnum_t pa) * * Invalidates all of the instruction cache on a physical page and * pushes any dirty data from the data cache for the same physical page */ -void pmap_sync_caches_phys(ppnum_t pa) { +void 
pmap_sync_page_data_phys(ppnum_t pa) { spl_t s; @@ -1156,6 +1312,12 @@ void pmap_sync_caches_phys(ppnum_t pa) { return; } +void +pmap_sync_page_attributes_phys(ppnum_t pa) +{ + pmap_sync_page_data_phys(pa); +} + /* * pmap_collect * @@ -1163,7 +1325,7 @@ void pmap_sync_caches_phys(ppnum_t pa) { * It isn't implemented or needed or wanted. */ void -pmap_collect(pmap_t pmap) +pmap_collect(__unused pmap_t pmap) { return; } @@ -1177,9 +1339,9 @@ pmap_collect(pmap_t pmap) */ void pmap_activate( - pmap_t pmap, - thread_t th, - int which_cpu) + __unused pmap_t pmap, + __unused thread_t th, + __unused int which_cpu) { return; } @@ -1189,9 +1351,9 @@ pmap_activate( */ void pmap_deactivate( - pmap_t pmap, - thread_t th, - int which_cpu) + __unused pmap_t pmap, + __unused thread_t th, + __unused int which_cpu) { return; } @@ -1214,10 +1376,10 @@ pmap_deactivate( */ void pmap_pageable( - pmap_t pmap, - vm_offset_t start, - vm_offset_t end, - boolean_t pageable) + __unused pmap_t pmap, + __unused vm_map_offset_t start, + __unused vm_map_offset_t end, + __unused boolean_t pageable) { return; /* This is not used... */ @@ -1229,9 +1391,9 @@ pmap_pageable( */ void pmap_change_wiring( - register pmap_t pmap, - vm_offset_t va, - boolean_t wired) + __unused pmap_t pmap, + __unused vm_map_offset_t va, + __unused boolean_t wired) { return; /* This is not used... */ } @@ -1248,14 +1410,14 @@ pmap_change_wiring( void pmap_modify_pages( pmap_t pmap, - vm_offset_t sva, - vm_offset_t eva) + vm_map_offset_t sva, + vm_map_offset_t eva) { spl_t spl; - mapping *mp; + mapping_t *mp; ppnum_t pa; - addr64_t va, endva, nextva; - unsigned int saveflags; + addr64_t va, endva; + unsigned int savetype; if (pmap == PMAP_NULL) return; /* If no pmap, can't do it... */ @@ -1274,14 +1436,14 @@ pmap_modify_pages( continue; /* We are not done and there is more to check... */ } - saveflags = mp->mpFlags; /* Remember the flags */ + savetype = mp->mpFlags & mpType; /* Remember the type */ pa = mp->mpPAddr; /* Remember ppage because mapping may vanish after drop call */ mapping_drop_busy(mp); /* We have everything we need from the mapping */ splx(spl); /* Restore 'rupts */ - if(saveflags & (mpNest | mpBlock)) continue; /* Can't mess around with these guys... */ + if(savetype != mpNormal) continue; /* Can't mess around with these guys... */ mapping_set_mod(pa); /* Set the modfied bit for this page */ @@ -1298,10 +1460,10 @@ pmap_modify_pages( * independant page boundary. */ void -pmap_clear_modify(vm_offset_t pa) +pmap_clear_modify(ppnum_t pa) { - mapping_clr_mod((ppnum_t)pa); /* Clear all change bits for physical page */ + mapping_clr_mod(pa); /* Clear all change bits for physical page */ } @@ -1311,9 +1473,9 @@ pmap_clear_modify(vm_offset_t pa) * since the last call to pmap_clear_modify(). */ boolean_t -pmap_is_modified(register vm_offset_t pa) +pmap_is_modified(register ppnum_t pa) { - return mapping_tst_mod((ppnum_t)pa); /* Check for modified */ + return mapping_tst_mod(pa); /* Check for modified */ } @@ -1324,9 +1486,9 @@ pmap_is_modified(register vm_offset_t pa) * */ void -pmap_clear_reference(vm_offset_t pa) +pmap_clear_reference(ppnum_t pa) { - mapping_clr_ref((ppnum_t)pa); /* Check for modified */ + mapping_clr_ref(pa); /* Check for modified */ } /* @@ -1335,37 +1497,59 @@ pmap_clear_reference(vm_offset_t pa) * since the last call to pmap_clear_reference(). 
*/ boolean_t -pmap_is_referenced(vm_offset_t pa) +pmap_is_referenced(ppnum_t pa) { - return mapping_tst_ref((ppnum_t)pa); /* Check for referenced */ + return mapping_tst_ref(pa); /* Check for referenced */ } /* - * pmap_canExecute(ppnum_t pa) - * returns 1 if instructions can execute - * returns 0 if know not (i.e. guarded and/or non-executable set) - * returns -1 if we don't know (i.e., the page is no RAM) + * pmap_get_refmod(phys) + * returns the referenced and modified bits of the specified + * physical page. */ -int -pmap_canExecute(ppnum_t pa) -{ - phys_entry *physent; - unsigned int pindex; +unsigned int +pmap_get_refmod(ppnum_t pa) +{ + return (mapping_tst_refmod(pa)); +} + +/* + * pmap_clear_refmod(phys, mask) + * clears the referenced and modified bits as specified by the mask + * of the specified physical page. + */ +void +pmap_clear_refmod(ppnum_t pa, unsigned int mask) +{ + mapping_clr_refmod(pa, mask); +} + +/* + * pmap_eligible_for_execute(ppnum_t pa) + * return true if physical address is eligible to contain executable code; + * otherwise, return false + */ +boolean_t +pmap_eligible_for_execute(ppnum_t pa) +{ + phys_entry_t *physent; + unsigned int pindex; physent = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ - if(!physent) return -1; /* If there is no physical entry, we don't know... */ + if((!physent) || (physent->ppLink & ppG)) + return 0; /* If there is no physical entry or marked guarded, + the entry is not eligible for execute */ - if((physent->ppLink & (ppN | ppG))) return 0; /* If we are marked non-executable or guarded, say we can not execute */ - return 1; /* Good to go... */ + return 1; /* Otherwise, entry is eligible for execute */ } #if MACH_VM_DEBUG int pmap_list_resident_pages( - register pmap_t pmap, - register vm_offset_t *listp, - register int space) + __unused pmap_t pmap, + __unused vm_offset_t *listp, + __unused int space) { return 0; } @@ -1383,8 +1567,6 @@ pmap_copy_part_page( vm_offset_t dst_offset, vm_size_t len) { - register struct phys_entry *pp_src, *pp_dst; - spl_t s; addr64_t fsrc, fdst; assert(((dst <<12) & PAGE_MASK+dst_offset+len) <= PAGE_SIZE); @@ -1398,9 +1580,9 @@ pmap_copy_part_page( void pmap_zero_part_page( - vm_offset_t p, - vm_offset_t offset, - vm_size_t len) + __unused vm_offset_t p, + __unused vm_offset_t offset, + __unused vm_size_t len) { panic("pmap_zero_part_page"); } @@ -1413,8 +1595,8 @@ boolean_t pmap_verify_free(ppnum_t pa) { pp = mapping_phys_lookup(pa, &pindex); /* Get physical entry */ if (pp == 0) return FALSE; /* If there isn't one, show no mapping... */ - if(pp->ppLink & ~(ppLock | ppN | ppFlags)) return TRUE; /* We have at least one mapping */ - return FALSE; /* No mappings */ + if(pp->ppLink & ~(ppLock | ppFlags)) return FALSE; /* We have at least one mapping */ + return TRUE; /* No mappings */ } @@ -1422,11 +1604,8 @@ boolean_t pmap_verify_free(ppnum_t pa) { void pmap_switch(pmap_t map) { - unsigned int i; - - - hw_blow_seg(copyIOaddr); /* Blow off the first segment */ - hw_blow_seg(copyIOaddr + 0x10000000ULL); /* Blow off the second segment */ + hw_blow_seg(lowGlo.lgUMWvaddr); /* Blow off the first segment */ + hw_blow_seg(lowGlo.lgUMWvaddr + 0x10000000ULL); /* Blow off the second segment */ /* when changing to kernel space, don't bother * doing anything, the kernel is mapped from here already. 
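/*
 * [Editorial sketch.]  Typical VM-layer use of the combined ref/mod
 * interface added above.  The VM_MEM_REFERENCED/VM_MEM_MODIFIED mask names
 * are assumed here as the generic bits the comments above describe; they
 * are not spelled out in this diff.
 */
static boolean_t
page_is_dirty(ppnum_t pn)
{
	unsigned int refmod = pmap_get_refmod(pn);	/* read both bits in one call */
	pmap_clear_refmod(pn, VM_MEM_REFERENCED);	/* age the page, keep its dirty state */
	return (refmod & VM_MEM_MODIFIED) != 0;
}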
@@ -1464,11 +1643,10 @@ void pmap_switch(pmap_t map) kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) { - addr64_t nextva, vend, colladdr; + addr64_t vend, colladdr; unsigned int msize; - int i, nlists, asize; - spl_t s; - mapping *mp; + int nlists; + mapping_t *mp; if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */ @@ -1486,9 +1664,10 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n mp = mapping_alloc(nlists); /* Get a spare mapping block */ - mp->mpFlags = 0x01000000 | mpNest | nlists; /* Set the flags. Make sure busy count is 1 */ + mp->mpFlags = 0x01000000 | mpNest | mpPerm | nlists; + /* Set the flags. Make sure busy count is 1 */ mp->mpSpace = subord->space; /* Set the address space/pmap lookup ID */ - mp->mpBSize = msize; /* Set the size */ + mp->u.mpBSize = msize; /* Set the size */ mp->mpPte = 0; /* Set the PTE invalid */ mp->mpPAddr = 0; /* Set the physical page number */ mp->mpVAddr = vstart; /* Set the address */ @@ -1518,11 +1697,10 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { - unsigned int oflags, seg, grandr, tstamp; - int i, tcpu, mycpu; + unsigned int tstamp, i, mycpu; addr64_t nextva; spl_t s; - mapping *mp; + mapping_t *mp; s = splhigh(); /* Make sure interruptions are disabled */ @@ -1532,7 +1710,7 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { panic("pmap_unnest: Attempt to unnest an unnested segment - va = %016llX\n", vaddr); } - if(!(mp->mpFlags & mpNest)) { /* Did we find something other than a nest? */ + if((mp->mpFlags & mpType) != mpNest) { /* Did we find something other than a nest? */ panic("pmap_unnest: Attempt to unnest something that is not a nest - va = %016llX\n", vaddr); } @@ -1540,11 +1718,10 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { panic("pmap_unnest: Attempt to unnest something that is not at start of nest - va = %016llX\n", vaddr); } - (void)hw_atomic_or(&mp->mpFlags, mpRemovable); /* Show that this mapping is now removable */ + (void)hw_atomic_and(&mp->mpFlags, ~mpPerm); /* Show that this mapping is now removable */ - mapping_drop_busy(mp); /* Go ahead and relase the mapping now */ + mapping_drop_busy(mp); /* Go ahead and release the mapping now */ - disable_preemption(); /* It's all for me! */ splx(s); /* Restore 'rupts */ (void)mapping_remove(grand, vaddr); /* Toss the nested pmap mapping */ @@ -1566,34 +1743,34 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { */ - mycpu = cpu_number(); /* Who am I? Am I just a dream? */ for(i=0; i < real_ncpus; i++) { /* Cycle through processors */ - if((unsigned int)grand == per_proc_info[i].ppUserPmapVirt) { /* Is this guy using the changed pmap? */ + disable_preemption(); + mycpu = cpu_number(); /* Who am I? Am I just a dream? */ + if((unsigned int)grand == PerProcTable[i].ppe_vaddr->ppUserPmapVirt) { /* Is this guy using the changed pmap? 
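/*
 * [Editorial sketch.]  pmap_nest() above only accepts 256MB-granular
 * windows (size & 0x0FFFFFFF must be zero).  From the two call sites in
 * this patch -- pmap_nest() and MapUserMemoryWindowInit(), where
 * u.mpBSize = 1 stands for "2 segments" -- the nested block size appears
 * to be encoded in 256MB segments minus one; the computation itself is
 * not shown in the diff, so this is an inference:
 */
msize = (unsigned int)(size >> 28) - 1;	/* inferred segments-minus-one encoding */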
*/ - per_proc_info[i].ppInvSeg = 1; /* Show that we need to invalidate the segments */ + PerProcTable[i].ppe_vaddr->ppInvSeg = 1; /* Show that we need to invalidate the segments */ - if(i == mycpu) continue; /* Don't diddle ourselves */ + if(i != mycpu) { - tstamp = per_proc_info[i].ruptStamp[1]; /* Save the processor's last interrupt time stamp */ - if(cpu_signal(i, SIGPcpureq, CPRQsegload, 0) != KERN_SUCCESS) { /* Make sure we see the pmap change */ - continue; - } - - if(!hw_cpu_wcng(&per_proc_info[i].ruptStamp[1], tstamp, LockTimeOut)) { /* Wait for the other processors to enter debug */ - panic("pmap_unnest: Other processor (%d) did not see interruption request\n", i); + tstamp = PerProcTable[i].ppe_vaddr->ruptStamp[1]; /* Save the processor's last interrupt time stamp */ + if(cpu_signal(i, SIGPcpureq, CPRQsegload, 0) == KERN_SUCCESS) { /* Make sure we see the pmap change */ + if(!hw_cpu_wcng(&PerProcTable[i].ppe_vaddr->ruptStamp[1], tstamp, LockTimeOut)) { /* Wait for the other processors to enter debug */ + panic("pmap_unnest: Other processor (%d) did not see interruption request\n", i); + } + } } } + enable_preemption(); } - enable_preemption(); /* Others can run now */ return KERN_SUCCESS; /* Bye, bye, butterfly... */ } /* - * void MapUserAddressSpaceInit(void) + * void MapUserMemoryWindowInit(void) * - * Initialized anything we need to in order to map user address space slices into + * Initialize anything we need to in order to map user address space slices into * the kernel. Primarily used for copy in/out. * * Currently we only support one 512MB slot for this purpose. There are two special @@ -1601,7 +1778,7 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { * * The special pmap nest (which is allocated in this function) is used as a place holder * in the kernel's pmap search list. It is 512MB long and covers the address range - * starting at copyIOaddr. It points to no actual memory and when the fault handler + * starting at lgUMWvaddr. It points to no actual memory and when the fault handler * hits in it, it knows to look in the per_proc and start using the linkage * mapping contained therin. * @@ -1613,44 +1790,47 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr) { * */ -void MapUserAddressSpaceInit(void) { +void MapUserMemoryWindowInit(void) { addr64_t colladdr; - int nlists, asize; - mapping *mp; + int nlists; + mapping_t *mp; nlists = mapSetLists(kernel_pmap); /* Set number of lists this will be on */ mp = mapping_alloc(nlists); /* Get a spare mapping block */ - - mp->mpFlags = 0x01000000 |mpNest | mpSpecial | nlists; /* Set the flags. Make sure busy count is 1 */ + + mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | nlists; + /* Set the flags. Make sure busy count is 1 */ mp->mpSpace = kernel_pmap->space; /* Set the address space/pmap lookup ID */ - mp->mpBSize = 1; /* Set the size to 2 segments */ + mp->u.mpBSize = 1; /* Set the size to 2 segments */ mp->mpPte = 0; /* Means nothing */ mp->mpPAddr = 0; /* Means nothing */ - mp->mpVAddr = copyIOaddr; /* Set the address range we cover */ + mp->mpVAddr = lowGlo.lgUMWvaddr; /* Set the address range we cover */ mp->mpNestReloc = 0; /* Means nothing */ colladdr = hw_add_map(kernel_pmap, mp); /* Go add the mapping to the pmap */ if(colladdr) { /* Did it collide? 
*/ - panic("MapUserAddressSpaceInit: MapUserAddressSpace range already mapped\n"); + panic("MapUserMemoryWindowInit: MapUserMemoryWindow range already mapped\n"); } return; } /* - * addr64_t MapUserAddressSpace(vm_map_t map, vm_offset_t va, size) + * addr64_t MapUserMemoryWindow(vm_map_t map, vm_offset_t va, size) * * map = the vm_map that we are mapping into the kernel * va = start of the address range we are mapping - * size = size of the range. No greater than 256MB and not 0. * Note that we do not test validty, we chose to trust our fellows... * - * Maps a slice of a user address space into a predefined kernel range - * on a per-thread basis. In the future, the restriction of a predefined - * range will be loosened. + * Maps a 512M slice of a user address space into a predefined kernel range + * on a per-thread basis. We map only the first 256M segment, allowing the + * second 256M segment to fault in as needed. This allows our clients to access + * an arbitrarily aligned operand up to 256M in size. + * + * In the future, the restriction of a predefined range may be loosened. * * Builds the proper linkage map to map the user range * We will round this down to the previous segment boundary and calculate @@ -1664,15 +1844,15 @@ void MapUserAddressSpaceInit(void) { * we just exit. This is done for performance reasons. It was found that * there was a considerable boost in copyin/out performance if we did not * invalidate the segment at ReleaseUserAddressSpace time, so we dumped the - * restriction that you had to bracket MapUserAddressSpace. Further, there + * restriction that you had to bracket MapUserMemoryWindow. Further, there * is a yet further boost if you didn't need to map it each time. The theory * behind this is that many times copies are to or from the same segment and * done multiple times within the same system call. To take advantage of that, - * we check cioSpace and cioRelo to see if we've already got it. + * we check umwSpace and umwRelo to see if we've already got it. * * We also need to half-invalidate the slice when we context switch or go * back to user state. A half-invalidate does not clear the actual mapping, - * but it does force the MapUserAddressSpace function to reload the segment + * but it does force the MapUserMemoryWindow function to reload the segment * register/SLBE. If this is not done, we can end up some pretty severe * performance penalties. If we map a slice, and the cached space/relocation is * the same, we won't reload the segment registers. Howver, since we ran someone else, @@ -1687,30 +1867,30 @@ void MapUserAddressSpaceInit(void) { * */ -addr64_t MapUserAddressSpace(vm_map_t map, addr64_t va, unsigned int size) { +addr64_t MapUserMemoryWindow( + vm_map_t map, + addr64_t va) { addr64_t baddrs, reladd; - thread_act_t act; - mapping *mp; - struct per_proc_info *perproc; + thread_t thread; + mapping_t *mp; baddrs = va & 0xFFFFFFFFF0000000ULL; /* Isolate the segment */ - act = current_act(); /* Remember our activation */ + thread = current_thread(); /* Remember our activation */ - reladd = baddrs - copyIOaddr; /* Get the relocation from user to kernel */ + reladd = baddrs - lowGlo.lgUMWvaddr; /* Get the relocation from user to kernel */ - if((act->mact.cioSpace == map->pmap->space) && (act->mact.cioRelo == reladd)) { /* Already mapped? */ - return ((va & 0x0FFFFFFFULL) | copyIOaddr); /* Pass back the kernel address we are to use */ + if((thread->machine.umwSpace == map->pmap->space) && (thread->machine.umwRelo == reladd)) { /* Already mapped? 
*/ + return ((va & 0x0FFFFFFFULL) | lowGlo.lgUMWvaddr); /* Pass back the kernel address we are to use */ } disable_preemption(); /* Don't move... */ - perproc = getPerProc(); /* Get our per_proc_block */ - mp = (mapping *)&perproc->ppCIOmp; /* Make up for C */ - act->mact.cioRelo = reladd; /* Relocation from user to kernel */ + mp = (mapping_t *)&(getPerProc()->ppUMWmp); /* Make up for C */ + thread->machine.umwRelo = reladd; /* Relocation from user to kernel */ mp->mpNestReloc = reladd; /* Relocation from user to kernel */ - act->mact.cioSpace = map->pmap->space; /* Set the address space/pmap lookup ID */ + thread->machine.umwSpace = map->pmap->space; /* Set the address space/pmap lookup ID */ mp->mpSpace = map->pmap->space; /* Set the address space/pmap lookup ID */ /* @@ -1718,51 +1898,13 @@ addr64_t MapUserAddressSpace(vm_map_t map, addr64_t va, unsigned int size) { * If we are wrong, and that would be very, very, very rare, the fault handler will fix us up. */ - hw_map_seg(map->pmap, copyIOaddr, baddrs); /* Make the entry for the first segment */ - - enable_preemption(); /* Let's move */ - return ((va & 0x0FFFFFFFULL) | copyIOaddr); /* Pass back the kernel address we are to use */ -} - -/* - * void ReleaseUserAddressMapping(addr64_t kva) - * - * kva = kernel address of the user copy in/out slice - * - */ - -void ReleaseUserAddressSpace(addr64_t kva) { - - int i; - addr64_t nextva, vend, kaddr, baddrs; - unsigned int msize; - thread_act_t act; - mapping *mp; - - if(kva == 0) return; /* Handle a 0 */ - - disable_preemption(); /* Don't move... */ - - act = current_act(); /* Remember our activation */ - - if(act->mact.cioSpace == invalSpace) { /* We only support one at a time */ - panic("ReleaseUserAddressMapping: attempt release undefined copy in/out user address space slice\n"); - } + hw_map_seg(map->pmap, lowGlo.lgUMWvaddr, baddrs); /* Make the entry for the first segment */ - act->mact.cioSpace = invalSpace; /* Invalidate space */ - mp = (mapping *)&per_proc_info[cpu_number()].ppCIOmp; /* Make up for C */ - mp->mpSpace = invalSpace; /* Trash it in the per_proc as well */ - - hw_blow_seg(copyIOaddr); /* Blow off the first segment */ - hw_blow_seg(copyIOaddr + 0x10000000ULL); /* Blow off the second segment */ - enable_preemption(); /* Let's move */ - - return; /* Let's leave */ + return ((va & 0x0FFFFFFFULL) | lowGlo.lgUMWvaddr); /* Pass back the kernel address we are to use */ } - /* * kern_return_t pmap_boot_map(size) * @@ -1782,17 +1924,127 @@ vm_offset_t pmap_boot_map(vm_size_t size) { panic("pmap_boot_map: VM started\n"); } - size = round_page_32(size); /* Make sure this is in pages */ + size = round_page(size); /* Make sure this is in pages */ vm_last_addr = vm_last_addr - size; /* Allocate the memory */ return (vm_last_addr + 1); /* Return the vaddr we just allocated */ } +/* + * void pmap_init_sharedpage(void); + * + * Hack map for the 64-bit commpage + */ + +void pmap_init_sharedpage(vm_offset_t cpg){ + + addr64_t cva, cpoff; + ppnum_t cpphys; + + sharedPmap = pmap_create(0); /* Get a pmap to hold the common segment */ + if(!sharedPmap) { /* Check for errors */ + panic("pmap_init_sharedpage: couldn't make sharedPmap\n"); + } + + for(cpoff = 0; cpoff < _COMM_PAGE_AREA_USED; cpoff += 4096) { /* Step along now */ + + cpphys = pmap_find_phys(kernel_pmap, (addr64_t)cpg + cpoff); + if(!cpphys) { + panic("pmap_init_sharedpage: compage %08X not mapped in kernel\n", cpg + cpoff); + } + + cva = mapping_make(sharedPmap, (addr64_t)((uint32_t)_COMM_PAGE_BASE_ADDRESS) + cpoff, + cpphys, 
mmFlgPerm, 1, VM_PROT_READ); /* Map the page read only */ + if(cva) { /* Check for errors */ + panic("pmap_init_sharedpage: couldn't map commpage page - cva = %016llX\n", cva); + } + + } + + return; +} + + +/* + * void pmap_map_sharedpage(pmap_t pmap); + * + * Maps the last segment in a 64-bit address space + * + * + */ + +void pmap_map_sharedpage(task_t task, pmap_t pmap){ + + kern_return_t ret; + + if(task_has_64BitAddr(task) || _cpu_capabilities & k64Bit) { /* Should we map the 64-bit page -1? */ + ret = pmap_nest(pmap, sharedPmap, 0xFFFFFFFFF0000000ULL, 0x00000000F0000000ULL, + 0x0000000010000000ULL); /* Nest the highest possible segment to map comm page */ + if(ret != KERN_SUCCESS) { /* Did it work? */ + panic("pmap_map_sharedpage: couldn't nest shared page - ret = %08X\n", ret); + } + } + + return; +} + + +/* + * void pmap_unmap_sharedpage(pmap_t pmap); + * + * Unmaps the last segment in a 64-bit address space + * + */ + +void pmap_unmap_sharedpage(pmap_t pmap){ + + kern_return_t ret; + mapping_t *mp; + boolean_t inter; + int gotnest; + addr64_t nextva; + + if(BootProcInfo.pf.Available & pf64Bit) { /* Are we on a 64-bit machine? */ + + inter = ml_set_interrupts_enabled(FALSE); /* Disable interruptions for now */ + mp = hw_find_map(pmap, 0xFFFFFFFFF0000000ULL, &nextva); /* Find the mapping for this address */ + if((unsigned int)mp == mapRtBadLk) { /* Did we lock up ok? */ + panic("pmap_unmap_sharedpage: mapping lock failure - rc = %08X, pmap = %08X\n", mp, pmap); /* Die... */ + } + + gotnest = 0; /* Assume nothing here */ + if(mp) { + gotnest = ((mp->mpFlags & mpType) == mpNest); + /* Remember if we have a nest here */ + mapping_drop_busy(mp); /* We have everything we need from the mapping */ + } + ml_set_interrupts_enabled(inter); /* Put interrupts back to what they were */ + + if(!gotnest) return; /* Leave if there isn't any nesting here */ + + ret = pmap_unnest(pmap, 0xFFFFFFFFF0000000ULL); /* Unnest the max 64-bit page */ + + if(ret != KERN_SUCCESS) { /* Did it work? */ + panic("pmap_unmap_sharedpage: couldn't unnest shared page - ret = %08X\n", ret); + } + } + + return; +} + /* temporary workaround */ boolean_t -coredumpok(vm_map_t map, vm_offset_t va) +coredumpok( + __unused vm_map_t map, + __unused vm_offset_t va) { - return TRUE; + return TRUE; } + +/* +;;; Local Variables: *** +;;; tab-width:4 *** +;;; End: *** +*/ diff --git a/osfmk/ppc/pmap.h b/osfmk/ppc/pmap.h index 102675a13..2d88a66f5 100644 --- a/osfmk/ppc/pmap.h +++ b/osfmk/ppc/pmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
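/*
 * [Editorial sketch.]  Net effect of the commpage plumbing that closes out
 * pmap.c above: pmap_init_sharedpage() maps the kernel commpage, page by
 * page and read-only, into sharedPmap, and for a 64-bit task
 * pmap_map_sharedpage() then nests that pmap into the address space's
 * highest segment (arguments from the source):
 */
kern_return_t ret = pmap_nest(pmap, sharedPmap,
    0xFFFFFFFFF0000000ULL,	/* grand va: top 256MB segment of the 64-bit space */
    0x00000000F0000000ULL,	/* subord va: where the pages sit in sharedPmap */
    0x0000000010000000ULL);	/* size: exactly one 256MB segment */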
* * @APPLE_LICENSE_HEADER_START@ * @@ -55,6 +55,7 @@ #include #include #include +#include #define maxPPage32 0x000FFFFF /* Maximum page number in 32-bit machines */ @@ -75,6 +76,77 @@ struct sgc { typedef struct sgc sgc; +#pragma pack(4) /* Make sure the structure stays as we defined it */ +struct pmap_vmm_stats { + unsigned int vxsGpf; /* Guest faults */ + unsigned int vxsGpfMiss; /* Faults that miss in hash table */ + + unsigned int vxsGrm; /* Guest mapping remove requests */ + unsigned int vxsGrmMiss; /* Remove misses in hash table */ + unsigned int vxsGrmActive; /* Remove hits that are active */ + + unsigned int vxsGra; /* Guest remove all mappings requests */ + unsigned int vxsGraHits; /* Remove hits in hash table */ + unsigned int vxsGraActive; /* Remove hits that are active */ + + unsigned int vxsGrl; /* Guest remove local mappings requests */ + unsigned int vxsGrlActive; /* Active mappings removed */ + + unsigned int vxsGrs; /* Guest mapping resumes */ + unsigned int vxsGrsHitAct; /* Resume hits active entry */ + unsigned int vxsGrsHitSusp; /* Resume hits suspended entry */ + unsigned int vxsGrsMissGV; /* Resume misses on guest virtual */ + unsigned int vxsGrsHitPE; /* Resume hits on host virtual */ + unsigned int vxsGrsMissPE; /* Resume misses on host virtual */ + + unsigned int vxsGad; /* Guest mapping adds */ + unsigned int vxsGadHit; /* Add hits entry (active or dormant) */ + unsigned int vxsGadFree; /* Add takes free entry in group */ + unsigned int vxsGadDormant; /* Add steals dormant entry in group */ + unsigned int vxsGadSteal; /* Add steals active entry in group */ + + unsigned int vxsGsu; /* Guest mapping suspends */ + unsigned int vxsGsuHit; /* Suspend hits entry (active only) */ + unsigned int vxsGsuMiss; /* Suspend misses entry */ + + unsigned int vxsGtd; /* Guest test ref&chg */ + unsigned int vxsGtdHit; /* Test r&c hits entry (active only) */ + unsigned int vxsGtdMiss; /* Test r&c misses entry */ +}; +#pragma pack() +typedef struct pmap_vmm_stats pmap_vmm_stats; + +/* Not wanting to tax all of our customers for the sins of those that use virtual operating + systems, we've built the hash table from its own primitive virtual memory. We first + allocate a pmap_vmm_ext with sufficient space following to accomodate the hash table + index (one 64-bit physical address per 4k-byte page of hash table). The allocation + must not cross a 4k-byte page boundary (we'll be accessing the block with relocation + off), so we'll try a couple of times, then just burn a whole page. We stuff the effective + address of the cache-aligned index into hIdxBase; the physical-mode code locates the index + by adding the size of a pmap_vmm_extension to its translated physical address, then rounding + up to the next 32-byte boundary. Now we grab enough virtual pages to contain the hash table, + and fill in the index with the page's physical addresses. For the final touch that's sure + to please, we initialize the hash table. Mmmmm, golden brown perfection. 
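/*
 * [Editorial sketch.]  The VMX_HPIDX_OFFSET macro defined just below uses
 * the standard power-of-two align-up idiom; generically:
 */
#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((a) - 1))	/* a must be a power of two */
/* so VMX_HPIDX_OFFSET == ALIGN_UP(sizeof(pmap_vmm_ext), 128), placing the
   hash-page index at the first 128-byte boundary past the struct */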
+ */ + +#pragma pack(4) +struct pmap_vmm_ext { + addr64_t vmxSalt; /* This block's virt<->real conversion salt */ + addr64_t vmxHostPmapPhys; /* Host pmap physical address */ + struct pmap *vmxHostPmap; /* Host pmap effective address */ + addr64_t *vmxHashPgIdx; /* Hash table physical index base address */ + vm_offset_t *vmxHashPgList; /* List of virtual pages comprising the hash table */ + unsigned int *vmxActiveBitmap; /* Bitmap of active mappings in hash table */ + pmap_vmm_stats vmxStats; /* Stats for VMM assists */ +#define VMX_HPIDX_OFFSET ((sizeof(pmap_vmm_ext) + 127) & ~127) + /* The hash table physical index begins at the first + 128-byte boundary after the pmap_vmm_ext struct */ +#define VMX_HPLIST_OFFSET (VMX_HPIDX_OFFSET + (GV_HPAGES * sizeof(addr64_t))) +#define VMX_ACTMAP_OFFSET (VMX_HPLIST_OFFSET + (GV_HPAGES * sizeof(vm_offset_t))) +}; +#pragma pack() +typedef struct pmap_vmm_ext pmap_vmm_ext; + #pragma pack(4) /* Make sure the structure stays as we defined it */ struct pmap { queue_head_t pmap_link; /* MUST BE FIRST */ @@ -87,6 +159,7 @@ struct pmap { #define pmapKeys 0x00000007 /* Keys and no execute bit to use with this pmap */ #define pmapKeyDef 0x00000006 /* Default keys - Sup = 1, user = 1, no ex = 0 */ #define pmapVMhost 0x00000010 /* pmap with Virtual Machines attached to it */ +#define pmapVMgsaa 0x00000020 /* Guest shadow assist active */ unsigned int spaceNum; /* Space number */ unsigned int pmapCCtl; /* Cache control */ #define pmapCCtlVal 0xFFFF0000 /* Valid entries */ @@ -98,8 +171,8 @@ struct pmap { #define pmapSegCacheUse 16 /* Number of cache entries to use */ struct pmap *freepmap; /* Free pmaps */ - - unsigned int pmapRsv1[3]; + pmap_vmm_ext *pmapVmmExt; /* VMM extension block, for VMM host and guest pmaps */ + addr64_t pmapVmmExtPhys; /* VMM extension block physical address */ /* 0x038 */ uint64_t pmapSCSubTag; /* Segment cache sub-tags. 
This is a 16 entry 4 bit array */ /* 0x040 */ @@ -123,7 +196,6 @@ struct pmap { /* 0x1C0 */ struct pmap_statistics stats; /* statistics */ - decl_simple_lock_data(,lock) /* lock on map */ /* Need to pad out to a power of 2 - right now it is 512 bytes */ #define pmapSize 512 @@ -139,9 +211,23 @@ struct pmapTransTab { typedef struct pmapTransTab pmapTransTab; +/* + * Address Chunk IDentified Table + */ + +struct acidTabEnt { + unsigned int acidVAddr; /* Virtual address of pmap or pointer to next free entry */ + unsigned int acidGas; /* reserved */ + addr64_t acidPAddr; /* Physical address of pmap */ +}; + +typedef struct acidTabEnt acidTabEnt; + +extern acidTabEnt *acidTab; /* Pointer to acid table */ +extern acidTabEnt *acidFree; /* List of free acid entries */ + #define PMAP_NULL ((pmap_t) 0) -extern pmap_t kernel_pmap; /* The kernel's map */ extern pmap_t cursor_pmap; /* The pmap to start allocations with */ extern pmap_t sharedPmap; extern unsigned int sharedPage; @@ -151,8 +237,6 @@ extern addr64_t vm_max_physical; /* Maximum physical address supported */ extern pmapTransTab *pmapTrans; /* Space to pmap translate table */ #define PMAP_SWITCH_USER(th, map, my_cpu) th->map = map; -#define PMAP_ACTIVATE(pmap, th, cpu) -#define PMAP_DEACTIVATE(pmap, th, cpu) #define PMAP_CONTEXT(pmap,th) #define pmap_kernel_va(VA) \ @@ -162,7 +246,10 @@ extern pmapTransTab *pmapTrans; /* Space to pmap translate table */ #define maxAdrSp 16384 #define maxAdrSpb 14 -#define copyIOaddr 0x00000000E0000000ULL +#define USER_MEM_WINDOW_VADDR 0x00000000E0000000ULL +#define PHYS_MEM_WINDOW_VADDR 0x0000000100000000ULL +#define IO_MEM_WINDOW_VADDR 0x0000000080000000ULL +#define IO_MEM_WINDOW_SIZE 0x0000000080000000ULL #define pmap_kernel() (kernel_pmap) #define pmap_resident_count(pmap) ((pmap)->stats.resident_count) @@ -201,15 +288,9 @@ extern kern_return_t pmap_add_physical_memory(vm_offset_t spa, extern void pmap_bootstrap(uint64_t msize, vm_offset_t *first_avail, unsigned int kmapsize); -extern void pmap_switch(pmap_t); -extern vm_offset_t pmap_extract(pmap_t pmap, - vm_offset_t va); +extern vm_offset_t pmap_boot_map(vm_size_t size); -extern void pmap_remove_all(vm_offset_t pa); - -extern boolean_t pmap_verify_free(ppnum_t pa); -extern void sync_cache(vm_offset_t pa, unsigned length); extern void sync_cache64(addr64_t pa, unsigned length); extern void sync_ppage(ppnum_t pa); extern void sync_cache_virtual(vm_offset_t va, unsigned length); @@ -219,18 +300,26 @@ extern void invalidate_dcache(vm_offset_t va, unsigned length, boolean_t phys); extern void invalidate_dcache64(addr64_t va, unsigned length, boolean_t phys); extern void invalidate_icache(vm_offset_t va, unsigned length, boolean_t phys); extern void invalidate_icache64(addr64_t va, unsigned length, boolean_t phys); -extern void pmap_sync_caches_phys(ppnum_t pa); +extern void pmap_sync_page_data_phys(ppnum_t pa); +extern void pmap_sync_page_attributes_phys(ppnum_t pa); extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags); extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags); extern kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size); +extern kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr); extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); -extern addr64_t MapUserAddressSpace(vm_map_t map, addr64_t va, unsigned int size); -extern void 
ReleaseUserAddressSpace(addr64_t kva); -extern kern_return_t pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, - vm_machine_attribute_t attribute, - vm_machine_attribute_val_t* value); -extern int pmap_canExecute(ppnum_t pa); +extern void MapUserMemoryWindowInit(void); +extern addr64_t MapUserMemoryWindow(vm_map_t map, addr64_t va); +extern boolean_t pmap_eligible_for_execute(ppnum_t pa); +extern int pmap_list_resident_pages( + struct pmap *pmap, + vm_offset_t *listp, + int space); +extern void pmap_init_sharedpage(vm_offset_t cpg); +extern void pmap_map_sharedpage(task_t task, pmap_t pmap); +extern void pmap_unmap_sharedpage(pmap_t pmap); + + #endif /* _PPC_PMAP_H_ */ diff --git a/osfmk/ppc/ppc_init.c b/osfmk/ppc/ppc_init.c index ca82c14a9..0ed30e1b0 100644 --- a/osfmk/ppc/ppc_init.c +++ b/osfmk/ppc/ppc_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -38,18 +39,21 @@ #include #include #include +#include #include #include #include #include +#include #include #include +#include #include -extern unsigned int intstack_top_ss; /* declared in start.s */ -extern unsigned int debstackptr; /* declared in start.s */ -extern unsigned int debstack_top_ss; /* declared in start.s */ +extern unsigned int mckFlags; +extern vm_offset_t intstack; +extern vm_offset_t debstack; int pc_trace_buf[1024] = {0}; int pc_trace_cnt = 1024; @@ -62,100 +66,129 @@ extern unsigned int hwulckbPatch_isync; extern unsigned int hwulckbPatch_eieio; extern unsigned int mulckPatch_isync; extern unsigned int mulckPatch_eieio; +extern unsigned int mulckePatch_isync; +extern unsigned int mulckePatch_eieio; extern unsigned int sulckPatch_isync; extern unsigned int sulckPatch_eieio; +extern unsigned int rwlesPatch_isync; +extern unsigned int rwlesPatch_eieio; +extern unsigned int rwldPatch_isync; +extern unsigned int rwldPatch_eieio; extern unsigned int retfsectPatch_eieio; extern unsigned int retfsectPatch_isync; +extern unsigned int bcopy_nop_if_32bit; +extern unsigned int bcopy_nc_nop_if_32bit; +extern unsigned int memcpy_nop_if_32bit; +extern unsigned int xsum_nop_if_32bit; +extern unsigned int uft_nop_if_32bit; +extern unsigned int uft_uaw_nop_if_32bit; int forcenap = 0; +int wcte = 0; /* Non-cache gather timer disabled */ -patch_entry_t patch_table[PATCH_TABLE_SIZE] = { - &extPatch32, 0x60000000, PATCH_FEATURE, PatchExt32, - &extPatchMCK, 0x60000000, PATCH_PROCESSOR, CPU_SUBTYPE_POWERPC_970, - &hwulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync, - &hwulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync, - &hwulckbPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync, - &hwulckbPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync, - &mulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync, - &mulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync, - &sulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync, - &sulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync, +patch_entry_t patch_table[] = { + {&extPatch32, 0x60000000, PATCH_FEATURE, PatchExt32}, + {&extPatchMCK, 0x60000000, PATCH_PROCESSOR, CPU_SUBTYPE_POWERPC_970}, + {&hwulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&hwulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&hwulckbPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&hwulckbPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + 
{&mulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&mulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&mulckePatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&mulckePatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&sulckPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&sulckPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&rwlesPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&rwlesPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&rwldPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&rwldPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, + {&bcopy_nop_if_32bit, 0x60000000, PATCH_FEATURE, PatchExt32}, + {&bcopy_nc_nop_if_32bit,0x60000000, PATCH_FEATURE, PatchExt32}, + {&memcpy_nop_if_32bit, 0x60000000, PATCH_FEATURE, PatchExt32}, #if !MACH_LDEBUG - &retfsectPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync, - &retfsectPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync -#else - 0, 0, PATCH_INVALID, 0, - 0, 0, PATCH_INVALID, 0 + {&retfsectPatch_isync, 0x60000000, PATCH_FEATURE, PatchLwsync}, + {&retfsectPatch_eieio, 0x7c2004ac, PATCH_FEATURE, PatchLwsync}, #endif + {&xsum_nop_if_32bit, 0x60000000, PATCH_FEATURE, PatchExt32}, + {&uft_nop_if_32bit, 0x60000000, PATCH_FEATURE, PatchExt32}, + {&uft_uaw_nop_if_32bit, 0x60000000, PATCH_FEATURE, PatchExt32}, + {NULL, 0x00000000, PATCH_END_OF_TABLE, 0} }; -void ppc_init(boot_args *args) +/* + * Forward definition + */ +void ppc_init( + boot_args *args); + +void ppc_init_cpu( + struct per_proc_info *proc_info); + +/* + * Routine: ppc_init + * Function: + */ +void +ppc_init( + boot_args *args) { - int i; - unsigned long *src,*dst; - char *str; - unsigned long addr, videoAddr; - unsigned int maxmem; - uint64_t xmaxmem, newhid; - unsigned int cputrace; - unsigned int novmx, fhrdl1; - extern vm_offset_t static_memory_end; - thread_t thread; - mapping *mp; + unsigned int maxmem; + uint64_t xmaxmem; + uint64_t newhid; + unsigned int cputrace; + unsigned int novmx; + unsigned int mcksoft; + thread_t thread; + mapping_t *mp; + uint64_t scdata; + /* * Setup per_proc info for first cpu. 
*/ - per_proc_info[0].cpu_number = 0; - per_proc_info[0].cpu_flags = 0; - per_proc_info[0].istackptr = 0; /* we're on the interrupt stack */ - per_proc_info[0].intstack_top_ss = intstack_top_ss; - per_proc_info[0].debstackptr = debstackptr; - per_proc_info[0].debstack_top_ss = debstack_top_ss; - per_proc_info[0].interrupts_enabled = 0; - per_proc_info[0].pp_preemption_count = -1; - per_proc_info[0].pp_simple_lock_count = 0; - per_proc_info[0].pp_interrupt_level = 0; - per_proc_info[0].need_ast = (unsigned int)&need_ast[0]; - per_proc_info[0].FPU_owner = 0; - per_proc_info[0].VMX_owner = 0; - per_proc_info[0].rtcPop = 0xFFFFFFFFFFFFFFFFULL; - mp = (mapping *)per_proc_info[0].ppCIOmp; - mp->mpFlags = 0x01000000 | mpSpecial | 1; + BootProcInfo.cpu_number = 0; + BootProcInfo.cpu_flags = 0; + BootProcInfo.istackptr = 0; /* we're on the interrupt stack */ + BootProcInfo.intstack_top_ss = (vm_offset_t)&intstack + INTSTACK_SIZE - FM_SIZE; + BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + KERNEL_STACK_SIZE - FM_SIZE; + BootProcInfo.debstackptr = BootProcInfo.debstack_top_ss; + BootProcInfo.interrupts_enabled = 0; + BootProcInfo.pending_ast = AST_NONE; + BootProcInfo.FPU_owner = 0; + BootProcInfo.VMX_owner = 0; + BootProcInfo.pp_cbfr = console_per_proc_alloc(TRUE); + BootProcInfo.rtcPop = 0xFFFFFFFFFFFFFFFFULL; + mp = (mapping_t *)BootProcInfo.ppUMWmp; + mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1; mp->mpSpace = invalSpace; - machine_slot[0].is_cpu = TRUE; - thread_bootstrap(); - thread = current_act(); - thread->mact.curctx = &thread->mact.facctx; - thread->mact.facctx.facAct = thread; - thread->mact.cioSpace = invalSpace; /* Initialize copyin/out space to invalid */ - thread->mact.preemption_count = 1; + thread = current_thread(); + thread->machine.curctx = &thread->machine.facctx; + thread->machine.facctx.facAct = thread; + thread->machine.umwSpace = invalSpace; /* Initialize user memory window space to invalid */ + thread->machine.preemption_count = 1; + cpu_bootstrap(); cpu_init(); - /* - * Setup some processor related structures to satisfy funnels. - * Must be done before using unparallelized device drivers. - */ - processor_ptr[0] = &processor_array[0]; master_cpu = 0; - master_processor = cpu_to_processor(master_cpu); + processor_bootstrap(); + + timer_switch((uint32_t)mach_absolute_time(), &thread->system_timer); - static_memory_end = round_page_32(args->topOfKernelData);; + static_memory_end = round_page(args->topOfKernelData);; PE_init_platform(FALSE, args); /* Get platform expert set up */ if (!PE_parse_boot_arg("novmx", &novmx)) novmx=0; /* Special run without VMX? 
*/ if(novmx) { /* Yeah, turn it off */ - for(i = 0; i < NCPUS; i++) { /* Cycle through all potential processors */ - per_proc_info[i].pf.Available &= ~pfAltivec; /* Turn off Altivec available */ - } - __asm__ volatile("mtsprg 2,%0" : : "r" (per_proc_info[0].pf.Available)); /* Set live value */ + BootProcInfo.pf.Available &= ~pfAltivec; /* Turn off Altivec available */ + __asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available)); /* Set live value */ } if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0; /* If force nap not set, make 0 */ @@ -165,6 +198,7 @@ void ppc_init(boot_args *args) } if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags=0; /* Set diagnostic flags */ + if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts=0; /* Set lcks options */ if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF; /* If tracing requested, enable it */ if(PE_parse_boot_arg("ctrc", &cputrace)) { /* See if tracing is limited to a specific cpu */ @@ -188,45 +222,64 @@ void ppc_init(boot_args *args) else xmaxmem = (uint64_t)maxmem * (1024 * 1024); -/* - * VM initialization, after this we're using page tables... - */ + if (!PE_parse_boot_arg("wcte", &wcte)) wcte = 0; /* If write combine timer enable not supplied, make 1 */ + else wcte = (wcte != 0); /* Force to 0 or 1 */ + + if (!PE_parse_boot_arg("mcklog", &mckFlags)) mckFlags = 0; /* If machine check flags not specified, clear */ + else if(mckFlags > 1) mckFlags = 0; /* If bogus, clear */ + + if (!PE_parse_boot_arg("ht_shift", &hash_table_shift)) /* should we use a non-default hash table size? */ + hash_table_shift = 0; /* no, use default size */ + + /* + * VM initialization, after this we're using page tables... + */ ppc_vm_init(xmaxmem, args); - if(per_proc_info[0].pf.Available & pf64Bit) { /* Are we on a 64-bit machine */ - if(PE_parse_boot_arg("fhrdl1", &fhrdl1)) { /* Have they supplied "Force Hardware Recovery of Data cache level 1 errors? */ - newhid = per_proc_info[0].pf.pfHID5; /* Get the old HID5 */ - if(fhrdl1 < 2) { + if(BootProcInfo.pf.Available & pf64Bit) { /* Are we on a 64-bit machine */ + + if(!wcte) { + (void)ml_scom_read(GUSModeReg << 8, &scdata); /* Get GUS mode register */ + scdata = scdata | GUSMstgttoff; /* Disable the NCU store gather timer */ + (void)ml_scom_write(GUSModeReg << 8, scdata); /* Get GUS mode register */ + } + + if(PE_parse_boot_arg("mcksoft", &mcksoft)) { /* Have they supplied "machine check software recovery? */ + newhid = BootProcInfo.pf.pfHID5; /* Get the old HID5 */ + if(mcksoft < 2) { newhid &= 0xFFFFFFFFFFFFDFFFULL; /* Clear the old one */ - newhid |= (fhrdl1 ^ 1) << 13; /* Set new value to enable machine check recovery */ - for(i = 0; i < NCPUS; i++) per_proc_info[i].pf.pfHID5 = newhid; /* Set all shadows */ + newhid |= (mcksoft & 1) << 13; /* Set new value to enable machine check recovery */ + BootProcInfo.pf.pfHID5 = newhid; /* Set the new one */ hid5set64(newhid); /* Set the hid for this processir */ } } } - PE_init_platform(TRUE, args); machine_startup(args); } +/* + * Routine: ppc_init_cpu + * Function: + */ +void ppc_init_cpu( - struct per_proc_info *proc_info) + struct per_proc_info *proc_info) { - int i; + uint64_t scdata; proc_info->cpu_flags &= ~SleepState; - if(!(proc_info->next_savearea)) /* Do we have a savearea set up already? */ - proc_info->next_savearea = (uint64_t)save_get_init(); /* Get a savearea */ - + if((BootProcInfo.pf.Available & pf64Bit) && !wcte) { /* Should we disable the store gather timer? 
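/*
 * [Editorial sketch.]  The mcksoft handling above is a read-modify-write
 * of HID5 bit 13 (0xFFFFFFFFFFFFDFFF is just ~(1ULL << 13)); the idiom,
 * isolated:
 */
static uint64_t
hid5_set_mck_recovery(uint64_t hid5, unsigned int enable)
{
	hid5 &= ~(1ULL << 13);			/* clear the old bit (the 0x...DFFF mask) */
	hid5 |= (uint64_t)(enable & 1) << 13;	/* install the new value */
	return hid5;
}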
*/ + (void)ml_scom_read(GUSModeReg << 8, &scdata); /* Get GUS mode register */ + scdata = scdata | GUSMstgttoff; /* Disable the NCU store gather timer */ + (void)ml_scom_write(GUSModeReg << 8, scdata); /* Get GUS mode register */ + } + cpu_init(); - ppc_vm_cpu_init(proc_info); - - ml_thrm_init(); /* Start thermal monitoring on this processor */ - slave_main(); } diff --git a/osfmk/ppc/ppc_vm_init.c b/osfmk/ppc/ppc_vm_init.c index 193f57479..21a24e3f3 100644 --- a/osfmk/ppc/ppc_vm_init.c +++ b/osfmk/ppc/ppc_vm_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -38,6 +37,7 @@ #include #include #include +#include #include #include @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -55,11 +54,9 @@ extern const char version[]; extern const char version_variant[]; -extern unsigned int intstack[]; /* declared in aligned_data.s */ -extern unsigned int intstack_top_ss; /* declared in aligned_data.s */ - addr64_t hash_table_base; /* Hash table base */ unsigned int hash_table_size; /* Hash table size */ +int hash_table_shift; /* "ht_shift" boot arg, used to scale hash_table_size */ vm_offset_t taproot_addr; /* (BRINGUP) */ unsigned int taproot_size; /* (BRINGUP) */ unsigned int serialmode; /* Serial mode keyboard and console control */ @@ -99,6 +96,8 @@ vm_offset_t sectKLDB; int sectSizeKLD; vm_offset_t sectPRELINKB; int sectSizePRELINK; +vm_offset_t sectHIBB; +int sectSizeHIB; vm_offset_t end, etext, edata; @@ -108,13 +107,8 @@ extern unsigned long exception_end; void ppc_vm_init(uint64_t mem_limit, boot_args *args) { - unsigned int htabmask; - unsigned int i, j, batsize, kmapsize, pvr; - vm_offset_t addr, ioAddr, videoAddr; - int boot_task_end_offset; - const char *cpus; - mapping *mp; - vm_offset_t sizeadj, oldstart; + unsigned int i, kmapsize, pvr; + vm_offset_t addr; unsigned int *xtaproot, bank_shift; uint64_t cbsize, xhid0; @@ -188,7 +182,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) pmap_mem_regions_count++; /* Count this region */ } - + mem_size = (unsigned int)max_mem; /* Get size of memory */ if(max_mem > 0x0000000080000000ULL) mem_size = 0x80000000; /* Pin at 2 GB */ @@ -205,8 +199,9 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) first_avail = static_memory_end; -/* Now retrieve addresses for end, edata, and etext - * from MACH-O headers. +/* + * Now retrieve addresses for end, edata, and etext + * from MACH-O headers for the currently running 32 bit kernel. 
*/ sectTEXTB = (vm_offset_t)getsegdatafromheader( &_mh_execute_header, "__TEXT", &sectSizeTEXT); @@ -216,72 +211,88 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) &_mh_execute_header, "__LINKEDIT", &sectSizeLINK); sectKLDB = (vm_offset_t)getsegdatafromheader( &_mh_execute_header, "__KLD", &sectSizeKLD); + sectHIBB = (vm_offset_t)getsegdatafromheader( + &_mh_execute_header, "__HIB", &sectSizeHIB); sectPRELINKB = (vm_offset_t)getsegdatafromheader( &_mh_execute_header, "__PRELINK", &sectSizePRELINK); etext = (vm_offset_t) sectTEXTB + sectSizeTEXT; edata = (vm_offset_t) sectDATAB + sectSizeDATA; - end = round_page_32(getlastaddr()); /* Force end to next page */ + end = round_page(getlastaddr()); /* Force end to next page */ - kmapsize = (round_page_32(exception_end) - trunc_page_32(exception_entry)) + /* Get size we will map later */ - (round_page_32(sectTEXTB+sectSizeTEXT) - trunc_page_32(sectTEXTB)) + - (round_page_32(sectDATAB+sectSizeDATA) - trunc_page_32(sectDATAB)) + - (round_page_32(sectLINKB+sectSizeLINK) - trunc_page_32(sectLINKB)) + - (round_page_32(sectKLDB+sectSizeKLD) - trunc_page_32(sectKLDB)) + - (round_page_32(sectPRELINKB+sectSizePRELINK) - trunc_page_32(sectPRELINKB)) + - (round_page_32(static_memory_end) - trunc_page_32(end)); + kmapsize = (round_page(exception_end) - trunc_page(exception_entry)) + /* Get size we will map later */ + (round_page(sectTEXTB+sectSizeTEXT) - trunc_page(sectTEXTB)) + + (round_page(sectDATAB+sectSizeDATA) - trunc_page(sectDATAB)) + + (round_page(sectLINKB+sectSizeLINK) - trunc_page(sectLINKB)) + + (round_page(sectKLDB+sectSizeKLD) - trunc_page(sectKLDB)) + + (round_page_32(sectKLDB+sectSizeHIB) - trunc_page_32(sectHIBB)) + + (round_page(sectPRELINKB+sectSizePRELINK) - trunc_page(sectPRELINKB)) + + (round_page(static_memory_end) - trunc_page(end)); pmap_bootstrap(max_mem, &first_avail, kmapsize); - pmap_map(trunc_page_32(exception_entry), trunc_page_32(exception_entry), - round_page_32(exception_end), VM_PROT_READ|VM_PROT_EXECUTE); + pmap_map(trunc_page(exception_entry), trunc_page(exception_entry), + round_page(exception_end), VM_PROT_READ|VM_PROT_EXECUTE); - pmap_map(trunc_page_32(sectTEXTB), trunc_page_32(sectTEXTB), - round_page_32(sectTEXTB+sectSizeTEXT), VM_PROT_READ|VM_PROT_EXECUTE); + pmap_map(trunc_page(sectTEXTB), trunc_page(sectTEXTB), + round_page(sectTEXTB+sectSizeTEXT), VM_PROT_READ|VM_PROT_EXECUTE); - pmap_map(trunc_page_32(sectDATAB), trunc_page_32(sectDATAB), - round_page_32(sectDATAB+sectSizeDATA), VM_PROT_READ|VM_PROT_WRITE); + pmap_map(trunc_page(sectDATAB), trunc_page(sectDATAB), + round_page(sectDATAB+sectSizeDATA), VM_PROT_READ|VM_PROT_WRITE); /* The KLD and LINKEDIT segments are unloaded in toto after boot completes, * but via ml_static_mfree(), through IODTFreeLoaderInfo(). Hence, we have * to map both segments page-by-page. 
*/ - for (addr = trunc_page_32(sectPRELINKB); - addr < round_page_32(sectPRELINKB+sectSizePRELINK); + for (addr = trunc_page(sectPRELINKB); + addr < round_page(sectPRELINKB+sectSizePRELINK); + addr += PAGE_SIZE) { + + pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), + VM_PROT_READ|VM_PROT_WRITE, + VM_WIMG_USE_DEFAULT, TRUE); + + } + + for (addr = trunc_page(sectKLDB); + addr < round_page(sectKLDB+sectSizeKLD); addr += PAGE_SIZE) { - pmap_enter(kernel_pmap, addr, addr>>12, + pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); } - for (addr = trunc_page_32(sectKLDB); - addr < round_page_32(sectKLDB+sectSizeKLD); + for (addr = trunc_page(sectLINKB); + addr < round_page(sectLINKB+sectSizeLINK); addr += PAGE_SIZE) { - pmap_enter(kernel_pmap, addr, addr>>12, + pmap_enter(kernel_pmap, (vm_map_offset_t)addr, + (ppnum_t)(addr>>12), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); } - for (addr = trunc_page_32(sectLINKB); - addr < round_page_32(sectLINKB+sectSizeLINK); + for (addr = trunc_page_32(sectHIBB); + addr < round_page_32(sectHIBB+sectSizeHIB); addr += PAGE_SIZE) { - pmap_enter(kernel_pmap, addr, addr>>12, + pmap_enter(kernel_pmap, (vm_map_offset_t)addr, (ppnum_t)(addr>>12), VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); } - pmap_enter(kernel_pmap, &sharedPage, (unsigned int)&sharedPage >> 12, /* Make sure the sharedPage is mapped */ + pmap_enter(kernel_pmap, (vm_map_offset_t)&sharedPage, + (ppnum_t)&sharedPage >> 12, /* Make sure the sharedPage is mapped */ VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); - pmap_enter(kernel_pmap, &lowGlo, (unsigned int)&lowGlo >> 12, /* Make sure the low memory globals are mapped */ + pmap_enter(kernel_pmap, (vm_map_offset_t)&lowGlo.lgVerCode, + (ppnum_t)&lowGlo.lgVerCode >> 12, /* Make sure the low memory globals are mapped */ VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); @@ -290,15 +301,23 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) * be released later, but not all. Ergo, no block mapping here */ - for(addr = trunc_page_32(end); addr < round_page_32(static_memory_end); addr += PAGE_SIZE) { + for(addr = trunc_page(end); addr < round_page(static_memory_end); addr += PAGE_SIZE) { - pmap_enter(kernel_pmap, addr, addr>>12, + pmap_enter(kernel_pmap, (vm_map_address_t)addr, (ppnum_t)addr>>12, VM_PROT_READ|VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE); } + +/* + * Here we map a window into the kernel address space that will be used to + * access a slice of a user address space. Clients for this service include + * copyin/out and copypv. + */ - MapUserAddressSpaceInit(); /* Go initialize copy in/out */ + lowGlo.lgUMWvaddr = USER_MEM_WINDOW_VADDR; + /* Initialize user memory window base address */ + MapUserMemoryWindowInit(); /* Go initialize user memory window */ /* * At this point, there is enough mapped memory and all hw mapping structures are @@ -336,7 +355,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) kprintf("version = %s\n\n", version); __asm__ ("mfpvr %0" : "=r" (pvr)); kprintf("proc version = %08x\n", pvr); - if(per_proc_info[0].pf.Available & pf64Bit) { /* 64-bit processor? */ + if(getPerProc()->pf.Available & pf64Bit) { /* 64-bit processor? 
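/*
 * [Editorial sketch.]  The segment-mapping loops above all share one
 * shape; with the types from this patch it is:
 */
vm_offset_t a;
for (a = trunc_page(base); a < round_page(base + size); a += PAGE_SIZE)
	pmap_enter(kernel_pmap, (vm_map_offset_t)a, (ppnum_t)(a >> 12),
	    VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, TRUE);
/* `base' and `size' stand for the segment start/length (e.g. sectKLDB and
   sectSizeKLD); the ppnum is just the 4K page number of the identity-mapped
   physical address */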
*/ xhid0 = hid0get64(); /* Get the hid0 */ if(xhid0 & (1ULL << (63 - 19))) kprintf("Time base is externally clocked\n"); else kprintf("Time base is internally clocked\n"); @@ -380,27 +399,21 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) #if DEBUG kprintf("Mapped memory:\n"); - kprintf(" exception vector: %08X, %08X - %08X\n", trunc_page_32(exception_entry), - trunc_page_32(exception_entry), round_page_32(exception_end)); - kprintf(" sectTEXTB: %08X, %08X - %08X\n", trunc_page_32(sectTEXTB), - trunc_page_32(sectTEXTB), round_page_32(sectTEXTB+sectSizeTEXT)); - kprintf(" sectDATAB: %08X, %08X - %08X\n", trunc_page_32(sectDATAB), - trunc_page_32(sectDATAB), round_page_32(sectDATAB+sectSizeDATA)); - kprintf(" sectLINKB: %08X, %08X - %08X\n", trunc_page_32(sectLINKB), - trunc_page_32(sectLINKB), round_page_32(sectLINKB+sectSizeLINK)); - kprintf(" sectKLDB: %08X, %08X - %08X\n", trunc_page_32(sectKLDB), - trunc_page_32(sectKLDB), round_page_32(sectKLDB+sectSizeKLD)); - kprintf(" end: %08X, %08X - %08X\n", trunc_page_32(end), - trunc_page_32(end), static_memory_end); + kprintf(" exception vector: %08X, %08X - %08X\n", trunc_page(exception_entry), + trunc_page(exception_entry), round_page(exception_end)); + kprintf(" sectTEXTB: %08X, %08X - %08X\n", trunc_page(sectTEXTB), + trunc_page(sectTEXTB), round_page(sectTEXTB+sectSizeTEXT)); + kprintf(" sectDATAB: %08X, %08X - %08X\n", trunc_page(sectDATAB), + trunc_page(sectDATAB), round_page(sectDATAB+sectSizeDATA)); + kprintf(" sectLINKB: %08X, %08X - %08X\n", trunc_page(sectLINKB), + trunc_page(sectLINKB), round_page(sectLINKB+sectSizeLINK)); + kprintf(" sectKLDB: %08X, %08X - %08X\n", trunc_page(sectKLDB), + trunc_page(sectKLDB), round_page(sectKLDB+sectSizeKLD)); + kprintf(" end: %08X, %08X - %08X\n", trunc_page(end), + trunc_page(end), static_memory_end); #endif return; } -void ppc_vm_cpu_init( - struct per_proc_info *proc_info) -{ - hw_setup_trans(); /* Set up hardware needed for translation */ - hw_start_trans(); /* Start translating */ -} diff --git a/osfmk/ppc/proc_reg.h b/osfmk/ppc/proc_reg.h index ba85debe5..a7e9ebfa9 100644 --- a/osfmk/ppc/proc_reg.h +++ b/osfmk/ppc/proc_reg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -163,8 +163,8 @@ #define dsiAC 0x00400000 #define dsiSeg 0x00200000 #define dsiValid 0x5E600000 -#define dsiSpcNest 0x00010000 /* Special nest - software flag */ -#define dsiSpcNestb 15 /* Special nest - software flag */ +#define dsiLinkage 0x00010000 /* Linkage mapping type - software flag */ +#define dsiLinkageb 15 /* Linkage mapping type - software flag */ #define dsiSoftware 0x0000FFFF /* SRR1 bits on data/instruction translation exceptions */ diff --git a/osfmk/ppc/rtclock.c b/osfmk/ppc/rtclock.c index 9657b74f9..bd97881bd 100644 --- a/osfmk/ppc/rtclock.c +++ b/osfmk/ppc/rtclock.c @@ -40,13 +40,12 @@ #include -#include /* HZ */ #include #include #include #include -#include +#include #include @@ -76,8 +75,6 @@ struct clock_ops sysclk_ops = { int calend_config(void); -int calend_init(void); - kern_return_t calend_gettime( mach_timespec_t *cur_time); @@ -87,7 +84,7 @@ kern_return_t calend_getattr( mach_msg_type_number_t *count); struct clock_ops calend_ops = { - calend_config, calend_init, + calend_config, 0, calend_gettime, 0, calend_getattr, 0, 0, @@ -105,11 +102,27 @@ static struct rtclock_calend { int32_t adjdelta; } rtclock_calend; -static boolean_t rtclock_initialized; +static uint32_t rtclock_boottime; + +#define TIME_ADD(rsecs, secs, rfrac, frac, unit) \ +MACRO_BEGIN \ + if (((rfrac) += (frac)) >= (unit)) { \ + (rfrac) -= (unit); \ + (rsecs) += 1; \ + } \ + (rsecs) += (secs); \ +MACRO_END -static uint64_t rtclock_tick_deadline[NCPUS]; +#define TIME_SUB(rsecs, secs, rfrac, frac, unit) \ +MACRO_BEGIN \ + if ((int32_t)((rfrac) -= (frac)) < 0) { \ + (rfrac) += (unit); \ + (rsecs) -= 1; \ + } \ + (rsecs) -= (secs); \ +MACRO_END -#define NSEC_PER_HZ (NSEC_PER_SEC / HZ) +#define NSEC_PER_HZ (NSEC_PER_SEC / 100) static uint32_t rtclock_tick_interval; static uint32_t rtclock_sec_divisor; @@ -118,21 +131,14 @@ static mach_timebase_info_data_t rtclock_timebase_const; static boolean_t rtclock_timebase_initialized; -static struct rtclock_timer { - uint64_t deadline; - uint32_t - /*boolean_t*/ is_set:1, - has_expired:1, - :0; -} rtclock_timer[NCPUS]; - static clock_timer_func_t rtclock_timer_expire; static timer_call_data_t rtclock_alarm_timer; -static void timespec_to_absolutetime( - mach_timespec_t *ts, - uint64_t *result); +static void nanotime_to_absolutetime( + uint32_t secs, + uint32_t nanosecs, + uint64_t *result); static int deadline_to_decrementer( uint64_t deadline, @@ -197,7 +203,7 @@ timebase_callback( } else { UNLOCK_RTC(s); - printf("rtclock timebase_callback: late old %d / %d new %d / %d", + printf("rtclock timebase_callback: late old %d / %d new %d / %d\n", rtclock_timebase_const.numer, rtclock_timebase_const.denom, numer, denom); return; @@ -213,12 +219,9 @@ timebase_callback( int sysclk_config(void) { - if (cpu_number() != master_cpu) - return(1); - timer_call_setup(&rtclock_alarm_timer, rtclock_alarm_expire, NULL); - simple_lock_init(&rtclock_lock, ETAP_MISC_RT_CLOCK); + simple_lock_init(&rtclock_lock, 0); PE_register_timebase_callback(timebase_callback); @@ -231,31 +234,28 @@ sysclk_config(void) int sysclk_init(void) { - uint64_t abstime; - int decr, mycpu = cpu_number(); + uint64_t abstime, nexttick; + int decr1, decr2; + struct rtclock_timer *mytimer; + struct per_proc_info *pp; - if (mycpu != master_cpu) { - if (rtclock_initialized == FALSE) { - panic("sysclk_init on cpu %d, rtc not initialized\n", mycpu); - } - /* Set decrementer and hence our next tick due */ - abstime = mach_absolute_time(); - rtclock_tick_deadline[mycpu] = 
abstime; - rtclock_tick_deadline[mycpu] += rtclock_tick_interval; - decr = deadline_to_decrementer(rtclock_tick_deadline[mycpu], abstime); - treqs(decr); + decr1 = decr2 = DECREMENTER_MAX; - return(1); - } + pp = getPerProc(); + mytimer = &pp->rtclock_timer; - /* Set decrementer and our next tick due */ abstime = mach_absolute_time(); - rtclock_tick_deadline[mycpu] = abstime; - rtclock_tick_deadline[mycpu] += rtclock_tick_interval; - decr = deadline_to_decrementer(rtclock_tick_deadline[mycpu], abstime); - treqs(decr); + nexttick = abstime + rtclock_tick_interval; + pp->rtclock_tick_deadline = nexttick; + decr1 = deadline_to_decrementer(nexttick, abstime); + + if (mytimer->is_set) + decr2 = deadline_to_decrementer(mytimer->deadline, abstime); + + if (decr1 > decr2) + decr1 = decr2; - rtclock_initialized = TRUE; + treqs(decr1); return (1); } @@ -348,7 +348,7 @@ sysclk_setalarm( { uint64_t abstime; - timespec_to_absolutetime(deadline, &abstime); + nanotime_to_absolutetime(deadline->tv_sec, deadline->tv_nsec, &abstime); timer_call_enter(&rtclock_alarm_timer, abstime); } @@ -361,18 +361,6 @@ calend_config(void) return (1); } -/* - * Initialize the calendar clock. - */ -int -calend_init(void) -{ - if (cpu_number() != master_cpu) - return(1); - - return (1); -} - /* * Get the current clock time. */ @@ -446,12 +434,7 @@ clock_get_calendar_microtime( now -= (t64 * divisor); *microsecs = (now * USEC_PER_SEC) / divisor; - if ((*microsecs += microepoch) >= USEC_PER_SEC) { - *microsecs -= USEC_PER_SEC; - epoch += 1; - } - - *secs += epoch; + TIME_ADD(*secs, epoch, *microsecs, microepoch, USEC_PER_SEC); } else { uint32_t delta, t32; @@ -469,12 +452,7 @@ clock_get_calendar_microtime( t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; if (t32 > delta) - *microsecs += (t32 - delta); - - if (*microsecs >= USEC_PER_SEC) { - *microsecs -= USEC_PER_SEC; - *secs += 1; - } + TIME_ADD(*secs, 0, *microsecs, (t32 - delta), USEC_PER_SEC); } simple_unlock(&rtclock_lock); @@ -512,12 +490,7 @@ clock_gettimeofday( t64 = now - (secs_64 * rtclock_sec_divisor); microsecs = usec_64 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - if ((microsecs += microepoch) >= USEC_PER_SEC) { - microsecs -= USEC_PER_SEC; - epoch += 1; - } - - secs += epoch; + TIME_ADD(secs, epoch, microsecs, microepoch, USEC_PER_SEC); /* adjust "now" to be absolute time at _start_ of usecond */ now -= t64 - ((usec_64 * rtclock_sec_divisor) / USEC_PER_SEC); @@ -540,12 +513,7 @@ clock_gettimeofday( t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; if (t32 > delta) - microsecs += (t32 - delta); - - if (microsecs >= USEC_PER_SEC) { - microsecs -= USEC_PER_SEC; - secs += 1; - } + TIME_ADD(secs, 0, microsecs, (t32 - delta), USEC_PER_SEC); } /* no need to disable timestamp, it is already off */ @@ -583,12 +551,7 @@ clock_get_calendar_nanotime( now -= (t64 * divisor); *nanosecs = ((now * USEC_PER_SEC) / divisor) * NSEC_PER_USEC; - if ((*nanosecs += nanoepoch) >= NSEC_PER_SEC) { - *nanosecs -= NSEC_PER_SEC; - epoch += 1; - } - - *secs += epoch; + TIME_ADD(*secs, epoch, *nanosecs, nanoepoch, NSEC_PER_SEC); } else { uint32_t delta, t32; @@ -606,12 +569,7 @@ clock_get_calendar_nanotime( t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; if (t32 > delta) - *nanosecs += ((t32 - delta) * NSEC_PER_USEC); - - if (*nanosecs >= NSEC_PER_SEC) { - *nanosecs -= NSEC_PER_SEC; - *secs += 1; - } + TIME_ADD(*secs, 0, *nanosecs, ((t32 - delta) * NSEC_PER_USEC), NSEC_PER_SEC); } simple_unlock(&rtclock_lock); @@ -632,25 +590,47 @@ clock_set_calendar_microtime( newsecs = (microsecs < 
500*USEC_PER_SEC)? secs: secs + 1; - LOCK_RTC(s); + s = splclock(); + simple_lock(&rtclock_lock); + commpage_set_timestamp(0,0,0,0); + /* + * Calculate the new calendar epoch based on + * the new value and the system clock. + */ clock_get_system_microtime(&sys, &microsys); - if ((int32_t)(microsecs -= microsys) < 0) { - microsecs += USEC_PER_SEC; - secs -= 1; - } + TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); - secs -= sys; + /* + * Adjust the boottime based on the delta. + */ + rtclock_boottime += secs - rtclock_calend.epoch; + /* + * Set the new calendar epoch. + */ rtclock_calend.epoch = secs; rtclock_calend.microepoch = microsecs; + + /* + * Cancel any adjustment in progress. + */ rtclock_calend.epoch1 = 0; rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; - UNLOCK_RTC(s); + simple_unlock(&rtclock_lock); + + /* + * Set the new value for the platform clock. + */ PESetGMTTimeOfDay(newsecs); + splx(s); + + /* + * Send host notifications. + */ host_notify_calendar_change(); } @@ -693,12 +673,7 @@ clock_set_calendar_adjtime( t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; if (t32 > delta) - microsys += (t32 - delta); - - if (microsys >= USEC_PER_SEC) { - microsys -= USEC_PER_SEC; - sys += 1; - } + TIME_ADD(sys, 0, microsys, (t32 - delta), USEC_PER_SEC); rtclock_calend.epoch = sys; rtclock_calend.microepoch = microsys; @@ -707,12 +682,7 @@ clock_set_calendar_adjtime( now -= (t64 * rtclock_sec_divisor); microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - if ((int32_t)(rtclock_calend.microepoch -= microsys) < 0) { - rtclock_calend.microepoch += USEC_PER_SEC; - sys += 1; - } - - rtclock_calend.epoch -= sys; + TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); } if (total != 0) { @@ -742,12 +712,7 @@ clock_set_calendar_adjtime( now -= (t64 * rtclock_sec_divisor); microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - if ((rtclock_calend.microepoch += microsys) >= USEC_PER_SEC) { - rtclock_calend.microepoch -= USEC_PER_SEC; - sys += 1; - } - - rtclock_calend.epoch += sys; + TIME_ADD(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); } rtclock_calend.adjtotal = total; @@ -775,7 +740,7 @@ clock_set_calendar_adjtime( uint32_t clock_adjust_calendar(void) { - uint32_t micronew, interval = 0; + uint32_t interval = 0; int32_t delta; spl_t s; @@ -785,13 +750,7 @@ clock_adjust_calendar(void) delta = rtclock_calend.adjdelta; if (delta > 0) { - micronew = rtclock_calend.microepoch + delta; - if (micronew >= USEC_PER_SEC) { - micronew -= USEC_PER_SEC; - rtclock_calend.epoch += 1; - } - - rtclock_calend.microepoch = micronew; + TIME_ADD(rtclock_calend.epoch, 0, rtclock_calend.microepoch, delta, USEC_PER_SEC); rtclock_calend.adjtotal -= delta; if (delta > rtclock_calend.adjtotal) @@ -813,13 +772,7 @@ clock_adjust_calendar(void) t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; - micronew = rtclock_calend.microepoch + t32 + delta; - if (micronew >= USEC_PER_SEC) { - micronew -= USEC_PER_SEC; - rtclock_calend.epoch += 1; - } - - rtclock_calend.microepoch = micronew; + TIME_ADD(rtclock_calend.epoch, 0, rtclock_calend.microepoch, (t32 + delta), USEC_PER_SEC); rtclock_calend.adjtotal -= delta; if (delta < rtclock_calend.adjtotal) @@ -832,12 +785,7 @@ clock_adjust_calendar(void) now -= (t64 * rtclock_sec_divisor); microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; - if ((int32_t)(rtclock_calend.microepoch -= microsys) < 0) { - rtclock_calend.microepoch += USEC_PER_SEC; - sys += 1; - } - - rtclock_calend.epoch -= sys; + 
TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); rtclock_calend.epoch1 = 0; } @@ -851,6 +799,13 @@ clock_adjust_calendar(void) return (interval); } +/* + * clock_initialize_calendar: + * + * Set the calendar and related clocks + * from the platform clock at boot or + * wake event. + */ void clock_initialize_calendar(void) { @@ -861,23 +816,51 @@ clock_initialize_calendar(void) LOCK_RTC(s); commpage_set_timestamp(0,0,0,0); - clock_get_system_microtime(&sys, &microsys); - if ((int32_t)(microsecs -= microsys) < 0) { - microsecs += USEC_PER_SEC; - secs -= 1; + if ((int32_t)secs >= (int32_t)rtclock_boottime) { + /* + * Initialize the boot time based on the platform clock. + */ + if (rtclock_boottime == 0) + rtclock_boottime = secs; + + /* + * Calculate the new calendar epoch based + * on the platform clock and the system + * clock. + */ + clock_get_system_microtime(&sys, &microsys); + TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); + + /* + * Set the new calendar epoch. + */ + rtclock_calend.epoch = secs; + rtclock_calend.microepoch = microsecs; + + /* + * Cancel any adjustment in progress. + */ + rtclock_calend.epoch1 = 0; + rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; } - secs -= sys; - - rtclock_calend.epoch = secs; - rtclock_calend.microepoch = microsecs; - rtclock_calend.epoch1 = 0; - rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; UNLOCK_RTC(s); + /* + * Send host notifications. + */ host_notify_calendar_change(); } +void +clock_get_boottime_nanotime( + uint32_t *secs, + uint32_t *nanosecs) +{ + *secs = rtclock_boottime; + *nanosecs = 0; +} + void clock_timebase_info( mach_timebase_info_t info) @@ -895,18 +878,19 @@ clock_set_timer_deadline( uint64_t deadline) { uint64_t abstime; - int decr, mycpu; + int decr; struct rtclock_timer *mytimer; + struct per_proc_info *pp; spl_t s; s = splclock(); - mycpu = cpu_number(); - mytimer = &rtclock_timer[mycpu]; + pp = getPerProc(); + mytimer = &pp->rtclock_timer; mytimer->deadline = deadline; mytimer->is_set = TRUE; if (!mytimer->has_expired) { abstime = mach_absolute_time(); - if ( mytimer->deadline < rtclock_tick_deadline[mycpu] ) { + if ( mytimer->deadline < pp->rtclock_tick_deadline ) { decr = deadline_to_decrementer(mytimer->deadline, abstime); if ( rtclock_decrementer_min != 0 && rtclock_decrementer_min < (natural_t)decr ) @@ -933,46 +917,39 @@ clock_set_timer_func( UNLOCK_RTC(s); } -/* - * Reset the clock device. This causes the realtime clock - * device to reload its mode and count value (frequency). - */ void -rtclock_reset(void) -{ - return; -} +rtclock_intr( + int device, + struct savearea *ssp, + spl_t old); /* * Real-time clock device interrupt. */ void rtclock_intr( - int device, + __unused int device, struct savearea *ssp, - spl_t old_spl) + __unused spl_t old_spl) { uint64_t abstime; - int decr1, decr2, mycpu = cpu_number(); - struct rtclock_timer *mytimer = &rtclock_timer[mycpu]; - - /* - * We may receive interrupts too early, we must reject them. 
- */ - if (rtclock_initialized == FALSE) { - treqs(DECREMENTER_MAX); /* Max the decrementer if not init */ - return; - } + int decr1, decr2; + struct rtclock_timer *mytimer; + struct per_proc_info *pp; decr1 = decr2 = DECREMENTER_MAX; + pp = getPerProc(); + abstime = mach_absolute_time(); - if ( rtclock_tick_deadline[mycpu] <= abstime ) { + if ( pp->rtclock_tick_deadline <= abstime ) { clock_deadline_for_periodic_event(rtclock_tick_interval, abstime, - &rtclock_tick_deadline[mycpu]); + &pp->rtclock_tick_deadline); hertz_tick(USER_MODE(ssp->save_srr1), ssp->save_srr0); } + mytimer = &pp->rtclock_timer; + abstime = mach_absolute_time(); if ( mytimer->is_set && mytimer->deadline <= abstime ) { @@ -982,7 +959,7 @@ rtclock_intr( } abstime = mach_absolute_time(); - decr1 = deadline_to_decrementer(rtclock_tick_deadline[mycpu], abstime); + decr1 = deadline_to_decrementer(pp->rtclock_tick_deadline, abstime); if (mytimer->is_set) decr2 = deadline_to_decrementer(mytimer->deadline, abstime); @@ -1002,8 +979,8 @@ rtclock_intr( static void rtclock_alarm_expire( - timer_call_param_t p0, - timer_call_param_t p1) + __unused void *p0, + __unused void *p1) { mach_timespec_t timestamp; @@ -1029,14 +1006,29 @@ deadline_to_decrementer( } static void -timespec_to_absolutetime( - mach_timespec_t *ts, +nanotime_to_absolutetime( + uint32_t secs, + uint32_t nanosecs, uint64_t *result) { + uint32_t divisor = rtclock_sec_divisor; + + *result = ((uint64_t)secs * divisor) + + ((uint64_t)nanosecs * divisor) / NSEC_PER_SEC; +} + +void +absolutetime_to_microtime( + uint64_t abstime, + uint32_t *secs, + uint32_t *microsecs) +{ + uint64_t t64; uint32_t divisor; - *result = ((uint64_t)ts->tv_sec * (divisor = rtclock_sec_divisor)) + - ((uint64_t)ts->tv_nsec * divisor) / NSEC_PER_SEC; + *secs = t64 = abstime / (divisor = rtclock_sec_divisor); + abstime -= (t64 * divisor); + *microsecs = (abstime * USEC_PER_SEC) / divisor; } void @@ -1107,25 +1099,8 @@ nanoseconds_to_absolutetime( *result += (nanosecs * divisor) / NSEC_PER_SEC; } -/* - * Spin-loop delay primitives. - */ void -delay_for_interval( - uint32_t interval, - uint32_t scale_factor) -{ - uint64_t now, end; - - clock_interval_to_deadline(interval, scale_factor, &end); - - do { - now = mach_absolute_time(); - } while (now < end); -} - -void -clock_delay_until( +machine_delay_until( uint64_t deadline) { uint64_t now; @@ -1135,13 +1110,6 @@ clock_delay_until( } while (now < deadline); } -void -delay( - int usec) -{ - delay_for_interval((usec < 0)? -usec: usec, NSEC_PER_USEC); -} - /* * Request a decrementer pop * diff --git a/osfmk/ppc/savearea.c b/osfmk/ppc/savearea.c index bf9b4e80b..fcda2ab4c 100644 --- a/osfmk/ppc/savearea.c +++ b/osfmk/ppc/savearea.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
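/*
 * [Annotation] The rtclock.c hunks above collapse roughly a dozen
 * open-coded carry/borrow sequences into the new TIME_ADD/TIME_SUB macros,
 * which keep a (seconds, fraction) pair normalized so 0 <= fraction < unit.
 * A small standalone demonstration (same macro body as above, minus the
 * MACRO_BEGIN/MACRO_END wrappers; note it performs a single carry, so the
 * callers above only ever pass a fraction that is already below the unit):
 */
#include <assert.h>
#include <stdint.h>

#define EX_USEC_PER_SEC 1000000

#define EX_TIME_ADD(rsecs, secs, rfrac, frac, unit)     \
do {                                                    \
	if (((rfrac) += (frac)) >= (unit)) {            \
		(rfrac) -= (unit);                      \
		(rsecs) += 1;                           \
	}                                               \
	(rsecs) += (secs);                              \
} while (0)

static void
ex_time_add_demo(void)
{
	uint32_t secs = 10, micros = 900000;

	EX_TIME_ADD(secs, 0, micros, 200000, EX_USEC_PER_SEC);  /* carries one whole second */
	assert(secs == 11 && micros == 100000);
}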
* * @APPLE_LICENSE_HEADER_START@ * @@ -24,7 +24,6 @@ * */ -#include #include #include #include @@ -204,7 +203,7 @@ void savearea_init(vm_offset_t addr) { /* * This will populate the local list and get the first one for the system */ - per_proc_info[0].next_savearea = (vm_offset_t)save_get(); + getPerProc()->next_savearea = (vm_offset_t)save_get(); /* * The system is now able to take interruptions @@ -309,7 +308,7 @@ void save_adjust(void) { /* * Fake up information to make the saveareas look like a zone */ - +void save_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, int *collectable, int *exhaustable) { diff --git a/osfmk/ppc/savearea.h b/osfmk/ppc/savearea.h index 0cec372ef..640b063fc 100644 --- a/osfmk/ppc/savearea.h +++ b/osfmk/ppc/savearea.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -19,6 +19,8 @@ * * @APPLE_LICENSE_HEADER_END@ */ +#ifdef XNU_KERNEL_PRIVATE + #ifndef _PPC_SAVEAREA_H_ #define _PPC_SAVEAREA_H_ @@ -57,7 +59,7 @@ typedef struct savearea_comm { /* 0x20 */ unsigned int save_time[2]; /* Context save time - for debugging or performance */ - struct thread_activation *save_act; /* Associated activation */ + struct thread *save_act; /* Associated thread */ unsigned int save_02c; uint64_t sac_vrswap; /* XOR mask to swap V to R or vice versa */ unsigned int save_flags; /* Various flags */ @@ -96,7 +98,6 @@ typedef struct savearea { uint64_t save_xdat1; /* Exception data 1 */ uint64_t save_xdat2; /* Exception data 2 */ uint64_t save_xdat3; /* Exception data 3 */ - /* offset 0x0080 */ uint64_t save_r0; uint64_t save_r1; @@ -309,14 +310,15 @@ struct Saveanchor { volatile unsigned int savefreecnt; /* 020 Number of saveareas on global free list */ volatile int saveadjust; /* 024 If 0 number of saveareas is ok, otherwise # to change (pos means grow, neg means shrink */ volatile int saveinuse; /* 028 Number of areas in use counting those on the local free list */ - volatile int savetarget; /* 02C Number of savearea's needed */ + unsigned int savetarget; /* 02C Number of saveareas needed */ int savemaxcount; /* 030 Maximum saveareas ever allocated */ - unsigned int saveRSVD034[3]; /* 034 reserved */ + unsigned int saveinusesnapshot; /* 034 snapshot inuse count */ + volatile addr64_t savefreesnapshot; /* 038 snapshot global free list header */ /* 040 */ - }; #pragma pack() +extern struct Saveanchor saveanchor; /* Aliged savearea anchor */ #define sac_cnt (4096 / sizeof(savearea)) /* Number of saveareas per page */ #define sac_empty (0xFFFFFFFF << (32 - sac_cnt)) /* Mask with all entries empty */ @@ -327,11 +329,11 @@ struct Saveanchor { #define LocalSaveMin (LocalSaveTarget / 2) /* Min size of local savearea free list before we grow */ #define LocalSaveMax (LocalSaveTarget * 2) /* Max size of local savearea free list before we trim */ -#define FreeListMin (2 * LocalSaveTarget * NCPUS) /* Always make sure there are enough to fill local list twice per processor */ -#define SaveLowHysteresis LocalSaveTarget /* The number off from target before we adjust upwards */ -#define SaveHighHysteresis FreeListMin /* The number off from target before we adjust downwards */ +#define FreeListMin (2 * LocalSaveTarget) /* Always make sure there are enough to fill local list twice per processor */ +#define SaveLowHysteresis LocalSaveTarget /* The number off from target before we adjust 
upwards */ +#define SaveHighHysteresis (2 * FreeListMin) /* The number off from target before we adjust downwards */ #define InitialSaveAreas (2 * FreeListMin) /* The number of saveareas to make at boot time */ -#define InitialSaveTarget FreeListMin /* The number of saveareas for an initial target. This should be the minimum ever needed. */ +#define InitialSaveTarget FreeListMin /* The number of saveareas for an initial target. This should be the minimum ever needed. */ #define InitialSaveBloks (InitialSaveAreas + sac_cnt - 1) / sac_cnt /* The number of savearea blocks to allocate at boot */ #define BackPocketSaveBloks 8 /* Number of pages of back pocket saveareas */ @@ -350,6 +352,17 @@ struct savearea_comm *save_trim_free(void); /* Remove free pages from savearea p int save_recover(void); /* returns nonzero if we can recover enough from the free pool */ void savearea_init(vm_offset_t addr); /* Boot-time savearea initialization */ +void save_fake_zone_info( /* report savearea usage statistics as fake zone info */ + int *count, + vm_size_t *cur_size, + vm_size_t *max_size, + vm_size_t *elem_size, + vm_size_t *alloc_size, + int *collectable, + int *exhaustable); + +void save_snapshot(void); +void save_snapshot_restore(void); #endif /* MACH_KERNEL_PRIVATE */ #endif /* __APPLE_API_PRIVATE */ @@ -375,3 +388,5 @@ void savearea_init(vm_offset_t addr); /* Boot-time savearea initialization */ #endif /* _PPC_SAVEAREA_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/ppc/savearea_asm.s b/osfmk/ppc/savearea_asm.s index ba6c147ee..ee8ed0f11 100644 --- a/osfmk/ppc/savearea_asm.s +++ b/osfmk/ppc/savearea_asm.s @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -49,7 +48,170 @@ * there are parallel paths for 32- and 64-bit machines. */ - + +/* + * ***************************** + * * s a v e _ s n a p s h o t * + * ***************************** + * + * void save_snapshot(); + * + * Link the current free list & processor local list on an independent list. + */ + .align 5 + .globl EXT(save_snapshot) + +LEXT(save_snapshot) + mflr r9 ; get return address + bl saveSetup ; turn translation off, 64-bit on, load many regs + bf-- pf64Bitb,save_snapshot32 ; skip if 32-bit processor + + ; Handle 64-bit processor. + +save_snapshot64: + + ld r8,next_savearea(r10) ; Start with the current savearea + std r8,SVsavefreesnapshot(0) ; Make it the restore list anchor + ld r5,SVfree(0) ; Get free save area list anchor + +save_snapshot64nextfree: + mr r7,r5 + std r7,savemisc1(r8) ; Link this one + ld r5,SAVprev(r7) ; Get the next + mr r8,r7 + mr. r0,r5 + bne save_snapshot64nextfree + + lwz r6,SVinuse(0) ; Get inuse count + ld r5,lclfree(r10) ; Get the local savearea list + subi r6,r6,1 ; Count the first as free + +save_snapshot64nextlocalfree: + subi r6,r6,1 ; Count as free + mr r7,r5 + std r7,savemisc1(r8) ; Link this one + ld r5,SAVprev(r7) ; Get the next + mr r8,r7 + mr. r0,r5 + bne save_snapshot64nextlocalfree + + std r5,savemisc1(r8) ; End the list + stw r6,SVsaveinusesnapshot(0) ; Save the new number of inuse saveareas + + mtlr r9 ; Restore the return + b saveRestore64 ; Restore interrupts and translation + + ; Handle 32-bit processor. + +save_snapshot32: + lwz r8,next_savearea+4(r10) ; Start with the current savearea + stw r8,SVsavefreesnapshot+4(0) ; Make it the restore list anchor + lwz r5,SVfree+4(0) ; Get free save area list anchor + +save_snapshot32nextfree: + mr r7,r5 + stw r7,savemisc1+4(r8) ; Link this one + lwz r5,SAVprev+4(r7) ; Get the next + mr r8,r7 + mr. 
r0,r5 + bne save_snapshot32nextfree + + lwz r6,SVinuse(0) ; Get inuse count + lwz r5,lclfree+4(r10) ; Get the local savearea list + subi r6,r6,1 ; Count the first as free + +save_snapshot32nextlocalfree: + subi r6,r6,1 ; Count as free + mr r7,r5 + stw r7,savemisc1+4(r8) ; Link this one + lwz r5,SAVprev+4(r7) ; Get the next + mr r8,r7 + mr. r0,r5 + bne save_snapshot32nextlocalfree + + stw r5,savemisc1+4(r8) ; End the list + stw r6,SVsaveinusesnapshot(0) ; Save the new number of inuse saveareas + + mtlr r9 ; Restore the return + b saveRestore32 ; Restore interrupts and translation + +/* + * ********************************************* + * * s a v e _ s n a p s h o t _ r e s t o r e * + * ********************************************* + * + * void save_snapshot_restore(); + * + * Restore the free list from the snapshot list, and reset the processors next savearea. + */ + .align 5 + .globl EXT(save_snapshot_restore) + +LEXT(save_snapshot_restore) + mflr r9 ; get return address + bl saveSetup ; turn translation off, 64-bit on, load many regs + bf-- pf64Bitb,save_snapshot_restore32 ; skip if 32-bit processor + + ; Handle 64-bit processor. + +save_snapshot_restore64: + lwz r7,SVsaveinusesnapshot(0) + stw r7,SVinuse(0) ; Set the new inuse count + + li r6,0 + stw r6,lclfreecnt(r10) ; None local now + std r6,lclfree(r10) ; None local now + + ld r8,SVsavefreesnapshot(0) ; Get the restore list anchor + std r8,SVfree(0) ; Make it the free list anchor + li r5,SAVempty ; Get marker for free savearea + +save_snapshot_restore64nextfree: + addi r6,r6,1 ; Count as free + stb r5,SAVflags+2(r8) ; Mark savearea free + ld r7,savemisc1(r8) ; Get the next + std r7,SAVprev(r8) ; Set the next in free list + mr. r8,r7 + bne save_snapshot_restore64nextfree + + stw r6,SVfreecnt(0) ; Set the new free count + + bl saveGet64 + std r3,next_savearea(r10) ; Get the next savearea + + mtlr r9 ; Restore the return + b saveRestore64 ; Restore interrupts and translation + + ; Handle 32-bit processor. + +save_snapshot_restore32: + lwz r7,SVsaveinusesnapshot(0) + stw r7,SVinuse(0) ; Set the new inuse count + + li r6,0 + stw r6,lclfreecnt(r10) ; None local now + stw r6,lclfree+4(r10) ; None local now + + lwz r8,SVsavefreesnapshot+4(0) ; Get the restore list anchor + stw r8,SVfree+4(0) ; Make it the free list anchor + li r5,SAVempty ; Get marker for free savearea + +save_snapshot_restore32nextfree: + addi r6,r6,1 ; Count as free + stb r5,SAVflags+2(r8) ; Mark savearea free + lwz r7,savemisc1+4(r8) ; Get the next + stw r7,SAVprev+4(r8) ; Set the next in free list + mr. r8,r7 + bne save_snapshot_restore32nextfree + + stw r6,SVfreecnt(0) ; Set the new free count + + bl saveGet32 + stw r3,next_savearea+4(r10) ; Get the next savearea + + mtlr r9 ; Restore the return + b saveRestore32 ; Restore interrupts and translation + /* * *********************** * * s a v e _ q u e u e * diff --git a/osfmk/ppc/serial_io.c b/osfmk/ppc/serial_io.c index cccb35bfb..848c8c25b 100644 --- a/osfmk/ppc/serial_io.c +++ b/osfmk/ppc/serial_io.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
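/*
 * [Annotation] A rough C rendering of the save_snapshot assembly above, to
 * make the list surgery easier to follow: the processor's current savearea
 * anchors a snapshot chain threaded through savemisc1, the global free list
 * and then the per-processor local free list are appended to it, and the
 * in-use count is adjusted because the anchor and the local free entries
 * are counted in SVinuse but are really free. The struct shape and names
 * here are illustrative assumptions, not the real savearea layout.
 */
struct ex_savearea {
	struct ex_savearea *sav_prev;    /* SAVprev: next entry on a free list */
	struct ex_savearea *sav_misc1;   /* savemisc1: snapshot chain link */
};

static void
ex_save_snapshot(struct ex_savearea *next_savearea,  /* processor's next savearea */
                 struct ex_savearea *global_free,    /* SVfree list head */
                 struct ex_savearea *local_free,     /* lclfree list head */
                 unsigned int inuse,                 /* SVinuse count */
                 struct ex_savearea **snapshot,      /* SVsavefreesnapshot */
                 unsigned int *inuse_snapshot)       /* SVsaveinusesnapshot */
{
	struct ex_savearea *tail = next_savearea, *p;

	*snapshot = tail;                /* the current savearea anchors the chain */
	for (p = global_free; p != NULL; p = p->sav_prev) {
		tail->sav_misc1 = p;     /* append each global free entry */
		tail = p;
	}
	inuse--;                         /* the anchor itself counts as free */
	for (p = local_free; p != NULL; p = p->sav_prev) {
		inuse--;                 /* local free entries count as free too */
		tail->sav_misc1 = p;
		tail = p;
	}
	tail->sav_misc1 = NULL;          /* terminate the snapshot chain */
	*inuse_snapshot = inuse;
}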
* * @APPLE_LICENSE_HEADER_START@ * @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -190,7 +191,7 @@ decl_simple_lock_data(,scc_stomp) /* (TEST/DEBUG) */ */ void -initialize_serial( caddr_t scc_phys_base ) +initialize_serial( caddr_t scc_phys_base, int32_t serial_baud ) { int i, chan, bits; scc_regmap_t regs; @@ -211,6 +212,8 @@ initialize_serial( caddr_t scc_phys_base ) simple_lock_init(&scc_stomp, FALSE); /* (TEST/DEBUG) */ + if (serial_baud == -1) serial_baud = DEFAULT_SPEED; + scc_softc[0].full_modem = TRUE; scc_std[0] = scc_phys_base; @@ -229,7 +232,7 @@ initialize_serial( caddr_t scc_phys_base ) /* Call probe so we are ready very early for remote gdb and for serial console output if appropriate. */ - if (scc_probe()) { + if (scc_probe(serial_baud)) { for (i = 0; i < NSCC_LINE; i++) { scc_softc[0].softr[i].wr5 = SCC_WR5_DTR | SCC_WR5_RTS; scc_param(scc_tty_for(i)); @@ -248,7 +251,7 @@ initialize_serial( caddr_t scc_phys_base ) } int -scc_probe(void) +scc_probe(int32_t serial_baud) { scc_softc_t scc; register int val, i; @@ -290,8 +293,8 @@ scc_probe(void) tp->t_ispeed = DEFAULT_PORT0_SPEED; tp->t_ospeed = DEFAULT_PORT0_SPEED; } else { - tp->t_ispeed = DEFAULT_SPEED; - tp->t_ospeed = DEFAULT_SPEED; + tp->t_ispeed = serial_baud; + tp->t_ospeed = serial_baud; } tp->t_flags = DEFAULT_FLAGS; scc->softr[i].speed = -1; @@ -648,21 +651,22 @@ scc_param(struct scc_tty *tp) void serial_keyboard_init(void) { + kern_return_t result; + thread_t thread; if(!(serialmode & 2)) return; /* Leave if we do not want a serial console */ kprintf("Serial keyboard started\n"); - kernel_thread_with_priority(serial_keyboard_start, MAXPRI_STANDARD); - return; + result = kernel_thread_start_priority((thread_continue_t)serial_keyboard_start, NULL, MAXPRI_KERNEL, &thread); + if (result != KERN_SUCCESS) + panic("serial_keyboard_init"); + + thread_deallocate(thread); } void serial_keyboard_start(void) { - thread_t cthread; - - cthread = current_thread(); /* Just who the heck are we anyway? */ - stack_privilege(cthread); /* Make sure we don't lose our stack */ serial_keyboard_poll(); /* Go see if there are any characters pending now */ panic("serial_keyboard_start: we can't get back here\n"); } @@ -682,9 +686,8 @@ serial_keyboard_poll(void) clock_interval_to_deadline(16, 1000000, &next); /* Get time of pop */ - assert_wait((event_t)serial_keyboard_poll, THREAD_INTERRUPTIBLE); /* Show we are "waiting" */ - thread_set_timer_deadline(next); /* Set the next time to check */ - thread_block(serial_keyboard_poll); /* Wait for it */ + assert_wait_deadline((event_t)serial_keyboard_poll, THREAD_UNINT, next); /* Show we are "waiting" */ + thread_block((thread_continue_t)serial_keyboard_poll); /* Wait for it */ panic("serial_keyboard_poll: Shouldn't never ever get here...\n"); } diff --git a/osfmk/ppc/serial_io.h b/osfmk/ppc/serial_io.h index 3676080b1..0b3e4756b 100644 --- a/osfmk/ppc/serial_io.h +++ b/osfmk/ppc/serial_io.h @@ -62,9 +62,9 @@ typedef struct scc_tty *scc_tty_t; * cnputc, cngetc, cnmaygetc */ -void initialize_serial(caddr_t scc_phys_base); +void initialize_serial(caddr_t scc_phys_base, int32_t serial_baud); -extern int scc_probe(void); +extern int scc_probe(int32_t serial_baud); #if 0 extern int scc_open( diff --git a/osfmk/ppc/simple_lock.h b/osfmk/ppc/simple_lock.h new file mode 100644 index 000000000..b7478bcb5 --- /dev/null +++ b/osfmk/ppc/simple_lock.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
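/*
 * [Annotation] The serial_io.c hunk above moves serial keyboard polling to
 * the newer kernel-thread interface: kernel_thread_start_priority() hands
 * back a referenced thread_t that the caller must drop with
 * thread_deallocate(). The resulting start-up pattern, restated on its own
 * (all names as used in the hunk above; the wrapper name is illustrative):
 */
static void
ex_start_serial_keyboard(void)
{
	thread_t        thread;
	kern_return_t   result;

	result = kernel_thread_start_priority((thread_continue_t)serial_keyboard_start,
	                                      NULL, MAXPRI_KERNEL, &thread);
	if (result != KERN_SUCCESS)
		panic("ex_start_serial_keyboard");

	thread_deallocate(thread);  /* drop the creation reference; the thread runs on */
}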
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifdef KERNEL_PRIVATE + +#ifndef _PPC_SIMPLE_LOCK_TYPES_H_ +#define _PPC_SIMPLE_LOCK_TYPES_H_ + +#ifdef KERNEL_PRIVATE +#include +#include + +#include +#ifdef MACH_KERNEL_PRIVATE +#include +#include +#include +#endif + +#ifdef MACH_KERNEL_PRIVATE + +#if MACH_LDEBUG +#define USLOCK_DEBUG 1 +#else +#define USLOCK_DEBUG 0 +#endif + +#if !USLOCK_DEBUG + +typedef lck_spin_t usimple_lock_data_t, *usimple_lock_t; + +#else + +typedef struct uslock_debug { + void *lock_pc; /* pc where lock operation began */ + void *lock_thread; /* thread that acquired lock */ + unsigned long duration[2]; + unsigned short state; + unsigned char lock_cpu; + void *unlock_thread; /* last thread to release lock */ + unsigned char unlock_cpu; + void *unlock_pc; /* pc where lock operation ended */ +} uslock_debug; + +typedef struct { + hw_lock_data_t interlock; /* must be first... see lock.c */ + unsigned short lock_type; /* must be second... 
see lock.c */ +#define USLOCK_TAG 0x5353 + uslock_debug debug; +} usimple_lock_data_t, *usimple_lock_t; + +#endif /* USLOCK_DEBUG */ + +#else + +typedef struct slock { + unsigned int lock_data[10]; +} usimple_lock_data_t, *usimple_lock_t; + +#endif /* MACH_KERNEL_PRIVATE */ + +#define USIMPLE_LOCK_NULL ((usimple_lock_t) 0) + +#if !defined(decl_simple_lock_data) + +typedef usimple_lock_data_t *simple_lock_t; +typedef usimple_lock_data_t simple_lock_data_t; + +#define decl_simple_lock_data(class,name) \ + class simple_lock_data_t name; + +#endif /* !defined(decl_simple_lock_data) */ + +#ifdef MACH_KERNEL_PRIVATE +#if !MACH_LDEBUG + +#define MACHINE_SIMPLE_LOCK + +extern void ppc_usimple_lock_init(simple_lock_t,unsigned short); +extern void ppc_usimple_lock(simple_lock_t); +extern void ppc_usimple_unlock_rwmb(simple_lock_t); +extern void ppc_usimple_unlock_rwcmb(simple_lock_t); +extern unsigned int ppc_usimple_lock_try(simple_lock_t); + +#define simple_lock_init(l,t) ppc_usimple_lock_init(l,t) +#define simple_lock(l) ppc_usimple_lock(l) +#define simple_unlock(l) ppc_usimple_unlock_rwcmb(l) +#define simple_unlock_rwmb(l) ppc_usimple_unlock_rwmb(l) +#define simple_lock_try(l) ppc_usimple_lock_try(l) +#define simple_lock_addr(l) (&(l)) +#define thread_sleep_simple_lock(l, e, i) \ + thread_sleep_fast_usimple_lock((l), (e), (i)) +#endif /* !MACH_LDEBUG */ + +extern unsigned int hw_lock_bit( + unsigned int *, + unsigned int, + unsigned int); + +extern unsigned int hw_cpu_sync( + unsigned int *, + unsigned int); + +extern unsigned int hw_cpu_wcng( + unsigned int *, + unsigned int, + unsigned int); + +extern unsigned int hw_lock_mbits( + unsigned int *, + unsigned int, + unsigned int, + unsigned int, + unsigned int); + +void hw_unlock_bit( + unsigned int *, + unsigned int); + +#endif /* MACH_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ + +#endif /* !_PPC_SIMPLE_LOCK_TYPES_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/ppc/skiplists.s b/osfmk/ppc/skiplists.s index 13789e67b..5acc66143 100644 --- a/osfmk/ppc/skiplists.s +++ b/osfmk/ppc/skiplists.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -156,13 +156,19 @@ mapSrch64d: lwz r0,mpFlags(r9) ; get flag bits from prev mapping ld r10,mpVAddr(r9) ; re-fetch base address of prev ptr ld r4,mpList0(r9) ; get 64-bit ptr to next mapping, if any - andi. r0,r0,mpBlock+mpNest ; block mapping or nested pmap? lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping + + rlwinm r0,r0,0,mpType ; isolate mapping type code + cmplwi cr1,r0,mpBlock ; cr1_eq <- block type? + cmplwi r0,mpNest ; cr0_eq <- nested type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested type? + cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + rldicr r10,r10,0,51 ; zero low 12 bits of mapping va - beq mapSrch64Exit ; prev mapping was just a scalar page, search failed - cmpwi r0,mpBlock ; block mapping or nested pmap? 
+ bne mapSrch64Exit ; prev mapping was just a scalar page, search failed sldi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq mapSrch64f ; we guessed right, it was a block mapping + beq cr1,mapSrch64f ; we guessed right, it was a block mapping addi r11,r11,1 ; mpBSize is 1 too low sldi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments subi r0,r11,4096 ; get address of last page in submap @@ -245,14 +251,20 @@ mapSrch32d: beq- mapSrch32Exit ; prev ptr was null, search failed lwz r0,mpFlags(r9) ; get flag bits from prev mapping lwz r10,mpVAddr+4(r9) ; re-fetch base address of prev ptr - andi. r0,r0,mpBlock+mpNest ; block mapping or nested pmap? lwz r4,mpList0+4(r9) ; get ptr to next mapping, if any - beq mapSrch32Exit ; prev mapping was just a scalar page, search failed + + rlwinm r0,r0,0,mpType ; isolate mapping type code + cmplwi cr1,r0,mpBlock ; cr1_eq <- block type? + cmplwi r0,mpNest ; cr0_eq <- nested type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested type? + cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + + bne mapSrch32Exit ; prev mapping was just a scalar page, search failed lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping - cmpwi r0,mpBlock ; block mapping or nested pmap? rlwinm r10,r10,0,0,19 ; zero low 12 bits of block mapping va slwi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq mapSrch32f ; we guessed right, it was a block mapping + beq cr1,mapSrch32f ; we guessed right, it was a block mapping addi r11,r11,1 ; mpBSize is 1 too low slwi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments subi r0,r11,4096 ; get address of last page in submap @@ -374,14 +386,22 @@ mapSrchFull64a: ; loop over each mapping ld r4,mpVAddr(r3) ; get va for this mapping (plus flags in low 12 bits) addi r2,r2,1 ; count mappings visited lwz r0,mpFlags(r3) ; get mapping flag bits + cmpld cr0,r10,r4 ; make sure VAs come in strictly ascending order rldicr r4,r4,0,51 ; zero low 12 bits of mapping va cmpld cr1,r5,r4 ; compare the vas bge-- cr0,mapSkipListPanic ; die if keys are out of order - andi. r0,r0,mpBlock+mpNest ; is it a scalar mapping? (ie, of a single page) + + rlwinm r0,r0,0,mpType ; isolate mapping type code + cmplwi r0,mpNest ; cr0_eq <- nested type? + cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- nested type or linkage type? + cmplwi cr5,r0,mpBlock ; cr5_eq <- block type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? 
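/*
 * [Annotation] The skiplists.s hunks here stop treating mpBlock+mpNest as a
 * bit mask (the old andi. test) and instead isolate a multi-bit mpType
 * field, then OR together equality tests for the block, nested, and new
 * linkage types in the condition register. Expressed in C, the predicate
 * the cmplwi/cror chains compute is roughly the following; the EX_ constant
 * values are illustrative only, the real encodings live in the ppc pmap
 * headers.
 */
#define EX_MPTYPE_MASK  0x0000000f   /* assumed position of the mpType field */
#define EX_MPBLOCK      4            /* illustrative type codes */
#define EX_MPNEST       5
#define EX_MPLINKAGE    6

static int
ex_is_nonscalar(unsigned int mpFlags)
{
	unsigned int t = mpFlags & EX_MPTYPE_MASK;   /* rlwinm r0,r0,0,mpType */

	/* block, nested pmap, or linkage mapping => not a single scalar page */
	return (t == EX_MPBLOCK || t == EX_MPNEST || t == EX_MPLINKAGE);
}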
+ blt cr1,mapSrchFull64d ; key is less, try next list beq cr1,mapSrchFull64Found ; this is the correct mapping - bne-- cr0,mapSrchFull64e ; handle block mapping or nested pmap + beq-- cr0,mapSrchFull64e ; handle block mapping or nested pmap mapSrchFull64b: la r8,mpList0(r3) ; point to skip list vector in this mapping mr r9,r3 ; current becomes previous @@ -408,9 +428,8 @@ mapSrchFull64d: mapSrchFull64e: lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping (if nonscalar) - cmpwi r0,mpBlock ; distinguish between block mapping and nested pmaps sldi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq mapSrchFull64f ; we guessed right, it was a block mapping + beq cr5,mapSrchFull64f ; we guessed right, it was a block mapping addi r11,r11,1 ; mpBSize is 1 too low sldi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments subi r0,r11,4096 ; get address of last page in submap @@ -456,14 +475,22 @@ mapSrchFull32a: ; loop over each mapping lwz r4,mpVAddr+4(r3) ; get va for this mapping (plus flags in low 12 bits) addi r2,r2,1 ; count mappings visited lwz r0,mpFlags(r3) ; get mapping flag bits + cmplw cr0,r10,r4 ; make sure VAs come in strictly ascending order rlwinm r4,r4,0,0,19 ; zero low 12 bits of mapping va cmplw cr1,r5,r4 ; compare the vas bge- cr0,mapSkipListPanic ; die if keys are out of order - andi. r0,r0,mpBlock+mpNest ; is it a scalar mapping? (ie, of a single page) + + rlwinm r0,r0,0,mpType ; isolate mapping type code + cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? + cmplwi r0,mpNest ; cr0_eq <- nested type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- linkage type or nested type? + cmplwi cr5,r0,mpBlock ; cr5_eq <- block type? + cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + blt cr1,mapSrchFull32d ; key is less than this va, try next list beq- cr1,mapSrchFull32Found ; this is the correct mapping - bne- cr0,mapSrchFull32e ; handle block mapping or nested pmap + beq- cr0,mapSrchFull32e ; handle block mapping or nested pmap mapSrchFull32b: la r8,mpList0+4(r3) ; point to skip list vector in this mapping mr r9,r3 ; current becomes previous @@ -490,9 +517,8 @@ mapSrchFull32d: mapSrchFull32e: lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping (if nonscalar) - cmpwi r0,mpBlock ; distinguish between block mapping and nested pmaps slwi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq mapSrchFull32f ; we guessed right, it was a block mapping + beq cr5,mapSrchFull32f ; we guessed right, it was a block mapping addi r11,r11,1 ; mpBSize is 1 too low slwi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments subi r0,r11,4096 ; get address of last page in submap @@ -1063,13 +1089,18 @@ mapVer64a: ; Do some additional checks (so we only do them once per mapping.) ; First, if a block mapping or nested pmap, compute block end. - andi. r29,r29,mpBlock+mpNest ; is it block mapping or nested pmap? + rlwinm r29,r29,0,mpType ; isolate mapping type code + cmplwi r29,mpNest ; cr0_eq <- nested type? + cmplwi cr1,r29,mpLinkage ; cr1_eq <- linkage type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- linkage type or nested type? + cmplwi cr1,r29,mpBlock ; cr1_eq <- block type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + subi r21,r21,1 ; count mappings in this pmap - beq++ mapVer64b ; not nested or pmap + bne++ mapVer64b ; not nested or pmap lhz r27,mpBSize(r26) ; get #pages or #segments - cmpwi r29,mpBlock ; which one is it? 
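/*
 * [Annotation] Once a mapping is known to be non-scalar, the code above
 * derives the offset of its last page from mpBSize, whose units differ by
 * type: block mappings store (pages - 1), while nested pmaps (and linkage
 * mappings, which take the same path) store a count of 256 MB segments
 * that is "1 too low". A C restatement of that arithmetic, with the shift
 * constants exactly as in the assembly:
 */
#include <stdint.h>

static uint64_t
ex_last_page_offset(int is_block, unsigned int bsize /* mpBSize */)
{
	if (is_block)
		return (uint64_t)bsize << 12;             /* size in bytes - 4K */

	/* nested pmap: mpBSize is 1 too low, units are 256 MB segments */
	return (((uint64_t)bsize + 1) << 28) - 4096;
}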
sldi r29,r27,12 ; assume block mapping, units are (pages-1) - beq mapVer64b ; guessed correctly + beq cr1,mapVer64b ; guessed correctly addi r27,r27,1 ; units of nested pmap are (#segs-1) sldi r29,r27,28 ; convert to #bytes subi r29,r29,4096 ; get offset to last byte in nested pmap @@ -1190,12 +1221,17 @@ mapVer32a: ; Then, if a block mapping or nested pmap, compute block end. - andi. r29,r29,mpBlock+mpNest ; is it block mapping or nested pmap? - beq+ mapVer32b ; no + rlwinm r29,r29,0,mpType ; isolate mapping type code + cmplwi cr1,r29,mpLinkage ; cr1_eq <- linkage type? + cmplwi r29,mpNest ; cr0_eq <- nested type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- linkage type or nested type? + cmplwi cr1,r29,mpBlock ; cr1_eq <- block type? + cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + + bne+ mapVer32b ; not block or nested type lhz r27,mpBSize(r26) ; get #pages or #segments - cmpwi r29,mpBlock ; which one is it? slwi r29,r27,12 ; assume block mapping, units are pages - beq mapVer32b ; guessed correctly + beq cr1,mapVer32b ; guessed correctly addi r27,r27,1 ; units of nested pmap are (#segs-1) slwi r29,r27,28 ; convert to #bytes subi r29,r29,4096 ; get offset to last byte in nested pmap diff --git a/osfmk/ppc/start.s b/osfmk/ppc/start.s index 583baf34d..c5d46ed5c 100644 --- a/osfmk/ppc/start.s +++ b/osfmk/ppc/start.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -25,7 +25,6 @@ #define __APPLE_API_PRIVATE -#include #include #include #include @@ -35,6 +34,10 @@ #include #include #include + + +; Definitions of the processor type table format, which drives this code. +; The table ("processor_types") is assembled in at the end of this file. #define ptFilter 0 #define ptVersion 4 @@ -53,6 +56,9 @@ #define ptMaxPAddr 52 #define ptSize 56 + +; We use cr2 for flags: + #define bootCPU 10 #define firstInit 9 #define firstBoot 8 @@ -86,7 +92,12 @@ ENTRY(resetPOR,TAG_NO_FRAME_USED) ENTRY(_start_cpu,TAG_NO_FRAME_USED) crclr bootCPU ; Set non-boot processor crclr firstInit ; Set not first time init - mr r30,r3 ; Set current per_proc + lwz r30,ppe_paddr(r3) ; Set current per_proc + lwz r28,ppe_paddr+4(r3) ; Set current per_proc + rlwinm r30,r30,0,1,0 ; Copy low 32 bits to top 32 + rlwimi r30,r28,0,0,31 ; Insert low part of 64-bit address in bottom 32 bits + subi r29,r3,(ACT_PER_PROC-ppe_vaddr) ; Substract mact.PerProc offset + mr r3,r30 ; Set current per_proc ; ; Note that we are just trying to get close. 
The real TB sync will take @@ -108,8 +119,13 @@ ENTRY(_start,TAG_NO_FRAME_USED) startJoin: mflr r2 ; Save the return address - lis r30,hi16(EXT(per_proc_info)) ; Set current per_proc - ori r30,r30,lo16(EXT(per_proc_info)) ; Set current per_proc + lis r28,hi16(EXT(PerProcTable)) ; Set PerProcTable + lis r30,hi16(EXT(BootProcInfo)) ; Set current per_proc + ori r28,r28,lo16(EXT(PerProcTable)) ; Set PerProcTable + ori r30,r30,lo16(EXT(BootProcInfo)) ; Set current per_proc + stw r30,ppe_paddr+4(r28) ; Set per_proc_entry + stw r30,ppe_vaddr(r28) ; Set per_proc_entry + subi r29,r28,(ACT_PER_PROC-ppe_vaddr) ; Substract mact.PerProc offset crset bootCPU ; Set boot processor lwz r17,pfAvailable(r30) ; Get the available bits @@ -130,10 +146,10 @@ allstart: crand firstBoot,bootCPU,firstInit ; Indicate if we are on the initial first processor startup - mtsprg 0,r30 ; Set the per_proc + mtsprg 0,r30 ; Set per_proc paddr + mtsprg 1,r29 ; Set spr1 li r9,0 ; Clear out a register - mtsprg 1,r9 ; Clear the SPRGs mtsprg 2,r9 mtsprg 3,r9 @@ -192,18 +208,23 @@ donePVR: lwz r20,ptInitRout(r26) ; Grab the special init routine stw r13,pfMaxVAddr(r30) ; Save it lwz r13,ptMaxPAddr(r26) ; Get max physical address stw r13,pfMaxPAddr(r30) ; Save it + + +; Go through the patch table, changing performance sensitive kernel routines based on the +; processor type or other things. + lis r11,hi16(EXT(patch_table)) ori r11,r11,lo16(EXT(patch_table)) lwz r19,ptPatch(r26) ; Get ptPatch field - li r12,PATCH_TABLE_SIZE - mtctr r12 patch_loop: lwz r16,patchType(r11) ; Load the patch type lwz r15,patchValue(r11) ; Load the patch value cmplwi cr1,r16,PATCH_FEATURE ; Is it a patch feature entry + cmplwi cr7,r16,PATCH_END_OF_TABLE ; end of table? and. r14,r15,r19 ; Is it set in the patch feature crandc cr0_eq,cr1_eq,cr0_eq ; Do we have a match - beq patch_apply ; Yes, patch memory + beq cr7,doOurInit ; end of table, Go do processor specific initialization + beq patch_apply ; proc feature matches, so patch memory cmplwi cr1,r16,PATCH_PROCESSOR ; Is it a patch processor entry cmplw cr0,r15,r18 ; Check matching processor crand cr0_eq,cr1_eq,cr0_eq ; Do we have a match @@ -219,8 +240,10 @@ patch_apply: sync ; Hang out some more... patch_skip: addi r11,r11,peSize ; Point to the next patch entry - bdnz patch_loop ; Loop if in the range - b doOurInit ; Go do processor specific initialization... + b patch_loop ; handle next + + +; Additional processors join here after skipping above code. notFirst: lwz r17,pfAvailable(r30) ; Get our features @@ -231,33 +254,7 @@ doOurInit: mr. r20,r20 ; See if initialization routine ori r17,r17,lo16(pfValid) ; Set the valid bit stw r17,pfAvailable(r30) ; Set the available features - bf firstBoot,nofeatcpy ; Skip feature propagate if not first time boot... - - li r2,NCPUS ; Get number of CPUs - lis r23,hi16(EXT(per_proc_info)) ; Set base per_proc - ori r23,r23,lo16(EXT(per_proc_info)) ; Set base per_proc - addi r6,r23,ppSize ; Point to the next one - -cpyFCpu: addic. r2,r2,-1 ; Count down - la r8,pfAvailable(r23) ; Point to features of boot processor - la r7,pfAvailable(r6) ; Point to features of our processor - li r9,pfSize/4 ; Get size of a features area - ble-- nofeatcpy ; Copied all we need - -cpyFeat: subi r9,r9,1 ; Count word - lwz r0,0(r8) ; Get boot cpu features - stw r0,0(r7) ; Copy to ours - mr. r9,r9 ; Finished? - addi r7,r7,4 ; Next out - addi r8,r8,4 ; Next in - bgt cpyFeat ; Copy all boot cpu features to us... 
- - lwz r17,pfAvailable(r6) ; Get our newly initialized features - addi r6,r6,ppSize ; Point to the next one - b cpyFCpu ; Do the next per_proc... - - -nofeatcpy: rlwinm. r0,r17,0,pf64Bitb,pf64Bitb ; Is this a 64-bit machine? + rlwinm. r0,r17,0,pf64Bitb,pf64Bitb ; Is this a 64-bit machine? mtsprg 2,r17 ; Remember the feature flags bne++ start64 ; Skip following if 64-bit... @@ -297,9 +294,9 @@ nofeatcpy: rlwinm. r0,r17,0,pf64Bitb,pf64Bitb ; Is this a 64-bit machine? start64: lis r5,hi16(startcommon) ; Get top of address of continue point mfspr r6,hid0 ; Get the HID0 ori r5,r5,lo16(startcommon) ; Get low of address of continue point - lis r9,hi16(MASK(MSR_HV)) ; ? + lis r9,hi16(MASK(MSR_HV)|MASK(MSR_SF)) ; ? lis r20,hi16(dozem|napm|sleepm) ; Get mask of power saving features - li r7,MSR_VM_OFF ; Get real mode MSR, 64-bit off + li r7,MSR_VM_OFF ; Get real mode MSR sldi r9,r9,32 ; Slide into position sldi r20,r20,32 ; Slide power stuff into position or r9,r9,r7 ; Form initial MSR @@ -422,29 +419,29 @@ noFloat: rlwinm. r0,r17,0,pfAltivecb,pfAltivecb ; See if there is Altivec mtmsr r0 isync -noVector: rlwinm. r0,r17,0,pfSMPcapb,pfSMPcapb ; See if we can do SMP - beq- noSMP ; Nope... - - lhz r13,PP_CPU_NUMBER(r30) ; Get the CPU number - mtspr pir,r13 ; Set the PIR - -noSMP: - +noVector: bl EXT(cacheInit) ; Initializes all caches (including the TLB) + bt bootCPU,run32 + + mfsprg r30,0 ; Phys per proc + bl EXT(hw_setup_trans) ; Set up hardware needed for translation + bl EXT(hw_start_trans) ; Start translating + +run32: rlwinm. r0,r17,0,pf64Bitb,pf64Bitb ; Is this a 64-bit machine? beq++ isnot64 ; Skip following if not 64-bit... mfmsr r29 ; Get the MSR - rlwinm r29,r29,0,0,31 ; Make sure that 64-bit mode is off + rldicl r29,r29,0,MSR_SF_BIT+1 ; turn 64-bit mode off mtmsrd r29 ; Set it isync ; Make sure isnot64: bf bootCPU,callcpu - lis r29,HIGH_ADDR(EXT(intstack_top_ss)) ; move onto interrupt stack - ori r29,r29,LOW_ADDR(EXT(intstack_top_ss)) - lwz r29,0(r29) + lis r29,HIGH_ADDR(EXT(intstack)) ; move onto interrupt stack + ori r29,r29,LOW_ADDR(EXT(intstack)) + addi r29,r29,INTSTACK_SIZE-FM_SIZE li r28,0 stw r28,FM_BACKPTR(r29) ; store a null frame backpointer @@ -458,16 +455,17 @@ isnot64: bf bootCPU,callcpu BREAKPOINT_TRAP callcpu: + mfsprg r31,1 ; Fake activation pointer + lwz r31,ACT_PER_PROC(r31) ; Load per_proc lwz r29,PP_INTSTACK_TOP_SS(r31) ; move onto interrupt stack li r28,0 stw r28,FM_BACKPTR(r29) ; store a null frame backpointer - mr r1,r29 ; move onto new stack mr r3,r31 ; Restore any arguments we may have trashed -; Note that we exit from here with translation still off +; Note that we exit from here with translation on bl EXT(ppc_init_cpu) ; Jump into cpu init code BREAKPOINT_TRAP ; Should never return @@ -763,12 +761,25 @@ init7450done: b init745X ; Continue with standard init -init970: - li r20,0 ; Clear this - mtspr hior,r20 ; Make sure that 0 is interrupt prefix +init970: + lis r20,8 ; Set up for 512K L2 +init970x: + li r0,0 ; Clear this + mtspr hior,r0 ; Make sure that 0 is interrupt prefix bf firstBoot,init970nb ; No init for wakeup or second processor.... +; +; We can not query or change the L2 size. We will just +; phoney up a L2CR to make sysctl "happy" and set the +; L2 size to 512K. 
+; + + lis r0,0x8000 ; Synthesize a "valid" but non-existant L2CR + stw r0,pfl2crOriginal(r30) ; Set a dummy L2CR + stw r0,pfl2cr(r30) ; Set a dummy L2CR + stw r20,pfl2Size(r30) ; Set the L2 size + mfspr r11,hid0 ; Get original hid0 std r11,pfHID0(r30) ; Save original mfspr r11,hid1 ; Get original hid1 @@ -792,17 +803,9 @@ init970: mfspr r11,hid0 ; Get it isync -; -; We can not query or change the L2 size. We will just -; phoney up a L2CR to make sysctl "happy" and set the -; L2 size to 512K. -; - - lis r0,0x8000 ; Synthesize a "valid" but non-existant L2CR - stw r0,pfl2crOriginal(r30) ; Set a dummy L2CR - stw r0,pfl2cr(r30) ; Set a dummy L2CR - lis r0,8 ; Get 512K - stw r0,pfl2Size(r30) ; Set the L2 size + lis r0,(pcfValid|pcfLarge|pcfDedSeg)<<8 ; Set the valid bit, dedicated segment, and large page flags + ori r0,r0,(24<<8)|24 ; Add in the 16M page size + stw r0,lgpPcfg+(pcfSize*pcfLargePcfg)(0) ; Set the 16M primary large page configuration entry blr @@ -868,7 +871,8 @@ inin970ki: icbi 0,r11 ; Kill I$ isync blr ; Leave... - + + ; Unsupported Processors initUnsupported: @@ -1243,6 +1247,7 @@ processor_types: .long 65 .long 42 + ; All other processors are not supported .align 2 diff --git a/osfmk/ppc/status.c b/osfmk/ppc/status.c index 3fef44cc1..a0545f6fe 100644 --- a/osfmk/ppc/status.c +++ b/osfmk/ppc/status.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -25,24 +25,25 @@ */ #include -#include #include #include #include +#include #include #include #include -#include +#include #include +//#include +typedef unsigned int fixpt_t; /* XXX not self contained */ +#include /* USRSTACK, etc. */ + #include extern unsigned int killprint; extern double FloatInit; extern unsigned long QNaNbarbarian[4]; -extern void thread_bootstrap_return(void); -extern struct Saveanchor saveanchor; -extern int real_ncpus; /* Number of actual CPUs */ #define USRSTACK 0xc0000000 @@ -52,7 +53,7 @@ thread_userstack( int, thread_state_t, unsigned int, - vm_offset_t *, + mach_vm_offset_t *, int * ); @@ -62,20 +63,22 @@ thread_entrypoint( int, thread_state_t, unsigned int, - vm_offset_t * + mach_vm_offset_t * ); unsigned int get_msr_exportmask(void); unsigned int get_msr_nbits(void); unsigned int get_msr_rbits(void); void ppc_checkthreadstate(void *, int); -void thread_set_child(thread_act_t child, int pid); -void thread_set_parent(thread_act_t parent, int pid); +void thread_set_child(thread_t child, int pid); +void thread_set_parent(thread_t parent, int pid); +void save_release(struct savearea *save); /* * Maps state flavor to number of words in the state: */ -unsigned int state_count[] = { +__private_extern__ +unsigned int _MachineStateCount[] = { /* FLAVOR_LIST */ 0, PPC_THREAD_STATE_COUNT, PPC_FLOAT_STATE_COUNT, @@ -93,10 +96,10 @@ unsigned int state_count[] = { kern_return_t machine_thread_get_state( - thread_act_t thr_act, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t *count) + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count) { register struct savearea *sv; /* Pointer to the context savearea */ @@ -113,7 +116,7 @@ machine_thread_get_state( register struct ppc_float_state *fs; register struct ppc_vector_state *vs; - genuser = find_user_regs(thr_act); /* Find the current user general context for this activation */ + genuser = find_user_regs(thread); switch (flavor) { @@ -267,6 +270,8 @@ 
machine_thread_get_state( xts->ctr = ((unsigned long long *)&FloatInit)[0]; xts->srr0 = ((unsigned long long *)&FloatInit)[0]; xts->srr1 = MSR_EXPORT_MASK_SET; + if(task_has_64BitAddr(thread->task)) + xts->srr1 |= (uint64_t)MASK32(MSR_SF) << 32; /* If 64-bit task, force 64-bit mode */ xts->vrsave = 0; /* VRSAVE register (Altivec only) */ } @@ -325,11 +330,11 @@ machine_thread_get_state( return KERN_INVALID_ARGUMENT; } - fpu_save(thr_act->mact.curctx); /* Just in case it's live, save it */ + fpu_save(thread->machine.curctx); /* Just in case it's live, save it */ fs = (struct ppc_float_state *) tstate; /* Point to destination */ - fsv = find_user_fpu(thr_act); /* Get the user's fpu savearea */ + fsv = find_user_fpu(thread); /* Get the user's fpu savearea */ if(fsv) { /* See if we have any */ bcopy((char *)&fsv->save_fp0, (char *)fs, 32*8); /* 32 registers */ @@ -356,11 +361,11 @@ machine_thread_get_state( return KERN_INVALID_ARGUMENT; } - vec_save(thr_act->mact.curctx); /* Just in case it's live, save it */ + vec_save(thread->machine.curctx); /* Just in case it's live, save it */ vs = (struct ppc_vector_state *) tstate; /* Point to destination */ - vsv = find_user_vec(thr_act); /* Find the vector savearea */ + vsv = find_user_vec(thread); /* Find the vector savearea */ if(vsv) { /* See if we have any */ @@ -417,23 +422,22 @@ machine_thread_get_state( kern_return_t machine_thread_get_kern_state( - thread_act_t thr_act, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t *count) + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count) { register struct savearea *sv; /* Pointer to the context savearea */ savearea *genkern; - int i, j; - unsigned int vrvalidwrk; + int i; register struct ppc_thread_state *ts; register struct ppc_thread_state64 *xts; register struct ppc_exception_state *es; register struct ppc_exception_state64 *xes; - genkern = find_kern_regs(thr_act); + genkern = find_kern_regs(thread); switch (flavor) { @@ -652,17 +656,17 @@ machine_thread_get_kern_state( */ kern_return_t machine_thread_set_state( - thread_act_t thr_act, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t count) + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t count) { - savearea *sv, *genuser; + savearea *genuser; savearea_fpu *fsv, *fsvn, *fsvo; savearea_vec *vsv, *vsvn, *vsvo; unsigned int i; - int clgn; + unsigned int clgn; register struct ppc_thread_state *ts; register struct ppc_thread_state64 *xts; register struct ppc_exception_state *es; @@ -670,7 +674,7 @@ machine_thread_set_state( register struct ppc_float_state *fs; register struct ppc_vector_state *vs; -// dbgTrace((unsigned int)thr_act, (unsigned int)sv, flavor); /* (TEST/DEBUG) */ +// dbgTrace((unsigned int)thr_act, (unsigned int)0 /*sv: was never set*/, flavor); /* (TEST/DEBUG) */ clgn = count; /* Get the count */ @@ -724,7 +728,7 @@ machine_thread_set_state( return KERN_INVALID_ARGUMENT; } - genuser = get_user_regs(thr_act); /* Find or allocate and initialize one */ + genuser = get_user_regs(thread); /* Find or allocate and initialize one */ switch (flavor) { @@ -777,6 +781,11 @@ machine_thread_set_state( genuser->save_srr1 |= MSR_EXPORT_MASK_SET; genuser->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC)); /* Make sure we don't enable the floating point unit */ + + if(task_has_64BitAddr(thread->task)) + genuser->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32; /* If 64-bit task, force 64-bit mode */ + else + 
genuser->save_srr1 &= ~((uint64_t)MASK32(MSR_SF) << 32); /* else 32-bit mode */ return KERN_SUCCESS; @@ -830,6 +839,11 @@ machine_thread_set_state( genuser->save_srr1 |= MSR_EXPORT_MASK_SET; genuser->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC)); /* Make sure we don't enable the floating point unit */ + + if(task_has_64BitAddr(thread->task)) + genuser->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32; /* If 64-bit task, force 64-bit mode */ + else + genuser->save_srr1 &= ~((uint64_t)MASK32(MSR_SF) << 32); /* else 32-bit mode */ return KERN_SUCCESS; @@ -860,21 +874,21 @@ machine_thread_set_state( case PPC_FLOAT_STATE: - toss_live_fpu(thr_act->mact.curctx); /* Toss my floating point if live anywhere */ + toss_live_fpu(thread->machine.curctx); /* Toss my floating point if live anywhere */ - fsv = find_user_fpu(thr_act); /* Get the user's floating point context */ + fsv = find_user_fpu(thread); /* Get the user's floating point context */ if(!fsv) { /* Do we have one yet? */ fsv = (savearea_fpu *)save_alloc(); /* If we still don't have one, get a new one */ fsv->save_hdr.save_flags = (fsv->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft); /* Mark as in use as float */ - fsv->save_hdr.save_act = (struct thread_activation *)thr_act; /* Point to the activation */ + fsv->save_hdr.save_act = thread; fsv->save_hdr.save_prev = 0; /* Mark no more */ fsv->save_hdr.save_level = 0; /* Mark user state */ - if(!thr_act->mact.curctx->FPUsave) thr_act->mact.curctx->FPUsave = fsv; /* If no floating point, chain us first */ + if(!thread->machine.curctx->FPUsave) thread->machine.curctx->FPUsave = fsv; /* If no floating point, chain us first */ else { - fsvn = fsvo = thr_act->mact.curctx->FPUsave; /* Remember first one */ + fsvn = fsvo = thread->machine.curctx->FPUsave; /* Remember first one */ while (fsvn) { /* Go until we hit the end */ fsvo = fsvn; /* Remember the previous one */ @@ -898,21 +912,21 @@ machine_thread_set_state( case PPC_VECTOR_STATE: - toss_live_vec(thr_act->mact.curctx); /* Toss my vector if live anywhere */ + toss_live_vec(thread->machine.curctx); /* Toss my vector if live anywhere */ - vsv = find_user_vec(thr_act); /* Get the user's vector context */ + vsv = find_user_vec(thread); /* Get the user's vector context */ if(!vsv) { /* Do we have one yet? */ vsv = (savearea_vec *)save_alloc(); /* If we still don't have one, get a new one */ vsv->save_hdr.save_flags = (vsv->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft); /* Mark as in use as vector */ - vsv->save_hdr.save_act = (struct thread_activation *)thr_act; /* Point to the activation */ + vsv->save_hdr.save_act = thread; vsv->save_hdr.save_prev = 0; /* Mark no more */ vsv->save_hdr.save_level = 0; /* Mark user state */ - if(!thr_act->mact.curctx->VMXsave) thr_act->mact.curctx->VMXsave = vsv; /* If no vector, chain us first */ + if(!thread->machine.curctx->VMXsave) thread->machine.curctx->VMXsave = vsv; /* If no vector, chain us first */ else { - vsvn = vsvo = thr_act->mact.curctx->VMXsave; /* Remember first one */ + vsvn = vsvo = thread->machine.curctx->VMXsave; /* Remember first one */ while (vsvn) { /* Go until we hit the end */ vsvo = vsvn; /* Remember the previous one */ @@ -940,6 +954,33 @@ machine_thread_set_state( } } + +/* + * This is where registers that are not normally specified by the mach-o + * file on an execve should be nullified, perhaps to avoid a covert channel. 
+ * We've never bothered to clear FPRs or VRs, but it is important to clear + * the FPSCR, which is kept in the general state but not set by the general + * flavor (ie, PPC_THREAD_STATE or PPC_THREAD_STATE64.) + */ +kern_return_t +machine_thread_state_initialize( + thread_t thread) +{ + savearea *sv; + + sv = get_user_regs(thread); /* Find or allocate and initialize one */ + + sv->save_fpscr = 0; /* Clear all floating point exceptions */ + sv->save_vrsave = 0; /* Set the vector save state */ + sv->save_vscr[0] = 0x00000000; + sv->save_vscr[1] = 0x00000000; + sv->save_vscr[2] = 0x00000000; + sv->save_vscr[3] = 0x00010000; /* Disable java mode and clear saturated */ + + return KERN_SUCCESS; +} + + /* * Duplicates the context of one thread into a new one. * The new thread is assumed to be new and have no user state contexts except maybe a general one. @@ -949,15 +990,17 @@ machine_thread_set_state( * eliminate any floating point or vector kernel contexts and carry across the user state ones. */ -kern_return_t machine_thread_dup(thread_act_t self, thread_act_t target) { - +kern_return_t +machine_thread_dup( + thread_t self, + thread_t target) +{ savearea *sv, *osv; savearea_fpu *fsv, *fsvn; savearea_vec *vsv, *vsvn; - unsigned int spc, i, *srs; - fpu_save(self->mact.curctx); /* Make certain floating point state is all saved */ - vec_save(self->mact.curctx); /* Make certain the vector state is all saved */ + fpu_save(self->machine.curctx); /* Make certain floating point state is all saved */ + vec_save(self->machine.curctx); /* Make certain the vector state is all saved */ sv = get_user_regs(target); /* Allocate and initialze context in the new activation */ @@ -969,20 +1012,20 @@ kern_return_t machine_thread_dup(thread_act_t self, thread_act_t target) { (char *)((unsigned int)sv + sizeof(savearea_comm)), sizeof(struct savearea) - sizeof(savearea_comm)); - sv->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC)); /* Make certain that floating point and vector are turned off */ + sv->save_srr1 &= (uint64_t)(~(MASK(MSR_FP) | MASK(MSR_VEC))); /* Make certain that floating point and vector are turned off */ fsv = find_user_fpu(self); /* Get any user floating point */ - target->mact.curctx->FPUsave = 0; /* Assume no floating point */ + target->machine.curctx->FPUsave = 0; /* Assume no floating point */ if(fsv) { /* Did we find one? */ fsvn = (savearea_fpu *)save_alloc(); /* If we still don't have one, get a new one */ fsvn->save_hdr.save_flags = (fsvn->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft); /* Mark as in use as float */ - fsvn->save_hdr.save_act = (struct thread_activation *)target; /* Point to the activation */ + fsvn->save_hdr.save_act = target; fsvn->save_hdr.save_prev = 0; /* Mark no more */ fsvn->save_hdr.save_level = 0; /* Mark user state */ - target->mact.curctx->FPUsave = fsvn; /* Chain in the floating point */ + target->machine.curctx->FPUsave = fsvn; /* Chain in the floating point */ bcopy((char *)((unsigned int)fsv + sizeof(savearea_comm)), /* Copy everything but the headers */ (char *)((unsigned int)fsvn + sizeof(savearea_comm)), @@ -991,16 +1034,16 @@ kern_return_t machine_thread_dup(thread_act_t self, thread_act_t target) { vsv = find_user_vec(self); /* Get any user vector */ - target->mact.curctx->VMXsave = 0; /* Assume no vector */ + target->machine.curctx->VMXsave = 0; /* Assume no vector */ if(vsv) { /* Did we find one? 
*/ vsvn = (savearea_vec *)save_alloc(); /* If we still don't have one, get a new one */ vsvn->save_hdr.save_flags = (vsvn->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft); /* Mark as in use as float */ - vsvn->save_hdr.save_act = (struct thread_activation *)target; /* Point to the activation */ + vsvn->save_hdr.save_act = target; vsvn->save_hdr.save_prev = 0; /* Mark no more */ vsvn->save_hdr.save_level = 0; /* Mark user state */ - target->mact.curctx->VMXsave = vsvn; /* Chain in the floating point */ + target->machine.curctx->VMXsave = vsvn; /* Chain in the floating point */ bcopy((char *)((unsigned int)vsv + sizeof(savearea_comm)), /* Copy everything but the headers */ (char *)((unsigned int)vsvn + sizeof(savearea_comm)), @@ -1017,15 +1060,17 @@ kern_return_t machine_thread_dup(thread_act_t self, thread_act_t target) { * We only set initial values if there was no context found. */ -savearea *get_user_regs(thread_act_t act) { - +savearea * +get_user_regs( + thread_t thread) +{ savearea *sv, *osv; - unsigned int spc, i, *srs; + unsigned int i; - if (act->mact.upcb) - return act->mact.upcb; + if (thread->machine.upcb) + return thread->machine.upcb; - sv = act->mact.pcb; /* Get the top savearea on the stack */ + sv = thread->machine.pcb; /* Get the top savearea on the stack */ osv = 0; /* Set no user savearea yet */ while(sv) { /* Find the user context */ @@ -1035,7 +1080,7 @@ savearea *get_user_regs(thread_act_t act) { sv = save_alloc(); /* Get one */ sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft); /* Mark as in use as general */ - sv->save_hdr.save_act = (struct thread_activation *)act; /* Point to the activation */ + sv->save_hdr.save_act = thread; sv->save_hdr.save_prev = 0; /* Mark no more */ sv->save_hdr.save_level = 0; /* Mark user state */ @@ -1043,9 +1088,9 @@ savearea *get_user_regs(thread_act_t act) { osv->save_hdr.save_prev = (addr64_t)((uintptr_t)sv); /* Chain us on the end */ } else { /* We are the first */ - act->mact.pcb = sv; /* Put it there */ + thread->machine.pcb = sv; /* Put it there */ } - act->mact.upcb = sv; /* Set user pcb */ + thread->machine.upcb = sv; /* Set user pcb */ for(i=0; i < 32; i+=2) { /* Fill up with defaults */ ((unsigned int *)&sv->save_r0)[i] = ((unsigned int *)&FloatInit)[0]; @@ -1057,6 +1102,8 @@ savearea *get_user_regs(thread_act_t act) { sv->save_ctr = (uint64_t)FloatInit; sv->save_srr0 = (uint64_t)FloatInit; sv->save_srr1 = (uint64_t)MSR_EXPORT_MASK_SET; + if(task_has_64BitAddr(thread->task)) + sv->save_srr1 |= (uint64_t)MASK32(MSR_SF) << 32; /* If 64-bit task, force 64-bit mode */ sv->save_fpscr = 0; /* Clear all floating point exceptions */ @@ -1074,15 +1121,21 @@ savearea *get_user_regs(thread_act_t act) { * we just return a 0. */ -savearea *find_user_regs(thread_act_t act) { - return act->mact.upcb; +savearea * +find_user_regs( + thread_t thread) +{ + return thread->machine.upcb; } /* The name of this call is something of a misnomer since the mact.pcb can * contain chained saveareas, but it will do for now.. */ -savearea *find_kern_regs(thread_act_t act) { - return act->mact.pcb; +savearea * +find_kern_regs( + thread_t thread) +{ + return thread->machine.pcb; } /* @@ -1090,13 +1143,15 @@ savearea *find_kern_regs(thread_act_t act) { * we just return a 0. 
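Note: get_user_regs above seeds a fresh user context by stamping every register word with the FloatInit eyecatcher, so a crash dump makes any use of an uninitialized register obvious. A standalone model of that fill loop, with an invented pattern value standing in for the kernel's FloatInit constant (the real value is defined in low-level startup code):

#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for the kernel's FloatInit eyecatcher value. */
static const uint64_t FloatInit = 0xC24BC24BC24BC24BULL;

int main(void)
{
    union {
        uint64_t regs[16];   /* model of the first saved 64-bit GPRs */
        uint32_t words[32];
    } sv;
    unsigned int i;

    for (i = 0; i < 32; i += 2) {   /* same word-pair indexing as the fill loop above */
        sv.words[i]     = ((const uint32_t *)&FloatInit)[0];
        sv.words[i + 1] = ((const uint32_t *)&FloatInit)[1];
    }

    printf("model r0 = 0x%016llx\n", (unsigned long long)sv.regs[0]);
    return 0;
}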
*/ -savearea_fpu *find_user_fpu(thread_act_t act) { - +savearea_fpu * +find_user_fpu( + thread_t thread) +{ savearea_fpu *fsv; - boolean_t intr; + boolean_t intr; - intr = ml_set_interrupts_enabled(FALSE); - fsv = act->mact.curctx->FPUsave; /* Get the start of the floating point chain */ + intr = ml_set_interrupts_enabled(FALSE); + fsv = thread->machine.curctx->FPUsave; /* Get the start of the floating point chain */ while(fsv) { /* Look until the end or we find it */ if(!(fsv->save_hdr.save_level)) break; /* Is the the user state stuff? (the level is 0 if so) */ @@ -1112,13 +1167,15 @@ savearea_fpu *find_user_fpu(thread_act_t act) { * we just return a 0. */ -savearea_vec *find_user_vec(thread_act_t act) { - +savearea_vec * +find_user_vec( + thread_t thread) +{ savearea_vec *vsv; - boolean_t intr; + boolean_t intr; - intr = ml_set_interrupts_enabled(FALSE); - vsv = act->mact.curctx->VMXsave; /* Get the start of the vector chain */ + intr = ml_set_interrupts_enabled(FALSE); + vsv = thread->machine.curctx->VMXsave; /* Get the start of the vector chain */ while(vsv) { /* Look until the end or we find it */ if(!(vsv->save_hdr.save_level)) break; /* Is the the user state stuff? (the level is 0 if so) */ @@ -1136,15 +1193,13 @@ savearea_vec *find_user_vec(thread_act_t act) { savearea_vec *find_user_vec_curr(void) { savearea_vec *vsv; - thread_act_t act; - boolean_t intr; - - act = current_act(); /* Get the current activation */ + thread_t thread = current_thread(); + boolean_t intr; - vec_save(act->mact.curctx); /* Force save if live */ + vec_save(thread->machine.curctx); /* Force save if live */ - intr = ml_set_interrupts_enabled(FALSE); - vsv = act->mact.curctx->VMXsave; /* Get the start of the vector chain */ + intr = ml_set_interrupts_enabled(FALSE); + vsv = thread->machine.curctx->VMXsave; /* Get the start of the vector chain */ while(vsv) { /* Look until the end or we find it */ if(!(vsv->save_hdr.save_level)) break; /* Is the the user state stuff? (the level is 0 if so) */ @@ -1163,26 +1218,23 @@ savearea_vec *find_user_vec_curr(void) { */ kern_return_t thread_userstack( - thread_t thread, + __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, - vm_offset_t *user_stack, + mach_vm_offset_t *user_stack, int *customstack ) { - struct ppc_thread_state *state; - /* * Set a default. */ - if (*user_stack == 0) - *user_stack = USRSTACK; - if (customstack) - *customstack = 0; switch (flavor) { case PPC_THREAD_STATE: + { + struct ppc_thread_state *state; + if (count < PPC_THREAD_STATE_COUNT) return (KERN_INVALID_ARGUMENT); @@ -1191,12 +1243,42 @@ thread_userstack( /* * If a valid user stack is specified, use it. */ - *user_stack = state->r1 ? state->r1: USRSTACK; - - if (customstack && state->r1) + if (state->r1) { + *user_stack = CAST_USER_ADDR_T(state->r1); + if (customstack) *customstack = 1; + } else { + *user_stack = CAST_USER_ADDR_T(USRSTACK); + if (customstack) + *customstack = 0; + } + } + break; + case PPC_THREAD_STATE64: + { + struct ppc_thread_state64 *state64; + + if (count < PPC_THREAD_STATE64_COUNT) + return (KERN_INVALID_ARGUMENT); + + state64 = (struct ppc_thread_state64 *)tstate; + + /* + * If a valid user stack is specified, use it. 
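Note: find_user_fpu and find_user_vec share one discipline — mask interrupts so the chain cannot move underfoot, then walk save_prev until the first savearea whose save_level is zero, which marks user state. A self-contained sketch of that walk; the structure is a reduced model, not the kernel's savearea layout.

#include <stddef.h>
#include <stdio.h>

/* Reduced model of a facility savearea chain: save_prev links toward
 * older contexts, save_level == 0 marks user state. */
struct sa {
    struct sa   *save_prev;
    unsigned int save_level;
};

static struct sa *find_user_level(struct sa *head)
{
    struct sa *s;

    /* The kernel brackets this walk with ml_set_interrupts_enabled(FALSE)
     * and a restore; a userspace model has nothing to mask. */
    for (s = head; s != NULL; s = s->save_prev) {
        if (s->save_level == 0)
            break;              /* first user-state savearea, if any */
    }
    return s;                   /* NULL when no user context exists yet */
}

int main(void)
{
    struct sa user = { NULL, 0 };
    struct sa kern = { &user, 0xF000 };   /* an interrupt-level context on top */

    printf("found level %u\n", find_user_level(&kern)->save_level);
    return 0;
}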
+ */ + if (state64->r1 != MACH_VM_MIN_ADDRESS) { + *user_stack = state64->r1; + if (customstack) + *customstack = 1; + } else { + *user_stack = USRSTACK64; + if (customstack) + *customstack = 0; + } + } break; + default : return (KERN_INVALID_ARGUMENT); } @@ -1211,13 +1293,14 @@ thread_userstack( * Sets the user stack pointer into the machine * dependent thread state info. */ -void thread_setuserstack(thread_act_t act, unsigned int user_stack) +void +thread_setuserstack(thread_t thread, mach_vm_address_t user_stack) { savearea *sv; - sv = get_user_regs(act); /* Get the user state registers */ + sv = get_user_regs(thread); /* Get the user state registers */ - sv->save_r1 = (uint64_t)user_stack; + sv->save_r1 = user_stack; return; } @@ -1226,17 +1309,18 @@ void thread_setuserstack(thread_act_t act, unsigned int user_stack) * thread_adjuserstack: * * Returns the adjusted user stack pointer from the machine - * dependent thread state info. + * dependent thread state info. Used for small (<2G) deltas. */ -unsigned int thread_adjuserstack(thread_act_t act, int adjust) +uint64_t +thread_adjuserstack(thread_t thread, int adjust) { savearea *sv; - sv = get_user_regs(act); /* Get the user state registers */ + sv = get_user_regs(thread); /* Get the user state registers */ - sv->save_r1 += adjust; /* Adjust the stack */ + sv->save_r1 += adjust; /* Adjust the stack */ - return (unsigned int)sv->save_r1; /* Return the adjusted stack */ + return sv->save_r1; /* Return the adjusted stack */ } @@ -1247,37 +1331,41 @@ unsigned int thread_adjuserstack(thread_act_t act, int adjust) * * Sets the entry point into the machine * dependent thread state info. */ -void thread_setentrypoint(thread_act_t act, unsigned int entry) +void +thread_setentrypoint(thread_t thread, uint64_t entry) { savearea *sv; - sv = get_user_regs(act); /* Get the user state registers */ + sv = get_user_regs(thread); /* Get the user state registers */ - sv->save_srr0 = (uint64_t)entry; + sv->save_srr0 = entry; return; } kern_return_t thread_entrypoint( - thread_t thread, + __unused thread_t thread, int flavor, thread_state_t tstate, unsigned int count, - vm_offset_t *entry_point + mach_vm_offset_t *entry_point ) { - struct ppc_thread_state *state; - +#if 0 + /* Silly code: "if *entry_point is 0, make it 0" */ /* * Set a default. */ - if (*entry_point == 0) - *entry_point = VM_MIN_ADDRESS; + if (*entry_point == 0ULL) + *entry_point = MACH_VM_MIN_ADDRESS; +#endif switch (flavor) { - case PPC_THREAD_STATE: + { + struct ppc_thread_state *state; + if (count < PPC_THREAD_STATE_COUNT) return (KERN_INVALID_ARGUMENT); @@ -1286,8 +1374,34 @@ thread_entrypoint( /* * If a valid entry point is specified, use it. */ - *entry_point = state->srr0 ? state->srr0: VM_MIN_ADDRESS; + if (state->srr0) { + *entry_point = CAST_USER_ADDR_T(state->srr0); + } else { + *entry_point = CAST_USER_ADDR_T(VM_MIN_ADDRESS); + } + } + break; + + case PPC_THREAD_STATE64: + { + struct ppc_thread_state64 *state64; + + if (count < PPC_THREAD_STATE64_COUNT) + return (KERN_INVALID_ARGUMENT); + + state64 = (struct ppc_thread_state64 *)tstate; + + /* + * If a valid entry point is specified, use it.
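Note: thread_userstack and thread_entrypoint follow the same shape — check the caller's count against the flavor's size, then prefer the register value and fall back to a platform default, reporting through *customstack whether the caller supplied its own stack. A compressed userspace model of the 64-bit stack case; the USRSTACK64 value here is invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define MACH_VM_MIN_ADDRESS 0ULL                    /* stand-in sentinel */
#define USRSTACK64          0x00007FFFFFFFF000ULL   /* invented default top */

/* Model of the PPC_THREAD_STATE64 branch of thread_userstack. */
static void pick_user_stack(uint64_t r1, uint64_t *user_stack, int *customstack)
{
    if (r1 != MACH_VM_MIN_ADDRESS) {
        *user_stack = r1;          /* caller supplied a stack */
        *customstack = 1;
    } else {
        *user_stack = USRSTACK64;  /* fall back to the default */
        *customstack = 0;
    }
}

int main(void)
{
    uint64_t sp;
    int custom;

    pick_user_stack(0x12340000ULL, &sp, &custom);
    printf("sp = 0x%llx custom = %d\n", (unsigned long long)sp, custom);

    pick_user_stack(0, &sp, &custom);
    printf("sp = 0x%llx custom = %d\n", (unsigned long long)sp, custom);
    return 0;
}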
+ */ + if (state64->srr0) { + *entry_point = state64->srr0; + } else { + *entry_point = MACH_VM_MIN_ADDRESS; + } + } break; + default: return (KERN_INVALID_ARGUMENT); } @@ -1327,7 +1441,10 @@ void ppc_checkthreadstate(void * tsptr, int flavor) return; } -void thread_set_child(thread_act_t child, int pid) +void +thread_set_child( + thread_t child, + int pid) { struct savearea *child_state; @@ -1336,7 +1453,10 @@ void thread_set_child(thread_act_t child, int pid) child_state->save_r3 = (uint_t)pid; child_state->save_r4 = 1ULL; } -void thread_set_parent(thread_act_t parent, int pid) +void +thread_set_parent( + thread_t parent, + int pid) { struct savearea *parent_state; @@ -1361,24 +1481,23 @@ void *act_thread_csave(void) { savearea *sv, *osv; savearea_fpu *fsv, *ofsv; savearea_vec *vsv, *ovsv; - unsigned int spc, i, *srs; - thread_act_t act; + thread_t thread; - act = current_act(); /* Find ourselves */ + thread = current_thread(); - fpu_save(act->mact.curctx); /* Make certain floating point state is all saved */ - vec_save(act->mact.curctx); /* Make certain the vector state is all saved */ + fpu_save(thread->machine.curctx); /* Make certain floating point state is all saved */ + vec_save(thread->machine.curctx); /* Make certain the vector state is all saved */ - osv = find_user_regs(act); /* Get our savearea */ + osv = find_user_regs(thread); /* Get our savearea */ if(!osv) { - panic("act_thread_csave: attempting to preserve the context of an activation with none (%08X)\n", act); + panic("act_thread_csave: attempting to preserve the context of an activation with none (%08X)\n", thread); } sv = save_alloc(); /* Get a fresh save area to save into */ sv->save_hdr.save_flags = (sv->save_hdr.save_flags & ~SAVtype) | (SAVgeneral << SAVtypeshft); /* Mark as in use as general */ - sv->save_hdr.save_act = (struct thread_activation *)act; /* Point to the activation */ + sv->save_hdr.save_act = thread; sv->save_hdr.save_prev = 0; /* Mark no more */ sv->save_hdr.save_level = 0; /* Mark user state */ @@ -1387,20 +1506,20 @@ void *act_thread_csave(void) { (char *)((unsigned int)sv + sizeof(savearea_comm)), sizeof(struct savearea) - sizeof(savearea_comm)); - sv->save_srr1 &= ~(MASK(MSR_FP) | MASK(MSR_VEC)); /* Make certain that floating point and vector are turned off */ + sv->save_srr1 &= (uint64_t)(~(MASK(MSR_FP) | MASK(MSR_VEC))); /* Make certain that floating point and vector are turned off */ sv->save_hdr.save_misc2 = 0xDEBB1ED0; /* Eye catcher for debug */ sv->save_hdr.save_misc3 = 0xE5DA11A5; /* Eye catcher for debug */ - ofsv = find_user_fpu(act); /* Get any user floating point */ + ofsv = find_user_fpu(thread); /* Get any user floating point */ sv->save_hdr.save_misc0 = 0; /* Assume no floating point */ if(ofsv) { /* Did we find one? 
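Note: thread_set_child and thread_set_parent encode fork's double return in two registers. Assuming the BSD fork path hands both calls the child's pid, r3 carries that pid in both threads while r4 is 1 in the child and 0 in the parent; the user-side stub can then collapse the pair into the familiar fork() result. A sketch of that decoding under the assumed convention:

#include <stdint.h>
#include <stdio.h>

/* Given the (r3, r4) pair set up by thread_set_child/thread_set_parent,
 * produce the classic fork() return value: 0 in the child, child pid in
 * the parent.  This mirrors what the libc syscall stub is assumed to do. */
static int32_t fork_return(uint32_t r3, uint32_t r4)
{
    return r4 ? 0 : (int32_t)r3;
}

int main(void)
{
    printf("parent sees %d\n", fork_return(714, 0)); /* -> child pid */
    printf("child sees  %d\n", fork_return(714, 1)); /* -> 0 */
    return 0;
}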
*/ fsv = (savearea_fpu *)save_alloc(); /* If we still don't have one, get a new one */ fsv->save_hdr.save_flags = (fsv->save_hdr.save_flags & ~SAVtype) | (SAVfloat << SAVtypeshft); /* Mark as in use as float */ - fsv->save_hdr.save_act = (struct thread_activation *)act; /* Point to the activation */ + fsv->save_hdr.save_act = thread; fsv->save_hdr.save_prev = 0; /* Mark no more */ fsv->save_hdr.save_level = 0; /* Mark user state */ fsv->save_hdr.save_misc2 = 0xDEBB1ED0; /* Eye catcher for debug */ @@ -1413,14 +1532,14 @@ void *act_thread_csave(void) { sizeof(struct savearea) - sizeof(savearea_comm)); } - ovsv = find_user_vec(act); /* Get any user vector */ + ovsv = find_user_vec(thread); /* Get any user vector */ sv->save_hdr.save_misc1 = 0; /* Assume no vector */ if(ovsv) { /* Did we find one? */ vsv = (savearea_vec *)save_alloc(); /* If we still don't have one, get a new one */ vsv->save_hdr.save_flags = (vsv->save_hdr.save_flags & ~SAVtype) | (SAVvector << SAVtypeshft); /* Mark as in use as float */ - vsv->save_hdr.save_act = (struct thread_activation *)act; /* Point to the activation */ + vsv->save_hdr.save_act = thread; vsv->save_hdr.save_prev = 0; /* Mark no more */ vsv->save_hdr.save_level = 0; /* Mark user state */ vsv->save_hdr.save_misc2 = 0xDEBB1ED0; /* Eye catcher for debug */ @@ -1454,8 +1573,8 @@ void act_thread_catt(void *ctx) { savearea *sv, *osv, *psv; savearea_fpu *fsv, *ofsv, *pfsv; savearea_vec *vsv, *ovsv, *pvsv; - unsigned int spc, i, *srs; - thread_act_t act; + unsigned int spc; + thread_t thread; sv = (savearea *)ctx; /* Make this easier for C */ @@ -1474,18 +1593,18 @@ void act_thread_catt(void *ctx) { panic("act_thread_catt: attempt to attach invalid vector context savearea - %08X\n", vsv); /* Die */ } - act = current_act(); /* Find ourselves */ + thread = current_thread(); - toss_live_fpu(act->mact.curctx); /* Toss my floating point if live anywhere */ - toss_live_vec(act->mact.curctx); /* Toss my vector if live anywhere */ + toss_live_fpu(thread->machine.curctx); /* Toss my floating point if live anywhere */ + toss_live_vec(thread->machine.curctx); /* Toss my vector if live anywhere */ sv->save_hdr.save_misc2 = 0; /* Eye catcher for debug */ sv->save_hdr.save_misc3 = 0; /* Eye catcher for debug */ - sv->save_hdr.save_act = (struct thread_activation *)act; /* Set us as owner */ + sv->save_hdr.save_act = thread; - spc = (unsigned int)act->map->pmap->space; /* Get the space we're in */ + spc = (unsigned int)thread->map->pmap->space; /* Get the space we're in */ - osv = act->mact.pcb; /* Get the top general savearea */ + osv = thread->machine.pcb; /* Get the top general savearea */ psv = 0; while(osv) { /* Any saved state? */ if(osv->save_srr1 & MASK(MSR_PR)) break; /* Leave if this is user state */ @@ -1495,17 +1614,17 @@ void act_thread_catt(void *ctx) { if(osv) { /* Did we find one? 
*/ if(psv) psv->save_hdr.save_prev = 0; /* Yes, clear pointer to it (it should always be last) or */ - else act->mact.pcb = 0; /* to the start if the only one */ + else thread->machine.pcb = 0; /* to the start if the only one */ save_release(osv); /* Nope, release it */ } if(psv) psv->save_hdr.save_prev = (addr64_t)((uintptr_t)sv); /* Chain us to the end or */ - else act->mact.pcb = (pcb_t)sv; /* to the start if the only one */ - act->mact.upcb = (pcb_t)sv; /* Set the user pcb */ + else thread->machine.pcb = (pcb_t)sv; /* to the start if the only one */ + thread->machine.upcb = (pcb_t)sv; /* Set the user pcb */ - ovsv = act->mact.curctx->VMXsave; /* Get the top vector savearea */ + ovsv = thread->machine.curctx->VMXsave; /* Get the top vector savearea */ pvsv = 0; while(ovsv) { /* Any VMX saved state? */ @@ -1516,21 +1635,21 @@ void act_thread_catt(void *ctx) { if(ovsv) { /* Did we find one? */ if(pvsv) pvsv->save_hdr.save_prev = 0; /* Yes, clear pointer to it (it should always be last) or */ - else act->mact.curctx->VMXsave = 0; /* to the start if the only one */ + else thread->machine.curctx->VMXsave = 0; /* to the start if the only one */ save_release((savearea *)ovsv); /* Nope, release it */ } if(vsv) { /* Are we sticking any vector on this one? */ if(pvsv) pvsv->save_hdr.save_prev = (addr64_t)((uintptr_t)vsv); /* Yes, chain us to the end or */ - else act->mact.curctx->VMXsave = vsv; /* to the start if the only one */ + else thread->machine.curctx->VMXsave = vsv; /* to the start if the only one */ vsv->save_hdr.save_misc2 = 0; /* Eye catcher for debug */ vsv->save_hdr.save_misc3 = 0; /* Eye catcher for debug */ - vsv->save_hdr.save_act = (struct thread_activation *)act; /* Set us as owner */ + vsv->save_hdr.save_act = thread; } - ofsv = act->mact.curctx->FPUsave; /* Get the top float savearea */ + ofsv = thread->machine.curctx->FPUsave; /* Get the top float savearea */ pfsv = 0; while(ofsv) { /* Any float saved state? */ @@ -1541,18 +1660,18 @@ void act_thread_catt(void *ctx) { if(ofsv) { /* Did we find one? */ if(pfsv) pfsv->save_hdr.save_prev = 0; /* Yes, clear pointer to it (it should always be last) or */ - else act->mact.curctx->FPUsave = 0; /* to the start if the only one */ + else thread->machine.curctx->FPUsave = 0; /* to the start if the only one */ save_release((savearea *)ofsv); /* Nope, release it */ } if(fsv) { /* Are we sticking any vector on this one? */ if(pfsv) pfsv->save_hdr.save_prev = (addr64_t)((uintptr_t)fsv); /* Yes, chain us to the end or */ - else act->mact.curctx->FPUsave = fsv; /* to the start if the only one */ + else thread->machine.curctx->FPUsave = fsv; /* to the start if the only one */ fsv->save_hdr.save_misc2 = 0; /* Eye catcher for debug */ fsv->save_hdr.save_misc3 = 0; /* Eye catcher for debug */ - fsv->save_hdr.save_act = (struct thread_activation *)act; /* Set us as owner */ + fsv->save_hdr.save_act = thread; } } @@ -1565,11 +1684,13 @@ void act_thread_catt(void *ctx) { * */ -void act_thread_cfree(void *ctx) { +void +act_thread_cfree(void *ctx) +{ - savearea *sv, *osv; - savearea_fpu *fsv, *ofsv; - savearea_vec *vsv, *ovsv, *pvsv; + savearea *sv; + savearea_fpu *fsv; + savearea_vec *vsv; sv = (savearea *)ctx; /* Make this easier for C */ @@ -1607,19 +1728,20 @@ void act_thread_cfree(void *ctx) { * enables or disables floating point exceptions for the thread. 
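Note: act_thread_catt's list surgery above reduces to one operation done three times (general, vector, float) — walk toward the user-level node keeping a trailing pointer, unlink it by fixing either the predecessor's link or the chain head, then splice the preserved context in its place. A minimal sketch of that unlink step on a simplified chain:

#include <stddef.h>
#include <stdio.h>

struct node {
    struct node *prev;   /* models save_hdr.save_prev */
    int          user;   /* models the "is user state" test */
};

/* Unlink the first user-state node, the trailing-pointer dance
 * act_thread_catt does with osv/psv; returns the removed node or NULL. */
static struct node *unlink_user(struct node **head)
{
    struct node *n = *head, *trail = NULL;

    while (n && !n->user) {       /* skip kernel-state nodes */
        trail = n;
        n = n->prev;
    }
    if (n) {
        if (trail) trail->prev = n->prev;  /* bypass it (prev is NULL when, as expected, it is last) */
        else       *head = n->prev;        /* or pop the head */
    }
    return n;
}

int main(void)
{
    struct node u = { NULL, 1 }, k = { &u, 0 };
    struct node *head = &k;

    printf("removed user node: %p\n", (void *)unlink_user(&head));
    printf("head now: %p (kernel node kept)\n", (void *)head);
    return 0;
}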
* returns old state */ -int thread_enable_fpe(thread_act_t act, int onoff) +int thread_enable_fpe( + thread_t thread, + int onoff) { savearea *sv; - unsigned int oldmsr; + uint64_t oldmsr; - sv = find_user_regs(act); /* Find the user registers */ - if(!sv) sv = get_user_regs(act); /* Didn't find any, allocate and initialize o -ne */ + sv = find_user_regs(thread); /* Find the user registers */ + if(!sv) sv = get_user_regs(thread); /* Didn't find any, allocate and initialize one */ - oldmsr = sv->save_srr1; /* Get the old msr */ + oldmsr = sv->save_srr1; /* Get the old msr */ - if(onoff) sv->save_srr1 = oldmsr | MASK(MSR_FE0) | MASK(MSR_FE1); /* Flip on precise FP exceptions */ - else sv->save_srr1 = oldmsr & ~(MASK(MSR_FE0) | MASK(MSR_FE1)); /* Flip on precise FP exceptions */ + if(onoff) sv->save_srr1 = oldmsr | (uint64_t)(MASK(MSR_FE0) | MASK(MSR_FE1)); /* Flip on precise FP exceptions */ + else sv->save_srr1 = oldmsr & (uint64_t)(~(MASK(MSR_FE0) | MASK(MSR_FE1))); /* Flip on precise FP exceptions */ - return ((oldmsr & (MASK(MSR_FE0) | MASK(MSR_FE1))) != 0); /* Return if it was enabled or not */ + return ((oldmsr & (MASK(MSR_FE0) | MASK(MSR_FE1))) != 0); /* Return if it was enabled or not */ } diff --git a/osfmk/ppc/thread.h b/osfmk/ppc/thread.h index 9b323dab9..3356d44c7 100644 --- a/osfmk/ppc/thread.h +++ b/osfmk/ppc/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -33,7 +33,153 @@ #ifndef _PPC_THREAD_H_ #define _PPC_THREAD_H_ -#include +#include +#include +#include +#include +#include +#include + +/* + * Kernel state structure + * + * This holds the kernel state that is saved and restored across context + * switches. + */ + +/* + * PPC process control block + * + * The PCB holds normal context. It does not contain vector or floating point + * registers. + * + */ + +typedef struct savearea pcb; +typedef struct savearea *pcb_t; + +struct facility_context { + + savearea_fpu *FPUsave; /* The floating point savearea */ + savearea *FPUlevel; /* The floating point context level */ + unsigned int FPUcpu; /* The last processor to enable floating point */ + unsigned int FPUsync; /* Sync lock */ + savearea_vec *VMXsave; /* The VMX savearea */ + savearea *VMXlevel; /* The VMX context level */ + unsigned int VMXcpu; /* The last processor to enable vector */ + unsigned int VMXsync; /* Sync lock */ + struct thread *facAct; +}; + +typedef struct facility_context facility_context; + +/* + * Maps state flavor to number of words in the state: + */ +__private_extern__ unsigned int _MachineStateCount[]; + +#define USER_REGS(ThrAct) ((ThrAct)->machine.pcb) + +#define user_pc(ThrAct) ((ThrAct)->machine.pcb->save_srr0) + +#define act_machine_state_ptr(ThrAct) (thread_state_t)USER_REGS(ThrAct) + +struct machine_thread { + /* + * pointer to process control block control blocks. Potentially + * one for each active facility context. They may point to the + * same saveareas. 
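Note: thread_enable_fpe is a read-modify-write of the saved MSR — FE0 and FE1 are set together for precise floating-point exceptions, cleared together to disable them, and the old setting is reported back. The same logic on a local MSR image; the bit numbers below follow the MSB-first convention but are illustrative rather than quoted from the headers.

#include <stdint.h>
#include <stdio.h>

#define MASK(bit)  (1U << (31 - (bit)))  /* PowerPC MSB-first bit numbering */
#define MSR_FE0    20                    /* FP exception mode bits (illustrative) */
#define MSR_FE1    23

/* Same read-modify-write as thread_enable_fpe, on a local MSR image;
 * returns whether precise FP exceptions were previously enabled. */
static int enable_fpe(uint64_t *srr1, int onoff)
{
    uint64_t oldmsr = *srr1;

    if (onoff) *srr1 = oldmsr |  (uint64_t)(MASK(MSR_FE0) | MASK(MSR_FE1));
    else       *srr1 = oldmsr & ~(uint64_t)(MASK(MSR_FE0) | MASK(MSR_FE1));

    return (oldmsr & (MASK(MSR_FE0) | MASK(MSR_FE1))) != 0;
}

int main(void)
{
    uint64_t srr1 = 0;

    printf("was enabled: %d\n", enable_fpe(&srr1, 1)); /* 0: off before */
    printf("was enabled: %d\n", enable_fpe(&srr1, 0)); /* 1: on before  */
    printf("srr1 = 0x%llx\n", (unsigned long long)srr1);
    return 0;
}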
+ */ + savearea *pcb; /* The "normal" savearea */ + savearea *upcb; /* The "normal" user savearea */ + facility_context *curctx; /* Current facility context */ + facility_context *deferctx; /* Deferred facility context */ + facility_context facctx; /* "Normal" facility context */ + struct vmmCntrlEntry *vmmCEntry; /* Pointer current emulation context or 0 */ + struct vmmCntrlTable *vmmControl; /* Pointer to virtual machine monitor control table */ + uint64_t qactTimer; /* Time thread needs to interrupt. This is a single-shot timer. Zero is unset */ + unsigned int umwSpace; /* Address space ID for user memory window */ +#define umwSwitchAway 0x80000000 /* Context switched away from thread since MapUserAddressWindow */ +#define umwSwitchAwayb 0 + addr64_t umwRelo; /* Relocation value for user memory window */ + unsigned int ksp; /* points to TOP OF STACK or zero */ + unsigned int preemption_count; /* preemption count */ + struct per_proc_info *PerProc; /* current per processor data */ + unsigned int bbDescAddr; /* Points to Blue Box Trap descriptor area in kernel (page aligned) */ + unsigned int bbUserDA; /* Points to Blue Box Trap descriptor area in user (page aligned) */ + unsigned int bbTableStart; /* Points to Blue Box Trap dispatch area in user */ + unsigned int emPendRupts; /* Number of pending emulated interruptions */ + unsigned int bbTaskID; /* Opaque task ID for Blue Box threads */ + unsigned int bbTaskEnv; /* Opaque task data reference for Blue Box threads */ + unsigned int specFlags; /* Special flags */ + unsigned int pmcovfl[8]; /* PMC overflow count */ + unsigned int perfmonFlags; /* Perfmon facility flags */ + unsigned int bbTrap; /* Blue Box trap vector */ + unsigned int bbSysCall; /* Blue Box syscall vector */ + unsigned int bbInterrupt; /* Blue Box interrupt vector */ + unsigned int bbPending; /* Blue Box pending interrupt vector */ + +/* special flags bits */ + +#define ignoreZeroFaultbit 0 +#define floatUsedbit 1 +#define vectorUsedbit 2 +#define runningVMbit 4 +#define floatCngbit 5 +#define vectorCngbit 6 +#define timerPopbit 7 +#define userProtKeybit 8 +#define FamVMenabit 11 +#define FamVMmodebit 12 +#define perfMonitorbit 13 +#define OnProcbit 14 +/* NOTE: Do not move or assign bit 31 without changing exception vector ultra fast path code */ +#define bbThreadbit 28 +#define bbNoMachSCbit 29 +#define bbPreemptivebit 30 +#define spfReserved1 31 /* See note above */ + +#define ignoreZeroFault 0x80000000 /* (1<<(31-ignoreZeroFaultbit)) */ +#define floatUsed 0x40000000 /* (1<<(31-floatUsedbit)) */ +#define vectorUsed 0x20000000 /* (1<<(31-vectorUsedbit)) */ + +#define runningVM 0x08000000 /* (1<<(31-runningVMbit)) */ +#define floatCng 0x04000000 /* (1<<(31-floatCngbit)) */ +#define vectorCng 0x02000000 /* (1<<(31-vectorCngbit)) */ +#define timerPop 0x01000000 /* (1<<(31-timerPopbit)) */ + +#define userProtKey 0x00800000 /* (1<<(31-userProtKeybit)) */ + +#define FamVMena 0x00100000 /* (1<<(31-FamVMenabit)) */ +#define FamVMmode 0x00080000 /* (1<<(31-FamVMmodebit)) */ +#define perfMonitor 0x00040000 /* (1<<(31-perfMonitorbit)) */ +#define OnProc 0x00020000 /* (1<<(31-OnProcbit)) */ + +#define bbThread 0x00000008 /* (1<<(31-bbThreadbit)) */ +#define bbNoMachSC 0x00000004 /* (1<<(31-bbNoMachSCbit)) */ +#define bbPreemptive 0x00000002 /* (1<<(31-bbPreemptivebit)) */ + +#define fvChkb 0 +#define fvChk 0x80000000 + +#ifdef MACH_BSD + uint64_t cthread_self; /* for use of cthread package */ +#endif + +}; + +extern struct savearea *find_user_regs(thread_t); +extern struct savearea 
*get_user_regs(thread_t); +extern struct savearea_fpu *find_user_fpu(thread_t); +extern struct savearea_vec *find_user_vec(thread_t); +extern struct savearea_vec *find_user_vec_curr(void); +extern int thread_enable_fpe(thread_t act, int onoff); + +extern struct savearea *find_kern_regs(thread_t); + +extern void *act_thread_csave(void); +extern void act_thread_catt(void *ctx); +extern void act_thread_cfree(void *ctx); /* * Return address of the function that called current function, given @@ -49,8 +195,6 @@ extern vm_offset_t getrpc(void); #define STACK_IKS(stack) \ ((vm_offset_t)(((vm_offset_t)stack)+KERNEL_STACK_SIZE)-FM_SIZE) -#define syscall_emulation_sync(task) /* do nothing */ - /* * Defining this indicates that MD code will supply an exception() * routine, conformant with kern/exception.c (dependency alert!) @@ -60,5 +204,3 @@ extern vm_offset_t getrpc(void); #define MACHINE_FAST_EXCEPTION 1 #endif /* _PPC_THREAD_H_ */ - - diff --git a/osfmk/ppc/thread_act.h b/osfmk/ppc/thread_act.h deleted file mode 100644 index 2586d5fb8..000000000 --- a/osfmk/ppc/thread_act.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ - -#ifndef _PPC_THREAD_ACT_H_ -#define _PPC_THREAD_ACT_H_ - -#include -#include -#include -#include -#include -#include -#include - -/* - * Kernel state structure - * - * This holds the kernel state that is saved and restored across context - * switches. - */ - -/* - * PPC process control block - * - * The PCB holds normal context. It does not contain vector or floating point - * registers. - * - */ - -typedef struct savearea pcb; -typedef struct savearea *pcb_t; - -struct facility_context { - - savearea_fpu *FPUsave; /* The floating point savearea */ - savearea *FPUlevel; /* The floating point context level */ - unsigned int FPUcpu; /* The last processor to enable floating point */ - unsigned int FPUsync; /* Sync lock */ - savearea_vec *VMXsave; /* The VMX savearea */ - savearea *VMXlevel; /* The VMX context level */ - unsigned int VMXcpu; /* The last processor to enable vector */ - unsigned int VMXsync; /* Sync lock */ - struct thread_activation *facAct; /* Activation associated with context */ -}; - -typedef struct facility_context facility_context; - -/* - * Maps state flavor to number of words in the state: - */ -extern unsigned int state_count[]; - -#define USER_REGS(ThrAct) ((ThrAct)->mact.pcb) - -#define user_pc(ThrAct) ((ThrAct)->mact.pcb->save_srr0) - -#define act_machine_state_ptr(ThrAct) (thread_state_t)USER_REGS(ThrAct) - -typedef struct MachineThrAct { - /* - * pointer to process control block control blocks. 
Potentially - * one for each active facility context. They may point to the - * same saveareas. - */ - savearea *pcb; /* The "normal" savearea */ - savearea *upcb; /* The "normal" user savearea */ - facility_context *curctx; /* Current facility context */ - facility_context *deferctx; /* Deferred facility context */ - facility_context facctx; /* "Normal" facility context */ - struct vmmCntrlEntry *vmmCEntry; /* Pointer current emulation context or 0 */ - struct vmmCntrlTable *vmmControl; /* Pointer to virtual machine monitor control table */ - uint64_t qactTimer; /* Time thread needs to interrupt. This is a single-shot timer. Zero is unset */ - unsigned int cioSpace; /* Address space ID for in progress copyin/out */ -#define cioSwitchAway 0x80000000 /* Context switched away from thread since MapUserAddressSpace */ -#define cioSwitchAwayb 0 - addr64_t cioRelo; /* Relocation value for in progress copyin/out */ - unsigned int ksp; /* points to TOP OF STACK or zero */ - unsigned int preemption_count; /* preemption count */ - unsigned int bbDescAddr; /* Points to Blue Box Trap descriptor area in kernel (page aligned) */ - unsigned int bbUserDA; /* Points to Blue Box Trap descriptor area in user (page aligned) */ - unsigned int bbTableStart; /* Points to Blue Box Trap dispatch area in user */ - unsigned int emPendRupts; /* Number of pending emulated interruptions */ - unsigned int bbTaskID; /* Opaque task ID for Blue Box threads */ - unsigned int bbTaskEnv; /* Opaque task data reference for Blue Box threads */ - unsigned int specFlags; /* Special flags */ - unsigned int pmcovfl[8]; /* PMC overflow count */ - unsigned int perfmonFlags; /* Perfmon facility flags */ - unsigned int bbTrap; /* Blue Box trap vector */ - unsigned int bbSysCall; /* Blue Box syscall vector */ - unsigned int bbInterrupt; /* Blue Box interrupt vector */ - unsigned int bbPending; /* Blue Box pending interrupt vector */ - -/* special flags bits */ - -#define ignoreZeroFaultbit 0 -#define floatUsedbit 1 -#define vectorUsedbit 2 -#define runningVMbit 4 -#define floatCngbit 5 -#define vectorCngbit 6 -#define timerPopbit 7 -#define userProtKeybit 8 -#define FamVMenabit 11 -#define FamVMmodebit 12 -#define perfMonitorbit 13 -#define OnProcbit 14 -/* NOTE: Do not move or assign bit 31 without changing exception vector ultra fast path code */ -#define bbThreadbit 28 -#define bbNoMachSCbit 29 -#define bbPreemptivebit 30 -#define spfReserved1 31 /* See note above */ - -#define ignoreZeroFault 0x80000000 /* (1<<(31-ignoreZeroFaultbit)) */ -#define floatUsed 0x40000000 /* (1<<(31-floatUsedbit)) */ -#define vectorUsed 0x20000000 /* (1<<(31-vectorUsedbit)) */ - -#define runningVM 0x08000000 /* (1<<(31-runningVMbit)) */ -#define floatCng 0x04000000 /* (1<<(31-floatCngbit)) */ -#define vectorCng 0x02000000 /* (1<<(31-vectorCngbit)) */ -#define timerPop 0x01000000 /* (1<<(31-timerPopbit)) */ - -#define userProtKey 0x00800000 /* (1<<(31-userProtKeybit)) */ - -#define FamVMena 0x00100000 /* (1<<(31-FamVMenabit)) */ -#define FamVMmode 0x00080000 /* (1<<(31-FamVMmodebit)) */ -#define perfMonitor 0x00040000 /* (1<<(31-perfMonitorbit)) */ -#define OnProc 0x00020000 /* (1<<(31-OnProcbit)) */ - -#define bbThread 0x00000008 /* (1<<(31-bbThreadbit)) */ -#define bbNoMachSC 0x00000004 /* (1<<(31-bbNoMachSCbit)) */ -#define bbPreemptive 0x00000002 /* (1<<(31-bbPreemptivebit)) */ - -#define fvChkb 0 -#define fvChk 0x80000000 - -#ifdef MACH_BSD - unsigned long cthread_self; /* for use of cthread package */ -#endif - -} MachineThrAct, *MachineThrAct_t; - 
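Note: each specFlags mask in these headers is written out by hand next to a comment of the form (1<<(31-<x>bit)). A few lines of C make that convention concrete; this check is illustrative and not part of the kernel build.

#include <stdio.h>

/* The specFlags convention: a flag named <x>bit is a PowerPC-style bit
 * number (0 = MSB), and its mask is 1 << (31 - <x>bit). */
#define SPF(bit)  (1U << (31 - (bit)))

#define floatUsedbit   1
#define vectorUsedbit  2
#define runningVMbit   4

int main(void)
{
    /* Reproduce three of the hand-written constants from the header. */
    printf("floatUsed  = 0x%08X (expect 0x40000000)\n", SPF(floatUsedbit));
    printf("vectorUsed = 0x%08X (expect 0x20000000)\n", SPF(vectorUsedbit));
    printf("runningVM  = 0x%08X (expect 0x08000000)\n", SPF(runningVMbit));
    return 0;
}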
-extern struct savearea *find_user_regs(thread_act_t act); -extern struct savearea *get_user_regs(thread_act_t); -extern struct savearea_fpu *find_user_fpu(thread_act_t act); -extern struct savearea_vec *find_user_vec(thread_act_t act); -extern struct savearea_vec *find_user_vec_curr(void); -extern int thread_enable_fpe(thread_act_t act, int onoff); - -extern struct savearea *find_kern_regs(thread_act_t act); - -extern void *act_thread_csave(void); -extern void act_thread_catt(void *ctx); -extern void act_thread_cfree(void *ctx); - -#define current_act_fast() current_act() - -#endif /* _PPC_THREAD_ACT_H_ */ diff --git a/osfmk/ppc/trap.c b/osfmk/ppc/trap.c index e87a876e9..1dafff22a 100644 --- a/osfmk/ppc/trap.c +++ b/osfmk/ppc/trap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -26,15 +26,21 @@ #include #include #include -#include + +#include +#include +#include + +#include #include #include #include #include #include -#include + #include #include /* For kernel_map */ + #include #include #include @@ -49,7 +55,8 @@ #include -perfTrap perfTrapHook = 0; /* Pointer to performance trap hook routine */ +perfCallback perfTrapHook = 0; /* Pointer to CHUD trap hook routine */ +perfCallback perfASTHook = 0; /* Pointer to CHUD AST hook routine */ #if MACH_KDB #include @@ -67,7 +74,6 @@ extern boolean_t db_breakpoints_inserted; #endif /* MACH_KDB */ -extern int debugger_active[NCPUS]; extern task_t bsd_init_task; extern char init_task_failure_data[]; extern int not_in_kdp; @@ -80,20 +86,25 @@ extern int not_in_kdp; * before calling doexception */ #define UPDATE_PPC_EXCEPTION_STATE { \ - thread_act_t thr_act = current_act(); \ - thr_act->mact.pcb->save_dar = (uint64_t)dar; \ - thr_act->mact.pcb->save_dsisr = dsisr; \ - thr_act->mact.pcb->save_exception = trapno / T_VECTOR_SIZE; /* back to powerpc */ \ + thread_t _thread = current_thread(); \ + _thread->machine.pcb->save_dar = (uint64_t)dar; \ + _thread->machine.pcb->save_dsisr = dsisr; \ + _thread->machine.pcb->save_exception = trapno / T_VECTOR_SIZE; /* back to powerpc */ \ } -static void unresolved_kernel_trap(int trapno, +void unresolved_kernel_trap(int trapno, struct savearea *ssp, unsigned int dsisr, addr64_t dar, - char *message); + const char *message); static void handleMck(struct savearea *ssp); /* Common machine check handler */ +#ifdef MACH_BSD +extern void get_procrustime(time_value_t *); +extern void bsd_uprofil(time_value_t *, user_addr_t); +#endif /* MACH_BSD */ + struct savearea *trap(int trapno, struct savearea *ssp, @@ -106,14 +117,24 @@ struct savearea *trap(int trapno, vm_map_t map; unsigned int sp; unsigned int space, space2; - unsigned int offset; - thread_act_t thr_act; + vm_map_offset_t offset; + thread_t thread = current_thread(); boolean_t intr; + ast_t *myast; #ifdef MACH_BSD time_value_t tv; #endif /* MACH_BSD */ + myast = ast_pending(); + if(perfASTHook) { + if(*myast & AST_PPC_CHUD_ALL) { + perfASTHook(trapno, ssp, dsisr, (unsigned int)dar); + } + } else { + *myast &= ~AST_PPC_CHUD_ALL; + } + if(perfTrapHook) { /* Is there a hook? */ if(perfTrapHook(trapno, ssp, dsisr, (unsigned int)dar) == KERN_SUCCESS) return ssp; /* If it succeeds, we are done... 
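Note: the perfTrapHook test at the top of trap() is a simple consume-or-fall-through hook — if a callback is registered and returns KERN_SUCCESS, the exception is considered handled and trap() returns immediately. A reduced model of that dispatch, with simplified stand-ins for the kernel types:

#include <stdio.h>

typedef int kern_ret_t;             /* stand-in for kern_return_t */
#define KERN_SUCCESS 0
#define KERN_FAILURE 5

struct savearea;                    /* opaque in this model */

typedef kern_ret_t (*perfCallback)(int trapno, struct savearea *ss,
                                   unsigned int dsisr, unsigned int dar);

static perfCallback perfTrapHook;   /* 0 means "no hook registered" */

static kern_ret_t chud_hook(int trapno, struct savearea *ss,
                            unsigned int dsisr, unsigned int dar)
{
    (void)ss; (void)dsisr; (void)dar;
    return (trapno == 7) ? KERN_SUCCESS : KERN_FAILURE; /* claim trap 7 only */
}

/* Mirrors the check at the top of trap(): a hook that returns
 * KERN_SUCCESS consumes the exception. */
static int trap_model(int trapno)
{
    if (perfTrapHook &&
        perfTrapHook(trapno, NULL, 0, 0) == KERN_SUCCESS)
        return 1;                   /* hook handled it */
    return 0;                       /* fall through to normal handling */
}

int main(void)
{
    perfTrapHook = chud_hook;
    printf("trap 7 consumed: %d\n", trap_model(7));
    printf("trap 3 consumed: %d\n", trap_model(3));
    return 0;
}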
*/ } @@ -125,7 +146,6 @@ struct savearea *trap(int trapno, } #endif - thr_act = current_act(); /* Get current activation */ exception = 0; /* Clear exception for now */ /* @@ -195,6 +215,10 @@ struct savearea *trap(int trapno, * tracing of unaligned accesses. */ + if(ssp->save_hdr.save_misc3) { /* Was it a handled exception? */ + unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL); /* Go panic */ + break; + } KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_EXCP_ALNG, 0) | DBG_FUNC_NONE, (int)ssp->save_srr0 - 4, (int)dar, (int)dsisr, (int)ssp->save_lr, 0); @@ -238,7 +262,7 @@ struct savearea *trap(int trapno, #if MACH_KDB mp_disable_preemption(); if (debug_mode - && debugger_active[cpu_number()] + && getPerProc()->debugger_active && !let_ddb_vm_fault) { /* * Force kdb to handle this one. @@ -249,7 +273,7 @@ struct savearea *trap(int trapno, #endif /* MACH_KDB */ /* can we take this during normal panic dump operation? */ if (debug_mode - && debugger_active[cpu_number()] + && getPerProc()->debugger_active && !not_in_kdp) { /* * Access fault while in kernel core dump. @@ -264,9 +288,9 @@ struct savearea *trap(int trapno, if(intr) ml_set_interrupts_enabled(TRUE); /* Enable if we were */ - if(((dar >> 28) < 0xE) | ((dar >> 28) > 0xF)) { /* Is this a copy in/out? */ + if(((dar >> 28) < 0xE) | ((dar >> 28) > 0xF)) { /* User memory window access? */ - offset = (unsigned int)dar; /* Set the failing address */ + offset = (vm_map_offset_t)dar; /* Set the failing address */ map = kernel_map; /* No, this is a normal kernel access */ /* @@ -275,14 +299,14 @@ struct savearea *trap(int trapno, * opened, it will clear the flag. */ if((0 == (offset & -PAGE_SIZE)) && /* Check for access of page 0 and */ - ((thr_act->mact.specFlags) & ignoreZeroFault)) { /* special case of ignoring page zero faults */ + ((thread->machine.specFlags) & ignoreZeroFault)) { /* special case of ignoring page zero faults */ ssp->save_srr0 += 4; /* Point to next instruction */ break; } - code = vm_fault(map, trunc_page_32(offset), + code = vm_fault(map, vm_map_trunc_page(offset), dsisr & MASK(DSISR_WRITE) ? PROT_RW : PROT_RO, - FALSE, THREAD_UNINT, NULL, 0); + FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0)); if (code != KERN_SUCCESS) { unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL); @@ -294,28 +318,23 @@ struct savearea *trap(int trapno, break; } - /* If we get here, the fault was due to a copyin/out */ + /* If we get here, the fault was due to a user memory window access */ - map = thr_act->map; + map = thread->map; - offset = (unsigned int)(thr_act->mact.cioRelo + dar); /* Compute the user space address */ + offset = (vm_map_offset_t)(thread->machine.umwRelo + dar); /* Compute the user space address */ - code = vm_fault(map, trunc_page_32(offset), + code = vm_fault(map, vm_map_trunc_page(offset), dsisr & MASK(DSISR_WRITE) ? PROT_RW : PROT_RO, - FALSE, THREAD_UNINT, NULL, 0); + FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0)); /* If we failed, there should be a recovery * spot to rfi to. 
*/ if (code != KERN_SUCCESS) { - - if (thr_act->thread->recover) { - - act_lock_thread(thr_act); - ssp->save_srr0 = thr_act->thread->recover; - thr_act->thread->recover = - (vm_offset_t)NULL; - act_unlock_thread(thr_act); + if (thread->recover) { + ssp->save_srr0 = thread->recover; + thread->recover = (vm_offset_t)NULL; } else { unresolved_kernel_trap(trapno, ssp, dsisr, dar, "copyin/out has no recovery point"); } @@ -332,7 +351,7 @@ struct savearea *trap(int trapno, #if MACH_KDB if (debug_mode - && debugger_active[cpu_number()] + && getPerProc()->debugger_active && !let_ddb_vm_fault) { /* * Force kdb to handle this one. @@ -349,8 +368,8 @@ struct savearea *trap(int trapno, map = kernel_map; - code = vm_fault(map, trunc_page_64(ssp->save_srr0), - PROT_EXEC, FALSE, THREAD_UNINT, NULL, 0); + code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0), + PROT_EXEC, FALSE, THREAD_UNINT, NULL, vm_map_trunc_page(0)); if (code != KERN_SUCCESS) { unresolved_kernel_trap(trapno, ssp, dsisr, dar, NULL); @@ -376,12 +395,15 @@ struct savearea *trap(int trapno, } } else { - ml_set_interrupts_enabled(TRUE); /* Processing for user state traps is always enabled */ + /* + * Processing for user state traps with interrupt enabled + * For T_AST, interrupts are enabled in the AST delivery + */ + if (trapno != T_AST) + ml_set_interrupts_enabled(TRUE); #ifdef MACH_BSD { - void get_procrustime(time_value_t *); - get_procrustime(&tv); } #endif /* MACH_BSD */ @@ -450,6 +472,12 @@ struct savearea *trap(int trapno, KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_EXCP_ALNG, 0) | DBG_FUNC_NONE, (int)ssp->save_srr0 - 4, (int)dar, (int)dsisr, (int)ssp->save_lr, 0); + + if(ssp->save_hdr.save_misc3) { /* Was it a handled exception? */ + exception = EXC_BAD_ACCESS; /* Yes, throw exception */ + code = EXC_PPC_UNALIGNED; + subcode = (unsigned int)dar; + } break; case T_EMULATE: @@ -480,7 +508,7 @@ struct savearea *trap(int trapno, case T_PROGRAM: if (ssp->save_srr1 & MASK(SRR1_PRG_FE)) { - fpu_save(thr_act->mact.curctx); + fpu_save(thread->machine.curctx); UPDATE_PPC_EXCEPTION_STATE; exception = EXC_ARITHMETIC; code = EXC_ARITHMETIC; @@ -503,10 +531,10 @@ struct savearea *trap(int trapno, subcode = (unsigned int)ssp->save_srr0; } else if (ssp->save_srr1 & MASK(SRR1_PRG_TRAP)) { unsigned int inst; - char *iaddr; + //char *iaddr; - iaddr = CAST_DOWN(char *, ssp->save_srr0); /* Trim from long long and make a char pointer */ - if (copyin(iaddr, (char *) &inst, 4 )) panic("copyin failed\n"); + //iaddr = CAST_DOWN(char *, ssp->save_srr0); /* Trim from long long and make a char pointer */ + if (copyin(ssp->save_srr0, (char *) &inst, 4 )) panic("copyin failed\n"); if(dgWork.dgFlags & enaDiagTrap) { /* Is the diagnostic trap enabled? */ if((inst & 0xFFFFFFF0) == 0x0FFFFFF0) { /* Is this a TWI 31,R31,0xFFFx? */ @@ -539,7 +567,7 @@ struct savearea *trap(int trapno, break; case T_DATA_ACCESS: - map = thr_act->map; + map = thread->map; if(ssp->save_dsisr & dsiInvMode) { /* Did someone try to reserve cache inhibited? */ UPDATE_PPC_EXCEPTION_STATE; /* Don't even bother VM with this one */ @@ -548,9 +576,9 @@ struct savearea *trap(int trapno, break; } - code = vm_fault(map, trunc_page_64(dar), + code = vm_fault(map, vm_map_trunc_page(dar), dsisr & MASK(DSISR_WRITE) ? 
PROT_RW : PROT_RO, - FALSE, THREAD_ABORTSAFE, NULL, 0); + FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0)); if ((code != KERN_SUCCESS) && (code != KERN_ABORTED)) { UPDATE_PPC_EXCEPTION_STATE; @@ -567,10 +595,10 @@ struct savearea *trap(int trapno, /* Same as for data access, except fault type * is PROT_EXEC and addr comes from srr0 */ - map = thr_act->map; + map = thread->map; - code = vm_fault(map, trunc_page_64(ssp->save_srr0), - PROT_EXEC, FALSE, THREAD_ABORTSAFE, NULL, 0); + code = vm_fault(map, vm_map_trunc_page(ssp->save_srr0), + PROT_EXEC, FALSE, THREAD_ABORTSAFE, NULL, vm_map_trunc_page(0)); if ((code != KERN_SUCCESS) && (code != KERN_ABORTED)) { UPDATE_PPC_EXCEPTION_STATE; @@ -584,15 +612,12 @@ struct savearea *trap(int trapno, break; case T_AST: - ml_set_interrupts_enabled(FALSE); - ast_taken(AST_ALL, intr); + /* AST delivery is done below */ break; } #ifdef MACH_BSD { - void bsd_uprofil(time_value_t *, unsigned int); - bsd_uprofil(&tv, ssp->save_srr0); } #endif /* MACH_BSD */ @@ -639,7 +664,7 @@ struct savearea *trap(int trapno, for (i = 0; i < 8; i++) { if (addr == (char*)NULL) break; - if (!copyin(addr,(char*)stack_buf, + if (!copyin(ssp->save_r1,(char*)stack_buf, 3 * sizeof(int))) { buf += sprintf(buf, "0x%08X : 0x%08X\n" ,addr,stack_buf[2]); @@ -658,11 +683,15 @@ struct savearea *trap(int trapno, * Check to see if we need an AST, if so take care of it here */ ml_set_interrupts_enabled(FALSE); - if (USER_MODE(ssp->save_srr1)) - while (ast_needed(cpu_number())) { + + if (USER_MODE(ssp->save_srr1)) { + myast = ast_pending(); + while (*myast & AST_ALL) { ast_taken(AST_ALL, intr); ml_set_interrupts_enabled(FALSE); + myast = ast_pending(); } + } return ssp; } @@ -749,7 +778,7 @@ doexception( codes[0] = code; codes[1] = sub; - exception(exc, codes, 2); + exception_triage(exc, codes, 2); } char *trap_type[] = { @@ -799,18 +828,19 @@ void unresolved_kernel_trap(int trapno, struct savearea *ssp, unsigned int dsisr, addr64_t dar, - char *message) + const char *message) { char *trap_name; extern void print_backtrace(struct savearea *); extern unsigned int debug_mode, disableDebugOuput; + extern unsigned long panic_caller; ml_set_interrupts_enabled(FALSE); /* Turn off interruptions */ lastTrace = LLTraceSet(0); /* Disable low-level tracing */ if( logPanicDataToScreen ) disableDebugOuput = FALSE; - + debug_mode++; if ((unsigned)trapno <= T_MAX) trap_name = trap_type[trapno / T_VECTOR_SIZE]; @@ -824,6 +854,7 @@ void unresolved_kernel_trap(int trapno, print_backtrace(ssp); + panic_caller = (0xFFFF0000 | (trapno / T_VECTOR_SIZE) ); draw_panic_dialog(); if( panicDebugging ) @@ -831,7 +862,7 @@ void unresolved_kernel_trap(int trapno, panic(message); } -char *corr[2] = {"uncorrected", "corrected "}; +const char *corr[2] = {"uncorrected", "corrected "}; void handleMck(struct savearea *ssp) { /* Common machine check handler */ @@ -859,8 +890,8 @@ void thread_syscall_return( kern_return_t ret) { - register thread_act_t thr_act = current_act(); - register struct savearea *regs = USER_REGS(thr_act); + register thread_t thread = current_thread(); + register struct savearea *regs = USER_REGS(thread); if (kdebug_enable && ((unsigned int)regs->save_r0 & 0x80000000)) { /* Mach trap */ @@ -878,15 +909,10 @@ thread_syscall_return( void thread_kdb_return(void) { - register thread_act_t thr_act = current_act(); - register thread_t cur_thr = current_thread(); - register struct savearea *regs = USER_REGS(thr_act); - - Call_Debugger(thr_act->mact.pcb->save_exception, regs); -#if MACH_LDEBUG - 
assert(cur_thr->mutex_count == 0); -#endif /* MACH_LDEBUG */ - check_simple_locks(); + register thread_t thread = current_thread(); + register struct savearea *regs = USER_REGS(thread); + + Call_Debugger(thread->machine.pcb->save_exception, regs); thread_exception_return(); /*NOTREACHED*/ } diff --git a/osfmk/ppc/trap.h b/osfmk/ppc/trap.h index ace6cd0e8..bb758d7d5 100644 --- a/osfmk/ppc/trap.h +++ b/osfmk/ppc/trap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,7 +55,7 @@ #define NARGS 12 /* Size to reserve in frame for arguments - first 8 are in registers */ #define ARG_SIZE FM_ALIGN((NARGS-8)*4) - +#define MUNGE_ARGS_SIZE FM_ALIGN(8*8) /* * Hardware exception vectors for powerpc are in exception.h @@ -70,18 +70,17 @@ extern void doexception(int exc, int code, int sub); -extern void thread_exception_return(void); - extern struct savearea* trap(int trapno, struct savearea *ss, unsigned int dsisr, addr64_t dar); -typedef kern_return_t (*perfTrap)(int trapno, struct savearea *ss, +typedef kern_return_t (*perfCallback)(int trapno, struct savearea *ss, unsigned int dsisr, addr64_t dar); -extern perfTrap perfTrapHook; -extern perfTrap perfIntHook; +extern perfCallback perfTrapHook; +extern perfCallback perfASTHook; +extern perfCallback perfIntHook; extern struct savearea* interrupt(int intno, struct savearea *ss, diff --git a/osfmk/ppc/vmachmon.c b/osfmk/ppc/vmachmon.c index 62d8ff31b..c4583336b 100644 --- a/osfmk/ppc/vmachmon.c +++ b/osfmk/ppc/vmachmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,18 +30,19 @@ #include #include #include +#include #include #include #include -#include #include #include -#include +#include #include +#include #include +#include -extern struct Saveanchor saveanchor; /* Aligned savearea anchor */ extern double FloatInit; extern unsigned long QNaNbarbarian[4]; @@ -62,8 +63,8 @@ extern unsigned long QNaNbarbarian[4]; ** address of a vmmCntrlEntry or 0 if not found -----------------------------------------------------------------------*/ -vmmCntrlEntry *vmm_get_entry( - thread_act_t act, +static vmmCntrlEntry *vmm_get_entry( + thread_t act, vmm_thread_index_t index) { vmmCntrlTable *CTable; @@ -71,10 +72,10 @@ vmmCntrlEntry *vmm_get_entry( index = index & vmmTInum; /* Clean up the index */ - if (act->mact.vmmControl == 0) return NULL; /* No control table means no vmm */ + if (act->machine.vmmControl == 0) return NULL; /* No control table means no vmm */ if ((index - 1) >= kVmmMaxContexts) return NULL; /* Index not in range */ - CTable = act->mact.vmmControl; /* Make the address a bit more convienient */ + CTable = act->machine.vmmControl; /* Make the address a bit more convienient */ CEntry = &CTable->vmmc[index - 1]; /* Point to the entry */ if (!(CEntry->vmmFlags & vmmInUse)) return NULL; /* See if the slot is actually in use */ @@ -97,25 +98,247 @@ vmmCntrlEntry *vmm_get_entry( ** Note that if there is no pmap for the address space it will be created. 
-----------------------------------------------------------------------*/ -pmap_t vmm_get_adsp(thread_act_t act, vmm_thread_index_t index) +static pmap_t vmm_get_adsp(thread_t act, vmm_thread_index_t index) { pmap_t pmap; - if (act->mact.vmmControl == 0) return NULL; /* No control table means no vmm */ + if (act->machine.vmmControl == 0) return NULL; /* No control table means no vmm */ if ((index - 1) >= kVmmMaxContexts) return NULL; /* Index not in range */ - pmap = act->mact.vmmControl->vmmAdsp[index - 1]; /* Get the pmap */ - if(pmap) return pmap; /* We've got it... */ + pmap = act->machine.vmmControl->vmmAdsp[index - 1]; /* Get the pmap */ + return (pmap); /* and return it. */ +} - pmap = pmap_create(0); /* Make a fresh one */ - act->mact.vmmControl->vmmAdsp[index - 1] = pmap; /* Remember it */ -/* - * Note that if the create fails, we will return a null. - */ - return pmap; /* Return it... */ +/*----------------------------------------------------------------------- +** vmm_build_shadow_hash +** +** Allocate and initialize a shadow hash table. +** +** This function assumes that PAGE_SIZE is 4k-bytes. +** +-----------------------------------------------------------------------*/ +static pmap_vmm_ext *vmm_build_shadow_hash(pmap_t pmap) +{ + pmap_vmm_ext *ext; /* VMM pmap extension we're building */ + ppnum_t extPP; /* VMM pmap extension physical page number */ + kern_return_t ret; /* Return code from various calls */ + uint32_t pages = GV_HPAGES; /* Number of pages in the hash table */ + vm_offset_t free = VMX_HPIDX_OFFSET; /* Offset into extension page of free area (128-byte aligned) */ + uint32_t freeSize = PAGE_SIZE - free; /* Number of free bytes in the extension page */ + + if ((pages * sizeof(addr64_t)) + (pages * sizeof(vm_offset_t)) > freeSize) { + panic("vmm_build_shadow_hash: too little pmap_vmm_ext free space\n"); + } + + ret = kmem_alloc_wired(kernel_map, (vm_offset_t *)&ext, PAGE_SIZE); + /* Allocate a page-sized extension block */ + if (ret != KERN_SUCCESS) return (NULL); /* Return NULL for failed allocate */ + bzero((char *)ext, PAGE_SIZE); /* Zero the entire extension block page */ + + extPP = pmap_find_phys(kernel_pmap, (vm_offset_t)ext); + /* Get extension block's physical page number */ + if (!extPP) { /* This should not fail, but then again... */ + panic("vmm_build_shadow_hash: could not translate pmap_vmm_ext vaddr %08X\n", ext); + } + + ext->vmxSalt = (addr64_t)(vm_offset_t)ext ^ ptoa_64(extPP); + /* Set effective<->physical conversion salt */ + ext->vmxHostPmapPhys = (addr64_t)(vm_offset_t)pmap ^ pmap->pmapvr; + /* Set host pmap's physical address */ + ext->vmxHostPmap = pmap; /* Set host pmap's effective address */ + ext->vmxHashPgIdx = (addr64_t *)((vm_offset_t)ext + VMX_HPIDX_OFFSET); + /* Allocate physical index */ + ext->vmxHashPgList = (vm_offset_t *)((vm_offset_t)ext + VMX_HPLIST_OFFSET); + /* Allocate page list */ + ext->vmxActiveBitmap = (vm_offset_t *)((vm_offset_t)ext + VMX_ACTMAP_OFFSET); + /* Allocate active mapping bitmap */ + + /* The hash table is typically larger than a single page, but we don't require it to be in a + contiguous virtual or physical chunk. So, we allocate it page by page, noting the effective and + physical address of each page in vmxHashPgList and vmxHashPgIdx, respectively. 
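Note: vmxSalt, set in vmm_build_shadow_hash above, is the XOR of the extension block's effective and physical addresses, so code holding either address (plus the salt) recovers the other with a single XOR and no table lookup. A standalone demonstration with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Invented example addresses for one page-sized block. */
    uint64_t effective = 0x00000000DEAD6000ULL;
    uint64_t physical  = 0x000000003CAFE000ULL;

    uint64_t salt = effective ^ physical;   /* vmxSalt */

    /* Either address plus the salt recovers the other. */
    printf("phys from eff: 0x%llx\n", (unsigned long long)(effective ^ salt));
    printf("eff from phys: 0x%llx\n", (unsigned long long)(physical ^ salt));
    return 0;
}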
*/ + uint32_t idx; + for (idx = 0; idx < pages; idx++) { + ret = kmem_alloc_wired(kernel_map, &ext->vmxHashPgList[idx], PAGE_SIZE); + /* Allocate a hash-table page */ + if (ret != KERN_SUCCESS) goto fail; /* Allocation failed, exit through cleanup */ + bzero((char *)ext->vmxHashPgList[idx], PAGE_SIZE); /* Zero the page */ + ext->vmxHashPgIdx[idx] = ptoa_64(pmap_find_phys(kernel_pmap, (addr64_t)ext->vmxHashPgList[idx])); + /* Put page's physical address into index */ + if (!ext->vmxHashPgIdx[idx]) { /* Hash-table page's LRA failed */ + panic("vmm_build_shadow_hash: could not translate hash-table vaddr %08X\n", ext->vmxHashPgList[idx]); + } + mapping_t *map = (mapping_t *)ext->vmxHashPgList[idx]; + uint32_t mapIdx; + for (mapIdx = 0; mapIdx < GV_SLTS_PPG; mapIdx++) { /* Iterate over mappings in this page */ + map->mpFlags = (mpGuest | mpgFree); /* Mark guest type and free */ + map = (mapping_t *)((char *)map + GV_SLOT_SZ); /* Next slot-sized mapping */ + } + } + + return (ext); /* Return newly-minted VMM pmap extension */ + +fail: + for (idx = 0; idx < pages; idx++) { /* De-allocate any pages we managed to allocate */ + if (ext->vmxHashPgList[idx]) { + kmem_free(kernel_map, ext->vmxHashPgList[idx], PAGE_SIZE); + } + } + kmem_free(kernel_map, (vm_offset_t)ext, PAGE_SIZE); /* Release the VMM pmap extension page */ + return (NULL); /* Return NULL for failure */ +} + + +/*----------------------------------------------------------------------- +** vmm_release_shadow_hash +** +** Release shadow hash table and VMM extension block +** +-----------------------------------------------------------------------*/ +static void vmm_release_shadow_hash(pmap_vmm_ext *ext) +{ + uint32_t idx; + + for (idx = 0; idx < GV_HPAGES; idx++) { /* Release the hash table page by page */ + kmem_free(kernel_map, ext->vmxHashPgList[idx], PAGE_SIZE); + } + + kmem_free(kernel_map, (vm_offset_t)ext, PAGE_SIZE); /* Release the VMM pmap extension page */ +} + +/*----------------------------------------------------------------------- +** vmm_activate_gsa +** +** Activate guest shadow assist +** +-----------------------------------------------------------------------*/ +static kern_return_t vmm_activate_gsa( + thread_t act, + vmm_thread_index_t index) +{ + vmmCntrlTable *CTable = act->machine.vmmControl; /* Get VMM control table */ + if (!CTable) { /* Caller guarantees that this will work */ + panic("vmm_activate_gsa: VMM control table not present; act = %08X, idx = %d\n", + act, index); + return KERN_FAILURE; + } + vmmCntrlEntry *CEntry = vmm_get_entry(act, index); /* Get context from index */ + if (!CEntry) { /* Caller guarantees that this will work */ + panic("vmm_activate_gsa: Unexpected failure of vmm_get_entry; act = %08X, idx = %d\n", + act, index); + return KERN_FAILURE; + } + + pmap_t hpmap = act->map->pmap; /* Get host pmap */ + pmap_t gpmap = vmm_get_adsp(act, index); /* Get guest pmap */ + if (!gpmap) { /* Caller guarantees that this will work */ + panic("vmm_activate_gsa: Unexpected failure of vmm_get_adsp; act = %08X, idx = %d\n", + act, index); + return KERN_FAILURE; + } + + if (!hpmap->pmapVmmExt) { /* If there's no VMM extension for this host, create one */ + hpmap->pmapVmmExt = vmm_build_shadow_hash(hpmap); /* Build VMM extension plus shadow hash and attach */ + if (hpmap->pmapVmmExt) { /* See if we succeeded */ + hpmap->pmapVmmExtPhys = (addr64_t)(vm_offset_t)hpmap->pmapVmmExt ^ hpmap->pmapVmmExt->vmxSalt; + /* Get VMM extensions block physical address */ + } else { + return KERN_RESOURCE_SHORTAGE; /* Not 
enough mojo to go */ + } + } + gpmap->pmapVmmExt = hpmap->pmapVmmExt; /* Copy VMM extension block virtual address into guest */ + gpmap->pmapVmmExtPhys = hpmap->pmapVmmExtPhys; /* and its physical address, too */ + gpmap->pmapFlags |= pmapVMgsaa; /* Enable GSA for this guest */ + CEntry->vmmXAFlgs |= vmmGSA; /* Show GSA active here, too */ + + return KERN_SUCCESS; +} + + +/*----------------------------------------------------------------------- +** vmm_deactivate_gsa +** +** Deactivate guest shadow assist +** +-----------------------------------------------------------------------*/ +static void vmm_deactivate_gsa( + thread_t act, + vmm_thread_index_t index) +{ + vmmCntrlEntry *CEntry = vmm_get_entry(act, index); /* Get context from index */ + if (!CEntry) { /* Caller guarantees that this will work */ + panic("vmm_deactivate_gsa: Unexpected failure of vmm_get_entry; act = %08X, idx = %d\n", + act, index); + return; /* Function returns void, so return no value */ + } + + pmap_t gpmap = vmm_get_adsp(act, index); /* Get guest pmap */ + if (!gpmap) { /* Caller guarantees that this will work */ + panic("vmm_deactivate_gsa: Unexpected failure of vmm_get_adsp; act = %08X, idx = %d\n", + act, index); + return; /* Function returns void, so return no value */ + } + + gpmap->pmapFlags &= ~pmapVMgsaa; /* Deactivate GSA for this guest */ + CEntry->vmmXAFlgs &= ~vmmGSA; /* Show GSA deactivated here, too */ } +/*----------------------------------------------------------------------- +** vmm_flush_context +** +** Flush specified guest context, purging all guest mappings and clearing +** the context page. +** +-----------------------------------------------------------------------*/ +static void vmm_flush_context( + thread_t act, + vmm_thread_index_t index) +{ + vmmCntrlEntry *CEntry; + vmmCntrlTable *CTable; + vmm_state_page_t *vks; + vmm_version_t version; + + CEntry = vmm_get_entry(act, index); /* Convert index to entry */ + if (!CEntry) { /* Caller guarantees that this will work */ + panic("vmm_flush_context: Unexpected failure of vmm_get_entry; act = %08X, idx = %d\n", + act, index); + return; + } + + if(CEntry->vmmFacCtx.FPUsave) { /* Is there any floating point context? */ + toss_live_fpu(&CEntry->vmmFacCtx); /* Get rid of any live context here */ + save_release((savearea *)CEntry->vmmFacCtx.FPUsave); /* Release it */ + } + + if(CEntry->vmmFacCtx.VMXsave) { /* Is there any vector context?
*/ + toss_live_vec(&CEntry->vmmFacCtx); /* Get rid of any live context here */ + save_release((savearea *)CEntry->vmmFacCtx.VMXsave); /* Release it */ + } + + vmm_unmap_all_pages(act, index); /* Blow away all mappings for this context */ + + CTable = act->machine.vmmControl; /* Get the control table address */ + CTable->vmmGFlags = CTable->vmmGFlags & ~vmmLastAdSp; /* Make sure we don't try to automap into this */ + + CEntry->vmmFlags &= vmmInUse; /* Clear out all of the flags for this entry except in use */ + CEntry->vmmFacCtx.FPUsave = 0; /* Clear facility context control */ + CEntry->vmmFacCtx.FPUlevel = 0; /* Clear facility context control */ + CEntry->vmmFacCtx.FPUcpu = 0; /* Clear facility context control */ + CEntry->vmmFacCtx.VMXsave = 0; /* Clear facility context control */ + CEntry->vmmFacCtx.VMXlevel = 0; /* Clear facility context control */ + CEntry->vmmFacCtx.VMXcpu = 0; /* Clear facility context control */ + + vks = CEntry->vmmContextKern; /* Get address of the context page */ + version = vks->interface_version; /* Save the version code */ + bzero((char *)vks, 4096); /* Clear all */ + + vks->interface_version = version; /* Set our version code */ + vks->thread_index = index % vmmTInum; /* Tell the user the index for this virtual machine */ + + return; /* Context is now flushed */ +} + /************************************************************************************* Virtual Machine Monitor Exported Functionality @@ -168,7 +391,7 @@ int vmm_get_version(struct savearea *save) int vmm_get_features(struct savearea *save) { save->save_r3 = kVmmCurrentFeatures; /* Return the features */ - if(per_proc_info->pf.Available & pf64Bit) { + if(getPerProc()->pf.Available & pf64Bit) { save->save_r3 &= ~kVmmFeature_LittleEndian; /* No little endian here */ save->save_r3 |= kVmmFeature_SixtyFourBit; /* Set that we can do 64-bit */ } @@ -185,7 +408,7 @@ int vmm_get_features(struct savearea *save) ** Returns max address -----------------------------------------------------------------------*/ -addr64_t vmm_max_addr(thread_act_t act) +addr64_t vmm_max_addr(thread_t act) { return vm_max_address; /* Return the maximum address */ } @@ -211,7 +434,7 @@ addr64_t vmm_max_addr(thread_act_t act) -----------------------------------------------------------------------*/ unsigned int vmm_get_XA( - thread_act_t act, + thread_t act, vmm_thread_index_t index) { vmmCntrlEntry *CEntry; @@ -245,7 +468,7 @@ unsigned int vmm_get_XA( int vmm_init_context(struct savearea *save) { - thread_act_t act; + thread_t act; vmm_version_t version; vmm_state_page_t * vmm_user_state; vmmCntrlTable *CTable; @@ -253,10 +476,9 @@ int vmm_init_context(struct savearea *save) vmm_state_page_t * vks; ppnum_t conphys; kern_return_t ret; - pmap_t new_pmap; int cvi, i; task_t task; - thread_act_t fact, gact; + thread_t fact, gact; vmm_user_state = CAST_DOWN(vmm_state_page_t *, save->save_r4); /* Get the user address of the comm area */ if ((unsigned int)vmm_user_state & (PAGE_SIZE - 1)) { /* Make sure the comm area is page aligned */ @@ -276,7 +498,7 @@ int vmm_init_context(struct savearea *save) return 1; } - act = current_act(); /* Pick up our activation */ + act = current_thread(); /* Pick up our activation */ ml_set_interrupts_enabled(TRUE); /* This can take a bit of time so pass interruptions */ @@ -284,15 +506,15 @@ int vmm_init_context(struct savearea *save) task_lock(task); /* Lock our task */ - fact = (thread_act_t)task->threads.next; /* Get the first activation on task */ + fact = (thread_t)task->threads.next; /* Get the 
first activation on task */ gact = 0; /* Pretend we didn't find it yet */ for(i = 0; i < task->thread_count; i++) { /* All of the activations */ - if(fact->mact.vmmControl) { /* Is this a virtual machine monitor? */ + if(fact->machine.vmmControl) { /* Is this a virtual machine monitor? */ gact = fact; /* Yeah... */ break; /* Bail the loop... */ } - fact = (thread_act_t)fact->task_threads.next; /* Go to the next one */ + fact = (thread_t)fact->task_threads.next; /* Go to the next one */ } @@ -312,21 +534,21 @@ int vmm_init_context(struct savearea *save) return 1; } - if(!gact) act->mact.vmmControl = (vmmCntrlTable *)1; /* Temporarily mark that we are the vmm thread */ + if(!gact) act->machine.vmmControl = (vmmCntrlTable *)1; /* Temporarily mark that we are the vmm thread */ task_unlock(task); /* Safe to release now (because we've marked ourselves) */ - CTable = act->mact.vmmControl; /* Get the control table address */ + CTable = act->machine.vmmControl; /* Get the control table address */ if ((unsigned int)CTable == 1) { /* If we are marked, try to allocate a new table, otherwise we have one */ if(!(CTable = (vmmCntrlTable *)kalloc(sizeof(vmmCntrlTable)))) { /* Get a fresh emulation control table */ - act->mact.vmmControl = 0; /* Unmark us as vmm 'cause we failed */ + act->machine.vmmControl = 0; /* Unmark us as vmm 'cause we failed */ ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = KERN_RESOURCE_SHORTAGE; /* No storage... */ return 1; } bzero((void *)CTable, sizeof(vmmCntrlTable)); /* Clean it up */ - act->mact.vmmControl = CTable; /* Initialize the table anchor */ + act->machine.vmmControl = CTable; /* Initialize the table anchor */ } for(cvi = 0; cvi < kVmmMaxContexts; cvi++) { /* Search to find a free slot */ @@ -380,7 +602,7 @@ int vmm_init_context(struct savearea *save) CTable->vmmc[cvi].vmmFlags = vmmInUse; /* Mark the slot in use and make sure the rest are clear */ CTable->vmmc[cvi].vmmContextKern = vks; /* Remember the kernel address of comm area */ - CTable->vmmc[cvi].vmmContextPhys = (vmm_state_page_t *)conphys; /* Remember the state page physical addr */ + CTable->vmmc[cvi].vmmContextPhys = conphys; /* Remember the state page physical addr */ CTable->vmmc[cvi].vmmContextUser = vmm_user_state; /* Remember user address of comm area */ CTable->vmmc[cvi].vmmFacCtx.FPUsave = 0; /* Clear facility context control */ @@ -393,10 +615,20 @@ int vmm_init_context(struct savearea *save) hw_atomic_add((int *)&saveanchor.savetarget, 2); /* Account for the number of extra saveareas we think we might "need" */ - if (!(act->map->pmap->pmapFlags & pmapVMhost)) { - simple_lock(&(act->map->pmap->lock)); - act->map->pmap->pmapFlags |= pmapVMhost; - simple_unlock(&(act->map->pmap->lock)); + pmap_t hpmap = act->map->pmap; /* Get host pmap */ + pmap_t gpmap = pmap_create(0); /* Make a fresh guest pmap */ + if (gpmap) { /* Did we succeed ? */ + CTable->vmmAdsp[cvi] = gpmap; /* Remember guest pmap for new context */ + if (lowGlo.lgVMMforcedFeats & vmmGSA) { /* Forcing on guest shadow assist ? */ + vmm_activate_gsa(act, cvi+1); /* Activate GSA */ + } + } else { + ret = KERN_RESOURCE_SHORTAGE; /* We've failed to allocate a guest pmap */ + goto return_in_shame; /* Shame on us. 
*/ + } + + if (!(hpmap->pmapFlags & pmapVMhost)) { /* Do this stuff if this is our first time hosting */ + hpmap->pmapFlags |= pmapVMhost; /* We're now hosting */ } ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ @@ -404,8 +636,8 @@ int vmm_init_context(struct savearea *save) return 1; return_in_shame: - if(!gact) kfree((vm_offset_t)CTable, sizeof(vmmCntrlTable)); /* Toss the table if we just allocated it */ - act->mact.vmmControl = 0; /* Unmark us as vmm 'cause we failed */ + if(!gact) kfree(CTable, sizeof(vmmCntrlTable)); /* Toss the table if we just allocated it */ + act->machine.vmmControl = 0; /* Unmark us as vmm 'cause we failed */ ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = ret; /* Pass back return code... */ return 1; @@ -434,7 +666,7 @@ return_in_shame: -----------------------------------------------------------------------*/ kern_return_t vmm_tear_down_context( - thread_act_t act, + thread_t act, vmm_thread_index_t index) { vmmCntrlEntry *CEntry; @@ -442,29 +674,35 @@ kern_return_t vmm_tear_down_context( int cvi; register savearea *sv; - CEntry = vmm_get_entry(act, index); /* Convert index to entry */ - if (CEntry == NULL) return KERN_FAILURE; /* Either this isn't vmm thread or the index is bogus */ + CEntry = vmm_get_entry(act, index); /* Convert index to entry */ + if (CEntry == NULL) return KERN_FAILURE; /* Either this isn't vmm thread or the index is bogus */ - ml_set_interrupts_enabled(TRUE); /* This can take a bit of time so pass interruptions */ + ml_set_interrupts_enabled(TRUE); /* This can take a bit of time so pass interruptions */ hw_atomic_sub((int *)&saveanchor.savetarget, 2); /* We don't need these extra saveareas anymore */ - if(CEntry->vmmFacCtx.FPUsave) { /* Is there any floating point context? */ - toss_live_fpu(&CEntry->vmmFacCtx); /* Get rid of any live context here */ + if(CEntry->vmmFacCtx.FPUsave) { /* Is there any floating point context? */ + toss_live_fpu(&CEntry->vmmFacCtx); /* Get rid of any live context here */ save_release((savearea *)CEntry->vmmFacCtx.FPUsave); /* Release it */ } - if(CEntry->vmmFacCtx.VMXsave) { /* Is there any vector context? */ - toss_live_vec(&CEntry->vmmFacCtx); /* Get rid of any live context here */ + if(CEntry->vmmFacCtx.VMXsave) { /* Is there any vector context? 
*/ + toss_live_vec(&CEntry->vmmFacCtx); /* Get rid of any live context here */ save_release((savearea *)CEntry->vmmFacCtx.VMXsave); /* Release it */ } - CEntry->vmmPmap = 0; /* Remove this trace */ - if(act->mact.vmmControl->vmmAdsp[index - 1]) { /* Check if there is an address space assigned here */ - mapping_remove(act->mact.vmmControl->vmmAdsp[index - 1], 0xFFFFFFFFFFFFF000LL); /* Remove final page explicitly because we might have mapped it */ - pmap_remove(act->mact.vmmControl->vmmAdsp[index - 1], 0, 0xFFFFFFFFFFFFF000LL); /* Remove all entries from this map */ - pmap_destroy(act->mact.vmmControl->vmmAdsp[index - 1]); /* Toss the pmap for this context */ - act->mact.vmmControl->vmmAdsp[index - 1] = NULL; /* Clean it up */ + CEntry->vmmPmap = 0; /* Remove this trace */ + pmap_t gpmap = act->machine.vmmControl->vmmAdsp[index - 1]; + /* Get context's guest pmap (if any) */ + if (gpmap) { /* Check if there is an address space assigned here */ + if (gpmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist case specially */ + hw_rem_all_gv(gpmap); /* Remove all guest mappings from shadow hash table */ + } else { + mapping_remove(gpmap, 0xFFFFFFFFFFFFF000LL);/* Remove final page explicitly because we might have mapped it */ + pmap_remove(gpmap, 0, 0xFFFFFFFFFFFFF000LL);/* Remove all entries from this map */ + } + pmap_destroy(gpmap); /* Toss the pmap for this context */ + act->machine.vmmControl->vmmAdsp[index - 1] = NULL; /* Clean it up */ } (void) vm_map_unwire( /* Unwire the user comm page */ @@ -475,7 +713,7 @@ kern_return_t vmm_tear_down_context( kmem_free(kernel_map, (vm_offset_t)CEntry->vmmContextKern, PAGE_SIZE); /* Remove kernel's view of the comm page */ - CTable = act->mact.vmmControl; /* Get the control table address */ + CTable = act->machine.vmmControl; /* Get the control table address */ CTable->vmmGFlags = CTable->vmmGFlags & ~vmmLastAdSp; /* Make sure we don't try to automap into this */ CEntry->vmmFlags = 0; /* Clear out all of the flags for this entry including in use */ @@ -503,15 +741,23 @@ kern_return_t vmm_tear_down_context( */ for(cvi = 1; cvi <= kVmmMaxContexts; cvi++) { /* Look at all slots */ - if(!act->mact.vmmControl->vmmAdsp[index - 1]) continue; /* Nothing to remove here */ - mapping_remove(act->mact.vmmControl->vmmAdsp[index - 1], 0xFFFFFFFFFFFFF000LL); /* Remove final page explicitly because we might have mapped it */ - pmap_remove(act->mact.vmmControl->vmmAdsp[index - 1], 0, 0xFFFFFFFFFFFFF000LL); /* Remove all entries from this map */ - pmap_destroy(act->mact.vmmControl->vmmAdsp[index - 1]); /* Toss the pmap for this context */ - act->mact.vmmControl->vmmAdsp[index - 1] = 0; /* Clear just in case */ - } + if(!act->machine.vmmControl->vmmAdsp[cvi - 1]) continue; /* Nothing to remove here */ + mapping_remove(act->machine.vmmControl->vmmAdsp[cvi - 1], 0xFFFFFFFFFFFFF000LL); /* Remove final page explicitly because we might have mapped it */ + pmap_remove(act->machine.vmmControl->vmmAdsp[cvi - 1], 0, 0xFFFFFFFFFFFFF000LL); /* Remove all entries from this map */ + pmap_destroy(act->machine.vmmControl->vmmAdsp[cvi - 1]); /* Toss the pmap for this context */ + act->machine.vmmControl->vmmAdsp[cvi - 1] = 0; /* Clear just in case; index with cvi, not the torn-down index, since we sweep every slot */ + } + + pmap_t pmap = act->map->pmap; /* Get our pmap */ + if (pmap->pmapVmmExt) { /* Release any VMM pmap extension block and shadow hash table */ + vmm_release_shadow_hash(pmap->pmapVmmExt); /* Release extension block and shadow hash table */ + pmap->pmapVmmExt = 0; /* Forget extension block */ + pmap->pmapVmmExtPhys = 0; /* Forget extension block's physical address, too */
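+ + /* (Note: the VMM extension block and its shadow hash table hang off the host pmap and are shared by every guest context this thread hosts; vmm_activate_gsa() builds them at most once per host pmap, which is why they are released only here, after the last context is gone.) */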
+ } + pmap->pmapFlags &= ~pmapVMhost; /* We're no longer hosting */ - kfree((vm_offset_t)CTable, sizeof(vmmCntrlTable)); /* Toss the table because to tossed the last context */ - act->mact.vmmControl = 0; /* Unmark us as vmm */ + kfree(CTable, sizeof(vmmCntrlTable)); /* Toss the table because we tossed the last context */ + act->machine.vmmControl = 0; /* Unmark us as vmm */ ml_set_interrupts_enabled(FALSE); /* No more interruptions */ @@ -520,9 +766,9 @@ kern_return_t vmm_tear_down_context( /*----------------------------------------------------------------------- -** vmm_set_XA +** vmm_activate_XA ** -** This function sets the eXtended Architecture flags for the specifed VM. +** This function activates the eXtended Architecture flags for the specified VM. ** ** We need to return the result in the return code rather than in the return parameters ** because we need an architecture independent format so the results are actually ** @@ -542,56 +788,70 @@ kern_return_t vmm_tear_down_context( ** KERN_SUCCESS if vm is valid and initialized. KERN_FAILURE if not. ** Also, the internal flags are set and, additionally, the VM is completely reset. -----------------------------------------------------------------------*/ - -kern_return_t vmm_set_XA( - thread_act_t act, +kern_return_t vmm_activate_XA( + thread_t act, vmm_thread_index_t index, unsigned int xaflags) { vmmCntrlEntry *CEntry; - vmmCntrlTable *CTable; - vmm_state_page_t *vks; - vmm_version_t version; - - if(xaflags & ~vmm64Bit) return KERN_FAILURE; /* We only support this one kind now */ + kern_return_t result = KERN_SUCCESS; /* Assume success */ + if ((xaflags & ~kVmmSupportedSetXA) || ((xaflags & vmm64Bit) && !(getPerProc()->pf.Available & pf64Bit))) + return (KERN_FAILURE); /* Unknown or unsupported feature requested; note the negation covers the whole pf64Bit test */ + CEntry = vmm_get_entry(act, index); /* Convert index to entry */ if (CEntry == NULL) return KERN_FAILURE; /* Either this isn't a vmm or the index is bogus */ ml_set_interrupts_enabled(TRUE); /* This can take a bit of time so pass interruptions */ - if(CEntry->vmmFacCtx.FPUsave) { /* Is there any floating point context? */ - toss_live_fpu(&CEntry->vmmFacCtx); /* Get rid of any live context here */ - save_release((savearea *)CEntry->vmmFacCtx.FPUsave); /* Release it */ - } + vmm_flush_context(act, index); /* Flush the context */ - if(CEntry->vmmFacCtx.VMXsave) { /* Is there any vector context? */ - toss_live_vec(&CEntry->vmmFacCtx); /* Get rid of any live context here */ - save_release((savearea *)CEntry->vmmFacCtx.VMXsave); /* Release it */
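+ + /* (Note: activate and deactivate both flush the context first; vmm_flush_context() tosses any live floating point and vector state and clears the comm page, matching the header comment above: the VM is completely reset when its XA flags change.) */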
+ if (xaflags & vmm64Bit) { /* Activating 64-bit mode ? */ + CEntry->vmmXAFlgs |= vmm64Bit; /* Activate 64-bit mode */ } - - CTable = act->mact.vmmControl; /* Get the control table address */ - CTable->vmmGFlags = CTable->vmmGFlags & ~vmmLastAdSp; /* Make sure we don't try to automap into this */ - CEntry->vmmFlags &= vmmInUse; /* Clear out all of the flags for this entry except in use */ - CEntry->vmmXAFlgs = (xaflags & vmm64Bit) | (CEntry->vmmXAFlgs & ~vmm64Bit); /* Set the XA flags */ - CEntry->vmmFacCtx.FPUsave = 0; /* Clear facility context control */ - CEntry->vmmFacCtx.FPUlevel = 0; /* Clear facility context control */ - CEntry->vmmFacCtx.FPUcpu = 0; /* Clear facility context control */ - CEntry->vmmFacCtx.VMXsave = 0; /* Clear facility context control */ - CEntry->vmmFacCtx.VMXlevel = 0; /* Clear facility context control */ - CEntry->vmmFacCtx.VMXcpu = 0; /* Clear facility context control */ + if (xaflags & vmmGSA) { /* Activating guest shadow assist ? */ + result = vmm_activate_gsa(act, index); /* Activate guest shadow assist */ + } - vks = CEntry->vmmContextKern; /* Get address of the context page */ - version = vks->interface_version; /* Save the version code */ - bzero((char *)vks, 4096); /* Clear all */ + ml_set_interrupts_enabled(FALSE); /* No more interruptions */ + + return result; /* Return activate result */ +} - vks->interface_version = version; /* Set our version code */ - vks->thread_index = index % vmmTInum; /* Tell the user the index for this virtual machine */ +/*----------------------------------------------------------------------- +** vmm_deactivate_XA +** +-----------------------------------------------------------------------*/ +kern_return_t vmm_deactivate_XA( + thread_t act, + vmm_thread_index_t index, + unsigned int xaflags) +{ + vmmCntrlEntry *CEntry; + kern_return_t result = KERN_SUCCESS; /* Assume success */ + + if ((xaflags & ~kVmmSupportedSetXA) || ((xaflags & vmm64Bit) && !(getPerProc()->pf.Available & pf64Bit))) + return (KERN_FAILURE); /* Unknown or unsupported feature requested; mirror the activate check so 64-bit mode can be turned off on 64-bit hardware */ + + CEntry = vmm_get_entry(act, index); /* Convert index to entry */ + if (CEntry == NULL) return KERN_FAILURE; /* Either this isn't a vmm or the index is bogus */ + + ml_set_interrupts_enabled(TRUE); /* This can take a bit of time so pass interruptions */ + + vmm_flush_context(act, index); /* Flush the context */ + + if (xaflags & vmm64Bit) { /* Deactivating 64-bit mode ? */ + CEntry->vmmXAFlgs &= ~vmm64Bit; /* Deactivate 64-bit mode */ + } + + if (xaflags & vmmGSA) { /* Deactivating guest shadow assist ? */ + vmm_deactivate_gsa(act, index); /* Deactivate guest shadow assist */ + } ml_set_interrupts_enabled(FALSE); /* No more interruptions */ - return KERN_SUCCESS; /* Return the flags */ + return result; /* Return deactivate result */ } @@ -612,7 +872,7 @@ kern_return_t vmm_set_XA( ** Outputs: ** All vmm contexts released and VMM shut down -----------------------------------------------------------------------*/ -void vmm_tear_down_all(thread_act_t act) { +void vmm_tear_down_all(thread_t act) { vmmCntrlTable *CTable; int cvi; @@ -620,7 +880,7 @@ void vmm_tear_down_all(thread_act_t act) { savearea *save; spl_t s; - if(act->mact.specFlags & runningVM) { /* Are we actually in a context right now? */ + if(act->machine.specFlags & runningVM) { /* Are we actually in a context right now? */ save = find_user_regs(act); /* Find the user state context */ if(!save) { /* Did we find it?
*/ panic("vmm_tear_down_all: runningVM marked but no user state context\n"); @@ -633,7 +893,7 @@ void vmm_tear_down_all(thread_act_t act) { splx(s); /* Restore interrupts */ } - if(CTable = act->mact.vmmControl) { /* Do we have a vmm control block? */ + if(CTable = act->machine.vmmControl) { /* Do we have a vmm control block? */ for(cvi = 1; cvi <= kVmmMaxContexts; cvi++) { /* Look at all slots */ @@ -649,7 +909,7 @@ void vmm_tear_down_all(thread_act_t act) { /* * Note that all address spaces should be gone here. */ - if(act->mact.vmmControl) { /* Did we find one? */ + if(act->machine.vmmControl) { /* Did we find one? */ panic("vmm_tear_down_all: control table did not get deallocated\n"); /* Table did not go away */ } } @@ -695,55 +955,111 @@ void vmm_tear_down_all(thread_act_t act) { -----------------------------------------------------------------------*/ kern_return_t vmm_map_page( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, addr64_t cva, addr64_t ava, vm_prot_t prot) { kern_return_t ret; - vmmCntrlEntry *CEntry; - register mapping *mp; - struct phys_entry *pp; + register mapping_t *mp; vm_map_t map; addr64_t ova, nextva; pmap_t pmap; - pmap = vmm_get_adsp(act, index); /* Get the pmap for this address space */ + pmap = vmm_get_adsp(act, index); /* Get the guest pmap for this address space */ if(!pmap) return KERN_FAILURE; /* Bogus address space, no VMs, or we can't make a pmap, failure... */ if(ava > vm_max_address) return kVmmInvalidAddress; /* Does the machine support an address of this size? */ - map = current_act()->map; /* Get the current map */ - - while(1) { /* Keep trying until we get it or until we fail */ + map = current_thread()->map; /* Get the host's map */ - mp = mapping_find(map->pmap, cva, &nextva, 0); /* Find the mapping for this address */ + if (pmap->pmapFlags & pmapVMgsaa) { /* Guest shadow assist active ?
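*/ + + /* (Sketch of the guest shadow assist path that follows: hw_res_map_gv() first tries to resume a suspended guest->physical mapping already in the shadow hash; only on a miss does the code pin the host mapping with mapping_find(), faulting the page in through vm_fault() if needed, insist on a vanilla mpNormal page, and then build a fresh guest mapping with hw_add_map_gv().) */ + + /* Try to resume first: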
*/ + ret = hw_res_map_gv(map->pmap, pmap, cva, ava, getProtPPC(prot)); + /* Attempt to resume an existing gv->phys mapping */ + if (mapRtOK != ret) { /* Nothing to resume, construct a new mapping */ + + while (1) { /* Find host mapping or fail */ + mp = mapping_find(map->pmap, cva, &nextva, 0); + /* Attempt to find host mapping and pin it */ + if (mp) break; /* Got it */ + + ml_set_interrupts_enabled(TRUE); + /* Open 'rupt window */ + ret = vm_fault(map, /* Didn't find it, try to fault in host page read/write */ + vm_map_trunc_page(cva), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, /* change wiring */ + THREAD_UNINT, + NULL, + 0); + ml_set_interrupts_enabled(FALSE); + /* Close 'rupt window */ + if (ret != KERN_SUCCESS) + return KERN_FAILURE; /* Fault failed, return failure */ + } + + if (mpNormal != (mp->mpFlags & mpType)) { + /* Host mapping must be a vanilla page */ + mapping_drop_busy(mp); /* Un-pin host mapping */ + return KERN_FAILURE; /* Return failure */ + } + + /* Partially construct gv->phys mapping */ + unsigned int pindex; + phys_entry_t *physent = mapping_phys_lookup(mp->mpPAddr, &pindex); + if (!physent) { + mapping_drop_busy(mp); + return KERN_FAILURE; + } + unsigned int pattr = ((physent->ppLink & (ppI | ppG)) >> 60); + unsigned int wimg = 0x2; + if (pattr & mmFlgCInhib) wimg |= 0x4; + if (pattr & mmFlgGuarded) wimg |= 0x1; + unsigned int mflags = (pindex << 16) | mpGuest; + addr64_t gva = ((ava & ~mpHWFlags) | (wimg << 3) | getProtPPC(prot)); + + hw_add_map_gv(map->pmap, pmap, gva, mflags, mp->mpPAddr); + /* Construct new guest->phys mapping */ + + mapping_drop_busy(mp); /* Un-pin host mapping */ + } + } else { + while(1) { /* Keep trying until we get it or until we fail */ + + mp = mapping_find(map->pmap, cva, &nextva, 0); /* Find the mapping for this address */ + + if(mp) break; /* We found it */ + + ml_set_interrupts_enabled(TRUE); /* Enable interruptions */ + ret = vm_fault(map, /* Didn't find it, try to fault it in read/write... */ + vm_map_trunc_page(cva), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, /*change wiring */ + THREAD_UNINT, + NULL, + 0); + ml_set_interrupts_enabled(FALSE); /* Disable interruptions */ + if (ret != KERN_SUCCESS) return KERN_FAILURE; /* There isn't a page there, return... */ + } + + if((mp->mpFlags & mpType) != mpNormal) { /* If this is a block, a nest, or some other special thing, we can't map it */ + mapping_drop_busy(mp); /* We have everything we need from the mapping */ + return KERN_FAILURE; /* Leave in shame */ + } - if(mp) break; /* We found it */ - - ml_set_interrupts_enabled(TRUE); /* Enable interruptions */ - ret = vm_fault(map, trunc_page_32((vm_offset_t)cva), VM_PROT_READ | VM_PROT_WRITE, FALSE); /* Didn't find it, try to fault it in read/write... */ - ml_set_interrupts_enabled(FALSE); /* Disable interruptions */ - if (ret != KERN_SUCCESS) return KERN_FAILURE; /* There isn't a page there, return... */ - } - - if(mp->mpFlags & (mpBlock | mpNest | mpSpecial)) { /* If this is a block, a nest, or some other special thing, we can't map it */ - mapping_drop_busy(mp); /* We have everything we need from the mapping */ - return KERN_FAILURE; /* Leave in shame */ - } + while(1) { /* Keep trying the enter until it goes in */ + ova = mapping_make(pmap, ava, mp->mpPAddr, 0, 1, prot); /* Enter the mapping into the pmap */ + if(!ova) break; /* If there were no collisions, we are done... 
*/ + mapping_remove(pmap, ova); /* Remove the mapping that collided */ + } - while(1) { /* Keep trying the enter until it goes in */ - ova = mapping_make(pmap, ava, mp->mpPAddr, 0, 1, prot); /* Enter the mapping into the pmap */ - if(!ova) break; /* If there were no collisions, we are done... */ - mapping_remove(pmap, ova); /* Remove the mapping that collided */ + mapping_drop_busy(mp); /* We have everything we need from the mapping */ } - mapping_drop_busy(mp); /* We have everything we need from the mapping */ - - if (!((per_proc_info[cpu_number()].spcFlags) & FamVMmode)) { - act->mact.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ - act->mact.vmmControl->vmmGFlags = (act->mact.vmmControl->vmmGFlags & ~vmmLastAdSp) | index; /* Remember last address space */ + if (!((getPerProc()->spcFlags) & FamVMmode)) { + act->machine.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ + act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | index; /* Remember last address space */ } return KERN_SUCCESS; @@ -771,7 +1087,7 @@ kern_return_t vmm_map_page( -----------------------------------------------------------------------*/ vmm_return_code_t vmm_map_execute( - thread_act_t act, + thread_t act, vmm_thread_index_t index, addr64_t cva, addr64_t ava, @@ -787,7 +1103,7 @@ vmm_return_code_t vmm_map_execute( CEntry = vmm_get_entry(act, cndx); /* Get and validate the index */ if (CEntry == NULL) return kVmmBogusContext; /* Return bogus context */ - if (((per_proc_info[cpu_number()].spcFlags) & FamVMmode) && (CEntry != act->mact.vmmCEntry)) + if (((getPerProc()->spcFlags) & FamVMmode) && (CEntry != act->machine.vmmCEntry)) return kVmmBogusContext; /* Yes, invalid index in Fam */ adsp = (index >> 8) & 0xFF; /* Get any requested address space */ @@ -797,8 +1113,8 @@ vmm_return_code_t vmm_map_execute( if(ret == KERN_SUCCESS) { - act->mact.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ - act->mact.vmmControl->vmmGFlags = (act->mact.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx; /* Remember last address space */ + act->machine.vmmControl->vmmLastMap = ava & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ + act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx; /* Remember last address space */ vmm_execute_vm(act, cndx); /* Return was ok, launch the VM */ } @@ -826,7 +1142,7 @@ vmm_return_code_t vmm_map_execute( -----------------------------------------------------------------------*/ kern_return_t vmm_map_list( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, unsigned int cnt, unsigned int flavor) @@ -875,60 +1191,65 @@ kern_return_t vmm_map_list( /*----------------------------------------------------------------------- ** vmm_get_page_mapping ** -** This function determines whether the specified VMM -** virtual address is mapped. +** Given a context index and a guest virtual address, convert the address +** to its corresponding host virtual address. 
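+** +** (Illustrative flow, matching the code below; guest_pmap and host_pmap are +** stand-in names for the pmaps the code fetches. With guest shadow assist +** active the conversion is a single shadow-hash lookup: +** hva = hw_gva_to_hva(guest_pmap, gva); +** Otherwise the guest mapping yields the physical page, which is then +** back-mapped through the host pmap: +** pa = mapping_find(guest_pmap, gva, &nextva, 0)->mpPAddr; +** hva = mapping_p2v(host_pmap, pa); +** Either way the result is page aligned.)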
** ** Inputs: ** act - pointer to current thread activation -** index - index of vmm state for this page -** va - virtual address within the alternate's address -** space +** index - context index +** gva - guest virtual address ** ** Outputs: -** Non-alternate's virtual address (page aligned) or -1 if not mapped or any failure +** Host virtual address (page aligned) or -1 if not mapped or any failure ** ** Note: -** If there are aliases to the page in the non-alternate address space, -** this call could return the wrong one. Moral of the story: no aliases. +** If the host address space contains multiple virtual addresses mapping +** to the physical address corresponding to the specified guest virtual +** address (i.e., host virtual aliases), it is unpredictable which host +** virtual address (alias) will be returned. Moral of the story: No host +** virtual aliases. -----------------------------------------------------------------------*/ addr64_t vmm_get_page_mapping( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, - addr64_t va) + addr64_t gva) { - vmmCntrlEntry *CEntry; - register mapping *mp; + register mapping_t *mp; pmap_t pmap; - addr64_t nextva, sva; + addr64_t nextva, hva; ppnum_t pa; - pmap = vmm_get_adsp(act, index); /* Get and validate the index */ - if (!pmap)return -1; /* No good, failure... */ - - mp = mapping_find(pmap, va, &nextva, 0); /* Find our page */ + pmap = vmm_get_adsp(act, index); /* Get and validate the index */ - if(!mp) return -1; /* Not mapped, return -1 */ + if (pmap->pmapFlags & pmapVMgsaa) { /* Guest shadow assist (GSA) active ? */ + return (hw_gva_to_hva(pmap, gva)); /* Convert guest to host virtual address */ + } else { + mp = mapping_find(pmap, gva, &nextva, 0); /* Find guest mapping for this virtual address */ + + if(!mp) return -1; /* Not mapped, return -1 */ - pa = mp->mpPAddr; /* Remember the page address */ + pa = mp->mpPAddr; /* Remember the physical page address */ - mapping_drop_busy(mp); /* Go ahead and relase the mapping now */ + mapping_drop_busy(mp); /* Go ahead and release the mapping now */ - pmap = current_act()->map->pmap; /* Get the current pmap */ - sva = mapping_p2v(pmap, pa); /* Now find the source virtual */ + pmap = current_thread()->map->pmap; /* Get the host pmap */ + hva = mapping_p2v(pmap, pa); /* Now find the source virtual */ - if(sva != 0) return sva; /* We found it... */ + if(hva != 0) return hva; /* We found it... */ - panic("vmm_get_page_mapping: could not back-map alternate va (%016llX)\n", va); /* We are bad wrong if we can't find it */ + panic("vmm_get_page_mapping: could not back-map guest va (%016llX)\n", gva); + /* We are bad wrong if we can't find it */ - return -1; + return -1; /* Never executed, prevents compiler warning */ + } } /*----------------------------------------------------------------------- ** vmm_unmap_page ** -** This function unmaps a page from the alternate's logical -** address space. +** This function unmaps a page from the guest address space. ** ** Inputs: ** act - pointer to current thread activation @@ -941,21 +1262,25 @@ addr64_t vmm_get_page_mapping( -----------------------------------------------------------------------*/ kern_return_t vmm_unmap_page( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, addr64_t va) { vmmCntrlEntry *CEntry; addr64_t nadd; pmap_t pmap; - kern_return_t kern_result = KERN_SUCCESS; pmap = vmm_get_adsp(act, index); /* Get and validate the index */ if (!pmap)return -1; /* No good, failure... */
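+ + /* (Note: with guest shadow assist active the unmap below only suspends the guest mapping via hw_susp_map_gv(), so a later vmm_map_page() can revive it through hw_res_map_gv() instead of rebuilding it from scratch.) */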
+ if (pmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist specially */ + hw_susp_map_gv(act->map->pmap, pmap, va); /* Suspend the mapping */ + return (KERN_SUCCESS); /* Always returns success */ + } else { + nadd = mapping_remove(pmap, va); /* Toss the mapping */ + + return ((nadd & 1) ? KERN_FAILURE : KERN_SUCCESS); /* Return... */ + } } /*----------------------------------------------------------------------- @@ -977,7 +1302,7 @@ kern_return_t vmm_unmap_page( -----------------------------------------------------------------------*/ kern_return_t vmm_unmap_list( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, unsigned int cnt, unsigned int flavor) @@ -1010,11 +1335,16 @@ kern_return_t vmm_unmap_list( adsp = (gva & vmmlAdID) >> 4; /* Extract an explicit address space request */ if(!adsp) adsp = index - 1; /* If no explicit, use supplied default */ - pmap = act->mact.vmmControl->vmmAdsp[adsp]; /* Get the pmap for this request */ + pmap = act->machine.vmmControl->vmmAdsp[adsp]; /* Get the pmap for this request */ if(!pmap) continue; /* Ain't nuthin' mapped here, no durn map... */ - gva = gva &= 0xFFFFFFFFFFFFF000ULL; /* Clean up the address */ - (void)mapping_remove(pmap, gva); /* Toss the mapping */ + gva &= 0xFFFFFFFFFFFFF000ULL; /* Clean up the address */ + if (pmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist specially */ + hw_susp_map_gv(act->map->pmap, pmap, gva); + /* Suspend the mapping */ + } else { + (void)mapping_remove(pmap, gva); /* Toss the mapping */ + } } return KERN_SUCCESS ; /* Return... */ @@ -1038,7 +1368,7 @@ kern_return_t vmm_unmap_list( -----------------------------------------------------------------------*/ void vmm_unmap_all_pages( - thread_act_t act, + thread_t act, vmm_adsp_id_t index) { vmmCntrlEntry *CEntry; @@ -1046,12 +1376,16 @@ void vmm_unmap_all_pages( pmap = vmm_get_adsp(act, index); /* Convert index to entry */ if (!pmap) return; /* Either this isn't vmm thread or the index is bogus */ - -/* - * Note: the pmap code won't deal with the last page in the address space, so handle it explicitly - */ - mapping_remove(pmap, 0xFFFFFFFFFFFFF000LL); /* Remove final page explicitly because we might have mapped it */ - pmap_remove(pmap, 0, 0xFFFFFFFFFFFFF000LL); /* Remove all entries from this map */ + + if (pmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist specially */ + hw_rem_all_gv(pmap); /* Remove all guest's mappings from shadow hash table */ + } else { + /* + * Note: the pmap code won't deal with the last page in the address space, so handle it explicitly + */ + mapping_remove(pmap, 0xFFFFFFFFFFFFF000LL); /* Remove final page explicitly because we might have mapped it */ + pmap_remove(pmap, 0, 0xFFFFFFFFFFFFF000LL); /* Remove all entries from this map */ + } return; } @@ -1078,20 +1412,24 @@ void vmm_unmap_all_pages( -----------------------------------------------------------------------*/ boolean_t vmm_get_page_dirty_flag( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, addr64_t va, unsigned int reset) { vmmCntrlEntry *CEntry; - register mapping *mpv, *mp; + register mapping_t *mpv, *mp; unsigned int RC; pmap_t pmap; pmap = vmm_get_adsp(act, index); /* Convert index to entry */ if (!pmap) return 1; /* Either this isn't vmm thread or the index is bogus */ - - RC = hw_test_rc(pmap, (addr64_t)va, reset); /* Fetch the RC bits and clear if requested */ +
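+ /* (Note: RC here means the PowerPC referenced and changed bits. The guest shadow assist variant, hw_test_rc_gv(), takes both the host and the guest pmap so it can locate the mapping in the shadow hash; the classic hw_test_rc() works from the guest pmap alone.) */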
+ if (pmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist specially */ + RC = hw_test_rc_gv(act->map->pmap, pmap, va, reset);/* Fetch the RC bits and clear if requested */ + } else { + RC = hw_test_rc(pmap, (addr64_t)va, reset); /* Fetch the RC bits and clear if requested */ + } switch (RC & mapRetCode) { /* Decode return code */ @@ -1131,7 +1469,7 @@ boolean_t vmm_get_page_dirty_flag( -----------------------------------------------------------------------*/ kern_return_t vmm_protect_page( - thread_act_t act, + thread_t act, vmm_adsp_id_t index, addr64_t va, vm_prot_t prot) @@ -1144,7 +1482,11 @@ kern_return_t vmm_protect_page( pmap = vmm_get_adsp(act, index); /* Convert index to entry */ if (!pmap) return KERN_FAILURE; /* Either this isn't vmm thread or the index is bogus */ - ret = hw_protect(pmap, va, prot, &nextva); /* Try to change the protect here */ + if (pmap->pmapFlags & pmapVMgsaa) { /* Handle guest shadow assist specially */ + ret = hw_protect_gv(pmap, va, prot); /* Try to change protection, GSA variant */ + } else { + ret = hw_protect(pmap, va, prot, &nextva); /* Try to change protection */ + } switch (ret) { /* Decode return code */ @@ -1160,9 +1502,9 @@ kern_return_t vmm_protect_page( } - if (!((per_proc_info[cpu_number()].spcFlags) & FamVMmode)) { - act->mact.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ - act->mact.vmmControl->vmmGFlags = (act->mact.vmmControl->vmmGFlags & ~vmmLastAdSp) | index; /* Remember last address space */ + if (!((getPerProc()->spcFlags) & FamVMmode)) { + act->machine.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ + act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | index; /* Remember last address space */ } return KERN_SUCCESS; /* Return */ @@ -1187,7 +1529,7 @@ kern_return_t vmm_protect_page( -----------------------------------------------------------------------*/ vmm_return_code_t vmm_protect_execute( - thread_act_t act, + thread_t act, vmm_thread_index_t index, addr64_t va, vm_prot_t prot) @@ -1204,14 +1546,14 @@ vmm_return_code_t vmm_protect_execute( adsp = (index >> 8) & 0xFF; /* Get any requested address space */ if(!adsp) adsp = (index & 0xFF); /* If 0, use context ID as address space ID */ - if (((per_proc_info[cpu_number()].spcFlags) & FamVMmode) && (CEntry != act->mact.vmmCEntry)) + if (((getPerProc()->spcFlags) & FamVMmode) && (CEntry != act->machine.vmmCEntry)) return kVmmBogusContext; /* Yes, invalid index in Fam */ ret = vmm_protect_page(act, adsp, va, prot); /* Go try to change access */ if(ret == KERN_SUCCESS) { - act->mact.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ - act->mact.vmmControl->vmmGFlags = (act->mact.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx; /* Remember last address space */ + act->machine.vmmControl->vmmLastMap = va & 0xFFFFFFFFFFFFF000ULL; /* Remember the last mapping we made */ + act->machine.vmmControl->vmmGFlags = (act->machine.vmmControl->vmmGFlags & ~vmmLastAdSp) | cndx; /* Remember last address space */ vmm_execute_vm(act, cndx); /* Return was ok, launch the VM */ } @@ -1236,7 +1578,7 @@ vmm_return_code_t vmm_protect_execute( -----------------------------------------------------------------------*/ kern_return_t vmm_get_float_state( - thread_act_t act, + thread_t act, vmm_thread_index_t index) { vmmCntrlEntry *CEntry; @@ -1247,7 +1589,7 @@ kern_return_t vmm_get_float_state( CEntry = vmm_get_entry(act, index); /* Convert index to entry */ if
(CEntry == NULL) return KERN_FAILURE; /* Either this isn't vmm thread or the index is bogus */ - act->mact.specFlags &= ~floatCng; /* Clear the special flag */ + act->machine.specFlags &= ~floatCng; /* Clear the special flag */ CEntry->vmmContextKern->vmmStat &= ~vmmFloatCngd; /* Clear the change indication */ fpu_save(&CEntry->vmmFacCtx); /* Save context if live */ @@ -1281,7 +1623,7 @@ kern_return_t vmm_get_float_state( -----------------------------------------------------------------------*/ kern_return_t vmm_get_vector_state( - thread_act_t act, + thread_t act, vmm_thread_index_t index) { vmmCntrlEntry *CEntry; @@ -1295,7 +1637,7 @@ kern_return_t vmm_get_vector_state( vec_save(&CEntry->vmmFacCtx); /* Save context if live */ - act->mact.specFlags &= ~vectorCng; /* Clear the special flag */ + act->machine.specFlags &= ~vectorCng; /* Clear the special flag */ CEntry->vmmContextKern->vmmStat &= ~vmmVectCngd; /* Clear the change indication */ if(sv = CEntry->vmmFacCtx.VMXsave) { /* Is there context yet? */ @@ -1354,7 +1696,7 @@ kern_return_t vmm_get_vector_state( -----------------------------------------------------------------------*/ kern_return_t vmm_set_timer( - thread_act_t act, + thread_t act, vmm_thread_index_t index, unsigned int timerhi, unsigned int timerlo) @@ -1392,7 +1734,7 @@ kern_return_t vmm_set_timer( -----------------------------------------------------------------------*/ kern_return_t vmm_get_timer( - thread_act_t act, + thread_t act, vmm_thread_index_t index) { vmmCntrlEntry *CEntry; @@ -1431,7 +1773,7 @@ kern_return_t vmm_get_timer( -----------------------------------------------------------------------*/ void vmm_timer_pop( - thread_act_t act) + thread_t act) { vmmCntrlEntry *CEntry; vmmCntrlTable *CTable; @@ -1439,7 +1781,7 @@ void vmm_timer_pop( uint64_t now, soonest; savearea *sv; - if(!((unsigned int)act->mact.vmmControl & 0xFFFFFFFE)) { /* Are there any virtual machines? */ + if(!((unsigned int)act->machine.vmmControl & 0xFFFFFFFE)) { /* Are there any virtual machines? */ panic("vmm_timer_pop: No virtual machines defined; act = %08X\n", act); } @@ -1447,7 +1789,7 @@ void vmm_timer_pop( clock_get_uptime(&now); /* What time is it? */ - CTable = act->mact.vmmControl; /* Make this easier */ + CTable = act->machine.vmmControl; /* Make this easier */ any = 0; /* Haven't found a running unexpired timer yet */ for(cvi = 0; cvi < kVmmMaxContexts; cvi++) { /* Cycle through all and check time now */ @@ -1463,7 +1805,7 @@ void vmm_timer_pop( if (CTable->vmmc[cvi].vmmTimer <= now) { CTable->vmmc[cvi].vmmFlags |= vmmTimerPop; /* Set timer popped here */ CTable->vmmc[cvi].vmmContextKern->vmmStat |= vmmTimerPop; /* Set timer popped here */ - if((unsigned int)&CTable->vmmc[cvi] == (unsigned int)act->mact.vmmCEntry) { /* Is this the running VM? */ + if((unsigned int)&CTable->vmmc[cvi] == (unsigned int)act->machine.vmmCEntry) { /* Is this the running VM? */ sv = find_user_regs(act); /* Get the user state registers */ if(!sv) { /* Did we find something? 
*/ panic("vmm_timer_pop: no user context; act = %08X\n", act); @@ -1485,8 +1827,8 @@ void vmm_timer_pop( } if(any) { - if (act->mact.qactTimer == 0 || soonest <= act->mact.qactTimer) - act->mact.qactTimer = soonest; /* Set lowest timer */ + if (act->machine.qactTimer == 0 || soonest <= act->machine.qactTimer) + act->machine.qactTimer = soonest; /* Set lowest timer */ } return; @@ -1519,11 +1861,11 @@ void vmm_timer_pop( int vmm_stop_vm(struct savearea *save) { - thread_act_t act; + thread_t act; vmmCntrlTable *CTable; int cvi, i; task_t task; - thread_act_t fact; + thread_t fact; unsigned int vmmask; ReturnHandler *stopapc; @@ -1533,15 +1875,15 @@ int vmm_stop_vm(struct savearea *save) task_lock(task); /* Lock our task */ - fact = (thread_act_t)task->threads.next; /* Get the first activation on task */ + fact = (thread_t)task->threads.next; /* Get the first activation on task */ act = 0; /* Pretend we didn't find it yet */ for(i = 0; i < task->thread_count; i++) { /* All of the activations */ - if(fact->mact.vmmControl) { /* Is this a virtual machine monitor? */ + if(fact->machine.vmmControl) { /* Is this a virtual machine monitor? */ act = fact; /* Yeah... */ break; /* Bail the loop... */ } - fact = (thread_act_t)fact->task_threads.next; /* Go to the next one */ + fact = (thread_t)fact->task_threads.next; /* Go to the next one */ } if(!((unsigned int)act)) { /* See if we have VMMs yet */ @@ -1550,19 +1892,24 @@ int vmm_stop_vm(struct savearea *save) return 0; /* Go generate a syscall exception */ } - act_lock_thread(act); /* Make sure this stays 'round */ + thread_reference(act); + task_unlock(task); /* Safe to release now */ - CTable = act->mact.vmmControl; /* Get the pointer to the table */ + thread_mtx_lock(act); + + CTable = act->machine.vmmControl; /* Get the pointer to the table */ if(!((unsigned int)CTable & -2)) { /* Are there any all the way up yet? */ - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ return 0; /* Go generate a syscall exception */ } if(!(vmmask = save->save_r3)) { /* Get the stop mask and check if all zeros */ - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = KERN_SUCCESS; /* Set success */ return 1; /* Return... */ @@ -1575,16 +1922,18 @@ int vmm_stop_vm(struct savearea *save) vmmask = vmmask << 1; /* Slide mask over */ } - if(hw_compare_and_store(0, 1, &act->mact.emPendRupts)) { /* See if there is already a stop pending and lock out others if not */ - act_unlock_thread(act); /* Already one pending, unlock the activation */ + if(hw_compare_and_store(0, 1, &act->machine.emPendRupts)) { /* See if there is already a stop pending and lock out others if not */ + thread_mtx_unlock(act); /* Already one pending, unlock the activation */ + thread_deallocate(act); ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = KERN_SUCCESS; /* Say we did it... 
*/ return 1; /* Leave */ } if(!(stopapc = (ReturnHandler *)kalloc(sizeof(ReturnHandler)))) { /* Get a return handler control block */ - act->mact.emPendRupts = 0; /* No memory, say we have given up request */ - act_unlock_thread(act); /* Unlock the activation */ + act->machine.emPendRupts = 0; /* No memory, say we have given up request */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = KERN_RESOURCE_SHORTAGE; /* No storage... */ return 1; /* Return... */ @@ -1600,7 +1949,8 @@ int vmm_stop_vm(struct savearea *save) act_set_apc(act); /* Set an APC AST */ ml_set_interrupts_enabled(TRUE); /* Enable interruptions now */ - act_unlock_thread(act); /* Unlock the activation */ + thread_mtx_unlock(act); /* Unlock the activation */ + thread_deallocate(act); ml_set_interrupts_enabled(FALSE); /* Set back interruptions */ save->save_r3 = KERN_SUCCESS; /* Hip, hip, horay... */ @@ -1617,13 +1967,13 @@ int vmm_stop_vm(struct savearea *save) ** ** Inputs: ** ReturnHandler *rh - the return handler control block as required by the APC. -** thread_act_t act - the activation +** thread_t act - the activation ** ** Outputs: ** Whatever needed to be done is done. -----------------------------------------------------------------------*/ -void vmm_interrupt(ReturnHandler *rh, thread_act_t act) { +void vmm_interrupt(ReturnHandler *rh, thread_t act) { vmmCntrlTable *CTable; savearea *sv; @@ -1631,16 +1981,16 @@ void vmm_interrupt(ReturnHandler *rh, thread_act_t act) { - kfree((vm_offset_t)rh, sizeof(ReturnHandler)); /* Release the return handler block */ + kfree(rh, sizeof(ReturnHandler)); /* Release the return handler block */ inter = ml_set_interrupts_enabled(FALSE); /* Disable interruptions for now */ - act->mact.emPendRupts = 0; /* Say that there are no more interrupts pending */ - CTable = act->mact.vmmControl; /* Get the pointer to the table */ + act->machine.emPendRupts = 0; /* Say that there are no more interrupts pending */ + CTable = act->machine.vmmControl; /* Get the pointer to the table */ if(!((unsigned int)CTable & -2)) return; /* Leave if we aren't doing VMs any more... */ - if(act->mact.vmmCEntry && (act->mact.vmmCEntry->vmmFlags & vmmXStop)) { /* Do we need to stop the running guy? */ + if(act->machine.vmmCEntry && (act->machine.vmmCEntry->vmmFlags & vmmXStop)) { /* Do we need to stop the running guy? */ sv = find_user_regs(act); /* Get the user state registers */ if(!sv) { /* Did we find something? */ panic("vmm_interrupt: no user context; act = %08X\n", act); diff --git a/osfmk/ppc/vmachmon.h b/osfmk/ppc/vmachmon.h index 8dc25ff0c..b2801eb9b 100644 --- a/osfmk/ppc/vmachmon.h +++ b/osfmk/ppc/vmachmon.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -119,7 +119,7 @@ typedef unsigned long vmm_adsp_id_t; enum { kVmmCurMajorVersion = 0x0001, - kVmmCurMinorVersion = 0x0006, + kVmmCurMinorVersion = 0x0007, kVmmMinMajorVersion = 0x0001, }; #define kVmmCurrentVersion ((kVmmCurMajorVersion << 16) | kVmmCurMinorVersion) @@ -134,14 +134,24 @@ enum { kVmmFeature_XA = 0x00000020, kVmmFeature_SixtyFourBit = 0x00000040, kVmmFeature_MultAddrSpace = 0x00000080, + kVmmFeature_GuestShadowAssist = 0x00000100, /* Guest->physical shadow hash table */ + kVmmFeature_GlobalMappingAssist = 0x00000200, /* Global shadow mapping support */ + kVmmFeature_HostShadowAssist = 0x00000400, /* Linear shadow mapping of an area of + host virtual as guest physical */ + kVmmFeature_MultAddrSpaceAssist = 0x00000800, /* Expanded pool of guest virtual + address spaces */ }; #define kVmmCurrentFeatures (kVmmFeature_LittleEndian | kVmmFeature_Stop | kVmmFeature_ExtendedMapping \ - | kVmmFeature_ListMapping | kVmmFeature_FastAssist | kVmmFeature_XA | kVmmFeature_MultAddrSpace) + | kVmmFeature_ListMapping | kVmmFeature_FastAssist | kVmmFeature_XA \ + | kVmmFeature_GuestShadowAssist) enum { - vmm64Bit = 0x80000000, + vmm64Bit = 0x80000000, /* Make guest 64-bit */ + vmmGSA = 0x40000000, /* Enable guest shadow assist (GSA) */ + vmmGMA = 0x20000000, /* Enable global shadow mapping assist (GMA) */ }; +#define kVmmSupportedSetXA (vmm64Bit | vmmGSA | vmmGMA) typedef unsigned long vmm_version_t; @@ -268,12 +278,13 @@ enum { kVmmProtectExecute, /* Set prot attributes and launch */ kVmmMapList, /* Map a list of pages into guest address spaces */ kVmmUnmapList, /* Unmap a list of pages from guest address spaces */ - kvmmExitToHost, - kvmmResumeGuest, - kvmmGetGuestRegister, - kvmmSetGuestRegister, + kvmmExitToHost, /* Exit from FAM to host -- fast-path syscall */ + kvmmResumeGuest, /* Resume guest from FAM -- fast-path syscall */ + kvmmGetGuestRegister, /* Get guest register from FAM -- fast-path syscall */ + kvmmSetGuestRegister, /* Set guest register from FAM -- fast-path syscall */ - kVmmSetXA, /* Set extended architecture features for a VM */ + kVmmActivateXA, /* Activate extended architecture features for a VM */ + kVmmDeactivateXA, /* Deactivate extended architecture features for a VM */ kVmmGetXA, /* Get extended architecture features from a VM */ kVmmMapPage64, /* Map a host to guest address space - supports 64-bit */ @@ -286,6 +297,9 @@ enum { kVmmMapList64, /* Map a list of pages into guest address spaces - supports 64-bit */ kVmmUnmapList64, /* Unmap a list of pages from guest address spaces - supports 64-bit */ kVmmMaxAddr, /* Returns the maximum virtual address that is mappable */ + + kVmmSetGuestMemory, /* Sets base and extent of guest physical memory in host address space */ + kVmmPurgeLocal, /* Purges all non-global mappings for a given guest address space */ }; #define kVmmReturnNull 0 @@ -381,7 +395,8 @@ typedef struct vmmUMList64 { #define vmmlFlgs 0x00000FFF /* Flags passed in in vmlava low order 12 bits */ #define vmmlProt 0x00000007 /* Protection flags for the page */ #define vmmlAdID 0x000003F0 /* Guest address space ID - used only if non-zero */ -#define vmmlRsvd 0x00000C08 /* Reserved for future */ +#define vmmlGlob 0x00000400 /* Mapping is global */ +#define vmmlRsvd 0x00000800 /* Reserved for future */ /************************************************************************************* Internal Emulation Types @@ -410,7 +425,7 @@ typedef struct vmmCntrlEntry { /* Virtual Machine Monitor control table ent #define vmmSpfSaveb 
24 unsigned int vmmXAFlgs; /* Extended Architecture flags */ vmm_state_page_t *vmmContextKern; /* Kernel address of context communications area */ - ppnum_t vmmContextPhys; /* Physical address of context communications area */ + ppnum_t vmmContextPhys; /* Physical address of context communications area */ vmm_state_page_t *vmmContextUser; /* User address of context communications area */ facility_context vmmFacCtx; /* Header for vector and floating point contexts */ pmap_t vmmPmap; /* Last dispatched pmap */ @@ -430,47 +445,48 @@ typedef struct vmmCntrlTable { /* Virtual Machine Monitor Control table */ #pragma pack() /* function decls for kernel level routines... */ -extern void vmm_execute_vm(thread_act_t act, vmm_thread_index_t index); -extern vmmCntrlEntry *vmm_get_entry(thread_act_t act, vmm_thread_index_t index); -extern kern_return_t vmm_tear_down_context(thread_act_t act, vmm_thread_index_t index); -extern kern_return_t vmm_get_float_state(thread_act_t act, vmm_thread_index_t index); -extern kern_return_t vmm_get_vector_state(thread_act_t act, vmm_thread_index_t index); -extern kern_return_t vmm_set_timer(thread_act_t act, vmm_thread_index_t index, unsigned int timerhi, unsigned int timerlo); -extern kern_return_t vmm_get_timer(thread_act_t act, vmm_thread_index_t index); -extern void vmm_tear_down_all(thread_act_t act); -extern kern_return_t vmm_map_page(thread_act_t act, vmm_thread_index_t hindex, addr64_t cva, +extern void vmm_execute_vm(thread_t act, vmm_thread_index_t index); +extern kern_return_t vmm_tear_down_context(thread_t act, vmm_thread_index_t index); +extern kern_return_t vmm_get_float_state(thread_t act, vmm_thread_index_t index); +extern kern_return_t vmm_get_vector_state(thread_t act, vmm_thread_index_t index); +extern kern_return_t vmm_set_timer(thread_t act, vmm_thread_index_t index, unsigned int timerhi, unsigned int timerlo); +extern kern_return_t vmm_get_timer(thread_t act, vmm_thread_index_t index); +extern void vmm_tear_down_all(thread_t act); +extern kern_return_t vmm_map_page(thread_t act, vmm_thread_index_t hindex, addr64_t cva, addr64_t ava, vm_prot_t prot); -extern vmm_return_code_t vmm_map_execute(thread_act_t act, vmm_thread_index_t hindex, addr64_t cva, +extern vmm_return_code_t vmm_map_execute(thread_t act, vmm_thread_index_t hindex, addr64_t cva, addr64_t ava, vm_prot_t prot); -extern kern_return_t vmm_protect_page(thread_act_t act, vmm_thread_index_t hindex, addr64_t va, +extern kern_return_t vmm_protect_page(thread_t act, vmm_thread_index_t hindex, addr64_t va, vm_prot_t prot); -extern vmm_return_code_t vmm_protect_execute(thread_act_t act, vmm_thread_index_t hindex, addr64_t va, +extern vmm_return_code_t vmm_protect_execute(thread_t act, vmm_thread_index_t hindex, addr64_t va, vm_prot_t prot); -extern addr64_t vmm_get_page_mapping(thread_act_t act, vmm_thread_index_t index, +extern addr64_t vmm_get_page_mapping(thread_t act, vmm_thread_index_t index, addr64_t va); -extern kern_return_t vmm_unmap_page(thread_act_t act, vmm_thread_index_t index, addr64_t va); -extern void vmm_unmap_all_pages(thread_act_t act, vmm_thread_index_t index); -extern boolean_t vmm_get_page_dirty_flag(thread_act_t act, vmm_thread_index_t index, +extern kern_return_t vmm_unmap_page(thread_t act, vmm_thread_index_t index, addr64_t va); +extern void vmm_unmap_all_pages(thread_t act, vmm_thread_index_t index); +extern boolean_t vmm_get_page_dirty_flag(thread_t act, vmm_thread_index_t index, addr64_t va, unsigned int reset); -extern kern_return_t vmm_set_XA(thread_act_t act, 
vmm_thread_index_t index, unsigned int xaflags); -extern unsigned int vmm_get_XA(thread_act_t act, vmm_thread_index_t index); +extern kern_return_t vmm_activate_XA(thread_t act, vmm_thread_index_t index, unsigned int xaflags); +extern kern_return_t vmm_deactivate_XA(thread_t act, vmm_thread_index_t index, unsigned int xaflags); +extern unsigned int vmm_get_XA(thread_t act, vmm_thread_index_t index); extern int vmm_get_features(struct savearea *); extern int vmm_get_version(struct savearea *); extern int vmm_init_context(struct savearea *); extern int vmm_dispatch(struct savearea *); -extern int vmm_exit(thread_act_t act, struct savearea *); -extern void vmm_force_exit(thread_act_t act, struct savearea *); +extern int vmm_exit(thread_t act, struct savearea *); +extern void vmm_force_exit(thread_t act, struct savearea *); extern int vmm_stop_vm(struct savearea *save); -extern void vmm_timer_pop(thread_act_t act); -extern void vmm_interrupt(ReturnHandler *rh, thread_act_t act); -extern kern_return_t vmm_map_list(thread_act_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor); -extern kern_return_t vmm_unmap_list(thread_act_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor); +extern void vmm_timer_pop(thread_t act); +extern void vmm_interrupt(ReturnHandler *rh, thread_t act); +extern kern_return_t vmm_map_list(thread_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor); +extern kern_return_t vmm_unmap_list(thread_t act, vmm_thread_index_t index, unsigned int cnt, unsigned int flavor); extern vmm_return_code_t vmm_resume_guest(vmm_thread_index_t index, unsigned long pc, unsigned long vmmCntrl, unsigned long vmmCntrMaskl); extern vmm_return_code_t vmm_exit_to_host(vmm_thread_index_t index); extern unsigned long vmm_get_guest_register(vmm_thread_index_t index, unsigned long reg_index); extern vmm_return_code_t vmm_set_guest_register(vmm_thread_index_t index, unsigned long reg_index, unsigned long reg_value); -extern addr64_t vmm_max_addr(thread_act_t act); +extern addr64_t vmm_max_addr(thread_t act); +extern kern_return_t vmm_set_guest_memory(thread_t act, vmm_thread_index_t index, addr64_t base, addr64_t extent); +extern kern_return_t vmm_purge_local(thread_t act, vmm_thread_index_t index); #endif - diff --git a/osfmk/ppc/vmachmon_asm.s b/osfmk/ppc/vmachmon_asm.s index 6d445850f..2d5cdb785 100644 --- a/osfmk/ppc/vmachmon_asm.s +++ b/osfmk/ppc/vmachmon_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -103,7 +103,9 @@ LEXT(vmm_dispatch_table) .long 1 ; Valid in Fam .long EXT(vmm_fam_reserved) ; Set guest register from Fam .long 1 ; Valid in Fam - .long EXT(vmm_set_XA) ; Set extended architecture features for a VM + .long EXT(vmm_activate_XA) ; Activate extended architecture features for a VM + .long 0 ; Not valid in Fam + .long EXT(vmm_deactivate_XA) ; Deactivate extended architecture features for a VM .long 0 ; Not valid in Fam .long EXT(vmm_get_XA) ; Get extended architecture features from a VM .long 1 ; Valid in Fam @@ -127,8 +129,12 @@ LEXT(vmm_dispatch_table) .long 1 ; Valid in Fam .long EXT(vmm_max_addr) ; Returns the maximum virtual address .long 1 ; Valid in Fam - - +#if 0 + .long EXT(vmm_set_guest_memory) ; Set guest memory extent + .long 0 ; Not valid in FAM + .long EXT(vmm_purge_local) ; Purge all local guest mappings */ + .long 1 ; Valid in FAM +#endif .set vmm_count,(.-EXT(vmm_dispatch_table))/8 ; Get the top number @@ -148,7 +154,8 @@ LEXT(vmm_dispatch) rlwinm r11,r11,3,0,28 ; Index into table bge- cr1,vmmBogus ; It is a bogus entry add r12,r10,r11 ; Get the vmm dispatch syscall entry - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block lwz r13,0(r12) ; Get address of routine lwz r12,4(r12) ; Get validity flag lwz r5,spcFlags(r10) ; Get per_proc special flags @@ -178,8 +185,8 @@ vmmRetPt: li r0,0 ; Clear this out b EXT(ppcscret) ; Go back to handler... vmmBogus: - mfsprg r10,0 ; Get the per_proc - mfsprg r3,1 ; Load current activation + mfsprg r3,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r3) ; Get the per_proc block lwz r5,spcFlags(r10) ; Get per_proc special flags rlwinm. r5,r5,0,FamVMmodebit,FamVMmodebit ; Test FamVMmodebit bne vmmexitcall ; Do it to it @@ -349,7 +356,8 @@ LEXT(vmm_execute_vm) .globl EXT(switchIntoVM) LEXT(switchIntoVM) - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block rlwinm r31,r4,24,24,31 ; Get the address space rlwinm r4,r4,0,24,31 ; Isolate the context id lwz r28,vmmControl(r3) ; Pick up the control table address @@ -518,7 +526,8 @@ swvmNoMap: lwz r20,vmmContextKern(r27) ; Get the comm area lwz r17,vmmContextKern(r27) ; Get the comm area back la r25,vmmFacCtx(r27) ; Point to the facility context lwz r15,vmmCntrl(r17) ; Get the control flags again - mfsprg r29,0 ; Get the per_proc + mfsprg r29,1 ; Get the current activation + lwz r29,ACT_PER_PROC(r29) ; Get the per_proc block ; ; Check if there is new floating point context to load @@ -536,11 +545,12 @@ swvmNoMap: lwz r20,vmmContextKern(r27) ; Get the comm area eieio ; Make sure this stays in order - lis r18,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r19,r19,ppSize ; Find offset to the owner per_proc - ori r18,r18,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r18,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r19,r19,ppeSize ; Find offset to the owner per_proc_entry + ori r18,r18,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r16,FPUowner ; Displacement to float owner - add r19,r18,r19 ; Point to the owner per_proc + add r19,r18,r19 ; Point to the owner per_proc_entry + lwz r19,ppe_vaddr(r19) ; Point to the owner per_proc swvminvfpu: lwarx r18,r16,r19 ; Get the owner @@ -580,7 +590,8 @@ swvmGotFloat: stw r15,vmmCntrl(r17) ; Save the control flags sans vmmFloatLoad rlwinm r11,r11,0,floatCngbit+1,floatCngbit-1 ; Clear the changed bit here lwz r14,vmmStat(r17) ; Get the 
status flags - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block stw r11,ACT_MACT_SPF(r26) ; Get the special flags rlwinm r14,r14,0,vmmFloatCngdb+1,vmmFloatCngdb-1 ; Clear the changed flag stw r11,spcFlags(r10) ; Set per_proc copy of the special flags @@ -602,11 +613,12 @@ swvmNoNewFloats: eieio ; Make sure this stays in order - lis r18,hi16(EXT(per_proc_info)) ; Set base per_proc - mulli r19,r19,ppSize ; Find offset to the owner per_proc - ori r18,r18,lo16(EXT(per_proc_info)) ; Set base per_proc + lis r18,hi16(EXT(PerProcTable)) ; Set base PerProcTable + mulli r19,r19,ppeSize ; Find offset to the owner per_proc_entry + ori r18,r18,lo16(EXT(PerProcTable)) ; Set base PerProcTable li r16,VMXowner ; Displacement to vector owner - add r19,r18,r19 ; Point to the owner per_proc + add r19,r18,r19 ; Point to the owner per_proc_entry + lwz r19,ppe_vaddr(r19) ; Point to the owner per_proc swvminvvec: lwarx r18,r16,r19 ; Get the owner @@ -649,7 +661,8 @@ swvmGotVect: rlwinm r11,r11,0,vectorCngbit+1,vectorCngbit-1 ; Clear the changed bit here stw r8,savevrvalid(r21) ; Set the current VRSave as valid saved lwz r14,vmmStat(r17) ; Get the status flags - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block stw r11,ACT_MACT_SPF(r26) ; Get the special flags rlwinm r14,r14,0,vmmVectCngdb+1,vmmVectCngdb-1 ; Clear the changed flag stw r11,spcFlags(r10) ; Set per_proc copy of the special flags @@ -657,7 +670,7 @@ swvmGotVect: swvmNoNewVects: li r3,1 ; Show normal exit with check for AST - lwz r16,ACT_THREAD(r26) ; Restore the thread pointer + mr r16,r26 ; Restore the thread pointer b EXT(ppcscret) ; Go back to handler... .align 5 @@ -762,7 +775,8 @@ vmmexitcall: stw r0,vmmCEntry(r16) ; Clear pointer to active context stw r19,vmmFlags(r2) ; Set the status flags rlwinm r11,r11,0,userProtKeybit+1,userProtKeybit-1 ; Set back to normal protection key - mfsprg r10,0 ; Get the per_proc block + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block rlwinm r11,r11,0,FamVMenabit+1,FamVMenabit-1 ; Clear FamVMEnable lwz r18,spcFlags(r10) ; Get per_proc copy of the special flags lwz r5,vmmContextKern(r2) ; Get the state page kernel addr @@ -828,7 +842,8 @@ LEXT(vmm_force_exit) beq- vfeNotRun ; We were not in a vm.... rlwinm r9,r9,0,userProtKeybit+1,userProtKeybit-1 ; Set back to normal protection key stw r0,vmmCEntry(r26) ; Clear pointer to active context - mfsprg r10,0 ; Get the per_proc block + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block lwz r18,spcFlags(r10) ; Get per_proc copy of the special flags rlwinm r9,r9,0,FamVMenabit+1,FamVMenabit-1 ; Clear Fam Enable rlwinm r9,r9,0,FamVMmodebit+1,FamVMmodebit-1 ; Clear Fam Enable @@ -1640,7 +1655,8 @@ sw64SC: ld r10,vmmppcXr6(r5) ; Get the fourth paramter ; vmmFamGuestResume: - mfsprg r10,0 ; Get the per_proc + mfsprg r10,1 ; Get the current activation + lwz r10,ACT_PER_PROC(r10) ; Get the per_proc block lwz r27,vmmCEntry(r3) ; Get the context that is active lwz r4,VMMXAFlgs(r10) ; Get the eXtended Architecture flags rlwinm. r4,r4,0,0,0 ; Are we doing a 64-bit virtual machine? @@ -1738,7 +1754,7 @@ fgrXNoMap: std r7,saver7(r30) ; Set savearea r7 fgrret: li r3,1 ; Show normal exit with check for AST - lwz r16,ACT_THREAD(r26) ; Restore the thread pointer + mr r16,r26 ; Restore the thread pointer b EXT(ppcscret) ; Go back to handler... 
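One mechanical substitution runs through all of these assembly hunks: the per_proc block is no longer read straight out of SPRG0. SPRG1 now carries the current activation (thread), and the per_proc pointer is loaded from its ACT_PER_PROC field; likewise the flat per_proc_info[] array indexed by ppSize strides is replaced by PerProcTable[] entries of ppeSize that are dereferenced through ppe_vaddr. A C rendering of the two lookups -- the structure shapes are assumptions for illustration; only the field and table names come from the patch:

    struct per_proc_info;                     /* opaque here                */

    struct thread {                           /* the "activation" in SPRG1  */
        /* ... */
        struct per_proc_info *act_per_proc;   /* ACT_PER_PROC in asm terms  */
    };

    struct per_proc_entry {                   /* one PerProcTable[] slot    */
        struct per_proc_info *ppe_vaddr;
    };
    extern struct per_proc_entry PerProcTable[];

    /* Old scheme: per_proc came straight from SPRG0.  New scheme: */
    static inline struct per_proc_info *
    current_per_proc(struct thread *act /* from SPRG1 */)
    {
        return act->act_per_proc;
    }

    /* Owner lookup (FPUowner/VMXowner): index the table, then follow */
    /* the entry's virtual address to reach the owner's per_proc.     */
    static inline struct per_proc_info *
    per_proc_for_cpu(unsigned cpu)
    {
        return PerProcTable[cpu].ppe_vaddr;
    }
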
; @@ -2005,18 +2021,29 @@ fpfXret: std r2,savesrr0(r13) ; Store famhandler in srr0 blr -; -; Ultra Fast Path FAM syscalls -; +/* + * Ultra Fast Path FAM syscalls + * + * The UFT FAMs are those from kvmmResumeGuest to kvmmSetGuestRegister, inclusive. + * We get here directly from the syscall vector, with interrupts and translation off, + * 64-bit mode on if supported, and all registers live except: + * + * r13 = holds caller's cr + * sprg2 = holds caller's r13 + * sprg3 = holds caller's r11 + * cr2 = set on (r3==kvmmSetGuestRegister) + * cr5 = set on (r3==kvmmResumeGuest) + */ .align 5 .globl EXT(vmm_ufp) LEXT(vmm_ufp) mfsprg r3,0 ; Get the per_proc area - mr r11,r13 ; Saved cr in r11 + mr r11,r13 ; Move saved cr to r11 lwz r13,VMMXAFlgs(r3) ; Get the eXtended Architecture flags - rlwinm. r13,r13,0,0,0 ; Are we doing a 64-bit virtual machine? + rlwinm. r13,r13,0,0,0 ; Are we doing a 64-bit virtual machine? + lwz r13,pfAvailable(r3) ; Get feature flags mtcrf 0x02,r13 ; Put pf64Bitb etc in cr6 lwz r13,VMMareaPhys(r3) ; Load fast assist area @@ -2026,12 +2053,11 @@ LEXT(vmm_ufp) ufpVMareaPhys64: sldi r13,r13,12 ; Change ppnum to physical address ufpVMareaPhysret: - bne ufpX + bne ufpX ; go handle a 64-bit virtual machine + bt cr5_eq,ufpResumeGuest ; if kvmmResumeGuest, branch to ResumeGuest - cmpwi cr7,r4,0 ; Compare first arg with 0 - cmpwi cr5,r4,7 ; Compare first arg with 7 - cror cr1_eq,cr7_lt,cr5_gt ; Is it in 0 to 7 range - beq cr1,ufpVMret ; Return if not in the range + cmplwi cr5,r4,7 ; First argument in range? (ie, 0-7) + bgt cr5,ufpVMret ; Return if not in the range slwi r4,r4,2 ; multiply index by 4 la r3,famguestr0(r13) ; Load the base address bt cr2_eq,ufpSetGuestReg ; Set/get selector @@ -2213,12 +2239,10 @@ ufpVMrfi64: mfsprg r11,3 ; Restore R11 rfid -ufpX: +ufpX: ; here if virtual machine is 64-bit bt cr5_eq,ufpXResumeGuest ; if kvmmResumeGuest, branch to ResumeGuest - cmpwi cr7,r4,0 ; Compare first arg with 0 - cmpwi cr5,r4,7 ; Compare first arg with 7 - cror cr1_eq,cr7_lt,cr5_gt ; Is it in 0 to 7 range - beq cr1,ufpXVMret ; Return if not in the range + cmplwi cr5,r4,7 ; Is first arg in range 0-7? + bgt cr5,ufpXVMret ; Return if not in the range slwi r4,r4,3 ; multiply index by 8 la r3,famguestXr0(r13) ; Load the base address bt cr2_eq,ufpXSetGuestReg ; Set/get selector diff --git a/osfmk/profiling/i386/profile-asm.s b/osfmk/profiling/i386/profile-asm.s index 94a9e1329..a47e9fcd7 100644 --- a/osfmk/profiling/i386/profile-asm.s +++ b/osfmk/profiling/i386/profile-asm.s @@ -176,8 +176,6 @@ .file "profile-asm.s" -#include - #include /* @@ -661,7 +659,7 @@ ENDDATA(_profile_do_stats) * except to load this pointer. */ -#if defined (MACH_KERNEL) && NCPUS > 1 +#if defined (MACH_KERNEL) #define ASSEMBLER #include @@ -675,7 +673,7 @@ ENDDATA(_profile_do_stats) #define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx -#else /* not kernel or not multiprocessor */ +#else /* not kernel */ #define Vload Gload; Egaddr(%ebx,_profile_vars) #endif diff --git a/osfmk/profiling/i386/profile-md.c b/osfmk/profiling/i386/profile-md.c index d0a0d3165..5b7d5480c 100644 --- a/osfmk/profiling/i386/profile-md.c +++ b/osfmk/profiling/i386/profile-md.c @@ -765,7 +765,7 @@ _prof_write(struct profile_vars *pv, struct callback *callback_ptr) /* * Update any statistics. For the 386, calculate the hash table loading factor. - * Also figure out how many overflows occured. + * Also figure out how many overflows occurred. 
*/ void diff --git a/osfmk/sys/types.h b/osfmk/sys/types.h index e1e72d576..2f61d0065 100644 --- a/osfmk/sys/types.h +++ b/osfmk/sys/types.h @@ -125,8 +125,9 @@ typedef char * caddr_t; /* address of a (signed) char */ typedef int time_t; /* a signed 32 */ typedef unsigned int daddr_t; /* an unsigned 32 */ +#if 0 /* off_t should be 64-bit ! */ typedef unsigned int off_t; /* another unsigned 32 */ - +#endif typedef unsigned short dev_t; /* another unsigned short */ #define NODEV ((dev_t)-1) /* and a null value for it */ diff --git a/osfmk/sys/version.h b/osfmk/sys/version.h deleted file mode 100644 index fa9a638ba..000000000 --- a/osfmk/sys/version.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1994/09/23 03:13:55 ezf - * change marker to not FREE - * [1994/09/22 21:59:11 ezf] - * - * Revision 1.1.2.2 1993/06/03 00:18:34 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:31:15 jeffc] - * - * Revision 1.1 1992/09/30 02:37:07 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.4 91/05/14 17:40:52 mrt - * Correcting copyright - * - * Revision 2.3 91/02/05 17:57:18 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:58 mrt] - * - * Revision 2.2 90/01/19 14:35:31 rwd - * Set version to 3.0 and set include version to 0 - * [89/12/10 rwd] - * - * Revision 2.1 89/08/03 16:10:14 rwd - * Created. - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. - */ -/* - */ - -/* - * Each kernel has a major and minor version number. Changes in - * the major number in general indicate a change in exported features. - * Changes in minor number usually correspond to internal-only - * changes that the user need not be aware of (in general). These - * values are stored at boot time in the machine_info strucuture and - * can be obtained by user programs with the host_info kernel call. - * This mechanism is intended to be the formal way for Mach programs - * to provide for backward compatibility in future releases. - * - * [ This needs to be reconciled somehow with the major/minor version - * number stuffed into the version string - mja, 5/8/87 ] - * - * Following is an informal history of the numbers: - * - * 25-March-87 Avadis Tevanian, Jr. - * Created version numbering scheme. Started with major 1, - * minor 0. - */ - -#define KERNEL_MAJOR_VERSION 3 -#define KERNEL_MINOR_VERSION 0 - -/* - * Version number of the kernel include files. - * - * This number must be changed whenever an incompatible change is made to one - * or more of our include files which are used by application programs that - * delve into kernel memory. The number should normally be simply incremented - * but may actually be changed in any manner so long as it differs from the - * numbers previously assigned to any other versions with which the current - * version is incompatible. It is used at boot time to determine which - * versions of the system programs to install. - * - * Note that the symbol _INCLUDE_VERSION must be set to this in the symbol - * table. On the VAX for example, this is done in locore.s. - */ - -/* - * Current allocation strategy: bump either branch by 2, until non-MACH is - * excised from the CSD environment. - */ -#define INCLUDE_VERSION 0 diff --git a/osfmk/vm/Makefile b/osfmk/vm/Makefile index 07ae3474e..f5bc46fa4 100644 --- a/osfmk/vm/Makefile +++ b/osfmk/vm/Makefile @@ -10,10 +10,14 @@ include $(MakeInc_def) DATAFILES = EXPORT_ONLY_FILES = \ - vm_map.h \ + pmap.h \ + task_working_set.h \ + vm_fault.h \ vm_kern.h \ - vm_shared_memory_server.h \ - pmap.h + vm_map.h \ + vm_pageout.h \ + vm_protos.h \ + vm_shared_memory_server.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index 2429771d0..fa1cfa94b 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
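The osfmk/sys/types.h hunk above parks the old 32-bit off_t under #if 0 with the reminder that off_t should be 64-bit: a 32-bit offset caps file positions at 4 GB (2 GB if signed), well short of the file sizes the rest of this patch assumes. A minimal compile-time guard of the kind a port might add -- illustrative only, not part of the patch:

    #include <sys/types.h>

    /* Fails to compile if off_t is narrower than 64 bits. */
    typedef char off_t_is_64bit[(sizeof(off_t) >= 8) ? 1 : -1];
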
* * @APPLE_LICENSE_HEADER_START@ * @@ -21,22 +21,33 @@ */ #include -#include + #include -#include -#include -#include +#include +#include #include +#include #include #include #include +#include +#include + +#include +#include + #include #include -#include -#include -#include #include +#include + +#include +#include +#include +#include +#include +#include /* BSD VM COMPONENT INTERFACES */ int @@ -61,24 +72,36 @@ get_map_nentries( return(map->hdr.nentries); } +mach_vm_offset_t +mach_get_vm_start(vm_map_t map) +{ + return( vm_map_first_entry(map)->vme_start); +} + +mach_vm_offset_t +mach_get_vm_end(vm_map_t map) +{ + return( vm_map_last_entry(map)->vme_end); +} + /* - * + * Legacy routines to get the start and end for a vm_map_t. They + * return them in the vm_offset_t format. So, they should only be + * called on maps that are the same size as the kernel map for + * accurate results. */ vm_offset_t -get_map_start( +get_vm_start( vm_map_t map) { - return(vm_map_first_entry(map)->vme_start); + return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start)); } -/* - * - */ vm_offset_t -get_map_end( +get_vm_end( vm_map_t map) { - return(vm_map_last_entry(map)->vme_end); + return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end)); } /* @@ -88,123 +111,43 @@ get_map_end( /* until component support available */ int vnode_pager_workaround; -typedef int vnode_port_t; - typedef struct vnode_pager { int *pager; /* pager workaround pointer */ unsigned int pager_ikot; /* JMM: fake ip_kotype() */ unsigned int ref_count; /* reference count */ memory_object_control_t control_handle; /* mem object control handle */ - vnode_port_t vnode_handle; /* vnode handle */ + struct vnode *vnode_handle; /* vnode handle */ } *vnode_pager_t; ipc_port_t -trigger_name_to_port( +trigger_name_to_port( /* forward */ mach_port_t); -void -vnode_pager_bootstrap( - void); - -void -vnode_pager_alloc_map( - void); - -memory_object_t -vnode_pager_setup( - vnode_port_t, - memory_object_t); - - -kern_return_t -vnode_pager_init( - memory_object_t, - memory_object_control_t, - vm_size_t); - -kern_return_t -vnode_pager_get_object_size( - memory_object_t, - memory_object_offset_t *); - -kern_return_t -vnode_pager_data_request( - memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); - -kern_return_t -vnode_pager_data_return( - memory_object_t, - memory_object_offset_t, - vm_size_t, - boolean_t, - boolean_t); - -kern_return_t -vnode_pager_data_initialize( - memory_object_t, - memory_object_offset_t, - vm_size_t); - -void -vnode_pager_deallocate( - memory_object_t); - -kern_return_t -vnode_pager_terminate( - memory_object_t); - kern_return_t -vnode_pager_cluster_read( +vnode_pager_cluster_read( /* forward */ vnode_pager_t, vm_object_offset_t, vm_size_t); void -vnode_pager_cluster_write( +vnode_pager_cluster_write( /* forward */ vnode_pager_t, vm_object_offset_t, - vm_size_t); - - -int -vnode_pagein( - vnode_port_t, - upl_t, - vm_offset_t, - vm_object_offset_t, - int, - int, - int *); -int -vnode_pageout( - vnode_port_t, - upl_t, - vm_offset_t, - vm_object_offset_t, - int, - int, - int *); + vm_size_t, + vm_object_offset_t *, + int *, + int); -vm_object_offset_t -vnode_pager_get_filesize( - vnode_port_t); vnode_pager_t -vnode_object_create( - vnode_port_t vp); +vnode_object_create( /* forward */ + struct vnode *); vnode_pager_t -vnode_pager_lookup( +vnode_pager_lookup( /* forward */ memory_object_t); -void -vnode_pager_release_from_cache( - int *cnt); - zone_t vnode_pager_zone; @@ -237,11 +180,12 @@ int pagerdebug=0; 
*/ int macx_triggers( - int hi_water, - int low_water, - int flags, - mach_port_t trigger_name) + struct macx_triggers_args *args) { + int hi_water = args->hi_water; + int low_water = args->low_water; + int flags = args->flags; + mach_port_t trigger_name = args->alert_port; kern_return_t kr; memory_object_default_t default_pager; ipc_port_t trigger_port; @@ -252,6 +196,27 @@ macx_triggers( if(kr != KERN_SUCCESS) { return EINVAL; } + + if ((flags & SWAP_ENCRYPT_ON) && + (flags & SWAP_ENCRYPT_OFF)) { + /* can't have it both ways */ + return EINVAL; + } + + if (flags & SWAP_ENCRYPT_ON) { + /* ENCRYPTED SWAP: tell default_pager to encrypt */ + default_pager_triggers(default_pager, + 0, 0, + SWAP_ENCRYPT_ON, + IP_NULL); + } else if (flags & SWAP_ENCRYPT_OFF) { + /* ENCRYPTED SWAP: tell default_pager not to encrypt */ + default_pager_triggers(default_pager, + 0, 0, + SWAP_ENCRYPT_OFF, + IP_NULL); + } + if (flags & HI_WAT_ALERT) { trigger_port = trigger_name_to_port(trigger_name); if(trigger_port == NULL) { @@ -292,16 +257,20 @@ macx_triggers( ext.timeshare = FALSE; pre.importance = INT32_MAX; - thread_policy_set(current_act(), - THREAD_EXTENDED_POLICY, (thread_policy_t)&ext, - THREAD_EXTENDED_POLICY_COUNT); + thread_policy_set(current_thread(), + THREAD_EXTENDED_POLICY, + (thread_policy_t)&ext, + THREAD_EXTENDED_POLICY_COUNT); - thread_policy_set(current_act(), - THREAD_PRECEDENCE_POLICY, (thread_policy_t)&pre, - THREAD_PRECEDENCE_POLICY_COUNT); + thread_policy_set(current_thread(), + THREAD_PRECEDENCE_POLICY, + (thread_policy_t)&pre, + THREAD_PRECEDENCE_POLICY_COUNT); } - current_thread()->vm_privilege = TRUE; + current_thread()->options |= TH_OPT_VMPRIV; + + return 0; } /* @@ -324,6 +293,126 @@ trigger_name_to_port( return trigger_port; } + +extern int uiomove64(addr64_t, int, void *); +#define MAX_RUN 32 + +int +memory_object_control_uiomove( + memory_object_control_t control, + memory_object_offset_t offset, + void * uio, + int start_offset, + int io_requested, + int mark_dirty) +{ + vm_object_t object; + vm_page_t dst_page; + int xsize; + int retval = 0; + int cur_run; + int cur_needed; + int i; + vm_page_t page_run[MAX_RUN]; + + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) { + return (0); + } + assert(!object->internal); + + vm_object_lock(object); + + if (mark_dirty && object->copy != VM_OBJECT_NULL) { + /* + * We can't modify the pages without honoring + * copy-on-write obligations first, so fall off + * this optimized path and fall back to the regular + * path. + */ + vm_object_unlock(object); + return 0; + } + + while (io_requested && retval == 0) { + + cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE; + + if (cur_needed > MAX_RUN) + cur_needed = MAX_RUN; + + for (cur_run = 0; cur_run < cur_needed; ) { + + if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) + break; + /* + * Sync up on getting the busy bit + */ + if ((dst_page->busy || dst_page->cleaning)) { + /* + * someone else is playing with the page... if we've + * already collected pages into this run, go ahead + * and process now, we can't block on this + * page while holding other pages in the BUSY state + * otherwise we will wait + */ + if (cur_run) + break; + PAGE_SLEEP(object, dst_page, THREAD_UNINT); + continue; + } + /* + * this routine is only called when copying + * to/from real files... 
no need to consider + * encrypted swap pages + */ + assert(!dst_page->encrypted); + + if (mark_dirty) + dst_page->dirty = TRUE; + dst_page->busy = TRUE; + + page_run[cur_run++] = dst_page; + + offset += PAGE_SIZE_64; + } + if (cur_run == 0) + /* + * we hit a 'hole' in the cache + * we bail at this point + * we'll unlock the object below + */ + break; + vm_object_unlock(object); + + for (i = 0; i < cur_run; i++) { + + dst_page = page_run[i]; + + if ((xsize = PAGE_SIZE - start_offset) > io_requested) + xsize = io_requested; + + if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) ) + break; + + io_requested -= xsize; + start_offset = 0; + } + vm_object_lock(object); + + for (i = 0; i < cur_run; i++) { + dst_page = page_run[i]; + + PAGE_WAKEUP_DONE(dst_page); + } + } + vm_object_unlock(object); + + return (retval); +} + + /* * */ @@ -343,8 +432,8 @@ vnode_pager_bootstrap(void) */ memory_object_t vnode_pager_setup( - vnode_port_t vp, - memory_object_t pager) + struct vnode *vp, + __unused memory_object_t pager) { vnode_pager_t vnode_object; @@ -360,14 +449,17 @@ vnode_pager_setup( kern_return_t vnode_pager_init(memory_object_t mem_obj, memory_object_control_t control, - vm_size_t pg_size) +#if !DEBUG + __unused +#endif + vm_size_t pg_size) { vnode_pager_t vnode_object; kern_return_t kr; memory_object_attr_info_data_t attributes; - PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %x, %x, %x\n", pager, pager_request, pg_size)); + PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size)); if (control == MEMORY_OBJECT_CONTROL_NULL) return KERN_INVALID_ARGUMENT; @@ -375,6 +467,7 @@ vnode_pager_init(memory_object_t mem_obj, vnode_object = vnode_pager_lookup(mem_obj); memory_object_control_reference(control); + vnode_object->control_handle = control; attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY; @@ -402,33 +495,37 @@ vnode_pager_data_return( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t data_cnt, - boolean_t dirty, - boolean_t kernel_copy) + memory_object_offset_t *resid_offset, + int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + int upl_flags) { register vnode_pager_t vnode_object; vnode_object = vnode_pager_lookup(mem_obj); - vnode_pager_cluster_write(vnode_object, offset, data_cnt); + vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags); return KERN_SUCCESS; } kern_return_t vnode_pager_data_initialize( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t data_cnt) + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t data_cnt) { + panic("vnode_pager_data_initialize"); return KERN_FAILURE; } kern_return_t vnode_pager_data_unlock( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t size, - vm_prot_t desired_access) + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t size, + __unused vm_prot_t desired_access) { return KERN_FAILURE; } @@ -454,7 +551,10 @@ vnode_pager_data_request( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t length, - vm_prot_t protection_required) +#if !DEBUG + __unused +#endif +vm_prot_t protection_required) { register vnode_pager_t vnode_object; @@ -464,9 +564,7 @@ vnode_pager_data_request( PAGER_DEBUG(PAGER_PAGEIN, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj, offset, length, protection_required, vnode_object)); - 
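The memory_object_control_uiomove routine added above is built around a lock-juggling pattern: gather up to MAX_RUN resident pages while holding the object lock, marking each busy; drop the lock for the uiomove64 copies, which may block; then retake it and wake every page in the run, even after an error. A stripped-down, compilable model of that three-phase loop -- the page cache and helpers are toy stand-ins, not kernel API:

    #include <stddef.h>

    #define MAX_RUN     32
    #define CACHE_PAGES 256

    typedef struct { int present; int busy; char data; } page_t;

    static page_t cache[CACHE_PAGES];      /* toy page "object"           */

    static page_t *lookup_page(unsigned long off)
    {
        if (off >= CACHE_PAGES || !cache[off].present)
            return NULL;                   /* hole in the cache           */
        return &cache[off];
    }
    static void lock_object(void)    { }   /* object-lock stand-ins       */
    static void unlock_object(void)  { }
    static int  copy_page(page_t *p) { p->data = 0; return 0; }

    static int
    uiomove_model(unsigned long off, int pages_wanted)
    {
        int retval = 0;

        lock_object();
        while (pages_wanted > 0 && retval == 0) {
            page_t *run[MAX_RUN];
            int n = 0, i;

            /* Phase 1: with the lock held, collect a run of resident  */
            /* pages, marking each busy so it cannot change under us.  */
            while (n < MAX_RUN && n < pages_wanted) {
                page_t *p = lookup_page(off + n);
                if (p == NULL)
                    break;                 /* hit a hole: process run   */
                p->busy = 1;
                run[n++] = p;
            }
            if (n == 0)
                break;                     /* nothing resident: bail    */

            /* Phase 2: drop the lock for the copies, which may block; */
            /* the busy bit pins the pages while the lock is not held. */
            unlock_object();
            for (i = 0; i < n; i++)
                if ((retval = copy_page(run[i])) != 0)
                    break;
            lock_object();

            /* Phase 3: clear busy and wake waiters, even on error.    */
            for (i = 0; i < n; i++)
                run[i]->busy = 0;

            off += n;
            pages_wanted -= n;
        }
        unlock_object();
        return retval;
    }

Bailing out when the lookup finds a hole mirrors the kernel routine, which abandons this optimized path at the first non-resident page and lets the regular fault path handle the rest.
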
vnode_pager_cluster_read(vnode_object, offset, length); - - return KERN_SUCCESS; + return vnode_pager_cluster_read(vnode_object, offset, length); } /* @@ -498,10 +596,10 @@ vnode_pager_deallocate( vnode_object = vnode_pager_lookup(mem_obj); if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) { - if (vnode_object->vnode_handle != (vnode_port_t) NULL) { + if (vnode_object->vnode_handle != NULL) { vnode_pager_vrele(vnode_object->vnode_handle); } - zfree(vnode_pager_zone, (vm_offset_t) vnode_object); + zfree(vnode_pager_zone, vnode_object); } return; } @@ -511,6 +609,9 @@ vnode_pager_deallocate( */ kern_return_t vnode_pager_terminate( +#if !DEBUG + __unused +#endif memory_object_t mem_obj) { PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %x\n", mem_obj)); @@ -526,7 +627,7 @@ vnode_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t length, - vm_sync_t sync_flags) + __unused vm_sync_t sync_flags) { register vnode_pager_t vnode_object; @@ -564,66 +665,115 @@ void vnode_pager_cluster_write( vnode_pager_t vnode_object, vm_object_offset_t offset, - vm_size_t cnt) + vm_size_t cnt, + vm_object_offset_t * resid_offset, + int * io_error, + int upl_flags) { - int error = 0; - int local_error = 0; - int kret; - int size; + vm_size_t size; + upl_t upl = NULL; + int request_flags; + int errno; - if (cnt & PAGE_MASK) { - panic("vs_cluster_write: cnt not a multiple of PAGE_SIZE"); - } - size = (cnt < (PAGE_SIZE*32)) ? cnt : (PAGE_SIZE*32); /* effective min */ - - while (cnt) { + if (upl_flags & UPL_MSYNC) { - kret = vnode_pageout(vnode_object->vnode_handle, - (upl_t )NULL, (vm_offset_t)NULL, - offset, size, 0, &local_error); -/* - if(kret == PAGER_ABSENT) { - Need to work out the defs here, 1 corresponds to - PAGER_ABSENT defined in bsd/vm/vm_pager.h However, - we should not be including that file here it is a - layering violation. -*/ - if(kret == 1) { - int uplflags; - upl_t upl = NULL; - int count = 0; - kern_return_t kr; - - uplflags = (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | - UPL_SET_INTERNAL | UPL_COPYOUT_FROM); - count = 0; - kr = memory_object_upl_request( - vnode_object->control_handle, - offset, size, &upl, NULL, &count, uplflags); - if(kr != KERN_SUCCESS) { - panic("vnode_pager_cluster_write: upl request failed\n"); - } - upl_abort(upl, 0); - upl_deallocate(upl); + upl_flags |= UPL_VNODE_PAGER; + + if ( (upl_flags & UPL_IOSYNC) && io_error) + upl_flags |= UPL_KEEPCACHED; + + while (cnt) { + kern_return_t kr; + + size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? 
cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */ - error = 0; - local_error = 0; + request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE | + UPL_SET_INTERNAL | UPL_SET_LITE; + + kr = memory_object_upl_request(vnode_object->control_handle, + offset, size, &upl, NULL, NULL, request_flags); + if (kr != KERN_SUCCESS) + panic("vnode_pager_cluster_write: upl request failed\n"); + + vnode_pageout(vnode_object->vnode_handle, + upl, (vm_offset_t)0, offset, size, upl_flags, &errno); + + if ( (upl_flags & UPL_KEEPCACHED) ) { + if ( (*io_error = errno) ) + break; + } + cnt -= size; + offset += size; + } + if (resid_offset) + *resid_offset = offset; + + } else { + vm_object_offset_t vnode_size; + vm_object_offset_t base_offset; + vm_object_t object; + vm_page_t target_page; + int ticket; + + /* + * this is the pageout path + */ + vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle); + + if (vnode_size > (offset + PAGE_SIZE)) { + /* + * preset the maximum size of the cluster + * and put us on a nice cluster boundary... + * and then clip the size to insure we + * don't request past the end of the underlying file + */ + size = PAGE_SIZE * MAX_UPL_TRANSFER; + base_offset = offset & ~((signed)(size - 1)); + + if ((base_offset + size) > vnode_size) + size = round_page_32(((vm_size_t)(vnode_size - base_offset))); + } else { + /* + * we've been requested to page out a page beyond the current + * end of the 'file'... don't try to cluster in this case... + * we still need to send this page through because it might + * be marked precious and the underlying filesystem may need + * to do something with it (besides page it out)... + */ + base_offset = offset; + size = PAGE_SIZE; } + object = memory_object_control_to_vm_object(vnode_object->control_handle); + + if (object == VM_OBJECT_NULL) + panic("vnode_pager_cluster_write: NULL vm_object in control handle\n"); + + request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE | + UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | + UPL_SET_INTERNAL | UPL_SET_LITE; + + vm_object_lock(object); + + if ((target_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + /* + * only pick up pages whose ticket number matches + * the ticket number of the page orginally targeted + * for pageout + */ + ticket = target_page->page_ticket; - if (local_error != 0) { - error = local_error; - local_error = 0; + request_flags |= ((ticket << UPL_PAGE_TICKET_SHIFT) & UPL_PAGE_TICKET_MASK); } - cnt -= size; - offset += size; - size = (cnt < (PAGE_SIZE*32)) ? cnt : (PAGE_SIZE*32); /* effective min */ + vm_object_unlock(object); + + vm_object_upl_request(object, base_offset, size, + &upl, NULL, NULL, request_flags); + if (upl == NULL) + panic("vnode_pager_cluster_write: upl request failed\n"); + + vnode_pageout(vnode_object->vnode_handle, + upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL); } -#if 0 - if (error != 0) - return(KERN_FAILURE); - - return(KERN_SUCCESS); -#endif /* 0 */ } @@ -636,46 +786,54 @@ vnode_pager_cluster_read( vm_object_offset_t offset, vm_size_t cnt) { - int error = 0; int local_error = 0; int kret; - if(cnt & PAGE_MASK) { - panic("vs_cluster_read: cnt not a multiple of PAGE_SIZE"); - } + assert(! 
(cnt & PAGE_MASK)); - kret = vnode_pagein(vnode_object->vnode_handle, (upl_t)NULL, (vm_offset_t)NULL, offset, cnt, 0, &local_error); + kret = vnode_pagein(vnode_object->vnode_handle, + (upl_t) NULL, + (vm_offset_t) NULL, + offset, + cnt, + 0, + &local_error); /* if(kret == PAGER_ABSENT) { Need to work out the defs here, 1 corresponds to PAGER_ABSENT defined in bsd/vm/vm_pager.h However, we should not be including that file here it is a layering violation. */ - if(kret == 1) { - int uplflags; - upl_t upl = NULL; - int count = 0; - kern_return_t kr; - - uplflags = (UPL_NO_SYNC | - UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL); - count = 0; - kr = memory_object_upl_request( - vnode_object->control_handle, offset, cnt, - &upl, NULL, &count, uplflags); - if(kr != KERN_SUCCESS) { - panic("vnode_pager_cluster_read: upl request failed\n"); - } + if (kret == 1) { + int uplflags; + upl_t upl = NULL; + int count = 0; + kern_return_t kr; + + uplflags = (UPL_NO_SYNC | + UPL_CLEAN_IN_PLACE | + UPL_SET_INTERNAL); + count = 0; + kr = memory_object_upl_request(vnode_object->control_handle, + offset, cnt, + &upl, NULL, &count, uplflags); + if (kr == KERN_SUCCESS) { upl_abort(upl, 0); upl_deallocate(upl); + } else { + /* + * We couldn't gather the page list, probably + * because the memory object doesn't have a link + * to a VM object anymore (forced unmount, for + * example). Just return an error to the vm_fault() + * path and let it handle it. + */ + } - error = 1; + return KERN_FAILURE; } - if (error != 0) - return(KERN_FAILURE); - - return(KERN_SUCCESS); + return KERN_SUCCESS; } @@ -696,7 +854,7 @@ vnode_pager_release_from_cache( */ vnode_pager_t vnode_object_create( - vnode_port_t vp) + struct vnode *vp) { register vnode_pager_t vnode_object; diff --git a/osfmk/vm/cpm.h b/osfmk/vm/cpm.h index b84f9ffbd..8bbd672bf 100644 --- a/osfmk/vm/cpm.h +++ b/osfmk/vm/cpm.h @@ -38,6 +38,8 @@ #include #include +#include +#include /* * Return a linked list of physically contiguous * wired pages. Caller is responsible for disposal diff --git a/osfmk/vm/device_vm.c b/osfmk/vm/device_vm.c index 30e7edc20..464220034 100644 --- a/osfmk/vm/device_vm.c +++ b/osfmk/vm/device_vm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
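In the rewritten vnode_pager_cluster_write above, the pageout path first rounds the request down to a cluster boundary (base_offset = offset & ~(size - 1)) and then clips the cluster so the UPL never reaches past the end of the file. The arithmetic is the standard power-of-two mask trick; a standalone rendering with a worked example, assuming the cluster size is a power of two (as PAGE_SIZE * MAX_UPL_TRANSFER is) and that the caller has already checked the offset lies inside the file:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Round offset down to a cluster boundary, then clip the cluster */
    /* so it does not run past the end of the file.                   */
    static void
    cluster_bounds(uint64_t offset, uint64_t cluster_size, uint64_t file_size,
                   uint64_t *base, uint64_t *len)
    {
        assert((cluster_size & (cluster_size - 1)) == 0); /* power of two */
        assert(offset < file_size);                       /* caller's job */

        *base = offset & ~(cluster_size - 1);
        *len  = cluster_size;
        if (*base + *len > file_size)
            *len = file_size - *base;
    }

    int main(void)
    {
        uint64_t base, len;

        /* 128 KB clusters, a 300 KB file, a page at offset 260 KB */
        cluster_bounds(260 * 1024, 128 * 1024, 300 * 1024, &base, &len);
        printf("cluster at %llu, length %llu\n",
               (unsigned long long)base, (unsigned long long)len);
        /* prints: cluster at 262144, length 45056 */
        return 0;
    }
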
* * @APPLE_LICENSE_HEADER_START@ * @@ -21,20 +21,28 @@ */ #include -#include + #include -#include -#include -#include #include +#include #include #include #include +#include +#include +#include +#include +#include #include #include -#include #include +#include #include +#include +#include +#include +#include + /* Device VM COMPONENT INTERFACES */ @@ -65,82 +73,13 @@ typedef struct device_pager { -void -device_pager_bootstrap( - void); - - -memory_object_t -device_pager_setup( - memory_object_t, - int, - vm_size_t, - int); device_pager_t -device_pager_lookup( - memory_object_t); - -kern_return_t -device_pager_init( - memory_object_t, - memory_object_control_t, - vm_size_t); - - -kern_return_t -device_pager_data_request( - memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); - -kern_return_t -device_pager_data_return( - memory_object_t, - memory_object_offset_t, - vm_size_t, - boolean_t, - boolean_t); - -void -device_pager_reference( - memory_object_t); - -void -device_pager_deallocate( - memory_object_t); - -kern_return_t -device_pager_data_initialize( - memory_object_t, - memory_object_offset_t, - vm_size_t); - -kern_return_t -device_pager_data_unlock( - memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); - -kern_return_t -device_pager_terminate( - memory_object_t); - -kern_return_t -device_pager_synchronize( - memory_object_t, - memory_object_offset_t, - vm_offset_t, - vm_sync_t); - -kern_return_t -device_pager_unmap( +device_pager_lookup( /* forward */ memory_object_t); device_pager_t -device_object_create(void); +device_object_create(void); /* forward */ zone_t device_pager_zone; @@ -174,7 +113,7 @@ device_pager_bootstrap(void) */ memory_object_t device_pager_setup( - memory_object_t device, + __unused memory_object_t device, int device_handle, vm_size_t size, int flags) @@ -206,7 +145,6 @@ device_pager_populate_object( vm_object_t vm_object; kern_return_t kr; upl_t upl; - ipc_port_t previous; device_object = device_pager_lookup(device); if(device_object == DEVICE_PAGER_NULL) @@ -231,7 +169,7 @@ device_pager_populate_object( if(kr != KERN_SUCCESS) panic("device_pager_populate_object: list_req failed"); - upl_commit(upl, NULL); + upl_commit(upl, NULL, 0); upl_deallocate(upl); } @@ -257,9 +195,10 @@ device_pager_lookup( * */ kern_return_t -device_pager_init(memory_object_t mem_obj, - memory_object_control_t control, - vm_size_t pg_size) +device_pager_init( + memory_object_t mem_obj, + memory_object_control_t control, + __unused vm_size_t pg_size) { device_pager_t device_object; kern_return_t kr; @@ -312,13 +251,15 @@ device_pager_init(memory_object_t mem_obj, /* * */ +/*ARGSUSED6*/ kern_return_t device_pager_data_return( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t data_cnt, - boolean_t dirty, - boolean_t kernel_copy) + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t data_cnt, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags) { device_pager_t device_object; @@ -326,8 +267,10 @@ device_pager_data_return( if (device_object == DEVICE_PAGER_NULL) panic("device_pager_data_return: lookup failed"); - return device_data_action(device_object->device_handle, device_object, - VM_PROT_READ | VM_PROT_WRITE, offset, data_cnt); + return device_data_action(device_object->device_handle, + (ipc_port_t) device_object, + VM_PROT_READ | VM_PROT_WRITE, + offset, data_cnt); } /* @@ -338,7 +281,7 @@ device_pager_data_request( memory_object_t mem_obj, memory_object_offset_t offset, vm_size_t 
length, - vm_prot_t protection_required) + __unused vm_prot_t protection_required) { device_pager_t device_object; @@ -347,8 +290,9 @@ device_pager_data_request( if (device_object == DEVICE_PAGER_NULL) panic("device_pager_data_request: lookup failed"); - device_data_action(device_object->device_handle, device_object, - VM_PROT_READ, offset, length); + device_data_action(device_object->device_handle, + (ipc_port_t) device_object, + VM_PROT_READ, offset, length); return KERN_SUCCESS; } @@ -374,40 +318,58 @@ void device_pager_deallocate( memory_object_t mem_obj) { - device_pager_t device_object; + device_pager_t device_object; + memory_object_control_t device_control; device_object = device_pager_lookup(mem_obj); if (hw_atomic_sub(&device_object->ref_count, 1) == 0) { if (device_object->device_handle != (device_port_t) NULL) { device_close(device_object->device_handle); + device_object->device_handle = (device_port_t) NULL; } - zfree(device_pager_zone, (vm_offset_t) device_object); + device_control = device_object->control_handle; + if (device_control != MEMORY_OBJECT_CONTROL_NULL) { + /* + * The VM object should already have been disconnected + * from the pager at this point. + * We still have to release the "memory object control" + * handle. + */ + assert(device_control->object == VM_OBJECT_NULL); + memory_object_control_deallocate(device_control); + device_object->control_handle = + MEMORY_OBJECT_CONTROL_NULL; + } + + zfree(device_pager_zone, device_object); } return; } kern_return_t device_pager_data_initialize( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t data_cnt) + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t data_cnt) { + panic("device_pager_data_initialize"); return KERN_FAILURE; } kern_return_t device_pager_data_unlock( - memory_object_t mem_obj, - memory_object_offset_t offset, - vm_size_t size, - vm_prot_t desired_access) + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused vm_size_t size, + __unused vm_prot_t desired_access) { return KERN_FAILURE; } +kern_return_t device_pager_terminate( - memory_object_t mem_obj) + __unused memory_object_t mem_obj) { return KERN_SUCCESS; } @@ -422,7 +384,7 @@ device_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, vm_offset_t length, - vm_sync_t sync_flags) + __unused vm_sync_t sync_flags) { device_pager_t device_object; @@ -439,7 +401,7 @@ device_pager_synchronize( */ kern_return_t device_pager_unmap( - memory_object_t mem_obj) + __unused memory_object_t mem_obj) { return KERN_SUCCESS; } diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index 2bc1d624f..286fcf691 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
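The memory_object.c diff that follows rewrites vm_object_update around a pre-flight pass: rather than walking a possibly terabyte-sized offset range page by page, it scans the (unordered) resident page queue once and coalesces the offsets into at most MAX_EXTENTS bucketed ranges, falling back to a single min/max extent when the buckets overflow; the bucket for a page is its offset masked by ~(EXTENT_SIZE - 1). A condensed model of that collection step -- simplified to bare page offsets, reusing the patch's constants and extents[] shape but otherwise illustrative:

    #include <stdint.h>

    #define MAX_EXTENTS 8
    #define EXTENT_SIZE (256ULL * 1024 * 1024)   /* 256 MB buckets */

    struct extent {
        uint64_t e_base;   /* bucket base: offset & e_mask  */
        uint64_t e_min;    /* lowest page offset collected  */
        uint64_t e_max;    /* highest page offset collected */
    };

    /*
     * Fold one resident-page offset into the extent list and return
     * the new extent count.  Start with *e_mask = ~(EXTENT_SIZE - 1).
     * When the list is full, collapse to a single min/max extent and
     * clear the mask so every later page lands in that one bucket.
     */
    static int
    add_to_extents(struct extent ex[], int n, uint64_t off, uint64_t *e_mask)
    {
        int i;

        for (i = 0; i < n; i++) {
            if ((off & *e_mask) == ex[i].e_base) { /* fits this bucket */
                if (off < ex[i].e_min) ex[i].e_min = off;
                if (off > ex[i].e_max) ex[i].e_max = off;
                return n;
            }
        }
        if (n < MAX_EXTENTS) {                     /* open a new bucket */
            ex[n].e_base = off & *e_mask;
            ex[n].e_min = ex[n].e_max = off;
            return n + 1;
        }
        /* Full: merge all buckets into ex[0], widen with this page. */
        for (i = 1; i < n; i++) {
            if (ex[i].e_min < ex[0].e_min) ex[0].e_min = ex[i].e_min;
            if (ex[i].e_max > ex[0].e_max) ex[0].e_max = ex[i].e_max;
        }
        ex[0].e_base = 0;
        *e_mask = 0;
        if (off < ex[0].e_min) ex[0].e_min = off;
        if (off > ex[0].e_max) ex[0].e_max = off;
        return 1;
    }

vm_object_update_extent is then run once per collected extent, so the pathological case the patch comments call out -- a single resident page at the far end of an enormous range -- costs one short extent walk instead of a scan of the entire range.
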
* * @APPLE_LICENSE_HEADER_START@ * @@ -99,17 +99,13 @@ #include #endif /* MACH_PAGEMAP */ +#include + + memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; vm_size_t memory_manager_default_cluster = 0; decl_mutex_data(, memory_manager_default_lock) -/* - * Forward ref to file-local function: - */ -boolean_t -vm_object_update(vm_object_t, vm_object_offset_t, - vm_size_t, memory_object_return_t, int, vm_prot_t); - /* * Routine: memory_object_should_return_page @@ -299,7 +295,7 @@ memory_object_lock_page( vm_page_unlock_queues(); if (!should_flush) - pmap_page_protect(m->phys_page, VM_PROT_NONE); + pmap_disconnect(m->phys_page); if (m->dirty) return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN); @@ -314,8 +310,6 @@ memory_object_lock_page( if (should_flush) { VM_PAGE_FREE(m); } else { - extern boolean_t vm_page_deactivate_hint; - /* * XXX Make clean but not flush a paging hint, * and deactivate the pages. This is a hack @@ -335,19 +329,26 @@ memory_object_lock_page( return(MEMORY_OBJECT_LOCK_RESULT_DONE); } -#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po) \ +#define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync) \ MACRO_BEGIN \ \ - register int i; \ - register vm_page_t hp; \ - \ + register int upl_flags; \ + \ vm_object_unlock(object); \ \ + if (iosync) \ + upl_flags = UPL_MSYNC | UPL_IOSYNC; \ + else \ + upl_flags = UPL_MSYNC; \ + \ (void) memory_object_data_return(object->pager, \ po, \ data_cnt, \ + ro, \ + ioerr, \ (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN), \ - !should_flush); \ + !should_flush, \ + upl_flags); \ \ vm_object_lock(object); \ MACRO_END @@ -382,13 +383,16 @@ memory_object_lock_request( memory_object_control_t control, memory_object_offset_t offset, memory_object_size_t size, + memory_object_offset_t * resid_offset, + int * io_errno, memory_object_return_t should_return, int flags, vm_prot_t prot) { vm_object_t object; - vm_object_offset_t original_offset = offset; - boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH; + __unused boolean_t should_flush; + + should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; XPR(XPR_MEMORY_OBJECT, "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n", @@ -416,7 +420,7 @@ memory_object_lock_request( offset -= object->paging_offset; (void)vm_object_update(object, - offset, size, should_return, flags, prot); + offset, size, resid_offset, io_errno, should_return, flags, prot); vm_object_paging_end(object); vm_object_unlock(object); @@ -506,11 +510,13 @@ boolean_t vm_object_sync( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, + vm_object_size_t size, boolean_t should_flush, - boolean_t should_return) + boolean_t should_return, + boolean_t should_iosync) { boolean_t rv; + int flags; XPR(XPR_VM_OBJECT, "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n", @@ -524,12 +530,19 @@ vm_object_sync( vm_object_lock(object); vm_object_paging_begin(object); - rv = vm_object_update(object, offset, size, + if (should_flush) + flags = MEMORY_OBJECT_DATA_FLUSH; + else + flags = 0; + + if (should_iosync) + flags |= MEMORY_OBJECT_IO_SYNC; + + rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL, (should_return) ? MEMORY_OBJECT_RETURN_ALL : MEMORY_OBJECT_RETURN_NONE, - (should_flush) ? 
- MEMORY_OBJECT_DATA_FLUSH : 0, + flags, VM_PROT_NO_CHANGE); @@ -538,6 +551,150 @@ vm_object_sync( return rv; } + + + +static int +vm_object_update_extent( + vm_object_t object, + vm_object_offset_t offset, + vm_object_offset_t offset_end, + vm_object_offset_t *offset_resid, + int *io_errno, + boolean_t should_flush, + memory_object_return_t should_return, + boolean_t should_iosync, + vm_prot_t prot) +{ + vm_page_t m; + int retval = 0; + vm_size_t data_cnt = 0; + vm_object_offset_t paging_offset = 0; + vm_object_offset_t last_offset = offset; + memory_object_lock_result_t page_lock_result; + memory_object_lock_result_t pageout_action; + + pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE; + + for (; + offset < offset_end && object->resident_page_count; + offset += PAGE_SIZE_64) { + + /* + * Limit the number of pages to be cleaned at once. + */ + if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) { + LIST_REQ_PAGEOUT_PAGES(object, data_cnt, + pageout_action, paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + } + + while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot); + + XPR(XPR_MEMORY_OBJECT, + "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n", + (integer_t)object, offset, page_lock_result, 0, 0); + + switch (page_lock_result) + { + case MEMORY_OBJECT_LOCK_RESULT_DONE: + /* + * End of a cluster of dirty pages. + */ + if (data_cnt) { + LIST_REQ_PAGEOUT_PAGES(object, + data_cnt, pageout_action, + paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + continue; + } + break; + + case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK: + /* + * Since it is necessary to block, + * clean any dirty pages now. + */ + if (data_cnt) { + LIST_REQ_PAGEOUT_PAGES(object, + data_cnt, pageout_action, + paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + continue; + } + PAGE_SLEEP(object, m, THREAD_UNINT); + continue; + + case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN: + case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN: + /* + * The clean and return cases are similar. + * + * if this would form a discontiguous block, + * clean the old pages and start anew. + * + * Mark the page busy since we will unlock the + * object if we issue the LIST_REQ_PAGEOUT + */ + m->busy = TRUE; + if (data_cnt && + ((last_offset != offset) || (pageout_action != page_lock_result))) { + LIST_REQ_PAGEOUT_PAGES(object, + data_cnt, pageout_action, + paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + } + m->busy = FALSE; + + if (m->cleaning) { + PAGE_SLEEP(object, m, THREAD_UNINT); + continue; + } + if (data_cnt == 0) { + pageout_action = page_lock_result; + paging_offset = offset; + } + data_cnt += PAGE_SIZE; + last_offset = offset + PAGE_SIZE_64; + + vm_page_lock_queues(); + /* + * Clean + */ + m->list_req_pending = TRUE; + m->cleaning = TRUE; + + if (should_flush) { + /* + * and add additional state + * for the flush + */ + m->busy = TRUE; + m->pageout = TRUE; + vm_page_wire(m); + } + vm_page_unlock_queues(); + + retval = 1; + break; + } + break; + } + } + /* + * We have completed the scan for applicable pages. + * Clean any pages that have been saved. 
+ */ + if (data_cnt) { + LIST_REQ_PAGEOUT_PAGES(object, + data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync); + } + return (retval); +} + + + /* * Routine: vm_object_update * Description: @@ -549,24 +706,28 @@ kern_return_t vm_object_update( register vm_object_t object, register vm_object_offset_t offset, - register vm_size_t size, + register vm_object_size_t size, + register vm_object_offset_t *resid_offset, + int *io_errno, memory_object_return_t should_return, int flags, - vm_prot_t prot) + vm_prot_t protection) { - register vm_page_t m; - vm_page_t holding_page; - vm_size_t original_size = size; - vm_object_offset_t paging_offset = 0; vm_object_t copy_object; - vm_size_t data_cnt = 0; - vm_object_offset_t last_offset = offset; - memory_object_lock_result_t page_lock_result; - memory_object_lock_result_t pageout_action; boolean_t data_returned = FALSE; boolean_t update_cow; - boolean_t should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; - boolean_t pending_pageout = FALSE; + boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE; + boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE; + int num_of_extents; + int n; +#define MAX_EXTENTS 8 +#define EXTENT_SIZE (1024 * 1024 * 256) +#define RESIDENT_LIMIT (1024 * 32) + struct extent { + vm_object_offset_t e_base; + vm_object_offset_t e_min; + vm_object_offset_t e_max; + } extents[MAX_EXTENTS]; /* * To avoid blocking while scanning for pages, save @@ -593,9 +754,9 @@ vm_object_update( if((((copy_object = object->copy) != NULL) && update_cow) || (flags & MEMORY_OBJECT_DATA_SYNC)) { - vm_size_t i; - vm_size_t copy_size; - vm_object_offset_t copy_offset; + vm_map_size_t i; + vm_map_size_t copy_size; + vm_map_offset_t copy_offset; vm_prot_t prot; vm_page_t page; vm_page_t top_page; @@ -604,26 +765,28 @@ vm_object_update( if(copy_object != NULL) { /* translate offset with respect to shadow's offset */ copy_offset = (offset >= copy_object->shadow_offset)? - offset - copy_object->shadow_offset : - (vm_object_offset_t) 0; + (vm_map_offset_t)(offset - copy_object->shadow_offset) : + (vm_map_offset_t) 0; if(copy_offset > copy_object->size) copy_offset = copy_object->size; /* clip size with respect to shadow offset */ - copy_size = (offset >= copy_object->shadow_offset) ? - size : size - (copy_object->shadow_offset - offset); - - if(copy_size <= 0) { - copy_size = 0; + if (offset >= copy_object->shadow_offset) { + copy_size = size; + } else if (size >= copy_object->shadow_offset - offset) { + copy_size = size - + (copy_object->shadow_offset - offset); } else { - copy_size = ((copy_offset + copy_size) - <= copy_object->size) ? - copy_size : copy_object->size - copy_offset; + copy_size = 0; + } + + if (copy_offset + copy_size > copy_object->size) { + if (copy_object->size >= copy_offset) { + copy_size = copy_object->size - copy_offset; + } else { + copy_size = 0; + } } - /* check for a copy_offset which is beyond the end of */ - /* the copy_object */ - if(copy_size < 0) - copy_size = 0; copy_size+=copy_offset; @@ -721,144 +884,133 @@ vm_object_update( } BYPASS_COW_COPYIN: - for (; - size != 0; - size -= PAGE_SIZE, offset += PAGE_SIZE_64) - { - /* - * Limit the number of pages to be cleaned at once. 
- */ - if (pending_pageout && - data_cnt >= PAGE_SIZE * DATA_WRITE_MAX) - { - LIST_REQ_PAGEOUT_PAGES(object, data_cnt, - pageout_action, paging_offset); - data_cnt = 0; - pending_pageout = FALSE; - } - - while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - page_lock_result = memory_object_lock_page(m, should_return, - should_flush, prot); + /* + * when we have a really large range to check relative + * to the number of actual resident pages, we'd like + * to use the resident page list to drive our checks + * however, the object lock will get dropped while processing + * the page which means the resident queue can change which + * means we can't walk the queue as we process the pages + * we also want to do the processing in offset order to allow + * 'runs' of pages to be collected if we're being told to + * flush to disk... the resident page queue is NOT ordered. + * + * a temporary solution (until we figure out how to deal with + * large address spaces more generically) is to pre-flight + * the resident page queue (if it's small enough) and develop + * a collection of extents (that encompass actual resident pages) + * to visit. This will at least allow us to deal with some of the + * more pathological cases in a more efficient manner. The current + * worst case (a single resident page at the end of an extremely large + * range) can take minutes to complete for ranges in the terrabyte + * category... since this routine is called when truncating a file, + * and we currently support files up to 16 Tbytes in size, this + * is not a theoretical problem + */ - XPR(XPR_MEMORY_OBJECT, - "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n", - (integer_t)object, offset, page_lock_result, 0, 0); + if ((object->resident_page_count < RESIDENT_LIMIT) && + (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) { + vm_page_t next; + vm_object_offset_t start; + vm_object_offset_t end; + vm_object_size_t e_mask; + vm_page_t m; - switch (page_lock_result) - { - case MEMORY_OBJECT_LOCK_RESULT_DONE: - /* - * End of a cluster of dirty pages. - */ - if(pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - data_cnt = 0; - pending_pageout = FALSE; - continue; - } - break; + start = offset; + end = offset + size; + num_of_extents = 0; + e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1)); - case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK: - /* - * Since it is necessary to block, - * clean any dirty pages now. - */ - if(pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - pending_pageout = FALSE; - data_cnt = 0; - continue; - } + m = (vm_page_t) queue_first(&object->memq); - PAGE_SLEEP(object, m, THREAD_UNINT); - continue; + while (!queue_end(&object->memq, (queue_entry_t) m)) { + next = (vm_page_t) queue_next(&m->listq); - case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN: - case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN: - /* - * The clean and return cases are similar. - * - */ - - /* - * if this would form a discontiguous block, - * clean the old pages and start anew. - * - */ - - /* - * Mark the page busy since we unlock the - * object below. 
- */ - m->busy = TRUE; - if (pending_pageout && - (last_offset != offset || - pageout_action != page_lock_result)) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, - paging_offset); - pending_pageout = FALSE; - data_cnt = 0; - } - m->busy = FALSE; - holding_page = VM_PAGE_NULL; - if(m->cleaning) { - PAGE_SLEEP(object, m, THREAD_UNINT); - continue; - } - if(!pending_pageout) { - pending_pageout = TRUE; - pageout_action = page_lock_result; - paging_offset = offset; - } - if (should_flush) { - vm_page_lock_queues(); - m->list_req_pending = TRUE; - m->cleaning = TRUE; - m->busy = TRUE; - m->pageout = TRUE; - vm_page_wire(m); - vm_page_unlock_queues(); - } else { - /* - * Clean but do not flush + if ((m->offset >= start) && (m->offset < end)) { + /* + * this is a page we're interested in + * try to fit it into a current extent */ - vm_page_lock_queues(); - m->list_req_pending = TRUE; - m->cleaning = TRUE; - vm_page_unlock_queues(); - + for (n = 0; n < num_of_extents; n++) { + if ((m->offset & e_mask) == extents[n].e_base) { + /* + * use (PAGE_SIZE - 1) to determine the + * max offset so that we don't wrap if + * we're at the last page of the space + */ + if (m->offset < extents[n].e_min) + extents[n].e_min = m->offset; + else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max) + extents[n].e_max = m->offset + (PAGE_SIZE - 1); + break; + } + } + if (n == num_of_extents) { + /* + * didn't find a current extent that can encompass + * this page + */ + if (n < MAX_EXTENTS) { + /* + * if we still have room, + * create a new extent + */ + extents[n].e_base = m->offset & e_mask; + extents[n].e_min = m->offset; + extents[n].e_max = m->offset + (PAGE_SIZE - 1); + + num_of_extents++; + } else { + /* + * no room to create a new extent... + * fall back to a single extent based + * on the min and max page offsets + * we find in the range we're interested in... + * first, look through the extent list and + * develop the overall min and max for the + * pages we've looked at up to this point + */ + for (n = 1; n < num_of_extents; n++) { + if (extents[n].e_min < extents[0].e_min) + extents[0].e_min = extents[n].e_min; + if (extents[n].e_max > extents[0].e_max) + extents[0].e_max = extents[n].e_max; + } + /* + * now setup to run through the remaining pages + * to determine the overall min and max + * offset for the specified range + */ + extents[0].e_base = 0; + e_mask = 0; + num_of_extents = 1; + + /* + * by continuing, we'll reprocess the + * page that forced us to abandon trying + * to develop multiple extents + */ + continue; + } + } } - vm_object_unlock(object); - - - data_cnt += PAGE_SIZE; - last_offset = offset + PAGE_SIZE_64; - data_returned = TRUE; - - vm_object_lock(object); - break; + m = next; } - break; - } - } + } else { + extents[0].e_min = offset; + extents[0].e_max = offset + (size - 1); - /* - * We have completed the scan for applicable pages. - * Clean any pages that have been saved. 
- */ - if (pending_pageout) { - LIST_REQ_PAGEOUT_PAGES(object, - data_cnt, pageout_action, paging_offset); + num_of_extents = 1; + } + for (n = 0; n < num_of_extents; n++) { + if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno, + should_flush, should_return, should_iosync, protection)) + data_returned = TRUE; } return (data_returned); } + /* * Routine: memory_object_synchronize_completed [user interface] * @@ -879,6 +1031,8 @@ memory_object_synchronize_completed( vm_object_t object; msync_req_t msr; + object = memory_object_control_to_vm_object(control); + XPR(XPR_MEMORY_OBJECT, "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n", (integer_t)object, offset, length, 0, 0); @@ -887,7 +1041,6 @@ memory_object_synchronize_completed( * Look for bogus arguments */ - object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return (KERN_INVALID_ARGUMENT); @@ -923,7 +1076,7 @@ vm_object_set_attributes_common( boolean_t may_cache, memory_object_copy_strategy_t copy_strategy, boolean_t temporary, - vm_size_t cluster_size, + memory_object_cluster_size_t cluster_size, boolean_t silent_overwrite, boolean_t advisory_pageout) { @@ -1023,7 +1176,7 @@ memory_object_change_attributes( boolean_t temporary; boolean_t may_cache; boolean_t invalidate; - vm_size_t cluster_size; + memory_object_cluster_size_t cluster_size; memory_object_copy_strategy_t copy_strategy; boolean_t silent_overwrite; boolean_t advisory_pageout; @@ -1295,7 +1448,7 @@ kern_return_t memory_object_iopl_request( ipc_port_t port, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -1307,6 +1460,14 @@ memory_object_iopl_request( caller_flags = *flags; + if (caller_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. + */ + return KERN_INVALID_VALUE; + } + if (ip_kotype(port) == IKOT_NAMED_ENTRY) { vm_named_entry_t named_entry; @@ -1341,17 +1502,7 @@ memory_object_iopl_request( named_entry_lock(named_entry); - if(named_entry->object) { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. */ - vm_object_reference(named_entry->object); - object = named_entry->object; - named_entry_unlock(named_entry); - } else { + if (named_entry->is_pager) { object = vm_object_enter(named_entry->backing.pager, named_entry->offset + named_entry->size, named_entry->internal, @@ -1361,11 +1512,14 @@ memory_object_iopl_request( named_entry_unlock(named_entry); return(KERN_INVALID_OBJECT); } - vm_object_lock(object); + + /* JMM - drop reference on pager here? */ /* create an extra reference for the named entry */ + vm_object_lock(object); vm_object_reference_locked(object); - named_entry->object = object; + named_entry->backing.object = object; + named_entry->is_pager = FALSE; named_entry_unlock(named_entry); /* wait for object to be ready */ @@ -1376,6 +1530,16 @@ memory_object_iopl_request( vm_object_lock(object); } vm_object_unlock(object); + } else { + /* This is the case where we are going to map */ + /* an already mapped object. If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. 
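/*
 * To make the extent rule above easier to follow, here is a minimal
 * user-space sketch of the same clustering logic.  The types, the
 * SKETCH_* constants and the flat page-offset array are illustrative
 * stand-ins, not the kernel's RESIDENT_LIMIT/MAX_EXTENTS/EXTENT_SIZE
 * definitions or its resident page queue: each page either joins the
 * extent whose aligned base it shares, opens a new extent if a slot is
 * free, or collapses everything into one min/max extent when the slots
 * run out (the pathological-fallback case the comment describes).
 */
#include <stdint.h>

#define SKETCH_MAX_EXTENTS	4		/* illustrative; not the kernel's MAX_EXTENTS */
#define SKETCH_EXTENT_SIZE	(1024 * 1024)	/* illustrative clustering granule */
#define SKETCH_PAGE_SIZE	4096

struct sketch_extent {
	uint64_t e_base;	/* aligned base shared by pages in this extent */
	uint64_t e_min;		/* lowest page offset folded in so far */
	uint64_t e_max;		/* highest byte offset folded in so far */
};

static int
sketch_collect_extents(const uint64_t *page_offset, int page_count,
    struct sketch_extent extents[SKETCH_MAX_EXTENTS])
{
	uint64_t e_mask = ~((uint64_t)(SKETCH_EXTENT_SIZE - 1));
	int num_of_extents = 0;

	for (int i = 0; i < page_count; i++) {
		uint64_t off = page_offset[i];
		int n;

		/* try to fit the page into an extent we already have */
		for (n = 0; n < num_of_extents; n++) {
			if ((off & e_mask) == extents[n].e_base) {
				if (off < extents[n].e_min)
					extents[n].e_min = off;
				else if (off + (SKETCH_PAGE_SIZE - 1) > extents[n].e_max)
					extents[n].e_max = off + (SKETCH_PAGE_SIZE - 1);
				break;
			}
		}
		if (n < num_of_extents)
			continue;			/* it fit */
		if (n < SKETCH_MAX_EXTENTS) {
			/* still room: open a new extent for this page */
			extents[n].e_base = off & e_mask;
			extents[n].e_min = off;
			extents[n].e_max = off + (SKETCH_PAGE_SIZE - 1);
			num_of_extents++;
			continue;
		}
		/*
		 * out of slots: collapse everything into a single
		 * min/max extent that will swallow all later pages
		 */
		for (n = 1; n < num_of_extents; n++) {
			if (extents[n].e_min < extents[0].e_min)
				extents[0].e_min = extents[n].e_min;
			if (extents[n].e_max > extents[0].e_max)
				extents[0].e_max = extents[n].e_max;
		}
		extents[0].e_base = 0;
		e_mask = 0;	/* every offset now matches extents[0].e_base */
		num_of_extents = 1;
		i--;		/* reprocess the page that overflowed the slots */
	}
	return num_of_extents;
}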
*/ + object = named_entry->backing.object; + vm_object_reference(object); + named_entry_unlock(named_entry); } } else { memory_object_control_t control; @@ -1426,7 +1590,7 @@ kern_return_t memory_object_upl_request( memory_object_control_t control, memory_object_offset_t offset, - vm_size_t size, + upl_size_t size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -1462,8 +1626,8 @@ kern_return_t memory_object_super_upl_request( memory_object_control_t control, memory_object_offset_t offset, - vm_size_t size, - vm_size_t super_cluster, + upl_size_t size, + upl_size_t super_cluster, upl_t *upl, upl_page_info_t *user_page_list, unsigned int *page_list_count, @@ -1503,7 +1667,7 @@ kern_return_t host_default_memory_manager( host_priv_t host_priv, memory_object_default_t *default_manager, - vm_size_t cluster_size) + memory_object_cluster_size_t cluster_size) { memory_object_default_t current_manager; memory_object_default_t new_manager; @@ -1571,7 +1735,7 @@ host_default_memory_manager( __private_extern__ memory_object_default_t memory_manager_default_reference( - vm_size_t *cluster_size) + memory_object_cluster_size_t *cluster_size) { memory_object_default_t current_manager; @@ -1630,95 +1794,10 @@ __private_extern__ void memory_manager_default_init(void) { memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; - mutex_init(&memory_manager_default_lock, ETAP_VM_MEMMAN); + mutex_init(&memory_manager_default_lock, 0); } -void -memory_object_deactivate_pages( - vm_object_t object, - vm_object_offset_t offset, - vm_object_size_t size, - boolean_t kill_page) -{ - vm_object_t orig_object; - int pages_moved = 0; - int pages_found = 0; - - /* - * entered with object lock held, acquire a paging reference to - * prevent the memory_object and control ports from - * being destroyed. - */ - orig_object = object; - - for (;;) { - register vm_page_t m; - vm_object_offset_t toffset; - vm_object_size_t tsize; - - vm_object_paging_begin(object); - vm_page_lock_queues(); - - for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) { - - if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) { - - pages_found++; - - if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { - - m->reference = FALSE; - pmap_clear_reference(m->phys_page); - - if ((kill_page) && (object->internal)) { - m->precious = FALSE; - m->dirty = FALSE; - pmap_clear_modify(m->phys_page); - vm_external_state_clr(object->existence_map, offset); - } - VM_PAGE_QUEUES_REMOVE(m); - - if(m->zero_fill) { - queue_enter_first( - &vm_page_queue_zf, - m, vm_page_t, pageq); - } else { - queue_enter_first( - &vm_page_queue_inactive, - m, vm_page_t, pageq); - } - - m->inactive = TRUE; - if (!m->fictitious) - vm_page_inactive_count++; - - pages_moved++; - } - } - } - vm_page_unlock_queues(); - vm_object_paging_end(object); - - if (object->shadow) { - vm_object_t tmp_object; - - kill_page = 0; - - offset += object->shadow_offset; - - tmp_object = object->shadow; - vm_object_lock(tmp_object); - - if (object != orig_object) - vm_object_unlock(object); - object = tmp_object; - } else - break; - } - if (object != orig_object) - vm_object_unlock(object); -} /* Allow manipulation of individual page state. 
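/*
 * The named-entry branch above follows a lazy-resolution pattern: the
 * entry starts out describing a pager (is_pager == TRUE); the first
 * mapper pays for vm_object_enter() and caches the resulting object so
 * every later mapper takes the cheap already-mapped path.  A minimal
 * sketch of that shape, assuming simplified types, a pthread mutex in
 * place of named_entry_lock(), and an enter_pager() callback standing
 * in for vm_object_enter().  (The kernel drops and retakes its locks
 * around the expensive call; the sketch keeps one lock for brevity.)
 */
#include <pthread.h>

struct sk_named_entry {
	int		is_pager;	/* TRUE until the pager is resolved */
	void		*pager;		/* valid while is_pager is TRUE */
	void		*object;	/* cached result once resolved */
	pthread_mutex_t	lock;
};

static void *
sk_named_entry_object(struct sk_named_entry *ne, void *(*enter_pager)(void *))
{
	void *object;

	pthread_mutex_lock(&ne->lock);
	if (ne->is_pager) {
		object = enter_pager(ne->pager);	/* potentially expensive */
		ne->object = object;
		ne->is_pager = 0;	/* later callers skip this path */
	} else {
		object = ne->object;	/* already mapped once: reuse it */
	}
	pthread_mutex_unlock(&ne->lock);
	return object;
}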
This is actually part of */ /* the UPL regimen but takes place on the object rather than on a UPL */ @@ -1779,7 +1858,7 @@ memory_object_page_op( vm_page_lock_queues(); if (dst_page->no_isync == FALSE) - pmap_page_protect(dst_page->phys_page, VM_PROT_NONE); + pmap_disconnect(dst_page->phys_page); vm_page_free(dst_page); vm_page_unlock_queues(); @@ -1798,9 +1877,7 @@ memory_object_page_op( if(dst_page->absent) *flags |= UPL_POP_ABSENT; if(dst_page->busy) *flags |= UPL_POP_BUSY; } - if (phys_entry) - *phys_entry = dst_page->phys_page; - + /* The caller should have made a call either contingent with */ /* or prior to this call to set UPL_POP_BUSY */ if(ops & UPL_POP_SET) { @@ -1830,6 +1907,50 @@ memory_object_page_op( PAGE_WAKEUP(dst_page); } } + + if (dst_page->encrypted) { + /* + * ENCRYPTED SWAP: + * We need to decrypt this encrypted page before the + * caller can access its contents. + * But if the caller really wants to access the page's + * contents, they have to keep the page "busy". + * Otherwise, the page could get recycled or re-encrypted + * at any time. + */ + if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && + dst_page->busy) { + /* + * The page is stable enough to be accessed by + * the caller, so make sure its contents are + * not encrypted. + */ + vm_page_decrypt(dst_page, 0); + } else { + /* + * The page is not busy, so don't bother + * decrypting it, since anything could + * happen to it between now and when the + * caller wants to access it. + * We should not give the caller access + * to this page. + */ + assert(!phys_entry); + } + } + + if (phys_entry) { + /* + * The physical page number will remain valid + * only if the page is kept busy. + * ENCRYPTED SWAP: make sure we don't let the + * caller access an encrypted page. + */ + assert(dst_page->busy); + assert(!dst_page->encrypted); + *phys_entry = dst_page->phys_page; + } + break; } @@ -1875,15 +1996,18 @@ memory_object_range_op( } vm_object_lock(object); - if (object->phys_contiguous) + if (object->phys_contiguous) { + vm_object_unlock(object); return KERN_INVALID_OBJECT; + } offset = offset_beg; while (offset < offset_end) { - if (dst_page = vm_page_lookup(object, offset)) { - if (ops & UPL_ROP_DUMP) { - if (dst_page->busy || dst_page->cleaning) { + dst_page = vm_page_lookup(object, offset); + if (dst_page != VM_PAGE_NULL) { + if (ops & UPL_ROP_DUMP) { + if (dst_page->busy || dst_page->cleaning) { /* * someone else is playing with the * page, we will have to wait @@ -1901,7 +2025,7 @@ memory_object_range_op( vm_page_lock_queues(); if (dst_page->no_isync == FALSE) - pmap_page_protect(dst_page->phys_page, VM_PROT_NONE); + pmap_disconnect(dst_page->phys_page); vm_page_free(dst_page); vm_page_unlock_queues(); @@ -1920,6 +2044,27 @@ memory_object_range_op( return KERN_SUCCESS; } + +kern_return_t +memory_object_pages_resident( + memory_object_control_t control, + boolean_t * has_pages_resident) +{ + vm_object_t object; + + *has_pages_resident = FALSE; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return (KERN_INVALID_ARGUMENT); + + if (object->resident_page_count) + *has_pages_resident = TRUE; + + return (KERN_SUCCESS); +} + + static zone_t mem_obj_control_zone; __private_extern__ void @@ -1966,7 +2111,7 @@ memory_object_control_to_vm_object( memory_object_control_t convert_port_to_mo_control( - mach_port_t port) + __unused mach_port_t port) { return MEMORY_OBJECT_CONTROL_NULL; } @@ -1974,14 +2119,14 @@ convert_port_to_mo_control( mach_port_t convert_mo_control_to_port( - 
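/*
 * A self-contained sketch of the ENCRYPTED SWAP invariant added above:
 * decrypt only when the caller has pinned the page busy, and never hand
 * out a physical page number for a page that is not busy and decrypted.
 * The SK_* flags and the sk_page fields are simplified stand-ins for the
 * UPL_POP_* flags and vm_page_t, not kernel API.
 */
#include <assert.h>
#include <stdint.h>

#define SK_POP_SET	0x1	/* stand-in for UPL_POP_SET */
#define SK_POP_BUSY	0x2	/* stand-in for UPL_POP_BUSY */

struct sk_page {
	int		busy;
	int		encrypted;
	uint32_t	phys_page;
};

static void sk_decrypt(struct sk_page *p) { p->encrypted = 0; }

static int
sk_page_op(struct sk_page *p, int ops, uint32_t *phys_entry)
{
	if (p->encrypted) {
		if ((ops & SK_POP_SET) && (ops & SK_POP_BUSY) && p->busy) {
			/* page is pinned busy: safe to decrypt for the caller */
			sk_decrypt(p);
		} else {
			/* unstable page: refuse to expose its contents */
			assert(phys_entry == NULL);
		}
	}
	if (phys_entry) {
		/* only a busy, decrypted page has a stable physical number */
		assert(p->busy && !p->encrypted);
		*phys_entry = p->phys_page;
	}
	return 0;
}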
memory_object_control_t control) + __unused memory_object_control_t control) { return MACH_PORT_NULL; } void memory_object_control_reference( - memory_object_control_t control) + __unused memory_object_control_t control) { return; } @@ -1995,7 +2140,7 @@ void memory_object_control_deallocate( memory_object_control_t control) { - zfree(mem_obj_control_zone, (vm_offset_t)control); + zfree(mem_obj_control_zone, control); } void @@ -2022,7 +2167,7 @@ memory_object_default_deallocate( memory_object_t convert_port_to_memory_object( - mach_port_t port) + __unused mach_port_t port) { return (MEMORY_OBJECT_NULL); } @@ -2030,54 +2175,40 @@ convert_port_to_memory_object( mach_port_t convert_memory_object_to_port( - memory_object_t object) + __unused memory_object_t object) { return (MACH_PORT_NULL); } -#ifdef MACH_BSD -/* remove after component interface available */ -extern int vnode_pager_workaround; -extern int device_pager_workaround; -#endif - /* Routine memory_object_reference */ void memory_object_reference( memory_object_t memory_object) { -extern void dp_memory_object_reference(memory_object_t); #ifdef MACH_BSD - extern void vnode_pager_reference(memory_object_t); - extern void device_pager_reference(memory_object_t); - - if(memory_object->pager == &vnode_pager_workaround) { - vnode_pager_reference(memory_object); - } else if(memory_object->pager == &device_pager_workaround) { - device_pager_reference(memory_object); - } else + if (memory_object->pager == &vnode_pager_workaround) { + vnode_pager_reference(memory_object); + } else if (memory_object->pager == &device_pager_workaround) { + device_pager_reference(memory_object); + } else #endif - dp_memory_object_reference(memory_object); + dp_memory_object_reference(memory_object); } /* Routine memory_object_deallocate */ void memory_object_deallocate( memory_object_t memory_object) { -extern void dp_memory_object_deallocate(memory_object_t); #ifdef MACH_BSD - extern void vnode_pager_deallocate(memory_object_t); - extern void device_pager_deallocate(memory_object_t); - - if(memory_object->pager == &vnode_pager_workaround) { - vnode_pager_deallocate(memory_object); - } else if(memory_object->pager == &device_pager_workaround) { - device_pager_deallocate(memory_object); - } else + if (memory_object->pager == &vnode_pager_workaround) { + vnode_pager_deallocate(memory_object); + } else if (memory_object->pager == &device_pager_workaround) { + device_pager_deallocate(memory_object); + } else #endif - dp_memory_object_deallocate(memory_object); + dp_memory_object_deallocate(memory_object); } @@ -2086,33 +2217,23 @@ kern_return_t memory_object_init ( memory_object_t memory_object, memory_object_control_t memory_control, - vm_size_t memory_object_page_size + memory_object_cluster_size_t memory_object_page_size ) { -extern kern_return_t dp_memory_object_init(memory_object_t, - memory_object_control_t, - vm_size_t); #ifdef MACH_BSD -extern kern_return_t vnode_pager_init(memory_object_t, - memory_object_control_t, - vm_size_t); -extern kern_return_t device_pager_init(memory_object_t, - memory_object_control_t, - vm_size_t); - - if(memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_init(memory_object, - memory_control, - memory_object_page_size); - } else if(memory_object->pager == &device_pager_workaround) { - return device_pager_init(memory_object, - memory_control, - memory_object_page_size); - } else + if (memory_object->pager == &vnode_pager_workaround) { + return vnode_pager_init(memory_object, + memory_control, + 
memory_object_page_size); + } else if (memory_object->pager == &device_pager_workaround) { + return device_pager_init(memory_object, + memory_control, + memory_object_page_size); + } else #endif - return dp_memory_object_init(memory_object, - memory_control, - memory_object_page_size); + return dp_memory_object_init(memory_object, + memory_control, + memory_object_page_size); } /* Routine memory_object_terminate */ @@ -2121,15 +2242,10 @@ kern_return_t memory_object_terminate memory_object_t memory_object ) { -extern kern_return_t dp_memory_object_terminate(memory_object_t); - #ifdef MACH_BSD -extern kern_return_t vnode_pager_terminate(memory_object_t); -extern kern_return_t device_pager_terminate(memory_object_t); - - if(memory_object->pager == &vnode_pager_workaround) { + if (memory_object->pager == &vnode_pager_workaround) { return vnode_pager_terminate(memory_object); - } else if(memory_object->pager == &device_pager_workaround) { + } else if (memory_object->pager == &device_pager_workaround) { return device_pager_terminate(memory_object); } else #endif @@ -2141,35 +2257,27 @@ kern_return_t memory_object_data_request ( memory_object_t memory_object, memory_object_offset_t offset, - vm_size_t length, + memory_object_cluster_size_t length, vm_prot_t desired_access ) { -extern kern_return_t dp_memory_object_data_request(memory_object_t, - memory_object_offset_t, vm_size_t, vm_prot_t); - #ifdef MACH_BSD -extern kern_return_t vnode_pager_data_request(memory_object_t, - memory_object_offset_t, vm_size_t, vm_prot_t); -extern kern_return_t device_pager_data_request(memory_object_t, - memory_object_offset_t, vm_size_t, vm_prot_t); - - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_data_request(memory_object, - offset, - length, - desired_access); - } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_data_request(memory_object, - offset, - length, - desired_access); - } else + if (memory_object->pager == &vnode_pager_workaround) { + return vnode_pager_data_request(memory_object, + offset, + length, + desired_access); + } else if (memory_object->pager == &device_pager_workaround) { + return device_pager_data_request(memory_object, + offset, + length, + desired_access); + } else #endif - return dp_memory_object_data_request(memory_object, - offset, - length, - desired_access); + return dp_memory_object_data_request(memory_object, + offset, + length, + desired_access); } /* Routine memory_object_data_return */ @@ -2178,46 +2286,44 @@ kern_return_t memory_object_data_return memory_object_t memory_object, memory_object_offset_t offset, vm_size_t size, + memory_object_offset_t *resid_offset, + int *io_error, boolean_t dirty, - boolean_t kernel_copy + boolean_t kernel_copy, + int upl_flags ) { - extern kern_return_t dp_memory_object_data_return(memory_object_t, - memory_object_offset_t, - vm_size_t, - boolean_t, - boolean_t); #ifdef MACH_BSD - extern kern_return_t vnode_pager_data_return(memory_object_t, - memory_object_offset_t, - vm_size_t, - boolean_t, - boolean_t); - extern kern_return_t device_pager_data_return(memory_object_t, - memory_object_offset_t, - vm_size_t, - boolean_t, - boolean_t); - if (memory_object->pager == &vnode_pager_workaround) { return vnode_pager_data_return(memory_object, - offset, - size, - dirty, - kernel_copy); + offset, + size, + resid_offset, + io_error, + dirty, + kernel_copy, + upl_flags); } else if (memory_object->pager == &device_pager_workaround) { + return device_pager_data_return(memory_object, - 
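/*
 * All of the entry points in this block share one dispatch idiom: with
 * no pager vtable yet, each routine compares the object's pager pointer
 * against per-pager sentinels (vnode_pager_workaround,
 * device_pager_workaround) and otherwise falls through to the default
 * pager.  A minimal sketch of that shape with illustrative names; the
 * sentinels and sk_* handlers are ours, not the kernel's symbols.
 */
static int sk_vnode_sentinel;
static int sk_device_sentinel;

struct sk_memory_object {
	int *pager;		/* identifies the owning pager implementation */
};

static void sk_vnode_ref(struct sk_memory_object *mo)   { (void)mo; /* vnode pager path */ }
static void sk_device_ref(struct sk_memory_object *mo)  { (void)mo; /* device pager path */ }
static void sk_default_ref(struct sk_memory_object *mo) { (void)mo; /* default pager path */ }

static void
sk_reference(struct sk_memory_object *mo)
{
	if (mo->pager == &sk_vnode_sentinel)
		sk_vnode_ref(mo);
	else if (mo->pager == &sk_device_sentinel)
		sk_device_ref(mo);
	else
		sk_default_ref(mo);	/* anything else belongs to the default pager */
}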
offset, - size, - dirty, - kernel_copy); - } else + offset, + size, + dirty, + kernel_copy, + upl_flags); + } + else #endif + { return dp_memory_object_data_return(memory_object, - offset, - size, - dirty, - kernel_copy); + offset, + size, + NULL, + NULL, + dirty, + kernel_copy, + upl_flags); + } } /* Routine memory_object_data_initialize */ @@ -2228,31 +2334,20 @@ kern_return_t memory_object_data_initialize vm_size_t size ) { - - extern kern_return_t dp_memory_object_data_initialize(memory_object_t, - memory_object_offset_t, - vm_size_t); #ifdef MACH_BSD - extern kern_return_t vnode_pager_data_initialize(memory_object_t, - memory_object_offset_t, - vm_size_t); - extern kern_return_t device_pager_data_initialize(memory_object_t, - memory_object_offset_t, - vm_size_t); - if (memory_object->pager == &vnode_pager_workaround) { return vnode_pager_data_initialize(memory_object, - offset, - size); + offset, + size); } else if (memory_object->pager == &device_pager_workaround) { return device_pager_data_initialize(memory_object, - offset, - size); + offset, + size); } else #endif return dp_memory_object_data_initialize(memory_object, - offset, - size); + offset, + size); } /* Routine memory_object_data_unlock */ @@ -2264,37 +2359,23 @@ kern_return_t memory_object_data_unlock vm_prot_t desired_access ) { - extern kern_return_t dp_memory_object_data_unlock(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); #ifdef MACH_BSD - extern kern_return_t vnode_pager_data_unlock(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); - extern kern_return_t device_pager_data_unlock(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_prot_t); - if (memory_object->pager == &vnode_pager_workaround) { return vnode_pager_data_unlock(memory_object, - offset, - size, - desired_access); + offset, + size, + desired_access); } else if (memory_object->pager == &device_pager_workaround) { return device_pager_data_unlock(memory_object, - offset, - size, - desired_access); + offset, + size, + desired_access); } else #endif return dp_memory_object_data_unlock(memory_object, - offset, - size, - desired_access); - + offset, + size, + desired_access); } /* Routine memory_object_synchronize */ @@ -2306,39 +2387,23 @@ kern_return_t memory_object_synchronize vm_sync_t sync_flags ) { - extern kern_return_t dp_memory_object_data_synchronize(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_sync_t); #ifdef MACH_BSD - extern kern_return_t vnode_pager_data_synchronize(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_sync_t); - extern kern_return_t device_pager_data_synchronize(memory_object_t, - memory_object_offset_t, - vm_size_t, - vm_sync_t); - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_synchronize( - memory_object, - offset, - size, - sync_flags); + return vnode_pager_synchronize(memory_object, + offset, + size, + sync_flags); } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_synchronize( - memory_object, - offset, - size, - sync_flags); + return device_pager_synchronize(memory_object, + offset, + size, + sync_flags); } else #endif - return dp_memory_object_synchronize( - memory_object, - offset, - size, - sync_flags); + return dp_memory_object_synchronize(memory_object, + offset, + size, + sync_flags); } /* Routine memory_object_unmap */ @@ -2347,18 +2412,14 @@ kern_return_t memory_object_unmap memory_object_t memory_object ) { - extern kern_return_t dp_memory_object_unmap(memory_object_t); #ifdef 
MACH_BSD - extern kern_return_t vnode_pager_unmap(memory_object_t); - extern kern_return_t device_pager_unmap(memory_object_t); - if (memory_object->pager == &vnode_pager_workaround) { - return vnode_pager_unmap(memory_object); + return vnode_pager_unmap(memory_object); } else if (memory_object->pager == &device_pager_workaround) { - return device_pager_unmap(memory_object); + return device_pager_unmap(memory_object); } else #endif - return dp_memory_object_unmap(memory_object); + return dp_memory_object_unmap(memory_object); } /* Routine memory_object_create */ @@ -2369,12 +2430,41 @@ kern_return_t memory_object_create memory_object_t *new_memory_object ) { -extern kern_return_t default_pager_memory_object_create(memory_object_default_t, - vm_size_t, - memory_object_t *); - return default_pager_memory_object_create(default_memory_manager, new_memory_object_size, new_memory_object); } +upl_t +convert_port_to_upl( + ipc_port_t port) +{ + upl_t upl; + + ip_lock(port); + if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) { + ip_unlock(port); + return (upl_t)NULL; + } + upl = (upl_t) port->ip_kobject; + ip_unlock(port); + upl_lock(upl); + upl->ref_count+=1; + upl_unlock(upl); + return upl; +} + +mach_port_t +convert_upl_to_port( + __unused upl_t upl) +{ + return MACH_PORT_NULL; +} + +__private_extern__ void +upl_no_senders( + __unused ipc_port_t port, + __unused mach_port_mscount_t mscount) +{ + return; +} diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h index a169bd3c3..1bb55a3a8 100644 --- a/osfmk/vm/memory_object.h +++ b/osfmk/vm/memory_object.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -89,6 +89,9 @@ extern mach_port_t convert_mo_control_to_port( memory_object_control_t control); +extern void memory_object_control_disable( + memory_object_control_t control); + extern memory_object_control_t convert_port_to_mo_control( mach_port_t port); @@ -101,9 +104,30 @@ extern memory_object_t convert_port_to_memory_object( mach_port_t port); +extern upl_t convert_port_to_upl( + ipc_port_t port); + +extern ipc_port_t convert_upl_to_port( upl_t ); + +__private_extern__ void upl_no_senders(ipc_port_t, mach_port_mscount_t); + extern kern_return_t memory_object_free_from_cache( host_t host, int *pager_id, int *count); +extern kern_return_t memory_object_iopl_request( + ipc_port_t port, + memory_object_offset_t offset, + vm_size_t *upl_size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int *flags); + + +extern kern_return_t memory_object_pages_resident( + memory_object_control_t control, + boolean_t * has_pages_resident); + #endif /* _VM_MEMORY_OBJECT_H_ */ diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index 1609c8cb2..bc99746c9 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -61,10 +61,6 @@ #ifndef _VM_PMAP_H_ #define _VM_PMAP_H_ -#include - -#ifdef __APPLE_API_PRIVATE - #include #include #include @@ -72,6 +68,8 @@ #include #include +#ifdef KERNEL_PRIVATE + /* * The following is a description of the interface to the * machine-dependent "physical map" data structure. 
The module @@ -85,26 +83,28 @@ */ /* Copy between a physical page and a virtual address */ +/* LP64todo - switch to vm_map_offset_t when it grows */ extern kern_return_t copypv( addr64_t source, addr64_t sink, unsigned int size, int which); -#define cppvPsnk 1 -#define cppvPsrc 2 -#define cppvFsnk 4 -#define cppvFsrc 8 +#define cppvPsnk 1 +#define cppvPsnkb 31 +#define cppvPsrc 2 +#define cppvPsrcb 30 +#define cppvFsnk 4 +#define cppvFsnkb 29 +#define cppvFsrc 8 +#define cppvFsrcb 28 #define cppvNoModSnk 16 +#define cppvNoModSnkb 27 #define cppvNoRefSrc 32 -#define cppvKmap 64 /* User the kernel's vm_map */ - -#if !defined(MACH_KERNEL_PRIVATE) +#define cppvNoRefSrcb 26 +#define cppvKmap 64 /* Use the kernel's vm_map */ +#define cppvKmapb 25 -typedef void *pmap_t; - -#else /* MACH_KERNEL_PRIVATE */ - -typedef struct pmap *pmap_t; +#ifdef MACH_KERNEL_PRIVATE #include @@ -113,9 +113,14 @@ typedef struct pmap *pmap_t; * There is traditionally also a pmap_bootstrap, * used very early by machine-dependent code, * but it is not part of the interface. + * + * LP64todo - + * These interfaces are tied to the size of the + * kernel pmap - and therefore use the "local" + * vm_offset_t, etc... types. */ -extern vm_offset_t pmap_steal_memory(vm_size_t size); +extern void *pmap_steal_memory(vm_size_t size); /* During VM initialization, * steal a chunk of memory. */ @@ -135,6 +140,10 @@ extern void pmap_init(void); /* Initialization, * in virtual memory. */ +extern void mapping_adjust(void); /* Adjust free mapping count */ + +extern void mapping_free_prime(void); /* Primes the mapping block release list */ + #ifndef MACHINE_PAGES /* * If machine/pmap.h defines MACHINE_PAGES, it must implement @@ -169,7 +178,7 @@ extern void pmap_virtual_space( /* * Routines to manage the physical map data structure. */ -extern pmap_t pmap_create(vm_size_t size); /* Create a pmap_t. */ +extern pmap_t pmap_create(vm_map_size_t size); /* Create a pmap_t. */ extern pmap_t (pmap_kernel)(void); /* Return the kernel's pmap */ extern void pmap_reference(pmap_t pmap); /* Gain a reference. */ extern void pmap_destroy(pmap_t pmap); /* Release a reference. */ @@ -178,7 +187,7 @@ extern void pmap_switch(pmap_t); extern void pmap_enter( /* Enter a mapping */ pmap_t pmap, - vm_offset_t v, + vm_map_offset_t v, ppnum_t pn, vm_prot_t prot, unsigned int flags, @@ -227,6 +236,17 @@ extern void (pmap_copy_part_rpage)( vm_offset_t src_offset, vm_offset_t dst, vm_size_t len); + +extern unsigned int (pmap_disconnect)( /* disconnect mappings and return reference and change */ + ppnum_t phys); + +extern kern_return_t (pmap_attribute_cache_sync)( /* Flush appropriate + * cache based on + * page number sent */ + ppnum_t pn, + vm_size_t size, + vm_machine_attribute_t attribute, + vm_machine_attribute_val_t* value); /* * debug/assertions. pmap_verify_free returns true iff @@ -245,20 +265,6 @@ extern int (pmap_resident_count)(pmap_t pmap); extern void pmap_collect(pmap_t pmap);/* Perform garbage * collection, if any */ - -extern vm_offset_t (pmap_phys_address)( /* Transform address returned - * by device driver mapping - * function to physical address - * known to this module. */ - int frame); - -extern int (pmap_phys_to_frame)( /* Inverse of pmap_phys_addess, - * for use by device driver - * mapping function in - * machine-independent - * pseudo-devices. */ - vm_offset_t phys); - /* * Optional routines */ @@ -266,57 +272,65 @@ extern void (pmap_copy)( /* Copy range of mappings, * if desired. 
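/*
 * Each cppv* mask above now gains a companion cppv*b bit number, counting
 * from the most-significant end of a 32-bit word (PowerPC-style bit
 * numbering, presumably for bit-test instructions), so that
 * mask == 1 << (31 - bit).  A compile-time restatement of that
 * correspondence; the CPPV_BIT macro and the sk_cppv_check typedef are
 * ours, not part of the header:
 */
#define CPPV_BIT(b)	(1u << (31 - (b)))

typedef char sk_cppv_check[
    (1  == CPPV_BIT(31)) &&	/* cppvPsnk     / cppvPsnkb     */
    (2  == CPPV_BIT(30)) &&	/* cppvPsrc     / cppvPsrcb     */
    (4  == CPPV_BIT(29)) &&	/* cppvFsnk     / cppvFsnkb     */
    (8  == CPPV_BIT(28)) &&	/* cppvFsrc     / cppvFsrcb     */
    (16 == CPPV_BIT(27)) &&	/* cppvNoModSnk / cppvNoModSnkb */
    (32 == CPPV_BIT(26)) &&	/* cppvNoRefSrc / cppvNoRefSrcb */
    (64 == CPPV_BIT(25))	/* cppvKmap     / cppvKmapb     */
    ? 1 : -1];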
*/ pmap_t dest, pmap_t source, - vm_offset_t dest_va, - vm_size_t size, - vm_offset_t source_va); + vm_map_offset_t dest_va, + vm_map_size_t size, + vm_map_offset_t source_va); extern kern_return_t (pmap_attribute)( /* Get/Set special memory * attributes */ pmap_t pmap, - vm_offset_t va, - vm_size_t size, + vm_map_offset_t va, + vm_map_size_t size, vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value); -extern kern_return_t (pmap_attribute_cache_sync)( /* Flush appropriate - * cache based on - * page number sent */ - ppnum_t pn, - vm_size_t size, - vm_machine_attribute_t attribute, - vm_machine_attribute_val_t* value); - /* * Routines defined as macros. */ #ifndef PMAP_ACTIVATE_USER -#define PMAP_ACTIVATE_USER(act, cpu) { \ +#ifndef PMAP_ACTIVATE +#define PMAP_ACTIVATE_USER(thr, cpu) +#else /* PMAP_ACTIVATE */ +#define PMAP_ACTIVATE_USER(thr, cpu) { \ pmap_t pmap; \ \ - pmap = (act)->map->pmap; \ + pmap = (thr)->map->pmap; \ if (pmap != pmap_kernel()) \ - PMAP_ACTIVATE(pmap, (act), (cpu)); \ + PMAP_ACTIVATE(pmap, (thr), (cpu)); \ } +#endif /* PMAP_ACTIVATE */ #endif /* PMAP_ACTIVATE_USER */ #ifndef PMAP_DEACTIVATE_USER -#define PMAP_DEACTIVATE_USER(act, cpu) { \ +#ifndef PMAP_DEACTIVATE +#define PMAP_DEACTIVATE_USER(thr, cpu) +#else /* PMAP_DEACTIVATE */ +#define PMAP_DEACTIVATE_USER(thr, cpu) { \ pmap_t pmap; \ \ - pmap = (act)->map->pmap; \ - if ((pmap) != pmap_kernel()) \ - PMAP_DEACTIVATE(pmap, (act), (cpu)); \ + pmap = (thr)->map->pmap; \ + if ((pmap) != pmap_kernel()) \ + PMAP_DEACTIVATE(pmap, (thr), (cpu)); \ } +#endif /* PMAP_DEACTIVATE */ #endif /* PMAP_DEACTIVATE_USER */ #ifndef PMAP_ACTIVATE_KERNEL +#ifndef PMAP_ACTIVATE +#define PMAP_ACTIVATE_KERNEL(cpu) +#else /* PMAP_ACTIVATE */ #define PMAP_ACTIVATE_KERNEL(cpu) \ - PMAP_ACTIVATE(pmap_kernel(), THR_ACT_NULL, cpu) + PMAP_ACTIVATE(pmap_kernel(), THREAD_NULL, cpu) +#endif /* PMAP_ACTIVATE */ #endif /* PMAP_ACTIVATE_KERNEL */ #ifndef PMAP_DEACTIVATE_KERNEL +#ifndef PMAP_DEACTIVATE +#define PMAP_DEACTIVATE_KERNEL(cpu) +#else /* PMAP_DEACTIVATE */ #define PMAP_DEACTIVATE_KERNEL(cpu) \ - PMAP_DEACTIVATE(pmap_kernel(), THR_ACT_NULL, cpu) + PMAP_DEACTIVATE(pmap_kernel(), THREAD_NULL, cpu) +#endif /* PMAP_DEACTIVATE */ #endif /* PMAP_DEACTIVATE_KERNEL */ #ifndef PMAP_ENTER @@ -324,16 +338,20 @@ extern kern_return_t (pmap_attribute_cache_sync)( /* Flush appropriate * Macro to be used in place of pmap_enter() */ #define PMAP_ENTER(pmap, virtual_address, page, protection, flags, wired) \ - MACRO_BEGIN \ - pmap_enter( \ - (pmap), \ - (virtual_address), \ - (page)->phys_page, \ - (protection) & ~(page)->page_lock, \ - flags, \ - (wired) \ - ); \ - MACRO_END + MACRO_BEGIN \ + pmap_t __pmap = (pmap); \ + vm_page_t __page = (page); \ + \ + if (__pmap != kernel_pmap) { \ + ASSERT_PAGE_DECRYPTED(__page); \ + } \ + pmap_enter(__pmap, \ + (virtual_address), \ + __page->phys_page, \ + (protection) & ~__page->page_lock, \ + (flags), \ + (wired)); \ + MACRO_END #endif /* !PMAP_ENTER */ /* @@ -351,23 +369,29 @@ extern void pmap_set_modify(ppnum_t pn); extern void pmap_clear_modify(ppnum_t pn); /* Return modify bit */ extern boolean_t pmap_is_modified(ppnum_t pn); + /* Return modified and referenced bits */ +extern unsigned int pmap_get_refmod(ppnum_t pn); + /* Clear modified and referenced bits */ +extern void pmap_clear_refmod(ppnum_t pn, unsigned int mask); +#define VM_MEM_MODIFIED 0x01 /* Modified bit */ +#define VM_MEM_REFERENCED 0x02 /* Referenced bit */ /* * Routines that operate on ranges of virtual addresses. 
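/*
 * A hedged, kernel-context usage sketch for the pmap_get_refmod()/
 * pmap_clear_refmod() pair declared above: read both hardware bits in
 * one call, then clear what was observed.  sk_page_was_dirtied() is a
 * hypothetical helper, but it uses only the interface and VM_MEM_* bits
 * introduced here.
 */
static int
sk_page_was_dirtied(ppnum_t pn)
{
	unsigned int refmod = pmap_get_refmod(pn);	/* VM_MEM_* bits */

	if (refmod & VM_MEM_MODIFIED) {
		/* consume the state so the next check starts clean */
		pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
		return 1;
	}
	return 0;
}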
*/ extern void pmap_protect( /* Change protections. */ pmap_t map, - vm_offset_t s, - vm_offset_t e, + vm_map_offset_t s, + vm_map_offset_t e, vm_prot_t prot); extern void (pmap_pageable)( pmap_t pmap, - vm_offset_t start, - vm_offset_t end, + vm_map_offset_t start, + vm_map_offset_t end, boolean_t pageable); -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* MACH_KERNEL_PRIVATE */ /* * JMM - This portion is exported to other kernel components right now, @@ -375,40 +399,39 @@ extern void (pmap_pageable)( * is provided in a cleaner manner. */ -#define PMAP_NULL ((pmap_t) 0) - extern pmap_t kernel_pmap; /* The kernel's map */ #define pmap_kernel() (kernel_pmap) /* machine independent WIMG bits */ -#define VM_MEM_GUARDED 0x1 -#define VM_MEM_COHERENT 0x2 -#define VM_MEM_NOT_CACHEABLE 0x4 -#define VM_MEM_WRITE_THROUGH 0x8 +#define VM_MEM_GUARDED 0x1 /* (G) Guarded Storage */ +#define VM_MEM_COHERENT 0x2 /* (M) Memory Coherency */ +#define VM_MEM_NOT_CACHEABLE 0x4 /* (I) Cache Inhibit */ +#define VM_MEM_WRITE_THROUGH 0x8 /* (W) Write-Through */ #define VM_WIMG_MASK 0xFF #define VM_WIMG_USE_DEFAULT 0x80000000 extern void pmap_modify_pages( /* Set modify bit for pages */ pmap_t map, - vm_offset_t s, - vm_offset_t e); + vm_map_offset_t s, + vm_map_offset_t e); extern vm_offset_t pmap_extract(pmap_t pmap, - vm_offset_t va); + vm_map_offset_t va); extern void pmap_change_wiring( /* Specify pageability */ pmap_t pmap, - vm_offset_t va, + vm_map_offset_t va, boolean_t wired); +/* LP64todo - switch to vm_map_offset_t when it grows */ extern void pmap_remove( /* Remove mappings. */ pmap_t map, addr64_t s, addr64_t e); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _VM_PMAP_H_ */ diff --git a/osfmk/vm/task_working_set.c b/osfmk/vm/task_working_set.c index 51900109d..6292cabe5 100644 --- a/osfmk/vm/task_working_set.c +++ b/osfmk/vm/task_working_set.c @@ -1,6 +1,5 @@ -int startup_miss = 0; /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -32,6 +31,7 @@ int startup_miss = 0; #include +#include #include #include #include @@ -39,11 +39,37 @@ int startup_miss = 0; #include #include #include +#include + +#include -extern unsigned sched_tick; +/* + * LP64todo - Task Working Set Support is for 32-bit only + */ extern zone_t lsf_zone; /* declarations for internal use only routines */ +int startup_miss = 0; + +tws_hash_t +tws_hash_create( + unsigned int lines, + unsigned int rows, + unsigned int style); + +kern_return_t +tws_write_startup_file( + task_t task, + int fid, + int mod, + char *name, + unsigned int string_length); + +kern_return_t +tws_read_startup_file( + task_t task, + tws_startup_t startup, + vm_offset_t cache_size); tws_startup_t tws_create_startup_list( @@ -58,6 +84,17 @@ kern_return_t tws_internal_startup_send( tws_hash_t tws); +void +tws_hash_line_clear( + tws_hash_t tws, + tws_hash_line_t hash_line, + vm_object_t object, + boolean_t live); + +void +tws_hash_clear( + tws_hash_t tws); + void tws_traverse_address_hash_list ( tws_hash_t tws, @@ -77,7 +114,7 @@ tws_traverse_object_hash_list ( unsigned int index, vm_object_t object, vm_object_offset_t offset, - unsigned int page_mask, + unsigned int pagemask, tws_hash_ptr_t *target_ele, tws_hash_ptr_t **previous_ptr, tws_hash_ptr_t **free_list); @@ -99,7 +136,14 @@ int tws_test_for_community( vm_object_t object, vm_object_offset_t offset, unsigned int threshold, - unsigned int *page_mask); + unsigned int *pagemask); + +kern_return_t +tws_internal_lookup( + tws_hash_t tws, + vm_object_offset_t offset, + vm_object_t object, + tws_hash_line_t *line); /* Note: all of the routines below depend on the associated map lock for */ /* synchronization, the map lock will be on when the routines are called */ @@ -112,8 +156,6 @@ tws_hash_create( unsigned int style) { tws_hash_t tws; - int i,j; - if ((style != TWS_HASH_STYLE_BASIC) && (style != TWS_HASH_STYLE_BASIC)) { @@ -128,39 +170,34 @@ tws_hash_create( if((tws->table[0] = (tws_hash_ptr_t *) kalloc(sizeof(tws_hash_ptr_t) * lines * rows)) == NULL) { - kfree((vm_offset_t)tws, sizeof(struct tws_hash)); + kfree(tws, sizeof(struct tws_hash)); return (tws_hash_t)NULL; } if((tws->table_ele[0] = (tws_hash_ptr_t) kalloc(sizeof(struct tws_hash_ptr) * lines * rows)) == NULL) { - kfree((vm_offset_t)tws->table[0], sizeof(tws_hash_ptr_t) - * lines * rows); - kfree((vm_offset_t)tws, sizeof(struct tws_hash)); + kfree(tws->table[0], sizeof(tws_hash_ptr_t) * lines * rows); + kfree(tws, sizeof(struct tws_hash)); return (tws_hash_t)NULL; } if((tws->alt_ele[0] = (tws_hash_ptr_t) kalloc(sizeof(struct tws_hash_ptr) * lines * rows)) == NULL) { - kfree((vm_offset_t)tws->table[0], sizeof(tws_hash_ptr_t) - * lines * rows); - kfree((vm_offset_t)tws->table_ele[0], - sizeof(struct tws_hash_ptr) - * lines * rows); - kfree((vm_offset_t)tws, sizeof(struct tws_hash)); + kfree(tws->table[0], sizeof(tws_hash_ptr_t) * lines * rows); + kfree(tws->table_ele[0], + sizeof(struct tws_hash_ptr) * lines * rows); + kfree(tws, sizeof(struct tws_hash)); return (tws_hash_t)NULL; } if((tws->cache[0] = (struct tws_hash_line *) kalloc(sizeof(struct tws_hash_line) * lines)) == NULL) { - kfree((vm_offset_t)tws->table[0], sizeof(tws_hash_ptr_t) - * lines * rows); - kfree((vm_offset_t)tws->table_ele[0], - sizeof(struct tws_hash_ptr) - * lines * rows); - kfree((vm_offset_t)tws->alt_ele[0], sizeof(struct tws_hash_ptr) - * lines * rows); - kfree((vm_offset_t)tws, sizeof(struct tws_hash)); + kfree(tws->table[0], sizeof(tws_hash_ptr_t) * lines * 
rows); + kfree(tws->table_ele[0], + sizeof(struct tws_hash_ptr) * lines * rows); + kfree(tws->alt_ele[0], + sizeof(struct tws_hash_ptr) * lines * rows); + kfree(tws, sizeof(struct tws_hash)); return (tws_hash_t)NULL; } tws->free_hash_ele[0] = (tws_hash_ptr_t)0; @@ -177,7 +214,7 @@ tws_hash_create( bzero((char *)tws->cache[0], sizeof(struct tws_hash_line) * lines); - mutex_init(&tws->lock, ETAP_VM_MAP); + mutex_init(&tws->lock, 0); tws->style = style; tws->current_line = 0; tws->pageout_count = 0; @@ -194,11 +231,16 @@ tws_hash_create( return tws; } -int newtest = 0; + +extern vm_page_t +vm_page_lookup_nohint(vm_object_t object, vm_object_offset_t offset); + + void tws_hash_line_clear( tws_hash_t tws, - tws_hash_line_t hash_line, + tws_hash_line_t hash_line, + __unused vm_object_t object, boolean_t live) { struct tws_hash_ele *hash_ele; @@ -206,7 +248,7 @@ tws_hash_line_clear( struct tws_hash_ptr **free_list; tws_hash_ele_t addr_ele; int index; - unsigned int i, j, k; + unsigned int i, j; int dump_pmap; int hash_loop; @@ -235,7 +277,7 @@ tws_hash_line_clear( tws_hash_ptr_t cache_ele; index = alt_tws_hash( - hash_ele->page_addr & TWS_HASH_OFF_MASK, + hash_ele->page_addr & TWS_ADDR_OFF_MASK, tws->number_of_elements, tws->number_of_lines); @@ -257,7 +299,7 @@ tws_hash_line_clear( index = alt_tws_hash( (hash_ele->page_addr - 0x1f000) - & TWS_HASH_OFF_MASK, + & TWS_ADDR_OFF_MASK, tws->number_of_elements, tws->number_of_lines); @@ -281,21 +323,34 @@ tws_hash_line_clear( if((hash_ele->map != NULL) && (live)) { vm_page_t p; - - for (j = 0x1; j != 0; j = j<<1) { - if(j & hash_ele->page_cache) { - p = vm_page_lookup(hash_ele->object, - hash_ele->offset + local_off); - if((p != NULL) && (p->wire_count == 0) - && (dump_pmap == 1)) { - pmap_remove_some_phys((pmap_t) - vm_map_pmap( - current_map()), - p->phys_page); + +#if 0 + if (object != hash_ele->object) { + if (object) + vm_object_unlock(object); + vm_object_lock(hash_ele->object); + } +#endif + if (dump_pmap == 1) { + for (j = 0x1; j != 0; j = j<<1) { + if(j & hash_ele->page_cache) { + p = vm_page_lookup_nohint(hash_ele->object, + hash_ele->offset + local_off); + if((p != NULL) && (p->wire_count == 0)) { + pmap_remove_some_phys((pmap_t)vm_map_pmap(current_map()), + p->phys_page); + } } - } - local_off += PAGE_SIZE_64; + local_off += PAGE_SIZE_64; + } + } +#if 0 + if (object != hash_ele->object) { + vm_object_unlock(hash_ele->object); + if (object) + vm_object_lock(object); } +#endif } if(tws->style == TWS_HASH_STYLE_SIGNAL) { @@ -407,17 +462,14 @@ tws_lookup( kern_return_t tws_expand_working_set( - vm_offset_t tws, - int line_count, + tws_hash_t old_tws, + unsigned int line_count, boolean_t dump_data) { tws_hash_t new_tws; - tws_hash_t old_tws; unsigned int i,j,k; struct tws_hash temp; - old_tws = (tws_hash_t)tws; - /* Note we do an elaborate dance to preserve the header that */ /* task is pointing to. 
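/*
 * The tws_hash_create() error paths a few hunks above unwind each
 * allocation failure by explicitly kfree()ing every earlier success
 * (kfree() needs the allocation size, so each failure arm spells the
 * sizes out).  The same shape in user space collapses to one cleanup
 * label, because free() takes no size and accepts NULL; a minimal
 * sketch with illustrative names and sizes, not the kernel's code:
 */
#include <stdlib.h>

struct sk_tws {
	void *table;
	void *table_ele;
	void *alt_ele;
	void *cache;
};

static struct sk_tws *
sk_tws_create(size_t tbl_size, size_t ele_size, size_t cache_size)
{
	struct sk_tws *tws = calloc(1, sizeof(*tws));	/* zeroed: unset fields stay NULL */

	if (tws == NULL)
		return NULL;
	if ((tws->table = malloc(tbl_size)) == NULL)
		goto fail;
	if ((tws->table_ele = malloc(ele_size)) == NULL)
		goto fail;
	if ((tws->alt_ele = malloc(ele_size)) == NULL)
		goto fail;
	if ((tws->cache = malloc(cache_size)) == NULL)
		goto fail;
	return tws;
fail:
	/* free() tolerates NULL, so one label unwinds any prefix */
	free(tws->cache);
	free(tws->alt_ele);
	free(tws->table_ele);
	free(tws->table);
	free(tws);
	return NULL;
}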
In this way we can avoid taking a task */ /* lock every time we want to access the tws */ @@ -531,7 +583,6 @@ tws_insert( vm_offset_t page_addr, vm_map_t map) { - queue_t bucket; unsigned int index; unsigned int alt_index; unsigned int index_enum[2]; @@ -542,21 +593,19 @@ tws_insert( tws_hash_ptr_t *trailer; tws_hash_ptr_t *free_list; tws_hash_ele_t target_element = NULL; - int i,k; - int current_line; - int set; + unsigned int i; + unsigned int current_line; + unsigned int set; int ctr; unsigned int startup_cache_line; - vm_offset_t startup_page_addr; - int cache_full = 0; int age_of_cache = 0; - if(!tws_lock_try(tws)) { return KERN_FAILURE; } tws->insert_count++; current_line = 0xFFFFFFFF; + set = 0; startup_cache_line = 0; @@ -578,11 +627,11 @@ tws_insert( /* in avoiding duplication of entries against long lived non-cow */ /* objects */ index_enum[0] = alt_tws_hash( - page_addr & TWS_HASH_OFF_MASK, + page_addr & TWS_ADDR_OFF_MASK, tws->number_of_elements, tws->number_of_lines); index_enum[1] = alt_tws_hash( - (page_addr - 0x1f000) & TWS_HASH_OFF_MASK, + (page_addr - 0x1f000) & TWS_ADDR_OFF_MASK, tws->number_of_elements, tws->number_of_lines); for(ctr = 0; ctr < 2;) { @@ -767,10 +816,10 @@ tws_insert( * tws->number_of_lines * tws->number_of_elements)) == NULL) { - kfree((vm_offset_t)tws->table[set], - sizeof(tws_hash_ptr_t) - * tws->number_of_lines - * tws->number_of_elements); + kfree(tws->table[set], + sizeof(tws_hash_ptr_t) + * tws->number_of_lines + * tws->number_of_elements); set = 0; } else if((tws->alt_ele[set] = (tws_hash_ptr_t) @@ -778,12 +827,12 @@ tws_insert( * tws->number_of_lines * tws->number_of_elements)) == NULL) { - kfree((vm_offset_t)tws->table_ele[set], - sizeof(struct tws_hash_ptr) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->table[set], - sizeof(tws_hash_ptr_t) + kfree(tws->table_ele[set], + sizeof(struct tws_hash_ptr) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->table[set], + sizeof(tws_hash_ptr_t) * tws->number_of_lines * tws->number_of_elements); tws->table[set] = NULL; @@ -795,18 +844,18 @@ tws_insert( (struct tws_hash_line) * tws->number_of_lines)) == NULL) { - kfree((vm_offset_t)tws->alt_ele[set], - sizeof(struct tws_hash_ptr) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->table_ele[set], - sizeof(struct tws_hash_ptr) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->table[set], - sizeof(tws_hash_ptr_t) - * tws->number_of_lines - * tws->number_of_elements); + kfree(tws->alt_ele[set], + sizeof(struct tws_hash_ptr) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->table_ele[set], + sizeof(struct tws_hash_ptr) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->table[set], + sizeof(tws_hash_ptr_t) + * tws->number_of_lines + * tws->number_of_elements); tws->table[set] = NULL; set = 0; @@ -854,7 +903,7 @@ tws_insert( } if(set < tws->expansion_count) { tws_hash_line_clear(tws, - &(tws->cache[set][current_line]), TRUE); + &(tws->cache[set][current_line]), object, TRUE); if(tws->cache[set][current_line].ele_count >= tws->number_of_elements) { if(tws->style == TWS_HASH_STYLE_SIGNAL) { @@ -993,6 +1042,7 @@ tws_insert( #define PAGED_OUT(o, f) FALSE #endif /* MACH_PAGEMAP */ + void tws_build_cluster( tws_hash_t tws, @@ -1002,7 +1052,6 @@ tws_build_cluster( vm_size_t max_length) { tws_hash_line_t line; - task_t task; vm_object_offset_t before = *start; vm_object_offset_t after = *end; vm_object_offset_t original_start 
= *start; @@ -1012,10 +1061,11 @@ tws_build_cluster( kern_return_t kret; vm_object_offset_t object_size; int age_of_cache; - int pre_heat_size; + vm_size_t pre_heat_size; unsigned int ele_cache; unsigned int end_cache = 0; unsigned int start_cache = 0; + unsigned int memory_scarce = 0; if((object->private) || !(object->pager)) return; @@ -1031,10 +1081,12 @@ tws_build_cluster( if((!tws) || (!tws_lock_try(tws))) { return; } - age_of_cache = ((sched_tick - tws->time_of_creation) >> SCHED_TICK_SHIFT); + if (vm_page_free_count < (2 * vm_page_free_target)) + memory_scarce = 1; + /* When pre-heat files are not available, resort to speculation */ /* based on size of file */ @@ -1042,16 +1094,22 @@ tws_build_cluster( pre_heat_size = 0; } else { if (object_size > (vm_object_offset_t)(1024 * 1024)) - pre_heat_size = 16 * PAGE_SIZE; - else if (object_size > (vm_object_offset_t)(128 * 1024)) pre_heat_size = 8 * PAGE_SIZE; - else + else if (object_size > (vm_object_offset_t)(128 * 1024)) pre_heat_size = 4 * PAGE_SIZE; + else + pre_heat_size = 2 * PAGE_SIZE; } if (tws->startup_cache) { + int target_page_count; + + if (memory_scarce) + target_page_count = 16; + else + target_page_count = 4; - if (tws_test_for_community(tws, object, *start, 4, &ele_cache)) + if (tws_test_for_community(tws, object, *start, target_page_count, &ele_cache)) { start_cache = ele_cache; *start = *start & TWS_HASH_OFF_MASK; @@ -1248,6 +1306,7 @@ tws_build_cluster( tws_unlock(tws); } +void tws_line_signal( tws_hash_t tws, vm_map_t map, @@ -1283,7 +1342,7 @@ tws_line_signal( while (j != 0) { if(j & element->page_cache) break; - j << 1; + j <<= 1; local_off += PAGE_SIZE_64; } object = element->object; @@ -1579,10 +1638,9 @@ tws_send_startup_info( { tws_hash_t tws; - tws_startup_t scache; task_lock(task); - tws = (tws_hash_t)task->dynamic_working_set; + tws = task->dynamic_working_set; task_unlock(task); if(tws == NULL) { return KERN_FAILURE; @@ -1621,10 +1679,10 @@ tws_internal_startup_send( if(scache == NULL) return KERN_FAILURE; bsd_write_page_cache_file(tws->uid, tws->startup_name, - scache, scache->tws_hash_size, - tws->mod, tws->fid); - kfree((vm_offset_t)scache, scache->tws_hash_size); - kfree((vm_offset_t) tws->startup_name, tws->startup_name_length); + (caddr_t) scache, scache->tws_hash_size, + tws->mod, tws->fid); + kfree(scache, scache->tws_hash_size); + kfree(tws->startup_name, tws->startup_name_length); tws->startup_name = NULL; tws_unlock(tws); return KERN_SUCCESS; @@ -1635,7 +1693,7 @@ tws_handle_startup_file( task_t task, unsigned int uid, char *app_name, - vm_offset_t app_vp, + void *app_vp, boolean_t *new_info) { @@ -1651,7 +1709,8 @@ tws_handle_startup_file( return KERN_SUCCESS; error = bsd_read_page_cache_file(uid, &fid, &mod, app_name, - app_vp, &startup, + app_vp, + (vm_offset_t *) &startup, &cache_size); if(error) { return KERN_FAILURE; @@ -1702,14 +1761,19 @@ tws_write_startup_file( string_length = strlen(name); +restart: task_lock(task); - tws = (tws_hash_t)task->dynamic_working_set; - + tws = task->dynamic_working_set; task_unlock(task); + if(tws == NULL) { + kern_return_t error; + /* create a dynamic working set of normal size */ - task_working_set_create(task, 0, - 0, TWS_HASH_STYLE_DEFAULT); + if((error = task_working_set_create(task, 0, 0, TWS_HASH_STYLE_DEFAULT)) != KERN_SUCCESS) + return error; + /* we need to reset tws and relock */ + goto restart; } tws_lock(tws); @@ -1735,6 +1799,8 @@ tws_write_startup_file( return KERN_SUCCESS; } +unsigned long tws_read_startup_file_rejects = 0; + kern_return_t 
tws_read_startup_file( task_t task, @@ -1742,26 +1808,52 @@ tws_read_startup_file( vm_offset_t cache_size) { tws_hash_t tws; - int error; int lines; int old_exp_count; + unsigned int ele_count; +restart: task_lock(task); - tws = (tws_hash_t)task->dynamic_working_set; + tws = task->dynamic_working_set; + + /* create a dynamic working set to match file size */ - if(cache_size < sizeof(struct tws_hash)) { + /* start with total size of the data we got from app_profile */ + ele_count = cache_size; + /* skip the startup header */ + ele_count -= sizeof(struct tws_startup); + /* + * For each startup cache entry, we have one of these: + * tws_startup_ptr_t startup->table[]; + * struct tws_startup_ptr startup->ele[]; + * struct tws_startup_ele startup->array[]; + */ + ele_count /= (sizeof (tws_startup_ptr_t) + + sizeof (struct tws_startup_ptr) + + sizeof (struct tws_startup_ele)); + + /* + * Sanity check: make sure the value for startup->array_size + * that we read from the app_profile file matches the size + * of the data we read from disk. If it doesn't match, we + * can't trust the data and we just drop it all. + */ + if (cache_size < sizeof(struct tws_startup) || + startup->array_size != ele_count) { + tws_read_startup_file_rejects++; task_unlock(task); kmem_free(kernel_map, (vm_offset_t)startup, cache_size); return(KERN_SUCCESS); } - /* create a dynamic working set to match file size */ - lines = (cache_size - sizeof(struct tws_hash))/TWS_ARRAY_SIZE; - /* we now need to divide out element size and word size */ - /* all fields are 4 bytes. There are 8 bytes in each hash element */ - /* entry, 4 bytes in each table ptr location and 8 bytes in each */ - /* page_cache entry, making a total of 20 bytes for each entry */ - lines = (lines/(20)); + /* + * We'll create the task working set with the default row size + * (TWS_ARRAY_SIZE), so this will give us the number of lines + * we need to store all the data from the app_profile startup + * cache. 
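/*
 * A stand-alone restatement of the sanity check being added here: the
 * element count implied by the file size must match the count the
 * on-disk header claims, or the whole cache is dropped rather than
 * trusted.  The SK_* sizes are illustrative stand-ins for the real
 * tws_startup layout, not the kernel's constants:
 */
#include <stddef.h>

#define SK_HEADER_SIZE	32	/* stand-in for sizeof(struct tws_startup) */
#define SK_PER_ELEMENT	20	/* table ptr + hash ptr + ele, per the old comment */

static int
sk_startup_cache_ok(size_t cache_size, unsigned int claimed_array_size)
{
	size_t ele_count;

	if (cache_size < SK_HEADER_SIZE)
		return 0;		/* too small to even hold a header */
	ele_count = (cache_size - SK_HEADER_SIZE) / SK_PER_ELEMENT;
	return claimed_array_size == ele_count;	/* reject mismatched files */
}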
+ */ + lines = ele_count / TWS_ARRAY_SIZE; + if(lines <= TWS_SMALL_HASH_LINE_COUNT) { lines = TWS_SMALL_HASH_LINE_COUNT; task_unlock(task); @@ -1774,17 +1866,20 @@ tws_read_startup_file( * TWS_HASH_LINE_COUNT; } if(tws == NULL) { - task_working_set_create(task, lines, - 0, TWS_HASH_STYLE_DEFAULT); + kern_return_t error; + task_unlock(task); + if ((error = task_working_set_create(task, lines, 0, TWS_HASH_STYLE_DEFAULT)) != KERN_SUCCESS) + return error; + /* we need to reset tws and relock */ + goto restart; } else { task_unlock(task); tws_expand_working_set( - (vm_offset_t)tws, lines, TRUE); + (void *)tws, lines, TRUE); } } - tws_lock(tws); if(tws->startup_cache != NULL) { @@ -1824,21 +1919,16 @@ tws_hash_ws_flush(tws_hash_t tws) { if(scache == NULL) { /* dump the name cache, we'll */ /* get it next time */ - kfree((vm_offset_t) - tws->startup_name, - tws->startup_name_length); + kfree(tws->startup_name, tws->startup_name_length); tws->startup_name = NULL; tws_unlock(tws); return; } bsd_write_page_cache_file(tws->uid, tws->startup_name, - scache, scache->tws_hash_size, + (caddr_t) scache, scache->tws_hash_size, tws->mod, tws->fid); - kfree((vm_offset_t)scache, - scache->tws_hash_size); - kfree((vm_offset_t) - tws->startup_name, - tws->startup_name_length); + kfree(scache, scache->tws_hash_size); + kfree(tws->startup_name, tws->startup_name_length); tws->startup_name = NULL; } tws_unlock(tws); @@ -1848,8 +1938,7 @@ tws_hash_ws_flush(tws_hash_t tws) { void tws_hash_destroy(tws_hash_t tws) { - int i,k; - vm_size_t cache_size; + unsigned int i,k; if(tws->startup_cache != NULL) { kmem_free(kernel_map, @@ -1863,43 +1952,43 @@ tws_hash_destroy(tws_hash_t tws) for (i=0; inumber_of_lines; i++) { for(k=0; kexpansion_count; k++) { /* clear the object refs */ - tws_hash_line_clear(tws, &(tws->cache[k][i]), FALSE); + tws_hash_line_clear(tws, &(tws->cache[k][i]), NULL, FALSE); } } i = 0; while (i < tws->expansion_count) { - kfree((vm_offset_t)tws->table[i], sizeof(tws_hash_ptr_t) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->table_ele[i], - sizeof(struct tws_hash_ptr) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->alt_ele[i], - sizeof(struct tws_hash_ptr) - * tws->number_of_lines - * tws->number_of_elements); - kfree((vm_offset_t)tws->cache[i], sizeof(struct tws_hash_line) - * tws->number_of_lines); + kfree(tws->table[i], + sizeof(tws_hash_ptr_t) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->table_ele[i], + sizeof(struct tws_hash_ptr) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->alt_ele[i], + sizeof(struct tws_hash_ptr) + * tws->number_of_lines + * tws->number_of_elements); + kfree(tws->cache[i], + sizeof(struct tws_hash_line) * tws->number_of_lines); i++; } if(tws->startup_name != NULL) { - kfree((vm_offset_t)tws->startup_name, - tws->startup_name_length); + kfree(tws->startup_name, tws->startup_name_length); } - kfree((vm_offset_t)tws, sizeof(struct tws_hash)); + kfree(tws, sizeof(struct tws_hash)); } void tws_hash_clear(tws_hash_t tws) { - int i, k; + unsigned int i, k; for (i=0; inumber_of_lines; i++) { for(k=0; kexpansion_count; k++) { /* clear the object refs */ - tws_hash_line_clear(tws, &(tws->cache[k][i]), FALSE); + tws_hash_line_clear(tws, &(tws->cache[k][i]), NULL, FALSE); } } } @@ -1925,8 +2014,8 @@ task_working_set_create( if(task->dynamic_working_set != 0) { task_unlock(task); return(KERN_FAILURE); - } else if((task->dynamic_working_set - = (vm_offset_t) tws_hash_create(lines, 
rows, style)) == 0) { + } else if((task->dynamic_working_set = + tws_hash_create(lines, rows, style)) == 0) { task_unlock(task); return(KERN_NO_SPACE); } @@ -1959,7 +2048,7 @@ tws_traverse_address_hash_list ( tws_hash_ptr_t **free_list, unsigned int exclusive_addr) { - int k; + unsigned int k; tws_hash_ptr_t cache_ele; tws_hash_ptr_t base_ele; @@ -2047,12 +2136,12 @@ tws_traverse_object_hash_list ( unsigned int index, vm_object_t object, vm_object_offset_t offset, - unsigned int page_mask, + unsigned int pagemask, tws_hash_ptr_t *target_ele, tws_hash_ptr_t **previous_ptr, tws_hash_ptr_t **free_list) { - int k; + unsigned int k; tws_hash_ptr_t cache_ele; tws_hash_ptr_t base_ele; @@ -2091,8 +2180,8 @@ tws_traverse_object_hash_list ( if ((cache_ele->element->object == object) && (cache_ele->element->offset == (offset - (offset & ~TWS_HASH_OFF_MASK)))) { - if((cache_ele->element->page_cache & page_mask) - || (page_mask == 0xFFFFFFFF)) { + if((cache_ele->element->page_cache & pagemask) + || (pagemask == 0xFFFFFFFF)) { /* We've found a match */ *target_ele = cache_ele; *free_list = &(tws->free_hash_ele[k]); @@ -2117,7 +2206,7 @@ tws_test_for_community( vm_object_t object, vm_object_offset_t offset, unsigned int threshold, - unsigned int *page_mask) + unsigned int *pagemask) { int index; tws_hash_ptr_t cache_ele; @@ -2132,14 +2221,14 @@ tws_test_for_community( if(cache_ele != NULL) { int i; - int ctr; + unsigned int ctr; ctr = 0; for(i=1; i!=0; i=i<<1) { if(i & cache_ele->element->page_cache) ctr++; if(ctr == threshold) { community = 1; - *page_mask = cache_ele->element->page_cache; + *pagemask = cache_ele->element->page_cache; break; } } diff --git a/osfmk/vm/task_working_set.h b/osfmk/vm/task_working_set.h index fc7fda91e..366c5ffce 100644 --- a/osfmk/vm/task_working_set.h +++ b/osfmk/vm/task_working_set.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -34,6 +34,12 @@ #ifndef _VM_TASK_WORKING_SET_H_ #define _VM_TASK_WORKING_SET_H_ +#include + +#ifdef KERNEL_PRIVATE + +#ifdef MACH_KERNEL_PRIVATE + #include #include @@ -128,9 +134,9 @@ struct tws_hash_ele { }; typedef struct tws_hash_ele *tws_hash_ele_t; -#define TWS_HASH_OFF_MASK ((vm_object_offset_t)0xFFFFFFFFFFFE0000) +#define TWS_HASH_OFF_MASK ((vm_object_offset_t)0xFFFFFFFFFFFE0000ULL) #define TWS_ADDR_OFF_MASK ((vm_offset_t)0xFFFE0000) -#define TWS_INDEX_MASK ((vm_object_offset_t)0x000000000001F000) +#define TWS_INDEX_MASK ((vm_object_offset_t)0x000000000001F000ULL) struct tws_hash_ptr { tws_hash_ele_t element; @@ -139,7 +145,7 @@ struct tws_hash_ptr { typedef struct tws_hash_ptr *tws_hash_ptr_t; struct tws_hash_line { - int ele_count; + unsigned int ele_count; struct tws_hash_ele list[TWS_ARRAY_SIZE]; }; typedef struct tws_hash_line *tws_hash_line_t; @@ -158,17 +164,17 @@ struct tws_hash { decl_mutex_data(,lock) /* tws_hash's lock */ int style; - int current_line; + unsigned int current_line; unsigned int pageout_count; - int line_count; + unsigned int line_count; - int number_of_lines; - int number_of_elements; - int expansion_count; + unsigned int number_of_lines; + unsigned int number_of_elements; + unsigned int expansion_count; unsigned int time_of_creation; - int lookup_count; - int insert_count; + unsigned int lookup_count; + unsigned int insert_count; tws_startup_t startup_cache; char *startup_name; @@ -189,13 +195,6 @@ struct tws_hash { typedef struct tws_hash *tws_hash_t; -extern tws_hash_t tws_hash_create(); - -extern void tws_hash_line_clear( - tws_hash_t tws, - tws_hash_line_t hash_line, - boolean_t live); - extern kern_return_t tws_lookup( tws_hash_t tws, vm_object_offset_t offset, @@ -216,52 +215,41 @@ extern void tws_build_cluster( vm_object_offset_t *end, vm_size_t max_length); -extern tws_line_signal( - tws_hash_t tws, - vm_map_t map, - tws_hash_line_t hash_line, - vm_offset_t target_page); +extern void tws_line_signal( + tws_hash_t tws, + vm_map_t map, + tws_hash_line_t hash_line, + vm_offset_t target_page); extern void tws_hash_destroy( - tws_hash_t tws); - -extern void tws_hash_clear( - tws_hash_t tws); - -kern_return_t task_working_set_create( - task_t task, - unsigned int lines, - unsigned int rows, - unsigned int style); - -kern_return_t tws_expand_working_set( - vm_offset_t old_tws, - int line_count, - boolean_t dump_data); - -kern_return_t tws_handle_startup_file( - task_t task, - unsigned int uid, - char *app_name, - vm_offset_t app_vp, - boolean_t *new_info); - -kern_return_t tws_write_startup_file( - task_t task, - int fid, - int mod, - char *name, - unsigned int string_length); - -kern_return_t tws_read_startup_file( - task_t task, - tws_startup_t startup, - vm_offset_t cache_size); - -void -tws_hash_ws_flush( - tws_hash_t tws); + tws_hash_t tws); + +extern void tws_hash_ws_flush( + tws_hash_t tws); + +extern kern_return_t tws_expand_working_set( + tws_hash_t old_tws, + unsigned int line_count, + boolean_t dump_data); + +extern kern_return_t task_working_set_create( + task_t task, + unsigned int lines, + unsigned int rows, + unsigned int style); + +#endif /* MACH_KERNEL_PRIVATE */ + +extern kern_return_t tws_handle_startup_file( + task_t task, + unsigned int uid, + char *app_name, + void *app_vp, + boolean_t *new_info); +extern kern_return_t tws_send_startup_info( + task_t task); +#endif /* KERNEL_PRIVATE */ #endif /* _VM_TASK_WORKING_SET_H_ */ diff --git a/osfmk/vm/vm_debug.c b/osfmk/vm/vm_debug.c index 
ed316b55c..d786d081a 100644 --- a/osfmk/vm/vm_debug.c +++ b/osfmk/vm/vm_debug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -80,6 +80,12 @@ #include #endif +#if !MACH_VM_DEBUG +#define __DEBUG_ONLY __unused +#else /* !MACH_VM_DEBUG */ +#define __DEBUG_ONLY +#endif /* !MACH_VM_DEBUG */ + /* * Routine: mach_vm_region_info [kernel call] * Purpose: @@ -96,11 +102,11 @@ kern_return_t mach_vm_region_info( - vm_map_t map, - vm_offset_t address, - vm_info_region_t *regionp, - vm_info_object_array_t *objectsp, - mach_msg_type_number_t *objectsCntp) + __DEBUG_ONLY vm_map_t map, + __DEBUG_ONLY vm_offset_t address, + __DEBUG_ONLY vm_info_region_t *regionp, + __DEBUG_ONLY vm_info_object_array_t *objectsp, + __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) { #if !MACH_VM_DEBUG return KERN_FAILURE; @@ -130,7 +136,9 @@ mach_vm_region_info( for (cmap = map;; cmap = nmap) { /* cmap is read-locked */ - if (!vm_map_lookup_entry(cmap, address, &entry)) { + if (!vm_map_lookup_entry(cmap, + (vm_map_address_t)address, &entry)) { + entry = entry->vme_next; if (entry == vm_map_to_entry(cmap)) { vm_map_unlock_read(cmap); @@ -223,10 +231,11 @@ mach_vm_region_info( cobject->temporary; vio->vio_alive = cobject->alive; - vio->vio_lock_in_progress = - cobject->lock_in_progress; - vio->vio_lock_restart = - cobject->lock_restart; + vio->vio_purgable = + (cobject->purgable != VM_OBJECT_NONPURGABLE); + vio->vio_purgable_volatile = + (cobject->purgable == VM_OBJECT_PURGABLE_VOLATILE || + cobject->purgable == VM_OBJECT_PURGABLE_EMPTY); } used++; @@ -251,12 +260,13 @@ mach_vm_region_info( kmem_free(ipc_kernel_map, addr, size); size = round_page_32(2 * used * sizeof(vm_info_object_t)); - kr = vm_allocate(ipc_kernel_map, &addr, size, TRUE); + kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; - kr = vm_map_wire(ipc_kernel_map, addr, addr + size, - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), + VM_PROT_READ|VM_PROT_WRITE, FALSE); assert(kr == KERN_SUCCESS); } @@ -271,11 +281,12 @@ mach_vm_region_info( vm_size_t size_used = round_page_32(used * sizeof(vm_info_object_t)); - kr = vm_map_unwire(ipc_kernel_map, addr, addr + size_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, addr, size_used, - TRUE, &copy); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size_used, TRUE, &copy); assert(kr == KERN_SUCCESS); if (size != size_used) @@ -289,17 +300,18 @@ mach_vm_region_info( return KERN_SUCCESS; #endif /* MACH_VM_DEBUG */ } + /* * Temporary call for 64 bit data path interface transition */ kern_return_t mach_vm_region_info_64( - vm_map_t map, - vm_offset_t address, - vm_info_region_64_t *regionp, - vm_info_object_array_t *objectsp, - mach_msg_type_number_t *objectsCntp) + __DEBUG_ONLY vm_map_t map, + __DEBUG_ONLY vm_offset_t address, + __DEBUG_ONLY vm_info_region_64_t *regionp, + __DEBUG_ONLY vm_info_object_array_t *objectsp, + __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) { #if !MACH_VM_DEBUG return KERN_FAILURE; @@ -422,10 +434,11 @@ mach_vm_region_info_64( cobject->temporary; vio->vio_alive = cobject->alive; - vio->vio_lock_in_progress = - cobject->lock_in_progress; -
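The hunk above defines __DEBUG_ONLY so that, when MACH_VM_DEBUG is off, every parameter of these debug entry points is tagged __unused and the body collapses to an immediate KERN_FAILURE. A compilable sketch of the same idiom, with CONFIG_DEBUG as a made-up stand-in for the config option:

    #include <stdio.h>

    #if !defined(CONFIG_DEBUG)
    #define DEBUG_ONLY __attribute__((unused))  /* parameter never read */
    #else
    #define DEBUG_ONLY
    #endif

    static int
    region_info(DEBUG_ONLY void *map, DEBUG_ONLY unsigned long address)
    {
    #if !defined(CONFIG_DEBUG)
        return -1;                    /* debug interface compiled out */
    #else
        printf("inspecting map %p at 0x%lx\n", map, address);
        return 0;
    #endif
    }

    int
    main(void)
    {
        printf("rc=%d\n", region_info((void *)0, 0x1000));
        return 0;
    }

The attribute keeps -Wunused-parameter builds clean without changing the exported MIG signature of the routine.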
vio->vio_lock_restart = - cobject->lock_restart; + vio->vio_purgable = + (cobject->purgable != VM_OBJECT_NONPURGABLE); + vio->vio_purgable_volatile = + (cobject->purgable == VM_OBJECT_PURGABLE_VOLATILE || + cobject->purgable == VM_OBJECT_PURGABLE_EMPTY); } used++; @@ -450,12 +463,13 @@ mach_vm_region_info_64( kmem_free(ipc_kernel_map, addr, size); size = round_page_32(2 * used * sizeof(vm_info_object_t)); - kr = vm_allocate(ipc_kernel_map, &addr, size, TRUE); + kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; - kr = vm_map_wire(ipc_kernel_map, addr, addr + size, - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), + VM_PROT_READ|VM_PROT_WRITE, FALSE); assert(kr == KERN_SUCCESS); } @@ -470,11 +484,12 @@ mach_vm_region_info_64( vm_size_t size_used = round_page_32(used * sizeof(vm_info_object_t)); - kr = vm_map_unwire(ipc_kernel_map, addr, addr + size_used, FALSE); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size_used), FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_copyin(ipc_kernel_map, addr, size_used, - TRUE, &copy); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)size_used, TRUE, &copy); assert(kr == KERN_SUCCESS); if (size != size_used) @@ -493,9 +508,9 @@ mach_vm_region_info_64( */ kern_return_t vm_mapped_pages_info( - vm_map_t map, - page_address_array_t *pages, - mach_msg_type_number_t *pages_count) + __DEBUG_ONLY vm_map_t map, + __DEBUG_ONLY page_address_array_t *pages, + __DEBUG_ONLY mach_msg_type_number_t *pages_count) { #if !MACH_VM_DEBUG return KERN_FAILURE; @@ -514,8 +529,9 @@ vm_mapped_pages_info( size = round_page_32(size); for (;;) { - (void) vm_allocate(ipc_kernel_map, &addr, size, TRUE); - (void) vm_map_unwire(ipc_kernel_map, addr, addr + size, FALSE); + (void) vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); + (void) vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), FALSE); list = (page_address_array_t) addr; space = size / sizeof(vm_offset_t); @@ -544,13 +560,12 @@ vm_mapped_pages_info( else { *pages_count = actual; size_used = round_page_32(actual * sizeof(vm_offset_t)); - (void) vm_map_wire(ipc_kernel_map, - addr, addr + size, + (void) vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), VM_PROT_READ|VM_PROT_WRITE, FALSE); - (void) vm_map_copyin( - ipc_kernel_map, - addr, - size_used, + (void) vm_map_copyin(ipc_kernel_map, + (vm_map_address_t)addr, + (vm_map_size_t)size_used, TRUE, (vm_map_copy_t *)pages); if (size_used != size) { @@ -578,15 +593,15 @@ vm_mapped_pages_info( kern_return_t host_virtual_physical_table_info( - host_t host, - hash_info_bucket_array_t *infop, - mach_msg_type_number_t *countp) + __DEBUG_ONLY host_t host, + __DEBUG_ONLY hash_info_bucket_array_t *infop, + __DEBUG_ONLY mach_msg_type_number_t *countp) { #if !MACH_VM_DEBUG return KERN_FAILURE; #else vm_offset_t addr; - vm_size_t size; + vm_size_t size = 0; hash_info_bucket_t *info; unsigned int potential, actual; kern_return_t kr; @@ -635,8 +650,8 @@ host_virtual_physical_table_info( if (used != size) kmem_free(ipc_kernel_map, addr + used, size - used); - kr = vm_map_copyin(ipc_kernel_map, addr, used, - TRUE, &copy); + kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, + (vm_map_size_t)used, TRUE, &copy); assert(kr == KERN_SUCCESS); *infop = (hash_info_bucket_t *) copy; diff --git
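The hunks above replace the retired lock_in_progress/lock_restart fields with two booleans derived from the object's purgable state. A sketch of that mapping, assuming the four VM_OBJECT_*PURGABLE* values (NONVOLATILE is included for completeness; the struct is a stand-in for vm_info_object_t):

    #include <stdbool.h>
    #include <stdio.h>

    enum purgable_state {
        VM_OBJECT_NONPURGABLE,
        VM_OBJECT_PURGABLE_NONVOLATILE,
        VM_OBJECT_PURGABLE_VOLATILE,
        VM_OBJECT_PURGABLE_EMPTY
    };

    struct vio {                     /* stand-in for vm_info_object_t */
        bool vio_purgable;
        bool vio_purgable_volatile;
    };

    static void
    report_purgable(struct vio *vio, enum purgable_state purgable)
    {
        /* any state other than NONPURGABLE counts as purgable... */
        vio->vio_purgable = (purgable != VM_OBJECT_NONPURGABLE);
        /* ...and VOLATILE or EMPTY means the pages may vanish */
        vio->vio_purgable_volatile =
            (purgable == VM_OBJECT_PURGABLE_VOLATILE ||
             purgable == VM_OBJECT_PURGABLE_EMPTY);
    }

    int
    main(void)
    {
        struct vio vio;
        report_purgable(&vio, VM_OBJECT_PURGABLE_VOLATILE);
        printf("purgable=%d volatile=%d\n",
               vio.vio_purgable, vio.vio_purgable_volatile);
        return 0;
    }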
a/osfmk/vm/vm_debug.h b/osfmk/vm/vm_debug.h index fee86f23a..db8c2c8b2 100644 --- a/osfmk/vm/vm_debug.h +++ b/osfmk/vm/vm_debug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,9 +36,4 @@ extern unsigned int vm_page_info( hash_info_bucket_t *info, unsigned int count); -extern int pmap_list_resident_pages( - pmap_t pmap, - vm_offset_t *listp, - int space); - #endif /* VM_VM_DEBUG_H */ diff --git a/osfmk/vm/vm_external.c b/osfmk/vm/vm_external.c index f8a511af4..5afba3ca0 100644 --- a/osfmk/vm/vm_external.c +++ b/osfmk/vm/vm_external.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -152,7 +152,7 @@ vm_external_destroy( } else { bytes = power_of_2(bytes); } - kfree((vm_offset_t)map, bytes); + kfree(map, bytes); } /* @@ -167,12 +167,13 @@ vm_external_map_size( vm_size_t bytes; bytes = stob(size); - if (bytes != 0) + if (bytes != 0) { if (bytes <= SMALL_SIZE) { bytes = SMALL_SIZE; } else { bytes = power_of_2(bytes); } + } return bytes; } diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index 5f1f381d3..c601cb0da 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -55,19 +55,20 @@ * * Page fault handling module. */ -#ifdef MACH_BSD -/* remove after component interface available */ -extern int vnode_pager_workaround; -extern int device_pager_workaround; -#endif #include #include #include -#include +#include #include #include /* for error codes */ +#include +#include +#include + /* For memory_object_data_{request,unlock} */ + +#include #include #include #include @@ -75,7 +76,14 @@ extern int device_pager_workaround; #include #include #include +#include +#include +#include +#include + #include + +#include #include #include #include @@ -83,14 +91,7 @@ extern int device_pager_workaround; #include #include #include -#include -#include -#include - /* For memory_object_data_{request,unlock} */ -#include -#include -#include -#include +#include #include @@ -99,7 +100,7 @@ extern int device_pager_workaround; #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ -int vm_object_absent_max = 50; +unsigned int vm_object_absent_max = 50; int vm_fault_debug = 0; @@ -113,13 +114,14 @@ boolean_t software_reference_bits = TRUE; extern struct db_watchpoint *db_watchpoint_list; #endif /* MACH_KDB */ + /* Forward declarations of internal routines. */ extern kern_return_t vm_fault_wire_fast( vm_map_t map, - vm_offset_t va, + vm_map_offset_t va, vm_map_entry_t entry, pmap_t pmap, - vm_offset_t pmap_addr); + vm_map_offset_t pmap_addr); extern void vm_fault_continue(void); @@ -224,9 +226,9 @@ int vm_default_behind = MAX_UPL_TRANSFER; static boolean_t vm_fault_deactivate_behind( - vm_object_t object, - vm_offset_t offset, - vm_behavior_t behavior) + vm_object_t object, + vm_object_offset_t offset, + vm_behavior_t behavior) { vm_page_t m; @@ -234,6 +236,14 @@ vm_fault_deactivate_behind( dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */ #endif + if (object == kernel_object) { + /* + * Do not deactivate pages from the kernel object: they + * are not intended to become pageable. 
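The vm_external_map_size() hunk above only adds braces: C's dangling-else rule already bound the else to the inner if, so behavior is unchanged, but the explicit block makes the binding obvious and quiets ambiguous-else warnings. A compilable sketch of the same shape (SMALL_SIZE and power_of_2 are illustrative stand-ins):

    #include <stdio.h>

    #define SMALL_SIZE 64

    static unsigned long
    power_of_2(unsigned long n)       /* round up to a power of two */
    {
        unsigned long p = 1;
        while (p < n)
            p <<= 1;
        return p;
    }

    static unsigned long
    map_size(unsigned long bytes)
    {
        if (bytes != 0) {             /* braces make the intent explicit */
            if (bytes <= SMALL_SIZE) {
                bytes = SMALL_SIZE;
            } else {
                bytes = power_of_2(bytes);
            }
        }
        return bytes;
    }

    int
    main(void)
    {
        printf("%lu %lu %lu\n", map_size(0), map_size(10), map_size(100));
        /* prints: 0 64 128 */
        return 0;
    }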
+ */ + return FALSE; + } + switch (behavior) { case VM_BEHAVIOR_RANDOM: object->sequential = PAGE_SIZE_64; @@ -356,8 +366,8 @@ vm_fault_page( vm_prot_t fault_type, /* What access is requested */ boolean_t must_be_resident,/* Must page be resident? */ int interruptible, /* how may fault be interrupted? */ - vm_object_offset_t lo_offset, /* Map entry start */ - vm_object_offset_t hi_offset, /* Map entry end */ + vm_map_offset_t lo_offset, /* Map entry start */ + vm_map_offset_t hi_offset, /* Map entry end */ vm_behavior_t behavior, /* Page reference behavior */ /* Modifies in place: */ vm_prot_t *protection, /* Protection for mapping */ @@ -374,7 +384,7 @@ vm_fault_page( * it is a write fault and a full * page is provided */ vm_map_t map, - vm_offset_t vaddr) + __unused vm_map_offset_t vaddr) { register vm_page_t m; @@ -388,10 +398,8 @@ vm_fault_page( boolean_t look_for_page; vm_prot_t access_required = fault_type; vm_prot_t wants_copy_flag; - vm_size_t cluster_size, length; - vm_object_offset_t cluster_offset; - vm_object_offset_t cluster_start, cluster_end, paging_offset; - vm_object_offset_t align_offset; + vm_object_size_t length; + vm_object_offset_t cluster_start, cluster_end; CLUSTER_STAT(int pages_at_higher_offsets;) CLUSTER_STAT(int pages_at_lower_offsets;) kern_return_t wait_result; @@ -590,8 +598,14 @@ vm_fault_page( /* * If the page was pre-paged as part of a * cluster, record the fact. + * If we were passed a valid pointer for + * "type_of_fault", than we came from + * vm_fault... we'll let it deal with + * this condition, since it + * needs to see m->clustered to correctly + * account the pageins. */ - if (m->clustered) { + if (type_of_fault == NULL && m->clustered) { vm_pagein_cluster_used++; m->clustered = FALSE; } @@ -632,6 +646,30 @@ vm_fault_page( continue; } + if (m->encrypted) { + /* + * ENCRYPTED SWAP: + * the user needs access to a page that we + * encrypted before paging it out. + * Decrypt the page now. + * Keep it busy to prevent anyone from + * accessing it during the decryption. + */ + m->busy = TRUE; + vm_page_decrypt(m, 0); + assert(object == m->object); + assert(m->busy); + PAGE_WAKEUP_DONE(m); + + /* + * Retry from the top, in case + * something changed while we were + * decrypting. + */ + continue; + } + ASSERT_PAGE_DECRYPTED(m); + /* * If the page is in error, give up now. */ @@ -725,7 +763,7 @@ vm_fault_page( THREAD_UNINT); vm_fault_cleanup(object, first_m); - thread_block((void(*)(void)) 0); + thread_block(THREAD_CONTINUE_NULL); thread_interrupt_level( interruptible_state); return(VM_FAULT_RETRY); @@ -769,22 +807,23 @@ vm_fault_page( vm_object_unlock(object); vm_page_zero_fill(m); vm_object_lock(object); - } - if (type_of_fault) - *type_of_fault = DBG_ZERO_FILL_FAULT; - VM_STAT(zero_fill_count++); + if (type_of_fault) + *type_of_fault = DBG_ZERO_FILL_FAULT; + VM_STAT(zero_fill_count++); + } if (bumped_pagein == TRUE) { VM_STAT(pageins--); current_task()->pageins--; } -#if 0 - pmap_clear_modify(m->phys_page); -#endif vm_page_lock_queues(); VM_PAGE_QUEUES_REMOVE(m); m->page_ticket = vm_page_ticket; - if(m->object->size > 0x80000) { + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); + if(m->object->size > 0x200000) { m->zero_fill = TRUE; /* depends on the queues lock */ vm_zf_count += 1; @@ -1181,7 +1220,7 @@ no_clustering: * do not need to take the map lock. 
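The ENCRYPTED SWAP hunk above handles a fault on a page that was encrypted on its way out to swap: mark it busy so nothing else touches it, decrypt in place, wake waiters, then retry the whole lookup because object state may have shifted while locks were dropped. A user-space model of that control flow (not the kernel implementation; vm_page_decrypt and PAGE_WAKEUP_DONE are reduced to stubs):

    #include <stdbool.h>
    #include <stdio.h>

    struct page {
        bool busy;
        bool encrypted;
    };

    static void vm_page_decrypt_model(struct page *m) { m->encrypted = false; }

    static void
    fault_lookup(struct page *m)
    {
        for (;;) {
            if (m->encrypted) {
                m->busy = true;       /* keep others out during decrypt */
                vm_page_decrypt_model(m);
                m->busy = false;      /* PAGE_WAKEUP_DONE() stand-in */
                continue;             /* retry from the top */
            }
            break;                    /* resident and cleartext */
        }
        printf("fault satisfied, encrypted=%d busy=%d\n",
               m->encrypted, m->busy);
    }

    int
    main(void)
    {
        struct page m = { .busy = false, .encrypted = true };
        fault_lookup(&m);
        return 0;
    }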
*/ cluster_end = offset + PAGE_SIZE_64; - tws_build_cluster((tws_hash_t) + tws_build_cluster( current_task()->dynamic_working_set, object, &cluster_start, &cluster_end, 0x40000); @@ -1201,7 +1240,7 @@ no_clustering: */ if (type_of_fault) - *type_of_fault = (length << 8) | DBG_PAGEIN_FAULT; + *type_of_fault = ((int)length << 8) | DBG_PAGEIN_FAULT; VM_STAT(pageins++); current_task()->pageins++; bumped_pagein = TRUE; @@ -1242,7 +1281,7 @@ no_clustering: if (rc != KERN_SUCCESS) { if (rc != MACH_SEND_INTERRUPTED && vm_fault_debug) - printf("%s(0x%x, 0x%x, 0x%x, 0x%x) failed, rc=%d\n", + printf("%s(0x%x, 0x%llx, 0x%llx, 0x%x) failed, rc=%d\n", "memory_object_data_request", object->pager, cluster_start + object->paging_offset, @@ -1270,44 +1309,8 @@ no_clustering: return((rc == MACH_SEND_INTERRUPTED) ? VM_FAULT_INTERRUPTED : VM_FAULT_MEMORY_ERROR); - } else { -#ifdef notdefcdy - tws_hash_line_t line; - task_t task; - - task = current_task(); - - if((map != NULL) && - (task->dynamic_working_set != 0)) - && !(object->private)) { - vm_object_t base_object; - vm_object_offset_t base_offset; - base_object = object; - base_offset = offset; - while(base_object->shadow) { - base_offset += - base_object->shadow_offset; - base_object = - base_object->shadow; - } - if(tws_lookup - ((tws_hash_t) - task->dynamic_working_set, - base_offset, base_object, - &line) == KERN_SUCCESS) { - tws_line_signal((tws_hash_t) - task->dynamic_working_set, - map, line, vaddr); - } - } -#endif } - /* - * Retry with same object/offset, since new data may - * be in a different page (i.e., m is meaningless at - * this point). - */ vm_object_lock(object); if ((interruptible != THREAD_UNINT) && (current_thread()->state & TH_ABORT)) { @@ -1315,8 +1318,29 @@ no_clustering: thread_interrupt_level(interruptible_state); return(VM_FAULT_INTERRUPTED); } - if(m == VM_PAGE_NULL) + if (m == VM_PAGE_NULL && + object->phys_contiguous) { + /* + * No page here means that the object we + * initially looked up was "physically + * contiguous" (i.e. device memory). However, + * with Virtual VRAM, the object might not + * be backed by that device memory anymore, + * so we're done here only if the object is + * still "phys_contiguous". + * Otherwise, if the object is no longer + * "phys_contiguous", we need to retry the + * page fault against the object's new backing + * store (different memory object). + */ break; + } + + /* + * Retry with same object/offset, since new data may + * be in a different page (i.e., m is meaningless at + * this point).
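The error printf above prints 64-bit cluster offsets, which is why the format moves from plain 0x%x to the ll length modifier. A quick sketch of the difference, with uint64_t standing in for vm_object_offset_t:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t vm_object_offset_t;

    int
    main(void)
    {
        vm_object_offset_t cluster_start = 0x1234ABCD5678ULL;
        unsigned int       length        = 0x40000;

        /* wrong: "%x" reads only 32 bits of the 64-bit argument and
         * desynchronizes every later conversion in the call:
         * printf("start 0x%x len 0x%x\n", cluster_start, length);
         */

        /* right: promote explicitly and use the ll modifier */
        printf("start 0x%llx len 0x%x\n",
               (unsigned long long)cluster_start, length);
        return 0;
    }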
+ */ continue; } @@ -1398,7 +1422,7 @@ no_clustering: THREAD_UNINT); VM_PAGE_FREE(m); vm_fault_cleanup(object, VM_PAGE_NULL); - thread_block((void (*)(void)) 0); + thread_block(THREAD_CONTINUE_NULL); thread_interrupt_level( interruptible_state); return(VM_FAULT_RETRY); @@ -1418,19 +1442,22 @@ no_clustering: vm_object_unlock(object); vm_page_zero_fill(m); vm_object_lock(object); - } - if (type_of_fault) - *type_of_fault = DBG_ZERO_FILL_FAULT; - VM_STAT(zero_fill_count++); + if (type_of_fault) + *type_of_fault = DBG_ZERO_FILL_FAULT; + VM_STAT(zero_fill_count++); + } if (bumped_pagein == TRUE) { VM_STAT(pageins--); current_task()->pageins--; } - vm_page_lock_queues(); VM_PAGE_QUEUES_REMOVE(m); - if(m->object->size > 0x80000) { + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); + if(m->object->size > 0x200000) { m->zero_fill = TRUE; /* depends on the queues lock */ vm_zf_count += 1; @@ -1503,6 +1530,15 @@ no_clustering: } #endif /* EXTRA_ASSERTIONS */ + /* + * ENCRYPTED SWAP: + * If we found a page, we must have decrypted it before we + * get here... + */ + if (m != VM_PAGE_NULL) { + ASSERT_PAGE_DECRYPTED(m); + } + XPR(XPR_VM_FAULT, "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n", (integer_t)object, offset, (integer_t)m, @@ -1542,7 +1578,7 @@ no_clustering: THREAD_UNINT); RELEASE_PAGE(m); vm_fault_cleanup(object, first_m); - thread_block((void (*)(void)) 0); + thread_block(THREAD_CONTINUE_NULL); thread_interrupt_level( interruptible_state); return(VM_FAULT_RETRY); @@ -1593,12 +1629,12 @@ no_clustering: * * XXXO If we know that only one map has * access to this page, then we could - * avoid the pmap_page_protect() call. + * avoid the pmap_disconnect() call. */ vm_page_lock_queues(); assert(!m->cleaning); - pmap_page_protect(m->phys_page, VM_PROT_NONE); + pmap_disconnect(m->phys_page); vm_page_deactivate(m); copy_m->dirty = TRUE; /* @@ -1734,6 +1770,12 @@ no_clustering: copy_object->ref_count--; assert(copy_object->ref_count > 0); copy_m = vm_page_lookup(copy_object, copy_offset); + /* + * ENCRYPTED SWAP: + * it's OK if the "copy_m" page is encrypted, + * because we're not moving it nor handling its + * contents. + */ if (copy_m != VM_PAGE_NULL && copy_m->busy) { PAGE_ASSERT_WAIT(copy_m, interruptible); vm_object_unlock(copy_object); @@ -1777,7 +1819,7 @@ no_clustering: assert(copy_object->ref_count > 0); vm_object_unlock(copy_object); vm_fault_cleanup(object, first_m); - thread_block((void (*)(void)) 0); + thread_block(THREAD_CONTINUE_NULL); thread_interrupt_level( interruptible_state); return(VM_FAULT_RETRY); @@ -1814,7 +1856,7 @@ no_clustering: vm_page_lock_queues(); assert(!m->cleaning); - pmap_page_protect(m->phys_page, VM_PROT_NONE); + pmap_disconnect(m->phys_page); copy_m->dirty = TRUE; vm_page_unlock_queues(); @@ -1972,7 +2014,7 @@ no_clustering: * We always insert the base object/offset pair * rather the actual object/offset. * Assumptions: - * Map and pmap_map locked. + * Map and real_map locked. * Object locked and referenced. * Returns: * TRUE if startup file should be written. 
@@ -1982,8 +2024,8 @@ no_clustering: static boolean_t vm_fault_tws_insert( vm_map_t map, - vm_map_t pmap_map, - vm_offset_t vaddr, + vm_map_t real_map, + vm_map_offset_t vaddr, vm_object_t object, vm_object_offset_t offset) { @@ -1991,11 +2033,10 @@ vm_fault_tws_insert( task_t task; kern_return_t kr; boolean_t result = FALSE; - extern vm_map_t kalloc_map; /* Avoid possible map lock deadlock issues */ if (map == kernel_map || map == kalloc_map || - pmap_map == kernel_map || pmap_map == kalloc_map) + real_map == kernel_map || real_map == kalloc_map) return result; task = current_task(); @@ -2005,14 +2046,14 @@ vm_fault_tws_insert( vm_object_offset_t base_offset; base_object = object; base_offset = offset; - while(base_shadow = base_object->shadow) { + while ((base_shadow = base_object->shadow)) { vm_object_lock(base_shadow); vm_object_unlock(base_object); base_offset += - base_object->shadow_offset; + base_object->shadow_offset; base_object = base_shadow; } - kr = tws_lookup((tws_hash_t) + kr = tws_lookup( task->dynamic_working_set, base_offset, base_object, &line); @@ -2025,10 +2066,10 @@ vm_fault_tws_insert( } else if (kr != KERN_SUCCESS) { if(base_object != object) vm_object_reference_locked(base_object); - kr = tws_insert((tws_hash_t) + kr = tws_insert( task->dynamic_working_set, base_offset, base_object, - vaddr, pmap_map); + vaddr, real_map); if(base_object != object) { vm_object_unlock(base_object); vm_object_deallocate(base_object); @@ -2069,15 +2110,17 @@ vm_fault_tws_insert( * and deallocated when leaving vm_fault. */ +extern int _map_enter_debug; + kern_return_t vm_fault( vm_map_t map, - vm_offset_t vaddr, + vm_map_offset_t vaddr, vm_prot_t fault_type, boolean_t change_wiring, int interruptible, pmap_t caller_pmap, - vm_offset_t caller_pmap_addr) + vm_map_offset_t caller_pmap_addr) { vm_map_version_t version; /* Map version for verification */ boolean_t wired; /* Should mapping be wired down? */ @@ -2085,7 +2128,7 @@ vm_fault( vm_object_offset_t offset; /* Top-level offset */ vm_prot_t prot; /* Protection for mapping */ vm_behavior_t behavior; /* Expected paging behavior */ - vm_object_offset_t lo_offset, hi_offset; + vm_map_offset_t lo_offset, hi_offset; vm_object_t old_copy_object; /* Saved copy object */ vm_page_t result_page; /* Result of vm_fault_page */ vm_page_t top_page; /* Placeholder page */ @@ -2093,7 +2136,7 @@ vm_fault( register vm_page_t m; /* Fast access to result_page */ - kern_return_t error_code; /* page error reasons */ + kern_return_t error_code = 0; /* page error reasons */ register vm_object_t cur_object; register @@ -2101,15 +2144,13 @@ vm_fault( vm_page_t cur_m; vm_object_t new_object; int type_of_fault; - vm_map_t pmap_map = map; + vm_map_t real_map = map; vm_map_t original_map = map; pmap_t pmap = NULL; - boolean_t funnel_set = FALSE; - funnel_t *curflock; - thread_t cur_thread; boolean_t interruptible_state; unsigned int cache_attr; int write_startup_file = 0; + boolean_t need_activation; vm_prot_t full_fault_type; if (get_preemption_level() != 0) @@ -2143,17 +2184,6 @@ vm_fault( VM_STAT(faults++); current_task()->faults++; - /* - * drop funnel if it is already held.
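The vm_fault_tws_insert() hunk above walks an object's shadow chain down to the base object, accumulating shadow offsets, and wraps the assignment used as the loop condition in an extra pair of parentheses to signal that "=" rather than "==" is intended. A self-contained sketch of both points (types are illustrative stand-ins):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    struct object {
        struct object *shadow;        /* backing object, or NULL */
        uint64_t       shadow_offset; /* our offset within the shadow */
    };

    static void
    find_base(struct object *object, uint64_t offset,
              struct object **base_object, uint64_t *base_offset)
    {
        struct object *base_shadow;

        *base_object = object;
        *base_offset = offset;
        /* double parens: assignment deliberately used as condition */
        while ((base_shadow = (*base_object)->shadow)) {
            *base_offset += (*base_object)->shadow_offset;
            *base_object = base_shadow;
        }
    }

    int
    main(void)
    {
        struct object base = { NULL, 0 };
        struct object mid  = { &base, 0x1000 };
        struct object top  = { &mid,  0x2000 };
        struct object *bo;
        uint64_t boff;

        find_base(&top, 0x500, &bo, &boff);
        printf("base offset 0x%llx\n", (unsigned long long)boff);
        /* prints: base offset 0x3500 */
        return 0;
    }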
Then restore while returning - */ - cur_thread = current_thread(); - - if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) { - funnel_set = TRUE; - curflock = cur_thread->funnel_lock; - thread_funnel_set( curflock , FALSE); - } - RetryFault: ; /* @@ -2165,9 +2195,11 @@ vm_fault( kr = vm_map_lookup_locked(&map, vaddr, fault_type, &version, &object, &offset, &prot, &wired, - &behavior, &lo_offset, &hi_offset, &pmap_map); + &behavior, &lo_offset, &hi_offset, &real_map); + +//if (_map_enter_debug)printf("vm_map_lookup_locked(map=0x%x, addr=0x%llx, prot=%d wired=%d) = %d\n", map, vaddr, prot, wired, kr); - pmap = pmap_map->pmap; + pmap = real_map->pmap; if (kr != KERN_SUCCESS) { vm_map_unlock_read(map); @@ -2242,8 +2274,8 @@ vm_fault( vm_object_unlock(object); vm_map_unlock_read(map); - if (pmap_map != map) - vm_map_unlock(pmap_map); + if (real_map != map) + vm_map_unlock(real_map); #if !VM_FAULT_STATIC_CONFIG if (!vm_fault_interruptible) @@ -2273,6 +2305,38 @@ vm_fault( break; } + if (m->encrypted) { + /* + * ENCRYPTED SWAP: + * We've soft-faulted (because it's not in the page + * table) on an encrypted page. + * Keep the page "busy" so that noone messes with + * it during the decryption. + * Release the extra locks we're holding, keep only + * the page's VM object lock. + */ + m->busy = TRUE; + if (object != cur_object) { + vm_object_unlock(object); + } + vm_map_unlock_read(map); + if (real_map != map) + vm_map_unlock(real_map); + + vm_page_decrypt(m, 0); + + assert(m->busy); + PAGE_WAKEUP_DONE(m); + vm_object_unlock(m->object); + + /* + * Retry from the top, in case anything + * changed while we were decrypting... + */ + goto RetryFault; + } + ASSERT_PAGE_DECRYPTED(m); + /* * Two cases of map in faults: * - At top level w/o copy object. @@ -2328,10 +2392,13 @@ FastPmapEnter: cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK; sequential = FALSE; + need_activation = FALSE; + if (m->no_isync == TRUE) { m->no_isync = FALSE; - pmap_sync_caches_phys(m->phys_page); - if (type_of_fault == DBG_CACHE_HIT_FAULT) { + pmap_sync_page_data_phys(m->phys_page); + + if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) { /* * found it in the cache, but this * is the first fault-in of the page (no_isync == TRUE) @@ -2343,8 +2410,11 @@ FastPmapEnter: type_of_fault = DBG_PAGEIN_FAULT; sequential = TRUE; } + if (m->clustered) + need_activation = TRUE; + } else if (cache_attr != VM_WIMG_DEFAULT) { - pmap_sync_caches_phys(m->phys_page); + pmap_sync_page_attributes_phys(m->phys_page); } if(caller_pmap) { @@ -2369,6 +2439,7 @@ FastPmapEnter: * queue. This code doesn't. 
*/ vm_page_lock_queues(); + if (m->clustered) { vm_pagein_cluster_used++; m->clustered = FALSE; @@ -2383,7 +2454,7 @@ FastPmapEnter: } #if VM_FAULT_STATIC_CONFIG else { - if (!m->active && !m->inactive) + if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) vm_page_activate(m); } #else @@ -2413,7 +2484,7 @@ FastPmapEnter: */ if (!sequential && !object->private) { write_startup_file = - vm_fault_tws_insert(map, pmap_map, vaddr, + vm_fault_tws_insert(map, real_map, vaddr, object, cur_offset); } @@ -2421,15 +2492,12 @@ FastPmapEnter: vm_object_unlock(object); vm_map_unlock_read(map); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); if(write_startup_file) tws_send_startup_info(current_task()); - if (funnel_set) - thread_funnel_set( curflock, TRUE); - thread_interrupt_level(interruptible_state); @@ -2492,15 +2560,13 @@ FastPmapEnter: * Now cope with the source page and object * If the top object has a ref count of 1 * then no other map can access it, and hence - * it's not necessary to do the pmap_page_protect. + * it's not necessary to do the pmap_disconnect. */ - vm_page_lock_queues(); vm_page_deactivate(cur_m); m->dirty = TRUE; - pmap_page_protect(cur_m->phys_page, - VM_PROT_NONE); + pmap_disconnect(cur_m->phys_page); vm_page_unlock_queues(); PAGE_WAKEUP_DONE(cur_m); @@ -2540,20 +2606,16 @@ FastPmapEnter: vm_object_paging_end(object); vm_object_unlock(object); vm_map_unlock_read(map); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); if(write_startup_file) tws_send_startup_info( current_task()); - if (funnel_set) { - thread_funnel_set( curflock, TRUE); - funnel_set = FALSE; - } thread_interrupt_level(interruptible_state); - return VM_FAULT_MEMORY_ERROR; + return KERN_MEMORY_ERROR; } /* @@ -2618,7 +2680,11 @@ FastPmapEnter: VM_PAGE_QUEUES_REMOVE(m); m->page_ticket = vm_page_ticket; - if(m->object->size > 0x80000) { + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); + if(m->object->size > 0x200000) { m->zero_fill = TRUE; /* depends on the queues lock */ vm_zf_count += 1; @@ -2673,8 +2739,8 @@ FastPmapEnter: } vm_map_unlock_read(map); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); /* * Make a reference to this object to @@ -2693,7 +2759,7 @@ FastPmapEnter: if (!object->private) { write_startup_file = - vm_fault_tws_insert(map, pmap_map, vaddr, object, offset); + vm_fault_tws_insert(map, real_map, vaddr, object, offset); } kr = vm_fault_page(object, offset, fault_type, @@ -2804,8 +2870,8 @@ FastPmapEnter: fault_type & ~VM_PROT_WRITE, &version, &retry_object, &retry_offset, &retry_prot, &wired, &behavior, &lo_offset, &hi_offset, - &pmap_map); - pmap = pmap_map->pmap; + &real_map); + pmap = real_map->pmap; if (kr != KERN_SUCCESS) { vm_map_unlock_read(map); @@ -2829,8 +2895,8 @@ FastPmapEnter: if ((retry_object != object) || (retry_offset != offset)) { vm_map_unlock_read(map); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); if(m != VM_PAGE_NULL) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; @@ -2874,8 +2940,8 @@ FastPmapEnter: if (wired && (fault_type != (prot|VM_PROT_WRITE))) { vm_map_verify_done(map, &version); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); if(m != VM_PAGE_NULL) { RELEASE_PAGE(m); UNLOCK_AND_DEALLOCATE; @@ -2892,11 +2958,13 @@ FastPmapEnter: * 
the pageout queues. If the pageout daemon comes * across the page, it will remove it from the queues. */ + need_activation = FALSE; + if (m != VM_PAGE_NULL) { if (m->no_isync == TRUE) { - pmap_sync_caches_phys(m->phys_page); + pmap_sync_page_data_phys(m->phys_page); - if (type_of_fault == DBG_CACHE_HIT_FAULT) { + if ((type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) { /* * found it in the cache, but this * is the first fault-in of the page (no_isync == TRUE) @@ -2908,6 +2976,9 @@ FastPmapEnter: type_of_fault = DBG_PAGEIN_FAULT; } + if (m->clustered) { + need_activation = TRUE; + } m->no_isync = FALSE; } cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK; @@ -2926,16 +2997,15 @@ FastPmapEnter: */ if (m->object->private) { write_startup_file = - vm_fault_tws_insert(map, pmap_map, vaddr, + vm_fault_tws_insert(map, real_map, vaddr, m->object, m->offset); } } else { #ifndef i386 - int memattr; vm_map_entry_t entry; - vm_offset_t laddr; - vm_offset_t ldelta, hdelta; + vm_map_offset_t laddr; + vm_map_offset_t ldelta, hdelta; /* * do a pmap block mapping from the physical address @@ -2948,27 +3018,27 @@ FastPmapEnter: /* to execute, we return with a protection failure. */ if((full_fault_type & VM_PROT_EXECUTE) && - (pmap_canExecute((ppnum_t) - (object->shadow_offset >> 12)) < 1)) { + (!pmap_eligible_for_execute((ppnum_t) + (object->shadow_offset >> 12)))) { vm_map_verify_done(map, &version); - if(pmap_map != map) - vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); vm_fault_cleanup(object, top_page); vm_object_deallocate(object); kr = KERN_PROTECTION_FAILURE; goto done; } - if(pmap_map != map) { - vm_map_unlock(pmap_map); + if(real_map != map) { + vm_map_unlock(real_map); } if (original_map != map) { vm_map_unlock_read(map); vm_map_lock_read(original_map); map = original_map; } - pmap_map = map; + real_map = map; laddr = vaddr; hdelta = 0xFFFFF000; @@ -2985,11 +3055,11 @@ FastPmapEnter: laddr = (laddr - entry->vme_start) + entry->offset; vm_map_lock_read(entry->object.sub_map); - if(map != pmap_map) + if(map != real_map) vm_map_unlock_read(map); if(entry->use_pmap) { - vm_map_unlock_read(pmap_map); - pmap_map = entry->object.sub_map; + vm_map_unlock_read(real_map); + real_map = entry->object.sub_map; } map = entry->object.sub_map; @@ -3007,7 +3077,7 @@ FastPmapEnter: /* Set up a block mapped area */ pmap_map_block(caller_pmap, (addr64_t)(caller_pmap_addr - ldelta), - (((vm_offset_t) + (((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) + entry->offset + (laddr - entry->vme_start) @@ -3016,9 +3086,9 @@ FastPmapEnter: (VM_WIMG_MASK & (int)object->wimg_bits), 0); } else { /* Set up a block mapped area */ - pmap_map_block(pmap_map->pmap, + pmap_map_block(real_map->pmap, (addr64_t)(vaddr - ldelta), - (((vm_offset_t) + (((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) + entry->offset + (laddr - entry->vme_start) - ldelta)>>12, @@ -3049,6 +3119,12 @@ FastPmapEnter: if(m != VM_PAGE_NULL) { vm_page_lock_queues(); + if (m->clustered) { + vm_pagein_cluster_used++; + m->clustered = FALSE; + } + m->reference = TRUE; + if (change_wiring) { if (wired) vm_page_wire(m); @@ -3057,9 +3133,8 @@ FastPmapEnter: } #if VM_FAULT_STATIC_CONFIG else { - if (!m->active && !m->inactive) + if ((!m->active && !m->inactive) || ((need_activation == TRUE) && !m->active)) vm_page_activate(m); - m->reference = TRUE; } #else else if (software_reference_bits) { @@ -3078,8 +3153,8 @@ FastPmapEnter: */ vm_map_verify_done(map, &version); - if(pmap_map != map) - 
vm_map_unlock(pmap_map); + if(real_map != map) + vm_map_unlock(real_map); if(m != VM_PAGE_NULL) { PAGE_WAKEUP_DONE(m); UNLOCK_AND_DEALLOCATE; @@ -3095,10 +3170,7 @@ FastPmapEnter: done: if(write_startup_file) tws_send_startup_info(current_task()); - if (funnel_set) { - thread_funnel_set( curflock, TRUE); - funnel_set = FALSE; - } + thread_interrupt_level(interruptible_state); KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 0)) | DBG_FUNC_END, @@ -3121,11 +3193,11 @@ vm_fault_wire( vm_map_t map, vm_map_entry_t entry, pmap_t pmap, - vm_offset_t pmap_addr) + vm_map_offset_t pmap_addr) { - register vm_offset_t va; - register vm_offset_t end_addr = entry->vme_end; + register vm_map_offset_t va; + register vm_map_offset_t end_addr = entry->vme_end; register kern_return_t rc; assert(entry->in_transition); @@ -3186,10 +3258,10 @@ vm_fault_unwire( vm_map_entry_t entry, boolean_t deallocate, pmap_t pmap, - vm_offset_t pmap_addr) + vm_map_offset_t pmap_addr) { - register vm_offset_t va; - register vm_offset_t end_addr = entry->vme_end; + register vm_map_offset_t va; + register vm_map_offset_t end_addr = entry->vme_end; vm_object_t object; object = (entry->is_sub_map) @@ -3248,8 +3320,7 @@ vm_fault_unwire( result_object = result_page->object; if (deallocate) { assert(!result_page->fictitious); - pmap_page_protect(result_page->phys_page, - VM_PROT_NONE); + pmap_disconnect(result_page->phys_page); VM_PAGE_FREE(result_page); } else { vm_page_lock_queues(); @@ -3295,23 +3366,23 @@ vm_fault_unwire( */ kern_return_t vm_fault_wire_fast( - vm_map_t map, - vm_offset_t va, + __unused vm_map_t map, + vm_map_offset_t va, vm_map_entry_t entry, - pmap_t pmap, - vm_offset_t pmap_addr) + pmap_t pmap, + vm_map_offset_t pmap_addr) { vm_object_t object; vm_object_offset_t offset; register vm_page_t m; vm_prot_t prot; - thread_act_t thr_act; + thread_t thread = current_thread(); unsigned int cache_attr; VM_STAT(faults++); - if((thr_act=current_act()) && (thr_act->task != TASK_NULL)) - thr_act->task->faults++; + if (thread != THREAD_NULL && thread->task != TASK_NULL) + thread->task->faults++; /* * Recovery actions @@ -3389,14 +3460,17 @@ vm_fault_wire_fast( /* * Look for page in top-level object. If it's not there or * there's something going on, give up. + * ENCRYPTED SWAP: use the slow fault path, since we'll need to + * decrypt the page before wiring it down. */ m = vm_page_lookup(object, offset); - if ((m == VM_PAGE_NULL) || (m->busy) || + if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) || (m->unusual && ( m->error || m->restart || m->absent || prot & m->page_lock))) { GIVE_UP; } + ASSERT_PAGE_DECRYPTED(m); /* * Wire the page down now. All bail outs beyond this @@ -3428,7 +3502,7 @@ vm_fault_wire_fast( * may cause other faults. */ if (m->no_isync == TRUE) { - pmap_sync_caches_phys(m->phys_page); + pmap_sync_page_data_phys(m->phys_page); m->no_isync = FALSE; } @@ -3518,7 +3592,7 @@ kern_return_t vm_fault_copy( vm_object_t src_object, vm_object_offset_t src_offset, - vm_size_t *src_size, /* INOUT */ + vm_map_size_t *copy_size, /* INOUT */ vm_object_t dst_object, vm_object_offset_t dst_offset, vm_map_t dst_map, @@ -3535,28 +3609,28 @@ vm_fault_copy( vm_page_t dst_top_page; vm_prot_t dst_prot; - vm_size_t amount_left; + vm_map_size_t amount_left; vm_object_t old_copy_object; kern_return_t error = 0; - vm_size_t part_size; + vm_map_size_t part_size; /* * In order not to confuse the clustered pageins, align * the different offsets on a page boundary. 
*/ - vm_object_offset_t src_lo_offset = trunc_page_64(src_offset); - vm_object_offset_t dst_lo_offset = trunc_page_64(dst_offset); - vm_object_offset_t src_hi_offset = round_page_64(src_offset + *src_size); - vm_object_offset_t dst_hi_offset = round_page_64(dst_offset + *src_size); + vm_object_offset_t src_lo_offset = vm_object_trunc_page(src_offset); + vm_object_offset_t dst_lo_offset = vm_object_trunc_page(dst_offset); + vm_object_offset_t src_hi_offset = vm_object_round_page(src_offset + *copy_size); + vm_object_offset_t dst_hi_offset = vm_object_round_page(dst_offset + *copy_size); #define RETURN(x) \ MACRO_BEGIN \ - *src_size -= amount_left; \ + *copy_size -= amount_left; \ MACRO_RETURN(x); \ MACRO_END - amount_left = *src_size; + amount_left = *copy_size; do { /* while (amount_left > 0) */ /* * There may be a deadlock if both source and destination @@ -3574,7 +3648,7 @@ vm_fault_copy( XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0); switch (vm_fault_page(dst_object, - trunc_page_64(dst_offset), + vm_object_trunc_page(dst_offset), VM_PROT_WRITE|VM_PROT_READ, FALSE, interruptible, @@ -3648,7 +3722,7 @@ vm_fault_copy( } else { vm_object_lock(src_object); src_page = vm_page_lookup(src_object, - trunc_page_64(src_offset)); + vm_object_trunc_page(src_offset)); if (src_page == dst_page) { src_prot = dst_prot; result_page = VM_PAGE_NULL; @@ -3660,7 +3734,7 @@ vm_fault_copy( "vm_fault_copy(2) -> vm_fault_page\n", 0,0,0,0,0); switch (vm_fault_page(src_object, - trunc_page_64(src_offset), + vm_object_trunc_page(src_offset), VM_PROT_READ, FALSE, interruptible, @@ -3736,8 +3810,8 @@ vm_fault_copy( vm_object_offset_t src_po, dst_po; - src_po = src_offset - trunc_page_64(src_offset); - dst_po = dst_offset - trunc_page_64(dst_offset); + src_po = src_offset - vm_object_trunc_page(src_offset); + dst_po = dst_offset - vm_object_trunc_page(dst_offset); if (dst_po > src_po) { part_size = PAGE_SIZE - dst_po; diff --git a/osfmk/vm/vm_fault.h b/osfmk/vm/vm_fault.h index 8c9bd88f5..4941efe69 100644 --- a/osfmk/vm/vm_fault.h +++ b/osfmk/vm/vm_fault.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,16 +58,17 @@ #ifndef _VM_VM_FAULT_H_ #define _VM_VM_FAULT_H_ +#include #include #include #include #include #include -#include -#include -#include + +#ifdef KERNEL_PRIVATE typedef kern_return_t vm_fault_return_t; + #define VM_FAULT_SUCCESS 0 #define VM_FAULT_RETRY 1 #define VM_FAULT_INTERRUPTED 2 @@ -75,6 +76,25 @@ typedef kern_return_t vm_fault_return_t; #define VM_FAULT_FICTITIOUS_SHORTAGE 4 #define VM_FAULT_MEMORY_ERROR 5 +/* + * Page fault handling based on vm_map (or entries therein) + */ + +extern kern_return_t vm_fault( + vm_map_t map, + vm_map_offset_t vaddr, + vm_prot_t fault_type, + boolean_t change_wiring, + int interruptible, + pmap_t pmap, + vm_map_offset_t pmap_addr); + +#ifdef MACH_KERNEL_PRIVATE + +#include +#include +#include + extern void vm_fault_init(void); /* @@ -88,8 +108,8 @@ extern vm_fault_return_t vm_fault_page( vm_prot_t fault_type, /* What access is requested */ boolean_t must_be_resident,/* Must page be resident? 
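The src_po/dst_po hunk above sizes each partial copy in vm_fault_copy(): when source and destination sit at different positions within their pages, only the stretch up to the nearer page boundary can be copied in one step. A worked sketch, assuming a 4 KiB page and uint64_t offsets:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096ULL

    static uint64_t trunc_page(uint64_t off) { return off & ~(PAGE_SIZE - 1); }

    static uint64_t
    part_size(uint64_t src_offset, uint64_t dst_offset)
    {
        uint64_t src_po = src_offset - trunc_page(src_offset);
        uint64_t dst_po = dst_offset - trunc_page(dst_offset);

        /* copy up to whichever page boundary comes first */
        if (dst_po > src_po)
            return PAGE_SIZE - dst_po;
        return PAGE_SIZE - src_po;
    }

    int
    main(void)
    {
        /* src is 0x100 into its page, dst is 0xF00 into its page:
         * only 0x100 bytes fit before dst crosses a page boundary */
        printf("0x%llx\n", (unsigned long long)part_size(0x10100, 0x2FF00));
        return 0;
    }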
*/ int interruptible,/* how may fault be interrupted */ - vm_object_offset_t lo_offset, /* Map entry start */ - vm_object_offset_t hi_offset, /* Map entry end */ + vm_map_offset_t lo_offset, /* Map entry start */ + vm_map_offset_t hi_offset, /* Map entry end */ vm_behavior_t behavior, /* Expected paging behavior */ /* Modifies in place: */ vm_prot_t *protection, /* Protection for mapping */ @@ -104,45 +124,37 @@ extern vm_fault_return_t vm_fault_page( boolean_t no_zero_fill, /* don't fill absent pages */ boolean_t data_supply, /* treat as data_supply */ vm_map_t map, - vm_offset_t vaddr); + vm_map_offset_t vaddr); extern void vm_fault_cleanup( vm_object_t object, vm_page_t top_page); -/* - * Page fault handling based on vm_map (or entries therein) - */ - -extern kern_return_t vm_fault( - vm_map_t map, - vm_offset_t vaddr, - vm_prot_t fault_type, - boolean_t change_wiring, - int interruptible, - pmap_t pmap, - vm_offset_t pmap_addr); extern kern_return_t vm_fault_wire( vm_map_t map, vm_map_entry_t entry, pmap_t pmap, - vm_offset_t pmap_addr); + vm_map_offset_t pmap_addr); extern void vm_fault_unwire( vm_map_t map, vm_map_entry_t entry, boolean_t deallocate, pmap_t pmap, - vm_offset_t pmap_addr); + vm_map_offset_t pmap_addr); extern kern_return_t vm_fault_copy( vm_object_t src_object, vm_object_offset_t src_offset, - vm_size_t *src_size, /* INOUT */ + vm_map_size_t *copy_size, /* INOUT */ vm_object_t dst_object, vm_object_offset_t dst_offset, vm_map_t dst_map, vm_map_version_t *dst_version, int interruptible); +#endif /* MACH_KERNEL_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + #endif /* _VM_VM_FAULT_H_ */ diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index 8b20eae9b..283b07822 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,8 +68,12 @@ #include #include +#include + +#include + #define ZONE_MAP_MIN (12 * 1024 * 1024) -#define ZONE_MAP_MAX (256 * 1024 * 1024) +#define ZONE_MAP_MAX (768 * 1024 * 1024) /* * vm_mem_bootstrap initializes the virtual memory system. diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index b8585d73a..d43706c8d 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,7 +57,6 @@ * Kernel memory management. 
*/ -#include #include #include #include @@ -85,7 +84,7 @@ vm_map_t kernel_pageable_map; extern kern_return_t kmem_alloc_pages( register vm_object_t object, register vm_object_offset_t offset, - register vm_size_t size); + register vm_object_size_t size); extern void kmem_remap_pages( register vm_object_t object, @@ -96,18 +95,20 @@ extern void kmem_remap_pages( kern_return_t kmem_alloc_contig( - vm_map_t map, - vm_offset_t *addrp, - vm_size_t size, - vm_offset_t mask, - int flags) + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size, + vm_offset_t mask, + int flags) { vm_object_t object; - vm_page_t m, pages; - kern_return_t kr; - vm_offset_t addr, i; vm_object_offset_t offset; + vm_map_offset_t map_addr; + vm_map_offset_t map_mask; + vm_map_size_t map_size, i; vm_map_entry_t entry; + vm_page_t m, pages; + kern_return_t kr; if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT))) return KERN_INVALID_ARGUMENT; @@ -117,47 +118,47 @@ kmem_alloc_contig( return KERN_INVALID_ARGUMENT; } - size = round_page_32(size); - if ((flags & KMA_KOBJECT) == 0) { - object = vm_object_allocate(size); - kr = vm_map_find_space(map, &addr, size, mask, &entry); - } - else { - object = kernel_object; - kr = vm_map_find_space(map, &addr, size, mask, &entry); - } + map_size = vm_map_round_page(size); + map_mask = (vm_map_offset_t)mask; - if ((flags & KMA_KOBJECT) == 0) { - entry->object.vm_object = object; - entry->offset = offset = 0; + /* + * Allocate a new object (if necessary) and the reference we + * will be donating to the map entry. We must do this before + * locking the map, or risk deadlock with the default pager. + */ + if ((flags & KMA_KOBJECT) != 0) { + object = kernel_object; + vm_object_reference(object); } else { - offset = addr - VM_MIN_KERNEL_ADDRESS; - - if (entry->object.vm_object == VM_OBJECT_NULL) { - vm_object_reference(object); - entry->object.vm_object = object; - entry->offset = offset; - } + object = vm_object_allocate(map_size); } - if (kr != KERN_SUCCESS) { - if ((flags & KMA_KOBJECT) == 0) - vm_object_deallocate(object); + kr = vm_map_find_space(map, &map_addr, map_size, map_mask, &entry); + if (KERN_SUCCESS != kr) { + vm_object_deallocate(object); return kr; } + entry->object.vm_object = object; + entry->offset = offset = (object == kernel_object) ? 
+ map_addr - VM_MIN_KERNEL_ADDRESS : 0; + + /* Take an extra object ref in case the map entry gets deleted */ + vm_object_reference(object); vm_map_unlock(map); - kr = cpm_allocate(size, &pages, FALSE); + kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, FALSE); if (kr != KERN_SUCCESS) { - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + map_size), 0); + vm_object_deallocate(object); *addrp = 0; return kr; } vm_object_lock(object); - for (i = 0; i < size; i += PAGE_SIZE) { + for (i = 0; i < map_size; i += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); m->busy = FALSE; @@ -165,20 +166,25 @@ kmem_alloc_contig( } vm_object_unlock(object); - if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE)) + if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); - vm_object_page_remove(object, offset, offset + size); + vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, vm_map_trunc_page(map_addr), + vm_map_round_page(map_addr + map_size), 0); + vm_object_deallocate(object); return kr; } + vm_object_deallocate(object); + if (object == kernel_object) - vm_map_simplify(map, addr); + vm_map_simplify(map, map_addr); - *addrp = addr; + *addrp = map_addr; return KERN_SUCCESS; } @@ -203,77 +209,58 @@ kernel_memory_allocate( register vm_offset_t mask, int flags) { - vm_object_t object = VM_OBJECT_NULL; + vm_object_t object; + vm_object_offset_t offset; vm_map_entry_t entry; - vm_object_offset_t offset; - vm_offset_t addr; - vm_offset_t i; + vm_map_offset_t map_addr; + vm_map_offset_t map_mask; + vm_map_size_t map_size; + vm_map_size_t i; kern_return_t kr; - size = round_page_32(size); - if ((flags & KMA_KOBJECT) == 0) { - /* - * Allocate a new object. We must do this before locking - * the map, or risk deadlock with the default pager: - * device_read_alloc uses kmem_alloc, - * which tries to allocate an object, - * which uses kmem_alloc_wired to get memory, - * which blocks for pages. - * then the default pager needs to read a block - * to process a memory_object_data_write, - * and device_read_alloc calls kmem_alloc - * and deadlocks on the map lock. - */ - object = vm_object_allocate(size); - kr = vm_map_find_space(map, &addr, size, mask, &entry); + if (size == 0) { + *addrp = 0; + return KERN_INVALID_ARGUMENT; } - else { + + map_size = vm_map_round_page(size); + map_mask = (vm_map_offset_t) mask; + + /* + * Allocate a new object (if necessary). We must do this before + * locking the map, or risk deadlock with the default pager. 
+ */ + if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; - kr = vm_map_find_space(map, &addr, size, mask, &entry); + vm_object_reference(object); + } else { + object = vm_object_allocate(map_size); } - if (kr != KERN_SUCCESS) { - if ((flags & KMA_KOBJECT) == 0) - vm_object_deallocate(object); + + kr = vm_map_find_space(map, &map_addr, map_size, map_mask, &entry); + if (KERN_SUCCESS != kr) { + vm_object_deallocate(object); return kr; } - if ((flags & KMA_KOBJECT) == 0) { - entry->object.vm_object = object; - entry->offset = offset = 0; - } else { - offset = addr - VM_MIN_KERNEL_ADDRESS; - - if (entry->object.vm_object == VM_OBJECT_NULL) { - vm_object_reference(object); - entry->object.vm_object = object; - entry->offset = offset; - } - } + entry->object.vm_object = object; + entry->offset = offset = (object == kernel_object) ? + map_addr - VM_MIN_KERNEL_ADDRESS : 0; - /* - * Since we have not given out this address yet, - * it is safe to unlock the map. Except of course - * we must make certain no one coalesces our address - * or does a blind vm_deallocate and removes the object - * an extra object reference will suffice to protect - * against both contingencies. - */ vm_object_reference(object); vm_map_unlock(map); vm_object_lock(object); - for (i = 0; i < size; i += PAGE_SIZE) { + for (i = 0; i < map_size; i += PAGE_SIZE) { vm_page_t mem; - while ((mem = vm_page_alloc(object, - offset + (vm_object_offset_t)i)) - == VM_PAGE_NULL) { + while (VM_PAGE_NULL == + (mem = vm_page_alloc(object, offset + i))) { if (flags & KMA_NOPAGEWAIT) { if (object == kernel_object) - vm_object_page_remove(object, offset, - offset + (vm_object_offset_t)i); + vm_object_page_remove(object, offset, offset + i); vm_object_unlock(object); - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, map_addr, map_addr + map_size, 0); vm_object_deallocate(object); return KERN_RESOURCE_SHORTAGE; } @@ -285,29 +272,26 @@ kernel_memory_allocate( } vm_object_unlock(object); - if ((kr = vm_map_wire(map, addr, addr + size, VM_PROT_DEFAULT, FALSE)) + if ((kr = vm_map_wire(map, map_addr, map_addr + map_size, VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); - vm_object_page_remove(object, offset, offset + size); + vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } - vm_map_remove(map, addr, addr + size, 0); + vm_map_remove(map, map_addr, map_addr + map_size, 0); vm_object_deallocate(object); return (kr); } /* now that the page is wired, we no longer have to fear coalesce */ vm_object_deallocate(object); if (object == kernel_object) - vm_map_simplify(map, addr); + vm_map_simplify(map, map_addr); /* * Return the memory, not zeroed. 
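The reworked kmem_alloc_contig() and kernel_memory_allocate() above share one reference-counting choreography: take or allocate an object reference before locking the map (avoiding deadlock against the default pager), donate that reference to the new map entry, hold one extra reference across the window where the map is unlocked, and drop it once the pages are wired. A toy model of just the counts, assuming nothing beyond what the hunks show:

    #include <stdio.h>

    struct object { int ref_count; };

    static void obj_ref(struct object *o)   { o->ref_count++; }
    static void obj_deref(struct object *o) { o->ref_count--; }

    int
    main(void)
    {
        struct object kobj = { .ref_count = 1 };   /* kernel_object */

        obj_ref(&kobj);      /* ref taken before the map is locked;
                              * donated to entry->object.vm_object    */
        obj_ref(&kobj);      /* extra ref guards the unlocked window
                              * in case the entry gets deleted        */
        /* ... vm_map_unlock(); allocate and wire the pages ...       */
        obj_deref(&kobj);    /* wired pages now pin the mapping       */

        printf("ref_count=%d (entry still holds its donated ref)\n",
               kobj.ref_count);                    /* prints 2 */
        return 0;
    }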
*/ -#if (NCPUS > 1) && i860 - bzero( addr, size ); -#endif /* #if (NCPUS > 1) && i860 */ - *addrp = addr; + *addrp = CAST_DOWN(vm_offset_t, map_addr); return KERN_SUCCESS; } @@ -339,24 +323,28 @@ kmem_alloc( */ kern_return_t kmem_realloc( - vm_map_t map, - vm_offset_t oldaddr, - vm_size_t oldsize, - vm_offset_t *newaddrp, - vm_size_t newsize) + vm_map_t map, + vm_offset_t oldaddr, + vm_size_t oldsize, + vm_offset_t *newaddrp, + vm_size_t newsize) { - vm_offset_t oldmin, oldmax; - vm_offset_t newaddr; - vm_offset_t offset; - vm_object_t object; - vm_map_entry_t oldentry, newentry; - vm_page_t mem; - kern_return_t kr; + vm_object_t object; + vm_object_offset_t offset; + vm_map_offset_t oldmapmin; + vm_map_offset_t oldmapmax; + vm_map_offset_t newmapaddr; + vm_map_size_t oldmapsize; + vm_map_size_t newmapsize; + vm_map_entry_t oldentry; + vm_map_entry_t newentry; + vm_page_t mem; + kern_return_t kr; - oldmin = trunc_page_32(oldaddr); - oldmax = round_page_32(oldaddr + oldsize); - oldsize = oldmax - oldmin; - newsize = round_page_32(newsize); + oldmapmin = vm_map_trunc_page(oldaddr); + oldmapmax = vm_map_round_page(oldaddr + oldsize); + oldmapsize = oldmapmax - oldmapmin; + newmapsize = vm_map_round_page(newsize); /* @@ -365,7 +353,7 @@ kmem_realloc( vm_map_lock(map); - if (!vm_map_lookup_entry(map, oldmin, &oldentry)) + if (!vm_map_lookup_entry(map, oldmapmin, &oldentry)) panic("kmem_realloc"); object = oldentry->object.vm_object; @@ -380,33 +368,33 @@ kmem_realloc( /* attempt is made to realloc a kmem_alloc'd area */ vm_object_lock(object); vm_map_unlock(map); - if (object->size != oldsize) + if (object->size != oldmapsize) panic("kmem_realloc"); - object->size = newsize; + object->size = newmapsize; vm_object_unlock(object); /* allocate the new pages while expanded portion of the */ /* object is still not mapped */ - kmem_alloc_pages(object, oldsize, newsize-oldsize); - + kmem_alloc_pages(object, vm_object_round_page(oldmapsize), + vm_object_round_page(newmapsize-oldmapsize)); /* * Find space for the new region. 
*/ - kr = vm_map_find_space(map, &newaddr, newsize, (vm_offset_t) 0, - &newentry); + kr = vm_map_find_space(map, &newmapaddr, newmapsize, + (vm_map_offset_t) 0, &newentry); if (kr != KERN_SUCCESS) { vm_object_lock(object); - for(offset = oldsize; - offsetsize = oldsize; + object->size = oldmapsize; vm_object_unlock(object); vm_object_deallocate(object); return kr; @@ -421,27 +409,25 @@ kmem_realloc( vm_object_reference(object); vm_map_unlock(map); - if ((kr = vm_map_wire(map, newaddr, newaddr + newsize, - VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) { - vm_map_remove(map, newaddr, newaddr + newsize, 0); + kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE); + if (KERN_SUCCESS != kr) { + vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0); vm_object_lock(object); - for(offset = oldsize; - offsetsize = oldsize; + object->size = oldmapsize; vm_object_unlock(object); vm_object_deallocate(object); return (kr); } vm_object_deallocate(object); - - *newaddrp = newaddr; + *newaddrp = CAST_DOWN(vm_offset_t, newmapaddr); return KERN_SUCCESS; } @@ -495,22 +481,26 @@ kmem_alloc_pageable( vm_offset_t *addrp, vm_size_t size) { - vm_offset_t addr; + vm_map_offset_t map_addr; + vm_map_size_t map_size; kern_return_t kr; #ifndef normal - addr = (vm_map_min(map)) + 0x1000; + map_addr = (vm_map_min(map)) + 0x1000; #else - addr = vm_map_min(map); + map_addr = vm_map_min(map); #endif - kr = vm_map_enter(map, &addr, round_page_32(size), - (vm_offset_t) 0, TRUE, + map_size = vm_map_round_page(size); + + kr = vm_map_enter(map, &map_addr, map_size, + (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + if (kr != KERN_SUCCESS) return kr; - *addrp = addr; + *addrp = CAST_DOWN(vm_offset_t, map_addr); return KERN_SUCCESS; } @@ -530,8 +520,8 @@ kmem_free( { kern_return_t kr; - kr = vm_map_remove(map, trunc_page_32(addr), - round_page_32(addr + size), + kr = vm_map_remove(map, vm_map_trunc_page(addr), + vm_map_round_page(addr + size), VM_MAP_REMOVE_KUNWIRE); if (kr != KERN_SUCCESS) panic("kmem_free"); @@ -545,29 +535,29 @@ kern_return_t kmem_alloc_pages( register vm_object_t object, register vm_object_offset_t offset, - register vm_size_t size) + register vm_object_size_t size) { + vm_object_size_t alloc_size; - size = round_page_32(size); + alloc_size = vm_object_round_page(size); vm_object_lock(object); - while (size) { + while (alloc_size) { register vm_page_t mem; /* * Allocate a page */ - while ((mem = vm_page_alloc(object, offset)) - == VM_PAGE_NULL) { + while (VM_PAGE_NULL == + (mem = vm_page_alloc(object, offset))) { vm_object_unlock(object); VM_PAGE_WAIT(); vm_object_lock(object); } + mem->busy = FALSE; - + alloc_size -= PAGE_SIZE; offset += PAGE_SIZE; - size -= PAGE_SIZE; - mem->busy = FALSE; } vm_object_unlock(object); return KERN_SUCCESS; @@ -586,12 +576,19 @@ kmem_remap_pages( register vm_offset_t end, vm_prot_t protection) { + + vm_map_offset_t map_start; + vm_map_offset_t map_end; + /* * Mark the pmap region as not pageable. 
*/ - pmap_pageable(kernel_pmap, start, end, FALSE); + map_start = vm_map_trunc_page(start); + map_end = vm_map_round_page(end); + + pmap_pageable(kernel_pmap, map_start, map_end, FALSE); - while (start < end) { + while (map_start < map_end) { register vm_page_t mem; vm_object_lock(object); @@ -610,16 +607,24 @@ kmem_remap_pages( vm_page_unlock_queues(); vm_object_unlock(object); + /* + * ENCRYPTED SWAP: + * The page is supposed to be wired now, so it + * shouldn't be encrypted at this point. It can + * safely be entered in the page table. + */ + ASSERT_PAGE_DECRYPTED(mem); + /* * Enter it in the kernel pmap. The page isn't busy, * but this shouldn't be a problem because it is wired. */ - PMAP_ENTER(kernel_pmap, start, mem, protection, + PMAP_ENTER(kernel_pmap, map_start, mem, protection, ((unsigned int)(mem->object->wimg_bits)) & VM_WIMG_MASK, TRUE); - start += PAGE_SIZE; + map_start += PAGE_SIZE; offset += PAGE_SIZE; } } @@ -645,13 +650,15 @@ kmem_suballoc( vm_offset_t *addr, vm_size_t size, boolean_t pageable, - boolean_t anywhere, + int flags, vm_map_t *new_map) { - vm_map_t map; - kern_return_t kr; + vm_map_t map; + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - size = round_page_32(size); + map_size = vm_map_round_page(size); /* * Need reference on submap object because it is internal @@ -660,10 +667,11 @@ kmem_suballoc( */ vm_object_reference(vm_submap_object); - if (anywhere == TRUE) - *addr = (vm_offset_t)vm_map_min(parent); - kr = vm_map_enter(parent, addr, size, - (vm_offset_t) 0, anywhere, + map_addr = (flags & VM_FLAGS_ANYWHERE) ? + vm_map_min(parent) : vm_map_trunc_page(*addr); + + kr = vm_map_enter(parent, &map_addr, map_size, + (vm_map_offset_t) 0, flags, vm_submap_object, (vm_object_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { @@ -672,20 +680,21 @@ kmem_suballoc( } pmap_reference(vm_map_pmap(parent)); - map = vm_map_create(vm_map_pmap(parent), *addr, *addr + size, pageable); + map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable); if (map == VM_MAP_NULL) panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */ - kr = vm_map_submap(parent, *addr, *addr + size, map, *addr, FALSE); + kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE); if (kr != KERN_SUCCESS) { /* * See comment preceding vm_map_submap(). */ - vm_map_remove(parent, *addr, *addr + size, VM_MAP_NO_FLAGS); + vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS); vm_map_deallocate(map); /* also removes ref to pmap */ vm_object_deallocate(vm_submap_object); return (kr); } + *addr = CAST_DOWN(vm_offset_t, map_addr); *new_map = map; return (KERN_SUCCESS); } @@ -701,19 +710,28 @@ kmem_init( vm_offset_t start, vm_offset_t end) { - kernel_map = vm_map_create(pmap_kernel(), - VM_MIN_KERNEL_ADDRESS, end, - FALSE); + vm_map_offset_t map_start; + vm_map_offset_t map_end; + + map_start = vm_map_trunc_page(start); + map_end = vm_map_round_page(end); + + kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_ADDRESS, + map_end, FALSE); /* * Reserve virtual memory allocated up to this time. 
*/ if (start != VM_MIN_KERNEL_ADDRESS) { - vm_offset_t addr = VM_MIN_KERNEL_ADDRESS; + vm_map_offset_t map_addr; + + map_addr = VM_MIN_KERNEL_ADDRESS; (void) vm_map_enter(kernel_map, - &addr, start - VM_MIN_KERNEL_ADDRESS, - (vm_offset_t) 0, TRUE, + &map_addr, + (vm_map_size_t)(map_start - VM_MIN_KERNEL_ADDRESS), + (vm_map_offset_t) 0, + VM_FLAGS_ANYWHERE | VM_FLAGS_NO_PMAP_CHECK, VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, @@ -731,75 +749,6 @@ kmem_init( } -/* - * kmem_io_object_trunc: - * - * Truncate an object vm_map_copy_t. - * Called by the scatter/gather list network code to remove pages from - * the tail end of a packet. Also unwires the objects pages. - */ - -kern_return_t -kmem_io_object_trunc(copy, new_size) - vm_map_copy_t copy; /* IN/OUT copy object */ - register vm_size_t new_size; /* IN new object size */ -{ - register vm_size_t offset, old_size; - - assert(copy->type == VM_MAP_COPY_OBJECT); - - old_size = (vm_size_t)round_page_64(copy->size); - copy->size = new_size; - new_size = round_page_32(new_size); - - vm_object_lock(copy->cpy_object); - vm_object_page_remove(copy->cpy_object, - (vm_object_offset_t)new_size, (vm_object_offset_t)old_size); - for (offset = 0; offset < new_size; offset += PAGE_SIZE) { - register vm_page_t mem; - - if ((mem = vm_page_lookup(copy->cpy_object, - (vm_object_offset_t)offset)) == VM_PAGE_NULL) - panic("kmem_io_object_trunc: unable to find object page"); - - /* - * Make sure these pages are marked dirty - */ - mem->dirty = TRUE; - vm_page_lock_queues(); - vm_page_unwire(mem); - vm_page_unlock_queues(); - } - copy->cpy_object->size = new_size; /* adjust size of object */ - vm_object_unlock(copy->cpy_object); - return(KERN_SUCCESS); -} - -/* - * kmem_io_object_deallocate: - * - * Free an vm_map_copy_t. - * Called by the scatter/gather list network code to free a packet. - */ - -void -kmem_io_object_deallocate( - vm_map_copy_t copy) /* IN/OUT copy object */ -{ - kern_return_t ret; - - /* - * Clear out all the object pages (this will leave an empty object). - */ - ret = kmem_io_object_trunc(copy, 0); - if (ret != KERN_SUCCESS) - panic("kmem_io_object_deallocate: unable to truncate object"); - /* - * ...and discard the copy object. - */ - vm_map_copy_discard(copy); -} - /* * Routine: copyinmap * Purpose: @@ -808,23 +757,36 @@ kmem_io_object_deallocate( * is incomplete; it handles the current user map * and the kernel map/submaps. */ -boolean_t +kern_return_t copyinmap( - vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, - vm_size_t length) + vm_map_t map, + vm_map_offset_t fromaddr, + void *todata, + vm_size_t length) { - if (vm_map_pmap(map) == pmap_kernel()) { + kern_return_t kr = KERN_SUCCESS; + vm_map_t oldmap; + + if (vm_map_pmap(map) == pmap_kernel()) + { /* assume a correct copy */ - memcpy((void *)toaddr, (void *)fromaddr, length); - return FALSE; + memcpy(todata, CAST_DOWN(void *, fromaddr), length); + } + else if (current_map() == map) + { + if (copyin(fromaddr, todata, length) != 0) + kr = KERN_INVALID_ADDRESS; } - - if (current_map() == map) - return copyin((char *)fromaddr, (char *)toaddr, length); - - return TRUE; + else + { + vm_map_reference(map); + oldmap = vm_map_switch(map); + if (copyin(fromaddr, todata, length) != 0) + kr = KERN_INVALID_ADDRESS; + vm_map_switch(oldmap); + vm_map_deallocate(map); + } + return kr; } /* @@ -835,42 +797,45 @@ copyinmap( * is incomplete; it handles the current user map * and the kernel map/submaps. 
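The reworked copyinmap() above now returns a kern_return_t and handles three cases: a map backed by the kernel pmap (plain memcpy), the caller's current map (direct copyin), and a foreign map (temporarily switching address spaces around the copyin). A condensed sketch of that dispatch, with the map primitives declared as hypothetical stand-ins so the shape of the logic is visible in isolation:

#include <string.h>

typedef int kern_return_t;
#define KERN_SUCCESS          0
#define KERN_INVALID_ADDRESS  1

typedef struct vm_map *vm_map_t;

/* stand-ins for the kernel primitives copyinmap() relies on */
extern int      is_kernel_map(vm_map_t map);
extern vm_map_t current_map(void);
extern vm_map_t vm_map_switch(vm_map_t map);
extern void     vm_map_reference(vm_map_t map);
extern void     vm_map_deallocate(vm_map_t map);
extern int      copyin(const void *uaddr, void *kaddr, size_t len);

kern_return_t copyinmap_sketch(vm_map_t map, const void *from, void *to, size_t len) {
    kern_return_t kr = KERN_SUCCESS;
    if (is_kernel_map(map)) {
        memcpy(to, from, len);                 /* kernel source: assume a correct copy */
    } else if (current_map() == map) {
        if (copyin(from, to, len) != 0)        /* map is already the active space */
            kr = KERN_INVALID_ADDRESS;
    } else {
        vm_map_reference(map);                 /* pin the foreign map ... */
        vm_map_t old = vm_map_switch(map);     /* ... make it current ... */
        if (copyin(from, to, len) != 0)
            kr = KERN_INVALID_ADDRESS;
        vm_map_switch(old);                    /* ... then switch back and unpin */
        vm_map_deallocate(map);
    }
    return kr;
}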
*/ -boolean_t +kern_return_t copyoutmap( - vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, - vm_size_t length) + vm_map_t map, + void *fromdata, + vm_map_address_t toaddr, + vm_size_t length) { if (vm_map_pmap(map) == pmap_kernel()) { /* assume a correct copy */ - memcpy((void *)toaddr, (void *)fromaddr, length); - return FALSE; + memcpy(CAST_DOWN(void *, toaddr), fromdata, length); + return KERN_SUCCESS; } - if (current_map() == map) - return copyout((char *)fromaddr, (char *)toaddr, length); + if (current_map() != map) + return KERN_NOT_SUPPORTED; + + if (copyout(fromdata, toaddr, length) != 0) + return KERN_INVALID_ADDRESS; - return TRUE; + return KERN_SUCCESS; } kern_return_t vm_conflict_check( vm_map_t map, - vm_offset_t off, - vm_size_t len, - memory_object_t pager, + vm_map_offset_t off, + vm_map_size_t len, + memory_object_t pager, vm_object_offset_t file_off) { vm_map_entry_t entry; vm_object_t obj; vm_object_offset_t obj_off; vm_map_t base_map; - vm_offset_t base_offset; - vm_offset_t original_offset; + vm_map_offset_t base_offset; + vm_map_offset_t original_offset; kern_return_t kr; - vm_size_t local_len; + vm_map_size_t local_len; base_map = map; base_offset = off; diff --git a/osfmk/vm/vm_kern.h b/osfmk/vm/vm_kern.h index 247448732..8f18cf793 100644 --- a/osfmk/vm/vm_kern.h +++ b/osfmk/vm/vm_kern.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,14 +60,11 @@ #ifndef _VM_VM_KERN_H_ #define _VM_VM_KERN_H_ +#include #include #include -#include -#include -extern void kmem_init( - vm_offset_t start, - vm_offset_t end); +#ifdef KERNEL_PRIVATE extern kern_return_t kernel_memory_allocate( vm_map_t map, @@ -98,11 +95,6 @@ extern kern_return_t kmem_alloc_pageable( vm_offset_t *addrp, vm_size_t size); -extern kern_return_t kmem_alloc_wired( - vm_map_t map, - vm_offset_t *addrp, - vm_size_t size); - extern kern_return_t kmem_alloc_aligned( vm_map_t map, vm_offset_t *addrp, @@ -120,6 +112,17 @@ extern void kmem_free( vm_offset_t addr, vm_size_t size); +#ifdef MACH_KERNEL_PRIVATE + +extern void kmem_init( + vm_offset_t start, + vm_offset_t end); + +extern kern_return_t kmem_alloc_wired( + vm_map_t map, + vm_offset_t *addrp, + vm_size_t size); + extern kern_return_t kmem_suballoc( vm_map_t parent, vm_offset_t *addr, @@ -128,34 +131,31 @@ extern kern_return_t kmem_suballoc( boolean_t anywhere, vm_map_t *new_map); -extern void kmem_io_object_deallocate( - vm_map_copy_t copy); - -extern kern_return_t kmem_io_object_trunc( - vm_map_copy_t copy, - vm_size_t new_size); - -extern boolean_t copyinmap( +extern kern_return_t copyinmap( vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, + vm_map_offset_t fromaddr, + void *todata, vm_size_t length); -extern boolean_t copyoutmap( +extern kern_return_t copyoutmap( vm_map_t map, - vm_offset_t fromaddr, - vm_offset_t toaddr, + void *fromdata, + vm_map_offset_t toaddr, vm_size_t length); extern kern_return_t vm_conflict_check( vm_map_t map, - vm_offset_t off, - vm_size_t len, + vm_map_offset_t off, + vm_map_size_t len, memory_object_t pager, vm_object_offset_t file_off); +#endif /* MACH_KERNEL_PRIVATE */ + extern vm_map_t kernel_map; extern vm_map_t kernel_pageable_map; extern vm_map_t ipc_kernel_map; +#endif /* KERNEL_PRIVATE */ + #endif /* _VM_VM_KERN_H_ */ diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index fdba4d3de..020000d67 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ 
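The vm_kern.h changes above move the prototypes inside nested visibility guards: KERNEL_PRIVATE for kernel-internal clients, and MACH_KERNEL_PRIVATE for interfaces reserved to the Mach VM layer itself (kmem_init, kmem_alloc_wired, kmem_suballoc, copyinmap, copyoutmap, vm_conflict_check). The resulting skeleton looks roughly like this:

#ifndef _VM_VM_KERN_H_SKETCH
#define _VM_VM_KERN_H_SKETCH

#ifdef  KERNEL_PRIVATE
/* interfaces visible to kernel components: kmem_alloc, kmem_free, ... */

#ifdef  MACH_KERNEL_PRIVATE
/* interfaces private to the VM subsystem: kmem_init, kmem_suballoc,
 * copyinmap, copyoutmap, vm_conflict_check, ... */
#endif  /* MACH_KERNEL_PRIVATE */

/* globals such as kernel_map stay visible to all kernel clients */
#endif  /* KERNEL_PRIVATE */

#endif  /* _VM_VM_KERN_H_SKETCH */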
-1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,9 +57,9 @@ * Virtual memory mapping module. */ -#include #include #include +#include #include #include @@ -67,9 +67,15 @@ #include #include #include +#include +#include + #include #include +#include #include + +#include #include #include #include @@ -79,113 +85,179 @@ #include #include #include -#include -#include #include #include #include +#include +#include +#include +#include // for vm_map_commpage64 and vm_map_remove_compage64 + +#ifdef ppc +#include +#endif /* ppc */ + +#include + /* Internal prototypes */ -extern boolean_t vm_map_range_check( + +static void vm_map_simplify_range( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); /* forward */ + +static boolean_t vm_map_range_check( vm_map_t map, - vm_offset_t start, - vm_offset_t end, + vm_map_offset_t start, + vm_map_offset_t end, vm_map_entry_t *entry); -extern vm_map_entry_t _vm_map_entry_create( +static vm_map_entry_t _vm_map_entry_create( struct vm_map_header *map_header); -extern void _vm_map_entry_dispose( +static void _vm_map_entry_dispose( struct vm_map_header *map_header, vm_map_entry_t entry); -extern void vm_map_pmap_enter( +static void vm_map_pmap_enter( vm_map_t map, - vm_offset_t addr, - vm_offset_t end_addr, + vm_map_offset_t addr, + vm_map_offset_t end_addr, vm_object_t object, vm_object_offset_t offset, vm_prot_t protection); -extern void _vm_map_clip_end( +static void _vm_map_clip_end( + struct vm_map_header *map_header, + vm_map_entry_t entry, + vm_map_offset_t end); + +static void _vm_map_clip_start( struct vm_map_header *map_header, vm_map_entry_t entry, - vm_offset_t end); + vm_map_offset_t start); -extern void vm_map_entry_delete( +static void vm_map_entry_delete( vm_map_t map, vm_map_entry_t entry); -extern kern_return_t vm_map_delete( +static kern_return_t vm_map_delete( vm_map_t map, - vm_offset_t start, - vm_offset_t end, - int flags); + vm_map_offset_t start, + vm_map_offset_t end, + int flags, + vm_map_t zap_map); -extern void vm_map_copy_steal_pages( - vm_map_copy_t copy); - -extern kern_return_t vm_map_copy_overwrite_unaligned( +static kern_return_t vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_offset_t start); + vm_map_address_t start); -extern kern_return_t vm_map_copy_overwrite_aligned( +static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, vm_map_entry_t tmp_entry, vm_map_copy_t copy, - vm_offset_t start, + vm_map_offset_t start, pmap_t pmap); -extern kern_return_t vm_map_copyin_kernel_buffer( +static kern_return_t vm_map_copyin_kernel_buffer( vm_map_t src_map, - vm_offset_t src_addr, - vm_size_t len, + vm_map_address_t src_addr, + vm_map_size_t len, boolean_t src_destroy, vm_map_copy_t *copy_result); /* OUT */ -extern kern_return_t vm_map_copyout_kernel_buffer( +static kern_return_t vm_map_copyout_kernel_buffer( vm_map_t map, - vm_offset_t *addr, /* IN/OUT */ + vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, boolean_t overwrite); -extern void vm_map_fork_share( +static void vm_map_fork_share( vm_map_t old_map, vm_map_entry_t old_entry, vm_map_t new_map); -extern boolean_t vm_map_fork_copy( +static boolean_t vm_map_fork_copy( vm_map_t old_map, vm_map_entry_t *old_entry_p, vm_map_t new_map); -extern kern_return_t vm_remap_range_allocate( - vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t size, - 
vm_offset_t mask, - boolean_t anywhere, - vm_map_entry_t *map_entry); /* OUT */ - -extern void _vm_map_clip_start( - struct vm_map_header *map_header, - vm_map_entry_t entry, - vm_offset_t start); - -void vm_region_top_walk( +static void vm_map_region_top_walk( vm_map_entry_t entry, vm_region_top_info_t top); -void vm_region_walk( +static void vm_map_region_walk( + vm_map_t map, + vm_map_offset_t va, vm_map_entry_t entry, - vm_region_extended_info_t extended, vm_object_offset_t offset, - vm_offset_t range, + vm_object_size_t range, + vm_region_extended_info_t extended); + +static kern_return_t vm_map_wire_nested( vm_map_t map, - vm_offset_t va); + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t access_type, + boolean_t user_wire, + pmap_t map_pmap, + vm_map_offset_t pmap_addr); + +static kern_return_t vm_map_unwire_nested( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + boolean_t user_wire, + pmap_t map_pmap, + vm_map_offset_t pmap_addr); + +static kern_return_t vm_map_overwrite_submap_recurse( + vm_map_t dst_map, + vm_map_offset_t dst_addr, + vm_map_size_t dst_size); + +static kern_return_t vm_map_copy_overwrite_nested( + vm_map_t dst_map, + vm_map_offset_t dst_addr, + vm_map_copy_t copy, + boolean_t interruptible, + pmap_t pmap); + +static kern_return_t vm_map_remap_extract( + vm_map_t map, + vm_map_offset_t addr, + vm_map_size_t size, + boolean_t copy, + struct vm_map_header *map_header, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance, + boolean_t pageable); + +static kern_return_t vm_map_remap_range_allocate( + vm_map_t map, + vm_map_address_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + boolean_t anywhere, + vm_map_entry_t *map_entry); + +static void vm_map_region_look_for_page( + vm_map_t map, + vm_map_offset_t va, + vm_object_t object, + vm_object_offset_t offset, + int max_refcnt, + int depth, + vm_region_extended_info_t extended); + +static int vm_map_region_count_obj_refs( + vm_map_entry_t entry, + vm_object_t object); /* * Macros to copy a vm_map_entry. We must be careful to correctly @@ -251,10 +323,10 @@ MACRO_END * vm_object_copy_strategically() in vm_object.c. */ -zone_t vm_map_zone; /* zone for vm_map structures */ -zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ -zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ -zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ +static zone_t vm_map_zone; /* zone for vm_map structures */ +static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ +static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ +static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ /* @@ -287,11 +359,11 @@ vm_object_t vm_submap_object; * of a new entry. */ -vm_offset_t map_data; -vm_size_t map_data_size; -vm_offset_t kentry_data; -vm_size_t kentry_data_size; -int kentry_count = 2048; /* to init kentry_data_size */ +static void *map_data; +static vm_map_size_t map_data_size; +static void *kentry_data; +static vm_map_size_t kentry_data_size; +static int kentry_count = 2048; /* to init kentry_data_size */ #define NO_COALESCE_LIMIT (1024 * 128) @@ -299,7 +371,7 @@ int kentry_count = 2048; /* to init kentry_data_size */ * Threshold for aggressive (eager) page map entering for vm copyout * operations. Any copyout larger will NOT be aggressively entered. 
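A recurring change in this stretch of the patch is demoting vm_map.c internals from extern to static: the helper prototypes, the four map zones, and the bootstrap data all lose external linkage, so the symbols no longer leak out of the file while forward declarations still let the definitions appear later. The pattern in miniature:

#include <stdio.h>

/* forward declaration: file-local helper used before it is defined */
static int helper(int x);

int public_entry(int x) {
    return helper(x) + 1;   /* the exported symbol delegates to the private one */
}

static int helper(int x) {
    return x * 2;           /* not linkable from other translation units */
}

int main(void) {
    printf("%d\n", public_entry(20));   /* prints 41 */
    return 0;
}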
*/ -vm_size_t vm_map_aggressive_enter_max; /* set by bootstrap */ +static vm_map_size_t vm_map_aggressive_enter_max; /* set by bootstrap */ /* Skip acquiring locks if we're in the midst of a kernel core dump */ extern unsigned int not_in_kdp; @@ -308,18 +380,18 @@ void vm_map_init( void) { - vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 40*1024, + vm_map_zone = zinit((vm_map_size_t) sizeof(struct vm_map), 40*1024, PAGE_SIZE, "maps"); - vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry), + vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 1024*1024, PAGE_SIZE*5, "non-kernel map entries"); - vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry), + vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), kentry_data_size, kentry_data_size, "kernel map entries"); - vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy), + vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), 16*1024, PAGE_SIZE, "map copies"); /* @@ -337,7 +409,7 @@ void vm_map_steal_memory( void) { - map_data_size = round_page_32(10 * sizeof(struct vm_map)); + map_data_size = vm_map_round_page(10 * sizeof(struct vm_map)); map_data = pmap_steal_memory(map_data_size); #if 0 @@ -353,7 +425,7 @@ vm_map_steal_memory( kentry_data_size = - round_page_32(kentry_count * sizeof(struct vm_map_entry)); + vm_map_round_page(kentry_count * sizeof(struct vm_map_entry)); kentry_data = pmap_steal_memory(kentry_data_size); } @@ -366,10 +438,10 @@ vm_map_steal_memory( */ vm_map_t vm_map_create( - pmap_t pmap, - vm_offset_t min, - vm_offset_t max, - boolean_t pageable) + pmap_t pmap, + vm_map_offset_t min, + vm_map_offset_t max, + boolean_t pageable) { register vm_map_t result; @@ -398,7 +470,7 @@ vm_map_create( result->first_free = vm_map_to_entry(result); result->hint = vm_map_to_entry(result); vm_map_lock_init(result); - mutex_init(&result->s_lock, ETAP_VM_RESULT); + mutex_init(&result->s_lock, 0); return(result); } @@ -415,7 +487,7 @@ vm_map_create( #define vm_map_copy_entry_create(copy) \ _vm_map_entry_create(&(copy)->cpy_hdr) -vm_map_entry_t +static vm_map_entry_t _vm_map_entry_create( register struct vm_map_header *map_header) { @@ -451,7 +523,7 @@ MACRO_END #define vm_map_copy_entry_dispose(map, entry) \ _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry)) -void +static void _vm_map_entry_dispose( register struct vm_map_header *map_header, register vm_map_entry_t entry) @@ -463,12 +535,13 @@ _vm_map_entry_dispose( else zone = vm_map_kentry_zone; - zfree(zone, (vm_offset_t) entry); + zfree(zone, entry); } -boolean_t first_free_is_valid(vm_map_t map); /* forward */ -boolean_t first_free_check = FALSE; -boolean_t +#if MACH_ASSERT +static boolean_t first_free_is_valid(vm_map_t map); /* forward */ +static boolean_t first_free_check = FALSE; +static boolean_t first_free_is_valid( vm_map_t map) { @@ -479,8 +552,8 @@ first_free_is_valid( entry = vm_map_to_entry(map); next = entry->vme_next; - while (trunc_page_32(next->vme_start) == trunc_page_32(entry->vme_end) || - (trunc_page_32(next->vme_start) == trunc_page_32(entry->vme_start) && + while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) || + (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) && next != vm_map_to_entry(map))) { entry = next; next = entry->vme_next; @@ -494,6 +567,7 @@ first_free_is_valid( } return TRUE; } +#endif /* MACH_ASSERT */ /* * UPDATE_FIRST_FREE: @@ -510,10 +584,10 @@ MACRO_BEGIN \ UFF_map = (map); \ UFF_first_free = 
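vm_map_init() above carves four fixed-size zones (maps, kernel and non-kernel map entries, copy objects) whose element sizes now come from the LP64-clean structures. A toy illustration of the fixed-size zone pattern — one allocation size per zone, constant-time alloc/free — with a trivial free list standing in for the kernel's zalloc/zfree:

#include <stdlib.h>

/* toy fixed-size zone: a free list of equally sized elements */
struct zone {
    size_t elem_size;
    void  *free_list;   /* singly linked through the first word of each element */
};

static void *zalloc_sketch(struct zone *z) {
    if (z->free_list) {
        void *e = z->free_list;
        z->free_list = *(void **)e;   /* pop a recycled element */
        return e;
    }
    return malloc(z->elem_size);      /* grow the zone on demand */
}

static void zfree_sketch(struct zone *z, void *e) {
    *(void **)e = z->free_list;       /* push back onto the free list */
    z->free_list = e;
}

int main(void) {
    struct zone map_entry_zone = { 64, NULL };  /* assumed element size */
    void *a = zalloc_sketch(&map_entry_zone);
    zfree_sketch(&map_entry_zone, a);
    void *b = zalloc_sketch(&map_entry_zone);   /* reuses the freed element */
    free(b);
    return 0;
}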
(new_first_free); \ UFF_next_entry = UFF_first_free->vme_next; \ - while (trunc_page_32(UFF_next_entry->vme_start) == \ - trunc_page_32(UFF_first_free->vme_end) || \ - (trunc_page_32(UFF_next_entry->vme_start) == \ - trunc_page_32(UFF_first_free->vme_start) && \ + while (vm_map_trunc_page(UFF_next_entry->vme_start) == \ + vm_map_trunc_page(UFF_first_free->vme_end) || \ + (vm_map_trunc_page(UFF_next_entry->vme_start) == \ + vm_map_trunc_page(UFF_first_free->vme_start) && \ UFF_next_entry != vm_map_to_entry(UFF_map))) { \ UFF_first_free = UFF_next_entry; \ UFF_next_entry = UFF_first_free->vme_next; \ @@ -653,16 +727,24 @@ void vm_map_res_deallocate(register vm_map_t map) void vm_map_destroy( register vm_map_t map) -{ +{ vm_map_lock(map); (void) vm_map_delete(map, map->min_offset, - map->max_offset, VM_MAP_NO_FLAGS); + map->max_offset, VM_MAP_NO_FLAGS, + VM_MAP_NULL); vm_map_unlock(map); + +#ifdef __PPC__ + if (map->hdr.nentries!=0) + vm_map_remove_commpage64(map); +#endif /* __PPC__ */ + + assert(map->hdr.nentries==0); if(map->pmap) pmap_destroy(map->pmap); - zfree(vm_map_zone, (vm_offset_t) map); + zfree(vm_map_zone, map); } #if TASK_SWAPPER @@ -874,8 +956,8 @@ MACRO_END */ boolean_t vm_map_lookup_entry( - register vm_map_t map, - register vm_offset_t address, + register vm_map_t map, + register vm_map_offset_t address, vm_map_entry_t *entry) /* OUT */ { register vm_map_entry_t cur; @@ -963,14 +1045,19 @@ vm_map_lookup_entry( kern_return_t vm_map_find_space( register vm_map_t map, - vm_offset_t *address, /* OUT */ - vm_size_t size, - vm_offset_t mask, + vm_map_offset_t *address, /* OUT */ + vm_map_size_t size, + vm_map_offset_t mask, vm_map_entry_t *o_entry) /* OUT */ { register vm_map_entry_t entry, new_entry; - register vm_offset_t start; - register vm_offset_t end; + register vm_map_offset_t start; + register vm_map_offset_t end; + + if (size == 0) { + *address = 0; + return KERN_INVALID_ARGUMENT; + } new_entry = vm_map_entry_create(map); @@ -1096,7 +1183,7 @@ int vm_map_pmap_enter_print = FALSE; int vm_map_pmap_enter_enable = FALSE; /* - * Routine: vm_map_pmap_enter + * Routine: vm_map_pmap_enter [internal only] * * Description: * Force pages from the specified object to be entered into @@ -1109,11 +1196,11 @@ int vm_map_pmap_enter_enable = FALSE; * In/out conditions: * The source map should not be locked on entry. */ -void +static void vm_map_pmap_enter( vm_map_t map, - register vm_offset_t addr, - register vm_offset_t end_addr, + register vm_map_offset_t addr, + register vm_map_offset_t end_addr, register vm_object_t object, vm_object_offset_t offset, vm_prot_t protection) @@ -1130,7 +1217,12 @@ vm_map_pmap_enter( vm_object_paging_begin(object); m = vm_page_lookup(object, offset); - if (m == VM_PAGE_NULL || m->busy || + /* + * ENCRYPTED SWAP: + * The user should never see encrypted data, so do not + * enter an encrypted page in the page table. 
+ */ + if (m == VM_PAGE_NULL || m->busy || m->encrypted || (m->unusual && ( m->error || m->restart || m->absent || protection & m->page_lock))) { @@ -1143,13 +1235,13 @@ vm_map_pmap_enter( if (vm_map_pmap_enter_print) { printf("vm_map_pmap_enter:"); - printf("map: %x, addr: %x, object: %x, offset: %x\n", - map, addr, object, offset); + printf("map: %x, addr: %llx, object: %x, offset: %llx\n", + map, (unsigned long long)addr, object, (unsigned long long)offset); } m->busy = TRUE; if (m->no_isync == TRUE) { - pmap_sync_caches_phys(m->phys_page); + pmap_sync_page_data_phys(m->phys_page); m->no_isync = FALSE; } @@ -1174,6 +1266,35 @@ vm_map_pmap_enter( } } +boolean_t vm_map_pmap_is_empty( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); +boolean_t vm_map_pmap_is_empty( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_offset_t offset; + ppnum_t phys_page; + + if (map->pmap == NULL) { + return TRUE; + } + for (offset = start; + offset < end; + offset += PAGE_SIZE) { + phys_page = pmap_find_phys(map->pmap, offset); + if (phys_page) { + kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): " + "page %d at 0x%llx\n", + map, start, end, phys_page, offset); + return FALSE; + } + } + return TRUE; +} + /* * Routine: vm_map_enter * @@ -1184,12 +1305,15 @@ vm_map_pmap_enter( * * Arguments are as defined in the vm_map call. */ +int _map_enter_debug = 0; +static unsigned int vm_map_enter_restore_successes = 0; +static unsigned int vm_map_enter_restore_failures = 0; kern_return_t vm_map_enter( - register vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t size, - vm_offset_t mask, + vm_map_t map, + vm_map_offset_t *address, /* IN/OUT */ + vm_map_size_t size, + vm_map_offset_t mask, int flags, vm_object_t object, vm_object_offset_t offset, @@ -1198,26 +1322,67 @@ vm_map_enter( vm_prot_t max_protection, vm_inherit_t inheritance) { - vm_map_entry_t entry; - register vm_offset_t start; - register vm_offset_t end; + vm_map_entry_t entry, new_entry; + vm_map_offset_t start, tmp_start; + vm_map_offset_t end, tmp_end; kern_return_t result = KERN_SUCCESS; - - boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; + vm_map_t zap_old_map = VM_MAP_NULL; + vm_map_t zap_new_map = VM_MAP_NULL; + boolean_t map_locked = FALSE; + boolean_t pmap_empty = TRUE; + boolean_t new_mapping_established = FALSE; + boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); + boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); + boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); char alias; + if (size == 0) { + *address = 0; + return KERN_INVALID_ARGUMENT; + } + VM_GET_FLAGS_ALIAS(flags, alias); #define RETURN(value) { result = value; goto BailOut; } assert(page_aligned(*address)); assert(page_aligned(size)); + + /* + * Only zero-fill objects are allowed to be purgable. + * LP64todo - limit purgable objects to 32-bits for now + */ + if (purgable && + (offset != 0 || + (object != VM_OBJECT_NULL && + (object->size != size || + object->purgable == VM_OBJECT_NONPURGABLE)) + || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */ + return KERN_INVALID_ARGUMENT; + + if (!anywhere && overwrite) { + /* + * Create a temporary VM map to hold the old mappings in the + * affected area while we create the new one. + * This avoids releasing the VM map lock in + * vm_map_entry_delete() and allows atomicity + * when we want to replace some mappings with a new one. + * It also allows us to restore the old VM mappings if the + * new mapping fails. 
+ */ + zap_old_map = vm_map_create(PMAP_NULL, + *address, + *address + size, + TRUE); + } + StartAgain: ; start = *address; if (anywhere) { vm_map_lock(map); + map_locked = TRUE; /* * Calculate the first possible address. @@ -1273,7 +1438,8 @@ vm_map_enter( assert_wait((event_t)map, THREAD_ABORTSAFE); vm_map_unlock(map); - thread_block((void (*)(void))0); + map_locked = FALSE; + thread_block(THREAD_CONTINUE_NULL); goto StartAgain; } } @@ -1314,6 +1480,7 @@ vm_map_enter( */ vm_map_lock(map); + map_locked = TRUE; if ((start & mask) != 0) RETURN(KERN_NO_SPACE); @@ -1329,6 +1496,17 @@ vm_map_enter( RETURN(KERN_INVALID_ADDRESS); } + if (overwrite && zap_old_map != VM_MAP_NULL) { + /* + * Fixed mapping and "overwrite" flag: attempt to + * remove all existing mappings in the specified + * address range, saving them in our "zap_old_map". + */ + (void) vm_map_delete(map, start, end, + VM_MAP_REMOVE_SAVE_ENTRIES, + zap_old_map); + } + /* * ... the starting address isn't allocated */ @@ -1361,10 +1539,20 @@ vm_map_enter( /* * See whether we can avoid creating a new entry (and object) by * extending one of our neighbors. [So far, we only attempt to - * extend from below.] + * extend from below.] Note that we can never extend/join + * purgable objects because they need to remain distinct + * entities in order to implement their "volatile object" + * semantics. */ - if ((object == VM_OBJECT_NULL) && + if (purgable) { + if (object == VM_OBJECT_NULL) { + object = vm_object_allocate(size); + object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + offset = (vm_object_offset_t)0; + } + } else if ((object == VM_OBJECT_NULL) && (entry != vm_map_to_entry(map)) && (entry->vme_end == start) && (!entry->is_shared) && @@ -1381,8 +1569,8 @@ vm_map_enter( VM_OBJECT_NULL, entry->offset, (vm_object_offset_t) 0, - (vm_size_t)(entry->vme_end - entry->vme_start), - (vm_size_t)(end - entry->vme_end))) { + (vm_map_size_t)(entry->vme_end - entry->vme_start), + (vm_map_size_t)(end - entry->vme_end))) { /* * Coalesced the two objects - can extend @@ -1398,25 +1586,41 @@ vm_map_enter( /* * Create a new entry + * LP64todo - for now, we can only allocate 4GB internal objects + * because the default pager can't page bigger ones. Remove this + * when it can. */ - - { /**/ - register vm_map_entry_t new_entry; - - new_entry = vm_map_entry_insert(map, entry, start, end, object, - offset, needs_copy, FALSE, FALSE, + tmp_start = start; + if (object == VM_OBJECT_NULL && size > (vm_map_size_t)VM_MAX_ADDRESS) + tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS; + else + tmp_end = end; + do { + new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, + object, offset, needs_copy, FALSE, FALSE, cur_protection, max_protection, VM_BEHAVIOR_DEFAULT, inheritance, 0); - new_entry->alias = alias; + new_entry->alias = alias; + entry = new_entry; + } while (object == VM_OBJECT_NULL && + tmp_end != end && + (tmp_start = tmp_end) && + (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? + tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); + vm_map_unlock(map); + map_locked = FALSE; + + new_mapping_established = TRUE; /* Wire down the new entry if the user * requested all new map entries be wired. 
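Because, per the LP64todo comment above, the default pager cannot yet back objects larger than 4 GB, vm_map_enter() now splits an anonymous mapping into VM_MAX_ADDRESS-sized pieces, inserting one map entry per piece in a do/while loop. Stripped of the map machinery, the chunking walk looks like the following (the chunk size is shrunk here so the output stays short):

#include <stdio.h>
#include <stdint.h>

#define MAX_CHUNK 0x100ULL   /* stand-in for VM_MAX_ADDRESS */

int main(void) {
    uint64_t start = 0x1000, end = 0x1250;   /* range spanning several chunks */
    uint64_t tmp_start = start, tmp_end;

    do {
        tmp_end = (end - tmp_start > MAX_CHUNK) ? tmp_start + MAX_CHUNK : end;
        printf("entry [0x%llx, 0x%llx)\n",
               (unsigned long long)tmp_start, (unsigned long long)tmp_end);
        tmp_start = tmp_end;                 /* next chunk begins where this ended */
    } while (tmp_start < end);

    return 0;
}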
*/ if (map->wiring_required) { + pmap_empty = FALSE; /* pmap won't be empty */ result = vm_map_wire(map, start, end, - new_entry->protection, TRUE); - return(result); + new_entry->protection, TRUE); + RETURN(result); } if ((object != VM_OBJECT_NULL) && @@ -1424,20 +1628,339 @@ vm_map_enter( (!anywhere) && (!needs_copy) && (size < (128*1024))) { + pmap_empty = FALSE; /* pmap won't be empty */ vm_map_pmap_enter(map, start, end, object, offset, cur_protection); } - return(result); - } /**/ - BailOut: ; - vm_map_unlock(map); - return(result); + if (result == KERN_SUCCESS && + pmap_empty && + !(flags & VM_FLAGS_NO_PMAP_CHECK)) { + assert(vm_map_pmap_is_empty(map, *address, *address+size)); + } + + if (result != KERN_SUCCESS) { + if (new_mapping_established) { + /* + * We have to get rid of the new mappings since we + * won't make them available to the user. + * Try and do that atomically, to minimize the risk + * that someone else create new mappings that range. + */ + zap_new_map = vm_map_create(PMAP_NULL, + *address, + *address + size, + TRUE); + if (!map_locked) { + vm_map_lock(map); + map_locked = TRUE; + } + (void) vm_map_delete(map, *address, *address+size, + VM_MAP_REMOVE_SAVE_ENTRIES, + zap_new_map); + } + if (zap_old_map != VM_MAP_NULL && + zap_old_map->hdr.nentries != 0) { + vm_map_entry_t entry1, entry2; + + /* + * The new mapping failed. Attempt to restore + * the old mappings, saved in the "zap_old_map". + */ + if (!map_locked) { + vm_map_lock(map); + map_locked = TRUE; + } + + /* first check if the coast is still clear */ + start = vm_map_first_entry(zap_old_map)->vme_start; + end = vm_map_last_entry(zap_old_map)->vme_end; + if (vm_map_lookup_entry(map, start, &entry1) || + vm_map_lookup_entry(map, end, &entry2) || + entry1 != entry2) { + /* + * Part of that range has already been + * re-mapped: we can't restore the old + * mappings... + */ + vm_map_enter_restore_failures++; + } else { + /* + * Transfer the saved map entries from + * "zap_old_map" to the original "map", + * inserting them all after "entry1". + */ + for (entry2 = vm_map_first_entry(zap_old_map); + entry2 != vm_map_to_entry(zap_old_map); + entry2 = vm_map_first_entry(zap_old_map)) { + vm_map_entry_unlink(zap_old_map, + entry2); + vm_map_entry_link(map, entry1, entry2); + entry1 = entry2; + } + if (map->wiring_required) { + /* + * XXX TODO: we should rewire the + * old pages here... + */ + } + vm_map_enter_restore_successes++; + } + } + } + + if (map_locked) { + vm_map_unlock(map); + } + + /* + * Get rid of the "zap_maps" and all the map entries that + * they may still contain. + */ + if (zap_old_map != VM_MAP_NULL) { + vm_map_destroy(zap_old_map); + zap_old_map = VM_MAP_NULL; + } + if (zap_new_map != VM_MAP_NULL) { + vm_map_destroy(zap_new_map); + zap_new_map = VM_MAP_NULL; + } + + return result; #undef RETURN } + +#if VM_CPM + +#ifdef MACH_ASSERT +extern vm_offset_t avail_start, avail_end; +#endif + +/* + * Allocate memory in the specified map, with the caveat that + * the memory is physically contiguous. This call may fail + * if the system can't find sufficient contiguous memory. + * This call may cause or lead to heart-stopping amounts of + * paging activity. + * + * Memory obtained from this call should be freed in the + * normal way, viz., via vm_deallocate. 
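On failure, the BailOut path above restores the displaced mappings by popping every entry off the "zap_old_map" and relinking it into the original map after the insertion point, preserving order. Reduced to plain doubly linked, sentinel-headed list surgery over hypothetical unlink/link helpers, the transfer loop works like this:

#include <stdio.h>

struct entry { struct entry *prev, *next; int id; };

static void unlink_entry(struct entry *e) {     /* detach e from its list */
    e->prev->next = e->next;
    e->next->prev = e->prev;
}

static void link_after(struct entry *pos, struct entry *e) {  /* insert e after pos */
    e->next = pos->next;
    e->prev = pos;
    pos->next->prev = e;
    pos->next = e;
}

int main(void) {
    /* two circular lists with sentinel heads: "map" and "zap" */
    struct entry map = { &map, &map, 0 }, zap = { &zap, &zap, 0 };
    struct entry a = { 0, 0, 1 }, b = { 0, 0, 2 };
    link_after(&zap, &a);
    link_after(&a, &b);

    /* restore loop: keep taking the first saved entry, append after the anchor */
    struct entry *anchor = &map;
    while (zap.next != &zap) {
        struct entry *e = zap.next;
        unlink_entry(e);
        link_after(anchor, e);
        anchor = e;                             /* keeps the original order */
    }
    for (struct entry *e = map.next; e != &map; e = e->next)
        printf("entry %d\n", e->id);            /* prints 1 then 2 */
    return 0;
}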
+ */ +kern_return_t +vm_map_enter_cpm( + vm_map_t map, + vm_map_offset_t *addr, + vm_map_size_t size, + int flags) +{ + vm_object_t cpm_obj; + pmap_t pmap; + vm_page_t m, pages; + kern_return_t kr; + vm_map_offset_t va, start, end, offset; +#if MACH_ASSERT + vm_map_offset_t prev_addr; +#endif /* MACH_ASSERT */ + + boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + + if (!vm_allocate_cpm_enabled) + return KERN_FAILURE; + + if (size == 0) { + *addr = 0; + return KERN_SUCCESS; + } + + if (anywhere) + *addr = vm_map_min(map); + else + *addr = vm_map_trunc_page(*addr); + size = vm_map_round_page(size); + + /* + * LP64todo - cpm_allocate should probably allow + * allocations of >4GB, but not with the current + * algorithm, so just cast down the size for now. + */ + if (size > VM_MAX_ADDRESS) + return KERN_RESOURCE_SHORTAGE; + if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), + &pages, TRUE)) != KERN_SUCCESS) + return kr; + + cpm_obj = vm_object_allocate((vm_object_size_t)size); + assert(cpm_obj != VM_OBJECT_NULL); + assert(cpm_obj->internal); + assert(cpm_obj->size == (vm_object_size_t)size); + assert(cpm_obj->can_persist == FALSE); + assert(cpm_obj->pager_created == FALSE); + assert(cpm_obj->pageout == FALSE); + assert(cpm_obj->shadow == VM_OBJECT_NULL); + + /* + * Insert pages into object. + */ + + vm_object_lock(cpm_obj); + for (offset = 0; offset < size; offset += PAGE_SIZE) { + m = pages; + pages = NEXT_PAGE(m); + + assert(!m->gobbled); + assert(!m->wanted); + assert(!m->pageout); + assert(!m->tabled); + /* + * ENCRYPTED SWAP: + * "m" is not supposed to be pageable, so it + * should not be encrypted. It wouldn't be safe + * to enter it in a new VM object while encrypted. + */ + ASSERT_PAGE_DECRYPTED(m); + assert(m->busy); + assert(m->phys_page>=avail_start && m->phys_page<=avail_end); + + m->busy = FALSE; + vm_page_insert(m, cpm_obj, offset); + } + assert(cpm_obj->resident_page_count == size / PAGE_SIZE); + vm_object_unlock(cpm_obj); + + /* + * Hang onto a reference on the object in case a + * multi-threaded application for some reason decides + * to deallocate the portion of the address space into + * which we will insert this object. + * + * Unfortunately, we must insert the object now before + * we can talk to the pmap module about which addresses + * must be wired down. Hence, the race with a multi- + * threaded app. + */ + vm_object_reference(cpm_obj); + + /* + * Insert object into map. + */ + + kr = vm_map_enter( + map, + addr, + size, + (vm_map_offset_t)0, + flags, + cpm_obj, + (vm_object_offset_t)0, + FALSE, + VM_PROT_ALL, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); + + if (kr != KERN_SUCCESS) { + /* + * A CPM object doesn't have can_persist set, + * so all we have to do is deallocate it to + * free up these pages. + */ + assert(cpm_obj->pager_created == FALSE); + assert(cpm_obj->can_persist == FALSE); + assert(cpm_obj->pageout == FALSE); + assert(cpm_obj->shadow == VM_OBJECT_NULL); + vm_object_deallocate(cpm_obj); /* kill acquired ref */ + vm_object_deallocate(cpm_obj); /* kill creation ref */ + } + + /* + * Inform the physical mapping system that the + * range of addresses may not fault, so that + * page tables and such can be locked down as well. + */ + start = *addr; + end = start + size; + pmap = vm_map_pmap(map); + pmap_pageable(pmap, start, end, FALSE); + + /* + * Enter each page into the pmap, to avoid faults. + * Note that this loop could be coded more efficiently, + * if the need arose, rather than looking up each page + * again. 
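vm_map_enter_cpm() is the contiguous-memory path: it grabs physically contiguous pages via cpm_allocate(), wraps them in a fresh object, enters them wired, and pre-faults every page into the pmap. A caller would use it much like an ordinary allocation; the fragment below is a hypothetical usage sketch compiled against the kernel's own headers, with kernel_map and the flag choice as assumptions rather than a documented recipe.

/* hypothetical caller, assuming the kernel's own headers and kernel_map */
vm_map_offset_t addr = 0;
vm_map_size_t   size = 4 * PAGE_SIZE;        /* must stay under VM_MAX_ADDRESS */
kern_return_t   kr;

kr = vm_map_enter_cpm(kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
if (kr == KERN_SUCCESS) {
    /* pages at [addr, addr + size) are wired and physically contiguous */
    vm_deallocate(kernel_map, addr, size);   /* freed the normal way, per the comment above */
}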
+ */ + for (offset = 0, va = start; offset < size; + va += PAGE_SIZE, offset += PAGE_SIZE) { + vm_object_lock(cpm_obj); + m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); + vm_object_unlock(cpm_obj); + assert(m != VM_PAGE_NULL); + PMAP_ENTER(pmap, va, m, VM_PROT_ALL, + ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK, + TRUE); + } + +#if MACH_ASSERT + /* + * Verify ordering in address space. + */ + for (offset = 0; offset < size; offset += PAGE_SIZE) { + vm_object_lock(cpm_obj); + m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); + vm_object_unlock(cpm_obj); + if (m == VM_PAGE_NULL) + panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", + cpm_obj, offset); + assert(m->tabled); + assert(!m->busy); + assert(!m->wanted); + assert(!m->fictitious); + assert(!m->private); + assert(!m->absent); + assert(!m->error); + assert(!m->cleaning); + assert(!m->precious); + assert(!m->clustered); + if (offset != 0) { + if (m->phys_page != prev_addr + 1) { + printf("start 0x%x end 0x%x va 0x%x\n", + start, end, va); + printf("obj 0x%x off 0x%x\n", cpm_obj, offset); + printf("m 0x%x prev_address 0x%x\n", m, + prev_addr); + panic("vm_allocate_cpm: pages not contig!"); + } + } + prev_addr = m->phys_page; + } +#endif /* MACH_ASSERT */ + + vm_object_deallocate(cpm_obj); /* kill extra ref */ + + return kr; +} + + +#else /* VM_CPM */ + +/* + * Interface is defined in all cases, but unless the kernel + * is built explicitly for this option, the interface does + * nothing. + */ + +kern_return_t +vm_map_enter_cpm( + __unused vm_map_t map, + __unused vm_map_offset_t *addr, + __unused vm_map_size_t size, + __unused int flags) +{ + return KERN_FAILURE; +} +#endif /* VM_CPM */ + /* * vm_map_clip_start: [ internal use only ] * @@ -1450,13 +1973,13 @@ vm_map_enter( MACRO_BEGIN \ vm_map_t VMCS_map; \ vm_map_entry_t VMCS_entry; \ - vm_offset_t VMCS_startaddr; \ + vm_map_offset_t VMCS_startaddr; \ VMCS_map = (map); \ VMCS_entry = (entry); \ VMCS_startaddr = (startaddr); \ if (VMCS_startaddr > VMCS_entry->vme_start) { \ if(entry->use_pmap) { \ - vm_offset_t pmap_base_addr; \ + vm_map_offset_t pmap_base_addr; \ \ pmap_base_addr = 0xF0000000 & entry->vme_start; \ pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \ @@ -1477,7 +2000,7 @@ MACRO_END MACRO_BEGIN \ vm_map_t VMCS_map; \ vm_map_entry_t VMCS_entry; \ - vm_offset_t VMCS_startaddr; \ + vm_map_offset_t VMCS_startaddr; \ VMCS_map = (map); \ VMCS_entry = (entry); \ VMCS_startaddr = (startaddr); \ @@ -1498,11 +2021,11 @@ MACRO_END * This routine is called only when it is known that * the entry must be split. 
*/ -void +static void _vm_map_clip_start( register struct vm_map_header *map_header, register vm_map_entry_t entry, - register vm_offset_t start) + register vm_map_offset_t start) { register vm_map_entry_t new_entry; @@ -1542,13 +2065,13 @@ _vm_map_clip_start( MACRO_BEGIN \ vm_map_t VMCE_map; \ vm_map_entry_t VMCE_entry; \ - vm_offset_t VMCE_endaddr; \ + vm_map_offset_t VMCE_endaddr; \ VMCE_map = (map); \ VMCE_entry = (entry); \ VMCE_endaddr = (endaddr); \ if (VMCE_endaddr < VMCE_entry->vme_end) { \ if(entry->use_pmap) { \ - vm_offset_t pmap_base_addr; \ + vm_map_offset_t pmap_base_addr; \ \ pmap_base_addr = 0xF0000000 & entry->vme_start; \ pmap_unnest(map->pmap, (addr64_t)pmap_base_addr); \ @@ -1569,7 +2092,7 @@ MACRO_END MACRO_BEGIN \ vm_map_t VMCE_map; \ vm_map_entry_t VMCE_entry; \ - vm_offset_t VMCE_endaddr; \ + vm_map_offset_t VMCE_endaddr; \ VMCE_map = (map); \ VMCE_entry = (entry); \ VMCE_endaddr = (endaddr); \ @@ -1590,11 +2113,11 @@ MACRO_END * This routine is called only when it is known that * the entry must be split. */ -void +static void _vm_map_clip_end( register struct vm_map_header *map_header, register vm_map_entry_t entry, - register vm_offset_t end) + register vm_map_offset_t end) { register vm_map_entry_t new_entry; @@ -1648,15 +2171,15 @@ _vm_map_clip_end( * * The map is locked for reading on entry and is left locked. */ -boolean_t +static boolean_t vm_map_range_check( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, vm_map_entry_t *entry) { vm_map_entry_t cur; - register vm_offset_t prev; + register vm_map_offset_t prev; /* * Basic sanity checks first @@ -1715,11 +2238,14 @@ vm_map_range_check( */ kern_return_t vm_map_submap( - register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, vm_map_t submap, - vm_offset_t offset, + vm_map_offset_t offset, +#ifdef i386 + __unused +#endif boolean_t use_pmap) { vm_map_entry_t entry; @@ -1762,13 +2288,16 @@ vm_map_submap( if ((use_pmap) && (offset == 0)) { /* nest if platform code will allow */ if(submap->pmap == NULL) { - submap->pmap = pmap_create((vm_size_t) 0); + submap->pmap = pmap_create((vm_map_size_t) 0); if(submap->pmap == PMAP_NULL) { + vm_map_unlock(map); return(KERN_NO_SPACE); } } result = pmap_nest(map->pmap, (entry->object.sub_map)->pmap, - (addr64_t)start, (addr64_t)start, (uint64_t)(end - start)); + (addr64_t)start, + (addr64_t)start, + (uint64_t)(end - start)); if(result) panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); entry->use_pmap = TRUE; @@ -1795,13 +2324,13 @@ vm_map_submap( kern_return_t vm_map_protect( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, register vm_prot_t new_prot, register boolean_t set_max) { register vm_map_entry_t current; - register vm_offset_t prev; + register vm_map_offset_t prev; vm_map_entry_t entry; vm_prot_t new_max; boolean_t clip; @@ -1812,6 +2341,15 @@ vm_map_protect( vm_map_lock(map); + /* LP64todo - remove this check when vm_map_commpage64() + * no longer has to stuff in a map_entry for the commpage + * above the map's max_offset. + */ + if (start >= map->max_offset) { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + /* * Lookup the entry. If it doesn't start in a valid * entry, return an error. 
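_vm_map_clip_start() and _vm_map_clip_end() above split one map entry into two at a given address, copying the entry and adjusting vme_start/vme_end plus the object offset on each side. The arithmetic in isolation, with a cut-down entry type standing in for the real structure:

#include <stdio.h>
#include <stdint.h>

struct map_entry { uint64_t vme_start, vme_end, offset; };

/* split 'e' at 'addr': 'left' keeps [start, addr), 'e' becomes [addr, end) */
static void clip_start_sketch(struct map_entry *e, uint64_t addr,
                              struct map_entry *left) {
    *left = *e;                          /* copy, as vm_map_entry_copy_full would */
    left->vme_end = addr;
    e->offset += addr - e->vme_start;    /* the object offset advances with the split */
    e->vme_start = addr;
}

int main(void) {
    struct map_entry e = { 0x1000, 0x5000, 0 }, left;
    clip_start_sketch(&e, 0x3000, &left);
    printf("left  [0x%llx, 0x%llx) off 0x%llx\n",
           (unsigned long long)left.vme_start,
           (unsigned long long)left.vme_end,
           (unsigned long long)left.offset);
    printf("right [0x%llx, 0x%llx) off 0x%llx\n",
           (unsigned long long)e.vme_start,
           (unsigned long long)e.vme_end,
           (unsigned long long)e.offset);
    return 0;
}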
Remember if we need to @@ -1914,8 +2452,11 @@ vm_map_protect( /* the 256M hack for existing hardware limitations */ if (current->protection != old_prot) { if(current->is_sub_map && current->use_pmap) { - vm_offset_t pmap_base_addr; - vm_offset_t pmap_end_addr; + vm_map_offset_t pmap_base_addr; + vm_map_offset_t pmap_end_addr; +#ifdef i386 + __unused +#endif vm_map_entry_t local_entry; pmap_base_addr = 0xF0000000 & current->vme_start; @@ -1951,10 +2492,9 @@ vm_map_protect( current = current->vme_next; } - /* coalesce the map entries, if possible */ current = entry; - while (current != vm_map_to_entry(map) && - current->vme_start <= end) { + while ((current != vm_map_to_entry(map)) && + (current->vme_start <= end)) { vm_map_simplify_entry(map, current); current = current->vme_next; } @@ -1974,8 +2514,8 @@ vm_map_protect( kern_return_t vm_map_inherit( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, register vm_inherit_t new_inheritance) { register vm_map_entry_t entry; @@ -1998,8 +2538,10 @@ vm_map_inherit( /* given inheritance. */ while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { if(entry->is_sub_map) { - if(new_inheritance == VM_INHERIT_COPY) + if(new_inheritance == VM_INHERIT_COPY) { + vm_map_unlock(map); return(KERN_INVALID_ARGUMENT); + } } entry = entry->vme_next; @@ -2032,27 +2574,27 @@ vm_map_inherit( * The map must not be locked, but a reference must remain to the * map throughout the call. */ -kern_return_t +static kern_return_t vm_map_wire_nested( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, register vm_prot_t access_type, boolean_t user_wire, pmap_t map_pmap, - vm_offset_t pmap_addr) + vm_map_offset_t pmap_addr) { register vm_map_entry_t entry; struct vm_map_entry *first_entry, tmp_entry; - vm_map_t pmap_map; - register vm_offset_t s,e; + vm_map_t real_map; + register vm_map_offset_t s,e; kern_return_t rc; boolean_t need_wakeup; boolean_t main_map = FALSE; wait_interrupt_t interruptible_state; thread_t cur_thread; unsigned int last_timestamp; - vm_size_t size; + vm_map_size_t size; vm_map_lock(map); if(map_pmap == NULL) @@ -2150,10 +2692,10 @@ vm_map_wire_nested( } if(entry->is_sub_map) { - vm_offset_t sub_start; - vm_offset_t sub_end; - vm_offset_t local_start; - vm_offset_t local_end; + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; + vm_map_offset_t local_start; + vm_map_offset_t local_end; pmap_t pmap; vm_map_clip_start(map, entry, start); @@ -2201,8 +2743,8 @@ vm_map_wire_nested( } else { vm_object_t object; - vm_object_offset_t offset_hi; - vm_object_offset_t offset_lo; + vm_map_offset_t offset_hi; + vm_map_offset_t offset_lo; vm_object_offset_t offset; vm_prot_t prot; boolean_t wired; @@ -2223,15 +2765,15 @@ vm_map_wire_nested( &version, &object, &offset, &prot, &wired, &behavior, &offset_lo, - &offset_hi, &pmap_map)) { + &offset_hi, &real_map)) { - vm_map_unlock(lookup_map); + vm_map_unlock_read(lookup_map); vm_map_unwire(map, start, entry->vme_start, user_wire); return(KERN_FAILURE); } - if(pmap_map != lookup_map) - vm_map_unlock(pmap_map); + if(real_map != lookup_map) + vm_map_unlock(real_map); vm_map_unlock_read(lookup_map); vm_map_lock(map); vm_object_unlock(object); @@ -2439,6 +2981,8 @@ vm_map_wire_nested( if (!user_wire && cur_thread != THREAD_NULL) interruptible_state = thread_interrupt_level(THREAD_UNINT); + else + interruptible_state = 
THREAD_UNINT; if(map_pmap) rc = vm_fault_wire(map, @@ -2511,8 +3055,8 @@ vm_map_wire_nested( kern_return_t vm_map_wire( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, register vm_prot_t access_type, boolean_t user_wire) { @@ -2553,14 +3097,14 @@ vm_map_wire( * unwired and intransition entries to avoid losing memory by leaving * it unwired. */ -kern_return_t +static kern_return_t vm_map_unwire_nested( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, boolean_t user_wire, pmap_t map_pmap, - vm_offset_t pmap_addr) + vm_map_offset_t pmap_addr) { register vm_map_entry_t entry; struct vm_map_entry *first_entry, tmp_entry; @@ -2611,9 +3155,9 @@ vm_map_unwire_nested( } if(entry->is_sub_map) { - vm_offset_t sub_start; - vm_offset_t sub_end; - vm_offset_t local_end; + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; + vm_map_offset_t local_end; pmap_t pmap; @@ -2822,6 +3366,17 @@ vm_map_unwire_nested( entry = entry->vme_next; } } + + /* + * We might have fragmented the address space when we wired this + * range of addresses. Attempt to re-coalesce these VM map entries + * with their neighbors now that they're no longer wired. + * Under some circumstances, address space fragmentation can + * prevent VM object shadow chain collapsing, which can cause + * swap space leaks. + */ + vm_map_simplify_range(map, start, end); + vm_map_unlock(map); /* * wake up anybody waiting on entries that we have unwired. @@ -2835,8 +3390,8 @@ vm_map_unwire_nested( kern_return_t vm_map_unwire( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, boolean_t user_wire) { return vm_map_unwire_nested(map, start, end, @@ -2849,15 +3404,14 @@ vm_map_unwire( * * Deallocate the given entry from the target map. */ -void +static void vm_map_entry_delete( register vm_map_t map, register vm_map_entry_t entry) { - register vm_offset_t s, e; + register vm_map_offset_t s, e; register vm_object_t object; register vm_map_t submap; - extern vm_object_t kernel_object; s = entry->vme_start; e = entry->vme_end; @@ -2894,15 +3448,14 @@ vm_map_entry_delete( void vm_map_submap_pmap_clean( vm_map_t map, - vm_offset_t start, - vm_offset_t end, + vm_map_offset_t start, + vm_map_offset_t end, vm_map_t sub_map, - vm_offset_t offset) + vm_map_offset_t offset) { - vm_offset_t submap_start; - vm_offset_t submap_end; - vm_offset_t addr; - vm_size_t remove_size; + vm_map_offset_t submap_start; + vm_map_offset_t submap_end; + vm_map_size_t remove_size; vm_map_entry_t entry; submap_end = offset + (end - start); @@ -2993,21 +3546,21 @@ vm_map_submap_pmap_clean( * * This routine is called with map locked and leaves map locked. */ -kern_return_t +static kern_return_t vm_map_delete( - register vm_map_t map, - vm_offset_t start, - register vm_offset_t end, - int flags) + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + int flags, + vm_map_t zap_map) { vm_map_entry_t entry, next; struct vm_map_entry *first_entry, tmp_entry; - register vm_offset_t s, e; + register vm_map_offset_t s, e; register vm_object_t object; boolean_t need_wakeup; unsigned int last_timestamp = ~0; /* unlikely value */ int interruptible; - extern vm_map_t kernel_map; interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ? 
THREAD_ABORTSAFE : THREAD_UNINT; @@ -3256,47 +3809,45 @@ vm_map_delete( PMAP_NULL, entry->vme_start, VM_PROT_NONE); - } else if(object != NULL) { - if ((object->shadow != NULL) || - (object->phys_contiguous) || - (object->resident_page_count > - atop((entry->vme_end - entry->vme_start)/4))) { - pmap_remove(map->pmap, - (addr64_t)(entry->vme_start), - (addr64_t)(entry->vme_end)); - } else { - vm_page_t p; - vm_object_offset_t start_off; - vm_object_offset_t end_off; - start_off = entry->offset; - end_off = start_off + - (entry->vme_end - entry->vme_start); - vm_object_lock(object); - queue_iterate(&object->memq, - p, vm_page_t, listq) { - if ((!p->fictitious) && - (p->offset >= start_off) && - (p->offset < end_off)) { - vm_offset_t start; - start = entry->vme_start; - start += p->offset - start_off; - pmap_remove( - map->pmap, start, - start + PAGE_SIZE); - } - } - vm_object_unlock(object); - } - } + } else { + pmap_remove(map->pmap, + entry->vme_start, + entry->vme_end); + } } } + /* + * All pmap mappings for this map entry must have been + * cleared by now. + */ + assert(vm_map_pmap_is_empty(map, + entry->vme_start, + entry->vme_end)); + next = entry->vme_next; s = next->vme_start; last_timestamp = map->timestamp; - vm_map_entry_delete(map, entry); - /* vm_map_entry_delete unlocks the map */ - vm_map_lock(map); + + if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) && + zap_map != VM_MAP_NULL) { + /* + * The caller wants to save the affected VM map entries + * into the "zap_map". The caller will take care of + * these entries. + */ + /* unlink the entry from "map" ... */ + vm_map_entry_unlink(map, entry); + /* ... and add it to the end of the "zap_map" */ + vm_map_entry_link(zap_map, + vm_map_last_entry(zap_map), + entry); + } else { + vm_map_entry_delete(map, entry); + /* vm_map_entry_delete unlocks the map */ + vm_map_lock(map); + } + entry = next; if(entry == vm_map_to_entry(map)) { @@ -3348,30 +3899,17 @@ vm_map_delete( kern_return_t vm_map_remove( register vm_map_t map, - register vm_offset_t start, - register vm_offset_t end, + register vm_map_offset_t start, + register vm_map_offset_t end, register boolean_t flags) { register kern_return_t result; - boolean_t funnel_set = FALSE; - funnel_t *curflock; - thread_t cur_thread; - cur_thread = current_thread(); - - if ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED) { - funnel_set = TRUE; - curflock = cur_thread->funnel_lock; - thread_funnel_set( curflock , FALSE); - } vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); - result = vm_map_delete(map, start, end, flags); + result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); vm_map_unlock(map); - if (funnel_set) { - thread_funnel_set( curflock, TRUE); - funnel_set = FALSE; - } + return(result); } @@ -3390,7 +3928,7 @@ vm_map_copy_discard( TR_DECL("vm_map_copy_discard"); /* tr3("enter: copy 0x%x type %d", copy, copy->type);*/ -free_next_copy: + if (copy == VM_MAP_COPY_NULL) return; @@ -3415,10 +3953,10 @@ free_next_copy: * allocated by a single call to kalloc(), i.e. the * vm_map_copy_t was not allocated out of the zone. 
*/ - kfree((vm_offset_t) copy, copy->cpy_kalloc_size); + kfree(copy, copy->cpy_kalloc_size); return; } - zfree(vm_map_copy_zone, (vm_offset_t) copy); + zfree(vm_map_copy_zone, copy); } /* @@ -3479,13 +4017,13 @@ vm_map_copy_copy( return new_copy; } -kern_return_t +static kern_return_t vm_map_overwrite_submap_recurse( vm_map_t dst_map, - vm_offset_t dst_addr, - vm_size_t dst_size) + vm_map_offset_t dst_addr, + vm_map_size_t dst_size) { - vm_offset_t dst_end; + vm_map_offset_t dst_end; vm_map_entry_t tmp_entry; vm_map_entry_t entry; kern_return_t result; @@ -3500,7 +4038,7 @@ vm_map_overwrite_submap_recurse( * splitting entries in strange ways. */ - dst_end = round_page_32(dst_addr + dst_size); + dst_end = vm_map_round_page(dst_addr + dst_size); vm_map_lock(dst_map); start_pass_1: @@ -3509,16 +4047,16 @@ start_pass_1: return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, trunc_page_32(dst_addr)); + vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); for (entry = tmp_entry;;) { vm_map_entry_t next; next = entry->vme_next; while(entry->is_sub_map) { - vm_offset_t sub_start; - vm_offset_t sub_end; - vm_offset_t local_end; + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; + vm_map_offset_t local_end; if (entry->in_transition) { /* @@ -3668,24 +4206,24 @@ start_pass_1: * returned. */ -kern_return_t +static kern_return_t vm_map_copy_overwrite_nested( - vm_map_t dst_map, - vm_offset_t dst_addr, - vm_map_copy_t copy, - boolean_t interruptible, - pmap_t pmap) + vm_map_t dst_map, + vm_map_address_t dst_addr, + vm_map_copy_t copy, + boolean_t interruptible, + pmap_t pmap) { - vm_offset_t dst_end; - vm_map_entry_t tmp_entry; - vm_map_entry_t entry; - kern_return_t kr; - boolean_t aligned = TRUE; - boolean_t contains_permanent_objects = FALSE; - boolean_t encountered_sub_map = FALSE; - vm_offset_t base_addr; - vm_size_t copy_size; - vm_size_t total_size; + vm_map_offset_t dst_end; + vm_map_entry_t tmp_entry; + vm_map_entry_t entry; + kern_return_t kr; + boolean_t aligned = TRUE; + boolean_t contains_permanent_objects = FALSE; + boolean_t encountered_sub_map = FALSE; + vm_map_offset_t base_addr; + vm_map_size_t copy_size; + vm_map_size_t total_size; /* @@ -3730,26 +4268,35 @@ vm_map_copy_overwrite_nested( !page_aligned (dst_addr)) { aligned = FALSE; - dst_end = round_page_32(dst_addr + copy->size); + dst_end = vm_map_round_page(dst_addr + copy->size); } else { dst_end = dst_addr + copy->size; } vm_map_lock(dst_map); + /* LP64todo - remove this check when vm_map_commpage64() + * no longer has to stuff in a map_entry for the commpage + * above the map's max_offset. 
+ */ + if (dst_addr >= dst_map->max_offset) { + vm_map_unlock(dst_map); + return(KERN_INVALID_ADDRESS); + } + start_pass_1: if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, trunc_page_32(dst_addr)); + vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); for (entry = tmp_entry;;) { vm_map_entry_t next = entry->vme_next; while(entry->is_sub_map) { - vm_offset_t sub_start; - vm_offset_t sub_end; - vm_offset_t local_end; + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; + vm_map_offset_t local_end; if (entry->in_transition) { @@ -3885,11 +4432,11 @@ start_overwrite: /* deconstruct the copy object and do in parts */ /* only in sub_map, interruptable case */ vm_map_entry_t copy_entry; - vm_map_entry_t previous_prev; - vm_map_entry_t next_copy; + vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL; + vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; int nentries; - int remaining_entries; - int new_offset; + int remaining_entries = 0; + int new_offset = 0; for (entry = tmp_entry; copy_size == 0;) { vm_map_entry_t next; @@ -3921,9 +4468,9 @@ start_overwrite: continue; } if(entry->is_sub_map) { - vm_offset_t sub_start; - vm_offset_t sub_end; - vm_offset_t local_end; + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; + vm_map_offset_t local_end; if (entry->needs_copy) { /* if this is a COW submap */ @@ -3979,8 +4526,8 @@ start_overwrite: /* adjust the copy object */ if (total_size > copy_size) { - vm_size_t local_size = 0; - vm_size_t entry_size; + vm_map_size_t local_size = 0; + vm_map_size_t entry_size; nentries = 1; new_offset = copy->offset; @@ -4110,8 +4657,8 @@ start_overwrite: /* adjust the copy object */ if (total_size > copy_size) { - vm_size_t local_size = 0; - vm_size_t entry_size; + vm_map_size_t local_size = 0; + vm_map_size_t entry_size; new_offset = copy->offset; copy_entry = vm_map_copy_first_entry(copy); @@ -4227,7 +4774,7 @@ start_overwrite: break; } } - vm_map_clip_start(dst_map, tmp_entry, trunc_page_32(base_addr)); + vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); entry = tmp_entry; } /* while */ @@ -4243,7 +4790,7 @@ start_overwrite: kern_return_t vm_map_copy_overwrite( vm_map_t dst_map, - vm_offset_t dst_addr, + vm_map_offset_t dst_addr, vm_map_copy_t copy, boolean_t interruptible) { @@ -4253,7 +4800,7 @@ vm_map_copy_overwrite( /* - * Routine: vm_map_copy_overwrite_unaligned + * Routine: vm_map_copy_overwrite_unaligned [internal use only] * * Decription: * Physically copy unaligned data @@ -4275,12 +4822,12 @@ vm_map_copy_overwrite( * unlocked on error. 
*/ -kern_return_t +static kern_return_t vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_offset_t start) + vm_map_offset_t start) { vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy); vm_map_version_t version; @@ -4288,8 +4835,8 @@ vm_map_copy_overwrite_unaligned( vm_object_offset_t dst_offset; vm_object_offset_t src_offset; vm_object_offset_t entry_offset; - vm_offset_t entry_end; - vm_size_t src_size, + vm_map_offset_t entry_end; + vm_map_size_t src_size, dst_size, copy_size, amount_left; @@ -4297,7 +4844,7 @@ vm_map_copy_overwrite_unaligned( vm_map_lock_write_to_read(dst_map); - src_offset = copy->offset - trunc_page_64(copy->offset); + src_offset = copy->offset - vm_object_trunc_page(copy->offset); amount_left = copy->size; /* * unaligned so we never clipped this entry, we need the offset into @@ -4350,7 +4897,7 @@ vm_map_copy_overwrite_unaligned( } vm_object_shadow(&entry->object.vm_object, &entry->offset, - (vm_size_t)(entry->vme_end + (vm_map_size_t)(entry->vme_end - entry->vme_start)); entry->needs_copy = FALSE; vm_map_lock_write_to_read(dst_map); @@ -4365,7 +4912,7 @@ vm_map_copy_overwrite_unaligned( vm_map_lock_read(dst_map); goto RetryLookup; } - dst_object = vm_object_allocate((vm_size_t) + dst_object = vm_object_allocate((vm_map_size_t) entry->vme_end - entry->vme_start); entry->object.vm_object = dst_object; entry->offset = 0; @@ -4462,14 +5009,11 @@ RetryLookup: } }/* while */ - /* NOTREACHED ?? */ - vm_map_unlock_read(dst_map); - return KERN_SUCCESS; }/* vm_map_copy_overwrite_unaligned */ /* - * Routine: vm_map_copy_overwrite_aligned + * Routine: vm_map_copy_overwrite_aligned [internal use only] * * Description: * Does all the vm_trickery possible for whole pages. @@ -4487,18 +5031,21 @@ RetryLookup: * to the above pass and make sure that no wiring is involved. */ -kern_return_t +static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, vm_map_entry_t tmp_entry, vm_map_copy_t copy, - vm_offset_t start, + vm_map_offset_t start, +#if !BAD_OPTIMIZATION + __unused +#endif /* !BAD_OPTIMIZATION */ pmap_t pmap) { vm_object_t object; vm_map_entry_t copy_entry; - vm_size_t copy_size; - vm_size_t size; + vm_map_size_t copy_size; + vm_map_size_t size; vm_map_entry_t entry; while ((copy_entry = vm_map_copy_first_entry(copy)) @@ -4595,9 +5142,7 @@ vm_map_copy_overwrite_aligned( if(entry->use_pmap) { #ifndef i386 pmap_unnest(dst_map->pmap, - entry->vme_start, - entry->vme_end - - entry->vme_start); + entry->vme_start); #endif if(dst_map->mapped) { /* clean up parent */ @@ -4662,7 +5207,7 @@ vm_map_copy_overwrite_aligned( * (but avoid uninstantiated objects) */ if (object != VM_OBJECT_NULL) { - vm_offset_t va = entry->vme_start; + vm_map_offset_t va = entry->vme_start; while (va < entry->vme_end) { register vm_page_t m; @@ -4675,9 +5220,17 @@ vm_map_copy_overwrite_aligned( vm_object_lock(object); vm_object_paging_begin(object); + /* + * ENCRYPTED SWAP: + * If the page is encrypted, skip it: + * we can't let the user see the encrypted + * contents. The page will get decrypted + * on demand when the user generates a + * soft-fault when trying to access it. 
+ */ if ((m = vm_page_lookup(object,offset)) != - VM_PAGE_NULL && !m->busy && - !m->fictitious && + VM_PAGE_NULL && !m->busy && + !m->fictitious && !m->encrypted && (!m->unusual || (!m->error && !m->restart && !m->absent && (prot & m->page_lock) == 0))) { @@ -4816,26 +5369,26 @@ vm_map_copy_overwrite_aligned( }/* vm_map_copy_overwrite_aligned */ /* - * Routine: vm_map_copyin_kernel_buffer + * Routine: vm_map_copyin_kernel_buffer [internal use only] * * Description: * Copy in data to a kernel buffer from space in the - * source map. The original space may be otpionally + * source map. The original space may be optionally * deallocated. * * If successful, returns a new copy object. */ -kern_return_t +static kern_return_t vm_map_copyin_kernel_buffer( vm_map_t src_map, - vm_offset_t src_addr, - vm_size_t len, + vm_map_offset_t src_addr, + vm_map_size_t len, boolean_t src_destroy, vm_map_copy_t *copy_result) { - boolean_t flags; + kern_return_t kr; vm_map_copy_t copy; - vm_size_t kalloc_size = sizeof(struct vm_map_copy) + len; + vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len; copy = (vm_map_copy_t) kalloc(kalloc_size); if (copy == VM_MAP_COPY_NULL) { @@ -4844,34 +5397,28 @@ vm_map_copyin_kernel_buffer( copy->type = VM_MAP_COPY_KERNEL_BUFFER; copy->size = len; copy->offset = 0; - copy->cpy_kdata = (vm_offset_t) (copy + 1); + copy->cpy_kdata = (void *) (copy + 1); copy->cpy_kalloc_size = kalloc_size; - if (src_map == kernel_map) { - bcopy((char *)src_addr, (char *)copy->cpy_kdata, len); - flags = VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_WAIT_FOR_KWIRE | - VM_MAP_REMOVE_INTERRUPTIBLE; - } else { - kern_return_t kr; - kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); - if (kr != KERN_SUCCESS) { - kfree((vm_offset_t)copy, kalloc_size); - return kr; - } - flags = VM_MAP_REMOVE_WAIT_FOR_KWIRE | - VM_MAP_REMOVE_INTERRUPTIBLE; + kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); + if (kr != KERN_SUCCESS) { + kfree(copy, kalloc_size); + return kr; } if (src_destroy) { - (void) vm_map_remove(src_map, trunc_page_32(src_addr), - round_page_32(src_addr + len), - flags); + (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), + vm_map_round_page(src_addr + len), + VM_MAP_REMOVE_INTERRUPTIBLE | + VM_MAP_REMOVE_WAIT_FOR_KWIRE | + ((src_map == kernel_map) ? + VM_MAP_REMOVE_KUNWIRE : 0)); } *copy_result = copy; return KERN_SUCCESS; } /* - * Routine: vm_map_copyout_kernel_buffer + * Routine: vm_map_copyout_kernel_buffer [internal use only] * * Description: * Copy out data from a kernel buffer into space in the @@ -4881,15 +5428,16 @@ vm_map_copyin_kernel_buffer( * If successful, consumes the copy object. * Otherwise, the caller is responsible for it.
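+ *
+ * Minimal caller sketch (editor's illustration; assumes a copy
+ * object built by vm_map_copyin_kernel_buffer() above):
+ *
+ *	vm_map_address_t addr = 0;
+ *	kern_return_t kr;
+ *
+ *	kr = vm_map_copyout_kernel_buffer(map, &addr, copy, FALSE);
+ *	if (kr != KERN_SUCCESS)
+ *		vm_map_copy_discard(copy);	-- still ours on failure
+ *
+ * On success the copy structure has already been kfree'd and
+ * must not be referenced again.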
*/ -kern_return_t +static int vm_map_copyout_kernel_buffer_failures = 0; +static kern_return_t vm_map_copyout_kernel_buffer( - vm_map_t map, - vm_offset_t *addr, /* IN/OUT */ - vm_map_copy_t copy, - boolean_t overwrite) + vm_map_t map, + vm_map_address_t *addr, /* IN/OUT */ + vm_map_copy_t copy, + boolean_t overwrite) { kern_return_t kr = KERN_SUCCESS; - thread_act_t thr_act = current_act(); + thread_t thread = current_thread(); if (!overwrite) { @@ -4899,9 +5447,9 @@ vm_map_copyout_kernel_buffer( *addr = 0; kr = vm_map_enter(map, addr, - round_page_32(copy->size), - (vm_offset_t) 0, - TRUE, + vm_map_round_page(copy->size), + (vm_map_offset_t) 0, + VM_FLAGS_ANYWHERE, VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE, @@ -4909,21 +5457,20 @@ vm_map_copyout_kernel_buffer( VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) - return(kr); + return kr; } /* * Copyout the data from the kernel buffer to the target map. */ - if (thr_act->map == map) { + if (thread->map == map) { /* * If the target map is the current map, just do * the copy. */ - if (copyout((char *)copy->cpy_kdata, (char *)*addr, - copy->size)) { - return(KERN_INVALID_ADDRESS); + if (copyout(copy->cpy_kdata, *addr, copy->size)) { + kr = KERN_INVALID_ADDRESS; } } else { @@ -4937,18 +5484,34 @@ vm_map_copyout_kernel_buffer( vm_map_reference(map); oldmap = vm_map_switch(map); - if (copyout((char *)copy->cpy_kdata, (char *)*addr, - copy->size)) { - return(KERN_INVALID_ADDRESS); + if (copyout(copy->cpy_kdata, *addr, copy->size)) { + vm_map_copyout_kernel_buffer_failures++; + kr = KERN_INVALID_ADDRESS; } (void) vm_map_switch(oldmap); vm_map_deallocate(map); } - kfree((vm_offset_t)copy, copy->cpy_kalloc_size); + if (kr != KERN_SUCCESS) { + /* the copy failed, clean up */ + if (!overwrite) { + /* + * Deallocate the space we allocated in the target map. 
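+ * (The removal below mirrors the vm_map_enter() above, which
+ * mapped vm_map_round_page(copy->size) bytes at *addr, hence
+ * the page rounding of both the size and the end address.)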
+ */ + (void) vm_map_remove(map, + vm_map_trunc_page(*addr), + vm_map_round_page(*addr + + vm_map_round_page(copy->size)), + VM_MAP_NO_FLAGS); + *addr = 0; + } + } else { + /* copy was successful, discard the copy structure */ + kfree(copy, copy->cpy_kalloc_size); + } - return(kr); + return kr; } /* @@ -4976,7 +5539,7 @@ MACRO_BEGIN \ ->vme_prev = VMCI_where; \ VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \ UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \ - zfree(vm_map_copy_zone, (vm_offset_t) VMCI_copy); \ + zfree(vm_map_copy_zone, VMCI_copy); \ MACRO_END /* @@ -4991,13 +5554,13 @@ MACRO_END */ kern_return_t vm_map_copyout( - register vm_map_t dst_map, - vm_offset_t *dst_addr, /* OUT */ - register vm_map_copy_t copy) + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy) { - vm_size_t size; - vm_size_t adjustment; - vm_offset_t start; + vm_map_size_t size; + vm_map_size_t adjustment; + vm_map_offset_t start; vm_object_offset_t vm_copy_start; vm_map_entry_t last; register @@ -5022,20 +5585,20 @@ vm_map_copyout( kern_return_t kr; vm_object_offset_t offset; - offset = trunc_page_64(copy->offset); - size = round_page_32(copy->size + - (vm_size_t)(copy->offset - offset)); + offset = vm_object_trunc_page(copy->offset); + size = vm_map_round_page(copy->size + + (vm_map_size_t)(copy->offset - offset)); *dst_addr = 0; kr = vm_map_enter(dst_map, dst_addr, size, - (vm_offset_t) 0, TRUE, + (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, object, offset, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) return(kr); /* Account for non-pagealigned copy object */ - *dst_addr += (vm_offset_t)(copy->offset - offset); - zfree(vm_map_copy_zone, (vm_offset_t) copy); + *dst_addr += (vm_map_offset_t)(copy->offset - offset); + zfree(vm_map_copy_zone, copy); return(KERN_SUCCESS); } @@ -5053,8 +5616,8 @@ vm_map_copyout( * Find space for the data */ - vm_copy_start = trunc_page_64(copy->offset); - size = round_page_32((vm_size_t)copy->offset + copy->size) + vm_copy_start = vm_object_trunc_page(copy->offset); + size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) - vm_copy_start; StartAgain: ; @@ -5066,7 +5629,7 @@ vm_map_copyout( while (TRUE) { vm_map_entry_t next = last->vme_next; - vm_offset_t end = start + size; + vm_map_offset_t end = start + size; if ((end > dst_map->max_offset) || (end < start)) { if (dst_map->wait_for_space) { @@ -5074,7 +5637,7 @@ vm_map_copyout( assert_wait((event_t) dst_map, THREAD_INTERRUPTIBLE); vm_map_unlock(dst_map); - thread_block((void (*)(void))0); + thread_block(THREAD_CONTINUE_NULL); goto StartAgain; } } @@ -5133,7 +5696,7 @@ vm_map_copyout( vm_map_copy_last_entry(copy), new); next = entry->vme_next; - zfree(old_zone, (vm_offset_t) entry); + zfree(old_zone, entry); entry = next; } } @@ -5160,7 +5723,7 @@ vm_map_copyout( * map the pages into the destination map. */ if (entry->wired_count != 0) { - register vm_offset_t va; + register vm_map_offset_t va; vm_object_offset_t offset; register vm_object_t object; @@ -5201,6 +5764,16 @@ vm_map_copyout( m->absent) panic("vm_map_copyout: wiring 0x%x", m); + /* + * ENCRYPTED SWAP: + * The page is assumed to be wired here, so it + * shouldn't be encrypted. Otherwise, we + * couldn't enter it in the page table, since + * we don't want the user to see the encrypted + * data.
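+ * The ASSERT_PAGE_DECRYPTED() call below asserts this
+ * invariant.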
+ */ + ASSERT_PAGE_DECRYPTED(m); + m->busy = TRUE; vm_object_unlock(object); @@ -5222,7 +5795,7 @@ vm_map_copyout( } else if (size <= vm_map_aggressive_enter_max) { - register vm_offset_t va; + register vm_map_offset_t va; vm_object_offset_t offset; register vm_object_t object; vm_prot_t prot; @@ -5243,10 +5816,20 @@ vm_map_copyout( vm_object_lock(object); vm_object_paging_begin(object); + /* + * ENCRYPTED SWAP: + * If the page is encrypted, skip it: + * we can't let the user see the + * encrypted contents. The page will + * get decrypted on demand when the + * user generates a soft-fault when + * trying to access it. + */ if (((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) && !m->busy && !m->fictitious && + !m->encrypted && !m->absent && !m->error) { m->busy = TRUE; vm_object_unlock(object); @@ -5309,9 +5892,6 @@ vm_map_copyout( return(KERN_SUCCESS); } -boolean_t vm_map_aggressive_enter; /* not used yet */ - - /* * Routine: vm_map_copyin * @@ -5333,23 +5913,21 @@ boolean_t vm_map_aggressive_enter; /* not used yet */ typedef struct submap_map { vm_map_t parent_map; - vm_offset_t base_start; - vm_offset_t base_end; + vm_map_offset_t base_start; + vm_map_offset_t base_end; struct submap_map *next; } submap_map_t; kern_return_t vm_map_copyin_common( vm_map_t src_map, - vm_offset_t src_addr, - vm_size_t len, + vm_map_address_t src_addr, + vm_map_size_t len, boolean_t src_destroy, - boolean_t src_volatile, + __unused boolean_t src_volatile, vm_map_copy_t *copy_result, /* OUT */ boolean_t use_maxprot) { - extern int msg_ool_size_small; - vm_map_entry_t tmp_entry; /* Result of last map lookup -- * in multi-level lookup, this * entry contains the actual @@ -5358,22 +5936,18 @@ vm_map_copyin_common( register vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */ - vm_offset_t src_start; /* Start of current entry -- + vm_map_offset_t src_start; /* Start of current entry -- * where copy is taking place now */ - vm_offset_t src_end; /* End of entire region to be + vm_map_offset_t src_end; /* End of entire region to be * copied */ - vm_offset_t base_start; /* submap fields to save offsets */ - /* in original map */ - vm_offset_t base_end; - vm_map_t base_map=src_map; - vm_map_entry_t base_entry; + vm_map_t base_map = src_map; boolean_t map_share=FALSE; submap_map_t *parent_maps = NULL; register vm_map_copy_t copy; /* Resulting copy */ - vm_offset_t copy_addr; + vm_map_address_t copy_addr; /* * Check for copies of zero bytes. 
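[Editor's note: submap_map_t, declared above, serves as a LIFO stack of enclosing maps while vm_map_copyin_common() descends through submaps: each level is pushed before recursing and popped (and kfree'd) on the way back out. A minimal sketch of that discipline, using only the fields shown above:

	submap_map_t *ptr;

	/* push: remember the enclosing map and range before recursing */
	ptr = (submap_map_t *) kalloc(sizeof (submap_map_t));
	ptr->next = parent_maps;
	ptr->parent_map = src_map;
	ptr->base_start = src_start;
	ptr->base_end = src_end;
	parent_maps = ptr;

	/* ... copy entries out of the submap ... */

	/* pop: restore the parent level once the submap is exhausted */
	ptr = parent_maps;
	parent_maps = parent_maps->next;
	src_map = ptr->parent_map;
	src_start = ptr->base_start;
	src_end = ptr->base_end;
	kfree(ptr, sizeof(submap_map_t));

This is the same push/pop pattern visible in the hunks that follow.]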
@@ -5404,8 +5978,8 @@ vm_map_copyin_common( /* * Compute (page aligned) start and end of region */ - src_start = trunc_page_32(src_addr); - src_end = round_page_32(src_end); + src_start = vm_map_trunc_page(src_addr); + src_end = vm_map_round_page(src_end); XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0); @@ -5437,13 +6011,13 @@ vm_map_copyin_common( vm_map_copy_entry_dispose(copy,new_entry); \ vm_map_copy_discard(copy); \ { \ - submap_map_t *ptr; \ + submap_map_t *_ptr; \ \ - for(ptr = parent_maps; ptr != NULL; ptr = parent_maps) { \ + for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \ parent_maps=parent_maps->next; \ - if (ptr->parent_map != base_map) \ - vm_map_deallocate(ptr->parent_map); \ - kfree((vm_offset_t)ptr, sizeof(submap_map_t)); \ + if (_ptr->parent_map != base_map) \ + vm_map_deallocate(_ptr->parent_map); \ + kfree(_ptr, sizeof(submap_map_t)); \ } \ } \ MACRO_RETURN(x); \ @@ -5470,7 +6044,7 @@ vm_map_copyin_common( while (TRUE) { register vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */ - vm_size_t src_size; /* Size of source + vm_map_size_t src_size; /* Size of source * map entry (in both * maps) */ @@ -5494,7 +6068,7 @@ vm_map_copyin_common( * copy_strategically. */ while(tmp_entry->is_sub_map) { - vm_size_t submap_len; + vm_map_size_t submap_len; submap_map_t *ptr; ptr = (submap_map_t *)kalloc(sizeof(submap_map_t)); @@ -5751,9 +6325,9 @@ RestartCopy: src_entry = tmp_entry; vm_map_clip_start(src_map, src_entry, src_start); - if ((src_entry->protection & VM_PROT_READ == VM_PROT_NONE && - !use_maxprot) || - src_entry->max_protection & VM_PROT_READ == 0) + if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) && + !use_maxprot) || + ((src_entry->max_protection & VM_PROT_READ) == 0)) goto VerificationFailed; if (src_entry->vme_end < new_entry->vme_end) @@ -5820,7 +6394,7 @@ RestartCopy: !vm_map_lookup_entry( src_map, src_start, &tmp_entry)) RETURN(KERN_INVALID_ADDRESS); - kfree((vm_offset_t)ptr, sizeof(submap_map_t)); + kfree(ptr, sizeof(submap_map_t)); if(parent_maps == NULL) map_share = FALSE; src_entry = tmp_entry->vme_prev; @@ -5846,11 +6420,12 @@ RestartCopy: */ if (src_destroy) { (void) vm_map_delete(src_map, - trunc_page_32(src_addr), + vm_map_trunc_page(src_addr), src_end, (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : - VM_MAP_NO_FLAGS); + VM_MAP_NO_FLAGS, + VM_MAP_NULL); } vm_map_unlock(src_map); @@ -5898,7 +6473,6 @@ vm_map_copyin_object( copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); copy->type = VM_MAP_COPY_OBJECT; copy->cpy_object = object; - copy->cpy_index = 0; copy->offset = offset; copy->size = size; @@ -5906,7 +6480,7 @@ vm_map_copyin_object( return(KERN_SUCCESS); } -void +static void vm_map_fork_share( vm_map_t old_map, vm_map_entry_t old_entry, @@ -5914,7 +6488,6 @@ vm_map_fork_share( { vm_object_t object; vm_map_entry_t new_entry; - kern_return_t result; /* * New sharing code. 
New map entry @@ -5931,6 +6504,8 @@ vm_map_fork_share( assert(old_entry->wired_count == 0); #ifndef i386 if(old_entry->use_pmap) { + kern_return_t result; + result = pmap_nest(new_map->pmap, (old_entry->object.sub_map)->pmap, (addr64_t)old_entry->vme_start, @@ -5941,7 +6516,7 @@ vm_map_fork_share( } #endif } else if (object == VM_OBJECT_NULL) { - object = vm_object_allocate((vm_size_t)(old_entry->vme_end - + object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - old_entry->vme_start)); old_entry->offset = 0; old_entry->object.vm_object = object; @@ -5962,7 +6537,7 @@ vm_map_fork_share( (!object->true_share && /* case 3 */ !old_entry->is_shared && (object->size > - (vm_size_t)(old_entry->vme_end - + (vm_map_size_t)(old_entry->vme_end - old_entry->vme_start)))) { /* @@ -6045,7 +6620,7 @@ vm_map_fork_share( assert(!(object->shadowed && old_entry->is_shared)); vm_object_shadow(&old_entry->object.vm_object, &old_entry->offset, - (vm_size_t) (old_entry->vme_end - + (vm_map_size_t) (old_entry->vme_end - old_entry->vme_start)); /* @@ -6131,15 +6706,15 @@ vm_map_fork_share( } } -boolean_t +static boolean_t vm_map_fork_copy( vm_map_t old_map, vm_map_entry_t *old_entry_p, vm_map_t new_map) { vm_map_entry_t old_entry = *old_entry_p; - vm_size_t entry_size = old_entry->vme_end - old_entry->vme_start; - vm_offset_t start = old_entry->vme_start; + vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start; + vm_map_offset_t start = old_entry->vme_start; vm_map_copy_t copy; vm_map_entry_t last = vm_map_last_entry(new_map); @@ -6211,10 +6786,10 @@ vm_map_t vm_map_fork( vm_map_t old_map) { - pmap_t new_pmap = pmap_create((vm_size_t) 0); + pmap_t new_pmap = pmap_create((vm_map_size_t) 0); vm_map_t new_map; vm_map_entry_t old_entry; - vm_size_t new_size = 0, entry_size; + vm_map_size_t new_size = 0, entry_size; vm_map_entry_t new_entry; boolean_t src_needs_copy; boolean_t new_entry_needs_copy; @@ -6350,28 +6925,28 @@ vm_map_fork( kern_return_t vm_map_lookup_locked( vm_map_t *var_map, /* IN/OUT */ - register vm_offset_t vaddr, - register vm_prot_t fault_type, + vm_map_offset_t vaddr, + vm_prot_t fault_type, vm_map_version_t *out_version, /* OUT */ vm_object_t *object, /* OUT */ vm_object_offset_t *offset, /* OUT */ vm_prot_t *out_prot, /* OUT */ boolean_t *wired, /* OUT */ int *behavior, /* OUT */ - vm_object_offset_t *lo_offset, /* OUT */ - vm_object_offset_t *hi_offset, /* OUT */ - vm_map_t *pmap_map) + vm_map_offset_t *lo_offset, /* OUT */ + vm_map_offset_t *hi_offset, /* OUT */ + vm_map_t *real_map) { vm_map_entry_t entry; register vm_map_t map = *var_map; vm_map_t old_map = *var_map; vm_map_t cow_sub_map_parent = VM_MAP_NULL; - vm_offset_t cow_parent_vaddr; - vm_offset_t old_start; - vm_offset_t old_end; + vm_map_offset_t cow_parent_vaddr = 0; + vm_map_offset_t old_start = 0; + vm_map_offset_t old_end = 0; register vm_prot_t prot; - *pmap_map = map; + *real_map = map; RetryLookup: ; /* @@ -6394,9 +6969,9 @@ vm_map_lookup_locked( if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { if((cow_sub_map_parent) && (cow_sub_map_parent != map)) vm_map_unlock(cow_sub_map_parent); - if((*pmap_map != map) - && (*pmap_map != cow_sub_map_parent)) - vm_map_unlock(*pmap_map); + if((*real_map != map) + && (*real_map != cow_sub_map_parent)) + vm_map_unlock(*real_map); return KERN_INVALID_ADDRESS; } @@ -6414,29 +6989,28 @@ vm_map_lookup_locked( submap_recurse: if (entry->is_sub_map) { - vm_offset_t local_vaddr; - vm_offset_t end_delta; - vm_offset_t start_delta; - vm_offset_t object_start_delta; + 
vm_map_offset_t local_vaddr; + vm_map_offset_t end_delta; + vm_map_offset_t start_delta; vm_map_entry_t submap_entry; boolean_t mapped_needs_copy=FALSE; local_vaddr = vaddr; if ((!entry->needs_copy) && (entry->use_pmap)) { - /* if pmap_map equals map we unlock below */ - if ((*pmap_map != map) && - (*pmap_map != cow_sub_map_parent)) - vm_map_unlock(*pmap_map); - *pmap_map = entry->object.sub_map; + /* if real_map equals map we unlock below */ + if ((*real_map != map) && + (*real_map != cow_sub_map_parent)) + vm_map_unlock(*real_map); + *real_map = entry->object.sub_map; } if(entry->needs_copy) { if (!mapped_needs_copy) { if (vm_map_lock_read_to_write(map)) { vm_map_lock_read(map); - if(*pmap_map == entry->object.sub_map) - *pmap_map = map; + if(*real_map == entry->object.sub_map) + *real_map = map; goto RetryLookup; } vm_map_lock_read(entry->object.sub_map); @@ -6451,7 +7025,7 @@ submap_recurse: } else { vm_map_lock_read(entry->object.sub_map); if((cow_sub_map_parent != map) && - (*pmap_map != map)) + (*real_map != map)) vm_map_unlock(map); } } else { @@ -6461,7 +7035,7 @@ submap_recurse: /* follow the maps down to the object */ /* here we unlock knowing we are not */ /* revisiting the map. */ - if((*pmap_map != map) && (map != cow_sub_map_parent)) + if((*real_map != map) && (map != cow_sub_map_parent)) vm_map_unlock_read(map); } @@ -6475,11 +7049,11 @@ RetrySubMap: if((cow_sub_map_parent) && (cow_sub_map_parent != map)){ vm_map_unlock(cow_sub_map_parent); } - if((*pmap_map != map) - && (*pmap_map != cow_sub_map_parent)) { - vm_map_unlock(*pmap_map); + if((*real_map != map) + && (*real_map != cow_sub_map_parent)) { + vm_map_unlock(*real_map); } - *pmap_map = map; + *real_map = map; return KERN_INVALID_ADDRESS; } /* find the attenuated shadow of the underlying object */ @@ -6514,8 +7088,8 @@ RetrySubMap: if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) { vm_object_t copy_object; - vm_offset_t local_start; - vm_offset_t local_end; + vm_map_offset_t local_start; + vm_map_offset_t local_end; boolean_t copied_slowly = FALSE; if (vm_map_lock_read_to_write(map)) { @@ -6529,10 +7103,10 @@ RetrySubMap: if (submap_entry->object.vm_object == VM_OBJECT_NULL) { submap_entry->object.vm_object = vm_object_allocate( - (vm_size_t) + (vm_map_size_t) (submap_entry->vme_end - submap_entry->vme_start)); - submap_entry->offset = 0; + submap_entry->offset = 0; } local_start = local_vaddr - (cow_parent_vaddr - old_start); @@ -6636,7 +7210,7 @@ RetrySubMap: vm_map_lock_write_to_read(map); } else { if((cow_sub_map_parent) - && (cow_sub_map_parent != *pmap_map) + && (cow_sub_map_parent != *real_map) && (cow_sub_map_parent != map)) { vm_map_unlock(cow_sub_map_parent); } @@ -6652,10 +7226,10 @@ RetrySubMap: prot = entry->protection; if ((fault_type & (prot)) != fault_type) { - if (*pmap_map != map) { - vm_map_unlock(*pmap_map); + if (*real_map != map) { + vm_map_unlock(*real_map); } - *pmap_map = map; + *real_map = map; return KERN_PROTECTION_FAILURE; } @@ -6664,7 +7238,8 @@ RetrySubMap: * it for all possible accesses. */ - if (*wired = (entry->wired_count != 0)) + *wired = (entry->wired_count != 0); + if (*wired) prot = fault_type = entry->protection; /* @@ -6680,7 +7255,7 @@ RetrySubMap: * demote the permissions allowed. */ - if (fault_type & VM_PROT_WRITE || *wired) { + if ((fault_type & VM_PROT_WRITE) || *wired) { /* * Make a new object, and place it in the * object chain. 
Note that no new references @@ -6694,7 +7269,7 @@ RetrySubMap: } vm_object_shadow(&entry->object.vm_object, &entry->offset, - (vm_size_t) (entry->vme_end - + (vm_map_size_t) (entry->vme_end - entry->vme_start)); entry->object.vm_object->shadowed = TRUE; @@ -6722,7 +7297,7 @@ RetrySubMap: } entry->object.vm_object = vm_object_allocate( - (vm_size_t)(entry->vme_end - entry->vme_start)); + (vm_map_size_t)(entry->vme_end - entry->vme_start)); entry->offset = 0; vm_map_lock_write_to_read(map); } @@ -6789,6 +7364,300 @@ vm_map_verify( */ +/* + * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY + * Goes away after regular vm_region_recurse function migrates to + * 64 bits + * vm_region_recurse: A form of vm_region which follows the + * submaps in a target map + * + */ + +kern_return_t +vm_map_region_recurse_64( + vm_map_t map, + vm_map_offset_t *address, /* IN/OUT */ + vm_map_size_t *size, /* OUT */ + natural_t *nesting_depth, /* IN/OUT */ + vm_region_submap_info_64_t submap_info, /* IN/OUT */ + mach_msg_type_number_t *count) /* IN/OUT */ +{ + vm_region_extended_info_data_t extended; + vm_map_entry_t tmp_entry; + vm_map_offset_t user_address; + unsigned int user_max_depth; + + /* + * "curr_entry" is the VM map entry preceding or including the + * address we're looking for. + * "curr_map" is the map or sub-map containing "curr_entry". + * "curr_offset" is the cumulated offset of "curr_map" in the + * target task's address space. + * "curr_depth" is the depth of "curr_map" in the chain of + * sub-maps. + * "curr_max_offset" is the maximum offset we should take into + * account in the current map. It may be smaller than the current + * map's "max_offset" because we might not have mapped it all in + * the upper level map. + */ + vm_map_entry_t curr_entry; + vm_map_offset_t curr_offset; + vm_map_t curr_map; + unsigned int curr_depth; + vm_map_offset_t curr_max_offset; + + /* + * "next_" is the same as "curr_" but for the VM region immediately + * after the address we're looking for. We need to keep track of this + * too because we want to return info about that region if the + * address we're looking for is not mapped. + */ + vm_map_entry_t next_entry; + vm_map_offset_t next_offset; + vm_map_t next_map; + unsigned int next_depth; + vm_map_offset_t next_max_offset; + + if (map == VM_MAP_NULL) { + /* no address space to work on */ + return KERN_INVALID_ARGUMENT; + } + + if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { + /* "info" structure is not big enough and would overflow */ + return KERN_INVALID_ARGUMENT; + } + + *count = VM_REGION_SUBMAP_INFO_COUNT_64; + + user_address = *address; + user_max_depth = *nesting_depth; + + curr_entry = NULL; + curr_map = map; + curr_offset = 0; + curr_depth = 0; + curr_max_offset = curr_map->max_offset; + + next_entry = NULL; + next_map = NULL; + next_offset = 0; + next_depth = 0; + next_max_offset = curr_max_offset; + + if (not_in_kdp) { + vm_map_lock_read(curr_map); + } + + for (;;) { + if (vm_map_lookup_entry(curr_map, + user_address - curr_offset, + &tmp_entry)) { + /* tmp_entry contains the address we're looking for */ + curr_entry = tmp_entry; + } else { + /* + * The address is not mapped. "tmp_entry" is the + * map entry preceding the address. We want the next + * one, if it exists. 
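+ * (Editor's worked example for the offset bookkeeping above:
+ * if a submap is mapped at vme_start 0x30000000 with an entry
+ * offset of 0x1000, descending adds 0x30000000 - 0x1000 to
+ * "curr_offset", so an address A inside the submap corresponds
+ * to the top-level address A + curr_offset, and the lookups
+ * below use user_address - curr_offset.)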
+ */ + curr_entry = tmp_entry->vme_next; + if (curr_entry == vm_map_to_entry(curr_map) || + curr_entry->vme_start >= curr_max_offset) { + /* no next entry at this level: stop looking */ + if (not_in_kdp) { + vm_map_unlock_read(curr_map); + } + curr_entry = NULL; + curr_map = NULL; + curr_offset = 0; + curr_depth = 0; + curr_max_offset = 0; + break; + } + } + + /* + * Is the next entry at this level closer to the address (or + * deeper in the submap chain) than the one we had + * so far ? + */ + tmp_entry = curr_entry->vme_next; + if (tmp_entry == vm_map_to_entry(curr_map)) { + /* no next entry at this level */ + } else if (tmp_entry->vme_start >= curr_max_offset) { + /* + * tmp_entry is beyond the scope of what we mapped of + * this submap in the upper level: ignore it. + */ + } else if ((next_entry == NULL) || + (tmp_entry->vme_start + curr_offset <= + next_entry->vme_start + next_offset)) { + /* + * We didn't have a "next_entry" or this one is + * closer to the address we're looking for: + * use this "tmp_entry" as the new "next_entry". + */ + if (next_entry != NULL) { + /* unlock the last "next_map" */ + if (next_map != curr_map && not_in_kdp) { + vm_map_unlock_read(next_map); + } + } + next_entry = tmp_entry; + next_map = curr_map; + next_offset = curr_offset; + next_depth = curr_depth; + next_max_offset = curr_max_offset; + } + + if (!curr_entry->is_sub_map || + curr_depth >= user_max_depth) { + /* + * We hit a leaf map or we reached the maximum depth + * we could, so stop looking. Keep the current map + * locked. + */ + break; + } + + /* + * Get down to the next submap level. + */ + + /* + * Lock the next level and unlock the current level, + * unless we need to keep it locked to access the "next_entry" + * later. + */ + if (not_in_kdp) { + vm_map_lock_read(curr_entry->object.sub_map); + } + if (curr_map == next_map) { + /* keep "next_map" locked in case we need it */ + } else { + /* release this map */ + vm_map_unlock_read(curr_map); + } + + /* + * Adjust the offset. "curr_entry" maps the submap + * at relative address "curr_entry->vme_start" in the + * curr_map but skips the first "curr_entry->offset" + * bytes of the submap. + * "curr_offset" always represents the offset of a virtual + * address in the curr_map relative to the absolute address + * space (i.e. the top-level VM map). + */ + curr_offset += + (curr_entry->vme_start - curr_entry->offset); + /* switch to the submap */ + curr_map = curr_entry->object.sub_map; + curr_depth++; + /* + * "curr_max_offset" allows us to keep track of the + * portion of the submap that is actually mapped at this level: + * the rest of that submap is irrelevant to us, since it's not + * mapped here. + * The relevant portion of the map starts at + * "curr_entry->offset" up to the size of "curr_entry". + */ + curr_max_offset = + curr_entry->vme_end - curr_entry->vme_start + + curr_entry->offset; + curr_entry = NULL; + } + + if (curr_entry == NULL) { + /* no VM region contains the address... */ + if (next_entry == NULL) { + /* ... and no VM region follows it either */ + return KERN_INVALID_ADDRESS; + } + /* ... gather info about the next VM region */ + curr_entry = next_entry; + curr_map = next_map; /* still locked ... 
*/ + curr_offset = next_offset; + curr_depth = next_depth; + curr_max_offset = next_max_offset; + } else { + /* we won't need "next_entry" after all */ + if (next_entry != NULL) { + /* release "next_map" */ + if (next_map != curr_map && not_in_kdp) { + vm_map_unlock_read(next_map); + } + } + } + next_entry = NULL; + next_map = NULL; + next_offset = 0; + next_depth = 0; + next_max_offset = 0; + + *nesting_depth = curr_depth; + *size = curr_entry->vme_end - curr_entry->vme_start; + *address = curr_entry->vme_start + curr_offset; + + submap_info->user_tag = curr_entry->alias; + submap_info->offset = curr_entry->offset; + submap_info->protection = curr_entry->protection; + submap_info->inheritance = curr_entry->inheritance; + submap_info->max_protection = curr_entry->max_protection; + submap_info->behavior = curr_entry->behavior; + submap_info->user_wired_count = curr_entry->user_wired_count; + submap_info->is_submap = curr_entry->is_sub_map; + submap_info->object_id = (uint32_t) curr_entry->object.vm_object; + + extended.pages_resident = 0; + extended.pages_swapped_out = 0; + extended.pages_shared_now_private = 0; + extended.pages_dirtied = 0; + extended.external_pager = 0; + extended.shadow_depth = 0; + + if (not_in_kdp) { + if (!curr_entry->is_sub_map) { + vm_map_region_walk(curr_map, + curr_entry->vme_start, + curr_entry, + curr_entry->offset, + (curr_entry->vme_end - + curr_entry->vme_start), + &extended); + submap_info->share_mode = extended.share_mode; + if (extended.external_pager && + extended.ref_count == 2 && + extended.share_mode == SM_SHARED) { + submap_info->share_mode = SM_PRIVATE; + } + submap_info->ref_count = extended.ref_count; + } else { + if (curr_entry->use_pmap) { + submap_info->share_mode = SM_TRUESHARED; + } else { + submap_info->share_mode = SM_PRIVATE; + } + submap_info->ref_count = + curr_entry->object.sub_map->ref_count; + } + } + + submap_info->pages_resident = extended.pages_resident; + submap_info->pages_swapped_out = extended.pages_swapped_out; + submap_info->pages_shared_now_private = + extended.pages_shared_now_private; + submap_info->pages_dirtied = extended.pages_dirtied; + submap_info->external_pager = extended.external_pager; + submap_info->shadow_depth = extended.shadow_depth; + + if (not_in_kdp) { + vm_map_unlock_read(curr_map); + } + + return KERN_SUCCESS; +} + /* * vm_region: * @@ -6799,36 +7668,32 @@ vm_map_verify( * XXX The reserved and behavior fields cannot be filled * in until the vm merge from the IK is completed, and * vm_reserve is implemented. - * - * XXX Dependency: syscall_vm_region() also supports only one flavor. 
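+ *
+ * Illustrative call (editor's sketch; the user-visible
+ * vm_region()/mach_vm_region() entry points funnel into this
+ * routine):
+ *
+ *	vm_map_offset_t address = 0;
+ *	vm_map_size_t size;
+ *	vm_region_basic_info_data_64_t info;
+ *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
+ *	mach_port_t object_name;
+ *	kern_return_t kr;
+ *
+ *	kr = vm_map_region(map, &address, &size,
+ *			   VM_REGION_BASIC_INFO_64,
+ *			   (vm_region_info_t) &info,
+ *			   &count, &object_name);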
*/ kern_return_t -vm_region( +vm_map_region( vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t *size, /* OUT */ + vm_map_offset_t *address, /* IN/OUT */ + vm_map_size_t *size, /* OUT */ vm_region_flavor_t flavor, /* IN */ vm_region_info_t info, /* OUT */ - mach_msg_type_number_t *count, /* IN/OUT */ - ipc_port_t *object_name) /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ { vm_map_entry_t tmp_entry; - register vm_map_entry_t entry; - register - vm_offset_t start; - vm_region_basic_info_t basic; - vm_region_extended_info_t extended; - vm_region_top_info_t top; + vm_map_offset_t start; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); switch (flavor) { - + case VM_REGION_BASIC_INFO: + /* legacy for old 32-bit objects info */ { + vm_region_basic_info_t basic; + if (*count < VM_REGION_BASIC_INFO_COUNT) return(KERN_INVALID_ARGUMENT); @@ -6849,6 +7714,51 @@ vm_region( start = entry->vme_start; + basic->offset = (uint32_t)entry->offset; + basic->protection = entry->protection; + basic->inheritance = entry->inheritance; + basic->max_protection = entry->max_protection; + basic->behavior = entry->behavior; + basic->user_wired_count = entry->user_wired_count; + basic->reserved = entry->is_sub_map; + *address = start; + *size = (entry->vme_end - start); + + if (object_name) *object_name = IP_NULL; + if (entry->is_sub_map) { + basic->shared = FALSE; + } else { + basic->shared = entry->is_shared; + } + + vm_map_unlock_read(map); + return(KERN_SUCCESS); + } + + case VM_REGION_BASIC_INFO_64: + { + vm_region_basic_info_64_t basic; + + if (*count < VM_REGION_BASIC_INFO_COUNT_64) + return(KERN_INVALID_ARGUMENT); + + basic = (vm_region_basic_info_64_t) info; + *count = VM_REGION_BASIC_INFO_COUNT_64; + + vm_map_lock_read(map); + + start = *address; + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } else { + entry = tmp_entry; + } + + start = entry->vme_start; + basic->offset = entry->offset; basic->protection = entry->protection; basic->inheritance = entry->inheritance; @@ -6871,6 +7781,7 @@ vm_region( } case VM_REGION_EXTENDED_INFO: { + vm_region_extended_info_t extended; if (*count < VM_REGION_EXTENDED_INFO_COUNT) return(KERN_INVALID_ARGUMENT); @@ -6900,7 +7811,7 @@ vm_region( extended->external_pager = 0; extended->shadow_depth = 0; - vm_region_walk(entry, extended, entry->offset, entry->vme_end - start, map, start); + vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended); if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) extended->share_mode = SM_PRIVATE; @@ -6915,6 +7826,7 @@ vm_region( } case VM_REGION_TOP_INFO: { + vm_region_top_info_t top; if (*count < VM_REGION_TOP_INFO_COUNT) return(KERN_INVALID_ARGUMENT); @@ -6939,7 +7851,7 @@ vm_region( top->private_pages_resident = 0; top->shared_pages_resident = 0; - vm_region_top_walk(entry, top); + vm_map_region_top_walk(entry, top); if (object_name) *object_name = IP_NULL; @@ -6954,2185 +7866,2272 @@ vm_region( } } -/* - * vm_region_recurse: A form of vm_region which follows the - * submaps in a target map - * - */ - -kern_return_t -vm_region_recurse( - vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t *size, /* OUT */ - natural_t *nesting_depth, /* IN/OUT */ - vm_region_recurse_info_t info, /* IN/OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ +static void 
+vm_map_region_top_walk( + vm_map_entry_t entry, + vm_region_top_info_t top) { - vm_map_entry_t tmp_entry; - register - vm_map_entry_t entry; - register - vm_offset_t start; - - unsigned int recurse_count; - vm_map_t submap; - vm_map_t base_map; - vm_map_entry_t base_entry; - vm_offset_t base_next; - vm_offset_t base_addr; - vm_offset_t baddr_start_delta; - vm_region_submap_info_t submap_info; - vm_region_extended_info_data_t extended; - - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); - - submap_info = (vm_region_submap_info_t) info; - *count = VM_REGION_SUBMAP_INFO_COUNT; - - if (*count < VM_REGION_SUBMAP_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); - - start = *address; - base_map = map; - recurse_count = *nesting_depth; + register struct vm_object *obj, *tmp_obj; + register int ref_count; -LOOKUP_NEXT_BASE_ENTRY: - vm_map_lock_read(map); - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; + if (entry->object.vm_object == 0 || entry->is_sub_map) { + top->share_mode = SM_EMPTY; + top->ref_count = 0; + top->obj_id = 0; + return; } - *size = entry->vme_end - entry->vme_start; - start = entry->vme_start; - base_addr = start; - baddr_start_delta = *address - start; - base_next = entry->vme_end; - base_entry = entry; - - while(entry->is_sub_map && recurse_count) { - recurse_count--; - vm_map_lock_read(entry->object.sub_map); + { + obj = entry->object.vm_object; + vm_object_lock(obj); - if(entry == base_entry) { - start = entry->offset; - start += *address - entry->vme_start; - } + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; - submap = entry->object.sub_map; - vm_map_unlock_read(map); - map = submap; + if (obj->shadow) { + if (ref_count == 1) + top->private_pages_resident = obj->resident_page_count; + else + top->shared_pages_resident = obj->resident_page_count; + top->ref_count = ref_count; + top->share_mode = SM_COW; + + while ((tmp_obj = obj->shadow)) { + vm_object_lock(tmp_obj); + vm_object_unlock(obj); + obj = tmp_obj; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) - == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - } else { - entry = tmp_entry; + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + top->shared_pages_resident += obj->resident_page_count; + top->ref_count += ref_count - 1; } - if(start <= entry->vme_start) { - vm_offset_t old_start = start; - if(baddr_start_delta) { - base_addr += (baddr_start_delta); - *size -= baddr_start_delta; - baddr_start_delta = 0; - } - if(base_next <= - (base_addr += (entry->vme_start - start))) { - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - *size -= entry->vme_start - start; - if (*size > (entry->vme_end - entry->vme_start)) { - *size = entry->vme_end - entry->vme_start; - } - start = 0; + } else { + if (entry->needs_copy) { + top->share_mode = SM_COW; + top->shared_pages_resident = obj->resident_page_count; } else { - if(baddr_start_delta) { - if((start - entry->vme_start) - < baddr_start_delta) { - base_addr += start - entry->vme_start; - *size -= start - entry->vme_start; - } else { - base_addr += baddr_start_delta; - *size += baddr_start_delta; - } - 
baddr_start_delta = 0; - } - base_addr += entry->vme_start; - if(base_addr >= base_next) { - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - if (*size > (entry->vme_end - start)) - *size = entry->vme_end - start; - - start = entry->vme_start - start; + if (ref_count == 1 || + (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { + top->share_mode = SM_PRIVATE; + top->private_pages_resident = obj->resident_page_count; + } else { + top->share_mode = SM_SHARED; + top->shared_pages_resident = obj->resident_page_count; + } } + top->ref_count = ref_count; + } + top->obj_id = (int)obj; - start += entry->offset; - + vm_object_unlock(obj); } - *nesting_depth -= recurse_count; - if(entry != base_entry) { - start = entry->vme_start + (start - entry->offset); +} + +static void +vm_map_region_walk( + vm_map_t map, + vm_map_offset_t va, + vm_map_entry_t entry, + vm_object_offset_t offset, + vm_object_size_t range, + vm_region_extended_info_t extended) +{ + register struct vm_object *obj, *tmp_obj; + register vm_map_offset_t last_offset; + register int i; + register int ref_count; + struct vm_object *shadow_object; + int shadow_depth; + + if ((entry->object.vm_object == 0) || + (entry->is_sub_map) || + (entry->object.vm_object->phys_contiguous)) { + extended->share_mode = SM_EMPTY; + extended->ref_count = 0; + return; } + { + obj = entry->object.vm_object; + vm_object_lock(obj); - submap_info->user_tag = entry->alias; - submap_info->offset = entry->offset; - submap_info->protection = entry->protection; - submap_info->inheritance = entry->inheritance; - submap_info->max_protection = entry->max_protection; - submap_info->behavior = entry->behavior; - submap_info->user_wired_count = entry->user_wired_count; - submap_info->is_submap = entry->is_sub_map; - submap_info->object_id = (vm_offset_t)entry->object.vm_object; - *address = base_addr; + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE) + vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended); + + shadow_object = obj->shadow; + shadow_depth = 0; + if (shadow_object != VM_OBJECT_NULL) { + vm_object_lock(shadow_object); + for (; + shadow_object != VM_OBJECT_NULL; + shadow_depth++) { + vm_object_t next_shadow; + + next_shadow = shadow_object->shadow; + if (next_shadow) { + vm_object_lock(next_shadow); + } + vm_object_unlock(shadow_object); + shadow_object = next_shadow; + } + } + extended->shadow_depth = shadow_depth; - extended.pages_resident = 0; - extended.pages_swapped_out = 0; - extended.pages_shared_now_private = 0; - extended.pages_dirtied = 0; - extended.external_pager = 0; - extended.shadow_depth = 0; + if (extended->shadow_depth || entry->needs_copy) + extended->share_mode = SM_COW; + else { + if (ref_count == 1) + extended->share_mode = SM_PRIVATE; + else { + if (obj->true_share) + extended->share_mode = SM_TRUESHARED; + else + extended->share_mode = SM_SHARED; + } + } + extended->ref_count = ref_count - extended->shadow_depth; + + for (i = 0; i < extended->shadow_depth; i++) { + if ((tmp_obj = obj->shadow) == 0) + break; + vm_object_lock(tmp_obj); + vm_object_unlock(obj); - if(!entry->is_sub_map) { - vm_region_walk(entry, &extended, entry->offset, - entry->vme_end - start, map, start); - submap_info->share_mode = extended.share_mode; - if (extended.external_pager && extended.ref_count == 2 - 
&& extended.share_mode == SM_SHARED) - submap_info->share_mode = SM_PRIVATE; - submap_info->ref_count = extended.ref_count; - } else { - if(entry->use_pmap) - submap_info->share_mode = SM_TRUESHARED; - else - submap_info->share_mode = SM_PRIVATE; - submap_info->ref_count = entry->object.sub_map->ref_count; - } + if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) + ref_count--; - submap_info->pages_resident = extended.pages_resident; - submap_info->pages_swapped_out = extended.pages_swapped_out; - submap_info->pages_shared_now_private = - extended.pages_shared_now_private; - submap_info->pages_dirtied = extended.pages_dirtied; - submap_info->external_pager = extended.external_pager; - submap_info->shadow_depth = extended.shadow_depth; + extended->ref_count += ref_count; + obj = tmp_obj; + } + vm_object_unlock(obj); - vm_map_unlock_read(map); - return(KERN_SUCCESS); + if (extended->share_mode == SM_SHARED) { + register vm_map_entry_t cur; + register vm_map_entry_t last; + int my_refs; + + obj = entry->object.vm_object; + last = vm_map_to_entry(map); + my_refs = 0; + + if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) + ref_count--; + for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) + my_refs += vm_map_region_count_obj_refs(cur, obj); + + if (my_refs == ref_count) + extended->share_mode = SM_PRIVATE_ALIASED; + else if (my_refs > 1) + extended->share_mode = SM_SHARED_ALIASED; + } + } } -/* - * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY - * Goes away after regular vm_region_recurse function migrates to - * 64 bits - * vm_region_recurse: A form of vm_region which follows the - * submaps in a target map - * - */ -kern_return_t -vm_region_recurse_64( - vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t *size, /* OUT */ - natural_t *nesting_depth, /* IN/OUT */ - vm_region_recurse_info_t info, /* IN/OUT */ - mach_msg_type_number_t *count) /* IN/OUT */ +/* object is locked on entry and locked on return */ + + +static void +vm_map_region_look_for_page( + __unused vm_map_t map, + __unused vm_map_offset_t va, + vm_object_t object, + vm_object_offset_t offset, + int max_refcnt, + int depth, + vm_region_extended_info_t extended) { - vm_map_entry_t tmp_entry; - register - vm_map_entry_t entry; - register - vm_offset_t start; - - unsigned int recurse_count; - vm_map_t submap; - vm_map_t base_map; - vm_map_entry_t base_entry; - vm_offset_t base_next; - vm_offset_t base_addr; - vm_offset_t baddr_start_delta; - vm_region_submap_info_64_t submap_info; - vm_region_extended_info_data_t extended; + register vm_page_t p; + register vm_object_t shadow; + register int ref_count; + vm_object_t caller_object; + + shadow = object->shadow; + caller_object = object; - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + + while (TRUE) { - submap_info = (vm_region_submap_info_64_t) info; - *count = VM_REGION_SUBMAP_INFO_COUNT; + if ( !(object->pager_trusted) && !(object->internal)) + extended->external_pager = 1; - if (*count < VM_REGION_SUBMAP_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + if (shadow && (max_refcnt == 1)) + extended->pages_shared_now_private++; - start = *address; - base_map = map; - recurse_count = *nesting_depth; + if (!p->fictitious && + (p->dirty || pmap_is_modified(p->phys_page))) + extended->pages_dirtied++; -LOOKUP_NEXT_BASE_ENTRY: - if (not_in_kdp) - vm_map_lock_read(map); - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = 
tmp_entry->vme_next) == vm_map_to_entry(map)) { - if (not_in_kdp) - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); + extended->pages_resident++; + + if(object != caller_object) + vm_object_unlock(object); + + return; } - } else { - entry = tmp_entry; - } - *size = entry->vme_end - entry->vme_start; - start = entry->vme_start; - base_addr = start; - baddr_start_delta = *address - start; - base_next = entry->vme_end; - base_entry = entry; + if (object->existence_map) { + if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { - while(entry->is_sub_map && recurse_count) { - recurse_count--; - if (not_in_kdp) - vm_map_lock_read(entry->object.sub_map); + extended->pages_swapped_out++; + if(object != caller_object) + vm_object_unlock(object); - if(entry == base_entry) { - start = entry->offset; - start += *address - entry->vme_start; + return; + } } + if (shadow) { + vm_object_lock(shadow); - submap = entry->object.sub_map; - if (not_in_kdp) - vm_map_unlock_read(map); - map = submap; - - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) - == vm_map_to_entry(map)) { - if (not_in_kdp) - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - } else { - entry = tmp_entry; + if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) + ref_count--; - } - if(start <= entry->vme_start) { - vm_offset_t old_start = start; - if(baddr_start_delta) { - base_addr += (baddr_start_delta); - *size -= baddr_start_delta; - baddr_start_delta = 0; - } - if(base_next <= - (base_addr += (entry->vme_start - start))) { - if (not_in_kdp) - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - *size -= entry->vme_start - start; - if (*size > (entry->vme_end - entry->vme_start)) { - *size = entry->vme_end - entry->vme_start; - } - start = 0; - } else { - if(baddr_start_delta) { - if((start - entry->vme_start) - < baddr_start_delta) { - base_addr += start - entry->vme_start; - *size -= start - entry->vme_start; - } else { - base_addr += baddr_start_delta; - *size += baddr_start_delta; - } - baddr_start_delta = 0; - } - base_addr += entry->vme_start; - if(base_addr >= base_next) { - if (not_in_kdp) - vm_map_unlock_read(map); - map = base_map; - start = base_next; - recurse_count = 0; - *nesting_depth = 0; - goto LOOKUP_NEXT_BASE_ENTRY; - } - if (*size > (entry->vme_end - start)) - *size = entry->vme_end - start; + if (++depth > extended->shadow_depth) + extended->shadow_depth = depth; - start = entry->vme_start - start; + if (ref_count > max_refcnt) + max_refcnt = ref_count; + + if(object != caller_object) + vm_object_unlock(object); + + offset = offset + object->shadow_offset; + object = shadow; + shadow = object->shadow; + continue; } + if(object != caller_object) + vm_object_unlock(object); + break; + } +} - start += entry->offset; +static int +vm_map_region_count_obj_refs( + vm_map_entry_t entry, + vm_object_t object) +{ + register int ref_count; + register vm_object_t chk_obj; + register vm_object_t tmp_obj; - } - *nesting_depth -= recurse_count; - if(entry != base_entry) { - start = entry->vme_start + (start - entry->offset); - } + if (entry->object.vm_object == 0) + return(0); + if (entry->is_sub_map) + return(0); + else { + ref_count = 0; - submap_info->user_tag = entry->alias; - submap_info->offset = entry->offset; - submap_info->protection = 
entry->protection; - submap_info->inheritance = entry->inheritance; - submap_info->max_protection = entry->max_protection; - submap_info->behavior = entry->behavior; - submap_info->user_wired_count = entry->user_wired_count; - submap_info->is_submap = entry->is_sub_map; - submap_info->object_id = (vm_offset_t)entry->object.vm_object; - *address = base_addr; + chk_obj = entry->object.vm_object; + vm_object_lock(chk_obj); + while (chk_obj) { + if (chk_obj == object) + ref_count++; + tmp_obj = chk_obj->shadow; + if (tmp_obj) + vm_object_lock(tmp_obj); + vm_object_unlock(chk_obj); - extended.pages_resident = 0; - extended.pages_swapped_out = 0; - extended.pages_shared_now_private = 0; - extended.pages_dirtied = 0; - extended.external_pager = 0; - extended.shadow_depth = 0; - - if (not_in_kdp) - if(!entry->is_sub_map) { - vm_region_walk(entry, &extended, entry->offset, - entry->vme_end - start, map, start); - submap_info->share_mode = extended.share_mode; - if (extended.external_pager && extended.ref_count == 2 - && extended.share_mode == SM_SHARED) - submap_info->share_mode = SM_PRIVATE; - submap_info->ref_count = extended.ref_count; - } else { - if(entry->use_pmap) - submap_info->share_mode = SM_TRUESHARED; - else - submap_info->share_mode = SM_PRIVATE; - submap_info->ref_count = entry->object.sub_map->ref_count; + chk_obj = tmp_obj; + } } - - submap_info->pages_resident = extended.pages_resident; - submap_info->pages_swapped_out = extended.pages_swapped_out; - submap_info->pages_shared_now_private = - extended.pages_shared_now_private; - submap_info->pages_dirtied = extended.pages_dirtied; - submap_info->external_pager = extended.external_pager; - submap_info->shadow_depth = extended.shadow_depth; - if (not_in_kdp) - vm_map_unlock_read(map); - return(KERN_SUCCESS); + return(ref_count); } /* - * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY - * Goes away after regular vm_region function migrates to - * 64 bits + * Routine: vm_map_simplify + * + * Description: + * Attempt to simplify the map representation in + * the vicinity of the given starting address. + * Note: + * This routine is intended primarily to keep the + * kernel maps more compact -- they generally don't + * benefit from the "expand a map entry" technology + * at allocation time because the adjacent entry + * is often wired down. 
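+ *
+ * Editor's illustration: two abutting entries such as
+ *
+ *	[0x1000, 0x3000)  object A, offset 0        (prev_entry)
+ *	[0x3000, 0x5000)  object A, offset 0x2000   (this_entry)
+ *
+ * that also agree in protection, inheritance, wiring and the
+ * other attributes checked in vm_map_simplify_entry() below are
+ * coalesced into one entry [0x1000, 0x5000) over object A at
+ * offset 0, and the redundant prev_entry is disposed of.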
*/ - - -kern_return_t -vm_region_64( - vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t *size, /* OUT */ - vm_region_flavor_t flavor, /* IN */ - vm_region_info_t info, /* OUT */ - mach_msg_type_number_t *count, /* IN/OUT */ - ipc_port_t *object_name) /* OUT */ +void +vm_map_simplify_entry( + vm_map_t map, + vm_map_entry_t this_entry) { - vm_map_entry_t tmp_entry; - register - vm_map_entry_t entry; - register - vm_offset_t start; - vm_region_basic_info_64_t basic; - vm_region_extended_info_t extended; - vm_region_top_info_t top; - vm_region_object_info_64_t object_info_64; + vm_map_entry_t prev_entry; - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + counter(c_vm_map_simplify_entry_called++); - switch (flavor) { - - case VM_REGION_BASIC_INFO: - { - if (*count < VM_REGION_BASIC_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + prev_entry = this_entry->vme_prev; - basic = (vm_region_basic_info_64_t) info; - *count = VM_REGION_BASIC_INFO_COUNT; + if ((this_entry != vm_map_to_entry(map)) && + (prev_entry != vm_map_to_entry(map)) && - vm_map_lock_read(map); + (prev_entry->vme_end == this_entry->vme_start) && - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; - } + (prev_entry->is_sub_map == FALSE) && + (this_entry->is_sub_map == FALSE) && - start = entry->vme_start; + (prev_entry->object.vm_object == this_entry->object.vm_object) && + ((prev_entry->offset + (prev_entry->vme_end - + prev_entry->vme_start)) + == this_entry->offset) && - basic->offset = entry->offset; - basic->protection = entry->protection; - basic->inheritance = entry->inheritance; - basic->max_protection = entry->max_protection; - basic->behavior = entry->behavior; - basic->user_wired_count = entry->user_wired_count; - basic->reserved = entry->is_sub_map; - *address = start; - *size = (entry->vme_end - start); + (prev_entry->inheritance == this_entry->inheritance) && + (prev_entry->protection == this_entry->protection) && + (prev_entry->max_protection == this_entry->max_protection) && + (prev_entry->behavior == this_entry->behavior) && + (prev_entry->alias == this_entry->alias) && + (prev_entry->wired_count == this_entry->wired_count) && + (prev_entry->user_wired_count == this_entry->user_wired_count) && - if (object_name) *object_name = IP_NULL; - if (entry->is_sub_map) { - basic->shared = FALSE; - } else { - basic->shared = entry->is_shared; - } + (prev_entry->needs_copy == this_entry->needs_copy) && - vm_map_unlock_read(map); - return(KERN_SUCCESS); + (prev_entry->use_pmap == FALSE) && + (this_entry->use_pmap == FALSE) && + (prev_entry->in_transition == FALSE) && + (this_entry->in_transition == FALSE) && + (prev_entry->needs_wakeup == FALSE) && + (this_entry->needs_wakeup == FALSE) && + (prev_entry->is_shared == FALSE) && + (this_entry->is_shared == FALSE) + ) { + _vm_map_entry_unlink(&map->hdr, prev_entry); + this_entry->vme_start = prev_entry->vme_start; + this_entry->offset = prev_entry->offset; + vm_object_deallocate(prev_entry->object.vm_object); + vm_map_entry_dispose(map, prev_entry); + SAVE_HINT(map, this_entry); + counter(c_vm_map_simplified++); } - case VM_REGION_EXTENDED_INFO: - { - - if (*count < VM_REGION_EXTENDED_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); +} - extended = (vm_region_extended_info_t) info; - *count = VM_REGION_EXTENDED_INFO_COUNT; +void +vm_map_simplify( + vm_map_t map, + vm_map_offset_t start) 
+{ + vm_map_entry_t this_entry; - vm_map_lock_read(map); + vm_map_lock(map); + if (vm_map_lookup_entry(map, start, &this_entry)) { + vm_map_simplify_entry(map, this_entry); + vm_map_simplify_entry(map, this_entry->vme_next); + } + counter(c_vm_map_simplify_called++); + vm_map_unlock(map); +} - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; - } - start = entry->vme_start; +static void +vm_map_simplify_range( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; - extended->protection = entry->protection; - extended->user_tag = entry->alias; - extended->pages_resident = 0; - extended->pages_swapped_out = 0; - extended->pages_shared_now_private = 0; - extended->pages_dirtied = 0; - extended->external_pager = 0; - extended->shadow_depth = 0; + /* + * The map should be locked (for "write") by the caller. + */ - vm_region_walk(entry, extended, entry->offset, entry->vme_end - start, map, start); + if (start >= end) { + /* invalid address range */ + return; + } - if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) - extended->share_mode = SM_PRIVATE; + if (!vm_map_lookup_entry(map, start, &entry)) { + /* "start" is not mapped and "entry" ends before "start" */ + if (entry == vm_map_to_entry(map)) { + /* start with first entry in the map */ + entry = vm_map_first_entry(map); + } else { + /* start with next entry */ + entry = entry->vme_next; + } + } + + while (entry != vm_map_to_entry(map) && + entry->vme_start <= end) { + /* try and coalesce "entry" with its previous entry */ + vm_map_simplify_entry(map, entry); + entry = entry->vme_next; + } +} - if (object_name) - *object_name = IP_NULL; - *address = start; - *size = (entry->vme_end - start); - vm_map_unlock_read(map); - return(KERN_SUCCESS); - } - case VM_REGION_TOP_INFO: - { +/* + * Routine: vm_map_machine_attribute + * Purpose: + * Provide machine-specific attributes to mappings, + * such as cachability etc. for machines that provide + * them. NUMA architectures and machines with big/strange + * caches will use this. + * Note: + * Responsibilities for locking and checking are handled here, + * everything else in the pmap module. If any non-volatile + * information must be kept, the pmap module should handle + * it itself. [This assumes that attributes do not + * need to be inherited, which seems ok to me] + */ +kern_return_t +vm_map_machine_attribute( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_machine_attribute_t attribute, + vm_machine_attribute_val_t* value) /* IN/OUT */ +{ + kern_return_t ret; + vm_map_size_t sync_size; + vm_map_entry_t entry; + + if (start < vm_map_min(map) || end > vm_map_max(map)) + return KERN_INVALID_ADDRESS; - if (*count < VM_REGION_TOP_INFO_COUNT) - return(KERN_INVALID_ARGUMENT); + /* Figure how much memory we need to flush (in page increments) */ + sync_size = end - start; - top = (vm_region_top_info_t) info; - *count = VM_REGION_TOP_INFO_COUNT; + vm_map_lock(map); + + if (attribute != MATTR_CACHE) { + /* If we don't have to find physical addresses, we */ + /* don't have to do an explicit traversal here. 
*/ + ret = pmap_attribute(map->pmap, start, end-start, + attribute, value); + vm_map_unlock(map); + return ret; + } - vm_map_lock_read(map); + ret = KERN_SUCCESS; /* Assume it all worked */ - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; + while(sync_size) { + if (vm_map_lookup_entry(map, start, &entry)) { + vm_map_size_t sub_size; + if((entry->vme_end - start) > sync_size) { + sub_size = sync_size; + sync_size = 0; + } else { + sub_size = entry->vme_end - start; + sync_size -= sub_size; + } + if(entry->is_sub_map) { + vm_map_offset_t sub_start; + vm_map_offset_t sub_end; - } - start = entry->vme_start; + sub_start = (start - entry->vme_start) + + entry->offset; + sub_end = sub_start + sub_size; + vm_map_machine_attribute( + entry->object.sub_map, + sub_start, + sub_end, + attribute, value); + } else { + if(entry->object.vm_object) { + vm_page_t m; + vm_object_t object; + vm_object_t base_object; + vm_object_t last_object; + vm_object_offset_t offset; + vm_object_offset_t base_offset; + vm_map_size_t range; + range = sub_size; + offset = (start - entry->vme_start) + + entry->offset; + base_offset = offset; + object = entry->object.vm_object; + base_object = object; + last_object = NULL; - top->private_pages_resident = 0; - top->shared_pages_resident = 0; + vm_object_lock(object); - vm_region_top_walk(entry, top); + while (range) { + m = vm_page_lookup( + object, offset); - if (object_name) - *object_name = IP_NULL; - *address = start; - *size = (entry->vme_end - start); + if (m && !m->fictitious) { + ret = + pmap_attribute_cache_sync( + m->phys_page, + PAGE_SIZE, + attribute, value); + + } else if (object->shadow) { + offset = offset + object->shadow_offset; + last_object = object; + object = object->shadow; + vm_object_lock(last_object->shadow); + vm_object_unlock(last_object); + continue; + } + range -= PAGE_SIZE; - vm_map_unlock_read(map); - return(KERN_SUCCESS); + if (base_object != object) { + vm_object_unlock(object); + vm_object_lock(base_object); + object = base_object; + } + /* Bump to the next page */ + base_offset += PAGE_SIZE; + offset = base_offset; + } + vm_object_unlock(object); + } + } + start += sub_size; + } else { + vm_map_unlock(map); + return KERN_FAILURE; + } + } - case VM_REGION_OBJECT_INFO_64: - { - if (*count < VM_REGION_OBJECT_INFO_COUNT_64) - return(KERN_INVALID_ARGUMENT); - object_info_64 = (vm_region_object_info_64_t) info; - *count = VM_REGION_OBJECT_INFO_COUNT_64; + vm_map_unlock(map); - vm_map_lock_read(map); - - start = *address; - if (!vm_map_lookup_entry(map, start, &tmp_entry)) { - if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { - vm_map_unlock_read(map); - return(KERN_INVALID_ADDRESS); - } - } else { - entry = tmp_entry; - } - - start = entry->vme_start; + return ret; +} - object_info_64->offset = entry->offset; - object_info_64->protection = entry->protection; - object_info_64->inheritance = entry->inheritance; - object_info_64->max_protection = entry->max_protection; - object_info_64->behavior = entry->behavior; - object_info_64->user_wired_count = entry->user_wired_count; - object_info_64->is_sub_map = entry->is_sub_map; - *address = start; - *size = (entry->vme_end - start); +/* + * vm_map_behavior_set: + * + * Sets the paging reference behavior of the specified address + * range in the target map. 
Paging reference behavior affects + * how pagein operations resulting from faults on the map will be + * clustered. + */ +kern_return_t +vm_map_behavior_set( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_behavior_t new_behavior) +{ + register vm_map_entry_t entry; + vm_map_entry_t temp_entry; - if (object_name) *object_name = IP_NULL; - if (entry->is_sub_map) { - object_info_64->shared = FALSE; - object_info_64->object_id = 0; - } else { - object_info_64->shared = entry->is_shared; - object_info_64->object_id = - (vm_offset_t) entry->object.vm_object; - } + XPR(XPR_VM_MAP, + "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", + (integer_t)map, start, end, new_behavior, 0); - vm_map_unlock_read(map); - return(KERN_SUCCESS); - } + switch (new_behavior) { + case VM_BEHAVIOR_DEFAULT: + case VM_BEHAVIOR_RANDOM: + case VM_BEHAVIOR_SEQUENTIAL: + case VM_BEHAVIOR_RSEQNTL: + break; + case VM_BEHAVIOR_WILLNEED: + case VM_BEHAVIOR_DONTNEED: + new_behavior = VM_BEHAVIOR_DEFAULT; + break; default: - return(KERN_INVALID_ARGUMENT); + return(KERN_INVALID_ARGUMENT); } -} -void -vm_region_top_walk( - vm_map_entry_t entry, - vm_region_top_info_t top) -{ - register struct vm_object *obj, *tmp_obj; - register int ref_count; + vm_map_lock(map); - if (entry->object.vm_object == 0 || entry->is_sub_map) { - top->share_mode = SM_EMPTY; - top->ref_count = 0; - top->obj_id = 0; - return; + /* + * The entire address range must be valid for the map. + * Note that vm_map_range_check() does a + * vm_map_lookup_entry() internally and returns the + * entry containing the start of the address range if + * the entire range is valid. + */ + if (vm_map_range_check(map, start, end, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); } - { - obj = entry->object.vm_object; - - vm_object_lock(obj); - - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; - - if (obj->shadow) { - if (ref_count == 1) - top->private_pages_resident = obj->resident_page_count; - else - top->shared_pages_resident = obj->resident_page_count; - top->ref_count = ref_count; - top->share_mode = SM_COW; - - while (tmp_obj = obj->shadow) { - vm_object_lock(tmp_obj); - vm_object_unlock(obj); - obj = tmp_obj; - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; + while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + vm_map_clip_end(map, entry, end); - top->shared_pages_resident += obj->resident_page_count; - top->ref_count += ref_count - 1; - } - } else { - if (entry->needs_copy) { - top->share_mode = SM_COW; - top->shared_pages_resident = obj->resident_page_count; - } else { - if (ref_count == 1 || - (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { - top->share_mode = SM_PRIVATE; - top->private_pages_resident = obj->resident_page_count; - } else { - top->share_mode = SM_SHARED; - top->shared_pages_resident = obj->resident_page_count; - } - } - top->ref_count = ref_count; - } - top->obj_id = (int)obj; + entry->behavior = new_behavior; - vm_object_unlock(obj); + entry = entry->vme_next; } + + vm_map_unlock(map); + return(KERN_SUCCESS); } -void -vm_region_walk( - vm_map_entry_t entry, - vm_region_extended_info_t extended, - vm_object_offset_t offset, - vm_offset_t range, - vm_map_t map, - vm_offset_t va) -{ - register struct vm_object *obj, *tmp_obj; - register vm_offset_t last_offset; - register int i; - register int ref_count; - void 
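/*
 * Illustrative sketch, not part of the patch: how the clustering hint
 * installed by vm_map_behavior_set above is typically requested from
 * user space through the Mach vm_behavior_set() stub (the madvise()
 * analogue).  Assumes the standard <mach/mach.h> user interfaces;
 * error handling is minimal.
 */
#include <mach/mach.h>
#include <mach/mach_error.h>
#include <stdio.h>

int
main(void)
{
	vm_address_t addr = 0;
	vm_size_t size = 4 * vm_page_size;
	kern_return_t kr;

	kr = vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return 1;

	/* Hint that faults in this range will arrive sequentially. */
	kr = vm_behavior_set(mach_task_self(), addr, size,
	    VM_BEHAVIOR_SEQUENTIAL);
	printf("vm_behavior_set: %s\n", mach_error_string(kr));

	vm_deallocate(mach_task_self(), addr, size);
	return 0;
}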
vm_region_look_for_page(); - if ((entry->object.vm_object == 0) || - (entry->is_sub_map) || - (entry->object.vm_object->phys_contiguous)) { - extended->share_mode = SM_EMPTY; - extended->ref_count = 0; - return; - } - { - obj = entry->object.vm_object; +#include +#if MACH_KDB +#include +#include - vm_object_lock(obj); +#define printf db_printf - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; +/* + * Forward declarations for internal functions. + */ +extern void vm_map_links_print( + struct vm_map_links *links); - for (last_offset = offset + range; offset < last_offset; offset += PAGE_SIZE_64, va += PAGE_SIZE) - vm_region_look_for_page(obj, extended, offset, ref_count, 0, map, va); +extern void vm_map_header_print( + struct vm_map_header *header); - if (extended->shadow_depth || entry->needs_copy) - extended->share_mode = SM_COW; - else { - if (ref_count == 1) - extended->share_mode = SM_PRIVATE; - else { - if (obj->true_share) - extended->share_mode = SM_TRUESHARED; - else - extended->share_mode = SM_SHARED; - } - } - extended->ref_count = ref_count - extended->shadow_depth; - - for (i = 0; i < extended->shadow_depth; i++) { - if ((tmp_obj = obj->shadow) == 0) - break; - vm_object_lock(tmp_obj); - vm_object_unlock(obj); +extern void vm_map_entry_print( + vm_map_entry_t entry); - if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) - ref_count--; +extern void vm_follow_entry( + vm_map_entry_t entry); - extended->ref_count += ref_count; - obj = tmp_obj; - } - vm_object_unlock(obj); +extern void vm_follow_map( + vm_map_t map); - if (extended->share_mode == SM_SHARED) { - register vm_map_entry_t cur; - register vm_map_entry_t last; - int my_refs; +/* + * vm_map_links_print: [ debug ] + */ +void +vm_map_links_print( + struct vm_map_links *links) +{ + iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n", + links->prev, + links->next, + (unsigned long long)links->start, + (unsigned long long)links->end); +} - obj = entry->object.vm_object; - last = vm_map_to_entry(map); - my_refs = 0; +/* + * vm_map_header_print: [ debug ] + */ +void +vm_map_header_print( + struct vm_map_header *header) +{ + vm_map_links_print(&header->links); + iprintf("nentries = %08X, %sentries_pageable\n", + header->nentries, + (header->entries_pageable ? 
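/*
 * Illustrative sketch, not part of the patch: the iprintf()/db_indent
 * convention the ddb printing routines above rely on -- a global
 * indentation level bumped on entry to each nested structure.  This
 * stand-alone version uses plain printf() instead of db_printf().
 */
#include <stdarg.h>
#include <stdio.h>

static int db_indent;			/* current indentation, in spaces */

static void
iprintf(const char *fmt, ...)
{
	va_list ap;

	printf("%*s", db_indent, "");	/* emit leading indentation */
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}

static void
print_nested(int depth)
{
	iprintf("level %d\n", depth);
	if (depth < 3) {
		db_indent += 2;		/* children print two spaces deeper */
		print_nested(depth + 1);
		db_indent -= 2;
	}
}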
"" : "!")); +} - if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) - ref_count--; - for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) - my_refs += vm_region_count_obj_refs(cur, obj); +/* + * vm_follow_entry: [ debug ] + */ +void +vm_follow_entry( + vm_map_entry_t entry) +{ + int shadows; - if (my_refs == ref_count) - extended->share_mode = SM_PRIVATE_ALIASED; - else if (my_refs > 1) - extended->share_mode = SM_SHARED_ALIASED; - } - } -} + iprintf("map entry %08X\n", entry); + db_indent += 2; -/* object is locked on entry and locked on return */ + shadows = vm_follow_object(entry->object.vm_object); + iprintf("Total objects : %d\n",shadows); + db_indent -= 2; +} +/* + * vm_map_entry_print: [ debug ] + */ void -vm_region_look_for_page( - vm_object_t object, - vm_region_extended_info_t extended, - vm_object_offset_t offset, - int max_refcnt, - int depth, - vm_map_t map, - vm_offset_t va) +vm_map_entry_print( + register vm_map_entry_t entry) { - register vm_page_t p; - register vm_object_t shadow; - register int ref_count; - vm_object_t caller_object; - - shadow = object->shadow; - caller_object = object; - + static const char *inheritance_name[4] = + { "share", "copy", "none", "?"}; + static const char *behavior_name[4] = + { "dflt", "rand", "seqtl", "rseqntl" }; - while (TRUE) { - - if ( !(object->pager_trusted) && !(object->internal)) - extended->external_pager = 1; - - if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - if (shadow && (max_refcnt == 1)) - extended->pages_shared_now_private++; - - if (!p->fictitious && - (p->dirty || pmap_is_modified(p->phys_page))) - extended->pages_dirtied++; - extended->pages_resident++; - - if(object != caller_object) - vm_object_unlock(object); - - return; - } - if (object->existence_map) { - if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { - - extended->pages_swapped_out++; - - if(object != caller_object) - vm_object_unlock(object); + iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next); - return; - } - } - if (shadow) { - vm_object_lock(shadow); + db_indent += 2; - if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) - ref_count--; + vm_map_links_print(&entry->links); - if (++depth > extended->shadow_depth) - extended->shadow_depth = depth; + iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n", + (unsigned long long)entry->vme_start, + (unsigned long long)entry->vme_end, + entry->protection, + entry->max_protection, + inheritance_name[(entry->inheritance & 0x3)]); - if (ref_count > max_refcnt) - max_refcnt = ref_count; - - if(object != caller_object) - vm_object_unlock(object); + iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n", + behavior_name[(entry->behavior & 0x3)], + entry->wired_count, + entry->user_wired_count); + iprintf("%sin_transition, %sneeds_wakeup\n", + (entry->in_transition ? "" : "!"), + (entry->needs_wakeup ? "" : "!")); - object = shadow; - shadow = object->shadow; - offset = offset + object->shadow_offset; - continue; - } - if(object != caller_object) - vm_object_unlock(object); - break; + if (entry->is_sub_map) { + iprintf("submap = %08X - offset = %016llX\n", + entry->object.sub_map, + (unsigned long long)entry->offset); + } else { + iprintf("object = %08X offset = %016llX - ", + entry->object.vm_object, + (unsigned long long)entry->offset); + printf("%sis_shared, %sneeds_copy\n", + (entry->is_shared ? "" : "!"), + (entry->needs_copy ? 
"" : "!")); } -} + db_indent -= 2; +} -vm_region_count_obj_refs( - vm_map_entry_t entry, - vm_object_t object) +/* + * vm_follow_map: [ debug ] + */ +void +vm_follow_map( + vm_map_t map) { - register int ref_count; - register vm_object_t chk_obj; - register vm_object_t tmp_obj; - - if (entry->object.vm_object == 0) - return(0); - - if (entry->is_sub_map) - return(0); - else { - ref_count = 0; + register vm_map_entry_t entry; - chk_obj = entry->object.vm_object; - vm_object_lock(chk_obj); + iprintf("task map %08X\n", map); - while (chk_obj) { - if (chk_obj == object) - ref_count++; - if (tmp_obj = chk_obj->shadow) - vm_object_lock(tmp_obj); - vm_object_unlock(chk_obj); + db_indent += 2; - chk_obj = tmp_obj; - } + for (entry = vm_map_first_entry(map); + entry && entry != vm_map_to_entry(map); + entry = entry->vme_next) { + vm_follow_entry(entry); } - return(ref_count); -} + db_indent -= 2; +} /* - * Routine: vm_map_simplify - * - * Description: - * Attempt to simplify the map representation in - * the vicinity of the given starting address. - * Note: - * This routine is intended primarily to keep the - * kernel maps more compact -- they generally don't - * benefit from the "expand a map entry" technology - * at allocation time because the adjacent entry - * is often wired down. + * vm_map_print: [ debug ] */ void -vm_map_simplify_entry( - vm_map_t map, - vm_map_entry_t this_entry) +vm_map_print( + db_addr_t inmap) { - vm_map_entry_t prev_entry; + register vm_map_entry_t entry; + vm_map_t map; +#if TASK_SWAPPER + char *swstate; +#endif /* TASK_SWAPPER */ - prev_entry = this_entry->vme_prev; + map = (vm_map_t)(long) + inmap; /* Make sure we have the right type */ - if ((this_entry != vm_map_to_entry(map)) && - (prev_entry != vm_map_to_entry(map)) && + iprintf("task map %08X\n", map); - (prev_entry->vme_end == this_entry->vme_start) && + db_indent += 2; - (prev_entry->is_sub_map == FALSE) && - (this_entry->is_sub_map == FALSE) && - - (prev_entry->object.vm_object == this_entry->object.vm_object) && - ((prev_entry->offset + (prev_entry->vme_end - - prev_entry->vme_start)) - == this_entry->offset) && - - (prev_entry->inheritance == this_entry->inheritance) && - (prev_entry->protection == this_entry->protection) && - (prev_entry->max_protection == this_entry->max_protection) && - (prev_entry->behavior == this_entry->behavior) && - (prev_entry->alias == this_entry->alias) && - (prev_entry->wired_count == this_entry->wired_count) && - (prev_entry->user_wired_count == this_entry->user_wired_count) && - (prev_entry->needs_copy == this_entry->needs_copy) && - - (prev_entry->use_pmap == FALSE) && - (this_entry->use_pmap == FALSE) && - (prev_entry->in_transition == FALSE) && - (this_entry->in_transition == FALSE) && - (prev_entry->needs_wakeup == FALSE) && - (this_entry->needs_wakeup == FALSE) && - (prev_entry->is_shared == FALSE) && - (this_entry->is_shared == FALSE) - ) { - _vm_map_entry_unlink(&map->hdr, prev_entry); - this_entry->vme_start = prev_entry->vme_start; - this_entry->offset = prev_entry->offset; - vm_object_deallocate(prev_entry->object.vm_object); - vm_map_entry_dispose(map, prev_entry); - SAVE_HINT(map, this_entry); - counter(c_vm_map_simplified++); - } - counter(c_vm_map_simplify_entry_called++); -} + vm_map_header_print(&map->hdr); -void -vm_map_simplify( - vm_map_t map, - vm_offset_t start) -{ - vm_map_entry_t this_entry; + iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n", + map->pmap, + map->size, + map->ref_count, + map->hint, + map->first_free); - 
vm_map_lock(map); - if (vm_map_lookup_entry(map, start, &this_entry)) { - vm_map_simplify_entry(map, this_entry); - vm_map_simplify_entry(map, this_entry->vme_next); + iprintf("%swait_for_space, %swiring_required, timestamp = %d\n", + (map->wait_for_space ? "" : "!"), + (map->wiring_required ? "" : "!"), + map->timestamp); + +#if TASK_SWAPPER + switch (map->sw_state) { + case MAP_SW_IN: + swstate = "SW_IN"; + break; + case MAP_SW_OUT: + swstate = "SW_OUT"; + break; + default: + swstate = "????"; + break; } - counter(c_vm_map_simplify_called++); - vm_map_unlock(map); + iprintf("res = %d, sw_state = %s\n", map->res_count, swstate); +#endif /* TASK_SWAPPER */ + + for (entry = vm_map_first_entry(map); + entry && entry != vm_map_to_entry(map); + entry = entry->vme_next) { + vm_map_entry_print(entry); + } + + db_indent -= 2; } /* - * Routine: vm_map_machine_attribute + * Routine: vm_map_copy_print * Purpose: - * Provide machine-specific attributes to mappings, - * such as cachability etc. for machines that provide - * them. NUMA architectures and machines with big/strange - * caches will use this. - * Note: - * Responsibilities for locking and checking are handled here, - * everything else in the pmap module. If any non-volatile - * information must be kept, the pmap module should handle - * it itself. [This assumes that attributes do not - * need to be inherited, which seems ok to me] + * Pretty-print a copy object for ddb. */ -kern_return_t -vm_map_machine_attribute( - vm_map_t map, - vm_offset_t address, - vm_size_t size, - vm_machine_attribute_t attribute, - vm_machine_attribute_val_t* value) /* IN/OUT */ + +void +vm_map_copy_print( + db_addr_t incopy) { - kern_return_t ret; - vm_size_t sync_size; - vm_offset_t start; + vm_map_copy_t copy; vm_map_entry_t entry; - - if (address < vm_map_min(map) || - (address + size) > vm_map_max(map)) - return KERN_INVALID_ADDRESS; - vm_map_lock(map); - - if (attribute != MATTR_CACHE) { - /* If we don't have to find physical addresses, we */ - /* don't have to do an explicit traversal here. 
*/ - ret = pmap_attribute(map->pmap, - address, size, attribute, value); - vm_map_unlock(map); - return ret; - } - - /* Get the starting address */ - start = trunc_page_32(address); - /* Figure how much memory we need to flush (in page increments) */ - sync_size = round_page_32(start + size) - start; + copy = (vm_map_copy_t)(long) + incopy; /* Make sure we have the right type */ + printf("copy object 0x%x\n", copy); - ret = KERN_SUCCESS; /* Assume it all worked */ + db_indent += 2; - while(sync_size) { - if (vm_map_lookup_entry(map, start, &entry)) { - vm_size_t sub_size; - if((entry->vme_end - start) > sync_size) { - sub_size = sync_size; - sync_size = 0; - } else { - sub_size = entry->vme_end - start; - sync_size -= sub_size; - } - if(entry->is_sub_map) { - vm_map_machine_attribute( - entry->object.sub_map, - (start - entry->vme_start) - + entry->offset, - sub_size, - attribute, value); - } else { - if(entry->object.vm_object) { - vm_page_t m; - vm_object_t object; - vm_object_t base_object; - vm_object_offset_t offset; - vm_object_offset_t base_offset; - vm_size_t range; - range = sub_size; - offset = (start - entry->vme_start) - + entry->offset; - base_offset = offset; - object = entry->object.vm_object; - base_object = object; - while(range) { - m = vm_page_lookup( - object, offset); - if(m && !m->fictitious) { - - ret = - pmap_attribute_cache_sync( - m->phys_page, - PAGE_SIZE, - attribute, value); - } else if (object->shadow) { - offset = offset + - object->shadow_offset; - object = object->shadow; - continue; - } - range -= PAGE_SIZE; - /* Bump to the next page */ - base_offset += PAGE_SIZE; - offset = base_offset; - object = base_object; - - } - } - } - start += sub_size; - } else { - vm_map_unlock(map); - return KERN_FAILURE; - } + iprintf("type=%d", copy->type); + switch (copy->type) { + case VM_MAP_COPY_ENTRY_LIST: + printf("[entry_list]"); + break; - } - - vm_map_unlock(map); - - return ret; -} - -/* - * vm_map_behavior_set: - * - * Sets the paging reference behavior of the specified address - * range in the target map. Paging reference behavior affects - * how pagein operations resulting from faults on the map will be - * clustered. - */ -kern_return_t -vm_map_behavior_set( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_behavior_t new_behavior) -{ - register vm_map_entry_t entry; - vm_map_entry_t temp_entry; - - XPR(XPR_VM_MAP, - "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", - (integer_t)map, start, end, new_behavior, 0); - - switch (new_behavior) { - case VM_BEHAVIOR_DEFAULT: - case VM_BEHAVIOR_RANDOM: - case VM_BEHAVIOR_SEQUENTIAL: - case VM_BEHAVIOR_RSEQNTL: + case VM_MAP_COPY_OBJECT: + printf("[object]"); break; - case VM_BEHAVIOR_WILLNEED: - case VM_BEHAVIOR_DONTNEED: - new_behavior = VM_BEHAVIOR_DEFAULT; + + case VM_MAP_COPY_KERNEL_BUFFER: + printf("[kernel_buffer]"); break; - default: - return(KERN_INVALID_ARGUMENT); - } - - vm_map_lock(map); - /* - * The entire address range must be valid for the map. - * Note that vm_map_range_check() does a - * vm_map_lookup_entry() internally and returns the - * entry containing the start of the address range if - * the entire range is valid. 
- */ - if (vm_map_range_check(map, start, end, &temp_entry)) { - entry = temp_entry; - vm_map_clip_start(map, entry, start); - } - else { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); + default: + printf("[bad type]"); + break; } + printf(", offset=0x%llx", (unsigned long long)copy->offset); + printf(", size=0x%x\n", copy->size); - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { - vm_map_clip_end(map, entry, end); + switch (copy->type) { + case VM_MAP_COPY_ENTRY_LIST: + vm_map_header_print(©->cpy_hdr); + for (entry = vm_map_copy_first_entry(copy); + entry && entry != vm_map_copy_to_entry(copy); + entry = entry->vme_next) { + vm_map_entry_print(entry); + } + break; - entry->behavior = new_behavior; + case VM_MAP_COPY_OBJECT: + iprintf("object=0x%x\n", copy->cpy_object); + break; + + case VM_MAP_COPY_KERNEL_BUFFER: + iprintf("kernel buffer=0x%x", copy->cpy_kdata); + printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); + break; - entry = entry->vme_next; } - vm_map_unlock(map); - return(KERN_SUCCESS); + db_indent -=2; } - -#include -#if MACH_KDB -#include -#include - -#define printf db_printf - /* - * Forward declarations for internal functions. + * db_vm_map_total_size(map) [ debug ] + * + * return the total virtual size (in bytes) of the map */ -extern void vm_map_links_print( - struct vm_map_links *links); +vm_map_size_t +db_vm_map_total_size( + db_addr_t inmap) +{ + vm_map_entry_t entry; + vm_map_size_t total; + vm_map_t map; -extern void vm_map_header_print( - struct vm_map_header *header); + map = (vm_map_t)(long) + inmap; /* Make sure we have the right type */ -extern void vm_map_entry_print( - vm_map_entry_t entry); + total = 0; + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + total += entry->vme_end - entry->vme_start; + } -extern void vm_follow_entry( - vm_map_entry_t entry); + return total; +} -extern void vm_follow_map( - vm_map_t map); +#endif /* MACH_KDB */ /* - * vm_map_links_print: [ debug ] + * Routine: vm_map_entry_insert + * + * Descritpion: This routine inserts a new vm_entry in a locked map. */ -void -vm_map_links_print( - struct vm_map_links *links) +vm_map_entry_t +vm_map_entry_insert( + vm_map_t map, + vm_map_entry_t insp_entry, + vm_map_offset_t start, + vm_map_offset_t end, + vm_object_t object, + vm_object_offset_t offset, + boolean_t needs_copy, + boolean_t is_shared, + boolean_t in_transition, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_behavior_t behavior, + vm_inherit_t inheritance, + unsigned wired_count) { - iprintf("prev = %08X next = %08X start = %08X end = %08X\n", - links->prev, - links->next, - links->start, - links->end); -} + vm_map_entry_t new_entry; -/* - * vm_map_header_print: [ debug ] - */ -void -vm_map_header_print( - struct vm_map_header *header) -{ - vm_map_links_print(&header->links); - iprintf("nentries = %08X, %sentries_pageable\n", - header->nentries, - (header->entries_pageable ? 
"" : "!")); -} + assert(insp_entry != (vm_map_entry_t)0); -/* - * vm_follow_entry: [ debug ] - */ -void -vm_follow_entry( - vm_map_entry_t entry) -{ - extern int db_indent; - int shadows; + new_entry = vm_map_entry_create(map); - iprintf("map entry %08X\n", entry); + new_entry->vme_start = start; + new_entry->vme_end = end; + assert(page_aligned(new_entry->vme_start)); + assert(page_aligned(new_entry->vme_end)); - db_indent += 2; + new_entry->object.vm_object = object; + new_entry->offset = offset; + new_entry->is_shared = is_shared; + new_entry->is_sub_map = FALSE; + new_entry->needs_copy = needs_copy; + new_entry->in_transition = in_transition; + new_entry->needs_wakeup = FALSE; + new_entry->inheritance = inheritance; + new_entry->protection = cur_protection; + new_entry->max_protection = max_protection; + new_entry->behavior = behavior; + new_entry->wired_count = wired_count; + new_entry->user_wired_count = 0; + new_entry->use_pmap = FALSE; - shadows = vm_follow_object(entry->object.vm_object); - iprintf("Total objects : %d\n",shadows); + /* + * Insert the new entry into the list. + */ - db_indent -= 2; + vm_map_entry_link(map, insp_entry, new_entry); + map->size += end - start; + + /* + * Update the free space hint and the lookup hint. + */ + + SAVE_HINT(map, new_entry); + return new_entry; } /* - * vm_map_entry_print: [ debug ] + * Routine: vm_map_remap_extract + * + * Descritpion: This routine returns a vm_entry list from a map. */ -void -vm_map_entry_print( - register vm_map_entry_t entry) +static kern_return_t +vm_map_remap_extract( + vm_map_t map, + vm_map_offset_t addr, + vm_map_size_t size, + boolean_t copy, + struct vm_map_header *map_header, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + /* What, no behavior? */ + vm_inherit_t inheritance, + boolean_t pageable) { - extern int db_indent; - static char *inheritance_name[4] = { "share", "copy", "none", "?"}; - static char *behavior_name[4] = { "dflt", "rand", "seqtl", "rseqntl" }; - - iprintf("map entry %08X n", entry); - - db_indent += 2; + kern_return_t result; + vm_map_size_t mapped_size; + vm_map_size_t tmp_size; + vm_map_entry_t src_entry; /* result of last map lookup */ + vm_map_entry_t new_entry; + vm_object_offset_t offset; + vm_map_offset_t map_address; + vm_map_offset_t src_start; /* start of entry to map */ + vm_map_offset_t src_end; /* end of region to be mapped */ + vm_object_t object; + vm_map_version_t version; + boolean_t src_needs_copy; + boolean_t new_entry_needs_copy; - vm_map_links_print(&entry->links); + assert(map != VM_MAP_NULL); + assert(size != 0 && size == vm_map_round_page(size)); + assert(inheritance == VM_INHERIT_NONE || + inheritance == VM_INHERIT_COPY || + inheritance == VM_INHERIT_SHARE); - iprintf("start = %08X end = %08X, prot=%x/%x/%s\n", - entry->vme_start, - entry->vme_end, - entry->protection, - entry->max_protection, - inheritance_name[(entry->inheritance & 0x3)]); + /* + * Compute start and end of region. + */ + src_start = vm_map_trunc_page(addr); + src_end = vm_map_round_page(src_start + size); - iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n", - behavior_name[(entry->behavior & 0x3)], - entry->wired_count, - entry->user_wired_count); - iprintf("%sin_transition, %sneeds_wakeup\n", - (entry->in_transition ? "" : "!"), - (entry->needs_wakeup ? "" : "!")); + /* + * Initialize map_header. 
+ */ + map_header->links.next = (struct vm_map_entry *)&map_header->links; + map_header->links.prev = (struct vm_map_entry *)&map_header->links; + map_header->nentries = 0; + map_header->entries_pageable = pageable; - if (entry->is_sub_map) { - iprintf("submap = %08X - offset=%08X\n", - entry->object.sub_map, - entry->offset); - } else { - iprintf("object=%08X, offset=%08X, ", - entry->object.vm_object, - entry->offset); - printf("%sis_shared, %sneeds_copy\n", - (entry->is_shared ? "" : "!"), - (entry->needs_copy ? "" : "!")); - } + *cur_protection = VM_PROT_ALL; + *max_protection = VM_PROT_ALL; - db_indent -= 2; -} + map_address = 0; + mapped_size = 0; + result = KERN_SUCCESS; -/* - * vm_follow_map: [ debug ] - */ -void -vm_follow_map( - vm_map_t map) -{ - register vm_map_entry_t entry; - extern int db_indent; + /* + * The specified source virtual space might correspond to + * multiple map entries, need to loop on them. + */ + vm_map_lock(map); + while (mapped_size != size) { + vm_map_size_t entry_size; - iprintf("task map %08X\n", map); + /* + * Find the beginning of the region. + */ + if (! vm_map_lookup_entry(map, src_start, &src_entry)) { + result = KERN_INVALID_ADDRESS; + break; + } - db_indent += 2; + if (src_start < src_entry->vme_start || + (mapped_size && src_start != src_entry->vme_start)) { + result = KERN_INVALID_ADDRESS; + break; + } - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_follow_entry(entry); - } + if(src_entry->is_sub_map) { + result = KERN_INVALID_ADDRESS; + break; + } - db_indent -= 2; -} + tmp_size = size - mapped_size; + if (src_end > src_entry->vme_end) + tmp_size -= (src_end - src_entry->vme_end); -/* - * vm_map_print: [ debug ] - */ -void -vm_map_print( - db_addr_t inmap) -{ - register vm_map_entry_t entry; - vm_map_t map; - extern int db_indent; - char *swstate; + entry_size = (vm_map_size_t)(src_entry->vme_end - + src_entry->vme_start); - map = (vm_map_t)inmap; /* Make sure we have the right type */ + if(src_entry->is_sub_map) { + vm_map_reference(src_entry->object.sub_map); + object = VM_OBJECT_NULL; + } else { + object = src_entry->object.vm_object; - iprintf("task map %08X\n", map); + if (object == VM_OBJECT_NULL) { + object = vm_object_allocate(entry_size); + src_entry->offset = 0; + src_entry->object.vm_object = object; + } else if (object->copy_strategy != + MEMORY_OBJECT_COPY_SYMMETRIC) { + /* + * We are already using an asymmetric + * copy, and therefore we already have + * the right object. 
+ */ + assert(!src_entry->needs_copy); + } else if (src_entry->needs_copy || object->shadowed || + (object->internal && !object->true_share && + !src_entry->is_shared && + object->size > entry_size)) { - db_indent += 2; + vm_object_shadow(&src_entry->object.vm_object, + &src_entry->offset, + entry_size); - vm_map_header_print(&map->hdr); + if (!src_entry->needs_copy && + (src_entry->protection & VM_PROT_WRITE)) { + if(map->mapped) { + vm_object_pmap_protect( + src_entry->object.vm_object, + src_entry->offset, + entry_size, + PMAP_NULL, + src_entry->vme_start, + src_entry->protection & + ~VM_PROT_WRITE); + } else { + pmap_protect(vm_map_pmap(map), + src_entry->vme_start, + src_entry->vme_end, + src_entry->protection & + ~VM_PROT_WRITE); + } + } - iprintf("pmap = %08X, size = %08X, ref = %d, hint = %08X, first_free = %08X\n", - map->pmap, - map->size, - map->ref_count, - map->hint, - map->first_free); + object = src_entry->object.vm_object; + src_entry->needs_copy = FALSE; + } - iprintf("%swait_for_space, %swiring_required, timestamp = %d\n", - (map->wait_for_space ? "" : "!"), - (map->wiring_required ? "" : "!"), - map->timestamp); -#if TASK_SWAPPER - switch (map->sw_state) { - case MAP_SW_IN: - swstate = "SW_IN"; - break; - case MAP_SW_OUT: - swstate = "SW_OUT"; - break; - default: - swstate = "????"; - break; - } - iprintf("res = %d, sw_state = %s\n", map->res_count, swstate); -#endif /* TASK_SWAPPER */ + vm_object_lock(object); + object->ref_count++; /* object ref. for new entry */ + VM_OBJ_RES_INCR(object); + if (object->copy_strategy == + MEMORY_OBJECT_COPY_SYMMETRIC) { + object->copy_strategy = + MEMORY_OBJECT_COPY_DELAY; + } + vm_object_unlock(object); + } - for (entry = vm_map_first_entry(map); - entry && entry != vm_map_to_entry(map); - entry = entry->vme_next) { - vm_map_entry_print(entry); - } + offset = src_entry->offset + (src_start - src_entry->vme_start); - db_indent -= 2; -} + new_entry = _vm_map_entry_create(map_header); + vm_map_entry_copy(new_entry, src_entry); + new_entry->use_pmap = FALSE; /* clr address space specifics */ -/* - * Routine: vm_map_copy_print - * Purpose: - * Pretty-print a copy object for ddb. - */ + new_entry->vme_start = map_address; + new_entry->vme_end = map_address + tmp_size; + new_entry->inheritance = inheritance; + new_entry->offset = offset; -void -vm_map_copy_print( - db_addr_t incopy) -{ - extern int db_indent; - vm_map_copy_t copy; - int i, npages; - vm_map_entry_t entry; + /* + * The new region has to be copied now if required. + */ + RestartCopy: + if (!copy) { + src_entry->is_shared = TRUE; + new_entry->is_shared = TRUE; + if (!(new_entry->is_sub_map)) + new_entry->needs_copy = FALSE; - copy = (vm_map_copy_t)incopy; /* Make sure we have the right type */ + } else if (src_entry->is_sub_map) { + /* make this a COW sub_map if not already */ + new_entry->needs_copy = TRUE; + object = VM_OBJECT_NULL; + } else if (src_entry->wired_count == 0 && + vm_object_copy_quickly(&new_entry->object.vm_object, + new_entry->offset, + (new_entry->vme_end - + new_entry->vme_start), + &src_needs_copy, + &new_entry_needs_copy)) { - printf("copy object 0x%x\n", copy); + new_entry->needs_copy = new_entry_needs_copy; + new_entry->is_shared = FALSE; - db_indent += 2; + /* + * Handle copy_on_write semantics. + */ + if (src_needs_copy && !src_entry->needs_copy) { + vm_object_pmap_protect(object, + offset, + entry_size, + ((src_entry->is_shared + || map->mapped) ? 
+ PMAP_NULL : map->pmap), + src_entry->vme_start, + src_entry->protection & + ~VM_PROT_WRITE); - iprintf("type=%d", copy->type); - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - printf("[entry_list]"); - break; - - case VM_MAP_COPY_OBJECT: - printf("[object]"); - break; - - case VM_MAP_COPY_KERNEL_BUFFER: - printf("[kernel_buffer]"); - break; - - default: - printf("[bad type]"); - break; - } - printf(", offset=0x%x", copy->offset); - printf(", size=0x%x\n", copy->size); - - switch (copy->type) { - case VM_MAP_COPY_ENTRY_LIST: - vm_map_header_print(©->cpy_hdr); - for (entry = vm_map_copy_first_entry(copy); - entry && entry != vm_map_copy_to_entry(copy); - entry = entry->vme_next) { - vm_map_entry_print(entry); - } - break; - - case VM_MAP_COPY_OBJECT: - iprintf("object=0x%x\n", copy->cpy_object); - break; - - case VM_MAP_COPY_KERNEL_BUFFER: - iprintf("kernel buffer=0x%x", copy->cpy_kdata); - printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); - break; - - } + src_entry->needs_copy = TRUE; + } + /* + * Throw away the old object reference of the new entry. + */ + vm_object_deallocate(object); - db_indent -=2; -} + } else { + new_entry->is_shared = FALSE; -/* - * db_vm_map_total_size(map) [ debug ] - * - * return the total virtual size (in bytes) of the map - */ -vm_size_t -db_vm_map_total_size( - db_addr_t inmap) -{ - vm_map_entry_t entry; - vm_size_t total; - vm_map_t map; + /* + * The map can be safely unlocked since we + * already hold a reference on the object. + * + * Record the timestamp of the map for later + * verification, and unlock the map. + */ + version.main_timestamp = map->timestamp; + vm_map_unlock(map); /* Increments timestamp once! */ - map = (vm_map_t)inmap; /* Make sure we have the right type */ + /* + * Perform the copy. + */ + if (src_entry->wired_count > 0) { + vm_object_lock(object); + result = vm_object_copy_slowly( + object, + offset, + entry_size, + THREAD_UNINT, + &new_entry->object.vm_object); - total = 0; - for (entry = vm_map_first_entry(map); - entry != vm_map_to_entry(map); - entry = entry->vme_next) { - total += entry->vme_end - entry->vme_start; - } + new_entry->offset = 0; + new_entry->needs_copy = FALSE; + } else { + result = vm_object_copy_strategically( + object, + offset, + entry_size, + &new_entry->object.vm_object, + &new_entry->offset, + &new_entry_needs_copy); - return total; -} + new_entry->needs_copy = new_entry_needs_copy; + } -#endif /* MACH_KDB */ + /* + * Throw away the old object reference of the new entry. + */ + vm_object_deallocate(object); -/* - * Routine: vm_map_entry_insert - * - * Descritpion: This routine inserts a new vm_entry in a locked map. - */ -vm_map_entry_t -vm_map_entry_insert( - vm_map_t map, - vm_map_entry_t insp_entry, - vm_offset_t start, - vm_offset_t end, - vm_object_t object, - vm_object_offset_t offset, - boolean_t needs_copy, - boolean_t is_shared, - boolean_t in_transition, - vm_prot_t cur_protection, - vm_prot_t max_protection, - vm_behavior_t behavior, - vm_inherit_t inheritance, - unsigned wired_count) -{ - vm_map_entry_t new_entry; + if (result != KERN_SUCCESS && + result != KERN_MEMORY_RESTART_COPY) { + _vm_map_entry_dispose(map_header, new_entry); + break; + } - assert(insp_entry != (vm_map_entry_t)0); + /* + * Verify that the map has not substantially + * changed while the copy was being made. + */ - new_entry = vm_map_entry_create(map); + vm_map_lock(map); + if (version.main_timestamp + 1 != map->timestamp) { + /* + * Simple version comparison failed. 
+ * + * Retry the lookup and verify that the + * same object/offset are still present. + */ + vm_object_deallocate(new_entry-> + object.vm_object); + _vm_map_entry_dispose(map_header, new_entry); + if (result == KERN_MEMORY_RESTART_COPY) + result = KERN_SUCCESS; + continue; + } - new_entry->vme_start = start; - new_entry->vme_end = end; - assert(page_aligned(new_entry->vme_start)); - assert(page_aligned(new_entry->vme_end)); + if (result == KERN_MEMORY_RESTART_COPY) { + vm_object_reference(object); + goto RestartCopy; + } + } - new_entry->object.vm_object = object; - new_entry->offset = offset; - new_entry->is_shared = is_shared; - new_entry->is_sub_map = FALSE; - new_entry->needs_copy = needs_copy; - new_entry->in_transition = in_transition; - new_entry->needs_wakeup = FALSE; - new_entry->inheritance = inheritance; - new_entry->protection = cur_protection; - new_entry->max_protection = max_protection; - new_entry->behavior = behavior; - new_entry->wired_count = wired_count; - new_entry->user_wired_count = 0; - new_entry->use_pmap = FALSE; + _vm_map_entry_link(map_header, + map_header->links.prev, new_entry); - /* - * Insert the new entry into the list. - */ + *cur_protection &= src_entry->protection; + *max_protection &= src_entry->max_protection; - vm_map_entry_link(map, insp_entry, new_entry); - map->size += end - start; + map_address += tmp_size; + mapped_size += tmp_size; + src_start += tmp_size; - /* - * Update the free space hint and the lookup hint. - */ + } /* end while */ - SAVE_HINT(map, new_entry); - return new_entry; + vm_map_unlock(map); + if (result != KERN_SUCCESS) { + /* + * Free all allocated elements. + */ + for (src_entry = map_header->links.next; + src_entry != (struct vm_map_entry *)&map_header->links; + src_entry = new_entry) { + new_entry = src_entry->vme_next; + _vm_map_entry_unlink(map_header, src_entry); + vm_object_deallocate(src_entry->object.vm_object); + _vm_map_entry_dispose(map_header, src_entry); + } + } + return result; } /* - * Routine: vm_remap_extract + * Routine: vm_remap * - * Descritpion: This routine returns a vm_entry list from a map. + * Map portion of a task's address space. + * Mapped region must not overlap more than + * one vm memory object. Protections and + * inheritance attributes remain the same + * as in the original task and are out parameters. + * Source and Target task can be identical + * Other attributes are identical as for vm_map() */ kern_return_t -vm_remap_extract( - vm_map_t map, - vm_offset_t addr, - vm_size_t size, +vm_map_remap( + vm_map_t target_map, + vm_map_address_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + boolean_t anywhere, + vm_map_t src_map, + vm_map_offset_t memory_address, boolean_t copy, - struct vm_map_header *map_header, vm_prot_t *cur_protection, vm_prot_t *max_protection, - /* What, no behavior? 
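/*
 * Illustrative sketch, not part of the patch: the all-or-nothing
 * unwind at the end of vm_map_remap_extract above -- on failure, walk
 * the partially built list and dispose of every element, so the
 * caller never sees a half-populated header.  Toy types only; the
 * release() callback plays the role of vm_object_deallocate().
 */
#include <stdlib.h>

struct item {
	struct item *next;
	void *resource;			/* e.g. an object reference */
};

static void
unwind_list(struct item **headp, void (*release)(void *))
{
	struct item *it, *next;

	for (it = *headp; it != NULL; it = next) {
		next = it->next;
		release(it->resource);	/* drop the held reference */
		free(it);		/* _vm_map_entry_dispose analogue */
	}
	*headp = NULL;			/* leave an empty, consistent list */
}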
*/ - vm_inherit_t inheritance, - boolean_t pageable) + vm_inherit_t inheritance) { kern_return_t result; - vm_size_t mapped_size; - vm_size_t tmp_size; - vm_map_entry_t src_entry; /* result of last map lookup */ + vm_map_entry_t entry; + vm_map_entry_t insp_entry; vm_map_entry_t new_entry; - vm_object_offset_t offset; - vm_offset_t map_address; - vm_offset_t src_start; /* start of entry to map */ - vm_offset_t src_end; /* end of region to be mapped */ - vm_object_t object; - vm_map_version_t version; - boolean_t src_needs_copy; - boolean_t new_entry_needs_copy; + struct vm_map_header map_header; - assert(map != VM_MAP_NULL); - assert(size != 0 && size == round_page_32(size)); - assert(inheritance == VM_INHERIT_NONE || - inheritance == VM_INHERIT_COPY || - inheritance == VM_INHERIT_SHARE); + if (target_map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; - /* - * Compute start and end of region. - */ - src_start = trunc_page_32(addr); - src_end = round_page_32(src_start + size); + switch (inheritance) { + case VM_INHERIT_NONE: + case VM_INHERIT_COPY: + case VM_INHERIT_SHARE: + if (size != 0 && src_map != VM_MAP_NULL) + break; + /*FALL THRU*/ + default: + return KERN_INVALID_ARGUMENT; + } - /* - * Initialize map_header. - */ - map_header->links.next = (struct vm_map_entry *)&map_header->links; - map_header->links.prev = (struct vm_map_entry *)&map_header->links; - map_header->nentries = 0; - map_header->entries_pageable = pageable; + size = vm_map_round_page(size); - *cur_protection = VM_PROT_ALL; - *max_protection = VM_PROT_ALL; + result = vm_map_remap_extract(src_map, memory_address, + size, copy, &map_header, + cur_protection, + max_protection, + inheritance, + target_map->hdr. + entries_pageable); - map_address = 0; - mapped_size = 0; - result = KERN_SUCCESS; + if (result != KERN_SUCCESS) { + return result; + } - /* - * The specified source virtual space might correspond to - * multiple map entries, need to loop on them. + /* + * Allocate/check a range of free virtual address + * space for the target */ - vm_map_lock(map); - while (mapped_size != size) { - vm_size_t entry_size; + *address = vm_map_trunc_page(*address); + vm_map_lock(target_map); + result = vm_map_remap_range_allocate(target_map, address, size, + mask, anywhere, &insp_entry); - /* - * Find the beginning of the region. - */ - if (! 
vm_map_lookup_entry(map, src_start, &src_entry)) { - result = KERN_INVALID_ADDRESS; - break; + for (entry = map_header.links.next; + entry != (struct vm_map_entry *)&map_header.links; + entry = new_entry) { + new_entry = entry->vme_next; + _vm_map_entry_unlink(&map_header, entry); + if (result == KERN_SUCCESS) { + entry->vme_start += *address; + entry->vme_end += *address; + vm_map_entry_link(target_map, insp_entry, entry); + insp_entry = entry; + } else { + if (!entry->is_sub_map) { + vm_object_deallocate(entry->object.vm_object); + } else { + vm_map_deallocate(entry->object.sub_map); + } + _vm_map_entry_dispose(&map_header, entry); } + } - if (src_start < src_entry->vme_start || - (mapped_size && src_start != src_entry->vme_start)) { - result = KERN_INVALID_ADDRESS; - break; - } + if (result == KERN_SUCCESS) { + target_map->size += size; + SAVE_HINT(target_map, insp_entry); + } + vm_map_unlock(target_map); - if(src_entry->is_sub_map) { - result = KERN_INVALID_ADDRESS; - break; - } + if (result == KERN_SUCCESS && target_map->wiring_required) + result = vm_map_wire(target_map, *address, + *address + size, *cur_protection, TRUE); + return result; +} - tmp_size = size - mapped_size; - if (src_end > src_entry->vme_end) - tmp_size -= (src_end - src_entry->vme_end); +/* + * Routine: vm_map_remap_range_allocate + * + * Description: + * Allocate a range in the specified virtual address map. + * returns the address and the map entry just before the allocated + * range + * + * Map must be locked. + */ - entry_size = (vm_size_t)(src_entry->vme_end - - src_entry->vme_start); +static kern_return_t +vm_map_remap_range_allocate( + vm_map_t map, + vm_map_address_t *address, /* IN/OUT */ + vm_map_size_t size, + vm_map_offset_t mask, + boolean_t anywhere, + vm_map_entry_t *map_entry) /* OUT */ +{ + register vm_map_entry_t entry; + register vm_map_offset_t start; + register vm_map_offset_t end; - if(src_entry->is_sub_map) { - vm_map_reference(src_entry->object.sub_map); - } else { - object = src_entry->object.vm_object; + StartAgain: ; - if (object == VM_OBJECT_NULL) { - object = vm_object_allocate(entry_size); - src_entry->offset = 0; - src_entry->object.vm_object = object; - } else if (object->copy_strategy != - MEMORY_OBJECT_COPY_SYMMETRIC) { - /* - * We are already using an asymmetric - * copy, and therefore we already have - * the right object. - */ - assert(!src_entry->needs_copy); - } else if (src_entry->needs_copy || object->shadowed || - (object->internal && !object->true_share && - !src_entry->is_shared && - object->size > entry_size)) { + start = *address; - vm_object_shadow(&src_entry->object.vm_object, - &src_entry->offset, - entry_size); + if (anywhere) + { + /* + * Calculate the first possible address. + */ - if (!src_entry->needs_copy && - (src_entry->protection & VM_PROT_WRITE)) { - if(map->mapped) { - vm_object_pmap_protect( - src_entry->object.vm_object, - src_entry->offset, - entry_size, - PMAP_NULL, - src_entry->vme_start, - src_entry->protection & - ~VM_PROT_WRITE); - } else { - pmap_protect(vm_map_pmap(map), - src_entry->vme_start, - src_entry->vme_end, - src_entry->protection & - ~VM_PROT_WRITE); - } - } + if (start < map->min_offset) + start = map->min_offset; + if (start > map->max_offset) + return(KERN_NO_SPACE); + + /* + * Look for the first possible address; + * if there's already something at this + * address, we have to start after it. 
+ */ - object = src_entry->object.vm_object; - src_entry->needs_copy = FALSE; - } + assert(first_free_is_valid(map)); + if (start == map->min_offset) { + if ((entry = map->first_free) != vm_map_to_entry(map)) + start = entry->vme_end; + } else { + vm_map_entry_t tmp_entry; + if (vm_map_lookup_entry(map, start, &tmp_entry)) + start = tmp_entry->vme_end; + entry = tmp_entry; + } + + /* + * In any case, the "entry" always precedes + * the proposed new region throughout the + * loop: + */ + while (TRUE) { + register vm_map_entry_t next; - vm_object_lock(object); - object->ref_count++; /* object ref. for new entry */ - VM_OBJ_RES_INCR(object); - if (object->copy_strategy == - MEMORY_OBJECT_COPY_SYMMETRIC) { - object->copy_strategy = - MEMORY_OBJECT_COPY_DELAY; - } - vm_object_unlock(object); + /* + * Find the end of the proposed new region. + * Be sure we didn't go beyond the end, or + * wrap around the address. + */ + + end = ((start + mask) & ~mask); + if (end < start) + return(KERN_NO_SPACE); + start = end; + end += size; + + if ((end > map->max_offset) || (end < start)) { + if (map->wait_for_space) { + if (size <= (map->max_offset - + map->min_offset)) { + assert_wait((event_t) map, THREAD_INTERRUPTIBLE); + vm_map_unlock(map); + thread_block(THREAD_CONTINUE_NULL); + vm_map_lock(map); + goto StartAgain; + } } + + return(KERN_NO_SPACE); + } - offset = src_entry->offset + (src_start - src_entry->vme_start); + /* + * If there are no more entries, we must win. + */ - new_entry = _vm_map_entry_create(map_header); - vm_map_entry_copy(new_entry, src_entry); - new_entry->use_pmap = FALSE; /* clr address space specifics */ + next = entry->vme_next; + if (next == vm_map_to_entry(map)) + break; - new_entry->vme_start = map_address; - new_entry->vme_end = map_address + tmp_size; - new_entry->inheritance = inheritance; - new_entry->offset = offset; + /* + * If there is another entry, it must be + * after the end of the potential new region. + */ - /* - * The new region has to be copied now if required. - */ - RestartCopy: - if (!copy) { - src_entry->is_shared = TRUE; - new_entry->is_shared = TRUE; - if (!(new_entry->is_sub_map)) - new_entry->needs_copy = FALSE; + if (next->vme_start >= end) + break; - } else if (src_entry->is_sub_map) { - /* make this a COW sub_map if not already */ - new_entry->needs_copy = TRUE; - } else if (src_entry->wired_count == 0 && - vm_object_copy_quickly(&new_entry->object.vm_object, - new_entry->offset, - (new_entry->vme_end - - new_entry->vme_start), - &src_needs_copy, - &new_entry_needs_copy)) { + /* + * Didn't fit -- move to the next entry. + */ - new_entry->needs_copy = new_entry_needs_copy; - new_entry->is_shared = FALSE; + entry = next; + start = entry->vme_end; + } + *address = start; + } else { + vm_map_entry_t temp_entry; + + /* + * Verify that: + * the address doesn't itself violate + * the mask requirement. + */ - /* - * Handle copy_on_write semantics. - */ - if (src_needs_copy && !src_entry->needs_copy) { - vm_object_pmap_protect(object, - offset, - entry_size, - ((src_entry->is_shared - || map->mapped) ? - PMAP_NULL : map->pmap), - src_entry->vme_start, - src_entry->protection & - ~VM_PROT_WRITE); + if ((start & mask) != 0) + return(KERN_NO_SPACE); - src_entry->needs_copy = TRUE; - } - /* - * Throw away the old object reference of the new entry. - */ - vm_object_deallocate(object); - } else { - new_entry->is_shared = FALSE; + /* + * ... the address is within bounds + */ - /* - * The map can be safely unlocked since we - * already hold a reference on the object. 
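/*
 * Illustrative sketch, not part of the patch: the alignment and
 * overflow checks in the first-fit loop above.  `mask' selects the
 * low bits that must be clear in the chosen address, so rounding up
 * is (start + mask) & ~mask; both that rounding and the subsequent
 * `start + size' can wrap, which is why each result is checked.
 */
#include <stdbool.h>
#include <stdint.h>

/* Round `start' up to the alignment implied by `mask'; detect wrap. */
static bool
align_range(uint64_t *start, uint64_t size, uint64_t mask,
    uint64_t max_offset, uint64_t *end_out)
{
	uint64_t aligned = (*start + mask) & ~mask;

	if (aligned < *start)
		return false;	/* rounding wrapped the address */
	*start = aligned;
	*end_out = aligned + size;
	if (*end_out < aligned || *end_out > max_offset)
		return false;	/* range wraps or exceeds the map */
	return true;
}
/*
 * On failure the kernel loop advances to the next entry's end and
 * retries, or blocks on the map when wait_for_space is set.
 */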
- * - * Record the timestamp of the map for later - * verification, and unlock the map. - */ - version.main_timestamp = map->timestamp; - vm_map_unlock(map); /* Increments timestamp once! */ + end = start + size; - /* - * Perform the copy. - */ - if (src_entry->wired_count > 0) { - vm_object_lock(object); - result = vm_object_copy_slowly( - object, - offset, - entry_size, - THREAD_UNINT, - &new_entry->object.vm_object); + if ((start < map->min_offset) || + (end > map->max_offset) || + (start >= end)) { + return(KERN_INVALID_ADDRESS); + } - new_entry->offset = 0; - new_entry->needs_copy = FALSE; - } else { - result = vm_object_copy_strategically( - object, - offset, - entry_size, - &new_entry->object.vm_object, - &new_entry->offset, - &new_entry_needs_copy); + /* + * ... the starting address isn't allocated + */ + + if (vm_map_lookup_entry(map, start, &temp_entry)) + return(KERN_NO_SPACE); + + entry = temp_entry; + + /* + * ... the next region doesn't overlap the + * end point. + */ - new_entry->needs_copy = new_entry_needs_copy; - } + if ((entry->vme_next != vm_map_to_entry(map)) && + (entry->vme_next->vme_start < end)) + return(KERN_NO_SPACE); + } + *map_entry = entry; + return(KERN_SUCCESS); +} - /* - * Throw away the old object reference of the new entry. - */ - vm_object_deallocate(object); +/* + * vm_map_switch: + * + * Set the address map for the current thread to the specified map + */ - if (result != KERN_SUCCESS && - result != KERN_MEMORY_RESTART_COPY) { - _vm_map_entry_dispose(map_header, new_entry); - break; - } +vm_map_t +vm_map_switch( + vm_map_t map) +{ + int mycpu; + thread_t thread = current_thread(); + vm_map_t oldmap = thread->map; - /* - * Verify that the map has not substantially - * changed while the copy was being made. - */ + mp_disable_preemption(); + mycpu = cpu_number(); - vm_map_lock(map); - if (version.main_timestamp + 1 != map->timestamp) { - /* - * Simple version comparison failed. - * - * Retry the lookup and verify that the - * same object/offset are still present. - */ - vm_object_deallocate(new_entry-> - object.vm_object); - _vm_map_entry_dispose(map_header, new_entry); - if (result == KERN_MEMORY_RESTART_COPY) - result = KERN_SUCCESS; - continue; - } + /* + * Deactivate the current map and activate the requested map + */ + PMAP_SWITCH_USER(thread, map, mycpu); - if (result == KERN_MEMORY_RESTART_COPY) { - vm_object_reference(object); - goto RestartCopy; - } - } + mp_enable_preemption(); + return(oldmap); +} - _vm_map_entry_link(map_header, - map_header->links.prev, new_entry); - *cur_protection &= src_entry->protection; - *max_protection &= src_entry->max_protection; +/* + * Routine: vm_map_write_user + * + * Description: + * Copy out data from a kernel space into space in the + * destination map. The space must already exist in the + * destination map. + * NOTE: This routine should only be called by threads + * which can block on a page fault. i.e. kernel mode user + * threads. + * + */ +kern_return_t +vm_map_write_user( + vm_map_t map, + void *src_p, + vm_map_address_t dst_addr, + vm_size_t size) +{ + kern_return_t kr = KERN_SUCCESS; - map_address += tmp_size; - mapped_size += tmp_size; - src_start += tmp_size; + if(current_map() == map) { + if (copyout(src_p, dst_addr, size)) { + kr = KERN_INVALID_ADDRESS; + } + } else { + vm_map_t oldmap; - } /* end while */ + /* take on the identity of the target map while doing */ + /* the transfer */ - vm_map_unlock(map); - if (result != KERN_SUCCESS) { - /* - * Free all allocated elements. 
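/*
 * Illustrative sketch, not part of the patch: the pattern
 * vm_map_write_user above follows -- if the destination is the
 * caller's own map, copy directly; otherwise temporarily adopt the
 * target's identity, copy, and switch back.  switch_ctx() and
 * copy_out() are hypothetical stand-ins for vm_map_switch() and
 * copyout(); declarations only, to keep the shape visible.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

struct ctx;				/* opaque address-space context */

extern struct ctx *current_ctx(void);
extern struct ctx *switch_ctx(struct ctx *);	/* returns the old one */
extern bool copy_out(const void *src, uintptr_t dst, size_t len);

static bool
write_into(struct ctx *target, const void *src, uintptr_t dst, size_t len)
{
	struct ctx *old;
	bool ok;

	if (current_ctx() == target)
		return copy_out(src, dst, len);	/* fast path: own map */

	old = switch_ctx(target);	/* borrow the target's identity */
	ok = copy_out(src, dst, len);
	switch_ctx(old);		/* restore our own map */
	return ok;
}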
- */ - for (src_entry = map_header->links.next; - src_entry != (struct vm_map_entry *)&map_header->links; - src_entry = new_entry) { - new_entry = src_entry->vme_next; - _vm_map_entry_unlink(map_header, src_entry); - vm_object_deallocate(src_entry->object.vm_object); - _vm_map_entry_dispose(map_header, src_entry); + vm_map_reference(map); + oldmap = vm_map_switch(map); + if (copyout(src_p, dst_addr, size)) { + kr = KERN_INVALID_ADDRESS; } + vm_map_switch(oldmap); + vm_map_deallocate(map); } - return result; + return kr; } /* - * Routine: vm_remap + * Routine: vm_map_read_user + * + * Description: + * Copy in data from a user space source map into the + * kernel map. The space must already exist in the + * kernel map. + * NOTE: This routine should only be called by threads + * which can block on a page fault. i.e. kernel mode user + * threads. * - * Map portion of a task's address space. - * Mapped region must not overlap more than - * one vm memory object. Protections and - * inheritance attributes remain the same - * as in the original task and are out parameters. - * Source and Target task can be identical - * Other attributes are identical as for vm_map() */ kern_return_t -vm_remap( - vm_map_t target_map, - vm_offset_t *address, - vm_size_t size, - vm_offset_t mask, - boolean_t anywhere, - vm_map_t src_map, - vm_offset_t memory_address, - boolean_t copy, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance) +vm_map_read_user( + vm_map_t map, + vm_map_address_t src_addr, + void *dst_p, + vm_size_t size) { - kern_return_t result; - vm_map_entry_t entry; - vm_map_entry_t insp_entry; - vm_map_entry_t new_entry; - struct vm_map_header map_header; + kern_return_t kr = KERN_SUCCESS; - if (target_map == VM_MAP_NULL) - return KERN_INVALID_ARGUMENT; + if(current_map() == map) { + if (copyin(src_addr, dst_p, size)) { + kr = KERN_INVALID_ADDRESS; + } + } else { + vm_map_t oldmap; - switch (inheritance) { - case VM_INHERIT_NONE: - case VM_INHERIT_COPY: - case VM_INHERIT_SHARE: - if (size != 0 && src_map != VM_MAP_NULL) - break; - /*FALL THRU*/ - default: - return KERN_INVALID_ARGUMENT; + /* take on the identity of the target map while doing */ + /* the transfer */ + + vm_map_reference(map); + oldmap = vm_map_switch(map); + if (copyin(src_addr, dst_p, size)) { + kr = KERN_INVALID_ADDRESS; + } + vm_map_switch(oldmap); + vm_map_deallocate(map); } + return kr; +} + - size = round_page_32(size); +/* + * vm_map_check_protection: + * + * Assert that the target map allows the specified + * privilege on the entire address region given. + * The entire region must be allocated. + */ +boolean_t vm_map_check_protection(map, start, end, protection) + register vm_map_t map; + register vm_map_offset_t start; + register vm_map_offset_t end; + register vm_prot_t protection; +{ + register vm_map_entry_t entry; + vm_map_entry_t tmp_entry; - result = vm_remap_extract(src_map, memory_address, - size, copy, &map_header, - cur_protection, - max_protection, - inheritance, - target_map->hdr. 
- entries_pageable); + vm_map_lock(map); - if (result != KERN_SUCCESS) { - return result; + if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) + { + vm_map_unlock(map); + return (FALSE); } - /* - * Allocate/check a range of free virtual address - * space for the target - */ - *address = trunc_page_32(*address); - vm_map_lock(target_map); - result = vm_remap_range_allocate(target_map, address, size, - mask, anywhere, &insp_entry); + if (!vm_map_lookup_entry(map, start, &tmp_entry)) { + vm_map_unlock(map); + return(FALSE); + } - for (entry = map_header.links.next; - entry != (struct vm_map_entry *)&map_header.links; - entry = new_entry) { - new_entry = entry->vme_next; - _vm_map_entry_unlink(&map_header, entry); - if (result == KERN_SUCCESS) { - entry->vme_start += *address; - entry->vme_end += *address; - vm_map_entry_link(target_map, insp_entry, entry); - insp_entry = entry; - } else { - if (!entry->is_sub_map) { - vm_object_deallocate(entry->object.vm_object); - } else { - vm_map_deallocate(entry->object.sub_map); - } - _vm_map_entry_dispose(&map_header, entry); + entry = tmp_entry; + + while (start < end) { + if (entry == vm_map_to_entry(map)) { + vm_map_unlock(map); + return(FALSE); } - } - if (result == KERN_SUCCESS) { - target_map->size += size; - SAVE_HINT(target_map, insp_entry); - } - vm_map_unlock(target_map); + /* + * No holes allowed! + */ - if (result == KERN_SUCCESS && target_map->wiring_required) - result = vm_map_wire(target_map, *address, - *address + size, *cur_protection, TRUE); - return result; + if (start < entry->vme_start) { + vm_map_unlock(map); + return(FALSE); + } + + /* + * Check protection associated with entry. + */ + + if ((entry->protection & protection) != protection) { + vm_map_unlock(map); + return(FALSE); + } + + /* go to next entry */ + + start = entry->vme_end; + entry = entry->vme_next; + } + vm_map_unlock(map); + return(TRUE); } -/* - * Routine: vm_remap_range_allocate - * - * Description: - * Allocate a range in the specified virtual address map. - * returns the address and the map entry just before the allocated - * range - * - * Map must be locked. - */ - kern_return_t -vm_remap_range_allocate( - vm_map_t map, - vm_offset_t *address, /* IN/OUT */ - vm_size_t size, - vm_offset_t mask, - boolean_t anywhere, - vm_map_entry_t *map_entry) /* OUT */ +vm_map_purgable_control( + vm_map_t map, + vm_map_offset_t address, + vm_purgable_t control, + int *state) { - register vm_map_entry_t entry; - register vm_offset_t start; - register vm_offset_t end; - kern_return_t result = KERN_SUCCESS; - - StartAgain: ; - - start = *address; + vm_map_entry_t entry; + vm_object_t object; + kern_return_t kr; - if (anywhere) - { /* - * Calculate the first possible address. + * Vet all the input parameters and current type and state of the + * underlaying object. Return with an error if anything is amiss. */ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); - if (start < map->min_offset) - start = map->min_offset; - if (start > map->max_offset) - return(KERN_NO_SPACE); - - /* - * Look for the first possible address; - * if there's already something at this - * address, we have to start after it. 
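/*
 * Illustrative sketch, not part of the patch: the "no holes allowed"
 * scan vm_map_check_protection performs above, over a toy sorted
 * array of ranges instead of map entries.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct prange {
	uint64_t start, end;	/* half-open, sorted, non-overlapping */
	int prot;		/* bitmask of allowed accesses */
};

static bool
check_protection(const struct prange *r, size_t n,
    uint64_t start, uint64_t end, int want)
{
	size_t i;

	for (i = 0; i < n && start < end; i++) {
		if (r[i].end <= start)
			continue;	/* entirely before the query */
		if (start < r[i].start)
			return false;	/* hole before this range */
		if ((r[i].prot & want) != want)
			return false;	/* insufficient protection */
		start = r[i].end;	/* advance past this range */
	}
	return start >= end;		/* whole query range covered? */
}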
- */ + if (control != VM_PURGABLE_SET_STATE && + control != VM_PURGABLE_GET_STATE) + return(KERN_INVALID_ARGUMENT); - assert(first_free_is_valid(map)); - if (start == map->min_offset) { - if ((entry = map->first_free) != vm_map_to_entry(map)) - start = entry->vme_end; - } else { - vm_map_entry_t tmp_entry; - if (vm_map_lookup_entry(map, start, &tmp_entry)) - start = tmp_entry->vme_end; - entry = tmp_entry; + if (control == VM_PURGABLE_SET_STATE && + (*state < VM_PURGABLE_STATE_MIN || + *state > VM_PURGABLE_STATE_MAX)) + return(KERN_INVALID_ARGUMENT); + + vm_map_lock(map); + + if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { + + /* + * Must pass a valid non-submap address. + */ + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + if ((entry->protection & VM_PROT_WRITE) == 0) { + /* + * Can't apply purgable controls to something you can't write. + */ + vm_map_unlock(map); + return(KERN_PROTECTION_FAILURE); + } + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + /* + * Object must already be present or it can't be purgable. + */ + vm_map_unlock(map); + return KERN_INVALID_ARGUMENT; + } + + vm_object_lock(object); + + if (entry->offset != 0 || + entry->vme_end - entry->vme_start != object->size) { + /* + * Can only apply purgable controls to the whole (existing) + * object at once. + */ + vm_map_unlock(map); + vm_object_unlock(object); + return KERN_INVALID_ARGUMENT; } - /* - * In any case, the "entry" always precedes - * the proposed new region throughout the - * loop: - */ + vm_map_unlock(map); - while (TRUE) { - register vm_map_entry_t next; + kr = vm_object_purgable_control(object, control, state); - /* - * Find the end of the proposed new region. - * Be sure we didn't go beyond the end, or - * wrap around the address. 
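/*
 * Illustrative sketch, not part of the patch: how the purgable
 * controls vetted above are exercised from user space.  Assumes the
 * Mach user stubs in <mach/mach.h> and the VM_FLAGS_PURGABLE /
 * vm_purgable_control() interface this patch plumbs through; the
 * user-visible spelling may differ by release.
 */
#include <mach/mach.h>
#include <stdio.h>

int
main(void)
{
	vm_address_t addr = 0;
	vm_size_t size = 16 * vm_page_size;
	int state;
	kern_return_t kr;

	/* The whole object must be purgable from the start. */
	kr = vm_allocate(mach_task_self(), &addr, size,
	    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS)
		return 1;

	/* Mark it volatile: the kernel may now discard its pages. */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);

	/* Reclaim it; the old state reports whether contents survived. */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), addr,
	    VM_PURGABLE_SET_STATE, &state);
	printf("previous state: %s\n",
	    state == VM_PURGABLE_EMPTY ? "purged" : "intact");

	vm_deallocate(mach_task_self(), addr, size);
	return 0;
}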
-		 */
+	vm_object_unlock(object);

-		end = ((start + mask) & ~mask);
-		if (end < start)
-			return(KERN_NO_SPACE);
-		start = end;
-		end += size;
+	return kr;
+}

-		if ((end > map->max_offset) || (end < start)) {
-			if (map->wait_for_space) {
-				if (size <= (map->max_offset -
-					     map->min_offset)) {
-					assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
-					vm_map_unlock(map);
-					thread_block((void (*)(void))0);
-					vm_map_lock(map);
-					goto StartAgain;
-				}
+kern_return_t
+vm_map_page_info(
+	vm_map_t		target_map,
+	vm_map_offset_t		offset,
+	int			*disposition,
+	int			*ref_count)
+{
+	vm_map_entry_t	map_entry;
+	vm_object_t	object;
+	vm_page_t	m;
+
+restart_page_query:
+	*disposition = 0;
+	*ref_count = 0;
+	vm_map_lock(target_map);
+	if(!vm_map_lookup_entry(target_map, offset, &map_entry)) {
+		vm_map_unlock(target_map);
+		return KERN_FAILURE;
+	}
+	offset -= map_entry->vme_start;	/* adjust to offset within entry */
+	offset += map_entry->offset;	/* adjust to target object offset */
+	if(map_entry->object.vm_object != VM_OBJECT_NULL) {
+		if(!map_entry->is_sub_map) {
+			object = map_entry->object.vm_object;
+		} else {
+			vm_map_unlock(target_map);
+			target_map = map_entry->object.sub_map;
+			goto restart_page_query;
 			}
-
-			return(KERN_NO_SPACE);
-		}
+	} else {
+		vm_map_unlock(target_map);
+		return KERN_FAILURE;
+	}
+	vm_object_lock(object);
+	vm_map_unlock(target_map);
+	while(TRUE) {
+		m = vm_page_lookup(object, offset);
+		if (m != VM_PAGE_NULL) {
+			*disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
+			break;
+		} else {
+			if(object->shadow) {
+				offset += object->shadow_offset;
+				vm_object_unlock(object);
+				object = object->shadow;
+				vm_object_lock(object);
+				continue;
+			}
+			vm_object_unlock(object);
+			return KERN_FAILURE;
+		}
+	}

-		/*
-		 *	If there are no more entries, we must win.
-		 */
+	/* The ref_count is not strictly accurate; it measures the number */
+	/* of entities holding a ref on the object.  They may not be      */
+	/* mapping the object or may not be mapping the section holding   */
+	/* the target page, but it's still a ballpark number and, though  */
+	/* an overcount, it picks up the copy-on-write cases              */

-		next = entry->vme_next;
-		if (next == vm_map_to_entry(map))
-			break;
+	/* We could also get a picture of page sharing from pmap_attributes */
+	/* but this would undercount, as only faulted-in mappings would     */
+	/* show up.                                                         */

-		/*
-		 *	If there is another entry, it must be
-		 *	after the end of the potential new region.
-		 */
+	*ref_count = object->ref_count;

-		if (next->vme_start >= end)
-			break;
+	if (m->fictitious) {
+		*disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
+		vm_object_unlock(object);
+		return KERN_SUCCESS;
+	}

-		/*
-		 *	Didn't fit -- move to the next entry.
-		 */
+	if (m->dirty)
+		*disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
+	else if(pmap_is_modified(m->phys_page))
+		*disposition |= VM_PAGE_QUERY_PAGE_DIRTY;

-		entry = next;
-		start = entry->vme_end;
-	}
-	*address = start;
-	} else {
-		vm_map_entry_t		temp_entry;
-
-	/*
-	 *	Verify that:
-	 *		the address doesn't itself violate
-	 *		the mask requirement.
-	 */
+	if (m->reference)
+		*disposition |= VM_PAGE_QUERY_PAGE_REF;
+	else if(pmap_is_referenced(m->phys_page))
+		*disposition |= VM_PAGE_QUERY_PAGE_REF;

-	if ((start & mask) != 0)
-		return(KERN_NO_SPACE);
+	vm_object_unlock(object);
+	return KERN_SUCCESS;
+
+}

-	/*
-	 *	...	the address is within bounds
-	 */
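For readers following the new interfaces: below is a minimal sketch (not part of the patch) of how vm_map_page_info() and vm_map_purgable_control() might be driven from other kernel code. The helper name and the printf() reporting are invented for illustration and the #includes are approximate; the signatures and the VM_PAGE_QUERY_* / VM_PURGABLE_* names are the ones introduced above.

#include <mach/kern_return.h>
#include <vm/vm_map.h>

void
vm_debug_describe_page(vm_map_t map, vm_map_offset_t addr)	/* hypothetical */
{
	int		disposition = 0;
	int		ref_count = 0;
	int		state;
	kern_return_t	kr;

	/* Ask the VM layer what it knows about the page at addr. */
	kr = vm_map_page_info(map, addr, &disposition, &ref_count);
	if (kr != KERN_SUCCESS) {
		printf("no page info for this address (kr %d)\n", kr);
		return;
	}
	printf("present=%d dirty=%d referenced=%d object refs=%d\n",
	       (disposition & VM_PAGE_QUERY_PAGE_PRESENT) != 0,
	       (disposition & VM_PAGE_QUERY_PAGE_DIRTY) != 0,
	       (disposition & VM_PAGE_QUERY_PAGE_REF) != 0,
	       ref_count);

	/*
	 * Purgable state is only defined when the entry is writable and
	 * maps a whole existing object; otherwise an error comes back.
	 */
	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, &state);
	if (kr == KERN_SUCCESS)
		printf("purgable state %d\n", state);
}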
+/* For a given range, check all map entries.  If the entry corresponds to */
+/* the old vm_region/map provided on the call, replace it with the        */
+/* corresponding range in the new vm_region/map                           */
+kern_return_t vm_map_region_replace(
+	vm_map_t	target_map,
+	ipc_port_t	old_region,
+	ipc_port_t	new_region,
+	vm_map_offset_t	start,
+	vm_map_offset_t	end)
+{
+	vm_named_entry_t	old_object;
+	vm_named_entry_t	new_object;
+	vm_map_t		old_submap;
+	vm_map_t		new_submap;
+	vm_map_offset_t		addr;
+	vm_map_entry_t		entry;
+	int			nested_pmap = 0;

-	end = start + size;
-	if ((start < map->min_offset) ||
-	    (end > map->max_offset) ||
-	    (start >= end)) {
-		return(KERN_INVALID_ADDRESS);
+	vm_map_lock(target_map);
+	old_object = (vm_named_entry_t)old_region->ip_kobject;
+	new_object = (vm_named_entry_t)new_region->ip_kobject;
+	if((!old_object->is_sub_map) || (!new_object->is_sub_map)) {
+		vm_map_unlock(target_map);
+		return KERN_INVALID_ARGUMENT;
+	}
+	old_submap = (vm_map_t)old_object->backing.map;
+	new_submap = (vm_map_t)new_object->backing.map;
+	vm_map_lock(old_submap);
+	if((old_submap->min_offset != new_submap->min_offset) ||
+	   (old_submap->max_offset != new_submap->max_offset)) {
+		vm_map_unlock(old_submap);
+		vm_map_unlock(target_map);
+		return KERN_INVALID_ARGUMENT;
+	}
+	if(!vm_map_lookup_entry(target_map, start, &entry)) {
+		/* if the src is not contained, the entry precedes */
+		/* our range */
+		addr = entry->vme_start;
+		if(entry == vm_map_to_entry(target_map)) {
+			vm_map_unlock(old_submap);
+			vm_map_unlock(target_map);
+			return KERN_SUCCESS;
+		}
+	}
+	if ((entry->use_pmap) &&
+	    (new_submap->pmap == NULL)) {
+		new_submap->pmap = pmap_create((vm_map_size_t) 0);
+		if(new_submap->pmap == PMAP_NULL) {
+			vm_map_unlock(old_submap);
+			vm_map_unlock(target_map);
+			return(KERN_NO_SPACE);
+		}
+	}
+	addr = entry->vme_start;
+	vm_map_reference(old_submap);
+	while((entry != vm_map_to_entry(target_map)) &&
+	      (entry->vme_start < end)) {
+		if((entry->is_sub_map) &&
+		   (entry->object.sub_map == old_submap)) {
+			if(entry->use_pmap) {
+				if((start & 0x0fffffff) ||
+				   ((end - start) != 0x10000000)) {
+					vm_map_unlock(old_submap);
+					vm_map_deallocate(old_submap);
+					vm_map_unlock(target_map);
+					return KERN_INVALID_ARGUMENT;
+				}
+				nested_pmap = 1;
+			}
+			entry->object.sub_map = new_submap;
+			vm_map_reference(new_submap);
+			vm_map_deallocate(old_submap);
+		}
+		entry = entry->vme_next;
+		addr = entry->vme_start;
+	}
+	if(nested_pmap) {
+#ifndef i386
+		pmap_unnest(target_map->pmap, (addr64_t)start);
+		if(target_map->mapped) {
+			vm_map_submap_pmap_clean(target_map,
+				start, end, old_submap, 0);
+		}
+		pmap_nest(target_map->pmap, new_submap->pmap,
+			  (addr64_t)start, (addr64_t)start,
+			  (uint64_t)(end - start));
+#endif /* i386 */
+	} else {
+		vm_map_submap_pmap_clean(target_map,
+			start, end, old_submap, 0);
 	}
+	vm_map_unlock(old_submap);
+	vm_map_deallocate(old_submap);
+	vm_map_unlock(target_map);
+	return KERN_SUCCESS;
+}
+
+/*
+ *	vm_map_msync
+ *
+ *	Synchronises the memory range specified with its backing store
+ *	image by either flushing or cleaning the contents to the appropriate
+ *	memory manager, engaging in a memory object synchronize dialog with
+ *	the manager.  The client doesn't return until the manager issues
+ *	an m_o_s_completed message.  MIG magically converts the user task
+ *	parameter to the task's address map.
+ *
+ *	interpretation of sync_flags
+ *	VM_SYNC_INVALIDATE	- discard pages, only return precious
+ *				  pages to manager.
+ *
+ *	VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
+ *				- discard pages, write dirty or precious
+ *				  pages back to memory manager.
+ *
+ *	VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
+ *				- write dirty or precious pages back to
+ *				  the memory manager.
+ *
+ *	VM_SYNC_CONTIGUOUS	- does everything normally, but if there
+ *				  is a hole in the region, and we would
+ *				  have returned KERN_SUCCESS, return
+ *				  KERN_INVALID_ADDRESS instead.
+ *
+ *	NOTE
+ *	The memory object attributes have not yet been implemented; this
+ *	function will have to deal with the invalidate attribute
+ *
+ *	RETURNS
+ *	KERN_INVALID_TASK		Bad task parameter
+ *	KERN_INVALID_ARGUMENT		both sync and async were specified.
+ *	KERN_SUCCESS			The usual.
+ *	KERN_INVALID_ADDRESS		There was a hole in the region.
+ */
+
+kern_return_t
+vm_map_msync(
+	vm_map_t		map,
+	vm_map_address_t	address,
+	vm_map_size_t		size,
+	vm_sync_t		sync_flags)
+{
+	msync_req_t		msr;
+	msync_req_t		new_msr;
+	queue_chain_t		req_q;	/* queue of requests for this msync */
+	vm_map_entry_t		entry;
+	vm_map_size_t		amount_left;
+	vm_object_offset_t	offset;
+	boolean_t		do_sync_req;
+	boolean_t		modifiable;
+	boolean_t		had_hole = FALSE;
+
+	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
+	    (sync_flags & VM_SYNC_SYNCHRONOUS))
+		return(KERN_INVALID_ARGUMENT);

 	/*
-	 *	...	the starting address isn't allocated
+	 * align address and size on page boundaries
 	 */
+	size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
+	address = vm_map_trunc_page(address);

-	if (vm_map_lookup_entry(map, start, &temp_entry))
-		return(KERN_NO_SPACE);
+	if (map == VM_MAP_NULL)
+		return(KERN_INVALID_TASK);

-	entry = temp_entry;
+	if (size == 0)
+		return(KERN_SUCCESS);

-	/*
-	 *	...	the next region doesn't overlap the
-	 *		end point.
-	 */
+	queue_init(&req_q);
+	amount_left = size;

-	if ((entry->vme_next != vm_map_to_entry(map)) &&
-	    (entry->vme_next->vme_start < end))
-		return(KERN_NO_SPACE);
-	}
-	*map_entry = entry;
-	return(KERN_SUCCESS);
-}
+	while (amount_left > 0) {
+		vm_object_size_t	flush_size;
+		vm_object_t		object;

-/*
- *	vm_map_switch:
- *
- *	Set the address map for the current thr_act to the specified map
- */
+		vm_map_lock(map);
+		if (!vm_map_lookup_entry(map,
+			vm_map_trunc_page(address), &entry)) {
+
+			vm_size_t	skip;
+
+			/*
+			 * hole in the address map.
+			 */
+			had_hole = TRUE;
+
+			/*
+			 * Check for empty map.
+			 */
+			if (entry == vm_map_to_entry(map) &&
+			    entry->vme_next == entry) {
+				vm_map_unlock(map);
+				break;
+			}
+			/*
+			 * Check that we don't wrap and that
+			 * we have at least one real map entry.
+			 */
+			if ((map->hdr.nentries == 0) ||
+			    (entry->vme_next->vme_start < address)) {
+				vm_map_unlock(map);
+				break;
+			}
+			/*
+			 * Move up to the next entry if needed
+			 */
+			skip = (entry->vme_next->vme_start - address);
+			if (skip >= amount_left)
+				amount_left = 0;
+			else
+				amount_left -= skip;
+			address = entry->vme_next->vme_start;
+			vm_map_unlock(map);
+			continue;
+		}

-vm_map_t
-vm_map_switch(
-	vm_map_t	map)
-{
-	int		mycpu;
-	thread_act_t	thr_act = current_act();
-	vm_map_t	oldmap = thr_act->map;
+		offset = address - entry->vme_start;

-	mp_disable_preemption();
-	mycpu = cpu_number();
+		/*
+		 * do we have more to flush than is contained in this
+		 * entry?
+ */ + if (amount_left + entry->vme_start + offset > entry->vme_end) { + flush_size = entry->vme_end - + (entry->vme_start + offset); + } else { + flush_size = amount_left; + } + amount_left -= flush_size; + address += flush_size; - /* - * Deactivate the current map and activate the requested map - */ - PMAP_SWITCH_USER(thr_act, map, mycpu); + if (entry->is_sub_map == TRUE) { + vm_map_t local_map; + vm_map_offset_t local_offset; - mp_enable_preemption(); - return(oldmap); -} + local_map = entry->object.sub_map; + local_offset = entry->offset; + vm_map_unlock(map); + if (vm_map_msync( + local_map, + local_offset, + flush_size, + sync_flags) == KERN_INVALID_ADDRESS) { + had_hole = TRUE; + } + continue; + } + object = entry->object.vm_object; + /* + * We can't sync this object if the object has not been + * created yet + */ + if (object == VM_OBJECT_NULL) { + vm_map_unlock(map); + continue; + } + offset += entry->offset; + modifiable = (entry->protection & VM_PROT_WRITE) + != VM_PROT_NONE; -/* - * Routine: vm_map_write_user - * - * Description: - * Copy out data from a kernel space into space in the - * destination map. The space must already exist in the - * destination map. - * NOTE: This routine should only be called by threads - * which can block on a page fault. i.e. kernel mode user - * threads. - * - */ -kern_return_t -vm_map_write_user( - vm_map_t map, - vm_offset_t src_addr, - vm_offset_t dst_addr, - vm_size_t size) -{ - thread_act_t thr_act = current_act(); - kern_return_t kr = KERN_SUCCESS; + vm_object_lock(object); - if(thr_act->map == map) { - if (copyout((char *)src_addr, (char *)dst_addr, size)) { - kr = KERN_INVALID_ADDRESS; + if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { + boolean_t kill_pages = 0; + + if (sync_flags & VM_SYNC_KILLPAGES) { + if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) + kill_pages = 1; + else + kill_pages = -1; + } + if (kill_pages != -1) + vm_object_deactivate_pages(object, offset, + (vm_object_size_t)flush_size, kill_pages); + vm_object_unlock(object); + vm_map_unlock(map); + continue; } - } else { - vm_map_t oldmap; + /* + * We can't sync this object if there isn't a pager. + * Don't bother to sync internal objects, since there can't + * be any "permanent" storage for these objects anyway. 
+		 */
+		if ((object->pager == MEMORY_OBJECT_NULL) ||
+		    (object->internal) || (object->private)) {
+			vm_object_unlock(object);
+			vm_map_unlock(map);
+			continue;
+		}
+		/*
+		 * keep reference on the object until syncing is done
+		 */
+		assert(object->ref_count > 0);
+		object->ref_count++;
+		vm_object_res_reference(object);
+		vm_object_unlock(object);

-		/* take on the identity of the target map while doing */
-		/* the transfer */
+		vm_map_unlock(map);

-		vm_map_reference(map);
-		oldmap = vm_map_switch(map);
-		if (copyout((char *)src_addr, (char *)dst_addr, size)) {
-			kr = KERN_INVALID_ADDRESS;
+		do_sync_req = vm_object_sync(object,
+					offset,
+					flush_size,
+					sync_flags & VM_SYNC_INVALIDATE,
+					(modifiable &&
+					(sync_flags & VM_SYNC_SYNCHRONOUS ||
+					 sync_flags & VM_SYNC_ASYNCHRONOUS)),
+					sync_flags & VM_SYNC_SYNCHRONOUS);
+		/*
+		 * only send an m_o_s if we returned pages or if the entry
+		 * is writable (i.e. dirty pages may have already been sent back)
+		 */
+		if (!do_sync_req && !modifiable) {
+			vm_object_deallocate(object);
+			continue;
 		}
-		vm_map_switch(oldmap);
-		vm_map_deallocate(map);
-	}
-	return kr;
-}
+		msync_req_alloc(new_msr);

-/*
- *	Routine:	vm_map_read_user
- *
- *	Description:
- *		Copy in data from a user space source map into the
- *		kernel map.  The space must already exist in the
- *		kernel map.
- *		NOTE:  This routine should only be called by threads
- *		which can block on a page fault.  i.e. kernel mode user
- *		threads.
- *
- */
-kern_return_t
-vm_map_read_user(
-	vm_map_t		map,
-	vm_offset_t		src_addr,
-	vm_offset_t		dst_addr,
-	vm_size_t		size)
-{
-	thread_act_t	thr_act = current_act();
-	kern_return_t	kr = KERN_SUCCESS;
+		vm_object_lock(object);
+		offset += object->paging_offset;

-	if(thr_act->map == map) {
-		if (copyin((char *)src_addr, (char *)dst_addr, size)) {
-			kr = KERN_INVALID_ADDRESS;
-		}
-	} else {
-		vm_map_t oldmap;
+		new_msr->offset = offset;
+		new_msr->length = flush_size;
+		new_msr->object = object;
+		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
+re_iterate:
+		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
+			/*
+			 * need to check for overlapping entry, if found, wait
+			 * on overlapping msr to be done, then reiterate
+			 */
+			msr_lock(msr);
+			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
+			    ((offset >= msr->offset &&
+			      offset < (msr->offset + msr->length)) ||
+			     (msr->offset >= offset &&
+			      msr->offset < (offset + flush_size))))
+			{
+				assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
+				msr_unlock(msr);
+				vm_object_unlock(object);
+				thread_block(THREAD_CONTINUE_NULL);
+				vm_object_lock(object);
+				goto re_iterate;
+			}
+			msr_unlock(msr);
+		}/* queue_iterate */

-		/* take on the identity of the target map while doing */
-		/* the transfer */
+		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
+		vm_object_unlock(object);

-		vm_map_reference(map);
-		oldmap = vm_map_switch(map);
-		if (copyin((char *)src_addr, (char *)dst_addr, size)) {
-			kr = KERN_INVALID_ADDRESS;
-		}
-		vm_map_switch(oldmap);
-		vm_map_deallocate(map);
-	}
-	return kr;
-}
+		queue_enter(&req_q, new_msr, msync_req_t, req_q);
+
+		(void) memory_object_synchronize(
+				object->pager,
+				offset,
+				flush_size,
+				sync_flags & ~VM_SYNC_CONTIGUOUS);
+	}/* while */
+
+	/*
+	 * wait for memory_object_synchronize_completed messages from pager(s)
+	 */
+
+	while (!queue_empty(&req_q)) {
+		msr = (msync_req_t)queue_first(&req_q);
+		msr_lock(msr);
+		while(msr->flag != VM_MSYNC_DONE) {
+			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
+			msr_unlock(msr);
+			thread_block(THREAD_CONTINUE_NULL);
+			msr_lock(msr);
+		}/* while */
+		queue_remove(&req_q, msr, msync_req_t, req_q);
+		msr_unlock(msr);
+		vm_object_deallocate(msr->object);
+		msync_req_free(msr);
+	}/* while */
+
+	/* for proper msync() behaviour */
+	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
+		return(KERN_INVALID_ADDRESS);
+
+	return(KERN_SUCCESS);
+}/* vm_map_msync */
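To make the flag semantics above concrete, here is a small sketch (not part of the patch) of a kernel-level caller. The wrapper name is invented; vm_map_msync(), the VM_SYNC_* flags, and the vm_map_round_page()/vm_map_trunc_page() macros are the ones this patch adds, and as the code above shows, the routine page-aligns the range itself, so callers may pass unaligned addresses.

#include <mach/vm_sync.h>
#include <vm/vm_map.h>

/*
 * Hypothetical helper: synchronously push any dirty pages in
 * [addr, addr+len) back to their pager and discard them, roughly the
 * Mach-layer equivalent of msync(MS_SYNC | MS_INVALIDATE).
 */
kern_return_t
flush_and_invalidate_range(vm_map_t map, vm_map_address_t addr,
			   vm_map_size_t len)
{
	/*
	 * vm_map_msync() widens the range to page boundaries on its own:
	 *   size    = vm_map_round_page(address + size)
	 *             - vm_map_trunc_page(address);
	 *   address = vm_map_trunc_page(address);
	 *
	 * With VM_SYNC_CONTIGUOUS set, a hole anywhere in the range turns
	 * an otherwise successful call into KERN_INVALID_ADDRESS.
	 */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE |
			    VM_SYNC_CONTIGUOUS);
}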

 /* Takes existing source and destination sub-maps and clones the contents of */
 /* the source map */
-
 kern_return_t
 vm_region_clone(
 	ipc_port_t	src_region,
@@ -9142,8 +10141,8 @@ vm_region_clone(
 	vm_named_entry_t	dst_object;
 	vm_map_t		src_map;
 	vm_map_t		dst_map;
-	vm_offset_t		addr;
-	vm_offset_t		max_off;
+	vm_map_offset_t		addr;
+	vm_map_offset_t		max_off;
 	vm_map_entry_t		entry;
 	vm_map_entry_t		new_entry;
 	vm_map_entry_t		insert_point;
@@ -9201,89 +10200,126 @@ vm_region_clone(
 }

 /*
- * Export routines to other components for the things we access locally through
- * macros.
- */
-#undef current_map
-vm_map_t
-current_map(void)
-{
-	return (current_map_fast());
-}
-
-/*
- *	vm_map_check_protection:
- *
- *	Assert that the target map allows the specified
- *	privilege on the entire address region given.
- *	The entire region must be allocated.
+ *	Routine:	convert_port_entry_to_map
+ *	Purpose:
+ *		Convert from a port specifying an entry or a task
+ *		to a map.  Doesn't consume the port ref; produces a map ref,
+ *		which may be null.  Unlike convert_port_to_map, the
+ *		port may be a task port or a named-entry port.
+ *	Conditions:
+ *		Nothing locked.
 */
-boolean_t vm_map_check_protection(map, start, end, protection)
-	register vm_map_t	map;
-	register vm_offset_t	start;
-	register vm_offset_t	end;
-	register vm_prot_t	protection;
-{
-	register vm_map_entry_t	entry;
-	vm_map_entry_t		tmp_entry;
-
-	vm_map_lock(map);
-
-	if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
-	{
-		vm_map_unlock(map);
-		return (FALSE);
-	}
+vm_map_t
+convert_port_entry_to_map(
+	ipc_port_t	port)
+{
+	vm_map_t map;
+	vm_named_entry_t	named_entry;

-	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
-		vm_map_unlock(map);
-		return(FALSE);
+	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
+		while(TRUE) {
+			ip_lock(port);
+			if(ip_active(port) && (ip_kotype(port)
+						== IKOT_NAMED_ENTRY)) {
+				named_entry =
+					(vm_named_entry_t)port->ip_kobject;
+				if (!(mutex_try(&(named_entry)->Lock))) {
+					ip_unlock(port);
+					mutex_pause();
+					continue;
+				}
+				named_entry->ref_count++;
+				mutex_unlock(&(named_entry)->Lock);
+				ip_unlock(port);
+				if ((named_entry->is_sub_map) &&
+				    (named_entry->protection
+					& VM_PROT_WRITE)) {
+					map = named_entry->backing.map;
+				} else {
+					mach_destroy_memory_entry(port);
+					return VM_MAP_NULL;
+				}
+				vm_map_reference_swap(map);
+				mach_destroy_memory_entry(port);
+				break;
+			}
+			else
+				return VM_MAP_NULL;
+		}
 	}
+	else
+		map = convert_port_to_map(port);

-	entry = tmp_entry;
-
-	while (start < end) {
-		if (entry == vm_map_to_entry(map)) {
-			vm_map_unlock(map);
-			return(FALSE);
-		}
+	return map;
+}
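The loop above is a lock-ordering idiom worth noting: with the port locked it only tries the named entry's mutex, and on contention it drops the port lock, pauses, and retries, instead of blocking while holding two locks that another thread may take in the opposite order. A standalone sketch of the same idiom using POSIX threads (all names invented for illustration):

#include <pthread.h>
#include <sched.h>

struct port_s  { pthread_mutex_t lock; };
struct entry_s { pthread_mutex_t lock; int ref_count; };

/* Take a ref on e without ever blocking while p's lock is held. */
void
take_entry_ref(struct port_s *p, struct entry_s *e)
{
	for (;;) {
		pthread_mutex_lock(&p->lock);
		if (pthread_mutex_trylock(&e->lock) == 0)
			break;			/* got both locks */
		/* Contended: back off and retry (cf. mutex_pause()). */
		pthread_mutex_unlock(&p->lock);
		sched_yield();
	}
	e->ref_count++;
	pthread_mutex_unlock(&e->lock);
	pthread_mutex_unlock(&p->lock);
}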

-		/*
-		 *	No holes allowed!
-		 */
+/*
+ *	Routine:	convert_port_entry_to_object
+ *	Purpose:
+ *		Convert from a port specifying a named entry to an
+ *		object.  Doesn't consume the port ref; produces an object ref,
+ *		which may be null.
+ *	Conditions:
+ *		Nothing locked.
+ */

-		if (start < entry->vme_start) {
-			vm_map_unlock(map);
-			return(FALSE);
-		}
-
-		/*
-		 *	Check protection associated with entry.
-		 */
+vm_object_t
+convert_port_entry_to_object(
+	ipc_port_t	port)
+{
+	vm_object_t object;
+	vm_named_entry_t	named_entry;

-		if ((entry->protection & protection) != protection) {
-			vm_map_unlock(map);
-			return(FALSE);
+	if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
+		while(TRUE) {
+			ip_lock(port);
+			if(ip_active(port) && (ip_kotype(port)
+						== IKOT_NAMED_ENTRY)) {
+				named_entry =
+					(vm_named_entry_t)port->ip_kobject;
+				if (!(mutex_try(&(named_entry)->Lock))) {
+					ip_unlock(port);
+					mutex_pause();
+					continue;
+				}
+				named_entry->ref_count++;
+				mutex_unlock(&(named_entry)->Lock);
+				ip_unlock(port);
+				if ((!named_entry->is_sub_map) &&
+				    (!named_entry->is_pager) &&
+				    (named_entry->protection
+					& VM_PROT_WRITE)) {
+					object = named_entry->backing.object;
+				} else {
+					mach_destroy_memory_entry(port);
+					return (vm_object_t)NULL;
+				}
+				vm_object_reference(named_entry->backing.object);
+				mach_destroy_memory_entry(port);
+				break;
+			}
+			else
+				return (vm_object_t)NULL;
		}
-
-		/* go to next entry */
-
-		start = entry->vme_end;
-		entry = entry->vme_next;
+	} else {
+		return (vm_object_t)NULL;
 	}
-	vm_map_unlock(map);
-	return(TRUE);
+
+	return object;
 }

 /*
- *	This routine is obsolete, but included for backward
- *	compatibility for older drivers.
+ * Export routines to other components for the things we access locally through
+ * macros.
  */
-void
-kernel_vm_map_reference(
-	vm_map_t map)
+#undef current_map
+vm_map_t
+current_map(void)
 {
-	vm_map_reference(map);
+	return (current_map_fast());
 }

 /*
@@ -9348,3 +10384,92 @@ vm_map_deallocate(

 	vm_map_destroy(map);
 }
+
+#ifdef __PPC__
+
+/* LP64todo - this whole mechanism is temporary.  It should be redone when
+ * the pmap layer can handle 64-bit address spaces.  Until then, we trump
+ * up a map entry for the 64-bit commpage above the map's max_offset.
+ */
+extern vm_map_t	com_region_map64;	/* the submap for 64-bit commpage */
+SInt32	commpage64s_in_use = 0;
+
+void
+vm_map_commpage64(
+	vm_map_t	map )
+{
+	vm_map_entry_t	entry;
+	vm_object_t	object;
+
+	vm_map_lock(map);
+
+	/* The commpage is necessarily the last entry in the map.
+	 * See if one is already there (not sure if this can happen???)
+	 */
+	entry = vm_map_last_entry(map);
+	if (entry != vm_map_to_entry(map)) {
+		if (entry->vme_end >= (vm_map_offset_t)_COMM_PAGE_BASE_ADDRESS) {
+			vm_map_unlock(map);
+			return;
+		}
+	}
+
+	entry = vm_map_first_entry(com_region_map64);	/* the 64-bit commpage */
+	object = entry->object.vm_object;
+	vm_object_reference(object);
+
+	/* We bypass vm_map_enter() because we are adding the entry past the
+	 * map's max_offset.
+	 */
+	entry = vm_map_entry_insert(
+			map,
+			vm_map_last_entry(map),		/* insert after last entry */
+			_COMM_PAGE_BASE_ADDRESS,
+			_COMM_PAGE_BASE_ADDRESS+_COMM_PAGE_AREA_USED,
+			object,
+			0,				/* offset */
+			FALSE,				/* needs_copy */
+			FALSE,				/* is_shared */
+			FALSE,				/* in_transition */
+			VM_PROT_READ,
+			VM_PROT_READ,
+			VM_BEHAVIOR_DEFAULT,
+			VM_INHERIT_NONE,
+			1 );				/* wired_count */
+
+	vm_map_unlock(map);
+
+	OSIncrementAtomic(&commpage64s_in_use);
+}
+
+
+/* LP64todo - remove this!
*/ + +void +vm_map_remove_commpage64( + vm_map_t map ) +{ + vm_map_entry_t entry; + int deleted = 0; + + while( 1 ) { + vm_map_lock(map); + + entry = vm_map_last_entry(map); + if ((entry == vm_map_to_entry(map)) || + (entry->vme_start < (vm_map_offset_t)_COMM_PAGE_BASE_ADDRESS)) + break; + + /* clearing the wired count isn't strictly correct */ + entry->wired_count = 0; + vm_map_entry_delete(map,entry); + deleted++; + } + + vm_map_unlock(map); + + if (deleted != 0) + OSDecrementAtomic(&commpage64s_in_use); +} + +#endif /* __PPC__ */ diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index ffbccd036..1e3f2d3f4 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -71,24 +71,22 @@ #include #include #include +#include #include -#include +#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_PRIVATE +#include -#ifndef MACH_KERNEL_PRIVATE - -#ifdef __APPLE_API_OBSOLETE -extern void kernel_vm_map_reference(vm_map_t map); -#endif /* __APPLE_API_OBSOLETE */ +__BEGIN_DECLS extern void vm_map_reference(vm_map_t map); extern vm_map_t current_map(void); -#else /* MACH_KERNEL_PRIVATE */ +__END_DECLS + +#ifdef MACH_KERNEL_PRIVATE -#include #include #include @@ -98,9 +96,9 @@ extern vm_map_t current_map(void); #include #include -#include +#include -#define current_map_fast() (current_act_fast()->map) +#define current_map_fast() (current_thread()->map) #define current_map() (current_map_fast()) /* @@ -113,6 +111,7 @@ extern vm_map_t current_map(void); * used for inter-map copy operations */ typedef struct vm_map_entry *vm_map_entry_t; +#define VM_MAP_ENTRY_NULL ((vm_map_entry_t) 0) /* @@ -123,11 +122,11 @@ typedef struct vm_map_entry *vm_map_entry_t; * memory object or a sub map (of the kernel map). */ typedef union vm_map_object { - struct vm_object *vm_object; /* object object */ - struct vm_map *sub_map; /* belongs to another map */ + vm_object_t vm_object; /* object object */ + vm_map_t sub_map; /* belongs to another map */ } vm_map_object_t; -#define named_entry_lock_init(object) mutex_init(&(object)->Lock, ETAP_VM_OBJ) +#define named_entry_lock_init(object) mutex_init(&(object)->Lock, 0) #define named_entry_lock(object) mutex_lock(&(object)->Lock) #define named_entry_unlock(object) mutex_unlock(&(object)->Lock) @@ -151,18 +150,19 @@ typedef union vm_map_object { struct vm_named_entry { decl_mutex_data(, Lock) /* Synchronization */ - vm_object_t object; /* object I point to */ - vm_object_offset_t offset; /* offset into object */ union { - memory_object_t pager; /* amo pager port */ - vm_map_t map; /* map backing submap */ + vm_object_t object; /* object I point to */ + memory_object_t pager; /* amo pager port */ + vm_map_t map; /* map backing submap */ } backing; - unsigned int size; /* size of region */ - unsigned int protection; /* access permissions */ + vm_object_offset_t offset; /* offset into object */ + vm_object_size_t size; /* size of region */ + vm_prot_t protection; /* access permissions */ int ref_count; /* Number of references */ - unsigned int - /* boolean_t */ internal:1, /* is an internal object */ - /* boolean_t */ is_sub_map:1; /* is object is a submap? */ + unsigned int /* Is backing.xxx : */ + /* boolean_t */ internal:1, /* ... an internal object */ + /* boolean_t */ is_sub_map:1, /* ... a submap? */ + /* boolean_t */ is_pager:1; /* ... 
a pager port */ }; /* @@ -181,8 +181,8 @@ struct vm_named_entry { struct vm_map_links { struct vm_map_entry *prev; /* previous entry */ struct vm_map_entry *next; /* next entry */ - vm_offset_t start; /* start address */ - vm_offset_t end; /* end address */ + vm_map_offset_t start; /* start address */ + vm_map_offset_t end; /* end address */ }; struct vm_map_entry { @@ -205,8 +205,9 @@ struct vm_map_entry { /* vm_prot_t */ protection:3, /* protection code */ /* vm_prot_t */ max_protection:3,/* maximum protection */ /* vm_inherit_t */ inheritance:2, /* inheritance */ - /* nested pmap */ use_pmap:1, /* nested pmaps */ - /* user alias */ alias:8; + /* boolean_t */ use_pmap:1, /* nested pmaps */ + /* unsigned char */ alias:8, /* user alias */ + /* unsigned char */ pad:8; /* available bits */ unsigned short wired_count; /* can be paged if = 0 */ unsigned short user_wired_count; /* for vm_wire */ }; @@ -253,7 +254,7 @@ struct vm_map { #define min_offset hdr.links.start /* start of range */ #define max_offset hdr.links.end /* end of range */ pmap_t pmap; /* Physical map */ - vm_size_t size; /* virtual size */ + vm_map_size_t size; /* virtual size */ int ref_count; /* Reference count */ #if TASK_SWAPPER int res_count; /* Residence count (swap) */ @@ -322,52 +323,14 @@ typedef struct vm_map_version { * entry onto which the other entries that represent * the region are chained. * - * The second format is a single vm object. This is used - * primarily in the pageout path. The third format is a - * list of vm pages. An optional continuation provides - * a hook to be called to obtain more of the memory, - * or perform other operations. The continuation takes 3 - * arguments, a saved arg buffer, a pointer to a new vm_map_copy - * (returned) and an abort flag (abort if TRUE). - */ - -#define VM_MAP_COPY_PAGE_LIST_MAX 20 -#define VM_MAP_COPY_PAGE_LIST_MAX_SIZE (VM_MAP_COPY_PAGE_LIST_MAX * PAGE_SIZE) - - -/* - * Options for vm_map_copyin_page_list. - */ - -#define VM_MAP_COPYIN_OPT_VM_PROT 0x7 -#define VM_MAP_COPYIN_OPT_SRC_DESTROY 0x8 -#define VM_MAP_COPYIN_OPT_STEAL_PAGES 0x10 -#define VM_MAP_COPYIN_OPT_PMAP_ENTER 0x20 -#define VM_MAP_COPYIN_OPT_NO_ZERO_FILL 0x40 - -/* - * Continuation structures for vm_map_copyin_page_list. - */ -typedef struct { - vm_map_t map; - vm_offset_t src_addr; - vm_size_t src_len; - vm_offset_t destroy_addr; - vm_size_t destroy_len; - int options; -} vm_map_copyin_args_data_t, *vm_map_copyin_args_t; - -#define VM_MAP_COPYIN_ARGS_NULL ((vm_map_copyin_args_t) 0) - - -/* vm_map_copy_cont_t is a type definition/prototype - * for the cont function pointer in vm_map_copy structure. + * The second format is a single vm object. This was used + * primarily in the pageout path - but is not currently used + * except for placeholder copy objects (see vm_map_copy_copy()). + * + * The third format is a kernel buffer copy object - for data + * small enough that physical copies were the most efficient + * method. 
*/ -typedef kern_return_t (*vm_map_copy_cont_t)( - vm_map_copyin_args_t, - vm_map_copy_t *); - -#define VM_MAP_COPY_CONT_NULL ((vm_map_copy_cont_t) 0) struct vm_map_copy { int type; @@ -375,20 +338,12 @@ struct vm_map_copy { #define VM_MAP_COPY_OBJECT 2 #define VM_MAP_COPY_KERNEL_BUFFER 3 vm_object_offset_t offset; - vm_size_t size; + vm_map_size_t size; union { struct vm_map_header hdr; /* ENTRY_LIST */ - struct { /* OBJECT */ - vm_object_t object; - vm_size_t index; /* record progress as pages - * are moved from object to - * page list; must be zero - * when first invoking - * vm_map_object_to_page_list - */ - } c_o; - struct { /* KERNEL_BUFFER */ - vm_offset_t kdata; + vm_object_t object; /* OBJECT */ + struct { + void *kdata; /* KERNEL_BUFFER */ vm_size_t kalloc_size; /* size of this copy_t */ } c_k; } c_u; @@ -397,8 +352,7 @@ struct vm_map_copy { #define cpy_hdr c_u.hdr -#define cpy_object c_u.c_o.object -#define cpy_index c_u.c_o.index +#define cpy_object c_u.object #define cpy_kdata c_u.c_k.kdata #define cpy_kalloc_size c_u.c_k.kalloc_size @@ -425,7 +379,7 @@ struct vm_map_copy { #define vm_map_lock_init(map) \ ((map)->timestamp = 0 , \ - lock_init(&(map)->lock, TRUE, ETAP_VM_MAP, ETAP_VM_MAP_I)) + lock_init(&(map)->lock, TRUE, 0, 0)) #define vm_map_lock(map) lock_write(&(map)->lock) #define vm_map_unlock(map) \ @@ -436,8 +390,6 @@ struct vm_map_copy { ((map)->timestamp++ , lock_write_to_read(&(map)->lock)) #define vm_map_lock_read_to_write(map) lock_read_to_write(&(map)->lock) -extern zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ - /* * Exported procedures that operate on vm_map_t. */ @@ -448,50 +400,44 @@ extern void vm_map_init(void); /* Allocate a range in the specified virtual address map and * return the entry allocated for that range. */ extern kern_return_t vm_map_find_space( - vm_map_t map, - vm_offset_t *address, /* OUT */ - vm_size_t size, - vm_offset_t mask, - vm_map_entry_t *o_entry); /* OUT */ + vm_map_t map, + vm_map_address_t *address, /* OUT */ + vm_map_size_t size, + vm_map_offset_t mask, + vm_map_entry_t *o_entry); /* OUT */ /* Lookup map entry containing or the specified address in the given map */ extern boolean_t vm_map_lookup_entry( - vm_map_t map, - vm_offset_t address, - vm_map_entry_t *entry); /* OUT */ + vm_map_t map, + vm_map_address_t address, + vm_map_entry_t *entry); /* OUT */ /* Find the VM object, offset, and protection for a given virtual address * in the specified map, assuming a page fault of the type specified. */ extern kern_return_t vm_map_lookup_locked( - vm_map_t *var_map, /* IN/OUT */ - vm_offset_t vaddr, - vm_prot_t fault_type, - vm_map_version_t *out_version, /* OUT */ - vm_object_t *object, /* OUT */ - vm_object_offset_t *offset, /* OUT */ - vm_prot_t *out_prot, /* OUT */ - boolean_t *wired, /* OUT */ - int *behavior, /* OUT */ - vm_object_offset_t *lo_offset, /* OUT */ - vm_object_offset_t *hi_offset, /* OUT */ - vm_map_t *pmap_map); /* OUT */ + vm_map_t *var_map, /* IN/OUT */ + vm_map_address_t vaddr, + vm_prot_t fault_type, + vm_map_version_t *out_version, /* OUT */ + vm_object_t *object, /* OUT */ + vm_object_offset_t *offset, /* OUT */ + vm_prot_t *out_prot, /* OUT */ + boolean_t *wired, /* OUT */ + int *behavior, /* OUT */ + vm_map_offset_t *lo_offset, /* OUT */ + vm_map_offset_t *hi_offset, /* OUT */ + vm_map_t *real_map); /* OUT */ /* Verifies that the map has not changed since the given version. 
*/ extern boolean_t vm_map_verify( - vm_map_t map, - vm_map_version_t *version); /* REF */ - -/* Split a vm_map_entry into 2 entries */ -extern void _vm_map_clip_start( - struct vm_map_header *map_header, - vm_map_entry_t entry, - vm_offset_t start); + vm_map_t map, + vm_map_version_t *version); /* REF */ extern vm_map_entry_t vm_map_entry_insert( vm_map_t map, vm_map_entry_t insp_entry, - vm_offset_t start, - vm_offset_t end, + vm_map_offset_t start, + vm_map_offset_t end, vm_object_t object, vm_object_offset_t offset, boolean_t needs_copy, @@ -503,52 +449,15 @@ extern vm_map_entry_t vm_map_entry_insert( vm_inherit_t inheritance, unsigned wired_count); -extern kern_return_t vm_remap_extract( - vm_map_t map, - vm_offset_t addr, - vm_size_t size, - boolean_t copy, - struct vm_map_header *map_header, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance, - boolean_t pageable); - -extern kern_return_t vm_remap_range_allocate( - vm_map_t map, - vm_offset_t *address, - vm_size_t size, - vm_offset_t mask, - boolean_t anywhere, - vm_map_entry_t *map_entry); - -extern kern_return_t vm_remap_extract( - vm_map_t map, - vm_offset_t addr, - vm_size_t size, - boolean_t copy, - struct vm_map_header *map_header, - vm_prot_t *cur_protection, - vm_prot_t *max_protection, - vm_inherit_t inheritance, - boolean_t pageable); - -extern kern_return_t vm_remap_range_allocate( - vm_map_t map, - vm_offset_t *address, - vm_size_t size, - vm_offset_t mask, - boolean_t anywhere, - vm_map_entry_t *map_entry); /* * Functions implemented as macros */ -#define vm_map_min(map) ((map)->min_offset) +#define vm_map_min(map) ((map)->min_offset) /* Lowest valid address in * a map */ -#define vm_map_max(map) ((map)->max_offset) +#define vm_map_max(map) ((map)->max_offset) /* Highest valid address */ #define vm_map_pmap(map) ((map)->pmap) @@ -581,9 +490,9 @@ extern void vm_map_reference_swap( #else /* MACH_ASSERT */ -#define vm_map_reference(map) \ +#define vm_map_reference(map) \ MACRO_BEGIN \ - vm_map_t Map = (map); \ + vm_map_t Map = (map); \ if (Map) { \ mutex_lock(&Map->s_lock); \ Map->res_count++; \ @@ -594,9 +503,9 @@ MACRO_END #define vm_map_res_reference(map) \ MACRO_BEGIN \ - vm_map_t Lmap = (map); \ + vm_map_t Lmap = (map); \ if (Lmap->res_count == 0) { \ - mutex_unlock(&Lmap->s_lock); \ + mutex_unlock(&Lmap->s_lock);\ vm_map_lock(Lmap); \ vm_map_swapin(Lmap); \ mutex_lock(&Lmap->s_lock); \ @@ -608,8 +517,8 @@ MACRO_END #define vm_map_res_deallocate(map) \ MACRO_BEGIN \ - vm_map_t Map = (map); \ - if (--Map->res_count == 0) { \ + vm_map_t Map = (map); \ + if (--Map->res_count == 0) { \ mutex_unlock(&Map->s_lock); \ vm_map_lock(Map); \ vm_map_swapout(Map); \ @@ -667,10 +576,11 @@ extern vm_object_t vm_submap_object; &(map)->lock, interruptible)) -#define vm_map_entry_wakeup(map) thread_wakeup((event_t)(&(map)->hdr)) +#define vm_map_entry_wakeup(map) \ + thread_wakeup((event_t)(&(map)->hdr)) -#define vm_map_ref_fast(map) \ +#define vm_map_ref_fast(map) \ MACRO_BEGIN \ mutex_lock(&map->s_lock); \ map->ref_count++; \ @@ -678,12 +588,12 @@ extern vm_object_t vm_submap_object; mutex_unlock(&map->s_lock); \ MACRO_END -#define vm_map_dealloc_fast(map) \ +#define vm_map_dealloc_fast(map) \ MACRO_BEGIN \ - register int c; \ + register int c; \ \ mutex_lock(&map->s_lock); \ - c = --map->ref_count; \ + c = --map->ref_count; \ if (c > 0) \ vm_map_res_deallocate(map); \ mutex_unlock(&map->s_lock); \ @@ -697,27 +607,12 @@ extern void vm_map_simplify_entry( vm_map_t map, vm_map_entry_t this_entry); 
extern void vm_map_simplify( - vm_map_t map, - vm_offset_t start); - -/* Steal all the pages from a vm_map_copy page_list */ -extern void vm_map_copy_steal_pages( - vm_map_copy_t copy); - -/* Discard a copy without using it */ -extern void vm_map_copy_discard( - vm_map_copy_t copy); + vm_map_t map, + vm_map_offset_t start); /* Move the information in a map copy object to a new map copy object */ extern vm_map_copy_t vm_map_copy_copy( - vm_map_copy_t copy); - -/* Overwrite existing memory with a copy */ -extern kern_return_t vm_map_copy_overwrite( - vm_map_t dst_map, - vm_offset_t dst_addr, - vm_map_copy_t copy, - int interruptible); + vm_map_copy_t copy); /* Create a copy object from an object. */ extern kern_return_t vm_map_copyin_object( @@ -726,22 +621,12 @@ extern kern_return_t vm_map_copyin_object( vm_object_size_t size, vm_map_copy_t *copy_result); /* OUT */ -extern vm_map_t vm_map_switch( - vm_map_t map); - -extern int vm_map_copy_cont_is_valid( - vm_map_copy_t copy); - - -#define VM_MAP_ENTRY_NULL ((vm_map_entry_t) 0) - - /* Enter a mapping */ extern kern_return_t vm_map_enter( vm_map_t map, - vm_offset_t *address, - vm_size_t size, - vm_offset_t mask, + vm_map_offset_t *address, + vm_map_size_t size, + vm_map_offset_t mask, int flags, vm_object_t object, vm_object_offset_t offset, @@ -750,129 +635,210 @@ extern kern_return_t vm_map_enter( vm_prot_t max_protection, vm_inherit_t inheritance); +/* XXX should go away - replaced with regular enter of contig object */ +extern kern_return_t vm_map_enter_cpm( + vm_map_t map, + vm_map_address_t *addr, + vm_map_size_t size, + int flags); + +extern kern_return_t vm_map_remap( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + boolean_t anywhere, + vm_map_t src_map, + vm_map_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance); + + +/* + * Read and write from a kernel buffer to a specified map. + */ extern kern_return_t vm_map_write_user( - vm_map_t map, - vm_offset_t src_addr, - vm_offset_t dst_addr, - vm_size_t size); + vm_map_t map, + void *src_p, + vm_map_offset_t dst_addr, + vm_size_t size); extern kern_return_t vm_map_read_user( - vm_map_t map, - vm_offset_t src_addr, - vm_offset_t dst_addr, - vm_size_t size); + vm_map_t map, + vm_map_offset_t src_addr, + void *dst_p, + vm_size_t size); /* Create a new task map using an existing task map as a template. 
*/ extern vm_map_t vm_map_fork( - vm_map_t old_map); + vm_map_t old_map); /* Change inheritance */ extern kern_return_t vm_map_inherit( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_inherit_t new_inheritance); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_inherit_t new_inheritance); /* Add or remove machine-dependent attributes from map regions */ extern kern_return_t vm_map_machine_attribute( - vm_map_t map, - vm_offset_t address, - vm_size_t size, + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value); /* IN/OUT */ + +extern kern_return_t vm_map_msync( + vm_map_t map, + vm_map_address_t address, + vm_map_size_t size, + vm_sync_t sync_flags); + /* Set paging behavior */ extern kern_return_t vm_map_behavior_set( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_behavior_t new_behavior); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_behavior_t new_behavior); + +extern kern_return_t vm_map_purgable_control( + vm_map_t map, + vm_map_offset_t address, + vm_purgable_t control, + int *state); + +extern kern_return_t vm_map_region( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t *size, + vm_region_flavor_t flavor, + vm_region_info_t info, + mach_msg_type_number_t *count, + mach_port_t *object_name); + +extern kern_return_t vm_map_region_recurse_64( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t *size, + natural_t *nesting_depth, + vm_region_submap_info_64_t info, + mach_msg_type_number_t *count); + +extern kern_return_t vm_map_page_info( + vm_map_t map, + vm_map_offset_t offset, + int *disposition, + int *ref_count); extern kern_return_t vm_map_submap( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_map_t submap, - vm_offset_t offset, - boolean_t use_pmap); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_map_t submap, + vm_map_offset_t offset, + boolean_t use_pmap); + +extern void vm_map_submap_pmap_clean( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_map_t sub_map, + vm_map_offset_t offset); + +/* Convert from a map entry port to a map */ +extern vm_map_t convert_port_entry_to_map( + ipc_port_t port); + +/* Convert from a port to a vm_object */ +extern vm_object_t convert_port_entry_to_object( + ipc_port_t port); #endif /* MACH_KERNEL_PRIVATE */ +__BEGIN_DECLS + /* Create an empty map */ extern vm_map_t vm_map_create( - pmap_t pmap, - vm_offset_t min, - vm_offset_t max, - boolean_t pageable); + pmap_t pmap, + vm_map_offset_t min_off, + vm_map_offset_t max_off, + boolean_t pageable); /* Get rid of a map */ extern void vm_map_destroy( - vm_map_t map); + vm_map_t map); /* Lose a reference */ extern void vm_map_deallocate( - vm_map_t map); + vm_map_t map); + +extern vm_map_t vm_map_switch( + vm_map_t map); /* Change protection */ extern kern_return_t vm_map_protect( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_prot_t new_prot, - boolean_t set_max); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t new_prot, + boolean_t set_max); + +/* Check protection */ +extern boolean_t vm_map_check_protection( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t protection); /* wire down a region */ extern kern_return_t vm_map_wire( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_prot_t access_type, - boolean_t user_wire); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + vm_prot_t 
access_type, + boolean_t user_wire); /* unwire a region */ extern kern_return_t vm_map_unwire( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - boolean_t user_wire); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + boolean_t user_wire); /* Deallocate a region */ extern kern_return_t vm_map_remove( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - boolean_t flags); + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end, + boolean_t flags); + +/* Discard a copy without using it */ +extern void vm_map_copy_discard( + vm_map_copy_t copy); + +/* Overwrite existing memory with a copy */ +extern kern_return_t vm_map_copy_overwrite( + vm_map_t dst_map, + vm_map_address_t dst_addr, + vm_map_copy_t copy, + int interruptible); /* Place a copy into a map */ extern kern_return_t vm_map_copyout( - vm_map_t dst_map, - vm_offset_t *dst_addr, /* OUT */ - vm_map_copy_t copy); + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy); extern kern_return_t vm_map_copyin_common( - vm_map_t src_map, - vm_offset_t src_addr, - vm_size_t len, - boolean_t src_destroy, - boolean_t src_volatile, - vm_map_copy_t *copy_result, /* OUT */ - boolean_t use_maxprot); - -extern kern_return_t vm_region_clone( - ipc_port_t src_region, - ipc_port_t dst_region); - -extern kern_return_t vm_map_region_replace( - vm_map_t target_map, - ipc_port_t old_region, - ipc_port_t new_region, - vm_offset_t start, - vm_offset_t end); - -extern boolean_t vm_map_check_protection( - vm_map_t map, - vm_offset_t start, - vm_offset_t end, - vm_prot_t protection); + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + boolean_t src_destroy, + boolean_t src_volatile, + vm_map_copy_t *copy_result, /* OUT */ + boolean_t use_maxprot); /* * Macros to invoke vm_map_copyin_common. 
vm_map_copyin is the @@ -892,6 +858,12 @@ extern boolean_t vm_map_check_protection( vm_map_copyin_common(src_map, src_addr, len, src_destroy, \ FALSE, copy_result, TRUE) +/* + * Macros for rounding and truncation of vm_map offsets and sizes + */ +#define vm_map_round_page(x) (((vm_map_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) +#define vm_map_trunc_page(x) ((vm_map_offset_t)(x) & ~((signed)PAGE_MASK)) + /* * Flags for vm_map_remove() and vm_map_delete() */ @@ -899,17 +871,34 @@ extern boolean_t vm_map_check_protection( #define VM_MAP_REMOVE_KUNWIRE 0x1 #define VM_MAP_REMOVE_INTERRUPTIBLE 0x2 #define VM_MAP_REMOVE_WAIT_FOR_KWIRE 0x4 +#define VM_MAP_REMOVE_SAVE_ENTRIES 0x8 -/* - * Backing store throttle when BS is exhausted - */ -extern unsigned int vm_backing_store_low; - -extern void vm_backing_store_disable( - boolean_t suspend); - +/* Support for shared regions */ +extern kern_return_t vm_region_clone( + ipc_port_t src_region, + ipc_port_t dst_region); -#endif /* __APPLE_API_PRIVATE */ +extern kern_return_t vm_map_region_replace( + vm_map_t target_map, + ipc_port_t old_region, + ipc_port_t new_region, + vm_map_offset_t start, + vm_map_offset_t end); + +/* Support for UPLs from vm_maps */ + +extern kern_return_t vm_map_get_upl( + vm_map_t target_map, + vm_address_t address, + vm_size_t *size, + upl_t *upl, + upl_page_info_array_t page_info, + mach_msg_type_number_t *page_infoCnt, + integer_t *flags, + integer_t force_data_sync); + +__END_DECLS + +#endif /* KERNEL_PRIVATE */ #endif /* _VM_VM_MAP_H_ */ - diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index 7fad2ac73..5effdae7f 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,11 +56,6 @@ * Virtual memory object module. */ -#ifdef MACH_BSD -/* remove as part of compoenent support merge */ -extern int vnode_pager_workaround; -#endif - #include #include @@ -69,7 +64,11 @@ extern int vnode_pager_workaround; #include #include #include + +#include #include + +#include #include #include #include @@ -78,13 +77,15 @@ extern int vnode_pager_workaround; #include #include #include +#include + #include #include #include #include #include #include -#include +#include /* * Virtual memory objects maintain the actual data @@ -160,10 +161,6 @@ extern int vnode_pager_workaround; */ /* Forward declarations for internal functions. */ -static void _vm_object_allocate( - vm_object_size_t size, - vm_object_t object); - static kern_return_t vm_object_terminate( vm_object_t object); @@ -176,9 +173,6 @@ static vm_object_t vm_object_cache_trim( static void vm_object_deactivate_all_pages( vm_object_t object); -static void vm_object_abort_activity( - vm_object_t object); - static kern_return_t vm_object_copy_call( vm_object_t src_object, vm_object_offset_t src_offset, @@ -284,6 +278,9 @@ typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #define vm_object_hash(pager) \ ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT) +void vm_object_hash_entry_free( + vm_object_hash_entry_t entry); + /* * vm_object_hash_lookup looks up a pager in the hashtable * and returns the corresponding entry, with optional removal. 
@@ -349,7 +346,7 @@ void vm_object_hash_entry_free( vm_object_hash_entry_t entry) { - zfree(vm_object_hash_zone, (vm_offset_t)entry); + zfree(vm_object_hash_zone, entry); } /* @@ -358,7 +355,7 @@ vm_object_hash_entry_free( * Returns a new object with the given size. */ -static void +__private_extern__ void _vm_object_allocate( vm_object_size_t size, vm_object_t object) @@ -370,9 +367,9 @@ _vm_object_allocate( *object = vm_object_template; queue_init(&object->memq); queue_init(&object->msr_q); -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG queue_init(&object->uplq); -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ vm_object_lock_init(object); object->size = size; } @@ -401,7 +398,7 @@ vm_object_allocate( __private_extern__ void vm_object_bootstrap(void) { - register i; + register int i; vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), round_page_32(512*1024), @@ -409,7 +406,7 @@ vm_object_bootstrap(void) "vm objects"); queue_init(&vm_object_cached_list); - mutex_init(&vm_object_cached_lock_data, ETAP_VM_OBJ_CACHE); + mutex_init(&vm_object_cached_lock_data, 0); vm_object_hash_zone = zinit((vm_size_t) sizeof (struct vm_object_hash_entry), @@ -426,7 +423,7 @@ vm_object_bootstrap(void) /* memq; Lock; init after allocation */ vm_object_template.size = 0; - vm_object_template.frozen_size = 0; + vm_object_template.memq_hint = VM_PAGE_NULL; vm_object_template.ref_count = 1; #if TASK_SWAPPER vm_object_template.res_count = 1; @@ -440,7 +437,7 @@ vm_object_bootstrap(void) vm_object_template.pager = MEMORY_OBJECT_NULL; vm_object_template.paging_offset = 0; - vm_object_template.pager_request = PAGER_REQUEST_NULL; + vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; /* msr_q; init after allocation */ vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; @@ -459,8 +456,7 @@ vm_object_bootstrap(void) vm_object_template.private = FALSE; vm_object_template.pageout = FALSE; vm_object_template.alive = TRUE; - vm_object_template.lock_in_progress = FALSE; - vm_object_template.lock_restart = FALSE; + vm_object_template.purgable = VM_OBJECT_NONPURGABLE; vm_object_template.silent_overwrite = FALSE; vm_object_template.advisory_pageout = FALSE; vm_object_template.shadowed = FALSE; @@ -567,7 +563,7 @@ vm_object_deallocate( register vm_object_t object) { boolean_t retry_cache_trim = FALSE; - vm_object_t shadow; + vm_object_t shadow = VM_OBJECT_NULL; // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ @@ -648,8 +644,23 @@ vm_object_deallocate( if ((object->ref_count > 1) || object->terminating) { object->ref_count--; vm_object_res_deallocate(object); - vm_object_unlock(object); vm_object_cache_unlock(); + + if (object->ref_count == 1 && + object->shadow != VM_OBJECT_NULL) { + /* + * We don't use this VM object anymore. We + * would like to collapse it into its parent(s), + * but we don't have any pointers back to these + * parent object(s). + * But we can try and collapse this object with + * its own shadows, in case these are useless + * too... + */ + vm_object_collapse(object, 0); + } + + vm_object_unlock(object); if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { @@ -766,10 +777,10 @@ vm_object_deallocate( * This object is not cachable; terminate it. 
*/ XPR(XPR_VM_OBJECT, - "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%lX ref %d\n", - (integer_t)object, object->resident_page_count, - object->paging_in_progress, - (natural_t)current_thread(),object->ref_count); + "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", + (integer_t)object, object->resident_page_count, + object->paging_in_progress, + (void *)current_thread(),object->ref_count); VM_OBJ_RES_DECR(object); /* XXX ? */ /* @@ -1041,7 +1052,7 @@ vm_object_terminate( object->pager = MEMORY_OBJECT_NULL; if (pager != MEMORY_OBJECT_NULL) - memory_object_control_disable(object->pager_request); + memory_object_control_disable(object->pager_control); vm_object_cache_unlock(); object->ref_count--; @@ -1101,7 +1112,7 @@ vm_object_terminate( /* * Free the space for the object. */ - zfree(vm_object_zone, (vm_offset_t) object); + zfree(vm_object_zone, object); return KERN_SUCCESS; } @@ -1164,63 +1175,6 @@ vm_object_release_pager( memory_object_deallocate(pager); } -/* - * Routine: vm_object_abort_activity [internal use only] - * Purpose: - * Abort paging requests pending on this object. - * In/out conditions: - * The object is locked on entry and exit. - */ -static void -vm_object_abort_activity( - vm_object_t object) -{ - register - vm_page_t p; - vm_page_t next; - - XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n", - (integer_t)object, 0, 0, 0, 0); - - /* - * Abort all activity that would be waiting - * for a result on this memory object. - * - * We could also choose to destroy all pages - * that we have in memory for this object, but - * we don't. - */ - - p = (vm_page_t) queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t) p)) { - next = (vm_page_t) queue_next(&p->listq); - - /* - * If it's being paged in, destroy it. - * If an unlock has been requested, start it again. - */ - - if (p->busy && p->absent) { - VM_PAGE_FREE(p); - } - else { - if (p->unlock_request != VM_PROT_NONE) - p->unlock_request = VM_PROT_NONE; - PAGE_WAKEUP(p); - } - - p = next; - } - - /* - * Wake up threads waiting for the memory object to - * become ready. 
- */ - - object->pager_ready = TRUE; - vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); -} - /* * Routine: vm_object_destroy * Purpose: @@ -1231,7 +1185,7 @@ vm_object_abort_activity( kern_return_t vm_object_destroy( vm_object_t object, - kern_return_t reason) + __unused kern_return_t reason) { memory_object_t old_pager; @@ -1261,7 +1215,7 @@ vm_object_destroy( old_pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (old_pager != MEMORY_OBJECT_NULL) - memory_object_control_disable(object->pager_request); + memory_object_control_disable(object->pager_control); vm_object_cache_unlock(); /* @@ -1347,6 +1301,8 @@ vm_object_deactivate_pages( if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { + assert(!m->laundry); + m->reference = FALSE; pmap_clear_reference(m->phys_page); @@ -1358,6 +1314,10 @@ vm_object_deactivate_pages( } VM_PAGE_QUEUES_REMOVE(m); + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); if(m->zero_fill) { queue_enter_first( &vm_page_queue_zf, @@ -1427,23 +1387,22 @@ __private_extern__ void vm_object_pmap_protect( register vm_object_t object, register vm_object_offset_t offset, - vm_size_t size, + vm_object_size_t size, pmap_t pmap, - vm_offset_t pmap_start, + vm_map_offset_t pmap_start, vm_prot_t prot) { if (object == VM_OBJECT_NULL) return; - size = round_page_64(size); - offset = trunc_page_64(offset); + size = vm_object_round_page(size); + offset = vm_object_trunc_page(offset); vm_object_lock(object); assert(object->internal); while (TRUE) { - if (object->resident_page_count > atop_32(size) / 2 && - pmap != PMAP_NULL) { + if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { vm_object_unlock(object); pmap_protect(pmap, pmap_start, pmap_start + size, prot); return; @@ -1452,7 +1411,7 @@ vm_object_pmap_protect( /* if we are doing large ranges with respect to resident */ /* page count then we should interate over pages otherwise */ /* inverse page look-up will be faster */ - if ((object->resident_page_count / 4) < atop_32(size)) { + if (ptoa_64(object->resident_page_count / 4) < size) { vm_page_t p; vm_object_offset_t end; @@ -1462,11 +1421,10 @@ vm_object_pmap_protect( queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { + vm_map_offset_t start; - vm_offset_t start = pmap_start + - (vm_offset_t)(p->offset - offset); - - pmap_protect(pmap, start, start + PAGE_SIZE, prot); + start = pmap_start + p->offset - offset; + pmap_protect(pmap, start, start + PAGE_SIZE_64, prot); } } } else { @@ -1488,9 +1446,12 @@ vm_object_pmap_protect( if (pmap != PMAP_NULL) { for(target_off = offset; - target_off < end; target_off += PAGE_SIZE) { - if(p = vm_page_lookup(object, target_off)) { - vm_offset_t start = pmap_start + + target_off < end; + target_off += PAGE_SIZE) { + p = vm_page_lookup(object, target_off); + if (p != VM_PAGE_NULL) { + vm_offset_t start; + start = pmap_start + (vm_offset_t)(p->offset - offset); pmap_protect(pmap, start, start + PAGE_SIZE, prot); @@ -1499,7 +1460,8 @@ vm_object_pmap_protect( } else { for(target_off = offset; target_off < end; target_off += PAGE_SIZE) { - if(p = vm_page_lookup(object, target_off)) { + p = vm_page_lookup(object, target_off); + if (p != VM_PAGE_NULL) { pmap_page_protect(p->phys_page, prot & ~p->page_lock); } @@ -1613,6 +1575,7 @@ vm_object_copy_slowly( new_object = vm_object_allocate(size); new_offset = 0; + vm_object_lock(new_object); assert(size == 
trunc_page_64(size)); /* Will the loop terminate? */ @@ -1627,7 +1590,9 @@ vm_object_copy_slowly( while ((new_page = vm_page_alloc(new_object, new_offset)) == VM_PAGE_NULL) { if (!vm_page_wait(interruptible)) { + vm_object_unlock(new_object); vm_object_deallocate(new_object); + vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; return(MACH_SEND_INTERRUPTED); } @@ -1715,6 +1680,7 @@ vm_object_copy_slowly( case VM_FAULT_INTERRUPTED: vm_page_free(new_page); + vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1732,6 +1698,7 @@ vm_object_copy_slowly( vm_page_lock_queues(); vm_page_free(new_page); vm_page_unlock_queues(); + vm_object_unlock(new_object); vm_object_deallocate(new_object); vm_object_deallocate(src_object); *_result_object = VM_OBJECT_NULL; @@ -1745,6 +1712,7 @@ vm_object_copy_slowly( * Lose the extra reference, and return our object. */ + vm_object_unlock(new_object); vm_object_deallocate(src_object); *_result_object = new_object; return(KERN_SUCCESS); @@ -1771,8 +1739,8 @@ vm_object_copy_slowly( __private_extern__ boolean_t vm_object_copy_quickly( vm_object_t *_object, /* INOUT */ - vm_object_offset_t offset, /* IN */ - vm_object_size_t size, /* IN */ + __unused vm_object_offset_t offset, /* IN */ + __unused vm_object_size_t size, /* IN */ boolean_t *_src_needs_copy, /* OUT */ boolean_t *_dst_needs_copy) /* OUT */ { @@ -1952,8 +1920,6 @@ static int copy_delayed_lock_collisions = 0; static int copy_delayed_max_collisions = 0; static int copy_delayed_lock_contention = 0; static int copy_delayed_protect_iterate = 0; -static int copy_delayed_protect_lookup = 0; -static int copy_delayed_protect_lookup_wait = 0; /* * Routine: vm_object_copy_delayed [internal] @@ -2070,11 +2036,6 @@ vm_object_copy_delayed( * needed). */ - if (new_copy != VM_OBJECT_NULL) { - vm_object_unlock(new_copy); - vm_object_deallocate(new_copy); - } - if (old_copy->size < copy_size) { /* * We can't perform a delayed copy if any of the @@ -2091,6 +2052,12 @@ vm_object_copy_delayed( if (p->wire_count > 0) { vm_object_unlock(old_copy); vm_object_unlock(src_object); + + if (new_copy != VM_OBJECT_NULL) { + vm_object_unlock(new_copy); + vm_object_deallocate(new_copy); + } + return VM_OBJECT_NULL; } else { pmap_page_protect(p->phys_page, @@ -2105,6 +2072,12 @@ vm_object_copy_delayed( vm_object_reference_locked(old_copy); vm_object_unlock(old_copy); vm_object_unlock(src_object); + + if (new_copy != VM_OBJECT_NULL) { + vm_object_unlock(new_copy); + vm_object_deallocate(new_copy); + } + return(old_copy); } @@ -2459,6 +2432,66 @@ vm_object_shadow( */ #if 0 +static void vm_object_abort_activity( + vm_object_t object); + +/* + * Routine: vm_object_abort_activity [internal use only] + * Purpose: + * Abort paging requests pending on this object. + * In/out conditions: + * The object is locked on entry and exit. + */ +static void +vm_object_abort_activity( + vm_object_t object) +{ + register + vm_page_t p; + vm_page_t next; + + XPR(XPR_VM_OBJECT, "vm_object_abort_activity, object 0x%X\n", + (integer_t)object, 0, 0, 0, 0); + + /* + * Abort all activity that would be waiting + * for a result on this memory object. + * + * We could also choose to destroy all pages + * that we have in memory for this object, but + * we don't. + */ + + p = (vm_page_t) queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t) p)) { + next = (vm_page_t) queue_next(&p->listq); + + /* + * If it's being paged in, destroy it. 
+ * If an unlock has been requested, start it again. + */ + + if (p->busy && p->absent) { + VM_PAGE_FREE(p); + } + else { + if (p->unlock_request != VM_PROT_NONE) + p->unlock_request = VM_PROT_NONE; + PAGE_WAKEUP(p); + } + + p = next; + } + + /* + * Wake up threads waiting for the memory object to + * become ready. + */ + + object->pager_ready = TRUE; + vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); +} + /* * Routine: vm_object_pager_dead * @@ -2623,7 +2656,7 @@ vm_object_enter( entry = VM_OBJECT_HASH_ENTRY_NULL; assert_wait((event_t) pager, THREAD_UNINT); vm_object_cache_unlock(); - thread_block((void (*)(void))0); + thread_block(THREAD_CONTINUE_NULL); vm_object_cache_lock(); } } while (entry == VM_OBJECT_HASH_ENTRY_NULL); @@ -2676,16 +2709,17 @@ vm_object_enter( vm_object_hash_entry_free(new_entry); if (must_init) { - pager_request_t pager_request; + memory_object_control_t control; /* * Allocate request port. */ - pager_request = memory_object_control_allocate(object); - assert (pager_request != PAGER_REQUEST_NULL); + control = memory_object_control_allocate(object); + assert (control != MEMORY_OBJECT_CONTROL_NULL); vm_object_lock(object); + assert(object != kernel_object); /* * Copy the reference we were given. @@ -2700,7 +2734,7 @@ vm_object_enter( /* copy strategy invalid until set by memory manager */ object->copy_strategy = MEMORY_OBJECT_COPY_INVALID; } - object->pager_request = pager_request; + object->pager_control = control; object->pager_ready = FALSE; vm_object_unlock(object); @@ -2710,7 +2744,7 @@ vm_object_enter( */ (void) memory_object_init(pager, - object->pager_request, + object->pager_control, PAGE_SIZE); vm_object_lock(object); @@ -2777,6 +2811,8 @@ vm_object_pager_create( XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", (integer_t)object, 0,0,0,0); + assert(object != kernel_object); + if (memory_manager_default_check() != KERN_SUCCESS) return; @@ -2887,7 +2923,6 @@ vm_object_remove( vm_object_t object) { memory_object_t pager; - pager_request_t pager_request; if ((pager = object->pager) != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; @@ -2914,6 +2949,8 @@ static boolean_t vm_object_bypass_allowed = TRUE; static int vm_external_discarded; static int vm_external_collapsed; +unsigned long vm_object_collapse_encrypted = 0; + /* * Routine: vm_object_do_collapse * Purpose: @@ -2951,7 +2988,7 @@ vm_object_do_collapse( new_offset = (p->offset - backing_offset); assert(!p->busy || p->absent); - + /* * If the parent has a page here, or if * this page falls outside the parent, @@ -2963,6 +3000,19 @@ vm_object_do_collapse( if (p->offset < backing_offset || new_offset >= size) { VM_PAGE_FREE(p); } else { + /* + * ENCRYPTED SWAP: + * The encryption key includes the "pager" and the + * "paging_offset". These might not be the same in + * the new object, so we can't just move an encrypted + * page from one object to the other. We can't just + * decrypt the page here either, because that would drop + * the object lock. + * The caller should check for encrypted pages before + * attempting to collapse. 
+ */ + ASSERT_PAGE_DECRYPTED(p); + pp = vm_page_lookup(object, new_offset); if (pp == VM_PAGE_NULL) { @@ -3028,14 +3078,14 @@ vm_object_do_collapse( assert(entry != VM_OBJECT_HASH_ENTRY_NULL); entry->object = object; object->pager_created = backing_object->pager_created; - object->pager_request = backing_object->pager_request; + object->pager_control = backing_object->pager_control; object->pager_ready = backing_object->pager_ready; object->pager_initialized = backing_object->pager_initialized; object->cluster_size = backing_object->cluster_size; object->paging_offset = backing_object->paging_offset + backing_offset; - if (object->pager_request != PAGER_REQUEST_NULL) { - memory_object_control_collapse(object->pager_request, + if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object->pager_control, object); } } @@ -3074,8 +3124,15 @@ vm_object_do_collapse( * moves from within backing_object to within object. */ + assert(!object->phys_contiguous); + assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; - object->shadow_offset += backing_object->shadow_offset; + if (object->shadow) { + object->shadow_offset += backing_object->shadow_offset; + } else { + /* no shadow, therefore no shadow offset... */ + object->shadow_offset = 0; + } assert((object->shadow == VM_OBJECT_NULL) || (object->shadow->copy != backing_object)); @@ -3097,7 +3154,7 @@ vm_object_do_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", (integer_t)backing_object, 0,0,0,0); - zfree(vm_object_zone, (vm_offset_t) backing_object); + zfree(vm_object_zone, backing_object); object_collapses++; } @@ -3131,8 +3188,15 @@ vm_object_do_bypass( vm_object_reference(backing_object->shadow); #endif /* TASK_SWAPPER */ + assert(!object->phys_contiguous); + assert(!backing_object->phys_contiguous); object->shadow = backing_object->shadow; - object->shadow_offset += backing_object->shadow_offset; + if (object->shadow) { + object->shadow_offset += backing_object->shadow_offset; + } else { + /* no shadow, therefore no shadow offset... */ + object->shadow_offset = 0; + } /* * Backing object might have had a copy pointer @@ -3215,6 +3279,10 @@ vm_object_do_bypass( * Requires that the object be locked and the page queues be unlocked. * */ +static unsigned long vm_object_collapse_calls = 0; +static unsigned long vm_object_collapse_objects = 0; +static unsigned long vm_object_collapse_do_collapse = 0; +static unsigned long vm_object_collapse_do_bypass = 0; __private_extern__ void vm_object_collapse( register vm_object_t object, @@ -3223,6 +3291,12 @@ vm_object_collapse( register vm_object_t backing_object; register unsigned int rcount; register unsigned int size; + vm_object_offset_t collapse_min_offset; + vm_object_offset_t collapse_max_offset; + vm_page_t page; + vm_object_t original_object; + + vm_object_collapse_calls++; if (! vm_object_collapse_allowed && ! 
vm_object_bypass_allowed) { return; @@ -3231,26 +3305,45 @@ vm_object_collapse( XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", (integer_t)object, 0,0,0,0); + if (object == VM_OBJECT_NULL) + return; + + original_object = object; + while (TRUE) { + vm_object_collapse_objects++; /* * Verify that the conditions are right for either * collapse or bypass: - * - * The object exists and no pages in it are currently - * being paged out, and */ - if (object == VM_OBJECT_NULL || - object->paging_in_progress != 0 || - object->absent_count != 0) - return; /* * There is a backing object, and */ - if ((backing_object = object->shadow) == VM_OBJECT_NULL) + backing_object = object->shadow; + if (backing_object == VM_OBJECT_NULL) { + if (object != original_object) { + vm_object_unlock(object); + } return; + } + /* + * No pages in the object are currently + * being paged out, and + */ + if (object->paging_in_progress != 0 || + object->absent_count != 0) { + /* try and collapse the rest of the shadow chain */ + vm_object_lock(backing_object); + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } + vm_object_lock(backing_object); /* @@ -3264,8 +3357,12 @@ vm_object_collapse( if (!backing_object->internal || backing_object->paging_in_progress != 0) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3280,8 +3377,12 @@ vm_object_collapse( */ if (backing_object->shadow != VM_OBJECT_NULL && backing_object->shadow->copy == backing_object) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3307,15 +3408,15 @@ vm_object_collapse( if (backing_object->ref_count == 1 && (!object->pager_created #if !MACH_PAGEMAP - || !backing_object->pager_created + || !backing_object->pager_created #endif /*!MACH_PAGEMAP */ ) && vm_object_collapse_allowed) { XPR(XPR_VM_OBJECT, - "vm_object_collapse: %x to %x, pager %x, pager_request %x\n", + "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", (integer_t)backing_object, (integer_t)object, (integer_t)backing_object->pager, - (integer_t)backing_object->pager_request, 0); + (integer_t)backing_object->pager_control, 0); /* * We need the cache lock for collapsing, @@ -3323,10 +3424,46 @@ vm_object_collapse( */ if (! vm_object_cache_lock_try()) { + if (object != original_object) { + vm_object_unlock(object); + } vm_object_unlock(backing_object); return; } + /* + * ENCRYPTED SWAP + * We can't collapse the object if it contains + * any encrypted page, because the encryption key + * includes the info. We can't + * drop the object lock in vm_object_do_collapse() + * so we can't decrypt the page there either. + */ + if (vm_pages_encrypted) { + collapse_min_offset = object->shadow_offset; + collapse_max_offset = + object->shadow_offset + object->size; + queue_iterate(&backing_object->memq, + page, vm_page_t, listq) { + if (page->encrypted && + (page->offset >= + collapse_min_offset) && + (page->offset < + collapse_max_offset)) { + /* + * We found an encrypted page + * in the backing object, + * within the range covered + * by the parent object: we can + * not collapse them. 
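The ENCRYPTED SWAP guard above walks the backing object's page list looking for encrypted pages inside the window the parent maps, [shadow_offset, shadow_offset + size). A stand-alone sketch of that window test follows; struct page and the function name are simplified stand-ins for the kernel's vm_page_t and queue machinery.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct page {              /* stand-in for vm_page_t */
    uint64_t offset;       /* offset within the backing object */
    bool     encrypted;    /* page holds ciphertext from secure swap */
};

/*
 * True if any encrypted page falls in [min_offset, max_offset) -- the
 * portion of the backing object covered by the parent.  In that case
 * vm_object_collapse must give up on collapsing and try a bypass.
 */
bool
range_has_encrypted_page(const struct page *pages, size_t n,
                         uint64_t min_offset, uint64_t max_offset)
{
    for (size_t i = 0; i < n; i++) {
        if (pages[i].encrypted &&
            pages[i].offset >= min_offset &&
            pages[i].offset < max_offset)
            return true;
    }
    return false;
}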
+ */ + vm_object_collapse_encrypted++; + vm_object_cache_unlock(); + goto try_bypass; + } + } + } + /* * Collapse the object with its backing * object, and try again with the object's @@ -3334,18 +3471,23 @@ vm_object_collapse( */ vm_object_do_collapse(object, backing_object); + vm_object_collapse_do_collapse++; continue; } - + try_bypass: /* * Collapsing the backing object was not possible * or permitted, so let's try bypassing it. */ if (! vm_object_bypass_allowed) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } @@ -3357,7 +3499,6 @@ vm_object_collapse( size = atop(object->size); rcount = object->resident_page_count; if (rcount != size) { - vm_object_size_t size; vm_object_offset_t offset; vm_object_offset_t backing_offset; unsigned int backing_rcount; @@ -3373,8 +3514,12 @@ vm_object_collapse( && (backing_object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3387,8 +3532,12 @@ vm_object_collapse( && (object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3427,8 +3576,12 @@ vm_object_collapse( !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { /* dependency right at the hint */ object->cow_hint = (vm_offset_t)hint_offset; - vm_object_unlock(backing_object); - return; + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; } /* @@ -3466,12 +3619,19 @@ vm_object_collapse( !EXISTS_IN_OBJECT(object, offset, rc)) { /* found a dependency */ object->cow_hint = (vm_offset_t)offset; - vm_object_unlock(backing_object); - return; + break; } - p = queue_next(p); + p = (vm_page_t) queue_next(&p->listq); } while (--backing_rcount); + if (backing_rcount != 0 ) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } } /* @@ -3496,10 +3656,17 @@ vm_object_collapse( !EXISTS_IN_OBJECT(object, offset, rcount)) { /* found a dependency */ object->cow_hint = (vm_offset_t)offset; - vm_object_unlock(backing_object); - return; + break; } } + if (offset != hint_offset) { + /* try and collapse the rest of the shadow chain */ + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + continue; + } } } @@ -3513,6 +3680,7 @@ vm_object_collapse( */ vm_object_do_bypass(object, backing_object); + vm_object_collapse_do_bypass++; /* * Try again with this object's new backing object. 
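A structural note on the rewritten vm_object_collapse: instead of returning at the first link that cannot be collapsed or bypassed, it now walks down the shadow chain, locking each backing object before unlocking the current one, and never dropping the original object's lock, which the caller holds. A pthread sketch of that hand-over-hand pattern, with illustrative types:

#include <pthread.h>
#include <stddef.h>

struct obj {
    pthread_mutex_t lock;
    struct obj     *shadow;      /* next object in the shadow chain */
};

/*
 * Walk a shadow chain the way the reworked vm_object_collapse does:
 * take the next link's lock before dropping the current one, and never
 * drop the original object's lock (the caller still owns it).
 */
void
walk_shadow_chain(struct obj *original)
{
    struct obj *object = original;

    while (object->shadow != NULL) {
        struct obj *backing = object->shadow;

        pthread_mutex_lock(&backing->lock);   /* lock next link first */
        if (object != original)
            pthread_mutex_unlock(&object->lock);
        object = backing;
    }
    if (object != original)
        pthread_mutex_unlock(&object->lock);
}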
@@ -3520,6 +3688,10 @@ vm_object_collapse( continue; } + + if (object != original_object) { + vm_object_unlock(object); + } } /* @@ -3558,8 +3730,7 @@ vm_object_page_remove( if (p != VM_PAGE_NULL) { assert(!p->cleaning && !p->pageout); if (!p->fictitious) - pmap_page_protect(p->phys_page, - VM_PROT_NONE); + pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } } @@ -3572,8 +3743,7 @@ vm_object_page_remove( if ((start <= p->offset) && (p->offset < end)) { assert(!p->cleaning && !p->pageout); if (!p->fictitious) - pmap_page_protect(p->phys_page, - VM_PROT_NONE); + pmap_disconnect(p->phys_page); VM_PAGE_FREE(p); } p = next; @@ -3612,7 +3782,7 @@ vm_object_coalesce( register vm_object_t prev_object, vm_object_t next_object, vm_object_offset_t prev_offset, - vm_object_offset_t next_offset, + __unused vm_object_offset_t next_offset, vm_object_size_t prev_size, vm_object_size_t next_size) { @@ -3648,6 +3818,7 @@ vm_object_coalesce( * . paged out * . shadows another object * . has a copy elsewhere + * . is purgable * . paging references (pages might be in page-list) */ @@ -3656,6 +3827,7 @@ vm_object_coalesce( (prev_object->shadow != VM_OBJECT_NULL) || (prev_object->copy != VM_OBJECT_NULL) || (prev_object->true_share != FALSE) || + (prev_object->purgable != VM_OBJECT_NONPURGABLE) || (prev_object->paging_in_progress != 0)) { vm_object_unlock(prev_object); return(FALSE); @@ -3798,23 +3970,23 @@ vm_object_cached( */ void vm_external_print( - vm_external_map_t map, - vm_size_t size) + vm_external_map_t emap, + vm_size_t size) { - if (map == VM_EXTERNAL_NULL) { + if (emap == VM_EXTERNAL_NULL) { printf("0 "); } else { vm_size_t existence_size = stob(size); printf("{ size=%d, map=[", existence_size); if (existence_size > 0) { - print_bitstring(map[0]); + print_bitstring(emap[0]); } if (existence_size > 1) { - print_bitstring(map[1]); + print_bitstring(emap[1]); } if (existence_size > 2) { printf("..."); - print_bitstring(map[existence_size-1]); + print_bitstring(emap[existence_size-1]); } printf("] }\n"); } @@ -3826,8 +3998,6 @@ int vm_follow_object( vm_object_t object) { - extern db_indent; - int count = 0; int orig_db_indent = db_indent; @@ -3856,17 +4026,18 @@ vm_follow_object( */ void vm_object_print( - vm_object_t object, - boolean_t have_addr, - int arg_count, - char *modif) + db_addr_t db_addr, + __unused boolean_t have_addr, + __unused int arg_count, + __unused char *modif) { + vm_object_t object; register vm_page_t p; - extern db_indent; - char *s; + const char *s; register int count; + object = (vm_object_t) (long) db_addr; if (object == VM_OBJECT_NULL) return; @@ -3876,7 +4047,7 @@ vm_object_print( iprintf("size=0x%x", object->size); printf(", cluster=0x%x", object->cluster_size); - printf(", frozen=0x%x", object->frozen_size); + printf(", memq_hint=%p", object->memq_hint); printf(", ref_count=%d\n", object->ref_count); iprintf(""); #if TASK_SWAPPER @@ -3888,7 +4059,7 @@ vm_object_print( if (object->shadow) { register int i = 0; vm_object_t shadow = object; - while(shadow = shadow->shadow) + while((shadow = shadow->shadow)) i++; printf(" (depth %d)", i); } @@ -3898,7 +4069,7 @@ vm_object_print( iprintf("pager=0x%x", object->pager); printf(", paging_offset=0x%x", object->paging_offset); - printf(", pager_request=0x%x\n", object->pager_request); + printf(", pager_control=0x%x\n", object->pager_control); iprintf("copy_strategy=%d[", object->copy_strategy); switch (object->copy_strategy) { @@ -3974,10 +4145,11 @@ vm_object_print( (object->pageout ? "" : "!"), (object->internal ? 
"internal" : "external"), (object->temporary ? "temporary" : "permanent")); - iprintf("%salive, %slock_in_progress, %slock_restart, %sshadowed, %scached, %sprivate\n", + iprintf("%salive, %spurgable, %spurgable_volatile, %spurgable_empty, %sshadowed, %scached, %sprivate\n", (object->alive ? "" : "!"), - (object->lock_in_progress ? "" : "!"), - (object->lock_restart ? "" : "!"), + ((object->purgable != VM_OBJECT_NONPURGABLE) ? "" : "!"), + ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE) ? "" : "!"), + ((object->purgable == VM_OBJECT_PURGABLE_EMPTY) ? "" : "!"), (object->shadowed ? "" : "!"), (vm_object_cached(object) ? "" : "!"), (object->private ? "" : "!")); @@ -4008,7 +4180,7 @@ vm_object_print( } count++; - printf("(off=0x%X,page=0x%X)", p->offset, (integer_t) p); + printf("(off=0x%llX,page=%p)", p->offset, p); p = (vm_page_t) queue_next(&p->listq); } if (count != 0) { @@ -4123,10 +4295,21 @@ vm_object_populate_with_private( vm_page_unlock_queues(); } else if (m->phys_page != base_page) { /* pmap call to clear old mapping */ - pmap_page_protect(m->phys_page, - VM_PROT_NONE); + pmap_disconnect(m->phys_page); m->phys_page = base_page; } + + /* + * ENCRYPTED SWAP: + * We're not pointing to the same + * physical page any longer and the + * contents of the new one are not + * supposed to be encrypted. + * XXX What happens to the original + * physical page. Is it lost ? + */ + m->encrypted = FALSE; + } else { while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) @@ -4176,13 +4359,12 @@ vm_object_populate_with_private( __private_extern__ kern_return_t memory_object_free_from_cache( - host_t host, + __unused host_t host, int *pager_id, int *count) { int object_released = 0; - int i; register vm_object_t object = VM_OBJECT_NULL; vm_object_t shadow; @@ -4276,7 +4458,7 @@ memory_object_create_named( VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); } - *control = object->pager_request; + *control = object->pager_control; vm_object_unlock(object); } return (KERN_SUCCESS); @@ -4467,6 +4649,9 @@ vm_object_release_name( return KERN_SUCCESS; } } + /*NOTREACHED*/ + assert(0); + return KERN_FAILURE; } @@ -4479,8 +4664,9 @@ vm_object_lock_request( int flags, vm_prot_t prot) { - vm_object_offset_t original_offset = offset; - boolean_t should_flush=flags & MEMORY_OBJECT_DATA_FLUSH; + __unused boolean_t should_flush; + + should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; XPR(XPR_MEMORY_OBJECT, "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", @@ -4506,7 +4692,7 @@ vm_object_lock_request( vm_object_paging_begin(object); (void)vm_object_update(object, - offset, size, should_return, flags, prot); + offset, size, NULL, NULL, should_return, flags, prot); vm_object_paging_end(object); vm_object_unlock(object); @@ -4514,7 +4700,364 @@ vm_object_lock_request( return (KERN_SUCCESS); } +/* + * Empty a purgable object by grabbing the physical pages assigned to it and + * putting them on the free queue without writing them to backing store, etc. + * When the pages are next touched they will be demand zero-fill pages. We + * skip pages which are busy, being paged in/out, wired, etc. We do _not_ + * skip referenced/dirty pages, pages on the active queue, etc. We're more + * than happy to grab these since this is a purgable object. We mark the + * object as "empty" after reaping its pages. + * + * On entry the object and page queues are locked, the object must be a + * purgable object with no delayed copies pending. 
+ */ +unsigned int +vm_object_purge(vm_object_t object) +{ + vm_page_t p, next; + unsigned int num_purged_pages; + vm_page_t local_freeq; + unsigned long local_freed; + int purge_loop_quota; +/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */ +#define PURGE_BATCH_FREE_LIMIT 50 +/* release page queues lock every PURGE_LOOP_QUOTA iterations */ +#define PURGE_LOOP_QUOTA 100 + + num_purged_pages = 0; + if (object->purgable == VM_OBJECT_NONPURGABLE) + return num_purged_pages; + object->purgable = VM_OBJECT_PURGABLE_EMPTY; + + assert(object->copy == VM_OBJECT_NULL); + assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); + purge_loop_quota = PURGE_LOOP_QUOTA; + + local_freeq = VM_PAGE_NULL; + local_freed = 0; + + /* + * Go through the object's resident pages and try and discard them. + */ + next = (vm_page_t)queue_first(&object->memq); + while (!queue_end(&object->memq, (queue_entry_t)next)) { + p = next; + next = (vm_page_t)queue_next(&next->listq); + + if (purge_loop_quota-- == 0) { + /* + * Avoid holding the page queues lock for too long. + * Let someone else take it for a while if needed. + * Keep holding the object's lock to guarantee that + * the object's page list doesn't change under us + * while we yield. + */ + if (local_freeq != VM_PAGE_NULL) { + /* + * Flush our queue of pages to free. + */ + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + vm_page_unlock_queues(); + mutex_pause(); + vm_page_lock_queues(); + + /* resume with the current page and a new quota */ + purge_loop_quota = PURGE_LOOP_QUOTA; + } + + + if (p->busy || p->cleaning || p->laundry || + p->list_req_pending) { + /* page is being acted upon, so don't mess with it */ + continue; + } + if (p->wire_count) { + /* don't discard a wired page */ + continue; + } + + if (p->tabled) { + /* clean up the object/offset table */ + vm_page_remove(p); + } + if (p->absent) { + /* update the object's count of absent pages */ + vm_object_absent_release(object); + } + + /* we can discard this page */ + + /* advertise that this page is in a transition state */ + p->busy = TRUE; + + if (p->no_isync == TRUE) { + /* the page hasn't been mapped yet */ + /* (optimization to delay the i-cache sync) */ + } else { + /* unmap the page */ + int refmod_state; + + refmod_state = pmap_disconnect(p->phys_page); + if (refmod_state & VM_MEM_MODIFIED) { + p->dirty = TRUE; + } + } + + if (p->dirty || p->precious) { + /* we saved the cost of cleaning this page ! */ + num_purged_pages++; + vm_page_purged_count++; + } + + /* remove page from active or inactive queue... */ + VM_PAGE_QUEUES_REMOVE(p); + + /* ... and put it on our queue of pages to free */ + assert(!p->laundry); + assert(p->object != kernel_object); + assert(p->pageq.next == NULL && + p->pageq.prev == NULL); + p->pageq.next = (queue_entry_t) local_freeq; + local_freeq = p; + if (++local_freed >= PURGE_BATCH_FREE_LIMIT) { + /* flush our queue of pages to free */ + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + } + + /* flush our local queue of pages to free one last time */ + if (local_freeq != VM_PAGE_NULL) { + vm_page_free_list(local_freeq); + local_freeq = VM_PAGE_NULL; + local_freed = 0; + } + + return num_purged_pages; +} + +/* + * vm_object_purgable_control() allows the caller to control and investigate the + * state of a purgable object. A purgable object is created via a call to + * vm_allocate() with VM_FLAGS_PURGABLE specified. 
A purgable object will + * never be coalesced with any other object -- even other purgable objects -- + * and will thus always remain a distinct object. A purgable object has + * special semantics when its reference count is exactly 1. If its reference + * count is greater than 1, then a purgable object will behave like a normal + * object and attempts to use this interface will result in an error return + * of KERN_INVALID_ARGUMENT. + * + * A purgable object may be put into a "volatile" state which will make the + * object's pages eligible for being reclaimed without paging to backing + * store if the system runs low on memory. If the pages in a volatile + * purgable object are reclaimed, the purgable object is said to have been + * "emptied." When a purgable object is emptied the system will reclaim as + * many pages from the object as it can in a convenient manner (pages already + * en route to backing store or busy for other reasons are left as is). When + * a purgable object is made volatile, its pages will generally be reclaimed + * before other pages in the application's working set. This semantic is + * generally used by applications which can recreate the data in the object + * faster than it can be paged in. One such example might be media assets + * which can be reread from a much faster RAID volume. + * + * A purgable object may be designated as "non-volatile" which means it will + * behave like all other objects in the system with pages being written to and + * read from backing store as needed to satisfy system memory needs. If the + * object was emptied before the object was made non-volatile, that fact will + * be returned as the old state of the purgable object (see + * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which + * were reclaimed as part of emptying the object will be refaulted in as + * zero-fill on demand. It is up to the application to note that an object + * was emptied and recreate the object's contents if necessary. When a + * purgable object is made non-volatile, its pages will generally not be paged + * out to backing store in the immediate future. A purgable object may also + * be manually emptied. + * + * Finally, the current state (non-volatile, volatile, volatile & empty) of a + * volatile purgable object may be queried at any time. This information may + * be used as a control input to let the application know when the system is + * experiencing memory pressure and is reclaiming memory. + * + * The specified address may be any address within the purgable object. If + * the specified address does not represent any object in the target task's + * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the + * object containing the specified address is not a purgable object, then + * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be + * returned. + * + * The control parameter may be any one of VM_PURGABLE_SET_STATE or + * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter + * state is used to set the new state of the purgable object and return its + * old state. For VM_PURGABLE_GET_STATE, the current state of the purgable + * object is returned in the parameter state. + * + * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE, + * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent + * the non-volatile, volatile and volatile/empty states described above. 
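This state machine is reachable from user space: as the comment says, vm_allocate() with VM_FLAGS_PURGABLE creates the object, and the vm_purgable_control() call drives the states it describes. A minimal sketch, with error handling trimmed (exact header locations may vary by release):

#include <mach/mach.h>
#include <mach/vm_purgable.h>
#include <stdio.h>

int
main(void)
{
    vm_address_t  addr = 0;
    vm_size_t     size = 4 * 4096;
    int           state;
    kern_return_t kr;

    /* Create a purgable region: it will never coalesce with others. */
    kr = vm_allocate(mach_task_self(), &addr, size,
                     VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
    if (kr != KERN_SUCCESS)
        return 1;

    /* Mark it volatile: the kernel may now reclaim its pages. */
    state = VM_PURGABLE_VOLATILE;
    kr = vm_purgable_control(mach_task_self(), addr,
                             VM_PURGABLE_SET_STATE, &state);
    if (kr != KERN_SUCCESS)
        return 1;

    /* Take it back; the returned old state tells us if data was lost. */
    state = VM_PURGABLE_NONVOLATILE;
    kr = vm_purgable_control(mach_task_self(), addr,
                             VM_PURGABLE_SET_STATE, &state);
    if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY)
        printf("contents were purged; recreate them\n");

    vm_deallocate(mach_task_self(), addr, size);
    return 0;
}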
+ * Setting the state of a purgable object to VM_PURGABLE_EMPTY will + * immediately reclaim as many pages in the object as can be conveniently + * collected (some may have already been written to backing store or be + * otherwise busy). + * + * The process of making a purgable object non-volatile and determining its + * previous state is atomic. Thus, if a purgable object is made + * VM_PURGABLE_NONVOLATILE and the old state is returned as + * VM_PURGABLE_VOLATILE, then the purgable object's previous contents are + * completely intact and will remain so until the object is made volatile + * again. If the old state is returned as VM_PURGABLE_EMPTY then the object + * was reclaimed while it was in a volatile state and its previous contents + * have been lost. + */ +/* + * The object must be locked. + */ +kern_return_t +vm_object_purgable_control( + vm_object_t object, + vm_purgable_t control, + int *state) +{ + int old_state; + vm_page_t p; + + if (object == VM_OBJECT_NULL) { + /* + * Object must already be present or it can't be purgable. + */ + return KERN_INVALID_ARGUMENT; + } + + /* + * Get current state of the purgable object. + */ + switch (object->purgable) { + case VM_OBJECT_NONPURGABLE: + return KERN_INVALID_ARGUMENT; + + case VM_OBJECT_PURGABLE_NONVOLATILE: + old_state = VM_PURGABLE_NONVOLATILE; + break; + + case VM_OBJECT_PURGABLE_VOLATILE: + old_state = VM_PURGABLE_VOLATILE; + break; + + case VM_OBJECT_PURGABLE_EMPTY: + old_state = VM_PURGABLE_EMPTY; + break; + + default: + old_state = VM_PURGABLE_NONVOLATILE; + panic("Bad state (%d) for purgable object!\n", + object->purgable); + /*NOTREACHED*/ + } + + /* purgable can't have delayed copies - now or in the future */ + assert(object->copy == VM_OBJECT_NULL); + assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); + + /* + * Execute the desired operation. + */ + if (control == VM_PURGABLE_GET_STATE) { + *state = old_state; + return KERN_SUCCESS; + } + + switch (*state) { + case VM_PURGABLE_NONVOLATILE: + vm_page_lock_queues(); + if (object->purgable != VM_OBJECT_PURGABLE_NONVOLATILE) { + assert(vm_page_purgeable_count >= + object->resident_page_count); + vm_page_purgeable_count -= object->resident_page_count; + } + + object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + + /* + * If the object wasn't emptied, then mark all pages of the + * object as referenced in order to give them a complete turn + * of the virtual memory "clock" before becoming candidates + * for paging out (if the system is suffering from memory + * pressure). We don't really need to set the pmap reference + * bits (which would be expensive) since the software copies + * are believed if they're set to true ... + */ + if (old_state != VM_PURGABLE_EMPTY) { + for (p = (vm_page_t)queue_first(&object->memq); + !queue_end(&object->memq, (queue_entry_t)p); + p = (vm_page_t)queue_next(&p->listq)) + p->reference = TRUE; + } + + vm_page_unlock_queues(); + + break; + + case VM_PURGABLE_VOLATILE: + vm_page_lock_queues(); + + if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && + object->purgable != VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count += object->resident_page_count; + } + + object->purgable = VM_OBJECT_PURGABLE_VOLATILE; + + /* + * We want the newly volatile purgable object to be a + * candidate for the pageout scan before other pages in the + * application if the system is suffering from memory + * pressure. To do this, we move a page of the object from + * the active queue onto the inactive queue in order to + * promote the object for early reclaim. 
We only need to move + * a single page since the pageout scan will reap the entire + * purgable object if it finds a single page in a volatile + * state. Obviously we don't do this if there are no pages + * associated with the object or we find a page of the object + * already on the inactive queue. + */ + for (p = (vm_page_t)queue_first(&object->memq); + !queue_end(&object->memq, (queue_entry_t)p); + p = (vm_page_t)queue_next(&p->listq)) { + if (p->inactive) { + /* already a page on the inactive queue */ + break; + } + if (p->active && !p->busy) { + /* found one we can move */ + vm_page_deactivate(p); + break; + } + } + vm_page_unlock_queues(); + + break; + + + case VM_PURGABLE_EMPTY: + vm_page_lock_queues(); + if (object->purgable != VM_OBJECT_PURGABLE_VOLATILE && + object->purgable != VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count += object->resident_page_count; + } + (void) vm_object_purge(object); + vm_page_unlock_queues(); + break; + + } + *state = old_state; + + return KERN_SUCCESS; +} #if TASK_SWAPPER /* @@ -4631,7 +5174,9 @@ vm_object_reference( * This is also needed as number of vnodes can be dynamically scaled. */ kern_return_t -adjust_vm_object_cache(vm_size_t oval, vm_size_t nval) +adjust_vm_object_cache( + __unused vm_size_t oval, + vm_size_t nval) { vm_object_cached_max = nval; vm_object_cache_trim(FALSE); @@ -4639,3 +5184,269 @@ adjust_vm_object_cache(vm_size_t oval, vm_size_t nval) } #endif /* MACH_BSD */ + +/* + * vm_object_transpose + * + * This routine takes two VM objects of the same size and exchanges + * their backing store. + * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE + * and UPL_BLOCK_ACCESS if they are referenced anywhere. + * + * The VM objects must not be locked by caller. + */ +kern_return_t +vm_object_transpose( + vm_object_t object1, + vm_object_t object2, + vm_object_size_t transpose_size) +{ + vm_object_t tmp_object; + kern_return_t retval; + boolean_t object1_locked, object2_locked; + boolean_t object1_paging, object2_paging; + vm_page_t page; + vm_object_offset_t page_offset; + + tmp_object = VM_OBJECT_NULL; + object1_locked = FALSE; object2_locked = FALSE; + object1_paging = FALSE; object2_paging = FALSE; + + if (object1 == object2 || + object1 == VM_OBJECT_NULL || + object2 == VM_OBJECT_NULL) { + /* + * If the 2 VM objects are the same, there's + * no point in exchanging their backing store. + */ + retval = KERN_INVALID_VALUE; + goto done; + } + + vm_object_lock(object1); + object1_locked = TRUE; + if (object1->copy || object1->shadow || object1->shadowed || + object1->purgable != VM_OBJECT_NONPURGABLE) { + /* + * We don't deal with copy or shadow objects (yet). + */ + retval = KERN_INVALID_VALUE; + goto done; + } + /* + * Since we're about to mess with the object's backing store, + * mark it as "paging_in_progress". Note that this is not enough + * to prevent any paging activity on this object, so the caller should + * have "quiesced" the objects beforehand, via a UPL operation with + * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) + * and UPL_BLOCK_ACCESS (to mark the pages "busy"). + */ + vm_object_paging_begin(object1); + object1_paging = TRUE; + vm_object_unlock(object1); + object1_locked = FALSE; + + /* + * Same as above for the 2nd object... 
+ */ + vm_object_lock(object2); + object2_locked = TRUE; + if (object2->copy || object2->shadow || object2->shadowed || + object2->purgable != VM_OBJECT_NONPURGABLE) { + retval = KERN_INVALID_VALUE; + goto done; + } + vm_object_paging_begin(object2); + object2_paging = TRUE; + vm_object_unlock(object2); + object2_locked = FALSE; + + /* + * Allocate a temporary VM object to hold object1's contents + * while we copy object2 to object1. + */ + tmp_object = vm_object_allocate(transpose_size); + vm_object_lock(tmp_object); + vm_object_paging_begin(tmp_object); + tmp_object->can_persist = FALSE; + + /* + * Since we need to lock both objects at the same time, + * make sure we always lock them in the same order to + * avoid deadlocks. + */ + if (object1 < object2) { + vm_object_lock(object1); + vm_object_lock(object2); + } else { + vm_object_lock(object2); + vm_object_lock(object1); + } + object1_locked = TRUE; + object2_locked = TRUE; + + if (object1->size != object2->size || + object1->size != transpose_size) { + /* + * If the 2 objects don't have the same size, we can't + * exchange their backing stores or one would overflow. + * If their size doesn't match the caller's + * "transpose_size", we can't do it either because the + * transpose operation will affect the entire span of + * the objects. + */ + retval = KERN_INVALID_VALUE; + goto done; + } + + + /* + * Transpose the lists of resident pages. + */ + if (object1->phys_contiguous || queue_empty(&object1->memq)) { + /* + * No pages in object1, just transfer pages + * from object2 to object1. No need to go through + * an intermediate object. + */ + while (!queue_empty(&object2->memq)) { + page = (vm_page_t) queue_first(&object2->memq); + vm_page_rename(page, object1, page->offset); + } + assert(queue_empty(&object2->memq)); + } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { + /* + * No pages in object2, just transfer pages + * from object1 to object2. No need to go through + * an intermediate object. 
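vm_object_transpose above sidesteps deadlock by acquiring the two object locks in a fixed (address) order, whichever object was passed first. The same discipline as a generic pthread helper, with illustrative names:

#include <pthread.h>

/*
 * Lock two mutexes in a globally consistent (address) order so that
 * two threads operating on the same pair can never deadlock.
 */
void
lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a < b) {
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
    } else {
        pthread_mutex_lock(b);
        pthread_mutex_lock(a);
    }
}

void
unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
    /* Unlock order does not matter for correctness. */
    pthread_mutex_unlock(a);
    pthread_mutex_unlock(b);
}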
+ */ + while (!queue_empty(&object1->memq)) { + page = (vm_page_t) queue_first(&object1->memq); + vm_page_rename(page, object2, page->offset); + } + assert(queue_empty(&object1->memq)); + } else { + /* transfer object1's pages to tmp_object */ + vm_page_lock_queues(); + while (!queue_empty(&object1->memq)) { + page = (vm_page_t) queue_first(&object1->memq); + page_offset = page->offset; + vm_page_remove(page); + page->offset = page_offset; + queue_enter(&tmp_object->memq, page, vm_page_t, listq); + } + vm_page_unlock_queues(); + assert(queue_empty(&object1->memq)); + /* transfer object2's pages to object1 */ + while (!queue_empty(&object2->memq)) { + page = (vm_page_t) queue_first(&object2->memq); + vm_page_rename(page, object1, page->offset); + } + assert(queue_empty(&object2->memq)); + /* transfer tmp_object's pages to object2 */ + while (!queue_empty(&tmp_object->memq)) { + page = (vm_page_t) queue_first(&tmp_object->memq); + queue_remove(&tmp_object->memq, page, + vm_page_t, listq); + vm_page_insert(page, object2, page->offset); + } + assert(queue_empty(&tmp_object->memq)); + } + + /* no need to transpose the size: they should be identical */ + assert(object1->size == object2->size); + +#define __TRANSPOSE_FIELD(field) \ +MACRO_BEGIN \ + tmp_object->field = object1->field; \ + object1->field = object2->field; \ + object2->field = tmp_object->field; \ +MACRO_END + + assert(!object1->copy); + assert(!object2->copy); + + assert(!object1->shadow); + assert(!object2->shadow); + + __TRANSPOSE_FIELD(shadow_offset); /* used by phys_contiguous objects */ + __TRANSPOSE_FIELD(pager); + __TRANSPOSE_FIELD(paging_offset); + + __TRANSPOSE_FIELD(pager_control); + /* update the memory_objects' pointers back to the VM objects */ + if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object1->pager_control, + object1); + } + if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_collapse(object2->pager_control, + object2); + } + + __TRANSPOSE_FIELD(absent_count); + + assert(object1->paging_in_progress); + assert(object2->paging_in_progress); + + __TRANSPOSE_FIELD(pager_created); + __TRANSPOSE_FIELD(pager_initialized); + __TRANSPOSE_FIELD(pager_ready); + __TRANSPOSE_FIELD(pager_trusted); + __TRANSPOSE_FIELD(internal); + __TRANSPOSE_FIELD(temporary); + __TRANSPOSE_FIELD(private); + __TRANSPOSE_FIELD(pageout); + __TRANSPOSE_FIELD(true_share); + __TRANSPOSE_FIELD(phys_contiguous); + __TRANSPOSE_FIELD(nophyscache); + __TRANSPOSE_FIELD(last_alloc); + __TRANSPOSE_FIELD(sequential); + __TRANSPOSE_FIELD(cluster_size); + __TRANSPOSE_FIELD(existence_map); + __TRANSPOSE_FIELD(cow_hint); + __TRANSPOSE_FIELD(wimg_bits); + +#undef __TRANSPOSE_FIELD + + retval = KERN_SUCCESS; + +done: + /* + * Cleanup. + */ + if (tmp_object != VM_OBJECT_NULL) { + vm_object_paging_end(tmp_object); + vm_object_unlock(tmp_object); + /* + * Re-initialize the temporary object to avoid + * deallocating a real pager. 
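The __TRANSPOSE_FIELD macro above performs the classic three-assignment swap, reusing the already-allocated tmp_object as scratch so each listed field exchanges sides. In isolation the pattern looks like this (struct and names illustrative):

struct thing { int a; long b; };

/* Swap one named field between x and y, using t as scratch. */
#define TRANSPOSE_FIELD(t, x, y, field) \
    do {                                \
        (t)->field = (x)->field;        \
        (x)->field = (y)->field;        \
        (y)->field = (t)->field;        \
    } while (0)

void
swap_things(struct thing *x, struct thing *y)
{
    struct thing tmp;
    TRANSPOSE_FIELD(&tmp, x, y, a);
    TRANSPOSE_FIELD(&tmp, x, y, b);
}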
+ */ + _vm_object_allocate(transpose_size, tmp_object); + vm_object_deallocate(tmp_object); + tmp_object = VM_OBJECT_NULL; + } + + if (object1_locked) { + vm_object_unlock(object1); + object1_locked = FALSE; + } + if (object2_locked) { + vm_object_unlock(object2); + object2_locked = FALSE; + } + if (object1_paging) { + vm_object_lock(object1); + vm_object_paging_end(object1); + vm_object_unlock(object1); + object1_paging = FALSE; + } + if (object2_paging) { + vm_object_lock(object2); + vm_object_paging_end(object2); + vm_object_unlock(object2); + object2_paging = FALSE; + } + + return retval; +} diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index d8e4c4f40..7ef6f55f3 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -68,11 +68,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -82,8 +82,7 @@ #include #endif /* MACH_PAGEMAP */ -typedef memory_object_control_t pager_request_t; -#define PAGER_REQUEST_NULL ((pager_request_t) 0) +struct vm_page; /* * Types defined: @@ -91,9 +90,6 @@ typedef memory_object_control_t pager_request_t; * vm_object_t Virtual memory object. */ -typedef unsigned long long vm_object_size_t; - - struct vm_object { queue_head_t memq; /* Resident memory */ decl_mutex_data(, Lock) /* Synchronization */ @@ -101,10 +97,7 @@ struct vm_object { vm_object_size_t size; /* Object size (only valid * if internal) */ - vm_object_size_t frozen_size; /* How much has been marked - * copy-on-write (only - * valid if copy_symmetric) - */ + struct vm_page *memq_hint; int ref_count; /* Number of references */ #if TASK_SWAPPER int res_count; /* Residency references (swap)*/ @@ -124,7 +117,7 @@ struct vm_object { memory_object_t pager; /* Where to get data */ vm_object_offset_t paging_offset; /* Offset into memory object */ - pager_request_t pager_request; /* Where data comes back */ + memory_object_control_t pager_control; /* Where data comes back */ memory_object_copy_strategy_t copy_strategy; /* How to handle data copy */ @@ -185,13 +178,9 @@ struct vm_object { * a real memory object. */ /* boolean_t */ alive:1, /* Not yet terminated */ - /* boolean_t */ lock_in_progress:1, - /* Is a multi-page lock - * request in progress? - */ - /* boolean_t */ lock_restart:1, - /* Should lock request in - * progress restart search? + /* boolean_t */ purgable:2, /* Purgable state. See + * VM_OBJECT_PURGABLE_* + * items below. 
*/ /* boolean_t */ shadowed:1, /* Shadow may exist */ /* boolean_t */ silent_overwrite:1, @@ -280,16 +269,44 @@ struct vm_object { unsigned int /* cache WIMG bits */ wimg_bits:8, /* wimg plus some expansion*/ not_in_use:24; -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG queue_head_t uplq; /* List of outstanding upls */ -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ }; +#define VM_PAGE_REMOVE(page) \ + MACRO_BEGIN \ + vm_page_t __page = (page); \ + vm_object_t __object = __page->object; \ + if (__page == __object->memq_hint) { \ + vm_page_t __new_hint; \ + queue_entry_t __qe; \ + __qe = queue_next(&__page->listq); \ + if (queue_end(&__object->memq, __qe)) { \ + __qe = queue_prev(&__page->listq); \ + if (queue_end(&__object->memq, __qe)) { \ + __qe = NULL; \ + } \ + } \ + __new_hint = (vm_page_t) __qe; \ + __object->memq_hint = __new_hint; \ + } \ + queue_remove(&__object->memq, __page, vm_page_t, listq); \ + MACRO_END + +#define VM_PAGE_INSERT(page, object) \ + MACRO_BEGIN \ + vm_page_t __page = (page); \ + vm_object_t __object = (object); \ + queue_enter(&__object->memq, __page, vm_page_t, listq); \ + __object->memq_hint = __page; \ + MACRO_END + __private_extern__ vm_object_t kernel_object; /* the single kernel object */ __private_extern__ -int vm_object_absent_max; /* maximum number of absent pages +unsigned int vm_object_absent_max; /* maximum number of absent pages at a time for each object */ # define VM_MSYNC_INITIALIZED 0 @@ -315,12 +332,12 @@ typedef struct msync_req *msync_req_t; #define msync_req_alloc(msr) \ MACRO_BEGIN \ (msr) = (msync_req_t)kalloc(sizeof(struct msync_req)); \ - mutex_init(&(msr)->msync_req_lock, ETAP_VM_MSYNC); \ + mutex_init(&(msr)->msync_req_lock, 0); \ msr->flag = VM_MSYNC_INITIALIZED; \ MACRO_END #define msync_req_free(msr) \ - (kfree((vm_offset_t)(msr), sizeof(struct msync_req))) + (kfree((msr), sizeof(struct msync_req))) #define msr_lock(msr) mutex_lock(&(msr)->msync_req_lock) #define msr_unlock(msr) mutex_unlock(&(msr)->msync_req_lock) @@ -336,6 +353,9 @@ __private_extern__ void vm_object_init(void); __private_extern__ vm_object_t vm_object_allocate( vm_object_size_t size); +__private_extern__ void _vm_object_allocate(vm_object_size_t size, + vm_object_t object); + #if TASK_SWAPPER __private_extern__ void vm_object_res_reference( @@ -363,12 +383,10 @@ MACRO_BEGIN \ MACRO_END -#if MACH_ASSERT - __private_extern__ void vm_object_reference( vm_object_t object); -#else /* MACH_ASSERT */ +#if !MACH_ASSERT #define vm_object_reference(object) \ MACRO_BEGIN \ @@ -392,9 +410,9 @@ __private_extern__ kern_return_t vm_object_release_name( __private_extern__ void vm_object_pmap_protect( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, + vm_object_size_t size, pmap_t pmap, - vm_offset_t pmap_start, + vm_map_offset_t pmap_start, vm_prot_t prot); __private_extern__ void vm_object_page_remove( @@ -408,6 +426,14 @@ __private_extern__ void vm_object_deactivate_pages( vm_object_size_t size, boolean_t kill_page); +__private_extern__ unsigned int vm_object_purge( + vm_object_t object); + +__private_extern__ kern_return_t vm_object_purgable_control( + vm_object_t object, + vm_purgable_t control, + int *state); + __private_extern__ boolean_t vm_object_coalesce( vm_object_t prev_object, vm_object_t next_object, @@ -472,23 +498,31 @@ __private_extern__ void vm_object_page_map( __private_extern__ kern_return_t vm_object_upl_request( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, + upl_size_t size, upl_t *upl, upl_page_info_t *page_info, unsigned int 
*count, int flags); +__private_extern__ kern_return_t vm_object_transpose( + vm_object_t object1, + vm_object_t object2, + vm_object_size_t transpose_size); + __private_extern__ boolean_t vm_object_sync( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, + vm_object_size_t size, boolean_t should_flush, - boolean_t should_return); + boolean_t should_return, + boolean_t should_iosync); __private_extern__ kern_return_t vm_object_update( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, /* should be 64 */ + vm_object_size_t size, + vm_object_offset_t *error_offset, + int *io_errno, memory_object_return_t should_return, int flags, vm_prot_t prot); @@ -511,6 +545,25 @@ __private_extern__ vm_object_t vm_object_enter( boolean_t check_named); +/* + * Purgable object state. + */ + +#define VM_OBJECT_NONPURGABLE 0 /* not a purgable object */ +#define VM_OBJECT_PURGABLE_NONVOLATILE 1 /* non-volatile purgable object */ +#define VM_OBJECT_PURGABLE_VOLATILE 2 /* volatile (but intact) purgable object */ +#define VM_OBJECT_PURGABLE_EMPTY 3 /* volatile purgable object that has been emptied */ + +__private_extern__ kern_return_t vm_object_populate_with_private( + vm_object_t object, + vm_object_offset_t offset, + ppnum_t phys_page, + vm_size_t size); + +__private_extern__ kern_return_t adjust_vm_object_cache( + vm_size_t oval, + vm_size_t nval); + /* * Event waiting handling */ @@ -607,9 +660,12 @@ __private_extern__ vm_object_t vm_object_enter( * Object locking macros */ -#define vm_object_lock_init(object) mutex_init(&(object)->Lock, ETAP_VM_OBJ) +#define vm_object_lock_init(object) mutex_init(&(object)->Lock, 0) #define vm_object_lock(object) mutex_lock(&(object)->Lock) #define vm_object_unlock(object) mutex_unlock(&(object)->Lock) #define vm_object_lock_try(object) mutex_try(&(object)->Lock) +#define vm_object_round_page(x) (((vm_object_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) +#define vm_object_trunc_page(x) ((vm_object_offset_t)(x) & ~((signed)PAGE_MASK)) + #endif /* _VM_VM_OBJECT_H_ */ diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index 460ed22d4..d0cfea88e 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,6 +60,8 @@ #ifndef _VM_VM_PAGE_H_ #define _VM_VM_PAGE_H_ +#include + #include #include #include @@ -81,8 +83,8 @@ */ -extern int vm_page_ticket_roll; -extern int vm_page_ticket; +extern unsigned int vm_page_ticket_roll; +extern unsigned int vm_page_ticket; #define VM_PAGE_TICKETS_IN_ROLL 512 @@ -123,21 +125,30 @@ struct vm_page { vm_object_t object; /* which object am I in (O&P) */ vm_object_offset_t offset; /* offset into that object (O,P) */ + /* + * The following word of flags is protected + * by the "page queues" lock. + */ unsigned int wire_count:16, /* how many wired down maps use me? (O&P) */ page_ticket:4, /* age of the page on the */ /* inactive queue. 
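The VM_PAGE_REMOVE macro introduced above keeps the new memq_hint field honest: if the page being unlinked is the hint, the hint first slides to the next page, else the previous one, else NULL. A plain doubly-linked-list sketch of the same fix-up, without the queue_t machinery:

#include <stddef.h>

struct node {
    struct node *next, *prev;
};

struct list {
    struct node *head;          /* NULL-terminated doubly linked list */
    struct node *hint;          /* cached "last touched" node */
};

/* Remove n, first moving the hint off of it, as VM_PAGE_REMOVE does. */
void
list_remove(struct list *l, struct node *n)
{
    if (l->hint == n)
        l->hint = n->next ? n->next : n->prev;   /* may become NULL */

    if (n->prev)
        n->prev->next = n->next;
    else
        l->head = n->next;
    if (n->next)
        n->next->prev = n->prev;
    n->next = n->prev = NULL;
}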
*/ /* boolean_t */ inactive:1, /* page is in inactive list (P) */ active:1, /* page is in active list (P) */ + pageout_queue:1,/* page is on queue for pageout (P) */ laundry:1, /* page is being cleaned now (P)*/ free:1, /* page is on free list (P) */ reference:1, /* page has been used (P) */ pageout:1, /* page wired & busy for pageout (P) */ gobbled:1, /* page used internally (P) */ private:1, /* Page should not be returned to - * the free list (O) */ + * the free list (P) */ zero_fill:1, :0; + /* + * The following word of flags is protected + * by the "VM object" lock. + */ unsigned int page_error:8, /* error from I/O operations */ /* boolean_t */ busy:1, /* page is in transit (O) */ @@ -165,8 +176,7 @@ struct vm_page { /* vm_prot_t */ unlock_request:3,/* Outstanding unlock request (O) */ unusual:1, /* Page is absent, error, restart or page locked */ - discard_request:1,/* a memory_object_discard_request() - * has been sent */ + encrypted:1, /* encrypted for secure swap (O) */ list_req_pending:1, /* pagein/pageout alt mechanism */ /* allows creation of list */ /* requests on pages that are */ @@ -177,14 +187,27 @@ struct vm_page { /* a pageout candidate */ /* we've used up all 32 bits */ - vm_offset_t phys_page; /* Physical address of page, passed + ppnum_t phys_page; /* Physical address of page, passed * to pmap_enter (read-only) */ }; +#define DEBUG_ENCRYPTED_SWAP 1 +#if DEBUG_ENCRYPTED_SWAP +#define ASSERT_PAGE_DECRYPTED(page) \ + MACRO_BEGIN \ + if ((page)->encrypted) { \ + panic("VM page %p should not be encrypted here\n", \ + (page)); \ + } \ + MACRO_END +#else /* DEBUG_ENCRYPTED_SWAP */ +#define ASSERT_PAGE_DECRYPTED(page) assert(!(page)->encrypted) +#endif /* DEBUG_ENCRYPTED_SWAP */ + typedef struct vm_page *vm_page_t; #define VM_PAGE_NULL ((vm_page_t) 0) -#define NEXT_PAGE(m) ((vm_page_t) (m)->pageq.next) +#define NEXT_PAGE(m) ((vm_page_t) (m)->pageq.next) #define NEXT_PAGE_PTR(m) ((vm_page_t *) &(m)->pageq.next) /* @@ -231,36 +254,37 @@ extern vm_offset_t last_phys_addr; /* physical address for last_page */ extern -int vm_page_free_count; /* How many pages are free? */ +unsigned int vm_page_free_count; /* How many pages are free? */ extern -int vm_page_fictitious_count;/* How many fictitious pages are free? */ +unsigned int vm_page_fictitious_count;/* How many fictitious pages are free? */ extern -int vm_page_active_count; /* How many pages are active? */ +unsigned int vm_page_active_count; /* How many pages are active? */ extern -int vm_page_inactive_count; /* How many pages are inactive? */ +unsigned int vm_page_inactive_count; /* How many pages are inactive? */ extern -int vm_page_wire_count; /* How many pages are wired? */ +unsigned int vm_page_wire_count; /* How many pages are wired? */ extern -int vm_page_free_target; /* How many do we want free? */ +unsigned int vm_page_free_target; /* How many do we want free? */ extern -int vm_page_free_min; /* When to wakeup pageout */ +unsigned int vm_page_free_min; /* When to wakeup pageout */ extern -int vm_page_inactive_target;/* How many do we want inactive? */ +unsigned int vm_page_inactive_target;/* How many do we want inactive? */ extern -int vm_page_free_reserved; /* How many pages reserved to do pageout */ +unsigned int vm_page_free_reserved; /* How many pages reserved to do pageout */ extern -int vm_page_laundry_count; /* How many pages being laundered? 
*/ +unsigned int vm_page_throttled_count;/* Count of zero-fill allocations throttled */ +extern +unsigned int vm_page_gobble_count; + extern -int vm_page_burst_count; /* How many pages being laundered to EMM? */ +unsigned int vm_page_purgeable_count;/* How many pages are purgeable now ? */ extern -int vm_page_throttled_count;/* Count of zero-fill allocations throttled */ +uint64_t vm_page_purged_count; /* How many pages got purged so far ? */ decl_mutex_data(,vm_page_queue_lock) /* lock on active and inactive page queues */ decl_mutex_data(,vm_page_queue_free_lock) /* lock on free page queue */ -decl_simple_lock_data(extern,vm_page_preppin_lock) /* lock for prep/pin */ -decl_mutex_data(,vm_page_zero_fill_lock) extern unsigned int vm_page_free_wanted; /* how many threads are waiting for memory */ @@ -268,6 +292,8 @@ extern unsigned int vm_page_free_wanted; extern vm_offset_t vm_page_fictitious_addr; /* (fake) phys_addr of fictitious pages */ +extern boolean_t vm_page_deactivate_hint; + /* * Prototypes for functions exported by this module. */ @@ -302,13 +328,6 @@ extern vm_page_t vm_page_grab(void); extern void vm_page_release( vm_page_t page); -extern void vm_page_release_limbo( - vm_page_t page); - -extern void vm_page_limbo_exchange( - vm_page_t limbo_m, - vm_page_t new_m); - extern boolean_t vm_page_wait( int interruptible ); @@ -422,28 +441,31 @@ extern void vm_page_gobble( #define VM_PAGE_THROTTLED() \ (vm_page_free_count < vm_page_free_min && \ - !current_thread()->vm_privilege && \ + !(current_thread()->options & TH_OPT_VMPRIV) && \ ++vm_page_throttled_count) #define VM_PAGE_WAIT() ((void)vm_page_wait(THREAD_UNINT)) #define vm_page_lock_queues() mutex_lock(&vm_page_queue_lock) #define vm_page_unlock_queues() mutex_unlock(&vm_page_queue_lock) -#define vm_page_pin_lock() simple_lock(&vm_page_preppin_lock) -#define vm_page_pin_unlock() simple_unlock(&vm_page_preppin_lock) #define VM_PAGE_QUEUES_REMOVE(mem) \ MACRO_BEGIN \ + assert(!mem->laundry); \ if (mem->active) { \ + assert(mem->object != kernel_object); \ assert(!mem->inactive); \ queue_remove(&vm_page_queue_active, \ mem, vm_page_t, pageq); \ + mem->pageq.next = NULL; \ + mem->pageq.prev = NULL; \ mem->active = FALSE; \ if (!mem->fictitious) \ vm_page_active_count--; \ } \ \ if (mem->inactive) { \ + assert(mem->object != kernel_object); \ assert(!mem->active); \ if (mem->zero_fill) { \ queue_remove(&vm_page_queue_zf, \ @@ -452,6 +474,8 @@ extern void vm_page_gobble( queue_remove(&vm_page_queue_inactive, \ mem, vm_page_t, pageq); \ } \ + mem->pageq.next = NULL; \ + mem->pageq.prev = NULL; \ mem->inactive = FALSE; \ if (!mem->fictitious) \ vm_page_inactive_count--; \ diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 5fc1f59bf..d75ec79de 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,6 +57,9 @@ * The proverbial page-out daemon. 
*/ +#include + +#include #include #include #include @@ -67,32 +70,61 @@ #include #include #include +#include +#include #include #include -#include + +#include #include +#include +#include +#include #include #include +#include + +#include + #include #include #include #include #include #include -#include -#include +#include /* must be last */ +/* + * ENCRYPTED SWAP: + */ +#ifdef __ppc__ +#include +#endif /* __ppc__ */ +#include <../bsd/crypto/aes/aes.h> extern ipc_port_t memory_manager_default; + +#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE +#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 10000 /* maximum iterations of the active queue to move pages to inactive */ +#endif + +#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE +#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 /* maximum iterations of the inactive queue w/o stealing/cleaning a page */ +#endif + +#ifndef VM_PAGEOUT_DEADLOCK_RELIEF +#define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */ +#endif + +#ifndef VM_PAGEOUT_INACTIVE_RELIEF +#define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */ +#endif + #ifndef VM_PAGE_LAUNDRY_MAX -#define VM_PAGE_LAUNDRY_MAX 16 /* outstanding DMM+EMM page cleans */ +#define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */ #endif /* VM_PAGEOUT_LAUNDRY_MAX */ -#ifndef VM_PAGEOUT_BURST_MAX -#define VM_PAGEOUT_BURST_MAX 6 /* simultaneous EMM page cleans */ -#endif /* VM_PAGEOUT_BURST_MAX */ - #ifndef VM_PAGEOUT_BURST_WAIT #define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */ #endif /* VM_PAGEOUT_BURST_WAIT */ @@ -101,6 +133,15 @@ extern ipc_port_t memory_manager_default; #define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */ #endif /* VM_PAGEOUT_EMPTY_WAIT */ +#ifndef VM_PAGEOUT_DEADLOCK_WAIT +#define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */ +#endif /* VM_PAGEOUT_DEADLOCK_WAIT */ + +#ifndef VM_PAGEOUT_IDLE_WAIT +#define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */ +#endif /* VM_PAGEOUT_IDLE_WAIT */ + + /* * To obtain a reasonable LRU approximation, the inactive queue * needs to be large enough to give pages on it a chance to be @@ -144,10 +185,30 @@ extern ipc_port_t memory_manager_default; */ #ifndef VM_PAGE_FREE_RESERVED -#define VM_PAGE_FREE_RESERVED \ - ((6 * VM_PAGE_LAUNDRY_MAX) + NCPUS) +#define VM_PAGE_FREE_RESERVED(n) \ + ((6 * VM_PAGE_LAUNDRY_MAX) + (n)) #endif /* VM_PAGE_FREE_RESERVED */ + +/* + * must hold the page queues lock to + * manipulate this structure + */ +struct vm_pageout_queue { + queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */ + unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */ + unsigned int pgo_maxlaundry; + + unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */ + pgo_busy:1, /* iothread is currently processing request from pgo_pending */ + pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */ + :0; +}; + +#define VM_PAGE_Q_THROTTLED(q) \ + ((q)->pgo_laundry >= (q)->pgo_maxlaundry) + + /* * Exported variable used to broadcast the activation of the pageout scan * Working Set uses this to throttle its use of pmap removes. In this @@ -160,27 +221,27 @@ unsigned int vm_pageout_scan_event_counter = 0; /* * Forward declarations for internal routines. 
*/ + +static void vm_pageout_garbage_collect(int); +static void vm_pageout_iothread_continue(struct vm_pageout_queue *); +static void vm_pageout_iothread_external(void); +static void vm_pageout_iothread_internal(void); +static void vm_pageout_queue_steal(vm_page_t); + extern void vm_pageout_continue(void); extern void vm_pageout_scan(void); -extern void vm_pageout_throttle(vm_page_t m); -extern vm_page_t vm_pageout_cluster_page( - vm_object_t object, - vm_object_offset_t offset, - boolean_t precious_clean); unsigned int vm_pageout_reserved_internal = 0; unsigned int vm_pageout_reserved_really = 0; -unsigned int vm_page_laundry_max = 0; /* # of clusters outstanding */ -unsigned int vm_page_laundry_min = 0; +unsigned int vm_pageout_idle_wait = 0; /* milliseconds */ unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ -unsigned int vm_pageout_burst_max = 0; -unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */ -unsigned int vm_pageout_burst_min = 0; -unsigned int vm_pageout_burst_loop_throttle = 4096; -unsigned int vm_pageout_pause_count = 0; -unsigned int vm_pageout_pause_max = 0; -unsigned int vm_free_page_pause = 100; /* milliseconds */ +unsigned int vm_pageout_burst_wait = 0; /* milliseconds */ +unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */ +unsigned int vm_pageout_deadlock_relief = 0; +unsigned int vm_pageout_inactive_relief = 0; +unsigned int vm_pageout_burst_active_throttle = 0; +unsigned int vm_pageout_burst_inactive_throttle = 0; /* * Protection against zero fill flushing live working sets derived @@ -210,17 +271,20 @@ unsigned int vm_pageout_inactive_used = 0; /* debugging */ unsigned int vm_pageout_inactive_clean = 0; /* debugging */ unsigned int vm_pageout_inactive_dirty = 0; /* debugging */ unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ +unsigned int vm_pageout_purged_objects = 0; /* debugging */ unsigned int vm_stat_discard = 0; /* debugging */ unsigned int vm_stat_discard_sent = 0; /* debugging */ unsigned int vm_stat_discard_failure = 0; /* debugging */ unsigned int vm_stat_discard_throttle = 0; /* debugging */ -unsigned int vm_pageout_scan_active_emm_throttle = 0; /* debugging */ -unsigned int vm_pageout_scan_active_emm_throttle_success = 0; /* debugging */ -unsigned int vm_pageout_scan_active_emm_throttle_failure = 0; /* debugging */ -unsigned int vm_pageout_scan_inactive_emm_throttle = 0; /* debugging */ -unsigned int vm_pageout_scan_inactive_emm_throttle_success = 0; /* debugging */ -unsigned int vm_pageout_scan_inactive_emm_throttle_failure = 0; /* debugging */ +unsigned int vm_pageout_scan_active_throttled = 0; +unsigned int vm_pageout_scan_inactive_throttled = 0; +unsigned int vm_pageout_scan_throttle = 0; /* debugging */ +unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ +unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ +unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ +unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ +unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ /* * Backing store throttle when BS is exhausted */ @@ -229,6 +293,21 @@ unsigned int vm_backing_store_low = 0; unsigned int vm_pageout_out_of_line = 0; unsigned int vm_pageout_in_place = 0; +/* + * ENCRYPTED SWAP: + * counters and statistics... 
+ */ +unsigned long vm_page_decrypt_counter = 0; +unsigned long vm_page_decrypt_for_upl_counter = 0; +unsigned long vm_page_encrypt_counter = 0; +unsigned long vm_page_encrypt_abort_counter = 0; +unsigned long vm_page_encrypt_already_encrypted_counter = 0; +boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */ + + +struct vm_pageout_queue vm_pageout_queue_internal; +struct vm_pageout_queue vm_pageout_queue_external; + /* * Routine: vm_backing_store_disable @@ -295,7 +374,6 @@ vm_pageout_object_allocate( vm_object_lock(object); vm_object_paging_begin(object); vm_page_lock_queues(); - vm_pageout_throttle(m); vm_page_unlock_queues(); vm_object_unlock(object); @@ -389,14 +467,7 @@ vm_pageout_object_terminate( */ vm_page_lock_queues(); if (m->laundry) { - if (!shadow_internal) - vm_page_burst_count--; - vm_page_laundry_count--; - m->laundry = FALSE; - if (vm_page_laundry_count < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) &vm_page_laundry_count); - } + vm_pageout_throttle_up(m); } /* @@ -418,17 +489,17 @@ vm_pageout_object_terminate( /* * Revoke all access to the page. Since the object is * locked, and the page is busy, this prevents the page - * from being dirtied after the pmap_is_modified() call + * from being dirtied after the pmap_disconnect() call * returns. - */ - pmap_page_protect(m->phys_page, VM_PROT_NONE); - - /* + * * Since the page is left "dirty" but "not modifed", we * can detect whether the page was redirtied during * pageout by checking the modify state. */ - m->dirty = pmap_is_modified(m->phys_page); + if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) + m->dirty = TRUE; + else + m->dirty = FALSE; if (m->dirty) { CLUSTER_STAT(vm_pageout_target_page_dirtied++;) @@ -461,7 +532,7 @@ vm_pageout_object_terminate( /* We do not re-set m->dirty ! */ /* The page was busy so no extraneous activity */ - /* could have occured. COPY_INTO is a read into the */ + /* could have occurred. COPY_INTO is a read into the */ /* new pages. CLEAN_IN_PLACE does actually write */ /* out the pages but handling outside of this code */ /* will take care of resetting dirty. We clear the */ @@ -560,7 +631,6 @@ vm_pageout_setup( vm_object_offset_t offset; register vm_page_t holding_page; register vm_page_t new_m; - register vm_page_t new_page; boolean_t need_to_wire = FALSE; @@ -807,8 +877,6 @@ void vm_pageout_initialize_page( vm_page_t m) { - vm_map_copy_t copy; - vm_object_t new_object; vm_object_t object; vm_object_offset_t paging_offset; vm_page_t holding_page; @@ -850,7 +918,6 @@ vm_pageout_initialize_page( m->cleaning = TRUE; m->pageout = TRUE; vm_page_wire(m); - vm_pageout_throttle(m); vm_page_unlock_queues(); vm_object_unlock(object); @@ -883,366 +950,147 @@ boolean_t allow_clustered_pageouts = FALSE; /* * vm_pageout_cluster: * - * Given a page, page it out, and attempt to clean adjacent pages + * Given a page, queue it to the appropriate I/O thread, + * which will page it out and attempt to clean adjacent pages * in the same operation. * - * The page must be busy, and the object locked. We will take a + * The page must be busy, and the object and queues locked. We will take a * paging reference to prevent deallocation or collapse when we - * temporarily release the object lock. + * release the object lock back at the call site. The I/O thread + * is responsible for consuming this reference * * The page must not be on any pageout queue. 
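/*
 * A hedged sketch of the pmap_disconnect() pattern adopted above: the
 * referenced/modified bits come back atomically with the unmapping, so a
 * store landing between "unmap" and "query" can no longer be missed the
 * way it could with the old pmap_page_protect()/pmap_is_modified() pair.
 * The flag values and the disconnect stub here are invented for
 * illustration.
 */
#include <stdbool.h>

#define MEM_REFERENCED 0x1  /* stand-in for VM_MEM_REFERENCED */
#define MEM_MODIFIED   0x2  /* stand-in for VM_MEM_MODIFIED   */

struct page_state {
    bool reference;
    bool dirty;
};

/* Stub standing in for pmap_disconnect(phys_page). */
static int disconnect_all_mappings(void)
{
    return MEM_MODIFIED;
}

/* Fold the atomically returned refmod bits into the page state, as the
 * pageout termination and steal paths above do. */
static void settle_page_state(struct page_state *m)
{
    int refmod = disconnect_all_mappings();

    m->dirty = (refmod & MEM_MODIFIED) != 0;
    if (refmod & MEM_REFERENCED)
        m->reference = true;
}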
*/ + void -vm_pageout_cluster( - vm_page_t m) +vm_pageout_cluster(vm_page_t m) { vm_object_t object = m->object; - vm_object_offset_t offset = m->offset; /* from vm_object start */ - vm_object_offset_t paging_offset; - vm_object_t new_object; - vm_object_offset_t new_offset; - vm_size_t cluster_size; - vm_object_offset_t cluster_offset; /* from memory_object start */ - vm_object_offset_t cluster_lower_bound; /* from vm_object_start */ - vm_object_offset_t cluster_upper_bound; /* from vm_object_start */ - vm_object_offset_t cluster_start, cluster_end;/* from vm_object start */ - vm_object_offset_t offset_within_cluster; - vm_size_t length_of_data; - vm_page_t friend, holding_page; - kern_return_t rc; - boolean_t precious_clean = TRUE; - int pages_in_cluster; - - CLUSTER_STAT(int pages_at_higher_offsets = 0;) - CLUSTER_STAT(int pages_at_lower_offsets = 0;) + struct vm_pageout_queue *q; + XPR(XPR_VM_PAGEOUT, "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", - (integer_t)object, offset, (integer_t)m, 0, 0); + (integer_t)object, m->offset, (integer_t)m, 0, 0); - CLUSTER_STAT(vm_pageout_cluster_clusters++;) + /* + * Only a certain kind of page is appreciated here. + */ + assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0)); + assert(!m->cleaning && !m->pageout && !m->inactive && !m->active); /* * protect the object from collapse - * locking in the object's paging_offset. */ vm_object_paging_begin(object); - paging_offset = m->offset + object->paging_offset; /* - * Only a certain kind of page is appreciated here. - */ - assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0)); - assert(!m->cleaning && !m->pageout && !m->inactive && !m->active); - - cluster_size = object->cluster_size; - - assert(cluster_size >= PAGE_SIZE); - if (cluster_size < PAGE_SIZE) cluster_size = PAGE_SIZE; - assert(object->pager_created && object->pager_initialized); - assert(object->internal || object->pager_ready); - - if (m->precious && !m->dirty) - precious_clean = TRUE; - - if (!object->pager_trusted || !allow_clustered_pageouts) - cluster_size = PAGE_SIZE; - - cluster_offset = paging_offset & (vm_object_offset_t)(cluster_size - 1); - /* bytes from beginning of cluster */ - /* - * Due to unaligned mappings, we have to be careful - * of negative offsets into the VM object. Clip the cluster - * boundary to the VM object, not the memory object. + * set the page for future call to vm_fault_list_request + * page should already be marked busy */ - if (offset > cluster_offset) { - cluster_lower_bound = offset - cluster_offset; - /* from vm_object */ - } else { - cluster_lower_bound = 0; - } - cluster_upper_bound = (offset - cluster_offset) + - (vm_object_offset_t)cluster_size; - - /* set the page for future call to vm_fault_list_request */ - holding_page = NULL; - vm_page_lock_queues(); - m->busy = TRUE; + vm_page_wire(m); m->list_req_pending = TRUE; m->cleaning = TRUE; m->pageout = TRUE; - vm_page_wire(m); - vm_pageout_throttle(m); - vm_page_unlock_queues(); - vm_object_unlock(object); - - /* - * Search backward for adjacent eligible pages to clean in - * this operation. 
- */ - - cluster_start = offset; - if (offset) { /* avoid wrap-around at zero */ - for (cluster_start = offset - PAGE_SIZE_64; - cluster_start >= cluster_lower_bound; - cluster_start -= PAGE_SIZE_64) { - assert(cluster_size > PAGE_SIZE); - - vm_object_lock(object); - vm_page_lock_queues(); - - if ((friend = vm_pageout_cluster_page(object, cluster_start, - precious_clean)) == VM_PAGE_NULL) { - vm_page_unlock_queues(); - vm_object_unlock(object); - break; - } - new_offset = (cluster_start + object->paging_offset) - & (cluster_size - 1); - - assert(new_offset < cluster_offset); - m->list_req_pending = TRUE; - m->cleaning = TRUE; -/* do nothing except advance the write request, all we really need to */ -/* do is push the target page and let the code at the other end decide */ -/* what is really the right size */ - if (vm_page_free_count <= vm_page_free_reserved) { - m->busy = TRUE; - m->pageout = TRUE; - vm_page_wire(m); - } - - vm_page_unlock_queues(); - vm_object_unlock(object); - if(m->dirty || m->object->internal) { - CLUSTER_STAT(pages_at_lower_offsets++;) - } - - } - cluster_start += PAGE_SIZE_64; - } - assert(cluster_start >= cluster_lower_bound); - assert(cluster_start <= offset); - /* - * Search forward for adjacent eligible pages to clean in - * this operation. - */ - for (cluster_end = offset + PAGE_SIZE_64; - cluster_end < cluster_upper_bound; - cluster_end += PAGE_SIZE_64) { - assert(cluster_size > PAGE_SIZE); + m->laundry = TRUE; - vm_object_lock(object); - vm_page_lock_queues(); - - if ((friend = vm_pageout_cluster_page(object, cluster_end, - precious_clean)) == VM_PAGE_NULL) { - vm_page_unlock_queues(); - vm_object_unlock(object); - break; - } - new_offset = (cluster_end + object->paging_offset) - & (cluster_size - 1); - - assert(new_offset < cluster_size); - m->list_req_pending = TRUE; - m->cleaning = TRUE; -/* do nothing except advance the write request, all we really need to */ -/* do is push the target page and let the code at the other end decide */ -/* what is really the right size */ - if (vm_page_free_count <= vm_page_free_reserved) { - m->busy = TRUE; - m->pageout = TRUE; - vm_page_wire(m); - } - - vm_page_unlock_queues(); - vm_object_unlock(object); - - if(m->dirty || m->object->internal) { - CLUSTER_STAT(pages_at_higher_offsets++;) - } - } - assert(cluster_end <= cluster_upper_bound); - assert(cluster_end >= offset + PAGE_SIZE); - - /* - * (offset - cluster_offset) is beginning of cluster_object - * relative to vm_object start. - */ - offset_within_cluster = cluster_start - (offset - cluster_offset); - length_of_data = cluster_end - cluster_start; - - assert(offset_within_cluster < cluster_size); - assert((offset_within_cluster + length_of_data) <= cluster_size); - - rc = KERN_SUCCESS; - assert(rc == KERN_SUCCESS); - - pages_in_cluster = length_of_data/PAGE_SIZE; - -#if MACH_CLUSTER_STATS - (cluster_stats[pages_at_lower_offsets].pages_at_lower_offsets)++; - (cluster_stats[pages_at_higher_offsets].pages_at_higher_offsets)++; - (cluster_stats[pages_in_cluster].pages_in_cluster)++; -#endif /* MACH_CLUSTER_STATS */ - - /* - * Send the data to the pager. 
- */ - paging_offset = cluster_start + object->paging_offset; - - rc = memory_object_data_return(object->pager, - paging_offset, - length_of_data, - !precious_clean, - FALSE); - - vm_object_lock(object); - vm_object_paging_end(object); + if (object->internal == TRUE) + q = &vm_pageout_queue_internal; + else + q = &vm_pageout_queue_external; + q->pgo_laundry++; - if (holding_page) { - assert(!object->pager_trusted); - VM_PAGE_FREE(holding_page); - vm_object_paging_end(object); + m->pageout_queue = TRUE; + queue_enter(&q->pgo_pending, m, vm_page_t, pageq); + + if (q->pgo_idle == TRUE) { + q->pgo_idle = FALSE; + thread_wakeup((event_t) &q->pgo_pending); } } -/* - * Trusted pager throttle. - * Object and page queues must be locked. - */ -void -vm_pageout_throttle( - register vm_page_t m) -{ - register vm_object_t object; - - /* - * need to keep track of the object we - * started with... if we drop the object lock - * due to the throttle, it's possible that someone - * else will gather this page into an I/O if this - * is an external object... the page will then be - * potentially freed before we unwedge from the - * throttle... this is ok since no one plays with - * the page directly after the throttle... the object - * and offset are passed into the memory_object_data_return - * function where eventually it's relooked up against the - * object... if it's changed state or there is no longer - * a page at that offset, the pageout just finishes without - * issuing an I/O - */ - object = m->object; - - assert(!m->laundry); - m->laundry = TRUE; - if (!object->internal) - vm_page_burst_count++; - vm_page_laundry_count++; - - while (vm_page_laundry_count > vm_page_laundry_max) { - /* - * Set the threshold for when vm_page_free() - * should wake us up. - */ - vm_page_laundry_min = vm_page_laundry_max/2; - - assert_wait((event_t) &vm_page_laundry_count, THREAD_UNINT); - vm_page_unlock_queues(); - vm_object_unlock(object); - /* - * Pause to let the default pager catch up. - */ - thread_block((void (*)(void)) 0); - vm_object_lock(object); - vm_page_lock_queues(); - } -} +unsigned long vm_pageout_throttle_up_count = 0; /* - * The global variable vm_pageout_clean_active_pages controls whether - * active pages are considered valid to be cleaned in place during a - * clustered pageout. Performance measurements are necessary to determine - * the best policy. - */ -int vm_pageout_clean_active_pages = 1; -/* - * vm_pageout_cluster_page: [Internal] + * A page is back from laundry. See if there are some pages waiting to + * go to laundry and if we can let some of them go now. * - * return a vm_page_t to the page at (object,offset) if it is appropriate - * to clean in place. Pages that are non-existent, busy, absent, already - * cleaning, or not dirty are not eligible to be cleaned as an adjacent - * page in a cluster. - * - * The object must be locked on entry, and remains locked throughout - * this call. + * Object and page queues must be locked. 
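/*
 * A compact model of the laundry handshake implemented above, assuming
 * plain counters in place of the real queues and thread_wakeup():
 * vm_pageout_cluster() charges pgo_laundry when it hands a page to an
 * I/O thread, and vm_pageout_throttle_up() uncharges it on completion,
 * waking vm_pageout_scan if it had parked itself on this queue.
 */
#include <assert.h>
#include <stdbool.h>

struct laundry_queue {
    unsigned int laundry;  /* pgo_laundry */
    bool throttled;        /* pgo_throttled */
    bool wakeup_posted;    /* stands in for thread_wakeup() */
};

/* Enqueue side, as in vm_pageout_cluster(). */
static void cluster_enqueue(struct laundry_queue *q)
{
    q->laundry++;
}

/* Completion side, as in vm_pageout_throttle_up(). */
static void throttle_up(struct laundry_queue *q)
{
    assert(q->laundry > 0);
    q->laundry--;
    if (q->throttled) {
        q->throttled = false;
        q->wakeup_posted = true;  /* thread_wakeup(&q->pgo_laundry) */
    }
}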
*/ - -vm_page_t -vm_pageout_cluster_page( - vm_object_t object, - vm_object_offset_t offset, - boolean_t precious_clean) +void +vm_pageout_throttle_up( + vm_page_t m) { - vm_page_t m; - - XPR(XPR_VM_PAGEOUT, - "vm_pageout_cluster_page, object 0x%X offset 0x%X\n", - (integer_t)object, offset, 0, 0, 0); + struct vm_pageout_queue *q; - if ((m = vm_page_lookup(object, offset)) == VM_PAGE_NULL) - return(VM_PAGE_NULL); + vm_pageout_throttle_up_count++; - if (m->busy || m->absent || m->cleaning || - (m->wire_count != 0) || m->error) - return(VM_PAGE_NULL); + assert(m->laundry); + assert(m->object != VM_OBJECT_NULL); + assert(m->object != kernel_object); - if (vm_pageout_clean_active_pages) { - if (!m->active && !m->inactive) return(VM_PAGE_NULL); - } else { - if (!m->inactive) return(VM_PAGE_NULL); - } - - assert(!m->private); - assert(!m->fictitious); + if (m->object->internal == TRUE) + q = &vm_pageout_queue_internal; + else + q = &vm_pageout_queue_external; - if (!m->dirty) m->dirty = pmap_is_modified(m->phys_page); + m->laundry = FALSE; + q->pgo_laundry--; - if (precious_clean) { - if (!m->precious || !m->dirty) - return(VM_PAGE_NULL); - } else { - if (!m->dirty) - return(VM_PAGE_NULL); + if (q->pgo_throttled == TRUE) { + q->pgo_throttled = FALSE; + thread_wakeup((event_t) &q->pgo_laundry); } - return(m); } + /* * vm_pageout_scan does the dirty work for the pageout daemon. * It returns with vm_page_queue_free_lock held and * vm_page_free_wanted == 0. */ -extern void vm_pageout_scan_continue(void); /* forward; */ -#define DELAYED_UNLOCK_LIMIT 50 -#define LOCAL_FREED_LIMIT 50 +#define DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER) + +#define FCS_IDLE 0 +#define FCS_DELAYED 1 +#define FCS_DEADLOCK_DETECTED 2 + +struct flow_control { + int state; + mach_timespec_t ts; +}; + +extern kern_return_t sysclk_gettime(mach_timespec_t *); + void vm_pageout_scan(void) { - boolean_t now = FALSE; - unsigned int laundry_pages; - int loop_count = 0; - int loop_bursted_count = 0; - int active_loop_detect; + unsigned int loop_count = 0; + unsigned int inactive_burst_count = 0; + unsigned int active_burst_count = 0; vm_page_t local_freeq = 0; int local_freed = 0; int delayed_unlock = 0; int need_internal_inactive = 0; - int need_pause; + int refmod_state = 0; + int vm_pageout_deadlock_target = 0; + struct vm_pageout_queue *iq; + struct vm_pageout_queue *eq; + struct flow_control flow_control; + boolean_t active_throttled = FALSE; + boolean_t inactive_throttled = FALSE; + mach_timespec_t ts; + unsigned int msecs = 0; + vm_object_t object; + + + flow_control.state = FCS_IDLE; + iq = &vm_pageout_queue_internal; + eq = &vm_pageout_queue_external; XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); @@ -1270,117 +1118,139 @@ vm_pageout_scan(void) * clean pages. They probably aren't running, because they * aren't vm-privileged. If we kept sending dirty pages to them, * we could exhaust the free list. - * - * consider_zone_gc should be last, because the other operations - * might return memory to zones. */ - Restart: + vm_page_lock_queues(); + delayed_unlock = 1; + - stack_collect(); - consider_task_collect(); - consider_machine_collect(); - consider_zone_gc(); +Restart: + /* + * Recalculate vm_page_inactive_target. + */ + vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + + vm_page_inactive_count); + object = NULL; for (;;) { vm_page_t m; - /* - * Recalculate vm_page_inactivate_target.
- */ if (delayed_unlock == 0) vm_page_lock_queues(); - vm_page_inactive_target = - VM_PAGE_INACTIVE_TARGET(vm_page_active_count + - vm_page_inactive_count); - active_loop_detect = vm_page_active_count; + active_burst_count = vm_page_active_count; + + if (active_burst_count > vm_pageout_burst_active_throttle) + active_burst_count = vm_pageout_burst_active_throttle; + /* * Move pages from active to inactive. */ while ((need_internal_inactive || vm_page_inactive_count < vm_page_inactive_target) && !queue_empty(&vm_page_queue_active) && - ((active_loop_detect--) > 0)) { + ((active_burst_count--) > 0)) { - need_pause = 1; vm_pageout_active++; m = (vm_page_t) queue_first(&vm_page_queue_active); - object = m->object; + + assert(m->active && !m->inactive); + assert(!m->laundry); + assert(m->object != kernel_object); /* - * If we're getting really low on memory, - * or we have already exceed the burst - * count for the external pagers, - * try skipping to a page that will go - * directly to the default_pager. + * Try to lock object; since we've already got the + * page queues lock, we can only 'try' for this one. + * if the 'try' fails, we need to do a mutex_pause + * to allow the owner of the object lock a chance to + * run... otherwise, we're likely to trip over this + * object in the same state as we work our way through + * the queue... clumps of pages associated with the same + * object are fairly typical on the inactive and active queues */ - if (need_internal_inactive && - IP_VALID(memory_manager_default)) { - vm_pageout_scan_active_emm_throttle++; - - assert(m->active && !m->inactive); - - if (vm_object_lock_try(object)) { - if (object->internal) - goto object_locked_active; - - if (!m->dirty) - m->dirty = pmap_is_modified(m->phys_page); - if (!m->dirty && !m->precious) - goto object_locked_active; - - vm_object_unlock(object); - - need_pause = 0; + if (m->object != object) { + if (object != NULL) { + vm_object_unlock(object); + object = NULL; } - goto object_lock_try_active_failed; - } - assert(m->active && !m->inactive); - - if (!vm_object_lock_try(object)) { - /* - * Move page to end and continue. - */ -object_lock_try_active_failed: - queue_remove(&vm_page_queue_active, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_active, m, - vm_page_t, pageq); - - if (local_freeq) { - vm_page_free_list(local_freeq); + if (!vm_object_lock_try(m->object)) { + /* + * move page to end of active queue and continue + */ + queue_remove(&vm_page_queue_active, m, + vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, + vm_page_t, pageq); - local_freeq = 0; - local_freed = 0; - } - if (need_pause) { - delayed_unlock = 0; - - vm_page_unlock_queues(); - mutex_pause(); - vm_page_lock_queues(); + goto done_with_activepage; } - continue; + object = m->object; } - - object_locked_active: /* - * If the page is busy, then we pull it - * off the active queue and leave it alone. + * if the page is BUSY, then we pull it + * off the active queue and leave it alone. 
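/*
 * A rough model of the rotate-on-contention idiom used above. Because the
 * scan already holds the page-queue lock it may only try-lock the owning
 * object; on failure the page is moved to the tail of the queue so the
 * scan makes progress elsewhere (pages of one object tend to clump
 * together on the queues). The list types here are invented for
 * illustration.
 */
#include <stdbool.h>
#include <stddef.h>

struct page {
    struct page *next, *prev;
    bool object_lock_busy;  /* stands in for a failed vm_object_lock_try */
};

/* Circular doubly linked queue with a dummy head, like queue_head_t. */
static void q_init(struct page *head)
{
    head->next = head->prev = head;
}

static void q_remove(struct page *p)
{
    p->prev->next = p->next;
    p->next->prev = p->prev;
}

static void q_enter_tail(struct page *head, struct page *p)
{
    p->prev = head->prev;
    p->next = head;
    head->prev->next = p;
    head->prev = p;
}

/* One scan step: rotate on contention, otherwise return a candidate. */
static struct page *scan_step(struct page *head)
{
    struct page *m = head->next;  /* queue_first() */

    if (m->object_lock_busy) {
        q_remove(m);
        q_enter_tail(head, m);
        return NULL;  /* caller may mutex_pause() and retry */
    }
    return m;
}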
+ * when BUSY is cleared, it will get stuck + * back on the appropriate queue */ - if (m->busy) { - vm_object_unlock(object); queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); - m->active = FALSE; + m->pageq.next = NULL; + m->pageq.prev = NULL; + if (!m->fictitious) vm_page_active_count--; - continue; + m->active = FALSE; + + goto done_with_activepage; } + if (need_internal_inactive) { + /* + * If we're unable to make forward progress + * with the current set of pages on the + * inactive queue due to busy objects or + * throttled pageout queues, then + * move a page that is already clean + * or belongs to a pageout queue that + * isn't currently throttled + */ + active_throttled = FALSE; + if (object->internal) { + if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default))) + active_throttled = TRUE; + } else if (VM_PAGE_Q_THROTTLED(eq)) { + active_throttled = TRUE; + } + if (active_throttled == TRUE) { + if (!m->dirty) { + refmod_state = pmap_get_refmod(m->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + m->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + } + if (m->dirty || m->precious) { + /* + * page is dirty and targets a THROTTLED queue + * so all we can do is move it back to the + * end of the active queue to get it out + * of the way + */ + queue_remove(&vm_page_queue_active, m, + vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, + vm_page_t, pageq); + + vm_pageout_scan_active_throttled++; + + goto done_with_activepage; + } + } + vm_pageout_scan_active_throttle_success++; + need_internal_inactive--; + } /* * Deactivate the page while holding the object * locked, so we know the page is still not busy. @@ -1389,115 +1259,205 @@ object_lock_try_active_failed: * absent or fictitious, but vm_page_deactivate * can handle that. */ + vm_page_deactivate(m); +done_with_activepage: + if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - if (need_internal_inactive) { - /* found one ! */ - vm_pageout_scan_active_emm_throttle_success++; - need_internal_inactive--; + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } + if (local_freeq) { + vm_page_free_list(local_freeq); + + local_freeq = 0; + local_freed = 0; + } + delayed_unlock = 0; + vm_page_unlock_queues(); + + mutex_pause(); + vm_page_lock_queues(); + /* + * continue the while loop processing + * the active queue... need to hold + * the page queues lock + */ + continue; } - vm_page_deactivate(m); - vm_object_unlock(object); } + + + + /********************************************************************** + * above this point we're playing with the active queue + * below this point we're playing with the throttling mechanisms + * and the inactive queue + **********************************************************************/ + + + /* * We are done if we have met our target *and* * nobody is still waiting for a page. */ if (vm_page_free_count + local_freed >= vm_page_free_target) { + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } if (local_freeq) { vm_page_free_list(local_freeq); local_freeq = 0; local_freed = 0; } - - consider_machine_adjust(); - mutex_lock(&vm_page_queue_free_lock); if ((vm_page_free_count >= vm_page_free_target) && (vm_page_free_wanted == 0)) { - delayed_unlock = 0; vm_page_unlock_queues(); - break; + + thread_wakeup((event_t) &vm_pageout_garbage_collect); + return; } mutex_unlock(&vm_page_queue_free_lock); } + /* * Sometimes we have to pause: * 1) No inactive pages - nothing to do. 
- * 2) Flow control - nothing but external pages and - * we have to wait for untrusted pagers to catch up. + * 2) Flow control - default pageout queue is full + * 3) Loop control - no acceptable pages found on the inactive queue + * within the last vm_pageout_burst_inactive_throttle iterations */ + if ((queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf))) { + vm_pageout_scan_empty_throttle++; + msecs = vm_pageout_empty_wait; + goto vm_pageout_scan_delay; + + } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) { + vm_pageout_scan_burst_throttle++; + msecs = vm_pageout_burst_wait; + goto vm_pageout_scan_delay; + + } else if (VM_PAGE_Q_THROTTLED(iq)) { + + switch (flow_control.state) { + + case FCS_IDLE: +reset_deadlock_timer: + ts.tv_sec = vm_pageout_deadlock_wait / 1000; + ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; + sysclk_gettime(&flow_control.ts); + ADD_MACH_TIMESPEC(&flow_control.ts, &ts); + + flow_control.state = FCS_DELAYED; + msecs = vm_pageout_deadlock_wait; - loop_count++; - if ((queue_empty(&vm_page_queue_inactive) && - queue_empty(&vm_page_queue_zf)) || - loop_bursted_count >= vm_pageout_burst_loop_throttle) { + break; + + case FCS_DELAYED: + sysclk_gettime(&ts); + + if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { + /* + * the pageout thread for the default pager is potentially + * deadlocked since the + * default pager queue has been throttled for more than the + * allowable time... we need to move some clean pages or dirty + * pages belonging to the external pagers if they aren't throttled + * vm_page_free_wanted represents the number of threads currently + * blocked waiting for pages... we'll move one page for each of + * these plus a fixed amount to break the logjam... once we're done + * moving this number of pages, we'll re-enter the FCS_DELAYED state + * with a new timeout target since we have no way of knowing + * whether we've broken the deadlock except through observation + * of the queue associated with the default pager... we need to + * stop moving pages and allow the system to run to see what + * state it settles into. */ + vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted; + vm_pageout_scan_deadlock_detected++; + flow_control.state = FCS_DEADLOCK_DETECTED; - unsigned int pages, msecs; - int wait_result; - - consider_machine_adjust(); - /* - * vm_pageout_burst_wait is msecs/page. - * If there is nothing for us to do, we wait - * at least vm_pageout_empty_wait msecs. - */ - pages = vm_page_burst_count; - - if (pages) { - msecs = pages * vm_pageout_burst_wait; - } else { - printf("Warning: No physical memory suitable for pageout or reclaim, pageout thread temporarily going to sleep\n"); - msecs = vm_free_page_pause; - } + thread_wakeup((event_t) &vm_pageout_garbage_collect); + goto consider_inactive; + } + /* + * just resniff instead of trying + * to compute a new delay time...
we're going to be + * awakened immediately upon a laundry completion, + * so we won't wait any longer than necessary + */ + msecs = vm_pageout_idle_wait; + break; - if (queue_empty(&vm_page_queue_inactive) && - queue_empty(&vm_page_queue_zf) && - (msecs < vm_pageout_empty_wait)) - msecs = vm_pageout_empty_wait; + case FCS_DEADLOCK_DETECTED: + if (vm_pageout_deadlock_target) + goto consider_inactive; + goto reset_deadlock_timer; + } + vm_pageout_scan_throttle++; + iq->pgo_throttled = TRUE; +vm_pageout_scan_delay: + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } if (local_freeq) { vm_page_free_list(local_freeq); local_freeq = 0; local_freed = 0; } - delayed_unlock = 0; - vm_page_unlock_queues(); + assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); - assert_wait_timeout(msecs, THREAD_INTERRUPTIBLE); counter(c_vm_pageout_scan_block++); - /* - * Unfortunately, we don't have call_continuation - * so we can't rely on tail-recursion. - */ - wait_result = thread_block((void (*)(void)) 0); - if (wait_result != THREAD_TIMED_OUT) - thread_cancel_timer(); - vm_pageout_scan_continue(); + vm_page_unlock_queues(); + + thread_block(THREAD_CONTINUE_NULL); + + vm_page_lock_queues(); + delayed_unlock = 1; + + iq->pgo_throttled = FALSE; if (loop_count >= vm_page_inactive_count) { - if (vm_page_burst_count >= vm_pageout_burst_max) { + if (VM_PAGE_Q_THROTTLED(eq) || VM_PAGE_Q_THROTTLED(iq)) { /* * Make sure we move enough "appropriate" * pages to the inactive queue before trying * again. */ - need_internal_inactive = vm_page_laundry_max; + need_internal_inactive = vm_pageout_inactive_relief; } loop_count = 0; } - loop_bursted_count = 0; + inactive_burst_count = 0; + goto Restart; /*NOTREACHED*/ } + + flow_control.state = FCS_IDLE; +consider_inactive: + loop_count++; + inactive_burst_count++; vm_pageout_inactive++; + if (!queue_empty(&vm_page_queue_inactive)) { + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + + if (m->clustered && (m->no_isync == TRUE)) { + goto use_this_page; + } + } if (vm_zf_count < vm_accellerate_zf_pageout_trigger) { vm_zf_iterator = 0; } else { @@ -1506,7 +1466,7 @@ object_lock_try_active_failed: vm_zf_iterator = 0; } } - if(queue_empty(&vm_page_queue_zf) || + if (queue_empty(&vm_page_queue_zf) || (((last_page_zf) || (vm_zf_iterator == 0)) && !queue_empty(&vm_page_queue_inactive))) { m = (vm_page_t) queue_first(&vm_page_queue_inactive); @@ -1515,84 +1475,79 @@ object_lock_try_active_failed: m = (vm_page_t) queue_first(&vm_page_queue_zf); last_page_zf = 1; } - object = m->object; - - need_pause = 1; - - if (vm_page_burst_count >= vm_pageout_burst_max && - IP_VALID(memory_manager_default)) { - /* - * We're throttling external pagers. - * Try to select a page that would - * go directly to the default_pager - * or that is clean... - */ - vm_pageout_scan_inactive_emm_throttle++; - - assert(!m->active && m->inactive); +use_this_page: + assert(!m->active && m->inactive); + assert(!m->laundry); + assert(m->object != kernel_object); - if (vm_object_lock_try(object)) { - if (object->internal) { - /* found one ! */ - vm_pageout_scan_inactive_emm_throttle_success++; - goto object_locked_inactive; - } - if (!m->dirty) - m->dirty = pmap_is_modified(m->phys_page); - if (!m->dirty && !m->precious) { - /* found one ! */ - vm_pageout_scan_inactive_emm_throttle_success++; - goto object_locked_inactive; + /* + * Try to lock object; since we've already got the + * page queues lock, we can only 'try' for this one.
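/*
 * A hedged model of the three-state flow control machine above, using the
 * default tunables defined earlier (300ms deadlock wait, 10ms idle wait,
 * a relief target of 100 pages) and time_t seconds in place of
 * mach_timespec_t. Returns the milliseconds to sleep, or 0 when the scan
 * should keep running to move deadlock-relief pages. State names are
 * renamed stand-ins for FCS_IDLE/FCS_DELAYED/FCS_DEADLOCK_DETECTED.
 */
#include <time.h>

enum fcs_state { STATE_IDLE, STATE_DELAYED, STATE_DEADLOCK_DETECTED };

struct flow_ctl {
    enum fcs_state state;
    time_t deadline;
};

static unsigned int throttle_step(struct flow_ctl *fc,
                                  unsigned int *deadlock_target,
                                  unsigned int free_wanted)
{
    time_t now = time(NULL);

    switch (fc->state) {
    case STATE_IDLE:  /* like reset_deadlock_timer: */
        fc->deadline = now + 1;  /* ~vm_pageout_deadlock_wait from now */
        fc->state = STATE_DELAYED;
        return 300;
    case STATE_DELAYED:
        if (now >= fc->deadline) {
            /* default pager looks deadlocked: move one page per
             * waiting thread plus a fixed relief amount */
            *deadlock_target = 100 + free_wanted;
            fc->state = STATE_DEADLOCK_DETECTED;
            return 0;
        }
        return 10;  /* ~vm_pageout_idle_wait; resniff on each wakeup */
    case STATE_DEADLOCK_DETECTED:
        if (*deadlock_target != 0)
            return 0;  /* keep stealing relief pages */
        fc->state = STATE_IDLE;
        return throttle_step(fc, deadlock_target, free_wanted);
    }
    return 0;
}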
+ * if the 'try' fails, we need to do a mutex_pause + * to allow the owner of the object lock a chance to + * run... otherwise, we're likely to trip over this + * object in the same state as we work our way through + * the queue... clumps of pages associated with the same + * object are fairly typical on the inactive and active queues + */ + if (m->object != object) { + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } + if (!vm_object_lock_try(m->object)) { + /* + * Move page to end and continue. + * Don't re-issue ticket + */ + if (m->zero_fill) { + queue_remove(&vm_page_queue_zf, m, + vm_page_t, pageq); + queue_enter(&vm_page_queue_zf, m, + vm_page_t, pageq); + } else { + queue_remove(&vm_page_queue_inactive, m, + vm_page_t, pageq); + queue_enter(&vm_page_queue_inactive, m, + vm_page_t, pageq); } - vm_object_unlock(object); + vm_pageout_inactive_nolock++; + + /* + * force us to dump any collected free pages + * and to pause before moving on + */ + delayed_unlock = DELAYED_UNLOCK_LIMIT + 1; - need_pause = 0; + goto done_with_inactivepage; } - loop_bursted_count++; - goto object_lock_try_inactive_failed; + object = m->object; } - - assert(!m->active && m->inactive); - /* - * Try to lock object; since we've got the - * page queues lock, we can only try for this one. + * If the page belongs to a purgable object with no pending copies + * against it, then we reap all of the pages in the object + * and note that the object has been "emptied". It'll be up to the + * application to discover this and recreate its contents if desired. */ + if ((object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) && + object->copy == VM_OBJECT_NULL) { - if (!vm_object_lock_try(object)) { -object_lock_try_inactive_failed: + (void) vm_object_purge(object); + vm_pageout_purged_objects++; /* - * Move page to end and continue. - * Don't re-issue ticket + * we've just taken all of the pages from this object, + * so drop the lock now since we're not going to find + * any more pages belonging to it anytime soon */ - if (m->zero_fill) { - queue_remove(&vm_page_queue_zf, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_zf, m, - vm_page_t, pageq); - } else { - queue_remove(&vm_page_queue_inactive, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_inactive, m, - vm_page_t, pageq); - } - if (local_freeq) { - vm_page_free_list(local_freeq); - - local_freeq = 0; - local_freed = 0; - } - delayed_unlock = 0; - vm_page_unlock_queues(); + vm_object_unlock(object); + object = NULL; - if (need_pause) { - mutex_pause(); - vm_pageout_inactive_nolock++; - } - continue; + inactive_burst_count = 0; + + goto done_with_inactivepage; } - object_locked_inactive: /* * Paging out pages of external objects which * are currently being created must be avoided. @@ -1614,7 +1569,7 @@ object_lock_try_inactive_failed: * one of its logically adjacent fellows is * targeted. */ - if(m->zero_fill) { + if (m->zero_fill) { queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); queue_enter(&vm_page_queue_zf, m, @@ -1629,24 +1584,20 @@ object_lock_try_inactive_failed: last_page_zf = 0; vm_zf_iterator = 1; } - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - vm_object_unlock(object); vm_pageout_inactive_avoid++; - continue; - } + goto done_with_inactivepage; + } /* * Remove the page from the inactive list.
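/*
 * A toy model of the purgable-object shortcut above: if the page's object
 * as a whole is volatile and nothing is snapshotting it (no copy object),
 * the scan reaps every resident page at once via vm_object_purge() rather
 * than stealing page by page. The enum and counter are invented here.
 */
#include <stddef.h>

enum purgable_state { NONPURGABLE, PURGABLE_VOLATILE, PURGABLE_EMPTY };

struct object_model {
    enum purgable_state purgable;
    struct object_model *copy;  /* pending copy-on-write snapshot */
    unsigned int resident_pages;
};

/* Returns the number of pages reclaimed in one shot. */
static unsigned int maybe_purge(struct object_model *o)
{
    unsigned int freed = 0;

    if ((o->purgable == PURGABLE_VOLATILE ||
         o->purgable == PURGABLE_EMPTY) && o->copy == NULL) {
        freed = o->resident_pages;  /* vm_object_purge(object) */
        o->resident_pages = 0;
        o->purgable = PURGABLE_EMPTY;
    }
    return freed;
}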
*/ - - if(m->zero_fill) { + if (m->zero_fill) { queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); } else { queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); } + m->pageq.next = NULL; + m->pageq.prev = NULL; m->inactive = FALSE; if (!m->fictitious) vm_page_inactive_count--; @@ -1656,14 +1607,9 @@ object_lock_try_inactive_failed: * Somebody is already playing with this page. * Leave it off the pageout queues. */ - - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - vm_object_unlock(object); vm_pageout_inactive_busy++; - continue; + + goto done_with_inactivepage; } /* @@ -1672,29 +1618,25 @@ object_lock_try_inactive_failed: if (m->absent || m->error) { vm_pageout_inactive_absent++; - reclaim_page: - +reclaim_page: + if (vm_pageout_deadlock_target) { + vm_pageout_scan_inactive_throttle_success++; + vm_pageout_deadlock_target--; + } if (m->tabled) vm_page_remove(m); /* clears tabled, object, offset */ if (m->absent) vm_object_absent_release(object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); m->pageq.next = (queue_entry_t)local_freeq; local_freeq = m; + local_freed++; - if (local_freed++ > LOCAL_FREED_LIMIT) { - vm_page_free_list(local_freeq); - - local_freeq = 0; - local_freed = 0; - } - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - vm_object_unlock(object); - loop_bursted_count = 0; - continue; + inactive_burst_count = 0; + + goto done_with_inactivepage; } assert(!m->private); @@ -1708,240 +1650,163 @@ object_lock_try_inactive_failed: */ if (m->cleaning) { -#if MACH_CLUSTER_STATS - vm_pageout_cluster_conversions++; -#endif m->busy = TRUE; m->pageout = TRUE; m->dump_cleaning = TRUE; vm_page_wire(m); - vm_object_unlock(object); - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - loop_bursted_count = 0; - continue; + CLUSTER_STAT(vm_pageout_cluster_conversions++); + + inactive_burst_count = 0; + + goto done_with_inactivepage; } /* * If it's being used, reactivate. * (Fictitious pages are either busy or absent.) 
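/*
 * The reclaim_page: path above batches freed pages on local_freeq through
 * their pageq links instead of taking the free-list lock once per page.
 * The same idiom in isolation, with vm_page_free_list() reduced to a stub:
 */
#include <stddef.h>

struct page_link {
    struct page_link *next;
};

/* Accumulate a reclaimed page on the private chain. */
static void local_reclaim(struct page_link **local_freeq, int *local_freed,
                          struct page_link *m)
{
    m->next = *local_freeq;
    *local_freeq = m;
    (*local_freed)++;
}

/* Hand the whole chain to the allocator in one locked operation. */
static void flush_local_freeq(struct page_link **local_freeq,
                              int *local_freed)
{
    /* vm_page_free_list(*local_freeq) happens here in the kernel */
    *local_freeq = NULL;
    *local_freed = 0;
}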
*/ + if ( (!m->reference) ) { + refmod_state = pmap_get_refmod(m->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + m->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + } + if (m->reference) { +was_referenced: + vm_page_activate(m); + VM_STAT(reactivations++); - if (m->reference || pmap_is_referenced(m->phys_page)) { vm_pageout_inactive_used++; - reactivate_page: -#if ADVISORY_PAGEOUT - if (m->discard_request) { - m->discard_request = FALSE; - } -#endif /* ADVISORY_PAGEOUT */ last_page_zf = 0; - vm_object_unlock(object); - vm_page_activate(m); - VM_STAT(reactivations++); + inactive_burst_count = 0; - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - continue; + goto done_with_inactivepage; } -#if ADVISORY_PAGEOUT - if (object->advisory_pageout) { - boolean_t do_throttle; - memory_object_t pager; - vm_object_offset_t discard_offset; - - if (m->discard_request) { - vm_stat_discard_failure++; - goto mandatory_pageout; - } - - assert(object->pager_initialized); - m->discard_request = TRUE; - pager = object->pager; - - /* system-wide throttle */ - do_throttle = (vm_page_free_count <= - vm_page_free_reserved); + XPR(XPR_VM_PAGEOUT, + "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", + (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0); -#if 0 - /* - * JMM - Do we need a replacement throttle - * mechanism for pagers? - */ - if (!do_throttle) { - /* throttle on this pager */ - /* XXX lock ordering ? */ - ip_lock(port); - do_throttle= imq_full(&port->ip_messages); - ip_unlock(port); + /* + * we've got a candidate page to steal... + * + * m->dirty is up to date courtesy of the + * preceding check for m->reference... if + * we get here, then m->reference had to be + * FALSE which means we did a pmap_get_refmod + * and updated both m->reference and m->dirty + * + * if it's dirty or precious we need to + * see if the target queue is throttled + * if it is, we need to skip over it by moving it back + * to the end of the inactive queue */ + inactive_throttled = FALSE; + + if (m->dirty || m->precious) { + if (object->internal) { + if ((VM_PAGE_Q_THROTTLED(iq) || !IP_VALID(memory_manager_default))) + inactive_throttled = TRUE; + } else if (VM_PAGE_Q_THROTTLED(eq)) { + inactive_throttled = TRUE; } -#endif - - if (do_throttle) { - vm_stat_discard_throttle++; -#if 0 - /* ignore this page and skip to next */ - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - vm_object_unlock(object); - continue; -#else - /* force mandatory pageout */ - goto mandatory_pageout; -#endif + } + if (inactive_throttled == TRUE) { + if (m->zero_fill) { + queue_enter(&vm_page_queue_zf, m, + vm_page_t, pageq); + } else { + queue_enter(&vm_page_queue_inactive, m, + vm_page_t, pageq); } + if (!m->fictitious) + vm_page_inactive_count++; + m->inactive = TRUE; - /* proceed with discard_request */ - vm_page_activate(m); - vm_stat_discard++; - VM_STAT(reactivations++); - discard_offset = m->offset + object->paging_offset; - vm_stat_discard_sent++; - - if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } - vm_object_unlock(object); + vm_pageout_scan_inactive_throttled++; - goto done_with_inactivepage; } - mandatory_pageout: -#endif /* ADVISORY_PAGEOUT */ - - XPR(XPR_VM_PAGEOUT, - "vm_pageout_scan, replace object 0x%X offset 0x%X page
0x%X\n", - (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0); - /* - * Eliminate all mappings. + * we've got a page that we can steal... + * eliminate all mappings and make sure + * we have the up-to-date modified state + * first take the page BUSY, so that no new + * mappings can be made */ - m->busy = TRUE; - if (m->no_isync == FALSE) - pmap_page_protect(m->phys_page, VM_PROT_NONE); + /* + * if we need to do a pmap_disconnect then we + * need to re-evaluate m->dirty since the pmap_disconnect + * provides the true state atomically... the + * page was still mapped up to the pmap_disconnect + * and may have been dirtied at the last microsecond + * + * we also check for the page being referenced 'late' + * if it was, we first need to do a WAKEUP_DONE on it + * since we already set m->busy = TRUE, before + * going off to reactivate it + * + * if we don't need the pmap_disconnect, then + * m->dirty is up to date courtesy of the + * earlier check for m->reference... if + * we get here, then m->reference had to be + * FALSE which means we did a pmap_get_refmod + * and updated both m->reference and m->dirty... + */ + if (m->no_isync == FALSE) { + refmod_state = pmap_disconnect(m->phys_page); - if (!m->dirty) - m->dirty = pmap_is_modified(m->phys_page); + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + if (refmod_state & VM_MEM_REFERENCED) { + m->reference = TRUE; + + PAGE_WAKEUP_DONE(m); + goto was_referenced; + } + } /* * If it's clean and not precious, we can free the page. */ - if (!m->dirty && !m->precious) { vm_pageout_inactive_clean++; goto reclaim_page; } - if (local_freeq) { - vm_page_free_list(local_freeq); - - local_freeq = 0; - local_freed = 0; - } - delayed_unlock = 0; - vm_page_unlock_queues(); + vm_pageout_cluster(m); - /* - * If there is no memory object for the page, create - * one and hand it to the default pager. - */ + vm_pageout_inactive_dirty++; - if (!object->pager_initialized) - vm_object_collapse(object, (vm_object_offset_t)0); - if (!object->pager_initialized) - vm_object_pager_create(object); - if (!object->pager_initialized) { - /* - * Still no pager for the object. - * Reactivate the page. - * - * Should only happen if there is no - * default pager. - */ - vm_page_lock_queues(); - vm_page_activate(m); - vm_page_unlock_queues(); + inactive_burst_count = 0; - /* - * And we are done with it. - */ - PAGE_WAKEUP_DONE(m); - vm_object_unlock(object); +done_with_inactivepage: + if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { - /* - * break here to get back to the preemption - * point in the outer loop so that we don't - * spin forever if there is no default pager. - */ - vm_pageout_dirty_no_pager++; - /* - * Well there's no pager, but we can still reclaim - * free pages out of the inactive list. Go back - * to top of loop and look for suitable pages. - */ - continue; - } else if (object->pager == MEMORY_OBJECT_NULL) { - /* - * This pager has been destroyed by either - * memory_object_destroy or vm_object_destroy, and - * so there is nowhere for the page to go. - * Just free the page. 
- */ - VM_PAGE_FREE(m); - vm_object_unlock(object); - loop_bursted_count = 0; - continue; + if (object != NULL) { + vm_object_unlock(object); + object = NULL; + } + if (local_freeq) { + vm_page_free_list(local_freeq); + + local_freeq = 0; + local_freed = 0; + } + delayed_unlock = 0; + vm_page_unlock_queues(); + mutex_pause(); } - - vm_pageout_inactive_dirty++; - vm_pageout_cluster(m); /* flush it */ - vm_object_unlock(object); - loop_bursted_count = 0; + /* + * back to top of pageout scan loop + */ } } -counter(unsigned int c_vm_pageout_scan_continue = 0;) - -void -vm_pageout_scan_continue(void) -{ - /* - * We just paused to let the pagers catch up. - * If vm_page_laundry_count is still high, - * then we aren't waiting long enough. - * If we have paused some vm_pageout_pause_max times without - * adjusting vm_pageout_burst_wait, it might be too big, - * so we decrease it. - */ - - vm_page_lock_queues(); - counter(++c_vm_pageout_scan_continue); - if (vm_page_laundry_count > vm_pageout_burst_min) { - vm_pageout_burst_wait++; - vm_pageout_pause_count = 0; - } else if (++vm_pageout_pause_count > vm_pageout_pause_max) { - vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4; - if (vm_pageout_burst_wait < 1) - vm_pageout_burst_wait = 1; - vm_pageout_pause_count = 0; - } - vm_page_unlock_queues(); -} -void vm_page_free_reserve(int pages); int vm_page_free_count_init; void @@ -1979,46 +1844,287 @@ vm_pageout_continue(void) mutex_unlock(&vm_page_queue_free_lock); counter(c_vm_pageout_block++); - thread_block(vm_pageout_continue); + thread_block((thread_continue_t)vm_pageout_continue); /*NOTREACHED*/ } -void -vm_pageout(void) + +/* + * must be called with the + * queues and object locks held + */ +static void +vm_pageout_queue_steal(vm_page_t m) { - thread_t self = current_thread(); - spl_t s; + struct vm_pageout_queue *q; - /* - * Set thread privileges. - */ - self->vm_privilege = TRUE; + if (m->object->internal == TRUE) + q = &vm_pageout_queue_internal; + else + q = &vm_pageout_queue_external; - s = splsched(); - thread_lock(self); - self->priority = BASEPRI_PREEMPT - 1; - set_sched_pri(self, self->priority); - thread_unlock(self); - splx(s); + m->laundry = FALSE; + m->pageout_queue = FALSE; + queue_remove(&q->pgo_pending, m, vm_page_t, pageq); - /* - * Initialize some paging parameters. 
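/*
 * vm_pageout_continue() and the I/O threads above never return; they
 * block with a continuation so the kernel stack can be discarded while
 * idle. A loose userspace analogue is a trampoline over functions that
 * hand back the next step instead of recursing. All names here are
 * invented for illustration.
 */
#include <stddef.h>

struct continuation;
typedef struct continuation (*cont_fn)(void);
struct continuation {
    cont_fn next;  /* NULL means the daemon is done */
};

static struct continuation daemon_step(void)
{
    /* ... process one batch of queued work, then "block" ... */
    struct continuation c = { daemon_step };  /* resume here later */
    return c;
}

static void trampoline(struct continuation c)
{
    while (c.next != NULL)
        c = c.next();  /* the stack never deepens between steps */
}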
- */ + m->pageq.next = NULL; + m->pageq.prev = NULL; - if (vm_page_laundry_max == 0) - vm_page_laundry_max = VM_PAGE_LAUNDRY_MAX; + vm_object_paging_end(m->object); - if (vm_pageout_burst_max == 0) - vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX; + q->pgo_laundry--; +} - if (vm_pageout_burst_wait == 0) - vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; - if (vm_pageout_empty_wait == 0) - vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; +#ifdef FAKE_DEADLOCK - /* - * Set kernel task to low backing store privileged +#define FAKE_COUNT 5000 + +int internal_count = 0; +int fake_deadlock = 0; + +#endif + +static void +vm_pageout_iothread_continue(struct vm_pageout_queue *q) +{ + vm_page_t m = NULL; + vm_object_t object; + boolean_t need_wakeup; + + vm_page_lock_queues(); + + while ( !queue_empty(&q->pgo_pending) ) { + + q->pgo_busy = TRUE; + queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); + m->pageout_queue = FALSE; + vm_page_unlock_queues(); + + m->pageq.next = NULL; + m->pageq.prev = NULL; +#ifdef FAKE_DEADLOCK + if (q == &vm_pageout_queue_internal) { + vm_offset_t addr; + int pg_count; + + internal_count++; + + if ((internal_count == FAKE_COUNT)) { + + pg_count = vm_page_free_count + vm_page_free_reserved; + + if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { + kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); + } + internal_count = 0; + fake_deadlock++; + } + } +#endif + object = m->object; + + if (!object->pager_initialized) { + vm_object_lock(object); + + /* + * If there is no memory object for the page, create + * one and hand it to the default pager. + */ + + if (!object->pager_initialized) + vm_object_collapse(object, (vm_object_offset_t)0); + if (!object->pager_initialized) + vm_object_pager_create(object); + if (!object->pager_initialized) { + /* + * Still no pager for the object. + * Reactivate the page. + * + * Should only happen if there is no + * default pager. + */ + m->list_req_pending = FALSE; + m->cleaning = FALSE; + m->pageout = FALSE; + vm_page_unwire(m); + + vm_pageout_throttle_up(m); + + vm_page_lock_queues(); + vm_pageout_dirty_no_pager++; + vm_page_activate(m); + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + PAGE_WAKEUP_DONE(m); + + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_page_lock_queues(); + continue; + } else if (object->pager == MEMORY_OBJECT_NULL) { + /* + * This pager has been destroyed by either + * memory_object_destroy or vm_object_destroy, and + * so there is nowhere for the page to go. + * Just free the page... VM_PAGE_FREE takes + * care of cleaning up all the state... + * including doing the vm_pageout_throttle_up + */ + VM_PAGE_FREE(m); + + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_page_lock_queues(); + continue; + } + vm_object_unlock(object); + } + /* + * we expect the paging_in_progress reference to have + * already been taken on the object before it was added + * to the appropriate pageout I/O queue... this will + * keep the object from being terminated and/or the + * paging_offset from changing until the I/O has + * completed... therefore no need to lock the object to + * pull the paging_offset from it. + * + * Send the data to the pager. 
+ * any pageout clustering happens there + */ + memory_object_data_return(object->pager, + m->offset + object->paging_offset, + PAGE_SIZE, + NULL, + NULL, + FALSE, + FALSE, + 0); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_page_lock_queues(); + } + assert_wait((event_t) q, THREAD_UNINT); + + + if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) { + q->pgo_throttled = FALSE; + need_wakeup = TRUE; + } else + need_wakeup = FALSE; + + q->pgo_busy = FALSE; + q->pgo_idle = TRUE; + vm_page_unlock_queues(); + + if (need_wakeup == TRUE) + thread_wakeup((event_t) &q->pgo_laundry); + + thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending); + /*NOTREACHED*/ +} + + +static void +vm_pageout_iothread_external(void) +{ + + vm_pageout_iothread_continue(&vm_pageout_queue_external); + /*NOTREACHED*/ +} + + +static void +vm_pageout_iothread_internal(void) +{ + thread_t self = current_thread(); + + self->options |= TH_OPT_VMPRIV; + + vm_pageout_iothread_continue(&vm_pageout_queue_internal); + /*NOTREACHED*/ +} + +static void +vm_pageout_garbage_collect(int collect) +{ + if (collect) { + stack_collect(); + + /* + * consider_zone_gc should be last, because the other operations + * might return memory to zones. + */ + consider_machine_collect(); + consider_zone_gc(); + + consider_machine_adjust(); + } + + assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); + + thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); + /*NOTREACHED*/ +} + + + +void +vm_pageout(void) +{ + thread_t self = current_thread(); + thread_t thread; + kern_return_t result; + spl_t s; + + /* + * Set thread privileges. + */ + s = splsched(); + thread_lock(self); + self->priority = BASEPRI_PREEMPT - 1; + set_sched_pri(self, self->priority); + thread_unlock(self); + splx(s); + + /* + * Initialize some paging parameters. + */ + + if (vm_pageout_idle_wait == 0) + vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; + + if (vm_pageout_burst_wait == 0) + vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; + + if (vm_pageout_empty_wait == 0) + vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; + + if (vm_pageout_deadlock_wait == 0) + vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; + + if (vm_pageout_deadlock_relief == 0) + vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; + + if (vm_pageout_inactive_relief == 0) + vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; + + if (vm_pageout_burst_active_throttle == 0) + vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; + + if (vm_pageout_burst_inactive_throttle == 0) + vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; + + /* + * Set kernel task to low backing store privileged * status */ task_lock(kernel_task); @@ -2034,135 +2140,57 @@ vm_pageout(void) * calling it with an arg of 0 will not change the reserve * but will re-calculate free_min and free_target */ - if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED) { - int scale; - - /* - * HFS Journaling exists on the vm_pageout path... - * it can need to allocate a lot more memory than a - * typical driver/filesystem... if it can't allocate - * the transaction buffer(s), we will deadlock... 
- * the amount is scaled - * based on the physical footprint of the system, so - * let's double our reserve on systems with > 512Mbytes - */ - if (vm_page_free_count > (512 * 1024 * 1024) / PAGE_SIZE) - scale = 2; - else - scale = 1; - vm_page_free_reserve((VM_PAGE_FREE_RESERVED * scale) - vm_page_free_reserved); + if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { + vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); } else vm_page_free_reserve(0); - vm_pageout_continue(); - /*NOTREACHED*/ -} - -kern_return_t -vm_pageout_emergency_availability_request() -{ - vm_page_t m; - vm_object_t object; - - vm_page_lock_queues(); - m = (vm_page_t) queue_first(&vm_page_queue_inactive); - - while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m)) { - object = m->object; - - if ( !vm_object_lock_try(object)) { - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - if ((!object->alive) || (object->pageout)) { - vm_object_unlock(object); - - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious - || m->precious || m->cleaning - || m->dump_cleaning || m->error - || m->pageout || m->laundry - || m->list_req_pending - || m->overwriting) { - vm_object_unlock(object); + queue_init(&vm_pageout_queue_external.pgo_pending); + vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; + vm_pageout_queue_external.pgo_laundry = 0; + vm_pageout_queue_external.pgo_idle = FALSE; + vm_pageout_queue_external.pgo_busy = FALSE; + vm_pageout_queue_external.pgo_throttled = FALSE; - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - m->busy = TRUE; - pmap_page_protect(m->phys_page, VM_PROT_NONE); - m->dirty = pmap_is_modified(m->phys_page); + queue_init(&vm_pageout_queue_internal.pgo_pending); + vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; + vm_pageout_queue_internal.pgo_laundry = 0; + vm_pageout_queue_internal.pgo_idle = FALSE; + vm_pageout_queue_internal.pgo_busy = FALSE; + vm_pageout_queue_internal.pgo_throttled = FALSE; - if (m->dirty) { - PAGE_WAKEUP_DONE(m); - vm_object_unlock(object); - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - vm_page_free(m); - vm_object_unlock(object); - vm_page_unlock_queues(); + result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &thread); + if (result != KERN_SUCCESS) + panic("vm_pageout_iothread_internal: create failed"); - return KERN_SUCCESS; - } - m = (vm_page_t) queue_first(&vm_page_queue_active); + thread_deallocate(thread); - while (!queue_end(&vm_page_queue_active, (queue_entry_t) m)) { - object = m->object; + result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, BASEPRI_PREEMPT - 1, &thread); + if (result != KERN_SUCCESS) + panic("vm_pageout_iothread_external: create failed"); - if ( !vm_object_lock_try(object)) { - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - if ((!object->alive) || (object->pageout)) { - vm_object_unlock(object); - - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - if (m->dirty || m->busy || m->wire_count || m->absent || m->fictitious - || m->precious || m->cleaning - || m->dump_cleaning || m->error - || m->pageout || m->laundry - || m->list_req_pending - || m->overwriting) { - vm_object_unlock(object); + thread_deallocate(thread); - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - m->busy = TRUE; - pmap_page_protect(m->phys_page, VM_PROT_NONE); - m->dirty = 
pmap_is_modified(m->phys_page); - if (m->dirty) { - PAGE_WAKEUP_DONE(m); - vm_object_unlock(object); + result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, BASEPRI_PREEMPT - 2, &thread); + if (result != KERN_SUCCESS) + panic("vm_pageout_garbage_collect: create failed"); - m = (vm_page_t) queue_next(&m->pageq); - continue; - } - vm_page_free(m); - vm_object_unlock(object); - vm_page_unlock_queues(); + thread_deallocate(thread); - return KERN_SUCCESS; - } - vm_page_unlock_queues(); - return KERN_FAILURE; + vm_pageout_continue(); + /*NOTREACHED*/ } static upl_t upl_create( - int flags, - vm_size_t size) + int flags, + upl_size_t size) { upl_t upl; int page_field_size; /* bit field in word size buf */ @@ -2186,10 +2214,10 @@ upl_create( upl->map_object = NULL; upl->ref_count = 1; upl_lock_init(upl); -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG upl->ubc_alias1 = 0; upl->ubc_alias2 = 0; -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ return(upl); } @@ -2199,7 +2227,7 @@ upl_destroy( { int page_field_size; /* bit field in word size buf */ -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG { upl_t upl_ele; vm_object_t object; @@ -2218,7 +2246,7 @@ upl_destroy( } vm_object_unlock(object); } -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ /* drop a reference on the map_object whether or */ /* not a pageout object is inserted */ if(upl->map_object->pageout) @@ -2230,15 +2258,16 @@ upl_destroy( page_field_size = (page_field_size + 3) & 0xFFFFFFFC; } if(upl->flags & UPL_INTERNAL) { - kfree((vm_offset_t)upl, - sizeof(struct upl) + - (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE)) - + page_field_size); + kfree(upl, + sizeof(struct upl) + + (sizeof(struct upl_page_info) * (upl->size/PAGE_SIZE)) + + page_field_size); } else { - kfree((vm_offset_t)upl, sizeof(struct upl) + page_field_size); + kfree(upl, sizeof(struct upl) + page_field_size); } } +void uc_upl_dealloc(upl_t upl); __private_extern__ void uc_upl_dealloc( upl_t upl) @@ -2260,6 +2289,16 @@ upl_deallocate( } } +/* + * Statistics about UPL enforcement of copy-on-write obligations. + */ +unsigned long upl_cow = 0; +unsigned long upl_cow_again = 0; +unsigned long upl_cow_contiguous = 0; +unsigned long upl_cow_pages = 0; +unsigned long upl_cow_again_pages = 0; +unsigned long upl_cow_contiguous_pages = 0; + /* * Routine: vm_object_upl_request * Purpose: @@ -2303,29 +2342,42 @@ upl_deallocate( * the vm_objects (cache objects), they support. * */ + __private_extern__ kern_return_t vm_object_upl_request( vm_object_t object, - vm_object_offset_t offset, - vm_size_t size, + vm_object_offset_t offset, + upl_size_t size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, - int cntrl_flags) + int cntrl_flags) { - vm_page_t dst_page; + vm_page_t dst_page = VM_PAGE_NULL; vm_object_offset_t dst_offset = offset; - vm_size_t xfer_size = size; + upl_size_t xfer_size = size; boolean_t do_m_lock = FALSE; boolean_t dirty; boolean_t hw_dirty; upl_t upl = NULL; - int entry; + unsigned int entry; +#if MACH_CLUSTER_STATS boolean_t encountered_lrp = FALSE; - +#endif vm_page_t alias_page = NULL; int page_ticket; - wpl_array_t lite_list; + int refmod_state; + wpl_array_t lite_list = NULL; + vm_object_t last_copy_object; + + + if (cntrl_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. 
+ */ + return KERN_INVALID_VALUE; + } page_ticket = (cntrl_flags & UPL_PAGE_TICKET_MASK) >> UPL_PAGE_TICKET_SHIFT; @@ -2337,10 +2389,6 @@ vm_object_upl_request( if(cntrl_flags & UPL_SET_INTERNAL) if(page_list_count != NULL) *page_list_count = MAX_UPL_TRANSFER; - if(((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) && - ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size))) - return KERN_INVALID_ARGUMENT; if((!object->internal) && (object->paging_offset != 0)) panic("vm_object_upl_request: vnode object with non-zero paging offset\n"); @@ -2349,17 +2397,21 @@ vm_object_upl_request( return KERN_SUCCESS; } + vm_object_lock(object); + vm_object_paging_begin(object); + vm_object_unlock(object); + if(upl_ptr) { if(cntrl_flags & UPL_SET_INTERNAL) { if(cntrl_flags & UPL_SET_LITE) { - vm_offset_t page_field_size; + uintptr_t page_field_size; upl = upl_create( UPL_CREATE_INTERNAL | UPL_CREATE_LITE, size); user_page_list = (upl_page_info_t *) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); lite_list = (wpl_array_t) - (((vm_offset_t)user_page_list) + + (((uintptr_t)user_page_list) + ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); page_field_size = ((size/PAGE_SIZE) + 7) >> 3; @@ -2371,16 +2423,15 @@ vm_object_upl_request( } else { upl = upl_create(UPL_CREATE_INTERNAL, size); user_page_list = (upl_page_info_t *) - (((vm_offset_t)upl) - + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); upl->flags = UPL_INTERNAL; } } else { if(cntrl_flags & UPL_SET_LITE) { - vm_offset_t page_field_size; + uintptr_t page_field_size; upl = upl_create(UPL_CREATE_LITE, size); lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); page_field_size = ((size/PAGE_SIZE) + 7) >> 3; page_field_size = (page_field_size + 3) & 0xFFFFFFFC; @@ -2392,15 +2443,52 @@ vm_object_upl_request( } } - if(object->phys_contiguous) { + if (object->phys_contiguous) { + if ((cntrl_flags & UPL_WILL_MODIFY) && + object->copy != VM_OBJECT_NULL) { + /* Honor copy-on-write obligations */ + + /* + * XXX FBDP + * We could still have a race... + * A is here building the UPL for a write(). + * A pushes the pages to the current copy + * object. + * A returns the UPL to the caller. + * B comes along and establishes another + * private mapping on this object, inserting + * a new copy object between the original + * object and the old copy object. + * B reads a page and gets the original contents + * from the original object. + * A modifies the page in the original object. + * B reads the page again and sees A's changes, + * which is wrong... + * + * The problem is that the pages are not + * marked "busy" in the original object, so + * nothing prevents B from reading it + * before A's changes are completed. + * + * The "paging_in_progress" might protect us + * from the insertion of a new copy object + * though... To be verified. + */
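The internal-UPL path above carves three things out of a single allocation: the upl header, the upl_page_info array, and the "lite list" bitmap (one bit per page, rounded up to a whole 32-bit word). A standalone sketch of that sizing arithmetic, with made-up structure sizes standing in for the kernel's:

    /* Standalone model (not kernel code) of the internal-UPL layout math
     * used in upl_create() above. All sizes here are illustrative. */
    #include <stdio.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096

    int main(void)
    {
        size_t upl_hdr   = 64;           /* stand-in for sizeof(struct upl) */
        size_t page_info = 8;            /* stand-in for sizeof(struct upl_page_info) */
        size_t size      = 24 * PAGE_SIZE;
        size_t pages     = size / PAGE_SIZE;

        /* one bit per page, rounded up to a byte... */
        size_t page_field_size = (pages + 7) >> 3;
        /* ...then rounded up to a 4-byte word, exactly as above */
        page_field_size = (page_field_size + 3) & 0xFFFFFFFC;

        printf("page-info array at offset %zu\n", upl_hdr);
        printf("lite list at offset %zu, %zu bytes\n",
               upl_hdr + pages * page_info, page_field_size);
        printf("total allocation %zu bytes\n",
               upl_hdr + pages * page_info + page_field_size);
        return 0;
    }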
+ vm_object_lock_request(object, + offset, + size, + FALSE, + MEMORY_OBJECT_COPY_SYNC, + VM_PROT_NO_CHANGE); + upl_cow_contiguous++; + upl_cow_contiguous_pages += size >> PAGE_SHIFT; + } + upl->map_object = object; /* don't need any shadow mappings for this one */ /* since it is already I/O memory */ upl->flags |= UPL_DEVICE_MEMORY; - vm_object_lock(object); - vm_object_paging_begin(object); - vm_object_unlock(object); /* paging_in_progress protects paging_offset */ upl->offset = offset + object->paging_offset; @@ -2408,7 +2496,7 @@ vm_object_upl_request( *upl_ptr = upl; if(user_page_list) { user_page_list[0].phys_addr = - (offset + object->shadow_offset)>>12; + (offset + object->shadow_offset)>>PAGE_SHIFT; user_page_list[0].device = TRUE; } @@ -2419,8 +2507,10 @@ vm_object_upl_request( *page_list_count = 1; } } + return KERN_SUCCESS; } + if(user_page_list) user_page_list[0].device = FALSE; @@ -2428,7 +2518,10 @@ vm_object_upl_request( upl->map_object = object; } else { upl->map_object = vm_object_allocate(size); - vm_object_lock(upl->map_object); + /* + * No need to lock the new object: nobody else knows + * about it yet, so it's all ours so far. + */ upl->map_object->shadow = object; upl->map_object->pageout = TRUE; upl->map_object->can_persist = FALSE; @@ -2436,24 +2529,63 @@ vm_object_upl_request( MEMORY_OBJECT_COPY_NONE; upl->map_object->shadow_offset = offset; upl->map_object->wimg_bits = object->wimg_bits; - vm_object_unlock(upl->map_object); } + } if (!(cntrl_flags & UPL_SET_LITE)) { VM_PAGE_GRAB_FICTITIOUS(alias_page); } + + /* + * ENCRYPTED SWAP: + * Just mark the UPL as "encrypted" here. + * We'll actually encrypt the pages later, + * in upl_encrypt(), when the caller has + * selected which pages need to go to swap. + */ + if (cntrl_flags & UPL_ENCRYPT) { + upl->flags |= UPL_ENCRYPTED; + } + if (cntrl_flags & UPL_FOR_PAGEOUT) { + upl->flags |= UPL_PAGEOUT; + } vm_object_lock(object); - vm_object_paging_begin(object); /* we can lock in the paging_offset once paging_in_progress is set */ if(upl_ptr) { upl->size = size; upl->offset = offset + object->paging_offset; *upl_ptr = upl; -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG queue_enter(&object->uplq, upl, upl_t, uplq); -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ + } + + if ((cntrl_flags & UPL_WILL_MODIFY) && + object->copy != VM_OBJECT_NULL) { + /* Honor copy-on-write obligations */ + + /* + * The caller is gathering these pages and + * might modify their contents. We need to + * make sure that the copy object has its own + * private copies of these pages before we let + * the caller modify them. + */
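The hunk above also replaces a hard-coded ">>12" with ">>PAGE_SHIFT" when reporting the device page number. A toy illustration of why the named shift matters (all values here are illustrative, not taken from xnu):

    /* A literal 12 silently assumes 4 KB pages; PAGE_SHIFT tracks the
     * configured page size. Sketch with invented values. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t offset = 0x2000, shadow_offset = 0x10000000;

        unsigned page_shift = 12;                 /* 4 KB pages */
        printf("page number, 4K pages: %llu\n",
               (unsigned long long)((offset + shadow_offset) >> page_shift));

        page_shift = 13;                          /* hypothetical 8 KB pages */
        printf("page number, 8K pages: %llu\n",
               (unsigned long long)((offset + shadow_offset) >> page_shift));
        return 0;
    }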
+ vm_object_update(object, + offset, + size, + NULL, + NULL, + FALSE, /* should_return */ + MEMORY_OBJECT_COPY_SYNC, + VM_PROT_NO_CHANGE); + upl_cow++; + upl_cow_pages += size >> PAGE_SHIFT; + } + /* remember which copy object we synchronized with */ + last_copy_object = object->copy; entry = 0; if(cntrl_flags & UPL_COPYOUT_FROM) { @@ -2466,28 +2598,72 @@ vm_object_upl_request( VM_PAGE_GRAB_FICTITIOUS(alias_page); vm_object_lock(object); } - if(((dst_page = vm_page_lookup(object, - dst_offset)) == VM_PAGE_NULL) || + if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || dst_page->fictitious || dst_page->absent || dst_page->error || - (dst_page->wire_count != 0 && - !dst_page->pageout) || - ((!(dst_page->dirty || dst_page->precious || - pmap_is_modified(dst_page->phys_page))) - && (cntrl_flags & UPL_RET_ONLY_DIRTY)) || - ((!(dst_page->inactive)) - && (dst_page->page_ticket != page_ticket) - && ((dst_page->page_ticket+1) != page_ticket) - && (cntrl_flags & UPL_FOR_PAGEOUT)) || - ((!dst_page->list_req_pending) && (cntrl_flags & UPL_FOR_PAGEOUT) && - (cntrl_flags & UPL_RET_ONLY_DIRTY) && - pmap_is_referenced(dst_page->phys_page))) { - if(user_page_list) { + (dst_page->wire_count && !dst_page->pageout) || + + ((!dst_page->inactive) && (cntrl_flags & UPL_FOR_PAGEOUT) && + (dst_page->page_ticket != page_ticket) && + ((dst_page->page_ticket+1) != page_ticket)) ) { + + if (user_page_list) user_page_list[entry].phys_addr = 0; + } else { + /* + * grab this up front... + * a high percentage of the time we're going to + * need the hardware modification state a bit later + * anyway... so we can eliminate an extra call into + * the pmap layer by grabbing it here and recording it + */ + refmod_state = pmap_get_refmod(dst_page->phys_page); + + if (cntrl_flags & UPL_RET_ONLY_DIRTY) { + /* + * we're only asking for DIRTY pages to be returned + */ + + if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) { + /* + * if we were the page stolen by vm_pageout_scan to be + * cleaned (as opposed to a buddy being clustered in), + * or this request is not being driven by a PAGEOUT cluster, + * then we only need to check for the page being dirty or + * precious to decide whether to return it + */ + if (dst_page->dirty || dst_page->precious || + (refmod_state & VM_MEM_MODIFIED)) { + goto check_busy; + } + } + /* + * this is a request for a PAGEOUT cluster and this page + * is merely along for the ride as a 'buddy'... not only + * does it have to be dirty to be returned, but it also + * can't have been referenced recently... note that we've + * already filtered above based on whether this page is + * currently on the inactive queue or it meets the page + * ticket (generation count) check + */ + if ( !(refmod_state & VM_MEM_REFERENCED) && + ((refmod_state & VM_MEM_MODIFIED) || + dst_page->dirty || dst_page->precious) ) { + goto check_busy; + } + /* + * if we reach here, we're not to return + * the page... go on to the next one + */ + if (user_page_list) + user_page_list[entry].phys_addr = 0; + entry++; + dst_offset += PAGE_SIZE_64; + xfer_size -= PAGE_SIZE; + continue; } - } else { - +check_busy: if(dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) { @@ -2500,8 +2676,10 @@ vm_object_upl_request( xfer_size -= PAGE_SIZE; continue; } - /*someone else is playing with the */ - /* page. We will have to wait. */ + /* + * someone else is playing with the + * page. We will have to wait.
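The UPL_RET_ONLY_DIRTY filtering above fetches the hardware ref/mod state once, via pmap_get_refmod(), then applies two different tests depending on whether the page was the pageout target itself or a clustered "buddy". A compilable model of that decision, with stand-in types and constants:

    /* Model of the dirty-page filter above; everything here is a
     * stand-in for the kernel's types and flag values. */
    #include <stdbool.h>
    #include <stdio.h>

    #define VM_MEM_REFERENCED 0x1
    #define VM_MEM_MODIFIED   0x2

    struct page { bool dirty, precious, stolen_by_pageout; };

    /* refmod is what a single pmap_get_refmod()-style call returned */
    static bool should_return(const struct page *p, int refmod)
    {
        if (p->stolen_by_pageout)   /* the page pageout chose itself */
            return p->dirty || p->precious || (refmod & VM_MEM_MODIFIED);

        /* a clustered buddy: must be dirty AND not recently referenced */
        return !(refmod & VM_MEM_REFERENCED) &&
               ((refmod & VM_MEM_MODIFIED) || p->dirty || p->precious);
    }

    int main(void)
    {
        struct page buddy = { .dirty = true };
        printf("warm dirty buddy returned? %d\n",        /* 0: skipped */
               should_return(&buddy, VM_MEM_REFERENCED | VM_MEM_MODIFIED));
        printf("cold dirty buddy returned? %d\n",        /* 1: returned */
               should_return(&buddy, VM_MEM_MODIFIED));
        return 0;
    }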
+ */ PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } @@ -2522,6 +2700,16 @@ vm_object_upl_request( vm_page_lock_queues(); + if (dst_page->pageout_queue == TRUE) + /* + * we've buddied up a page for a clustered pageout + * that has already been moved to the pageout + * queue by pageout_scan... we need to remove + * it from the queue and drop the laundry count + * on that queue + */ + vm_pageout_queue_steal(dst_page); +#if MACH_CLUSTER_STATS /* pageout statistics gathering. count */ /* all the pages we will page out that */ /* were not counted in the initial */ @@ -2541,7 +2729,7 @@ vm_object_upl_request( (pages_at_lower_offsets++;) } } - +#endif /* Turn off busy indication on pending */ /* pageout. Note: we can only get here */ /* in the request pending case. */ @@ -2549,7 +2737,7 @@ vm_object_upl_request( dst_page->busy = FALSE; dst_page->cleaning = FALSE; - hw_dirty = pmap_is_modified(dst_page->phys_page); + hw_dirty = refmod_state & VM_MEM_MODIFIED; dirty = hw_dirty ? TRUE : dst_page->dirty; if(cntrl_flags & UPL_SET_LITE) { @@ -2580,9 +2768,12 @@ vm_object_upl_request( /* use pageclean setup, it is more */ /* convenient even for the pageout */ /* cases here */ + + vm_object_lock(upl->map_object); vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); + vm_object_unlock(upl->map_object); alias_page->absent = FALSE; alias_page = NULL; @@ -2596,17 +2787,30 @@ vm_object_upl_request( if(dst_page->pageout) dst_page->busy = TRUE; - if((!(cntrl_flags & UPL_CLEAN_IN_PLACE)) - || (cntrl_flags & UPL_FOR_PAGEOUT)) { - /* deny access to the target page */ - /* while it is being worked on */ - if((!dst_page->pageout) && - (dst_page->wire_count == 0)) { + if ( (cntrl_flags & UPL_ENCRYPT) ) { + /* + * ENCRYPTED SWAP: + * We want to deny access to the target page + * because its contents are about to be + * encrypted and the user would be very + * confused to see encrypted data instead + * of their data. + */ + dst_page->busy = TRUE; + } + if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { + /* + * deny access to the target page + * while it is being worked on + */ + if ((!dst_page->pageout) && + (dst_page->wire_count == 0)) { dst_page->busy = TRUE; dst_page->pageout = TRUE; vm_page_wire(dst_page); } } + if(user_page_list) { user_page_list[entry].phys_addr = dst_page->phys_page; @@ -2620,9 +2824,31 @@ vm_object_upl_request( dst_page->precious; } vm_page_unlock_queues(); - } - entry++; - dst_offset += PAGE_SIZE_64; + + /* + * ENCRYPTED SWAP: + * The caller is gathering this page and might + * access its contents later on. Decrypt the + * page before adding it to the UPL, so that + * the caller never sees encrypted data. + */ + if (! (cntrl_flags & UPL_ENCRYPT) && + dst_page->encrypted) { + assert(dst_page->busy); + + vm_page_decrypt(dst_page, 0); + vm_page_decrypt_for_upl_counter++; + + /* + * Retry this page, since anything + * could have changed while we were + * decrypting. + */ + continue; + } + } + entry++; + dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; } } else { @@ -2633,8 +2859,50 @@ vm_object_upl_request( VM_PAGE_GRAB_FICTITIOUS(alias_page); vm_object_lock(object); } - dst_page = vm_page_lookup(object, dst_offset); + if ((cntrl_flags & UPL_WILL_MODIFY) && + object->copy != last_copy_object) { + /* Honor copy-on-write obligations */ + + /* + * The copy object has changed since we + * last synchronized for copy-on-write. + * Another copy object might have been + * inserted while we released the object's + * lock. 
Since someone could have seen the + * original contents of the remaining pages + * through that new object, we have to + * synchronize with it again for the remaining + * pages only. The previous pages are "busy" + * so they cannot be seen through the new + * mapping. The new mapping will see our + * upcoming changes for those previous pages, + * but that's OK since they couldn't see what + * was there before. It's just a race anyway + * and there's no guarantee of consistency or + * atomicity. We just don't want new mappings + * to see both the *before* and *after* pages. + */ + if (object->copy != VM_OBJECT_NULL) { + vm_object_update( + object, + dst_offset,/* current offset */ + xfer_size, /* remaining size */ + NULL, + NULL, + FALSE, /* should_return */ + MEMORY_OBJECT_COPY_SYNC, + VM_PROT_NO_CHANGE); + upl_cow_again++; + upl_cow_again_pages += + xfer_size >> PAGE_SHIFT; + } + /* remember the copy object we synced with */ + last_copy_object = object->copy; + } + + dst_page = vm_page_lookup(object, dst_offset); + if(dst_page != VM_PAGE_NULL) { if((cntrl_flags & UPL_RET_ONLY_ABSENT) && !((dst_page->list_req_pending) @@ -2668,16 +2936,15 @@ vm_object_upl_request( vm_page_free(dst_page); vm_page_unlock_queues(); + dst_page = NULL; } else if ((dst_page->absent && dst_page->list_req_pending)) { /* the default_pager case */ dst_page->list_req_pending = FALSE; dst_page->busy = FALSE; - dst_page->clustered = FALSE; } } - if((dst_page = vm_page_lookup(object, dst_offset)) == - VM_PAGE_NULL) { + if(dst_page == VM_PAGE_NULL) { if(object->private) { /* * This is a nasty wrinkle for users @@ -2711,6 +2978,18 @@ vm_object_upl_request( dst_page->unlock_request = 0; } #endif + if(cntrl_flags & UPL_RET_ONLY_ABSENT) { + /* + * if UPL_RET_ONLY_ABSENT was specified, + * then we're definitely setting up a + * UPL for a clustered read/pagein + * operation... mark the pages as clustered + * so vm_fault can correctly attribute them + * to the 'pagein' bucket the first time + * a fault happens on them + */ + dst_page->clustered = TRUE; + } dst_page->absent = TRUE; object->absent_count++; } @@ -2720,6 +2999,24 @@ vm_object_upl_request( dst_page->unlock_request = 0; } #endif /* 1 */ + + /* + * ENCRYPTED SWAP: + */ + if (cntrl_flags & UPL_ENCRYPT) { + /* + * The page is going to be encrypted when we + * get it from the pager, so mark it so. + */ + dst_page->encrypted = TRUE; + } else { + /* + * Otherwise, the page will not contain + * encrypted data. + */ + dst_page->encrypted = FALSE; + } + dst_page->overwriting = TRUE; if(dst_page->fictitious) { panic("need corner case for fictitious page"); @@ -2740,10 +3037,10 @@ vm_object_upl_request( } vm_page_lock_queues(); - if( !(cntrl_flags & UPL_FILE_IO)) { - pmap_page_protect(dst_page->phys_page, VM_PROT_NONE); - } - hw_dirty = pmap_is_modified(dst_page->phys_page); + if( !(cntrl_flags & UPL_FILE_IO)) + hw_dirty = pmap_disconnect(dst_page->phys_page) & VM_MEM_MODIFIED; + else + hw_dirty = pmap_get_refmod(dst_page->phys_page) & VM_MEM_MODIFIED; dirty = hw_dirty ?
TRUE : dst_page->dirty; if(cntrl_flags & UPL_SET_LITE) { @@ -2774,9 +3071,11 @@ vm_object_upl_request( /* use pageclean setup, it is more */ /* convenient even for the pageout */ /* cases here */ + vm_object_lock(upl->map_object); vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); + vm_object_unlock(upl->map_object); alias_page->absent = FALSE; alias_page = NULL; @@ -2805,10 +3104,22 @@ vm_object_upl_request( } else { vm_page_wire(dst_page); } - /* - * expect the page to be used - */ - dst_page->reference = TRUE; + if(cntrl_flags & UPL_RET_ONLY_ABSENT) { + /* + * expect the page not to be used + * since it's coming in as part + * of a cluster and could be + * speculative... pages that + * are 'consumed' will get a + * hardware reference + */ + dst_page->reference = FALSE; + } else { + /* + * expect the page to be used + */ + dst_page->reference = TRUE; + } dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE; @@ -2831,6 +3142,7 @@ vm_object_upl_request( xfer_size -= PAGE_SIZE; } } + if (upl->flags & UPL_INTERNAL) { if(page_list_count != NULL) *page_list_count = 0; @@ -2865,45 +3177,55 @@ vm_object_upl_request( THREAD_UNINT); if (wait_result != THREAD_AWAKENED) { vm_object_unlock(object); - return(KERN_FAILURE); + return KERN_FAILURE; } continue; } vm_object_unlock(object); - - if (rc = memory_object_data_unlock( + rc = memory_object_data_unlock( object->pager, dst_offset + object->paging_offset, size, - access_required)) { - if (rc == MACH_SEND_INTERRUPTED) - continue; - else - return KERN_FAILURE; - } - break; - + access_required); + if (rc != KERN_SUCCESS && rc != MACH_SEND_INTERRUPTED) + return KERN_FAILURE; + vm_object_lock(object); + + if (rc == KERN_SUCCESS) + break; } + /* lets wait on the last page requested */ /* NOTE: we will have to update lock completed routine to signal */ if(dst_page != VM_PAGE_NULL && (access_required & dst_page->page_lock) != access_required) { PAGE_ASSERT_WAIT(dst_page, THREAD_UNINT); - thread_block((void (*)(void))0); - vm_object_lock(object); + vm_object_unlock(object); + thread_block(THREAD_CONTINUE_NULL); + return KERN_SUCCESS; } } + vm_object_unlock(object); return KERN_SUCCESS; } /* JMM - Backward compatability for now */ kern_return_t +vm_fault_list_request( /* forward */ + memory_object_control_t control, + vm_object_offset_t offset, + upl_size_t size, + upl_t *upl_ptr, + upl_page_info_t **user_page_list_ptr, + int page_list_count, + int cntrl_flags); +kern_return_t vm_fault_list_request( memory_object_control_t control, vm_object_offset_t offset, - vm_size_t size, + upl_size_t size, upl_t *upl_ptr, upl_page_info_t **user_page_list_ptr, int page_list_count, @@ -2955,8 +3277,8 @@ __private_extern__ kern_return_t vm_object_super_upl_request( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, - vm_size_t super_cluster, + upl_size_t size, + upl_size_t super_cluster, upl_t *upl, upl_page_info_t *user_page_list, unsigned int *page_list_count, @@ -2965,12 +3287,17 @@ vm_object_super_upl_request( vm_page_t target_page; int ticket; + if(object->paging_offset > offset) return KERN_FAILURE; assert(object->paging_in_progress); offset = offset - object->paging_offset; + if(cntrl_flags & UPL_FOR_PAGEOUT) { + + vm_object_lock(object); + if((target_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { ticket = target_page->page_ticket; @@ -2979,18 +3306,13 @@ vm_object_super_upl_request( ((ticket << UPL_PAGE_TICKET_SHIFT) & UPL_PAGE_TICKET_MASK); } + vm_object_unlock(object); } - -/* turns off super cluster 
exercised by the default_pager */ -/* -super_cluster = size; -*/ - if ((super_cluster > size) && - (vm_page_free_count > vm_page_free_reserved)) { + if (super_cluster > size) { vm_object_offset_t base_offset; - vm_size_t super_size; + upl_size_t super_size; base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); @@ -2999,31 +3321,261 @@ super_cluster = size; super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size; if(offset > (base_offset + super_size)) - panic("vm_object_super_upl_request: Missed target pageout 0x%x,0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n", offset, base_offset, super_size, super_cluster, size, object->paging_offset); - /* apparently there is a case where the vm requests a */ - /* page to be written out who's offset is beyond the */ - /* object size */ + panic("vm_object_super_upl_request: Missed target pageout" + " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", + offset, base_offset, super_size, super_cluster, + size, object->paging_offset); + /* + * apparently there is a case where the vm requests a + * page to be written out whose offset is beyond the + * object size + */ if((offset + size) > (base_offset + super_size)) super_size = (offset + size) - base_offset; offset = base_offset; size = super_size; } - vm_object_upl_request(object, offset, size, - upl, user_page_list, page_list_count, - cntrl_flags); + return vm_object_upl_request(object, offset, size, + upl, user_page_list, page_list_count, + cntrl_flags); } + +kern_return_t +vm_map_create_upl( + vm_map_t map, + vm_map_address_t offset, + upl_size_t *upl_size, + upl_t *upl, + upl_page_info_array_t page_list, + unsigned int *count, + int *flags) +{ + vm_map_entry_t entry; + int caller_flags; + int force_data_sync; + int sync_cow_data; + vm_object_t local_object; + vm_map_offset_t local_offset; + vm_map_offset_t local_start; + kern_return_t ret; + + caller_flags = *flags; + + if (caller_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. + */ + return KERN_INVALID_VALUE; + } + + force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); + sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); + + if(upl == NULL) + return KERN_INVALID_ARGUMENT; + + +REDISCOVER_ENTRY: + vm_map_lock(map); + if (vm_map_lookup_entry(map, offset, &entry)) { + if (entry->object.vm_object == VM_OBJECT_NULL || + !entry->object.vm_object->phys_contiguous) { + if((*upl_size/page_size) > MAX_UPL_TRANSFER) { + *upl_size = MAX_UPL_TRANSFER * page_size; + } + } + if((entry->vme_end - offset) < *upl_size) { + *upl_size = entry->vme_end - offset; + } + if (caller_flags & UPL_QUERY_OBJECT_TYPE) { + if (entry->object.vm_object == VM_OBJECT_NULL) { + *flags = 0; + } else if (entry->object.vm_object->private) { + *flags = UPL_DEV_MEMORY; + if (entry->object.vm_object->phys_contiguous) { + *flags |= UPL_PHYS_CONTIG; + } + } else { + *flags = 0; + } + vm_map_unlock(map); + return KERN_SUCCESS; + } + /* + * Create an object if necessary.
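The super-cluster logic above rounds the target offset down to a cluster boundary, clips the cluster at the object's end, and then re-expands it if the original request would otherwise fall outside. The same arithmetic in isolation (the values, and initializing the cluster length to one super_cluster, are assumptions for illustration):

    /* Standalone sketch of the super-cluster rounding/clipping above. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t offset        = 0x23000;   /* requested pageout offset */
        uint64_t size          = 0x1000;    /* requested length */
        uint64_t super_cluster = 0x10000;   /* 64 KB, power of two */
        uint64_t object_size   = 0x28000;   /* object ends mid-cluster */

        uint64_t base = offset & ~(super_cluster - 1);       /* align down */
        uint64_t super_size = (base + super_cluster > object_size)
                                  ? object_size - base       /* clip at end */
                                  : super_cluster;

        /* never shrink below the caller's original request */
        if (offset + size > base + super_size)
            super_size = (offset + size) - base;

        printf("cluster [%#llx, %#llx) for request [%#llx, %#llx)\n",
               (unsigned long long)base,
               (unsigned long long)(base + super_size),
               (unsigned long long)offset,
               (unsigned long long)(offset + size));
        return 0;
    }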
+ */ + if (entry->object.vm_object == VM_OBJECT_NULL) { + entry->object.vm_object = vm_object_allocate( + (vm_size_t)(entry->vme_end - entry->vme_start)); + entry->offset = 0; + } + if (!(caller_flags & UPL_COPYOUT_FROM)) { + if (!(entry->protection & VM_PROT_WRITE)) { + vm_map_unlock(map); + return KERN_PROTECTION_FAILURE; + } + if (entry->needs_copy) { + vm_map_t local_map; + vm_object_t object; + vm_map_offset_t offset_hi; + vm_map_offset_t offset_lo; + vm_object_offset_t new_offset; + vm_prot_t prot; + boolean_t wired; + vm_behavior_t behavior; + vm_map_version_t version; + vm_map_t real_map; + + local_map = map; + vm_map_lock_write_to_read(map); + if(vm_map_lookup_locked(&local_map, + offset, VM_PROT_WRITE, + &version, &object, + &new_offset, &prot, &wired, + &behavior, &offset_lo, + &offset_hi, &real_map)) { + vm_map_unlock(local_map); + return KERN_FAILURE; + } + if (real_map != map) { + vm_map_unlock(real_map); + } + vm_object_unlock(object); + vm_map_unlock(local_map); + + goto REDISCOVER_ENTRY; + } + } + if (entry->is_sub_map) { + vm_map_t submap; + + submap = entry->object.sub_map; + local_start = entry->vme_start; + local_offset = entry->offset; + vm_map_reference(submap); + vm_map_unlock(map); + + ret = (vm_map_create_upl(submap, + local_offset + (offset - local_start), + upl_size, upl, page_list, count, + flags)); + + vm_map_deallocate(submap); + return ret; + } + + if (sync_cow_data) { + if (entry->object.vm_object->shadow + || entry->object.vm_object->copy) { + + local_object = entry->object.vm_object; + local_start = entry->vme_start; + local_offset = entry->offset; + vm_object_reference(local_object); + vm_map_unlock(map); + + if (entry->object.vm_object->shadow && + entry->object.vm_object->copy) { + vm_object_lock_request( + local_object->shadow, + (vm_object_offset_t) + ((offset - local_start) + + local_offset) + + local_object->shadow_offset, + *upl_size, FALSE, + MEMORY_OBJECT_DATA_SYNC, + VM_PROT_NO_CHANGE); + } + sync_cow_data = FALSE; + vm_object_deallocate(local_object); + goto REDISCOVER_ENTRY; + } + } + + if (force_data_sync) { + + local_object = entry->object.vm_object; + local_start = entry->vme_start; + local_offset = entry->offset; + vm_object_reference(local_object); + vm_map_unlock(map); + + vm_object_lock_request( + local_object, + (vm_object_offset_t) + ((offset - local_start) + local_offset), + (vm_object_size_t)*upl_size, FALSE, + MEMORY_OBJECT_DATA_SYNC, + VM_PROT_NO_CHANGE); + force_data_sync = FALSE; + vm_object_deallocate(local_object); + goto REDISCOVER_ENTRY; + } + + if(!(entry->object.vm_object->private)) { + if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) + *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); + if(entry->object.vm_object->phys_contiguous) { + *flags = UPL_PHYS_CONTIG; + } else { + *flags = 0; + } + } else { + *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; + } + local_object = entry->object.vm_object; + local_offset = entry->offset; + local_start = entry->vme_start; + vm_object_reference(local_object); + vm_map_unlock(map); + if(caller_flags & UPL_SET_IO_WIRE) { + ret = (vm_object_iopl_request(local_object, + (vm_object_offset_t) + ((offset - local_start) + + local_offset), + *upl_size, + upl, + page_list, + count, + caller_flags)); + } else { + ret = (vm_object_upl_request(local_object, + (vm_object_offset_t) + ((offset - local_start) + + local_offset), + *upl_size, + upl, + page_list, + count, + caller_flags)); + } + vm_object_deallocate(local_object); + return(ret); + } + + vm_map_unlock(map); + return(KERN_FAILURE); +} + +/* + * Internal 
routine to enter a UPL into a VM map. + * + * JMM - This should just be doable through the standard + * vm_map_enter() API. + */ kern_return_t -vm_upl_map( - vm_map_t map, - upl_t upl, - vm_offset_t *dst_addr) +vm_map_enter_upl( + vm_map_t map, + upl_t upl, + vm_map_offset_t *dst_addr) { - vm_size_t size; + vm_map_size_t size; vm_object_offset_t offset; - vm_offset_t addr; + vm_map_offset_t addr; vm_page_t m; kern_return_t kr; @@ -3049,12 +3601,12 @@ vm_upl_map( if(upl->flags & UPL_INTERNAL) { lite_list = (wpl_array_t) - ((((vm_offset_t)upl) + sizeof(struct upl)) + ((((uintptr_t)upl) + sizeof(struct upl)) + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); } else { lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); } object = upl->map_object; upl->map_object = vm_object_allocate(upl->size); @@ -3067,11 +3619,12 @@ vm_upl_map( upl->map_object->shadow_offset = upl->offset - object->paging_offset; upl->map_object->wimg_bits = object->wimg_bits; - vm_object_unlock(upl->map_object); offset = upl->map_object->shadow_offset; new_offset = 0; size = upl->size; + vm_object_lock(object); + while(size) { pg_num = (new_offset)/PAGE_SIZE; if(lite_list[pg_num>>5] & (1 << (pg_num & 31))) { @@ -3094,7 +3647,26 @@ vm_upl_map( alias_page->private = TRUE; alias_page->pageout = TRUE; alias_page->phys_page = m->phys_page; + + vm_page_lock_queues(); vm_page_wire(alias_page); + vm_page_unlock_queues(); + + /* + * ENCRYPTED SWAP: + * The virtual page ("m") has to be wired in some way + * here or its physical page ("m->phys_page") could + * be recycled at any time. + * Assuming this is enforced by the caller, we can't + * get an encrypted page here. Since the encryption + * key depends on the VM page's "pager" object and + * the "paging_offset", we couldn't handle 2 pageable + * VM pages (with different pagers and paging_offsets) + * sharing the same physical page: we could end up + * encrypting with one key (via one VM page) and + * decrypting with another key (via the alias VM page). + */ + ASSERT_PAGE_DECRYPTED(m); vm_page_insert(alias_page, upl->map_object, new_offset); @@ -3108,6 +3680,7 @@ vm_upl_map( new_offset += PAGE_SIZE_64; } vm_object_unlock(object); + vm_object_unlock(upl->map_object); } if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous) offset = upl->offset - upl->map_object->paging_offset; @@ -3125,8 +3698,8 @@ vm_upl_map( /* NEED A UPL_MAP ALIAS */ - kr = vm_map_enter(map, dst_addr, size, (vm_offset_t) 0, TRUE, - upl->map_object, offset, FALSE, + kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, + VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { @@ -3134,6 +3707,8 @@ vm_upl_map( return(kr); } + vm_object_lock(upl->map_object); + for(addr=*dst_addr; size > 0; size-=PAGE_SIZE,addr+=PAGE_SIZE) { m = vm_page_lookup(upl->map_object, offset); if(m) { @@ -3146,6 +3721,8 @@ vm_upl_map( } offset+=PAGE_SIZE_64; } + vm_object_unlock(upl->map_object); + upl->ref_count++; /* hold a reference for the mapping */ upl->flags |= UPL_PAGE_LIST_MAPPED; upl->kaddr = *dst_addr; @@ -3153,14 +3730,23 @@ vm_upl_map( return KERN_SUCCESS; } - +/* + * Internal routine to remove a UPL mapping from a VM map. + * + * XXX - This should just be doable through a standard + * vm_map_remove() operation. 
Otherwise, implicit clean-up + * of the target map won't be able to correctly remove + * these (and release the reference on the UPL). Having + * to do this means we can't map these into user-space + * maps yet. + */ kern_return_t -vm_upl_unmap( +vm_map_remove_upl( vm_map_t map, upl_t upl) { vm_address_t addr; - vm_size_t size; + upl_size_t size; if (upl == UPL_NULL) return KERN_INVALID_ARGUMENT; @@ -3175,7 +3761,10 @@ vm_upl_unmap( upl->kaddr = (vm_offset_t) 0; upl_unlock(upl); - vm_deallocate(map, addr, size); + vm_map_remove( map, + vm_map_trunc_page(addr), + vm_map_round_page(addr + size), + VM_MAP_NO_FLAGS); return KERN_SUCCESS; } upl_unlock(upl); @@ -3185,14 +3774,14 @@ vm_upl_unmap( kern_return_t upl_commit_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int flags, upl_page_info_t *page_list, mach_msg_type_number_t count, boolean_t *empty) { - vm_size_t xfer_size = size; + upl_size_t xfer_size = size; vm_object_t shadow_object; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; @@ -3200,6 +3789,7 @@ upl_commit_range( wpl_array_t lite_list; int occupied; int delayed_unlock = 0; + int clear_refmod = 0; boolean_t shadow_internal; *empty = FALSE; @@ -3211,7 +3801,7 @@ upl_commit_range( if (count == 0) page_list = NULL; - if(object->pageout) { + if (object->pageout) { shadow_object = object->shadow; } else { shadow_object = object; @@ -3219,6 +3809,15 @@ upl_commit_range( upl_lock(upl); + if (upl->flags & UPL_ACCESS_BLOCKED) { + /* + * We used this UPL to block access to the pages by marking + * them "busy". Now we need to clear the "busy" bit to allow + * access to these pages again. + */ + flags |= UPL_COMMIT_ALLOW_ACCESS; + } + if (upl->flags & UPL_CLEAR_DIRTY) flags |= UPL_COMMIT_CLEAR_DIRTY; @@ -3231,20 +3830,22 @@ upl_commit_range( if (upl->flags & UPL_INTERNAL) { lite_list = (wpl_array_t) - ((((vm_offset_t)upl) + sizeof(struct upl)) + ((((uintptr_t)upl) + sizeof(struct upl)) + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); } else { lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); } - + if (object != shadow_object) + vm_object_lock(object); vm_object_lock(shadow_object); + shadow_internal = shadow_object->internal; entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; - while(xfer_size) { + while (xfer_size) { vm_page_t t,m; upl_page_info_t *p; @@ -3284,6 +3885,8 @@ upl_commit_range( } if (m != VM_PAGE_NULL) { + clear_refmod = 0; + if (upl->flags & UPL_IO_WIRE) { if (delayed_unlock == 0) @@ -3302,13 +3905,25 @@ upl_commit_range( m->dirty = TRUE; } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { m->dirty = FALSE; - pmap_clear_modify(m->phys_page); + clear_refmod |= VM_MEM_MODIFIED; } if (flags & UPL_COMMIT_INACTIVATE) { m->reference = FALSE; + clear_refmod |= VM_MEM_REFERENCED; vm_page_deactivate(m); - pmap_clear_reference(m->phys_page); } + if (clear_refmod) + pmap_clear_refmod(m->phys_page, clear_refmod); + + if (flags & UPL_COMMIT_ALLOW_ACCESS) { + /* + * We blocked access to the pages in this UPL. + * Clear the "busy" bit and wake up any waiter + * for this page. 
+ */ + PAGE_WAKEUP_DONE(m); + } + target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; @@ -3325,10 +3940,13 @@ upl_commit_range( */ if (flags & UPL_COMMIT_CLEAR_DIRTY) { m->dirty = FALSE; - pmap_clear_modify(m->phys_page); + clear_refmod |= VM_MEM_MODIFIED; } if (flags & UPL_COMMIT_INACTIVATE) - pmap_clear_reference(m->phys_page); + clear_refmod |= VM_MEM_REFERENCED; + + if (clear_refmod) + pmap_clear_refmod(m->phys_page, clear_refmod); if (page_list) { p = &(page_list[entry]); @@ -3349,15 +3967,7 @@ upl_commit_range( } m->dump_cleaning = FALSE; if(m->laundry) { - if (!shadow_internal) - vm_page_burst_count--; - vm_page_laundry_count--; - m->laundry = FALSE; - if (vm_page_laundry_count < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) - &vm_page_laundry_count); - } + vm_pageout_throttle_up(m); } if(m->pageout) { m->cleaning = FALSE; @@ -3365,21 +3975,28 @@ upl_commit_range( #if MACH_CLUSTER_STATS if (m->wanted) vm_pageout_target_collisions++; #endif - pmap_page_protect(m->phys_page, VM_PROT_NONE); - m->dirty = pmap_is_modified(m->phys_page); + if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) + m->dirty = TRUE; + else + m->dirty = FALSE; + if(m->dirty) { - CLUSTER_STAT( - vm_pageout_target_page_dirtied++;) vm_page_unwire(m);/* reactivates */ - VM_STAT(reactivations++); + + if (upl->flags & UPL_PAGEOUT) { + CLUSTER_STAT(vm_pageout_target_page_dirtied++;) + VM_STAT(reactivations++); + } PAGE_WAKEUP_DONE(m); } else { - CLUSTER_STAT( - vm_pageout_target_page_freed++;) vm_page_free(m);/* clears busy, etc. */ - if (page_list[entry].dirty) - VM_STAT(pageouts++); + if (upl->flags & UPL_PAGEOUT) { + CLUSTER_STAT(vm_pageout_target_page_freed++;) + + if (page_list[entry].dirty) + VM_STAT(pageouts++); + } } if (delayed_unlock++ > DELAYED_UNLOCK_LIMIT) { delayed_unlock = 0; @@ -3414,9 +4031,9 @@ upl_commit_range( m->dirty = FALSE; } else if (m->overwriting) { /* alternate request page list, write to - /* page_list case. Occurs when the original - /* page was wired at the time of the list - /* request */ + * page_list case. Occurs when the original + * page was wired at the time of the list + * request */ assert(m->wire_count != 0); vm_page_unwire(m);/* reactivates */ m->overwriting = FALSE; @@ -3443,6 +4060,16 @@ upl_commit_range( else vm_page_deactivate(m); } + + if (flags & UPL_COMMIT_ALLOW_ACCESS) { + /* + * We blocked access to the pages in this UPL. + * Clear the "busy" bit on this page before we + * wake up any waiter. + */ + m->busy = FALSE; + } + /* * Wakeup any thread waiting for the page to be un-cleaning.
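The commit loop above also bounds how long the page-queues lock is held: delayed_unlock counts pages processed under the lock and drops it every DELAYED_UNLOCK_LIMIT iterations. A pthread rendition of the same pattern (the limit value here is made up; the real one is kernel-internal):

    /* Sketch of the delayed-unlock batching: amortize the lock over a
     * batch of pages, but periodically release it so waiters get in. */
    #include <pthread.h>
    #include <stdio.h>

    #define DELAYED_UNLOCK_LIMIT 32   /* illustrative value */

    static pthread_mutex_t queues_lock = PTHREAD_MUTEX_INITIALIZER;

    static void commit_pages(int npages)
    {
        int delayed_unlock = 0;

        for (int i = 0; i < npages; i++) {
            if (delayed_unlock == 0)
                pthread_mutex_lock(&queues_lock);

            /* ... per-page commit work happens here, under the lock ... */

            if (++delayed_unlock > DELAYED_UNLOCK_LIMIT) {
                delayed_unlock = 0;
                pthread_mutex_unlock(&queues_lock);  /* let waiters in */
            }
        }
        if (delayed_unlock)
            pthread_mutex_unlock(&queues_lock);
    }

    int main(void) { commit_pages(100); puts("done"); return 0; }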
*/ @@ -3490,6 +4117,8 @@ upl_commit_range( vm_object_paging_end(shadow_object); } vm_object_unlock(shadow_object); + if (object != shadow_object) + vm_object_unlock(object); upl_unlock(upl); return KERN_SUCCESS; @@ -3498,16 +4127,15 @@ upl_commit_range( kern_return_t upl_abort_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int error, boolean_t *empty) { - vm_size_t xfer_size = size; + upl_size_t xfer_size = size; vm_object_t shadow_object; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; - vm_object_offset_t page_offset; int entry; wpl_array_t lite_list; int occupied; @@ -3537,24 +4165,25 @@ upl_abort_range( upl_unlock(upl); return KERN_FAILURE; } - + if (object != shadow_object) + vm_object_lock(object); vm_object_lock(shadow_object); + shadow_internal = shadow_object->internal; if(upl->flags & UPL_INTERNAL) { lite_list = (wpl_array_t) - ((((vm_offset_t)upl) + sizeof(struct upl)) + ((((uintptr_t)upl) + sizeof(struct upl)) + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); } else { lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); } entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; while(xfer_size) { vm_page_t t,m; - upl_page_info_t *p; m = VM_PAGE_NULL; if(upl->flags & UPL_LITE) { @@ -3585,6 +4214,8 @@ upl_abort_range( if(m != VM_PAGE_NULL) { vm_page_lock_queues(); if(m->absent) { + boolean_t must_free = TRUE; + /* COPYOUT = FALSE case */ /* check for error conditions which must */ /* be passed back to the pages customer */ @@ -3594,33 +4225,40 @@ upl_abort_range( vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; + must_free = FALSE; } else if(error & UPL_ABORT_UNAVAILABLE) { m->restart = FALSE; m->unusual = TRUE; - m->clustered = FALSE; + must_free = FALSE; } else if(error & UPL_ABORT_ERROR) { m->restart = FALSE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; - } else if(error & UPL_ABORT_DUMP_PAGES) { - m->clustered = TRUE; - } else { - m->clustered = TRUE; + must_free = FALSE; } - + + /* + * ENCRYPTED SWAP: + * If the page was already encrypted, + * we don't really need to decrypt it + * now. It will get decrypted later, + * on demand, as soon as someone needs + * to access its contents. + */ m->cleaning = FALSE; m->overwriting = FALSE; PAGE_WAKEUP_DONE(m); - if(m->clustered) { + + if (must_free == TRUE) { vm_page_free(m); } else { vm_page_activate(m); } - vm_page_unlock_queues(); + target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; @@ -3630,16 +4268,7 @@ upl_abort_range( * Handle the trusted pager throttle. 
*/ if (m->laundry) { - if (!shadow_internal) - vm_page_burst_count--; - vm_page_laundry_count--; - m->laundry = FALSE; - if (vm_page_laundry_count - < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) - &vm_page_laundry_count); - } + vm_pageout_throttle_up(m); } if(m->pageout) { assert(m->busy); @@ -3649,7 +4278,6 @@ upl_abort_range( } m->dump_cleaning = FALSE; m->cleaning = FALSE; - m->busy = FALSE; m->overwriting = FALSE; #if MACH_PAGEMAP vm_external_state_clr( @@ -3657,9 +4285,9 @@ upl_abort_range( #endif /* MACH_PAGEMAP */ if(error & UPL_ABORT_DUMP_PAGES) { vm_page_free(m); - pmap_page_protect(m->phys_page, VM_PROT_NONE); + pmap_disconnect(m->phys_page); } else { - PAGE_WAKEUP(m); + PAGE_WAKEUP_DONE(m); } vm_page_unlock_queues(); } @@ -3696,6 +4324,8 @@ upl_abort_range( vm_object_paging_end(shadow_object); } vm_object_unlock(shadow_object); + if (object != shadow_object) + vm_object_unlock(object); upl_unlock(upl); @@ -3712,7 +4342,7 @@ upl_abort( vm_object_offset_t offset; vm_object_offset_t shadow_offset; vm_object_offset_t target_offset; - int i; + upl_size_t i; wpl_array_t lite_list; vm_page_t t,m; int occupied; @@ -3752,14 +4382,18 @@ upl_abort( if(upl->flags & UPL_INTERNAL) { lite_list = (wpl_array_t) - ((((vm_offset_t)upl) + sizeof(struct upl)) + ((((uintptr_t)upl) + sizeof(struct upl)) + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); } else { lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); } offset = 0; + + if (object != shadow_object) + vm_object_lock(object); vm_object_lock(shadow_object); + shadow_internal = shadow_object->internal; for(i = 0; i<(upl->size); i+=PAGE_SIZE, offset += PAGE_SIZE_64) { @@ -3789,6 +4423,8 @@ upl_abort( if(m != VM_PAGE_NULL) { vm_page_lock_queues(); if(m->absent) { + boolean_t must_free = TRUE; + /* COPYOUT = FALSE case */ /* check for error conditions which must */ /* be passed back to the pages customer */ @@ -3798,26 +4434,34 @@ upl_abort( vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; + must_free = FALSE; } else if(error & UPL_ABORT_UNAVAILABLE) { m->restart = FALSE; m->unusual = TRUE; - m->clustered = FALSE; + must_free = FALSE; } else if(error & UPL_ABORT_ERROR) { m->restart = FALSE; m->absent = FALSE; vm_object_absent_release(m->object); m->page_error = KERN_MEMORY_ERROR; m->error = TRUE; - } else if(error & UPL_ABORT_DUMP_PAGES) { - m->clustered = TRUE; - } else { - m->clustered = TRUE; + must_free = FALSE; } - + + /* + * ENCRYPTED SWAP: + * If the page was already encrypted, + * we don't really need to decrypt it + * now. It will get decrypted later, + * on demand, as soon as someone needs + * to access its contents. + */ + m->cleaning = FALSE; m->overwriting = FALSE; PAGE_WAKEUP_DONE(m); - if(m->clustered) { + + if (must_free == TRUE) { vm_page_free(m); } else { vm_page_activate(m); @@ -3829,16 +4473,7 @@ upl_abort( * Handle the trusted pager throttle. 
*/ if (m->laundry) { - if (!shadow_internal) - vm_page_burst_count--; - vm_page_laundry_count--; - m->laundry = FALSE; - if (vm_page_laundry_count - < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) - &vm_page_laundry_count); - } + vm_pageout_throttle_up(m); } if(m->pageout) { assert(m->busy); @@ -3848,7 +4483,6 @@ upl_abort( } m->dump_cleaning = FALSE; m->cleaning = FALSE; - m->busy = FALSE; m->overwriting = FALSE; #if MACH_PAGEMAP vm_external_state_clr( @@ -3856,9 +4490,9 @@ upl_abort( #endif /* MACH_PAGEMAP */ if(error & UPL_ABORT_DUMP_PAGES) { vm_page_free(m); - pmap_page_protect(m->phys_page, VM_PROT_NONE); + pmap_disconnect(m->phys_page); } else { - PAGE_WAKEUP(m); + PAGE_WAKEUP_DONE(m); } vm_page_unlock_queues(); } @@ -3868,12 +4502,12 @@ upl_abort( occupied = 0; } else if (upl->flags & UPL_LITE) { int pg_num; - int i; + int j; pg_num = upl->size/PAGE_SIZE; pg_num = (pg_num + 31) >> 5; occupied = 0; - for(i= 0; iflags & UPL_DEVICE_MEMORY) page_list = NULL; + if (upl->flags & UPL_ENCRYPTED) { + /* + * ENCRYPTED SWAP: + * This UPL was encrypted, but we don't need + * to decrypt here. We'll decrypt each page + * later, on demand, as soon as someone needs + * to access the page's contents. + */ + } + if ((upl->flags & UPL_CLEAR_DIRTY) || (upl->flags & UPL_PAGE_SYNC_DONE) || page_list) { vm_object_t shadow_object = upl->map_object->shadow; vm_object_t object = upl->map_object; vm_object_offset_t target_offset; - vm_size_t xfer_end; + upl_size_t xfer_end; int entry; vm_page_t t, m; upl_page_info_t *p; + if (object != shadow_object) + vm_object_lock(object); vm_object_lock(shadow_object); entry = 0; @@ -3946,6 +4594,15 @@ upl_commit( m = vm_page_lookup(shadow_object, target_offset); if(m != VM_PAGE_NULL) { + /* + * ENCRYPTED SWAP: + * If this page was encrypted, we + * don't need to decrypt it here. + * We'll decrypt it later, on demand, + * as soon as someone needs to access + * its contents. + */ + if (upl->flags & UPL_CLEAR_DIRTY) { pmap_clear_modify(m->phys_page); m->dirty = FALSE; @@ -3985,8 +4642,10 @@ upl_commit( target_offset += PAGE_SIZE_64; entry++; } - vm_object_unlock(shadow_object); + if (object != shadow_object) + vm_object_unlock(object); + } if (upl->flags & UPL_DEVICE_MEMORY) { vm_object_lock(upl->map_object->shadow); @@ -4004,7 +4663,7 @@ kern_return_t vm_object_iopl_request( vm_object_t object, vm_object_offset_t offset, - vm_size_t size, + upl_size_t size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -4012,23 +4671,43 @@ vm_object_iopl_request( { vm_page_t dst_page; vm_object_offset_t dst_offset = offset; - vm_size_t xfer_size = size; + upl_size_t xfer_size = size; upl_t upl = NULL; - int entry; - wpl_array_t lite_list; + unsigned int entry; + wpl_array_t lite_list = NULL; int page_field_size; int delayed_unlock = 0; - + int no_zero_fill = FALSE; vm_page_t alias_page = NULL; kern_return_t ret; vm_prot_t prot; - if(cntrl_flags & UPL_COPYOUT_FROM) { + if (cntrl_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. + */ + return KERN_INVALID_VALUE; + } + + if (cntrl_flags & UPL_ENCRYPT) { + /* + * ENCRYPTED SWAP: + * The paging path doesn't use this interface, + * so we don't support the UPL_ENCRYPT flag + * here. We won't encrypt the pages. + */ + assert(! 
(cntrl_flags & UPL_ENCRYPT)); + } + + if (cntrl_flags & UPL_NOZEROFILL) + no_zero_fill = TRUE; + + if (cntrl_flags & UPL_COPYOUT_FROM) prot = VM_PROT_READ; - } else { + else prot = VM_PROT_READ | VM_PROT_WRITE; - } if(((size/page_size) > MAX_UPL_TRANSFER) && !object->phys_contiguous) { size = MAX_UPL_TRANSFER * page_size; @@ -4058,9 +4737,9 @@ vm_object_iopl_request( UPL_CREATE_INTERNAL | UPL_CREATE_LITE, size); user_page_list = (upl_page_info_t *) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); lite_list = (wpl_array_t) - (((vm_offset_t)user_page_list) + + (((uintptr_t)user_page_list) + ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); page_field_size = ((size/PAGE_SIZE) + 7) >> 3; @@ -4072,7 +4751,7 @@ vm_object_iopl_request( } else { upl = upl_create(UPL_CREATE_INTERNAL, size); user_page_list = (upl_page_info_t *) - (((vm_offset_t)upl) + (((uintptr_t)upl) + sizeof(struct upl)); upl->flags = UPL_INTERNAL | UPL_IO_WIRE; } @@ -4080,7 +4759,7 @@ vm_object_iopl_request( if(cntrl_flags & UPL_SET_LITE) { upl = upl_create(UPL_CREATE_LITE, size); lite_list = (wpl_array_t) - (((vm_offset_t)upl) + sizeof(struct upl)); + (((uintptr_t)upl) + sizeof(struct upl)); page_field_size = ((size/PAGE_SIZE) + 7) >> 3; page_field_size = (page_field_size + 3) & 0xFFFFFFFC; @@ -4108,7 +4787,7 @@ vm_object_iopl_request( *upl_ptr = upl; if(user_page_list) { user_page_list[0].phys_addr = - (offset + object->shadow_offset)>>12; + (offset + object->shadow_offset)>>PAGE_SHIFT; user_page_list[0].device = TRUE; } @@ -4154,9 +4833,17 @@ vm_object_iopl_request( upl->size = size; upl->offset = offset + object->paging_offset; *upl_ptr = upl; -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG queue_enter(&object->uplq, upl, upl_t, uplq); -#endif /* UPL_DEBUG */ +#endif /* UPL_DEBUG */ + } + + if (cntrl_flags & UPL_BLOCK_ACCESS) { + /* + * The user requested that access to the pages in this UPL + * be blocked until the UPL is committed or aborted. + */ + upl->flags |= UPL_ACCESS_BLOCKED; + } entry = 0; @@ -4172,11 +4859,18 @@ vm_object_iopl_request( } dst_page = vm_page_lookup(object, dst_offset); + /* + * ENCRYPTED SWAP: + * If the page is encrypted, we need to decrypt it, + * so force a soft page fault. + */ if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) || - (dst_page->unusual && (dst_page->error || - dst_page->restart || dst_page->absent || - dst_page->fictitious || - prot & dst_page->page_lock))) { + (dst_page->encrypted) || + (dst_page->unusual && (dst_page->error || + dst_page->restart || + dst_page->absent || + dst_page->fictitious || + (prot & dst_page->page_lock)))) { vm_fault_return_t result; do { vm_page_t top_page; @@ -4205,7 +4899,7 @@ vm_object_iopl_request( VM_BEHAVIOR_SEQUENTIAL, &prot, &dst_page, &top_page, (int *)0, - &error_code, FALSE, FALSE, NULL, 0); + &error_code, no_zero_fill, FALSE, NULL, 0); switch(result) { case VM_FAULT_SUCCESS: @@ -4287,6 +4981,16 @@ vm_object_iopl_request( vm_page_lock_queues(); vm_page_wire(dst_page); + if (cntrl_flags & UPL_BLOCK_ACCESS) { + /* + * Mark the page "busy" to block any future page fault + * on this page. We'll also remove the mapping + * of all these pages before leaving this routine. + */
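upl_transpose(), just below, avoids an AB/BA deadlock by always taking the two UPL locks in ascending address order. A minimal pthread sketch of that ordering rule:

    /* Deadlock avoidance by global lock ordering: whichever pair is
     * passed, the lower-addressed lock is always taken first. */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if ((uintptr_t)a < (uintptr_t)b) {
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else {
            pthread_mutex_lock(b);
            pthread_mutex_lock(a);
        }
    }

    int main(void)
    {
        lock_pair(&m1, &m2);      /* both call orders acquire in the */
        pthread_mutex_unlock(&m1);
        pthread_mutex_unlock(&m2);
        lock_pair(&m2, &m1);      /* same global order: no deadlock  */
        pthread_mutex_unlock(&m1);
        pthread_mutex_unlock(&m2);
        puts("consistent lock order held");
        return 0;
    }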
+ assert(!dst_page->fictitious); + dst_page->busy = TRUE; + } + if (upl_ptr) { if (cntrl_flags & UPL_SET_LITE) { int pg_num; @@ -4357,31 +5061,929 @@ vm_object_iopl_request( } vm_object_unlock(object); - return KERN_SUCCESS; -} -vm_size_t -upl_get_internal_pagelist_offset() -{ - return sizeof(struct upl); -} + if (cntrl_flags & UPL_BLOCK_ACCESS) { + /* + * We've marked all the pages "busy" so that future + * page faults will block. + * Now remove the mapping for these pages, so that they + * can't be accessed without causing a page fault. + */ + vm_object_pmap_protect(object, offset, (vm_object_size_t)size, + PMAP_NULL, 0, VM_PROT_NONE); + } -void -upl_set_dirty( - upl_t upl) -{ - upl->flags |= UPL_CLEAR_DIRTY; + return KERN_SUCCESS; } -void -upl_clear_dirty( - upl_t upl) +kern_return_t +upl_transpose( + upl_t upl1, + upl_t upl2) { - upl->flags &= ~UPL_CLEAR_DIRTY; -} + kern_return_t retval; + boolean_t upls_locked; + vm_object_t object1, object2; + if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) { + return KERN_INVALID_ARGUMENT; + } + + upls_locked = FALSE; -#ifdef MACH_BSD + /* + * Since we need to lock both UPLs at the same time, + * avoid deadlocks by always taking locks in the same order. + */ + if (upl1 < upl2) { + upl_lock(upl1); + upl_lock(upl2); + } else { + upl_lock(upl2); + upl_lock(upl1); + } + upls_locked = TRUE; /* the UPLs will need to be unlocked */ + + object1 = upl1->map_object; + object2 = upl2->map_object; + + if (upl1->offset != 0 || upl2->offset != 0 || + upl1->size != upl2->size) { + /* + * We deal only with full objects, not subsets. + * That's because we exchange the entire backing store info + * for the objects: pager, resident pages, etc... We can't do + * only part of it. + */ + retval = KERN_INVALID_VALUE; + goto done; + } + + /* + * Transpose the VM objects' backing store. + */ + retval = vm_object_transpose(object1, object2, + (vm_object_size_t) upl1->size); + + if (retval == KERN_SUCCESS) { + /* + * Make each UPL point to the correct VM object, i.e. the + * object holding the pages that the UPL refers to... + */ + upl1->map_object = object2; + upl2->map_object = object1; + } + +done: + /* + * Cleanup. + */ + if (upls_locked) { + upl_unlock(upl1); + upl_unlock(upl2); + upls_locked = FALSE; + } + + return retval; +} + +/* + * ENCRYPTED SWAP: + * + * Rationale: the user might have some encrypted data on disk (via + * FileVault or any other mechanism). That data is then decrypted in + * memory, which is safe as long as the machine is secure. But that + * decrypted data in memory could be paged out to disk by the default + * pager. The data would then be stored on disk in the clear (not encrypted) + * and it could be accessed by anyone who gets physical access to the + * disk (if the laptop or the disk gets stolen for example). This weakens + * the security offered by FileVault. + * + * Solution: the default pager will optionally request that all the + * pages it gathers for pageout be encrypted, via the UPL interfaces, + * before it sends this UPL to disk via the vnode_pageout() path. + * + * Notes: + * + * To avoid disrupting the VM LRU algorithms, we want to keep the + * clean-in-place mechanisms, which allow us to send some extra pages to + * swap (clustering) without actually removing them from the user's + * address space. We don't want the user to unknowingly access encrypted + * data, so we have to actually remove the encrypted pages from the page + * table.
When the user accesses the data, the hardware will fail to + * locate the virtual page in its page table and will trigger a page + * fault. We can then decrypt the page and enter it in the page table + * again. Whenever we allow the user to access the contents of a page, + * we have to make sure it's not encrypted. + * + * + */ +/* + * ENCRYPTED SWAP: + * Reserve of virtual addresses in the kernel address space. + * We need to map the physical pages in the kernel, so that we + * can call the encryption/decryption routines with a kernel + * virtual address. We keep this pool of pre-allocated kernel + * virtual addresses so that we don't have to scan the kernel's + * virtual address space each time we need to encrypt or decrypt + * a physical page. + * It would be nice to be able to encrypt and decrypt in physical + * mode but that might not always be more efficient... + */ +decl_simple_lock_data(,vm_paging_lock) +#define VM_PAGING_NUM_PAGES 64 +vm_map_offset_t vm_paging_base_address = 0; +boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; +int vm_paging_max_index = 0; +unsigned long vm_paging_no_kernel_page = 0; +unsigned long vm_paging_objects_mapped = 0; +unsigned long vm_paging_pages_mapped = 0; +unsigned long vm_paging_objects_mapped_slow = 0; +unsigned long vm_paging_pages_mapped_slow = 0; + +/* + * ENCRYPTED SWAP: + * vm_paging_map_object: + * Maps part of a VM object's pages in the kernel + * virtual address space, using the pre-allocated + * kernel virtual addresses, if possible. + * Context: + * The VM object is locked. This lock will get + * dropped and re-acquired though. + */ +kern_return_t +vm_paging_map_object( + vm_map_offset_t *address, + vm_page_t page, + vm_object_t object, + vm_object_offset_t offset, + vm_map_size_t *size) +{ + kern_return_t kr; + vm_map_offset_t page_map_offset; + vm_map_size_t map_size; + vm_object_offset_t object_offset; +#ifdef __ppc__ + int i; + vm_map_entry_t map_entry; +#endif /* __ppc__ */ + + +#ifdef __ppc__ + if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { + /* + * Optimization for the PowerPC. + * Use one of the pre-allocated kernel virtual addresses + * and just enter the VM page in the kernel address space + * at that virtual address. + */ + vm_object_unlock(object); + simple_lock(&vm_paging_lock); + + if (vm_paging_base_address == 0) { + /* + * Initialize our pool of pre-allocated kernel + * virtual addresses. + */ + simple_unlock(&vm_paging_lock); + page_map_offset = 0; + kr = vm_map_find_space(kernel_map, + &page_map_offset, + VM_PAGING_NUM_PAGES * PAGE_SIZE, + 0, + &map_entry); + if (kr != KERN_SUCCESS) { + panic("vm_paging_map_object: " + "kernel_map full\n"); + } + map_entry->object.vm_object = kernel_object; + map_entry->offset = + page_map_offset - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(kernel_object); + vm_map_unlock(kernel_map); + + simple_lock(&vm_paging_lock); + if (vm_paging_base_address != 0) { + /* someone raced us and won: undo */ + simple_unlock(&vm_paging_lock); + kr = vm_map_remove(kernel_map, + page_map_offset, + page_map_offset + + (VM_PAGING_NUM_PAGES + * PAGE_SIZE), + VM_MAP_NO_FLAGS); + assert(kr == KERN_SUCCESS); + simple_lock(&vm_paging_lock); + } else { + vm_paging_base_address = page_map_offset; + } + } + + /* + * Try and find an available kernel virtual address + * from our pre-allocated pool.
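The pre-allocated kernel-virtual-address pool declared above is managed as a fixed array of slots guarded by vm_paging_lock and allocated by a linear scan of vm_paging_page_inuse[]. A userspace model of that allocator (all names here are stand-ins):

    /* Fixed-size slot pool with linear-scan allocation under a lock,
     * mirroring the vm_paging_page_inuse[] scheme. */
    #include <pthread.h>
    #include <stdio.h>

    #define NUM_SLOTS 64   /* mirrors VM_PAGING_NUM_PAGES */

    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
    static int slot_inuse[NUM_SLOTS];

    static int slot_alloc(void)            /* returns slot index or -1 */
    {
        int found = -1;
        pthread_mutex_lock(&pool_lock);
        for (int i = 0; i < NUM_SLOTS; i++) {
            if (!slot_inuse[i]) { slot_inuse[i] = 1; found = i; break; }
        }
        pthread_mutex_unlock(&pool_lock);
        return found;   /* on -1 the caller falls back to the slow path */
    }

    static void slot_free(int i)
    {
        pthread_mutex_lock(&pool_lock);
        slot_inuse[i] = 0;
        pthread_mutex_unlock(&pool_lock);
    }

    int main(void)
    {
        int s = slot_alloc();
        printf("got slot %d\n", s);
        slot_free(s);
        return 0;
    }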
+ */ + page_map_offset = 0; + for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { + if (vm_paging_page_inuse[i] == FALSE) { + page_map_offset = vm_paging_base_address + + (i * PAGE_SIZE); + break; + } + } + + if (page_map_offset != 0) { + /* + * We found a kernel virtual address; + * map the physical page to that virtual address. + */ + if (i > vm_paging_max_index) { + vm_paging_max_index = i; + } + vm_paging_page_inuse[i] = TRUE; + simple_unlock(&vm_paging_lock); + pmap_map_block(kernel_pmap, + page_map_offset, + page->phys_page, + PAGE_SIZE, + VM_PROT_DEFAULT, + ((int) page->object->wimg_bits & + VM_WIMG_MASK), + 0); + vm_paging_objects_mapped++; + vm_paging_pages_mapped++; + *address = page_map_offset; + vm_object_lock(object); + + /* all done and mapped, ready to use ! */ + return KERN_SUCCESS; + } + + /* + * We ran out of pre-allocated kernel virtual + * addresses. Just map the page in the kernel + * the slow and regular way. + */ + vm_paging_no_kernel_page++; + simple_unlock(&vm_paging_lock); + vm_object_lock(object); + } +#endif /* __ppc__ */ + + object_offset = vm_object_trunc_page(offset); + map_size = vm_map_round_page(*size); + + /* + * Try to map the required range of the object + * in the kernel_map + */ + + /* don't go beyond the object's end... */ + if (object_offset >= object->size) { + map_size = 0; + } else if (map_size > object->size - offset) { + map_size = object->size - offset; + } + + vm_object_reference_locked(object); /* for the map entry */ + vm_object_unlock(object); + + kr = vm_map_enter(kernel_map, + address, + map_size, + 0, + VM_FLAGS_ANYWHERE, + object, + object_offset, + FALSE, + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_NONE); + if (kr != KERN_SUCCESS) { + *address = 0; + *size = 0; + vm_object_deallocate(object); /* for the map entry */ + return kr; + } + + *size = map_size; + + /* + * Enter the mapped pages in the page table now. + */ + vm_object_lock(object); + for (page_map_offset = 0; + map_size != 0; + map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { + unsigned int cache_attr; + + page = vm_page_lookup(object, offset + page_map_offset); + if (page == VM_PAGE_NULL) { + panic("vm_paging_map_object: no page !?"); + } + if (page->no_isync == TRUE) { + pmap_sync_page_data_phys(page->phys_page); + } + cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK; + + PMAP_ENTER(kernel_pmap, + *address + page_map_offset, + page, + VM_PROT_DEFAULT, + cache_attr, + FALSE); + } + + vm_paging_objects_mapped_slow++; + vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64; + + return KERN_SUCCESS; +} + +/* + * ENCRYPTED SWAP: + * vm_paging_unmap_object: + * Unmaps part of a VM object's pages from the kernel + * virtual address space. + * Context: + * The VM object is locked. This lock will get + * dropped and re-acquired though. + */ +void +vm_paging_unmap_object( + vm_object_t object, + vm_map_offset_t start, + vm_map_offset_t end) +{ + kern_return_t kr; +#ifdef __ppc__ + int i; +#endif /* __ppc__ */ + + if ((vm_paging_base_address != 0) && + ((start < vm_paging_base_address) || + (end > (vm_paging_base_address + + (VM_PAGING_NUM_PAGES * PAGE_SIZE))))) { + /* + * We didn't use our pre-allocated pool of + * kernel virtual addresses. Deallocate the + * virtual memory.
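+ *
+ * A minimal sketch of the intended pairing with
+ * vm_paging_map_object() (hypothetical caller, error
+ * handling elided):
+ *
+ *	vm_map_offset_t kaddr = 0;
+ *	vm_map_size_t ksize = PAGE_SIZE;
+ *	if (vm_paging_map_object(&kaddr, page, object,
+ *			page->offset, &ksize) == KERN_SUCCESS) {
+ *		(access the page's contents through kaddr)
+ *		vm_paging_unmap_object(object, kaddr, kaddr + ksize);
+ *	}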
+ */ + if (object != VM_OBJECT_NULL) { + vm_object_unlock(object); + } + kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); + if (object != VM_OBJECT_NULL) { + vm_object_lock(object); + } + assert(kr == KERN_SUCCESS); + } else { + /* + * We used a kernel virtual address from our + * pre-allocated pool. Put it back in the pool + * for next time. + */ +#ifdef __ppc__ + assert(end - start == PAGE_SIZE); + i = (start - vm_paging_base_address) >> PAGE_SHIFT; + + /* undo the pmap mapping */ + mapping_remove(kernel_pmap, start); + + simple_lock(&vm_paging_lock); + vm_paging_page_inuse[i] = FALSE; + simple_unlock(&vm_paging_lock); +#endif /* __ppc__ */ + } +} + +/* + * Encryption data. + * "iv" is the "initial vector". Ideally, we want to + * have a different one for each page we encrypt, so that + * crackers can't find encryption patterns too easily. + */ +#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ +boolean_t swap_crypt_ctx_initialized = FALSE; +aes_32t swap_crypt_key[8]; /* big enough for a 256 key */ +aes_ctx swap_crypt_ctx; +const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; + +#if DEBUG +boolean_t swap_crypt_ctx_tested = FALSE; +unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); +unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); +unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); +#endif /* DEBUG */ + +extern u_long random(void); + +/* + * Initialize the encryption context: key and key size. + */ +void swap_crypt_ctx_initialize(void); /* forward */ +void +swap_crypt_ctx_initialize(void) +{ + unsigned int i; + + /* + * No need for locking to protect swap_crypt_ctx_initialized + * because the first use of encryption will come from the + * pageout thread (we won't pagein before there's been a pageout) + * and there's only one pageout thread. + */ + if (swap_crypt_ctx_initialized == FALSE) { + for (i = 0; + i < (sizeof (swap_crypt_key) / + sizeof (swap_crypt_key[0])); + i++) { + swap_crypt_key[i] = random(); + } + aes_encrypt_key((const unsigned char *) swap_crypt_key, + SWAP_CRYPT_AES_KEY_SIZE, + &swap_crypt_ctx.encrypt); + aes_decrypt_key((const unsigned char *) swap_crypt_key, + SWAP_CRYPT_AES_KEY_SIZE, + &swap_crypt_ctx.decrypt); + swap_crypt_ctx_initialized = TRUE; + } + +#if DEBUG + /* + * Validate the encryption algorithms. 
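+ * The test below checks that the AES round trip is the
+ * identity, both out of place and in place:
+ * decrypt(encrypt(ref)) must reproduce ref exactly, else we
+ * panic immediately rather than risk paging out data that
+ * could not be recovered at pagein time.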
+ */ + if (swap_crypt_ctx_tested == FALSE) { + /* initialize */ + for (i = 0; i < 4096; i++) { + swap_crypt_test_page_ref[i] = (char) i; + } + /* encrypt */ + aes_encrypt_cbc(swap_crypt_test_page_ref, + swap_crypt_null_iv, + PAGE_SIZE / AES_BLOCK_SIZE, + swap_crypt_test_page_encrypt, + &swap_crypt_ctx.encrypt); + /* decrypt */ + aes_decrypt_cbc(swap_crypt_test_page_encrypt, + swap_crypt_null_iv, + PAGE_SIZE / AES_BLOCK_SIZE, + swap_crypt_test_page_decrypt, + &swap_crypt_ctx.decrypt); + /* compare result with original */ + for (i = 0; i < 4096; i ++) { + if (swap_crypt_test_page_decrypt[i] != + swap_crypt_test_page_ref[i]) { + panic("encryption test failed"); + } + } + + /* encrypt again */ + aes_encrypt_cbc(swap_crypt_test_page_decrypt, + swap_crypt_null_iv, + PAGE_SIZE / AES_BLOCK_SIZE, + swap_crypt_test_page_decrypt, + &swap_crypt_ctx.encrypt); + /* decrypt in place */ + aes_decrypt_cbc(swap_crypt_test_page_decrypt, + swap_crypt_null_iv, + PAGE_SIZE / AES_BLOCK_SIZE, + swap_crypt_test_page_decrypt, + &swap_crypt_ctx.decrypt); + for (i = 0; i < 4096; i ++) { + if (swap_crypt_test_page_decrypt[i] != + swap_crypt_test_page_ref[i]) { + panic("in place encryption test failed"); + } + } + + swap_crypt_ctx_tested = TRUE; + } +#endif /* DEBUG */ +} + +/* + * ENCRYPTED SWAP: + * vm_page_encrypt: + * Encrypt the given page, for secure paging. + * The page might already be mapped at kernel virtual + * address "kernel_mapping_offset". Otherwise, we need + * to map it. + * + * Context: + * The page's object is locked, but this lock will be released + * and re-acquired. + * The page is busy and not accessible by users (not entered in any pmap). + */ +void +vm_page_encrypt( + vm_page_t page, + vm_map_offset_t kernel_mapping_offset) +{ + int clear_refmod = 0; + kern_return_t kr; + boolean_t page_was_referenced; + boolean_t page_was_modified; + vm_map_size_t kernel_mapping_size; + vm_offset_t kernel_vaddr; + union { + unsigned char aes_iv[AES_BLOCK_SIZE]; + struct { + memory_object_t pager_object; + vm_object_offset_t paging_offset; + } vm; + } encrypt_iv; + + if (! vm_pages_encrypted) { + vm_pages_encrypted = TRUE; + } + + assert(page->busy); + assert(page->dirty || page->precious); + + if (page->encrypted) { + /* + * Already encrypted: no need to do it again. + */ + vm_page_encrypt_already_encrypted_counter++; + return; + } + ASSERT_PAGE_DECRYPTED(page); + + /* + * Gather the "reference" and "modified" status of the page. + * We'll restore these values after the encryption, so that + * the encryption is transparent to the rest of the system + * and doesn't impact the VM's LRU logic. + */ + page_was_referenced = + (page->reference || pmap_is_referenced(page->phys_page)); + page_was_modified = + (page->dirty || pmap_is_modified(page->phys_page)); + + if (kernel_mapping_offset == 0) { + /* + * The page hasn't already been mapped in kernel space + * by the caller. Map it now, so that we can access + * its contents and encrypt them. + */ + kernel_mapping_size = PAGE_SIZE; + kr = vm_paging_map_object(&kernel_mapping_offset, + page, + page->object, + page->offset, + &kernel_mapping_size); + if (kr != KERN_SUCCESS) { + panic("vm_page_encrypt: " + "could not map page in kernel: 0x%x\n", + kr); + } + } else { + kernel_mapping_size = 0; + } + kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); + + if (swap_crypt_ctx_initialized == FALSE) { + swap_crypt_ctx_initialize(); + } + assert(swap_crypt_ctx_initialized); + + /* + * Prepare an "initial vector" for the encryption. 
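+ * Conceptually, the code below computes (sketch):
+ *
+ *	iv = AES_CBC_encrypt(pager_object || paging_offset,
+ *	                     swap_crypt_null_iv)
+ *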
+ * We use the "pager" and the "paging_offset" for that + * page to obfuscate the encrypted data a bit more and + * prevent crackers from finding patterns that they could + * use to break the key. + */ + bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); + encrypt_iv.vm.pager_object = page->object->pager; + encrypt_iv.vm.paging_offset = + page->object->paging_offset + page->offset; + + vm_object_unlock(page->object); + + /* encrypt the "initial vector" */ + aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], + swap_crypt_null_iv, + 1, + &encrypt_iv.aes_iv[0], + &swap_crypt_ctx.encrypt); + + /* + * Encrypt the page. + */ + aes_encrypt_cbc((const unsigned char *) kernel_vaddr, + &encrypt_iv.aes_iv[0], + PAGE_SIZE / AES_BLOCK_SIZE, + (unsigned char *) kernel_vaddr, + &swap_crypt_ctx.encrypt); + + vm_page_encrypt_counter++; + + vm_object_lock(page->object); + + /* + * Unmap the page from the kernel's address space, + * if we had to map it ourselves. Otherwise, let + * the caller undo the mapping if needed. + */ + if (kernel_mapping_size != 0) { + vm_paging_unmap_object(page->object, + kernel_mapping_offset, + kernel_mapping_offset + kernel_mapping_size); + } + + /* + * Restore the "reference" and "modified" bits. + * This should clean up any impact the encryption had + * on them. + */ + if (! page_was_referenced) { + clear_refmod |= VM_MEM_REFERENCED; + page->reference = FALSE; + } + if (! page_was_modified) { + clear_refmod |= VM_MEM_MODIFIED; + page->dirty = FALSE; + } + if (clear_refmod) + pmap_clear_refmod(page->phys_page, clear_refmod); + + page->encrypted = TRUE; +} + +/* + * ENCRYPTED SWAP: + * vm_page_decrypt: + * Decrypt the given page. + * The page might already be mapped at kernel virtual + * address "kernel_mapping_offset". Otherwise, we need + * to map it. + * + * Context: + * The page's VM object is locked but will be unlocked and relocked. + * The page is busy and not accessible by users (not entered in any pmap). + */ +void +vm_page_decrypt( + vm_page_t page, + vm_map_offset_t kernel_mapping_offset) +{ + int clear_refmod = 0; + kern_return_t kr; + vm_map_size_t kernel_mapping_size; + vm_offset_t kernel_vaddr; + boolean_t page_was_referenced; + union { + unsigned char aes_iv[AES_BLOCK_SIZE]; + struct { + memory_object_t pager_object; + vm_object_offset_t paging_offset; + } vm; + } decrypt_iv; + + assert(page->busy); + assert(page->encrypted); + + /* + * Gather the "reference" status of the page. + * We'll restore its value after the decryption, so that + * the decryption is transparent to the rest of the system + * and doesn't impact the VM's LRU logic. + */ + page_was_referenced = + (page->reference || pmap_is_referenced(page->phys_page)); + + if (kernel_mapping_offset == 0) { + /* + * The page hasn't already been mapped in kernel space + * by the caller. Map it now, so that we can access + * its contents and decrypt them. + */ + kernel_mapping_size = PAGE_SIZE; + kr = vm_paging_map_object(&kernel_mapping_offset, + page, + page->object, + page->offset, + &kernel_mapping_size); + if (kr != KERN_SUCCESS) { + panic("vm_page_decrypt: " + "could not map page in kernel: 0x%x\n", + kr); + } + } else { + kernel_mapping_size = 0; + } + kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); + + assert(swap_crypt_ctx_initialized); + + /* + * Prepare an "initial vector" for the decryption. + * It has to be the same as the "initial vector" we + * used to encrypt that page.
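+ * Neither the pager nor the paging offset of a busy,
+ * encrypted page can have changed in the meantime, so
+ * recomputing the IV from them here yields exactly the value
+ * vm_page_encrypt() used, without storing an IV per page.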
+ */ + bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); + decrypt_iv.vm.pager_object = page->object->pager; + decrypt_iv.vm.paging_offset = + page->object->paging_offset + page->offset; + + vm_object_unlock(page->object); + + /* encrypt the "initial vector" */ + aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], + swap_crypt_null_iv, + 1, + &decrypt_iv.aes_iv[0], + &swap_crypt_ctx.encrypt); + + /* + * Decrypt the page. + */ + aes_decrypt_cbc((const unsigned char *) kernel_vaddr, + &decrypt_iv.aes_iv[0], + PAGE_SIZE / AES_BLOCK_SIZE, + (unsigned char *) kernel_vaddr, + &swap_crypt_ctx.decrypt); + vm_page_decrypt_counter++; + + vm_object_lock(page->object); + + /* + * Unmap the page from the kernel's address space, + * if we had to map it ourselves. Otherwise, let + * the caller undo the mapping if needed. + */ + if (kernel_mapping_size != 0) { + vm_paging_unmap_object(page->object, + kernel_vaddr, + kernel_vaddr + PAGE_SIZE); + } + + /* + * After decryption, the page is actually clean. + * It was encrypted as part of paging, which "cleans" + * the "dirty" pages. + * No one could access it after it was encrypted + * and the decryption doesn't count. + */ + page->dirty = FALSE; + clear_refmod = VM_MEM_MODIFIED; + + /* restore the "reference" bit */ + if (! page_was_referenced) { + page->reference = FALSE; + clear_refmod |= VM_MEM_REFERENCED; + } + pmap_clear_refmod(page->phys_page, clear_refmod); + + page->encrypted = FALSE; + + /* + * We've just modified the page's contents via the data cache and part + * of the new contents might still be in the cache and not yet in RAM. + * Since the page is now available and might get gathered in a UPL to + * be part of a DMA transfer from a driver that expects the memory to + * be coherent at this point, we have to flush the data cache. + */ + pmap_sync_page_data_phys(page->phys_page); + /* + * Since the page is not mapped yet, some code might assume that it + * doesn't need to invalidate the instruction cache when writing to + * that page. That code relies on "no_isync" being set, so that the + * caches get synchronized when the page is first mapped. So we need + * to set "no_isync" here too, despite the fact that we just + * synchronized the caches above... + */ + page->no_isync = TRUE; +} + +unsigned long upl_encrypt_upls = 0; +unsigned long upl_encrypt_pages = 0; + +/* + * ENCRYPTED SWAP: + * + * upl_encrypt: + * Encrypts all the pages in the UPL, within the specified range. + * + */ +void +upl_encrypt( + upl_t upl, + upl_offset_t crypt_offset, + upl_size_t crypt_size) +{ + upl_size_t upl_size; + upl_offset_t upl_offset; + vm_object_t upl_object; + vm_page_t page; + vm_object_t shadow_object; + vm_object_offset_t shadow_offset; + vm_object_offset_t paging_offset; + vm_object_offset_t base_offset; + + upl_encrypt_upls++; + upl_encrypt_pages += crypt_size / PAGE_SIZE; + + upl_lock(upl); + + upl_object = upl->map_object; + upl_offset = upl->offset; + upl_size = upl->size; + + upl_unlock(upl); + + vm_object_lock(upl_object); + + /* + * Find the VM object that contains the actual pages. + */ + if (upl_object->pageout) { + shadow_object = upl_object->shadow; + /* + * The offset in the shadow object is actually also + * accounted for in upl->offset. It possibly shouldn't be + * this way, but for now don't account for it twice. + */ + shadow_offset = 0; + assert(upl_object->paging_offset == 0); /* XXX ?
*/ + vm_object_lock(shadow_object); + } else { + shadow_object = upl_object; + shadow_offset = 0; + } + + paging_offset = shadow_object->paging_offset; + vm_object_paging_begin(shadow_object); + + if (shadow_object != upl_object) { + vm_object_unlock(shadow_object); + } + vm_object_unlock(upl_object); + + base_offset = shadow_offset; + base_offset += upl_offset; + base_offset += crypt_offset; + base_offset -= paging_offset; + /* + * Unmap the pages, so that nobody can continue accessing them while + * they're encrypted. After that point, all accesses to these pages + * will cause a page fault and block while the page is being encrypted + * (busy). After the encryption completes, any access will cause a + * page fault and the page gets decrypted at that time. + */ + assert(crypt_offset + crypt_size <= upl_size); + vm_object_pmap_protect(shadow_object, + base_offset, + (vm_object_size_t)crypt_size, + PMAP_NULL, + 0, + VM_PROT_NONE); + + /* XXX FBDP could the object have changed significantly here ? */ + vm_object_lock(shadow_object); + + for (upl_offset = 0; + upl_offset < crypt_size; + upl_offset += PAGE_SIZE) { + page = vm_page_lookup(shadow_object, + base_offset + upl_offset); + if (page == VM_PAGE_NULL) { + panic("upl_encrypt: " + "no page for (obj=%p,off=%lld+%d)!\n", + shadow_object, + base_offset, + upl_offset); + } + vm_page_encrypt(page, 0); + } + + vm_object_paging_end(shadow_object); + vm_object_unlock(shadow_object); +} + +vm_size_t +upl_get_internal_pagelist_offset(void) +{ + return sizeof(struct upl); +} + +void +upl_set_dirty( + upl_t upl) +{ + upl->flags |= UPL_CLEAR_DIRTY; +} + +void +upl_clear_dirty( + upl_t upl) +{ + upl->flags &= ~UPL_CLEAR_DIRTY; +} + + +#ifdef MACH_BSD boolean_t upl_page_present(upl_page_info_t *upl, int index) { @@ -4395,9 +5997,9 @@ boolean_t upl_valid_page(upl_page_info_t *upl, int index) { return(UPL_VALID_PAGE(upl, index)); } -vm_offset_t upl_phys_page(upl_page_info_t *upl, int index) +ppnum_t upl_phys_page(upl_page_info_t *upl, int index) { - return((vm_offset_t)UPL_PHYS_PAGE(upl, index)); + return(UPL_PHYS_PAGE(upl, index)); } void @@ -4422,6 +6024,7 @@ vm_countdirtypages(void) if(m->pageout) pgopages++; if(m->precious) precpages++; + assert(m->object != kernel_object); m = (vm_page_t) queue_next(&m->pageq); if (m ==(vm_page_t )0) break; @@ -4437,6 +6040,7 @@ vm_countdirtypages(void) if(m->pageout) pgopages++; if(m->precious) precpages++; + assert(m->object != kernel_object); m = (vm_page_t) queue_next(&m->pageq); if (m ==(vm_page_t )0) break; @@ -4458,6 +6062,7 @@ vm_countdirtypages(void) if(m->pageout) pgopages++; if(m->precious) precpages++; + assert(m->object != kernel_object); m = (vm_page_t) queue_next(&m->pageq); if(m == (vm_page_t )0) break; @@ -4469,7 +6074,7 @@ vm_countdirtypages(void) } #endif /* MACH_BSD */ -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2) { upl->ubc_alias1 = alias1; @@ -4484,7 +6089,7 @@ int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2) *al2 = upl->ubc_alias2; return KERN_SUCCESS; } -#endif /* UBC_DEBUG */ +#endif /* UPL_DEBUG */ @@ -4494,13 +6099,11 @@ int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2) #include #define printf kdbprintf -extern int db_indent; void db_pageout(void); void db_vm(void) { - extern int vm_page_gobble_count; iprintf("VM Statistics:\n"); db_indent += 2; @@ -4511,8 +6114,6 @@ db_vm(void) vm_page_free_count); printf(" wire %5d gobbl %5d\n", vm_page_wire_count, 
vm_page_gobble_count); - iprintf("laund %5d\n", - vm_page_laundry_count); db_indent -= 2; iprintf("target:\n"); db_indent += 2; @@ -4521,32 +6122,18 @@ db_vm(void) vm_page_free_target); printf(" resrv %5d\n", vm_page_free_reserved); db_indent -= 2; - - iprintf("burst:\n"); - db_indent += 2; - iprintf("max %5d min %5d wait %5d empty %5d\n", - vm_pageout_burst_max, vm_pageout_burst_min, - vm_pageout_burst_wait, vm_pageout_empty_wait); - db_indent -= 2; iprintf("pause:\n"); - db_indent += 2; - iprintf("count %5d max %5d\n", - vm_pageout_pause_count, vm_pageout_pause_max); -#if MACH_COUNTERS - iprintf("scan_continue called %8d\n", c_vm_pageout_scan_continue); -#endif /* MACH_COUNTERS */ - db_indent -= 2; db_pageout(); db_indent -= 2; } -void -db_pageout(void) -{ #if MACH_COUNTERS - extern int c_laundry_pages_freed; +extern int c_laundry_pages_freed; #endif /* MACH_COUNTERS */ +void +db_pageout(void) +{ iprintf("Pageout Statistics:\n"); db_indent += 2; iprintf("active %5d inactv %5d\n", @@ -4579,18 +6166,4 @@ db_pageout(void) db_indent -= 2; } -#if MACH_CLUSTER_STATS -unsigned long vm_pageout_cluster_dirtied = 0; -unsigned long vm_pageout_cluster_cleaned = 0; -unsigned long vm_pageout_cluster_collisions = 0; -unsigned long vm_pageout_cluster_clusters = 0; -unsigned long vm_pageout_cluster_conversions = 0; -unsigned long vm_pageout_target_collisions = 0; -unsigned long vm_pageout_target_page_dirtied = 0; -unsigned long vm_pageout_target_page_freed = 0; -#define CLUSTER_STAT(clause) clause -#else /* MACH_CLUSTER_STATS */ -#define CLUSTER_STAT(clause) -#endif /* MACH_CLUSTER_STATS */ - #endif /* MACH_KDB */ diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index 5b3e5185d..72f7549ce 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -60,27 +60,34 @@ #ifndef _VM_VM_PAGEOUT_H_ #define _VM_VM_PAGEOUT_H_ +#ifdef KERNEL_PRIVATE + +#include #include #include -#include -#include +#include +#include +#include +extern kern_return_t vm_map_create_upl( + vm_map_t map, + vm_map_address_t offset, + upl_size_t *upl_size, + upl_t *upl, + upl_page_info_array_t page_list, + unsigned int *count, + int *flags); +#ifdef MACH_KERNEL_PRIVATE + +#include extern unsigned int vm_pageout_scan_event_counter; extern unsigned int vm_zf_count; /* - * The following ifdef only exists because XMM must (currently) - * be given a page at a time. This should be removed - * in the future. - */ -#define DATA_WRITE_MAX 16 -#define POINTER_T(copy) (pointer_t)(copy) - -/* - * Exported routines. + * Routines exported to Mach. 
*/ extern void vm_pageout(void); @@ -117,10 +124,7 @@ extern void vm_pageclean_copy( /* UPL exported routines and structures */ -#define UPL_COMPOSITE_PAGE_LIST_MAX 16 - - -#define upl_lock_init(object) mutex_init(&(object)->Lock, ETAP_VM_OBJ) +#define upl_lock_init(object) mutex_init(&(object)->Lock, 0) #define upl_lock(object) mutex_lock(&(object)->Lock) #define upl_unlock(object) mutex_unlock(&(object)->Lock) @@ -133,19 +137,16 @@ struct upl { int flags; vm_object_t src_object; /* object derived from */ vm_object_offset_t offset; - vm_size_t size; /* size in bytes of the address space */ + upl_size_t size; /* size in bytes of the address space */ vm_offset_t kaddr; /* secondary mapping in kernel */ vm_object_t map_object; -#ifdef UBC_DEBUG +#ifdef UPL_DEBUG unsigned int ubc_alias1; unsigned int ubc_alias2; queue_chain_t uplq; /* List of outstanding upls on an obj */ -#endif /* UBC_DEBUG */ - +#endif /* UPL_DEBUG */ }; - - /* upl struct flags */ #define UPL_PAGE_LIST_MAPPED 0x1 #define UPL_KERNEL_MAPPED 0x2 @@ -157,10 +158,8 @@ struct upl { #define UPL_PAGEOUT 0x80 #define UPL_LITE 0x100 #define UPL_IO_WIRE 0x200 - -#define UPL_PAGE_TICKET_MASK 0xF00 -#define UPL_PAGE_TICKET_SHIFT 8 - +#define UPL_ACCESS_BLOCKED 0x400 +#define UPL_ENCRYPTED 0x800 /* flags for upl_create flags parameter */ @@ -168,11 +167,100 @@ struct upl { #define UPL_CREATE_INTERNAL 0x1 #define UPL_CREATE_LITE 0x2 - +extern kern_return_t vm_object_iopl_request( + vm_object_t object, + vm_object_offset_t offset, + upl_size_t size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int cntrl_flags); + +extern kern_return_t vm_object_super_upl_request( + vm_object_t object, + vm_object_offset_t offset, + upl_size_t size, + upl_size_t super_cluster, + upl_t *upl, + upl_page_info_t *user_page_list, + unsigned int *page_list_count, + int cntrl_flags); + +/* should be just a regular vm_map_enter() */ +extern kern_return_t vm_map_enter_upl( + vm_map_t map, + upl_t upl, + vm_map_offset_t *dst_addr); + +/* should be just a regular vm_map_remove() */ +extern kern_return_t vm_map_remove_upl( + vm_map_t map, + upl_t upl); + +#ifdef UPL_DEBUG +extern kern_return_t upl_ubc_alias_set( + upl_t upl, + unsigned int alias1, + unsigned int alias2); +extern int upl_ubc_alias_get( + upl_t upl, + unsigned int * al, + unsigned int * al2); +#endif /* UPL_DEBUG */ /* wired page list structure */ typedef unsigned long *wpl_array_t; +extern void vm_page_free_list( + register vm_page_t mem); +extern void vm_page_free_reserve(int pages); + +extern void vm_pageout_throttle_down(vm_page_t page); +extern void vm_pageout_throttle_up(vm_page_t page); + +/* + * ENCRYPTED SWAP: + */ +extern void upl_encrypt( + upl_t upl, + upl_offset_t crypt_offset, + upl_size_t crypt_size); +extern void vm_page_encrypt( + vm_page_t page, + vm_map_offset_t kernel_map_offset); +extern boolean_t vm_pages_encrypted; /* are there encrypted pages ? 
*/ +extern void vm_page_decrypt( + vm_page_t page, + vm_map_offset_t kernel_map_offset); +extern kern_return_t vm_paging_map_object( + vm_map_offset_t *address, + vm_page_t page, + vm_object_t object, + vm_object_offset_t offset, + vm_map_size_t *size); +extern void vm_paging_unmap_object( + vm_object_t object, + vm_map_offset_t start, + vm_map_offset_t end); +decl_simple_lock_data(extern, vm_paging_lock) + +/* + * Backing store throttle when BS is exhausted + */ +extern unsigned int vm_backing_store_low; + +#endif /* MACH_KERNEL_PRIVATE */ + +extern void vm_countdirtypages(void); + +extern void vm_backing_store_disable( + boolean_t suspend); + +extern kern_return_t upl_transpose( + upl_t upl1, + upl_t upl2); + +#endif /* KERNEL_PRIVATE */ #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/osfmk/vm/vm_print.h b/osfmk/vm/vm_print.h index 352823ff0..a96b03a0c 100644 --- a/osfmk/vm/vm_print.h +++ b/osfmk/vm/vm_print.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -41,7 +41,7 @@ extern int vm_follow_object( vm_object_t object); extern void vm_object_print( - vm_object_t object, + db_addr_t object, boolean_t have_addr, int arg_count, char *modif); @@ -49,7 +49,7 @@ extern void vm_object_print( #include extern void vm_page_print( - vm_page_t p); + db_addr_t p); #include #if MACH_PAGEMAP @@ -61,7 +61,7 @@ extern void vm_external_print( extern void db_vm(void); -extern vm_size_t db_vm_map_total_size( +extern vm_map_size_t db_vm_map_total_size( db_addr_t map); #endif /* VM_PRINT_H */ diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h new file mode 100644 index 000000000..84726529d --- /dev/null +++ b/osfmk/vm/vm_protos.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifdef XNU_KERNEL_PRIVATE + +#ifndef _VM_VM_PROTOS_H_ +#define _VM_VM_PROTOS_H_ + +#include +#include + +/* + * This file contains various type definitions and routine prototypes + * that are needed to avoid compilation warnings for VM code (in osfmk, + * default_pager and bsd). + * Most of these should eventually go into more appropriate header files. + * + * Include it after all other header files since it doesn't include any + * type definitions and it works around some conflicts with other header + * files. 
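+ *
+ * For example, a consumer would include it last (header names
+ * hypothetical):
+ *
+ *	#include <mach/mach_types.h>
+ *	#include <vm/vm_map.h>
+ *	#include <vm/vm_protos.h>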
+ */ + +/* + * iokit + */ +extern kern_return_t device_data_action( + int device_handle, + ipc_port_t device_pager, + vm_prot_t protection, + vm_object_offset_t offset, + vm_size_t size); + +extern kern_return_t device_close( + int device_handle); + +/* + * default_pager + */ +extern int start_def_pager( + char *bs_device); + +/* + * osfmk + */ +#ifndef _KERN_IPC_TT_H_ /* XXX FBDP */ +/* these should be exported cleanly from OSFMK since BSD needs them */ +extern ipc_port_t convert_task_to_port( + task_t task); +#endif /* _KERN_IPC_TT_H_ */ +#ifndef _IPC_IPC_PORT_H_ +extern mach_port_name_t ipc_port_copyout_send( + ipc_port_t sright, + ipc_space_t space); +extern task_t port_name_to_task( + mach_port_name_t name); +#endif /* _IPC_IPC_PORT_H_ */ + +extern ipc_space_t get_task_ipcspace( + task_t t); + +/* Some loose-ends VM stuff */ + +extern vm_map_t kalloc_map; +extern vm_size_t msg_ool_size_small; +extern vm_map_t zone_map; + +extern void consider_machine_adjust(void); +extern pmap_t get_map_pmap(vm_map_t); +extern vm_map_offset_t get_map_min(vm_map_t); +extern vm_map_offset_t get_map_max(vm_map_t); +extern vm_map_size_t get_vmmap_size(vm_map_t); +extern int get_vmmap_entries(vm_map_t); + +extern boolean_t coredumpok(vm_map_t map, vm_offset_t va); + +/* + * VM routines that used to be published to + * user space, and are now restricted to the kernel. + * + * They should eventually go away entirely - + * to be replaced with standard vm_map() and + * vm_deallocate() calls. + */ + +extern kern_return_t vm_upl_map +( + vm_map_t target_task, + upl_t upl, + vm_address_t *address +); + +extern kern_return_t vm_upl_unmap +( + vm_map_t target_task, + upl_t upl +); + +extern kern_return_t vm_region_object_create +( + vm_map_t target_task, + vm_size_t size, + ipc_port_t *object_handle +); + +extern mach_vm_offset_t mach_get_vm_start(vm_map_t); +extern mach_vm_offset_t mach_get_vm_end(vm_map_t); + +/* + * Legacy routines to get the start and end for a vm_map_t. They + * return them in the vm_offset_t format. So, they should only be + * called on maps that are the same size as the kernel map for + * accurate results. + */ +extern vm_offset_t get_vm_start(vm_map_t); +extern vm_offset_t get_vm_end(vm_map_t); + +#ifdef __PPC__ +/* + * LP64todo - map in the commpage cleanly and remove these. 
+ */ +extern void vm_map_commpage64( vm_map_t ); +extern void vm_map_remove_commpage64( vm_map_t ); +#endif /* __PPC__ */ + +/* + * bsd + */ +struct vnode; +extern int is_suser(void); +extern int bsd_read_page_cache_file( + unsigned int user, + int *fid, + int *mod, + char *app_name, + struct vnode *app_vp, + vm_offset_t *buffer, + vm_offset_t *bufsize); +extern int bsd_write_page_cache_file( + unsigned int user, + char *file_name, + caddr_t buffer, + vm_size_t size, + int mod, + int fid); +extern int prepare_profile_database( + int user); +extern void vnode_pager_shutdown(void); +extern void *upl_get_internal_page_list( + upl_t upl); +#ifndef _VNODE_PAGER_ +typedef int pager_return_t; +extern pager_return_t vnode_pagein( + struct vnode *, upl_t, + vm_offset_t, vm_object_offset_t, + vm_size_t, int, int *); +extern pager_return_t vnode_pageout( + struct vnode *, upl_t, + vm_offset_t, vm_object_offset_t, + vm_size_t, int, int *); +extern memory_object_t vnode_pager_setup( + struct vnode *, memory_object_t); +extern vm_object_offset_t vnode_pager_get_filesize( + struct vnode *); +#endif /* _VNODE_PAGER_ */ +extern void vnode_pager_bootstrap(void); +extern kern_return_t +vnode_pager_data_unlock( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t size, + vm_prot_t desired_access); +extern kern_return_t vnode_pager_init( + memory_object_t, + memory_object_control_t, + vm_size_t); +extern kern_return_t vnode_pager_get_object_size( + memory_object_t, + memory_object_offset_t *); +extern kern_return_t vnode_pager_data_request( + memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_prot_t); +extern kern_return_t vnode_pager_data_return( + memory_object_t, + memory_object_offset_t, + vm_size_t, + memory_object_offset_t *, + int *, + boolean_t, + boolean_t, + int); +extern kern_return_t vnode_pager_data_initialize( + memory_object_t, + memory_object_offset_t, + vm_size_t); +extern void vnode_pager_reference( + memory_object_t mem_obj); +extern kern_return_t vnode_pager_synchronize( + memory_object_t mem_obj, + memory_object_offset_t offset, + vm_size_t length, + vm_sync_t sync_flags); +extern kern_return_t vnode_pager_unmap( + memory_object_t mem_obj); +extern void vnode_pager_deallocate( + memory_object_t); +extern kern_return_t vnode_pager_terminate( + memory_object_t); +extern void vnode_pager_vrele( + struct vnode *vp); +extern void vnode_pager_release_from_cache( + int *); +extern void ubc_unmap( + struct vnode *vp); + +extern int vnode_pager_workaround; +extern int device_pager_workaround; + +extern void dp_memory_object_reference(memory_object_t); +extern void dp_memory_object_deallocate(memory_object_t); +#ifndef _memory_object_server_ +extern kern_return_t dp_memory_object_init(memory_object_t, + memory_object_control_t, + vm_size_t); +extern kern_return_t dp_memory_object_terminate(memory_object_t); +extern kern_return_t dp_memory_object_data_request(memory_object_t, + memory_object_offset_t, vm_size_t, vm_prot_t); +extern kern_return_t dp_memory_object_data_return(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_size_t *, + int *, + boolean_t, + boolean_t, + int); +extern kern_return_t dp_memory_object_data_initialize(memory_object_t, + memory_object_offset_t, + vm_size_t); +extern kern_return_t dp_memory_object_data_unlock(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_prot_t); +extern kern_return_t dp_memory_object_synchronize(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_sync_t); +extern kern_return_t 
dp_memory_object_unmap(memory_object_t); +#endif /* _memory_object_server_ */ +#ifndef _memory_object_default_server_ +extern kern_return_t default_pager_memory_object_create( + memory_object_default_t, + vm_size_t, + memory_object_t *); +#endif /* _memory_object_default_server_ */ + +extern void device_pager_reference(memory_object_t); +extern void device_pager_deallocate(memory_object_t); +extern kern_return_t device_pager_init(memory_object_t, + memory_object_control_t, + vm_size_t); +extern kern_return_t device_pager_terminate(memory_object_t); +extern kern_return_t device_pager_data_request(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_prot_t); +extern kern_return_t device_pager_data_return(memory_object_t, + memory_object_offset_t, + vm_size_t, + boolean_t, + boolean_t, + int); +extern kern_return_t device_pager_data_initialize(memory_object_t, + memory_object_offset_t, + vm_size_t); +extern kern_return_t device_pager_data_unlock(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_prot_t); +extern kern_return_t device_pager_synchronize(memory_object_t, + memory_object_offset_t, + vm_size_t, + vm_sync_t); +extern kern_return_t device_pager_unmap(memory_object_t); +extern kern_return_t device_pager_populate_object( + memory_object_t device, + memory_object_offset_t offset, + ppnum_t page_num, + vm_size_t size); +extern memory_object_t device_pager_setup( + memory_object_t, + int, + vm_size_t, + int); +extern void device_pager_bootstrap(void); + +extern kern_return_t memory_object_create_named( + memory_object_t pager, + memory_object_offset_t size, + memory_object_control_t *control); + + +extern int macx_swapinfo( + memory_object_size_t *total_p, + memory_object_size_t *avail_p, + vm_size_t *pagesize_p, + boolean_t *encrypted_p); + +#endif /* _VM_VM_PROTOS_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index aa5c29121..dfeee81a4 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,6 +56,8 @@ * Resident memory management module. */ +#include + #include #include #include @@ -77,13 +79,14 @@ #include /* (BRINGUP) */ #include /* (BRINGUP) */ +#include /* Variables used to indicate the relative age of pages in the * inactive list */ -int vm_page_ticket_roll = 0; -int vm_page_ticket = 0; +unsigned int vm_page_ticket_roll = 0; +unsigned int vm_page_ticket = 0; /* * Associated with page of user-allocatable memory is a * page structure. @@ -122,6 +125,10 @@ unsigned int vm_page_hash_shift; /* Shift for hash function */ uint32_t vm_page_bucket_hash; /* Basic bucket hash */ decl_simple_lock_data(,vm_page_bucket_lock) +vm_page_t +vm_page_lookup_nohint(vm_object_t object, vm_object_offset_t offset); + + #if MACH_PAGE_HASH_STATS /* This routine is only for debug. It is intended to be called by * hand by a developer using a kernel debugger. This routine prints @@ -168,15 +175,9 @@ hash_debug(void) * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT * constants. 
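 *
 * The three values are tied together by the usual identities
 * (sketch; vm_set_page_size() below enforces the power of two):
 *
 *	page_mask == page_size - 1
 *	page_size == 1 << page_shift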
*/ -#ifndef PAGE_SIZE_FIXED -vm_size_t page_size = 4096; -vm_size_t page_mask = 4095; -int page_shift = 12; -#else vm_size_t page_size = PAGE_SIZE; vm_size_t page_mask = PAGE_MASK; -int page_shift = PAGE_SHIFT; -#endif /* PAGE_SIZE_FIXED */ +int page_shift = PAGE_SHIFT; /* * Resident page structures are initialized from @@ -194,10 +195,9 @@ struct vm_page vm_page_template; */ vm_page_t vm_page_queue_free; vm_page_t vm_page_queue_fictitious; -decl_mutex_data(,vm_page_queue_free_lock) unsigned int vm_page_free_wanted; -int vm_page_free_count; -int vm_page_fictitious_count; +unsigned int vm_page_free_count; +unsigned int vm_page_fictitious_count; unsigned int vm_page_free_count_minimum; /* debugging */ @@ -213,7 +213,6 @@ unsigned int vm_page_free_count_minimum; /* debugging */ zone_t vm_page_zone; decl_mutex_data(,vm_page_alloc_lock) unsigned int io_throttle_zero_fill; -decl_mutex_data(,vm_page_zero_fill_lock) /* * Fictitious pages don't have a physical address, @@ -235,22 +234,15 @@ vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1; */ queue_head_t vm_page_queue_active; queue_head_t vm_page_queue_inactive; -queue_head_t vm_page_queue_zf; -decl_mutex_data(,vm_page_queue_lock) -int vm_page_active_count; -int vm_page_inactive_count; -int vm_page_wire_count; -int vm_page_gobble_count = 0; -int vm_page_wire_count_warning = 0; -int vm_page_gobble_count_warning = 0; - -/* the following fields are protected by the vm_page_queue_lock */ -queue_head_t vm_page_queue_limbo; -int vm_page_limbo_count = 0; /* total pages in limbo */ -int vm_page_limbo_real_count = 0; /* real pages in limbo */ -int vm_page_pin_count = 0; /* number of pinned pages */ - -decl_simple_lock_data(,vm_page_preppin_lock) +unsigned int vm_page_active_count; +unsigned int vm_page_inactive_count; +unsigned int vm_page_wire_count; +unsigned int vm_page_gobble_count = 0; +unsigned int vm_page_wire_count_warning = 0; +unsigned int vm_page_gobble_count_warning = 0; + +unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ +uint64_t vm_page_purged_count = 0; /* total count of purged pages */ /* * Several page replacement parameters are also @@ -258,13 +250,11 @@ decl_simple_lock_data(,vm_page_preppin_lock) * (done here in vm_page_alloc) can trigger the * pageout daemon. 
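 *
 * Roughly (a sketch of the policy, not exact code):
 *
 *	vm_page_free_reserved < vm_page_free_min < vm_page_free_target
 *
 * falling below the target wakes the pageout daemon, and the last
 * reserved pages are held back for VM-privileged threads.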
*/ -int vm_page_free_target = 0; -int vm_page_free_min = 0; -int vm_page_inactive_target = 0; -int vm_page_free_reserved = 0; -int vm_page_laundry_count = 0; -int vm_page_burst_count = 0; -int vm_page_throttled_count = 0; +unsigned int vm_page_free_target = 0; +unsigned int vm_page_free_min = 0; +unsigned int vm_page_inactive_target = 0; +unsigned int vm_page_free_reserved = 0; +unsigned int vm_page_throttled_count = 0; /* * The VM system has a couple of heuristics for deciding @@ -288,16 +278,14 @@ boolean_t vm_page_deactivate_hint = TRUE; void vm_set_page_size(void) { -#ifndef PAGE_SIZE_FIXED page_mask = page_size - 1; if ((page_mask & page_size) != 0) panic("vm_set_page_size: page size not a power of two"); for (page_shift = 0; ; page_shift++) - if ((1 << page_shift) == page_size) + if ((1U << page_shift) == page_size) break; -#endif /* PAGE_SIZE_FIXED */ } /* @@ -317,7 +305,7 @@ vm_page_bootstrap( vm_offset_t *endp) { register vm_page_t m; - int i; + unsigned int i; unsigned int log1; unsigned int log2; unsigned int size; @@ -327,10 +315,15 @@ vm_page_bootstrap( */ m = &vm_page_template; - m->object = VM_OBJECT_NULL; /* reset later */ - m->offset = 0; /* reset later */ + m->object = VM_OBJECT_NULL; /* reset later */ + m->offset = (vm_object_offset_t) -1; /* reset later */ m->wire_count = 0; + m->pageq.next = NULL; + m->pageq.prev = NULL; + m->listq.next = NULL; + m->listq.prev = NULL; + m->inactive = FALSE; m->active = FALSE; m->laundry = FALSE; @@ -356,6 +349,7 @@ vm_page_bootstrap( m->unusual = FALSE; m->restart = FALSE; m->zero_fill = FALSE; + m->encrypted = FALSE; m->phys_page = 0; /* reset later */ @@ -367,16 +361,14 @@ vm_page_bootstrap( * Initialize the page queues. */ - mutex_init(&vm_page_queue_free_lock, ETAP_VM_PAGEQ_FREE); - mutex_init(&vm_page_queue_lock, ETAP_VM_PAGEQ); - simple_lock_init(&vm_page_preppin_lock, ETAP_VM_PREPPIN); + mutex_init(&vm_page_queue_free_lock, 0); + mutex_init(&vm_page_queue_lock, 0); vm_page_queue_free = VM_PAGE_NULL; vm_page_queue_fictitious = VM_PAGE_NULL; queue_init(&vm_page_queue_active); queue_init(&vm_page_queue_inactive); queue_init(&vm_page_queue_zf); - queue_init(&vm_page_queue_limbo); vm_page_free_wanted = 0; @@ -397,7 +389,7 @@ vm_page_bootstrap( * than the number of physical pages in the system. */ - simple_lock_init(&vm_page_bucket_lock, ETAP_VM_BUCKET); + simple_lock_init(&vm_page_bucket_lock, 0); if (vm_page_bucket_count == 0) { unsigned int npages = pmap_free_pages(); @@ -454,8 +446,8 @@ vm_page_bootstrap( */ pmap_startup(&virtual_space_start, &virtual_space_end); - virtual_space_start = round_page_32(virtual_space_start); - virtual_space_end = trunc_page_32(virtual_space_end); + virtual_space_start = round_page(virtual_space_start); + virtual_space_end = trunc_page(virtual_space_end); *startp = virtual_space_start; *endp = virtual_space_end; @@ -471,6 +463,8 @@ vm_page_bootstrap( printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count); vm_page_free_count_minimum = vm_page_free_count; + + simple_lock_init(&vm_paging_lock, 0); } #ifndef MACHINE_PAGES @@ -479,7 +473,7 @@ vm_page_bootstrap( * of two simpler functions, pmap_virtual_space and pmap_next_page. */ -vm_offset_t +void * pmap_steal_memory( vm_size_t size) { @@ -505,8 +499,8 @@ pmap_steal_memory( * we don't trust the pmap module to do it right. 
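 *
 * The rounding is the usual power-of-two arithmetic (sketch):
 *
 *	round_page(a) == (a + PAGE_MASK) & ~PAGE_MASK
 *	trunc_page(a) == a & ~PAGE_MASK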
*/ - virtual_space_start = round_page_32(virtual_space_start); - virtual_space_end = trunc_page_32(virtual_space_end); + virtual_space_start = round_page(virtual_space_start); + virtual_space_end = trunc_page(virtual_space_end); } /* @@ -522,7 +516,7 @@ pmap_steal_memory( * Allocate and map physical pages to back new virtual pages. */ - for (vaddr = round_page_32(addr); + for (vaddr = round_page(addr); vaddr < addr + size; vaddr += PAGE_SIZE) { if (!pmap_next_page(&phys_page)) @@ -543,7 +537,7 @@ pmap_steal_memory( } - return addr; + return (void *) addr; } void @@ -595,7 +589,6 @@ pmap_startup( if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */ for (i = pages_initialized; i > 0; i--) { - extern void fillPage(ppnum_t phys_page, unsigned int fillval); if(fill) fillPage(pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */ vm_page_release(&pages[i - 1]); } @@ -674,8 +667,7 @@ vm_page_module_init(void) vm_page_zone->count += vm_page_pages; vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size; - mutex_init(&vm_page_alloc_lock, ETAP_VM_PAGE_ALLOC); - mutex_init(&vm_page_zero_fill_lock, ETAP_VM_PAGE_ALLOC); + mutex_init(&vm_page_alloc_lock, 0); } /* @@ -741,16 +733,22 @@ vm_page_insert( (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0); VM_PAGE_CHECK(mem); +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); - if (mem->tabled) - panic("vm_page_insert"); - + if (mem->tabled || mem->object != VM_OBJECT_NULL) + panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) " + "already in (obj=%p,off=0x%llx)", + mem, object, offset, mem->object, mem->offset); +#endif assert(!object->internal || offset < object->size); /* only insert "pageout" pages into "pageout" objects, * and normal pages into normal objects */ assert(object->pageout == mem->pageout); + assert(vm_page_lookup(object, offset) == VM_PAGE_NULL); + /* * Record the object/offset pair in this page */ @@ -776,7 +774,7 @@ vm_page_insert( * Now link into the object's list of backed pages. */ - queue_enter(&object->memq, mem, vm_page_t, listq); + VM_PAGE_INSERT(mem, object); mem->tabled = TRUE; /* @@ -784,6 +782,13 @@ vm_page_insert( */ object->resident_page_count++; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + vm_page_lock_queues(); + vm_page_purgeable_count++; + vm_page_unlock_queues(); + } } /* @@ -804,10 +809,15 @@ vm_page_replace( register vm_page_bucket_t *bucket; VM_PAGE_CHECK(mem); - - if (mem->tabled) - panic("vm_page_replace"); - +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + + if (mem->tabled || mem->object != VM_OBJECT_NULL) + panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) " + "already in (obj=%p,off=0x%llx)", + mem, object, offset, mem->object, mem->offset); +#endif /* * Record the object/offset pair in this page */ @@ -832,11 +842,18 @@ vm_page_replace( * and return it to the free list. */ *mp = m->next; - queue_remove(&object->memq, m, vm_page_t, - listq); + VM_PAGE_REMOVE(m); m->tabled = FALSE; + m->object = VM_OBJECT_NULL; + m->offset = (vm_object_offset_t) -1; object->resident_page_count--; + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + assert(vm_page_purgeable_count > 0); + vm_page_purgeable_count--; + } + /* * Return page to the free list.
* Note the page is not tabled now, so this @@ -847,7 +864,7 @@ vm_page_replace( break; } mp = &m->next; - } while (m = *mp); + } while ((m = *mp)); mem->next = bucket->pages; } else { mem->next = VM_PAGE_NULL; @@ -859,7 +876,7 @@ vm_page_replace( * Now link into the object's list of backed pages. */ - queue_enter(&object->memq, mem, vm_page_t, listq); + VM_PAGE_INSERT(mem, object); mem->tabled = TRUE; /* @@ -868,6 +885,11 @@ vm_page_replace( */ object->resident_page_count++; + + if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + vm_page_purgeable_count++; + } } /* @@ -876,7 +898,7 @@ vm_page_replace( * Removes the given mem entry from the object/offset-page * table and the object page list. * - * The object and page must be locked. + * The object and page queues must be locked. */ void @@ -890,11 +912,15 @@ vm_page_remove( "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n", (integer_t)mem->object, (integer_t)mem->offset, (integer_t)mem, 0,0); - +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + _mutex_assert(&mem->object->Lock, MA_OWNED); +#endif assert(mem->tabled); assert(!mem->cleaning); VM_PAGE_CHECK(mem); + /* * Remove from the object_object/offset hash table */ @@ -923,7 +949,7 @@ vm_page_remove( * Now remove from the object's list of backed pages. */ - queue_remove(&mem->object->memq, mem, vm_page_t, listq); + VM_PAGE_REMOVE(mem); /* * And show that the object has one fewer resident @@ -932,9 +958,15 @@ vm_page_remove( mem->object->resident_page_count--; + if (mem->object->purgable == VM_OBJECT_PURGABLE_VOLATILE || + mem->object->purgable == VM_OBJECT_PURGABLE_EMPTY) { + assert(vm_page_purgeable_count > 0); + vm_page_purgeable_count--; + } + mem->tabled = FALSE; mem->object = VM_OBJECT_NULL; - mem->offset = 0; + mem->offset = (vm_object_offset_t) -1; } /* @@ -946,6 +978,11 @@ vm_page_remove( * The object must be locked. No side effects. */ +unsigned long vm_page_lookup_hint = 0; +unsigned long vm_page_lookup_hint_next = 0; +unsigned long vm_page_lookup_hint_prev = 0; +unsigned long vm_page_lookup_hint_miss = 0; + vm_page_t vm_page_lookup( register vm_object_t object, @@ -953,6 +990,43 @@ vm_page_lookup( { register vm_page_t mem; register vm_page_bucket_t *bucket; + queue_entry_t qe; +#if 0 + _mutex_assert(&object->Lock, MA_OWNED); +#endif + + mem = object->memq_hint; + if (mem != VM_PAGE_NULL) { + assert(mem->object == object); + if (mem->offset == offset) { + vm_page_lookup_hint++; + return mem; + } + qe = queue_next(&mem->listq); + if (! queue_end(&object->memq, qe)) { + vm_page_t next_page; + + next_page = (vm_page_t) qe; + assert(next_page->object == object); + if (next_page->offset == offset) { + vm_page_lookup_hint_next++; + object->memq_hint = next_page; /* new hint */ + return next_page; + } + } + qe = queue_prev(&mem->listq); + if (! 
queue_end(&object->memq, qe)) { + vm_page_t prev_page; + + prev_page = (vm_page_t) qe; + assert(prev_page->object == object); + if (prev_page->offset == offset) { + vm_page_lookup_hint_prev++; + object->memq_hint = prev_page; /* new hint */ + return prev_page; + } + } + } /* * Search the hash table for this object/offset pair @@ -968,6 +1042,43 @@ vm_page_lookup( } simple_unlock(&vm_page_bucket_lock); + if (mem != VM_PAGE_NULL) { + if (object->memq_hint != VM_PAGE_NULL) { + vm_page_lookup_hint_miss++; + } + assert(mem->object == object); + object->memq_hint = mem; + } + + return(mem); +} + + +vm_page_t +vm_page_lookup_nohint( + vm_object_t object, + vm_object_offset_t offset) +{ + register vm_page_t mem; + register vm_page_bucket_t *bucket; + +#if 0 + _mutex_assert(&object->Lock, MA_OWNED); +#endif + /* + * Search the hash table for this object/offset pair + */ + + bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + + simple_lock(&vm_page_bucket_lock); + for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) { + VM_PAGE_CHECK(mem); + if ((mem->object == object) && (mem->offset == offset)) + break; + } + simple_unlock(&vm_page_bucket_lock); + return(mem); } @@ -986,6 +1097,16 @@ vm_page_rename( vm_object_offset_t new_offset) { assert(mem->object != new_object); + /* + * ENCRYPTED SWAP: + * The encryption key is based on the page's memory object + * (aka "pager") and paging offset. Moving the page to + * another VM object changes its "pager" and "paging_offset" + * so it has to be decrypted first. + */ + if (mem->encrypted) { + panic("vm_page_rename: page %p is encrypted\n", mem); + } /* * Changes to mem->object require the page lock because * the pageout daemon uses that lock to get the object. @@ -1014,6 +1135,7 @@ vm_page_init( vm_page_t mem, ppnum_t phys_page) { + assert(phys_page); *mem = vm_page_template; mem->phys_page = phys_page; } @@ -1059,11 +1181,12 @@ vm_page_release_fictitious( assert(m->phys_page == vm_page_fictitious_addr); c_vm_page_release_fictitious++; - +#if DEBUG if (m->free) panic("vm_page_release_fictitious"); +#endif m->free = TRUE; - zfree(vm_page_zone, (vm_offset_t)m); + zfree(vm_page_zone, m); } /* @@ -1096,7 +1219,6 @@ vm_page_release_fictitious( void vm_page_more_fictitious(void) { - extern vm_map_t zone_map; register vm_page_t m; vm_offset_t addr; kern_return_t retval; @@ -1134,9 +1256,10 @@ void vm_page_more_fictitious(void) return; } - if ((retval = kernel_memory_allocate(zone_map, - &addr, PAGE_SIZE, VM_PROT_ALL, - KMA_KOBJECT|KMA_NOPAGEWAIT)) != KERN_SUCCESS) { + retval = kernel_memory_allocate(zone_map, + &addr, PAGE_SIZE, VM_PROT_ALL, + KMA_KOBJECT|KMA_NOPAGEWAIT); + if (retval != KERN_SUCCESS) { /* * No page was available. 
Tell the pageout daemon, drop the * lock to give another thread a chance at it, and @@ -1157,7 +1280,7 @@ void vm_page_more_fictitious(void) m->fictitious = TRUE; m++; } - zcram(vm_page_zone, addr, PAGE_SIZE); + zcram(vm_page_zone, (void *) addr, PAGE_SIZE); mutex_unlock(&vm_page_alloc_lock); } @@ -1253,14 +1376,13 @@ vm_page_grab(void) */ if ((vm_page_free_count < vm_page_free_reserved) && - !current_thread()->vm_privilege) { + !(current_thread()->options & TH_OPT_VMPRIV)) { mutex_unlock(&vm_page_queue_free_lock); mem = VM_PAGE_NULL; goto wakeup_pageout; } while (vm_page_queue_free == VM_PAGE_NULL) { - printf("vm_page_grab: no free pages, trouble expected...\n"); mutex_unlock(&vm_page_queue_free_lock); VM_PAGE_WAIT(); mutex_lock(&vm_page_queue_free_lock); @@ -1270,10 +1392,18 @@ vm_page_grab(void) vm_page_free_count_minimum = vm_page_free_count; mem = vm_page_queue_free; vm_page_queue_free = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + mem->pageq.prev = NULL; + assert(mem->listq.next == NULL && mem->listq.prev == NULL); + assert(mem->tabled == FALSE); + assert(mem->object == VM_OBJECT_NULL); + assert(!mem->laundry); mem->free = FALSE; mem->no_isync = TRUE; mutex_unlock(&vm_page_queue_free_lock); + assert(pmap_verify_free(mem->phys_page)); + /* * Decide if we should poke the pageout daemon. * We do this if the free count is less than the low @@ -1317,15 +1447,20 @@ vm_page_release( } physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */ #endif - assert(!mem->private && !mem->fictitious); // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */ mutex_lock(&vm_page_queue_free_lock); +#if DEBUG if (mem->free) panic("vm_page_release"); +#endif mem->free = TRUE; + assert(!mem->laundry); + assert(mem->object == VM_OBJECT_NULL); + assert(mem->pageq.next == NULL && + mem->pageq.prev == NULL); mem->pageq.next = (queue_entry_t) vm_page_queue_free; vm_page_queue_free = mem; vm_page_free_count++; @@ -1359,8 +1494,6 @@ vm_page_release( mutex_unlock(&vm_page_queue_free_lock); } -#define VM_PAGEOUT_DEADLOCK_TIMEOUT 3 - /* * vm_page_wait: * @@ -1383,40 +1516,23 @@ vm_page_wait( * succeeds, the second fails. After the first page is freed, * a call to vm_page_wait must really block. 
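 * Concretely (sketch): with one free page left, threads T1 and
 * T2 both fail vm_page_grab(); T1's vm_page_wait() may return
 * because that page was freed in the meantime, but T2's must
 * then sleep on vm_page_free_count until a real wakeup, not
 * just retest the count.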
*/ - uint64_t abstime; kern_return_t wait_result; - kern_return_t kr; int need_wakeup = 0; mutex_lock(&vm_page_queue_free_lock); if (vm_page_free_count < vm_page_free_target) { if (vm_page_free_wanted++ == 0) need_wakeup = 1; - wait_result = assert_wait((event_t)&vm_page_free_count, - interruptible); + wait_result = assert_wait((event_t)&vm_page_free_count, interruptible); mutex_unlock(&vm_page_queue_free_lock); counter(c_vm_page_wait_block++); if (need_wakeup) thread_wakeup((event_t)&vm_page_free_wanted); - if (wait_result == THREAD_WAITING) { - clock_interval_to_absolutetime_interval( - VM_PAGEOUT_DEADLOCK_TIMEOUT, - NSEC_PER_SEC, &abstime); - clock_absolutetime_interval_to_deadline( - abstime, &abstime); - thread_set_timer_deadline(abstime); + if (wait_result == THREAD_WAITING) wait_result = thread_block(THREAD_CONTINUE_NULL); - if(wait_result == THREAD_TIMED_OUT) { - kr = vm_pageout_emergency_availability_request(); - return TRUE; - } else { - thread_cancel_timer(); - } - } - return(wait_result == THREAD_AWAKENED); } else { mutex_unlock(&vm_page_queue_free_lock); @@ -1440,6 +1556,9 @@ vm_page_alloc( { register vm_page_t mem; +#if DEBUG + _mutex_assert(&object->Lock, MA_OWNED); +#endif mem = vm_page_grab(); if (mem == VM_PAGE_NULL) return VM_PAGE_NULL; @@ -1452,7 +1571,7 @@ vm_page_alloc( counter(unsigned int c_laundry_pages_freed = 0;) int vm_pagein_cluster_unused = 0; -boolean_t vm_page_free_verify = FALSE; +boolean_t vm_page_free_verify = TRUE; /* * vm_page_free: * @@ -1470,8 +1589,18 @@ vm_page_free( assert(!mem->free); assert(!mem->cleaning); assert(!mem->pageout); - assert(!vm_page_free_verify || pmap_verify_free(mem->phys_page)); + if (vm_page_free_verify && !mem->fictitious && !mem->private) { + assert(pmap_verify_free(mem->phys_page)); + } + +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); + if (mem->free) + panic("vm_page_free: freeing page on free list\n"); +#endif if (mem->tabled) vm_page_remove(mem); /* clears tabled, object, offset */ VM_PAGE_QUEUES_REMOVE(mem); /* clears active or inactive */ @@ -1494,20 +1623,10 @@ vm_page_free( mem->gobbled = FALSE; if (mem->laundry) { - extern int vm_page_laundry_min; - if (!object->internal) - vm_page_burst_count--; - vm_page_laundry_count--; - mem->laundry = FALSE; /* laundry is now clear */ + vm_pageout_throttle_up(mem); counter(++c_laundry_pages_freed); - if (vm_page_laundry_count < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) &vm_page_laundry_count); - } } - mem->discard_request = FALSE; - PAGE_WAKEUP(mem); /* clears wanted */ if (mem->absent) @@ -1522,6 +1641,7 @@ vm_page_free( mem->dirty = FALSE; mem->precious = FALSE; mem->reference = FALSE; + mem->encrypted = FALSE; mem->page_error = KERN_SUCCESS; @@ -1548,30 +1668,30 @@ void vm_page_free_list( register vm_page_t mem) { - register vm_page_t nxt; + register vm_page_t nxt; register vm_page_t first = NULL; - register vm_page_t last; + register vm_page_t last = VM_PAGE_NULL; register int pg_count = 0; - +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif while (mem) { +#if DEBUG + if (mem->tabled || mem->object) + panic("vm_page_free_list: freeing tabled page\n"); + if (mem->inactive || mem->active || mem->free) + panic("vm_page_free_list: freeing page on list\n"); +#endif + assert(mem->pageq.prev == NULL); nxt = (vm_page_t)(mem->pageq.next); if (mem->clustered) vm_pagein_cluster_unused++; if (mem->laundry) { - extern int vm_page_laundry_min; - - if 
(!mem->object->internal) - vm_page_burst_count--; - vm_page_laundry_count--; + vm_pageout_throttle_up(mem); counter(++c_laundry_pages_freed); - - if (vm_page_laundry_count < vm_page_laundry_min) { - vm_page_laundry_min = 0; - thread_wakeup((event_t) &vm_page_laundry_count); - } } mem->busy = TRUE; @@ -1584,6 +1704,7 @@ vm_page_free_list( /* depends on the queues lock */ if (mem->zero_fill) vm_zf_count -= 1; + assert(!mem->laundry); vm_page_init(mem, mem->phys_page); mem->free = TRUE; @@ -1611,9 +1732,14 @@ vm_page_free_list( if ((vm_page_free_wanted > 0) && (vm_page_free_count >= vm_page_free_reserved)) { - int available_pages; + unsigned int available_pages; - available_pages = vm_page_free_count - vm_page_free_reserved; + if (vm_page_free_count >= vm_page_free_reserved) { + available_pages = (vm_page_free_count + - vm_page_free_reserved); + } else { + available_pages = 0; + } if (available_pages >= vm_page_free_wanted) { vm_page_free_wanted = 0; @@ -1644,10 +1770,14 @@ vm_page_wire( register vm_page_t mem) { -// dbgLog(current_act(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ +// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); - +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (mem->wire_count == 0) { VM_PAGE_QUEUES_REMOVE(mem); if (!mem->private && !mem->fictitious && !mem->gobbled) @@ -1660,6 +1790,15 @@ vm_page_wire( vm_zf_count-=1; mem->zero_fill = FALSE; } + /* + * ENCRYPTED SWAP: + * The page could be encrypted, but + * We don't have to decrypt it here + * because we don't guarantee that the + * data is actually valid at this point. + * The page will get decrypted in + * vm_fault_wire() if needed. + */ } assert(!mem->gobbled); mem->wire_count++; @@ -1704,14 +1843,21 @@ vm_page_unwire( register vm_page_t mem) { -// dbgLog(current_act(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ +// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); assert(mem->wire_count > 0); - +#if DEBUG + if (mem->object) + _mutex_assert(&mem->object->Lock, MA_OWNED); + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (--mem->wire_count == 0) { assert(!mem->private && !mem->fictitious); vm_page_wire_count--; + assert(!mem->laundry); + assert(mem->object != kernel_object); + assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq); vm_page_active_count++; mem->active = TRUE; @@ -1733,9 +1879,12 @@ vm_page_deactivate( register vm_page_t m) { VM_PAGE_CHECK(m); + assert(m->object != kernel_object); // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */ - +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif /* * This page is no longer very interesting. 
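The vm_page_wire()/vm_page_unwire() hunks above maintain a wire count: the first wiring pulls the page off the paging queues, and the last unwiring re-enters it on the active queue. A reduced sketch of that protocol follows, with queue membership collapsed to a flag; all names are hypothetical, not the kernel's.

/* Illustrative sketch only, not part of the patch. */
#include <assert.h>

struct wpage {
    unsigned wire_count;   /* number of outstanding wirings */
    int      on_pageq;     /* 1 if on the active/inactive queues */
};

void
wpage_wire(struct wpage *m)
{
    if (m->wire_count == 0)
        m->on_pageq = 0;   /* first wiring pulls the page off the queues */
    m->wire_count++;       /* callers hold the page-queues lock, per the
                            * _mutex_assert()s added above */
}

void
wpage_unwire(struct wpage *m)
{
    assert(m->wire_count > 0);
    if (--m->wire_count == 0)
        m->on_pageq = 1;   /* last unwiring re-enters the active queue */
}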
If it was * interesting (active or inactive/referenced), then we @@ -1770,6 +1919,8 @@ vm_page_deactivate( vm_page_ticket++; } + assert(!m->laundry); + assert(m->pageq.next == NULL && m->pageq.prev == NULL); if(m->zero_fill) { queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq); } else { @@ -1796,7 +1947,10 @@ vm_page_activate( register vm_page_t m) { VM_PAGE_CHECK(m); - + assert(m->object != kernel_object); +#if DEBUG + _mutex_assert(&vm_page_queue_lock, MA_OWNED); +#endif if (m->gobbled) { assert(m->wire_count == 0); if (!m->private && !m->fictitious) @@ -1808,20 +1962,26 @@ vm_page_activate( return; if (m->inactive) { + assert(!m->laundry); if (m->zero_fill) { queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); } else { queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); } + m->pageq.next = NULL; + m->pageq.prev = NULL; if (!m->fictitious) vm_page_inactive_count--; m->inactive = FALSE; } if (m->wire_count == 0) { +#if DEBUG if (m->active) panic("vm_page_activate: already active"); - +#endif + assert(!m->laundry); + assert(m->pageq.next == NULL && m->pageq.prev == NULL); queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); m->active = TRUE; m->reference = TRUE; @@ -1915,6 +2075,10 @@ vm_page_part_copy( * vm_page_copy: * * Copy one page to another + * + * ENCRYPTED SWAP: + * The source page should not be encrypted. The caller should + * make sure the page is decrypted first, if necessary. */ void @@ -1931,6 +2095,17 @@ vm_page_copy( VM_PAGE_CHECK(src_m); VM_PAGE_CHECK(dest_m); + /* + * ENCRYPTED SWAP: + * The source page should not be encrypted at this point. + * The destination page will therefore not contain encrypted + * data after the copy. + */ + if (src_m->encrypted) { + panic("vm_page_copy: source page %p is encrypted\n", src_m); + } + dest_m->encrypted = FALSE; + pmap_copy_page(src_m->phys_page, dest_m->phys_page); } @@ -1947,28 +2122,15 @@ vm_page_copy( * memory */ -#define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n)) - -#if MACH_ASSERT -int vm_page_verify_contiguous( - vm_page_t pages, - unsigned int npages); -#endif /* MACH_ASSERT */ - -cpm_counter(unsigned int vpfls_pages_handled = 0;) -cpm_counter(unsigned int vpfls_head_insertions = 0;) -cpm_counter(unsigned int vpfls_tail_insertions = 0;) -cpm_counter(unsigned int vpfls_general_insertions = 0;) -cpm_counter(unsigned int vpfc_failed = 0;) -cpm_counter(unsigned int vpfc_satisfied = 0;) - - - #if MACH_ASSERT /* * Check that the list of pages is ordered by * ascending physical address and has no holes. 
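The contiguity invariant stated above (ascending physical addresses, no holes) can be restated as a standalone check. This sketch assumes the page run is presented as a plain array of physical page numbers rather than the kernel's linked list; it is illustrative only.

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

typedef unsigned int ppnum_t;

int
verify_contiguous(const ppnum_t *phys, size_t npages)
{
    size_t i;

    for (i = 1; i < npages; i++) {
        if (phys[i] != phys[i - 1] + 1)
            return 0;      /* hole or out-of-order page: not contiguous */
    }
    return 1;
}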
*/ +int vm_page_verify_contiguous( + vm_page_t pages, + unsigned int npages); + int vm_page_verify_contiguous( vm_page_t pages, @@ -1976,7 +2138,7 @@ vm_page_verify_contiguous( { register vm_page_t m; unsigned int page_count; - ppnum_t prev_addr; + vm_offset_t prev_addr; prev_addr = pages->phys_page; page_count = 1; @@ -1984,7 +2146,7 @@ vm_page_verify_contiguous( if (m->phys_page != prev_addr + 1) { printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n", m, prev_addr, m->phys_page); - printf("pages 0x%x page_count %u\n", pages, page_count); + printf("pages 0x%x page_count %d\n", pages, page_count); panic("vm_page_verify_contiguous: not contiguous!"); } prev_addr = m->phys_page; @@ -2000,6 +2162,13 @@ vm_page_verify_contiguous( #endif /* MACH_ASSERT */ +cpm_counter(unsigned int vpfls_pages_handled = 0;) +cpm_counter(unsigned int vpfls_head_insertions = 0;) +cpm_counter(unsigned int vpfls_tail_insertions = 0;) +cpm_counter(unsigned int vpfls_general_insertions = 0;) +cpm_counter(unsigned int vpfc_failed = 0;) +cpm_counter(unsigned int vpfc_satisfied = 0;) + /* * Find a region large enough to contain at least npages * of contiguous physical memory. @@ -2031,6 +2200,10 @@ vm_page_find_contiguous( ppnum_t nextcontaddr; unsigned int npages; + m = NULL; +#if DEBUG + _mutex_assert(&vm_page_queue_free_lock, MA_OWNED); +#endif #if MACH_ASSERT /* * Verify pages in the free list.. @@ -2046,8 +2219,9 @@ vm_page_find_contiguous( if (contig_pages == 0 || vm_page_queue_free == VM_PAGE_NULL) return VM_PAGE_NULL; -#define PPNUM_PREV(x) (((x) > 0) ? ((x) - 1) : 0) -#define PPNUM_NEXT(x) (((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX) +#define PPNUM_PREV(x) (((x) > 0) ? ((x) - 1) : 0) +#define PPNUM_NEXT(x) (((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX) +#define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n)) npages = 1; contfirstprev = &sort_list; @@ -2135,7 +2309,8 @@ vm_page_find_contiguous( if (npages == contig_pages) break; - if (m->phys_page != nextcontaddr) { + if (m->phys_page != nextcontaddr) + { contfirstprev = NEXT_PAGE_PTR(contlast); prevcontaddr = PPNUM_PREV(m->phys_page); nextcontaddr = PPNUM_NEXT(m->phys_page); @@ -2178,6 +2353,7 @@ vm_page_find_contiguous( for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) { assert(m1->free); assert(!m1->wanted); + assert(!m1->laundry); m1->free = FALSE; m1->no_isync = TRUE; m1->gobbled = TRUE; @@ -2206,10 +2382,9 @@ cpm_allocate( boolean_t wire) { register vm_page_t m; - vm_page_t *first_contig; - vm_page_t free_list, pages; - unsigned int npages, n1pages; - int vm_pages_available; + vm_page_t pages; + unsigned int npages; + unsigned int vm_pages_available; boolean_t wakeup; if (size % page_size != 0) @@ -2309,7 +2484,7 @@ vm_page_info( hash_info_bucket_t *info, unsigned int count) { - int i; + unsigned int i; if (vm_page_bucket_count < count) count = vm_page_bucket_count; @@ -2344,9 +2519,11 @@ vm_page_info( */ void vm_page_print( - vm_page_t p) + db_addr_t db_addr) { - extern db_indent; + vm_page_t p; + + p = (vm_page_t) (long) db_addr; iprintf("page 0x%x\n", p); @@ -2356,14 +2533,14 @@ vm_page_print( printf(", offset=0x%x", p->offset); printf(", wire_count=%d", p->wire_count); - iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sdiscard\n", + iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n", (p->inactive ? "" : "!"), (p->active ? "" : "!"), (p->gobbled ? "" : "!"), (p->laundry ? "" : "!"), (p->free ? "" : "!"), (p->reference ? "" : "!"), - (p->discard_request ? "" : "!")); + (p->encrypted ? 
"" : "!")); iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n", (p->busy ? "" : "!"), (p->wanted ? "" : "!"), diff --git a/osfmk/vm/vm_shared_memory_server.c b/osfmk/vm/vm_shared_memory_server.c index 8303e83b3..d241dba2e 100644 --- a/osfmk/vm/vm_shared_memory_server.c +++ b/osfmk/vm/vm_shared_memory_server.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -27,27 +27,75 @@ * Support routines for an in-kernel shared memory allocator */ -#include -#include -#include +#include + +#include #include #include +#include #include + +#include +#include +#include +#include +#include + +#include +#include + #include #include #include +#include #include #include +#if DEBUG +int lsf_debug = 0; +int lsf_alloc_debug = 0; +#define LSF_DEBUG(args) \ + MACRO_BEGIN \ + if (lsf_debug) { \ + kprintf args; \ + } \ + MACRO_END +#define LSF_ALLOC_DEBUG(args) \ + MACRO_BEGIN \ + if (lsf_alloc_debug) { \ + kprintf args; \ + } \ + MACRO_END +#else /* DEBUG */ +#define LSF_DEBUG(args) +#define LSF_ALLOC_DEBUG(args) +#endif /* DEBUG */ + /* forward declarations */ +static kern_return_t +shared_region_object_create( + vm_size_t size, + ipc_port_t *object_handle); + +static kern_return_t +shared_region_mapping_dealloc_lock( + shared_region_mapping_t shared_region, + int need_sfh_lock, + int need_drl_lock); + + static kern_return_t shared_file_init( - ipc_port_t *shared_text_region_handle, + ipc_port_t *text_region_handle, vm_size_t text_region_size, - ipc_port_t *shared_data_region_handle, + ipc_port_t *data_region_handle, vm_size_t data_region_size, - vm_offset_t *shared_file_mapping_array); + vm_offset_t *file_mapping_array); + +static kern_return_t +shared_file_header_init( + shared_file_info_t *shared_file_header); static load_struct_t * lsf_hash_lookup( @@ -55,6 +103,7 @@ lsf_hash_lookup( void *file_object, vm_offset_t recognizableOffset, int size, + boolean_t regular, boolean_t alternate, shared_region_task_mappings_t sm_info); @@ -80,12 +129,36 @@ lsf_load( int flags, shared_region_task_mappings_t sm_info); +static kern_return_t +lsf_slide( + unsigned int map_cnt, + struct shared_file_mapping_np *mappings, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t *base_offset_p); + +static kern_return_t +lsf_map( + struct shared_file_mapping_np *mappings, + int map_cnt, + void *file_control, + memory_object_size_t file_size, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t base_offset, + mach_vm_offset_t *slide_p); + static void lsf_unload( void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info); +static void +lsf_deallocate( + void *file_object, + vm_offset_t base_offset, + shared_region_task_mappings_t sm_info, + boolean_t unload); + #define load_file_hash(file_object, size) \ ((((natural_t)file_object) & 0xffffff) % size) @@ -115,12 +188,375 @@ zone_t lsf_zone; int shared_file_available_hash_ele; /* com region support */ -ipc_port_t com_region_handle = NULL; -vm_map_t com_region_map = NULL; +ipc_port_t com_region_handle32 = NULL; +ipc_port_t com_region_handle64 = NULL; +vm_map_t com_region_map32 = NULL; +vm_map_t com_region_map64 = NULL; vm_size_t com_region_size = _COMM_PAGE_AREA_LENGTH; shared_region_mapping_t com_mapping_resource = NULL; -#define GLOBAL_COM_REGION_BASE _COMM_PAGE_BASE_ADDRESS + +#if DEBUG +int shared_region_debug = 0; +#endif /* DEBUG */ + + +kern_return_t +vm_get_shared_region( + 
task_t task, + shared_region_mapping_t *shared_region) +{ + *shared_region = (shared_region_mapping_t) task->system_shared_region; + if (*shared_region) { + assert((*shared_region)->ref_count > 0); + } + SHARED_REGION_DEBUG(("vm_get_shared_region(task=%p) -> %p\n", + task, *shared_region)); + return KERN_SUCCESS; +} + +kern_return_t +vm_set_shared_region( + task_t task, + shared_region_mapping_t shared_region) +{ + SHARED_REGION_DEBUG(("vm_set_shared_region(task=%p, " + "shared_region=%p)\n", + task, shared_region)); + if (shared_region) { + assert(shared_region->ref_count > 0); + } + task->system_shared_region = shared_region; + return KERN_SUCCESS; +} + +/* + * shared_region_object_chain_detach: + * + * Mark the shared region as being detached or standalone. This means + * that we won't keep track of which file is mapped and how, for this shared + * region. And we don't have a "shadow" shared region. + * This is used when we clone a private shared region and we intend to remove + * some mappings from it. It won't need to maintain mappings info because it's + * now private. It can't have a "shadow" shared region because we don't want + * to see the shadow of the mappings we're about to remove. + */ +void +shared_region_object_chain_detached( + shared_region_mapping_t target_region) +{ + shared_region_mapping_lock(target_region); + target_region->flags |= SHARED_REGION_STANDALONE; + shared_region_mapping_unlock(target_region); +} + +/* + * shared_region_object_chain_attach: + * + * Link "target_region" to "object_chain_region". "object_chain_region" + * is treated as a shadow of "target_region" for the purpose of looking up + * mappings. Since the "target_region" preserves all the mappings of the + * older "object_chain_region", we won't duplicate all the mappings info and + * we'll just lookup the next region in the "object_chain" if we can't find + * what we're looking for in the "target_region". See lsf_hash_lookup(). 
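A sketch of the lookup fall-back this object chain enables: search the target region first, then walk the older "shadow" regions, so the newest mapping wins. The types below are simplified stand-ins for shared_region_mapping_t and its object_chain, not the real declarations.

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

struct region {
    struct region *shadow;      /* object_chain analogue */
    int          (*contains)(struct region *, unsigned key);
};

struct region *
chain_lookup(struct region *target, unsigned key)
{
    struct region *r;

    /* Newest region first, then each older shadow in turn,
     * as lsf_hash_lookup() does with the object chain. */
    for (r = target; r != NULL; r = r->shadow) {
        if (r->contains(r, key))
            return r;
    }
    return NULL;
}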
+ */ +kern_return_t +shared_region_object_chain_attach( + shared_region_mapping_t target_region, + shared_region_mapping_t object_chain_region) +{ + shared_region_object_chain_t object_ele; + + SHARED_REGION_DEBUG(("shared_region_object_chain_attach(" + "target_region=%p, object_chain_region=%p\n", + target_region, object_chain_region)); + assert(target_region->ref_count > 0); + assert(object_chain_region->ref_count > 0); + if(target_region->object_chain) + return KERN_FAILURE; + object_ele = (shared_region_object_chain_t) + kalloc(sizeof (struct shared_region_object_chain)); + shared_region_mapping_lock(object_chain_region); + target_region->object_chain = object_ele; + object_ele->object_chain_region = object_chain_region; + object_ele->next = object_chain_region->object_chain; + object_ele->depth = object_chain_region->depth; + object_chain_region->depth++; + target_region->alternate_next = object_chain_region->alternate_next; + shared_region_mapping_unlock(object_chain_region); + return KERN_SUCCESS; +} + +/* LP64todo - need 64-bit safe version */ +kern_return_t +shared_region_mapping_create( + ipc_port_t text_region, + vm_size_t text_size, + ipc_port_t data_region, + vm_size_t data_size, + vm_offset_t region_mappings, + vm_offset_t client_base, + shared_region_mapping_t *shared_region, + vm_offset_t alt_base, + vm_offset_t alt_next) +{ + SHARED_REGION_DEBUG(("shared_region_mapping_create()\n")); + *shared_region = (shared_region_mapping_t) + kalloc(sizeof (struct shared_region_mapping)); + if(*shared_region == NULL) { + SHARED_REGION_DEBUG(("shared_region_mapping_create: " + "failure\n")); + return KERN_FAILURE; + } + shared_region_mapping_lock_init((*shared_region)); + (*shared_region)->text_region = text_region; + (*shared_region)->text_size = text_size; + (*shared_region)->fs_base = ENV_DEFAULT_ROOT; + (*shared_region)->system = cpu_type(); + (*shared_region)->data_region = data_region; + (*shared_region)->data_size = data_size; + (*shared_region)->region_mappings = region_mappings; + (*shared_region)->client_base = client_base; + (*shared_region)->ref_count = 1; + (*shared_region)->next = NULL; + (*shared_region)->object_chain = NULL; + (*shared_region)->self = *shared_region; + (*shared_region)->flags = 0; + (*shared_region)->depth = 0; + (*shared_region)->default_env_list = NULL; + (*shared_region)->alternate_base = alt_base; + (*shared_region)->alternate_next = alt_next; + SHARED_REGION_DEBUG(("shared_region_mapping_create -> %p\n", + *shared_region)); + return KERN_SUCCESS; +} + +/* LP64todo - need 64-bit safe version */ +kern_return_t +shared_region_mapping_info( + shared_region_mapping_t shared_region, + ipc_port_t *text_region, + vm_size_t *text_size, + ipc_port_t *data_region, + vm_size_t *data_size, + vm_offset_t *region_mappings, + vm_offset_t *client_base, + vm_offset_t *alt_base, + vm_offset_t *alt_next, + unsigned int *fs_base, + unsigned int *system, + int *flags, + shared_region_mapping_t *next) +{ + shared_region_mapping_lock(shared_region); + + SHARED_REGION_DEBUG(("shared_region_mapping_info(shared_region=%p)\n", + shared_region)); + assert(shared_region->ref_count > 0); + *text_region = shared_region->text_region; + *text_size = shared_region->text_size; + *data_region = shared_region->data_region; + *data_size = shared_region->data_size; + *region_mappings = shared_region->region_mappings; + *client_base = shared_region->client_base; + *alt_base = shared_region->alternate_base; + *alt_next = shared_region->alternate_next; + *flags = shared_region->flags; + 
*fs_base = shared_region->fs_base; + *system = shared_region->system; + *next = shared_region->next; + + shared_region_mapping_unlock(shared_region); +} + +/* LP64todo - need 64-bit safe version */ +kern_return_t +shared_region_mapping_set_alt_next( + shared_region_mapping_t shared_region, + vm_offset_t alt_next) +{ + SHARED_REGION_DEBUG(("shared_region_mapping_set_alt_next" + "(shared_region=%p, alt_next=0%x)\n", + shared_region, alt_next)); + assert(shared_region->ref_count > 0); + shared_region->alternate_next = alt_next; + return KERN_SUCCESS; +} + +kern_return_t +shared_region_mapping_ref( + shared_region_mapping_t shared_region) +{ + SHARED_REGION_DEBUG(("shared_region_mapping_ref(shared_region=%p): " + "ref_count=%d + 1\n", + shared_region, + shared_region ? shared_region->ref_count : 0)); + if(shared_region == NULL) + return KERN_SUCCESS; + assert(shared_region->ref_count > 0); + hw_atomic_add(&shared_region->ref_count, 1); + return KERN_SUCCESS; +} + +static kern_return_t +shared_region_mapping_dealloc_lock( + shared_region_mapping_t shared_region, + int need_sfh_lock, + int need_drl_lock) +{ + struct shared_region_task_mappings sm_info; + shared_region_mapping_t next = NULL; + int ref_count; + + SHARED_REGION_DEBUG(("shared_region_mapping_dealloc_lock" + "(shared_region=%p,%d,%d) ref_count=%d\n", + shared_region, need_sfh_lock, need_drl_lock, + shared_region ? shared_region->ref_count : 0)); + while (shared_region) { + SHARED_REGION_DEBUG(("shared_region_mapping_dealloc_lock(%p): " + "ref_count=%d\n", + shared_region, shared_region->ref_count)); + assert(shared_region->ref_count > 0); + if ((ref_count = + hw_atomic_sub(&shared_region->ref_count, 1)) == 0) { + shared_region_mapping_lock(shared_region); + + sm_info.text_region = shared_region->text_region; + sm_info.text_size = shared_region->text_size; + sm_info.data_region = shared_region->data_region; + sm_info.data_size = shared_region->data_size; + sm_info.region_mappings = shared_region->region_mappings; + sm_info.client_base = shared_region->client_base; + sm_info.alternate_base = shared_region->alternate_base; + sm_info.alternate_next = shared_region->alternate_next; + sm_info.flags = shared_region->flags; + sm_info.self = (vm_offset_t)shared_region; + + if(shared_region->region_mappings) { + lsf_remove_regions_mappings_lock(shared_region, &sm_info, need_sfh_lock); + } + if(((vm_named_entry_t) + (shared_region->text_region->ip_kobject)) + ->backing.map->pmap) { + pmap_remove(((vm_named_entry_t) + (shared_region->text_region->ip_kobject)) + ->backing.map->pmap, + sm_info.client_base, + sm_info.client_base + sm_info.text_size); + } + ipc_port_release_send(shared_region->text_region); + if(shared_region->data_region) + ipc_port_release_send(shared_region->data_region); + if (shared_region->object_chain) { + next = shared_region->object_chain->object_chain_region; + kfree(shared_region->object_chain, + sizeof (struct shared_region_object_chain)); + } else { + next = NULL; + } + shared_region_mapping_unlock(shared_region); + SHARED_REGION_DEBUG( + ("shared_region_mapping_dealloc_lock(%p): " + "freeing\n", + shared_region)); + bzero((void *)shared_region, + sizeof (*shared_region)); /* FBDP debug */ + kfree(shared_region, + sizeof (struct shared_region_mapping)); + shared_region = next; + } else { + /* Stale indicates that a system region is no */ + /* longer in the default environment list. 
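The ref/dealloc pair above uses hw_atomic_add()/hw_atomic_sub() on ref_count and tears the region down when the count reaches zero. Below is a C11-atomics sketch of that shape, not part of the patch; note that hw_atomic_sub() returns the new value, so the kernel tests "== 0", while atomic_fetch_sub() returns the old value, hence "== 1" here.

/* Illustrative sketch only; struct and names are stand-ins. */
#include <stdatomic.h>
#include <stdlib.h>

struct refobj {
    atomic_uint ref_count;
};

void
refobj_ref(struct refobj *o)
{
    atomic_fetch_add(&o->ref_count, 1);   /* like hw_atomic_add(&ref_count, 1) */
}

void
refobj_dealloc(struct refobj *o)
{
    if (atomic_fetch_sub(&o->ref_count, 1) == 1) {
        /* Last reference: tear the object down, as the kernel does with
         * pmap_remove()/ipc_port_release_send()/kfree() above. */
        free(o);
    }
}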
*/ + if((ref_count == 1) && + (shared_region->flags & SHARED_REGION_SYSTEM) + && !(shared_region->flags & SHARED_REGION_STALE)) { + SHARED_REGION_DEBUG( + ("shared_region_mapping_dealloc_lock" + "(%p): removing stale\n", + shared_region)); + remove_default_shared_region_lock(shared_region,need_sfh_lock, need_drl_lock); + } + break; + } + } + SHARED_REGION_DEBUG(("shared_region_mapping_dealloc_lock(%p): done\n", + shared_region)); + return KERN_SUCCESS; +} + +/* + * Stub function; always indicates that the lock needs to be taken in the + * call to lsf_remove_regions_mappings_lock(). + */ +kern_return_t +shared_region_mapping_dealloc( + shared_region_mapping_t shared_region) +{ + SHARED_REGION_DEBUG(("shared_region_mapping_dealloc" + "(shared_region=%p)\n", + shared_region)); + if (shared_region) { + assert(shared_region->ref_count > 0); + } + return shared_region_mapping_dealloc_lock(shared_region, 1, 1); +} + +static +kern_return_t +shared_region_object_create( + vm_size_t size, + ipc_port_t *object_handle) +{ + vm_named_entry_t user_entry; + ipc_port_t user_handle; + + ipc_port_t previous; + vm_map_t new_map; + + user_entry = (vm_named_entry_t) + kalloc(sizeof (struct vm_named_entry)); + if(user_entry == NULL) { + return KERN_FAILURE; + } + named_entry_lock_init(user_entry); + user_handle = ipc_port_alloc_kernel(); + + + ip_lock(user_handle); + + /* make a sonce right */ + user_handle->ip_sorights++; + ip_reference(user_handle); + + user_handle->ip_destination = IP_NULL; + user_handle->ip_receiver_name = MACH_PORT_NULL; + user_handle->ip_receiver = ipc_space_kernel; + + /* make a send right */ + user_handle->ip_mscount++; + user_handle->ip_srights++; + ip_reference(user_handle); + + ipc_port_nsrequest(user_handle, 1, user_handle, &previous); + /* nsrequest unlocks user_handle */ + + /* Create a named object based on a submap of specified size */ + + new_map = vm_map_create(pmap_create(0), 0, size, TRUE); + user_entry->backing.map = new_map; + user_entry->internal = TRUE; + user_entry->is_sub_map = TRUE; + user_entry->is_pager = FALSE; + user_entry->offset = 0; + user_entry->protection = VM_PROT_ALL; + user_entry->size = size; + user_entry->ref_count = 1; + + ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry, + IKOT_NAMED_ENTRY); + *object_handle = user_handle; + return KERN_SUCCESS; +} /* called for the non-default, private branch shared region support */ /* system default fields for fs_base and system supported are not */ @@ -136,25 +572,39 @@ shared_file_create_system_region( vm_offset_t mapping_array; kern_return_t kret; + SHARED_REGION_DEBUG(("shared_file_create_system_region()\n")); + text_size = 0x10000000; data_size = 0x10000000; kret = shared_file_init(&text_handle, text_size, &data_handle, data_size, &mapping_array); - if(kret) + if(kret) { + SHARED_REGION_DEBUG(("shared_file_create_system_region: " + "shared_file_init failed kret=0x%x\n", + kret)); return kret; + } kret = shared_region_mapping_create(text_handle, text_size, data_handle, data_size, mapping_array, GLOBAL_SHARED_TEXT_SEGMENT, shared_region, SHARED_ALTERNATE_LOAD_BASE, SHARED_ALTERNATE_LOAD_BASE); - if(kret) + if(kret) { + SHARED_REGION_DEBUG(("shared_file_create_system_region: " + "shared_region_mapping_create failed " + "kret=0x%x\n", + kret)); return kret; + } (*shared_region)->flags = 0; if(com_mapping_resource) { shared_region_mapping_ref(com_mapping_resource); (*shared_region)->next = com_mapping_resource; } + SHARED_REGION_DEBUG(("shared_file_create_system_region() " + "-> shared_region=%p\n", + 
*shared_region)); return KERN_SUCCESS; } @@ -173,6 +623,9 @@ update_default_shared_region( unsigned int fs_base; unsigned int system; + SHARED_REGION_DEBUG(("update_default_shared_region(new=%p)\n", + new_system_region)); + assert(new_system_region->ref_count > 0); fs_base = new_system_region->fs_base; system = new_system_region->system; new_system_region->flags |= SHARED_REGION_SYSTEM; @@ -184,22 +637,37 @@ (old_system_region->system == system)) { new_system_region->default_env_list = old_system_region->default_env_list; + old_system_region->default_env_list = NULL; default_environment_shared_regions = new_system_region; - default_regions_list_unlock(); old_system_region->flags |= SHARED_REGION_STALE; + default_regions_list_unlock(); + SHARED_REGION_DEBUG(("update_default_shared_region(%p): " + "old=%p stale 1\n", + new_system_region, old_system_region)); + assert(old_system_region->ref_count > 0); return old_system_region; } if (old_system_region) { while(old_system_region->default_env_list != NULL) { if((old_system_region->default_env_list->fs_base == fs_base) && (old_system_region->default_env_list->system == system)) { + shared_region_mapping_t tmp_system_region; + + tmp_system_region = + old_system_region->default_env_list; new_system_region->default_env_list = - old_system_region->default_env_list - ->default_env_list; + tmp_system_region->default_env_list; + tmp_system_region->default_env_list = NULL; old_system_region->default_env_list = new_system_region; - default_regions_list_unlock(); + old_system_region = tmp_system_region; old_system_region->flags |= SHARED_REGION_STALE; + default_regions_list_unlock(); + SHARED_REGION_DEBUG(("update_default_shared_region(%p)" + ": old=%p stale 2\n", + new_system_region, + old_system_region)); + assert(old_system_region->ref_count > 0); return old_system_region; } old_system_region = old_system_region->default_env_list; @@ -208,10 +676,18 @@ update_default_shared_region( /* If we get here, we are at the end of the system list and we */ /* did not find a pre-existing entry */ if(old_system_region) { + SHARED_REGION_DEBUG(("update_default_shared_region(%p): " + "adding after old=%p\n", + new_system_region, old_system_region)); + assert(old_system_region->ref_count > 0); old_system_region->default_env_list = new_system_region; } else { + SHARED_REGION_DEBUG(("update_default_shared_region(%p): " + "new default\n", + new_system_region)); default_environment_shared_regions = new_system_region; } + assert(new_system_region->ref_count > 0); default_regions_list_unlock(); return NULL; } @@ -230,7 +706,18 @@ lookup_default_shared_region( default_regions_list_lock(); system_region = default_environment_shared_regions; + SHARED_REGION_DEBUG(("lookup_default_shared_region" + "(base=0x%x, system=0x%x)\n", + fs_base, system)); while(system_region != NULL) { + SHARED_REGION_DEBUG(("lookup_default_shared_region(0x%x, 0x%x)" + ": system_region=%p base=0x%x system=0x%x" + " ref_count=%d\n", + fs_base, system, system_region, + system_region->fs_base, + system_region->system, + system_region->ref_count)); + assert(system_region->ref_count > 0); if((system_region->fs_base == fs_base) && (system_region->system == system)) { break; @@ -240,6 +727,8 @@ if(system_region) shared_region_mapping_ref(system_region); default_regions_list_unlock(); + SHARED_REGION_DEBUG(("lookup_default_shared_region(0x%x,0x%x) -> %p\n", + fs_base, system, system_region)); return system_region; } @@ -251,45 +740,82 @@ lookup_default_shared_region( 
__private_extern__ void remove_default_shared_region_lock( shared_region_mapping_t system_region, - int need_lock) + int need_sfh_lock, + int need_drl_lock) { shared_region_mapping_t old_system_region; - unsigned int fs_base; - unsigned int system; - default_regions_list_lock(); + SHARED_REGION_DEBUG(("remove_default_shared_region_lock" + "(system_region=%p, %d, %d)\n", + system_region, need_sfh_lock, need_drl_lock)); + if (need_drl_lock) { + default_regions_list_lock(); + } old_system_region = default_environment_shared_regions; if(old_system_region == NULL) { - default_regions_list_unlock(); + SHARED_REGION_DEBUG(("remove_default_shared_region_lock(%p)" + "-> default_env=NULL\n", + system_region)); + if (need_drl_lock) { + default_regions_list_unlock(); + } return; } + SHARED_REGION_DEBUG(("remove_default_shared_region_lock(%p): " + "default_env=%p\n", + system_region, old_system_region)); + assert(old_system_region->ref_count > 0); if (old_system_region == system_region) { default_environment_shared_regions = old_system_region->default_env_list; + old_system_region->default_env_list = NULL; old_system_region->flags |= SHARED_REGION_STALE; + SHARED_REGION_DEBUG(("remove_default_shared_region_lock(%p): " + "old=%p ref_count=%d STALE\n", + system_region, old_system_region, + old_system_region->ref_count)); shared_region_mapping_dealloc_lock(old_system_region, - need_lock); - default_regions_list_unlock(); + need_sfh_lock, + 0); + if (need_drl_lock) { + default_regions_list_unlock(); + } return; } while(old_system_region->default_env_list != NULL) { + SHARED_REGION_DEBUG(("remove_default_shared_region_lock(%p): " + "old=%p->default_env=%p\n", + system_region, old_system_region, + old_system_region->default_env_list)); + assert(old_system_region->default_env_list->ref_count > 0); if(old_system_region->default_env_list == system_region) { shared_region_mapping_t dead_region; dead_region = old_system_region->default_env_list; old_system_region->default_env_list = - old_system_region->default_env_list->default_env_list; + dead_region->default_env_list; + dead_region->default_env_list = NULL; dead_region->flags |= SHARED_REGION_STALE; + SHARED_REGION_DEBUG( + ("remove_default_shared_region_lock(%p): " + "dead=%p ref_count=%d stale\n", + system_region, dead_region, + dead_region->ref_count)); shared_region_mapping_dealloc_lock(dead_region, - need_lock); - default_regions_list_unlock(); + need_sfh_lock, + 0); + if (need_drl_lock) { + default_regions_list_unlock(); + } return; } old_system_region = old_system_region->default_env_list; } - default_regions_list_unlock(); + if (need_drl_lock) { + default_regions_list_unlock(); + } } /* @@ -301,15 +827,23 @@ void remove_default_shared_region( shared_region_mapping_t system_region) { - remove_default_shared_region_lock(system_region, 1); + SHARED_REGION_DEBUG(("remove_default_shared_region(%p)\n", + system_region)); + if (system_region) { + assert(system_region->ref_count > 0); + } + remove_default_shared_region_lock(system_region, 1, 1); } void -remove_all_shared_regions() +remove_all_shared_regions(void) { shared_region_mapping_t system_region; shared_region_mapping_t next_system_region; + SHARED_REGION_DEBUG(("***** REMOVE_ALL_SHARED_REGIONS()\n")); + LSF_ALLOC_DEBUG(("***** REMOVE_ALL_SHARED_REGIONS()\n")); + LSF_DEBUG(("***** REMOVE_ALL_SHARED_REGIONS()\n")); default_regions_list_lock(); system_region = default_environment_shared_regions; @@ -320,12 +854,20 @@ remove_all_shared_regions() while(system_region != NULL) { next_system_region = 
system_region->default_env_list; + system_region->default_env_list = NULL; system_region->flags |= SHARED_REGION_STALE; - shared_region_mapping_dealloc(system_region); + SHARED_REGION_DEBUG(("remove_all_shared_regions(): " + "%p ref_count=%d stale\n", + system_region, system_region->ref_count)); + assert(system_region->ref_count > 0); + shared_region_mapping_dealloc_lock(system_region, 1, 0); system_region = next_system_region; } default_environment_shared_regions = NULL; default_regions_list_unlock(); + SHARED_REGION_DEBUG(("***** remove_all_shared_regions() done\n")); + LSF_ALLOC_DEBUG(("***** remove_all_shared_regions() done\n")); + LSF_DEBUG(("***** remove_all_shared_regions() done\n")); } /* shared_com_boot_time_init initializes the common page shared data and */ @@ -333,55 +875,86 @@ remove_all_shared_regions() /* and so its policies have to be handled differently by the code that */ /* manipulates the mapping of shared region environments. However, */ /* the shared region delivery system supports both */ -shared_com_boot_time_init() +void shared_com_boot_time_init(void); /* forward */ +void +shared_com_boot_time_init(void) { kern_return_t kret; vm_named_entry_t named_entry; - if(com_region_handle) { + SHARED_REGION_DEBUG(("shared_com_boot_time_init()\n")); + if(com_region_handle32) { + panic("shared_com_boot_time_init: " + "com_region_handle32 already set\n"); + } + if(com_region_handle64) { panic("shared_com_boot_time_init: " - "com_region_handle already set\n"); + "com_region_handle64 already set\n"); } - /* create com page region */ - if(kret = vm_region_object_create(kernel_map, + /* create com page regions, 1 each for 32 and 64-bit code */ + if((kret = shared_region_object_create( + com_region_size, + &com_region_handle32))) { + panic("shared_com_boot_time_init: " + "unable to create 32-bit comm page\n"); + return; + } + if((kret = shared_region_object_create( com_region_size, - &com_region_handle)) { + &com_region_handle64))) { panic("shared_com_boot_time_init: " - "unable to create comm page\n"); + "unable to create 64-bit comm page\n"); return; } + /* now set export the underlying region/map */ - named_entry = (vm_named_entry_t)com_region_handle->ip_kobject; - com_region_map = named_entry->backing.map; + named_entry = (vm_named_entry_t)com_region_handle32->ip_kobject; + com_region_map32 = named_entry->backing.map; + named_entry = (vm_named_entry_t)com_region_handle64->ip_kobject; + com_region_map64 = named_entry->backing.map; + /* wrap the com region in its own shared file mapping structure */ - shared_region_mapping_create(com_region_handle, + /* 64-bit todo: call "shared_region_mapping_create" on com_region_handle64 */ + kret = shared_region_mapping_create(com_region_handle32, com_region_size, NULL, 0, 0, - GLOBAL_COM_REGION_BASE, &com_mapping_resource, + _COMM_PAGE_BASE_ADDRESS, &com_mapping_resource, 0, 0); - + if (kret) { + panic("shared_region_mapping_create failed for commpage"); + } } +void shared_file_boot_time_init( unsigned int fs_base, unsigned int system) { - long shared_text_region_size; - long shared_data_region_size; + long text_region_size; + long data_region_size; shared_region_mapping_t new_system_region; shared_region_mapping_t old_default_env; - shared_text_region_size = 0x10000000; - shared_data_region_size = 0x10000000; + SHARED_REGION_DEBUG(("shared_file_boot_time_init" + "(base=0x%x,system=0x%x)\n", + fs_base, system)); + text_region_size = 0x10000000; + data_region_size = 0x10000000; shared_file_init(&shared_text_region_handle, - 
shared_text_region_size, &shared_data_region_handle, - shared_data_region_size, &shared_file_mapping_array); + text_region_size, + &shared_data_region_handle, + data_region_size, + &shared_file_mapping_array); shared_region_mapping_create(shared_text_region_handle, - shared_text_region_size, shared_data_region_handle, - shared_data_region_size, shared_file_mapping_array, - GLOBAL_SHARED_TEXT_SEGMENT, &new_system_region, - SHARED_ALTERNATE_LOAD_BASE, SHARED_ALTERNATE_LOAD_BASE); + text_region_size, + shared_data_region_handle, + data_region_size, + shared_file_mapping_array, + GLOBAL_SHARED_TEXT_SEGMENT, + &new_system_region, + SHARED_ALTERNATE_LOAD_BASE, + SHARED_ALTERNATE_LOAD_BASE); new_system_region->fs_base = fs_base; new_system_region->system = system; @@ -401,6 +974,8 @@ shared_file_boot_time_init( shared_region_mapping_ref(com_mapping_resource); new_system_region->next = com_mapping_resource; vm_set_shared_region(current_task(), new_system_region); + SHARED_REGION_DEBUG(("shared_file_boot_time_init(0x%x,0x%x) done\n", + fs_base, system)); } @@ -413,18 +988,16 @@ shared_file_boot_time_init( static kern_return_t shared_file_init( - ipc_port_t *shared_text_region_handle, + ipc_port_t *text_region_handle, vm_size_t text_region_size, - ipc_port_t *shared_data_region_handle, + ipc_port_t *data_region_handle, vm_size_t data_region_size, - vm_offset_t *mapping_array) + vm_offset_t *file_mapping_array) { - vm_offset_t aligned_address; shared_file_info_t *sf_head; vm_offset_t table_mapping_address; int data_table_size; int hash_size; - int i; kern_return_t kret; vm_object_t buf_object; @@ -433,17 +1006,19 @@ shared_file_init( vm_offset_t b; vm_page_t p; + SHARED_REGION_DEBUG(("shared_file_init()\n")); /* create text and data maps/regions */ - if(kret = vm_region_object_create(kernel_map, - text_region_size, - shared_text_region_handle)) { - + kret = shared_region_object_create( + text_region_size, + text_region_handle); + if (kret) { return kret; } - if(kret = vm_region_object_create(kernel_map, - data_region_size, - shared_data_region_handle)) { - ipc_port_release_send(*shared_text_region_handle); + kret = shared_region_object_create( + data_region_size, + data_region_handle); + if (kret) { + ipc_port_release_send(*text_region_handle); return kret; } @@ -452,20 +1027,23 @@ shared_file_init( table_mapping_address = data_region_size - data_table_size; if(shared_file_mapping_array == 0) { + vm_map_address_t map_addr; buf_object = vm_object_allocate(data_table_size); - if(vm_map_find_space(kernel_map, &shared_file_mapping_array, - data_table_size, 0, &entry) != KERN_SUCCESS) { + if(vm_map_find_space(kernel_map, &map_addr, + data_table_size, 0, &entry) + != KERN_SUCCESS) { panic("shared_file_init: no space"); } - *mapping_array = shared_file_mapping_array; + shared_file_mapping_array = CAST_DOWN(vm_offset_t, map_addr); + *file_mapping_array = shared_file_mapping_array; vm_map_unlock(kernel_map); entry->object.vm_object = buf_object; entry->offset = 0; - for (b = *mapping_array, alloced = 0; + for (b = *file_mapping_array, alloced = 0; alloced < (hash_size + - round_page_32(sizeof(struct sf_mapping))); + round_page(sizeof(struct sf_mapping))); alloced += PAGE_SIZE, b += PAGE_SIZE) { vm_object_lock(buf_object); p = vm_page_alloc(buf_object, alloced); @@ -483,22 +1061,24 @@ shared_file_init( /* initialize loaded file array */ - sf_head = (shared_file_info_t *)*mapping_array; + sf_head = (shared_file_info_t *)*file_mapping_array; sf_head->hash = (queue_head_t *) - (((int)*mapping_array) + + 
(((int)*file_mapping_array) + sizeof(struct shared_file_info)); sf_head->hash_size = hash_size/sizeof(queue_head_t); - mutex_init(&(sf_head->lock), (ETAP_VM_MAP)); + mutex_init(&(sf_head->lock), 0); sf_head->hash_init = FALSE; mach_make_memory_entry(kernel_map, &data_table_size, - *mapping_array, VM_PROT_READ, &sfma_handle, + *file_mapping_array, VM_PROT_READ, &sfma_handle, NULL); - if (vm_map_wire(kernel_map, *mapping_array, - *mapping_array + - (hash_size + round_page_32(sizeof(struct sf_mapping))), + if (vm_map_wire(kernel_map, + vm_map_trunc_page(*file_mapping_array), + vm_map_round_page(*file_mapping_array + + hash_size + + round_page(sizeof(struct sf_mapping))), VM_PROT_DEFAULT, FALSE) != KERN_SUCCESS) { panic("shared_file_init: No memory for data table"); } @@ -514,21 +1094,77 @@ shared_file_init( zone_change(lsf_zone, Z_FOREIGN, TRUE); /* initialize the global default environment lock */ - mutex_init(&default_regions_list_lock_data, ETAP_NO_TRACE); + mutex_init(&default_regions_list_lock_data, 0); } else { - *mapping_array = shared_file_mapping_array; + *file_mapping_array = shared_file_mapping_array; } - vm_map(((vm_named_entry_t) - (*shared_data_region_handle)->ip_kobject)->backing.map, - &table_mapping_address, - data_table_size, 0, SHARED_LIB_ALIAS, - sfma_handle, 0, FALSE, - VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE); + kret = vm_map(((vm_named_entry_t) + (*data_region_handle)->ip_kobject)->backing.map, + &table_mapping_address, + data_table_size, 0, + SHARED_LIB_ALIAS | VM_FLAGS_FIXED, + sfma_handle, 0, FALSE, + VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE); + SHARED_REGION_DEBUG(("shared_file_init() done\n")); + return kret; } +static kern_return_t +shared_file_header_init( + shared_file_info_t *shared_file_header) +{ + vm_size_t hash_table_size; + vm_size_t hash_table_offset; + int i; + /* wire hash entry pool only as needed, since we are the only */ + /* users, we take a few liberties with the population of our */ + /* zone. */ + static int allocable_hash_pages; + static vm_offset_t hash_cram_address; + + + hash_table_size = shared_file_header->hash_size + * sizeof (struct queue_entry); + hash_table_offset = hash_table_size + + round_page(sizeof (struct sf_mapping)); + for (i = 0; i < shared_file_header->hash_size; i++) + queue_init(&shared_file_header->hash[i]); + + allocable_hash_pages = (((hash_table_size << 5) - hash_table_offset) + / PAGE_SIZE); + hash_cram_address = ((vm_offset_t) shared_file_header) + + hash_table_offset; + shared_file_available_hash_ele = 0; + + shared_file_header->hash_init = TRUE; + + if ((shared_file_available_hash_ele < 20) && (allocable_hash_pages)) { + int cram_pages, cram_size; + + cram_pages = allocable_hash_pages > 3 ? + 3 : allocable_hash_pages; + cram_size = cram_pages * PAGE_SIZE; + if (vm_map_wire(kernel_map, hash_cram_address, + hash_cram_address + cram_size, + VM_PROT_DEFAULT, FALSE) != KERN_SUCCESS) { + printf("shared_file_header_init: " + "No memory for data table\n"); + return KERN_NO_SPACE; + } + allocable_hash_pages -= cram_pages; + zcram(lsf_zone, (void *) hash_cram_address, cram_size); + shared_file_available_hash_ele + += cram_size/sizeof(struct load_file_ele); + hash_cram_address += cram_size; + } + + return KERN_SUCCESS; +} + + /* A call made from user space, copyin_shared_file requires the user to */ /* provide the address and size of a mapped file, the full path name of */ /* that file and a list of offsets to be mapped into shared memory. 
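The lookups described above all funnel through the load_file_hash() macro defined earlier in this file: mask the file object pointer to 24 bits, then reduce modulo the table size. A standalone sketch, not part of the patch; natural_word stands in for natural_t and the demo in main() is hypothetical.

/* Illustrative sketch only. */
#include <stdio.h>

typedef unsigned long natural_word;   /* stand-in for natural_t */

static unsigned
load_file_hash(const void *file_object, unsigned hash_size)
{
    /* Same shape as the macro: low 24 bits of the pointer, mod table size. */
    return (unsigned)(((natural_word)file_object) & 0xffffff) % hash_size;
}

int
main(void)
{
    int dummy;
    /* Same object and table size always select the same bucket. */
    printf("bucket=%u\n", load_file_hash(&dummy, 127));
    return 0;
}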
*/ @@ -556,12 +1192,7 @@ copyin_shared_file( int i; kern_return_t ret; - /* wire hash entry pool only as needed, since we are the only */ - /* users, we take a few liberties with the population of our */ - /* zone. */ - static int allocable_hash_pages; - static vm_offset_t hash_cram_address; - + SHARED_REGION_DEBUG(("copyin_shared_file()\n")); shared_file_header = (shared_file_info_t *)sm_info->region_mappings; @@ -572,50 +1203,20 @@ copyin_shared_file( /* mappings based on the file object */ if(shared_file_header->hash_init == FALSE) { - vm_size_t hash_table_size; - vm_size_t hash_table_offset; - - hash_table_size = (shared_file_header->hash_size) - * sizeof(struct queue_entry); - hash_table_offset = hash_table_size + - round_page_32(sizeof(struct sf_mapping)); - for (i = 0; i < shared_file_header->hash_size; i++) - queue_init(&shared_file_header->hash[i]); - - allocable_hash_pages = - ((hash_table_size<<5) - hash_table_offset)/PAGE_SIZE; - hash_cram_address = - sm_info->region_mappings + hash_table_offset; - shared_file_available_hash_ele = 0; - - shared_file_header->hash_init = TRUE; - } - - if ((shared_file_available_hash_ele < 20) && (allocable_hash_pages)) { - int cram_size; - - cram_size = allocable_hash_pages > 3 ? - 3 : allocable_hash_pages; - allocable_hash_pages -= cram_size; - cram_size = cram_size * PAGE_SIZE; - if (vm_map_wire(kernel_map, hash_cram_address, - hash_cram_address+cram_size, - VM_PROT_DEFAULT, FALSE) != KERN_SUCCESS) { - panic("shared_file_init: No memory for data table"); + ret = shared_file_header_init(shared_file_header); + if (ret != KERN_SUCCESS) { + mutex_unlock(&shared_file_header->lock); + return ret; } - zcram(lsf_zone, hash_cram_address, cram_size); - shared_file_available_hash_ele - += cram_size/sizeof(struct load_file_ele); - hash_cram_address += cram_size; } - /* Find the entry in the map associated with the current mapping */ /* of the file object */ file_object = memory_object_control_to_vm_object(file_control); if(vm_map_lookup_entry(current_map(), mapped_file, &entry)) { vm_object_t mapped_object; - if(entry->is_sub_map) { + if(entry->is_sub_map || + entry->object.vm_object == VM_OBJECT_NULL) { mutex_unlock(&shared_file_header->lock); return KERN_INVALID_ADDRESS; } @@ -640,9 +1241,12 @@ copyin_shared_file( alternate = (*flags & ALTERNATE_LOAD_SITE) ? 
TRUE : FALSE; - if (file_entry = lsf_hash_lookup(shared_file_header->hash, - (void *) file_object, mappings[0].file_offset, shared_file_header->hash_size, - alternate, sm_info)) { + file_entry = lsf_hash_lookup(shared_file_header->hash, + (void *) file_object, + mappings[0].file_offset, + shared_file_header->hash_size, + !alternate, alternate, sm_info); + if (file_entry) { /* File is loaded, check the load manifest for exact match */ /* we simplify by requiring that the elements be the same */ /* size and in the same order rather than checking for */ @@ -700,21 +1304,171 @@ copyin_shared_file( system_region = lookup_default_shared_region( regions->fs_base, regions->system); if(system_region == regions) { - shared_region_mapping_t new_system_shared_regions; + shared_region_mapping_t new_system_shared_region; shared_file_boot_time_init( regions->fs_base, regions->system); /* current task must stay with its current */ /* regions, drop count on system_shared_region */ /* and put back our original set */ vm_get_shared_region(current_task(), - &new_system_shared_regions); + &new_system_shared_region); shared_region_mapping_dealloc_lock( - new_system_shared_regions, 0); + new_system_shared_region, 0, 1); vm_set_shared_region(current_task(), regions); + } else if(system_region != NULL) { + shared_region_mapping_dealloc_lock( + system_region, 0, 1); } - if(system_region != NULL) { + } + mutex_unlock(&shared_file_header->lock); + return ret; + } +} + +/* + * map_shared_file: + * + * Attempt to map a split library into the shared region. Check if the mappings + * are already in place. + */ +kern_return_t +map_shared_file( + int map_cnt, + struct shared_file_mapping_np *mappings, + memory_object_control_t file_control, + memory_object_size_t file_size, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t base_offset, + mach_vm_offset_t *slide_p) +{ + vm_object_t file_object; + shared_file_info_t *shared_file_header; + load_struct_t *file_entry; + loaded_mapping_t *file_mapping; + int i; + kern_return_t ret; + mach_vm_offset_t slide; + + SHARED_REGION_DEBUG(("map_shared_file()\n")); + + shared_file_header = (shared_file_info_t *)sm_info->region_mappings; + + mutex_lock(&shared_file_header->lock); + + /* If this is the first call to this routine, take the opportunity */ + /* to initialize the hash table which will be used to look-up */ + /* mappings based on the file object */ + + if(shared_file_header->hash_init == FALSE) { + ret = shared_file_header_init(shared_file_header); + if (ret != KERN_SUCCESS) { + mutex_unlock(&shared_file_header->lock); + return KERN_NO_SPACE; + } + } + + + /* Find the entry in the map associated with the current mapping */ + /* of the file object */ + file_object = memory_object_control_to_vm_object(file_control); + + file_entry = lsf_hash_lookup(shared_file_header->hash, + (void *) file_object, + mappings[0].sfm_file_offset, + shared_file_header->hash_size, + TRUE, TRUE, sm_info); + if (file_entry) { + /* File is loaded, check the load manifest for exact match */ + /* we simplify by requiring that the elements be the same */ + /* size and in the same order rather than checking for */ + /* semantic equivalence. 
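A sketch of that "same size, same order" manifest comparison follows; it is illustrative only, the struct is a trimmed stand-in for shared_file_mapping_np/loaded_mapping_t, and the helper name is hypothetical.

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

struct mapping {
    unsigned long address, size, file_offset;
    int           protection;
};

/* Returns 1 iff the request matches the cached manifest element for
 * element; semantic equivalence (the same mappings reordered) is
 * deliberately not treated as a match, as the comment above explains. */
int
manifest_matches(const struct mapping *req, size_t req_cnt,
                 const struct mapping *cached, size_t cached_cnt)
{
    size_t i;

    if (req_cnt != cached_cnt)
        return 0;
    for (i = 0; i < req_cnt; i++) {
        if (req[i].address     != cached[i].address ||
            req[i].size        != cached[i].size ||
            req[i].file_offset != cached[i].file_offset ||
            req[i].protection  != cached[i].protection)
            return 0;
    }
    return 1;
}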
*/ + + i = 0; + file_mapping = file_entry->mappings; + while(file_mapping != NULL) { + if(i>=map_cnt) { + mutex_unlock(&shared_file_header->lock); + return KERN_INVALID_ARGUMENT; + } + if(((mappings[i].sfm_address) + & SHARED_DATA_REGION_MASK) != + file_mapping->mapping_offset || + mappings[i].sfm_size != file_mapping->size || + mappings[i].sfm_file_offset != file_mapping->file_offset || + mappings[i].sfm_init_prot != file_mapping->protection) { + break; + } + file_mapping = file_mapping->next; + i++; + } + if(i!=map_cnt) { + mutex_unlock(&shared_file_header->lock); + return KERN_INVALID_ARGUMENT; + } + + slide = file_entry->base_address - base_offset; + if (slide_p != NULL) { + /* + * File already mapped but at different address, + * and the caller is OK with the sliding. + */ + *slide_p = slide; + ret = KERN_SUCCESS; + } else { + /* + * The caller doesn't want any sliding. The file needs + * to be mapped at the requested address or not mapped. + */ + if (slide != 0) { + /* + * The file is already mapped but at a different + * address. + * We fail. + * XXX should we attempt to load at + * requested address too ? + */ + ret = KERN_FAILURE; + } else { + /* + * The file is already mapped at the correct + * address. + * We're done ! + */ + ret = KERN_SUCCESS; + } + } + mutex_unlock(&shared_file_header->lock); + return ret; + } else { + /* File is not loaded, lets attempt to load it */ + ret = lsf_map(mappings, map_cnt, + (void *)file_control, + file_size, + sm_info, + base_offset, + slide_p); + if(ret == KERN_NO_SPACE) { + shared_region_mapping_t regions; + shared_region_mapping_t system_region; + regions = (shared_region_mapping_t)sm_info->self; + regions->flags |= SHARED_REGION_FULL; + system_region = lookup_default_shared_region( + regions->fs_base, regions->system); + if (system_region == regions) { + shared_region_mapping_t new_system_shared_region; + shared_file_boot_time_init( + regions->fs_base, regions->system); + /* current task must stay with its current */ + /* regions, drop count on system_shared_region */ + /* and put back our original set */ + vm_get_shared_region(current_task(), + &new_system_shared_region); + shared_region_mapping_dealloc_lock( + new_system_shared_region, 0, 1); + vm_set_shared_region(current_task(), regions); + } else if (system_region != NULL) { shared_region_mapping_dealloc_lock( - system_region, 0); + system_region, 0, 1); } } mutex_unlock(&shared_file_header->lock); @@ -722,6 +1476,175 @@ copyin_shared_file( } } +/* + * shared_region_cleanup: + * + * Deallocates all the mappings in the shared region, except those explicitly + * specified in the "ranges" set of address ranges. + */ +kern_return_t +shared_region_cleanup( + unsigned int range_count, + struct shared_region_range_np *ranges, + shared_region_task_mappings_t sm_info) +{ + kern_return_t kr; + ipc_port_t region_handle; + vm_named_entry_t region_named_entry; + vm_map_t text_submap, data_submap, submap, next_submap; + unsigned int i_range; + vm_map_offset_t range_start, range_end; + vm_map_offset_t submap_base, submap_end, submap_offset; + vm_map_size_t delete_size; + + struct shared_region_range_np tmp_range; + unsigned int sort_index, sorted_index; + vm_map_offset_t sort_min_address; + unsigned int sort_min_index; + + /* + * Since we want to deallocate the holes between the "ranges", + * sort the array by increasing addresses. 
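That sort is a plain selection sort, a reasonable choice here because range_count stays small. Shown standalone below as an illustrative sketch; the struct and names are stand-ins, not the patch's types.

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

struct range {
    unsigned long address, size;
};

void
sort_ranges(struct range *ranges, size_t count)
{
    size_t sorted, scan, min;
    struct range tmp;

    for (sorted = 0; sorted < count; sorted++) {
        min = sorted;
        for (scan = sorted + 1; scan < count; scan++) {
            if (ranges[scan].address < ranges[min].address)
                min = scan;             /* lowest remaining start address */
        }
        if (min != sorted) {
            tmp = ranges[min];          /* swap entries, as the code above does */
            ranges[min] = ranges[sorted];
            ranges[sorted] = tmp;
        }
    }
}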
+ */ + for (sorted_index = 0; + sorted_index < range_count; + sorted_index++) { + + /* first remaining entry is our new starting point */ + sort_min_index = sorted_index; + sort_min_address = ranges[sort_min_index].srr_address; + + /* find the lowest mapping_offset in the remaining entries */ + for (sort_index = sorted_index + 1; + sort_index < range_count; + sort_index++) { + if (ranges[sort_index].srr_address < sort_min_address) { + /* lowest address so far... */ + sort_min_index = sort_index; + sort_min_address = + ranges[sort_min_index].srr_address; + } + } + + if (sort_min_index != sorted_index) { + /* swap entries */ + tmp_range = ranges[sort_min_index]; + ranges[sort_min_index] = ranges[sorted_index]; + ranges[sorted_index] = tmp_range; + } + } + + region_handle = (ipc_port_t) sm_info->text_region; + region_named_entry = (vm_named_entry_t) region_handle->ip_kobject; + text_submap = region_named_entry->backing.map; + + region_handle = (ipc_port_t) sm_info->data_region; + region_named_entry = (vm_named_entry_t) region_handle->ip_kobject; + data_submap = region_named_entry->backing.map; + + submap = text_submap; + next_submap = submap; + submap_base = sm_info->client_base; + submap_offset = 0; + submap_end = submap_base + sm_info->text_size; + for (i_range = 0; + i_range < range_count; + i_range++) { + + /* get the next range of addresses to keep */ + range_start = ranges[i_range].srr_address; + range_end = range_start + ranges[i_range].srr_size; + /* align them to page boundaries */ + range_start = vm_map_trunc_page(range_start); + range_end = vm_map_round_page(range_end); + + /* make sure we don't go beyond the submap's boundaries */ + if (range_start < submap_base) { + range_start = submap_base; + } else if (range_start >= submap_end) { + range_start = submap_end; + } + if (range_end < submap_base) { + range_end = submap_base; + } else if (range_end >= submap_end) { + range_end = submap_end; + } + + if (range_start > submap_base + submap_offset) { + /* + * Deallocate everything between the last offset in the + * submap and the start of this range. + */ + delete_size = range_start - + (submap_base + submap_offset); + (void) vm_deallocate(submap, + submap_offset, + delete_size); + } else { + delete_size = 0; + } + + /* skip to the end of the range */ + submap_offset += delete_size + (range_end - range_start); + + if (submap_base + submap_offset >= submap_end) { + /* get to next submap */ + + if (submap == data_submap) { + /* no other submap after data: done ! */ + break; + } + + /* get original range again */ + range_start = ranges[i_range].srr_address; + range_end = range_start + ranges[i_range].srr_size; + range_start = vm_map_trunc_page(range_start); + range_end = vm_map_round_page(range_end); + + if (range_end > submap_end) { + /* + * This last range overlaps with the next + * submap. We need to process it again + * after switching submaps. Otherwise, we'll + * just continue with the next range. + */ + i_range--; + } + + if (submap == text_submap) { + /* + * Switch to the data submap. 
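The surrounding loop is, in essence, hole punching: everything between the sorted kept ranges is deallocated. The sketch below reduces it to a single flat address range, with the text/data submap switching elided and the release operation left as an abstract callback; all names are hypothetical.

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

struct krange { unsigned long start, end; };

typedef void (*release_fn)(unsigned long start, unsigned long end);

void
cleanup_holes(unsigned long base, unsigned long limit,
              const struct krange *keep, size_t count, release_fn release)
{
    unsigned long cursor = base;
    size_t i;

    /* "keep" must already be sorted by start address (see the sort above). */
    for (i = 0; i < count; i++) {
        if (keep[i].start > cursor)
            release(cursor, keep[i].start);   /* hole before this kept range */
        if (keep[i].end > cursor)
            cursor = keep[i].end;             /* skip past the kept range */
    }
    if (cursor < limit)
        release(cursor, limit);               /* trailing hole */
}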
+ */ + submap = data_submap; + submap_offset = 0; + submap_base = sm_info->client_base + + sm_info->text_size; + submap_end = submap_base + sm_info->data_size; + } + } + } + + if (submap_base + submap_offset < submap_end) { + /* delete remainder of this submap, from "offset" to the end */ + (void) vm_deallocate(submap, + submap_offset, + submap_end - submap_base - submap_offset); + /* if nothing to keep in data submap, delete it all */ + if (submap == text_submap) { + submap = data_submap; + submap_offset = 0; + submap_base = sm_info->client_base + sm_info->text_size; + submap_end = submap_base + sm_info->data_size; + (void) vm_deallocate(data_submap, + 0, + submap_end - submap_base); + } + } + + kr = KERN_SUCCESS; + return kr; +} + /* A hash lookup function for the list of loaded files in */ /* shared_memory_server space. */ @@ -731,6 +1654,7 @@ lsf_hash_lookup( void *file_object, vm_offset_t recognizableOffset, int size, + boolean_t regular, boolean_t alternate, shared_region_task_mappings_t sm_info) { @@ -739,14 +1663,16 @@ lsf_hash_lookup( shared_region_mapping_t target_region; int depth; + LSF_DEBUG(("lsf_hash_lookup: table=%p, file=%p, offset=0x%x size=0x%x " + "reg=%d alt=%d sm_info=%p\n", + hash_table, file_object, recognizableOffset, size, + regular, alternate, sm_info)); + bucket = &(hash_table[load_file_hash((int)file_object, size)]); for (entry = (load_struct_t *)queue_first(bucket); !queue_end(bucket, &entry->links); entry = (load_struct_t *)queue_next(&entry->links)) { - if ((entry->file_object == (int) file_object) && - (entry->file_offset != recognizableOffset)) { - } if ((entry->file_object == (int)file_object) && (entry->file_offset == recognizableOffset)) { target_region = (shared_region_mapping_t)sm_info->self; @@ -755,14 +1681,27 @@ lsf_hash_lookup( if((!(sm_info->self)) || ((target_region == entry->regions_instance) && (target_region->depth >= entry->depth))) { - if(alternate) { - if (entry->base_address >= - sm_info->alternate_base) - return entry; - } else { - if (entry->base_address < - sm_info->alternate_base) - return entry; + if(alternate && + entry->base_address >= sm_info->alternate_base) { + LSF_DEBUG(("lsf_hash_lookup: " + "alt=%d found entry %p " + "(base=0x%x " + "alt_base=0x%x)\n", + alternate, entry, + entry->base_address, + sm_info->alternate_base)); + return entry; + } + if (regular && + entry->base_address < sm_info->alternate_base) { + LSF_DEBUG(("lsf_hash_lookup: " + "reg=%d found entry %p " + "(base=0x%x " + "alt_base=0x%x)\n", + regular, entry, + entry->base_address, + sm_info->alternate_base)); + return entry; } } if(target_region->object_chain) { @@ -776,6 +1715,10 @@ lsf_hash_lookup( } } + LSF_DEBUG(("lsf_hash_lookup: table=%p, file=%p, offset=0x%x size=0x%x " + "reg=%d alt=%d sm_info=%p NOT FOUND\n", + hash_table, file_object, recognizableOffset, size, + regular, alternate, sm_info)); return (load_struct_t *)0; } @@ -783,22 +1726,27 @@ __private_extern__ load_struct_t * lsf_remove_regions_mappings_lock( shared_region_mapping_t region, shared_region_task_mappings_t sm_info, - int need_lock) + int need_sfh_lock) { int i; register queue_t bucket; shared_file_info_t *shared_file_header; load_struct_t *entry; load_struct_t *next_entry; - load_struct_t *prev_entry; shared_file_header = (shared_file_info_t *)sm_info->region_mappings; - if (need_lock) + LSF_DEBUG(("lsf_remove_regions_mappings_lock(region=%p,sm_info=%p) " + "sfh=%p\n", + region, sm_info, shared_file_header)); + if (need_sfh_lock) mutex_lock(&shared_file_header->lock); 
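lsf_remove_regions_mappings_lock(), continued below, walks every hash bucket and unloads the entries owned by the dying region; the next pointer has to be captured before lsf_unload() can free the entry out from under the iteration. A sketch of that traversal pattern, illustrative only, with hypothetical names:

/* Illustrative sketch only, not part of the patch. */
#include <stddef.h>

struct lentry {
    struct lentry *next;
    void          *owner;       /* regions_instance analogue */
};

void
remove_owned(struct lentry **buckets, size_t nbuckets, void *owner,
             void (*unload)(struct lentry *))
{
    size_t i;
    struct lentry *e, *next;

    for (i = 0; i < nbuckets; i++) {
        for (e = buckets[i]; e != NULL; e = next) {
            next = e->next;     /* grab next before e can be freed */
            if (e->owner == owner)
                unload(e);      /* may remove and free e, as lsf_unload() does */
        }
    }
}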
if(shared_file_header->hash_init == FALSE) { - if (need_lock) + if (need_sfh_lock) mutex_unlock(&shared_file_header->lock); + LSF_DEBUG(("lsf_remove_regions_mappings_lock" + "(region=%p,sm_info=%p): not inited\n", + region, sm_info)); return NULL; } for(i = 0; i<shared_file_header->hash_size; i++) { @@ -807,14 +1755,27 @@ lsf_remove_regions_mappings_lock( !queue_end(bucket, &entry->links);) { next_entry = (load_struct_t *)queue_next(&entry->links); if(region == entry->regions_instance) { - lsf_unload((void *)entry->file_object, + LSF_DEBUG(("lsf_remove_regions_mapping_lock: " + "entry %p region %p: " + "unloading\n", + entry, region)); + lsf_unload((void *)entry->file_object, entry->base_address, sm_info); + } else { + LSF_DEBUG(("lsf_remove_regions_mapping_lock: " + "entry %p region %p target region %p: " + "not unloading\n", + entry, entry->regions_instance, region)); } + entry = next_entry; } } - if (need_lock) + if (need_sfh_lock) mutex_unlock(&shared_file_header->lock); + LSF_DEBUG(("lsf_removed_regions_mapping_lock done\n")); + + return NULL; /* XXX */ } /* @@ -842,7 +1803,9 @@ lsf_hash_delete( register queue_t bucket; shared_file_info_t *shared_file_header; load_struct_t *entry; - load_struct_t *prev_entry; + + LSF_DEBUG(("lsf_hash_delete(file=%p,base=0x%x,sm_info=%p)\n", + file_object, base_offset, sm_info)); shared_file_header = (shared_file_info_t *)sm_info->region_mappings; @@ -858,11 +1821,13 @@ lsf_hash_delete( (entry->base_address == base_offset)) { queue_remove(bucket, entry, load_struct_ptr_t, links); + LSF_DEBUG(("lsf_hash_delete: found it\n")); return entry; } } } + LSF_DEBUG(("lsf_hash_delete; not found\n")); return (load_struct_t *)0; } @@ -876,6 +1841,9 @@ lsf_hash_insert( { shared_file_info_t *shared_file_header; + LSF_DEBUG(("lsf_hash_insert(entry=%p,sm_info=%p): file=%p base=0x%x\n", + entry, sm_info, entry->file_object, entry->base_address)); + shared_file_header = (shared_file_info_t *)sm_info->region_mappings; queue_enter(&shared_file_header->hash [load_file_hash(entry->file_object, @@ -910,7 +1878,23 @@ lsf_load( vm_offset_t original_alt_load_next; vm_offset_t alternate_load_next; + LSF_DEBUG(("lsf_load" + "(size=0x%x,base=0x%x,cnt=%d,file=%p,flags=%d,sm_info=%p)" + "\n", + mapped_file_size, *base_address, map_cnt, file_object, + flags, sm_info)); entry = (load_struct_t *)zalloc(lsf_zone); + LSF_ALLOC_DEBUG(("lsf_load: entry=%p map_cnt=%d\n", entry, map_cnt)); + LSF_DEBUG(("lsf_load" + "(size=0x%x,base=0x%x,cnt=%d,file=%p,flags=%d,sm_info=%p) " + "entry=%p\n", + mapped_file_size, *base_address, map_cnt, file_object, + flags, sm_info, entry)); + if (entry == NULL) { + printf("lsf_load: unable to allocate memory\n"); + return KERN_NO_SPACE; + } + shared_file_available_hash_ele--; entry->file_object = (int)file_object; entry->mapping_cnt = map_cnt; @@ -928,7 +1912,7 @@ lsf_load( alternate_load_next = sm_info->alternate_next; original_alt_load_next = alternate_load_next; if (flags & ALTERNATE_LOAD_SITE) { - int max_loadfile_offset; + vm_offset_t max_loadfile_offset; *base_address = ((*base_address) & ~SHARED_TEXT_REGION_MASK) + sm_info->alternate_next; @@ -943,7 +1927,7 @@ lsf_load( + mappings[i].size; } } - if((alternate_load_next + round_page_32(max_loadfile_offset)) >= + if((alternate_load_next + round_page(max_loadfile_offset)) >= (sm_info->data_size - (sm_info->data_size>>9))) { entry->base_address = (*base_address) & SHARED_TEXT_REGION_MASK; @@ -951,7 +1935,7 @@ lsf_load( return KERN_NO_SPACE; } - alternate_load_next += round_page_32(max_loadfile_offset); + alternate_load_next +=
round_page(max_loadfile_offset); } else { if (((*base_address) & SHARED_TEXT_REGION_MASK) > @@ -984,7 +1968,7 @@ lsf_load( vm_address_t region_end; if ((mappings[i].protection & VM_PROT_WRITE) == 0) { - // mapping offsets are relative to start of shared segments. + // mapping offsets are relative to start of shared segments. region_mask = SHARED_TEXT_REGION_MASK; region_start = (mappings[i].mapping_offset & region_mask)+entry->base_address; region_end = (mappings[i].size + region_start); @@ -992,7 +1976,7 @@ lsf_load( // No library is permitted to load so any bit of it is in the // shared alternate space. If they want it loaded, they can put // it in the alternate space explicitly. -printf("Library trying to load across alternate shared region boundary -- denied!\n"); + printf("Library trying to load across alternate shared region boundary -- denied!\n"); lsf_unload(file_object, entry->base_address, sm_info); return KERN_INVALID_ARGUMENT; } @@ -1002,9 +1986,9 @@ printf("Library trying to load across alternate shared region boundary -- denied region_start = (mappings[i].mapping_offset & region_mask)+entry->base_address; region_end = (mappings[i].size + region_start); if (region_end >= SHARED_ALTERNATE_LOAD_BASE) { -printf("Library trying to load across alternate shared region boundary-- denied!\n"); - lsf_unload(file_object, entry->base_address, sm_info); - return KERN_INVALID_ARGUMENT; + printf("Library trying to load across alternate shared region boundary-- denied!\n"); + lsf_unload(file_object, entry->base_address, sm_info); + return KERN_INVALID_ARGUMENT; } } // write? } // for @@ -1045,7 +2029,7 @@ printf("Library trying to load across alternate shared region boundary-- denied! + entry->base_address; if(vm_allocate(((vm_named_entry_t)local_map->ip_kobject) ->backing.map, &target_address, - mappings[i].size, FALSE)) { + mappings[i].size, VM_FLAGS_FIXED)) { lsf_unload(file_object, entry->base_address, sm_info); return KERN_FAILURE; } @@ -1053,15 +2037,16 @@ printf("Library trying to load across alternate shared region boundary-- denied! + entry->base_address; if(!(mappings[i].protection & VM_PROT_ZF)) { if(vm_map_copyin(current_map(), - mapped_file + mappings[i].file_offset, - round_page_32(mappings[i].size), FALSE, ©_object)) { + (vm_map_address_t)(mapped_file + mappings[i].file_offset), + vm_map_round_page(mappings[i].size), FALSE, ©_object)) { vm_deallocate(((vm_named_entry_t)local_map->ip_kobject) ->backing.map, target_address, mappings[i].size); lsf_unload(file_object, entry->base_address, sm_info); return KERN_FAILURE; } if(vm_map_copy_overwrite(((vm_named_entry_t) - local_map->ip_kobject)->backing.map, target_address, + local_map->ip_kobject)->backing.map, + (vm_map_address_t)target_address, copy_object, FALSE)) { vm_deallocate(((vm_named_entry_t)local_map->ip_kobject) ->backing.map, target_address, mappings[i].size); @@ -1069,34 +2054,512 @@ printf("Library trying to load across alternate shared region boundary-- denied! 
return KERN_FAILURE; } } + + file_mapping = (loaded_mapping_t *)zalloc(lsf_zone); + if (file_mapping == NULL) { + lsf_unload(file_object, entry->base_address, sm_info); + printf("lsf_load: unable to allocate memory\n"); + return KERN_NO_SPACE; + } + shared_file_available_hash_ele--; + file_mapping->mapping_offset = (mappings[i].mapping_offset) + & region_mask; + file_mapping->size = mappings[i].size; + file_mapping->file_offset = mappings[i].file_offset; + file_mapping->protection = mappings[i].protection; + file_mapping->next = NULL; + LSF_DEBUG(("lsf_load: file_mapping %p " + "for offset=0x%x size=0x%x\n", + file_mapping, file_mapping->mapping_offset, + file_mapping->size)); + vm_map_protect(((vm_named_entry_t)local_map->ip_kobject) ->backing.map, target_address, - round_page_32(target_address + mappings[i].size), + round_page(target_address + mappings[i].size), (mappings[i].protection & (VM_PROT_READ | VM_PROT_EXECUTE)), TRUE); vm_map_protect(((vm_named_entry_t)local_map->ip_kobject) ->backing.map, target_address, - round_page_32(target_address + mappings[i].size), + round_page(target_address + mappings[i].size), (mappings[i].protection & (VM_PROT_READ | VM_PROT_EXECUTE)), FALSE); + + *tptr = file_mapping; + tptr = &(file_mapping->next); + } + shared_region_mapping_set_alt_next( + (shared_region_mapping_t) sm_info->self, + alternate_load_next); + LSF_DEBUG(("lsf_load: done\n")); + return KERN_SUCCESS; +} + + +/* + * lsf_slide: + * + * Look in the shared region, starting from the end, for a place to fit all the + * mappings while respecting their relative offsets. + */ +static kern_return_t +lsf_slide( + unsigned int map_cnt, + struct shared_file_mapping_np *mappings_in, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t *base_offset_p) +{ + mach_vm_offset_t max_mapping_offset; + int i; + vm_map_entry_t map_entry, prev_entry, next_entry; + mach_vm_offset_t prev_hole_start, prev_hole_end; + mach_vm_offset_t mapping_offset, mapping_end_offset; + mach_vm_offset_t base_offset; + mach_vm_size_t mapping_size; + mach_vm_offset_t wiggle_room, wiggle; + vm_map_t text_map, data_map, map; + vm_named_entry_t region_entry; + ipc_port_t region_handle; + kern_return_t kr; + + struct shared_file_mapping_np *mappings, tmp_mapping; + unsigned int sort_index, sorted_index; + vm_map_offset_t sort_min_address; + unsigned int sort_min_index; + + /* + * Sort the mappings array, so that we can try and fit them in + * in the right order as we progress along the VM maps. + * + * We can't modify the original array (the original order is + * important when doing lookups of the mappings), so copy it first. + */ + + kr = kmem_alloc(kernel_map, + (vm_offset_t *) &mappings, + (vm_size_t) (map_cnt * sizeof (mappings[0]))); + if (kr != KERN_SUCCESS) { + return KERN_NO_SPACE; + } + + bcopy(mappings_in, mappings, map_cnt * sizeof (mappings[0])); + + max_mapping_offset = 0; + for (sorted_index = 0; + sorted_index < map_cnt; + sorted_index++) { + + /* first remaining entry is our new starting point */ + sort_min_index = sorted_index; + mapping_end_offset = ((mappings[sort_min_index].sfm_address & + SHARED_TEXT_REGION_MASK) + + mappings[sort_min_index].sfm_size); + sort_min_address = mapping_end_offset; + /* compute the highest mapping_offset as well... 
*/ + if (mapping_end_offset > max_mapping_offset) { + max_mapping_offset = mapping_end_offset; + } + /* find the lowest mapping_offset in the remaining entries */ + for (sort_index = sorted_index + 1; + sort_index < map_cnt; + sort_index++) { + + mapping_end_offset = + ((mappings[sort_index].sfm_address & + SHARED_TEXT_REGION_MASK) + + mappings[sort_index].sfm_size); + + if (mapping_end_offset < sort_min_address) { + /* lowest mapping_offset so far... */ + sort_min_index = sort_index; + sort_min_address = mapping_end_offset; + } + } + if (sort_min_index != sorted_index) { + /* swap entries */ + tmp_mapping = mappings[sort_min_index]; + mappings[sort_min_index] = mappings[sorted_index]; + mappings[sorted_index] = tmp_mapping; + } + + } + + max_mapping_offset = vm_map_round_page(max_mapping_offset); + + /* start from the end of the shared area */ + base_offset = sm_info->text_size; + + /* can all the mappings fit ? */ + if (max_mapping_offset > base_offset) { + kmem_free(kernel_map, + (vm_offset_t) mappings, + map_cnt * sizeof (mappings[0])); + return KERN_FAILURE; + } + + /* + * Align the last mapping to the end of the submaps + * and start from there. + */ + base_offset -= max_mapping_offset; + + region_handle = (ipc_port_t) sm_info->text_region; + region_entry = (vm_named_entry_t) region_handle->ip_kobject; + text_map = region_entry->backing.map; + + region_handle = (ipc_port_t) sm_info->data_region; + region_entry = (vm_named_entry_t) region_handle->ip_kobject; + data_map = region_entry->backing.map; + + vm_map_lock_read(text_map); + vm_map_lock_read(data_map); + +start_over: + /* + * At first, we can wiggle all the way from our starting point + * (base_offset) towards the start of the map (0), if needed. + */ + wiggle_room = base_offset; + + for (i = (signed) map_cnt - 1; i >= 0; i--) { + if (mappings[i].sfm_init_prot & VM_PROT_COW) { + /* copy-on-write mappings are in the data submap */ + map = data_map; + } else { + /* other mappings are in the text submap */ + map = text_map; + } + /* get the offset within the appropriate submap */ + mapping_offset = (mappings[i].sfm_address & + SHARED_TEXT_REGION_MASK); + mapping_size = mappings[i].sfm_size; + mapping_end_offset = mapping_offset + mapping_size; + mapping_offset = vm_map_trunc_page(mapping_offset); + mapping_end_offset = vm_map_round_page(mapping_end_offset); + mapping_size = mapping_end_offset - mapping_offset; + + for (;;) { + if (vm_map_lookup_entry(map, + base_offset + mapping_offset, + &map_entry)) { + /* + * The start address for that mapping + * is already mapped: no fit. + * Locate the hole immediately before this map + * entry. + */ + prev_hole_end = map_entry->vme_start; + prev_entry = map_entry->vme_prev; + if (prev_entry == vm_map_to_entry(map)) { + /* no previous entry */ + prev_hole_start = map->min_offset; + } else { + /* previous entry ends here */ + prev_hole_start = prev_entry->vme_end; + } + } else { + /* + * The start address for that mapping is not + * mapped. + * Locate the start and end of the hole + * at that location. 
+ */ + /* map_entry is the previous entry */ + if (map_entry == vm_map_to_entry(map)) { + /* no previous entry */ + prev_hole_start = map->min_offset; + } else { + /* previous entry ends there */ + prev_hole_start = map_entry->vme_end; + } + next_entry = map_entry->vme_next; + if (next_entry == vm_map_to_entry(map)) { + /* no next entry */ + prev_hole_end = map->max_offset; + } else { + prev_hole_end = next_entry->vme_start; + } + } + + if (prev_hole_end <= base_offset + mapping_offset) { + /* hole is to our left: try and wiggle to fit */ + wiggle = base_offset + mapping_offset - prev_hole_end + mapping_size; + if (wiggle > base_offset) { + /* we're getting out of the map */ + kr = KERN_FAILURE; + goto done; + } + base_offset -= wiggle; + if (wiggle > wiggle_room) { + /* can't wiggle that much: start over */ + goto start_over; + } + /* account for the wiggling done */ + wiggle_room -= wiggle; + } + + if (prev_hole_end > + base_offset + mapping_offset + mapping_size) { + /* + * The hole extends further to the right + * than what we need. Ignore the extra space. + */ + prev_hole_end = (base_offset + mapping_offset + + mapping_size); + } + + if (prev_hole_end < + base_offset + mapping_offset + mapping_size) { + /* + * The hole is not big enough to establish + * the mapping right there: wiggle towards + * the beginning of the hole so that the end + * of our mapping fits in the hole... + */ + wiggle = base_offset + mapping_offset + + mapping_size - prev_hole_end; + if (wiggle > base_offset) { + /* we're getting out of the map */ + kr = KERN_FAILURE; + goto done; + } + base_offset -= wiggle; + if (wiggle > wiggle_room) { + /* can't wiggle that much: start over */ + goto start_over; + } + /* account for the wiggling done */ + wiggle_room -= wiggle; + + /* keep searching from this new base */ + continue; + } + + if (prev_hole_start > base_offset + mapping_offset) { + /* no hole found: keep looking */ + continue; + } + + /* compute wiggling room at this hole */ + wiggle = base_offset + mapping_offset - prev_hole_start; + if (wiggle < wiggle_room) { + /* less wiggle room than before... */ + wiggle_room = wiggle; + } + + /* found a hole that fits: skip to next mapping */ + break; + } /* while we look for a hole */ + } /* for each mapping */ + + *base_offset_p = base_offset; + kr = KERN_SUCCESS; + +done: + vm_map_unlock_read(text_map); + vm_map_unlock_read(data_map); + + kmem_free(kernel_map, + (vm_offset_t) mappings, + map_cnt * sizeof (mappings[0])); + + return kr; +} + +/* + * lsf_map: + * + * Attempt to establish the mappings for a split library into the shared region. 
+ */ +static kern_return_t +lsf_map( + struct shared_file_mapping_np *mappings, + int map_cnt, + void *file_control, + memory_object_offset_t file_size, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t base_offset, + mach_vm_offset_t *slide_p) +{ + load_struct_t *entry; + loaded_mapping_t *file_mapping; + loaded_mapping_t **tptr; + ipc_port_t region_handle; + vm_named_entry_t region_entry; + mach_port_t map_port; + vm_object_t file_object; + kern_return_t kr; + int i; + mach_vm_offset_t original_base_offset; + + /* get the VM object from the file's memory object handle */ + file_object = memory_object_control_to_vm_object(file_control); + + original_base_offset = base_offset; + + LSF_DEBUG(("lsf_map" + "(cnt=%d,file=%p,sm_info=%p)" + "\n", + map_cnt, file_object, + sm_info)); + +restart_after_slide: + /* get a new "load_struct_t" to described the mappings for that file */ + entry = (load_struct_t *)zalloc(lsf_zone); + LSF_ALLOC_DEBUG(("lsf_map: entry=%p map_cnt=%d\n", entry, map_cnt)); + LSF_DEBUG(("lsf_map" + "(cnt=%d,file=%p,sm_info=%p) " + "entry=%p\n", + map_cnt, file_object, + sm_info, entry)); + if (entry == NULL) { + printf("lsf_map: unable to allocate memory\n"); + return KERN_NO_SPACE; + } + shared_file_available_hash_ele--; + entry->file_object = (int)file_object; + entry->mapping_cnt = map_cnt; + entry->mappings = NULL; + entry->links.prev = (queue_entry_t) 0; + entry->links.next = (queue_entry_t) 0; + entry->regions_instance = (shared_region_mapping_t)sm_info->self; + entry->depth=((shared_region_mapping_t)sm_info->self)->depth; + entry->file_offset = mappings[0].sfm_file_offset; + + /* insert the new file entry in the hash table, for later lookups */ + lsf_hash_insert(entry, sm_info); + + /* where we should add the next mapping description for that file */ + tptr = &(entry->mappings); + + entry->base_address = base_offset; + + + /* establish each requested mapping */ + for (i = 0; i < map_cnt; i++) { + mach_vm_offset_t target_address; + mach_vm_offset_t region_mask; + + if (mappings[i].sfm_init_prot & VM_PROT_COW) { + region_handle = (ipc_port_t)sm_info->data_region; + region_mask = SHARED_DATA_REGION_MASK; + if ((((mappings[i].sfm_address + base_offset) + & GLOBAL_SHARED_SEGMENT_MASK) != 0x10000000) || + (((mappings[i].sfm_address + base_offset + + mappings[i].sfm_size - 1) + & GLOBAL_SHARED_SEGMENT_MASK) != 0x10000000)) { + lsf_unload(file_object, + entry->base_address, sm_info); + return KERN_INVALID_ARGUMENT; + } + } else { + region_mask = SHARED_TEXT_REGION_MASK; + region_handle = (ipc_port_t)sm_info->text_region; + if (((mappings[i].sfm_address + base_offset) + & GLOBAL_SHARED_SEGMENT_MASK) || + ((mappings[i].sfm_address + base_offset + + mappings[i].sfm_size - 1) + & GLOBAL_SHARED_SEGMENT_MASK)) { + lsf_unload(file_object, + entry->base_address, sm_info); + return KERN_INVALID_ARGUMENT; + } + } + if (!(mappings[i].sfm_init_prot & VM_PROT_ZF) && + ((mappings[i].sfm_file_offset + mappings[i].sfm_size) > + (file_size))) { + lsf_unload(file_object, entry->base_address, sm_info); + return KERN_INVALID_ARGUMENT; + } + target_address = entry->base_address + + ((mappings[i].sfm_address) & region_mask); + if (mappings[i].sfm_init_prot & VM_PROT_ZF) { + map_port = MACH_PORT_NULL; + } else { + map_port = (ipc_port_t) file_object->pager; + } + region_entry = (vm_named_entry_t) region_handle->ip_kobject; + + if (mach_vm_map(region_entry->backing.map, + &target_address, + vm_map_round_page(mappings[i].sfm_size), + 0, + VM_FLAGS_FIXED, + map_port, + 
mappings[i].sfm_file_offset, + TRUE, + (mappings[i].sfm_init_prot & + (VM_PROT_READ|VM_PROT_EXECUTE)), + (mappings[i].sfm_max_prot & + (VM_PROT_READ|VM_PROT_EXECUTE)), + VM_INHERIT_DEFAULT) != KERN_SUCCESS) { + lsf_unload(file_object, entry->base_address, sm_info); + + if (slide_p != NULL) { + /* + * Requested mapping failed but the caller + * is OK with sliding the library in the + * shared region, so let's try and slide it... + */ + + /* lookup an appropriate spot */ + kr = lsf_slide(map_cnt, mappings, + sm_info, &base_offset); + if (kr == KERN_SUCCESS) { + /* try and map it there ... */ + entry->base_address = base_offset; + goto restart_after_slide; + } + /* couldn't slide ... */ + } + + return KERN_FAILURE; + } + + /* record this mapping */ file_mapping = (loaded_mapping_t *)zalloc(lsf_zone); - if(file_mapping == 0) - panic("lsf_load: OUT OF MAPPINGS!"); + if (file_mapping == NULL) { + lsf_unload(file_object, entry->base_address, sm_info); + printf("lsf_map: unable to allocate memory\n"); + return KERN_NO_SPACE; + } shared_file_available_hash_ele--; - file_mapping->mapping_offset = (mappings[i].mapping_offset) + file_mapping->mapping_offset = (mappings[i].sfm_address) & region_mask; - file_mapping->size = mappings[i].size; - file_mapping->file_offset = mappings[i].file_offset; - file_mapping->protection = mappings[i].protection; + file_mapping->size = mappings[i].sfm_size; + file_mapping->file_offset = mappings[i].sfm_file_offset; + file_mapping->protection = mappings[i].sfm_init_prot; file_mapping->next = NULL; + LSF_DEBUG(("lsf_map: file_mapping %p " + "for offset=0x%x size=0x%x\n", + file_mapping, file_mapping->mapping_offset, + file_mapping->size)); + + /* and link it to the file entry */ *tptr = file_mapping; + + /* where to put the next mapping's description */ tptr = &(file_mapping->next); } - shared_region_mapping_set_alt_next(sm_info->self, alternate_load_next); - return KERN_SUCCESS; - + + if (slide_p != NULL) { + *slide_p = base_offset - original_base_offset; + } + + if (sm_info->flags & SHARED_REGION_STANDALONE) { + /* + * We have a standalone and private shared region, so we + * don't really need to keep the information about each file + * and each mapping. Just deallocate it all. + * XXX we still have the hash table, though... + */ + lsf_deallocate(file_object, entry->base_address, sm_info, + FALSE); + } + + LSF_DEBUG(("lsf_map: done\n")); + return KERN_SUCCESS; } @@ -1109,38 +2572,73 @@ lsf_unload( void *file_object, vm_offset_t base_offset, shared_region_task_mappings_t sm_info) +{ + lsf_deallocate(file_object, base_offset, sm_info, TRUE); +} + +/* + * lsf_deallocate: + * + * Deallocates all the "shared region" internal data structures describing + * the file and its mappings. + * Also deallocate the actual file mappings if requested ("unload" arg). 
+ */ +static void +lsf_deallocate( + void *file_object, + vm_offset_t base_offset, + shared_region_task_mappings_t sm_info, + boolean_t unload) { load_struct_t *entry; - ipc_port_t local_map; loaded_mapping_t *map_ele; loaded_mapping_t *back_ptr; + LSF_DEBUG(("lsf_deallocate(file=%p,base=0x%x,sm_info=%p,unload=%d)\n", + file_object, base_offset, sm_info, unload)); entry = lsf_hash_delete(file_object, base_offset, sm_info); if(entry) { map_ele = entry->mappings; while(map_ele != NULL) { - if(map_ele->protection & VM_PROT_COW) { - local_map = (ipc_port_t)sm_info->data_region; - } else { - local_map = (ipc_port_t)sm_info->text_region; + if (unload) { + ipc_port_t region_handle; + vm_named_entry_t region_entry; + + if(map_ele->protection & VM_PROT_COW) { + region_handle = (ipc_port_t) + sm_info->data_region; + } else { + region_handle = (ipc_port_t) + sm_info->text_region; + } + region_entry = (vm_named_entry_t) + region_handle->ip_kobject; + + vm_deallocate(region_entry->backing.map, + (entry->base_address + + map_ele->mapping_offset), + map_ele->size); } - vm_deallocate(((vm_named_entry_t)local_map->ip_kobject) - ->backing.map, entry->base_address + - map_ele->mapping_offset, - map_ele->size); back_ptr = map_ele; map_ele = map_ele->next; - zfree(lsf_zone, (vm_offset_t)back_ptr); + LSF_DEBUG(("lsf_deallocate: freeing mapping %p " + "offset 0x%x size 0x%x\n", + back_ptr, back_ptr->mapping_offset, + back_ptr->size)); + zfree(lsf_zone, back_ptr); shared_file_available_hash_ele++; } - zfree(lsf_zone, (vm_offset_t)entry); + LSF_DEBUG(("lsf_deallocate: freeing entry %p\n", entry)); + LSF_ALLOC_DEBUG(("lsf_deallocate: entry=%p", entry)); + zfree(lsf_zone, entry); shared_file_available_hash_ele++; } + LSF_DEBUG(("lsf_unload: done\n")); } /* integer is from 1 to 100 and represents percent full */ unsigned int -lsf_mapping_pool_gauge() +lsf_mapping_pool_gauge(void) { return ((lsf_zone->count * lsf_zone->elem_size) * 100)/lsf_zone->max_size; } diff --git a/osfmk/vm/vm_shared_memory_server.h b/osfmk/vm/vm_shared_memory_server.h index 5d3254b2b..7d0774825 100644 --- a/osfmk/vm/vm_shared_memory_server.h +++ b/osfmk/vm/vm_shared_memory_server.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002,2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -30,9 +30,7 @@ #ifndef _VM_SHARED_MEMORY_SERVER_H_ #define _VM_SHARED_MEMORY_SERVER_H_ -#include - -#ifdef __APPLE_API_PRIVATE +#ifdef KERNEL_PRIVATE #include #include @@ -40,6 +38,18 @@ #include +#if DEBUG +extern int shared_region_debug; +#define SHARED_REGION_DEBUG(args) \ + MACRO_BEGIN \ + if (shared_region_debug) { \ + kprintf args; \ + } \ + MACRO_END +#else /* DEBUG */ +#define SHARED_REGION_DEBUG(args) +#endif /* DEBUG */ + extern mach_port_t shared_text_region_handle; extern mach_port_t shared_data_region_handle; @@ -61,6 +71,7 @@ struct shared_region_task_mappings { #define SHARED_REGION_SYSTEM 0x1 // Default env for system and fs_root #define SHARED_REGION_FULL 0x2 // Shared regions are full #define SHARED_REGION_STALE 0x4 // Indicates no longer in default list +#define SHARED_REGION_STANDALONE 0x10 // Shared region is not shared ! 
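/*
 * Note the calling convention used with SHARED_REGION_DEBUG above (and
 * with the LSF_DEBUG/LSF_ALLOC_DEBUG macros in the .c file): the whole
 * printf-style argument list is passed as one macro argument inside a
 * second pair of parentheses, e.g.
 *
 *	SHARED_REGION_DEBUG(("shared_region_cleanup: map=%p\n", map));
 *
 * so that a single-parameter macro can forward a variable-length
 * argument list without C99 variadic macros; the macro body simply
 * pastes "args" after kprintf.  A minimal standalone sketch of the same
 * trick, with printf in place of kprintf and a hypothetical my_debug
 * flag standing in for shared_region_debug:
 */
#include <stdio.h>

static int my_debug = 1;

#define MY_DEBUG(args)			\
	do {				\
		if (my_debug)		\
			printf args;	\
	} while (0)

static void
my_debug_example(void)
{
	int value = 42;

	/* the doubled parentheses become printf's argument list */
	MY_DEBUG(("my_debug_example: value=%d\n", value));
}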
/* defines for default environment, and co-resident systems */ @@ -70,7 +81,6 @@ struct shared_region_task_mappings { typedef struct shared_region_task_mappings *shared_region_task_mappings_t; typedef struct shared_region_mapping *shared_region_mapping_t; - #ifdef MACH_KERNEL_PRIVATE #include @@ -150,7 +160,7 @@ struct shared_region_mapping { }; #define shared_region_mapping_lock_init(object) \ - mutex_init(&(object)->Lock, ETAP_VM_OBJ) + mutex_init(&(object)->Lock, 0) #define shared_region_mapping_lock(object) mutex_lock(&(object)->Lock) #define shared_region_mapping_unlock(object) mutex_unlock(&(object)->Lock) @@ -160,6 +170,9 @@ struct shared_region_mapping ; #endif /* MACH_KERNEL_PRIVATE */ +#define load_file_hash(file_object, size) \ + ((((natural_t)file_object) & 0xffffff) % size) + extern kern_return_t copyin_shared_file( vm_offset_t mapped_file, vm_size_t mapped_file_size, @@ -170,6 +183,20 @@ extern kern_return_t copyin_shared_file( shared_region_task_mappings_t shared_region, int *flags); +extern kern_return_t map_shared_file( + int map_cnt, + struct shared_file_mapping_np *mappings, + memory_object_control_t file_control, + memory_object_size_t file_size, + shared_region_task_mappings_t sm_info, + mach_vm_offset_t base_offset, + mach_vm_offset_t *slide_p); + +extern kern_return_t shared_region_cleanup( + unsigned int range_count, + struct shared_region_range_np *ranges, + shared_region_task_mappings_t sm_info); + extern kern_return_t shared_region_mapping_info( shared_region_mapping_t shared_region, mach_port_t *text_region, @@ -202,14 +229,13 @@ extern kern_return_t shared_region_mapping_ref( extern kern_return_t shared_region_mapping_dealloc( shared_region_mapping_t shared_region); -__private_extern__ kern_return_t shared_region_mapping_dealloc_lock( - shared_region_mapping_t shared_region, - int need_lock); - extern kern_return_t shared_region_object_chain_attach( shared_region_mapping_t target_region, shared_region_mapping_t object_chain); +extern void shared_region_object_chain_detached( + shared_region_mapping_t target_region); + extern kern_return_t vm_get_shared_region( task_t task, shared_region_mapping_t *shared_region); @@ -230,10 +256,41 @@ extern void remove_default_shared_region( __private_extern__ void remove_default_shared_region_lock( shared_region_mapping_t system_region, + int need_sfh_lock, + int need_drl_lock); + +__private_extern__ struct load_struct *lsf_remove_regions_mappings_lock( + shared_region_mapping_t region, + shared_region_task_mappings_t sm_info, int need_lock); -extern unsigned int lsf_mapping_pool_gauge(); +extern unsigned int lsf_mapping_pool_gauge(void); + +extern kern_return_t shared_file_create_system_region( + shared_region_mapping_t *shared_region); + +extern void remove_all_shared_regions(void); + +extern void shared_file_boot_time_init( + unsigned int fs_base, + unsigned int system); + +extern struct load_struct *lsf_remove_regions_mappings( + shared_region_mapping_t region, + shared_region_task_mappings_t sm_info); + +extern kern_return_t shared_region_mapping_set_alt_next( + shared_region_mapping_t shared_region, + vm_offset_t alt_next); + +extern void mach_memory_entry_port_release(ipc_port_t port); +extern void mach_destroy_memory_entry(ipc_port_t port); + +extern kern_return_t mach_memory_entry_purgable_control( + ipc_port_t entry_port, + vm_purgable_t control, + int *state); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ -#endif /* _VM_SHARED_MEMORY_SERVER_H_ */ +#endif /* _VM_SHARED_MEMORY_SERVER_H_ */ diff --git 
a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index 3a655c3f8..52c2e054b 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -56,36 +56,37 @@ * User-exported virtual memory functions. */ +#include + #include #include #include #include /* to get vm_address_t */ #include #include /* to get pointer_t */ +#include #include #include #include -#include #include +#include +#include #include +#include #include #include +#include #include #include +#include #include #include #include #include #include - -__private_extern__ load_struct_t * -lsf_remove_regions_mappings_lock( - shared_region_mapping_t region, - shared_region_task_mappings_t sm_info, - int need_lock); - +#include vm_size_t upl_offset_to_pagelist = 0; @@ -96,18 +97,20 @@ vm_size_t upl_offset_to_pagelist = 0; ipc_port_t dynamic_pager_control_port=NULL; /* - * vm_allocate allocates "zero fill" memory in the specfied + * mach_vm_allocate allocates "zero fill" memory in the specfied * map. */ kern_return_t -vm_allocate( - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, +mach_vm_allocate( + vm_map_t map, + mach_vm_offset_t *addr, + mach_vm_size_t size, int flags) { + vm_map_offset_t map_addr; + vm_map_size_t map_size; kern_return_t result; - boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; + boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); @@ -116,20 +119,94 @@ vm_allocate( return(KERN_SUCCESS); } - if (anywhere) - *addr = vm_map_min(map); - else - *addr = trunc_page_32(*addr); - size = round_page_32(size); + if (anywhere) { + /* + * No specific address requested, so start candidate address + * search at the minimum address in the map. However, if that + * minimum is 0, bump it up by PAGE_SIZE. We want to limit + * allocations of PAGEZERO to explicit requests since its + * normal use is to catch dereferences of NULL and many + * applications also treat pointers with a value of 0 as + * special and suddenly having address 0 contain useable + * memory would tend to confuse those applications. + */ + map_addr = vm_map_min(map); + if (map_addr == 0) + map_addr += PAGE_SIZE; + } else + map_addr = vm_map_trunc_page(*addr); + map_size = vm_map_round_page(size); + if (map_size == 0) { + return(KERN_INVALID_ARGUMENT); + } + + result = vm_map_enter( + map, + &map_addr, + map_size, + (vm_map_offset_t)0, + flags, + VM_OBJECT_NULL, + (vm_object_offset_t)0, + FALSE, + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); + + *addr = map_addr; + return(result); +} + +/* + * vm_allocate + * Legacy routine that allocates "zero fill" memory in the specfied + * map (which is limited to the same size as the kernel). + */ +kern_return_t +vm_allocate( + vm_map_t map, + vm_offset_t *addr, + vm_size_t size, + int flags) +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t result; + boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); if (size == 0) { + *addr = 0; + return(KERN_SUCCESS); + } + + if (anywhere) { + /* + * No specific address requested, so start candidate address + * search at the minimum address in the map. However, if that + * minimum is 0, bump it up by PAGE_SIZE. 
We want to limit + * allocations of PAGEZERO to explicit requests since its + * normal use is to catch dereferences of NULL and many + * applications also treat pointers with a value of 0 as + * special and suddenly having address 0 contain useable + * memory would tend to confuse those applications. + */ + map_addr = vm_map_min(map); + if (map_addr == 0) + map_addr += PAGE_SIZE; + } else + map_addr = vm_map_trunc_page(*addr); + map_size = vm_map_round_page(size); + if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } result = vm_map_enter( map, - addr, - size, - (vm_offset_t)0, + &map_addr, + map_size, + (vm_map_offset_t)0, flags, VM_OBJECT_NULL, (vm_object_offset_t)0, @@ -138,98 +215,268 @@ vm_allocate( VM_PROT_ALL, VM_INHERIT_DEFAULT); + *addr = CAST_DOWN(vm_offset_t, map_addr); return(result); } /* - * vm_deallocate deallocates the specified range of addresses in the + * mach_vm_deallocate - + * deallocates the specified range of addresses in the * specified address map. */ kern_return_t +mach_vm_deallocate( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + if (size == (mach_vm_offset_t) 0) + return(KERN_SUCCESS); + + return(vm_map_remove(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); +} + +/* + * vm_deallocate - + * deallocates the specified range of addresses in the + * specified address map (limited to addresses the same + * size as the kernel). + */ +kern_return_t vm_deallocate( register vm_map_t map, vm_offset_t start, vm_size_t size) { - if (map == VM_MAP_NULL) + if ((map == VM_MAP_NULL) || (start + size < start)) return(KERN_INVALID_ARGUMENT); if (size == (vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, trunc_page_32(start), - round_page_32(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); } /* - * vm_inherit sets the inheritance of the specified range in the + * mach_vm_inherit - + * Sets the inheritance of the specified range in the * specified map. */ kern_return_t +mach_vm_inherit( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_inherit_t new_inheritance) +{ + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_inheritance > VM_INHERIT_LAST_VALID)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return(vm_map_inherit(map, + vm_map_trunc_page(start), + vm_map_round_page(start+size), + new_inheritance)); +} + +/* + * vm_inherit - + * Sets the inheritance of the specified range in the + * specified map (range limited to addresses + */ +kern_return_t vm_inherit( register vm_map_t map, vm_offset_t start, vm_size_t size, vm_inherit_t new_inheritance) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); - - if (new_inheritance > VM_INHERIT_LAST_VALID) + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_inheritance > VM_INHERIT_LAST_VALID)) return(KERN_INVALID_ARGUMENT); + if (size == 0) + return KERN_SUCCESS; + return(vm_map_inherit(map, - trunc_page_32(start), - round_page_32(start+size), + vm_map_trunc_page(start), + vm_map_round_page(start+size), new_inheritance)); } /* - * vm_protect sets the protection of the specified range in the + * mach_vm_protect - + * Sets the protection of the specified range in the * specified map. 
*/ +kern_return_t +mach_vm_protect( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + boolean_t set_maximum, + vm_prot_t new_protection) +{ + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return(vm_map_protect(map, + vm_map_trunc_page(start), + vm_map_round_page(start+size), + new_protection, + set_maximum)); +} + +/* + * vm_protect - + * Sets the protection of the specified range in the + * specified map. Addressability of the range limited + * to the same size as the kernel. + */ + kern_return_t vm_protect( - register vm_map_t map, + vm_map_t map, vm_offset_t start, vm_size_t size, boolean_t set_maximum, vm_prot_t new_protection) { - if ((map == VM_MAP_NULL) || - (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) + if ((map == VM_MAP_NULL) || (start + size < start) || + (new_protection & ~(VM_PROT_ALL | VM_PROT_COPY))) return(KERN_INVALID_ARGUMENT); + if (size == 0) + return KERN_SUCCESS; + return(vm_map_protect(map, - trunc_page_32(start), - round_page_32(start+size), + vm_map_trunc_page(start), + vm_map_round_page(start+size), new_protection, set_maximum)); } /* + * mach_vm_machine_attributes - * Handle machine-specific attributes for a mapping, such * as cachability, migrability, etc. */ kern_return_t +mach_vm_machine_attribute( + vm_map_t map, + mach_vm_address_t addr, + mach_vm_size_t size, + vm_machine_attribute_t attribute, + vm_machine_attribute_val_t* value) /* IN/OUT */ +{ + if ((map == VM_MAP_NULL) || (addr + size < addr)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return vm_map_machine_attribute(map, + vm_map_trunc_page(addr), + vm_map_round_page(addr+size), + attribute, + value); +} + +/* + * vm_machine_attribute - + * Handle machine-specific attributes for a mapping, such + * as cachability, migrability, etc. Limited addressability + * (same range limits as for the native kernel map). + */ +kern_return_t vm_machine_attribute( vm_map_t map, - vm_address_t address, + vm_address_t addr, vm_size_t size, vm_machine_attribute_t attribute, vm_machine_attribute_val_t* value) /* IN/OUT */ { + if ((map == VM_MAP_NULL) || (addr + size < addr)) + return(KERN_INVALID_ARGUMENT); + + if (size == 0) + return KERN_SUCCESS; + + return vm_map_machine_attribute(map, + vm_map_trunc_page(addr), + vm_map_round_page(addr+size), + attribute, + value); +} + +/* + * mach_vm_read - + * Read/copy a range from one address space and return it to the caller. + * + * It is assumed that the address for the returned memory is selected by + * the IPC implementation as part of receiving the reply to this call. + * If IPC isn't used, the caller must deal with the vm_map_copy_t object + * that gets returned. + * + * JMM - because of mach_msg_type_number_t, this call is limited to a + * single 4GB region at this time. 
+ * + */ +kern_return_t +mach_vm_read( + vm_map_t map, + mach_vm_address_t addr, + mach_vm_size_t size, + pointer_t *data, + mach_msg_type_number_t *data_size) +{ + kern_return_t error; + vm_map_copy_t ipc_address; + if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); - return vm_map_machine_attribute(map, address, size, attribute, value); + + error = vm_map_copyin(map, + (vm_map_address_t)addr, + (vm_map_size_t)size, + FALSE, /* src_destroy */ + &ipc_address); + + if (KERN_SUCCESS == error) { + *data = (pointer_t) ipc_address; + *data_size = size; + } + return(error); } +/* + * vm_read - + * Read/copy a range from one address space and return it to the caller. + * Limited addressability (same range limits as for the native kernel map). + * + * It is assumed that the address for the returned memory is selected by + * the IPC implementation as part of receiving the reply to this call. + * If IPC isn't used, the caller must deal with the vm_map_copy_t object + * that gets returned. + */ kern_return_t vm_read( vm_map_t map, - vm_address_t address, + vm_address_t addr, vm_size_t size, pointer_t *data, mach_msg_type_number_t *data_size) @@ -240,130 +487,293 @@ vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); - if ((error = vm_map_copyin(map, - address, - size, - FALSE, /* src_destroy */ - &ipc_address)) == KERN_SUCCESS) { + error = vm_map_copyin(map, + (vm_map_address_t)addr, + (vm_map_size_t)size, + FALSE, /* src_destroy */ + &ipc_address); + + if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; *data_size = size; } return(error); } +/* + * mach_vm_read_list - + * Read/copy a list of address ranges from specified map. + * + * MIG does not know how to deal with a returned array of + * vm_map_copy_t structures, so we have to do the copyout + * manually here. + */ +kern_return_t +mach_vm_read_list( + vm_map_t map, + mach_vm_read_entry_t data_list, + natural_t count) +{ + mach_msg_type_number_t i; + kern_return_t error; + vm_map_copy_t copy; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + error = KERN_SUCCESS; + for(i=0; i<count; i++) { + vm_map_address_t map_addr; + vm_map_size_t map_size; + + map_addr = (vm_map_address_t)(data_list[i].address); + map_size = (vm_map_size_t)(data_list[i].size); + + if(map_size != 0) { + error = vm_map_copyin(map, + map_addr, + map_size, + FALSE, /* src_destroy */ + &copy); + if (KERN_SUCCESS == error) { + error = vm_map_copyout(current_task()->map, + &map_addr, + copy); + if (KERN_SUCCESS == error) { + data_list[i].address = map_addr; + continue; + } + vm_map_copy_discard(copy); + } + } + data_list[i].address = (mach_vm_address_t)0; + data_list[i].size = (mach_vm_size_t)0; + } + return(error); } +/* + * vm_read_list - + * Read/copy a list of address ranges from specified map. + * + * MIG does not know how to deal with a returned array of + * vm_map_copy_t structures, so we have to do the copyout + * manually here. + * + * The source and destination ranges are limited to those + * that can be described with a vm_address_t (i.e. same + * size map as the kernel). + * + * JMM - If the result of the copyout is an address range + * that cannot be described with a vm_address_t (i.e. the + * caller had a larger address space but used this call + * anyway), it will result in a truncated address being + * returned (and a likely confused caller).
+ */ + kern_return_t vm_read_list( vm_map_t map, - vm_read_entry_t data_list, - mach_msg_type_number_t count) + vm_read_entry_t data_list, + natural_t count) { mach_msg_type_number_t i; kern_return_t error; - vm_map_copy_t ipc_address; + vm_map_copy_t copy; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + error = KERN_SUCCESS; for(i=0; i<count; i++) { - error = vm_map_copyin(map, - data_list[i].address, - data_list[i].size, - FALSE, /* src_destroy */ - &ipc_address); - if(error != KERN_SUCCESS) { - break; - } - error = vm_map_copyout(current_task()->map, - &(data_list[i].address), - (vm_map_copy_t) ipc_address); - if(error != KERN_SUCCESS) { - data_list[i].address = (vm_address_t)0; - data_list[i].size = (vm_size_t)0; - break; + vm_map_address_t map_addr; + vm_map_size_t map_size; + + map_addr = (vm_map_address_t)(data_list[i].address); + map_size = (vm_map_size_t)(data_list[i].size); + + if(map_size != 0) { + error = vm_map_copyin(map, + map_addr, + map_size, + FALSE, /* src_destroy */ + &copy); + if (KERN_SUCCESS == error) { + error = vm_map_copyout(current_task()->map, + &map_addr, + copy); + if (KERN_SUCCESS == error) { + data_list[i].address = + CAST_DOWN(vm_offset_t, map_addr); + continue; + } + vm_map_copy_discard(copy); } } + data_list[i].address = (mach_vm_address_t)0; + data_list[i].size = (mach_vm_size_t)0; } return(error); } /* - * This routine reads from the specified map and overwrites part of the current - * activation's map. In making an assumption that the current thread is local, - * it is no longer cluster-safe without a fully supportive local proxy thread/ - * task (but we don't support cluster's anymore so this is moot). + * mach_vm_read_overwrite - + * Overwrite a range of the current map with data from the specified + * map/address range. + * + * In making an assumption that the current thread is local, it is + * no longer cluster-safe without a fully supportive local proxy + * thread/task (but we don't support cluster's anymore so this is moot). */ -#define VM_OVERWRITE_SMALL 512 - kern_return_t -vm_read_overwrite( - vm_map_t map, - vm_address_t address, - vm_size_t size, - vm_address_t data, - vm_size_t *data_size) -{ - struct { - long align; - char buf[VM_OVERWRITE_SMALL]; - } inbuf; - vm_map_t oldmap; - kern_return_t error = KERN_SUCCESS; +mach_vm_read_overwrite( + vm_map_t map, + mach_vm_address_t address, + mach_vm_size_t size, + mach_vm_address_t data, + mach_vm_size_t *data_size) +{ + kern_return_t error; vm_map_copy_t copy; if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); - if (size <= VM_OVERWRITE_SMALL) { - if(vm_map_read_user(map, (vm_offset_t)address, - (vm_offset_t)&inbuf, size)) { - error = KERN_INVALID_ADDRESS; - } else { - if(vm_map_write_user(current_map(), - (vm_offset_t)&inbuf, (vm_offset_t)data, size)) - error = KERN_INVALID_ADDRESS; + error = vm_map_copyin(map, (vm_map_address_t)address, + (vm_map_size_t)size, FALSE, &copy); + + if (KERN_SUCCESS == error) { + error = vm_map_copy_overwrite(current_thread()->map, + (vm_map_address_t)data, + copy, FALSE); + if (KERN_SUCCESS == error) { + *data_size = size; + return error; } + vm_map_copy_discard(copy); } - else { - if ((error = vm_map_copyin(map, - address, - size, - FALSE, /* src_destroy */ - &copy)) == KERN_SUCCESS) { - if ((error = vm_map_copy_overwrite( - current_act()->map, - data, - copy, - FALSE)) == KERN_SUCCESS) { - } - else { - vm_map_copy_discard(copy); - } + return(error); +} + +/* + * vm_read_overwrite - + * Overwrite a range of the current map with data from the specified + * map/address range. + * + * This routine adds the additional limitation that the source and + * destination ranges must be describable with vm_address_t values + * (i.e.
the same size address spaces as the kernel, or at least the + * the ranges are in that first portion of the respective address + * spaces). + */ + +kern_return_t +vm_read_overwrite( + vm_map_t map, + vm_address_t address, + vm_size_t size, + vm_address_t data, + vm_size_t *data_size) +{ + kern_return_t error; + vm_map_copy_t copy; + + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + error = vm_map_copyin(map, (vm_map_address_t)address, + (vm_map_size_t)size, FALSE, ©); + + if (KERN_SUCCESS == error) { + error = vm_map_copy_overwrite(current_thread()->map, + (vm_map_address_t)data, + copy, FALSE); + if (KERN_SUCCESS == error) { + *data_size = size; + return error; } + vm_map_copy_discard(copy); } - *data_size = size; return(error); } +/* + * mach_vm_write - + * Overwrite the specified address range with the data provided + * (from the current map). + */ +kern_return_t +mach_vm_write( + vm_map_t map, + mach_vm_address_t address, + pointer_t data, + __unused mach_msg_type_number_t size) +{ + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; + return vm_map_copy_overwrite(map, (vm_map_address_t)address, + (vm_map_copy_t) data, FALSE /* interruptible XXX */); +} -/*ARGSUSED*/ +/* + * vm_write - + * Overwrite the specified address range with the data provided + * (from the current map). + * + * The addressability of the range of addresses to overwrite is + * limited bu the use of a vm_address_t (same size as kernel map). + * Either the target map is also small, or the range is in the + * low addresses within it. + */ kern_return_t vm_write( + vm_map_t map, + vm_address_t address, + pointer_t data, + __unused mach_msg_type_number_t size) +{ + if (map == VM_MAP_NULL) + return KERN_INVALID_ARGUMENT; + + return vm_map_copy_overwrite(map, (vm_map_address_t)address, + (vm_map_copy_t) data, FALSE /* interruptible XXX */); +} + +/* + * mach_vm_copy - + * Overwrite one range of the specified map with the contents of + * another range within that same map (i.e. both address ranges + * are "over there"). + */ +kern_return_t +mach_vm_copy( vm_map_t map, - vm_address_t address, - vm_offset_t data, - mach_msg_type_number_t size) + mach_vm_address_t source_address, + mach_vm_size_t size, + mach_vm_address_t dest_address) { + vm_map_copy_t copy; + kern_return_t kr; + if (map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; - return vm_map_copy_overwrite(map, address, (vm_map_copy_t) data, - FALSE /* interruptible XXX */); + kr = vm_map_copyin(map, (vm_map_address_t)source_address, + (vm_map_size_t)size, FALSE, ©); + + if (KERN_SUCCESS == kr) { + kr = vm_map_copy_overwrite(map, + (vm_map_address_t)dest_address, + copy, FALSE /* interruptible XXX */); + + if (KERN_SUCCESS != kr) + vm_map_copy_discard(copy); + } + return kr; } kern_return_t @@ -379,30 +789,37 @@ vm_copy( if (map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; - kr = vm_map_copyin(map, source_address, size, - FALSE, ©); - if (kr != KERN_SUCCESS) - return kr; + kr = vm_map_copyin(map, (vm_map_address_t)source_address, + (vm_map_size_t)size, FALSE, ©); - kr = vm_map_copy_overwrite(map, dest_address, copy, - FALSE /* interruptible XXX */); - if (kr != KERN_SUCCESS) { - vm_map_copy_discard(copy); - return kr; - } + if (KERN_SUCCESS == kr) { + kr = vm_map_copy_overwrite(map, + (vm_map_address_t)dest_address, + copy, FALSE /* interruptible XXX */); - return KERN_SUCCESS; + if (KERN_SUCCESS != kr) + vm_map_copy_discard(copy); + } + return kr; } /* - * Routine: vm_map + * mach_vm_map - + * Map some range of an object into an address space. 
+ * + * The object can be one of several types of objects: + * NULL - anonymous memory + * a named entry - a range within another address space + * or a range within a memory object + * a whole memory object + * */ kern_return_t -vm_map_64( +mach_vm_map( vm_map_t target_map, - vm_offset_t *address, - vm_size_t initial_size, - vm_offset_t mask, + mach_vm_offset_t *address, + mach_vm_size_t initial_size, + mach_vm_offset_t mask, int flags, ipc_port_t port, vm_object_offset_t offset, @@ -411,10 +828,10 @@ vm_map_64( vm_prot_t max_protection, vm_inherit_t inheritance) { - register + vm_map_address_t map_addr; + vm_map_size_t map_size; vm_object_t object; - vm_prot_t prot; - vm_object_size_t size = (vm_object_size_t)initial_size; + vm_object_size_t size; kern_return_t result; /* @@ -424,9 +841,13 @@ vm_map_64( (cur_protection & ~VM_PROT_ALL) || (max_protection & ~VM_PROT_ALL) || (inheritance > VM_INHERIT_LAST_VALID) || - size == 0) + initial_size == 0) return(KERN_INVALID_ARGUMENT); + map_addr = vm_map_trunc_page(*address); + map_size = vm_map_round_page(initial_size); + size = vm_object_round_page(initial_size); + /* * Find the vm object (if any) corresponding to this port. */ @@ -460,11 +881,10 @@ vm_map_64( vm_map_entry_t map_entry; named_entry_unlock(named_entry); - *address = trunc_page_32(*address); - size = round_page_64(size); vm_object_reference(vm_submap_object); if ((result = vm_map_enter(target_map, - address, size, mask, flags, + &map_addr, map_size, + (vm_map_offset_t)mask, flags, vm_submap_object, 0, FALSE, cur_protection, max_protection, inheritance @@ -476,40 +896,31 @@ vm_map_64( VM_GET_FLAGS_ALIAS(flags, alias); if ((alias == VM_MEMORY_SHARED_PMAP) && !copy) { - vm_map_submap(target_map, *address, - (*address) + size, + vm_map_submap(target_map, map_addr, + map_addr + map_size, named_entry->backing.map, - (vm_offset_t)offset, TRUE); + (vm_map_offset_t)offset, TRUE); } else { - vm_map_submap(target_map, *address, - (*address) + size, + vm_map_submap(target_map, map_addr, + map_addr + map_size, named_entry->backing.map, - (vm_offset_t)offset, FALSE); + (vm_map_offset_t)offset, FALSE); } if(copy) { if(vm_map_lookup_entry( - target_map, *address, &map_entry)) { + target_map, map_addr, &map_entry)) { map_entry->needs_copy = TRUE; } } + *address = map_addr; } return(result); - } else if(named_entry->object) { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. 
*/ - named_entry_unlock(named_entry); - vm_object_reference(named_entry->object); - object = named_entry->object; - } else { + } else if (named_entry->is_pager) { unsigned int access; vm_prot_t protections; unsigned int wimg_mode; - boolean_t cache_attr; + boolean_t cache_attr; protections = named_entry->protection & VM_PROT_ALL; @@ -526,11 +937,13 @@ vm_map_64( return(KERN_INVALID_OBJECT); } - vm_object_lock(object); + /* JMM - drop reference on pager here */ /* create an extra ref for the named entry */ + vm_object_lock(object); vm_object_reference_locked(object); - named_entry->object = object; + named_entry->backing.object = object; + named_entry->is_pager = FALSE; named_entry_unlock(named_entry); wimg_mode = object->wimg_bits; @@ -549,15 +962,16 @@ vm_map_64( else cache_attr = FALSE; - if (named_entry->backing.pager) { - /* wait for object (if any) to be ready */ + /* wait for object (if any) to be ready */ + if (!named_entry->internal) { while (!object->pager_ready) { vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); vm_object_lock(object); } } + if(object->wimg_bits != wimg_mode) { vm_page_t p; @@ -566,12 +980,9 @@ vm_map_64( object->wimg_bits = wimg_mode; queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { - pmap_page_protect( - p->phys_page, - VM_PROT_NONE); - if(cache_attr) - pmap_sync_caches_phys( - p->phys_page); + pmap_disconnect(p->phys_page); + if (cache_attr) + pmap_sync_page_attributes_phys(p->phys_page); } } } @@ -579,6 +990,17 @@ vm_map_64( if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); + } else { + /* This is the case where we are going to map */ + /* an already mapped object. If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. 
*/ + object = named_entry->backing.object; + assert(object != VM_OBJECT_NULL); + named_entry_unlock(named_entry); + vm_object_reference(object); } } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { /* @@ -615,9 +1037,6 @@ vm_map_64( return (KERN_INVALID_OBJECT); } - *address = trunc_page_32(*address); - size = round_page_64(size); - /* * Perform the copy if requested */ @@ -669,15 +1088,50 @@ vm_map_64( } if ((result = vm_map_enter(target_map, - address, size, mask, flags, - object, offset, - copy, - cur_protection, max_protection, inheritance - )) != KERN_SUCCESS) - vm_object_deallocate(object); + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, inheritance + )) != KERN_SUCCESS) + vm_object_deallocate(object); + *address = map_addr; return(result); } + +/* legacy interface */ +kern_return_t +vm_map_64( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + mach_vm_address_t map_addr; + mach_vm_size_t map_size; + mach_vm_offset_t map_mask; + kern_return_t kr; + + map_addr = (mach_vm_address_t)*address; + map_size = (mach_vm_size_t)size; + map_mask = (mach_vm_offset_t)mask; + + kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + port, offset, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN(vm_address_t, map_addr); + return kr; +} + /* temporary, until world build */ kern_return_t vm_map( @@ -693,18 +1147,163 @@ vm_map( vm_prot_t max_protection, vm_inherit_t inheritance) { - return vm_map_64(target_map, address, size, mask, flags, - port, (vm_object_offset_t)offset, copy, - cur_protection, max_protection, inheritance); + mach_vm_address_t map_addr; + mach_vm_size_t map_size; + mach_vm_offset_t map_mask; + vm_object_offset_t obj_offset; + kern_return_t kr; + + map_addr = (mach_vm_address_t)*address; + map_size = (mach_vm_size_t)size; + map_mask = (mach_vm_offset_t)mask; + obj_offset = (vm_object_offset_t)offset; + + kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, + port, obj_offset, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN(vm_address_t, map_addr); + return kr; +} + +/* + * mach_vm_remap - + * Remap a range of memory from one task into another, + * to another address range within the same task, or + * over top of itself (with altered permissions and/or + * as an in-place copy of itself). + */ + +kern_return_t +mach_vm_remap( + vm_map_t target_map, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + boolean_t anywhere, + vm_map_t src_map, + mach_vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_map_offset_t map_addr; + kern_return_t kr; + + if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_offset_t)*address; + + kr = vm_map_remap(target_map, + &map_addr, + size, + mask, + anywhere, + src_map, + memory_address, + copy, + cur_protection, + max_protection, + inheritance); + *address = map_addr; + return kr; } +/* + * vm_remap - + * Remap a range of memory from one task into another, + * to another address range within the same task, or + * over top of itself (with altered permissions and/or + * as an in-place copy of itself). 
+ * + * The addressability of the source and target address + * range is limited by the size of vm_address_t (in the + * kernel context). + */ +kern_return_t +vm_remap( + vm_map_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + boolean_t anywhere, + vm_map_t src_map, + vm_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + vm_map_offset_t map_addr; + kern_return_t kr; + + if (VM_MAP_NULL == target_map || VM_MAP_NULL == src_map) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_offset_t)*address; + + kr = vm_map_remap(target_map, + &map_addr, + size, + mask, + anywhere, + src_map, + memory_address, + copy, + cur_protection, + max_protection, + inheritance); + *address = CAST_DOWN(vm_offset_t, map_addr); + return kr; +} /* - * NOTE: this routine (and this file) will no longer require mach_host_server.h - * when vm_wire is changed to use ledgers. + * NOTE: these routine (and this file) will no longer require mach_host_server.h + * when mach_vm_wire and vm_wire are changed to use ledgers. */ #include /* + * mach_vm_wire + * Specify that the range of the virtual address space + * of the target task must not cause page faults for + * the indicated accesses. + * + * [ To unwire the pages, specify VM_PROT_NONE. ] + */ +kern_return_t +mach_vm_wire( + host_priv_t host_priv, + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_prot_t access) +{ + kern_return_t rc; + + if (host_priv == HOST_PRIV_NULL) + return KERN_INVALID_HOST; + + assert(host_priv == &realhost); + + if (map == VM_MAP_NULL) + return KERN_INVALID_TASK; + + if (access & ~VM_PROT_ALL) + return KERN_INVALID_ARGUMENT; + + if (access != VM_PROT_NONE) { + rc = vm_map_wire(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), access, TRUE); + } else { + rc = vm_map_unwire(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), TRUE); + } + return rc; +} + +/* + * vm_wire - * Specify that the range of the virtual address space * of the target task must not cause page faults for * the indicated accesses. @@ -729,15 +1328,17 @@ vm_wire( if (map == VM_MAP_NULL) return KERN_INVALID_TASK; - if (access & ~VM_PROT_ALL) + if ((access & ~VM_PROT_ALL) || (start + size < start)) return KERN_INVALID_ARGUMENT; - if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, trunc_page_32(start), - round_page_32(start+size), access, TRUE); + if (size == 0) { + rc = KERN_SUCCESS; + } else if (access != VM_PROT_NONE) { + rc = vm_map_wire(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), access, TRUE); } else { - rc = vm_map_unwire(map, trunc_page_32(start), - round_page_32(start+size), TRUE); + rc = vm_map_unwire(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), TRUE); } return rc; } @@ -747,10 +1348,53 @@ vm_wire( * * Synchronises the memory range specified with its backing store * image by either flushing or cleaning the contents to the appropriate - * memory manager engaging in a memory object synchronize dialog with - * the manager. The client doesn't return until the manager issues - * m_o_s_completed message. MIG Magically converts user task parameter - * to the task's address map. + * memory manager. + * + * interpretation of sync_flags + * VM_SYNC_INVALIDATE - discard pages, only return precious + * pages to manager. + * + * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) + * - discard pages, write dirty or precious + * pages back to memory manager. 
+ * + * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS + * - write dirty or precious pages back to + * the memory manager. + * + * VM_SYNC_CONTIGUOUS - does everything normally, but if there + * is a hole in the region, and we would + * have returned KERN_SUCCESS, return + * KERN_INVALID_ADDRESS instead. + * + * RETURNS + * KERN_INVALID_TASK Bad task parameter + * KERN_INVALID_ARGUMENT both sync and async were specified. + * KERN_SUCCESS The usual. + * KERN_INVALID_ADDRESS There was a hole in the region. + */ + +kern_return_t +mach_vm_msync( + vm_map_t map, + mach_vm_address_t address, + mach_vm_size_t size, + vm_sync_t sync_flags) +{ + + if (map == VM_MAP_NULL) + return(KERN_INVALID_TASK); + + return vm_map_msync(map, (vm_map_address_t)address, + (vm_map_size_t)size, sync_flags); +} + +/* + * vm_msync + * + * Synchronises the memory range specified with its backing store + * image by either flushing or cleaning the contents to the appropriate + * memory manager. * * interpretation of sync_flags * VM_SYNC_INVALIDATE - discard pages, only return precious @@ -764,14 +1408,19 @@ vm_wire( * - write dirty or precious pages back to * the memory manager. * - * NOTE - * The memory object attributes have not yet been implemented, this - * function will have to deal with the invalidate attribute + * VM_SYNC_CONTIGUOUS - does everything normally, but if there + * is a hole in the region, and we would + * have returned KERN_SUCCESS, return + * KERN_INVALID_ADDRESS instead. + * + * The addressability of the range is limited to that which can + * be described by a vm_address_t. * * RETURNS * KERN_INVALID_TASK Bad task parameter * KERN_INVALID_ARGUMENT both sync and async were specified. * KERN_SUCCESS The usual. + * KERN_INVALID_ADDRESS There was a hole in the region. */ kern_return_t @@ -781,298 +1430,356 @@ vm_msync( vm_size_t size, vm_sync_t sync_flags) { - msync_req_t msr; - msync_req_t new_msr; - queue_chain_t req_q; /* queue of requests for this msync */ - vm_map_entry_t entry; - vm_size_t amount_left; - vm_object_offset_t offset; - boolean_t do_sync_req; - boolean_t modifiable; - - if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && - (sync_flags & VM_SYNC_SYNCHRONOUS)) - return(KERN_INVALID_ARGUMENT); + if (map == VM_MAP_NULL) + return(KERN_INVALID_TASK); - /* - * align address and size on page boundaries - */ - size = round_page_32(address + size) - trunc_page_32(address); - address = trunc_page_32(address); + return vm_map_msync(map, (vm_map_address_t)address, + (vm_map_size_t)size, sync_flags); +} - if (map == VM_MAP_NULL) - return(KERN_INVALID_TASK); + +/* + * mach_vm_behavior_set + * + * Sets the paging behavior attribute for the specified range + * in the specified map. + * + * This routine will fail with KERN_INVALID_ADDRESS if any address + * in [start,start+size) is not a valid allocated memory region. + */ +kern_return_t +mach_vm_behavior_set( + vm_map_t map, + mach_vm_offset_t start, + mach_vm_size_t size, + vm_behavior_t new_behavior) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); if (size == 0) - return(KERN_SUCCESS); + return KERN_SUCCESS; - queue_init(&req_q); - amount_left = size; + return(vm_map_behavior_set(map, vm_map_trunc_page(start), + vm_map_round_page(start+size), new_behavior)); +} - while (amount_left > 0) { - vm_size_t flush_size; - vm_object_t object; +/* + * vm_behavior_set + * + * Sets the paging behavior attribute for the specified range + * in the specified map. 
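A short user-space sketch of mach_vm_msync() using the sync_flags documented above. VM_SYNC_SYNCHRONOUS and VM_SYNC_ASYNCHRONOUS are rejected together (KERN_INVALID_ARGUMENT), so only one is passed:

#include <mach/mach.h>
#include <mach/mach_vm.h>

/*
 * Write any dirty pages in [addr, addr+len) back to their pager,
 * wait for completion, and discard them from memory afterwards.
 */
kern_return_t
flush_range(mach_vm_address_t addr, mach_vm_size_t len)
{
	return mach_vm_msync(mach_task_self(), addr, len,
			     VM_SYNC_SYNCHRONOUS | VM_SYNC_INVALIDATE);
}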
+ *
+ *	This routine will fail with KERN_INVALID_ADDRESS if any address
+ *	in [start,start+size) is not a valid allocated memory region.
+ *
+ *	This routine is potentially limited in addressability by the
+ *	use of vm_offset_t (if the map provided is larger than the
+ *	kernel's).
+ */
+kern_return_t
+vm_behavior_set(
+	vm_map_t		map,
+	vm_offset_t		start,
+	vm_size_t		size,
+	vm_behavior_t		new_behavior)
+{
+	if ((map == VM_MAP_NULL) || (start + size < start))
+		return(KERN_INVALID_ARGUMENT);
 
-	vm_map_lock(map);
-	if (!vm_map_lookup_entry(map, address, &entry)) {
-		vm_size_t	skip;
+	if (size == 0)
+		return KERN_SUCCESS;
 
-		/*
-		 * hole in the address map.
-		 */
+	return(vm_map_behavior_set(map, vm_map_trunc_page(start),
+				   vm_map_round_page(start+size), new_behavior));
+}
 
-		/*
-		 * Check for empty map.
-		 */
-		if (entry == vm_map_to_entry(map) &&
-		    entry->vme_next == entry) {
-			vm_map_unlock(map);
-			break;
-		}
-		/*
-		 * Check that we don't wrap and that
-		 * we have at least one real map entry.
-		 */
-		if ((map->hdr.nentries == 0) ||
-		    (entry->vme_next->vme_start < address)) {
-			vm_map_unlock(map);
-			break;
-		}
-		/*
-		 * Move up to the next entry if needed
-		 */
-		skip = (entry->vme_next->vme_start - address);
-		if (skip >= amount_left)
-			amount_left = 0;
-		else
-			amount_left -= skip;
-		address = entry->vme_next->vme_start;
-		vm_map_unlock(map);
-		continue;
-	}
+/*
+ *	mach_vm_region:
+ *
+ *	User call to obtain information about a region in
+ *	a task's address map.  Currently, only one flavor is
+ *	supported.
+ *
+ *	XXX The reserved and behavior fields cannot be filled
+ *	    in until the vm merge from the IK is completed, and
+ *	    vm_reserve is implemented.
+ *
+ *	XXX Dependency: syscall_vm_region() also supports only one flavor.
+ */
 
-	offset = address - entry->vme_start;
+kern_return_t
+mach_vm_region(
+	vm_map_t		map,
+	mach_vm_offset_t	*address,	/* IN/OUT */
+	mach_vm_size_t		*size,		/* OUT */
+	vm_region_flavor_t	flavor,		/* IN */
+	vm_region_info_t	info,		/* OUT */
+	mach_msg_type_number_t	*count,		/* IN/OUT */
+	mach_port_t		*object_name)	/* OUT */
+{
+	vm_map_offset_t		map_addr;
+	vm_map_size_t		map_size;
+	kern_return_t		kr;
 
-	/*
-	 * do we have more to flush than is contained in this
-	 * entry ?
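From user space the 64-bit behavior entry point serves the same role as madvise(). A sketch, assuming VM_BEHAVIOR_SEQUENTIAL from <mach/vm_behavior.h>:

#include <mach/mach.h>
#include <mach/mach_vm.h>

/*
 * Hint that the range will be read front to back, so the pager
 * can cluster read-ahead more aggressively.
 */
kern_return_t
advise_sequential(mach_vm_address_t addr, mach_vm_size_t len)
{
	return mach_vm_behavior_set(mach_task_self(), addr, len,
				    VM_BEHAVIOR_SEQUENTIAL);
}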
- */ - if (amount_left + entry->vme_start + offset > entry->vme_end) { - flush_size = entry->vme_end - - (entry->vme_start + offset); - } else { - flush_size = amount_left; - } - amount_left -= flush_size; - address += flush_size; + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - if (entry->is_sub_map == TRUE) { - vm_map_t local_map; - vm_offset_t local_offset; + map_addr = (vm_map_offset_t)*address; + map_size = (vm_map_size_t)*size; - local_map = entry->object.sub_map; - local_offset = entry->offset; - vm_map_unlock(map); - vm_msync( - local_map, - local_offset, - flush_size, - sync_flags); - continue; - } - object = entry->object.vm_object; + /* legacy conversion */ + if (VM_REGION_BASIC_INFO == flavor) + flavor = VM_REGION_BASIC_INFO_64; - /* - * We can't sync this object if the object has not been - * created yet - */ - if (object == VM_OBJECT_NULL) { - vm_map_unlock(map); - continue; - } - offset += entry->offset; - modifiable = (entry->protection & VM_PROT_WRITE) - != VM_PROT_NONE; + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); - vm_object_lock(object); + *address = map_addr; + *size = map_size; + return kr; +} - if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { - boolean_t kill_pages = 0; +/* + * vm_region_64 and vm_region: + * + * User call to obtain information about a region in + * a task's address map. Currently, only one flavor is + * supported. + * + * XXX The reserved and behavior fields cannot be filled + * in until the vm merge from the IK is completed, and + * vm_reserve is implemented. + * + * XXX Dependency: syscall_vm_region() also supports only one flavor. + */ - if (sync_flags & VM_SYNC_KILLPAGES) { - if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) - kill_pages = 1; - else - kill_pages = -1; - } - if (kill_pages != -1) - vm_object_deactivate_pages(object, offset, - (vm_object_size_t)flush_size, kill_pages); - vm_object_unlock(object); - vm_map_unlock(map); - continue; - } - /* - * We can't sync this object if there isn't a pager. - * Don't bother to sync internal objects, since there can't - * be any "permanent" storage for these objects anyway. 
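mach_vm_region() above lends itself to the classic address-space walk. A user-space sketch; since the code above promotes the legacy VM_REGION_BASIC_INFO flavor to VM_REGION_BASIC_INFO_64, the 64-bit flavor is requested directly:

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

/* Print the bounds and current protection of every mapped region. */
void
dump_regions(void)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count;
	mach_port_t			object_name;

	for (;;) {
		count = VM_REGION_BASIC_INFO_COUNT_64;
		if (mach_vm_region(mach_task_self(), &addr, &size,
				   VM_REGION_BASIC_INFO_64,
				   (vm_region_info_t)&info, &count,
				   &object_name) != KERN_SUCCESS)
			break;
		printf("%016llx-%016llx prot %x\n",
		       (unsigned long long)addr,
		       (unsigned long long)(addr + size),
		       info.protection);
		addr += size;
	}
}

For a walk that also descends into shared-region submaps, mach_vm_region_recurse() below serves the same purpose, with the nesting depth tracked explicitly by the caller.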
- */ - if ((object->pager == MEMORY_OBJECT_NULL) || - (object->internal) || (object->private)) { - vm_object_unlock(object); - vm_map_unlock(map); - continue; - } - /* - * keep reference on the object until syncing is done - */ - assert(object->ref_count > 0); - object->ref_count++; - vm_object_res_reference(object); - vm_object_unlock(object); +kern_return_t +vm_region_64( + vm_map_t map, + vm_offset_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + vm_map_offset_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - vm_map_unlock(map); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - do_sync_req = vm_object_sync(object, - offset, - flush_size, - sync_flags & VM_SYNC_INVALIDATE, - (modifiable && - (sync_flags & VM_SYNC_SYNCHRONOUS || - sync_flags & VM_SYNC_ASYNCHRONOUS))); + map_addr = (vm_map_offset_t)*address; + map_size = (vm_map_size_t)*size; - /* - * only send a m_o_s if we returned pages or if the entry - * is writable (ie dirty pages may have already been sent back) - */ - if (!do_sync_req && !modifiable) { - vm_object_deallocate(object); - continue; - } - msync_req_alloc(new_msr); + /* legacy conversion */ + if (VM_REGION_BASIC_INFO == flavor) + flavor = VM_REGION_BASIC_INFO_64; - vm_object_lock(object); - offset += object->paging_offset; + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); - new_msr->offset = offset; - new_msr->length = flush_size; - new_msr->object = object; - new_msr->flag = VM_MSYNC_SYNCHRONIZING; -re_iterate: - queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { - /* - * need to check for overlapping entry, if found, wait - * on overlapping msr to be done, then reiterate - */ - msr_lock(msr); - if (msr->flag == VM_MSYNC_SYNCHRONIZING && - ((offset >= msr->offset && - offset < (msr->offset + msr->length)) || - (msr->offset >= offset && - msr->offset < (offset + flush_size)))) - { - assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); - msr_unlock(msr); - vm_object_unlock(object); - thread_block((void (*)(void))0); - vm_object_lock(object); - goto re_iterate; - } - msr_unlock(msr); - }/* queue_iterate */ + *address = CAST_DOWN(vm_offset_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); - queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); - vm_object_unlock(object); + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} - queue_enter(&req_q, new_msr, msync_req_t, req_q); +kern_return_t +vm_region( + vm_map_t map, + vm_address_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - (void) memory_object_synchronize( - object->pager, - offset, - flush_size, - sync_flags); - }/* while */ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - /* - * wait for memory_object_sychronize_completed messages from pager(s) - */ + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; - while (!queue_empty(&req_q)) { - msr = (msync_req_t)queue_first(&req_q); - msr_lock(msr); - while(msr->flag != VM_MSYNC_DONE) { - assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); - msr_unlock(msr); - thread_block((void (*)(void))0); - msr_lock(msr); - 
}/* while */ - queue_remove(&req_q, msr, msync_req_t, req_q); - msr_unlock(msr); - vm_object_deallocate(msr->object); - msync_req_free(msr); - }/* queue_iterate */ + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); - return(KERN_SUCCESS); -}/* vm_msync */ + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} /* - * task_wire + * vm_region_recurse: A form of vm_region which follows the + * submaps in a target map * - * Set or clear the map's wiring_required flag. This flag, if set, - * will cause all future virtual memory allocation to allocate - * user wired memory. Unwiring pages wired down as a result of - * this routine is done with the vm_wire interface. */ kern_return_t -task_wire( - vm_map_t map, - boolean_t must_wire) +mach_vm_region_recurse( + vm_map_t map, + mach_vm_address_t *address, + mach_vm_size_t *size, + uint32_t *depth, + vm_region_recurse_info_t info, + mach_msg_type_number_t *infoCnt) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; - if (must_wire) - map->wiring_required = TRUE; - else - map->wiring_required = FALSE; + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - return(KERN_SUCCESS); + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region_recurse_64( + map, + &map_addr, + &map_size, + depth, + (vm_region_submap_info_64_t)info, + infoCnt); + + *address = map_addr; + *size = map_size; + return kr; } /* - * vm_behavior_set sets the paging behavior attribute for the - * specified range in the specified map. This routine will fail - * with KERN_INVALID_ADDRESS if any address in [start,start+size) - * is not a valid allocated or reserved memory region. + * vm_region_recurse: A form of vm_region which follows the + * submaps in a target map + * */ -kern_return_t -vm_behavior_set( - vm_map_t map, - vm_offset_t start, - vm_size_t size, - vm_behavior_t new_behavior) +kern_return_t +vm_region_recurse_64( + vm_map_t map, + vm_address_t *address, + vm_size_t *size, + uint32_t *depth, + vm_region_recurse_info_64_t info, + mach_msg_type_number_t *infoCnt) { - if (map == VM_MAP_NULL) - return(KERN_INVALID_ARGUMENT); + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region_recurse_64( + map, + &map_addr, + &map_size, + depth, + (vm_region_submap_info_64_t)info, + infoCnt); - return(vm_map_behavior_set(map, trunc_page_32(start), - round_page_32(start+size), new_behavior)); + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; } -#if VM_CPM -/* - * Control whether the kernel will permit use of - * vm_allocate_cpm at all. 
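vm_purgable_control() above toggles an object between volatile and nonvolatile states. A hedged user-space sketch, assuming memory allocated with the VM_FLAGS_PURGABLE allocation flag, the state constants from <mach/vm_purgable.h>, and that VM_PURGABLE_SET_STATE returns the previous state through *state:

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/vm_purgable.h>

kern_return_t
purgable_demo(void)
{
	mach_vm_address_t	addr = 0;
	int			state;
	kern_return_t		kr;

	kr = mach_vm_allocate(mach_task_self(), &addr, vm_page_size,
			      VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* let the pageout daemon reclaim the contents under pressure */
	state = VM_PURGABLE_VOLATILE;
	kr = vm_purgable_control(mach_task_self(), (vm_address_t)addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr != KERN_SUCCESS)
		return kr;

	/* take it back for use; the returned state says what happened */
	state = VM_PURGABLE_NONVOLATILE;
	kr = vm_purgable_control(mach_task_self(), (vm_address_t)addr,
				 VM_PURGABLE_SET_STATE, &state);
	if (kr == KERN_SUCCESS && state == VM_PURGABLE_EMPTY) {
		/* contents were discarded while volatile */
	}
	return kr;
}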
- */ -unsigned int vm_allocate_cpm_enabled = 1; +kern_return_t +vm_region_recurse( + vm_map_t map, + vm_offset_t *address, /* IN/OUT */ + vm_size_t *size, /* OUT */ + natural_t *depth, /* IN/OUT */ + vm_region_recurse_info_t info32, /* IN/OUT */ + mach_msg_type_number_t *infoCnt) /* IN/OUT */ +{ + vm_region_submap_info_data_64_t info64; + vm_region_submap_info_t info; + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map || *infoCnt < VM_REGION_SUBMAP_INFO_COUNT) + return KERN_INVALID_ARGUMENT; + + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + info = (vm_region_submap_info_t)info32; + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT_64; + + kr = vm_map_region_recurse_64(map, &map_addr,&map_size, + depth, &info64, infoCnt); + + info->protection = info64.protection; + info->max_protection = info64.max_protection; + info->inheritance = info64.inheritance; + info->offset = (uint32_t)info64.offset; /* trouble-maker */ + info->user_tag = info64.user_tag; + info->pages_resident = info64.pages_resident; + info->pages_shared_now_private = info64.pages_shared_now_private; + info->pages_swapped_out = info64.pages_swapped_out; + info->pages_dirtied = info64.pages_dirtied; + info->ref_count = info64.ref_count; + info->shadow_depth = info64.shadow_depth; + info->external_pager = info64.external_pager; + info->share_mode = info64.share_mode; + info->is_submap = info64.is_submap; + info->behavior = info64.behavior; + info->object_id = info64.object_id; + info->user_wired_count = info64.user_wired_count; + + *address = CAST_DOWN(vm_address_t, map_addr); + *size = CAST_DOWN(vm_size_t, map_size); + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT; + + if (KERN_SUCCESS == kr && map_addr + map_size > VM_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +vm_purgable_control( + vm_map_t map, + vm_offset_t address, + vm_purgable_t control, + int *state) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_purgable_control(map, + vm_map_trunc_page(address), + control, + state); +} + /* * Ordinarily, the right to allocate CPM is restricted * to privileged applications (those that can gain access - * to the host port). Set this variable to zero if you - * want to let any application allocate CPM. + * to the host priv port). Set this variable to zero if + * you want to let any application allocate CPM. 
*/ unsigned int vm_allocate_cpm_privileged = 0; @@ -1089,363 +1796,244 @@ unsigned int vm_allocate_cpm_privileged = 0; kern_return_t vm_allocate_cpm( host_priv_t host_priv, - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, + vm_map_t map, + vm_address_t *addr, + vm_size_t size, int flags) { - vm_object_t cpm_obj; - pmap_t pmap; - vm_page_t m, pages; + vm_map_address_t map_addr; + vm_map_size_t map_size; kern_return_t kr; - vm_offset_t va, start, end, offset; -#if MACH_ASSERT - extern vm_offset_t avail_start, avail_end; - vm_offset_t prev_addr; -#endif /* MACH_ASSERT */ - - boolean_t anywhere = VM_FLAGS_ANYWHERE & flags; - - if (!vm_allocate_cpm_enabled) - return KERN_FAILURE; - if (vm_allocate_cpm_privileged && host_priv == HOST_PRIV_NULL) + if (vm_allocate_cpm_privileged && HOST_PRIV_NULL == host_priv) return KERN_INVALID_HOST; - if (map == VM_MAP_NULL) + if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - - assert(host_priv == &realhost); - - if (size == 0) { - *addr = 0; - return KERN_SUCCESS; - } - - if (anywhere) - *addr = vm_map_min(map); - else - *addr = trunc_page_32(*addr); - size = round_page_32(size); - - if ((kr = cpm_allocate(size, &pages, TRUE)) != KERN_SUCCESS) - return kr; - - cpm_obj = vm_object_allocate(size); - assert(cpm_obj != VM_OBJECT_NULL); - assert(cpm_obj->internal); - assert(cpm_obj->size == size); - assert(cpm_obj->can_persist == FALSE); - assert(cpm_obj->pager_created == FALSE); - assert(cpm_obj->pageout == FALSE); - assert(cpm_obj->shadow == VM_OBJECT_NULL); - - /* - * Insert pages into object. - */ - - vm_object_lock(cpm_obj); - for (offset = 0; offset < size; offset += PAGE_SIZE) { - m = pages; - pages = NEXT_PAGE(m); - - assert(!m->gobbled); - assert(!m->wanted); - assert(!m->pageout); - assert(!m->tabled); - assert(m->busy); - assert(m->phys_page>=avail_start && m->phys_page<=avail_end); - - m->busy = FALSE; - vm_page_insert(m, cpm_obj, offset); - } - assert(cpm_obj->resident_page_count == size / PAGE_SIZE); - vm_object_unlock(cpm_obj); - - /* - * Hang onto a reference on the object in case a - * multi-threaded application for some reason decides - * to deallocate the portion of the address space into - * which we will insert this object. - * - * Unfortunately, we must insert the object now before - * we can talk to the pmap module about which addresses - * must be wired down. Hence, the race with a multi- - * threaded app. - */ - vm_object_reference(cpm_obj); - - /* - * Insert object into map. - */ - - kr = vm_map_enter( - map, - addr, - size, - (vm_offset_t)0, - flags, - cpm_obj, - (vm_object_offset_t)0, - FALSE, - VM_PROT_ALL, - VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kr != KERN_SUCCESS) { - /* - * A CPM object doesn't have can_persist set, - * so all we have to do is deallocate it to - * free up these pages. - */ - assert(cpm_obj->pager_created == FALSE); - assert(cpm_obj->can_persist == FALSE); - assert(cpm_obj->pageout == FALSE); - assert(cpm_obj->shadow == VM_OBJECT_NULL); - vm_object_deallocate(cpm_obj); /* kill acquired ref */ - vm_object_deallocate(cpm_obj); /* kill creation ref */ - } - - /* - * Inform the physical mapping system that the - * range of addresses may not fault, so that - * page tables and such can be locked down as well. - */ - start = *addr; - end = start + size; - pmap = vm_map_pmap(map); - pmap_pageable(pmap, start, end, FALSE); - - /* - * Enter each page into the pmap, to avoid faults. 
- * Note that this loop could be coded more efficiently, - * if the need arose, rather than looking up each page - * again. - */ - for (offset = 0, va = start; offset < size; - va += PAGE_SIZE, offset += PAGE_SIZE) { - vm_object_lock(cpm_obj); - m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); - vm_object_unlock(cpm_obj); - assert(m != VM_PAGE_NULL); - PMAP_ENTER(pmap, va, m, VM_PROT_ALL, - ((unsigned int)(m->object->wimg_bits)) & VM_WIMG_MASK, - TRUE); - } -#if MACH_ASSERT - /* - * Verify ordering in address space. - */ - for (offset = 0; offset < size; offset += PAGE_SIZE) { - vm_object_lock(cpm_obj); - m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); - vm_object_unlock(cpm_obj); - if (m == VM_PAGE_NULL) - panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", - cpm_obj, offset); - assert(m->tabled); - assert(!m->busy); - assert(!m->wanted); - assert(!m->fictitious); - assert(!m->private); - assert(!m->absent); - assert(!m->error); - assert(!m->cleaning); - assert(!m->precious); - assert(!m->clustered); - if (offset != 0) { - if (m->phys_page != prev_addr + 1) { - printf("start 0x%x end 0x%x va 0x%x\n", - start, end, va); - printf("obj 0x%x off 0x%x\n", cpm_obj, offset); - printf("m 0x%x prev_address 0x%x\n", m, - prev_addr); - panic("vm_allocate_cpm: pages not contig!"); - } - } - prev_addr = m->phys_page; - } -#endif /* MACH_ASSERT */ + map_addr = (vm_map_address_t)*addr; + map_size = (vm_map_size_t)size; - vm_object_deallocate(cpm_obj); /* kill extra ref */ + kr = vm_map_enter_cpm(map, + &map_addr, + map_size, + flags); + *addr = CAST_DOWN(vm_address_t, map_addr); return kr; } -#else /* VM_CPM */ +kern_return_t +mach_vm_page_query( + vm_map_t map, + mach_vm_offset_t offset, + int *disposition, + int *ref_count) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; -/* - * Interface is defined in all cases, but unless the kernel - * is built explicitly for this option, the interface does - * nothing. 
- */ + return vm_map_page_info(map, + vm_map_trunc_page(offset), + disposition, ref_count); +} kern_return_t -vm_allocate_cpm( - host_priv_t host_priv, - register vm_map_t map, - register vm_offset_t *addr, - register vm_size_t size, - int flags) +vm_map_page_query( + vm_map_t map, + vm_offset_t offset, + int *disposition, + int *ref_count) { - return KERN_FAILURE; + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_page_info(map, + vm_map_trunc_page(offset), + disposition, ref_count); } -/* - */ +/* map a (whole) upl into an address space */ kern_return_t -mach_memory_object_memory_entry_64( - host_t host, - boolean_t internal, - vm_object_offset_t size, - vm_prot_t permission, - memory_object_t pager, - ipc_port_t *entry_handle) +vm_upl_map( + vm_map_t map, + upl_t upl, + vm_offset_t *dst_addr) { - unsigned int access; - vm_named_entry_t user_object; - ipc_port_t user_handle; - ipc_port_t previous; + vm_map_offset_t map_addr; kern_return_t kr; - if (host == HOST_NULL) - return(KERN_INVALID_HOST); + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) - return KERN_FAILURE; - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); - ip_lock(user_handle); + kr = vm_map_enter_upl(map, upl, &map_addr); + *dst_addr = CAST_DOWN(vm_offset_t, map_addr); + return kr; +} - /* make a sonce right */ - user_handle->ip_sorights++; - ip_reference(user_handle); +kern_return_t +vm_upl_unmap( + vm_map_t map, + upl_t upl) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - user_handle->ip_destination = IP_NULL; - user_handle->ip_receiver_name = MACH_PORT_NULL; - user_handle->ip_receiver = ipc_space_kernel; + return (vm_map_remove_upl(map, upl)); +} - /* make a send right */ - user_handle->ip_mscount++; - user_handle->ip_srights++; - ip_reference(user_handle); +/* Retrieve a upl for an object underlying an address range in a map */ + +kern_return_t +vm_map_get_upl( + vm_map_t map, + vm_address_t offset, + upl_size_t *upl_size, + upl_t *upl, + upl_page_info_array_t page_list, + unsigned int *count, + int *flags, + int force_data_sync) +{ + vm_map_offset_t map_offset; + int map_flags; + kern_return_t kr; - ipc_port_nsrequest(user_handle, 1, user_handle, &previous); - /* nsrequest unlocks user_handle */ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; - user_object->object = NULL; - user_object->size = size; - user_object->offset = 0; - user_object->backing.pager = pager; - user_object->protection = permission & VM_PROT_ALL; - access = GET_MAP_MEM(permission); - SET_MAP_MEM(access, user_object->protection); - user_object->internal = internal; - user_object->is_sub_map = FALSE; - user_object->ref_count = 1; + map_offset = (vm_map_offset_t)offset; + map_flags = *flags & ~UPL_NOZEROFILL; + if (force_data_sync) + map_flags |= UPL_FORCE_DATA_SYNC; - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); - *entry_handle = user_handle; - return KERN_SUCCESS; -} + kr = vm_map_create_upl(map, + map_offset, + upl_size, + upl, + page_list, + count, + &map_flags); -kern_return_t -mach_memory_object_memory_entry( - host_t host, - boolean_t internal, - vm_size_t size, - vm_prot_t permission, - memory_object_t pager, - ipc_port_t *entry_handle) -{ - return mach_memory_object_memory_entry_64( host, internal, - (vm_object_offset_t)size, permission, pager, entry_handle); + *flags = (map_flags & ~UPL_FORCE_DATA_SYNC); + return kr; } 
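mach_vm_page_query() reports per-page residency via vm_map_page_info(). A sketch, assuming the MIG-generated user prototype mirrors the kernel signature above and that the VM_PAGE_QUERY_PAGE_* disposition bits (the same ones used by the removed vm_map_page_query() body later in this file) are visible to the caller:

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

/* Report whether the page at addr is resident, dirty, or referenced. */
void
page_report(mach_vm_address_t addr)
{
	int		disposition = 0, ref_count = 0;
	kern_return_t	kr;

	kr = mach_vm_page_query(mach_task_self(), addr,
				&disposition, &ref_count);
	if (kr != KERN_SUCCESS)
		return;
	printf("present=%d dirty=%d ref=%d object refs=%d\n",
	       !!(disposition & VM_PAGE_QUERY_PAGE_PRESENT),
	       !!(disposition & VM_PAGE_QUERY_PAGE_DIRTY),
	       !!(disposition & VM_PAGE_QUERY_PAGE_REF),
	       ref_count);
}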
+__private_extern__ kern_return_t
+mach_memory_entry_allocate(
+	vm_named_entry_t	*user_entry_p,
+	ipc_port_t		*user_handle_p);	/* forward */
 
 /*
+ *	mach_make_memory_entry_64
+ *
+ *	Think of it as a two-stage vm_remap() operation.  First
+ *	you get a handle.  Second, you map that handle in
+ *	somewhere else, rather than doing it all at once (and
+ *	without needing access to the other whole map).
+ */
 kern_return_t
 mach_make_memory_entry_64(
 	vm_map_t		target_map,
-	vm_object_size_t	*size,
-	vm_object_offset_t	offset,
+	memory_object_size_t	*size,
+	memory_object_offset_t	offset,
 	vm_prot_t		permission,
 	ipc_port_t		*object_handle,
-	ipc_port_t		parent_entry)
+	ipc_port_t		parent_handle)
 {
 	vm_map_version_t	version;
-	vm_named_entry_t	user_object;
+	vm_named_entry_t	parent_entry;
+	vm_named_entry_t	user_entry;
 	ipc_port_t		user_handle;
-	ipc_port_t		previous;
 	kern_return_t		kr;
-	vm_map_t		pmap_map;
+	vm_map_t		real_map;
 
 	/* needed for call to vm_map_lookup_locked */
-	boolean_t		wired;
+	boolean_t		wired;
 	vm_object_offset_t	obj_off;
-	vm_prot_t		prot;
-	vm_object_offset_t	lo_offset, hi_offset;
+	vm_prot_t		prot;
+	vm_map_offset_t		lo_offset, hi_offset;
 	vm_behavior_t		behavior;
-	vm_object_t		object;
-	vm_object_t		shadow_object;
+	vm_object_t		object;
+	vm_object_t		shadow_object;
 
 	/* needed for direct map entry manipulation */
 	vm_map_entry_t		map_entry;
 	vm_map_entry_t		next_entry;
-	vm_map_t		local_map;
-	vm_map_t		original_map = target_map;
-	vm_offset_t		local_offset;
+	vm_map_t		local_map;
+	vm_map_t		original_map = target_map;
+	vm_map_size_t		total_size;
+	vm_map_size_t		map_size;
+	vm_map_offset_t		map_offset;
+	vm_map_offset_t		local_offset;
 	vm_object_size_t	mappable_size;
-	vm_object_size_t	total_size;
-	unsigned int		access;
-	vm_prot_t		protections;
-	unsigned int		wimg_mode;
-	boolean_t		cache_attr;
+	unsigned int		access;
+	vm_prot_t		protections;
+	unsigned int		wimg_mode;
+	boolean_t		cache_attr = FALSE;
+
+	if (((permission & 0x00FF0000) &
+	     ~(MAP_MEM_ONLY |
+	       MAP_MEM_NAMED_CREATE |
+	       MAP_MEM_PURGABLE |
+	       MAP_MEM_NAMED_REUSE))) {
+		/*
+		 * Unknown flag: reject for forward compatibility.
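The two-stage operation described above, as a user-space sketch: wrap a range of the caller's own map in a named entry, then map the handle at a second address with mach_vm_map() (introduced earlier in this file):

#include <mach/mach.h>
#include <mach/mach_vm.h>

/*
 * Stage one: make a handle for [base, base+page).
 * Stage two: map that handle elsewhere, sharing the same object.
 */
kern_return_t
share_range(mach_vm_address_t base, mach_vm_address_t *alias)
{
	memory_object_size_t	size = vm_page_size;
	mach_port_t		entry = MACH_PORT_NULL;
	kern_return_t		kr;

	kr = mach_make_memory_entry_64(mach_task_self(), &size,
			(memory_object_offset_t)base,
			VM_PROT_READ | VM_PROT_WRITE,
			&entry, MACH_PORT_NULL /* no parent entry */);
	if (kr != KERN_SUCCESS)
		return kr;

	*alias = 0;
	kr = mach_vm_map(mach_task_self(), alias, size, 0,
			VM_FLAGS_ANYWHERE, entry, 0, FALSE /* copy */,
			VM_PROT_READ | VM_PROT_WRITE,
			VM_PROT_READ | VM_PROT_WRITE,
			VM_INHERIT_NONE);

	/* the mapping holds its own reference on the backing object,
	 * so the handle's send right is no longer needed */
	mach_port_deallocate(mach_task_self(), entry);
	return kr;
}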
+ */ + return KERN_INVALID_VALUE; + } + + if (parent_handle != IP_NULL && + ip_kotype(parent_handle) == IKOT_NAMED_ENTRY) { + parent_entry = (vm_named_entry_t) parent_handle->ip_kobject; + } else { + parent_entry = NULL; + } protections = permission & VM_PROT_ALL; access = GET_MAP_MEM(permission); + user_handle = IP_NULL; + user_entry = NULL; + + map_offset = vm_map_trunc_page(offset); + map_size = vm_map_round_page(*size); - offset = trunc_page_64(offset); - *size = round_page_64(*size); + if (permission & MAP_MEM_ONLY) { + boolean_t parent_is_object; - if((parent_entry != NULL) - && (permission & MAP_MEM_ONLY)) { - vm_named_entry_t parent_object; - if(ip_kotype(parent_entry) != IKOT_NAMED_ENTRY) { + if (parent_entry == NULL) { return KERN_INVALID_ARGUMENT; } - parent_object = (vm_named_entry_t)parent_entry->ip_kobject; - object = parent_object->object; - if(object != VM_OBJECT_NULL) + + parent_is_object = !(parent_entry->is_sub_map || parent_entry->is_pager); + object = parent_entry->backing.object; + if(parent_is_object && object != VM_OBJECT_NULL) wimg_mode = object->wimg_bits; - if((access != GET_MAP_MEM(parent_object->protection)) && - !(parent_object->protection & VM_PROT_WRITE)) { + else + wimg_mode = VM_WIMG_DEFAULT; + if((access != GET_MAP_MEM(parent_entry->protection)) && + !(parent_entry->protection & VM_PROT_WRITE)) { return KERN_INVALID_RIGHT; } if(access == MAP_MEM_IO) { - SET_MAP_MEM(access, parent_object->protection); + SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_IO; } else if (access == MAP_MEM_COPYBACK) { - SET_MAP_MEM(access, parent_object->protection); + SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_DEFAULT; } else if (access == MAP_MEM_WTHRU) { - SET_MAP_MEM(access, parent_object->protection); + SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_WTHRU; } else if (access == MAP_MEM_WCOMB) { - SET_MAP_MEM(access, parent_object->protection); + SET_MAP_MEM(access, parent_entry->protection); wimg_mode = VM_WIMG_WCOMB; } - if(object && + if(parent_is_object && object && (access != MAP_MEM_NOOP) && (!(object->nophyscache))) { if(object->wimg_bits != wimg_mode) { @@ -1456,94 +2044,121 @@ mach_make_memory_entry_64( else cache_attr = FALSE; vm_object_lock(object); - while(object->paging_in_progress) { - vm_object_unlock(object); - vm_object_wait(object, - VM_OBJECT_EVENT_PAGING_IN_PROGRESS, - THREAD_UNINT); - vm_object_lock(object); - } + vm_object_paging_wait(object, THREAD_UNINT); object->wimg_bits = wimg_mode; queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { - pmap_page_protect( - p->phys_page, - VM_PROT_NONE); - if(cache_attr) - pmap_sync_caches_phys( - p->phys_page); + pmap_disconnect(p->phys_page); + if (cache_attr) + pmap_sync_page_attributes_phys(p->phys_page); } } vm_object_unlock(object); } } + if (object_handle) + *object_handle = IP_NULL; return KERN_SUCCESS; } - if(permission & MAP_MEM_ONLY) { - return KERN_INVALID_ARGUMENT; - } + if(permission & MAP_MEM_NAMED_CREATE) { + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + return KERN_FAILURE; + } - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) - return KERN_FAILURE; - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); - ip_lock(user_handle); + /* + * Force the creation of the VM object now. 
+ */ + if (map_size > (vm_map_size_t) VM_MAX_ADDRESS) { + /* + * LP64todo - for now, we can only allocate 4GB + * internal objects because the default pager can't + * page bigger ones. Remove this when it can. + */ + kr = KERN_FAILURE; + goto make_mem_done; + } - /* make a sonce right */ - user_handle->ip_sorights++; - ip_reference(user_handle); + object = vm_object_allocate(map_size); + assert(object != VM_OBJECT_NULL); - user_handle->ip_destination = IP_NULL; - user_handle->ip_receiver_name = MACH_PORT_NULL; - user_handle->ip_receiver = ipc_space_kernel; + if (permission & MAP_MEM_PURGABLE) { + if (! (permission & VM_PROT_WRITE)) { + /* if we can't write, we can't purge */ + vm_object_deallocate(object); + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + object->purgable = VM_OBJECT_PURGABLE_NONVOLATILE; + } - /* make a send right */ - user_handle->ip_mscount++; - user_handle->ip_srights++; - ip_reference(user_handle); + /* + * The VM object is brand new and nobody else knows about it, + * so we don't need to lock it. + */ - ipc_port_nsrequest(user_handle, 1, user_handle, &previous); - /* nsrequest unlocks user_handle */ + wimg_mode = object->wimg_bits; + if (access == MAP_MEM_IO) { + wimg_mode = VM_WIMG_IO; + } else if (access == MAP_MEM_COPYBACK) { + wimg_mode = VM_WIMG_DEFAULT; + } else if (access == MAP_MEM_WTHRU) { + wimg_mode = VM_WIMG_WTHRU; + } else if (access == MAP_MEM_WCOMB) { + wimg_mode = VM_WIMG_WCOMB; + } + if (access != MAP_MEM_NOOP) { + object->wimg_bits = wimg_mode; + } + /* the object has no pages, so no WIMG bits to update here */ - user_object->backing.pager = NULL; - user_object->ref_count = 1; + /* + * XXX + * We use this path when we want to make sure that + * nobody messes with the object (coalesce, for + * example) before we map it. + * We might want to use these objects for transposition via + * vm_object_transpose() too, so we don't want any copy or + * shadow objects either... + */ + object->copy_strategy = MEMORY_OBJECT_COPY_NONE; - if(permission & MAP_MEM_NAMED_CREATE) { - user_object->object = NULL; - user_object->internal = TRUE; - user_object->is_sub_map = FALSE; - user_object->offset = 0; - user_object->protection = protections; - SET_MAP_MEM(access, user_object->protection); - user_object->size = *size; + user_entry->backing.object = object; + user_entry->internal = TRUE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->offset = 0; + user_entry->protection = protections; + SET_MAP_MEM(access, user_entry->protection); + user_entry->size = map_size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. 
*/ - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); + *size = CAST_DOWN(vm_size_t, map_size); *object_handle = user_handle; return KERN_SUCCESS; } - if(parent_entry == NULL) { - /* Create a named object based on address range within the task map */ - /* Go find the object at given address */ + if (parent_entry == NULL || + (permission & MAP_MEM_NAMED_REUSE)) { + + /* Create a named object based on address range within the task map */ + /* Go find the object at given address */ +redo_lookup: vm_map_lock_read(target_map); /* get the object associated with the target address */ /* note we check the permission of the range against */ /* that requested by the caller */ - kr = vm_map_lookup_locked(&target_map, offset, + kr = vm_map_lookup_locked(&target_map, map_offset, protections, &version, &object, &obj_off, &prot, &wired, &behavior, - &lo_offset, &hi_offset, &pmap_map); + &lo_offset, &hi_offset, &real_map); if (kr != KERN_SUCCESS) { vm_map_unlock_read(target_map); goto make_mem_done; @@ -1553,8 +2168,8 @@ mach_make_memory_entry_64( kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); if(object == kernel_object) { printf("Warning: Attempt to create a named" " entry from the kernel_object\n"); @@ -1564,41 +2179,52 @@ mach_make_memory_entry_64( /* We have an object, now check to see if this object */ /* is suitable. If not, create a shadow and share that */ - -redo_lookup: + + /* + * We have to unlock the VM object to avoid deadlocking with + * a VM map lock (the lock ordering is map, the object), if we + * need to modify the VM map to create a shadow object. Since + * we might release the VM map lock below anyway, we have + * to release the VM map lock now. + * XXX FBDP There must be a way to avoid this double lookup... + * + * Take an extra reference on the VM object to make sure it's + * not going to disappear. + */ + vm_object_reference_locked(object); /* extra ref to hold obj */ + vm_object_unlock(object); + local_map = original_map; - local_offset = offset; + local_offset = map_offset; if(target_map != local_map) { vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); vm_map_lock_read(local_map); target_map = local_map; - pmap_map = local_map; + real_map = local_map; } while(TRUE) { if(!vm_map_lookup_entry(local_map, local_offset, &map_entry)) { kr = KERN_INVALID_ARGUMENT; - vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); /* release extra ref */ + object = VM_OBJECT_NULL; goto make_mem_done; } if(!(map_entry->is_sub_map)) { if(map_entry->object.vm_object != object) { kr = KERN_INVALID_ARGUMENT; - vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); /* release extra ref */ + object = VM_OBJECT_NULL; goto make_mem_done; } - if(map_entry->wired_count) { - /* JMM - The check below should be reworked instead. 
*/ - object->true_share = TRUE; - } break; } else { vm_map_t tmap; @@ -1608,30 +2234,41 @@ redo_lookup: vm_map_lock_read(local_map); vm_map_unlock_read(tmap); target_map = local_map; - pmap_map = local_map; + real_map = local_map; local_offset = local_offset - map_entry->vme_start; local_offset += map_entry->offset; } } + + /* + * We found the VM map entry, lock the VM object again. + */ + vm_object_lock(object); + if(map_entry->wired_count) { + /* JMM - The check below should be reworked instead. */ + object->true_share = TRUE; + } if(((map_entry->max_protection) & protections) != protections) { kr = KERN_INVALID_RIGHT; vm_object_unlock(object); vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); + vm_object_deallocate(object); + object = VM_OBJECT_NULL; goto make_mem_done; } mappable_size = hi_offset - obj_off; total_size = map_entry->vme_end - map_entry->vme_start; - if(*size > mappable_size) { + if(map_size > mappable_size) { /* try to extend mappable size if the entries */ /* following are from the same object and are */ /* compatible */ next_entry = map_entry->vme_next; /* lets see if the next map entry is still */ /* pointing at this object and is contiguous */ - while(*size > mappable_size) { + while(map_size > mappable_size) { if((next_entry->object.vm_object == object) && (next_entry->vme_start == next_entry->vme_prev->vme_end) && @@ -1668,10 +2305,31 @@ redo_lookup: if ((map_entry->needs_copy || object->shadowed || (object->size > total_size)) && !object->true_share) { + /* + * We have to unlock the VM object before + * trying to upgrade the VM map lock, to + * honor lock ordering (map then object). + * Otherwise, we would deadlock if another + * thread holds a read lock on the VM map and + * is trying to acquire the VM object's lock. + * We still hold an extra reference on the + * VM object, guaranteeing that it won't + * disappear. + */ + vm_object_unlock(object); + if (vm_map_lock_read_to_write(target_map)) { - vm_map_lock_read(target_map); + /* + * We couldn't upgrade our VM map lock + * from "read" to "write" and we lost + * our "read" lock. + * Start all over again... + */ + vm_object_deallocate(object); /* extra ref */ + target_map = original_map; goto redo_lookup; } + vm_object_lock(object); /* * JMM - We need to avoid coming here when the object @@ -1685,6 +2343,7 @@ redo_lookup: &map_entry->offset, total_size); shadow_object = map_entry->object.vm_object; vm_object_unlock(object); + vm_object_pmap_protect( object, map_entry->offset, total_size, @@ -1720,12 +2379,19 @@ redo_lookup: next_entry = next_entry->vme_next; } + /* + * Transfer our extra reference to the + * shadow object. + */ + vm_object_reference_locked(shadow_object); + vm_object_deallocate(object); /* extra ref */ object = shadow_object; - vm_object_lock(object); + obj_off = (local_offset - map_entry->vme_start) + map_entry->offset; - vm_map_lock_write_to_read(target_map); + vm_map_lock_write_to_read(target_map); + vm_object_lock(object); } } @@ -1757,35 +2423,38 @@ redo_lookup: if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; - /* we now point to this object, hold on to it */ - vm_object_reference_locked(object); + /* + * The memory entry now points to this VM object and we + * need to hold a reference on the VM object. Use the extra + * reference we took earlier to keep the object alive when we + * had to unlock it. 
+ */ + vm_map_unlock_read(target_map); - if(pmap_map != target_map) - vm_map_unlock_read(pmap_map); + if(real_map != target_map) + vm_map_unlock_read(real_map); if(object->wimg_bits != wimg_mode) { vm_page_t p; vm_object_paging_wait(object, THREAD_UNINT); + if ((wimg_mode == VM_WIMG_IO) + || (wimg_mode == VM_WIMG_WCOMB)) + cache_attr = TRUE; + else + cache_attr = FALSE; + queue_iterate(&object->memq, p, vm_page_t, listq) { if (!p->fictitious) { - pmap_page_protect( - p->phys_page, - VM_PROT_NONE); - if(cache_attr) - pmap_sync_caches_phys( - p->phys_page); + pmap_disconnect(p->phys_page); + if (cache_attr) + pmap_sync_page_attributes_phys(p->phys_page); } } object->wimg_bits = wimg_mode; } - user_object->object = object; - user_object->internal = object->internal; - user_object->is_sub_map = FALSE; - user_object->offset = obj_off; - user_object->protection = permission; /* the size of mapped entry that overlaps with our region */ /* which is targeted for share. */ @@ -1793,77 +2462,152 @@ redo_lookup: /* offset of our beg addr within entry */ /* it corresponds to this: */ - if(*size > mappable_size) - *size = mappable_size; + if(map_size > mappable_size) + map_size = mappable_size; + + if (permission & MAP_MEM_NAMED_REUSE) { + /* + * Compare what we got with the "parent_entry". + * If they match, re-use the "parent_entry" instead + * of creating a new one. + */ + if (parent_entry != NULL && + parent_entry->backing.object == object && + parent_entry->internal == object->internal && + parent_entry->is_sub_map == FALSE && + parent_entry->is_pager == FALSE && + parent_entry->offset == obj_off && + parent_entry->protection == protections && + parent_entry->size == map_size) { + /* + * We have a match: re-use "parent_entry". + */ + /* release our extra reference on object */ + vm_object_unlock(object); + vm_object_deallocate(object); + /* parent_entry->ref_count++; XXX ? */ + /* Get an extra send-right on handle */ + ipc_port_copy_send(parent_handle); + *object_handle = parent_handle; + return KERN_SUCCESS; + } else { + /* + * No match: we need to create a new entry. + * fall through... + */ + } + } + + vm_object_unlock(object); + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + /* release our unused reference on the object */ + vm_object_deallocate(object); + return KERN_FAILURE; + } - user_object->size = *size; + user_entry->backing.object = object; + user_entry->internal = object->internal; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->offset = obj_off; + user_entry->protection = permission; + user_entry->size = map_size; /* user_object pager and internal fields are not used */ /* when the object field is filled in. 
*/ - vm_object_unlock(object); - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); + *size = CAST_DOWN(vm_size_t, map_size); *object_handle = user_handle; return KERN_SUCCESS; - } else { - - vm_named_entry_t parent_object; + } else { /* The new object will be base on an existing named object */ - if(ip_kotype(parent_entry) != IKOT_NAMED_ENTRY) { + + if (parent_entry == NULL) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - parent_object = (vm_named_entry_t)parent_entry->ip_kobject; - if((offset + *size) > parent_object->size) { + if((offset + map_size) > parent_entry->size) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - user_object->object = parent_object->object; - user_object->size = *size; - user_object->offset = parent_object->offset + offset; - user_object->protection = parent_object->protection; - user_object->protection &= ~VM_PROT_ALL; - user_object->protection = permission & VM_PROT_ALL; - if(access != MAP_MEM_NOOP) { - SET_MAP_MEM(access, user_object->protection); + if((protections & parent_entry->protection) != protections) { + kr = KERN_PROTECTION_FAILURE; + goto make_mem_done; + } + + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + kr = KERN_FAILURE; + goto make_mem_done; } - if(parent_object->is_sub_map) { - user_object->backing.map = parent_object->backing.map; - vm_map_lock(user_object->backing.map); - user_object->backing.map->ref_count++; - vm_map_unlock(user_object->backing.map); + + user_entry->size = map_size; + user_entry->offset = parent_entry->offset + map_offset; + user_entry->is_sub_map = parent_entry->is_sub_map; + user_entry->is_pager = parent_entry->is_pager; + user_entry->internal = parent_entry->internal; + user_entry->protection = protections; + + if(access != MAP_MEM_NOOP) { + SET_MAP_MEM(access, user_entry->protection); } - else { - user_object->backing.pager = parent_object->backing.pager; + + if(parent_entry->is_sub_map) { + user_entry->backing.map = parent_entry->backing.map; + vm_map_lock(user_entry->backing.map); + user_entry->backing.map->ref_count++; + vm_map_unlock(user_entry->backing.map); } - user_object->internal = parent_object->internal; - user_object->is_sub_map = parent_object->is_sub_map; - - if(parent_object->object != NULL) { - /* we now point to this object, hold on */ - vm_object_reference(parent_object->object); - vm_object_lock(parent_object->object); - parent_object->object->true_share = TRUE; - if (parent_object->object->copy_strategy == - MEMORY_OBJECT_COPY_SYMMETRIC) - parent_object->object->copy_strategy = - MEMORY_OBJECT_COPY_DELAY; - vm_object_unlock(parent_object->object); + else if (parent_entry->is_pager) { + user_entry->backing.pager = parent_entry->backing.pager; + /* JMM - don't we need a reference here? 
*/ + } else { + object = parent_entry->backing.object; + assert(object != VM_OBJECT_NULL); + user_entry->backing.object = object; + /* we now point to this object, hold on */ + vm_object_reference(object); + vm_object_lock(object); + object->true_share = TRUE; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + vm_object_unlock(object); } - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); + *size = CAST_DOWN(vm_size_t, map_size); *object_handle = user_handle; return KERN_SUCCESS; } - - make_mem_done: - ipc_port_dealloc_kernel(user_handle); - kfree((vm_offset_t)user_object, sizeof (struct vm_named_entry)); + if (user_handle != IP_NULL) { + ipc_port_dealloc_kernel(user_handle); + } + if (user_entry != NULL) { + kfree(user_entry, sizeof *user_entry); + } + return kr; +} + +kern_return_t +_mach_make_memory_entry( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_entry) +{ + memory_object_offset_t mo_size; + kern_return_t kr; + + mo_size = (memory_object_offset_t)*size; + kr = mach_make_memory_entry_64(target_map, &mo_size, + (memory_object_offset_t)offset, permission, object_handle, + parent_entry); + *size = mo_size; return kr; } @@ -1875,43 +2619,62 @@ mach_make_memory_entry( vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_entry) -{ - vm_object_offset_t size_64; +{ + memory_object_offset_t mo_size; kern_return_t kr; - size_64 = (vm_object_offset_t)*size; - kr = mach_make_memory_entry_64(target_map, &size_64, - (vm_object_offset_t)offset, permission, object_handle, + mo_size = (memory_object_offset_t)*size; + kr = mach_make_memory_entry_64(target_map, &mo_size, + (memory_object_offset_t)offset, permission, object_handle, parent_entry); - *size = (vm_size_t)size_64; + *size = CAST_DOWN(vm_size_t, mo_size); return kr; } /* + * task_wire + * + * Set or clear the map's wiring_required flag. This flag, if set, + * will cause all future virtual memory allocation to allocate + * user wired memory. Unwiring pages wired down as a result of + * this routine is done with the vm_wire interface. 
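With MAP_MEM_NAMED_CREATE the same call mints fresh anonymous memory instead of wrapping an existing range, and MAP_MEM_PURGABLE additionally requires VM_PROT_WRITE, per the check above. A hedged sketch, assuming the MAP_MEM_* flags are visible to user code via <mach/memory_object_types.h>:

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/memory_object_types.h>

/* Create one page of fresh, purgeable anonymous memory and map it. */
kern_return_t
make_purgable_entry(mach_port_t *entry, mach_vm_address_t *addr)
{
	memory_object_size_t	size = vm_page_size;
	kern_return_t		kr;

	kr = mach_make_memory_entry_64(mach_task_self(), &size, 0,
			MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE |
			VM_PROT_READ | VM_PROT_WRITE,
			entry, MACH_PORT_NULL);
	if (kr != KERN_SUCCESS)
		return kr;

	*addr = 0;
	return mach_vm_map(mach_task_self(), addr, size, 0,
			VM_FLAGS_ANYWHERE, *entry, 0, FALSE,
			VM_PROT_READ | VM_PROT_WRITE,
			VM_PROT_READ | VM_PROT_WRITE,
			VM_INHERIT_NONE);
}

The entry port returned here can later be passed to mach_memory_entry_purgable_control() (defined below) to toggle the object's volatility, and releasing its last send right destroys the entry.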
*/ - kern_return_t -vm_region_object_create( - vm_map_t target_map, - vm_size_t size, - ipc_port_t *object_handle) +task_wire( + vm_map_t map, + boolean_t must_wire) +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + if (must_wire) + map->wiring_required = TRUE; + else + map->wiring_required = FALSE; + + return(KERN_SUCCESS); +} + +__private_extern__ kern_return_t +mach_memory_entry_allocate( + vm_named_entry_t *user_entry_p, + ipc_port_t *user_handle_p) { - vm_named_entry_t user_object; + vm_named_entry_t user_entry; ipc_port_t user_handle; - kern_return_t kr; + ipc_port_t previous; - ipc_port_t previous; - vm_map_t new_map; - - user_object = (vm_named_entry_t) - kalloc(sizeof (struct vm_named_entry)); - if(user_object == NULL) { + user_entry = (vm_named_entry_t) kalloc(sizeof *user_entry); + if (user_entry == NULL) return KERN_FAILURE; - } - named_entry_lock_init(user_object); - user_handle = ipc_port_alloc_kernel(); + named_entry_lock_init(user_entry); + user_handle = ipc_port_alloc_kernel(); + if (user_handle == IP_NULL) { + kfree(user_entry, sizeof *user_entry); + return KERN_FAILURE; + } ip_lock(user_handle); /* make a sonce right */ @@ -1930,126 +2693,155 @@ vm_region_object_create( ipc_port_nsrequest(user_handle, 1, user_handle, &previous); /* nsrequest unlocks user_handle */ - /* Create a named object based on a submap of specified size */ + user_entry->backing.pager = NULL; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->size = 0; + user_entry->internal = FALSE; + user_entry->ref_count = 1; - new_map = vm_map_create(0, 0, size, TRUE); - user_object->backing.map = new_map; + ipc_kobject_set(user_handle, (ipc_kobject_t) user_entry, + IKOT_NAMED_ENTRY); + *user_entry_p = user_entry; + *user_handle_p = user_handle; - user_object->object = VM_OBJECT_NULL; - user_object->internal = TRUE; - user_object->is_sub_map = TRUE; - user_object->offset = 0; - user_object->protection = VM_PROT_ALL; - user_object->size = size; - user_object->ref_count = 1; + return KERN_SUCCESS; +} - ipc_kobject_set(user_handle, (ipc_kobject_t) user_object, - IKOT_NAMED_ENTRY); - *object_handle = user_handle; +/* + * mach_memory_object_memory_entry_64 + * + * Create a named entry backed by the provided pager. + * + * JMM - we need to hold a reference on the pager - + * and release it when the named entry is destroyed. 
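task_wire() simply sets or clears the map's wiring_required flag. A trivial sketch, assuming the routine is exposed to user tasks through the MIG interface like its neighbors in this file:

#include <mach/mach.h>

/* Make every subsequent allocation in this task wired (TRUE),
 * or restore the default behavior (FALSE). */
void
wire_future_allocations(boolean_t on)
{
	(void) task_wire(mach_task_self(), on);
}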
+ */ +kern_return_t +mach_memory_object_memory_entry_64( + host_t host, + boolean_t internal, + vm_object_offset_t size, + vm_prot_t permission, + memory_object_t pager, + ipc_port_t *entry_handle) +{ + unsigned int access; + vm_named_entry_t user_entry; + ipc_port_t user_handle; + + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { + return KERN_FAILURE; + } + + user_entry->backing.pager = pager; + user_entry->size = size; + user_entry->offset = 0; + user_entry->protection = permission & VM_PROT_ALL; + access = GET_MAP_MEM(permission); + SET_MAP_MEM(access, user_entry->protection); + user_entry->internal = internal; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = TRUE; + assert(user_entry->ref_count == 1); + + *entry_handle = user_handle; return KERN_SUCCESS; +} + +kern_return_t +mach_memory_object_memory_entry( + host_t host, + boolean_t internal, + vm_size_t size, + vm_prot_t permission, + memory_object_t pager, + ipc_port_t *entry_handle) +{ + return mach_memory_object_memory_entry_64( host, internal, + (vm_object_offset_t)size, permission, pager, entry_handle); +} + + +kern_return_t +mach_memory_entry_purgable_control( + ipc_port_t entry_port, + vm_purgable_t control, + int *state) +{ + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; -} + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } -/* For a given range, check all map entries. If the entry coresponds to */ -/* the old vm_region/map provided on the call, replace it with the */ -/* corresponding range in the new vm_region/map */ -kern_return_t vm_map_region_replace( - vm_map_t target_map, - ipc_port_t old_region, - ipc_port_t new_region, - vm_offset_t start, - vm_offset_t end) -{ - vm_named_entry_t old_object; - vm_named_entry_t new_object; - vm_map_t old_submap; - vm_map_t new_submap; - vm_offset_t addr; - vm_map_entry_t entry; - int nested_pmap = 0; + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + named_entry_lock(mem_entry); - vm_map_lock(target_map); - old_object = (vm_named_entry_t)old_region->ip_kobject; - new_object = (vm_named_entry_t)new_region->ip_kobject; - if((!old_object->is_sub_map) || (!new_object->is_sub_map)) { - vm_map_unlock(target_map); + if (mem_entry->is_sub_map || mem_entry->is_pager) { + named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } - old_submap = (vm_map_t)old_object->backing.map; - new_submap = (vm_map_t)new_object->backing.map; - vm_map_lock(old_submap); - if((old_submap->min_offset != new_submap->min_offset) || - (old_submap->max_offset != new_submap->max_offset)) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } - if(!vm_map_lookup_entry(target_map, start, &entry)) { - /* if the src is not contained, the entry preceeds */ - /* our range */ - addr = entry->vme_start; - if(entry == vm_map_to_entry(target_map)) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return KERN_SUCCESS; - } - } - if ((entry->use_pmap) && - (new_submap->pmap == NULL)) { - new_submap->pmap = pmap_create((vm_size_t) 0); - if(new_submap->pmap == PMAP_NULL) { - vm_map_unlock(old_submap); - vm_map_unlock(target_map); - return(KERN_NO_SPACE); - } - } - addr = entry->vme_start; - vm_map_reference(old_submap); - while((entry != vm_map_to_entry(target_map)) && - 
(entry->vme_start < end)) {
-		if((entry->is_sub_map) &&
-			(entry->object.sub_map == old_submap)) {
-			if(entry->use_pmap) {
-				if((start & 0x0fffffff) ||
-					((end - start) != 0x10000000)) {
-					vm_map_unlock(old_submap);
-					vm_map_deallocate(old_submap);
-					vm_map_unlock(target_map);
-					return KERN_INVALID_ARGUMENT;
-				}
-				nested_pmap = 1;
-			}
-			entry->object.sub_map = new_submap;
-			vm_map_reference(new_submap);
-			vm_map_deallocate(old_submap);
-		}
-		entry = entry->vme_next;
-		addr = entry->vme_start;
-	}
-	if(nested_pmap) {
-#ifndef i386
-		pmap_unnest(target_map->pmap, (addr64_t)start);
-		if(target_map->mapped) {
-			vm_map_submap_pmap_clean(target_map,
-				start, end, old_submap, 0);
-		}
-		pmap_nest(target_map->pmap, new_submap->pmap,
-				(addr64_t)start, (addr64_t)start,
-				(addr64_t)(end - start));
-#endif /* i386 */
-	} else {
-		vm_map_submap_pmap_clean(target_map,
-			start, end, old_submap, 0);
+
+	vm_object_lock(object);
+
+	/* check that named entry covers entire object ? */
+	if (mem_entry->offset != 0 || object->size != mem_entry->size) {
+		vm_object_unlock(object);
+		named_entry_unlock(mem_entry);
+		return KERN_INVALID_ARGUMENT;
 	}
-	vm_map_unlock(old_submap);
-	vm_map_deallocate(old_submap);
-	vm_map_unlock(target_map);
-	return KERN_SUCCESS;
+
+	named_entry_unlock(mem_entry);
+
+	kr = vm_object_purgable_control(object, control, state);
+
+	vm_object_unlock(object);
+
+	return kr;
 }
 
+/*
+ * mach_memory_entry_port_release:
+ *
+ * Release a send right on a named entry port.  This is the correct
+ * way to destroy a named entry.  When the last right on the port is
+ * released, ipc_kobject_destroy() will call mach_destroy_memory_entry().
+ */
+void
+mach_memory_entry_port_release(
+	ipc_port_t	port)
+{
+	assert(ip_kotype(port) == IKOT_NAMED_ENTRY);
+	ipc_port_release_send(port);
+}
+
+/*
+ * mach_destroy_memory_entry:
+ *
+ * Drops a reference on a memory entry and destroys the memory entry if
+ * there are no more references on it.
+ * NOTE: This routine should not be called to destroy a memory entry from the
+ * kernel, as it will not release the Mach port associated with the memory
+ * entry.  The proper way to destroy a memory entry in the kernel is to
+ * call mach_memory_entry_port_release() to release the kernel's send-right on
+ * the memory entry's port.  When the last send right is released, the memory
+ * entry will be destroyed via ipc_kobject_destroy().
+ */ void mach_destroy_memory_entry( ipc_port_t port) @@ -2060,107 +2852,24 @@ mach_destroy_memory_entry( #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; mutex_lock(&(named_entry)->Lock); - named_entry->ref_count-=1; + named_entry->ref_count -= 1; if(named_entry->ref_count == 0) { - if(named_entry->object) { - /* release the memory object we've been pointing to */ - vm_object_deallocate(named_entry->object); - } - if(named_entry->is_sub_map) { + if (named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); - } - kfree((vm_offset_t)port->ip_kobject, - sizeof (struct vm_named_entry)); + } else if (!named_entry->is_pager) { + /* release the memory object we've been pointing to */ + vm_object_deallocate(named_entry->backing.object); + } /* else JMM - need to drop reference on pager in that case */ + + mutex_unlock(&(named_entry)->Lock); + + kfree((void *) port->ip_kobject, + sizeof (struct vm_named_entry)); } else mutex_unlock(&(named_entry)->Lock); } -kern_return_t -vm_map_page_query( - vm_map_t target_map, - vm_offset_t offset, - int *disposition, - int *ref_count) -{ - vm_map_entry_t map_entry; - vm_object_t object; - vm_page_t m; - -restart_page_query: - *disposition = 0; - *ref_count = 0; - vm_map_lock(target_map); - if(!vm_map_lookup_entry(target_map, offset, &map_entry)) { - vm_map_unlock(target_map); - return KERN_FAILURE; - } - offset -= map_entry->vme_start; /* adjust to offset within entry */ - offset += map_entry->offset; /* adjust to target object offset */ - if(map_entry->object.vm_object != VM_OBJECT_NULL) { - if(!map_entry->is_sub_map) { - object = map_entry->object.vm_object; - } else { - vm_map_unlock(target_map); - target_map = map_entry->object.sub_map; - goto restart_page_query; - } - } else { - vm_map_unlock(target_map); - return KERN_FAILURE; - } - vm_object_lock(object); - vm_map_unlock(target_map); - while(TRUE) { - m = vm_page_lookup(object, offset); - if (m != VM_PAGE_NULL) { - *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; - break; - } else { - if(object->shadow) { - offset += object->shadow_offset; - vm_object_unlock(object); - object = object->shadow; - vm_object_lock(object); - continue; - } - vm_object_unlock(object); - return KERN_FAILURE; - } - } - - /* The ref_count is not strictly accurate, it measures the number */ - /* of entities holding a ref on the object, they may not be mapping */ - /* the object or may not be mapping the section holding the */ - /* target page but its still a ball park number and though an over- */ - /* count, it picks up the copy-on-write cases */ - - /* We could also get a picture of page sharing from pmap_attributes */ - /* but this would under count as only faulted-in mappings would */ - /* show up. 
*/ - - *ref_count = object->ref_count; - - if (m->fictitious) { - *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; - vm_object_unlock(object); - return KERN_SUCCESS; - } - - if (m->dirty) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; - else if(pmap_is_modified(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; - - if (m->reference) - *disposition |= VM_PAGE_QUERY_PAGE_REF; - else if(pmap_is_referenced(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_REF; - - vm_object_unlock(object); - return KERN_SUCCESS; - -} kern_return_t set_dp_control_port( @@ -2190,234 +2899,48 @@ get_dp_control_port( } +/* ******* Temporary Internal calls to UPL for BSD ***** */ -/* Retrieve a upl for an object underlying an address range in a map */ - -kern_return_t -vm_map_get_upl( - vm_map_t map, - vm_address_t offset, - vm_size_t *upl_size, - upl_t *upl, - upl_page_info_array_t page_list, - unsigned int *count, - int *flags, - int force_data_sync) -{ - vm_map_entry_t entry; - int caller_flags; - int sync_cow_data = FALSE; - vm_object_t local_object; - vm_offset_t local_offset; - vm_offset_t local_start; - kern_return_t ret; - - caller_flags = *flags; - if (!(caller_flags & UPL_COPYOUT_FROM)) { - sync_cow_data = TRUE; - } - if(upl == NULL) - return KERN_INVALID_ARGUMENT; - - -REDISCOVER_ENTRY: - vm_map_lock(map); - if (vm_map_lookup_entry(map, offset, &entry)) { - if (entry->object.vm_object == VM_OBJECT_NULL || - !entry->object.vm_object->phys_contiguous) { - if((*upl_size/page_size) > MAX_UPL_TRANSFER) { - *upl_size = MAX_UPL_TRANSFER * page_size; - } - } - if((entry->vme_end - offset) < *upl_size) { - *upl_size = entry->vme_end - offset; - } - if (caller_flags & UPL_QUERY_OBJECT_TYPE) { - if (entry->object.vm_object == VM_OBJECT_NULL) { - *flags = 0; - } else if (entry->object.vm_object->private) { - *flags = UPL_DEV_MEMORY; - if (entry->object.vm_object->phys_contiguous) { - *flags |= UPL_PHYS_CONTIG; - } - } else { - *flags = 0; - } - vm_map_unlock(map); - return KERN_SUCCESS; - } - /* - * Create an object if necessary. 
- */ - if (entry->object.vm_object == VM_OBJECT_NULL) { - entry->object.vm_object = vm_object_allocate( - (vm_size_t)(entry->vme_end - entry->vme_start)); - entry->offset = 0; - } - if (!(caller_flags & UPL_COPYOUT_FROM)) { - if (!(entry->protection & VM_PROT_WRITE)) { - vm_map_unlock(map); - return KERN_PROTECTION_FAILURE; - } - if (entry->needs_copy) { - vm_map_t local_map; - vm_object_t object; - vm_object_offset_t offset_hi; - vm_object_offset_t offset_lo; - vm_object_offset_t new_offset; - vm_prot_t prot; - boolean_t wired; - vm_behavior_t behavior; - vm_map_version_t version; - vm_map_t pmap_map; - - local_map = map; - vm_map_lock_write_to_read(map); - if(vm_map_lookup_locked(&local_map, - offset, VM_PROT_WRITE, - &version, &object, - &new_offset, &prot, &wired, - &behavior, &offset_lo, - &offset_hi, &pmap_map)) { - vm_map_unlock(local_map); - return KERN_FAILURE; - } - if (pmap_map != map) { - vm_map_unlock(pmap_map); - } - vm_object_unlock(object); - vm_map_unlock(local_map); - - goto REDISCOVER_ENTRY; - } - } - if (entry->is_sub_map) { - vm_map_t submap; - - submap = entry->object.sub_map; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_map_reference(submap); - vm_map_unlock(map); - - ret = (vm_map_get_upl(submap, - local_offset + (offset - local_start), - upl_size, upl, page_list, count, - flags, force_data_sync)); - - vm_map_deallocate(submap); - return ret; - } - - if (sync_cow_data) { - if (entry->object.vm_object->shadow - || entry->object.vm_object->copy) { - int flags; - - local_object = entry->object.vm_object; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_object_reference(local_object); - vm_map_unlock(map); +extern int kernel_upl_map( + vm_map_t map, + upl_t upl, + vm_offset_t *dst_addr); - if(local_object->copy == NULL) { - flags = MEMORY_OBJECT_DATA_SYNC; - } else { - flags = MEMORY_OBJECT_COPY_SYNC; - } +extern int kernel_upl_unmap( + vm_map_t map, + upl_t upl); - if (entry->object.vm_object->shadow && - entry->object.vm_object->copy) { - vm_object_lock_request( - local_object->shadow, - (vm_object_offset_t) - ((offset - local_start) + - local_offset) + - local_object->shadow_offset, - *upl_size, FALSE, - MEMORY_OBJECT_DATA_SYNC, - VM_PROT_NO_CHANGE); - } - sync_cow_data = FALSE; - vm_object_deallocate(local_object); - goto REDISCOVER_ENTRY; - } - } +extern int kernel_upl_commit( + upl_t upl, + upl_page_info_t *pl, + mach_msg_type_number_t count); - if (force_data_sync) { - - local_object = entry->object.vm_object; - local_start = entry->vme_start; - local_offset = entry->offset; - vm_object_reference(local_object); - vm_map_unlock(map); - - vm_object_lock_request( - local_object, - (vm_object_offset_t) - ((offset - local_start) + local_offset), - (vm_object_size_t)*upl_size, FALSE, - MEMORY_OBJECT_DATA_SYNC, - VM_PROT_NO_CHANGE); - force_data_sync = FALSE; - vm_object_deallocate(local_object); - goto REDISCOVER_ENTRY; - } +extern int kernel_upl_commit_range( + upl_t upl, + upl_offset_t offset, + upl_size_t size, + int flags, + upl_page_info_array_t pl, + mach_msg_type_number_t count); - if(!(entry->object.vm_object->private)) { - if(*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE)) - *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE); - if(entry->object.vm_object->phys_contiguous) { - *flags = UPL_PHYS_CONTIG; - } else { - *flags = 0; - } - } else { - *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG; - } - local_object = entry->object.vm_object; - local_offset = entry->offset; - local_start = entry->vme_start; - 
vm_object_reference(local_object); - vm_map_unlock(map); - if(caller_flags & UPL_SET_IO_WIRE) { - ret = (vm_object_iopl_request(local_object, - (vm_object_offset_t) - ((offset - local_start) - + local_offset), - *upl_size, - upl, - page_list, - count, - caller_flags)); - } else { - ret = (vm_object_upl_request(local_object, - (vm_object_offset_t) - ((offset - local_start) - + local_offset), - *upl_size, - upl, - page_list, - count, - caller_flags)); - } - vm_object_deallocate(local_object); - return(ret); - } +extern int kernel_upl_abort( + upl_t upl, + int abort_type); - vm_map_unlock(map); - return(KERN_FAILURE); +extern int kernel_upl_abort_range( + upl_t upl, + upl_offset_t offset, + upl_size_t size, + int abort_flags); -} -/* ******* Temporary Internal calls to UPL for BSD ***** */ kern_return_t kernel_upl_map( vm_map_t map, upl_t upl, vm_offset_t *dst_addr) { - return (vm_upl_map(map, upl, dst_addr)); + return vm_upl_map(map, upl, dst_addr); } @@ -2426,13 +2949,13 @@ kernel_upl_unmap( vm_map_t map, upl_t upl) { - return(vm_upl_unmap(map, upl)); + return vm_upl_unmap(map, upl); } kern_return_t kernel_upl_commit( - upl_t upl, - upl_page_info_t *pl, + upl_t upl, + upl_page_info_t *pl, mach_msg_type_number_t count) { kern_return_t kr; @@ -2446,8 +2969,8 @@ kernel_upl_commit( kern_return_t kernel_upl_commit_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int flags, upl_page_info_array_t pl, mach_msg_type_number_t count) @@ -2469,8 +2992,8 @@ kernel_upl_commit_range( kern_return_t kernel_upl_abort_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int abort_flags) { kern_return_t kr; @@ -2499,237 +3022,76 @@ kernel_upl_abort( return kr; } +/* + * Now a kernel-private interface (for BootCache + * use only). Need a cleaner way to create an + * empty vm_map() and return a handle to it. 
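+ *
+ * Sketch of the intended use (illustrative only): the returned handle
+ * is an ordinary named-entry port, so a client can hand it to vm_map()
+ * like any other memory entry:
+ *
+ *	ipc_port_t	handle;
+ *	vm_offset_t	addr = 0;
+ *
+ *	kr = vm_region_object_create(map, size, &handle);
+ *	if (kr == KERN_SUCCESS)
+ *		kr = vm_map(map, &addr, size, 0, VM_FLAGS_ANYWHERE,
+ *			handle, 0, FALSE, VM_PROT_DEFAULT,
+ *			VM_PROT_ALL, VM_INHERIT_DEFAULT);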
+ */ kern_return_t -vm_get_shared_region( - task_t task, - shared_region_mapping_t *shared_region) -{ - *shared_region = (shared_region_mapping_t) task->system_shared_region; - return KERN_SUCCESS; -} - -kern_return_t -vm_set_shared_region( - task_t task, - shared_region_mapping_t shared_region) -{ - task->system_shared_region = (vm_offset_t) shared_region; - return KERN_SUCCESS; -} - -kern_return_t -shared_region_mapping_info( - shared_region_mapping_t shared_region, - ipc_port_t *text_region, - vm_size_t *text_size, - ipc_port_t *data_region, - vm_size_t *data_size, - vm_offset_t *region_mappings, - vm_offset_t *client_base, - vm_offset_t *alt_base, - vm_offset_t *alt_next, - unsigned int *fs_base, - unsigned int *system, - int *flags, - shared_region_mapping_t *next) +vm_region_object_create( + __unused vm_map_t target_map, + vm_size_t size, + ipc_port_t *object_handle) { - shared_region_mapping_lock(shared_region); - - *text_region = shared_region->text_region; - *text_size = shared_region->text_size; - *data_region = shared_region->data_region; - *data_size = shared_region->data_size; - *region_mappings = shared_region->region_mappings; - *client_base = shared_region->client_base; - *alt_base = shared_region->alternate_base; - *alt_next = shared_region->alternate_next; - *flags = shared_region->flags; - *fs_base = shared_region->fs_base; - *system = shared_region->system; - *next = shared_region->next; - - shared_region_mapping_unlock(shared_region); -} + vm_named_entry_t user_entry; + ipc_port_t user_handle; -kern_return_t -shared_region_object_chain_attach( - shared_region_mapping_t target_region, - shared_region_mapping_t object_chain_region) -{ - shared_region_object_chain_t object_ele; + vm_map_t new_map; - if(target_region->object_chain) + if (mach_memory_entry_allocate(&user_entry, &user_handle) + != KERN_SUCCESS) { return KERN_FAILURE; - object_ele = (shared_region_object_chain_t) - kalloc(sizeof (struct shared_region_object_chain)); - shared_region_mapping_lock(object_chain_region); - target_region->object_chain = object_ele; - object_ele->object_chain_region = object_chain_region; - object_ele->next = object_chain_region->object_chain; - object_ele->depth = object_chain_region->depth; - object_chain_region->depth++; - target_region->alternate_next = object_chain_region->alternate_next; - shared_region_mapping_unlock(object_chain_region); - return KERN_SUCCESS; -} + } -kern_return_t -shared_region_mapping_create( - ipc_port_t text_region, - vm_size_t text_size, - ipc_port_t data_region, - vm_size_t data_size, - vm_offset_t region_mappings, - vm_offset_t client_base, - shared_region_mapping_t *shared_region, - vm_offset_t alt_base, - vm_offset_t alt_next) -{ - *shared_region = (shared_region_mapping_t) - kalloc(sizeof (struct shared_region_mapping)); - if(*shared_region == NULL) - return KERN_FAILURE; - shared_region_mapping_lock_init((*shared_region)); - (*shared_region)->text_region = text_region; - (*shared_region)->text_size = text_size; - (*shared_region)->fs_base = ENV_DEFAULT_ROOT; - (*shared_region)->system = machine_slot[cpu_number()].cpu_type; - (*shared_region)->data_region = data_region; - (*shared_region)->data_size = data_size; - (*shared_region)->region_mappings = region_mappings; - (*shared_region)->client_base = client_base; - (*shared_region)->ref_count = 1; - (*shared_region)->next = NULL; - (*shared_region)->object_chain = NULL; - (*shared_region)->self = *shared_region; - (*shared_region)->flags = 0; - (*shared_region)->depth = 0; - 
(*shared_region)->default_env_list = NULL; - (*shared_region)->alternate_base = alt_base; - (*shared_region)->alternate_next = alt_next; - return KERN_SUCCESS; -} + /* Create a named object based on a submap of specified size */ -kern_return_t -shared_region_mapping_set_alt_next( - shared_region_mapping_t shared_region, - vm_offset_t alt_next) -{ - shared_region->alternate_next = alt_next; - return KERN_SUCCESS; -} + new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS, + vm_map_round_page(size), TRUE); -kern_return_t -shared_region_mapping_ref( - shared_region_mapping_t shared_region) -{ - if(shared_region == NULL) - return KERN_SUCCESS; - hw_atomic_add(&shared_region->ref_count, 1); - return KERN_SUCCESS; -} + user_entry->backing.map = new_map; + user_entry->internal = TRUE; + user_entry->is_sub_map = TRUE; + user_entry->offset = 0; + user_entry->protection = VM_PROT_ALL; + user_entry->size = size; + assert(user_entry->ref_count == 1); -__private_extern__ kern_return_t -shared_region_mapping_dealloc_lock( - shared_region_mapping_t shared_region, - int need_lock) -{ - struct shared_region_task_mappings sm_info; - shared_region_mapping_t next = NULL; - int ref_count; - - while (shared_region) { - if ((ref_count = - hw_atomic_sub(&shared_region->ref_count, 1)) == 0) { - shared_region_mapping_lock(shared_region); - - sm_info.text_region = shared_region->text_region; - sm_info.text_size = shared_region->text_size; - sm_info.data_region = shared_region->data_region; - sm_info.data_size = shared_region->data_size; - sm_info.region_mappings = shared_region->region_mappings; - sm_info.client_base = shared_region->client_base; - sm_info.alternate_base = shared_region->alternate_base; - sm_info.alternate_next = shared_region->alternate_next; - sm_info.flags = shared_region->flags; - sm_info.self = (vm_offset_t)shared_region; - - if(shared_region->region_mappings) { - lsf_remove_regions_mappings_lock(shared_region, &sm_info, need_lock); - } - if(((vm_named_entry_t) - (shared_region->text_region->ip_kobject)) - ->backing.map->pmap) { - pmap_remove(((vm_named_entry_t) - (shared_region->text_region->ip_kobject)) - ->backing.map->pmap, - sm_info.client_base, - sm_info.client_base + sm_info.text_size); - } - ipc_port_release_send(shared_region->text_region); - if(shared_region->data_region) - ipc_port_release_send(shared_region->data_region); - if (shared_region->object_chain) { - next = shared_region->object_chain->object_chain_region; - kfree((vm_offset_t)shared_region->object_chain, - sizeof (struct shared_region_object_chain)); - } else { - next = NULL; - } - shared_region_mapping_unlock(shared_region); - kfree((vm_offset_t)shared_region, - sizeof (struct shared_region_mapping)); - shared_region = next; - } else { - /* Stale indicates that a system region is no */ - /* longer in the default environment list. */ - if((ref_count == 1) && - (shared_region->flags & SHARED_REGION_SYSTEM) - && (shared_region->flags & ~SHARED_REGION_STALE)) { - remove_default_shared_region_lock(shared_region,need_lock); - } - break; - } - } + *object_handle = user_handle; return KERN_SUCCESS; -} -/* - * Stub function; always indicates that the lock needs to be taken in the - * call to lsf_remove_regions_mappings_lock(). 
- */ -kern_return_t -shared_region_mapping_dealloc( - shared_region_mapping_t shared_region) -{ - return shared_region_mapping_dealloc_lock(shared_region, 1); } +ppnum_t vm_map_get_phys_page( /* forward */ + vm_map_t map, + vm_offset_t offset); + ppnum_t vm_map_get_phys_page( - vm_map_t map, - vm_offset_t offset) + vm_map_t map, + vm_offset_t addr) { - vm_map_entry_t entry; - int ops; - int flags; - ppnum_t phys_page = 0; - vm_object_t object; + vm_object_offset_t offset; + vm_object_t object; + vm_map_offset_t map_offset; + vm_map_entry_t entry; + ppnum_t phys_page = 0; + + map_offset = vm_map_trunc_page(addr); vm_map_lock(map); - while (vm_map_lookup_entry(map, offset, &entry)) { + while (vm_map_lookup_entry(map, map_offset, &entry)) { if (entry->object.vm_object == VM_OBJECT_NULL) { vm_map_unlock(map); - return (vm_offset_t) 0; + return (ppnum_t) 0; } if (entry->is_sub_map) { vm_map_t old_map; vm_map_lock(entry->object.sub_map); old_map = map; map = entry->object.sub_map; - offset = entry->offset + (offset - entry->vme_start); + map_offset = entry->offset + (map_offset - entry->vme_start); vm_map_unlock(old_map); continue; } @@ -2741,19 +3103,19 @@ vm_map_get_phys_page( if(entry->object.vm_object->shadow_offset == 0) { /* need to call vm_fault */ vm_map_unlock(map); - vm_fault(map, offset, VM_PROT_NONE, + vm_fault(map, map_offset, VM_PROT_NONE, FALSE, THREAD_UNINT, NULL, 0); vm_map_lock(map); continue; } - offset = entry->offset + (offset - entry->vme_start); + offset = entry->offset + (map_offset - entry->vme_start); phys_page = (ppnum_t) ((entry->object.vm_object->shadow_offset + offset) >> 12); break; } - offset = entry->offset + (offset - entry->vme_start); + offset = entry->offset + (map_offset - entry->vme_start); object = entry->object.vm_object; vm_object_lock(object); while (TRUE) { @@ -2786,6 +3148,15 @@ vm_map_get_phys_page( +kern_return_t kernel_object_iopl_request( /* forward */ + vm_named_entry_t named_entry, + memory_object_offset_t offset, + vm_size_t *upl_size, + upl_t *upl_ptr, + upl_page_info_array_t user_page_list, + unsigned int *page_list_count, + int *flags); + kern_return_t kernel_object_iopl_request( vm_named_entry_t named_entry, @@ -2803,6 +3174,14 @@ kernel_object_iopl_request( caller_flags = *flags; + if (caller_flags & ~UPL_VALID_FLAGS) { + /* + * For forward compatibility's sake, + * reject any unknown flag. + */ + return KERN_INVALID_VALUE; + } + /* a few checks to make sure user is obeying rules */ if(*upl_size == 0) { if(offset >= named_entry->size) @@ -2833,17 +3212,7 @@ kernel_object_iopl_request( named_entry_lock(named_entry); - if(named_entry->object) { - /* This is the case where we are going to map */ - /* an already mapped object. If the object is */ - /* not ready it is internal. An external */ - /* object cannot be mapped until it is ready */ - /* we can therefore avoid the ready check */ - /* in this case. */ - vm_object_reference(named_entry->object); - object = named_entry->object; - named_entry_unlock(named_entry); - } else { + if (named_entry->is_pager) { object = vm_object_enter(named_entry->backing.pager, named_entry->offset + named_entry->size, named_entry->internal, @@ -2853,21 +3222,37 @@ kernel_object_iopl_request( named_entry_unlock(named_entry); return(KERN_INVALID_OBJECT); } - vm_object_lock(object); - /* create an extra reference for the named entry */ + /* JMM - drop reference on the pager here? 
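+	 *
+	 * (Note that "backing" is a union: the assignment to
+	 * backing.object below overwrites backing.pager, and any
+	 * reference the entry held on the pager is not released here;
+	 * that appears to be the concern behind the question above.)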
*/ + + /* create an extra reference for the object */ + vm_object_lock(object); vm_object_reference_locked(object); - named_entry->object = object; + named_entry->backing.object = object; + named_entry->is_pager = FALSE; named_entry_unlock(named_entry); /* wait for object (if any) to be ready */ - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); - vm_object_lock(object); + if (!named_entry->internal) { + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } } vm_object_unlock(object); + + } else { + /* This is the case where we are going to operate */ + /* an an already known object. If the object is */ + /* not ready it is internal. An external */ + /* object cannot be mapped until it is ready */ + /* we can therefore avoid the ready check */ + /* in this case. */ + object = named_entry->backing.object; + vm_object_reference(object); + named_entry_unlock(named_entry); } if (!object->private) { @@ -2892,5 +3277,3 @@ kernel_object_iopl_request( vm_object_deallocate(object); return ret; } - -#endif /* VM_CPM */ diff --git a/pexpert/conf/Makefile.i386 b/pexpert/conf/Makefile.i386 index c64ebabcc..f807e9ecb 100644 --- a/pexpert/conf/Makefile.i386 +++ b/pexpert/conf/Makefile.i386 @@ -1,7 +1,21 @@ ###################################################################### #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### -CFLAGS += -DNCPUS=2 + +# Enable -Werror for i386 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR= \ + ioconf.o \ + bootargs.o \ + device_tree.o + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/pexpert/conf/Makefile.ppc b/pexpert/conf/Makefile.ppc index 657b63741..0d463a9a2 100644 --- a/pexpert/conf/Makefile.ppc +++ b/pexpert/conf/Makefile.ppc @@ -1,7 +1,6 @@ ###################################################################### #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### -CFLAGS += -DNCPUS=1 ###################################################################### #END Machine dependent Makefile fragment for ppc ###################################################################### diff --git a/pexpert/conf/Makefile.template b/pexpert/conf/Makefile.template index 8ef62f8b4..388c6f036 100644 --- a/pexpert/conf/Makefile.template +++ b/pexpert/conf/Makefile.template @@ -45,14 +45,6 @@ COMP_SUBDIRS = # .PRECIOUS: Makefile -VERSION_FILES= \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.major \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.minor \ - $(SOURCE_DIR)/$(COMPONENT)/conf/version.variant - -COPYRIGHT_FILES = \ - $(SOURCE_DIR)/$(COMPONENT)/conf/copyright - # # Theses macros are filled in by the config program depending on the # current configuration. 
The MACHDEP macro is replaced by the @@ -88,12 +80,8 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) @echo "creating $(COMPONENT).o" - $(RM) $(RMFLAGS) vers.c - $(COMPOBJROOT)/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - ${KCC} $(CFLAGS) $(INCLUDES) -c vers.c @echo [ updating $(COMPONENT).o ${PEXPERT_KERNEL_CONFIG} ] - $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} do_depend: do_all ${MD} -u Makedep -f -d `ls *.d`; diff --git a/pexpert/conf/tools/Makefile b/pexpert/conf/tools/Makefile index 9df86ce8c..4f9ccd553 100644 --- a/pexpert/conf/tools/Makefile +++ b/pexpert/conf/tools/Makefile @@ -7,13 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = \ - doconf \ - newvers +SETUP_SUBDIRS = doconf -COMP_SUBDIRS = \ - doconf \ - newvers +COMP_SUBDIRS = doconf INST_SUBDIRS = \ diff --git a/pexpert/conf/tools/newvers/Makefile b/pexpert/conf/tools/newvers/Makefile deleted file mode 100644 index 73603c753..000000000 --- a/pexpert/conf/tools/newvers/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - -# -# Who and where -# -BINDIR= -DSTDIR= $(strip $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/) -PROGRAM= $(DSTDIR)newvers - -# -# How to install it -# -IFLAGS= -c -m 555 - -$(PROGRAM): $(DSTDIR)% : $(SOURCE)%.csh - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS - sed -e "s/#PROGRAM.*/#`vers_string $(notdir $(PROGRAM))`/" \ - < $< >$(notdir $(PROGRAM)).VERS; - install $(IFLAGS) $(notdir $(PROGRAM)).VERS $(PROGRAM); - -$(RM) $(RMFLAGS) $(notdir $(PROGRAM)).VERS; - -do_build_setup: $(PROGRAM) - -do_build_all: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: - @echo "[ $(SOURCE) ] make do_build_all $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/pexpert/conf/tools/newvers/newvers.csh b/pexpert/conf/tools/newvers/newvers.csh deleted file mode 100644 index 293d416e4..000000000 --- a/pexpert/conf/tools/newvers/newvers.csh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh - -# -# Mach Operating System -# Copyright (c) 1990 Carnegie-Mellon University -# Copyright (c) 1989 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement specifies -# the terms and conditions for use and redistribution. 
-# - -# -# newvers.sh copyright major minor variant -# - -major="$1"; minor="$2"; variant="$3" -v="${major}.${minor}" d=`pwd` h="rcbuilder" t=`date` w=`whoami` -if [ -z "$d" -o -z "$h" -o -z "$t" ]; then - exit 1 -fi -CONFIG=`expr "$d" : '.*/\([^/]*\)$'` -d=`expr "$d" : '.*/\([^/]*/[^/]*/[^/]*\)$'` -( - /bin/echo "int ${COMPONENT}_version_major = ${major};" ; - /bin/echo "int ${COMPONENT}_version_minor = ${minor};" ; - /bin/echo "char ${COMPONENT}_version_variant[] = \"${variant}\";" ; - /bin/echo "char ${COMPONENT}_version[] = \"Platform Expert Component Version ${v}:\\n${t}; $w($h):$d\\n\";" ; - /bin/echo "char ${COMPONENT}_osrelease[] = \"${major}.${minor}\";" ; - /bin/echo "char ${COMPONENT}_ostype[] = \"Platform Expert\";" ; -) > vers.c -if [ -s vers.suffix -o ! -f vers.suffix ]; then - rm -f vers.suffix - echo ".${variant}.${CONFIG}" > vers.suffix -fi -exit 0 diff --git a/pexpert/conf/version.major b/pexpert/conf/version.major deleted file mode 100644 index 7f8f011eb..000000000 --- a/pexpert/conf/version.major +++ /dev/null @@ -1 +0,0 @@ -7 diff --git a/pexpert/conf/version.minor b/pexpert/conf/version.minor deleted file mode 100644 index ec635144f..000000000 --- a/pexpert/conf/version.minor +++ /dev/null @@ -1 +0,0 @@ -9 diff --git a/pexpert/conf/version.variant b/pexpert/conf/version.variant deleted file mode 100644 index 573541ac9..000000000 --- a/pexpert/conf/version.variant +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/pexpert/gen/bootargs.c b/pexpert/gen/bootargs.c index a3e5e88dd..09272e238 100644 --- a/pexpert/gen/bootargs.c +++ b/pexpert/gen/bootargs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -28,10 +28,19 @@ extern int getval(char *s, int *val); #define NUM 0 #define STR 1 -boolean_t +boolean_t PE_parse_boot_arg( - char *arg_string, - void *arg_ptr) + const char *arg_string, + void *arg_ptr) +{ + return PE_parse_boot_argn(arg_string, arg_ptr, -1); +} + +boolean_t +PE_parse_boot_argn( + const char *arg_string, + void *arg_ptr, + int max_len) { char *args; char *cp, c; @@ -78,11 +87,12 @@ PE_parse_boot_arg( goto gotit; } if ('_' == *arg_string) /* Force a string copy if the argument name begins with an underscore */ - { - argstrcpy2 (++cp, (char *)arg_ptr, 16); /* Hack - terminate after 16 characters */ - arg_found = TRUE; - break; - } + { + int hacklen = 16 > max_len ? 16 : max_len; + argstrcpy2 (++cp, (char *)arg_ptr, hacklen); /* Hack - terminate after 16 characters */ + arg_found = TRUE; + break; + } switch (getval(cp, &val)) { case NUM: @@ -90,7 +100,10 @@ PE_parse_boot_arg( arg_found = TRUE; break; case STR: - argstrcpy(++cp, (char *)arg_ptr); + if(max_len > 0) //max_len of 0 performs no copy at all + argstrcpy2(++cp, (char *)arg_ptr, max_len); + else if(max_len == -1) + argstrcpy(++cp, (char *)arg_ptr); arg_found = TRUE; break; } diff --git a/pexpert/gen/device_tree.c b/pexpert/gen/device_tree.c index a26878559..fe8e6940c 100644 --- a/pexpert/gen/device_tree.c +++ b/pexpert/gen/device_tree.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -26,7 +26,12 @@ #include #include #include + +#include #include +#include +#include + #include #ifdef i386 #include @@ -299,9 +304,9 @@ DTDisposeEntryIterator(DTEntryIterator iterator) while ((scope = iter->savedScope) != NULL) { iter->savedScope = scope->nextScope; - kfree((vm_offset_t) scope, sizeof(struct DTSavedScope)); + kfree(scope, sizeof(struct DTSavedScope)); } - kfree((vm_offset_t) iterator, sizeof(struct OpaqueDTEntryIterator)); + kfree(iterator, sizeof(struct OpaqueDTEntryIterator)); return kSuccess; } @@ -344,7 +349,7 @@ DTExitEntry(DTEntryIterator iterator, DTEntry *currentPosition) iter->currentIndex = newScope->index; *currentPosition = iter->currentEntry; - kfree((vm_offset_t) newScope, sizeof(struct DTSavedScope)); + kfree(newScope, sizeof(struct DTSavedScope)); return kSuccess; } @@ -432,7 +437,7 @@ DTCreatePropertyIterator(const DTEntry entry, DTPropertyIterator *iterator) int DTDisposePropertyIterator(DTPropertyIterator iterator) { - kfree((vm_offset_t)iterator, sizeof(struct OpaqueDTPropertyIterator)); + kfree(iterator, sizeof(struct OpaqueDTPropertyIterator)); return kSuccess; } diff --git a/pexpert/gen/pe_gen.c b/pexpert/gen/pe_gen.c index 2d96feadf..bed890a89 100644 --- a/pexpert/gen/pe_gen.c +++ b/pexpert/gen/pe_gen.c @@ -28,6 +28,8 @@ static int DEBUGFlag; +int32_t gPESerialBaud = -1; + void pe_init_debug(void) { if (!PE_parse_boot_arg("debug", &DEBUGFlag)) diff --git a/pexpert/i386/fakePPCDeviceTree.c b/pexpert/i386/fakePPCDeviceTree.c index 474c0a6b7..66480bc14 100644 --- a/pexpert/i386/fakePPCDeviceTree.c +++ b/pexpert/i386/fakePPCDeviceTree.c @@ -24,21 +24,12 @@ #include "fakePPCStructs.h" -boot_args fakePPCBootArgs = { - 0, // Revision - kBootArgsVersion, // Version - "", // CommandLine - {{0}}, // PhysicalDRAM - {0}, // machine_type - 0, // deviceTreeP - 0, // deviceTreeLength - 0, // topOfKernelData -}; +boot_args fakePPCBootArgs = { .Version = kBootArgsVersion }; void * createdt(dt_init * template, long * retSize) { dt_init * next; - int size, allocSize; + size_t size, allocSize; vm_address_t out, saveout; void * source; @@ -56,12 +47,17 @@ void * createdt(dt_init * template, long * retSize) { allocSize += *(next->dataInit.length); } + else if ( next->stringInit.two == 2 ) + { + dt_data *dp = (dt_data *)(next->stringInit.data); + allocSize += (32 + 4 + 3 + dp->length) & (-4); + } else { allocSize += (32 + 4 + 3 + next->propInit.length) & (-4); } } - saveout = out = kalloc(allocSize); + saveout = out = (vm_address_t) kalloc(allocSize); if ( out == 0 ) return 0; // copy out @@ -75,13 +71,24 @@ void * createdt(dt_init * template, long * retSize) } else if ( next->dataInit.one == 1 ) { - *(next->dataInit.address) = out; + *((long *)next->dataInit.address) = out; source = 0; size = *(next->dataInit.length); } + else if ( next->stringInit.two == 2 ) + { + dt_data *dp = (dt_data *)next->stringInit.data; + bcopy( (void *)(uintptr_t)next->stringInit.name, (void *)out, 32); + out += 32; + size = dp->length; + *(long *)out = size; + out += sizeof(long); + source = (char *)dp->address; + size = (size + 3) & (-4); + } else { - bcopy( next->propInit.name, (void *)out, 32); + bcopy( (void *)(uintptr_t)next->propInit.name, (void *)out, 32); out += 32; size = next->propInit.length; *(long *)out = size; @@ -125,14 +132,14 @@ typedef struct node_t { } node_t; -int indent = 0; +unsigned int indent = 0; void printdt() { node_t *nodeptr = (node_t *)nptr; - long num_props = nodeptr->nProperties; - long len; - int i, j; + unsigned long 
num_props = nodeptr->nProperties;
+    unsigned long	len;
+    unsigned int	i, j;
     unsigned char	*sptr;
 
     nptr = (unsigned char *)&nodeptr->props;
@@ -143,7 +150,7 @@ printdt()
         printf("'");
         printf("%s", nptr);
         nptr+=32;
-        len = *((long*)nptr);
+        len = *((unsigned long*)nptr);
         nptr += 4;
         printf("'\t\t(%ld) '", len);
         sptr = nptr;
diff --git a/pexpert/i386/fakePPCDeviceTree.h b/pexpert/i386/fakePPCDeviceTree.h
index 40300fbb3..5c234c08e 100644
--- a/pexpert/i386/fakePPCDeviceTree.h
+++ b/pexpert/i386/fakePPCDeviceTree.h
@@ -22,14 +22,19 @@ extern dt_data gMemoryMapNode;
 extern dt_data gDriversProp;
+extern dt_data gRootpathProp;
+extern dt_data gCompatibleProp;
+
+#define kDefaultPlatformName "ACPI"
 
 dt_init fakePPCDeviceTree[] = {
     NODE( 2, 1 ),
       PROP( "name", "device-tree"),
-      PROP( "compatible", "X86PC"),
-      NODE( 1, 1 ),
+      STRINGPROP( "compatible", gCompatibleProp ),
+      NODE( 2, 1 ),
         PROP( "name", "chosen" ),
+        STRINGPROP( "rootpath", gRootpathProp ),
       DATANODE( gMemoryMapNode ),
         PROP( "name", "memory-map" ),
       DATAPROP( gDriversProp ),
diff --git a/pexpert/i386/fakePPCStructs.h b/pexpert/i386/fakePPCStructs.h
index 2f0fef665..53447b30a 100644
--- a/pexpert/i386/fakePPCStructs.h
+++ b/pexpert/i386/fakePPCStructs.h
@@ -23,7 +23,7 @@
 #include
 
 typedef struct {
-    char * name;
+    const char * name;
     unsigned long length;
     void * value;
 } prop_init;
 
@@ -40,15 +40,22 @@ typedef struct {
     long * address;
 } data_init;
 
+typedef struct {
+    long two;
+    const char * name;
+    void * data;
+} string_init;
+
 typedef union {
     prop_init propInit;
     node_init nodeInit;
     data_init dataInit;
+    string_init stringInit;
 } dt_init;
 
 typedef struct {
     long length;
-    long * address;
+    void * address;
 } dt_data;
 
 extern boot_args fakePPCBootArgs;
@@ -58,19 +65,22 @@ void printdt(void);
 void * createdt(dt_init * template, long * retSize);
 
 #define NODE(props,children) \
-    {{(char *)0, props, (void *)children }}
+    { .nodeInit = {0, props, children }}
 
 #define INTPROP(name,value) \
-    {{name, 4, (void *)value }}
+    { .propInit = {name, 4, (void *)(uintptr_t)value }}
 
 #define PROP(name,value) \
-    {{name, sizeof( value), value }}
+    { .propInit = {name, sizeof( value), (void *)(uintptr_t)value }}
+
+#define STRINGPROP(name,value) \
+    { .stringInit = { 2, name, (void *)&(value) }}
 
 #define NULLPROP(name) \
-    {{name, 0, (void *)0 }}
+    { .propInit = {name, 0, (void *)0 }}
 
 #define DATAPROP(data) \
-    {{(char *)1, (long)&((data).length), (void *)&((data).address) }}
+    { .dataInit = {1, &((data).length), (long *) &((data).address) }}
 
 #define DATANODE(data) \
-    {{(char *)1, (long)&((data).length), (void *)&((data).address) }}
+    { .dataInit = {1, &((data).length), (long *)&((data).address) }}
diff --git a/pexpert/i386/kd.c b/pexpert/i386/kd.c
index 0e8dd758f..a01e99c12 100644
--- a/pexpert/i386/kd.c
+++ b/pexpert/i386/kd.c
@@ -79,6 +79,12 @@
 #include
 
+extern void cpu_shutdown(void);
+
+int cngetc(void);
+int cnmaygetc(void);
+void kdreboot(void);
+
 /*
  * Common I/O ports.
  */
@@ -165,8 +171,6 @@ kd_sendcmd(unsigned char ch)
 void
 kdreboot(void)
 {
-	extern void cpu_shutdown(void);
-
 	kd_sendcmd( K_CMD_RESET );
 
 	/*
diff --git a/pexpert/i386/pe_identify_machine.c b/pexpert/i386/pe_identify_machine.c
index d83472337..d66e70ace 100644
--- a/pexpert/i386/pe_identify_machine.c
+++ b/pexpert/i386/pe_identify_machine.c
@@ -34,15 +34,15 @@ void pe_identify_machine(boot_args *args);
 * Sets up platform parameters. 
* Returns: nothing */ -void pe_identify_machine(boot_args *args) +void pe_identify_machine(__unused boot_args *args) { // Clear the gPEClockFrequencyInfo struct bzero((void *)&gPEClockFrequencyInfo, sizeof(clock_frequency_info_t)); // Start with default values. - gPEClockFrequencyInfo.timebase_frequency_hz = 25000000; - gPEClockFrequencyInfo.bus_frequency_hz = 100000000; - gPEClockFrequencyInfo.cpu_frequency_hz = 300000000; + gPEClockFrequencyInfo.timebase_frequency_hz = 1000000000; + gPEClockFrequencyInfo.bus_frequency_hz = 100000000; + gPEClockFrequencyInfo.cpu_frequency_hz = 300000000; gPEClockFrequencyInfo.bus_frequency_min_hz = gPEClockFrequencyInfo.bus_frequency_hz; gPEClockFrequencyInfo.bus_frequency_max_hz = gPEClockFrequencyInfo.bus_frequency_hz; diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index 21d0e34ce..89d90db7f 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -41,14 +41,14 @@ extern void pe_identify_machine(void * args); extern void initialize_screen(void *, unsigned int); /* Local references */ -static vm_offset_t mapframebuffer(caddr_t,int); -static vm_offset_t PE_fb_vaddr = 0; static int PE_fb_mode = TEXT_MODE; /* private globals */ PE_state_t PE_state; dt_data gMemoryMapNode; dt_data gDriversProp; +dt_data gRootpathProp; +dt_data gCompatibleProp; /* Clock Frequency Info */ clock_frequency_info_t gPEClockFrequencyInfo; @@ -63,15 +63,19 @@ int PE_initialize_console( PE_Video * info, int op ) * Refuse changes from outside pexpert. * The video mode setup by the booter cannot be changed. */ - if ( info && (info == &PE_state.video) ) + if ( info ) { - bootInfo.v_baseAddr = PE_fb_vaddr; + bootInfo.v_baseAddr = info->v_baseAddr; bootInfo.v_rowBytes = info->v_rowBytes; bootInfo.v_width = info->v_width; bootInfo.v_height = info->v_height; bootInfo.v_depth = info->v_depth; - bootInfo.v_display = PE_fb_mode; bInfo = &bootInfo; + if (info == &PE_state.video) { + bootInfo.v_display = PE_fb_mode; + } else { + bootInfo.v_display = GRAPHICS_MODE; + } } else bInfo = 0; @@ -108,6 +112,8 @@ void PE_init_iokit(void) long * dt; int i; KernelBootArgs_t *kap = (KernelBootArgs_t *)PE_state.bootArgs; + enum { kMaxBootVar = 128 }; + char *rdValue, *platformValue; typedef struct { char name[32]; @@ -126,9 +132,35 @@ void PE_init_iokit(void) gDriversProp.length = kap->numBootDrivers * sizeof(DriversPackageProp); gMemoryMapNode.length = 2 * sizeof(long); + rdValue = kalloc(kMaxBootVar); + if ( PE_parse_boot_arg("rd", rdValue) ) { + if (*rdValue == '*') { + gRootpathProp.address = (rdValue + 1); + } else { + gRootpathProp.address = rdValue; + } + strcat(rdValue, ","); + } else { + gRootpathProp.address = rdValue; + rdValue[0] = '\0'; + } + strcat(rdValue, kap->bootFile); + gRootpathProp.length = strlen(rdValue) + 1; + + platformValue = kalloc(kMaxBootVar); + if ( ! 
PE_parse_boot_arg("platform", platformValue) ) { + strcpy(platformValue, kDefaultPlatformName); + } + gCompatibleProp.address = platformValue; + gCompatibleProp.length = strlen(platformValue) + 1; + dt = (long *) createdt( fakePPCDeviceTree, &((boot_args*)PE_state.fakePPCBootArgs)->deviceTreeLength ); + kfree(rdValue, kMaxBootVar); + kfree(platformValue, kMaxBootVar); + + if ( dt ) { DriversPackageProp * prop = (DriversPackageProp *) gDriversProp.address; @@ -156,13 +188,13 @@ void PE_init_iokit(void) prop->value[1] = kap->driverConfig[i].size; } - *gMemoryMapNode.address = kap->numBootDrivers + 1; + *((long *)gMemoryMapNode.address) = kap->numBootDrivers + 1; } /* Setup powermac_info and powermac_machine_info structures */ ((boot_args*)PE_state.fakePPCBootArgs)->deviceTreeP = (unsigned long *) dt; - ((boot_args*)PE_state.fakePPCBootArgs)->topOfKernelData = (unsigned int) kalloc(0x2000); + ((boot_args*)PE_state.fakePPCBootArgs)->topOfKernelData = (unsigned long) kalloc(0x2000); /* * Setup the OpenFirmware Device Tree routines @@ -174,7 +206,7 @@ void PE_init_iokit(void) /* * Fetch the CLUT and the noroot image. */ - bcopy( (void *) bootClut, appleClut8, sizeof(appleClut8) ); + bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) ); default_noroot.width = kFailedBootWidth; default_noroot.height = kFailedBootHeight; @@ -239,15 +271,6 @@ void PE_init_platform(boolean_t vm_initialized, void * args) void PE_create_console( void ) { - if ( (PE_fb_vaddr == 0) && (PE_state.video.v_baseAddr != 0) ) - { - PE_fb_vaddr = mapframebuffer((caddr_t) PE_state.video.v_baseAddr, - (PE_fb_mode == TEXT_MODE) ? - /* text mode */ PE_state.video.v_rowBytes : - /* grfx mode */ PE_state.video.v_rowBytes * - PE_state.video.v_height); - } - if ( PE_state.video.v_display ) PE_initialize_console( &PE_state.video, kPEGraphicsMode ); else @@ -277,13 +300,13 @@ int PE_current_console( PE_Video * info ) return (0); } -void PE_display_icon( unsigned int flags, const char * name ) +void PE_display_icon( __unused unsigned int flags, __unused const char * name ) { if ( default_noroot_data ) vc_display_icon( &default_noroot, default_noroot_data ); } -extern boolean_t PE_get_hotkey( unsigned char key ) +extern boolean_t PE_get_hotkey( __unused unsigned char key ) { return (FALSE); } @@ -322,30 +345,11 @@ void PE_call_timebase_callback(void) if (gTimebaseCallback) gTimebaseCallback(&timebase_freq); } -/* - * map the framebuffer into kernel vm and return the (virtual) - * address. - */ -static vm_offset_t -mapframebuffer( caddr_t physaddr, /* start of framebuffer */ - int length) /* num bytes to map */ -{ - vm_offset_t vmaddr; - - if (physaddr != (caddr_t)trunc_page(physaddr)) - panic("Framebuffer not on page boundary"); - vmaddr = io_map((vm_offset_t)physaddr, length); - if (vmaddr == 0) - panic("can't alloc VM for framebuffer"); - - return vmaddr; -} - /* * The default (non-functional) PE_poll_input handler. 
*/ static int -PE_stub_poll_input(unsigned int options, char * c) +PE_stub_poll_input(__unused unsigned int options, char * c) { *c = 0xff; return 1; /* 0 for success, 1 for unsupported */ diff --git a/pexpert/i386/pe_interrupt.c b/pexpert/i386/pe_interrupt.c index 173dc13fa..322de1037 100644 --- a/pexpert/i386/pe_interrupt.c +++ b/pexpert/i386/pe_interrupt.c @@ -22,10 +22,13 @@ #include #include #include -#include #include #include + +void PE_incoming_interrupt(int, void *); + + struct i386_interrupt_handler { IOInterruptHandler handler; void *nub; @@ -37,10 +40,6 @@ typedef struct i386_interrupt_handler i386_interrupt_handler_t; i386_interrupt_handler_t PE_interrupt_handler; -void PE_platform_interrupt_initialize(void) -{ -} - void @@ -49,17 +48,11 @@ PE_incoming_interrupt(int interrupt, void *state) i386_interrupt_handler_t *vector; KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, - 0, ((unsigned int *)state)[5], 0, 0, 0); + 0, ((unsigned int *)state)[7], 0, 0, 0); vector = &PE_interrupt_handler; - switch (interrupt) { - case APIC_ERROR_INTERRUPT: - case SPURIOUS_INTERRUPT: - case INTERPROCESS_INTERRUPT: - lapic_interrupt(interrupt, state); - break; - default: + if (!lapic_interrupt(interrupt, state)) { vector->handler(vector->target, state, vector->nub, interrupt); } @@ -67,7 +60,8 @@ PE_incoming_interrupt(int interrupt, void *state) 0, 0, 0, 0, 0); } -void PE_install_interrupt_handler(void *nub, int source, +void PE_install_interrupt_handler(void *nub, + __unused int source, void *target, IOInterruptHandler handler, void *refCon) diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c index b82d5d43b..7e0ed56f1 100644 --- a/pexpert/i386/pe_kprintf.c +++ b/pexpert/i386/pe_kprintf.c @@ -84,8 +84,17 @@ void kprintf(const char *fmt, ...) boolean_t state; if (!disableSerialOuput) { - state = ml_set_interrupts_enabled(FALSE); - simple_lock(&kprintf_lock); + + /* + * Spin to get kprintf lock but re-enable interrupts while failing. + * This allows interrupts to be handled while waiting but + * interrupts are disabled once we have the lock. 
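+	 *
+	 * Without the re-enable window, a CPU could spin here with
+	 * interrupts masked for as long as another processor holds
+	 * kprintf_lock, holding off IPIs and device interrupts; briefly
+	 * restoring the previous interrupt state on each failed try
+	 * bounds that latency (and avoids a deadlock should the holder
+	 * ever need this CPU to take an interrupt).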
+ */ + state = ml_set_interrupts_enabled(FALSE); + while (!simple_lock_try(&kprintf_lock)) { + ml_set_interrupts_enabled(state); + ml_set_interrupts_enabled(FALSE); + } if (cpu_number() != cpu_last_locked) { MP_DEBUG_KPRINTF("[cpu%d...]\n", cpu_number()); diff --git a/pexpert/i386/pe_serial.c b/pexpert/i386/pe_serial.c index 201b76e9c..72ec3e136 100644 --- a/pexpert/i386/pe_serial.c +++ b/pexpert/i386/pe_serial.c @@ -28,6 +28,10 @@ #include #include +void serial_putc(char); +int serial_getc(void); +int serial_init(void); + /* standard port addresses */ enum { COM1_PORT_ADDR = 0x3f8, @@ -118,7 +122,7 @@ uart_putc( char c ) int serial_init( void ) { - if ( uart_initted || uart_probe() == 0 ) return 0; + if ( /*uart_initted ||*/ uart_probe() == 0 ) return 0; /* Disable hardware interrupts */ diff --git a/pexpert/pexpert/i386/protos.h b/pexpert/pexpert/i386/protos.h index a8ca3e9ab..cfcbdd9f3 100644 --- a/pexpert/pexpert/i386/protos.h +++ b/pexpert/pexpert/i386/protos.h @@ -64,7 +64,6 @@ __OUT(w, short) __OUT(l, long) extern void cninit(void); -extern void bcopy(void * from, void * to, int size); extern int sprintf(char * str, const char * format, ...); //------------------------------------------------------------------------ diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h index 0e2e8f0af..d69c8cbd1 100644 --- a/pexpert/pexpert/pexpert.h +++ b/pexpert/pexpert/pexpert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,6 +54,8 @@ void PE_init_platform( void PE_init_kprintf( boolean_t vm_initialized); +extern int32_t gPESerialBaud; + unsigned int PE_init_taproot(vm_offset_t *taddr); extern void (*PE_kputc)(char c); @@ -106,8 +108,10 @@ void PE_install_interrupt_handler( void *nub, int source, void *target, IOInterruptHandler handler, void *refCon); -void kprintf( - const char *fmt, ...); +#ifndef _FN_KPRINTF +#define _FN_KPRINTF +void kprintf(const char *fmt, ...); +#endif void init_display_putc(unsigned char *baseaddr, int rowbytes, int height); void display_putc(char c); @@ -190,8 +194,13 @@ extern char * PE_boot_args( void); extern boolean_t PE_parse_boot_arg( - char *arg_string, - void *arg_ptr); + const char *arg_string, + void *arg_ptr); + +extern boolean_t PE_parse_boot_argn( + const char *arg_string, + void *arg_ptr, + int max_arg); enum { kPEOptionKey = 0x3a, diff --git a/pexpert/pexpert/ppc/protos.h b/pexpert/pexpert/ppc/protos.h index 1159ee53f..0c7c88d17 100644 --- a/pexpert/pexpert/ppc/protos.h +++ b/pexpert/pexpert/ppc/protos.h @@ -68,7 +68,7 @@ static __inline__ unsigned int byte_reverse_word(unsigned int word) { //------------------------------------------------------------------------ // from ppc/serial_io.h -extern void initialize_serial(void * scc_phys_base); +extern void initialize_serial(void * scc_phys_base, uint32_t serial_baud); //------------------------------------------------------------------------ @@ -143,9 +143,4 @@ extern void GratefulDebInit(void); extern void GratefulDebDisp(unsigned int coord, unsigned int data); extern void checkNMI(void); -/* - * Temporarily stolen from ppc/cpu_number.h - */ -int cpu_number(void); - #endif /* _PEXPERT_PPC_PROTOS_H_ */ diff --git a/pexpert/pexpert/protos.h b/pexpert/pexpert/protos.h index 6ec4ed336..d146f7a12 100644 --- a/pexpert/pexpert/protos.h +++ b/pexpert/pexpert/protos.h @@ -28,8 +28,8 @@ #include #include #include -#include #include +#include #include 
#include @@ -38,15 +38,8 @@ // from ppc/misc_protos.h extern void printf(const char *fmt, ...); -extern int strcmp(const char *s1, const char *s2); -extern int strncmp(const char *s1, const char *s2, unsigned long n); -extern int strlen( register const char *string); -extern char *strcat(char *dest, const char *src); -extern char *strcpy(char *dest, const char *src); -extern char *strncpy(char *dest, const char *src, unsigned long n); extern void interrupt_enable(void); extern void interrupt_disable(void); -extern void bcopy(void * from, void * to, int size); #if __ppc__ extern void bcopy_nc(char *from, char *to, int size); /* uncached-safe */ #else @@ -55,13 +48,6 @@ extern void bcopy_nc(char *from, char *to, int size); /* uncached-safe */ //------------------------------------------------------------------------ //from kern/misc_protos.h -extern void panic(const char *string, ...); - -/* Zero an arbitrarily aligned array */ -extern void bzero( - char *from, - vm_size_t nbytes); - extern void _doprnt( register const char *fmt, diff --git a/pexpert/ppc/pe_init.c b/pexpert/ppc/pe_init.c index 636962161..02b5251d6 100644 --- a/pexpert/ppc/pe_init.c +++ b/pexpert/ppc/pe_init.c @@ -23,7 +23,6 @@ * file: pe_init.c * PPC platform expert initialization. */ -#include #include #include #include diff --git a/pexpert/ppc/pe_kprintf.c b/pexpert/ppc/pe_kprintf.c index 57794ef8e..e8713ce46 100644 --- a/pexpert/ppc/pe_kprintf.c +++ b/pexpert/ppc/pe_kprintf.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -52,16 +53,48 @@ struct slock kprintf_lock; void PE_init_kprintf(boolean_t vm_initialized) { unsigned int boot_arg; + int32_t cnt, size, serial_baud = -1; + DTEntry options; + char *str, baud[7]; if (PE_state.initialized == FALSE) panic("Platform Expert not initialized"); - if (PE_parse_boot_arg("debug", &boot_arg)) + if (PE_parse_boot_arg("debug", &boot_arg)) if(boot_arg & DB_KPRT) disableSerialOuput = FALSE; + if (DTLookupEntry(0, "/options", &options) == kSuccess) { + if (DTGetProperty(options, "input-device", &str, &size) == kSuccess) { + if ((size > 5) && !strncmp("scca:", str, 5)) { + size -= 5; + str += 5; + if (size <= 6) { + strncpy(baud, str, size); + baud[size] = '\0'; + gPESerialBaud = strtol(baud, 0, 0); + } + } + } + if (DTGetProperty(options, "output-device", &str, &size) == kSuccess) { + if ((size > 5) && !strncmp("scca:", str, 5)) { + size -= 5; + str += 5; + if (size <= 6) { + strncpy(baud, str, size); + baud[size] = '\0'; + gPESerialBaud = strtol(baud, 0, 0); + } + } + } + } + + /* Check the boot-args for new serial baud. */ + if (PE_parse_boot_arg("serialbaud", &serial_baud)) + if (serial_baud != -1) gPESerialBaud = serial_baud; + if( (scc = PE_find_scc())) { /* See if we can find the serial port */ - scc = io_map_spec(scc, 0x1000); /* Map it in */ - initialize_serial((void *)scc); /* Start up the serial driver */ + scc = io_map_spec(scc, 0x1000); /* Map it in */ + initialize_serial((void *)scc, gPESerialBaud); /* Start up the serial driver */ PE_kputc = serial_putc; simple_lock_init(&kprintf_lock, 0); -- 2.45.2
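
Notes on a few of the interfaces touched above, with illustrative sketches; names called out as hypothetical are not part of the patch.

vm_map_get_phys_page() walks nested submaps and shadow chains, faulting pages in where necessary, and returns the physical page number backing an address. A minimal caller, assuming the 4 KB (shift-by-12) page size the routine itself hard-codes; example_kvtophys is a hypothetical name:

	#include <mach/vm_types.h>
	#include <mach/vm_param.h>
	#include <vm/vm_map.h>

	extern vm_map_t kernel_map;
	extern ppnum_t vm_map_get_phys_page(vm_map_t map, vm_offset_t offset);

	/* Hypothetical helper: translate a kernel VA to a physical address. */
	static addr64_t
	example_kvtophys(vm_offset_t va)
	{
		ppnum_t pn = vm_map_get_phys_page(kernel_map, va);

		if (pn == 0)
			return 0;	/* nothing resident or mappable there */
		/* The routine shifts by 12, so shift back and restore the
		   byte offset within the page. */
		return (((addr64_t)pn) << 12) | (va & PAGE_MASK);
	}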
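
kernel_object_iopl_request() now screens its flag word against UPL_VALID_FLAGS and fails with KERN_INVALID_VALUE when any unknown bit is set, so an unsupported request from a newer caller fails loudly instead of being silently misread. The same mask-and-reject idiom in isolation (the MY_* names are made up):

	#include <mach/kern_return.h>

	#define MY_FLAG_A	0x1
	#define MY_FLAG_B	0x2
	#define MY_VALID_FLAGS	(MY_FLAG_A | MY_FLAG_B)	/* bits this version knows */

	static kern_return_t
	my_request(int flags)
	{
		/* For forward compatibility's sake, reject any unknown flag. */
		if (flags & ~MY_VALID_FLAGS)
			return KERN_INVALID_VALUE;

		/* ... act on MY_FLAG_A / MY_FLAG_B ... */
		return KERN_SUCCESS;
	}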
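
PE_parse_boot_argn() is the bounded replacement for PE_parse_boot_arg(), which now simply forwards with max_len = -1 (the old unbounded string copy); a max_len of 0 suppresses the string copy entirely. A sketch of a caller with fixed-size buffers ("debug" and "rd" are real boot-args; the surrounding function is illustrative):

	#include <pexpert/pexpert.h>

	static void
	example_boot_args(void)
	{
		unsigned int	dbg = 0;
		char		rd[16];

		/* Numeric arguments are decoded into *arg_ptr. */
		if (PE_parse_boot_argn("debug", &dbg, sizeof (dbg)))
			kprintf("debug=0x%x\n", dbg);

		/* String arguments copy at most max_len characters; leave a
		   byte of slack in case the terminator is written after the
		   copied characters. */
		if (PE_parse_boot_argn("rd", rd, sizeof (rd) - 1))
			kprintf("rd=%s\n", rd);
	}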
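
Finally, the serial-baud plumbing: on PPC, PE_init_kprintf() now derives gPESerialBaud from the OpenFirmware /options input-device or output-device property when it has the form "scca:<rate>" (at most six digits fit the parser's buffer), then lets a serialbaud boot-arg override it before passing the value to initialize_serial(). For example, booting with

	serialbaud=57600

in boot-args selects 57600 baud, assuming the SCC driver honors the requested rate.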